Imported Upstream version 5.0.5

author Anas Nashif <anas.nashif@intel.com>
Mon, 5 Nov 2012 15:23:53 +0000 (07:23 -0800)
committer Anas Nashif <anas.nashif@intel.com>
Mon, 5 Nov 2012 15:23:53 +0000 (07:23 -0800)
diff --git a/.gdbinit b/.gdbinit
new file mode 100644
index 0000000..6328dc9
--- /dev/null
+++ b/.gdbinit
@@ -0,0 +1,32 @@
+# Copyright 1999 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+define pz
+set __gmpz_dump ($)
+end
+
+define pq
+set __gmpz_dump ($->_mp_num)
+echo /
+set __gmpz_dump ($->_mp_den)
+end
+
+define pf
+set __gmpf_dump ($)
+end
+
diff --git a/AUTHORS b/AUTHORS
new file mode 100644
index 0000000..79bfa81
--- /dev/null
+++ b/AUTHORS
@@ -0,0 +1,55 @@
+Authors of GNU MP (in chronological order of initial contribution)
+
+Torbjörn Granlund      Main author
+
+John Amanatides                Original version of mpz/pprime_p.c
+
+Paul Zimmermann                mpn/generic/mul_fft.c, dc_divrem_n.c, rootrem.c,
+                       old mpz/powm.c, old toom3 code.
+
+Ken Weber              mpn/generic/bdivmod.c, old mpn/generic/gcd.c
+
+Bennet Yee             mpz/jacobi.c, mpz/legendre.c
+
+Andreas Schwab         mpn/m68k/lshift.asm, mpn/m68k/rshift.asm
+
+Robert Harley          Old mpn/generic/mul_n.c, many files in mpn/arm
+
+Linus Nordberg         Random number framework, original autoconfery
+
+Kent Boortz            MacOS 9 port
+
+Kevin Ryde             Most x86 assembly, new autoconfery, and countless other
+                       things (please see the GMP manual for complete list)
+
+Gerardo Ballabio       gmpxx.h and C++ istream input
+
+Pedro Gimeno           Mersenne Twister random generator, other random number
+                       revisions
+
+Jason Moxham           mpz/fac_ui.c and gen-fac_ui.c
+
+Niels Möller           mpn/generic/hgcd2.c, gcd.c, gcdext.c, matrix22_mul.c,
+                       hgcd.c, gcdext_1.c, gcd_subdiv_step.c, gcd_lehmer.c,
+                       gcdext_subdiv_step.c, gcdext_lehmer.c,
+                       toom_interpolate_7pts.c, mulmod_bnm1.c, dcpi1_bdiv_qr.c,
+                       dcpi1_bdiv_q.c, sbpi1_bdiv_qr.c, sbpi1_bdiv_q.c,
+                       toom_eval_dgr3_pm1.c, toom_eval_dgr3_pm2.c,
+                       toom_eval_pm1.c, toom_eval_pm2.c, toom_eval_pm2exp.c,
+                       divexact.c, mpn/x86/invert_limb.asm,
+                       mpn/x86_64/invert_limb.asm, mpz/nextprime.c,
+                       mpz/divexact.c.
+
+Marco Bodrato          mpn/generic/toom44_mul.c, toom4_sqr.c, toom53_mul.c,
+                       toom62_mul.c, toom43_mul.c, toom52_mul.c,
+                       toom_interpolate_6pts.c, toom_couple_handling.c,
+                       toom63_mul.c, toom_interpolate_8pts.c,
+                       toom6h_mul.c, toom6_sqr.c, toom_interpolate_12pts.c,
+                       toom8h_mul.c, toom8_sqr.c, toom_interpolate_16pts.c,
+                       mulmod_bnm1.c, sqrmod_bnm1.c, nussbaumer_mul.c,
+                       toom_eval_pm2.c, toom_eval_pm2rexp.c,
+                       mullo_n.c, invert.c, invertappr.c.
+
+David Harvey           mpn/x86_64/mul_basecase.asm
+
+Martin Boij            mpn/generic/perfpow.c
diff --git a/COPYING b/COPYING
new file mode 100644
index 0000000..94a9ed0
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,674 @@
+                    GNU GENERAL PUBLIC LICENSE
+                       Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                            Preamble
+
+  The GNU General Public License is a free, copyleft license for
+software and other kinds of works.
+
+  The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works.  By contrast,
+the GNU General Public License is intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users.  We, the Free Software Foundation, use the
+GNU General Public License for most of our software; it applies also to
+any other work released this way by its authors.  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+  To protect your rights, we need to prevent others from denying you
+these rights or asking you to surrender the rights.  Therefore, you have
+certain responsibilities if you distribute copies of the software, or if
+you modify it: responsibilities to respect the freedom of others.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must pass on to the recipients the same
+freedoms that you received.  You must make sure that they, too, receive
+or can get the source code.  And you must show them these terms so they
+know their rights.
+
+  Developers that use the GNU GPL protect your rights with two steps:
+(1) assert copyright on the software, and (2) offer you this License
+giving you legal permission to copy, distribute and/or modify it.
+
+  For the developers' and authors' protection, the GPL clearly explains
+that there is no warranty for this free software.  For both users' and
+authors' sake, the GPL requires that modified versions be marked as
+changed, so that their problems will not be attributed erroneously to
+authors of previous versions.
+
+  Some devices are designed to deny users access to install or run
+modified versions of the software inside them, although the manufacturer
+can do so.  This is fundamentally incompatible with the aim of
+protecting users' freedom to change the software.  The systematic
+pattern of such abuse occurs in the area of products for individuals to
+use, which is precisely where it is most unacceptable.  Therefore, we
+have designed this version of the GPL to prohibit the practice for those
+products.  If such problems arise substantially in other domains, we
+stand ready to extend this provision to those domains in future versions
+of the GPL, as needed to protect the freedom of users.
+
+  Finally, every program is threatened constantly by software patents.
+States should not allow patents to restrict development and use of
+software on general-purpose computers, but in those that do, we wish to
+avoid the special danger that patents applied to a free program could
+make it effectively proprietary.  To prevent this, the GPL assures that
+patents cannot be used to render the program non-free.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+                       TERMS AND CONDITIONS
+
+  0. Definitions.
+
+  "This License" refers to version 3 of the GNU General Public License.
+
+  "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+  "The Program" refers to any copyrightable work licensed under this
+License.  Each licensee is addressed as "you".  "Licensees" and
+"recipients" may be individuals or organizations.
+
+  To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy.  The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+  A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+  To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy.  Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+  To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies.  Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+  An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License.  If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+  1. Source Code.
+
+  The "source code" for a work means the preferred form of the work
+for making modifications to it.  "Object code" means any non-source
+form of a work.
+
+  A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+  The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form.  A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+  The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities.  However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work.  For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+  The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+  The Corresponding Source for a work in source code form is that
+same work.
+
+  2. Basic Permissions.
+
+  All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met.  This License explicitly affirms your unlimited
+permission to run the unmodified Program.  The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work.  This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+  You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force.  You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright.  Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+  Conveying under any other circumstances is permitted solely under
+the conditions stated below.  Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+  3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+  No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+  When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+  4. Conveying Verbatim Copies.
+
+  You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+  You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+  5. Conveying Modified Source Versions.
+
+  You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+    a) The work must carry prominent notices stating that you modified
+    it, and giving a relevant date.
+
+    b) The work must carry prominent notices stating that it is
+    released under this License and any conditions added under section
+    7.  This requirement modifies the requirement in section 4 to
+    "keep intact all notices".
+
+    c) You must license the entire work, as a whole, under this
+    License to anyone who comes into possession of a copy.  This
+    License will therefore apply, along with any applicable section 7
+    additional terms, to the whole of the work, and all its parts,
+    regardless of how they are packaged.  This License gives no
+    permission to license the work in any other way, but it does not
+    invalidate such permission if you have separately received it.
+
+    d) If the work has interactive user interfaces, each must display
+    Appropriate Legal Notices; however, if the Program has interactive
+    interfaces that do not display Appropriate Legal Notices, your
+    work need not make them do so.
+
+  A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit.  Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+  6. Conveying Non-Source Forms.
+
+  You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+    a) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by the
+    Corresponding Source fixed on a durable physical medium
+    customarily used for software interchange.
+
+    b) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by a
+    written offer, valid for at least three years and valid for as
+    long as you offer spare parts or customer support for that product
+    model, to give anyone who possesses the object code either (1) a
+    copy of the Corresponding Source for all the software in the
+    product that is covered by this License, on a durable physical
+    medium customarily used for software interchange, for a price no
+    more than your reasonable cost of physically performing this
+    conveying of source, or (2) access to copy the
+    Corresponding Source from a network server at no charge.
+
+    c) Convey individual copies of the object code with a copy of the
+    written offer to provide the Corresponding Source.  This
+    alternative is allowed only occasionally and noncommercially, and
+    only if you received the object code with such an offer, in accord
+    with subsection 6b.
+
+    d) Convey the object code by offering access from a designated
+    place (gratis or for a charge), and offer equivalent access to the
+    Corresponding Source in the same way through the same place at no
+    further charge.  You need not require recipients to copy the
+    Corresponding Source along with the object code.  If the place to
+    copy the object code is a network server, the Corresponding Source
+    may be on a different server (operated by you or a third party)
+    that supports equivalent copying facilities, provided you maintain
+    clear directions next to the object code saying where to find the
+    Corresponding Source.  Regardless of what server hosts the
+    Corresponding Source, you remain obligated to ensure that it is
+    available for as long as needed to satisfy these requirements.
+
+    e) Convey the object code using peer-to-peer transmission, provided
+    you inform other peers where the object code and Corresponding
+    Source of the work are being offered to the general public at no
+    charge under subsection 6d.
+
+  A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+  A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling.  In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage.  For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product.  A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+  "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source.  The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+  If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information.  But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+  The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed.  Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+  Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+  7. Additional Terms.
+
+  "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law.  If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+  When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it.  (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.)  You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+  Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+    a) Disclaiming warranty or limiting liability differently from the
+    terms of sections 15 and 16 of this License; or
+
+    b) Requiring preservation of specified reasonable legal notices or
+    author attributions in that material or in the Appropriate Legal
+    Notices displayed by works containing it; or
+
+    c) Prohibiting misrepresentation of the origin of that material, or
+    requiring that modified versions of such material be marked in
+    reasonable ways as different from the original version; or
+
+    d) Limiting the use for publicity purposes of names of licensors or
+    authors of the material; or
+
+    e) Declining to grant rights under trademark law for use of some
+    trade names, trademarks, or service marks; or
+
+    f) Requiring indemnification of licensors and authors of that
+    material by anyone who conveys the material (or modified versions of
+    it) with contractual assumptions of liability to the recipient, for
+    any liability that these contractual assumptions directly impose on
+    those licensors and authors.
+
+  All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10.  If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term.  If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+  If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+  Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+  8. Termination.
+
+  You may not propagate or modify a covered work except as expressly
+provided under this License.  Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+  However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+  Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+  Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License.  If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+  9. Acceptance Not Required for Having Copies.
+
+  You are not required to accept this License in order to receive or
+run a copy of the Program.  Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance.  However,
+nothing other than this License grants you permission to propagate or
+modify any covered work.  These actions infringe copyright if you do
+not accept this License.  Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+  10. Automatic Licensing of Downstream Recipients.
+
+  Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License.  You are not responsible
+for enforcing compliance by third parties with this License.
+
+  An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations.  If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+  You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License.  For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+  11. Patents.
+
+  A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based.  The
+work thus licensed is called the contributor's "contributor version".
+
+  A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version.  For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+  Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+  In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement).  To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+  If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients.  "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+  If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+  A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License.  You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+  Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+  12. No Surrender of Others' Freedom.
+
+  If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all.  For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+  13. Use with the GNU Affero General Public License.
+
+  Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU Affero General Public License into a single
+combined work, and to convey the resulting work.  The terms of this
+License will continue to apply to the part which is the covered work,
+but the special requirements of the GNU Affero General Public License,
+section 13, concerning interaction through a network will apply to the
+combination as such.
+
+  14. Revised Versions of this License.
+
+  The Free Software Foundation may publish revised and/or new versions of
+the GNU General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+  Each version is given a distinguishing version number.  If the
+Program specifies that a certain numbered version of the GNU General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation.  If the Program does not specify a version number of the
+GNU General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+  If the Program specifies that a proxy can decide which future
+versions of the GNU General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+  Later license versions may give you additional or different
+permissions.  However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+  15. Disclaimer of Warranty.
+
+  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+  16. Limitation of Liability.
+
+  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+  17. Interpretation of Sections 15 and 16.
+
+  If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+                     END OF TERMS AND CONDITIONS
+
+            How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+  If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+
+    <program>  Copyright (C) <year>  <name of author>
+    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+
+  You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<http://www.gnu.org/licenses/>.
+
+  The GNU General Public License does not permit incorporating your program
+into proprietary programs.  If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.  But first, please read
+<http://www.gnu.org/philosophy/why-not-lgpl.html>.
diff --git a/COPYING.LIB b/COPYING.LIB

new file mode 100644 (file)

index 0000000..fc8a5de
--- /dev/null
+++ b/COPYING.LIB
@@ -0,0 +1,165 @@
+                  GNU LESSER GENERAL PUBLIC LICENSE
+                       Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+
+  This version of the GNU Lesser General Public License incorporates
+the terms and conditions of version 3 of the GNU General Public
+License, supplemented by the additional permissions listed below.
+
+  0. Additional Definitions. 
+
+  As used herein, "this License" refers to version 3 of the GNU Lesser
+General Public License, and the "GNU GPL" refers to version 3 of the GNU
+General Public License.
+
+  "The Library" refers to a covered work governed by this License,
+other than an Application or a Combined Work as defined below.
+
+  An "Application" is any work that makes use of an interface provided
+by the Library, but which is not otherwise based on the Library.
+Defining a subclass of a class defined by the Library is deemed a mode
+of using an interface provided by the Library.
+
+  A "Combined Work" is a work produced by combining or linking an
+Application with the Library.  The particular version of the Library
+with which the Combined Work was made is also called the "Linked
+Version".
+
+  The "Minimal Corresponding Source" for a Combined Work means the
+Corresponding Source for the Combined Work, excluding any source code
+for portions of the Combined Work that, considered in isolation, are
+based on the Application, and not on the Linked Version.
+
+  The "Corresponding Application Code" for a Combined Work means the
+object code and/or source code for the Application, including any data
+and utility programs needed for reproducing the Combined Work from the
+Application, but excluding the System Libraries of the Combined Work.
+
+  1. Exception to Section 3 of the GNU GPL.
+
+  You may convey a covered work under sections 3 and 4 of this License
+without being bound by section 3 of the GNU GPL.
+
+  2. Conveying Modified Versions.
+
+  If you modify a copy of the Library, and, in your modifications, a
+facility refers to a function or data to be supplied by an Application
+that uses the facility (other than as an argument passed when the
+facility is invoked), then you may convey a copy of the modified
+version:
+
+   a) under this License, provided that you make a good faith effort to
+   ensure that, in the event an Application does not supply the
+   function or data, the facility still operates, and performs
+   whatever part of its purpose remains meaningful, or
+
+   b) under the GNU GPL, with none of the additional permissions of
+   this License applicable to that copy.
+
+  3. Object Code Incorporating Material from Library Header Files.
+
+  The object code form of an Application may incorporate material from
+a header file that is part of the Library.  You may convey such object
+code under terms of your choice, provided that, if the incorporated
+material is not limited to numerical parameters, data structure
+layouts and accessors, or small macros, inline functions and templates
+(ten or fewer lines in length), you do both of the following:
+
+   a) Give prominent notice with each copy of the object code that the
+   Library is used in it and that the Library and its use are
+   covered by this License.
+
+   b) Accompany the object code with a copy of the GNU GPL and this license
+   document.
+
+  4. Combined Works.
+
+  You may convey a Combined Work under terms of your choice that,
+taken together, effectively do not restrict modification of the
+portions of the Library contained in the Combined Work and reverse
+engineering for debugging such modifications, if you also do each of
+the following:
+
+   a) Give prominent notice with each copy of the Combined Work that
+   the Library is used in it and that the Library and its use are
+   covered by this License.
+
+   b) Accompany the Combined Work with a copy of the GNU GPL and this license
+   document.
+
+   c) For a Combined Work that displays copyright notices during
+   execution, include the copyright notice for the Library among
+   these notices, as well as a reference directing the user to the
+   copies of the GNU GPL and this license document.
+
+   d) Do one of the following:
+
+       0) Convey the Minimal Corresponding Source under the terms of this
+       License, and the Corresponding Application Code in a form
+       suitable for, and under terms that permit, the user to
+       recombine or relink the Application with a modified version of
+       the Linked Version to produce a modified Combined Work, in the
+       manner specified by section 6 of the GNU GPL for conveying
+       Corresponding Source.
+
+       1) Use a suitable shared library mechanism for linking with the
+       Library.  A suitable mechanism is one that (a) uses at run time
+       a copy of the Library already present on the user's computer
+       system, and (b) will operate properly with a modified version
+       of the Library that is interface-compatible with the Linked
+       Version. 
+
+   e) Provide Installation Information, but only if you would otherwise
+   be required to provide such information under section 6 of the
+   GNU GPL, and only to the extent that such information is
+   necessary to install and execute a modified version of the
+   Combined Work produced by recombining or relinking the
+   Application with a modified version of the Linked Version. (If
+   you use option 4d0, the Installation Information must accompany
+   the Minimal Corresponding Source and Corresponding Application
+   Code. If you use option 4d1, you must provide the Installation
+   Information in the manner specified by section 6 of the GNU GPL
+   for conveying Corresponding Source.)
+
+  5. Combined Libraries.
+
+  You may place library facilities that are a work based on the
+Library side by side in a single library together with other library
+facilities that are not Applications and are not covered by this
+License, and convey such a combined library under terms of your
+choice, if you do both of the following:
+
+   a) Accompany the combined library with a copy of the same work based
+   on the Library, uncombined with any other library facilities,
+   conveyed under the terms of this License.
+
+   b) Give prominent notice with the combined library that part of it
+   is a work based on the Library, and explaining where to find the
+   accompanying uncombined form of the same work.
+
+  6. Revised Versions of the GNU Lesser General Public License.
+
+  The Free Software Foundation may publish revised and/or new versions
+of the GNU Lesser General Public License from time to time. Such new
+versions will be similar in spirit to the present version, but may
+differ in detail to address new problems or concerns.
+
+  Each version is given a distinguishing version number. If the
+Library as you received it specifies that a certain numbered version
+of the GNU Lesser General Public License "or any later version"
+applies to it, you have the option of following the terms and
+conditions either of that published version or of any later version
+published by the Free Software Foundation. If the Library as you
+received it does not specify a version number of the GNU Lesser
+General Public License, you may choose any version of the GNU Lesser
+General Public License ever published by the Free Software Foundation.
+
+  If the Library as you received it specifies that a proxy can decide
+whether future versions of the GNU Lesser General Public License shall
+apply, that proxy's public statement of acceptance of any version is
+permanent authorization for you to choose that version for the
+Library.
diff --git a/ChangeLog b/ChangeLog

new file mode 100644 (file)

index 0000000..01b90c3
--- /dev/null
+++ b/ChangeLog
@@ -0,0 +1,26499 @@
+2012-05-06  Torbjorn Granlund  <tege@gmplib.org>
+
+       * Version 5.0.5 released.
+
+       * mpn/Makefile.am (TARG_DIST): Remove thumb, since directory now empty.
+
+2012-04-28  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/thumb/add_n.s: Remove broken code.
+       * mpn/thumb/sub_n.s: Likewise.
+
+2012-04-02  Torbjorn Granlund  <tege@gmplib.org>
+
+       * gmp-h.in (__GNU_MP_VERSION_PATCHLEVEL): Bump.
+
+       * Makefile.am (LIBGMP_LT_*, LIBGMPXX_LT_*, LIBMP_LT_*):
+       Bump version info.
+
+       * gmp-h.in (_GMP_H_HAVE_FILE): Test also __STDIO_LOADED (for VMS).
+
+2012-03-27  Torbjorn Granlund  <tege@gmplib.org>
+
+       * config.guess: Fix typo in coreisbr recognition.
+
+2012-03-07  Torbjorn Granlund  <tege@gmplib.org>
+
+       * config.guess: Handle AMD 11h correctly.
+
+2012-03-04 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * tests/mpz/t-invert.c: Avoid testing mod 0.
+       * doc/gmp.texi (mpz_invert): Specify mod 0 is not handled.
+
+2012-02-24  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tests/mpn/logic.c: New file.
+       * tests/mpn/Makefile.am (check_PROGRAMS): Add logic.
+
+       * tests/mpz/t-invert.c: New file.
+       * tests/mpz/Makefile.am (check_PROGRAMS): Add t-invert.
+
+2012-02-11 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * doc/gmp.texi (Multiplication Algorithms): Add Toom[68]'n'half.
+
+2012-02-10  Torbjorn Granlund  <tege@gmplib.org>
+
+       * Version 5.0.4 released.
+
+2012-02-09 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * gmp-impl.h (mpn_toom3*_itch): Support any recursion depth.
+       * tests/refmpn.c (refmpn_mul): Restore tight allocations.
+
+2012-02-09  Marc Glisse  <marc.glisse@inria.fr>
+
+       * gmp-impl.h (ABS_CAST): New macro.
+       * mpf/cmp_si.c: Use ABS_CAST.
+       * mpf/get_si.c: Use ABS_CAST.
+       * mpf/iset_si.c: Use ABS_CAST.
+       * mpf/set_si.c: Use ABS_CAST.
+       * mpq/set_si.c: Use ABS_CAST.
+       * mpz/cmp_si.c: Use ABS_CAST.
+       * mpz/get_si.c: Use ABS_CAST.
+       * mpz/iset_si.c: Use ABS_CAST.
+       * mpz/mul_i.h: Use ABS_CAST.
+       * mpz/set_si.c: Use ABS_CAST.
+
+2012-02-09  Torbjorn Granlund  <tege@gmplib.org>
+
+       * gmp-h.in (__GNU_MP_VERSION_PATCHLEVEL): Bump.
+
+       * Makefile.am (LIBGMP_LT_*, LIBGMPXX_LT_*, LIBMP_LT_*):
+       Bump version info.
+
+2012-02-08  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/powerpc32/divrem_2.asm: Fix off-by-one condition in invert_limb
+       code.
+
+2012-02-08  Niels Möller  <nisse@lysator.liu.se>
+
+       * doc/gmp.texi (mpz_gcdext): Describe cofactor canonicalization.
+       (mpn_gcdext): Copied doc updates from main repo.
+
+2012-02-07  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/gcdext.c (mpn_gcdext): Fixed assert, related to the
+       special case A = (2k+1) G, B = 2 G.
+
+2012-02-06  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/hgcd.c (hgcd_matrix_update_q): Fixed carry handling
+       bug.
+
+       * tests/mpz/t-gcd.c (main): Omit tests with urandomb operands.
+       * tests/mpn/t-hgcd.c (main): Likewise.
+
+2012-02-05  Niels Möller  <nisse@lysator.liu.se>
+
+       * tests/mpz/t-gcd.c (main): Add tests with rrandomb operands.
+       * tests/mpn/t-hgcd.c (main): Likewise.
+
+       * mpn/generic/gcdext_subdiv_step.c (mpn_gcdext_subdiv_step):
+       Bugfix, in u1 += q * u0, handle carry in all cases. Also normalize
+       the product q * u0.
+
+2012-02-04 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * tests/refmpn.c (refmpn_mul): More conservative allocations.
+
+2012-02-03  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/bd1/gmp-mparam.h: New file.
+
+       * longlong.h (udiv_qrnnd from sdiv_qrnnd): Declare udiv_w_sdiv.
+
+       * mpn/generic/udiv_w_sdiv.c: Use c89 function header.
+
+2012-02-02 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/generic/toom_interpolate_16pts.c: Correct an unlikely 32-bit bug.
+
+2012-02-02  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/toom63_mul.c: Allow s+t==n by adjusting an ASSERT.
+       * mpn/generic/toom_interpolate_8pts.c: Perform final incr iff s+t!=n.
+
+       * tests/mpn/t-toom6h.c (MIN_BN): Make more consistent with ASSERT in
+       tested function.
+
+2012-02-01  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tests/mpn/t-mul.c: New file.
+       * tests/mpn/Makefile.am: Compile it.
+
+2012-01-31  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/powm_sec.c (SQR_BASECASE_LIM): New name for
+       SQR_BASECASE_MAX.
+       (SQR_BASECASE_LIM, fat variant): Define to read __gmpn_cpuvec.
+       (SQR_BASECASE_LIM, native variant): Define to SQR_TOOM2_THRESHOLD
+       straight, without arithmetic.
+       (mpn_local_sqr): Use BELOW_THRESHOLD as per Marco's suggestion.
+
+2012-01-30  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tests/mpz/t-powm.c: Ensure all sizes are seen.
+
+2012-01-27  Torbjorn Granlund  <tege@gmplib.org>
+
+       * Version 5.0.3 released.
+
+       * Upgrade to libtool 2.4.2.
+
+2012-01-25  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tune/tuneup.c: Remove unused tuneup variables.
+
+2012-01-23  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/powerpc64/mode64/p6/gmp-mparam.h: New file.
+       * mpn/powerpc64/mode64/p7/gmp-mparam.h: New file.
+       * mpn/x86_64/bobcat/gmp-mparam.h: New file.
+
+2012-01-18  Marc Glisse  <marc.glisse@inria.fr>
+
+       * doc/gmp.texi (mpf_class::mpf_class): Use mp_bitcnt_t.
+
+2012-01-17  Torbjorn Granlund  <tege@gmplib.org>
+
+       * gmp-h.in (__GNU_MP_VERSION_PATCHLEVEL): Bump.
+
+       * Makefile.am (LIBGMP_LT_*, LIBGMPXX_LT_*, LIBMP_LT_*):
+       Bump version info.
+
+       * configure.in: Add ultrasparc T4 support.
+
+       * demos/isprime.c (main): Run 25 millerrabin tests.
+
+2012-01-15  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpz/scan0.c (mpz_scan0): Use ~(mp_bitcnt_t) 0, rather than
+       ULONG_MAX, when returning "infinity".
+       * mpz/scan1.c (mpz_scan1): Likewise.
+
+2011-12-30  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpz/hamdist.c: Fix typo in a return statement.
+
+2011-12-08  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/powm_sec.c: Handle fat binaries better.
+
+2011-12-07  Torbjorn Granlund  <tege@gmplib.org>
+
+       * configure.in: Fix typo making HAVE_NATIVE_mpn_X fail for fat
+       functions.
+
+       * mpn/x86_64/fat/fat.c (__gmpn_cpuvec_init): Add a missing break.
+
+2011-12-01  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/fat/fat.c: Copy fake cpuid code from x86/fat/fat.c.
+
+       * gmp-impl.h (DECL_divexact_1): Fix typo in return type.
+
+2011-11-28  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/udiv_w_sdiv.c: Use CNST_LIMB for some constants.
+
+2011-11-25  Torbjorn Granlund  <tege@gmplib.org>
+
+       * configure.in: Overhaul x86/x86_64 support, merging three case
+       statements into one.
+
+2011-11-24  Torbjorn Granlund  <tege@gmplib.org>
+
+       * doc/gmp.texi (Formatted Output Strings): Clarify rules for mpf_t
+       precision.
+
+2011-11-21  Torbjorn Granlund  <tege@gmplib.org>
+
+       * gmp-h.in (__GNU_MP_RELEASE): Renamed from typo name.
+
+2011-11-20  Torbjorn Granlund  <tege@gmplib.org>
+
+       * configure.in: Split x86 CPUs into more subtypes for more accurate
+       passing of gcc flags.
+
+       * configure.in: Pass -m32 for powerpc64 with abi=32, via the _maybe
+       mechanism.
+
+2011-11-15  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/powm_sec.c (mpn_local_sqr): Remove forgotten TMP_* calls.
+       (redcify): Likewise.
+       (mpn_powm_sec): Likewise.
+
+       * mpn/generic/powm_sec.c (mpn_powm_sec): Use mpn_tabselect also in
+       initialisation.
+
+2011-10-15  Torbjorn Granlund  <tege@gmplib.org>
+
+       * configure.in (s390): Rewrite support to handle known CPUs.
+       * config.guess: Recognise s390 CPUs.
+       * config.sub: Match s390 CPUs.
+       * acinclude.m4 (S390_PATTERN, S390X_PATTERN): New defines.
+
+2011-10-14  Torbjorn Granlund  <tege@gmplib.org>
+
+       From Per Olofsson:
+       * mpn/generic/popham.c: Add __GMP_NOTHROW to make it match gmp.h.
+
+       * configure.in: AC_DEFINE HAVE_HOST_CPU_s390_zarch.
+       * longlong.h (s390): Use it.
+       (s390 umul_ppmm): Fix typo in pure C variant.
+
+2011-10-13  Torbjorn Granlund  <tege@gmplib.org>
+
+       * longlong.h (s390): Put back an accidentally deleted #else.
+
+       * configure.in (s390): Unset extra_functions for s390x.
+
+2011-10-12  Torbjorn Granlund  <tege@gmplib.org>
+
+       * longlong.h (s390 umul_ppmm): With new-enough gcc, avoid asm.
+
+       From Andreas Krebbel:
+       * longlong.h (s390 umul_ppmm): Support 32-bit limbs with gcc using
+       64-bit registers.
+       (s390 udiv_qrnnd): Likewise.
+
+2011-10-11  Torbjorn Granlund  <tege@gmplib.org>
+
+       * configure.in (s390x): Pass -mzarch to gcc in 32-bit mode.
+
+       * longlong.h (s390x): Add __CLOBBER_CC for relevant asm patterns.
+
+2011-10-10  Torbjorn Granlund  <tege@gmplib.org>
+
+       From Marco Trudel:
+       * tests/mpz/t-scan.c (check_ref): Fix loop end bound.
+
+2011-10-09  Torbjorn Granlund  <tege@gmplib.org>
+
+       * longlong.h (s390x): Put back UDItype casts to make gcc reloading use
+       the right mode for constants.
+       (s390x count_leading_zeros): Disable until we support z10 specifically.
+       (s390x add_ssaaaa): Remove algsi/slgsi until we support z10.
+
+2011-10-07  Torbjorn Granlund  <tege@gmplib.org>
+
+       * longlong.h (s390): Add 32-bit zarch umul_ppmm and udiv_qrnnd.
+       (s390): Overhaul 32-bit and 64-bit code.
+
+2011-10-04  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/Makefile.am (TARG_DIST): Add s390_32 and s390_64, remove s390.
+
+       * doc/gmp.texi (Custom Allocation): Rephrase a paragraph.
+
+       * demos/factorize.c: Run 25 Miller-Rabin tests.
+
+       * mpz/nextprime.c: Run 25 mpz_millerrabin tests (was 10).
+
+2011-10-03  Torbjorn Granlund  <tege@gmplib.org>
+
+       * configure.in: Support s390x.
+
+       * longlong.h: Add support for 64-bit s390x.
+
+       * mpn/s390_64: New directory.
+       * mpn/s390_32: Directory renamed from mpn/s390.
+
+2011-09-26  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/sh/sh2/submul_1.s: Make this old submul_1 implementation
+       actually compute intended function.
+
+2011-09-25  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/sh: Migrate files to '.asm'.
+       * configure.in: Recognise sh3 and sh4.
+
+2011-08-18  Torbjorn Granlund  <tege@gmplib.org>
+
+       * printf/doprntf.c (__gmp_doprnt_mpf): For DOPRNT_CONV_FIXED, ask for
+       one more digit.
+
+2011-08-17  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpf/sub.c: Fix typo in copy condition.  Delay an allocation.
+
+2011-08-10  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tests/rand/t-lc2exp.c (check_bigc): Call abort after reporting error.
+
+2011-07-15  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/arm/invert_limb.asm: Swap around some registers to silence 'as'
+       warnings.
+
+2011-07-14  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/dcpi1_bdiv_q.c (mpn_dcpi1_bdiv_q): Get mpn_sub_1 size
+       argument right.
+
+2011-07-04  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tests/misc/t-locale.c: Disable test for mingw.
+
+       * configure.in (x86_64 *-*-mingw*): Handle also cygwin here; clear out
+       extra_functions_64.
+
+2011-07-02  Torbjorn Granlund  <tege@gmplib.org>
+
+       * config.guess: Don't print newline in x86 cpuid function.
+       Rewrite x86-64 cpu recognition asm code to work under Windoze.
+
+2011-06-16  Torbjorn Granlund  <tege@gmplib.org>
+
+       * acinclude.m4 (GMP_ASM_RODATA): Fix typo in 2011-04-10 change.
+
+       * configure.in: Surround tr ranges with [] for portability.
+
+2011-05-08  Marc Glisse  <marc.glisse@inria.fr>
+
+       * doc/gmp.texi (gmp_randclass::get_f): Replace unsigned long
+       with mp_bitcnt_t.
+
+2011-05-07  Torbjorn Granlund  <tege@gmplib.org>
+
+       * Version 5.0.2 released.
+
+       * gmp-h.in (__GNU_MP_VERSION_PATCHLEVEL): Bump.
+
+       * Makefile.am (LIBGMP_LT_*, LIBGMPXX_LT_*, LIBMP_LT_*):
+       Bump version info.
+
+2011-05-05  Marc Glisse  <marc.glisse@inria.fr>
+
+       [These changes were made after the 5.0.2 release, but inserted here to
+       match the change chronology of the main repository.]
+
+       * mpn/x86_64/fat/fat.c: Update for Sandy Bridge.
+       * config.guess: Add warning to keep it in sync with fat.c.
+
+2011-05-05  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/fat/fat_entry.asm: (PIC_OR_DARWIN): New symbol.  Use it to
+       work around Darwin problems.
+
+2011-05-02  Marc Glisse  <marc.glisse@inria.fr>
+
+       * configfsf.guess: Update to version of 2011-02-02.
+       * configfsf.sub: Update to version of 2011-03-23.
+
+2011-04-30  Marc Glisse  <marc.glisse@inria.fr>
+
+       * gmp-h.in (mpz_cdiv_q_2exp): Use mp_bitcnt_t to match the definition
+       and the documentation.
+       (mpz_remove): Likewise.
+       (mpf_eq): Likewise.
+
+       * ltmain.sh: Remove.
+       * .bootstrap: Let libtoolize generate ltmain.sh.
+
+       * doc/gmp.texi (mpf_urandomb): State explicitly that it does not
+       change the precision.
+
+2011-04-28  Torbjorn Granlund  <tege@gmplib.org>
+
+       [This change was made after the 5.0.2 release, but inserted here to
+       match the change chronology of the main repository.]
+
+       * configure.in (x86_64): Support bobcat specifically.
+       (x86): Match bobcat and bulldozer, handle like k10.
+
+2011-04-27  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tune/speed.h (speed_cyclecounter): Always use PIC variant when
+       compiled with Apple's GCC.
+
+2011-04-26  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/sparc32/sparc-defs.m4 (changecom): Don't redefine '!' as it
+       interferes with expressions.
+
+2011-04-10  Niels Möller  <nisse@lysator.liu.se>
+
+       [This change was made after the 5.0.2 release, but inserted here to
+       match the change chronology of the main repository.]
+
+       * configure.in: Add invert_limb_table to extra_functions_64 on
+       x86_64.
+
+2011-04-10  Torbjorn Granlund  <tege@gmplib.org>
+
+       * acinclude.m4 (GMP_ASM_RODATA): Make 'foo' larger to avoid clang
+       problems.
+
+2011-03-28  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86/invert_limb.asm: Protect movzwl register parameters from
+       being interpreted as m4 macro parameters.
+
+2011-03-21  Torbjorn Granlund  <tege@gmplib.org>
+
+       * configure.in (hppa): Under linux, treat 64-bit processors as if they
+       were 32-bit processors.
+
+2011-03-15 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/generic/toom_interpolate_16pts.c: Remove ambiguity.
+
+2011-03-12  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tune/powerpc.asm: Use powerpc syntax, not power syntax.
+
+2011-03-09  Marc Glisse  <marc.glisse@inria.fr>
+
+       * doc/gmp.texi: Remove void return type from constructors. Document
+       explicit constructors. Document mpf_class::mpf_class(mpf_t).
+
+2011-02-24  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86/p6/sse2/mod_1_4.asm: Fix typo in MULFUNC_PROLOGUE.
+
+2011-02-04  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/core2/popcount.asm: Add a MULFUNC_PROLOGUE.
+       * mpn/x86_64/pentium4/popcount.asm: Likewise.
+
+2011-01-31  Torbjorn Granlund  <tege@gmplib.org>
+
+       [These changes were made after the 5.0.2 release, but inserted here to
+       match the change chronology of the main repository.]
+
+       * config.guess: Recognise new Intel processors.
+
+       * config.guess: Support 'coreinhm' and 'coreisbr'.
+       * config.sub: Likewise.
+       * configure.in: Likewise.
+
+2011-01-25 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpz/mul.c: Remove redundant size computation.
+
+2010-11-11  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/atom/aors_n.asm: Don't rely on ZF after 'bt' insn.
+       Use 64-bit 'test' to support operands of 2^32 limbs and more.
+
+2010-11-10  Torbjorn Granlund  <tege@gmplib.org>
+
+       [These changes were made after the 5.0.2 release, but inserted here to
+       match the change chronology of the main repository.]
+
+       * mp_get_fns.c: Add a __GMP_NOTHROW for coherency with prototype.
+       * mp_set_fns.c: Likewise.
+       * mpf/cmp.c: Likewise.
+       * mpf/cmp_si.c: Likewise.
+       * mpf/cmp_ui.c: Likewise.
+       * mpf/fits_s.h: Likewise.
+       * mpf/fits_u.h: Likewise.
+       * mpf/get_dfl_prec.c: Likewise.
+       * mpf/get_prc.c: Likewise.
+       * mpf/get_si.c: Likewise.
+       * mpf/get_ui.c: Likewise.
+       * mpf/int_p.c: Likewise.
+       * mpf/set_dfl_prec.c: Likewise.
+       * mpf/set_prc_raw.c: Likewise.
+       * mpf/size.c: Likewise.
+       * mpf/swap.c: Likewise.
+       * mpq/equal.c: Likewise.
+       * mpq/swap.c: Likewise.
+       * mpz/cmp.c: Likewise.
+       * mpz/cmp_si.c: Likewise.
+       * mpz/cmp_ui.c: Likewise.
+       * mpz/cmpabs.c: Likewise.
+       * mpz/cmpabs_ui.c: Likewise.
+       * mpz/cong_2exp.c: Likewise.
+       * mpz/divis_2exp.c: Likewise.
+       * mpz/fits_s.h: Likewise.
+       * mpz/get_si.c: Likewise.
+       * mpz/hamdist.c: Likewise.
+       * mpz/scan0.c: Likewise.
+       * mpz/scan1.c: Likewise.
+       * mpz/sizeinbase.c: Likewise.
+       * mpz/swap.c: Likewise.
+       * mpz/tstbit.c: Likewise.
+
+2010-11-09  Torbjorn Granlund  <tege@gmplib.org>
+
+       [This change was made after the 5.0.2 release, but inserted here to
+       match the change chronology of the main repository.]
+
+       * configure.in (AC_INIT): Amend bug reporting address with manual
+       reference.
+
+2010-11-06  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/aors_n.asm: Rewrite not to rely on ZF after 'bt' insn.
+
+2010-10-04  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/gcd_1.asm: Use m4_lshift to avoid << operator.
+       * mpn/x86/k7/gcd_1.asm: Likewise.
+
+2010-08-20  Niels Möller  <nisse@lysator.liu.se>
+
+       Suggested by Ozkan Sezer:
+       * configure.in: If $M4 is already set in the environment, don't
+       touch it. Fixed the case that no assembler files are used, and
+       GMP_PROG_M4 is omitted.
+
+2010-06-15  Niels Möller  <nisse@lysator.liu.se>
+
+       * tests/mpn/Makefile.am (check_PROGRAMS): Added t-mod_1.
+       * tests/mpn/t-mod_1.c: New file.
+
+2010-05-24  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/mu_div_qr.c (mpn_preinv_mu_div_qr_itch): New function.
+       * gmp-impl.h: Declare it.
+       * tune/common.c (speed_mpn_mupi_div_qr): Use new itch function.
+       * tune/speed.h (SPEED_ROUTINE_MPN_MUPI_DIV_QR): Pass parameters right
+       for new itch function.
+
+2010-05-14  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/redc_2.c: Use asm code just for GNU C.
+
+2010-05-06  Torbjorn Granlund  <tege@gmplib.org>
+
+       [This change was made after the 5.0.2 release, but inserted here to
+       match the change chronology of the main repository.]
+
+       * tune/tuneup.c (tune_mod_1): Fix typo.
+
+2010-05-05  Torbjorn Granlund  <tege@gmplib.org>
+
+       [These changes were made after the 5.0.2 release, but inserted here to
+       match the change chronology of the main repository.]
+
+       * longlong.h: Undo 2009-03-01 change for powerpc64, it gives poor code.
+
+       * tune/tuneup.c (tune_mod_1): Use more typical divisor, for the benefit
+       of machines with early-out multipliers.
+
+2010-05-04  Torbjorn Granlund  <tege@gmplib.org>
+
+       [This change was made after the 5.0.2 release, but inserted here to
+       match the change chronology of the main repository.]
+
+       * tune/tuneup.c (tune_mod_1): Fix typo.
+
+2010-05-03  Torbjorn Granlund  <tege@gmplib.org>
+
+       [This change was made after the 5.0.2 release, but inserted here to
+       match the change chronology of the main repository.]
+
+       * tune/tuneup.c (tune_mod_1): Measure MOD_1_1_TO_MOD_1_2_THRESHOLD and
+       MOD_1_2_TO_MOD_1_4_THRESHOLD before MOD_1U_TO_MOD_1_1_THRESHOLD for
+       correctness.
+
+2010-04-12  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tests/mpn/t-toom6h.c (SIZE_LOG): Define.
+       * tests/mpn/t-toom8h.c (SIZE_LOG): Likewise.
+
+2010-04-10  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86/divrem_2.asm: Use "orb" instead of "or" to work around
+       Solaris assembler bug.
+       * mpn/x86_64/divrem_2.asm: Likewise.
+
+2010-03-25  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/toom33_mul.c: Fix mpn_add_n_sub_n usage.
+       * mpn/generic/toom3_sqr.c: Likewise.
+       * mpn/generic/toom63_mul.c: Likewise.
+
+2010-03-19  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/toom_interpolate_6pts.c: Call mpn_sublsh2_n and
+       mpn_sublsh_n with correct args.
+
+2010-03-06  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tests/tests.h (TESTS_REPS): Fix typo.
+
+2010-02-26  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/pa64/aors_n.asm: Fix typo in last change.
+
+2010-02-25  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tests/mpz/t-perfpow.c (check_random): Use mp_limb_t type for limb
+       variables.
+
+       * tests/mpn/t-div.c: Cast a switch index to placate HP's cc.
+       * tests/mpn/t-bdiv.c: Likewise.
+
+       * mpn/pa64/aors_n.asm: Fix support of the 2.0n ABI.
+
+2010-02-23  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/mod_1_3.c: Cast a switch index.
+
+       * mpn/generic/sqrtrem.c: Use CNST_LIMB.
+
+2010-02-07  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tune/tuneup.c (tune_redc): Set min_size and min_is_always when
+       measuring REDC_1_TO_REDC_2_THRESHOLD.
+       (tune_mod_1): Set min_size for PREINV_MOD_1_TO_MOD_1_THRESHOLD.
+
+       * mpn/x86_64/aorrlsh_n.asm (cnt): Fix a typo.
+       * mpn/x86_64/lshsub_n.asm: Likewise.
+
+2010-02-05  Torbjorn Granlund  <tege@gmplib.org>
+
+       * Version 5.0.1 released.
+
+       * mpn/generic/powm.c: Use rp target area for power table computation in
+       order to use less scratch.
+
+       * mpn/generic/binvert.c (mpn_binvert_itch): Enable more economical
+       mpn_mulmod_bnm1_itch call.
+
+       * mpn/generic/mu_div_qr.c: Remove always true #if.
+       * mpn/generic/mu_divappr_q.c: Likewise.
+       * mpn/generic/mu_bdiv_q.c: Likewise.
+       * mpn/generic/mu_bdiv_qr.c: Likewise.
+
+2010-02-01  Torbjorn Granlund  <tege@gmplib.org>
+
+       * Makefile.am (LIBGMP_LT_*, LIBGMPXX_LT_*, LIBMP_LT_*):
+       Bump version info.
+
+       * mpn/powerpc64/mode64/gmp-mparam.h: Remove {MUL,SQR}_FFT_TABLE2.
+       * mpn/x86/p6/gmp-mparam.h: Likewise.
+       * mpn/x86/p6/mmx/gmp-mparam.h: Likewise.
+       * mpn/generic/mul_fft.c: Don't depend on FFT_TABLE2, it was broken.
+
+2010-01-29  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/mul_fft.c (mpn_mul_fft_internal): Remove arguments n, m,
+       k and rec; add argument sqr.  Don't call mpn_mul_fft_decompose here,
+       instead do that in all callers.
+       (mpn_mul_fft): Trim allocation when squaring, and use TMP_ALLOC*, not
+       explicit alloc/free.
+       (mpn_fft_div_2exp_modF): Avoid a scalar division.
+       (mpn_fft_mul_modF_K): Replace some multiplies by K with shifting by k.
+       (mpn_fft_mul_2exp_modF): Make function more symmetrical.
+
+2010-01-27  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/mu_div_q.c (mpn_mu_div_q_itch): Rewrite.
+       * mpn/generic/mu_div_qr.c (mpn_mu_div_qr_itch): Re-enable
+       better mulmod itch estimate.
+       * mpn/generic/mu_divappr_q.c (mpn_mu_divappr_q_itch): Likewise.
+       * mpn/generic/mu_bdiv_qr.c (mpn_mu_bdiv_qr_itch): Likewise.
+       * mpn/generic/mu_bdiv_q.c (mpn_mu_bdiv_q_itch): Likewise.
+
+2010-01-27 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/generic/mu_div_qr.c (mpn_mu_div_qr_itch): Disabled guessed
+       estimate, enabled a conservative one.
+       * mpn/generic/mu_divappr_q.c (mpn_mu_divappr_q_itch): Likewise.
+       * mpn/generic/mu_bdiv_qr.c (mpn_mu_bdiv_qr_itch): Likewise.
+       * mpn/generic/mu_bdiv_q.c (mpn_mu_bdiv_q_itch): Likewise.
+
+2010-01-26 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/generic/mulmod_bnm1.c (mpn_mulmod_bnm1): Partial rewrite to
+       reduce memory usage.
+       * mpn/generic/sqrmod_bnm1.c (mpn_sqrmod_bnm1): Likewise.
+       (mpn_sqrmod_bnm1_next_size): New function.
+
+       * gmp-impl.h (mpn_mulmod_bnm1_itch): Accepts 3 parameters now.
+       (mpn_sqrmod_bnm1_itch): New inline function.
+       (mpn_sqrmod_bnm1_next_size): Declaration and mangling.
+       * mpn/generic/nussbaumer_mul.c: Use the new functions.
+
+       * mpn/generic/invertappr.c (mpn_ni_invertappr): Use new syntax for
+       mpn_mulmod_bnm1_itch.
+       * mpn/generic/mu_divappr_q.c (mpn_mu_divappr_q_itch): Likewise.
+       * mpn/generic/mu_bdiv_qr.c (mpn_mu_bdiv_qr_itch): Likewise.
+       * mpn/generic/mu_bdiv_q.c (mpn_mu_bdiv_q_itch): Likewise.
+       * mpn/generic/mu_div_qr.c (mpn_mu_div_qr_itch): Likewise.
+       * mpn/generic/binvert.c (mpn_binvert_itch): Likewise.
+       * tune/speed.h (SPEED_ROUTINE_MPN_MULMOD_BNM1_CALL): Likewise.
+       (SPEED_ROUTINE_MPN_MULMOD_BNM1_ROUNDED): Likewise.
+
+       * tests/mpn/t-sqrmod_bnm1.c, tests/mpn/t-mulmod_bnm1.c: Test
+       reduced memory usage.
+
+2010-01-25  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tune/tuneup.c (INSERT_FFTTAB): New macro, like old insertion code but
+       also inserting a sentinel.
+       (fftmes): Use INSERT_FFTTAB for inserting new measurements.
+       Limit k range to best_k - 4 ... best_k + 4.
+
+2010-01-23  Torbjorn Granlund  <tege@gmplib.org>
+
+       * gmp-h.in (__GNU_MP_VERSION_PATCHLEVEL): Bump.
+       (__GMP_MP_RELEASE): New macro.
+
+       * mpf/div.c: Rewrite to use mpn_div_q.
+
+2010-01-21  Torbjorn Granlund  <tege@gmplib.org>
+
+       * Add FFT_TABLE3 tables for a basic set of machines.
+
+       * configure.in: Use -mtune=nocona for 64-bit pentium4.
+
+       * config.guess: Recognise many more Intel processors.
+
+       * tune/common.c: Whitespace cleanup.
+       (speed_mpn_matrix22_mul): Rewrite.
+
+2010-01-21  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/nussbaumer_mul.c (mpn_nussbaumer_mul): Take
+       advantage of new mpn_mulmod_bnm1 interface, to reduce allocation.
+
+       * tests/mpn/t-mulmod_bnm1.c (ref_mulmod_bnm1, main): Adapted to
+       mpn_mulmod_bnm1 interface change.
+
+       * mpn/generic/mulmod_bnm1.c (mpn_mulmod_bnm1): Interface change,
+       in case an + bn < rn, only write an + bn output limbs. New input
+       requirement, an + bn > rn/2.
+       * mpn/generic/sqrmod_bnm1.c (mpn_sqrmod_bnm1): Corresponding
+       changes.
+
+2010-01-19  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tune/tuneup.c (fftmes): Round up initial n according to initial k.
+       Limit k to 24 in loop.  Remove an obsolete always-true condition.
+       Remove a redundant trace printout.
+
+2010-01-18  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tune/tuneup.c (fftmes): New function.
+       (fft): Rewrite.
+       (mpn_mul_fft_lcm): New function, copied from mpn/generic/mul_fft.c.
+       (fftfill): New function, code taken from mul_fft.c (mpn_mul_fft).
+       (cached_measure): New function.
+
+       * gmp-impl.h (struct fft_table_nk): Moved from mul_fft.c.
+       (MUL_FFT_TABLE3, SQR_FFT_TABLE3): Provide dummy versions for tuneup
+       builds.
+       (FFT_TABLE3_SIZE): Increase value for tuneup builds.
+
+       * mpn/generic/mul_fft.c: Handle a new FFT threshold table type ("3").
+       Misc cleanups to old table type code.
+
+2010-01-16  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/darwin.m4: Fix typo in last change.
+
+2010-01-15  Torbjorn Granlund  <tege@gmplib.org>
+
+       * gmp-h.in (__GMP_EXTERN_INLINE): Remove "extern" for newer Sun C.
+
+       * gmp-impl.h (GMP_LIMB_BYTES): New define.
+
+       * mpn/x86_64/darwin.m4 (LEA): New define.
+
+       * mpn/x86/invert_limb.asm (approx_tab): Use DEF_OBJECT.
+       Rename and globalise it to work around Mac OS bug.
+
+       With Philip McLaughlin:
+       * mpn/x86_64/gcd_1.asm (ctz_table): Don't use local prefix, but
+       use DEF_OBJECT...END_OBJECT.
+       Keep stack pointer at ABI mandated alignment over call.
+
+2010-01-12  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tune/speed.c (routine): Remove obsolete mpn_dc_tdiv_qr and
+       mpn_dc_div_qr_n.
+       * tune/common.c (speed_mpn_dc_tdiv_qr, speed_mpn_dcpi1_div_qr_n):
+       Remove now unused functions.
+       * tune/speed.h (SPEED_ROUTINE_MPN_DC_DIVREM_N,
+       SPEED_ROUTINE_MPN_DC_DIVREM_SB, SPEED_ROUTINE_MPN_DC_TDIV_QR): Remove
+       now unused macros.
+
+       * mpn/x86_64/fat/fat_entry.asm (mpn_cpuid_available): Remove function.
+
+       * ltmain.sh: Upgrade from 1.5.24 to 2.2.6b.
+       * ylwrap: New file.
+       * .bootstrap: Remove explicit versions.
+
+       * doc/gmp.texi (Block-wise Barrett Division): New node.
+
+       * mpn/generic/powm.c: Change some #if to plain 'if' to avoid fat build
+       problems.
+
+2010-01-11  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tune/speed.h (SPEED_ROUTINE_MPN_PI1_DIV): Accept arguments for size
+       restrictions.
+       * tune/common.c (speed_mpn_sbpi1_div_qr, speed_mpn_dcpi1_div_qr)
+       (speed_mpn_sbpi1_divappr_q, speed_mpn_dcpi1_divappr_q): Pass size
+       limits for SPEED_ROUTINE_MPN_PI1_DIV.
+
+       * tune/speed.c (routine): Allow .r argument for mpn_sbpi1_divappr_q and
+       mpn_dcpi1_divappr_q.
+
+2010-01-08  Torbjorn Granlund  <tege@gmplib.org>
+
+       * Version 5.0.0 released.
+
+       * mpn/generic/div_q.c: Handle mpn_*_divappr_q returning high limb
+       everywhere.
+
+2010-01-07  Torbjorn Granlund  <tege@gmplib.org>
+
+       * Update MUL_FFT_TABLE2 and SQR_FFT_TABLE2 for many machines.
+
+       * mpn/generic/mu_div_q.c: Account for divisor truncation error as well
+       as mpn_mu_divappr_q's error.
+
+       * mpn/generic/mu_div_q.c: Handle mpn_preinv_mu_divappr_q returning a
+       high limb.
+
+       * tests/mpn/t-bdiv.c: Move a random call for debuggability.
+       * tests/mpn/t-div.c: Likewise.
+
+       * mpn/generic/mu_divappr_q.c: Rewrite quotient round-up code.
+
+       * mpn/generic/mu_div_qr.c: Handle carry-out from a carry propagation
+       subtract.
+       * mpn/generic/mu_divappr_q.c: Likewise.
+
+       * mpn/generic/mu_divappr_q.c
+       (mpn_preinv_mu_divappr_q, mpn_mu_divappr_q): Declare dividend constant.
+       * gmp-impl.h: Likewise.
+
+       * perfpow.c (mpn_perfect_power_p): Call mpn_divexact instead of
+       mpn_bdiv_q (with too little scratch space!).
+
+       From Niels Möller:
+       * tests/mpn/t-div.c (check_one): Get rid of the poorly managed variable
+       tn.
+
+       * mpn/minithres/gmp-mparam.h: Add all lately defined thresholds.
+
+       * mpn/generic/div_q.c: Use SB division for small quotients as well as
+       small divisors.  Fix typo in itch call.
+
+2010-01-06  Niels Möller  <nisse@lysator.liu.se>
+
+       * tests/mpn/t-div.c (check_one): Checking based on multiplication,
+       refmpn_mul, rather than refmpn_tdiv_qr.
+
+2010-01-06 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/generic/toom8h_mul.c: Avoid overflows of mp_size_t.
+
+2010-01-06  Torbjorn Granlund  <tege@gmplib.org>
+
+       * gmp-h.in (__GNU_MP__): Bump.
+       (__GNU_MP_VERSION,__GNU_MP_VERSION_MINOR,__GNU_MP_VERSION_PATCHLEVEL):
+       Bump version info.
+       * mp-h.in (__GNU_MP__): Bump.
+       * Makefile.am (LIBGMP_LT_*, LIBGMPXX_LT_*, LIBMP_LT_*):
+       Bump version info.
+
+       * doc/gmp.texi: Rewrite mpn_gcdext text.  Remove some out-of-date
+       text in Algorithms chapter.
+
+       * mpn/generic/div_q.c: Properly handle np=scratch.  Fix critical typo
+       in final adjustment code.  Misc cleanups.
+
+       * mpn/generic/rootrem.c: Use mpn_div_q.
+       * mpz/tdiv_q.c: Likewise.
+
+       * tests/mpn/t-div.c: Test mpn_div_q.
+       (SIZE_LOG): Up to 17.
+
+       * mpn/generic/div_q.c: New file.
+       * configure.in (gmp_mpn_functions): Add div_q.
+
+       * mpn/generic/mu_div_q.c: Actually declare dividend constant.
+
+2010-01-04  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tune/tuneup.c (fft): Separate tuning of modf and full products.
+       (struct fft_param_t): New field, mul_modf_function.
+       (tune_fft_sqr): Fix typo.
+       (tune_fft_mul, tune_fft_sqr): Initialise mul_modf_function field.
+       * tune/common.c (speed_mpn_fft_mul, speed_mpn_fft_sqr): New functions.
+
+       * tune/speed.h (SPEED_ROUTINE_MPN_MULMOD_BNM1_ROUNDED): Clean up.
+
+       * mpn/generic/mul.c: Simplify rational expression.
+
+       * gmp-impl.h: Cleanup threshold variables; remove obsolete ones and
+       make all possibly needed definitions for existing ones.
+       * tune/tuneup.c (tune_mul): Write fractions-compensated values to
+       threshold variables.
+
+2010-01-03 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * tune/common.c, tune/speed.c, tune/speed.h: Support measuring
+       mpn_toom43_mul.
+
+       * mpn/generic/toom_interpolate_6pts.c: Small reorganisation.
+
+2010-01-03  Torbjorn Granlund  <tege@gmplib.org>
+
+       * gmp-impl.h (MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD): Default to
+       INV_MULMOD_BNM1_THRESHOLD/2 instead.
+
+       * gmp-impl.h (INV_APPR_THRESHOLD, INV_MULMOD_BNM1_THRESHOLD): Default
+       here...
+       * mpn/generic/invert.c, mpn/generic/invertappr.c: ...not here.
+
+       * tests/mpn/t-div.c: Rewrite operand generation code.
+
+2010-01-02  Torbjorn Granlund  <tege@gmplib.org>
+
+       * gmp-impl.h (MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD): Default to
+       INV_MULMOD_BNM1_THRESHOLD.
+
+2010-01-02  Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/generic/dcpi1_div_q.c: Handle divappr approximation problem more
+       efficiently.
+       * mpn/generic/mu_div_q.c: Likewise.
+
+       * mpn/generic/invert.c: Remove duplicated code.
+
+2010-01-01  Torbjorn Granlund  <tege@gmplib.org>
+
+       * gmp-impl.h (MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD): Default to 0.
+
+       * mpn/generic/mu_div_qr.c: Rewrite to use mpn_mulmod_bnm1.  Clean up
+       scratch usage.  Improve itch functions.
+       * mpn/generic/mu_divappr_q.c: Likewise.
+       * mpn/generic/mu_bdiv_qr.c: Likewise.
+       * mpn/generic/mu_div_q.c: Likewise.
+
+       * mpn/generic/dcpi1_bdiv_qr.c: Add parameter ASSERTs.
+       * mpn/generic/dcpi1_bdiv_q.c: Likewise.
+
+       * tests/mpn/t-bdiv.c: Replace with unit testing code, based on t-div.c.
+       Increase COUNT to 500.
+
+       * tests/mpn/t-div.c: Avoid generating too small test operands.
+       Move SB suppression limit downwards.  Increase COUNT to 200.
+
+2009-12-31  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/tdiv_qr.c: Handle numerator/remainder overlap in MU case.
+
+       * tests/tests.h (TESTS_REPS): New macro.
+       * tests/mpz/dive.c: Use larger operands, decrease default reps, use
+       TESTS_REPS.
+       * tests/mpz/convert.c: Likewise.
+       * tests/mpz/t-sqrtrem.c: Likewise.
+       * tests/mpz/reuse: Likewise.
+       * tests/mpz/t-root.c: Likewise.
+       * tests/mpz/t-tdiv.c: Likewise.
+       * tests/mpz/t-gcd.c: Likewise.
+       * tests/mpz/t-powm.c: Likewise.
+
+2009-12-31  Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/generic/toom8_sqr.c (SQR_TOOM8_MAX): Avoid overflow.
+       * mpn/generic/toom6_sqr.c (SQR_TOOM6_MAX): Likewise.
+
+       * mpn/generic/mulmod_bnm1.c: Don't mention MISUSE any more,
+       simply consider UNLIKELY any unexpected size.
+
+2009-12-31  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tune/tuneup.c (speed_mpn_sbordcpi1_div_qr): New function.
+       (tune_mu_div): Use it.
+
+2009-12-30  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tune/tuneup.c (tune_mu_bdiv, tune_dc_bdiv, tune_mu_div)
+       (tune_dc_div): Clear global s.r to make speed functions do 2n/n.
+
+       * tune/speed.c (routine): New entries for mpn_mu_div_qr and
+       mpn_mupi_div_qr.  Allow .r parameter for mpn_sbpi1_div_qr,
+       mpn_dcpi1_div_qr.
+       * tune/speed.h (SPEED_ROUTINE_MPN_PI1_DIV, SPEED_ROUTINE_MPN_MU_DIV_QR)
+       (SPEED_ROUTINE_MPN_MUPI_DIV_QR): Handle .r parameter.
+
+       * tests/mpz/t-tdiv.c: Increase operands size again.
+
+       * mpn/generic/tdiv_qr.c: Attempt to choose between DC and MU more cleverly.
+
+       * mpn/generic/tdiv_qr.c: Don't overwrite rp with unnecessary temporary
+       alloc.
+
+2009-12-29  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tune/tuneup.c (tune_mu_div): Tune MUPI_DIV_QR_THRESHOLD.
+       * tune/speed.h (struct speed_params): Allow 3 source operands.
+       (SPEED_ROUTINE_MPN_MUPI_DIV_QR): New macro.
+       * tune/common.c (speed_mpn_mupi_div_qr): New function.
+
+       * mpn/generic/tdiv_qr.c: Call mpn_mu_div_qr.
+
+       * tests/mpz/t-tdiv.c: Use larger test operands.
+
+       * mpn/generic/mu_div_qr.c (mpn_mu_div_qr2): Remove code for dn==1.
+
+       * mpz/mul.c: Call mpn_sqr directly.  Use PTR,SIZ,ALLOC.
+
+       * tune/tuneup.c (tune_mu_div): Set min_size to 6, DC functions require
+       this.
+
+       * tests/mpn/t-div.c: Call mu_div functions with operands that generate
+       a high quotient limb.
+
+       * mpn/generic/mu_div_qr.c: Rewrite to return a high quotient limb,
+       to let dividend argument be constant, and as a general cleanup.
+       * mpn/generic/mu_divappr_q.c: Likewise.
+       * mpn/generic/mu_div_q.c: Likewise.
+       * gmp-impl.h: Update declarations of changed functions.
+
+       * mpn/generic/invertappr.c (mpn_invertappr): Allocate scratch space
+       when caller passed NULL.
+
+2009-12-28  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/toom_couple_handling.c: Prefix name with mpn_.
+       * gmp-impl.h: Likewise.
+       * mpn/generic/toom63_mul.c: Likewise.
+       * mpn/generic/toom6_sqr.c: Likewise.
+       * mpn/generic/toom6h_mul.c: Likewise.
+       * mpn/generic/toom8_sqr.c: Likewise.
+       * mpn/generic/toom8h_mul.c: Likewise.
+
+       * configure.in (gmp_mpn_functions_optional): Move "com" from here...
+       (gmp_mpn_functions): ...to here.
+       * mpn/generic/com.c: New file.
+       * (mpn_com): New name for mpn_com_n.  Make public.
+       * (mpn_neg): Analogous changes.
+
+       * tune/tuneup.c (tune_mu_div, tune_mu_bdiv): Set step_factor.
+
+       * tune/common.c, tune/speed.c, tune/speed.h: Support measuring
+       mpn_lshiftc.
+
+       * tests/devel/try.c: Test mpn_lshiftc.
+       * tests/refmpn.c (refmpn_com): New function.
+       (refmpn_lshiftc): Likewise.
+
+       * configure.in (gmp_mpn_functions_optional): Move lshiftc from here...
+       (gmp_mpn_functions): ...to here.
+       * mpn/generic/lshiftc.c: New file.
+       * mpn/x86_64/lshiftc.asm: New file.
+       * mpn/x86_64/core2/lshiftc.asm: New file.
+       * mpn/generic/mul_fft.c (mpn_lshiftc): Remove.
+
+       * mpn/x86_64/core2/lshift.asm: Tweak for better Core iN performance.
+       * mpn/x86_64/core2/rshift.asm: Likewise.
+
+2009-12-27  Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/generic/mul.c: Use toom6h and toom8h for almost balanced.
+
+       * mpn/generic/mullo_n.c (mpn_dc_mullo_n): New ratio, to be used in
+       Toom-8 range.
+
+2009-12-27  Torbjorn Granlund  <tege@gmplib.org>
+
+       * (mpn_sqr): New name for mpn_sqr_n.  Many files affected.
+
+       * tune/tuneup.c (tune_mullo): Up step_factor for MULLO_MUL_N_THRESHOLD.
+       (tune_invertappr, tune_invert, tune_binvert): Let max_size default.
+
+       * tune/tuneup.c (tune_mu_div, tune_mu_bdiv): New functions.
+       * tune/speed.h (SPEED_ROUTINE_MPN_MU_DIV_Q): New macro.
+       (SPEED_ROUTINE_MPN_MU_DIV_QR): Likewise.
+       (SPEED_ROUTINE_MPN_MU_BDIV_Q): Likewise.
+       (SPEED_ROUTINE_MPN_MU_BDIV_QR): Likewise.
+       * tune/Makefile.am (TUNE_MPN_SRCS_BASIC): Add bdiv_q.c and bdiv_qr.c.
+       * tune/common.c (speed_mpn_mu_div_qr): New function.
+       (speed_mpn_mu_divappr_q): Likewise.
+       (speed_mpn_mu_div_q): Likewise.
+       (speed_mpn_mu_bdiv_q): Likewise.
+       (speed_mpn_mu_bdiv_qr): Likewise.
+
+       * mpn/*/gmp-mparam.h: Fix incorrect MOD_1U_TO_MOD_1_1_THRESHOLD 0
+       values.
+
+       * gmp-impl.h (MODEXACT_1_ODD_THRESHOLD): Remove.
+       (BMOD_1_TO_MOD_1_THRESHOLD): New parameter, with the reverse meaning of
+       MODEXACT_1_ODD_THRESHOLD.
+       (MPN_MOD_OR_MODEXACT_1_ODD): Use BMOD_1_TO_MOD_1_THRESHOLD.
+       * mpn/generic/divis.c, mpz/{cong.c,cong_ui.c,divis_ui.c}: Likewise.
+       * tune/tuneup.c (tune_modexact_1_odd): Tune BMOD_1_TO_MOD_1_THRESHOLD;
+       Do not assume native mpn_modexact_1_odd is faster than mpn_mod_1.
+       (tuned_speed_mpn_mod_1): Remove variable.
+       (tune_mod_1): Fix thinkos.  Suppress printing of "always" etc.
+       (all): Measure for divrem_1, mod_1, divexact_1, etc first, since Toom
+       depends on some of them.
+
+       * mpn/generic/toom22_mul.c (TOOM22_MUL_REC): New name for
+       TOOM22_MUL_MN_REC.
+
+2009-12-26  Niels Möller  <nisse@lysator.liu.se>
+
+       * tests/mpn/t-toom32.c (MIN_AN, MIN_BN, MAX_BN): Relax
+       requirements a bit.
+
+       * mpn/generic/toom32_mul.c (mpn_toom32_mul): Relax requirement on
+       input sizes, to support s+t>=n (used to be s+t>=n+2). Keep high
+       limbs of the evaluated values in scalar variables.
+
+       * mpn/generic/sbpi1_divappr_q.c (mpn_sbpi1_divappr_q): Remove
+       unused variables.
+
+       * mpn/generic/toom32_mul.c (mpn_toom32_mul): Fixed left-over use
+       of mpn_addsub_n which should be mpn_add_n_sub_n.
+
+2009-12-26  Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * tune/Makefile.am (TUNE_MPN_SRCS_BASIC): Add new toom files (spotted by Torbjorn).
+
+       * gmp-impl.h (mpn_toom6_sqr_itch): Rename to mpn_toom6_mul_n_itch and redefine.
+       (mpn_toom8_sqr_itch): Rename to mpn_toom8_mul_n_itch and redefine.
+       * mpn/generic/mul_n.c: Use renamed _itch macros.
+
+2009-12-25  Niels Möller  <nisse@lysator.liu.se>
+
+       * tests/mpn/t-toom32.c (MIN_AN, MIN_BN, MAX_BN): Tightened requirements.
+       * gmp-impl.h (mpn_toom32_mul_itch): Updated. Less scratch needed
+       by toom32 itself, and also the pointwise multiplications are
+       currently mpn_mul_n with no supplied scratch.
+       * mpn/generic/toom32_mul.c (mpn_toom32_mul): Reorganized
+       interpolation to use less scratch space. No longer supports the
+       most extreme size ratios.
+
+2009-12-25  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tune/tuneup.c (tune_preinv_mod_1): Purge.
+       (tune_mod_1): Use speed_mpn_mod_1_tune for
+       PREINV_MOD_1_TO_MOD_1_THRESHOLD.
+
+       * mpn/generic/dcpi1_divappr_q.c: Handle 2n/n properly.  Don't use full
+       precision in mpn_sbpi1_divappr_q call.  Misc cleanup.
+
+       * tune/tuneup.c (tune_mod_1): Add a check_size for
+       PREINV_MOD_1_TO_MOD_1_THRESHOLD.
+
+2009-12-24  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tune/mod_1_div.c (MOD_1N_TO_MOD_1_1_THRESHOLD)
+       (MOD_1U_TO_MOD_1_1_THRESHOLD): Set.
+       * tune/mod_1_inv.c (MOD_1N_TO_MOD_1_1_THRESHOLD)
+       (MOD_1U_TO_MOD_1_1_THRESHOLD): Set.
+
+       * gmp-impl.h (USE_PREINV_MOD_1): Remove.
+       (MPN_MOD_OR_PREINV_MOD_1): Define to choose functions dynamically in
+       terms of PREINV_MOD_1_TO_MOD_1_THRESHOLD (used to choose statically
+       using USE_PREINV_MOD_1).
+       * mpn/generic/perfsqr.c (PERFSQR_MOD_PP): Corresponding updates.
+
+       * tune/tuneup.c (tune_mod_1): Rewrite.
+       * gmp-impl.h (MOD_1N_TO_MOD_1_1_THRESHOLD): New.
+       (MOD_1U_TO_MOD_1_1_THRESHOLD): New name for MOD_1_1_THRESHOLD.
+       (MOD_1_1_TO_MOD_1_2_THRESHOLD): New name for MOD_1_2_THRESHOLD.
+       (MOD_1_2_TO_MOD_1_4_THRESHOLD): New name for MOD_1_4_THRESHOLD.
+       * mpn/generic/mod_1.c: Corresponding updates.
+
+2009-12-24  Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/generic/mul_n.c: Use also toom6h and toom8h.
+       * mpn/generic/sqr_n.c: Use also toom6 and toom8.
+       * gmp-impl.h: Initial support for tuning of Toom-6half and Toom-8half.
+       * tune/tuneup.c: Tune Toom-6half and Toom-8half thresholds.
+
+2009-12-24  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/mod_1_4.c: Get ASSERT right.
+       * mpn/generic/mod_1_3.c: Likewise.
+       * mpn/generic/mod_1_2.c: Likewise.
+
+       * mpn/generic/powm_sec.c: Use SQR_TOOM2_THRESHOLD as limit for a native
+       mpn_sqr_basecase, not TUNE_SQR_TOOM2_MAX.
+
+2009-12-23  Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * tune/common.c, tune/speed.c, tune/speed.h: Support for measuring
+       mpn_toom8h_mul and mpn_toom8_sqr speed.
+
+       * mpn/generic/toom_eval_pm2exp.c: Fix ASSERTs.
+
+       * mpn/generic/toom8h_mul.c: New file.
+       * mpn/generic/toom8_sqr.c: New file.
+       * mpn/generic/toom_interpolate_16pts.c: New file.
+       * gmp-impl.h: Provide corresponding declarations.
+       * configure.in (gmp_mpn_functions): List toom_interpolate_16pts,
+       toom8h_mul, and toom8_sqr.
+       * tests/mpn/t-toom8h.c: New test program.
+
+       * mpn/generic/toom6_sqr.c: New file, was part of toom6h_mul.
+       * mpn/generic/toom6h_mul.c: Removed _sqr.
+
+       * mpn/generic/mulmod_bnm1.c: Nailify CRT.
+       * mpn/generic/sqrmod_bnm1.c: Likewise.
+
+       * mpn/generic/mullo_n.c: Split dc_mullo_n function;
+       ALLOC memory at once.
+
+       * mpn/Makefile.am (nodist_EXTRA_libmpn_la_SOURCES): Update.
+
+       * mpn/generic/toom6h_mul.c: Add prefix to toom_interpolate_12pts.
+       * mpn/generic/toom_interpolate_12pts.c: Likewise.
+
+       * mpn/generic/invertappr.c (mpn_bc_invertappr): Use mpn_divrem_2.
+       * mpn/generic/invert.c: Faster basecase, use mpn_sbpi1_div_q.
+
+       * mpn/generic/toom_eval_pm2exp.c: Assert support for degree 3.
+       * mpn/generic/toom6h_mul.c: Avoid obsolete _itch function.
+
+2009-12-23  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tune/common.c, tune/speed.c, tune/speed.h: Support for measuring
+       mpn_mod_1_1p, mpn_mod_1s_2p, mpn_mod_1s_3p, mpn_mod_1s_4p.
+
+       * tests/mpz/t-powm.c: Test mpz_powm_sec.
+
+       * mpz/powm_sec.c: New file.
+       * gmp-h.in: Declare it.
+       * Makefile.am, mpz/Makefile.am: Compile it.
+       * doc/gmp.texi: Document it.
+
+       * mpn/generic/powm_sec.c (mpn_powm_sec_itch): New function.
+       (mpn_powm_sec): Use passed scratch, no local allocation.
+       Allow exp argument = 1.
+       (win_size): Start loop from 1.
+
+       * mpn/generic/powm.c (win_size): Start loop from 1.
+
+2009-12-22  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tests/mpn/t-div.c: New file.
+       * tests/mpn/Makefile.am: Compile it.
+
+       * mpn/generic/mu_divappr_q.c: Handle quotient overflow.
+
+       * mpn/generic/mu_div_q.c (mpn_mu_div_q_itch): New function.
+
+2009-12-22  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/sbpi1_div_q.c: Use udiv_qr_3by2.  Intended to change
+       nothing after preprocessing.
+
+       * mpn/generic/sbpi1_divappr_q.c: For the last call to udiv_qr_3by2,
+       avoid using memory locations as output parameters, and revert to
+       explicitly copying n1 and n0 to memory.
+
+       * gmp-impl.h (udiv_qr_3by2): Tweaked to expand to precisely the
+       same code as was used before the introduction of this macro.
+       Eliminated some local variables, instead do multiple updates to
+       the output parameters.
+
+2009-12-22  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tests/mpn/t-toom6h.c (MIN_AN): Set to MUL_TOOM6H_THRESHOLD to avoid
+       invalid recursive sizes.
+
+       * tests/mpn/t-bdiv.c: Get itch function calls right.
+
+       * mpn/generic/mu_bdiv_q.c (mpn_mu_bdiv_q_itch): Rewrite.
+       * mpn/generic/mu_bdiv_qr.c (mpn_mu_bdiv_qr_itch): Simplify.
+
+       * mpn/generic/bdiv_qr.c (mpn_bdiv_qr): Simplify, don't allocate.
+       (mpn_bdiv_qr_itch): Conditionalise on MU_BDIV_QR_THRESHOLD.
+
+2009-12-18  Niels Möller  <nisse@lysator.liu.se>
+
+       * tests/mpn/t-bdiv.c: Add red-zones.
+
+2009-12-21  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/sbpi1_div_q.c: Fix fixup code to work for qn = 0.
+
+       * mpn/generic/dcpi1_divappr_q.c: Handle qn = 1 and qn = 2 for initial
+       quotient block (code block copied from dcpi1_div_qr.c).
+
+       * mpn/generic/dcpi1_div_qr.c: Rewrite singular case giving q limb of
+       GMP_NUMB_MAX.  Remove an impossible qn = 0 case.
+
+       * mpn/generic/dcpi1_bdiv_q.c: Remove a spurious mpn_sub_1.
+
+       * mpn/generic/mul.c: Put back call to mpn_mul_n.
+
+       * tune/tuneup.c (all): Call tune_mulmod_bnm1 before tuning fft due to
+       dependency on mulmod_bnm1 from both mul_fft_mul and from mullo_n.
+
+       * mpn/generic/dcpi1_divappr_q.c: ASSERT that dn >= 6 and nn > dn.
+       * mpn/generic/dcpi1_div_q.c: ASSERT that dn >= 6 and nn-dn >= 3.
+       * mpn/generic/dcpi1_div_qr.c: ASSERT that dn >= 6 and nn-dn >= 3.
+
+       * mpn/generic/bdiv_q_1.c (mpn_pi1_bdiv_q_1): Renamed from
+       mpn_bdiv_q_1_pi1.
+       * All references changed.
+
+       * configure.in: Add --enable-old-fft-full.
+       * tune/speed.c (routine): Conditionalise mpn_mul_fft_full references on
+       WANT_OLD_FFT_FULL.
+       * tune/common.c (speed_mpn_mul_fft_full)
+       (speed_mpn_mul_fft_full_sqr): Likewise.
+       * mpn/generic/mul_fft.c (mpn_mul_fft_full): Include iff
+       WANT_OLD_FFT_FULL.
+
+2009-12-21  Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * gmp-impl.h (mpn_toom6h_mul_itch): New inline function.
+       (MUL_TOOM6H_THRESHOLD): Default value.
+       (SQR_TOOM6_THRESHOLD): Default value.
+       * mpn/generic/toom6h_mul.c: Remove definitions moved to gmp-impl.h.
+       * tune/common.c, tune/speed.c, tune/speed.h: Support for measuring
+       mpn_toom6h_mul and mpn_toom6_sqr speed.
+
+       * mpn/generic/toom63_mul.c: Remove unused TMP_*.
+
+       * mpn/generic/toom_eval_pm2rexp.c: New file.
+       * gmp-impl.h: Provide corresponding declaration.
+       * configure.in (gmp_mpn_functions): List toom_eval_pm2rexp.
+       * mpn/generic/toom6h_mul.c: Use shared toom_eval_pm2rexp.
+
+       * mpn/generic/toom_couple_handling.c: New file, helper function
+       for high degree Toom.
+       * gmp-impl.h: Provide corresponding declaration.
+       * configure.in (gmp_mpn_functions): List toom_couple_handling.
+       * mpn/generic/toom6h_mul.c: Use shared toom_couple_handling.
+       * mpn/generic/toom63_mul.c: Likewise.
+
+       * mpn/generic/toom6h_mul.c: New file.
+       * mpn/generic/toom_interpolate_12pts.c: New file.
+       * gmp-impl.h: Provide corresponding declarations.
+       * configure.in (gmp_mpn_functions): List toom_interpolate_12pts,
+       toom6h_mul.
+       * tests/mpn/t-toom6h.c: New test program.
+
+       * tests/mpn/t-mulmod_bnm1.c (ref_mulmod_bnm1): Use ref_mul.
+       * tests/mpn/t-sqrmod_bnm1.c (ref_sqrmod_bnm1): Likewise.
+
+2009-12-20  Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/generic/mulmod_bnm1.c (mpn_mulmod_bnm1): New CRT.
+       * mpn/generic/sqrmod_bnm1.c (mpn_sqrmod_bnm1): Likewise.
+
+2009-12-20  Torbjorn Granlund  <tege@gmplib.org>
+
+       * Change all bit counts for bignums to use mp_bitcnt_t.
+
+       * mpn/generic/bdivmod.c: File removed.  All references purged.
+
+       * mpn/generic/mul_fft.c (mpn_mul_fft_full): Disable.
+
+       * gmp-impl.h: Define mpn_fft_mul as an alias for mpn_nussbaumer_mul.
+       * mpn/generic/mul.c: Refer to mpn_fft_mul.
+       * mpn/generic/mul_n.c: Likewise.
+       * mpn/generic/sqr_n.c: Likewise.
+       * mpn/generic/mullo_n.c: Likewise.
+
+       * mpn/generic/mul.c: Loop also over mpn_nussbaumer_mul, as suggested by
+       Marco.  Use TMP_SALLOC_LIMBS in more places.  Clean up ws allocation.
+
+2009-12-19  Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/generic/toom_interpolate_8pts.c: Nailify.
+
+2009-12-19  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/mul.c: Major rewrite.  Use toom43, toom53, toom63.
+       Call mpn_nussbaumer_mul for largest operands.
+
+       * tune/speed.h (SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM43_MUL): New macro.
+       (SPEED_ROUTINE_MPN_TOOM43_FOR_TOOM32_MUL): New macro.
+       (SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM53_MUL): New macro.
+       (SPEED_ROUTINE_MPN_TOOM53_FOR_TOOM32_MUL): New macro.
+       (SPEED_ROUTINE_MPN_TOOM42_FOR_TOOM53_MUL): New macro.
+       (SPEED_ROUTINE_MPN_TOOM53_FOR_TOOM42_MUL): New macro.
+       * tune/common.c (speed_mpn_toom63_mul): New function.
+       (speed_mpn_toom32_for_toom43_mul): New function.
+       (speed_mpn_toom43_for_toom32_mul): New function.
+       (speed_mpn_toom32_for_toom53_mul): New function.
+       (speed_mpn_toom53_for_toom32_mul): New function.
+       (speed_mpn_toom42_for_toom53_mul): New function.
+       (speed_mpn_toom53_for_toom42_mul): New function.
+       * tune/tuneup.c (tune_mul_n): New name for old tune_mul.
+       (tune_sqr_n): New name for old tune_sqr.
+       (tune_mul): New function, for unbalanced multiplication.
+       * gmp-impl.h: Provide declarations for corresponding threshold vars.
+
+       * gmp-impl.h (mpn_rsh1add_nc, mpn_rsh1sub_nc): Declare.
+       * mpn/asm-defs.m4: Likewise.
+       * configure.in: Add corresponding HAVE_NATIVEs.
+       * mpn/x86_64/rsh1aors_n.asm: Add _nc entry point.
+
+2009-12-18  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpz/divexact.c: Rewrite to use mpn_divexact.
+
+       * mpn/generic/bdiv_q_1.c (mpn_bdiv_q_1): Deleted some unused
+       variables.
+
+       * mpn/generic/toom52_mul.c (mpn_toom52_mul)
+       [HAVE_NATIVE_mpn_add_n_sub_n]: Moved declaration of cy to avoid a
+       compiler warning.
+
+       * gmp-impl.h (gmp_pi1_t): Eliminated inv21 member.
+       (invert_pi1): ...and don't store it here.
+
+       * mpn/generic/toom63_mul.c (mpn_toom63_mul): Simplified
+       calculation of block size n.
+       * gmp-impl.h (mpn_toom63_mul_itch): Likewise.
+
+       * mpn/generic/toom_eval_pm2exp.c (mpn_toom_eval_pm2exp): Fixed
+       output asserts.
+
+2009-12-18  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tests/mpn/t-toom63.c: New test program.
+
+2009-12-18  Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/generic/invert.c: Nailify.
+       * mpn/generic/invertappr.c: Nailify.
+       * mpn/generic/mulmod_bnm1.c: Nailify.
+       * mpn/generic/sqrmod_bnm1.c: Nailify.
+
+       * tests/mpn/t-invert.c: New test program.
+
+       * mpn/generic/toom63_mul.c: New file.
+       * mpn/generic/toom_interpolate_8pts.c: New file.
+       * gmp-impl.h: Provide corresponding declarations.
+       * configure.in (gmp_mpn_functions): List toom_interpolate_8pts and
+       toom63_mul.
+
+2009-12-17  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/mul.c: Move allocation of ws to where it is used.
+       Identify toom22, 32, 42, in that order (in two places).  Use midline
+       between toom22, 32, 42.
+       * mpn/generic/toom22_mul.c (TOOM22_MUL_MN_REC): Call also
+       mpn_toom32_mul.
+
+       * doc/gmp.texi: Update References section.  Update Contributors
+       section.  Misc updates.
+
+       * gmp-impl.h: Renew default values for all THRESHOLDs.
+
+2009-12-17  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/divexact.c (mpn_divexact): Don't require that the
+       dividend is normalized. Use MPN_DIVREM_OR_PREINV_DIVREM_1. When
+       shifting, allocate and process only the low qn+1 limbs. Eliminated
+       code for the impossible case nn < qn.
+
+       * mpn/generic/dcpi1_div_qr.c (mpn_dcpi1_div_qr): Added some input
+       asserts.
+
+       * mpn/generic/dcpi1_div_qr.c (mpn_dcpi1_div_qr): In the case that
+       the initial quotient block is a single limb, use 3/2 division,
+       thereby eliminating the only use of gmp_pi1_t->inv21.
+
+2009-12-17  Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/generic/invert.c: Added some comments.
+       * mpn/generic/invertappr.c: Slightly better threshold handling.
+       * gmp-impl.h (INV_NEWTON_THRESHOLD): Default to 200.
+
+       * mpn/generic/nussbaumer_mul.c: New file.
+       * configure.in (gmp_mpn_functions): Add nussbaumer_mul.
+       * tune/Makefile.am (TUNE_MPN_SRCS_BASIC): Add nussbaumer_mul.
+       * gmp-impl.h (mpn_nussbaumer_mul): Added prototype and name-mangling.
+       * tune/speed.h (speed_mpn_nussbaumer_mul): Declare function.
+       * tune/common.c (speed_mpn_nussbaumer_mul): New function.
+       * tune/speed.c (routine): Add speed_mpn_nussbaumer_mul.
+
+       * mpn/generic/sqrmod_bnm1.c: New file.
+       * configure.in (gmp_mpn_functions): Add sqrmod_bnm1.
+       * tune/Makefile.am (TUNE_MPN_SRCS_BASIC): Add sqrmod_bnm1.
+       * gmp-impl.h (mpn_sqrmod_bnm1): Added prototype and name-mangling.
+       (SQRMOD_BNM1_THRESHOLD): support for the new threshold.
+       * tune/speed.h (speed_mpn_sqrmod_bnm1): Declare function.
+       * tune/common.c (speed_mpn_sqrmod_bnm1): New function.
+       * tune/speed.c (routine): Add speed_mpn_sqrmod_bnm1.
+       * tests/mpn/t-mulmod_bnm1.c: Attribution.
+       * tests/mpn/t-sqrmod_bnm1.c: New test file.
+       * tests/mpn/Makefile.am (check_PROGRAMS): Add t-sqrmod_bnm1.
+
+       * tune/tuneup.c: Tune SQRMOD_BNM1_THRESHOLD.
+
+       * mpn/generic/nussbaumer_mul.c (mpn_nussbaumer_mul): Mimic fft_mul,
+       use squaring if operands coincide.
+       * tune/speed.h (speed_mpn_nussbaumer_mul_sqr): Declare function.
+       * tune/common.c (speed_mpn_nussbaumer_mul_sqr): New function.
+       * tune/speed.c (routine): Add speed_mpn_nussbaumer_mul_sqr.
+
+2009-12-17  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/bdiv_q.c (mpn_bdiv_q_itch): Rewrite.
+
+2009-12-16  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tests/mpn/t-bdiv.c (bdiv_q_valid_p, bdiv_qr_valid_p): Call refmpn_mul
+       instead of refmpn_mul_basecase.
+       * tests/mpn/toom-shared.h: Likewise.
+       * tests/refmpn.c (refmpn_mullo_n,refmpn_sqr,refmpn_mul_any): Likewise.
+
+       * minithres/gmp-mparam.h: Add new thresholds, trim old values.
+
+       * mpn/generic/powm.c: Use mp_bitcnt_t for bit counts.
+       Handle REDC_1_TO_REDC_N_THRESHOLD < MUL_TOOM22_THRESHOLD in
+       non-WANT_REDC_2 INNERLOOP expansion code.
+       * mpn/generic/powm_sec.c: Use mp_bitcnt_t for bit counts.
+
+2009-12-16  Niels Möller  <nisse@lysator.liu.se>
+
+       * tests/mpz/t-gcd.c (main): Added test case to exercise the
+       unlikely u0 == u1 case in mpn_gcdext_lehmer_n.
+
+       * mpn/generic/gcdext_lehmer.c (mpn_gcdext_lehmer_n): Get ASSERT
+       right.
+
+2009-12-16  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tests/mpz/t-mul.c: Misc cleanups.
+       (mul_basecase): Remove.
+       (ref_mpn_mul): Remove.
+       * tests/refmpn.c (refmpn_mul): New function, mainly from t-mul.c's
+       ref_mpn_mul.
+       (refmpn_mullo_n): Add a missing free.
+
+       * tune/speed.c (routine): Measure speed_mpn_{sb,dc}pi1_div_qr,
+       mpn_{sb,dc}pi1_divappr_q, mpn_{sb,dc}pi1_bdiv_qr, and
+       mpn_{sb,dc}pi1_bdiv_q.
+
+       * mpn/generic/invertappr.c: New file, meat from invert.c.
+       * mpn/generic/invert.c: Leave just mpn_invert.
+       * configure.in (gmp_mpn_functions): Add invertappr.
+       * tune/Makefile.am (TUNE_MPN_SRCS_BASIC): Add invertappr.c.
+       * gmp-impl.h (mpn_invert_itch, mpn_invertappr_itch): New macros.
+
+2009-12-15  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/gcdext_subdiv_step.c: Get an ASSERT right.
+
+2009-12-15  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/sbpi1_div_qr.c (mpn_sbpi1_div_qr): A very small step
+       towards nail support.
+
+2009-12-15  Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * gmp-impl.h (mpn_ni_invertappr): Added prototype and name-mangling.
+       * mpn/generic/mulmod_bnm1.c: Comment representation of class [0].
+
+2009-12-14  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/sbpi1_divappr_q.c (mpn_sbpi1_divappr_q): Use
+       udiv_qr_3by2.
+
+2009-12-14  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tune/tuneup.c (tune_binvert): Remove BINV_MULMOD_BNM1_THRESHOLD
+       tuning, it was always zero and caused BINV_NEWTON_THRESHOLD to be
+       wrong (as pointed out by Marco).
+       * (BINV_MULMOD_BNM1_THRESHOLD): Clean from other files too.
+
+2009-12-14  Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/generic/invert.c: Improved comments.
+       (mpn_bc_invertappr): Conditionally re-enable mpn_dcpi1_divappr_q.
+
+2009-12-14  Niels Möller  <nisse@lysator.liu.se>
+
+       * gmp-impl.h (udiv_qr_3by2): Fix typo in argument list.
+
+2009-12-13  Niels Möller  <nisse@lysator.liu.se>
+
+       * gmp-impl.h (udiv_qr_3by2): New macro.
+       * mpn/generic/sbpi1_div_qr.c (mpn_sbpi1_div_qr): Use udiv_qr_3by2.
+
+2009-12-13  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/dcpi1_divappr_q.c (mpn_dcpi1_divappr_q): Avoid a buffer
+       overrun.
+
+       * mpn/generic/mul_fft.c (mpn_mul_fft_full): Handle carry-out from 2nd
+       mpn_mul_fft, add an ASSERT for the 1st mpn_mul_fft.  Replace some
+       comments on cc's range with ASSERTs.
+
+       * mpn/generic/gcdext.c (compute_v): Normalise tp[] after mpn_mul.
+
+       * mpz/powm.c: Rework buffer handling.
+
+2009-12-13  Niels Möller  <nisse@lysator.liu.se>
+
+       * tests/mpn/toom-shared.h (main): Use refmpn_mul_basecase to check
+       results (slow!). Iteration counts of all toom tests reduced
+       considerably.
+
+2009-12-13  Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/generic/invert.c (mpn_invertappr): Split into _bc and _ni.
+       (mpn_bc_invertappr): New function, the basecase.
+       (mpn_ni_invertappr): New function, Newton iteration.
+       (mpn_invert): Use mpn_ni_invertappr.
+       * tune/tuneup.c (tune_invert): Min for INV_APPR_THRESHOLD.
+       (tune_invertappr): Min for INV_NEWTON_THRESHOLD.
+
+       * tune/speed.h (SPEED_ROUTINE_MPN_NI_INVERTAPPR): New macro.
+       (speed_mpn_ni_invertappr): Declare function.
+       * tune/common.c (speed_mpn_ni_invertappr): New function.
+       * tune/speed.c (routine): Add speed_mpn_ni_invertappr.
+
+       * tune/tuneup.c (tune_invertappr): Use speed_mpn_ni_invertappr to
+       tune INV_MULMOD_BNM1_THRESHOLD.
+
+2009-12-12  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/mu_bdiv_qr.c (mpn_mu_bdiv_qr_itch): Rewrite.
+
+2009-12-12  Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * tests/mpn/t-mulmod_bnm1.c (main): Disable B^n+1 stressing test
+       for odd sizes.
+
+       * mpn/generic/invert.c: Complete rewrite. Uses Newton iterations.
+       * gmp-impl.h (mpn_invertappr): Added prototype and name-mangling.
+       (mpn_invertappr_itch): Added prototype and name-mangling.
+       (INV_APPR_THRESHOLD): Support for a new tunable const.
+       * tune/speed.h (SPEED_ROUTINE_MPN_INVERTAPPR): New macro.
+       (speed_mpn_invertappr): Declare function.
+       * tune/common.c (speed_mpn_invertappr): New function.
+       * tune/speed.c (routine): Add speed_mpn_invertappr.
+       * tune/tuneup.c (tune_invertappr): New function: was tune_invert.
+       (tune_invert): Now tune only INV_APPR_THRESHOLD.
+       (all): Enable call to tune_invert and tune_invertappr.
+
+2009-12-11  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/binvert.c: Use mpn_mulmod_bnm1 instead of FFT wrapping.
+       Old, evidently broken wrapping code removed.
+       * tune/tuneup.c (tune_binvert): Tune BINV_MULMOD_BNM1_THRESHOLD.
+       * gmp-impl.h: Provide declarations for corresponding threshold var.
+
+       * tests/mpn/t-bdiv.c (COUNT): Decrease to keep run time reasonable.
+
+       * tune/tuneup.c (tune_invert): Tune INV_MULMOD_BNM1_THRESHOLD.
+       * gmp-impl.h: Provide declarations for corresponding threshold var.
+
+       * tests/mpn/t-mulmod_bnm1.c: Avoid a division by zero.
+
+       * configure.in: Set up different paths for different 64-bit sparc
+       processors.
+       * mpn/sparc64/ultrasparc34/gmp-mparam.h: New file.
+
+2009-12-10  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/*/gmp-mparam.h: Regenerate many of these files.
+
+2009-12-10  Niels Möller  <nisse@lysator.liu.se>
+
+       * gmp-impl.h (mpn_divexact): Removed scratch pointer from
+       prototype.
+       * mpn/generic/gcdext.c (divexact): Deleted, moved to...
+       * mpn/generic/divexact.c (mpn_divexact): New implementation (moved
+       from gcdext.c). The bidirectional divexact is kept but #if:ed out.
+       Interface change, since the new code doesn't take a scratch
+       argument.
+
+       * tests/mpn/t-mulmod_bnm1.c (main): Ensure that an >= bn. Lowered
+       MIN_N to 1. Various fixes to handle n == 1 properly.
+
+       * mpn/generic/mulmod_bnm1.c (mpn_mulmod_bnm1): Small interface
+       change, require an >= bn.
+
+       * mpn/generic/mulmod_bnm1.c (mpn_mulmod_bnm1): Fixed non-recursive
+       case to not write beyond end of result area.
+
+2009-12-09  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tune/speed.h (SPEED_ROUTINE_MPN_MULMOD_BNM1_CALL): New macro, made
+       from now deleted SPEED_ROUTINE_MPN_MULMOD_BNM1.
+       * tune/common.c (speed_mpn_bc_mulmod_bnm1): New function.
+       (speed_mpn_mulmod_bnm1): Use SPEED_ROUTINE_MPN_MULMOD_BNM1_CALL.
+       * tune/speed.c (routine): Add mpn_bc_mulmod_bnm1.
+
+       * mpn/generic/mulmod_bnm1.c (mpn_mulmod_bnm1_next_size): Rewrite.
+
+       * tune/tuneup.c (tune_mulmod_bnm1): Rewrite.
+
+2009-12-08  Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/generic/mulmod_bnm1.c (mpn_bc_mulmod_bnm1,
+       mpn_bc_mulmod_bnp1): Added a parameter for scratch area, possibly
+       same as result area (as suggested by Niels Möller).
+       (mpn_mulmod_bnm1): Calls changed accordingly.
+
+2009-12-08  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/gcdext_1.c (mpn_gcdext_1) [GCDEXT_1_USE_BINARY]: Use
+       table lookup for count_trailing_zeros. Binary algorithm still
+       disabled by default.
+
+       * mpn/generic/gcdext.c (divexact): Local definition of divexact,
+       using mpn_bdiv_q.
+       (compute_v): Use it.
+
+       * tests/mpn/Makefile.am (check_PROGRAMS): Added t-bdiv.
+
+       * tests/mpn/t-bdiv.c: New file.
+
+       * mpn/generic/bdiv_q.c (mpn_bdiv_q): Fixed bad quotient length,
+       should have qn == nn.
+
+       * mpn/generic/bdiv_qr.c (mpn_bdiv_qr): Pass correct nn length to
+       the lower-level functions.
+
+2009-12-08  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tune/speed.h (SPEED_ROUTINE_MPN_MULMOD_BNM1_ROUNDED): New define.
+       * tune/common.c (speed_mpn_mulmod_bnm1_rounded): New function.
+       * tune/speed.c (routine): Add mpn_mulmod_bnm1_rounded for measuring
+       mpn_mulmod_bnm1 at recommended sizes.
+
+       * mpn/generic/mulmod_bnm1.c (mpn_mulmod_bnm1_next_size): Rewrite.
+       (mpn_bc_mulmod_bnm1): Use mpn_add_n instead of mpn_add.
+
+       * tune/speed.c (routine): Add mpn_invert.
+
+       * tune/tuneup.c (tune_invert): New function.
+       * tune/speed.h (SPEED_ROUTINE_MPN_INVERT): New macro.
+       * tune/common.c (speed_mpn_invert): New function.
+       * gmp-impl.h: Provide declarations for corresponding threshold var.
+       * tune/Makefile.am (TUNE_MPN_SRCS_BASIC): Add invert.c.
+
+2009-12-08  Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * tests/devel/try.c: Test mpn_addlsh2_n and mpn_{add,sub}lsh_n;
+       mpn_rsblsh_n now tests all shift values.
+       * tests/refmpn.c (refmpn_addlsh_n, refmpn_sublsh_n): New functions.
+       (refmpn_addlsh1_n): Use generic refmpn_addlsh_n.
+       (refmpn_sublsh1_n): Use generic refmpn_sublsh_n.
+       (refmpn_addlsh2_n): New function.
+       * tests/tests.h: Declare new functions.
+
+2009-12-06  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tune/tuneup.c (tune_mulmod_bnm1): Up min_size to 12.
+
+       * Globally: Rename *mullow* to *mullo*, *MULLOW* to *MULLO*.
+
+       * configure.in: Don't include ev5 directory for ev6* and ev7.  Misc
+       alpha path cleanups.
+       * mpn/alpha/add_n.asm: Replaced by mpn/alpha/ev5/add_n.asm.
+       * mpn/alpha/sub_n.asm: Replaced by mpn/alpha/ev5/sub_n.asm.
+       * mpn/alpha/lshift.asm: Replaced by mpn/alpha/ev5/lshift.asm.
+       * mpn/alpha/rshift.asm: Replaced by mpn/alpha/ev5/rshift.asm.
+       * mpn/alpha/com_n.asm: New, moved from mpn/alpha/ev5/rshift.asm.
+       * mpn/alpha/ev5/diveby3.asm: New, moved from mpn/alpha/diveby3.asm.
+
+       * mpn/powerpc64/mode64/diveby3.asm: Remove, it is slower than
+       mpn_bdiv_dbm1c on all hardware.
+
+       * mpn/generic/powm_sec.c: Rework logic for mpn_sqr_basecase size limit.
+
+       * gmp-impl.h (mpn_redc_1_sec): Declare.
+       * configure.in (gmp_mpn_functions): Add redc_1_sec.
+
+2009-12-06  Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * tests/devel/try.c (try_one): DATA_SRC0_HIGHBIT sets the high bit.
+
+2009-12-05  Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/generic/toom_eval_dgr3_pm1.c: Change return value: 0 or ~0.
+       * mpn/generic/toom_eval_dgr3_pm2.c: Likewise.
+       * mpn/generic/toom_eval_pm1.c: Likewise.
+       * mpn/generic/toom_eval_pm2exp.c: Likewise.
+       * mpn/generic/toom_eval_pm2.c: Rewrite to use mpn_addlsh2_n.
+
+       * mpn/generic/toom_interpolate_5pts.c: Param sa is a flag, not a sign.
+
+       * mpn/generic/toom33_mul.c: Adapt to changes above.
+       * mpn/generic/toom3_sqr.c: Likewise.
+       * mpn/generic/toom42_mul.c: Likewise.
+       * mpn/generic/toom43_mul.c: Reduce branches.
+       * mpn/generic/toom44_mul.c: Likewise.
+       * mpn/generic/toom53_mul.c: Likewise.
+       * mpn/generic/toom62_mul.c: Likewise.
+
+       * mpn/generic/toom52_mul.c: Use toom_eval_ functions.
+
+       * mpn/generic/toom4_sqr.c: Avoid C99 construct.
+       * mpn/generic/toom_interpolate_7pts.c: Likewise.
+
+2009-12-05  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/redc_1_sec.c: New file.
+       * mpn/generic/powm_sec.c: Use redc_1_sec.  Use dummy full subtract
+       instead of mpn_cmp since the latter leaks to the side channel.
+       (mpn_local_sqr_n): New function, with associated macros.
+       (mpn_powm_sec): Use mpn_local_sqr_n.
+
+       * configure.in (HAVE_NATIVE): Add missing functions, then sort.
+
+2009-12-04  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tune/tuneup.c (tune_dc_div): Up min_size to 6.
+       (tune_mod_1): Set MOD_1_1_THRESHOLD min_size to 2.
+
+       * tune/speed.h: Negate "binvert"-type inverses, as required.
+
+       * mpn/generic/redc_1.c: Add ASSERTs.
+       * mpn/generic/redc_2.c: Likewise.
+
+       * mpn/generic/sbpi1_bdiv_q.c: Simplify loops, indexing.
+
+2009-12-03  Yann Droneaud  <yann@droneaud.fr>
+
+       * acinclude.m4 ([long long reliability test 1]): Add a "static" for C99
+       inline semantics compatibility.
+
+2009-12-03  Torbjorn Granlund  <tege@gmplib.org>
+
+       * configure.in: Move intptr_t test into common AC_CHECK_TYPES.
+
+       * mpn/generic/gcdext.c: Add a TMP_FREE.
+
+2009-12-03  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/gcdext_1.c (mpn_gcdext_1) [GCDEXT_1_USE_BINARY]:
+       Added various masking tricks.
+
+       * mpn/generic/gcdext_1.c (mpn_gcdext_1) [GCDEXT_1_USE_BINARY]:
+       Reimplemented binary gcdext, with proper canonicalization.
+
+       * mpn/generic/gcdext_lehmer.c (mpn_gcdext_lehmer_n): Handle v == 0
+       from mpn_gcdext_1.
+       * mpn/generic/gcdext_1.c (mpn_gcdext_1): Allow inputs with a < b,
+       assertions fixed accordingly.
+
+2009-12-03  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tune/tuneup.c: Tune DC_DIVAPPR_Q_THRESHOLD.  Rewrite
+       DC_DIV_QR_THRESHOLD tuning code.
+       (tune_dc_div): Rewrite.
+       * tune/speed.h (SPEED_ROUTINE_MPN_PI1_DIV): New macro.
+       * tune/common.c (speed_mpn_sbpi1_div_qr, speed_mpn_dcpi1_div_qr,
+       speed_mpn_sbpi1_divappr_q, speed_mpn_sbpi1_bdiv_qr): New functions.
+       * gmp-impl.h: Provide declarations for corresponding threshold vars.
+       * tune/Makefile.am (TUNE_MPN_SRCS_BASIC): Add dcpi1_divappr_q.c.
+
+       * tune/tuneup.c (tune_binvert): Up max_size.
+
+2009-12-02  Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * tests/devel/try.c: Test mpn_rsblsh2_n and mpn_rsblsh_n.
+       * tests/refmpn.c (refmpn_rsblsh_n, refmpn_rsblsh2_n): New functions.
+       (refmpn_rsblsh1_n): Use generic refmpn_rsblsh_n.
+       * tests/tests.h: Declare new functions.
+
+2009-12-03  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/gcdext_subdiv_step.c (mpn_gcdext_subdiv_step):
+       Select the right cofactor in the cases A == B or A == 2B.
+
+       * mpn/generic/gcdext_lehmer.c (mpn_gcdext_lehmer_n): Deleted
+       handling of ap[0] == 0 and bp[0] == 0; these cases don't happen.
+       Select the right cofactor in the case ap[0] == bp[0].
+       * mpn/generic/gcdext.c (mpn_gcdext): Analogous changes.
+
+2009-12-02  Niels Möller  <nisse@lysator.liu.se>
+
+       * gmp-h.in (mpn_gcdext_1): Updated prototype.
+       * mpn/generic/gcdext_lehmer.c (mpn_gcdext_lehmer_n): Updated for
+       signed cofactors from gcdext_1.
+       * mpn/generic/gcdext_1.c (mpn_gcdext_1): Use Euclid's algorithm,
+       and return signed cofactors.
+
+2009-12-02  Torbjorn Granlund  <tege@gmplib.org>
+
+       * doc/gmp.texi (Low-level Functions): Document mpn_sqr_n.
+
+       * tune/speed.c (routine): Add mpn_binvert.
+
+       * tune/tuneup.c: Tune BINV_NEWTON_THRESHOLD.
+       (tune_binvert): New function.
+       * tune/speed.h (SPEED_ROUTINE_MPN_BINVERT): New macro.
+       * tune/common.c (speed_mpn_binvert): New function.
+       * gmp-impl.h: Provide declarations for corresponding threshold var.
+       * tune/Makefile.am (TUNE_MPN_SRCS_BASIC): Add binvert.c.
+
+       * tune/tuneup.c: Tune DC_BDIV_QR_THRESHOLD and DC_BDIV_Q_THRESHOLD.
+       (tune_dc_bdiv): New function.
+       (tune_dc_div): New name for tune_dc.
+       * tune/speed.h (SPEED_ROUTINE_MPN_PI1_BDIV_QR,
+       SPEED_ROUTINE_MPN_PI1_BDIV_Q): New macros.
+       * tune/common.c (speed_mpn_sbpi1_bdiv_qr, speed_mpn_dcpi1_bdiv_qr,
+       speed_mpn_sbpi1_bdiv_q, speed_mpn_dcpi1_bdiv_q): New functions.
+       * gmp-impl.h: Provide declarations for corresponding threshold vars.
+       * tune/Makefile.am (TUNE_MPN_SRCS_BASIC): Add dcpi1_bdiv_qr.c and
+       dcpi1_bdiv_q.c.
+
+2009-12-01  Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/generic/toom53_mul.c: Removed double computation of vinf.
+
+       * mpn/x86_64/aorrlsh_n.asm: Correct return value for rsblsh_n.
+       * mpn/asm-defs.m4 (define_mpn): Add rsblsh_n.
+       * gmp-impl.h (mpn_rsblsh_n): Added prototype and name-mangling.
+
+       * mpn/generic/fib2_ui.c: Reduce the amount of temporary storage.
+       Use mpn_rsblsh_n.
+
+2009-12-01  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/redc_n.c: Rework temp allocation.
+
+       * mpn/generic/dcpi1_bdiv_qr.c (mpn_dcpi1_bdiv_qr_n_itch): Add pi1 also
+       to this function.
+
+       * mpn/generic/dcpi1_bdiv_q.c: Get the mpn_sbpi1_bdiv_q call right.
+       Misc cleanups.
+
+       * tune/speed.c (routine): Fix typo in last change.
+       Add mpn_redc_2.
+
+       * tune/speed.h (SPEED_ROUTINE_REDC_N): Set min size properly.
+
+2009-12-01  Niels Möller  <nisse@lysator.liu.se>
+
+       * tune/speed.c (routine): Added mpn_toom42_mul and mpn_redc_n.
+       * tune/speed.h (SPEED_ROUTINE_MPN_TOOM42_MUL): New macro.
+       (speed_mpn_toom42_mul): Declare function.
+       * tune/common.c (speed_mpn_toom42_mul): New function.
+       * gmp-impl.h (MPN_TOOM42_MUL_MINSIZE): New constant.
+
+2009-11-30  Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/generic/fib2_ui.c: Use mpn_rsblsh2_n.
+
+2009-11-29  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/pentium4/gmp-mparam.h
+       (HAVE_NATIVE_mpn_addlsh1_n, HAVE_NATIVE_mpn_sublsh1_n): Don't undef.
+
+       * Makefile.am (EXTRA_DIST): Remove macos.
+
+2009-11-28  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tune/tuneup.c (tune_redc): Set min_size to 16 for redc_n tuning.
+
+       * mpn/x86_64/sqr_basecase.asm (SQR_TOOM2_THRESHOLD_MAX): Avoid quoting
+       to allow configure.in to parse it more easily.  Trim from 120 to 80.
+
+2009-11-28  Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/generic/mulmod_bnm1.c: Basecases made simpler, this also corrects
+       a bug affecting the previous version.
+
+2009-11-28  Torbjorn Granlund  <tege@gmplib.org>
+
+       * configure.in: Handle atom also in 32-bit mode.
+       * mpn/x86/atom/gmp-mparam.h: New file.
+
+       * gmp-impl.h (MULMOD_BNM1_THRESHOLD): Default.
+
+       * mpn/generic/redc_n.c: Use mpn_mulmod_bnm1 instead of mpn_mul_n.
+
+       * Use TMP_ALLOC_LIMBS consistently.
+       * Finish renaming BITS_PER_MP_LIMB to GMP_LIMB_BITS.
+
+       * macos: Remove entire directory.
+
+2009-11-27  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/corei/gmp-mparam.h: New file.
+       * mpn/x86_64/core2/gmp-mparam.h: Now for just core2.
+       * mpn/powerpc64/mode64/p3/gmp-mparam.h: New file.
+       * mpn/powerpc64/mode64/p4/gmp-mparam.h: New file.
+       * mpn/powerpc64/mode64/p5/gmp-mparam.h: New file.
+
+       * config.guess: Return "corei" for core i7 and core i5.
+       * config.sub: Recognise "corei".
+       * acinclude.m4 (X86_64_PATTERN): Add corei.
+       * configure.in (powerpc): Set up more CPU-specific paths.
+       (x86): Handle corei.
+
+       * mpz/powm.c: Allow input operand overlap also when exponent = 1.
+       Misc cleanups.
+
+2009-11-26  Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * tests/mpn/t-mulmod_bnm1.c: New test file.
+       * tests/mpn/Makefile.am (check_PROGRAMS): Add t-mulmod_bnm1.
+
+       * mpn/generic/mullow_n.c: Comments on Mulders' trick implementation.
+
+2009-11-26  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/powm.c: Make comments reflect current code state.
+
+       * tests/devel/try.c: Make mpn_mullow_n testing actually work.
+
+2009-11-25  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpz/powm.c: Clean up unused defs.
+
+2009-11-24  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tune/tuneup.c (tune_redc): Rewrite.
+       * mpn/generic/powm.c: Use REDC_1_TO_REDC_2_THRESHOLD,
+       REDC_1_TO_REDC_N_THRESHOLD, and REDC_2_TO_REDC_N_THRESHOLD.
+       Get rid of previous REDC params, including LOCAL_REDC_N_THRESHOLD.
+       (WANT_REDC_2): Define.
+       * gmp-impl.h: Corresponding changes.
+
+2009-11-23  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/powm.c: Fix typo.
+       Define LOCAL_REDC_N_THRESHOLD, use in REDC_2_THRESHOLD...
+       REDC_N_THRESHOLD chain.
+
+2009-11-22  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tune/tuneup.c (tune_mullow): Set min_size to 1.
+
+       * mpn/generic/powm_sec.c: Use just mpn_mul_basecase and
+       mpn_sqr_basecase for multiplication and squaring.
+
+       * tune/tuneup.c: Tune REDC_2_THRESHOLD and REDC_N_THRESHOLD.
+       (tune_redc): New function.
+       (tune_powm): Remove function.
+       * tune/speed.h (SPEED_ROUTINE_REDC_2, SPEED_ROUTINE_REDC_N): New.
+       * tune/common.c (speed_mpn_redc_2, speed_mpn_redc_n): New.
+
+       * mpz/powm.c: Complete rewrite.  Use mpn_powm and mpn_powlo.
+       * mpn/generic/powm.c: Rewrite.
+       * mpn/generic/redc_n.c: New file.
+       * configure.in (gmp_mpn_functions): Add redc_n.
+       * gmp-impl.h (REDC_2_THRESHOLD, REDC_N_THRESHOLD): Default, and define
+       for tuneup.
+
+2009-11-21  Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/generic/mullow_n.c: Disable Mulders' trick for small operands,
+       use fft for bigger ones.
+       * tests/mpn/t-mullo.c: New test file.
+
+2009-11-22  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tune/tuneup.c (tune_mullow): Rewrite.
+
+2009-11-21  Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * gmp-impl.h: Removed unused macros (CACHED_ABOVE_THRESHOLD and
+       CACHED_BELOW_THRESHOLD).
+
+       * mpn/generic/mullow_n.c: Use Mulders' trick.
+       * tune/tuneup.c (tune_mullow): MULLOW_MUL_N_THRESHOLD range of
+       search depends on FFT tuning;
+       (all): Anticipate tune_fft_{mul,sqr}.
+
+       * tune/speed.c (routine): Add entry related to mpn_mulmod_bnm1.
+
+2009-11-19  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/toom_eval_dgr3_pm2.c (mpn_toom_eval_dgr3_pm2)
+       [HAVE_NATIVE_mpn_add_n_sub_n]: Fixed typo in mpn_add_n_sub_n call
+       (spotted by Marco Bodrato).
+       * mpn/generic/toom_eval_pm2.c (mpn_toom_eval_pm2): Likewise.
+       * mpn/generic/toom_eval_pm2exp.c (mpn_toom_eval_pm2exp): Likewise.
+
+       * mpn/generic/toom_eval_pm2.c (mpn_toom_eval_pm2) [HAVE_NATIVE_mpn_addlsh_n]:
+       Fixed missing declaration.
+
+       * mpn/asm-defs.m4 (define_mpn): Add addlsh_n.
+       * gmp-impl.h (mpn_addlsh_n): Added prototype and name-mangling.
+
+2009-11-19  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/toom_eval_pm2.c (mpn_toom_eval_pm2): New file.
+       * mpn/generic/toom53_mul.c (mpn_toom53_mul): Use mpn_toom_eval_pm2.
+       * mpn/generic/toom62_mul.c (mpn_toom62_mul): Likewise.
+       * configure.in (gmp_mpn_functions): Added toom_eval_dgr3_pm2.
+
+2009-11-18  Torbjorn Granlund  <tege@gmplib.org>
+
+       * gmp-impl.h (mpn_and_n, etc): Adapt to now-public logic functions.
+
+       * config.guess: Recognise VIA nano.
+       * config.sub: Likewise.
+       * configure.in: Generalise x86_64 support; recognise VIA nano.
+
+2009-11-16  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tune/speed.c (routine): Add measurement of mpn_addlsh2_n,
+       mpn_sublsh2_n, mpn_rsblsh2_n.
+       * tune/common.c: Add speed routines for lsh2 functions.
+
+       * mpn/generic/divis.c: Use MU_BDIV_QR_THRESHOLD.
+
+       * configure.in (gmp_mpn_functions_optional): Add *lsh_n functions.
+
+       * mpn/generic/toom_eval_pm2exp.c: Make HAVE_NATIVE_mpn_addlsh_n code
+       work.
+
+       * mpn/x86_64/aorrlsh2_n.asm: Optimise inner loop.
+
+       * configure.in (gmp_mpn_functions_optional): Remove copyi,copyd, they
+       are now in gmp_mpn_functions.  Analogously move logical functions.
+
+2009-11-16  Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/generic/toom53_mul.c: Use addlsh2 for evaluation (and fix typo).
+       * mpn/generic/toom_eval_dgr3_pm2.c: Likewise (affects toom44 and 43).
+
+       * mpn/asm-defs.m4: Fix comments for op_lsh2 new functions.
+       * gmp-impl.h: Likewise.
+       * tests/mpz/t-fac_ui.c: Fix a comment.
+
+2009-11-15  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/aorrlsh2_n.asm: New file.
+       * configure.in: Add support for addlsh2_n, sublsh2_n, and rsblsh2_n,
+       including mulfuncs.
+       * gmp-impl.h (mpn_addlsh2_n, mpn_sublsh2_n, mpn_rsblsh2_n): Declare.
+       * mpn/asm-defs.m4: Likewise.
+
+       * mpn/generic/copyi.c: New file.
+       * mpn/generic/copyd.c: Likewise.
+       * mpn/generic/zero.c: Likewise.
+       * gmp-h.in: Declare new functions.
+       * configure.in (gmp_mpn_functions): Add new functions.
+
+2009-11-15  Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/generic/mulmod_bnm1.c (mpn_mulmod_bnm1_next_size): Fix typo.
+
+       * mpn/generic/toom33_mul.c: Use rsblsh1 for evaluation.
+       * mpn/generic/toom3_sqr.c: Likewise.
+
+2009-11-14  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/toom52_mul.c: Use mpn_addlsh1_n.
+
+       * mpn/generic/toom52_mul.c: Toggle the right flag bit in an
+       HAVE_NATIVE_mpn_add_n_sub_n arm.
+
+       * tests/mpz/t-remove.c: New file.
+
+       * mpn/generic/remove.c: Major overhaul.  Add parameter 'cap'.
+
+       * mpn/generic/binvert.c: Fix typo in last change.
+
+       * mpn/generic/bdiv_qr.c: Make it actually work.  Also use passed-in
+       scratch space.
+
+       * mpn/generic/mu_bdiv_qr.c: Reset FFT parameters for each call.
+
+2009-11-12  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86/k7/gcd_1.asm (MASK): Compute from MAXSHIFT.
+
+2009-11-11  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/binvert.c: Simplify, fix comments.
+
+       * tests/devel/try.c: Test mpn_invert and mpn_binvert.
+
+       * tests/refmpn.c (refmpn_invert, refmpn_binvert): New functions.
+       * tests/tests.h: Declare new functions.
+
+2009-11-10  Torbjorn Granlund  <tege@gmplib.org>
+
+       * configure.in: Supply compiler options for atom in 32-bit mode.
+
+       * acinclude.m4 (X86_64_PATTERN): New.
+       * configure.in: Setup and use X86_64_PATTERN.
+
+       * mpn/x86_64/fat/fat.c: New file.
+       * mpn/x86_64/fat/fat_entry.asm: New file.
+       * mpn/x86_64/fat: Copy C placeholder files from mpn/x86/fat.
+       * mpn/x86_64/x86_64-defs.m4 (CPUVEC_FUNCS_LIST): New, copied from
+       mpn/x86/x86-defs.m4.
+       * configure.in: Move down x86 fat setup code until after ABI has been
+       determined; generalise to handle x86_64.
+
+2009-11-09  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86/fat/mod_1.c: New file.
+
+       * acinclude.m4 (GMP_C_FOR_BUILD_ANSI): Avoid poor quoting.
+
+2009-11-08  Torbjorn Granlund  <tege@gmplib.org>
+
+       * gmp-impl.h (MPN_LOGOPS_N_INLINE): Rewrite, update interface.  Callers
+       updated.
+       * mpn/generic/logops_n.c: New file.
+       * doc/gmp.texi (Low-level Functions): Document logical mpn functions.
+
+2009-11-07  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tune/speed.h (SPEED_ROUTINE_MPN_MULMOD_BNM1): Adapt to new
+       mpn_mulmod_bnm1 interface.
+
+2009-11-07  Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/generic/mulmod_bnm1.c: New interface, with size
+       specified for all operands in mpn_mulmod_bnm1.
+       * gmp-impl.h: Changed mpn_mulmod_bnm1 prototype.
+
+2009-11-05  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86/k7/gcd_1.asm: Actually use div-reduced value.
+       Mnemonic cleanup.
+
+       * mpn/x86_64/gcd_1.asm: New file.
+
+2009-11-03  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tune/Makefile.am (TUNE_MPN_SRCS_BASIC): Add sqr_n.c.
+
+2009-11-03  Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/generic/toom_interpolate_6pts.c: removed an addmul_1 and cleanup.
+
+2009-11-02  Torbjorn Granlund  <tege@gmplib.org>
+
+       * configure.in (gmp_mpn_functions): Remove obsolete functions
+       dc_divrem_n and sb_divrem_mn.
+       * gmp-impl.h: Misc cleanup.
+       (mpn_sb_divrem_mn, mpn_dc_divrem_n): Remove.
+       (DIV_DC_THRESHOLD): Remove.
+       * mpn/generic/dc_divrem_n.c: Remove.
+       * mpn/generic/sb_divrem_mn.c: Remove.
+       * mpn/generic/tdiv_qr.c: Use DC_DIV_QR_THRESHOLD, not DIV_DC_THRESHOLD.
+
+       * tests/devel/try.c: Replace mpn_sb_divrem_mn by mpn_sbpi1_div_qr.
+       * tests/refmpn.c (refmpn_sb_div_qr): New name for refmpn_sb_divrem_mn.
+
+       * tune/Makefile.am (libspeed_la_SOURCES): Remove sb_div.c and sb_inv.c.
+       (TUNE_MPN_SRCS_BASIC): Remove sb_divrem_mn.c.
+       * tune/common.c (speed_mpn_dcpi1_div_qr_n): New function.
+       Remove mpn_sb_divrem_mn related functions.
+       * tune/speed.c (routine): Remove entries related to mpn_dc_divrem and
+       mpn_sb_divrem.
+       (routine): New entry for mpn_dc_div_qr_n.
+       * tune/speed.h (SPEED_ROUTINE_MPN_DC_DIVREM_CALL): Compute inverse
+       needed by pi1 calls.
+       (SPEED_ROUTINE_MPN_SB_DIVREM_M3): Remove.
+       * tune/tuneup.c (tune_sb_preinv): Remove.
+       (tune_dc): Update to measure DC_DIV_QR_THRESHOLD.
+
+       * mpn/generic/sb_divappr_q.c: Remove.
+
+2009-11-01  Torbjorn Granlund  <tege@gmplib.org>
+
+       * gmp-impl.h: Misc minor cleanups.
+
+2009-10-31  Torbjorn Granlund  <tege@gmplib.org>
+
+       * gmp-impl.h (toom itch functions): Simplify, make some into macros.
+       (MPN_KARA_MUL_N_TSIZE, MPN_KARA_SQR_N_TSIZE): Remove.
+       * mpn/generic/mul_n.c (mpn_toom3_mul_n, mpn_toom3_sqr_n): Remove.
+       * mpn/generic/mul_n.c (mpn_sqr_n): Move from here...
+       * mpn/generic/sqr_n.c: ...to this new file.
+       * configure.in (gmp_mpn_functions): Add sqr_n.
+
+       * Globally change
+         MUL_TOOM3_THRESHOLD => MUL_TOOM33_THRESHOLD,
+         MUL_KARATSUBA_THRESHOLD => MUL_TOOM22_THRESHOLD,
+         SQR_KARATSUBA_THRESHOLD => SQR_TOOM2_THRESHOLD,
+       and associated names analogously.
+
+2009-10-31  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/toom_interpolate_7pts.c: Changed evaluation points,
+       replacing -1/2 by -2.
+       * mpn/generic/toom44_mul.c: Updated to use new evaluation points,
+       and use mpn_toom_eval_dgr3_pm2.
+       * mpn/generic/toom4_sqr.c (mpn_toom4_sqr): Likewise.
+       * mpn/generic/toom53_mul.c (mpn_toom53_mul): Updated to use new
+       evaluation points, and use mpn_toom_eval_pm1 and
+       mpn_toom_eval_pm2exp.
+       * mpn/generic/toom62_mul.c (mpn_toom62_mul): Likewise.
+
+       * mpn/generic/toom_eval_pm2exp.c: New file.
+       * mpn/generic/toom_eval_pm1.c: New file.
+
+       * mpn/generic/toom43_mul.c (mpn_toom43_mul): Use
+       mpn_toom_eval_dgr3_pm2.
+
+2009-10-30  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tune/Makefile.am (TUNE_MPN_SRCS_BASIC): Add toom2* and toom3* files.
+
+2009-10-30  Niels Möller  <nisse@lysator.liu.se>
+
+       * configure.in (gmp_mpn_functions): Added toom_eval_dgr3_pm2.
+       * gmp-impl.h: Added prototype for mpn_toom_eval_dgr3_pm2.
+       * mpn/generic/toom_eval_dgr3_pm2.c: New file.
+
+2009-10-29  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/toom43_mul.c (mpn_toom43_mul): Use
+       mpn_toom_eval_dgr3_pm1.
+       * mpn/generic/toom42_mul.c (mpn_toom42_mul): Likewise.
+
+2009-10-29  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/mulmod_bnm1.c: Replace some add_1 by INCR.
+
+       * gmp-impl.h (mpn_mulmod_bnm1_itch): New macro.
+
+       * mpn/generic/mulmod_bnm1.c (mpn_mulmod_bnm1): Call mpn_mul_fft.
+       (mpn_mulmod_bnm1_next_size): Adapt to SS FFT.
+
+       * mpn/generic/mul_fft.c (mpn_mul_fft): Make it return high limb.
+       (mpn_mul_fft_internal): Likewise.
+
+       * mpn/generic/mulmod_bnm1.c: New file, by Niels Möller.
+       * configure.in (gmp_mpn_functions): Add mulmod_bnm1.
+       * gmp-impl.h: Add related declarations.
+       * tune/tuneup.c: Tune MULMOD_BNM1_THRESHOLD.
+       * tune/speed.h (SPEED_ROUTINE_MPN_MULMOD_BNM1): New macro.
+       * tune/common.c (speed_mpn_mulmod_bnm1): New function.
+       * Makefile.am (TUNE_MPN_SRCS_BASIC): Add mulmod_bnm1.c.
+
+       * gmp-impl.h (mpn_kara_mul_n, mpn_kara_sqr_n): Remove declarations.
+       * tune/common.c: Remove/rename kara functions.
+       * tune/speed.h: Likewise.
+
+       * tests/devel/try.c: Clean up usage of %p printf arguments.
+
+       * gmp-impl.h: Update MUL/SQR MINSIZE macros to reflect new function
+       names and limitations.
+       * tune/tuneup.c: Use updated macro names.
+       * tune/speed.h: Likewise.
+       * tests/devel/try.c: Test new mul/sqr functions, remove old tests.
+
+2009-10-29  Niels Möller  <nisse@lysator.liu.se>
+
+       * tune/speed.c: Added support for mpn_toom4_sqr.
+
+       * tune/speed.h (SPEED_ROUTINE_MPN_TOOM4_SQR): New macro.
+       (SPEED_ROUTINE_MPN_KARA_MUL_N): Deleted.
+       (SPEED_ROUTINE_MPN_TOOM3_MUL_N): Deleted.
+       (SPEED_ROUTINE_MPN_TOOM2_SQR): Use mpn_toom2_sqr_itch.
+
+       * gmp-impl.h (mpn_toom3_mul_n, mpn_toom3_sqr_n): Remove
+       declarations.
+       (mpn_toom2_sqr_itch): Add margin for recursive calls.
+
+2009-10-28  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/mul_n.c (mpn_kara_mul_n): Deleted old Karatsuba
+       implementation.
+       (mpn_kara_sqr_n): Likewise deleted.
+
+       * mpn/generic/mul_n.c (mpn_sqr_n): Use mpn_toom2_sqr and
+       mpn_toom3_sqr, not the old implementations.
+
+       * gmp-impl.h (MPN_TOOM3_MUL_N_TSIZE): Deleted, replaced by
+       mpn_toom33_mul_itch.
+       (MPN_TOOM3_SQR_N_TSIZE): Deleted, replaced by
+       mpn_toom3_sqr_itch.
+       (mpn_toom33_mul_itch): Needs more scratch.
+       (mpn_toom3_sqr_itch): Likewise.
+       * tune/speed.h (SPEED_ROUTINE_MPN_TOOM3_MUL_N): Use
+       mpn_toom33_mul_itch.
+       (SPEED_ROUTINE_MPN_TOOM3_SQR_N): Use mpn_toom3_sqr_itch.
+       * mpn/generic/mul_n.c (mpn_mul_n): Use mpn_toom33_mul_itch.
+       (mpn_sqr_n): Use mpn_toom3_sqr_itch.
+
+       * mpn/generic/toom33_mul.c (mpn_toom33_mul): Avoid TMP_ALLOC. Needs
+       some more supplied scratch instead.
+       * mpn/generic/toom3_sqr.c (mpn_toom3_sqr): Likewise.
+
+2009-10-26  Torbjorn Granlund  <tege@gmplib.org>
+
+       * gmp-impl.h (invert_pi1): Streamline, as suggested by Niels.
+
+2009-10-24  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/bdiv_q.c: Update to call new functions.
+       * mpn/generic/bdiv_qr.c: Likewise.
+       * mpn/generic/binvert.c: Likewise.
+       * mpn/generic/divexact.c: Likewise.
+       * mpn/generic/divis.c: Likewise.
+       * mpn/generic/perfpow.c: Likewise.
+       * mpn/generic/tdiv_qr.c: Likewise.
+       * mpn/generic/dcpi1_bdiv_q.c: New file.
+       * mpn/generic/dcpi1_bdiv_qr.c: New file.
+       * mpn/generic/dcpi1_div_q.c: New file.
+       * mpn/generic/dcpi1_div_qr.c: New file.
+       * mpn/generic/dcpi1_divappr_q.c: New file.
+       * mpn/generic/sbpi1_bdiv_q.c: New file.
+       * mpn/generic/sbpi1_bdiv_qr.c: New file.
+       * mpn/generic/sbpi1_div_q.c: New file.
+       * mpn/generic/sbpi1_div_qr.c: New file.
+       * mpn/generic/sbpi1_divappr_q.c: New file.
+       * mpn/generic/dc_bdiv_q.c: Removed.
+       * mpn/generic/dc_bdiv_qr.c: Removed.
+       * mpn/generic/dc_div_q.c: Removed.
+       * mpn/generic/dc_div_qr.c: Removed.
+       * mpn/generic/dc_divappr_q.c: Removed.
+       * mpn/generic/sb_bdiv_q.c: Removed.
+       * mpn/generic/sb_bdiv_qr.c: Removed.
+       * mpn/generic/sb_div_q.c: Removed.
+       * mpn/generic/sb_div_qr.c: Removed.
+
+       * configure.in (gmp_mpn_functions): Add new division functions, remove
+       obsolete division functions.
+
+       * gmp-impl.h: Add declarations of new division functions, remove
+       corresponding obsolete declarations.
+       (gmp_pi1_t, gmp_pi2_t): New types.
+       (invert_pi1): New macro for computing 2/1 and 3/2 inverses.
+
+2009-10-23  Niels Möller  <nisse@lysator.liu.se>
+
+       * gmp-impl.h (mpn_toom62_mul_itch): New function.
+
+       * tests/mpn/t-toom53.c: New test program.
+       * tests/mpn/t-toom62.c: New test program.
+
+2009-10-23  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/get_d.c: Fix code handling denorms for 64-bit machines.
+       * tests/mpf/t-get_d.c (test_denorms): New function.
+
+2009-10-23  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/toom52_mul.c (mpn_toom52_mul): Use supplied scratch
+       space, not TMP_ALLOC. Interface change, now requires input sizes
+       such that s + t >= 5.
+
+       * gmp-impl.h (mpn_toom52_mul_itch): New function.
+
+       * tests/mpn/t-toom52.c: New test program.
+
+2009-10-22  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/sqr_basecase.asm: Tune for speed and a 7% size decrease.
+
+2009-10-22  Niels Möller  <nisse@lysator.liu.se>
+
+       * tests/mpn/t-toom44.c: New test program.
+       * tests/mpn/t-toom33.c: New test program.
+
+       * tests/mpn/toom-shared.h (main): Reorganized input generation.
+       Users are now supposed to define macros MAX_AN, MIN_BN and MAX_BN.
+       Updated existing toom test programs.
+
+2009-10-22  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tests/devel/try.c: Fix typos in last change.
+
+2009-10-21  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/asm-defs.m4 (define_mpn): Add mullow_basecase.
+
+       * tests/devel/try.c: Test mpn_mullow_n.
+
+       * tests/refmpn.c (refmpn_mullow_n): New function.
+       * tests/tests.h: Declare it.
+
+2009-10-21  Niels Möller  <nisse@lysator.liu.se>
+
+       * tests/mpn/toom-shared.h (main): Check for writes outside of the
+       product or scratch area.
+
+       * gmp-impl.h (mpn_toom43_mul_itch): New function.
+
+       * mpn/generic/toom43_mul.c (mpn_toom43_mul): Use supplied scratch
+       space, not TMP_ALLOC. Interface change, now requires input sizes
+       such that s + t >= 5.
+
+2009-10-20  Niels Möller  <nisse@lysator.liu.se>
+
+       * tests/mpn/toom-shared.h (MIN_BLOCK): New constant, which can be
+       overridden by users. Needed by t-toom42 and t-toom43.
+
+       * tests/mpn/Makefile.am (check_PROGRAMS): Added t-toom32,
+       t-toom42 and t-toom43.
+       * tests/mpn/t-toom43.c: New test program.
+       * tests/mpn/t-toom42.c: New test program.
+       * tests/mpn/t-toom32.c: New test program.
+
+       * tests/mpn/Makefile.am (check_PROGRAMS): Added t-toom22.
+       * tests/mpn/t-toom22.c: New test file.
+       * tests/mpn/toom-shared.h: New file. Test framework for Toom
+       functions.
+
+2009-10-14  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/hgcd.c (mpn_hgcd_itch): Thanks to the new
+       mpn_matrix22_mul_strassen, the scratch need is reduced by 16%.
+
+2009-10-14  Marco Bodrato  <bodrato@mail.dm.unipi.it>
+
+       * mpn/generic/matrix22_mul.c (mpn_matrix22_mul_strassen): New
+       Strassen-like algorithm, to reduce the amount of temporary
+       storage.
+       (mpn_matrix22_mul_itch): Updated to reflect the reduced storage
+       need.
+
+2009-10-03  Torbjorn Granlund  <tege@gmplib.org>
+
+       * Rename mpn_addsub_n to mpn_add_n_sub_n.
+
+2009-10-01  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/tdiv_qr.c: Call mpn_divrem_1 and mpn_dc_div_qr instead of
+       old functions.
+
+       * mpn/generic/mul_n.c: Call toom22 and toom33 instead of old functions.
+
+       * mpn/generic/toom42_mul.c (TOOM42_MUL_N_REC): Renamed from
+       TOOM22_MUL_N_REC.  Unconditionally call the generic mpn_mul_n.
+       * mpn/generic/toom32_mul.c: Analogous changes.
+
+2009-09-28  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/x86_64/invert_limb.asm: Rewrite. Exploit cancellation in the
+       Newton iteration.
+
+2009-09-27  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/x86/invert_limb.asm: Reduce register usage. Eliminated $1
+       arguments to add, sub and shift.
+
+2009-09-25  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/x86/invert_limb.asm: New file.
+
+2009-09-24  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/toom33_mul.c: Use new toom functions for all recursive
+       products.
+       * mpn/generic/toom3_sqr.c: Likewise.
+       * mpn/generic/toom44_mul.c: Likewise.
+       * mpn/generic/toom4_sqr.c: Likewise.
+
+       * mpn/generic/add_n.c: Relax operand overlap ASSERTs.
+       * mpn/generic/sub_n.c: Likewise.
+
+2009-09-15  Torbjorn Granlund  <tege@gmplib.org>
+
+       Suggested by Uwe Mueller:
+       * printf/doprnt.c: Use "%ld" for exponent printing.
+       * printf/doprntf.c (__gmp_doprnt_mpf): Make expval "long".
+
+2009-09-14  Torbjorn Granlund  <tege@gmplib.org>
+
+       * configure.in: Handle mingw64.
+       * gmp-impl.h (gmp_intptr_t): Declare.
+       * tests/amd64check.c (calling_conventions_values): Use CNST_LIMB.
+       * tests/memory.c: Use gmp_intptr_t; print pointers using C90 "%p".
+       * tests/misc.c: Use gmp_intptr_t.
+       * tests/mpq/t-get_str.c: Print pointers using C90 "%p".
+
+2009-08-12  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/mod_1_1.c (mpn_mod_1_1p_cps): Remove silly ASSERT code.
+
+       * mpn/asm-defs.m4 (define_mpn): Remove mod_1s_1p, add mod_1_1p.
+
+       * mpn/arm/invert_limb.asm: Complete rewrite.
+
+       * longlong.h: Document LONGLONG_STANDALONE and NO_ASM.
+
+2009-08-05  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tests/mpz/dive_ui.c (check_random): Avoid zero divisors.
+
+2009-07-31  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/mod_1_1.c: Tweak to handle any modulus (possibility
+       pointed out by Per Austrin).
+       (mpn_mod_1_1p): Renamed from mpn_mod_1s_1p.
+       (mpn_mod_1_1p_cps): Renamed from mpn_mod_1s_1p_cps.
+       * mpn/generic/mod_1.c (mpn_mod_1): Reorganise to call mpn_mod_1_1p for
+       any modulus.
+
+2009-07-28  Torbjorn Granlund  <tege@gmplib.org>
+
+       * configure.in: Pass arch for x86 also in 64-bit mode.
+
+2009-07-26  Torbjorn Granlund  <tege@swox.com>
+
+       * config.guess (_cpuid): Recognise more Intel "Core" processors.
+
+2009-07-13  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpf/eq.c: Rewrite.
+
+       * tests/mpf/t-eq.c: New test.
+
+2009-07-06  Torbjorn Granlund  <tege@gmplib.org>
+
+       * gmp-impl.h (__mp_bases): Remove this alias.
+
+       * mpf/get_str.c: Use less overflow prone expression for computing limb
+       allocation.
+       * mpz/inp_str.c: Likewise.
+       * mpf/set_str.c: Likewise.
+       * mpz/set_str.c: Likewise.
+
+2009-07-03  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/gcd_1.c (mpn_gcd_1): Use masking tricks to reduce
+       the number of branches in the loop.
+
+2009-06-28  Torbjorn Granlund  <tege@gmplib.org>
+
+       * demos/factorize.c (factor_using_pollard_rho): Rewrite.
+
+       * mpz/clears.c: New file.
+       * mpq/clears.c: New file.
+       * mpf/clears.c: New file.
+       * gmp-h.in (mpz_clears, mpq_clears, mpf_clears): Declare.
+       * mpz/Makefile.am: Add clears.c.
+       * mpq/Makefile.am: Add clears.c.
+       * mpf/Makefile.am: Add clears.c.
+       * Makefile.am: Add these also to respective OBJECTS variables.
+       * doc/gmp.texi: Document inits function and clears functions.
+
+2009-06-20  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mp-h.in (mp_bitcnt_t): Declare here too.
+
+2009-06-19  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpq/inits.c: New file.
+       * mpf/inits.c: New file.
+       * gmp-h.in (mpz_inits, mpq_inits, mpf_inits): Declare.
+
+       * mpn/generic/remove.c: New file.
+       * configure.in (gmp_mpn_functions): Add remove.
+       * gmp-impl.h (mpn_remove): Declare.
+
+       * gmp-h.in (mp_bitcnt_t): New basic type.
+       * mpn/generic/perfpow.c (mp_bitcnt_t): Remove private definition.
+
+       * mpn/generic/bdiv_qr.c: Make it actually work.
+
+       * mpn/x86_64/core2/aorsmul_1.asm: Rewrite to use shorter pipeline and
+       to need fewer registers.
+
+2009-06-17  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/rsh1aors_n.asm: New file.
+       * mpn/x86_64/rsh1add_n.asm: Remove.
+       * mpn/x86_64/rsh1sub_n.asm: Remove.
+
+       * mpz/inits.c: New file.
+
+       * gen-trialdivtab.c: Wrap limb constants into CNST_LIMB.
+
+       With Martin Boij:
+       * mpn/generic/perfpow.c (binv_root, binv_sqroot): Change from being
+       recursive to being iterative.
+       (mpn_perfect_power_p): Reorganise temp memory usage to avoid a buffer
+       overrun.  Trim allocation of next and prev.  Never create oversize
+       products in the multiplicity binary search.
+
+       * mpn/generic/dc_div_q.c: Add missing TMP_FREE.
+
+2009-06-16  Torbjorn Granlund  <tege@gmplib.org>
+
+       Revert:
+       * mpn/generic/perfpow.c (perfpow): Test exponents up to ub, inclusive.
+
+2009-06-16  Martin Boij  <mboij@kth.se>
+
+       * mpn/generic/perfpow.c (logs): Use more conservative table.
+
+2009-06-15  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/pa64/aors_n.asm: New file.
+       * mpn/pa64/add_n.asm: Remove.
+       * mpn/pa64/sub_n.asm: Remove.
+
+       * mpn/generic/perfpow.c (perfpow): Test exponents up to ub, inclusive.
+
+2009-06-14  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/bdiv_q_1.asm: Optimise away a mov insn.
+       * mpn/x86_64/dive_1.asm: Likewise.
+
+       * mpn/generic/perfpow.c (binv_root): Use mpn_bdiv_q_1, not
+       mpn_divexact_itch for 2-adic division.
+       (all functions): Micro optimise.
+
+       * Makefile.am (libmp_la_SOURCES): Add nextprime.c.
+
+2009-06-13  Torbjorn Granlund  <tege@gmplib.org>
+
+       * gmp-h.in (mpn_perfect_power_p): Declare.
+       * configure.in (gmp_mpn_functions): Add perfpow.
+       * mpz/perfpow.c: Now trivial, simply calls mpn_perfect_power_p.
+
+2009-06-13  Martin Boij  <mboij@kth.se>
+
+       * mpn/generic/perfpow.c: New file.
+       * tests/mpz/t-perfpow.c: Rewrite.
+
+2009-06-12  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/bdiv_qr.c: New file.
+       * mpn/generic/bdiv_q.c: New file.
+       * configure.in (gmp_mpn_functions): Add bdiv_qr and bdiv_q.
+       * gmp-impl.h: Declare new functions.
+
+       * nextprime.c: New file.
+       * gmp-impl.h (gmp_primesieve_t, gmp_init_primesieve, gmp_nextprime):
+       Declare.
+       * Makefile.am (libgmp_la_SOURCES): Add nextprime.c.
+
+2009-06-11  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/trialdiv.c: New file.
+       * gen-trialdivtab.c: New file.
+       * configure.in (gmp_mpn_functions): Add trialdiv.
+       * gmp-impl.h (mpn_trialdiv): Declare.
+       * Makefile.am: Add rules for gen-trialdivtab and trialdiv.
+
+       * longlong.h (arm count_leading_zeros): Define for armv5.
+
+       * gmp-impl.h: Move down toom itch functions to after we've #defined
+       all THRESHOLDs.
+
+       * dumbmp.c (isprime): Replace with slightly less inefficient code.
+       (mpz_tdiv_r): New function.
+
+2009-06-11  Niels Möller  <nisse@lysator.liu.se>
+
+       Support for mpn_toom32_mul in speed:
+       * tune/speed.c (routine): Added mpn_toom32_mul.
+       * tune/speed.h (SPEED_ROUTINE_MPN_TOOM32_MUL): New macro.
+       * tune/common.c (speed_mpn_toom32_mul): New function.
+
+       * gmp-impl.h (mpn_toom32_mul_itch): Count scratch space needed
+       for the calls to mpn_toom22_mul.
+       (ABOVE_THRESHOLD): Moved this and related macros so it can be used
+       by mpn_toom32_mul_itch.
+       (mpn_toom22_mul_itch): Count scratch space for recursive calls.
+
+2009-06-11  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86/k7/mod_1_4.asm: New file, mainly for k7, but perhaps useful
+       also for k6 and non-sse p6.
+
+2009-06-10  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/mod_1_4.asm: Minor size reducing tweaks.
+
+       * mpn/x86/mod_1.asm: Remove obsolete file.
+       * mpn/x86/k7/mmx/mod_1.asm: Likewise.
+       * mpn/x86/pentium4/sse2/mod_1.asm: Likewise.
+       * mpn/x86/p6/mod_1.asm: Likewise.
+       * mpn/x86/pentium/mod_1.asm: Likewise.
+
+2009-06-08  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/toom4_sqr.c (mpn_toom4_sqr): Reorganized, to reduce
+       the need for scratch space, and get rid of TMP_ALLOC. Also use
+       mpn_toom_eval_dgr3_pm1.
+
+       * mpn/generic/toom_interpolate_6pts.c (mpn_toom_interpolate_6pts):
+       Stricter ASSERTs based on maximum size of polynomial coefficients.
+       Improved comments on the signedness of intermediate values.
+
+2009-06-07  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/toom2_sqr.c: Make it actually work.
+
+       * mpn/generic/toom3_sqr.c: Reduce local scratch space.
+
+2009-06-05  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/mul_fft.c (FFT_TABLE2_SIZE): Default to 200.
+       (MUL_FFT_TABLE2_SIZE, SQR_FFT_TABLE2_SIZE): Let these decide
+       FFT_TABLE2_SIZE if they are defined.
+       (struct nk): Use bit field.
+
+2009-06-05  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/toom44_mul.c (mpn_toom44_mul): Use
+       mpn_toom_eval_dgr3_pm1.
+
+       * mpn/generic/toom_eval_dgr3_pm1.c: New file.
+
+       * mpn/generic/toom_interpolate_7pts.c (mpn_toom_interpolate_7pts):
+       Minor cleanup, use mpn_add rather than mpn_add_n + MPN_INCR_U.
+
+       * mpn/generic/toom44_mul.c (mpn_toom44_mul): Reorganized, to
+       reduce the need for scratch space, and get rid of TMP_ALLOC.
+
+2009-06-05  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/toom_interpolate_7pts.c: Fall back mpn_divexact_byN to
+       mpn_bdiv_q_1_pi1, if the latter is NATIVE.
+
+2009-06-04  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/bdiv_q_1.asm: New file.
+
+       * configure.in (HAVE_NATIVE): Add recently added functions.
+       (GMP_MULFUNC_CHOICES): Handle addlsh_n, sublsh_n, rsblsh_n.
+
+       * tune/common.c (speed_mpn_bdiv_q_1, speed_mpn_bdiv_q_1_pi1):
+       New functions.
+       * tune/speed.c (routine): Add mpn_bdiv_q_1 and mpn_bdiv_q_1_pi1.
+       * tune/speed.h (SPEED_ROUTINE_MPN_BDIV_Q_1_PI1): New #define.
+       (SPEED_ROUTINE_MPN_BDIV_Q_1): New #define.
+
+       * configure.in (gmp_mpn_functions): Add bdiv_q_1.
+       * mpn/generic/bdiv_q_1.c: New file.
+       * mpn/asm-defs.m4 (define_mpn): Add mpn_bdiv_q_1 and mpn_bdiv_q_1_pi1.
+       * gmp-impl.h (mpn_bdiv_q_1, mpn_bdiv_q_1_pi1): Declare.
+
+       * mpn/x86_64/lshift.asm: Cleanup.
+       * mpn/x86_64/rshift.asm: Cleanup.
+
+       * mpn/x86_64/addlsh1_n.asm: Removed.
+       * mpn/x86_64/aorrlsh1_n.asm: Generalised addlsh1_n.asm to handle
+       addlsh1_n and rsblsh1_n functionality.
+
+       * tests/refmpn.c (refmpn_rsblsh1_n): New function.
+       * tests/devel/try.c: Test mpn_rsblsh1_n.
+       * tests/tests.h: Declare refmpn_rsblsh1_n.
+       * tune/common.c (speed_mpn_rsblsh1_n): New function.
+       * tune/speed.c (routine): Add mpn_rsblsh1_n.
+       * tune/speed.h (mpn_rsblsh1_n): Declare.
+
+       * configure.in (gmp_mpn_functions_optional): Add rsblsh1_n.
+       (GMP_MULFUNC_CHOICES): Handle rsblsh1_n defined with a mulfunc.
+       * mpn/asm-defs.m4 (define_mpn): Add rsblsh1_n.
+       * gmp-impl.h (mpn_rsblsh1_n): Declare.
+
+       * mpn/generic/toom32_mul.c: Consistently use TOOM22_MUL_N_REC.
+
+2009-06-03  Marco Bodrato  <bodrato@mail.dm.unipi.it>
+
+       * mpn/generic/toom43_mul.c: New file.
+       * mpn/generic/toom52_mul.c: New file.
+       * mpn/generic/toom_interpolate_6pts.c: New file.
+
+2009-06-03  Torbjorn Granlund  <tege@gmplib.org>
+
+       * configure.in (gmp_mpn_functions): Add toom43_mul, toom52_mul, and
+       toom_interpolate_6pts, but also some previously forgotten functions.
+       * mpn/Makefile.am (nodist_EXTRA_libmpn_la_SOURCES): Likewise.
+       * gmp-impl.h: Declare new functions. Sort toom function declarations.
+
+       * gmp-impl.h: Rename  toom4_* flags enum to toom7_*.  Relevant C files
+       updated.
+
+       * mpn/generic/toom_interpolate_7pts.c (divexact_2exp): Remove.
+
+2009-06-02  Torbjorn Granlund  <tege@gmplib.org>
+
+       * demos/factorize.c: Add -q command line option.
+
+2009-06-02  Marco Bodrato  <bodrato@mail.dm.unipi.it>
+
+       * mpn/generic/toom_interpolate_7pts.c: Streamline, resulting in speed
+       improvements.
+
+       * mpn/generic/toom_interpolate_5pts.c: Likewise, but also completely
+       do away with explicit scratch space.
+       * gmp-impl.h (mpn_toom_interpolate_5pts): Update prototype.
+
+       * mpn/generic/mul_n.c (mpn_toom3_sqr_n, mpn_toom3_mul_n):
+       Update toom_interpolate_5pts call without scratch space parameter.
+       * mpn/generic/toom3_sqr.c: Likewise.
+       * mpn/generic/toom42_mul.c: Likewise.
+       * mpn/generic/toom33_mul.c: Likewise.
+
+       * mpn/generic/toom33_mul.c: Reduce local scratch space.
+       * mpn/generic/toom32_mul.c: Rewrite to not use local scratch space.
+
+2009-06-02  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/toom22_mul.c (TOOM22_MUL_MN_REC): New macro, use it for
+       oo point.
+
+2009-06-01  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/mul.c: Loop to avoid excessive recursion in toom33 and
+       toom44 slicing code.
+
+       * mpz/remove.c: Correctly handle multiplicity that does not fit an int.
+
+       * Makefile.am (dist-hook): Check library version consistency.
+
+       * mpn/generic/mul.c: Rewrite.
+
+2009-05-29  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tests/mpz/t-divis.c (check_random): Create huge test operands.
+
+       * mpn/generic/toom44_mul.c: Allocate temp space using one TMP_ALLOC
+       call, not multiple TMP_SALLOC.
+       * mpn/generic/toom4_sqr.c: Likewise.
+
+       * gmp-impl.h (mpn_toom22_mul_itch): Replace totally wrong code.
+
+       * mpn/generic/mullow_n.c: Relax overlap requirement implied by ASSERT.
+
+       * mpn/generic/divis.c: Rewrite.
+
+       * gmp-impl.h (mpn_mu_bdiv_qr): Now returns mp_limb_t.
+       (mpn_toom2_sqr_itch): Simplify.
+
+       * mpn/generic/mu_bdiv_qr.c: Implement properly.
+
+2009-05-27  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/mod_1_1.c: Add proper ASSERT functionality to cps
+       function.
+       * mpn/generic/mod_1_2.c: Likewise.
+       * mpn/generic/mod_1_3.c: Likewise.
+       * mpn/generic/mod_1_4.c: Likewise.
+
+       * tune: Add speed measuring of toom22, toom33, and toom44.
+
+       * mpn/generic/toom22_mul.c: Handle potentially unbalanced coefficient
+       product better.
+
+2009-05-26  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tests/mpz/t-mul.c (ref_mpn_mul): Use mpn_toom44_mul in FFT range for
+       better huge-operands performance.
+
+2009-05-24  Torbjorn Granlund  <tege@gmplib.org>
+
+       * acinclude.m4 (GMP_ASM_LSYM_PREFIX): Try "$L" too, before "$".
+
+2009-05-23  Torbjorn Granlund  <tege@gmplib.org>
+
+       * gmp-impl.h (mpn_mod_1s_1p,mpn_mod_1s_2p,mpn_mod_1s_3p,mpn_mod_1s_4p):
+       Declare using __GMP_ATTRIBUTE_PURE.
+
+       * tune/tuneup.c (tune_mod_1): Specify check_size for measuring mod_1_N
+       functions.
+       (one): Remove redundant size loop exit condition.
+
+2009-05-20  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86/pentium4/sse2/mod_1_4.asm: New file.
+       * mpn/x86/p6/sse2/mod_1_4.asm: New file (grabbing pentium4 code).
+
+2009-05-18  Torbjorn Granlund  <tege@gmplib.org>
+
+       * gmp-h.in (__GNU_MP_VERSION_MINOR): Bump to 4.
+       (__GNU_MP_VERSION_PATCHLEVEL): Set to -1.
+
+       * mpn/x86_64/mod_1_4.asm: New file.
+
+       * mpn/asm-defs.m4: Correct names for mod_1_N functions.
+       Add defines for corresponding cps functions.
+
+       * mpn/generic/mod_1_2.c: Support any sizes > 1.
+       * mpn/generic/mod_1_3.c: Likewise.
+       * mpn/generic/mod_1_4.c: Likewise.
+
+2009-05-12  Torbjorn Granlund  <tege@gmplib.org>
+
+       * Version 4.3.1 released.
+
+2009-05-11  Torbjorn Granlund  <tege@gmplib.org>
+
+       * gmp-h.in (__GNU_MP_VERSION_MINOR): Bump.
+
+       * Makefile.am (LIBGMP_LT_*, LIBGMPXX_LT_*, LIBMP_LT_*):
+       Bump version info.
+
+2009-05-09  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tests/mpz: Add MPZ_CHECK_FORMAT to many tests.
+
+2009-05-07  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86/pentium4/sse2/mul_basecase.asm: Avoid L(ret), "ret" is
+       defined in x86-defs.m4.
+
+2009-05-06  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86/p6/aors_n.asm: Use L() for labels.
+       * mpn/x86/pentium4/sse2/addmul_1.asm: Likewise.
+       * mpn/x86/pentium4/sse2/mul_1.asm: Likewise.
+       * mpn/x86/pentium4/sse2/mul_basecase.asm: Likewise.
+       * mpn/x86/pentium4/sse2/sqr_basecase.asm: Likewise.
+       * mpn/x86_64/lshift.asm: Likewise.
+       * mpn/x86_64/rshift.asm: Likewise.
+
+       * tests/cxx/t-locale.cc (point_string): Declare as extern "C" to
+       placate compilers that mangle variable names.
+
+2009-05-04  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tests/mpz/t-gcd.c: Generate operands that are multiples of each other.
+
+2009-05-01  Torbjorn Granlund  <tege@gmplib.org>
+
+       * gmp-h.in (__GMP_EXTERN_INLINE): Support for more systems.
+       (gmp_randinit_set): Add missing __GMP_DECLSPEC.
+
+2009-04-28  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/neg_n.c: New file.
+       * configure.in (gmp_mpn_functions): Add neg_n.
+       * mpn/asm-defs.m4 (define_mpn): Add neg_n.
+       * mpn/Makefile.am (nodist_EXTRA_libmpn_la_SOURCES): Add neg_n.c.
+       * gmp-h.in: Handle mpn_neg_n properly.
+
+       * mpn/generic/toom_interpolate_7pts.c (divexact_2exp): Nailify.
+
+       * mpn/generic/gcdext.c: Change some MPN_NORMALIZE to
+       MPN_NORMALIZE_NOT_ZERO.
+       * mpn/generic/gcdext_lehmer.c: Likewise.
+       Add a MPN_NORMALIZE_NOT_ZERO.
+
+       * mpn/generic/binvert.c: Remove own mpn_neg_n.
+
+       * tests/mpz/t-gcd.c: Add some MPZ_CHECK_FORMAT calls.
+
+2009-04-27  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/Makefile.am (TARG_DIST): Add minithres.
+
+       * mpn/generic/bdiv_dbm1c.c: Handle nails.
+
+2009-04-26  Torbjorn Granlund  <tege@gmplib.org>
+
+       * config.guess: Recognise more POWER processor types.
+
+2009-04-25  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86/pentium4/sse2/popcount.asm: Work around Apple reloc bug.
+       * mpn/x86/darwin.m4: Define symbol "DARWIN".
+
+2009-04-19  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/powm.c (mpn_redc_n): Use ASSERT_ALWAYS, not abort().
+       * mpn/generic/powm_sec.c: Likewise.
+
+       * mpn/powerpc64/aix.m4 (EXTERN_FUNC): New define.  Add dummy variants
+       for other m4 files.
+       * mpn/powerpc64/mode64/divrem_1.asm: Use EXTERN_FUNC.
+       * mpn/powerpc64/mode64/divrem_2.asm: Likewise.
+
+2009-04-16  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/x86_64-defs.m4 (JUMPTABSECT): New define.
+       * mpn/x86_64/darwin.m4: Likewise.
+       * mpn/x86_64/sqr_basecase.asm: Rework switch code using JUMPTABSECT.
+
+       * tune/common.c (speed_mpn_hgcd, speed_mpn_hgcd_lehmer):
+       Remove an unused variable.
+
+       * mpn/x86/x86-defs.m4 (LEA): Get SIZE arguments right.
+
+2009-04-14  Torbjorn Granlund  <tege@gmplib.org>
+
+       * Version 4.3.0 released.
+
+       * scanf/doscan.c (__gmp_doscan): Pad 3-operand scanf call with dummy
+       argument.
+       * scanf/sscanffuns.c (scan): Disable vsscanf variant for now.
+
+2009-04-13  Torbjorn Granlund  <tege@gmplib.org>
+
+       * scanf/sscanffuns.c (scan): Rewrite to use stdarg.
+
+       * tests/mpz/t-root.c: Rewrite.  Add unconditional gcc 4.3.2 tests.
+
+2009-04-09  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/powm.c: New file.
+       * mpn/generic/powlo.c: New file.
+       * mpn/generic/powm_sec.c: New file.
+       * configure.in (gmp_mpn_functions): List new functions.
+
+2009-04-08  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpz/urandomm.c: Amend last fix.
+
+2009-04-06  Torbjorn Granlund  <tege@gmplib.org>
+
+       * configure.in: Support Sun cc for x86_64.
+
+       * mpz/urandomm.c: Handle operand overlap.
+
+2009-03-11  Torbjorn Granlund  <tege@gmplib.org>
+
+       * configure.in (powerpc): Brave removing -Wa,-mppc64, in the hope that
+       GCC now passes the proper options.
+
+2009-03-09  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/divrem_1.asm: Add a nop to save a cycle in unnormalised
+       case.
+
+2009-03-05  Torbjorn Granlund  <tege@gmplib.org>
+
+       * ia64/gmp-mparam.h, arm/gmp-mparam.h, x86/p6/mmx/gmp-mparam.h,
+       pa32/hppa2_0/gmp-mparam.h, sparc32/v9/gmp-mparam.h: Update.
+
+2009-03-03  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/ia64/bdiv_dbm1c.asm: Accept/return carry.
+
+2009-03-02  Torbjorn Granlund  <tege@gmplib.org>
+
+       * configure.in (64-bit sparc/solaris): Pass -xO3, not -O3 to solaris
+       system compiler.
+
+2009-03-01  Torbjorn Granlund  <tege@gmplib.org>
+
+       * longlong.h (mips, powerpc): Provide assembly-free umul_ppmm for newer
+       gcc.
+
+2009-02-04  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/redc_2.c: Remove code for testing and timing.  Update
+       to current FSF header.
+       * mpn/generic/redc_1.c: Update to current FSF header.
+
+2009-01-21  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpz/powm.c (redc): Remove.
+       (mpz_powm): Use mpn_redc_1 instead of redc.
+
+       * tests/mpz/t-powm.c: Rewrite reference code.
+
+2009-01-18  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tests/mpz: Increase reps for many tests.
+
+       * mpn/generic/rootrem.c (mpn_rootrem_internal): Use MPN_DECR_U instead of
+       mpn_sub_1 (works around gcc 4.3 bugs and is also faster).
+
+2009-01-16  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tests/tests.h: Declare refmpn_divrem_2.
+
+2009-01-15  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpz/perfpow.c: Add TMP_FREE before every return statement.
+
+       * mpn/generic/rootrem.c (mpn_rootrem_internal): Add a missing TMP_FREE.
+
+       * configure.in (gcc_cflags, gcc_64_cflags): Revert from -O3 to -O2,
+       the change was accidental and caused too much miscompilation.
+
+2009-01-14  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tune/tuneup.c (tune_mod_1): Run MOD_1_x_THRESHOLD tests also when
+       longlong.h specified UDIV_PREINV_ALWAYS.
+
+       * mpn/generic/mod_1.c (mpn_mod_1): Properly check for normalisation
+       divisor.
+
+2009-01-13  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tune/tuneup.c (tune_mod_1): Tune for MOD_1_1_THRESHOLD,
+       MOD_1_2_THRESHOLD, and MOD_1_4_THRESHOLD.
+
+       * mpn/generic/mod_1.c: Rewrite.
+       * mpn/generic/mod_1_1.c: New file.
+       * mpn/generic/mod_1_2.c: New file.
+       * mpn/generic/mod_1_3.c: New file.
+       * mpn/generic/mod_1_4.c: New file.
+       * configure.in (gmp_mpn_functions): Add mod_1_*.
+       * mpn/asm-defs.m4 (define_mpn): Add mod_1_*.
+       * mpn/Makefile.am (nodist_EXTRA_libmpn_la_SOURCES): Add mod_1_*.c.
+       * gmp-impl.h: Declare new mpn_mod_1s_* functions and associated
+       THRESHOLD macros.
+       (udiv_rnd_preinv): New macro.
+
+2009-01-12  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tune/tuneup.c (tune_gcd_dc,tune_gcdext_dc): Lower step_factor to 0.1.
+
+2009-01-08  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tests/mpz/t-nextprime.c: New test file.
+       * tests/mpz/Makefile.am (check_PROGRAMS): Add t-nextprime.
+
+       From Niels Möller:
+       * mpz/nextprime.c: Handle large prime gaps by limiting incr.
+
+2009-01-04  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpz/and.c, mpz/ior.c, mpz/xor.c: Re-read only necessary source
+       pointers after reallocation.  Misc cleanup.
+
+       * gmp-impl.h (MPN_TOOM44_MAX_N): New define, replaces MPN_TOOM3_MAX_N.
+
+       * mpn/x86/fat/diveby3.c: New file.
+
+2008-12-30  Niels Möller  <nisse@lysator.liu.se>
+
+       * doc/gmp.texi (Greatest Common Divisor Algorithms): Updated
+       section on GCD algorithms.
+
+2008-12-29  Torbjorn Granlund  <tege@gmplib.org>
+
+       * doc/gmp.texi (Multiplication Algorithms): Add descriptions of Toom-4
+       and unbalanced multiplication.
+       (Radix to Binary): Add warning that text is outdated.
+       (Contributors): Fix typos.
+
+       * mpn/generic/toom*.c: Use coherent MAYBE_ macros for trimming
+       unreachable recursive functions.
+       * gmp-impl.h: Update toom itch functions.
+
+       * mpn/x86_64/sqr_basecase.asm: Slightly increase stack allocation, to
+       placate tuneup.
+
+2008-12-28  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/pentium4/aors_n.asm: Tune prologue code.
+
+       * mpn/x86_64/pentium4/aorslsh1_n.asm: New file.
+
+       * mpn/x86_64/darwin.m4: Define symbol "DARWIN".
+       * mpn/x86_64/invert_limb.asm: Work around darwin quirks.
+
+       * mpn/x86_64/sqr_basecase.asm: Further optimize, support Darwin.
+
+       * mpn/x86_64/invert_limb.asm: New file.
+
+2008-12-27  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/core2/aorslsh1_n.asm: New file.
+
+2008-12-26  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpz/perfpow.c: Handle negative arguments properly.
+       * tests/mpz/t-perfpow.c: New file.
+       * tests/mpz/Makefile.am (check_PROGRAMS): Add t-perfpow.
+
+2008-12-23  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tests/mpz/t-mul.c (dump_abort): Improve error message.
+
+       * gcd.c gcd_subdiv_step.c gcdext.c gcdext_subdiv_step.c:
+       Remove private mpn_zero_p.
+
+       * tune/tuneup.c (tune_mul): Tune for MUL_TOOM44_THRESHOLD.
+       (tune_sqr): Tune for SQR_TOOM4_THRESHOLD.
+
+       * tune/Makefile.am (TUNE_MPN_SRCS_BASIC): Add toom44_mul.c and
+       toom4_sqr.c.
+
+       * configure.in (gmp_mpn_functions): Toom function updates.
+
+       * Rename mpn/mul_toomMN.c to mpn/toomMN_mul.c.  Function names changed
+       accordingly.
+
+       * mpn/toomMN_mul.c: Add scratch parameter.  Do recursive multiplies
+       properly.  Misc tuning.  Remove CHECK and TIMING code.
+
+       * mpn/toom2_sqr.c, mpn/toom3_sqr.c, mpn/toom4_sqr.c: New files.
+
+       * gmp-impl.h (mpn_toomMN_mul_itch): Several new functions.
+       (mpn_zero_p): New function.
+       Add various TOOM4/TOOM44 related parameters.
+       Update mpn_toomMN_mul prototypes.
+
+       * mpn/generic/mul_n.c (mpn_mul_n): Call mpn_toom44_mul.  Use TMP_BALLOC
+       instead of malloc.
+       (mpn_sqr_n): Analogous changes.
+
+       * mpn/generic/mul.c: Update unbalanced toom code to pass scratch space.
+
+2008-12-21  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpz/nextprime.c: Add TMP_SDECL/MARK/FREE.
+
+2008-12-20  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/sqrtrem.c (mpn_sqrtrem1): Rewrite, improve interface.
+       (invsqrttab): New table, remove table approx_tab.
+       (mpn_sqrtrem2): Optimize, update mpn_sqrtrem1 call.
+       (mpn_sqrtrem): Update mpn_sqrtrem1 call.
+
+2008-12-18  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpz/nextprime.c: Run 10 mpz_millerrabin tests (was 5).
+       Give credit to authors.
+
+       * mpn/x86_64/redc_1.asm: Align stack as mandated by ABI.
+
+       * mpn/x86_64/divrem_2.asm: Add some comments.
+
+       * mpn/x86_64/darwin.m4: New file.
+       * configure.in: Use x86_64/darwin.m4.
+
+2008-12-15  Torbjorn Granlund  <tege@gmplib.org>
+
+       * doc/projects.html: Remove GCD and division projects, update text on
+       multiplication.
+
+       * doc/tasks.html: Add a caution that the file is somewhat
+       outdated.
+
+2008-12-14  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/alpha/ev6/aorsmul_1.asm: New file (same code for mpn_addmul_1,
+       much improved for mpn_submul_1).
+       * mpn/alpha/ev6/addmul_1.asm: File removed.
+       * mpn/alpha/ev6/submul_1.asm: File removed.
+
+2008-12-09  Torbjorn Granlund  <tege@gmplib.org>
+
+       From David Harvey:
+       * mpn/x86_64/mul_basecase.asm: Further tweaks for code size and speed.
+
+       * mpn/powerpc64/mode64/divrem_1.asm: Rewrite.
+
+       * mpn/powerpc64/mode64/mul_basecase.asm: New file.
+
+2008-12-08  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/powerpc64/mode64/gmp-mparam.h: New file.
+
+       * gmp-impl.h: Additional cleanups.
+       (mpn_set_str_compute_powtab): New prototype.
+       (mpn_powm, mpn_powlo): New prototypes.
+
+       * mpz/pow_ui.c: Handle some small exponents locally.
+
+2008-12-07  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/set_str.c: Remove prototypes (they are in gmp-impl.h).
+
+       * tune/set_strs.c, tune/set_strb.c: Make prototypes effective by moving
+       the #define mpn_set_str* before including gmp-impl.h.
+
+       * All files: Change _PROTO => __GMP_PROTO.
+
+       * tune/speed.c (routine): Remove non-working choice mpn_set_str_subquad.
+       * tune/common.c (speed_mpn_dc_set_str): Remove, it is broken.
+
+       * mpn/generic/toom_interpolate_7pts.c (divexact_2exp): Make this static,
+       and inline it.
+
+       * gmp-impl.h: Major cleanup.
+       (Remove formal parameter names.  Use __GMP_PROTO consistently.  Move
+       __GMP_PROTO and __MPN use to adjacent lines for declared function.
+       Fix typos.  Remove code inside #if 0.)
+
+       * configure.in (gmp_mpn_functions): Add mul_toom33.  Reformat.
+
+2008-12-05  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/redc_1.c: New file.
+       * mpn/generic/redc_2.c: New file.
+
+       * configure.in (gmp_mpn_functions): List redc_1 and redc_2.
+       (HAVE_NATIVE): Likewise.
+
+       * tune/common.c (speed_mpn_redc_1): Renamed from speed_redc.
+       * tune/speed.c (routine): Remove "redc", and "mpn_redc_1".
+       * tune/speed.h (SPEED_ROUTINE_REDC_1): Renamed from SPEED_ROUTINE_REDC.
+       Updated call.
+       * tune/tuneup.c (tune_powm): Update redc call.
+
+2008-12-04  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/sqr_basecase.asm: Inline a combined diagonal product code
+       and addlsh1 loop.  Misc cleanup.
+
+2008-12-02  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/sqr_basecase.asm: New file.
+
+2008-11-30  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/sqr_basecase.c: Fix typo in mpn_addmul_2s variant.
+
+2008-11-28  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/redc_1.asm: Rewrite.
+
+2008-11-27  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tests/refmpn.c (refmpn_redc_1): New function.
+
+2008-11-25  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86/k7/aorsmul_1.asm: Actually handle mpn_submul_1.
+
+2008-11-23  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/divrem_1.asm: Rewrite.
+
+       * alpha/divrem_2.asm: New file.
+       * powerpc32/divrem_2.asm: New file.
+       * powerpc64/mode64/divrem_2.asm: New file.
+       * x86/divrem_2.asm: New file.
+       * x86_64/divrem_2.asm: New file.
+       * tests/refmpn.c (refmpn_divrem_2): New function.
+
+2008-11-22  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86/k7/mul_1.asm: Rewrite for smaller size and better speed.
+       * mpn/x86/k7/aorsmul_1.asm: Likewise.
+
+       * acinclude.m4 (GMP_VERSION): Include last component even when zero.
+
+2008-11-21  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/README: Rewrite.
+
+       * tests/devel/try.c (malloc_region, mprotect_maybe): Add casts for
+       printf type correctness.
+
+       * gmp-h.in (__GNU_MP_VERSION_MINOR): Bump.
+
+       * Makefile.am (LIBGMP_LT_*, LIBGMPXX_LT_*, LIBMP_LT_*):
+       Bump version info.
+
+2008-11-20  Torbjorn Granlund  <tege@gmplib.org>
+
+       * gmp-impl.h: Rename modlimb_invert to binvert_limb.
+       * tune/speed.h: Likewise.
+       * tune/modlinv.c: Likewise.
+       * tune/common.c: Likewise.
+       * tests/t-modlinv.c: Likewise.
+       * tests/t-constants.c: Likewise.
+       * mpn/sparc64/mode1o.c: Likewise.
+       * mpn/alpha/dive_1.c: Likewise.
+       * mpn/sparc64/dive_1.c: Likewise.
+       * mpn/generic/mode1o.c: Likewise.
+       * mpn/generic/dive_1.c: Likewise.
+       * mpn/generic/bdivmod.c: Likewise.
+       * mpn/alpha/mode1o.asm: Likewise.
+       * mpn/asm-defs.m4: Likewise.
+       * mpn/ia64/mode1o.asm: Likewise.
+       * mpn/powerpc32/README: Likewise.
+       * mpn/powerpc32/mode1o.asm: Likewise.
+       * mpn/powerpc64/mode64/dive_1.asm: Likewise.
+       * mpn/powerpc64/mode64/mode1o.asm: Likewise.
+       * mpn/x86/dive_1.asm: Likewise.
+       * mpn/x86/k6/mmx/dive_1.asm: Likewise.
+       * mpn/x86/k6/mode1o.asm: Likewise.
+       * mpn/x86/k7/dive_1.asm: Likewise.
+       * mpn/x86/k7/mode1o.asm: Likewise.
+       * mpn/x86/p6/dive_1.asm: Likewise.
+       * mpn/x86/p6/mode1o.asm: Likewise.
+       * mpn/x86/pentium/dive_1.asm: Likewise.
+       * mpn/x86/pentium/mode1o.asm: Likewise.
+       * mpn/x86/pentium4/sse2/dive_1.asm: Likewise.
+       * mpn/x86/pentium4/sse2/mode1o.asm: Likewise.
+       * mpn/x86_64/dive_1.asm: Likewise.
+       * mpn/x86_64/mode1o.asm: Likewise.
+
+       * mpn/x86_64/aors_n.asm: Replace with slightly faster, more alignment
+       neutral loop.
+
+2008-11-18  Torbjorn Granlund  <tege@gmplib.org>
+
+       * configure.in: Remove gcd_finda related declarations.
+       * gmp-impl.h (mpn_gcd_finda): Remove declaration.
+       * mpn/Makefile.am (nodist_EXTRA_libmpn_la_SOURCES): Remove gcd_finda.
+       * mpn/asm-defs.m4: Remove define_mpn(gcd_finda).
+       * mpn/x86/k6/gcd_finda.asm: Remove file.
+       * tests/devel/try.c (param_init): Remove mpn_gcd_finda.
+       (choice_array): Remove mpn_gcd_finda.
+       * tests/mpn/t-instrument.c (check): Remove testing of mpn_gcd_finda.
+       * tests/refmpn.c (refmpn_gcd_finda): Remove.
+       * tests/tests.h (refmpn_gcd_finda): Remove declaration.
+       * tune/common.c (speed_mpn_gcd_finda): Remove.
+       * tune/gcd_finda_gen.c: Remove file.
+       * tune/speed.h (speed_mpn_gcd_finda): Remove declaration.
+       * tune/speed.c (routine): Remove mpn_gcd_finda entry.
+
+       * tests/mpz/t-powm.c: Print test number when failing a test.
+
+       * mpn/x86_64/redc_1.asm (CALL): Move from here...
+       * mpn/x86_64/x86_64-defs.m4: ...to here.
+
+       * gmp-impl.h (mpn_jacobi_base): Remove parameter names.
+
+2008-11-11  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tests/mpf/t-conv.c: Add some specific tests, supplementing the random
+       tests.
+
+2008-11-09  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpf/set_str.c: Default 'base' before letting exp_base inherit it.
+
+       * tests/cxx/t-prec.cc: Use the right precision for all float constants.
+
+2008-11-08  Torbjorn Granlund  <tege@gmplib.org>
+
+       * doc/gmp.texi (Float Comparison): Update mpf_eq documentation.
+
+       * mpf/eq.c: Compare the right number of bits.
+
+2008-11-02  Torbjorn Granlund  <tege@gmplib.org>
+
+       Undo, it made testing too slow:
+       * tests/mpz/t-mul.c: Use slower geometric progression for operand
+       sizes.
+
+       * mpn/x86/k7/mod_34lsub1.asm: Use movzb for masking low 8 bits.
+
+2008-10-31  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/hgcd2.c (div1): New function (taken from old gcdext
+       implementation).
+       (mpn_hgcd2): Use single precision for the second half of the work.
+
+2008-10-30  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86/p6/sse2/gmp-mparam.h: New file.
+
+2008-10-29  Torbjorn Granlund  <tege@gmplib.org>
+
+       * configure.in (x86 fat_path): Add "x86/p6/sse2".
+
+       * mpn/x86/fat/fat.c (__gmpn_cpuvec_init): Recognize sse2 capable p6
+       (pentiumm, core2).
+
+       * mpn/x86/p6/sse2/mul_1.asm: New file.
+       * mpn/x86/p6/sse2/addmul_1.asm: New file.
+       * mpn/x86/p6/sse2/submul_1.asm: New file.
+       * mpn/x86/p6/sse2/mul_basecase.asm: New file.
+       * mpn/x86/p6/sse2/sqr_basecase.asm: New file.
+       * mpn/x86/p6/sse2/popcount.asm: New file.
+
+       * mpn/x86/fat/fat.c (__gmpn_cpuvec_init): Handle "extended" fields for
+       model and family.
+
+2008-10-28  Torbjorn Granlund  <tege@gmplib.org>
+
+       From Mickael Gastineau:
+       * gmp-h.in (gmp_urandomm_ui, gmp_urandomb_ui): Add __GMP_DECLSPEC.
+
+2008-10-27  Torbjorn Granlund  <tege@gmplib.org>
+
+       * gmp-h.in (mpn_gcdext_1): Remove bogus __GMP_ATTRIBUTE_PURE.
+
+2008-10-27  Niels Möller  <nisse@lysator.liu.se>
+
+       * tune/common.c (speed_mpn_hgcd): Call mpn_hgcd_matrix_init once
+       for each call to mpn_hgcd.
+       (speed_mpn_hgcd_lehmer): Likewise.
+
+2008-10-26  Torbjorn Granlund  <tege@gmplib.org>
+
+       * configure.in: Point to p6/sse2 for pentiumm and core2.
+
+       * gmp-impl.h (mpn_add_nc, mpn_sub_nc): Move these macros to after fat
+       definitions.
+
+       * tune/common.c, tune/speed.c, tune/speed.h:
+       Add speed measurement of mpn_bdiv_dbm1c.
+
+2008-10-24  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/gmp-mparam.h (MUL_FFT_TABLE2, SQR_FFT_TABLE2): Extend.
+
+       * mpz/nextprime.c: Move declarations to function beginning.
+
+2008-10-23  Niels Möller  <nisse@lysator.liu.se>
+
+       * gmp-impl.h (DECL_gcdext_1): Deleted.
+
+2008-10-22  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/atom/aors_n.asm: New file.
+       * mpn/x86_64/atom/gmp-mparam.h: New file.
+
+2008-10-21  Torbjorn Granlund  <tege@gmplib.org>
+
+       With Niels Möller:
+       * mpz/nextprime.c: Rewrite.
+
+       * tests/devel/try.c (main): Use strtol for 's' and 'S' optargs.
+
+       * mpn/x86_64/pentium4/rshift.asm: Misc cleanups.
+       * mpn/x86_64/pentium4/lshift.asm: Likewise.
+
+       * mpn/x86_64/pentium4/aors_n.asm: Use fewer registers.
+
+       * configure.in: Set up specific path for x86_64/atom.
+
+2008-10-21  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/Makefile.am (nodist_EXTRA_libmpn_la_SOURCES): Removed
+       qstack.c.
+       * mpn/generic/qstack.c: Deleted obsolete file.
+
+2008-10-20  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/core2/aorsmul_1.asm: New file.
+
+2008-10-19  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/aors_n.asm: Remove redundant MULFUNC_PROLOGUE.
+
+       * gmp-impl.h (popc_limb): Remove redundant checks of GMP_LIMB_BITS
+       inside several of these macros.
+
+2008-10-17  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tests/mpz/t-mul.c: Use slower geometric progression for operand
+       sizes.  Do every other test for same size operands.
+
+2008-10-15  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/mul_basecase.asm: Simplify addressing in epilogue.
+
+       * mpn/mips64/divrem_1.asm: Remove file, it is n32-only, and uses an old
+       algorithm.
+
+       * config.guess, config.sub, configure.in: Support Intel Atom processor.
+
+2008-10-10  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpq/mul.c: Fix typo in last change.
+
+2008-10-09  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tests/refmpn.c (refmpn_sb_divrem_mn): Work around a gcc bug.
+
+2008-10-08  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpq/mul.c: Use TMP_ALLOC.  Cleanup.
+       * mpq/div.c: Likewise.
+
+       * mpn/x86_64/mul_basecase.asm: Use lea directly for loading entry point
+       addresses.
+
+2008-10-09  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/x86/k7/gmp-mparam.h: Updated GCD-related values.
+
+2008-10-05  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/mul_fft.c (mpn_mul_fft_internal): Do store
+       mpn_fft_norm_modF return value, if (rec).
+
+2008-10-04  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/aorsmul_1.asm: Replace with faster code.
+       * mpn/x86_64/mul_1.asm: Likewise.
+       * mpn/x86_64/addmul_2.asm: Likewise.
+       * mpn/x86_64/mul_2.asm: Likewise.
+       * mpn/x86_64/mul_basecase.asm: Likewise.
+
+2008-10-02  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/minithres/gmp-mparam.h: Update FFT values.
+
+2008-10-02  Niels Möller  <nisse@lysator.liu.se>
+
+       * hgcd.c (mpn_hgcd_matrix_mul): Fixed normalization bug.
+
+2008-09-24  Torbjorn Granlund  <tege@gmplib.org>
+
+       * configure.in: Handle --enable-minithres.
+       * mpn/minithres/gmp-mparam.h: Update all values.
+
+2008-09-22  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tune/speed.c (routine): New entry for mpn_mul.
+       * tune/speed.h (SPEED_ROUTINE_MPN_MUL): Renamed from
+       SPEED_ROUTINE_MPN_MUL_BASECASE.
+       (speed_mpn_mul): Renamed from speed_mpn_mul_basecase.
+       (SPEED_ROUTINE_MPN_MUL): Allocate our own memory of xp operand.
+
+       * tune/common.c: Corresponding changes.
+
+2008-09-22  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/gcdext.c (hgcd_mul_matrix_vector): New function,
+       replaces addmul2_n. Needs less copying.
+       (mpn_gcdext): Use hgcd_mul_matrix_vector. Updated for interface
+       change in mpn_gcdext_subdiv_step.
+
+       * mpn/generic/hgcd.c (hgcd_matrix_mul_1): Rewritten to use
+       mpn_hgcd_mul_matrix1_vector.
+       (hgcd_step): Updated for interface change in
+       mpn_hgcd_mul_matrix1_inverse_vector.
+
+       * mpn/generic/gcdext_lehmer.c (mpn_gcdext_lehmer_n): Updated for
+       interface changes in mpn_hgcd_mul_matrix1_vector,
+       mpn_hgcd_mul_matrix1_inverse_vector and mpn_gcdext_subdiv_step.
+
+       * mpn/generic/gcd_lehmer.c (mpn_gcd_lehmer_n): Updated for
+       interface change in mpn_hgcd_mul_matrix1_inverse_vector.
+
+       * mpn/generic/gcdext_subdiv_step.c (mpn_gcdext_subdiv_step): Use
+       separate scratch arguments for the quotient and for the cofactor
+       update.
+
+       * mpn/generic/hgcd2.c (mpn_hgcd_mul_matrix1_vector): Interface
+       change. Store first element in rp and leave ap unmodified. No
+       additional scratch space or copying needed. Callers that require
+       modification in place still need to copy one of the inputs.
+       (mpn_hgcd_mul_matrix1_inverse_vector): Likewise.
+
+2008-09-22  Niels Möller <nisse@lysator.liu.se>  <nisse@king.swox.se>
+
+       * mpn/generic/hgcd.c (hgcd_matrix_mul_1): Use mpn_addaddmul_1msb0.
+       * mpn/generic/hgcd2.c (mpn_hgcd_mul_matrix1_vector): Likewise.
+
+       * mpn/generic/gcd.c: Use libspeed for timing measurements.
+
+       * gmp-impl.h: Declare mpn_addaddmul_1msb0.
+       * mpn/asm-defs.m4: Added addaddmul_1msb0.
+       * mpn/x86_64/addaddmul_1msb0.asm: New file.
+       * configure.in (gmp_mpn_functions_optional): Added
+       addaddmul_1msb0.
+       (HAVE_NATIVE): List addaddmul_1msb0.
+
+2008-09-21  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/get_str.c (GET_STR_DC_THRESHOLD): Remove default.
+       (GET_STR_PRECOMPUTE_THRESHOLD): Likewise.
+       Misc code cleanups.
+
+       * gmp-impl.h (mpn_dc_set_str_itch): Allocate GMP_LIMB_BITS more limbs.
+
+       Revert:
+       * mpn/generic/set_str.c:
+       (mpn_dc_set_str): Remove impossible case, replace by an ASSERT.
+
+2008-09-18  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/alpha/ev6/gmp-mparam.h (DIVEXACT_BY3_METHOD): Define.
+
+       * mpn/ia64/diveby3.asm: Remove.
+       * mpn/x86/diveby3.asm: Remove.
+       * mpn/x86/k6/diveby3.asm: Remove.
+       * mpn/x86/k7/diveby3.asm: Remove.
+       * mpn/x86/p6/diveby3.asm: Remove.
+       * mpn/x86/pentium/diveby3.asm: Remove.
+       * mpn/x86_64/diveby3.asm: Remove.
+       * mpn/x86/pentium4/sse2/diveby3.asm: Remove.
+
+       * configure.in (HAVE_NATIVE): List divexact_by3c.
+
+       * gmp-impl.h (mpn_divexact_by3c): Override gmp-h.in's definition.
+       (DIVEXACT_BY3_METHOD): Don't default to 0 if
+       HAVE_NATIVE_mpn_divexact_by3c.
+
+2008-09-18  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/gcd.c (main): Added code for tuning of CHOOSE_P.
+
+       * mpn/generic/hgcd.c (mpn_hgcd_matrix_mul): Assert that inputs are
+       normalized.
+
+2008-09-17  Niels Möller <nisse@lysator.liu.se>  <nisse@king.swox.se>
+
+       * mpn/generic/gcdext.c (mpn_gcdext): p = n/5 caused a
+       slowdown for large inputs. As a compromise, use p = n/2 for the
+       first iteration, and p = n/3 for the rest. Handle the first
+       iteration specially, since the initial u0 and u1 are trivial.
+
+       * mpn/x86_64/gmp-mparam.h (GCDEXT_DC_THRESHOLD): Reduced threshold
+       from 409 to 390.
+
+       * mpn/generic/gcdext.c (CHOOSE_P): New macro. Use p = n/5.
+       (mpn_gcdext): Use CHOOSE_P, and generalized the calculation of
+       scratch space.
+
+       * tune/tuneup.c (tune_hgcd): Use default step factor.
+
+       * mpn/x86_64/gmp-mparam.h (GCD_DC_THRESHOLD): Reduced from 493 to
+       412.
+
+       * mpn/generic/gcd.c (CHOOSE_P): New macro, to determine the
+       split when calling hgcd. Use p = 2n/3, as that seems better than
+       the more obvious split p = n/2.
+       (mpn_gcd): Use CHOOSE_P, and generalized the calculation of
+       scratch space.
+
+2008-09-16  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/toom_interpolate_7pts.c: Use new mpn_divexact_byN
+       functions.
+
+       * gmp-impl.h (mpn_divexact_by3, mpn_divexact_by5, mpn_divexact_by7,
+       mpn_divexact_by9, mpn_divexact_by11, mpn_divexact_by13,
+       mpn_divexact_by15): New macros, defined in terms of mpn_bdiv_dbm1.
+
+       * configure.in (gmp_mpn_functions): List bdiv_dbm1c.
+       (HAVE_NATIVE): Likewise.
+       * mpn/asm-defs.m4: Define bdiv_dbm1c.
+       * gmp-impl.h (mpn_bdiv_dbm1c): Declare.
+       (mpn_bdiv_dbm1): New macro.
+       * mpn/generic/bdiv_dbm1c.c: New file.
+       * mpn/alpha/bdiv_dbm1c.asm: New file.
+       * mpn/ia64/bdiv_dbm1c.asm: New file.
+       * mpn/powerpc32/bdiv_dbm1c.asm: New file.
+       * mpn/powerpc64/mode64/bdiv_dbm1c.asm: New file.
+       * mpn/x86/bdiv_dbm1c.asm: New file.
+       * mpn/x86_64/bdiv_dbm1c.asm: New file.
+
+       * mpn/generic/diveby3.c: Add mpn_bdiv_dbm1c based function.
+       Choose function depending on DIVEXACT_BY3_METHOD.
+       * gmp-impl.h (DIVEXACT_BY3_METHOD): Provide default.
+
+2008-09-16  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/hgcd.c (mpn_hgcd_addmul2_n): Moved function to
+       gcdext.c, where it is used.
+       * mpn/generic/gcdext.c (addmul2_n): Moved and renamed, was
+       mpn_hgcd_addmul2_n. Made static. Deleted input normalization.
+       Deleted rn argument.
+       (mpn_gcdext): Updated calls to addmul2_n, and added assertions.
+
+       * gmp-impl.h (MPN_HGCD_MATRIX_INIT_ITCH): Increased storage by four limbs.
+       (MPN_HGCD_LEHMER_ITCH): Reduced storage by one limb.
+       (MPN_GCD_SUBDIV_STEP_ITCH): Likewise.
+       (MPN_GCD_LEHMER_N_ITCH): Likewise.
+
+       * mpn/generic/hgcd.c (mpn_hgcd_matrix_init): Use two extra limbs.
+       (hgcd_step): Use overlapping arguments to mpn_tdiv_qr.
+       (mpn_hgcd_matrix_mul): Deleted normalization code. Tighter bounds
+       for the element size of the product. Needs two extra limbs of
+       storage for the elements.
+       (mpn_hgcd_itch): Updated storage calculation.
+
+       * mpn/generic/gcd_subdiv_step.c (mpn_gcd_subdiv_step): Use
+       overlapping arguments to mpn_tdiv_qr. Use mpn_zero_p.
+
+       * mpn/generic/gcd.c (mpn_gcd): Use mpn_zero_p.
+
+2008-09-15  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/hgcd.c (mpn_hgcd_matrix_init): Updated for deleted
+       tp pointer.
+       (hgcd_matrix_update_q): Likewise.
+       (mpn_hgcd_matrix_mul): Likewise.
+       (mpn_hgcd_itch): Updated calculation of scratch space.
+
+       * gmp-impl.h (struct hgcd_matrix): Deleted tp pointer.
+       (MPN_HGCD_MATRIX_INIT_ITCH): Reduced storage.
+       (mpn_hgcd_step, MPN_HGCD_STEP_ITCH): Deleted declarations.
+
+2008-09-15  Niels Möller <nisse@lysator.liu.se>  <nisse@king.swox.se>
+
+       * mpn/x86_64/gmp-mparam.h (MATRIX22_STRASSEN_THRESHOLD): New
+       threshold.
+
+       * mpn/generic/hgcd.c (mpn_hgcd_matrix_mul): Use mpn_matrix22_mul.
+       (mpn_hgcd_itch): Updated calculation of scratch space. Use
+       count_leading_zeros to get the recursion depth.
+
+       * mpn/generic/gcd.c (mpn_gcd): Fixed calculation of scratch space,
+       and use mpn_hgcd_itch.
+
+2008-09-15  Niels Möller  <nisse@lysator.liu.se>
+
+       * tune/tuneup.c (tune_matrix22_mul): New function.
+       (all): Use it.
+
+       * tune/common.c (speed_mpn_matrix22_mul): New function.
+
+       * tune/Makefile.am (TUNE_MPN_SRCS_BASIC): Added matrix22_mul.c.
+
+       * tests/mpn/t-matrix22.c: Use MATRIX22_STRASSEN_THRESHOLD to
+       select sizes for tests.
+
+       * gmp-impl.h (MATRIX22_STRASSEN_THRESHOLD): New threshold
+
+       * configure.in (gmp_mpn_functions): Added matrix22_mul.
+       * gmp-impl.h: Added declarations for mpn_matrix22_mul and related
+       functions.
+
+       * mpn/Makefile.am (nodist_EXTRA_libmpn_la_SOURCES): Added
+       matrix22_mul.c.
+       * tests/mpn/Makefile.am (check_PROGRAMS): Added t-matrix22.
+
+       * tests/mpn/t-matrix22.c: New file.
+       * mpn/generic/matrix22_mul.c: New file.
+
+2008-09-11  Niels Möller  <nisse@king.swox.se>
+
+       * tune/tuneup.c: Updated tuning of gcdext.
+
+       * mpn/x86_64/gmp-mparam.h (GCDEXT_DC_THRESHOLD): Reduced threshold
+       from 713 to 409.
+
+2008-09-11  Niels Möller  <nisse@lysator.liu.se>
+
+       * gmp-impl.h: Updated for gcdext changes.
+       (GCDEXT_DC_THRESHOLD): New constant, renamed from
+       GCDEXT_SCHOENHAGE_THRESHOLD.
+
+       * mpn/generic/gcdext.c (compute_v): Accept non-normalized a and b
+       as inputs.
+       (mpn_gcdext): Rewrote and simplified. Now uses the new mpn_hgcd
+       interface.
+
+       * mpn/generic/hgcd.c (mpn_hgcd_addmul2_n): Renamed from addmul2_n
+       and made non-static. Changed interface to take non-normalized
+       inputs, and only two size arguments.
+       (mpn_hgcd_matrix_mul): Simplified using new mpn_hgcd_addmul2_n.
+
+       * mpn/generic/gcdext_lehmer.c (mpn_gcdext_lehmer_itch): Deleted
+       function.
+       (mpn_gcdext_lehmer_n): Renamed from mpn_gcd_lehmer. Now takes
+       inputs of equal size. Moved the code for the division step to a
+       separate function...
+       * mpn/generic/gcdext_subdiv_step.c (mpn_gcdext_subdiv_step): New
+       file, new function.
+
+       * configure.in (gmp_mpn_functions): Added gcdext_subdiv_step.
+
+2008-09-10  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tests/devel/anymul_1.c: Include <string.h>.
+
+       * gmp-h.in: Unconditionally include <cstdio>.
+
+2008-09-10  Niels Möller  <nisse@lysator.liu.se>
+
+       * tune/common.c: #if:ed out speed_mpn_gcd_binary and
+       speed_mpn_gcd_accel.
+       * tune/speed.c (routine): #if:ed out mpn_gcd_binary, mpn_gcd_accel
+       and find_a.
+       * tune/Makefile.am (libspeed_la_SOURCES): Removed gcd_bin.c
+       gcd_accel.c gcd_finda_gen.c.
+       * tune/tuneup.c: Enable tuning of GCD_DC_THRESHOLD.
+
+       * mpn/generic/gcd.c (mpn_gcd): Rewrote and simplified. Now uses
+       the new mpn_hgcd interface.
+
+       * */gmp-mparam.h: Renamed GCD_SCHOENHAGE_THRESHOLD to
+       GCD_DC_THRESHOLD.
+
+       * mpn/generic/gcd_lehmer.c (mpn_gcd_lehmer_n): Renamed (was
+       mpn_gcd_lehmer). Now takes inputs of equal size.
+
+       * mpn/generic/gcd_lehmer.c (mpn_gcd_lehmer): Reintroduced gcd_2,
+       to get better performance for small inputs.
+
+       * mpn/generic/hgcd.c: Don't hardcode small HGCD_THRESHOLD.
+       * mpn/x86_64/gmp-mparam.h (HGCD_THRESHOLD): Reduced from 145 to
+       120.
+       * */gmp-mparam.h: Renamed HGCD_SCHOENHAGE_THRESHOLD to
+       HGCD_THRESHOLD.
+
+2008-09-09  Torbjorn Granlund  <tege@gmplib.org>
+
+       * doc/gmp.texi: Fix a typo and clarify mpn_gcdext docs.
+
+2008-09-09  Niels Möller  <nisse@lysator.liu.se>
+
+       * tune/common.c (speed_mpn_hgcd, speed_mpn_hgcd_lehmer): Adapted
+       to new hgcd interface.
+
+       * gmp-impl.h (MPN_HGCD_LEHMER_ITCH): New macro.
+
+       * hgcd.c (mpn_hgcd_lehmer): Renamed function, from hgcd_base. Made
+       non-static.
+
+       * gcd_lehmer.c (mpn_gcd_lehmer): Use hgcd2 also for n == 2.
+
+       * gcdext_lehmer.c (mpn_gcdext_lehmer): Simplified code for
+       division step. Added proper book-keeping of swaps, which affect
+       the sign of the returned cofactor.
+
+       * tests/mpz/t-gcd.c (one_test): Display co-factor when mpn_gcdext
+       fails.
+
+       * gcd_lehmer.c (mpn_gcd_lehmer): At end of loop, need to handle
+       the special case n == 1 correctly.
+
+       * gcd_subdiv_step.c (mpn_gcd_subdiv_step): Simplified function.
+       The special cancellation logic is not needed here.
+
+2008-09-08  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/invert.c: Add working but slow code.
+
+       * mpn/x86_64/x86_64-defs.m4 (R32, R8): New macros.
+
+       * mpn/ia64/submul_1.asm: Move some labels for broader assembler
+       compatibility.
+
+       * gmp-impl.h (mpn_mul_3, mpn_mul_4): Declare.
+       * tests/tests.h (refmpn_mul_3, refmpn_mul_4): Declare.
+       * tests/try.c (param_init): Set things up for mpn_mul_3 and mpn_mul_4.
+       (choice_array): Likewise.
+       (call): Likewise.
+       * mpn/Makefile.am (nodist_EXTRA_libmpn_la_SOURCES):
+       Add mul_3.c and mul_4.c.
+       * mpn/asm-defs.m4: Define mul_3 and mul_4.
+       * tests/refmpn.c (refmpn_mul_N): New function.
+       (refmpn_mul_2): Remove old definition, call refmpn_mul_N.
+       (refmpn_mul_3, refmpn_mul_4): New functions.
+       * tune/common.c (speed_mpn_mul_3, speed_mpn_mul_4): New functions.
+       * tune/speed.h (speed_mpn_mul_3, speed_mpn_mul_4): Declare.
+       * tune/speed.c (routine): New entries for mpn_mul_2 and mpn_mul_3.
+
+       * ltmain.sh: Update to libtool 1.5.24.
+
+       * mpn/generic/mul_toom22.c: Compute s and t more cleverly.
+
+2008-09-08  Niels Möller  <nisse@lysator.liu.se>
+
+       * tests/mpn/t-hgcd.c: Updated tests. Rewrite of hgcd_ref.
+
+       * mpn/generic/gcdext_lehmer.c (mpn_gcdext_lehmer_itch): New function.
+       (mpn_gcdext_lehmer): Various bugfixes.
+
+       * gcdext.c (mpn_gcdext): Allocate scratch space for gcdext_lehmer.
+
+       * mpn/generic/gcd_lehmer.c (gcd_2): ASSERT that inputs are odd.
+       (mpn_gcd_lehmer): Added tp argument, for scratch space. Make both
+       arguments odd before calling gcd_2.
+
+       * mpn/generic/hgcd.c (mpn_hgcd): Allow the trivial case n <= 2,
+       and return 0 immediately.
+
+       * gmp-impl.h (MPN_EXTRACT_NUMB): New macro.
+
+       * configure.in (gmp_mpn_functions): Added gcdext_lehmer.
+
+2008-09-05  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/toom_interpolate_7pts.c: Use mpn_divexact_by3c instead of
+       divexact_odd.
+
+       * doc/texinfo.tex: Update to 2007-06-29.13.
+
+       * doc/gmp.texi: Update GMP site URL.  Fix some typos.
+
+       * demos/pexpr.c (main): Allow bases up to 62.
+
+       * gmp-impl.h: Remove formal parameter names from function prototypes.
+
+       * config.guess: Recognize recent AMD and Itanium CPUs.
+       Default X86 CPU recognition to configfsf.guess' value.
+
+       * configure.in: Handle core2 separately from athlon64.
+
+2008-09-05  Niels Möller  <nisse@lysator.liu.se>
+
+       * */Makefile.in, configure, aclocal.m4, config.in: Removed files
+       from repository. They're instead generated by automake and
+       autoconf before distribution.
+
+2008-08-25  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpf/set_str.c: Allocate mantissa space based on mantissa size,
+       not on destination variable space.
+       * mpf/set_str.c: Accept unary plus before exponent.
+
+2008-08-06  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/mul_toom22.c: Add statistics gathering functionality,
+       triggered by cpp predef STAT.
+
+       From David Harvey:
+       * mpn/generic/mul_toom22.c: Decrease scratch space usage.
+
+2008-08-02  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tests/misc/t-scanf.c: Avoid negative arguments to _ui functions.
+       * tests/misc/t-printf.c: Likewise.
+
+       * acinclude.m4 (X86_PATTERN): Add geode.
+
+       * acinclude.m4 (CL_AS_NOEXECSTACK): Avoid -q flag to grep.
+
+2008-08-01  Torbjorn Granlund  <tege@gmplib.org>
+
+       * acinclude.m4 (CL_AS_NOEXECSTACK): New.
+       * configure.in: Use CL_AS_NOEXECSTACK.
+       * mpn/Makeasm.am: Use ASM_FLAGS (defined by CL_AS_NOEXECSTACK).
+
+       * gmpxx.h (__GMP_DBL_LIMBS): Use DBL_MAX_EXP instead of
+       std::numeric_limits<double>::max_exponent for better portability.
+
+2008-07-29  Torbjorn Granlund  <tege@gmplib.org>
+
+       * gmpxx.h (__GMP_DBL_LIMBS): New #define.
+       (__GMP_ULI_LIMBS): New #define.
+       (__GMPXX_TMP_UI): New macro.
+       (__GMPXX_TMP_SI): New macro.
+       (__GMPXX_TMP_D): New macro.
+       (struct __gmp_binary_and): Rewrite, using the new macros.
+       (struct __gmp_binary_ior): Likewise.
+       (struct __gmp_binary_xor): Likewise.
+
+2008-07-28  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tests/cxx/t-binary.cc: Add some tests for logical operations.
+
+2008-07-24  Torbjorn Granlund  <tege@gmplib.org>
+
+       * gmpxx.h: Use __GMPZ_* instead of __GMPZZ_* for bitwise ops, remove
+       __GMPZZ_*.
+       Remove repeated #undefs.
+       (__gmp_alloc_cstring): Declare freefunc as extern "C".
+
+2008-07-23  Torbjorn Granlund  <tege@gmplib.org>
+
+       * gmp-h.in (__GMP_CC): New define, undocumented for now.
+       (__GMP_CFLAGS): Likewise.
+
+2008-07-21  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tests/amd64check.c: Fix a printf type clash.
+
+       * mpz/realloc.c: Amend last fix.
+
+       * gmp-h.in: Include <cstdlib> for C++.
+       * gmp-h.in: Handle new gcc 4.3 inline semantics defaults.
+
+       * configfsf.guess: Update to version of 2008-04-14.
+       * configfsf.sub: Update to version of 2008-06-16.
+
+       * configure.in: Separate core2 and athlon64 flags handling.
+
+2008-06-19  Torbjorn Granlund  <tege@gmplib.org>
+
+       * config.guess: Recognize pentiumm and AMD geode.
+       * config.sub: Likewise.
+       * configure.in: Likewise.
+
+2008-06-02  Torbjorn Granlund  <tege@gmplib.org>
+
+       * configure.in: Disallow odd nails sizes.
+       * configure.in: Inherit default gcc_cflags/gcc_64_cflags everywhere.
+
+2008-05-23  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpz/init2.c: Rewrite to avoid internal overflow and to detect mpz_t
+       overflow.
+       * mpz/realloc2.c: Likewise.
+       * mpz/realloc.c: Detect mpz_t overflow.
+
+2008-05-22  Torbjorn Granlund  <tege@gmplib.org>
+
+       * configure.in (sparc): Remove -fast, it causes documented
+       miscompilation.
+
+       * config.guess: Properly handle the "extended" variants of x86 cpuid.
+
+2008-05-09  Torbjorn Granlund  <tege@gmplib.org>
+
+       * gmp-impl.h (mpn_mul_fft): Now void.
+       (udiv_qrnnd_preinv3): Special case for constant (nl).
+
+2008-05-08  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/mul_fft.c: Clean up types in TRACE (printf (...)).
+       (TRACE): Redefine to allow command line control.
+       (mpn_mul_fft_internal): Now void, remove return value.
+       (mpn_mul_fft): Likewise.
+       (MPN_FFT_TABLE2_SIZE): Up size from 256 to 512.
+       (mpn_fft_fft): Call mpn_fft_mul_2exp_modF just once instead of twice,
+       then add/subtract result.  Get rid of temp allocation as a result.
+       Remove some redundant CNST_LIMB.
+       (mpn_fft_fftinv): Analogous changes.
+       (mpn_fft_sub_modF): Re-enable, now needed by mpn_fft_fft and
+       mpn_fft_fftinv.
+
+2008-03-10  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tests/mpz/t-mul.c (main): Let GMP_CHECK_FFT mean largest allowed
+       power-of-2 of test operands.
+
+2008-02-28  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tests/cxx/t-binary.cc (check_mpz): Expect floor rounding for right
+       shift.
+
+2008-02-27  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpz/mul_i.h: Check sml's size (not the signed small_mult).
+
+       * longlong.h (umul_ppmm) [alpha]: Define using __builtin_alpha_umulh
+       when possible.
+
+       * longlong.h (count_trailing_zeros): Force destination register mode.
+
+       * gmpxx.h (struct __gmp_binary_rshift): Use floor rounding, not
+       truncation.
+
+       * gmpxx.h (__gmp_binary_and, __gmp_binary_ior, __gmp_binary_xor):
+       Add variants with unsigned long int argument.
+
+       * config.sub: Recognize geode.
+       * config.guess: Likewise.
+       * acinclude.m4 (X86_PATTERN): Likewise.
+
+2008-02-10  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86/p6/aors_n.asm: Use Zdisp to work around GNU as bug.
+       * mpn/x86/x86-defs.m4 (Zdisp): Add more instructions.
+
+2008-02-08  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/aors_n.asm: New file.
+       * mpn/x86_64/add_n.asm: Delete.
+       * mpn/x86_64/sub_n.asm: Delete.
+
+2008-02-07  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86/k6/mmx/dive_1.asm: Fix typo in last change.
+
+2007-12-10  Torbjorn Granlund  <tege@swox.com>
+
+       * mpf/set_str.c (mpf_set_str): Write own code for converting the
+       exponent, avoids strtol base < 36 limitation.
+
+2007-10-28  Torbjorn Granlund  <tege@swox.com>
+
+       * gmp-impl.h (mpn_dc_get_str_itch): New macro.
+       (mpn_dc_get_str_powtab_alloc): New macro.
+       (struct powers): Add field "shift".
+
+       * mpn/generic/get_str.c: Compute powers without low zero limbs; all
+       functions modified.  Correct temporary allocation.  Misc cleanups.
+
+       * mpn/generic/set_str.c: Compute powers without low zero limbs; all
+       functions modified.
+       (mpn_dc_set_str): Remove impossible case, replace by an ASSERT.
+
+2007-10-26  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/set_str.c: Remove default thresholds, now in gmp-impl.h.
+       (mpn_dc_set_str): Insert ASSERT_ALWAYS in a presumably dead code arm.
+
+2007-10-22  Torbjorn Granlund  <tege@swox.com>
+
+       * gmp-impl.h (mpn_add_nc): Define as inline function, unless NATIVE.
+       (mpn_sub_nc): Likewise.
+
+2007-10-17  Torbjorn Granlund  <tege@swox.com>
+
+       * tests/misc/t-printf.c: Fix a printf type clash.
+       * tests/mpq/t-get_str.c: Likewise.
+       * tests/mpz/t-import.c: Likewise.
+
+       * acinclude.m4: Conditionally disable some tests when compiled by a C++
+       compiler.
+
+       * gmp-impl.h (udiv_qrnnd_preinv3): Remove an unused variable.
+
+       * mpn/generic/hgcd.c: Add some WANT_ASSERTs to shut up warnings.
+
+2007-10-08  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/powerpc64/elf.m4 (LEAL): Define as an alias for LEA.
+       * mpn/powerpc32/darwin.m4 (LEAL): Likewise.
+       * mpn/powerpc64/aix.m4: Likewise.
+
+       * mpn/powerpc64/vmx/popcount.asm: Use LEAL.
+
+       * mpn/powerpc64/darwin.m4 (LEAL): New name for LEA, since it is only
+       usable for local symbols.
+       (LEA): Replace with code for external references.
+
+       * mpn/powerpc32/vmx/mod_34lsub1.asm: Use LEAL.
+
+2007-10-07  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/x86/dive_1.asm: Use LEA, remove explicit movl_eip_*.
+       * mpn/x86/k6/mode1o.asm: Likewise.
+       * mpn/x86/k6/mmx/dive_1.asm: Likewise.
+       * mpn/x86/k7/dive_1.asm: Likewise.
+       * mpn/x86/k7/mode1o.asm: Likewise.
+       * mpn/x86/p6/dive_1.asm: Likewise.
+       * mpn/x86/p6/mode1o.asm: Likewise.
+       * mpn/x86/pentium4/sse2/dive_1.asm: Likewise.
+       * mpn/x86/pentium4/sse2/mode1o.asm: Likewise.
+       * mpn/x86/pentium4/sse2/popcount.asm: Likewise.
+
+       * mpn/x86/p6/aors_n.asm: Table cycle counts.
+
+       * mpn/x86/k7/mod_34lsub1.asm: Fix over-optimistic cycle count claims.
+
+       * mpn/x86/x86-defs.m4 (DEF_OBJECT, END_OBJECT): New defines.
+
+       * mpn/x86/darwin.m4 (LEA): Put also movl_eip_XX into EPILOGUE_cpu.
+       Expect target register to have prepended %.
+
+       * mpn/x86_64/add_n.asm: Use L() for labels.
+       * mpn/x86_64/addlsh1_n.asm: Likewise.
+       * mpn/x86_64/addmul_2.asm: Likewise.
+       * mpn/x86_64/aorrlsh_n.asm: Likewise.
+       * mpn/x86_64/aorsmul_1.asm: Likewise.
+       * mpn/x86_64/com_n.asm: Likewise.
+       * mpn/x86_64/copyd.asm: Likewise.
+       * mpn/x86_64/copyi.asm: Likewise.
+       * mpn/x86_64/diveby3.asm: Likewise.
+       * mpn/x86_64/logops_n.asm: Likewise.
+       * mpn/x86_64/lshsub_n.asm: Likewise.
+       * mpn/x86_64/mul_1.asm: Likewise.
+       * mpn/x86_64/mul_2.asm: Likewise.
+       * mpn/x86_64/mul_basecase.asm: Likewise.
+       * mpn/x86_64/popham.asm: Likewise.
+       * mpn/x86_64/redc_1.asm: Likewise.
+       * mpn/x86_64/rsh1add_n.asm: Likewise.
+       * mpn/x86_64/rsh1sub_n.asm: Likewise.
+       * mpn/x86_64/rshift.asm: Likewise.
+       * mpn/x86_64/sub_n.asm: Likewise.
+       * mpn/x86_64/sublsh1_n.asm: Likewise.
+       * mpn/x86_64/pentium4/aors_n.asm: Likewise.
+       * mpn/x86_64/pentium4/lshift.asm: Likewise.
+       * mpn/x86_64/pentium4/rshift.asm: Likewise.
+
+       * mpn/x86_64/x86_64-defs.m4: New file, defining LEA, DEF_OBJECT, and
+       END_OBJECT.
+
+       * mpn/generic/mul.c: Put TMP_DECL as last decl.
+
+2007-10-06  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/x86/pentium4/sse2/popcount.asm: New file.
+
+2007-09-26  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/get_str.c: Cast a char index to int to shut up compilers.
+
+       * mpn/generic/dc_div_qr.c: Pass dummy scratch argument to mpn_invert.
+       * mpn/generic/dc_divappr_q.c: Likewise.
+       * mpn/generic/mu_div_qr.c: Likewise.
+       * mpn/generic/mu_divappr_q.c: Likewise.
+       * mpn/generic/mu_div_q.c: Likewise.
+       * mpn/generic/divexact.c: Likewise.
+
+       * mpn/generic/invert.c: New file, placeholder for now.
+
+2007-09-24  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/toom_interpolate_5pts.c: New file, contents from
+       mpn/generic/mul_n.c
+       * mpn/generic/mul_n.c (mpn_toom3_interpolate): Function removed.
+
+       * mpn/generic/toom_interpolate_7pts.c: New file.
+
+       * mpn/x86/k7/mmx/popham.asm: Table cycle counts.
+
+       * mpn/x86/k6/README: Update URLs.
+
+       * mpn/powerpc32/README: Update URLs, company names.
+
+       * mpn/generic/get_d.c: Complete rewrite.
+
+       * mpn/generic/mul_toom33.c: New file.
+
+       * mpn/generic/mul_toom22.c: Make orthogonal with other toomXY files.
+       * mpn/generic/mul_toom32.c: Likewise.
+       * mpn/generic/mul_toom42.c: Likewise.
+
+       * mpn/alpha/invert_limb.asm: Update cycle counts.  Fix a comment typo.
+
+       * mpf/get_str.c: Include stdlib.h, not stdio.h for NULL.
+
+       * doc/gmp.texi: Fix a typo.
+
+       * memory.c (__gmp_default_allocate, __gmp_default_reallocate):
+       Cast size operands in error fprintf's.
+
+       * longlong.h (sub_ddmmss) [powerpc 64]: Add more variants for constant
+       args.
+
+       * gmp-impl.h (udiv_qrnnd_preinv3): New define.
+       * gmp-impl.h (ULONG_PARITY): Exclude masquerading __INTEL_COMPILER from
+       ia64 asm.
+
+       * gmp-h.in (mpn_neg_n): New function.
+
+2007-09-18  Torbjorn Granlund  <tege@swox.com>
+
+       * demos/pexpr.c (main): Add -v option.
+       (enum op_t): New tag TIMING.
+       (mpz_eval_expr): Execute TIMING.
+       (fns): Add TIMING entry.
+
+       * gmp-impl.h: Add decls and THRESHOLDs for new toom multiplication
+       functions and division functions.
+
+2007-09-10  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/powerpc32/addlsh1_n.asm: Use L() for labels.
+       * mpn/powerpc32/sublsh1_n.asm: Likewise.
+
+2007-09-09  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/x86/x86-defs.m4 (LEA): New define.
+       * mpn/x86/darwin.m4: New file, for now just defining LEA.
+       * configure.in: Pick up x86/darwin.m4.
+       * mpn/x86/*: Use LEA for PIC references.
+
+       * configure.in: For X86/32, treat core2 like pentium3.
+
+2007-09-06  Torbjorn Granlund  <tege@swox.com>
+
+       * tests/amd64check.c (calling_conventions_values): Put constants,
+       dynamic values in this array (was in scalars).
+       (calling_conventions_check): Corresponding changes.
+       * tests/amd64call.asm: Rewrite to be PIC, smaller, using amd64check.c's
+       array.
+
+2007-09-04  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/x86/pentium4/sse2/mul_basecase.asm: Misc cleanups.
+       * mpn/x86/pentium4/sse2/sqr_basecase.asm: Likewise.
+
+       * mpn/x86_64/mod_34lsub1.asm: Optimize loop, reduce code size.
+
+       * tests/amd64call.asm: Remove bogus no-op moves.
+
+2007-09-03  Torbjorn Granlund  <tege@swox.com>
+
+       From Richard Guenther:
+       * gmp-h.in (__GMP_EXTERN_INLINE): Declare conditionally on
+       __GNUC_STDC_INLINE__.
+
+       * tests/cxx/t-locale.cc: #include <cstdlib>, for abort.
+
+       * mpn/x86_64/core2/popcount.asm: New file.
+       * mpn/x86_64/pentium4/popcount.asm: New file.
+
+       * mpn/x86_64/addmul_2.asm: New file.
+       * mpn/x86_64/mul_2.asm: New file.
+
+       * mpn/x86_64/aorsmul_1.asm: Use 32-bit mov for zeroing registers
+       (saves space).
+
+2007-09-01  Torbjorn Granlund  <tege@swox.com>
+
+       * configure.in: Handle athlon64, core2, and pentium4 separately for
+       64-bit ABI.
+
+       * config.sub: Recognize athlon64, core2, and opteron.
+
+       * config.guess: Do two x86 variants, for 32-bit ABI and 64-bit ABI.
+       Return "athlon64" and "core2", not x86_64.
+
+2007-08-31  Torbjorn Granlund  <tege@swox.com>
+
+       From Patrick Pelissier:
+       * gmp-h.in: Don't refer to FILE from C++ unless we've seen FILE.
+
+2007-08-30  Torbjorn Granlund  <tege@swox.com>
+
+       * demos/isprime.c: Include string.h for strcmp.
+
+       * demos/factorize.c (main): Declare as int.
+
+2007-06-22  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/x86_64/pentium4/lshift.asm: Minor tuning.
+       * mpn/x86_64/pentium4/rshift.asm: Likewise.
+
+2007-05-30  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/powerpc64/mode64/aors_n.asm: Add _nc entry points.
+
+2007-05-22  Torbjorn Granlund  <tege@swox.com>
+
+       * tests/memory.c: Cast calls to new mem* calls to avoid unaligned ops.
+
+2007-05-16  Torbjorn Granlund  <tege@swox.com>
+
+       * tests/mpz/convert.c: Tweak operand sizes for best coverage.
+
+       * tests/memory.c: Add red zones around allocations.
+
+2007-05-15  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/ia64/mul_1.asm: Make mul_1c entry point actually work.
+
+       * mpn/generic/set_str.c (mpn_dc_set_str): Avoid calling mpn_add_n when
+       ln == 0.
+
+       * tests/mpz/convert.c (string_urandomb): New function.
+       (main): Use it by enabling ifdef'ed out code.
+
+2007-04-30  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/x86_64/mul_basecase.asm: Complete rewrite.
+
+       * mpn/x86_64/copyi.asm: Use short shift-by-one form.  Misc cleanups.
+       * mpn/x86_64/copyi.asm: Likewise.
+       * mpn/x86_64/popham.asm: Likewise.
+
+       * mpn/x86_64/aorsmul_1.asm: Cleanup formatting.
+
+2007-04-25  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/divexact.c: Handle undefined case of |N| < |D| to avoid segfaults.
+
+2007-02-24  Torbjorn Granlund  <tege@swox.com>
+
+       * doc/gmp.texi (Toom 3-Way Multiplication): Fix typo.
+       (mpz_scan0, mpz_scan1): Fix typos.
+       (Float Internals): Rewrite paragraph about struct types.
+
+2007-02-12  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/x86/pentium4/sse2/sqr_basecase.asm: Complete rewrite (except
+       diagonal code).
+
+2007-02-05  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/mul_fft.c (mpn_fft_fft): New name for mpn_fft_fft_sqr,
+       old mpn_fft_fft removed.
+       (mpn_mul_fft_internal): Call mpn_fft_fft separately for each operand.
+       (mpn_fft_add_modF): Rewrite to avoid random branches.
+       (mpn_fft_sub_modF): Likewise.
+
+       * mpn/x86/pentium4/sse2/addmul_1.asm: Complete rewrite.
+       * mpn/x86/pentium4/sse2/mul_1.asm: Complete rewrite.
+       * mpn/x86/pentium4/sse2/mul_basecase.asm: Complete rewrite, based on
+       new addmul and mul code.
+
+2007-01-31  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/get_str.c (mpn_sb_get_str): Get loop count for frac
+       development right.
+
+       * mpn/powerpc32/vmx/mod_34lsub1.asm: New file.
+
+       * mpn/powerpc32/aors_n.asm: New file, complete rewrite.
+       * mpn/powerpc32/add_n.asm: Remove.
+       * mpn/powerpc32/sub_n.asm: Remove.
+
+2007-01-25  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/x86_64/core2/aors_n.asm: Add _nc entry points, minor cleanups.
+
+       * mpn/x86_64/core2/lshift.asm: Rewrite.
+       * mpn/x86_64/core2/rshift.asm: Rewrite.
+
+       * mpn/x86_64/pentium4/lshift.asm: Swap some loop insns for a small
+       speedup.
+       * mpn/x86_64/pentium4/rshift.asm: New file, based on lshift.asm.
+
+       * mpn/x86_64/pentium4/gmp-mparam.h: New file.
+
+       * mpn/x86_64/pentium4/aors_n.asm: Complete rewrite of add/subtract
+       code.
+       * mpn/x86_64/pentium4/add_n.asm: Remove.
+       * mpn/x86_64/pentium4/sub_n.asm: Remove.
+
+2007-01-20  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/x86_64/lshift.asm: Add special case for cnt=1.
+
+2007-01-19  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/x86_64/aorsmul_1.asm: New file, written from scratch, finally at
+       3.0 c/l on K8 (addmul_1 was 3.3; submul_1 was 3.5).
+       * mpn/x86_64/addmul_1.asm: Remove.
+       * mpn/x86_64/submul_1.asm: Remove.
+
+2006-12-29  Torbjorn Granlund  <tege@swox.com>
+
+       * randmt.c (__gmp_randclear_mt): Initialize ALLOC field, like in
+       __gmp_randinit_mt_noseed.
+       (__gmp_randclear_mt, __gmp_randinit_mt_noseed): Make similar functions
+       look similar.
+       (__gmp_randclear_mt): Pass actually allocated size.
+
+       * mpn/Makefile.am (nodist_EXTRA_libmpn_la_SOURCES): Add mul_toom22.c,
+       mul_toom32.c, mul_toom42.c.
+
+       * configure.in: Recognize athlon64 and core2 as alternatives to x86_64.
+       Provide special settings for core2.
+
+       * configure.in (gmp_mpn_functions): Add mul_toom22, mul_toom32,
+       mul_toom42.
+
+       * mpn/generic/mul_toom22.c: New file.
+       * mpn/generic/mul.c: Use mpn_mul_toom22.  Trim cutoff points between
+       the mpn_mul_toomN2 functions.  Handle balanced operands at function
+       entry.
+
+2006-12-29  Marco Bodrato  <bodrato@mail.dm.unipi.it>
+
+       * mpn/generic/mul_n.c: Rewrite interpolation code.
+
+2006-12-28  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/mul_toom32.c: New file.
+       * mpn/generic/mul_toom42.c: New file.
+       * mpn/generic/mul.c: Use mpn_mul_toom32 and mpn_mul_toom42 for
+       unbalanced operands.
+
+2006-12-17  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/x86_64/aorrlsh_n.asm: New file.
+       * mpn/x86_64/lshsub_n.asm: New file.
+
+       * mpn/x86_64/core2/aors_n.asm: New file.
+       * mpn/x86_64/core2/lshift.asm: New file.
+       * mpn/x86_64/core2/rshift.asm: New file.
+
+       * mpn/x86/p6/aors_n.asm: Replace K7 grabbing code with P6 specific
+       code.
+
+       * mpn/x86/p6/lshsub_n.asm: New file.
+
+2006-11-23  Torbjorn Granlund  <tege@swox.com>
+
+       * tune/speed.h (SPEED_ROUTINE_MPN_MUL_BASECASE): Allocate space for xp
+       locally, s->xp might be insufficient.
+
+2006-11-22  Torbjorn Granlund  <tege@swox.com>
+
+       * randmt.c (__gmp_randinit_mt_noseed): Initialize ALLOC field of result
+       param.
+
+2006-11-06  Torbjorn Granlund  <tege@swox.com>
+
+       * tune/set_strp.c: New file.
+
+2006-11-04  Torbjorn Granlund  <tege@swox.com>
+
+       * extract-dbl.c: Rewrite to handle nails better, and for general
+       optimization.
+
+       * mpz/bin_uiui.c: Simplify.
+
+       * longlong.h (umul_ppmm) [mmix]: New.
+
+       * tune/tuneup.c, tune/common.c, tune/speed.c, tune/speed.h,
+       tune/set_strb.c, tune/set_strs.c: Add tuning and speed measurements
+       of separate SET_STR_DC_THRESHOLD and SET_STR_PRECOMPUTE_THRESHOLD.
+       Add tuning and speed measurement of mpn_addsub_n.
+
+2006-10-31  Torbjorn Granlund  <tege@swox.com>
+
+       * gmpxx.h: Remove ternary stuff, it is hardly an optimization and it
+       writes to destination before reading all source operands.
+
+2006-10-25  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/set_str.c: Complete rewrite.
+       * mpn/generic/get_str.c: Likewise.
+
+       * gmp-impl.h (struct powers, powers_t): New types.
+       Restructure GET_STR_* and SET_STR_* thresholds.
+
+2006-09-21  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/rootrem.c: Remove some redundant casts.
+
+2006-07-12  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/alpha/ev6/nails/addmul_2.asm: Make it run at claimed speed.
+       * mpn/alpha/ev6/nails/addmul_4.asm: Likewise.
+
+       * mpf/get_str.c: Avoid copying result when not needed.  Misc cleanups.
+
+       * tests/amd64call.asm: Use jmp instead of jmpq to placate Solaris.
+
+2006-06-30  Torbjorn Granlund  <tege@swox.com>
+
+       * configure.in (powerpc-*): Remove repeated path component.
+
+2006-06-15  Torbjorn Granlund  <tege@swox.com>
+
+       * configure.in (ia64-*-linux*): Don't use -O3.
+
+2006-06-14  Torbjorn Granlund  <tege@swox.com>
+
+       * mpq/get_str.c: Fix upper base limit boundary in an ASSERT.
+
+       * tests/refmpn.c (refmpn_sb_divrem_mn): Use ASSERT_CARRY for add-back.
+
+2006-05-31  Torbjorn Granlund  <tege@swox.com>
+
+       * tests/mpz/t-set_d.c (check_data): Add more data points.
+
+       * mpz/set_d.c: Handle negative return values from __gmp_extract_double.
+
+2006-05-17  Torbjorn Granlund  <tege@swox.com>
+
+       * configure.in: Clear out gcc_cflags_cpu and gcc_cflags_arch for a fat
+       build.
+
+2006-05-16  Torbjorn Granlund  <tege@swox.com>
+
+       * demos/primes.c (find_primes): Increase mpz_probab_prime_p cnt to 10.
+
+       * mpn/generic/addsub_n.c: Fix criteria for when to call _nc functions.
+
+2006-05-12  Torbjorn Granlund  <tege@swox.com>
+
+       * config.guess: Recognize more ppc processor types.
+
+2006-05-11  Torbjorn Granlund  <tege@swox.com>
+
+       * tune/speed.c (usage): Update URL for gnuplot and quickplot.
+
+2006-05-10  Torbjorn Granlund  <tege@swox.com>
+
+       * configure.in (powerpc-*-*): Pass -maltivec to assembler for
+       appropriate CPUs.
+
+2006-05-08  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/powerpc32/aix.m4 (LEA): Remove [RW] attribute.
+
+2006-05-03  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/powerpc64/vmx/popcount.asm: Conditionally zero extend n.
+
+2006-04-27  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/divexact.c: Call mpz_tdiv_q for large operands.
+
+       * configure.in (powerpc-*-darwin): Remove -fast, it affects PIC.
+
+2006-04-26  Torbjorn Granlund  <tege@swox.com>
+
+       * config.guess: Try to recognize Ultrasparc T1 (as ultrasparct1).
+       * config.sub: Handle ultrasparct1.
+
+2006-04-25  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/sparc64/gmp-mparam.h: Retune, without separation of GNUC and
+       non-GNUC data.
+
+2006-04-20  Torbjorn Granlund  <tege@swox.com>
+
+       * tests/mpz/convert.c: Increase operands range.
+
+2006-04-19  Torbjorn Granlund  <tege@swox.com>
+
+       * configure.in: Support powerpc eABI.
+       * mpn/powerpc32/eabi.m4: New file.
+
+       * configure.in: Support powerpc *bsd.
+       * mpn/powerpc64/elf.m4: New name for mpn/powerpc64/linux64.m4.
+       * mpn/powerpc32/elf.m4: New name for mpn/powerpc32/linux.m4.
+
+       * mpn/powerpc64/linux64.m4 (ASM_END): Quote TOC_ENTRY.
+
+2006-04-18  Torbjorn Granlund  <tege@swox.com>
+
+       * configure.in (gmp_mpn_functions_optional): Add lshiftc.
+       (HAVE_NATIVE): Add lshiftc.
+
+       * mpn/powerpc64/mode64/invert_limb.asm: Use LEA, not LDSYM.
+       * mpn/powerpc64/mode64/mode1o.asm: Likewise.
+       * mpn/powerpc64/mode64/dive_1.asm: Likewise.
+
+       * mpn/powerpc64/linux64.m4 (TOC_ENTRY): Define to empty.
+       * mpn/powerpc64/aix.m4 (TOC_ENTRY): Likewise.
+       * mpn/powerpc32/aix.m4 (TOC_ENTRY): Likewise.
+
+       * mpn/powerpc32/aix.m4 (EXTERN): New, copied from powerpc64/aix.m4.
+       * mpn/powerpc32/mode1o.asm: Use EXTERN.
+       * mpn/powerpc32/linux.m4 (EXTERN): Provide dummy definition.
+       * mpn/powerpc32/darwin.m4 (EXTERN): Likewise.
+
+2006-04-13  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/mul_fft.c: Use new thresholds mechanism if MUL_FFT_TABLE2
+       is defined.
+       (mpn_lshiftc): New name for mpn_lshift_com (for consistency with some
+       stuff already in 4.1.4).
+       (mpn_fft_mul_2exp_modF): Reorganize initial operand reductions to avoid
+       divisions.
+
+       * tests/devel/try.c (choice_array): Add mpn_addsub_n[c].
+
+2006-04-11  Torbjorn Granlund  <tege@swox.com>
+
+       * aclocal.m4: Regenerate with patched libtool.
+
+       * mpn/asm-defs.m4 (ASM_END): Provide (empty) default.
+
+2006-04-08  Torbjorn Granlund  <tege@swox.com>
+
+       * configure.in (gmp_mpn_functions_optional): Add addsub.
+
+       * gmpxx.h: Remove missed MPFR references.
+
+       * gmp-impl.h (LIMBS_PER_DOUBLE): Adjust formula to not be pessimistic.
+
+       * gmp-impl.h (TMP_*, WANT_TMP_DEBUG): Don't expect marker argument;
+       define TMP_SALLOC and TMP_BALLOC.
+
+       * mpn/minithres/gmp-mparam.h: New file.
+
+       * tests/mpz/t-io_raw.c: Fix printf type/arg mismatches.
+       * tests/mpz/t-export.c: Likewise.
+       * tests/mpz/io.c: Likewise.
+       * tests/t-constants.c: Likewise.
+
+       * mpn/ia64/popcount.asm: Append "cond.dptk" to conditional branches to
+       placate icc.
+       * mpn/ia64/hamdist.asm: Likewise.
+       * mpn/ia64/lorrshift.asm: Likewise.
+       * mpn/ia64/dive_1.asm: Likewise.
+
+2006-04-05  Torbjorn Granlund  <tege@swox.com>
+
+       * tal-notreent.c (__gmp_tmp_mark): Add "struct" tag for tmp_marker.
+       (__gmp_tmp_free): Likewise.
+
+       * mpn/generic/mul_fft.c: Optimize many scalar divisions and mod
+       operations into masks and shifts.
+       (mpn_fft_mul_modF_K): Fix a spurious ASSERT_NOCARRY.
+
+2006-03-26  Torbjorn Granlund  <tege@swox.com>
+
+       * Version 4.2 released.
+
+       * mpn/powerpc64/aix.m4 (LEA): Renamed from LDSYM.
+       * mpn/powerpc64/darwin.m4: Likewise.
+       * mpn/powerpc64/linux64.m4: Likewise.
+       * mpn/powerpc64/vmx/popcount.asm: Use LEA, not LDSYM.
+
+2006-03-23  Torbjorn Granlund  <tege@swox.com>
+
+       * gmp-impl.h (class gmp_allocated_string): Prefix strlen with std::.
+
+       * gmpxx.h (__GMP_DEFINE_TERNARY_EXPR2): Remove for now.
+       (struct __gmp_ternary_addmul2): Likewise.
+       (struct __gmp_ternary_submul2): Likewise.
+
+       * gmpxx.h: #include <cstring>.
+       (struct __gmp_alloc_cstring): Prefix strlen with std::.
+
+       * mpn/x86/pentium/com_n.asm: Add TEXT and ALIGN.
+       * mpn/x86/pentium/copyi.asm: Likewise.
+       * mpn/x86/pentium/copyd.asm: Likewise.
+
+2006-03-22  Torbjorn Granlund  <tege@swox.com>
+
+       * gmp-h.in: Add a "using std::FILE" for C++.
+       (_GMP_H_HAVE_FILE): Check also _ISO_STDIO_ISO_H.
+
+       * gmpxx.h: Remove mpfr code.
+       * tests/cxx: Likewise.
+
+       * gmp-impl.h (FORCE_DOUBLE): Rename a tempvar to avoid a clash with
+       GNU/Linux public include file.
+
+       * configure.in (powerpc64, darwin): New optional, gcc_cflags_subtype.
+       Grab powerpc32/darwin.m4 for ABI=mode32.
+
+       * configure.in: Use host_cpu whenever just the cpu type is needed.
+
+2006-03-08  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/get_si.c: Fix a typo.
+
+       * tests/mpq/t-get_d.c (check_random): Improve random generation for
+       nails.
+
+2006-02-28  Torbjorn Granlund  <tege@swox.com>
+
+       * tests/mpq/t-get_d.c (check_random): New function.
+       (main): Call check_random.
+
+       * mpq/set_d.c: Make choices based on LIMBS_PER_DOUBLE, not
+       BITS_PER_MP_LIMB.  Make it work for LIMBS_PER_DOUBLE == 4.
+       Use MPZ_REALLOC.
+
+       * mpz/set_d.c: Make it work for LIMBS_PER_DOUBLE == 4.
+
+       * extract-dbl.c: Make it work for LIMBS_PER_DOUBLE > 3.
+
+2006-02-27  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/cmp_d.c: Declare `i'.
+       * mpz/cmpabs_d.c: Likewise.
+
+2006-02-23  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/powerpc32/vmx/copyd.asm: Set right VRSAVE bits.
+       * mpn/powerpc32/vmx/copyi.asm: Likewise.
+
+2006-02-22  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/powerpc32/vmx/logops_n.asm: New file.
+
+       * mpn/powerpc32/diveby3.asm: Rewrite.
+
+2006-02-21  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/powerpc32/vmx/copyi.asm: New file.
+       * mpn/powerpc32/vmx/copyd.asm: New file.
+
+2006-02-17  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/alpha/ev6/nails/aors_n.asm (CYSH): Import proper setting from
+       deleted mpn_sub_n.
+
+2006-02-16  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/alpha/ev6/addmul_1.asm: Correct slotting comments.
+
+2006-02-15  Torbjorn Granlund  <tege@swox.com>
+
+       * tests/devel/anymul_1.c: Copy error reporting code from addmul_N.c.
+
+       * tests/devel/addmul_N.c: New file.
+       * tests/devel/mul_N.c: New file.
+
+       * mpn/alpha/default.m4 (PROLOGUE_cpu): Align functions at 16-byte
+       boundary.
+
+       * mpn/alpha/ev6/nails/aors_n.asm: New file.
+       * mpn/alpha/ev6/nails/add_n.asm: Remove.
+       * mpn/alpha/ev6/nails/sub_n.asm: Remove.
+
+       * mpn/alpha/ev6/nails/addmul_1.asm: Rewrite.
+       * mpn/alpha/ev6/nails/submul_1.asm: Likewise.
+       * mpn/alpha/ev6/nails/mul_1.asm: Likewise.
+
+       * mpn/alpha/ev6/nails/addmul_2.asm: Use L() for labels.
+       * mpn/alpha/ev6/nails/addmul_3.asm: Use L() for labels.
+       * mpn/alpha/ev6/nails/addmul_4.asm: Use L() for labels.
+
+2006-02-13  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/powerpc32/diveby3.asm: Trivially reorder loop insns to save
+       1 c/l.
+
+       * mpn/x86_64/dive_1.asm: Use movabsq to support large model non-PIC.
+
+       * mpn/x86_64/rsh1add_n.asm: Replace high register with rbx.
+       * mpn/x86_64/rsh1sub_n.asm: Likewise.
+
+2006-02-10  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/powerpc64/sqr_diagonal.asm: Software pipeline.
+
+       * mpn/powerpc64/vmx/popcount.asm: Add prefetching.
+
+2006-02-07  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/powerpc64/mode64/diveby3.asm: Rewrite.
+
+2006-02-04  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/powerpc64/vmx/popcount.asm: Remove mpn_hamdist partial code.
+       Move compare for huge n so that it is always executed.
+
+2006-02-03  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/powerpc32/linux.m4 (LEA): Add support for PIC.
+
+       * configure.in (powerpc): New optional, gcc_cflags_subtype.
+
+       * mpn/x86_64/pentium4/add_n.asm: New file.
+       * mpn/x86_64/pentium4/sub_n.asm: New file.
+       * mpn/x86_64/pentium4/lshift.asm: New file.
+
+       * mpn/powerpc64/linux64.m4 (PROLOGUE_cpu): Align function start to
+       16-multiple.
+       * mpn/powerpc64/aix.m4: Likewise.
+       * mpn/powerpc64/darwin.m4: Likewise.
+
+       * mpn/powerpc64/copyi.asm: Align loop to 16-multiple.
+       * mpn/powerpc64/copyd.asm: Likewise.
+
+       * configure.in (powerpc): Add vmx to relevant paths.
+
+       * mpn/powerpc64/linux64.m4 (DEF_OBJECT): Accept 2nd argument, for
+       alignment.
+       * mpn/powerpc64/aix.m4: Likewise.
+       * mpn/powerpc64/darwin.m4: Likewise.
+
+       * mpn/powerpc32/linux.m4 (DEF_OBJECT, END_OBJECT): New macros,
+       inherited from powerpc64 versions.
+       * mpn/powerpc32/aix.m4: Likewise.
+       * mpn/powerpc32/darwin.m4: Likewise.
+
+       * mpn/powerpc64/vmx/popcount.asm: New file, for ppc32 and ppc64.
+       * mpn/powerpc32/vmx/popcount.asm: New file, grabbing above file.
+
+2006-01-22  Torbjorn Granlund  <tege@swox.com>
+
+       * configure.in: Generalize OS-dependent patterns for powerpcs.
+
+2006-01-20  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/x86_64/popham.asm: Optimize.
+
+       * config.guess: Recognize power4 and up under linux-gnu.
+       * config.sub: Generalize power recognition code.
+       * acinclude.m4 (POWERPC64_PATTERN): Add 64-bit powerpc processors.
+       * configure.in: Recognize powerpc processors masquerading as power
+       processors.
+
+2006-01-19  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/x86_64/logops_n.asm: Rewrite for more stable speed and smaller
+       code.
+       * mpn/x86_64/com_n.asm: Likewise.
+
+2006-01-18  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/x86_64/addlsh1_n.asm: Rewrite to use indexed addressing.
+       * mpn/x86_64/sublsh1_n.asm: Likewise.
+
+2006-01-17  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/diveby3.c: Use GMP standard parameter names.  Nailify
+       alternative code.  Use restrict for params.
+
+       * configure.in: Recognize andn_n as not needing nailification.
+
+       * tests/mpq/t-equal.c (check_various): Disable a test that gives common
+       factors for GMP_NUMB_BITS == 62.
+
+2006-01-16  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/get_str.c (mpn_sb_get_str): Fix digit count computation,
+       was inaccurate for nails.
+
+2006-01-15  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/x86_64/mode1o.asm: Remove unneeded carry register zeroing.
+
+2006-01-08  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/alpha/ev6/sqr_diagonal.asm: New file.
+
+2006-01-06  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/powerpc64/mode64/mod_34lsub1.asm: Tune to 1.5 c/l.
+
+       * mpn/generic/mullow_n.c (MUL_BASECASE_ALLOC): New #define.
+       (mpn_mullow_n): Use it.
+
+       * mpn/powerpc64/mode64/dive_1.asm: Use EXTERN.
+       * mpn/powerpc64/mode64/mode1o.asm: Likewise.
+
+       * mpn/powerpc64/aix.m4 (EXTERN): Define to import symbol.
+       (LDSYM): Remove [RW] attribute.
+       * mpn/powerpc64/linux64.m4 (EXTERN): Dummy definition.
+       * mpn/powerpc64/darwin.m4 (EXTERN): Likewise.
+
+2006-01-05  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/powerpc64/mode64/mode1o.asm: New file.
+
+       * mpn/powerpc64/mode64/dive_1.asm: Use L() for labels.  Invoke ASM_END.
+
+       * mpn/powerpc64/mode64/invert_limb.asm: Invoke ASM_END.
+
+       * mpn/powerpc64/linux64.m4: Move toc entry generation from direct at
+       DEF_OBJECT to delayed via LDSYM, define ASM_END to output it.
+       * mpn/powerpc64/aix.m4: Likewise.
+       * mpn/powerpc64/darwin.m4: Define a dummy ASM_END.
+
+       * mpn/powerpc64/mode64/addmul_1.asm: Add POWER5 timings.
+       * mpn/powerpc64/mode64/mul_1.asm: Likewise.
+
+       * mpn/powerpc64/mode64/submul_1.asm: Tweak to save 1.5 c/l for POWER5.
+
+2006-01-04  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/powerpc64/mode64/dive_1.asm: New file.
+
+       * mpn/powerpc64/mode64/invert_limb.asm: Add missing ASM_START.
+
+       * mpn/powerpc64/mode64/addmul_1.asm: Fix a comment typo.
+
+       * mpn/x86_64/diveby3.asm: Rewrite.
+
+2006-01-03  Torbjorn Granlund  <tege@swox.com>
+
+       * configure.in: Update bugs reporting address.
+
+       * mpn/powerpc64/mode64/diveby3.asm: Trim a cycle off of POWER4 timing.
+       Misc cleanup.
+
+2006-01-02  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/powerpc64/linux64.m4 (CALL): New macro.
+       * mpn/powerpc64/aix.m4: Likewise.
+       * mpn/powerpc64/darwin.m4: Likewise, also define macro "DARWIN".
+
+2005-12-28  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/powerpc64/mode64/mod_34lsub1.asm: New file.
+
+2005-12-26  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/x86_64/mod_34lsub1.asm: New file.
+
+2005-12-20  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/x86_64/submul_1.asm: Save a push/pop by not using register r12.
+       Use addq instead of leaq for pointer updates; schedule them.  (These
+       changes shave one cycle of overhead and 0.25 c/l.)
+
+2005-12-18  Torbjorn Granlund  <tege@swox.com>
+
+       * mpf/ui_div.c: Implement workaround for GCC bug triggered on alpha.
+       * mpf/set_q.c: Likewise.
+
+2005-12-16  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/tdiv_qr.c: Remove statement with no effect.
+       Rename dead variable to `dummy'.
+
+2005-12-15  Torbjorn Granlund  <tege@swox.com>
+
+       * demos/pexpr.c (setup_error_handler): Add a missing ";".
+
+2005-11-27  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/mul.c: Crudely call mpn_mul_fft_full before checking
+       for unbalanced operands.
+
+       * mpn/generic/mul_fft.c: Remove many scalar divisions.
+       (mpn_mul_fft_lcm): Simplify.
+       (mpn_mul_fft_decompose): Rewrite to handle arbitrarily unbalanced
+       operands.
+
+2005-11-22  Torbjorn Granlund  <tege@swox.com>
+
+       * configure.in: Properly recognize all 32-bit Solaris releases.
+
+2005-11-10  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/mul_fft.c: Inline mpn_fft_mul_2exp_modF,
+       mpn_fft_add_modF and mpn_fft_normalize.
+
+2005-11-02  Torbjorn Granlund  <tege@swox.com>
+
+       * tests/mpz/reuse.c: Increase operand size, decrease # of reps.
+
+       * mpz/rootrem.c: Adapt to new mpn_rootrem.
+       * mpz/root.c: Likewise.
+
+       * tests/mpz/reuse.c: Test mpz_rootrem.
+
+       With Paul Zimmermann:
+       * mpn/generic/rootrem.c: Complete rewrite.
+
+2005-10-31  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/pprime_p.c (mpz_probab_prime_p): Considerably limit trial
+       dividing.
+
+       * mpz/perfpow.c (mpz_perfect_power_p): Use mpz_divisible_ui_p instead
+       of mpz_tdiv_ui.
+
+       * mpz/divegcd.c: Correct probability number for GCD == 1.
+
+       * mpn/x86_64/mul_basecase.asm: Remove an obsolete comment.
+
+       * mpn/x86: Add cycle counts for array of x86 processors.
+
+       * mpn/x86/k7/mod_34lsub1.asm: Remove spurious mentions of ebp.
+
+       * mpn/powerpc32: Add POWER5 timings.
+
+       * mpn/powerpc32/README: Describe global reference variations.
+
+       * mpn/ia64/divrem_2.asm: Add some comments.
+
+       * mpn/ia64/divrem_1.asm: Reformat.
+
+       * mpn/ia64/addmul_2.asm: Correct a comment on slotting.
+       * mpn/ia64/logops_n.asm: Likewise.
+
+       * mpn/ia64/addmul_1.asm: Remove a redundant preg mutex decl.
+
+       * mpn/generic/dive_1.c: Whitespace cleanup.
+
+       * mpn/alpha/ev6/nails/addmul_1.asm: Correct comments on slotting.
+       * mpn/alpha/ev6/nails/addmul_2.asm: Likewise.
+       * mpn/alpha/ev6/nails/addmul_4.asm: Likewise.
+
+       * mpf/out_str.c: List some allocation improvement ideas.
+
+       * doc/gmp.texi: Update many URLs and email addresses.
+
+       * gmp-h.in (_GMP_H_HAVE_FILE): Check also _STDIO_H_INCLUDED.
+
+2005-10-26  Torbjorn Granlund  <tege@swox.com>
+
+       * tune/tuneup.c (tune_mullow): Update param.max_size for each threshold
+       measurement.
+
+       * configure.in (POWERPC64_PATTERN/*-*-darwin*): Set
+       SPEED_CYCLECOUNTER_OBJ_mode64 and cyclecounter_size_mode64.
+       (POWERPC64_PATTERN/*-*-linux*): Likewise.
+
+2005-10-03  Torbjorn Granlund  <tege@swox.com>
+
+       * demos/factorize.c (factor_using_division_2kp): Honor verbose flag.
+       (factor_using_pollard_rho): Divide out new factor before it's
+       clobbered.  Don't stop factoring after a composite factor was found.
+
+2005-09-17  Torbjorn Granlund  <tege@swox.com>
+
+       * demos/pexpr.c (fns): Add factorial keywords.
+
+2005-08-16  Torbjorn Granlund  <tege@swox.com>
+
+       * tune/Makefile.am (EXTRA_DIST): Change "amd64" => "x86_64".
+       * mpn/Makefile.am (TARG_DIST): Change "amd64" => "x86_64".
+
+2005-08-15  Torbjorn Granlund  <tege@swox.com>
+
+       * configure.in: Change "amd64" => "x86_64".
+
+2005-06-13  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/pre_mod_1.c: Canonicalize variable names.
+
+       * mpn/generic/divrem.c: Rate qxn test as UNLIKELY.
+
+       * mpn/generic/gcdext.c (sanity_check_row): Invoke TMP_MARK.
+
+       * tune/tuneup.c (tune_mullow): Fix all max_size fields.
+
+       * gmp-impl.h (SQR_TOOM3_THRESHOLD_LIMIT): New #define.
+       * tune/tuneup.c (tune_sqr): Use SQR_TOOM3_THRESHOLD_LIMIT.
+       (sqr_toom3_threshold): Initialize from SQR_TOOM3_THRESHOLD_LIMIT.
+
+       * mpn/generic/mul_n.c (mpn_sqr_n): Use SQR_TOOM3_THRESHOLD_LIMIT.
+
+       * gmp-impl.h (mpn_nand_n, mpn_iorn_n, mpn_nior_n, mpn_xnor_n):
+       Handle nails.
+
+2005-06-13  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/gcdext.c (gcdext_schoenhage): Check for the
+       (unlikely) case that one of the hgcd/euclid steps results in two
+       remainders of one limb each. Then use gcdext_1.
+
+2005-06-12  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/alpha/ev6/sub_n.asm: Analogous changes as to add_n.asm last.
+
+2005-06-11  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/alpha/ev6/add_n.asm: Rewrite inner loop to load later.
+       Add mpn_add_nc entry.
+
+       * mpn/alpha/ev6/addmul_1.asm: Remove redundant initial loads.
+
+2005-06-09  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/ia64/dive_1.asm: Fix issues with HP-UX.
+
+2005-06-08  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/ia64/diveby3.asm: Update TODO list.
+
+       * mpn/ia64/mode1o.asm: Fix comment typos.
+
+       * mpn/ia64/dive_1.asm: New file.
+
+2005-06-07  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/ia64/mode1o.asm: Add prefetching.
+
+       * mpn/generic/dive_1.c: Use variable h for upper umul_ppmm result.
+
+2005-06-06  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/ia64/hamdist.asm: Complete rewrite.
+       * mpn/ia64/popcount.asm: Rewrite to use multi-pronged feed-in.
+
+       * mpn/ia64/aors_n.asm: Rewrite feed-in code.
+       * mpn/ia64/rsh1aors_n.asm: Likewise.
+       * mpn/ia64/aorslsh1_n.asm: Likewise.
+       * mpn/ia64/lorrshift.asm: Likewise.
+
+2005-06-04  Torbjorn Granlund  <tege@swox.com>
+
+       * tests/devel/try.c (choice_array): Exclude mpn_preinv_mod_1 unless
+       USE_PREINV_MOD_1.
+       (choice_array): Exclude mpn_sqr_basecase if SQR_KARATSUBA_THRESHOLD
+       is zero.
+
+2005-06-03  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/alpha/ev6/addmul_1.asm: Prefix all labels with "$".
+       * mpn/alpha/ev6/mul_1.asm: Likewise.
+
+2005-06-02  Torbjorn Granlund  <tege@swox.com>
+
+       * tests/refmpn.c (refmpn_divmod_1c_workaround): Implement workaround
+       to gcc 3.4.x bug triggered on powerpc64 with 32-bit ABI.
+
+2005-06-01  Torbjorn Granlund  <tege@swox.com>
+
+       * tests/devel/try.c (main): Fix a typo.
+
+2005-05-31  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/alpha/ev6/addmul_1.asm: Rewrite for L1 cache, add prefetch.
+
+2005-05-30  Torbjorn Granlund  <tege@swox.com>
+
+       * tests/misc.c (tests_rand_start): Mask random seed to 32 bits.
+
+2005-05-29  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/powerpc64/mode32/mul_1.asm: Handle BROKEN_LONGLONG_PARAM.
+       * mpn/powerpc64/mode32/addmul_1.asm: Likewise.
+       * mpn/powerpc64/mode32/submul_1.asm: Likewise.
+
+       * mpn/powerpc32/mode1o.asm: Rewrite to actually work.
+
+       * mpn/powerpc32/aix.m4 (LEA): New macro.
+       (ASM_END): New macro.
+
+       * mpn/powerpc32/linux.m4: New file.
+       * mpn/powerpc32/darwin.m4: New file.
+       * configure.in: Use linux.m4 and darwin.m4.
+       (powerpc64-linux-gnu): Add support for mode32.
+
+2005-05-25  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/mullow_n.c: Remove FIXME mentioning fixed flaw.
+
+       * tests/mpz/t-cmp_d.c (check_one): Fix printf fmt string typo.
+
+       * demos/isprime.c: #include stdlib.h.
+       * tests/rand/t-urbui.c: Likewise.
+       * tests/rand/t-urmui.c: Likewise.
+
+       * tests/mpz/t-popcount.c (check_random): Remove spurious printf arg.
+
+       * mpn/ia64/lorrshift.asm: Cleanup code layout.
+       * mpn/ia64/popcount.asm: Likewise.
+
+2005-05-24  Torbjorn Granlund  <tege@swox.com>
+
+       * tests/devel/try.c (param_init) [TYPE_GET_STR]: Set retval field.
+       (compare): Handle SIZE_GET_STR as SIZE_RETVAL.
+
+       * tests/refmpn.c (refmpn_get_str): Rewrite to make it work.
+
+2005-05-23  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/amd64/add_n.asm: Add mpn_add_nc entry point.
+       * mpn/amd64/sub_n.asm: Add mpn_sub_nc entry point.
+
+       * longlong.h (many places): Remove lvalue casts.
+
+       * gmp-impl.h (MPF_SIGNIFICANT_DIGITS): Cast prec to avoid overflow
+       for > 4G digits.
+
+       * mpn/alpha/ev6/add_n.asm: Prefetch using ldl.
+       * mpn/alpha/ev6/sub_n.asm: Likewise.
+
+       * mpn/alpha/ev6/slot.pl (optable): Recognize negq and ldl.
+
+       * mpn/ia64/aors_n.asm: Prefetch using lfetch.
+       * mpn/ia64/lorrshift.asm: Likewise.
+       * mpn/ia64/popcount.asm: Likewise.
+       * mpn/ia64/diveby3.asm: Likewise.
+
+2005-05-22  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/alpha/ev67/popcount.asm: Prefetch.
+       * mpn/alpha/ev67/hamdist.asm: Prefetch.
+
+       * longlong.h (add_ssaaaa) [x86]: Remove lvalue casts.
+       (sub_ddmmss) [x86]: Likewise.
+
+       * tests/devel/try.c (param_init) [TYPE_MPZ_JACOBI]: Add DATA_SRC1_ODD.
+       (param_init) [TYPE_MPZ_KRONECKER]: Clear inherited DATA_SRC1_ODD.
+       (param_init) [TYPE_DIVEXACT_1]: Use symbolic name DIVISOR_LIMB.
+
+2005-05-21  Torbjorn Granlund  <tege@swox.com>
+
+       * tests/devel/try.c (param_init) [TYPE_MPZ_JACOBI]: Initialize divisor
+       field according to UDIV_NEEDS_NORMALIZATION.
+
+       * mpz/mul_i.h: Remove left-over TMP_XXXX marker arguments.
+
+2005-05-20  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/x86/pentium4/sse2/addmul_1.asm (mpn_addmul_1c): Put carry in
+       proper register.
+
+       * mpn/generic/sqr_basecase.c (mpn_sqr_basecase, addmul_2 version):
+       Avoid accesses out-of-bound in MPN_SQR_DIAGONAL applicate code.
+
+2005-05-19  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/alpha/diveby3.asm: Make it actually work.
+
+       * gmp-impl.h (MULLOW_BASECASE_THRESHOLD_LIMIT): New #define.
+       * mpn/generic/mullow_n.c: Use fixed stack allocation for the smallest
+       operands; use TMP_S* allocation for medium operands.
+
+       * gmp-impl.h: Remove nested TUNE_PROGRAM_BUILD test.
+
+2005-05-18  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/mul_n.c: Make squaring and multiplication code more
+       similar.  Use TMP_S* functions.
+
+       * gmp-impl.h (TMP_DECL, TMP_MARK, TMP_FREE): Get rid of argument.
+       (TMP_SALLOC): New macro for "small" allocations.
+       (TMP_BALLOC): New macro for "big" allocations.
+       (TMP_SDECL, TMP_SMARK, TMP_SFREE): New macros for functions that use
+       just TMP_SALLOC.
+       (WANT_TMP_ALLOCA): Make default functions choose alloca or reentrant
+       functions, depending on size.
+
+       * *.c: Remove TMP_XXXX marker arguments.
+
+       * acinclude.m4 (WANT_TMP): Want tal-reent.lo also for alloca case.
+
+2005-05-16  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/ia64/gmp-mparam.h: Further extend FFT tables.
+
+2005-05-15  Torbjorn Granlund  <tege@swox.com>
+
+       * gmp-impl.h (udiv_qrnnd_preinv2): Pull an add into add_ssaaaa.
+       (udiv_qrnnd_preinv2gen): Likewise.
+
+2005-05-14  Torbjorn Granlund  <tege@swox.com>
+
+       * longlong.h (add_ssaaaa) [x86_64]: Restrict allowed immediate
+       operands.
+       (sub_ddmmss) [x86_64]: Likewise.
+
+2005-05-02  Torbjorn Granlund  <tege@swox.com>
+
+       * acinclude.m4 (GMP_HPC_HPPA_2_0): Make gmp_tmp_v1 sed pattern handle
+       version numbers like B.11.X.32509-32512.GP.
+
+       * mpn/m68k/aors_n.asm: Correct MULFUNC_PROLOGUE.
+
+       * mpn/powerpc64/mode64/aors_n.asm: Add a MULFUNC_PROLOGUE.
+
+       * mpf/inp_str.c: Use plain int for mpf_set_str return value (works
+       around gcc 4 bug).
+
+       * acinclude.m4 (GMP_ASM_POWERPC_PIC_ALWAYS): Handle darwin's assembly
+       syntax.
+       (long long reliability test 1): New GMP_PROG_CC_WORKS_PART test.
+       (long long reliability test 2): New GMP_PROG_CC_WORKS_PART test.
+
+       * configure.in: Add mode64 support for darwin.  Use darwin.m4.
+       Add cflags_opt flags for mode32 darwin.
+
+       * mpn/powerpc64: Use L() for all asm files.
+
+       * mpn/asm-defs.m4 (PIC_ALWAYS): Define PIC just iff PIC_ALWAYS = "yes".
+
+       * mpn/powerpc64/darwin.m4: New file.
+
+       * mpn/powerpc64/linux64.m4: Remove TOCREF, add LDSYM.
+       Rework DEF_OBJECT to need just one argument.
+       * mpn/powerpc64/aix.m4: Likewise.
+
+       * mpn/powerpc64/mode64/invert_limb.asm: Load approx_tab address with
+       LDSYM.  Optimize somewhat.  Remove 2nd DEF_OBJECT operand.
+
+2005-05-01  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/popham.c: Compute final summation differently for 64-bit.
+
+       * tests/mpz/t-popcount.c (check_random): New function.
+       (main): Call it.
+
+2005-04-28  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/amd64/add_n.asm: Use r9 instead of rbx to save push/pop.
+       * mpn/amd64/sub_n.asm: Likewise.
+
+2005-04-09  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/powerpc64/copyi.asm: If HAVE_ABI_mode32, ignore upper 32 bits of
+       mp_size_t argument.
+       * mpn/powerpc64/copyd.asm: Likewise.
+       * mpn/powerpc64/sqr_diagonal.asm: Likewise.
+       * mpn/powerpc64/lshift.asm: Likewise.
+       * mpn/powerpc64/rshift.asm: Likewise.
+       * mpn/powerpc64/logops_n.asm: Likewise.
+       * mpn/powerpc64/com_n.asm: Likewise.
+
+2005-04-08  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/rootrem.c: Allocate PP_ALLOC limbs also for qp.
+
+2005-04-07  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/powerpc32/add_n.asm: Add nc entry point.
+       * mpn/powerpc32/sub_n.asm: Likewise.
+
+       * mpn/amd64/*.asm: Add Prescott/Nocona cycle/limb numbers.
+
+       * mpn/alpha/add_n.asm: Add correct cycle/limb numbers.
+       * mpn/alpha/sub_n.asm: Likewise.
+       * mpn/alpha/ev5/add_n.asm: Likewise.
+       * mpn/alpha/ev5/sub_n.asm: Likewise.
+
+2005-03-31  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/x86/k7/gmp-mparam.h: Fix typo in last change.
+
+2005-03-19  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/amd64/gmp-mparam.h: Update.
+
+       * mpn/alpha/gmp-mparam.h: Update.
+       * mpn/alpha/ev5/gmp-mparam.h: Update.
+       * mpn/alpha/ev6/gmp-mparam.h: Update.
+
+       * mpn/ia64/gmp-mparam.h: Update.
+
+       * mpn/x86/p6/mmx/gmp-mparam.h: Update.
+       * mpn/x86/pentium4/sse2/gmp-mparam.h: Update.
+       * mpn/x86/k7/gmp-mparam.h: Update.
+
+       * tests/mpz/t-gcd.c (main): Honor command line reps argument.
+
+       * tune/speed.h (SPEED_ROUTINE_MPN_GCD_CALL): Simplify and correct code
+       for generating test operands.
+
+2005-03-17  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/hgcd.c (qstack_adjust): New argument d, saying how much
+       to adjust the top quotient.
+       (hgcd_adjust): The quotient can be off by either 1 or 2.
+
+2005-03-16  Torbjorn Granlund  <tege@swox.com>
+
+       * tests/mpz/t-gcd.c (MAX_SCHOENHAGE_THRESHOLD): Set to largest of
+       gcd,gcdext thresholds.
+
+2005-03-15  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/gcdext.c (gcdext_schoenhage): When calling gcdext_lehmer,
+       reuse all temporary limb storage, including the storage used for the
+       qstack.
+
+2005-03-09  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/amd64/logops_n.asm: Add MULFUNC_PROLOGUE.
+
+2005-03-05  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/amd64/gmp-mparam.h: Extend MUL_FFT_TABLE and SQR_FFT_TABLE.
+       * mpn/ia64/gmp-mparam.h: Likewise.
+
+2005-02-17  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/ia64/divrem_1.asm: Add preinv entry point.
+
+2005-01-13  Torbjorn Granlund  <tege@swox.com>
+
+       * gmp-impl.h (MPN_SIZEINBASE): Count bits in type size_t.
+       (MPN_SIZEINBASE_16): Likewise.
+
+2004-12-17  Torbjorn Granlund  <tege@swox.com>
+
+       * tune/speed.c (run_gnuplot): Use lines, not linespoints.
+       Output a reset gnuplot command initially.
+
+2004-12-04  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/random2.c (gmp_rrandomb): Rework again.
+       * mpz/rrandomb.c (gmp_rrandomb): Likewise.
+
+       * mpn/amd64/redc_1.asm: Call via PLT when PIC.
+
+2004-11-29  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/amd64/divrem_1.asm: Add preinv entry point.
+       * mpn/amd64/gmp-mparam.h: Set USE_PREINV_DIVREM_1 to 1.
+
+2004-11-24  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/alpha/diveby3.asm: Use correct prefetch instruction.
+
+2004-11-19  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/alpha/diveby3.asm: Add ",gp" glue in PROLOGUE.
+       Add r31 dummy operand to `br' instruction.
+
+2004-11-17  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/powerpc64/mode64/addmul_1.asm: Rewrite.
+       * mpn/powerpc64/mode64/mul_1.asm: Rewrite.
+
+       * configure.in: Invoke AC_C_RESTRICT.
+
+2004-11-16  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/alpha/diveby3.asm: New file.
+
+2004-11-13  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/amd64/popham.asm: New file.
+
+2004-11-12  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/amd64/add_n.asm: Correct cycle count.
+       * mpn/amd64/sub_n.asm: Likewise.
+
+       * mpn/amd64/dive_1.asm: Speed divisors with many factors of 2.
+
+2004-11-11  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/amd64/dive_1.asm: New file.
+
+2004-11-10  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/popham.c: Add comment.
+
+2004-11-09  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/amd64/com_n.asm: New file.
+
+       * mpn/amd64/logops_n.asm: New file.
+
+2004-11-08  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/powerpc64/com_n.asm: New file.
+
+2004-11-05  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/amd64/diveby3.asm: New file.
+
+       * config.guess: Strip any PPC string in /proc/cpuinfo.
+       Recognize 970 in that code.
+
+2004-11-01  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/amd64/mul_basecase.asm: New file.
+
+       * mpn/amd64/redc_1.asm: New file.
+
+2004-10-25  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/powerpc64/mode64/addlsh1_n.asm: Correct cycle counts.
+
+       * mpn/powerpc64/README: Update POWER5/PPC970 pipeline information.
+
+       * mpn/generic/mul_basecase.c (MAX_LEFT): Add comment.
+
+       * doc/gmp.texi: Consistently use "x86" denotation.
+       (Assembler SIMD Instructions): Mention SSE2 usage.
+
+       * demos/pexpr.c (main): Handle "negative" base in mpz_sizeinbase call.
+
+2004-10-18  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/powerpc64/mode64/submul_1.asm: Shave 2 cycles/limb with new carry
+       inversion trick.
+
+2004-10-16  Torbjorn Granlund  <tege@swox.com>
+
+       * configure.in: Support icc under x86.
+       (ia64-*-linux*): Pass -no-gcc to icc.
+
+2004-10-15  Torbjorn Granlund  <tege@swox.com>
+
+       * longlong.h (ia64 umul_ppmm): Add version for icc.
+
+       * configure.in: Support icc under ia64-*-linux*.
+
+       * acinclude.m4: New "compiler works" test for icc 8.1 bug.
+       (GMP_PROG_CC_IS_GNU): Don't let Intel's icc fool us it is GCC.
+
+2004-10-14  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/gcdext.c: Add a few missing TMP_MARK.
+
+2004-10-14  Torbjorn Granlund  <tege@swox.com>
+
+       * acinclude.m4 (GMP_ASM_W32): Try also "data4".
+
+       * mpn/ia64/logops_n.asm: Don't use naked "br", rejected by Intel
+       assembler.
+       * mpn/ia64/aors_n.asm: Likewise.
+
+       * mpn/ia64/divrem_2.asm: Add ".prologue".
+
+       * mpn/ia64/hamdist.asm: Put alloc first in bundle, enforced by the
+       Intel assembler.
+
+       * longlong.h: Exclude masquerading __INTEL_COMPILER from ia64 asm.
+       * gmp-impl.h: Likewise.
+
+2004-10-12  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/ia64/mul_2.asm: Rewrite function entry code, write new code for
+       n=2.
+       * mpn/ia64/addmul_2.asm: Likewise.
+
+       * tests/devel/try.c: Handle mpn_mul_2 like mpn_addmul_2.
+
+       * tune/speed.c (routine): Make R parameter optional for mpn_mul_2.
+
+2004-10-11  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/sparc64/addmul_1.asm: Update a comment.
+
+       * tests/devel/aors_n.c: #include tests.h.
+       * tests/devel/anymul_1.c: Likewise.
+       * tests/devel/shift.c: Likewise.
+       * tests/devel/copy.c: Likewise.
+
+       * tests/devel/aors_n.c: Handle also mpn_addlsh1_n, mpn_sublsh1_n,
+       mpn_rsh1add_n, and mpn_rsh1sub_n.
+
+       * mpn/ia64/submul_1.asm: Add TODO item.
+
+       * mpn/ia64/aors_n.asm: Rewrite function entry code (again).
+       * mpn/ia64/aorslsh1_n.asm: Likewise.
+       * mpn/ia64/logops_n.asm: Likewise.
+
+       * mpn/ia64/rsh1aors_n.asm: Tune function entry and feed-in code.
+       * mpn/ia64/lorrshift.asm: Likewise.  Remove several spurious loads.
+
+       * tests/devel/Makefile.am (EXTRA_PROGRAMS): Updates for yesterday's
+       file removals and additions.
+
+2004-10-10  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/ia64/copyi.asm: Tune function entry code.
+       * mpn/ia64/copyd.asm: Likewise.
+
+       * mpn/ia64/logops_n.asm: Tune function entry and feed-in code for speed
+       and size.
+       * mpn/ia64/aors_n.asm: Likewise.
+
+       * mpn/powerpc64/logops_n.asm: Correct cycle counts.
+       * mpn/powerpc64/mode64/aors_n.asm: Likewise.
+
+       * tests/devel/copy.c: Handle both MPN_COPY_INCR and MPN_COPY_DECR.
+
+       * tests/devel/logops_n.c: New file, handle all logical operations.
+
+       * tests/devel/anymul_1.c: New file, handle mpn_mul_1, mpn_addmul_1, and
+       mpn_submul_1.
+       * tests/devel/mul_1.c: Remove.
+       * tests/devel/addmul_1.c: Remove.
+       * tests/devel/submul_1.c: Remove.
+
+       * tests/devel/shift.c: New file, handle mpn_lshift and mpn_rshift.
+       * tests/devel/lshift.c: Remove.
+       * tests/devel/rshift.c: Remove.
+
+       * tests/devel/aors_n.c: New file, handle mpn_add_n and mpn_sub_n.
+       * tests/devel/add_n.c: Remove.
+       * tests/devel/sub_n.c: Remove.
+
+2004-10-09  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/powerpc64/linux64.m4: Define DEF_OBJECT, END_OBJECT, and TOCREF.
+       * mpn/powerpc64/aix.m4: Likewise.
+       * mpn/powerpc64/mode64/invert_limb.asm: Use DEF_OBJECT, END_OBJECT, and
+       TOCREF for approx_tab.
+
+       * mpn/amd64/mul_1.asm: Add mpn_mul_1c entry point.
+
+2004-10-08  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/powerpc64/copyi.asm: New file.
+       * mpn/powerpc64/copyd.asm: New file.
+       * gmp-h.in: Remove PPC MPN_COPY variants.
+       * gmp-impl.h: Likewise.
+
+       * mpn/powerpc64/logops_n.asm: New file.
+
+       * mpn/powerpc64/mode64/invert_limb.asm: New file.
+
+2004-10-07  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/powerpc64/mode64/aors_n.asm: New file, optimized for POWER4 and
+       its derivatives.
+       * mpn/powerpc64/mode64/add_n.asm: Delete.
+       * mpn/powerpc64/mode64/sub_n.asm: Delete.
+
+       * configfsf.guess: Patch HP-UX code to accommodate HP compiler's new
+       inability to read from stdin.
+
+       * mpn/powerpc64/mode64/addsub_n.asm: Remove accidentally added file.
+
+2004-10-02  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/amd64/README: Update for new developments, fix typos.
+
+       * mpn/amd64/mul_1.asm: Tweak addressing (3.25 => 3.0 cycles/limb).
+
+       * mpn/amd64/addmul_1.asm: Remove unreachable code block.
+
+2004-09-30  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/amd64/addmul_1.asm: Rewrite, now 3.25 cycles/limb.
+
+       * mpn/ia64/addmul_1.asm: Slightly enhance cross-jumping for code
+       density.
+       * mpn/ia64/mul_1.asm: Analogous changes.
+
+2004-09-29  Torbjorn Granlund  <tege@swox.com>
+
+       * gmp-impl.h (x86 ULONG_PARITY): Work around GCC change of "q" register
+       flag.
+
+2004-09-28  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/ia64/divrem_1.asm: Add cycle counts to loop.
+
+       * mpn/ia64/divrem_2.asm: New file.
+
+2004-09-28  Paul Zimmermann  <Paul.Zimmermann@loria.fr>
+
+       * mpn/generic/mul_fft.c (mpn_mul_fft): Fix a bug in the choice of the
+       recursive fft parameters.
+
+2004-09-20  Torbjorn Granlund  <tege@swox.com>
+
+       * tests/misc.c (tests_rand_start): Default to strtoul for re-seeding.
+
+       * tests/mpz/t-mul.c (ref_mpn_mul): Fudge tmp allocation for toom3.
+
+2004-09-19  Torbjorn Granlund  <tege@swox.com>
+
+       * tests/misc.c (tests_rand_start): Shift tv_usec for better seeding.
+
+2004-09-18  Torbjorn Granlund  <tege@swox.com>
+
+       * tests/misc.c (tests_rand_start): Invoke fflush after printing seed.
+
+       * tests/mpz/t-mul.c (main): Check environment for GMP_CHECK_FFT, run
+       extra FFT tests if set.
+       (ref_mpn_mul): Use library code for kara and toom, but skewed so that
+       we never use the same algorithm that we're testing.
+       (mul_kara): Delete.
+       (debug_mp): Print just one line of large numbers.
+       (ref_mpn_mul): Rework usage of tp temporary space.
+
+2004-09-15  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/ia64/mul_2.asm: For HAVE_ABI_32, convert vp.
+       * mpn/ia64/addmul_2.asm: Likewise.
+
+2004-09-13  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/ia64/invert_limb.asm: Rewrite.
+
+       * mpn/ia64/logops_n.asm: Insert some more stops.
+
+2004-09-12  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/ia64/gmp-mparam.h: Update.
+       * mpn/amd64/gmp-mparam.h: Update.
+
+       * mpn/ia64/sqr_diagonal.asm: Shave off a few cycles.
+
+2004-09-11  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/ia64/mul_2.asm: New file.
+       * mpn/ia64/addmul_2.asm: New file.
+
+       * mpn/ia64/addmul_1.asm: Tune a cycle from prologue.
+
+       * mpn/ia64/lorrshift.asm: Insert stops after several branches.
+       * mpn/ia64/aorslsh1_n.asm: Likewise.
+       * mpn/ia64/rsh1aors_n.asm: Likewise.
+
+       * mpn/generic/sqr_basecase.c: In variant for HAVE_NATIVE_mpn_addmul_2,
+       accumulate carry also for when HAVE_NATIVE_mpn_addlsh1_n.
+
+2004-09-07  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/ia64/submul_1.asm: Rewrite.
+
+       * mpn/ia64/addmul_1.asm: Format to placate HP-UX assembler.
+       * mpn/ia64/mul_1.asm: Likewise.
+
+2004-09-02  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/ia64/mul_1.asm: Optimize feed-in code.
+       * mpn/ia64/addmul_1.asm: Rewrite feed-in code.
+
+2004-08-29  Torbjorn Granlund  <tege@swox.com>
+
+       * tests/mpz/t-sizeinbase.c: Disable mpz_fake_bits and check_sample.
+
+2004-07-16  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/ia64/addmul_1.asm: Format to placate HP-UX assembler.
+
+2004-06-17  Kevin Ryde  <kevin@swox.se>
+
+       * doc/gmp.texi: Use @. when sentence ends with a capital, for good
+       spacing in tex.
+       (Language Bindings): Add gmp-d, reported by Ben Hinkle.  Update SWI
+       Prolog URL, reported by Jan Wielemaker.
+
+2004-06-09  Torbjorn Granlund  <tege@swox.com>
+
+       * configure.in: Handle --enable-fat.  Use that to enable x86 fat
+       builds, remove magic meaning of i386-*-*.
+
+2004-06-03  Kevin Ryde  <kevin@swox.se>
+
+       * gmp-impl.h (memset): Use a local char* pointer, in case parameter is
+       something else (eg. tune/common.c).  Reported by Emmanuel Thomé.
+
+2004-06-01  Kevin Ryde  <kevin@swox.se>
+
+       * config.guess (i?86-*-*): Avoid "Illegal instruction" message which
+       goes to stdout on 80386 freebsd4.9.
+
+2004-05-23  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/gcdext.c (gcdext_1_u): New function.
+       (mpn_gcdext): Use it.
+
+2004-05-23  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/gcdext.c (gcdext_1_odd): Use masking to avoid jumps.
+
+2004-05-22  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/x86/pentium4/sse2/addmul_1.asm: Add Prescott cycle numbers.
+
+       * mpn/amd64/divrem_1.asm: Shave a cycle from fraction development code.
+
+       * mpn/powerpc32/lshift.asm: Add more cycle numbers.
+       * mpn/powerpc32/rshift.asm: Likewise.
+
+       * mpn/ia64/addmul_1.asm: Reformat.
+
+2004-05-21  Torbjorn Granlund  <tege@swox.com>
+
+       * gmp-impl.h (mpn_mullow_n, mpn_mullow_basecase): Declare.
+
+       * tune/Makefile.am: Compile gcdext.c.
+
+       * gmp-impl.h (GET_STR_THRESHOLD_LIMIT): Lower outrageous value to 150.
+       (GCDEXT_SCHOENHAGE_THRESHOLD): Set reasonable default.  Override when
+       TUNE_PROGRAM_BUILD.
+       (GCDEXT_THRESHOLD): Remove.
+
+       * tune/tuneup.c (gcdext_schoenhage_threshold): New variable.
+       (gcdext_threshold): Remove variable.
+       (tune_gcd_schoenhage): Lower step_factor to 0.1.
+       (tune_gcdext_schoenhage): New function, based on tune_gcd_schoenhage.
+       (tune_gcdext): Remove function.
+       (all): Corresponding changes.
+
+2004-05-21  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/gcdext.c: Complete rewrite.  Uses fast Lehmer code for
+       small operands, and Schoenhage code for large operands.
+
+       * tune/speed.h (SPEED_ROUTINE_MPN_GCD_CALL): Ensure first operand is
+       not smaller than 2nd operand.
+
+2004-05-17  Kevin Ryde  <kevin@swox.se>
+
+       * gmp-h.in (mpz_get_ui): Use #if instead of plain if, and for nails
+       use ?: same as normal case, to avoid warnings from Borland C++ 6.0.
+       Reported by delta trinity.
+
+2004-05-15  Kevin Ryde  <kevin@swox.se>
+
+       * tune/time.c (getrusage_backwards_p): New function.
+       (speed_time_init): Use it to exclude broken netbsd1.4.1 getrusage.
+       * configure.in (m68*-*-netbsd1.4*): Remove code pretending getrusage
+       doesn't exist.
+       * tune/README (NetBSD 1.4.1 m68k): Update notes.
+
+       * configure.in (mips*-*-* ABI=n32): Remove gcc_n32_ldflags and
+       cc_n32_ldflags, libtool knows to put the linker in n32 mode.
+
+2004-05-15  Torbjorn Granlund  <tege@swox.com>
+
+       * config.guess (powerpc*-*-*): Add more processor types to mfpvr code.
+       * configure.in: Generalize powerpc subtype matching code.
+
+       * mpz/fac_ui.c: Misc cleanups, spelling corrections.
+
+2004-05-14  Kevin Ryde  <kevin@swox.se>
+
+       * mpf/sub.c: When one operand cancels high limbs of the other, strip
+       high zeros on the balance before truncating to destination precision.
+       Truncating first loses accuracy and can lead to a result 0 despite
+       operands being not equal.  Reported by John Abbott.
+       Also, ensure exponent is zero when result is zero, for instance if
+       operands are exactly equal.
+       * tests/mpf/t-sub.c (check_data): New function, exercising these.
+
+2004-05-12  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in (AC_PROG_RANLIB): New macro, supposedly required by
+       automake, though it doesn't complain.
+
+       * demos/expr/Makefile.am (ARFLAGS): Add a default setting, to
+       workaround an automake bug.
+
+2004-05-10  Kevin Ryde  <kevin@swox.se>
+
+       * */Makefile.in, install-sh, aclocal.m4: Update to automake 1.8.4.
+
+       * doc/gmp.texi (Demonstration Programs): Add a remark about expression
+       evaluation in the main gmp library.
+
+       * demos/expr/exprfa.c (mpf_expr_a): Correction to mpX_init, use
+       mpf_init2 to follow requested precision.
+       * demos/expr/exprza.c, demos/expr/exprqa.c: Use wrappers for mpX_init,
+       to make parameters match.
+
+       * demos/expr/run-expr.c: Don't use getopt, to avoid needing configury
+       for optarg declaration.  Remove TRY macro, rename foo and bar to var_a
+       and var_b, for clarity.
+       * demos/expr/expr-impl.h: Don't use expr-config.h.
+       * configure.in (demos/expr/expr-config.h): Remove.
+       * demos/expr/expr-config.in: Remove file.
+
+2004-05-08  Kevin Ryde  <kevin@swox.se>
+
+       * doc/configuration (Configure): Update for current automake not
+       copying acinclude.m4 into aclocal.m4.
+
+       * configure.in, Makefile.am, doc/gmp.texi, doc/configuration,
+       tests/cxx/Makefile.am, demos/expr/Makefile.am, demos/expr/README,
+       demos/expr/expr.c, demos/expr/expr.h, demos/expr/expr-config-h.in,
+       demos/expr/expr-impl.h, demos/expr/run-expr.c, demos/expr/t-expr.c:
+       MPFR now published separately, remove various bits.
+       * mpfr/*, tests/cxx/t-headfr.cc, demos/expr/exprfr.c,
+       demos/expr/exprfra.c: Remove.
+
+2004-05-07  Kevin Ryde  <kevin@swox.se>
+
+       * tests/cxx/Makefile.am (TESTS_ENVIRONMENT): Amend c++ shared library
+       path hack, on k62-unknown-dragonfly1.0 /usr/bin/make runs its commands
+       "set -e", so we need an "|| true" in case there's nothing to copy (for
+       instance in a static build).
+
+2004-05-06  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/alpha/mode1o.c: Remove, in favour of ...
+       * mpn/alpha/mode1o.asm: New file.
+       * mpn/alpha/alpha-defs.m4 (bwx_available_p): New macro.
+
+       * tune/amd64.asm: Save rbx in r10 rather than on the stack.
+
+       * configure.in (x86_64-*-*): Try also "-march=k8 -mno-sse2", in case
+       we're in ABI=32 on an old OS not supporting xmm regs.
+       (GMP_GCC_PENTIUM4_SSE2, GMP_OS_X86_XMM): Run these tests under
+       -march=k8 too, and not under ABI=64.
+
+       * doc/gmp.texi (Converting Integers): For mpz_get_d, note truncation
+       and overflows.  For mpz_get_d_2exp note truncation, note result if
+       OP==0, and cross reference libc frexp.
+       (Rational Conversions): For mpq_get_d, note truncation and overflows.
+       (Converting Floats): For mpf_get_d, note truncation and overflows.
+       For mpf_get_d_2exp, note truncation, note result if OP==0.
+       (Assembler Code Organisation): Note nails subdirectories.
+       Clarification of get_d_2exp OP==0 reported by Sylvain Pion.
+
+2004-05-05  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/mullow_n.c, mpn/generic/mullow_basecase.c: New files
+       (mainly by Niels Möller).
+       * configure.in, mpn/Makefile.am: Add them.
+
+       * gmp-impl.h (MULLOW_BASECASE_THRESHOLD, MULLOW_DC_THRESHOLD,
+       MULLOW_MUL_N_THRESHOLD): Override for TUNE_PROGRAM_BUILD.
+
+       * tune/Makefile.am: Compile mullow_n.c.
+       * tune/common.c (speed_mpn_mullow_n, speed_mpn_mullow_basecase):
+       New functions.
+       * tune/speed.c (routine): Add entries for mpn_mullow_n and
+       mpn_mullow_basecase.
+       * tune/speed.h (SPEED_ROUTINE_MPN_MULLOW_N_CALL,
+       SPEED_ROUTINE_MPN_MULLOW_BASECASE): New #defines.
+       * tune/tuneup.c (tune_mullow): New function.
+
+       * gmp-impl.h (invert_limb): Compute branch-freely.
+
+2004-05-02  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/amd64/mode1o.asm: Use movabsq to support large model non-PIC.
+       Use 32-bit insns to save code bytes, and to save a couple of cycles on
+       the initial setup multiplies.
+
+2004-05-01  Kevin Ryde  <kevin@swox.se>
+
+       * doc/gmp.texi (References): Update gcc online docs url to
+       gcc.gnu.org.
+
+       * configure.in (mips*-*-irix[6789]*): Correction to m4 quoting of this
+       pattern.  (Believe the mips64*-*-* part also used picks up all current
+       irix6 tuples anyway.)  Reported by Rainer Orth.
+
+2004-04-30  Kevin Ryde  <kevin@swox.se>
+
+       * acinclude.m4 (GMP_PROG_CC_X86_GOT_EAX_EMITTED,
+       GMP_ASM_X86_GOT_EAX_OK): New macros.
+       (GMP_PROG_CC_WORKS): Use them to detect an old gas bug tickled by
+       recent gcc.  Reported by David Newman.
+
+       * doc/gmp.texi (Reentrancy): Note also gmp_randinit_default as an
+       alternative to gmp_randinit.
+
+2004-04-29  Torbjorn Granlund  <tege@swox.com>
+
+       * configfsf.guess: Update to 2004-03-12.
+       * configfsf.sub: Likewise.
+
+2004-04-27  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/rrandomb.c (gmp_rrandomb): Rework to avoid extra limb allocation
+       and to generate even numbers.
+       * mpn/generic/random2.c (gmp_rrandomb): Likewise.
+
+2004-04-25  Kevin Ryde  <kevin@swox.se>
+
+       * gmp-impl.h (FORCE_DOUBLE): Don't use an asm with a match constraint
+       on a memory output, apparently not supported and provokes a warning
+       from gcc 3.4.
+
+2004-04-24  Kevin Ryde  <kevin@swox.se>
+
+       * longlong.h (count_leading_zeros_gcc_clz,
+       count_trailing_zeros_gcc_ctz): New macros.
+       (count_leading_zeros, count_trailing_zeros) [x86]: Use them on gcc
+       3.4.
+
+       * configure.in (x86-*-* gcc_cflags_cpu): Give a -mtune at the start of
+       each option list, for use by gcc 3.4 to avoid deprecation warnings
+       about -mcpu.
+
+       * mpz/aorsmul.c, mpz/aorsmul_i.c, mpz/cfdiv_q_2exp.c,
+       mpz/cfdiv_r_2exp.c, mpq/aors.c, mpf/ceilfloor.c: Give REGPARM_ATTR()
+       on function definition too, as demanded by gcc 3.4.
+
+2004-04-22  Kevin Ryde  <kevin@swox.se>
+
+       * tests/rand/t-lc2exp.c (check_bigc1): New test.
+
+       * doc/fdl.texi: Tweak @appendixsubsec -> @appendixsec to match our
+       preference for this in an @appendix, and because texi2pdf doesn't
+       support @appendixsubsec directly within an @appendix.
+
+2004-04-20  Kevin Ryde  <kevin@swox.se>
+
+       * doc/texinfo.tex: Update to 2004-04-07.08 from texinfo 4.7.
+       * doc/gmp.texi, mpfr/mpfr.texi (@copying): Don't put a line break in
+       @ref within @copying, recent texinfo.tex doesn't like that.
+
+       * demos/perl/GMP.xs (static_functable): Treat cygwin the same as mingw
+       DLLs.
+
+       * */Makefile.in, install-sh: Update to automake 1.8.3.
+       * ltmain.sh, aclocal.m4, configure: Update to libtool 1.5.6.
+
+       * gmp-impl.h (LIMB_HIGHBIT_TO_MASK): Use a compile-time constant
+       expression, rather than a configure test.
+       * acinclude.m4, configure.in (GMP_C_RIGHT_SHIFT): Remove, no longer
+       needed.
+       * tests/t-hightomask.c: New file.
+       * tests/Makefile.am (check_PROGRAMS): Add it.
+
+       * macos/configure (parse_top_configure): Look for PACKAGE_NAME and
+       PACKAGE_VERSION now used by autoconf.
+       (what_objects): Only demand 9 object files, as for instance occurs in
+       the scanf directory.
+       (asm files): Transform labels L(foo) -> Lfoo.  Take func name from
+       PROLOGUE to support empty "EPILOGUE()".  Recognise and substitute
+       register name "define()"s.
+       * macos/Makefile.in (CmnObjs): Add tal-notreent.o.
+
+2004-04-19  Torbjorn Granlund  <tege@swox.com>
+
+       * tune/speed.h (SPEED_ROUTINE_MPN_ROOTREM): New #define.
+       (speed_mpn_rootrem): Declare.
+       * tune/common.c (speed_mpn_rootrem): New function.
+       * tune/speed.c (routine): Add entry for mpn_rootrem.
+
+2004-04-16  Kevin Ryde  <kevin@swox.se>
+
+       * doc/fdl.texi: Update from FSF, just fixing a couple of typos.
+
+       * macos/configure, macos/Makefile.in: Add printf and scanf directories.
+
+       * tests/mpz/t-gcd.c (check_data): New function, exercising K6
+       gcd_finda bug.
+
+2004-04-14  Kevin Ryde  <kevin@swox.se>
+
+       * doc/gmp.texi (Reentrancy, Random State Initialization): Note
+       gmp_randinit use of gmp_errno is not thread safe.  Reported by Vincent
+       Lefèvre.
+
+       * doc/gmp.texi (Random State Initialization): Add index entries for
+       gmp_errno and constants.
+
+       * mpn/m68k/README: Update _SHORT_LIMB -> __GMP_SHORT_LIMB.
+
+       * configure.in (--enable-mpbsd): Typo Berkley -> Berkeley in help msg.
+
+2004-04-12  Kevin Ryde  <kevin@swox.se>
+
+       * demos/perl/GMP.xs (static_functable): New macro, use it for all
+       function tables, to support mingw DLL builds.
+       * demos/perl/INSTALL (NOTES FOR PARTICULAR SYSTEMS): Remove note on
+       DLLs, should be ok now.
+
+       * demos/perl/sample.pl: Print the module and library versions in use.
+
+       * demos/perl/GMP.pm, Makefile.PL (VERSION): Set to '2.00'.
+       * demos/perl/GMP.pm (COPYRIGHT): New in the doc section.
+
+       * Makefile.am: Note 4.1.3 libtool versioning info, and REVISION policy.
+
+       * tal-debug.c: Add <stdlib.h> for abort.
+
+2004-04-07  Torbjorn Granlund  <tege@swox.com>
+
+       * tests/refmpf.c (refmpf_add_ulp): Adjust exponent when needed.
+
+       * mpn/generic/random2.c: Rewrite (clone mpz/rrandomb.c).
+
+2004-04-07  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/x86/k6/gcd_finda.asm: Correction jbe -> jb in initial setups.
+       Zero flag is wrong here, it reflects only the high limb of the compare,
+       leading to n1>=n2 not satisfied and wrong results.  cp[1]==0x7FFFFFFF
+       with cp[0]>=0x80000001 provokes this.
+
+       * doc/gmp.texi (BSD Compatible Functions): Note "pow" name clash under
+       the pow function description too.
+       (Language Bindings): Add XEmacs (betas at this stage).  Reported by
+       Jerry James.
+
+       * tests/refmpn.c (refmpn_mod2): Correction to ASSERTs, r==a is allowed.
+
+       * gen-psqr.c (generate_mod): Cast mpz_invert_ui_2exp args, for K&R.
+       * gen-bases.c, gen-fib.c, gen-psqr.c: For mpz_out_str, use stdout
+       instead of 0, in case a K&R treats int and FILE* params differently.
+
+2004-04-04  Kevin Ryde  <kevin@swox.se>
+
+       * gmp-impl.h (BSWAP_LIMB) [amd64]: New macro.
+       (FORCE_DOUBLE): Use this for amd64 too.
+
+       * tests/amd64check.c, tests/amd64call.asm: New files, derived in part
+       from x86check.c and x86call.asm.
+       * tests/Makefile.am (EXTRA_libtests_la_SOURCES): Add them.
+       * configure.in (x86_64-*-* ABI=64): Use them.
+
+2004-04-03  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/amd64/mode1o.asm: New file.
+       * mpn/amd64/amd64-defs.m4 (ASSERT): New macro.
+
+       * mpn/x86/k7/mmx/divrem_1.asm, mpn/x86/pentium4/sse2/divrem_1.asm: Add
+       note on how "dr" part of algorithm is handled.
+
+       * mpn/x86/k7/dive_1.asm, mpn/x86/k7/mod_34lsub1.asm,
+       mpn/x86/k7/mode1o.asm: Note Hammer (32-bit mode) speeds.
+
+2004-03-31  Kevin Ryde  <kevin@swox.se>
+
+       * doc/gmp.texi (Language Bindings): Add GOO, MLGMP and Numerix.
+
+       * mpf/mul_2exp.c, mpf/div_2exp.c: Rate u==0 as UNLIKELY.
+
+2004-03-28  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/amd64/divrem_1.asm: Trim a few cycles.
+
+2004-03-27  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/amd64/sublsh1_n.asm: Fix typo.
+
+       * mpn/generic/divrem_1.c: Fix typo.
+
+       * mpn/generic/sqr_basecase.c: Fix typo.
+
+       * mpn/amd64/divrem_1.asm: New file.
+
+2004-03-20  Kevin Ryde  <kevin@swox.se>
+
+       * longlong.h (power, powerpc): Add comments on how we select this code.
+
+       * gmp-h.in (mpz_get_ui): Use ?: instead of mask style, gcc treats the
+       two identically but ?: is a bit clearer.
+
+       * insert-dbl.c: Remove file, no longer used, scaling is now integrated
+       in mpn_get_d.
+       * Makefile.am (libgmp_la_SOURCES): Remove insert-dbl.c.
+       * gmp-impl.h (__gmp_scale2): Remove prototype.
+
+2004-03-17  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/x86/fat/fat.c (__gmpn_cpuvec_init, fake_cpuid_table): Add x86_64.
+
+       * mpq/get_d.c: Use mpn_tdiv_qr, demand den>0 per canonical form.
+
+2004-03-16  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/sqr_basecase.c: Add versions using mpn_addmul_2 and
+       mpn_addmul_2s.
+
+2004-03-14  Kevin Ryde  <kevin@swox.se>
+
+       * mpf/mul_ui.c: Incorporate carry from low limbs, for exactness.
+       * tests/mpf/t-mul_ui.c: New file.
+       * tests/mpf/Makefile.am (check_PROGRAMS): Add it.
+
+       * mpf/div.c: Use mpn_tdiv_qr.  Use just one TMP_ALLOC.  Use full
+       divisor, since truncating can lose accuracy.
+       * tests/mpf/t-div.c: New file.
+       * tests/mpf/Makefile.am (check_PROGRAMS): Add it.
+
+       * tests/mpf/t-set_q.c, tests/mpf/t-ui_div.c (check_various): Amend
+       bogus 99/4 test.
+       * tests/mpf/t-ui_div.c (check_rand): Exercise r==v overlap.
+
+       * tests/refmpf.c, tests/tests.h (refmpf_set_overlap): New function.
+
+       * mpf/cmp_si.c [nails]: Correction, cast vval in exp comparisons, for
+       when vval=-0x800..00 and limb==longlong.
+
+       * mpf/cmp_si.c [nails]: Correction, return usign instead of 1 when
+       uexp==2 but value bigger than an mp_limb_t.
+       * tests/mpf/t-cmp_si.c (check_data): Add test cases.
+
+       * tests/trace.c (mpf_trace): Use ABS(mp_trace_base) to allow for
+       negative bases used for upper case hex in integer traces.
+
+2004-03-12  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/sb_divrem_mn.c: Correct header comment.
+
+2004-03-11  Kevin Ryde  <kevin@swox.se>
+
+       * aclocal.m4, configure, ltmain.sh: Downgrade to libtool 1.5, version
+       1.5.2 doesn't remove .libs/*.a files when rebuilding, which is bad for
+       development when changing contents or with duplicate named files like
+       we have.
+
+       Revert this, ie restore AR_FLAGS=cq:
+       * acinclude.m4 (GMP_PROG_AR): Remove AR_FLAGS=cq, libtool 1.5.2 now
+       does this itself on detecting duplicate object filenames in piecewise
+       linking mode.
+
+       * randbui.c, randmui.c [longlong+nails]: Correction to conditionals
+       for second limb.
+
+       * mpz/aors_ui.h, mpz/cdiv_q_ui.c, mpz/cdiv_qr_ui.c, mpz/cdiv_r_ui.c,
+       mpz/cdiv_ui.c, mpz/fdiv_q_ui.c, mpz/fdiv_qr_ui.c, mpz/fdiv_r_ui.c,
+       mpz/fdiv_ui.c, mpz/gcd_ui.c, mpz/iset_ui.c, mpz/lcm_ui.c,
+       mpz/set_ui.c, mpz/tdiv_q_ui.c, mpz/tdiv_qr_ui.c, mpz/tdiv_r_ui.c,
+       mpz/tdiv_ui.c, mpz/ui_sub.c, mpf/div_ui.c, mpf/mul_ui.c
+       [longlong+nails]: Amend #if to avoid warnings about shift amount.
+
+2004-03-07  Kevin Ryde  <kevin@swox.se>
+
+       * mpf/reldiff.c: Use rprec+ysize limbs for d, to ensure accurate
+       result.  Inline mpf_abs(d,d) and mpf_cmp_ui(x,0), and rate the latter
+       UNLIKELY.
+
+       * mpf/ui_div.c: Use mpn_tdiv_qr.  Use just one TMP_ALLOC.  Use full
+       divisor, since truncating can lose accuracy.
+       * tests/mpf/t-ui_div.c: New file.
+       * tests/mpf/Makefile.am (check_PROGRAMS): Add it.
+
+       * mpf/set_q.c: Expand TMP_ALLOC_LIMBS_2, to make conditional clearer
+       and avoid 1 limb alloc when not wanted.
+
+       * gmp-impl.h (WANT_TMP_DEBUG): Define to 0 if not defined.
+       (TMP_ALLOC_LIMBS_2): Use "if" within macro rather than "#if", for less
+       preprocessor conditionals.
+
+       * mpf/mul_2exp.c, mpf/div_2exp.c: Add some comments.
+
+       * tests/refmpn.c (refmpn_sb_divrem_mn, refmpn_tdiv_qr): Nailify.
+
+2004-03-04  Kevin Ryde  <kevin@swox.se>
+
+       * gen-psqr.c (print): Add CNST_LIMB in PERFSQR_MOD_TEST, for benefit
+       of K&R.
+       * tests/mpn/t-perfsqr.c (PERFSQR_MOD_1): Use CNST_LIMB for K&R.
+
+       * doc/configuration (Configure): Remove mkinstalldirs, no longer used.
+
+       * acinclude.m4 (GMP_PROG_AR): Remove AR_FLAGS=cq, libtool 1.5.2 now
+       does this itself on detecting duplicate object filenames in piecewise
+       linking mode.
+
+       * configure.in (hppa2.0*-*-*): Test sizeof(long) == 4 or 8 to verify
+       ABI=2.0n versus ABI=2.0w.  In particular this lets CC=cc_bundled
+       correctly fall back to ABI=2.0n (we don't automatically add CC=+DD64
+       to that compiler, currently).
+
+       * doc/gmp.texi (Reentrancy): Note C++ mpf_class constructors using
+       global default precision.
+       (Random State Miscellaneous): Describe gmp_urandomb_ui as giving N
+       bits.
+       (C++ Interface Floats): Describe operator= copying the value, not the
+       precision, and what this can mean about copy constructor versus
+       default constructor plus assignment.
+
+       * mpf/set_q.c: Use mpn_tdiv_qr rather than mpn_divrem, so no shifting.
+       Don't truncate the divisor, it can make the result inaccurate.
+       * tests/mpf/t-set_q.c: New file.
+       * tests/mpf/Makefile.am (check_PROGRAMS): Add it.
+
+       * mpf/set.c: Use MPN_COPY_INCR, in case r==u and ABSIZ(u) > PREC(r)+1.
+       No actual bug here, because MPN_COPY has thus far been an alias for
+       MPN_COPY_INCR, only an ASSERT failure.
+       * tests/mpf/t-set.c: New file.
+       * tests/mpf/Makefile.am (check_PROGRAMS): Add it.
+
+       * mpf/set.c, mpf/iset.c: Do MPN_COPY last, for possible tail call.
+
+       * mpf/set_d.c: Rate d==0 as UNLIKELY.  Store size before extract call,
+       to shorten lifespan of "negative".
+
+       * mpf/init.c, mpf/init2.c, mpf/iset_d.c, mpf/iset_si.c,
+       mpf/iset_str.c, mpf/iset_ui.c: Store prec before alloc call, for one
+       less live quantity across that call.
+       * mpf/init.c, mpf/init2.c, mpf/iset_str.c: Store size and exp before
+       alloc call, to overlap with other operations.
+
+       * tests/refmpf.c, tests/tests.h (refmpf_fill, refmpf_normalize,
+       refmpf_validate, refmpf_validate_division): New functions.
+
+       * tests/refmpn.c, tests/tests.h (refmpn_copy_extend,
+       refmpn_lshift_or_copy_any, refmpn_rshift_or_copy_any): New functions.
+
+       * tal-debug.c: Add <string.h> for strcmp.
+
+       * tests/cxx/t-istream.cc (check_mpz, check_mpq, check_mpf): Use size_t
+       for loop index, to quieten g++ warning.
+
+2004-03-02  Kevin Ryde  <kevin@swox.se>
+
+       * tests/mpn/t-hgcd.c: Use __GMP_PROTO on prototypes.
+
+2004-03-01  Torbjorn Granlund  <tege@swox.com>
+
+       With Karl Hasselström:
+       * mpn/generic/dc_divrem_n.c (mpn_dc_div_2_by_1): New function, with
+       meat from old mpn_dc_divrem_n.  Accept scratch parameter.  Rewrite to
+       avoid a recursive call.
+       (mpn_dc_div_3_by_2): New function, with meat from old
+       mpn_dc_div_3_halves_by_2.  Accept scratch parameter.
+       (mpn_dc_divrem_n): Now just allocate scratch space and call new
+       mpn_dc_div_2_by_1.
+
+2004-02-29  Kevin Ryde  <kevin@swox.se>
+
+       * longlong.h (count_leading_zeros) [alpha gcc]: New version, inlining
+       mpn/alpha/cntlz.asm cmpbge technique.
+
+       * aclocal.m4, configure, install-sh, missing, ltmain.sh,
+       */Makefile.in: Update to automake 1.8.2 and libtool 1.5.2.
+
+       * doc/gmp.texi (C++ Interface Integers): Note / and % rounding follows
+       C99 / and %.
+       (Exact Remainder): Index entries for divisibility testing algorithm.
+
+       * tune/time.c (speed_endtime): Return 0.0 for negative time measured.
+       Revise usage comments for clarity.
+       * tune/common.c (speed_measure): Recognise speed_endtime 0.0 for
+       failed measurement.
+
+       * tests/mpn/t-get_d.c (check_rand): Correction to nhigh_mask setup.
+
+2004-02-27  Torbjorn Granlund  <tege@swox.com>
+
+       * tune/tuneup.c (tune_dc, tune_set_str): Up param.step_factor.
+
+       * tests/mpz/t-gcd.c: Decrease # of tests to 50.
+
+2004-02-27  Kevin Ryde  <kevin@swox.se>
+
+       * tests/devel/try.c: Add a comment that this is not for Cray systems.
+
+       * mpf/set_q.c: Don't support den(q)<0, demand canonical form in the
+       usual way.
+
+2004-02-24  Torbjorn Granlund  <tege@swox.com>
+
+       From Kevin:
+       * mpn/generic/mul_fft.c (mpn_fft_add_modF): Loop until normalization
+       criterion met.
+
+2004-02-22  Kevin Ryde  <kevin@swox.se>
+
+       * acinclude.m4 (GMP_PROG_CC_WORKS, GMP_OS_X86_XMM, GMP_PROG_CXX_WORKS):
+       Remove files that might look like compiler output, so our "||"
+       alternatives are not fooled.
+
+       * acinclude.m4 (GMP_PROG_CC_WORKS): Add test for lshift_com code
+       mis-compiled by certain IA-64 HP cc at +O3.
+
+       * gmp-impl.h (USE_LEADING_REGPARM): Disable under prof or gprof, for
+       the benefit of freebsd where .mcount clobbers registers.  Spotted by
+       Torbjorn.
+       * configure.in (WANT_PROFILING_PROF, WANT_PROFILING_GPROF): New
+       AC_DEFINEs.
+
+2004-02-21  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in (sparc64-*-*bsd*): Amend -m32 setup for ABI=32, so it's
+       not used in ABI=64 on the BSD systems.
+
+2004-02-18  Niels Möller  <nisse@lysator.liu.se>
+
+       * tests/mpz/t-gcd.c (gcdext_valid_p): New function.
+       (ref_mpz_gcd): Deleted function.
+       (one_test): Rearranged to call mpz_gcdext first, so that the
+       returned value can be validated.
+       (main): Don't use ref_mpz_gcd.
+
+2004-02-18  Torbjorn Granlund  <tege@swox.com>
+
+       * gmp-impl.h (MPN_TOOM3_MAX_N): Move to !WANT_FFT section.
+
+       * tests/mpz/t-mul.c: Exclude special huge operands unless WANT_FFT.
+
+       * mpz/rrandomb.c (gmp_rrandomb): Rewrite.
+
+       * mpn/generic/mul_n.c (mpn_toom3_sqr_n): Remove write-only variable c5.
+
+2004-02-18  Kevin Ryde  <kevin@swox.se>
+
+       * mpf/iset_si.c, mpf/iset_ui.c, mpf/set_si.c, mpf/set_ui.c [nails]:
+       Always store second limb, to avoid a conditional.
+
+       * tests/mpf/t-get_ui.c: New file.
+       * tests/mpf/Makefile.am (check_PROGRAMS): Add it.
+       * tests/mpf/t-get_si.c (check_limbdata): Further tests.
+       * gmp-impl.h (MP_EXP_T_MAX, MP_EXP_T_MIN): New defines.
+
+       * mpf/get_ui.c, mpf/get_si.c: Remove size==0 test, it's covered by
+       other conditions.  Attempt greater clarity by expressing conditions as
+       based on available data range.
+       * mpf/get_si.c [nails]: Correction, don't bail on exp > abs_size,
+       since may still have second limb above radix point available.
+       * mpf/get_ui.c: Nailify.
+
+2004-02-16  Kevin Ryde  <kevin@swox.se>
+
+       * mpz/scan0.c, mpz/scan1.c: Use count_trailing_zeros, instead of
+       count_leading_zeros on limb&-limb.
+
+       * mpf/sqrt.c: Use "/ 2" for exp, avoiding C undefined behaviour on
+       ">>" of negatives.  Correction to comment, exp is rounded upwards.
+       SIZ(r) always prec now, no need for tsize expression.  Store EXP(r)
+       and SIZ(r) where calculated to reduce variable lifespans.  Make tsize
+       mp_size_t not mp_exp_t, though of course those are currently the same.
+
+       * gmp-h.in (GMP_ERROR_ALLOCATE, GMP_ERROR_BAD_STRING,
+       GMP_ERROR_UNUSED_ERROR): Remove, never used or documented, and we
+       don't want to use globals for communicating error information.
+
+       * mpz/gcd_ui.c [nails]: Correction, actually return a value.
+
+       * mpn/generic/addmul_1.c, mpn/generic/submul_1.c [nails==1]: Add code.
+
+2004-02-15  Kevin Ryde  <kevin@swox.se>
+
+       * tests/mpz/t-jac.c (check_data): Remove unnecessary variable
+       "answer".
+
+2004-02-14  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/ia64/aors_n.asm: Break a group with a RAW conflict.
+
+2004-02-14  Kevin Ryde  <kevin@swox.se>
+
+       * acinclude.m4 (GMP_C_RIGHT_SHIFT): Note that it's "long"s which we're
+       concerned about.
+
+       * mpn/generic/mul_n.c: Add some remarks about toom3 high zero
+       stripping.
+
+       * mpn/generic/scan0.c, mpn/generic/scan1.c: Remove design issue
+       remarks.  What to do about going outside `up' space is a problem, but
+       anything to address it would be an incompatible change.
+
+2004-02-12  Torbjorn Granlund  <tege@swox.com>
+
+       * tests/mpn/t-hgcd.c: Remove unused variables.
+
+       * mpn/ia64/hamdist.asm: Remove bundling incompatible with HP-UX
+       assembler.  Misc HP-UX changes.
+       * mpn/ia64/gcd_1.asm: Add some syntax to placate the HP-UX assembler.
+
+2004-02-11  Kevin Ryde  <kevin@swox.se>
+
+       * longlong.h (power, powerpc): Use HAVE_HOST_CPU_FAMILY_power and
+       HAVE_HOST_CPU_FAMILY_powerpc rather than various cpp defines.
+
+       * gmp-impl.h: Add remarks about limits.h and Cray etc.
+
+       * mpn/ia64/mul_1.asm: Don't put .pred directives on labelled lines,
+       hpux 11.23 assembler doesn't like that.
+       * mpn/ia64/README: Add a note on this.
+
+       * dumbmp.c (mpz_mul): Set ALLOC(r) for new data block used.  Reported
+       by Jason Moxham.
+
+       * mpn/pa32/README, mpn/pa64/README (REFERENCES): New sections.
+
+2004-02-10  Torbjorn Granlund  <tege@swox.com>
+
+       * tests/mpz/t-gcd.c: Decrease # of tests run.
+
+       * mpn/*/gmp-mparam.h: Add HGCD values, update TOOM values.
+
+2004-02-01  Torbjorn Granlund  <tege@swox.com>
+
+       From Kevin:
+       * config.guess: Recognize AMD's hammer processors, return x86_64.
+
+2004-01-31  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/hgcd.c (mpn_cmp_sum3): Declare static.
+
+2004-01-25  Niels Möller  <nisse@lysator.liu.se>
+
+       * tests/mpn/Makefile.am (check_PROGRAMS): Add t-hgcd.
+
+       * mpn/generic/hgcd.c (hgcd_jebelean): Simplify, use mpn_cmp_sum3.
+       (mpn_cmp_sum3): New function.
+       (mpn_diff_smaller_p): Remove.
+       (hgcd_final, hgcd_jebelean, hgcd_small_1, hgcd_small_2, euclid_step):
+       Remove tp,talloc arguments.  Callers changed.
+
+2004-01-25  Torbjorn Granlund  <tege@swox.com>
+
+       * tune/tuneup.c (all): Reenable calls of tune_gcd_schoenhage and
+       tune_hgcd.
+
+       * mpn/generic/gcd.c: Reenable Schoenhage code.
+
+       With Niels Möller:
+       * mpn/generic/hgcd.c: Add const and inline to several functions.
+       (qstack_push_start qstack_push_end qstack_push_quotient): Remove.
+       (euclid_step): Insert removed functions here.
+       (hgcd_adjust): Simplify, don't handle d != 1.
+       (qstack_adjust): Corresponding changes.
+       (mpn_hgcd2_lehmer_step): Remove redundant tests for bh against zero.
+       (hgcd_start_row_p): Tweak.
+       (hgcd_final): Shorten life of ralloc.
+
+2004-01-24  Kevin Ryde  <kevin@swox.se>
+
+       * tests/mpf/t-sqrt.c (check_rand1): Further diagnostic printouts.
+
+       * mpn/generic/sqrtrem.c (mpn_sqrtrem): Add ASSERT_MPN.
+       (mpn_dc_sqrtrem): Add casts for K&R.
+
+       * mpf/sqrt_ui.c: Nailify.
+
+       * mpf/set_z.c: Do MPN_COPY last, for possible tail call.
+
+       * doc/gmp.texi (Miscellaneous Float Functions): For mpf_random2, note
+       exponent is in limbs.
+
+       * mpn/ia64/README: Add remark about concentrating on itanium-2.
+
+2004-01-22  Kevin Ryde  <kevin@swox.se>
+
+       * mpf/sqrt.c: Change tsize calculation to get prec limbs result
+       always, previously got prec+1 when exp was odd.
+       * tests/mpf/t-sqrt.c (check_rand1): New function, code from main.
+       (check_rand2): New function.
+
+       * mpf/sqrt_ui.c: Change rsize calculation to get prec limbs result,
+       previously got prec+1.
+       * tests/mpf/t-sqrt_ui.c: New file.
+       * tests/mpf/Makefile.am (check_PROGRAMS): Add it.
+
+       * tests/refmpf.c, tests/tests.h (refmpf_add_ulp,
+       refmpf_set_prec_limbs): New functions.
+
+       * mpz/get_d_2exp.c, mpf/get_d_2exp.c: Remove x86+m68k force to double,
+       mpn_get_d now does this.  Remove res==1.0 check for round upwards,
+       mpn_get_d now rounds towards zero.  Move exp store to make mpn_get_d a
+       tail call.
+
+       * configure.in (x86-*-*): Use ABI=32 rather than ABI=standard.
+       Use gcc -m32 when available, to force mode on bi-arch amd64 gcc.
+       * configure.in, acinclude.m4 (x86_64-*-*): Merge into plain x86 setups
+       as ABI=64.  Support ABI=32, using athlon code.  Use gcc -mcpu=k8,
+       -march=k8.
+       (amd64-*-*): Remove pattern, config.sub only gives x86_64.
+       * doc/gmp.texi (ABI and ISA): Add x86_64 dual ABIs.
+
+       * mpn/amd64/README: Add reference to ABI spec.
+
+2004-01-17  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/hgcd.c (hgcd_adjust): Backed out mpn_addlsh1_n
+       change for now.
+
+       * mpn/generic/hgcd.c (hgcd_adjust): Fixed calls of mpn_addlsh1_n.
+
+2004-01-17  Kevin Ryde  <kevin@swox.se>
+
+       * tune/README: Remove open/mpn versions of toom3, no longer exist.
+       * tune/powerpc64.asm: Remove unused L(again).
+       * tune/time.c (mftb): Note single mftb possible for powerpc64.
+
+       * mpn/generic/mode1o.c: Use "c<s" to do underflow detection in last
+       step, for better parallelism.
+
+       * mpn/generic/get_d.c: Preserve comments about hppa fcnv,udw,dbl from
+       previous mpz_get_d code.
+
+       * tune/freq.c: Add some comments about systems not covered.
+
+       * gmp-h.in (_GMP_H_HAVE_FILE): Add _MSL_STDIO_H for Metrowerks.
+       Reported by Tomas Zahradnicky.
+
+2004-01-16  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/hgcd.c (mpn_diff_smaller_p): Use MPN_DECR_U.
+       (hgcd_adjust): Use mpn_addlsh1_n when available.
+
+2004-01-16  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in (powerpc64-*-linux*): Try gcc64.  Try -m64 with
+       "cflags_maybe" to get it used in all probing.  Add sizeof-long-8 test
+       to check the mode is right if -m64 is not applicable.
+
+2004-01-15  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in (--with-readline=detect): Check for readline/readline.h
+       and readline/history.h.  Report result of detection.
+
+2004-01-14  Niels Möller  <nisse@lysator.liu.se>
+
+       * tune/speed.c (routine): Disabled speed_mpn_hgcd_lehmer.
+       * tune/common.c (speed_mpn_hgcd_lehmer): Disabled function.
+
+       * mpn/generic/hgcd.c (mpn_hgcd_lehmer_itch, mpn_hgcd_lehmer)
+       (mpn_hgcd_equal): Deleted functions.
+
+       * mpn/generic/gcd.c (hgcd_start_row_p): Deleted function.
+       (gcd_schoenhage): Deleted assertion code using mpn_hgcd_lehmer.
+
+       * mpn/generic/hgcd.c (hgcd_final): Fixed ASSERT typos.
+       (mpn_hgcd): To use Lehmer's algorithm, call hgcd_final directly,
+       not mpn_hgcd_lehmer.
+
+       * mpn/generic/gcd.c (gcd_schoenhage): Updated for changes to
+       mpn_hgcd and mpn_hgcd_fix. (Schoenhage code is still disabled).
+
+       * gmp-impl.h (mpn_hgcd_fix): Updated prototype.
+
+       * mpn/generic/hgcd.c (mpn_hgcd_fix): Replaced a bunch of arguments
+       by a pointer const struct hgcd_row *s. Updated callers.
+
+       * mpn/generic/hgcd.c (hgcd_start_row_p): Use const for the input.
+       Moved function definition before hgcd_jebelean.
+       (hgcd_jebelean): Interface change, analogous to hgcd2.
+       (mpn_hgcd_fix): Normalize v. Require that v > 0.
+       (hgcd_adjust): Fix bug in carry update.
+       (mpn_hgcd): Reorganized again, to adapt to mpn_hgcd/hgcd_jebelean
+       now sometimes returning 1. Reintroduced hgcd_adjust.
+
+       * mpn/generic/hgcd.c (hgcd_final): Streamlined logic for the first
+       hgcd2 call.
+
+       * mpn/generic/hgcd2.c (mpn_hgcd2): Interface change. Return 1
+       instead of 2, in the no progress case r0=A, r1=B.
+
+       * mpn/generic/hgcd.c (hgcd_adjust): Changed arguments and return
+       value. Now takes a struct hgcd_row * and the uv size, and returns
+       updated uvsize.
+       (hgcd_final): Special handling of the case hgcd2 returning 1. Now
+       uses hgcd_adjust, instead of a full Euclid division.
+
+2004-01-13  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/hgcd.c (euclid_step, hgcd_case0): Merged into a
+       single function euclid_step.
+       (mpn_hgcd): Reorganized the logic for the second recursive call.
+       Avoid unnecessary Euclid steps.
+
+       * tests/mpn/t-hgcd.c (hgcd_values): One more test value.
+
+       * tests/mpn/t-hgcd.c (hgcd_values): Added values that triggered the
+       hgcd_jebelean bug.
+
+       * mpn/generic/hgcd.c (hgcd_jebelean): Fixed off by one error.
+       (mpn_hgcd): Simplified the logic for the first recursive call. Now
+       it uses only the correct values from the recursive call, and
+       doesn't do tricks with hgcd_adjust (hgcd_adjust will probably be
+       reintroduced later, though).
+
+       * tests/mpn/t-hgcd.c (mpz_mpn_equal, hgcd_ref_equal)
+       (hgcd_ref_init, hgcd_ref_clear): New functions.
+       (hgcd_ref): Reference implementation of hgcd, using mpz.
+       (one_test): Use hgcd_ref. Don't use mpn_hgcd_lehmer.
+       (main): Skip one_step if both input values are zero.
+
+2004-01-12  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/hgcd.c (hgcd_final): Rewritten, now uses Lehmer
+       steps instead of a division loop.
+       (mpn_hgcd_lehmer): Deleted old Lehmer code, instead just
+       initialize and then call hgcd_final.
+
+       * tests/tests.h: Added refmpn_free_limbs prototype.
+       * tests/refmpn.c (refmpn_free_limbs): New function.
+
+       * tests/mpn/t-hgcd.c: Try the same kind of random inputs as for
+       mpz/t-gcd.
+
+2004-01-11  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/hgcd.c (mpn_hgcd_lehmer): Rewritten, after some more
+       analysis of the size reduction for one Lehmer step.
+
+       * tests/mpn/t-hgcd.c: New file.
+
+2004-01-11  Torbjorn Granlund  <tege@swox.com>
+
+       With Niels Möller:
+       * mpn/generic/hgcd.c (hgcd_normalize): Fix ASSERTs.
+       (hgcd_mul): Normalize R[1].uvp[1].  Add some more ASSERTs.
+       (hgcd_update_uv): Streamline.  ASSERT that input and output are
+       normalized.
+
+2004-01-11  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/alpha/ev6/slot.pl: New file, derived in part from
+       mpn/x86/k6/cross.pl.
+
+       * mpn/alpha/alpha-defs.m4 (ASSERT): New macro.
+
+       * mpn/asm-defs.m4 (m4_ifdef): New macro, avoiding OSF 4.0 m4 bug.
+       (m4_assert_defined): Use it.
+
+       * mpn/alpha/default.m4, mpn/alpha/unicos.m4 (LDGP): New macro.
+       * mpn/alpha/ev67/gcd_1.asm: Use it to re-establish gp after jsr.
+
+       * configure.in, demos/calc/Makefile.am: Use -lcurses or -lncurses with
+       readline, when available.
+
+       * longlong.h (sub_ddmmss) [generic]: Use al<bl for the borrow rather
+       than __x>al, since the former can be done without waiting for __x,
+       helping superscalar chips, in particular alpha ev5 and ev6.
+
+       * longlong.h (sub_ddmmss) [ia64]: New macro.
+
+       * tests/t-sub.c: New file.
+       * tests/Makefile.am (check_PROGRAMS): Add it.
+       * tests/refmpn.c, tests/tests.h (refmpn_sub_ddmmss): New function.
+
+2004-01-09  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/x86/p6/mod_34lsub1.asm: New file, derived in part from
+       mpn/x86/mod_34lsub1.asm.
+
+       * configure.in (IA64_PATTERN): Use -mtune on gcc 3.4.
+
+2004-01-07  Kevin Ryde  <kevin@swox.se>
+
+       * gmp-h.in, mp-h.in (__GMP_SHORT_LIMB): Renamed from _SHORT_LIMB, to
+       keep in our namespace.  (Not actually used anywhere currently.)
+       Reported by Patrick Pelissier.
+
+       * mp-h.in: Use "! defined (__GMP_WITHIN_CONFIGURE)" in the same style
+       as gmp-h.in (though mp-h.in is not actually used during configure).
+
+       * mp-h.in (__GMP_DECLSPEC_EXPORT, __GMP_DECLSPEC_IMPORT) [__GNUC__]:
+       Use __dllexport__ and __dllimport__ to keep out of application
+       namespace.  Same previously done in gmp-h.in.
+
+2004-01-06  Kevin Ryde  <kevin@swox.se>
+
+       * configfsf.sub, configfsf.guess: Update to 2004-01-05.
+       * configure.in (amd64-*-* | x86_64-*-*): Update comments on what
+       configfsf.sub does.
+
+2004-01-04  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/alpha/README (REFERENCES): Add tru64 assembly manuals.
+       (ASSEMBLY RULES): Note what gcc says about !literal! etc.
+
+2004-01-03  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/alpha/ev67/gcd_1.asm: New file.
+
+       * mpn/x86/pentium4/sse2/rsh1add_n.asm: New file, derived in part from
+       mpn/x86/pentium4/sse2/addlsh1_n.asm.
+
+       * mpn/x86/p6/p3mmx/popham.asm: Note measured speeds.
+
+       * mpn/ia64/hamdist.asm: Correction to inputs vs locals in alloc (makes
+       no difference to the generated code).  Corrections to a couple of
+       comments.
+
+       * mpn/x86/pentium4/sse2/addlsh1_n.asm (PARAM_CARRY): Remove macro, not
+       used, no such parameter.
+
+       * mpn/generic/gcd.c: Use <stdio.h> for NULL.
+
+       * doc/gmp.texi (Single Limb Division): Correction to tex expression
+       for (1/2)x1.  And minor wording tweaks elsewhere.
+
+       * gmp-impl.h (mpn_rsh1add_n, mpn_rsh1sub_n): Correction to comments
+       about how carries returned.
+
+       * longlong.h (umul_ppmm) [generic]: Add comments about squaring
+       (dropped from tasks list)
+
+2003-12-31  Kevin Ryde  <kevin@swox.se>
+
+       * demos/perl/GMP.xs (scan0, scan1): Return ~0 for not-found.
+       * demos/perl/GMP.pm: Describe this, remove the note about ULONG_MAX
+       being the same as ~0 (which is not true in old perl).
+       * demos/perl/test.pl: Update tests.
+       * demos/perl/typemap (gmp_UV): New type.
+
+       * demos/perl/test.pl (fits_slong_p): Comment out uv_max test, it won't
+       necessarily exceed a long.
+
+       * demos/perl/GMP.pm: Add a remark about get_str to the bugs section.
+
+       * mpn/generic/sqrtrem.c, mpz/fac_ui.c, tests/mpf/reuse.c: Add casts
+       for K&R.
+       * tests/mpf/t-muldiv.c: Make ulimb, vlimb into ulongs, which is how
+       they're used, for the benefit of K&R calling.
+
+       * doc/gmp.texi (Square Root Algorithm): Add a summary of the algorithm.
+       And add further index entries in various places.
+
+       * mpz/lucnum_ui.c, mpz/lucnum2_ui.c: Use mpn_addlsh1_n when available.
+
+       * gmp-impl.h, mpn/generic/mul_n.c (mpn_addlsh1_n, mpn_sublsh1_n,
+       mpn_rsh1add_n, mpn_rsh1sub_n): Move descriptions to gmp-impl.h with
+       the prototypes, for ease of locating.
+
+2003-12-30  Torbjorn Granlund  <tege@swox.com>
+
+       * tune/tuneup.c (all): Disable calls of tune_gcd_schoenhage and
+       tune_hgcd for now.
+
+2003-12-29  Torbjorn Granlund  <tege@swox.com>
+
+       * tests/mpz/t-gcd.c: Rewrite, based on suggestions by Kevin.
+
+       * mpn/ia64/mul_1.asm: Amend TODO list.
+
+       * mpn/sparc64/README: Remove mpn_Xmul_2, done.
+       Add blurb about L1 cache conflicts.
+
+       * mpn/generic/gcd.c: Disable Schoenhage code for now.
+
+2003-12-29  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/generic/mul_fft.c, mpz/root.c, mpq/cmp_ui.c: Add casts for K&R.
+
+2003-12-27  Kevin Ryde  <kevin@swox.se>
+
+       * tests/mpz/t-mul.c (mul_kara, mul_basecase): Use __GMP_PROTO.
+
+       * mpn/generic/gcd.c (NHGCD_SWAP4_2, NHGCD_SWAP3_LEFT),
+       mpn/generic/hgcd.c (HGCD_SWAP4_LEFT, HGCD_SWAP4_RIGHT, HGCD_SWAP4_2,
+       HGCD_SWAP3_LEFT): Aggregate initializers for automatics is an
+       ANSI-ism, avoid.
+
+       * Makefile.am (AUTOMAKE_OPTIONS): Restore this, giving no directory on
+       ansi2knr to avoid a circular build rule.
+       * configure.in (AM_INIT_AUTOMAKE): Note options also in Makefile.am.
+
+       * configure.in (cflags_maybe): Don't loop adding cflags_maybe if the
+       user has set CFLAGS.
+
+2003-12-24  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/gcd.c (gcd_schoenhage_itch): Avoid unary "+".
+       (mpn_gcd): Allocate scratch space on heap for gcd_schoenhage.
+       (mpn_gcd): Don't invoke MPN_NORMALIZE on input operands.
+
+2003-12-23  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in (*sparc*-*-*): Test sizeof(long)==4 or 8 for ABIs, to
+       get the right mode when the user sets the CFLAGS.
+       (testlist): Introduce "any_<abi>_testlist" to apply to all compilers.
+
+       * demos/perl/typemap (MPZ_ASSUME, MPQ_ASSUME, MPF_ASSUME): Remove
+       output rules, these are only meant for inputs.
+       (MPZ_MUTATE): Remove, not used since changes for magic.
+
+       * demos/perl/GMP.xs (mpz_class_hv, mpq_class_hv, mpf_class_hv): New
+       variables, initialized in BOOT.
+       * demos/perl/GMP.xs, demos/perl/typemap: Use them and explicit
+       sv_bless, to save a gv_stashpv for every new object.
+
+2003-12-22  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/alpha/mode1o.c, mpn/alpha/dive_1.c: Moved from ev5/mode1o.c and
+       ev5/dive_1.c, these are good for ev4, and would like them in a generic
+       alpha build.
+
+2003-12-21  Kevin Ryde  <kevin@swox.se>
+
+       * doc/gmp.texi (Integer Logic and Bit Fiddling): Say "bitwise" in
+       mpz_and, mpz_ior and mpz_xor, to avoid any confusion with what C means
+       by "logical".  Reported by Rüdiger Schütz.
+
+       * gmp-h.in (_GMP_H_HAVE_FILE): Note why defined(EOF) is not good.
+
+2003-12-20  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/hgcd.c (mpn_diff_smaller_p): Use mpn_cmp instead of
+       mpn_sub_n where possible.  Use mp_size_t for relevant variables.
+
+2003-12-20  Kevin Ryde  <kevin@swox.se>
+
+       * tune/speed.h (SPEED_TMP_ALLOC_LIMBS): Correction to last change,
+       don't want "- 1" on the TMP_ALLOC_LIMBS.
+
+       * demos/expr/expr.h: Test #ifdef MPFR_VERSION_MAJOR for when mpfr.h is
+       included, not GMP_RNDZ which is now an enum.
+
+       * demos/expr/exprfra.c (e_mpfr_ulong_p): Use mpfr_integer_p and
+       mpfr_fits_ulong_p.
+       (e_mpfr_get_ui_fits): Use mpfr_get_ui.
+
+       * mpfr/*: Update to mpfr cvs head 2003-12-20.
+
+       * configure, config.in: Update to autoconf 2.59.
+       * */Makefile.in, configure, aclocal.m4, ansi2knr.c, install-sh,
+       doc/mdate-sh: Update to automake 1.8.
+
+       * mkinstalldirs: Remove, not required by automake 1.8.
+       * doc/gmp.texi (Build Options): HTML is a usual target in automake 1.8.
+
+       * configure.in (AC_PREREQ): Require autoconf 2.59.
+       (AM_INIT_AUTOMAKE): Require automake 1.8.
+       (AC_C_INLINE): Use rather than GMP_C_INLINE, now has #ifndef
+       __cplusplus we want.
+       (gettimeofday): Use AC_CHECK_FUNCS rather than our workaround code,
+       autoconf now ok.
+
+       * acinclude.m4 (GMP_C_INLINE): Remove.
+       (GMP_H_EXTERN_INLINE): Use AC_C_INLINE.
+       (GMP_PROG_AR): Comment on automake $ARFLAGS.
+
+2003-12-19  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/hgcd.c (mpn_diff_smaller_p): Rewrote function. Tried
+       to explain how it works.
+       (slow_diff_smaller_p, wrap_mpn_diff_smaller_p) [WANT_ASSERT]: Use
+       CPP to wrap assertion checks around all calls to
+       mpn_diff_smaller_p.
+
+       * mpn/generic/hgcd.c (mpn_addmul2_n_1) [nails]: Fixed carry handling.
+
+       * mpn/generic/hgcd.c (mpn_diff_smaller_p) [nails]: Use
+       GMP_NUMB_MAX, not MP_LIMB_T_MAX.
+       (mpn_hgcd_itch): Improved size calculation.
+       (mpn_hgcd_max_recursion): Moved function from qstack.c. Needs to
+       be recompiled when HGCD_SCHOENHAGE_THRESHOLD is tuned.
+
+       * mpn/generic/qstack.c (mpn_hgcd_max_recursion): ... moved from
+       here.
+
+2003-12-19  Torbjorn Granlund  <tege@swox.com>
+
+       * tests/mpf/t-get_d.c: Print message before aborting.
+
+       * mpn/generic/hgcd2.c (mpn_hgcd2): Substitute always-zero variable
+       with 0.  Remove bogus comment.
+
+       * mpn/generic/get_d.c: Make ONE_LIMB case actually work for nails.
+
+2003-12-18  Niels Möller  <niels@lysator.liu.se>
+
+       * mpn/generic/hgcd.c (hgcd_update_r): Assert that the output r2 is
+       smaller than the input r1.
+
+2003-12-18  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/get_d.c: Don't include longlong.h.
+
+       * tests/mpz/t-mul.c (ref_mpn_mul): Handle un == vn specially, to avoid
+       a dummy r/w outside of allocated area.
+
+2003-12-18  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/alpha/unicos.m4 (ALIGN): Add comments on what GCC does.
+
+       * configure.in (fat setups), acinclude.m4 (GMP_INIT): Obscure
+       include() from automake 1.8 aclocal.
+       * acinclude.m4: Quote names in AC_DEFUN, for automake 1.8 aclocal.
+
+2003-12-17  Niels Möller  <nisse@lysator.liu.se>
+
+       * tune/common.c (speed_mpn_hgcd, speed_mpn_hgcd_lehmer) [nails]:
+       Enabled code also for GMP_NAIL_BITS > 0.
+       * tune/speed.c [nails]: Enable speed_mpn_hgcd and
+       speed_mpn_hgcd_lehmer.
+       * tune/tuneup.c (tune_hgcd) [nails]: Likewise.
+
+       * mpn/generic/gcd.c [nails]: Use Schönhage's algorithm also for
+       GMP_NAIL_BITS > 0.
+
+       * mpn/generic/hgcd.c [nails]: Enable the code for GMP_NAIL_BITS > 0.
+       (MPN_EXTRACT_LIMB) [nails]: Handle nails.
+       (__gmpn_hgcd_sanity): Allocate temporaries on the heap, not on the
+       stack. Also check that r[i] >= r[i+1].
+       (mpn_hgcd2_lehmer_step) [nails]: Handle nails.
+       (mpn_hgcd_lehmer): When we temporarily have r3 > r2, avoid
+       triggering that assert in __gmpn_hgcd_sanity.
+       (mpn_hgcd): Likewise.
+
+       * mpn/generic/hgcd2.c (div2) [nails]: Alternative nail-aware
+       version.
+       (SUB_2): New macro of Kevin's, which reduces to sub_ddmmss in the
+       non-nail case.
+       (HGCD2_STEP): Use SUB_2, not sub_ddmmss. Added alternative version
+       for K&R compilers.
+       (mpn_hgcd2) [nails]: Use SUB_2, not sub_ddmmss. New nail-aware
+       code for checking Jebelean's condition.
+
+2003-12-13  Kevin Ryde  <kevin@swox.se>
+
+       * mpq/get_d.c: Amend comments per mpn_get_d change.
+       (limb2dbl): Remove, no longer used.
+
+       * gmp-impl.h (DIVREM_1_NORM_THRESHOLD etc) [nails]: Correction to
+       comments, MP_SIZE_T_MAX means preinv never.
+
+       * gmp-impl.h (DIVEXACT_1_THRESHOLD, MODEXACT_1_ODD_THRESHOLD) [nails]:
+       Remove overrides, divexact_1 and modexact_1 have been nailified.
+
+       * mpz/inp_str.c (mpz_inp_str_nowhite): Use ASSERT_ALWAYS for EOF value
+       requirement.
+
+       * tests/refmpn.c (refmpn_rsh1add_n, refmpn_rsh1sub_n): Parens around
+       GMP_NUMB_BITS - 1 with ">>", to quieten gcc -Wall.
+       * tests/t-constants.c (main), tests/t-count_zeros.c (check_clz),
+       tests/t-modlinv.c (one), tests/mpz/t-jac.c (try_si_zi),
+       tests/mpq/t-get_d.c (check_onebit): Correction to printfs.
+       * tests/mpn/t-fat.c: Add <string.h> for memcpy.
+       * tests/mpz/t-scan.c (check_ref): Remove unused variable "isigned".
+       * tests/mpq/t-get_d.c (check_onebit): Remove unused variable "limit".
+       * tests/mpf/t-set_si.c, tests/mpf/t-set_ui.c (check_data): Braces for
+       initializers.
+       * tests/devel/try.c (mpn_divexact_by3_fun, mpn_modexact_1_odd_fun):
+       Correction to return values.
+
+       * doc/gmp.texi (Miscellaneous Integer Functions): Note mpz_sizeinbase
+       can be used to locate the most significant bit.  Reword a bit for
+       clarity.
+
+2003-12-12  Niels Möller  <niels@lysator.liu.se>
+
+       * mpn/generic/hgcd.c (__gmpn_hgcd_sanity): Fixed stack buffer
+       overrun.
+       * mpn/generic/hgcd.c: Improved comments.
+
+2003-12-11  Torbjorn Granlund  <tege@swox.com>
+
+       * gmp-impl.h: Change asm => __asm__, tabify.
+       * mpz/get_d_2exp.c: Likewise.
+       * mpf/get_d_2exp.c: Likewise.
+
+       * tests/cxx/t-ops.cc: #if .. #endif out tests that cause ambiguities.
+
+2003-12-10  Torbjorn Granlund  <tege@swox.com>
+
+       * tests/mpz/t-gcd.c: Generate operands with sizes as a geometric
+       progression, to allow for larger operands and less varying timing.
+
+       * tune/tuneup.c (tune_gcd_schoenhage): Set param.step_factor.
+       (tune_hgcd): Likewise.
+
+2003-12-10  Kevin Ryde  <kevin@swox.se>
+
+       * demos/perl/test.pl: Should be $] for perl version in old perl.
+
+       * configure.in (sparc64-*-*): Single block of gcc configs for all
+       systems, on unknown systems try both ABI 32 and 64.
+
+       * configure.in (LIBGMP_LDFLAGS, LIBGMPXX_LDFLAGS): New AC_SUBSTs with
+       options to generate .def files with windows DLLs.
+       * Makefile.am (libgmp_la_LDFLAGS, libgmpxx_la_LDFLAGS): Use them.
+
+       * mpn/generic/gcd.c: Use ABOVE_THRESHOLD / BELOW_THRESHOLD, to follow
+       convention and cooperate with tune/tuneup.c.
+
+       * tune/tuneup.c (tune_gcd_schoenhage): Increase max_size to 3000, since
+       default 1000 is approx the crossover point on athlon.
+
+       * tune/common.c, tune/speed.c, tune/speed.h, tune/speed-ext.c,
+       tune/tuneup.c (SPEED_TMP_ALLOC_LIMBS): Take variable as parameter
+       rather than returning a value, avoids alloca in a function call.
+       * tune/common.c, tune/speed.h (speed_tmp_alloc_adjust): Remove, now
+       inline in SPEED_TMP_ALLOC_LIMBS, and using ptr-NULL for alignment
+       extraction.
+
+       * gmpxx.h (__gmp_binary_equal, __gmp_binary_not_equal,
+       __gmp_binary_less, __gmp_binary_less_equal, __gmp_binary_greater,
+       __gmp_binary_greater_equal, __gmp_cmp_function): Use mpfr_cmp_si and
+       mpfr_cmp_d.
+       * tests/cxx/t-ops.cc: Exercise this.
+
+       * demos/perl/Makefile.PL: Don't install sample.pl and test2.pl.
+
+       * demos/perl/GMP.xs (use_sv): Prefer PV over IV or NV to avoid any
+       rounding.
+       * demos/perl/test.pl: Exercise this.
+
+       * demos/perl/GMP/Mpf.pm (overload_string): Corrections to $# usage.
+       * demos/perl/test.pl: Exercise this.
+
+2003-12-08  Kevin Ryde  <kevin@swox.se>
+
+       * demos/perl/GMP.pm: Correction to canonicalize example.
+
+       * demos/perl/GMP.xs: New type check scheme, support magic scalars,
+       support UV when available.  Remove some unused local variables.
+       (coerce_long): Check range of double.
+       (get_d_2exp): Remove stray printf.
+
+       * demos/perl/test.pl: Exercise magic, rearrange to make it clearer
+       what's being tested.
+
+2003-12-07  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/generic/hgcd.c (mpn_hgcd): Use BELOW_THRESHOLD, to follow the
+       convention of N<THRESH for the lower algorithm, not <=.  Fixes
+       algorithm selection in tuneup.c.
+
+       * tune/common.c (speed_mpn_hgcd, speed_mpn_hgcd_lehmer): Use intended
+       align_xp, align_yp.
+
+       * tune/tuneup.c (mul_toom3_threshold): Use MUL_TOOM3_THRESHOLD_LIMIT,
+       for the benefit of ASSERT in mpn_mul_n.
+
+       * tune/tuneup.c (tune_mul): Correction to toom3 param.min_size, should
+       use MPN_TOOM3_MUL_N_MINSIZE.
+
+       * tune/speed.c (check_align_option): Correction to printf format.
+       * tune/freq.c (freq_sysctl_hw_model): Remove unused "i" variable.
+
+       * scanf/doscan.c: Correction to a couple of trace printfs.
+       Add <stdlib.h> for strtol.
+
+       * tests/misc/t-scanf.c (test_sscanf_eof_ok): New function.
+       (check_misc): Use it to suppress tests broken by libc.
+       And should be EOF rather than -1 in various places.
+
+2003-12-06  Torbjorn Granlund  <tege@swox.com>
+
+       * tune/common.c (speed_mpn_hgcd, speed_mpn_hgcd_lehmer):
+       Move SPEED_TMP_ALLOC_LIMBS invocations out from calls.
+
+       * mpn/generic/get_str.c (mpn_get_str, POW2_P case):
+       Don't append extra '\0' byte.
+
+2003-12-05  Niels Möller  <niels@lysator.liu.se>
+
+       * tune/common.c (speed_mpn_hgcd_lehmer, speed_mpn_hgcd):
+       Updated for the renaming hgcd_sanity -> ASSERT_HGCD.
+
+       * mpn/generic/gcd.c (gcd_schoenhage): TMP_DECL must be the final
+       declaration in the declaration section of a block.
+
+       * tune/speed.h (mpn_gcd_accel): Added prototype.
+
+2003-12-05  Torbjorn Granlund  <tege@swox.com>
+
+       * randmt.c (__gmp_mt_recalc_buffer): Put parens around "&" expressions
+       inside "!=".
+
+       * mpf/get_str.c: Remove unused variable "fracn".
+
+2003-12-03  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in, Makefile.am (LIBGMP_LDFLAGS, LIBGMPXX_LDFLAGS): New
+       AC_SUBSTs, use them to create .def files with Windows DLLs.
+       * doc/gmp.texi (Notes for Particular Systems): Update notes on mingw
+       DLL with MS C.
+
+       * mpz/export.c: Allow NULL for countp.
+       * doc/gmp.texi (Integer Import and Export): Describe this.
+       Suggested by Jack Lloyd.
+
+       * mpn/x86/p6/aors_n.asm: New file, grabbing the K7 code.
+       Superiority of this reported by Patrick Pelissier.
+
+2003-11-30  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/alpha/ev67/popcount.asm, mpn/alpha/ev67/hamdist.asm: New files.
+
+       * mpn/alpha/ev67: New directory.
+       * configure.in (alphaev67, alphaev68, alphaev7*): Use it.
+
+       * doc/gmp.texi (GMPrefu, GMPpxrefu): Change back to plain ref and
+       pxref, remove macros.
+       (GMPreftopu, GMPpxreftopu): Remove URL parameter, rename to GMPreftop
+       and GMPpxreftop.
+       (Debugging): Remove debauch, seems to have disappeared.
+       (Language Bindings): Corrections to URLs for CLN, Omni F77, Pike.
+
+2003-11-29  Kevin Ryde  <kevin@swox.se>
+
+       * demos/perl/GMP/Mpf.pm (overload_string): Use $OFMT to avoid warnings
+       about $#.
+
+       * demos/perl/GMP.xs (fits_slong_p): Use LONG_MAX+1 to avoid possible
+       rounding of 0x7F..FF in a double on 64-bit systems.
+
+       * configure.in (ppc601-*-*): Remove this case, it never matched
+       anything, the name adopted is powerpc601.
+       (powerpc601-*-*): Use gcc -mcpu=601, xlc -qarch=601.
+
+       * configure.in: Introduce ${cc}_cflags_maybe, used if they work.
+       (*sparc*-*-*) [ABI=32]: Add gcc_cflags_maybe=-m32 to force that mode.
+
+       * doc/gmp.texi (Introduction to GMP): Add AMD64 to optimizations list.
+       (Build Options): Add cpu types alphaev7 and amd64.  Update texinfo
+       html cross reference.
+
+2003-11-28  Niels Möller  <nisse@lysator.liu.se>
+
+       * tune/tuneup.c (tune_hgcd): Disable if GMP_NAIL_BITS > 0.
+       * tune/speed.c (routine): Likewise.
+       * tune/common.c (speed_mpn_hgcd, speed_mpn_hgcd_lehmer): Likewise.
+
+       * mpn/generic/gcd.c, mpn/generic/hgcd.c, mpn/generic/hgcd2.c
+       [GMP_NAIL_BITS]: Disabled new code if we have nails.
+
+       * mpn/generic/gcd.c (MPN_LEQ_P): Copied macro definition (needed
+       for compilation with --enable-assert).
+
+       * tune/tuneup.c (hgcd_schoenhage_threshold,
+       gcd_schoenhage_threshold): New variables.
+       (tune_hgcd, tune_gcd_schoenhage): New functions.
+       (all): Call tune_hgcd and tune_gcd_schoenhage.
+
+       * tune/common.c (speed_mpn_hgcd, speed_mpn_hgcd_lehmer)
+       (speed_mpn_gcd_accel): New functions.
+       * tune/speed.c (routine): Added mpn_hgcd, mpn_hgcd_lehmer and
+       mpn_gcd_accel.
+       * tune/speed.h: Added corresponding prototypes.
+
+       * tune/gcd_accel.c: New file.
+
+       * tune/gcd_bin.c (GCD_SCHOENHAGE_THRESHOLD): Set to MP_SIZE_T_MAX.
+
+       * tune/Makefile.am (libspeed_la_SOURCES): Added gcd_accel.c.
+       (TUNE_MPN_SRCS_BASIC): Added hgcd.c.
+
+       * mpn/x86/k7/gmp-mparam.h (HGCD_SCHOENHAGE_THRESHOLD)
+       (GCD_SCHOENHAGE_THRESHOLD): Tuned values.
+
+       * mpn/generic/gcd.c (mpn_gcd, gcd_binary_odd): Renamed the
+       old mpn_gcd function (which implements accelerated binary gcd) to
+       gcd_binary_odd.
+       (gcd_binary): New function, with the additional book keeping
+       needed when using gcd_binary_odd to compute the gcd of non-odd
+       numbers.
+       (hgcd_tdiv): New function.
+       (gcd_lehmer): New function, currently #if:ed out.
+       (hgcd_start_row_p): New function, duplicated from hgcd.c.
+       (gcd_schoenhage_itch): New function.
+       (gcd_schoenhage): New function.
+       (mpn_gcd): New advertised gcd function, which calls
+       mpn_gcd_binary_odd or mpn_gcd_schoenhage, depending on the size of
+       the input.
+
+       * mpn/generic/hgcd.c (mpn_hgcd2_lehmer_step): Renamed function
+       (was lehmer_step), and made non-static. Updated callers.
+
+       * gmp-impl.h (GCD_LEHMER_THRESHOLD): #if:ed out this macro.
+       (mpn_hgcd2_lehmer_step): Added prototype.
+
+2003-11-27  Niels Möller  <nisse@lysator.liu.se>
+
+       * tests/mpz/t-gcd.c (gcd_values): Moved definition, so that we
+       don't need to forward declare the array.
+
+2003-11-26  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/hgcd.c (mpn_hgcd2_fix): Deleted duplicate definition
+       (the function belongs to hgcd2.c).
+
+2003-11-26  Torbjorn Granlund  <tege@swox.com>
+
+       * tests/mpz/t-gcd.c: Generate random operands up to 32767 bits;
+       decrease # of tests to 1000.
+       (gcd_values): Remove oversize test case.
+
+2003-11-26  Niels Möller  <niels@lysator.liu.se>
+
+       * gmp-impl.h: Added name mangling for hgcd-related functions. Also
+       use __GMP_PROTO.
+       (MPN_LEQ_P, MPN_EXTRACT_LIMB): Moved macros to hgcd.c.
+       * mpn/generic/hgcd.c, mpn/generic/hgcd2.c, mpn/generic/qstack.c:
+       Adapted to name changes.
+
+       * tests/mpz/t-gcd.c (main): Added some tests with non-random
+       input.
+
+2003-11-25  Niels Möller  <nisse@lysator.liu.se>
+
+       * gmp-impl.h (MPN_LEQ_P, MPN_EXTRACT_LIMB): New macros.
+       (struct qstack, struct hgcd2_row, struct hgcd2, struct hgcd_row)
+       (struct hgcd): New structs. Also added prototypes for new hgcd,
+       hgcd2, qstack and gcd functions.
+
+       * configure.in (gmp_mpn_functions): Added hgcd2, hgcd and qstack.
+
+       * mpn/Makefile.am (nodist_EXTRA_libmpn_la_SOURCES): Added hgcd2.c,
+       hgcd.c and qstack.c.
+
+       * mpn/generic/hgcd.c, mpn/generic/hgcd2.c, mpn/generic/qstack.c:
+       New files, needed for the sub-quadratic gcd.
+
+2003-11-25  Kevin Ryde  <kevin@swox.se>
+
+       * doc/gmp.texi (Language Bindings): Add Axiom.
+
+2003-11-22  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/alpha/README: More notes on assembler syntax variations.
+
+       * mpn/alpha/alpha-defs.m4, mpn/alpha/unicos.m4 (unop): Should be ldq_u
+       not bis, and move to alpha-defs.m4 since it can be happily used
+       everywhere.
+
+       * mpn/alpha/alpha-defs.m4, mpn/alpha/default.m4, mpn/alpha/unicos.m4
+       (bigend): Move to alpha-defs.m4 and base it on HAVE_LIMB_BIG_ENDIAN or
+       HAVE_LIMB_LITTLE_ENDIAN, so as not to hard code system endianness.
+
+       * mpn/alpha/alpha-defs.m4: New file.
+       * configure.in (alpha*-*-*): Use it.
+
+2003-11-21  Kevin Ryde  <kevin@swox.se>
+
+       * mpfr/*: Update to mpfr-2-0-2-branch 2003-11-21.
+
+       * mpn/alpha/ev5/com_n.asm: Change "not" to "ornot r31", since "not"
+       isn't recognised on Cray Unicos.  Add missing "gp" to PROLOGUE.
+       * mpn/alpha/README: Add a note on "not".
+
+2003-11-19  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/alpha/aorslsh1_n.asm: Slightly rework feed-in code, avoiding
+       spurious reads beyond operand limits.
+
+       * mpn/alpha/ev5/com_n.asm: Add ASM_START/ASM_END.
+
+       * mpn/generic/mul_fft.c (mpn_fft_zero_p): Remove unused function.
+       (mpn_lshift_com): Make static, nailify properly.
+
+2003-11-19  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/generic/diveby3.c: Use a "q" variable to make it clearer what
+       the code is doing.
+
+       * mpn/powerpc32/750/lshift.asm, mpn/powerpc32/750/rshift.asm: New
+       files.
+
+       * mpn/alpha/ev5/com_n.asm: New file.
+
+       * doc/gmp.texi (Assembler Functional Units, Assembler Writing Guide):
+       New sections by Torbjorn, tweaked by me.
+
+2003-11-17  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/powerpc32: Add power4/powerpc970 cycle counts.
+       Use cmpwi instead of cmpi to placate darwin.
+
+2003-11-15  Kevin Ryde  <kevin@swox.se>
+
+       * config.guess: Add comments on MacOS "machine" command.
+
+       * tests/devel/try.c (main): Use gmp_randinit_default explicitly on
+       __gmp_rands, since RANDS doesn't allow seeding.
+
+       * doc/gmp.texi (Assigning Integers): Remove notes on possible change
+       to disallow whitespace, this would be an incompatible change and
+       really can't be made.
+       (Toom 3-Way Multiplication): Updates for Paul's new code.
+
+       * mpn/generic/mul_n.c (toom3_interpolate, mpn_toom3_mul_n): Put
+       if/else braces around whole of #if code, for readability.
+
+       * tests/refmpn.c (refmpn_addlsh1_n, refmpn_sublsh1_n,
+       refmpn_rsh1add_n, refmpn_rsh1sub_n): Add ASSERTs for operand overlaps
+       etc.
+
+       * mpfr/*: Update to mpfr-2-0-2-branch 2003-11-15.
+
+2003-11-14  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/alpha/aorslsh1_n.asm: Use Cray-friendly syntax for "br".
+
+2003-11-13  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/alpha/aorslsh1_n.asm: New file.
+
+2003-11-12  Kevin Ryde  <kevin@swox.se>
+
+       * acinclude.m4 (GMP_PROG_CC_WORKS): Add case provoking AIX power2
+       assembler, test code by Torbjorn.
+       * configure.in (power*-*-*): Add a comment about -mcpu=rios2 fallback.
+
+       * tune/speed.c (main): Use gmp_randinit_default explicitly on
+       __gmp_rands, since RANDS doesn't allow seeding.
+
+       * mpfr/*: Update to mpfr-2-0-2-branch 2003-11-12.
+
+       * gmp-impl.h, randmt.h (__gmp_randinit_mt_noseed): Move prototype to
+       gmp-impl.h, for use by RANDS.
+
+       * mpn/Makeasm.am (.s, .S, .asm): Quote $< in test -f, per automake.
+       (.obj): Use test -f and $(CYGPATH_W) as per automake.
+
+2003-11-11  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in: Make umul and udiv standard-optional objects, rather
+       than under various extra_functions.
+
+       * mpn/pa32/hppa1_1/pa7100/add_n.asm,
+       mpn/pa32/hppa1_1/pa7100/addmul_1.asm,
+       mpn/pa32/hppa1_1/pa7100/lshift.asm,
+       mpn/pa32/hppa1_1/pa7100/rshift.asm,
+       mpn/pa32/hppa1_1/pa7100/sub_n.asm,
+       mpn/pa32/hppa1_1/pa7100/submul_1.asm: Use LDEF for labels.
+
+       * mpf/set_str.c: Don't use memcmp for decimal point testing, just a
+       loop is enough and avoids any chance of memcmp reading past the end of
+       the given string.
+
+       * randmts.c, randmt.h: New files.
+       * Makefile.am (libgmp_la_SOURCES): Add them.
+       * randmt.c: Move seeding to randmts.c, common defines in randmt.h.
+       * gmp-impl.h (RANDS): Use __gmp_randinit_mt_noseed.
+       * tests/misc.c (tests_rand_start): Use gmp_randinit_default
+       explicitly, not RANDS.
+
+       * mpn/ia64/ia64-defs.m4 (PROLOGUE_cpu): Use 32-byte alignment, for the
+       benefit of itanium 2.
+       * mpn/ia64/gcd_1.asm: Remove own .align 32.
+
+       * mpn/ia64/ia64-defs.m4 (ALIGN): New define, using IA64_ALIGN_OK.
+       * mpn/ia64/hamdist.asm: Use ALIGN instead of .align.
+
+       * acinclude.m4 (GMP_ASM_IA64_ALIGN_OK): New macro.
+       * configure.in (IA64_PATTERN): Use it.
+       * mpn/ia64/README: Add notes on gas big endian align problem.
+
+2003-11-10  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/ia64/mul_1.asm: Rewrite.
+
+2003-11-08  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/x86/aors_n.asm: Align loop to a multiple of 16.  Also align
+       M4_function_n to a multiple of 16, to minimize alignment padding.
+       Update P6 cycle counts reflecting improvements with new alignment.
+
+2003-11-07  Kevin Ryde  <kevin@swox.se>
+
+       * gmp-impl.h (HAVE_HOST_CPU_alpha_CIX): New define.
+       (ULONG_PARITY, popc_limb): Use it, to pick up ev7 as well as 67 and 68.
+       * longlong.h (count_leading_zeros, count_trailing_zeros): Ditto.
+
+       * doc/gmp.texi (Notes for Package Builds): Add notes on multi-ABI
+       system packaging.
+       (ABI and ISA): Add GNU/Linux ABI=64.
+       (Binary GCD): Add notes on 1x1 GCD algorithms.
+
+       * mpn/alpha/README: Add some literature references.
+
+       * mpn/ia64/mode1o.asm: Various corrections to initial checkin.
+       * mpn/ia64/ia64-defs.m4 (ASSERT): Correction to arg quoting.
+
+2003-11-05  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/powerpc64/linux64.m4: New file.
+       * configure.in (POWERPC64_PATTERN): Handle *-*-linux*.
+       Use linux64.m4.
+
+       * mpn/ia64/logops_n.asm: New file.
+
+2003-11-05  Kevin Ryde  <kevin@swox.se>
+
+       * tune/freq.c (freq_sysctl_hw_model): Relax to just look for "%u MHz",
+       for the benefit of sparc cypress under netbsd 1.6.1.
+
+       * mpfr/*: Update to mpfr-2-0-2-branch 2003-11-05.
+
+       * mpn/alpha/ev5/dive_1.c: New file.
+
+       * configure.in (x86_64-*-*): Accept together with amd64-*-*.
+
+       * tune/speed.c: Check range of -x,-y,-w,-W alignment specifiers.
+       * tune/speed.h (CACHE_LINE_SIZE): Amend comments.
+
+2003-11-04  Torbjorn Granlund  <tege@swox.com>
+
+       * tune/speed.c: Fix typo in testing HAVE_NATIVE_mpn_modexact_1_odd.
+
+2003-11-03  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/ia64/hamdist.asm: New file.
+       * mpn/ia64/mode1o.asm: New file.
+       * mpn/ia64/ia64-defs.m4 (ASSERT): New macro.
+
+       * tests/mpz/t-set_d.c (check_2n_plus_1): New test.
+
+2003-11-01  Kevin Ryde  <kevin@swox.se>
+
+       * mpz/fac_ui.c (BSWAP_ULONG) [limb==2*long]: Remove this case, it
+       provokes code gen problems on HP cc.
+       (BSWAP_ULONG) [generic]: Rename __dst variable to avoid conflicts with
+       BITREV_ULONG.
+       Fix by Jason Moxham.
+
+       * mpn/powerpc32/mode1o.asm: Use 16-bit i*i for early out, no need to
+       truncate divisor.  Amend stated 750/7400 speeds, and note operands
+       that give the extremes.
+
+       * mpz/set_d.c: Don't use a special case for d < MP_BASE_AS_DOUBLE, gcc
+       3.3 -mpowerpc64 on darwin gets ulonglong->double casts wrong.
+
+       * mpn/generic/diveby3.c: Show a better style carry handling in the
+       alternative pipelined sample code.
+
+       Revert this, the longlong.h macros need -mpowerpc64:
+       * acinclude.m4 (GMP_GCC_POWERPC64): New macro.
+       * configure.in (powerpc64-*-darwin*): Use it to exclude -mpowerpc64
+       when bad.
+
+2003-10-31  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/powerpc64/mode64/submul_1.asm: Move an instruction to save a
+       cycle on POWER4.
+
+       * mpn/powerpc64/mode64/divrem_1.asm: Fix several syntax problems
+       revealed on Mac OS X.
+
+       * mpn/powerpc64/mode64/*.asm: Add cycle counts for POWER4.
+
+       * mpn/powerpc64/sqr_diagonal.asm: Rewrite to save a cycle on POWER4.
+
+2003-10-31  Kevin Ryde  <kevin@swox.se>
+
+       * mpfr/*: Update to mpfr-2-0-2-branch 2003-10-31.
+
+       * mpn/powerpc64/README: Add subdirectory organisation notes.
+
+       * tests/mpn/t-get_d.c: Don't use limits.h, LONG_MIN is wrong on gcc
+       2.95 with -mcpu=ultrasparc.
+
+       * acinclude.m4 (GMP_GCC_POWERPC64): New macro.
+       * configure.in (powerpc64-*-darwin*): Use it to exclude -mpowerpc64
+       when bad.
+
+       * configure.in (powerpc64-*-darwin*) [ABI=mode32]: Use gcc -mcpu flags.
+
+       * mpn/ia64/divrem_1.asm, mpn/ia64/gcd_1.asm: Use "C" for comments.
+       * mpn/ia64/README, mpn/ia64/ia64-defs.m4: Note this.
+
+       * mpn/ia64/ia64-defs.m4: Renamed from default.m4, per other defs files.
+       * configure.in (IA64_PATTERN): Update GMP_INCLUDE_MPN.
+
+       * doc/gmp.texi (Notes for Particular Systems): Remove m68k ABI notes
+       for -mshort and PalmOS, now works.
+       (References): Correction, GMP Square Root proof already there, just
+       wanting URL from RRRR 4475.
+
+2003-10-29  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in (sparc*-*-*): Use gcc -m32 when that option works, to
+       force 32-bit mode on dual 32/64 configurations like GNU/Linux.
+       (sparc64-*-linux*): Add support for ABI=64.
+
+       * mpn/generic/pre_divrem_1.c: In fraction part, use CNST_LIMB(0) with
+       udiv_qrnnd_preinv to avoid warning about shift > type.
+
+       * mpfr/*: Update to mpfr-2-0-2-branch 2003-10-29.
+
+       * tests/cxx/t-istream.cc: Avoid tellg() checks if putback() doesn't
+       update that, avoids certain g++ 2.96 problems.
+
+       * tests/mpn/t-fat.c: New file.
+       * tests/mpn/Makefile.am (check_PROGRAMS): Add it.
+
+       * configure.in (CPUVEC_INSTALL, ITERATE_FAT_THRESHOLDS): New macros
+       for fat.h.
+       * mpn/x86/fat/fat.c (__gmpn_cpuvec_init): Use CPUVEC_INSTALL instead
+       of memcpy.  Correction to location of "initialized" set.  Improve
+       various comments.
+
+2003-10-27  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/sparc64/mul_1.asm: Change addcc => add in a few places.
+       * mpn/sparc64/addmul_1.asm: Likewise.
+
+       * mpn/sparc32/v9/mul_1.asm: Apply cross-jumping.
+       * mpn/sparc32/v9/addmul_1.asm: Likewise.
+       * mpn/sparc32/v9/submul_1.asm: Likewise.
+       * mpn/sparc32/v9/sqr_diagonal.asm: Likewise.
+
+2003-10-27  Kevin Ryde  <kevin@swox.se>
+
+       * tests/cxx/t-misc.cc: Don't use <climits>, on g++ 2.95.4 (debian 3.0)
+       -mcpu=ultrasparc LONG_MIN is wrong and kills the compile.
+
+       * tests/cxx/t-istream.cc: Correction to tellg tests, don't assume
+       streampos is zero based.
+
+       * configure.in (HAVE_HOST_CPU_FAMILY_alpha): New define for config.h.
+       * mpn/generic/get_d.c: Use it instead of __alpha for alpha workaround,
+       since Cray cc doesn't define __alpha.
+
+       * mpn/x86/README: Revise PIC coding notes a bit, add gcc visibility
+       attribute.
+
+2003-10-25  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/ia64/gcd_1.asm: New file.
+
+       * tune/many.pl: Allow for PROLOGUE(fun,...), as used on alpha.
+
+       * doc/gmp.texi (C++ Formatted Input): Describe base indicator handling.
+
+       * tests/cxx/t-istream.cc: New file.
+       * tests/cxx/Makefile.am: Add it.
+
+       * cxx/ismpznw.cc: New file, integer input without whitespace ...
+       * cxx/ismpz.cc: ... from here.
+       * gmp-impl.h (__gmpz_operator_in_nowhite): Add prototype.
+       * cxx/ismpq.cc: Rewrite using mpz input routines.  Change to accept a
+       separate base indicator on numerator and denominator.  Fix base
+       indicator case where "123/0456" would stop at "123/0".
+       * Makefile.am, cxx/Makefile.am: Add cxx/ismpznw.cc.
+
+       * tests/mpz/t-set_d.c: New file, derived from tests/mpz/t-set_si.c
+       * tests/mpz/Makefile.am (check_PROGRAMS): Add it.
+
+       * mpn/m68k/lshift.asm, mpn/m68k/rshift.asm: Support 16-bit int and
+       stack alignment.
+       * mpn/m68k/README: Add notes on this.
+       * configure.in (SIZEOF_UNSIGNED): New define in config.m4.
+       * mpn/m68k/m68k-defs.m4 (m68k_definsn): Add cmpw, movew.
+       Reported by Patrick Pelissier.
+
+       * mpn/m68k/t-m68k-defs.pl: Don't use -> with hashes, to avoid
+       deprecation warnings from perl 5.8.
+
+       * configure.in (viac3-*-*): Use just x86/pentium in $path not x86/p6.
+       If gcc is to be believed the old C3s don't have cmov.
+
+       * Makefile.am: Amend comments about not building from libtool
+       convenience libraries.
+
+       * mpn/asm-defs.m4 (PROLOGUE): Use m4_file_seen, for correct filename
+       in missing EPILOGUE error messages.
+       (m4_file_seen): Amend comments about where used.
+
+       * Makefile.am (CXX_OBJECTS): Remove $U, C++ files are not subject to
+       ansi2knr rules.
+
+       * gmp-h.in (mpn_divmod_1): Use __GMP_CAST, to avoid warnings in
+       applications using g++ -Wold-style-cast.
+
+       * mpn/z8000/README: New file.
+
+2003-10-22  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/generic/get_d.c (CONST_1024, CONST_NEG_1023,
+       CONST_NEG_1022_SUB_53): Replace ALPHA_WORKAROUND with a non-gcc-ism,
+       and use on Cray Unicos alpha too, which has the same problem.
+
+       * configure.in (powerpc64-*-darwin*): Make ABI=32 available as the
+       final fallback, remove mode64 until we know how it will work.
+
+       * doc/gmp.texi (Build Options): Add powerpc970 to available CPUs.
+       (ABI and ISA): Add mode32 for Darwin.
+
+       * configure.in (gettimeofday): Use an explicit AC_TRY_LINK, to avoid
+       known autoconf 2.57 problems with gettimeofday in AC_CHECK_FUNCS on
+       HP-UX.
+
+       * configure.in (powerpc*-*-*): Use ABI=32 instead of ABI=standard for
+       the default 32-bit ABI.  Fixes powerpc64-*-aix* which is documented as
+       choices "aix64 32" but had "aix64 standard".
+
+       * mpfr/*: Update to mpfr-2-0-2-branch 2003-10-22.
+
+       * doc/gmp.texi (Notes for Particular Systems): Note m68k gcc -mshort
+       and PalmOS calling conventions not supported.  Reported by Patrick
+       Pelissier.
+       (References): Add Paul Zimmermann's Inria 4475 paper.
+
+2003-10-21  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/ia64/submul_1.asm: Slightly reschedule loop to accommodate
+       Itanium 2 getf.sig latency.
+
+2003-10-21  Kevin Ryde  <kevin@swox.se>
+
+       * tests/mpn/t-instrument.c: Add mpn_addlsh1_n, mpn_rsh1add_n,
+       mpn_rsh1sub_n, mpn_sub_nc, mpn_sublsh1_n.  Typo in mpn_preinv_divrem_1
+       conditional.
+
+2003-10-20  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/powerpc64/mode32/add_n.asm: New file.
+       * mpn/powerpc64/mode32/sub_n.asm: New file.
+       * mpn/powerpc64/mode32/mul_1.asm: New file.
+       * mpn/powerpc64/mode32/addmul_1.asm: New file.
+       * mpn/powerpc64/mode32/submul_1.asm: New file.
+
+2003-10-19  Torbjorn Granlund  <tege@swox.com>
+
+       * longlong.h (AMD64): __x86_64__ => __amd64__.
+       (64-bit powerpc): Only define carry-dependent macros if
+       !_LONG_LONG_LIMB.
+
+       * acinclude.m4 (POWERPC64_PATTERN): Add powerpc970-*-*.
+
+       * configure.in (POWERPC64_PATTERN): Handle *-*-darwin*.
+       (POWERPC64_PATTERN, *-*-aix*): Prepend powerpc64/mode64 to path_aix64.
+
+       * mpn/powerpc64/mode64/mul_1.asm: Change cal => addi.
+       * mpn/powerpc64/mode64/addmul_1.asm: Likewise.
+       * mpn/powerpc64/mode64/submul_1.asm: Likewise.
+       * mpn/powerpc64/sqr_diagonal.asm: Likewise.
+
+       * mpn/powerpc64/mode64/mul_1.asm: Move from "..".
+       * mpn/powerpc64/mode64/addmul_1.asm: Likewise.
+       * mpn/powerpc64/mode64/submul_1.asm: Likewise.
+       * mpn/powerpc64/mode64/divrem_1.asm: Likewise.
+       * mpn/powerpc64/mode64/rsh1sub_n.asm: Likewise.
+       * mpn/powerpc64/mode64/add_n.asm: Likewise.
+       * mpn/powerpc64/mode64/addsub_n.asm: Likewise.
+       * mpn/powerpc64/mode64/sub_n.asm: Likewise.
+       * mpn/powerpc64/mode64/addlsh1_n.asm: Likewise.
+       * mpn/powerpc64/mode64/diveby3.asm: Likewise.
+       * mpn/powerpc64/mode64/rsh1add_n.asm: Likewise.
+       * mpn/powerpc64/mode64/sublsh1_n.asm: Likewise.
+
+       * mpn/powerpc64/lshift.asm: Handle mode32 ABI.
+       * mpn/powerpc64/rshift.asm: Likewise.
+       * mpn/powerpc64/umul.asm: Likewise.
+
+       * tune/powerpc64.asm: Make it actually work.
+
+2003-10-19  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/generic/get_d.c: Add a workaround for alpha gcc signed constant
+       comparison bug.
+
+       * gmpxx.h (gmp_randclass gmp_randinit_lc_2exp_size constructor): Throw
+       std::length_error if size is too big.
+       * tests/cxx/t-rand.cc (check_randinit): Exercise this.
+
+       * mpn/x86/pentium4/sse2/addlsh1_n.asm: New file, derived in part from
+       mpn/x86/pentium4/sse2/add_n.asm.
+
+       * doc/gmp.texi (C++ Interface Integers, C++ Interface Rationals, C++
+       Interface Floats): Note std::invalid_argument exception for invalid
+       strings to constructors and operator=.
+       (C++ Interface Random Numbers): Note std::length_error exception for
+       size too big in gmp_randinit_lc_2exp_size.
+
+2003-10-18  Kevin Ryde  <kevin@swox.se>
+
+       * mpfr/*: Update to mpfr-2-0-2-branch 2003-10-18.
+
+       * gmpxx.h (mpz_class, mpq_class, mpf_class, mpfr_class constructors
+       and operator= taking string or char*): Throw std::invalid_argument if
+       string cannot be converted.
+       * tests/cxx/t-constr.cc, tests/cxx/t-assign.cc: Exercise this.
+
+       * cxx/ismpz.cc, cxx/ismpq.cc, cxx/ismpf.cc: Use istream std::locale
+       ctype facet for isspace when available.  Only accept space at the
+       start of the input, same as g++ libstdc++.  Use ASSERT_NOCARRY to
+       check result of mpz_set_str etc.
+       * cxx/ismpf.cc: Don't accept "@" for exponent indicator.
+
+       * tune/speed.c, tune/speed.h, tune/common.c, tune/Makefile.am: Remove
+       _open and _mpn variants of mpn_toom3_mul_n, only one style now.
+       * tune/mul_n_open.c, tune/mul_n_mpn.c: Remove files.
+
+       * gmp-impl.h (LIMB_HIGHBIT_TO_MASK): New macro.
+       (udiv_qrnnd_preinv2, udiv_qrnnd_preinv2gen): Use it.
+
+       * tests/mpz/t-import.c, tests/mpz/t-export.c: Use octal for character
+       constants, hex is an ANSI-ism.
+
+       * mpn/alpha/ev5/mode1o.c: Corrections to ASSERTs, as per
+       mpn/generic/mode1o.c.
+
+       * mpn/generic/diveby3.c: Add commented out alternative code and notes
+       for taking the multiply off the dependent chain.  Amend/clarify some
+       of the other comments.
+
+       * configure.in (powerpc970-*-*): Use gcc -mcpu=970 when available.
+       (powerpc7400-*-*): Fallback on gcc -mcpu=750 if -mcpu=7400 not
+       available.
+
+       * doc/gmp.texi (C++ Formatted Input): Note locale digit grouping not
+       supported.
+       (C++ Formatted Input, C++ Formatted Output): Cross reference class
+       interface on overloading.
+
+       * mpn/m68k/README: Add various ideas from doc/tasks.html.
+
+       * mpn/m88k/README: New file.
+
+2003-10-16  Torbjorn Granlund  <tege@swox.com>
+
+       * config.sub: Recognize powerpc970.
+
+2003-10-15  Torbjorn Granlund  <tege@swox.com>
+
+       * config.guess: Recognize powerpc970 under MacOS.
+
+2003-10-15  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in, acinclude.m4 (GMP_C_RIGHT_SHIFT): New test.
+       * gmp-impl.h (LIMB_HIGHBIT_TO_MASK): New macro.
+       (udiv_qrnnd_preinv2, udiv_qrnnd_preinv2gen): Use it.
+
+       * mpn/amd64/amd64-defs.m4: New file, with a non-aligning PROLOGUE.
+       * configure.in (amd64-*-*): Use it.
+       * mpn/amd64/addlsh1_n.asm: Add ALIGN(16).
+
+       * mpfr/*: Update to mpfr cvs 2003-10-15.
+
+       * mpn/generic/get_d.c: Rewrite, simplifying and truncating towards
+       zero unconditionally.
+       * tests/mpn/t-get_d.c: Add various further tests.
+       * gmp-impl.h (FORCE_DOUBLE): New macro.
+
+       * gmp-h.in (__mpz_struct): Add comment on __mpz_struct getting into
+       C++ mangled function names.
+
+       * doc/gmp.texi (Build Options): Update notes for new doc subdir.
+       (Low-level Functions): Note mpn functions don't check for zero limbs
+       etc, it's up to an application to strip.
+
+       * doc/configuration (Configure): mdate-sh now in doc subdir, add
+       generated fat.h.
+
+2003-10-14  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/ia64/lorrshift.asm: Rewrite.
+
+       * mpn/ia64/diveby3.asm: Remove explicit bundling; add branch hints.
+
+2003-10-13  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/ia64/diveby3.asm: New file.
+
+2003-10-13  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/powerpc32/mod_34lsub1.asm: New file.
+
+       * mpn/powerpc32/diveby3.asm, mpn/powerpc64/diveby3.asm: src[] in
+       second operand of mullw, to allow possible early-out, which the
+       0xAA..AB inverse cannot give.  This improvement noticed by Torbjorn.
+
+       * acinclude.m4 (GMP_ASM_LSYM_PREFIX): Print to config.log whether
+       local label is purely temporary or appears in object files, for
+       development purposes.
+
+       * doc/gmp.texi, doc/fdl.texi, doc/texinfo.tex, doc/mdate-sh: Moved
+       from top-level.
+       * doc/Makefile.am: New file.
+       * configure.in (AC_OUTPUT): Add doc/Makefile.
+       * Makefile.am (SUBDIRS): Move doc subdirectory from EXTRA_DIST.
+       (info_TEXINFOS, gmp_TEXINFOS): Moved to doc/Makefile.am.
+       * mpfr/Makefile.am (mpfr_TEXINFOS): fdl.texi now in doc subdir.
+       (TEXINFO_TEX): texinfo.tex now in doc subdir.
+       (AM_MAKEINFOFLAGS): Set -I to doc subdir.
+
+       * mpz/and.c: For positive/positive, use mpn_and_n, rate a realloc as
+       UNLIKELY.
+
+       * mpn/generic/mul_n.c (mpn_toom3_mul_n, mpn_toom3_sqr_n): Don't test
+       for high zero limbs.
+
+2003-10-12  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/powerpc64/diveby3.asm: New file (trivial edits of
+       powerpc32/diveby3.asm).
+
+       * mpn/powerpc32/diveby3.asm: Update cycle counts with more processors.
+       * mpn/powerpc32/sqr_diagonal.asm: Likewise.
+
+       * mpn/pa64/add_n.asm: Correct PA8500 cycle counts.
+       * mpn/pa64/sub_n.asm: Likewise.
+
+       * mpn/m68k/aors_n.asm (INPUT PARAMETERS): Fix typo.
+       * mpn/m68k/lshift.asm: Likewise.
+       * mpn/m68k/rshift.asm: Likewise.
+
+       * mpn/m68k/README: Correct an URL; add some STATUS comments.
+
+       * mpn/ia64/aorslsh1_n.asm: Avoid shrp when shl/shr works just as well.
+
+       * mpn/powerpc32/addlsh1_n.asm: New file.
+       * mpn/powerpc32/sublsh1_n.asm: New file.
+
+2003-10-12  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/sparc64/divrem_1.c, mpn/sparc64/mod_1.c: New files.
+       * mpn/sparc64/sparc64.h (HALF_ENDIAN_ADJ, count_leading_zeros_32,
+       invert_half_limb, udiv_qrnnd_half_preinv): New macros.
+
+       * gmp-impl.h (udiv_qrnnd_preinv2): Use a ? : for getting the n1 bit,
+       so as not to depend on signed right shifts being arithmetic.
+
+       * mpn/powerpc32/diveby3.asm: New file.
+
+       * mpn/generic/divrem_1.c: Use CNST_LIMB(0) to avoid warnings from
+       udiv_qrnnd_preinv about shift count when int<long.  Do the same with
+       udiv_qrnnd, for consistency.
+
+       * Makefile.am (install-data-hook): Print a warning recommending "make
+       check" to watch out for compiler bugs.  Proposed by Torbjorn.
+
+       * mpn/ia64/README (mpn_lshift, mpn_rshift): Amend prospective itanium2
+       speed, 0.75 c/l with shrp plus shl/shr.
+
+       * mpn/ia64/popcount.asm: Add comment on optimality.
+
+2003-10-11  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/ia64/rsh1aors_n.asm: New file.
+
+       * mpn/asm-defs.m4: Handle rsh1aors_n.
+
+       * configure.in (tmp_mulfunc): Handle rsh1aors_n.
+
+2003-10-11  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/x86/pentium4/sse2/diveby3.asm: Remove non-PIC RODATA memory
+       access for 0xAAAAAAAB constant.
+
+       * gmp-impl.h (popc_limb, ULONG_PARITY) [ev67, ev68]: Add gcc asm
+       versions using ctpop.
+
+       * mpn/x86/k6/aorsmul_1.asm: Tweak some comments, remove M4_description
+       and M4_desc_retval used only in comments.
+
+       * mpn/x86/k6/mul_basecase.asm: Add comment on using mpn_mul_1.
+
+2003-10-09  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/powerpc64/addlsh1_n.asm: Tweak for 0.25 c/l better loop speed.
+       * mpn/powerpc64/sublsh1_n.asm: Likewise.
+
+2003-10-09  Kevin Ryde  <kevin@swox.se>
+
+       * mpfr/*: Update to mpfr cvs 2003-10-09.
+
+       * tests/devel/try.c (_SC_PAGESIZE): Define from _SC_PAGE_SIZE on
+       systems which use that, eg. hpux 9.
+
+2003-10-07  Kevin Ryde  <kevin@swox.se>
+
+       * tune/freq.c (freq_sysctl_hw_model): Correction to last sscanf change.
+
+       * configure.in: Check for psp_iticksperclktick in struct pst_processor.
+       * tune/freq.c (freq_pstat_getprocessor): Use this.
+
+       * tests/devel/try.c (divisor_array): Add a couple of half-limb values.
+
+       * acinclude.m4 (GMP_PROG_CC_WORKS): Correction to last change, need to
+       set result "yes" when cross compiling.
+
+2003-10-06  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/mul_n.c: Use __GMPN_ADD_1/__GMPN_SUB_1 instead of
+       mpn_add_1 and mpn_sub_1.
+
+       * mpn/pa64/aorslsh1_n.asm: Schedule register save and restore code.
+
+2003-10-05  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/pa64/mul_1.asm: Misc comment cleanups.
+       * mpn/pa64/addmul_1.asm: Likewise.
+       * mpn/pa64/submul_1.asm: Likewise.
+
+       * mpn/pa64/README: Correct cycle counts.
+
+       * mpn/pa64/aorslsh1_n.asm: New file.
+
+2003-10-04  Kevin Ryde  <kevin@swox.se>
+
+       * tune/freq.c (freq_sysctl_hw_model, freq_sunos_sysinfo,
+       freq_sco_etchw, freq_bsd_dmesg, freq_irix_hinv): Demand matching of
+       MHz etc at end of sscanf format string.  In particular need this for
+       freq_bsd_dmesg on i486-pc-freebsd4.7 to avoid the 486 cpu being used
+       for the frequency.
+
+       * tests/misc.c, tests/tests.h (tests_setjmp_sigfpe,
+       tests_sigfpe_handler, tests_sigfpe_done, tests_sigfpe_target,
+       tests_dbl_mant_bits): New.
+
+       * configure.in (viac3*-*-*): Add gcc VIA c3 options.
+
+       * mpfr/*: Update to mpfr cvs 2003-10-04.
+
+       * tests/refmpn.c (refmpn_addlsh1_n, refmpn_sublsh1_n,
+       refmpn_rsh1add_n, refmpn_rsh1sub_n): Add ASSERTs for operand overlaps.
+       * tests/tests.h (refmpn_addlsh1_n, refmpn_sublsh1_n, refmpn_rsh1add_n,
+       refmpn_rsh1sub_n): Add prototypes.
+
+       * tests/devel/try.c, tune/many.pl: Add mpn_addlsh1_n, mpn_sublsh1_n,
+       mpn_rsh1add_n, mpn_rsh1sub_n.
+
+2003-10-03  Torbjorn Granlund  <tege@swox.com>
+
+       * tests/refmpn.c (refmpn_addlsh1_n, refmpn_sublsh1_n, refmpn_rsh1add_n,
+       refmpn_rsh1sub_n): New functions.
+
+2003-10-03  Paul Zimmermann  <Paul.Zimmermann@loria.fr>
+
+       * mpn/generic/mul_n.c (toom3_interpolate): Use mpn_add_1/mpn_sub_1
+       instead of MPN_INCR_/MPN_DECR_U.
+
+2003-10-02  Torbjorn Granlund  <tege@swox.com>
+
+       * configure.in (ia64*-*-hpux*): Fall back to +O1, not +O.
+
+2003-10-02  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in (ia64*-*-hpux*): For cc, let +O optimization level
+       fallback if +O3 doesn't work.
+
+       * acinclude.m4 (GMP_PROG_CC_WORKS): Add a test of __builtin_alloca
+       when available, to pick up Itanium HP-UX cc internal errors in +O2.
+       Provoking code by Torbjorn.
+
+2003-10-01  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/ia64/gmp-mparam.h: Retune.
+
+       * mpn/asm-defs.m4: Handle aorslsh1_n.
+
+       * configure.in (tmp_mulfunc): Handle aorslsh1_n.
+
+       * mpn/ia64/aorslsh1_n.asm: New file.
+
+       * mpn/ia64/aors_n.asm: New file, complete rewrite of mpn_add_n and
+       mpn_sub_n.
+       * mpn/ia64/add_n.asm: Replace by aors_n.asm.
+       * mpn/ia64/sub_n.asm: Replace by aors_n.asm.
+
+2003-10-01  Kevin Ryde  <kevin@swox.se>
+
+       * acinclude.m4 (GMP_C_DOUBLE_FORMAT): Make bad ARM last byte into a
+       separate case and consider it non-IEEE, since it looks like this is
+       due to some sort of restricted or incorrect software floats.
+
+       * demos/calc/Makefile.am: Use automake yacc/lex support, seems fine in
+       separate objdir now.
+
+       * cxx/dummy.cc: Moved from top-level dummy.cc.
+       * Makefile.am (libgmpxx_la_SOURCES): Update to cxx/dummy.cc,
+       correction to comment about this.
+
+2003-09-30  Torbjorn Granlund  <tege@swox.com>
+
+       * demos/pexpr.c: Correct documentation of -split.
+       (TIME): Remove cast of result to double.
+       (main): Change timing variables to int.
+       (main): #ifdef LIMIT_RESOURCE_USAGE, don't convert numbers of more than
+       100000 digits.
+
+2003-09-28  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/*/*.asm: Clean up spacing, tabify.
+
+       * mpn/alpha/rshift.asm: Table cycle counts.
+       * mpn/alpha/lshift.asm: Likewise.
+       * mpn/alpha/ev5/rshift.asm: Likewise.
+       * mpn/alpha/ev5/lshift.asm: Likewise.
+       * mpn/alpha/ev6/add_n.asm: Likewise.
+       * mpn/alpha/ev6/sub_n.asm: Likewise.
+
+       * mpn/ia64/lorrshift.asm: Amend comments about performance.
+
+       * mpn/pa64/mul_1.asm: Fix comment typo.
+       * mpn/pa64/addmul_1.asm: Likewise.
+       * mpn/pa64/submul_1.asm: Likewise.
+
+       * mpn/amd64/addlsh1_n.asm: Save/restore carry using two insn to break
+       recurrency.  Add remarks about possible further speedup.
+       * mpn/amd64/sublsh1_n.asm: Likewise.
+
+       * mpn/amd64/rsh1add_n.asm: Add remarks about possible further speedup.
+       * mpn/amd64/rsh1sub_n.asm: Likewise.
+
+2003-09-27  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/powerpc64/README: Update with POWER4/PPC970 pipeline info.
+
+       * mpn/powerpc64/rsh1add_n.asm: New file.
+       * mpn/powerpc64/rsh1sub_n.asm: New file.
+       * mpn/powerpc64/rshift.asm: Rewrite.
+       * mpn/powerpc64/lshift.asm: Rewrite.
+
+2003-09-26  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/powerpc64/addlsh1_n.asm: New file.
+       * mpn/powerpc64/sublsh1_n.asm: New file.
+
+2003-09-25  Torbjorn Granlund  <tege@swox.com>
+
+       * tune/common.c (speed_mpn_addlsh1_n, speed_mpn_sublsh1_n,
+       speed_mpn_rsh1add_n, speed_mpn_rsh1sub_n): Conditionalize on
+       corresponding HAVE_NATIVE_*.
+
+2003-09-25  Kevin Ryde  <kevin@swox.se>
+
+       * mpz/combit.c: Use GMP_NUMB_BITS not BITS_PER_MP_LIMB.
+
+       * demos/expr/exprfr.c: Allow for mpfr_inf_p, mpfr_nan_p and
+       mpfr_number_p merely returning non-zero, rather than 1 or 0.
+
+       * demos/expr/exprfr.c, demos/expr/t-expr.c: Add erf, integer_p, zeta.
+
+       * demos/expr/Makefile.am (LDADD): Update comments on $(LIBM).
+
+2003-09-24  Torbjorn Granlund  <tege@swox.com>
+
+       * tune/speed.c (routine): Add entries for mpn_addlsh1_n, mpn_sublsh1_n,
+       mpn_rsh1add_n, and mpn_rsh1sub_n.
+
+       * tune/speed.h: Declare speed_mpn_addlsh1_n, speed_mpn_sublsh1_n,
+       speed_mpn_rsh1add_n, and speed_mpn_rsh1sub_n.
+
+       * tune/common.c (speed_mpn_addlsh1_n, speed_mpn_sublsh1_n,
+       speed_mpn_rsh1add_n, speed_mpn_rsh1sub_n): New functions.
+
+       * gmp-impl.h: Declare mpn_addlsh1_n, mpn_sublsh1_n, mpn_rsh1add_n, and
+       mpn_rsh1sub_n.
+
+       * mpn/asm-defs.m4: Add define_mpn's for addlsh1_n, sublsh1_n,
+       rsh1add_n, and rsh1sub_n.
+
+       * mpn/powerpc64/*.asm: Add cycle counts in consistent style.  Misc
+       styling edits.
+
+       * mpn/amd64/gmp-mparam.h: Retune.
+
+       * configure.in: Add #undefs for HAVE_NATIVE_mpn_addlsh1_n,
+       HAVE_NATIVE_mpn_sublsh1_n, HAVE_NATIVE_mpn_rsh1add_n,
+       HAVE_NATIVE_mpn_rsh1sub_n.
+       (gmp_mpn_functions_optional): List addlsh1_n, sublsh1_n, rsh1add_n,
+       and rsh1sub_n.
+
+       * mpn/amd64/addlsh1_n.asm: New file.
+       * mpn/amd64/sublsh1_n.asm: New file.
+       * mpn/amd64/rsh1add_n.asm: New file.
+       * mpn/amd64/rsh1sub_n.asm: New file.
+
+2003-09-24  Kevin Ryde  <kevin@swox.se>
+
+       * mpfr/*: Update to mpfr cvs 2003-09-24.
+
+       * acinclude.m4 (GMP_C_DOUBLE_FORMAT): Remove conftest* temporary files.
+
+2003-09-23  Torbjorn Granlund  <tege@swox.com>
+
+       * gmp-impl.h (MUL_TOOM3_THRESHOLD, SQR_TOOM3_THRESHOLD): Now 128.
+
+2003-09-23  Kevin Ryde  <kevin@swox.se>
+
+       * gmp-h.in (gmp_randinit_set): Use __gmp_const rather than const.
+
+2003-09-22  Torbjorn Granlund  <tege@swox.com>
+
+       * tune/mul_n_mpn.c (__gmpn_sqr_n): New #define.
+       * tune/mul_n_open.c (__gmpn_sqr_n): New #define.
+
+       * mpn/generic/mul.c (mpn_sqr_n): Move from here...
+       * mpn/generic/mul_n.c (mpn_sqr_n): ...to here.
+       (mpn_sqr_n): Allocate workspace for toom3 using TMP_* mechanism except
+       for very large operands when !WANT_FFT.
+
+       * mpn/generic/mul_n.c: Add a missing ";". Misc comment fixes.
+
+       * mpn/generic/mul.c: Remove spurious #include <stdio.h>.
+
+       * mpn/x86/k7/gmp-mparam.h: Retune.
+
+       * mpn/generic/mul_n.c (mpn_mul_n): Allocate workspace for toom3 using
+       TMP_* mechanism except for very large operands when !WANT_FFT.
+
+       * gmp-impl.h (MPN_TOOM3_MUL_N_TSIZE, MPN_TOOM3_SQR_N_TSIZE):
+       Define conditionally on WANT_FFT and HAVE_NATIVE_mpn_sublsh1_n.
+       (MPN_TOOM3_MAX_N): New #define.
+
+       * mpn/amd64/gmp-mparam.h: Retune.
+
+       * mpn/Makefile.am (TARG_DIST): Add amd64.
+
+       * mpn/generic/sqr_basecase.c: Use mpn_addlsh1_n when available.
+
+       * mpn/generic/mul_n.c: Use proper form for HAVE_NATIVE macros.
+
+2003-09-22  Kevin Ryde  <kevin@swox.se>
+
+       * mpfr/*: Update to mpfr cvs 2003-09-22.
+
+2003-09-21  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/x86/pentium4/sse2/gmp-mparam.h (USE_PREINV_DIVREM_1,
+       USE_PREINV_MOD_1): Set to 1 for new asm versions.
+
+       * mpfr/*: Update to mpfr cvs 2003-09-21.
+
+2003-09-21  Paul Zimmermann  <Paul.Zimmermann@loria.fr>
+
+       * mpn/generic/mul_n.c (mpn_toom3_mul_n): Conditionally use
+       mpn_sublsh1_n, mpn_rsh1add_n and mpn_rsh1sub_n, in addition to
+       mpn_addlsh1_n.  Avoid all copying, at the expense of some additional
+       workspace.
+
+       * gmp-impl.h (MPN_TOOM3_MUL_N_TSIZE, MPN_TOOM3_SQR_N_TSIZE): Accommodate
+       latest toom3 code.
+
+2003-09-19  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/x86/pentium4/sse2/divrem_1.asm, mpn/x86/pentium4/sse2/mod_1.asm:
+       New files.
+
+2003-09-16  Kevin Ryde  <kevin@swox.se>
+
+       * tune/speed.c (run_one): Don't scale the -1.0 not-available return.
+       Print "n/a" for times not-available.
+
+2003-09-13  Paul Zimmermann  <Paul.Zimmermann@loria.fr>
+
+       * mpn/generic/mul_n.c (toom3_interpolate): New function.
+       (mpn_toom3_mul_n, mpn_toom3_sqr_n): Call toom3_interpolate.
+
+2003-09-12  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/mul_n.c (mpn_toom3_mul_n, mpn_toom3_sqr_n): Remove unused
+       variables.
+       (mpn_toom3_mul_n, mpn_toom3_sqr_n): Use offset `+ 1', not `+ 2' in last
+       MPN_DECR_U calls.
+
+2003-09-12  Paul Zimmermann  <Paul.Zimmermann@loria.fr>
+
+       * mpn/generic/mul_n.c (mpn_toom3_mul_n, mpn_toom3_sqr_n): Rewrite.
+
+2003-09-12  Torbjorn Granlund  <tege@swox.com>
+
+       * gmp-impl.h (MPN_KARA_MUL_N_TSIZE, MPN_KARA_SQR_N_TSIZE): Reformulate
+       to use the same form as MPN_TOOM3_MUL_N_TSIZE.
+       (MPN_TOOM3_MUL_N_TSIZE, MPN_TOOM3_SQR_N_TSIZE): Update for new Toom3
+       code requirements.
+       * mpn/generic/mul_n.c (evaluate3, interpolate3, add2Times): Remove.
+       (USE_MORE_MPN): Remove.
+
+2003-08-31  Kevin Ryde  <kevin@swox.se>
+
+       * mpfr/*: Update to mpfr cvs 2003-08-31.
+
+2003-08-30  Kevin Ryde  <kevin@swox.se>
+
+       * mpfr/*: Update to mpfr cvs 2003-08-30.
+
+2003-08-29  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/amd64/copyi.asm: New file.
+       * mpn/amd64/copyd.asm: New file.
+       * mpn/amd64/README: New file.
+
+2003-08-28  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/amd64/lshift.asm: New file.
+       * mpn/amd64/rshift.asm: New file.
+       * mpn/amd64/gmp-mparam.h: Retune.
+
+2003-08-23  Kevin Ryde  <kevin@swox.se>
+
+       * tune/freq.c (freq_getsysinfo): Correction to speed_cycletime value
+       established.
+
+       * mpz/rootrem.c, gmp-h.in, gmp.texi (mpz_rootrem): Don't return
+       exactness indication, can get that from testing the remainder.
+
+       * mpn/x86/k7/aors_n.asm, mpn/x86/k7/mmx/copyi.asm: Amend to comments
+       about loads and stores and what speed should be possible.
+
+2003-08-22  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/amd64/add_n.asm: New file.
+       * mpn/amd64/sub_n.asm: New file.
+       * mpn/amd64/mul_1.asm: New file.
+       * mpn/amd64/addmul_1.asm: New file.
+       * mpn/amd64/submul_1.asm: New file.
+
+2003-08-19  Kevin Ryde  <kevin@swox.se>
+
+       * longlong.h (add_ssaaaa, sub_ddmmss) [hppa 64]: Move down into main
+       __GNUC__ block.  Exclude for _LONG_LONG_LIMB (ie. ABI=2.0n) since
+       these forms are only for ABI=2.0w.
+
+       * longlong.h (count_leading_zeros) [__mcpu32__]: Check __mcpu32__ to
+       avoid bfffo on GCC 3.4 in CPU32 mode.  Reported by Bernardo Innocenti.
+
+       * longlong.h (count_trailing_zeros) [x86_64]: Use "%q0" to force
+       64-bit register destination.  Pointed out by Torbjorn.
+
+       * mpz/combit.c: Correction to carry handling when extending a
+       negative, and use __GMPN_ADD_1.  Correction to complement limb for a
+       negative when there's a non-zero low limb.
+       * tests/mpz/bit.c (check_clr_extend, check_com_negs): Exercise these.
+
+       * demos/perl/GMP.xs, demos/perl/GMP.pm, demos/perl/test.pl: Add
+       get_d_2exp.
+       * demos/perl/GMP.xs, demos/perl/GMP.pm, demos/perl/GMP/Rand.pm,
+       demos/perl/test.pl: Add gmp_urandomb_ui, gmp_urandomm_ui.
+       (GMP::Rand::randstate): Accept a randstate object to copy.
+       * demos/perl/GMP.xs, demos/perl/GMP.pm, demos/perl/GMP/Mpz.pm,
+       demos/perl/test.pl: Add combit, rootrem.
+
+2003-08-19  Torbjorn Granlund  <tege@swox.com>
+
+       * tune/Makefile.am (EXTRA_DIST): Add amd64.asm.
+
+2003-08-17  Kevin Ryde  <kevin@swox.se>
+
+       * gmpxx.h [__MPFR_H]: Include full <iostream> for inlines.
+       * tests/cxx/t-headfr.cc: New file, exercising this.
+       * tests/cxx/Makefile.am: Add it.
+
+       * tests/cxx/t-constr.cc: Include config.h for WANT_MPFR.
+
+       * gmpxx.h: Correction to temp variable type in mpf -> mpfr assignment.
+       Reported by Derrick Bass.
+       * tests/cxx/t-assign.cc (check_mpfr): Exercise this.
+
+       * configure.in (WANT_MPFR): AC_DEFINE this, for the benefit of
+       tests/cxx/t-*.cc.  (Was always meant to have been defined.)
+       * tests/cxx/Makefile.am (INCLUDES): Add -I$(top_srcdir)/mpfr.
+
+       * gmpxx.h: __gmp_default_rounding_mode -> __gmpfr_default_rounding_mode
+       (struct __gmp_hypot_function): Correction to mpfr_hypot addition.
+       * tests/cxx/t-misc.cc (check_mpfr_hypot): Corrections to mpfr/long
+       tests.
+
+2003-08-16  Torbjorn Granlund  <tege@swox.com>
+
+       * configure.in (amd64): New.
+
+       * mpn/amd64/gmp-mparam.h: New file.
+
+       * tune/amd64.asm: New file, derived in part from tune/pentium.asm.
+
+2003-08-15  Kevin Ryde  <kevin@swox.se>
+
+       * tune/freq.c (freq_irix_hinv): Reinstate, for the benefit of IRIX 6.2.
+       (freq_attr_get_invent): Conditionalize on INFO_LBL_DETAIL_INVENT too.
+
+2003-08-14  Kevin Ryde  <kevin@swox.se>
+
+       * mpq/get_d.c: Use mpn_get_d.
+       * tests/mpq/t-get_d.c (check_onebit): New test.
+
+       * gmp.texi (Notes for Particular Systems): Under x86 cpu types, note
+       i386 is a fat binary, remove pentium4 recommendation since i386 is now
+       quite reasonable for p4.
+       (Notes for Particular Systems): Under Windows DLLs, remove caveat
+       about --enable-cxx now ok, update .lib creation for new libtool,
+       remove .exp not needed for MS C.
+       (Notes for Package Builds): i386 is a fat binary.
+       (Reentrancy): Remove SCO ctype.h note, don't want to list every system
+       misfeature, and was quite possibly for non-threading mode anyway.
+       (Autoconf): Remove notes on gmp 2 detection, too old to want to
+       encourage anyone to use.
+       (Karatsuba Multiplication): Correction to threshold increase/decrease
+       for a and b terms.  Reported by Richard Brent and Paul Zimmermann.
+       Also add various further index entries.
+
+       * tune/freq.c (freq_attr_get_invent): New function.
+       (freq_irix_hinv): Remove, in favour of freq_attr_get_invent.
+       * configure.in (AC_CHECK_FUNCS): Add attr_get.
+       (AC_CHECK_HEADERS): Add invent.h, sys/attributes.h, sys/iograph.h.
+
+2003-08-03  Kevin Ryde  <kevin@swox.se>
+
+       * tune/tuneup.c (tune_mul): Use MUL_KARATSUBA_THRESHOLD_LIMIT.
+
+2003-08-02  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/asm-defs.m4: Tweak some comments, add hpux11 to m4wrap 0xFF
+       problem systems.
+
+       * configure.in (*-*-sco3.2v5*): Remove lt_cv_archive_cmds_need_lc=no,
+       since libtool no longer uses it.  This was a workaround fixing ctype.h
+       in SCO 5 shared libraries; not sure if libtool now gets it right on
+       its own, let's hope so.
+
+       * configure.in, acinclude.m4 (GMP_PROG_HOST_CC): Remove, libtool no
+       longer demands HOST_CC.
+
+       * configure.in: When C or C++ compiler not found, refer user to
+       config.log.
+
+       * configure.in (i386-*-*): Turn i386 into a fat binary build.
+       * mpn/x86/fat/fat.c, mpn/x86/fat/fat_entry.asm,
+       mpn/x86/fat/gmp-mparam.h, mpn/x86/fat/gcd_1.c, mpn/x86/fat/mode1o.c:
+       New files.
+       * gmp-impl.h (struct cpuvec_t) [x86 fat]: New structure.
+       * longlong.h (COUNT_LEADING_ZEROS_NEED_CLZ_TAB) [x86 fat]: Define.
+       * mpn/asm-defs.m4 (foreach): New macro.
+       * mpn/x86/x86-defs.m4 (CPUVEC_FUNCS_LIST): New define.
+       * mpn/x86/sqr_basecase.asm: New file, primarily as a fallback for fat
+       binaries.
+       * mpn/x86/p6/gmp-mparam.h, mpn/x86/p6/mmx/gmp-mparam.h: Add comments
+       about fat binary SQR_KARATSUBA_THRESHOLD for p6 and p6/mmx.
+
+       * configure.in: Add various supports for fat binaries, via fat_path,
+       fat_functions and fat_thresholds variables.
+       * acinclude.m4 (GMP_STRIP_PATH): Mung $fat_path too.
+       (GMP_FAT_SUFFIX, GMP_REMOVE_FROM_LIST): New macros.
+       * gmp-impl.h: Add various supports for fat binaries.
+       (DECL_add_n etc): New macros.
+       (mpn_mul_basecase etc): Define only if not already defined.
+       * mpn/asm-defs.m4 (m4_config_gmp_mparam): Mention fat binary.
+       (MPN): Use m4_unquote, for the benefit of fat binary name expansion.
+       * doc/configuration: Notes on fat binaries.
+       * gmp-impl.h (MUL_TOOM3_THRESHOLD_LIMIT): Define always.
+       (MUL_KARATSUBA_THRESHOLD_LIMIT): New define.
+       * mpn/generic/mul.c, mpn/generic/mul_n.c: Use these.
+       * tune/divrem1div.c, tune/divrem1inv.c, tune/mod_1_div.c,
+       tune/mod_1_inv.c: Define OPERATION_divrem_1 and OPERATION_mod_1, to
+       tell fat.h what's being done.
+
+       * config.guess (alpha-*-*): Update comments on what configfsf.guess
+       does and doesn't do for us.
+
+2003-07-31  Kevin Ryde  <kevin@swox.se>
+
+       * config.guess: Remove $dummy.o files everywhere, in case vendor
+       compilers produce that even when not asked.
+
+       * demos/perl/GMP.xs (class_or_croak): Rename "class" parameter to
+       avoid C++ keyword.
+       (coerce_ulong, coerce_long): Move croaks to stop g++ 3.3 complaining
+       about uninitialized variables.
+
+       * demos/perl/INSTALL: Add notes on building with a DLL.
+
+       * longlong.h (count_trailing_zeros) [x86_64]: Ensure bsfq destination
+       is a 64-bit register.  Diagnosed by Francois G. Dorais.
+
+2003-07-31  Torbjorn Granlund  <tege@swox.com>
+
+       * longlong.h [ppc]: Remove nested test for vxworks.
+
+2003-07-24  Kevin Ryde  <kevin@swox.se>
+
+       * gmpxx.h (struct __gmp_binary_multiplies): Use mpz_mul_si for
+       mpz*long and long*mpz.
+       * tests/cxx/t-ops.cc (check_mpz): Exercise mpz*long and mpz*ulong.
+
+       * cxx/ismpf.cc: Use std::locale decimal point when available.  Expect
+       localeconv available always.
+       * tests/cxx/t-locale.cc: Enable check_input tests.
+
+       * gmpxx.h (struct __gmp_hypot_function): Use mpfr_hypot.
+       * tests/cxx/t-misc.cc (check_mpfr_hypot): New tests.
+
+       * tests/cxx/t-assign.cc, tests/cxx/t-binary.cc, tests/cxx/t-ops.cc,
+       tests/cxx/t-prec.cc, tests/cxx/t-ternary.cc, tests/cxx/t-unary.cc:
+       Include config.h for WANT_MPFR.
+
+       * tests/mpz/bit.c (check_single): Correction to a diagnostic print.
+
+2003-07-24  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpz/combit.c: New file.
+       * Makefile.am, mpz/Makefile.am: Add it.
+       * gmp-h.in (mpz_combit): Add prototype.
+       * tests/mpz/bit.c (check_single): Exercise mpz_combit.
+
+2003-07-16  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/generic/get_d.c: Correction to infinity handling for large exp.
+
+2003-07-14  Kevin Ryde  <kevin@swox.se>
+
+       * mpz/get_d.c, mpz/get_d_2exp.c, mpf/get_d.c, mpf/get_d_2exp.c: Use
+       mpn_get_d.
+
+       * mpn/generic/get_d.c: New file, based on mpz/get_d.c and insert-dbl.c.
+       * configure.in, mpn/Makefile.am: Add it.
+       * gmp-impl.h (mpn_get_d): Add prototype.
+
+       * tests/mpn/t-get_d.c: New file.
+       * tests/mpn/Makefile.am: Add it.
+
+       * tests/mpz/t-get_d_2exp.c (check_onebit, check_round): Test negatives.
+       (check_onebit): Add a few more bit sizes.
+
+       * tests/misc.c, tests/tests.h (tests_isinf): New function.
+
+2003-07-12  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in (GMP_PROG_CXX_WORKS): Include $CPPFLAGS, same as
+       automake does in the actual build.
+
+       * acinclude.m4 (GMP_PROG_CXX_WORKS): In the namespace test, declare
+       namespace before trying to use.  In std iostream test, provoke a
+       failure from Compaq C++ in pre-standard mode.
+
+2003-07-08  Kevin Ryde  <kevin@swox.se>
+
+       * acinclude.m4 (GMP_PROG_CC_WORKS): Use separate compiles for various
+       known problems, and indicate to the user the reason for rejecting.
+       (GMP_PROG_CXX_WORKS): Ditto, and insist on being able to execute each
+       compiled program.
+
+2003-07-05  Kevin Ryde  <kevin@swox.se>
+
+       * config.sub: Add comments to our alias transformations.
+
+       * configfsf.sub, configfsf.guess: Update to 2003-07-04.
+
+       * acinclude.m4 (GMP_PROG_CC_WORKS, GMP_PROG_CC_WORKS_LONGLONG): Show
+       failing program in config.log, per other autoconf tests.
+
+       * configure.in (i786-*-*): Recognise as pentium4, per configfsf.sub.
+
+2003-06-28  Kevin Ryde  <kevin@swox.se>
+
+       * mpz/get_d_2exp.c, mpf/get_d_2exp.c: Avoid res==1.0 when floats round
+       upwards.
+
+       * tests/mpz/t-get_d_2exp.c: New file.
+       * tests/mpz/Makefile.am (check_PROGRAMS): Add it.
+       * tests/mpf/t-get_d_2exp.c: New file.
+       * tests/mpf/Makefile.am (check_PROGRAMS): Add it.
+       * tests/x86call.asm, tests/tests.h (x86_fldcw, x86_fstcw): New
+       functions.
+       * tests/misc.c, tests/tests.h (tests_hardware_getround,
+       tests_hardware_setround): New functions.
+
+2003-06-25  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/sparc64/dive_1.c: New file.
+
+       * mpn/sparc64/sparc64.h: New file.
+       * mpn/sparc64/mode1o.c: Remove things now in sparc64.h.
+
+       * mpfr/*: Update to mpfr cvs 2003-06-25.
+
+       * acinclude.m4 (GMP_PROG_CC_WORKS): In last change provoking gnupro
+       gcc, don't use ANSI style function definition.
+
+2003-06-22  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/pa32/hppa1_1/udiv.asm: Remove .proc, .entry, .exit and .procend,
+       handled by PROLOGUE and EPILOGUE.  Comment out .callinfo, per other
+       asm files.
+
+       * gmpxx.h (mpz_class __gmp_binary_divides, __gmp_binary_modulus): Fix
+       long/mpz and long%mpz for dividend==LONG_MIN divisor==-LONG_MIN.
+       (mpz_class __gmp_binary_modulus): Fix mpz%long for negative dividend.
+       * tests/cxx/t-ops.cc (check_mpz): Add test cases for these, merging
+       operator/ and operator% sections for clarity.
+
+2003-06-21  Kevin Ryde  <kevin@swox.se>
+
+       * mpfr/*: Update to mpfr cvs 2003-06-21.
+
+       * acinclude.m4 (GMP_PROG_CC_WORKS): Add code by Torbjorn provoking an
+       ICE from gcc 2.9-gnupro-99r1 under -O2 -mcpu=ev6.
+       * configure.in (alpha*-*-* gcc_cflags_cpu): Fallback on -mcpu=ev56 for
+       this compiler.
+
+       * gmpxx.h (get_d): Remove comments about long double, double is
+       correct for get_d, a future long double form would be get_ld.
+
+2003-06-19  Kevin Ryde  <kevin@swox.se>
+
+       * mpfr/*: Update to mpfr cvs 2003-06-19.
+
+       * mpn/generic/dive_1.c: Share src[0] fetch among all cases.  No need
+       for separate final umul_ppmm in even case, make it part of the loop.
+
+       * mpz/get_d_2exp.c, mpq/set_si.c, mpq/set_ui.c: Nailify.
+
+       * mpf/iset_si.c: Rewrite using mpf/set_si.c code, in particular this
+       nailifies it.
+       * tests/mpf/t-set_si.c: Nailify tests.
+
+       * mpf/iset_ui.c: Nailify, as per mpf/set_ui.c
+       * tests/mpf/t-set_ui.c: New file.
+       * tests/mpf/Makefile.am (check_PROGRAMS): Add it.
+
+2003-06-15  Kevin Ryde  <kevin@swox.se>
+
+       * mpfr/*: Update to mpfr cvs 2003-06-15.
+
+       * mpn/x86/k6/mode1o.asm: Remove a bogus ASSERT.
+
+2003-06-12  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in (--enable-assert): Emit WANT_ASSERT to config.m4.
+       * mpn/powerpc32/powerpc-defs.m4, mpn/x86/x86-defs.m4 (ASSERT): Check
+       WANT_ASSERT is defined.
+
+       * mpn/sparc32/v9/udiv.asm: Amend heading, this file is for sparc v9.
+
+       * tests/cxx/Makefile.am (TESTS_ENVIRONMENT): In libtool openbsd hack,
+       discard error messages from cp, for the benefit of --disable-shared or
+       systems not using names libgmp.so.*.
+
+       * tests/devel/try.c (try_one): When overlapping, copy source data
+       after filling dst.  Previously probably used only DEADVAL in
+       overlapping cases.
+
+2003-06-11  Torbjorn Granlund  <tege@swox.com>
+
+       * mpf/random2.c: Rewrite.  Ignore sign of exp parameter.
+
+2003-06-10  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/sparc64/mode1o.c: New file.
+
+2003-06-09  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/powerpc32/lshift.asm: Add more cycle counts.
+       * mpn/powerpc32/rshift.asm: Add more cycle counts.
+
+       * mpn/ia64/addmul_1.asm: Reformat comments for 80 columns.
+
+       * gmp-impl.h (udiv_qrnnd_preinv1): New name for udiv_qrnnd_preinv.
+       (udiv_qrnnd_preinv2): New name for udiv_qrnnd_preinv2norm.
+       (udiv_qrnnd_preinv): New #define, making udiv_qrnnd_preinv2
+       the default.
+       * tune/speed.c: Corresponding changes.
+       * tune/speed.h: Likewise.
+       * tune/common.c: Likewise.
+
+       * mpf/get_str.c: Simplify `off' computation.
+
+       * longlong.h: Tabify.
+
+2003-06-09  Kevin Ryde  <kevin@swox.se>
+
+       * gmp.texi (ABI and ISA): FreeBSD has sparc64 too, just say "BSD" to
+       cover all flavours.
+       * configure.in: Ditto in some comments.
+
+       * mpfr/*: Update to mpfr cvs 2003-06-09.
+
+       * tests/cxx/Makefile.am (LDADD): Add -L$(top_builddir)/$(LIBS), for
+       the benefit of gcc 3.2 on itanium2-hp-hpux11.22.
+
+       * tune/many.pl (mul_2): Add speed routine settings.
+       (MAKEFILE): Close when done, for the benefit of development hackery.
+
+2003-06-08  Kevin Ryde  <kevin@swox.se>
+
+       * mpfr/*: Update to mpfr cvs 2003-06-08.
+
+       * mpn/x86/x86-defs.m4 (femms): Remove fallback to emms.
+       (cmovCC, psadbw): Remove simulated versions.
+       (cmov_available_p, psadbw_available_p): Remove.
+       This trickery was only ever for development purposes on machines
+       without those instructions.  Removing it simplifies gmp and in
+       particular avoids complications for fat binary builds.  Development
+       can be done with a wrapper around "as" if really needed.
+
+       * mpn/x86/divrem_1.asm: Don't use loop_or_decljnz, now K6 has its own
+       mpn/x86/k6/divrem_1.asm.  Amend K6 comments now moved to there.
+       * mpn/x86/x86-defs.m4 (loop_or_decljnz): Remove, no longer used.
+
+       * mpn/x86/k6/divrem_1.asm: New file, derived from mpn/x86/divrem_1.asm.
+
+       * mpn/x86/k6/pre_mod_1.asm: Remove comments now in mpn/x86/mod_1.asm.
+
+       * mpn/x86/mod_1.asm: Put mpn_mod_1c after mpn_mod_1 for better branch
+       prediction.  Put done_zero at end for less wastage in alignment.  Use
+       decl+jnz unconditionally since in fact it's ok on k6.  Amend comments.
+
+2003-06-07  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/generic/mode1o.c: Fix ASSERTs on return value.
+
+       * gmp.texi (Build Options): Add viac3 and viac32 cpu types.
+       (ABI and ISA): Note on sparcv9 ABI=32 vs ABI=64 speed.  More indexing.
+
+       * configfsf.guess, configfsf.sub: Update to 2003-06-06.
+       * config.guess: Remove $RANDOM hack supporting netbsd 1.4, not needed
+       by new configfsf.guess.
+
+2003-06-06  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/ia64/submul_1.asm: Add branch over .align block.
+
+2003-06-05  Torbjorn Granlund  <tege@swox.com>
+
+       * longlong.h (add_ssaaaa) [pa64]: Output zero operand as register 0.
+       Allow more immediate operands.
+       (sub_ddmmss) [pa64]: Likewise.
+       (add_ssaaaa) [pa32]: Likewise.
+       (sub_ddmmss) [pa32]: Likewise.
+
+       * mpn/pa64: Change ".level 2.0W" to ".level 2.0w" to please
+       picky GNU assembler.
+
+2003-06-05  Kevin Ryde  <kevin@swox.se>
+
+       * gmp.texi (Integer Special Functions): In mpz_array_init, fix type
+       shown for integer_array and give an example use.
+
+2003-06-04  Torbjorn Granlund  <tege@swox.com>
+
+       * mpf/set_str.c (mpf_set_str): Work around gcc 2 bug triggered on
+       alpha.
+
+2003-06-03  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/x86/pentium/README: Add 7 c/l mmx mul_1, tweak wordings.
+
+       * acinclude.m4 (GMP_C_DOUBLE_FORMAT): Use octal char constants in test
+       program, hex is not supported by K&R.
+
+2003-06-02  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/mips64/divrem_1.asm: New file.
+
+2003-06-01  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/powerpc32/lshift.asm: Reformat code.
+       * mpn/powerpc32/rshift.asm: Reformat code.
+
+2003-05-30  Kevin Ryde  <kevin@swox.se>
+
+       * tests/misc.c (tests_start): Set stdout and stderr to unbuffered, to
+       avoid any chance of losing output on segv etc.
+
+2003-05-28  Torbjorn Granlund  <tege@swox.com>
+
+       * mpf/get_str.c: Move label `done' to match TMP_MARK and TMP_FREE.
+       Remove redundant variable prec.
+
+2003-05-26  Torbjorn Granlund  <tege@swox.com>
+
+       * tests/mpz/convert.c: Test bases up to 62.
+
+       * tests/mpf/t-conv.c: Test bases up to 62.
+
+       * demos/pexpr.c: Don't iterate to get accurate timing.
+
+       * mpf/set_str.c (mpn_pow_1_highpart): Cleanup.
+
+       * mp_dv_tab.c: Fix typo.
+
+       * mpf/get_str.c: Rewrite (now sub-quadratic).
+
+2003-05-22  Kevin Ryde  <kevin@swox.se>
+
+       * tests/mpn/t-divrem_1.c: New file.
+       * tests/mpn/Makefile.am: Add it.
+
+2003-05-22  Torbjorn Granlund  <tege@swox.com>
+
+       * config.sub: Recognize viac3* processors.
+
+2003-05-20  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/sparc64/addmul_2.asm: New file.
+
+2003-05-19  Torbjorn Granlund  <tege@swox.com>
+
+       * configure.in: Recognize alphaev7* as alphaev67.
+
+       * config.guess: Recognize viac3* processors.
+       * configure.in: Set up path for viac3* processors.
+       * acinclude.m4 (X86_PATTERN): Include viac3* processors.
+
+2003-05-19  Kevin Ryde  <kevin@swox.se>
+
+       * tune/freq.c (freq_pstat_getprocessor): New function.
+       (freq_all): Use it.
+       * configure.in (AC_CHECK_HEADERS): Add sys/pstat.h.
+       (AC_CHECK_FUNCS): Add pstat_getprocessor.
+
+2003-05-15  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/generic/mul_fft.c (mpn_mul_fft_decompose): Remove "inline",
+       since the code is a bit too big.  gcc doesn't actually inline when
+       alloca (TMP_ALLOC) is used anyway.
+
+2003-05-13  Kevin Ryde  <kevin@swox.se>
+
+       * gmp.texi (Notes for Particular Systems): Libtool directory is .libs
+       not _libs for mingw dll.  Reported by Andreas Fabri.
+
+2003-05-07  Kevin Ryde  <kevin@swox.se>
+
+       * acinclude.m4 (GMP_PROG_CC_WORKS): Add code to generate sse2/xmm code
+       from gcc -march=pentium4, to check the assembler supports that.
+       (GMP_GCC_PENTIUM4_SSE2, GMP_OS_X86_XMM): New macros.
+       * configure.in (pentium4-*-*): Use them to see if gcc -march=pentium4
+       (with sse2) is ok.
+
+2003-05-06  Kevin Ryde  <kevin@swox.se>
+
+       * mpz/com.c: Rate size==0 as UNLIKELY, fix comment to mpn_add_1.
+
+       * tune/freq.c (<sys/sysinfo.h>): Include only when needed for
+       getsysinfo(), to avoid a problem with this file on AIX 5.1.
+
+2003-05-03  Torbjorn Granlund  <tege@swox.com>
+
+       * mpf/set_str.c: Do not ignore supposedly superfluous digits (in part
+       reverting last change).
+
+2003-05-03  Kevin Ryde  <kevin@swox.se>
+
+       * gmp.texi: Use @code for files in @cindex entries, it looks nicer
+       than @file.
+
+       * Makefile.am: Note gmp 4.1.1 and 4.1.2 version info.
+
+       * configure.in, acinclude.m4 (GMP_CRAY_OPTIONS): New macro for Cray
+       system setups, letting AC_REQUIRE do its job instead of a hard coded
+       AC_PROG_EGREP.
+
+       * config.guess: Amend fake RANDOM to avoid ". configfsf.guess" which
+       segfaults on Debian "ash" 0.4.16.
+
+2003-05-01  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in (AC_CHECK_FUNCS): Add getsysinfo.
+       (AC_CHECK_HEADERS): Add sys/sysinfo.h and machine/hal_sysinfo.h.
+       * tune/freq.c (freq_getsysinfo): New function.
+       (freq_all): Use it.
+       (freq_sysctlbyname_i586_freq, freq_sysctlbyname_tsc_freq,
+       freq_sysctl_hw_cpufrequency, freq_sysctl_hw_model): Set
+       speed_cycletime before trying to print it, when verbose.
+
+2003-04-28  Torbjorn Granlund  <tege@swox.com>
+
+       * mpf/set_str.c: Major overhaul.
+       (mpn_pow_1_highpart): New helper function, meat extracted from
+       mpf_set_str.
+
+2003-04-24  Kevin Ryde  <kevin@swox.se>
+
+       * acinclude.m4 (GMP_GCC_ARM_UMODSI): Quote result string against m4.
+
+       * configure, ltmain.sh, aclocal.m4: Update to libtool 1.5.
+
+       * longlong.h (add_ssaaaa) [all]: Remove first "%" commutative in each,
+       since gcc only supports one per asm.
+
+       * printf/doprnt.c: Add M for mp_limb_t.
+       * tests/misc/t-printf.c: Exercise this.
+
+       * tests/mpz/t-cmp_d.c: Test infinities.
+       * tests/mpf/t-cmp_d.c: New file.
+       * tests/mpf/Makefile.am: Add it.
+
+       * mpz/cmp_d.c, mpz/cmpabs_d.c, mpf/cmp_d.c: NaN invalid, Inf bigger
+       than any value.
+       * mpz/set_d.c, mpq/set_d.c, mpf/set_d.c: NaN or Inf invalid.
+
+       * configure.in (AC_CHECK_FUNCS): Add raise.
+       * invalid.c: New file.
+       * Makefile.am: Add it.
+       * gmp-impl.h (__gmp_invalid_operation): Add prototype.
+       (DOUBLE_NAN_INF_ACTION): New macro.
+
+       * tests/trace.c, tests/tests.h (d_trace): New function.
+       * tests/misc.c, tests/tests.h (tests_infinity_d): New function.
+       * tests/misc.c (mpz_erandomb, mpz_errandomb): Use gmp_urandomm_ui.
+
+       * tune/tuneup.c, tune/common.c, tests/devel/try.c: Cast various
+       mp_size_t values for printf %ld in case mp_size_t==int.  Use
+       gmp_printf for mp_limb_t values.
+
+       * gmp.texi (Nomenclature and Types): Add mp_exp_t, mp_size_t,
+       gmp_randstate_t.  Note ulong for bit counts and size_t for byte
+       counts.  Don't bother with @noindent.
+       (Debugging): New valgrind is getting MMX/SSE.
+       (Integer Comparisons): mpz_cmp_d and mpz_cmpabs_d on NaNs and Infs.
+       (Float Comparison): mpf_cmp_d behaviour on NaNs and Infs.
+       (Low-level Functions): Note with mpn_hamdist what hamming distance is.
+       (Formatted Output Strings): Add type M.
+       (Internals): Remove remarks on ulong bits and size_t bytes.  Move int
+       field remarks to ...
+       (Integer Internals, Float Internals): ... here.
+
+2003-04-19  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in (*sparc*-*-* ABI=32): Add umul to extra_functions.
+
+       * mpn/x86/p6/mul_basecase.asm: New file.
+
+2003-04-18  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in (m68060-*-*): Fallback to gcc -m68000 when -m68060 not
+       available, and don't use mpn/m68k/mc68020 asm routines.  (Avoids 32x32
+       mul and 64/32 div which trap to the kernel on 68060.  Advice by
+       Richard Zidlicky.)
+       * mpn/m68k/README: Update notes on directory usage.
+
+       * tests/cxx/Makefile.am (TESTS_ENVIRONMENT): Add a hack to let the
+       test programs run with a shared libgmpxx on openbsd 3.2.
+
+       * gmp.texi (Language Bindings): Add Guile.
+
+2003-04-12  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in (cygwin*, mingw*, pw32*, os2*): Add
+       -Wl,--export-all-symbols to GMP_LDFLAGS, no longer the default in
+       latest mingw and libtool.
+
+       * acinclude.m4 (GMP_ASM_COFF_TYPE): New macro.
+       * configure.in (x86s): Use it.
+       * mpn/x86/x86-defs.m4 (COFF_TYPE): New macro.
+       (PROLOGUE_cpu): Use it, for the benefit of mingw DLLs.
+
+       * gmp-impl.h (mpn_copyi, mpn_copyd): Add __GMP_DECLSPEC.
+
+       * gmp.texi (Known Build Problems): Remove windows test program .exe
+       repeatedly built, fixed by new libtool.  Remove MacOS C++ shared library
+       creation, fixed by new libtool.
+       (Notes for Package Builds, Known Build Problems): Remove DESTDIR notes
+       on libgmpxx, fixed in new libtool.
+
+2003-04-10  Torbjorn Granlund  <tege@swox.com>
+
+       * configure.in: Match turbosparc.
+       * config.guess: Recognize turbosparc (just for *bsd for now).
+
+2003-04-09  Kevin Ryde  <kevin@swox.se>
+
+       * mpf/mul_ui.c [nails]: Call mpf_mul to handle v > GMP_NUMB_MAX.
+
+       * tests/mpz/t-mul.c (main): Don't try FFT sizes when FFT disabled via
+       MP_SIZE_T_MAX, eg. for nails.
+
+       * tests/cxx/t-ternary.cc: Split up tests to help compile speed and
+       memory usage.
+
+       * tests/devel/try.c: Print seed under -R, add -E to reseed, use ulong
+       for seed not uint.
+
+       * gmp.texi: Add @: after various abbreviations, more index entries.
+       (leftarrow): New macro, for non-tex.
+       (Random State Initialization): Remove commented gmp_randinit_lc, not
+       going to be implemented.
+       (Random Number Algorithms): New section.
+       (References): Add Matsumoto and Nishimura on Mersenne Twister, add
+       Bertot, Magaud and Zimmermann on GMP Square Root.
+
+2003-04-06  Kevin Ryde  <kevin@swox.se>
+
+       * tests/mpz/t-gcd_ui.c: New file.
+       * tests/mpz/Makefile.am: Add it.
+
+       * mpz/gcd_ui.c: Correction to return value on longlong limb systems,
+       limb might not fit a ulong.
+
+2003-04-04  Kevin Ryde  <kevin@swox.se>
+
+       * configure, aclocal.m4, ltmain.sh: Update to libtool cvs snapshot
+       2003-04-02.
+
+2003-04-02  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in (*-*-cygwin*): No longer force lt_cv_sys_max_cmd_len,
+       libtool has addressed this now.
+       (AC_PROVIDE_AC_LIBTOOL_WIN32_DLL): Remove this, libtool _LT_AC_LOCK
+       no longer needs it.
+
+       * acinclude.m4 (GMP_PROG_AR): Also set ac_cv_prog_AR and
+       ac_cv_prog_ac_ct_AR when adding flags to AR, so they're not lost by
+       libtool's call to AC_CHECK_TOOL.
+
+2003-04-01  Kevin Ryde  <kevin@swox.se>
+
+       * configure, aclocal.m4, ltmain.sh: Update to libtool cvs snapshot
+       2003-03-31.
+
+       * configure.in (AC_PROG_F77): Add a dummy AC_PROVIDE to stop libtool
+       running F77 probes.
+
+       * randlc2x.c (gmp_rand_lc_struct): Add comments about what exactly is
+       in each field.
+       (randseed_lc): Rename seedp to seedz to avoid confusion with seedp in
+       the lc function.  Suggested by Pedro Gimeno.
+       (gmp_randinit_lc_2exp): Use __GMP_ALLOCATE_FUNC_TYPE.  No need for
+       "+1" in mpz_init2 of _mp_seed.  Don't bother with mpz_init2 for _mp_a.
+
+2003-03-29  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in (m68k-*-*): Use -O2, no longer need to fallback to -O.
+       * acinclude.m4 (GMP_GCC_M68K_OPTIMIZE): Remove macro.
+
+       * configure.in (AC_CHECK_TYPES): Add notes on why tested.
+
+       * gmp.texi (GMPrefu, GMPpxrefu, GMPreftopu, GMPpxreftopu): New macros,
+       use them for all external references to get URLs into HTML output.
+       (Random State Initialization): Add gmp_randinit_set.
+       (Random State Miscellaneous): New section.
+
+2003-03-29  Kevin Ryde  <kevin@swox.se>
+
+       * randbui.c, randmui.c: New files.
+       * Makefile.am: Add them.
+       * gmp-h.in (gmp_urandomb_ui, gmp_urandomm_ui): Add prototypes.
+       * tests/rand/t-urbui.c, tests/rand/t-urmui.c: New files.
+       * tests/rand/Makefile.am: Add them.
+
+       * gmp-impl.h (gmp_randstate_srcptr): New typedef.
+       (gmp_randfnptr_t): Add randiset_fn.
+       * randiset.c: New file.
+       * Makefile.am: Add it.
+       * gmp-h.in (gmp_randinit_set): Add prototype.
+       * randlc2x.c, randmt.c: Add gmp_randinit_set support.
+       * tests/rand/t-iset.c: New file.
+       * tests/rand/Makefile.am: Add it.
+
+       * tests/misc.c, tests/tests.h (call_rand_algs): New function.
+
+2003-03-27  Kevin Ryde  <kevin@swox.se>
+
+       * mpz/bin_uiui.c: Use plain "*" for kacc products rather than
+       umul_ppmm since high not needed, except for an ASSERT now amended.
+
+2003-03-26  Kevin Ryde  <kevin@swox.se>
+
+       * demos/expr/exprfr.c (cbrt, cmpabs, exp2, gamma, nextabove,
+       nextbelow, nexttoward): New functions.
+       * demos/expr/t-expr.c: Exercise these.
+
+       * mpfr/*: Update to mpfr cvs 2003-03-26.
+
+       * gmp-impl.h (MPZ_REALLOC): Use UNLIKELY, to expect no realloc.
+
+       * tune/time.c (cycles_works_p): Scope variables down to relevant part
+       to avoid warnings about unused.
+
+       * configfsf.guess, configfsf.sub: Update to 2003-02-22.
+       * config.guess: Fake a $RANDOM variable when running configfsf.guess,
+       to workaround a problem on m68k NetBSD 1.4.1.
+
+       * mpz/fac_ui.c: Remove unused variable "z1".
+
+       * tune/freq.c (freq_irix_hinv): Allow "Processor 0" line from IRIX 6.5.
+
+2003-03-24  Torbjorn Granlund  <tege@swox.com>
+
+       * randlc2x.c (randget_lc): Remove write-only variable rn.
+       * mpf/eq.c: Remove write-only variable usign.
+       * gen-psqr.c (main): Remove write-only variable numb_bits.
+
+2003-03-17  Torbjorn Granlund  <tege@swox.com>
+
+       * Makefile.am (libgmp_la_SOURCES): Add mp_dv_tab.c.
+       (libmp_la_SOURCES): Add mp_dv_tab.c.
+
+       * mpn/alpha/invert_limb.asm: Add a few comments.
+
+       * mp_dv_tab.c: New file, defining __gmp_digit_value_tab.
+
+       * mpz/set_str.c: Get rid of function digit_value_in_base and use table
+       __gmp_digit_value_tab instead.
+       * mpz/inp_str.c: Likewise.
+       * mpf/set_str.c: Likewise.
+       * mpbsd/min.c: Likewise.
+       * mpbsd/xtom.c: Likewise.
+
+       * mpz/set_str.c: Allow bases <= 62.  Return error for invalid bases.
+       * mpz/inp_str.c: Likewise.
+       * mpf/set_str.c: Likewise.
+       * mpz/out_str.c: Likewise.
+       * mpz/get_str.c: Likewise.
+       * mpf/get_str.c: Likewise.
+
+       * mpz/inp_str.c: Restructure to allocate more string space just
+       before needed.
+       * mpbsd/min.c: Likewise.
+
+       * longlong.h (__udiv_qrnnd_c): Remove redundant casts.
+       (32-bit sparc): Test HAVE_HOST_CPU_supersparc in addition to various
+       sparc_v8 spellings.
+
+2003-03-17  Kevin Ryde  <kevin@swox.se>
+
+       * mpfr/*: Update to mpfr cvs 2003-03-17.
+
+2003-03-15  Kevin Ryde  <kevin@swox.se>
+
+       * Makefile.am (EXTRA_libgmp_la_SOURCES): Use this for TMP_ALLOC
+       sources, instead of a libdummy.la.
+
+2003-03-16  Torbjorn Granlund  <tege@swox.com>
+
+       * config.guess: Recognize supersparc and microsparc for *BSD systems.
+       Generalize some superscalar recognition patterns.
+
+2003-03-14  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/sparc64/udiv.asm: New file.
+
+2003-03-13  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/sparc64: Table cycle counts.  Update some comments.
+
+       * mpn/powerpc64/divrem_1.asm: New file.
+
+2003-03-10  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/mul.c (mpn_mul): Don't blindly expect
+       MUL_KARATSUBA_THRESHOLD to be a constant.
+
+2003-03-07  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/mul.c (mpn_mul): New operand splitting code for
+       avoiding cache misses when un >> MUL_KARATSUBA_THRESHOLD > vn.
+       (MUL_BASECASE_MAX_UN): New #define, default to 500 for now.
+
+2003-03-07  Kevin Ryde  <kevin@swox.se>
+
+       * Makefile.am: Put gmp.h and mp.h under $(exec_prefix)/include.
+       * gmp.texi (Build Options): Add notes on this.
+       Reported by Vincent Lefèvre.
+
+2003-03-06  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in (alpha*-*-* gcc): Add asm option before testing -mcpu,
+       for the benefit of gcc 2.9-gnupro-99r1 on alphaev68-dec-osf5.1 which
+       doesn't otherwise put the assembler in the right mode for -mcpu=ev6.
+
+2003-03-05  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/powerpc32/powerpc-defs.m4: Set up renaming for v registers.
+
+       * mpz/powm.c (redc): Instead of repeated mpn_incr_u invocations,
+       accumulate carries and add at the end.
+       (mpz_powm): Trim tp allocation, now as redc doesn't need carry guard.
+
+2003-02-25  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/x86/pentium4/copyd.asm: Correct header comment.
+
+       * mpn/arm/addmul_1.asm: Correct cycle counts.
+       * mpn/arm/submul_1.asm: Likewise.
+
+2003-02-20  Kevin Ryde  <kevin@swox.se>
+
+       * demos/factorize.c (factor_using_pollard_rho): Test k>0 to avoid
+       infinite loop if k=0 and gcd!=1 reveals a factor.  Reported by John
+       Pongsajapan.
+
+       * gmp.texi, fdl.texi: Update to FDL version 1.2.
+
+2003-02-18  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/arm/mul_1.asm: Fix typo introduced in last change.
+
+2003-02-17  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/ia64/gmp-mparam.h: Retune.
+
+       * mpn/sparc64/copyi.asm: Add some header comments.
+       * mpn/sparc64/copyd.asm: Likewise.
+
+       * mpn/arm/mul_1.asm: Put vl operand last for umull/umlal.
+       Add some header comments.
+       * mpn/arm/addmul_1.asm: Rewrite.
+       * mpn/arm/submul_1.asm: Rewrite.
+       * mpn/arm/gmp-mparam.h: Retune.
+
+2003-02-16  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/arm/copyi.asm: New file.
+       * mpn/arm/copyd.asm: New file.
+
+2003-02-16  Kevin Ryde  <kevin@swox.se>
+
+       * acinclude.m4 (GMP_C_DOUBLE_FORMAT): Tolerate incorrect last data
+       byte seen on an arm system.
+
+2003-02-15  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/arm/gmp-mparam.h: Retune.
+
+2003-02-13  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/powerpc32/750/com_n.asm: Add more cycle counts.
+
+2003-02-13  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in (AC_PREREQ): Bump to 2.57.
+
+       * configure.in, acinclude.m4 (GMP_GCC_WA_OLDAS): New macro, applying
+       -Wa,-oldas only when necessary.
+
+       * configure.in (powerpc*-*-*): Don't use -Wa,-mppc with gcc, it
+       overrides options recent gcc adds for -mcpu, making generated code
+       fail to assemble.
+
+       * tune/tuneup.c (mpn_fft_table): Remove definition, it's in mul_fft.c.
+
+2003-02-12  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/x86/pentium4/sse2/gmp-mparam.h: Retune.
+       * mpn/x86/k7/gmp-mparam.h: Retune.
+       * mpn/x86/k6/gmp-mparam.h: Retune.
+       * mpn/x86/p6/gmp-mparam.h: Retune.
+       * mpn/x86/p6/mmx/gmp-mparam.h: Retune.
+
+       * tests/mpz/t-mul.c (main): Rewrite FFT testing code.
+
+2003-02-10  Torbjorn Granlund  <tege@swox.com>
+
+       * config.guess: Recognize "power2" systems.
+
+       * mpn/powerpc64/gmp-mparam.h: Fix indentation.
+       * mpn/power/gmp-mparam.h: Retune.
+       * mpn/alpha/ev6/nails/gmp-mparam.h: Retune.
+       * mpn/sparc64/gmp-mparam.h: Retune.
+       * mpn/pa64/gmp-mparam.h: Retune.
+       * mpn/sparc32/v8/supersparc/gmp-mparam.h: Retune.
+       * mpn/sparc32/v8/gmp-mparam.h: Retune.
+       * mpn/mips64/gmp-mparam.h: Retune.
+       * mpn/alpha/ev6/gmp-mparam.h: Retune.
+       * mpn/powerpc32/gmp-mparam.h: Retune.
+       * mpn/powerpc32/750/gmp-mparam.h: Retune.
+       * mpn/alpha/ev5/gmp-mparam.h: Retune.
+       * mpn/m68k/gmp-mparam.h: Retune.
+       * mpn/cray/gmp-mparam.h: Set GET_STR_PRECOMPUTE_THRESHOLD.
+
+       * configure.in: Undo this, problem doesn't happen any more:
+       (mips64*-*-*): Pass just -O1 to cc, to work around compiler bug.
+
+2003-02-03  Kevin Ryde  <kevin@swox.se>
+
+       * gmp-impl.h (MPN_NORMALIZE, MPN_NORMALIZE_NOT_ZERO): Add parens
+       around macro parameters.  Reported by Jason Moxham.
+
+2003-02-01  Kevin Ryde  <kevin@swox.se>
+
+       * gmp.texi (Low-level Functions): No overlap permitted by mpn_mul_n.
+       Reported by Jason Moxham.
+       (Formatted Input Strings): Correction to strtoul cross reference
+       formatting.
+       (BSD Compatible Functions): Add index entry for MINT.
+
+2003-01-29  Torbjorn Granlund  <tege@swox.com>
+
+       * gmp-impl.h (mpn_mul_fft): Now returns int.
+
+2003-01-29  Paul Zimmermann  <Paul.Zimmermann@loria.fr>
+
+       * mpn/generic/mul_fft.c: Major rewrite.
+
+2003-01-25  Kevin Ryde  <kevin@swox.se>
+
+       * config.guess (powerpc*-*-*): Remove $dummy.core file when mfpvr
+       fails on NetBSD.
+       (trap): Remove $dummy.core on abnormal termination too.
+
+       * mpfr/*: Update to mpfr cvs 2003-01-25.
+
+2003-01-24  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/ia64/README: Update cycle counts to match current code.
+
+2003-01-18  Kevin Ryde  <kevin@swox.se>
+
+       * mpfr/*: Update to mpfr cvs 2003-01-18.
+
+2003-01-17  Torbjorn Granlund  <tege@swox.com>
+
+       * gmp.texi: Canonicalize URLs.
+
+2003-01-15  Kevin Ryde  <kevin@swox.se>
+
+       * gmp.texi (Notes for Particular Systems): Add hardware floating point
+       precision mode.
+
+       * mpfr/*, configure, aclocal.m4, config.in: Update to mpfr cvs
+       2003-01-15.
+
+2003-01-11  Kevin Ryde  <kevin@swox.se>
+
+       * mpfr/*: Update to mpfr cvs 2003-01-11.
+
+2003-01-09  Kevin Ryde  <kevin@swox.se>
+
+       * mpfr/get_str.c: Update to mpfr cvs 2003-01-09.
+
+       * doc/configuration: Various updates.
+
+2003-01-06  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/alpha/copyi.asm: Avoid `nop' mnemonic, unsupported on Cray.
+       * mpn/alpha/copyd.asm: Likewise.
+
+2003-01-05  Kevin Ryde  <kevin@swox.se>
+
+       * demos/expr/t-expr.c (check_r): Tolerate mpfr_set_str new return
+       value.
+
+       * configure, aclocal.m4 (*-*-osf4*, *-*-osf5*): Regenerate with
+       libtool patch to avoid bash printf option problem when building shared
+       libraries with cxx.
+
+       * configure.in (pentium4-*-*): Use "-march=pentium4 -mno-sse2" since
+       sse2 causes buggy code from gcc 3.2.1 and is only supported on new
+       enough kernels.
+
+       * acinclude.m4 (GMP_PROG_NM): Add some notes about failures, per
+       report by Krzysztof Kozminski.
+
+       * gmp-h.in (mpz_mdivmod_ui, mpz_mmod_ui): Add parens around "r".
+
+       * gmp-h.in (__GMP_CAST): New macro, clean to g++ -Wold-style-cast.
+       (GMP_NUMB_MASK, mpz_cmp_si, mpq_cmp_si, mpz_odd_p, mpn_divexact_by3,
+       mpn_divmod): Use it.  Reported by Krzysztof Kozminski.
+       (mpz_odd_p): No need for the outermost cast to "int".
+       * tests/cxx/t-cast.cc: New file.
+       * tests/cxx/Makefile.am: Add it.
+
+2003-01-04  Kevin Ryde  <kevin@swox.se>
+
+       * mpfr/set_str.c: Update to mpfr cvs 2003-01-04.
+
+       * demos/expr/exprfra.c (e_mpfr_number): Tolerate recent mpfr_set_str
+       returning count of characters accepted.
+
+2003-01-03  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/alpha/copyi.asm: New file.
+       * mpn/alpha/copyd.asm: New file.
+
+2003-01-03  Kevin Ryde  <kevin@swox.se>
+
+       * demos/expr/t-expr.c: Use __gmpfr on some mpfr internals that have
+       changed.
+
+       * mpfr/*, aclocal.m4, config.in, configure: Update to mpfr cvs
+       2003-01-03.
+
+       * gmp.texi (Introduction to GMP): Mention release announcements
+       mailing list, and put home page and ftp before mailing lists.
+
+2002-12-28  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/mul_fft.c (mpn_fft_next_size): Simplify.
+
+2002-12-28  Kevin Ryde  <kevin@swox.se>
+
+       * acinclude.m4 (M68K_PATTERN): New macro.
+       (GMP_GCC_M68K_OPTIMIZE): Use it to avoid m6811 and friends.
+       * configure.in: Ditto.
+
+       * tests/mpz/t-import.c, tests/mpz/t-export.c: Use '\xHH' to avoid
+       warnings about char overflows.
+       * acinclude.m4 (GMP_C_DOUBLE_FORMAT): Ditto.
+
+2002-12-28  Pedro Gimeno  <pggimeno@wanadoo.es>
+
+       * randmt.c (randseed_mt, default_state): Fix off-by-one bug on padding.
+       (randseed_mt): Add ASSERT checking result of mpz_export.
+
+2002-12-24  Kevin Ryde  <kevin@swox.se>
+
+       * gmp.texi (Integer Import and Export): Clarify treatment of signs,
+       reported by Kent Boortz.
+
+       * randmt.c: Use gmp_uint_least32_t.
+       (randseed_mt): Add nails to mpz_export in case mt[i] more than 32 bits.
+
+       * gmp-impl.h (gmp_uint_least32_t): New typedef, replacing GMP_UINT32.
+       * configure.in (AC_CHECK_TYPES): Add uint_least32_t.
+       (AC_CHECK_SIZEOF): Add unsigned short.
+
+2002-12-22  Kevin Ryde  <kevin@swox.se>
+
+       * gmp-impl.h (ULONG_PARITY) [generic C]: Mask result to a single bit.
+       (ULONG_PARITY) [_CRAY, __ia64]: New macros.
+       * tests/t-parity.c: New test.
+       * tests/Makefile.am (check_PROGRAMS): Add it.
+
+       * longlong.h (count_trailing_zeros) [ia64]: New macro.
+
+       * tests/t-count_zeros.c (check_various): Remove unused variable "n".
+
+       * mpn/x86/README: Revise notes on PIC, PLT and GOT.
+
+       * demos/perl/GMP.xs, demos/perl/GMP.pm, demos/perl/test.pl: Add "mt"
+       to GMP::Rand::randstate.
+
+2002-12-22  Pedro Gimeno  <pggimeno@wanadoo.es>
+
+       * randmt.c (randseed_mt): Fix bug that might cause the generator to
+       return all zeros with certain seeds.  Fix WARM_UP==0 case.
+       (gmp_randinit_mt): Initialize to a known state by default.
+       (randget_mt): Remove check for uninitialized buffer: no longer needed.
+       (recalc_buffer): Use ?: instead of two-element array.
+
+       * tests/rand/t-mt.c: New test.
+       * tests/rand/Makefile.am (check_PROGRAMS): Add it.
+
+2002-12-21  Kevin Ryde  <kevin@swox.se>
+
+       * cxx/osdoprnti.cc: Use <cstdarg> and <cstring> rather than <stdarg.h>
+       and <string.h>.  No need for <stdio.h>.
+
+       * demos/expr/expr.c, demos/expr/exprfa.c, demos/expr/exprfra.c,
+       demos/expr/exprza.c: Use mp_get_memory_functions, not
+       __gmp_allocate_func etc.
+       * demos/expr/t-expr.c: Don't use gmp-impl.h.
+       (numberof): New macro.
+
+       * gmp-h.in, gmp-impl.h (__gmp_allocate_func, __gmp_reallocate_func,
+       __gmp_free_func): Move declarations to gmp-impl.h
+
+       * mp_get_fns.c: New file.
+       * Makefile.am (libgmp_la_SOURCES, libmp_la_SOURCES): Add it.
+       * gmp-h.in (mp_get_memory_functions): Add prototype.
+       * gmp.texi (Custom Allocation): Add mp_get_memory_functions, refer to
+       "free" not "deallocate" function.
+       * gmpxx.h (struct __gmp_alloc_cstring): Use mp_get_memory_functions,
+       not __gmp_free_func.
+
+       * gmp-impl.h [__cplusplus]: Add <cstring> for strlen.
+       (gmp_allocated_string): Hold length in a field.
+       * cxx/osdoprnti.cc, cxx/osmpf.cc: Use this.
+
+2002-12-20  Torbjorn Granlund  <tege@swox.com>
+
+       * tests/mpz/t-perfsqr.c (check_sqrt): Print more variables upon
+       failure.
+
+       * mpn/generic/rootrem.c: In Newton loop, pad qp with leading zero.
+
+2002-12-19  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/rootrem.c: Allocate 1.585 (log2(3)) times more space
+       for pp temporary to allow for worst case overestimate of root.
+       Add some asserts.
+
+       * tests/mpz/t-root.c: Generalize and speed up.
+
+2002-12-19  Kevin Ryde  <kevin@swox.se>
+
+       * tests/cxx/t-rand.cc (check_randinit): Add gmp_randinit_mt test.
+
+       * gmp-h.in: Don't bother trying to support Compaq C++ in pre-standard
+       I/O mode.
+       * gmp.texi (Notes for Particular Systems): Compaq C++ must be used in
+       "standard" iostream mode.
+
+2002-12-18  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/alpha/mod_34lsub1.asm: Add code for big-endian, using existing
+       little-endian code only if HAVE_LIMB_LITTLE_ENDIAN is defined.
+
+2002-12-18  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in (HAVE_LIMB_BIG_ENDIAN, HAVE_LIMB_LITTLE_ENDIAN): New
+       defines in config.m4.
+
+2002-12-17  Torbjorn Granlund  <tege@swox.com>
+
+       * printf/printffuns.c (gmp_fprintf_reps): Make it actually work
+       for padding > 256.
+
+2002-12-17  Kevin Ryde  <kevin@swox.se>
+
+       * tune/freq.c: Add <string.h> for memcmp.
+
+       * mpz/pprime_p.c: Use MPN_MOD_OR_MODEXACT_1_ODD.
+
+       * gmp.texi (Formatted Output Strings): %a and %A are C99 not glibc.
+       (Formatted Input Strings): Type "l" is for double too.  Hex floats are
+       accepted for mpf_t.
+       (Formatted Input Functions): Describe tightened parse rule, clarify
+       return value a bit.
+
+       * scanf/doscan.c: Add hex floats, tighten matching to follow C99, for
+       instance "0x" is no longer acceptable to "%Zi".
+       Rename "invalid" label to avoid "invalid" variable, SunOS cc doesn't
+       like them the same.
+       * tests/misc/t-scanf.c: Update tests.
+       * tests/misc/t-locale.c (check_input): Don't let "0x" appear from fake
+       decimal point.
+
+       * config.guess (sparc*-*-*): Look at BSD sysctl hw.model to recognise
+       ultrasparcs.
+
+       * mpfr/tests/dummy.c: New file.
+       * mpfr/tests/Makefile.am (libfrtests_a_SOURCES): Add it.
+
+2002-12-14  Kevin Ryde  <kevin@swox.se>
+
+       * mpbsd/Makefile.am (nodist_libmpbsd_la_SOURCES): Move these mpz
+       sources to libmpbsd_la_SOURCES directly, automake 1.7.2 now gets the
+       ansi2knr setups right for sources in other directories.
+
+       * mpfr/tests/Makefile.am: Add libfrtests.a in preparation for new mpfr.
+
+2002-12-13  Kevin Ryde  <kevin@swox.se>
+
+       * mpfr/Makefile.am (mpfr_TEXINFOS, AM_MAKEINFOFLAGS): Allow for
+       fdl.texi in recent mpfr.
+
+       * configure.in (AC_PROG_EGREP): Ensure this is run outside the Cray
+       conditional AC_EGREP_CPP.
+
+       * configure.in (alpha*-*-*): Use gcc -Wa,-oldas if it works, to avoid
+       problems with new compaq "as" on OSF 5.1.
+
+       * mpn/Makefile.am (EXTRA_DIST): Remove Makeasm.am, automake 1.7.2 does
+       it automatically.
+
+       * acinclude.m4 (AC_LANG_FUNC_LINK_TRY(C)): Remove this hack, fixed by
+       autoconf 2.57.
+
+       * configure.in (AC_CONFIG_LIBOBJ_DIR): Set to mpfr, for the benefit of
+       new mpfr using LIBOBJ.
+
+       * configure.in (AM_INIT_AUTOMAKE): Use "gnu no-dependencies
+       $(top_builddir)/ansi2knr".
+       * */Makefile.am (AUTOMAKE_OPTIONS): Remove, now in configure.in.
+
+       * configure, config.in, INSTALL.autoconf: Update to autoconf 2.57.
+       * */Makefile.in, configure, aclocal.m4, install-sh, mkinstalldirs:
+       Update to automake 1.7.2.
+
+       * gmp.texi (Build Options): Add hppa64 to cpu types.
+       (ABI and ISA): Add gcc to hppa 2.0.
+       (Debugging): Add maximum debuggability config options.
+       (Language Bindings): Add Arithmos, reported by Johan Vervloet.
+       (Formatted Output Strings): 128 bits is about 40 digits, ll is only
+       for long long not long double.
+       (Formatted Input Strings): ll is only for long long not long double.
+
+       * mpz/divis.c, mpz/divis_ui.c, mpz/cong.c, mpz/cong_ui.c: Allow d=0,
+       under the rule n==c mod d iff exists q satisfying n=c+q*d.
+       * gmp.texi (Integer Division): Describe this.
+       Suggested by Jason Moxham.
+
+2002-12-13  Pedro Gimeno  <pggimeno@wanadoo.es>
+
+       * randlc2x.c (lc): Remove check for seedn < an, which is now
+       superfluous.  Add ASSERT to ensure it's correct.  Add ASSERT to check
+       precondition of __GMPN_ADD.
+       (gmp_randinit_lc_2exp): Avoid reallocation by allocating one extra bit
+       for both seed and a.  Simplify seedn < p->_cn case.
+
+       * tests/rand/t-lc2exp.c (check_bigs): Test negative seeds.
+
+2002-12-12  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/pa32/pa-defs.m4 (PROLOGUE_cpu): Zap spurious argument to `.proc'.
+       Add empty `.callinfo'.
+
+2002-12-11  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/x86/pentium4/sse2/addmul_1.asm: Don't reuse `ret' symbol for a
+       label.
+
+2002-12-11  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in (hppa*-*-*): Don't use gcc -mpa-risc-2-0 in ABI=1.0.
+
+       * mpn/pa32/pa-defs.m4: New file, arranging for .proc/.procend.
+       * configure.in (hppa*-*-*): Use it.
+
+       * printf/doprnt.c: Comments on "ll" versus "L".
+
+       * tests/mpz/t-div_2exp.c: Reduce tests, especially the random ones.
+
+2002-12-11  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/get_d.c (limb2dbl): New macro for conversion to `double'.
+       Define it to something non-trivial for 64-bit hppa.
+       * mpq/get_d.c: Likewise.
+       * mpf/get_d.c: Likewise.
+
+       * mpn/x86/pentium4/sse2/addmul_1.asm: Unroll to save one c/l.
+
+2002-12-09  Kevin Ryde  <kevin@swox.se>
+
+       * tune/Makefile.am: Don't use -static under --disable-static, it tends
+       not to work.
+       * configure.in (ENABLE_STATIC): New AM_CONDITIONAL.
+
+       * gmp-h.in: Use <iostream> instead of <iosfwd> with Compaq C++ in
+       pre-standard I/O mode.
+
+       * tests/mpz/t-jac.c, tests/mpz/t-scan.c: Reduce tests.
+
+2002-12-08  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in (*-*-ultrix*): Remove forcible --disable-shared,
+       believe this was a generic problem with libtool, now gone.
+
+2002-12-08  Torbjorn Granlund  <tege@swox.com>
+
+       * gmp-impl.h (USE_LEADING_REGPARM): Disable for PIC code generation.
+
+2002-12-07  Torbjorn Granlund  <tege@swox.com>
+
+       * tests/cxx/t-misc.cc (check_mpq): Use 0/1 for canonical 0 in
+       mpq_cmp_ui calls.
+
+       * configure.in (hppa2.0*-*-*): Pass +O2 instead of +O3 to work around
+       compiler bug with mpfr/tests/tdiv.
+
+2002-12-07  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in (hppa2.0*-*-* ABI=2.0n): Make -mpa-risc-2-0 optional.
+       New hppa-level-2.0 test using GMP_HPPA_LEVEL_20 to detect assembler
+       support for 2.0n.
+       * acinclude.m4 (GMP_PROG_CC_WORKS): Add code that provokes an error
+       from gcc -mpa-risc-2-0 if the assembler doesn't know 2.0 instructions.
+       (GMP_HPPA_LEVEL_20): New macro.
+
+2002-12-07  Pedro Gimeno  <pggimeno@wanadoo.es>
+
+       * gmp-impl.h (gmp_randfnptr_t.randseed_fn): Return void.
+       (LIMBS_PER_ULONG, MPN_SET_UI): New macros.
+       (MPZ_FAKE_UI): Rename couple of parameters.
+
+       * randlc2x.c (gmp_rand_lc_struct): _mp_c and _mp_c_limbs replaced
+       with mpn style _cp and _cn.  All callers changed.
+       (randseed_lc): Fix limbs(seed) > bits_to_limbs(m2exp) case.
+       Remove return value.
+       (gmp_randinit_lc_2exp): Attempt to avoid redundant reallocation.
+
+       * randmt.c (mangle_seed): New function by Kevin.
+       (randseed_mt): Use it instead of mpz_powm, for performance.  Remove
+       return value.  Remove commented out code (an inferior alternative to
+       mpz_export).
+
+       * randsdui.c (gmp_randseed_ui): Use MPZ_FAKE_UI.
+
+       * tests/rand/t-lc2exp.c (check_bigm, check_bigs): New tests.
+       * tests/rand/t-urndmm.c: Add L to constants in calls, for K&R.
+
+2002-12-06  Torbjorn Granlund  <tege@swox.com>
+
+       * configure.in: Remove -g.
+       (hppa*-*-*): Pass -Wl,+vnocompatwarnings with +DA2.0.
+
+2002-12-05  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/pa64/sqr_diagonal.asm: Remove .entry, .proc, .procend.
+       * mpn/pa64/udiv.asm: Likewise.
+
+2002-12-05  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/pa64/sub_n.asm: Remove space in "sub, db" which gas objects to.
+       * mpn/pa64/*.asm, tune/hppa2.asm: Use ".level 2.0" for 2.0n, since gas
+       doesn't like ".level 2.0N".
+
+       * configure.in (hppa*-*-*): Group path and flags choices, for clarity.
+       (hppa1.0*-*-*): Use gcc -mpa-risc-1-0 when available.
+       (hppa2.0*-*-*): Ditto -mpa-risc-2-0.
+       (*-*-hpux*): Exclude ABI=2.0w for hpux[1-9] and hpux10, rather than
+       the converse of allowing it for hpux1[1-9]; ie. list the bad systems
+       rather than try to guess the good systems.
+       (hppa2.0*-*-*) [ABI=2.0n ABI=2.0w]: Add gcc to likely compilers.
+       (hppa*-*-*) [gcc]: Test sizeof(long) to differentiate a 32-bit or
+       64-bit build of the compiler.
+       (hppa64-*-*): Add this as equivalent to hppa2.0-*-*.
+       * acinclude.m4 (GMP_C_TEST_SIZEOF): New macro.
+
+       * tests/tests.h (ostringstream::str): Must null-terminate
+       ostrstream::str() for the string constructor.
+
+2002-12-04  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/pa32/hppa1_1/udiv.asm: Don't wrap symbol to INT64 in L() stuff.
+
+       * longlong.h (mpn_udiv_qrnnd_r based udiv_qrnnd): Fix typo.
+
+       * mpn/powerpc32/powerpc-defs.m4: Define float registers with `f'
+       prefix.
+
+2002-12-04  Kevin Ryde  <kevin@swox.se>
+
+       * gmp.texi (Floating-point Functions): Note the mantissa is binary and
+       decimal fractions cannot be represented exactly.  Suggested by Serge
+       Winitzki.
+       (Known Build Problems): Note libtool stripping options when linking.
+       Reported by Vincent Lefevre.
+
+       * acinclude.m4 (GMP_ASM_LABEL_SUFFIX): Don't make an empty result a
+       failure, that's a valid result.
+       (GMP_ASM_GLOBL): Establish this from the host cpu type.
+       (IA64_PATTERN): New macro.
+       (GMP_PROG_EXEEXT_FOR_BUILD, GMP_C_FOR_BUILD_ANSI,
+       GMP_CHECK_LIBM_FOR_BUILD): Remove temporary files created.
+       * configure.in: Use IA64_PATTERN.
+
+2002-12-03  Torbjorn Granlund  <tege@swox.com>
+
+       * tune/hppa.asm: Use config.m4.
+       * tune/hppa2.asm: Likewise.
+       * tune/hppa2w.asm: Likewise.
+
+       * mpn/pa64: Use LDEF.
+
+2002-12-03  Kevin Ryde  <kevin@swox.se>
+
+       * INSTALL: Use return rather than exit in the example programs.
+       Suggested by Richard Dawe.
+
+       * gmp.texi (Build Options): Move non-unix notes to ...
+       (Notes for Particular Systems): ... here.  Mention MS Interix,
+       reported by Paul Leyland.
+       (C++ Interface Random Numbers): Add gmp_randinit_mt to examples.
+
+       * acinclude.m4 (GMP_ASM_LABEL_SUFFIX): Must test empty suffix first,
+       for the benefit of hppa hp-ux.
+       (GMP_ASM_UNDERSCORE): Grep the output of "nm" instead of trying to
+       construct an asm file, and in case of failure fallback on no
+       underscore and a warning.
+
+       * longlong.h (count_leading_zeros, count_trailing_zeros) [ev67, ev68]:
+       Restrict __asm__ ctlz and cttz to __GNUC__.
+
+       * gen-psqr.c (HAVE_CONST, const): New macros.
+
+       * tests/cxx/t-rand.cc (check_randinit): Add gmp_randinit_mt.
+
+2002-12-02  Torbjorn Granlund  <tege@swox.com>
+
+       * gmp-impl.h: Split popc_limb again, combined version gives too many
+       compiler warnings.
+
+2002-12-01  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/gcdext.c (div1): Disable unused function.
+
+       * mpz/root.c: Don't include stdlib.h or longlong.h.
+       * mpz/rootrem.c: Likewise.
+
+       * extract-dbl.c: abort => ASSERT_ALWAYS.
+       * mpz/set_d.c: Likewise.
+       * mpn/generic/tdiv_qr.c: Likewise.
+
+       * gen-psqr.c (f_cmp_fraction, f_cmp_divisor): Change parameter to
+       `const void *', to match qsort spec.
+
+2002-12-01  Kevin Ryde  <kevin@swox.se>
+
+       * gmp.texi (Integer Division): Fix a couple of @math's for tex.
+       Use @dots in more places.
+
+       * tests/cxx/t-locale.cc: Test non std::locale systems too.
+       * tests/cxx/clocale.c: New file, reinstating what was localeconv.c,
+       and subverting nl_langinfo too.
+       * tests/cxx/Makefile.am (t_locale_SOURCES): Add it.
+
+       * tests/tests.h (ostringstream, istringstream): Provide fakes of these
+       if <sstream> not available.
+       * tests/cxx/t-locale.cc, tests/cxx/t-ostream.cc: Remove <sstream>.
+       * configure.in (AC_CHECK_HEADERS) [C++]: Add <sstream>.
+
+2002-11-30  Torbjorn Granlund  <tege@swox.com>
+
+       * printf/doprnt.c (__gmp_doprnt): Comment out a `break' to shut up
+       compiler warnings.
+
+       * mpn/ia64/invert_limb.asm: Add `many' hints to return insns.
+
+       * mpn/ia64/divrem_1.asm: Allocate more local registers; put b0 in
+       one of them.
+
+       * mpn/ia64/popcount.asm: Properly restore register ar.lc.
+
+       * longlong.h (umul_ppmm) [ia64]: Form both product parts in asm.
+
+       * mpz/bin_uiui.c: Cast umul_ppmm operands.
+
+       * scanf/doscan.c (gmpscan): Remove unused label store_get_digits.
+
+       * gmp-impl.h: #undef MIN and MAX before #defining.
+
+       * mpn/ia64/copyi.asm: Add `;' after bundle declarators.
+       * mpn/ia64/copyd.asm: Likewise.
+
+       * mpn/ia64/divrem_1.asm: Add some syntax to placate the HP-UX assembler.
+
+2002-11-30  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in (AC_CHECK_HEADERS): Add nl_types.h.
+       * tests/misc/t-locale.c: Use this, for nl_item on netbsd 1.4.1.
+
+2002-11-29  Torbjorn Granlund  <tege@swox.com>
+
+       * tests/devel/addmul_1.c: Provide prototype for mpn_print.
+       (OPS): Account for function overhead.
+       * tests/devel/{submul_1.c,mul_1.c,add_n.c,sub_n.c}: Likewise.
+
+       * mpn/ia64/addmul_1.asm: Rewrite.
+
+2002-11-28  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/ia64/sqr_diagonal.asm: Don't allocate any registers.
+
+       * mpn/ia64/submul_1.asm: Adapt to Itanium 2.
+
+       * mpn/ia64/mul_1.asm: Fix typo in HAVE_ABI_32 code.
+
+       * mpn/ia64/add_n.asm: Rewrite.
+       * mpn/ia64/sub_n.asm: Rewrite.
+
+2002-11-28  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/Makefile.am (nodist_EXTRA_libmpn_la_SOURCES): Use this rather
+       than libdummy.
+       * tests/Makefile.am (EXTRA_libtests_la_SOURCES): Use this for
+       x86call.asm and x86check.c rather than libdummy.
+
+2002-11-27  Torbjorn Granlund  <tege@swox.com>
+
+       * tests/mpz/t-mul.c: Implement reference Karatsuba multiplication.
+       Rewrite testing scheme to run fewer really huge tests.
+
+2002-11-26  Torbjorn Granlund  <tege@swox.com>
+
+       * tests: Decrease repetition count for some of the slowest tests.
+
+       * mpn/ia64/divrem_1.asm: New file.
+
+2002-11-25  Torbjorn Granlund  <tege@swox.com>
+
+       * mpfr/tests/tdiv.c: Decrease number of performed tests.
+
+2002-11-23  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/ia64/mul_1.asm: Rewrite.
+
+2002-11-23  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/ia64/README: Add some references.
+
+       * gmp.texi (Build Options): Add itanium and itanium2, mention DocBook
+       and XML from makeinfo, add texinfo top level cross reference.
+       (Integer Division): Try to clarify 2exp functions a bit.
+       (C++ Interface Floats): Giving bad string to constructor is undefined.
+       (C++ Interface Integers, C++ Interface Rationals): Ditto, and show
+       default base in prototype, not the description.
+
+       * config.sub, config.guess, configure.in (itanium, itanium2): New cpu
+       types.
+
+       * tests/misc/t-printf.c, tests/misc/t-scanf.c (check_misc): Suppress
+       %zd test on glibc prior to 2.1, it's not supported.
+
+2002-11-22  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/ia64/copyi.asm: Optimize for Itanium 2.
+       * mpn/ia64/copyd.asm: Likewise.
+
+2002-11-20  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/ia64/sqr_diagonal.asm: New file.
+
+       * mpn/ia64/submul_1.asm: Handle vl == 0 specially.
+
+2002-11-20  Kevin Ryde  <kevin@swox.se>
+
+       * tests/cxx/t-locale.cc: Test with locales imbued into stream, use
+       <sstream>, eliminated some C-isms.  istream tests disabled, not yet
+       locale-ized.
+       * tests/cxx/Makefile.am (t_locale_SOURCES): Remove localeconv.c.
+       * tests/cxx/localeconv.c: Remove file.
+
+       * configure.in (AC_CHECK_TYPES) [C++]: Add std::locale.
+       * printf/doprntf.c: Add decimal point parameter, remove localeconv use.
+       * gmp-impl.h (__gmp_doprnt_mpf): Update prototype, bump symbol to
+       __gmp_doprnt_mpf2 to protect old libgmpxx.
+       * cxx/osmpf.cc: Use this with ostream locale decimal_point facet.
+       * printf/doprnt.c: Ditto, with GMP_DECIMAL_POINT.
+
+       * gmp-h.in: More comments on __declspec for windows DLLs.
+
+       * mpf/set_str.c, scanf/doscan.c: Cast through "unsigned char" for
+       decimal point string, same as input chars.
+
+       * configure.in (AC_CHECK_HEADERS): Add langinfo.h.
+       (AC_CHECK_FUNCS): Add nl_langinfo.
+       * gmp-impl.h (GMP_DECIMAL_POINT): New macro.
+       * mpf/out_str.c, mpf/set_str.c, scanf/doscan.c: Use it, and don't
+       bother with special code for non-locale systems.
+       * tests/misc/t-locale.c: Subvert nl_langinfo too.
+
+       * configure.in, acinclude.m4 (GMP_ASM_X86_GOT_UNDERSCORE): New macro.
+       * mpn/x86/x86-defs.m4 (_GLOBAL_OFFSET_TABLE_): New macro, inserting
+       extra underscore for OpenBSD.
+       * mpn/x86/README (_GLOBAL_OFFSET_TABLE_): Update notes.
+       Reported by Christian Weisgerber.
+
+       * tests/cxx/t-rand.cc (check_randinit): New function, collecting up
+       constructor tests.
+
+       * tests/cxx/t-ostream.cc: Use <sstream> instead of <strstream>, use
+       compare instead of strcmp.
+
+       * gmpxx.h (__gmp_randinit_lc_2exp_size_t): Return type is int.
+
+2002-11-18  Kevin Ryde  <kevin@swox.se>
+
+       * tune/speed.c (r_string): Use CNST_LIMB with <N>bits, spotted by
+       Torbjorn.
+
+2002-11-19  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/ia64/mul_1.asm: Remove redundant cmp from prologue code.
+       Streamline prologue.
+       * mpn/ia64/addmul_1.asm: Likewise.
+       * mpn/ia64/submul_1.asm: New file.
+       * mpn/ia64/submul_1.c: Remove.
+
+2002-11-17  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/popham.c: New file, using new faster algorithm.
+       * mpn/generic/popcount.c: Remove.
+       * mpn/generic/hamdist.c: Remove.
+
+       * mpn/ia64/addmul_1.asm: Don't clobber callee-saves register f16.
+       * mpn/ia64/mul_1.asm: Likewise.
+
+       * mpn/ia64/addmul_1.asm: Add pred.rel declarations.  Resolve RAW
+       hazards for condition code registers, duplicating code as needed.  Add
+       prediction to all branches.
+       * mpn/ia64/mul_1.asm: Likewise.
+       * mpn/ia64/add_n.asm: Likewise.
+       * mpn/ia64/sub_n.asm: Likewise.
+       * mpn/ia64/copyi.asm: Likewise.
+       * mpn/ia64/copyd.asm: Likewise.
+
+       * mpn/generic/random2.c: Add a cast to silence some compilers.
+
+2002-11-16  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/powm.c: Cap allocation by limiting k to 10 (512 precomputed
+       values).
+
+2002-11-16  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in, gmp.texi: Remove powerpc64 ABI=32L, doesn't work and
+       is unlikely to ever do so.
+       * configure.in: Allow ABI=32 for powerpc64.
+       Reported by David Edelsohn.
+
+2002-11-14  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/Makefile.am (nodist_libdummy_la_SOURCES): Add addmul_2.c
+       addmul_3.c addmul_4.c addmul_5.c addmul_6.c addmul_7.c addmul_8.c.
+
+       * gmp-h.in (__GMP_DECLSPEC_EXPORT, __GMP_DECLSPEC_IMPORT) [__GNUC__]:
+       Use __dllexport__ and __dllimport__ to keep out of application
+       namespace.
+
+2002-11-14  Gerardo Ballabio <gerardo.ballabio@unimib.it>
+
+       * gmpxx.h (__gmp_randinit_default_t, __gmp_randinit_lc_2exp_t,
+       __gmp_randinit_lc_2exp_size_t): Use extern "C" { typedef ... }, for
+       the benefit of g++ prior to 3.2.
+
+2002-11-12  Kevin Ryde  <kevin@swox.se>
+
+       * gmpxx.h (gmp_randclass constructors): Patch from Roberto Bagnara to
+       use extern "C" on C function pointer arguments.
+
+2002-11-09  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in, Makefile.am, printf/Makefile.am,
+       printf/repl-vsnprintf.c: Handle vsnprintf replacement with C
+       conditionals.
+
+       * acinclude.m4 (AC_LANG_FUNC_LINK_TRY(C)): Workaround troubles recent
+       HP cc +O3 causes for AC_CHECK_FUNCS.
+
+       * gmp.texi (Notes for Particular Systems): Add Sparc app regs.
+       (Debugging): Note gcc -fstack options to detect overflow.
+       (Formatted Output Strings, Formatted Input Strings): Format strings
+       are not multibyte.
+
+2002-11-06  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/tdiv_qr.c: Remove a bogus assert.
+
+2002-11-05  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/tdiv_qr.c: Remove two dead mpn_divrem_2 calls.
+
+2002-11-04  Kevin Ryde  <kevin@swox.se>
+
+       * acinclude.m4 (GMP_C_INLINE): Don't define "inline" for C++.
+
+       * demos/expr/expr-impl.h (stdarg.h): Test __DECC same as gmp.h.
+
+       * mpbsd/mtox.c, printf/obprintf.c, printf/obvprintf.c,
+       scanf/vsscanf.c, demos/expr/expr.c, demos/expr/exprf.c,
+       demos/expr/exprfa.c, demos/expr/exprfr.c, demos/expr/exprq.c,
+       demos/expr/exprz.c, demos/expr/exprza.c: Add <string.h> for strlen and
+       memcpy.
+
+2002-11-02  Kevin Ryde  <kevin@swox.se>
+
+       * longlong.h: Test __x86_64__ not __x86_64.  Reported by Andreas
+       Jaeger.
+
+       * mpz/import.c, mpz/export.c: Use char* subtract from NULL to get
+       pointer alignment, for the benefit of Cray vector systems.
+
+       * cxx/ismpf.cc: Use <clocale>.
+       * tests/cxx/t-locale.cc: No need to conditionalize <clocale>.
+
+       * scanf/doscan.c: Don't use isascii, rely on C99 ctype.h.
+
+       * gmp.texi (Build Options): Describe CC_FOR_BUILD, cross reference
+       texinfo manual.
+       (ABI and ISA): Add powerpc620 and powerpc630 to powerpc64, add NetBSD
+       and OpenBSD sparc64.
+       (Notes for Package Builds): Cross reference libtool manual.
+       (Notes for Particular Systems): Add OpenBSD to non-MMX versions of gas.
+       (Known Build Problems): Add MacOS X C++ shared libraries.
+
+2002-10-31  Kevin Ryde  <kevin@swox.se>
+
+       * gmp-impl.h, tune/speed.c, tune/speed.h, tune/common.c, tune/many.pl,
+       tests/devel/try.c, tests/tests.h, tests/refmpn.c (mpn_addmul_5,
+       mpn_addmul_6, mpn_addmul_7, mpn_addmul_8): Add testing and measuring.
+       * configure.in (config.in): Add #undefs of HAVE_NATIVE_mpn_addmul_5,
+       HAVE_NATIVE_mpn_addmul_6, HAVE_NATIVE_mpn_addmul_7,
+       HAVE_NATIVE_mpn_addmul_8.
+       (gmp_mpn_functions_optional): Add addmul_5 addmul_6 addmul_7 addmul_8.
+
+       * tests/devel/try.c (ASSERT_CARRY): Remove, now in gmp-impl.h.
+       (try_one): Do dest setups after sources, for benefit of
+       dst0_from_src1.
+
+2002-11-01  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/tdiv_qr.c: Avoid quadratic behaviour for
+       sub-division when numerator is more than twice the size of the
+       denominator.  Simplify loop logic for the same case.  Clean up a
+       few comments.
+
+2002-10-29  Torbjorn Granlund  <tege@swox.com>
+
+       * configure.in (*-cray-unicos*): Pass -hnofastmd again.
+
+2002-10-25  Torbjorn Granlund  <tege@swox.com>
+
+       * tests/tadd.c: Disable test of denorms.
+
+2002-10-23  Linus Nordberg  <linus@swox.se>
+
+       * gmp.texi (Introduction to GMP): Update section about mailing
+       lists.
+
+2002-10-23  Kevin Ryde  <kevin@swox.se>
+
+       * gmp-h.in (__GMP_ATTRIBUTE_PURE): Suppress this when
+       __GMP_NO_ATTRIBUTE_CONST_PURE is defined.
+       * gmp-impl.h (ATTRIBUTE_CONST): Ditto.
+       * tune/common.c: Use __GMP_NO_ATTRIBUTE_CONST_PURE.
+
+       * tune/speed.h, tune/many.pl: Remove ATTRIBUTEs from prototypes.
+       * tune/speed.h: Remove various "dummy" variables attempting to keep
+       "pure" calls live, no longer necessary.  They weren't sufficient for
+       recent MacOS cc anyway.
+
+2002-10-21  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/cray/ieee/addmul_1.c: Handle overlap as in mul_1.c.
+       * mpn/cray/ieee/submul_1.c: Likewise.
+
+2002-10-19  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in (sparcv9 etc -*-*bsd*): Add support for NetBSD and
+       OpenBSD sparc64.  Reported by Christian Weisgerber.
+       (AC_CHECK_HEADERS): Add sys/param.h for sys/sysctl.h on *BSD.
+
+       * demos/calc/calc.y: Change ={ to {, needed for bison 1.50.
+
+       * longlong.h (count_leading_zeros, count_trailing_zeros) [x86_64]:
+       Should be UDItype.
+
+       * mpz/set_str.c, mpf/set_str.c, mpbsd/xtom.c, scanf/sscanffuns.c: Cast
+       chars through "unsigned char" to zero extend, required by C99 ctype.h.
+
+2002-10-18  Torbjorn Granlund  <tege@swox.com>
+
+       * tests/mpz/t-root.c: Test also mpz_rootrem.
+
+       * mpn/generic/rootrem.c: Avoid overflow problem when n is huge.
+
+       * mpz/root.c: Avoid overflow problems in allocation computation; also
+       simplify it.  Misc cleanups.
+
+       * mpz/rootrem.c: New file.
+       * Makefile.am, mpz/Makefile.am, gmp-h.in: Add them.
+
+2002-10-17  Torbjorn Granlund  <tege@swox.com>
+
+       * gmp-impl.h (popc_limb): Combine variants.
+
+2002-10-14  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in (AC_CHECK_HEADERS): Add sys/time.h for sys/resource.h
+       test, needed by SunOS, and next autoconf will insist headers actually
+       compile.
+
+2002-10-08  Kevin Ryde  <kevin@swox.se>
+
+       * tune/time.c (speed_time_init): Allow for Cray times() apparently
+       being a cycle counter.
+
+       * dumbmp.c (mpz_get_str): Fix buf size allocation.
+
+       * tests/trace.c, tests/tests.h (mp_limb_trace): New function.
+
+       * tune/speed-ext.c (SPEED_EXTRA_PROTOS): Use __GMP_PROTO.
+       * tests/devel/try.c (malloc_region): Add a cast for SunOS cc.
+
+       * configure.in (AC_CHECK_FUNCS): Add strerror.
+       (AC_CHECK_DECLS): Add sys_errlist, sys_nerr.
+       * tune/time.c, tests/devel/try.c: Use them.
+
+2002-10-05  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in (AC_CHECK_HEADERS): Test float.h, not in SunOS cc.
+       * printf/repl-vsnprintf.c: Use this.
+
+       * configure.in (*sparc*-*-*): Collect up various options for clarity,
+       use gcc -mcpu=supersparc and ultrasparc3, use cc -xchip, don't use
+       -xtarget=native, use cc configs with acc, merge SunOS bundled cc and
+       SunPRO cc configs.
+
+       * gmp-impl.h (gmp_randfnptr_t): Use __GMP_PROTO.
+       (MPZ_REALLOC): Cast _mpz_realloc return value to mp_ptr, for the
+       benefit of SunOS cc which requires pointers of the same type on the
+       two legs of a ?:.
+
+       * dumbmp.c (mpz_realloc): Add a cast to avoid a warning from SunOS cc.
+
+       * acinclude.m4: Allow for i960 b.out default cc output.
+
+       * gmp.texi (Random State Initialization): Add gmp_randinit_mt.
+       (Perfect Square Algorithm): Describe new mpn_mod_34lsub1 use.
+       (Factorial Algorithm): Describe Jason's new code.
+       (Binomial Coefficients Algorithm): Ideas about improvements
+       moved to doc/projects.html.
+       (Contributors): Add Jason Moxham and Pedro Gimeno.
+
+2002-10-03  Kevin Ryde  <kevin@swox.se>
+
+       * gen-psqr.c: New file.
+       * Makefile.am, mpn/Makefile.am: Use it to generate mpn/perfsqr.h.
+       * mpn/generic/perfsqr.c: Use generated data, put mod 256 data into
+       limbs to save space, use mpn_mod_34lsub1 when good.
+       * tests/mpn/t-perfsqr.c: New file.
+       * tests/mpn/Makefile.am (check_PROGRAMS): Add it.
+       * tests/mpz/t-perfsqr.c (check_modulo): New test.
+       (check_sqrt): New function holding current tests.
+
+       * configure.in (AC_INIT): Modernize to package name and version here
+       rather than AM_INIT_AUTOMAKE, add bug report email.
+       (AC_CONFIG_SRCDIR): New macro.
+
+       * gmp-impl.h (ROUND_UP_MULTIPLE): Fix for non-power-of-2 moduli (not
+       normal in current uses), clarify the comments a bit.
+
+2002-09-30  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/Makeasm.am (.s.lo): Add --tag=CC for the benefit of CCAS!=CC,
+       same as .S.lo and .asm.lo.
+
+       * Makefile.am (gen-fac_ui, gen-fib, gen-bases): Quote source files in
+       test -f stuff to avoid Sun make rewriting them.
+
+2002-09-28  Kevin Ryde  <kevin@swox.se>
+
+       * tests/devel/try.c, tune/speed.c: Avoid strings longer than C99
+       guarantees.
+
+       * tests/refmpn.c, tests/tests.h (refmpn_zero_extend, refmpn_normalize,
+       refmpn_sqrtrem): New functions.
+       * tests/devel/try.c (TYPE_SQRTREM): Use refmpn_sqrtrem.
+       (compare): Correction to tr->dst_size subscripting.
+
+       * dumbmp.c: Add several new functions, allow for initial n<d in
+       mpz_tdiv_q (now in mpz_tdiv_qr actually).
+
+       * gen-bases.c (chars_per_limb): Get GMP_NUMB_BITS for base==2,
+       similarly other powers of 2, which this was in the past.
+       * tests/refmpn.c (refmpn_chars_per_limb): Ditto.
+       * tests/mpn/t-mp_bases.c: Test chars_per_limb for power-of-2 bases too.
+
+       * Makefile.am, mpz/Makefile.am: Setups for gen-fac_ui.c generating
+       mpz/fac_ui.h.
+
+2002-09-28  Jason Moxham <J.L.Moxham@maths.soton.ac.uk>
+
+       * dumbmp.c (mpz_pow_ui, mpz_addmul_ui, mpz_root): New functions.
+       * gen-fac_ui.c: New file.
+       * mpz/fac_ui.c: Rewrite.
+
+2002-09-26  Kevin Ryde  <kevin@swox.se>
+
+       * tests/cxx/localeconv.c: New file, split from t-locale.cc.
+       * tests/cxx/t-locale.cc: Use it.
+       * tests/cxx/Makefile.am (t_locale_SOURCES): Add it.
+
+       * tests/cxx/Makefile.am: Updates for Gerardo's new test programs.
+
+2002-09-26  Gerardo Ballabio <gerardo.ballabio@unimib.it>
+
+       * gmpxx.h (__gmp_cmp_function): Bug fixes in double/mpq and
+       double/mpfr comparisons.
+
+       * tests/cxx/t-assign.cc, tests/cxx/t-binary.cc, tests/cxx/t-constr.cc,
+       tests/cxx/t-ternary.cc, tests/cxx/t-unary.cc: Revise and add various
+       tests, including some for mpfr, some split from t-expr.cc.
+       * tests/cxx/t-locale.cc: Modernize include files.
+       * tests/cxx/t-ostream.cc: Modernize include files, use cout rather
+       than printf for diagnostics.
+       * tests/cxx/t-misc.cc, tests/cxx/t-rand.cc: New files, split from
+       t-allfuns.cc.
+       * tests/cxx/t-ops.cc: New file, some split from t-allfuns.cc.
+       * tests/cxx/t-prec.cc: New file.
+       * tests/cxx/t-allfuns.cc, tests/cxx/t-expr.cc: Remove files.
+
+2002-09-25  Torbjorn Granlund  <tege@swox.com>
+
+       * configure.in (*-cray-unicos*): Remove -hscalar0, it causes too much
+       performance loss.  Let's trust Cray to fix their compilers.
+
+2002-09-24  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/powerpc32/add_n.asm: Rewrite.
+       * mpn/powerpc32/sub_n.asm: Rewrite.
+
+2002-09-24  Pedro Gimeno  <pggimeno@wanadoo.es>
+
+       * randlc2x.c: Prepare for nails by changing type of _mp_c to mpz_t,
+       make _mp_seed fixed-size, disallow SIZ(a)==0 to optimize comparisons
+       for mpn_mul.
+       * gmp-impl.h (MPZ_FAKE_UI): New macro.
+
+       * randmt.c: Some constants made long for K&R compliance; remove UL at
+       the end of other constants; use mp_size_t where appropriate; use
+       mpz_export to split the seed.
+
+       * gmp-impl.h: Remove type cast in RNG_FNPTR and RNG_STATE, to allow
+       them to be used as lvalues.
+       * randclr.c, randlc2x.c, randmt.c, randsd.c: All callers changed.
+
+       * mpz/urandomm.c: Replace mpn_cmp with MPN_CMP.
+
+       * tests/rand/gen.c: Get rid of gmp_errno.
+
+2002-09-24  Kevin Ryde  <kevin@swox.se>
+
+       * gmp.texi (Custom Allocation): Keep allocate_function etc out of the
+       function index by using @deftypevr.
+       More index entries.
+
+2002-09-24  Gerardo Ballabio <gerardo.ballabio@unimib.it>
+
+       * gmpxx.h (mpfr_class constructors from strings): Precision was set
+       incorrectly, fixed.
+
+2002-09-23  Torbjorn Granlund  <tege@swox.com>
+
+       * mpf/urandomb.c: Don't crash for overlarge nbits argument.
+       Let nbits==0 mean to fill number with random bits.
+
+2002-09-21  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/alpha/mod_34lsub1.asm: Add r31 dummy operand to `br' instruction.
+
+2002-09-20  Gerardo Ballabio <gerardo.ballabio@unimib.it>
+
+       * gmpxx.h (__gmp_binary_equal, __gmp_binary_not_equal): Fix broken
+       mpq/double functions.
+
+2002-09-18  Torbjorn Granlund  <tege@swox.com>
+
+       * randmt.c (randget_mt): Fix typo.
+
+2002-09-18  Kevin Ryde  <kevin@swox.se>
+
+       * gmp-impl.h (_gmp_rand): Avoid evaluating "state" more than once, for
+       the benefit of places calling it with RANDS.
+
+       * randmt.c (randseed_mt): Use mpz_init for mod and seed1, for safety.
+
+       * tune/tuneup.c (sqr_karatsuba_threshold): Initialize to
+       TUNE_SQR_KARATSUBA_MAX so mpn_sqr_n works for randmt initialization.
+
+       * gmp.texi (Integer Comparisons): Remove mention of non-existent
+       mpz_cmpabs_si, reported by Conrad Curry.
+
+       * tune/speed.c, tune/speed.h, tune/common.c: Add gmp_randseed,
+       gmp_randseed_ui and mpz_urandomb.
+
+2002-09-18  Pedro Gimeno  <pggimeno@wanadoo.es>
+
+       * tests/rand/gen.c: Add mt, remove lc and bbs.
+
+       * Makefile.am (libgmp_la_SOURCES): Add randmt.c, remove randlc.c and
+       randraw.c.
+
+       * randmt.c: New file.
+       * gmp-h.in (gmp_randinit_mt): Add prototype.
+       * randdef.c: Use gmp_randinit_mt.
+
+       * gmp-impl.h (RNG_FNPTR, RNG_STATE): New macros.
+       (gmp_randfnptr_t): New structure.
+       (_gmp_rand): Now a macro not a function.
+       * gmp-h.in (__gmp_randata_lc): Remove, now internal to randlc2x.c.
+       (__gmp_randstate_struct): Revise comments on field usage.
+       * randsd.c, randclr.c: Use function pointer scheme.
+       * randsdui.c: Use gmp_randseed.
+       * randraw.c: Remove file.
+       * randlc2x.c: Collect up lc_2exp related code from randsd.c, randclr.c
+       and randraw.c, use function pointer scheme, integrate seed==0/a==0
+       into main case and fix case where bits(a) < m2exp.
+
+       * randlc.c: Remove file, never documented and never worked.
+       * gmp-h.in (gmp_randinit_lc): Remove prototype.
+
+2002-09-16  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/alpha/mod_34lsub1.asm: New file.
+
+2002-09-16  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in, acinclude.m4 (GMP_C_RESTRICT): Remove this, not
+       currently used, and #define restrict upsets Microsoft C headers on
+       win64.  Reported by David Librik.
+
+       * configure.in (x86): Add gcc 3.2 -march and -mcpu flags, remove some
+       unnecessary -march=i486 fallbacks.
+
+       * gmp.texi (Notes for Particular Systems): Note cl /MD is required for
+       Microsoft C and MINGW to cooperate on I/O.  Explained by David Librik.
+       (Language Bindings): Add linbox.
+       * gmp.texi (Language Bindings):
+
+2002-09-12  Kevin Ryde  <kevin@swox.se>
+
+       * mpz/aorsmul_i.c: Allow for w==x overlap with nails.  Test
+       BITS_PER_ULONG > GMP_NUMB_BITS rather than GMP_NAIL_BITS != 0.
+       * tests/mpz/t-aorsmul.c: Test this.
+
+       * tune/common.c: mpn_mod_34lsub1 only exists for GMP_NUMB_BITS%4==0.
+       * tune/speed.c: Add mpn_mod_34lsub1.
+
+2002-09-10  Pedro Gimeno  <pggimeno@wanadoo.es>
+
+       * rand.c: Remove old disabled BBS code.
+       * mpf/urandomb.c: Use BITS_TO_LIMBS.
+
+2002-09-10  Kevin Ryde  <kevin@swox.se>
+
+       * gmp.texi (Multiplication Algorithms): FFT is now enabled by default.
+
+2002-09-10  Pedro Gimeno  <pggimeno@wanadoo.es>
+
+       * mpz/urandomm.c: Use mpn level functions, avoid an infinite loop if
+       _gmp_rand forever returns all "1" bits.
+       * tests/rand/t-urndmm.c: New file.
+       * tests/rand/Makefile.am (check_PROGRAMS): Add it.
+
+       * gmp-impl.h (BITS_TO_LIMBS): New macro.
+       * mpz/urandomb.c: Use it, and use MPZ_REALLOC.
+
+2002-09-08  Kevin Ryde  <kevin@swox.se>
+
+       * acinclude.m4 (GMP_GCC_WA_MCPU): New macro.
+       * configure.in (alpha*-*-*): Use it to avoid -Wa,-mev67 if gas isn't
+       new enough to know ev67.  Reported by David Bremner.
+
+2002-07-30  Gerardo Ballabio <gerardo.ballabio@unimib.it>
+
+       * gmpxx.h (__gmpz_value etc): Remove, use mpz_t etc instead.
+       (__gmp_expr): Reorganise specializations, use __gmp_expr<T,T> not
+       mpz_class etc.
+       (mpfr evals): Remove mode parameter, was always
+       __gmp_default_rounding_mode anyway.
+
+2002-09-07  Kevin Ryde  <kevin@swox.se>
+
+       * gmp-h.in, mp-h.in: Use #ifdef for tests, for the benefit of
+       applications using gcc -Wundef.
+
+       * longlong.h: Define COUNT_LEADING_ZEROS_NEED_CLZ_TAB for all alphas,
+       since mpn/alpha/cntlz.asm always goes into libgmp.so, even for ev67
+       and ev68 which don't need it.  Reported by David Bremner.
+
+       * gmp.texi (Demonstration Programs): New section, expanding on what
+       was under "Build Options".
+       (Converting Floats): Don't need \ for _ in @var within @math.
+       Add and amend various index entries.
+
+       * demos/qcn.c: Add -p prime limit option.
+
+2002-08-30  Kevin Ryde  <kevin@swox.se>
+
+       * mpz/pprime_p.c: Handle small negatives with isprime, in particular
+       must do so for n==-2.
+       * tests/mpz/t-pprime_p.c: New file.
+       * tests/mpz/Makefile.am: Add it.
+
+2002-08-26  Torbjorn Granlund  <tege@swox.com>
+
+       * gmp.texi (Converting Floats): Fix typo in mpf_get_d_2exp docs,
+       reported by Paul Zimmermann.
+
+2002-08-26  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in: Echo the ABI being tried for the compilers.
+       (powerpc*-*-*): Use powerpc64/aix.m4 for ABI=aix64 too.
+       (AC_CHECK_FUNCS): Add strtol, for tests/rand/gen.c.
+
+2002-08-24  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in (HAVE_HOST_CPU_, HAVE_HOST_CPU_FAMILY_, HAVE_NATIVE_):
+       Setup templates for these using AH_VERBATIM rather than acconfig.h,
+       preferred by latest autoconf.  Prune lists to just things used.
+       * acconfig.h: Remove file.
+
+       * mpn/powerpc32/mode1o.asm: Forgot ASM_START.
+
+       * tune/time.c (have_cgt_id): Renamed from HAVE_CGT_ID to avoid
+       confusion with autoconf outputs, and turn it into a "const" variable.
+
+2002-08-23  Torbjorn Granlund  <tege@swox.com>
+
+       * configure.in: Choose powerpc32/aix.m4 or powerpc64/aix.m4 based on
+       ABI, not configuration triple.
+
+       * mpz/pprime_p.c: Partially undo last change--handle small and
+       negative numbers in the same test.
+
+2002-08-22  Kevin Ryde  <kevin@swox.se>
+
+       * gmp-impl.h (MUL_FFT_THRESHOLD, SQR_FFT_THRESHOLD): Note
+       mpn/generic/mul_fft.c is not nails-capable, and don't bother setting
+       other FFT data for nails.
+
+       * configfsf.guess: Update to 2002-08-19.
+       * configfsf.sub: Update to 2002-08-20.
+
+       * config.guess (powerpc*-*-*): Use a { } construct to suppress SIGILL
+       message on AIX.
+
+2002-08-20  Kevin Ryde  <kevin@swox.se>
+
+       * gmp.texi (Build Options): Add ia64 under cpu types.
+       (ABI and ISA): Describe IRIX 6 ABI=o32.
+       (Notes for Particular Systems): Remove -march=pentiumpro, now ok.
+       (Known Build Problems): Binutils 2.12 is ok for libgmp.a.
+       (Emacs): New section.
+       (Language Bindings): Update MLton URL, reported by Stephen Weeks.
+       (Prime Testing Algorithm): New section.
+       Don't put a blank line after @item in @table since it can make a page
+       break between the heading and the entry.
+       Misc tweaks elsewhere, in particular more index entries.
+
+       * mpz/millerrabin.c: Need x to be size+1 for change to urandomm.
+
+       * gmp-impl.h: Comments on the use of __GMP_DECLSPEC.
+
+       * tune/time.c (freq_measure_mftb_one): Use struct_timeval, for the
+       benefit of mingw.
+
+       * tests/refmpn.c, tests/tests.h (ref_addc_limb, ref_subc_limb):
+       Renamed from add and sub, following gmp-impl.h ADDC_LIMB and SUBC_LIMB.
+
+2002-08-17  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/powerpc32/mode1o.asm: New file.
+       * configure.in, acinclude.m4 (GMP_ASM_POWERPC_PIC_ALWAYS): New macro.
+       * mpn/asm-defs.m4: Use it to help setting up PIC.
+
+       * configure.in (AC_PREREQ): Bump to 2.53.
+
+       * mpn/powerpc32/powerpc-defs.m4 (ASSERT): New macro.
+       (PROLOGUE_cpu): New macro, giving ALIGN(4) not 8.
+
+2002-08-16  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/m68k/lshift.asm: Fix typo in !scale_available_p code.
+       * mpn/m68k/rshift.asm: Likewise.
+
+2002-08-16  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in (--enable-profiling=instrument): New option.
+       * gmp.texi (Profiling): Describe it.
+       * mpn/x86/x86-defs.m4 (PROLOGUE_cpu, call_instrument, ret_internal):
+       Add support.
+       (call_mcount): Share PIC setups with call_instrument.
+       * mpn/x86/*.asm: Use ret_internal.
+       * mpn/asm-defs.m4 (m4_unquote): New macro.
+       * tests/mpn/t-instrument.c: New file.
+       * tests/mpn/Makefile.am: Add it.
+
+       * mpn/alpha/umul.asm: Add ASM_END.
+
+2002-08-12  Kevin Ryde  <kevin@swox.se>
+
+       * mpz/pprime_p.c: Fake up a local mpz_t to take abs(n), rather than
+       using mpz_init etc.
+
+       * mpz/millerrabin.c: Use mpz_urandomm for uniform selection of x,
+       reported by Jason Moxham.  Exclude x==n-1, ie. -1 mod n.  Use
+       gmp_randinit_default.
+
+       * mpn/alpha/umul.asm: Use "r" registers, for the benefit of Unicos.
+
+       * tests/devel/try.c: Add mpn_copyi and mpn_copyd.
+
+2002-08-09  Kevin Ryde  <kevin@swox.se>
+
+       * Makefile.am: Remove configure.lineno from DISTCLEANFILES and gmp.tmp
+       from MOSTLYCLEANFILES, automake does these itself now.
+
+       * */Makefile.in, aclocal.m4, configure, install-sh, missing,
+       mkinstalldirs: Update to automake 1.6.3.
+
+       * mpn/ia64/README: Some notes on assembler syntax.
+
+       * mpn/ia64/add_n.asm, mpn/ia64/sub_n.asm: Add .body.
+       * mpn/ia64/add_n.asm, mpn/ia64/addmul_1.asm, mpn/ia64/mul_1.asm,
+       mpn/ia64/sub_n.asm: Position .save ar.lc just before relevant
+       instruction.
+       * mpn/ia64/addmul_1.asm, mpn/ia64/mul_1.asm: Add .save ar.pfs and pr.
+       * mpn/ia64/copyd.asm, mpn/ia64/copyi.asm: Correction to .body position.
+       * mpn/ia64/lorrshift.asm: Add .prologue stuff.
+
+       * configure.in (*-*-unicos*): Remove forcible --disable-shared,
+       libtool gets this right itself now.
+
+2002-08-07  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/x86/pentium/mmx/hamdist.asm: New file, adapted from
+       mpn/x86/pentium/mmx/popham.asm.
+       * mpn/x86/pentium/mmx/popham.asm: Remove file, not faster than plain
+       mpn/x86/pentium/popcount.asm for the popcount.
+
+       * mpn/alpha/umul.asm: Use PROLOGUE/EPILOGUE, rename it mpn_umul_ppmm.
+       * configure.in (alpha*-*-*): Add umul to extra_functions.
+
+       * mpz/remove.c: Make src==0 return 0, not do DIVIDE_BY_ZERO.
+
+2002-08-05  Torbjorn Granlund  <tege@swox.com>
+
+       * acconfig.h: Remove spurious undefs for mpn_divrem_newton and
+       mpn_divrem_classic.
+
+2002-08-05  Kevin Ryde  <kevin@swox.se>
+
+       * tests/refmpn.c, tests/tests.h, tests/misc/t-printf.c,
+       tests/mpf/t-trunc.c, tests/mpn/t-mp_bases.c, tests/mpn/t-scan.c,
+       tests/mpq/t-cmp_ui.c, tests/mpz/bit.c, tests/mpz/t-aorsmul.c,
+       tests/mpz/t-powm_ui.c, tests/mpz/t-root.c, tests/mpz/t-scan.c: More
+       care with long and mp_size_t parameters, for the benefit of K&R.
+
+       * demos/perl/GMP.pm, demos/perl/GMP.xs, demos/perl/GMP/Mpz.pm,
+       demos/perl/test.pl: Add mpz_import and mpz_export.
+       * demos/perl/GMP.pm: Remove "preliminary" warning.
+
+       * mpn/lisp/gmpasm-mode.el: Set add-log-current-defun-header-regexp to
+       pick up m4 defines etc.
+
+       * Makefile.am (libgmpxx_la_DEPENDENCIES): libgmp.la should be here,
+       not libgmpxx_la_LIBADD, for the benefit of "make -j2".
+
+       * mpn/ia64/*.asm [hpux ABI=32]: Extend 32-bit operands to 64-bits, not
+       optimal and might not be sufficient, but seems to work.
+
+2002-08-03  Kevin Ryde  <kevin@swox.se>
+
+       * gmp.texi (Profiling): Use a table and expand for clarity.
+       (Integer Special Functions): New section for mpz_array_init,
+       _mpz_realloc, mpz_getlimbn and mpz_size, to discourage their use.
+
+       * configure.in (*-*-msdosdjgpp*): Remove forcible --disable-shared,
+       libtool gets this right itself now.
+
+2002-07-30  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/powerpc32/lshift.asm, mpn/powerpc32/rshift.asm: Lose final mr,
+       and make final stwu into an stw.
+
+       * gmp.texi (Known Build Problems): An easier workaround for DESTDIR,
+       using LD_LIBRARY_PATH.
+       (C++ Interface MPFR): Remove mpfrxx.h.
+
+       * mpfrxx.h: Remove file.
+       * Makefile.am: Remove mpfrxx.h.
+       * tests/cxx/Makefile.am: Add Gerardo's new test programs.
+
+2002-07-30  Gerardo Ballabio <gerardo.ballabio@unimib.it>
+
+       * gmpxx.h: Use mpz_addmul etc for ternary a+b*c etc.  Reorganise some
+       macros for maintainability.  Merge mpfrxx.h.
+       * tests/cxx/t-constr.cc, tests/cxx/t-expr.cc: Various updates.
+       * tests/cxx/t-assign.cc, tests/cxx/t-binary.cc,
+       tests/cxx/t-ternary.cc, tests/cxx/t-unary.cc: New files.
+
+2002-07-27  Kevin Ryde  <kevin@swox.se>
+
+       * longlong.h (count_trailing_zeros) [ia64 __GNUC__]: Don't use
+       __builtin_ffs for now, doesn't seem to work.
+
+       * configure.in: Establish CONFIG_SHELL to avoid a problem with
+       AC_LIBTOOL_SYS_MAX_CMD_LEN on ia64-*-hpux*.
+
+       * tune/speed.h (SPEED_ROUTINE_MPN_GCD_FINDA): Don't let calls to
+       mpn_gcd_finda go dead.
+
+       * mpn/generic/tdiv_qr.c: Inline mpn_rshift and MPN_COPY of 2 limbs.
+
+2002-07-24  Kevin Ryde  <kevin@swox.se>
+
+       * demos/primes.c: Use __GMP_PROTO and don't use signed, for the
+       benefit of K&R.
+
+       * demos/calc/calclex.l: Add <string.h> for strcmp.
+
+       * mpn/ia64/invert_limb.asm: Use .rodata which works on ia64-*-hpux*
+       and should be standard, rather than worrying about RODATA.
+
+       * gmp.texi (Function Classes): Add cross references.
+       (Integer Import and Export): Fix return value grouping.
+
+       * mpn/lisp/gmpasm-mode.el (gmpasm-comment-start-regexp): Add // for
+       ia64.  Add notes on what the various styles are for.
+
+       * mpn/ia64/default.m4 (ASM_START): Define to empty, not dnl, so as not
+       to kill text on the same line.
+       (EPILOGUE_cpu): Force a newline after "#", so as not to suppress macro
+       expansion in the rest of the EPILOGUE line.
+
+2002-07-21  Kevin Ryde  <kevin@swox.se>
+
+       * tune/speed.h: Fix some missing _PROTOs.
+
+       * Makefile.am (DISTCLEANFILES): Add configure.lineno.
+
+       * acinclude.m4 (GMP_C_DOUBLE_FORMAT): Define
+       HAVE_DOUBLE_IEEE_BIG_ENDIAN and HAVE_DOUBLE_IEEE_LITTLE_ENDIAN in
+       config.m4 too.
+       * mpn/ia64/invert_limb.asm: Add big-endian data.
+
+       * tests/mpz/t-jac.c (try_si_zi): Correction to "a" parameter type.
+
+2002-07-20  Kevin Ryde  <kevin@swox.se>
+
+       * mpz/bin_ui.c, mpz/jacobi.c, mpz/pprime_p.c, mpn/generic/divis.c:
+       More care with long and mp_size_t parameters, for the benefit of K&R.
+
+       * gmp-impl.h (invert_limb): Use parens around macro arguments.
+       (mpn_invert_limb): Give prototype and define unconditionally.
+
+       * gmp-impl.h (CACHED_ABOVE_THRESHOLD, CACHED_BELOW_THRESHOLD): New
+       macros.
+       * mpn/generic/sb_divrem_mn.c: Use them to help gcc let preinv code go
+       dead when not wanted.
+
+2002-07-17  Kevin Ryde  <kevin@swox.se>
+
+       * tests/refmpz.c (refmpz_hamdist): Ensure mp_size_t parameters are
+       that type, for the benefit of hpux ia64 bundled cc ABI=64.
+
+       * configure.in (ia64*-*-hpux*): Need +DD64 in cc_64_cppflags to get
+       the right headers for ansi2knr.
+
+       * acinclude.m4 (GMP_TRY_ASSEMBLE, GMP_ASM_UNDERSCORE): Use $CPPFLAGS
+       with $CCAS and when linking, as done by the makefiles.
+       (GMP_ASM_X86_MMX, GMP_ASM_X86_SSE2): Show $CPPFLAGS in diagnostics.
+
+       * gmp-impl.h (ieee_double_extract): Setup using HAVE_DOUBLE_IEEE_*.
+       (GMP_UINT32): New define, 32 bit type for ieee_double_extract.
+       * configure.in: Add AC_CHECK_SIZEOF unsigned.
+       * configure.in, acinclude.m4 (GMP_IMPL_H_IEEE_FLOATS): Remove.
+       (GMP_C_DOUBLE_FORMAT): Instead warn about unknown float here.
+
+       * configure.in, acinclude.m4 (GMP_C_SIZES): Remove.
+       * acinclude.m4 (GMP_INCLUDE_GMP_H_BITS_PER_MP_LIMB): Remove this
+       scheme, not required.
+       * configure.in (unsigned long, mp_limb_t): Run AC_CHECK_SIZEOF for
+       these unconditionally, check mp_limb_t against gmp-mparam.h values.
+       * gmp-impl.h (BYTES_PER_MP_LIMB, BITS_PER_MP_LIMB): Define based on
+       SIZEOF_MP_LIMB_T if not provided by gmp-mparam.h.
+       (BITS_PER_ULONG): Define here now.
+
+       * gmp.texi (ABI and ISA): Add HP-UX IA-64 choices.
+       (Random State Initialization): Typo in m2exp described for
+       gmp_randinit_lc_2exp_size.
+       (Formatted Output Functions): Clarify gmp_obstack_printf a bit.
+       (Formatted Input Strings): Typo in %n summary.
+
+       * mpz/inp_raw.c (NTOH_LIMB_FETCH): Use simple generic default, since
+       endianness detection is now cross-compile friendly.
+       * mpz/out_raw.c (HTON_LIMB_STORE): Ditto.
+
+       * mpz/fib_ui.c: Nailify.
+       * mpz/random.c: Nailify.
+
+       * mpfr/acinclude.m4 (MPFR_CONFIGS): Patch by Vincent for an apparent
+       float rounding gremlin on powerpc.
+
+2002-07-15  Kevin Ryde  <kevin@swox.se>
+
+       * Makefile.am (PRINTF_OBJECTS): Avoid ending in a backslash, hpux ia64
+       make doesn't like that.
+
+       * mpn/ia64/*.asm: Add .sptk to unconditional branches, add ";" after
+       .mib etc, for the benefit of hpux.
+
+       * configure.in (ia64*-*-*): Use ABI=64 on non-HPUX systems, for
+       consistency.
+
+       * gmp-impl.h (ieee_double_extract): Test __sparc__, used by gcc 3.1.
+       Reported by nix@esperi.demon.co.uk.
+       * mpfr/mpfr-math.h (_MPFR_NAN_BYTES etc): Ditto.
+
+2002-07-13  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/powerpc32/rshift.asm: Rewrite, transformed from lshift.asm.
+
+       * tune/tuneup.c (DIVEXACT_1_THRESHOLD, MODEXACT_1_ODD_THRESHOLD):
+       Always zero for native mpn_divexact_1, mpn_modexact_1_odd.
+
+       * gmp-h.in (__GMP_EXTERN_INLINE): Don't use this during configure,
+       ie. __GMP_WITHIN_CONFIGURE, to avoid needing dependent routines.
+       * acinclude.m4 (GMP_H_EXTERN_INLINE): Consequent changes.
+
+       * gmp-impl.h, mpn/asm-defs.m4 (mpn_addmul_2, mpn_addmul_3,
+       mpn_addmul_4): Add prototypes and defines.
+
+       * gmp.texi (Number Theoretic Functions): Clarify return value.
+       Reported by Peter Keller.
+
+2002-07-10  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in, acinclude.m4 (GMP_PROG_LEX): Remove this in favour of
+       AM_PROG_LEX, now ok when lex is missing.
+
+       * longlong.h (count_leading_zeros) [pentiummmx]: Don't use __clz_tab
+       variant under LONGLONG_STANDALONE.
+       (count_trailing_zeros) [ia64 __GNUC__]: Use __builtin_ffs.
+
+       * gmp-impl.h (popc_limb): Add an ia64 asm version.
+       (DItype): Use HAVE_LONG_LONG to choose long long, avoiding _LONGLONG
+       which is in gcc but means something unrelated in MS Visual C 7.0.
+       Reported by David Librik.
+
+       * mpz/divexact.c: Add an ASSERT that den divides num.
+
+       * mpn/asm-defs.m4 (LDEF): New macro.
+       (INT32, INT64): Use it.
+       * mpn/pa32/*.asm: Use it.
+       * mpn/pa32/README: Update notes on labels.
+
+       * tests/refmpn.c, tests/tests.h, tests/t-bswap.c (ref_bswap_limb):
+       Renamed from refmpn_bswap_limb.
+       * tests/t-bswap.c: Add tests_start/tests_end for randomization.
+
+       * tests/refmpn.c, tests/tests.h (ref_popc_limb): New function.
+       * tests/t-popc.c: New file.
+       * tests/Makefile.am: Add it.
+
+       * mpn/ia64/invert_limb.asm: Use RODATA since ".section .rodata" is not
+       accepted by ia64-*-hpux*.
+
+       * acinclude.m4 (GMP_ASM_BYTE): New macro.
+       (GMP_ASM_ALIGN_LOG, GMP_ASM_W32): Use it.
+       (GMP_ASM_LABEL_SUFFIX): Use test compiles, not $host.
+       (GMP_ASM_GLOBL): Ditto, and add .global for ia64-*-hpux*.
+       (GMP_ASM_GLOBL_ATTR): Use GMP_ASM_GLOBL result, not $host.
+       (GMP_ASM_LSYM_PREFIX): Allow any "a-z" nm symbol code, add ".text" to
+       test program, required by ia64-*-hpux*.
+       (GMP_ASM_LABEL_SUFFIX): Make LABEL_SUFFIX just the value, not a "$1:",
+       the former being how it's currently being used in fact.
+
+       * configure.in, acinclude.m4 (GMP_PROG_CC_WORKS_LONGLONG): New macro.
+       * configure.in (ia64-*-hpux*): Add 32 and 64 bit ABI modes.
+
+2002-07-06  Kevin Ryde  <kevin@swox.se>
+
+       * tests/cxx/t-allfuns.cc: New file.
+       * tests/cxx/Makefile.am: Add it.
+
+       * mpz/clrbit.c, mpz/setbit.c: Only MPN_NORMALIZE if high limb changes
+       to zero.  Use _mpz_realloc return value.
+
+       * gmp.texi (Build Options, C++ Formatted Output, C++ Formatted Input):
+       Cross reference to Headers and Libraries for libgmpxx stuff.
+       (Low-level Functions): mpn_divexact_by3 result based on GMP_NUMB_BITS.
+       mpn_set_str takes "unsigned char *", reported by Mark Sofroniou.
+       (C++ Interface General): Describe linking with libgmpxx and libgmp.
+
+2002-07-01  Kevin Ryde  <kevin@swox.se>
+
+       * tune/tuneup.c, gmp-impl.h: Eliminate the array of thresholds in
+       one(), tune just one at a time and let the callers handle dependencies.
+       Eliminate the second_start_min hack, handle SQR_KARATSUBA_THRESHOLD
+       oddities in tune_sqr() instead.
+
+       * mpn/pa64/umul.asm, mpn/pa64/udiv.asm, mpn/asm-defs.m4, acconfig.h,
+       longlong.h, tune/speed.c, tune/speed.h, tune/common.c, tune/many.pl,
+       tests/devel/try.c: Introduce mpn_umul_ppmm_r and mpn_udiv_qrnnd_r
+       rather than having variant parameter order for mpn_umul_ppmm and
+       mpn_udiv_qrnnd on pa64.
+
+       * gmp-h.in (mpz_export): Remove a spurious parameter name.
+       * gmp-impl.h (mpn_rootrem): Use __MPN.
+
+2002-06-29  Kevin Ryde  <kevin@swox.se>
+
+       * longlong.h (udiv_qrnnd) [hppa32]: Remove mpn_udiv_qrnnd version, the
+       general mechanism for that suffices.
+
+       * mpf/inp_str.c: Fix returned count of chars read, reported by Paul
+       Zimmermann.  Also fix a memory leak for invalid input.
+       * tests/mpf/t-inp_str.c: New file.
+       * tests/mpf/Makefile.am (check_PROGRAMS): Add it.
+
+       * tests/devel/try.c (mpn_mod_34lsub1): Only exists for
+       GMP_NUMB_BITS%4==0.
+       (SIZE2_FIRST): Respect option_firstsize2 for "fraction" case.
+
+       * mpn/generic/diveby3.c: Further nailifications.
+       * gmp-impl.h (MODLIMB_INVERSE_3): Allow for GMP_NUMB_BITS odd.
+       (GMP_NUMB_CEIL_MAX_DIV3, GMP_NUMB_CEIL_2MAX_DIV3): New constants.
+       * tests/t-constants.c: Check them.
+
+       * gmp-h.in (__GMP_CRAY_Pragma): New macro.
+       (__GMPN_COPY_REST): Use it.
+       * gmp-impl.h (CRAY_Pragma): Use it.
+
+2002-06-25  Kevin Ryde  <kevin@swox.se>
+
+       * mpz/import.c, mpz/export.c: Cast data pointer through "char *" in
+       alignment tests, for the benefit of Cray vector systems.
+
+       * configure.in (x86-*-*): Remove -march=pentiumpro check, seems ok
+       with current code.
+       * acinclude.m4 (GMP_GCC_MARCH_PENTIUMPRO, GMP_GCC_VERSION_GE): Remove
+       macros, no longer needed.
+
+       * acinclude.m4 (GMP_ASM_RODATA): Remove temporary files.
+
+       * configure.in (GMP_ASM_GLOBL_ATTR): Reposition to avoid duplication
+       through AC_REQUIRE.
+
+2002-06-23  Kevin Ryde  <kevin@swox.se>
+
+       * tests/mpz/t-fib_ui.c (check_fib_table): Check table values, not just
+       that they're non-zero.
+
+       * acinclude.m4 (GMP_GCC_ARM_UMODSI): Match bad "gcc --version" output
+       exactly, rather than parsing it with GMP_GCC_VERSION_GE.
+       (GMP_ASM_UNDERSCORE): Use GLOBL_ATTR.
+
+       * mpn/pa32/udiv.asm, mpn/pa32/hppa1_1/udiv.asm, mpn/pa64/udiv.asm:
+       Renamed from udiv_qrnnd.asm, for consistency with other udiv's.
+       * mpn/pa64/umul.asm: Renamed from umul_ppmm.asm likewise.
+       * configure.in (hppa*-*-*): Update extra_functions.
+       (NAILS_SUPPORT): Remove umul_ppmm, udiv_qrnnd, udiv_fp, udiv_nfp from
+       nails-neutral list, no longer needed.
+
+       * gmp-h.in (__DECC): Add notes on testing this for ANSI-ness.
+       (__GMP_EXTERN_INLINE): Add static __inline for DEC C.
+       (mpz_mod_ui): Move up to main section, it's still documented.
+
+2002-06-22  Kevin Ryde  <kevin@swox.se>
+
+       * mpz/jacobi.c, mpz/kronsz.c, mpz/kronuz.c, mpz/kronzs.c,
+       mpz/kronzu.c: Allow for odd GMP_NUMB_BITS, tweak a few variable setups.
+       * gmp-impl.h (JACOBI_STRIP_LOW_ZEROS): New macro.
+
+       * mpn/generic/mod_34lsub1.c: Nailify.
+       * tests/devel/try.c (CNST_34LSUB1): Nailify.
+       * gmp-impl.h (ADDC_LIMB): New macro.
+
+       * gmpxx.h (mpf_class::get_str): Make exponent mp_exp_t&, default
+       base=10 and ndigits=0.
+       (mpz_class::set_str, mpq_class::set_str, mpf_class::set_str): Add
+       versions accepting "const char *".
+       * mpfrxx.h (mpfr_class::get_str, mpfr_class::set_str): Ditto, and
+       uncommenting set_str and operator=.
+       * gmp.texi (C++ Interface Integers, C++ Interface Rationals)
+       (C++ Interface Floats): Update.
+
+       * gmp-impl.h (modlimb_invert): Merge the <=64bits and general versions.
+       (const, signed): Move to near top of file, fixes --enable-alloca=debug
+       on K&R.
+
+       * gen-fib.c: New file, derived from mainline in mpn/generic/fib2_ui.c.
+       * dumbmp.c (mpz_init_set_ui): New function.
+       * Makefile.am, mpn/Makefile.am: Generate fib_table.h, mpn/fib_table.c.
+       * gmp-impl.h: Use fib_table.h, add __GMP_DECLSPEC to __gmp_fib_table
+       (for the benefit of tests/mpz/t-fib_ui.c).
+       * mpn/generic/fib2_ui.c: Remove __gmp_fib_table and generating code.
+
+       * Makefile.am: Add mp.h to BUILT_SOURCES, distclean all BUILT_SOURCES,
+       use += more.
+
+       * acinclude.m4 (GMP_ASM_M68K_INSTRUCTION, GMP_ASM_M68K_BRANCHES):
+       Don't let "unknown" get into the cache variables.
+       (GMP_ASM_TEXT): See what assembles, don't hard-code hpux and aix.
+       (GMP_PROG_EXEEXT_FOR_BUILD): Add ,ff8 for RISC OS, per autoconf cvs.
+       (GMP_PROG_CPP_FOR_BUILD): Restructure per AC_PROG_CPP, print correct
+       result if CPP_FOR_BUILD overrides the cache variable.
+       (GMP_PROG_CC_FOR_BUILD_WORKS): New macro split from
+       GMP_PROG_CC_FOR_BUILD.  Allow for "conftest" default compiler output.
+       * configure.in, acinclude.m4 (GMP_PROG_HOST_CC): Reinstate this,
+       separating HOST_CC establishment from GMP_PROG_CC_FOR_BUILD.
+
+       * configure.in (mpn_objs_in_libgmp): Move mpn/mp_bases.lo ...
+       * Makefile.am (MPN_OBJECTS): ... to here, add $U, and arrange
+       MPN_OBJECTS to be common between libgmp and libmp.
+
+2002-06-20  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/mul_n.c (TOOM3_MUL_REC, TOOM3_SQR_REC): Don't check if
+       basecase is to be invoked when *_TOOM3_THRESHOLD is more than 3 times
+       the corresponding *_THRESHOLD.
+
+2002-06-20  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/ia64/submul_1.c: Add missing TMP_DECL, TMP_MARK, TMP_FREE.
+       Reported by Paul Zimmermann.
+
+       * configure.in, acinclude.m4 (AC_DEFINE): Make templates read "Define
+       to 1", for clarity as per autoconf.
+       * acinclude.m4 (GMP_OPTION_ALLOCA): Group WANT_TMP templates.
+
+2002-06-20  Gerardo Ballabio <gerardo.ballabio@unimib.it>
+
+       * gmpxx.h, mpfrxx.h: Remove mpz_classref, let mpq_class::get_num and
+       mpq_class::get_den return mpz_class& as per the documentation.
+       Reported by Roberto Bagnara.
+
+2002-06-18  Kevin Ryde  <kevin@swox.se>
+
+       * tests/rand/t-lc2exp.c: New file.
+       * tests/rand/Makefile.am: Add it, and use tests/libtests.la.
+
+       * randraw.c (lc): Pad seed==0 case with zero limbs, return same
+       (m2exp+1)/2 bits as normal, right shift "c" result as normal.
+
+       * configure.in: Don't bother with line numbers in some diagnostics.
+       (*-*-mingw*): Use -mno-cygwin if it works, suggested by delta trinity.
+
+       * tests/mpz/Makefile.am, tests/mpq/Makefile.am,
+       tests/misc/Makefile.am (CLEANFILES): Set to *.tmp for test program
+       temporaries, to get t-scanf.tmp and reduce future maintenance.
+
+2002-06-16  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/get_str.c (mpn_dc_get_str): Pass scratch memory area in
+       new `tmp' parameter.  Trim allocation needs by reusing input parameter.
+
+2002-06-15  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/sparc32/v9/udiv.asm: New file.
+
+2002-06-15  Kevin Ryde  <kevin@swox.se>
+
+       * acinclude.m4 (GMP_GCC_VERSION_GE): Correction to recognising mingw
+       gcc 3.1 version number.  Reported by Jim Fougeron.
+
+       * configure.in (AC_PROVIDE_AC_LIBTOOL_WIN32_DLL): New define, to make
+       AC_LIBTOOL_WIN32_DLL work with autoconf 2.53.
+
+       * acinclude.m4 (GMP_C_SIZES): Establish BITS_PER_MP_LIMB as a value,
+       not an expression, for the benefit of the gen-bases invocation.
+
+       * config.guess (CC_FOR_BUILD): Try c99, same as configfsf.guess.
+
+2002-06-15  Paul Zimmermann  <Paul.Zimmermann@loria.fr>
+
+       * mpfr/set_q.c: Allow for 1 bit numerator or denominator.
+
+2002-06-14  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in (AC_C_BIGENDIAN): Use new style action parameters.
+
+       * randlc2x.c: Allow for a<0, allow for c>=2^m2exp.
+       * randraw.c (lc): Allow for a==0.
+
+       * mpn/sparc32/udiv.asm: Renamed from udiv_fp.asm.  Don't know if float
+       is the best way for v7, but it's what configure has chosen since gmp 3.
+       * configure.in (*sparc*-*-* ABI=32): extra_functions="udiv" for all,
+       in particular sparc32/v8/udiv.asm is faster (on ultrasparc2) than
+       udiv_fp previously used for v9 chips.
+
+       * gen-bases.c: New file, derived from mpn/mp_bases.c.
+       * dumbmp.c: New file, mostly by Torbjorn, some by me.
+       * configure.in, acinclude.m4 (GMP_PROG_CC_FOR_BUILD,
+       GMP_PROG_CPP_FOR_BUILD, GMP_PROG_EXEEXT_FOR_BUILD,
+       GMP_C_FOR_BUILD_ANSI, GMP_CHECK_LIBM_FOR_BUILD): New macros.
+       (GMP_PROG_HOST_CC): Remove, superseded by GMP_PROG_CC_FOR_BUILD.
+       * Makefile.am: Run gen-bases to create mp_bases.h and mpn/mp_bases.c.
+       * gmp-impl.h: Use mp_bases.h.
+       * mpn/mp_bases.c: Remove file.
+       * mpn/Makefile.am: mp_bases.c now in nodist_libmpn_la_SOURCES.
+
+       * tests/mpz/t-cmp_d.c (check_one_2exp): Use volatile to force to
+       double, fixes gcc 3.1 with -O4.  Reported by Michael Lee.
+       * configure.in (AC_C_VOLATILE): New macro.
+
+       * tests/misc/t-scanf.c (fromstring_gmp_fscanf): Add missing va_end.
+       Don't mix varargs and fixed args functions, not good on x86_64.
+       Reported by Marcus Meissner.
+
+       * Makefile.am (EXTRA_DIST): Remove mpfr/README, now in mpfr/Makefile.in.
+
+       * configure, config.in, INSTALL.autoconf: Update to autoconf 2.53.
+       * */Makefile.in, install-sh, mdate-sh, missing, aclocal.m4, configure:
+       Update to automake 1.6.1.
+       * configfsf.guess, configfsf.sub: Update to 2002-05-29.
+
+2002-06-12  Kevin Ryde  <kevin@swox.se>
+
+       * acinclude.m4 (GMP_GCC_VERSION_GE): Recognise mingw gcc 3.1 version.
+       (GMP_PROG_CC_WORKS): Allow for a_out.exe, as per autoconf.
+       (GMP_GCC_NO_CPP_PRECOMP, GMP_ASM_UNDERSCORE): Ditto, plus a.exe.
+
+2002-06-09  Torbjorn Granlund  <tege@swox.com>
+
+       * randraw.c (lc): Remove broken ASSERT_ALWAYS.
+
+       * mpn/x86: Update gmp-mparam.h files with current measured *_THRESHOLD
+       values.
+       * mpn/x86/p6/mmx/gmp-mparam.h: New file.
+
+2002-06-09  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/x86/*/gmp-mparam.h (USE_PREINV_DIVREM_1): Add tuned settings.
+
+       * acconfig.h (HAVE_NATIVE_mpn_preinv_divrem_1): New template.
+
+       * tests/refmpn.c, tests/tests.h (refmpn_chars_per_limb,
+       refmpn_big_base): New functions.
+       * tests/mpn/t-mp_bases.c: Use them, and don't test big_base_inverted
+       unless it's being used.
+
+       * gmp.texi (Notes for Particular Systems): Using Microsoft C with DLLs.
+       (Known Build Problems): Notes on MacOS and GCC.
+       (Integer Logic and Bit Fiddling): Use ULONG_MAX for maximum ulong.
+       (Low-level Functions): mpn_get_str accepts base==256.
+       (Formatted Output Functions): Note output is not atomic.
+       (Internals): Note mp_size_t for limb counts.
+
+       * mp-h.in, gmp-h.in (mp_ptr, mp_srcptr, mp_size_t, mp_exp_t): Remove
+       these types from mp.h, not needed.
+
+       * mpfr/tests/tadd.c, mpfr/tests/tmul.c (check): Apply a hack to the
+       parameter order to make sparc gcc 2.95.2 happy.
+
+       * doc/configuration: Notes on bootstrapping.
+
+2002-06-08  Kevin Ryde  <kevin@swox.se>
+
+       * mpfr/tests/tsqrt.c, mpfr/tests/tsqrt_ui.c: Suppress tests if sqrt is
+       not affected by mpfr_set_machine_rnd_mode.
+
+       * mpfr/mul_2si.c: Workaround a mips gcc 2.95.3 bug under -O2 -mabi=n32.
+
+       * configure.in (alphev56): Fix to use ev5 path.
+
+2002-06-06  Kevin Ryde  <kevin@swox.se>
+
+       * gmp-h.in: Use __gmp_const not const, in a number of places.
+
+       * configure.in (sparc): Use ABI=32 instead of ABI=standard on v7 and
+       v8, for consistency with v9 choices.
+       (sparc64): Restrict GMP_ASM_SPARC_REGISTER to ABI=64.
+       (x86): Move MMX $path munging to before printout.
+       (CCAS): Move upward to support this.
+
+       * gmp-impl.h (modlimb_invert): Merge macros for specific limb sizes,
+       add a version for arbitrary limb size, use GMP_NUMB_BITS.
+       (modlimb_invert, MODLIMB_INVERSE_3): Fix comments to say GMP_NUMB_BITS.
+
+       * gmp-h.in (__GMP_LIKELY, __GMP_UNLIKELY): New macros.
+       (mpz_getlimbn, mpz_perfect_square_p, mpz_popcount): Use them, make the
+       fetch or mpn call likely, unconditionally calculate the alternative so
+       as to avoid an "else" clause.
+       * gmp-impl.h (LIKELY, UNLIKELY): Aliases.
+
+       * configure.in, mpfr/tests/Makefile.am: Add $LIBM to $LIBS for
+       MPFR_CONFIGS so it detects fesetround, and let it go through to
+       $MPFR_LIBS.
+       * mpfr/rnd_mode.c: Use gmp-impl.h to get MPFR_HAVE_FESETROUND.
+
+       * tests/mpz/t-sizeinbase.c: Disable fake bits test, such pointer
+       setups are bogus and have been seen failing on hppa.
+
+       * tests/misc.c, tests/refmpz.c, tests/tests.h, tests/mpz/t-cong.c:
+       Rename mpz_flipbit to refmpz_combit and move from misc.c to refmpz.c.
+
+2002-06-05  Torbjorn Granlund  <tege@swox.com>
+
+       * tests/mpz/t-powm_ui.c: Print proper routine name in error message.
+
+2002-06-03  Kevin Ryde  <kevin@swox.se>
+
+       * tune/time.c, tune/freq.c, tune/speed.h: Add powerpc mftb support.
+       (FREQ_MEASURE_ONE): Move to speed.h, fix tv_sec factor.
+       (freq_measure): Use for mftb measuring too.
+       * tune/powerpc.asm, tune/powerpc64.asm: New files.
+       * configure.in, tune/Makefile.am: Add them.
+
+       * gmp-impl.h (popc_limb): Add versions for Cray and fallback for
+       arbitrary limb size.
+
+       * mpn/sparc32/sparc-defs.m4: New file.
+       * configure.in (sparc*-*-*): Use it.
+       * acinclude.m4 (GMP_ASM_SPARC_REGISTER): New macro.
+       * configure.in (sparc64): Use it.  Also, use -Wc,-m64 for linking.
+       * mpn/sparc64/add_n.asm, mpn/sparc64/addmul_1.asm,
+       mpn/sparc64/copyd.asm, mpn/sparc64/copyi.asm, mpn/sparc64/lshift.asm,
+       mpn/sparc64/mul_1.asm, mpn/sparc64/rshift.asm,
+       mpn/sparc64/sqr_diagonal.asm, mpn/sparc64/sub_n.asm,
+       mpn/sparc64/submul_1.asm: Use REGISTER for .register.
+
+2002-06-01  Kevin Ryde  <kevin@swox.se>
+
+       * mpz/powm_ui.c: Fix for result range in certain circumstances.
+
+       * mpn/x86/k6/diveby3.asm: Speedup to 10 c/l, same as divexact_1.
+       Anomaly pointed out by Alexander Kruppa.
+
+2002-05-31  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/export.c: Cast pointer via `unsigned long' when checking
+       alignment to avoid compiler warnings.
+
+2002-05-29  Kevin Ryde  <kevin@swox.se>
+
+       * gmp-impl.h (BSWAP_LIMB): Versions for m68k, powerpc, and arbitrary
+       limb size.
+       * configure.in, acconfig.h (HAVE_HOST_CPU_FAMILY_m68k): New define.
+
+2002-05-27  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/mul_basecase.c: Improve MAX_LEFT handling, returning
+       when possible.  Add code for mpn_addmul_5 and mpn_addmul_6.
+
+2002-05-25  Kevin Ryde  <kevin@swox.se>
+
+       * tune/tuneup.c: Misc nailifications, and disable preinv thresholds
+       with nails.
+       * tune/speed.h: Use GMP_NUMB_HIGHBIT with mpn_sb_divrem_mn and
+       mpn_divrem_2.
+       * mpz/powm.c (redc): Nailify q.
+
+       * tests/mpn/t-scan.c: Reduce the amount of testing, to go faster.
+
+2002-05-23  Torbjorn Granlund  <tege@swox.com>
+
+       * Version 4.1 released.
+
+       * mpn/alpha/ev6/nails/gmp-mparam.h: New file.
+
+       * tests/devel/add_n.c (refmpn_add_n): Nailify.
+       * tests/devel/sub_n.c (refmpn_sub_n): Nailify.
+       * tests/devel/addmul_1.c (refmpn_addmul_1): Nailify.
+       * tests/devel/submul_1.c (refmpn_submul_1): Nailify.
+
+       * mpn/alpha/ev6/nails/add_n.asm: New file.
+       * mpn/alpha/ev6/nails/sub_n.asm: New file.
+       * mpn/alpha/ev6/nails/mul_1.asm: New file.
+       * mpn/alpha/ev6/nails/submul_1.asm: New file.
+
+2002-05-22  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/alpha/ev6/nails/addmul_1.asm: New file.
+
+       * mpz/inp_str.c (mpz_inp_str_nowhite): Nailify.
+
+       * mpn/generic/mul_basecase.c: Update pointers before conditional
+       MAX_LEFT break statements.
+
+2002-05-21  Torbjorn Granlund  <tege@swox.com>
+
+       * tests/mpz/t-gcd.c: Test mpz_gcd_ui.
+
+       * mpz/lcm_ui.c: Nailify.
+
+       * mpz/gcd_ui.c: Nailify.  Make it work as documented, allowing
+       NULL to be passed for result parameter.  Fix gcd(0,0) case.
+
+       * mpz/set_str.c: Nailify.
+
+       * randlc2x.c (gmp_randinit_lc_2exp): Nailify.
+
+       From Jakub Jelinek:
+       * longlong.h (add_ssaaaa,sub_ddmmss) [64-bit sparc]:
+       Make it actually work.
+
+2002-05-18  Torbjorn Granlund  <tege@swox.com>
+
+       * mpf/ui_div.c: Shut up compiler warning.
+
+       * mpn/generic/mul_basecase.c: Use mpn_addmul_2, mpn_addmul_3, and
+       mpn_addmul_4, as available.
+
+       * mpn/alpha/ev6/nails/addmul_2.asm: Adjust NAILS_SUPPORT decls.
+       * mpn/alpha/ev6/nails/addmul_3.asm: Likewise.
+       * mpn/alpha/ev6/nails/addmul_4.asm: Likewise.
+
+       * configure.in (*-cray-unicos*): Back again to -hscalar0.
+       (gmp_mpn_functions_optional): Add mul_3, mul_4, addmul_2, addmul_3,
+       and addmul_4.
+       * acconfig.h: Add #undefs for new optional mpn functions.
+
+2002-05-18  Kevin Ryde  <kevin@swox.se>
+
+       * gmp.texi (Integer Import and Export): Mention Cray unfilled words.
+
+       * mpz/set_d.c, mpq/set_d.c: Use LIMBS_PER_DOUBLE for the output of
+       __gmp_extract_double.  Reported by Henrik Johansson.
+
+2002-05-17  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/alpha/ev6/nails/addmul_2.asm: New file.
+       * mpn/alpha/ev6/nails/addmul_3.asm: New file.
+       * mpn/alpha/ev6/nails/addmul_4.asm: New file.
+
+       * mpn/generic/dump.c: Rewrite and nailify.
+
+2002-05-16  Kevin Ryde  <kevin@swox.se>
+
+       * mpfr/Makefile.am (EXTRA_DIST): Add BUGS file.
+
+2002-05-15  Torbjorn Granlund  <tege@swox.com>
+
+       * configure.in (*-cray-unicos*): Remove -hscalar0, add -hnofastmd
+       as workaround for compiler bug.
+       (mips64*-*-*): Pass just -O1 to cc, to work around compiler bug.
+
+2002-05-14  Torbjorn Granlund  <tege@swox.com>
+
+       * configure.in (*-cray-unicos*): Pass -hscalar0 to work around
+       compiler bug for mpz/import.c.
+
+2002-05-11  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/import.c: Cast pointer via `unsigned long' when checking
+       alignment to avoid compiler warnings.
+
+       * mpn/generic/rootrem.c: Adjust allocation of qp temporary area.
+
+2002-05-09  Kevin Ryde  <kevin@swox.se>
+
+       * mpz/import.c: Corrections to size store, special case tests, and
+       general case ACCUMULATE.
+       * tests/mpz/t-import.c, tests/mpz/t-export.c: More test data.
+
+2002-05-09  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/rootrem.c: Use temp space for root, copy value in place
+       before returning.
+       * mpz/root.c: Don't allocate extra limb for root value.
+       * mpz/perfpow.c: Undo last change.
+
+2002-05-08  Torbjorn Granlund  <tege@swox.com>
+
+       * gmp-impl.h (powerpc BSWAP_LIMB_FETCH): Rename local variable to make
+       it not clash with caller.
+
+       * mpn/generic/rootrem.c: New file.
+       * configure.in (gmp_mpn_functions): Add rootrem and pow_1.
+       * mpn/Makefile.am (nodist_libdummy_la_SOURCES): Add rootrem.c and
+       pow_1.c
+       * gmp-impl.h (mpn_rootrem): Add declaration.
+       * mpz/perfpow.c: Amend allocations for mpn_rootrem requirements.
+       * mpz/root.c: Rewrite to use mpn_rootrem.
+
+2002-05-08  Kevin Ryde  <kevin@swox.se>
+
+       * gmp-impl.h (MUL_KARATSUBA_THRESHOLD etc): Remove forced nail values.
+
+       * mpf/fits_u.h, mpf/fits_s.h, tests/mpf/t-fits.c: Ignore fraction
+       part, making the code match the documentation.
+
+       * gmpxx.h (struct __gmp_binary_minus): Use mpz_ui_sub.
+
+2002-05-07  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/powerpc32/README: New file.
+
+       * mpz/root.c: Use unsigned long with mpz_sub_ui not mp_limb_t.
+
+       * tune/README: Misc updates including sparc32/v9 smoothness, low res
+       timebase, and mpn_add_n operand overlaps.
+       * tune/many.pl: Add udiv.asm support.
+
+       * gmp.texi (Build Options): A couple of --build better as --host.
+       (Known Build Problems, Notes for Package Builds): Add DESTDIR problem.
+       (Compatibility with older versions): Compatible with 4.x versions.
+       (Converting Integers): Remove mpz_get_ui + mpz_tdiv_q_2exp decompose.
+       (Integer Import and Export): New section.
+       (Miscellaneous Integer Functions): Clarify mpz_sizeinbase returns 1
+       for operand of 0.
+       (Language Bindings): Add GNU Pascal.
+       (Low-level Functions): Add GMP_NUMB_MAX.
+
+       * tests/mpz/t-import.c, tests/mpz/t-export.c, tests/mpz/t-get_d.c:
+       New tests.
+       * tests/mpz/Makefile.am: Add them.
+
+       * mpz/import.c, mpz/export.c: New files.
+       * Makefile.am, mpz/Makefile.am, gmp-h.in: Add them.
+
+       * gmp-h.in, gmp-impl.h (GMP_NUMB_MAX): Move to gmp.h.
+       * gmp-impl.h (CNST_LIMB): Add cast to mp_limb_t to ensure unsigned.
+       (CRAY_Pragma, MPN_REVERSE, MPN_BSWAP, MPN_BSWAP_REVERSE,
+       ASSERT_ALWAYS_LIMB, ASSERT_ALWAYS_MPN): New macros.
+       (MPZ_CHECK_FORMAT): Use ASSERT_ALWAYS_MPN.
+
+2002-05-07  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/aors_ui.h: Nailify.
+
+       * tests/mpz/t-addsub.c: New file.
+       * tests/mpz/Makefile.am (check_PROGRAMS): Add t-addsub.
+
+       * mpz/ui_sub.c: New file.
+       * mpz/Makefile.am (libmpz_la_SOURCES): Add ui_sub.c.
+       * Makefile.am (MPZ_OBJECTS): Ditto.
+       * gmp-h.in (mpz_ui_sub): Add declaration.
+
+       * gmp-impl.h (MPZ_REALLOC): Rewrite to allow the use of _mpz_realloc
+       return value.
+
+       * gmp-h.in (mpn_pow_1): Add declaration.
+
+       * mpn/generic/pow_1.c: Handle exp <= 1.  Reverse rp/tp parity scheme
+       for bn == 1 arm.
+
+       * Rename MP_LIMB_T_HIGHBIT => GMP_LIMB_HIGHBIT.
+
+2002-05-06  Torbjorn Granlund  <tege@swox.com>
+
+       * demos/pexpr.c (main): Don't call mpz_sizeinbase with negative base.
+
+       * randraw.c (lc): Remove an unused variable.
+
+       * mpn/generic/get_str.c: Clarify an algorithm description.
+
+       * tests/mpf/t-trunc.c: Nailify.
+       * tests/mpf/t-set_si.c: Disable for nails.
+
+       * mpf/cmp_si.c: Nailify.
+       * mpf/cmp_ui.c: Nailify.
+       * mpf/div.c: Nailify.
+       * mpf/div_2exp.c: Nailify.
+       * mpf/div_ui.c: Nailify.
+       * mpf/eq.c: Nailify.
+       * mpf/get_d.c: Nailify.
+       * mpf/get_d_2exp.c: Nailify.
+       * mpf/get_si.c: Nailify.
+       * mpf/get_str.c: Nailify.
+       * mpf/get_ui.c: Nailify.
+       * mpf/mul_2exp.c: Nailify.
+       * mpf/random2.c: Nailify.
+       * mpf/set_q.c: Nailify.
+       * mpf/set_si.c: Nailify.
+       * mpf/set_str.c: Nailify.
+       * mpf/set_ui.c: Nailify.
+       * mpf/sub.c: Nailify.
+       * mpf/ui_div.c: Nailify.
+       * mpf/ui_sub.c: Nailify.
+       * mpf/urandomb.c: Nailify.
+
+       * gmp-impl.h (__GMPF_BITS_TO_PREC, __GMPF_PREC_TO_BITS): Nailify.
+
+       * mpz/get_si.c: Misc variable name changes.
+
+       * mpf/fits_u.h: Rewrite - nailify.
+       * mpf/fits_s.h: Likewise.
+
+       * mpz/mod.c: Disambiguate if-statement with extra {}.
+
+       * mpf/int_p.c: Fix type of size variables.
+       * mpf/get_ui.c: Likewise.
+       * mpf/get_si.c: Likewise.
+       * mpq/equal.c: Likewise.
+       * mpq/get_d.c: Likewise.
+       * mpz/cmp_d.c: Likewise.
+       * mpz/cmpabs_d.c: Likewise.
+       * mpz/divis_2exp.c: Likewise.
+       * mpz/kronuz.c: Likewise.
+       * mpz/kronzu.c: Likewise.
+       * mpz/kronzs.c: Likewise.
+       * mpz/kronsz.c: Likewise.
+       * mpz/scan0.c: Likewise.
+       * mpz/scan1.c: Likewise.
+       * mpz/tstbit.c: Likewise.
+       * mpz/cong_2exp.c: Likewise.
+       * mpz/divis.c: Likewise.
+
+2002-05-04  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/gcd.c: Additional nailify changes.
+
+2002-05-04  Kevin Ryde  <kevin@swox.se>
+
+       * gmp-h.in (__GNU_MP_VERSION): Set to 4.1.
+       * Makefile.am (-version-info): Bump for new release.
+
+2002-04-30  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/divrem_1.c: Additional nailify changes.
+       * mpn/generic/mod_1.c: Likewise.
+
+       * tests/mpq/t-get_d.c: Print floats with all 16 digits.
+
+       * mpq/get_d.c: Nailify.
+
+       * tests/mpq/t-set_f.c: Disable for nails.
+
+       * mpz/get_d.c: Nailify.
+
+       * gmp-impl.h (LIMBS_PER_DOUBLE, MP_BASE_AS_DOUBLE): Nailify.
+
+       * gmp-h.in (__GMPZ_FITS_UTYPE_P): Cast maxval before shifting it.
+
+       * extract-dbl.c: Nailify.
+
+2002-04-29  Torbjorn Granlund  <tege@swox.com>
+
+       * mpq/md_2exp.c (mord_2exp): Nailify.
+
+       * mpq/cmp_ui.c: Nailify.
+
+       * mpq/cmp.c (mpq_cmp): Nailify.
+
+       * mpn/generic/gcd.c: Nailify.  GNUify code layout.
+
+       * mpn/generic/gcdext.c: Nailify.  Misc changes.
+
+       * tests/mpz/t-sqrtrem.c: Let argv[1] mean # of repetitions.
+       * tests/mpz/t-gcd.c: Likewise.
+
+       * mpz/gcd.c: Nailify.
+
+       * mpn/generic/random.c: Nailify.
+
+       * gmp-impl.h (modlimb_invert): Nailify.
+
+2002-04-27  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/gcdext.c (div2): Remove qh parameter.
+       (mpn_gcdext): Streamline double-limb code.
+       Move GCDEXT_THRESHOLD check to after initial division.
+
+2002-04-27  Kevin Ryde  <kevin@swox.se>
+
+       * gmp-impl.h (JACOBI_MOD_OR_MODEXACT_1_ODD): Allow for odd
+       GMP_NUMB_BITS.
+
+       * tune/time.c (sgi_works_p): Allow for 64-bit counter, and fix
+       SGI_CYCLECNTR_SIZE handling.
+
+       * demos/expr/exprfr.c: Add nan and inf constants.
+       * demos/expr/t-expr.c: Exercise them.
+
+2002-04-26  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/cmp_ui.c: Fix overflow conditions for nails.
+
+       * gmp-h.in (mpz_get_ui): Fix typo from last change.
+
+       * mpz/n_pow_ui.c: Adjust allocation for nails.
+       (GMP_NUMB_HALFMAX): Renamed from MP_LIMB_T_HALFMAX.
+       Fix umul_ppmm invocation for nails.
+
+2002-04-24  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/gcdext.c: Simplify by using mpn_tdiv_qr instead of
+       mpn_divmod.
+
+2002-04-24  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in (*-*-cygwin*): Give a sensible default command line
+       limit, to avoid blowups reported by Jim Fougeron on windows 9x.
+       (--enable-nails): Make the default 2, since mp_bases has data for that.
+
+       * mpfr/mpfr-math.h (__mpfr_nan): Use a "double" for the bytes, to
+       avoid a mis-conversion on alpha gcc 3.0.2.
+       (_MPFR_INFP_BYTES, _MPFR_INFM_BYTES): Should be a zero mantissa.
+
+2002-04-23  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/dive_ui.c: Fix typo.
+
+       * mpz/fits_s.h: Rewrite.
+
+       * mpz/jacobi.c: Nailify.
+       * mpz/kronuz.c: Additional nailify changes.
+       * mpz/kronsz.c: Likewise.
+
+2002-04-23  Kevin Ryde  <kevin@swox.se>
+
+       * demos/expr/Makefile.am (LDADD): Add $(LIBM) for the benefit of mpfr.
+
+       * mpz/divis_ui.c, mpz/cong_ui.c: Nailify.
+       * mpn/generic/bdivmod.c, mpz/divexact.c, mpz/dive_ui.c: Nailify.
+       * mpn/generic/sb_divrem_mn.c, mpn/generic/divrem.c,
+       mpn/generic/divrem_2.c: Nailify ASSERTs.
+       * mpn/x86/k6/mmx/logops_n.asm, mpn/x86/k6/mmx/com_n.asm: Nailify.
+       * mpz/inp_raw.c, mpz/out_raw.c: Nailify.
+       * mpz/kronzu.c, mpz/kronuz.c, mpz/kronzs.c, mpz/kronsz.c: Nailify.
+       * mpn/generic/divis.c, mpz/cong.c, mpz/cong_2exp.c: Nailify.
+       * gmp-impl.h (NEG_MOD): Nailify.
+
+       * gmp-impl.h, mpn/mp_bases.c: Add back GMP_NUMB_BITS==30 bases data.
+
+       * mpfr/get_d.c: Patch from Paul to avoid problem with constant folding
+       in gcc on OSF.
+
+       * mpn/lisp/gmpasm-mode.el: Remove mention of defunct LF macro.
+
+2002-04-22  Torbjorn Granlund  <tege@swox.com>
+
+       * demos/pexpr.c: Handle "binomial" operator.
+
+       * mpz/cmp_ui.c: Move assignments of `up' out of conditionals.
+
+       * mpn/generic/gcdext.c: Fix fencepost error in STAT code.
+
+       * gmp-impl.h (mpn_com_n): Nailify.
+
+       * tests/mpz/t-cdiv_ui.c: New file.
+       * tests/mpz/Makefile.am (check_PROGRAMS): Add t-cdiv_ui.
+       * mpz/cdiv_qr_ui.c: Nailify.
+       * mpz/cdiv_q_ui.c: Nailify.
+       * mpz/cdiv_r_ui.c: Nailify.
+       * mpz/cdiv_ui.c: Nailify.
+
+       * tests/misc/t-printf.c (CHECK_N): Add cast to allow `char' to be an
+       unsigned type.
+       * tests/misc/t-scanf.c: Likewise.
+
+       * mpz/mul_i.h: Rework nails code to handle parameter overlap.
+
+       * tests/mpz/t-set_f.c: Disable for nails.
+
+2002-04-21  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/set_si.c: Add cast to support LONG_LONG_LIMB.
+       * mpz/iset_si.c: Likewise.
+
+       * mpz/bin_ui.c: Nailify.
+       * mpz/bin_uiui.c: Nailify.
+
+       * mpz/cmpabs_ui.c: Nailify.
+
+       * tests/mpz/t-aorsmul.c: Nailify.
+       * mpz/aorsmul_i.c (mpz_addmul_ui, mpz_submul_ui): Nailify better.
+
+2002-04-20  Torbjorn Granlund  <tege@swox.com>
+
+       * tests/mpz/t-fdiv_ui.c: Check mpz_fdiv_ui.
+       * tests/mpz/t-tdiv_ui.c: Check mpz_tdiv_ui.
+
+       * mpz/tdiv_ui.c: Rewrite nails code.
+       * mpz/fdiv_ui.c: Nailify.
+
+       * tests/mpz/t-tdiv_ui.c: Check returned remainders.
+       * tests/mpz/t-fdiv_ui.c: Merge in recent t-tdiv_ui changes.
+
+       * mpz/tdiv_q_ui.c: Remove spurious TMP_* calls.
+
+       * mpz/fdiv_qr_ui.c: Nailify.
+       * mpz/fdiv_q_ui.c: Nailify.
+       * mpz/fdiv_r_ui.c: Nailify.
+
+       * mpz/get_si.c: Misc nailify changes to shut up compiler warnings.
+
+       * mpz/ui_pow_ui.c: Fix typo in last change.
+
+2002-04-20  Kevin Ryde  <kevin@swox.se>
+
+       * tests/misc/t-printf.c, tests/misc/t-scanf.c: Check all %n types.
+
+       * mpn/x86/k7/mmx/divrem_1.asm, mpn/x86/p6/mmx/divrem_1.asm
+       (mpn_preinv_divrem_1): New entrypoint.
+       (mpn_divrem_1): Avoid a branch when testing high<divisor.
+       * mpn/asm-defs.m4: Add define_mpn(preinv_divrem_1).
+       * configure.in: Allow divrem_1.asm to provide mpn_preinv_divrem_1.
+
+       * gmp-impl.h [nails]: Add #undefs of MUL_KARATSUBA_THRESHOLD etc, to
+       override CPU gmp-mparam.h.  Remove JACOBI_BASE_METHOD override since
+       it's nails-neutral.
+
+       * tests/mpn/t-mp_bases.c: New file.
+       * tests/mpn/Makefile.am (check_PROGRAMS): Add it.
+       * tests/t-constants.c: Move MP_BASES constants checks to it.
+
+       * mpn/mp_bases.c: Fix big_base_inverted values for nails.
+       * gmp-impl.h (MP_BASES_BIG_BASE_INVERTED_10,
+       MP_BASES_NORMALIZATION_STEPS_10): Fix nails values.
+       (MP_BASES_*): Remove GMP_NUMB_BITS == 30 data.
+
+       * mpn/x86/pentium/com_n.asm, mpn/x86/pentium/logops_n.asm: Add
+       NAILS_SUPPORT indicators.
+
+       * configure.in: Grep for NAILS_SUPPORT in cpu-specific code, and look
+       in "nails" subdirectories, print path used.
+       * mpn/asm-defs.m4 (NAILS_SUPPORT): New macro.
+
+       * mpfr/mpfr-test.h: Include config.h, for the benefit of test programs
+       not using gmp-impl.h.
+
+2002-04-19  Torbjorn Granlund  <tege@swox.com>
+
+       * tests/mpz/t-scan.c: Nailify.
+
+       * mpz/tdiv_qr_ui.c: Nailify.
+       * mpz/tdiv_q_ui.c: Nailify.
+       * mpz/tdiv_r_ui.c: Nailify.
+       * mpz/tdiv_ui.c: Nailify.
+
+       * mpz/cmp_ui.c: Nailify.
+
+       * mpz/ui_pow_ui.c: Misc nailify changes to shut up compiler warnings.
+
+       * mpz/scan0.c: Nailify.
+       * mpz/scan1.c: Nailify.
+
+       * tests/mpz/t-sizeinbase.c (mpz_fake_bits): Nailify.
+
+2002-04-18  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/aorsmul_i.c: Nailify.
+
+       * mpz/cmp_si.c: Nailify (botched).
+
+       * mpz/ui_pow_ui.c: Nailify.
+
+       * gmp-h.in (__GMPZ_FITS_UTYPE_P): Nailify.
+
+       * mpz/fits_s.h: Nailify.
+
+       * tests/mpz/bit.c (check_tstbit): Nailify.
+
+       From Paul Zimmermann:
+       * mpn/generic/sqrtrem.c: Nailify.
+
+       * mpz/n_pow_ui.c: Nailify.
+
+       * mpz/cfdiv_r_2exp.c: Nailify.
+
+       * randraw.c (lc): Undo: Let mpn_rshift put result in place to avoid
+       extra MPN_COPY.
+
+2002-04-17  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/clrbit.c: Add two GMP_NUMB_MASK masks after addition.
+
+       * mpn/generic/random2.c (LOGBITS_PER_BLOCK): Decrease to 4.
+
+       * gmp-impl.h (nail DIV_DC_THRESHOLD): Decrease to 50 to allow fast
+       division.
+
+       * mpn/generic/random2.c: Nailify.
+
+       * mpz/fac_ui.c: Nailify.
+
+       * mpz/mul_i.h: #if ... #endif code block to shut up gcc warnings.
+
+       * mpn/generic/sqrtrem.c: Adapt to GNU coding standards.
+       (mpn_dc_sqrtrem): New name for mpn_dq_sqrtrem.
+       Partial nailification.
+
+       * configure.in: As a temporary hack, clear extra_functions for nails
+       builds.
+
+       * gmp-h.in (mpz_get_ui): #if ... #endif else code block to shut up gcc
+       warnings.
+
+2002-04-17  Kevin Ryde  <kevin@swox.se>
+
+       * texinfo.tex: Update to 2002-03-26.08 per texinfo 4.2.
+       * gmp.texi: Must have @top in @ifnottex (or @contents doesn't come out
+       in one run).
+
+       * mpn/generic/scan0.c, mpn/generic/scan1.c: Nailify.
+
+       * tests/mpn/t-scan.c: New file.
+       * tests/mpn/Makefile.am (check_PROGRAMS): Add it.
+
+       * tests/refmpn.c, tests/tests.h (refmpn_tstbit): Use unsigned long for
+       bit index.
+       (refmpn_setbit, refmpn_clrbit, refmpn_scan0, refmpn_scan1): New
+       functions.
+
+       * mpfr/cmp_ui.c (mpfr_cmp_si_2exp): Fix b==0 i!=0 case.
+
+2002-04-17  Gerardo Ballabio <gerardo.ballabio@unimib.it>
+
+       * gmpxx.h, mpfrxx.h: Remove mpfr_class bool combinations, remove
+       mpfr_class::get_str2, use mp_rnd_t for rounding modes, use
+       8*sizeof(double) for mpfr_t's holding doubles.
+
+2002-04-17  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/powm.c: Nailify.
+       * mpz/powm_ui.c: Nailify.
+
+2002-04-16  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/hamdist.c: Nailify.
+       * tests/misc.c (urandom): Nailify.
+
+       * mpz/get_si.c: Nailify.
+       * gmp-h.in (mpz_get_ui): Nailify.  Streamline (and probably upset
+       memory checkers).
+
+       * gmp-impl.h (mp_bases[10] values): Add versions for GMP_NUMB_BITS
+       being 28, 60, and 63.
+       * mpn/mp_bases.c: Add tables for GMP_NUMB_BITS being 28, 60, and 63.
+
+       * mpz/iset_si.c: Nailify.
+       * mpz/iset_ui.c: Nailify.
+
+       * tests/mpz/convert.c (main): Print test number in error message.
+
+       * mpn/generic/get_str.c (mpn_sb_get_str): Shift up `frac' into nails
+       field after bignum division.
+
+2002-04-16  Kevin Ryde  <kevin@swox.se>
+
+       * gmp-h.in, gmp-impl.h (GMP_NAIL_MASK): Move to gmp.h.
+
+       * gmp.texi: Use @documentdescription and @copying, per texinfo 4.2.
+       (Low-level Functions): Clarify mpn_gcd overlap requirements, rewrite
+       mpn_set_str description, add nails section.
+       (C++ Interface General): Remove bool from types that mix with classes.
+       (Language Bindings): Add STklos, GNU Smalltalk, Regina.
+       (Binary to Radix, Radix to Binary): Describe new code.
+       (Assembler Cache Handling): More notes, mostly by Torbjorn.
+
+       * macos/configure (%vars): Remove __GMP from substitutions, per change
+       to main configure.
+
+       * mpn/generic/dive_1.c: Nailify.
+       * mpn/generic/mode1o.c: Nailify, remove bogus ASSERT in commented-out
+       alternate implementation.
+       * gmp-impl.h (SUBC_LIMB): New macro.
+
+       * tests/devel/try.c (validate_divexact_1): Correction to compare.
+       (udiv_qrnnd): New testing.
+       (SHIFT_LIMIT): Nailify.
+       (-b): New option, remove spurious "H" from getopt string.
+
+       * mpz/clrbit.c: Nailify.
+       * tests/mpz/t-hamdist.c: Nailify.
+       * gmp-impl.h (MPN_FIB2_SIZE): Nailify.
+       (PP): Nailify conditionals.
+       * tests/mpz/t-fib_ui.c (MPZ_FIB_SIZE_FLOAT): Nailify.
+
+       * configure.in, acinclude.m4: Establish GMP_NAIL_BITS and
+       GMP_LIMB_BITS for gmp-h.in configure tests.
+
+       * mpfr/*, configure.in: Update to final mpfr 2.0.1.
+       * mpfr/acinclude.m4 (MPFR_CONFIGS): Use $host, not uname stuff.
+       * mpfr/tests/tout_str.c: Patch from Paul for denorm fprintf tests.
+
+2002-04-15  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/divrem_1.c (EXTRACT): Remove.
+
+       * tests/mpz/t-tdiv_ui.c (dump_abort): Accept argument for error string.
+
+       * mpz/rrandomb.c: Nailify.  Needs further work.
+
+       * mpn/generic/mod_1.c: Nailify.
+
+       * gmp-impl.h: Set various *_THRESHOLD values to be used for nails to
+       avoid not yet qualified algorithms.
+       (MPZ_CHECK_FORMAT): Check that nail part is zero.
+
+       * tests/mpz/t-mul.c (main): Test squaring even for huge operands.
+       (base_mul): Nailify.
+       (dump_abort): Accept argument for error string.  Print product
+       difference.
+
+       * mpn/generic/set_str.c: Nailify.
+
+       * gmp-h.in (__GMPN_ADD, __GMPN_SUB): Nailify.
+
+2002-04-14  Torbjorn Granlund  <tege@swox.com>
+
+       * randraw.c (lc): Return non-nonsense return value for seed=0 case.
+       Check for m2exp being non-zero early; remove all other tests of m2exp.
+       Remove redundant MPN_ZERO call.
+       Let mpn_rshift put result in place to avoid extra MPN_COPY.
+       Remove confusing comment before function `lc' describing BBS algorithm.
+       Misc simplification and cleanups.
+       Nailify.  Needs further work.
+
+       * mpz/set_si.c: Nailify.
+       * mpz/set_ui.c: Nailify.
+       * mpz/mul_i.h: Nailify.
+
+       * tests/mpz/t-mul_i.c: Actually test _ui routines.  Add some more test
+       values.
+
+       * mpn/generic/mul_n.c: Finish nailifying toom3 code.
+
+2002-04-13  Kevin Ryde  <kevin@swox.se>
+
+       * mpfr/*: Update to another new mpfr 2.0.1.
+       * configure.in, Makefile.am, mpfr/Makefile.am, mpfr/tests/Makefile.am:
+       Use MPFR_CONFIGS macro, establish separate MPFR_CFLAGS for mpfr build.
+
+       * mpfr/tests/Makefile.am: Correction to convenience rule for libmpfr.a.
+
+2002-04-11  Kevin Ryde  <kevin@swox.se>
+
+       * mpfr/set_q.c: gmp-impl.h before mpfr.h to avoid _PROTO redefine.
+
+       * mpfr/*, configure.in: Update to new mpfr 2.0.1.
+
+       * tests/refmpn.c (refmpn_udiv_qrnnd, refmpn_divmod_1c_workaround):
+       Fixes for nails.
+
+       * tests/t-constants.c (MODLIMB_INVERSE_3): Nailify tests.
+       (MP_BASES_BIG_BASE_INVERTED_10, MP_BASES_NORMALIZATION_STEPS_10): Only
+       check these under USE_PREINV_DIVREM_1.
+       * tests/t-modlinv.c: Nailify tests.
+
+2002-04-11  Gerardo Ballabio <gerardo.ballabio@unimib.it>
+
+       * gmpxx.h: Remove bool combinations, remove mpf_class::get_str2, only
+       need <iosfwd> now.
+
+2002-04-11  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/diveby3.c: Nailify.
+       * gmp-impl.h (MODLIMB_INVERSE_3): Nailify.
+
+       * mpn/generic/mul_n.c: Nailify Toom3 code.
+
+2002-04-10  Kevin Ryde  <kevin@swox.se>
+
+       * gmp-impl.h (MPN_KARA_MUL_N_MINSIZE, MPN_KARA_SQR_N_MINSIZE): Set to
+       3, as needed by nails case.
+
+       * mpn/generic/addmul_1.c, mpn/generic/submul_1.c [nails]: Fix vl
+       assert, add rp,n and up,n asserts.
+
+       * mpfr/Makefile.am: Add new mpfr-math.h, install mpf2mpfr.h.
+
+2002-04-10  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/divrem_1.c: Nailify.  Update mp_size_t variables to use
+       `n' suffix instead of `size' suffix.
+       * mpn/generic/divrem_2.c: Likewise.
+       * mpn/generic/sb_divrem_mn.c: Nailify.
+       * mpn/generic/tdiv_qr.c: Nailify.
+       (SHL): Remove silly macro.
+
+       * mpn/generic/mul_n.c (mpn_kara_mul_n): Replace open-coded increment by
+       mpn_incr_u call.  Handle nails in ws[n] increment.
+       * mpn/generic/mul_n.c (mpn_kara_sqr_n): Likewise.
+
+       * gmp-h.in (GMP_NUMB_MASK): New #define.
+       (__GMPN_AORS_1): Add version for nails.
+
+       * gmp-impl.h (GMP_NUMB_MASK): Comment out, now in gmp.h.
+       (mpn_incr_u): Don't assume `incr' is non-zero.
+       (mpn_decr_u): Similarly.
+
+2002-04-09  Kevin Ryde  <kevin@swox.se>
+
+       * mpfr/*, configure.in: Update to mpfr 2.0.1.
+
+       * tests/refmpn.c (refmpn_mul_1c, lshift_make): Corrections for nails.
+       * tests/refmpn.c, tests/tests.h (refmpn_cmp_allowzero): New function.
+
+       * mpn/generic/mul_1.c [nails]: Fix vl assert, add {up,n} assert.
+
+       * mpn/pa32/hppa1_1/pa7100/addmul_1.asm,
+       mpn/pa32/hppa1_1/pa7100/submul_1.asm: Rename "size" define, to avoid
+       ELF .size directive.  Reported by LaMont Jones.
+
+       * tests/mpz/t-set_si.c: Add nails support.
+
+2002-04-05  Torbjorn Granlund  <tege@swox.com>
+
+       * gmp-impl.h: Replace nail mpn_incr_u, mpn_decr_u with faster versions.
+       (mp_bases[10] values): Check GMP_NUMB_BITS instead of BITS_PER_MP_LIMB.
+       Add GMP_NUMB_BITS == 30 version.
+       (__gmp_doprnt, etc): Remove parameter names.
+
+       * mpn/generic/mul_n.c: Nailify Karatsuba code.
+       * mpn/generic/get_str.c: Nailify.
+       * mpn/generic/sqr_basecase.c: Nailify.
+       * mpn/generic/lshift.c: Nailify.
+       * mpn/generic/rshift.c: Likewise.
+       * mpn/generic/add_n.c: Nailify.  Revamp non-nail code.
+       * mpn/generic/sub_n.c: Likewise.
+       * mpn/generic/mul_1.c: Likewise.
+       * mpn/generic/addmul_1.c: Likewise.
+       * mpn/generic/submul_1.c: Likewise.
+
+2002-04-02  Kevin Ryde  <kevin@swox.se>
+
+       * gmp-impl.h (BSWAP_LIMB_FETCH, BSWAP_LIMB_STORE) [powerpc]:
+       Corrections to constraints, and restrict to bigendian.
+
+2002-03-31  Kevin Ryde  <kevin@swox.se>
+
+       * tests/mpz/dive.c: Better diagnostics.
+
+       * tests/devel/try.c (mpn_get_str, mpn_umul_ppmm_r): New tests.
+
+       * tests/misc.c, tests/tests.h (byte_diff_lowest, byte_diff_highest):
+       New functions.
+
+       * tests/t-bswap.c: New file.
+       * tests/Makefile.am (check_PROGRAMS): Add it.
+
+       * tests/mpn/t-aors_1.c, tests/mpn/t-iord_u.c: Add nails support.
+
+       * gmp-impl.h (MPN_IORD_U) [x86]: Eliminate unnecessary jiord and iord,
+       rename "n" to incr per generic versions, restrict to nails==0.
+       (mpn_incr_u, mpn_decr_u): Add nails support.
+       (GMP_NAIL_LOWBIT, GMP_NUMB_MAX): New macros.
+
+       * tests/trace.c, tests/tests.h (byte_trace, byte_tracen): New
+       functions.
+       * tests/trace.c: Handle NULL operands.
+
+       * tests/refmpn.c, tests/devel/try.c, tune/speed.c: Add preliminary
+       nail support.
+
+       * tests/refmpn.c, tests/tests.h (byte_overlap_p, refmpn_equal_anynail,
+       refmpn_umul_ppmm_r, refmpn_udiv_qrnnd_r, refmpn_get_str,
+       refmpn_bswap_limb, refmpn_random, refmpn_random2, refmpn_bswap_limb):
+       New functions.
+
+       * gmp-impl.h, tests/refmpn.c (ASSERT_LIMB): Renamed from
+       ASSERT_MP_LIMB_T.
+
+       * mpn/x86/*/*.asm, mpn/powerpc32/*/*.asm, mpn/powerpc64/*/*.asm: Put
+       speeds after the copyright notice, so as to keep that clear.
+
+2002-03-29  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in (powerpc*-*-aix*): Correction to xlc -qarch selection,
+       for 32-bit mode.
+
+2002-03-28  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn: Fix spacing in many files.
+
+       * mpn/generic/aorsmul_1.c: Split into addmul_1.c and submul_1.c.
+       * mpn/generic/aors_n.c: Split into add_n.c and sub_n.c.
+
+       * mpn/pa64/add_n.asm: Trim another 0.125 cycle/limb.  Fix a comment.
+       * mpn/pa64/sub_n.asm: Likewise.
+
+       * mpn/pa64/mul_1.asm: Change comclr, comb to proper forms cmpclr, cmpb.
+       * mpn/pa64/addmul_1.asm: Likewise.
+       * mpn/pa64/submul_1.asm: Likewise.
+
+2002-03-28  Kevin Ryde  <kevin@swox.se>
+
+       * gmp.texi (Converting Integers): Fix type of exp in mpz_get_d_2exp,
+       reported by epl@unimelb.edu.au.
+       (References): Update Burnikel and Ziegler URL, reported by Keith
+       Briggs.
+
+       * gmp-h.in, mp-h.in, configure.in, acinclude.m4: Remove __GMP from
+       AC_SUBSTs, since autoconf says leading "_" in makefile variables is
+       not portable.
+
+       * demos/expr/run-expr.c: Declare optarg, optind, opterr if necessary.
+       * configure.in, demos/expr/expr-config-h.in: Configs for this.
+
+2002-03-27  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/Makefile.am (TARG_DIST): Remove pa64w and hppa, add pa32.
+
+       * configure.in (path_20w): Remove pa64w.
+
+       * mpn/pa64/udiv_qrnnd.asm: Tweak for PA8000 performance comparable to
+       that on PA8500.
+
+2002-03-26  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/pa32: New name for mpn/hppa.
+       * configure.in: Corresponding changes.
+
+       * mpn/pa64/umul_ppmm.asm: New file, generalized for both 2.0N and 2.0W.
+       * mpn/pa64/umul_ppmm.S: Remove.
+
+       * mpn/pa64/udiv_qrnnd.asm: Generalize for both 2.0N and 2.0W.
+       * mpn/pa64w/udiv_qrnnd.asm: Remove.
+
+2002-03-26  Kevin Ryde  <kevin@swox.se>
+
+       * mpfr/tests/tdiv.c, mpfr/tests/tui_div.c: Don't depend on nan and inf
+       handling in "double", for the benefit of alpha.
+
+       * configure (hppa2.0w): Set path to "pa64w pa64".
+
+       * acinclude.m4, configure.in (GMP_C_INLINE): New macro.
+       * acinclude.m4 (GMP_H_EXTERN_INLINE): Use it, and fix "yes" handling.
+
+2002-03-25  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/pa64w/add_n.s: Remove.
+       * mpn/pa64w/sub_n.s: Remove.
+       * mpn/pa64w/lshift.s: Remove.
+       * mpn/pa64w/rshift.s: Remove.
+       * mpn/pa64w/mul_1.S: Remove.
+       * mpn/pa64w/addmul_1.S: Remove.
+       * mpn/pa64w/submul_1.S: Remove.
+       * mpn/pa64w/sqr_diagonal.asm: Remove.
+
+       * mpn/pa64/mul_1.asm: New file with twice faster code; generalized
+       for both 2.0N and 2.0W.
+       * mpn/pa64/submul_1.asm: Likewise.
+       * mpn/pa64/mul_1.S: Remove.
+       * mpn/pa64/submul_1.S: Remove.
+
+       * mpn/pa64/sqr_diagonal.asm: Generalize for both 2.0N and 2.0W.
+
+       * mpn/pa64/add_n.asm: New file, generalized for both 2.0N and 2.0W.
+       * mpn/pa64/sub_n.asm: Likewise.
+       * mpn/pa64/lshift.asm: Likewise.
+       * mpn/pa64/rshift.asm: Likewise.
+       * mpn/pa64/add_n.s: Remove.
+       * mpn/pa64/sub_n.s: Remove.
+       * mpn/pa64/lshift.s: Remove.
+       * mpn/pa64/rshift.s: Remove.
+
+2002-03-24  Kevin Ryde  <kevin@swox.se>
+
+       * gmp-impl.h (BSWAP_LIMB_FETCH, BSWAP_LIMB_STORE): New macros.
+       * mpz/inp_raw.c, mpz/out_raw.c: Use them.
+       * acconfig.h (HAVE_HOST_CPU): Add some powerpc types.
+
+       * mpn/powerpc32/750/com_n.asm: New file.
+
+       * mpfr/tests/tout_str.c: Disable random tests, since they fail on
+       alphaev56-unknown-freebsd4.1 and do nothing by default.
+
+       * mpfr/tests/tsqrt.c: Don't depend on nan, inf or -0 in "double", for
+       the benefit of alpha.
+       * mpfr/sqrt.c: Clear nan flag on -0.
+
+       * demos/factorize.c: Use mpn_random() instead of random(), to avoid
+       portability problems.
+
+       * demos/isprime.c (print_usage_and_exit): Declare as "void" to avoid
+       warnings.
+
+       * demos/pexpr.c (setup_error_handler): Corrections to sigstack code.
+
+       * demos/calc/calc.y: Add some `;'s to make bison 1.34 happy.
+
+2002-03-23  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/pa64/addmul_1.asm: New file with twice faster code; generalized
+       for both 2.0N and 2.0W.
+
+2002-03-22  Kevin Ryde  <kevin@swox.se>
+
+       * tune/time.c: Add SGI hardware counter measuring method, change some
+       abort()s into ASSERT_FAIL()s.
+
+       * configure.in (AC_CHECK_HEADERS): Add fcntl.h and sys/syssgi.h.
+       (AC_CHECK_FUNCS): Add syssgi.
+
+       * configure.in, mpfr/Makefile.am, mpfr/tests/Makefile.am: Use
+       -mieee-with-inexact or -ieee_with_inexact for mpfr on alpha, so
+       denorms work.
+
+       * mpfr/isinteger.c: Fix a memory leak.
+
+2002-03-21  Torbjorn Granlund  <tege@swox.com>
+
+       * tune/speed.c (struct choice_t): Make `r' an mp_limb_t.
+
+2002-03-21  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in (HAVE_LIMB_BIG_ENDIAN, HAVE_LIMB_LITTLE_ENDIAN): Use an
+       AH_VERBATIM and better explanation.
+       * acinclude.m4 (GMP_C_DOUBLE_FORMAT): Similarly for the HAVE_DOUBLE
+       constants.
+
+       * gmp.texi (Number Theoretic Functions): Clarify sign of GCD returned
+       by mpz_gcdext.
+
+       * demos/pexpr.c, demos/pexpr-config-h.in, configure.in: Use an
+       autoconf test for stack_t.
+
+       * configure.in, gmp-h.in, mp-h.in, macos/configure, tests/mpz/reuse.c,
+       tests/mpf/reuse.c: Use __GMP_LIBGMP_DLL to enable windows declspec,
+       don't require _WIN32 (etc), remove __GMP_LIBGMP_SHARED and
+       __GMP_LIBGMP_STATIC.
+
+       * gmp-impl.h (mp_bases): Add __GMP_DECLSPEC, for the benefit of
+       tests/t-constants.c.
+
+       * tune/many.pl, tune/speed.h: Remove suffix hack for back.asm.
+
+2002-03-21  Paul Zimmermann  <Paul.Zimmermann@loria.fr>
+
+       * mpfr/sin_cos.c (mpfr_sin_cos): New file.
+       * mpfr/mpfr.h, mpfr/mpfr.texi, mpfr/Makefile.am: Add it.
+       * mpfr/tan.c: Fix sign in 2nd and 4th quadrants.
+
+       * mpfr/log10.c: Fix hangs on certain inputs.
+
+2002-03-20  Torbjorn Granlund  <tege@swox.com>
+
+       * demos/pexpr.c (setup_error_handler): Declare `s', the first
+       sigaltstack parameter, using `stack_t' just on AIX.
+
+2002-03-19  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/powerpc32/mul_1.asm: Use free caller-saves registers instead
+       of the callee-saves r30 and r31.
+
+2002-03-19  Kevin Ryde  <kevin@swox.se>
+
+       * tune/freq.c (freq_proc_cpuinfo): Recognise powerpc "clock", where
+       previously got the wrong result from "bogomips".
+
+       * mpn/powerpc32/add_n.asm, mpn/powerpc32/sub_n.asm: Rewrite, faster on
+       750, and smaller too.
+       * mpn/powerpc32/*.asm: Use L(), add some measured speeds.
+
+       * longlong.h (count_trailing_zeros) [vax]: Add a version using ffs,
+       but commented out.
+
+2002-03-17  Kevin Ryde  <kevin@swox.se>
+
+       * tune/speed.c, tune/speed.h, tune/common.c, tune/many.pl: Use optional
+       ".r" to specify operand overlaps for mpn_add_n, mpn_sub_n and logops.
+       Remove mpn_add_n_inplace and mpn_add_n_self.
+       * tune/many.pl: Fix MULFUNC_PROLOGUE parsing.
+
+       * gmp.texi (Known Build Problems): Note `make' problem with long
+       libgmp.la dependencies list.
+
+       * printf/doprnt.c, scanf/doscan.c (%zn): Remove test of non-existent
+       HAVE_SIZE_T, just use size_t unconditionally.
+       * printf/doprnt.c (%zd etc): Fix 'z' type parsing.
+       * tests/misc/t-printf.c, tests/misc/t-scanf.c: More tests.
+
+       * configure.in: Use AC_COPYRIGHT.
+       Add m4_pattern_allow(GMP_MPARAM_H_SUGGEST).
+
+       * tune/Makefile.am (libdummy.la): Remove this, sqr_basecase.c already
+       gets an ansi2knr rule from nodist_tuneup_SOURCES.
+
+       * longlong.h (count_leading_zeros) [pentiumpro gcc<3]: Test
+       HAVE_HOST_CPU_i686 too.
+
+       * mpz/out_raw.c (HTON_LIMB_STORE): Fix a typo in big endian #if.
+
+2002-03-14  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/x86/pentium/com_n.asm, mpn/x86/pentium/logops_n.asm,
+       mpn/x86/k6/mmx/com_n.asm: Add nails support.
+
+       * texinfo.tex: Update to 2002-03-01.06 (per texinfo 4.1).
+       * gmp.texi (@ma): Remove, @math does this now.
+
+       * mpfr/tests/reuse.c: Clear op1 and op2 flags only in their respective
+       outer loops.
+
+       * configure.in (--enable-cxx): Correction to the default stated in the
+       help string.
+       (power*-*-aix*, not powerpc): Use aix.m4, don't run
+       GMP_ASM_POWERPC_R_REGISTERS or use powerpc-defs.m4.
+
+2002-03-13  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/sparc32/gmp-mparam.h: New file.
+
+2002-03-13  Kevin Ryde  <kevin@swox.se>
+
+       * demos/expr/exprfr.c: More mpfr functions, corrections to agm, cos,
+       sin, rename log2 constant to loge2 to make room for log2 function.
+       * demos/expr/t-expr.c: More tests.
+
+       * mpz/inp_raw.c (NTOH_LIMB_FETCH) [generic 16bit]: Remove spurious "+".
+
+       * mpfr/acos.c: Avoid a memory leak for certain operands.
+
+       * acinclude.m4, configure.in (GMP_C_DOUBLE_FORMAT): New macro.
+
+       * acinclude.m4 (GMP_HPC_HPPA_2_0, GMP_ASM_UNDERSCORE,
+       GMP_ASM_ALIGN_LOG, GMP_ASM_LSYM_PREFIX, GMP_ASM_W32, GMP_ASM_X86_MMX):
+       Change ac_objext to OBJEXT, which is the documented variable.
+
+       * config.guess (powerpc*-*-*): Use #ifdef on constants POWER_630 etc
+       in the AIX test, since old versions don't have them all.
+
+2002-03-11  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in (LIBC211): New AC_DEFINE, for mpfr.
+
+       * configure.in (mips*-*-*): Support ABI=o32 on irix 6, allow gcc 2.7.2
+       to fall back on it, but detect it doesn't work with gcc 2.95.  Use
+       single mips-defs.m4 for both mips32 and mips64.
+       * acinclude.m4 (GMP_GCC_MIPS_O32): New macro.
+       * mpn/mips32/mips-defs.m4: Renamed from mips.m4.
+       * mpn/mips64/mips.m4: Remove (was a copy of mips32/mips.m4).
+
+       * mpn/powerpc32/750: New directory.
+       * configure.in (powerpc740, powerpc750, powerpc7400): Use it.
+       * mpn/powerpc32/750/gmp-mparam.h: New file.
+
+       * config.sub, gmp.texi (ultrasparc1): Remove this, just use plain
+       "ultrasparc".
+
+2002-03-10  Kevin Ryde  <kevin@swox.se>
+
+       * mpfr: Update to 20020301, except internal_ceil_exp2.c,
+       internal_ceil_log2.c, internal_floor_log2.c renamed to i_ceil_exp2.c,
+       i_ceil_log2.c, i_floor_log2.c to be unique in DOS 8.3.  And sqrtrem.c
+       removed since no longer required.
+       * mpfr/mpfr.texi: Fix some formatting.
+       * mpfr/tests/reuse.c: Patch by Paul to fix test4 variable handling.
+       * mpfr/sinh.c: Patch by Paul to fix err calculation when t==0.
+       * mpfr/tests/tget_d.c: Disable until portability of rnd_mode.c can be
+       sorted out.
+
+       * configure.in (powerpc*-*-*): Separate gcc and xlc cpu flags setups
+       for clarity.
+
+       * longlong.h (count_leading_zeros, count_trailing_zeros) [x86_64]: New
+       macros.
+
+2002-03-07  Kevin Ryde  <kevin@swox.se>
+
+       * gmp.texi (Build Options): Note all the ultrasparcs accepted.
+       (Language Bindings): Add Math::BigInt::GMP.
+
+       * config.sub (ultrasparc2i): New cpu type.
+       * config.guess (sparc-*-*, sparc64-*-*): Add some exact CPU detection.
+
+2002-03-05  Kevin Ryde  <kevin@swox.se>
+
+       * longlong.h (count_leading_zeros, count_trailing_zeros) [alphaev67,
+       alphaev68]: Use ctlz and cttz insns (as per gcc longlong.h).
+       (count_leading_zeros) [sparclite]: Fix parameter order (as per gcc
+       longlong.h).
+       * acconfig.h (HAVE_HOST_CPU_alphaev68): New define.
+
+       * config.guess [i?86-*-*]: Suppress error messages if compiler not
+       found or test program won't run.
+       [rs6000-*-*, powerpc-*-*]: Force code alignment for mfpvr test.
+
+2002-03-04  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/pow_1.c: New file.
+
+2002-03-03  Kevin Ryde  <kevin@swox.se>
+
+       * gmp.texi (Build Options): Note compiler must be able to fully link,
+       add alphapca57 and alphaev68, give a clearer example of MPN_PATH.
+       (Debugging): Add notes on valgrind.
+       (C++ Formatted Output): Clarify mpf showbase handling, in particular
+       note "00.4" in octal.
+
+       * printf/doprntf.c: Do a showbase on octal float fractions, for
+       instance "00.4" where previously it gave "0.4".
+       * tests/cxx/t-ostream.cc: Update.
+
+       * gmp-h.in, mp-h.in (__GMP_DECLSPEC, __GMP_DECLSPEC_XX): Test
+       __WIN32__ for Borland C, reported by "delta trinity".
+
+       * gmp-h.in, mp-h.in: Use <cstddef> for size_t under C++, suggested by
+       Hans Aberg some time ago.
+       * gmp-h.in (<iosfwd>): Move to top of file for clarity.
+
+       * Makefile.am (libgmpxx_la_SOURCES): Use dummy.cc to force C++.
+       (CXX_OBJECTS): Add osfuns$U.lo.
+       * dummy.cc: New file.
+       * cxx/Makefile.am (INCLUDES): Use __GMP_WITHIN_GMPXX.
+       (libcxx_la_SOURCES): Add osfuns.cc.
+       * gmp-h.in (__GMP_DECLSPEC_XX): New define, use it on libgmpxx funs.
+       * gmp-impl.h: Add __GMP_DECLSPEC to libgmp functions used by libgmpxx.
+
+       * longlong.h (COUNT_TRAILING_ZEROS_TIME): Remove, no longer used.
+
+       * gmp-impl.h (MPN_SIZEINBASE, MPN_SIZEINBASE_16): Correction to
+       __totbits for nails.
+
+       * gmp-impl.h (JACOBI_LS0): Test size before limb, to pacify valgrind.
+       (JACOBI_0LS): Ditto, and fix parens around arguments.
+
+       * mpn/x86/x86-defs.m4 (call_mcount): Add a counter to make data labels
+       unique, since simplified L() scheme no longer gives that effect.
+       (notl_or_xorl_GMP_NUMB_MASK): New macro.
+       Add m4_assert_numargs in a few places.
+
+       * configure.in (*sparc*): Fix cycle counter setups for ABI=64.
+
+2002-02-28  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/vax/gmp-mparam.h: New file.
+
+2002-02-28  Kevin Ryde  <kevin@swox.se>
+
+       * gmp-h.in (gmp_errno, gmp_version): Move into extern "C" block,
+       reported by librik@panix.com.
+
+       * gmp-h.in, mp-h.in (__GMP_DECLSPEC_EXPORT, __GMP_DECLSPEC_IMPORT):
+       Use __declspec(dllexport) and __declspec(dllimport) on Borland.
+       * gmp-h.in (_GMP_H_HAVE_FILE): Test __STDIO_H for Borland.
+       Reported by "delta trinity".
+
+       * gmp-impl.h (va_copy): Fall back on memcpy, not "=".
+
+       * mpn/generic/pre_mod_1.c: Add a comment about obsolescence.
+
+       * tune/time.c (MICROSECONDS_P): Don't trust time differences of 1
+       microsecond.
+
+       * tests/cxx/t-ostream.cc: Use "const char *" not just "char *" for
+       test data strings, avoids warnings on Sun CC.
+
+2002-02-27  Torbjorn Granlund  <tege@swox.com>
+
+       * configure.in: For sparc under solaris2.[7-9], pass -fsimple=1 to
+       disable some crazy -fast optimizations.
+
+2002-02-25  Torbjorn Granlund  <tege@swox.com>
+
+       * configure.in: For sparc under solaris2.[7-9], pass -fns=no to enable
+       denorm handling under -fast.
+
+2002-02-25  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in (alpha*-*-*): Rearrange -mcpu selection for gcc,
+       provide an ev67 -> ev6 fallback.  Fix -arch,-tune selection for DEC C.
+       Allow ~ for space in optional options lists.
+
+       * tune/tuneup.c (tune_preinv_divrem_1): Compare against an assembler
+       mpn_divrem_1 if it exists, not the generic C mpn_divrem_1_div.
+       (tune_preinv_mod_1): Ditto with mpn_mod_1.
+
+       * tune/time.c (DIFF_SECS_ROUTINE): Eliminate the unused "type"
+       parameter, try to make the code a bit clearer.
+
+       * tune/freq.c: Reduce the period measured for cycles versus
+       gettimeofday, add cycles versus microsecond getrusage.
+
+       * mpz/array_init.c: "i" should be mp_size_t, noticed by E. Khong.
+
+2002-02-24  Torbjorn Granlund  <tege@swox.com>
+
+       * configure.in: For sparc under solaris2.[7-9], pass -fast instead of
+       other optimization options.
+
+2002-02-23  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/asm-defs.m4 (GMP_NUMB_MASK): New macro.
+       (PROLOGUE, EPILOGUE): Relax quoting for the benefit of tune/many.pl
+       when GSYM_PREFIX non-empty.
+
+       * tune/time.c, tune/speed.h (speed_time_init): Include clock tick
+       period in speed_time_string.
+       * tune/time.c, configure.in (clock_gettime): New measuring method.
+
+       * tune/many.pl: Add -DHAVE_NATIVE_mpn_foo to C objects, to avoid
+       conflicts with a macro version in gmp-impl.h, eg. mpn_com_n.
+
+2002-02-22  Torbjorn Granlund  <tege@swox.com>
+
+       * demos/pexpr.c: Increase RLIMIT_STACK to 4Mibyte.
+
+2002-02-22  Kevin Ryde  <kevin@swox.se>
+
+       * tune/tuneup.c: Don't confuse gcc with mipspro cc in diagnostic.
+
+2002-02-20  Torbjorn Granlund  <tege@swox.com>
+
+       * configure.in (mips*-*-irix[6789]*): Set `extra_functions_n32', not
+       `extra_functions'.
+
+       * printf/doprnt.c: Conditionally include inttypes.h.
+       * printf/repl-vsnprintf.c: Likewise.
+       * scanf/doscan.c: Likewise.
+
+2002-02-20  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/x86/k7/mmx/com_n.asm: New file.
+
+       * mpz/n_pow_ui.c (SWAP_RP_TP): Use ASSERT_CODE on ralloc and talloc,
+       to ensure they needn't live past the initial allocs in a normal build.
+
+       * mpn/generic/mod_34lsub1.c: Note this is for internal use.
+
+2002-02-19  Torbjorn Granlund  <tege@swox.com>
+
+       * Clean up *_THRESHOLD names.  Many files affected.
+
+       * mpn/mips32: Asm-ify 32-bit mips code.
+       Move files from `mips2' to `mips32' directory.
+       * mpn/mips64: Move files from `mips3' to `mips64' directory.
+       * configure.in: Change `mips2' => `mips32' and `mips3' => `mips64'.
+
+2002-02-19  Kevin Ryde  <kevin@swox.se>
+
+       * acinclude.m4, configure.in (GMP_PROG_LEX): New macro.
+
+       * tune/tuneup.c (one): Start next threshold at a max of previous ones,
+       in order to get a good starting point for TOOM3_SQR_THRESHOLD if
+       KARATSUBA_SQR_THRESHOLD is 0 (ie. using mpn_mul_basecase only).
+
+       * configure.in, tune/tuneup.c (GMP_MPARAM_H_SUGGEST): New AC_DEFINE
+       replacing GMP_MPARAM_H_FILENAME.  Suggest a new file in a cpu specific
+       subdirectory rather than mpn/generic.
+
+       * acinclude.m4 (POWERPC64_PATTERN): New macro.
+       * configure.in (powerpc*-*-*): Use it.
+       (powerpc*-*-*): Use umul in 32L and aix64.
+       (mips*-*-*): Use umul, 32 and 64 bit versions.
+
+2002-02-18  Torbjorn Granlund  <tege@swox.com>
+
+       * longlong.h: Add basic x86-64 support.
+
+2002-02-17  Torbjorn Granlund  <tege@swox.com>
+
+       * demos/pexpr.c: Support `-X' for upper case hex, make `-x' output
+       lower case hex.
+
+       * mpn/mips2/umul.s: Make it actually work.
+       * mpn/mips3/umul.asm: New file.
+
+       * mpn/mips2/gmp-mparam.h: New file.
+
+2002-02-16  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/get_str.c (mpn_sb_get_str): Round frac upwards after
+       umul_ppmm calls.
+
+2002-02-16  Kevin Ryde  <kevin@swox.se>
+
+       * config.guess (alpha-*-*): Do alpha exact cpu probes on any system,
+       and only if configfsf.guess gives a plain "alpha".
+
+       * acinclude.m4 (GMP_PROG_CC_WORKS): Detect a gcc 3.0.3 powerpc64
+       linker invocation problem.
+
+2002-02-15  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/get_str.c (mpn_sb_get_str): For base 10, develop initial
+       digits using umul_ppmm, then switch to plain multiplication.
+
+       * config.guess: Rewrite Alpha subtype detection code for *bsd systems.
+
+2002-02-15  Kevin Ryde  <kevin@swox.se>
+
+       * gmp.texi (Build Options): Note powerpc exact cpu types.
+       (Debugging): Advertise DEBUG in memory.c.
+
+       * config.sub, config.guess: Add some powerpc exact cpus.
+       * configure.in: Add configs for them.
+
+       * memory.c [__NeXT__]: Remove unused #define of "static".
+       (__gmp_default_allocate, __gmp_default_reallocate): Print size if
+       allocation fails, don't use perror.
+
+       * gmp-h.in: g++ 3 demands __GMP_NOTHROW is before other attributes.
+
+2002-02-14  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/alpha/mul_1.asm: Fix typo preventing build on T3E systems.
+
+2002-02-14  Kevin Ryde  <kevin@swox.se>
+
+       * tune/tuneup.c (tune_set_str): Increase max_size, for the benefit of
+       alpha.
+
+       * macos/README: Bug reports to bug-gmp@gnu.org, clarify MacOS X a bit.
+
+       * mpn/generic/gcdext.c [WANT_GCDEXT_ONE_STEP]: Add missing TMP_FREE.
+
+       * tune/speed.c, tune/tuneup.c: Allow for speed_cycletime of 0.0 in
+       some diagnostic printouts.
+       * tune/time.c (speed_cycletime): Note can be 0.0.
+
+2002-02-12  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/alpha/mul_1.asm: Add mpn_mul_1c entry.
+
+       * mpn/pa64w/sqr_diagonal.asm: Use L() for labels.
+
+2002-02-11  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/get_str.c (mpn_sb_get_str): Change declaration of rp to
+       accommodate tuneup compiles.
+
+2002-02-11  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/alpha/default.m4, mpn/alpha/unicos.m4 (PROLOGUE_cpu): Add
+       noalign option.
+       * mpn/alpha/default.m4 (PROLOGUE_cpu): use ALIGN instead of ".align".
+
+       * gmp.texi (Debugging): Notes on Checker.
+       (Other Multiplication): Move note on float FFTs to here.
+       (Assembler Floating Point): New text and revisions by Torbjorn,
+       picture formatting by me.
+       Simplify tex pictures elsewhere a bit, share heights, eliminate some
+       gaps at line joins.
+
+2002-02-11  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/get_str.c (mpn_sb_get_str): Rewrite to generate fraction
+       limbs and use multiplication for digit development.  Trim allocation of
+       buf.  Get rid of code for !USE_MULTILIMB.
+
+2002-02-10  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/set_str.c (mpn_set_str): Undo this:
+       Change invocations of mpn_add_1 to instead use mpn_incr_u.
+
+       * tests/mpz/convert.c: Free str only after it is used in error message.
+
+       * mpn/generic/get_str.c (mpn_sb_get_str): Combine tail code for base 10
+       and generic bases.
+
+       * mpn/mp_bases.c: Add entries for base 256.  Remove __ prefix from
+       table name.
+       * gmp-impl.h (__mp_bases): Remove superfluous `mp_' part of name, making
+       it __gmpn_bases instead of __gmpn_mp_bases.
+       (mp_bases): New #define.
+       * tune/speed.h (SPEED_ROUTINE_MPN_SET_STR): Allow bases up to 256.
+       (SPEED_ROUTINE_MPN_GET_STR): Likewise.
+
+2002-02-09  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/set_str.c (mpn_set_str): Use mpn_mul_1c if available.
+       Change invocations of mpn_add_1 to instead use mpn_incr_u.
+
+2002-02-09  Kevin Ryde  <kevin@swox.se>
+
+       * mpz/array_init.c, mpz/cfdiv_q_2exp.c, mpz/cfdiv_r_2exp.c,
+       mpz/cong_2exp.c, mpz/divis_2exp.c, mpz/hamdist.c, mpz/init2.c,
+       mpz/mul_2exp.c, mpz/realloc2.c, mpz/scan0.c, mpz/scan1.c,
+       mpz/setbit.c, mpz/tdiv_q_2exp.c, mpz/tdiv_r_2exp.c, mpz/tstbit.c,
+       mpz/urandomb.c: Use GMP_NUMB_BITS.
+
+       * mpz/iset_str.c [__CHECKER__]: Store a dummy value to the low limb to
+       stop it appearing uninitialized.
+
+       * gmp-h.in (__GMP_NOTHROW): New macro.
+       (mp_set_memory_functions, mpz_cmp, mpz_cmp_si, mpz_cmp_ui, mpz_cmpabs,
+       mpz_cmpabs_ui, mpz_congruent_2exp_p, mpz_divisible_2exp_p,
+       mpz_fits_sint_p, mpz_fits_slong_p, mpz_fits_sshort_p, mpz_fits_uint_p,
+       mpz_fits_ulong_p, mpz_fits_ushort_p, mpz_get_si, mpz_get_ui,
+       mpz_getlimbn, mpz_hamdist, mpz_popcount, mpz_scan0, mpz_scan1,
+       mpz_size, mpz_sizeinbase, mpz_swap, mpz_tstbit, mpq_equal, mpq_swap,
+       mpf_cmp, mpf_cmp_si, mpf_cmp_ui, mpf_fits_sint_p, mpf_fits_slong_p,
+       mpf_fits_sshort_p, mpf_fits_uint_p, mpf_fits_ulong_p,
+       mpf_fits_ushort_p, mpf_get_default_prec, mpf_get_prec, mpf_get_si,
+       mpf_get_ui, mpf_integer_p, mpf_set_default_prec, mpf_set_prec_raw,
+       mpf_size, mpf_swap, mpn_add_1, mpn_cmp, mpn_hamdist, mpn_popcount,
+       mpn_sub_1): Use it.
+
+       * gmp-impl.h (MPN_SIZEINBASE, MPN_SIZEINBASE_16): New macros from
+       mpn_sizeinbase, and use GMP_NUMB_BITS.
+       * mpz/get_str.c, mpz/sizeinbase.c, mpbsd/mout.c, tune/speed.h: Use
+       MPN_SIZEINBASE.
+       * mpbsd/mtox.c: Use MPN_SIZEINBASE_16.
+
+       * configure.in, mpn/Makefile.am, gmp-impl.h (mpn_sizeinbase): Remove.
+       * mpn/generic/sizeinbase.c: Remove file.
+
+       * gmp-impl.h (MPN_GET_STR_SIZE): Remove.
+       * tests/mpn/t-g_str_size.c: Remove file.
+       * tests/mpn/Makefile.am: Update.
+
+       * Makefile.am (dist-hook): Don't distribute cvs merge ".#" files.
+
+2002-02-08  Torbjorn Granlund  <tege@swox.com>
+
+       * configure.in: Override extra_functions for all sparcv8 systems, not
+       just supersparc.
+
+2002-02-06  Kevin Ryde  <kevin@swox.se>
+
+       * tune/tuneup.c (tune_mul, tune_sqr): Disable FFTs until tuned.
+       * tune/speed.h (SPEED_ROUTINE_MPN_SET_STR): Fix memory clobber in
+       destination cache priming.
+
+       * printf/doprnt.c: Fix parsing of %s and %p conversions.
+       * tests/misc/t-printf.c (check_misc): Add some tests.
+
+2002-02-03  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/sparc32/v8/udiv.asm: New file, from v8/supersparc.
+
+       * mpn/generic/set_str.c: Rename indigits_per_limb => chars_per_limb.
+       Remove redundant chars_per_limb.  Reverse 4 loops in basecase code for
+       speed.  Use MP_BASES_CHARS_PER_LIMB_10.
+
+2002-02-03  Kevin Ryde  <kevin@swox.se>
+
+       * acinclude.m4 (GMP_PROG_NM): Ensure -B or -p get used when doing a
+       cross compile with the native nm, helps OSF for instance.
+       (GMP_ASM_LSYM_PREFIX): Remove ".byte 0" for the benefit of irix 6,
+       allow "N" from nm for OSF, allow for "t" for other systems, but prefer
+       no mention of the symbol at all.
+
+       * tune/tuneup.c (print_define_remark): New function.
+       Turn some "#if"s into plain "if"s.
+
+       * tune/tuneup.c, gmp-impl.h, tune/Makefile.am
+       (GET_STR_BASECASE_THRESHOLD, GET_STR_PRECOMPUTE_THRESHOLD): Tune these.
+       * mpn/generic/get_str.c [TUNE_PROGRAM_BUILD]: Cope with non-constant
+       GET_STR_PRECOMPUTE_THRESHOLD.
+
+2002-02-02  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/get_str.c (mpn_get_str): Fix typo in a declaration.
+
+2002-02-02  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/generic/set_str.c: Use MP_PTR_SWAP and POW2_P, add __GMP_PROTO
+       to convert_blocks prototype, disable SET_STR_BLOCK_SIZE sanity check.
+
+       * tune/set_strb.c, tune/set_strs.c: New files.
+       * tune/speed.h, tune/speed.c, tune/common.c, tune/Makefile.am: Add them.
+       * tune/tuneup.c: Tune SET_STR_THRESHOLD.
+       (DEFAULT_MAX_SIZE): Renamed from MAX_SIZE, allow any param.max_size[].
+
+2002-02-01  Torbjorn Granlund  <tege@swox.com>
+
+       * tests/mpz/convert.c: Increase operand size.  Add (yet disabled) code
+       for testing with random strings.
+
+       * mpn/generic/get_str.c (mpn_get_str): Rewrite to become sub-quadratic.
+       (mpn_dc_get_str, mpn_sb_get_str): New functions.
+
+2002-01-31  Kevin Ryde  <kevin@swox.se>
+
+       * gmpxx.h (cmp): Renamed from "compare".
+
+       * configure.in (AC_C_BIGENDIAN): Don't abort when cross compiling.
+       (PROLOGUE): Allow new style optional second parameter when grepping.
+
+       * acinclude.m4 (GMP_HPC_HPPA_2_0, GMP_ASM_UNDERSCORE,
+       GMP_ASM_ALIGN_LOG, GMP_ASM_LSYM_PREFIX, GMP_ASM_W32, GMP_ASM_X86_MMX):
+       Use $ac_objext for object filenames.
+       (GMP_ASM_UNDERSCORE): Use CCAS to assemble.
+
+       * demos/pexpr-config-h.in: New file.
+       * configure.in: Generate demos/pexpr-config.h.
+       (AC_CHECK_FUNCS): Add clock, cputime, setrlimit, sigaction,
+       sigaltstack, sigstack.
+       * acinclude.m4 (GMP_SUBST_CHECK_FUNCS, GMP_SUBST_CHECK_HEADERS): New
+       macros.
+       * demos/pexpr.c: Use pexpr-config.h, not various #ifdefs.
+       (setup_error_handler): Use signal if sigaction not available, allow
+       for SIGBUS missing on mingw.
+       (main): Use time() for random seed if gettimeofday not available.
+       (cleanup_and_exit): Move SIGFPE out of LIMIT_RESOURCE_USAGE.
+
+2002-01-30  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/set_str.c: Rewrite to become sub-quadratic.
+       (convert_blocks): New function.
+
+2002-01-30  Kevin Ryde  <kevin@swox.se>
+
+       * gmp-impl.h (GMP_NUMB_MASK, GMP_NAIL_MASK, GMP_NUMB_HIGHBIT,
+       ASSERT_MPN, ASSERT_MP_LIMB_T): New macros.
+
+       * mpn/generic/fib2_ui.c: Use GMP_NUMB_BITS, simplify the data
+       generator program, share __gmp_fib_table initializers between bit
+       sizes, cope with bit sizes other than those specifically setup.
+       * gmp-impl.h (FIB_TABLE_LIMIT, FIB_TABLE_LUCNUM_LIMIT): Corresponding
+       rearrangement of conditionals.
+       * tests/mpz/t-fib_ui.c (check_fib_table): New test.
+
+2002-01-28  Kevin Ryde  <kevin@swox.se>
+
+       * mpz/set_si.c, mpz/iset_si.c: Store to _mp_d[0] unconditionally, use
+       an expression for _mp_size.
+
+       * mpz/init.c, mpz/init2.c, mpz/iset.c, mpq/init.c [__CHECKER__]: Store
+       dummy values to low limbs to stop them appearing uninitialized.
+
+2002-01-26  Kevin Ryde  <kevin@swox.se>
+
+       * mpfr/mpfr-test.h (MAX, MIN, ABS): Use instead a patch from Paul and
+       Vincent.
+
+2002-01-24  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in: Extra quoting to get argument help messages right.
+
+       * gmp.texi (Efficiency): Suggest hex or octal for input and output.
+       (Formatted Output Strings): Mention "*" for width and precision.
+
+       * mpn/generic/sizeinbase.c: New file, adapted from mpz/sizeinbase.c.
+       Use POW2_P, use __mp_bases[base].big_base for log2(base).
+       * configure.in, mpn/Makefile.am: Add it.
+       * gmp-impl.h: Add prototype.
+       * mpz/sizeinbase.c, tune/speed.h, mpn/generic/get_str.c,
+       mpz/get_str.c, mpbsd/mout.c, mpbsd/mtox.c: Use it.
+       * mpz/get_str.c: Write directly to user buffer, skip at most one
+       leading zero, eliminate special case for x==0.
+       * mpbsd/mtox.c: Allocate exact result space at the start, eliminate
+       special case for x==0.
+       * mpbsd/mout.c: Only need to skip one high zero with mpn_sizeinbase.
+
+       * configure.in (--enable-nails): New option.
+       (GMP_NAIL_BITS, GMP_LIMB_BITS, GMP_NUMB_BITS): New defines for gmp.h
+       and config.m4.
+       * gmp-h.in: Add templates.
+
+       * mpfr/mpfr-test.h (MAX, MIN, ABS): Use #ifndef to avoid a redefine
+       error on AIX xlc.
+
+2002-01-23  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/get_str.c: Correct type of `out_len'.
+
+2002-01-22  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/generic/pre_divrem_1.c: Corrections to some ASSERTs.
+
+       * mpfr/mul_ui.c: Don't call mpn_lshift with 0 shift.
+
+       * mpfr/mpz_set_fr.c: Produce correct mpz_t for f==0.
+
+2002-01-21  Torbjorn Granlund  <tege@swox.com>
+
+       * longlong.h (32-bit powerpc add_ssaaaa): Remove spurious commutative
+       declaration.
+       (64-bit powerpc add_ssaaaa): Likewise.
+
+2002-01-20  Kevin Ryde  <kevin@swox.se>
+
+       * acinclude.m4 (GMP_FUNC_VSNPRINTF): Use %n to better detect sparc
+       solaris 2.7 problems.
+
+2002-01-19  Torbjorn Granlund  <tege@swox.com>
+
+       * demos/pexpr.c (mpz_eval_expr): Optimize s^rhs for -1 <= s <= 1.
+       (cleanup_and_exit): Improve error message wording.
+
+2002-01-19  Kevin Ryde  <kevin@swox.se>
+
+       * mpfr/mpfr.h (_PROTO): Use __GMP_PROTO, for compatibility with
+       gmp-impl.h.
+
+2002-01-17  Torbjorn Granlund  <tege@swox.com>
+
+       * mpfr/mpfr-test.h: Test "__hpux", not "hpux".  Mask off mrand48
+       return value to 31 bits to work around sloppy mpfr #include practices.
+
+       * mpfr/tests/*.c: Use #include "", not <>, for gmp.h and mpfr.h.
+       Make sure to #include mpfr-test.h from all files that use random().
+
+2002-01-17  Kevin Ryde  <kevin@swox.se>
+
+       * gmp-impl.h (__GMP_REALLOCATE_FUNC_MAYBE_TYPE): New macro.
+       * gmp-impl.h, mpz/get_str.c, mpz/out_raw.c, mpq/get_str.c,
+       mpq/set_str.c, mpf/get_str.c, printf/asprntffuns.c, printf/doprnt.c,
+       printf/repl-vsnprintf.c, printf/snprntffuns.c, scanf/doscan.c,
+       mpbsd/mtox.c: Some fixes to compile as C++.
+
+       * mpn/generic/jacbase.c (JACOBI_BASE_METHOD): New tuned parameter,
+       replacing COUNT_TRAILING_ZEROS_TIME test.  Add a third method too.
+       * tune/speed.c, tune/speed.h, tune/common.c, tune/Makefile.am: Add
+       measuring of mpn_jacobi_base methods.
+       * tune/jacbase1.c, tune/jacbase2.c, tune/jacbase3.c: New files.
+       * tune/tuneup.c (JACOBI_BASE_METHOD): Tune this.
+       * mpn/x86/*/gmp-mparam.h (COUNT_TRAILING_ZEROS_TIME): Remove macro.
+
+       * gmp-h.in: Use __gmp prefix on variables in inlines.
+
+       * gmp-impl.h (MPN_COPY_INCR, MPN_COPY_DECR): Remove __i, unused.
+
+       * mpn/generic/mul_fft.c: Use HAVE_NATIVE_mpn_addsub_n, not ADDSUB.
+       Use CNST_LIMB for some constants.
+
+2002-01-15  Kevin Ryde  <kevin@swox.se>
+
+       * tests/mpbsd/Makefile.am: Add a convenience rule for ../libtests.la.
+
+       * printf/Makefile.am: libdummy.la should be in EXTRA_LTLIBRARIES.
+
+       * mpf/out_str.c: Use MPF_SIGNIFICANT_DIGITS, so mpf_out_str and
+       mpf_get_str give the same for ndigits==0.
+
+       * mpfr/exceptions.c (mpfr_set_emin, mpfr_set_emax): Work around a
+       powerpc64 gcc 3.0 -O2 bug.
+
+       * tests/memory.c, tests/tests.h (tests_memory_validate): New function.
+
+2002-01-14  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/generic/sb_divrem_mn.c, mpn/generic/divrem_1.c,
+       mpn/generic/divrem_2.c, mpn/generic/mod_1.c: Don't use UMUL_TIME and
+       UDIV_TIME, just default to preinv.
+       * gmp-impl.h (USE_PREINV_DIVREM_1, USE_PREINV_MOD_1): Ditto.
+       (DIVEXACT_1_THRESHOLD, MODEXACT_1_ODD_THRESHOLD): Don't use UMUL_TIME
+       and UDIV_TIME, make default thresholds 0.
+       (UDIV_NORM_PREINV_TIME, UDIV_UNNORM_PREINV_TIME): Remove macros.
+       * mpn/x86/*/gmp-mparam.h (UMUL_TIME, UDIV_TIME,
+       UDIV_NORM_PREINV_TIME): Remove macros.
+
+       * gmp.texi (Headers and Libraries): New section, being the header
+       notes from "GMP Basics" and some new stuff.
+       (Parameter Conventions): Notes on "const" parameters.
+       (Formatted Output Strings): Add type N, tweak some wording.
+
+       * tests/refmpn.c (refmpn_divmod_1c): Avoid a bug in i386 gcc 3.0.
+
+2002-01-12  Kevin Ryde  <kevin@swox.se>
+
+       * mpz/root.c: Add <stdlib.h>, for abort().
+
+       * mpfr/tests/Makefile.am (AUTOMAKE_OPTIONS): Add ansi2knr.
+       * mpfr/mpfr.h, mpfr/mpfr-test.h, reuse.c, tadd.c, tadd_ui.c, tagm.c,
+       tatan.c, tcmp2.c, tcos.c, tdiv.c, tdiv_ui.c, teq.c, texp.c,
+       tget_str.c, thyperbolic.c, tlog.c, tmul.c, tout_str.c, tpow.c,
+       trandom.c, tset_z.c, tsin.c, tsqrt.c, tsqrt_ui.c, tsub_ui.c, ttan.c,
+       tui_div.c: Fixes for K&R.
+
+       * tests/misc/t-scanf.c (check_misc, check_misc):
+
+       * tests/mpz/t-inp_str.c, tests/mpq/t-inp_str.c, tests/misc/t-scanf.c:
+       Avoid strings in ASSERT, not enjoyed by K&R.
+       * gmp-impl.h (ASSERT): Note this.
+
+       * tests/tests.h (refmpn_mod_34lsub1): Add __GMP_PROTO.
+
+       * mpbsd/Makefile.am: Avoid an automake problem with ansi2knr and
+       sources in a different directory.
+
+       * printf/repl-vsnprintf.c: Test HAVE_LONG_DOUBLE for long double.
+
+       * mpn/Makefile.am (nodist_libdummy_la_SOURCES): Add mod_34lsub1.c,
+       mul_2.c, pre_divrem_1.c.
+
+       * gmp-h.in, gmp-impl.h (mpn_add_nc, mpn_addmul_1c, mpn_addsub_n,
+       mpn_addsub_nc, mpn_divrem_1c, mpn_dump, mpn_mod_1c, mpn_mul_1c,
+       mpn_mul_basecase, mpn_sqr_n, mpn_sqr_basecase, mpn_sub_nc,
+       mpn_submul_1c): Move to gmp-impl.h, since they're undocumented.
+
+       * gmp-impl.h (mpn_reciprocal): Remove, unused.
+
+       * tune/many.pl (cntlz, cnttz): Use new SPEED_ROUTINE_COUNT_ZEROS.
+
+2002-01-11  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/hppa/*.asm, mpn/pa64/*.asm, mpn/pa64w/*.asm: Use L().
+
+2002-01-08  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/asm-defs.m4 (PROLOGUE, EPILOGUE): New scheme, optional function
+       name to EPILOGUE, check for missing or wrong function name EPILOGUE.
+       * mpn/alpha/unicos.m4, mpn/alpha/default.m4, mpn/m68k/m68k-defs.m4,
+       mpn/mips3/mips.m4, mpn/ia64/default.m4, mpn/powerpc32/aix.m4,
+       mpn/powerpc64/aix.m4, mpn/x86/x86-defs.m4: Consequent updates, add a
+       few more asserts.
+       * mpn/alpha/unicos.m4, mpn/alpha/default.m4, mpn/alpha/cntlz.asm,
+       mpn/alpha/invert_limb.asm (PROLOGUE_GP): Change to an optional "gp"
+       parameter on plain PROLOGUE.
+
+       * gmp.texi (Low-level Functions): mpn_get_str doesn't clobber an extra
+       limb, and doesn't clobber at all for power of 2 bases.
+       (Language Bindings): Add python gmpy.
+
+       * mpz/get_str.c: Determine realloc size arithmetically.
+
+       * mpbsd/mtox.c: Size memory block returned to actual space needed.
+       * gmp.texi (BSD Compatible Functions): Describe this.
+
+       * mpz/get_str.c: Don't copy mpn_get_str input for power of 2 bases.
+       * mpbsd/mtox.c: Ditto, and as a side effect avoid a memory leak from a
+       missing TMP_FREE.
+
+       * mpz/get_str.c, mpbsd/mout.c: No longer need +1 limb for
+       mpn_get_str clobber.
+
+       * gmp-impl.h (MPN_GET_STR_SIZE): New macro.
+       * mpn/generic/get_str.c, mpz/get_str.c, mpbsd/mout.c, mpbsd/mtox.c,
+       tune/speed.h: Use it.
+       * tests/mpn/t-g_str_size.c: New test.
+       * tests/mpn/Makefile.am: Add it.
+
+       * gmp-impl.h (POW2_P): New macro.
+       * mpn/generic/get_str.c, tests/misc.c: Use it.
+
+       * printf/doprnt.c: Add "N" for mpn, share some code between N, Q and Z.
+       * tests/misc/t-printf.c: Add tests.
+       * gmp-impl.h (ASSERT_CODE): New macro.
+
+       * tests/mpbsd/t-mtox.c: New test.
+       * tests/mpbsd/Makefile.am: Add it.
+       (allfuns_LDADD): Don't link against libgmp when testing everything in
+       libmp can link.
+
+2002-01-07  Torbjorn Granlund  <tege@swox.com>
+
+       * gmp-impl.h (MPN_COPY_INCR, MPN_COPY_DECR): Rewrite generic versions.
+
+2002-01-06  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/generic/pre_divrem_1.c: Don't support size==0.
+       * tests/devel/try.c: Update.
+
+       * mpn/generic/get_str.c: Add special case for base==10.
+       * gmp-impl.h (MP_BASES_CHARS_PER_LIMB_10, MP_BASES_BIG_BASE_10,
+       MP_BASES_BIG_BASE_INVERTED_10, MP_BASES_NORMALIZATION_STEPS_10): New
+       constants.
+       * tests/t-constants.c: Add checks.
+       * mpn/mp_bases.c [GENERATE_TABLE]: Print defines for gmp-impl.h, print
+       all standard bits-per-limb by default.
+
+       * demos/pexpr.c, demos/expr/expr.h, demos/expr/expr-impl.h: Use
+       __GMP_PROTO.
+
+       * gmp-h.in (mpn_divexact_by3c): Remove variables from prototype, to
+       keep out of application namespace.
+
+2002-01-04  Torbjorn Granlund  <tege@swox.com>
+
+       * gmp-impl.h: Move _PROTO declaration to before its first usages.
+
+2002-01-04  Kevin Ryde  <kevin@swox.se>
+
+       * gmp-h.in, mp-h.in, tests/tests.h: Rename _PROTO to __GMP_PROTO, and
+       don't use #ifndef, just define it ourselves.
+       * gmp-impl.h: Provide _PROTO as an alias for __GMP_PROTO, to avoid big
+       edits internally, for the moment.
+
+2002-01-03  Torbjorn Granlund  <tege@swox.com>
+
+       * tune/speed.c (usage): Insert "\n\" into a string.
+
+2001-12-30  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/pa64/udiv_qrnnd.c: Remove file.
+       * mpn/pa64w/udiv_qrnnd.c: Remove file.
+
+       * gmp-impl.h (MPN_IORD_U): Change formatting (labels in pos 0, insns
+       indented by tab).
+       (MPN_INCR_U): Use "addl $1,foo; jc", not "incl foo; jz".
+
+       * gmp-impl.h (udiv_qrnnd_preinv): Use plain subtract, not sub_ddmmss,
+       in one more case.
+
+2001-12-30  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/generic/get_str.c (udiv_qrnd_unnorm): New macro.
+       Use "do while" for dig_per_u loop since it's non-zero.
+       * acconfig.h (HAVE_HOST_CPU_m68k etc): Add templates.
+
+       * mpn/generic/mul_basecase.c, mpz/mul.c, mpz/n_pow_ui.c,
+       mpn/x86/pentium/mul_2.asm, tests/devel/try.c, tests/tests.h,
+       tests/refmpn.c, tune/speed.c, tune/speed.h, tune/common.c,
+       tune/many.pl (mpn_mul_2): New parameter style.
+       * gmp-impl.h (mpn_mul_2): Add prototype.
+       * configure.in (gmp_mpn_functions_optional): Add mul_2.
+
+       * longlong.h (__vxworks__): Remove from powerpc tests, not correct,
+       not on its own at least.
+
+       * tune/speed.c: Add "aas" to specify 0xAA..AA data.
+
+       * tune/tuneup.c (print_define_end): Indicate "never" and "always".
+
+2001-12-29  Torbjorn Granlund  <tege@swox.com>
+
+       * mpq/set_d.c: ANSI-fy.
+       * mpz/invert.c: Use PTR and SIZ (cosmetic change).
+
+       * mpz/cong.c: Rename `xor' to `sign' to avoid C++ reserved word.
+
+2001-12-28  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/sparc64/sqr_diagonal.asm: New file.
+
+2001-12-28  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/generic/get_str.c: Avoid one mpn_divrem_1 by running main loop
+       only until msize==1.
+
+       * tune/tuneup.c: Break up all() for clarity.
+       (USE_PREINV_DIVREM_1, USE_PREINV_MOD_1): Compare against plain
+       division udiv_qrnnd, not the tuned and possibly preinv version.
+
+       * tune/freq.c: Split sysctl and sysctlbyname probes into separate
+       functions, shorten some identifiers, put descriptions inside
+       functions, define functions unconditionally and do nothing if
+       requisites not available.
+
+       * mpz/inp_raw.c: Avoid a gcc 3.0 powerpc64 bug on AIX.
+
+       * acinclude.m4, configure.in (GMP_C_RESTRICT): New macro.
+
+       * mpfr/sin.c: Patch from Paul to fix sign of sin(3pi/2).
+
+       * demos/calc/calc.y: Improve some error messages.
+
+2001-12-28  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/sparc64/mul_1.asm: Rename r72 -> r80.
+       * mpn/sparc64/addmul_1.asm: Likewise.
+
+2001-12-27  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/tdiv_qr.c: Misc formatting cleanups.
+       For switch case 2, replace `dn' with its value (2).
+
+2001-12-25  Torbjorn Granlund  <tege@swox.com>
+
+       * tests/devel/mul_1.c: Add FIXED_XLIMB.
+       * tests/devel/addmul_1.c: Likewise.
+       * tests/devel/submul_1.c: Likewise.
+
+       * tests/devel/add_n.c: Improve error message.
+       Accept command line argument for # of tests.
+       * tests/devel/sub_n.c: Likewise.
+
+       * tests/devel/: Remove CLOCK settings.
+
+       * mpn/sparc32/v9/mul_1.asm: Rewrite.
+       * mpn/sparc32/v9/addmul_1.asm: Rewrite.
+       * mpn/sparc32/v9/submul_1.asm: Rewrite.
+
+2001-12-24  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/sparc64/mul_1.asm: Get rid of global constant 0.0 (L(noll)).
+       * mpn/sparc64/addmul_1.asm: Likewise.
+
+2001-12-23  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/get_str.c: Move final ASSERT to just before zero fill
+       loop.
+
+2001-12-22  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/get_str.c: Move ASSERTs out of loops.  Split digit
+       generation code into two loops, saving a test of msize in the loop.
+
+2001-12-22  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/x86/x86-defs.m4, mpn/x86/*/*.asm: Remove L / LF scheme putting
+       function name in local labels.
+
+       * mpn/generic/get_str.c: Use mpn_preinv_divrem_1, add a couple of
+       ASSERTs.
+
+       * mpn/generic/pre_divrem_1.c: New file.
+       * configure.in (gmp_mpn_functions): Add it.
+       * gmp-impl.h (mpn_preinv_divrem_1): Add prototype.
+       (USE_PREINV_DIVREM_1, MPN_DIVREM_OR_PREINV_DIVREM_1): New macros.
+       * tests/devel/try.c, tune/speed.c, tune/speed.h, tune/common.c,
+       tune/many.pl, tune/Makefile.am (mpn_preinv_divrem_1): Add testing and
+       measuring.
+       * tune/tuneup.c: Determine USE_PREINV_DIVREM_1.
+       * tune/pre_divrem_1.c: New file.
+       * tests/refmpn.c, tests/tests.h (refmpn_preinv_divrem_1): New function.
+
+       * tests/mpz/t-io_raw.c: New file.
+       * tests/mpz/Makefile.am (check_PROGRAMS): Add it.
+
+       * mpz/inp_raw.c, mpz/out_raw.c: Rewrite.
+       * acinclude.m4, configure.in (AC_C_BIGENDIAN): New test.
+       * gmp-impl.h (BSWAP_LIMB): New macro.
+
+       * acinclude.m4 (GMP_PROG_CC_WORKS): For a native compile, demand
+       executables will run, per AC_PROG_CC.  This detects ABI=64 is unusable
+       in a native sparc solaris 7 build with the kernel in 32-bit mode.
+       * gmp.texi (ABI and ISA): Add notes on this, add an example configure
+       setting an ABI.
+
+       * tune/tuneup.c, configure.in: Print the gmp-mparam.h filename.
+       * tune/tuneup.c: Print the CPU frequency.
+
+       * tune/time.c, tune/speed.h: Add s390 "stck" method, flatten
+       conditionals in speed_time_init a bit, use have_* variables to let
+       some code go dead in speed_starttime and speed_endtime.
+
+       * tune/freq.c (speed_cpu_frequency_irix_hinv): New function.
+
+       * Makefile.am, configure.in: Restore mpfr.
+
+       * configure.in: Add --with-readline, AC_PROG_YACC and AM_PROG_LEX.
+       * demos/calc/calc.y, demos/calc/calclex.l: Add readline support, add
+       lucnum function.
+       * demos/calc/Makefile.am: Add calcread.c, calc-common.h, use $(YACC),
+       $(LEX) and $(LEXLIB).
+       * demos/calc/calcread.c, demos/calc/calc-common.h,
+       demos/calc/calc-config-h.in, demos/calc/README: New files.
+
+       * configure.in: Put demos/expr configs in expr-config.h.
+       * demos/expr/expr-config-h.in: New file.
+       * demos/expr/expr-impl.h: Renamed from expr-impl-h.in, get configs
+       from expr-config.h.
+       * demos/expr/Makefile.am: Update.
+
+       * demos/expr/exprfr.c: Use mpfr_sin and mpfr_cos, remove some spurious
+       returns.
+
+2001-12-20  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/sparc64/mul_1.asm: Trim an instruction.
+       * mpn/sparc64/addmul_1.asm: Likewise.
+
+       * mpn/ia64/add_n.asm: Rewrite.
+       * mpn/ia64/sub_n.asm: Rewrite.
+
+2001-12-19  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/ia64/mul_1.asm: Rewrite.
+       * mpn/ia64/addmul_1.asm: Rewrite.
+       * mpn/ia64/submul_1.c: Use TMP_ALLOC_LIMBS.
+
+       * tests/devel/mul_1.c: Improve error message.
+       Accept command line argument for # of tests.
+       * tests/devel/addmul_1.c: Likewise.
+       * tests/devel/submul_1.c: Likewise.
+
+2001-12-18  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/mips3/mul_1.asm: Add NOPs to save a cycle on R1x000.
+
+2001-12-18  Kevin Ryde  <kevin@swox.se>
+
+       * gmpxx.h (gmp_randclass): Don't allow copy constructors or "=",
+       implementation by Gerardo.
+
+       * gmp-h.in (operator<<, operator>>): Remove parameter names from
+       prototypes, to keep out of user namespace.
+
+       * acinclude.m4 (GMP_FUNC_VSNPRINTF): Let the test program work as C++.
+
+2001-12-16  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/sparc64/mul_1.asm: Rewrite.
+       * mpn/sparc64/addmul_1.asm: Rewrite.
+       * mpn/sparc64/submul_1.asm: Rewrite.
+
+       * mpn/sparc64/addmul1h.asm: Remove.
+       * mpn/sparc64/submul1h.asm: Remove.
+       * mpn/sparc64/mul1h.asm: Remove.
+
+2001-12-15  Kevin Ryde  <kevin@swox.se>
+
+       * gmp-h.in (mpn_add, mpn_add_1, mpn_cmp, mpn_sub, mpn_sub_1): Follow
+       __GMP_INLINE_PROTOTYPES for whether to give prototype with inline.
+
+       * configure.in (i686*-*-*, pentiumpro-*-*, pentium[23]-*-*,
+       athlon-*-*, pentium4-*-*): Fall back on -march=pentium if
+       -march=pentiumpro or higher is not good (eg. solaris cmov).
+
+2001-12-12  Torbjorn Granlund  <tege@swox.com>
+
+       * gmp-impl.h (MPN_ZERO): Rewrite generic version to be similar to
+       powerpc version.
+
+2001-12-12  Kevin Ryde  <kevin@swox.se>
+
+       * acinclude.m4 (GMP_PROG_CC_WORKS): Detect cmov problems with gcc
+       -march=pentiumpro on solaris 2.8.
+
+       * tune/common.c, tune/speed.h: Allow for commas in count_leading_zeros
+       and count_trailing_zeros macros.
+
+       * demos/expr/Makefile.am: Distribute exprfr.c and exprfra.c.
+
+       * tune/Makefile.am (speed_ext_SOURCES): Should be speed-ext.c.
+
+2001-12-10  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/s390/addmul_1.asm: New file.
+       * mpn/s390/submul_1.asm: New file.
+       * mpn/s390/mul_1.asm: New file.
+       * mpn/s390/gmp-mparam.h: Update.
+
+2001-12-07  Kevin Ryde  <kevin@swox.se>
+
+       * gmp-h.in, mp-h.in, gmp-impl.h: __GMP_DECLSPEC at start of
+       prototypes, for the benefit of Microsoft C.
+
+       * gmp.texi (Introduction to GMP): Mention ABI and ISA section.
+       (Known Build Problems): Recommend GNU sed on solaris 2.6.
+       (Assigning Integers): Direct feedback to bug-gmp.
+       (References): Typo, Knuth vol 2 is from 1998.
+
+       * gmpxx.h (gmp_randclass): Add initializers for gmp_randinit_default
+       and gmp_randinit_lc_2exp_size.
+       * gmp.texi (C++ Interface Random Numbers): Describe them.
+
+       * tests/misc/t-locale.c, tests/cxx/t-locale.cc: Ensure mpf_clear is
+       done when the localeconv override doesn't work.  Reported by Mike
+       Jetzer.
+
+       * printf/doprnti.c: Don't showbase on a zero mpq denominator.
+       * tests/misc/t-printf.c, tests/cxx/t-ostream.cc: Add test cases.
+
+2001-12-04  Kevin Ryde  <kevin@swox.se>
+
+       * gmp.texi (Known Build Problems): Update to gmp_randinit_lc_2exp_size
+       for the sparc solaris 2.7 problem.
+       (Reentrancy): SCO ctype.h affects all text-based input functions.
+       (Formatted Output Strings): Correction to the mpf example.
+       (Single Limb Division): Correction, should be q-1 not q+1.
+       (Extended GCD): Clarify why single-limb is inferior.
+       (Raw Output Internals): Clarify size is twos complement, note limb
+       order means _mp_d doesn't get directly read or written.
+       (Contributors): Clarify mpz_jacobi.
+       And a couple of formatting tweaks elsewhere.
+
+       * tests/cxx/t-headers.cc: New file.
+       * tests/cxx/Makefile.am: Add it.
+
+       * gmpxx.h: Add <strstream>, needed by mpf_class::get_str2.
+
+       * gmp-h.in (mpq_inp_str, mpn_hamdist): Add __GMP_DECLSPEC.
+
+2001-12-01  Torbjorn Granlund  <tege@swox.com>
+
+       * Version 4.0 released.
+
+       * mpfr/README: Replace contents with explanation of why mpfr is gone.
+
+2001-12-01  Kevin Ryde  <kevin@swox.se>
+
+       * Makefile.am, configure.in: Temporarily remove mpfr, just leave a
+       README.
+
+       * mpn/Makefile.am (EXTRA_DIST): Add Makeasm.am.
+
+2001-11-30  Gerardo Ballabio  <ballabio@sissa.it>
+
+       * tests/cxx/t-constr.cc, tests/cxx/t-expr.cc: New files.
+       * tests/cxx/Makefile.am (check_PROGRAMS): Add them.
+
+2001-11-30  Kevin Ryde  <kevin@swox.se>
+
+       * mpfr: Update to 2001-11-16.  Patch TMP handling of agm.c and sqrt.c,
+       use plain mpn_sqrtrem in sqrt.c, separate .c files for floor and ceil,
+       disable an expression style assert in add1.c.
+
+       * mpn/s370: Rename to s390.
+       * configure.in (s3[6-9]0*-*-*): Update.
+       * mpn/Makefile.am (TARG_DIST): Add s390.
+
+       * mpz/fits_s.c, mpf/fits_s.c, mpf/fits_u.c: Remove files, unused since
+       change to .h style.
+
+2001-11-29  Torbjorn Granlund  <tege@swox.com>
+
+       * gmp-h.in: Declare mpz_get_d_2exp and mpf_get_d_2exp.
+       * Makefile.am: Add mpz/get_d_2exp$U.lo and mpf/get_d_2exp$U.lo.
+       * mpf/Makefile.am: Add get_d_2exp.c.
+       * mpz/Makefile.am: Add get_d_2exp.c.
+
+2001-11-29  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/*/gmp-mparam.h: Update measured thresholds.
+       * mpn/s370/gmp-mparam.h: New file.
+
+       * mpz/millerrabin.c: Mark for internal use only, for now.
+       * gmp.texi (Number Theoretic Functions): Remove documentation.
+
+2001-11-28  Torbjorn Granlund  <tege@swox.com>
+
+       * mpf/get_d_2exp.c: New file.
+       * mpz/get_d_2exp.c: New file.
+
+       * mpz/realloc2.c: Fix typo.  Make more similar to mpz_realloc.
+       * mpz/realloc.c: Use __GMP_REALLOCATE_FUNC_LIMBS.
+
+2001-11-27  Gerardo Ballabio  <ballabio@sissa.it>
+
+       * gmpxx.h, mpfrxx.h: Various updates and improvements.
+
+2001-11-27  Kevin Ryde  <kevin@swox.se>
+
+       * gmp.texi (Useful Macros and Constants): Add gmp_version, add @findex
+       for mp_bits_per_limb.
+
+       * demos/perl/GMP.pm, demos/perl/GMP.xs: Use new style gmp_randinit's.
+       * demos/perl/test.pl: Update for this, and for mpz_perfect_power_p
+       handling of 0 and 1.
+
+2001-11-26  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/realloc.c: Clear variable when decreasing allocation to less than
+       needed.  Misc updates.
+
+2001-11-25  Kevin Ryde  <kevin@swox.se>
+
+       * tests/misc/t-locale.c: Avoid printf in the normal case, since the
+       replacement localeconv breaks it on SunOS 4.
+
+       * gmp.texi (Build Options, Notes for Package Builds): Note libgmpxx
+       depends on libgmp from same GMP version.
+
+       * acinclude.m4, configure.in (GMP_FUNC_SSCANF_WRITABLE_INPUT): New
+       test.
+       * scanf/sscanf.c, scanf/vsscanf.c: Use it to ensure sscanf input is
+       writable, if necessary.
+
+       * tests/misc/t-scanf.c: Ensure sscanf arguments are writable, always.
+       * configure.in (AC_CHECK_DECLS): Remove sscanf, no longer required.
+
+       * configure.in (none-*-*): Fix default CFLAGS setups.
+
+       * doc/configuration: Misc updates.
+
+2001-11-23  Kevin Ryde  <kevin@swox.se>
+
+       * mpz/init2.c, mpz/realloc2.c: New files.
+       * Makefile.am, mpz/Makefile.am: Add them.
+       * gmp-h.in: Add prototypes.
+       * gmp.texi (Efficiency): Mention these instead of _mpz_realloc.
+       (Initializing Integers): Add documentation, reword other parts.
+
+2001-11-22  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/cray/ieee/addmul_1.c: Fix logic for more_carries scalar loop.
+       * mpn/cray/ieee/submul_1.c: Likewise.
+
+2001-11-20  Kevin Ryde  <kevin@swox.se>
+
+       * gmp.texi (Known Build Problems): Note an out of memory on DJGPP.
+       (Function Classes): Update function counts.
+       Misc tweaks elsewhere.
+
+       * configure.in (AC_CHECK_DECLS): Add sscanf.
+       * tests/misc/t-scanf.c: Use it, for the benefit of SunOS 4.
+
+       * tal-debug.c, gmp-impl.h: More checks of TMP_DECL/TMP_MARK/TMP_FREE
+       consistency.
+
+       * mpfr/Makefile.am (AR): Explicit AR=@AR@ to override automake
+       default, necessary for powerpc64 ABI=aix64.
+
+2001-11-18  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/powm.c: Move TMP_MARK to before any TMP_ALLOCs.
+
+2001-11-18  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in (--enable-fft): Make this the default.
+       * gmp.texi (Build Options): Update.
+
+       * Makefile.am (libmp_la_DEPENDENCIES): Revise mpz objects needed by
+       new mpz/powm.c.
+
+       * gmp.texi (Random State Initialization): Add gmp_randinit_default and
+       gmp_randinit_lc_2exp_size, mark gmp_randinit as obsolete.
+       (Random State Seeding): New section, taken from "Random State
+       Initialization" and "Random Number Functions".
+
+       * configure.in (AC_CHECK_DECLS): Add fgetc, fscanf, ungetc.
+       * scanf/fscanffuns.c: Use these, for the benefit of SunOS 4.
+
+       * gmp-impl.h, gmp-h.in (__gmp_default_fp_limb_precision): Move back to
+       gmp-impl.h now not required for inlined mpf.
+
+       * randlc2s.c (gmp_randinit_lc_2exp_size): New file, the size-based LC
+       selection from rand.c.
+       * rand.c (gmp_randinit): Use it.
+       * randdef.c (gmp_randinit_default): New file.
+       * gmp-impl.h (RANDS): Use it.
+       (ASSERT_CARRY): New macro.
+       * gmp-h.in (gmp_randinit_default, gmp_randinit_lc_2exp_size): Add
+       prototypes.
+       * Makefile.am (libgmp_la_SOURCES): Add randdef.c and randlc2s.c.
+
+       * printf/asprntffuns.c: Include config.h before using its defines.
+
+       * gmp-impl.h: Move C++ <string> to top of file to avoid the memset
+       redefine upsetting configure tests.  Remove <iostream> since <iosfwd>
+       in gmp.h suffices.
+
+2001-11-16  Kevin Ryde  <kevin@swox.se>
+
+       * gmp.texi (Integer Exponentiation): mpz_powm supports negative
+       exponents.
+       (Assigning Floats, I/O of Floats, C++ Formatted Output, C++ Formatted
+       Input): Decimal point follows locale.
+       (Formatted Output Strings): %n accepts any type.
+       (Formatted Input Strings): New section.
+       (Formatted Input Functions): New section.
+       (C++ Class Interface): Corrections and clarifications suggested by
+       Gerardo.
+
+       * scanf/doscan.c, scanf/fscanf.c, scanf/fscanffuns.c, scanf/scanf.c,
+       scanf/sscanf.c, scanf/sscanffuns.c, scanf/vfscanf.c, scanf/vscanf.c,
+       scanf/vsscanf.c, scanf/Makefile.am, tests/misc/t-scanf.c: New files.
+       * gmp-h.in, gmp-impl.h, Makefile.am, configure.in: Consequent
+       additions.
+
+       * tests/misc: New directory.
+       * tests/misc/Makefile.am: New file.
+       * tests/misc/t-locale.c: New file.
+       * tests/misc/t-printf.c: Moved from tests/printf.
+       * tests/printf: Remove directory.
+       * configure.in, tests/Makefile.am: Update.
+
+       * tests/cxx/t-locale.cc: New file.
+       * tests/cxx/Makefile.am: Add it.
+
+       * mpf/set_str.c, cxx/ismpf.cc: Use localeconv for the decimal point.
+
+       * acinclude.m4 (GMP_ASM_X86_MCOUNT): Update to $lt_prog_compiler_pic
+       for current libtool, recognise non-PIC style mcount in windows DLLs.
+
+       * gmp-impl.h (__gmp_replacement_vsnprintf): Add prototype.
+
+       * gmp-impl.h (__gmp_rands, __gmp_rands_initialized,
+       modlimb_invert_table): Add __GMP_DECLSPEC for the benefit of test
+       programs using them from a windows DLL.
+       * longlong.h (__clz_tab): Ditto.
+
+       * mpn/x86/t-zdisp2.pl: New file.
+
+       * mpn/x86/pentium4/README: New file.
+
+2001-11-15  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/powm.c (HANDLE_NEGATIVE_EXPONENT): #define to 1.
+       * tests/mpz/reuse.c (main): Use mpz_invert to avoid undefined mpz_powm
+       cases.
+
+2001-11-14  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/powm_ui.c: Rewrite along the lines of mpz/powm.c (except still no
+       redc).
+       * mpz/powm.c: Adjust for negative b, after exponentiation done.  Add
+       (still disabled) code for handling negative exponents.  Misc cleanups.
+
+2001-11-14  Kevin Ryde  <kevin@swox.se>
+
+       * mpf/out_str.c: Use localeconv for the decimal point.
+
+       * tests/misc.c (tests_rand_end): Use time() if gettimeofday() not
+       available (eg. on mingw).
+
+2001-11-11  Kevin Ryde  <kevin@swox.se>
+
+       * gmp-h.in: Remove parameter names from prototypes, to keep out of
+       application namespace.
+
+2001-11-08  Kevin Ryde  <kevin@swox.se>
+
+       * acinclude.m4 (GMP_GCC_VERSION_GE): Fix sed regexps to work on
+       Solaris 8.
+
+       * printf/doprnt.c: Support %n of all types, per glibc.
+
+       * gmp-h.in, gmp-impl.h, mpf/abs.c, mpf/neg.c, mpf/get_prc.c,
+       mpf/get_dfl_prec.c, mpf/set_dfl_prec.c, mpf/set_prc_raw.c,
+       mpf/set_si.c, mpf/set_ui.c, mpf/size.c: Revert mpf inlining, in order
+       to leave open the possibility of keeping binary compatibility if mpf
+       becomes mpfr.
+
+       * mpn/x86/k7/mmx/lshift.asm, mpn/x86/k7/mmx/rshift.asm: Use Zdisp to
+       force code size for computed jumps.
+       * mpn/x86/k6/mod_34lsub1.asm, mpn/x86/k6/k62mmx/copyd.asm: Use Zdisp
+       to force good code alignment.
+       * mpn/x86/x86-defs.m4 (Zdisp): More instructions.
+
+       * mpn/x86/pentium/sqr_basecase.asm, mpn/x86/k7/mmx/mod_1.asm,
+       mpn/x86/k7/mmx/popham.asm: Remove some unnecessary "0" address offsets.
+
+       * mpq/set_si.c, mpq/set_ui.c: Set _mp_den._mp_size correctly if den==0.
+
+2001-11-07  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/hppa/hppa1_1/udiv_qrnnd.asm: Work around gas bug.
+
+       * mpn/asm-defs.m4 (PROLOGUE): Change alignment to 8 (probably a good
+       idea in general; required for hppa/hppa1_1/udiv_qrnnd.asm).
+
+2001-11-06  Torbjorn Granlund  <tege@swox.com>
+
+       * gmp-impl.h (MPN_COPY_INCR): Prepend local variable by `__'.
+       (MPN_COPY_DECR): Likewise.
+
+2001-11-05  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/powm.c: Call mpn functions, not mpz functions, for computation
+       mod m.  Streamline allocations to use a mixture of stack allocation and
+       heap allocation.  Add currently disabled phi(m) exponent reduction
+       code.  Misc optimizations and cleanups.
+
+2001-11-05  Kevin Ryde  <kevin@swox.se>
+
+       * mpq/inp_str.c: Remove unused variable "ret".
+
+       * mpn/x86/k7/sqr_basecase.asm: Fix a 0(%edi) to use Zdisp, so the
+       computed jumps hit the right spot on old gas.
+
+       * mpq/canonicalize.c: DIVIDE_BY_ZERO if denominator is zero.
+
+       * mpn/lisp/gmpasm-mode.el (comment-start-skip): Correction to the way
+       the first \( \) pair is setup.
+       (gmpasm-font-lock-keywords): Don't fontify the space before a "#" etc.
+       Misc tweaks to some comments.
+
+2001-11-03  Torbjorn Granlund  <tege@swox.com>
+
+       * tests/refmpn.c (refmpn_overlap_p): Reverse return values.
+
+2001-11-02  Kevin Ryde  <kevin@swox.se>
+
+       * tune/many.pl: Setup CFLAGS_PIC and ASMFLAGS_PIC, since that's no
+       longer done by configure.
+
+       * mpn/x86/pentium4/mmx/popham.asm: New file.
+
+       * mpn/x86/x86-defs.m4 (psadbw): New macro.
+       * mpn/x86/k7/mmx/popham.asm: Use it.
+
+       * tests/refmpn.c (refmpn_overlap_p): New function, independent of
+       MPN_OVERLAP_P.
+
+2001-10-31  Torbjorn Granlund  <tege@swox.com>
+
+       * tests/mpz/t-powm.c: Print proper error message when finding
+       discrepancy.
+
+2001-10-31  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/x86/pentium/mod_34lsub1.asm: New file.
+       * mpn/x86/k7/mod_34lsub1.asm: New file.
+       * mpn/x86/mod_34lsub1.asm: New file.
+
+2001-10-30  Kevin Ryde  <kevin@swox.se>
+
+       * tests/printf/t-printf.c (check_misc): Add checks from the glibc docs.
+       (check_vasprintf, check_vsnprintf): Run these unconditionally.
+
+       * gmp-impl.h (ASSERT_MPQ_CANONICAL): New macro.
+       * mpq/cmp.c, mpq/cmp_si.c, mpq/cmp_ui.c, mpq/equal.c: Add ASSERTs for
+       canonical inputs, where correctness depends on it.
+
+       * mpn/lisp/gmpasm-mode.el (comment-start-skip): Add "dnl".
+
+2001-10-27  Torbjorn Granlund  <tege@swox.com>
+
+       * demos/pexpr.c: Remove some unused variables.
+       (main): Allocate more buffer space to accommodate minus sign.
+
+2001-10-27  Kevin Ryde  <kevin@swox.se>
+
+       * gmp-impl.h, mpn/asm-defs.m4, configure.in, tune/speed.h,
+       tune/speed.c, tune/common.c, tune/many.pl, tests/devel/try.c: Add
+       mpn_mod_34lsub1.
+       * tests/refmpn.c, tests/tests.h (refmpn_mod_34lsub1): New function.
+
+       * mpn/generic/mod_34lsub1.c: New file.
+       * mpn/x86/k6/mod_34lsub1.asm: New file.
+       * mpn/x86/pentium4/sse2/mod_34lsub1.asm: New file.
+       * mpn/x86/x86-defs.m4 (Zdisp): Add another instruction.
+
+       * gmp-h.in, gmpxx.h: Use <iosfwd> not whole <iostream>.
+
+       * gmp.texi (Known Build Problems): Add note on test programs with
+       Windows DLLs.
+
+2001-10-26  Kevin Ryde  <kevin@swox.se>
+
+       * tests/mpq/t-get_d.c: Limit the size of "eps" for vax.
+
+       * gmp.texi (maybepagebreak): New macro, use it in a few places.
+       (Notes for Particular Systems): C++ Windows DLLs are not supported.
+       (Known Build Problems): Note sparc solaris 2.7 gcc 2.95.2 shared
+       library problems.
+       (Autoconf): Tweak version numbers shown.
+       (Integer Roots): mpz_perfect_square_p and mpz_perfect_power_p consider
+       0 and 1 perfect powers, mpz_perfect_power_p accepts negatives.
+       (Number Theoretic Functions): Add mpz_millerrabin, combined with a
+       reworded mpz_probab_prime_p.
+       (Formatted Output Strings): Misc clarifications.
+       (Formatted Output Functions): gmp_asprintf, gmp_vasprintf,
+       gmp_snprintf, gmp_vsnprintf always available.
+       (C++ Formatted Output): Misc rewordings.
+       (Formatted Input): New chapter.
+       (C++ Class Interface): New chapter, by Gerardo and me.
+       (Language Bindings): Update GMP++ now in GMP.
+       (C++ Interface Internals): New section, by Gerardo and me.
+
+       * printf/repl-vsnprintf.c: New file.
+       * configure.in, acinclude.m4, Makefile.am, printf/Makefile.am: Use it
+       if libc vsnprintf missing or bad.
+       * configure.in (AC_CHECK_FUNCS): Add strnlen.
+
+       * printf/snprntffuns.c, printf/vasprintf.c: Use
+       __gmp_replacement_vsnprintf if libc vsnprintf not available.
+       * printf/asprintf.c, printf/snprintf.c, printf/vasprintf.c,
+       printf/vsnprintf.c: Provide these functions unconditionally.
+       * acinclude.m4 (GMP_FUNC_VSNPRINTF): Remove warning about omissions
+       when vsnprintf not available.
+
+2001-10-24  Kevin Ryde  <kevin@swox.se>
+
+       * configure, aclocal.m4: Regenerate with a libtool patch for a stray
+       quote in AC_LIBTOOL_PROG_LD_SHLIBS under mingw and cygwin.
+
+       * gmp-impl.h (modlimb_invert): More comments.
+
+       * printf/doprnt.c, printf/doprnti.c: Use the precision field to print
+       leading zeros.
+       * tests/printf/t-printf.c: Test this.
+       * cxx/osdoprnti.cc, gmp-impl.h: Ignore precision in operator<<.
+
+       * tune/speed.c, tune/speed.h, tune/common.c: Add mpn_mul_1_inplace.
+
+2001-10-23  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/pprime_p.c (mpz_millerrabin): Remove function and its descendant.
+
+       * mpz/millerrabin.c: New file with code from pprime_p.c.
+       * mpz/Makefile.am: Compile millerrabin.c.
+       * Makefile.am (MPZ_OBJECTS): Ditto.
+       * gmp-h.in: Declare mpz_millerrabin.
+
+2001-10-22  Torbjorn Granlund  <tege@swox.com>
+
+       * tests/mpz/t-perfsqr.c: New file.
+       * tests/mpz/Makefile.am (check_PROGRAMS): Add it.
+
+       * demos/factorize.c (factor): Check for number to factor == 0.
+       (main): When invoked without arguments, read from stdin.
+
+       * mpz/perfpow.c: Add code to handle negative perfect powers ((-b)^odd).
+       Treat 0 and 1 as perfect powers.
+
+       * mpn/sparc32/v9/sqr_diagonal.asm: Jump past .align.
+
+2001-10-21  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/perfsqr.c (sq_res_0x100): Remove bogus final `,'.
+       (mpn_perfect_square_p): Suppress superfluous `&1' in sq_res_0x100 test.
+       (mpn_perfect_square_p, O(n) test): Improve comments.  Combine remainder
+       tests for some small primes.  Don't share code for different limb
+       sizes.  Use single `if' with many `||' for better code density.
+
+2001-10-22  Kevin Ryde  <kevin@swox.se>
+
+       * demos/perl/GMP.xs (mutate_mpz, tmp_mpf_grow): Make these "static".
+
+       * mpn/x86/pentium/popcount.asm, mpn/x86/pentium/hamdist.asm
+       (mpn_popcount_table): Use GSYM_PREFIX.
+
+2001-10-19  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/x86/*.asm: Add some measured speeds on various x86s.
+
+       * tests/mpz/reuse.c, tests/mpf/reuse.c: Disable tests when using a
+       windows DLL, because certain global variable usages won't compile.
+
+       * configure.in (AC_CHECK_FUNCS): Add alarm.
+       * tests/spinner.c: Conditionalize alarm and SIGALRM availability, for
+       the benefit of mingw32.
+
+       * acinclude.m4 (GMP_ASM_TYPE, GMP_ASM_SIZE): Suppress .type and .size
+       on COFF.
+
+       * acinclude.m4 (GMP_PROG_HOST_CC): New macro.
+       * configure.in: Use it for windows DLL cross-compiles.
+       * aclocal.m4, configure: Regenerate with libtool patch to hold HOST_CC
+       in the generated libtool script.
+
+       * aclocal.m4, configure: Regenerate with libtool patch to suppress
+       warnings when probing command line limit on FreeBSD.
+
+       * demos/qcn.c (M_PI): Define if not already provided, helps mingw32.
+
+2001-10-17  Kevin Ryde  <kevin@swox.se>
+
+       * printf/doprnt.c: Use <stdint.h> for intmax_t.
+
+       * longlong.h: Recognise __sparcv8 for gcc on Solaris.  Reported by
+       Mark Mentovai <mark@mentovai.com>.
+
+       * gmp-impl.h (gmp_allocated_string): No need for inline on member funs.
+
+2001-10-16  Kevin Ryde  <kevin@swox.se>
+
+       * gmp.texi (Debugging): Add mpatrol.
+       (Integer Comparisons, Comparing Rationals, Float Comparison): Index
+       entries for sign tests.
+       (I/O of Floats): Clarify mpf_out_str exponent is in decimal.
+       (C++ Formatted Output): mpf_t operator<< exponent now in decimal.
+       (FFT Multiplication): Use an ascii art sigma.
+       (Contributors): Add Gerardo Ballabio.
+
+       * cxx/osfuns.cc (__gmp_doprnt_params_from_ios): Always give mpf_t
+       exponent in decimal, irrespective of ios::hex or ios::oct.
+       * tests/cxx/t-ostream.cc (check_mpf): Update.
+
+       * printf/doprnt.c: Support %lln and %hhn.
+
+       * mpn/x86/pentium4/sse2/submul_1.asm: Use a psubq to negate the
+       initial carry (helps the submul_1c case), and improve the comments.
+
+2001-10-11  Kevin Ryde  <kevin@swox.se>
+
+       * acinclude.m4, configure.in (GMP_IMPL_H_IEEE_FLOATS): New macro.
+
+       * ltmain.sh: Send some rm errors to /dev/null, helps during compiles
+       on Solaris 2.7 and HP-UX 10.
+
+       * tal-notreent.c: Renamed from stack-alloc.c.
+       * Makefile.am, acinclude.m4, gmp-impl.h: Update.
+
+       * gmp-h.in: Don't give both prototypes and inlines, except on gcc.
+
+       * gmp-h.in, gmp-impl.h: Use #includes to get necessary standard
+       classes, add std:: to prototypes.
+       * cxx/*.cc, tests/cxx/t-ostream.cc: Add "using namespace std".
+       * acinclude.m4 (GMP_PROG_CXX_WORKS): Ditto.
+
+       * tests/*/Makefile.in, mpfr/tests/Makefile.in: Regenerate with
+       automake patch to avoid Ultrix problem with empty $(TESTS).
+
+       * */Makefile.in: Regenerate with automake patch to only rm *_.c in
+       "make clean" when ansi2knr actually in use, helps DOS 8.3.
+
+       * Makefile.in: Regenerate with automake patch to fix stamp-h
+       numbering, avoiding an unnecessary config.status run.
+
+2001-10-09  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/hppa/hppa1_1/udiv_qrnnd.asm: Use L macros for labels.
+       Quote L reloc operator.
+
+       * gmp-impl.h: Declare class string.
+
+       * mpn/asm-defs.m4 (INT32, INT64): Quote $1 to prevent further
+       expansion.
+
+       * mpn/alpha/ev6/mul_1.asm: New file.
+
+2001-10-09  Kevin Ryde  <kevin@swox.se>
+
+       * gmp.texi (Introduction to GMP): Add pentium 4 to optimized CPUs.
+       (Build Options): Note macos directory.
+       (Notes for Package Builds): GMP 4 series binary compatible with 3.
+       (Known Build Problems): Remove $* and ansi2knr note, now fixed, except
+       possibly under --host=none.
+       (Formatted Output Strings): Remove -1 prec for all digits.
+
+       * mpz/add.c, mpz/sub.c: Don't use mpz path on #include (helps macos).
+       * mpbsd/Makefile.am (INCLUDES): Add -I$(top_srcdir)/mpz.
+
+       * printf/doprnt.c, tests/printf/t-printf.c: Remove support for %.*Fe
+       prec -1 meaning all digits.
+
+       * acinclude.m4 (GMP_PROG_AR): Override libtool, use AR_FLAGS="cq".
+       (GMP_HPC_HPPA_2_0): Print version string to config.log.
+
+       * Makefile.am (AUTOMAKE_OPTIONS): Remove check-news (permission notice
+       in NEWS file is too big).
+       (dist-hook): Don't distribute numbered or unnumbered emacs backups.
+
+       * Makefile.am, cxx/Makefile.am: Updates for Gerardo's stuff.
+
+2001-10-09  Gerardo Ballabio  <ballabio@sissa.it>
+
+       * cxx/isfuns.cc: New file.
+       * gmp-impl.h: Add prototypes.
+       * cxx/ismpf.cc, cxx/ismpq.cc, cxx/ismpz.cc: New files.
+       * gmp-h.in: Add prototypes.
+       * gmpxx.h, mpfrxx.h: New files.
+
+2001-10-08  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in (with_tags): Establish a default based on --enable-cxx.
+
+       * aclocal.m4: Regenerate with libtool patches for sed char range to
+       help Cray, LTCC quotes and +Z warnings grep to help HP-UX.
+
+       * gmp-impl.h (doprnt_format_t, doprnt_memory_t, doprnt_reps_t,
+       doprnt_final_t): Use _PROTO.
+
+2001-10-05  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/asm-defs.m4 (INT32, INT64): Use LABEL_SUFFIX.
+
+       * mpn/hppa: Convert files to `.asm'.
+
+2001-10-05  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/Makeasm.am (.S files): Revert to separate CPP and CCAS, use
+       cpp-ccas, and only pass CPPFLAGS to CPP, not whole CFLAGS.
+       * mpn/cpp-ccas: New file.
+       * mpn/Makefile.am (EXTRA_DIST): Add it.
+
+       * tune/common.c, tune/speed.h: Change SPEED_ROUTINE_MPN_COPY_CALL uses
+       to SPEED_ROUTINE_MPN_COPY or new SPEED_ROUTINE_MPN_COPY_BYTES.  Avoids
+       macro expansion problems on Cray.
+
+       * configure.in (AC_PROG_CXXCPP): Add this, to make libtool happier.
+
+2001-10-04  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/rrandomb.c (gmp_rrandomb): Change bit_pos to be 0-based (was
+       1-based); shift 2 (was 1) when making bit mask.  These two changes
+       avoid undefined shift counts.
+       (gmp_rrandomb): Avoid most calls to _gmp_rand by caching random values.
+
+       * mpn/generic/random2.c: Changes for mirroring mpz/rrandomb.c.
+
+2001-10-04  Kevin Ryde  <kevin@swox.se>
+
+       * gmp.texi (Build Options): Add --enable-cxx.
+       (Notes for Particular Systems): Mention pentium4 performance and SSE2.
+       (Known Build Problems): Remove vax jsobgtr note, no longer needed.
+       (Converting Floats): Tweak mpf_get_str description.
+       (Low-level Functions): Correction to mpn_gcdext destination space
+       requirements.
+       (C++ Formatted Output): New section.
+       (Language Bindings): Add ALP.
+       (Contributors): Add Paul Zimmermann's square root, update my things.
+
+       * acinclude.m4 (GMP_PROG_CC_IS_GNU, GMP_PROG_CXX_WORKS): Send compiler
+       errors to config.log.
+
+       * mpq/Makefile.am (INCLUDES): Remove -DOPERATION_$*, not needed.
+
+       * mpn/x86/*.asm: Change references to old README.family to just README.
+
+       * mpz/README: Remove file, now adequately covered in the manual.
+
+2001-10-03  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/x86/pentium4/copyi.asm: New file.
+       * mpn/x86/pentium4/copyd.asm: New file.
+
+       * gmp-impl.h: Implement separate MPN_COPY_INCR and MPN_COPY_DECR
+       macros for CRAY systems.
+       (CRAY _MPN_COPY): Delete.
+
+2001-10-02  Kevin Ryde  <kevin@swox.se>
+
+       * tests/mpz/t-popcount.c (check_data): Use "~ (unsigned long) 0" to
+       avoid compiler warnings on sco.
+
+       * mpbsd/Makefile.am: Compile mpz files directly, no copying.
+       Use mpz/add.c and mpz/sub.c rather than mpz/aors.c.
+       (INCLUDES): Remove -DOPERATION_$*, no longer needed (by mpz).
+
+       * mpz/aors.h: Renamed from mpz/aors.c.
+       * mpz/add.c, mpz/sub.c: New files, using mpz/aors.h.
+       * mpz/aors_ui.h: Renamed from mpz/aors_ui.c.
+       * mpz/add_ui.c, mpz/sub_ui.c: New files, using mpz/aors_ui.h.
+       * mpz/fits_s.h: Renamed and adapted from mpz/fits_s.c.
+       * mpz/fits_sshort.c, mpz/fits_sint.c, mpz/fits_slong.c: New files.
+       * mpz/mul_i.h: Renamed from mpz/mul_siui.c.
+       * mpz/mul_si.c, mpz/mul_ui.c: New files, using mpz/mul_i.h.
+       * mpz/Makefile.am: Consequent updates.
+       (INCLUDES): Remove -DOPERATION_$*.
+
+       * mpf/fits_s.h: Renamed and adapted from mpf/fits_s.c.
+       * mpf/fits_sshort.c, mpf/fits_sint.c, mpf/fits_slong.c: New files.
+       * mpf/fits_u.h: Renamed and adapted from mpf/fits_u.c.
+       * mpf/fits_ushort.c, mpf/fits_uint.c, mpf/fits_ulong.c: New files.
+       * mpf/Makefile.am: Consequent updates.
+       (INCLUDES): Remove -DOPERATION_$*.
+
+       * cxx/osfuns.cc (__gmp_doprnt_params_from_ios): Don't use ios::hex etc
+       as cases in a switch, they're not constant in g++ 3.0.
+
+       * mpn/Makeasm.am (.s.o, .s.obj, .S.o, .S.obj, .asm.o, .asm.obj):
+       Locate source file with test -f the same as automake.
+       (.S): Let CCAS do the preprocessing, and run libtool for .S.lo.
+       (.asm.lo): Run libtool via m4-ccas to get new style foo.lo right.
+       (COMPILE_FLAGS): Add $(DEFAULT_INCLUDES), per new automake.
+       * mpn/m4-ccas: New file.
+       * mpn/Makefile.am (EXTRA_DIST): Add it.
+       * mpn/asm-defs.m4: Add m4_not_for_expansion(`DLL_EXPORT').
+       * mpn/x86/x86-defs.m4: Undefine PIC if DLL_EXPORT is set.
+       * configure.in (CFLAGS_PIC, ASMFLAGS_PIC): Remove, no longer needed.
+
+       * acinclude.m4 (GMP_FUNC_VSNPRINTF): Warn what's omitted when
+       vsnprintf not available.
+
+       * mpn/underscore.h: Remove file, not used since m68k converted to asm.
+       * mpn/Makefile.am (EXTRA_DIST): Remove it.
+
+       * tests/refmpz.c: Add <stdlib.h>, for free().
+
+2001-10-01  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/x86/pentium4/sse2/submul_1.asm: Apply some algebraic
+       simplifications.
+       * mpn/x86/pentium4/sse2/addmul_1.asm: Comment.
+
+2001-10-01  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in (--enable-cxx): New option for C++ support.
+       Add cxx and tests/cxx subdirectories.
+       * ltmain.sh, aclocal.m4: Update to libtool 2001-09-30.
+
+       * cxx/Makefile.am, cxx/Makefile.in, cxx/osdoprnti.cc, cxx/osfuns.cc,
+       cxx/osmpf.cc, cxx/osmpq.cc, cxx/osmpz.cc: New files.
+       * Makefile.am: Add them, in new libgmpxx.
+       * gmp-h.in, gmp-impl.h: Prototypes and support.
+       * tests/cxx/Makefile.am, tests/cxx/Makefile.in,
+       tests/cxx/t-ostream.cc: New files.
+
+       * tune/speed.h (SPEED_ROUTINE_MPN_GCD_CALL,
+       SPEED_ROUTINE_MPN_GCDEXT_ONE): mpn_gcdext needs size+1 for
+       destinations.  Found by Torbjorn.
+
+       * gmp-h.in (__GNU_MP__, __GNU_MP_VERSION): Bump to 4.0.
+       * mp-h.in (__GNU_MP__): Ditto.
+       * gmp.texi, Makefile.am, compat.c: Amend version 3.2 to 4.0.
+
+       * acinclude.m4 (GMP_PROG_CXX_WORKS): New macro.
+       (GMP_PROG_CC_WORKS): Write "conftest" test program, not a.out.
+
+       * gmp-impl.h (struct gmp_asprintf_t): Moved from printf/vasprintf.c.
+       (GMP_ASPRINTF_T_INIT): New macro.
+       (GMP_ASPRINTF_T_NEED): New macro, adapted from vasprintf.c NEED().
+       * printf/vasprintf.c: Use these.
+
+       * printf/asprntffuns.c: New file.
+       * printf/Makefile.am, Makefile.am: Add it.
+       * printf/asprntffuns.c, printf/vasprintf.c, gmp-impl.h
+       (__gmp_asprintf_memory, __gmp_asprintf_reps, __gmp_asprintf_final):
+       Move to asprntffuns.c, rename to __gmp and make global, remove
+       spurious formal parameters from __gmp_asprintf_final.
+
+       * configure.in (j90-*-*, sv1-*-*): Don't duplicate $path in $add_path.
+       (*-*-mingw*): Don't assemble with -DPIC (as per cygwin).
+
+       * printf/snprntffuns.c (gmp_snprintf_final): Remove spurious formal
+       parameters.
+
+       * tune/tuneup.c (POWM_THRESHOLD): Reduce stop_factor to 1.1 to help
+       Cray vector systems.
+
+       * tests/misc.c (tests_rand_start): Print GMP_CHECK_RANDOMIZE=NN to
+       facilitate cut and paste when re-running.
+       * tests/mpz/t-inp_str.c (check_data): Add more diagnostic prints.
+
+2001-09-30  Kent Boortz  <kent@swox.com>
+
+       * macos/configure, macos/Makefile.in, macos/README: Updates for gmp 4.
+       * gmp-h.in (_GMP_H_HAVE_FILE): Recognise Apple MPW.
+
+2001-09-30  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/cray/ieee/submul_1.c: Rewrite.  Streamline multiplications;
+       use `majority' logic.
+
+2001-09-27  Torbjorn Granlund  <tege@swox.com>
+
+       * gmp-h.in (__GMPN_AORS_1): Rewrite to work around Cray compiler bug.
+
+2001-09-26  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/x86/pentium4/sse2/gmp-mparam.h: New file.
+
+2001-09-26  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/x86/pentium4/sse2/dive_1.asm: New file.
+       * mpn/x86/pentium4/sse2/submul_1.asm: New file.
+       * mpn/x86/pentium4/sse2/sqr_basecase.asm: New file.
+
+       * mpn/x86/pentium/copyi.asm: New file, based on past work by Torbjorn.
+       * mpn/x86/pentium/copyd.asm: New file, ditto.
+       * mpn/x86/pentium/com_n.asm: Rewrite, ditto.
+
+       * printf/snprntffuns.c (gmp_snprintf_format): Copy va_list in case
+       vsnprintf trashes it.
+       * printf/vasprintf.c (gmp_asprintf_format): Ditto.
+       * gmp-impl.h, doprnt.c (va_copy): Move to gmp-impl.h.
+
+       * tests/mpz/t-cmp_d.c (check_low_z_one): Patch by Torbjorn for vax
+       limited float range.
+
+2001-09-23  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/vax/lshift.s: Change `jsob*' to `sob*'.
+       * mpn/vax/rshift.s: Likewise.
+
+2001-09-23  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/x86/pentium4/sse2/mul_basecase.asm: Some simple but real code.
+
+       * printf/doprnt.c: Use va_copy for va_list variables, copy function
+       parameter in case it's call-by-reference.
+
+       * tune/freq.c (speed_cpu_frequency_bsd_dmesg): New function.
+       (speed_cpu_frequency_table): Use it.
+
+       * tune/many.pl (popcount, hamdist): Fix declared return value.
+       (sb_divrem_mn): Remove a spurious duplicate entry.
+       (CLEAN): Add tmp-$objbase.c when using that for .h files.
+       (macro_speed): Give a default for .h files.
+       Add ATTRIBUTE_CONST or __GMP_ATTRIBUTE_PURE as appropriate.
+
+       * tune/speed.h (SPEED_ROUTINE_MPN_MOD_CALL,
+       SPEED_ROUTINE_MPN_PREINV_MOD_1, SPEED_ROUTINE_MPN_POPCOUNT,
+       SPEED_ROUTINE_MPN_HAMDIST, SPEED_ROUTINE_MPN_GCD_1N,
+       SPEED_ROUTINE_MPN_GCD_1_CALL, SPEED_ROUTINE_MPZ_JACOBI): Use return
+       values so gcc 3 won't discard calls to pure or const functions.
+       (mpn_mod_1_div, mpn_mod_1_inv): Add __GMP_ATTRIBUTE_PURE.
+
+2001-09-22  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/x86/pentium4/sse2/mul_basecase.asm: New file, placeholder
+       for real code, hiding the default x86 mul_basecase.asm.
+
+2001-09-22  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in (AC_PREREQ): Bump to 2.52.
+       (m4_pattern_forbid, m4_pattern_allow): New calls, forbid GMP_.
+       (AC_CHECK_HEADERS): Remove sys/types.h, already done by autoconf.
+       * acinclude.m4, configure.in (GMP_GCC_NO_CPP_PRECOMP): New macro.
+
+       * tests/devel/try.c (TYPE_PREINV_MOD_1): Don't run size==0.
+       (malloc_region): Need fd=-1 for mmap MAP_ANON on BSD.
+
+2001-09-20  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/cong.c (mpz_congruent_p): Fix one-limb c<d test.
+
+       * longlong.h: Rewrite __i370__ smul_ppmm; enable also for __s390__.
+
+       * configure.in: Add support for IBM 360, 370, 390 families.
+
+2001-09-20  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/x86/pentium4/sse2/diveby3.asm: New file.
+       * mpn/x86/pentium4/sse2/mode1o.asm: New file.
+
+2001-09-16  Kevin Ryde  <kevin@swox.se>
+
+       * printf/doprnt.c: '#' means showpoint and showtrailing for %e, %f, %g.
+       * tests/printf/t-printf.c (check_f): More test cases.
+
+2001-09-15  Torbjorn Granlund  <tege@swox.com>
+
+       * gmp-h.in (__GMPN_AORS_1): Remove param TEST, add OP and CB.
+       Postpone zeroing of (cout).
+       (__GMPN_ADD_1, __GMPN_SUB_1): Corresponding changes.
+
+2001-09-14  Kevin Ryde  <kevin@swox.se>
+
+       * ChangeLog: Merge in tests/rand/ChangeLog.
+       * tests/rand/ChangeLog: Remove file.
+
+       * printf/doprnt.c: Fix handling of a plain format after a GMP one; no
+       need to protect against negative precision internally.
+       * tests/printf/t-printf.c (check_misc): More checks.
+
+2001-09-12  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/cray/ieee/invert_limb.c: Add a PROLOGUE in a comment to have
+       HAVE_NATIVE_... defined.
+
+2001-09-11  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in, gmp-h.in (__GMP_HAVE_HOST_CPU_FAMILY_power,
+       __GMP_HAVE_HOST_CPU_FAMILY_powerpc): New AC_SUBSTs.
+       * gmp-h.in (__GMPN_COPY_INCR): Use them to select the power/powerpc
+       code, rather than preprocessor defines.
+
+       * acinclude.m4, configure.in (GMP_H_ANSI): New macro.
+
+       * gmp-h.in (__GMP_EXTERN_INLINE): Add a definition for SCO 8 cc.
+
+       * gmp-h.in, version.c (gmp_version): Make the pointer "const" as well
+       as the string.
+
+       * acinclude.m4, configure.in (GMP_PROG_CC_IS_XLC): Recognise xlc when
+       invoked under another name (cc, xlc128, etc).
+       * acinclude.m4 (GMP_PROG_CC_IS_GCC): Print a message when recognised.
+
+2001-09-11  Torbjorn Granlund  <tege@swox.com>
+
+       * gmp-h.in: Let __DECC mean __GMP_HAVE_CONST, etc.
+       * mp-h.in: Likewise.
+
+2001-09-10  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/x86/pentium4/mmx/lshift.asm: New file.
+       * mpn/x86/pentium4/mmx/rshift.asm: New file.
+
+       * tests/mpn/t-iord_u.c (check_incr_data): Work around HP compiler bug.
+       (check_decr_data): Likewise.
+
+2001-09-08  Kevin Ryde  <kevin@swox.se>
+
+       * gmp.texi (Integer Logic and Bit Fiddling): Update mpz_hamdist
+       behaviour, clarify mpz_popcount a touch.
+       (Language Bindings): Add mlton, fix alphabetical order.
+       (Single Limb Division): Describe 2 or 1/2 limbs at a time style.
+
+       * configure.in (AC_CHECK_FUNCS): Add mmap.
+       * tests/devel/try.c (malloc_region): Use mmap if available.
+
+       * tests/refmpz.c, tests/tests.h (refmpz_hamdist): New function.
+       * tests/mpz/t-hamdist.c: New file.
+       * tests/mpz/Makefile.am: Add it.
+
+       * mpz/hamdist.c: Support neg/neg operands.
+
+       * macos/Makefile.in: Remove dual compile of mpq/aors.c and
+       mpn/generic/popham.c.
+
+       * gmp-impl.h (popc_limb): New macro, adapted from mpn/generic/popham.c.
+       For 64-bits reuse 0x33...33 constant.
+       * mpn/generic/popcount.c, mpn/generic/hamdist.c: Split from popham.c,
+       use popc_limb macro, remove unused "i", don't bother with "register"
+       qualifiers.
+       * mpn/generic/popham.c: Remove file.
+
+       * ltmain.sh, configure, aclocal.m4: Update to libtool 1.4.1, with one
+       ltdll.c generation patch.
+       * doc/configuration: Misc updates, note libtool patch used.
+
+       * mpn/x86/pentium4/sse2/mul_1.asm: Use pointer increments not indexed
+       addressing, to get 4.0 c/l flat.
+
+       * tests/mpq/t-cmp_si.c (check_data): Use ULONG_MAX for denominators.
+
+       * tests/misc.c (mpz_negrandom): Use given rstate, not RANDS.
+
+2001-09-07  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/x86/pentium4/sse2/addmul_1.asm: New file.
+
+2001-09-04  Kevin Ryde  <kevin@swox.se>
+
+       * tune/freq.c: Define a HAVE for each speed_cpu_frequency routine to
+       avoid duplicating conditionals.
+       (speed_cpu_frequency_sco_etchw): New function.
+       (speed_cpu_frequency_table): Use it.
+       * tune/README: Mention SCO openunix 8 /etc/hw.
+
+       * mpz/fib_ui.c: Use ?: to avoid a gcc 3 bug on powerpc64.
+       Store back a carry for limb<long.
+
+       * mpn/x86/k7/mmx/divrem_1.asm, mpn/x86/k7/mmx/mod_1.asm,
+       mpn/x86/p6/mmx/divrem_1.asm: Fix a couple of comments.
+
+       * config.guess: Give m68020 for 68020 or better, not m68k.
+       * configfsf.guess: Update to 2001-09-04.
+
+2001-09-02  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in (m68k-*-*): Let m68k mean 68000, not 68020.
+       * gmp.texi (Notes for Particular Systems): Update.
+
+       * gmp-impl.h (union ieee_double_extract) [m68k]: Use longs, since int
+       might be only 16 bits.
+
+       * tests/mpq/t-aors.c: New file.
+       * tests/mpq/Makefile.am: Add it.
+
+       * tests/refmpq.c: New file.
+       * tests/Makefile.am: Add it.
+       * tests/tests.h: Add prototypes.
+
+       * mpq/aors.c: Share object code for mpq_add and mpq_sub.
+       * Makefile.am, mpq/Makefile.am: Single mpq/aors.lo now.
+
+       * tests/devel/try.c (TYPE_SUBMUL_1): Use correct reference routine.
+
+2001-08-30  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/x86/x86-defs.m4 (cmov_available_p): Add pentium4.
+
+       * gmp-h.in: Put #define renamings with prototypes.
+       Remove commented out #defines of gmp-impl.h things.
+       (mpn_invert_limb): Remove #define, already in gmp-impl.h.
+       (mpn_lshiftc, mpn_rshiftc): Remove #defines, unused.
+       (mpn_addsub_nc): Add prototype to #define.
+
+2001-08-28  Kevin Ryde  <kevin@swox.se>
+
+       * gmp.texi: Switch to GFDL.
+       (Top): Arrange copyright and conditions to appear here too.  For
+       clarity have all this before the miscellaneous macro definitions.
+       (Copying): Refer to COPYING.LIB file, mention plain GPL2 in demo
+       programs.
+       (Contributors, References): Use @appendix rather than @unnumbered.
+       (GNU Free Documentation License): New appendix.
+       (@contents): Move to start of document, use only for tex (not html).
+       (Debugging): Add leakbug.
+       (Build Options): Add pentium4.
+       (I/O of Rationals): Add mpq_inp_str.
+
+       * fdl.texi: New file, with two @appendix directive tweaks.
+       * Makefile.am (gmp_TEXINFOS): Add it.
+
+       * tests/mpz/io.c: Check mpz_inp_str return against ftell, send error
+       messages just to stdout.
+
+       * mpz/inp_str.c, gmp-impl.h (__gmpz_inp_str_nowhite): New function,
+       and share a __gmp_free_func call.
+       * mpq/inp_str.c: New file.
+       * Makefile.am, mpq/Makefile.am: Add it.
+       * tests/mpq/t-inp_str.c: New file.
+       * tests/mpq/Makefile.am (check_PROGRAMS): Add it.
+
+       * configure.in, acconfig.h (HAVE_HOST_CPU_FAMILY_power,
+       HAVE_HOST_CPU_FAMILY_powerpc, HAVE_HOST_CPU_FAMILY_x86): AC_DEFINEs
+       for processor families.
+       * gmp-impl.h: Use them, rather than cpp defines.
+
+       * demos/Makefile.am (primes_LDADD): Use $(LIBM), for log().
+
+       * tune/many.pl, tune/Makefile.am: Fix some from clean and distclean.
+
+2001-08-26  Kevin Ryde  <kevin@swox.se>
+
+       * tests/devel/try.c (ARRAY_ITERATION): Make types match on "?:" legs.
+       (TYPE_MPZ_JACOBI, TYPE_MPZ_KRONECKER): Remove some superseded code.
+
+       * tests/printf/t-printf.c (check_plain): Don't compare "all digits"
+       precision against plain printf.
+
+       * tune/Makefile.am: Eliminate empty TUNE_MPZ_SRCS.
+
+       * configure, config.in, INSTALL.autoconf: Update to autoconf 2.52.
+       * */Makefile.in, mdate-sh, missing, aclocal.m4, configure: Update to
+       automake 1.5.
+       * configfsf.guess, configfsf.sub: Update to 2001-08-23.
+
+2001-08-24  Torbjorn Granlund  <tege@swox.com>
+
+       * demos/primes.c: Complete rewrite.
+
+2001-08-24  Kevin Ryde  <kevin@swox.se>
+
+       * longlong.h: Test __ppc__ for apple darwin cc, reported by Jon
+       Becker.  Also test __POWERPC__, PPC and __vxworks__.
+
+       * tune/speed.h (speed_cyclecounter) [x86]: Don't clobber ebx in PIC.
+
+2001-08-22  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in (x86 mmx): Correction to mmx path stripping.
+
+2001-08-17  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in, acinclude.m4, Makefile.am, printf/Makefile.am,
+       tests/printf/Makefile.am, gmp-h.in, gmp-impl.h, gmp.texi: Remove C++
+       support, for the time being.
+       * printf/doprntfx.cc, doprntix.cc, osfuns.cc, osmpf.cc, osmpq.cc,
+       osmpz.cc, tests/printf/t-ostream.cc: Remove files.
+
+       * printf/doprnt.c, printf/doprntf.c, gmp-impl.h: Use a single
+       __gmp_doprnt_mpf, rather than a separate ndigits calculation.
+       * printf/doprnt.c, printf/doprntf.c, gmp-impl.h, gmp.texi,
+       tests/printf/t-printf.c: Let empty or -1 prec mean all digits for mpf.
+       * printf/doprnt.c, tests/printf/t-printf.c: Accept h or l in %n; let
+       negative "*" style width mean left justify.
+
+       * gmp-impl.h, mpf/get_str.c (MPF_SIGNIFICANT_DIGITS): New macro,
+       extracted from mpf/get_str.c.
+
+       * libmp.sym: New file.
+       * Makefile.am (libmp_la_LDFLAGS): Use it.
+       (DISTCLEANFILES): Remove asm-syntax.h, no longer generated.
+       Remove some comments about "make check".
+
+       * demos/perl/GMP.pm, GMP.xs, GMP/Mpf.pm: Add printf and sprintf,
+       change get_str to string/exponent for floats, remove separate
+       mpf_get_str.
+       * demos/perl/GMP/Mpf.pm (overload_string): Use $# (default "%.g").
+       * demos/perl/typemap: Fix some duplicate string entries.
+       * demos/perl/test.pl: Update tests, split overloaded constants into ...
+       * demos/perl/test2.pl: ... this new file.
+       * demos/perl/Makefile.PL (clean): Add test.tmp.
+
+2001-08-16  Kevin Ryde  <kevin@swox.se>
+
+       * printf/snprntffuns.c (gmp_snprintf_format): Correction to bufsize-1
+       return value handling.
+
+       * demos/calc/calc.y: Reposition "%{" so copyright notice gets into
+       generated files.
+
+       * INSTALL: Use gmp_printf.
+
+2001-08-14  Kevin Ryde  <kevin@swox.se>
+
+       * mpz/inp_str.c: Fix return value (was 1 too big).
+       * tests/mpz/t-inp_str.c: New file.
+       * tests/mpz/Makefile.am: Add it.
+
+       * mpn/x86/pentium4/sse2/add_n.asm: New file.
+       * mpn/x86/pentium4/sse2/sub_n.asm: New file.
+       * mpn/x86/pentium4/sse2/mul_1.asm: New file.
+
+2001-08-12  Kevin Ryde  <kevin@swox.se>
+
+       * printf/sprintffuns.c, printf/doprntf.c: Don't use sprintf return
+       value (it's a pointer on SunOS 4).
+
+       * acinclude.m4 (GMP_ASM_X86_SSE2, GMP_STRIP_PATH): New macros.
+       * configure.in: Add pentium4 support.
+       * mpn/x86/pentium4, mpn/x86/pentium4/mmx, mpn/x86/pentium4/sse2: New
+       directories.
+       * mpn/x86/README: Update.
+
+2001-08-10  Torbjorn Granlund  <tege@swox.com>
+
+       * demos/pexpr.c (setup_error_handler): Catch also SIGABRT.
+
+2001-07-31  Kevin Ryde  <kevin@swox.se>
+
+       * tests/refmpn.c (refmpn_mul_1c): Allow low to high overlaps.
+
+       * gmp-h.in, gmp-impl.h (_gmp_rand): Move prototype to gmp-impl.h.
+
+       * tune/Makefile.am (EXTRA_DIST): Add many.pl.
+
+2001-07-28  Kevin Ryde  <kevin@swox.se>
+
+       * gmp.texi (Random Number Functions): Old rand functions no longer use
+       the C library.
+
+       * configure.in, acinclude.m4 (GMP_FUNC_VSNPRINTF): New macro.
+
+       * mpn/generic/get_str.c: Add an ASSERT for high limb non-zero.
+
+2001-07-24  Kevin Ryde  <kevin@swox.se>
+
+       * gmp.texi (Build Options): Add --enable-cxx.
+       (Converting Floats): Note mpf_get_str only generates accurately
+       representable digits.
+       (Low-level Functions): Note mpn_get_str requires non-zero high limb.
+       (Formatted Output): New chapter.
+       (Multiplication Algorithms): Use @quotation with @multitable.
+       (Toom-Cook 3-Way Multiplication): Ditto.
+
+       * tests/memory.c (tests_free_nosize): New function.
+       * tests/tests.h (tests_allocate etc): Add prototypes.
+
+       * tests/printf: New directory.
+       * tests/printf/Makefile.am, t-printf.c, t-ostream.cc: New files.
+       * configure.in, tests/Makefile.am: Add them.
+
+       * configure.in, acinclude.m4 (GMP_PROG_CXX): New macro.
+       * configure.in (--enable-cxx): New option.
+       (AC_CHECK_HEADERS): Add locale.h and sys/types.h, remove unistd.h.
+       (AC_CHECK_TYPES): Add intmax_t, long double, long long, ptrdiff_t,
+       quad_t.
+       (AC_CHECK_FUNCS): Add localeconv, memset, obstack_vprintf, snprintf,
+       strchr, vsnprintf.
+       (AC_CHECK_DECLS): Add vfprintf.
+
+       * gmp-h.in, gmp-impl.h: Additions for gmp_printf etc.
+
+       * printf: New directory.
+       * printf/Makefile.am, asprintf.c, doprnt.c, doprntf.c, doprntfx.cc,
+       doprnti.c, doprntix.cc, fprintf.c, obprintf.c, obprntffuns.c,
+       obvprintf.c, osfuns.cc, osmpf.cc, osmpq.cc, osmpz.cc, printf.c,
+       printffuns.c, snprintf.c, snprntffuns.c, sprintf.c, sprintffuns.c,
+       vasprintf.c, vfprintf.c, vprintf.c, vsnprintf.c, vsprintf.c: New
+       files.
+       * configure.in, Makefile.am: Add them.
+
+       * configure.in (HAVE_INLINE): Remove AC_DEFINE, unused.
+       (AC_CHECK_TYPES): Don't test for void, assume it always exists.
+
+       * gmp-impl.h (__GMP_REALLOCATE_FUNC_MAYBE): New macro.
+       * mpz/get_str.c, mpq/get_str.c, mpf/get_str.c: Use it.
+
+       * gmp-impl.h (mpn_fib2_ui): Use __MPN.
+       (MPN_COPY_DECR): Fix an ASSERT.
+       (CAST_TO_VOID): Remove macro.
+
+       * gmp-h.in (mpq_out_str): Give #define even without prototype.
+       (mpz_cmp_d, mpz_cmpabs_d): Corrections to #defines.
+
+       * tests/devel/try.c: Add mpn_add and mpn_sub, don't use CAST_TO_VOID.
+
+2001-07-23  Torbjorn Granlund  <tege@swox.com>
+
+       * config.guess: Recognize pentium4.
+       * config.sub: Recognize pentium4.
+
+2001-07-17  Kevin Ryde  <kevin@swox.se>
+
+       * gmp-h.in (__GMPN_AORS_1): Remove x86 and gcc versions, leave just
+       one version.
+       (__GMPN_ADD, __GMPN_SUB): New macros, rewrite of mpn_add and mpn_sub.
+       (mpn_add, mpn_sub): Use them.
+       (__GMPN_COPY_REST): New macro.
+
+       * gmp-h.in, gmp-impl.h, acinclude.m4: Remove __GMP_ASM_L and
+       __GMP_LSYM_PREFIX, revert to ASM_L in gmp-impl.h and AC_DEFINE of
+       LSYM_PREFIX.
+
+2001-07-11  Kevin Ryde  <kevin@swox.se>
+
+       * gmp-h.in (__GMPN_ADD_1 etc) [x86]: Don't use this on egcs 2.91.
+
+       * mpz/fits_uint.c, mpz/fits_ulong.c, mpz/fits_ushort.c: Split up fits_u.c.
+       * mpz/fits_u.c: Remove file.
+       * mpz/Makefile.am, macos/Makefile.in: Update.
+
+       * tests/refmpn.c,tests.h (refmpn_copy): New function.
+       * tests/devel/try.c (TYPE_ZERO): No return value from call.
+       (TYPE_MODEXACT_1_ODD, TYPE_MODEXACT_1C_ODD): Share call with
+       TYPE_MOD_1 and TYPE_MOD_1C.
+       (MPN_COPY, __GMPN_COPY, __GMPN_COPY_INCR): Add testing.
+
+2001-07-10  Kevin Ryde  <kevin@swox.se>
+
+       * gmp-h.in (__GMPN_COPY): Add form to help gcc on power and powerpc.
+       * gmp-impl.h (MPN_COPY_INCR, MPN_COPY_DECR, MPN_ZERO): Ditto.
+       * mpn/powerpc64/copyi.asm, mpn/powerpc64/copyd.asm: Remove files.
+
+       * mpz/tdiv_ui.c: Eliminate some local variables (seems to save code on
+       i386 gcc 2.95.x), remove a bogus comment about quotient.
+
+       * errno.c, gmp-impl.h (__gmp_exception, __gmp_divide_by_zero,
+       __gmp_sqrt_of_negative): New functions.
+       * gmp-impl.h (GMP_ERROR, DIVIDE_BY_ZERO, SQRT_OF_NEGATIVE): Use them.
+
+       * randclr.c, randraw.c: Use ASSERT(0) for unrecognised algorithms.
+
+2001-07-07  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in (powerpc*-*-*): Use -no-cpp-precomp for Darwin.
+
+       * tests/mpbsd/t-itom.c: Renamed from t-misc.c.
+       * tests/mpbsd/t-misc.c: Remove file.
+       * tests/mpbsd/Makefile.am: Update.
+
+       * tests/mpf/t-set_si.c,t-cmp_si.c,t-gsprec.c: Split from t-misc.c.
+       * tests/mpf/t-misc.c: Remove file.
+       * tests/mpf/Makefile.am: Update.
+
+       * tests/mpz/t-oddeven.c,t-set_si.c,t-cmp_si.c: Split from t-misc.c.
+       * tests/mpz/t-misc.c: Remove file.
+       * tests/mpz/Makefile.am: Update.
+
+       * stack-alloc.c: Add some alignment ASSERTs.
+
+       * gmp-impl.h (MPN_NORMALIZE): Add notes on x86 repe/scasl slow.
+
+       * tests/devel/try.c (MPN_ZERO): Add testing.
+       * tune/speed.c,speed.h,common.c,many.pl (MPN_ZERO): Add measuring.
+
+       * mpn/x86/divrem_1.asm: Update a remark about gcc and "loop".
+
+       * tests/mpq/t-cmp_si.c: New file.
+       * tests/mpq/Makefile.am: Add it.
+
+       * tests/misc.c,tests.h (mpq_set_str_or_abort): New function.
+
+       * mpq/cmp_si.c: New file.
+       * Makefile.am, mpq/Makefile.am: Add it.
+       * gmp-h.in (mpq_cmp_si): Add prototype.
+       * gmp.texi (Comparing Rationals): Add doco.
+
+       * gmp-h.in (_GMP_H_HAVE_FILE): Add _FILE_DEFINED for microsoft, add
+       notes on what symbols are for what systems.
+
+2001-07-06  Torbjorn Granlund  <tege@swox.com>
+
+       * longlong.h (ibm032 umul_ppmm): Fix typo.
+       * longlong.h (sparclite sdiv_qrnnd): Fix typo.
+
+2001-07-03  Kevin Ryde  <kevin@swox.se>
+
+       * mpz/bin_ui.c (DIVIDE): Use MPN_DIVREM_OR_DIVEXACT_1.
+       * mpz/bin_uiui.c (MULDIV): Ditto, and use local variables for size and
+       pointer.
+
+       * acinclude.m4 (GMP_INCLUDE_GMP_H): New macro, use it everywhere gmp.h
+       is wanted at configure time.
+       * acinclude.m4, configure.in (GMP_H_EXTERN_INLINE, GMP_H_HAVE_FILE):
+       New macros.
+
+       * gmp-h.in (__GMP_EXTERN_INLINE): Set to "inline" for C++.
+       (mpn_add, mpn_sub): Use new style __GMP_EXTERN_INLINE.
+       * gmp-h.in, mp-h.in, gmp-impl.h (_EXTERN_INLINE): Remove, unused.
+       * mpn/generic/add.c, mpn/generic/sub.c: New files.
+       * mpn/generic/inlines.c: Remove file.
+       * configure.in, mpn/Makefile.am: Update.
+
+       * gmp.texi (GMP Basics): Note the need for stdio.h to get FILE
+       prototypes.
+
+2001-07-01  Kevin Ryde  <kevin@swox.se>
+
+       * gmp.texi (Build Options, Reentrancy): Updates for new
+       --enable-alloca behaviour.
+       (Debugging): Describe --enable-alloca=debug.
+       (Miscellaneous Integer Functions): Note mpz_sizeinbase ignores signs.
+       (Low-level Functions): Give a formula for mpn_gcdext cofactor.
+       (Factorial Algorithm): New section.
+       (Binomial Coefficients Algorithm): New section.
+       Misc tweaks elsewhere.
+
+       * mpf/set_prc.c: Merge the two truncation conditionals, misc cleanups,
+       no functional changes.
+
+       * mpn/*/gmp-mparam.h (DIVEXACT_1_THRESHOLD): Add tuned values.
+       * gmp-impl.h (DIVEXACT_1_THRESHOLD): Make the default 0 when
+       2*UMUL_TIME < UDIV_TIME.
+
+       * mpn/x86/p6/dive_1.asm: New file.
+
+       * mpn/x86/dive_1.asm: New file.
+       * mpn/x86/gmp-mparam.h (DIVEXACT_1_THRESHOLD): Use it always.
+
+       * tests/refmpn.c, tests.h (refmpn_zero): New function.
+       * tests/devel/try.c: Use it.
+
+       * tests/refmpn.c (refmpn_sb_divrem_mn): Use refmpn_cmp, not mpn_cmp.
+
+       * tests/mpf/t-get_d.c (main): Use || not |.
+
+       * tests/misc.c, tests/t-modlinv.c, tests/mpq/t-get_str.c,
+       tests/mpf/reuse.c: Add string.h.
+
+2001-06-29  Kevin Ryde  <kevin@swox.se>
+
+       * tune/speed.h (SPEED_ROUTINE_MPN_FIB2_UI,
+       SPEED_ROUTINE_COUNT_ZEROS_C): Corrections to TMP block handling.
+
+       * gmp-impl.h (MPN_TOOM3_MUL_N_MINSIZE, MPN_TOOM3_SQR_N_MINSIZE):
+       Corrections to these to account for adding tD into E.
+       (MPN_INCR_U, MPN_DECR_U) [WANT_ASSERT]: Add size
+       assertions, since mpn_add_1 and mpn_sub_1 from gmp.h don't get them.
+       (MPN_DIVREM_OR_DIVEXACT_1): Add an assert of no remainder.
+
+       * assert.c: Add stdlib.h for abort prototype.
+       * tests/spinner.c, trace.c, t-constants.c, t-count_zeros.c,
+       t-gmpmax.c, t-modlinv.c: Ditto.
+       * tests/mpz/t-bin.c, t-cmp.c, t-get_si.c, t-misc.c, t-popcount.c,
+       t-set_str.c, t-sizeinbase.c: Ditto.
+       * tests/mpq/t-equal.c, t-get_str.c, t-set_f.c, t-set_str.c: Ditto.
+       * tests/mpf/t-fits.c, t-get_d.c, t-get_si.c, t-int_p.c, t-misc.c,
+       t-trunc.c: Ditto.
+       * tests/mpbsd/allfuns.c, t-misc.c: Ditto.
+
+       * mpn/generic/mul_n.c, mpz/cfdiv_r_2exp.c: Use MPN_INCR_U rather than
+       mpn_incr_u.
+
+       * tests/devel/try.c (TYPE_SB_DIVREM_MN): More fixes for calling method.
+
+       * mpn/x86/k6/cross.pl: More insn exceptions.
+
+2001-06-23  Kevin Ryde  <kevin@swox.se>
+
+       * gmp-h.in (__GMPN_ADD_1, __GMPN_SUB_1) [i386]: Fix some asm output
+       constraints.
+
+       * gmp-impl.h (modlimb_invert): Mask after shifting, so mask constant
+       fits a signed byte.
+
+       * tests/devel/try.c (TYPE_SB_DIVREM_MN): Fix initial fill of quotient
+       with garbage.
+
+2001-06-20  Kevin Ryde  <kevin@swox.se>
+
+       * config.guess (rs6000-*-aix4* | powerpc-*-aix4*): Suppress error
+       messages if $CC_FOR_BUILD or program don't work.
+
+       * mpz/sqrt.c,sqrtrem.c: Special case for op==0, to avoid TMP_ALLOC(0).
+       * tests/refmpf.c (refmpf_add, refmpf_sub): Avoid TMP_ALLOC(0).
+
+       * tests/mpn/t-aors_1.c: New file.
+       * tests/mpn/Makefile.am: Add it.
+
+       * gmp-h.in (__GMPN_ADD_1, __GMPN_SUB_1): New macros, rewrite of
+       mpn_add_1 and mpn_sub_1, better code for src==dst and/or n==1,
+       separate versions for gcc x86, gcc generic, and non-gcc.
+       (mpn_add_1, mpn_sub_1): Use them.
+       (mpn_add, mpn_sub): Ditto, to get inlines on all compilers.
+       (extern "C") [__cplusplus]: Let this encompass the extern inlines too.
+       * mpn/generic/add_1.c,sub_1.c: New files, force code from gmp.h.
+       * configure.in, mpn/Makefile.am: Add them.
+
+       * acinclude.m4 (GMP_ASM_LSYM_PREFIX): AC_SUBST __GMP_LSYM_PREFIX
+       rather than AC_DEFINE LSYM_PREFIX.
+       * gmp-h.in (__GMP_LSYM_PREFIX): New substitution.
+       (__GMP_ASM_L): New macro.
+       * gmp-impl.h (ASM_L): Use it.
+
+       * acinclude.m4, configure.in (GMP_C_ATTRIBUTE_MALLOC): New macro.
+       * gmp-impl.h: Use it for all the malloc based TMP_ALLOCs.
+
+       * stack-alloc.h: Remove file.
+       * tal-reent.c: New file.
+       * Makefile.am: Update.
+
+       * acinclude.m4, configure.in (GMP_OPTION_ALLOCA): New macro, add
+       malloc-reentrant method, use stack-alloc.c as malloc-notreentrant,
+       make "reentrant" the default.
+       * gmp-impl.h (__TMP_ALIGN): Moved from stack-alloc.c, use a union to
+       determine the value, and demand only 4 bytes align on 32-bit systems.
+       * gmp-impl.h (WANT_TMP_NOTREENTRANT): Move global parts of
+       stack-alloc.h to here, allow non power-of-2 __TMP_ALIGN in TMP_ALLOC.
+       * gmp-impl.h: Extend extern "C" to TMP_ALLOC declarations.
+       * stack-alloc.c (tmp_stack): Move private parts of stack-alloc.h to
+       here, use gmp-impl.h.
+
+       * gmp-impl.h (TMP_ALLOC_LIMBS_2): New macro.
+       * mpz/fib_ui.c, mpz/jacobi.c, mpq/cmp.c, mpn/generic/fib2_ui.c: Use it.
+
+       * mpfr/exp2.c: Patch by Paul to match TMP_MARK and TMP_FREE in loop.
+       * mpfr/sqrt.c: Scope nested TMP_DECL into nested { } block, patch by
+       Paul, tweaked by me.
+       * mpfr/agm.c: Ditto, and add a final TMP_FREE(marker2).
+
+       * gmp-h.in (mpn_cmp): Add __GMP_ATTRIBUTE_PURE.
+
+       * INSTALL: Clarify "make install", tweak formatting a bit.
+
+2001-06-17  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in, Makefile.am, gmp-impl.h: Add a debugging TMP_ALLOC,
+       selected with --enable-alloca=debug.
+       * tal-debug.c: New file.
+       * configure.in, Makefile.am: Compile stack-alloc.c only for
+       --disable-alloca.
+       * assert.c (__gmp_assert_header): New function, split from
+       __gmp_assert_fail.
+
+       * mpz/lcm.c: Don't TMP_MARK and then just return. Remove unnecessary
+       _mpz_realloc prototype.
+
+       * mpn/generic/mul.c (mpn_sqr_n): Use __gmp_allocate_func for toom3
+       temporary workspace.
+
+2001-06-15  Kevin Ryde  <kevin@swox.se>
+
+       * tests/mpz/t-set_f.c: New file.
+       * tests/mpz/Makefile.am (check_PROGRAMS): Add it.
+
+       * mpz/set_f.c: Share MPN_COPY between pad and trunc cases, do exp<=0
+       test earlier, store SIZ(w) earlier.
+
+       * tests/t-count_zeros.c: New file.
+       * tests/t-gmpmax.c: New file.
+       * tests/Makefile.am (check_PROGRAMS): Add them.
+
+       * mp_clz_tab.c: Compile the table only if longlong.h says it's needed;
+       add an internal-use-only comment.
+       * tune/common.c: Force a __clz_tab for convenience when testing.
+
+       * mpn/x86/pentium/gmp-mparam.h, mpn/x86/pentium/mmx/gmp-mparam.h: Add
+       COUNT_LEADING_ZEROS_NEED_CLZ_TAB, for mod_1.asm.
+
+       * longlong.h (count_leading_zeros) [pentium]: Decide to go with float
+       method for p54.
+       (count_leading_zeros) [alpha]: Add COUNT_LEADING_ZEROS_NEED_CLZ_TAB.
+       (__clz_tab): Provide a prototype only if it's needed.
+
+       * tests/trace.c (mpz_trace): Don't use = on structures.
+       (mpn_trace): Set _mp_alloc when creating mpz.
+
+2001-06-12  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/x86/divrem_1.asm: Amend some comments about P5 speed.
+
+       * tune/README: Clarify reconfigure on gmp-mparam.h update.
+
+       * mpn/x86/p6/copyd.asm: New file.
+       * mpn/x86/p6/README: Update copyd and mod_1.
+       * mpn/x86/copyd.asm: Amend some comments.
+
+       * gmp-impl.h (__builtin_constant_p): Add dummy for non-gcc.
+       (mpn_incr_u, mpn_decr_u): Recognise incr==1 at compile time in the
+       generic code on gcc.
+
+       * gmp-impl.h (ASSERT_ZERO_P, ASSERT_MPN_NONZERO_P): New macros.
+       * mpn/generic/gcd_1.c, mpn/generic/mul_fft.c: Use them.
+       * mpz/get_d.c: Add a private mpn_zero_p.
+       * mpfr/trunc.c: Use own mpn_zero_p.
+       * tune/speed.h (SPEED_ROUTINE_MPN_GCD_1N): Use refmpn_zero_p.
+       * gmp-impl.h (mpn_zero_p): Remove, no longer needed.
+
+       * gmp-h.in, gmp-impl.h: Move MPN_CMP to gmp.h as __GMPN_CMP, leave an
+       MPN_CMP alias in gmp-impl.h.
+       * gmp-h.in (mpn_cmp): Add an inline version.
+       * mpn/generic/cmp.c: Use __GMP_FORCE_mpn_cmp to get code from gmp.h.
+
+       * acinclude.m4 (GMP_C_ATTRIBUTE_MODE): New macro.
+       * configure.in: Call it.
+       * gmp-impl.h (SItype etc): Use it.
+
+       * randraw.c (lc): Change mpn_mul_basecase->mpn_mul,
+       mpn_incr_u->MPN_INCR_U, abort->ASSERT_ALWAYS(0).
+
+       * longlong.h (count_leading_zeros) [pentiumpro]: Work around a partial
+       register stall on gcc < 3.
+
+       * gmp.texi (Introduction to GMP): Add IA-64.
+       (Notes for Particular Systems): i386 means generic x86.
+
+       * tests/t-modlinv.c: Use tests_start and tests_end.
+
+2001-06-10  Kevin Ryde  <kevin@swox.se>
+
+       * gmp.texi (Number Theoretic Functions): mpz_jacobi only defined for b
+       odd.  Separate the jacobi/legendre/kronecker descriptions.
+       (Low-level Functions): Document mpn_mul_1 "incr" overlaps.
+       (Language Bindings): New chapter.
+
+       * mpz/jacobi.c: Don't retain old behaviour of mpz_jacobi on even b
+       (it wasn't documented in 3.1.1).
+       * mpz/jacobi.c, gmp-h.in (mpz_kronecker, mpz_legendre): Remove
+       separate entrypoints, just #define to mpz_jacobi.
+       * compat.c (__gmpz_legendre): Add compatibility entrypoint.
+
+       * mpn/generic/mul_1.c: Allow "incr" style overlaps.
+       * tests/devel/try.c (param_init): Test this.
+
+       * mpf/mul_ui.c: Do size==0 test earlier.
+
+2001-06-08  Kevin Ryde  <kevin@swox.se>
+
+       * gmp-impl.h (ULONG_HIGHBIT, UINT_HIGHBIT, USHRT_HIGHBIT): Cast
+       ULONG_MAX etc to unsigned long etc before attempting to right shift.
+
+       * acinclude.m4 (GMP_ASM_LSYM_PREFIX): Add an AC_DEFINE of LSYM_PREFIX.
+       * gmp-impl.h (ASM_L): New macro.
+       (mpn_incr_u, mpn_decr_u, MPN_INCR_U, MPN_DECR_U): Add i386 optimized
+       versions.
+
+       * mpn/hppa/*.s,S,asm: Use .label so the code works with gas on hppa
+       GNU/Linux too, reported by LaMont Jones <lamont@smallone.fc.hp.com>.
+       * mpn/hppa/README: Add some notes on this.
+       * acinclude.m4 (GMP_ASM_LABEL_SUFFIX): Ditto.
+
+       * mpn/Makefile.am (nodist_libdummy_la_SOURCES): Add dive_1.c,
+       fib2_ui.c.
+
+       * tests/mpn/t-iord_u.c: New file.
+       * tests/mpn/Makefile.am (check_PROGRAMS): Add it.
+
+       * configure.in (mips*-*-irix[6789]*): Make ABI=n32 the default, same
+       as in gmp 3.1.
+       * gmp.texi (ABI and ISA): Update.
+
+       * gmp.texi (Build Options): Misc tweaks.
+       (Notes for Particular Systems): Describe windows DLL handling.
+       (Known Build Problems): DJGPP needs bash 2.04.
+       (Number Theoretic Functions): mpz_invert returns 0<=r<modulus; add
+       mpz_fib2_ui, mpz_lucnum_ui, mpz_lucnum2_ui.
+       (Fibonacci Numbers Algorithm): Update for new formulas used.
+       (Lucas Numbers Algorithm): New section.
+
+       * tune/speed.c,speed.h,common.c,many.pl: Add mpn_fib2_ui, mpz_fib2_ui,
+       mpz_lucnum_ui, mpz_lucnum2_ui.
+       * demos/expr/exprz.c,README: Add lucnum.
+       * demos/perl/GMP.pm,GMP.xs,GMP/Mpz.pm,test.pl: Add fib2, lucnum,
+       lucnum2.
+
+       * tests/mpz/t-lucnum_ui.c: New file.
+       * tests/mpz/Makefile.am (check_PROGRAMS): Add it.
+       * tests/mpz/t-fib_ui.c: Check mpz_fib2_ui too, updates for new style
+       MPN_FIB2_SIZE.
+
+       * tune/tuneup.c, tune/Makefile.am, gmp-impl.h, mpn/*/gmp-mparam.h:
+       Remove FIB_THRESHOLD, no longer required.
+
+       * mpz/fib2_ui.c, mpz/lucnum_ui.c, mpz/lucnum2_ui.c: New files.
+       * Makefile.am, mpz/Makefile.am: Add them.
+       * gmp-h.in (mpz_fib2_ui, mpz_lucnum_ui, mpz_lucnum2_ui): Add
+       prototypes.
+
+       * mpn/generic/fib2_ui.c: New file.
+       * configure.in (gmp_mpn_functions): Add it.
+       * gmp-impl.h (mpn_fib2_ui, FIB_TABLE, etc): Add these.
+       * mpz/fib_ui.c: Rewrite.
+
+       * acinclude.m4 (GMP_C_SIZES): Fix _LONG_LONG_LIMB define for mp_limb_t
+       size test.
+       (GMP_FUNC_ALLOCA): Add dummy __GMP_BITS_PER_MP_LIMB for gmp-h.in work.
+
+       * configure.in (CPPFLAGS): Remove -D__GMP_WITHIN_GMP, don't want it
+       everywhere.
+       * Makefile.am, mpn/Makefile.am, mpz/Makefile.am, mpq/Makefile.am,
+       mpf/Makefile.am, mpbsd/Makefile.am (INCLUDES): Set -D__GMP_WITHIN_GMP.
+
+       * configure.in (*-*-msdosdjgpp*): Forcibly disable shared libraries,
+       to make libtests.la work.
+
+       * acconfig.h (_LONG_LONG_LIMB, HAVE_MPFR): Remove dummy defines, no
+       longer needed.
+
+       * mpz/set_ui.c: Store to _mp_d[0] unconditionally.
+
+2001-05-27  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in, gmp-h.in, mp-h.in: Add support for windows DLLs.
+
+2001-05-26  Kevin Ryde  <kevin@swox.se>
+
+       * gmp.texi (ABI and ISA, Reentrancy): Minor tweaks.
+       (Notes for Package Builds): Note gmp.h is a generated file.
+       (Notes for Particular Systems): -march=pentiumpro is used for gcc
+       2.95.4 and up.
+       (Assembler Loop Unrolling): Mention non power-of-2 unrolling.
+       (Internals): New chapter.
+       * mpf/README: Remove file.
+
+       * demos/expr/README: Miscellaneous rewordings.
+
+       * demos/perl: New directory.
+       * demos/Makefile.am: Add it.
+       * demos/perl/INSTALL, Makefile.PL, GMP.pm, GMP.xs, typemap,
+       GMP/Mpz.pm, GMP/Mpq.pm, GMP/mpf.pm, GMP/Rand.pm, sample.pl, test.pl:
+       New files.
+
+       * configure, aclocal.m4: Update to autoconf 2.50.
+
+       * configure, aclocal.m4, ltmain.sh: Update to libtool 1.4.
+
+       * configure, aclocal.m4, missing, ansi2knr.c, */Makefile.in: Update to
+       automake 1.4f.
+       * Makefile.am: Conditionalize mpfr in $(SUBDIRS) to handle mpfr.info.
+       * mpfr/Makefile.am (INFO_DEPS): Remove previous mpfr.info handling.
+       * mpn/Makefile.am (GENERIC_SOURCES): Remove this, just put mp_bases.c
+       in libmpn_la_SOURCES.
+       * tests/Makefile.am (tests.h): Move from EXTRA_HEADERS to
+       libtests_la_SOURCES.
+       * ltconfig: Remove file, no longer needed.
+
+       * Makefile.am (gmp-impl.h, longlong.h, stack-alloc.h): Move from
+       EXTRA_DIST to libgmp_la_SOURCES, so they get included in TAGS.
+       * tests/rand/Makefile.am (gmpstat.h): Move to libstat_la_SOURCES
+       similarly.
+
+       * config.guess (68k-*-*): Use $SHELL not "sh", tweak some comments.
+
+       * mpfr/mpfr.texi (Introduction to MPFR): Tweak table formatting, note
+       non-free programs must be able to be re-linked.
+
+2001-05-20  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/powerpc64/addmul_1.asm, mpn/powerpc64/mul_1.asm,
+       mpn/powerpc64/submul_1.asm: Add carry-in entrypoints.
+
+2001-05-17  Kevin Ryde  <kevin@swox.se>
+
+       * gmp.texi (ge): Fix definition for info.
+       (Notes for Particular Systems): Mention 68k dragonball and cpu32.
+       (Efficiency): Add static linking, more about in-place operations,
+       describe mpq+/-integer using addmul.
+       (Reporting Bugs): A couple of words about self-contained reports.
+       (Floating-point Functions): Note exponent limitations of mpf_get_str
+       and mpf_set_str.
+       (Initializing Floats): Clarify mpf_get_prec, mpf_set_prec and
+       mpf_set_prec_raw a bit.
+       (Float Comparison): Note current mpf_eq deficiencies.
+
+       * gmp-h.in (__GMP_HAVE_CONST, __GMP_HAVE_PROTOTYPES,
+       __GMP_HAVE_TOKEN_PASTE): Merge GNU ansidecl.h tests for ANSI compilers.
+       * demos/expr/expr-impl-h.in: Ditto.
+
+       * gmp-impl.h (BITS_PER_MP_LIMB): Define from __GMP_BITS_PER_MP_LIMB if
+       not already in gmp-mparam.h.
+       * tests/t-constants.c (BITS_PER_MP_LIMB, __GMP_BITS_PER_MP_LIMB):
+       Check these are the same.
+
+       * gmp-h.in (mpf_get_default_prec, mpf_get_prec, mpf_set_default_prec,
+       mpf_set_prec_raw): Provide "extern inline" versions, use __GMPF on the
+       macros.
+       * mpf/get_dfl_prc.c, mpf/get_prc.c, mpf/set_dfl_prc.c,
+       mpf/set_prc_raw.c: Get code from gmp.h using __GMP_FORCE.
+
+       * gmp-h.in, gmp-impl.h (__gmp_default_fp_limb_precision): Move from
+       gmp-impl.h to gmp-h.in.
+       (__GMPF_BITS_TO_PREC, __GMPF_PREC_TO_BITS): Ditto, and use __GMPF
+       prefix and add a couple of casts.
+       * gmp-h.in (__GMP_MAX): New macro.
+       * mpf/init2.c, mpf/set_prc.c: Update for __GMPF prefix.
+
+       * gmp-h.in (__GMP_BITS_PER_MP_LIMB): New templated define.
+       * acinclude.m4 (GMP_C_SIZES): Add AC_SUBST __GMP_BITS_PER_MP_LIMB,
+       remove AC_DEFINE BITS_PER_MP_LIMB.
+
+2001-05-13  Kevin Ryde  <kevin@swox.se>
+
+       * gmp-h.in, gmp.texi, Makefile.am, mpz/Makefile.am, tests/mpz/t-pow.c:
+       Remove mpz_si_pow_ui, pending full si support.
+       * mpz/si_pow_ui.c: Remove file.
+
+2001-05-11  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/x86/pentium/dive_1.asm: New file.
+
+       * mpn/powerpc32/umul.asm: Use r on registers.
+       * mpn/powerpc64/umul.asm: New file.
+       * configure.in (powerpc*-*-*): Enable umul in extra_functions.
+
+       * tests/refmpn.c, tests/tests.h (refmpn_umul_ppmm): Use same arguments
+       as normal mpn_umul_ppmm.
+       (refmpn_mul_1c): Update.
+       * tests/devel/try.c, tune/many.pl: Add some umul_ppmm testing support.
+
+       * mpn/x86/k6/mmx/popham.asm, mpn/x86/k7/mmx/popham.asm: Don't support
+       size==0.
+       * mpn/x86/pentium/popcount.asm, mpn/x86/pentium/hamdist.asm: Ditto,
+       and shave a couple of cycles from the PIC entry code.
+
+       * mpz/mul.c: Use mpn_mul_1 for size==1 and mpn_mul_2 (if available)
+       for size==2, to avoid copying; do vsize==0 test earlier.
+
+       * mpf/sub.c: Test r!=u before calling mpf_set.
+       * mpf/add.c: Ditto, and share mpf_set between usize==0 and vsize==0.
+
+       * mpn/generic/tdiv_qr.c, mpq/get_d.c, mpf/div.c, mpf/set_q.c,
+       mpf/set_str.c, mpf/ui_div.c: Test for high bit set, not for
+       count_leading_zeros zero.
+
+       * acinclude.m4 (GMP_PROG_AR, GMP_PROG_NM): Print a message if extra
+       flags are added.
+
+       * tests/mpz/t-mul_i.c: New file.
+       * tests/mpz/Makefile.am: Add it.
+
+       * mpz/mul_siui.c (mpz_mul_si): Fix for -0x80..00 on long long limb.
+
+       * gmp-h.in (mpf_set_si, mpf_set_ui): Revert last change, set exp to 0
+       when n==0.
+       * mpf/ceilfloor.c, mpf/trunc.c: Fix exp to 0 when setting r to 0.
+       * gmp-impl.h (MPF_CHECK_FORMAT): Check exp==0 when size==0.
+
+2001-05-07  Kevin Ryde  <kevin@swox.se>
+
+       * gmp-h.in (mpf_set_si, mpf_set_ui): Don't bother setting _mp_exp to 0
+       when n==0 (use 1 unconditionally).
+       * tests/mpf/t-misc.c (check_mpf_set_si): Don't demand anything of
+       _mp_exp when _mp_size is zero.
+
+       * mpn/x86/README: Note gas _GLOBAL_OFFSET_TABLE_ with leal problem.
+
+       * gmp-h.in (mpz_fits_uint_p, mpz_fits_ulong_p, mpz_fits_ushort_p):
+       Provide these as "extern inline"s.
+       (__GMP_UINT_MAX, __GMP_ULONG_MAX, __GMP_USHRT_MAX): New macros.
+       (mpz_popcount): Use __GMP_ULONG_MAX.
+       * gmp-impl.h (UINT_MAX, ULONG_MAX, USHRT_MAX): Use __GMP_U*_MAX, if
+       not already defined.
+       * mpz/fits_u.c: Use the code from gmp.h.
+
+2001-05-06  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/x86/k7/dive_1.asm: New file.
+       * mpn/x86/k7/gcd_1.asm: New file.
+       * mpn/asm-defs.m4 (m4_count_trailing_zeros): New macro.
+
+       * gmp-h.in (mpz_get_ui, mpz_getlimbn, mpz_set_q, mpz_perfect_square_p,
+       mpz_popcount, mpz_size, mpf_set_ui, mpf_set_si, mpf_size): Provide
+       these as "extern inlines".
+       Use just one big extern "C" block.
+       * mpz/getlimbn.c, mpz/get_ui.c, mpz/perfsqr.c, mpz/popcount.c,
+       mpz/set_q.c, mpz/size.c, mpf/set_si.c, mpf/set_ui.c, mpf/size.c: Use
+       __GMP_FORCE to get code from gmp.h.
+
+2001-05-03  Kevin Ryde  <kevin@swox.se>
+
+       * extract-dbl.c: Add ASSERT d>=0.
+
+       * gmp.texi (Efficiency): Add mpz_addmul etc for mpz+=integer, add
+       mpz_neg etc in-place.
+       (Integer Arithmetic): Add mpz_addmul, mpz_submul, mpz_submul_ui.
+       (Initializing Rationals): Add mpq_set_str.
+       (Low-level Functions): mpn_set_str requires strsize >= 1.
+
+       * gmp-h.in (__GMP_EXTERN_INLINE, __GMP_ABS): New macros.
+       (mpz_abs, mpq_abs, mpf_abs, mpz_neg, mpq_neg, mpf_neg): Provide inline
+       versions.
+       * mpz/abs.c, mpq/abs.c, mpf/abs.c, mpz/neg.c, mpq/neg.c, mpf/neg.c:
+       Add suitable __GMP_FORCE to turn off inline versions.
+
+       * tests/mpz/t-aorsmul.c,t-cmp_d.c,t-popcount.c,t-set_str.c: New files.
+       * tests/mpz/Makefile.am: Add them.
+
+       * mpz/aorsmul_i.c: New file, rewrite of addmul_ui.c.  Add
+       mpz_submul_ui entrypoint, share more code between some of the
+       conditionals, use mpn_mul_1c if available.
+       * mpz/addmul_ui.c: Remove file.
+       * mpz/aorsmul.c: New file.
+       * Makefile.am, mpz/Makefile.am: Update.
+       * gmp-h.in (mpz_addmul, mpz_submul, mpz_submul_ui): Add prototypes.
+       * gmp-impl.h (mpz_aorsmul_1): Add prototype.
+
+       * tests/mpq/t-set_str.c: New file.
+       * tests/mpq/Makefile.am: Add it.
+
+       * mpq/set_str.c: New file.
+       * Makefile.am, mpq/Makefile.am: Add it.
+       * gmp-h.in (mpq_set_str): Add prototype.
+
+       * mpz/set_str.c: Fix for trailing white space on zero, eg. "0 ".
+       * mpn/generic/set_str.c: Add ASSERT str_len >= 1.
+
+       * gmp-h.in, gmp-impl.h (mpn_incr_u, mpn_decr_u): Move to gmp-impl.h.
+       * gmp-impl.h (MPN_INCR_U, MPN_DECR_U): New macros.
+
+2001-04-30  Kevin Ryde  <kevin@swox.se>
+
+       * tests/mpz/t-lcm.c: New file.
+       * tests/mpz/Makefile.am (check_PROGRAMS): Add it.
+
+       * mpz/lcm.c: Add one limb special case.
+
+       * mpz/lcm_ui.c: New file.
+       * Makefile.am, mpz/Makefile.am: Add it.
+       * gmp-h.in (mpz_lcm_ui): Add prototype.
+       * gmp.texi (Number Theoretic Functions): Add mpz_lcm_ui, document lcm
+       now always positive.
+
+       * mp-h.in (mp_size_t, mp_exp_t): Fix typedefs to match gmp-h.in.
+
+       * gmp-h.in (mpn_add_1, mpn_add, mpn_sub_1, mpn_sub): Remove K&R
+       function defines (ansi2knr will handle mpn/inline.c, and just ansi is
+       enough for gcc extern inline).
+
+       * gmp-h.in (__GMP_HAVE_TOKEN_PASTE): New macro.
+       (__MPN): Use it.
+       * gmp-impl.h (CNST_LIMB): Ditto.
+
+       * gmp-h.in, mp-h.in (__gmp_const, __gmp_signed, _PROTO, __MPN): Use
+       ANSI forms on Microsoft C.
+       (__GMP_HAVE_CONST): New define.
+       * gmp-impl.h (const, signed): Use it.
+
+       * demos/expr/expr-impl-h.in (<stdarg.h>): Use this with Microsoft C.
+       (HAVE_STDARG): New define.
+       * demos/expr/expr.c,exprz.c,exprq.c,exprf.c,exprfr.c: Use it.
+
+       * acinclude.m4 (GMP_C_STDARG): New macro.
+       * configure.in: Call it.
+       * rand.c: Use it.
+
+       * configure.in (AC_PROG_CC_STDC): New test.
+
+2001-04-25  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/x86/k6/mmx/dive_1.asm: New file.
+       * mpn/x86/x86-defs.m4 (Zdisp): Two more insns.
+
+       * mpn/x86/pentium/mul_2.asm: New file.
+       * mpn/asm-defs.m4: Add define_mpn(mul_2).
+       * acconfig.h (HAVE_NATIVE_mpn_divexact_1, mul_2): Add templates.
+
+       * configure.in (ABI): Use AC_ARG_VAR.
+
+       * tests/devel/try.c: Run reference function when validate fails.
+
+       * mpq/get_str.c: Fixes for negative bases.
+       * tests/mpq/t-get_str.c: Check negative bases.
+       * tests/misc.c,tests.h (__gmp_allocate_strdup, strtoupper): New
+       functions.
+
+2001-04-24  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/lcm.c (mpz_lcm): Make result always positive.
+
+       * gmp-h.in (mpz_inp_binary, mpz_out_binary): Remove declarations.
+
+2001-04-22  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/powerpc64/addsub_n.asm: Use config.m4 not asm-syntax.m4.
+
+       * mpz/cmp_d.c, mpz/cmpabs_d.c: New files.
+       * Makefile.am, mpz/Makefile.am: Add them.
+       * mpf/cmp_d.c, mpf/get_dfl_prc.c: New files.
+       * Makefile.am, mpf/Makefile.am: Add them.
+       * gmp-h.in (mpz_cmp_d, mpz_cmpabs_d, mpf_cmp_d, mpf_get_default_prec):
+       Add prototypes.
+       * gmp.texi: Add documentation.
+
+       * mpf/set_prc.c: Avoid a realloc call if already the right precision.
+
+       * gmp-impl.h (MPF_BITS_TO_PREC, MPF_PREC_TO_BITS): New macros.
+       * mpf/get_prc.c, init2.c, set_dfl_prc.c, set_prc.c, set_prc_raw.c:
+       Use them.
+
+2001-04-20  Kevin Ryde  <kevin@swox.se>
+
+       * tests/devel/try.c: Don't test size==0 on mpn_popcount and
+       mpn_hamdist; add testing for mpn_divexact_1; print some limb values
+       with mpn_trace not printf.
+
+       * mpz/popcount.c, mpz/hamdist.c: Don't pass size==0 to mpn_popcount
+       and mpn_hamdist.
+       * mpn/generic/popham.c: Don't support size==0.
+
+       * config.guess (m68k-*-*): Detect m68010, return m68360 for cpu32,
+       cleanup the nesting a bit.
+
+       * gmp.texi (Integer Division): Fix mpz_congruent_2exp_p "c" type.
+       (Integer Division): Add mpz_divexact_ui.
+       (Number Theoretic Functions): Fix mpz_nextprime return type.
+       (Exact Remainder): Divisibility tests now implemented.
+       And more index entries in a few places.
+
+       * tests/mpz/dive_ui.c: New file.
+       * tests/mpz/Makefile.am (check_PROGRAMS): Add it.
+
+       * mpz/dive_ui.c: New file.
+       * Makefile.am, mpz/Makefile.am: Add it.
+       * gmp-h.in (mpz_divexact_ui): Add prototype.
+
+       * tune/many.pl, tune/speed.h: Add special mpn_back_to_back for
+       development.
+
+       * gmp-impl.h (MPN_DIVREM_OR_DIVEXACT_1): New macro.
+       * mpz/divexact.c: Use it.
+
+       * gmp-impl.h (DIVEXACT_1_THRESHOLD): New threshold.
+       * tune/tuneup.c: Tune it.
+
+       * tune/speed.c,speed.h,common.c,many.pl: Add measuring of
+       mpn_divexact_1, mpn_copyi, mpn_copyd.
+
+       * mpn/generic/dive_1.c: New file.
+       * configure.in (gmp_mpn_functions): Add it.
+       * gmp-impl.h (mpn_divexact_1): Add prototype.
+       * mpn/asm-defs.m4: Add define_mpn(divexact_1).
+
+       * tests/mpn: New directory.
+       * tests/Makefile.am: Add it.
+       * tests/mpn/Makefile.am: New file.
+       * configure.in (AC_OUTPUT): Add it.
+       * tests/mpn/t-asmtype.c: New file.
+
+       * configure, config.in: Update to autoconf 2.49d.
+
+       * configure.in, gmp-h.in, mp-h.in, demos/expr/expr-impl-h.in: Revert
+       to generating gmp.h, mp.h and expr-impl.h with AC_OUTPUT and AC_SUBST.
+
+       * configure.in (m68*-*-*): Oops, m683?2 is 68000, m68360 is cpu32.
+       * mpn/m68k/m68k-defs.m4 (scale_available_p): Ditto.
+
+       * configure.in (underscore, asm_align): Remove these variables, unused.
+       (GMP_ASM_*): Sort by AC_REQUIREs, to avoid duplication.
+       * acinclude.m4 (GMP_ASM_UNDERSCORE, GMP_ASM_ALIGN_LOG): Remove support
+       for actions, no longer needed.
+
+2001-04-17  Kevin Ryde  <kevin@swox.se>
+
+       * config.guess (m68k-*-*): Look for cpu in linux kernel /proc/cpuinfo.
+
+       * acinclude.m4 (GMP_GCC_MARCH_PENTIUMPRO): The -mpentiumpro problem is
+       fixed in 2.95.4, so test for that.
+       (GMP_ASM_TYPE): Amend some comments.
+
+       * tune/freq.c (speed_cpu_frequency_sysctl): Avoid having unused
+       variables on GNU/Linux.
+
+       * mpn/asm-defs.m4 (m4_instruction_wrapper): Fix a quoting problem if
+       the name of the file is a macro.
+
+2001-04-15  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/powerpc64/*.asm: Add speeds on ppc630.
+
+       * acconfig.h: Add dummy templates for _LONG_LONG_LIMB and HAVE_MPFR.
+       * configure.in: Ensure config.in is the last AM_CONFIG_HEADER,
+       which autoheader requires.
+
+       * mpn/x86/pentium/popcount.asm: New file.
+       * mpn/x86/pentium/hamdist.asm: New file.
+
+       * mpn/asm-defs.m4 (m4_popcount): New macro.
+       Amend a few comments elsewhere.
+
+       * acinclude.m4 (GMP_ASM_RODATA): If possible, grep compiler output for
+       the right directive.
+
+       * tune/speed.c: Print clock speed in MHz, not cycle time.
+
+       * configure.in (AC_CHECK_HEADERS): Check for sys/processor.h.
+       * tune/freq.c (speed_cpu_frequency_processor_info): Require
+       <sys/processor.h> to exist, to differentiate the different
+       processor_info on Darwin.
+       (speed_cpu_frequency_sysctlbyname): Remove hw.model test which is in
+       speed_cpu_frequency_sysctl.
+       (speed_cpu_frequency_sysctl): Add hw.cpufrequency for Darwin.
+
+       * gmp-impl.h (MPN_LOGOPS_N_INLINE, mpn_and_n ... mpn_xnor_n): Use a
+       single expression argument for the different operations, necessary for
+       the Darwin "smart" preprocessor.
+
+       * mpn/m68k/t-m68k-defs.pl: Allow white space in m4_definsn and
+       m4_defbranch.
+
+       * tune/many.pl: Change RM_TMP_S to RM_TMP to match mpn/Makeasm.am,
+       avoid a possibly undefined array in a diagnostic, add more renaming to
+       hamdist.
+
+2001-04-13  Kevin Ryde  <kevin@swox.se>
+
+       * ltmain.sh, aclocal.m4, configure, config.in: Update to libtool 1.3d.
+       * configure.in: Change ac_ to lt_ in lt_cv_archive_cmds_need_lc and
+       lt_cv_proc_cc_pic.
+
+       * config.guess (m68*-*-*): Detect exact cpu with BSD sysctl hw.model,
+       detect 68000/68010 with trapf, detect 68302 with bfffo.
+
+2001-04-11  Kevin Ryde  <kevin@swox.se>
+
+       * acinclude.m4 (GMP_ASM_M68K_INSTRUCTION, GMP_ASM_M68K_ADDRESSING,
+       GMP_ASM_M68K_BRANCHES): New macros.
+       * configure.in: Use them, remove old 68k configs, use mc68020 udiv and
+       umul.
+
+       * mpn/m68k/m68k-defs.m4: New file.
+       * mpn/m68k/t-m68k-defs.pl: New file.
+       * mpn/m68k/*.asm: New files, converted from .S.  Merge add_n and sub_n
+       to aors_n, ditto mc68020 addmul_1 and submul_1 to aorsmul_1.  No
+       object code changes (except .type and .size now used on NetBSD 1.4).
+       * mpn/m68k/README: New file.
+       * mpn/m68k/*.S, */*.S, syntax.h: Remove files.
+
+       * configure.in (m68*-*-netbsd1.4*): Pretend getrusage doesn't exist.
+       * tune/README: Update.
+
+       * configure.in (powerpc*-*-*): For the benefit of Darwin 1.3, add cc
+       to cclist, make gcc_cflags -Wa,-mppc optional.
+
+2001-04-06  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/lisp/gmpasm-mode.el (gmpasm-comment-start-regexp): Add | for 68k.
+       (gmpasm-mode-syntax-table): Add to comments.
+
+       * tests/mpz/reuse.c (dsi_div_func_names): Add names for cdiv_[qr]_2exp.
+
+2001-04-04  Kevin Ryde  <kevin@swox.se>
+
+       * acinclude.m4 (GMP_M4_M4WRAP_SPURIOUS): Fix test so as to actually
+       detect the problem, add notes on m68k netbsd 1.4.1.
+
+       * gmp.texi (Compatibility with older versions): Note libmp
+       compatibility.
+
+2001-04-03  Kevin Ryde  <kevin@swox.se>
+
+       * tests/mpz/reuse.c: Add mpz_cdiv_q_2exp and mpz_cdiv_r_2exp.
+
+       * tests/mpz/t-pow.c: Drag in refmpn.o when testing mpz_pow_ui etc with
+       refmpn_mul_2.
+
+       * tune/speed.c,speed.h,common.c,many.pl: Add measuring of mpn_com_n
+       and mpn_mul_2.
+       * tests/devel/try.c: Add testing of mpn_mul_2, and a
+       DATA_MULTIPLE_DIVISOR attribute.
+
+       * gmp.texi (Build Options): List more m68k's.
+       (Build Options): Add cross reference to tex2html.
+       (Notes for Particular Systems): Add m68k means 68020 or up.
+       (Rational Conversions): New section, with mpq_get_d, mpq_set_d and
+       mpq_set_f from Miscellaneous, and new mpq_set_str.
+       (Applying Integer Functions): Move mpq_get_num, mpq_get_den,
+       mpq_set_num and mpq_set_den from Misc.
+       (Miscellaneous Rational Functions): Remove section.
+       (Custom Allocation): Partial rewrite for various clarifications.
+       (References): Improve line breaks near URLs.
+
+       * acinclude.m4 (GMP_GCC_M68K_OPTIMIZE): New macro.
+       * configure.in (m68*-*-*): Use it to run gcc 2.95.x at -O not -O2.
+       (m680?0-*-*, m683?2-*-*, m68360-*-*): Add optional gcc -m options.
+
+       * tests/mpz/t-cmp.c: New file.
+       * tests/mpz/t-sizeinbase.c: New file.
+       * tests/mpz/Makefile.am: Add them.
+
+       * gmp-impl.h (MPN_CMP): New macro.
+       * mpz/cmp.c,cmpabs.c: Use it, and minor cleanups too.
+
+       * tests/mpq/t-equal.c: New file.
+       * tests/mpq/t-get_str.c: New file.
+       * tests/mpq/Makefile.am: Add them.
+
+       * mpq/get_str.c: New file.
+       * Makefile.am, mpq/Makefile.am: Add it.
+       * gmp-h.in (mpq_get_str): Add prototype.
+
+       * mpq/equal.c: Rewrite using inline compare loops.
+
+       * tests/refmpn.c,tests.h (refmpn_mul_2): Fix parameter order.
+       * mpz/n_pow_ui.c: Fix mpn_mul_2 calls parameter order.
+
+2001-03-29  Kevin Ryde  <kevin@swox.se>
+
+       * tests/mpf/t-trunc.c: New file.
+       * tests/mpf/Makefile.am (check_PROGRAMS): Add it.
+       * gmp-impl.h (MPF_CHECK_FORMAT): New macro.
+
+       * mpf/trunc.c: New file, rewrite of integer.c, preserve prec+1 in
+       copy, don't copy if unnecessary.
+       * mpf/ceilfloor.c: New file likewise, and use common subroutine for
+       ceil and floor.
+       * mpf/integer.c: Remove file.
+       * Makefile.am, mpf/Makefile.am, macos/Makefile.in: Update.
+
+       * acinclude.m4 (GMP_GCC_VERSION_GE): New macro.
+       (GMP_GCC_MARCH_PENTIUMPRO): Use it, remove CCBASE parameter (don't
+       bother checking it's gcc).
+       (GMP_GCC_ARM_UMODSI): New macro.
+       * configure.in (GMP_GCC_MARCH_PENTIUMPRO): Update parameters.
+       (arm*-*-*): Use GMP_GCC_ARM_UMODSI.
+       * gmp.texi (Notes for Particular Systems): Add arm gcc requirements.
+
+2001-03-28  Kevin Ryde  <kevin@swox.se>
+
+       * gmp.texi (Converting Integers): Document mpz_getlimbn using absolute
+       value and giving zero for N out of range, move to end of section.
+
+       * tests/refmpn.c (refmpn_tdiv_qr): Use refmpn_divmod_1 rather than
+       refmpn_divrem_1.
+       * tests/tests.h: Add some prototypes that were missing.
+
+       * mpz/tdiv_q_ui.c: Remove a comment that belonged to mpz_tdiv_r_ui.
+
+2001-03-26  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/gcdext.c: Handle carry overflow after m*n multiply code
+       in both arms.  Partially combine multiply arms.
+
+2001-03-24  Kevin Ryde  <kevin@swox.se>
+
+       * longlong.h: Add comments to P5 count_leading_zeros.
+
+       * demos/expr/exprz.c,t-expr.c,README: Add congruent_p and divisible_p.
+
+2001-03-23  Kevin Ryde  <kevin@swox.se>
+
+       * gmp.texi (GMPceil, GMPfloor, ge, le): New macros.
+       (Integer Division, mpn_cmp, mpn_sqrtrem, Algorithms): Use them.
+       (mpn_bdivmod): Refer to mp_bits_per_limb, not BITS_PER_MP_LIMB, and
+       improve formatting a bit.
+       (mpn_lshift, mpn_rshift): Clarify the return values, and use {rp,n}
+       for the destination.
+       Miscellaneous minor rewordings in a few places.
+
+       * mpn/arm/arm-defs.m4: New file.
+       * configure.in (arm*-*-*): Use it.
+       * mpn/arm/*.asm: Use changecom and registers from arm-defs.m4, use L()
+       for local labels.
+
+       * mpn/x86/k6/mmx/com_n.asm: Relax code alignment (same speed).
+
+       * gmp-h.in (__GMP_ATTRIBUTE_PURE): Use __pure__ to avoid application
+       namespace.
+
+       * gmp-impl.h (ABS): Add parens around argument.
+
+2001-03-20  Kevin Ryde  <kevin@swox.se>
+
+       * acinclude.m4 (GMP_PROG_M4): Use AC_ARG_VAR on $M4.
+
+       * acinclude.m4 (GMP_M4_M4WRAP_SPURIOUS): New macro.
+       * configure.in: Use it.
+       * mpn/asm-defs.m4: Ditto.
+
+2001-03-18  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/x86/pentium/logops_n.asm: New file.
+
+       * mpn/x86/k6/k62mmx/copyd.asm: Rewrite, smaller and simpler, faster on
+       small sizes, slower on big sizes (about half the time).
+       * mpn/x86/k6/k62mmx/copyi.asm: Remove file, in favour of generic x86.
+       * mpn/x86/copyi.asm: Add some comments.
+       * mpn/x86/k6/README: Update.
+
+       * mpn/x86/k6/gcd_1.asm: New file.
+
+       * gmp-impl.h (NEG_MOD): Fix type of __dnorm.
+
+       * acinclude.m4 (GMP_C_SIZES): Fix use of __GMP_WITHIN_CONFIGURE.
+
+2001-03-15  Kevin Ryde  <kevin@swox.se>
+
+       * gmp.texi (GMPabs): New macro.
+       (Float Comparison - mpf_reldiff): Use it.
+       (Integer Comparisons - mpz_cmpabs): Ditto, puts "abs" in info.
+       (Reentrancy): Update notes on old random functions.
+       (Karatsuba Multiplication): Better characterize the effect of basecase
+       speedups on the thresholds, pointed out by Torbjorn.
+
+       * tune/README: Notes on the 1x1 div threshold for mpn_gcd_1.
+
+       * tests/misc.c (mpz_pow2abs_p, mpz_flipbit, mpz_errandomb,
+       mpz_errandomb_nonzero, mpz_negrandom): New functions.
+       (mpz_erandomb, mpz_erandomb_nonzero): Use urandom().
+       * tests/spinner.c (spinner_wanted, spinner_tick): Make global.
+       * tests/tests.h: Update prototypes.
+
+       * tests/mpz/t-cong.c, tests/mpz/t-cong_2exp.c: New files.
+       * tests/mpz/Makefile.am (check_PROGRAMS): Add them.
+
+       * mpz/cong.c, mpz/cong_2exp.c, mpz/cong_ui.c: New files.
+       * Makefile.am, mpz/Makefile.am: Add them.
+       * gmp-impl.h (NEG_MOD): New macro.
+       * gmp-h.in (mpz_congruent_p, mpz_congruent_2exp_p,
+       mpz_congruent_ui_p): Add prototypes.
+       * gmp.texi (Integer Division, Efficiency): Add documentation.
+
+       * mpq/aors.c: No need for ABS on denominator sizes.
+
+       * gmp-impl.h (mpn_divisible_p): Use __MPN.
+
+       * gmp-impl.h (LOW_ZEROS_MASK): New macro.
+       * mpz/divis_ui.c, mpn/generic/divis.c: Use it.
+
+       * mpz/setbit.c: Fix normalization for case of a negative ending up
+       with a zero high limb.
+       * tests/mpz/bit.c (check_single): New test for this problem.
+
+       * configure.in (none-*-*): Fix cclist for default ABI=long.
+
+2001-03-10  Kevin Ryde  <kevin@swox.se>
+
+       * mpz/cfdiv_q_2exp.c: Don't scan for non-zero limbs if they don't
+       matter to the rounding.
+
+       * mpz/get_ui.c: Fetch _mp_d[0] unconditionally, so the code can come
+       out branch-free.
+
+2001-03-08  Kevin Ryde  <kevin@swox.se>
+
+       * tests/devel/try.c (param_init): Fix reference functions for and_n
+       and nand_n.
+
+       * tune/speed.c, tests/devel/try.c: Seed RANDS, not srandom etc.
+       * configure.in (AC_CHECK_FUNCS): Remove srand48 and srandom.
+       * macos/configure (coptions): Remove random/srandom, now unnecessary.
+
+       * configure.in (gmp.h, mp.h, demos/expr/expr-impl.h): Generate using
+       AM_CONFIG_HEADER.
+       (_LONG_LONG_LIMB, HAVE_MPFR): Change to AC_DEFINEs.
+       * gmp-h.in, mp-h.in, demos/expr/expr-impl-h.in: Change to #undef's.
+       * acinclude.m4 (GMP_FUNC_ALLOCA, GMP_C_SIZES): Use gmp-h.in, not gmp.h.
+       * Makefile.am (EXTRA_DIST): Remove gmp-h.in and mp-h.in, now done
+       automatically.
+       * acinclude.m4 (GMP_FUNC_ALLOCA), gmp-impl.h: Set and use
+       __GMP_WITHIN_CONFIGURE rather than GMP_FUNC_ALLOCA_TEST.
+
+       * mpf/random2.c: Use _gmp_rand and RANDS instead of random() for the
+       exponent, ensures full range of values too.
+
+       * tests/mpz/t-div_2exp.c (check_various): Start with d based on i, but
+       don't let it go negative.
+
+       * tune/tuneup.c (KARATSUBA_MUL_THRESHOLD): Limit probing to
+       TOOM3_MUL_THRESHOLD_LIMIT, the size of the workspace in mul_n.c.
+       Use a -1 with this too, so size<LIMIT not <=.
+
+2001-03-07  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/cray/cfp/mul_1.c: Don't call mpn_add_n with size 0.
+       * mpn/cray/cfp/addmul_1.c: Likewise.
+       * mpn/cray/cfp/submul_1.c: Don't call mpn_sub_n with size 0.
+
+       * tests/mpz/t-div_2exp.c (check_various): Start 2nd d loop from 0
+       (avoid problems with Cray compilers).
+
+2001-03-06  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/cray/ieee/submul_1.c: Don't call mpn_sub_n with size 0.
+
+       * mpn/cray/ieee/mul_basecase.c: New file.
+       * mpn/cray/ieee/sqr_basecase.c: New file, derived from mul_basecase.c.
+
+2001-03-06  Kevin Ryde  <kevin@swox.se>
+
+       * tests/devel/try.c (pointer_setup): Allow dst_size == SIZE_SIZE2 for
+       the benefit of mpn_tdiv_qr.
+
+       * tune/tuneup.c (all): Start karatsuba probing at size==4, for the
+       benefit of cray t90 ieee which has speed oddities at size==2.
+
+       * gmp-impl.h (USE_LEADING_REGPARM): Use __GMP_GNUC_PREREQ.
+       Use __GMP_ATTRIBUTE_PURE and ATTRIBUTE_CONST in a few places.
+
+       * gmp-h.in (__GMP_GNUC_PREREQ): New macro.
+       (__GMP_ATTRIBUTE_PURE): New macro, use it in many places.
+
+       * gmp-impl.h, gmp-h.in (mpn_jacobi_base): Move prototype to
+       gmp-impl.h, use ATTRIBUTE_CONST.
+
+       * tune/speed.h (speed_cyclecounter): Inline asm version for i386.
+
+       * mpz/cfdiv_r_2exp.c (cfdiv_r_2exp): Only reread "up" after second
+       realloc, first is under w!=u.
+
+2001-03-05  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/cray/sub_n.c: Rewrite using `majority' logic.
+
+       * mpz/cfdiv_r_2exp.c (cfdiv_r_2exp): Reread `up' after realloc of w.
+
+       * mpn/cray/ieee/mul_1.c: Rewrite.  Streamline multiplications;
+       use `majority' logic.
+       * mpn/cray/ieee/addmul_1.c: Likewise.
+
+       * mpn/cray/add_n.c: Rewrite using `majority' logic.
+
+2001-03-04  Torbjorn Granlund  <tege@swox.com>
+
+       * longlong.h (CRAY udiv_qrnnd): No longer conditional on CRAYMPP.
+       (64-bit hppa add_ssaaaa): New.
+       (64-bit hppa sub_ddmmss): New.
+
+       * mpn/cray/ieee/invert_limb.c: New file.
+
+       * gmp-impl.h (RANDS): Add a `,0' to make it compile on more compilers.
+
+2001-03-03  Kevin Ryde  <kevin@swox.se>
+
+       * mpz/n_pow_ui.c (ULONG_PARITY): Move to gmp-impl.h.
+       * gmp-impl.h (ULONG_PARITY): i386 part from n_pow_ui.c, new generic
+       form by Torbjorn.
+
+       * tests/mpz/t-div_2exp.c: New file, rewrite of t-2exp.c.
+       * tests/mpz/t-2exp.c: Remove file.
+       * tests/mpz/Makefile.am (check_PROGRAMS): Update.
+
+       * gmp-h.in (mpz_cdiv_q_2exp, mpz_cdiv_r_2exp): Add prototypes.
+       * gmp.texi (Integer Division): Add mpz_cdiv_q_2exp and mpz_cdiv_r_2exp.
+
+       * mpz/cfdiv_q_2exp.c: New file, partial rewrite of fdiv_q_2exp.c, add
+       mpz_cdiv_q_2exp entrypoint.
+       * mpz/cfdiv_r_2exp.c: New file, rewrite of fdiv_r_2exp.c, use all mpn,
+       add mpz_cdiv_r_2exp entrypoint.
+       * mpz/fdiv_q_2exp.c, mpz/fdiv_r_2exp.c: Remove files.
+       * mpz/Makefile.am (libmpz_la_SOURCES): Update.
+       * Makefile.am (MPZ_OBJECTS): Ditto.
+
+       * gmp-impl.h (USE_LEADING_REGPARM): Use __i386__ same as longlong.h
+       (REGPARM_2_1, REGPARM_3_1, REGPARM_ATTR): New macros.
+       * mpz/jacobi.c (jac_or_kron): Use them.
+
+       * configure.in (HAVE_ABI_$ABI): Re-enable this for config.m4, with
+       dots changed to underscores (necessary for hppa).
+
+       * tests/mpz/t-divis.c, tests/mpz/t-divis_2exp.c: New files.
+       * tests/mpz/Makefile.am (check_PROGRAMS): Add them.
+
+       * gmp-h.in (mpz_divisible_p, mpz_divisible_ui_p,
+       mpz_divisible_2exp_p): Add prototypes.
+       * gmp.texi (Integer Division): Add mpz_divisible_p.
+       (Efficiency): Add remarks about divisibility testing.
+
+       * mpz/divis.c, mpz/divis_ui.c, mpz/divis_2exp.c: New files.
+       * mpz/Makefile.am (libmpz_la_SOURCES): Add them.
+       * Makefile.am (MPZ_OBJECTS): Ditto.
+
+       * mpn/generic/divis.c: New file.
+       * configure.in (gmp_mpn_functions): Add it.
+       * mpn/Makefile.am (nodist_libdummy_la_SOURCES): Ditto.
+       * gmp-impl.h (mpn_divisible_p): Add prototype.
+
+       * urandom.h: Remove file.
+       * Makefile.am (EXTRA_DIST): Remove it.
+
+       * tests/mpz/convert.c, dive.c, io.c, logic.c, reuse.c, t-2exp.c,
+       t-fdiv.c, t-fdiv_ui.c, t-gcd.c, t-jac.c, t-mul.c, t-pow.c,
+       t-powm.c, t-powm_ui.c, t-root.c, t-sqrtrem.c, t-tdiv.c,
+       t-tdiv_ui.c: Use RANDS, initialized by tests_rand_start.
+
+       * tests/mpz/t-pow.c: New file, being t-pow_ui renamed and with some
+       further tests added.
+       * tests/mpz/t-pow_ui.c: Remove file.
+       * tests/mpz/Makefile.am (check_PROGRAMS): Update.
+
+       * tests/t-modlinv.c: Don't use urandom.h.
+       * tests/mpz/bit.c, tests/mpz/t-scan.c: Ditto.
+       * tests/mpq/t-cmp.c, tests/mpq/t-cmp_ui.c, tests/mpq/t-get_d.c: Ditto.
+       * tests/mpf/reuse.c, t-add.c, t-conv.c, t-dm2exp.c, t-muldiv.c,
+       t-sqrt.c, t-sub.c: Ditto.
+
+       * tests/misc.c (tests_rand_start, tests_rand_end): New functions.
+       (tests_start, tests_end): Use them.
+       (urandom): New function.
+       * tests/tests.h: Add prototypes.
+
+       * mpz/random.c: Rewrite using mpz_urandomb and RANDS.
+       * mpn/generic/random.c: Rewrite using _gmp_rand and RANDS.
+       * mpn/generic/random2.c: Use RANDS not random() etc.
+
+       * gmp-impl.h (__gmp_rands, __gmp_rands_initialized): Add externs.
+       (gmp_randstate_ptr): New typedef.
+       (RANDS, RANDS_CLEAR): New macros.
+
+       * rands.c: New file.
+       * Makefile.am (libgmp_la_SOURCES): Add it.
+
+       * configure.in (mpn_objs_in_libmp): New AC_SUBST.
+       * Makefile.am (libmp_la_DEPENDENCIES): Use it.
+
+2001-03-02  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/pa64/udiv_qrnnd.asm: New file.
+
+2001-03-01  Kevin Ryde  <kevin@swox.se>
+
+       * mpbsd/rpow.c: New file.
+       * mpbsd/Makefile.am (libmpbsd_la_SOURCES): Add it.
+       (nodist_libmpbsd_la_SOURCES): Remove pow_ui.c.
+       * Makefile.am (MPBSD_OBJECTS): Add rpow.lo, remove pow_ui.lo.
+       (libmp_la_DEPENDENCIES): Add mpz/n_pow_ui.lo.
+
+       * mpz/ui_pow_ui.c: Rewrite using mpz_n_pow_ui.
+       * mpz/pow_ui.c: Ditto, and no longer provide rpow for mpbsd.
+
+       * mpz/n_pow_ui.c: New file, rewrite of pow_ui.c and ui_pow_ui.c.  Use
+       less temporary memory, strip factors of 2 from the base, use mpn_mul_2
+       if available.
+       * mpz/si_pow_ui.c: New file.
+       * mpz/Makefile.am (libmpz_la_SOURCES): Add them.
+       * Makefile.am (MPZ_OBJECTS): Ditto.
+       * gmp-impl.h (mpz_n_pow_ui): Add prototype.
+       * gmp-h.in (mpz_si_pow_ui): Add prototype.
+       * gmp.texi (Integer Exponentiation): Add mpz_si_pow_ui.
+
+       * acinclude.m4 (GMP_C_SIZES): Add BITS_PER_ULONG.
+       Correction to mp_limb_t working check.
+       * configure.in (limb_chosen): New variable.
+       * tests/t-constants.c (BITS_PER_ULONG): Check this value.
+       Add some reminders about tests that fail on Cray.
+
+       * tests/refmpn.c (refmpn_mul_2): New function.
+       * tests/refmpz.c (refmpz_pow_ui): Copied from tests/mpz/t-pow_ui.c.
+       * tests/tests.h: Add prototypes.
+
+       * configure.in (none-*-*): Add ABI=longlong.
+       * doc/configuration (Long long limb testing): Describe it.
+
+       * gmp.texi (Low-level Functions): Move some commented out remarks ...
+       * mpn/generic/mul_basecase.c: ... to here.
+
+       * mpn/x86/README: Note "%=" as an alternative to "1:" in __asm__.
+
+       * tests/trace.c (mp_trace_start): Print "bin" for binary.
+
+       * mpn/generic/dump.c: Add a couple of casts to keep gcc quiet.
+
+       * gmp-h.in (mpn_incr_u, mpn_decr_u): Add parens around arguments.
+
+       * mpbsd/mout.c, mpbsd/mtox.c (num_to_text): Remove unused variable.
+
+       * mpfr/set_d.c (mpfr_get_d2): Declare "q" for 64-bit limbs.
+
+2001-02-28  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/pa64w/udiv_qrnnd.asm: Tune.
+
+2001-02-27  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/pa64w/udiv_qrnnd.asm: New file.
+
+2001-02-26  Torbjorn Granlund  <tege@swox.com>
+
+       * longlong.h (arm): Optimize sub_ddmmss by testing for constant
+       operands.
+       * mpn/arm/invert_limb.asm: New file.
+
+2001-02-24  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/lshift.c: Rewrite.
+       * mpn/generic/rshift.c: Rewrite.
+
+       * longlong.h: Use UWtype for external interfaces that expect mp_limb_t.
+
+       * longlong.h (arm): #define invert_limb.
+
+       * mpn/arm: Make labels have local scope.
+
+       * configure.in (arm*-*-*): Set extra_functions.
+       * longlong.h (arm): #define udiv_qrnnd.
+       * mpn/arm/udiv.asm: New file.
+
+2001-02-24  Kevin Ryde  <kevin@swox.se>
+
+       * tune/many.pl: Add mpn_count_leading_zeros, mpn_count_trailing_zeros
+       and mpn_invert_limb.  Add count_leading_zeros, count_trailing_zeros
+       from a .h file.  Correction to modexact_1_odd prototype.  Support
+       ansi2knr.
+       * tune/speed.h, tune/common.c: Consequent changes.
+
+       * demos/expr/*: Make a few more functions available in expressions,
+       create only libexpr.a, misc minor updates.
+
+       * mpn/Makeasm.am: Add some comments about suffix ordering.
+
+       * tests/refmpn.c (rshift_make, lshift_make): No need to compare
+       unsigned to zero.
+
+       * mpq/mul.c: Detect and optimize squaring.
+
+2001-02-23  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/mips3: Convert files to `.asm'.
+
+       * mpn/arm: Convert files to `.asm'.  Misc cleanups.
+       * mpn/arm/submul_1.asm: New file.
+
+2001-02-21  Kevin Ryde  <kevin@swox.se>
+
+       * tune/tuneup.c (all): Only one compiler print should match, no need
+       for #undef PRINTED_COMPILER.
+
+       * mpfr/mpfr.h (mpfr_sgn): Use mpfr_cmp_ui (patch from Paul).
+
+       * mpz/fib_ui.c: Update some remarks about alternative algorithms.
+       * gmp.texi (Fibonacci Numbers Algorithm): Ditto.
+       (Assigning Floats): Clarify mpf_swap swaps the precisions too.
+       (Low-level Functions): Try to be clearer about negative cofactors.
+
+2001-02-21  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/sparc64/copyi.asm: Streamline for small operands.
+       * mpn/sparc64/add_n.asm: Likewise.
+       * mpn/sparc64/sub_n.asm: Likewise.
+
+       * mpn/sparc64/copyd.asm: New file.
+
+2001-02-20  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/sparc64/lshift.asm: Rewrite.
+       * mpn/sparc64/rshift.asm: Rewrite.
+
+2001-02-19  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/sparc64/add_n.asm: Rewrite using `majority' logic.
+       * mpn/sparc64/sub_n.asm: Likewise.
+
+       * tune/tuneup.c (all): Recognise DECC and MIPSpro compilers.
+
+       * mpn/pa64/sqr_diagonal.asm: Use PROLOGUE/EPILOGUE.
+       * mpn/pa64w/sqr_diagonal.asm: Likewise.
+
+       * configure.in (HAVE_ABI_$abi): Disable for now.
+
+       * mpn/asm-defs.m4 (PROLOGUE): Use LABEL_SUFFIX.
+
+       * acinclude.m4 (GMP_ASM_ATTR): New check, for hppa oddities.
+
+2001-02-18  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/hppa/hppa1_1/gmp-mparam.h: New file.
+       * mpn/hppa/hppa2_0/gmp-mparam.h: New file.
+
+       * mpn/pa64/sqr_diagonal.asm: New file.
+       * mpn/pa64w/sqr_diagonal.asm: New file.
+       * mpn/hppa/hppa1_1/sqr_diagonal.asm: New file.
+       * mpn/hppa/hppa2_0/sqr_diagonal.asm: New file.
+
+       * mpn/sparc32/v9/add_n.asm: Use `fitod' instead of `fxtod' for dummy
+       FA-pipeline insns.
+       * mpn/sparc32/v9/sub_n.asm: Likewise.
+
+2001-02-18  Kevin Ryde  <kevin@swox.se>
+
+       * gmp.texi (Known Build Problems): Notes on make, $* and K&R, misc
+       tweaks elsewhere.
+       (Low-level Functions): Use {} notation in mpn_sqrtrem.
+       (Basecase Multiplication): Mention BASECASE_SQR_THRESHOLD.
+
+       * mpfr/isnan.c (mpfr_number_p): Infinity is not a number.
+       * mpfr/out_str.c: Pass strlen+1 for the block size to free.
+       * mpfr/get_str.c: Correction for realloc to strlen+1.
+
+       * acinclude.m4 (GMP_C_SIZES): Generate an error if mp_limb_t doesn't
+       seem to work for some reason.
+
+2001-02-16  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/sparc32/v9/gmp-mparam.h: Retune.
+
+       * mpn/sparc32/v9/add_n.asm: New file.
+       * mpn/sparc32/v9/sub_n.asm: New file.
+
+       * mpn/sparc32/v9/mul_1.asm: Tune function entry.
+       * mpn/sparc32/v9/addmul_1.asm: Likewise.
+       * mpn/sparc32/v9/submul_1.asm: Likewise.
+
+       * mpn/sparc32/v9/sqr_diagonal.asm: New file.
+
+2001-02-16  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in: Fix flags selection when $CC is a compiler known to us.
+
+       * demos/expr/exprfr.c (e_mpfr_cos, e_mpfr_sin): mpfr_sin_cos now
+       allows NULL for one parameter.
+
+       * mpfr/*: Update to 20010215.
+       * mpfr/trunc.c: Use -DOPERATION scheme, and gmp mpn_zero_p.
+       * mpfr/sqrt.c: Use plain mpn_sqrtrem, not mpn_sqrtrem_new.
+       * mpfr/sqrtrem.c: Remove file.
+       * mpfr/Makefile.am (libmpfr_a_SOURCES): Add isnan.c and set_ui.c,
+       remove sqrtrem.c and srandom.h.
+
+       * configfsf.guess: Update to 2001-02-13.
+       * configfsf.sub: Update to 2001-02-16.
+       * config.sub (j90, t90): Remove special handling, configfsf.sub now ok.
+
+       * Makefile.am (MPF_OBJECTS): Add a couple of missing $U's.
+
+       * tune/tuneup.c: Identify compiler used (GCC and Sun C so far).
+
+2001-02-15  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/sparc32/v9/mul_1.asm: Change `ld' to `lduw' and `st' to `stw'.
+       * mpn/sparc32/v9/addmul_1.asm: Likewise.
+       * mpn/sparc32/v9/submul_1.asm: Likewise.
+
+2001-02-14  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/mips3/mips.m4: New file.
+       * configure.in (mips*-*-irix[6789]*): Use mips3/mips.m4.
+
+       * mpn/powerpc64/sqr_diagonal.asm: New file.
+
+       * mpn/mips3/sqr_diagonal.asm: New file.
+
+2001-02-12  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/powerpc32/sqr_diagonal.asm: New file.
+
+       * mpn/generic/sqr_basecase.c: Remove declaration of mpn_sqr_diagonal.
+       Fix typo in header comment.
+
+2001-02-12  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/generic/mul.c, mpn/generic/mul_n.c, gmp-impl.h: Use
+       mpn_mul_basecase for squaring below new BASECASE_SQR_THRESHOLD.
+       * tune/tuneup.c, gmp-impl.h: Tune BASECASE_SQR_THRESHOLD.
+
+       * Makefile.am (libgmp.la, libmp.la): Revert change to build from
+       mpn/libmpn.la etc, go back to explicitly listed objects.
+
+       * configure.in: Recognise sparc64-*-*, not just sparc64-*-linux*.
+
+2001-02-11  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/asm-defs.m4 (sqr_diagonal): New define_mpn.
+
+       * mpn/alpha/sqr_diagonal.asm: New file.
+
+2001-02-11  Kevin Ryde  <kevin@swox.se>
+
+       * gmp.texi (Low-level Functions): Note mpn_get_str clobbers its input
+       plus 1 extra limb.
+
+       * mpfr/add.c,agm.c,exp2.c,exp3.c,generic.c,log2.c,pi.c,print_raw.c,
+       set_d.c,sin_cos.c,sqrtrem.c,sub.c: Apply some tweaks for K&R.
+       * tests/mpz/reuse.c, tests/mpq/t-md_2exp.c, demos/pexpr.c,
+       demos/expr/t-expr.c: Ditto.
+
+       * configure.in (HAVE_ABI_$abi): New define in config.m4.
+
+       * gmp-impl.h (mpn_sqr_diagonal): Add prototype and define.
+       * tune/speed.c,speed.h,common.c,many.pl: Add measuring of
+       mpn_sqr_diagonal.
+
+       * gmp.texi, acinclude.m4: Mention x86 solaris 2.7 has the reg->reg
+       movq bug the same as 2.6.
+
+       * mpfr/Makefile.am (EXTRA_DIST): Add mpfr-test.h and mpf2mpfr.h.
+
+       * mpn/x86/README: Merge contents of README.family.
+       * mpn/x86/README.family: Remove file.
+
+       * mpn/Makefile.am (nodist_libdummy_la_SOURCES): Add mode1o, gcd_finda,
+       invert_limb, sqr_diagonal; remove mod_1_rs; sort alphabetically.
+
+2001-02-10  Torbjorn Granlund  <tege@swox.com>
+
+       * configure.in (gmp_mpn_functions_optional): List sqr_diagonal.
+
+       * mpn/powerpc32/aix.m4: Use unnamed csects.
+       * mpn/powerpc64/aix.m4: Likewise.
+
+       * acconfig.h: Add #undef of mpn_sqr_diagonal.
+       Remove lots of spacing.
+
+       * configure.in (syntax testing section): Match power* instead of
+       powerpc*.
+       * mpn/power: Convert files to `.asm'.
+       Prefix umul_ppmm and sdiv_qrnnd.
+       Update some comments.
+
+2001-02-09  Kevin Ryde  <kevin@swox.se>
+
+       * acconfig.h: Add HAVE_NATIVE_mpn_modexact_1_odd and
+       HAVE_NATIVE_mpn_modexact_1c_odd.
+
+       * configure.in (CCAS): Don't override a user selection.
+
+       * mpq/cmp_ui.c: DIVIDE_BY_ZERO if den2==0.
+
+2001-02-08  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/sqr_basecase.c: Use mpn_sqr_diagonal when appropriate.
+
+2001-02-07  Kevin Ryde  <kevin@swox.se>
+
+       * gmp.texi (Low-level Functions): mpn_preinv_mod_1 now undocumented.
+
+       * mpn/generic/random2.c (myrandom): Use rand() on mingw.
+
+       * mpn/alpha/gmp-mparam.h: Update tuned parameters.
+
+2001-02-05  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/alpha/ev6/gmp-mparam.h: Retune.
+
+2001-02-05  Kevin Ryde  <kevin@swox.se>
+
+       * Makefile.am (libgmp, libmp): Construct from mpn/libmpn.la etc rather
+       than explicitly listed objects.
+
+       * urandom.h: Use rand() on mingw.
+
+       * mpn/powerpc64/lshift.asm,addsub_n.asm: Use r1 not 1.
+
+2001-02-04  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/ia64/copyi.asm: New file.
+       * mpn/ia64/copyd.asm: New file.
+
+2001-02-04  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/alpha/ev5/gmp-mparam.h, mpn/mips3/gmp-mparam.h,
+       mpn/powerpc32/gmp-mparam.h, mpn/powerpc64/gmp-mparam.h,
+       mpn/sparc64/gmp-mparam.h, mpn/x86/*/gmp-mparam.h:
+       Update tuned parameters.
+
+       * mpn/x86/i486: New directory.
+       * configure.in (i486-*-*): Use it.
+       * mpn/x86/i486/gmp-mparam.h: New file.
+
+       * mpn/x86/pentium/mode1o.asm: New file.
+       * mpn/x86/p6/mode1o.asm: New file.
+
+       * tune/many.pl: Use $(ASMFLAGS_PIC) and $(CFLAGS_PIC).
+
+       * gmp.texi (Integer Division): Another rewording of 2exp divisions.
+
+2001-02-03  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/arm/gmp-mparam.h: Tune.
+
+       * mpn/ia64/popcount.asm: Put a `;;' break at end of main loop.
+
+       * configure.in (arm*-*-*): Set gcc_cflags in order to pass
+       $fomit_frame_pointer.
+
+       * tests/mpz/t-mul.c (base_mul): Remove an unused variable.
+
+2001-02-02  Torbjorn Granlund  <tege@swox.com>
+
+       * demos/pexpr.c (TIME): New macro.
+       (main): Use TIME--print timing more accurately.
+       (setup_error_handler): Increase RLIMIT_DATA to 16 Mibyte.
+
+       * longlong.h (arm): Add __CLOBBER_CC to add_ssaaaa and sub_ddmmss.
+
+2001-02-02  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in: Don't remove gmp-mparam.h and mpn source links under
+       --no-create since in that case they're not re-created.
+
+       * demos/expr: New directory.
+       * Makefile.am (SUBDIRS, allprogs): Add it.
+       * demos/expr/README, Makefile.am, expr.c, exprv.c, exprz.c, exprza.c,
+       exprq.c, exprqa.c, exprfa.c, exprf.c, exprfr.c, exprfra.c, expr.h,
+       expr-impl-h.in, run-expr.c, t-expr.c: New files.
+       * configure.in: Generate demos/expr/Makefile & demos/expr/expr-impl.h.
+
+       * Makefile.am: Remove mpfr from main libgmp.
+       * mpfr/Makefile.am: Build and install separate libmpfr.a.
+       * mpfr/*: Update to mpfr 2001.
+
+       * gmp-h.in (__GNU_MP_VERSION_MINOR): Bump to 2.
+       * Makefile.am (libtool -version-info): Bump appropriately.
+       * NEWS: Updates.
+
+       * tune/divrem1div.c, tune/divrem1inv.c, tune/divrem2div.c,
+       tune/divrem2inv.c: Renamed from divrem_1_div.c, divrem_1_inv.c,
+       divrem_2_div.c, divrem_2_inv.c, to be unique in DOS 8.3 filenames.
+       * tune/Makefile.am (libspeed_la_SOURCES): Update.
+
+       * mpn/x86/*/README, mpn/x86/README.family: Misc updates.
+       * tune/README: Misc updates.
+       * doc/configuration: Misc updates.
+
+       * mpn/x86/pentium/mmx/gmp-mparam.h: Change UDIV_PREINV_TIME to
+       UDIV_NORM_PREINV_TIME.
+
+       * mpz/pprime_p.c: Use ASSERT_ALWAYS instead of abort.
+
+       * rand.c (__gmp_rand_lc_scheme): Add "const".
+       (struct __gmp_rand_lc_scheme_struct): Make astr "const char *".
+
+       * demos/calc/calc.y, demos/calc/calclex.l: Add kron function.
+
+       * tests/devel/try.c: Partial rewrite, new scheme of function types,
+       allow result validation functions, add sqrtrem and jacobi testing.
+       * tune/many.pl: Corresponding updates.
+       * tests/devel/Makefile.am: Add a convenience rule for libtests.la.
+
+       * tests/refmpz.c: New file.
+       * tests/Makefile.am: Add it.
+       * tests/misc.c (mpz_erandomb, mpz_erandomb_nonzero): New functions.
+       * tests/tests.h: Add prototypes.
+
+       * mpn/x86/k6/cross.pl: Add a couple more exceptions.
+
+       * gmp.texi: Don't use @nicode{'\0'}, it doesn't come out right in tex.
+       (Introduction to GMP): Mention Cray vector systems.
+       (Build Options): Describe --enable-mpfr, refer to its manual.  Add
+       Crays under supported CPUs.
+       (Debugging): Add notes on source file paths.
+       (Autoconf): New section.
+       (Assigning Integers): Note truncation by mpz_set_d, mpz_set_q and
+       mpz_set_f.
+       (Converting Integers): Note the size mpz_get_str allocates.
+       (Floating-point Functions): Rewrite introduction, clarifying some
+       points about precision handling.
+       (Converting Floats): Note the size mpf_get_str allocates, and that it
+       gives an empty string for zero.  Add mpf_get_si and mpf_get_ui.
+       (Float Comparison): Give the formula mpf_reldiff calculates.
+       (Miscellaneous Float Functions): Add mpf_integer_p and mpf_fits_*_p.
+       (Random Number Functions): Misc rewordings for clarity.
+       (Random State Initialization): Ditto.
+       (Custom Allocation): Remove note on deallocate_function called with 0,
+       misc rewording and clarifications.
+       (Exact Remainder): New section.
+       (Binary GCD): A few words on initial reduction using division.
+       (Accelerated GCD): Refer to exact remainder section.
+       (Extended GCD): Extra remarks on single versus double selection.
+       (Jacobi Symbol): Update for mpz/jacobi.c rewrite and modexact_1_odd.
+       (Modular Powering Algorithm): Refer to exact remainder section.
+       (Assembler SIMD Instructions): Update remarks on MMX.
+       (Contributors): Amend to "Divide and Conquer" division.
+       (References): Tweak some formatting.  Add "Proof of GMP Fast Division
+       and Square Root Implementations" by Paul Zimmermann.
+
+2001-01-31  Torbjorn Granlund  <tege@swox.com>
+
+       * configure.in: Don't ever pass -mips3; let ABI flags imply ISA.
+
+2001-01-31  Kevin Ryde  <kevin@swox.se>
+
+       * tune/time.c: Remove unnecessary longlong.h.
+       (speed_endtime): Add some extra diagnostics.
+
+       * tests/mpz/t-fdiv_ui.c, tests/mpz/t-tdiv_ui.c: Use unsigned long for
+       the divisor, not mp_limb_t.
+       * tests/mpz/t-jac.c (try_base): Use %llu for long long limb.
+       * tests/trace.c: Add <string.h> for strlen.
+
+       * tune/freq.c (speed_cpu_frequency_proc_cpuinfo): Ignore "cycle
+       frequency" of 0, allow "BogoMIPS" as well as "bogomips".
+
+       * macos/Makefile.in: Add mpf/fits_s.c and mpf/fits_u.c objects.
+
+2001-01-30  Torbjorn Granlund  <tege@swox.com>
+
+       * longlong.h: Add add_ssaaaa and sub_ddmmss for 64-bit sparc.
+
+2001-01-29  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/powerpc64/addmul_1.asm: Prefix registers with an `r'.
+       * mpn/powerpc64/submul_1.asm: Likewise.
+       * mpn/powerpc64/mul_1.asm: Likewise.
+
+       * configure.in (alpha*-*-*): Amend last change to handle pca*.
+
+2001-01-29  Kevin Ryde  <kevin@swox.se>
+
+       * tune/speed.h (SPEED_ROUTINE_INVERT_LIMB_CALL): Don't let the
+       compiler optimize everything away.
+
+       * tune/speed.c, tune/speed.h, tune/common.c, tune/Makefile.am: Measure
+       operator_div, operator_mod, mpn_divrem_2_div, mpn_divrem_2_inv,
+       mpn_sb_divrem_m3, mpn_sb_divrem_m3_div, mpn_sb_divrem_m3_inv,
+       mpn_dc_divrem_sb_div, mpn_dc_divrem_sb_inv.
+       * tune/divrem_2_div.c, tune/divrem_2_inv.c, tune/sb_div.c,
+       tune/sb_inv.c: New files.
+
+       * tune/tuneup.c, gmp-impl.h, tune/speed.h, tune/common.c,
+       tune/Makefile.am: Tune SB_PREINV_THRESHOLD and DIVREM_2_THRESHOLD.
+
+       * mpn/generic/divrem_2.c: Use new DIVREM_2_THRESHOLD.
+       * mpn/generic/sb_divrem_mn.c: Use new SB_PREINV_THRESHOLD.
+
+       * mpn/x86/p6/mmx/lshift.asm, mpn/x86/p6/mmx/rshift.asm: New files,
+       just m4 include()ing the P55 code.
+       * configure.in (pentium[23]-*-*): Remove x86/pentium/mmx from path.
+
+2001-01-27  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in (AC_CHECK_FUNCS): Add srand48.
+       * tune/speed.c: Use this test.
+
+       * acinclude.m4 (GMP_GCC_MARCH_PENTIUMPRO): Allow "egcs-" prefix on gcc
+       --version, warn if the format is unrecognised.
+       (GMP_COMPARE_GE): Guard against empty $1 not only on last arg.
+       (GMP_INIT, GMP_FINISH, GMP_PROG_M4): Obscure or eliminate literal
+       "dnl"s since autoconf thinks they indicate faulty macros.
+
+       * mpz/get_str.c, mpf/get_str.c: Make allocated string block exactly
+       strlen(str)+1 bytes.
+       * mpz/dump.c, mpf/dump.c, tests/mpz/convert.c: Use this size when
+       freeing.
+       * tests/mpf/t-conv.c: Ditto, and ensure x==0 is exercised.
+
+       * tests/mpz/t-fits.c: New file.
+       * tests/mpz/Makefile.am: Add it.
+
+       * tests/mpf/t-fits.c: New file.
+       * tests/mpf/t-get_si.c: New file.
+       * tests/mpf/t-int.c: New file.
+       * tests/mpf/Makefile.am: Add them.
+
+       * mpf/fits_s.c: New file.
+       * mpf/fits_u.c: New file.
+       * mpf/get_si.c: New file.
+       * mpf/get_ui.c: New file.
+       * mpf/int_p.c: New file.
+       * Makefile.am, mpf/Makefile.am: Add them.
+       * gmp-h.in (mpf_fits_*_p, mpf_get_si, mpf_get_ui, mpf_integer_p): Add
+       prototypes.
+
+       * tests/memory.c (tests_allocate, tests_reallocate): Guard against
+       size==0.
+
+       * tests/mpz/*.c, tests/mpq/*.c, tests/mpf/*.c: Use tests_start and
+       tests_end.
+
+       * gmp-impl.h (USE_LEADING_REGPARM): Fix conditionals.
+
+2001-01-23  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in, mpn/Makeasm.am (ASMFLAGS_PIC): New substitution,
+       allowing -DPIC to be suppressed on cygwin.
+       (CFLAGS_PIC): New substitution, use it and $(CCAS) directly, rather
+       than $(LIBTOOL), avoiding a problem with FreeBSD 2.2.8.
+
+       * mpn/x86/k6/mode1o.asm, mpn/x86/k7/mode1o.asm: Remove an unnecessary
+       +[.-L(here)] from _GLOBAL_OFFSET_TABLE_, avoids a segv from gas 1.92.3.
+       * mpn/x86/README.family: Add notes on the problem.
+
+2001-01-20  Torbjorn Granlund  <tege@swox.com>
+
+       * configure.in (alpha*-*-*): Default `flavour' to ev4.
+
+2001-01-19  Kevin Ryde  <kevin@swox.se>
+
+       * assert.c, gmp-impl.h (__gmp_assert_fail): Change return type to
+       void, since it's no longer used in expressions.
+
+       * mpn/x86/addsub_n.S: Remove file, since it doesn't work and it upsets
+       tune/many.pl.
+
+       * mpz/jacobi.c: Rewrite, but still binary algorithm; accept zero and
+       negative denominators; merge mpz_jacobi and mpz_legendre, add
+       mpz_kronecker; use mpn directly, add special cases for size==1.
+       * gmp.texi (Number Theoretic Functions): Update.
+       * gmp-h.in (mpz_kronecker): Add prototype.
+       * gmp-impl.h (USE_LEADING_REGPARM): New macro.
+       * tests/mpz/t-jac.c: Test mpz_kronecker.
+       * mpz/legendre.c: Remove file.
+       * Makefile.am, mpz/Makefile.am: Update.
+
+       * longlong.h (alpha count_leading_zeros): Use __attribute__ ((const))
+       when possible, add parameter to prototype.
+       (ia64 udiv_qrnnd): Use for all compilers, not just gcc.
+       (pentium count_trailing_zeros): Use count_leading_zeros.
+
+       * acinclude.m4 (GMP_C_ATTRIBUTE_CONST, GMP_C_ATTRIBUTE_NORETURN): New
+       macros.
+       * configure.in: Use them.
+       * gmp-impl.h (ATTRIBUTE_CONST, ATTRIBUTE_NORETURN): New macros.
+       (mpn_invert_limb): Add ATTRIBUTE_CONST.
+       (__gmp_assert_fail): Add ATTRIBUTE_NORETURN.
+
+2001-01-18  Kevin Ryde  <kevin@swox.se>
+
+       * gmp-h.in, gmp-impl.h (__gmp_allocate_func, __gmp_reallocate_func,
+       __gmp_free_func): Move prototypes from gmp-impl.h to gmp-h.in, for the
+       benefit of gmp++.h.
+
+       * gmp-impl.h, tests/misc.c, tests/tests.h: Move MPZ_SET_STR_OR_ABORT
+       and MPF_SET_STR_OR_ABORT to mpz_set_str_or_abort and
+       mpf_set_str_or_abort in libtests.
+       * tests/mpz/convert.c, tests/mpz/t-bin.c, tests/mpz/t-get_si.c,
+       tests/mpz/t-jac.c, tests/mpz/t-misc.c, tests/mpq/t-md_2exp.c,
+       tests/mpq/t-set_f.c, tests/mpf/t-conv.c, tests/mpf/t-misc.c: Update.
+
+       * mpn/generic/sqrtrem.c: Use MPN_COPY_INCR (for when rp==NULL).
+
+       * tests/mpz/reuse.c: Only run mpz_divexact_gcd on positive divisors.
+
+2001-01-18  Torbjorn Granlund  <tege@swox.com>
+
+       * demos/pexpr.c (main): Accept -vml option.
+       (fns): List `hamdist', `pow', `nextprime'.
+       (mpz_eval_expr): Return -1 for `popc' of negative.
+       (mpz_eval_expr): Handle `hamdist', `pow', `nextprime'.
+
+2001-01-15  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/alpha/ev5/mode1o.c: New file.
+
+       * tune/freq.c (speed_cpu_frequency_measure): Check cycles_works_p
+       before running speed_cyclecounter.
+       * tune/speed.h (cycles_works_p): Add prototype.
+
+2001-01-13  Torbjorn Granlund  <tege@swox.com>
+
+       * tests/rand/t-rand.c (farr): Fix typo.
+       (zarr): Fix typo.
+
+2001-01-12  Kevin Ryde  <kevin@swox.se>
+
+       * mpz/kronsz.c: Don't depend on right shifting a negative.
+
+       * mpn/x86/gmp-mparam.h: New file.
+
+       * mpn/x86/pentium/mmx/mul_1.asm: New file.
+
+2001-01-11  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/kronsz.c: Temporary workaround for Cray right shift oddities.
+       Explicitly compare against zero in tests.
+
+2001-01-10  Kevin Ryde  <kevin@swox.se>
+
+       * mpz/kronzs.c: Don't depend on right shifting a negative.
+
+2001-01-09  Torbjorn Granlund  <tege@swox.com>
+
+       * tests/t-constants.c: Disable some undefined tests.
+       (CHECK_MAX_S): Remove workaround for gcc 2.95.2 bug recently added.
+
+2001-01-09  Kevin Ryde  <kevin@swox.se>
+
+       * tests/t-constants.c: Add more diagnostics.
+       (CHECK_MAX_S): Fix for gcc 2.95.2 -mpowerpc64 -maix64.
+
+       * mpn/x86/k6/mode1o.asm: New file.
+       * mpn/x86/k7/mode1o.asm: New file.
+
+       * mpn/asm-defs.m4 (modexact_1_odd, modexact_1c_odd): New define_mpn's.
+       (__clz_tab, modlimb_invert_table, PROLOGUE, EPILOGUE): Add asserts for
+       GSYM_PREFIX.
+       * mpn/x86/x86-defs.m4 (Zdisp): Add a movzbl.
+
+       * tests/mpz/t-jac.c (check_a_zero): New test.
+       (check_squares_zi): Fix to use (a^2/b), not (a*b/b); revert last
+       change avoiding a,b=0, both are fine.
+       (try_2den): Don't use mpz_kronecker_ui for the expected answer.
+       (try_*): Call abort rather than exit.
+
+       * mpz/kronzu.c, mpz/kronzs.c: Fix for a=0.
+
+       * tune/tuneup.c (USE_PREINV_MOD_1): Fix to use new DATA_HIGH_LT_R.
+
+2001-01-08  Torbjorn Granlund  <tege@swox.com>
+
+       * urandom.h: Amend 2000-11-21 change to also handle cygwin.
+
+2001-01-08  Kevin Ryde  <kevin@swox.se>
+
+       * tune/many.pl: Updates for move to tests/devel, add modexact_1_odd,
+       don't assume C files can't have carry-in entrypoints, remove
+       $(TRY_TESTS_OBJS) now in libtests.
+
+       * tests/devel/try.c, tests/refmpn.c, tests/tests.h: Remove
+       mpn_mod_1_rshift testing.
+
+       * tune/tuneup.c (fft_step_size): Test for overflow using the actual
+       mp_size_t, don't use BITS_PER_INT.
+
+       * tune/speed.c (r_string): "r" is a limb, use BITS_PER_MP_LIMB and
+       change LONG_ONES to LIMB_ONES.
+       * tune/time.c (M_2POWU): Use INT_MAX rather than BITS_PER_INT.
+
+       * extract-dbl.c (BITS_PER_PART): Use BITS_PER_MP_LIMB not
+       BITS_PER_LONGINT.
+
+       * mpz/inp_raw.c, mpz/out_raw.c: Add private defines of BITS_PER_CHAR.
+       * mpz/fac_ui.c, tests/mpz/t-fac_ui.c: Don't use BITS_PER_LONGINT.
+       * tests/mpz/t-get_si.c: Don't use BITS_PER_LONGINT, do the LONG_MAX
+       tests with some explicit code.
+
+       * mpn/*/gmp-mparam.h, acinclude.m4, tests/t-constants.c
+       (BITS_PER_LONGINT, BITS_PER_INT, BITS_PER_SHORTINT, BITS_PER_CHAR):
+       Remove defines, remove probings, remove tests.
+
+       * tune/tuneup.c (MODEXACT_1_ODD_THRESHOLD): Add tuning.
+
+       * tune/speed.c,speed.h,common.c: Add measuring of mpn_modexact_1_odd,
+       mpn_gcd_finda, and an "N" form for mpn_gcd_1.
+
+       * tests/mpz/t-jac.c (check_squares_zi): Ensure random a,b != 0.
+
+2001-01-07  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in (gmp_mpn_functions): Add mode1o, remove mod_1_rs.
+
+       * mpn/generic/mod_1_rs.c: Remove file, no longer needed.
+       * gmp-h.in (mpn_mod_1_rshift): Remove prototype and define.
+
+       * mpq/set_f.c: Use MPN_STRIP_LOW_ZEROS_NOT_ZERO.
+
+       * mpz/kronzu.c, mpz/kronzs.c, mpz/kronuz.c, mpz/kronsz.c: Use
+       mpn_modexact_1_odd, new style MPN_STRIP_LOW_ZEROS_NOT_ZERO, and new
+       JACOBI macros.  Various rearrangements supporting all this.
+
+       * mpn/generic/gcd_1.c: Use mpn_modexact_1_odd, reduce u%v if u much
+       bigger than v when size==1, some rearrangements supporting this.
+
+       * gmp-impl.h (JACOBI_*): More macros, add some casts to "int".
+       (MPN_STRIP_LOW_ZEROS_NOT_ZERO): Add a "low" parameter.
+       (mpn_modexact_1_odd, mpn_modexact_1c_odd): Add prototype and defines.
+       (MODEXACT_1_ODD_THRESHOLD): New threshold.
+       (MPN_MOD_OR_MODEXACT_1_ODD, JACOBI_MOD_OR_MODEXACT_1_ODD): New macros.
+
+       * mpn/generic/mode1o.c: New file.
+
+       * tests/mpz/reuse.c: Add testing of mpz_divexact_gcd.
+       * tests/mpz/t-fac_ui.c: Use libtests for memory leak checking.
+       * tests/mpz/t-fib_ui.c: Add a usage comment.
+
+       * tests/mpz/bit.c: Use libtests.
+       * tests/mpz/t-scan.c: Remove unused subroutines.
+       * tests/devel/try.c: Use libtests, define PROT_NONE if the system
+       doesn't.
+
+       * tests/spinner.c, tests/x86check.c: Use tests.h.
+       * tests/trace.c: Use tests.h, add mpf_trace.
+       * tests/refmpn.c: Use tests.h, add refmpn_malloc_limbs_aligned,
+       refmpn_tstbit, refmpn_neg.
+
+       * tune/common.c, tune/speed.h: Update for functions moved to
+       tests/misc.c.
+
+       * tune/Makefile.am, tests/mpz/Makefile.am, tests/mpq/Makefile.am,
+       tests/mpf/Makefile.am: Use tests/libtests.la.
+
+       * configure.in (AC_OUTPUT): Update for new directories.
+       (x86 CALLING_CONVENTIONS_OBJS): Use .lo for libtests.la, allow
+       ansi2knr on x86check.c.
+
+       * tests/Makefile.am: Establish new libtests.la convenience library,
+       add mpz, mpq, mpf, mpbsd subdirectories.
+       * tests/tests.h: New file.
+       * mpn/tests/ref.h,try.h: Remove files, now in tests.h.
+
+       * tests/mpf/ref.c: Move to tests/refmpf.c, rename functions to refmpf.
+       * tests/mpf/t-add.c, tests/mpf/t-sub.c: Use libtests.
+       * tests/mpf/Makefile.am: Update.
+
+       * tests/memory.c: New file.
+       * tests/misc.c: New file, a few subroutines from the test programs.
+
+       * mpz/tests, mpq/tests, mpf/tests, mpbsd/tests: Move directories to
+       tests/mpz etc.
+       * mpz/Makefile.am, mpq/Makefile.am, mpf/Makefile.am, mpbsd/Makefile.am
+       (SUBDIRS): Remove.
+
+       * tests/devel: New directory.
+       * mpn/tests/*.c: Move programs to tests/devel.
+       * mpn/tests/Makefile.am, mpn/tests/README: Move to tests/devel, update.
+
+       * mpn/tests/ref.c: Move to tests/refmpn.c.
+       * mpn/tests/spinner.c,trace.c,x86call.asm,x86check.c: Move to tests
+       directory.
+
+       * tests/t-constants.c: Add checks of HIGHBIT, MAX and MIN constants,
+       simplify ANSI vs K&R stringizing, use correct printf format types, do
+       all tests before aborting.
+
+2001-01-05  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/cray/ieee/gmp-mparam.h: Retune.
+
+2001-01-05  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in (mp.h): Only create this under --enable-mpbsd.
+
+       * demos/calc: New subdirectory, move demos/calc* to it.
+       * demos/calc/Makefile.am: New file, split from demos/Makefile.am.
+       * demos/Makefile.am: Update.
+       * configure.in (AC_OUTPUT): Add demos/calc/Makefile.
+
+       * tests/t-constants.c (CALC_BITS_PER_TYPE etc): Use a run-time test
+       for how many bits work in a given type, don't assume bits==8*sizeof.
+
+2001-01-04  Kevin Ryde  <kevin@swox.se>
+
+       * mpz/fits_s.c, mpz/fits_u.c: New files, split from fits.c, use plain
+       UINT_MAX etc, not MPZ_FITS_UTYPE_SDT etc.
+       * mpz/fits.c: Remove file.
+       * mpz/Makefile.am, macos/Makefile.in: Update.
+
+       * gmp-impl.h (UNSIGNED_TYPE_MAX etc): Remove these generic forms.
+       (MPZ_FITS_[SU]TYPE_SDT): Remove these.
+       (UINT_MAX etc): Provide a full set of defaults.
+       * gmp-h.in (__GMP_MP_SIZE_T_INT): New define.
+
+       * mpz/tests/t-scan.c: New file.
+       * mpz/tests/Makefile.am (check_PROGRAMS): Add it.
+
+       * mpz/scan0.c, mpz/scan1.c: Rewrite, don't read beyond allocated
+       memory, support negatives, return ULONG_MAX for no bit found.
+       * gmp.texi (Integer Logic and Bit Fiddling): Update.
+
+2001-01-03  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/tests/dive.c: Generate test operands using new random functions.
+       * mpz/tests/io.c: Likewise.
+       * mpz/tests/logic.c: Likewise.
+       * mpz/tests/t-2exp.c: Likewise.
+
+       * stack-alloc.c (__gmp_tmp_alloc): Round `now' to required alignment.
+
+       * stack-alloc.h (__TMP_ALIGN): Append `L'.
+
+       * gmp-impl.h: For Cray, #include limits.h.
+       (LONG_MIN): New #define.
+       (ULONG_HIGHBIT): #define in terms of ULONG_MAX.
+       (LONG_HIGHBIT): #define as LONG_MIN.
+       (USHRT_MAX): New name for USHORT_MAX.
+       (SHRT_MAX): New name for SHORT_MAX.
+       (SHRT_MIN): New #define.
+       (USHORT_HIGHBIT,SHORT_HIGHBIT): Removed.
+
+       * mpbsd/tests/t-misc.c (check_itom [data]): *SHORT* => *SHRT*;
+       remove code disabling a test for Cray.
+
+       * tests/t-constants.c (CHECK_CONSTANT): Cast parameters to long.
+
+       * mpn/generic/mul_n.c (mpn_kara_sqr_n): Remove unused variable `t'.
+       (mpn_kara_mul_n): Likewise.
+
+       * mpz/fac_ui.c (MPZ_SET_1_NZ): Actually use `__z'.
+
+       * mpz/tests/t-jac.c
+       (main, check_squares_zi): Generate test operands using new random
+       functions.
+
+       All changes below on this date for enabling `make; make check'
+       with C++ compilers:
+
+       * mpz/tests/t-pow_ui.c (debug_mp, ref_mpz_pow_ui): Provide prototypes.
+
+       * mpz/tests/t-mul.c (debug_mp, base_mul, ref_mpz_mul):
+       Provide prototypes.
+       (dump_abort): Provide prototype and declare properly for C++.
+
+       * mpz/tests/t-jac.c: #include stdlib.h and sys/time.h.
+
+       * mpz/tests/t-fdiv.c
+       (dump_abort): Provide prototype and declare properly for C++.
+       (debug_mp): Provide prototype.
+       * mpz/tests/t-fdiv_ui.c: Likewise.
+       * mpz/tests/t-gcd.c: Likewise.
+       * mpz/tests/t-powm.c: Likewise.
+       * mpz/tests/t-powm_ui.c: Likewise.
+       * mpz/tests/t-sqrtrem.c: Likewise.
+       * mpz/tests/t-tdiv_ui.c: Likewise.
+       * mpz/tests/t-tdiv.c: Likewise.
+
+       * mpz/tests/t-2exp.c: #include stdlib.h and sys/time.h.
+       Remove #include of longlong.h.
+
+       * mpz/tests/io.c: #include config.h, stdlib.h, sys/time.h, and
+       conditionally unistd.h.
+
+       * mpz/tests/dive.c: #include stdlib.h and sys/time.h.
+       (dump_abort): Provide prototype and declare properly for C++.
+       (debug_mp): Provide prototype.
+       * mpz/tests/logic.c: Likewise.
+
+       * mpz/tests/convert.c (debug_mp): Provide prototype.
+       * mpz/tests/t-root.c (debug_mp): Likewise.
+
+       * mpz/tests/bit.c: #include stdlib.h and sys/time.h.
+
+       * mpq/tests/t-get_d.c: #include stdlib.h and sys/time.h.
+       (dump): Provide prototype and declare properly for C++.
+
+       * mpq/tests/t-cmp_ui.c: #include stdio.h, stdlib.h and sys/time.h.
+       (ref_mpq_cmp_ui): Declare properly for C++.
+
+       * mpq/tests/t-cmp.c: #include stdlib.h and sys/time.h.
+       (ref_mpq_cmp): Declare properly for C++.
+       (dump): Delete unused function.
+
+       * mpf/random2.c (myrandom): New function.
+       (mpf_random2): Use it.
+
+       * mpn/generic/random2.c: #include stdlib.h (for random/mrand48).
+       (myrandom): New function.
+       (mpn_random2): Use it.
+
+       * mpf/tests/t-add.c: #include stdlib.h and sys/time.h.
+       (oo): Remove unused function.
+       * mpf/tests/t-conv.c: Likewise.
+       * mpf/tests/t-sub.c: Likewise.
+       * mpf/tests/t-dm2exp.c: Likewise.
+       * mpf/tests/t-muldiv.c: Likewise.
+       * mpf/tests/t-sqrt.c: Likewise.
+
+       * mpf/tests/reuse.c: #include stdlib.h and sys/time.h.
+       Use PROTO on some typedefs.
+       (oo): Remove function.
+       (dump_abort): Call mpf_dump instead of oo.
+
+       * mpf/set_str.c: #include stdlib.h (for strtol).
+
+       * mpf/random2.c: #include stdlib.h (for random/mrand48).
+       * mpn/alpha/udiv_arnnd: File deleted.
+
+       * Remove K&R function headers.
+
+2001-01-02  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/mul.c: Clean up spacing and indentation.
+
+       * mpn/generic/mul_fft.c (mpn_fft_add_modF): Use mpn_decr_u.
+       Clean up spacing and indentation.
+
+       * extract-dbl.c: Generalize to handle smaller limb sizes.
+
+2001-01-01  Torbjorn Granlund  <tege@swox.com>
+
+       * mpbsd/mout.c: Output newline after "0".
+
+2000-12-31  Torbjorn Granlund  <tege@swox.com>
+
+       * ltmain.sh: Remove space between `#!' and `$SHELL' when generating
+       `libtool'.
+
+       * mpbsd/tests/t-misc.c (check_itom): Exclude test for all Cray
+       vector systems.  Correct comment.
+
+2000-12-31  Kevin Ryde  <kevin@swox.se>
+
+       * gmp.texi (ABI and ISA): New enough gcc needed for mips n32 etc, gcc
+       2.95 needed for sparc 64-bit ABI, gcc 2.8 needed for -mv8plus.
+
+       * configure.in ([cjt]90,sv1-cray-unicos*): Preserve user specified
+       MPN_PATH, amend test program indenting.
+       (none-*-*): Add -DNO_ASM to gcc to disable longlong.h asm macros in
+       generic C.
+
+       * config.sub (j90, t90): Preserve these, don't let configfsf.sub turn
+       them into c90.
+
+       * config.guess (m68k-*-nextstep*,m68k-*-openstep*): Don't transform
+       m68k to m68020, since m68k is already interpreted as 68020.
+
+2000-12-30  Kevin Ryde  <kevin@swox.se>
+
+       * mpq/neg.c: Rewrite, use mpn, avoid denominator copy if unnecessary.
+
+       * mpz/tstbit.c: Rewrite, slightly simplified.
+       * mpz/tests/bit.c (check_tstbit): New test, and add a couple more
+       diagnostics elsewhere.
+
+       * configure.in (x86 gcc_cflags_cpu): Add -m486 for gcc 2.7.2.
+       (ccbase): Only use a known compiler in eval statements (avoids
+       problems with non-symbol characters).
+       (ccbase): Use GMP_PROG_CC_IS_GNU to identify gcc installed under a
+       different name.
+       (cclist): Use same style $abi as other variables.
+
+       * acinclude.m4 (GMP_PROG_CC_IS_GNU): New macro.
+       (GMP_GCC_MARCH_PENTIUMPRO): Use $ccbase to identify gcc.
+       (GMP_ASM_TYPE): Define TYPE to empty, not "dnl", when no .type needed.
+       (GMP_ASM_SIZE): Ditto for SIZE, which ensures EPILOGUE on the last
+       line of a file doesn't leave a tab and no newline.
+       (GMP_ASM_UNDERSCORE): Add a prototype for C++.
+
+       * configure.in (sys/mman.h, mprotect): New tests.
+       * mpn/tests/try.c: Use them, and HAVE_UNISTD_H too.
+
+       * configure.in (getopt.h): Remove test.
+       * tune/speed.c, mpn/tests/try.c (getopt.h): Remove include, since
+       plain getopt() is in <unistd.h>.
+
+       * configure.in, gmp-h.in (mips*-*-irix6*): Set limb_n32=longlong
+       rather than using _ABIN32.
+
+2000-12-29  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/tests/reuse.c: Rename dump_abort => dump.
+       * mpz/tests/reuse.c: Generate operands using gmp_rand*.
+       * mpz/tests/convert.c: Likewise.
+
+       * configure.in: Detect T90-ieee systems; move Cray path
+       selection to after AC_PROG_CC.  Invoke AC_PROG_CPP.
+       * mpn/cray/cfp: New directory.  Move cfp specific files here.
+       * mpn/cray/cfp/mulwwc90.s: New file.
+       * mpn/cray/cfp/mulwwj90.s: New file.
+       * mpn/cray/mulww.s: Delete.
+
+2000-12-27  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/cray/ieee/mul_1.c: New file.
+       * mpn/cray/ieee/addmul_1.c: New file.
+       * mpn/cray/ieee/submul_1.c: New file.
+       * mpn/cray/ieee/gmp-mparam.h: New file.
+
+       * mpn/cray/gmp-mparam.h: Disable UMUL_TIME and UDIV_TIME.
+
+       * mpn/cray/hamdist.c: New file.
+       * mpn/cray/popcount.c: New file.
+       * mpn/cray/rshift.c: New file.
+       * mpn/cray/lshift.c: New file.
+
+       * longlong.h: Add count_leading_zeros for _CRAY.
+       Reorganize _CRAY stuff.
+
+2000-12-24  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in (alpha*-cray-unicos*): Disable SPEED_CYCLECOUNTER_OBJ,
+       as tune/alpha.asm doesn't suit.
+
+       * mpn/generic/sqrtrem.c, mpz/pow_ui.c, mpz/powm_ui.c, mpf/get_str.c,
+       mpf/set_str.c: Use mpn_sqr_n when applicable, not mpn_mul_n.
+
+2000-12-23  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/mul_fft.c: Reformat.
+       (mpn_fft_neg_modF): Remove.
+       (mpn_fft_mul_2exp_modF): Inline mpn_fft_neg_modF.
+
+       * mpn/cray/gmp-mparam.h: Retune.
+
+       * configure.in (*-cray-unicos*): Pass `-O3 -htask0'.
+       (vax*-*-*): Fix typo.
+
+       * mpn/cray/mul_1.c: Use dynamic arrays, get rid of TMP_*.
+       * mpn/cray/addmul_1.c: Likewise.
+       * mpn/cray/submul_1.c: Likewise.
+       * mpn/cray/add_n.c: Likewise.
+       * mpn/cray/sub_n.c: Likewise.
+
+       * configure.in (default cc_cflags,cc_64_cflags): Remove -g/add -O.
+       (mips*-*-irix[6789]*): Remove -g from cc_*_cflags.
+
+2000-12-22  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/mul_n.c: Delete K&R function headers.
+
+       * mpn/generic/mul_n.c (mpn_kara_mul_n): Clean up type confusion
+       between mp_limb_t and mp_size_t.
+       (mpn_kara_sqr_n): Likewise.
+
+       * mpn/generic/mul_n.c (mpn_kara_mul_n): Use mpn_incr_u.
+       (mpn_kara_sqr_n): Likewise.
+
+       * mpn/generic/mul_n.c (mpn_kara_mul_n): Change handling of `sign'
+       to work around GCC 2.8.1 MIPS bug.
+
+       * configure.in (implied alpha*-cray-unicos*): Remove -g from cc_cflags.
+
+2000-12-21  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/alpha/invert_limb.asm: Simplify a bit.
+       Add handling of bigend systems.
+       * mpn/alpha/unicos.m4: Define `bigend'.
+       * mpn/alpha/default.m4: Define `bigend' (to expand to nothing).
+
+       * tests/t-constants.c (CHECK_CONSTANT): Print using %lx.
+
+       * mpn/alpha/gmp-mparam.h: Remove sizes for plain C types.
+       * mpn/alpha/ev5/gmp-mparam.h: Likewise.
+       * mpn/alpha/ev6/gmp-mparam.h: Likewise.
+
+       * mpn/alpha/unicos.m4: Define LEA.
+       * mpn/alpha/default.m4: Likewise.
+       * mpn/alpha/invert_limb.asm: Use LEA for loading symbolic addresses.
+       * mpn/alpha/cntlz.asm: Likewise.
+
+       * mpn/alpha/cntlz.asm: Don't use `ldbu', use slightly slower
+       `ldq_u' + `extbl' instead.
+
+       * mpn/alpha/unicos.m4: Define EXTERN.
+       * mpn/alpha/default.m4: Define EXTERN (to expand to nothing).
+       * mpn/alpha/cntlz.asm: Declare __clz_tab using `EXTERN' (for the
+       benefit of Unicos).
+
+2000-12-21  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/alpha/unicos.m4 (GSYM_PREFIX): Define for the benefit of
+       __clz_tab.
+
+2000-12-20  Torbjorn Granlund  <tege@swox.com>
+
+       * longlong.h: Add udiv_qrnnd and count_leading_zeros for _CRAYMPP
+       systems.
+
+2000-12-19  Torbjorn Granlund  <tege@swox.com>
+
+       * configure.in (*sparc*-*-*): Remove -g from cc_cflags and acc_cflags.
+
+       * mpn/generic/sqrtrem.c (mpn_sqrtrem): Separate `limb' values from
+       `size' values.
+
+       * configure.in (*-cray-unicos*): Add `-Wa,-B' to cc_cflags.
+
+       * demos/pexpr.c (rstate): New variable.
+       (main): Initialize rstate.
+       (enum op_t): Add RANDOM.
+       (fns): Add field for RANDOM.
+       (mpz_eval_expr): Handle RANDOM.
+
+2000-12-19  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/generic/sqrtrem.c: Rewrite by Paul Zimmermann, based on his
+       Karatsuba Square Root algorithm.
+       * gmp.texi (Square Root Algorithm): Update.
+
+       * tune/many.pl: New file.
+
+       * mpn/tests/try.c,ref.[ch] (mpn_preinv_mod_1, mpn_sb_divrem_mn,
+       mpn_tdiv_qr, mpn_gcd_finda, mpn_kara_mul_n, mpn_kara_sqr_n,
+       mpn_toom3_mul_n, mpn_toom3_sqr_n): Add testing.
+       * mpn/tests/ref.c: Cast some "0"s in function calls.
+
+       * mpn/x86/k7/mmx/mod_1.asm: Add preinv_mod_1 entrypoint, remove extra
+       variable for loop termination.
+
+       * mpn/x86/p6/mmx/mod_1.asm: Remove file, in favour of the following.
+       * mpn/x86/p6/mod_1.asm: New file.
+
+       * mpn/x86/pentium/mod_1.asm: New file.
+
+2000-12-18  Torbjorn Granlund  <tege@swox.com>
+
+       * configure.in (mips*-*-irix[6789]*): Pass options to compiler using
+       `-Wc'.
+
+2000-12-18  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/x86/k6/pre_mod_1.asm: New file.
+
+       * tune/tuneup.c (USE_PREINV_MOD_1): Tune this, rearrange mpn_divrem_1
+       and mpn_mod_1 handling in support of it.
+       * tune/Makefile.am: Consequent changes to divrem_1.c and mod_1.c.
+
+       * gmp-impl.h (USE_PREINV_MOD_1, MPN_MOD_OR_PREINV_MOD_1): New macros.
+       * mpn/generic/perfsqr.c, mpz/pprime_p.c: Use MPN_MOD_OR_PREINV_MOD_1.
+
+       * configure.in: Let an asm mod_1 provide a preinv_mod_1 entrypoint.
+
+       * mpn/alpha/default.m4: Remove some newlines, add some asserts.
+       (r0 etc, f0 etc): Use defreg and deflit.
+       (PROLOGUE, PROLOGUE_GP, EPILOGUE): Use GSYM_PREFIX.
+       * mpn/alpha/unicos.m4: Remove some newlines, add some asserts.
+       * mpn/alpha/invert_limb.asm: Remove unused second DATASTART parameter.
+       * mpn/alpha/cntlz.asm: Use mpn_count_leading_zeros and __clz_tab.
+
+       * mpn/asm-defs.m4 (changecom): Comments on portability.
+       (__clz_tab, modlimb_invert_table): New macros, matching gmp-impl.h.
+       (count_leading_zeros, count_trailing_zeros): New define_mpn's.
+       (PROLOGUE etc): Comments on usage, add some asserts.
+       (OPERATION_[lr]shift): Use m4_not_for_expansion, for the benefit of
+       lorrshift multifunc.
+
+       * mpn/Makeasm.am (RM_TMP): New variable controlling tmp-*.s
+       removal, for development purposes.
+
+       * mpz/fac_ui.c: Fix for long long limb by using mpn_mul_1 not
+       mpz_mul_ui, and note some possible enhancements.
+
+       * mpz/tests/t-fac_ui.c: New test.
+       * mpz/tests/Makefile.am (check_PROGRAMS): Add it.
+       * macos/Makefile.in: Ditto, and add t-fib_ui too.
+
+       * mpn/generic/[lr]shift.c: Remove some DEBUG code adequately covered
+       by new parameter ASSERTs.
+
+       * longlong.h (count_trailing_zeros): Assert x!=0.
+
+       * doc/configuration: Updates for new configure things, add some notes
+       on test setups.
+
+2000-12-16  Torbjorn Granlund  <tege@swox.com>
+
+       * configure.in (*-*-aix): Pass -qmaxmem=20000 to xlc also for 64-bit
+       compiles.
+       * configure.in: Disable shared libs for *-*-ultrix*.
+
+2000-12-15  Torbjorn Granlund  <tege@swox.com>
+
+       * configure.in (powerpc*-*-*): Pass -Wa,-mppc when using gcc.
+
+       * gmp-impl.h (_EXTERN_INLINE): #define different for GCC and other
+       compilers.
+
+       * gmp-h.in (__gmp_inline): Remove.
+       * mp-h.in: Likewise.
+       * mpn/generic/gcd.c: Use `inline' instead of `__gmp_inline'.
+
+       * configure.in (mips*-*-irix[6789]*): Define *_ldflags.
+
+2000-12-14  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/pre_mod_1.c: Use proper type for udiv_qrnnd
+       parameter `dummy'.
+
+       * mpn/generic/divrem_1.c: Use explicit `!= 0' in if statement.
+       * mpn/generic/mod_1.c: Likewise.
+
+2000-12-14  Kevin Ryde  <kevin@swox.se>
+
+       * config.guess (mips-*-irix[6789]*): Transform to mips64.
+       (m68k-*-nextstep* | m68k-*-openstep*): Transform to m68020.
+
+2000-12-13  Torbjorn Granlund  <tege@swox.com>
+
+       * tests/t-constants.c (main): Conditionalize use of PP_INVERTED.
+
+       * mpn/mp_bases.c: Handle 4-bit limbs.
+       (main): Add code for generating tables.
+
+       * mpn/generic/popham.c: Handle limb bitsizes of 4, 8, 16.
+       Suffix all 32-bit constants with `L'.
+       Use CNST_LIMB for 64-bit constants.
+
+2000-12-13  Kevin Ryde  <kevin@swox.se>
+
+       * gmp-impl.h (FIB_THRESHOLD): Defaults for 4,8,16 bits per limb, and
+       an arbitrary fallback default.
+       (modlimb_invert): Add efficient code for 8,16 (or 4) bits per limb.
+
+       * configure.in (mips3, mips64): Don't bother with o32 (mips2 32-bit
+       limb) on IRIX 6.
+
+       * Makefile.am (SUBDIRS): Put "tests" first so tests/t-constants.c is
+       run first, to pick up any limb size mismatch.
+
+       * tune/tuneup.c (DIVREM_1, MOD_1): Fix result values, were off by 1.
+
+       * mpz/fib_ui.c (table1, table2): Add data for 4,8,16 bits per limb.
+
+2000-12-12  Torbjorn Granlund  <tege@swox.com>
+
+       * gmp-impl.h (LIMBS_PER_DOUBLE): Define for any limb bitsize.
+
+2000-12-11  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/mp_bases.c: Add tables for 8-bit and 16-bit limbs.
+       Round existing `double' values properly.
+
+       * gmp-h.in (__gmp_randstate_struct): Prefix field names with _mp_
+       to keep out of user name space.
+       (__gmp_randata_lc): Likewise.
+       * randclr.c, randlc.c, randlc2x.c, randraw.c, randsd.c, randsdui.c:
+       Corresponding changes.
+
+       * gmp-impl.h (PP): #define for machines with BITS_PER_MP_LIMB
+       of 2, 4, 8, and 16.
+       (PP_FIRST_OMITTED): New, define for various BITS_PER_MP_LIMB.
+       (PP_MASK): Remove.
+       (PP_MAXPRIME): Remove.
+
+       * mpn/generic/perfsqr.c: Generalize PP handling for machines with
+       limbs of < 32 bits.  Allow PP_INVERTED to be undefined.
+       * mpz/pprime_p.c: Likewise.
+
+2000-12-10  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/mul_1.c: Declare parameters in C89 style.
+
+2000-12-10  Kevin Ryde  <kevin@swox.se>
+
+       * tune/Makefile.am (speed_LDFLAGS, speed_ext_LDFLAGS, tune_LDFLAGS):
+       Don't use -all-static, as gcc 2.95.2 on i386 solaris 8 doesn't like
+       it.
+
+       * configure.in (mips3,mips64): Add ABI=64, name the others ABI=n32 and
+       ABI=o32.
+       * mpn/mips3/gmp-mparam.h (BITS_PER_LONGINT): Remove #define and let
+       configure determine it, since it varies with ABI=64 or ABI=n32.
+       * gmp.texi (ABI and ISA): Update.
+       (mpz_mod_ui): Remark that it's identical to mpz_fdiv_r_ui.
+       (mpn_divexact_by3): Qualify a statement needing mp_bits_per_limb even.
+
+       * mul_fft.c (mpn_fft_mul_modF_K etc): Patch by Paul Zimmermann to fix
+       results in certain cases of recursing into a further FFT.
+
+2000-12-09  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/cmpabs.c: Remove unused variable.
+       * mpz/rrandomb.c: Likewise.
+       * mpz/xor.c: Likewise.
+
+2000-12-07  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/gcdext.c: Handle double carry when computing s1.
+       Merge two code blocks for computing s0 and s1.
+
+2000-12-07  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in (hppa*-*-*): Remove -Aa -D_HPUX_SOURCE from
+       cc_cflags/cppflags, and instead let AM_C_PROTOTYPES add it, or -Ae,
+       whichever works.
+
+       * configure.in (*-*-aix[34]*): Disable shared by default, but let
+       the user override that, if desired.
+       * gmp.texi (Notes for Particular Systems): Update.
+
+2000-12-06  Torbjorn Granlund  <tege@swox.com>
+
+       * mpq/cmp_ui.c: Streamline.
+
+2000-12-06  Kevin Ryde  <kevin@swox.se>
+
+       * tune/divrem_1_div.c,divrem_1_inv.c,mod_1_div.c,mod_1_inv.c,
+       gcdext_double.c: New files for measuring.
+       * tune/Makefile.am (libspeed_la_SOURCES): Add them.
+       * tune/speed.c,speed.h,common.c: Add measuring of them.
+       (mpn_preinv_mod_1, mpz_jacobi, mpz_powm_ui): Add measuring.
+
+       * speed.c (getopt_long): Don't use this, just plain getopt.
+       * configure.in (getopt_long): Remove test.
+
+       * gmp-impl.h (MPN_KARA_MUL_N_TSIZE, MPN_KARA_MUL_N_MINSIZE,
+       MPN_TOOM3_MUL_N_TSIZE, MPN_TOOM3_MUL_N_MINSIZE): New macros, and
+       assume toom3 square tsize was meant to be the same as the mul (both
+       are overestimates).
+       * tune/tuneup.c, mpn/generic/mul.c, mpn/generic/mul_n.c: Use them.
+       * mpn/generic/mul_n.c (mpn_toom3_sqr_n): Fix an ASSERT to use
+       TOOM3_SQR_THRESHOLD not TOOM3_MUL_THRESHOLD, add a few that might
+       be more realistic size checks.
+       * tune/speed.h (SPEED_ROUTINE_MPN_MUL_N_TSPACE etc): Use minsize.
+
+       * mpn/generic/divrem_1.c: Partial rewrite, merge fractional part
+       calculation, skip a divide step in more cases, introduce
+       DIVREM_1_NORM_THRESHOLD and DIVREM_1_UNNORM_THRESHOLD.
+       * mpn/generic/mod_1.c: Partial rewrite, skip a divide step in more
+       cases, introduce MOD_1_NORM_THRESHOLD, MOD_1_UNNORM_THRESHOLD.
+       * longlong.h (UDIV_PREINV_ALWAYS): New define, set for alpha and ia64.
+       * tune/tuneup.c (DIVREM_1_NORM_THRESHOLD, DIVREM_1_UNNORM_THRESHOLD,
+       MOD_1_NORM_THRESHOLD, MOD_1_UNNORM_THRESHOLD): Tune these.
+       * gmp-impl.h [TUNE_PROGRAM_BUILD]: Support for this.
+       * tune/Makefile.am (TUNE_MPN_SRCS): Add divrem_1.c and mod_1.c.
+
+       * gmp-impl.h (UDIV_NORM_PREINV_TIME): Renamed from UDIV_PREINV_TIME.
+       * mpn/generic/perfsqr.c, mpn/generic/sb_divrem_mn.c,
+       mpn/x86/*/gmp-mparam.h: Ditto.
+       * gmp-impl.h (UDIV_UNNORM_PREINV_TIME): New define.
+
+       * configure.in (AC_C_INLINE, HAVE_INLINE): New test and define.
+       * gmp-impl.h (inline): Remove, use config.h.
+       (_EXTERN_INLINE): Redefine based on HAVE_INLINE.
+       (mpn_zero_p): Use HAVE_INLINE.
+
+       * acinclude.m4 (GMP_PROG_AR, GMP_PROG_NM): Don't add flags to a user
+       selected $AR or $NM.
+
+       * tune/tuneup.c (all): Print how long the tuning took.
+
+       * configure.in (AM_C_PROTOTYPES): Use this, not GMP_ANSI2KNR.
+       * acinclude.m4 (GMP_ANSI2KNR): Remove.
+
+       * Makefile.am (gmp.h, mp.h): In DISTCLEANFILES not CLEANFILES.
+
+       * gmp-h.in (mpn_divmod, mpn_divmod_1, mpn_divexact_by3): Cast some
+       zeros, for the benefit of K&R if long!=int.
+
+       * mpn/lisp/gmpasm-mode.el (gmpasm-comment-start-regexp): Add "*" for
+       the benefit of cray.
+
+       * compat.c (mpn_divexact_by3, mpn_divmod_1): Return types should be
+       mp_limb_t, not int, and need an actual "return".
+
+2000-12-05  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/sparc32/v8/supersparc/gmp-mparam.h: Retune.
+       * mpn/alpha/gmp-mparam.h: Tune for 21064.
+
+       * longlong.h: Reformat to avoid newlines within strings.
+
+       * gmp-impl.h (inline): Disable if GCC has defined __STRICT_ANSI__.
+
+       * configure.in: Do a `mkdir tune' before creating tune/sqr_basecase.c.
+
+       * Makefile.am: Treat mp.h analogously to gmp.h.
+
+       * configure.in (*-*-aix): Pass -qmaxmem=20000 to xlc.
+
+       * mp-h.in: Renamed from mp.h.
+       Add #define for _LONG_LONG_LIMB.
+       Move some other fixes from gmp-h.in.
+       * mp.h: Removed.
+       * configure.in: Generate mp.h from mp-h.in like we handle
+       gmp-h.in/gmp.h.
+
+2000-12-04  Torbjorn Granlund  <tege@swox.com>
+
+       * acinclude.m4: Fix typo testing for bad HP compiler.
+
+2000-12-03  Torbjorn Granlund  <tege@swox.com>
+
+       * mpbsd/tests/t-misc.c (check_itom): Exclude some tests for Cray
+       CFP systems.
+
+       * longlong.h (CRAYIEEE umul_ppmm): New.
+
+       * mpn/cray/gmp-mparam.h (BITS_PER_SHORTINT): 32 => 64.
+       (*_THRESHOLD): Tune.
+
+       * configure.in: Disable shared libs for *-*-unicos*.
+
+2000-12-03  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in, tune/Makefile.am: Create tune/sqr_basecase.c during
+       configure, and use it unconditionally in $(nodist_tuneup_SOURCES).
+       Fixes a problem with sqr_basecase.lo under --disable-static.
+
+2000-12-01  Torbjorn Granlund  <tege@swox.com>
+
+       * mpf/tests/t-get_d.c (LOW_BOUND,HIGH_BOUND): #define for non-IEEE
+       Cray systems.
+
+       * gmp-impl.h (union ieee_double_extract): Test for _CRAYIEEE.
+
+2000-11-30  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/tests/t-mul.c (base_mul): Fix re-evaluation problems in macro
+       invocations.
+       (ref_mpz_mul): New name from mpz_refmul.  Make static.
+       (base_mul): New name for _mpn_mul_classic.
+
+2000-11-30  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in: Rewrite of CC/CFLAGS selection scheme, introduce a
+       notion of ABI, merge compiler and mpn path selection, add flags
+       selection for AR and NM, let CC without CFLAGS work.
+       (AC_PROG_CC): Use this, not GMP_SELECT_CC.
+       * acinclude.m4 (GMP_PROG_CC_WORKS): Don't use AC_TRY_COMPILE, combine
+       cc/cflags parameter.
+       (GMP_PROG_CC_FIND, GMP_CHECK_CC_64BIT, GMP_PROG_CC_SELECT): Remove.
+       * gmp.texi (Installing GMP): Updates for new scheme.
+
+       * configure.in (AC_CANONICAL_HOST): Use this and $host, not $target.
+       * acinclude.m4, acconfig.h, longlong.h, mpn/x86/x86-defs.m4,
+       mpn/x86/k7/mmx/popham.asm: Ditto, renaming HAVE_TARGET_CPU to
+       HAVE_HOST_CPU.
+       * gmp.texi (Build Options, and elsewhere): Update.
+
+       * acinclude.m4 (GMP_COMPARE_GE): New macro.
+       (GMP_GCC_MARCH_PENTIUMPRO): Use it, add CC parameter, check for GCC.
+       (GMP_HPC_HPPA_2_0): New macro, adapted from GMP_CHECK_CC_64BIT.
+
+       * acinclude.m4 (GMP_PROG_AR): New macro, using AC_CHECK_TOOL, adding
+       GMP flags.
+       * configure.in: Use it.
+
+       * gmp-h.in: Renamed from gmp.h.
+       (@define_LONG_LONG_LIMB@): Placeholder for instantiation.
+       (__GNU_MP__): Bump to 3.
+       * acinclude.m4 (GMP_VERSION): Get version from gmp-h.in.
+       * configure.in: Create gmp.h from gmp-h.in to set _LONG_LONG_LIMB.
+       * gmp.texi (ABI and ISA): Mention this.
+       * acconfig.h (_LONG_LONG_LIMB): Remove undef.
+       * Makefile.am: Distribute gmp-h.in, not gmp.h.
+
+       * configure.in (AC_PROG_CPP, AC_PROG_INSTALL, AC_PROG_LN_S): Remove,
+       dragged in by other macros.
+       (gmp_asm_syntax_testing): Renamed from gmp_no_asm_syntax_testing.
+       (AC_EXEEXT, AC_OBJEXT): Remove, done automatically by libtool.
+       * configure.in, acinclude.m4: Remove "" from "`foo`", being
+       unnecessary and not portable.
+
+       * configure.in (GMP_LDFLAGS): New AC_SUBST flags for libtool link.
+       (powerpc64*-*-aix*): Use for -Wc,-maix to fix shared library creation,
+       but can't build shared and static at the same time.
+       * Makefile.am (libgmp_la_LDFLAGS, libmp_la_LDFLAGS): Use
+       $(GMP_LDFLAGS).
+       * gmp.texi (Notes for Particular Systems): Update AIX problem.
+
+       * configure.in (AC_CONFIG_LINKS): Use where needed, not via gmp_links.
+       (gmp_srclinks): Build up as needed, not via gmp_links.
+
+       * acinclude.m4 (GMP_INIT): Do CONFIG_TOP_SRCDIR and asm-defs.m4 here.
+       * configure.in (asm-defs.m4): Consequent changes.
+
+       * acinclude.m4 (GMP_INCLUDE_MPN): Using include_mpn(), replacing
+       GMP_INCLUDE and GMP_SINCLUDE.
+       * configure.in (gmp_m4postinc): Remove this scheme, use
+       GMP_INCLUDE_MPN instead.
+
+       * configure.in (*-*-sco3.2v5*): Force ac_cv_archive_cmds_need_lc=no,
+       until libtool does this itself.
+       * gmp.texi (Known Build Problems): Remove SCO -lc problem.
+
+       * configure, INSTALL.autoconf, etc: Update to autoconf 2000-11-29.
+       * acinclude.m4 (GMP_C_SIZES): Use AC_CHECK_SIZEOF.
+       * gmp.texi (Known Build Problems): Remove version.c sed/config.h
+       problem, fixed.
+
+       * ltmain.sh, aclocal.m4: Update to libtool 2000-11-25.
+       * ltconfig: No longer required, but leave an empty dummy for automake.
+       * gmp.texi (Known Build Problems): Remove SunOS native ar ranlib
+       problem, fixed.
+
+       * */Makefile.in, aclocal.m4: Update to automake 2000-11-25.
+       * mpbsd/tests/Makefile.am, mpfr/tests/Makefile.am (check_PROGRAMS):
+       Remove dummy, no longer required.
+       * mpbsd/tests/dummy.c, mpfr/tests/dummy.c: Remove files.
+       * depcomp: Remove file, no longer required (with no-dependencies).
+
+       * texinfo.tex: Update to 2000-11-09.
+       * gmp.texi (Build Options): Mention PDF from gmp.texi.
+       * Makefile.am (MOSTLYCLEANFILES): Add gmp.tmp, from new texinfo.tex.
+
+       * gmp.texi (Build Options): List alphaev56, alphapca56, alphaev67,
+       hppa2.0n and power among supported CPUs.
+
+2000-11-30  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/tests/t-mul.c: Increase max operand size from 2^17 bits
+       to 2^19 bits.  Misc cleanups.
+
+2000-11-26  Kevin Ryde  <kevin@swox.se>
+
+       * tune/tuneup.c (FIB_THRESHOLD): Cope better with different speeds of
+       odd and even sizes.
+
+       * longlong.h (alpha): Use udiv_qrnnd and count_leading_zeros on all
+       compilers, not just gcc.
+
+       * pre_mod_1.c: Use conditional subtract to always skip a division.
+       (UMUL_TIME, UDIV_TIME): Remove defaults, now in longlong.h.
+
+2000-11-22  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/pa64w/gmp-mparam.h: Retune.
+       * mpn/pa64/gmp-mparam.h: Retune.
+       * mpn/sparc64/gmp-mparam.h: Retune.
+
+2000-11-22  Kevin Ryde  <kevin@swox.se>
+
+       * gmp-impl.h (ABOVE_THRESHOLD, BELOW_THRESHOLD): New macros.
+       * mpn/generic/gcdext.c: Use them.
+
+       * mpn/generic/gcdext.c [WANT_GCDEXT_ONE_STEP]: Force only one step.
+       * tune/gcdextos.c, tune/gcdextod.c: New files, one step gcdext, single
+       and double.
+       * tune/Makefile.am (libspeed_la_SOURCES): Add them.
+       (TUNE_MPN_SRCS): Remove gcdext.c.
+       * tune/speed.h, tune/common.c, tune/speed.c: Add measuring.
+       * tune/tuneup.c: Use for GCDEXT_THRESHOLD, plus check if double limb
+       is ever better.  Should be more accurate, and hopefully faster.
+
+       * tune/gcdext_single.c: New file, gcdext forced to single limbs.
+       * tune/Makefile.am: Add it.
+       * tune/speed.h, tune/common.c, tune/speed.c: Add measuring, and of
+       invert_limb.
+
+       * tune/speed.h (speed_params r): Use mp_limb_t, not long.
+       * tune/speed.h, tune/common.c: Don't "switch" on "r".
+       * tune/speed.c (r_string): Accept limb sized constants.
+       (choice scale): Add a scale factor (eg. "2.33*mpn_add_n").
+       * tune/common.c (SPEED_ROUTINE_UDIV_QRNND_A): Default r to
+       __mp_bases[10].big_base, being a full limb value.
+
+       * configure.in (alphapca56*-*-*): Use ev5 mpn path.
+       (am29000*-*-*): Remove this, leave the canonical a29k.
+       (z8k*-*-*, z8kx*-*-*): Changed from z8000, since z8k is canonical.
+       (gmp_mpn_functions_optional): Add invert_limb, use for alpha and ia64.
+
+       * configure.in (alloca): Accept yes/no/detect, generate an error if
+       "yes" but not available.
+       * gmp.texi (Build Options): Update.
+
+       * acinclude.m4 (GMP_TRY_ASSEMBLE): Make conftest.out available.
+       (GMP_ASM_ALIGN_FILL_0x90): Use it.
+
+       * acinclude.m4 (GMP_ASM_X86_MMX) [*-*-solaris*]: Check for solaris
+       2.6 "as" movq bug.
+       * gmp.texi (Notes for Particular Systems): Update x86 MMX note.
+
+2000-11-21  Torbjorn Granlund  <tege@swox.com>
+
+       * tune/Makefile.am (EXTRA_DIST): List hppa2w.asm.
+
+       * tune/hppa2.asm: Change level directive to "2.0n".
+       * tune/hppa2w.asm: New file.
+       * configure.in [SPEED_CYCLECOUNTER_OBJS switch]: Separate out hppa2.0w.
+
+       * mpn/pa64/gmp-mparam.h (BITS_PER_LONGINT): 64 => 32.
+
+2000-11-21  Kevin Ryde  <kevin@swox.se>
+
+       * urandom.h (random): No prototype if glibc stdlib.h has already
+       provided it (avoids an int32_t/long conflict).
+
+       * tune/Makefile.am (LDFLAGS): Use -all-static.
+       (speed-dynamic): Dynamic linked version of speed.c.
+       * tune/README: Update.
+
+       * mpn/generic/gcd.c (find_a): Use native version if available.
+       * acconfig.h (HAVE_NATIVE_mpn_gcd_finda): Add #undef.
+       * gmp-impl.h (mpn_gcd_finda): Add prototype and define.
+       * mpn/asm-defs.m4 (mpn_gcd_finda): New define_mpn.
+       * tune/gcd_finda_gen.c: #undef any HAVE_NATIVE_mpn_gcd_finda.
+       * configure.in (gmp_mpn_functions_optional): Add gcd_finda.
+       * mpn/x86/k6/gcd_finda.asm: New file.
+
+       * tune/tuneup.c (POWM_THRESHOLD): Slightly bigger size steps.
+
+       * gmp-impl.h (__GMP_IMPL_H__): Protect against multiple inclusion.
+       * tune/gcd_bin.c, tune/powm_mod.c, tune/powm_redc.c: Use #undef after
+       gmp-impl.h to force thresholds.
+       * tune/tuneup.c (print_define, fft): No need for #ifndefs on
+       thresholds any more.
+
+2000-11-20  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/tests/t-powm.c: Analogous changes as made 2000-11-12 to t-mul.c.
+       * mpz/tests/t-powm_ui.c: Likewise.
+       * mpz/tests/t-pow_ui.c: Likewise.
+       * mpz/tests/t-root.c: Likewise.
+
+       * configure.in [compiler switch]: Pass "-Aa -D_HPUX_SOURCE" to cc for
+       all hppa versions.
+
+       * mpn/hppa/hppa1_1/udiv_qrnnd.S: Reference data using PC relative
+       addressing (was r19 relative addressing).
+
+2000-11-18  Torbjorn Granlund  <tege@swox.com>
+
+       * rand.c (__gmp_rand_lc_scheme): Convert strings to hexadecimal.
+       (gmp_randinit): Expect strings in hexadecimal.
+
+2000-11-18  Kevin Ryde  <kevin@swox.se>
+
+       * configfsf.guess, configfsf.sub: Update to 2000-11-16.
+       * config.guess (alpha*-*-openbsd*): Do exact cpu detection.
+
+2000-11-14  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/tests/t-fdiv.c: Analogous changes as made 2000-11-12 to t-mul.c.
+       * mpz/tests/t-tdiv_ui.c: Likewise.
+       * mpz/tests/t-fdiv_ui.c: Likewise.
+       * mpz/tests/t-sqrtrem.c: Likewise.
+       * mpz/tests/t-gcd.c: Likewise.
+
+2000-11-13  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/Makeasm.am: New file, splitting out assembler rules.
+       * mpn/Makefile.am, tune/Makefile.am: Use it.
+
+       * mpn/Makefile.am (@CPP@): Remove this, automake already gives it.
+
+       * configure.in (AC_CHECK_LIBM): New test, and AC_SUBST it.
+       * Makefile.am (MPFR_LIBADD_OPTION): Use it.
+       * demos/Makefile.am (qcn_LDADD): Ditto.
+       * tune/Makefile.am (libspeed_la_LIBADD): Ditto.
+       * tests/rand/Makefile.am (libstat_la_LIBADD): Ditto.
+
+       * tune/time.c (timeval_diff_secs): Better calculation.
+       (read_real_time): New measuring method for AIX power/powerpc.
+       (speed_endtime): Protect against negative times.
+       * tune/common.c (speed_measure): Protect against big reps.
+       * tune/freq.c (speed_cpu_frequency_measure_one): Better timeval diff.
+       * tune/speed.h (TIMEVAL_DIFF_SEC,USEC): Remove macros.
+       * configure.in (sys/systemcfg.h, read_real_time): New tests.
+
+2000-11-13  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/tests/t-mul.c: Remove #include urandom.h.
+       * mpz/tests/t-tdiv.c: Likewise.
+
+       * configure.in [SPEED_CYCLECOUNTER_OBJS switch]:
+       Declare hppa.asm as just 32 bits (cyclecounter_size=1).
+
+2000-11-12  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/tests/t-mul.c
+       (main): Generate random numbers using gmp_rand* functions.
+       (main): Distribute random numbers non-uniformly.
+       (main): Seed by current time if GMP_CHECK_RANDOMIZE is set.
+       (_mpn_mul_classic): Streamline.
+       * mpz/tests/t-tdiv.c: Analogous changes.
+
+       * demos/pexpr.c (HAVE_sigaltstack): Fix typo in testing for _UNICOS.
+       Also test for __hpux.
+
+2000-11-11  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/alpha/ev5/gmp-mparam.h: Retune.
+
+       * mpn/alpha/ev6/gmp-mparam.h: Retune.
+
+       * mpn/alpha/ev6/add_n.asm: Misc cleanups.
+
+       * mpn/alpha/ev6/sub_n.asm: New file.
+
+2000-11-10  Torbjorn Granlund  <tege@swox.com>
+
+       * configure.in [path switch] (alphaev6*-*-*): Add alpha/ev5 to path.
+
+       * mpn/alpha/ev6/add_n.asm: New file.
+
+2000-11-10  Kevin Ryde  <kevin@swox.se>
+
+       * mpz/powm.c (redc): Make global under WANT_REDC_GLOBAL.
+       * tune/powm_mod.c, tune/powm_redc.c: New files.
+       * tune/Makefile.am (libspeed_la_SOURCES): Add them.
+       * tune/*: Add measuring of redc, mpz_mod, mpz_powm_mod, mpz_powm_redc.
+
+       * tune/tuneup.c (POWM_THRESHOLD): Determine from redc and mpz_mod.
+       * tune/Makefile.am (TUNE_MPZ_SRCS): Remove powm.
+
+2000-11-10  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/mips3/gmp-mparam.h: Retune.
+
+       * configure.in (os_64bit): Rename to check_64bit_compiler.
+
+2000-11-09  Torbjorn Granlund  <tege@swox.com>
+
+       * configure.in [SPEED_CYCLECOUNTER_OBJS switch]: Choose hppa/hppa2 code
+       depending on $CC64.
+
+2000-11-09  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/x86/pentium/mul_1.asm: Unroll 2x, saving 1 c/l when in L1.
+       Add 1c entrypoint.
+       * mpn/x86/pentium/aorsmul_1.asm: Add 1c entrypoints, shave a couple
+       of cycles at entry and exit.
+
+       * configure.in (power1,2,2sc): Support these as synonyms for plain
+       power.
+
+       * acinclude.m4 (GMP_ASM_X86_SHLDL_CL): GMP_DEFINE WANT_SHLDL_CL here.
+       (GMP_ASM_X86_MMX, GMP_ASM_X86_SHLDL_CL): Add X86 into the names.
+       * configure.in: Consequent changes.
+
+       * gmp.texi (Notes for Particular Systems): Remarks about power/powerpc.
+       (Reentrancy): Remarks about simultaneous writing.
+       (Reporting Bugs): Ask for configfsf.guess.
+
+2000-11-08  Kevin Ryde  <kevin@swox.se>
+
+       * acinclude.m4 (GMP_FUNC_ALLOCA): New macro.
+       * configure.in: Use it.
+       * gmp-impl.h (alloca): Conditionals and setups as per autoconf
+       (should make alloca available on more non-gcc compilers).
+
+       * acinclude.m4: Misc reformatting, simplify some quoting.
+       (GMP_ASM_UNDERSCORE, GMP_ASM_X86_MCOUNT): Use $CC $CFLAGS $CPPFLAGS.
+       (GMP_ASM_UNDERSCORE, GMP_ASM_ALIGN_FILL_0x90, GMP_ASM_RODATA): Put
+       AC_REQUIREs outside AC_CACHE_CHECK.
+       (GMP_C_SIZES): Use $srcdir/gmp.h, not -I; use $CPPFLAGS.
+       (GMP_ASM_UNDERSCORE): Use "gmp_compile" variable, and only rm
+       conftes1* conftes2*.
+       (GMP_PROG_NM): New macro, require it in appropriate GMP_ASM_*.
+       (GMP_TRY_ASSEMBLE): New macro, use it in various GMP_ASM_*.
+       * configure.in: Use GMP_PROG_NM.
+
+       * mpn/tests/spinner.c (spinner_signal): Use RETSIGTYPE.
+       (spinner_init): Force output to unbuffered.
+
+       * mpn/x86/README.family: Notes about GOT table and imul, misc updates.
+       * mpn/x86/k7/diveby3.asm: Change to 3 operands for immediate imul.
+       * mpn/x86/k6/diveby3.asm: Ditto.
+
+2000-11-06  Torbjorn Granlund  <tege@swox.com>
+
+       * urandom.h: Simplify and make it work properly for 64-bit
+       machines also in environments without `random'.
+
+2000-11-04  Torbjorn Granlund  <tege@swox.com>
+
+       * configure.in [path switch]: Don't match rs6000-*-*, in
+       particular don't assume POWER.
+
+       * tune/tuneup.c (fft): Remove usleep calls.
+
+       * config.guess: Don't pass "$@" when it is known to be empty.
+
+       * Makefile.am (EXTRA_DIST): List configfsf.guess and configfsf.sub.
+
+2000-11-04  Kevin Ryde  <kevin@swox.se>
+
+       * configfsf.guess, configfsf.sub: Moved from config.guess and
+       config.sub.
+       * config.guess, config.sub: New files, wrappers around
+       configfsf versions.
+       * configfsf.guess: Update to FSF 2000-10-23.
+       * configfsf.sub: Update to FSF 2000-10-25.
+
+       * acinclude.m4 (GMP_ASM_POWERPC_R_REGISTERS): New macro.
+       * mpn/powerpc32/powerpc-defs.m4: New file, regmap.m4 r0 etc macros
+       conditionalized by GMP_ASM_POWERPC_R_REGISTERS.
+       * mpn/powerpc32/regmap.m4: Remove file.
+       * configure.in (powerpc*-*-*): Use all this.
+
+       * mpz/divegcd.c: New file, providing mpz_divexact_gcd.
+       * Makefile.am, mpz/Makefile.am: Add it.
+       * gmp-impl.h (mpz_divexact_gcd): Add prototype.
+       * mpq/aors.c,canonicalize.c,div.c,mul.c: Use it.
+
+       * longlong.h [pentium] (count_leading_zeros): New macro.
+       (__clz_tab): Always provide prototype.
+       * acconfig.h (HAVE_TARGET_CPU_): Add x86s.
+
+       * tune/speed.[ch],common.c (count_leading_zeros,
+       count_trailing_zeros, __udiv_qrnnd_c): Add measuring.
+
+       * configure.in (X86_PATTERN): Move from here ...
+       * acinclude.m4 (X86_PATTERN): ... to here.
+       (GMP_ASM_RODATA): Use it.
+
+       * configure.in (srandom): New test.
+       * mpn/tests/try.c: Use it.
+       * tune/speed.c: Ditto, and conditionalize getrusage and headers.
+
+2000-11-02  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/Makefile.am (nodist_libdummy_la_SOURCES): Add udiv_qrnnd.c
+       and udiv_w_sdiv.c.
+
+       * mpn/generic/mul_n.c (mpn_kara_sqr_n): Remove a duplicate
+       subtract at the evaluate stage.
+
+2000-11-01  Torbjorn Granlund  <tege@swox.com>
+
+       * configure.in [compiler switch] (sparc64-*-linux*): Spell
+       gmp_xoptcflags_gcc properly, and pass same options as for other
+       sparcv9 configs.
+
+       * tune/speed.h (SPEED_ROUTINE_MPN_GET_STR): Fix type of wsize.
+
+2000-10-31  Torbjorn Granlund  <tege@swox.com>
+
+       * configure.in [compiler switch] (sparc64-*-linux*): Remove -mvis
+       from gmp_xoptflags_gcc, this might not be an ultrasparc.
+       Remove -m32 from gmp_cflags_gcc; add -Wa,-xarch=v8plus.
+
+2000-10-29  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/ia64/lorrshift.asm: New file.
+
+       * configure.in: New mulfunc `lorrshift' for lshift and rshift.
+
+2000-10-29  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/generic/mul_n.c (mpn_kara_sqr_n): Delete code performing
+       superfluous mpn_sub_n calls.
+
+       * configure.in (found_asm, M4): Account for SPEED_CYCLECOUNTER_OBJ,
+       for the benefit of targets whose only .asm is a cycle counter.
+
+       * tune/tuneup.c (fft): Remove bogus usleep calls.
+
+2000-10-28  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/ia64/invert_limb.asm: Get return value for 0x800...00 right.
+
+       * tune/Makefile.am (EXTRA_DIST): Add ia64.asm.
+
+       * tune/ia64.asm: Fix typo.
+
+       * add_n.asm addmul_1.asm mul_1.asm popcount.asm sub_n.asm:
+       Preserve ar.lc as required by ABI.
+       * longlong.h (ia64 udiv_qrnnd): New.
+
+       * configure.in [path switch] (ia64*-*-*): Set extra_functions.
+       * mpn/ia64/invert_limb.asm: New file.
+
+2000-10-27  Torbjorn Granlund  <tege@swox.com>
+
+       * configure.in [compiler switch]:
+       Get rid of c89 for all hppa flavours--it is an evil compiler!
+
+       * tune/speed.h (SPEED_ROUTINE_MPN_SET_STR): Fix type of xp.
+       (SPEED_ROUTINE_MPN_GET_STR): Fix type of wp.
+
+2000-10-27  Kevin Ryde  <kevin@swox.se>
+
+       * gmp.texi (Fibonacci Number Algorithm): New section.
+
+       * mpz/tests/t-fib_ui.c: New file.
+       * mpz/tests/Makefile.am (check_PROGRAMS): Add it.
+
+       * mpz/fib_ui.c: Rewrite, same formulas but using mpn functions and
+       some lookup tables, much faster at small to moderate sizes.
+       * gmp-impl.h (MPZ_FIB_SIZE): New macro.
+       (FIB_THRESHOLD): Establish default here.
+       * tune/tuneup.c (FIB_THRESHOLD): Start search after the new table
+       data.
+
+       * mpn/x86/x86-defs.m4 (mcount_movl_GOT_ebx): Rename from movl_GOT_ebx,
+       and don't use GSYM_PREFIX with _GLOBAL_OFFSET_TABLE_.
+
+       * tune/freq.c (speed_cpu_frequency_measure): New test comparing
+       gettimeofday and speed_cyclecounter, should cover many systems.
+
+2000-10-27  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/ia64/gmp-mparam.h: Retune.
+
+2000-10-26  Torbjorn Granlund  <tege@swox.com>
+
+       * longlong.h (ia64): Set UMUL_TIME and UDIV_TIME.
+
+       * mpn/ia64/submul_1.c: Fix typo.
+
+2000-10-25  Kevin Ryde  <kevin@swox.se>
+
+       * tune/freq.c (speed_cpu_frequency_sysctl): New test, supporting
+       hw.model for BSD flavours.
+       * configure.in (sysctl, sys/param.h): New tests.
+
+2000-10-24  Torbjorn Granlund  <tege@swox.com>
+
+       * tune/freq.c: Explicitly #include config.h before other include files.
+
+       * mpz/tests/reuse.c (FAIL2): New #define.
+       (main): Use FAIL2.  Now this test properly returns non-zero exit
+       status when it fails.
+
+       * mpn/powerpc32/gmp-mparam.h: Retune.
+       * mpn/powerpc64/gmp-mparam.h: Retune.
+
+2000-10-24  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/x86/k6/cross.pl: Support 8 and 16 byte code alignment.
+
+       * mpq/aors.c, mpq/canonicalize.c: Skip two mpz_divexact calls if
+       gcd gives 1, which should be 60% of the time.
+       * gmp-impl.h (MPZ_EQUAL_1_P): New macro.
+       * mpq/mul.c, mpq/div.c: Use it, and a new DIV_OR_SET.
+
+       * tune/tuneup.c (xp_block, yp_block): Initialize these with random
+       data.  Fixes GCD_ACCEL and GCDEXT thresholds, and latest POWM.
+
+2000-10-23  Torbjorn Granlund  <tege@swox.com>
+
+       * configure.in [SPEED_CYCLECOUNTER_OBJS switch]: Add ia64 case.
+
+       * mpn/ia64/gmp-mparam.h: Fill in some parameters.
+
+       * mpn/ia64/submul_1.c: New file.
+
+       * tune/ia64.asm: New file.
+
+       * gmp-impl.h (union ieee_double_extract): Handle ia64.
+
+       * mpn/mp_bases.c: Decrease chars_per_bit_exactly for entry 1 to
+       work around buggy ia64-linux.
+
+       * longlong.h (ia64 umul_ppmm): Update register flags to match new GCC.
+
+2000-10-22  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/alpha/ev6/gmp-mparam.h (DC_THRESHOLD): Update.
+       * mpn/alpha/ev6/submul_1.asm: New file.
+
+2000-10-22  Kevin Ryde  <kevin@swox.se>
+
+       * tune/gcd_bin.c: New file.
+       * tune/gcd_finda_gen.c: New file.
+       * tune/Makefile.am (libspeed_la_SOURCES): Add them.
+       * tune/speed.[ch],common.c (mpn_gcd_binary, find_a): Add measuring.
+
+       * * (__gmp_allocate_func etc): Rename from _mp_allocate_func etc.
+       (__gmp_default_allocate etc): Rename from _mp_default_allocate etc.
+       * gmp-impl.h (__GMP_REALLOCATE_FUNC_TYPE,
+       __GMP_REALLOCATE_FUNC_LIMBS): New macros.
+
+       * gmp-impl.h (DC_THRESHOLD): Establish default here, set to 3*KARA
+       since that's the measured average.
+       * mpn/generic/dc_divrem_n.c, mpn/generic/tdiv_qr.c (DC_THRESHOLD):
+       Remove default.
+
+2000-10-21  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/Makefile.am (TARG_DIST): Add ia64.
+
+2000-10-21  Kevin Ryde  <kevin@swox.se>
+
+       * *: Change BZ -> DC.
+       * mpn/generic/dc_divrem_n.c: Renamed from bz_divrem_n.c.
+
+       * doc/multiplication: Remove file, now in the manual.
+       * doc/assembly_code: Ditto.
+       * tune/README: Remove some parts now in the manual.
+
+       * gmp.texi (@m etc): Add and use some new macros.
+       (Integer Division - mpz_[cft]div_*): Merge descriptions, for brevity
+       and to emphasise similarities.
+       (Low-Level Functions - mpn_[lr]shift): Specify count as 1 to
+       mp_bits_per_limb-1.
+       (Algorithms): New chapter.
+       (References): Add some papers.
+
+       * mpn/generic/mul_n.c (mpn_toom3_mul_n, mpn_toom3_sqr_n): Remove some
+       unused variables.
+       * mpn/generic/mul_fft.c (mpn_fft_best_k): Ditto.
+
+       * tune/freq.c: New file, split from time.c.
+       * tune/time.c: Rewrite, now more automated.
+       * configure.in, tune/*: Consequent changes.
+
+2000-10-20  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/ia64/default.m4: New file.
+       * configure.in [config.m4 switch] (ia64*-*-*): Use ia64/default.m4.
+
+       * mpn/ia64/mul_1.asm: New file.
+       * mpn/ia64/addmul_1.asm: New file.
+       * mpn/ia64/add_n.asm: New file.
+       * mpn/ia64/sub_n.asm: New file.
+       * mpn/ia64/popcount.asm: New file.
+       * mpn/ia64/README: New file.
+
+       * mpn/alpha/cntlz.asm: Override `.set noat' from ASM_START.
+
+       * configure.in (HAVE_TARGET_CPU_*): Support hppa1.0, hppa1.1, hppa2.0
+       by sed'ing the period into `_'.
+
+       * acconfig.h: Add #undefs for hppa targets.
+
+       * longlong.h (udiv_qrnnd): Fix typo in last change.
+
+       * mpz/tstbit.c: Rewrite (partly to work around GCC 2.95.2 HPPA bug).
+
+       * configure.in [path switch]:
+       (hppa2.0*-*-*): For non-CC64 case, update path.
+
+       * configure.in [compiler switch]:
+       (hppa2.0w-*-*): Match with same regexp in both places.
+       (hppa*-*-*): New case.
+       (all hppa alternatives): Don't inherit default gmp_cflags_cc,
+       gmp_cflags_c89.
+
+2000-10-18  Torbjorn Granlund  <tege@swox.com>
+
+       * configure.in (alpha*-*-*): Define gmp_xoptcflags_gcc like for
+       alpha*-*-osf*.
+
+       * longlong.h (x86 udiv_qrnnd): Change `d' => `dx' to avoid K&R C
+       stringification.
+
+2000-10-15  Kevin Ryde  <kevin@swox.se>
+
+       * doc/configuration: Updates.
+
+       * demos/calc.y: Remove some comments.
+
+2000-10-14  Kevin Ryde  <kevin@swox.se>
+
+       * gmp.texi (Parameter Conventions, Memory Management): New sections
+       split from "Variable Conventions".
+       (Efficiency, Debugging, Profiling): New sections in "GMP Basics".
+       (Reentrancy): Some rewording, add note on standard I/O.
+       (Build options): Add --enable-assert and --enable-profiling.
+
+       * configure.in (--enable-profiling): New option.
+       * acinclude.m4 (GMP_ASM_X86_MCOUNT): New macro, finding how to profile.
+       * mpn/x86/x86-defs.m4 (PROLOGUE_cpu, call_mcount): Profiling support.
+
+       * acinclude.m4, configure.in (GMP_ASM_*): Rename from GMP_CHECK_ASM_*,
+       to follow autoconf conventions.
+
+       * configure.in: Run GMP_CHECK_ASM tests only if needed.
+       * acinclude.m4 (GMP_CHECK_ASM_MMX): Don't use GMP_CHECK_ASM_TEXT.
+
+       * mpn/x86/x86-defs.m4 (ASSERT): Allow no condition, to just emit code.
+
+2000-10-13  Kevin Ryde  <kevin@swox.se>
+
+       * mpq/md_2exp.c: New file.
+       * mpq/Makefile.am (libmpq_la_SOURCES): Add it.
+       * Makefile.am (MPQ_OBJECTS): Ditto.
+       * gmp.h (mpq_mul_2exp, mpq_div_2exp): Add prototypes.
+       * gmp.texi (Rational Arithmetic): Add documentation.
+
+       * mpq/tests/t-md_2exp.c: New file.
+       * mpq/tests/Makefile.am (check_PROGRAMS): Add it.
+
+       * mpn/generic/perfsqr.c: Add/amend some comments.
+
+       * gmp.texi (Known Build Problems): Note VERSION problem with old
+       sed, do some minor rewording.
+       (Build Options): Add cygwin and djgpp URLs, mention INSTALL.autoconf,
+       mention HTML.
+       (Getting the Latest Version of GMP): Move this ...
+       (Introduction to GMP): ... to here.
+       (Compatibility with older versions): Just refer to 2.x and 3.x, not
+       every minor version.
+       (Initializing Integers): Note restrictions on mpz_array_init'ed
+       variables.
+       (Integer Logic and Bit Fiddling): Note bits are numbered from 0.
+
+       * INSTALL.autoconf: New file.
+       * Makefile.am (EXTRA_DIST): Add it.
+
+       * tune/Makefile.am, tune/tuneup.c, configure.in, gmp-impl.h: New
+       scheme for recompiled objects used by tune program.  Don't use
+       libgmptune.a, make better use of libtool, work with ansi2knr.
+
+       * tune/speed.h,common.c (SPEED_ROUTINE_MPZ_POWM): Use s->yp and
+       s->xp_block, make exponent a fixed size.
+
+2000-10-07  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/mips3/gmp-mparam.h: Retune.
+
+       * mpn/generic/mul_n.c (USE_MORE_MPN): Revert last change.
+
+2000-10-06  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/mips3/add_n.s: Decrease carry recurrence from 4 to 3 cycles.
+       * mpn/mips3/sub_n.s: Likewise.
+
+2000-10-04  Torbjorn Granlund  <tege@swox.com>
+
+       * configure.in (sparc64-*-linux*): Set path according to CC64.
+
+2000-10-04  Kevin Ryde  <kevin@swox.se>
+
+       * acinclude.m4 (GMP_CHECK_ASM_UNDERSCORE): Use LABEL_SUFFIX, not a
+       hard-coded ":".
+
+       * config.sub: Don't demand "86" in CPU name for SCO.
+
+       * configure.in (supersparc-*-*): Remove -DSUPERSPARC.
+       * longlong.h: Use HAVE_TARGET_CPU_supersparc.
+
+       * configure.in (HAVE_TARGET_CPU_*): AC_DEFINE from $target_cpu.
+       * acconfig.h: Add #undefs, but only for targets of interest.
+
+2000-10-03  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/alpha/cntlz.asm: Rewrite.
+
+       * mp_clz_tab.c (__clz_tab): Halve table size to 128 entries.
+       * longlong.h (count_leading_zeros): Demand just 128 entries from
+       __clz_tab.
+
+       * configure.in (mips-sgi-irix6.*): Pass -mips3 in addition to options
+       for n32 ABI.
+
+       * longlong.h: Move NO_ASM test around all assembly code.
+       From gcc:
+       * longlong.h (count_leading_zeros): Sparclite scan instruction was
+       being invoked incorrectly.
+       Replace __mc68332__ with __mcpu32__.
+       Add ARC support.
+
+2000-10-02  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/mips3/gmp-mparam.h: Retune for both gcc and cc.
+
+       * mpn/generic/mul_n.c (USE_MORE_MPN): Remove exception for __mips.
+       (interpolate3): Cast mp_limb_t variables to mp_limb_signed_t
+       when testing sign bit.
+
+       * mpn/alpha/ev6/gmp-mparam.h: Retune.
+       * mpn/powerpc32/gmp-mparam.h: Retune.
+       * mpn/powerpc64/gmp-mparam.h: Retune.
+       * mpn/x86/pentium/gmp-mparam.h: Retune.
+       * mpn/x86/pentium/mmx/gmp-mparam.h: Retune.
+       * mpn/sparc32/v9/gmp-mparam.h: Retune.
+       * mpn/x86/k6/gmp-mparam.h: Retune.
+       * mpn/x86/p6/gmp-mparam.h: Retune.
+       * mpn/x86/k7/gmp-mparam.h: Retune.
+       * mpn/sparc64/gmp-mparam.h: Retune.
+
+       * mpn/m68k/gmp-mparam.h: New file.
+       * mpn/alpha/ev5/gmp-mparam.h: New file.
+
+       * gmp-impl.h (default MPN_COPY): Remove final `;'.
+
+       * tune/time.c (speed_endtime): Rewrite.
+
+       * tune/speed.h (SPEED_ROUTINE_MPZ_POWM): Set base to a large value,
+       not 2.
+
+       * demos/pexpr.c (setup_error_handler): Fix typo.
+
+       * mpz/powm.c (redc): New function, based on old mpz_redc.  Don't
+       multiply here.
+       (mpz_redc): Remove.
+       (mpz_powm): Major changes, partially reverting to mpn calls.
+       Multiply before calling redc.
+       (mpz_powm): Use TMP_ allocation.
+       (mpz_powm): Refine calculation of k (width of exponent window).
+       (mpz_powm): Cast constants to mp_limb_t before left shifting.
+
+       * longlong.h: Use ia64 count_leading_zeros just when __GNUC__.
+
+2000-09-29  Kevin Ryde  <kevin@swox.se>
+
+       * acinclude.m4 (GMP_C_SIZES): New macro.
+       * configure.in: Use it.
+       * acconfig.in (BYTES_PER_MP_LIMB etc): Add #undefs.
+       * mpn/generic/gmp-mparam.h (BYTES_PER_MP_LIMB etc): Remove #defines.
+       * gmp.texi (Known Build Problems): Remove 64-bit generic C
+       gmp-mparam.h problem, now fixed.
+
+       * configure.in: Only run GMP_PROG_M4 if it's actually needed.
+
+2000-09-27  Torbjorn Granlund  <tege@swox.com>
+
+       * demos/pexpr.c: Clean up code for systems not supporting
+       sigaltstack.  Handle old Linux without sigaltstack.  Properly
+       disable all stuff related to sigaltstack under Unicos.
+
+       * mpn/alpha/ev6/addmul_1.asm: Use explicit offset for all load and
+       store insns.  Helps old gas.
+
+       * longlong.h (count_leading_zeros): Define for ia64.
+
+2000-09-27  Paul Zimmermann  <Paul.Zimmermann@loria.fr>
+
+       * mpn/generic/bz_divrem_n.c: Fix qhl handling, simplify.
+
+2000-09-27  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/Makefile.in (.SUFFIXES): Regenerate with patched automake to
+       get .s before .c, which is needed to override ansi2knr .c rules.
+
+       * gmp.texi (mpn_sqrtrem): Fix r2p==NULL return value description
+       to match the code (change by Torbjorn).
+       (mpn_gcd, mpn_gcdext, mpn_sqrtrem, mpn_tdiv_qr): Note most
+       significant limbs must be non-zero.
+       (mpn_gcd, mpn_gcdext, mpn_sqrtrem): Clarify destination size
+       requirements.
+       (mpn_gcd_1): Clarify value must be non-zero, not just size.
+
+       * gmp-impl.h (mpn_zero_p): New inline function.
+       * mpn/generic/inlines.c: Add gmp-impl.h.
+       * mpf/integer.c, mpz/get_d.c, mpn/generic/mul_fft.c: Use it.
+
+       * mpn/generic/gcd.c: Use MPN_COPY_INCR not MPN_COPY.
+       * mpf/add_ui.c: Ditto.
+       * mpf/add.c: Ditto, and fix test to skip copy.
+
+2000-09-26  Kevin Ryde  <kevin@swox.se>
+
+       * gmp-impl.h, longlong.h, mpn/generic/*.c: Add ASSERTs for various
+       parameter restrictions.
+
+       * gmp-impl.h (UDIV_PREINV_TIME): New macro.
+       * mpn/generic/sb_divrem_mn.c: Use it.
+       * mpn/generic/perfsqr.c: Ditto.
+       * mpn/x86/*/gmp-mparam.h (UDIV_PREINV_TIME): Add values.
+
+       * macos/Makefile.in: Add mpz/tests/t-get_si.c, mpf/tests/t-set_f.c,
+       and new multi-function mpz and mpq files.
+
+2000-09-25  Kevin Ryde  <kevin@swox.se>
+
+       * randlc.c, randlc2x.c, randsd.c, mpz/urandomb.c, mpz/urandomm.c:
+       Use mpz_ptr and mpz_srcptr for parameters.
+       * gmp.h (gmp_randinit_lc, gmp_randinit_lc_2exp, gmp_randseed,
+       mpz_urandomb, mpz_urandomm): Corresponding change to prototypes.
+       * randsdui.c: Remove wrong K&R parameters part.
+
+2000-09-12  Kevin Ryde  <kevin@swox.se>
+
+       * gmp-impl.h (mpn_tdiv_qr): Move prototype from here ...
+       * gmp.h (mpn_tdiv_qr): ... to here.
+
+       * gmp.texi (Miscellaneous Rational Functions): Comment-out and
+       move version 1 compatibility note to "Compatibility" section.
+       (Rational Number Functions): Ditto for canonicalization note.
+
+2000-09-10  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/x86/pentium/com_n.asm: New file.
+
+       * gmp.texi (Rational Arithmetic): Add mpq_abs.
+       (Miscellaneous Rational Functions): Merge and simplify descriptions of
+       mpq_get_num, mpq_get_den, mpq_set_num, mpq_set_den.
+
+       * mpq/abs.c: New file.
+       * mpq/Makefile.am (libmpq_la_SOURCES): Add it.
+       * Makefile.am (MPQ_OBJECTS): Add it.
+       * gmp.h (mpq_abs): Add prototype.
+
+       * mpq/set_den.c: Don't discard sign when copying, this makes the
+       code match the manual.
+
+2000-09-07  Torbjorn Granlund  <tege@swox.com>
+
+       * tune/alpha.asm: Rewrite to actually work right.
+
+2000-09-07  Kevin Ryde  <kevin@swox.se>
+
+       * tune/common.c,speed.[ch]: Add measuring of mpn_sqrtrem,
+       mpn_get_str, mpn_set_str.
+       * tune/README: Various updates.
+
+2000-09-06  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/fits.c: Correct type of `data'.
+
+2000-09-06  Kevin Ryde  <kevin@swox.se>
+
+       * gmp.texi (Build Options): Clarify where to find CFLAGS.
+       (Known Build Problems): Note SCO -lc problem.
+
+       * tune/speed.h (SPEED_ROUTINE_MPN_GCD_CALL): Fix for sizes > 512 limbs.
+
+       * doc/multiplication: Corrections and additions suggested by Paul.
+
+       * tune/modlinv.c: New file with alternate modlimb_inverts.
+       * tune/Makefile.am, tune/speed.[ch]: Add measuring of them.
+       * tune/speed.c (FLAG_NODATA): New attribute, use for mpz_bin_uiui,
+       mpz_fib_ui, mpz_fac_ui.
+
+       * mpn/x86/t-zdisp.sh: New file.
+
+       * tests/t-modlinv.c: New file.
+       * tests/Makefile.am (check_PROGRAMS): Add it.
+
+       * mpq/tests/t-set_f.c: New file.
+       * mpq/tests/Makefile.am (check_PROGRAMS): Add it.
+
+       * gmp-impl.h (MPQ_CHECK_FORMAT): New macro.
+       * mpq/tests/t-get_d.c: Use it.
+
+       * mpq/set_f.c: New file.
+       * mpq/Makefile.am (libmpq_la_SOURCES): Add it.
+       * Makefile.am (MPQ_OBJECTS): Ditto.
+       * gmp.h: Add prototype.
+       * gmp.texi (Miscellaneous Rational Functions): Document mpq_set_f,
+       correct return type of mpq_set_d.
+
+2000-09-03  Kevin Ryde  <kevin@swox.se>
+
+       * mpz/aors_ui.c: New file merging add_ui.c and sub_ui.c, no object
+       code changes.
+       * mpz/add_ui.c, mpz/sub_ui.c: Remove files.
+       * mpz/Makefile.am: Update.
+
+       * gmp-impl.h (MPZ_FITS_STYPE_SDT, MPZ_FITS_UTYPE_SDT): New macros.
+       * mpz/fits.c: New file merging six separate fits*.c.
+       * mpz/fits_sshort_p.c, fits_sint_p.c, fits_slong_p.c, fits_ushort_p.c,
+       fits_uint_p.c, fits_ulong_p.c: Remove files.
+       * mpz/Makefile.am: Use new fits.c, change object names from
+       fits_*_p.lo to fits_*.lo to avoid SunOS 4 native "ar" warnings.
+       * Makefile.am (MPZ_OBJECTS): Change from fits_*_p.lo to fits_*.lo.
+
+       * acinclude.m4 (GMP_CHECK_ASM_RODATA): New macro, defining RODATA.
+       * configure.in: Use it.
+       * mpn/x86/k[67]/mmx/popham.asm: Use it.
+
+       * mpn/x86/*/*.asm: Use "TEXT" not ".text".
+
+2000-09-02  Kevin Ryde  <kevin@swox.se>
+
+       * mpq/aors.c: New file merging add.c and sub.c, no object code changes.
+       * mpq/add.c, mpq/sub.c: Remove files.
+       * mpq/Makefile.am: Update.
+
+       * mpz/aors.c: New file merging add.c and sub.c, no object code changes.
+       * mpz/add.c, mpz/sub.c: Remove files.
+       * mpz/Makefile.am, mpbsd/Makefile.am: Update.
+
+       * configure.in: Re-apply "PROLOGUE.*" regexp change for the
+       benefit of alpha PROLOGUE_GP, lost in path search reorganisation.
+
+       * mpn/x86/x86-defs.m4 (jadcl0, cmov_simulate, ASSERT,
+       movl_text_address): Don't use "1:" style labels.
+       (Zdisp): Rearrange a bit, switch to all hex.
+       * mpn/x86/README.family: Note SCO "as" doesn't support "1:" style
+       local labels, misc rewordings.
+
+2000-08-29  Torbjorn Granlund  <tege@swox.com>
+
+       * demos/primes.c: Include string.h.
+
+       * config.guess (x86 variant recog code): Remove dummy*.o files
+       generated by some compilers.
+
+2000-08-28  Kevin Ryde  <kevin@swox.se>
+
+       * acinclude.m4 (GMP_CHECK_ASM_ALIGN_FILL_0x90): Fix Solaris 2.8
+       warning message suppression, add notes about SCO.
+
+       * Makefile.am (MPZ_OBJECTS etc): Move some comments.
+
+2000-08-25  Kevin Ryde  <kevin@swox.se>
+
+       * mpz/pprime_p.c (mpz_millerrabin): Fix a TMP_FREE.
+
+       * gmp.texi (Copying): Refer to Lesser not Library GPL.
+       (GMP and Reentrancy): Note stack-alloc.c is not reentrant, and
+       that SCO <ctype.h> is potentially not reentrant.
+
+       * acinclude.m4 (GMP_CHECK_ASM_UNDERSCORE): Test by attempting to
+       link with or without an underscore.
+       * gmp.texi (Known Build Problems): Remove SunOS 4 native grep
+       GSYM_PREFIX problem, now fixed.
+
+       * gmp-impl.h (MODLIMB_INVERSE_3): New constant.
+       * mpn/generic/diveby3.c: Use it instead of own INVERSE_3.
+       * mpn/generic/mul_n.c: Ditto.
+       * tests/t-constants.c: Check it, and PP_INVERTED too.
+
+       * acinclude.m4 (GMP_GCC_MARCH_PENTIUMPRO): New macro.
+       * configure.in [p6 and athlon] (gmp_optcflags_gcc): Use it to
+       possibly add -march=pentiumpro.
+
+       * gmp-impl.h (MPZ_SET_STR_OR_ABORT, MPF_SET_STR_OR_ABORT): New macros.
+       * mpz/tests/t-bin.c, mpz/tests/t-get_si.c, mpz/tests/t-jac.c,
+       mpz/tests/t-misc.c: Use them.
+       * mpf/tests/t-conv.c, mpf/tests/t-misc.c: Ditto.
+       * mpz/tests/convert.c: Ditto and amend diagnostics slightly.
+       * mpz/tests/t-misc.c (check_mpz_set_si): Remove a superfluous init.
+       * mpz/tests/io.c: Differentiate between I/O and data conversion errors.
+
+       * mpn/generic/aors_n.c: New file merging add_n and sub_n, no
+       object code changes.
+       * mpn/generic/add_n.c: Remove file.
+       * mpn/generic/sub_n.c: Remove file.
+
+       * mpn/generic/aorsmul_1.c: New file merging addmul_1 and submul_1,
+       no object code changes.
+       * mpn/generic/addmul_1.c: Remove file.
+       * mpn/generic/submul_1.c: Remove file.
+
+       * mpn/generic/popham.c: New file merging popcount and hamdist, no
+       object code changes.
+       * mpn/generic/popcount.c: Remove file.
+       * mpn/generic/hamdist.c: Remove file.
+
+2000-08-24  Torbjorn Granlund  <tege@swox.com>
+
+       * gmp-impl.h (mpn_com_n): Fix typo.
+
+2000-08-23  Torbjorn Granlund  <tege@swox.com>
+
+       * demos/primes.c (main): Don't call mpz_probab_prime_p for numbers
+       that are known to be prime after sieving.
+       (main): Declare and initialize max_s_prime_squared.
+       (MAX_S_PRIME): Increase.
+       (ST_SIZE): Increase.
+
+2000-08-23  Kevin Ryde  <kevin@swox.se>
+
+       * gmp-impl.h (ASSERT_ALWAYS): Change to statement style.
+       (JACOBI_TWO_U_BIT1): Remove ASSERT.
+       (MPZ_CHECK_FORMAT): Use ASSERT_ALWAYS as a statement.
+
+2000-08-21  Torbjorn Granlund  <tege@swox.com>
+
+       * gmp-impl.h (ASSERT): Use do..while for dummy version.
+
+       * mpf/get_str.c: Don't set n_digits from digits_computed_so_far
+       when the converted operand becomes zero.  Misc cleanups.
+
+2000-08-21  Kevin Ryde  <kevin@swox.se>
+
+       * mpz/fdiv_r_2exp.c, mpz/lcm.c, mpz/urandomm.c: Add missing
+       TMP_MARK/FREE, avoiding memory leak when using stack-alloc.c.
+
+2000-08-20  Kevin Ryde  <kevin@swox.se>
+
+       * mpz/set.c [BERKELEY_MP] (move): Add conditionals to build as
+       "move" for libmp.
+       * mpbsd/Makefile.am: Use mpz/set.c, not move.c.
+       * Makefile.am (MPBSD_OBJECTS): Corresponding change.
+       * mpbsd/move.c: Remove file.
+
+       * mpn/Makefile.am, mpz/Makefile.am, mpq/Makefile.am, mpf/Makefile.am,
+       mpbsd/Makefile.am (-DOPERATION_foo): Use "foo" even for ansi2knr
+       "foo_" objects.  Do this with the makefiles to keep the sources
+       cleaner.
+       * mpz/mul_siui.c, mpf/integer.c: Revert to plain OPERATION_* forms.
+
+       * mpn/lisp/gmpasm-mode.el (gmpasm-remove-from-list): Renamed from
+       gmpasm-delete-from-list, because it's non-destructive.
+       (gmpasm-font-lock-keywords): Add some more keywords.
+
+2000-08-16  Kevin Ryde  <kevin@swox.se>
+
+       * tune/mul_n_mpn.c, tune/mul_n_open.c: New files, being forced
+       open-coded and mpn #includes of mpn/generic/mul_n.c.
+       * tune/*: Add measuring of them.
+       * tune/speed.c: Print command line into *.gnuplot file.
+
+       * mpn/generic/mul_n.c (USE_MORE_MPN): Change to #if not #ifdef for
+       using the value, add #ifndef for providing the default.
+       * mpn/sparc64/gmp-mparam.h (USE_MORE_MPN): Add #ifndef.
+
+       * tests/t-constants.c: New file.
+       * tests/Makefile.am (check_PROGRAMS): Add it.
+
+       * mpz/get_si.c: Use LONG_MAX, not BITS_PER_MP_LIMB, so the result
+       doesn't depend on limb size when outside the range of a long
+       (though such results are not actually documented).
+       * mpz/tests/t-get_si.c: New file.
+       * mpz/tests/Makefile.am (check_PROGRAMS): Add it.
+
+       * mpn/tests/try.c (call): Cast popcount and hamdist calls,
+       for the benefit of long long limb.
+
+2000-08-15  Kevin Ryde  <kevin@swox.se>
+
+       * mp.h (mp_set_memory_functions): Add missing #define.
+       * mpbsd/tests/allfuns.c (mp_set_memory_functions): Verify its
+       existence.
+
+       * mpf/tests/t-misc.c (check_mpf_getset_prec): New test, verifying
+       reverted behaviour of mpf_get_prec.
+
+       * mpn/tests/ref.c (refmpn_strip_twos): Use refmpn_copyi, not
+       MPN_COPY_INCR.
+
+       * mpz/mul_siui.c, mpf/integer.c: Recognise OPERATION_*_ forms
+       produced under ansi2knr.
+
+       * configure.in (mpn_objects, mpn_objs_in_libgmp): Add $U to .c
+       objects when ansi2knr in use.
+
+       * mpn/Makefile.am (AUTOMAKE_OPTIONS): Enable ansi2knr.
+       (libdummy.la): Add this, not built, to create ansi2knr style rules
+       for all potential .c files.
+       * mpz/Makefile.am, mpq/Makefile.am, mpf/Makefile.am, mpfr/Makefile.am,
+       mpbsd/Makefile.am, mpq/tests/Makefile.am, tests/Makefile.am
+       (AUTOMAKE_OPTIONS): Enable ansi2knr (now everywhere).
+       * Makefile.am (MPZ_OBJECTS, MPQ_OBJECTS, MPF_OBJECTS, MPFR_OBJECTS,
+       MPBSD_OBJECTS, libmp_la_DEPENDENCIES): Add $U to all .lo filenames.
+
+2000-08-03  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/alpha/ev6/addmul_1.asm: Correct number of cycles to 3.5/28.
+
+2000-08-02  Torbjorn Granlund  <tege@swox.com>
+
+       * Version 3.1 released.
+
+       * gmp.texi: Rephrase mpf_urandomb documentation.
+
+       * mpn/alpha/ev6: New directory with ev6/21264 optimized code.
+       * mpn/alpha/ev6/addmul_1.asm: New file.
+       * mpn/alpha/ev6/gmp-mparam.h: New file.
+
+2000-08-02  Kevin Ryde  <kevin@swox.se>
+
+       * demos/factorize.c (random): Don't use "inline".
+
+       * mpfr/log.c, mpfr/mul_ui.c, mpfr/round.c, mpfr/set.c, mpfr/set_d.c:
+       Corrections to K&R parts.
+
+       * Makefile.am (EXTRA_HEADERS): Omit $(MPFR_HEADERS_OPTION).
+       * mpfr/Makefile.am (EXTRA_DIST): Add mpfr.h.
+
+       * gmp.texi (Known Build Problems): Note problem stripping libgmp.a.
+
+2000-08-02  Kent Boortz  <kent@swox.com>
+
+       * mpfr: Integrated experimental version of mpfr-0.4.
+       * configure.in: Changes for option --enable-mpfr.
+       * Makefile.am: Changes for option --enable-mpfr.
+
+2000-08-01  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/popcount.c: Disable SPARC v9 popc_limb pattern.
+       * mpn/generic/hamdist.c: Likewise.
+
+2000-08-01  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/tests/try.c (try_init): Account for ALIGNMENTS when sizing
+       source and dest regions.
+
+2000-07-31  Torbjorn Granlund  <tege@swox.com>
+
+       * mpf/get_str.c: Develop three extra digits, not just one.
+
+2000-07-31  Kevin Ryde  <kevin@swox.se>
+
+       * gmp.texi (References): Add URL for invariant division.
+
+2000-07-30  Kevin Ryde  <kevin@swox.se>
+
+       * tune/time.c (speed_cpu_frequency_proc_cpuinfo): Add support for
+       alpha linux "cycle frequency".
+
+       * mpn/sparc64/gmp-mparam.h: Re-run tune program for FFT thresholds.
+
+2000-07-29  Kevin Ryde  <kevin@swox.se>
+
+       * gmp.texi (ABI and ISA): Add sparc64-*-linux*.
+       * configure.in [sparc64-*-linux*] (gmp_cflags64_gcc): Same flags
+       as under solaris.
+
+       * configure.in (--enable-fft): New option, default "no".
+       * gmp.texi (Build Options): Describe it.
+       * mpn/generic/mul.c, mpn/generic/mul_n.c [WANT_FFT]: Use it.
+       * tune/tuneup.c [WANT_FFT]: By default don't probe FFTs if not enabled.
+       * NEWS: Multiplication optionally using FFT.
+
+       * tune/README: Notes on FFT and GCD thresholds, other minor updates.
+
+       * Makefile.am: Expunge the macos generated files update stuff.
+
+2000-07-28  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/x86/*/gmp-mparam.h: Add some FFT thresholds.
+
+2000-07-28  Kent Boortz  <kent@swox.se>
+
+       * macos/Asm*, macos/CmnObj, macos/Mp*: Delete directories.
+       * macos/Makefile: Delete file.
+       * macos/Makefile.cw: Delete file.
+       * macos/config.h: Delete file.
+       * macos/Asm/*.s: Delete files.
+       * macos/configure: Create target directories. Don't transform
+       '(C)' to '(;)' in a 'dnl' line comment in .asm file.
+       * Makefile.am: Delete macos targets.
+       * macos/README: Reflect that we reverted back to a build
+       process that requires "macos/configure" to run on MacOS.
+       This implies that MacPerl is needed for a build in MacOS.
+
+2000-07-27  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/generic/mul_fft.c: New file, by Paul Zimmermann, minor mods
+       applied.
+       * configure.in (gmp_mpn_functions): Add it.
+       * mpn/generic/mul.c, mpn/generic/mul_n.c: Use it.
+       * doc/multiplication: Describe it (briefly).
+
+       * gmp-impl.h (FFT_MUL_THRESHOLD etc): New thresholds.
+       (mpn_fft_best_k, mpn_fft_next_size, mpn_mul_fft, mpn_mul_fft_full):
+       New functions.
+       (numberof, TMP_ALLOC_TYPE etc, _MP_ALLOCATE_FUNC_TYPE etc,
+       UNSIGNED_TYPE_MAX etc): New macros.
+
+       * tune/*: Add FFT threshold tuning and speed measuring.
+       * tune/common.c: Avoid huge macro expansions for umul and udiv.
+
+       * mpz/tests/t-bin.c, mpz/tests/t-jac.c, mpz/tests/t-misc.c,
+       mpbsd/tests/t-misc.c, mpf/tests/t-misc.c, mpn/tests/try.c,
+       mpn/tests/spinner.c: Use new gmp-impl.h macros.
+
+       * demos/Makefile.am (BUILT_SOURCES): Don't need calc.c etc under this.
+
+2000-07-27  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/ia64/gmp-mparam.h: New file.
+
+2000-07-26  Torbjorn Granlund  <tege@swox.com>
+
+       * demos/isprime.c: Handle any number of arguments and print
+       classification for each.  Add `-q' option for old behaviour.
+
+2000-07-26  Kevin Ryde  <kevin@swox.se>
+
+       * gmp.texi (Build Options): Mention djgpp stack size.
+       (Notes for Package Builds): New section.
+       (Compatibility with older versions): Update for 3.1, add mpf_get_prec.
+
+       * demos/factorize.c [__GLIBC__]: Don't declare random() under glibc.
+
+       * gmp.h (gmp_version): Add prototype and define.
+
+       * Makefile.am: Keep macos directory generated files up-to-date
+       during development and on a "make dist".
+
+2000-07-25  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/hppa/gmp-mparam.h: Update threshold values from new `tune' run.
+
+       * mpn/pa64/gmp-mparam.h: Fill in values from `make tune' run.
+       * mpn/pa64w/gmp-mparam.h: Likewise.
+       * mpn/mips3/gmp-mparam.h: Likewise.
+
+       * tune/hppa2.asm: Fix typo in .level directive.
+
+       * configure.in: Add sparc64-*-linux* support (from Jakub Jelinek).
+       * configure: Regenerate.
+
+       * mpn/sparc64/rshift.asm: Use %g5 instead of volatile stack frame area
+       for return value (from Jakub Jelinek).
+       * mpn/sparc64/lshift.asm: Likewise.
+
+       * mpf/get_prc.c: Revert Aug 8, 1996 change.
+
+       * version.c: No longer static.
+
+       * mpn/pa64/gmp-mparam.h: Only #define *_THRESHOLD if not already
+       defined.
+       * mpn/pa64w/gmp-mparam.h: Likewise.
+       * mpn/arm/gmp-mparam.h: Likewise.
+       * mpn/mips3/gmp-mparam.h: Likewise.
+
+2000-07-25  Kevin Ryde  <kevin@swox.se>
+
+       * INSTALL: It's "info -f ./gmp.info" to be sure of hitting the
+       gmp.info in the current directory.
+
+       * Makefile.am (libmp_la_DEPENDENCIES): Add mpz/cmp.lo, for last
+       mpz/powm.c fix.
+
+       * mpn/sparc64/addmul1h.asm, mpn/sparc64/submul1h.asm: Renamed from
+       addmul_1h.asm, submul_1h.asm to avoid name conflicts on an 8.3
+       filesystem.
+       * mpn/sparc64/addmul_1.asm, mpn/sparc64/submul_1.asm,
+       mpn/sparc64/mul_1.asm: Update include_mpn()s.
+
+2000-07-24  Torbjorn Granlund  <tege@swox.com>
+
+       * Update header of all files previously under the Library GPL
+       to instead be under the Lesser GPL.
+
+       * COPYING.LIB: Now Lesser GPL.
+       * demos/primes.c: Change license to GPL (was Library GPL).
+       * demos/isprime.c: Change license to GPL (was Library GPL).
+
+       * gmp.h (error code enum): Add GMP_ERROR_BAD_STRING (currently unused).
+
+       * mpz/tests/t-mul.c: Default SIZE to a function of TOOM3_MUL_THRESHOLD.
+       Improve error messages.  Decrease reps.
+
+2000-07-22  Kevin Ryde  <kevin@swox.se>
+
+       * tune/speed.h: Decrease the amount of data used for gcd and powm
+       measuring, to make the tune go a bit faster.
+
+2000-07-21  Kent Boortz  <kent@swox.se>
+
+       * macos/Asm*, macos/CmnObj, macos/Mp*: Directories no longer created
+       from configure script, now part of dist.
+       * macos/Makefile
+       * macos/Makefile.cw
+       * macos/config.h
+       * macos/Asm/*.s
+       New files and directories that are the output from configure. This way
+       no Perl installation is required to build on MacOS, just MPW.
+       * macos/configure: Added prefix '__g' to exported assembler labels.
+       Changed to handle new m4 syntax instead of the old cpp syntax in asm.
+       * macos/Makefile.in: Corrected 'clean' target, added 'distclean'
+       and 'maintainer_clean'. Added "mpn/mp_bases.c" to build.
+       * macos/README: Reflect the new build process without configure.
+       Corrected the file structure for Apple MPW installation.
+
+2000-07-21  Torbjorn Granlund  <tege@swox.com>
+
+       * mpf/tests/t-muldiv.c: Relax error limit.  Make precision depend
+       on SIZE.  Misc changes.
+
+       * configure: Regenerate.
+
+2000-07-20  Kent Boortz  <kent@swox.com>
+
+       * macos/Makefile.in: Removed hard coded targets, added special
+       targets found in Makefile.am files.
+       * macos/configure: Generate targets from top configure script and
+       Makefile.am files. Made script runnable from Unix for testing.
+       * macos/README: Notes about search paths for includes, contributed
+       by Marco Bambini.
+       * configure.in: Added comment about lines that the "macos/configure"
+       script depends on.
+
+2000-07-20  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/powm.c (mpz_powm): After final mpz_redc call, subtract `mod'
+       from result if it is greater than `mod'.
+
+2000-07-19  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/hppa/gmp-mparam.h: Fill in values from `make tune' run.
+       * mpn/alpha/gmp-mparam.h: Likewise.
+       * mpn/powerpc32/gmp-mparam.h: Likewise.
+
+       * tune/hppa.asm: New file.
+       * tune/hppa2.asm: New file.
+       * configure.in (SPEED_CYCLECOUNTER_OBJS): Set for hppa2*-*-* and
+       hppa*-*-*.
+       * tune/Makefile.am (EXTRA_DIST): Add hppa.asm and hppa2.asm.
+
+       * tune/speed.h (SPEED_ROUTINE_MPN_BZ_DIVREM_CALL): Declare `marker';
+       invoke TMP_FREE.
+
+       * mpn/hppa/hppa1_1/udiv_qrnnd.S: Use "%" instead of "'" for
+       reloc/symbol delimiter.
+
+2000-07-16  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/powerpc64/gmp-mparam.h: Update with output from tune utility.
+       * mpn/powerpc64/copyi.asm: New file.
+       * mpn/powerpc64/copyd.asm: New file.
+
+2000-07-16  Kevin Ryde  <kevin@swox.se>
+
+       * tune/*: Add measuring for umul_ppmm and udiv_qrnnd.
+
+2000-07-14  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/x86/k6/k62mmx: New directory.
+       * configure.in (k6[23]*-*-*): Use it.
+       * mpn/x86/k6/k62mmx/copyi.asm, mpn/x86/k6/k62mmx/copyd.asm: Move from
+       mmx directory, improve code alignment a bit.
+       * mpn/x86/k6/k62mmx/lshift.asm, mpn/x86/k6/k62mmx/rshift.asm: Ditto,
+       and improve addressing modes for pre-CXT cores.
+       * mpn/x86/x86-defs.m4 (Zdisp): Add an instruction.
+       * mpn/x86/k6/mmx/lshift.asm, mpn/x86/k6/mmx/rshift.asm: New files,
+       suiting plain K6.
+       * mpn/x86/README, mpn/x86/k6/README: Updates.
+       * mpn/x86/k6/mmx/*.asm: Update some comments.
+
+       * mpn/tests/Makefile.am: Use $(MAKE) in .asm rules, not "m".
+       * tune/Makefile.am: Use $(EXEEXT) and libtool --config objdir, for
+       the benefit of djgpp.
+
+       * */Makefile.in: Regenerate with patched automake that adds
+       $(EXEEXT) to EXTRA_PROGRAMS.
+
+       * mpn/tests/try.c: Add #ifdef to SIGBUS, for the benefit of djgpp.
+       * config.guess: Recognise pc:*:*:* as an x86, for djgpp.
+
+       * configure: Regenerate with patched autoconf to fix temp file
+       ".hdr" which is invalid on a DOS 8.3 filesystem, and to fix two
+       sed substitutes that clobbered a ":" in $srcdir (eg. a DOS drive
+       spec).
+
+       * mpz/tests/io.c: Use one fp opened "w+", since separately opened
+       input and output doesn't work on MS-DOS 6.21.
+
+       * tests/rand/Makefile.am (allprogs): Pseudo-target to build everything.
+       (CLEANFILES): Add EXTRA_PROGRAMS and EXTRA_LTLIBRARIES.
+       (manual-test, manual-bigtest): Add $(EXEEXT) to dependencies.
+
+       * tests/rand/*/Makefile.in: Regenerate with patched automake that adds
+       $(EXEEXT) to EXTRA_PROGRAMS.
+
+2000-07-13  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/tests/t-root.c: Also test mpz_perfect_power_p.
+       Generate `nth' so that there will be fewer trivial values.
+
+       * mpz/root.c: Reverse return value in tests for detecting root of +1
+       and -1.
+
+       * mpz/perfpow.c: Use TMP_ALLOC interface.
+
+2000-07-12  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/perfpow.c (primes): Make it const.
+
+2000-07-06  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/x86/k6/cross.pl: New file.
+
+       * mpn/x86/*/gmp-mparam.h: Updates to thresholds, conditionalize
+       all _TIME defines.
+       * mpn/x86/pentium/mmx/gmp-mparam.h: New file.
+       * mpn/sparc64/gmp-mparam.h: Update thresholds.
+       * mpn/sparc32/v9/gmp-mparam.h: Ditto.
+
+2000-07-04  Kevin Ryde  <kevin@swox.se>
+
+       * NEWS: Updates.
+       * mpn/x86/*/README: Miscellaneous updates.
+
+       * tune/speed-ext.c: New file.
+       * tune/Makefile.am: Add it.
+       * tune/README: Updates.
+       * tune/speed.h (SPEED_ROUTINE_MPN_DIVREM_2): Bug fixes.
+
+       * demos/calc.y,calclex.l: New files.
+       * demos/calc.c,calc.h,calclex.c: New files, generated from .y and .l.
+       * demos/Makefile.am: Add them.
+
+       * gmp.h (mpq_swap, mpf_swap): Add prototypes and defines.
+
+2000-07-01  Kevin Ryde  <kevin@swox.se>
+
+       * gmp.texi (ABI and ISA): New section, bringing together ABI notes.
+       (Build Options): Add MPN_PATH, various updates.
+       (Build Options): Add note on setting CFLAGS when setting CC.
+       (Notes for Particular Systems): Add -march=pentiumpro problem.
+       (Known Build Problems): Note on gmp-mparam.h for 64-bit generic C.
+       (GMP Variable Conventions): Add some info on user defined functions.
+       (Reporting Bugs): Minor rewording.
+
+       * configure.in (MPN_PATH): Renamed from mpn_path.
+
+       * gmp-impl.h (ULONG_MAX,ULONG_HIGHBIT,...,SHORT_MAX): New defines.
+       * mp[zf]/tests/t-misc.c: Use them.
+
+       * mpbsd/tests/t-misc.c: New file.
+       * mpbsd/tests/Makefile.am: Add it.
+
+       * Makefile.am (LIBGMP_LT_*, LIBMP_LT_*): Bump version info.
+       * gmp.h (__GNU_MP_VERSION_*): Bump to 3.1.
+
+       * mpf/tests/Makefile.am (AUTOMAKE_OPTIONS): Add ansi2knr.
+
+       * Makefile.am (libmp_la_SOURCES): Add mp_set_fns.c, accidentally
+       omitted in gmp 3.0.x.
+       * gmp.texi (Custom Allocation): Note this is available in mpbsd,
+       and some minor rewording.
+
+2000-06-30  Torbjorn Granlund  <tege@swox.com>
+
+       * demos/factorize.c (random): New function, defined conditionally.
+       (factor_using_pollard_rho): Use it, not mrand48.
+
+       * mpn/cray/README: New file.
+
+2000-06-30  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/x86/pentium/aorsmul_1.asm: Add MULFUNC_PROLOGUE.
+
+       * mpz/tests/t-jac.c: Test limbs on mpn_jacobi_base, not just ulongs.
+
+       * gmp-impl.h, mpn/tests/try.c, mpn/tests/spinner.c, tune/speed.c:
+       Use config.h unconditionally, not under HAVE_CONFIG_H.
+
+       * demos/pexpr.c [__DJGPP__]: Patch by Richard Dawe to not use
+       setup_error_handler on djgpp.
+
+       * tune/*: Locate data to help direct-mapped caches, add measuring
+       of mpz_init/clear, mpz_add and mpz_bin_uiui, various cleanups.
+       * configure.in (AC_CHECK_FUNCS): Add popen.
+
+2000-06-29  Torbjorn Granlund  <tege@swox.com>
+
+       * mpf/mul_2exp.c: Streamline criterion for whether to use mpn_lshift or
+       mpn_rshift.  Increase precision when exp is a multiple of
+       BITS_PER_MP_LIMB primarily to make exp==0 be a noop.
+       * mpf/div_2exp.c: Analogous changes.
+
+       * mpf/tests/t-dm2exp.c: Set u randomly in loop.  Perform more
+       mpf_mul_2exp testing.
+
+       * configure.in: Recognize cray vector processors with a broad `*';
+       move after alpha* not to match that.
+
+2000-06-28  Kevin Ryde  <kevin@swox.se>
+
+       * mpz/tests/io.c: Use a disk file, not a pipe, switch to ansi2knr
+       style, switch from MP_INT to mpz_t, add a couple of error checks.
+       * mpz/tests/Makefile.am (CLEANFILES): Add io.tmp, in case io.c fails.
+
+2000-06-27  Torbjorn Granlund  <tege@swox.com>
+
+       * mpf/tests/t-get_d.c: Be more lax about relative error, to handle Cray
+       floating point format.
+
+       * mpq/tests/t-get_d.c: Decrease default reps to 1000.
+
+       * mpf/tests/t-conv.c: Correct type of `bexp'.
+
+       * configure.in (cray vector machines): Don't inherit gmp_cflags_cc.
+
+       * tune/Makefile.am (EXTRA_DIST): Delete sparc64.asm.
+
+       * configure.in (cray vector machines): Set extra_functions.
+
+       * mpn/cray/mulww.f: New file with vectorizing cray code.
+       * mpn/cray/mulww.s: Generated from mulww.f.
+       * mpn/cray/mul_1.c: New file.
+       * mpn/cray/addmul_1.c: New file.
+       * mpn/cray/submul_1.c: New file.
+       * mpn/cray/add_n.c: New file.
+       * mpn/cray/sub_n.c: New file.
+
+2000-06-26  Kevin Ryde  <kevin@swox.se>
+
+       * acinclude.m4 (GMP_CHECK_ASM_ALIGN_FILL_0x90): Fix so it actually
+       detects solaris 2.6, and also suppress warning on solaris 2.8.
+       * configure.in (SPEED_CYCLECOUNTER): Remove spurious "athlon" from
+       sparc case.
+
+       * mpn/lisp/gmpasm-mode.el: Move keymap to the top of the docstring.
+
+2000-06-21  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/generic/mul_n.c (mpn_kara_mul_n, mpn_kara_sqr_n): Use
+       mp_size_t for n2.
+       (mpn_toom3_mul_n, mpn_toom3_sqr_n): Use mp_size_t for size
+       parameters and "l" variables.
+       * gmp-impl.h (mpn_toom3_mul_n, mpn_toom3_sqr_n): Update prototypes.
+
+       * mpbsd/itom.c, mpbsd/sdiv.c: Add casts for correct handling of
+       -0x80...00 on systems with sizeof(short)==sizeof(int).
+
+       * mpz/tests/t-misc.c: Move "bin" test from here ...
+       * mpz/tests/t-bin.c: ... to here, and add a new (2k,k) test too.
+       * mpz/tests/Makefile.am (check_PROGRAMS): Add t-bin.
+
+       * mpz/bin_ui.c [_LONG_LONG_LIMB]: Use mpn_divrem_1, since kacc is
+       a limb not a ulong.
+       * mpz/bin_uiui.c [_LONG_LONG_LIMB]: Ditto, and use mpn_mul_1 too,
+       since nacc is a limb.
+
+       * mpf/tests/t-misc.c (check_mpf_set_si, check_mpf_cmp_si):
+       New file, testing mpf_set_si, mpf_init_set_si, and mpf_cmp_si.
+       * mpf/tests/Makefile.am (check_PROGRAMS): Add it.
+
+       * mpz/tests/t-misc.c (check_mpz_set_si, check_mpz_cmp_si):
+       New tests, for mpz_set_si, mpz_init_set_si, and mpz_cmp_si.
+
+       * mpz/set_si.c, mpz/iset_si.c, mpz/cmp_si.c [_LONG_LONG_LIMB]: Fix
+       handling of -0x80..00.
+       * mpf/set_si.c, mpf/iset_si.c, mpf/cmp_si.c [_LONG_LONG_LIMB]: Ditto.
+
+2000-06-19  Torbjorn Granlund  <tege@swox.com>
+
+       * demos/primes.c: Properly handle arguments `m +n'.
+
+2000-06-17  Torbjorn Granlund  <tege@swox.com>
+
+       * config.sub: Recognize k5 and k6 with common pattern.
+
+       * mpq/tests/t-get_d.c: Also test mpq_set_d.  Misc improvements.
+
+       * mpq/set_d.c: Special case 0.0.  Don't call mpn_rshift with 0 count.
+       Allocate correct amount of memory for numerator.  Delete spurious
+       ASSERT_ALWAYS(1).
+
+2000-06-17  Kevin Ryde  <kevin@swox.se>
+
+       * mpz/perfsqr.c: Fix so that zero is considered a perfect square.
+       (Was wrongly calling mpn_perfect_square_p with size==0.)
+
+2000-06-16  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in: Set k5*-*-* to use basic i386 code until there's
+       something specific.  Add path=x86 as a default for x86s.
+
+       * acinclude.m4 (GMP_CHECK_ASM_ALIGN_LOG): Generate
+       ALIGN_LOGARITHMIC setting, not a full ALIGN definition.
+       (GMP_CHECK_ASM_ALIGN_FILL_0x90): New test.
+       * configure.in [x86-*-*]: Use GMP_CHECK_ASM_ALIGN_FILL_0x90.
+       * mpn/asm-defs.m4 (ALIGN): New macro.
+       * mpn/x86/x86-defs.m4 (ALIGN): Remove supplementary definition.
+
+       * tune/*: Plain "unsigned" for speed_cyclecounter.
+       * configure.in: Use tune/sparcv9.asm for 32 and 64 bit modes.
+       * tune/sparc64.asm: Remove file.
+
+2000-06-15  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/x86/k7/mmx/copyi.asm: Use `testb' instead of `test'.
+       * mpn/x86/k7/mmx/copyd.asm: Likewise.
+
+       * mpn/x86/k7/mmx/lshift.asm: Avoid using `~' (Solaris `as' problems).
+       * mpn/x86/k7/mmx/rshift.asm: Likewise.
+       * mpn/x86/k6/aors_n.asm: Likewise.
+       * mpn/x86/k7/aors_n.asm: Likewise.
+       * mpn/x86/k7/mul_basecase.asm: Likewise.
+
+2000-06-13  Torbjorn Granlund  <tege@swox.com>
+
+       * tune/sparcv9.asm: Tune, deleting two instructions.
+
+       * tune/alpha.asm: Update to unified speed_cyclecounter.
+
+2000-06-11  Kevin Ryde  <kevin@swox.se>
+
+       * mpz/tests/reuse.c (FAIL): Add a K&R version.
+       Use _PROTO on some typedefs.
+       * mpz/tests/t-misc.c: Add gmp-impl.h for "const".
+
+       * configure.in: Rework mpn multi-function and optional files.
+       Names standardized, no need for explicit declarations, all picked
+       up in one $path traversal.
+       * doc/configuration: Updates.
+
+       * tests/rand/t-rand.c (main): Change "usage" to work with K&R.
+
+2000-06-10  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/x86/pentium/mmx/popham.asm, mpn/x86/p6/mmx/popham.asm,
+       mpn/x86/p6/p3mmx/popham.asm, mpn/x86/p6/diveby3.asm: Add
+       MULFUNC_PROLOGUE for correct HAVE_NATIVE_* matching.
+
+       * mpn/x86/x86-defs.m4 (cmov_bytes_tttn): Use eval() on expressions.
+       (cmov_available_p): Switch to list CPUs which do have cmov.
+       * mpn/x86/p6/sqr_basecase.asm, mpn/x86/k6/sqr_basecase.asm,
+       mpn/x86/k7/sqr_basecase.asm: Use eval() for multiplication.
+       * mpn/x86/README.family: Various updates.
+
+2000-06-09  Kevin Ryde  <kevin@swox.se>
+
+       * mpbsd/tests/allfuns.c (main): Call exit() instead of doing return.
+
+       * doc/tasks.html, doc/projects.html: Moved from projects directory.
+       * doc/multiplication: New file.
+       * Makefile.am (EXTRA_DIST): Remove projects, add doc.
+
+       * Makefile.am (libgmp_la_LIBADD, libmp_la_LIBADD): Remove
+       unnecessary -lm.
+       * INSTALL: Remove -lm from instructions.
+       * demos/Makefile.am (qcn_LDADD): Add -lm.
+
+       * tune/*: Add measuring for mpn_divrem_2 and modlimb_invert,
+       improve addsub_n.  Switch to unified speed_cyclecounter.
+       * configure.in: Update configs for speed_cyclecounter.
+
+       * gmp-impl.h (MP_LIMB_T_MAX, MP_LIMB_T_HIGHBIT): New macros.
+       * mpn/generic/diveby3.c, mpn/generic/mul_n.c, mpn/generic/gcd.c,
+       tune/speed.c, mpn/tests/ref.c: Use them.
+
+       * mpn/tests/spinner.c: Remove setitimer, just alarm is enough.
+       * configure.in (AC_CHECK_FUNCS): Remove setitimer.
+       * mpn/tests/x86call.asm: Start with junk in %eax, %ecx, %edx.
+       * mpn/tests/ref.[ch] (refmpn_addsub_nc): New function.
+       * mpn/tests/try.c: Add some support for mpn_addsub_nc.
+       * mpn/tests/Makefile.am (EXTRA_PROGRAMS): Remove addsub_n and
+       addsub_n_2 which don't currently build.
+       * mpn/tests/copy.c: Test MPN_COPY_INCR, not __gmpn_copy.
+
+       * tests/rand/Makefile.am (libstat_la_LIBADD): Add -lm, no longer on
+       libgmp.la.
+       (findlc_LDADD): Use libstat.la.
+       (AUTOMAKE_OPTIONS): Use ansi2knr.
+
+2000-06-08  Torbjorn Granlund  <tege@swox.com>
+
+       * configure.in (alpha*-*-osf*): Default `flavour' to ev6 for ev6 and
+       higher.
+       (alpha*-*-*): Likewise.
+       (alpha*-*-osf*: gmp_optcflags_cc): Move -arch/-tune flags from
+       gmp_xoptcflags_gcc.
+
+       * mpn/Makefile.am (TARG_DIST): Add pa64w.
+
+       * longlong.h: Wrap 64-bit hppa code in #ifndef LONGLONG_STANDALONE.
+
+2000-06-07  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/remove.c: Fail for `src' being zero.
+
+       * mpz/tests/reuse.c: Test more functions.
+       (FAIL): New define.
+
+       * mpz/tests/t-powm.c: Loop during operand generation while they
+       are mathematically ill-defined (used to just skip such tests).
+
+       * mpz/powm.c (mpz_redc): Clean up argument declarations.
+
+       * configure.in (gmp_cflags64_gcc): Don't add bogus -mWHAT option.
+       (sparcv9-*-solaris2.[7-9], gmp_cflags64_gcc):
+       Inherit from previous gmp_cflags64_gcc; pass `-m64 -mptr64'.
+       (ia64*-*-*): New.
+
+       * mpn/generic/dump.c: Make it work when an mp_limb_t is not `long'.
+
+       * mpf/set_prc.c: MPN_COPY => MPN_COPY_INCR.
+
+2000-06-06  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/mul_n.c (mpn_toom3_mul_n, mpn_toom3_sqr_n):
+       Use mpn_incr_u for final carry propagation.
+
+       * mpz/tests/t-gcd.c: Add calls to mpz_gcdext with argument t == NULL.
+
+       * mpz/tests/reuse.c: Major rewrite; test many more functions.
+
+       * mpz/powm_ui.c: When exp is 0, change res assign order in order
+       to handle argument overlap.
+       * mpz/powm.c: When exp is 0, change res assign order in order
+       to handle argument overlap.  Handle negative exp and mod arguments.
+
+       * mpz/gcdext.c: Rework code after mpn_gcdext call to handle
+       argument overlap.
+
+       * mpz/fdiv_qr.c: Read dividend->_mp_size before calling mpz_tdiv_qr
+       in order to handle argument overlap.
+       * mpz/cdiv_qr.c: Likewise.
+
+       * mpf/tests/reuse.c: Fix typo that effectively disabled `dis_funcs'
+       tests.  Clean up test for mpf_ui_div.
+
+2000-06-06  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/x86/p6/sqr_basecase.asm: New file.
+       * mpn/x86/mod_1.asm: Avoid one conditional jump.
+       * mpn/x86/p6/gmp-mparam.h: Update thresholds, #ifndef UMUL_TIME
+       and UDIV_TIME, add COUNT_TRAILING_ZEROS_TIME.
+
+       * mp_minv_tab.c: New file.
+       * Makefile.am (libgmp_la_SOURCES, libmp_la_SOURCES): Add it.
+       * gmp-impl.h (modlimb_invert): New macro.
+       * mpz/powm.c: Remove mpz_dmprepare, use modlimb_invert instead.
+       * mpn/generic/bdivmod.c: Use modlimb_invert instead of a loop.
+       * mpn/generic/gcd.c: Inline two small mpn_bdivmod calls, use
+       MPN_COPY_INCR not MPN_COPY in one place.
+
+2000-06-05  Torbjorn Granlund  <tege@swox.com>
+
+       * mpf/tests/reuse.c (dsi_funcs): Add mpf_mul_2exp and mpf_div_2exp.
+       (main): Clean up test for mpf_div_ui.
+
+       * mpf/mul_2exp.c: Correct criterion for whether to use mpn_lshift or
+       mpn_rshift.  MPN_COPY => MPN_COPY_INCR.  Coerce the two assignments to
+       r->_mp_size.
+
+       * mpf/div_2exp.c: Use mpn_rshift instead of mpn_lshift when overlap
+       so requires.  MPN_COPY => MPN_COPY_INCR.
+
+       * mpf/tests/t-dm2exp.c: Correct type of res_prec.
+
+2000-06-04  Kevin Ryde  <kevin@swox.se>
+
+       * mpz/bin_uiui.c: Fix result for n==0 and n==k.
+       * mpz/bin_ui.c: Fix result for k>n, add support for n<0.
+       * gmp.texi (Number Theoretic Functions): Update mpz_bin_ui to
+       note n<0 is supported.
+
+       * mpz/tests/t-misc.c: New file.
+       * mpz/tests/Makefile.am (check_PROGRAMS): Add it.
+
+2000-05-31  Kevin Ryde  <kevin@swox.se>
+
+       * tune/speed.* (FLAG_R_OPTIONAL): New option for routines, use on
+       mpn_gcd_1 and mpn_mul_basecase.
+       * tune/README: Update.
+
+       * tune/alpha.asm: New file, by Torbjorn.
+       * tune/Makefile.am (EXTRA_DIST): Add it.
+       * configure.in (alpha*-*-*): Use it.
+
+2000-05-31  Linus Nordberg  <linus@swox.se>
+
+       * doc/configuration: New file.
+
+2000-05-30  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/mul_basecase.c: Call mpn_mul_2 and mpn_addmul_2
+       if available.  Don't include longlong.h.
+
+       * doc/isa_abi_headache: New file.
+
+2000-05-30  Linus Nordberg  <linus@swox.se>
+
+       * configure.in (NM): Use AC_PROG_NM rather than AC_CHECK_TOOL to
+       find `nm'.  (AC_PROG_NM comes with Libtool and is needed to get
+       the `-B' option (BSD compatible output) included in $NM.)
+       (AR): Use AC_CHECK_PROG rather than AC_CHECK_TOOL to find `ar'.
+       (Now that NM isn't a cross compilation tool, don't give the
+       impression that we know how to cross compile.)
+       (CCAS): Remove spurious comment.
+
+       * gmp.texi (Notes for Particular Systems): Remove comment about
+       using GNU `nm' on AIX since system nm now works.
+
+2000-05-29  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/power/mul_1.s: Remove [PR] from first word in function
+       descriptor.
+       * mpn/power/addmul_1.s: Likewise.
+       * mpn/power/submul_1.s: Likewise.
+
+2000-05-28  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in, tune/*: Change pentium rdtsc cycle scheme to
+       HAVE_SPEED_CYCLECOUNTER and SPEED_CYCLECOUNTER_OBJS.
+       * tune/pentium.asm: Renamed and converted from rdtsc.asm.
+       * tune/sparcv9.asm: New file, by Torbjorn.
+       * tune/sparc64.asm: New file.
+       * tune/tuneup.c: Put a limit on gcdext search.
+
+       * gmp.h (mp_set_memory_functions): Add extern "C".
+       * mp.h (__GNU_MP__): Bump to "3".
+       * mpz/add.c,mul.c,powm.c,sub.c,sqrtrem.c,tdiv_qr.c [BERKELEY_MP]:
+       Include mp.h for mpbsd compile.
+       * mpz/gcd.c: Ditto, and remove _mpz_realloc declaration.
+
+       * gmp.texi (Integer Functions): Flatten @subsections into @sections.
+       (Floating-point Functions): Ditto.
+       (Integer Random Numbers): Split from miscellaneous as a separate section.
+       (Installing GMP): Make nodes for the sections.
+       Add more "@cindex"s.
+       (Known Build Problems): Remove SunOS get_d problem, believed fixed.
+       (Notes for Particular Systems): Remove HPPA note since now PIC.
+       (References): URL for Jebelean.
+
+2000-05-27  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/pa64w: New directory, contents based on corresponding mpn/pa64
+       files.
+       * configure.in (hppa2.0w-*-*): New.
+       * mpz/tests/io.c (_INCLUDE_POSIX_SOURCE): Define when __hpux before
+       including stdio.h.
+       * gmp-impl.h: Always define DItype and UDItype.
+
+2000-05-27  Kevin Ryde  <kevin@swox.se>
+
+       * tune/common.c (speed_measure): Correction to array sorting,
+       better diagnostic when measuring fails.
+       * tune/time.c: Add microsecond accurate getrusage method.
+
+       * tune/time.c (speed_cpu_frequency_processor_info): New function.
+       * configure.in (AC_CHECK_FUNCS): Add processor_info.
+
+2000-05-26  Linus Nordberg  <linus@swox.se>
+
+       * gmp.texi (Installing GMP): Shared libraries work for AIX < 4.3
+       if using GNU nm.
+
+2000-05-26  Torbjorn Granlund  <tege@swox.com>
+
+       * tune/tuneup.c (SIGNED_TYPE_MAX): Shift `-1' instead of `1' to
+       avoid signed overflow.
+
+       * demos/pexpr.c (setup_error_handler): Don't call sigaltstack on
+       Unicos.
+
+2000-05-25  Torbjorn Granlund  <tege@swox.com>
+
+       * insert-dbl.c: Work around GCC 2.8 bug.
+       * extract-dbl.c: Likewise.
+
+       * config.sub: Allow i586, i686, i786 again.
+
+       * config.guess: Use X86CPU for lots more systems.
+
+2000-05-25  Linus Nordberg  <linus@swox.se>
+
+       * mpbsd/tests/dummy.c (main): Call exit() instead of doing return
+       (some old SysV machines don't get this correct, I've heard.)
+
+2000-05-25  Kevin Ryde  <kevin@swox.se>
+
+       * mpf/iset_str.c: Initialize _mp_size and _mp_exp to 0, in case no
+       digits in string, so it's the same as a separate init and set_str.
+
+2000-05-24  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/tests/reuse.c: Use mpz_random2 instead of mpz_random.
+
+       * mpz/divexact.c: Read pointers after reallocation.
+       Compare `quot' and `den' instead of `qp' and `dp' in overlap check.
+       Use MPN_COPY_INCR for copying from `np'.
+
+       * configure.in (*-*-aix4.[3-9]*): Disable shared libs just for
+       problematic AIX versions.
+       * configure.in (*-cray-unicos*): Disable asm syntax checking; set
+       compiler explicitly.
+       * configure.in (hppa*-*-*): Remove code disabling shared libs.
+
+2000-05-24  Linus Nordberg  <linus@swox.se>
+
+       * acinclude.m4 (GMP_PROG_CC_WORKS): Don't report progress to user
+       when doing the AIX specific test to avoid "nested output".
+
+2000-05-22  Kevin Ryde  <kevin@swox.se>
+
+       * mp.h (_PROTO): Copy from gmp.h, use on prototypes.
+       Add extern "C" too.
+       * mpbsd/tests/Makefile.am (AUTOMAKE_OPTIONS): Enable ansi2knr.
+       * mpbsd/tests/allfuns.c: Don't execute mout, just link to it.
+       (main): ANSI style definition.
+
+       * gmp-impl.h (MP_BASE_AS_DOUBLE): Change the expression to
+       something that works on SunOS native cc.  Seems to fix the
+       mp*_get_d problems.
+
+       * mpn/tests/ref.c (refmpn_strip_twos): Use MPN_COPY_INCR.
+       * mpn/tests/Makefile.am: Let .asm.o rules work with absolute $srcdir.
+
+2000-05-21  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/x86/k7/sqr_basecase.asm: Replace file with K7 specific code.
+       * mpn/x86/k7/README: Update.
+       * mpn/x86/k7/gmp-mparam.h: Tune thresholds.
+       (COUNT_TRAILING_ZEROS_TIME): New define.
+       * mpn/x86/k6/gmp-mparam.h: Ditto.
+
+       * mpn/x86/pentium/mmx/popham.asm: New file (include_mpn of K6 version).
+       * mpn/x86/p6/diveby3.asm: New file (include_mpn of P5 version).
+       * mpn/x86/p6/mmx/popham.asm: New file (include_mpn of K6 version).
+       * mpn/x86/p6/p3mmx/popham.asm: New file (include_mpn of K7 version).
+       * configure.in (pentium3-*-*): Add p3mmx to $path.
+
+       * gmp.texi (Integer Arithmetic): Clarify mpz_jacobi op2; add
+       mpz_*_kronecker_*.
+       (Miscellaneous Integer Functions): Add mpz_odd_p and mpz_even_p.
+       (Low-level Functions): Put mpn_divmod_1 with mpn_divrem_1 and note
+       it's now a macro.
+       (References): Add Henri Cohen.
+
+       * gmp.h (mpn_addmul_1c, mpn_divrem_1c, mpn_mod_1c, mpn_mul_1c,
+       mpn_submul_1c): Add prototypes.
+       (mpz_odd_p, mpz_even_p): New macros.
+
+       * mpn/asm-defs.m4 (m4wrap_prepend): New macro.
+       (m4_error): Use it.
+       (m4_not_for_expansion): Corrections to OPERATION symbols.
+       More comments about variations between m4 versions.
+       * mpn/x86/x86-defs.m4 (PROLOGUE): Use m4wrap_prepend (fixes error
+       exit under BSD m4, previously m4_error printed the message but the
+       exit code was 0).
+
+       * gmp.h (mpn_divmod_1): Change to a macro calling mpn_divrem_1.
+       * mpn/generic/divrem_1.c: Move divmod_1.c code to here, make it
+       static and call it __gmpn_divmod_1_internal.
+       * mpn/generic/divmod_1.c: Remove file.
+       * configure.in (gmp_mpn_functions): Remove divmod_1.
+       * mpn/asm-defs.m4 (define_mpn): Remove divmod_1 and divmod_1c.
+       * compat.c (mpn_divmod_1): Add compatibility function.
+       * tune/*: Remove mpn_divmod_1 measuring (leave just divrem_1).
+
+       * acconfig.h (HAVE_NATIVE_mpn_*): Add some missing carry-in
+       variants, remove divmod_1.
+
+       * mpn/x86/diveby3.asm: Use imul, update comments.
+
+       * demos/qcn.c: New file.
+       * demos/Makefile.am (EXTRA_PROGRAMS): Add it.
+
+       * mpz/tests/t-jac.c: New file.
+       * mpz/tests/Makefile.am (check_PROGRAMS): Add it. Enable ansi2knr.
+
+       * mpz/kronsz.c: New file.
+       * mpz/kronuz.c: New file.
+       * mpz/kronzs.c: New file.
+       * mpz/kronzu.c: New file.
+       * mpz/Makefile.am (libmpz_la_SOURCES): Add them.
+       * Makefile.am (MPZ_OBJECTS): Add them.
+       * gmp-impl.h (JACOBI_*, MPN_STRIP_LOW_ZEROS_NOT_ZERO): New macros.
+       * gmp.h (mpz_*_kronecker_*): New defines and prototypes.
+
+       * mpn/generic/jacbase.c: New file.
+       * mpn/generic/mod_1_rs.c: New file.
+       * configure.in (gmp_mpn_functions): Add them.
+       * gmp.h (mpn_jacobi_base, mpn_mod_1_rshift): New defines and
+       prototypes.
+       * longlong.h (COUNT_TRAILING_ZEROS_TIME): New define.
+       * mpn/tests/ref.c (refmpn_mod_1_rshift): New function.
+       * mpn/tests/try.c: Add mpn_mod_1_rshift.
+       * tune/*: Add measuring for mpn_jacobi_base.
+
+       * acinclude.m4 (GMP_FINISH): Add ifdefs to allow multiple
+       inclusion of config.m4.
+       (GMP_PROG_M4): Put "good" message through to config.log.
+
+       * mpz/powm.c: Use a POWM_THRESHOLD for where redc stops.
+       * tune/*: Add mpz_powm measuring, and tune POWM_THRESHOLD.
+       * gmp-impl.h [TUNE_PROGRAM_BUILD] (POWM_THRESHOLD): Conditional
+       redefinition for use when tuning.
+
+       * mpz/powm_ui.c: Use DIVIDE_BY_ZERO.
+
+       * mpz/iset_str.c: Initialize _mp_size to 0, in case no digits in
+       string; this makes it the same as a separate init and set_str.
+
+2000-05-20  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/asm-defs.m4: Note &,|,^ aren't bitwise in BSD m4 eval().
+       * mpn/x86/k6/sqr_basecase.asm: Use "%" not "&" in m4 eval()s.
+
+       * mpn/x86/x86-defs.m4 (Zdisp): Yet more instruction forms.
+
+2000-05-19  Linus Nordberg  <linus@swox.se>
+
+       * acinclude.m4 (GMP_CHECK_CC_64BIT): Don't use shell variable
+       `ac_compile' for our own compile command string since other
+       Autoconf macros may depend on it.
+
+2000-05-19  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/generic/mul_n.c (mpn_toom3_mul_n, mpn_toom3_sqr_n): Fix
+       carry propagation in final coefficient additions.
+
+2000-05-18  Linus Nordberg  <linus@swox.se>
+
+       * configure.in: Set NM before looking for compiler since
+       GMP_CHECK_CC_64BIT needs it.
+
+       * acinclude.m4 (GMP_CHECK_CC_64BIT): Don't execute on target.
+       (GMP_PROG_CC_FIND): Before checking if the compiler knows how to
+       produce 64-bit code, verify that it works at all.  The background
+       is that /usr/ucb/cc on Solaris 7 successfully compiles in 64-bit
+       mode but fails when doing final link.
+       (GMP_PROG_CC_WORKS): Report to user what's happening.
+
+2000-05-17  Linus Nordberg  <linus@swox.se>
+
+       * config.guess: Use X86CPU for x86 Cygwin.
+
+2000-05-16  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/x86/p6/mmx/divrem_1.asm: New file.
+       * mpn/x86/p6/mmx/mod_1.asm: New file.
+       * mpn/x86/p6/README: Update.
+       * mpn/x86/divrem_1.asm: Update comments.
+       * mpn/x86/mod_1.asm: Ditto.
+
+2000-05-14  Kevin Ryde  <kevin@swox.se>
+
+       * tune/speed.h: Run gcd functions on a set of data.
+
+       * mpn/tests/try.c: New file.
+       * mpn/tests/try.h: New file.
+       * mpn/tests/spinner.c: New file.
+       * mpn/tests/trace.c: New file.
+       * mpn/tests/x86call.asm: New file.
+       * mpn/tests/x86check.c: New file.
+       * mpn/tests/ref.c (refmpn_hamdist): Allow size==0.
+       (refmpn_gcd): New function, and other additions supporting it.
+       * mpn/tests/ref.h: More prototypes.
+       * mpn/tests/Makefile.am: Add try program, use ansi2knr.
+
+       * mpn/x86/k7/mmx/popham.asm: New file.
+       * mpn/x86/k6/mmx/popham.asm: New file.
+       * mpn/x86/k6/sqr_basecase.asm: Unroll the addmul, for approx 1.3x
+       speedup above 15 limbs.
+       * mpn/x86/k7/README: Update.
+       * mpn/x86/k6/README: Update, and add notes on plain K6 and pre-CXT
+       K6-2 problems.
+       * configure.in (k6*-*-*, athlon-*-*): Add popham.
+
+       * mpn/x86/pentium/diveby3.asm: New file.
+       * mpn/x86/pentium/README: Update.
+
+       * gmp.texi (Installing GMP): Add note on bad OpenBSD 2.6 m4.
+       (Reporting Bugs): Ask for config.m4 if asm file related.
+       (I/O of Rationals): New section, add mpq_out_str.
+       (References): Add url for on-line gcc manuals.
+       A few node and menu updates.
+
+       * INSTALL: Better command line argument checking for test progs.
+       Change MP -> GMP.
+
+       * configure.in (WANT_ASSERT, USE_STACK_ALLOC, HAVE_PENTIUM_RDTSC):
+       Put descriptions here, not in acconfig.h.
+       (CALLING_CONVENTIONS_OBJS): New AC_SUBST (for mpn/tests/try).
+       (HAVE_CALLING_CONVENTIONS): New AC_DEFINE.
+       (AC_CHECK_HEADERS): Add sys/time.h.
+       (AC_CHECK_FUNCS): Add getpagesize, setitimer.
+       (KARATSUBA_SQR_THRESHOLD): Strip trailing comments from the
+       #define when passing through to config.m4.
+       * acconfig.h (PACKAGE, VERSION, WANT_ASSERT, USE_STACK_ALLOC,
+       HAVE_PENTIUM_RDTSC): No need for #undefs, autoheader gets them
+       from configure.in.
+
+       * acinclude.m4 (GMP_PROG_M4): Check for broken OpenBSD 2.6 m4
+       eval(), put messages into config.log.
+       * mpn/asm-defs.m4: Add notes and test for OpenBSD 2.6 m4.
+
+       * mpq/out_str.c: New file.
+       * mpq/Makefile.am (libmpq_la_SOURCES): Add it.
+       * Makefile.am (MPQ_OBJECTS): Ditto.
+       * gmp.h (mpq_out_str): New define and prototype.
+
+2000-05-12  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in (CONFIG_TOP_SRCDIR): Fix to use $srcdir not
+       $top_srcdir (which doesn't exist).
+       * acinclude.m4 (GMP_C_ANSI2KNR): Fix setting U=_.
+       * gmp-impl.h (mpn_com_n, MPN_LOGOPS_N_INLINE): Fix missing "do"
+       (not currently used, probably no ill effect anyway).
+
+2000-05-11  Torbjorn Granlund  <tege@swox.com>
+
+       * randraw.c (lc): Major overhaul (pending rewrite).
+       (_gmp_rand): Rewrite.
+
+2000-05-08  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/tests/convert.c: Call free via _mp_free_func.
+       * mpf/tests/t-conv.c: Likewise.
+
+       * memory.c: Add code enabled for DEBUG that adds special patterns
+       around allocated blocks.
+
+2000-05-05  Linus Nordberg  <linus@swox.se>
+
+       * gmp.texi (Miscellaneous Float Functions): Correct parameter list
+       for mpf_urandomb().
+
+       * configure.in: Invoke AC_REVISION.
+
+2000-05-05  Kevin Ryde  <kevin@swox.se>
+
+       * gmp.texi: Use @dircategory and @direntry.
+       (Installing GMP): Clarification for --target, updates on SunOS
+       problems.
+       (Integer Arithmetic): Add mpz_mul_si.
+       (Initializing Rationals): Add mpq_swap.
+       (Assigning Floats): Add mpf_swap.
+       (Low-level Functions): Add mpn_divexact_by3c, and details of what
+       the calculation actually gives.
+       (Low-level Functions): Note extra space needed by mpn_gcdext,
+       clarify the details a bit.
+
+       * compat.c: New file, entry points for upward binary compatibility.
+       (mpn_divexact_by3): Compatibility function.
+       * Makefile.am (libgmp_la_SOURCES): Add compat.c.
+
+       * mpn/tests/ref.c: Rearrange macros for ansi2knr.
+       (div1): Renamed from div to avoid library function.
+       (refmpn_divexact_by3c, refmpn_gcd_1, refmpn_popcount,
+       refmpn_hamdist): New functions.
+       * mpn/tests/ref.h: Add extern "C", add new prototypes.
+
+       * gmp.h (gmp_randinit, etc): Add extern "C".
+       (_mpq_cmp_ui): Fix prototype name from mpq_cmp_ui.
+       (mpn_divexact_by3): Now a macro calling mpn_divexact_by3c.
+       (mpn_divexact_by3c): New prototype and define.
+
+       * mpn/x86/diveby3.asm: Change to mpn_divexact_by3c.
+       * mpn/x86/k6/diveby3.asm: Ditto.
+       * mpn/generic/diveby3.c: Ditto.
+       * mpn/asm-defs.m4: Ditto on the define_mpn.
+       * acconfig.h (HAVE_NATIVE_mpn_divexact_by3c): New define.
+
+       * mpq/swap.c: New file, derived from mpz/swap.c.
+       * mpf/swap.c: Ditto.
+       * mpq/Makefile.am: Add swap.c.
+       * mpf/Makefile.am: Ditto.
+       * Makefile.am: Add two new "swap.lo"s.
+
+       * mpn/x86/k6/mmx/com_n.asm: Fix an addressing bug (fortunately
+       this code hasn't been used anywhere yet).
+
+       * mpn/x86/k7/mmx/divrem_1.asm: New file.
+       * mpn/x86/k7/mmx/mod_1.asm: New file.
+       * mpn/x86/k7/diveby3.asm: New file.
+       * mpn/x86/k7/README: Update.
+
+       * mpn/x86/k7/aorsmul_1.asm: Use new cmovCC, no object code change.
+       * mpn/x86/k7/mul_basecase.asm: Ditto.
+       * mpn/x86/p6/aorsmul_1.asm: Ditto.
+
+       * mpn/x86/x86-defs.m4 (defframe_empty_if_zero): Eval the argument.
+       (cmovCC): New macros, replacing individual cmovCC_reg_reg forms.
+       (Zdisp): Recognise more instructions.
+       (shldl,etc): Use m4_instruction_wrapper().
+       (ASSERT, movl_text_address): New macros.
+
+       * mpn/asm-defs.m4: Add remarks on SunOS /usr/bin/m4 and new
+       OpenBSD m4.
+       (m4_assert_numargs_internal_check): Remove a spurious parameter.
+       (m4_empty_if_zero): Eval the argument.
+       (m4_assert, m4_assert_numargs_range, m4_config_gmp_mparam,
+       m4_instruction_wrapper): New macros.
+
+2000-05-04  Linus Nordberg  <linus@swox.se>
+
+       * gmp.texi (Reporting Bugs): Be explicit about output from running
+       a command.
+
+2000-05-02  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/bz_divrem_n.c (mpn_bz_divrem_n): Handle non-zero return
+       from first mpn_bz_div_3_halves_by_2 call.
+       (mpn_bz_divrem_aux): Likewise.
+
+2000-04-30  Kevin Ryde  <kevin@swox.se>
+
+       * tune/* (GCD_ACCEL_THRESHOLD, GCDEXT_THRESHOLD): Tune these.
+
+       * mpn/generic/gcdext.c (GCDEXT_THRESHOLD): Rename from THRESHOLD,
+       use with >=, adjust default to 17 accordingly.
+       Use new *_SWAP macros.
+
+       * mpn/generic/gcd.c (GCD_ACCEL_THRESHOLD): Rename from
+       ACCEL_THRESHOLD, use with >=, adjust default to 5 accordingly.
+       Use new *_SWAP macros.
+
+       * mpf/get_str.c, mpf/set_str.c, mpf/sub.c, mpz/add.c, mpz/ior.c,
+       mpz/and.c, mpz/sub.c, mpz/xor.c, mpz/ui_pow_ui.c,
+       mpn/generic/mul.c: Use new *_SWAP macros.
+
+       * stack-alloc.h: Add extern "C" around prototypes.
+
+       * gmp-impl.h (MP_PTR_SWAP, etc): New macros.
+       (_mp_allocate_func, etc): Use _PROTO.
+       [TUNE_PROGRAM_BUILD]: More changes in tune program build part.
+
+2000-04-28  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/pa64/add_n.s: Add `,entry' to export directive.
+       * mpn/pa64/addmul_1.S, mpn/pa64/lshift.s, mpn/pa64/mul_1.S,
+       mpn/pa64/rshift.s, mpn/pa64/sub_n.s, mpn/pa64/submul_1.S,
+       mpn/pa64/umul_ppmm.S: Likewise.
+       * mpn/hppa/hppa1_1/udiv_qrnnd.S: New name for udiv_qrnnd.s.
+       Add PIC support.
+
+2000-04-29  Kevin Ryde  <kevin@swox.se>
+
+       * gmp-impl.h [TUNE_PROGRAM_BUILD] (TOOM3_MUL_THRESHOLD_LIMIT): New
+       define.
+       * mpn/generic/mul_n.c [TUNE_PROGRAM_BUILD] (mpn_mul_n): Use
+       TOOM3_MUL_THRESHOLD_LIMIT, not a hard coded 500.
+
+       * memory.c: Use <stdlib.h> for malloc etc, and use _PROTO.
+       * stack-alloc.c: Don't use C++ reserved word "this".
+       * urandom.h: Put extern "C" around prototypes.
+       * mpz/powm.c: Switch a couple of parameters to "const", which they
+       are, to satisfy g++.
+
+       * randraw.c, stack-alloc.c, mpbsd/mout.c, mpbsd/mtox.c: Add casts to
+       help g++.
+
+       * stack-alloc.c: Provide dual ANSI/K&R function definitions.
+       * mpz/addmul_ui.c,get_d.c,inp_str.c,perfpow.c,powm.c,pprime_p.c,
+       rrandomb.c,set_str.c,ui_pow_ui.c: Ditto.
+       * mpf/integer.c,set_str.c: Ditto.
+       * mpbsd/min.c,xtom.c: Ditto.
+       * mpn/generic/bz_divrem_n.c,dump.c,gcd_1.c,get_str.c,hamdist.c,
+       popcount.c,random.c,random2.c,set_str.c: Ditto.
+
+       * rand.c: Use <stdio.h> for NULL.
+       * mpz/gcd_ui.c,gcdext.c,mul.c,perfpow.c,powm_ui.c,root.c,sqrt.c,
+       sqrtrem.c: Ditto.
+       * mpf/sqrt.c,sqrt_ui.c: Ditto.
+       * mpn/generic/perfsqr.c,sqrtrem.c: Ditto.
+
+       * gmp-impl.h (NULL, malloc, realloc, free): Don't define/declare.
+       (extern "C"): Add around function prototypes.
+       (mpn_kara_mul_n, mpn_kara_sqr_n, mpn_toom3_mul_n, mpn_toom3_sqr_n):
+       Add prototypes.
+       [TUNE_PROGRAM_BUILD] (FIB_THRESHOLD): Add necessary redefinitions for
+       use by tune program.
+       * mpn/generic/mul_n.c: Remove mpn_toom3_mul_n prototype.
+
+       * acinclude.m4 (GMP_C_ANSI2KNR): New macro.
+       (GMP_CHECK_ASM_MMX, GMP_CHECK_ASM_SHLDL_CL): Fix to use
+       $gmp_cv_check_asm_text which is what GMP_CHECK_ASM_TEXT sets.
+       * configure.in (GMP_C_ANSI2KNR): Use this instead of AM_C_PROTOTYPES,
+       for reasons described with its definition.
+
+       * demos/Makefile.am (ansi2knr): Use $(top_builddir) not $(top_srcdir).
+
+       * mpz/fib_ui.c (FIB_THRESHOLD): Rename from FIB_THRES, for consistency.
+       (FIB_THRESHOLD): Conditionalize so gmp-mparam.h can define a value.
+       (mpz_fib_bigcase): Use >= FIB_THRESHOLD, same as main mpz_fib_ui.
+       * tune/tuneup.c,Makefile.am (FIB_THRESHOLD): Tune this.
+
+       * configure.in (*-*-aix* gmp_m4postinc): Fix setting (don't overwrite
+       a value just stored).
+
+2000-04-26  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/sparc32/udiv_fp.asm: Use mpn_udiv_qrnnd macro.
+       * mpn/sparc32/udiv_nfp.asm: Ditto.
+       * mpn/sparc32/v8/supersparc/udiv.asm: Ditto.
+       * mpn/sparc32/umul.asm: Name the function mpn_umul_ppmm.
+       * mpn/sparc32/v8/umul.asm: Ditto.
+       * mpn/powerpc32/umul.asm: Ditto.
+
+       * mpn/x86/syntax.h: Remove file, since now unused.
+
+       * configure.in (x86): Remove -DBROKEN_ALIGN and -DOLD_GAS
+       previously used by .S files.
+       (x86 extra_functions): Add udiv and umul.
+       (GMP_PROG_M4): Use this instead of AC_CHECK_PROG(M4,m4,...)
+       (HAVE_NATIVE_*): Loosen up the regexp to "PROLOGUE.*" so as to
+       accept PROLOGUE_GP on alpha.
+
+       * acconfig.h (HAVE_NATIVE_mpn_umul_ppmm, udiv_qrnnd, invert_limb):
+       New template defines.
+       * mpn/asm-defs.m4 (mpn_umul_ppmm, mpn_udiv_qrnnd): New define_mpn()s.
+       * longlong.h (umul_ppmm, udiv_qrnnd): Use a library version if
+       it's available and an asm macro isn't.
+       * gmp-impl.h (invert_limb): Ditto.
+
+       * gmp-impl.h (ASSERT_NOREALLOC): Not a good idea, remove it.
+
+       * acinclude.m4 (GMP_PROG_M4): New macro.
+
+2000-04-25  Linus Nordberg  <linus@swox.se>
+
+       * gmp.texi (Random State Initialization): Correct arguments to
+       `gmp_randinit'.
+
+       * acinclude.m4 (GMP_VERSION): Change `eval' --> `m4_eval'.  Fix
+       from Kevin.
+       * aclocal.m4: Regenerate.
+
+2000-04-25  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/x86/aors_n.asm: Remove parentheses around an immediate that
+       Solaris "as" doesn't like, change by Torbjorn.
+
+2000-04-24  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in (AC_CHECK_FUNCS): Add strtoul.
+
+       * mpn/generic/mul_n.c [TUNE_PROGRAM_BUILD] (mpn_mul_n): Bigger
+       array for karatsuba temporary space for tune program build.
+       (mpn_toom3_sqr_n) Remove an unused variable.
+
+       * demos/Makefile.am (AUTOMAKE_OPTIONS): Add ansi2knr.
+       Add "allprogs:" pseudo-target.
+       * demos/factorize.c, demos/isprime.c: Switch to ANSI functions,
+       rely on ansi2knr.
+
+       * gmp.texi (Getting the Latest Version of GMP): Add reference to
+       ftp.gnu.org mirrors list.
+       * INSTALL: Add arg count check to example programs.
+
+       * mpn/x86/*/*.asm: Convert to FORTRAN ... or rather to
+       FORTRAN-style "C" commenting to support Solaris "as".
+       * mpn/x86/x86-defs.m4: Ditto, and add another Zdisp insn.
+       * mpn/asm-defs.m4 (C): Update comments.
+       * mpn/x86/README.family: Add a note on commenting, remove
+       description of .S files.
+
+       * mpn/sparc64/addmul_1.asm, mul_1.asm, submul_1.asm: Use
+       include_mpn().
+
+2000-04-23  Torbjorn Granlund  <tege@swox.com>
+
+       * config.sub: Merge with FSF version of April 23.
+
+       * mpn/powerpc32: Use dnl/C instead of `#' for comments.
+
+       * config.guess: Get "model" limit between pentium 2 and pentium3 right.
+       Get rid of code determining `_' prefix; use double labels instead.
+       * config.guess: Partially merge with FSF version of April 22.
+       (Don't bring over NetBSD changes for now.)
+
+2000-04-23  Kevin Ryde  <kevin@swox.se>
+
+       * tune/Makefile.am, tune/README, tune/common.c, tune/rdtsc.asm,
+       tune/speed.c, tune/speed.h, tune/time.c, tune/tuneup.c: New files.
+       * tune/Makefile.in: New file, generated from Makefile.am.
+
+       * gmp-impl.h (ASSERT_NOREALLOC,TMP_ALLOC_LIMBS): New macros.
+       [TUNE_PROGRAM_BUILD] Further mods for tune program builds.
+
+       * mpz/Makefile.am: Add -DOPERATION_$* for new mul_siui.c.
+       Add rules to build mul_si and mul_ui from a common mul_siui.c.
+       * mpz/mul_siui.c: New file, derived from and replacing mul_ui.c.
+       * gmp.h (mpz_mul_si): New prototype and define.
+
+       * mpn/tests/*.c [__i386__] (CLOCK): Don't use floating point in
+       CLOCK because cpp can't handle floats in #if's (TIMES is derived
+       from CLOCK by default).
+
+       * mpn/asm-defs.m4 (include_mpn): New macro.
+       (m4_assert_numargs) Changes to implementation.
+
+       * mpf/Makefile.am: Add -DOPERATION_$* for new integer.c.
+       Remove explicit rules for floor.o etc.
+       * mpf/integer.c: Use OPERATION_$* for floor/ceil/trunc.
+
+       * mpn/Makefile.am: Put "tests" in SUBDIRS.
+       * mpn/tests/Makefile.am: New file providing rules to build test
+       programs, nothing done in a "make all" or "make check" though.
+       * mpn/tests/README: New file.
+
+       * acconfig.h (HAVE_PENTIUM_RDTSC): New define.
+
+       * configure.in (x86): Rearrange target cases.
+       Add mulfunc aors_n and aorsmul_1 for x86 and pentium (now all x86s).
+       Remove asm-syntax.h generation not needed.
+       Remove now unused family=x86.
+       (sparc) Remove unused family=sparc.
+       (HAVE_PENTIUM_RDTSC) New AC_DEFINE and AM_CONDITIONAL.
+       (AM_C_PROTOTYPES) New test, supporting ansi2knr.
+       (AC_CHECK_HEADERS) Add getopt.h, unistd.h and sys/sysctl.h for
+       tune progs.
+       (AC_CHECK_FUNCS) Add getopt_long, sysconf and sysctlbyname for
+       tune progs.
+       (config.m4 CONFIG_TOP_SRCDIR) Renamed from CONFIG_SRCDIR.
+       (config.m4 asm-defs.m4) Use CONFIG_TOP_SRCDIR and include().
+       (gmp_m4postinc) Use include_mpn().
+       (gmp_links) Omit asm-defs.m4/asm.m4 and gmp_m4postinc's.
+       (MULFUNC_PROLOGUE) Fix regexps so all functions get AC_DEFINE'd.
+       (PROLOGUE) Ditto (native copyi and copyd were unused in gmp 3).
+       (KARATSUBA_SQR_THRESHOLD) Copy from gmp-mparam.h into config.m4.
+       (AC_OUTPUT) Add tune/Makefile, mpn/tests/Makefile.
+
+       * Makefile.am (AUTOMAKE_OPTIONS): Add ansi2knr.
+       (SUBDIRS): Add tune, reorder directories.
+       (MPZ_OBJECTS): Add mpz/mul_si.lo.
+       (libmp_la_SOURCES): Use this for top-level objects, not .lo's.
+       * ansi2knr.c, ansi2knr.1: New files, provided by automake.
+
+       * mpn/x86/aors_n.asm: Convert add_n.S and sub_n.S to a
+       multi-function aors_n.asm, no object code change.
+       * mpn/x86/pentium/aors_n.asm: Ditto.
+       * mpn/x86/aorsmul_1.asm: Ditto for addmul/submul.
+       * mpn/x86/pentium/aorsmul_1.asm: Ditto.
+
+       * mpn/x86/lshift.asm, mpn/x86/mul_1.asm, mpn/x86/mul_basecase.asm,
+       mpn/x86/rshift.asm: Convert from .S, no object code change.
+       * mpn/x86/pentium/lshift.asm, mpn/x86/pentium/mul_1.asm,
+       mpn/x86/pentium/mul_basecase.asm, mpn/x86/pentium/rshift.asm: Ditto.
+
+       * gmp.texi (Reporting Bugs): Itemize the list of things to include.
+       (Miscellaneous Float Functions): Correct typo in mpf_ceil etc
+       argument types.
+       Change @ifinfo -> @ifnottex for benefit of makeinfo --html.
+       Remove unnecessary @iftex's around @tex.
+
+2000-04-22  Torbjorn Granlund  <tege@swox.com>
+
+       * config.guess: Generalize x86 cpu determination code.
+       Now works on Solaris.
+
+       * mpz/nextprime.c: Rewrite still disabled code.
+
+       * configure.in: Specifically match freebsd[3-9].
+
+2000-04-21  Torbjorn Granlund  <tege@swox.com>
+
+       * rand.c: Call mpz_clear for otherwise leaking mpz_t.
+
+       * mpz/pprime_p.c (mpz_probab_prime_p): Merge handling of negative
+       n into code for handling small positive n.  Merge variables m and n.
+       After dividing, simply call mpz_millerrabin.
+       (isprime): Local variables now use attribute `long'.
+       (mpz_millerrabin): New static function, based on code from
+       mpz_probab_prime_p.
+       (millerrabin): Now simple workhorse for mpz_millerrabin.
+
+2000-04-19  Torbjorn Granlund  <tege@swox.com>
+
+       * gmp-impl.h: Fix parenthesis error in test for __APPLE_CC__.
+
+2000-04-18  Linus Nordberg  <linus@swox.se>
+
+       * NEWS: Add info about shared libraries.  Remove reference to
+       gmp_randinit_lc.
+
+2000-04-17  Torbjorn Granlund  <tege@swox.com>
+
+       * Version 3.0 released.
+
+       * mpn/arm/add_n.S: New version from Robert Harley.
+       * mpn/arm/addmul_1.S: Likewise.
+       * mpn/arm/mul_1.S: Likewise.
+       * mpn/arm/sub_n.S: Likewise.
+
+       * gmp.h (__GNU_MP_VERSION_PATCHLEVEL): Now 0.
+
+2000-04-17  Linus Nordberg  <linus@swox.se>
+
+       * configure.in (hppa2.0*-*-*): Pass `+O3' to cc/c89 in 64-bit mode
+       to avoid compiler bug.
+       (ns32k*-*-*): Fix typo in path.  Change by Kevin.
+       (alpha*-*-osf*): New case.  Pass assembly flags for architecture
+       to gcc.
+       (alpha*-*-*): Don't bother searching for cc.
+       * configure: Regenerate.
+
+       * Makefile.am (EXTRA_DIST): Add `macos', `.gdbinit'.
+       * Makefile.in: Regenerate.
+       * mpn/Makefile.am (EXTRA_DIST): Add `m88k', `lisp'.
+       * mpn/Makefile.in: Regenerate.
+
+2000-04-16  Kevin Ryde  <kevin@swox.se>
+
+       * README: Updates, and don't duplicate the example in INSTALL.
+       * INSTALL: Minor updates.
+       * gmp.texi (Installing MP): Minor edits, restore CC/CFLAGS description.
+
+2000-04-16  Linus Nordberg  <linus@swox.se>
+
+       * configure.in (*-*-cygwin*): Select BSD_SYNTAX to avoid
+       .type/.size in PROLOGUE for ELF_SYNTAX.  Override ALIGN definition
+       from x86/syntax.h.
+       (gmp_xoptcflags_${CC}): New set of variables, indicating
+       ``exclusive optional cflags''.
+       (most sparcs): Use gmp_xoptcflags instead of gmp_optcflags to
+       ensure that we pass CPU type to older gcc.
+       (CFLAGS): CFLAGS on the command line was spoiled.
+       * configure: Regenerate.
+
+2000-04-16  Linus Nordberg  <linus@swox.se>
+
+       * configure.in: Invoke AC_PROG_LIBTOOL directly.
+
+       * acinclude.m4 (GMP_PROG_CC_FIND): Quote source variable when
+       setting CC64 and CFLAGS64.
+       (GMP_PROG_CC_SELECT): Cache result.
+       (GMP_PROG_LIBTOOL): Remove.
+
+       * aclocal.m4: Regenerate.
+       * configure: Regenerate.
+
+2000-04-16  Linus Nordberg  <linus@swox.se>
+
+       * tests/rand/t-rand.c (main): Add non-ANSI function declaration.
+       Don't use `const'.
+
+2000-04-16  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/dump.c: Suppress output of leading zeros.
+
+       * mpz/inp_str.c: Fix memory leakage.
+
+       * mpz/tests/reuse.c (dss_func_division): Add a final 1.
+
+       * longlong.h (alpha count_leading_zeros): Wrap in __MPN.
+       * mpn/alpha/cntlz.asm: Use __gmpn prefix (by means of __MPN).
+
+       * longlong.h (__umul_ppmm, __udiv_qrnnd): Wrap in __MPN.
+       * mpn/alpha/udiv_qrnnd.S: Use __gmpn prefix.
+       * mpn/hppa/udiv_qrnnd.s: Likewise.
+       * mpn/hppa/hppa1_1/udiv_qrnnd.s: Likewise.
+       * mpn/pa64/udiv_qrnnd.c: Likewise (by means of __MPN).
+       * mpn/pa64/umul_ppmm.S: Likewise.
+       * mpn/sparc32/udiv_fp.asm: Likewise (by means of MPN).
+       * mpn/sparc32/udiv_nfp.asm: Likewise (by means of MPN).
+       * mpn/sparc32/v8/supersparc/udiv.asm: Likewise (by means of MPN).
+
+       * mpn/generic/tdiv_qr.c: Work around gcc 2.7.2.3 i386 register handling
+       bug.
+
+       * mpn/generic/tdiv_qr.c: Use udiv_qrnnd instead of mpn_divrem_1
+       when computing appropriate quotient; mpn_divrem_1 writes too
+       many quotient limbs.
+
+       * mpn/asm-defs.m4: invert_normalized_limb => invert_limb.
+       * mpn/alpha/invert_limb.asm: mpn_invert_normalized_limb =>
+       mpn_invert_limb.
+       * gmp.h: Likewise.
+       * gmp-impl.h (alpha specific): invert_normalized_limb => invert_limb;
+       wrap with __MPN.
+       * longlong.h (alpha udiv_qrnnd): Likewise.
+
+2000-04-16  Kevin Ryde  <kevin@swox.se>
+
+       * gmp.h (mp_set_memory_functions,mp_bits_per_limb,gmp_errno): Add
+       #defines so the library symbols are __gmp_*.
+       * errno.c: Include gmp.h.
+       * gmp-impl.h (_mp_allocate_func,etc): Add #defines to __gmp_*.
+       (__clz_tab): New #define to __MPN(clz_tab).
+       * stack-alloc.c (__gmp_allocate_func,etc): Change from _mp_*.
+
+       * Makefile.am (libmp_la_DEPENDENCIES): Add some mpz files needed
+       for new mpz_powm (pow in libmp).
+       (EXTRA_DIST): Add projects directory.
+
+       * mpn/*: Change __mpn to __gmpn.
+       * gmp.h (__MPN): Ditto.
+       * stack-alloc.c,stack-alloc.h: Change __tmp to __gmp_tmp.
+
+       * mpn/generic/sb_divrem_mn.c (mpn_sb_divrem_mn): Avoid gcc 2.7.2.3
+       i386 register handling bug (same as previously in mpn_divrem_classic).
+
+       * mpn/generic/divrem.c: Now contains mpn_divrem, which is not an
+       internal function, so remove warning comment.
+
+       * gmp.texi (Compatibility with Version 2.0.x): Source level only.
+
+2000-04-16  Linus Nordberg  <linus@swox.se>
+
+       * configure.in (hppa1.0*): Prefer c89 to cc.
+       * configure: Regenerate.
+
+2000-04-15  Linus Nordberg  <linus@swox.se>
+
+       * configure.in: If `mpn_path' is set by user on configure command
+       line, use that as path.
+       * configure: Regenerate.
+
+2000-04-15  Linus Nordberg  <linus@swox.se>
+
+       * configure.in (hppa2.0*): Use path "hppa/hppa1_1 hppa" if no
+       64-bit compiler was found.
+       * configure: Regenerate.
+
+2000-04-15  Linus Nordberg  <linus@swox.se>
+
+       * configure.in: Honor `CC' and `CFLAGS' set by user on configure
+       command line.
+       * acinclude.m4 (GMP_PROG_CC_SELECT): Set CFLAGS if not set already.
+       * aclocal.m4: Regenerate.
+       * configure: Regenerate.
+
+2000-04-15  Linus Nordberg  <linus@swox.se>
+
+       * acinclude.m4 (GMP_PROG_CC_FIND): Remove debug output.  Remove
+       commented out code.
+       * aclocal.m4: Regenerate.
+       * configure: Regenerate.
+
+       * configure.in: Make all `-mcpu' options to gcc optional.
+       * configure: Regenerate.
+
+       * tests/rand/Makefile.am: Don't do anything for target 'all'.
+       * tests/rand/Makefile.in: Regenerate.
+
+2000-04-15  Kevin Ryde  <kevin@swox.se>
+
+       * README: Small updates.
+       * NEWS: Add some things about 3.0.
+
+       * mpz/Makefile.am (EXTRA_DIST): Remove dmincl.c.
+
+       * Makefile.am: Use -version-info on libraries, not -release.
+
+       * mpz/tdiv_qr.c: Add mdiv function header #ifdef BERKELEY_MP.
+       * mpbsd/Makefile.am: Use mpz/tdiv_qr.c, not mdiv.c.
+       * Makefile.am (MPBSD_OBJECTS): Change mdiv.lo to tdiv_qr.lo.
+       (libmp_la_DEPENDENCIES): Add mp_clz_tab.lo.
+       * mpbsd/mdiv.c: Remove file.
+
+       * config/mt-linux,mt-m68k,mt-m88110,mt-ppc,mt-ppc64-aix,mt-pwr,
+       mt-sprc8-gcc,mt-sprc9-gcc,mt-supspc-gcc,mt-vax,mt-x86,
+       mpn/config/mt-pa2hpux,mt-sprc9,t-oldgas,t-ppc-aix,t-pwr-aix:
+       Remove configure fragments not used since change to autoconf.
+
+       * mpn/generic/bz_divrem_n.c,sb_divrem_mn.c: Add comment that
+       internal functions are changeable and shouldn't be used directly.
+
+2000-04-15  Linus Nordberg  <linus@swox.se>
+
+       * configure.in: Remove debug output.
+       * configure: Regenerate.
+
+2000-04-15  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/tdiv_qr.c: Don't use alloca directly.
+
+       * mpz/tdiv_qr.c: Fix typo.
+       * mpz/tdiv_r.c: Fix typo.
+       * mpz/tdiv_q.c: Fix typo.
+
+       * configure.in: Disable -march=pentiumpro due to apparent compiler
+       problems.
+
+       * mpz/powm.c: Replace with new code from Paul Zimmermann.
+
+       * mpz/tdiv_q.c: Remove debug code.
+
+       * mpn/generic/divrem.c: Remove C++ style `//' commented-out code.
+       * mpn/generic/sb_divrem_mn.c: Likewise.
+
+2000-04-14  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/cdiv_q.c: Change temp allocation for new requirements of
+       mpz_tdiv_qr.
+       * mpz/fdiv_q.c: Likewise.
+
+       * mpn/sparc64/gmp-mparam.h: Set up parameters for TOOM3.
+
+       * mpz/dmincl.c: Delete file.
+       * mpz/tdiv_qr.c: Rewrite using mpn_tdiv_qr.
+       * mpz/tdiv_r.c: Likewise.
+       * mpz/tdiv_q.c: Likewise.
+
+       * mpn/generic/tdiv_qr.c: New file.
+       * mpn/generic/bz_divrem_n.c: New file.
+       * mpn/generic/sb_divrem_mn.c: New file.
+
+       * gmp-impl.h (MPZ_REALLOC): New macro.
+       (mpn_sb_divrem_mn): Declare.
+       (mpn_bz_divrem_n): Declare.
+       (mpn_tdiv_qr): Declare.
+
+       * configure.in (gmp_mpn_functions): Delete divrem_newt and divrem_1n;
+       add tdiv_qr, bz_divrem_n, and sb_divrem_mn.
+       * mpn/generic/divrem_newt.c: Delete file.
+       * mpn/generic/divrem_1n.c: Delete file.
+
+       * gmp.h (mpn_divrem_newton): Remove declaration.
+       (mpn_divrem_classic): Remove declaration.
+
+       * gmp.h (mpn_divrem): Remove function definition.
+       * mpn/generic/divrem.c: Replace mpn_divrem_classic with a
+       mpn_divrem wrapper.
+
+2000-04-14  Kevin Ryde  <kevin@swox.se>
+
+       * mpf/dump.c, mpz/dump.c, mpn/generic/dump.c,
+       mpn/generic/divrem.c, mpn/generic/divrem_1n.c,
+       mpn/generic/divrem_2.c, mpn/generic/divrem_newt.c,
+       mpn/generic/mul.c, mpn/generic/mul_basecase.c,
+       mpn/generic/mul_n.c, mpn/generic/sqr_basecase.c,
+       mpn/generic/udiv_w_sdiv.c: Add comment that internal functions are
+       changeable and shouldn't be used directly.
+
+       * mpq/div.c: Use DIVIDE_BY_ZERO (previously didn't get an
+       exception on zero divisor).
+
+       * mpf/tests/t-get_d.c, mpz/tests/reuse.c: Add K&R function
+       definitions.
+       * mpz/tests/t-2exp.c: Don't use ANSI-ism 2ul.
+
+       * gmp.texi (Installing MP): Build problem notes for GSYM_PREFIX
+       and ranlib on native SunOS.
+       Particular systems notes about AIX and HPPA shared libraries
+       disabled.
+       (MP Basics): Add that undocumented things shouldn't be used.
+       (Introduction to MP): Add to CPUs listed.
+
+       * acinclude.m4 (GMP_CHECK_ASM_UNDERSCORE): Don't depend on C
+       having "void".
+
+2000-04-13  Linus Nordberg  <linus@swox.se>
+
+       * mpn/pa64/udiv_qrnnd.c (__udiv_qrnnd64): Add K&R function
+       definition.
+
+       * configure.in: Disable shared libraries for hppa*.
+       (mips-sgi-irix6.*): Fix flags for 64-bit gcc.
+       (hppa2.0*-*-*): Prefer c89 to cc.
+       * configure: Regenerate.
+
+       * gmp.h (gmp_randalg_t): Remove comma after last element.
+
+       * tests/rand/t-rand.c: Add copyright notice.
+
+2000-04-13  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/generic/mul_n.c, mpn/generic/gcdext.c, mpz/nextprime.c,
+       mpz/remove.c, mpz/root.c: Add K&R function definitions.
+       * mpz/rrandomb.c: Fix typo in K&R part.
+       * stack-alloc.c: Add K&R style function pointer declarations.
+
+       * mpz/root.c: Use SQRT_OF_NEGATIVE on even roots of negatives.
+       Use DIVIDE_BY_ZERO on a "zero'th" root.
+
+       * configure: Regenerate with autoconf backpatched to fix --srcdir
+       absolute path wildcards that bash doesn't like, change by Linus.
+
+       * gmp.texi (Integer Arithmetic): Document mpz_nextprime.
+       (Miscellaneous Integer Functions): Fix mpz_fits_* formatting.
+       (Installing MP): Comment-out CC and CFLAGS description.
+
+2000-04-13  Linus Nordberg  <linus@swox.se>
+
+       * rand.c (gmp_randinit): Don't combine va_alist with ordinary
+       arguments for non STDC.
+
+2000-04-13  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/nextprime.c: Use proper names of new random types and functions.
+
+       * mpz/rrandomb.c: New file.
+       * mpz/Makefile.am: List it.
+       * mpz/Makefile.in: Regenerate.
+       * Makefile.am: Here too.
+       * Makefile.in: Regenerate.
+       * gmp.h: Declare mpz_rrandomb.
+
+2000-04-12  Linus Nordberg  <linus@swox.se>
+
+       * Makefile.am, demos/Makefile.am, mpbsd/Makefile.am,
+       mpbsd/tests/Makefile.am, mpf/Makefile.am, mpf/tests/Makefile.am,
+       mpn/Makefile.am, mpq/Makefile.am, mpq/tests/Makefile.am,
+       mpz/Makefile.am, mpz/tests/Makefile.am, tests/Makefile.am,
+       tests/rand/Makefile.am (AUTOMAKE_OPTIONS): Add 'no-dependencies'.
+
+       * Makefile.in, demos/Makefile.in, mpbsd/Makefile.in,
+       mpbsd/tests/Makefile.in, mpf/Makefile.in, mpf/tests/Makefile.in,
+       mpn/Makefile.in, mpq/Makefile.in, mpq/tests/Makefile.in,
+       mpz/Makefile.in, mpz/tests/Makefile.in, tests/Makefile.in,
+       tests/rand/Makefile.in: Regenerate.
+
+2000-04-12  Linus Nordberg  <linus@swox.se>
+
+       * randlc.c (gmp_randinit_lc): Disable function.
+       * gmp.texi (Random State Initialization): Remove gmp_randinit_lc.
+
+       * acinclude.m4 (GMP_CHECK_CC_64BIT): Compiling an empty main
+       successfully with `-n32' will have to suffice on irix6.
+       * aclocal.m4: Regenerate.
+
+       * configure.in (sparc): Don't pass -D_LONG_LONG_LIMB to compiler.
+       (mips-sgi-irix6.*): Use compiler option `-n32' rather than `-64'
+       for 64-bit `cc'.  Add options for gcc.
+       * configure: Regenerate.
+
+       * mpf/urandomb.c (mpf_urandomb): Add third parameter 'nbits'.  If
+       'nbits' doesn't make even limbs, shift up result before
+       normalizing.
+
+       * gmp.h (mpf_urandomb): Add parameter to prototype.
+
+       * mpf/urandom.c: Rename file to ...
+       * mpf/urandomb.c: ... this.
+       * Makefile.am (MPF_OBJECTS): Change urandom.lo --> urandomb.lo.
+       * Makefile.in: Regenerate.
+       * mpf/Makefile.am (libmpf_la_SOURCES): Change urandom.c --> urandomb.c.
+       * mpf/Makefile.in: Regenerate.
+
+       * config.in: Regenerate for HAVE_DECL_OPTARG.
+
+       * randraw.c (_gmp_rand): Fix bug with _LONG_LONG_LIMB.
+       (lc): Change return type.
+       Use one temporary storage instead of two.
+       Handle seed of size 0.
+       Avoid modulus operation in some cases.
+       Abort if M is not a power of 2.
+       Fix bug with 64-bit limbs.
+       Fix bug with small seed, small A and large M.
+
+       * tests/rand/gen.c (main): Include gmp.h.  Remove macros MIN, MAX.  Add
+       option '-q'.  Don't demand argument N.  Change parameters in call
+       to mpf_urandomb.
+
+       * tests/rand/t-rand.c: New file for testing random number generation.
+
+       * tests/rand/Makefile.am: Run t-rand for 'make check'.
+       (test, bigtest): Rename to manual-test, manual-bigtest.
+       * tests/rand/Makefile.in: Regenerate.
+
+2000-04-12  Kevin Ryde  <kevin@swox.se>
+
+       * gmp-impl.h: Include config.h before TMP_ALLOC, so
+       --disable-alloca works.
+
+       * mpbsd/Makefile.am: Don't recompile top-level sources here.
+       * Makefile.am (libmp_la_DEPENDENCIES): Put objects here instead,
+       add errno.lo and stack-alloc.lo.
+
+       * mpn/asm-defs.m4: Add a test and message for the unsuitable SunOS m4.
+       * gmp.texi (Installing MP): Update note on SunOS m4 failure.
+
+       * acconfig.h: Add copyright notice using @TOP@.
+
+       * stack-alloc.c: Use _mp_allocate_func, not malloc.
+       * gmp.texi (Installing MP): Note this under --disable-alloca.
+
+       * gmp.texi (Comparison Functions): mpz_cmp_abs => mpz_cmpabs.
+       (Integer Arithmetic): mpz_prime_p not yet implemented, comment out.
+       (Float Arithmetic): mpf_pow_ui now implemented, uncomment-out.
+       (Miscellaneous Float Functions): Add mpf_ceil, mpf_floor, mpf_trunc.
+       (Low-level Functions): Add mpn_random2, with mpn_random.
+
+       * mpn/m68k/mc68020/udiv.S: Rename from udiv.s.
+       * mpn/m68k/mc68020/umul.S: Ditto.
+
+       * mpn/alpha/umul.asm: Rename from umul.s, remove .file and
+       compiler identifiers.
+
+       * mpn/powerpc32/syntax.h: Removed, no longer used.
+
+       * mpn/a29k/udiv.s: Remove .file and compiler identifiers.
+       * mpn/a29k/umul.s: Ditto.
+
+       * mpn/tests/ref.c: Use WANT_ASSERT.
+       * mpn/tests/ref.h: Use _PROTO.
+
+       * mpbsd/configure.in: Removed, no longer required.
+
+       * mpf/div.c: Use DIVIDE_BY_ZERO.
+       * mpf/div_ui.c: Ditto.
+       * mpf/ui_div.c: Ditto.
+       * mpq/inv.c: Ditto.
+       * mpf/sqrt.c: Use SQRT_OF_NEGATIVE.
+       * mpz/sqrt.c: Ditto.
+       * mpz/sqrtrem.c: Ditto.
+
+       * gmp-impl.h (GMP_ERROR,SQRT_OF_NEGATIVE): New macros.
+       (DIVIDE_BY_ZERO): Use GMP_ERROR.
+       (__mp_bases): #define to __MPN(mp_bases).
+
+2000-04-11  Linus Nordberg  <linus@swox.se>
+
+       * tests/rand/stat.c (main): Initialize `l1runs' at declaration.
+
+2000-04-11  Kevin Ryde  <kevin@swox.se>
+
+       * mpz/fib_ui.c: Add K&R function definitions.
+
+       * mpbsd/tests/Makefile.am (TESTS): Add a dummy test to avoid a
+       shell problem with an empty "for tst in $(TESTS) ; ...".
+       * mpbsd/tests/dummy.c: New file.
+
+2000-04-10  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/bin_uiui.c: Delete several unused variables.
+       Add copyright notice.
+       * mpz/bin_ui.c: Add copyright notice.
+
+       * longlong.h: Declare __count_leading_zeros for alpha.
+
+2000-04-10  Linus Nordberg  <linus@swox.se>
+
+       * rand.c (gmp_randinit): Change parameter list to (rstate, alg, ...).
+       * gmp.h: Change prototype accordingly.
+       * mpz/pprime_p.c (millerrabin): Change call accordingly.
+
+       * configure.in: Check for `optarg'.
+       * configure: Regenerate.
+
+       * mpn/Makefile.am: Remove incorrect comment.
+       * mpn/Makefile.in: Regenerate.
+
+       * gmp.h: Rename most of the random number functions, structs and
+         some of the struct members.
+       * rand.c (gmp_randinit): Likewise.
+       * randclr.c (gmp_randclear): Likewise.
+       * randlc.c (gmp_randinit_lc): Likewise.
+       * randlc2x.c (gmp_randinit_lc_2exp): Likewise.
+       * randraw.c (lc): Likewise.
+       (_gmp_rand_getraw): Likewise.
+       * randsd.c (gmp_randseed): Likewise.
+       * randsdui.c (gmp_randseed_ui): Likewise.
+       * gmp.texi: Likewise.
+
+       * gmp.texi: Use three hyphens for a dash.
+       (Low-level Functions): Remove documentation for gmp_rand_getraw.
+       (Random Number Functions): Add info on where to find documentation
+       on the random number functions.
+
+       * tests/rand/Makefile.am (test, bigtest): Quote argument to grep.
+       * tests/rand/Makefile.in: Regenerate.
+
+       * tests/rand/gen.c: Declare optarg, optind, opterr if not already
+       declared.
+       (main): Use new names for the random stuff.
+       (main): Don't use strtoul() if we don't have it.  Use strtol()
+       instead, if we have it.  Otherwise, use atoi().
+       (main): Use srandom/srandomdev for __FreeBSD__ only.
+       (main): Use new parameter order to gmp_randinit().
+
+       * tests/rand/stat.c: Declare optarg, optind, opterr if not already
+       declared.
+
+2000-04-10  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/pprime_p.c: Pass 0L for mpz_scan1.  mpz_mmod => mpz_mod.
+       (millerrabin): Use new random interface.
+       (millerrabin): ... and don't forget to call gmp_randclear.
+
+       * mpz/nextprime.c: New file.
+       * gmp.h: Declare mpz_nextprime.
+       * mpz/Makefile.am: List nextprime.c.
+       * mpz/Makefile.in: Regenerate.
+       * Makefile.am: List mpz/nextprime.lo.
+       * Makefile.in: Regenerate.
+
+2000-04-10  Kevin Ryde  <kevin@swox.se>
+
+       * move-if-change, mpz/tests/move-if-change, mpq/tests/move-if-change,
+       mpf/tests/move-if-change: Remove, no longer used.
+
+       * Makefile.am (SUBDIRS): Add tests, demos, mpbsd.
+       (libmp.la): New target, conditional on WANT_MPBSD.
+       (libgmp_la_LIBADD): Add -lm.
+       (AUTOMAKE_OPTIONS): Add check-news.
+       (include_HEADERS): Setup to install gmp.h and possibly mp.h.
+       (DISTCLEANFILES): Add generated files.
+       (check): Remove explicit target (now uses check-recursive).
+
+       * configure.in: Use AM_CONFIG_HEADER.
+       Add --enable-mpbsd setting automake conditional WANT_MPBSD.
+       Output demos/Makefile, mpbsd/Makefile and mpbsd/tests/Makefile.
+
+       * mpz/Makefile.am: Add SUBDIRS=tests, shorten INCLUDES since now
+       using AM_CONFIG_HEADER.
+       * mpq/Makefile.am: Ditto.
+       * mpf/Makefile.am: Ditto, and add DISTCLEANFILES.
+       * mpn/Makefile.am: Shorten INCLUDES, amend some comments.
+       * mpz/tests/Makefile.am: Use TESTS and $(top_builddir).
+       * mpf/tests/Makefile.am: Ditto.
+       * mpq/tests/Makefile.am: Ditto.
+       * demos/Makefile.am: New file.
+
+       * mpbsd/Makefile.am: New file, derived from old mpbsd/Makefile.in.
+       * mpbsd/Makefile.in: Now generated from Makefile.am.
+       * mpbsd/realloc.c: Removed, use mpz/realloc.c instead.
+       * mpbsd/tests/Makefile.am: New file.
+       * mpbsd/tests/Makefile.in: New file, generated from Makefile.am.
+       * mpbsd/tests/allfuns.c: New file.
+
+       * gmp.texi (Top): Use @ifnottex, to help makeinfo --html.
+       (Installing MP): Describe --enable-mpbsd and demo programs.
+
+       * tests/rand/statlib.c: mpz_cmp_abs => mpz_cmpabs.
+
+       * tests/rand/Makefile.am (LDADD): Don't need -lm (now in libgmp.la).
+       (EXTRA_PROGRAMS): Not noinst_PROGRAMS.
+       (INCLUDES): Shorten to -I$(top_srcdir) now using AM_CONFIG_HEADER.
+
+2000-04-09  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/urandomm.c: Get type of count right.
+       Simplify computation of nbits.
+
+2000-04-08  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/urandomb.c: Fix reallocation condition.
+       Simplify size computation.
+
+2000-04-08  Linus Nordberg  <linus@swox.se>
+
+       * acinclude.m4 (GMP_CHECK_CC_64BIT): Add special handling for
+       HPUX.
+       (GMP_CHECK_ASM_W32): Ditto.
+       * aclocal.m4: Regenerate.
+
+       * mpn/Makefile.am: Use $(CCAS) for assembling.
+       (.asm.obj): Add rule.
+       * mpn/Makefile.in: Regenerate.
+
+       * gmp.texi (Miscellaneous Integer Functions): Fix typos.
+
+       * configure.in: Never pass `-h' to grep.
+       (mips-sgi-irix6.[2-9]*): Try to find 64-bit compiler.
+       (hppa1.0*-*-*): New flag for cc.
+       (hppa2.0*-*-*): Try to find 64-bit compiler.  Choose path, set
+       CCAS.
+       * configure: Regenerate.
+
+2000-04-08  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/bin_ui.c: Don't depend on ANSI C features.
+       * mpz/bin_uiui.c: Likewise.
+
+       * Makefile.am (MPZ_OBJECTS): mpz/cmp_abs* => mpz/cmpabs*.
+       (MPQ_OBJECTS): Add mpq/set_d.lo.
+       (MPZ_OBJECTS): Add mpz/fits*.lo.
+       * Makefile.in: Regenerate.
+
+       * mpz/cmpabs.c: New name for mpz/cmp_abs.c.
+       * mpz/cmpabs_ui.c: New name for mpz/cmp_abs_ui.c.
+       * mpz/Makefile.am: Corresponding changes.
+       * mpz/Makefile.in: Regenerate.
+       * gmp.h: mpz_cmp_abs* => mpz_cmpabs*.
+
+       * mpz/addmul_ui.c (mpn_neg1): Don't depend on ANSI C features.
+
+       * mpz/invert.c: Use TMP_MARK since we invoke MPZ_TMP_INIT.
+
+       * gmp.h (mpq_set_d): Declare correctly.
+       (mpz_root): Use _PROTO.
+       (mpz_remove): Use _PROTO.
+       (mpf_pow_ui): Use _PROTO.
+
+       * mpn/asm-defs.m4 (MPN_PREFIX): Revert previous change.
+       * gmp.h (__MPN): Revert previous change.
+
+       * mpz/perfpow.c: De-ANSI-fy.  Add copyright notice.
+
+       * mpz/set_d.c: Misc cleanups.
+
+       * mpq/set_d.c: New file.
+       * gmp.h: Declare mpq_set_d.
+       * mpq/Makefile.am: List set_d.c.
+       * mpq/Makefile.in: Regenerate.
+
+2000-04-07  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/fits_sint_p.c: New file.
+       * mpz/fits_slong_p.c: New file.
+       * mpz/fits_sshort_p.c: New file.
+       * mpz/fits_uint_p.c: New file.
+       * mpz/fits_ulong_p.c: New file.
+       * mpz/fits_ushort_p.c: New file.
+       * gmp.h: Declare mpz_fits_*.
+       * mpz/Makefile.am: List fits_* files.
+       * mpz/Makefile.in: Regenerate.
+
+2000-04-06  Kevin Ryde  <kevin@swox.se>
+
+       * gmp.texi (Installing MP): Add known build problem SunOS 4.1.4 m4
+       failure.
+
+       * mpn/x86/pentium/gmp-mparam.h: Tune thresholds.
+       * mpn/x86/p6/gmp-mparam.h: Ditto.
+       * mpn/x86/k6/gmp-mparam.h: Tune thresholds, add UMUL_TIME, UDIV_TIME.
+       * mpn/x86/k7/gmp-mparam.h: Tune thresholds, amend UMUL_TIME.
+
+       * mpn/generic/mul_n.c (mpn_kara_mul_n): Add an ASSERT.
+       (mpn_kara_sqr_n): Add an ASSERT, use KARATSUBA_SQR_THRESHOLD.
+       (mpn_toom3_sqr_n): Eliminate second evaluate3.
+
+       * gmp-impl.h (mpn_com_n,MPN_LOGOPS_N_INLINE): Don't allow size==0.
+       (tune_mul_threshold,tune_sqr_threshold): Conditionalize
+       declarations on TUNE_PROGRAM_BUILD.
+
+       * mpn/generic/sqr_basecase.c: Add an assert.
+
+2000-04-05  Torbjorn Granlund  <tege@swox.com>
+
+       * gmp.h, mpn/asm-defs.m4: List the same functions for __MPN, but
+       leave some commented out.
+
+       * gmp-impl.h (MPN_LOGOPS_N_INLINE): Optimize.
+       (mpn_com_n): Optimize.
+
+       * gmp.h (__MPN): Make it use __gmpn instead of __mpn for consistency.
+       * mpn/asm-defs.m4 (MPN_PREFIX): Likewise.
+
+       * gmp.h (GMP_ERROR_ALLOCATE): New errcode.
+
+       * gmp-impl.h (MPN_MUL_N_RECURSE): Delete.
+       (MPN_SQR_RECURSE): Delete.
+
+       * gmp-impl.h (TARGET_REGISTER_STARVED): New define.
+
+       * gmp-impl.h (mpn_kara_sqr_n): Remap with __MPN.
+       (mpn_toom3_sqr_n): Likewise.
+       (mpn_kara_mul_n): Likewise.
+       (mpn_toom3_mul_n): Likewise.
+       (mpn_reciprocal): Likewise.
+
+       * gmp-impl.h (__gmpn_mul_n): Remove declaration.
+       (__gmpn_sqr): Likewise.
+       * gmp.h (mpn_sqr_n): Declare/remap.
+       * mpn/generic/mul.c (mpn_sqr_n): New name for mpn_sqr.
+
+       * gmp.h (mpn_udiv_w_sdiv): Move __MPN remap from here...
+       * gmp-impl.h: ...to here.
+
+2000-04-05  Linus Nordberg  <linus@swox.se>
+
+       * gmp.texi (Top): Add `Random Number Functions' to menu.
+       (Introduction to MP): Fix typo.
+       (MP Basics): Create menu for all sections.  Move `Random Number
+       Functions' to its own chapter.  Add nodes for all sections.
+       (Function Classes): Mention random generation functions under
+       miscellaneous.
+       (Miscellaneous Integer Functions): Update mpz_urandomb,
+       mpz_urandomm.
+       (Low-level Functions): Remove mpn_rawrandom.
+       (Random State Initialization): Update.
+
+       * mpf/urandom.c (mpf_urandomb): Remove SIZE parameter.  Normalize
+       result correctly.
+
+       * gmp.h (mpf_urandomb): Remove SIZE parameter.
+
+       * randraw.c (gmp_rand_getraw): Handle the case where (1) the LC
+       scheme doesn't generate even limbs and (2) more than one LC
+       invocation is necessary to produce the requested number of bits.
+
+2000-04-05  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/mul_n.c (INVERSE_3): New name for THIRD, define for
+       any BITS_PER_MP_LIMB.
+       (MP_LIMB_T_MAX): New.
+       (mpn_divexact3_n): Remove.
+       (interpolate3): Use mpn_divexact_by3 instead of mpn_divexact3_n.
+
+2000-04-05  Kevin Ryde  <kevin@swox.se>
+
+       * gmp-impl.h (KARATSUBA_MUL_THRESHOLD<2): Remove cpp test.
+       (tune_mul_threshold,tune_sqr_threshold): Add declarations, used in
+       development only.
+
+       * mpn/x86/k7/sqr_basecase.asm: New file, only a copy of k6 for now.
+
+2000-04-04  Torbjorn Granlund  <tege@swox.com>
+
+       * gmp-impl.h (TOOM3_MUL_THRESHOLD): Provide default.
+       (TOOM3_SQR_THRESHOLD): Provide default.
+
+       * mpn/generic/mul_n.c: Rewrite (mostly by Robert Harley).
+       * mpn/generic/mul.c: Rewrite (mostly by Robert Harley).
+
+       * configure.in (sparcv9 64-bit OS): Set extra_functions.
+
+2000-04-04  Linus Nordberg  <linus@swox.se>
+
+       * mpn/generic/rawrandom.c: Remove file and replace with randraw.c
+       on top level.
+       (mpn_rawrandom): Rename to gmp_rand_getraw.
+
+       * randraw.c: New file; essentially a copy of
+       mpn/generic/rawrandom.c.
+       (gmp_rand_getraw): New function (formerly known as mpn_rawrandom).
+
+       * mpz/urandomb.c (mpz_urandomb): Change mpn_rawrandom -->
+       gmp_rand_getraw.
+       * mpz/urandomm.c (mpz_urandomm): Ditto.
+       * mpf/urandom.c (mpf_urandomb): Ditto.
+
+       * gmp.h (gmp_rand_getraw): Add function prototype.
+       (mpn_rawrandom): Remove function prototype.
+
+       * Makefile.am (libgmp_la_SOURCES): Add randraw.c.
+       * Makefile.in: Regenerate.
+
+       * configure.in (gmp_mpn_functions): Remove rawrandom.
+       * configure: Regenerate.
+
+2000-04-04  Linus Nordberg  <linus@swox.se>
+
+       * gmp.h (GMP_ERROR enum): Remove comma after last enumeration
+       since the AIX compiler (xlc) doesn't like that.
+
+       * randlc.c (gmp_rand_init_lc): Allocate enough space for seed to
+       hold any upcoming seed.
+       * randlc2x.c (gmp_rand_init_lc_2exp): Likewise.
+
+       * mpn/generic/rawrandom.c: Remove debugging code.
+       (mpn_lc): Don't reallocate seed.
+
+       * mpz/urandomm.c (mpz_urandomm): Implement function.
+
+       * mpz/urandomb.c (mpz_urandomb): Fix typo in function definition.
+
+2000-04-04  Kevin Ryde  <kevin@swox.se>
+
+       * make.bat: Removed (no longer works, no longer supported).
+       * mpn/msdos/asm-syntax.h: Removed (was used only by make.bat).
+
+2000-04-03  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/brandom.c: New file, replacing random2.
+
+2000-04-02  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/sparc32/v9/submul_1.asm: Change some carry-form instructions
+       into their plain counterparts.
+
+       * mpn/sparc64/copyi.asm: Avoid executing ALIGN.
+
+       * mpn/sparc64/mul_1.asm: Handle overlap of rp/sp.
+       * mpn/sparc64/addmul_1.asm: Likewise.
+       * mpn/sparc64/submul_1.asm: Likewise.
+
+2000-04-01  Linus Nordberg  <linus@swox.se>
+
+       * gmp.h: Fix function prototypes for randomization functions.
+       (__gmp_rand_lc_scheme_struct): Replace `m' with `m2exp'. Remove
+       unused `bits'.
+       (__gmp_rand_data_lc): Add `m2exp' as another way of representing
+       the modulus.
+       (__gmp_rand_state_struct): Remove unused `size'.
+
+       * rand.c (__gmp_rand_scheme): Use better multipliers.  Remove test
+       schemes.  Replace `m' with `m2exp'.
+       (gmp_rand_init): Change parameters and return type.  Use `m2exp'
+       instead of `m'.  Set `gmp_errno' on error.  Disable BBS algorithm.
+
+       * randlc.c (gmp_rand_init_lc): Don't use malloc().  Change
+       parameters.
+
+       * randclr.c (gmp_rand_clear): Don't use free().  Disable BBS
+       algorithm.  Set `gmp_errno' on error.
+
+       * randlc2x.c (gmp_rand_init_lc_2exp): New function.
+       * randsd.c (gmp_rand_seed): New function.
+       * randsdui.c (gmp_rand_seed_ui): New function.
+       * randlcui.c: Remove unused file.
+
+       * mpn/generic/rawrandom.c (mpn_rawrandom): Rewrite.
+       (mpn_lc): New static function.
+
+       * mpz/urandomb.c (mpz_urandomb): Use ABSIZ() instead of SIZ() for
+       determining size of ROP.
+
+       * mpf/urandom.c (mpf_urandomb): Add third parameter, nbits.  (Not
+       used yet!)
+       Change parameter order to mpn_rawrandom().
+
+       * Makefile.am (libgmp_la_SOURCES): Add errno.c, randlc2x.c,
+       randsd.c, randsdui.c.  Remove randui.c.
+       (MPZ_OBJECTS): Rename urandom.lo --> urandomb.lo.  Add urandomm.lo.
+       * Makefile.in: Regenerate.
+
+       * mpz/Makefile.am (libmpz_la_SOURCES): Change urandom.c -->
+       urandomb.c.  Add urandomm.c.
+       * mpz/Makefile.in: Regenerate.
+
+       * tests/rand/Makefile.am (noinst_PROGRAMS): Change findcl --> findlc.
+       Add gen.static.
+       * tests/rand/Makefile.in: Regenerate.
+
+       * tests/rand/gen.c (main): Add mpz_urandomm.  Add command line options
+       `-C', `-m', extend `-a'.  Use *mp*_*rand*() with new parameters.  Call
+       gmp_rand_seed().
+
+2000-04-01  Kevin Ryde  <kevin@swox.se>
+
+       * acinclude.m4 (GMP_CHECK_ASM_DATA): Plain .data for hpux.
+       * configure.in (CCAS): No CFLAGS, they're added when it's used.
+       (CONFIG_SRCDIR): New define for config.m4.
+       * mpn/sparc64/addmul_1.asm: Use it for an include().
+       * mpn/sparc64/submul_1.asm: Ditto.
+       * mpn/sparc64/mul_1.asm: Ditto.
+
+2000-03-31  Linus Nordberg  <linus@swox.se>
+
+       * mpz/urandom.c: Rename to...
+       * mpz/urandomb.c: ...this.
+
+       * mpz/urandomb.c (mpz_urandomb): Change operand order in call to
+       mpn_rawrandom().  Use ABSIZ() instead of SIZ() when checking size
+       of ROP.
+
+       * mpz/urandomm.c: New file.
+
+2000-03-31  Kevin Ryde  <kevin@swox.se>
+
+       * acinclude.m4 (GMP_CHECK_ASM_MMX): Give a warning when mmx code
+       will be omitted.
+
+2000-03-30  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/sparc64/mul_1h.asm: New file.
+       * mpn/sparc64/addmul_1h.asm: New file.
+       * mpn/sparc64/submul_1h.asm: New file.
+       * mpn/sparc64/mul_1.asm: Rewrite.
+       * mpn/sparc64/addmul_1.asm: Rewrite.
+       * mpn/sparc64/submul_1.asm: Rewrite.
+
+2000-03-28  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/sparc32/v9/mul_1.asm: Fix typo in branch prediction.
+       * mpn/sparc32/v9/addmul_1.asm: Likewise.
+       * mpn/sparc32/v9/submul_1.asm: Likewise.
+
+2000-03-25  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/lisp/gmpasm-mode.el: Fix some comment detection, use custom,
+       fontify more keywords, turn into a standalone mode.
+
+       * stamp-vti: New file, generated together with version.texi.
+
+       * acinclude.m4 (GMP_VERSION,GMP_HEADER_GETVAL): New macros.
+       * configure.in (AM_INIT_AUTOMAKE): Use GMP_VERSION.
+
+2000-03-24  Kevin Ryde  <kevin@swox.se>
+
+       * INSTALL: Updates for new configure system.
+
+       * configure.in: Add gmp_optcflags_gcc for the x86s, setting -mcpu
+       and -march.
+
+2000-03-23  Torbjorn Granlund  <tege@swox.com>
+
+       * demos/pexpr.c (mpz_eval_expr): Properly initialize rhs/lhs
+       for ROOT.
+
+2000-03-23  Kevin Ryde  <kevin@swox.se>
+
+       * config.guess (i?86:*:*:*): Use uname -m if detection program fails.
+
+       * mpn/x86/README: Remove remarks on the now implemented MMX shifts.
+       * mpn/x86/k6/README: Add speed of mpn_divexact_by3, update mpn_mul_1.
+
+       * gmp.texi (Installing MP): Corrections to target CPUs.
+
+       * version.c: Use VERSION from config.h, add copyright comment,
+       restore "const" somehow lost.
+
+       * configure.in (a29k*-*-*): Fix directory name.
+
+2000-03-22  Torbjorn Granlund  <tege@swox.com>
+
+       * demos/pexpr.c (op_t): Add ROOT.
+       (fns): Add ROOT.
+       (mpz_eval_expr): Add ROOT.
+
+       * mpz/root.c: Handle roots of negative numbers.
+       Fix other border cases.
+       Fix rare memory leakage.
+
+       * errno.c: New file.
+
+2000-03-21  Torbjorn Granlund  <tege@swox.com>
+
+       * gmp.h (error number enum): New anonymous enum.
+       (gmp_errno): New.
+
+       * gmp.h (__GNU_MP_VERSION, __GNU_MP_VERSION_MINOR): Bump for GMP 3.0.
+
+2000-03-20  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/alpha/unicos.m4 (FLOAT64): New define.
+       * mpn/alpha/default.m4 (FLOAT64): New define.
+       * mpn/alpha/invert_limb.asm (C36): Use FLOAT64.
+
+2000-03-21  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/x86/k6/diveby3.asm: Tiny speedup.
+
+       * acinclude.m4 (GMP_CHECK_ASM_SHLDL_CL): New macro.
+       * configure.in: Use it, set WANT_SHLDL_CL in config.m4.
+       * mpn/x86/x86-defs.m4 (shldl,shrdl,shldw,shrdw): New macros, using
+       WANT_SHLDL_CL.
+       * mpn/x86/k6/mmx/lshift.asm: Use shldl macro.
+       * mpn/x86/k7/mmx/lshift.asm: Ditto.
+       * mpn/x86/pentium/mmx/lshift.asm: Ditto.
+       * mpn/x86/k6/mmx/rshift.asm: Use shrdl macro.
+       * mpn/x86/k7/mmx/rshift.asm: Ditto.
+       * mpn/x86/pentium/mmx/rshift.asm: Ditto.
+       * mpn/x86/README.family: Add a note about this.
+
+2000-03-20  Linus Nordberg  <linus@swox.se>
+
+       * mpn/generic/rawrandom.c (mpn_rawrandom): Handle seed value of 0
+       correctly.
+
+       * configure.in: Fix detection of alpha flavour.
+       Set compiler options for `sparcv8'.
+       * configure: Regenerate.
+
+       * rand.c (__gmp_rand_scheme): Clean up some.  Use slightly better
+       multipliers.
+
+       * configure.in (AC_OUTPUT): Add tests/Makefile and
+       tests/rand/Makefile.
+
+       * acinclude.m4 (AC_CANONICAL_BUILD): Define to
+       `_AC_CANONICAL_BUILD' to deal with incompatibilities between
+       Autoconf and Libtool.
+       (AC_CHECK_TOOL_PREFIX): Likewise.
+
+       * Makefile.am (EXTRA_DIST): Add directory `tests'.
+
+       * mkinstalldirs: Update (Automake 2000-03-17).
+       * ltconfig: Update (Libtool 2000-03-17).
+       * ltmain.sh: Ditto.
+
+       * configure: Regenerate with new autoconf/-make/libtool suite.
+       * aclocal.m4: Ditto.
+       * config.in: Ditto.
+       * all Makefile.in's: Ditto.
+
+2000-03-20  Torbjorn Granlund  <tege@swox.com>
+
+       * demos/pexpr.c (main): Don't allow `-N' for base, require `-bN'.
+
+       * mpn/alpha/unicos.m4 (cvttqc): New define.
+       * mpn/alpha/invert_limb.asm: Use new define for cvttqc.
+
+2000-03-19  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/x86/k6/sqr_basecase.asm: Tiny amendments for 3x3 case.
+
+       * gmp.texi: Use @include version.texi.
+       Use @email and @uref.
+       (Installing MP): Rewrite for new configure.
+       (Low-level Functions): Add mpn_divexact_by3.
+
+       * configure.in (--enable-alloca): New option.
+       * acconfig.h (USE_STACK_ALLOC): For --disable-alloca.
+
+2000-03-18  Kent Boortz  <kent@swox.com>
+
+       * macos: New directory with macos port files.
+
+2000-03-17  Torbjorn Granlund  <tege@swox.com>
+
+       * gmp-impl.h (union ieee_double_extract): Check _CRAYMPP.
+
+       * mpn/asm-defs.m4 (invert_normalized_limb): Define.
+
+       * mpn/alpha: Translate `.s' files to `.asm'.
+
+       * configure: Regenerate.
+
+       * mpn/alpha/invert_limb.asm: Replace dash in file name with underscore.
+       * configure.in: Corresponding change.
+
+       * configure.in: Assign special "path" for alphaev6.
+
+       * mpn/alpha/unicos.m4: New file.
+       * configure.in (alpha*-cray-unicos*): [This part of the change
+       committed 2000-03-13 by linus]
+       * mpn/alpha/default.m4: New file.
+       * configure.in (alpha*-*-*): Use it.
+
+2000-03-17  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/x86/pentium/rshift.S: Use plain rcrl (not rcrl $1) for
+       shift-by-1 case, significant speedup.
+       * mpn/x86/pentium/README: Add shift-by-1 speed.
+
+2000-03-16  Torbjorn Granlund  <tege@swox.com>
+
+       * config.guess: Handle Cray T3D/E.
+
+2000-03-15  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/generic/diveby3.c: New file.
+       * mpn/x86/diveby3.asm: New file.
+       * mpn/x86/k6/diveby3.asm: New file.
+       * gmp.h (mpn_divexact_by3): Prototype and define.
+       * mpn/asm-defs.m4: define_mpn(divexact_by3).
+       * configure.in (gmp_mpn_functions): Add diveby3.
+
+       * mpn/x86/pentium/sqr_basecase.asm: A few better addressing modes.
+
+       * configure.in: Add AC_C_STRINGIZE and AC_CHECK_TYPES((void)).
+       * gmp-impl.h (ASSERT): Use them.
+
+       * mpn/x86/k7/mmx/lshift.asm: New file.
+       * mpn/x86/k7/mmx/rshift.asm: Rewrite simple loop and return value
+       handling, add some pictures.
+
+2000-03-14  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/sparc32/v8/mul_1.asm: Make PIC actually work.
+       * mpn/sparc32/v8/addmul_1.asm: Likewise.
+
+       * mpn/sparc32/v8/mul_1.asm: Use m4 ifdef, not cpp #if.
+       * mpn/sparc32/v8/addmul_1.asm: Likewise.
+
+       * mpn/asm-defs.m4 (C): New define for comments.
+       * mpn/sparc32: Start comments with `C'.
+
+       * config.guess: Remove `SunOS 6' handling.
+       Recognize sun4m and sun4d architectures under old SunOS.
+
+2000-03-14  Linus Nordberg  <linus@swox.se>
+
+       * configure.in (gmp_srclinks): Set to list of links created by
+       configure.
+       * configure: Regenerate.
+
+       * Makefile.am (libgmp_la_LDFLAGS): Set version info.
+       (DISTCLEANFILES): Include @gmp_srclinks@.
+       * Makefile.in: Regenerate.
+
+2000-03-13  Linus Nordberg  <linus@swox.se>
+
+       * configure.in: Remove some changequote's by quoting the strings
+       containing `[]'.
+       Add support for `alpha*-cray-unicos*'.
+       AC_DEFINE `_LONG_LONG_LIMB' instead of passing it in CFLAGS.
+       Conditionalize the assembler syntax checks.
+       * configure: Regenerate.
+       * config.in: Regenerate.
+
+       * acinclude.m4 (GMP_PROG_CCAS): Remove macro.
+       * aclocal.m4: Regenerate.
+
+2000-03-13  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/x86/p6/README: New file.
+
+       * mpn/x86/k6/mul_1.asm: Rewrite, smaller and slightly faster.
+
+       * mpn/lisp/gmpasm-mode.el: Rewrite assembler comment detection and
+       handling.
+
+       * configure.in: Separate mmx directories for each x86 flavour.
+       * configure: Regenerate.
+
+2000-03-12  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/x86/x86-defs.m4 (ALIGN): Supplement definition from
+       config.m4 so as to pad with nops not zeros on old gas.
+
+       * mpn/x86/k7/mmx/copyd.asm: Use plain emms (femms is just an alias
+       for emms now).
+       * mpn/x86/k7/mmx/copyi.asm: Ditto.
+       * mpn/x86/k7/mmx/rshift.asm: Ditto.
+       * mpn/x86/x86-defs.m4: Amend comments.
+
+       * mpn/x86/mod_1.asm: Add comments on speeds.
+
+       * mpn/x86/pentium/mmx/lshift.asm: New file.
+       * mpn/x86/pentium/mmx/rshift.asm: New file.
+       * mpn/x86/pentium/README: Add speeds of various routines.
+
+2000-03-10  Linus Nordberg  <linus@swox.se>
+
+       * configure.in: Reorganize.
+       Use AC_CHECK_TOOL to find `ar'.
+       Add post-includes `regmap.m4' and `aix.m4' for AIX targets.
+       asm-syntax.h is not needed for PPC or sparc anymore.
+       (powerpc64-*-aix*): Compiler is always 64-bit. Use `-q64
+       -qtune=pwr3' to xlc and `-maix64 -mpowerpc64' to gcc.  Pass `-X
+       64' to `ar' and `nm'.
+       (pentiummmx): Use GMP_CHECK_ASM_MMX and avoid MMX assembly path if
+       assembler is not MMX capable.
+       (pentium[23]): Likewise.
+       (athlon): Likewise.
+       (k6*): Likewise.
+       * configure: Regenerate.
+
+       * acinclude.m4 (GMP_PROG_CC_WORKS): New macro.
+       (GMP_PROG_CC_FIND): Use GMP_PROG_CC_WORKS instead of
+       AC_TRY_COMPILER.  Make sure that the *first* working 32-bit
+       compiler is used if no 64-bit compiler is found.
+       (GMP_CHECK_ASM_MMX): New macro.
+       * aclocal.m4: Regenerate.
+
+       * Makefile.in: Regenerate.  (CC_TEST removed.)
+       * mpf/Makefile.in: Likewise.
+       * mpn/Makefile.in: Likewise.
+       * mpq/Makefile.in: Likewise.
+       * mpz/Makefile.in: Likewise.
+       * mpf/tests/Makefile.in: Likewise.
+       * mpq/tests/Makefile.in: Likewise.
+       * mpz/tests/Makefile.in: Likewise.
+
+       * acconfig.h (_LONG_LONG_LIMB): Add.
+
+       * gmp-impl.h: Include config.h only if HAVE_CONFIG_H is defined.
+
+2000-03-09  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/x86/pentium/mul_basecase.S: Small speedup by avoiding an AGI.
+
+       * mpn/x86/k7/mmx/copyd.asm: Tiny speedup by avoiding popl.
+       * mpn/x86/k7/mmx/copyi.asm: Ditto.
+       * mpn/x86/k7/mul_basecase.asm: Ditto.
+
+2000-03-07  Torbjorn Granlund  <tege@swox.com>
+
+       * config.guess: Better recognize POWER/PowerPC processor type.
+
+2000-03-07  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/generic/addsub_n.c: Use HAVE_NATIVE_* now in config.h.
+
+       * mpn/asm-defs.m4: Add comments about SysV m4.
+       (m4_log2): Don't use <<.
+       (m4_lshift,m4_rshift): New macros.
+
+2000-03-06  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/powerpc32/regmap.m4: Map cr0 => `0', etc.
+
+2000-03-06  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/tests/ref.c (refmpn_divexact_by3): New function.
+       * mpn/tests/ref.h: Prototype.
+
+       * acconfig.h (WANT_ASSERT): New define.
+       * configure.in (--enable-assert): Turn on WANT_ASSERT.
+       * assert.c: New file.
+       * Makefile.am: Add to build.
+       * gmp-impl.h (ASSERT): New macro.
+       (ASSERT_NOCARRY): Renamed from assert_nocarry.
+       (MPZ_CHECK_FORMAT): Use ASSERT_ALWAYS.
+       * mpn/tests/ref.c: Use ASSERT.
+       * mpf/get_str.c: Use ASSERT_ALWAYS.
+       * mpf/set_str.c: Remove old assert macro.
+
+       * mpn/x86/x86-defs.m4 (cmovnz_ebx_ecx): New macro.
+       * mpn/x86/p6/aorsmul_1.asm: Use cmov.
+
+       * mpn/x86/lshift.S: Use %dl with testb, not %edx. No object code
+       change, testb was still getting generated.
+       * mpn/x86/rshift.S: Ditto.
+
+2000-03-03  Torbjorn Granlund  <tege@swox.com>
+
+       * longlong.h: Add IA-64 support.
+
+       * mpn/powerpc32: Misc cleanups.
+       * mpn/powerpc32/aix.m4: New file (mainly by Linus).
+       * mpn/powerpc64/aix.m4: New file (mainly by Linus).
+       * mpn/powerpc64: Translate `.S' files to `.asm'.
+
+       * configure.in: Fix tyops.
+       * configure: Regenerate.
+
+2000-03-02  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/powerpc32/regmap.m4: New file.
+       * mpn/powerpc32: Translate `.S' files to `.asm'.
+       * configure.in: Use mpn/powerpc32/regmap.m4 for powerpc targets
+       except some weird ones.
+
+2000-03-03  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/lisp/gmpasm-mode.el: Suppress postscript comment prefixes in
+       filladapt.
+
+       * mpn/x86/pentium/sqr_basecase.asm: New file.
+       * mpn/x86/pentium/gmp-mparam.h (KARATSUBA_SQR_THRESHOLD): Update.
+
+       * configure.in: Add --enable-assert, enable k6 logops functions.
+
+       * mpn/x86/k6/mmx/copyi.asm: Use m4 for divide, not as.
+       * mpn/x86/k6/mmx/copyd.asm: Ditto.
+       * mpn/x86/README.family: Add a note on this.
+
+2000-03-02  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/x86/k6/aors_n.asm: Don't use stosl.
+       * mpn/x86/copyi.asm: Use cld to clear direction flag.
+       * mpn/x86/divrem_1.asm: Ditto.
+       * mpn/x86/README.family: Add a note on this.
+
+       * mpn/x86/k6/mmx/copyi.asm: Rewrite.
+       * mpn/x86/k6/mmx/copyd.asm: New file.
+       * mpn/x86/k6/README: Update, and small amendments.
+
+       * mpn/x86/x86-defs.m4 (Zdisp): New macro.
+       * mpn/asm-defs.m4 (m4_stringequal_p): New macro.
+
+       * mpn/x86/p6/aorsmul_1.asm: Use Zdisp to force zero displacements.
+       * mpn/x86/k6/aorsmul_1.asm: Ditto.
+       * mpn/x86/k6/mul_1.asm: Ditto.
+       * mpn/x86/k6/mul_basecase.asm: Ditto.
+       * mpn/x86/k7/aors_n.asm: Ditto.
+       * mpn/x86/k7/aorsmul_1.asm: Ditto.
+       * mpn/x86/k7/mul_1.asm: Ditto.
+       * mpn/x86/k7/mul_basecase.asm: Ditto.
+       * mpn/x86/README.family: Add a note on this.
+
+2000-02-27  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/generic/divrem.c (mpn_divrem_classic): Patch to avoid gcc
+       2.7.2.3 i386 register handling bug.
+
+       * mpn/x86/k6/aors_n.asm: Rewrite.
+       * mpn/x86/k6/mmx/lshift.asm: Rewrite.
+       * mpn/x86/k6/mmx/rshift.asm: Rewrite.
+       * mpn/x86/k6/README: Update.
+
+       * mpn/x86/k7/mmx/copyd.asm: Support size==0.
+       * mpn/x86/k7/mmx/copyi.asm: Ditto.
+       * mpn/x86/k6/mmx/copyi.asm: Ditto.
+       * gmp-impl.h: Comment size==0 allowed in MPN_COPY_INCR and
+       MPN_COPY_DECR.
+       * configure.in: Enable x86 copyi, copyd; add k6 com_n.
+
+2000-02-25  Torbjorn Granlund  <tege@swox.com>
+
+       * demos/pexpr.c (power): Move factorial handling code from `factor'
+       to `power'.
+
+       * demos/factorize.c (factor_using_pollard_rho): Move resetting of `c'
+       to before checking for a non-zero gcd.
+
+2000-02-25  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/asm-defs.m4 (MULFUNC_PROLOGUE): New macro by Linus.
+       * mpn/x86/k6/aors_n.asm: Use MULFUNC_PROLOGUE.
+       * mpn/x86/k6/aorsmul_1.asm: Ditto.
+       * mpn/x86/k7/aors_n.asm: Ditto.
+       * mpn/x86/k7/aorsmul_1.asm: Ditto.
+       * mpn/x86/p6/aorsmul_1.asm: Ditto.
+
+       * mpn/tests/ref.c (refmpn_copyi,refmpn_copyd): Allow size==0.
+
+       * gmp-impl.h: Move mpn_and_n, mpn_andn_n, mpn_com_n, mpn_ior_n,
+       mpn_iorn_n, mpn_nand_n, mpn_nior_n, mpn_xor_n and mpn_xorn_n here
+       from gmp.h.  Use HAVE_NATIVE_mpn_* to make these functions or
+       inlines.
+
+       * gmp-impl.h: Move mpn_copyd, mpn_copyi here from gmp.h.
+       * gmp-impl.h (MPN_COPY_INCR): Use mpn_copyi if available.
+       * gmp-impl.h (MPN_COPY_DECR): Use mpn_copyd if available.
+
+       * mpn/x86/k6/mmx/com_n.asm: Moved into mmx subdirectory.
+       * mpn/x86/k6/mmx/copyi.asm: Ditto.
+       * mpn/x86/k6/mmx/lshift.asm: Ditto.
+       * mpn/x86/k6/mmx/rshift.asm: Ditto.
+       * mpn/x86/k7/mmx/rshift.asm: Ditto.
+       * mpn/x86/k6/mmx/logops_n.asm: New file.
+       * configure.in (k6*-*-*): Add logops_n.asm.
+       * mpn/x86/k6/README: Update.
+
+       * mpn/x86/k7/mmx/copyi.asm: New file.
+       * mpn/x86/k7/mmx/copyd.asm: New file.
+       * mpn/x86/k7/README: Update.
+
+2000-02-24  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/x86/x86-defs.m4 (femms): Generate emms if 3dnow not available.
+       * mpn/x86/x86-defs.m4 (FRAME_popl): New macro.
+
+       * Makefile.am: Add info_TEXINFOS = gmp.texi.
+
+       * mpn/x86/divrem_1.asm: Moved from mpn/x86/k6, allow size==0,
+       conditionalize loop versus decl/jnz.
+       * mpn/x86/mod_1.asm: Ditto.
+       * mpn/x86/divmod_1.asm: Removed.
+       * gmp.texi (mpn_divrem_1,mpn_mod_1): Add that size==0 is allowed.
+       * mpn/tests/ref.c (refmpn_divrem_1c,etc): Allow size==0.
+
+       * mpn/x86/k6/aors_n.asm: Avoid gas 1.92.3 leal displacement
+       expression problem.
+       * mpn/x86/k6/aorsmul_1.asm: Ditto.
+       * mpn/x86/k6/mul_1.asm: Ditto.
+       * mpn/x86/k6/mul_basecase.asm: Ditto.
+       * mpn/x86/k7/aors_n.asm: Ditto.
+       * mpn/x86/k7/aorsmul_1.asm: Ditto.
+       * mpn/x86/k7/mul_1.asm: Ditto.
+       * mpn/x86/k7/mul_basecase.asm: Ditto.
+       * mpn/x86/k7/rshift.asm: Ditto.
+       * mpn/x86/p6/aorsmul_1.asm: Ditto.
+       * mpn/x86/README.family: Describe problem.
+
+2000-02-24  Linus Nordberg  <linus@swox.se>
+
+       * acinclude.m4 (GMP_CHECK_ASM_LSYM_PREFIX): Add dummy symbol to
+       testcase to avoid nm failure.  Try nm before piping to grep.
+
+       * acconfig.h: Undef HAVE_NATIVE_func for every mpn function found
+       in gmp.h.
+
+       * configure.in: Invoke AC_CONFIG_HEADERS.
+       Don't invoke AM_CONFIG_HEADER; it makes autoconf confused.
+       Dig out entry points declared in assembly code and AC_DEFINE proper
+       HAVE_NATIVE_func.
+
+       * mpn/asm-defs.m4 (MULFUNC_PROLOGUE): New macro.
+
+       * mpn/x86/p6/aorsmul_1.asm: Use MULFUNC_PROLOGUE.
+       * mpn/x86/k6/aors_n.asm: Likewise.
+
+       * Makefile.am (EXTRA_DIST): Add config.in; needed when we don't
+       use AM_CONFIG_HEADER in configure.in.
+
+       * mpn/Makefile.am (INCLUDES): Add `-I..' for config.h and
+       gmp-mparam.h.
+       * mpf/Makefile.am: Likewise.
+       * mpq/Makefile.am: Likewise.
+       * mpz/Makefile.am: Likewise.
+
+       * mpf/tests/Makefile.am (INCLUDES): Add `-I../..' for config.h and
+       gmp-mparam.h.
+       * mpq/tests/Makefile.am: Likewise.
+       * mpz/tests/Makefile.am: Likewise.
+
+       * configure: Regenerate.
+       * aclocal.m4: Regenerate.
+       * config.in: Regenerate.
+       * Makefile.in: Regenerate.
+       * mpf/Makefile.in: Regenerate.
+       * mpn/Makefile.in: Regenerate.
+       * mpq/Makefile.in: Regenerate.
+       * mpz/Makefile.in: Regenerate.
+       * mpf/tests/Makefile.in: Regenerate.
+       * mpq/tests/Makefile.in: Regenerate.
+       * mpz/tests/Makefile.in: Regenerate.
+
+2000-02-23  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/x86/addmul_1.S: Amend comments, this code no longer used by
+       PentiumPro.
+       * mpn/x86/submul_1.S: Ditto.
+
+       * mpn/x86/k6/com_n.asm: Rewrite, smaller but same speed.
+
+       * mpn/x86/addmul_1.S: Add PROLOGUE and EPILOGUE to get .type and
+       .size for ELF.  Rename #define size to n to avoid .size.
+       * mpn/x86/lshift.S: Ditto.
+       * mpn/x86/mul_1.S: Ditto.
+       * mpn/x86/mul_basecase.S: Ditto.
+       * mpn/x86/rshift.S: Ditto.
+       * mpn/x86/submul_1.S: Ditto.
+       * mpn/x86/udiv.S: Ditto.
+       * mpn/x86/umul.S: Ditto.
+       * mpn/x86/pentium/add_n.S: Ditto.
+       * mpn/x86/pentium/addmul_1.S: Ditto.
+       * mpn/x86/pentium/lshift.S: Ditto.
+       * mpn/x86/pentium/mul_1.S: Ditto.
+       * mpn/x86/pentium/mul_basecase.S: Ditto.
+       * mpn/x86/pentium/rshift.S: Ditto.
+       * mpn/x86/pentium/sub_n.S: Ditto.
+       * mpn/x86/pentium/submul_1.S: Ditto.
+
+2000-02-22  Linus Nordberg  <linus@swox.se>
+
+       * acinclude.m4 (GMP_INIT): Use temporary file cnfm4p.tmp for
+       post-defines.
+       (GMP_FINISH): Ditto.
+       (GMP_DEFINE): Add third optional argument specifying location in
+       outfile.
+       (GMP_DEFINE_RAW): New macro.
+       * aclocal.m4: Regenerate.
+
+       * configure.in: Add `HAVE_TARGET_CPU_$target_cpu' using
+       GMP_DEFINE_RAW.
+       * configure: Regenerate.
+
+       * mpz/tests/Makefile.am: New test t-root.
+       * mpz/tests/Makefile.in: Regenerate.
+
+2000-02-22  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/root.c: Complete rewrite; still primitive, but at least correct.
+       * mpz/tests/t-root.c: New test.
+
+2000-02-22  Kevin Ryde  <kevin@swox.se>
+
+       * mpn/x86/k7/mul_basecase.asm: New file.
+       * mpn/x86/k7/README: Add mpn_mul_basecase speed.
+       * mpn/x86/k7/gmp-mparam.h: New file.
+
+       * mpn/x86/x86-defs.m4 (loop_or_decljnz,cmov_bytes): New macros.
+       * mpn/asm-defs.m4 (m4_ifdef_anyof_p): New macro.
+
+       * mpn/x86/k6/aorsmul_1.asm: New file.
+       * mpn/x86/k6/addmul_1.S: Removed (was a copy of pentium version).
+       * mpn/x86/k6/submul_1.S: Removed (was a copy of pentium version).
+
+       * mpn/x86/p6/aorsmul_1.asm: Use OPERATION_addmul_1 and
+       OPERATION_submul_1.
+       * mpn/x86/k6/aors_n.asm: Use OPERATION_add_n and OPERATION_sub_n.
+       * configure.in: Declare multi-function files for k6 and p6.
+
+       * configure.in: Add HAVE_TARGET_CPU_$target_cpu for config.m4.
+       * mpn/asm-defs.m4 (define_not_for_expansion): New macro.
+
+       * mpn/generic/divrem_1n.c (__gmpn_divrem_1n): New file, split from
+       mpn/generic/divrem_1.c.
+       * mpn/generic/divrem_1.c: Ditto.
+       * configure.in (gmp_mpn_functions): Ditto.
+
+2000-02-21  Torbjorn Granlund  <tege@swox.com>
+
+       * gmp.h: Undo 1996-10-06 NeXT change, it was clearly improperly
+       written.
+
+2000-02-21  Linus Nordberg  <linus@swox.se>
+
+       * configure.in: Link <src>/mpn/asm-defs.m4 to <build>mpn/asm.m4.
+       * configure: Regenerate.
+
+2000-02-21  Linus Nordberg  <linus@swox.se>
+
+       * mpn/x86/k7/aorsmul_1.asm: Change OPERATION_ADDMUL -->
+       OPERATION_addmul_1.  Change OPERATION_SUBMUL -->
+       OPERATION_submul_1.
+
+       * mpn/x86/k7/aors_n.asm: Change OPERATION_ADD --> OPERATION_add_n.
+       Change OPERATION_SUB --> OPERATION_sub_n.
+
+       * mpn/Makefile.am: Pass -DOPERATION_$* to preprocessors.
+       * mpn/Makefile.in: Regenerate.
+
+       * configure.in: Symlink mpn/asm-defs.m4 to build-dir/mpn.  Link
+       multi-function files to mpn/<function>.asm and remove function
+       name from `gmp_mpn_functions'.
+       * configure: Regenerate.
+
+       * acinclude.m4 (GMP_FINISH): Tell user what we're doing.
+       * aclocal.m4: Regenerate.
+
+2000-02-21  Kevin Ryde  <kevin@swox.se>
+
+       * gmp-impl.h: Rename __gmpn_mul_basecase to mpn_mul_basecase and
+       __gmpn_sqr_basecase to mpn_sqr_basecase, remove __gmpn prototypes.
+       * mpn/x86/mul_basecase.S: Ditto.
+       * mpn/x86/pentium/mul_basecase.S: Ditto.
+
+       * configure.in (gmp_m4postinc): Use x86-defs.m4 on athlon-*-* too.
+
+2000-02-20  Kevin Ryde  <kevin@swox.se>
+
+       * acinclude.m4 (GSYM_PREFIX): Drop $1, change by Linus.
+       * mpn/asm-defs.m4 (PROLOGUE,EPILOGUE): Use GSYM_PREFIX as a
+       string, change by Linus.
+       * mpn/x86/x86-defs.m4: Use GSYM_PREFIX as a string.
+
+       * mpn/x86/k6/gmp-mparam.h: New file.
+       * mpn/asm-defs.m4 (m4_warning): New macro.
+
+       * mpn/x86/README: Amendments per new code and directories.
+       * mpn/x86/README.family: New file.
+       * mpn/x86/k6/README: New file.
+       * mpn/x86/k7/README: New file.
+
+       * mpn/generic/mul_n.c: Rename __gmpn_mul_basecase to
+       mpn_mul_basecase and __gmpn_sqr_basecase to mpn_sqr_basecase.
+       * mpn/generic/mul_basecase.c: Ditto.
+       * mpn/generic/sqr_basecase.c: Ditto.
+       * mpn/generic/mul.c: Ditto.
+
+2000-02-19  Linus Nordberg  <linus@swox.se>
+
+       * configure.in: Don't try to symlink more than one multi-func
+       file.
+       * configure: Regenerate.
+
+2000-02-18  Linus Nordberg  <linus@swox.se>
+
+       * acinclude.m4 (GMP_CHECK_ASM_UNDERSCORE): GMP_DEFINE
+       `GSYM_PREFIX'.  Run ACTIONs even when value is found in cache.
+       (GMP_CHECK_ASM_ALIGN_LOG): GMP_DEFINE `ALIGN'.  Run ACTIONs even
+       when value is found in cache.
+       * aclocal.m4: Regenerate.
+
+       * configure.in: Don't define GSYM_PREFIX or ALIGN.
+       Add mechanism for multi-function files.
+       * configure: Regenerate.
+
+2000-02-18  Kevin Ryde  <kevin@swox.se>
+
+       * configure.in (gmp_m4postinc): Enable x86-defs.m4.
+       * mpn/x86/k7/mul_1.asm: Fix include.
+       * mpn/x86/k6/mul_basecase.S: Removed (copy of the pentium version).
+       * mpn/x86/k6/mul_basecase.asm: New file.
+       * mpn/x86/k6/sqr_basecase.asm: New file.
+       * mpn/x86/k6/com_n.asm: New file.
+       * mpn/x86/k6/copyi.asm: New file.
+       * gmp.texi (Low-level Functions): Clarify mpn overlaps permitted.
+       * gmp-impl.h (MPN_OVERLAP_P): New macro.
+       * gmp-impl.h (assert_nocarry): New macro.
+       * mpn/tests/ref.c: New file, based in part on other mpn/tests/*.c.
+       * mpn/tests/ref.h: New file.
+
+2000-02-17  Linus Nordberg  <linus@swox.se>
+
+       * Makefile.am (dist-hook): Don't include any emacs backup files
+       (*.~*) in dist.
+       * Makefile.in: Regenerate.
+
+2000-02-17  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/sparc32/v9/mul_1.asm: Use `rd' to get current PC; get rid of
+       getpc function.
+       * mpn/sparc32/v9/addmul_1.asm: Likewise.
+       * mpn/sparc32/v9/submul_1.asm: Likewise.
+
+2000-02-17  Kevin Ryde  <kevin@swox.se>
+
+       * gmp.h: Add prototypes and defines for mpn_and_n, mpn_andn_n,
+       mpn_com_n, mpn_copyd, mpn_copyi, mpn_ior_n, mpn_iorn_n,
+       mpn_mul_basecase, mpn_nand_n, mpn_nior_n, mpn_sqr_basecase,
+       mpn_xor_n, mpn_xorn_n.
+
+       * mpn/asm-defs.m4: Many additions making up initial version.
+       * mpn/asm-defs.m4 (L): Use defn(`LSYM_PREFIX').
+       * mpn/x86/x86-defs.m4: New file.
+       * mpn/x86/k6/aors_n.asm: New file.
+       * mpn/x86/k6/divmod_1.asm: New file.
+       * mpn/x86/k6/divrem_1.asm: New file.
+       * mpn/x86/k6/lshift.S: Removed (was a copy of the pentium version).
+       * mpn/x86/k6/lshift.asm: New file.
+       * mpn/x86/k6/mod_1.asm: New file.
+       * mpn/x86/k6/mul_1.S: Removed (was a copy of the pentium version).
+       * mpn/x86/k6/mul_1.asm: New file.
+       * mpn/x86/k6/rshift.S: Removed (was a copy of the pentium version).
+       * mpn/x86/k6/rshift.asm: New file.
+       * mpn/x86/k7/aors_n.asm: New file.
+       * mpn/x86/k7/aorsmul_1.asm: New file.
+       * mpn/x86/k7/mul_1.asm: New file.
+       * mpn/x86/k7/rshift.asm: New file.
+       * mpn/x86/p6/aorsmul_1.asm: New file.
+       * mpn/x86/copyi.asm: New file.
+       * mpn/x86/copyd.asm: New file.
+       * mpn/lisp/gmpasm-mode.el: New file.
+
+2000-02-16  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/sparc32/v9/mul_1.asm: Conditionalize for PIC.
+       * mpn/sparc32/v9/addmul_1.asm: Likewise.
+       * mpn/sparc32/v9/submul_1.asm: Likewise.
+       * mpn/sparc32/v8/supersparc/udiv.asm: Likewise.
+       * mpn/sparc32/udiv_fp.asm: Likewise.
+
+2000-02-16  Linus Nordberg  <linus@swox.se>
+
+       * configure.in: Add mechanism for including target specific
+       m4-files in config.m4.
+       * configure: Regenerate.
+
+       * acinclude.m4 (GMP_PROG_CCAS): Begin assembly lines (except
+       labels) with a tab character.  HP-UX demands it.
+       (GMP_CHECK_ASM_SIZE): Ditto.
+       (GMP_CHECK_ASM_LSYM_PREFIX): Ditto.
+       (GMP_CHECK_ASM_LABEL_SUFFIX): Set to empty string for HP-UX.
+       (GMP_CHECK_ASM_GLOBL): Change `.xport' --> `.export'.
+       * aclocal.m4: Regenerate.
+
+2000-02-16  Linus Nordberg  <linus@swox.se>
+
+       * acinclude.m4 (GMP_CHECK_ASM_LSYM_PREFIX): Define LSYM_PREFIX as
+       the prefix only, no argument.
+       * aclocal.m4: Regenerate.
+       * configure: Regenerate.
+
+       * mpn/asm-defs.m4 (L): No argument to LSYM_PREFIX.
+
+2000-02-15  Linus Nordberg  <linus@swox.se>
+
+       * acinclude.m4: Prefix all temporary shell variables with
+       `gmp_tmp_'.
+       (GMP_PROG_CC_FIND): Use defaults if no arguments are passed.
+       Quote use of arguments.
+       (GMP_PROG_CCAS): New macro.
+       (GMP_INIT): New macro.
+       (GMP_FINISH): New macro.
+       (GMP_INCLUDE): New macro.
+       (GMP_SINCLUDE): New macro.
+       (GMP_DEFINE): New macro.
+       (GMP_CHECK_ASM_LABEL_SUFFIX): New macro.
+       (GMP_CHECK_ASM_TEXT): New macro.
+       (GMP_CHECK_ASM_DATA): New macro.
+       (GMP_CHECK_ASM_GLOBL): New macro.
+       (GMP_CHECK_ASM_TYPE): New macro.
+       (GMP_CHECK_ASM_SIZE): New macro.
+       (GMP_CHECK_ASM_LSYM_PREFIX): New macro.
+       (GMP_CHECK_ASM_W32): New macro.
+       * aclocal.m4: Regenerate.
+
+       * configure.in: Find m4 and nm for target.
+       Use new macros to create config.m4.
+       Prefix all temporary shell variables with `tmp_'.
+       Pass `-X 64' to nm for 64-bit PPC target with 64-bit compiler.
+       * configure: Regenerate.
+
+       * Makefile.am (dist-hook): *Really* remove all CVS dirs in
+       dist.
+       * Makefile.in: Regenerate.
+
+       * mpn/Makefile.am: Add target for building .lo and .o from
+       .asm.
+       Pass -DPIC to preprocessor (CPP/m4) when building .lo.
+       Build .o a second time for target .lo, without -DPIC to
+       preprocessor.
+       (SUFFIX): Add `.asm'.
+       (EXTRA_DIST): Add asm-defs.m4.
+       * mpn/Makefile.in: Regenerate.
+
+       * mpf/Makefile.in: Regenerate.
+       * mpf/tests/Makefile.in: Regenerate.
+       * mpq/Makefile.in: Regenerate.
+       * mpq/tests/Makefile.in: Regenerate.
+       * mpz/Makefile.in: Regenerate.
+       * mpz/tests/Makefile.in: Regenerate.
+
+2000-02-15  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/sparc32/udiv_fp.asm: Change `RODATA' to `DATA'.
+       * mpn/sparc32/v8/supersparc/udiv.asm: Likewise.
+       * mpn/sparc32/v9/addmul_1.asm: Likewise.
+       * mpn/sparc32/v9/submul_1.asm: Likewise.
+       * mpn/sparc32/v9/mul_1.asm: Likewise.
+
+       * mpn/sparc32/add_n.asm: Rename `size' -> `n'.
+       * mpn/sparc32/sub_n.asm: Likewise.
+
+       * sparc32: Rename `.s' and `.S' files to `.asm'.
+       * sparc64: Rename `.s' and `.S' files to `.asm'.
+
+2000-02-11  Torbjorn Granlund  <tege@swox.com>
+
+       * config.sub: Adapt to new config.guess sparc naming conventions.
+
+       * config.guess (sun4u:SunOS:5.*:*): Change `sparc9' to `sparcv9'.
+       * config.guess (sun4m:SunOS:5.*:*): Change to sun4[md]:SunOS:5.*:* and
+       change `sparc8' to `sparcv8'.
+
+       * mpn/x86/add_n.S: Use PROLOGUE/EPILOGUE.
+       * mpn/x86/sub_n.S: Likewise.
+
+       * mpn/x86/syntax.h (PROLOGUE): New name for PROLOG.
+       * mpn/x86/syntax.h (EPILOGUE): New name for EPILOG.
+
+2000-02-11  Linus Nordberg  <linus@swox.se>
+
+       * configure.in: Better path for 64-bit sparc without 64-bit cc.
+       Change sparc8 --> sparcv8.
+       Change sparc9 --> sparcv9.
+       * configure: Regenerate.
+
+2000-02-10  Linus Nordberg  <linus@swox.se>
+
+       * configure.in: Use Autoconf.
+       * Makefile.am: New file.
+
+       * AUTHORS: New file.
+       * COPYING: New file.
+       * acinclude.m4: New file.
+       * acconfig.h: New file.
+
+       * configure: Generate.
+       * Makefile.in: Generate.
+       * aclocal.m4: Generate.
+       * config.in: Generate.
+
+       * install.sh: Remove.
+       * install-sh: New file from Automake.
+       * missing: New file from Automake.
+       * ltconfig: New file from Libtool.
+       * ltmain.sh: New file from Libtool.
+
+       * mpf/Makefile.am: New file.
+       * mpf/Makefile.in: Generate.
+       * mpf/configure.in: Remove.
+       * mpf/tests/Makefile.am: New file.
+       * mpf/tests/Makefile.in: Generate.
+       * mpf/tests/configure.in: Remove.
+
+       * mpn/Makefile.am: New file.
+       * mpn/Makefile.in: Generate.
+       * mpn/configure.in: Remove.
+
+       * mpq/Makefile.am: New file.
+       * mpq/Makefile.in: Generate.
+       * mpq/configure.in: Remove.
+       * mpq/tests/Makefile.am: New file.
+       * mpq/tests/Makefile.in: Generate.
+       * mpq/tests/configure.in: Remove.
+
+       * mpz/Makefile.am: New file.
+       * mpz/Makefile.in: Generate.
+       * mpz/configure.in: Remove.
+       * mpz/tests/Makefile.am: New file.
+       * mpz/tests/Makefile.in: Generate.
+       * mpz/tests/configure.in: Remove.
+
+2000-02-10  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/x86/add_n.S: Don't use label L0 twice.
+       * mpn/x86/sub_n.S: Likewise.
+
+2000-01-20  Linus Nordberg  <linus@swox.se>
+
+       * demos/pexpr.c: Don't use setup_error_handler() in windoze.
+
+2000-01-19  Torbjorn Granlund  <tege@swox.com>
+
+       * demos/pexpr.c (sigaltstack): #define to sigstack for AIX.
+       (setup_error_handler): Don't write to ss_size and ss_flags
+       on AIX.
+
+2000-01-11  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/configure.in (hppa2.0*-*-*): Move assignment of
+       target_makefile_frag to where it belongs.
+
+1999-12-21  Torbjorn Granlund  <tege@swox.com>
+
+       * longlong.h (v9 umul_ppmm): New #define.
+       (v9 udiv_qrnnd): New #define.
+
+1999-12-14  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/divmod_1.c: Use invert_limb.
+       * mpn/generic/mod_1.c: Use invert_limb.
+
+       * gmp-impl.h (invert_limb): Put definition here.
+       * mpn/generic/divrem.c (invert_limb): Delete definition.
+       * mpn/generic/divrem_2.c (invert_limb): Delete definition.
+
+       * gmp.h (mpn_divrem): Inhibit for non-gcc.
+       But declare (undo 1999-11-22 change).
+
+       * gmp-impl.h (DItype,UDItype): Do these also if _LONG_LONG_LIMB.
+
+       * longlong.h: Move 64-bit hppa code out of __GNUC__ conditional.
+
+       * stack-alloc.c (HSIZ): New #define.
+       (__tmp_alloc): Use HSIZ instead of sizeof(tmp_stack).
+
+1999-12-10  Torbjorn Granlund  <tege@swox.com>
+
+       * config.sub: Clean up handling of x86 CPUs: Properly recognize
+       Amd CPUs as unique entities.  Use manufacturer's names of
+       processors ("pentium", etc); still match ambiguous names like
+       "i586", "i686", "p6" but be conservative in interpreting them.
+
+       * configure.in: Recognize x86 CPU types known by config.guess.
+       * mpn/configure.in: Likewise.  Add x86/mmx path component as
+       appropriate.
+       (athlon-*-*): Fix typo.
+
+       * config.guess: Update x86 recog code to initially match
+       more than just i386.
+       Call K6-2 and K6-III for "k62" and "k63" respectively.
+
+       * config.guess: Recognize x86 CPU types.
+       Update code for FreeBSD, NetBSD, OpenBSD, Linux.
+
+1999-12-08  Torbjorn Granlund  <tege@swox.com>
+
+       * mpf/pow_ui.c: Avoid final squaring in loop.
+
+1999-12-07  Torbjorn Granlund  <tege@swox.com>
+
+       * gmp-impl.h (udiv_qrnnd_preinv2gen): Prefix local variables with `_'.
+       (udiv_qrnnd_preinv2norm): Likewise.
+       From Kevin Ryde:
+       (HAVE_ALLOCA): #define also if defined (alloca).
+
+1999-12-04  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/tests/add_n.c: Set OPS from CLOCK.
+       * mpn/tests/sub_n.c: Likewise.
+       * mpn/tests/mul_1.c: Likewise.
+       * mpn/tests/addmul_1.c: Likewise.
+       * mpn/tests/submul_1.c: Likewise.
+
+       * mpn/tests/lshift.c: Update from add_n.c.
+       * mpn/tests/rshift.c: Likewise.
+
+1999-12-03  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/powerpc64/copy.S: New file.
+
+1999-12-02  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/sparc64/copy.s: New file.
+
+       * mpn/tests/copy.c: New file.
+
+       * mpn/configure.in: Recognize more Amd CPUs; Set special paths for
+       k7 CPU.
+
+       * configure.in: Recognize Amd x86 CPUs.
+
+       * mpz/fdiv_r_2exp.c: In rounding code, read in->_mp_size before
+       writing to res->_mp_size.
+
+       * mpn/powerpc64/*.S: Clean up assembly syntax, add function headers.
+       * mpn/powerpc64/gmp-mparam.h (KARATSUBA_MUL_THRESHOLD): #define.
+       (KARATSUBA_SQR_THRESHOLD): #define.
+
+       * mpn/tests/add_n.c (main): Only print test number if TIMES==1
+       and not printing.
+       (main): Don't run reference code if NOCHECK.
+       * mpn/tests/sub_n.c: Likewise.
+       * mpn/tests/mul_1.c: Likewise.
+       * mpn/tests/addmul_1.c: Likewise.
+       * mpn/tests/submul_1.c: Likewise.
+
+       * mpn/tests/lshift.c (main): Only print test number if TIMES==1
+       and not printing.
+       * mpn/tests/rshift.c: Likewise.
+
+1999-11-22  Torbjorn Granlund  <tege@swox.com>
+
+       * gmp.h (mpz_init_set_str): Declare using __gmp_const.
+       (mpz_set_str): Likewise.
+       (mpf_init_set_str): Likewise.
+       (mpf_set_str): Likewise.
+       (mpn_set_str): Likewise.
+       (__gmp_0): Likewise.
+       (mpn_divrem): Remove separate declaration; it's defined later in
+       this file.
+
+       * gmp.h: Replace "defined (__STDC__)" by (__STDC__-0) in
+       expressions involving more than one term, to handle Sun's compiler
+       that most helpfully sets __STDC__ to 0.
+       * gmp-impl.h: Likewise.
+       * longlong.h: Likewise.
+
+1999-11-21  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/sparc64/gmp-mparam.h (KARATSUBA_MUL_THRESHOLD): #define.
+       (KARATSUBA_SQR_THRESHOLD): #define.
+
+       * mpn/sparc64/lshift.s: Compensate stack references for odd stack ptr.
+       * mpn/sparc64/rshift.s: Likewise.
+
+       * mpn/sparc64/addmul_1.s: Propagate carry properly.
+       * mpn/sparc64/submul_1.s: Likewise.
+
+       * mpn/sparc64/sub_n.s: Rewrite.
+
+       * mpn/sparc64/sub_n.s: Get operand order for main subcc right
+       (before scrapping this code for new code).
+
+1999-11-20  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/sparc64/add_n.s: Rewrite.
+
+1999-11-17  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/x86/syntax.h (PROLOG): New #define.
+       (EPILOG): New #define.
+
+       * gmp.h (mpn_addsub_n): Declare.
+       * gmp.h (mpn_add_nc): Declare.
+       * gmp.h (mpn_sub_nc): Declare.
+       * mpn/powerpc64/addsub_n.S: New file.
+
+1999-11-17  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/alpha/gmp-mparam.h
+       (KARATSUBA_MUL_THRESHOLD): Only #define #ifndef.
+       (KARATSUBA_SQR_THRESHOLD): Likewise.
+
+1999-11-14  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/x86/mul_1.S: Unroll and optimize for P6 and K7.
+
+1999-11-09  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/x86/p6/gmp-mparam.h
+       (KARATSUBA_MUL_THRESHOLD): Only #define #ifndef.
+       (KARATSUBA_SQR_THRESHOLD): Likewise.
+
+1999-11-05  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/addsub_n.c: New file.
+
+1999-11-02  Torbjorn Granlund  <tege@swox.com>
+
+       * config.guess: Handle alpha:FreeBSD with alpha:NetBSD.
+
+       * configure.in (vax*-*-*): New case.
+       * config/mt-vax: New file.
+       * mpn/vax/add_n.s: Rewrite.
+       * mpn/vax/sub_n.s: Rewrite.
+
+1999-10-31  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/vax/rshift.s: New file.
+       * mpn/vax/lshift.s: New file.
+
+1999-10-29  Torbjorn Granlund  <tege@swox.com>
+
+       * config.sub: Handle k5 and k6.
+       * mpn/configure.in: Recognize k6.
+
+       * mpf/tests/t-get_d.c (LOW_BOUND, HIGH_BOUND): New #defines.
+       (main): Tighten error bounds to 14 digits.
+
+       * longlong.h (default umul_ppmm, when smul_ppmm exists):
+       Rename __m0 => __xm0, __m1 => __xm1.
+       (default smul_ppmm): Likewise.
+
+1999-10-11  Torbjorn Granlund  <tege@swox.com>
+
+       * config.guess: Reverse the test for POWER vs PowerPC.
+       * config.guess (sun4m:SunOS:5.*:*): New case.
+       * config.guess (sun4u:SunOS:5.*:*): New case.
+
+1999-09-29  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/divrem_2.c: Clean up comments.
+
+1999-09-23  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/tests/Makefile.in: Use move-if-change when generating binaries.
+       * mpf/tests/Makefile.in: Likewise.
+       * mpq/tests/Makefile.in: Likewise.
+       * mpz/tests/move-if-change: New file.
+       * mpf/tests/move-if-change: New file.
+       * mpq/tests/move-if-change: New file.
+
+       * gmp.h (mpn_incr_u): New macro (from mpn/generic/mul_n.c).
+       (mpn_decr_u): New macro.
+
+       * mpn/generic/mul_n.c (mpn_incr): Delete.
+       * mpn/generic/mul_n.c: Update usages mpn_incr => mpn_incr_u.
+       * mpn/generic/divrem_newt.c: Use mpn_incr_u and mpn_decr_u instead of
+       mpn_add_1 and mpn_sub_1.
+       * mpn/generic/sqrtrem.c: Likewise.
+       * mpz/cdiv_q_ui.c: Likewise.
+       * mpz/cdiv_qr_ui.c: Likewise.
+       * mpz/fdiv_q_ui.c: Likewise.
+       * mpz/fdiv_qr_ui.c: Likewise.
+
+       * mpn/generic/sqrtrem.c: Start single-limb Newton iteration from 18
+       bits.
+
+1999-07-27  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/divrem_1.c (__gmpn_divrem_1n): New function.
+
+       * mpn/generic/divrem_2.c: New file, code from divrem.c, `case 2:'.
+       * mpn/Makefile.in: Compile divrem_2.c.
+       * make.bat: Compile divrem_2.c.
+       * mpn/configure.in (functions): Add divrem_2.
+       * gmp.h: Declare mpn_divrem_2.
+
+       * mpn/generic/divrem.c: Delete special cases, handle just divisors
+       of more than 2 limbs.
+       * gmp.h (mpn_divrem): Call mpn_divrem_1, mpn_divrem_2, as appropriate.
+
+       * mpn/generic/divrem.c: Rework variable usage for better register
+       allocation.
+
+1999-07-26  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/alpha/ev5/add_n.s: Rewrite for better ev6 speed.
+       * mpn/alpha/ev5/sub_n.s: Likewise.
+
+1999-07-21  Torbjorn Granlund  <tege@swox.com>
+
+       * longlong.h (alpha): Define umul_ppmm for cc.
+
+       * gmp-impl.h (DItype, UDItype): Define for non-gcc if _LONGLONG is
+       defined.
+
+1999-07-15  Torbjorn Granlund  <tege@swox.com>
+
+       * longlong.h (powerpc64 count_leading_zeros): Fix typo.
+       (powerpc64 add_ssaaaa): Fix typos.
+       (powerpc64 sub_ddmmss): Fix typos.
+
+1999-07-14  Torbjorn Granlund  <tege@swox.com>
+
+       * mpz/tests/Makefile.in: Pass XCFLAGS when linking.
+       * mpf/tests/Makefile.in: Likewise.
+       * mpq/tests/Makefile.in: Likewise.
+       * mpn/Makefile.in (.S.o): Pass XCFLAGS.
+
+       * longlong.h: Add support for 64-bit PowerPC.
+       * config.sub: Handle "powerpc64".
+       * configure.in: Likewise.
+       * mpn/configure.in: Suppress use of config/t-ppc-aix for now,
+       it seems compiler passes proper options.
+       * mpn/powerpc64/*.S: New files.
+
+       * Makefile.in (FLAGS_TO_PASS): Pass "AR=$(AR)".
+
+1999-07-07  Torbjorn Granlund  <tege@swox.com>
+
+       * demos/pexpr.c (factor): Change alloca call to a malloc/free pair.
+
+       * mpn/powerpc32/syntax.h: Add #define's for crN.
+
+       * gmp.h (gmp_rand_algorithm): Remove spurious `,'.
+
+1999-07-05  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/generic/divrem_1.c: Normalize divisor when needed.
+
+1999-07-02  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/configure.in (powerpc*-apple-mach): New configuration.
+       * mpn/powerpc32/*: Add support for apple-macho syntax.
+       * mpn/powerpc32/syntax.h: New file.
+       * gmp-impl.h: Don't use `__attribute__' syntax for Apple's perversion
+       of GCC.
+
+1999-05-26  Linus Nordberg  <linus@swox.se>
+
+       * rand.c (gmp_rand_init): Fix typo.
+
+       * mpn/generic/rawrandom.c (mpn_rawrandom): Count bits, not limbs,
+       to keep track of how many rounds to do in loop.  Clean up
+       temporary allocation.  Update `seedsize' inside loop.  Mask off
+       the correct number of bits from final result.  Init `mcopyp' even
+       when not normalizing `m'.
+
+       * randlc.c (gmp_rand_init_lc): Fix typo (don't call
+       mpz_init_set_ui()).
+
+       * mpn/generic/rawrandom.c (mpn_rawrandom): Set SIZ(s->seed) when
+       reallocating.
+
+       * tests/rand/Makefile (test, bigtest): Add 33-bit tests.
+
+       * tests/rand/gen.c (main): Set precision of variable passed to
+       mpf_urandomb().  Add option `-p'.
+
+1999-05-25  Linus Nordberg  <linus@swox.se>
+
+       * randcm.c: Remove.
+       * randcmui.c: Remove.
+       * Makefile.in: Remove randcm and randcmui.
+       * make.bat: Ditto.
+       * gmp-impl.h: Remove prototypes for __gmp_rand_init_common() and
+       __gmp_rand_init_common_ui().
+       * randlc.c (gmp_rand_init_lc): Don't call
+       __gmp_rand_init_common().
+
+       * randlcui.c (gmp_rand_init_lc_ui): Don't call
+       __gmp_rand_init_common_ui().
+
+       * gmp.h (__gmp_rand_state_struct): Remove unused member `maxval'.
+       * randclr.c (gmp_rand_clear): Remove reference to s->maxval.
+       * randcm.c (__gmp_rand_init_common): Ditto
+
+       * mpn/generic/rawrandom.c (mpn_rawrandom): Don't calculate nlimbs
+       twice.
+
+       * gmp.h (__gmp_rand_dist): Remove.
+
+1999-05-24  Linus Nordberg  <linus@swox.se>
+
+       * mpn/generic/rawrandom.c: Clean up comments.
+
+       * gmp.texi: Add documentation for random number generation.
+
+1999-05-21  Linus Nordberg  <linus@swox.se>
+
+       * gmp.h: Typedef `gmp_rand_state' as an array with one element.
+       Change prototypes accordingly.
+       * gmp-impl.h: Change prototypes using `gmp_rand_state'.
+       * rand.c (gmp_rand_init): Take `gmp_rand_state' as argument
+       instead of a pointer to a `gmp_rand_state'.
+       * mpf/urandom.c (mpf_urandomb): Ditto.
+       * mpz/urandom.c (mpz_urandomb): Ditto.
+       * mpn/generic/rawrandom.c (mpn_rawrandom): Ditto.
+       * randcmui.c (__gmp_rand_init_common_ui): Ditto.
+       * randlc.c (gmp_rand_init_lc): Ditto.
+       * randlcui.c (gmp_rand_init_lc_ui): Ditto.
+       * randui.c (gmp_rand_init_ui): Ditto.
+       * randcm.c (__gmp_rand_init_common): Ditto.
+       * randclr.c (gmp_rand_clear): Ditto.
+
+       * tests/rand/gen.c (main): Pass `s' to rand-funcs instead of address
+       of `s'.
+
+1999-05-20  Linus Nordberg  <linus@swox.se>
+
+       * Makefile.in: Rename randi.c --> rand.c, randi_lc.c --> randlc.c,
+       randicom.c --> randcm.c.  Add randui.c, randcmui.c, randlcui.c.
+       * make.bat: Ditto.
+
+       * gmp.h: Add prototypes for gmp_rand_init_ui() and
+       gmp_rand_init_lc_ui().
+       * gmp-impl.h: Add prototypes for __gmp_rand_init_common() and
+       __gmp_rand_init_common_ui().
+
+       * randlc.c, randcm.c, randclr.c, rand.c: Change #include of
+       <gmp.h> to "gmp.h".
+       * randclr.c: Include stdlib.h for free().
+       * rand.c: Include gmp-impl.h.
+
+1999-05-12  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/configure.in: Put generic m68k alternative last.
+
+1999-05-04  Torbjorn Granlund  <tege@swox.com>
+
+       * demos/pexpr.c (setup_error_handler): Use sigemptyset to create
+       empty set (for portability).
+       (fns): Fix typo '#if #if'.
+       (mpz_eval_expr): Implement FERMAT and MERSENNE.
+
+       * demos/pexpr.c: Cast longjmp argument via long to silence warnings on
+       64-bit hosts.
+
+1999-05-03  Torbjorn Granlund  <tege@swox.com>
+
+       * demos/pexpr.c: Add #defines for GMP 1.x and 2.0 compatibility.
+
+       * demos/pexpr.c (setup_error_handler): New function; take signal
+       handler setup code from main(), with major modifications to use modern
+       signal interface.
+       (main): Remove signal handler setup code; call setup_error_handler.
+
+1999-04-29  Linus Nordberg  <linus@swox.se>
+
+       * tests/rand/findcl.c (main): Add option '-i' for interval factor.
+       Separate v and merit lose figures.  Add '-v' for version.
+
+1999-04-28  Linus Nordberg  <linus@swox.se>
+
+       * tests/rand/statlib.c: Change debugging stuff.
+
+       * tests/rand/gmpstat.h: Add debug values definitions.
+
+       * tests/rand/findcl.c (main): Print low and high merit on startup.
+       Print version string on startup.  Catch SEGV and HUP.  Add option -d
+       for debug.  Fix bug making test for v too hard.
+       (sh_status): New function.
+       (sh_status): Flush stdout.  Add RCSID.
+
+1999-04-27  Linus Nordberg  <linus@swox.se>
+
+       * tests/rand/Makefile (clean): Add target.
+
+1999-04-27  Linus Nordberg  <linus.nordberg@canit.se>
+
+       * tests/rand/stat.c: Include gmpstat.h.
+       Add global int g_debug.
+
+       * tests/rand/spect.c: Include <unistd.h>.
+
+       * tests/rand/findcl.c (main): Input is `m', not all factors of `m'.
+       Print only the very first matching multiplier.  Include <unistd.h>.
+       Flush stdout.  Print "done." when done.
+
+       * tests/rand/spect.c: Move everything but main() to statlib.c.
+
+       * tests/rand/findcl.c: New file.
+
+       * tests/rand/gmpstat.h: New file.
+
+       * tests/rand/statlib.c (merit, merit_u, f_floor, vz_dot,
+       spectral_test): New functions.
+
+1999-04-27  Torbjorn Granlund  <tege@swox.com>
+
+       * mpn/configure.in: Fix typo, "sparc-*)" was "sparc)".
+
+1999-04-21  Torbjorn Granlund  <tege@swox.com>
+
+       * config.sub: Recognize ev6.
+
+1999-04-12  Linus Nordberg  <linus.nordberg@canit.se>
+
+       * urandom.c: Split up into randclr.c, randi.c, randi_lc.c,
+       randicom.c.
+       * randclr.c, randi.c, randi_lc.c, randicom.c: New files.
+       * Makefile.in: Remove urandom.  Add randclr, randi, randi_lc,
+       randicom.
+       * make.bat: Ditto
+
+1999-03-31  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * configure.in (sparc9-*-solaris2.[789]*, etc): New alternative.
+       * mpn/configure.in: Use mt-sprc9 also for ultrasparc*-*-solaris2*.
+
+1999-03-30  Linus Nordberg  <linus.nordberg@canit.se>
+
+       * urandom.c (__gmp_rand_scheme): Change NULL->0.
+       Include "gmp.h" instead of <gmp.h>.
+
+1999-03-29  Linus Nordberg  <linus.nordberg@canit.se>
+
+       * gmp.h (__gmp_rand_data_lc): Now holds a, c, m instead of scheme
+       struct.
+       (__gmp_rand_lc_scheme_struct): Remove mpz_t's `a' and `m'.
+
+       * tests/rand/stat.c (f_freq): Don't print 2nd level results if doing
+       1st level.
+
+       * tests/rand/gen.c (main): Set default algorithm to mpz_urandomb.
+       (main): Add option -c.
+
+1999-03-24  Linus Nordberg  <linus.nordberg@canit.se>
+
+       * tests/rand/Makefile (GMPINC): Rename to GMPH.
+       (GMPH): Add gmp-mparam.h.
+       (CFLAGS): Add -I$(GMPLIBDIR)/mpn
+
+1999-03-23  Linus Nordberg  <linus.nordberg@canit.se>
+
+       * Makefile.in: Compile top-dir/urandom.c.
+       * make.bat: Ditto.
+
+       * mpn/Makefile.in: Compile rawrandom.c.
+       * make.bat: Ditto.
+
+       * mpn/configure.in (functions): Add rawrandom.
+
+       * gmp.h (__gmp_rand_scheme_struct): Rename to
+       __gmp_rand_lc_scheme_struct.
+       (__gmp_rand_data_lc): Remove member 'n'.  Allocate a
+       __gmp_rand_lc_scheme_struct instead of a pointer to one.
+       Add prototype for gmp_rand_init_lc(), mpn_rawrandom().
+       New prototype for mpz_urandomb().
+
+       * urandom.c: New file.
+       (__gmp_rand_init_common): New function.
+       (gmp_rand_init_lc): New function.
+       (gmp_rand_init): Don't init data_lc->n.  Call gmp_rand_init_lc()
+       and __gmp_rand_init_common().
+       (gmp_rand_clear): Remove reference to data_lc->n.
+
+       * mpz/urandom.c (gmp_rand_init, gmp_rand_clear): Move to new file
+       urandom.c in top-dir.
+       (mpz_urandomb): Add function parameter nbits.  Call mpn_rawrandom().
+
+       * mpf/urandom.c (mpf_urandomb): Call mpn_rawrandom().
+
+       * mpn/generic/rawrandom.c: New file.
+       (mpn_rawrandom): New function.
+
+1999-03-17  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * extract-dbl.c: When packing result, adjust exp when sc == 0.
+
+       * mpf/tests/t-get_d.c: New file.
+       * mpf/tests/Makefile.in: Compile t-get_d.c.
+
+1999-03-16  Linus Nordberg  <linus.nordberg@canit.se>
+
+       * mpz/urandom.c (__gmp_rand_scheme): Add extra braces around the
+       mpz_t members.
+
+       * make.bat: Compile mpz/urandom.c and mpf/urandom.c
+
+       * tests/rand/statlib.c (ks_table): Use mpf_pow_ui() and exp().
+
+       * tests/rand/gen.c: Include unistd.h for getopt.
+
+1999-03-15  Linus Nordberg  <linus.nordberg@canit.se>
+
+       * mpz/urandom.c (gmp_rand_init): New function.
+       (gmp_rand_clear): New function.
+       (mpz_urandomb): New function.
+
+       * mpz/Makefile.in: Compile urandom.c
+
+       * mpf/urandom.c (mpf_urandomb): New function.
+
+       * mpf/Makefile.in: Compile urandom.c.
+
+       * gmp.h (__gmp_rand_state_struct, __gmp_rand_scheme_struct): New
+       structs for randomization functions.
+       (gmp_rand_dist, gmp_rand_algorithm): New enums for randomization
+       functions.
+       (mpz_urandomb, mpf_urandomb): Add prototype.
+       (gmp_rand_init, gmp_rand_clear): Add prototype.
+
+       * tests/rand/gen.c, stat.c, statlib.c, statlib.h: New files.
+       * tests/rand/Makefile, tests/rand/ChangeLog: New files.
+
+1999-03-15  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * .gdbinit: New file.
+
+       * mpz/dump.c: New file.
+       * mpz/Makefile.in: Compile dump.c.
+       * make.bat: Likewise.
+       * gmp.h (mpz_dump): Declare.
+
+1999-03-14  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpz/tests/reuse.c: Also test mpz_invert and mpz_divexact.
+
+       * mpz/tests/convert.c: Update to GMP 2 variable syntax.
+
+1999-03-13  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpf/README: New file.
+       * mpz/README: New file.
+
+       * mpf/pow_ui.c: New file.
+       * mpf/Makefile.in: Compile pow_ui.c.
+       * make.bat: Likewise.
+       * gmp.h (mpf_pow_ui): Declare.
+
+1999-03-12  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpn/configure.in: Stage 1 of rewrite.
+       * mpn/underscore.h: New name for bsd.h.
+       * mpn/sysv.h: Deleted.
+
+       * mpn/m68k/*: Don't include sysdep.h.
+
+       * mpn/pa64/README: New file.
+
+1999-03-11  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpn/powerpc32/add_n.S: Add support for both AIX and ELF syntax.
+       Renamed from `.s'.
+       * mpn/powerpc32/sub_n.S: Likewise.
+       * mpn/powerpc32/lshift.S: Likewise.
+       * mpn/powerpc32/rshift.S: Likewise.
+       * mpn/powerpc32/mul_1.S: Likewise.
+       * mpn/powerpc32/addmul_1.S: Likewise.
+       * mpn/powerpc32/submul_1.S: Likewise.
+
+       * mpn/powerpc32/umul.S: New file.
+       * mpn/sparc32/v8/umul.S: New file.
+       * mpn/sparc32/umul.S: New file.
+       * mpn/x86/umul.S: New file.
+       * mpn/x86/udiv.S: New file.
+
+       * mpn/Makefile.in (mul_basecase.o): Delete rule.
+
+1999-02-22  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * configure.in (hppa2.0*-*-*): Force use of GCC.
+
+       * extract-dbl.c: Handle IEEE denormalized numbers.  Clean up.
+
+1998-12-02  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpn/Makefile.in (CCAS): New macro.
+       (.s.o): Use CCAS.
+       (.S.o): Likewise.
+
+       * mpn/Makefile.in (mul_basecase.o): Add dependency.
+       (sqr_basecase.o): Likewise.
+       (mod_1.o): Likewise.
+
+       * demos/pexpr.c (cputime): Test also __hpux.
+       (cleanup_and_exit): Check SIGXCPU only #ifdef LIMIT_RESOURCE_USAGE.
+
+       * mpz/tests/t-2exp.c: Use urandom, not random.
+
+       * mpn/configure.in (arm*-*-*): New alternative.
+
+1998-11-30  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * gmp-impl.h (union ieee_double_extract): Special case for
+       little-endian arm.
+       (LIMBS): Alias for PTR.
+
+1998-11-26  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * longlong.h (m68000 umul_ppmm): Use `muluw', not `mulu'.
+       (m68k stuff): Clean up; add coldfire support.
+
+1998-11-23  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpn/mips3/gmp-mparam.h (KARATSUBA_MUL_THRESHOLD): #define.
+       (KARATSUBA_SQR_THRESHOLD): #define.
+
+       * mpn/sparc32/v9/README: New file.
+
+1998-11-20  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpn/x86/README: New file.
+
+       * mpn/arm/gmp-mparam.h: New file.
+       * mpn/pa64/gmp-mparam.h: New file.
+       * mpn/hppa/gmp-mparam.h: New file.
+       * mpn/x86/pentium/gmp-mparam.h: New file.
+       * mpn/sparc32/v9/gmp-mparam.h: New file.
+       * mpn/powerpc32/gmp-mparam.h: New file.
+       * mpn/x86/p6/gmp-mparam.h: New file.
+
+       * mpn/alpha/gmp-mparam.h (KARATSUBA_MUL_THRESHOLD): #define.
+       (KARATSUBA_SQR_THRESHOLD): #define.
+
+       * mpn/configure.in: Point to x86/p6 when appropriate.
+
+       * mpn/power/umul.s: New file.
+       * mpn/power/sdiv.s: New file.
+       * mpn/pa64/addmul_1.S: New file.
+       * mpn/pa64/submul_1.S: New file.
+       * mpn/pa64/mul_1.S: New file.
+       * mpn/pa64/udiv_qrnnd.c: New file.
+       * mpn/pa64/umul_ppmm.S: New file.
+       * mpn/mips2/umul.s: New file.
+       * mpn/m68k/mc68020/umul.s: New file.
+       * mpn/m68k/mc68020/udiv.s: New file.
+       * mpn/hppa/hppa1_1/umul.s: New file.
+       * mpn/alpha/umul.s: New file.
+       * mpn/a29k/udiv.s: New file.
+       * mpn/a29k/umul.s: New file.
+
+1998-11-17  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpn/x86/mul_basecase.S: New file for non-pentiums.
+       * mpn/x86/mul_basecase.S: Move to mpn/x86/pentium.
+
+1998-11-16  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * make.bat: Compile mul_basecase.c and sqr_basecase.c.
+
+1998-11-10  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpz/invert.c: Defer writing to parameter `invert' until
+       end.
+
+1998-11-03  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpn/pa64/udiv_qrnnd.c: Handle more border cases.
+
+1998-10-29  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * insert-dbl.c: Special case biased exponents < 1; Get boundary for
+       Inf right.
+
+       * longlong.h (COUNT_LEADING_ZEROS_NEED_CLZ_TAB): New #define.
+
+1998-10-28  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpn/powerpc32/submul_1.s: Rewrite, optimizing for PPC604.
+       * mpn/powerpc32/addmul_1.s: Likewise.
+       * mpn/powerpc32/lshift.s: Likewise.
+
+1998-10-23  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * config/mt-sprc9-gcc (XCFLAGS): Add -Wa,-xarch=v8plus.
+
+       * mpn/sparc32/v9/submul_1.s: New file.
+
+1998-10-21  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpn/config/mt-pa2hpux: New file.
+       * mpn/configure.in (hppa2.0*-*-*): Use new 64-bit code.
+
+       * config.sub: Recognize hppa2.0 as CPU type.
+
+       * longlong.h (64-bit hppa): Add umul_ppmm and udiv_qrnnd.
+       * mpn/pa64/mul_1.S: New file.
+       * mpn/pa64/addmul_1.S: New file.
+       * mpn/pa64/submul_1.S: New file.
+       * mpn/pa64/umul_ppmm.S: New file.
+       * mpn/pa64/udiv_qrnnd.c: New file.
+
+1998-10-20  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpz/pprime_p.c: Pass 1L, not 1, to mpz_cmp_ui.
+
+       * mpz/fdiv_q_2exp.c: Cast `long' argument to `mp_limb_t' for mpn calls.
+       * mpz/gcd_ui.c: Likewise.
+       * mpz/add_ui.c: Likewise.
+       * mpz/sub_ui.c: Likewise.
+
+1998-10-19  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpn/generic/bdivmod.c: Avoid using switch statement with mp_limb_t
+       index.
+
+1998-10-17  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpn/sparc32/v9/mul_1.s: Misc cleanups.
+       * mpn/sparc32/v9/addmul_1.s: Misc cleanups.
+
+1998-10-16  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpn/tests/{add,sub,}mul_1.c: Print xlimb using mpn_print.
+
+       * mpz/tests/t-powm.c (SIZE): Increase to 50.
+       (EXP_SIZE): New parameter; use it for computing exp_size.
+
+1998-10-15  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpn/generic/divrem_newt.c: Use TMP_ALLOC interface.
+
+       * mpn/generic/sqrtrem.c: Check BITS_PER_MP_LIMB before defining
+       assembly variants of SQRT.
+
+1998-10-14  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpn/tests: Clean up timing routines.  Don't include longlong.h
+       where it is not needed.
+       (mpn_print): Handle printing when _LONG_LONG_LIMB.
+       * mpn/tests/{add,sub,}mul_1.c: Generate xlimb with mpn_random2
+       and do it whether TIMES != 1 or not.
+
+       * mpn/generic/mul_n.c: Delay assignment of `sign' for lower
+       register pressure.
+
+       * mpn/sparc32/v9/mul_1.s: New file.
+
+       * config/mt-sprc9-gcc: New file.
+       * configure.in: Use it.
+
+       * mpn/configure.in: Use sparc64 for Solaris 2.7 and later with a
+       sparc v9 CPU.
+       * mpn/configure.in: Use sparc32/v9 for Solaris 2.6 or earlier with
+       a sparc v9 CPU.
+
+       * mpf/sub.c: In initial code for ediff == 0, limit precision
+       before jumping to `normalize'.
+
+1998-10-13  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpn/hppa/hppa2_0/add_n.s: New file.
+       * mpn/hppa/hppa2_0/sub_n.s: New file.
+       * mpn/configure.in: Handle hppa2.0 (32-bit code for now).
+
+       * config.guess: Update from egcs 1.1.
+       (9000/[3478]??:HP-UX:*:*): Properly return 2.0 for all known 2.0
+       machines.
+
+1998-10-07  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpz/root.c (mpz_root): New file.
+       * mpz/Makefile.in: Compile it.
+       * make.bat: Likewise.
+       * gmp.h (mpz_root): Declare.
+
+       * mpz/perfpow.c: New file.
+       * mpz/Makefile.in: Compile it.
+       * make.bat: Likewise.
+       * gmp.h (mpz_perfect_power_p): Declare.
+
+       * mpz/remove.c: New file.
+       * mpz/Makefile.in: Compile it.
+       * make.bat: Likewise.
+       * gmp.h (mpz_remove): Declare.
+
+       * mpz/bin_ui.c: New file.
+       * mpz/Makefile.in: Compile it.
+       * make.bat: Likewise.
+       * gmp.h (mpz_bin_ui): Declare.
+
+       * mpz/bin_uiui.c: New file.
+       * mpz/Makefile.in: Compile it.
+       * make.bat: Likewise.
+       * gmp.h (mpz_bin_uiui): Declare.
+
+1998-09-16  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * longlong.h: Test for __powerpc__ in addition to _ARCH_PPC.
+
+Sat Sep  5 17:22:28 1998  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpf/cmp_si.c: Compare most significant mantissa limb before
+       trying to deduce anything from the limb count.
+       * mpf/cmp_ui.c: Likewise.
+
+Tue Aug 18 10:24:39 1998  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpz/pprime_p.c (mpz_probab_prime_p): Add new code block
+       for doing more dividing.
+
+Sat Aug 15 18:43:17 1998  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpn/generic/divrem_newt.c: New name for divrem_newton.c.
+       * mpn/Makefile.in: Corresponding changes.
+       * mpn/configure.in: Likewise.
+
+Wed Aug 12 23:07:09 1998  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * config.guess: Handle powerpc for NetBSD.
+
+Tue Jul 28 23:10:55 1998  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpz/fib_ui.c: New file.
+       * mpz/Makefile.in: Compile it.
+       * make.bat: Likewise.
+       * gmp.h (mpz_fib_ui): Declare.
+
+Wed Jun 17 22:52:58 1998  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * make.bat: Fix typo, `asm-synt.h' => `asm-syntax.h'.
+
+Wed Jun  3 11:27:32 1998  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * config/mt-pwr: New file.
+       * config/mt-ppc: New file.
+       * configure.in: Use the new files.
+
+Tue Jun  2 13:04:17 1998  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpn/sparc32/v9/addmul_1.s: New file.
+       * mpn/config/mt-sprc9: New file.
+       * mpn/configure.in: Use mt-sprc9.
+
+Tue May 26 11:24:18 1998  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * demos/factorize.c (factor_using_pollard_rho): Pass correct
+       parameters in recursive calls; join the two recursion arms.
+
+       * mpf/set_q.c: Set result sign.
+       When normalizing the numerator, don't allow it to increase in size
+       beyond prec.
+
+Tue May 19 17:28:14 1998  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * demos/factorize.c (factor_using_division): Call fflush
+       also for the factor 2.
+
+Mon May 18 15:51:01 1998  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * make.bat: Pass -fomit-frame-pointer.  Do not pass -g.
+
+Tue May  5 01:42:50 1998  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpz/Makefile.in (LOCAL_CC): Remove definition.
+
+       * gmp.h: Get rid of GMP_SMALL stuff.
+       * mpz/Makefile.in: Likewise.
+       * mpq/Makefile.in: Likewise.
+       * mpf/Makefile.in: Likewise.
+
+       * mpz/invert.c: Fix typo in comment.
+
+Mon May  4 23:05:32 1998  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpn/generic/sqrtrem.c: Check that __arch64__ is not defined
+       before defining sparc SQRT.
+
+Mon Apr 20 19:16:17 1998  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpn/generic/gcdext.c: Allow gp to be NULL.
+
+1998-04-03  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpn/configure.in: Recognize `alphaev5*', not `alphaev5'.
+
+       * config.guess: Handle CPU variants for NetBSD.
+
+Mon Mar 16 13:07:54 1998  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpz/pprime_p.c: Use mpn_mod_1/mpn_preinv_mod_1 for computing mod PP,
+       not mpz_tdiv_r_ui (which expects an `unsigned long').
+       (mpz_probab_prime_p): Change type of `r' to mp_limb_t.
+
+Thu Mar 12 17:19:04 1998  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * gmp.h (mpf_ceil, mpf_floor, mpf_trunc): Add declarations.
+
+       * config.guess: Update from FSF version.
+       * config.sub: Likewise.
+
+       * config.guess: Add special handling of alpha-*-NetBSD.
+
+Wed Mar 11 00:55:34 1998  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpz/inp_str.c: Update from set_str.c.
+       Properly increment `nread' when skipping minus sign.
+
+       * mpz/set_str.c: Check for empty string after having skipped
+       leading zeros.
+
+Mon Mar  9 19:28:00 1998  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpz/set_str.c: Skip leading zeros.
+
+Wed Mar  4 19:29:16 1998  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * gmp.h (mpz_cmp_si): Cast argument before calling mpz_cmp_ui.
+
+       * demos/factorize.c: Rewrite.
+
+1998-02-04  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * configure.in (i[3456]86* etc): Check if using gcc before
+       choosing mt-x86.
+
+       * configure.in (m68*-*-*): New alternative.
+       * config/mt-m68k: New file.
+
+       * mpn/alpha/invert-limb.s: Put tables in text segment,
+       since not all systems support "rdata".
+
+Wed Feb  4 02:20:57 1998  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * gmp.h (__GNU_MP_VERSION_SNAP): New #define.
+       (__GNU_MP_VERSION_MINOR): Now 1.
+
+Wed Jan 28 22:29:36 1998  Torbjorn Granlund  <tege@tunnis.tmg.se>
+
+       * longlong.h (alpha udiv_qrnnd): #define UDIV_NEEDS_NORMALIZATION.
+
+Wed Jan 28 20:28:19 1998  Torbjorn Granlund  <tege@sophie.matematik.su.se>
+
+       * mpz/pprime_p.c (mpz_probab_prime_p): Delete 59 from tried divisors.
+
+Mon Jan 26 01:39:02 1998  Torbjorn Granlund  <tege@tunnis.tmg.se>
+
+       * mpz/pprime_p.c (mpz_probab_prime_p): Major overhaul: Check small
+       numbers specifically; check small factors, then perform a fermat test.
+
+Tue Jan 13 14:58:28 1998  Torbjorn Granlund  <tege@tunnis.tmg.se>
+
+       * longlong.h (alpha udiv_qrnnd): Call __mpn_invert_normalized_limb
+       and udiv_qrnnd_preinv.
+
+Wed Jan  7 01:52:54 1998  Torbjorn Granlund  <tege@tunnis.tmg.se>
+
+       * mpn/configure.in (alpha*, extra_functions): Add invert-limb and
+       remove udiv_qrnnd.
+
+       * mpn/tests/divrem.c: Get allocations right.
+
+       * mpn/generic/divrem.c: Conditionally pre-invert most significant
+       divisor limb.
+
+Tue Jan  6 23:08:54 1998  Torbjorn Granlund  <tege@tunnis.tmg.se>
+
+       * mpn/generic/divrem_1.c: Rename variables to comply with conventions.
+       Make `i' have type `mp_size_t'.
+
+Tue Dec 30 22:21:42 1997  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpz/tdiv_qr_ui.c: Return the remainder.
+       * mpz/tdiv_r_ui.c: Likewise.
+       * mpz/tdiv_q_ui.c: Likewise.
+       * gmp.h: Change return type of mpz_tdiv_qr_ui, mpz_tdiv_r_ui,
+       mpz_tdiv_q_ui.
+
+       * mpz/tdiv_ui.c: New file.
+       * mpz/Makefile.in: Compile it.
+       * make.bat: Likewise.
+       * gmp.h (mpz_tdiv_ui): Declare.
+
+Fri Nov  7 04:21:15 1997  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpf/integer.c (FUNC_NAME): Fix bogus test for mpf_trunc.
+
+       * demos/isprime.c: New file.
+
+Sat Nov  1 19:32:25 1997  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpz/cmp_abs.c: New file.
+       * mpz/Makefile.in: Compile it.
+       * make.bat: Likewise.
+       * gmp.h (mpz_cmp_abs): Declare.
+
+       * mpz/cmp_abs_ui.c: New file.
+       * mpz/Makefile.in: Compile it.
+       * make.bat: Likewise.
+       * gmp.h (mpz_cmp_abs_ui): Declare.
+
+Sat Sep 27 04:49:52 1997  Torbjorn Granlund  <tege@tunnis.tmg.se>
+
+       * mpz/fdiv_r_2exp.c: Get allocation for `tmp' right.
+
+       * mpz/fdiv_q_2exp.c: In final result adjustment code, handle
+       that intermediate result is zero.
+
+       * mpz/tests/t-2exp.c: New file.
+       * mpz/tests/Makefile.in: Handle t-2exp.c.
+
+Fri Sep 26 16:29:21 1997  Torbjorn Granlund  <tege@tunnis.tmg.se>
+
+       * mpz/divexact.c: Fix typo in test for whether to copy numerator to
+       quotient and move that statement to after handling quotient and
+       denominator overlap.  Misc cleanups.
+
+       * mpn/generic/gcd.c: Change count argument of mpn_lshift/mpn_rshift
+       calls to `unsigned int'.
+       * mpz/divexact.c: Likewise.
+
+Mon Sep 22 02:19:52 1997  Torbjorn Granlund  <tege@pro.tmg.se>
+
+       * mpz/tests/t-powm.c: Decrease `reps' to 2500.
+
+       * mpz/tests/t-pow_ui.c: New file.
+       * mpz/tests/Makefile.in: Handle t-pow_ui.c.
+
+       * mpz/ui_pow_ui.c: Get special cases for exponent and base right.
+
+       * mpz/pow_ui.c: Increase temp space allocation by 1 limb.
+       Split `rsize' into two variables; compute space allocation into
+       `ralloc'.
+
+Sun Sep  7 04:15:12 1997  Torbjorn Granlund  <tege@pro.tmg.se>
+
+       * mpn/pa64/lshift.s: New file.
+       * mpn/pa64/rshift.s: New file.
+       * mpn/pa64/sub_n.s: New file.
+
+Sat Sep  6 19:14:13 1997  Torbjorn Granlund  <tege@gmp.tmg.se>
+
+       * mpn/pa64/add_n.s: New file.
+       * mpn/pa64: New directory.
+
+Tue Aug 19 16:17:09 1997  Torbjorn Granlund  <tege@pro.tmg.se>
+
+       * mpz/swap.c: New file.
+       * mpz/Makefile.in: Compile it.
+       * make.bat: Likewise.
+       * gmp.h (mpz_swap): Declare.
+
+       * mpn/generic/mul_n.c: Push assignment of x and y pointers into the
+       if/else clauses in several places.  (Decreases register pressure.)
+
+Mon Aug 18 03:29:50 1997  Torbjorn Granlund  <tege@pro.tmg.se>
+
+       * mpn/thumb/add_n.s: New file.
+       * mpn/thumb/sub_n.s: New file.
+       * mpn/arm/add_n.s: New file.
+       * mpn/arm/sub_n.s: New file.
+
+       * mpz/powm.c: After mpn_mul_n and mpn_mul calls, adjust product size
+       if most significant limb is zero.
+       * mpz/powm_ui.c: Likewise.
+
+Fri Aug 15 02:13:57 1997  Torbjorn Granlund  <tege@pro.tmg.se>
+
+       * mpn/arm/m/mul_1.s: New file.
+       * mpn/arm/m/addmul_1.s: New file.
+
+       * mpn/powerpc32/mul_1.s: Rewrite.
+
+       * mpn/alpha/mul_1.s: Prefix labels with `.'.
+
+Mon Aug 11 02:37:16 1997  Torbjorn Granlund  <tege@pro.tmg.se>
+
+       * mpn/powerpc32/add_n.s: Rewrite.
+       * mpn/powerpc32/sub_n.s: Rewrite.
+
+Sun Aug 10 17:07:15 1997  Torbjorn Granlund  <tege@pro.tmg.se>
+
+       * mpn/powerpc32/addmul_1.s: Delete obsolete comments.
+       * mpn/powerpc32/submul_1.s: Likewise.
+
+Fri Jul 25 20:07:54 1997  Torbjorn Granlund  <tege@pro.tmg.se>
+
+       * mpz/addmul_ui.c: New file.
+       * mpz/Makefile.in: Compile it.
+       * make.bat: Likewise.
+       * gmp.h (mpz_addmul_ui): Declare.
+
+       * mpz/setbit.c: Add missing code after final `else'.
+
+Tue Jul 22 17:45:01 1997  Torbjorn Granlund  <tege@tunnis.tmg.se>
+
+       * mpn/sh/add_n.s: Fix typo.
+       * mpn/sh/sub_n.s: Likewise.
+
+       * longlong.h (ns32k count_trailing_zeros): Fix typo.
+
+       * insert-dbl.c: Check for exponent overflow and return Inf.
+
+       * mpz/get_d.c: Rewrite to avoid rounding errors.
+
+Thu May 29 11:51:07 1997  Torbjorn Granlund  <tege@pro.tmg.se>
+
+       * mpq/add.c: Swap some usages of tmp1 and tmp2 to make sure
+       their allocation suffices.
+       * mpq/sub.c: Likewise.
+
+Wed Apr 16 02:24:25 1997  Torbjorn Granlund  <tege@pro.tmg.se>
+
+       * demos/pexpr.c: New file.
+
+       * mpn/generic/mul_n.c: Misc optimizations from Robert Harley.
+
+       * gmp-impl.h (MPZ_PROVOKE_REALLOC): New #define.
+
+Sat Apr 12 17:54:04 1997  Torbjorn Granlund  <tege@pro.tmg.se>
+
+       * mpz/tstbit.c: New file.
+       * mpz/Makefile.in: Compile it.
+       * make.bat: Likewise.
+       * gmp.h (mpz_tstbit): Declare.
+
+       * mpz/tests/logic.c: Use MPZ_CHECK_FORMAT.
+       * mpz/tests/bit.c: New test.
+       * mpz/tests/Makefile.in: Handle bit.c.
+
+       * mpz/ior.c: In -OP2,+OP1 case, normalize OP2 after call to mpn_sub_1.
+
+       * gmp-impl.h (MPZ_CHECK_FORMAT): New #define.
+
+Thu Apr 10 00:30:14 1997  Torbjorn Granlund  <tege@tmg.se>
+
+       * longlong.h (POWER/PowerPC): Test _ARCH_PWR instead of _IBMR2.
+
+Wed Apr  9 18:23:31 1997  Torbjorn Granlund  <tege@pro.tmg.se>
+
+       * gmp-impl.h: Move defaulting of UMUL_TIME and UDIV_TIME from here...
+       * longlong.h: ...to here.
+
+Sun Mar 30 12:16:23 1997  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpn/generic/next_prime.c: New file.
+
+       * mpn/generic/perfsqr.c: Remove definitions of PP and PP_INVERTED.
+       * gmp-impl.h: Put them here.
+
+Fri Mar 28 08:18:05 1997  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * gmp-impl.h (MPN_COPY_INCR, MPN_COPY_DECR): Define as inline asm for
+       x86, but leave disabled for now.
+
+Fri Feb 28 02:39:47 1997  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpn/Makefile.in (.S.o): Pass SFLAGS and CFLAGS also to compiler
+       for assembly phase.
+       (.s.o): Pass SFLAGS.
+
+Wed Feb 26 06:46:08 1997  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpn/configure.in: For Pentium Pro, use default code, not Pentium
+       optimized code.
+
+       * mpn/x86/addmul_1.S: Unroll and optimize for Pentium Pro.
+       * mpn/x86/submul_1.S: Likewise.
+
+Thu Feb 13 08:26:09 1997  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpf/Makefile.in: Compile floor.o, ceil.o and trunc.o (from
+       integer.c).
+       * make.bat: Likewise.
+
+Wed Feb  5 05:58:44 1997  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpn/configure.in (alpha*): Add cntlz to extra_functions.
+
+Tue Feb  4 03:30:45 1997  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpf/integer.c: New file (supporting mpf_floor, mpf_ceil, mpf_trunc).
+
+Mon Feb  3 14:21:36 1997  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * make.bat: Fix typo, set_dfl_prc => set_dfl_prec.
+
+Sun Feb  2 02:34:33 1997  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpf/out_str.c: After outputting `-', decrement n_digits.
+
+Wed Jan  8 02:50:20 1997  Torbjorn Granlund  <tege@quiet.matematik.su.se>
+
+       * mpn/generic/divrem.c: qextra_limbs => qxn.
+
+Wed Dec 18 07:50:46 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>
+
+       * mpz/tests/t-tdiv.c (SIZE): Increase to 200.
+
+Tue Dec 17 19:32:48 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>
+
+       * mpn/generic/divrem.c (mpn_divrem_classic): New name for mpn_divrem.
+       * gmp.h (mpn_divrem): New function.
+       * mpn/generic/divrem_newton.c: New file.
+       * mpn/configure.in (functions): Add divrem_newton.
+       * make.bat: Likewise.
+
+Thu Dec 12 17:55:13 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>
+
+       * gmp.h (_GMP_H_HAVE_FILE): Test also __dj_include_stdio_h_.
+
+Sat Dec  7 09:40:06 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>
+
+       * mpn/alpha/invert-limb.s: New file.
+
+Thu Dec  5 01:25:31 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>
+
+       * mpz/ui_pow_ui.c (mpz_pow2): New (static) function.
+       (mpz_ui_pow_ui): Rewrite.
+
+       * make.bat: `pre_mod_1.c' => `pre_mod_.c'.  Fix typo in path to
+       gmp-mpar.h.
+
+Fri Nov 15 00:49:55 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>
+
+       * mpz/ui_pow_ui.c: Rewrite for better speed.
+
+Fri Nov  1 16:36:56 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>
+
+       * Makefile.in (recursive make rules): Use `&&' instead of `;' as
+       delimiter.
+
+Fri Oct 25 17:12:36 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>
+
+       * gmp-impl.h (Cray/uxp MPN_COPY): Really declare as inline.
+
+Thu Oct 24 15:08:19 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>
+
+       * mpn/fujitsu/rshift.c: Fix typo in loop boundaries.
+
+Fri Oct 18 03:13:54 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>
+
+       * mpn/configure.in: Recognize `nextstep' for m68k variants; likewise
+       for x86 variants.
+
+       * mpn/x86/syntax.h (INSND): New macro.
+       * mpn/x86/[lr]shift.S: Use INSND.
+       * mpn/x86/pentium/[lr]shift.S: Likewise.
+       * mpn/config/t-oldgas (SFLAGS): Pass -DOLD_GAS.
+
+       * gmp-impl.h: In code for determining endianness, test also
+       __BIG_ENDIAN__ and __hppa__.  Remove test of __NeXT__.
+
+Wed Oct 16 03:50:34 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>
+
+       * mpf/set_str.c: Let `prec' determine precision used in
+       exponentiation code; decrease allocation accordingly.
+
+       * mpn/vax: Change `jsob*' to `sob*' in all files.
+
+Tue Oct 15 03:54:06 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>
+
+       * longlong.h (m88110 udiv_qrnnd): Change type of intermediate quotient
+       to DImode (divu.d generates a 64-bit quotient).
+
+       * configure.in (m88110*): Fix typo.
+
+       * mpf/get_str.c: Compute exp_in_base using `double' to avoid overflow.
+
+       * gmp-impl.h (struct bases): Change type of chars_per_bit_exactly from
+       float to double.
+       * mpn/mp_bases.c (__mp_bases): Give 17 digits for chars_per_bit_exactly
+       field.
+
+       * mpf/get_str.c: Let `prec' determine precision used in
+       exponentiation code; decrease allocation accordingly.
+
+Sun Oct 13 03:31:53 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>
+
+       * longlong.h: Major cleanup.
+       (__udiv_qrnnd_c): Compute remainders using multiply and subtract,
+       not explicit `%' operator.
+       (C umul_ppmm): Get rid of a redundant __ll_lowpart.
+
+       * mpz/invert.c: Properly detect all operands that would yield an
+       undefined inverse; make sure the inverse is always positive.
+
+       * mpz/xor.c: New file.
+       * mpz/Makefile.in: Compile it.
+       * make.bat: Likewise.
+       * gmp.h (mpz_xor): Declare.
+
+       * mpz/tests/logic.c: Also test mpz_xor.
+
+       * mpz/lcm.c: Special case for when either operand equals 0.
+
+Sat Oct 12 01:57:09 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>
+
+       * mpn/generic/gcd.c (find_a): Don't inline on x86.
+
+       * Makefile.in (CFLAGS): Default to just `-g'.
+
+       * configure.in: Recognize 386 and 486 wherever other x86 cpus are
+       recognized.
+       * configure.in: Use mt-x86 for all x86 cpus.
+       * config/mt-x86: New file.
+
+       * mpn/alpha/cntlz.s: New file.
+
+Tue Oct  8 00:16:18 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>
+
+       * longlong.h: Define smul_ppmm for Fujitsu vpp/uxp.
+       Rewrite umul_ppmm to actually work on the hardware.
+
+       * mpn/x86/sub_n.S: Avoid parens around displacement of `leal'.
+       * mpn/x86/add_n.S: Likewise.
+
+       * mpn/x86/syntax.h (R): Define differently depending on __STDC__.
+
+Mon Oct  7 16:48:08 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>
+
+       * longlong.h: Don't test for __NeXT__ in outer 68k conditional;
+       add test for __m68k__.
+
+Sun Oct  6 00:59:09 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>
+
+       * gmp.h: Declare mpn_random.
+       * make.bat: Compile mpn/generic/random.c.
+
+       * longlong.h: Define umul_ppmm for Fujitsu vpp/uxp.
+
+       * gmp-impl.h: Protect definitions using `__attribute__ ((mode (...)))'
+       with test also for __GNUC_MINOR__.
+
+       * gmp.h: Don't define macros using __builtin_constant_p when using
+       NeXT's compiler.
+
+Fri Oct  4 16:53:50 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>
+
+       * mpz/lcm.c: New file.
+       * mpz/Makefile.in: Compile it.
+       * make.bat: Likewise.
+       * gmp.h (mpz_lcm): Declare.
+
+Wed Sep 25 00:06:21 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>
+
+       * mpq/tests/t-cmp_ui.c: Make sure numerator and denominator of `b' are
+       within limits of an `unsigned long int'.
+
+       * mpz/tests/t-powm_ui.c: Change type of exp2 to `unsigned long int'.
+
+Tue Sep 24 18:58:20 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>
+
+       * mpz/powm_ui.c: Make result always positive.
+
+       * urandom.h (urandom): Make it return mp_limb_t.
+
+       * gmp-impl.h (CNST_LIMB): New macro.
+       * mpn/mp_bases.c: Use CNST_LIMB.
+       * mpn/generic/hamdist.c (popc_limb): Likewise.
+       * mpn/generic/popcount.c (popc_limb): Likewise.
+       * mpn/generic/perfsqr.c: Likewise.
+
+Fri Sep 20 03:08:10 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>
+
+       * mpz/pprime_p.c: When n <= 3, don't clear out n before using it.
+
+Wed Sep 18 11:22:45 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>
+
+       * mpn/fujitsu/mul_1.c: New file.
+       * mpn/fujitsu/addmul_1.c: New file.
+       * mpn/fujitsu/sub_n.c: New file.
+       * mpn/fujitsu/add_n.c: New file.
+
+Sun Sep 15 03:13:02 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>
+
+       * mpn/generic/random.c: New file.
+       * mpn/configure.in (functions): Add `random'.
+
+       * gmp-impl.h (MPN_COPY): Define as annotated inline function for
+       Crays and Fujitsu VPPs.
+
+       * gmp.h (mp_size_t): Define as `int' for non-MPP Cray.
+       (mp_exp_t): Likewise.
+
+       * configure.in: Add support for Fujitsu VPP machines.
+       * mpn/configure.in: Likewise.
+       * config.guess: Likewise.
+       * config.sub: Likewise.
+
+       * mpn/fujitsu/rshift.c: New file.
+       * mpn/fujitsu/lshift.c: New file.
+       * mpn/fujitsu: New directory, for Fujitsu VPP machines.
+
+Wed Sep 11 11:34:38 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>
+
+       * mpn/generic/mul_n.c (__gmpn_mul_n): New name for impn_mul_n.
+       Call __gmpn_mul_basecase, not impn_mul_n_basecase; update parameter
+       list to work with __gmpn_mul_basecase.
+       (__gmpn_sqr): New name for impn_sqr_n.
+       Call __gmpn_sqr_basecase, not impn_sqr_n_basecase; update parameter
+       list to work with __gmpn_sqr_basecase.
+       (mpn_mul_n): Update calls to match new names and parameter conventions.
+       * gmp-impl.h (MPN_MUL_N_RECURSE): Likewise.
+       (MPN_SQR_RECURSE): New name for MPN_SQR_N_RECURSE.
+       Update calls to match new names and parameter conventions.
+       * mpn/generic/mul.c: Never perform multiply explicitly here, call
+       __gmpn_mul_basecase instead.
+       Update calls to match new names and parameter conventions.
+
+       * mpn/x86/mul_basecase.S: New file.
+       * mpn/generic/mul_basecase.c: New file.
+       * mpn/generic/sqr_basecase.c: New file.
+
+Wed Sep  4 02:59:21 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>
+
+       * mpz/set_str.c: Let `0b' and `0B' mean base 2.
+
+Fri Aug 30 00:44:00 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>
+
+       * longlong.h (x86 umul_ppmm): Work around GCC bug that was
+       triggered by Aug 28 change.
+
+       * mpbsd/min.c (digit_value_in_base): New function.
+
+       * mpz/set_str.c: Refine allocation size computation, use
+       chars_per_bit_exactly instead of chars_per_limb.
+
+       * mpbsd/Makefile.in (.c.o): Add -D_mpz_realloc=_mp_realloc.
+
+Wed Aug 28 02:52:14 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>
+
+       * longlong.h (x86 umul_ppmm): Don't cast result operands.
+       (x86 udiv_qrnnd): Likewise.
+       (default smul_ppmm): Fix typo, umul_ppmm => smul_ppmm.
+       (default umul_ppmm): New #define using smul_ppmm.
+       (vax smul_ppmm): New #define.
+       (vax umul_ppmm): Delete.
+       (POWER umul_ppmm): Delete.
+       (IBM 370 smul_ppmm): New #define.
+       (IBM 370 umul_ppmm): Delete.
+       (IBM RT/ROMP smul_ppmm): New #define.
+       (IBM RT/ROMP umul_ppmm): Delete.
+
+Tue Aug 27 01:03:25 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>
+
+       * gmp-impl.h (__gmp_0): Make it `const'.
+
+       * mpn/Makefile.in (clean mostlyclean): Comment out recursive clean
+       of `tests'.
+
+       * mpn/generic/mul.c: Identify when we do squaring, and call
+       impn_sqr_n_basecase/impn_sqr_n as appropriate.  Use
+       KARATSUBA_MUL_THRESHOLD and KARATSUBA_SQR_THRESHOLD.
+       Don't #define KARATSUBA_THRESHOLD.
+
+       * mpn/generic/mul_n.c: Don't #define KARATSUBA_THRESHOLD.
+       (impn_mul_n, impn_sqr_n): Rewrite, based on code contributed by
+       Robert Harley.
+       (impn_sqr_n_basecase): Rewrite.
+
+       * gmp-impl.h (KARATSUBA_MUL_THRESHOLD): New #define.
+       (KARATSUBA_SQR_THRESHOLD): Likewise.
+       (MPN_SQR_N_RECURSE): Use KARATSUBA_SQR_THRESHOLD.
+       (MPN_MUL_N_RECURSE): Use KARATSUBA_MUL_THRESHOLD.
+
+       * configure.in: Fix typo in last change.
+
+Mon Aug 26 22:25:18 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>
+
+       * mpn/generic/random2.c: Fix typo, `alpha__' => `__alpha'.
+       * mpf/random2.c: Likewise.
+
+Sun Aug 25 00:07:09 1996  Torbjorn Granlund  <tege@quiet.matematik.su.se>
+
+       * mpz/tests/t-mul.c: Also test squaring.
+
+Fri Aug 16 05:12:08 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * mp_clz_tab.c (__clz_tab): Declare as `const'.
+       * version.c (gmp_version): Likewise.
+       * mpn/generic/sqrtrem.c (even_approx_tab, odd_approx_tab): Likewise.
+
+Thu Aug 15 02:34:47 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * gmp.h: Fix typo, `mips__' => `__mips'.
+
+       * mpf/set_str.c: Allow a number to start with a period, if next
+       position contains a digit.
+
+Tue Aug 13 18:41:25 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * mpz/gcdext.c: Get cofactor sign right for negative input operands.
+       Clean up code for computing tt.
+
+       * mpz/invert.c: Get rid of variable `rv'.
+
+       * mpz/divexact.c: Test for zero divisor in special case for zero
+       dividend.
+
+Mon Aug 12 18:04:07 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * mpz/?div_*_ui.c: Special case for division by 0.
+       * mpz/tdiv_q.c: Likewise.
+
+Sat Aug 10 14:45:26 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * mpz/dmincl.c: Special case for division by 0.
+
+       * mpz/tdiv_*_ui.c: Delete special case for dividend being 0; handle
+       it when computing size after mpn_divmod_1 call.
+
+       * mp_bpl.c (__gmp_junk): New variable.
+       (__gmp_0): New constant.
+
+       * gmp-impl.h (DIVIDE_BY_ZERO): New #define.
+
+Fri Aug  9 20:03:27 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * mpz/divexact.c: Test for dividend being zero before testing
+       for small divisors.
+
+Thu Aug  8 13:20:23 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * configure.in: Require operating system specification for cpus
+       where assembly syntax differs between systems.
+
+       * Makefile.in (many targets): Change `-' action prefix to `@'.
+
+       * mpn/Makefile.in (distclean): Fix typo.
+
+       * mpq/cmp_ui.c: Rename function to _mpq_cmp_ui.
+       (mpq_cmp_ui): #undef deleted.
+       * mpz/cmp_si.c: Rename function to _mpz_cmp_si.
+       (mpz_cmp_si): #undef deleted.
+       * mpz/cmp_ui.c: Rename function to _mpz_cmp_ui.
+       (mpz_cmp_ui): #undef deleted.
+       * Makefile.in: Corresponding changes.
+
+       * mpf/get_prc.c: Return the *highest* precision achievable.
+
+       * mpf/get_str.c: Complete rewrite.
+
+       * mpf/set_str.c (swapptr): New #define.
+       (assert): New #define.
+       * mpf/set_str.c: Set prec to one more than the saved _mp_prec.
+       Misc cleanups.
+
+       * mpz/set_str.c: #include string.h.
+       * mpf/out_str.c: #include string.h.
+       * mpbsd/xtom.c: #include string.h and ctype.h.
+       * mpbsd/mout.c: #include string.h.
+
+Wed Aug  7 11:46:04 EDT 1996  Ken Weber <kweber@mcs.kent.edu>
+
+       * mpn/generic/gcd.c: Reorder mpn_gcd argument list.
+       * mpz/gcd.c: Change call to mpn_gcd.
+       * gmp.texi: Update manual entry on mpn_gcd.
+       * mpn/generic/bdivmod.c: Delete limb cache to make mpn_bdivmod
+       reentrant.
+
+Wed Aug  7 02:15:38 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * mpf/get_str.c: Rewrite code for converting integral part of a
+       number with both an integral and fractional part.
+
+       * mpf/set_str.c: Get rid of variable xxx.  New variables madj and radj.
+       In exp_in_base==0 case, add madj to msize for EXP field.
+
+       * mpz/tests/t-gcd.c: Test deleted.  Rename t-gcd2.c to t-gcd.c.
+       Increase reps to 2000.
+       * mpz/tests/t-gcd2.c: Get rid of mpz_refgcd.
+
+       * mpf/set_str.c: Ignore excess limbs in MP,MSIZE.
+
+Thu Jul 25 04:39:10 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * mpn/configure.in: Fix typo in setting path, "sparc" => "sparc32".
+
+Wed Jul 24 02:27:02 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * mpn/generic/gcdext.c: Reorganize and clean up.  Get rid of all
+       signed limb arithmetic.
+
+Mon Jul 22 02:39:56 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * mpn/generic/gcdext.c (mpn_gcdext): For large enough operands,
+       work with most significant *two* limbs.
+       (div2): New function (two variants).
+       (THRESHOLD): New #define.
+
+       * mpz/gcdext.c: Fix typo in MPZ_TMP_INIT call.
+
+       * longlong.h (alpha UMUL_TIME): Now 30.
+       (alpha UDIV_TIME): Now 350.
+       (x86 UMUL_TIME): Now 10 (let Pentium decide).
+       (SuperSPARC UDIV_TIME): Override default.
+
+       * extract-dbl.c (MP_BASE_AS_DOUBLE): Don't redefine here.
+
+       * extract-dbl.c: New name for extract-double.c.
+       * insert-dbl.c: New name for insert-double.c.
+       * Makefile.in: Corresponding changes.
+       * make.bat: Likewise.
+
+       * mpz/Makefile.in (.c.o): Don't pass non-portable `-f' to cp.
+       * mpq/Makefile.in: Likewise.
+       * mpf/Makefile.in: Likewise.
+
+Sat Jul 20 01:35:18 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * mpz/getlimbn.c: Take ABS of integer->_mp_size.
+
+       * mpz/divexact.c: Use mpn_divmod_1 if divisor is a single limb.
+
+Thu Jul 18 00:31:15 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * mpn/generic/popcount.c (popc_limb): Use different masking trick
+       for first step (due to David Seal).
+       * mpn/generic/hamdist.c (popc_limb): Likewise.
+
+Wed Jul 17 23:21:48 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * mpn/generic/divrem.c: In MPN_COPY_DECR call, copy dsize - 1 limbs.
+
+Sun Jul 14 17:47:46 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * configure.in: Handle sparc9, sparc64, and ultrasparc like sparc8.
+
+Thu Jul 11 14:05:54 1996  J.T. Conklin  <jtc@rtl.cygnus.com>
+
+       * longlong.h (mc680x0): Define umul_ppmm, udiv_qrnnd, sdiv_qrnnd
+       for the '020, '030, '040, and '332.  Define count_leading_zeros
+       for the '020, '030, '040, and '060.
+
+Sun Jul 14 15:24:53 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       From Joe Keane:
+       * mpq/equal.c: Take ABS of num1_size before passing it to mpn_cmp.
+
+Fri Jul 12 17:11:17 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * mpn/generic/sqrtrem.c (SQRT): New asm for x86, but leave it
+       disabled for now.
+
+       * mpn/generic/sqrtrem.c: Use MP_BASE_AS_DOUBLE.
+
+Wed Jul 10 03:17:45 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * cre-mparam.c: Delete obsolete file.
+
+       * gmp.h: #define _LONG_LONG_LIMB if __mips && _ABIN32.
+       * longlong.h: Test __mips instead of __mips__.
+
+Sun Jul  7 23:19:13 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * longlong.h (_PROTO): Define, unless already defined.
+       (alpha __udiv_qrnnd): Declare using _PROTO.
+       (hppa __udiv_qrnnd): Likewise.
+       (sparc __udiv_qrnnd): Likewise.
+
+Mon Jul  1 01:44:30 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * config.guess: Update from master version; add Cray x90 handling.
+
+Wed Jun 26 05:35:02 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * mpn/power/add_n.s (__mpn_add_n): Work around GAS bug.
+       * mpn/power/sub_n.s (__mpn_sub_n): Likewise.
+
+       * insert-double.c: Rework loop to avoid potential overflow.
+
+       * mpq/get_d.c: For vax, if qsize > N_QLIMBS, ignore excess limbs.
+
+       * mpq/tests/t-get_d.c (SIZE): Special case for vax.
+
+       * gmp.h (mpX_cmp_ui): #define also when ! __GNUC__.
+
+Mon Jun 24 17:13:21 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * longlong.h (vax sdiv_qrnnd): Fix typo.
+
+Sat Jun 15 01:33:33 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * gmp.h: Support `small' and `large' type and function variants,
+       controlled by GMP_SMALL.
+
+       * mpz/Makefile.in (.c.o): Compile each function twice, for small and
+       large variant.
+       (MPZS_OBJS): New variable.
+       (libmpz.a): Include MPZS_OBJS in archive.
+       * mpf/Makefile.in: Analogous changes.
+       * mpq/Makefile.in: Analogous changes.
+
+       * gmp.h: Prefix all functions with __gmp, to allow namespace-clean
+       internal calls.
+
+       * mp.h: Rip out __MP_SMALL__ stuff.
+       (__mpz_struct): mp_size_t => int.
+
+       * mpz/invert.c: #include "gmp-impl.h".
+       Use MPZ_TMP_INIT, not mpz_init.
+
+       * mpz/gcdext.c: Rewrite to call mpn_gcdext.
+
+Fri Jun 14 18:05:29 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * mpn/generic/gcdext.c (s0size): New parameter.
+       * gmp.h (mpn_gcdext): Update prototype.
+
+       * mpn/generic/gcdext.c: Major rewrite.
+
+Mon Jun 10 00:14:27 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * mpn/generic/dump.c: Add missing `else'.
+
+Fri Jun  7 03:35:12 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * Makefile.in (gmp_toc.html): Pass -expandinfo to texi2html.
+
+Thu Jun  6 19:00:53 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * Version 2.0.2 released.
+
+       * install.sh: New file.
+       * Makefile.in (INSTALL): Use install.sh.
+       (install-normal): New name for target `install'.
+       (install): New dummy target.
+
+       * mpz/pow_ui.c: Swap tests for (e == 0) and (bsize == 0).
+       * mpz/ui_pow_ui.c: Swap tests for (e == 0) and (blimb == 0).
+
+       * config/mt-linux (AR_FLAGS): New file.
+       * configure.in: Use config/mt-linux for all linux systems.
+
+Tue Jun  4 03:42:18 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * Version 2.0.1 released.
+
+       * mpf/tests/ref.c: Cast result of TMP_ALLOC to the right pointer type.
+
+       * extract-double.c: Test _GMP_IEEE_FLOATS with #if, not plain if.
+
+       * insert-double.c: Don't #include stdlib.h.
+
+       * gmp-impl.h (union ieee_double_extract): Test sparc and __sparc.
+       Do not test __sparc__.
+
+       * mpf/reldiff.c: Change declaration to work around irix5 compiler bug.
+       * mpq/equal.c: Likewise.
+
+       * mpn/generic/gcd.c: Delete spurious comma at end of enumeration.
+
+       * mpn/generic/gcdext.c: Add K&R declaration syntax.
+       * stack-alloc.h: Likewise.
+       * insert-double.c: Likewise.
+       * extract-double.c: Likewise.
+       * mpf/tests/reuse.c: Likewise.
+       * mpz/tests/reuse.c: Likewise.
+       * mpf/tests/t-sub.c: Likewise.
+       * mpf/tests/t-add.c: Likewise.
+       * mpf/tests/t-muldiv.c: Likewise.
+       * mpf/tests/t-conv.c: Likewise.
+       * mpf/tests/ref.c: Likewise.
+
+       * mpn/config/t-oldgas: Renamed from t-freebsd.
+       * mpn/configure.in: Use t-oldgas for freebsd, netbsd, and some linux
+       configurations.
+
+       * mpn/powerpc32/mul_1.s: Really clear cy before entering loop.
+       * mpn/powerpc32/*.s: Fix power/powerpc syntax issues.
+
+       * mpn/config/t-ppc-aix: New file.
+       * mpn/configure.in: Use t-ppc-aix for powerpc like t-pwr-aix for power.
+
+Wed May 29 02:07:31 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * gmp.h (mp_bits_per_limb): Change qualifier from `const' to
+       __gmp_const.
+
+       * gmp.h (mpf_init_set_str): Add `const' qualifier for 2nd parameter.
+       * mpf/iset_str.c: Likewise.
+
+Mon May 27 00:15:58 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * gmp-impl.h: Declare __gmp_extract_double.
+
+       * mpz/set_q.c: Delete unused variables.
+
+       * gmp.h (mpq_equal): Declare.
+
+       * mpf/eq.c: mpf_cmp2 -> mpf_eq.
+
+Fri May 24 03:20:44 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * mpz/iset_d.c: Don't include <math.h>.
+
+       * insert-double.c (__gmp_scale2): New name for scal2.
+       * mpz/get_d.c: Corresponding change.
+       * mpf/get_d.c: Likewise.
+       * mpq/get_d.c: Likewise.
+       * gmp-impl.h: Declare __gmp_scale2.
+
+       * mpn/generic/scan0.c: Clarify comment.
+
+       * mpz/set_q.c: New file.
+       * Makefile.in: Compile it.
+       * make.bat: Likewise.
+       * gmp.h: Declare mpz_set_q.
+
+       * insert-double.c: New file.
+       * Makefile.in: Compile it.
+       * make.bat: Likewise.
+
+       * mpz/get_d.c: New file.
+       * mpz/Makefile.in: Compile it.
+       * make.bat: Likewise.
+       * gmp.h: Declare mpz_get_d.
+
+       * mpf/get_d.c: New file.
+       * mpf/Makefile.in: Compile it.
+       * make.bat: Likewise.
+       * gmp.h: Declare mpf_get_d.
+
+       * make.bat: Compile things in alphabetical order.
+
+       * gmp-impl.h (MP_BASE_AS_DOUBLE): New #define.
+       (LIMBS_PER_DOUBLE): New #define.
+
+       * extract-double.c: New file.
+       * Makefile.in: Compile it.
+       * make.bat: Likewise.
+       * mpz/set_d.c: Rewrite to use __gmp_extract_double.
+       * mpf/set_d.c: Likewise.
+
+       * mpn/configure.in: Use t-pwr-aix also for aix 3.2.4 and up.
+
+Wed May 22 02:48:35 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * gmp-impl.h: Rework code for defining ieee_double_extract.
+       (IEEE_DOUBLE_BIG_ENDIAN): Macro removed.
+       (_GMP_IEEE_FLOATS): New macro.
+       * mpn/vax/gmp-mparam.h: Delete.
+
+       * mpn/config/t-pwr-aix: New file.
+       * mpn/configure.in: Use t-pwr-aix for aix 4 and later.
+
+Mon May 20 16:30:31 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * gmp.h: In code for setting _GMP_H_HAVE_FILE, test more symbols.
+
+       * mpf/tests/t-add.c (oo): Add some `l' printf modifiers.
+       * mpf/tests/t-sub.c (oo): Likewise.
+       * mpf/tests/t-conv.c (oo): Likewise.
+       * mpf/tests/t-sqrt.c (oo): Likewise.
+
+       * mpz/tests/t-mul.c (_mpn_mul_classic): Remove unused variables.
+
+       * mpn/{pyr,i960,clipper}/*.s: Add missing copyright headers.
+
+Fri May 17 02:24:43 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * mpz/set_d.c: Call _mpz_realloc.
+
+       * mpq/set_z.c: New file.
+       * mpq/Makefile.in: Compile it.
+       * make.bat: Likewise.
+       * gmp.h: Declare mpq_set_z.
+
+       * mp?/Makefile.in (libmp?.a): Depend on Makefile, not Makefile.in.
+       * mpf/Makefile.in (test): Delete spurious target.
+       * mpq/Makefile.in (test): Likewise.
+
+       * mpf/out_str.c: Use `e' to separate exponent when base <= 10.
+
+       * mpn/configure.in: Treat ultrasparc just like sparc v8,
+       until 64-bit compilers are ready.
+
+       * mpf/set_d.c: Make it work for 64-bit machines.
+
+Thu May 16 20:53:57 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * gmp-impl.h: Set IEEE_DOUBLE_BIG_ENDIAN to 0 for little-endian
+       machines.
+       * mpn/x86/gmp-mparam.h: Delete file.
+
+       * configure.in: Treat microsparc like sparc8.
+
+       * urandom.h: Test __alpha instead of __alpha__, since the former
+       is the standard symbol.
+       * mpn/generic/random2.c: Likewise.
+       * mpf/random2.c: Likewise.
+
+Tue May 14 13:42:39 1996  Torbjorn Granlund  (tege@tiny.matematik.su.se)
+
+       * mpz/set_f.c: New file.
+       * mpz/Makefile.in: Compile it.
+       * gmp.h: Declare mpz_set_f.
+
+       * mpf/set_q.c: Simplify expression in rsize == nsize if-then-else arms.
+
+Tue May 14 13:03:07 1996  Torbjorn Granlund  (tege@tiny.matematik.su.se)
+
+       * make.bat: Add all new files.
+
+Sun May 12 22:24:36 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * mpf/set_z.c: New file.
+       * mpf/Makefile.in: Compile it.
+       * gmp.h: Declare mpf_set_z.
+
+Sat May 11 19:26:25 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * gmp.h: Declare mpf_set_q.
+
+       * mpf/set_q.c: Compute prec-1 limbs in mpn_divrem call.
+
+Fri May 10 17:37:38 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * mpf/set_q.c: New file.
+       * mpf/Makefile.in: Compile it.
+
+       * config.sub: Recognize sparc8.
+
+Wed May  8 09:19:11 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * mpf/tests/t-dm2exp.c: New file.
+
+       * mpf/tests/t-add.c: Correct header comment.
+       * mpf/tests/t-sub.c: Likewise.
+       * mpf/tests/t-sqrt.c: Likewise.
+
+       * mpf/div.c: Misc variable name cleanups.
+       * mpf/div_ui.c: Base more closely on mpf/div.c.
+       * mpf/ui_div.c: Likewise.
+
+       * mpz/tests/Makefile.in (check): Depend on Makefile.
+       * mpq/tests/Makefile.in (check): Likewise.
+       * mpf/tests/Makefile.in (check): Likewise.
+
+       * mpf/tests/t-muldiv.c: New file.
+       * mpf/tests/Makefile.in: Compile and run `t-muldiv'.
+       (t-ref.o): Delete spurious rule.
+
+       * mpf/sqrt.c: Properly detect negative input operand.
+
+       * mpf/sqrt_ui.c: Delete spurious header comment.
+       * mpf/sqrt.c: Likewise.
+       * mpz/sqrt.c: Likewise.
+
+       * mpz/tests/reuse.c (main): Read `reps' from command line.
+
+       * mpf/tests/reuse.c: New file.
+       * mpf/tests/Makefile.in: Compile and run `reuse'.
+
+       * mpf/mul_ui.c: Disable code for removing low zero limbs.
+
+       * mpf/div.c: Fix condition for when vp and qp overlap.
+
+       * mpf/add_ui.c: When sum equals u, copy up to prec+1 limbs.
+
+       * mpf/out_str.c: Don't output '\n' after exponent.
+
+       * mpf/add_ui.c: New special case for when U is completely cancelled.
+
+Wed Apr 24 05:33:28 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * Version 2.0 released.
+
+       * All files: Update FSF's address.
+
+       * Makefile.in (gmp_toc.html): New name for gmp.html.
+       (TAGS): Depend on force.
+
+       * mpf/tests/t-conv.c: Pass -base to mpf_set_str.
+
+Sat Apr 20 03:54:06 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * Makefile.in (ps): New target, depend on gmp.ps.
+
+Fri Apr 19 14:03:15 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * mpf/out_str.c: Print `@' before exponent, not `e'.
+
+       * make.bat: Update from Makefiles.
+
+Thu Apr 18 01:22:05 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * mpf/set_str.c: If parameter `base' is negative, expect exponent
+       to be decimal, otherwise in the same base as the mantissa.
+
+Wed Apr 17 17:28:36 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * mpf/set_dfl_prec.c: Don't return anything.
+       * gmp.h: Corresponding changes.
+
+       * mpf/set_dfl_prec.c: Use `unsigned long int' for bit counts.
+       * mpf/init2.c: Likewise.
+       * mpf/get_prc.c: Likewise.
+       * mpf/set_prc.c: Likewise.
+       * mpf/set_prc_raw.c: Likewise.
+       * mpz/popcount.c: Likewise.
+       * mpz/hamdist.c: Likewise.
+       * mpz/scan1.c: Likewise.
+       * mpz/scan0.c: Likewise.
+       * mpn/generic/popcount.c: Likewise.
+       * mpn/generic/hamdist.c: Likewise.
+       * mpn/generic/scan1.c: Likewise.
+       * mpn/generic/scan0.c: Likewise.
+       * gmp.h: Likewise.
+
+       * mpf/eq.c: New file, based on mpf/diff.c.
+       * mpf/diff.c: Delete.
+       * mpf/Makefile.in: Corresponding changes.
+       * gmp.h: Likewise.
+
+       * mpf/reldiff.c: New file.
+       * mpf/Makefile.in: Compile it.
+       * gmp.h: Declare mpf_reldiff.
+
+       * mpz/iset_d.c: New file.
+       * mpz/Makefile.in: Compile it.
+       * gmp.h: Declare mpz_init_set_d.
+
+Tue Apr 16 16:28:31 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * Makefile.in (gmp.html): Pass -acc to texi2html.
+
+Mon Apr 15 16:20:24 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * mpf/set_str.c: Switch off code for defaulting the base from the
+       leading characters.
+
+       * gmp.h (mp?_sign): Delete.
+       (mp?_sgn): New macros.
+
+Fri Apr 12 17:23:33 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * Makefile.in (gmp.dvi): Delete tmp.* at end of rule.
+
+Wed Apr 10 22:52:02 1996  Torbjorn Granlund  (tege@tiny.matematik.su.se)
+
+       * mpf/random2.c: Change of `exp' param, mp_size_t => mp_exp_t.
+       * gmp.h: Corresponding change.
+
+       * gmp.h (mp_bits_per_limb): Make it const.
+
+Sat Mar 30 01:20:23 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * configure.in: Re-enable recognition of with_gcc.
+
+       * mpf/Makefile.in (.c.o): Pass XCFLAGS.
+       * mpn/Makefile.in (.c.o): Likewise.
+       * mpz/Makefile.in (.c.o): Likewise.
+       * mpq/Makefile.in (.c.o): Likewise.
+       * mpbsd/Makefile.in (.c.o): Likewise.
+       * mpf/tests/Makefile.in (.c.o): Likewise.
+       * mpz/tests/Makefile.in (.c.o): Likewise.
+       * mpq/tests/Makefile.in (.c.o): Likewise.
+
+       * Makefile.in (XCFLAGS): Default to empty.
+       (FLAGS_TO_PASS): Pass on XCFLAGS.
+       (.c.o): Pass XCFLAGS.
+
+       * config/mt-m88110 (XCFLAGS): Define instead of CC.
+       * config/mt-sprc8-gcc (XCFLAGS): Likewise.
+       * config/mt-supspc-gcc (XCFLAGS): Likewise.
+
+       * configure: Don't default CC to "gcc -O2" if -with-gcc=no was
+       specified.
+
+Mon Mar 25 01:07:54 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * urandom.h: Test for __SVR4 in addition to __svr4__.
+
+       * mp_bpl.c (mp_bits_per_limb): Declare as `const'.
+
+       * Makefile.in (CFLAGS): `-O2' => `-O'.
+       * mpn/Makefile.in (CFLAGS): Likewise.
+
+       * gmp-impl.h: Get rid of obsolete field access macros.
+
+       * mpn/mp_bases.c (__mp_bases): 1e39 => 1e38 to work around Solaris
+       cc compiler bug.
+
+       * gmp.h (__MPN): Make it work also for non-ANSI compilers.
+
+Thu Mar 21 01:07:54 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * mpf/sub.c: New special case for ediff <= 1 before generic code.
+       Simplify generic code for ediff == 0.
+       Rename uexp => exp.
+
+Mon Mar 11 18:24:57 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * mpf/tests/*.c: Use ref_mpf_sub for error calculation.
+       * mpf/tests/Makefile.in: Link ref.o to all executables.
+
+       * mpf/tests/t-sub.c: Make u = v + 1 with 50% probability.
+
+Sun Mar 10 21:03:17 1996  Torbjorn Granlund  (tege@tiny.matematik.su.se)
+
+       * mpf/get_str.c: In digit development loop for fractions, change
+       loop condition from `<' to `<='.
+
+Thu Mar  7 04:58:11 1996  Torbjorn Granlund  <tege@tiny.matematik.su.se>
+
+       * mpn/mp_bases.c (__mp_bases): 1e100 => 1e39 to avoid overflow warning.
+
+Wed Mar  6 01:10:42 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * mpf/tests/t-sqrt.c: New file.
+       * mpf/tests/Makefile.in: Corresponding changes.
+
+       * mpf/sqrt.c: Special case for square root of zero.
+
+       * mpq/add.c: Clean up variable names.
+       * mpq/sub.c: Update from mpq/add.c.
+
+       * mpz/divexact.c: abs => ABS.
+       * mpz/gcd.c: Likewise.  Rewrite final fixup code, to decrease
+       allocation.  Misc cleanups.
+
+Tue Mar  5 22:24:56 1996  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpn/configure.in: Recognize linuxoldld as a synonym for linuxaout.
+
+       * gmp.h (mpn_add, mpn_add_1, mpn_sub, mpn_sub_1): Add prototypes.
+
+       * mpn/configure.in: Use t-freebsd also for netbsd.
+
+Mon Mar  4 15:13:28 1996  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpq/Makefile.in (cmp.o): Depend on longlong.h.
+
+       * mpq/equal.c: New file.
+       * mpq/Makefile.in: Corresponding changes.
+
+       * mpf/tests/t-add.c: New file.
+       * mpf/tests/t-sub.c: Renamed from t-addsub.c.
+       * mpf/tests/ref.c: New file.
+       * mpf/tests/Makefile.in: Corresponding changes.
+
+       * gmp-impl.h (SIZ, ABSIZ, PTR, EXP, PREC, ALLOC): New #defines.
+
+Sun Mar  3 07:45:46 1996  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpf/set_str.c: In exponentiation code, allocate 3 extra
+       limbs, not just 2.
+
+       * mpf/get_str.c: Allocate sufficient space for tstr.
+       When calculating exp_in_base, round result down.
+
+       * mpf/tests/t-conv.c: New file.
+       * mpf/tests/Makefile.in: Corresponding changes.
+
+       * mp_bpl.c: New file.
+       * gmp.h: Declare it.
+       * Makefile.in: Corresponding changes.
+
+Sat Mar  2 06:27:56 1996  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpf/set_prc_raw.c: New file.
+       * mpf/set_prc.c: Renamed from set_prec.c.
+       * mpf/get_prc.c: New file.
+       * mpf/Makefile.in: Corresponding changes.
+       * gmp.h: Declare new functions.
+
+       * mpn/generic/gcdext.c: Add copyright header.
+
+Fri Mar  1 01:22:24 1996  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpn/configure.in: For ppc601, search "power" before "powerpc32".
+
+       * mp?/Makefile.in (AR_FLAGS): New variable.
+       (libmp?.a): Use it.
+
+       * make.bat: New file.
+       * mpn/msdos: New directory.
+       * mpn/msdos/asm-syntax.h: New file.
+
+       * mpn/Makefile.in (distclean maintainer-clean): Delete asm-syntax.h.
+
+       * config.sub: Recognize [ctj]90-cray.
+
+       * mpn/configure.in: Recognize [ctj]90-cray-unicos*.
+
+       * mpn/generic/gcdext.c: Don't use alloca directly, use TMP_* macros.
+
+       * mpn/generic/gcd.c: Split increment from use of USIZE to avoid
+       undefined behaviour.
+
+Thu Feb 29 04:11:24 1996  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * Makefile.in (install-info-files): Update for new install-info
+       behaviour.
+
+       * mpn/power/add_n.s: Rewrite.
+       * mpn/power/sub_n.s: Rewrite.
+
+Wed Feb 28 01:34:30 1996  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpz/pow_ui.c: Compute allocation more aggressively for small bases.
+       * mpz/ui_pow_ui.c: Likewise.
+
+       * mpn/mp_bases.c (__mp_bases): Put huge value in 2nd field for index 1.
+
+       * mpn/generic/sqrtrem.c: sizeof (mp_limb_t) => BYTES_PER_MP_LIMB.
+       * mpn/generic/gcd.c: Likewise.
+       (SIGN_BIT): Compute differently.
+
+Mon Feb 26 00:07:36 1996  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * All files: mp_limb => mp_limb_t, mp_limb_signed => mp_limb_signed_t.
+
+       * Makefile.in (install, install-bsdmp, install-info-files): Depend
+       on installdirs.  chmod all installed files.
+
+Sun Feb 25 01:47:41 1996  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpbsd/configure.in: Delete debugging code.
+
+       * All Makefile.in: Update clean targets.
+
+       * Makefile.in (AR_FLAGS): New variable.
+       (libgmp.a): Use it.
+       (libmp.a): Likewise.
+
+       * VERSION: Delete file.
+
+       * Makefile.in (installdirs): New target.
+       * mkinstalldirs: New file (from the texinfo package).
+
+       * Makefile.in (INSTALL, INSTALL_DATA, INSTALL_PROGRAM): New variables.
+       (MAKEINFO, MAKEINFOFLAGS, TEXI2DVI): New variables.
+       (install-info): New target.
+       (install, install-bsdmp): Depend on install-info.
+       ($(srcdir)/gmp.info): Changed from plain gmp.info; put info files
+       into source directory.
+       (distclean, mostlyclean): New targets.
+       (maintainer-clean): New name for realclean.
+       (uninstall): New target.
+       (TAGS): New target.
+       (info, dvi): New targets.
+       (.PHONY): Assign.
+
+       * Makefile.in (install, install-bsdmp): Use INSTALL_DATA.
+
+       * mp{n,z,f,bsd}/move-if-change: Delete.
+
+       * mpbsd/Makefile.in (stamp-stddefh): Delete target.
+
+       * Makefile.in (.c.o): Pass CFLAGS last.
+       * mpbsd/Makefile.in (.c.o): Likewise.
+       * mpf/Makefile.in (.c.o): Likewise.
+       * mpq/Makefile.in (.c.o): Likewise.
+       * mpz/Makefile.in (.c.o): Likewise.
+       * mpn/Makefile.in (.c.o): Likewise.
+       (.S.o): Likewise.
+
+       * memory.c: Change allocation error message.
+
+       * Makefile.in (install): Prefix gmp.h with $(srcdir).
+       (install-bsdmp): Prefix mp.h with $(srcdir).
+
+       * mp{n,z,f,bsd}/{configure,config.sub}: Delete.
+
+       * Makefile.in (gmp.dvi): Set TEXINPUTS also for 2nd tex invocation.
+       (install targets): Install gmp.info-N.
+
+Sat Feb 24 03:36:52 1996  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpf/get_str.c: Fix typo.
+
+       * mpz/legendre.c: Clarify expression with extra parens.
+
+       * version.c (gmp_version): Not static.
+
+       * mpf/iset_str.c: Properly return error code.
+
+       * mpf/add.c: Delete unused variables.
+       * mpf/inp_str.c: Likewise.
+       * mpq/get_d.c: Likewise.
+
+       * mpn/generic/dump.c: #include <stdio.h>.
+       * mpf/dump.c: Likewise.
+       * mpf/set_str.c: #include <ctype.h>.
+       (strtol): Declare.
+
+       * gmp.h: mpn_sqrt => mpn_sqrtrem.
+
+       * Makefile.in (clean, realclean): Clean in mpbsd.
+       (check): Test in mpf.
+
+       * mpf/Makefile.in (clean): Clean in tests.
+       * mpq/Makefile.in (clean): Clean in tests.
+
+       * mpf/tests/Makefile.in: New file.
+       * mpf/tests/configure.in: New file.
+       * mpf/tests/t-addsub.c: New file.
+
+       * mpf/sub_ui.c: Simply call mpf_sub for now.
+
+       * mpf/sub.c: Increase prec by 1.
+       * mpf/ui_sub.c: Likewise.
+
+Fri Feb 23 00:59:54 1996  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpf/ui_sub.c: Fix typos.
+
+       * mpf/get_str.c: When allocating space for tmp, allow for an extra
+       limb.  In code for fraction conversion, add special case for bases
+       that are a power of 2.
+
+       * mpf/out_str.c: Output leading "0.".
+       Default base to 10, before computing string allocation.
+
+       * mpf/get_str.c: Make variables for string size have type size_t.
+       * gmp.h: Corresponding change.
+
+       * mpf/random2.c: Allow creation of prec+1 large mantissas.
+
+       * mpf/add_ui.c: Don't abort if u < 0; special case for u <= 0.
+       Fix typo in MPN_COPY offset.
+       * mpf/sub_ui.c: Analogous changes.
+
+       * mpf/set_prec.c: Rewrite.
+
+       * mpf/init2.c: Compute precision as in set_prec.c.
+
+       * mpf/div_2exp.c: Special case for u == 0.
+       * mpf/mul_2exp.c: Likewise.  Write r->_mp_size always.
+
+       * mpf/sqrt_ui.c: mpn_sqrt => mpn_sqrtrem.
+       * mpf/sqrt.c: Likewise.  When computing new exponent, round quotient
+       towards -infinity.
+
+       * mpf/add.c: Fix typos.
+       * mpf/sub.c: Fix typos.
+
+Thu Feb 22 00:24:48 1996  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpz/Makefile.in (stamp-stddefh): Delete target.
+       (test): Delete target.
+       * Makefile.in (stamp-stddefh): Delete target.
+       (cre-stddefh.o): Delete target.
+       (gmp.dvi): Set TEXINPUTS before invoking tex.
+
+       * cre-stddefh.c: Delete.
+
+       * mpz/sqrt.c: Fix typo.
+
+       * mpz/powm.c: Special case for mod == 0.
+       * mpz/powm_ui.c: Likewise.
+
+       * mpz/get_si.c: Handle -0x80000000 correctly.
+
+       * mpz/inp_str.c: Now returns size_t.
+       Make it return number of bytes read or error indication.
+       * mpf/inp_str.c: Likewise.
+
+       * mpz/out_raw.c: Replace by mpz/out_binary.c, with modifications.
+       * mpz/inp_raw.c: Rewrite, using mpz/inp_binary as a base.
+       * mpz/inp_binary.c: Delete.
+
+       * mpn/Makefile.in (XCFLAGS): Remove variable.
+       (.c.o): Don't pass XCFLAGS.
+       (SFLAGS): Set to nothing.
+       (.S.o): Pass SFLAGS, not XCFLAGS.
+
+       * mpn/config/t-freebsd (SFLAGS): New name for XCFLAGS.
+
+       * mpf/out_str.c: Make it return number of bytes written or error
+       indication.
+       * mpz/out_str.c: Likewise.
+       * gmp.h: Corresponding changes.
+
+       * gmp.h (__mpz_struct): mp_size_t => int.
+       (__mpq_struct): Likewise.
+       (__mpf_struct): Likewise.
+       (mp_size_t): int => long int.
+
+       * mpn/cray: New directory.
+       * mpn/cray/gmp-mparam.h: New file.
+       * mpn/configure.in: Recognize cray variants.
+
+       * Makefile.in: Set defaults for prefix, libdir, etc.
+       (install): New target.
+       (install-bsdmp): New target.
+       (gmp.html): New target.
+
+       * stack-alloc.c (__tmp_alloc): Cast void ptrs to char * in comparison.
+
+Wed Feb 21 04:35:02 1996  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * gmp.h: Sort mpn declarations.
+       (mpn_gcdext): Add declaration.
+
+       * mpn/generic/divrem_1.c: New file.
+       * mpn/Makefile.in (divrem_1.o): New rule.
+       * configure.in (functions): Add divrem_1.
+
+       * mpn/generic/divmod.c: Delete file.
+       * mpn/configure.in (functions): Delete divmod.
+       * Makefile.in (divmod.o): Delete rule.
+       * gmp.h (mpn_divmod): New #define.
+
+       * gmp.h (mpn_next_bit_set): Delete spurious declaration.
+
+       * mpn/generic/divrem.c (default case): In code assigning
+       most_significant_q_limb, move reassignment of n0 into if statement.
+
+       * gmp.h (mpf_inp_str): Fix typo.
+       (mpf_out_str): Make prototype match reality.
+       * mpf/inp_str.c: New file.
+       * mpf/out_str.c: New file.
+       * mpf/Makefile.in: Compile new files.
+
+       * mpn/Makefile.in (dump.o): Fix dependency path.
+       (inlines.o): Likewise.
+
+       * mpn/configure.in: Make m68060 be the same as m68000.  Clean up
+       m68k configs.
+
+Tue Feb 20 01:35:11 1996  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpn/generic/sqrtrem.c: Renamed from sqrt.
+       * mpn/configure.in (functions): Corresponding change.
+       * mpn/Makefile.in: Likewise.
+       * mpz/sqrtrem.c: Likewise.
+       * mpz/sqrt.c: Likewise.
+       * mpn/generic/perfsqr.c: Likewise.
+
+       * Makefile.in (clean): Also remove libmp.a.
+       Don't compile cre-conv-tab.c or mp_bases.c.
+       cre-conv-tab.c: Delete file.
+       (gmp.ps): New rule.
+
+       * mpn/mp_bases.c: New file.
+       * mpn/Makefile.in: Compile mp_bases.c.
+
+       * mpz/set_str.c: Skip initial whitespace.
+       * mpf/set_str.c: Likewise.
+       * mpbsd/xtom.c: Likewise.
+
+       * gmp.h: Add missing mpz declarations.
+       Delete all formal parameter names from declarations.
+
+       * mpn/Makefile.in: Add dependencies for .c files.
+
+       * Makefile.in (check): Write recursive make calls separately, not as
+       a loop.
+       (FLAGS_TO_PASS): New variable.  Use it for most recursive makes.
+
+Mon Feb 19 01:02:20 1996  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpn/Makefile.in (.S.o): Pipe cpp output to grep in order to delete
+       lines starting with #.
+       (CPP): Set to $(CC) -E to avoid gcc dependency.
+
+       * mpn/m68k/syntax.h (moveql): Define to moveq for MIT_SYNTAX.
+
+       * mpn/hppa/hppa1_1/pa7100/addmul_1.S: Fix typo in s1_ptr alignment
+       code.
+       * mpn/hppa/hppa1_1/pa7100/submul_1.S: Likewise.
+
+       * gmp.h: Fix typos in #defines of recently added mpn functions.
+
+       * mpz/inp_str.c: Skip all whitespace, not just plain space.
+       * mpbsd/min.c: Likewise.
+
+       * mpn/configure.in (functions): Add gcdext.
+       * mpn/generic/gcdext.c: New file.
+
+       * mpz/legendre.c: mpz_div_2exp => mpz_tdiv_q_2exp.
+
+       * gmp.h: Surround mpn declarations with extern "C" { ... }.
+
+       * Makefile.in (check): New target.
+
+       * mpq/get_d.c: Update comments.  Use rsize instead of dsize + N_QLIMBS
+       when possible.  Add special case for nsize == 0.
+
+       * gmp.h (mpq_get_d): Add declaration.
+       (mpq_canonicalize): Likewise.
+       (mpq_cmp_ui): Likewise.
+       (mpf_diff): Likewise.
+       (mpf_ui_sub): Likewise.
+       (mpf_set_prec): Likewise.
+       (mpf_random2): Likewise.
+
+       * gmp.h (mpz_cmp_ui): New #define.
+       (mpz_cmp_si): New #define.
+       (mpq_cmp_ui): New #define.
+       (mpz_sign): New #define.
+       (mpq_sign): New #define.
+       (mpf_sign): New #define.
+       (mpq_numref): New #define.
+       (mpq_denref): New #define.
+
+       * mpq/set_z.c: File deleted.
+       * mpq/Makefile.in: Corresponding changes.
+
+Sun Feb 18 01:34:47 1996  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpbsd/sdiv.c: Use _mp_realloc, not _mpz_realloc.
+
+       * mpz/inp_binary.c: Default stream to stdin.
+       * mpz/inp_str.c: Likewise.
+       * mpz/inp_raw.c: Likewise.
+       * mpz/out_binary.c: Default stream to stdout.
+       * mpz/out_raw.c: Likewise.
+       * mpz/out_str.c: Likewise.
+
+       * mpbsd/realloc.c: New file.
+       * mpbsd/Makefile.in: Corresponding changes.
+
+       * mpbsd/min.c: Rewrite (based on mpz/inp_str.c).
+       * mpbsd/mtox.c: Rewrite (based on mpz/get_str.c).
+
+       * mpbsd/mout.c: Rewrite (based on mpz/out_str) but make it output
+       spaces in each 10th position.
+       * mpbsd/xtom.c: Rewrite (based on mpz/set_str).
+
+       * mpq/tests/Makefile.in (st-cmp): New file.
+       * mpq/tests/configure.in (srcname): New file.
+
+       * mpz/tests/configure.in (srcname): Fix typo.
+
+       * mpq/cmp.c: Add check using number of significant bits, to avoid
+       general multiplication.
+
+Sat Feb 17 11:58:30 1996  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpq/cmp_ui.c: Store cy_limb after the mpn_mul_1 calls.
+
+       * mpq/tests: New directory.
+       * mpq/tests/t-cmp.c: New file.
+       * mpq/tests/t-cmp_ui.c: New file.
+
+       * mpz/tests/dive.c (main): Generate zero numerator.
+       (get_random_size): Delete.
+
+       * mpz/divexact.c: Add special case for 0/x.
+
+       * gmp.h (mpz_mod): Add declaration.
+
+Fri Feb 16 18:18:39 1996  Andreas Schwab  <schwab@informatik.uni-dortmund.de>
+
+       * mpn/m68k/*: Rewrite code not to use the INSN macros.
+       (L): New macro to properly prefix local labels for ELF.
+
+Fri Feb 16 00:20:56 1996  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * gmp-impl.h (ieee_double_extract): Use plain `unsigned int' for
+       fields.
+       * mpn/generic/inlines.c (_FORCE_INLINES): New #define.  Delete
+       conditional __GNUC__.
+       * gmp.h (mpn_add, mpn_sub, mpn_add_1, mpn_sub_1):
+       Only define these if __GNUC__ || _FORCE_INLINES.
+       * mpf/random2.c: Add missing parameter in non-ANSI header.
+       * mpn/generic/gcd.c (SIGN_BIT): Do as #define to work around bug
+       in AIX compilers.
+       * mpq/get_d.c: #define N_QLIMBS.
+       * mpz/divexact.c: Obscure division by 0 to silence compiler warnings.
+       * stack-alloc.c: Cast void* pointer to char* before doing arithmetic
+       on it.
+
+       * Makefile.in (mpbsd/libmpbsd.a): New rule.
+       * configure.in (configdirs): Add mpbsd.
+
+       * gmp.h: Add declarations for a few missing mpn functions.
+
+       * Makefile.in (libmp.a): New rule.
+
+       * mpbsd/mdiv.c: #include "dmincl.c", not "mpz_dmincl.c".
+       * gmp.h: Move #define of __GNU_MP__ into the `#if __GNU_MP__' block.
+       * mp.h: Likewise.  Update typedefs from gmp.h.
+       * mpbsd/configure.in: New file.
+       * mpbsd/Makefile.in: New file.
+       * mpbsd/configure: Link to master configure.
+       * mpbsd/config.sub: Link to master config.sub.
+
+       * Makefile.in: Set RANLIB_TEST.
+       * (libgmp.a): Use it.
+       * (libgmp.a): Do ranlib before moving the libgmp.a to the build
+       directory.
+       * mp?/Makefile.in: Don't use or set RANLIB.
+
+Thu Feb 15 16:38:41 1996  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpz/add_ui.c: MP_INT => mpz_t.
+       * mpz/cmp_ui.c: Likewise.
+       * mpz/fac_ui.c: Likewise.
+       * mpz/inp_binary.c: Likewise.
+       * mpz/inp_raw.c: Likewise.
+       * mpz/legendre.c: Likewise.
+       * mpz/jacobi.c: Likewise.
+       * mpz/out_binary.c: Likewise.
+       * mpz/out_raw.c: Likewise.
+       * mpz/random2.c: Likewise.
+       * mpz/random.c: Likewise.
+       * mpz/realloc.c: Likewise.
+
+       * mpz/legendre.c: __mpz_2factor(X) => mpz_scan1(X,0),
+       __mpz_odd_less1_2factor => mpz_scan1(X,1).
+       * mpz/ntsup.c: File deleted.
+       * mpz/Makefile.in: Corresponding changes.
+
+       * mpz/pprime_p.c: Use mpz_scan1 to avoid looping.
+
+       * mpz/fac_ui.c: Type of `k' and `p' is `unsigned long'.
+       * mpz/pprime_p.c: Pass long to *_ui functions.
+       * mpz/gcdext.c: Likewise.
+       * mpz/fdiv_r_2exp.c: Likewise.
+       * mpz/fac_ui.c: Likewise.
+
+       * mpz/powm.c: Don't use mpn_rshift when mod_shift_cnt is 0.
+
+       * mpz/tests/Makefile.in (st-sqrtrem): Fix typo.
+
+       * mpz/cmp_ui.c: #undef mpz_cmp_ui.
+       * mpz/cmp_si.c: #undef mpz_cmp_si.
+       * gmp.h (mpz_cmp_ui): New #define.
+       (mpz_cmp_si): New #define.
+
+Wed Feb 14 22:11:24 1996  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * gmp.h: Test __cplusplus in addition to __STDC__.
+       * gmp-impl.h: Likewise.
+
+       * gmp.h: Surround declarations with extern "C" { ... }.
+
+Tue Feb 13 15:20:45 1996  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpz/fdiv_r_2exp.c: Use MPN_NORMALIZE.
+       * mpz/tdiv_r_2exp.c: Likewise.
+
+       * mpz/fdiv_r_2exp.c: New file.
+       * mpz/fdiv_q_2exp.c: New file.
+       * mpz/tdiv_r_2exp.c: Renamed from mpz/mod_2exp.c.
+       * mpz/tdiv_q_2exp.c: Renamed from mpz/div_2exp.c.
+       * mpz/Makefile.in: Corresponding changes.
+
+       * mpz/scan0.c,scan1.c: New files.
+       * mpz/Makefile.in: Compile them.
+
+       * gmp.h (mpn_normal_size): Delete.
+
+       * config.guess: Update from Cygnus version.
+
+       * mpn/m68k/rshift.S: Use INSN2 macro for lea instructions.
+       * mpn/m68k/lshift.S: Likewise.
+
+       * mpn/configure.in: Fix configuration for plain 68000.
+
+Mon Feb 12 01:06:06 1996  Torbjorn Granlund  <tege@matematik.su.se>
+
+       * mpz/tests/t-powm.c: Generate negative BASE operand.
+
+       * mpz/powm.c: Make result always positive.
+
+Sun Feb 11 01:44:56 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * mpz/tests/*.c: Add t- prefix.
+       * mpz/tests/Makefile.in: Corresponding changes.
+       * mpz/tests/configure.in: Update srctrigger.
+
+       * mpz/tests/gcd.c: Generate negative operands.
+       * mpz/tests/gcd2.c: Likewise.
+
+       * mpz/gcdext.c: At end, if G is negative, negate all G, S, and T.
+
+Thu Feb  8 17:16:12 UTC 1996 Ken Weber <kweber@mat.ufrgs.br>
+
+       * mp{z,n}/gcd.c: Change mpn_gcd interface.
+       * gmp.h: Ditto.
+       * gmp.texi: Update documentation.
+
+Wed Feb  7 23:58:43 1996  Andreas Schwab  <schwab@informatik.uni-dortmund.de>
+
+       * mpn/m68k/{lshift,rshift}.S: New files.
+       * mpn/m68k/syntax.h: New ELF_SYNTAX macros.
+       (MEM_INDX, R, PROLOG, EPILOG): New macros.
+       * mpn/m68k/*.S: Use R macro with register name.  Use PROLOG and EPILOG
+       macros.  Rename `size' to `s_size' or s1_size to avoid clash with ELF
+       .size directive.
+       * mpn/configure.in: New target m68k-*-linux*.
+
+Wed Feb  7 07:41:31 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * Makefile.in (cre-conv-tab): Workaround for SunOS make.
+
+       * mpz/tests/reuse.c: New file.
+       * mpz/tests/Makefile.in: Handle reuse.c.
+
+Tue Feb  6 11:56:24 UTC 1996 Ken Weber <kweber@mat.ufrgs.br>
+
+       * mpz/gcd.c: Fix g->size when one op is 0 and g == other op.
+
+Tue Feb  6 01:36:39 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * gmp.h (mpz_divexact): Delete parameter names.
+       (mpz_lcm): Delete spurious declaration.
+
+       * mpz/dmincl.c: Fix typo.
+
+Mon Feb  5 01:11:56 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * mpn/generic/gcd.c (gcd_2): Declare consistently.
+
+       * mpz/tdiv_q.c: Optimize division by a single-limb divisor.
+       * mpz/dmincl.c: Likewise.
+
+       * mpz/add.c: Use MPN_NORMALIZE instead of mpn_normal_size.
+       * mpz/sub.c: Likewise.
+       * mpn/generic/sqrt.c: Likewise.
+
+       * mpn/tests/{add_n,sub_n,lshift,rshift}.c: Put garbage in the
+       destination arrays.
+
+Fri Feb  2 02:21:27 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * mpz/{jacobi.c,legendre.c,ntsup.c,invert.c}: New files.
+       * mpz/Makefile.in: Compile them.
+
+       * mpn/Makefile.in (INCLUDES): Don't search in `generic'.
+
+Thu Feb  1 02:15:11 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       Change from Ken Weber:
+       * mpz/divexact.c: Make it work when quot is identical to either input.
+
+       * mpf/ui_sub.c: New file.
+       * mpf/Makefile.in: Compile it.
+
+       * gmp-impl.h (MPZ_TMP_INIT): alloca -> TMP_ALLOC.
+       * mpz/{c,f}div_{q,qr,r}.c: Use TMP_DECL/TMP_MARK/TMP_FREE since
+       these use MPZ_TMP_INIT.
+       * mpz/mod.c: Likewise.
+       * mpq/{add,sub}.c: Likewise.
+       * mpq/canonicalize.c: Likewise.
+
+       * mpq/{add,sub,mul,div}.c: Use mpz_divexact. MP_INT -> mpz_t.
+       * mpq/canonicalize.c: Likewise.
+
+Wed Jan 31 01:45:00 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * mpn/generic/gcd.c: Misc changes from Ken.
+
+       * mpz/tests/gcd2.c: New file.
+       * mpz/tests/Makefile.in: Handle gcd2.c.
+
+       * mpn/generic/gcd.c (mpn_gcd): When GCD == ORIG_V, return vsize,
+       not orig_vsize.  Fix parameter declaration.
+
+       * mpz/mod_ui.c: Delete file.
+       * mpz/Makefile.in: Don't try to compile mod_ui.
+
+       * mpz/cdiv_*_ui.c: Make them work right.
+       * gmp.h: Declare cdiv*.
+
+Tue Jan 30 02:22:56 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * mpz/{cdiv_q.c,cdiv_q_ui.c,cdiv_qr.c,cdiv_qr_ui.c,cdiv_r.c,
+       cdiv_r_ui.c,cdiv_ui.c}: New files.
+       * mpz/Makefile.in: Compile them.
+
+       * All files: Make file permissions right.
+
+       Changes from Ken Weber:
+       * mpn/generic/accelgcd.c: Delete.
+       * mpn/generic/bingcd.c: Delete.
+       * mpn/generic/numbits.c: Delete.
+       * mpn/generic/gcd.c: New file.
+       * mpn/configure.in (functions): Update accordingly.
+       * mpz/divexact.c: New file.
+       * mpz/Makefile.in: Compile divexact.c.
+       * mpz/gcd.c: Rewrite to accommodate gcd changes in mpn.
+       * gmp.h: Declare new functions, delete obsolete declarations.
+       * mpz/tests/dive.c: New file.
+       * mpz/tests/Makefile.in: Handle dive.c.
+
+Mon Jan 29 03:53:24 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * mpz/random.c: Handle negative SIZE parameter.
+
+       * mpz/tests/tdiv(_ui).c: New name for tst-dm(_ui).c.
+       * mpz/tests/tst-mdm(_ui).c: Delete.
+       * mpz/tests/fdiv(_ui).c: New test based on tst-mdm(_ui).
+       * mpz/tests/*.c: Get rid of tst- prefix for DOS 8+3 naming.
+       * mpz/tests/Makefile.in: Corresponding changes.
+       * mpz/tests/configure.in: Update srctrigger.
+
+       * mpn/generic/divmod.c: Update from divrem.
+       * mpn/generic/divrem.c: Misc cleanups.
+
+Sun Jan 28 03:25:08 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * All files: Use new TMP_ALLOC interface.
+
+       * mpz/powm_ui.c: Make Jan 25 changes to powm.c also here.
+
+       * mpz/tests/powm_ui.c: New file.
+       * mpz/tests/Makefile.in: Add rules for tst-powm and tst-powm_ui.
+
+       * Makefile.in: Update dependency list.
+       * mpf/Makefile.in: Likewise.
+       * mpz/Makefile.in: Likewise.
+       * mpq/Makefile.in: Likewise.
+       * Makefile.in: Set RANLIB simply to ranlib, and allow configure
+       to override it.
+
+       * mpz/Makefile.in (conf): Delete spurious target.
+       (mp_bases.c): Delete.
+       (cre-conv-tab rules): Delete.
+
+       * Makefile.in (cre-conv-tab): Greatly simplify.
+
+Sat Jan 27 13:38:15 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * stack-alloc.c: New file.
+       * stack-alloc.h: New file.
+
+       * gmp.h (__gmp_inline): Define using __inline__.
+
+Thu Jan 25 00:28:37 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * mpn/generic/scan0.c: New file.
+       * mpn/generic/scan1.c: Renamed from next_bit.c.
+       * mpn/configure.in (functions): Include scan0 and scan1.
+
+       * mpn/m68k/*: #include sysdep.h.  Use C_GLOBAL_NAME.
+
+       * configure: Update from Cygnus version.
+       * config.guess: Likewise.
+       * config.sub: Likewise.
+       * configure: Pass --nfp to recursive configures.
+
+       * mpz/tests/tst-*.c: Adjust SIZE and reps.
+
+       * mpz/powm.c: Move esize==0 test earlier.
+       In final reduction of rp,rsize, don't call mpn_divmod unless
+       reduction is really needed.
+
+       * mpz/tests/tst-powm.c: Fix thinko in checking code.
+
+       * All files: Get rid of `__' prefix from mpn_* calls and declarations.
+       * gmp.h: #define __MPN.
+       * gmp.h: Use __MPN in #defines for mpn calls.
+
+       * mpn/generic/mul_n.c: Prepend `i' to internal routines.
+       * gmp-impl.h: Add #defines using __MPN for those internal routines.
+
+       * mpn/generic/sqrt.c: Change call to mpn_mul to mpn_mul_n.
+
+Wed Jan 24 13:28:19 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * mpn/sparc32/udiv_fp.S: New name for udiv_qrnnd.S.
+       * mpn/sparc32/udiv_nfp.S: New name for v8/udiv_qrnnd.S.
+       * mpn/sparc32/v8/supersparc: New directory.
+       * mpn/sparc32/v8/supersparc/udiv.S: New file.
+
+Tue Jan 23 01:10:11 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       This major contribution is from Ken Weber:
+       * mpn/generic/accelgcd.c: New file.
+       * mpn/generic/bdivmod.c: New file.
+       * mpn/generic/bingcd.c: New file.
+       * mpn/generic/gcd_1.c: Rewrite.
+       * mpn/generic/numbits.c: New file (to go away soon).
+       * mpz/gcd.c: Rewrite.
+       * mpz/tests/tst-gcd.c (SIZE): Now 128.
+       * gmp.h: Declare new functions.
+       * mpn/configure.in (functions): List new files.
+       * gmp-impl.h (MPN_SWAP): Delete.
+       (MPN_LESS_BITS_LIMB, MPN_LESS_BITS, MPN_MORE_BITS): Delete.
+       (MPN_COMPL_INCR, MPN_COMPL): Delete.
+
+Mon Jan 22 02:04:59 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * gmp.h (mpn_name): New #define.
+
+       * mpn/m88k/mc88110/addmul_1.s: New file.
+       * mpn/m88k/mc88110/add_n.S: New file.
+       * mpn/m88k/mc88110/sub_n.S: New file.
+
+       * mpn/m88k/sub_n.s: Correctly initialize carry.
+
+       * mpn/sparc32/{add_n.S,sub_n.S,lshift.S,rshift.S}: `beq' => `be'.
+
+Sun Jan 21 00:04:35 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * mpn/sparc64/addmul_1.s: New file.
+       * mpn/sparc64/submul_1.s: New file.
+       * mpn/sparc64/rshift.s: New file.
+
+Sat Jan 20 00:32:54 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * mpz/iset.c: Fix typo introduced Dec 25.
+
+Wed Jan 17 13:16:44 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * config/mt-sprc8-gcc: New name for mt-sparc8-gcc.
+       * config/mt-sparcv8-gcc: Delete.
+       * configure.in: Corresponding changes.
+
+Tue Jan 16 16:31:01 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * gmp-impl.h: #include alloca.h when necessary.
+
+       * longlong.h: Test __alpha instead of __alpha__, since the former
+       is the standard symbol.
+
+Mon Jan 15 18:06:57 1996  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * mpn/sparc64/mul_1.s: Swap operands of mulx instructions.
+       * mpn/sparc64/lshift.s: New file.
+
+Fri Dec 29 17:34:03 1995  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * mpn/x86/pentium/add_n.S: Get rid of #defines for register names.
+       * mpn/x86/pentium/sub_n.S: Likewise.
+
+Thu Dec 28 03:16:57 1995  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * mpn/x86/pentium/mul_1.S: Rework loop to avoid AGI between update
+       of loop induction variable and load insn at beginning of loop.
+       * mpn/x86/pentium/addmul_1.S: Likewise.
+       * mpn/x86/pentium/submul_1.S: Likewise.
+
+Mon Dec 25 23:22:55 1995  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * All files: Prefix user-visible structure fields with _mp_.
+
+Fri Dec 22 20:42:17 1995  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * mpn/configure.in (m68k configs): Terminate path variable with
+       plain "m68k".
+
+Fri Dec 22 03:29:33 1995  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * mpn/sparc32/add_n.S: Update from sub_n.S to fix bugs, and to
+       clean things up.
+
+       * mpn/configure.in (m68k configs): Update #include path for new
+       mpn directory organization.
+
+Tue Dec 12 02:53:02 1995  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * gmp.h: Prefix all structure fields with _mp_.
+       * gmp-impl.h: Define access macros for these fields.
+
+Sun Dec 10 00:47:17 1995  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * mpn/alpha/addmul_1.s: Prefix labels with `.'.
+       * mpn/alpha/submul_1.s: Likewise.
+       * mpn/alpha/[lr]shift.s: Likewise.
+       * mpn/alpha/udiv_qrnnd.S: Likewise.
+       * mpn/alpha/ev5/[lr]shift.s: Likewise.
+
+       * mpn/alpha/ev5/lshift.s: Fix typos.
+
+Fri Dec  1 14:28:20 1995  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * mpn/Makefile.in (.SUFFIXES): Define.
+
+Wed Nov 29 23:11:57 1995  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * mpn/sparc64/{add_n.s, sub_n.s}: New files.
+
+Tue Nov 28 06:03:13 1995  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * mpn/x86/syntax.h: Handle ELF_SYNTAX.
+       Rename GAS_SYNTAX => BSD_SYNTAX.
+
+       * mpn/configure.in: Handle linuxelf and SysV for x86 variants.
+
+Mon Nov 27 01:32:12 1995  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * mpn/hppa/hppa1_1/pa7100/submul_1.S: New file.
+
+Sun Nov 26 04:30:47 1995  Torbjorn Granlund  <tege@noisy.matematik.su.se>
+
+       * mpn/hppa/hppa1_1/pa7100/addmul_1.S: New file.
+
+       * mpn/sparc32/add_n.S: Rewrite to use 64 bit loads/stores.
+       * mpn/sparc32/sub_n.S: Likewise.
+
+Fri Nov 17 00:18:46 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>
+
+       * mpn/configure.in: Handle m68k on NextStep.
+
+Thu Nov 16 02:30:26 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>
+
+       * mpn: Reorganize machine-specific directories.
+       * mpn/configure.in: Corresponding changes.
+       (sh, sh2): Handle these.
+       (m68k targets): Create asm-syntax.h.
+
+Thu Nov  9 02:20:50 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>
+
+       * mpn/generic/mul_n.c (____mpn_sqr_n): Delete code that calls abort.
+       (____mpn_mul_n): Likewise.
+
+Tue Nov  7 03:25:12 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>
+
+       * mpf/get_str.c: In exponentiation code (two places), don't swap
+       input and output areas when calling mpn_mul_1.
+       * mpf/set_str.c: Likewise.
+
+Fri Nov  3 02:35:58 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>
+
+       * mpf/Makefile.in: Make sure all objects are listed in dependency list;
+       delete spurious entries.
+
+       * mpf/mul.c: Handle U or V being 0.  Allow prec+1 for result precision.
+
+       * mpf/set_prec.c: New computation of limb precision.
+       * mpf/set_dfl_prec.c: Likewise.
+
+       * mpf/random2.c: Fix typo computing exp.
+       * mpf/get_str.c: In (uexp > usize) case, set n_limbs as a function of
+       the user-requested number of digits, n_digits.
+
+Thu Nov  2 16:25:07 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>
+
+       * mpn/generic/divrem.c (case 2): Don't move np vector back, it is
+       never read.
+       (default case): Put most significant limb from np in new variable n2;
+       decrease size argument for MPN_COPY_DECR; use n2 instead of np[dsize].
+
+Wed Nov  1 02:59:53 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>
+
+       * mpn/sparc/[lr]shift.S: New files.
+
+Tue Oct 31 00:08:12 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>
+
+       * mpz/gcd_ui.c: Set w->size unconditionally when v is zero.
+
+       * gmp-impl.h (assert): Delete definition.
+
+       * mpf/sub.c: Delete all assert calls.  Delete variable `cy'.
+
+       * mpf/neg.c: Use prec+1 as precision.  Optimize for when arguments
+       are the same.
+       * mpf/abs.c: Likewise.
+       * mpf/{set,neg,abs}.c: Make structure and variable names similar.
+
+Mon Oct 30 12:45:26 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>
+
+       * mpf/random2.c (random): Test __SVR4 in addition to __svr4__.
+       * mpn/generic/random2.c (random): Likewise.
+
+Sun Oct 29 01:54:28 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>
+
+       * mpf/div.c: Specially handle U or V being 0.
+
+       * mpf/random2.c: New file.
+
+       * longlong.h (i860 rshift_rhlc): Define.
+       (i960 udiv_qrnnd): Define.
+       (i960 count_leading_zeros): Define.
+       (i960 add_ssaaaa): Define.
+       (i960 sub_ddmmss): Define.
+       (i960 rshift_rhlc): Define.
+
+Sat Oct 28 19:09:15 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>
+
+       * mpn/pentium/rshift.S: Fix and generalize condition for when to use
+       special code for shift by 1.
+       * mpn/pentium/lshift.S: Likewise.
+
+Thu Oct 26 00:02:56 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>
+
+       * gmp.h: #undef __need_size_t.
+       * mp.h: Update from gmp.h.
+
+Wed Oct 25 00:17:27 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>
+
+       * mpf/Makefile.in: Compile set_prec.c.
+       * mpf/realloc.c: Delete this file.
+       * mpf/Makefile.in: Delete mentions of realloc.c.
+
+       * gmp.h (__mpf_struct): Get rid of `alloc' field.
+       * mpf/clear.c: Likewise.
+       * mpf/init*.c: Likewise.
+       * mpf/set_prec.c: Likewise.
+       * mpf/iset*.c: Likewise.
+
+       * mpf/iset_str.c: New file.
+
+       * mpn/configure.in: Handle pyramid.
+
+       * mpf/set.c: Use prec+1 as precision.
+
+       * mpf/set_prec.c: New file.
+
+Tue Oct 24 00:56:41 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>
+
+       * mpn/generic/divrem.c: New file.  Will replace mpn/generic/divmod.c
+       when rest of source is converted.
+       * mpn/configure.in (functions): Add `divrem'.
+       * mpn/generic/set_str.c: Never call __mpn_mul_1 with zero size.
+
+       * mpf/get_str.c: Completely rewritten.
+       * mpf/add.c: Fix several problems.
+       * mpf/sub.c: Compare operands from most significant end until
+       first difference, exclude skipped limbs from computation.
+       Accordingly simplify normalization code.
+       * mpf/set_str.c: Fix several problems.
+       * mpf/dump.c: New file.
+       * mpf/Makefile.in: Compile dump.c.
+       * mpf/init2.c: Set prec field correctly.
+
+Sun Oct 22 03:02:09 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>
+
+       * cre-conv-tab.c: #include math.h; don't declare log and floor.
+
+Sat Oct 21 23:04:10 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>
+
+       * mpf/mul_ui.c: Handle U being 0.
+
+Wed Oct 18 19:39:27 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>
+
+       * mpn/generic/set_str.c: Correctly handle input like "000000000000".
+       Misc cleanups.
+
+Tue Oct 17 15:14:13 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>
+
+       * longlong.h: Define COUNT_LEADING_ZEROS_0 for machines where
+       appropriate.
+
+Mon Oct 16 19:14:43 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>
+
+       * mpf/add.c: Rewrite.
+       * mpf/set_str.c: New file.  Needs more work.
+
+Sat Oct 14 00:14:04 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>
+
+       * mpf/div_2exp.c: Vastly simplify.
+       * mpf/mul_2exp.c: Likewise.
+
+       * mpf/sub.c: Rewrite.
+
+       * gmp-impl.h (udiv_qrnnd_preinv2gen): Terminate comment.
+
+       * mpf/dump.c: Free allocated memory.
+
+       * gmp-impl.h (assert): Define.
+
+Wed Oct 11 13:31:00 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>
+
+       * mpn/pentium/rshift.S: Install new code to optimize shift-by-1.
+
+Tue Oct 10 00:37:21 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>
+
+       * mpn/pentium/lshift.S: Install new code to optimize shift-by-1.
+
+       * mpn/powerpc32/{lshift.s,rshift.s}: New files.
+
+       * configure.in: Fix typo.
+
+Sat Oct  7 08:17:09 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>
+
+       * longlong.h (smul_ppmm): Correct type of __m0 and __m1.
+
+Wed Oct  4 16:31:28 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>
+
+       * mpn/configure.in: Handle alphaev5.
+       * mpn/ev4: New name for alpha subdir.
+       * mpn/ev5: New subdir.
+       * mpn/ev5/lshift.s: New file.
+
+Tue Oct  3 15:06:45 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>
+
+       * mpn/alpha/mul_1.s: Avoid static increments of pointers; use
+       corresponding offsets in ldq and stq instructions instead.
+       (Loop): Swap cmpult and stq to save one cycle on EV5.
+
+       * mpn/tests/{add_n.s,sub_n.s,lshift.s,rshift.s,mul_1.s,addmul_1.s,
+       submul_1.s}: Don't check results if NOCHECK is defined.
+
+Mon Oct  2 11:40:18 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>
+
+       * longlong.h (mips umul_ppmm [32 and 64 bit versions]):
+       Make new variants, based on GCC version number, that use `l' and `h'
+       constraints instead of explicit mflo and mfhi instructions.
+
+Sun Oct  1 00:17:47 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>
+
+       * mpn/mc88100/add_n.s: Decrease unrolling factor from 16 to 8.
+       * mpn/mc88100/sub_n.s: Likewise.
+
+       * config/mt-m88110: New file.
+       * configure.in: Use it.
+
+       * mpn/mc88110/mul_1.s: Fix thinko.
+
+Sat Sep 30 21:28:19 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>
+
+       * mpz/set_d.c: Declare `size' at function start.
+
+       * experimental: New directory for mpx and mpz2.
+
+       * mpz/tdiv_q.c: Clarify comments.
+       * mpz/{mod.c,mod_ui.c}: New files, for math mod function.
+
+       * mpn/sh2/{mul_1.s,addmul_1.s,submul_1.s}: New files.
+
+       * mpn/sh/{add_n.s,sub_n.s}: New files.
+
+       * mpn/pyr/{add_n.s,sub_n.s,mul_1.s,addmul_1.s}: New files.
+
+       * mpn/i960/{add_n.s,sub_n.s}: New files.
+
+       * mpn/alpha/addmul_1.s (Loop): Move decrement of r18 to before umulh,
+       to save cycles on EV5.
+       * mpn/alpha/submul_1.s: Ditto.
+       * mpn/alpha/mul_1.s: Ditto.
+
+Thu Sep 28 02:48:59 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>
+
+       * gmp.h (mp_limb, mp_limb_signed): Define as `long long' if
+       _LONG_LONG_LIMB is defined.
+
+       * longlong.h (m88110): Test __m88110__, not __mc88110__.
+
+       * mpn/mc88110/mul_1.s: Rewrite.
+
+Tue Sep 26 23:29:05 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>
+
+       * config.sub: Update from current Cygnus version.
+
+       * mpn/configure.in: Recognize canonical m88*, not mc88*.
+
+Fri Sep 22 14:58:05 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>
+
+       * mpz/set_d.c: New file.
+       * mpz/Makefile.in: Build new files.
+
+       * mpq/get_d.c: Replace usage of scalbn with ldexp.
+
+       * mpn/{vax,i386}/gmp-mparam.h: New files.
+       * gmp-impl.h (ieee_double_extract): Define here.
+       * mpf/set_d.c (ieee_double_extract): Not here.
+
+Thu Sep 21 00:56:36 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>
+
+       * longlong.h (C umul_ppmm): Use UWtype, not USItype for temps.
+       (udiv_qrnnd): For cases implemented with call to __udiv_qrnnd,
+       protect with new symbol LONGLONG_STANDALONE.
+       (68000 umul_ppmm): Use %# prefix for immediate constants.
+
+Wed Sep 20 15:36:23 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>
+
+       * mpn/generic/divmod_1.c: Handle
+       divisor_limb == 1 << (BITS_PER_MP_LIMB - 1)
+       specifically also when normalization_steps != 0.
+
+Mon Sep 18 15:42:30 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>
+
+       * mpq/get_d.c: New file.
+
+Sun Sep 17 02:04:36 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>
+
+       * longlong.h (pyr): Botch up for now.
+
+Sat Sep 16 00:11:50 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>
+
+       * mpn/clipper/mul_1.s: New file.
+       * mpn/clipper/add_n.s: New file.
+       * mpn/clipper/sub_n.s: New file.
+       * mpn/configure.in: Handle clipper*-*-*.
+
+       * mpn/configure.in: Recognize rs6000-*-*.
+
+Fri Sep 15 00:41:34 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>
+
+       * mpn/alpha/add_n.s: New file.
+       * mpn/alpha/sub_n.s: New file.
+
+       * mpn/mips3: New name for mpn/r4000.
+       * mpn/mips2: New name for mpn/r3000.
+       * mpn/configure.in: Corresponding changes.
+
+       * mpn/generic/perfsqr.c (primes): Delete.
+       (residue_map): Delete.
+
+Thu Sep 14 00:07:58 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>
+
+       * mpn/r3000/sub_n.s: Fix typo.
+
+       * dm_trunc.c: Delete spurious file.
+
+       * mpz/out_binary.c: Fix typo.
+
+       * mpn/configure.in (per-target): Make mips*-*-irix6* imply r4000.
+
+       * gmp-impl.h: For sparc and sgi, include alloca.h.
+
+       * mpn/z8000/mul_1.s: Replace `test r' with `and r,r'.  Replace
+       `ldk r,#0' with `xor r,r'.
+
+Wed Sep  6 00:58:38 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>
+
+       * mpz/inp_binary.c: New file.
+       * mpz/out_binary.c: New file.
+       * mpz/Makefile.in: Build new files.
+
+Tue Sep  5 22:53:51 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>
+
+       * gmp.h (__mpz_struct): Change `long int' => `mp_size_t' for alloc
+       and size fields.
+
+Sat Sep  2 17:47:59 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>
+
+       * mpn/r4000/{add_n.s,sub_n.s}: Optimize away some pointer arithmetic.
+       * mpn/r3000/{add_n.s,sub_n.s,lshift.s,rshift.s}: New files,
+       derived from r4000 code.
+
+Fri Sep  1 05:35:52 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>
+
+       * mpn/r3000/mul_1.s: Fix typo.
+
+       * mpn/powerpc32: Fix some old vs new mnemonic issues.
+
+       * mpn/powerpc32/{add_n.s,sub_n.s}: New files.
+       * mpn/r4000/{add_n.s,sub_n.s,lshift.s,rshift.s}: New files.
+
+Wed Aug 30 10:43:47 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>
+
+       * mpn/r3000/mul_1.s ($LC1): Use addiu for immediate add.
+       * mpn/r4000/{mul_1.s,addmul_1.s,submul_1.s}: New files.
+
+       * config.guess: Update to latest FSF revision.
+
+Mon Aug 28 02:18:13 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>
+
+       * mpz/out_str.c: Cast str to char * in fputs call.
+
+       * gmp-impl.h: Define UQItype, SItype, and USItype also
+       when not __GNUC__.
+
+Fri Aug 25 01:45:04 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>
+
+       * mpn/i386/syntax.h: Renamed from asm-syntax.h.
+       * mpn/mc68020/syntax.h: Renamed from asm-syntax.h.
+       * mpn/configure.in: Corresponding changes.
+
+Sun Aug 13 19:20:04 1995  Torbjorn Granlund  <tege@bozo.matematik.su.se>
+
+       * mpn/generic/random2.c: Test __hpux, not hpux.
+
+Sat Apr 15 20:50:33 1995  Torbjorn Granlund  (tege@tiny.cygnus.com)
+
+       * mpn/sparc/add_n.S: Make it work for PIC.
+       * mpn/sparc/sub_n.s: Likewise.
+       * mpn/sparc8/addmul_1.S: Likewise.
+       * mpn/sparc8/mul_1.S: Likewise.
+       * mpn/i386/add_n.S: Likewise.
+       * mpn/i386/sub_n.S: Likewise.
+
+Thu Apr 13 23:15:03 1995  Torbjorn Granlund  (tege@tiny.cygnus.com)
+
+       * mpn/configure.in: Don't search power subdir for generic ppc configs.
+       Add some ppc cpu-specific configs.  Misc clean up.
+
+Mon Apr 10 00:16:35 1995  Torbjorn Granlund  (tege@tiny.cygnus.com)
+
+       * mpz/ui_pow_ui.c: Delete spurious code to handle negative results.
+
+Sun Apr  9 12:38:11 1995  Torbjorn Granlund  (tege@tiny.cygnus.com)
+
+       * longlong.h (SPARC v8 udiv_qrnnd): Generate remainder in C,
+       not in asm.
+
+       * mpn/generic/sqrt.c (SQRT): Test for __SOFT_FLOAT.
+
+Tue Mar 28 00:19:52 1995  Torbjorn Granlund  (tege@tiny.cygnus.com)
+
+       * mpn/generic/hamdist.c (popc_limb): Make Mar 16 change here too.
+
+Fri Mar 17 23:29:22 1995  Torbjorn Granlund  (tege@tiny.cygnus.com)
+
+       * longlong.h (SH umul_ppmm): Define.
+
+Thu Mar 16 16:40:44 1995  Torbjorn Granlund  (tege@tiny.cygnus.com)
+
+       * mpn/generic/popcount.c (popc_limb): Rearrange 32 bit case
+       to help CSE.
+
+Fri Mar 10 20:03:49 1995  Torbjorn Granlund  (tege@tiny.cygnus.com)
+
+       * mpn/powerpc32/mul_1.s: Clear cy before entering loop.
+       Rearrange loop to save a cycle.
+       * mpn/powerpc32/addmul_1.s: New file.
+       * mpn/powerpc32/submul_1.s: New file.
+
+Fri Feb 17 22:44:45 1995  Torbjorn Granlund  (tege@tiny.cygnus.com)
+
+       * mpn/configure.in: Set target_makefile_frag for freebsd
+       in new case stmt.
+       * mpn/config/t-freebsd: New file.
+       * mpn/Makefile.in: Add #### for frag insertion.
+       (XCFLAGS): Clear by default.
+       (.c.o, .S.o rules): Pass XCFLAGS.
+
+Tue Feb  7 16:27:50 1995  Torbjorn Granlund  (tege@tiny.cygnus.com)
+
+       * longlong.h (68000 umul_ppmm): Merge improvements from henderson.
+
+Tue Jan 24 04:23:20 1995  Torbjorn Granlund  (tege@tiny.cygnus.com)
+
+       * longlong.h (default umul_ppmm): Store input parameters in temporaries
+       to avoid reading them twice.
+       (default smul_ppmm): New definition.
+
+Thu Dec 29 04:20:07 1994  Jim Meyering  (meyering@comco.com)
+
+       * generic/perfsqr.c (__mpn_perfect_square_p): Remove declaration
+       of unused variable.
+       * generic/pre_mod_1.c (__mpn_preinv_mod_1): Likewise.
+       * mpz/powm.c (pow): Likewise.
+
+       * mpz/and.c (mpz_and): Use {} instead of `;' for empty else clause
+       to placate `gcc -Wall'.
+       * mpz/ior.c (mpz_ior): Likewise.
+
+Wed Dec 28 13:31:40 1994  Torbjorn Granlund  (tege@tiny.cygnus.com)
+
+       * mpn/m*68*/*.S: #include asm-syntax.h, not asm.h.
+
+Mon Dec 26 17:15:36 1994  Torbjorn Granlund  (tege@tiny.cygnus.com)
+
+       * longlong.h: Test for more symbols, in __mc68000__ case.
+
+       * mpn/mpn/config.sub: Recognize m68060.
+       * mpn/configure.in: Change mc* to m* for 68k targets.
+       * mpn/Makefile.in (.S.o): Delete spurious creation of temp .c file.
+
+Mon Dec 19 01:56:30 1994  Torbjorn Granlund  (tege@tiny.cygnus.com)
+
+       * config.sub: Recognize pentium as a valid CPU.
+       * mpn/configure.in: Handle pentium specifically, to use new assembly
+       code.
+
+Mon Dec 19 00:13:01 1994  Jim Meyering  (meyering@comco.com)
+
+       * gmp.h: Define _GMP_H_HAVE_FILE if FILE, __STDIO_H__, or H_STDIO
+       is defined.
+       * gmp.h: test _GMP_H_HAVE_FILE instead of FILE everywhere else.
+
+Mon Dec 19 00:04:54 1994  Kent Boortz  (boortz@sics.se)
+
+       * Makefile.in (recursive makes): Pass CFLAGS.
+
+Sun Dec 18 22:34:49 1994  Torbjorn Granlund  (tege@tiny.cygnus.com)
+
+       * mpn/pentium: New directory.
+
+       * mpz/pprime.c: Make sure to mpz_clear all temporaries.
+
+       * longlong.h: Don't use udiv instruction when SUPERSPARC is defined.
+       * configure.in: Handle supersparc*-.
+       * config/mt-supspc-gcc: New file.
+       * config/mt-sparc8-gcc: New name for mt-sparcv8-gcc.
+
+Mon Dec 12 22:22:10 1994  Torbjorn Granlund  (tege@tiny.cygnus.com)
+
+       * mpn/i386/*.S: #include "asm-syntax.h", not "asm.h".
+       #include sysdep.h before asm-syntax.h.
+
+       * mpn/mc68020/asm-syntax.h: #undef ALIGN before defining it.
+       * mpn/i386/asm-syntax.h: Likewise.
+
+       * mpn/mc68020/asm-syntax.h: New name for asm.h.
+       * mpn/i386/asm-syntax.h: New name for asm.h.
+
+Tue Dec  6 21:55:25 1994  Torbjorn Granlund  (tege@tiny.cygnus.com)
+
+       * mpz/array_init.c: Fix typo in declaration.
+
+Fri Nov 18 19:50:52 1994  Torbjorn Granlund  (tege@tiny.cygnus.com)
+
+       * mpn/Makefile.in (.S.o): Pass CFLAGS and INCLUDES.
+
+Mon Nov 14 00:34:12 1994  Torbjorn Granlund  (tege@tiny.cygnus.com)
+
+       * mpn/generic/random2.c (random): Test for __svr4__.
+
+Wed Oct 12 23:28:16 1994  Torbjorn Granlund  (tege@tiny.cygnus.com)
+
+       * cre-conv-tab.c (main): Avoid upper-case X in printf format string.
+
+Tue Aug 23 17:16:35 1994  Torbjorn Granlund  (tege@tiny.cygnus.com)
+
+       * mpz/perfsqr.c: Use mpn_perfect_square_p.
+       * mpn/generic/perfsqr.c: New file.
+
+Wed Jul  6 13:46:51 1994  Torbjorn Granlund  (tege@tiny.cygnus.com)
+
+       * mpz/array_init.c: New file.
+       * mpz/Makefile.in: Compile array_init.
+       * gmp.h: Declare mpz_array_init.
+
+Mon Jul  4 01:10:03 1994  Torbjorn Granlund  (tege@tiny.cygnus.com)
+
+       * mpz/add.c: Fix bogus comment.
+       * mpz/sub.c: Likewise.
+
+Sat Jul  2 02:14:56 1994  Torbjorn Granlund  (tege@adder.cygnus.com)
+
+       * mpn/generic/pre_mod_1.c: New file.
+       * mpz/perfsqr.c: Use __mpn_preinv_mod_1 when faster.
+
+Fri Jul 01 22:10:19 1994  Richard Earnshaw (rwe11@cl.cam.ac.uk)
+
+       * longlong.h (arm umul_ppmm): Fix typos in last change.  Mark
+       hard-coded registers with "%|"
+
+Thu Jun 30 03:59:33 1994  Torbjorn Granlund  (tege@tiny.cygnus.com)
+
+       * mpz/perfsqr.c: Define PP, etc, for machines with 64 bit limbs.
+       Use __mpn_mod_1.
+       * mpz/perfsqr.c: Don't clobber REM in quadratic residue check loop.
+
+Wed Jun 29 18:45:41 1994  Torbjorn Granlund  (tege@adder.cygnus.com)
+
+       * mpn/generic/sqrt.c (SQRT): New asm for IBM POWER2.
+
+       * mpz/gcd_ui.c: Return 0 if result does not fit an unsigned long.
+
+       * gmp.h: Use "defined (__STDC__)" consistently.
+
+Tue Jun 28 18:44:58 1994  Torbjorn Granlund  (tege@adder.cygnus.com)
+
+       * gmp.h (mpz_get_si): Don't use "signed" keyword for return type.
+
+       * mpz/tests/Makefile.in: Use CFLAGS for linking.
+
+       * Makefile.in (CFLAGS): Use -O2 here.
+       * mpn/Makefile (CFLAGS): Not here.
+
+       * mpq/cmp_ui.c: Fix typo.
+       * mpq/canonicalize.c: Fix typo.
+       * mpz/gcd_ui.c: Handle gcd(0,v) and gcd(u,0) correctly.
+       * mpn/generic/gcd_1.c: Fix braino in last change.
+
+Mon Jun 27 16:10:27 1994  Torbjorn Granlund  (tege@rtl.cygnus.com)
+
+       * mpz/gcd_ui.c: Change return type and return result.
+       Allow destination param to be NULL.
+       * gmp.h: Corresponding change.
+       * mpn/generic/gcd_1.c: Handle zero return from mpn_mod_1.
+
+Tue Jun 14 02:17:43 1994  Torbjorn Granlund  (tege@tiny.cygnus.com)
+
+       * mpn/i386/asm.h (ALIGN): Make it take a parameter.
+       * mpn/i386/*.S: Use ALIGN to align all loops.
+
+       * mpn/i386/*.S: Move colon inside C_GLOBAL_NAME expression.
+       (Makes old versions of GAS happy.)
+
+Sat May 28 01:43:54 1994  Torbjorn Granlund  (tege@adder.cygnus.com)
+
+       * Many files: Delete unused variables and labels.
+       * mpn/generic/dump.c: cast printf width argument to int.
+
+Wed May 25 00:42:37 1994  Torbjorn Granlund  (tege@thepub.cygnus.com)
+
+       * mpz/gcd.c (mpz_gcd): Normalize after __mpn_sub calls.
+       (xmod): Ignore return value of __mpn_divmod.
+       (xmod): Improve normalization code.
+
+Sat May 21 01:30:09 1994  Torbjorn Granlund  (tege@adder.cygnus.com)
+
+       * mpz/gcdext.c: Cosmetic changes.
+
+       * mpz/fdiv_ui.c: New file.
+
+Fri May 20 00:24:53 1994  Torbjorn Granlund  (tege@adder.cygnus.com)
+
+       * mpz/tests/Makefile.in: Use explicit rules for running tests,
+       not a shell loop.
+       (clean): Delete stmp-*.
+
+       * mpz/Makefile.in: Update.
+
+       * mpz/div_ui.c: Don't include longlong.h.
+       * mpz/dm_ui.c: Likewise.
+
+       * mpz/fdiv_q.c, mpz/fdiv_q_ui.c, mpz/fdiv_qr.c, mpz/fdiv_qr_ui.c,
+       mpz/fdiv_r.c, mpz/fdiv_r_ui.c: New files.  Code partly from deleted
+       mdm.c, mdm_ui.c, etc, partly rewritten.
+       * mpz/dm_floor_ui.c, mpz/dm_floor.c: Delete.
+       * mpz/mdm.c, mpz/mdm_ui.c, mpz/mdiv.c, mpz/mdiv_ui.c, mpz/mmod.c,
+       mpz/mmod_ui.c: Delete.
+
+       * mpz/tdiv_q.c, mpz/tdiv_q_ui.c, mpz/tdiv_qr.c, mpz/tdiv_qr_ui.c,
+       mpz/tdiv_r.c, mpz/tdiv_r_ui.c:
+       New names for files implementing truncating division.
+       * mpz/div_ui.c, mpz/dm_ui.c, mpz/mod_ui.c: Simplify.
+
+       * mpn/Makefile.in (.S.o): Don't rely on CPP being defined, use CC
+       instead.
+       (clean): Delete tmp-*.
+
+Thu May 19 01:37:44 1994  Torbjorn Granlund  (tege@adder.cygnus.com)
+
+       * mpz/cmp.c: Call __mpn_cmp.
+
+       * mpz/popcount.c: Fix typo.
+
+       * mpz/powm_ui.c: Simplify main loop.  Keep principal operand size
+       smaller than MSIZE when possible.
+       * mpz/powm.c: Likewise.
+
+       * mpn/generic/sqrt.c: Move alloca calls into where the memory is
+       needed.  Simplify.
+
+       * gmp.h (_PROTO): New macro.
+       Add many function declarations; use _PROTO macro in all declarations.
+
+       * mpf/*.c: Prepend mpn calls with __.
+
+Wed May 18 20:57:06 1994  Torbjorn Granlund  (tege@adder.cygnus.com)
+
+       * mpf/*ui*.c: Make ui argument `long' for consistency with mpz
+       functions.
+
+       * mpf/div_ui.c: Simplify.
+
+Tue May 17 01:05:14 1994  Torbjorn Granlund  (tege@adder.cygnus.com)
+
+       * mpz/*.c: Prepend mpn calls with __.
+
+       * mpz/mul_ui.c: Use mpn_mul_1.
+
+Mon May 16 17:19:41 1994  Torbjorn Granlund  (tege@adder.cygnus.com)
+
+       * mpn/i386/mul_1.S: Use C_GLOBAL_NAME.
+       * mpn/i386/mul_1.S, mpn/i386/addmul_1.S, mpn/i386/submul_1.S:
+       Nuke use of LAB.
+
+Sat May 14 14:21:02 1994  Torbjorn Granlund  (tege@adder.cygnus.com)
+
+       * gmp-impl.h: Don't define abort here.
+
+       * mpz/pow_ui.c: Increase temporary allocation.
+       * mpz/ui_pow_ui.c: Likewise.
+
+       * gmp.h (mpz_add_1, mpz_sub_1): Don't call memcpy.
+
+       * All Makefile.in: Delete spurious -I arguments.
+       Update dependencies.
+
+       * mpz/popcount.c: New file.
+       * mpz/hamdist.c: New file.
+
+       * All configure: Latest version from Cygnus.
+
+       * mpq/Makefile.in: New file.
+       * mpq/configure.in: New file.
+       * Makefile.in, configure.in: Enable compilation of mpq.
+
+       * mpq/set_z.c: Fix typos.
+       * mpq/canonicalize.c: Fix typos.
+       * mpq/cmp_ui.c: Fix typos.
+
+       * mpf/add_ui.c: Read U->D into UP always.  Delete spurious MPN_COPY.
+       * mpf/sub_ui.c: Likewise.
+
+       * gmp-impl.h: Don't redefine alloca.
+
+       * COPYING.LIB: Renamed from COPYING.
+
+Wed May 11 01:45:44 1994  Torbjorn Granlund  (tege@adder.cygnus.com)
+
+       * mpz/powm_ui.c: When shifting E left by C+1, handle out-of-range
+       shift counts.  Fix typo when testing negative_result.
+       * mpz/powm.c: Likewise.
+
+       * mpz/ui_pow_ui.c: New file.
+       * mpz/Makefile.in: Update.
+
+       * mpz/pow_ui.c: Call __mpn_mul_n instead of __mpn_mul when possible.
+
+       * mpz/div.c, mpz/div_ui.c, mpz/gcd.c: Prefix external mpn calls.
+       * mpz/gcd.c: Declare mpn_xmod.
+
+       * mpz/powm.c: Major changes to accommodate changed mpn semantics.
+       * mpz/powm_ui.c: Update from mpz/powm.c.
+
+       * mpz/tests/tst-io.c: New file.
+       * mpz/tests/tst-logic: New file.
+       * mpz/tests/Makefile.in: Update.
+
+       * mpz/inp_str.c: Get base right when checking for first digit.
+       * mpz/inp_str.c: Allocate more space for DEST when needed.
+
+       * mpz/com.c: Use mpn_add_1 and mpn_sub_1.
+       * mpz/and.c, mpz/ior.c: Likewise.  Simplify somewhat.
+
+       * mpz/add_ui.c: Use mpn_add_1 and mpn_sub_1.
+       Rename parameters to be consistent with mpz/sub_ui.
+       General simplifications.
+       * mpz/sub_ui.c: Likewise.
+
+Tue Aug 10 19:41:16 1993  Torbjorn Granlund  (tege@prudens.matematik.su.se)
+
+       * mpf: New directory.
+       * mpf/*.c: Merge basic set of mpf functions.
+
+       * Many logs missing...
+
+Sun Apr 25 18:40:26 1993  Torbjorn Granlund  (tege@pde.nada.kth.se)
+
+       * memory.c: Use #if instead of #ifdef for __STDC__ for consistency.
+       * bsd/xtom.c: Likewise.
+
+       * mpz/div.c: Remove free_me and free_me_size and their usage.
+       Use mpn_divmod for division; corresponding changes in return value
+       convention.
+       * mpz/powm.c: `carry_digit' => `carry_limb'.
+       * bsd/sdiv.c: Clarify comment.
+
+Sun Apr 25 00:31:28 1993  Torbjorn Granlund  (tege@pde.nada.kth.se)
+
+       * longlong.h (__udiv_qrnnd_c): Make all variables `unsigned long int'.
+
+Sat Apr 24 16:23:33 1993  Torbjorn Granlund  (tege@pde.nada.kth.se)
+
+       * longlong.h (__udiv_qrnnd_c): Make all variables `unsigned long int'.
+
+       * gmp-impl.h: #define ABS.
+       * (Many files): Use ABS instead of abs.
+
+       * mpn/generic/sqrt.c, mpz/clrbit.c, mpz/get_si.c, mpz/mod_2exp.c,
+       mpz/pow_ui.c: Cast 1 to mp_limb before shifting.
+
+       * mpz/perfsqr.c: Use #if, not plain if for exclusion of code for
+       non-32-bit machines.
+
+Tue Apr 20 13:13:58 1993  Torbjorn Granlund  (tege@du.nada.kth.se)
+
+       * mpn/generic/sqrt.c: Handle overflow for intermediate quotients by
+       rounding them down to fit.
+
+       * mpz/perfsqr.c (PP): Define in hexadecimal to avoid GCC warnings.
+
+       * mpz/inp_str.c (char_ok_for_base): New function.
+       (mpz_inp_str): Use it.
+
+Sun Mar 28 21:54:06 1993  Torbjorn Granlund  (tege@cyklop.nada.kth.se)
+
+       * mpz/inp_raw.c: Allocate x_index, not xsize limbs.
+
+Mon Mar 15 11:44:06 1993  Torbjorn Granlund  (tege@pde.nada.kth.se)
+
+       * mpz/pprime.c: Declare param `const'.
+       * gmp.h: Add declarations for mpz_com.
+
+Thu Feb 18 14:10:34 1993  Torbjorn Granlund  (tege@pde.nada.kth.se)
+
+       * mpq/add.c, mpq/sub.c: Call mpz_clear for t.
+
+Fri Feb 12 20:27:34 1993  Torbjorn Granlund  (tege@cyklop.nada.kth.se)
+
+       * mpz/inp_str.c: Recognize minus sign as first character.
+
+Wed Feb  3 01:36:02 1993  Torbjorn Granlund  (tege@cyklop.nada.kth.se)
+
+       * mpz/iset.c: Handle 0 size.
+
+Tue Feb  2 13:03:33 1993  Torbjorn Granlund  (tege@cyklop.nada.kth.se)
+
+       * mpz/mod_ui.c: Initialize dividend_size before it's used.
+
+Mon Jan  4 09:11:15 1993  Torbjorn Granlund  (tege@sics.se)
+
+       * bsd/itom.c: Declare param explicitly 'signed'.
+       * bsd/sdiv.c: Likewise.
+
+       * mpq/cmp.c: Remove unused variable tmp_size.
+       * mpz/powm_ui.c: Fix typo in esize==0 if stmt.
+       * mpz/powm.c: Likewise.
+
+Sun Nov 29 01:16:11 1992  Torbjorn Granlund  (tege@sics.se)
+
+       * mpn/generic/divmod_1.c (mpn_divmod_1): Handle
+       divisor_limb == 1 << (BITS_PER_MP_LIMB - 1)
+       specifically.
+
+       * Reorganize sources.  New directories mpn, mpn/MACH, mpn/generic,
+       mpz, mpq, bsd.  Use full file name for change logs hereafter.
+
+Wed Oct 28 17:40:04 1992  Torbjorn Granlund  (tege@jupiter.sics.se)
+
+       * longlong.h (__hppa umul_ppmm): Fix typos.
+       (__hppa sub_ddmmss): Swap input arguments.
+
+       * mpz_perfsqr.c (mpz_perfect_square_p): Avoid , before } in
+       initializer.
+
+Sun Oct 25 20:30:06 1992  Torbjorn Granlund  (tege@jupiter.sics.se)
+
+       * mpz_pprime.c (mpz_probab_prime_p): Handle numbers <= 3
+       specifically (used to consider all negative numbers prime).
+
+       * mpz_powm_ui: `carry_digit' => `carry_limb'.
+
+       * sdiv: Handle zero dividend specifically.  Replace most code in
+       this function with a call to mpn_divmod_1.
+
+Fri Sep 11 22:15:55 1992  Torbjorn Granlund  (tege@tarrega.sics.se)
+
+       * mpq_clear: Don't free the MP_RAT!
+
+       * mpn_lshift, mpn_rshift, mpn_rshiftci: Remove `long' from 4th arg.
+
+Thu Sep  3 01:47:07 1992  Torbjorn Granlund  (tege@jupiter.sics.se)
+
+       * All files: Remove leading _ from mpn function names.
+
+Wed Sep  2 22:21:16 1992  Torbjorn Granlund  (tege@jupiter.sics.se)
+
+       Fix from Jan-Hein Buhrman:
+       * mpz_mdiv.c, mpz_mmod.c, mpz_mdm.c: Make them work as documented.
+
+       * mpz_mmod.c, mpz_mdm.c: Move decl of TEMP_DIVISOR to reflect its
+       life.
+
+Sun Aug 30 18:37:15 1992  Torbjorn Granlund  (tege@jupiter.sics.se)
+
+       * _mpz_get_str: Use mpz_sizeinbase for computing out_len.
+       * _mpz_get_str: Don't remove leading zeros.  Abort if there are some.
+
+Wed Mar  4 17:56:56 1992  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * gmp.h: Change definition of MP_INT to make the & before params
+       optional.  Use typedef to define it.
+       * mp.h: Use typedef to define MINT.
+
+Tue Feb 18 14:38:39 1992  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * longlong.h (hppa umul_ppmm): Add missing semicolon.  Declare type
+       of __w1 and __w0.
+
+Fri Feb 14 21:33:21 1992  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * longlong.h: Make default count_leading_zeros work for machines >
+       32 bits.  Prepend `__' before local variables to avoid conflicts
+       with users' variables.
+
+       * mpn_dm_1.c: Remove udiv_qrnnd_preinv ...
+       * gmp-impl.h: ... and put it here.
+       * mpn_mod_1: Use udiv_qrnnd_preinv if it is faster than udiv_qrnnd.
+
+Tue Feb 11 17:20:12 1992  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * mpn_mul: Enhance base case by handling small multiplicands.
+       * mpn_dm_1.c: Revert last change.
+
+Mon Feb 10 11:55:15 1992  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * mpn_dm_1.c: Don't define udiv_qrnnd_preinv unless needed.
+
+Fri Feb  7 16:26:16 1992  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * mpn_mul: Replace code for base case.
+
+Thu Feb  6 15:10:42 1992  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * mpn_dm_1.c (_mpn_divmod_1): Add code for avoiding division by
+       pre-inverting divisor.
+
+Sun Feb  2 11:10:25 1992  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * longlong.h: Make __LLDEBUG__ work differently.
+       (_IBMR2): Reinsert old code.
+
+Sat Feb  1 16:43:00 1992  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * longlong.h (#ifdef _IBMR2): Replace udiv_qrnnd with new code
+       using floating point operations.  Don't define
+       UDIV_NEEDS_NORMALIZATION any longer.
+
+Fri Jan 31 15:09:13 1992  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * longlong.h: Define UMUL_TIME and UDIV_TIME for most machines.
+       * longlong.h (#ifdef __hppa): Define umul_ppmm.
+
+Wed Jan 29 16:41:36 1992  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * mpn_cmp: Only one length parameter, assume operand lengths are
+       the same.  Don't require normalization.
+       * mpq_cmp, mpz_add, mpz_sub, mpz_gcd, mpn_mul, mpn_sqrt: Change for
+       new mpn_cmp definition.
+
+Tue Jan 28 11:18:55 1992  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * _mpz_get_str: Fix typo in comment.
+
+Mon Jan 27 09:44:16 1992  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * Makefile.in: Add new files.
+
+       * mpn_dm_1.c: New file with function _mpn_divmod_1.
+       * mpz_dm_ui.c (mpz_divmod_ui): Use _mpn_divmod_1.
+       * mpz_div_ui: Likewise.
+
+       * mpn_mod_1.c: New file with function _mpn_mod_1.
+       * mpz_mod_ui: Use _mpn_mod_1.
+
+Thu Jan 23 18:54:09 1992  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       Bug found by Paul Zimmermann (zimmermann@inria.inria.fr):
+       * mpz_div_ui.c (mpz_div_ui), mpz_dm_ui.c (mpz_divmod_ui):
+       Handle dividend == 0.
+
+Wed Jan 22 12:02:26 1992  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * mpz_pprime.c: Use "" for #include.
+
+Sun Jan 19 13:36:55 1992  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * mpn_rshiftci.c (header): Correct comment.
+
+Wed Jan 15 18:56:04 1992  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * mpz_powm, mpz_powm_ui (if (bsize > msize)): Do alloca (bsize + 1)
+       to make space for ignored quotient at the end.  (The quotient might
+       always be an extra limb.)
+
+Tue Jan 14 21:28:48 1992  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * mpz_powm_ui: Fix comment.
+       * mpz_powm: Likewise.
+
+Mon Jan 13 18:16:25 1992  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * tests/Makefile.in: Prepend $(TEST_PREFIX) to Makefile target.
+
+Sun Jan 12 13:54:28 1992  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       Fixes from Kazumaro Aoki:
+       * mpz_out_raw: Take abs of size to handle negative values.
+       * mpz_inp_raw: Reallocate before reading ptr from X.
+       * mpz_inp_raw: Store, don't read, size to x->size.
+
+Tue Jan  7 17:50:25 1992  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * gmp.h, mp.h: Remove parameter names from prototypes.
+
+Sun Dec 15 00:09:36 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * tests/Makefile.in: Prepend "./" to file names when executing
+       tests.
+
+       * Makefile.in: Fix many problems.
+
+Sat Dec 14 01:00:02 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * mpn_sqrt.c: New file with _mpn_sqrt.
+       * mpz_sqrt, mpz_sqrtrem, mpz_perfect_square_p: Use _mpn_sqrt.
+       * msqrt.c: Delete.  Create from mpz_sqrtrem.c in Makefile.in.
+       * mpz_do_sqrt.c: Delete.
+       * Makefile.in: Update to reflect these changes.
+
+       * Makefile.in, configure, configure.subr: New files
+       (from bothner@cygnus.com).
+       * dist-Makefile: Delete.
+
+       * mpz_fac_ui: Fix comment.
+
+       * mpz_random2: Rewrite a bit to make it possible for the most
+       significant limb to be == 1.
+
+       * mpz_pprime.c (mpz_probab_prime_p): Remove \t\n.
+
+Fri Dec 13 23:10:02 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * mpz_do_sqrt: Simplify special case for U == 0.
+       * m*sqrt*.c, mpz_perfsqr.c (mpz_perfect_square_p):
+         Rename _mpz_impl_sqrt to _mpz_do_sqrt.
+
+Fri Dec 13 12:52:28 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * gmp-impl.h (MPZ_TMP_INIT): Cast to the right type.
+
+Thu Dec 12 22:17:29 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * mpn_add, mpn_sub, mpn_mul, mpn_div: Change type of several
+       variables to mp_size.
+
+Wed Dec 11 22:00:34 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * mpn_rshift.c: Fix header comments.
+
+Mon Dec  9 17:46:10 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       Released 1.2.
+
+       * gmp-impl.h (MPZ_TMP_INIT): Cast alloca return value.
+
+       * dist-Makefile: Add missing dependency for cre-mparam.
+
+       * mpz_mdiv.c, mpz_mmod.c, mpz_mdm.c, mpz_mdiv_ui.c,
+         mpz_mmod_ui.c, mpz_mdm_ui.c: Remove obsolete comment.
+
+       * dist-Makefile (clean): clean in tests subdir too.
+       * tests/Makefile: Define default values for ROOT and SUB.
+
+       * longlong.h (__a29k__ udiv_qrnnd): Change "q" to "1" for operand
+       2 constraint.
+
+Mon Nov 11 00:06:05 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * mpz_sizeinb.c (mpz_sizeinbase): Special code for size == 0.
+
+Sat Nov  9 23:47:38 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       Released 1.1.94.
+
+       * dist-Makefile, Makefile, tests/Makefile: Merge tests into
+       distribution.
+
+Fri Nov  8 22:57:19 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * gmp.h: Don't use keyword `signed' for non-ANSI compilers.
+
+Thu Nov  7 22:06:46 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * longlong.h: Cosmetic changes to keep it identical to gcc2 version
+       of longlong.h.
+       * longlong.h (__ibm032__): Fix operand order for add_ssaaaa and
+       sub_ddmmss.
+
+Mon Nov  4 00:36:46 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * mpn_mul: Fix indentation.
+
+       * mpz_do_sqrt: Don't assume 32 bit limbs (had constant
+       4294967296.0).
+       * mpz_do_sqrt: Handle overflow in conversion from double returned
+       by SQRT to mp_limb.
+
+       * gmp.h: Add missing function definitions.
+
+Sun Nov  3 18:25:25 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * mpz_pow_ui: Change type of `i' to int.
+
+       * ChangeLog: Add change log entry.
+       * ChangeLog: Add change log entry.
+       * ChangeLog: Add change log entry.
+       * ChangeLog: Add change log entry.
+       * ChangeLog: Add change log entry.
+       * ChangeLog: Add change log entry.
+       * ChangeLog: Add change log entry.
+       * ChangeLog: Add change log entry.
+Stack overflow.
+
+       * mpz_pow_ui.c: Fix typo in comment.
+
+       * dist-Makefile: Create rpow.c from mpz_powm_ui.c.
+       * mpz_powm_ui.c: Add code for rpow.
+       * rpow.c: Delete this file.  The rpow function is now implemented
+       in mpz_powm_ui.c.
+
+       * mpz_fac_ui.c: New file.
+       * gmp.h, dist-Makefile: Add stuff for mpz_fac_ui.
+
+       Bug found by John Amanatides (amana@sasquatch.cs.yorku.ca):
+       * mpz_powm_ui, mpz_powm: Call _mpn_mul in the right way, with
+       the first argument not smaller than the second.
+
+Tue Oct 29 13:56:55 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * cre-conv-tab.c (main), cre-mparam.c (main): Fix typo in output
+       header text.
+
+Mon Oct 28 00:35:29 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * mpz_random2: Handle size == 0.
+
+       * gmp-impl.h (struct __mp_bases): Rename chars_per_limb_exactly to
+       chars_per_bit_exactly, and change its definition.
+       * cre-conv-tab.c (main): Output field according to its new
+       definition.
+       * mpz_out_str, _mpz_get_str, mpz_sizeinb, mout:
+       Use chars_per_bit_exactly.
+
+       * mpz_random2: Change the loop termination condition in order to
+       get a large most significant limb with higher probability.
+
+       * gmp.h: Add declaration of new mpz_random2 and mpz_get_si.
+       * mpz_get_si.c: New file.
+       * dist-Makefile: Add mpz_random2 and mpz_get_si.
+
+       * mpz_sizeinb.c (mpz_sizeinbase): Special code for base being a
+       power of 2, giving exact result.
+
+       * mpn_mul: Fix MPN_MUL_VERIFY in various ways.
+       * mpn_mul: New macro KARATSUBA_THRESHOLD.
+       * mpn_mul (karatsuba's algorithm): Don't write intermediate results
+       to prodp, use temporary pp instead.  (Intermediate results can be
+       larger than the final result, possibly writing into hyperspace.)
+       * mpn_mul: Make smarter choice between Karatsuba's algorithm and the
+       shortcut algorithm.
+       * mpn_mul: Fix typo, cy instead of xcy.  Unify carry handling code.
+
+Sun Oct 27 19:57:32 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * mpn_mul: In non-classical case, choose Karatsuba's algorithm only
+       when usize > 1.5 vsize.
+
+       * mpn_mul: Break between classical and Karatsuba's algorithm at
+       KARATSUBA_THRESHOLD, if defined.  Default to 8.
+
+       * mpn_div: Kludge to fix stray memory read.
+
+Sat Oct 26 20:06:14 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * mpz_gcdext: Handle a = b = 0.  Remove memory leakage by calling
+       mpz_clear for all temporary variables.
+
+       * mpz_gcd: Reduce w_bcnt in _mpn_lshift call to hold that
+       function's argument constraints.  Compute wsize correctly.
+
+       * mpz_gcd: Fix typo in comment.
+
+       * memory.c (_mp_default_allocate, _mp_default_reallocate): Call
+       abort if allocation fails, don't just exit.
+
+Fri Oct 25 22:17:20 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * mpz_random2.c: New file.
+
+Thu Oct 17 18:06:42 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       Bugs found by Pierre-Joseph Gailly (pjg@sunbim.be):
+       * mpq_cmp: Take sign into account, don't just compare the
+       magnitudes.
+       * mpq_cmp: Call _mpn_mul in the right way, with the first argument
+       not smaller than the second.
+
+Wed Oct 16 19:27:32 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * mpz_random: Ensure the result is normalized.
+
+Tue Oct 15 14:55:13 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * mpz_clrbit: Support non-ANSI compilers.
+
+Wed Oct  9 18:03:28 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * longlong.h (68k add_ssaaaa, sub_ddmmss): Generalize constraints.
+
+Tue Oct  8 17:42:59 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * mpz_mdm_ui: Add comments.
+
+       * mpz_mdiv: Use MPZ_TMP_INIT instead of mpz_init.
+       * mpz_init_ui: Change spacing and header comment.
+
+Thu Oct  3 18:36:13 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * dist-Makefile: Prepend `./' before some filenames.
+
+Sun Sep 29 14:02:11 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       Released 1.1 (public).
+
+       * mpz_com: New name of mpz_not.
+       * dist-Makefile: Change mpz_not to mpz_com.
+
+Tue Sep 24 12:44:11 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * longlong.h: Fix header comment.
+
+Mon Sep  9 15:16:24 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       Released 1.0.92.
+
+       * mpn_mul.c (_mpn_mul): Handle leading zero limbs in non-Karatsuba
+       case.
+
+       * longlong.h (m68000 umul_ppmm): Clobber one register less by
+       slightly rearranging the code.
+
+Sun Sep  1 18:53:25 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * dist-Makefile (stamp-stddefh): Fix typo.
+
+Sat Aug 31 20:41:31 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       Released 1.0.91.
+
+       * mpz_mdiv.c, mpz_mmod.c, mpz_mdm.c, mpz_mdiv_ui.c,
+         mpz_mmod_ui.c, mpz_mdm_ui.c: New files and functions.
+       * gmp.h, gmp.texi: Define the new functions.
+
+Fri Aug 30 08:32:56 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * mpz_gcdext: Compute t argument from the other quantities at the
+         end of the function, not in the loop.  New feature: Allow t to be
+         NULL.
+
+       * mpz_add.c, mpz_sub.c, mpz_mul.c, mpz_powm.c, mpz_gcd.c: Don't
+         include "mp.h".  Use type name `MP_INT' always.
+
+       * dist-Makefile, mpz_cmp.c: Merge mcmp.c from mpz_cmp.c.
+
+Wed Aug 28 00:45:11 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * dist-Makefile (documentation): Go via tmp.texi to avoid the
+         creation of gmp.dvi if any errors occur.  Make tex read input
+         from /dev/null.
+
+Fri Aug 23 15:58:52 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * longlong.h (68020, i386): Don't define machine-dependent
+         __umulsidi3 (so the default definition is used).
+       * longlong.h (all machines): Cast all operands, sources and
+         destinations, to `unsigned long int'.
+       * longlong.h: Add gmicro support.
+
+Thu Aug 22 00:28:29 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * longlong.h: Rename BITS_PER_LONG to LONG_TYPE_SIZE.
+       * longlong.h (__ibm032__): Define count_leading_zeros and umul_ppmm.
+       * longlong.h: Define UMUL_TIME and UDIV_TIME for some CPUs.
+       * _mpz_get_str.c: Add code to do division by big_base using only
+         umul_qrnnd, if that is faster.  Use UMUL_TIME and UDIV_TIME to
+         decide which variant to use.
+
+Wed Aug 21 15:45:23 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * longlong.h (__sparc__ umul_ppmm): Move two insn from end to the
+         nops.  (Saves two insn.)
+
+       * longlong.h (__sparc__ umul_ppmm): Rewrite in order to avoid
+         branch, and to permit input/output register overlap.
+
+       * longlong.h (__29k__): Remove duplicated udiv_qrnnd definition.
+       * longlong.h (__29k__ umul_ppmm): Split asm instructions into two
+         asm statements (gives better code if either the upper or lower
+         part of the product is unused).
+
+Tue Aug 20 17:57:59 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * _mpz_get_str.c (outside of functions): Remove
+         num_to_ascii_lower_case and num_to_ascii_upper_case.  Use string
+         constants in the function instead.
+
+Mon Aug 19 00:37:42 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * cre-conv-tab.c (main): Output table in hex.  Output 4 fields, not
+         3, for components 0 and 1.
+
+       * gmp.h: Add declaration of mpq_neg.
+
+       Released 1.0beta.13.
+
+       * _mpz_set_str.c (mpz_set_str): Cast EOF and SPC to char before
+         comparing to enum literals SPC and EOF.  This makes the code work
+         for compilers where `char' is unsigned.  (Bug found by Brian
+         Beuning).
+
+       Released 1.0beta.12.
+
+       * mpz_mod_ui: Remove references to quot.  Remove quot_ptr, quot_size
+         declarations and assignment code.
+
+Sun Aug 18 14:44:26 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * mpz_mod_ui: Handle dividend < 0.
+
+       Released 1.0beta.11.
+
+       * mpz_dm_ui, mpz_div_ui, mpz_mod_ui, sdiv: Make them share the same
+         general structure, variable names, etc.
+
+       * sdiv: Un-normalize the remainder in n1 before it is negated.
+
+       * longlong.h: Mention UDIV_NEEDS_NORMALIZATION in description of
+         udiv_qrnnd.
+
+       * mpz_dm_ui.c (mpz_divmod_ui), mpz_div_ui.c (mpz_div_ui): Increment
+         the quotient size if the dividend size is incremented.  (Bug found
+         by Brian Beuning.)
+
+       * mpz_mod_ui: Shift back the remainder, if UDIV_NEEDS_NORMALIZATION.
+         (Bug found by Brian Beuning.)
+
+       * mpz_mod_ui: Replace "digit" by "limb".
+
+       * mpz_perfsqr.c (mpz_perfect_square_p): Disable second test case
+         for non-32-bit machines (PP is hardwired for such machines).
+       * mpz_perfsqr.c (outside of functions): Define PP value with an L.
+
+       * mpn_mul.c (_mpn_mul): Add verification code that is activated if
+         DEBUG is defined.  Replace "digit" by "limb".
+       * mpn_mul.c (_mpn_mul: Karatsuba's algorithm: 4.): Normalize temp
+         after the addition.
+       * mpn_mul.c (_mpn_mul: Karatsuba's algorithm: 1.): Compare u0_size
+         and v0_size, and according to the result, swap arguments in
+         recursive call.  (Don't violate mpn_mul's own argument
+         constraints.)
+
+Fri Aug 16 13:47:12 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       Released 1.0beta.10.
+
+       * longlong.h (IBMR2): Add udiv_qrnnd.
+
+       * mpz_perfsqr: Remove unused variables.
+
+       * mpz_and (case for different signs): Initialize loop variable i!
+
+       * dist-Makefile: Update automatically generated dependencies.
+       * dist-Makefile (madd.c, msub.c, pow.c, mult.c, gcd.c): Add mp.h,
+         etc to dependency file lists.
+
+       * longlong.h (add_ssaaaa, sub_ddmmss [C default versions]): Make __x
+         `unsigned long int'.
+       * longlong.h: Add `int' after `unsigned' and `long' everywhere.
+
+Wed Aug 14 18:06:48 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * longlong.h: Add ARM, i860 support.
+
+       * mpn_lshift, mpn_rshift, mpn_rshiftci: Rename *_word with *_limb.
+
+Tue Aug 13 21:57:43 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * _mpz_get_str.c, _mpz_set_str.c, mpz_sizeinb.c (mpz_sizeinbase),
+         mpz_out_str.c, mout.c: Remove declaration of __mp_bases.
+       * gmp-impl.h: Put it here, and make it `const'.
+       * cre-conv-tab.c (main): Make struct __mp_bases `const'.
+
+Mon Aug 12 17:11:46 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * cre-conv-tab.c (main): Use %lu in printf for long ints.
+
+       * dist-Makefile: Fix cre-* dependencies.
+
+       * cre-conv-tab.c (main): Output field big_base_inverted.
+
+       * gmp-impl.h (struct bases): New field big_base_inverted.
+       * gmp-impl.h (struct bases): Change type of chars_per_limb_exactly
+         to float (in order to keep the structure smaller).
+
+       * mp.h, gmp.h: Change names of macros for avoiding multiple
+         includes.
+
+Fri Aug  9 18:01:36 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * _mpz_get_str: Only shift limb array if normalization_steps != 0
+         (optimization).
+
+       * longlong.h (sparc umul_ppmm): Use __asm__, not asm.
+       * longlong.h (IBMR2 umul_ppmm): Refer to __m0 and __m1, not to m0
+         and m1 (overlap between output and input operands did not work).
+       * longlong.h: Add VAX, ROMP and HP-PA support.
+       * longlong.h: Sort the machine dependent code in alphabetical order
+         on the CPU name.
+       * longlong.h: Hack comments.
+
+Thu Aug  8 14:13:36 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       Released 1.0beta.9.
+
+       * longlong.h: Define BITS_PER_LONG to 32 if it's not already
+         defined.
+       * Define __BITS4 to BITS_PER_LONG / 4.
+       * Don't assume 32 bit word size in "count_leading_zeros" C macro.
+         Use __BITS4 and BITS_PER_LONG instead.
+
+       * longlong.h: Don't #undef internal macros (reverse change of Aug 3).
+
+       * longlong.h (68k): Define add_ssaaaa sub_ddmmss, and umul_ppmm
+         even for plain mc68000.
+
+       * mpq_div: Flip the sign of the numerator *and* denominator of the
+         result if the intermediate denominator is negative.
+
+       * mpz_and.c, mpz_ior.c: Use MPN_COPY for all copying operations.
+
+       * mpz_and.c: Compute the result size more conservatively.
+       * mpz_ior.c: Likewise.
+
+       * mpz_realloc: Never allocate zero space even if NEW_SIZE == 0.
+
+       * dist-Makefile: Remove madd.c, msub.c, pow.c, mult.c, gcd.c from
+         BSDMP_SRCS.
+
+       * dist-Makefile: Create mult.c from mpz_mul.c.
+       * mult.c: Delete this file.
+
+       * _mpz_set_str: Normalize the result (for bases 2, 4, 8... it was
+         not done properly if the input string had many leading zeros).
+
+Sun Aug  4 16:54:14 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * dist-Makefile (gcd.c, pow.c, madd.c, msub.c): Make these targets
+         work with VPATH and GNU MP.
+
+       * mpz_gcd: Don't call mpz_set; inline its functionality.
+
+       * mpq_mul, mpq_div: Fix several serious typos.
+
+       * mpz_dmincl, mpz_div: Don't normalize the quotient if it's already
+         zero.
+
+       * mpq_neg.c: New file.
+
+       * dist-Makefile: Remove obsolete dependencies.
+
+       * mpz_sub: Fix typo.
+
+       Bugs found by Pierre-Joseph Gailly (pjg@sunbim.be):
+       * mpq_mul, mpq_div: Initialize tmp[12] variables even when the gcd
+         is just 1.
+       * mpz_gcd: Handle gcd(0,v) and gcd(u,0) in special cases.
+
+Sat Aug  3 23:45:28 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * longlong.h: Clean up comments.
+       * longlong.h: #undef internal macros.
+
+Fri Aug  2 18:29:11 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * mpq_set_si, mpq_set_ui: Canonicalize 0/x to 0/1.
+       * mpq_set_si, mpq_set_ui: Cosmetic formatting changes.
+
+       * mpz_dmincl.c: Normalize the remainder before shifting it back.
+
+       * mpz_dm_ui.c (mpz_divmod_ui): Handle rem == dividend.
+
+       * mpn_div.c: Fix comment.
+
+       * mpz_add.c, mpz_sub.c: Use __MP_INT (not MP_INT) for intermediate
+         type, in order to work for both GNU and Berkeley functions.
+
+       * dist-Makefile: Create gcd.c from mpz_gcd.c, pow.c from mpz_powm,
+         madd.c from mpz_add.c, msub.c from mpz_sub.c,
+         respectively.
+       * pow.c, gcd.c, mpz_powmincl.c, madd.c, msub.c: Remove these.
+       * mpz_powm.c, mpz_gcd.c, mpz_add.c, mpz_sub.c: #ifdef for GNU and
+         Berkeley function name variants.
+       * dist-Makefile: Add created files to "clean" target.
+
+Tue Jul 16 15:19:46 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * mpq_get_den: No need for absolute value of the size, the
+         denominator is always positive.
+
+       * mpz_get_ui: If the operand is zero, return zero.  Don't read the
+         limb array!
+
+       * mpz_dmincl.c: Don't ignore the return value from _mpn_rshift, it
+         is the size of the remainder.
+
+Mon Jul 15 11:08:05 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * Several files: Remove unused variables and functions.
+
+       * gmp-impl.h: Declare _mpz_impl_sqrt.
+
+       * mpz_dm_ui (mpz_divmod_ui), sdiv: Shift back the remainder if
+         UDIV_NEEDS_NORMALIZATION.  (Fix from Brian Beuning.)
+
+       * mpz_dm_ui.c, sdiv: Replace *digit with *limb.
+
+       * mpz_ior: Add missing else statement in -OP1 | -OP2 case.
+       * mpz_ior: Add missing else statement in OP1 | -OP2 case.
+       * mpz_ior: Swap also OP1 and OP2 pointers in -OP1 & OP2 case.
+       * mpz_ior: Duplicate _mpz_realloc code.
+
+       * mpz_and: Add missing else statement in -OP1 & -OP2 case.
+       * mpz_and: Rewrite OP1 & -OP2 case.
+       * mpz_and: Swap also OP1 and OP2 pointers in -OP1 & OP2 case.
+
+       * mpz_gcdext: Loop in d1.size (not b->size).  (Fix from Brian
+         Beuning.)
+
+       * mpz_perfsqr: Fix argument order in _mpz_impl_sqrt call.  (Fix from
+         Brian Beuning.)
+
+Fri Jul 12 17:10:33 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * mpq_set.c, mpq_set_ui.c, mpq_set_si.c, mpq_inv.c,
+         mpq_get_num.c, mpq_get_den.c, mpq_set_num.c, mpq_set_den.c:
+         New files.
+
+       * mpz_dmincl.c: Remove second re-allocation of rem->d.  It
+         was never executed.
+
+       * dist-Makefile: Use `-r' instead of `-x' for test for ranlib (as
+         some unixes' test doesn't have the -x option).
+
+       * *.*: Cast allocated pointers to the appropriate type (makes old C
+         compilers happier).
+
+       * cre-conv-tab.c (main): Divide max_uli by 2 and multiply again
+         after conversion to double.  (Kludge for broken C compilers.)
+
+       * dist-Makefile (stamp-stddefh): New target.  Test if "stddef.h"
+         exists in the system and creates a minimal one if it does not
+         exist.
+       * cre-stddefh.c: New file.
+       * dist-Makefile: Make libgmp.a and libmp.a depend on stamp-stddefh.
+       * dist-Makefile (clean): Add some more.
+       * gmp.h, mp.h: Unconditionally include "stddef.h".
+
+Thu Jul 11 10:08:21 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * min: Do ungetc of last read character.
+       * min.c: include stdio.h.
+
+       * dist-Makefile: Go via tmp- files for cre* redirection.
+       * dist-Makefile: Add tmp* to "clean" target.
+
+       * dist-Makefile: Use LOCAL_CC for cre*, to simplify cross
+         compilation.
+
+       * gmp.h, mp.h: Don't define NULL here.
+       * gmp-impl.h: Define it here.
+
+Wed Jul 10 14:13:33 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * mpz_mod_2exp: Don't copy too much, overwriting most significant
+         limb.
+
+       * mpz_and, mpz_ior: Don't read op[12]_ptr from op[12] when
+         reallocating res, if op[12]_ptr got their value from alloca.
+
+       * mpz_and, mpz_ior: Clear up comments.
+
+       * cre-mparam.c: Output parameters for `short int' and `int'.
+
+       * mpz_and, mpz_ior: Negate negative op[12]_size in several places.
+
+Tue Jul  9 18:40:30 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * gmp.h, mp.h: Test for _SIZE_T defined before typedef'ing size_t.
+         (Fix for Sun lossage.)
+
+       * gmp.h: Add declaration of mpq_clear.
+
+       * dist-Makefile: Check if "ranlib" exists, before using it.
+       * dist-Makefile: Add mpz_sqrtrem.c and mpz_size.c.
+       * mpz_powm: Fix typo, "pow" instead of "mpz_powm".
+
+Fri Jul  5 19:08:09 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * move: Remove incorrect comment.
+
+       * mpz_free, mpq_free: Rename to *_clear.
+       * dist-Makefile: Likewise.
+       * mpq_add, mpq_sub, mpq_mul, mpq_div: Likewise.
+
+       * mpz_dmincl.c: Don't call "move", inline its functionality.
+
+Thu Jul  4 00:06:39 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * Makefile: Include dist-Makefile.  Fix dist target to include
+         dist-Makefile (with the name "Makefile" in the archive).
+
+       * dist-Makefile: New file made from Makefile.  Add new mpz_...
+         functions.
+
+       * mpz_powincl.c: New file for mpz_powm (Berkeley MP pow)
+         functionality.  Avoids code duplication.
+       * pow.c, mpz_powm.c: Include mpz_powincl.c
+
+       * mpz_dmincl.c: New file containing general division code.  Avoids
+         code duplication.
+       * mpz_dm.c (mpz_divmod), mpz_mod.c (mpz_mod), mdiv.c (mdiv): Include
+         mpz_dmincl.c.
+
+       * _mpz_get_str: Don't call memmove, unless HAS_MEMMOVE is defined.
+         Instead, write the overlapping memory copying inline.
+
+       * mpz_dm_ui.c: New name for mpz_divmod_ui.c (SysV file name limit).
+
+       * longlong.h: Don't use #elif.
+       * mpz_do_sqrt.c: Likewise.
+
+       * longlong.h: Use __asm__ instead of asm.
+       * longlong.h (sparc udiv_qrnnd): Make it to one string over several
+         lines.
+
+       * longlong.h: Prepend __ll_ to B, highpart, and lowpart.
+
+       * longlong.h: Move array t in count_leading_zeros to the new file
+         mp_clz_tab.c.  Rename the array __clz_tab.
+       * All files: #ifdef for traditional C compatibility.
+
+Wed Jul  3 11:42:14 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * mpz_and: Initialize res_ptr always (used to be initialized only
+         when reallocating).
+
+       * longlong.h (umul_ppmm [C variant]): Make __ul...__vh
+         `unsigned int', and cast the multiplications.  This way
+         compilers more easily can choose cheaper multiplication
+         instructions.
+
+       * mpz_mod_2exp: Handle input argument < modulo argument.
+       * mpz_many: Make sure mp_size is the type for sizes, not int.
+
+       * mpz_init, mpz_init_set*, mpq_init, mpq_add, mpq_sub, mpq_mul,
+         mpq_div: Change mpz_init* interface.  Structure pointer as first
+         arg to initialization function, no longer *return* struct.
+
+Sun Jun 30 19:21:44 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * Rename mpz_impl_sqrt.c to mpz_do_sqrt.c to satisfy SysV 14
+         character file name length limit.
+
+       * Most files: Rename MINT to MP_INT.  Rename MRAT to MP_RAT.
+       * mpz_sizeinb.c: New file with function mpz_sizeinbase.
+       * mp_bases.c: New file, with array __mp_bases.
+       * _mpz_get_str, _mpz_set_str: Remove struct bases, use extern
+         __mp_bases instead.
+       * mout, mpz_out_str: Use array __mp_bases instead of function
+         _mpz_get_cvtlen.
+       * mpz_get_cvtlen.c: Remove.
+       * Makefile: Update.
+
+Sat Jun 29 21:57:28 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * longlong.h (__sparc8__ umul_ppmm): Insert 3 nop:s for wr delay.
+       * longlong.h (___IBMR2__): Define umul_ppmm, add_ssaaaa, sub_ddmmss.
+       * longlong.h (__sparc__): Don't call .umul; expand asm instead.
+         Don't define __umulsidi3 (i.e. use default definition).
+
+Mon Jun 24 17:37:23 1991  Torbjorn Granlund  (tege@amon.sics.se)
+
+       * _mpz_get_str.c (num_to_ascii_lower_case, num_to_ascii_upper_case):
+         Swap 't' and 's'.
+
+Sat Jun 22 13:54:01 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * mpz_gcdext.c: New file.
+
+       * mpn_mul: Handle carry and unexpected operand sizes in last
+         additions/subtractions.  (Bug triggered when v1_size == 1.)
+
+       * mp*_alloc*: Rename functions to mp*_init* (files to mp*_iset*.c).
+       * mpq_*: Call mpz_init*.
+
+       * mpz_pow_ui, rpow: Use _mpn_mul instead of mult.  Restructure.
+
+Wed May 29 20:32:33 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * mpz_get_cvtlen: multiply by size.
+
+Sun May 26 15:01:15 1991  Torbjorn Granlund  (tege@bella.nada.kth.se)
+
+       Alpha-release 0.95.
+
+       Fixes from Doug Lea (dl@g.oswego.edu):
+       * mpz_mul_ui: Loop to MULT_SIZE (not PROD_SIZE).  Adjust PROD_SIZE
+         correctly.
+       * mpz_div: Prepend _ to mpz_realloc.
+       * mpz_set_xs, mpz_set_ds: Fix typos in function name.
+
+Sat May 25 22:51:16 1991  Torbjorn Granlund  (tege@bella.nada.kth.se)
+
+       * mpz_divmod_ui: New function.
+
+       * sdiv: Make the sign of the remainder correct.
+
+Thu May 23 15:28:24 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * Alpha-release 0.94.
+
+       * mpz_mul_ui: Include longlong.h.
+
+       * mpz_perfsqr.c (mpz_perfect_square_p): Call _mpz_impl_sqrt instead
+         of msqrt.
+
+       * mpz_impl_sqrt: Don't call "move", inline its functionality.
+
+       * mdiv: Use MPN_COPY instead of memcpy.
+       * rpow, mpz_mul, mpz_mod_2exp: Likewise.
+       * pow.c: Likewise, and fix bug in the size arg.
+
+       * xtom: Don't use mpz_alloc, inline needed code instead.  Call
+         _mpz_set_str instead of mpz_set_str.
+
+       * Makefile: Make two libraries, libmp.a and libgmp.a.
+
+Thu May 22 20:25:29 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * Add manual to distribution.
+       * Fold in many missing routines described in the manual.
+       * Update Makefile.
+
+Wed May 22 13:48:46 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * mpz_set_str: Make it handle 0x prefix OK.
+
+Sat May 18 18:31:02 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * memory.c (_mp_default_reallocate): Swap OLD_SIZE and NEW_SIZE
+         arguments.
+       * mpz_realloc (_mpz_realloc): Swap in call to _mp_reallocate_func.
+       * min: Likewise.
+
+Thu May 16 20:43:05 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * memory.c: Make the default allocation functions global.
+       * mp_set_fns (mp_set_memory_functions): Make a NULL pointer mean the
+         default memory function.
+
+Wed May  8 20:02:42 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * mpz_div: Handle DEN the same as QUOT correctly by copying DEN->D
+         even if no normalization is needed.
+       * mpz_div: Rework reallocation scheme, to avoid excess copying.
+
+       * mpz_sub_ui.c, mpz_add_ui.c: New files.
+
+       * mpz_cmp.c, mpz_cmp_ui.c: New files.
+
+       * mpz_mul_2exp: Handle zero input MINT correctly.
+
+       * mpn_rshiftci: Don't handle shift counts > BITS_PER_MP_DIGIT.
+
+       * mpz_out_raw.c, mpz_inp_raw.c: New files for raw I/O.
+
+Tue May  7 15:44:58 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * mpn_rshift: Don't handle shift counts > BITS_PER_MP_DIGIT.
+       * mpz_div_2exp: Don't call _mpn_rshift with cnt > BITS_PER_MP_DIGIT.
+       * gcd, mpz_gcd: Likewise.
+
+       * gcd, mpz_gcd: Handle common 2 factors correctly.
+
+Mon May  6 20:22:59 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * gmp-impl.h (MPN_COPY): Inline a loop instead of calling memcpy.
+
+       * gmp-impl.h, mpz_get_str, rpow: Swap DST and SRC in TMPCOPY* macros.
+
+Sun May  5 15:16:23 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * mpz_div: Remove test for QUOT == 0.
+
+Sun Apr 28 20:21:04 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * pow: Don't make MOD normalization in place, as it's a bad idea to
+         write on an input parameter.
+       * pow: Reduce BASE if it's > MOD.
+       * pow, mult, mpz_mul: Simplify realloc code.
+
+Sat Apr 27 21:03:11 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * Install multiplication using Karatsuba's algorithm as default.
+
+Fri Apr 26 01:03:57 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * msqrt: Store in ROOT even for U==0, to make msqrt(0) defined.
+
+       * mpz_div_2exp.c, mpz_mul_2exp.c: New files for shifting right and
+         left, respectively.
+       * gmp.h: Add definitions for mpz_div_2exp and mpz_mul_2exp.
+
+       * mlshift.c, mrshift.c: Remove.
+
+Wed Apr 24 21:39:22 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * mpn_mul: Check only for m2_size == 0 in function header.
+
+Mon Apr 22 01:31:57 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * karatsuba.c: New file for Karatsuba's multiplication algorithm.
+
+       * mpz_random, mpz_init, mpz_mod_2exp: New files and functions.
+
+       * mpn_cmp: Fix header comment.
+
+Sun Apr 21 00:10:44 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * pow: Switch off initial base reduction.
+
+Sat Apr 20 22:06:05 1991  Torbjorn Granlund  (tege@echnaton.sics.se)
+
+       * mpz_get_str: Don't generate initial zeros for initial word.
+         Used to write outside of allocated storage.
+
+Mon Apr 15 15:48:08 1991  Torbjorn Granlund  (tege@zevs.sics.se)
+
+       * _mpz_realloc: Make it accept size in number of mp_digits.
+       * Most functions: Use new _mpz_realloc definition.
+
+       * mpz_set_str: Remove calls to _mp_free_func.
+
+       * Most functions: Rename mpn_* to _mpn_*.  Rename mpz_realloc to
+         _mpz_realloc.
+       * mpn_lshift: Redefine _mpn_lshift to only handle small shifts.
+       * mdiv, mpz_div, ...: Changes for new definition of _mpn_lshift.
+       * msqrt, mp*_*shift*: Define cnt as unsigned (for speed).
+
+Sat Apr  6 14:05:16 1991  Torbjorn Granlund  (tege@musta.nada.kth.se)
+
+       * mpn_mul: Multiply by the first digit in M2 in a special
+         loop instead of zeroing the product area.
+
+       * mpz_abs.c: New file.
+
+       * sdiv: Implement as mpz_div_si for speed.
+
+       * mpn_add: Make it work for second source operand == 0.
+
+       * msub: Negate the correct operand, i.e. V before swapping, not
+         the smaller of U and V!
+       * madd, msub: Update abs_* when swapping operands, and not after
+         (optimization).
+
+Fri Apr  5 00:19:36 1991  Torbjorn Granlund  (tege@black.nada.kth.se)
+
+       * mpn_sub: Make it work for subtrahend == 0.
+
+       * madd, msub: Rewrite to minimize mpn_cmp calls.  Ensure
+         mpn_cmp is called with positive sizes (used to be called
+         incorrectly with negative sizes sometimes).
+
+       * msqrt: Make it divide by zero if fed with a negative number.
+       * Remove if statement at end of precision calculation that was
+         never true.
+
+       * itom, mp.h: The argument is of type short, not int.
+
+       * mpz_realloc, gmp.h: Make mpz_realloc return the new digit pointer.
+
+       * mpz_get_str.c, mpz_set_str.c, mpz_new_str.c: Don't include mp.h.
+
+       * Add COPYING to distribution.
+
+       * mpz_div_ui.c, mpz_div_si.c, mpz_new_ui.c, mpz_new_si.c: New files.
+
+Fri Mar 15 00:26:29 1991  Torbjorn Granlund  (tege@musta.nada.kth.se)
+
+       * Add Copyleft headers to all files.
+
+       * mpn_mul.c, mpn_div.c: Add header comments.
+       * mult.c, mdiv.c: Update header comments.
+
+       * mpq_add.c, mpq_sub.c, mpq_div.c, mpq_new.c, mpq_new_ui.c,
+         mpq_free.c: New files for rational arithmetics.
+
+       * mpn_lshift.c: Avoid writing the most significant word if it is 0.
+
+       * mdiv.c: Call mpn_lshift for the normalization.
+       * mdiv.c: Remove #ifdefs.
+
+       * Makefile: Add ChangeLog to DISTFILES.
+
+       * mpn_div.c: Make the add_back code work (by removing abort()).
+       * mpn_div.c: Make it return if the quotient is size as compared
+         with the difference NSIZE - DSIZE.  If the stored quotient is
+         larger than that, return 1, otherwise 0.
+       * gmp.h: Fix mpn_div declaration.
+       * mdiv.c: Adopt call to mpn_div.
+       * mpz_div.c: New file (developed from mdiv.c).
+
+       * README: Update routine names.
+
+Thu Mar 14 18:45:28 1991  Torbjorn Granlund  (tege@musta.nada.kth.se)
+
+       * mpq_mul.c: New file for rational multiplication.
+
+       * gmp.h: Add definitions for rational arithmetics.
+
+       * mpn_div: Kludge the case where the high numerator digit > the
+         high denominator digit.  (This code is going to be optimized later.)
+
+       * New files: gmp.h for GNU specific functions, gmp-common.h for
+         definitions common for mp.h and gmp.h.
+
+       * Ensure mp.h just defines what BSD mp.h defines.
+
+       * pow.c: Fix typo for bp allocation.
+
+       * Rename natural number functions to mpn_*, integer functions to
+         mpz_*.
+
+Tue Mar  5 18:47:04 1991  Torbjorn Granlund  (tege@musta.nada.kth.se)
+
+       * mdiv.c (_mp_divide, case 2): Change test for estimate of Q from
+         "n0 >= r" to "n0 > r".
+
+       * msqrt: Tune the increasing precision scheme, to do fewer steps.
+
+Tue Mar  3 18:50:10 1991  Torbjorn Granlund  (tege@musta.nada.kth.se)
+
+       * msqrt: Use the low level routines.  Use low precision in the
+       beginning, and increase the precision as the result converges.
+       (This optimization gave a 6-fold speedup.)
diff --git a/INSTALL b/INSTALL

new file mode 100644 (file)

index 0000000..efcfd88
--- /dev/null
+++ b/INSTALL
@@ -0,0 +1,70 @@
+Copyright 1996, 1997, 1999, 2000, 2001, 2002, 2006 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+
+
+
+                          INSTALLING GNU MP
+                          =================
+
+
+These instructions are only for the impatient.  Others should read the install
+instructions in gmp.info.  Use
+
+       info -f doc/gmp.info
+
+from the gmp source directory.
+
+Here are some brief instructions on how to install GMP.  First you need to
+compile.  Since you're impatient, try this
+
+       ./configure
+       make
+       make check              <= VERY IMPORTANT!!
+
+If that fails, or you care about the performance of GMP, you need to read the
+full instructions in the chapter "Installing GMP" in the manual.
+
+You should not skip the "make check" part; the risk that the GMP sources are
+miscompiled is unfortunately quite high.  And if they indeed are, "make check"
+is very likely to trigger the compiler-introduced bug.
+
+Optionally, you can install the library with the following command.  This will
+be to /usr/local by default, and you'll probably need to be "root" to be able
+to write there.
+
+       make install
+
+To create the printable documentation from the texinfo source, type "make
+gmp.dvi" or "make gmp.ps".  This requires various "tex" commands.
+
+If you are new to GMP, it is a good idea to at least read the chapter "GMP
+Basics" in the manual.
+
+Some known build problems are noted in the "Installing GMP" chapter of
+the manual.  Please report other problems to gmp-bugs@gmplib.org.
+
+The GMP web site is located here: http://gmplib.org/.
+
+
+----------------
+Local variables:
+mode: text
+fill-column: 78
+End:
diff --git a/INSTALL.autoconf b/INSTALL.autoconf

new file mode 100644 (file)

index 0000000..54caf7c
--- /dev/null
+++ b/INSTALL.autoconf
@@ -0,0 +1,229 @@
+Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002 Free Software
+Foundation, Inc.
+
+   This file is free documentation; the Free Software Foundation gives
+unlimited permission to copy, distribute and modify it.
+
+Basic Installation
+==================
+
+   These are generic installation instructions.
+
+   The `configure' shell script attempts to guess correct values for
+various system-dependent variables used during compilation.  It uses
+those values to create a `Makefile' in each directory of the package.
+It may also create one or more `.h' files containing system-dependent
+definitions.  Finally, it creates a shell script `config.status' that
+you can run in the future to recreate the current configuration, and a
+file `config.log' containing compiler output (useful mainly for
+debugging `configure').
+
+   It can also use an optional file (typically called `config.cache'
+and enabled with `--cache-file=config.cache' or simply `-C') that saves
+the results of its tests to speed up reconfiguring.  (Caching is
+disabled by default to prevent problems with accidental use of stale
+cache files.)
+
+   If you need to do unusual things to compile the package, please try
+to figure out how `configure' could check whether to do them, and mail
+diffs or instructions to the address given in the `README' so they can
+be considered for the next release.  If you are using the cache, and at
+some point `config.cache' contains results you don't want to keep, you
+may remove or edit it.
+
+   The file `configure.ac' (or `configure.in') is used to create
+`configure' by a program called `autoconf'.  You only need
+`configure.ac' if you want to change it or regenerate `configure' using
+a newer version of `autoconf'.
+
+The simplest way to compile this package is:
+
+  1. `cd' to the directory containing the package's source code and type
+     `./configure' to configure the package for your system.  If you're
+     using `csh' on an old version of System V, you might need to type
+     `sh ./configure' instead to prevent `csh' from trying to execute
+     `configure' itself.
+
+     Running `configure' takes a while.  While running, it prints some
+     messages telling which features it is checking for.
+
+  2. Type `make' to compile the package.
+
+  3. Optionally, type `make check' to run any self-tests that come with
+     the package.
+
+  4. Type `make install' to install the programs and any data files and
+     documentation.
+
+  5. You can remove the program binaries and object files from the
+     source code directory by typing `make clean'.  To also remove the
+     files that `configure' created (so you can compile the package for
+     a different kind of computer), type `make distclean'.  There is
+     also a `make maintainer-clean' target, but that is intended mainly
+     for the package's developers.  If you use it, you may have to get
+     all sorts of other programs in order to regenerate files that came
+     with the distribution.
+
+Compilers and Options
+=====================
+
+   Some systems require unusual options for compilation or linking that
+the `configure' script does not know about.  Run `./configure --help'
+for details on some of the pertinent environment variables.
+
+   You can give `configure' initial values for configuration parameters
+by setting variables in the command line or in the environment.  Here
+is an example:
+
+     ./configure CC=c89 CFLAGS=-O2 LIBS=-lposix
+
+   *Note Defining Variables::, for more details.
+
+Compiling For Multiple Architectures
+====================================
+
+   You can compile the package for more than one kind of computer at the
+same time, by placing the object files for each architecture in their
+own directory.  To do this, you must use a version of `make' that
+supports the `VPATH' variable, such as GNU `make'.  `cd' to the
+directory where you want the object files and executables to go and run
+the `configure' script.  `configure' automatically checks for the
+source code in the directory that `configure' is in and in `..'.
+
+   If you have to use a `make' that does not support the `VPATH'
+variable, you have to compile the package for one architecture at a
+time in the source code directory.  After you have installed the
+package for one architecture, use `make distclean' before reconfiguring
+for another architecture.
+
+Installation Names
+==================
+
+   By default, `make install' will install the package's files in
+`/usr/local/bin', `/usr/local/man', etc.  You can specify an
+installation prefix other than `/usr/local' by giving `configure' the
+option `--prefix=PATH'.
+
+   You can specify separate installation prefixes for
+architecture-specific files and architecture-independent files.  If you
+give `configure' the option `--exec-prefix=PATH', the package will use
+PATH as the prefix for installing programs and libraries.
+Documentation and other data files will still use the regular prefix.
+
+   In addition, if you use an unusual directory layout you can give
+options like `--bindir=PATH' to specify different values for particular
+kinds of files.  Run `configure --help' for a list of the directories
+you can set and what kinds of files go in them.
+
+   If the package supports it, you can cause programs to be installed
+with an extra prefix or suffix on their names by giving `configure' the
+option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'.
+
+Optional Features
+=================
+
+   Some packages pay attention to `--enable-FEATURE' options to
+`configure', where FEATURE indicates an optional part of the package.
+They may also pay attention to `--with-PACKAGE' options, where PACKAGE
+is something like `gnu-as' or `x' (for the X Window System).  The
+`README' should mention any `--enable-' and `--with-' options that the
+package recognizes.
+
+   For packages that use the X Window System, `configure' can usually
+find the X include and library files automatically, but if it doesn't,
+you can use the `configure' options `--x-includes=DIR' and
+`--x-libraries=DIR' to specify their locations.
+
+Specifying the System Type
+==========================
+
+   There may be some features `configure' cannot figure out
+automatically, but needs to determine by the type of machine the package
+will run on.  Usually, assuming the package is built to be run on the
+_same_ architectures, `configure' can figure that out, but if it prints
+a message saying it cannot guess the machine type, give it the
+`--build=TYPE' option.  TYPE can either be a short name for the system
+type, such as `sun4', or a canonical name which has the form:
+
+     CPU-COMPANY-SYSTEM
+
+where SYSTEM can have one of these forms:
+
+     OS KERNEL-OS
+
+   See the file `config.sub' for the possible values of each field.  If
+`config.sub' isn't included in this package, then this package doesn't
+need to know the machine type.
+
+   If you are _building_ compiler tools for cross-compiling, you should
+use the `--target=TYPE' option to select the type of system they will
+produce code for.
+
+   If you want to _use_ a cross compiler, that generates code for a
+platform different from the build platform, you should specify the
+"host" platform (i.e., that on which the generated programs will
+eventually be run) with `--host=TYPE'.
+
+Sharing Defaults
+================
+
+   If you want to set default values for `configure' scripts to share,
+you can create a site shell script called `config.site' that gives
+default values for variables like `CC', `cache_file', and `prefix'.
+`configure' looks for `PREFIX/share/config.site' if it exists, then
+`PREFIX/etc/config.site' if it exists.  Or, you can set the
+`CONFIG_SITE' environment variable to the location of the site script.
+A warning: not all `configure' scripts look for a site script.
+
+Defining Variables
+==================
+
+   Variables not defined in a site shell script can be set in the
+environment passed to `configure'.  However, some packages may run
+configure again during the build, and the customized values of these
+variables may be lost.  In order to avoid this problem, you should set
+them in the `configure' command line, using `VAR=value'.  For example:
+
+     ./configure CC=/usr/local2/bin/gcc
+
+will cause the specified gcc to be used as the C compiler (unless it is
+overridden in the site shell script).
+
+`configure' Invocation
+======================
+
+   `configure' recognizes the following options to control how it
+operates.
+
+`--help'
+`-h'
+     Print a summary of the options to `configure', and exit.
+
+`--version'
+`-V'
+     Print the version of Autoconf used to generate the `configure'
+     script, and exit.
+
+`--cache-file=FILE'
+     Enable the cache: use and save the results of the tests in FILE,
+     traditionally `config.cache'.  FILE defaults to `/dev/null' to
+     disable caching.
+
+`--config-cache'
+`-C'
+     Alias for `--cache-file=config.cache'.
+
+`--quiet'
+`--silent'
+`-q'
+     Do not print messages saying which checks are being made.  To
+     suppress all normal output, redirect it to `/dev/null' (any error
+     messages will still be shown).
+
+`--srcdir=DIR'
+     Look for the package's source code in directory DIR.  Usually
+     `configure' can determine that directory automatically.
+
+`configure' also accepts some other, not widely useful, options.  Run
+`configure --help' for more details.
+
diff --git a/Makefile.am b/Makefile.am

new file mode 100644 (file)

index 0000000..37f4cc6
--- /dev/null
+++ b/Makefile.am
@@ -0,0 +1,434 @@
+## Process this file with automake to generate Makefile.in
+
+
+# Copyright 1991, 1993, 1994, 1996, 1997, 1999, 2000, 2001, 2002, 2003, 2004,
+# 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+# The following options are the same as AM_INIT_AUTOMAKE in configure.in,
+# except no $(top_builddir) on ansi2knr.  That directory is wanted for the
+# Makefiles in subdirectories, but here we must omit it so automake gives
+# the actual ansi2knr build rule, not "cd $(top_builddir) && make ansi2knr".
+#
+AUTOMAKE_OPTIONS = 1.8 gnu no-dependencies ansi2knr
+
+
+# Libtool -version-info for libgmp.la and libmp.la.  See "Versioning" in the
+# libtool manual.
+#
+#      CURRENT:REVISION:AGE
+#
+# 1. No interfaces changed, only implementations (good): Increment REVISION.
+#
+# 2. Interfaces added, none removed (good): Increment CURRENT, increment
+#    AGE, set REVISION to 0.
+#
+# 3. Interfaces removed (BAD, breaks upward compatibility): Increment
+#    CURRENT, set AGE and REVISION to 0.
+#
+# Do this separately for libgmp, libgmpxx and libmp, and only for releases.
+#
+#        GMP      -version-info
+#       release   libgmp  libgmpxx libmp
+#        2.0.x      -        -       -
+#        3.0      3:0:0      -     3:0:0
+#        3.0.1    3:1:0      -     3:0:0
+#        3.1      4:0:1      -     4:0:1
+#        3.1.1    4:1:1      -     4:1:1
+#        4.0      5:0:2    3:0:0   4:2:1
+#        4.0.1    5:1:2    3:1:0   4:3:1
+#        4.1      6:0:3    3:2:0   4:4:1
+#        4.1.1    6:1:3    3:3:0   4:5:1
+#        4.1.2    6:2:3    3:4:0   4:6:1
+#        4.1.3    6:3:3    3:5:0   4:7:1
+#        4.1.4    6:3:3    3:5:0   4:7:1       WRONG, same as 4.1.3!
+#        4.2      6:0:3    3:2:0   4:4:1       REALLY WRONG, same as 4.1!
+#        4.2.1    7:1:4    4:1:1   4:10:1      WRONG for libgmpxx
+#        4.2.2    7:2:4    4:2:0   4:11:1
+#        4.2.3    7:3:4    4:3:0   4:12:1
+#        4.2.4    7:4:4    4:4:0   4:13:1
+#        4.3.0    8:0:5    5:0:1   4:14:1
+#        4.3.1    8:1:5    5:1:1   4:15:1      WRONG Really used same as 4.3.0
+#        4.3.2    8:2:5    5:2:1   4:16:1
+#        5.0.0    9:0:6    6:0:2   4:20:1      Should have been 10:0:0
+#        5.0.1   10:1:0    6:1:2   4:21:1
+#        5.0.2   10:2:0    6:2:2   4:22:1
+#        5.0.3   10:3:0    6:3:2   4:23:1
+#        5.0.4   10:4:0    6:4:2   4:24:1
+#        5.0.5   10:5:0    6:5:2   4:25:1
+#
+# Starting at 3:0:0 is a slight abuse of the versioning system, but it
+# ensures we're past soname libgmp.so.2, which was used on Debian GNU/Linux
+# packages of gmp 2.  Pretend gmp 2 was 2:0:0, so the interface changes for
+# gmp 3 mean 3:0:0 is right.
+#
+# We interpret "implementation changed" in item "1." above as meaning any
+# release, ie. the REVISION is incremented every time (if nothing else).
+# Even if we thought the code generated will be identical on all systems,
+# it's still good to get the shared library filename (like
+# libgmpxx.so.3.0.4) incrementing, to make it clear which GMP it's from.
+
+LIBGMP_LT_CURRENT =  10
+LIBGMP_LT_REVISION = 5
+LIBGMP_LT_AGE =      0
+
+LIBGMPXX_LT_CURRENT =  6
+LIBGMPXX_LT_REVISION = 5
+LIBGMPXX_LT_AGE =      2
+
+LIBMP_LT_CURRENT =  4
+LIBMP_LT_REVISION = 25
+LIBMP_LT_AGE =      1
+
+
+SUBDIRS = tests mpn mpz mpq mpf printf scanf cxx mpbsd demos tune doc
+
+EXTRA_DIST = configfsf.guess configfsf.sub .gdbinit INSTALL.autoconf
+
+if WANT_CXX
+GMPXX_HEADERS_OPTION = gmpxx.h
+endif
+EXTRA_DIST += gmpxx.h
+
+# gmp.h and mp.h are architecture dependent, mainly since they encode the
+# limb size used in libgmp.  For that reason they belong under $exec_prefix
+# not $prefix, strictly speaking.
+#
+# $exec_prefix/include is not in the default include path for gcc built to
+# the same $prefix and $exec_prefix, which might mean gmp.h is not found,
+# but anyone knowledgeable enough to be playing with exec_prefix will be able
+# to address that.
+#
+includeexecdir = $(exec_prefix)/include
+include_HEADERS = $(GMPXX_HEADERS_OPTION)
+nodist_includeexec_HEADERS = gmp.h $(MPBSD_HEADERS_OPTION)
+lib_LTLIBRARIES = libgmp.la $(GMPXX_LTLIBRARIES_OPTION) $(MPBSD_LTLIBRARIES_OPTION)
+
+BUILT_SOURCES = gmp.h
+
+DISTCLEANFILES = $(BUILT_SOURCES) config.m4 @gmp_srclinks@
+
+# Tell gmp.h it's building gmp, not an application, used by windows DLL stuff.
+INCLUDES=-D__GMP_WITHIN_GMP
+
+
+MPF_OBJECTS = mpf/init$U.lo mpf/init2$U.lo mpf/inits$U.lo mpf/set$U.lo     \
+  mpf/set_ui$U.lo mpf/set_si$U.lo mpf/set_str$U.lo mpf/set_d$U.lo          \
+  mpf/set_z$U.lo mpf/iset$U.lo mpf/iset_ui$U.lo mpf/iset_si$U.lo           \
+  mpf/iset_str$U.lo mpf/iset_d$U.lo mpf/clear$U.lo mpf/clears$U.lo         \
+  mpf/get_str$U.lo mpf/dump$U.lo mpf/size$U.lo mpf/eq$U.lo mpf/reldiff$U.lo \
+  mpf/sqrt$U.lo mpf/random2$U.lo mpf/inp_str$U.lo mpf/out_str$U.lo         \
+  mpf/add$U.lo mpf/add_ui$U.lo mpf/sub$U.lo mpf/sub_ui$U.lo mpf/ui_sub$U.lo \
+  mpf/mul$U.lo mpf/mul_ui$U.lo mpf/div$U.lo mpf/div_ui$U.lo                \
+  mpf/cmp$U.lo mpf/cmp_d$U.lo mpf/cmp_ui$U.lo mpf/cmp_si$U.lo              \
+  mpf/mul_2exp$U.lo mpf/div_2exp$U.lo mpf/abs$U.lo mpf/neg$U.lo                    \
+  mpf/set_q$U.lo mpf/get_d$U.lo mpf/get_d_2exp$U.lo mpf/set_dfl_prec$U.lo   \
+  mpf/set_prc$U.lo mpf/set_prc_raw$U.lo mpf/get_dfl_prec$U.lo               \
+  mpf/get_prc$U.lo mpf/ui_div$U.lo mpf/sqrt_ui$U.lo                         \
+  mpf/ceilfloor$U.lo mpf/trunc$U.lo mpf/pow_ui$U.lo                        \
+  mpf/urandomb$U.lo mpf/swap$U.lo                                          \
+  mpf/fits_sint$U.lo mpf/fits_slong$U.lo mpf/fits_sshort$U.lo              \
+  mpf/fits_uint$U.lo mpf/fits_ulong$U.lo mpf/fits_ushort$U.lo              \
+  mpf/get_si$U.lo mpf/get_ui$U.lo                                          \
+  mpf/int_p$U.lo
+
+MPZ_OBJECTS = mpz/abs$U.lo mpz/add$U.lo mpz/add_ui$U.lo                        \
+  mpz/aorsmul$U.lo mpz/aorsmul_i$U.lo mpz/and$U.lo mpz/array_init$U.lo \
+  mpz/bin_ui$U.lo mpz/bin_uiui$U.lo                                    \
+  mpz/cdiv_q$U.lo mpz/cdiv_q_ui$U.lo                                   \
+  mpz/cdiv_qr$U.lo mpz/cdiv_qr_ui$U.lo                                 \
+  mpz/cdiv_r$U.lo mpz/cdiv_r_ui$U.lo mpz/cdiv_ui$U.lo                  \
+  mpz/cfdiv_q_2exp$U.lo mpz/cfdiv_r_2exp$U.lo                          \
+  mpz/clear$U.lo mpz/clears$U.lo mpz/clrbit$U.lo                       \
+  mpz/cmp$U.lo mpz/cmp_d$U.lo mpz/cmp_si$U.lo mpz/cmp_ui$U.lo          \
+  mpz/cmpabs$U.lo mpz/cmpabs_d$U.lo mpz/cmpabs_ui$U.lo                 \
+  mpz/com$U.lo mpz/combit$U.lo                                         \
+  mpz/cong$U.lo mpz/cong_2exp$U.lo mpz/cong_ui$U.lo                    \
+  mpz/divexact$U.lo mpz/divegcd$U.lo mpz/dive_ui$U.lo                  \
+  mpz/divis$U.lo mpz/divis_ui$U.lo mpz/divis_2exp$U.lo mpz/dump$U.lo   \
+  mpz/export$U.lo mpz/fac_ui$U.lo mpz/fdiv_q$U.lo                      \
+  mpz/fdiv_q_ui$U.lo mpz/fdiv_qr$U.lo mpz/fdiv_qr_ui$U.lo              \
+  mpz/fdiv_r$U.lo mpz/fdiv_r_ui$U.lo                                   \
+  mpz/fdiv_ui$U.lo mpz/fib_ui$U.lo mpz/fib2_ui$U.lo mpz/fits_sint$U.lo \
+  mpz/fits_slong$U.lo mpz/fits_sshort$U.lo mpz/fits_uint$U.lo          \
+  mpz/fits_ulong$U.lo mpz/fits_ushort$U.lo mpz/gcd$U.lo                        \
+  mpz/gcd_ui$U.lo mpz/gcdext$U.lo mpz/get_d$U.lo mpz/get_d_2exp$U.lo   \
+  mpz/get_si$U.lo mpz/get_str$U.lo mpz/get_ui$U.lo mpz/getlimbn$U.lo   \
+  mpz/hamdist$U.lo                                                     \
+  mpz/import$U.lo mpz/init$U.lo mpz/init2$U.lo mpz/inits$U.lo          \
+  mpz/inp_raw$U.lo mpz/inp_str$U.lo mpz/invert$U.lo                    \
+  mpz/ior$U.lo mpz/iset$U.lo mpz/iset_d$U.lo mpz/iset_si$U.lo          \
+  mpz/iset_str$U.lo mpz/iset_ui$U.lo mpz/jacobi$U.lo mpz/kronsz$U.lo   \
+  mpz/kronuz$U.lo mpz/kronzs$U.lo mpz/kronzu$U.lo                      \
+  mpz/lcm$U.lo mpz/lcm_ui$U.lo mpz/lucnum_ui$U.lo mpz/lucnum2_ui$U.lo  \
+  mpz/millerrabin$U.lo mpz/mod$U.lo mpz/mul$U.lo mpz/mul_2exp$U.lo     \
+  mpz/mul_si$U.lo mpz/mul_ui$U.lo                                      \
+  mpz/n_pow_ui$U.lo mpz/neg$U.lo mpz/nextprime$U.lo                    \
+  mpz/out_raw$U.lo mpz/out_str$U.lo mpz/perfpow$U.lo mpz/perfsqr$U.lo  \
+  mpz/popcount$U.lo mpz/pow_ui$U.lo mpz/powm$U.lo mpz/powm_sec$U.lo    \
+  mpz/powm_ui$U.lo mpz/pprime_p$U.lo mpz/random$U.lo mpz/random2$U.lo  \
+  mpz/realloc$U.lo mpz/realloc2$U.lo mpz/remove$U.lo                   \
+  mpz/root$U.lo mpz/rootrem$U.lo mpz/rrandomb$U.lo mpz/scan0$U.lo      \
+  mpz/scan1$U.lo mpz/set$U.lo mpz/set_d$U.lo mpz/set_f$U.lo            \
+  mpz/set_q$U.lo mpz/set_si$U.lo mpz/set_str$U.lo mpz/set_ui$U.lo      \
+  mpz/setbit$U.lo                                                      \
+  mpz/size$U.lo mpz/sizeinbase$U.lo mpz/sqrt$U.lo                      \
+  mpz/sqrtrem$U.lo mpz/sub$U.lo mpz/sub_ui$U.lo mpz/swap$U.lo          \
+  mpz/tdiv_ui$U.lo mpz/tdiv_q$U.lo mpz/tdiv_q_2exp$U.lo                        \
+  mpz/tdiv_q_ui$U.lo mpz/tdiv_qr$U.lo mpz/tdiv_qr_ui$U.lo              \
+  mpz/tdiv_r$U.lo mpz/tdiv_r_2exp$U.lo mpz/tdiv_r_ui$U.lo              \
+  mpz/tstbit$U.lo mpz/ui_pow_ui$U.lo mpz/ui_sub$U.lo mpz/urandomb$U.lo \
+  mpz/urandomm$U.lo mpz/xor$U.lo
+
+MPQ_OBJECTS = mpq/abs$U.lo mpq/aors$U.lo                               \
+  mpq/canonicalize$U.lo mpq/clear$U.lo mpq/clears$U.lo                 \
+  mpq/cmp$U.lo mpq/cmp_si$U.lo mpq/cmp_ui$U.lo mpq/div$U.lo            \
+  mpq/get_d$U.lo mpq/get_den$U.lo mpq/get_num$U.lo mpq/get_str$U.lo    \
+  mpq/init$U.lo mpq/inits$U.lo mpq/inp_str$U.lo mpq/inv$U.lo           \
+  mpq/md_2exp$U.lo mpq/mul$U.lo mpq/neg$U.lo mpq/out_str$U.lo          \
+  mpq/set$U.lo mpq/set_den$U.lo mpq/set_num$U.lo                       \
+  mpq/set_si$U.lo mpq/set_str$U.lo mpq/set_ui$U.lo                     \
+  mpq/equal$U.lo mpq/set_z$U.lo mpq/set_d$U.lo                         \
+  mpq/set_f$U.lo mpq/swap$U.lo
+
+MPN_OBJECTS = mpn/fib_table$U.lo mpn/mp_bases$U.lo
+
+PRINTF_OBJECTS =                                                       \
+  printf/asprintf$U.lo printf/asprntffuns$U.lo                         \
+  printf/doprnt$U.lo printf/doprntf$U.lo printf/doprnti$U.lo           \
+  printf/fprintf$U.lo                                                  \
+  printf/obprintf$U.lo printf/obvprintf$U.lo printf/obprntffuns$U.lo   \
+  printf/printf$U.lo printf/printffuns$U.lo                            \
+  printf/snprintf$U.lo printf/snprntffuns$U.lo                         \
+  printf/sprintf$U.lo printf/sprintffuns$U.lo                          \
+  printf/vasprintf$U.lo printf/vfprintf$U.lo printf/vprintf$U.lo       \
+  printf/vsnprintf$U.lo printf/vsprintf$U.lo                           \
+  printf/repl-vsnprintf$U.lo
+
+SCANF_OBJECTS =                                                        \
+  scanf/doscan$U.lo scanf/fscanf$U.lo scanf/fscanffuns$U.lo    \
+  scanf/scanf$U.lo scanf/sscanf$U.lo scanf/sscanffuns$U.lo     \
+  scanf/vfscanf$U.lo scanf/vscanf$U.lo scanf/vsscanf$U.lo
+
+# no $U for C++ files
+CXX_OBJECTS =                                                          \
+  cxx/isfuns.lo cxx/ismpf.lo cxx/ismpq.lo cxx/ismpz.lo cxx/ismpznw.lo  \
+  cxx/osdoprnti.lo cxx/osfuns.lo                                       \
+  cxx/osmpf.lo cxx/osmpq.lo cxx/osmpz.lo
+
+MPBSD_OBJECTS = mpbsd/add$U.lo mpbsd/tdiv_qr$U.lo mpbsd/set$U.lo       \
+  mpbsd/powm$U.lo mpbsd/sub$U.lo mpbsd/cmp$U.lo mpbsd/mfree$U.lo       \
+  mpbsd/mtox$U.lo mpbsd/realloc$U.lo mpbsd/gcd$U.lo mpbsd/itom$U.lo    \
+  mpbsd/min$U.lo mpbsd/mul$U.lo mpbsd/mout$U.lo mpbsd/rpow$U.lo                \
+  mpbsd/sdiv$U.lo mpbsd/sqrtrem$U.lo mpbsd/xtom$U.lo
+
+
+# In libtool 1.5 it doesn't work to build libgmp.la from the convenience
+# libraries like mpz/libmpz.la.  Or rather it works, but it ends up putting
+# PIC objects into libgmp.a if shared and static are both built.  (The PIC
+# objects go into mpz/.libs/libmpz.a, and thence into .libs/libgmp.a.)
+#
+# For now the big lists of objects above are used.  Something like mpz/*.lo
+# would probably work, but might risk missing something out or getting
+# something extra.  The source files for each .lo are listed in the
+# Makefile.am's in the subdirectories.
+#
+# Currently, for libgmp, unlike libmp below, we're not using
+# -export-symbols, since the tune and speed programs, and perhaps some of
+# the test programs, want to access undocumented symbols.
+
+libgmp_la_SOURCES = gmp-impl.h longlong.h randmt.h                     \
+  assert.c compat.c errno.c extract-dbl.c invalid.c memory.c           \
+  mp_bpl.c mp_clz_tab.c mp_dv_tab.c mp_minv_tab.c mp_get_fns.c mp_set_fns.c \
+  rand.c randclr.c randdef.c randiset.c randlc2s.c randlc2x.c randmt.c \
+  randmts.c rands.c randsd.c randsdui.c randbui.c randmui.c version.c  \
+  nextprime.c
+EXTRA_libgmp_la_SOURCES = tal-debug.c tal-notreent.c tal-reent.c
+libgmp_la_DEPENDENCIES = @TAL_OBJECT@          \
+  $(MPF_OBJECTS) $(MPZ_OBJECTS) $(MPQ_OBJECTS) \
+  $(MPN_OBJECTS) @mpn_objs_in_libgmp@          \
+  $(PRINTF_OBJECTS)  $(SCANF_OBJECTS)
+libgmp_la_LIBADD = $(libgmp_la_DEPENDENCIES)
+libgmp_la_LDFLAGS = $(GMP_LDFLAGS) $(LIBGMP_LDFLAGS) \
+  -version-info $(LIBGMP_LT_CURRENT):$(LIBGMP_LT_REVISION):$(LIBGMP_LT_AGE)
+
+
+# We need at least one .cc file in $(libgmpxx_la_SOURCES) so automake will
+# use $(CXXLINK) rather than the plain C $(LINK).  cxx/dummy.cc is that
+# file.
+
+if WANT_CXX
+GMPXX_LTLIBRARIES_OPTION = libgmpxx.la
+endif
+libgmpxx_la_SOURCES = cxx/dummy.cc
+libgmpxx_la_DEPENDENCIES = $(CXX_OBJECTS) libgmp.la
+libgmpxx_la_LIBADD = $(libgmpxx_la_DEPENDENCIES)
+libgmpxx_la_LDFLAGS = $(GMP_LDFLAGS) $(LIBGMPXX_LDFLAGS) \
+  -version-info $(LIBGMPXX_LT_CURRENT):$(LIBGMPXX_LT_REVISION):$(LIBGMPXX_LT_AGE)
+
+
+# The selected mpz objects here support mpz/powm.c (built as mpbsd/powm.lo)
+# and can probably be removed when that switches to an mpn implementation.
+# (Apart from mpz/n_pow_ui$U.lo, which supports mpbsd/rpow.c)
+
+if WANT_MPBSD
+MPBSD_HEADERS_OPTION = mp.h
+MPBSD_LTLIBRARIES_OPTION = libmp.la
+endif
+BUILT_SOURCES += mp.h
+libmp_la_SOURCES = assert.c errno.c memory.c mp_bpl.c mp_clz_tab.c     \
+  mp_dv_tab.c mp_minv_tab.c mp_get_fns.c mp_set_fns.c nextprime.c
+libmp_la_DEPENDENCIES = $(srcdir)/libmp.sym                            \
+  @TAL_OBJECT@ $(MPBSD_OBJECTS) $(MPN_OBJECTS) @mpn_objs_in_libmp@     \
+  mpz/add$U.lo mpz/gcdext$U.lo mpz/invert$U.lo mpz/mul$U.lo            \
+  mpz/n_pow_ui$U.lo mpz/realloc$U.lo mpz/set$U.lo mpz/sub$U.lo         \
+  mpz/tdiv_q$U.lo
+libmp_la_LIBADD = $(libmp_la_DEPENDENCIES)
+libmp_la_LDFLAGS = $(GMP_LDFLAGS) \
+  -version-info $(LIBMP_LT_CURRENT):$(LIBMP_LT_REVISION):$(LIBMP_LT_AGE) \
+  -export-symbols $(srcdir)/libmp.sym
+EXTRA_DIST += libmp.sym
+
+
+# Automake hook target.  Its recipe installs nothing; it only prints a boxed
+# caution urging the user to run "make check" if that has not been done yet.
+install-data-hook:
+       @echo ''
+       @echo '+-------------------------------------------------------------+'
+       @echo '| CAUTION:                                                    |'
+       @echo '|                                                             |'
+       @echo '| If you have not already run "make check", then we strongly  |'
+       @echo '| recommend you do so.                                        |'
+       @echo '|                                                             |'
+       @echo '| GMP has been carefully tested by its authors, but compilers |'
+       @echo '| are all too often released with serious bugs.  GMP tends to |'
+       @echo '| explore interesting corners in compilers and has hit bugs   |'
+       @echo '| on quite a few occasions.                                   |'
+       @echo '|                                                             |'
+       @echo '+-------------------------------------------------------------+'
+       @echo ''
+
+
+# The ansi2knr setups for the build programs are the same as the normal
+# automake ansi2knr rules, but using $(CC_FOR_BUILD) instead of $(CC).
+#
+# The "test -f" support for srcdir!=builddir is similar to the automake .c.o
+# etc rules, but with each foo.c explicitly, since $< is not portable
+# outside an inference rule.
+#
+# A quoted 'foo.c' is used with the "test -f"'s to avoid Sun make rewriting
+# it as part of its VPATH support.  See the autoconf manual "Limitations of
+# Make".
+#
+# Generated .h files which are used by gmp-impl.h are BUILT_SOURCES since
+# they must exist before anything can be compiled.
+#
+# Other generated .h files are also BUILT_SOURCES so as to get all the
+# build-system stuff over and done with at the start.  Also, dependencies on
+# the .h files are not properly expressed for the various objects that use
+# them.
+
+EXTRA_DIST += dumbmp.c
+
+# mpz/fac_ui.h is written by running the build-machine program gen-fac_ui
+# with the limb and nail bit counts.  A partial header is removed and the
+# rule fails if the generator exits non-zero.
+mpz/fac_ui.h: gen-fac_ui$(EXEEXT_FOR_BUILD)
+       ./gen-fac_ui $(GMP_LIMB_BITS) $(GMP_NAIL_BITS) >mpz/fac_ui.h || (rm -f mpz/fac_ui.h; exit 1)
+BUILT_SOURCES += mpz/fac_ui.h
+
+# Compile the generator with $(CC_FOR_BUILD), taking the source from the
+# build directory if it exists there and from $(srcdir) otherwise.
+gen-fac_ui$(EXEEXT_FOR_BUILD): gen-fac_ui$(U_FOR_BUILD).c dumbmp.c
+       $(CC_FOR_BUILD) `test -f 'gen-fac_ui$(U_FOR_BUILD).c' || echo '$(srcdir)/'`gen-fac_ui$(U_FOR_BUILD).c -o gen-fac_ui$(EXEEXT_FOR_BUILD)
+DISTCLEANFILES += gen-fac_ui$(EXEEXT_FOR_BUILD)
+EXTRA_DIST += gen-fac_ui.c
+
+# Preprocess gen-fac_ui.c, rewrite "# N" line markers as "#line N", and pipe
+# the result through ansi2knr.  If that fails, remove the partial output and
+# exit non-zero so make stops here instead of at the later compile step.
+gen-fac_ui_.c: gen-fac_ui.c $(ANSI2KNR)
+       $(CPP_FOR_BUILD) `if test -f $(srcdir)/gen-fac_ui.c; then echo $(srcdir)/gen-fac_ui.c; else echo gen-fac_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > gen-fac_ui_.c || (rm -f gen-fac_ui_.c; exit 1)
+
+
+# fib_table.h and mpn/fib_table.c are both written by the build-machine
+# program gen-fib ("header" and "table" modes respectively).  A partial
+# output file is removed and the rule fails if the generator exits non-zero.
+fib_table.h: gen-fib$(EXEEXT_FOR_BUILD)
+       ./gen-fib header $(GMP_LIMB_BITS) $(GMP_NAIL_BITS) >fib_table.h || (rm -f fib_table.h; exit 1)
+BUILT_SOURCES += fib_table.h
+
+mpn/fib_table.c: gen-fib$(EXEEXT_FOR_BUILD)
+       ./gen-fib table $(GMP_LIMB_BITS) $(GMP_NAIL_BITS) >mpn/fib_table.c || (rm -f mpn/fib_table.c; exit 1)
+BUILT_SOURCES += mpn/fib_table.c
+
+# Compile the generator with $(CC_FOR_BUILD), taking the source from the
+# build directory if it exists there and from $(srcdir) otherwise.
+gen-fib$(EXEEXT_FOR_BUILD): gen-fib$(U_FOR_BUILD).c dumbmp.c
+       $(CC_FOR_BUILD) `test -f 'gen-fib$(U_FOR_BUILD).c' || echo '$(srcdir)/'`gen-fib$(U_FOR_BUILD).c -o gen-fib$(EXEEXT_FOR_BUILD)
+DISTCLEANFILES += gen-fib$(EXEEXT_FOR_BUILD)
+EXTRA_DIST += gen-fib.c
+
+# Preprocess gen-fib.c, rewrite "# N" line markers as "#line N", and pipe
+# the result through ansi2knr.  If that fails, remove the partial output and
+# exit non-zero so make stops here instead of at the later compile step.
+gen-fib_.c: gen-fib.c $(ANSI2KNR)
+       $(CPP_FOR_BUILD) `if test -f $(srcdir)/gen-fib.c; then echo $(srcdir)/gen-fib.c; else echo gen-fib.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > gen-fib_.c || (rm -f gen-fib_.c; exit 1)
+
+
+# mp_bases.h and mpn/mp_bases.c are both written by the build-machine
+# program gen-bases ("header" and "table" modes respectively).  A partial
+# output file is removed and the rule fails if the generator exits non-zero.
+mp_bases.h: gen-bases$(EXEEXT_FOR_BUILD)
+       ./gen-bases header $(GMP_LIMB_BITS) $(GMP_NAIL_BITS) >mp_bases.h || (rm -f mp_bases.h; exit 1)
+BUILT_SOURCES += mp_bases.h
+
+mpn/mp_bases.c: gen-bases$(EXEEXT_FOR_BUILD)
+       ./gen-bases table $(GMP_LIMB_BITS) $(GMP_NAIL_BITS) >mpn/mp_bases.c || (rm -f mpn/mp_bases.c; exit 1)
+BUILT_SOURCES += mpn/mp_bases.c
+
+# Compile the generator with $(CC_FOR_BUILD) and link $(LIBM_FOR_BUILD),
+# taking the source from the build directory if it exists there and from
+# $(srcdir) otherwise.
+gen-bases$(EXEEXT_FOR_BUILD): gen-bases$(U_FOR_BUILD).c dumbmp.c
+       $(CC_FOR_BUILD) `test -f 'gen-bases$(U_FOR_BUILD).c' || echo '$(srcdir)/'`gen-bases$(U_FOR_BUILD).c -o gen-bases$(EXEEXT_FOR_BUILD) $(LIBM_FOR_BUILD)
+DISTCLEANFILES += gen-bases$(EXEEXT_FOR_BUILD)
+EXTRA_DIST += gen-bases.c
+
+# Preprocess gen-bases.c, rewrite "# N" line markers as "#line N", and pipe
+# the result through ansi2knr.  If that fails, remove the partial output and
+# exit non-zero so make stops here instead of at the later compile step.
+gen-bases_.c: gen-bases.c $(ANSI2KNR)
+       $(CPP_FOR_BUILD) `if test -f $(srcdir)/gen-bases.c; then echo $(srcdir)/gen-bases.c; else echo gen-bases.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > gen-bases_.c || (rm -f gen-bases_.c; exit 1)
+
+
+
+# trialdivtab.h is written by running the build-machine program
+# gen-trialdivtab with the limb bit count and the fixed argument 8000.  A
+# partial header is removed and the rule fails if the generator exits
+# non-zero.
+trialdivtab.h: gen-trialdivtab$(EXEEXT_FOR_BUILD)
+       ./gen-trialdivtab $(GMP_LIMB_BITS) 8000 >trialdivtab.h || (rm -f trialdivtab.h; exit 1)
+BUILT_SOURCES += trialdivtab.h
+
+# Compile the generator with $(CC_FOR_BUILD) and link $(LIBM_FOR_BUILD),
+# taking the source from the build directory if it exists there and from
+# $(srcdir) otherwise.
+gen-trialdivtab$(EXEEXT_FOR_BUILD): gen-trialdivtab$(U_FOR_BUILD).c dumbmp.c
+       $(CC_FOR_BUILD) `test -f 'gen-trialdivtab$(U_FOR_BUILD).c' || echo '$(srcdir)/'`gen-trialdivtab$(U_FOR_BUILD).c -o gen-trialdivtab$(EXEEXT_FOR_BUILD) $(LIBM_FOR_BUILD)
+DISTCLEANFILES += gen-trialdivtab$(EXEEXT_FOR_BUILD)
+EXTRA_DIST += gen-trialdivtab.c
+
+# Preprocess gen-trialdivtab.c, rewrite "# N" line markers as "#line N", and
+# pipe the result through ansi2knr.  If that fails, remove the partial output
+# and exit non-zero so make stops here instead of at the later compile step.
+gen-trialdivtab_.c: gen-trialdivtab.c $(ANSI2KNR)
+       $(CPP_FOR_BUILD) `if test -f $(srcdir)/gen-trialdivtab.c; then echo $(srcdir)/gen-trialdivtab.c; else echo gen-trialdivtab.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > gen-trialdivtab_.c || (rm -f gen-trialdivtab_.c; exit 1)
+
+
+
+
+# mpn/perfsqr.h is written by running the build-machine program gen-psqr
+# with the limb and nail bit counts.  A partial header is removed and the
+# rule fails if the generator exits non-zero.
+mpn/perfsqr.h: gen-psqr$(EXEEXT_FOR_BUILD)
+       ./gen-psqr $(GMP_LIMB_BITS) $(GMP_NAIL_BITS) >mpn/perfsqr.h || (rm -f mpn/perfsqr.h; exit 1)
+BUILT_SOURCES += mpn/perfsqr.h
+
+# Compile the generator with $(CC_FOR_BUILD) and link $(LIBM_FOR_BUILD),
+# taking the source from the build directory if it exists there and from
+# $(srcdir) otherwise.
+gen-psqr$(EXEEXT_FOR_BUILD): gen-psqr$(U_FOR_BUILD).c dumbmp.c
+       $(CC_FOR_BUILD) `test -f 'gen-psqr$(U_FOR_BUILD).c' || echo '$(srcdir)/'`gen-psqr$(U_FOR_BUILD).c -o gen-psqr$(EXEEXT_FOR_BUILD) $(LIBM_FOR_BUILD)
+DISTCLEANFILES += gen-psqr$(EXEEXT_FOR_BUILD)
+EXTRA_DIST += gen-psqr.c
+
+# Preprocess gen-psqr.c, rewrite "# N" line markers as "#line N", and pipe
+# the result through ansi2knr.  If that fails, remove the partial output and
+# exit non-zero so make stops here instead of at the later compile step.
+gen-psqr_.c: gen-psqr.c $(ANSI2KNR)
+       $(CPP_FOR_BUILD) `if test -f $(srcdir)/gen-psqr.c; then echo $(srcdir)/gen-psqr.c; else echo gen-psqr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > gen-psqr_.c || (rm -f gen-psqr_.c; exit 1)
+
+
+# Avoid: CVS - cvs directories
+#        *~  - emacs backups
+#        .#* - cvs merge originals
+#
+# *~ and .#* only occur when a whole directory without its own Makefile.am
+# is distributed, like "doc" or the mpn cpu subdirectories.
+#
+# Strip unwanted files from the distribution tree: CVS directories, "*~"
+# files and ".#*" files found under $(distdir) are passed to "rm -rf".  The
+# leading "-" tells make to carry on even if this pipeline fails.
+dist-hook:
+       -find $(distdir) \( -name CVS -type d \) -o -name "*~" -o -name ".#*" \
+               | xargs rm -rf
+# The commands below are commented out, so they never run.  The first greps
+# this Makefile.am for a version-table row for $(VERSION) whose three
+# -version-info triples match the *_LT_* variables.  The second looks for
+# repeated triples in the table, ignoring the listed known exceptions.
+#      grep -F $(VERSION) $(srcdir)/Makefile.am \
+#              | grep -q "^# *$(VERSION) *$(LIBGMP_LT_CURRENT):$(LIBGMP_LT_REVISION):$(LIBGMP_LT_AGE) *$(LIBGMPXX_LT_CURRENT):$(LIBGMPXX_LT_REVISION):$(LIBGMPXX_LT_AGE) *$(LIBMP_LT_CURRENT):$(LIBMP_LT_REVISION):$(LIBMP_LT_AGE)"
+#      test -z "`sed -n 's/^# *[0-9]*\.[0-9]*\.[0-9]* *\([0-9]*:[0-9]*:[0-9]*\) *\([0-9]*:[0-9]*:[0-9]*\) *\([0-9]*:[0-9]*:[0-9]*\).*/A\1\nB\2\nC\3/p' $(srcdir)/Makefile.am | grep -v 'A6:3:3\|B3:5:0\|C4:7:1' | sort | uniq -d`"
diff --git a/Makefile.in b/Makefile.in

new file mode 100644 (file)

index 0000000..7728bf1
--- /dev/null
+++ b/Makefile.in
@@ -0,0 +1,1452 @@
+# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation,
+# Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+# Copyright 1991, 1993, 1994, 1996, 1997, 1999, 2000, 2001, 2002, 2003, 2004,
+# 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+ANSI2KNR = @ANSI2KNR@
+subdir = .
+DIST_COMMON = README $(am__configure_deps) $(am__include_HEADERS_DIST) \
+       $(srcdir)/Makefile.am $(srcdir)/Makefile.in \
+       $(srcdir)/config.in $(srcdir)/gmp-h.in $(srcdir)/mp-h.in \
+       $(top_srcdir)/configure AUTHORS COPYING COPYING.LIB ChangeLog \
+       INSTALL NEWS ansi2knr.1 ansi2knr.c config.guess config.sub \
+       install-sh ltmain.sh missing ylwrap
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
+       $(top_srcdir)/configure.in
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+       $(ACLOCAL_M4)
+am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \
+ configure.lineno config.status.lineno
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = config.h
+CONFIG_CLEAN_FILES = gmp.h mp.h gmp-mparam.h
+CONFIG_CLEAN_VPATH_FILES =
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+    $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+    *) f=$$p;; \
+  esac;
+am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
+am__install_max = 40
+am__nobase_strip_setup = \
+  srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
+am__nobase_strip = \
+  for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
+am__nobase_list = $(am__nobase_strip_setup); \
+  for p in $$list; do echo "$$p $$p"; done | \
+  sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
+  $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
+    if (++n[$$2] == $(am__install_max)) \
+      { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
+    END { for (dir in files) print dir, files[dir] }'
+am__base_list = \
+  sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
+  sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(includedir)" \
+       "$(DESTDIR)$(includeexecdir)"
+LTLIBRARIES = $(lib_LTLIBRARIES)
+am__DEPENDENCIES_1 = $(MPF_OBJECTS) $(MPZ_OBJECTS) $(MPQ_OBJECTS) \
+       $(MPN_OBJECTS) $(PRINTF_OBJECTS) $(SCANF_OBJECTS)
+am_libgmp_la_OBJECTS = assert$U.lo compat$U.lo errno$U.lo \
+       extract-dbl$U.lo invalid$U.lo memory$U.lo mp_bpl$U.lo \
+       mp_clz_tab$U.lo mp_dv_tab$U.lo mp_minv_tab$U.lo \
+       mp_get_fns$U.lo mp_set_fns$U.lo rand$U.lo randclr$U.lo \
+       randdef$U.lo randiset$U.lo randlc2s$U.lo randlc2x$U.lo \
+       randmt$U.lo randmts$U.lo rands$U.lo randsd$U.lo randsdui$U.lo \
+       randbui$U.lo randmui$U.lo version$U.lo nextprime$U.lo
+libgmp_la_OBJECTS = $(am_libgmp_la_OBJECTS)
+libgmp_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
+       $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+       $(libgmp_la_LDFLAGS) $(LDFLAGS) -o $@
+am_libgmpxx_la_OBJECTS = dummy.lo
+libgmpxx_la_OBJECTS = $(am_libgmpxx_la_OBJECTS)
+libgmpxx_la_LINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) \
+       $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \
+       $(CXXFLAGS) $(libgmpxx_la_LDFLAGS) $(LDFLAGS) -o $@
+@WANT_CXX_TRUE@am_libgmpxx_la_rpath = -rpath $(libdir)
+am__DEPENDENCIES_2 = $(srcdir)/libmp.sym $(MPBSD_OBJECTS) \
+       $(MPN_OBJECTS) mpz/add$U.lo mpz/gcdext$U.lo mpz/invert$U.lo \
+       mpz/mul$U.lo mpz/n_pow_ui$U.lo mpz/realloc$U.lo mpz/set$U.lo \
+       mpz/sub$U.lo mpz/tdiv_q$U.lo
+am_libmp_la_OBJECTS = assert$U.lo errno$U.lo memory$U.lo mp_bpl$U.lo \
+       mp_clz_tab$U.lo mp_dv_tab$U.lo mp_minv_tab$U.lo \
+       mp_get_fns$U.lo mp_set_fns$U.lo nextprime$U.lo
+libmp_la_OBJECTS = $(am_libmp_la_OBJECTS)
+libmp_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(libmp_la_LDFLAGS) \
+       $(LDFLAGS) -o $@
+@WANT_MPBSD_TRUE@am_libmp_la_rpath = -rpath $(libdir)
+DEFAULT_INCLUDES = -I.@am__isrc@
+depcomp =
+am__depfiles_maybe =
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+       $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+       $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
+       $(LDFLAGS) -o $@
+CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+       $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
+LTCXXCOMPILE = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+       $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
+CXXLD = $(CXX)
+CXXLINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=link $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
+       $(LDFLAGS) -o $@
+SOURCES = $(libgmp_la_SOURCES) $(EXTRA_libgmp_la_SOURCES) \
+       $(libgmpxx_la_SOURCES) $(libmp_la_SOURCES)
+DIST_SOURCES = $(libgmp_la_SOURCES) $(EXTRA_libgmp_la_SOURCES) \
+       $(libgmpxx_la_SOURCES) $(libmp_la_SOURCES)
+RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \
+       html-recursive info-recursive install-data-recursive \
+       install-dvi-recursive install-exec-recursive \
+       install-html-recursive install-info-recursive \
+       install-pdf-recursive install-ps-recursive install-recursive \
+       installcheck-recursive installdirs-recursive pdf-recursive \
+       ps-recursive uninstall-recursive
+am__include_HEADERS_DIST = gmpxx.h
+HEADERS = $(include_HEADERS) $(nodist_includeexec_HEADERS)
+RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive        \
+  distclean-recursive maintainer-clean-recursive
+AM_RECURSIVE_TARGETS = $(RECURSIVE_TARGETS:-recursive=) \
+       $(RECURSIVE_CLEAN_TARGETS:-recursive=) tags TAGS ctags CTAGS \
+       distdir dist dist-all distcheck
+ETAGS = etags
+CTAGS = ctags
+DIST_SUBDIRS = $(SUBDIRS)
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+distdir = $(PACKAGE)-$(VERSION)
+top_distdir = $(distdir)
+am__remove_distdir = \
+  { test ! -d "$(distdir)" \
+    || { find "$(distdir)" -type d ! -perm -200 -exec chmod u+w {} ';' \
+         && rm -fr "$(distdir)"; }; }
+am__relativize = \
+  dir0=`pwd`; \
+  sed_first='s,^\([^/]*\)/.*$$,\1,'; \
+  sed_rest='s,^[^/]*/*,,'; \
+  sed_last='s,^.*/\([^/]*\)$$,\1,'; \
+  sed_butlast='s,/*[^/]*$$,,'; \
+  while test -n "$$dir1"; do \
+    first=`echo "$$dir1" | sed -e "$$sed_first"`; \
+    if test "$$first" != "."; then \
+      if test "$$first" = ".."; then \
+        dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \
+        dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \
+      else \
+        first2=`echo "$$dir2" | sed -e "$$sed_first"`; \
+        if test "$$first2" = "$$first"; then \
+          dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \
+        else \
+          dir2="../$$dir2"; \
+        fi; \
+        dir0="$$dir0"/"$$first"; \
+      fi; \
+    fi; \
+    dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \
+  done; \
+  reldir="$$dir2"
+DIST_ARCHIVES = $(distdir).tar.gz
+GZIP_ENV = --best
+distuninstallcheck_listfiles = find . -type f -print
+distcleancheck_listfiles = find . -type f -print
+ABI = @ABI@
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AS = @AS@
+ASMFLAGS = @ASMFLAGS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CALLING_CONVENTIONS_OBJS = @CALLING_CONVENTIONS_OBJS@
+CC = @CC@
+CCAS = @CCAS@
+CC_FOR_BUILD = @CC_FOR_BUILD@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CPP_FOR_BUILD = @CPP_FOR_BUILD@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFN_LONG_LONG_LIMB = @DEFN_LONG_LONG_LIMB@
+DEFS = @DEFS@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+EXEEXT_FOR_BUILD = @EXEEXT_FOR_BUILD@
+FGREP = @FGREP@
+GMP_LDFLAGS = @GMP_LDFLAGS@
+GMP_LIMB_BITS = @GMP_LIMB_BITS@
+GMP_NAIL_BITS = @GMP_NAIL_BITS@
+GREP = @GREP@
+HAVE_CLOCK_01 = @HAVE_CLOCK_01@
+HAVE_CPUTIME_01 = @HAVE_CPUTIME_01@
+HAVE_GETRUSAGE_01 = @HAVE_GETRUSAGE_01@
+HAVE_GETTIMEOFDAY_01 = @HAVE_GETTIMEOFDAY_01@
+HAVE_HOST_CPU_FAMILY_power = @HAVE_HOST_CPU_FAMILY_power@
+HAVE_HOST_CPU_FAMILY_powerpc = @HAVE_HOST_CPU_FAMILY_powerpc@
+HAVE_SIGACTION_01 = @HAVE_SIGACTION_01@
+HAVE_SIGALTSTACK_01 = @HAVE_SIGALTSTACK_01@
+HAVE_SIGSTACK_01 = @HAVE_SIGSTACK_01@
+HAVE_STACK_T_01 = @HAVE_STACK_T_01@
+HAVE_SYS_RESOURCE_H_01 = @HAVE_SYS_RESOURCE_H_01@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LEX = @LEX@
+LEXLIB = @LEXLIB@
+LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
+LIBCURSES = @LIBCURSES@
+LIBGMPXX_LDFLAGS = @LIBGMPXX_LDFLAGS@
+LIBGMP_DLL = @LIBGMP_DLL@
+LIBGMP_LDFLAGS = @LIBGMP_LDFLAGS@
+LIBM = @LIBM@
+LIBM_FOR_BUILD = @LIBM_FOR_BUILD@
+LIBOBJS = @LIBOBJS@
+LIBREADLINE = @LIBREADLINE@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+M4 = @M4@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
+STRIP = @STRIP@
+TAL_OBJECT = @TAL_OBJECT@
+TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
+U = @U@
+U_FOR_BUILD = @U_FOR_BUILD@
+VERSION = @VERSION@
+WITH_READLINE_01 = @WITH_READLINE_01@
+YACC = @YACC@
+YFLAGS = @YFLAGS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__leading_dot = @am__leading_dot@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+gmp_srclinks = @gmp_srclinks@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+mpn_objects = @mpn_objects@
+mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
+mpn_objs_in_libmp = @mpn_objs_in_libmp@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+
+# The following options are the same as AM_INIT_AUTOMAKE in configure.in,
+# except no $(top_builddir) on ansi2knr.  That directory is wanted for the
+# Makefiles in subdirectories, but here we must omit it so automake gives
+# the actual ansi2knr build rule, not "cd $(top_builddir) && make ansi2knr".
+#
+AUTOMAKE_OPTIONS = 1.8 gnu no-dependencies ansi2knr
+
+# Libtool -version-info for libgmp.la and libmp.la.  See "Versioning" in the
+# libtool manual.
+#
+#      CURRENT:REVISION:AGE
+#
+# 1. No interfaces changed, only implementations (good): Increment REVISION.
+#
+# 2. Interfaces added, none removed (good): Increment CURRENT, increment
+#    AGE, set REVISION to 0.
+#
+# 3. Interfaces removed (BAD, breaks upward compatibility): Increment
+#    CURRENT, set AGE and REVISION to 0.
+#
+# Do this separately for libgmp, libgmpxx and libmp, and only for releases.
+#
+#        GMP      -version-info
+#       release   libgmp  libgmpxx libmp
+#        2.0.x      -        -       -
+#        3.0      3:0:0      -     3:0:0
+#        3.0.1    3:1:0      -     3:0:0
+#        3.1      4:0:1      -     4:0:1
+#        3.1.1    4:1:1      -     4:1:1
+#        4.0      5:0:2    3:0:0   4:2:1
+#        4.0.1    5:1:2    3:1:0   4:3:1
+#        4.1      6:0:3    3:2:0   4:4:1
+#        4.1.1    6:1:3    3:3:0   4:5:1
+#        4.1.2    6:2:3    3:4:0   4:6:1
+#        4.1.3    6:3:3    3:5:0   4:7:1
+#        4.1.4    6:3:3    3:5:0   4:7:1       WRONG, same as 4.1.3!
+#        4.2      6:0:3    3:2:0   4:4:1       REALLY WRONG, same as 4.1!
+#        4.2.1    7:1:4    4:1:1   4:10:1      WRONG for libgmpxx
+#        4.2.2    7:2:4    4:2:0   4:11:1
+#        4.2.3    7:3:4    4:3:0   4:12:1
+#        4.2.4    7:4:4    4:4:0   4:13:1
+#        4.3.0    8:0:5    5:0:1   4:14:1
+#        4.3.1    8:1:5    5:1:1   4:15:1      WRONG Really used same as 4.3.0
+#        4.3.2    8:2:5    5:2:1   4:16:1
+#        5.0.0    9:0:6    6:0:2   4:20:1      Should have been 10:0:0
+#        5.0.1   10:1:0    6:1:2   4:21:1
+#        5.0.2   10:2:0    6:2:2   4:22:1
+#        5.0.3   10:3:0    6:3:2   4:23:1
+#        5.0.4   10:4:0    6:4:2   4:24:1
+#        5.0.5   10:5:0    6:5:2   4:25:1
+#
+# Starting at 3:0:0 is a slight abuse of the versioning system, but it
+# ensures we're past soname libgmp.so.2, which was used on Debian GNU/Linux
+# packages of gmp 2.  Pretend gmp 2 was 2:0:0, so the interface changes for
+# gmp 3 mean 3:0:0 is right.
+#
+# We interpret "only implementations changed" in item "1." above as meaning
+# any release, i.e. the REVISION is incremented every time (if nothing else).
+# Even if we thought the generated code would be identical on all systems,
+# it's still good to get the shared library filename (like
+# libgmpxx.so.3.0.4) incrementing, to make it clear which GMP it's from.
+LIBGMP_LT_CURRENT = 10
+LIBGMP_LT_REVISION = 5
+LIBGMP_LT_AGE = 0
+LIBGMPXX_LT_CURRENT = 6
+LIBGMPXX_LT_REVISION = 5
+LIBGMPXX_LT_AGE = 2
+LIBMP_LT_CURRENT = 4
+LIBMP_LT_REVISION = 25
+LIBMP_LT_AGE = 1
+SUBDIRS = tests mpn mpz mpq mpf printf scanf cxx mpbsd demos tune doc
+
+# The ansi2knr setups for the build programs are the same as the normal
+# automake ansi2knr rules, but using $(CC_FOR_BUILD) instead of $(CC).
+#
+# The "test -f" support for srcdir!=builddir is similar to the automake .c.o
+# etc rules, but with each foo.c explicitly, since $< is not portable
+# outside an inference rule.
+#
+# A quoted 'foo.c' is used with the "test -f"'s to avoid Sun make rewriting
+# it as part of its VPATH support.  See the autoconf manual "Limitations of
+# Make".
+#
+# Generated .h files which are used by gmp-impl.h are BUILT_SOURCES since
+# they must exist before anything can be compiled.
+#
+# Other generated .h files are also BUILT_SOURCES so as to get all the
+# build-system stuff over and done with at the start.  Also, dependencies on
+# the .h files are not properly expressed for the various objects that use
+# them.
+EXTRA_DIST = configfsf.guess configfsf.sub .gdbinit INSTALL.autoconf \
+       gmpxx.h libmp.sym dumbmp.c gen-fac_ui.c gen-fib.c gen-bases.c \
+       gen-trialdivtab.c gen-psqr.c
+@WANT_CXX_TRUE@GMPXX_HEADERS_OPTION = gmpxx.h
+
+# gmp.h and mp.h are architecture dependent, mainly since they encode the
+# limb size used in libgmp.  For that reason they belong under $exec_prefix
+# not $prefix, strictly speaking.
+#
+# $exec_prefix/include is not in the default include path for gcc built to
+# the same $prefix and $exec_prefix, which might mean gmp.h is not found,
+# but anyone knowledgeable enough to be playing with exec_prefix will be able
+# to address that.
+#
+includeexecdir = $(exec_prefix)/include
+include_HEADERS = $(GMPXX_HEADERS_OPTION)
+nodist_includeexec_HEADERS = gmp.h $(MPBSD_HEADERS_OPTION)
+lib_LTLIBRARIES = libgmp.la $(GMPXX_LTLIBRARIES_OPTION) $(MPBSD_LTLIBRARIES_OPTION)
+BUILT_SOURCES = gmp.h mp.h mpz/fac_ui.h fib_table.h mpn/fib_table.c \
+       mp_bases.h mpn/mp_bases.c trialdivtab.h mpn/perfsqr.h
+DISTCLEANFILES = $(BUILT_SOURCES) config.m4 @gmp_srclinks@ \
+       gen-fac_ui$(EXEEXT_FOR_BUILD) gen-fib$(EXEEXT_FOR_BUILD) \
+       gen-bases$(EXEEXT_FOR_BUILD) \
+       gen-trialdivtab$(EXEEXT_FOR_BUILD) gen-psqr$(EXEEXT_FOR_BUILD)
+
+# Tell gmp.h it's building GMP, not an application; used for Windows DLLs.
+INCLUDES = -D__GMP_WITHIN_GMP
+MPF_OBJECTS = mpf/init$U.lo mpf/init2$U.lo mpf/inits$U.lo mpf/set$U.lo     \
+  mpf/set_ui$U.lo mpf/set_si$U.lo mpf/set_str$U.lo mpf/set_d$U.lo          \
+  mpf/set_z$U.lo mpf/iset$U.lo mpf/iset_ui$U.lo mpf/iset_si$U.lo           \
+  mpf/iset_str$U.lo mpf/iset_d$U.lo mpf/clear$U.lo mpf/clears$U.lo         \
+  mpf/get_str$U.lo mpf/dump$U.lo mpf/size$U.lo mpf/eq$U.lo mpf/reldiff$U.lo \
+  mpf/sqrt$U.lo mpf/random2$U.lo mpf/inp_str$U.lo mpf/out_str$U.lo         \
+  mpf/add$U.lo mpf/add_ui$U.lo mpf/sub$U.lo mpf/sub_ui$U.lo mpf/ui_sub$U.lo \
+  mpf/mul$U.lo mpf/mul_ui$U.lo mpf/div$U.lo mpf/div_ui$U.lo                \
+  mpf/cmp$U.lo mpf/cmp_d$U.lo mpf/cmp_ui$U.lo mpf/cmp_si$U.lo              \
+  mpf/mul_2exp$U.lo mpf/div_2exp$U.lo mpf/abs$U.lo mpf/neg$U.lo                    \
+  mpf/set_q$U.lo mpf/get_d$U.lo mpf/get_d_2exp$U.lo mpf/set_dfl_prec$U.lo   \
+  mpf/set_prc$U.lo mpf/set_prc_raw$U.lo mpf/get_dfl_prec$U.lo               \
+  mpf/get_prc$U.lo mpf/ui_div$U.lo mpf/sqrt_ui$U.lo                         \
+  mpf/ceilfloor$U.lo mpf/trunc$U.lo mpf/pow_ui$U.lo                        \
+  mpf/urandomb$U.lo mpf/swap$U.lo                                          \
+  mpf/fits_sint$U.lo mpf/fits_slong$U.lo mpf/fits_sshort$U.lo              \
+  mpf/fits_uint$U.lo mpf/fits_ulong$U.lo mpf/fits_ushort$U.lo              \
+  mpf/get_si$U.lo mpf/get_ui$U.lo                                          \
+  mpf/int_p$U.lo
+
+MPZ_OBJECTS = mpz/abs$U.lo mpz/add$U.lo mpz/add_ui$U.lo                        \
+  mpz/aorsmul$U.lo mpz/aorsmul_i$U.lo mpz/and$U.lo mpz/array_init$U.lo \
+  mpz/bin_ui$U.lo mpz/bin_uiui$U.lo                                    \
+  mpz/cdiv_q$U.lo mpz/cdiv_q_ui$U.lo                                   \
+  mpz/cdiv_qr$U.lo mpz/cdiv_qr_ui$U.lo                                 \
+  mpz/cdiv_r$U.lo mpz/cdiv_r_ui$U.lo mpz/cdiv_ui$U.lo                  \
+  mpz/cfdiv_q_2exp$U.lo mpz/cfdiv_r_2exp$U.lo                          \
+  mpz/clear$U.lo mpz/clears$U.lo mpz/clrbit$U.lo                       \
+  mpz/cmp$U.lo mpz/cmp_d$U.lo mpz/cmp_si$U.lo mpz/cmp_ui$U.lo          \
+  mpz/cmpabs$U.lo mpz/cmpabs_d$U.lo mpz/cmpabs_ui$U.lo                 \
+  mpz/com$U.lo mpz/combit$U.lo                                         \
+  mpz/cong$U.lo mpz/cong_2exp$U.lo mpz/cong_ui$U.lo                    \
+  mpz/divexact$U.lo mpz/divegcd$U.lo mpz/dive_ui$U.lo                  \
+  mpz/divis$U.lo mpz/divis_ui$U.lo mpz/divis_2exp$U.lo mpz/dump$U.lo   \
+  mpz/export$U.lo mpz/fac_ui$U.lo mpz/fdiv_q$U.lo                      \
+  mpz/fdiv_q_ui$U.lo mpz/fdiv_qr$U.lo mpz/fdiv_qr_ui$U.lo              \
+  mpz/fdiv_r$U.lo mpz/fdiv_r_ui$U.lo                                   \
+  mpz/fdiv_ui$U.lo mpz/fib_ui$U.lo mpz/fib2_ui$U.lo mpz/fits_sint$U.lo \
+  mpz/fits_slong$U.lo mpz/fits_sshort$U.lo mpz/fits_uint$U.lo          \
+  mpz/fits_ulong$U.lo mpz/fits_ushort$U.lo mpz/gcd$U.lo                        \
+  mpz/gcd_ui$U.lo mpz/gcdext$U.lo mpz/get_d$U.lo mpz/get_d_2exp$U.lo   \
+  mpz/get_si$U.lo mpz/get_str$U.lo mpz/get_ui$U.lo mpz/getlimbn$U.lo   \
+  mpz/hamdist$U.lo                                                     \
+  mpz/import$U.lo mpz/init$U.lo mpz/init2$U.lo mpz/inits$U.lo          \
+  mpz/inp_raw$U.lo mpz/inp_str$U.lo mpz/invert$U.lo                    \
+  mpz/ior$U.lo mpz/iset$U.lo mpz/iset_d$U.lo mpz/iset_si$U.lo          \
+  mpz/iset_str$U.lo mpz/iset_ui$U.lo mpz/jacobi$U.lo mpz/kronsz$U.lo   \
+  mpz/kronuz$U.lo mpz/kronzs$U.lo mpz/kronzu$U.lo                      \
+  mpz/lcm$U.lo mpz/lcm_ui$U.lo mpz/lucnum_ui$U.lo mpz/lucnum2_ui$U.lo  \
+  mpz/millerrabin$U.lo mpz/mod$U.lo mpz/mul$U.lo mpz/mul_2exp$U.lo     \
+  mpz/mul_si$U.lo mpz/mul_ui$U.lo                                      \
+  mpz/n_pow_ui$U.lo mpz/neg$U.lo mpz/nextprime$U.lo                    \
+  mpz/out_raw$U.lo mpz/out_str$U.lo mpz/perfpow$U.lo mpz/perfsqr$U.lo  \
+  mpz/popcount$U.lo mpz/pow_ui$U.lo mpz/powm$U.lo mpz/powm_sec$U.lo    \
+  mpz/powm_ui$U.lo mpz/pprime_p$U.lo mpz/random$U.lo mpz/random2$U.lo  \
+  mpz/realloc$U.lo mpz/realloc2$U.lo mpz/remove$U.lo                   \
+  mpz/root$U.lo mpz/rootrem$U.lo mpz/rrandomb$U.lo mpz/scan0$U.lo      \
+  mpz/scan1$U.lo mpz/set$U.lo mpz/set_d$U.lo mpz/set_f$U.lo            \
+  mpz/set_q$U.lo mpz/set_si$U.lo mpz/set_str$U.lo mpz/set_ui$U.lo      \
+  mpz/setbit$U.lo                                                      \
+  mpz/size$U.lo mpz/sizeinbase$U.lo mpz/sqrt$U.lo                      \
+  mpz/sqrtrem$U.lo mpz/sub$U.lo mpz/sub_ui$U.lo mpz/swap$U.lo          \
+  mpz/tdiv_ui$U.lo mpz/tdiv_q$U.lo mpz/tdiv_q_2exp$U.lo                        \
+  mpz/tdiv_q_ui$U.lo mpz/tdiv_qr$U.lo mpz/tdiv_qr_ui$U.lo              \
+  mpz/tdiv_r$U.lo mpz/tdiv_r_2exp$U.lo mpz/tdiv_r_ui$U.lo              \
+  mpz/tstbit$U.lo mpz/ui_pow_ui$U.lo mpz/ui_sub$U.lo mpz/urandomb$U.lo \
+  mpz/urandomm$U.lo mpz/xor$U.lo
+
+MPQ_OBJECTS = mpq/abs$U.lo mpq/aors$U.lo                               \
+  mpq/canonicalize$U.lo mpq/clear$U.lo mpq/clears$U.lo                 \
+  mpq/cmp$U.lo mpq/cmp_si$U.lo mpq/cmp_ui$U.lo mpq/div$U.lo            \
+  mpq/get_d$U.lo mpq/get_den$U.lo mpq/get_num$U.lo mpq/get_str$U.lo    \
+  mpq/init$U.lo mpq/inits$U.lo mpq/inp_str$U.lo mpq/inv$U.lo           \
+  mpq/md_2exp$U.lo mpq/mul$U.lo mpq/neg$U.lo mpq/out_str$U.lo          \
+  mpq/set$U.lo mpq/set_den$U.lo mpq/set_num$U.lo                       \
+  mpq/set_si$U.lo mpq/set_str$U.lo mpq/set_ui$U.lo                     \
+  mpq/equal$U.lo mpq/set_z$U.lo mpq/set_d$U.lo                         \
+  mpq/set_f$U.lo mpq/swap$U.lo
+
+MPN_OBJECTS = mpn/fib_table$U.lo mpn/mp_bases$U.lo
+PRINTF_OBJECTS = \
+  printf/asprintf$U.lo printf/asprntffuns$U.lo                         \
+  printf/doprnt$U.lo printf/doprntf$U.lo printf/doprnti$U.lo           \
+  printf/fprintf$U.lo                                                  \
+  printf/obprintf$U.lo printf/obvprintf$U.lo printf/obprntffuns$U.lo   \
+  printf/printf$U.lo printf/printffuns$U.lo                            \
+  printf/snprintf$U.lo printf/snprntffuns$U.lo                         \
+  printf/sprintf$U.lo printf/sprintffuns$U.lo                          \
+  printf/vasprintf$U.lo printf/vfprintf$U.lo printf/vprintf$U.lo       \
+  printf/vsnprintf$U.lo printf/vsprintf$U.lo                           \
+  printf/repl-vsnprintf$U.lo
+
+SCANF_OBJECTS = \
+  scanf/doscan$U.lo scanf/fscanf$U.lo scanf/fscanffuns$U.lo    \
+  scanf/scanf$U.lo scanf/sscanf$U.lo scanf/sscanffuns$U.lo     \
+  scanf/vfscanf$U.lo scanf/vscanf$U.lo scanf/vsscanf$U.lo
+
+
+# no $U for C++ files
+CXX_OBJECTS = \
+  cxx/isfuns.lo cxx/ismpf.lo cxx/ismpq.lo cxx/ismpz.lo cxx/ismpznw.lo  \
+  cxx/osdoprnti.lo cxx/osfuns.lo                                       \
+  cxx/osmpf.lo cxx/osmpq.lo cxx/osmpz.lo
+
+MPBSD_OBJECTS = mpbsd/add$U.lo mpbsd/tdiv_qr$U.lo mpbsd/set$U.lo       \
+  mpbsd/powm$U.lo mpbsd/sub$U.lo mpbsd/cmp$U.lo mpbsd/mfree$U.lo       \
+  mpbsd/mtox$U.lo mpbsd/realloc$U.lo mpbsd/gcd$U.lo mpbsd/itom$U.lo    \
+  mpbsd/min$U.lo mpbsd/mul$U.lo mpbsd/mout$U.lo mpbsd/rpow$U.lo                \
+  mpbsd/sdiv$U.lo mpbsd/sqrtrem$U.lo mpbsd/xtom$U.lo
+
+
+# In libtool 1.5 it doesn't work to build libgmp.la from the convenience
+# libraries like mpz/libmpz.la.  Or rather it works, but it ends up putting
+# PIC objects into libgmp.a if shared and static are both built.  (The PIC
+# objects go into mpz/.libs/libmpz.a, and thence into .libs/libgmp.a.)
+#
+# For now the big lists of objects above are used.  Something like mpz/*.lo
+# would probably work, but might risk missing something out or getting
+# something extra.  The source files for each .lo are listed in the
+# Makefile.am's in the subdirectories.
+#
+# Currently, for libgmp, unlike libmp below, we're not using
+# -export-symbols, since the tune and speed programs, and perhaps some of
+# the test programs, want to access undocumented symbols.
+libgmp_la_SOURCES = gmp-impl.h longlong.h randmt.h                     \
+  assert.c compat.c errno.c extract-dbl.c invalid.c memory.c           \
+  mp_bpl.c mp_clz_tab.c mp_dv_tab.c mp_minv_tab.c mp_get_fns.c mp_set_fns.c \
+  rand.c randclr.c randdef.c randiset.c randlc2s.c randlc2x.c randmt.c \
+  randmts.c rands.c randsd.c randsdui.c randbui.c randmui.c version.c  \
+  nextprime.c
+
+EXTRA_libgmp_la_SOURCES = tal-debug.c tal-notreent.c tal-reent.c
+libgmp_la_DEPENDENCIES = @TAL_OBJECT@          \
+  $(MPF_OBJECTS) $(MPZ_OBJECTS) $(MPQ_OBJECTS) \
+  $(MPN_OBJECTS) @mpn_objs_in_libgmp@          \
+  $(PRINTF_OBJECTS)  $(SCANF_OBJECTS)
+
+libgmp_la_LIBADD = $(libgmp_la_DEPENDENCIES)
+libgmp_la_LDFLAGS = $(GMP_LDFLAGS) $(LIBGMP_LDFLAGS) \
+  -version-info $(LIBGMP_LT_CURRENT):$(LIBGMP_LT_REVISION):$(LIBGMP_LT_AGE)
+
+
+# We need at least one .cc file in $(libgmpxx_la_SOURCES) so automake will
+# use $(CXXLINK) rather than the plain C $(LINK).  cxx/dummy.cc is that
+# file.
+@WANT_CXX_TRUE@GMPXX_LTLIBRARIES_OPTION = libgmpxx.la
+libgmpxx_la_SOURCES = cxx/dummy.cc
+libgmpxx_la_DEPENDENCIES = $(CXX_OBJECTS) libgmp.la
+libgmpxx_la_LIBADD = $(libgmpxx_la_DEPENDENCIES)
+libgmpxx_la_LDFLAGS = $(GMP_LDFLAGS) $(LIBGMPXX_LDFLAGS) \
+  -version-info $(LIBGMPXX_LT_CURRENT):$(LIBGMPXX_LT_REVISION):$(LIBGMPXX_LT_AGE)
+
+
+# The selected mpz objects here support mpz/powm.c (built as mpbsd/powm.lo)
+# and can probably be removed when that switches to an mpn implementation.
+# (Apart from mpz/n_pow_ui$U.lo, which supports mpbsd/rpow.c)
+@WANT_MPBSD_TRUE@MPBSD_HEADERS_OPTION = mp.h
+@WANT_MPBSD_TRUE@MPBSD_LTLIBRARIES_OPTION = libmp.la
+libmp_la_SOURCES = assert.c errno.c memory.c mp_bpl.c mp_clz_tab.c     \
+  mp_dv_tab.c mp_minv_tab.c mp_get_fns.c mp_set_fns.c nextprime.c
+
+libmp_la_DEPENDENCIES = $(srcdir)/libmp.sym                            \
+  @TAL_OBJECT@ $(MPBSD_OBJECTS) $(MPN_OBJECTS) @mpn_objs_in_libmp@     \
+  mpz/add$U.lo mpz/gcdext$U.lo mpz/invert$U.lo mpz/mul$U.lo            \
+  mpz/n_pow_ui$U.lo mpz/realloc$U.lo mpz/set$U.lo mpz/sub$U.lo         \
+  mpz/tdiv_q$U.lo
+
+libmp_la_LIBADD = $(libmp_la_DEPENDENCIES)
+libmp_la_LDFLAGS = $(GMP_LDFLAGS) \
+  -version-info $(LIBMP_LT_CURRENT):$(LIBMP_LT_REVISION):$(LIBMP_LT_AGE) \
+  -export-symbols $(srcdir)/libmp.sym
+
+all: $(BUILT_SOURCES) config.h
+       $(MAKE) $(AM_MAKEFLAGS) all-recursive
+
+.SUFFIXES:
+.SUFFIXES: .c .cc .lo .o .obj
+am--refresh:
+       @:
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps)
+       @for dep in $?; do \
+         case '$(am__configure_deps)' in \
+           *$$dep*) \
+             echo ' cd $(srcdir) && $(AUTOMAKE) --gnu --ignore-deps'; \
+             $(am__cd) $(srcdir) && $(AUTOMAKE) --gnu --ignore-deps \
+               && exit 0; \
+             exit 1;; \
+         esac; \
+       done; \
+       echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps Makefile'; \
+       $(am__cd) $(top_srcdir) && \
+         $(AUTOMAKE) --gnu --ignore-deps Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+       @case '$?' in \
+         *config.status*) \
+           echo ' $(SHELL) ./config.status'; \
+           $(SHELL) ./config.status;; \
+         *) \
+           echo ' cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe)'; \
+           cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe);; \
+       esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+       $(SHELL) ./config.status --recheck
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+       $(am__cd) $(srcdir) && $(AUTOCONF)
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+       $(am__cd) $(srcdir) && $(ACLOCAL) $(ACLOCAL_AMFLAGS)
+$(am__aclocal_m4_deps):
+
+config.h: stamp-h1
+       @if test ! -f $@; then \
+         rm -f stamp-h1; \
+         $(MAKE) $(AM_MAKEFLAGS) stamp-h1; \
+       else :; fi
+
+stamp-h1: $(srcdir)/config.in $(top_builddir)/config.status
+       @rm -f stamp-h1
+       cd $(top_builddir) && $(SHELL) ./config.status config.h
+$(srcdir)/config.in: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) 
+       ($(am__cd) $(top_srcdir) && $(AUTOHEADER))
+       rm -f stamp-h1
+       touch $@
+
+distclean-hdr:
+       -rm -f config.h stamp-h1
+gmp.h: $(top_builddir)/config.status $(srcdir)/gmp-h.in
+       cd $(top_builddir) && $(SHELL) ./config.status $@
+mp.h: $(top_builddir)/config.status $(srcdir)/mp-h.in
+       cd $(top_builddir) && $(SHELL) ./config.status $@
+# Install the libtool libraries named in $(lib_LTLIBRARIES) into
+# $(DESTDIR)$(libdir).  The directory is created first (unless $(libdir) is
+# empty).  Only list entries that exist as regular files are collected into
+# list2; if list2 is non-empty, one `libtool --mode=install' command is echoed
+# and then run for all of them.  An empty $(libdir) empties the list, so
+# nothing is installed.
+install-libLTLIBRARIES: $(lib_LTLIBRARIES)
+       @$(NORMAL_INSTALL)
+       test -z "$(libdir)" || $(MKDIR_P) "$(DESTDIR)$(libdir)"
+       @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \
+       list2=; for p in $$list; do \
+         if test -f $$p; then \
+           list2="$$list2 $$p"; \
+         else :; fi; \
+       done; \
+       test -z "$$list2" || { \
+         echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \
+         $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \
+       }
+
+# For every entry of $(lib_LTLIBRARIES), echo and run
+# `libtool --mode=uninstall rm -f' on $(DESTDIR)$(libdir)/$$f.  Nothing is
+# done when $(libdir) is empty.
+# NOTE(review): $$f is not assigned in this recipe; presumably the
+# $(am__strip_dir) fragment (defined elsewhere) sets it from $$p -- confirm.
+uninstall-libLTLIBRARIES:
+       @$(NORMAL_UNINSTALL)
+       @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \
+       for p in $$list; do \
+         $(am__strip_dir) \
+         echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$f'"; \
+         $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$f"; \
+       done
+
+# Delete the built libtool libraries, then, for each library, remove a
+# `so_locations' file from the library's directory.  The directory is the
+# library path with its last component stripped by sed, or `.' when the
+# path contains no slash.
+clean-libLTLIBRARIES:
+       -test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES)
+       @list='$(lib_LTLIBRARIES)'; for p in $$list; do \
+         dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \
+         test "$$dir" != "$$p" || dir=.; \
+         echo "rm -f \"$${dir}/so_locations\""; \
+         rm -f "$${dir}/so_locations"; \
+       done
+# Link rules for the three libtool libraries.  Each one is relinked when its
+# objects or its declared extra dependencies change.  libgmp.la is always
+# linked with `-rpath $(libdir)'; libgmpxx.la and libmp.la take their rpath
+# argument from the $(am_libgmpxx_la_rpath) / $(am_libmp_la_rpath) variables.
+libgmp.la: $(libgmp_la_OBJECTS) $(libgmp_la_DEPENDENCIES) 
+       $(libgmp_la_LINK) -rpath $(libdir) $(libgmp_la_OBJECTS) $(libgmp_la_LIBADD) $(LIBS)
+libgmpxx.la: $(libgmpxx_la_OBJECTS) $(libgmpxx_la_DEPENDENCIES) 
+       $(libgmpxx_la_LINK) $(am_libgmpxx_la_rpath) $(libgmpxx_la_OBJECTS) $(libgmpxx_la_LIBADD) $(LIBS)
+libmp.la: $(libmp_la_OBJECTS) $(libmp_la_DEPENDENCIES) 
+       $(libmp_la_LINK) $(am_libmp_la_rpath) $(libmp_la_OBJECTS) $(libmp_la_LIBADD) $(LIBS)
+
+# Remove object files from the current directory.
+mostlyclean-compile:
+       -rm -f *.$(OBJEXT)
+
+# Remove *.tab.c files from the current directory.
+distclean-compile:
+       -rm -f *.tab.c
+# Link the ansi2knr helper program from its object file; that object is
+# rebuilt when $(CONFIG_HEADER) changes.
+./ansi2knr: ansi2knr.$(OBJEXT)
+       $(LINK) ansi2knr.$(OBJEXT) $(LIBS)
+ansi2knr.$(OBJEXT): $(CONFIG_HEADER)
+
+# Remove the ansi2knr helper program.
+clean-krextra:
+       -rm -f ansi2knr
+
+# Remove the generated *_.c files, but only when the make variable U is
+# non-empty.
+mostlyclean-kr:
+       -test "$U" = "" || rm -f *_.c
+
+# Suffix rules for C sources.  .c.o and .c.obj compile with $(COMPILE) and do
+# not pass -o; .c.obj first converts the source path with $(CYGPATH_W).
+# .c.lo compiles through $(LTCOMPILE) and names the output explicitly.
+.c.o:
+       $(COMPILE) -c $<
+
+.c.obj:
+       $(COMPILE) -c `$(CYGPATH_W) '$<'`
+
+.c.lo:
+       $(LTCOMPILE) -c -o $@ $<
+# Generation of the X_.c variants.  Every rule below follows the same pattern:
+# X.c is taken from $(srcdir) if it exists there, otherwise from the current
+# directory; it is run through $(CPP); sed rewrites preprocessor line markers
+# of the form `# N' into `#line N'; and the result is filtered through
+# $(ANSI2KNR) into the target.  If the pipeline's exit status is non-zero the
+# target file is removed.
+assert_.c: assert.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/assert.c; then echo $(srcdir)/assert.c; else echo assert.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+compat_.c: compat.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/compat.c; then echo $(srcdir)/compat.c; else echo compat.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+errno_.c: errno.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/errno.c; then echo $(srcdir)/errno.c; else echo errno.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+extract-dbl_.c: extract-dbl.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/extract-dbl.c; then echo $(srcdir)/extract-dbl.c; else echo extract-dbl.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+invalid_.c: invalid.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/invalid.c; then echo $(srcdir)/invalid.c; else echo invalid.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+memory_.c: memory.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/memory.c; then echo $(srcdir)/memory.c; else echo memory.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mp_bpl_.c: mp_bpl.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mp_bpl.c; then echo $(srcdir)/mp_bpl.c; else echo mp_bpl.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mp_clz_tab_.c: mp_clz_tab.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mp_clz_tab.c; then echo $(srcdir)/mp_clz_tab.c; else echo mp_clz_tab.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mp_dv_tab_.c: mp_dv_tab.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mp_dv_tab.c; then echo $(srcdir)/mp_dv_tab.c; else echo mp_dv_tab.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mp_get_fns_.c: mp_get_fns.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mp_get_fns.c; then echo $(srcdir)/mp_get_fns.c; else echo mp_get_fns.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mp_minv_tab_.c: mp_minv_tab.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mp_minv_tab.c; then echo $(srcdir)/mp_minv_tab.c; else echo mp_minv_tab.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mp_set_fns_.c: mp_set_fns.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mp_set_fns.c; then echo $(srcdir)/mp_set_fns.c; else echo mp_set_fns.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+nextprime_.c: nextprime.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/nextprime.c; then echo $(srcdir)/nextprime.c; else echo nextprime.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+rand_.c: rand.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/rand.c; then echo $(srcdir)/rand.c; else echo rand.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+randbui_.c: randbui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/randbui.c; then echo $(srcdir)/randbui.c; else echo randbui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+randclr_.c: randclr.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/randclr.c; then echo $(srcdir)/randclr.c; else echo randclr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+randdef_.c: randdef.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/randdef.c; then echo $(srcdir)/randdef.c; else echo randdef.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+randiset_.c: randiset.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/randiset.c; then echo $(srcdir)/randiset.c; else echo randiset.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+randlc2s_.c: randlc2s.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/randlc2s.c; then echo $(srcdir)/randlc2s.c; else echo randlc2s.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+randlc2x_.c: randlc2x.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/randlc2x.c; then echo $(srcdir)/randlc2x.c; else echo randlc2x.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+randmt_.c: randmt.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/randmt.c; then echo $(srcdir)/randmt.c; else echo randmt.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+randmts_.c: randmts.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/randmts.c; then echo $(srcdir)/randmts.c; else echo randmts.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+randmui_.c: randmui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/randmui.c; then echo $(srcdir)/randmui.c; else echo randmui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+rands_.c: rands.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/rands.c; then echo $(srcdir)/rands.c; else echo rands.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+randsd_.c: randsd.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/randsd.c; then echo $(srcdir)/randsd.c; else echo randsd.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+randsdui_.c: randsdui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/randsdui.c; then echo $(srcdir)/randsdui.c; else echo randsdui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+tal-debug_.c: tal-debug.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tal-debug.c; then echo $(srcdir)/tal-debug.c; else echo tal-debug.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+tal-notreent_.c: tal-notreent.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tal-notreent.c; then echo $(srcdir)/tal-notreent.c; else echo tal-notreent.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+tal-reent_.c: tal-reent.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tal-reent.c; then echo $(srcdir)/tal-reent.c; else echo tal-reent.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+version_.c: version.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/version.c; then echo $(srcdir)/version.c; else echo version.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+# Every object (.$(OBJEXT) and .lo) built from one of the generated X_.c
+# files additionally depends on the $(ANSI2KNR) program itself.
+assert_.$(OBJEXT) assert_.lo compat_.$(OBJEXT) compat_.lo \
+errno_.$(OBJEXT) errno_.lo extract-dbl_.$(OBJEXT) extract-dbl_.lo \
+invalid_.$(OBJEXT) invalid_.lo memory_.$(OBJEXT) memory_.lo \
+mp_bpl_.$(OBJEXT) mp_bpl_.lo mp_clz_tab_.$(OBJEXT) mp_clz_tab_.lo \
+mp_dv_tab_.$(OBJEXT) mp_dv_tab_.lo mp_get_fns_.$(OBJEXT) \
+mp_get_fns_.lo mp_minv_tab_.$(OBJEXT) mp_minv_tab_.lo \
+mp_set_fns_.$(OBJEXT) mp_set_fns_.lo nextprime_.$(OBJEXT) \
+nextprime_.lo rand_.$(OBJEXT) rand_.lo randbui_.$(OBJEXT) randbui_.lo \
+randclr_.$(OBJEXT) randclr_.lo randdef_.$(OBJEXT) randdef_.lo \
+randiset_.$(OBJEXT) randiset_.lo randlc2s_.$(OBJEXT) randlc2s_.lo \
+randlc2x_.$(OBJEXT) randlc2x_.lo randmt_.$(OBJEXT) randmt_.lo \
+randmts_.$(OBJEXT) randmts_.lo randmui_.$(OBJEXT) randmui_.lo \
+rands_.$(OBJEXT) rands_.lo randsd_.$(OBJEXT) randsd_.lo \
+randsdui_.$(OBJEXT) randsdui_.lo tal-debug_.$(OBJEXT) tal-debug_.lo \
+tal-notreent_.$(OBJEXT) tal-notreent_.lo tal-reent_.$(OBJEXT) \
+tal-reent_.lo version_.$(OBJEXT) version_.lo : $(ANSI2KNR)
+
+# Suffix rules for C++ sources.  Unlike the .c.o/.c.obj rules, all three pass
+# an explicit `-o $@'.  .cc.obj converts the source path with $(CYGPATH_W);
+# .cc.lo compiles through $(LTCXXCOMPILE).
+.cc.o:
+       $(CXXCOMPILE) -c -o $@ $<
+
+.cc.obj:
+       $(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.cc.lo:
+       $(LTCXXCOMPILE) -c -o $@ $<
+
+# dummy.lo is compiled from cxx/dummy.cc through libtool with --tag=CXX.  The
+# backquoted test prefixes the source with $(srcdir)/ when cxx/dummy.cc does
+# not exist relative to the current directory.
+dummy.lo: cxx/dummy.cc
+       $(LIBTOOL)  --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o dummy.lo `test -f 'cxx/dummy.cc' || echo '$(srcdir)/'`cxx/dummy.cc
+
+# Libtool-related cleanup at three levels: *.lo files (mostlyclean), the
+# .libs and _libs directories (clean), and the libtool and config.lt files
+# (distclean).  Failures of the rm commands are ignored.
+mostlyclean-libtool:
+       -rm -f *.lo
+
+clean-libtool:
+       -rm -rf .libs _libs
+
+distclean-libtool:
+       -rm -f libtool config.lt
+# Install $(include_HEADERS) into $(DESTDIR)$(includedir).  The directory is
+# created first.  Each header is located in the current directory or, failing
+# that, under $(srcdir); the resulting paths are piped through
+# $(am__base_list) (defined elsewhere), and each line read back is echoed and
+# handed to one $(INSTALL_HEADER) invocation.  A failing install stops the
+# loop with that exit status.  An empty $(includedir) empties the list.
+install-includeHEADERS: $(include_HEADERS)
+       @$(NORMAL_INSTALL)
+       test -z "$(includedir)" || $(MKDIR_P) "$(DESTDIR)$(includedir)"
+       @list='$(include_HEADERS)'; test -n "$(includedir)" || list=; \
+       for p in $$list; do \
+         if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
+         echo "$$d$$p"; \
+       done | $(am__base_list) | \
+       while read files; do \
+         echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(includedir)'"; \
+         $(INSTALL_HEADER) $$files "$(DESTDIR)$(includedir)" || exit $$?; \
+       done
+
+# Remove the installed copies of $(include_HEADERS): directory parts are
+# stripped from the header names with sed, and the resulting base names are
+# deleted from $(DESTDIR)$(includedir).  Exits successfully without doing
+# anything when the list is empty.
+uninstall-includeHEADERS:
+       @$(NORMAL_UNINSTALL)
+       @list='$(include_HEADERS)'; test -n "$(includedir)" || list=; \
+       files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
+       test -n "$$files" || exit 0; \
+       echo " ( cd '$(DESTDIR)$(includedir)' && rm -f" $$files ")"; \
+       cd "$(DESTDIR)$(includedir)" && rm -f $$files
+# Install $(nodist_includeexec_HEADERS) into $(DESTDIR)$(includeexecdir).
+# The directory is created first.  Each header is located in the current
+# directory or, failing that, under $(srcdir); the paths are piped through
+# $(am__base_list) (defined elsewhere), and each line read back is echoed and
+# handed to one $(INSTALL_HEADER) invocation.  A failing install stops the
+# loop with that exit status.  An empty $(includeexecdir) empties the list.
+install-nodist_includeexecHEADERS: $(nodist_includeexec_HEADERS)
+       @$(NORMAL_INSTALL)
+       test -z "$(includeexecdir)" || $(MKDIR_P) "$(DESTDIR)$(includeexecdir)"
+       @list='$(nodist_includeexec_HEADERS)'; test -n "$(includeexecdir)" || list=; \
+       for p in $$list; do \
+         if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
+         echo "$$d$$p"; \
+       done | $(am__base_list) | \
+       while read files; do \
+         echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(includeexecdir)'"; \
+         $(INSTALL_HEADER) $$files "$(DESTDIR)$(includeexecdir)" || exit $$?; \
+       done
+
+# Remove the installed copies of $(nodist_includeexec_HEADERS): directory
+# parts are stripped from the names with sed, and the resulting base names
+# are deleted from $(DESTDIR)$(includeexecdir).  Exits successfully without
+# doing anything when the list is empty.
+uninstall-nodist_includeexecHEADERS:
+       @$(NORMAL_UNINSTALL)
+       @list='$(nodist_includeexec_HEADERS)'; test -n "$(includeexecdir)" || list=; \
+       files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
+       test -n "$$files" || exit 0; \
+       echo " ( cd '$(DESTDIR)$(includeexecdir)' && rm -f" $$files ")"; \
+       cd "$(DESTDIR)$(includeexecdir)" && rm -f $$files
+
+# This directory's subdirectories are mostly independent; you can cd
+# into them and run `make' without going through this Makefile.
+# To change the values of `make' variables: instead of editing Makefiles,
+# (1) if the variable is set in `config.status', edit `config.status'
+#     (which will cause the Makefiles to be regenerated when you run `make');
+# (2) otherwise, pass the desired values on the `make' command line.
+#
+# How the recipe works: the real target name is $@ with `-recursive' removed.
+# It is made in every entry of $(SUBDIRS), in list order; for the entry `.'
+# the `<target>-am' rule of this Makefile is made instead.  By default the
+# first failing sub-make ends the recipe with status 1.  If a word of
+# $$MAKEFLAGS contains `k' (and is neither a VAR=value assignment nor a long
+# option not starting with `--k'), failures are only recorded in $$fail and
+# the loop continues; the recipe then fails at the end.  If `.' never
+# appeared in $(SUBDIRS), `<target>-am' is made after the loop.
+$(RECURSIVE_TARGETS):
+       @fail= failcom='exit 1'; \
+       for f in x $$MAKEFLAGS; do \
+         case $$f in \
+           *=* | --[!k]*);; \
+           *k*) failcom='fail=yes';; \
+         esac; \
+       done; \
+       dot_seen=no; \
+       target=`echo $@ | sed s/-recursive//`; \
+       list='$(SUBDIRS)'; for subdir in $$list; do \
+         echo "Making $$target in $$subdir"; \
+         if test "$$subdir" = "."; then \
+           dot_seen=yes; \
+           local_target="$$target-am"; \
+         else \
+           local_target="$$target"; \
+         fi; \
+         ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
+         || eval $$failcom; \
+       done; \
+       if test "$$dot_seen" = "no"; then \
+         $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
+       fi; test -z "$$fail"
+
+# Recursive clean targets.  The directory list is $(DIST_SUBDIRS) for
+# distclean-* and maintainer-clean-* targets and $(SUBDIRS) otherwise.  The
+# list is reversed (skipping `.') and `.' is appended, so subdirectories are
+# cleaned in reverse order and the current directory last.  In each
+# subdirectory the target name ($@ without `-recursive') is made; in `.' the
+# `<target>-am' rule is made.  A failing sub-make exits with status 1, unless
+# a `k' flag is found in $$MAKEFLAGS, in which case the failure is recorded
+# in $$fail and the recipe fails only after the loop has finished.
+$(RECURSIVE_CLEAN_TARGETS):
+       @fail= failcom='exit 1'; \
+       for f in x $$MAKEFLAGS; do \
+         case $$f in \
+           *=* | --[!k]*);; \
+           *k*) failcom='fail=yes';; \
+         esac; \
+       done; \
+       dot_seen=no; \
+       case "$@" in \
+         distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \
+         *) list='$(SUBDIRS)' ;; \
+       esac; \
+       rev=''; for subdir in $$list; do \
+         if test "$$subdir" = "."; then :; else \
+           rev="$$subdir $$rev"; \
+         fi; \
+       done; \
+       rev="$$rev ."; \
+       target=`echo $@ | sed s/-recursive//`; \
+       for subdir in $$rev; do \
+         echo "Making $$target in $$subdir"; \
+         if test "$$subdir" = "."; then \
+           local_target="$$target-am"; \
+         else \
+           local_target="$$target"; \
+         fi; \
+         ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
+         || eval $$failcom; \
+       done && test -z "$$fail"
+# Make `tags' (respectively `ctags') in every entry of $(SUBDIRS) except `.'.
+# The result of a sub-make is not checked inside the loop.
+tags-recursive:
+       list='$(SUBDIRS)'; for subdir in $$list; do \
+         test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \
+       done
+ctags-recursive:
+       list='$(SUBDIRS)'; for subdir in $$list; do \
+         test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \
+       done
+
+# Build the ID database with mkid.  Each listed file is resolved to the
+# current directory or, if absent there, to $(srcdir); the awk script then
+# removes duplicate paths (output order is whatever awk's `for (i in files)'
+# yields) before the list is passed to `mkid -fID'.
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+       list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       mkid -fID $$unique
+# `tags' is an alias for TAGS.
+tags: TAGS
+
+# Build the etags TAGS file.  The recipe first probes whether $(ETAGS)
+# accepts `--etags-include'; if so that option is used to pull in the TAGS
+# file of each subdirectory (skipping `.' and subdirectories without a TAGS
+# file), otherwise `--include' is used.  The include options are accumulated
+# in the shell's positional parameters; the leading `x' placed there by
+# `set x' is dropped again by `shift'.  The file list is de-duplicated with
+# awk after resolving each file to the current directory or $(srcdir).
+# $(ETAGS) is not run at all when there are no arguments, includes or files.
+# When there are no files but something else to process, $$unique falls back
+# to $$empty_fix (`.' for the --etags-include variant, empty otherwise).
+TAGS: tags-recursive $(HEADERS) $(SOURCES) config.in $(TAGS_DEPENDENCIES) \
+               $(TAGS_FILES) $(LISP)
+       set x; \
+       here=`pwd`; \
+       if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \
+         include_option=--etags-include; \
+         empty_fix=.; \
+       else \
+         include_option=--include; \
+         empty_fix=; \
+       fi; \
+       list='$(SUBDIRS)'; for subdir in $$list; do \
+         if test "$$subdir" = .; then :; else \
+           test ! -f $$subdir/TAGS || \
+             set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \
+         fi; \
+       done; \
+       list='$(SOURCES) $(HEADERS) config.in $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       shift; \
+       if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+         test -n "$$unique" || unique=$$empty_fix; \
+         if test $$# -gt 0; then \
+           $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+             "$$@" $$unique; \
+         else \
+           $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+             $$unique; \
+         fi; \
+       fi
+# `ctags' is an alias for CTAGS.  CTAGS runs $(CTAGS) over the de-duplicated
+# list of sources, headers, config.in, $(LISP) and $(TAGS_FILES) (each
+# resolved to the current directory or $(srcdir)); nothing is run when both
+# $(CTAGS_ARGS) and the file list are empty.
+ctags: CTAGS
+CTAGS: ctags-recursive $(HEADERS) $(SOURCES) config.in $(TAGS_DEPENDENCIES) \
+               $(TAGS_FILES) $(LISP)
+       list='$(SOURCES) $(HEADERS) config.in $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       test -z "$(CTAGS_ARGS)$$unique" \
+         || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+            $$unique
+
+# Run `gtags -i' from $(top_srcdir), passing the absolute path of
+# $(top_builddir) (captured with pwd) as its final argument.
+GTAGS:
+       here=`$(am__cd) $(top_builddir) && pwd` \
+         && $(am__cd) $(top_srcdir) \
+         && gtags -i $(GTAGS_ARGS) "$$here"
+
+# Remove every tag database the rules above (or the corresponding tools)
+# may have left in this directory.
+distclean-tags:
+       -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+# Populate $(distdir) with everything that goes into a release.
+#  1. $(am__remove_distdir) is run and $(distdir) is created if missing.
+#  2. $(DISTFILES) is normalised: a leading $(srcdir)/ is stripped and a
+#     leading $(top_srcdir)/ is replaced by $(top_builddir)/ (the two
+#     *strip variables are the directory names with sed metacharacters
+#     escaped).  Needed sub-directories of $(distdir) are created.
+#  3. Each entry is copied from the build directory if it exists there,
+#     otherwise from $(srcdir).  Directories are copied recursively with
+#     `cp -fpR' (first the $(srcdir) copy, then the build-directory copy on
+#     top when both exist), with `chmod u+rwx' applied to already copied
+#     directories so they can be written into.  Plain files are copied with
+#     `cp -p' unless already present in $(distdir).
+#  4. A directory is created in $(distdir) for every entry of
+#     $(DIST_SUBDIRS), and `make distdir' is run in each of them with
+#     top_distdir/distdir rewritten by $(am__relativize) (defined elsewhere)
+#     and with the am__remove_distdir / am__skip_* variables set to `:'.
+#  5. The `dist-hook' target is made.
+#  6. Unless $(am__skip_mode_fix) is set, permissions are normalised with
+#     find/chmod; if that find command fails, `chmod -R a+r' is used.
+distdir: $(DISTFILES)
+       $(am__remove_distdir)
+       test -d "$(distdir)" || mkdir "$(distdir)"
+       @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+       topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+       list='$(DISTFILES)'; \
+         dist_files=`for file in $$list; do echo $$file; done | \
+         sed -e "s|^$$srcdirstrip/||;t" \
+             -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+       case $$dist_files in \
+         */*) $(MKDIR_P) `echo "$$dist_files" | \
+                          sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+                          sort -u` ;; \
+       esac; \
+       for file in $$dist_files; do \
+         if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+         if test -d $$d/$$file; then \
+           dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+           if test -d "$(distdir)/$$file"; then \
+             find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+           fi; \
+           if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+             cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+             find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+           fi; \
+           cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+         else \
+           test -f "$(distdir)/$$file" \
+           || cp -p $$d/$$file "$(distdir)/$$file" \
+           || exit 1; \
+         fi; \
+       done
+       @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
+         if test "$$subdir" = .; then :; else \
+           test -d "$(distdir)/$$subdir" \
+           || $(MKDIR_P) "$(distdir)/$$subdir" \
+           || exit 1; \
+         fi; \
+       done
+       @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
+         if test "$$subdir" = .; then :; else \
+           dir1=$$subdir; dir2="$(distdir)/$$subdir"; \
+           $(am__relativize); \
+           new_distdir=$$reldir; \
+           dir1=$$subdir; dir2="$(top_distdir)"; \
+           $(am__relativize); \
+           new_top_distdir=$$reldir; \
+           echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \
+           echo "     am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \
+           ($(am__cd) $$subdir && \
+             $(MAKE) $(AM_MAKEFLAGS) \
+               top_distdir="$$new_top_distdir" \
+               distdir="$$new_distdir" \
+               am__remove_distdir=: \
+               am__skip_length_check=: \
+               am__skip_mode_fix=: \
+               distdir) \
+             || exit 1; \
+         fi; \
+       done
+       $(MAKE) $(AM_MAKEFLAGS) \
+         top_distdir="$(top_distdir)" distdir="$(distdir)" \
+         dist-hook
+       -test -n "$(am__skip_mode_fix)" \
+       || find "$(distdir)" -type d ! -perm -755 \
+               -exec chmod u+rwx,go+rx {} \; -o \
+         ! -type d ! -perm -444 -links 1 -exec chmod a+r {} \; -o \
+         ! -type d ! -perm -400 -exec chmod a+r {} \; -o \
+         ! -type d ! -perm -444 -exec $(install_sh) -c -m a+r {} {} \; \
+       || chmod -R a+r "$(distdir)"
+# Archive rules.  Each one first builds $(distdir) (prerequisite `distdir'),
+# packs it, and then runs $(am__remove_distdir).  The tar-based formats pipe
+# the output of $(am__tar) (run with tardir=$(distdir)) through the
+# respective compressor; dist-shar and dist-zip use shar and zip directly.
+# gzip invocations get their options through GZIP=$(GZIP_ENV).
+dist-gzip: distdir
+       tardir=$(distdir) && $(am__tar) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).tar.gz
+       $(am__remove_distdir)
+
+dist-bzip2: distdir
+       tardir=$(distdir) && $(am__tar) | bzip2 -9 -c >$(distdir).tar.bz2
+       $(am__remove_distdir)
+
+dist-lzma: distdir
+       tardir=$(distdir) && $(am__tar) | lzma -9 -c >$(distdir).tar.lzma
+       $(am__remove_distdir)
+
+dist-xz: distdir
+       tardir=$(distdir) && $(am__tar) | xz -c >$(distdir).tar.xz
+       $(am__remove_distdir)
+
+dist-tarZ: distdir
+       tardir=$(distdir) && $(am__tar) | compress -c >$(distdir).tar.Z
+       $(am__remove_distdir)
+
+dist-shar: distdir
+       shar $(distdir) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).shar.gz
+       $(am__remove_distdir)
+
+# An existing zip archive is deleted first, before zip is run.
+dist-zip: distdir
+       -rm -f $(distdir).zip
+       zip -rq $(distdir).zip $(distdir)
+       $(am__remove_distdir)
+
+# `dist' and `dist-all' produce the same gzip-compressed tarball as
+# dist-gzip.
+dist dist-all: distdir
+       tardir=$(distdir) && $(am__tar) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).tar.gz
+       $(am__remove_distdir)
+
+# This target untars the dist file and tries a VPATH configuration.  Then
+# it guarantees that the distribution is self-contained by making another
+# tarfile.
+#
+# Steps:
+#  1. Unpack the first matching archive type from $(DIST_ARCHIVES).
+#  2. Make the unpacked tree read-only, temporarily re-enable write access
+#     on the top directory only to create _build and _inst, then remove it
+#     again.  Write access is restored for the owner only (u+w): granting
+#     a+w here would leave $(distdir) world-writable while the check runs,
+#     letting other local users tamper with the build (CVE-2012-3386).
+#  3. In _build: configure with --srcdir=.. and --prefix pointing at _inst,
+#     then make, dvi, check, install, installcheck, uninstall and
+#     distuninstallcheck.
+#  4. Repeat install/uninstall/distuninstallcheck with DESTDIR set to a
+#     private (umask 077) directory under $${TMPDIR-/tmp}; the directory is
+#     removed afterwards, also on failure.
+#  5. Make `dist' again from inside _build, delete the resulting archives,
+#     and run distcleancheck.
+#  6. Remove $(distdir) and print a banner listing $(DIST_ARCHIVES).
+# NOTE(review): this file is generated by Automake; regenerating it with
+# Automake >= 1.11.6 carries the same chmod fix.
+distcheck: dist
+       case '$(DIST_ARCHIVES)' in \
+       *.tar.gz*) \
+         GZIP=$(GZIP_ENV) gzip -dc $(distdir).tar.gz | $(am__untar) ;;\
+       *.tar.bz2*) \
+         bzip2 -dc $(distdir).tar.bz2 | $(am__untar) ;;\
+       *.tar.lzma*) \
+         lzma -dc $(distdir).tar.lzma | $(am__untar) ;;\
+       *.tar.xz*) \
+         xz -dc $(distdir).tar.xz | $(am__untar) ;;\
+       *.tar.Z*) \
+         uncompress -c $(distdir).tar.Z | $(am__untar) ;;\
+       *.shar.gz*) \
+         GZIP=$(GZIP_ENV) gzip -dc $(distdir).shar.gz | unshar ;;\
+       *.zip*) \
+         unzip $(distdir).zip ;;\
+       esac
+       chmod -R a-w $(distdir); chmod u+w $(distdir)
+       mkdir $(distdir)/_build
+       mkdir $(distdir)/_inst
+       chmod a-w $(distdir)
+       test -d $(distdir)/_build || exit 0; \
+       dc_install_base=`$(am__cd) $(distdir)/_inst && pwd | sed -e 's,^[^:\\/]:[\\/],/,'` \
+         && dc_destdir="$${TMPDIR-/tmp}/am-dc-$$$$/" \
+         && am__cwd=`pwd` \
+         && $(am__cd) $(distdir)/_build \
+         && ../configure --srcdir=.. --prefix="$$dc_install_base" \
+           $(DISTCHECK_CONFIGURE_FLAGS) \
+         && $(MAKE) $(AM_MAKEFLAGS) \
+         && $(MAKE) $(AM_MAKEFLAGS) dvi \
+         && $(MAKE) $(AM_MAKEFLAGS) check \
+         && $(MAKE) $(AM_MAKEFLAGS) install \
+         && $(MAKE) $(AM_MAKEFLAGS) installcheck \
+         && $(MAKE) $(AM_MAKEFLAGS) uninstall \
+         && $(MAKE) $(AM_MAKEFLAGS) distuninstallcheck_dir="$$dc_install_base" \
+               distuninstallcheck \
+         && chmod -R a-w "$$dc_install_base" \
+         && ({ \
+              (cd ../.. && umask 077 && mkdir "$$dc_destdir") \
+              && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" install \
+              && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" uninstall \
+              && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" \
+                   distuninstallcheck_dir="$$dc_destdir" distuninstallcheck; \
+             } || { rm -rf "$$dc_destdir"; exit 1; }) \
+         && rm -rf "$$dc_destdir" \
+         && $(MAKE) $(AM_MAKEFLAGS) dist \
+         && rm -rf $(DIST_ARCHIVES) \
+         && $(MAKE) $(AM_MAKEFLAGS) distcleancheck \
+         && cd "$$am__cwd" \
+         || exit 1
+       $(am__remove_distdir)
+       @(echo "$(distdir) archives ready for distribution: "; \
+         list='$(DIST_ARCHIVES)'; for i in $$list; do echo $$i; done) | \
+         sed -e 1h -e 1s/./=/g -e 1p -e 1x -e '$$p' -e '$$x'
+# Verify that `make uninstall' left nothing behind: change into
+# $(distuninstallcheck_dir) and run $(distuninstallcheck_listfiles) (defined
+# elsewhere); more than one line of output is treated as an error.  On
+# failure (including a failed cd) the leftover files are listed on stderr,
+# with a DESTDIR hint when $(DESTDIR) is set, and the recipe exits 1.
+distuninstallcheck:
+       @$(am__cd) '$(distuninstallcheck_dir)' \
+       && test `$(distuninstallcheck_listfiles) | wc -l` -le 1 \
+          || { echo "ERROR: files left after uninstall:" ; \
+               if test -n "$(DESTDIR)"; then \
+                 echo "  (check DESTDIR support)"; \
+               fi ; \
+               $(distuninstallcheck_listfiles) ; \
+               exit 1; } >&2
+# After `distclean', verify that the build directory is empty according to
+# $(distcleancheck_listfiles) (defined elsewhere).  The check refuses to run
+# when $(srcdir) is `.', i.e. when not in a VPATH build.  Leftover files are
+# listed on stderr and the recipe exits 1.
+distcleancheck: distclean
+       @if test '$(srcdir)' = . ; then \
+         echo "ERROR: distcleancheck can only run from a VPATH build" ; \
+         exit 1 ; \
+       fi
+       @test `$(distcleancheck_listfiles) | wc -l` -eq 0 \
+         || { echo "ERROR: files left in build directory after distclean:" ; \
+              $(distcleancheck_listfiles) ; \
+              exit 1; } >&2
+# Entry points for checking and installing.  `check' and `install' first
+# bring $(BUILT_SOURCES) up to date and then re-invoke make on the matching
+# *-recursive target; the other top-level targets depend on their
+# *-recursive counterpart directly.  The *-am targets describe the work for
+# this directory only.
+check-am: all-am
+check: $(BUILT_SOURCES)
+       $(MAKE) $(AM_MAKEFLAGS) check-recursive
+all-am: Makefile $(ANSI2KNR) $(LTLIBRARIES) $(HEADERS) config.h
+installdirs: installdirs-recursive
+# Create the three installation directories used by this Makefile.
+installdirs-am:
+       for dir in "$(DESTDIR)$(libdir)" "$(DESTDIR)$(includedir)" "$(DESTDIR)$(includeexecdir)"; do \
+         test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+       done
+install: $(BUILT_SOURCES)
+       $(MAKE) $(AM_MAKEFLAGS) install-recursive
+install-exec: install-exec-recursive
+install-data: install-data-recursive
+uninstall: uninstall-recursive
+
+install-am: all-am
+       @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-recursive
+# Re-run `make install' with the install programs replaced by
+# $(INSTALL_STRIP_PROGRAM) and INSTALL_STRIP_FLAG=-s; when $(STRIP) is set it
+# is additionally passed on through INSTALL_PROGRAM_ENV as STRIPPROG.
+install-strip:
+       $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+         install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+         `test -z '$(STRIP)' || \
+           echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+# Generic cleanup rules.  mostlyclean-generic and clean-generic have no
+# recipe here.
+mostlyclean-generic:
+
+clean-generic:
+
+# Remove $(CONFIG_CLEAN_FILES) and $(DISTCLEANFILES); the
+# $(CONFIG_CLEAN_VPATH_FILES) are removed only when $(srcdir) is not `.'.
+# Each rm is skipped when its list is empty, and failures are ignored.
+distclean-generic:
+       -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+       -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+       -test -z "$(DISTCLEANFILES)" || rm -f $(DISTCLEANFILES)
+
+# Print a warning for maintainers, then remove $(BUILT_SOURCES).
+maintainer-clean-generic:
+       @echo "This command is intended for maintainers to use"
+       @echo "it deletes files that may require special tools to rebuild."
+       -test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES)
+# Standard top-level targets and their per-directory `-am' counterparts.
+# Each top-level target depends on the matching *-recursive target; the
+# `-am' target lists what is done in this directory.  Targets that appear
+# below with neither prerequisites nor a recipe do nothing here.
+clean: clean-recursive
+
+clean-am: clean-generic clean-krextra clean-libLTLIBRARIES \
+       clean-libtool mostlyclean-am
+
+# After the recursive distclean, also remove the configuration output files
+# and the Makefile itself.
+distclean: distclean-recursive
+       -rm -f $(am__CONFIG_DISTCLEAN_FILES)
+       -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+       distclean-hdr distclean-libtool distclean-tags
+
+dvi: dvi-recursive
+
+dvi-am:
+
+html: html-recursive
+
+html-am:
+
+info: info-recursive
+
+info-am:
+
+# Install the public headers, then run the install-data-hook target.
+install-data-am: install-includeHEADERS
+       @$(NORMAL_INSTALL)
+       $(MAKE) $(AM_MAKEFLAGS) install-data-hook
+install-dvi: install-dvi-recursive
+
+install-dvi-am:
+
+install-exec-am: install-libLTLIBRARIES \
+       install-nodist_includeexecHEADERS
+
+install-html: install-html-recursive
+
+install-html-am:
+
+install-info: install-info-recursive
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-recursive
+
+install-pdf-am:
+
+install-ps: install-ps-recursive
+
+install-ps-am:
+
+installcheck-am:
+
+# Like distclean, but additionally removes $(top_srcdir)/autom4te.cache.
+maintainer-clean: maintainer-clean-recursive
+       -rm -f $(am__CONFIG_DISTCLEAN_FILES)
+       -rm -rf $(top_srcdir)/autom4te.cache
+       -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-recursive
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+       mostlyclean-libtool
+
+pdf: pdf-recursive
+
+pdf-am:
+
+ps: ps-recursive
+
+ps-am:
+
+uninstall-am: uninstall-includeHEADERS uninstall-libLTLIBRARIES \
+       uninstall-nodist_includeexecHEADERS
+
+# .MAKE lists the targets whose recipes re-invoke $(MAKE).
+# NOTE(review): .MAKE is presumably the BSD make special target for such
+# rules -- confirm against the make implementations being supported.
+.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) all check \
+       ctags-recursive install install-am install-data-am \
+       install-strip tags-recursive
+
+# Targets that do not name files and must always be considered out of date.
+.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \
+       all all-am am--refresh check check-am clean clean-generic \
+       clean-krextra clean-libLTLIBRARIES clean-libtool ctags \
+       ctags-recursive dist dist-all dist-bzip2 dist-gzip dist-hook \
+       dist-lzma dist-shar dist-tarZ dist-xz dist-zip distcheck \
+       distclean distclean-compile distclean-generic distclean-hdr \
+       distclean-libtool distclean-tags distcleancheck distdir \
+       distuninstallcheck dvi dvi-am html html-am info info-am \
+       install install-am install-data install-data-am \
+       install-data-hook install-dvi install-dvi-am install-exec \
+       install-exec-am install-html install-html-am \
+       install-includeHEADERS install-info install-info-am \
+       install-libLTLIBRARIES install-man \
+       install-nodist_includeexecHEADERS install-pdf install-pdf-am \
+       install-ps install-ps-am install-strip installcheck \
+       installcheck-am installdirs installdirs-am maintainer-clean \
+       maintainer-clean-generic mostlyclean mostlyclean-compile \
+       mostlyclean-generic mostlyclean-kr mostlyclean-libtool pdf \
+       pdf-am ps ps-am tags tags-recursive uninstall uninstall-am \
+       uninstall-includeHEADERS uninstall-libLTLIBRARIES \
+       uninstall-nodist_includeexecHEADERS
+
+
+# Hook run by install-data-am after the headers are installed.  It only
+# prints a boxed reminder to run `make check'; nothing is installed here.
+install-data-hook:
+       @echo ''
+       @echo '+-------------------------------------------------------------+'
+       @echo '| CAUTION:                                                    |'
+       @echo '|                                                             |'
+       @echo '| If you have not already run "make check", then we strongly  |'
+       @echo '| recommend you do so.                                        |'
+       @echo '|                                                             |'
+       @echo '| GMP has been carefully tested by its authors, but compilers |'
+       @echo '| are all too often released with serious bugs.  GMP tends to |'
+       @echo '| explore interesting corners in compilers and has hit bugs   |'
+       @echo '| on quite a few occasions.                                   |'
+       @echo '|                                                             |'
+       @echo '+-------------------------------------------------------------+'
+       @echo ''
+
+mpz/fac_ui.h: gen-fac_ui$(EXEEXT_FOR_BUILD)
+       ./gen-fac_ui $(GMP_LIMB_BITS) $(GMP_NAIL_BITS) >mpz/fac_ui.h || (rm -f mpz/fac_ui.h; exit 1)
+
+gen-fac_ui$(EXEEXT_FOR_BUILD): gen-fac_ui$(U_FOR_BUILD).c dumbmp.c
+       $(CC_FOR_BUILD) `test -f 'gen-fac_ui$(U_FOR_BUILD).c' || echo '$(srcdir)/'`gen-fac_ui$(U_FOR_BUILD).c -o gen-fac_ui$(EXEEXT_FOR_BUILD)
+
+gen-fac_ui_.c: gen-fac_ui.c $(ANSI2KNR)
+       $(CPP_FOR_BUILD) `if test -f $(srcdir)/gen-fac_ui.c; then echo $(srcdir)/gen-fac_ui.c; else echo gen-fac_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > gen-fac_ui_.c || rm -f gen-fac_ui_.c
+
+fib_table.h: gen-fib$(EXEEXT_FOR_BUILD)
+       ./gen-fib header $(GMP_LIMB_BITS) $(GMP_NAIL_BITS) >fib_table.h || (rm -f fib_table.h; exit 1)
+
+mpn/fib_table.c: gen-fib$(EXEEXT_FOR_BUILD)
+       ./gen-fib table $(GMP_LIMB_BITS) $(GMP_NAIL_BITS) >mpn/fib_table.c || (rm -f mpn/fib_table.c; exit 1)
+
+gen-fib$(EXEEXT_FOR_BUILD): gen-fib$(U_FOR_BUILD).c dumbmp.c
+       $(CC_FOR_BUILD) `test -f 'gen-fib$(U_FOR_BUILD).c' || echo '$(srcdir)/'`gen-fib$(U_FOR_BUILD).c -o gen-fib$(EXEEXT_FOR_BUILD)
+
+gen-fib_.c: gen-fib.c $(ANSI2KNR)
+       $(CPP_FOR_BUILD) `if test -f $(srcdir)/gen-fib.c; then echo $(srcdir)/gen-fib.c; else echo gen-fib.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > gen-fib_.c || rm -f gen-fib_.c
+
+mp_bases.h: gen-bases$(EXEEXT_FOR_BUILD)
+       ./gen-bases header $(GMP_LIMB_BITS) $(GMP_NAIL_BITS) >mp_bases.h || (rm -f mp_bases.h; exit 1)
+
+mpn/mp_bases.c: gen-bases$(EXEEXT_FOR_BUILD)
+       ./gen-bases table $(GMP_LIMB_BITS) $(GMP_NAIL_BITS) >mpn/mp_bases.c || (rm -f mpn/mp_bases.c; exit 1)
+
+gen-bases$(EXEEXT_FOR_BUILD): gen-bases$(U_FOR_BUILD).c dumbmp.c
+       $(CC_FOR_BUILD) `test -f 'gen-bases$(U_FOR_BUILD).c' || echo '$(srcdir)/'`gen-bases$(U_FOR_BUILD).c -o gen-bases$(EXEEXT_FOR_BUILD) $(LIBM_FOR_BUILD)
+
+gen-bases_.c: gen-bases.c $(ANSI2KNR)
+       $(CPP_FOR_BUILD) `if test -f $(srcdir)/gen-bases.c; then echo $(srcdir)/gen-bases.c; else echo gen-bases.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > gen-bases_.c || rm -f gen-bases_.c
+
+trialdivtab.h: gen-trialdivtab$(EXEEXT_FOR_BUILD)
+       ./gen-trialdivtab $(GMP_LIMB_BITS) 8000 >trialdivtab.h || (rm -f trialdivtab.h; exit 1)
+
+gen-trialdivtab$(EXEEXT_FOR_BUILD): gen-trialdivtab$(U_FOR_BUILD).c dumbmp.c
+       $(CC_FOR_BUILD) `test -f 'gen-trialdivtab$(U_FOR_BUILD).c' || echo '$(srcdir)/'`gen-trialdivtab$(U_FOR_BUILD).c -o gen-trialdivtab$(EXEEXT_FOR_BUILD) $(LIBM_FOR_BUILD)
+
+gen-trialdivtab_.c: gen-trialdivtab.c $(ANSI2KNR)
+       $(CPP_FOR_BUILD) `if test -f $(srcdir)/gen-trialdivtab.c; then echo $(srcdir)/gen-trialdivtab.c; else echo gen-trialdivtab.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > gen-trialdivtab_.c || rm -f gen-trialdivtab_.c
+
+mpn/perfsqr.h: gen-psqr$(EXEEXT_FOR_BUILD)
+       ./gen-psqr $(GMP_LIMB_BITS) $(GMP_NAIL_BITS) >mpn/perfsqr.h || (rm -f mpn/perfsqr.h; exit 1)
+
+gen-psqr$(EXEEXT_FOR_BUILD): gen-psqr$(U_FOR_BUILD).c dumbmp.c
+       $(CC_FOR_BUILD) `test -f 'gen-psqr$(U_FOR_BUILD).c' || echo '$(srcdir)/'`gen-psqr$(U_FOR_BUILD).c -o gen-psqr$(EXEEXT_FOR_BUILD) $(LIBM_FOR_BUILD)
+
+gen-psqr_.c: gen-psqr.c $(ANSI2KNR)
+       $(CPP_FOR_BUILD) `if test -f $(srcdir)/gen-psqr.c; then echo $(srcdir)/gen-psqr.c; else echo gen-psqr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > gen-psqr_.c || rm -f gen-psqr_.c
+
+# Avoid: CVS - cvs directories
+#        *~  - emacs backups
+#        .#* - cvs merge originals
+#
+# *~ and .#* only occur when a whole directory without its own Makefile.am
+# is distributed, like "doc" or the mpn cpu subdirectories.
+#
+dist-hook:
+       -find $(distdir) \( -name CVS -type d \) -o -name "*~" -o -name ".#*" \
+               | xargs rm -rf
+#      grep -F $(VERSION) $(srcdir)/Makefile.am \
+#              | grep -q "^# *$(VERSION) *$(LIBGMP_LT_CURRENT):$(LIBGMP_LT_REVISION):$(LIBGMP_LT_AGE) *$(LIBGMPXX_LT_CURRENT):$(LIBGMPXX_LT_REVISION):$(LIBGMPXX_LT_AGE) *$(LIBMP_LT_CURRENT):$(LIBMP_LT_REVISION):$(LIBMP_LT_AGE)"
+#      test -z "`sed -n 's/^# *[0-9]*\.[0-9]*\.[0-9]* *\([0-9]*:[0-9]*:[0-9]*\) *\([0-9]*:[0-9]*:[0-9]*\) *\([0-9]*:[0-9]*:[0-9]*\).*/A\1\nB\2\nC\3/p' $(srcdir)/Makefile.am | grep -v 'A6:3:3\|B3:5:0\|C4:7:1' | sort | uniq -d`"
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/NEWS b/NEWS

new file mode 100644 (file)

index 0000000..b1a695c
--- /dev/null
+++ b/NEWS
@@ -0,0 +1,672 @@
+Copyright 1996, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
+2009, 2010, 2011, 2012 Free Software Foundation, Inc.
+
+Verbatim copying and distribution of this entire article is permitted in any
+medium, provided this notice is preserved.
+
+
+Changes between GMP version 5.0.4 and 5.0.5
+
+  BUGS FIXED
+  * A bug causing AMD 11h processors to be treated like AMD 10h has been fixed.
+    The 11h processors do not correctly handle all 10h (aka K10) instructions,
+    and GMP's use of these instructions results in major miscomputations (not,
+    as one would have hoped, CPU traps of some 'illegal instruction' sort).
+
+  * A bug affecting recent Intel Sandy Bridge CPUs resulting in configuration
+    failures has been fixed.
+
+  SPEEDUPS
+  * None.
+
+  FEATURES
+  * A couple of tests added to the self-check suite.
+
+  MISC
+  * None.
+
+
+Changes between GMP version 5.0.3 and 5.0.4
+
+  BUGS FIXED
+  * Thresholds in mpn_powm_sec for both fat and non-fat builds are now used
+    safely, plugging a one-word buffer overrun introduced in the 5.0.3 release
+    (for non-fat) and a multi-word buffer overrun that existed since 5.0 (for
+    fat).  (We have not been able to provoke malign stack smashing in any of
+    the ~100 configurations explored by the GMP nightly builds, but the bug
+    should be assumed to be exploitable.)
+
+  * Two bugs in multiplication code causing incorrect computation with
+    extremely low probability have been fixed.
+
+  * A bug in the test suite causing buffer overruns during "make check",
+    sometimes leading to subsequent malloc crashes, has been fixed.
+
+  * Two bugs in the gcd code have been fixed.  They could lead to incorrect
+    results, but for uniformly distributed random operands, the likelihood for
+    that is infinitesimally small.  (There was also a third bug, but that was
+    an incorrect ASSERT, which furthermore was not enabled by default.)
+
+  * A bug affecting 32-bit PowerPC division has been fixed.  The bug caused
+    miscomputation for certain divisors in the range 2^32 ... 2^64-1 (about 1
+    in 2^30 of these).
+
+  SPEEDUPS
+  * None, except indirectly through recognition of new CPUs, and through better
+    tuning parameters.
+
+  FEATURES
+  * Some more tests added to the self-check suite.
+
+  * The AMD "Bulldozer" CPU is now recognised.
+
+  MISC
+  * None.
+
+
+Changes between GMP version 5.0.2 and 5.0.3
+
+  BUGS FIXED
+  * A few minor bugs related to portability fixed.
+
+  * A slight timing leak of the powm_sec functions has been sealed.  (This
+    leak could possibly be used to extract the most significant few bits of the
+    exponent.  "Few" here means at most 10.)
+
+  * The mpz_nextprime function now runs a safer number of pseudo-random prime
+    tests.
+
+  * A bug in division code possibly causing incorrect computation was fixed.
+
+  SPEEDUPS
+  * None, except indirectly through recognition of new CPUs, and through better
+    tuning parameters.
+
+  FEATURES
+  * New CPUs recognised.
+
+  * IBM S/390 are now supported in both 31/32-bit and 64-bit mode.  (We have
+    not been able to fully test this on any multilib machine, since IBM expired
+    our guest account a few days before our release.)
+
+  MISC
+  * None.
+
+
+Changes between GMP version 5.0.1 and 5.0.2
+
+  BUGS FIXED
+  * Many minor bugs related to portability fixed.
+
+  * The support for HPPA 2.0N now works, after an assembly bug fix.
+
+  * A test case type error has been fixed.  The symptom of this bug was
+    spurious 'make check' failures.
+
+  SPEEDUPS
+  * None, except indirectly through recognition of new CPUs.
+
+  FEATURES
+  * Fat builds are now supported for 64-bit x86 processors also under Darwin.
+
+  MISC
+  * None.
+
+
+Changes between GMP version 5.0.0 and 5.0.1
+
+  BUGS FIXED
+  * Fat builds fixed.
+
+  * Fixed crash for huge multiplies when old FFT_TABLE2 type of parameter
+    selection tables' sentinel was smaller than multiplied operands.
+
+  * The solib numbers now reflect the removal of the documented but preliminary
+    mpn_bdivmod function; we correctly flag incompatibility with GMP 4.3.  GMP
+    5.0.0 has this wrong, and should perhaps be uninstalled to avoid confusion.
+
+  SPEEDUPS
+  * Multiplication of large numbers has indirectly been sped up through better
+    FFT tuning and processor recognition.  Since many operations depend on
+    multiplication, there will be a general speedup.
+
+  FEATURES
+  * More Core i3, i5 and Core i7 processor models are recognised.
+
+  * Fixes and workarounds for Mac OS quirks should make this GMP version build
+    using many of the different versions of "Xcode".
+
+  MISC
+  * The amount of scratch memory needed for multiplication of huge numbers has
+    been reduced substantially (but is still larger than in GMP 4.3.)
+
+  * Likewise, the amount of scratch memory needed for division of large numbers
+    has been reduced substantially.
+
+  * The FFT tuning code of tune/tuneup.c has been completely rewritten, and
+    new, large FFT parameter selection tables are provided for many machines.
+
+  * Upgraded to the latest autoconf, automake, libtool.
+
+
+Changes between GMP version 4.3.X and 5.0.0
+
+  BUGS FIXED
+  * None (contains the same fixes as release 4.3.2).
+
+  SPEEDUPS
+  * Multiplication has been overhauled:
+    (1) Multiplication of larger same size operands has been improved with
+        the addition of two new Toom functions and a new internal function
+        mpn_mulmod_bnm1 (computing U * V mod (B^n-1), B being the word base).
+        This latter function is used for the largest products, waiting for a
+        better Schoenhage-Strassen U * V mod (B^n+1) implementation.
+    (2) Likewise for squaring.
+    (3) Multiplication of different size operands has been improved with the
+        addition of many new Toom functions, and by selecting underlying
+        functions better from the main multiply functions.
+
+  * Division and mod have been overhauled:
+    (1) Plain "schoolbook" division is reimplemented using faster quotient
+        approximation.
+    (2) Division Q = N/D, R = N mod D where both the quotient and remainder
+        are needed now runs in time O(M(log(N))).  This is an improvement of
+        a factor log(log(N)).
+    (3) Division where just the quotient is needed is now O(M(log(Q))) on
+        average.
+    (4) Modulo operations using Montgomery REDC form now take time O(M(n)).
+    (5) Exact division Q = N/D by means of mpz_divexact has been improved
+        for all sizes, and now runs in time O(M(log(N))).
+
+  * The function mpz_powm is now faster for all sizes.  Its complexity has
+    gone from O(M(n)log(n)m) to O(M(n)m) where n is the size of the modulo
+    argument and m is the size of the exponent.  It is also radically
+    faster for even modulus, since it now partially factors such modulus
+    and performs two smaller modexp operations, then uses CRT.
+
+  * The internal support for multiplication yielding just the lower n limbs
+    has been improved by using Mulders' algorithm.
+
+  * Computation of inverses, both plain 1/N and 1/N mod B^n, has been
+    improved by using well-tuned Newton iterations, and wrap-around
+    multiplication using mpn_mulmod_bnm1.
+
+  * A new algorithm makes mpz_perfect_power_p asymptotically faster.
+
+  * The function mpz_remove uses a much faster algorithm, is better tuned,
+    and also benefits from the division improvements.
+
+  * Intel Atom and VIA Nano specific optimisations.
+
+  * Plus hundreds of smaller improvements and tweaks!
+
+  FEATURES
+  * New mpz function: mpz_powm_sec for side-channel quiet modexp
+    computations.
+
+  * New mpn functions: mpn_sqr, mpn_and_n, mpn_ior_n, mpn_xor_n, mpn_nand_n,
+    mpn_nior_n, mpn_xnor_n, mpn_andn_n, mpn_iorn_n, mpn_com, mpn_neg,
+    mpn_copyi, mpn_copyd, mpn_zero.
+
+  * The function mpn_tdiv_qr now allows certain argument overlap.
+
+  * Support for fat binaries for 64-bit x86 processors has been added.
+
+  * A new type, mp_bitcnt_t for bignum bit counts, has been introduced.
+
+  * Support for Windows64 through mingw64 has been added.
+
+  * The cofactors of mpz_gcdext and mpn_gcdext are now more strictly
+    normalised, returning to how GMP 4.2 worked.  (Note that also release
+    4.3.2 has this change.)
+
+  MISC
+  * The mpn_mul function should no longer be used for squaring,
+    instead use the new mpn_sqr.
+
+  * The algorithm selection has been improved, the number of thresholds has
+    more than doubled, and the tuning and use of existing thresholds have
+    been improved.
+
+  * The tune/speed program can measure many of the new functions.
+
+  * The mpn_bdivmod function has been removed.  We do not consider this an
+    incompatible change, since the function was marked as preliminary.
+
+  * The testsuite has been enhanced in various ways.
+
+
+Changes between GMP version 4.3.1 and 4.3.2
+
+  Bugs:
+  * Fixed bug in mpf_eq.
+  * Fixed overflow issues in mpz_set_str, mpz_inp_str, mpf_set_str, and
+    mpf_get_str.
+  * Avoid unbounded stack allocation for unbalanced multiplication.
+  * Fixed bug in FFT multiplication.
+
+  Speedups:
+  * None, except that proper processor recognition helps affected processors.
+
+  Features:
+  * Recognise more "Core 2" processor variants.
+  * The cofactors of mpz_gcdext and mpn_gcdext are now more strictly
+    normalised, returning to how GMP 4.2 worked.
+
+
+Changes between GMP version 4.3.0 and 4.3.1
+
+  Bugs:
+  * Fixed bug in mpn_gcdext, affecting also mpz_gcdext and mpz_invert.
+    The bug could cause a cofactor to have a leading zero limb, which
+    could lead to crashes or miscomputation later on.
+  * Fixed some minor documentation issues.
+
+  Speedups:
+  * None.
+
+  Features:
+  * Workarounds for various issues with Mac OS X's build tools.
+  * Recognise more IBM "POWER" processor variants.
+
+
+Changes between GMP version 4.2.X and 4.3.0
+
+  Bugs:
+  * Fixed bug in mpz_perfect_power_p with recognition of negative perfect
+    powers that can be written both as an even and odd power.
+  * We might accidentally have added bugs since there is a large amount of
+    new code in this release.
+
+  Speedups:
+  * Vastly improved assembly code for x86-64 processors from AMD and Intel.
+  * Major improvements also for many other processor families, such as
+    Alpha, PowerPC, and Itanium.
+  * New sub-quadratic mpn_gcd and mpn_gcdext, as well as improved basecase
+    gcd code.
+  * The multiply FFT code has been slightly improved.
+  * Balanced multiplication now uses 4-way Toom in addition to schoolbook,
+    Karatsuba, 3-way Toom, and FFT.
+  * Unbalanced multiplication has been vastly improved.
+  * Improved schoolbook division by means of faster quotient approximation.
+  * Several new algorithms for division and mod by single limbs, giving
+    many-fold speedups.
+  * Improved nth root computations.
+  * The mpz_nextprime function uses sieving and is much faster.
+  * Countless minor tweaks.
+
+  Features:
+  * Updated support for fat binaries for x86_32 to include current processors.
+  * Lots of new mpn internal interfaces.  Some of them will become public
+    in a future GMP release.
+  * Support for the 32-bit ABI under x86-apple-darwin.
+  * x86 CPU recognition code should now default better for future
+    processors.
+  * The experimental nails feature does not work in this release, but
+    it might be re-enabled in the future.
+
+  Misc:
+  * The gmp_version variable now always contains three parts.  For this
+    release, it is "4.3.0".
+
+
+Changes between GMP version 4.2.3 and 4.2.4
+
+  Bugs:
+  * Fix bug with parsing exponent '+' sign in mpf.
+  * Fix an allocation bug in mpf_set_str, also affecting mpf_init_set_str, and
+    mpf_inp_str.
+
+  Speedups:
+  * None, except that proper processor recognition helps affected processors.
+
+  Features:
+  * Recognize new AMD processors.
+
+
+Changes between GMP version 4.2.2 and 4.2.3
+
+  Bugs:
+  * Fix x86 CPU recognition code to properly identify recent AMD and Intel
+    64-bit processors.
+  * The >> operator of the C++ wrapper gmpxx.h now does floor rounding, not
+    truncation.
+  * Inline semantics now follow the C99 standard, and work with recent GCC
+    releases.
+  * C++ bitwise logical operations work for more types.
+  * For C++, gmp.h now includes cstdio, improving compiler compatibility.
+  * Bases > 36 now work properly in mpf_set_str.
+
+  Speedups:
+  * None, except that proper processor recognition helps affected processors.
+
+  Features:
+  * The allocation functions now detect overflow of the mpz_t type.  This means
+    that overflow will now cause an abort, except when the allocation
+    computation itself overflows.  (Such overflow can probably only happen in
+    powering functions; we will detect powering overflow in the future.)
+
+
+Changes between GMP version 4.2.1 and 4.2.2
+
+  * License is now LGPL version 3.
+
+  Bugs:
+  * Shared library numbers corrected for libcxx.
+  * Fixed serious bug in gmpxx.h where a=a+b*c would generate garbage.
+    Note that this only affects C++ programs.
+  * Fix crash in mpz_set_d for arguments with large negative exponent.
+  * Fix 32-bit ABI bug with Itanium assembly for popcount and hamdist.
+  * Fix assembly syntax problem for powerpc-ibm-aix with AIX native assembler.
+  * Fix problems with x86 --enable-fat, where the compiler was told to
+    generate code for the build machine, not plain i386 code as it should.
+  * Improved recognition of powerpc systems wrt Altivec/VMX capability.
+  * Misc minor fixes, mainly workarounds for compiler/assembler bugs.
+
+  Speedups:
+  * "Core 2" and Pentium 4 processors, running in 64-bit mode will get a
+     slight boost as they are now specifically recognized.
+
+  Features:
+  * New support for x86_64-solaris
+  * New, rudimentary support for x86-apple-darwin and x86_64-apple-darwin.
+    (Please see http://gmplib.org/macos.html for more information.)
+
+
+Changes between GMP version 4.2 and 4.2.1
+
+  Bugs:
+  * Shared library numbers corrected.
+  * Broken support for 32-bit AIX fixed.
+  * Misc minor fixes.
+
+  Speedups:
+  * Exact division (mpz_divexact) now falls back to plain division for large
+    operands.
+
+  Features:
+  * Support for some new systems.
+
+
+Changes between GMP version 4.1.4 and 4.2
+
+  Bugs:
+  * Minor bug fixes and code generalizations.
+  * Expanded and improved test suite.
+
+  Speedups:
+  * Many minor optimizations, too many to mention here.
+  * Division now always subquadratic.
+  * Computation of n-factorial much faster.
+  * Added basic x86-64 assembly code.
+  * Floating-point output is now subquadratic for all bases.
+  * FFT multiply code now about 25% faster.
+  * Toom3 multiply code faster.
+
+  Features:
+  * Much improved configure.
+  * Workarounds for many more compiler bugs.
+  * Temporary allocations are now made on the stack only if small.
+  * New systems supported: HPPA-2.0 gcc, IA-64 HP-UX, PowerPC-64 Darwin,
+    Sparc64 GNU/Linux.
+  * New i386 fat binaries, selecting optimised code at runtime (--enable-fat).
+  * New build option: --enable-profiling=instrument.
+  * New memory function: mp_get_memory_functions.
+  * New Mersenne Twister random numbers: gmp_randinit_mt, also now used for
+    gmp_randinit_default.
+  * New random functions: gmp_randinit_set, gmp_urandomb_ui, gmp_urandomm_ui.
+  * New integer functions: mpz_combit, mpz_rootrem.
+  * gmp_printf etc new type "M" for mp_limb_t.
+  * gmp_scanf and friends now accept C99 hex floats.
+  * Numeric input and output can now be in bases up to 62.
+  * Comparisons mpz_cmp_d, mpz_cmpabs_d, mpf_cmp_d recognise infinities.
+  * Conversions mpz_get_d, mpq_get_d, mpf_get_d truncate towards zero,
+    previously their behaviour was unspecified.
+  * Fixes for overflow issues with operands >= 2^31 bits.
+
+  Caveats:
+  * mpfr is gone, and will from now on be released only separately.  Please see
+    www.mpfr.org.
+
+
+Changes between GMP version 4.1.3 and 4.1.4
+
+* Bug fix to FFT multiplication code (crash for huge operands).
+* Bug fix to mpf_sub (miscomputation).
+* Support for powerpc64-gnu-linux.
+* Better support for AMD64 in 32-bit mode.
+* Upwardly binary compatible with 4.1.3, 4.1.2, 4.1.1, 4.1, 4.0.1, 4.0,
+  and 3.x versions.
+
+
+Changes between GMP version 4.1.2 and 4.1.3
+
+* Bug fix for FFT multiplication code (miscomputation).
+* Bug fix to K6 assembly code for gcd.
+* Bug fix to IA-64 assembly code for population count.
+* Portability improvements, most notably functional AMD64 support.
+* mpz_export allows NULL for countp parameter.
+* Many minor bug fixes.
+* mpz_export allows NULL for countp parameter.
+* Upwardly binary compatible with 4.1.2, 4.1.1, 4.1, 4.0.1, 4.0, and 3.x
+  versions.
+
+
+Changes between GMP version 4.1.1 and 4.1.2
+
+* Bug fixes.
+
+
+Changes between GMP version 4.1 and 4.1.1
+
+* Bug fixes.
+* New systems supported: NetBSD and OpenBSD sparc64.
+
+
+Changes between GMP version 4.0.1 and 4.1
+
+* Bug fixes.
+* Speed improvements.
+* Upwardly binary compatible with 4.0, 4.0.1, and 3.x versions.
+* Asymptotically fast conversion to/from strings (mpz, mpq, mpn levels), but
+  also major speed improvements for tiny operands.
+* mpn_get_str parameter restrictions relaxed.
+* Major speed improvements for HPPA 2.0 systems.
+* Major speed improvements for UltraSPARC systems.
+* Major speed improvements for IA-64 systems (but still sub-optimal code).
+* Extended test suite.
+* mpfr is back, with many bug fixes and portability improvements.
+* New function: mpz_ui_sub.
+* New functions: mpz_export, mpz_import.
+* Optimization for nth root functions (mpz_root, mpz_perfect_power_p).
+* Optimization for extended gcd (mpz_gcdext, mpz_invert, mpn_gcdext).
+* Generalized low-level number format, reserving a `nails' part of each
+  limb.  (Please note that this is really experimental; some functions
+  are likely to compute garbage when nails are enabled.)
+* Nails-enabled Alpha 21264 assembly code, allowing up to 75% better
+  performance.  (Use --enable-nails=4 to enable it.)
+
+
+Changes between GMP version 4.0 and 4.0.1
+
+* Bug fixes.
+
+
+Changes between GMP version 3.1.1 and 4.0
+
+* Bug fixes.
+* Speed improvements.
+* Upwardly binary compatible with 3.x versions.
+* New CPU support: IA-64, Pentium 4.
+* Improved CPU support: 21264, Cray vector systems.
+* Support for all MIPS ABIs: o32, n32, 64.
+* New systems supported: Darwin, SCO, Windows DLLs.
+* New divide-and-conquer square root algorithm.
+* New algorithms chapter in the manual.
+* New malloc reentrant temporary memory method.
+* New C++ class interface by Gerardo Ballabio (beta).
+* Revamped configure, featuring ABI selection.
+* Speed improvements for mpz_powm and mpz_powm_ui (mainly affecting small
+  operands).
+* mpz_perfect_power_p now properly recognizes 0, 1, and negative perfect
+  powers.
+* mpz_hamdist now supports negative operands.
+* mpz_jacobi now accepts non-positive denominators.
+* mpz_powm now supports negative exponents.
+* mpn_mul_1 operand overlap requirements relaxed.
+* Float input and output uses locale specific decimal point where available.
+* New gmp_printf, gmp_scanf and related functions.
+* New division functions: mpz_cdiv_q_2exp, mpz_cdiv_r_2exp, mpz_divexact_ui.
+* New divisibility tests: mpz_divisible_p, mpz_divisible_ui_p,
+  mpz_divisible_2exp_p, mpz_congruent_p, mpz_congruent_ui_p,
+  mpz_congruent_2exp_p.
+* New Fibonacci function: mpz_fib2_ui.
+* New Lucas number functions: mpz_lucnum_ui, mpz_lucnum2_ui.
+* Other new integer functions: mpz_cmp_d, mpz_cmpabs_d, mpz_get_d_2exp,
+  mpz_init2, mpz_kronecker, mpz_lcm_ui, mpz_realloc2.
+* New rational I/O: mpq_get_str, mpq_inp_str, mpq_out_str, mpq_set_str.
+* Other new rational functions: mpq_abs, mpq_cmp_si, mpq_div_2exp,
+  mpq_mul_2exp, mpq_set_f.
+* New float tests: mpf_integer_p, mpf_fits_sint_p, mpf_fits_slong_p,
+  mpf_fits_sshort_p, mpf_fits_uint_p, mpf_fits_ulong_p, mpf_fits_ushort_p.
+* Other new float functions: mpf_cmp_d, mpf_get_default_prec, mpf_get_si,
+  mpf_get_ui, mpf_get_d_2exp.
+* New random functions: gmp_randinit_default, gmp_randinit_lc_2exp_size.
+* New demo expression string parser (see demos/expr).
+* New preliminary perl interface (see demos/perl).
+* Tuned algorithm thresholds for many more CPUs.
+
+
+Changes between GMP version 3.1 and 3.1.1
+
+* Bug fixes for division (rare), mpf_get_str, FFT, and miscellaneous minor
+  things.
+
+
+Changes between GMP version 3.0 and 3.1
+
+* Bug fixes.
+* Improved `make check' running more tests.
+* Tuned algorithm cutoff points for many machines.  This will improve speed for
+  a lot of operations, in some cases by a large amount.
+* Major speed improvements: Alpha 21264.
+* Some speed improvements: Cray vector computers, AMD K6 and Athlon, Intel P5
+  and Pentium Pro/II/III.
+* The mpf_get_prec function now works as it did in GMP 2.
+* New utilities for auto-tuning and speed measuring.
+* Multiplication now optionally uses FFT for very large operands.  (To enable
+  it, pass --enable-fft to configure.)
+* Support for new systems: Solaris running on x86, FreeBSD 5, HP-UX 11, Cray
+  vector computers, Rhapsody, Nextstep/Openstep, MacOS.
+* Support for shared libraries on 32-bit HPPA.
+* New integer functions: mpz_mul_si, mpz_odd_p, mpz_even_p.
+* New Kronecker symbol functions: mpz_kronecker_si, mpz_kronecker_ui,
+  mpz_si_kronecker, mpz_ui_kronecker.
+* New rational functions: mpq_out_str, mpq_swap.
+* New float functions: mpf_swap.
+* New mpn functions: mpn_divexact_by3c, mpn_tdiv_qr.
+* New EXPERIMENTAL function layer for accurate floating-point arithmetic, mpfr.
+  To try it, pass --enable-mpfr to configure.  See the mpfr subdirectory for
+  more information; it is not documented in the main GMP manual.
+
+
+Changes between GMP version 3.0 and 3.0.1
+
+* Memory leaks in gmp_randinit and mpz_probab_prime_p fixed.
+* Documentation for gmp_randinit fixed.  Misc documentation errors fixed.
+
+
+Changes between GMP version 2.0 and 3.0
+
+* Source level compatibility with past releases (except mpn_gcd).
+* Bug fixes.
+* Much improved speed thanks to both host independent and host dependent
+  optimizations.
+* Switch to autoconf/automake/libtool.
+* Support for building libgmp as a shared library.
+* Multiplication and squaring using 3-way Toom-Cook.
+* Division using the Burnikel-Ziegler method.
+* New functions computing binomial coefficients: mpz_bin_ui, mpz_bin_uiui.
+* New function computing Fibonacci numbers: mpz_fib_ui.
+* New random number generators: mpf_urandomb, mpz_rrandomb, mpz_urandomb,
+  mpz_urandomm, gmp_randclear, gmp_randinit, gmp_randinit_lc_2exp,
+  gmp_randseed, gmp_randseed_ui.
+* New function for quickly extracting limbs: mpz_getlimbn.
+* New functions performing integer size tests: mpz_fits_sint_p,
+  mpz_fits_slong_p, mpz_fits_sshort_p, mpz_fits_uint_p, mpz_fits_ulong_p,
+  mpz_fits_ushort_p.
+* New mpf functions: mpf_ceil, mpf_floor, mpf_pow_ui, mpf_trunc.
+* New mpq function: mpq_set_d.
+* New mpz functions: mpz_addmul_ui, mpz_cmpabs, mpz_cmpabs_ui, mpz_lcm,
+  mpz_nextprime, mpz_perfect_power_p, mpz_remove, mpz_root, mpz_swap,
+  mpz_tdiv_ui, mpz_tstbit, mpz_xor.
+* New mpn function: mpn_divexact_by3.
+* New CPU support: DEC Alpha 21264, AMD K6 and Athlon, HPPA 2.0 and 64,
+  Intel Pentium Pro and Pentium-II/III, Sparc 64, PowerPC 64.
+* Almost 10 times faster mpz_invert and mpn_gcdext.
+* The interface of mpn_gcd has changed.
+* Better support for MIPS R4x000 and R5000 under Irix 6.
+* Improved support for SPARCv8 and SPARCv9 processors.
+
+
+Changes between GMP version 2.0 and 2.0.2
+
+* Many bug fixes.
+
+
+Changes between GMP version 1.3.2 and 2.0
+
+* Division routines in the mpz class have changed.  There are three classes of
+  functions that round the quotient to -infinity, 0, and +infinity,
+  respectively.  The first class of functions have names that begin with
+  mpz_fdiv (f is short for floor), the second class' names begin with mpz_tdiv
+  (t is short for trunc), and the third class' names begin with mpz_cdiv (c is
+  short for ceil).
+
+  The old division routines beginning with mpz_m are similar to the new
+  mpz_fdiv, with the exception that some of the new functions return useful
+  values.
+
+  The old function names can still be used.  All the old function names will
+  now do floor division, not trunc division as some of them used to.  This was
+  changed to make the functions more compatible with common mathematical
+  practice.
+
+  The mpz_mod and mpz_mod_ui functions now compute the mathematical mod
+  function.  I.e., the sign of the 2nd argument is ignored.
+
+* The mpq assignment functions do not canonicalize their results.  A new
+  function, mpq_canonicalize must be called by the user if the result is not
+  known to be canonical.
+* The mpn functions are now documented.  These functions are intended for
+  very time critical applications, or applications that need full control over
+  memory allocation.  Note that the mpn interface is irregular and hard to
+  use.
+* New functions for arbitrary precision floating point arithmetic.  Names
+  begin with `mpf_'.  Associated type mpf_t.
+* New and improved mpz functions, including much faster GCD, fast exact
+  division (mpz_divexact), bit scan (mpz_scan0 and mpz_scan1), and number
+  theoretical functions like Jacobi (mpz_jacobi) and multiplicative inverse
+  (mpz_invert).
+* New variable types (mpz_t and mpq_t) are available that make the syntax of
+  mpz and mpq calls nicer (no need for & before variables).  The MP_INT and
+  MP_RAT types are still available for compatibility.
+* Uses GNU configure.  This makes it possible to choose target architecture
+  and CPU variant, and to compile into a separate object directory.
+* Carefully optimized assembly for important inner loops.  Support for DEC
+  Alpha, Amd 29000, HPPA 1.0 and 1.1, Intel Pentium and generic x86, Intel
+  i960, Motorola MC68000, MC68020, MC88100, and MC88110, Motorola/IBM
+  PowerPC, National NS32000, IBM POWER, MIPS R3000, R4000, SPARCv7,
+  SuperSPARC, generic SPARCv8, and DEC VAX.  Some support also for ARM,
+  Clipper, IBM ROMP (RT), and Pyramid AP/XP.
+* Faster.  Thanks to the assembler code, new algorithms, and general tuning.
+  In particular, the speed on machines without GCC is improved.
+* Support for machines without alloca.
+* Now under the LGPL.
+
+INCOMPATIBILITIES BETWEEN GMP 1 AND GMP 2
+
+* mpq assignment functions do not canonicalize their results.
+* mpz division functions round differently.
+* mpz mod functions now really compute mod.
+* mpz_powm and mpz_powm_ui now really use mod for reduction.
diff --git a/README b/README

new file mode 100644 (file)

index 0000000..03ffb7f
--- /dev/null
+++ b/README
@@ -0,0 +1,105 @@
+Copyright 1991, 1996, 1999, 2000, 2007 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+
+
+
+
+                       THE GNU MP LIBRARY
+
+
+GNU MP is a library for arbitrary precision arithmetic, operating on signed
+integers, rational numbers, and floating point numbers.  It has a rich set of
+functions, and the functions have a regular interface.
+
+GNU MP is designed to be as fast as possible, both for small operands and huge
+operands.  The speed is achieved by using fullwords as the basic arithmetic
+type, by using fast algorithms, with carefully optimized assembly code for the
+most common inner loops for lots of CPUs, and by a general emphasis on speed
+(instead of simplicity or elegance).
+
+GNU MP is believed to be faster than any other similar library.  Its advantage
+increases with operand sizes for certain operations, since GNU MP in many
+cases has asymptotically faster algorithms.
+
+GNU MP is free software and may be freely copied on the terms contained in the
+files COPYING.LIB and COPYING (most of GNU MP is under the former, some under
+the latter).
+
+
+
+                       OVERVIEW OF GNU MP
+
+There are five classes of functions in GNU MP.
+
+ 1. Signed integer arithmetic functions (mpz).  These functions are intended
+    to be easy to use, with their regular interface.  The associated type is
+    `mpz_t'.
+
+ 2. Rational arithmetic functions (mpq).  For now, just a small set of
+    functions necessary for basic rational arithmetic.  The associated type
+    is `mpq_t'.
+
+ 3. Floating-point arithmetic functions (mpf).  If the C type `double'
+    doesn't give enough precision for your application, declare your
+    variables as `mpf_t' instead, set the precision to any number desired,
+    and call the functions in the mpf class for the arithmetic operations.
+
+ 4. Positive-integer, hard-to-use, very low overhead functions are in the
+    mpn class.  No memory management is performed.  The caller must ensure
+    enough space is available for the results.  The set of functions is not
+    regular, nor is the calling interface.  These functions accept input
+    arguments in the form of pairs consisting of a pointer to the least
+    significant word, and an integral size telling how many limbs (= words)
+    the pointer points to.
+
+    Almost all calculations, in the entire package, are made by calling these
+    low-level functions.
+
+ 5. Berkeley MP compatible functions.
+
+    To use these functions, include the file "mp.h".  You can test if you are
+    using the GNU version by testing if the symbol __GNU_MP__ is defined.
+
+For more information on how to use GNU MP, please refer to the documentation.
+It is composed from the file doc/gmp.texi, and can be displayed on the screen
+or printed.  How to do that, as well as how to build the library, is described
+in the INSTALL file in this directory.
+
+
+
+                       REPORTING BUGS
+
+If you find a bug in the library, please make sure to tell us about it!
+
+You should first check the GNU MP web pages at http://gmplib.org/, under
+"Status of the current release".  There will be patches for all known serious
+bugs there.
+
+Report bugs to gmp-bugs@gmplib.org.  What information is needed in a useful bug
+report is described in the manual.  The same address can be used for suggesting
+modifications and enhancements.
+
+
+
+
+----------------
+Local variables:
+mode: text
+fill-column: 78
+End:
diff --git a/acinclude.m4 b/acinclude.m4

new file mode 100644 (file)

index 0000000..dfb270f
--- /dev/null
+++ b/acinclude.m4
@@ -0,0 +1,3882 @@
+dnl  GMP specific autoconf macros
+
+
+dnl  Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2009, 2011 Free
+dnl  Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+dnl  Some tests use, or must delete, the default compiler output.  The
+dnl  possible filenames are based on what autoconf looks for, namely
+dnl
+dnl    a.out - normal unix style
+dnl    b.out - i960 systems, including gcc there
+dnl    a.exe - djgpp
+dnl    a_out.exe - OpenVMS DEC C called via GNV wrapper (gnv.sourceforge.net)
+dnl    conftest.exe - various DOS compilers
+
+
+dnl  Host triplet patterns
+dnl  ---------------------
+dnl  Each *_PATTERN macro expands to a list of shell glob alternatives
+dnl  separated by "|", suitable as a label in a "case $host in" statement,
+dnl  for instance
+dnl
+dnl      case $host in
+dnl        X86_PATTERN) ... ;;
+dnl      esac
+dnl
+dnl  The values are wrapped in two levels of m4 quotes so that the text,
+dnl  including bracket character classes, comes out literally on expansion.
+dnl
+dnl  NOTE(review): some triplets are matched by more than one pattern, e.g.
+dnl  atom-*-*, k8-*-* and pentium4-*-* match both X86_PATTERN and
+dnl  X86_64_PATTERN, so the order of the case labels at the point of use
+dnl  matters -- confirm against the callers.
+
+define(IA64_PATTERN,
+[[ia64*-*-* | itanium-*-* | itanium2-*-*]])
+
+dnl  Need to be careful not to match m6811, m6812, m68hc11 and m68hc12, all
+dnl  of which config.sub accepts.  (Though none of which are likely to work
+dnl  with GMP.)
+dnl
+define(M68K_PATTERN,
+[[m68k-*-* | m68[0-9][0-9][0-9]-*-*]])
+
+define(POWERPC64_PATTERN,
+[[powerpc64-*-* | powerpc64le-*-* | powerpc620-*-* | powerpc630-*-* | powerpc970-*-* | power[3-9]-*-*]])
+
+define(S390_PATTERN,
+[[s390-*-* | z900esa-*-* | z990esa-*-* | z9esa-*-* | z10esa-*-* | z196esa-*-*]])
+
+define(S390X_PATTERN,
+[[s390x-*-* | z900-*-* | z990-*-* | z9-*-* | z10-*-* | z196-*-*]])
+
+define(X86_PATTERN,
+[[i?86*-*-* | k[5-8]*-*-* | pentium*-*-* | athlon-*-* | viac3*-*-* | geode*-*-* | atom-*-*]])
+
+define(X86_64_PATTERN,
+[[athlon64-*-* | k8-*-* | k10-*-* | bobcat-*-* | bulldozer-*-* | pentium4-*-* | atom-*-* | core2-*-* | corei*-*-* | x86_64-*-* | nano-*-*]])
+
+dnl  GMP_FAT_SUFFIX(DSTVAR, DIRECTORY)
+dnl  ---------------------------------
+dnl  Emit code to set shell variable DSTVAR to the suffix for a fat binary
+dnl  routine from DIRECTORY.  DIRECTORY can be a shell expression like $foo
+dnl  etc.
+dnl
+dnl  The suffix is directory separators / or \ changed to underscores, and
+dnl  if there's more than one directory part, then the first is dropped.
+dnl
+dnl  For instance,
+dnl
+dnl      x86         ->  x86
+dnl      x86/k6      ->  k6
+dnl      x86/k6/mmx  ->  k6_mmx
+
+dnl  The first sed expression acts only on values that contain a "/" and
+dnl  deletes the leading directory part up to and including that first "/".
+dnl  The second expression then turns every remaining "/" or "\" into "_".
+define(GMP_FAT_SUFFIX,
+[[$1=`echo $2 | sed -e '/\//s:^[^/]*/::' -e 's:[\\/]:_:g'`]])
+
+
+dnl  GMP_REMOVE_FROM_LIST(listvar,item)
+dnl  ----------------------------------
+dnl  Emit code to remove any occurrence of ITEM from $LISTVAR.  ITEM can be a
+dnl  shell expression like $foo if desired.
+
+dnl  The list is rebuilt word by word, keeping every word that differs from
+dnl  ITEM.  Both operands of the comparison are double-quoted for the shell,
+dnl  so an ITEM that expands to an empty string simply matches nothing
+dnl  instead of making "test" fail with a syntax error.  The shell variables
+dnl  remove_from_list_tmp and remove_from_list_i are used as scratch.
+define(GMP_REMOVE_FROM_LIST,
+[remove_from_list_tmp=
+for remove_from_list_i in $[][$1]; do
+  if test "$remove_from_list_i" = "[$2]"; then :;
+  else
+     remove_from_list_tmp="$remove_from_list_tmp $remove_from_list_i"
+  fi
+done
+[$1]=$remove_from_list_tmp
+])
+
+
+dnl  GMP_STRIP_PATH(subdir)
+dnl  ----------------------
+dnl  Strip entries */subdir from $path and $fat_path.
+
+dnl  Thin wrapper: applies GMP_STRIP_PATH_VAR to the shell variable path and
+dnl  then to the shell variable fat_path, with the same subdir.
+define(GMP_STRIP_PATH,
+[GMP_STRIP_PATH_VAR(path, [$1])
+GMP_STRIP_PATH_VAR(fat_path, [$1])
+])
+
+dnl  GMP_STRIP_PATH_VAR(listvar, subdir)
+dnl  -----------------------------------
+dnl  Emit code rebuilding the space-separated list in $LISTVAR without the
+dnl  entries that match the shell pattern */SUBDIR.  An entry with no leading
+dnl  directory part does not match that pattern and is kept.  The shell
+dnl  variables i and tmp_path are used as scratch and are overwritten.
+define(GMP_STRIP_PATH_VAR,
+[tmp_path=
+for i in $[][$1]; do
+  case $i in
+    */[$2]) ;;
+    *) tmp_path="$tmp_path $i" ;;
+  esac
+done
+[$1]="$tmp_path"
+])
+
+
+dnl  GMP_INCLUDE_GMP_H
+dnl  -----------------
+dnl  Expand to the right way to #include gmp-h.in.  This must be used
+dnl  instead of gmp.h, since that file isn't generated until the end of the
+dnl  configure.
+dnl
+dnl  Dummy value for GMP_LIMB_BITS is enough
+dnl  for all current configure-time uses of gmp.h.
+
+dnl  The expansion is plain C preprocessor text containing the shell variable
+dnl  references $GMP_NAIL_BITS, $DEFN_LONG_LONG_LIMB and $srcdir.  These are
+dnl  presumably substituted by the shell when the text is written to a test
+dnl  program through a here-document -- confirm at the call sites.
+define(GMP_INCLUDE_GMP_H,
+[[#define __GMP_WITHIN_CONFIGURE 1   /* ignore template stuff */
+#define GMP_NAIL_BITS $GMP_NAIL_BITS
+#define GMP_LIMB_BITS 123
+$DEFN_LONG_LONG_LIMB
+#include "$srcdir/gmp-h.in"]
+])
+
+
+dnl  GMP_HEADER_GETVAL(NAME,FILE)
+dnl  ----------------------------
+dnl  Expand at autoconf time to the value of a "#define NAME" from the given
+dnl  FILE.  The regexps here aren't very rugged, but are enough for gmp.
+dnl  /dev/null as a parameter prevents a hang if $2 is accidentally omitted.
+
+dnl  The grep is run through esyscmd.  The inner patsubst then removes
+dnl  everything up to and including NAME and the whitespace after it, and the
+dnl  outer patsubst removes trailing whitespace and the final newline, leaving
+dnl  just the value.
+define(GMP_HEADER_GETVAL,
+[patsubst(patsubst(
+esyscmd([grep "^#define $1 " $2 /dev/null 2>/dev/null]),
+[^.*$1[        ]+],[]),
+[[
+       ]*$],[])])
+
+
+dnl  GMP_VERSION
+dnl  -----------
+dnl  The gmp version number, extracted from the #defines in gmp-h.in at
+dnl  autoconf time.  Always three components, major.minor.patchlevel, like
+dnl  5.0.5; the patchlevel is included even when it is 0.
+
+define(GMP_VERSION,
+[GMP_HEADER_GETVAL(__GNU_MP_VERSION,gmp-h.in)[]dnl
+.GMP_HEADER_GETVAL(__GNU_MP_VERSION_MINOR,gmp-h.in)[]dnl
+.GMP_HEADER_GETVAL(__GNU_MP_VERSION_PATCHLEVEL,gmp-h.in)])
+
+
+dnl  GMP_SUBST_CHECK_FUNCS(func,...)
+dnl  ------------------------------
+dnl  Setup an AC_SUBST of HAVE_FUNC_01 for each argument.
+
+dnl  Recurses over the argument list.  The first argument FUNC is paired with
+dnl  the cache variable ac_cv_func_FUNC and the output variable HAVE_FUNC_01
+dnl  (name upper-cased), then the macro invokes itself on the remaining
+dnl  arguments and stops when the first argument is empty.
+AC_DEFUN([GMP_SUBST_CHECK_FUNCS],
+[m4_if([$1],,,
+[_GMP_SUBST_CHECK_FUNCS(ac_cv_func_[$1],HAVE_[]m4_translit([$1],[a-z],[A-Z])_01)
+GMP_SUBST_CHECK_FUNCS(m4_shift($@))])])
+
+dnl  Called: _GMP_SUBST_CHECK_FUNCS(cachevar,substvar)
+dnl
+dnl  Sets substvar to 1 when $cachevar is "yes" and to 0 when it is "no"; any
+dnl  other value leaves substvar untouched.  AC_SUBST is written only in the
+dnl  "yes" arm, but presumably registers substvar for output substitution in
+dnl  the whole script, not just in that arm -- see the autoconf manual.
+AC_DEFUN([_GMP_SUBST_CHECK_FUNCS],
+[case $[$1] in
+yes) AC_SUBST([$2],1) ;;
+no)  [$2]=0 ;;
+esac
+])
+
+
+dnl  GMP_SUBST_CHECK_HEADERS(foo.h,...)
+dnl  ----------------------------------
+dnl  Setup an AC_SUBST of HAVE_FOO_H_01 for each argument.
+
+dnl  Recurses over the argument list.  For the first argument foo.h the cache
+dnl  variable is ac_cv_header_foo_h ("." and "/" mapped to "_") and the output
+dnl  variable is HAVE_FOO_H_01 (same mapping, plus upper-casing).  The macro
+dnl  then invokes itself on the remaining arguments and stops when the first
+dnl  argument is empty.
+AC_DEFUN([GMP_SUBST_CHECK_HEADERS],
+[m4_if([$1],,,
+[_GMP_SUBST_CHECK_HEADERS(ac_cv_header_[]m4_translit([$1],[./],[__]),
+HAVE_[]m4_translit([$1],[a-z./],[A-Z__])_01)
+GMP_SUBST_CHECK_HEADERS(m4_shift($@))])])
+
+dnl  Called: _GMP_SUBST_CHECK_HEADERS(cachevar,substvar)
+dnl
+dnl  Sets substvar to 1 when $cachevar is "yes" and to 0 when it is "no"; any
+dnl  other value leaves substvar untouched.  AC_SUBST is written only in the
+dnl  "yes" arm, but presumably registers substvar for output substitution in
+dnl  the whole script, not just in that arm -- see the autoconf manual.
+AC_DEFUN([_GMP_SUBST_CHECK_HEADERS],
+[case $[$1] in
+yes) AC_SUBST([$2],1) ;;
+no)  [$2]=0 ;;
+esac
+])
+
+
+dnl  GMP_COMPARE_GE(A1,B1, A2,B2, ...)
+dnl  ---------------------------------
+dnl  Compare two version numbers A1.A2.etc and B1.B2.etc.  Set
+dnl  $gmp_compare_ge to yes or no according to the result.  The A parts
+dnl  should be variables, the B parts fixed numbers.  As many parts as
+dnl  desired can be included.  An empty string in an A part is taken to be
+dnl  zero, the B parts should be non-empty and non-zero.
+dnl
+dnl  For example,
+dnl
+dnl      GMP_COMPARE_GE($major,10, $minor,3, $subminor,1)
+dnl
+dnl  would test whether $major.$minor.$subminor is greater than or equal to
+dnl  10.3.1.
+dnl
+dnl  The result variable is preset to "no" here; the pairwise comparison is
+dnl  done by GMP_COMPARE_GE_INTERNAL, which only ever changes it to "yes".
+
+AC_DEFUN([GMP_COMPARE_GE],
+[gmp_compare_ge=no
+GMP_COMPARE_GE_INTERNAL($@)
+])
+
+dnl  Called: GMP_COMPARE_GE_INTERNAL(A1,B1, A2,B2, ...)
+dnl
+dnl  Handles the leading A,B pair and recurses on the rest.  When no third
+dnl  argument remains this is the last pair, and "A -ge B" decides.
+dnl  Otherwise "A -gt B" decides immediately, and only "A -eq B" goes on to
+dnl  compare the following pair.  An empty A never sets the result, leaving
+dnl  the "no" preset by the caller.
+AC_DEFUN([GMP_COMPARE_GE_INTERNAL],
+[ifelse(len([$3]),0,
+[if test -n "$1" && test "$1" -ge $2; then
+  gmp_compare_ge=yes
+fi],
+[if test -n "$1"; then
+  if test "$1" -gt $2; then
+    gmp_compare_ge=yes
+  else
+    if test "$1" -eq $2; then
+      GMP_COMPARE_GE_INTERNAL(m4_shift(m4_shift($@)))
+    fi
+  fi
+fi])
+])
+
+
+dnl  GMP_PROG_AR
+dnl  -----------
+dnl  GMP additions to $AR.
+dnl
+dnl  A cross-"ar" may be necessary when cross-compiling since the build
+dnl  system "ar" might try to interpret the object files to build a symbol
+dnl  table index, hence the use of AC_CHECK_TOOL.
+dnl
+dnl  A user-selected $AR is always left unchanged.  AC_CHECK_TOOL is still
+dnl  run to get the "checking" message printed though.
+dnl
+dnl  If extra flags are added to AR, then ac_cv_prog_AR and
+dnl  ac_cv_prog_ac_ct_AR are set too, since libtool (cvs 2003-03-31 at least)
+dnl  will do an AC_CHECK_TOOL and that will take AR from one of those two
+dnl  cached variables.  (ac_cv_prog_AR is used if there's an ac_tool_prefix,
+dnl  or ac_cv_prog_ac_ct_AR is used otherwise.)  FIXME: This is highly
+dnl  dependent on autoconf internals, perhaps it'd work to put our extra
+dnl  flags into AR_FLAGS instead.
+dnl
+dnl  $AR_FLAGS is set to "cq" rather than leaving it to libtool "cru".  The
+dnl  latter fails when libtool goes into piecewise mode and is unlucky
+dnl  enough to have two same-named objects in separate pieces, as happens
+dnl  for instance to random.o (and others) on vax-dec-ultrix4.5.  Naturally
+dnl  a user-selected $AR_FLAGS is left unchanged.
+dnl
+dnl  For reference, $ARFLAGS is used by automake (1.8) for its ".a" archive
+dnl  file rules.  This doesn't get used by the piecewise linking, so we
+dnl  leave it at the default "cru".
+dnl
+dnl  FIXME: Libtool 1.5.2 has its own arrangements for "cq", but that version
+dnl  is broken in other ways.  When we can upgrade, remove the forcible
+dnl  AR_FLAGS=cq.
+
+dnl  The extra flags are looked up in the shell variables ar${abi1}_flags and
+dnl  then ar${abi2}_flags, the first non-empty one being used.  They are
+dnl  appended to $AR exactly once, and the two autoconf cache variables are
+dnl  then set to that same final value of $AR.
+AC_DEFUN([GMP_PROG_AR],
+[dnl  Want to establish $AR before libtool initialization.
+AC_BEFORE([$0],[AC_PROG_LIBTOOL])
+gmp_user_AR=$AR
+AC_CHECK_TOOL(AR, ar, ar)
+if test -z "$gmp_user_AR"; then
+                        eval arflags=\"\$ar${abi1}_flags\"
+  test -n "$arflags" || eval arflags=\"\$ar${abi2}_flags\"
+  if test -n "$arflags"; then
+    AC_MSG_CHECKING([for extra ar flags])
+    AR="$AR $arflags"
+dnl  AR already includes the extra flags here, so copy it as it is.
+    ac_cv_prog_AR="$AR"
+    ac_cv_prog_ac_ct_AR="$AR"
+    AC_MSG_RESULT([$arflags])
+  fi
+fi
+if test -z "$AR_FLAGS"; then
+  AR_FLAGS=cq
+fi
+])
+
+
+dnl  GMP_PROG_M4
+dnl  -----------
+dnl  Find a working m4, either in $PATH or likely locations, and setup $M4
+dnl  and an AC_SUBST accordingly.  If $M4 is already set then it's a user
+dnl  choice and is accepted with no checks.  GMP_PROG_M4 is like
+dnl  AC_PATH_PROG or AC_CHECK_PROG, but tests each m4 found to see if it's
+dnl  good enough.
+dnl
+dnl  See mpn/asm-defs.m4 for details on the known bad m4s.
+
+dnl  The probe file conftest.m4 checks for two known failures: $# not being
+dnl  supported, and eval() rejecting the digits 8 and 9 in a constant.  A
+dnl  candidate is accepted only when its output is exactly "good".  Plain
+dnl  "m4" is tried first, then m4 in each directory of $PATH followed by
+dnl  /usr/5bin; if none passes, configure stops with an error.  The result is
+dnl  cached in gmp_cv_prog_m4.
+AC_DEFUN([GMP_PROG_M4],
+[AC_ARG_VAR(M4,[m4 macro processor])
+AC_CACHE_CHECK([for suitable m4],
+                gmp_cv_prog_m4,
+[if test -n "$M4"; then
+  gmp_cv_prog_m4="$M4"
+else
+  cat >conftest.m4 <<\EOF
+dnl  Must protect this against being expanded during autoconf m4!
+dnl  Dont put "dnl"s in this as autoconf will flag an error for unexpanded
+dnl  macros.
+[define(dollarhash,``$][#'')ifelse(dollarhash(x),1,`define(t1,Y)',
+``bad: $][# not supported (SunOS /usr/bin/m4)
+'')ifelse(eval(89),89,`define(t2,Y)',
+`bad: eval() doesnt support 8 or 9 in a constant (OpenBSD 2.6 m4)
+')ifelse(t1`'t2,YY,`good
+')]
+EOF
+dnl ' <- balance the quotes for emacs sh-mode
+  echo "trying m4" >&AC_FD_CC
+  gmp_tmp_val=`(m4 conftest.m4) 2>&AC_FD_CC`
+  echo "$gmp_tmp_val" >&AC_FD_CC
+  if test "$gmp_tmp_val" = good; then
+    gmp_cv_prog_m4="m4"
+  else
+    IFS="${IFS=        }"; ac_save_ifs="$IFS"; IFS=":"
+dnl $ac_dummy forces splitting on constant user-supplied paths.
+dnl POSIX.2 word splitting is done only on the output of word expansions,
+dnl not every word.  This closes a longstanding sh security hole.
+    ac_dummy="$PATH:/usr/5bin"
+    for ac_dir in $ac_dummy; do
+      test -z "$ac_dir" && ac_dir=.
+      echo "trying $ac_dir/m4" >&AC_FD_CC
+      gmp_tmp_val=`($ac_dir/m4 conftest.m4) 2>&AC_FD_CC`
+      echo "$gmp_tmp_val" >&AC_FD_CC
+      if test "$gmp_tmp_val" = good; then
+        gmp_cv_prog_m4="$ac_dir/m4"
+        break
+      fi
+    done
+    IFS="$ac_save_ifs"
+    if test -z "$gmp_cv_prog_m4"; then
+      AC_MSG_ERROR([No usable m4 in \$PATH or /usr/5bin (see config.log for reasons).])
+    fi
+  fi
+  rm -f conftest.m4
+fi])
+M4="$gmp_cv_prog_m4"
+AC_SUBST(M4)
+])
+
+
+dnl  GMP_M4_M4WRAP_SPURIOUS
+dnl  ----------------------
+dnl  Check for spurious output from m4wrap(), as described in mpn/asm-defs.m4.
+dnl
+dnl  The following systems have been seen with the problem.
+dnl
+dnl  - Unicos alpha, but its assembler doesn't seem to mind.
+dnl  - MacOS X Darwin, its assembler fails.
+dnl  - NetBSD 1.4.1 m68k, and gas 1.92.3 there gives a warning and ignores
+dnl    the bad last line since it doesn't have a newline.
+dnl  - NetBSD 1.4.2 alpha, but its assembler doesn't seem to mind.
+dnl  - HP-UX ia64.
+dnl
+dnl  Enhancement: Maybe this could be in GMP_PROG_M4, and attempt to prefer
+dnl  an m4 with a working m4wrap, if it can be found.
+
+dnl  The test input defines x as empty and registers it with m4wrap, so a
+dnl  correct m4 prints nothing; any non-zero byte count from "wc -c" is
+dnl  reported as spurious output.  The yes/no answer is cached in
+dnl  gmp_cv_m4_m4wrap_spurious and handed to GMP_DEFINE_RAW as a define of
+dnl  M4WRAP_SPURIOUS.
+AC_DEFUN([GMP_M4_M4WRAP_SPURIOUS],
+[AC_REQUIRE([GMP_PROG_M4])
+AC_CACHE_CHECK([if m4wrap produces spurious output],
+               gmp_cv_m4_m4wrap_spurious,
+[# hide the d-n-l from autoconf's error checking
+tmp_d_n_l=d""nl
+cat >conftest.m4 <<EOF
+[changequote({,})define(x,)m4wrap({x})$tmp_d_n_l]
+EOF
+echo test input is >&AC_FD_CC
+cat conftest.m4 >&AC_FD_CC
+tmp_chars=`$M4 conftest.m4 | wc -c`
+echo produces $tmp_chars chars output >&AC_FD_CC
+rm -f conftest.m4
+if test $tmp_chars = 0; then
+  gmp_cv_m4_m4wrap_spurious=no
+else
+  gmp_cv_m4_m4wrap_spurious=yes
+fi
+])
+GMP_DEFINE_RAW(["define(<M4WRAP_SPURIOUS>,<$gmp_cv_m4_m4wrap_spurious>)"])
+])
+
+
+dnl  GMP_PROG_NM
+dnl  -----------
+dnl  GMP additions to libtool AC_PROG_NM.
+dnl
+dnl  Note that if AC_PROG_NM can't find a working nm it still leaves
+dnl  $NM set to "nm", so $NM can't be assumed to actually work.
+dnl
+dnl  A user-selected $NM is always left unchanged.  AC_PROG_NM is still run
+dnl  to get the "checking" message printed though.
+dnl
+dnl  Perhaps it'd be worthwhile checking that nm works, by running it on an
+dnl  actual object file.  For instance on sparcv9 solaris old versions of
+dnl  GNU nm don't recognise 64-bit objects.  Checking would give a better
+dnl  error message than just a failure in later tests like GMP_ASM_W32 etc.
+dnl
+dnl  On the other hand it's not really normal autoconf practice to take too
+dnl  much trouble over detecting a broken set of tools.  And libtool doesn't
+dnl  do anything at all for say ranlib or strip.  So for now we're inclined
+dnl  to just demand that the user provides a coherent environment.
+
+dnl  The extra flags are looked up in the shell variables nm${abi1}_flags and
+dnl  then nm${abi2}_flags, the first non-empty one being appended to $NM.
+dnl  NOTE(review): abi1 and abi2 are set outside this macro -- confirm they
+dnl  are established before GMP_PROG_NM is expanded.
+AC_DEFUN([GMP_PROG_NM],
+[dnl  Make sure we're the first to call AC_PROG_NM, so our extra flags are
+dnl   used by everyone.
+AC_BEFORE([$0],[AC_PROG_NM])
+gmp_user_NM=$NM
+AC_PROG_NM
+
+# FIXME: When cross compiling (ie. $ac_tool_prefix not empty), libtool
+# defaults to plain "nm" if a "${ac_tool_prefix}nm" is not found.  In this
+# case run it again to try the native "nm", firstly so that likely locations
+# are searched, secondly so that -B or -p are added if necessary for BSD
+# format.  This is necessary for instance on OSF with "./configure
+# --build=alphaev5-dec-osf --host=alphaev6-dec-osf".
+#
+if test -z "$gmp_user_NM" && test -n "$ac_tool_prefix" && test "$NM" = nm; then
+  $as_unset lt_cv_path_NM
+  gmp_save_ac_tool_prefix=$ac_tool_prefix
+  ac_tool_prefix=
+  NM=
+  AC_PROG_NM
+  ac_tool_prefix=$gmp_save_ac_tool_prefix
+fi
+
+if test -z "$gmp_user_NM"; then
+                        eval nmflags=\"\$nm${abi1}_flags\"
+  test -n "$nmflags" || eval nmflags=\"\$nm${abi2}_flags\"
+  if test -n "$nmflags"; then
+    AC_MSG_CHECKING([for extra nm flags])
+    NM="$NM $nmflags"
+    AC_MSG_RESULT([$nmflags])
+  fi
+fi
+])
+
+
+dnl  GMP_PROG_CC_WORKS(cc+cflags,[ACTION-IF-WORKS][,ACTION-IF-NOT-WORKS])
+dnl  --------------------------------------------------------------------
+dnl  Check if cc+cflags can compile and link.
+dnl
+dnl  This test is designed to be run repeatedly with different cc+cflags
+dnl  selections, so the result is not cached.
+dnl
+dnl  For a native build, meaning $cross_compiling == no, we require that the
+dnl  generated program will run.  This is the same as AC_PROG_CC does in
+dnl  _AC_COMPILER_EXEEXT_WORKS, and checking here will ensure we don't pass
+dnl  a CC/CFLAGS combination that it rejects.
+dnl
+dnl  sparc-*-solaris2.7 can compile ABI=64 but won't run it if the kernel
+dnl  was booted in 32-bit mode.  The effect of requiring that the compiler
+dnl  output runs is that a plain native "./configure" falls back on ABI=32,
+dnl  but ABI=64 is still available as a cross-compile.
+dnl
+dnl  The various specific problems we try to detect are done in separate
+dnl  compiles.  Although this is probably a bit slower than one test
+dnl  program, it makes it easy to indicate the problem in AC_MSG_RESULT,
+dnl  hence giving the user a clue about why we rejected the compiler.
+
+dnl  The outcome is accumulated in the shell variable gmp_prog_cc_works.  It
+dnl  starts as "yes"; the first failing part replaces it with "no, REASON",
+dnl  after which the remaining parts are skipped.  The final value is printed
+dnl  as the check result and selects ACTION-IF-WORKS or ACTION-IF-NOT-WORKS.
+AC_DEFUN([GMP_PROG_CC_WORKS],
+[AC_MSG_CHECKING([compiler $1])
+gmp_prog_cc_works=yes
+
+# first see a simple "main()" works, then go on to other checks
+GMP_PROG_CC_WORKS_PART([$1], [])
+
+GMP_PROG_CC_WORKS_PART([$1], [function pointer return],
+[/* The following provokes an internal error from gcc 2.95.2 -mpowerpc64
+   (without -maix64), hence detecting an unusable compiler */
+void *g() { return (void *) 0; }
+void *f() { return g(); }
+])
+
+GMP_PROG_CC_WORKS_PART([$1], [cmov instruction],
+[/* The following provokes an invalid instruction syntax from i386 gcc
+   -march=pentiumpro on Solaris 2.8.  The native sun assembler
+   requires a non-standard syntax for cmov which gcc (as of 2.95.2 at
+   least) doesn't know.  */
+int n;
+int cmov () { return (n >= 0 ? n : 0); }
+])
+
+GMP_PROG_CC_WORKS_PART([$1], [double -> ulong conversion],
+[/* The following provokes a linker invocation problem with gcc 3.0.3
+   on AIX 4.3 under "-maix64 -mpowerpc64 -mcpu=630".  The -mcpu=630
+   option causes gcc to incorrectly select the 32-bit libgcc.a, not
+   the 64-bit one, and consequently it misses out on the __fixunsdfdi
+   helper (double -> uint64 conversion).  */
+double d;
+unsigned long gcc303 () { return (unsigned long) d; }
+])
+
+GMP_PROG_CC_WORKS_PART([$1], [double negation],
+[/* The following provokes an error from hppa gcc 2.95 under -mpa-risc-2-0 if
+   the assembler doesn't know hppa 2.0 instructions.  fneg is a 2.0
+   instruction, and a negation like this comes out using it.  */
+double fneg_data;
+unsigned long fneg () { return -fneg_data; }
+])
+
+GMP_PROG_CC_WORKS_PART([$1], [double -> float conversion],
+[/* The following makes gcc 3.3 -march=pentium4 generate an SSE2 xmm insn
+   (cvtsd2ss) which will provoke an error if the assembler doesn't recognise
+   those instructions.  Not sure how much of the gmp code will come out
+   wanting sse2, but it's easiest to reject an option we know is bad.  */
+double ftod_data;
+float ftod () { return (float) ftod_data; }
+])
+
+GMP_PROG_CC_WORKS_PART([$1], [gnupro alpha ev6 char spilling],
+[/* The following provokes an internal compiler error from gcc version
+   "2.9-gnupro-99r1" under "-O2 -mcpu=ev6", apparently relating to char
+   values being spilled into floating point registers.  The problem doesn't
+   show up all the time, but has occurred enough in GMP for us to reject
+   this compiler+flags.  */
+#include <string.h>  /* for memcpy */
+struct try_t
+{
+ char dst[2];
+ char size;
+ long d0, d1, d2, d3, d4, d5, d6;
+ char overlap;
+};
+struct try_t param[6];
+int
+param_init ()
+{
+ struct try_t *p;
+ memcpy (p, &param[ 2 ], sizeof (*p));
+ memcpy (p, &param[ 2 ], sizeof (*p));
+ p->size = 2;
+ memcpy (p, &param[ 1 ], sizeof (*p));
+ p->dst[0] = 1;
+ p->overlap = 2;
+ memcpy (p, &param[ 3 ], sizeof (*p));
+ p->dst[0] = 1;
+ p->overlap = 8;
+ memcpy (p, &param[ 4 ], sizeof (*p));
+ memcpy (p, &param[ 4 ], sizeof (*p));
+ p->overlap = 8;
+ memcpy (p, &param[ 5 ], sizeof (*p));
+ memcpy (p, &param[ 5 ], sizeof (*p));
+ memcpy (p, &param[ 5 ], sizeof (*p));
+ return 0;
+}
+])
+
+# __builtin_alloca is not available everywhere, check it exists before
+# seeing that it works.  The availability probe is compiled and linked as a
+# whole program, so it has to carry its own main; without one the link can
+# never succeed and the alloca array test would be silently skipped.
+GMP_PROG_CC_WORKS_PART_TEST([$1],[__builtin_alloca availability],
+[int k; int foo () { __builtin_alloca (k); }
+int main () { return 0; }],
+  [GMP_PROG_CC_WORKS_PART([$1], [alloca array],
+[/* The following provokes an internal compiler error from Itanium HP-UX cc
+    under +O2 or higher.  We use this sort of code in mpn/generic/mul_fft.c. */
+int k;
+int foo ()
+{
+  int i, **a;
+  a = __builtin_alloca (k);
+  for (i = 0; i <= k; i++)
+    a[i] = __builtin_alloca (1 << i);
+}
+])])
+
+GMP_PROG_CC_WORKS_PART([$1], [abs int -> double conversion],
+[/* The following provokes an internal error from the assembler on
+   power2-ibm-aix4.3.1.0.  gcc -mrios2 compiles to nabs+fcirz, and this
+   results in "Internal error related to the source program domain".
+
+   For reference it seems to be the combination of nabs+fcirz which is bad,
+   not either alone.  This sort of thing occurs in mpz/get_str.c with the
+   way double chars_per_bit_exactly is applied in MPN_SIZEINBASE.  Perhaps
+   if that code changes to a scaled-integer style then we won't need this
+   test.  */
+
+double fp[1];
+int x;
+int f ()
+{
+  int a;
+  a = (x >= 0 ? x : -x);
+  return a * fp[0];
+}
+])
+
+GMP_PROG_CC_WORKS_PART([$1], [long long reliability test 1],
+[/* The following provokes a segfault in the compiler on powerpc-apple-darwin.
+   Extracted from tests/mpn/t-iord_u.c.  Causes Apple's gcc 3.3 build 1640 and
+   1666 to segfault with e.g., -O2 -mpowerpc64.  */
+
+#if defined (__GNUC__) && ! defined (__cplusplus)
+typedef unsigned long long t1;typedef t1*t2;
+static __inline__ t1 e(t2 rp,t2 up,int n,t1 v0)
+{t1 c,x,r;int i;if(v0){c=1;for(i=1;i<n;i++){x=up[i];r=x+1;rp[i]=r;}}return c;}
+f(){static const struct{t1 n;t1 src[9];t1 want[9];}d[]={{1,{0},{1}},};t1 got[9];int i;
+for(i=0;i<1;i++){if(e(got,got,9,d[i].n)==0)h();g(i,d[i].src,d[i].n,got,d[i].want,9);if(d[i].n)h();}}
+h(){}g(){}
+#else
+int dummy;
+#endif
+])
+
+GMP_PROG_CC_WORKS_PART([$1], [long long reliability test 2],
+[/* The following provokes an internal compiler error on powerpc-apple-darwin.
+   Extracted from mpz/cfdiv_q_2exp.c.  Causes Apple's gcc 3.3 build 1640 and
+   1666 to get an ICE with -O1 -mpowerpc64.  */
+
+#if defined (__GNUC__) && ! defined (__cplusplus)
+f(int u){int i;long long x;x=u?~0:0;if(x)for(i=0;i<9;i++);x&=g();if(x)g();}
+g(){}
+#else
+int dummy;
+#endif
+])
+
+GMP_PROG_CC_WORKS_PART_MAIN([$1], [mpn_lshift_com optimization],
+[/* The following is mis-compiled by HP ia-64 cc version
+        cc: HP aC++/ANSI C B3910B A.05.55 [Dec 04 2003]
+   under "cc +O3", both in +DD32 and +DD64 modes.  The mpn_lshift_com gets
+   inlined and its return value somehow botched to be 0 instead of 1.  This
+   arises in the real mpn_lshift_com in mul_fft.c.  A lower optimization
+   level, like +O2 seems ok.  This code needs to be run to show the problem,
+   but that's fine, the offending cc is a native-only compiler so we don't
+   have to worry about cross compiling.  */
+
+#if ! defined (__cplusplus)
+unsigned long
+lshift_com (rp, up, n, cnt)
+  unsigned long *rp;
+  unsigned long *up;
+  long n;
+  unsigned cnt;
+{
+  unsigned long retval, high_limb, low_limb;
+  unsigned tnc;
+  long i;
+  tnc = 8 * sizeof (unsigned long) - cnt;
+  low_limb = *up++;
+  retval = low_limb >> tnc;
+  high_limb = low_limb << cnt;
+  for (i = n - 1; i != 0; i--)
+    {
+      low_limb = *up++;
+      *rp++ = ~(high_limb | (low_limb >> tnc));
+      high_limb = low_limb << cnt;
+    }
+  return retval;
+}
+int
+main ()
+{
+  unsigned long cy, rp[2], up[2];
+  up[0] = ~ 0L;
+  up[1] = 0;
+  cy = lshift_com (rp, up, 2L, 1);
+  if (cy != 1L)
+    return 1;
+  return 0;
+}
+#else
+int
+main ()
+{
+  return 0;
+}
+#endif
+])
+
+GMP_PROG_CC_WORKS_PART_MAIN([$1], [mpn_lshift_com optimization 2],
+[/* The following is mis-compiled by Intel ia-64 icc version 1.8 under
+    "icc -O3".  After several calls, the function writes partial garbage to
+    the result vector.  Perhaps relates to the chk.a.nc insn.  This code needs
+    to be run to show the problem, but that's fine, the offending cc is a
+    native-only compiler so we don't have to worry about cross compiling.  */
+
+#if ! defined (__cplusplus)
+#include <stdlib.h>
+void
+lshift_com (rp, up, n, cnt)
+  unsigned long *rp;
+  unsigned long *up;
+  long n;
+  unsigned cnt;
+{
+  unsigned long high_limb, low_limb;
+  unsigned tnc;
+  long i;
+  up += n;
+  rp += n;
+  tnc = 8 * sizeof (unsigned long) - cnt;
+  low_limb = *--up;
+  high_limb = low_limb << cnt;
+  for (i = n - 1; i != 0; i--)
+    {
+      low_limb = *--up;
+      *--rp = ~(high_limb | (low_limb >> tnc));
+      high_limb = low_limb << cnt;
+    }
+  *--rp = ~high_limb;
+}
+int
+main ()
+{
+  unsigned long *r, *r2;
+  unsigned long a[88 + 1];
+  long i;
+  for (i = 0; i < 88 + 1; i++)
+    a[i] = ~0L;
+  r = malloc (10000 * sizeof (unsigned long));
+  r2 = r;
+  for (i = 0; i < 528; i += 22)
+    {
+      lshift_com (r2, a,
+                 i / (8 * sizeof (unsigned long)) + 1,
+                 i % (8 * sizeof (unsigned long)));
+      r2 += 88 + 1;
+    }
+  if (r[2048] != 0 || r[2049] != 0 || r[2050] != 0 || r[2051] != 0 ||
+      r[2052] != 0 || r[2053] != 0 || r[2054] != 0)
+    abort ();
+  return 0;
+}
+#else
+int
+main ()
+{
+  return 0;
+}
+#endif
+])
+
+
+# A certain _GLOBAL_OFFSET_TABLE_ problem in past versions of gas, tickled
+# by recent versions of gcc.
+#
+if test "$gmp_prog_cc_works" = yes; then
+  case $host in
+    X86_PATTERN)
+      # this problem only arises in PIC code, so don't need to test when
+      # --disable-shared.  We don't necessarily have $enable_shared set to
+      # yes at this point, it will still be unset for the default (which is
+      # yes); hence the use of "!= no".
+      if test "$enable_shared" != no; then
+        GMP_PROG_CC_X86_GOT_EAX_EMITTED([$1],
+          [GMP_ASM_X86_GOT_EAX_OK([$1],,
+            [gmp_prog_cc_works="no, bad gas GOT with eax"])])
+      fi
+      ;;
+  esac
+fi
+
+AC_MSG_RESULT($gmp_prog_cc_works)
+case $gmp_prog_cc_works in
+  yes)
+    [$2]
+    ;;
+  *)
+    [$3]
+    ;;
+esac
+])
+
+dnl  Called: GMP_PROG_CC_WORKS_PART(CC+CFLAGS,FAIL-MESSAGE [,CODE])
+dnl  A dummy main() is appended to the CODE given.
+dnl
+dnl  The combined text is handed to GMP_PROG_CC_WORKS_PART_MAIN, so CODE
+dnl  itself must not define main().  With CODE omitted the test program is
+dnl  just the dummy main().
+dnl
+AC_DEFUN([GMP_PROG_CC_WORKS_PART],
+[GMP_PROG_CC_WORKS_PART_MAIN([$1],[$2],
+[$3]
+[int main () { return 0; }])
+])
+
+dnl  Called: GMP_PROG_CC_WORKS_PART_MAIN(CC+CFLAGS,FAIL-MESSAGE,CODE)
+dnl  CODE must include a main().
+dnl
+dnl  Nothing is done when the test succeeds.  When the compile or link fails,
+dnl  gmp_prog_cc_works becomes "no, FAIL-MESSAGE"; when the program builds
+dnl  but does not run, ", program does not run" is appended to that.  The
+dnl  ", " separator before FAIL-MESSAGE is left out if FAIL-MESSAGE is empty.
+dnl
+AC_DEFUN([GMP_PROG_CC_WORKS_PART_MAIN],
+[GMP_PROG_CC_WORKS_PART_TEST([$1],[$2],[$3],
+  [],
+  gmp_prog_cc_works="no[]m4_if([$2],,,[[, ]])[$2]",
+  gmp_prog_cc_works="no[]m4_if([$2],,,[[, ]])[$2][[, program does not run]]")
+])
+
+dnl  Called: GMP_PROG_CC_WORKS_PART_TEST(CC+CFLAGS,TITLE,[CODE],
+dnl            [ACTION-GOOD],[ACTION-BAD],[ACTION-NORUN])
+dnl
+dnl  Does nothing unless $gmp_prog_cc_works is still "yes".  CODE is written
+dnl  to conftest.c, then compiled and linked with CC+CFLAGS, so it must be a
+dnl  complete program.  For a native build ($cross_compiling = no) the
+dnl  program is also run, trying each possible default output filename in
+dnl  turn.  The outcome selects one action: success gives ACTION-GOOD, a
+dnl  failed compile or link gives ACTION-BAD, and a program that builds but
+dnl  does not run successfully gives ACTION-NORUN.  On any failure the test
+dnl  program is copied to the AC_FD_CC log descriptor.
+dnl
+AC_DEFUN([GMP_PROG_CC_WORKS_PART_TEST],
+[if test "$gmp_prog_cc_works" = yes; then
+  # remove anything that might look like compiler output to our "||" expression
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  cat >conftest.c <<EOF
+[$3]
+EOF
+  echo "Test compile: [$2]" >&AC_FD_CC
+  gmp_compile="$1 conftest.c >&AC_FD_CC"
+  if AC_TRY_EVAL(gmp_compile); then
+    cc_works_part=yes
+    if test "$cross_compiling" = no; then
+      if AC_TRY_COMMAND([./a.out || ./b.out || ./a.exe || ./a_out.exe || ./conftest]); then :;
+      else
+        cc_works_part=norun
+      fi
+    fi
+  else
+    cc_works_part=no
+  fi
+  if test "$cc_works_part" != yes; then
+    echo "failed program was:" >&AC_FD_CC
+    cat conftest.c >&AC_FD_CC
+  fi
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  case $cc_works_part in
+    yes)
+      $4
+      ;;
+    no)
+      $5
+      ;;
+    norun)
+      $6
+      ;;
+  esac
+fi
+])
+
+
+dnl  GMP_PROG_CC_WORKS_LONGLONG(cc+cflags,[ACTION-YES][,ACTION-NO])
+dnl  --------------------------------------------------------------
+dnl  Check that cc+cflags accepts "long long".
+dnl
+dnl  This test is designed to be run repeatedly with different cc+cflags
+dnl  selections, so the result is not cached.
+
+dnl  The test program is only compiled (-c); it is never linked or run.  An
+dnl  omitted action is replaced by the shell no-op ":".
+dnl  NOTE(review): the result is kept in the shell variable
+dnl  gmp_prog_cc_works, the same name GMP_PROG_CC_WORKS uses, so each macro
+dnl  overwrites the other's result -- confirm no caller reads it afterwards.
+AC_DEFUN([GMP_PROG_CC_WORKS_LONGLONG],
+[AC_MSG_CHECKING([compiler $1 has long long])
+cat >conftest.c <<EOF
+long long  foo;
+long long  bar () { return foo; }
+int main () { return 0; }
+EOF
+gmp_prog_cc_works=no
+gmp_compile="$1 -c conftest.c >&AC_FD_CC"
+if AC_TRY_EVAL(gmp_compile); then
+  gmp_prog_cc_works=yes
+else
+  echo "failed program was:" >&AC_FD_CC
+  cat conftest.c >&AC_FD_CC
+fi
+rm -f conftest* a.out b.out a.exe a_out.exe
+AC_MSG_RESULT($gmp_prog_cc_works)
+if test $gmp_prog_cc_works = yes; then
+  ifelse([$2],,:,[$2])
+else
+  ifelse([$3],,:,[$3])
+fi
+])
+
+
+dnl  GMP_C_TEST_SIZEOF(cc/cflags,test,[ACTION-GOOD][,ACTION-BAD])
+dnl  ------------------------------------------------------------
+dnl  The given cc/cflags compiler is run to check the size of a type
+dnl  specified by the "test" argument.  "test" can either be a string, or a
+dnl  variable like $foo.  The value should be for instance "sizeof-long-4",
+dnl  to test that sizeof(long)==4.
+dnl
+dnl  This test is designed to be run for different compiler and/or flags
+dnl  combinations, so the result is not cached.
+dnl
+dnl  The idea for making an array that has a negative size if the desired
+dnl  condition test is false comes from autoconf AC_CHECK_SIZEOF.  The cast
+dnl  to "long" in the array dimension also follows autoconf, apparently it's
+dnl  a workaround for a HP compiler bug.
+
+AC_DEFUN([GMP_C_TEST_SIZEOF],
+[echo "configure: testlist $2" >&AC_FD_CC
+# Pull the type name and the wanted size out of a "sizeof-TYPE-N" request.
+[gmp_sizeof_type=`echo "$2" | sed 's/sizeof-\([a-z]*\).*/\1/'`]
+[gmp_sizeof_want=`echo "$2" | sed 's/sizeof-[a-z]*-\([0-9]*\).*/\1/'`]
+AC_MSG_CHECKING([compiler $1 has sizeof($gmp_sizeof_type)==$gmp_sizeof_want])
+# Pessimistic default which is flipped only when the probe below compiles.
+gmp_c_testlist_sizeof=no
+# The array dimension in the probe is negative and hence a compile error
+# unless the size is the wanted one.
+cat >conftest.c <<EOF
+[int
+main ()
+{
+  static int test_array [1 - 2 * (long) (sizeof ($gmp_sizeof_type) != $gmp_sizeof_want)];
+  test_array[0] = 0;
+  return 0;
+}]
+EOF
+gmp_compile="$1 -c conftest.c >&AC_FD_CC"
+AC_TRY_EVAL(gmp_compile) && gmp_c_testlist_sizeof=yes
+rm -f conftest*
+AC_MSG_RESULT($gmp_c_testlist_sizeof)
+case $gmp_c_testlist_sizeof in
+  yes)
+    ifelse([$3],[],[:],[$3])
+    ;;
+  *)
+    ifelse([$4],[],[:],[$4])
+    ;;
+esac
+])
+
+
+dnl  GMP_PROG_CC_IS_GNU(CC,[ACTIONS-IF-YES][,ACTIONS-IF-NO])
+dnl  -------------------------------------------------------
+dnl  Determine whether the given compiler is GNU C.
+dnl
+dnl  This test is the same as autoconf _AC_LANG_COMPILER_GNU, but doesn't
+dnl  cache the result.  The same "ifndef" style test is used, to avoid
+dnl  problems with syntax checking cpp's used on NeXT and Apple systems.
+
+AC_DEFUN([GMP_PROG_CC_IS_GNU],
+[cat >conftest.c <<EOF
+#if ! defined (__GNUC__) || defined (__INTEL_COMPILER)
+  choke me
+#endif
+EOF
+gmp_compile="$1 -c conftest.c >&AC_FD_CC"
+# Record the verdict first so the scratch files are removed in one place.
+if AC_TRY_EVAL(gmp_compile); then
+  gmp_tmp_is_gnu=yes
+else
+  gmp_tmp_is_gnu=no
+fi
+rm -f conftest*
+case $gmp_tmp_is_gnu in
+  yes)
+    # A line is printed for the user only when the answer is yes.
+    AC_MSG_CHECKING([whether $1 is gcc])
+    AC_MSG_RESULT(yes)
+    ifelse([$2],[],[:],[$2])
+    ;;
+  *)
+    ifelse([$3],[],[:],[$3])
+    ;;
+esac
+])
+
+
+dnl  GMP_PROG_CC_IS_XLC(CC,[ACTIONS-IF-YES][,ACTIONS-IF-NO])
+dnl  -------------------------------------------------------
+dnl  Determine whether the given compiler is IBM xlc (on AIX).
+dnl
+dnl  There doesn't seem to be a preprocessor symbol to test for this, or if
+dnl  there is one then it's well hidden in xlc 3.1 on AIX 4.3, so just grep
+dnl  the man page printed when xlc is invoked with no arguments.
+
+AC_DEFUN([GMP_PROG_CC_IS_XLC],
+[gmp_command="$1 2>&1 | grep xlc >/dev/null"
+# Run the compiler with no arguments and look for xlc in what it prints.
+if AC_TRY_EVAL(gmp_command); then
+  gmp_tmp_is_xlc=yes
+else
+  gmp_tmp_is_xlc=no
+fi
+case $gmp_tmp_is_xlc in
+  yes)
+    # A line is printed for the user only when the answer is yes.
+    AC_MSG_CHECKING([whether $1 is xlc])
+    AC_MSG_RESULT(yes)
+    ifelse([$2],[],[:],[$2])
+    ;;
+  *)
+    ifelse([$3],[],[:],[$3])
+    ;;
+esac
+])
+
+
+dnl  GMP_PROG_CC_X86_GOT_EAX_EMITTED(CC+CFLAGS, [ACTION-YES] [, ACTION-NO])
+dnl  ----------------------------------------------------------------------
+dnl  Determine whether CC+CFLAGS emits instructions using %eax with
+dnl  _GLOBAL_OFFSET_TABLE_.  This test is for use on x86 systems.
+dnl
+dnl  Recent versions of gcc will use %eax for the GOT in leaf functions, for
+dnl  instance gcc 3.3.3 with -O3.  This avoids having to save and restore
+dnl  %ebx which otherwise usually holds the GOT, and is what gcc used in the
+dnl  past.
+dnl
+dnl  %ecx and %edx are also candidates for this sort of optimization, and
+dnl  are used under lesser optimization levels, like -O2 in 3.3.3.  FIXME:
+dnl  It's not quite clear what the conditions for using %eax are, we might
+dnl  need more test code to provoke it.
+dnl
+dnl  The motivation for this test is that past versions of gas have bugs
+dnl  affecting this usage, see GMP_ASM_X86_GOT_EAX_OK.
+dnl
+dnl  This test is not specific to gcc, other compilers might emit %eax GOT
+dnl  insns like this, though we've not investigated that.
+dnl
+dnl  This is for use by compiler probing in GMP_PROG_CC_WORKS, so it doesn't
+dnl  cache the result.
+dnl
+dnl  -fPIC is hard coded here, because this test is for use before libtool
+dnl  has established the pic options.  It's right for gcc, but perhaps not
+dnl  other compilers.
+
+AC_DEFUN([GMP_PROG_CC_X86_GOT_EAX_EMITTED],
+[echo "Testing gcc GOT with eax emitted" >&AC_FD_CC
+tmp_got_emitted=no
+cat >conftest.c <<\EOF
+[int foo;
+int bar () { return foo; }
+]EOF
+gmp_compile="$1 -fPIC -S conftest.c >&AC_FD_CC 2>&1"
+# Stop after generating assembler and then look for an addl that combines
+# the GOT symbol with eax.
+if AC_TRY_EVAL(gmp_compile) \
+   && grep "addl.*_GLOBAL_OFFSET_TABLE_.*eax" conftest.s >/dev/null; then
+  tmp_got_emitted=yes
+fi
+rm -f conftest.*
+echo "Result: $tmp_got_emitted" >&AC_FD_CC
+case $tmp_got_emitted in
+  yes)
+    ifelse([$2],[],[:],[$2])
+    ;;
+  *)
+    ifelse([$3],[],[:],[$3])
+    ;;
+esac
+])
+
+
+dnl  GMP_HPC_HPPA_2_0(cc,[ACTION-IF-GOOD][,ACTION-IF-BAD])
+dnl  ---------------------------------------------------------
+dnl  Find out whether a HP compiler is good enough to generate hppa 2.0.
+dnl
+dnl  This test might be repeated for different compilers, so the result is
+dnl  not cached.
+
+AC_DEFUN([GMP_HPC_HPPA_2_0],
+[AC_MSG_CHECKING([whether HP compiler $1 is good for 64-bits])
+# Bad compiler output:
+#   ccom: HP92453-01 G.10.32.05 HP C Compiler
+# Good compiler output:
+#   ccom: HP92453-01 A.10.32.30 HP C Compiler
+# Let A.10.32.30 or higher be ok.
+echo >conftest.c
+# Only the compiler itself goes on this command line.  The second macro
+# argument is the ACTION-IF-GOOD shell code and must not be handed to the
+# compiler as if it were a set of flags.
+gmp_tmp_vs=`$1 -V -c -o conftest.$OBJEXT conftest.c 2>&1 | grep "^ccom:"`
+echo "Version string: $gmp_tmp_vs" >&AC_FD_CC
+# Use -f as every other probe does so a missing file is never an error.
+rm -f conftest*
+# Split the X.NN.NN.NN field of the ccom line into its three numbers.
+gmp_tmp_v1=`echo $gmp_tmp_vs | sed 's/.* .\.\([[0-9]]*\).*/\1/'`
+gmp_tmp_v2=`echo $gmp_tmp_vs | sed 's/.* .\..*\.\(.*\)\..* HP C.*/\1/'`
+gmp_tmp_v3=`echo $gmp_tmp_vs | sed 's/.* .\..*\..*\.\(.*\) HP C.*/\1/'`
+echo "Version number: $gmp_tmp_v1.$gmp_tmp_v2.$gmp_tmp_v3" >&AC_FD_CC
+if test -z "$gmp_tmp_v1"; then
+  # No ccom line was seen so this is presumably not the HP compiler.
+  gmp_hpc_64bit=not-applicable
+else
+  # The comparison leaves its answer in gmp_compare_ge.
+  GMP_COMPARE_GE($gmp_tmp_v1, 10, $gmp_tmp_v2, 32, $gmp_tmp_v3, 30)
+  gmp_hpc_64bit=$gmp_compare_ge
+fi
+AC_MSG_RESULT($gmp_hpc_64bit)
+# Anything other than a plain yes takes the bad action.  That includes the
+# not-applicable case.
+if test "$gmp_hpc_64bit" = yes; then
+  ifelse([$2],,:,[$2])
+else
+  ifelse([$3],,:,[$3])
+fi
+])
+
+
+dnl  GMP_GCC_ARM_UMODSI(CC,[ACTIONS-IF-GOOD][,ACTIONS-IF-BAD])
+dnl  ---------------------------------------------------------
+dnl  gcc 2.95.3 and earlier on arm has a bug in the libgcc __umodsi routine
+dnl  making "%" give wrong results for some operands, eg. "0x90000000 % 3".
+dnl  We're hoping it'll be fixed in 2.95.4, and we know it'll be fixed in
+dnl  gcc 3.
+dnl
+dnl  There's only a couple of places gmp cares about this, one is the
+dnl  size==1 case in mpn/generic/mode1o.c, and this shows up in
+dnl  tests/mpz/t-jac.c as a wrong result from mpz_kronecker_ui.
+
+AC_DEFUN([GMP_GCC_ARM_UMODSI],
+[AC_MSG_CHECKING([whether ARM gcc unsigned division works])
+# Log whatever the compiler reports as its version and then match that
+# against the releases known to have the broken remainder routine.
+tmp_version=`$1 --version`
+echo "$tmp_version" >&AC_FD_CC
+case $tmp_version in
+  2.95 | 2.95.1 | 2.95.2 | 2.95.3)
+    ifelse([$3],[],[:],[$3])
+    gmp_gcc_arm_umodsi_result=["no, gcc 2.95.[0123]"]
+    ;;
+  *)
+    ifelse([$2],[],[:],[$2])
+    gmp_gcc_arm_umodsi_result=yes
+    ;;
+esac
+AC_MSG_RESULT([$gmp_gcc_arm_umodsi_result])
+])
+
+
+dnl  GMP_GCC_MIPS_O32(gcc,[actions-yes][,[actions-no]])
+dnl  -------------------------------------------------
+dnl  Test whether gcc supports o32.
+dnl
+dnl  gcc 2.7.2.2 only does o32, and doesn't accept -mabi=32.
+dnl
+dnl  gcc 2.95 accepts -mabi=32 but it only works on irix5, on irix6 it gives
+dnl  "cc1: The -mabi=32 support does not work yet".
+
+AC_DEFUN([GMP_GCC_MIPS_O32],
+[AC_MSG_CHECKING([whether gcc supports o32])
+echo 'int x;' >conftest.c
+echo "$1 -mabi=32 -c conftest.c" >&AC_FD_CC
+result=no
+if $1 -mabi=32 -c conftest.c >conftest.out 2>&1; then
+  result=yes
+else
+  cat conftest.out >&AC_FD_CC
+  # A compiler that rejects the option outright is taken to be one that
+  # only does o32 and so it still counts as supporting o32.
+  grep "cc1: Invalid option \`abi=32'" conftest.out >/dev/null && result=yes
+fi
+rm -f conftest.*
+AC_MSG_RESULT($result)
+case $result in
+  yes)
+    ifelse([$2],[],[:],[$2])
+    ;;
+  *)
+    ifelse([$3],[],[:],[$3])
+    ;;
+esac
+])
+
+
+dnl  GMP_GCC_NO_CPP_PRECOMP(CCBASE,CC,CFLAGS,[ACTIONS-YES][,ACTIONS-NO])
+dnl  -------------------------------------------------------------------
+dnl  Check whether -no-cpp-precomp should be used on this compiler, and
+dnl  execute the corresponding ACTIONS-YES or ACTIONS-NO.
+dnl
+dnl  -no-cpp-precomp is only meant for Apple's hacked version of gcc found
+dnl  on powerpc*-*-darwin*, but we can give it a try on any gcc.  Normal gcc
+dnl  (as of 3.0 at least) only gives a warning, not an actual error, and we
+dnl  watch for that and decide against the option in that case, to avoid
+dnl  confusing the user.
+
+AC_DEFUN([GMP_GCC_NO_CPP_PRECOMP],
+[if test "$1" = gcc; then
+  # The test above looks at the CCBASE macro argument instead of reaching
+  # for a shell variable that the caller happens to have set.  For any
+  # other compiler base name the whole probe is skipped and neither action
+  # runs.
+  AC_MSG_CHECKING([compiler $2 $3 -no-cpp-precomp])
+  result=no
+  cat >conftest.c <<EOF
+int main () { return 0; }
+EOF
+  gmp_compile="$2 $3 -no-cpp-precomp conftest.c >conftest.out 2>&1"
+  if AC_TRY_EVAL(gmp_compile); then
+    # A successful exit is not enough since an ordinary gcc merely warns
+    # about the option.  The captured output must not mention it.
+    if grep "unrecognized option.*-no-cpp-precomp" conftest.out >/dev/null; then : ;
+    else
+      result=yes
+    fi
+  fi
+  cat conftest.out >&AC_FD_CC
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  AC_MSG_RESULT($result)
+  if test "$result" = yes; then
+      ifelse([$4],,:,[$4])
+  else
+      ifelse([$5],,:,[$5])
+  fi
+fi
+])
+
+
+dnl  GMP_GCC_PENTIUM4_SSE2(CC+CFLAGS,[ACTION-IF-YES][,ACTION-IF-NO])
+dnl  ---------------------------------------------------------------
+dnl  Determine whether gcc CC+CFLAGS is a good enough version for
+dnl  -march=pentium4 with sse2.
+dnl
+dnl  Gcc 3.2.1 was seen generating incorrect code for raw double -> int
+dnl  conversions through a union.  We believe the problem is in all 3.1 and
+dnl  3.2 versions, but that it's fixed in 3.3.
+
+AC_DEFUN([GMP_GCC_PENTIUM4_SSE2],
+[AC_MSG_CHECKING([whether gcc is good for sse2])
+# Good unless the reported version is in the 3.0 or 3.1 or 3.2 series.
+result=yes
+case `$1 -dumpversion` in
+  3.0 | 3.1 | 3.2 | 3.0.* | 3.1.* | 3.2.*)
+    result=no
+    ;;
+esac
+AC_MSG_RESULT($result)
+case $result in
+  yes)
+    ifelse([$2],[],[:],[$2])
+    ;;
+  *)
+    ifelse([$3],[],[:],[$3])
+    ;;
+esac
+])
+
+
+dnl  GMP_GCC_WA_MCPU(CC+CFLAGS, NEWFLAG [,ACTION-YES [,ACTION-NO]])
+dnl  --------------------------------------------------------------
+dnl  Check whether gcc (or gas rather) accepts a flag like "-Wa,-mev67".
+dnl
+dnl  Gas doesn't give an error for an unknown cpu, it only prints a warning
+dnl  like "Warning: Unknown CPU identifier `ev78'".
+dnl
+dnl  This is intended for use on alpha, since only recent versions of gas
+dnl  accept -mev67, but there's nothing here that's alpha specific.
+
+AC_DEFUN([GMP_GCC_WA_MCPU],
+[AC_MSG_CHECKING([assembler $1 $2])
+cat >conftest.c <<EOF
+int main () {}
+EOF
+result=no
+gmp_compile="$1 $2 -c conftest.c >conftest.out 2>&1"
+if AC_TRY_EVAL(gmp_compile); then
+  # A clean exit is not enough because gas only warns about a cpu it does
+  # not know.  The captured output has to be free of that warning too.
+  grep "Unknown CPU identifier" conftest.out >/dev/null || result=yes
+fi
+cat conftest.out >&AC_FD_CC
+rm -f conftest*
+AC_MSG_RESULT($result)
+case $result in
+  yes)
+    ifelse([$3],[],[:],[$3])
+    ;;
+  *)
+    ifelse([$4],[],[:],[$4])
+    ;;
+esac
+])
+
+
+dnl  GMP_GCC_WA_OLDAS(CC+CFLAGS [,ACTION-YES [,ACTION-NO]])
+dnl  ------------------------------------------------------
+dnl  Check whether gcc should be run with "-Wa,-oldas".
+dnl
+dnl  On systems alpha*-*-osf* (or maybe just osf5), apparently there's a
+dnl  newish Compaq "as" which doesn't work with the gcc mips-tfile.
+dnl  Compiling an empty file with "gcc -c foo.c" produces for instance
+dnl
+dnl      mips-tfile, /tmp/ccaqUNnF.s:7 Segmentation fault
+dnl
+dnl  The fix is to pass "-oldas" to that assembler, as noted by
+dnl
+dnl      http://gcc.gnu.org/install/specific.html#alpha*-dec-osf*
+dnl
+dnl  The test here tries to compile an empty file, and if that fails but
+dnl  adding -Wa,-oldas makes it succeed, then that flag is considered
+dnl  necessary.
+dnl
+dnl  We look for the failing case specifically, since it may not be a good
+dnl  idea to use -Wa,-oldas in other circumstances.  For instance gas takes
+dnl  "-oldas" to mean the "-o" option and will write a file called "ldas" as
+dnl  its output.  Normally gcc puts its own "-o" after any -Wa options, so
+dnl  -oldas ends up being harmless, but clearly that's only through good
+dnl  luck.
+dnl
+dnl  This macro is designed for use while probing for a good compiler, and
+dnl  so doesn't cache its result.
+
+AC_DEFUN([GMP_GCC_WA_OLDAS],
+[AC_MSG_CHECKING([for $1 -Wa,-oldas])
+result=no
+cat >conftest.c <<EOF
+EOF
+echo "with empty conftest.c" >&AC_FD_CC
+gmp_compile="$1 -c conftest.c >&AC_FD_CC 2>&1"
+if AC_TRY_EVAL(gmp_compile); then
+  # The plain compile works so the flag is not wanted.
+  :
+else
+  # The plain compile fails.  The flag is wanted only if it cures that.
+  gmp_compile="$1 -Wa,-oldas -c conftest.c >&AC_FD_CC 2>&1"
+  AC_TRY_EVAL(gmp_compile) && result=yes
+fi
+rm -f conftest*
+AC_MSG_RESULT($result)
+case $result in
+  yes)
+    ifelse([$2],[],[:],[$2])
+    ;;
+  *)
+    ifelse([$3],[],[:],[$3])
+    ;;
+esac
+])
+
+
+dnl  GMP_OS_X86_XMM(CC+CFLAGS,[ACTION-IF-YES][,ACTION-IF-NO])
+dnl  --------------------------------------------------------
+dnl  Determine whether the operating system supports XMM registers.
+dnl
+dnl  If build==host then a test program is run, executing an SSE2
+dnl  instruction using an XMM register.  This will give a SIGILL if the
+dnl  system hasn't set the OSFXSR bit in CR4 to say it knows it must use
+dnl  fxsave/fxrstor in a context switch (to save xmm registers).
+dnl
+dnl  If build!=host, we can fallback on:
+dnl
+dnl      - FreeBSD version 4 is the first supporting xmm.
+dnl
+dnl      - Linux kernel 2.4 might be the first stable series supporting xmm
+dnl        (not sure).  But there's no version number in the GNU/Linux
+dnl        config tuple to test anyway.
+dnl
+dnl  The default is to allow xmm.  This might seem rash, but it's likely
+dnl  most systems know xmm by now, so this will normally be what's wanted.
+dnl  And cross compiling is a bit hairy anyway, so hopefully anyone doing it
+dnl  will be smart enough to know what to do.
+dnl
+dnl  In the test program, .text and .globl are hard coded because this macro
+dnl  is wanted before GMP_ASM_TEXT and GMP_ASM_GLOBL are run.  A .byte
+dnl  sequence is used (for xorps %xmm0, %xmm0) to make us independent of
+dnl  tests for whether the assembler supports sse2/xmm.  Obviously we need
+dnl  both assembler and OS support, but this means we don't force the order
+dnl  in which we test.
+dnl
+dnl  FIXME: Maybe we should use $CCAS to assemble, if it's set.  (Would
+dnl  still want $CC/$CFLAGS for the link.)  But this test is used before
+dnl  AC_PROG_CC sets $OBJEXT, so we'd need to check for various object file
+dnl  suffixes ourselves.
+
+AC_DEFUN([GMP_OS_X86_XMM],
+[AC_CACHE_CHECK([whether the operating system supports XMM registers],
+               gmp_cv_os_x86_xmm,
+[if test "$build" = "$host"; then
+  # remove anything that might look like compiler output to our "||" expression
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  # The test program is a main that executes one xmm instruction given as
+  # raw bytes and then returns zero.  Both main and _main are defined so
+  # that either symbol naming convention links.
+  cat >conftest.s <<EOF
+       .text
+main:
+_main:
+       .globl  main
+       .globl  _main
+       .byte   0x0f, 0x57, 0xc0
+       xorl    %eax, %eax
+       ret
+EOF
+  gmp_compile="$1 conftest.s -o conftest >&AC_FD_CC"
+  if AC_TRY_EVAL(gmp_compile); then
+    # Built, so the answer is simply whether the program runs to completion.
+    if AC_TRY_COMMAND([./a.out || ./b.out || ./a.exe || ./a_out.exe || ./conftest]); then
+      gmp_cv_os_x86_xmm=yes
+    else
+      gmp_cv_os_x86_xmm=no
+    fi
+  else
+    # The cache variable is left unset here, so the host OS table below
+    # ends up deciding.
+    AC_MSG_WARN([Oops, cannot compile test program])
+  fi
+  rm -f conftest*
+fi
+
+# Nothing was determined by running a program, either because this is a
+# cross build or because the test program did not build.  Fall back on
+# what is known about the host OS.
+if test -z "$gmp_cv_os_x86_xmm"; then
+  case $host_os in
+    [freebsd[123] | freebsd[123].*])
+      gmp_cv_os_x86_xmm=no ;;
+    freebsd*)
+      gmp_cv_os_x86_xmm=yes ;;
+    *)
+      gmp_cv_os_x86_xmm=probably ;;
+  esac
+fi
+])
+
+# The warning sits outside the cache check and so is repeated on every run
+# that has the probably value, cached or not.
+if test "$gmp_cv_os_x86_xmm" = probably; then
+  AC_MSG_WARN([Not certain of OS support for xmm when cross compiling.])
+  AC_MSG_WARN([Will assume it's ok, expect a SIGILL if this is wrong.])
+fi
+
+# Only a definite no selects the no action.  Both yes and probably select
+# the yes action.
+case $gmp_cv_os_x86_xmm in
+no)
+  $3
+  ;;
+*)
+  $2
+  ;;
+esac
+])
+
+
+dnl  GMP_CRAY_OPTIONS(C90/T90-IEEE, C90/T90-CFP, J90/SV1)
+dnl  ----------------------------------------------------
+dnl  Execute the actions in the arguments on the respective Cray vector
+dnl  systems.  For other hosts, do nothing.
+dnl
+dnl  This macro should be used after the C compiler has been chosen, since
+dnl  on c90 and t90 we ask the compiler whether we're in IEEE or CFP float
+dnl  mode.
+dnl
+dnl  This code is in a macro so that any AC_REQUIRE pre-requisites of
+dnl  AC_EGREP_CPP will be expanded at the top-level, ie. for all hosts not
+dnl  merely c90 and t90.  In autoconf 2.57 for instance this means
+dnl  AC_PROG_EGREP, which is needed by various other macros.
+
+dnl  Dispatch is on host_cpu.  On c90 and t90 the preprocessor is asked
+dnl  whether _CRAYIEEE is defined: the first argument is passed to
+dnl  AC_EGREP_CPP as the action for a match and the second as the action for
+dnl  no match.  On j90 and sv1 the third argument is used directly.  Any
+dnl  other host_cpu matches no branch of the case.
+AC_DEFUN([GMP_CRAY_OPTIONS],
+[case $host_cpu in
+  c90 | t90)
+    AC_EGREP_CPP(yes,
+[#ifdef _CRAYIEEE
+yes
+#endif],
+    [$1],
+    [$2])
+    ;;
+  j90 | sv1)
+    [$3]
+    ;;
+esac
+])
+
+
+dnl  GMP_HPPA_LEVEL_20(cc/cflags [, ACTION-GOOD [,ACTION-BAD]])
+dnl  ----------------------------------------------------------
+dnl  Check that the given cc/cflags accepts HPPA 2.0n assembler code.
+dnl
+dnl  Old versions of gas don't know 2.0 instructions.  It rejects ".level
+dnl  2.0" for a start, so just test that.
+dnl
+dnl  This test is designed to be run for various different compiler and
+dnl  flags combinations, and hence doesn't cache its result.
+
+AC_DEFUN([GMP_HPPA_LEVEL_20],
+[AC_MSG_CHECKING([$1 assembler knows hppa 2.0])
+# A single level directive is the whole test program.
+cat >conftest.s <<EOF
+       .level 2.0
+EOF
+gmp_compile="$1 -c conftest.s >&AC_FD_CC 2>&1"
+if AC_TRY_EVAL(gmp_compile); then
+  result=yes
+else
+  result=no
+  echo "failed program was" >&AC_FD_CC
+  cat conftest.s >&AC_FD_CC
+fi
+rm -f conftest*
+AC_MSG_RESULT($result)
+case $result in
+  yes)
+    ifelse([$2],[],[:],[$2])
+    ;;
+  *)
+    ifelse([$3],[],[:],[$3])
+    ;;
+esac
+])
+
+
+dnl  GMP_PROG_CXX_WORKS(cxx/cxxflags [, ACTION-YES [,ACTION-NO]])
+dnl  ------------------------------------------------------------
+dnl  Check whether cxx/cxxflags can compile and link.
+dnl
+dnl  This test is designed to be run repeatedly with different cxx/cxxflags
+dnl  selections, so the result is not cached.
+dnl
+dnl  For a native build, we insist on being able to run the program, so as
+dnl  to detect any problems with the standard C++ library.  During
+dnl  development various systems with broken or incomplete C++ installations
+dnl  were seen.
+dnl
+dnl  The various features and problems we try to detect are done in separate
+dnl  compiles.  Although this is probably a bit slower than one test
+dnl  program, it makes it easy to indicate the problem in AC_MSG_RESULT,
+dnl  hence giving the user a clue about why we rejected the compiler.
+
+AC_DEFUN([GMP_PROG_CXX_WORKS],
+[AC_MSG_CHECKING([C++ compiler $1])
+# Every part below is skipped unless this is still yes, so the first part
+# to fail is the one whose title ends up in the result.
+gmp_prog_cxx_works=yes
+
+# start with a plain "main()", then go on to further checks
+GMP_PROG_CXX_WORKS_PART([$1], [])
+
+# namespaces have to be accepted
+GMP_PROG_CXX_WORKS_PART([$1], [namespace],
+[namespace foo { }
+using namespace foo;
+])
+
+# GMP requires the standard C++ iostream classes
+GMP_PROG_CXX_WORKS_PART([$1], [std iostream],
+[/* This test rejects g++ 2.7.2 which doesn't have <iostream>, only a
+    pre-standard iostream.h. */
+#include <iostream>
+
+/* This test rejects OSF 5.1 Compaq C++ in its default pre-standard iostream
+   mode, since that mode puts cout in the global namespace, not "std".  */
+void someoutput (void) { std::cout << 123; }
+])
+
+AC_MSG_RESULT($gmp_prog_cxx_works)
+# Any value other than a plain yes is a failure description and selects
+# the second action.  A case is used because either action may be empty.
+case $gmp_prog_cxx_works in
+  yes)
+    [$2]
+    ;;
+  *)
+    [$3]
+    ;;
+esac
+])
+
+dnl  Called: GMP_PROG_CXX_WORKS_PART(CXX+CXXFLAGS, FAIL-MESSAGE [,CODE])
+dnl
+AC_DEFUN([GMP_PROG_CXX_WORKS_PART],
+[if test "$gmp_prog_cxx_works" = yes; then
+  # Clear out stale files first so that none of them can be mistaken for
+  # fresh compiler output by the "||" chain used to run the program below.
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  cat >conftest.cc <<EOF
+[$3]
+int main (void) { return 0; }
+EOF
+  echo "Test compile: [$2]" >&AC_FD_CC
+  gmp_cxxcompile="$1 conftest.cc >&AC_FD_CC"
+  if AC_TRY_EVAL(gmp_cxxcompile); then
+    case $cross_compiling in
+      no)
+        # Native build so the program has to run as well.
+        AC_TRY_COMMAND([./a.out || ./b.out || ./a.exe || ./a_out.exe || ./conftest]) \
+          || gmp_prog_cxx_works="no[]m4_if([$2],,,[, ])[$2], program does not run"
+        ;;
+    esac
+  else
+    gmp_prog_cxx_works="no[]m4_if([$2],,,[, ])[$2]"
+  fi
+  case $gmp_prog_cxx_works in
+    no*)
+      # Keep the rejected source in the log for later diagnosis.
+      echo "failed program was:" >&AC_FD_CC
+      cat conftest.cc >&AC_FD_CC
+      ;;
+  esac
+  rm -f conftest* a.out b.out a.exe a_out.exe
+fi
+])
+
+
+dnl  GMP_INIT([M4-DEF-FILE])
+dnl  -----------------------
+dnl  Initializations for GMP config.m4 generation.
+dnl
+dnl  FIXME: The generated config.m4 doesn't get recreated by config.status.
+dnl  Maybe the relevant "echo"s should go through AC_CONFIG_COMMANDS.
+
+AC_DEFUN([GMP_INIT],
+[ifelse([$1], , gmp_configm4=config.m4, gmp_configm4="[$1]")
+# Scratch files that collect the parts of the generated file until it is
+# put together: plain defines, include directives and POST defines
+# respectively.
+gmp_tmpconfigm4=cnfm4.tmp
+gmp_tmpconfigm4i=cnfm4i.tmp
+gmp_tmpconfigm4p=cnfm4p.tmp
+# Start clean so that leftovers from an earlier run are not picked up.
+rm -f $gmp_tmpconfigm4 $gmp_tmpconfigm4i $gmp_tmpconfigm4p
+
+# CONFIG_TOP_SRCDIR is a path from the mpn builddir to the top srcdir.
+# The pattern here tests for an absolute path the same way as
+# _AC_OUTPUT_FILES in autoconf acgeneral.m4.
+case $srcdir in
+[[\\/]]* | ?:[[\\/]]* )  tmp="$srcdir"    ;;
+*)                       tmp="../$srcdir" ;;
+esac
+echo ["define(<CONFIG_TOP_SRCDIR>,<\`$tmp'>)"] >>$gmp_tmpconfigm4
+
+# All CPUs use asm-defs.m4
+echo ["include][(CONFIG_TOP_SRCDIR\`/mpn/asm-defs.m4')"] >>$gmp_tmpconfigm4i
+])
+
+
+dnl  GMP_FINISH
+dnl  ----------
+dnl  Create config.m4 from its accumulated parts.
+dnl
+dnl  __CONFIG_M4_INCLUDED__ is used so that a second or subsequent include
+dnl  of config.m4 is harmless.
+dnl
+dnl  A separate ifdef on the angle bracket quoted part ensures the quoting
+dnl  style there is respected.  The basic defines from gmp_tmpconfigm4 are
+dnl  fully quoted but are still put under an ifdef in case any have been
+dnl  redefined by one of the m4 include files.
+dnl
+dnl  Doing a big ifdef within asm-defs.m4 and/or other macro files wouldn't
+dnl  work, since it'd interpret parentheses and quotes in dnl comments, and
+dnl  having a whole file as a macro argument would overflow the string space
+dnl  on BSD m4.
+
+AC_DEFUN([GMP_FINISH],
+[AC_REQUIRE([GMP_INIT])
+echo "creating $gmp_configm4"
+# The first write truncates the output file and every later one appends.
+# The comment keyword is spelled as two adjacent strings, presumably so
+# that m4 does not act on it while this macro is being expanded.
+echo ["d""nl $gmp_configm4.  Generated automatically by configure."] > $gmp_configm4
+# Part one is the angle bracket quoted defines.  They are wrapped in a
+# change of quote characters and guarded so that a repeat include skips
+# them.
+if test -f $gmp_tmpconfigm4; then
+  echo ["changequote(<,>)"] >> $gmp_configm4
+  echo ["ifdef(<__CONFIG_M4_INCLUDED__>,,<"] >> $gmp_configm4
+  cat $gmp_tmpconfigm4 >> $gmp_configm4
+  echo [">)"] >> $gmp_configm4
+  echo ["changequote(\`,')"] >> $gmp_configm4
+  rm $gmp_tmpconfigm4
+fi
+# Part two is the include directives followed by the POST defines, both
+# under one guard written with the normal quote characters.
+echo ["ifdef(\`__CONFIG_M4_INCLUDED__',,\`"] >> $gmp_configm4
+if test -f $gmp_tmpconfigm4i; then
+  cat $gmp_tmpconfigm4i >> $gmp_configm4
+  rm $gmp_tmpconfigm4i
+fi
+if test -f $gmp_tmpconfigm4p; then
+  cat $gmp_tmpconfigm4p >> $gmp_configm4
+  rm $gmp_tmpconfigm4p
+fi
+echo ["')"] >> $gmp_configm4
+# Last of all the guard symbol itself gets defined.
+echo ["define(\`__CONFIG_M4_INCLUDED__')"] >> $gmp_configm4
+])
+
+
+dnl  GMP_INCLUDE_MPN(FILE)
+dnl  ---------------------
+dnl  Add an include_mpn(`FILE') to config.m4.  FILE should be a path
+dnl  relative to the mpn source directory, for example
+dnl
+dnl      GMP_INCLUDE_MPN(`x86/x86-defs.m4')
+dnl
+
+dnl  The directive is appended to the scratch file for includes, the same
+dnl  one GMP_INIT puts the asm-defs.m4 include into, so includes come out in
+dnl  the order the macros were invoked.  The echo argument is in shell double
+dnl  quotes, so shell variables in FILE are expanded when configure runs.
+AC_DEFUN([GMP_INCLUDE_MPN],
+[AC_REQUIRE([GMP_INIT])
+echo ["include_mpn(\`$1')"] >> $gmp_tmpconfigm4i
+])
+
+
+dnl  GMP_DEFINE(MACRO, DEFINITION [, LOCATION])
+dnl  ------------------------------------------
+dnl  Define M4 macro MACRO as DEFINITION in temporary file.
+dnl
+dnl  If LOCATION is `POST', the definition will appear after any include()
+dnl  directives inserted by GMP_INCLUDE_MPN.  Mind the quoting!  No shell
+dnl  variables will get expanded.  Don't forget to invoke GMP_FINISH to
+dnl  create file config.m4.  config.m4 uses `<' and `>' as quote characters
+dnl  for all defines.
+
+dnl  The define line is echoed inside shell single quotes, which is what
+dnl  keeps shell variables from being expanded.  The destination scratch file
+dnl  is chosen when the macro is expanded, not when configure runs: the POST
+dnl  file if LOCATION is exactly POST, otherwise the plain defines file.
+AC_DEFUN([GMP_DEFINE],
+[AC_REQUIRE([GMP_INIT])
+echo ['define(<$1>, <$2>)'] >>ifelse([$3], [POST],
+                              $gmp_tmpconfigm4p, $gmp_tmpconfigm4)
+])
+
+
+dnl  GMP_DEFINE_RAW(STRING [, LOCATION])
+dnl  ------------------------------------
+dnl  Put STRING into config.m4 file.
+dnl
+dnl  If LOCATION is `POST', the definition will appear after any include()
+dnl  directives inserted by GMP_INCLUDE_MPN.  Don't forget to invoke
+dnl  GMP_FINISH to create file config.m4.
+
+dnl  STRING is placed on the echo command line as given, with no shell
+dnl  quoting added here, so the caller supplies whatever shell quoting it
+dnl  wants.  With double quotes, shell variables in STRING are expanded when
+dnl  configure runs.  The destination scratch file is chosen the same way as
+dnl  in GMP_DEFINE.
+AC_DEFUN([GMP_DEFINE_RAW],
+[AC_REQUIRE([GMP_INIT])
+echo [$1] >> ifelse([$2], [POST], $gmp_tmpconfigm4p, $gmp_tmpconfigm4)
+])
+
+
+dnl  GMP_TRY_ASSEMBLE(asm-code,[action-success][,action-fail])
+dnl  ----------------------------------------------------------
+dnl  Attempt to assemble the given code.
+dnl  Do "action-success" if this succeeds, "action-fail" if not.
+dnl
+dnl  conftest.o and conftest.out are available for inspection in
+dnl  "action-success".  If either action does a "break" out of a loop then
+dnl  an explicit "rm -f conftest*" will be necessary.
+dnl
+dnl  This is not unlike AC_TRY_COMPILE, but there's no default includes or
+dnl  anything in "asm-code", everything wanted must be given explicitly.
+
+AC_DEFUN([GMP_TRY_ASSEMBLE],
+[cat >conftest.s <<EOF
+[$1]
+EOF
+gmp_assemble="$CCAS $CFLAGS $CPPFLAGS conftest.s >conftest.out 2>&1"
+# Record the outcome first so that the captured assembler output is logged
+# from one place whichever way it went.
+if AC_TRY_EVAL(gmp_assemble); then
+  gmp_tmp_assemble_ok=yes
+else
+  gmp_tmp_assemble_ok=no
+fi
+cat conftest.out >&AC_FD_CC
+case $gmp_tmp_assemble_ok in
+  yes)
+    ifelse([$2],[],[:],[$2])
+    ;;
+  *)
+    echo "configure: failed program was:" >&AC_FD_CC
+    cat conftest.s >&AC_FD_CC
+    ifelse([$3],[],[:],[$3])
+    ;;
+esac
+rm -f conftest*
+])
+
+
+dnl Checks whether the stack can be marked nonexecutable by passing an option
+dnl to the C-compiler when acting on .s files. Appends that option to ASMFLAGS.
+dnl This macro is adapted from one found in GLIBC-2.3.5.
+AC_DEFUN([CL_AS_NOEXECSTACK],[
+dnl AC_REQUIRE([AC_PROG_CC]) GMP uses something else
+AC_CACHE_CHECK([whether assembler supports --noexecstack option],
+cl_cv_as_noexecstack, [dnl
+  # Three steps which must all succeed: compile a trivial function to
+  # assembler, find a .note.GNU-stack marker in that output, and then
+  # assemble the output again with the option passed through -Wa.
+  cat > conftest.c <<EOF
+void foo() {}
+EOF
+  if AC_TRY_COMMAND([${CC} $CFLAGS $CPPFLAGS
+                     -S -o conftest.s conftest.c >/dev/null]) \
+     && grep .note.GNU-stack conftest.s >/dev/null \
+     && AC_TRY_COMMAND([${CC} $CFLAGS $CPPFLAGS -Wa,--noexecstack
+                       -c -o conftest.o conftest.s >/dev/null])
+  then
+    cl_cv_as_noexecstack=yes
+  else
+    cl_cv_as_noexecstack=no
+  fi
+  rm -f conftest*])
+  # The flag is appended to ASMFLAGS, which is then substituted into the
+  # output files whether or not the flag was added.
+  if test "$cl_cv_as_noexecstack" = yes; then
+    ASMFLAGS="$ASMFLAGS -Wa,--noexecstack"
+  fi
+  AC_SUBST(ASMFLAGS)
+])
+
+
+dnl  GMP_ASM_LABEL_SUFFIX
+dnl  --------------------
+dnl  : - is usual.
+dnl  empty - hppa on HP-UX doesn't use a :, just the label name
+dnl
+dnl  Note that it's necessary to test the empty case first, since HP "as"
+dnl  will accept "somelabel:", and take it to mean a label with a name that
+dnl  happens to end in a colon.
+
+AC_DEFUN([GMP_ASM_LABEL_SUFFIX],
+[AC_REQUIRE([GMP_ASM_TEXT])
+AC_CACHE_CHECK([for assembler label suffix],
+                gmp_cv_asm_label_suffix,
+[gmp_cv_asm_label_suffix=unknown
+# The empty suffix is tried before the colon and the first candidate that
+# assembles wins.  The success action leaves the loop early and so it has
+# to remove the scratch files itself.
+for i in "" ":"; do
+  echo "trying $i" >&AC_FD_CC
+  GMP_TRY_ASSEMBLE(
+[      $gmp_cv_asm_text
+somelabel$i],
+    [gmp_cv_asm_label_suffix=$i
+     rm -f conftest*
+     break],
+    [cat conftest.out >&AC_FD_CC])
+done
+if test "$gmp_cv_asm_label_suffix" = "unknown"; then
+  AC_MSG_ERROR([Cannot determine label suffix])
+fi
+])
+# This sits outside the cache check, so the define is written on every run
+# whether or not the value came from the cache.
+echo ["define(<LABEL_SUFFIX>, <$gmp_cv_asm_label_suffix>)"] >> $gmp_tmpconfigm4
+])
+
+
+dnl  GMP_ASM_UNDERSCORE
+dnl  ------------------
+dnl  Determine whether global symbols need to be prefixed with an underscore.
+dnl  The output from "nm" is grepped to see what a typical symbol looks like.
+dnl
+dnl  This test used to grep the .o file directly, but that failed with greps
+dnl  that don't like binary files (eg. SunOS 4).
+dnl
+dnl  This test also used to construct an assembler file with and without an
+dnl  underscore and try to link that to a C file, to see which worked.
+dnl  Although that's what will happen in the real build we don't really want
+dnl  to depend on creating asm files within configure for every possible CPU
+dnl  (or at least we don't want to do that more than we have to).
+dnl
+dnl  The fallback on no underscore is based on the assumption that the world
+dnl  is moving towards non-underscore systems.  There should actually be no
+dnl  good reason for nm to fail though.
+
+AC_DEFUN([GMP_ASM_UNDERSCORE],
+[AC_REQUIRE([GMP_PROG_NM])
+AC_CACHE_CHECK([if globals are prefixed by underscore],
+               gmp_cv_asm_underscore,
+[gmp_cv_asm_underscore="unknown"
+# Compile a single global variable and see how its name comes out in the
+# symbol listing of the object file.
+cat >conftest.c <<EOF
+int gurkmacka;
+EOF
+gmp_compile="$CC $CFLAGS $CPPFLAGS -c conftest.c >&AC_FD_CC"
+if AC_TRY_EVAL(gmp_compile); then
+  $NM conftest.$OBJEXT >conftest.out
+  # The underscore form has to be tried first since a search for the plain
+  # name would match it as well.
+  if grep _gurkmacka conftest.out >/dev/null; then
+    gmp_cv_asm_underscore=yes
+  elif grep gurkmacka conftest.out >/dev/null; then
+    gmp_cv_asm_underscore=no
+  else
+    echo "configure: $NM doesn't have gurkmacka:" >&AC_FD_CC
+    cat conftest.out >&AC_FD_CC
+  fi
+else
+  echo "configure: failed program was:" >&AC_FD_CC
+  cat conftest.c >&AC_FD_CC
+fi
+rm -f conftest*
+])
+# Publish the prefix.  An undetermined result is treated like no underscore
+# after a warning that explains how to override it.
+case $gmp_cv_asm_underscore in
+  yes)
+    GMP_DEFINE(GSYM_PREFIX, [_]) ;;
+  no)
+    GMP_DEFINE(GSYM_PREFIX, []) ;;
+  *)
+    AC_MSG_WARN([+----------------------------------------------------------])
+    AC_MSG_WARN([| Cannot determine global symbol prefix.])
+    AC_MSG_WARN([| $NM output doesn't contain a global data symbol.])
+    AC_MSG_WARN([| Will proceed with no underscore.])
+    AC_MSG_WARN([| If this is wrong then you'll get link errors referring])
+    AC_MSG_WARN([| to ___gmpn_add_n (note three underscores).])
+    AC_MSG_WARN([| In this case do a fresh build with an override,])
+    AC_MSG_WARN([|     ./configure gmp_cv_asm_underscore=yes])
+    AC_MSG_WARN([+----------------------------------------------------------])
+    GMP_DEFINE(GSYM_PREFIX, [])
+    ;;
+esac
+])
+
+
+dnl  GMP_ASM_ALIGN_LOG
+dnl  -----------------
+dnl  Is parameter to `.align' logarithmic?
+
+AC_DEFUN([GMP_ASM_ALIGN_LOG],
+[AC_REQUIRE([GMP_ASM_GLOBL])
+AC_REQUIRE([GMP_ASM_BYTE])
+AC_REQUIRE([GMP_ASM_DATA])
+AC_REQUIRE([GMP_ASM_LABEL_SUFFIX])
+AC_REQUIRE([GMP_PROG_NM])
+AC_CACHE_CHECK([if .align assembly directive is logarithmic],
+               gmp_cv_asm_align_log,
+[GMP_TRY_ASSEMBLE(
+[              $gmp_cv_asm_data
+       .align  4
+       $gmp_cv_asm_globl       foo
+       $gmp_cv_asm_byte        1
+       .align  4
+foo$gmp_cv_asm_label_suffix
+       $gmp_cv_asm_byte        2],
+  [gmp_tmp_val=[`$NM conftest.$OBJEXT | grep foo | \
+     sed -e 's;[[][0-9][]]\(.*\);\1;' -e 's;[^1-9]*\([0-9]*\).*;\1;'`]
+  # The test program emits one byte and aligns to 4 again before foo.  So
+  # foo lands at offset 16 if the argument is a power of two exponent and
+  # at offset 4 if it is a byte count.  The sed above reduces the symbol
+  # line to the address with leading zeros dropped.  Presumably 10 is the
+  # hex spelling of 16 and the plain 16 covers a listing in decimal.
+  if test "$gmp_tmp_val" = "10" || test "$gmp_tmp_val" = "16"; then
+    gmp_cv_asm_align_log=yes
+  else
+    gmp_cv_asm_align_log=no
+  fi],
+  [AC_MSG_ERROR([cannot assemble alignment test])])])
+
+# Written on every run whether or not the value came from the cache.
+GMP_DEFINE_RAW(["define(<ALIGN_LOGARITHMIC>,<$gmp_cv_asm_align_log>)"])
+])
+
+
+dnl  GMP_ASM_ALIGN_FILL_0x90
+dnl  -----------------------
+dnl  Determine whether a ",0x90" suffix works on a .align directive.
+dnl  This is only meant for use on x86, 0x90 being a "nop".
+dnl
+dnl  Old gas, eg. 1.92.3
+dnl       Needs ",0x90" or else the fill is 0x00, which can't be executed
+dnl       across.
+dnl
+dnl  New gas, eg. 2.91
+dnl       Generates multi-byte nop fills even when ",0x90" is given.
+dnl
+dnl  Solaris 2.6 as
+dnl       ",0x90" is not allowed, causes a fatal error.
+dnl
+dnl  Solaris 2.8 as
+dnl       ",0x90" does nothing, generates a warning that it's being ignored.
+dnl
+dnl  SCO OpenServer 5 as
+dnl       Second parameter is max bytes to fill, not a fill pattern.
+dnl       ",0x90" is an error due to being bigger than the first parameter.
+dnl       Multi-byte nop fills are generated in text segments.
+dnl
+dnl  Note that both solaris "as"s only care about ",0x90" if they actually
+dnl  have to use it to fill something, hence the .byte in the test.  It's
+dnl  the second .align which provokes the error or warning.
+dnl
+dnl  The warning from solaris 2.8 is suppressed to stop anyone worrying that
+dnl  something might be wrong.
+
+dnl  The answer is cached in gmp_cv_asm_align_fill_0x90 ("yes" or "no") and
+dnl  passed on through GMP_DEFINE_RAW as ALIGN_FILL_0x90.
+AC_DEFUN([GMP_ASM_ALIGN_FILL_0x90],
+[AC_REQUIRE([GMP_ASM_TEXT])
+AC_CACHE_CHECK([if the .align directive accepts an 0x90 fill in .text],
+               gmp_cv_asm_align_fill_0x90,
+[GMP_TRY_ASSEMBLE(
+[              $gmp_cv_asm_text
+       .align  4, 0x90
+       .byte   0
+       .align  4, 0x90],
+[# Assembled ok, so the fill is usable unless the assembler said it was
+# ignoring it.
+gmp_cv_asm_align_fill_0x90=yes
+if grep "Warning: Fill parameter ignored for executable section" conftest.out >/dev/null; then
+  echo "Supressing this warning by omitting 0x90" 1>&AC_FD_CC
+  gmp_cv_asm_align_fill_0x90=no
+fi],
+[gmp_cv_asm_align_fill_0x90=no])])
+
+GMP_DEFINE_RAW(["define(<ALIGN_FILL_0x90>,<$gmp_cv_asm_align_fill_0x90>)"])
+])
+
+
+dnl  GMP_ASM_BYTE
+dnl  ------------
+dnl  .byte - is usual.
+dnl  data1 - required by ia64 (on hpux at least).
+dnl
+dnl  This macro is just to support other configure tests, not any actual asm
+dnl  code.
+
+dnl  Each candidate directive is assembled with operand 0 in the data
+dnl  section; the first one accepted is cached in gmp_cv_asm_byte.  It is a
+dnl  fatal configure error if no candidate assembles.
+dnl
+dnl  GMP_ASM_DATA is required because the probe uses $gmp_cv_asm_data; without
+dnl  it the probe only worked when some other macro had already run
+dnl  GMP_ASM_DATA.
+AC_DEFUN([GMP_ASM_BYTE],
+[AC_REQUIRE([GMP_ASM_TEXT])
+AC_REQUIRE([GMP_ASM_DATA])
+AC_REQUIRE([GMP_ASM_LABEL_SUFFIX])
+AC_CACHE_CHECK([for assembler byte directive],
+                gmp_cv_asm_byte,
+[for i in .byte data1; do
+  echo "trying $i" >&AC_FD_CC
+  GMP_TRY_ASSEMBLE(
+[      $gmp_cv_asm_data
+       $i      0
+],
+    [gmp_cv_asm_byte=$i
+     rm -f conftest*
+     break],
+    [cat conftest.out >&AC_FD_CC])
+done
+if test -z "$gmp_cv_asm_byte"; then
+  AC_MSG_ERROR([Cannot determine how to emit a data byte])
+fi
+])
+])
+
+
+dnl  GMP_ASM_TEXT
+dnl  ------------
+dnl  .text - is usual.
+dnl  .code - is needed by the hppa on HP-UX (but ia64 HP-UX uses .text)
+dnl  .csect .text[PR] - is for AIX.
+
+dnl  Each candidate is assembled on its own; the first one accepted is cached
+dnl  in gmp_cv_asm_text and appended to $gmp_tmpconfigm4 as TEXT.  It is a
+dnl  fatal configure error if no candidate assembles.
+AC_DEFUN([GMP_ASM_TEXT],
+[AC_CACHE_CHECK([how to switch to text section],
+                gmp_cv_asm_text,
+[for gmp_tmp_text in ".text" ".code" [".csect .text[PR]"]; do
+  echo "trying $gmp_tmp_text" >&AC_FD_CC
+  GMP_TRY_ASSEMBLE([   $gmp_tmp_text],
+    [gmp_cv_asm_text=$gmp_tmp_text
+     rm -f conftest*
+     break])
+done
+if test -z "$gmp_cv_asm_text"; then
+  AC_MSG_ERROR([Cannot determine text section directive])
+fi
+])
+echo ["define(<TEXT>, <$gmp_cv_asm_text>)"] >> $gmp_tmpconfigm4
+])
+
+
+dnl  GMP_ASM_DATA
+dnl  ------------
+dnl  Can we say `.data'?
+
+dnl  The choice is made from $host alone, no assembler probe is run.  The
+dnl  result is cached in gmp_cv_asm_data and appended to $gmp_tmpconfigm4 as
+dnl  DATA.
+AC_DEFUN([GMP_ASM_DATA],
+[AC_CACHE_CHECK([how to switch to data section],
+                gmp_cv_asm_data,
+[gmp_cv_asm_data=".data"
+# AIX is the only host given something other than plain .data
+case $host in
+  *-*-aix*) gmp_cv_asm_data=[".csect .data[RW]"] ;;
+esac
+])
+echo ["define(<DATA>, <$gmp_cv_asm_data>)"] >> $gmp_tmpconfigm4
+])
+
+
+dnl  GMP_ASM_RODATA
+dnl  --------------
+dnl  Find out how to switch to the read-only data section.
+dnl
+dnl  The compiler output is grepped for the right directive.  It's not
+dnl  considered wise to just probe for ".section .rodata" or whatever works,
+dnl  since arbitrary section names might be accepted, but not necessarily do
+dnl  the right thing when they get to the linker.
+dnl
+dnl  Only a few asm files use RODATA, so this code is perhaps a bit
+dnl  excessive right now, but should find more uses in the future.
+dnl
+dnl  FIXME: gcc on aix generates something like ".csect _foo.ro_c[RO],3"
+dnl  where foo is the object file.  Might need to check for that if we use
+dnl  RODATA there.
+
+dnl  The result is cached in gmp_cv_asm_rodata and appended to
+dnl  $gmp_tmpconfigm4 as RODATA.  A small const array is compiled with
+dnl  "$CC $CFLAGS $CPPFLAGS -S" and the last section-like directive found
+dnl  ahead of the array's label in conftest.s is used.  If the compile fails,
+dnl  the label isn't found, or nothing matches, the host based default set at
+dnl  the start (DATA on x86, TEXT elsewhere) is left in place.
+AC_DEFUN([GMP_ASM_RODATA],
+[AC_REQUIRE([GMP_ASM_TEXT])
+AC_REQUIRE([GMP_ASM_DATA])
+AC_REQUIRE([GMP_ASM_LABEL_SUFFIX])
+AC_REQUIRE([GMP_ASM_UNDERSCORE])
+AC_CACHE_CHECK([how to switch to read-only data section],
+               gmp_cv_asm_rodata,
+[
+dnl Default to DATA on CPUs with split code/data caching, and TEXT
+dnl elsewhere.  i386 means generic x86, so use DATA on it.
+case $host in
+X86_PATTERN | x86_64-*-*)
+  gmp_cv_asm_rodata="$gmp_cv_asm_data" ;;
+*)
+  gmp_cv_asm_rodata="$gmp_cv_asm_text" ;;
+esac
+
+# Test program: one initialized const array, to see where the compiler
+# puts it.
+cat >conftest.c <<EOF
+extern const int foo[[]];              /* Suppresses C++'s suppression of foo */
+const int foo[[]] = {1,2,3};
+EOF
+echo "Test program:" >&AC_FD_CC
+cat conftest.c >&AC_FD_CC
+gmp_compile="$CC $CFLAGS $CPPFLAGS -S conftest.c >&AC_FD_CC"
+if AC_TRY_EVAL(gmp_compile); then
+  echo "Compiler output:" >&AC_FD_CC
+  cat conftest.s >&AC_FD_CC
+  # The label searched for below carries a leading underscore when
+  # GMP_ASM_UNDERSCORE found that global symbols have one.
+  if test $gmp_cv_asm_underscore = yes; then
+    tmp_gsym_prefix=_
+  else
+    tmp_gsym_prefix=
+  fi
+  # must see our label
+  if grep "^${tmp_gsym_prefix}foo$gmp_cv_asm_label_suffix" conftest.s >/dev/null 2>&AC_FD_CC; then
+    # take the last directive before our label (hence skipping segments
+    # getting debugging info etc)
+    tmp_match=`sed -n ["/^${tmp_gsym_prefix}foo$gmp_cv_asm_label_suffix/q
+                        /^[.   ]*data/p
+                        /^[.   ]*rdata/p
+                        /^[.   ]*text/p
+                        /^[.   ]*section/p
+                        /^[.   ]*csect/p
+                        /^[.   ]*CSECT/p"] conftest.s | sed -n '$p'`
+    echo "Match: $tmp_match" >&AC_FD_CC
+    # An empty match leaves the host based default in place.
+    if test -n "$tmp_match"; then
+      gmp_cv_asm_rodata=$tmp_match
+    fi
+  else
+    echo "Couldn't find label: ^${tmp_gsym_prefix}foo$gmp_cv_asm_label_suffix" >&AC_FD_CC
+  fi
+fi
+rm -f conftest*
+])
+echo ["define(<RODATA>, <$gmp_cv_asm_rodata>)"] >> $gmp_tmpconfigm4
+])
+
+
+dnl  GMP_ASM_GLOBL
+dnl  -------------
+dnl  The assembler directive to mark a label as a global symbol.
+dnl
+dnl  ia64 - .global is standard, according to the Intel documentation.
+dnl
+dnl  hppa - ".export foo,entry" is demanded by HP hppa "as".  ".global" is a
+dnl      kind of import.
+dnl
+dnl  other - .globl is usual.
+dnl
+dnl  "gas" tends to accept .globl everywhere, in addition to .export or
+dnl  .global or whatever the system assembler demands.
+
+dnl  The choice is made from $host alone, no assembler probe is run.  The
+dnl  result is cached in gmp_cv_asm_globl and appended to $gmp_tmpconfigm4 as
+dnl  GLOBL.
+AC_DEFUN([GMP_ASM_GLOBL],
+[AC_REQUIRE([GMP_ASM_TEXT])
+AC_CACHE_CHECK([for assembler global directive],
+                gmp_cv_asm_globl,
+[gmp_cv_asm_globl=.globl
+# hppa and ia64 are the only hosts given something other than .globl
+case $host in
+  hppa*-*-*)     gmp_cv_asm_globl=.export ;;
+  IA64_PATTERN)  gmp_cv_asm_globl=.global ;;
+esac
+])
+echo ["define(<GLOBL>, <$gmp_cv_asm_globl>)"] >> $gmp_tmpconfigm4
+])
+
+
+dnl  GMP_ASM_GLOBL_ATTR
+dnl  ------------------
+dnl  Do we need something after `GLOBL symbol'?
+
+dnl  The attribute is ",entry" when the global directive is .export, and
+dnl  empty otherwise.  It is cached in gmp_cv_asm_globl_attr and appended to
+dnl  $gmp_tmpconfigm4 as GLOBL_ATTR.
+AC_DEFUN([GMP_ASM_GLOBL_ATTR],
+[AC_REQUIRE([GMP_ASM_GLOBL])
+AC_CACHE_CHECK([for assembler global directive attribute],
+                gmp_cv_asm_globl_attr,
+[if test "$gmp_cv_asm_globl" = .export; then
+  gmp_cv_asm_globl_attr=",entry"
+else
+  gmp_cv_asm_globl_attr=""
+fi
+])
+echo ["define(<GLOBL_ATTR>, <$gmp_cv_asm_globl_attr>)"] >> $gmp_tmpconfigm4
+])
+
+
+dnl  GMP_ASM_TYPE
+dnl  ------------
+dnl  Can we say ".type", and how?
+dnl
+dnl  For i386 GNU/Linux ELF systems, and very likely other ELF systems,
+dnl  .type and .size are important on functions in shared libraries.  If
+dnl  .type is omitted and the mainline program references that function then
+dnl  the code will be copied down to the mainline at load time like a piece
+dnl  of data.  If .size is wrong or missing (it defaults to 4 bytes or some
+dnl  such) then incorrect bytes will be copied and a segv is the most likely
+dnl  result.  In any case such copying is not what's wanted, a .type
+dnl  directive will ensure a PLT entry is used.
+dnl
+dnl  In GMP the assembler functions are normally only used from within the
+dnl  library (since most programs are not interested in the low level
+dnl  routines), and in those circumstances a missing .type isn't fatal,
+dnl  letting the problem go unnoticed.  tests/mpn/t-asmtype.c aims to check
+dnl  for it.
+
+dnl  The type prefixes @, # and % are tried in turn in a directive of the
+dnl  form ".type sym,<prefix>function".  The first one that assembles without
+dnl  the "pseudo-op used outside of .def/.endef ignored" message is used.
+dnl
+dnl  The result is cached in gmp_cv_asm_type and appended to $gmp_tmpconfigm4
+dnl  as TYPE, a template with $1 standing for the symbol and $2 for the type
+dnl  word.  It is left empty when no prefix is accepted, which is not an
+dnl  error.
+AC_DEFUN([GMP_ASM_TYPE],
+[AC_CACHE_CHECK([for assembler .type directive],
+                gmp_cv_asm_type,
+[gmp_cv_asm_type=
+for gmp_tmp_prefix in @ \# %; do
+  GMP_TRY_ASSEMBLE([   .type   sym,${gmp_tmp_prefix}function],
+    [if grep "\.type pseudo-op used outside of \.def/\.endef ignored" conftest.out >/dev/null; then : ;
+    else
+      gmp_cv_asm_type=".type   \$][1,${gmp_tmp_prefix}\$][2"
+      break
+    fi])
+done
+rm -f conftest*
+])
+echo ["define(<TYPE>, <$gmp_cv_asm_type>)"] >> $gmp_tmpconfigm4
+])
+
+
+dnl  GMP_ASM_SIZE
+dnl  ------------
+dnl  Can we say `.size'?
+
+dnl  ".size sym,1" is assembled; it counts as supported when that succeeds
+dnl  without the "pseudo-op used outside of .def/.endef ignored" message.
+dnl
+dnl  The result is cached in gmp_cv_asm_size and appended to $gmp_tmpconfigm4
+dnl  as SIZE, a template with $1 standing for the symbol and $2 for the size.
+dnl  It is left empty when .size is not usable, which is not an error.
+AC_DEFUN([GMP_ASM_SIZE],
+[AC_CACHE_CHECK([for assembler .size directive],
+                gmp_cv_asm_size,
+[gmp_cv_asm_size=
+GMP_TRY_ASSEMBLE([     .size   sym,1],
+  [if grep "\.size pseudo-op used outside of \.def/\.endef ignored" conftest.out >/dev/null; then : ;
+  else
+    gmp_cv_asm_size=".size     \$][1,\$][2"
+  fi])
+])
+echo ["define(<SIZE>, <$gmp_cv_asm_size>)"] >> $gmp_tmpconfigm4
+])
+
+
+dnl  GMP_ASM_COFF_TYPE
+dnl  -----------------
+dnl  Determine whether the assembler supports COFF type information.
+dnl
+dnl  Currently this is only needed for mingw (and cygwin perhaps) and so is
+dnl  run only on the x86s, but it ought to work anywhere.
+dnl
+dnl  On MINGW, recent versions of the linker have an automatic import scheme
+dnl  for data in a DLL which is referenced by a mainline but without
+dnl  __declspec (__dllimport__) on the prototype.  It seems functions
+dnl  without type information are treated as data, or something, and calls
+dnl  to them from the mainline will crash.  gcc puts type information on the
+dnl  C functions it generates, we need to do the same for assembler
+dnl  functions.
+dnl
+dnl  This applies only to functions without __declspec(__dllimport__),
+dnl  ie. without __GMP_DECLSPEC in the case of libgmp, so it also works just
+dnl  to ensure all assembler functions used from outside libgmp have
+dnl  __GMP_DECLSPEC on their prototypes.  But this isn't an ideal situation,
+dnl  since we don't want perfectly valid calls going wrong just because
+dnl  there wasn't a prototype in scope.
+dnl
+dnl  When an auto-import takes place, the following warning is given by the
+dnl  linker.  This shouldn't be seen for any functions.
+dnl
+dnl      Info: resolving _foo by linking to __imp__foo (auto-import)
+dnl
+dnl
+dnl  COFF type directives look like the following
+dnl
+dnl      .def    _foo
+dnl      .scl    2
+dnl      .type   32
+dnl      .endef
+dnl
+dnl  _foo is the symbol with GSYM_PREFIX (_).  .scl is the storage class, 2
+dnl  for external, 3 for static.  .type is the object type, 32 for a
+dnl  function.
+dnl
+dnl  On an ELF system, this is (correctly) rejected due to .def, .endef and
+dnl  .scl being invalid, and .type not having enough arguments.
+
+dnl  The result is cached in gmp_cv_asm_x86_coff_type ("yes" or "no") and
+dnl  appended to $gmp_tmpconfigm4 as HAVE_COFF_TYPE.
+dnl
+dnl  tmp_gsym_prefix is set here from gmp_cv_asm_underscore rather than
+dnl  relying on a value left behind by some other macro, which would be
+dnl  missing if that other macro was not run or took its answer from the
+dnl  cache.
+AC_DEFUN([GMP_ASM_COFF_TYPE],
+[AC_REQUIRE([GMP_ASM_TEXT])
+AC_REQUIRE([GMP_ASM_GLOBL])
+AC_REQUIRE([GMP_ASM_GLOBL_ATTR])
+AC_REQUIRE([GMP_ASM_LABEL_SUFFIX])
+AC_REQUIRE([GMP_ASM_UNDERSCORE])
+AC_CACHE_CHECK([for assembler COFF type directives],
+               gmp_cv_asm_x86_coff_type,
+[if test "$gmp_cv_asm_underscore" = yes; then
+  tmp_gsym_prefix=_
+else
+  tmp_gsym_prefix=
+fi
+GMP_TRY_ASSEMBLE(
+[      $gmp_cv_asm_text
+       $gmp_cv_asm_globl ${tmp_gsym_prefix}foo$gmp_cv_asm_globl_attr
+       .def    ${tmp_gsym_prefix}foo
+       .scl    2
+       .type   32
+       .endef
+${tmp_gsym_prefix}foo$gmp_cv_asm_label_suffix
+],
+  [gmp_cv_asm_x86_coff_type=yes],
+  [gmp_cv_asm_x86_coff_type=no])
+])
+echo ["define(<HAVE_COFF_TYPE>, <$gmp_cv_asm_x86_coff_type>)"] >> $gmp_tmpconfigm4
+])
+
+
+dnl  GMP_ASM_LSYM_PREFIX
+dnl  -------------------
+dnl  What is the prefix for a local label?
+dnl
+dnl  The prefixes tested are,
+dnl
+dnl      L  - usual for underscore systems
+dnl      .L - usual for non-underscore systems
+dnl      $  - alpha (gas and OSF system assembler)
+dnl      L$ - hppa (gas and HP-UX system assembler)
+dnl
+dnl  The default is "L" if the tests fail for any reason.  There's a good
+dnl  chance this will be adequate, since on most systems labels are local
+dnl  anyway unless given a ".globl", and an "L" will avoid clashes with
+dnl  other identifiers.
+dnl
+dnl  For gas, ".L" is normally purely local to the assembler, it doesn't get
+dnl  put into the object file at all.  This style is preferred, to keep the
+dnl  object files nice and clean.
+dnl
+dnl  BSD format nm produces a line like
+dnl
+dnl      00000000 t Lgurkmacka
+dnl
+dnl  The symbol code is normally "t" for text, but any lower case letter
+dnl  indicates a local definition.
+dnl
+dnl  Code "n" is for a debugging symbol, OSF "nm -B" gives that as an upper
+dnl  case "N" for a local.
+dnl
+dnl  HP-UX nm prints an error message (though seems to give a 0 exit) if
+dnl  there's no symbols at all in an object file, hence the use of "dummy".
+
+dnl  Each candidate prefix is put on a label "gurkmacka" and the object file
+dnl  is examined with $NM.  A prefix whose label is absent from the nm output
+dnl  is taken immediately.  Otherwise the first prefix whose label shows up
+dnl  as a local (lower case code, or N) is remembered and used if nothing
+dnl  better turns up.  If $NM itself fails the search stops with a warning.
+dnl
+dnl  The result is cached in gmp_cv_asm_lsym_prefix, appended to
+dnl  $gmp_tmpconfigm4 as LSYM_PREFIX, and also goes to the C side through
+dnl  AC_DEFINE_UNQUOTED as the string LSYM_PREFIX.
+dnl
+dnl  NOTE(review): the unquoted $L in the candidate list is expanded by the
+dnl  shell as a variable called L, so unless L happens to be set that entry
+dnl  looks like it adds nothing to the loop.  Confirm whether a literal
+dnl  dollar-L prefix was meant to be tried.
+AC_DEFUN([GMP_ASM_LSYM_PREFIX],
+[AC_REQUIRE([GMP_ASM_LABEL_SUFFIX])
+AC_REQUIRE([GMP_ASM_TEXT])
+AC_REQUIRE([GMP_PROG_NM])
+AC_CACHE_CHECK([for assembler local label prefix],
+               gmp_cv_asm_lsym_prefix,
+[gmp_tmp_pre_appears=yes
+for gmp_tmp_pre in L .L $L $ L$; do
+  echo "Trying $gmp_tmp_pre" >&AC_FD_CC
+  GMP_TRY_ASSEMBLE(
+[      $gmp_cv_asm_text
+dummy${gmp_cv_asm_label_suffix}
+${gmp_tmp_pre}gurkmacka${gmp_cv_asm_label_suffix}],
+  [if $NM conftest.$OBJEXT >conftest.nm 2>&AC_FD_CC; then : ; else
+    cat conftest.nm >&AC_FD_CC
+    AC_MSG_WARN(["$NM" failure])
+    break
+  fi
+  cat conftest.nm >&AC_FD_CC
+  if grep gurkmacka conftest.nm >/dev/null; then : ; else
+    # no mention of the symbol, this is good
+    echo "$gmp_tmp_pre label doesn't appear in object file at all (good)" >&AC_FD_CC
+    gmp_cv_asm_lsym_prefix="$gmp_tmp_pre"
+    gmp_tmp_pre_appears=no
+    break
+  fi
+  if grep [' [a-zN] .*gurkmacka'] conftest.nm >/dev/null; then
+    # symbol mentioned as a local, use this if nothing better
+    echo "$gmp_tmp_pre label is local but still in object file" >&AC_FD_CC
+    # only the first such prefix is kept, later ones don't replace it
+    if test -z "$gmp_cv_asm_lsym_prefix"; then
+      gmp_cv_asm_lsym_prefix="$gmp_tmp_pre"
+    fi
+  else
+    echo "$gmp_tmp_pre label is something unknown" >&AC_FD_CC
+  fi
+  ])
+done
+rm -f conftest*
+if test -z "$gmp_cv_asm_lsym_prefix"; then
+  gmp_cv_asm_lsym_prefix=L
+  AC_MSG_WARN([cannot determine local label, using default $gmp_cv_asm_lsym_prefix])
+fi
+# for development purposes, note whether we got a purely temporary local label
+echo "Local label appears in object files: $gmp_tmp_pre_appears" >&AC_FD_CC
+])
+echo ["define(<LSYM_PREFIX>, <${gmp_cv_asm_lsym_prefix}>)"] >> $gmp_tmpconfigm4
+AC_DEFINE_UNQUOTED(LSYM_PREFIX, "$gmp_cv_asm_lsym_prefix",
+                   [Assembler local label prefix])
+])
+
+
+dnl  GMP_ASM_W32
+dnl  -----------
+dnl  How to define a 32-bit word.
+dnl
+dnl  FIXME: This test is not right for ia64-*-hpux*.  The directive should
+dnl  be "data4", but the W32 macro is not currently used by the mpn/ia64 asm
+dnl  files.
+
+dnl  On HP-UX ".word" is used without any probing.  Elsewhere .long, .word
+dnl  and data4 are tried in turn: one item with value 0 is emitted in the
+dnl  data section ahead of a global label foo, and the first directive for
+dnl  which the value pulled out of the $NM line for foo is 4 is used.
+dnl
+dnl  The result is cached in gmp_cv_asm_w32 and appended to $gmp_tmpconfigm4
+dnl  as W32.  It is a fatal configure error if nothing is found.
+AC_DEFUN([GMP_ASM_W32],
+[AC_REQUIRE([GMP_ASM_DATA])
+AC_REQUIRE([GMP_ASM_BYTE])
+AC_REQUIRE([GMP_ASM_GLOBL])
+AC_REQUIRE([GMP_ASM_LABEL_SUFFIX])
+AC_REQUIRE([GMP_PROG_NM])
+AC_CACHE_CHECK([how to define a 32-bit word],
+              gmp_cv_asm_w32,
+[case $host in
+  *-*-hpux*)
+    # FIXME: HPUX puts first symbol at 0x40000000, breaking our assumption
+    # that it's at 0x0.  We'll have to declare another symbol before the
+    # .long/.word and look at the distance between the two symbols.  The
+    # only problem is that the sed expression(s) barfs (on Solaris, for
+    # example) for the symbol with value 0.  For now, HPUX uses .word.
+    gmp_cv_asm_w32=".word"
+    ;;
+  *-*-*)
+    gmp_tmp_val=
+    for gmp_tmp_op in .long .word data4; do
+      GMP_TRY_ASSEMBLE(
+[      $gmp_cv_asm_data
+       $gmp_cv_asm_globl       foo
+       $gmp_tmp_op     0
+foo$gmp_cv_asm_label_suffix
+       $gmp_cv_asm_byte        0],
+        [gmp_tmp_val=[`$NM conftest.$OBJEXT | grep foo | \
+          sed -e 's;[[][0-9][]]\(.*\);\1;' -e 's;[^1-9]*\([0-9]*\).*;\1;'`]
+        # foo at 4 means the directive emitted exactly 4 bytes ahead of it
+        if test "$gmp_tmp_val" = 4; then
+          gmp_cv_asm_w32="$gmp_tmp_op"
+          break
+        fi])
+    done
+    rm -f conftest*
+    ;;
+esac
+if test -z "$gmp_cv_asm_w32"; then
+  AC_MSG_ERROR([cannot determine how to define a 32-bit word])
+fi
+])
+echo ["define(<W32>, <$gmp_cv_asm_w32>)"] >> $gmp_tmpconfigm4
+])
+
+
+dnl  GMP_ASM_X86_GOT_UNDERSCORE
+dnl  --------------------------
+dnl  Determine whether i386 _GLOBAL_OFFSET_TABLE_ needs an additional
+dnl  underscore prefix.
+dnl
+dnl    SVR4      - the standard is _GLOBAL_OFFSET_TABLE_
+dnl    GNU/Linux - follows SVR4
+dnl    OpenBSD   - an a.out underscore system, uses __GLOBAL_OFFSET_TABLE_
+dnl    NetBSD    - also an a.out underscore system, but _GLOBAL_OFFSET_TABLE_
+dnl
+dnl  The test attempts to link a program using _GLOBAL_OFFSET_TABLE_ or
+dnl  __GLOBAL_OFFSET_TABLE_ to see which works.
+dnl
+dnl  $lt_prog_compiler_pic is included in the compile because old versions
+dnl  of gas wouldn't accept PIC idioms without the right option (-K).  This
+dnl  is the same as what libtool and mpn/Makeasm.am will do.
+dnl
+dnl  $lt_prog_compiler_pic is also included in the link because OpenBSD ld
+dnl  won't accept an R_386_GOTPC relocation without the right options.  This
+dnl  is not what's done by the Makefiles when building executables, but
+dnl  let's hope it's ok (it works fine with gcc).
+dnl
+dnl  The fallback is no additional underscore, on the basis that this will
+dnl  suit SVR4/ELF style systems, which should be much more common than
+dnl  a.out systems with shared libraries.
+dnl
+dnl  Note that it's not an error for the tests to fail, since for instance
+dnl  cygwin, mingw and djgpp don't have a _GLOBAL_OFFSET_TABLE_ scheme at
+dnl  all.
+dnl
+dnl  Perhaps $CCAS could be asked to do the linking as well as the
+dnl  assembling, but in the Makefiles it's only used for assembling, so let's
+dnl  keep it that way.
+dnl
+dnl  The test here is run even under --disable-shared, so that PIC objects
+dnl  can be built and tested by the tune/many.pl development scheme.  The
+dnl  tests will be reasonably quick and won't give a fatal error, so this
+dnl  arrangement is ok.  AC_LIBTOOL_PROG_COMPILER_PIC does its
+dnl  $lt_prog_compiler_pic setups even for --disable-shared too.
+
+dnl  The result is cached in gmp_cv_asm_x86_got_underscore as "yes", "no", or
+dnl  "not applicable" when neither spelling assembles and links.
+dnl  GOT_GSYM_PREFIX is set to "_" for yes and to empty for anything else.
+AC_DEFUN([GMP_ASM_X86_GOT_UNDERSCORE],
+[AC_REQUIRE([GMP_ASM_TEXT])
+AC_REQUIRE([GMP_ASM_GLOBL])
+AC_REQUIRE([GMP_ASM_GLOBL_ATTR])
+AC_REQUIRE([GMP_ASM_LABEL_SUFFIX])
+AC_REQUIRE([GMP_ASM_UNDERSCORE])
+AC_REQUIRE([AC_LIBTOOL_PROG_COMPILER_PIC])
+AC_CACHE_CHECK([if _GLOBAL_OFFSET_TABLE_ is prefixed by underscore],
+               gmp_cv_asm_x86_got_underscore,
+[gmp_cv_asm_x86_got_underscore="not applicable"
+# main needs the usual global symbol prefix so the link can find it
+tmp_gsym_prefix=
+if test $gmp_cv_asm_underscore = yes; then
+  tmp_gsym_prefix=_
+fi
+# try without the extra underscore first, then with it
+for tmp_underscore in "" "_"; do
+  cat >conftest.s <<EOF
+       $gmp_cv_asm_text
+       $gmp_cv_asm_globl ${tmp_gsym_prefix}main$gmp_cv_asm_globl_attr
+${tmp_gsym_prefix}main$gmp_cv_asm_label_suffix
+       addl    $ ${tmp_underscore}_GLOBAL_OFFSET_TABLE_, %ebx
+EOF
+  gmp_compile="$CCAS $CFLAGS $CPPFLAGS $lt_prog_compiler_pic conftest.s >&AC_FD_CC && $CC $CFLAGS $CPPFLAGS $lt_prog_compiler_pic conftest.$OBJEXT >&AC_FD_CC"
+  if AC_TRY_EVAL(gmp_compile); then
+    case "$tmp_underscore" in
+      _) gmp_cv_asm_x86_got_underscore=yes ;;
+      *) gmp_cv_asm_x86_got_underscore=no ;;
+    esac
+    break
+  fi
+done
+rm -f conftest* a.out b.out a.exe a_out.exe
+])
+case "$gmp_cv_asm_x86_got_underscore" in
+yes)
+  GMP_DEFINE(GOT_GSYM_PREFIX, [_])
+  ;;
+*)
+  GMP_DEFINE(GOT_GSYM_PREFIX, [])
+  ;;
+esac
+])
+
+
+dnl  GMP_ASM_X86_GOT_EAX_OK(CC+CFLAGS, [ACTION-YES] [, ACTION-NO])
+dnl  -------------------------------------------------------------
+dnl  Determine whether _GLOBAL_OFFSET_TABLE_ used with %eax is ok.
+dnl
+dnl  An instruction
+dnl
+dnl          addl  $_GLOBAL_OFFSET_TABLE_, %eax
+dnl
+dnl  is incorrectly assembled by gas 2.12 (or thereabouts) and earlier.  It
+dnl  puts an addend 2 into the R_386_GOTPC relocation, but it should be 1
+dnl  for this %eax form being a 1 byte opcode (with other registers it's 2
+dnl  opcode bytes).  See note about this in mpn/x86/README too.
+dnl
+dnl  We assemble this, surrounded by some unlikely byte sequences as
+dnl  delimiters, and check for the bad output.
+dnl
+dnl  This is for use by compiler probing in GMP_PROG_CC_WORKS, so the result
+dnl  is not cached.
+dnl
+dnl  This test is not specific to gas, but old gas is the only assembler we
+dnl  know of with this problem.  The Solaris assembler has been seen coming
+dnl  out ok.
+dnl
+dnl  ".text" is hard coded because this macro is wanted before GMP_ASM_TEXT.
+dnl  This should be fine, ".text" is normal on x86 systems, and certainly
+dnl  will be fine with the offending gas.
+dnl
+dnl  If an error occurs when assembling, we consider the assembler ok, since
+dnl  the bad output does not occur.  This happens for instance on mingw,
+dnl  where _GLOBAL_OFFSET_TABLE_ results in a bfd error, since there's no
+dnl  GOT etc in PE object files.
+dnl
+dnl  This test is used before the object file extension has been determined,
+dnl  so we force output to conftest.o.  Using -o with -c is not portable,
+dnl  but we think all x86 compilers will accept -o with -c, certainly gcc
+dnl  does.
+dnl
+dnl  -fPIC is hard coded here, because this test is for use before libtool
+dnl  has established the pic options.  It's right for gcc, but perhaps not
+dnl  other compilers.
+
+dnl  $1 is the compiler command and flags used to assemble the test, with
+dnl  "-fPIC -o conftest.o -c" added.  $2 is run when the output is judged
+dnl  good, which includes the case of the assemble failing.  $3 is run when
+dnl  the bad byte sequence is found.
+dnl
+dnl  The awk program slides a 21 byte window over the "od -b" dump of the
+dnl  object and prints "no" if it ever equals the want table: the 8 leading
+dnl  delimiter bytes, then 005 002 000 000 000 (presumably the one byte
+dnl  opcode followed by the bad addend 2), then the 8 trailing delimiter
+dnl  bytes.  Otherwise it prints "yes".
+AC_DEFUN([GMP_ASM_X86_GOT_EAX_OK],
+[echo "Testing gas GOT with eax good" >&AC_FD_CC
+cat >conftest.awk <<\EOF
+[BEGIN {
+  want[0]  = "001"
+  want[1]  = "043"
+  want[2]  = "105"
+  want[3]  = "147"
+  want[4]  = "211"
+  want[5]  = "253"
+  want[6]  = "315"
+  want[7]  = "357"
+
+  want[8]  = "005"
+  want[9]  = "002"
+  want[10] = "000"
+  want[11] = "000"
+  want[12] = "000"
+
+  want[13] = "376"
+  want[14] = "334"
+  want[15] = "272"
+  want[16] = "230"
+  want[17] = "166"
+  want[18] = "124"
+  want[19] = "062"
+  want[20] = "020"
+
+  result = "yes"
+}
+{
+  for (f = 2; f <= NF; f++)
+    {
+      for (i = 0; i < 20; i++)
+        got[i] = got[i+1];
+      got[20] = $f;
+
+      found = 1
+      for (i = 0; i < 21; i++)
+        if (got[i] != want[i])
+          {
+            found = 0
+            break
+          }
+      if (found)
+        {
+          result = "no"
+          exit
+        }
+    }
+}
+END {
+  print result
+}
+]EOF
+cat >conftest.s <<\EOF
+[      .text
+       .byte   1, 35, 69, 103, 137, 171, 205, 239
+       addl    $_GLOBAL_OFFSET_TABLE_, %eax
+       .byte   254, 220, 186, 152, 118, 84, 50, 16
+]EOF
+# Stays yes if the assemble fails, since then no bad output was produced.
+tmp_got_good=yes
+gmp_compile="$1 -fPIC -o conftest.o -c conftest.s >&AC_FD_CC 2>&1"
+if AC_TRY_EVAL(gmp_compile); then
+  tmp_got_good=`od -b conftest.o | $AWK -f conftest.awk`
+fi
+rm -f conftest.*
+echo "Result: $tmp_got_good" >&AC_FD_CC
+if test "$tmp_got_good" = no; then
+  ifelse([$3],,:,[$3])
+else
+  ifelse([$2],,:,[$2])
+fi
+])
+
+
+dnl  GMP_ASM_X86_MMX([ACTION-IF-YES][,ACTION-IF-NO])
+dnl  -----------------------------------------------
+dnl  Determine whether the assembler supports MMX instructions.
+dnl
+dnl  This macro is wanted before GMP_ASM_TEXT, so ".text" is hard coded
+dnl  here.  ".text" is believed to be correct on all x86 systems.  Actually
+dnl  ".text" probably isn't needed at all, at least for just checking
+dnl  instruction syntax.
+dnl
+dnl  "movq %mm0, %mm1" should assemble to "0f 6f c8", but Solaris 2.6 and
+dnl  2.7 wrongly assemble it to "0f 6f c1" (that being the reverse "movq
+dnl  %mm1, %mm0").  It seems more trouble than it's worth to work around
+dnl  this in the code, so just detect and reject.
+
+dnl  The result is cached in gmp_cv_asm_x86_mmx as "yes", "no" (movq doesn't
+dnl  assemble) or "movq-bug" (a Solaris host where "dis" shows the bytes
+dnl  0f 6f c1).  On Solaris, if "dis" can't be run a warning is given and the
+dnl  result stays "yes".
+dnl
+dnl  $1 is run for "yes", $2 for anything else.  A warning box is printed for
+dnl  "no" and for "movq-bug".
+AC_DEFUN([GMP_ASM_X86_MMX],
+[AC_CACHE_CHECK([if the assembler knows about MMX instructions],
+               gmp_cv_asm_x86_mmx,
+[GMP_TRY_ASSEMBLE(
+[      .text
+       movq    %mm0, %mm1],
+[gmp_cv_asm_x86_mmx=yes
+case $host in
+*-*-solaris*)
+  # Disassemble what was just built and look for the reversed operand form.
+  if (dis conftest.$OBJEXT >conftest.out) 2>/dev/null; then
+    if grep "0f 6f c1" conftest.out >/dev/null; then
+      gmp_cv_asm_x86_mmx=movq-bug
+    fi
+  else
+    AC_MSG_WARN(["dis" not available to check for "as" movq bug])
+  fi
+esac],
+[gmp_cv_asm_x86_mmx=no])])
+
+case $gmp_cv_asm_x86_mmx in
+movq-bug)
+  AC_MSG_WARN([+----------------------------------------------------------])
+  AC_MSG_WARN([| WARNING WARNING WARNING])
+  AC_MSG_WARN([| Host CPU has MMX code, but the assembler])
+  AC_MSG_WARN([|     $CCAS $CFLAGS $CPPFLAGS])
+  AC_MSG_WARN([| has the Solaris 2.6 and 2.7 bug where register to register])
+  AC_MSG_WARN([| movq operands are reversed.])
+  AC_MSG_WARN([| Non-MMX replacements will be used.])
+  AC_MSG_WARN([| This will be an inferior build.])
+  AC_MSG_WARN([+----------------------------------------------------------])
+  ;;
+no)
+  AC_MSG_WARN([+----------------------------------------------------------])
+  AC_MSG_WARN([| WARNING WARNING WARNING])
+  AC_MSG_WARN([| Host CPU has MMX code, but it can't be assembled by])
+  AC_MSG_WARN([|     $CCAS $CFLAGS $CPPFLAGS])
+  AC_MSG_WARN([| Non-MMX replacements will be used.])
+  AC_MSG_WARN([| This will be an inferior build.])
+  AC_MSG_WARN([+----------------------------------------------------------])
+  ;;
+esac
+if test "$gmp_cv_asm_x86_mmx" = yes; then
+  ifelse([$1],,:,[$1])
+else
+  ifelse([$2],,:,[$2])
+fi
+])
+
+
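+dnl  GMP_ASM_X86_SHLDL_CL
+dnl  --------------------
+dnl  Determine whether the assembler accepts an explicit %cl count operand
+dnl  on shldl, as in "shldl %cl, %eax, %ebx".  WANT_SHLDL_CL is set to 1 if
+dnl  so, or 0 if not.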
+
+dnl  Assembles "shldl %cl, %eax, %ebx".  The result is cached in
+dnl  gmp_cv_asm_x86_shldl_cl ("yes" or "no") and WANT_SHLDL_CL is set to 1
+dnl  for yes, 0 otherwise.
+AC_DEFUN([GMP_ASM_X86_SHLDL_CL],
+[AC_REQUIRE([GMP_ASM_TEXT])
+AC_CACHE_CHECK([if the assembler takes cl with shldl],
+               gmp_cv_asm_x86_shldl_cl,
+[GMP_TRY_ASSEMBLE(
+[      $gmp_cv_asm_text
+       shldl   %cl, %eax, %ebx],
+  [gmp_cv_asm_x86_shldl_cl=yes],
+  [gmp_cv_asm_x86_shldl_cl=no])
+])
+case "$gmp_cv_asm_x86_shldl_cl" in
+yes)
+  GMP_DEFINE(WANT_SHLDL_CL,1)
+  ;;
+*)
+  GMP_DEFINE(WANT_SHLDL_CL,0)
+  ;;
+esac
+])
+
+
+dnl  GMP_ASM_X86_SSE2([ACTION-IF-YES][,ACTION-IF-NO])
+dnl  ------------------------------------------------
+dnl  Determine whether the assembler supports SSE2 instructions.
+dnl
+dnl  This macro is wanted before GMP_ASM_TEXT, so ".text" is hard coded
+dnl  here.  ".text" is believed to be correct on all x86 systems, certainly
+dnl  it's all GMP_ASM_TEXT gives currently.  Actually ".text" probably isn't
+dnl  needed at all, at least for just checking instruction syntax.
+
+dnl  Assembles "paddq %mm0, %mm1".  The result is cached in
+dnl  gmp_cv_asm_x86_sse2 ("yes" or "no").  $1 is run for yes; for anything
+dnl  else a warning box is printed and $2 is run.
+AC_DEFUN([GMP_ASM_X86_SSE2],
+[AC_CACHE_CHECK([if the assembler knows about SSE2 instructions],
+               gmp_cv_asm_x86_sse2,
+[GMP_TRY_ASSEMBLE(
+[      .text
+       paddq   %mm0, %mm1],
+  [gmp_cv_asm_x86_sse2=yes],
+  [gmp_cv_asm_x86_sse2=no])
+])
+if test "$gmp_cv_asm_x86_sse2" = yes; then
+  ifelse([$1],,:,[$1])
+else
+  AC_MSG_WARN([+----------------------------------------------------------])
+  AC_MSG_WARN([| WARNING WARNING WARNING])
+  AC_MSG_WARN([| Host CPU has SSE2 code, but it can't be assembled by])
+  AC_MSG_WARN([|     $CCAS $CFLAGS $CPPFLAGS])
+  AC_MSG_WARN([| Non-SSE2 replacements will be used.])
+  AC_MSG_WARN([| This will be an inferior build.])
+  AC_MSG_WARN([+----------------------------------------------------------])
+  ifelse([$2],,:,[$2])
+fi
+])
+
+
+dnl  GMP_ASM_X86_MCOUNT
+dnl  ------------------
+dnl  Find out how to call mcount for profiling on an x86 system.
+dnl
+dnl  A dummy function is compiled and the ".s" output examined.  The pattern
+dnl  matching might be a bit fragile, but should work at least with gcc on
+dnl  sensible systems.  Certainly it's better than hard coding a table of
+dnl  conventions.
+dnl
+dnl  For non-PIC, any ".data" is taken to mean a counter might be passed.
+dnl  It's assumed a movl will set it up, and the right register is taken
+dnl  from that movl.  Any movl involving %esp is ignored (a frame pointer
+dnl  setup normally).
+dnl
+dnl  For PIC, any ".data" is similarly interpreted, but a GOTOFF identifies
+dnl  the line setting up the right register.
+dnl
+dnl  In both cases a line with "mcount" identifies the call and that line is
+dnl  used literally.
+dnl
+dnl  On some systems (eg. FreeBSD 3.5) gcc emits ".data" but doesn't use it,
+dnl  so it's not an error to have .data but then not find a register.
+dnl
+dnl  Variations in mcount conventions on different x86 systems can be found
+dnl  in gcc config/i386.  mcount can have a "_" prefix or be .mcount or
+dnl  _mcount_ptr, and for PIC it can be called through a GOT entry, or via
+dnl  the PLT.  If a pointer to a counter is required it's passed in %eax or
+dnl  %edx.
+dnl
+dnl  Flags to specify PIC are taken from $lt_prog_compiler_pic set by
+dnl  AC_PROG_LIBTOOL.
+dnl
+dnl  Enhancement: Cache the values determined here. But what's the right way
+dnl  to get two variables (mcount_nonpic_reg and mcount_nonpic_call say) set
+dnl  from one block of commands?
+
+dnl  Passes MCOUNT_NONPIC_REG, MCOUNT_NONPIC_CALL, MCOUNT_PIC_REG and
+dnl  MCOUNT_PIC_CALL to GMP_DEFINE_RAW.  The non-PIC pair is probed only when
+dnl  $enable_static is yes and the PIC pair only when $enable_shared is yes;
+dnl  a pair that isn't probed is written out with whatever the shell
+dnl  variables hold at that point.
+dnl
+dnl  It is a fatal configure error if a probe that is run can't compile the
+dnl  test program or finds no line matching "call.*mcount".  Nothing is
+dnl  cached.
+AC_DEFUN([GMP_ASM_X86_MCOUNT],
+[AC_REQUIRE([AC_ENABLE_SHARED])
+AC_REQUIRE([AC_PROG_LIBTOOL])
+AC_MSG_CHECKING([how to call x86 mcount])
+cat >conftest.c <<EOF
+foo(){bar();}
+EOF
+
+# Non-PIC convention, probed for a static build.
+if test "$enable_static" = yes; then
+  gmp_asmout_compile="$CC $CFLAGS $CPPFLAGS -S conftest.c 1>&AC_FD_CC"
+  if AC_TRY_EVAL(gmp_asmout_compile); then
+    if grep '\.data' conftest.s >/dev/null; then
+      mcount_nonpic_reg=`sed -n ['/esp/!s/.*movl.*,\(%[a-z]*\).*$/\1/p'] conftest.s`
+    else
+      mcount_nonpic_reg=
+    fi
+    mcount_nonpic_call=`grep 'call.*mcount' conftest.s`
+    if test -z "$mcount_nonpic_call"; then
+      AC_MSG_ERROR([Cannot find mcount call for non-PIC])
+    fi
+  else
+    AC_MSG_ERROR([Cannot compile test program for non-PIC])
+  fi
+fi
+
+# PIC convention, probed for a shared build, compiling with the libtool PIC
+# flags.
+if test "$enable_shared" = yes; then
+  gmp_asmout_compile="$CC $CFLAGS $CPPFLAGS $lt_prog_compiler_pic -S conftest.c 1>&AC_FD_CC"
+  if AC_TRY_EVAL(gmp_asmout_compile); then
+    if grep '\.data' conftest.s >/dev/null; then
+      case $lt_prog_compiler_pic in
+        *-DDLL_EXPORT*)
+          # Windows DLLs have non-PIC style mcount
+          mcount_pic_reg=`sed -n ['/esp/!s/.*movl.*,\(%[a-z]*\).*$/\1/p'] conftest.s`
+          ;;
+        *)
+          mcount_pic_reg=`sed -n ['s/.*GOTOFF.*,\(%[a-z]*\).*$/\1/p'] conftest.s`
+          ;;
+      esac
+    else
+      mcount_pic_reg=
+    fi
+    mcount_pic_call=`grep 'call.*mcount' conftest.s`
+    if test -z "$mcount_pic_call"; then
+      AC_MSG_ERROR([Cannot find mcount call for PIC])
+    fi
+  else
+    AC_MSG_ERROR([Cannot compile test program for PIC])
+  fi
+fi
+
+GMP_DEFINE_RAW(["define(<MCOUNT_NONPIC_REG>, <\`$mcount_nonpic_reg'>)"])
+GMP_DEFINE_RAW(["define(<MCOUNT_NONPIC_CALL>,<\`$mcount_nonpic_call'>)"])
+GMP_DEFINE_RAW(["define(<MCOUNT_PIC_REG>,    <\`$mcount_pic_reg'>)"])
+GMP_DEFINE_RAW(["define(<MCOUNT_PIC_CALL>,   <\`$mcount_pic_call'>)"])
+
+rm -f conftest.*
+AC_MSG_RESULT([determined])
+])
+
+
+dnl  GMP_ASM_IA64_ALIGN_OK
+dnl  ---------------------
+dnl  Determine whether .align correctly pads with nop instructions in a text
+dnl  segment.
+dnl
+dnl  gas 2.14 and earlier byte swaps its padding bundle on big endian
+dnl  systems, which is incorrect (endianness only changes data).  What
+dnl  should be "nop.m / nop.f / nop.i" comes out as "break" instructions.
+dnl
+dnl  The test here detects the bad case, and assumes anything else is ok
+dnl  (there are many sensible nop bundles, so it'd be impractical to try to
+dnl  match everything good).
+
+dnl  Two instruction bundles separated by ".align 32" are assembled.  The awk
+dnl  program slides a 48 byte window over the "od -b" dump of the object and
+dnl  prints "no" if it ever equals the want table (16 bytes, then the 16
+dnl  bytes presumably forming the bad padding bundle, then 16 more bytes),
+dnl  otherwise "yes".
+dnl
+dnl  The result is cached in gmp_cv_asm_ia64_align_ok and passed to
+dnl  GMP_DEFINE_RAW as IA64_ALIGN_OK.  If the test can't be assembled a
+dnl  warning is given and the result is "yes".
+AC_DEFUN([GMP_ASM_IA64_ALIGN_OK],
+[AC_CACHE_CHECK([whether assembler .align padding is good],
+               gmp_cv_asm_ia64_align_ok,
+[cat >conftest.awk <<\EOF
+[BEGIN {
+  want[0]  = "011"
+  want[1]  = "160"
+  want[2]  = "074"
+  want[3]  = "040"
+  want[4]  = "000"
+  want[5]  = "040"
+  want[6]  = "020"
+  want[7]  = "221"
+  want[8]  = "114"
+  want[9]  = "000"
+  want[10] = "100"
+  want[11] = "200"
+  want[12] = "122"
+  want[13] = "261"
+  want[14] = "000"
+  want[15] = "200"
+
+  want[16] = "000"
+  want[17] = "004"
+  want[18] = "000"
+  want[19] = "000"
+  want[20] = "000"
+  want[21] = "000"
+  want[22] = "002"
+  want[23] = "000"
+  want[24] = "000"
+  want[25] = "000"
+  want[26] = "000"
+  want[27] = "001"
+  want[28] = "000"
+  want[29] = "000"
+  want[30] = "000"
+  want[31] = "014"
+
+  want[32] = "011"
+  want[33] = "270"
+  want[34] = "140"
+  want[35] = "062"
+  want[36] = "000"
+  want[37] = "040"
+  want[38] = "240"
+  want[39] = "331"
+  want[40] = "160"
+  want[41] = "000"
+  want[42] = "100"
+  want[43] = "240"
+  want[44] = "343"
+  want[45] = "371"
+  want[46] = "000"
+  want[47] = "200"
+
+  result = "yes"
+}
+{
+  for (f = 2; f <= NF; f++)
+    {
+      for (i = 0; i < 47; i++)
+        got[i] = got[i+1];
+      got[47] = $f;
+
+      found = 1
+      for (i = 0; i < 48; i++)
+        if (got[i] != want[i])
+          {
+            found = 0
+            break
+          }
+      if (found)
+        {
+          result = "no"
+          exit
+        }
+    }
+}
+END {
+  print result
+}
+]EOF
+GMP_TRY_ASSEMBLE(
+[      .text
+       .align  32
+{ .mmi;        add     r14 = r15, r16
+       add     r17 = r18, r19
+       add     r20 = r21, r22 ;; }
+       .align  32
+{ .mmi;        add     r23 = r24, r25
+       add     r26 = r27, r28
+       add     r29 = r30, r31 ;; }
+],
+  [gmp_cv_asm_ia64_align_ok=`od -b conftest.$OBJEXT | $AWK -f conftest.awk`],
+  [AC_MSG_WARN([oops, cannot compile test program])
+   gmp_cv_asm_ia64_align_ok=yes])
+])
+GMP_DEFINE_RAW(["define(<IA64_ALIGN_OK>, <\`$gmp_cv_asm_ia64_align_ok'>)"])
+])
+
+
+
+
+dnl  GMP_ASM_M68K_INSTRUCTION
+dnl  ------------------------
+dnl  Determine whether the assembler wants size suffixes written as "addl"
+dnl  or "add.l", and registers written as "d0" or "%d0".
+dnl
+dnl  Not sure if ".l" and "%" are independent settings, but it doesn't hurt
+dnl  to try all four possibilities.  Note that the % ones must be first, so
+dnl  "d0" won't be interpreted as a label.
+dnl
+dnl  gas 1.92.3 on NetBSD 1.4 needs to be tested with a two operand
+dnl  instruction.  It takes registers without "%", but a single operand
+dnl  "clrl %d0" only gives a warning, not an error.
+dnl
+dnl  The first candidate instruction that assembles is cached in
+dnl  gmp_cv_asm_m68k_instruction; configure stops with an error if none
+dnl  does.  The choice is then split into want_dot_size and
+dnl  want_register_percent (each "yes" or "no"), which are passed to
+dnl  GMP_DEFINE_RAW as defines of WANT_DOT_SIZE and WANT_REGISTER_PERCENT.
+
+AC_DEFUN([GMP_ASM_M68K_INSTRUCTION],
+[AC_REQUIRE([GMP_ASM_TEXT])
+AC_CACHE_CHECK([assembler instruction and register style],
+               gmp_cv_asm_m68k_instruction,
+[for i in "addl %d0,%d1" "add.l %d0,%d1" "addl d0,d1" "add.l d0,d1"; do
+  GMP_TRY_ASSEMBLE(
+    [  $gmp_cv_asm_text
+       $i],
+    [gmp_cv_asm_m68k_instruction=$i
+    rm -f conftest*
+    break])
+done
+if test -z "$gmp_cv_asm_m68k_instruction"; then
+  AC_MSG_ERROR([cannot determine assembler instruction and register style])
+fi
+])
+case $gmp_cv_asm_m68k_instruction in
+"addl d0,d1")    want_dot_size=no;  want_register_percent=no  ;;
+"addl %d0,%d1")  want_dot_size=no;  want_register_percent=yes ;;
+"add.l d0,d1")   want_dot_size=yes; want_register_percent=no  ;;
+"add.l %d0,%d1") want_dot_size=yes; want_register_percent=yes ;;
+*) AC_MSG_ERROR([oops, unrecognised instruction and register style]) ;;
+esac
+GMP_DEFINE_RAW(["define(<WANT_REGISTER_PERCENT>, <\`$want_register_percent'>)"])
+GMP_DEFINE_RAW(["define(<WANT_DOT_SIZE>, <\`$want_dot_size'>)"])
+])
+
+
+dnl  GMP_ASM_M68K_ADDRESSING
+dnl  -----------------------
+dnl  Determine whether the assembler takes MIT style "a0@-" or Motorola
+dnl  style "-(a0)" addressing.
+dnl
+dnl  The move mnemonic ("movel" or "move.l") and the register spelling (with
+dnl  or without "%") used in the test programs are derived from
+dnl  gmp_cv_asm_m68k_instruction.  The MIT form is tried first, then the
+dnl  Motorola form; configure stops with an error if neither assembles.
+dnl
+dnl  The answer, "mit" or "motorola", is cached in
+dnl  gmp_cv_asm_m68k_addressing and passed to GMP_DEFINE_RAW as a define of
+dnl  WANT_ADDRESSING.
+
+AC_DEFUN([GMP_ASM_M68K_ADDRESSING],
+[AC_REQUIRE([GMP_ASM_TEXT])
+AC_REQUIRE([GMP_ASM_M68K_INSTRUCTION])
+AC_CACHE_CHECK([assembler addressing style],
+               gmp_cv_asm_m68k_addressing,
+[case $gmp_cv_asm_m68k_instruction in
+addl*)  movel=movel ;;
+add.l*) movel=move.l ;;
+*) AC_MSG_ERROR([oops, unrecognised gmp_cv_asm_m68k_instruction]) ;;
+esac
+case $gmp_cv_asm_m68k_instruction in
+*"%d0,%d1") dreg=%d0; areg=%a0 ;;
+*"d0,d1")   dreg=d0;  areg=a0  ;;
+*) AC_MSG_ERROR([oops, unrecognised gmp_cv_asm_m68k_instruction]) ;;
+esac
+GMP_TRY_ASSEMBLE(
+[      $gmp_cv_asm_text
+       $movel  $dreg, $areg@-],
+  [gmp_cv_asm_m68k_addressing=mit],
+[GMP_TRY_ASSEMBLE(
+[      $gmp_cv_asm_text
+       $movel  $dreg, -($areg)],
+  [gmp_cv_asm_m68k_addressing=motorola],
+[AC_MSG_ERROR([cannot determine assembler addressing style])])])
+])
+GMP_DEFINE_RAW(["define(<WANT_ADDRESSING>, <\`$gmp_cv_asm_m68k_addressing'>)"])
+])
+
+
+dnl  GMP_ASM_M68K_BRANCHES
+dnl  ---------------------
+dnl  "bra" is the standard branch instruction.  "jra" or "jbra" are
+dnl  preferred where available, since on gas for instance they give a
+dnl  displacement only as big as it needs to be, whereas "bra" is always
+dnl  16-bits.  This applies to the conditional branches "bcc" etc too.
+dnl  However "dbcc" etc on gas are already only as big as they need to be.
+dnl
+dnl  The candidates are tried in the order jra, jbra, bra, each as a branch
+dnl  back to a label "foo".  The first one that assembles is cached in
+dnl  gmp_cv_asm_m68k_branches and passed to GMP_DEFINE_RAW as a define of
+dnl  WANT_BRANCHES; configure stops with an error if none assembles.
+dnl
+dnl  NOTE(review): the test program uses $gmp_cv_asm_label_suffix, but only
+dnl  GMP_ASM_TEXT is AC_REQUIREd here.  Presumably the label suffix check
+dnl  has always been run earlier by configure -- confirm, or add a require.
+
+AC_DEFUN([GMP_ASM_M68K_BRANCHES],
+[AC_REQUIRE([GMP_ASM_TEXT])
+AC_CACHE_CHECK([assembler shortest branches],
+               gmp_cv_asm_m68k_branches,
+[for i in jra jbra bra; do
+  GMP_TRY_ASSEMBLE(
+[      $gmp_cv_asm_text
+foo$gmp_cv_asm_label_suffix
+       $i      foo],
+  [gmp_cv_asm_m68k_branches=$i
+  rm -f conftest*
+  break])
+done
+if test -z "$gmp_cv_asm_m68k_branches"; then
+  AC_MSG_ERROR([cannot determine assembler branching style])
+fi
+])
+GMP_DEFINE_RAW(["define(<WANT_BRANCHES>, <\`$gmp_cv_asm_m68k_branches'>)"])
+])
+
+
+dnl  GMP_ASM_POWERPC_PIC_ALWAYS
+dnl  --------------------------
+dnl  Determine whether PIC is the default compiler output.
+dnl
+dnl  SVR4 style "foo@ha" addressing is interpreted as non-PIC, and anything
+dnl  else is assumed to require PIC always (Darwin or AIX).  SVR4 is the
+dnl  only non-PIC addressing syntax the asm files have at the moment anyway.
+dnl
+dnl  Libtool does this by taking "*-*-aix* | *-*-darwin* | *-*-rhapsody*" to
+dnl  mean PIC always, but it seems more reliable to grep the compiler
+dnl  output.
+dnl
+dnl  The next paragraph is untrue for Tiger.  Was it ever true?  For Tiger,
+dnl  "cc -fast" makes non-PIC the default (and the binaries do run).
+dnl
+dnl  On Darwin "cc -static" is non-PIC with syntax "ha16(_foo)", but that's
+dnl  apparently only for use in the kernel, which we're not attempting to
+dnl  target at the moment, so don't look for that.
+dnl
+dnl  Note that, contrary to that last paragraph, the code below does grep
+dnl  for "ha16(_foo)" as well as "foo@ha", and treats either as non-PIC.
+dnl
+dnl  A small function returning the address of a global is compiled with
+dnl  "-S" and the resulting conftest.s is searched.  The answer starts out
+dnl  as "yes" and stays that way if the compile fails.  It's cached in
+dnl  gmp_cv_asm_powerpc_pic and passed to GMP_DEFINE_RAW as a define of
+dnl  PIC_ALWAYS.
+
+AC_DEFUN([GMP_ASM_POWERPC_PIC_ALWAYS],
+[AC_REQUIRE([AC_PROG_CC])
+AC_CACHE_CHECK([whether compiler output is PIC by default],
+               gmp_cv_asm_powerpc_pic,
+[gmp_cv_asm_powerpc_pic=yes
+cat >conftest.c <<EOF
+int foo;
+int *bar() { return &foo; }
+EOF
+echo "Test program:" >&AC_FD_CC
+cat conftest.c >&AC_FD_CC
+gmp_compile="$CC $CFLAGS $CPPFLAGS -S conftest.c >&AC_FD_CC"
+if AC_TRY_EVAL(gmp_compile); then
+  echo "Compiler output:" >&AC_FD_CC
+  cat conftest.s >&AC_FD_CC
+  if grep 'foo@ha' conftest.s >/dev/null 2>&AC_FD_CC; then
+    gmp_cv_asm_powerpc_pic=no
+  fi
+  if grep 'ha16(_foo)' conftest.s >/dev/null 2>&AC_FD_CC; then
+    gmp_cv_asm_powerpc_pic=no
+  fi
+fi
+rm -f conftest*
+])
+GMP_DEFINE_RAW(["define(<PIC_ALWAYS>,<$gmp_cv_asm_powerpc_pic>)"])
+])
+
+
+dnl  GMP_ASM_POWERPC_R_REGISTERS
+dnl  ---------------------------
+dnl  Determine whether the assembler takes powerpc registers with an "r" as
+dnl  in "r6", or as plain "6".  The latter is standard, but NeXT, Rhapsody,
+dnl  and MacOS-X require the "r" forms.
+dnl
+dnl  See also mpn/powerpc32/powerpc-defs.m4 which uses the result of this
+dnl  test.
+dnl
+dnl  The plain form "mtctr 6" is tried first, and if it assembles the
+dnl  answer is "no".  Otherwise "mtctr r6" is tried, giving "yes" if that
+dnl  assembles, or a configure error if it doesn't either.  The answer is
+dnl  cached in gmp_cv_asm_powerpc_r_registers and passed to GMP_DEFINE_RAW
+dnl  as a define of WANT_R_REGISTERS.
+
+AC_DEFUN([GMP_ASM_POWERPC_R_REGISTERS],
+[AC_REQUIRE([GMP_ASM_TEXT])
+AC_CACHE_CHECK([if the assembler needs r on registers],
+               gmp_cv_asm_powerpc_r_registers,
+[GMP_TRY_ASSEMBLE(
+[      $gmp_cv_asm_text
+       mtctr   6],
+[gmp_cv_asm_powerpc_r_registers=no],
+[GMP_TRY_ASSEMBLE(
+[      $gmp_cv_asm_text
+       mtctr   r6],
+[gmp_cv_asm_powerpc_r_registers=yes],
+[AC_MSG_ERROR([neither "mtctr 6" nor "mtctr r6" works])])])])
+
+GMP_DEFINE_RAW(["define(<WANT_R_REGISTERS>,<$gmp_cv_asm_powerpc_r_registers>)"])
+])
+
+
+dnl  GMP_ASM_SPARC_REGISTER
+dnl  ----------------------
+dnl  Determine whether the assembler accepts the ".register" directive.
+dnl  Old versions of solaris "as" don't.
+dnl
+dnl  See also mpn/sparc32/sparc-defs.m4 which uses the result of this test.
+dnl
+dnl  The test assembles ".register %g2,#scratch".  The answer, "yes" or
+dnl  "no", is cached in gmp_cv_asm_sparc_register and passed to
+dnl  GMP_DEFINE_RAW as a define of HAVE_REGISTER.
+
+AC_DEFUN([GMP_ASM_SPARC_REGISTER],
+[AC_REQUIRE([GMP_ASM_TEXT])
+AC_CACHE_CHECK([if the assembler accepts ".register"],
+               gmp_cv_asm_sparc_register,
+[GMP_TRY_ASSEMBLE(
+[      $gmp_cv_asm_text
+       .register       %g2,#scratch
+],
+[gmp_cv_asm_sparc_register=yes],
+[gmp_cv_asm_sparc_register=no])])
+
+GMP_DEFINE_RAW(["define(<HAVE_REGISTER>,<$gmp_cv_asm_sparc_register>)"])
+])
+
+
+dnl  GMP_C_ATTRIBUTE_CONST
+dnl  ---------------------
+dnl  Check whether a prototype carrying gcc style __attribute__ ((const))
+dnl  compiles, and define HAVE_ATTRIBUTE_CONST if it does.  The answer is
+dnl  cached in gmp_cv_c_attribute_const.
+
+AC_DEFUN([GMP_C_ATTRIBUTE_CONST],
+[AC_CACHE_CHECK([whether gcc __attribute__ ((const)) works],
+                gmp_cv_c_attribute_const,
+[AC_TRY_COMPILE([int foo (int x) __attribute__ ((const));],
+                [],
+                [gmp_cv_c_attribute_const=yes],
+                [gmp_cv_c_attribute_const=no])
+])
+case $gmp_cv_c_attribute_const in
+  yes)
+    AC_DEFINE(HAVE_ATTRIBUTE_CONST, 1,
+    [Define to 1 if the compiler accepts gcc style __attribute__ ((const))])
+    ;;
+esac
+])
+
+
+dnl  GMP_C_ATTRIBUTE_MALLOC
+dnl  ----------------------
+dnl  Check for a usable gcc style __attribute__ ((malloc)), and define
+dnl  HAVE_ATTRIBUTE_MALLOC if there is one.
+dnl
+dnl  gcc 2.95.x accepts the attribute, but warns that it's ignored.  To
+dnl  avoid that warning the attribute is treated as absent in that case: the
+dnl  answer starts out as "no" and only becomes "yes" when the prototype
+dnl  compiles and the compiler output has no "attribute directive ignored".
+dnl  The compiler output is copied to the configure log either way.
+
+AC_DEFUN([GMP_C_ATTRIBUTE_MALLOC],
+[AC_CACHE_CHECK([whether gcc __attribute__ ((malloc)) works],
+                gmp_cv_c_attribute_malloc,
+[cat >conftest.c <<EOF
+void *foo (int x) __attribute__ ((malloc));
+EOF
+gmp_cv_c_attribute_malloc=no
+gmp_compile="$CC $CFLAGS $CPPFLAGS -c conftest.c >conftest.out 2>&1"
+if AC_TRY_EVAL(gmp_compile); then
+  if grep "attribute directive ignored" conftest.out >/dev/null; then
+    :
+  else
+    gmp_cv_c_attribute_malloc=yes
+  fi
+fi
+cat conftest.out >&AC_FD_CC
+rm -f conftest*
+])
+case $gmp_cv_c_attribute_malloc in
+  yes)
+    AC_DEFINE(HAVE_ATTRIBUTE_MALLOC, 1,
+    [Define to 1 if the compiler accepts gcc style __attribute__ ((malloc))])
+    ;;
+esac
+])
+
+
+dnl  GMP_C_ATTRIBUTE_MODE
+dnl  --------------------
+dnl  Check whether a typedef using gcc style __attribute__ ((mode (SI)))
+dnl  compiles, and define HAVE_ATTRIBUTE_MODE if it does.
+dnl
+dnl  The attribute was introduced in gcc 2.2, but is perhaps not in all
+dnl  Apple derived versions.
+
+AC_DEFUN([GMP_C_ATTRIBUTE_MODE],
+[AC_CACHE_CHECK([whether gcc __attribute__ ((mode (XX))) works],
+                gmp_cv_c_attribute_mode,
+[AC_TRY_COMPILE([typedef int SItype __attribute__ ((mode (SI)));],
+                [],
+                [gmp_cv_c_attribute_mode=yes],
+                [gmp_cv_c_attribute_mode=no])
+])
+case $gmp_cv_c_attribute_mode in
+  yes)
+    AC_DEFINE(HAVE_ATTRIBUTE_MODE, 1,
+    [Define to 1 if the compiler accepts gcc style __attribute__ ((mode (XX)))])
+    ;;
+esac
+])
+
+
+dnl  GMP_C_ATTRIBUTE_NORETURN
+dnl  ------------------------
+dnl  Check whether a prototype carrying gcc style __attribute__ ((noreturn))
+dnl  compiles, and define HAVE_ATTRIBUTE_NORETURN if it does.  The answer is
+dnl  cached in gmp_cv_c_attribute_noreturn.
+
+AC_DEFUN([GMP_C_ATTRIBUTE_NORETURN],
+[AC_CACHE_CHECK([whether gcc __attribute__ ((noreturn)) works],
+                gmp_cv_c_attribute_noreturn,
+[AC_TRY_COMPILE([void foo (int x) __attribute__ ((noreturn));],
+                [],
+                [gmp_cv_c_attribute_noreturn=yes],
+                [gmp_cv_c_attribute_noreturn=no])
+])
+case $gmp_cv_c_attribute_noreturn in
+  yes)
+    AC_DEFINE(HAVE_ATTRIBUTE_NORETURN, 1,
+    [Define to 1 if the compiler accepts gcc style __attribute__ ((noreturn))])
+    ;;
+esac
+])
+
+
+dnl  GMP_C_DOUBLE_FORMAT
+dnl  -------------------
+dnl  Determine the floating point format.
+dnl
+dnl  The object file is grepped, in order to work when cross compiling.  A
+dnl  start and end sequence is included to avoid false matches, and allowance
+dnl  is made for the desired data crossing an "od -b" line boundary.  The test
+dnl  number is a small integer so it should appear exactly, no rounding or
+dnl  truncation etc.
+dnl
+dnl  "od -b", incidentally, is supported even by Unix V7, and the awk script
+dnl  used doesn't have functions or anything, so even an "old" awk should
+dnl  suffice.
+dnl
+dnl  The C code here declares the variable foo as extern; without that, some
+dnl  C++ compilers will not put foo in the object file.
+dnl
+dnl  The awk script skips the first field of each "od -b" line (the offset)
+dnl  and keeps the latest 24 byte values in got[].  got[0..7] and
+dnl  got[16..23] must be the before[] and after[] markers of the test
+dnl  struct, and got[8..15] are then the bytes of -123456789.0 which get
+dnl  classified.
+dnl
+dnl  The answer, cached in gmp_cv_c_double_format, is one of
+dnl
+dnl      IEEE big endian
+dnl      IEEE little endian
+dnl      IEEE little endian, swapped halves
+dnl      VAX D
+dnl      VAX G
+dnl      Cray CFP
+dnl      bad ARM software floats
+dnl      unknown                 (followed by the bytes seen, if any)
+dnl
+dnl  and stays plain "unknown" if the test program doesn't compile.  Each
+dnl  recognised format gets its HAVE_DOUBLE_xxx define, except the bad ARM
+dnl  case which deliberately defines nothing.  The two plain IEEE formats
+dnl  are additionally passed on through GMP_DEFINE_RAW.
+
+AC_DEFUN([GMP_C_DOUBLE_FORMAT],
+[AC_REQUIRE([AC_PROG_CC])
+AC_REQUIRE([AC_PROG_AWK])
+AC_CACHE_CHECK([format of `double' floating point],
+                gmp_cv_c_double_format,
+[gmp_cv_c_double_format=unknown
+cat >conftest.c <<\EOF
+[struct foo {
+  char    before[8];
+  double  x;
+  char    after[8];
+};
+extern struct foo foo;
+struct foo foo = {
+  { '\001', '\043', '\105', '\147', '\211', '\253', '\315', '\357' },
+  -123456789.0,
+  { '\376', '\334', '\272', '\230', '\166', '\124', '\062', '\020' },
+};]
+EOF
+gmp_compile="$CC $CFLAGS $CPPFLAGS -c conftest.c >&AC_FD_CC 2>&1"
+if AC_TRY_EVAL(gmp_compile); then
+cat >conftest.awk <<\EOF
+[
+BEGIN {
+  found = 0
+}
+
+{
+  for (f = 2; f <= NF; f++)
+    {
+      for (i = 0; i < 23; i++)
+        got[i] = got[i+1];
+      got[23] = $f;
+
+      # match the special begin and end sequences
+      if (got[0] != "001") continue
+      if (got[1] != "043") continue
+      if (got[2] != "105") continue
+      if (got[3] != "147") continue
+      if (got[4] != "211") continue
+      if (got[5] != "253") continue
+      if (got[6] != "315") continue
+      if (got[7] != "357") continue
+      if (got[16] != "376") continue
+      if (got[17] != "334") continue
+      if (got[18] != "272") continue
+      if (got[19] != "230") continue
+      if (got[20] != "166") continue
+      if (got[21] != "124") continue
+      if (got[22] != "062") continue
+      if (got[23] != "020") continue
+
+      saw = " (" got[8] " " got[9] " " got[10] " " got[11] " " got[12] " " got[13] " " got[14] " " got[15] ")"
+
+      if (got[8]  == "000" &&  \
+          got[9]  == "000" &&  \
+          got[10] == "000" &&  \
+          got[11] == "124" &&  \
+          got[12] == "064" &&  \
+          got[13] == "157" &&  \
+          got[14] == "235" &&  \
+          got[15] == "301")
+        {
+          print "IEEE little endian"
+          found = 1
+          exit
+        }
+
+      # Little endian with the two 4-byte halves swapped, as used by ARM
+      # when the chip is in little endian mode.
+      #
+      if (got[8]  == "064" &&  \
+          got[9]  == "157" &&  \
+          got[10] == "235" &&  \
+          got[11] == "301" &&  \
+          got[12] == "000" &&  \
+          got[13] == "000" &&  \
+          got[14] == "000" &&  \
+          got[15] == "124")
+        {
+          print "IEEE little endian, swapped halves"
+          found = 1
+          exit
+        }
+
+      # gcc 2.95.4 on one GNU/Linux ARM system was seen generating 000 in
+      # the last byte, whereas 124 is correct.  Not sure where the bug
+      # actually lies, but a running program didn't seem to get a full
+      # mantissa worth of working bits.
+      #
+      # We match this case explicitly so we can give a nice result message,
+      # but we deliberately exclude it from the normal IEEE double setups
+      # since it's too broken.
+      #
+      if (got[8]  == "064" &&  \
+          got[9]  == "157" &&  \
+          got[10] == "235" &&  \
+          got[11] == "301" &&  \
+          got[12] == "000" &&  \
+          got[13] == "000" &&  \
+          got[14] == "000" &&  \
+          got[15] == "000")
+        {
+          print "bad ARM software floats"
+          found = 1
+          exit
+        }
+
+      if (got[8]  == "301" &&  \
+          got[9]  == "235" &&  \
+          got[10] == "157" &&  \
+          got[11] == "064" &&  \
+          got[12] == "124" &&  \
+          got[13] == "000" &&  \
+          got[14] == "000" &&  \
+         got[15] == "000")
+        {
+          print "IEEE big endian"
+          found = 1
+          exit
+        }
+
+      if (got[8]  == "353" &&  \
+          got[9]  == "315" &&  \
+          got[10] == "242" &&  \
+          got[11] == "171" &&  \
+          got[12] == "000" &&  \
+          got[13] == "240" &&  \
+          got[14] == "000" &&  \
+          got[15] == "000")
+        {
+          print "VAX D"
+          found = 1
+          exit
+        }
+
+      if (got[8]  == "275" &&  \
+          got[9]  == "301" &&  \
+          got[10] == "064" &&  \
+          got[11] == "157" &&  \
+          got[12] == "000" &&  \
+          got[13] == "124" &&  \
+          got[14] == "000" &&  \
+          got[15] == "000")
+        {
+          print "VAX G"
+          found = 1
+          exit
+        }
+
+      if (got[8]  == "300" &&  \
+          got[9]  == "033" &&  \
+          got[10] == "353" &&  \
+          got[11] == "171" &&  \
+          got[12] == "242" &&  \
+          got[13] == "240" &&  \
+          got[14] == "000" &&  \
+          got[15] == "000")
+        {
+          print "Cray CFP"
+          found = 1
+          exit
+        }
+    }
+}
+
+END {
+  if (! found)
+    print "unknown", saw
+}
+]
+EOF
+  gmp_cv_c_double_format=`od -b conftest.$OBJEXT | $AWK -f conftest.awk`
+  case $gmp_cv_c_double_format in
+  unknown*)
+    echo "cannot match anything, conftest.$OBJEXT contains" >&AC_FD_CC
+    od -b conftest.$OBJEXT >&AC_FD_CC
+    ;;
+  esac
+else
+  AC_MSG_WARN([oops, cannot compile test program])
+fi
+rm -f conftest*
+])
+
+AH_VERBATIM([HAVE_DOUBLE],
+[/* Define one of the following to 1 for the format of a `double'.
+   If your format is not among these choices, or you don't know what it is,
+   then leave all undefined.
+   IEEE_LITTLE_SWAPPED means little endian, but with the two 4-byte halves
+   swapped, as used by ARM CPUs in little endian mode.  */
+#undef HAVE_DOUBLE_IEEE_BIG_ENDIAN
+#undef HAVE_DOUBLE_IEEE_LITTLE_ENDIAN
+#undef HAVE_DOUBLE_IEEE_LITTLE_SWAPPED
+#undef HAVE_DOUBLE_VAX_D
+#undef HAVE_DOUBLE_VAX_G
+#undef HAVE_DOUBLE_CRAY_CFP])
+
+case $gmp_cv_c_double_format in
+  "IEEE big endian")
+    AC_DEFINE(HAVE_DOUBLE_IEEE_BIG_ENDIAN, 1)
+    GMP_DEFINE_RAW("define_not_for_expansion(\`HAVE_DOUBLE_IEEE_BIG_ENDIAN')", POST)
+    ;;
+  "IEEE little endian")
+    AC_DEFINE(HAVE_DOUBLE_IEEE_LITTLE_ENDIAN, 1)
+    GMP_DEFINE_RAW("define_not_for_expansion(\`HAVE_DOUBLE_IEEE_LITTLE_ENDIAN')", POST)
+    ;;
+  "IEEE little endian, swapped halves")
+    AC_DEFINE(HAVE_DOUBLE_IEEE_LITTLE_SWAPPED, 1) ;;
+  "VAX D")
+    AC_DEFINE(HAVE_DOUBLE_VAX_D, 1) ;;
+  "VAX G")
+    AC_DEFINE(HAVE_DOUBLE_VAX_G, 1) ;;
+  "Cray CFP")
+    AC_DEFINE(HAVE_DOUBLE_CRAY_CFP, 1) ;;
+  "bad ARM software floats")
+    ;;
+  unknown*)
+    AC_MSG_WARN([Could not determine float format.])
+    AC_MSG_WARN([Conversions to and from "double" may be slow.])
+    ;;
+  *)
+    AC_MSG_WARN([oops, unrecognised float format: $gmp_cv_c_double_format])
+    ;;
+esac
+])
+
+
+dnl  GMP_C_STDARG
+dnl  ------------
+dnl  Decide between <stdarg.h> and <varargs.h>, by compiling a small
+dnl  variadic function written with the <stdarg.h> macros.
+dnl
+dnl  The define made is HAVE_STDARG, not HAVE_STDARG_H, so as not to clash
+dnl  with what AC_CHECK_HEADERS might produce.
+dnl
+dnl  This is perhaps a little more than is needed, since in practice there's
+dnl  only ANSI or K&R, and those can be told apart by AC_PROG_CC_STDC or
+dnl  most likely by the _PROTO setups in gmp.h.  But the test is direct, and
+dnl  exercises exactly what's going to be used.
+
+AC_DEFUN([GMP_C_STDARG],
+[AC_CACHE_CHECK([whether <stdarg.h> exists and works],
+                gmp_cv_c_stdarg,
+[AC_TRY_COMPILE(
+[#include <stdarg.h>
+int foo (int x, ...)
+{
+  va_list  ap;
+  int      y;
+  va_start (ap, x);
+  y = va_arg (ap, int);
+  va_end (ap);
+  return y;
+}],
+  [],
+  [gmp_cv_c_stdarg=yes],
+  [gmp_cv_c_stdarg=no])
+])
+case $gmp_cv_c_stdarg in
+  yes)
+    AC_DEFINE(HAVE_STDARG, 1, [Define to 1 if <stdarg.h> exists and works])
+    ;;
+esac
+])
+
+
+dnl  GMP_FUNC_ALLOCA
+dnl  ---------------
+dnl  Find out whether alloca() can be used.  This follows AC_FUNC_ALLOCA
+dnl  from autoconf, with two differences: there's no fallback to alloca.c
+dnl  when alloca() is missing, and gmp-impl.h supplies the conditionals
+dnl  which pick up a compiler builtin alloca.
+dnl
+dnl  HAVE_ALLOCA is defined when a program calling alloca(1), with gmp.h and
+dnl  gmp-impl.h included, can be linked.
+
+AC_DEFUN([GMP_FUNC_ALLOCA],
+[AC_REQUIRE([GMP_HEADER_ALLOCA])
+AC_CACHE_CHECK([for alloca (via gmp-impl.h)],
+               gmp_cv_func_alloca,
+[AC_TRY_LINK(
+GMP_INCLUDE_GMP_H
+[#include "$srcdir/gmp-impl.h"
+],
+  [char *p = (char *) alloca (1);],
+  [gmp_cv_func_alloca=yes],
+  [gmp_cv_func_alloca=no])])
+case $gmp_cv_func_alloca in
+  yes)
+    AC_DEFINE(HAVE_ALLOCA, 1, [Define to 1 if alloca() works (via gmp-impl.h).])
+    ;;
+esac
+])
+
+dnl  GMP_HEADER_ALLOCA
+dnl  -----------------
+dnl  Check for an <alloca.h> whose alloca() takes a non-constant size, and
+dnl  define HAVE_ALLOCA_H if a program using it can be linked.
+
+AC_DEFUN([GMP_HEADER_ALLOCA],
+[# The Ultrix 4.2 mips builtin alloca declared by alloca.h only works
+# for constant arguments.  Useless!
+AC_CACHE_CHECK([for working alloca.h],
+               gmp_cv_header_alloca,
+[AC_TRY_LINK([#include <alloca.h>],
+  [char *p = (char *) alloca (2 * sizeof (int));],
+  [gmp_cv_header_alloca=yes],
+  [gmp_cv_header_alloca=no])])
+case $gmp_cv_header_alloca in
+  yes)
+    AC_DEFINE(HAVE_ALLOCA_H, 1,
+    [Define to 1 if you have <alloca.h> and it should be used (not on Ultrix).])
+    ;;
+esac
+])
+
+
+dnl  GMP_OPTION_ALLOCA
+dnl  -----------------
+dnl  Decide what to do about --enable-alloca from the user.
+dnl  This is a macro so it can require GMP_FUNC_ALLOCA.
+dnl
+dnl  $enable_alloca is turned into gmp_cv_option_alloca as follows,
+dnl
+dnl      yes                       alloca
+dnl      no                        malloc-reentrant
+dnl      reentrant, notreentrant   alloca if gmp_cv_func_alloca is yes,
+dnl                                otherwise malloc-$enable_alloca
+dnl      anything else             passed through unchanged
+dnl
+dnl  and that in turn selects one of the WANT_TMP_xxx defines and the
+dnl  TAL_OBJECT to build.  Note the alloca method uses the same
+dnl  tal-reent object as malloc-reentrant.  Asking for alloca when it's not
+dnl  available, or an unrecognised method, is a configure error.
+
+AC_DEFUN([GMP_OPTION_ALLOCA],
+[AC_REQUIRE([GMP_FUNC_ALLOCA])
+AC_CACHE_CHECK([how to allocate temporary memory],
+               gmp_cv_option_alloca,
+[case $enable_alloca in
+  yes)
+    gmp_cv_option_alloca=alloca
+    ;;
+  no)
+    gmp_cv_option_alloca=malloc-reentrant
+    ;;
+  reentrant | notreentrant)
+    case $gmp_cv_func_alloca in
+    yes)  gmp_cv_option_alloca=alloca ;;
+    *)    gmp_cv_option_alloca=malloc-$enable_alloca ;;
+    esac
+    ;;
+  *)
+    gmp_cv_option_alloca=$enable_alloca
+    ;;
+esac
+])
+
+AH_VERBATIM([WANT_TMP],
+[/* Define one of these to 1 for the desired temporary memory allocation
+   method, per --enable-alloca. */
+#undef WANT_TMP_ALLOCA
+#undef WANT_TMP_REENTRANT
+#undef WANT_TMP_NOTREENTRANT
+#undef WANT_TMP_DEBUG])
+
+case $gmp_cv_option_alloca in
+  alloca)
+    if test $gmp_cv_func_alloca = no; then
+      AC_MSG_ERROR([--enable-alloca=alloca specified, but alloca not available])
+    fi
+    AC_DEFINE(WANT_TMP_ALLOCA)
+    TAL_OBJECT=tal-reent$U.lo
+    ;;
+  malloc-reentrant)
+    AC_DEFINE(WANT_TMP_REENTRANT)
+    TAL_OBJECT=tal-reent$U.lo
+    ;;
+  malloc-notreentrant)
+    AC_DEFINE(WANT_TMP_NOTREENTRANT)
+    TAL_OBJECT=tal-notreent$U.lo
+    ;;
+  debug)
+    AC_DEFINE(WANT_TMP_DEBUG)
+    TAL_OBJECT=tal-debug$U.lo
+    ;;
+  *)
+    # checks at the start of configure.in should protect us
+    AC_MSG_ERROR([unrecognised --enable-alloca=$gmp_cv_option_alloca])
+    ;;
+esac
+AC_SUBST(TAL_OBJECT)
+])
+
+
+dnl  GMP_FUNC_SSCANF_WRITABLE_INPUT
+dnl  ------------------------------
+dnl  Decide whether sscanf must be given a writable input string, and
+dnl  define SSCANF_WRITABLE_INPUT if so.
+dnl
+dnl  Running a test program would be nicer for a native build, but so few
+dnl  systems are affected these days that a plain list of them is good
+dnl  enough.  Currently that list is just hpux9.
+
+AC_DEFUN([GMP_FUNC_SSCANF_WRITABLE_INPUT],
+[AC_CACHE_CHECK([whether sscanf needs writable input],
+                 gmp_cv_func_sscanf_writable_input,
+[gmp_cv_func_sscanf_writable_input=no
+case $host in
+  *-*-hpux9 | *-*-hpux9.*)
+    gmp_cv_func_sscanf_writable_input=yes
+    ;;
+esac
+])
+case $gmp_cv_func_sscanf_writable_input in
+  no)
+    ;;
+  yes)
+    AC_DEFINE(SSCANF_WRITABLE_INPUT, 1,
+              [Define to 1 if sscanf requires writable inputs])
+    ;;
+  *)
+    AC_MSG_ERROR([unrecognised \$gmp_cv_func_sscanf_writable_input])
+    ;;
+esac
+])
+
+
+dnl  GMP_FUNC_VSNPRINTF
+dnl  ------------------
+dnl  Check whether vsnprintf exists, and works properly.
+dnl
+dnl  Systems without vsnprintf include mingw32, OSF 4.
+dnl
+dnl  Sparc Solaris 2.7 in 64-bit mode doesn't always truncate, making
+dnl  vsnprintf like vsprintf, and hence completely useless.  On one system a
+dnl  literal string is enough to provoke the problem, on another a "%n" was
+dnl  needed.  There seems to be something weird going on with the optimizer
+dnl  or something, since on the first system adding a second check with
+dnl  "%n", or even just an initialized local variable, makes it work.  In
+dnl  any case, without bothering to get to the bottom of this, the two
+dnl  program runs in the code below end up successfully detecting the
+dnl  problem.
+dnl
+dnl  glibc 2.0.x returns either -1 or bufsize-1 for an overflow (both seen,
+dnl  not sure which 2.0.x does which), but still puts the correct null
+dnl  terminated result into the buffer.
+dnl
+dnl  Each test program formats "hello world" with a buffer size of 4.  It
+dnl  exits 1 unless the buffer then holds "hel", and exits 2 unless the
+dnl  return value is one of -1, 3 or 11.  The two runs differ only in the
+dnl  statement substituted for the body of main.
+dnl
+dnl  gmp_cv_func_vsnprintf ends up "no" if vsnprintf doesn't exist or
+dnl  either run fails, "probably" when cross compiling (the programs can't
+dnl  be run, and a warning is given), and "yes" otherwise.  HAVE_VSNPRINTF
+dnl  is defined for anything except "no".
+
+AC_DEFUN([GMP_FUNC_VSNPRINTF],
+[AC_REQUIRE([GMP_C_STDARG])
+AC_CHECK_FUNC(vsnprintf,
+              [gmp_vsnprintf_exists=yes],
+              [gmp_vsnprintf_exists=no])
+if test "$gmp_vsnprintf_exists" = no; then
+  gmp_cv_func_vsnprintf=no
+else
+  AC_CACHE_CHECK([whether vsnprintf works],
+                 gmp_cv_func_vsnprintf,
+  [gmp_cv_func_vsnprintf=yes
+   for i in 'return check ("hello world");' 'int n; return check ("%nhello world", &n);'; do
+     AC_TRY_RUN([
+#include <string.h>  /* for strcmp */
+#include <stdio.h>   /* for vsnprintf */
+
+#if HAVE_STDARG
+#include <stdarg.h>
+#else
+#include <varargs.h>
+#endif
+
+int
+#if HAVE_STDARG
+check (const char *fmt, ...)
+#else
+check (va_alist)
+     va_dcl
+#endif
+{
+  static char  buf[128];
+  va_list  ap;
+  int      ret;
+
+#if HAVE_STDARG
+  va_start (ap, fmt);
+#else
+  char *fmt;
+  va_start (ap);
+  fmt = va_arg (ap, char *);
+#endif
+
+  ret = vsnprintf (buf, 4, fmt, ap);
+
+  if (strcmp (buf, "hel") != 0)
+    return 1;
+
+  /* allowed return values */
+  if (ret != -1 && ret != 3 && ret != 11)
+    return 2;
+
+  return 0;
+}
+
+int
+main ()
+{
+$i
+}
+],
+      [:],
+      [gmp_cv_func_vsnprintf=no; break],
+      [gmp_cv_func_vsnprintf=probably; break])
+  done
+  ])
+  if test "$gmp_cv_func_vsnprintf" = probably; then
+    AC_MSG_WARN([cannot check for properly working vsnprintf when cross compiling, will assume it's ok])
+  fi
+  if test "$gmp_cv_func_vsnprintf" != no; then
+    AC_DEFINE(HAVE_VSNPRINTF,1,
+    [Define to 1 if you have the `vsnprintf' function and it works properly.])
+  fi
+fi
+])
+
+
+dnl  GMP_H_ANSI
+dnl  ----------
+dnl  When AC_PROG_CC_STDC found an ANSI capable compiler, check that gmp.h
+dnl  agrees, by testing __GMP_HAVE_PROTOTYPES.  A warning is given if it
+dnl  doesn't; nothing is defined or cached either way.
+
+AC_DEFUN([GMP_H_ANSI],
+[AC_REQUIRE([AC_PROG_CC_STDC])
+if test "x$ac_cv_prog_cc_stdc" != xno; then
+  AC_TRY_COMPILE(
+GMP_INCLUDE_GMP_H
+[#if ! __GMP_HAVE_PROTOTYPES
+die die die
+#endif
+],,,
+  [AC_MSG_WARN([gmp.h doesnt recognise compiler as ANSI, prototypes and "const" will be unavailable])])
+fi
+])
+
+
+dnl  GMP_H_EXTERN_INLINE
+dnl  -------------------
+dnl  When AC_C_INLINE found some sort of "inline" keyword, check that the
+dnl  #ifdef's in gmp.h recognise it too, by testing that
+dnl  __GMP_EXTERN_INLINE gets defined.  A warning naming the keyword is
+dnl  given if not; nothing is defined or cached either way.
+
+AC_DEFUN([GMP_H_EXTERN_INLINE],
+[AC_REQUIRE([AC_C_INLINE])
+if test "x$ac_cv_c_inline" != xno; then
+  AC_TRY_COMPILE(
+[#define __GMP_WITHIN_CONFIGURE_INLINE 1
+]GMP_INCLUDE_GMP_H[
+#ifndef __GMP_EXTERN_INLINE
+die die die
+#endif
+],,,
+  [tmp_inline=$ac_cv_c_inline
+  if test "x$tmp_inline" = xyes; then
+    tmp_inline=inline
+  fi
+  AC_MSG_WARN([gmp.h doesnt recognise compiler "$tmp_inline", inlines will be unavailable])])
+fi
+])
+
+
+dnl  GMP_H_HAVE_FILE
+dnl  ---------------
+dnl  Check whether the #ifdef's in gmp.h recognise when stdio.h has been
+dnl  included to get FILE.
+dnl
+dnl  The test includes <stdio.h> and then gmp.h, and expects
+dnl  _GMP_H_HAVE_FILE to be non-zero afterwards.  Only a warning is given
+dnl  when it isn't; nothing is defined or cached.
+
+AC_DEFUN([GMP_H_HAVE_FILE],
+[AC_TRY_COMPILE(
+[#include <stdio.h>]
+GMP_INCLUDE_GMP_H
+[#if ! _GMP_H_HAVE_FILE
+die die die
+#endif
+],,,
+  [AC_MSG_WARN([gmp.h doesnt recognise <stdio.h>, FILE prototypes will be unavailable])])
+])
+
+
+dnl  GMP_PROG_CC_FOR_BUILD
+dnl  ---------------------
+dnl  Establish CC_FOR_BUILD, a C compiler for the build system.
+dnl
+dnl  If CC_FOR_BUILD is set then it's expected to work, likewise the old
+dnl  style HOST_CC, otherwise some likely candidates are tried, the same as
+dnl  configfsf.guess.
+dnl
+dnl  A CC_FOR_BUILD or HOST_CC from the user which doesn't work is a
+dnl  configure error, there's no falling back to the candidates.  The
+dnl  candidates are "$CC", "$CC $CFLAGS $CPPFLAGS", cc, gcc, c89 and c99, in
+dnl  that order, and it's an error if none of them works either.
+
+AC_DEFUN([GMP_PROG_CC_FOR_BUILD],
+[AC_REQUIRE([AC_PROG_CC])
+if test -n "$CC_FOR_BUILD"; then
+  GMP_PROG_CC_FOR_BUILD_WORKS($CC_FOR_BUILD,,
+    [AC_MSG_ERROR([Specified CC_FOR_BUILD doesn't seem to work])])
+elif test -n "$HOST_CC"; then
+  GMP_PROG_CC_FOR_BUILD_WORKS($HOST_CC,
+    [CC_FOR_BUILD=$HOST_CC],
+    [AC_MSG_ERROR([Specified HOST_CC doesn't seem to work])])
+else
+  for i in "$CC" "$CC $CFLAGS $CPPFLAGS" cc gcc c89 c99; do
+    GMP_PROG_CC_FOR_BUILD_WORKS($i,
+      [CC_FOR_BUILD=$i
+       break])
+  done
+  if test -z "$CC_FOR_BUILD"; then
+    AC_MSG_ERROR([Cannot find a build system compiler])
+  fi
+fi
+
+AC_ARG_VAR(CC_FOR_BUILD,[build system C compiler])
+AC_SUBST(CC_FOR_BUILD)
+])
+
+
+dnl  GMP_PROG_CC_FOR_BUILD_WORKS(cc/cflags[,[action-if-good][,action-if-bad]])
+dnl  -------------------------------------------------------------------------
+dnl  See if the given cc/cflags works on the build system.
+dnl
+dnl  It seems easiest to just use the default compiler output, rather than
+dnl  figuring out the .exe or whatever at this stage.
+dnl
+dnl  "Works" means the test program compiles and links with nothing beyond
+dnl  the given cc/cflags, and that one of the usual default output names
+dnl  (a.out, b.out, a.exe, a_out.exe or conftest) then runs successfully.
+dnl  The result is left in $cc_for_build_works, and action-if-good or
+dnl  action-if-bad is run accordingly.
+dnl
+dnl  The test program leaves main with "return 0" rather than calling
+dnl  exit().  exit() isn't declared without <stdlib.h>, and a compiler which
+dnl  rejects implicit function declarations (as C99 requires, and as recent
+dnl  gcc and clang do by default) would otherwise wrongly be found not to
+dnl  work.
+
+AC_DEFUN([GMP_PROG_CC_FOR_BUILD_WORKS],
+[AC_MSG_CHECKING([build system compiler $1])
+# remove anything that might look like compiler output to our "||" expression
+rm -f conftest* a.out b.out a.exe a_out.exe
+cat >conftest.c <<EOF
+int
+main ()
+{
+  return 0;
+}
+EOF
+gmp_compile="$1 conftest.c"
+cc_for_build_works=no
+if AC_TRY_EVAL(gmp_compile); then
+  if (./a.out || ./b.out || ./a.exe || ./a_out.exe || ./conftest) >&AC_FD_CC 2>&1; then
+    cc_for_build_works=yes
+  fi
+fi
+rm -f conftest* a.out b.out a.exe a_out.exe
+AC_MSG_RESULT($cc_for_build_works)
+if test "$cc_for_build_works" = yes; then
+  ifelse([$2],,:,[$2])
+else
+  ifelse([$3],,:,[$3])
+fi
+])
+
+
+dnl  GMP_PROG_CPP_FOR_BUILD
+dnl  ----------------------
+dnl  Establish CPP_FOR_BUILD, the build system C preprocessor.
+dnl  The choices tried here are the same as AC_PROG_CPP, but with
+dnl  CC_FOR_BUILD.
+dnl
+dnl  A CPP_FOR_BUILD already set by the user is taken as is, without being
+dnl  tested.  Otherwise "$CC_FOR_BUILD -E", "$CC_FOR_BUILD -E
+dnl  -traditional-cpp" and "/lib/cpp" are tried in turn on a one line test
+dnl  file, and the first to succeed is cached in gmp_cv_prog_cpp_for_build.
+dnl  It's a configure error if none of them works.
+
+AC_DEFUN([GMP_PROG_CPP_FOR_BUILD],
+[AC_REQUIRE([GMP_PROG_CC_FOR_BUILD])
+AC_MSG_CHECKING([for build system preprocessor])
+if test -z "$CPP_FOR_BUILD"; then
+  AC_CACHE_VAL(gmp_cv_prog_cpp_for_build,
+  [cat >conftest.c <<EOF
+#define FOO BAR
+EOF
+  for i in "$CC_FOR_BUILD -E" "$CC_FOR_BUILD -E -traditional-cpp" "/lib/cpp"; do
+    gmp_compile="$i conftest.c"
+    if AC_TRY_EVAL(gmp_compile) >&AC_FD_CC 2>&1; then
+      gmp_cv_prog_cpp_for_build=$i
+      break
+    fi
+  done
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  if test -z "$gmp_cv_prog_cpp_for_build"; then
+    AC_MSG_ERROR([Cannot find build system C preprocessor.])
+  fi
+  ])
+  CPP_FOR_BUILD=$gmp_cv_prog_cpp_for_build
+fi
+AC_MSG_RESULT([$CPP_FOR_BUILD])
+
+AC_ARG_VAR(CPP_FOR_BUILD,[build system C preprocessor])
+AC_SUBST(CPP_FOR_BUILD)
+])
+
+
+dnl  GMP_PROG_EXEEXT_FOR_BUILD
+dnl  -------------------------
+dnl  Determine EXEEXT_FOR_BUILD, the build system executable suffix.
+dnl
+dnl  The idea is to find what "-o conftest$foo" will make it possible to run
+dnl  the program with ./conftest.  On Unix-like systems this is of course
+dnl  nothing, for DOS it's ".exe", or for a strange RISC OS foreign file
+dnl  system cross compile it can be ",ff8" apparently.  Not sure if the
+dnl  latter actually applies to a build-system executable, maybe it doesn't,
+dnl  but it won't hurt to try.
+dnl
+dnl  The suffixes are tried in the order ".exe", ",ff8" and then empty, and
+dnl  the first for which ./conftest runs is cached in
+dnl  gmp_cv_prog_exeext_for_build.  It's a configure error if none does.
+dnl
+dnl  The test program leaves main with "return 0" rather than calling
+dnl  exit().  exit() isn't declared without <stdlib.h>, and a compiler which
+dnl  rejects implicit function declarations (as C99 requires, and as recent
+dnl  gcc and clang do by default) would otherwise fail every suffix.
+
+AC_DEFUN([GMP_PROG_EXEEXT_FOR_BUILD],
+[AC_REQUIRE([GMP_PROG_CC_FOR_BUILD])
+AC_CACHE_CHECK([for build system executable suffix],
+               gmp_cv_prog_exeext_for_build,
+[cat >conftest.c <<EOF
+int
+main ()
+{
+  return 0;
+}
+EOF
+for i in .exe ,ff8 ""; do
+  gmp_compile="$CC_FOR_BUILD conftest.c -o conftest$i"
+  if AC_TRY_EVAL(gmp_compile); then
+    if (./conftest) 2>&AC_FD_CC; then
+      gmp_cv_prog_exeext_for_build=$i
+      break
+    fi
+  fi
+done
+rm -f conftest*
+if test "${gmp_cv_prog_exeext_for_build+set}" != set; then
+  AC_MSG_ERROR([Cannot determine executable suffix])
+fi
+])
+AC_SUBST(EXEEXT_FOR_BUILD,$gmp_cv_prog_exeext_for_build)
+])
+
+
+dnl  GMP_C_FOR_BUILD_ANSI
+dnl  --------------------
+dnl  Determine whether CC_FOR_BUILD is ANSI, and establish U_FOR_BUILD
+dnl  accordingly.
+dnl
+dnl  The compiler is taken to be ANSI if it accepts a prototype style
+dnl  definition of main.  U_FOR_BUILD is then empty, otherwise it's "_".
+dnl
+dnl  The test program leaves main with "return 0" rather than calling
+dnl  exit().  exit() isn't declared without <stdlib.h>, and a compiler which
+dnl  rejects implicit function declarations (as C99 requires, and as recent
+dnl  gcc and clang do by default) would otherwise be reported as not ANSI,
+dnl  which is exactly backwards.
+dnl
+dnl  FIXME: AC_PROG_CC sets ac_cv_prog_cc_c89, which could perhaps be used
+dnl  instead.
+
+AC_DEFUN([GMP_C_FOR_BUILD_ANSI],
+[AC_REQUIRE([GMP_PROG_CC_FOR_BUILD])
+AC_CACHE_CHECK([whether build system compiler is ANSI],
+               gmp_cv_c_for_build_ansi,
+[cat >conftest.c <<EOF
+int
+main (int argc, char **argv)
+{
+  return 0;
+}
+EOF
+gmp_compile="$CC_FOR_BUILD conftest.c"
+if AC_TRY_EVAL(gmp_compile); then
+  gmp_cv_c_for_build_ansi=yes
+else
+  gmp_cv_c_for_build_ansi=no
+fi
+rm -f conftest* a.out b.out a.exe a_out.exe
+])
+if test "$gmp_cv_c_for_build_ansi" = yes; then
+  U_FOR_BUILD=
+else
+  AC_SUBST(U_FOR_BUILD,_)
+fi
+])
+
+
+dnl  GMP_CHECK_LIBM_FOR_BUILD
+dnl  ------------------------
+dnl  Establish LIBM_FOR_BUILD as -lm, if that seems to work.
+dnl
+dnl  Libtool AC_CHECK_LIBM also uses -lmw on *-ncr-sysv4.3*, if it works.
+dnl  Don't know what that does, let's assume it's not needed just for log().
+dnl
+dnl  A program calling log() is compiled and linked with -lm.  The cached
+dnl  gmp_cv_check_libm_for_build is "-lm" if that succeeds and "no" if not,
+dnl  the latter giving an empty LIBM_FOR_BUILD.
+dnl
+dnl  The test program includes <math.h> for log() and leaves main with
+dnl  "return 0" rather than calling exit().  Without declarations for log()
+dnl  and exit(), a compiler which rejects implicit function declarations (as
+dnl  C99 requires, and as recent gcc and clang do by default) fails to
+dnl  compile the test at all, and -lm would wrongly be left out.
+
+AC_DEFUN([GMP_CHECK_LIBM_FOR_BUILD],
+[AC_REQUIRE([GMP_PROG_CC_FOR_BUILD])
+AC_CACHE_CHECK([for build system compiler math library],
+               gmp_cv_check_libm_for_build,
+[cat >conftest.c <<EOF
+#include <math.h>
+int
+main ()
+{
+  return 0;
+}
+double d;
+double
+foo ()
+{
+  return log (d);
+}
+EOF
+gmp_compile="$CC_FOR_BUILD conftest.c -lm"
+if AC_TRY_EVAL(gmp_compile); then
+  gmp_cv_check_libm_for_build=-lm
+else
+  gmp_cv_check_libm_for_build=no
+fi
+rm -f conftest* a.out b.out a.exe a_out.exe
+])
+case $gmp_cv_check_libm_for_build in
+  yes) AC_SUBST(LIBM_FOR_BUILD,-lm) ;;
+  no)  LIBM_FOR_BUILD= ;;
+  *)   LIBM_FOR_BUILD=$gmp_cv_check_libm_for_build ;;
+esac
+])
diff --git a/aclocal.m4 b/aclocal.m4

new file mode 100644 (file)

index 0000000..aecf1dc
--- /dev/null
+++ b/aclocal.m4
@@ -0,0 +1,9324 @@
+# generated automatically by aclocal 1.11.1 -*- Autoconf -*-
+
+# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
+# 2005, 2006, 2007, 2008, 2009  Free Software Foundation, Inc.
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+m4_ifndef([AC_AUTOCONF_VERSION],
+  [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
+m4_if(m4_defn([AC_AUTOCONF_VERSION]), [2.65],,
+[m4_warning([this file was generated for autoconf 2.65.
+You have another version of autoconf.  It may work, but is not guaranteed to.
+If you have problems, you may need to regenerate the build system entirely.
+To do so, use the procedure documented by the package, typically `autoreconf'.])])
+
+# libtool.m4 - Configure libtool for the host system. -*-Autoconf-*-
+#
+#   Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005,
+#                 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+#                 Foundation, Inc.
+#   Written by Gordon Matzigkeit, 1996
+#
+# This file is free software; the Free Software Foundation gives
+# unlimited permission to copy and/or distribute it, with or without
+# modifications, as long as this notice is preserved.
+
+m4_define([_LT_COPYING], [dnl
+#   Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005,
+#                 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+#                 Foundation, Inc.
+#   Written by Gordon Matzigkeit, 1996
+#
+#   This file is part of GNU Libtool.
+#
+# GNU Libtool is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2 of
+# the License, or (at your option) any later version.
+#
+# As a special exception to the GNU General Public License,
+# if you distribute this file as part of a program or library that
+# is built using GNU Libtool, you may include this file under the
+# same distribution terms that you use for the rest of that program.
+#
+# GNU Libtool is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GNU Libtool; see the file COPYING.  If not, a copy
+# can be downloaded from http://www.gnu.org/licenses/gpl.html, or
+# obtained by writing to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+])
+
+# serial 57 LT_INIT
+
+
+# LT_PREREQ(VERSION, [IF-NEW-ENOUGH], [IF-TOO-OLD])
+# -------------------------------------------------
+# Complain and exit if this libtool version is less than VERSION.
+# LT_PACKAGE_VERSION is compared against VERSION with m4_version_compare.
+# When the comparison yields -1, IF-TOO-OLD is expanded; it defaults to an
+# m4_fatal call that is passed 63 as its second argument.  In every other
+# case IF-NEW-ENOUGH is expanded.
+m4_defun([LT_PREREQ],
+[m4_if(m4_version_compare(m4_defn([LT_PACKAGE_VERSION]), [$1]), -1,
+       [m4_default([$3],
+                  [m4_fatal([Libtool version $1 or higher is required],
+                            63)])],
+       [$2])])
+
+
+# _LT_CHECK_BUILDDIR
+# ------------------
+# Complain if the absolute build directory name contains unusual characters.
+# The only characters tested for are whitespace: the output of `pwd' is
+# matched against patterns containing blanks, and a match produces an
+# AC_MSG_WARN.
+m4_defun([_LT_CHECK_BUILDDIR],
+[case `pwd` in
+  *\ * | *\    *)
+    AC_MSG_WARN([Libtool does not cope well with whitespace in `pwd`]) ;;
+esac
+])
+
+
+# LT_INIT([OPTIONS])
+# ------------------
+# Entry point that sets libtool up for a package.  It requires Autoconf
+# 2.58 or newer, must be expanded before LT_LANG, LT_OUTPUT and LTDL_INIT,
+# and requires LTOPTIONS_VERSION, LTSUGAR_VERSION, LTVERSION_VERSION,
+# LTOBSOLETE_VERSION and _LT_PROG_LTMAIN.  OPTIONS is handed to
+# _LT_SET_OPTIONS.  LIBTOOL_DEPS is set to $ltmain, LIBTOOL is substituted
+# as the libtool script in $(top_builddir), and _LT_SETUP is run.
+# The macro finally redefines itself as empty so that the body is only
+# expanded once.
+AC_DEFUN([LT_INIT],
+[AC_PREREQ([2.58])dnl We use AC_INCLUDES_DEFAULT
+AC_REQUIRE([AC_CONFIG_AUX_DIR_DEFAULT])dnl
+AC_BEFORE([$0], [LT_LANG])dnl
+AC_BEFORE([$0], [LT_OUTPUT])dnl
+AC_BEFORE([$0], [LTDL_INIT])dnl
+m4_require([_LT_CHECK_BUILDDIR])dnl
+
+dnl Autoconf doesn't catch unexpanded LT_ macros by default:
+m4_pattern_forbid([^_?LT_[A-Z_]+$])dnl
+m4_pattern_allow([^(_LT_EOF|LT_DLGLOBAL|LT_DLLAZY_OR_NOW|LT_MULTI_MODULE)$])dnl
+dnl aclocal doesn't pull ltoptions.m4, ltsugar.m4, or ltversion.m4
+dnl unless we require an AC_DEFUNed macro:
+AC_REQUIRE([LTOPTIONS_VERSION])dnl
+AC_REQUIRE([LTSUGAR_VERSION])dnl
+AC_REQUIRE([LTVERSION_VERSION])dnl
+AC_REQUIRE([LTOBSOLETE_VERSION])dnl
+m4_require([_LT_PROG_LTMAIN])dnl
+
+_LT_SHELL_INIT([SHELL=${CONFIG_SHELL-/bin/sh}])
+
+dnl Parse OPTIONS
+_LT_SET_OPTIONS([$0], [$1])
+
+# This can be used to rebuild libtool when needed
+LIBTOOL_DEPS="$ltmain"
+
+# Always use our own libtool.
+LIBTOOL='$(SHELL) $(top_builddir)/libtool'
+AC_SUBST(LIBTOOL)dnl
+
+_LT_SETUP
+
+# Only expand once:
+m4_define([LT_INIT])
+])# LT_INIT
+
+# Old names:
+AU_ALIAS([AC_PROG_LIBTOOL], [LT_INIT])
+AU_ALIAS([AM_PROG_LIBTOOL], [LT_INIT])
+dnl aclocal-1.4 backwards compatibility:
+dnl AC_DEFUN([AC_PROG_LIBTOOL], [])
+dnl AC_DEFUN([AM_PROG_LIBTOOL], [])
+
+
+# _LT_CC_BASENAME(CC)
+# -------------------
+# Calculate cc_basename.  Skip known compiler wrappers and cross-prefix.
+# The words of CC are scanned in order; words naming compile, ccache,
+# distcc or purify (with or without a directory part) and words starting
+# with `-' are skipped.  The first remaining word is kept in cc_temp, and
+# cc_basename is that word with any leading directory and any leading
+# `$host_alias-' removed.
+m4_defun([_LT_CC_BASENAME],
+[for cc_temp in $1""; do
+  case $cc_temp in
+    compile | *[[\\/]]compile | ccache | *[[\\/]]ccache ) ;;
+    distcc | *[[\\/]]distcc | purify | *[[\\/]]purify ) ;;
+    \-*) ;;
+    *) break;;
+  esac
+done
+cc_basename=`$ECHO "$cc_temp" | $SED "s%.*/%%; s%^$host_alias-%%"`
+])
+
+
+# _LT_FILEUTILS_DEFAULTS
+# ----------------------
+# It is okay to use these file commands and assume they have been set
+# sensibly after `m4_require([_LT_FILEUTILS_DEFAULTS])'.
+# CP, MV and RM default to "cp -f", "mv -f" and "rm -f" respectively; a
+# value that is already set is left alone.
+m4_defun([_LT_FILEUTILS_DEFAULTS],
+[: ${CP="cp -f"}
+: ${MV="mv -f"}
+: ${RM="rm -f"}
+])# _LT_FILEUTILS_DEFAULTS
+
+
+# _LT_SETUP
+# ---------
+# Body of the libtool configuration, expanded from LT_INIT.  It requires
+# the canonical host and build checks, the C compiler, linker, nm, `ln -s'
+# and command length checks plus the other _LT_* prerequisites named
+# below, and declares the basic host/build/suffix variables with _LT_DECL.
+# It then sets fallback values for CC, LTCC, LTCFLAGS, LD and ac_objext,
+# computes cc_basename, looks for the `file' command only when
+# $deplibs_check_method asks for file_magic, and finishes by running the
+# C language configuration, _LT_LANG_DEFAULT_CONFIG and
+# _LT_CONFIG_COMMANDS.
+m4_defun([_LT_SETUP],
+[AC_REQUIRE([AC_CANONICAL_HOST])dnl
+AC_REQUIRE([AC_CANONICAL_BUILD])dnl
+AC_REQUIRE([_LT_PREPARE_SED_QUOTE_VARS])dnl
+AC_REQUIRE([_LT_PROG_ECHO_BACKSLASH])dnl
+
+_LT_DECL([], [PATH_SEPARATOR], [1], [The PATH separator for the build system])dnl
+dnl
+_LT_DECL([], [host_alias], [0], [The host system])dnl
+_LT_DECL([], [host], [0])dnl
+_LT_DECL([], [host_os], [0])dnl
+dnl
+_LT_DECL([], [build_alias], [0], [The build system])dnl
+_LT_DECL([], [build], [0])dnl
+_LT_DECL([], [build_os], [0])dnl
+dnl
+AC_REQUIRE([AC_PROG_CC])dnl
+AC_REQUIRE([LT_PATH_LD])dnl
+AC_REQUIRE([LT_PATH_NM])dnl
+dnl
+AC_REQUIRE([AC_PROG_LN_S])dnl
+test -z "$LN_S" && LN_S="ln -s"
+_LT_DECL([], [LN_S], [1], [Whether we need soft or hard links])dnl
+dnl
+AC_REQUIRE([LT_CMD_MAX_LEN])dnl
+_LT_DECL([objext], [ac_objext], [0], [Object file suffix (normally "o")])dnl
+_LT_DECL([], [exeext], [0], [Executable file suffix (normally "")])dnl
+dnl
+m4_require([_LT_FILEUTILS_DEFAULTS])dnl
+m4_require([_LT_CHECK_SHELL_FEATURES])dnl
+m4_require([_LT_PATH_CONVERSION_FUNCTIONS])dnl
+m4_require([_LT_CMD_RELOAD])dnl
+m4_require([_LT_CHECK_MAGIC_METHOD])dnl
+m4_require([_LT_CHECK_SHAREDLIB_FROM_LINKLIB])dnl
+m4_require([_LT_CMD_OLD_ARCHIVE])dnl
+m4_require([_LT_CMD_GLOBAL_SYMBOLS])dnl
+m4_require([_LT_WITH_SYSROOT])dnl
+
+_LT_CONFIG_LIBTOOL_INIT([
+# See if we are running on zsh, and set the options which allow our
+# commands through without removal of \ escapes INIT.
+if test -n "\${ZSH_VERSION+set}" ; then
+   setopt NO_GLOB_SUBST
+fi
+])
+if test -n "${ZSH_VERSION+set}" ; then
+   setopt NO_GLOB_SUBST
+fi
+
+_LT_CHECK_OBJDIR
+
+m4_require([_LT_TAG_COMPILER])dnl
+
+case $host_os in
+aix3*)
+  # AIX sometimes has problems with the GCC collect2 program.  For some
+  # reason, if we set the COLLECT_NAMES environment variable, the problems
+  # vanish in a puff of smoke.
+  if test "X${COLLECT_NAMES+set}" != Xset; then
+    COLLECT_NAMES=
+    export COLLECT_NAMES
+  fi
+  ;;
+esac
+
+# Global variables:
+ofile=libtool
+can_build_shared=yes
+
+# All known linkers require a `.a' archive for static linking (except MSVC,
+# which needs '.lib').
+libext=a
+
+with_gnu_ld="$lt_cv_prog_gnu_ld"
+
+old_CC="$CC"
+old_CFLAGS="$CFLAGS"
+
+# Set sane defaults for various variables
+test -z "$CC" && CC=cc
+test -z "$LTCC" && LTCC=$CC
+test -z "$LTCFLAGS" && LTCFLAGS=$CFLAGS
+test -z "$LD" && LD=ld
+test -z "$ac_objext" && ac_objext=o
+
+_LT_CC_BASENAME([$compiler])
+
+# Only perform the check for file, if the check method requires it
+test -z "$MAGIC_CMD" && MAGIC_CMD=file
+case $deplibs_check_method in
+file_magic*)
+  if test "$file_magic_cmd" = '$MAGIC_CMD'; then
+    _LT_PATH_MAGIC
+  fi
+  ;;
+esac
+
+# Use C for the default configuration in the libtool script
+LT_SUPPORTED_TAG([CC])
+_LT_LANG_C_CONFIG
+_LT_LANG_DEFAULT_CONFIG
+_LT_CONFIG_COMMANDS
+])# _LT_SETUP
+
+
+# _LT_PREPARE_SED_QUOTE_VARS
+# --------------------------
+# Define a few sed substitutions that help us do robust quoting.  The shell
+# variables set are sed_quote_subst, double_quote_subst,
+# delay_variable_subst, delay_single_quote_subst and no_glob_subst.
+m4_defun([_LT_PREPARE_SED_QUOTE_VARS],
+[# Backslashify metacharacters that are still active within
+# double-quoted strings.
+sed_quote_subst='s/\([["`$\\]]\)/\\\1/g'
+
+# Same as above, but do not quote variable references.
+double_quote_subst='s/\([["`\\]]\)/\\\1/g'
+
+# Sed substitution to delay expansion of an escaped shell variable in a
+# double_quote_subst'ed string.
+delay_variable_subst='s/\\\\\\\\\\\$/\\\\\\$/g'
+
+# Sed substitution to delay expansion of an escaped single quote.
+delay_single_quote_subst='s/'\''/'\'\\\\\\\'\''/g'
+
+# Sed substitution to avoid accidental globbing in evaled expressions
+no_glob_subst='s/\*/\\\*/g'
+])
+
+# _LT_PROG_LTMAIN
+# ---------------
+# Note that this code is called both from `configure', and `config.status'
+# now that we use AC_CONFIG_COMMANDS to generate libtool.  Notably,
+# `config.status' has no value for ac_aux_dir unless we are using Automake,
+# so we pass a copy along to make sure it has a sensible value anyway.
+# Sets ltmain to the ltmain.sh file inside $ac_aux_dir, and registers
+# ltmain.sh with AC_REQUIRE_AUX_FILE when that macro is defined.
+m4_defun([_LT_PROG_LTMAIN],
+[m4_ifdef([AC_REQUIRE_AUX_FILE], [AC_REQUIRE_AUX_FILE([ltmain.sh])])dnl
+_LT_CONFIG_LIBTOOL_INIT([ac_aux_dir='$ac_aux_dir'])
+ltmain="$ac_aux_dir/ltmain.sh"
+])# _LT_PROG_LTMAIN
+
+
+
+# So that we can recreate a full libtool script including additional
+# tags, we accumulate the chunks of code to send to AC_CONFIG_COMMANDS
+# in macros and then make a single call at the end using the `libtool'
+# label.
+
+
+# _LT_CONFIG_LIBTOOL_INIT([INIT-COMMANDS])
+# ----------------------------------------
+# Register INIT-COMMANDS to be passed to AC_CONFIG_COMMANDS later.
+# A non-empty INIT-COMMANDS is appended, followed by a newline, to the
+# accumulator macro _LT_OUTPUT_LIBTOOL_INIT; an empty argument is ignored.
+m4_define([_LT_CONFIG_LIBTOOL_INIT],
+[m4_ifval([$1],
+          [m4_append([_LT_OUTPUT_LIBTOOL_INIT],
+                     [$1
+])])])
+
+# Initialize the accumulator as empty.
+m4_define([_LT_OUTPUT_LIBTOOL_INIT])
+
+
+# _LT_CONFIG_LIBTOOL([COMMANDS])
+# ------------------------------
+# Register COMMANDS to be passed to AC_CONFIG_COMMANDS later.
+# A non-empty COMMANDS is appended, followed by a newline, to the
+# accumulator macro _LT_OUTPUT_LIBTOOL_COMMANDS; an empty argument is
+# ignored.
+m4_define([_LT_CONFIG_LIBTOOL],
+[m4_ifval([$1],
+          [m4_append([_LT_OUTPUT_LIBTOOL_COMMANDS],
+                     [$1
+])])])
+
+# Initialize the accumulator as empty.
+m4_define([_LT_OUTPUT_LIBTOOL_COMMANDS])
+
+
+# _LT_CONFIG_SAVE_COMMANDS([COMMANDS], [INIT_COMMANDS])
+# -----------------------------------------------------
+# Register both kinds of code in one call: COMMANDS goes through
+# _LT_CONFIG_LIBTOOL and INIT_COMMANDS through _LT_CONFIG_LIBTOOL_INIT.
+m4_defun([_LT_CONFIG_SAVE_COMMANDS],
+[_LT_CONFIG_LIBTOOL([$1])
+_LT_CONFIG_LIBTOOL_INIT([$2])
+])
+
+
+# _LT_FORMAT_COMMENT([COMMENT])
+# -----------------------------
+# Add leading comment marks to the start of each line, and a trailing
+# full-stop to the whole comment if one is not present already.
+# Single quotes, backquotes, dollar signs and backslashes in COMMENT are
+# prefixed with a backslash.  A COMMENT already ending in `!', `?' or `.'
+# gets no extra full-stop, and an empty COMMENT expands to nothing.
+m4_define([_LT_FORMAT_COMMENT],
+[m4_ifval([$1], [
+m4_bpatsubst([m4_bpatsubst([$1], [^ *], [# ])],
+              [['`$\]], [\\\&])]m4_bmatch([$1], [[!?.]$], [], [.])
+)])
+
+
+
+
+
+# _LT_DECL([CONFIGNAME], VARNAME, VALUE, [DESCRIPTION], [IS-TAGGED?])
+# -------------------------------------------------------------------
+# CONFIGNAME is the name given to the value in the libtool script.
+# VARNAME is the (base) name used in the configure script.
+# VALUE may be 0, 1 or 2 for a computed quote escaped value based on
+# VARNAME.  Any other value will be used directly.
+# When CONFIGNAME is empty, VARNAME is recorded as the libtool script name
+# as well.  DESCRIPTION is recorded only when it is non-empty, and the
+# `tagged?' entry is recorded as yes when IS-TAGGED? is non-empty and as no
+# otherwise.  Everything is stored in lt_decl_dict under VARNAME.
+# NOTE(review): the recording is wrapped in lt_if_append_uniq on
+# lt_decl_varnames, which is defined elsewhere; presumably a VARNAME that
+# was declared before is not recorded again -- confirm in ltsugar.m4.
+m4_define([_LT_DECL],
+[lt_if_append_uniq([lt_decl_varnames], [$2], [, ],
+    [lt_dict_add_subkey([lt_decl_dict], [$2], [libtool_name],
+       [m4_ifval([$1], [$1], [$2])])
+    lt_dict_add_subkey([lt_decl_dict], [$2], [value], [$3])
+    m4_ifval([$4],
+       [lt_dict_add_subkey([lt_decl_dict], [$2], [description], [$4])])
+    lt_dict_add_subkey([lt_decl_dict], [$2],
+       [tagged?], [m4_ifval([$5], [yes], [no])])])
+])
+
+
+# _LT_TAGDECL([CONFIGNAME], VARNAME, VALUE, [DESCRIPTION])
+# --------------------------------------------------------
+# Shorthand for _LT_DECL with the same four arguments and IS-TAGGED? set
+# to yes.
+m4_define([_LT_TAGDECL], [_LT_DECL([$1], [$2], [$3], [$4], [yes])])
+
+
+# lt_decl_tag_varnames([SEPARATOR], [VARNAME1...])
+# ------------------------------------------------
+# Select, through _lt_decl_filter, the variables whose `tagged?' entry is
+# yes.  All arguments are forwarded unchanged.
+m4_define([lt_decl_tag_varnames],
+[_lt_decl_filter([tagged?], [yes], $@)])
+
+
+# _lt_decl_filter(SUBKEY, VALUE, [SEPARATOR], [VARNAME1..])
+# ---------------------------------------------------------
+# Run lt_dict_filter over lt_decl_dict for entries whose SUBKEY is VALUE.
+# Dispatch is on the argument count: fewer than two arguments is a fatal
+# error; with two or three arguments every name in lt_decl_varnames is
+# considered (with an empty SEPARATOR in the two-argument case); with more
+# arguments the explicit VARNAMEs are passed through as given.
+m4_define([_lt_decl_filter],
+[m4_case([$#],
+  [0], [m4_fatal([$0: too few arguments: $#])],
+  [1], [m4_fatal([$0: too few arguments: $#: $1])],
+  [2], [lt_dict_filter([lt_decl_dict], [$1], [$2], [], lt_decl_varnames)],
+  [3], [lt_dict_filter([lt_decl_dict], [$1], [$2], [$3], lt_decl_varnames)],
+  [lt_dict_filter([lt_decl_dict], $@)])[]dnl
+])
+
+
+# lt_decl_quote_varnames([SEPARATOR], [VARNAME1...])
+# --------------------------------------------------
+# Select, through _lt_decl_filter, the variables that were declared with a
+# VALUE of 1.  All arguments are forwarded unchanged.
+m4_define([lt_decl_quote_varnames],
+[_lt_decl_filter([value], [1], $@)])
+
+
+# lt_decl_dquote_varnames([SEPARATOR], [VARNAME1...])
+# ---------------------------------------------------
+# Select, through _lt_decl_filter, the variables that were declared with a
+# VALUE of 2.  All arguments are forwarded unchanged.
+m4_define([lt_decl_dquote_varnames],
+[_lt_decl_filter([value], [2], $@)])
+
+
+# lt_decl_varnames_tagged([SEPARATOR], [VARNAME1...])
+# ---------------------------------------------------
+# Accepts at most two arguments.  SEPARATOR defaults to `, ' and the
+# variable list defaults to lt_decl_tag_varnames.  The list is handed to
+# lt_combine together with `_' and the words of _LT_TAGS; when _LT_TAGS
+# holds no tags the expansion is empty.
+# NOTE(review): lt_combine is defined elsewhere; presumably the result is
+# every VARNAME_TAG pairing -- confirm in ltsugar.m4.
+m4_define([lt_decl_varnames_tagged],
+[m4_assert([$# <= 2])dnl
+_$0(m4_quote(m4_default([$1], [[, ]])),
+    m4_ifval([$2], [[$2]], [m4_dquote(lt_decl_tag_varnames)]),
+    m4_split(m4_normalize(m4_quote(_LT_TAGS)), [ ]))])
+m4_define([_lt_decl_varnames_tagged],
+[m4_ifval([$3], [lt_combine([$1], [$2], [_], $3)])])
+
+
+# lt_decl_all_varnames([SEPARATOR], [VARNAME1...])
+# ------------------------------------------------
+# SEPARATOR defaults to `, '.  When no VARNAME is given the list defaults
+# to lt_decl_varnames.  The helper passes that list to lt_join together
+# with the lt_decl_varnames_tagged expansion of those members that are
+# tagged.
+# NOTE(review): lt_join is defined elsewhere; presumably it joins its
+# arguments with SEPARATOR -- confirm in ltsugar.m4.
+m4_define([lt_decl_all_varnames],
+[_$0(m4_quote(m4_default([$1], [[, ]])),
+     m4_if([$2], [],
+          m4_quote(lt_decl_varnames),
+       m4_quote(m4_shift($@))))[]dnl
+])
+m4_define([_lt_decl_all_varnames],
+[lt_join($@, lt_decl_varnames_tagged([$1],
+                       lt_decl_tag_varnames([[, ]], m4_shift($@))))dnl
+])
+
+
+# _LT_CONFIG_STATUS_DECLARE([VARNAME])
+# ------------------------------------
+# Quote a variable value, and forward it to `config.status' so that its
+# declaration there will have the same value as in `configure'.  VARNAME
+# must have a single quote delimited value for this to work.
+# The value is echoed with $ECHO and filtered by $SED using
+# $delay_single_quote_subst, and the result is placed between single
+# quotes.
+m4_define([_LT_CONFIG_STATUS_DECLARE],
+[$1='`$ECHO "$][$1" | $SED "$delay_single_quote_subst"`'])
+
+
+# _LT_CONFIG_STATUS_DECLARATIONS
+# ------------------------------
+# We delimit libtool config variables with single quotes, so when
+# we write them to config.status, we have to be sure to quote all
+# embedded single quotes properly.  In configure, this macro expands
+# each variable declared with _LT_DECL (and _LT_TAGDECL) into:
+#
+#    <var>='`$ECHO "$<var>" | $SED "$delay_single_quote_subst"`'
+#
+# The variables are the ones listed by lt_decl_all_varnames, and each
+# declaration is produced by _LT_CONFIG_STATUS_DECLARE.
+m4_defun([_LT_CONFIG_STATUS_DECLARATIONS],
+[m4_foreach([_lt_var], m4_quote(lt_decl_all_varnames),
+    [m4_n([_LT_CONFIG_STATUS_DECLARE(_lt_var)])])])
+
+
+# _LT_LIBTOOL_TAGS
+# ----------------
+# Output comment and list of tags supported by the script.  The list is
+# the current contents of _LT_TAGS, assigned to available_tags.
+m4_defun([_LT_LIBTOOL_TAGS],
+[_LT_FORMAT_COMMENT([The names of the tagged configurations supported by this script])dnl
+available_tags="_LT_TAGS"dnl
+])
+
+
+# _LT_LIBTOOL_DECLARE(VARNAME, [TAG])
+# -----------------------------------
+# Extract the dictionary values for VARNAME (optionally with TAG) and
+# expand to a commented shell variable setting:
+#
+#    # Some comment about what VAR is for.
+#    visible_name=$lt_internal_name
+#
+# The comment is the recorded description run through _LT_FORMAT_COMMENT.
+# The right-hand side depends on the VALUE recorded by _LT_DECL: 0 gives
+# $VARNAME, 1 or 2 gives $lt_VARNAME, and any other VALUE is used
+# literally.  When TAG is given, _TAG is appended to the right-hand side.
+m4_define([_LT_LIBTOOL_DECLARE],
+[_LT_FORMAT_COMMENT(m4_quote(lt_dict_fetch([lt_decl_dict], [$1],
+                                          [description])))[]dnl
+m4_pushdef([_libtool_name],
+    m4_quote(lt_dict_fetch([lt_decl_dict], [$1], [libtool_name])))[]dnl
+m4_case(m4_quote(lt_dict_fetch([lt_decl_dict], [$1], [value])),
+    [0], [_libtool_name=[$]$1],
+    [1], [_libtool_name=$lt_[]$1],
+    [2], [_libtool_name=$lt_[]$1],
+    [_libtool_name=lt_dict_fetch([lt_decl_dict], [$1], [value])])[]dnl
+m4_ifval([$2], [_$2])[]m4_popdef([_libtool_name])[]dnl
+])
+
+
+# _LT_LIBTOOL_CONFIG_VARS
+# -----------------------
+# Produce commented declarations of non-tagged libtool config variables
+# suitable for insertion in the LIBTOOL CONFIG section of the `libtool'
+# script.  Tagged libtool config variables (even for the LIBTOOL CONFIG
+# section) are produced by _LT_LIBTOOL_TAG_VARS.
+# The variables are those in lt_decl_varnames whose `tagged?' entry is no;
+# each one is emitted with _LT_LIBTOOL_DECLARE.
+m4_defun([_LT_LIBTOOL_CONFIG_VARS],
+[m4_foreach([_lt_var],
+    m4_quote(_lt_decl_filter([tagged?], [no], [], lt_decl_varnames)),
+    [m4_n([_LT_LIBTOOL_DECLARE(_lt_var)])])])
+
+
+# _LT_LIBTOOL_TAG_VARS(TAG)
+# -------------------------
+# Produce a commented declaration, through _LT_LIBTOOL_DECLARE, for every
+# variable listed by lt_decl_tag_varnames, passing TAG along to each.
+m4_define([_LT_LIBTOOL_TAG_VARS],
+[m4_foreach([_lt_var], m4_quote(lt_decl_tag_varnames),
+    [m4_n([_LT_LIBTOOL_DECLARE(_lt_var, [$1])])])])
+
+
+# _LT_TAGVAR(VARNAME, [TAGNAME])
+# ------------------------------
+# Expand to VARNAME_TAGNAME, or to plain VARNAME when TAGNAME is empty.
+m4_define([_LT_TAGVAR], [m4_ifval([$2], [$1_$2], [$1])])
+
+
+# _LT_CONFIG_COMMANDS
+# -------------------
+# Send accumulated output to $CONFIG_STATUS.  Thanks to the lists of
+# variables for single and double quote escaping we saved from calls
+# to _LT_DECL, we can put quote escaped variables declarations
+# into `config.status', and then the shell code to quote escape them in
+# for loops in `config.status'.  Finally, any additional code accumulated
+# from calls to _LT_CONFIG_LIBTOOL_INIT is expanded.
+# Either way a single AC_CONFIG_COMMANDS entry labelled `libtool' is
+# registered.  When LT_OUTPUT has been provided, that entry just runs
+# $CONFIG_LT; otherwise it carries _LT_OUTPUT_LIBTOOL_COMMANDS with
+# _LT_OUTPUT_LIBTOOL_COMMANDS_INIT as its init code.
+m4_defun([_LT_CONFIG_COMMANDS],
+[AC_PROVIDE_IFELSE([LT_OUTPUT],
+       dnl If the libtool generation code has been placed in $CONFIG_LT,
+       dnl instead of duplicating it all over again into config.status,
+       dnl then we will have config.status run $CONFIG_LT later, so it
+       dnl needs to know what name is stored there:
+        [AC_CONFIG_COMMANDS([libtool],
+            [$SHELL $CONFIG_LT || AS_EXIT(1)], [CONFIG_LT='$CONFIG_LT'])],
+    dnl If the libtool generation code is destined for config.status,
+    dnl expand the accumulated commands and init code now:
+    [AC_CONFIG_COMMANDS([libtool],
+        [_LT_OUTPUT_LIBTOOL_COMMANDS], [_LT_OUTPUT_LIBTOOL_COMMANDS_INIT])])
+])#_LT_CONFIG_COMMANDS
+
+
+# _LT_OUTPUT_LIBTOOL_COMMANDS_INIT
+# --------------------------------
+# Initialization code that accompanies _LT_OUTPUT_LIBTOOL_COMMANDS; it is
+# used as the init argument of AC_CONFIG_COMMANDS in _LT_CONFIG_COMMANDS
+# and written into $CONFIG_LT by LT_OUTPUT.  It unsets CDPATH where
+# possible, forwards the sed quoting scripts, every declared libtool
+# variable, LTCC, LTCFLAGS and compiler, defines func_fallback_echo, and
+# then builds an lt_<var> copy of each variable declared with VALUE 1
+# (quoted with sed_quote_subst) or VALUE 2 (quoted with double_quote_subst,
+# sed_quote_subst and delay_variable_subst).  Code accumulated in
+# _LT_OUTPUT_LIBTOOL_INIT is appended last.
+m4_define([_LT_OUTPUT_LIBTOOL_COMMANDS_INIT],
+[
+
+# The HP-UX ksh and POSIX shell print the target directory to stdout
+# if CDPATH is set.
+(unset CDPATH) >/dev/null 2>&1 && unset CDPATH
+
+sed_quote_subst='$sed_quote_subst'
+double_quote_subst='$double_quote_subst'
+delay_variable_subst='$delay_variable_subst'
+_LT_CONFIG_STATUS_DECLARATIONS
+LTCC='$LTCC'
+LTCFLAGS='$LTCFLAGS'
+compiler='$compiler_DEFAULT'
+
+# A function that is used when there is no print builtin or printf.
+func_fallback_echo ()
+{
+  eval 'cat <<_LTECHO_EOF
+\$[]1
+_LTECHO_EOF'
+}
+
+# Quote evaled strings.
+for var in lt_decl_all_varnames([[ \
+]], lt_decl_quote_varnames); do
+    case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in
+    *[[\\\\\\\`\\"\\\$]]*)
+      eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED \\"\\\$sed_quote_subst\\"\\\`\\\\\\""
+      ;;
+    *)
+      eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\""
+      ;;
+    esac
+done
+
+# Double-quote double-evaled strings.
+for var in lt_decl_all_varnames([[ \
+]], lt_decl_dquote_varnames); do
+    case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in
+    *[[\\\\\\\`\\"\\\$]]*)
+      eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED -e \\"\\\$double_quote_subst\\" -e \\"\\\$sed_quote_subst\\" -e \\"\\\$delay_variable_subst\\"\\\`\\\\\\""
+      ;;
+    *)
+      eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\""
+      ;;
+    esac
+done
+
+_LT_OUTPUT_LIBTOOL_INIT
+])
+
+# _LT_GENERATED_FILE_INIT(FILE, [COMMENT])
+# ----------------------------------------
+# Generate a child script FILE with all initialization necessary to
+# reuse the environment learned by the parent script, and make the
+# file executable.  If COMMENT is supplied, it is inserted after the
+# `#!' sequence but before initialization text begins.  After this
+# macro, additional text can be appended to FILE to form the body of
+# the child script.  The macro ends with non-zero status if the
+# file could not be fully written (such as if the disk is full).
+#
+# When AS_INIT_GENERATED is defined, this macro simply forwards to it.
+# Otherwise the fallback below writes FILE with two here-documents,
+# records a failed write in lt_write_fail, and only runs chmod +x when
+# both writes succeeded.
+m4_ifdef([AS_INIT_GENERATED],
+[m4_defun([_LT_GENERATED_FILE_INIT],[AS_INIT_GENERATED($@)])],
+[m4_defun([_LT_GENERATED_FILE_INIT],
+[m4_require([AS_PREPARE])]dnl
+[m4_pushdef([AS_MESSAGE_LOG_FD])]dnl
+[lt_write_fail=0
+cat >$1 <<_ASEOF || lt_write_fail=1
+#! $SHELL
+# Generated by $as_me.
+$2
+SHELL=\${CONFIG_SHELL-$SHELL}
+export SHELL
+_ASEOF
+cat >>$1 <<\_ASEOF || lt_write_fail=1
+AS_SHELL_SANITIZE
+_AS_PREPARE
+exec AS_MESSAGE_FD>&1
+_ASEOF
+test $lt_write_fail = 0 && chmod +x $1[]dnl
+m4_popdef([AS_MESSAGE_LOG_FD])])])# _LT_GENERATED_FILE_INIT
+
+# LT_OUTPUT
+# ---------
+# This macro allows early generation of the libtool script (before
+# AC_OUTPUT is called), in case it is used in configure for compilation
+# tests.
+#
+# It writes a helper script named by $CONFIG_LT (./config.lt unless already
+# set), made of the generated-file preamble, an option parser for
+# --version, --help, --debug and --quiet, the libtool init code and the
+# accumulated libtool commands.  The helper is then run with $SHELL, and
+# configure exits with status 1 when it fails.  --quiet is passed to the
+# helper when $silent is yes.
+AC_DEFUN([LT_OUTPUT],
+[: ${CONFIG_LT=./config.lt}
+AC_MSG_NOTICE([creating $CONFIG_LT])
+_LT_GENERATED_FILE_INIT(["$CONFIG_LT"],
+[# Run this file to recreate a libtool stub with the current configuration.])
+
+cat >>"$CONFIG_LT" <<\_LTEOF
+lt_cl_silent=false
+exec AS_MESSAGE_LOG_FD>>config.log
+{
+  echo
+  AS_BOX([Running $as_me.])
+} >&AS_MESSAGE_LOG_FD
+
+lt_cl_help="\
+\`$as_me' creates a local libtool stub from the current configuration,
+for use in further configure time tests before the real libtool is
+generated.
+
+Usage: $[0] [[OPTIONS]]
+
+  -h, --help      print this help, then exit
+  -V, --version   print version number, then exit
+  -q, --quiet     do not print progress messages
+  -d, --debug     don't remove temporary files
+
+Report bugs to <bug-libtool@gnu.org>."
+
+lt_cl_version="\
+m4_ifset([AC_PACKAGE_NAME], [AC_PACKAGE_NAME ])config.lt[]dnl
+m4_ifset([AC_PACKAGE_VERSION], [ AC_PACKAGE_VERSION])
+configured by $[0], generated by m4_PACKAGE_STRING.
+
+Copyright (C) 2011 Free Software Foundation, Inc.
+This config.lt script is free software; the Free Software Foundation
+gives unlimited permission to copy, distribute and modify it."
+
+while test $[#] != 0
+do
+  case $[1] in
+    --version | --v* | -V )
+      echo "$lt_cl_version"; exit 0 ;;
+    --help | --h* | -h )
+      echo "$lt_cl_help"; exit 0 ;;
+    --debug | --d* | -d )
+      debug=: ;;
+    --quiet | --q* | --silent | --s* | -q )
+      lt_cl_silent=: ;;
+
+    -*) AC_MSG_ERROR([unrecognized option: $[1]
+Try \`$[0] --help' for more information.]) ;;
+
+    *) AC_MSG_ERROR([unrecognized argument: $[1]
+Try \`$[0] --help' for more information.]) ;;
+  esac
+  shift
+done
+
+if $lt_cl_silent; then
+  exec AS_MESSAGE_FD>/dev/null
+fi
+_LTEOF
+
+cat >>"$CONFIG_LT" <<_LTEOF
+_LT_OUTPUT_LIBTOOL_COMMANDS_INIT
+_LTEOF
+
+cat >>"$CONFIG_LT" <<\_LTEOF
+AC_MSG_NOTICE([creating $ofile])
+_LT_OUTPUT_LIBTOOL_COMMANDS
+AS_EXIT(0)
+_LTEOF
+chmod +x "$CONFIG_LT"
+
+# configure is writing to config.log, but config.lt does its own redirection,
+# appending to config.log, which fails on DOS, as config.log is still kept
+# open by configure.  Here we exec the FD to /dev/null, effectively closing
+# config.log, so it can be properly (re)opened and appended to by config.lt.
+lt_cl_success=:
+test "$silent" = yes &&
+  lt_config_lt_args="$lt_config_lt_args --quiet"
+exec AS_MESSAGE_LOG_FD>/dev/null
+$SHELL "$CONFIG_LT" $lt_config_lt_args || lt_cl_success=false
+exec AS_MESSAGE_LOG_FD>>config.log
+$lt_cl_success || AS_EXIT(1)
+])# LT_OUTPUT
+
+
+# _LT_CONFIG(TAG)
+# ---------------
+# If TAG is the built-in tag, create an initial libtool script with a
+# default configuration from the untagged config vars.  Otherwise add code
+# to config.status for appending the configuration named by TAG from the
+# matching tagged config vars.
+# An empty TAG is treated as the built-in tag C.  For the built-in tag the
+# script is assembled in the temporary file "${ofile}T" (header, config
+# section, the contents of $ltmain) and then moved over $ofile and made
+# executable; for any other tag a TAG CONFIG section is appended to $ofile.
+m4_defun([_LT_CONFIG],
+[m4_require([_LT_FILEUTILS_DEFAULTS])dnl
+_LT_CONFIG_SAVE_COMMANDS([
+  m4_define([_LT_TAG], m4_if([$1], [], [C], [$1]))dnl
+  m4_if(_LT_TAG, [C], [
+    # See if we are running on zsh, and set the options which allow our
+    # commands through without removal of \ escapes.
+    if test -n "${ZSH_VERSION+set}" ; then
+      setopt NO_GLOB_SUBST
+    fi
+
+    cfgfile="${ofile}T"
+    trap "$RM \"$cfgfile\"; exit 1" 1 2 15
+    $RM "$cfgfile"
+
+    cat <<_LT_EOF >> "$cfgfile"
+#! $SHELL
+
+# `$ECHO "$ofile" | sed 's%^.*/%%'` - Provide generalized library-building support services.
+# Generated automatically by $as_me ($PACKAGE$TIMESTAMP) $VERSION
+# Libtool was configured on host `(hostname || uname -n) 2>/dev/null | sed 1q`:
+# NOTE: Changes made to this file will be lost: look at ltmain.sh.
+#
+_LT_COPYING
+_LT_LIBTOOL_TAGS
+
+# ### BEGIN LIBTOOL CONFIG
+_LT_LIBTOOL_CONFIG_VARS
+_LT_LIBTOOL_TAG_VARS
+# ### END LIBTOOL CONFIG
+
+_LT_EOF
+
+  case $host_os in
+  aix3*)
+    cat <<\_LT_EOF >> "$cfgfile"
+# AIX sometimes has problems with the GCC collect2 program.  For some
+# reason, if we set the COLLECT_NAMES environment variable, the problems
+# vanish in a puff of smoke.
+if test "X${COLLECT_NAMES+set}" != Xset; then
+  COLLECT_NAMES=
+  export COLLECT_NAMES
+fi
+_LT_EOF
+    ;;
+  esac
+
+  _LT_PROG_LTMAIN
+
+  # We use sed instead of cat because bash on DJGPP gets confused if
+  # it finds mixed CR/LF and LF-only lines.  Since sed operates in
+  # text mode, it properly converts lines to CR/LF.  This bash problem
+  # is reportedly fixed, but why not run on old versions too?
+  sed '$q' "$ltmain" >> "$cfgfile" \
+     || (rm -f "$cfgfile"; exit 1)
+
+  _LT_PROG_REPLACE_SHELLFNS
+
+   mv -f "$cfgfile" "$ofile" ||
+    (rm -f "$ofile" && cp "$cfgfile" "$ofile" && rm -f "$cfgfile")
+  chmod +x "$ofile"
+],
+[cat <<_LT_EOF >> "$ofile"
+
+dnl Unfortunately we have to use $1 here, since _LT_TAG is not expanded
+dnl in a comment (ie after a #).
+# ### BEGIN LIBTOOL TAG CONFIG: $1
+_LT_LIBTOOL_TAG_VARS(_LT_TAG)
+# ### END LIBTOOL TAG CONFIG: $1
+_LT_EOF
+])dnl /m4_if
+],
+[m4_if([$1], [], [
+    PACKAGE='$PACKAGE'
+    VERSION='$VERSION'
+    TIMESTAMP='$TIMESTAMP'
+    RM='$RM'
+    ofile='$ofile'], [])
+])dnl /_LT_CONFIG_SAVE_COMMANDS
+])# _LT_CONFIG
+
+
+# LT_SUPPORTED_TAG(TAG)
+# ---------------------
+# Trace this macro to discover what tags are supported by the libtool
+# --tag option, using:
+#    autoconf --trace 'LT_SUPPORTED_TAG:$1'
+# The macro body is empty: it exists only so that its calls can be traced.
+AC_DEFUN([LT_SUPPORTED_TAG], [])
+
+
+# C support is built-in for now
+m4_define([_LT_LANG_C_enabled], [])
+m4_define([_LT_TAGS], [])
+
+
+# LT_LANG(LANG)
+# -------------
+# Enable libtool support for the given language if not already enabled.
+# LANG is mapped to a libtool tag before _LT_LANG is called: C gives C,
+# C++ gives CXX, Go gives GO, Java gives GCJ, Fortran 77 gives F77,
+# Fortran gives FC and Windows Resource gives RC.  Any other LANG is used
+# as the tag itself provided a macro named _LT_LANG_<LANG>_CONFIG is
+# defined; otherwise the macro stops with a fatal error.  Must be expanded
+# before LT_OUTPUT.
+AC_DEFUN([LT_LANG],
+[AC_BEFORE([$0], [LT_OUTPUT])dnl
+m4_case([$1],
+  [C],                 [_LT_LANG(C)],
+  [C++],               [_LT_LANG(CXX)],
+  [Go],                        [_LT_LANG(GO)],
+  [Java],              [_LT_LANG(GCJ)],
+  [Fortran 77],                [_LT_LANG(F77)],
+  [Fortran],           [_LT_LANG(FC)],
+  [Windows Resource],  [_LT_LANG(RC)],
+  [m4_ifdef([_LT_LANG_]$1[_CONFIG],
+    [_LT_LANG($1)],
+    [m4_fatal([$0: unsupported language: "$1"])])])dnl
+])# LT_LANG
+
+
+# _LT_LANG(LANGNAME)
+# ------------------
+# Enable the tag LANGNAME once.  Nothing happens when the marker macro
+# _LT_LANG_<LANGNAME>_enabled is already defined.  Otherwise the tag is
+# announced through LT_SUPPORTED_TAG, appended to _LT_TAGS, the marker is
+# defined, and _LT_LANG_<LANGNAME>_CONFIG is run with LANGNAME as its
+# argument.
+m4_defun([_LT_LANG],
+[m4_ifdef([_LT_LANG_]$1[_enabled], [],
+  [LT_SUPPORTED_TAG([$1])dnl
+  m4_append([_LT_TAGS], [$1 ])dnl
+  m4_define([_LT_LANG_]$1[_enabled], [])dnl
+  _LT_LANG_$1_CONFIG($1)])dnl
+])# _LT_LANG
+
+
+# AC_PROG_GO
+# ----------
+# Fallback definition, installed only when AC_PROG_GO is not defined yet.
+# Declares GOC and GOFLAGS as precious variables and looks for the Go
+# compiler: first gccgo through AC_CHECK_TOOL, then the gccgo name
+# prefixed with $ac_tool_prefix when that prefix is non-empty, and finally
+# plain gccgo with `false' as the value used when nothing is found.
+# NOTE(review): the macro pushes the Go language with AC_LANG_PUSH and no
+# matching AC_LANG_POP is visible here -- confirm this is intended.
+m4_ifndef([AC_PROG_GO], [
+# NOTE: This macro has been submitted for inclusion into   #
+#  GNU Autoconf as AC_PROG_GO.  When it is available in    #
+#  a released version of Autoconf we should remove this    #
+#  macro and use it instead.                               #
+m4_defun([AC_PROG_GO],
+[AC_LANG_PUSH(Go)dnl
+AC_ARG_VAR([GOC],     [Go compiler command])dnl
+AC_ARG_VAR([GOFLAGS], [Go compiler flags])dnl
+_AC_ARG_VAR_LDFLAGS()dnl
+AC_CHECK_TOOL(GOC, gccgo)
+if test -z "$GOC"; then
+  if test -n "$ac_tool_prefix"; then
+    AC_CHECK_PROG(GOC, [${ac_tool_prefix}gccgo], [${ac_tool_prefix}gccgo])
+  fi
+fi
+if test -z "$GOC"; then
+  AC_CHECK_PROG(GOC, gccgo, gccgo, false)
+fi
+])#m4_defun
+])#m4_ifndef
+
+
+# _LT_LANG_DEFAULT_CONFIG
+# -----------------------
+# Tie the extra language tags to their compiler macros.  For each of
+# AC_PROG_CXX (CXX), AC_PROG_F77 (F77), AC_PROG_FC (FC), AC_PROG_GO (GO)
+# and LT_PROG_RC (RC): when the macro has already been provided, the tag
+# is enabled now with LT_LANG; otherwise the macro is redefined so that
+# the LT_LANG call is appended to its existing definition.  The GCJ tag is
+# handled the same way for AC_PROG_GCJ, AM_PROG_GCJ and LT_PROG_GCJ,
+# except that each of those macros is only redefined when it is defined.
+m4_defun([_LT_LANG_DEFAULT_CONFIG],
+[AC_PROVIDE_IFELSE([AC_PROG_CXX],
+  [LT_LANG(CXX)],
+  [m4_define([AC_PROG_CXX], defn([AC_PROG_CXX])[LT_LANG(CXX)])])
+
+AC_PROVIDE_IFELSE([AC_PROG_F77],
+  [LT_LANG(F77)],
+  [m4_define([AC_PROG_F77], defn([AC_PROG_F77])[LT_LANG(F77)])])
+
+AC_PROVIDE_IFELSE([AC_PROG_FC],
+  [LT_LANG(FC)],
+  [m4_define([AC_PROG_FC], defn([AC_PROG_FC])[LT_LANG(FC)])])
+
+dnl The call to [A][M_PROG_GCJ] is quoted like that to stop aclocal
+dnl pulling things in needlessly.
+AC_PROVIDE_IFELSE([AC_PROG_GCJ],
+  [LT_LANG(GCJ)],
+  [AC_PROVIDE_IFELSE([A][M_PROG_GCJ],
+    [LT_LANG(GCJ)],
+    [AC_PROVIDE_IFELSE([LT_PROG_GCJ],
+      [LT_LANG(GCJ)],
+      [m4_ifdef([AC_PROG_GCJ],
+       [m4_define([AC_PROG_GCJ], defn([AC_PROG_GCJ])[LT_LANG(GCJ)])])
+       m4_ifdef([A][M_PROG_GCJ],
+       [m4_define([A][M_PROG_GCJ], defn([A][M_PROG_GCJ])[LT_LANG(GCJ)])])
+       m4_ifdef([LT_PROG_GCJ],
+       [m4_define([LT_PROG_GCJ], defn([LT_PROG_GCJ])[LT_LANG(GCJ)])])])])])
+
+AC_PROVIDE_IFELSE([AC_PROG_GO],
+  [LT_LANG(GO)],
+  [m4_define([AC_PROG_GO], defn([AC_PROG_GO])[LT_LANG(GO)])])
+
+AC_PROVIDE_IFELSE([LT_PROG_RC],
+  [LT_LANG(RC)],
+  [m4_define([LT_PROG_RC], defn([LT_PROG_RC])[LT_LANG(RC)])])
+])# _LT_LANG_DEFAULT_CONFIG
+
+# Obsolete macros:
+AU_DEFUN([AC_LIBTOOL_CXX], [LT_LANG(C++)])
+AU_DEFUN([AC_LIBTOOL_F77], [LT_LANG(Fortran 77)])
+AU_DEFUN([AC_LIBTOOL_FC], [LT_LANG(Fortran)])
+AU_DEFUN([AC_LIBTOOL_GCJ], [LT_LANG(Java)])
+AU_DEFUN([AC_LIBTOOL_RC], [LT_LANG(Windows Resource)])
+dnl aclocal-1.4 backwards compatibility:
+dnl AC_DEFUN([AC_LIBTOOL_CXX], [])
+dnl AC_DEFUN([AC_LIBTOOL_F77], [])
+dnl AC_DEFUN([AC_LIBTOOL_FC], [])
+dnl AC_DEFUN([AC_LIBTOOL_GCJ], [])
+dnl AC_DEFUN([AC_LIBTOOL_RC], [])
+
+
+# _LT_TAG_COMPILER
+# ----------------
+# Declare the compiler related libtool variables (LTCC, LTCFLAGS, the
+# tagged CC and with_gcc) and give them their values: LTCC falls back to
+# $CC and LTCFLAGS to $CFLAGS when they are unset, and compiler is set to
+# $CC.
+m4_defun([_LT_TAG_COMPILER],
+[AC_REQUIRE([AC_PROG_CC])dnl
+
+_LT_DECL([LTCC], [CC], [1], [A C compiler])dnl
+_LT_DECL([LTCFLAGS], [CFLAGS], [1], [LTCC compiler flags])dnl
+_LT_TAGDECL([CC], [compiler], [1], [A language specific compiler])dnl
+_LT_TAGDECL([with_gcc], [GCC], [0], [Is the compiler the GNU compiler?])dnl
+
+# If no C compiler was specified, use CC.
+LTCC=${LTCC-"$CC"}
+
+# If no C compiler flags were specified, use CFLAGS.
+LTCFLAGS=${LTCFLAGS-"$CFLAGS"}
+
+# Allow CC to be a program name with arguments.
+compiler=$CC
+])# _LT_TAG_COMPILER
+
+
+# _LT_COMPILER_BOILERPLATE
+# ------------------------
+# Check for compiler boilerplate output or warnings with
+# the simple compiler test code.
+# $lt_simple_compile_test_code is written to conftest.$ac_ext and compiled
+# with $ac_compile.  What the compiler prints on standard error, minus
+# empty lines and lines that start with optional blanks and a `+', is
+# stored in _lt_compiler_boilerplate.  The conftest files are removed.
+m4_defun([_LT_COMPILER_BOILERPLATE],
+[m4_require([_LT_DECL_SED])dnl
+ac_outfile=conftest.$ac_objext
+echo "$lt_simple_compile_test_code" >conftest.$ac_ext
+eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err
+_lt_compiler_boilerplate=`cat conftest.err`
+$RM conftest*
+])# _LT_COMPILER_BOILERPLATE
+
+
+# _LT_LINKER_BOILERPLATE
+# ----------------------
+# Check for linker boilerplate output or warnings with
+# the simple link test code.
+# $lt_simple_link_test_code is written to conftest.$ac_ext and linked with
+# $ac_link.  What is printed on standard error, minus empty lines and
+# lines that start with optional blanks and a `+', is stored in
+# _lt_linker_boilerplate.  The conftest files are removed recursively.
+m4_defun([_LT_LINKER_BOILERPLATE],
+[m4_require([_LT_DECL_SED])dnl
+ac_outfile=conftest.$ac_objext
+echo "$lt_simple_link_test_code" >conftest.$ac_ext
+eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err
+_lt_linker_boilerplate=`cat conftest.err`
+$RM -r conftest*
+])# _LT_LINKER_BOILERPLATE
+
+# _LT_REQUIRED_DARWIN_CHECKS
+# --------------------------
+# On rhapsody*/darwin* hosts only:
+#  - locate DSYMUTIL, NMEDIT, LIPO, OTOOL and OTOOL64 (each defaults to ":");
+#  - probe the linker for -single_module, -exported_symbols_list and
+#    -force_load, caching the answers in lt_cv_apple_cc_single_mod,
+#    lt_cv_ld_exported_symbols_list and lt_cv_ld_force_load;
+#  - derive the helper shell variables _lt_dar_allow_undefined,
+#    _lt_dar_single_mod, _lt_dar_export_syms and _lt_dsymutil from the
+#    results.  These are consumed by _LT_DARWIN_LINKER_FEATURES.
+# Expanded at most once (m4_defun_once).
+m4_defun_once([_LT_REQUIRED_DARWIN_CHECKS],[
+  case $host_os in
+    rhapsody* | darwin*)
+    AC_CHECK_TOOL([DSYMUTIL], [dsymutil], [:])
+    AC_CHECK_TOOL([NMEDIT], [nmedit], [:])
+    AC_CHECK_TOOL([LIPO], [lipo], [:])
+    AC_CHECK_TOOL([OTOOL], [otool], [:])
+    AC_CHECK_TOOL([OTOOL64], [otool64], [:])
+    _LT_DECL([], [DSYMUTIL], [1],
+      [Tool to manipulate archived DWARF debug symbol files on Mac OS X])
+    _LT_DECL([], [NMEDIT], [1],
+      [Tool to change global to local symbols on Mac OS X])
+    _LT_DECL([], [LIPO], [1],
+      [Tool to manipulate fat objects and archives on Mac OS X])
+    _LT_DECL([], [OTOOL], [1],
+      [ldd/readelf like tool for Mach-O binaries on Mac OS X])
+    _LT_DECL([], [OTOOL64], [1],
+      [ldd/readelf like tool for 64 bit Mach-O binaries on Mac OS X 10.4])
+
+    AC_CACHE_CHECK([for -single_module linker flag],[lt_cv_apple_cc_single_mod],
+      [lt_cv_apple_cc_single_mod=no
+      if test -z "${LT_MULTI_MODULE}"; then
+       # By default we will add the -single_module flag. You can override
+       # by either setting the environment variable LT_MULTI_MODULE
+       # non-empty at configure time, or by adding -multi_module to the
+       # link flags.
+       rm -rf libconftest.dylib*
+       echo "int foo(void){return 1;}" > conftest.c
+       echo "$LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \
+-dynamiclib -Wl,-single_module conftest.c" >&AS_MESSAGE_LOG_FD
+       $LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \
+         -dynamiclib -Wl,-single_module conftest.c 2>conftest.err
+        _lt_result=$?
+       # If there is a non-empty error log, and "single_module"
+       # appears in it, assume the flag caused a linker warning.
+       # The grep is silenced so that matching lines do not land in the
+       # middle of the "checking for ..." line; the complete error log is
+       # copied to the configure log just below.
+        if test -s conftest.err && $GREP single_module conftest.err >/dev/null 2>&1; then
+         cat conftest.err >&AS_MESSAGE_LOG_FD
+       # Otherwise, if the output was created with a 0 exit code from
+       # the compiler, it worked.
+       elif test -f libconftest.dylib && test $_lt_result -eq 0; then
+         lt_cv_apple_cc_single_mod=yes
+       else
+         cat conftest.err >&AS_MESSAGE_LOG_FD
+       fi
+       rm -rf libconftest.dylib*
+       rm -f conftest.*
+      fi])
+
+    AC_CACHE_CHECK([for -exported_symbols_list linker flag],
+      [lt_cv_ld_exported_symbols_list],
+      [lt_cv_ld_exported_symbols_list=no
+      save_LDFLAGS=$LDFLAGS
+      echo "_main" > conftest.sym
+      LDFLAGS="$LDFLAGS -Wl,-exported_symbols_list,conftest.sym"
+      AC_LINK_IFELSE([AC_LANG_PROGRAM([],[])],
+       [lt_cv_ld_exported_symbols_list=yes],
+       [lt_cv_ld_exported_symbols_list=no])
+       LDFLAGS="$save_LDFLAGS"
+    ])
+
+    AC_CACHE_CHECK([for -force_load linker flag],[lt_cv_ld_force_load],
+      [lt_cv_ld_force_load=no
+      cat > conftest.c << _LT_EOF
+int forced_loaded() { return 2;}
+_LT_EOF
+      echo "$LTCC $LTCFLAGS -c -o conftest.o conftest.c" >&AS_MESSAGE_LOG_FD
+      $LTCC $LTCFLAGS -c -o conftest.o conftest.c 2>&AS_MESSAGE_LOG_FD
+      echo "$AR cru libconftest.a conftest.o" >&AS_MESSAGE_LOG_FD
+      $AR cru libconftest.a conftest.o 2>&AS_MESSAGE_LOG_FD
+      echo "$RANLIB libconftest.a" >&AS_MESSAGE_LOG_FD
+      $RANLIB libconftest.a 2>&AS_MESSAGE_LOG_FD
+      cat > conftest.c << _LT_EOF
+int main() { return 0;}
+_LT_EOF
+      echo "$LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a" >&AS_MESSAGE_LOG_FD
+      $LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a 2>conftest.err
+      _lt_result=$?
+      # A warning that mentions force_load means the flag was not honoured.
+      # The flag counts as working only if the link succeeded and the
+      # archive member really ended up in the executable.
+      if test -s conftest.err && $GREP force_load conftest.err >/dev/null 2>&1; then
+       cat conftest.err >&AS_MESSAGE_LOG_FD
+      elif test -f conftest && test $_lt_result -eq 0 && $GREP forced_load conftest >/dev/null 2>&1 ; then
+       lt_cv_ld_force_load=yes
+      else
+       cat conftest.err >&AS_MESSAGE_LOG_FD
+      fi
+        rm -f conftest.err libconftest.a conftest conftest.c
+        rm -rf conftest.dSYM
+    ])
+    case $host_os in
+    rhapsody* | darwin1.[[012]])
+      _lt_dar_allow_undefined='${wl}-undefined ${wl}suppress' ;;
+    darwin1.*)
+      _lt_dar_allow_undefined='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' ;;
+    darwin*) # darwin 5.x on
+      # if running on 10.5 or later, the deployment target defaults
+      # to the OS version, if on x86, and 10.4, the deployment
+      # target defaults to 10.4. Don't you love it?
+      case ${MACOSX_DEPLOYMENT_TARGET-10.0},$host in
+       10.0,*86*-darwin8*|10.0,*-darwin[[91]]*)
+         _lt_dar_allow_undefined='${wl}-undefined ${wl}dynamic_lookup' ;;
+       10.[[012]]*)
+         _lt_dar_allow_undefined='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' ;;
+       10.*)
+         _lt_dar_allow_undefined='${wl}-undefined ${wl}dynamic_lookup' ;;
+      esac
+    ;;
+  esac
+    if test "$lt_cv_apple_cc_single_mod" = "yes"; then
+      _lt_dar_single_mod='$single_module'
+    fi
+    # Restrict exported symbols with the linker flag when it is supported,
+    # otherwise post-process the library with nmedit.
+    if test "$lt_cv_ld_exported_symbols_list" = "yes"; then
+      _lt_dar_export_syms=' ${wl}-exported_symbols_list,$output_objdir/${libname}-symbols.expsym'
+    else
+      _lt_dar_export_syms='~$NMEDIT -s $output_objdir/${libname}-symbols.expsym ${lib}'
+    fi
+    if test "$DSYMUTIL" != ":" && test "$lt_cv_ld_force_load" = "no"; then
+      _lt_dsymutil='~$DSYMUTIL $lib || :'
+    else
+      _lt_dsymutil=
+    fi
+    ;;
+  esac
+])
+
+
+# _LT_DARWIN_LINKER_FEATURES([TAG])
+# ---------------------------------
+# Checks for linker and compiler features on darwin
+#
+# Requires _LT_REQUIRED_DARWIN_CHECKS and then fills in the per-TAG
+# variables (archive_cmds_need_lc, hardcode_*, whole_archive_flag_spec,
+# link_all_deplibs, allow_undefined_flag, archive_cmds, module_cmds and the
+# *_expsym_cmds variants) from the _lt_dar_* / _lt_dsymutil helper variables.
+# Shared libraries are enabled when $cc_basename matches ifort* or when $GCC
+# is "yes"; otherwise ld_shlibs for TAG is set to "no".  For TAG CXX the
+# archive commands are overridden with a two-step link through a
+# ${lib}-master.o object when -single_module is not available.
+m4_defun([_LT_DARWIN_LINKER_FEATURES],
+[
+  m4_require([_LT_REQUIRED_DARWIN_CHECKS])
+  _LT_TAGVAR(archive_cmds_need_lc, $1)=no
+  _LT_TAGVAR(hardcode_direct, $1)=no
+  _LT_TAGVAR(hardcode_automatic, $1)=yes
+  _LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported
+  if test "$lt_cv_ld_force_load" = "yes"; then
+    _LT_TAGVAR(whole_archive_flag_spec, $1)='`for conv in $convenience\"\"; do test  -n \"$conv\" && new_convenience=\"$new_convenience ${wl}-force_load,$conv\"; done; func_echo_all \"$new_convenience\"`'
+    m4_case([$1], [F77], [_LT_TAGVAR(compiler_needs_object, $1)=yes],
+                  [FC],  [_LT_TAGVAR(compiler_needs_object, $1)=yes])
+  else
+    _LT_TAGVAR(whole_archive_flag_spec, $1)=''
+  fi
+  _LT_TAGVAR(link_all_deplibs, $1)=yes
+  _LT_TAGVAR(allow_undefined_flag, $1)="$_lt_dar_allow_undefined"
+  case $cc_basename in
+     ifort*) _lt_dar_can_shared=yes ;;
+     *) _lt_dar_can_shared=$GCC ;;
+  esac
+  if test "$_lt_dar_can_shared" = "yes"; then
+    output_verbose_link_cmd=func_echo_all
+    _LT_TAGVAR(archive_cmds, $1)="\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod${_lt_dsymutil}"
+    _LT_TAGVAR(module_cmds, $1)="\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags${_lt_dsymutil}"
+    _LT_TAGVAR(archive_expsym_cmds, $1)="sed 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring ${_lt_dar_single_mod}${_lt_dar_export_syms}${_lt_dsymutil}"
+    _LT_TAGVAR(module_expsym_cmds, $1)="sed -e 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags${_lt_dar_export_syms}${_lt_dsymutil}"
+    m4_if([$1], [CXX],
+[   if test "$lt_cv_apple_cc_single_mod" != "yes"; then
+      _LT_TAGVAR(archive_cmds, $1)="\$CC -r -keep_private_externs -nostdlib -o \${lib}-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \${lib}-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring${_lt_dsymutil}"
+      _LT_TAGVAR(archive_expsym_cmds, $1)="sed 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC -r -keep_private_externs -nostdlib -o \${lib}-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \${lib}-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring${_lt_dar_export_syms}${_lt_dsymutil}"
+    fi
+],[])
+  else
+  _LT_TAGVAR(ld_shlibs, $1)=no
+  fi
+])
+
+# _LT_SYS_MODULE_PATH_AIX([TAGNAME])
+# ----------------------------------
+# Links a minimal program and checks the executable
+# for the system default hardcoded library path. In most cases,
+# this is /usr/lib:/lib, but when the MPI compilers are used
+# the location of the communication and MPI libs are included too.
+# If we don't find anything, use the default library path according
+# to the aix ld manual.
+# Store the results from the different compilers for each TAGNAME.
+# Allow to override them for all tags through lt_cv_aix_libpath.
+#
+# The result is left in the shell variable aix_libpath.  The path is
+# extracted from "dump -H" output (then "dump -HX64" if that yields nothing)
+# by a sed script that, between a line containing "Import File Strings" and
+# the next empty line, prints the first field following the leading "0" of
+# each line that starts with "0".
+m4_defun([_LT_SYS_MODULE_PATH_AIX],
+[m4_require([_LT_DECL_SED])dnl
+if test "${lt_cv_aix_libpath+set}" = set; then
+  aix_libpath=$lt_cv_aix_libpath
+else
+  AC_CACHE_VAL([_LT_TAGVAR([lt_cv_aix_libpath_], [$1])],
+  [AC_LINK_IFELSE([AC_LANG_PROGRAM],[
+  lt_aix_libpath_sed='[
+      /Import File Strings/,/^$/ {
+         /^0/ {
+             s/^0  *\([^ ]*\) *$/\1/
+             p
+         }
+      }]'
+  _LT_TAGVAR([lt_cv_aix_libpath_], [$1])=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"`
+  # Check for a 64-bit object if we didn't find anything.
+  if test -z "$_LT_TAGVAR([lt_cv_aix_libpath_], [$1])"; then
+    _LT_TAGVAR([lt_cv_aix_libpath_], [$1])=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"`
+  fi],[])
+  if test -z "$_LT_TAGVAR([lt_cv_aix_libpath_], [$1])"; then
+    _LT_TAGVAR([lt_cv_aix_libpath_], [$1])="/usr/lib:/lib"
+  fi
+  ])
+  aix_libpath=$_LT_TAGVAR([lt_cv_aix_libpath_], [$1])
+fi
+])# _LT_SYS_MODULE_PATH_AIX
+
+
+# _LT_SHELL_INIT(ARG)
+# -------------------
+# Emit ARG, followed by a newline, into the M4SH-INIT diversion.
+m4_define([_LT_SHELL_INIT],
+[m4_divert_text([M4SH-INIT], [$1
+])])# _LT_SHELL_INIT
+
+
+
+# _LT_PROG_ECHO_BACKSLASH
+# -----------------------
+# Find how we can fake an echo command that does not interpret backslash.
+# In particular, with Autoconf 2.60 or later we add some code to the start
+# of the generated configure script which will find a shell with a builtin
+# printf (which we can use as an echo command).
+#
+# ECHO is first loaded with a long run of backslashes (the literal below,
+# repeated 5 x 6 times) that serves as the probe string.  The candidates
+# are tried in this order and the first one that reproduces the probe
+# unchanged wins:
+#   1. print -r --
+#   2. printf %s\n
+#   3. func_fallback_echo, a here-document based shell function.
+# func_echo_all is defined as a wrapper that passes all its arguments to
+# $ECHO as one space-separated string.  When _AS_DETECT_SUGGESTED exists,
+# an equivalent probe is registered with it as well.
+m4_defun([_LT_PROG_ECHO_BACKSLASH],
+[ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\'
+ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO
+ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO$ECHO
+
+AC_MSG_CHECKING([how to print strings])
+# Test print first, because it will be a builtin if present.
+if test "X`( print -r -- -n ) 2>/dev/null`" = X-n && \
+   test "X`print -r -- $ECHO 2>/dev/null`" = "X$ECHO"; then
+  ECHO='print -r --'
+elif test "X`printf %s $ECHO 2>/dev/null`" = "X$ECHO"; then
+  ECHO='printf %s\n'
+else
+  # Use this function as a fallback that always works.
+  func_fallback_echo ()
+  {
+    eval 'cat <<_LTECHO_EOF
+$[]1
+_LTECHO_EOF'
+  }
+  ECHO='func_fallback_echo'
+fi
+
+# func_echo_all arg...
+# Invoke $ECHO with all args, space-separated.
+func_echo_all ()
+{
+    $ECHO "$*" 
+}
+
+case "$ECHO" in
+  printf*) AC_MSG_RESULT([printf]) ;;
+  print*) AC_MSG_RESULT([print -r]) ;;
+  *) AC_MSG_RESULT([cat]) ;;
+esac
+
+m4_ifdef([_AS_DETECT_SUGGESTED],
+[_AS_DETECT_SUGGESTED([
+  test -n "${ZSH_VERSION+set}${BASH_VERSION+set}" || (
+    ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\'
+    ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO
+    ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO$ECHO
+    PATH=/empty FPATH=/empty; export PATH FPATH
+    test "X`printf %s $ECHO`" = "X$ECHO" \
+      || test "X`print -r -- $ECHO`" = "X$ECHO" )])])
+
+_LT_DECL([], [SHELL], [1], [Shell to use when invoking shell scripts])
+_LT_DECL([], [ECHO], [1], [An echo program that protects backslashes])
+])# _LT_PROG_ECHO_BACKSLASH
+
+
+# _LT_WITH_SYSROOT
+# ----------------
+# Provide the --with-sysroot[=DIR] configure option and compute the shell
+# variable lt_sysroot from it:
+#   yes            ask the compiler (only when $GCC is "yes") through
+#                  "$CC --print-sysroot";
+#   /absolute/dir  use DIR, quoted with $sed_quote_subst;
+#   no or empty    leave lt_sysroot empty (this is the default);
+#   anything else  abort configure, the sysroot must be an absolute path.
+#
+# The help text goes through AS_HELP_STRING with the @<:@ and @:>@
+# quadrigraphs so that the literal brackets around "=DIR" survive m4 quote
+# stripping and show up in "configure --help".
+AC_DEFUN([_LT_WITH_SYSROOT],
+[AC_MSG_CHECKING([for sysroot])
+AC_ARG_WITH([sysroot],
+[AS_HELP_STRING([--with-sysroot@<:@=DIR@:>@],
+  [Search for dependent libraries within DIR (or the compiler's sysroot
+   if not specified).])],
+[], [with_sysroot=no])
+
+dnl lt_sysroot will always be passed unquoted.  We quote it here
+dnl in case the user passed a directory name.
+lt_sysroot=
+case ${with_sysroot} in #(
+ yes)
+   if test "$GCC" = yes; then
+     lt_sysroot=`$CC --print-sysroot 2>/dev/null`
+   fi
+   ;; #(
+ /*)
+   lt_sysroot=`echo "$with_sysroot" | sed -e "$sed_quote_subst"`
+   ;; #(
+ no|'')
+   ;; #(
+ *)
+   AC_MSG_RESULT([${with_sysroot}])
+   AC_MSG_ERROR([The sysroot must be an absolute path.])
+   ;;
+esac
+
+AC_MSG_RESULT([${lt_sysroot:-no}])
+_LT_DECL([], [lt_sysroot], [0], [The root where to search for ]dnl
+[dependent libraries, and in which our libraries should be installed.])])
+
+# _LT_ENABLE_LOCK
+# ---------------
+# Handle --disable-libtool-lock (enable_libtool_lock becomes "yes" unless it
+# was given as "no") and copy the result to need_locks.
+#
+# In between, for a number of hosts, compile a tiny test object and look at
+# the output of /usr/bin/file to find out which ABI the compiler targets,
+# then adjust the tools accordingly:
+#   ia64-*-hpux*     set HPUX_IA64_MODE to 32 or 64;
+#   *-*-irix6*       append an emulation (GNU ld) or -32/-n32/-64 to LD;
+#   x86_64/ppc/powerpc/s390/sparc linux, kfreebsd and tpf hosts
+#                    append "-m <emulation>" to LD;
+#   *-*-sco3.2v5*    keep -belf in CFLAGS only if a link test with it works;
+#   *-*solaris*      for 64-bit objects append an emulation (GNU ld) or -64.
+m4_defun([_LT_ENABLE_LOCK],
+[AC_ARG_ENABLE([libtool-lock],
+  [AS_HELP_STRING([--disable-libtool-lock],
+    [avoid locking (might break parallel builds)])])
+test "x$enable_libtool_lock" != xno && enable_libtool_lock=yes
+
+# Some flags need to be propagated to the compiler or linker for good
+# libtool support.
+case $host in
+ia64-*-hpux*)
+  # Find out which ABI we are using.
+  echo 'int i;' > conftest.$ac_ext
+  if AC_TRY_EVAL(ac_compile); then
+    case `/usr/bin/file conftest.$ac_objext` in
+      *ELF-32*)
+       HPUX_IA64_MODE="32"
+       ;;
+      *ELF-64*)
+       HPUX_IA64_MODE="64"
+       ;;
+    esac
+  fi
+  rm -rf conftest*
+  ;;
+*-*-irix6*)
+  # Find out which ABI we are using.
+  echo '[#]line '$LINENO' "configure"' > conftest.$ac_ext
+  if AC_TRY_EVAL(ac_compile); then
+    if test "$lt_cv_prog_gnu_ld" = yes; then
+      case `/usr/bin/file conftest.$ac_objext` in
+       *32-bit*)
+         LD="${LD-ld} -melf32bsmip"
+         ;;
+       *N32*)
+         LD="${LD-ld} -melf32bmipn32"
+         ;;
+       *64-bit*)
+         LD="${LD-ld} -melf64bmip"
+       ;;
+      esac
+    else
+      case `/usr/bin/file conftest.$ac_objext` in
+       *32-bit*)
+         LD="${LD-ld} -32"
+         ;;
+       *N32*)
+         LD="${LD-ld} -n32"
+         ;;
+       *64-bit*)
+         LD="${LD-ld} -64"
+         ;;
+      esac
+    fi
+  fi
+  rm -rf conftest*
+  ;;
+
+x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \
+s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
+  # Find out which ABI we are using.
+  echo 'int i;' > conftest.$ac_ext
+  if AC_TRY_EVAL(ac_compile); then
+    # Note that this branch inspects conftest.o, not conftest.$ac_objext.
+    case `/usr/bin/file conftest.o` in
+      *32-bit*)
+       case $host in
+         x86_64-*kfreebsd*-gnu)
+           LD="${LD-ld} -m elf_i386_fbsd"
+           ;;
+         x86_64-*linux*)
+           LD="${LD-ld} -m elf_i386"
+           ;;
+         ppc64-*linux*|powerpc64-*linux*)
+           LD="${LD-ld} -m elf32ppclinux"
+           ;;
+         s390x-*linux*)
+           LD="${LD-ld} -m elf_s390"
+           ;;
+         sparc64-*linux*)
+           LD="${LD-ld} -m elf32_sparc"
+           ;;
+       esac
+       ;;
+      *64-bit*)
+       case $host in
+         x86_64-*kfreebsd*-gnu)
+           LD="${LD-ld} -m elf_x86_64_fbsd"
+           ;;
+         x86_64-*linux*)
+           LD="${LD-ld} -m elf_x86_64"
+           ;;
+         ppc*-*linux*|powerpc*-*linux*)
+           LD="${LD-ld} -m elf64ppc"
+           ;;
+         s390*-*linux*|s390*-*tpf*)
+           LD="${LD-ld} -m elf64_s390"
+           ;;
+         sparc*-*linux*)
+           LD="${LD-ld} -m elf64_sparc"
+           ;;
+       esac
+       ;;
+    esac
+  fi
+  rm -rf conftest*
+  ;;
+
+*-*-sco3.2v5*)
+  # On SCO OpenServer 5, we need -belf to get full-featured binaries.
+  SAVE_CFLAGS="$CFLAGS"
+  CFLAGS="$CFLAGS -belf"
+  AC_CACHE_CHECK([whether the C compiler needs -belf], lt_cv_cc_needs_belf,
+    [AC_LANG_PUSH(C)
+     AC_LINK_IFELSE([AC_LANG_PROGRAM([[]],[[]])],[lt_cv_cc_needs_belf=yes],[lt_cv_cc_needs_belf=no])
+     AC_LANG_POP])
+  if test x"$lt_cv_cc_needs_belf" != x"yes"; then
+    # this is probably gcc 2.8.0, egcs 1.0 or newer; no need for -belf
+    CFLAGS="$SAVE_CFLAGS"
+  fi
+  ;;
+*-*solaris*)
+  # Find out which ABI we are using.
+  echo 'int i;' > conftest.$ac_ext
+  if AC_TRY_EVAL(ac_compile); then
+    case `/usr/bin/file conftest.o` in
+    *64-bit*)
+      case $lt_cv_prog_gnu_ld in
+      yes*)
+        case $host in
+        i?86-*-solaris*)
+          LD="${LD-ld} -m elf_x86_64"
+          ;;
+        sparc*-*-solaris*)
+          LD="${LD-ld} -m elf64_sparc"
+          ;;
+        esac
+        # GNU ld 2.21 introduced _sol2 emulations.  Use them if available.
+        # The suffix is appended to whatever LD currently ends with.
+        if ${LD-ld} -V | grep _sol2 >/dev/null 2>&1; then
+          LD="${LD-ld}_sol2"
+        fi
+        ;;
+      *)
+       if ${LD-ld} -64 -r -o conftest2.o conftest.o >/dev/null 2>&1; then
+         LD="${LD-ld} -64"
+       fi
+       ;;
+      esac
+      ;;
+    esac
+  fi
+  rm -rf conftest*
+  ;;
+esac
+
+need_locks="$enable_libtool_lock"
+])# _LT_ENABLE_LOCK
+
+
+# _LT_PROG_AR
+# -----------
+# Locate the archiver (AR, falling back to "false" when no tool is found),
+# default AR_FLAGS to "cru", and find out whether the archiver accepts an
+# @FILE argument listing the members.
+#
+# The @FILE probe must pass twice over: creating an archive from a list
+# file that names an existing object has to succeed, and the same command
+# has to fail once that object has been deleted.  Only then is
+# lt_cv_ar_at_file set to "@".  archiver_list_spec ends up as that value,
+# or empty when the check answered "no".
+m4_defun([_LT_PROG_AR],
+[AC_CHECK_TOOLS(AR, [ar], false)
+: ${AR=ar}
+: ${AR_FLAGS=cru}
+_LT_DECL([], [AR], [1], [The archiver])
+_LT_DECL([], [AR_FLAGS], [1], [Flags to create an archive])
+
+AC_CACHE_CHECK([for archiver @FILE support], [lt_cv_ar_at_file],
+  [lt_cv_ar_at_file=no
+   AC_COMPILE_IFELSE([AC_LANG_PROGRAM],
+     [echo conftest.$ac_objext > conftest.lst
+      lt_ar_try='$AR $AR_FLAGS libconftest.a @conftest.lst >&AS_MESSAGE_LOG_FD'
+      AC_TRY_EVAL([lt_ar_try])
+      if test "$ac_status" -eq 0; then
+       # Ensure the archiver fails upon bogus file names.
+       rm -f conftest.$ac_objext libconftest.a
+       AC_TRY_EVAL([lt_ar_try])
+       if test "$ac_status" -ne 0; then
+          lt_cv_ar_at_file=@
+        fi
+      fi
+      rm -f conftest.* libconftest.a
+     ])
+  ])
+
+if test "x$lt_cv_ar_at_file" = xno; then
+  archiver_list_spec=
+else
+  archiver_list_spec=$lt_cv_ar_at_file
+fi
+_LT_DECL([], [archiver_list_spec], [1],
+  [How to feed a file listing to the archiver])
+])# _LT_PROG_AR
+
+
+# _LT_CMD_OLD_ARCHIVE
+# -------------------
+# Expand _LT_PROG_AR, locate STRIP and RANLIB (each defaults to ":"), and
+# build the command strings used for old-style static archives:
+#   old_archive_cmds        create the archive, then run RANLIB on it;
+#   old_postinstall_cmds    chmod 644, then RANLIB ("-t" is added on
+#                           openbsd* hosts);
+#   old_postuninstall_cmds  left empty.
+# Commands inside one string are separated by "~".  Also sets
+# lock_old_archive_extraction to "yes" on darwin* and to "no" elsewhere.
+m4_defun([_LT_CMD_OLD_ARCHIVE],
+[_LT_PROG_AR
+
+AC_CHECK_TOOL(STRIP, strip, :)
+test -z "$STRIP" && STRIP=:
+_LT_DECL([], [STRIP], [1], [A symbol stripping program])
+
+AC_CHECK_TOOL(RANLIB, ranlib, :)
+test -z "$RANLIB" && RANLIB=:
+_LT_DECL([], [RANLIB], [1],
+    [Commands used to install an old-style archive])
+
+# Determine commands to create old-style static archives.
+old_archive_cmds='$AR $AR_FLAGS $oldlib$oldobjs'
+old_postinstall_cmds='chmod 644 $oldlib'
+old_postuninstall_cmds=
+
+# RANLIB was defaulted to ":" above when empty, so this test holds here.
+if test -n "$RANLIB"; then
+  case $host_os in
+  openbsd*)
+    old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB -t \$tool_oldlib"
+    ;;
+  *)
+    old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB \$tool_oldlib"
+    ;;
+  esac
+  old_archive_cmds="$old_archive_cmds~\$RANLIB \$tool_oldlib"
+fi
+
+case $host_os in
+  darwin*)
+    lock_old_archive_extraction=yes ;;
+  *)
+    lock_old_archive_extraction=no ;;
+esac
+_LT_DECL([], [old_postinstall_cmds], [2])
+_LT_DECL([], [old_postuninstall_cmds], [2])
+_LT_TAGDECL([], [old_archive_cmds], [2],
+    [Commands used to build an old-style archive])
+_LT_DECL([], [lock_old_archive_extraction], [0],
+    [Whether to use a lock for old archive extraction])
+])# _LT_CMD_OLD_ARCHIVE
+
+
+# _LT_COMPILER_OPTION(MESSAGE, VARIABLE-NAME, FLAGS,
+#              [OUTPUT-FILE], [ACTION-SUCCESS], [ACTION-FAILURE])
+# ----------------------------------------------------------------
+# Check whether the given compiler option works
+#
+# MESSAGE is the text shown for the check and VARIABLE-NAME is the cache
+# variable that receives "yes" or "no".  FLAGS is spliced into $ac_compile.
+# OUTPUT-FILE names the file the compile must produce and defaults to
+# conftest.$ac_objext.  The option is accepted only if the compile exits
+# with status 0, the output file is non-empty, and the filtered stderr is
+# either empty or identical to the recorded compiler boilerplate.
+# ACTION-SUCCESS or ACTION-FAILURE is then run; each defaults to ":".
+AC_DEFUN([_LT_COMPILER_OPTION],
+[m4_require([_LT_FILEUTILS_DEFAULTS])dnl
+m4_require([_LT_DECL_SED])dnl
+AC_CACHE_CHECK([$1], [$2],
+  [$2=no
+   m4_if([$4], , [ac_outfile=conftest.$ac_objext], [ac_outfile=$4])
+   echo "$lt_simple_compile_test_code" > conftest.$ac_ext
+   lt_compiler_flag="$3"
+   # Insert the option either (1) after the last *FLAGS variable, or
+   # (2) before a word containing "conftest.", or (3) at the end.
+   # Note that $ac_compile itself does not contain backslashes and begins
+   # with a dollar sign (not a hyphen), so the echo should work correctly.
+   # The option is referenced via a variable to avoid confusing sed.
+   lt_compile=`echo "$ac_compile" | $SED \
+   -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
+   -e 's: [[^ ]]*conftest\.: $lt_compiler_flag&:; t' \
+   -e 's:$: $lt_compiler_flag:'`
+   (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&AS_MESSAGE_LOG_FD)
+   (eval "$lt_compile" 2>conftest.err)
+   ac_status=$?
+   cat conftest.err >&AS_MESSAGE_LOG_FD
+   echo "$as_me:$LINENO: \$? = $ac_status" >&AS_MESSAGE_LOG_FD
+   if (exit $ac_status) && test -s "$ac_outfile"; then
+     # The compiler can only warn and ignore the option if not recognized
+     # So say no if there are warnings other than the usual output.
+     $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp
+     $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2
+     if test ! -s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then
+       $2=yes
+     fi
+   fi
+   $RM conftest*
+])
+
+if test x"[$]$2" = xyes; then
+    m4_if([$5], , :, [$5])
+else
+    m4_if([$6], , :, [$6])
+fi
+])# _LT_COMPILER_OPTION
+
+# Old name:
+AU_ALIAS([AC_LIBTOOL_COMPILER_OPTION], [_LT_COMPILER_OPTION])
+dnl aclocal-1.4 backwards compatibility:
+dnl AC_DEFUN([AC_LIBTOOL_COMPILER_OPTION], [])
+
+
+# _LT_LINKER_OPTION(MESSAGE, VARIABLE-NAME, FLAGS,
+#                  [ACTION-SUCCESS], [ACTION-FAILURE])
+# ----------------------------------------------------
+# Check whether the given linker option works
+#
+# MESSAGE is the text shown for the check and VARIABLE-NAME is the cache
+# variable that receives "yes" or "no".  FLAGS is appended to LDFLAGS for
+# the duration of the test link.  The option is accepted only if the link
+# succeeds, the executable is non-empty, and stderr is either empty or,
+# after filtering, identical to the recorded linker boilerplate.
+# ACTION-SUCCESS or ACTION-FAILURE is then run; each defaults to ":".
+AC_DEFUN([_LT_LINKER_OPTION],
+[m4_require([_LT_FILEUTILS_DEFAULTS])dnl
+m4_require([_LT_DECL_SED])dnl
+AC_CACHE_CHECK([$1], [$2],
+  [$2=no
+   save_LDFLAGS="$LDFLAGS"
+   LDFLAGS="$LDFLAGS $3"
+   echo "$lt_simple_link_test_code" > conftest.$ac_ext
+   if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then
+     # The linker can only warn and ignore the option if not recognized
+     # So say no if there are warnings
+     if test -s conftest.err; then
+       # Append any errors to the config.log.
+       cat conftest.err 1>&AS_MESSAGE_LOG_FD
+       $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp
+       $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2
+       if diff conftest.exp conftest.er2 >/dev/null; then
+         $2=yes
+       fi
+     else
+       $2=yes
+     fi
+   fi
+   $RM -r conftest*
+   LDFLAGS="$save_LDFLAGS"
+])
+
+if test x"[$]$2" = xyes; then
+    m4_if([$4], , :, [$4])
+else
+    m4_if([$5], , :, [$5])
+fi
+])# _LT_LINKER_OPTION
+
+# Old name:
+AU_ALIAS([AC_LIBTOOL_LINKER_OPTION], [_LT_LINKER_OPTION])
+dnl aclocal-1.4 backwards compatibility:
+dnl AC_DEFUN([AC_LIBTOOL_LINKER_OPTION], [])
+
+
+# LT_CMD_MAX_LEN
+#---------------
+# Determine the maximum usable command line length and store it in the
+# cache variable lt_cv_sys_max_cmd_len and in max_cmd_len.  The value -1
+# stands for "no limit".
+#
+# The value is chosen per $build_os: a hardcoded figure where probing is
+# known to be slow or dangerous, three quarters of the figure reported by
+# sysctl (BSD-like systems) or getconf (everything else), and as a last
+# resort an empirical probe that keeps doubling a test string until the
+# echo round trip fails, halved as a safety margin.
+AC_DEFUN([LT_CMD_MAX_LEN],
+[AC_REQUIRE([AC_CANONICAL_HOST])dnl
+# find the maximum length of command line arguments
+AC_MSG_CHECKING([the maximum length of command line arguments])
+AC_CACHE_VAL([lt_cv_sys_max_cmd_len], [dnl
+  i=0
+  teststring="ABCD"
+
+  case $build_os in
+  msdosdjgpp*)
+    # On DJGPP, this test can blow up pretty badly due to problems in libc
+    # (any single argument exceeding 2000 bytes causes a buffer overrun
+    # during glob expansion).  Even if it were fixed, the result of this
+    # check would be larger than it should be.
+    lt_cv_sys_max_cmd_len=12288;    # 12K is about right
+    ;;
+
+  gnu*)
+    # Under GNU Hurd, this test is not required because there is
+    # no limit to the length of command line arguments.
+    # Libtool will interpret -1 as no limit whatsoever
+    lt_cv_sys_max_cmd_len=-1;
+    ;;
+
+  cygwin* | mingw* | cegcc*)
+    # On Win9x/ME, this test blows up -- it succeeds, but takes
+    # about 5 minutes as the teststring grows exponentially.
+    # Worse, since 9x/ME are not pre-emptively multitasking,
+    # you end up with a "frozen" computer, even though with patience
+    # the test eventually succeeds (with a max line length of 256k).
+    # Instead, let's just punt: use the minimum linelength reported by
+    # all of the supported platforms: 8192 (on NT/2K/XP).
+    lt_cv_sys_max_cmd_len=8192;
+    ;;
+
+  mint*)
+    # On MiNT this can take a long time and run out of memory.
+    lt_cv_sys_max_cmd_len=8192;
+    ;;
+
+  amigaos*)
+    # On AmigaOS with pdksh, this test takes hours, literally.
+    # So we just punt and use a minimum line length of 8192.
+    lt_cv_sys_max_cmd_len=8192;
+    ;;
+
+  netbsd* | freebsd* | openbsd* | darwin* | dragonfly*)
+    # This has been around since 386BSD, at least.  Likely further.
+    if test -x /sbin/sysctl; then
+      lt_cv_sys_max_cmd_len=`/sbin/sysctl -n kern.argmax`
+    elif test -x /usr/sbin/sysctl; then
+      lt_cv_sys_max_cmd_len=`/usr/sbin/sysctl -n kern.argmax`
+    else
+      lt_cv_sys_max_cmd_len=65536      # usable default for all BSDs
+    fi
+    # And add a safety zone
+    lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4`
+    lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3`
+    ;;
+
+  interix*)
+    # We know the value 262144 and hardcode it with a safety zone (like BSD)
+    lt_cv_sys_max_cmd_len=196608
+    ;;
+
+  os2*)
+    # The test takes a long time on OS/2.
+    lt_cv_sys_max_cmd_len=8192
+    ;;
+
+  osf*)
+    # Dr. Hans Ekkehard Plesser reports seeing a kernel panic running configure
+    # due to this test when exec_disable_arg_limit is 1 on Tru64. It is not
+    # nice to cause kernel panics so lets avoid the loop below.
+    # First set a reasonable default.
+    lt_cv_sys_max_cmd_len=16384
+    #
+    if test -x /sbin/sysconfig; then
+      case `/sbin/sysconfig -q proc exec_disable_arg_limit` in
+        *1*) lt_cv_sys_max_cmd_len=-1 ;;
+      esac
+    fi
+    ;;
+  sco3.2v5*)
+    lt_cv_sys_max_cmd_len=102400
+    ;;
+  sysv5* | sco5v6* | sysv4.2uw2*)
+    kargmax=`grep ARG_MAX /etc/conf/cf.d/stune 2>/dev/null`
+    if test -n "$kargmax"; then
+      lt_cv_sys_max_cmd_len=`echo $kargmax | sed 's/.*[[        ]]//'`
+    else
+      lt_cv_sys_max_cmd_len=32768
+    fi
+    ;;
+  *)
+    lt_cv_sys_max_cmd_len=`(getconf ARG_MAX) 2> /dev/null`
+    # getconf prints the word "undefined" when the system imposes no fixed
+    # limit; that is not a number, so fall through to the probe below
+    # instead of handing it to expr.
+    if test -n "$lt_cv_sys_max_cmd_len" && \
+       test undefined != "$lt_cv_sys_max_cmd_len"; then
+      lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4`
+      lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3`
+    else
+      # Make teststring a little bigger before we do anything with it.
+      # a 1K string should be a reasonable start.
+      for i in 1 2 3 4 5 6 7 8 ; do
+        teststring=$teststring$teststring
+      done
+      SHELL=${SHELL-${CONFIG_SHELL-/bin/sh}}
+      # If test is not a shell built-in, we'll probably end up computing a
+      # maximum length that is only half of the actual maximum length, but
+      # we can't tell.
+      while { test "X"`env echo "$teststring$teststring" 2>/dev/null` \
+                = "X$teststring$teststring"; } >/dev/null 2>&1 &&
+             test $i != 17 # 1/2 MB should be enough
+      do
+        i=`expr $i + 1`
+        teststring=$teststring$teststring
+      done
+      # Only check the string length outside the loop.
+      lt_cv_sys_max_cmd_len=`expr "X$teststring" : ".*" 2>&1`
+      teststring=
+      # Add a significant safety factor because C++ compilers can tack on
+      # massive amounts of additional arguments before passing them to the
+      # linker.  It appears as though 1/2 is a usable value.
+      lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 2`
+    fi
+    ;;
+  esac
+])
+# The expansion is quoted: with an unquoted empty value the test would
+# collapse to the one-argument form, which is always true, and the "none"
+# branch could never be taken.
+if test -n "$lt_cv_sys_max_cmd_len"; then
+  AC_MSG_RESULT($lt_cv_sys_max_cmd_len)
+else
+  AC_MSG_RESULT(none)
+fi
+max_cmd_len=$lt_cv_sys_max_cmd_len
+_LT_DECL([], [max_cmd_len], [0],
+    [What is the maximum length of a command?])
+])# LT_CMD_MAX_LEN
+
+# Old name:
+AU_ALIAS([AC_LIBTOOL_SYS_MAX_CMD_LEN], [LT_CMD_MAX_LEN])
+dnl aclocal-1.4 backwards compatibility:
+dnl AC_DEFUN([AC_LIBTOOL_SYS_MAX_CMD_LEN], [])
+
+
+# _LT_HEADER_DLFCN
+# ----------------
+# Check for the dlfcn.h header, compiling the probe with the default
+# includes.  No extra action is taken on success or failure.
+m4_defun([_LT_HEADER_DLFCN],
+[AC_CHECK_HEADERS([dlfcn.h], [], [], [AC_INCLUDES_DEFAULT])dnl
+])# _LT_HEADER_DLFCN
+
+
+# _LT_TRY_DLOPEN_SELF (ACTION-IF-TRUE, ACTION-IF-TRUE-W-USCORE,
+#                      ACTION-IF-FALSE, ACTION-IF-CROSS-COMPILING)
+# ----------------------------------------------------------------
+# Build and run a small program that opens itself with dlopen (0, ...) and
+# looks up its own function "fnord" with dlsym.  The exit status of the
+# program selects the action:
+#   ACTION-IF-TRUE             the symbol was found as "fnord";
+#   ACTION-IF-TRUE-W-USCORE    it was found only as "_fnord";
+#   ACTION-IF-FALSE            any other status, or the program could not
+#                              be linked;
+#   ACTION-IF-CROSS-COMPILING  $cross_compiling is "yes", so nothing can
+#                              be run.
+# All conftest* files are removed afterwards.
+m4_defun([_LT_TRY_DLOPEN_SELF],
+[m4_require([_LT_HEADER_DLFCN])dnl
+if test "$cross_compiling" = yes; then :
+  [$4]
+else
+  lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
+  lt_status=$lt_dlunknown
+  cat > conftest.$ac_ext <<_LT_EOF
+[#line $LINENO "configure"
+#include "confdefs.h"
+
+#if HAVE_DLFCN_H
+#include <dlfcn.h>
+#endif
+
+#include <stdio.h>
+
+#ifdef RTLD_GLOBAL
+#  define LT_DLGLOBAL          RTLD_GLOBAL
+#else
+#  ifdef DL_GLOBAL
+#    define LT_DLGLOBAL                DL_GLOBAL
+#  else
+#    define LT_DLGLOBAL                0
+#  endif
+#endif
+
+/* We may have to define LT_DLLAZY_OR_NOW in the command line if we
+   find out it does not work in some platform. */
+#ifndef LT_DLLAZY_OR_NOW
+#  ifdef RTLD_LAZY
+#    define LT_DLLAZY_OR_NOW           RTLD_LAZY
+#  else
+#    ifdef DL_LAZY
+#      define LT_DLLAZY_OR_NOW         DL_LAZY
+#    else
+#      ifdef RTLD_NOW
+#        define LT_DLLAZY_OR_NOW       RTLD_NOW
+#      else
+#        ifdef DL_NOW
+#          define LT_DLLAZY_OR_NOW     DL_NOW
+#        else
+#          define LT_DLLAZY_OR_NOW     0
+#        endif
+#      endif
+#    endif
+#  endif
+#endif
+
+/* When -fvisibility=hidden is used, assume the code has been annotated
+   correspondingly for the symbols needed.  */
+#if defined(__GNUC__) && (((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3))
+int fnord () __attribute__((visibility("default")));
+#endif
+
+int fnord () { return 42; }
+int main ()
+{
+  void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW);
+  int status = $lt_dlunknown;
+
+  if (self)
+    {
+      if (dlsym (self,"fnord"))       status = $lt_dlno_uscore;
+      else
+        {
+         if (dlsym( self,"_fnord"))  status = $lt_dlneed_uscore;
+          else puts (dlerror ());
+       }
+      /* dlclose (self); */
+    }
+  else
+    puts (dlerror ());
+
+  return status;
+}]
+_LT_EOF
+  if AC_TRY_EVAL(ac_link) && test -s conftest${ac_exeext} 2>/dev/null; then
+    (./conftest; exit; ) >&AS_MESSAGE_LOG_FD 2>/dev/null
+    lt_status=$?
+    case x$lt_status in
+      x$lt_dlno_uscore) $1 ;;
+      x$lt_dlneed_uscore) $2 ;;
+      x$lt_dlunknown|x*) $3 ;;
+    esac
+  else :
+    # compilation failed
+    $3
+  fi
+fi
+rm -fr conftest*
+])# _LT_TRY_DLOPEN_SELF
+
+
+# LT_SYS_DLOPEN_SELF
+# ------------------
+# Find the run-time loading interface of the host and check whether a
+# program can dlopen itself.
+#
+# Unless $enable_dlopen is "yes" on entry, nothing is probed and the three
+# results below are all set to "unknown".  Otherwise:
+#   lt_cv_dlopen, lt_cv_dlopen_libs  name of the loading function that was
+#                                    found and the library flag it needs
+#   enable_dlopen                    "yes" if any interface was found,
+#                                    otherwise "no"
+#   enable_dlopen_self               "yes", "no" or "unknown"
+#   enable_dlopen_self_static        "yes", "no" or "unknown"
+# The three enable_* values are finally declared through _LT_DECL as
+# dlopen_support, dlopen_self and dlopen_self_static.
+AC_DEFUN([LT_SYS_DLOPEN_SELF],
+[m4_require([_LT_HEADER_DLFCN])dnl
+if test "x$enable_dlopen" != xyes; then
+  # enable_dlopen is not yes on entry: skip all probing.
+  enable_dlopen=unknown
+  enable_dlopen_self=unknown
+  enable_dlopen_self_static=unknown
+else
+  lt_cv_dlopen=no
+  lt_cv_dlopen_libs=
+
+  # Hosts listed by name get a fixed answer; all others fall through to
+  # the chain of function and library probes in the default branch.
+  case $host_os in
+  beos*)
+    lt_cv_dlopen="load_add_on"
+    lt_cv_dlopen_libs=
+    lt_cv_dlopen_self=yes
+    ;;
+
+  mingw* | pw32* | cegcc*)
+    lt_cv_dlopen="LoadLibrary"
+    lt_cv_dlopen_libs=
+    ;;
+
+  cygwin*)
+    lt_cv_dlopen="dlopen"
+    lt_cv_dlopen_libs=
+    ;;
+
+  darwin*)
+  # if libdl is installed we need to link against it
+    AC_CHECK_LIB([dl], [dlopen],
+               [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl"],[
+    lt_cv_dlopen="dyld"
+    lt_cv_dlopen_libs=
+    lt_cv_dlopen_self=yes
+    ])
+    ;;
+
+  *)
+    # Each probe is only tried when the previous one failed, in this order:
+    # shl_load with no extra library, shl_load in -ldld, dlopen with no
+    # extra library, dlopen in -ldl, dlopen in -lsvld, dld_link in -ldld.
+    AC_CHECK_FUNC([shl_load],
+         [lt_cv_dlopen="shl_load"],
+      [AC_CHECK_LIB([dld], [shl_load],
+           [lt_cv_dlopen="shl_load" lt_cv_dlopen_libs="-ldld"],
+       [AC_CHECK_FUNC([dlopen],
+             [lt_cv_dlopen="dlopen"],
+         [AC_CHECK_LIB([dl], [dlopen],
+               [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl"],
+           [AC_CHECK_LIB([svld], [dlopen],
+                 [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-lsvld"],
+             [AC_CHECK_LIB([dld], [dld_link],
+                   [lt_cv_dlopen="dld_link" lt_cv_dlopen_libs="-ldld"])
+             ])
+           ])
+         ])
+       ])
+      ])
+    ;;
+  esac
+
+  # Any interface found above counts as dlopen support.
+  if test "x$lt_cv_dlopen" != xno; then
+    enable_dlopen=yes
+  else
+    enable_dlopen=no
+  fi
+
+  # The self-dlopen test programs are only tried when the interface found
+  # is dlopen itself.  CPPFLAGS, LDFLAGS and LIBS are changed for the
+  # tests and restored from their saved copies afterwards.
+  case $lt_cv_dlopen in
+  dlopen)
+    save_CPPFLAGS="$CPPFLAGS"
+    test "x$ac_cv_header_dlfcn_h" = xyes && CPPFLAGS="$CPPFLAGS -DHAVE_DLFCN_H"
+
+    save_LDFLAGS="$LDFLAGS"
+    # Expand $export_dynamic_flag_spec with wl taken from
+    # $lt_prog_compiler_wl and append the result to LDFLAGS.
+    wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $export_dynamic_flag_spec\"
+
+    save_LIBS="$LIBS"
+    LIBS="$lt_cv_dlopen_libs $LIBS"
+
+    # A cached result other than yes or no, for example cross, is reported
+    # as unknown further down.
+    AC_CACHE_CHECK([whether a program can dlopen itself],
+         lt_cv_dlopen_self, [dnl
+         _LT_TRY_DLOPEN_SELF(
+           lt_cv_dlopen_self=yes, lt_cv_dlopen_self=yes,
+           lt_cv_dlopen_self=no, lt_cv_dlopen_self=cross)
+    ])
+
+    # The static variant is only tried when the dynamic one succeeded; it
+    # appends $lt_prog_compiler_static to the already modified LDFLAGS.
+    if test "x$lt_cv_dlopen_self" = xyes; then
+      wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $lt_prog_compiler_static\"
+      AC_CACHE_CHECK([whether a statically linked program can dlopen itself],
+         lt_cv_dlopen_self_static, [dnl
+         _LT_TRY_DLOPEN_SELF(
+           lt_cv_dlopen_self_static=yes, lt_cv_dlopen_self_static=yes,
+           lt_cv_dlopen_self_static=no,  lt_cv_dlopen_self_static=cross)
+      ])
+    fi
+
+    CPPFLAGS="$save_CPPFLAGS"
+    LDFLAGS="$save_LDFLAGS"
+    LIBS="$save_LIBS"
+    ;;
+  esac
+
+  # Collapse the cache values to yes, no or unknown; an unset cache value
+  # also ends up as unknown.
+  case $lt_cv_dlopen_self in
+  yes|no) enable_dlopen_self=$lt_cv_dlopen_self ;;
+  *) enable_dlopen_self=unknown ;;
+  esac
+
+  case $lt_cv_dlopen_self_static in
+  yes|no) enable_dlopen_self_static=$lt_cv_dlopen_self_static ;;
+  *) enable_dlopen_self_static=unknown ;;
+  esac
+fi
+_LT_DECL([dlopen_support], [enable_dlopen], [0],
+        [Whether dlopen is supported])
+_LT_DECL([dlopen_self], [enable_dlopen_self], [0],
+        [Whether dlopen of programs is supported])
+_LT_DECL([dlopen_self_static], [enable_dlopen_self_static], [0],
+        [Whether dlopen of statically linked programs is supported])
+])# LT_SYS_DLOPEN_SELF
+
+# Old name: AC_LIBTOOL_DLOPEN_SELF is kept as an alias of
+# LT_SYS_DLOPEN_SELF so that existing callers keep working.
+AU_ALIAS([AC_LIBTOOL_DLOPEN_SELF], [LT_SYS_DLOPEN_SELF])
+dnl aclocal-1.4 backwards compatibility:
+dnl AC_DEFUN([AC_LIBTOOL_DLOPEN_SELF], [])
+
+
+# _LT_COMPILER_C_O([TAGNAME])
+# ---------------------------
+# Check to see if options -c and -o are simultaneously supported by compiler.
+# This macro does not hard code the compiler like AC_PROG_CC_C_O.
+#
+# The answer ("yes" or "no") is cached in lt_cv_prog_compiler_c_o for
+# TAGNAME and declared as the compiler_c_o tag variable.  The test compiles
+# $lt_simple_compile_test_code inside a scratch directory, asking for the
+# object file to be written to out/conftest2.$ac_objext, and answers "yes"
+# only if the compile succeeds, the object exists and is non-empty, and the
+# compiler printed nothing beyond its known boilerplate output.
+m4_defun([_LT_COMPILER_C_O],
+[m4_require([_LT_DECL_SED])dnl
+m4_require([_LT_FILEUTILS_DEFAULTS])dnl
+m4_require([_LT_TAG_COMPILER])dnl
+AC_CACHE_CHECK([if $compiler supports -c -o file.$ac_objext],
+  [_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)],
+  [_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)=no
+   # Work inside a fresh conftest/ directory with an out/ subdirectory, so
+   # the object file is requested in a directory other than the source's.
+   $RM -r conftest 2>/dev/null
+   mkdir conftest
+   cd conftest
+   mkdir out
+   echo "$lt_simple_compile_test_code" > conftest.$ac_ext
+
+   lt_compiler_flag="-o out/conftest2.$ac_objext"
+   # Insert the option either (1) after the last *FLAGS variable, or
+   # (2) before a word containing "conftest.", or (3) at the end.
+   # Note that $ac_compile itself does not contain backslashes and begins
+   # with a dollar sign (not a hyphen), so the echo should work correctly.
+   lt_compile=`echo "$ac_compile" | $SED \
+   -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
+   -e 's: [[^ ]]*conftest\.: $lt_compiler_flag&:; t' \
+   -e 's:$: $lt_compiler_flag:'`
+   # Log the command, run it with stderr captured in out/conftest.err, then
+   # copy the captured output and the exit status to the log.
+   (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&AS_MESSAGE_LOG_FD)
+   (eval "$lt_compile" 2>out/conftest.err)
+   ac_status=$?
+   cat out/conftest.err >&AS_MESSAGE_LOG_FD
+   echo "$as_me:$LINENO: \$? = $ac_status" >&AS_MESSAGE_LOG_FD
+   # Success needs a zero exit status and a non-empty object file at the
+   # requested location.
+   if (exit $ac_status) && test -s out/conftest2.$ac_objext
+   then
+     # The compiler can only warn and ignore the option if not recognized
+     # So say no if there are warnings
+     $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp
+     # Drop empty lines and lines that start with optional blanks and a
+     # plus sign (presumably shell trace output) before comparing.
+     $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2
+     # Accept when nothing is left, or when what is left equals the
+     # boilerplate output.
+     if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then
+       _LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)=yes
+     fi
+   fi
+   # NOTE(review): presumably makes sure the scratch directory is writable
+   # again before cleanup; the reason is not visible here - confirm.
+   chmod u+w . 2>&AS_MESSAGE_LOG_FD
+   $RM conftest*
+   # SGI C++ compiler will create directory out/ii_files/ for
+   # template instantiation
+   test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files
+   $RM out/* && rmdir out
+   # Leave the scratch directory and remove it together with any conftest
+   # files in the parent directory.
+   cd ..
+   $RM -r conftest
+   $RM conftest*
+])
+_LT_TAGDECL([compiler_c_o], [lt_cv_prog_compiler_c_o], [1],
+       [Does compiler simultaneously support -c and -o options?])
+])# _LT_COMPILER_C_O
+
+
+# _LT_COMPILER_FILE_LOCKS([TAGNAME])
+# ----------------------------------
+# Check to see if we can do hard links to lock some files if needed.
+#
+# Runs _LT_COMPILER_C_O for TAGNAME first.  The hard-link probe is only
+# performed when that check answered "no" and $need_locks is not already
+# "no"; in every other case need_locks is set to "no".  When the probe
+# fails, a warning is issued and need_locks becomes "warn".  hard_links
+# ends up as "nottested", "yes" or "no".  need_locks is declared through
+# _LT_DECL.
+m4_defun([_LT_COMPILER_FILE_LOCKS],
+[m4_require([_LT_ENABLE_LOCK])dnl
+m4_require([_LT_FILEUTILS_DEFAULTS])dnl
+_LT_COMPILER_C_O([$1])
+
+hard_links="nottested"
+if test "$_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)" = no && test "$need_locks" != no; then
+  # do not overwrite the value of need_locks provided by the user
+  AC_MSG_CHECKING([if we can lock with hard links])
+  hard_links=yes
+  $RM conftest*
+  # Linking a source file that does not exist has to fail.
+  ln conftest.a conftest.b 2>/dev/null && hard_links=no
+  touch conftest.a
+  # The first link of an existing file has to succeed; its diagnostics go
+  # to the configure log.
+  ln conftest.a conftest.b 2>&AS_MESSAGE_LOG_FD || hard_links=no
+  # Linking onto a name that already exists has to fail, otherwise the
+  # link cannot serve as a lock.
+  ln conftest.a conftest.b 2>/dev/null && hard_links=no
+  # Do not leave the probe files behind in the build directory.
+  $RM conftest*
+  AC_MSG_RESULT([$hard_links])
+  if test "$hard_links" = no; then
+    AC_MSG_WARN([`$CC' does not support `-c -o', so `make -j' may be unsafe])
+    need_locks=warn
+  fi
+else
+  need_locks=no
+fi
+_LT_DECL([], [need_locks], [1], [Must we lock files when doing compilation?])
+])# _LT_COMPILER_FILE_LOCKS
+
+
+# _LT_CHECK_OBJDIR
+# ----------------
+# Pick the name of the directory used for temporary libtool files: .libs
+# when a directory of that name can exist here, _libs otherwise.  The
+# choice is cached in lt_cv_objdir, copied to objdir, declared through
+# _LT_DECL, and exported to C as LT_OBJDIR with a trailing slash.
+m4_defun([_LT_CHECK_OBJDIR],
+[AC_CACHE_CHECK([for objdir], [lt_cv_objdir],
+[rm -f .libs 2>/dev/null
+mkdir .libs 2>/dev/null
+# MS-DOS does not allow filenames that begin with a dot, so start from
+# the fallback name and upgrade only when the dot directory is there.
+lt_cv_objdir=_libs
+test -d .libs && lt_cv_objdir=.libs
+rmdir .libs 2>/dev/null])
+objdir=$lt_cv_objdir
+_LT_DECL([], [objdir], [0],
+         [The name of the directory that contains temporary libtool files])dnl
+m4_pattern_allow([LT_OBJDIR])dnl
+AC_DEFINE_UNQUOTED(LT_OBJDIR, "$lt_cv_objdir/",
+  [Define to the sub-directory in which libtool stores uninstalled libraries.])
+])# _LT_CHECK_OBJDIR
+
+
+# _LT_LINKER_HARDCODE_LIBPATH([TAGNAME])
+# --------------------------------------
+# Check hardcoding attributes.
+#
+# Sets hardcode_action for TAGNAME to "unsupported", "immediate" or
+# "relink", reports it, and may set enable_fast_install to "no" or
+# "needless" depending on the outcome.
+m4_defun([_LT_LINKER_HARDCODE_LIBPATH],
+[AC_MSG_CHECKING([how to hardcode library paths into programs])
+_LT_TAGVAR(hardcode_action, $1)=
+if test -z "$_LT_TAGVAR(hardcode_libdir_flag_spec, $1)" &&
+   test -z "$_LT_TAGVAR(runpath_var, $1)" &&
+   test "X$_LT_TAGVAR(hardcode_automatic, $1)" != "Xyes" ; then
+  # We cannot hardcode anything, or else we can only hardcode existing
+  # directories.
+  _LT_TAGVAR(hardcode_action, $1)=unsupported
+
+# From here on we can hardcode non-existent directories.
+# If the only mechanism to avoid hardcoding is shlibpath_var, we
+# have to relink, otherwise we might link with an installed library
+# when we should be linking with a yet-to-be-installed one
+## test "$_LT_TAGVAR(hardcode_shlibpath_var, $1)" != no &&
+elif test "$_LT_TAGVAR(hardcode_direct, $1)" = no ||
+     test "$_LT_TAGVAR(hardcode_minus_L, $1)" = no; then
+  # We can link without hardcoding, and we can hardcode nonexisting dirs.
+  _LT_TAGVAR(hardcode_action, $1)=immediate
+else
+  # Linking always hardcodes the temporary library directory.
+  _LT_TAGVAR(hardcode_action, $1)=relink
+fi
+AC_MSG_RESULT([$_LT_TAGVAR(hardcode_action, $1)])
+
+if test "$_LT_TAGVAR(hardcode_action, $1)" != relink &&
+   test "$_LT_TAGVAR(inherit_rpath, $1)" != yes; then
+  if test "$shlibpath_overrides_runpath" = yes ||
+     test "$enable_shared" = no; then
+    # Fast installation is not necessary
+    enable_fast_install=needless
+  fi
+else
+  # Fast installation is not supported
+  enable_fast_install=no
+fi
+_LT_TAGDECL([], [hardcode_action], [0],
+    [How to hardcode a shared library path into an executable])
+])# _LT_LINKER_HARDCODE_LIBPATH
+
+
+# _LT_CMD_STRIPLIB
+# ----------------
+# Set striplib and old_striplib to strip commands built from $STRIP, or
+# leave both empty when stripping is not considered possible.  Reports
+# "yes" or "no" accordingly and declares both variables through _LT_DECL.
+m4_defun([_LT_CMD_STRIPLIB],
+[m4_require([_LT_DECL_EGREP])
+striplib=
+old_striplib=
+AC_MSG_CHECKING([whether stripping libraries is possible])
+if test -n "$STRIP"; then
+  if $STRIP -V 2>&1 | $GREP "GNU strip" >/dev/null; then
+    # GNU strip identifies itself in its version output.
+    old_striplib="$STRIP --strip-debug"
+    striplib="$STRIP --strip-unneeded"
+  else
+# FIXME - insert some real tests, host_os isn't really good enough
+    case $host_os in
+    darwin*)
+      striplib="$STRIP -x"
+      old_striplib="$STRIP -S"
+      ;;
+    esac
+  fi
+fi
+# striplib is non-empty exactly when one of the branches above matched.
+if test -n "$striplib"; then
+  AC_MSG_RESULT([yes])
+else
+  AC_MSG_RESULT([no])
+fi
+_LT_DECL([], [old_striplib], [1], [Commands to strip libraries])
+_LT_DECL([], [striplib], [1])
+])# _LT_CMD_STRIPLIB
+
+
+# _LT_SYS_DYNAMIC_LINKER([TAG])
+# -----------------------------
+# PORTME Fill in your ld.so characteristics
+m4_defun([_LT_SYS_DYNAMIC_LINKER],
+[AC_REQUIRE([AC_CANONICAL_HOST])dnl
+m4_require([_LT_DECL_EGREP])dnl
+m4_require([_LT_FILEUTILS_DEFAULTS])dnl
+m4_require([_LT_DECL_OBJDUMP])dnl
+m4_require([_LT_DECL_SED])dnl
+m4_require([_LT_CHECK_SHELL_FEATURES])dnl
+AC_MSG_CHECKING([dynamic linker characteristics])
+m4_if([$1],
+       [], [
+if test "$GCC" = yes; then
+  case $host_os in
+    darwin*) lt_awk_arg="/^libraries:/,/LR/" ;;
+    *) lt_awk_arg="/^libraries:/" ;;
+  esac
+  case $host_os in
+    mingw* | cegcc*) lt_sed_strip_eq="s,=\([[A-Za-z]]:\),\1,g" ;;
+    *) lt_sed_strip_eq="s,=/,/,g" ;;
+  esac
+  lt_search_path_spec=`$CC -print-search-dirs | awk $lt_awk_arg | $SED -e "s/^libraries://" -e $lt_sed_strip_eq`
+  case $lt_search_path_spec in
+  *\;*)
+    # if the path contains ";" then we assume it to be the separator
+    # otherwise default to the standard path separator (i.e. ":") - it is
+    # assumed that no part of a normal pathname contains ";" but that should
+    # be okay in the real world where ";" in dirpaths is itself problematic.
+    lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED 's/;/ /g'`
+    ;;
+  *)
+    lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED "s/$PATH_SEPARATOR/ /g"`
+    ;;
+  esac
+  # Ok, now we have the path, separated by spaces, we can step through it
+  # and add multilib dir if necessary.
+  lt_tmp_lt_search_path_spec=
+  lt_multi_os_dir=`$CC $CPPFLAGS $CFLAGS $LDFLAGS -print-multi-os-directory 2>/dev/null`
+  for lt_sys_path in $lt_search_path_spec; do
+    if test -d "$lt_sys_path/$lt_multi_os_dir"; then
+      lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path/$lt_multi_os_dir"
+    else
+      test -d "$lt_sys_path" && \
+       lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path"
+    fi
+  done
+  lt_search_path_spec=`$ECHO "$lt_tmp_lt_search_path_spec" | awk '
+BEGIN {RS=" "; FS="/|\n";} {
+  lt_foo="";
+  lt_count=0;
+  for (lt_i = NF; lt_i > 0; lt_i--) {
+    if ($lt_i != "" && $lt_i != ".") {
+      if ($lt_i == "..") {
+        lt_count++;
+      } else {
+        if (lt_count == 0) {
+          lt_foo="/" $lt_i lt_foo;
+        } else {
+          lt_count--;
+        }
+      }
+    }
+  }
+  if (lt_foo != "") { lt_freq[[lt_foo]]++; }
+  if (lt_freq[[lt_foo]] == 1) { print lt_foo; }
+}'`
+  # AWK program above erroneously prepends '/' to C:/dos/paths
+  # for these hosts.
+  case $host_os in
+    mingw* | cegcc*) lt_search_path_spec=`$ECHO "$lt_search_path_spec" |\
+      $SED 's,/\([[A-Za-z]]:\),\1,g'` ;;
+  esac
+  sys_lib_search_path_spec=`$ECHO "$lt_search_path_spec" | $lt_NL2SP`
+else
+  sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib"
+fi])
+library_names_spec=
+libname_spec='lib$name'
+soname_spec=
+shrext_cmds=".so"
+postinstall_cmds=
+postuninstall_cmds=
+finish_cmds=
+finish_eval=
+shlibpath_var=
+shlibpath_overrides_runpath=unknown
+version_type=none
+dynamic_linker="$host_os ld.so"
+sys_lib_dlsearch_path_spec="/lib /usr/lib"
+need_lib_prefix=unknown
+hardcode_into_libs=no
+
+# when you set need_version to no, make sure it does not cause -set_version
+# flags to be left without arguments
+need_version=unknown
+
+case $host_os in
+aix3*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  library_names_spec='${libname}${release}${shared_ext}$versuffix $libname.a'
+  shlibpath_var=LIBPATH
+
+  # AIX 3 has no versioning support, so we append a major version to the name.
+  soname_spec='${libname}${release}${shared_ext}$major'
+  ;;
+
+aix[[4-9]]*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  hardcode_into_libs=yes
+  if test "$host_cpu" = ia64; then
+    # AIX 5 supports IA64
+    library_names_spec='${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext}$versuffix $libname${shared_ext}'
+    shlibpath_var=LD_LIBRARY_PATH
+  else
+    # With GCC up to 2.95.x, collect2 would create an import file
+    # for dependence libraries.  The import file would start with
+    # the line `#! .'.  This would cause the generated library to
+    # depend on `.', always an invalid library.  This was fixed in
+    # development snapshots of GCC prior to 3.0.
+    case $host_os in
+      aix4 | aix4.[[01]] | aix4.[[01]].*)
+      if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)'
+          echo ' yes '
+          echo '#endif'; } | ${CC} -E - | $GREP yes > /dev/null; then
+       :
+      else
+       can_build_shared=no
+      fi
+      ;;
+    esac
+    # AIX (on Power*) has no versioning support, so currently we can not hardcode correct
+    # soname into executable. Probably we can add versioning support to
+    # collect2, so additional links can be useful in future.
+    if test "$aix_use_runtimelinking" = yes; then
+      # If using run time linking (on AIX 4.2 or later) use lib<name>.so
+      # instead of lib<name>.a to let people know that these are not
+      # typical AIX shared libraries.
+      library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+    else
+      # We preserve .a as extension for shared libraries through AIX4.2
+      # and later when we are not doing run time linking.
+      library_names_spec='${libname}${release}.a $libname.a'
+      soname_spec='${libname}${release}${shared_ext}$major'
+    fi
+    shlibpath_var=LIBPATH
+  fi
+  ;;
+
+amigaos*)
+  case $host_cpu in
+  powerpc)
+    # Since July 2007 AmigaOS4 officially supports .so libraries.
+    # When compiling the executable, add -use-dynld -Lsobjs: to the compileline.
+    library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+    ;;
+  m68k)
+    library_names_spec='$libname.ixlibrary $libname.a'
+    # Create ${libname}_ixlibrary.a entries in /sys/libs.
+    finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`func_echo_all "$lib" | $SED '\''s%^.*/\([[^/]]*\)\.ixlibrary$%\1%'\''`; test $RM /sys/libs/${libname}_ixlibrary.a; $show "cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a"; cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a || exit 1; done'
+    ;;
+  esac
+  ;;
+
+beos*)
+  library_names_spec='${libname}${shared_ext}'
+  dynamic_linker="$host_os ld.so"
+  shlibpath_var=LIBRARY_PATH
+  ;;
+
+bsdi[[45]]*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir'
+  shlibpath_var=LD_LIBRARY_PATH
+  sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib"
+  sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib"
+  # the default ld.so.conf also contains /usr/contrib/lib and
+  # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow
+  # libtool to hard-code these into programs
+  ;;
+
+cygwin* | mingw* | pw32* | cegcc*)
+  version_type=windows
+  shrext_cmds=".dll"
+  need_version=no
+  need_lib_prefix=no
+
+  case $GCC,$cc_basename in
+  yes,*)
+    # gcc
+    library_names_spec='$libname.dll.a'
+    # DLL is installed to $(libdir)/../bin by postinstall_cmds
+    postinstall_cmds='base_file=`basename \${file}`~
+      dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i; echo \$dlname'\''`~
+      dldir=$destdir/`dirname \$dlpath`~
+      test -d \$dldir || mkdir -p \$dldir~
+      $install_prog $dir/$dlname \$dldir/$dlname~
+      chmod a+x \$dldir/$dlname~
+      if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then
+        eval '\''$striplib \$dldir/$dlname'\'' || exit \$?;
+      fi'
+    postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~
+      dlpath=$dir/\$dldll~
+       $RM \$dlpath'
+    shlibpath_overrides_runpath=yes
+
+    case $host_os in
+    cygwin*)
+      # Cygwin DLLs use 'cyg' prefix rather than 'lib'
+      soname_spec='`echo ${libname} | sed -e 's/^lib/cyg/'``echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext}'
+m4_if([$1], [],[
+      sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/lib/w32api"])
+      ;;
+    mingw* | cegcc*)
+      # MinGW DLLs use traditional 'lib' prefix
+      soname_spec='${libname}`echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext}'
+      ;;
+    pw32*)
+      # pw32 DLLs use 'pw' prefix rather than 'lib'
+      library_names_spec='`echo ${libname} | sed -e 's/^lib/pw/'``echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext}'
+      ;;
+    esac
+    dynamic_linker='Win32 ld.exe'
+    ;;
+
+  *,cl*)
+    # Native MSVC
+    libname_spec='$name'
+    soname_spec='${libname}`echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext}'
+    library_names_spec='${libname}.dll.lib'
+
+    case $build_os in
+    mingw*)
+      sys_lib_search_path_spec=
+      lt_save_ifs=$IFS
+      IFS=';'
+      for lt_path in $LIB
+      do
+        IFS=$lt_save_ifs
+        # Let DOS variable expansion print the short 8.3 style file name.
+        lt_path=`cd "$lt_path" 2>/dev/null && cmd //C "for %i in (".") do @echo %~si"`
+        sys_lib_search_path_spec="$sys_lib_search_path_spec $lt_path"
+      done
+      IFS=$lt_save_ifs
+      # Convert to MSYS style.
+      sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | sed -e 's|\\\\|/|g' -e 's| \\([[a-zA-Z]]\\):| /\\1|g' -e 's|^ ||'`
+      ;;
+    cygwin*)
+      # Convert to unix form, then to dos form, then back to unix form
+      # but this time dos style (no spaces!) so that the unix form looks
+      # like /cygdrive/c/PROGRA~1:/cygdr...
+      sys_lib_search_path_spec=`cygpath --path --unix "$LIB"`
+      sys_lib_search_path_spec=`cygpath --path --dos "$sys_lib_search_path_spec" 2>/dev/null`
+      sys_lib_search_path_spec=`cygpath --path --unix "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"`
+      ;;
+    *)
+      sys_lib_search_path_spec="$LIB"
+      if $ECHO "$sys_lib_search_path_spec" | [$GREP ';[c-zC-Z]:/' >/dev/null]; then
+        # It is most probably a Windows format PATH.
+        sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'`
+      else
+        sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"`
+      fi
+      # FIXME: find the short name or the path components, as spaces are
+      # common. (e.g. "Program Files" -> "PROGRA~1")
+      ;;
+    esac
+
+    # DLL is installed to $(libdir)/../bin by postinstall_cmds
+    postinstall_cmds='base_file=`basename \${file}`~
+      dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i; echo \$dlname'\''`~
+      dldir=$destdir/`dirname \$dlpath`~
+      test -d \$dldir || mkdir -p \$dldir~
+      $install_prog $dir/$dlname \$dldir/$dlname'
+    postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~
+      dlpath=$dir/\$dldll~
+       $RM \$dlpath'
+    shlibpath_overrides_runpath=yes
+    dynamic_linker='Win32 link.exe'
+    ;;
+
+  *)
+    # Assume MSVC wrapper
+    library_names_spec='${libname}`echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext} $libname.lib'
+    dynamic_linker='Win32 ld.exe'
+    ;;
+  esac
+  # FIXME: first we should search . and the directory the executable is in
+  shlibpath_var=PATH
+  ;;
+
+darwin* | rhapsody*)
+  dynamic_linker="$host_os dyld"
+  version_type=darwin
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${major}$shared_ext ${libname}$shared_ext'
+  soname_spec='${libname}${release}${major}$shared_ext'
+  shlibpath_overrides_runpath=yes
+  shlibpath_var=DYLD_LIBRARY_PATH
+  shrext_cmds='`test .$module = .yes && echo .so || echo .dylib`'
+m4_if([$1], [],[
+  sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/local/lib"])
+  sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib'
+  ;;
+
+dgux*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname$shared_ext'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  ;;
+
+freebsd* | dragonfly*)
+  # DragonFly does not have aout.  When/if they implement a new
+  # versioning mechanism, adjust this.
+  if test -x /usr/bin/objformat; then
+    objformat=`/usr/bin/objformat`
+  else
+    case $host_os in
+    freebsd[[23]].*) objformat=aout ;;
+    *) objformat=elf ;;
+    esac
+  fi
+  version_type=freebsd-$objformat
+  case $version_type in
+    freebsd-elf*)
+      library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}'
+      need_version=no
+      need_lib_prefix=no
+      ;;
+    freebsd-*)
+      library_names_spec='${libname}${release}${shared_ext}$versuffix $libname${shared_ext}$versuffix'
+      need_version=yes
+      ;;
+  esac
+  shlibpath_var=LD_LIBRARY_PATH
+  case $host_os in
+  freebsd2.*)
+    shlibpath_overrides_runpath=yes
+    ;;
+  freebsd3.[[01]]* | freebsdelf3.[[01]]*)
+    shlibpath_overrides_runpath=yes
+    hardcode_into_libs=yes
+    ;;
+  freebsd3.[[2-9]]* | freebsdelf3.[[2-9]]* | \
+  freebsd4.[[0-5]] | freebsdelf4.[[0-5]] | freebsd4.1.1 | freebsdelf4.1.1)
+    shlibpath_overrides_runpath=no
+    hardcode_into_libs=yes
+    ;;
+  *) # from 4.6 on, and DragonFly
+    shlibpath_overrides_runpath=yes
+    hardcode_into_libs=yes
+    ;;
+  esac
+  ;;
+
+gnu*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=no
+  hardcode_into_libs=yes
+  ;;
+
+haiku*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  dynamic_linker="$host_os runtime_loader"
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LIBRARY_PATH
+  shlibpath_overrides_runpath=yes
+  sys_lib_dlsearch_path_spec='/boot/home/config/lib /boot/common/lib /boot/system/lib'
+  hardcode_into_libs=yes
+  ;;
+
+hpux9* | hpux10* | hpux11*)
+  # Give a soname corresponding to the major version so that dld.sl refuses to
+  # link against other versions.
+  version_type=sunos
+  need_lib_prefix=no
+  need_version=no
+  case $host_cpu in
+  ia64*)
+    shrext_cmds='.so'
+    hardcode_into_libs=yes
+    dynamic_linker="$host_os dld.so"
+    shlibpath_var=LD_LIBRARY_PATH
+    shlibpath_overrides_runpath=yes # Unless +noenvvar is specified.
+    library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+    soname_spec='${libname}${release}${shared_ext}$major'
+    if test "X$HPUX_IA64_MODE" = X32; then
+      sys_lib_search_path_spec="/usr/lib/hpux32 /usr/local/lib/hpux32 /usr/local/lib"
+    else
+      sys_lib_search_path_spec="/usr/lib/hpux64 /usr/local/lib/hpux64"
+    fi
+    sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec
+    ;;
+  hppa*64*)
+    shrext_cmds='.sl'
+    hardcode_into_libs=yes
+    dynamic_linker="$host_os dld.sl"
+    shlibpath_var=LD_LIBRARY_PATH # How should we handle SHLIB_PATH
+    shlibpath_overrides_runpath=yes # Unless +noenvvar is specified.
+    library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+    soname_spec='${libname}${release}${shared_ext}$major'
+    sys_lib_search_path_spec="/usr/lib/pa20_64 /usr/ccs/lib/pa20_64"
+    sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec
+    ;;
+  *)
+    shrext_cmds='.sl'
+    dynamic_linker="$host_os dld.sl"
+    shlibpath_var=SHLIB_PATH
+    shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH
+    library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+    soname_spec='${libname}${release}${shared_ext}$major'
+    ;;
+  esac
+  # HP-UX runs *really* slowly unless shared libraries are mode 555, ...
+  postinstall_cmds='chmod 555 $lib'
+  # or fails outright, so override atomically:
+  install_override_mode=555
+  ;;
+
+interix[[3-9]]*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  dynamic_linker='Interix 3.x ld.so.1 (PE, like ELF)'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=no
+  hardcode_into_libs=yes
+  ;;
+
+irix5* | irix6* | nonstopux*)
+  case $host_os in
+    nonstopux*) version_type=nonstopux ;;
+    *)
+       if test "$lt_cv_prog_gnu_ld" = yes; then
+               version_type=linux # correct to gnu/linux during the next big refactor
+       else
+               version_type=irix
+       fi ;;
+  esac
+  need_lib_prefix=no
+  need_version=no
+  soname_spec='${libname}${release}${shared_ext}$major'
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext} $libname${shared_ext}'
+  case $host_os in
+  irix5* | nonstopux*)
+    libsuff= shlibsuff=
+    ;;
+  *)
+    case $LD in # libtool.m4 will add one of these switches to LD
+    *-32|*"-32 "|*-melf32bsmip|*"-melf32bsmip ")
+      libsuff= shlibsuff= libmagic=32-bit;;
+    *-n32|*"-n32 "|*-melf32bmipn32|*"-melf32bmipn32 ")
+      libsuff=32 shlibsuff=N32 libmagic=N32;;
+    *-64|*"-64 "|*-melf64bmip|*"-melf64bmip ")
+      libsuff=64 shlibsuff=64 libmagic=64-bit;;
+    *) libsuff= shlibsuff= libmagic=never-match;;
+    esac
+    ;;
+  esac
+  shlibpath_var=LD_LIBRARY${shlibsuff}_PATH
+  shlibpath_overrides_runpath=no
+  sys_lib_search_path_spec="/usr/lib${libsuff} /lib${libsuff} /usr/local/lib${libsuff}"
+  sys_lib_dlsearch_path_spec="/usr/lib${libsuff} /lib${libsuff}"
+  hardcode_into_libs=yes
+  ;;
+
+# No shared lib support for Linux oldld, aout, or coff.
+linux*oldld* | linux*aout* | linux*coff*)
+  dynamic_linker=no
+  ;;
+
+# This must be glibc/ELF.
+linux* | k*bsd*-gnu | kopensolaris*-gnu)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=no
+
+  # Some binutils ld are patched to set DT_RUNPATH
+  AC_CACHE_VAL([lt_cv_shlibpath_overrides_runpath],
+    [lt_cv_shlibpath_overrides_runpath=no
+    save_LDFLAGS=$LDFLAGS
+    save_libdir=$libdir
+    eval "libdir=/foo; wl=\"$_LT_TAGVAR(lt_prog_compiler_wl, $1)\"; \
+        LDFLAGS=\"\$LDFLAGS $_LT_TAGVAR(hardcode_libdir_flag_spec, $1)\""
+    AC_LINK_IFELSE([AC_LANG_PROGRAM([],[])],
+      [AS_IF([ ($OBJDUMP -p conftest$ac_exeext) 2>/dev/null | grep "RUNPATH.*$libdir" >/dev/null],
+        [lt_cv_shlibpath_overrides_runpath=yes])])
+    LDFLAGS=$save_LDFLAGS
+    libdir=$save_libdir
+    ])
+  shlibpath_overrides_runpath=$lt_cv_shlibpath_overrides_runpath
+
+  # This implies no fast_install, which is unacceptable.
+  # Some rework will be needed to allow for fast_install
+  # before this can be enabled.
+  hardcode_into_libs=yes
+
+  # Append ld.so.conf contents to the search path
+  if test -f /etc/ld.so.conf; then
+    lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s 2>/dev/null", \[$]2)); skip = 1; } { if (!skip) print \[$]0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;/^[  ]*hwcap[        ]/d;s/[:,      ]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;s/"//g;/^$/d' | tr '\n' ' '`
+    sys_lib_dlsearch_path_spec="/lib /usr/lib $lt_ld_extra"
+  fi
+
+  # We used to test for /lib/ld.so.1 and disable shared libraries on
+  # powerpc, because MkLinux only supported shared libraries with the
+  # GNU dynamic linker.  Since this was broken with cross compilers,
+  # most powerpc-linux boxes support dynamic linking these days and
+  # people can always --disable-shared, the test was removed, and we
+  # assume the GNU/Linux dynamic linker is in use.
+  dynamic_linker='GNU/Linux ld.so'
+  ;;
+
+netbsd*)
+  version_type=sunos
+  need_lib_prefix=no
+  need_version=no
+  if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then
+    library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix'
+    finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir'
+    dynamic_linker='NetBSD (a.out) ld.so'
+  else
+    library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
+    soname_spec='${libname}${release}${shared_ext}$major'
+    dynamic_linker='NetBSD ld.elf_so'
+  fi
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=yes
+  hardcode_into_libs=yes
+  ;;
+
+newsos6)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=yes
+  ;;
+
+*nto* | *qnx*)
+  version_type=qnx
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=no
+  hardcode_into_libs=yes
+  dynamic_linker='ldqnx.so'
+  ;;
+
+openbsd*)
+  version_type=sunos
+  sys_lib_dlsearch_path_spec="/usr/lib"
+  need_lib_prefix=no
+  # Some older versions of OpenBSD (3.3 at least) *do* need versioned libs.
+  case $host_os in
+    openbsd3.3 | openbsd3.3.*) need_version=yes ;;
+    *)                         need_version=no  ;;
+  esac
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix'
+  finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir'
+  shlibpath_var=LD_LIBRARY_PATH
+  if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
+    case $host_os in
+      openbsd2.[[89]] | openbsd2.[[89]].*)
+       shlibpath_overrides_runpath=no
+       ;;
+      *)
+       shlibpath_overrides_runpath=yes
+       ;;
+      esac
+  else
+    shlibpath_overrides_runpath=yes
+  fi
+  ;;
+
+os2*)
+  libname_spec='$name'
+  shrext_cmds=".dll"
+  need_lib_prefix=no
+  library_names_spec='$libname${shared_ext} $libname.a'
+  dynamic_linker='OS/2 ld.exe'
+  shlibpath_var=LIBPATH
+  ;;
+
+osf3* | osf4* | osf5*)
+  version_type=osf
+  need_lib_prefix=no
+  need_version=no
+  soname_spec='${libname}${release}${shared_ext}$major'
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  shlibpath_var=LD_LIBRARY_PATH
+  sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib"
+  sys_lib_dlsearch_path_spec="$sys_lib_search_path_spec"
+  ;;
+
+rdos*)
+  dynamic_linker=no
+  ;;
+
+solaris*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=yes
+  hardcode_into_libs=yes
+  # ldd complains unless libraries are executable
+  postinstall_cmds='chmod +x $lib'
+  ;;
+
+sunos4*)
+  version_type=sunos
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix'
+  finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=yes
+  if test "$with_gnu_ld" = yes; then
+    need_lib_prefix=no
+  fi
+  need_version=yes
+  ;;
+
+sysv4 | sysv4.3*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  case $host_vendor in
+    sni)
+      shlibpath_overrides_runpath=no
+      need_lib_prefix=no
+      runpath_var=LD_RUN_PATH
+      ;;
+    siemens)
+      need_lib_prefix=no
+      ;;
+    motorola)
+      need_lib_prefix=no
+      need_version=no
+      shlibpath_overrides_runpath=no
+      sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib'
+      ;;
+  esac
+  ;;
+
+sysv4*MP*)
+  if test -d /usr/nec ;then
+    version_type=linux # correct to gnu/linux during the next big refactor
+    library_names_spec='$libname${shared_ext}.$versuffix $libname${shared_ext}.$major $libname${shared_ext}'
+    soname_spec='$libname${shared_ext}.$major'
+    shlibpath_var=LD_LIBRARY_PATH
+  fi
+  ;;
+
+sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*)
+  version_type=freebsd-elf
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=yes
+  hardcode_into_libs=yes
+  if test "$with_gnu_ld" = yes; then
+    sys_lib_search_path_spec='/usr/local/lib /usr/gnu/lib /usr/ccs/lib /usr/lib /lib'
+  else
+    sys_lib_search_path_spec='/usr/ccs/lib /usr/lib'
+    case $host_os in
+      sco3.2v5*)
+        sys_lib_search_path_spec="$sys_lib_search_path_spec /lib"
+       ;;
+    esac
+  fi
+  sys_lib_dlsearch_path_spec='/usr/lib'
+  ;;
+
+tpf*)
+  # TPF is a cross-target only.  Preferred cross-host = GNU/Linux.
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=no
+  hardcode_into_libs=yes
+  ;;
+
+uts4*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  ;;
+
+*)
+  dynamic_linker=no
+  ;;
+esac
+AC_MSG_RESULT([$dynamic_linker])
+test "$dynamic_linker" = no && can_build_shared=no
+
+variables_saved_for_relink="PATH $shlibpath_var $runpath_var"
+if test "$GCC" = yes; then
+  variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH"
+fi
+
+if test "${lt_cv_sys_lib_search_path_spec+set}" = set; then
+  sys_lib_search_path_spec="$lt_cv_sys_lib_search_path_spec"
+fi
+if test "${lt_cv_sys_lib_dlsearch_path_spec+set}" = set; then
+  sys_lib_dlsearch_path_spec="$lt_cv_sys_lib_dlsearch_path_spec"
+fi
+
+_LT_DECL([], [variables_saved_for_relink], [1],
+    [Variables whose values should be saved in libtool wrapper scripts and
+    restored at link time])
+_LT_DECL([], [need_lib_prefix], [0],
+    [Do we need the "lib" prefix for modules?])
+_LT_DECL([], [need_version], [0], [Do we need a version for libraries?])
+_LT_DECL([], [version_type], [0], [Library versioning type])
+_LT_DECL([], [runpath_var], [0],  [Shared library runtime path variable])
+_LT_DECL([], [shlibpath_var], [0],[Shared library path variable])
+_LT_DECL([], [shlibpath_overrides_runpath], [0],
+    [Is shlibpath searched before the hard-coded library search path?])
+_LT_DECL([], [libname_spec], [1], [Format of library name prefix])
+_LT_DECL([], [library_names_spec], [1],
+    [[List of archive names.  First name is the real one, the rest are links.
+    The last name is the one that the linker finds with -lNAME]])
+_LT_DECL([], [soname_spec], [1],
+    [[The coded name of the library, if different from the real name]])
+_LT_DECL([], [install_override_mode], [1],
+    [Permission mode override for installation of shared libraries])
+_LT_DECL([], [postinstall_cmds], [2],
+    [Command to use after installation of a shared archive])
+_LT_DECL([], [postuninstall_cmds], [2],
+    [Command to use after uninstallation of a shared archive])
+_LT_DECL([], [finish_cmds], [2],
+    [Commands used to finish a libtool library installation in a directory])
+_LT_DECL([], [finish_eval], [1],
+    [[As "finish_cmds", except a single script fragment to be evaled but
+    not shown]])
+_LT_DECL([], [hardcode_into_libs], [0],
+    [Whether we should hardcode library paths into libraries])
+_LT_DECL([], [sys_lib_search_path_spec], [2],
+    [Compile-time system search path for libraries])
+_LT_DECL([], [sys_lib_dlsearch_path_spec], [2],
+    [Run-time system search path for libraries])
+])# _LT_SYS_DYNAMIC_LINKER
+
+
+# _LT_PATH_TOOL_PREFIX(TOOL, [SEARCH-PATH])
+# ----------------------------------------
+# Find a file program named TOOL which can recognize a shared library.
+# SEARCH-PATH is a $PATH_SEPARATOR-delimited list of directories to look
+# in; $PATH is used when it is omitted.  An absolute path already stored
+# in MAGIC_CMD is accepted as is.  The result is cached in
+# lt_cv_path_MAGIC_CMD and copied to MAGIC_CMD (empty when nothing was found).
+AC_DEFUN([_LT_PATH_TOOL_PREFIX],
+[m4_require([_LT_DECL_EGREP])dnl
+AC_MSG_CHECKING([for $1])
+AC_CACHE_VAL(lt_cv_path_MAGIC_CMD,
+[case $MAGIC_CMD in
+dnl Any absolute path, POSIX or DOS style, counts as a user override.
+[[\\/]]* | ?:[[\\/]]*)
+  lt_cv_path_MAGIC_CMD="$MAGIC_CMD" # Let the user override the test with a path.
+  ;;
+*)
+  lt_save_MAGIC_CMD="$MAGIC_CMD"
+  lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR
+dnl $ac_dummy forces splitting on constant user-supplied paths.
+dnl POSIX.2 word splitting is done only on the output of word expansions,
+dnl not every word.  This closes a longstanding sh security hole.
+  ac_dummy="m4_if([$2], , $PATH, [$2])"
+  for ac_dir in $ac_dummy; do
+    IFS="$lt_save_ifs"
+    test -z "$ac_dir" && ac_dir=.
+    # Quote the candidate so that directories containing blanks work.
+    if test -f "$ac_dir/$1"; then
+      lt_cv_path_MAGIC_CMD="$ac_dir/$1"
+      # Only warn when the candidate does not recognize the test file; the
+      # first candidate found is kept either way.
+      if test -n "$file_magic_test_file"; then
+       case $deplibs_check_method in
+       "file_magic "*)
+         file_magic_regex=`expr "$deplibs_check_method" : "file_magic \(.*\)"`
+         MAGIC_CMD="$lt_cv_path_MAGIC_CMD"
+         if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null |
+           $EGREP "$file_magic_regex" > /dev/null; then
+           :
+         else
+           cat <<_LT_EOF 1>&2
+
+*** Warning: the command libtool uses to detect shared libraries,
+*** $file_magic_cmd, produces output that libtool cannot recognize.
+*** The result is that libtool may fail to recognize shared libraries
+*** as such.  This will affect the creation of libtool libraries that
+*** depend on shared libraries, but programs linked with such libtool
+*** libraries will work regardless of this problem.  Nevertheless, you
+*** may want to report the problem to your system manager and/or to
+*** bug-libtool@gnu.org
+
+_LT_EOF
+         fi ;;
+       esac
+      fi
+      break
+    fi
+  done
+  IFS="$lt_save_ifs"
+  MAGIC_CMD="$lt_save_MAGIC_CMD"
+  ;;
+esac])
+MAGIC_CMD="$lt_cv_path_MAGIC_CMD"
+if test -n "$MAGIC_CMD"; then
+  AC_MSG_RESULT($MAGIC_CMD)
+else
+  AC_MSG_RESULT(no)
+fi
+_LT_DECL([], [MAGIC_CMD], [0],
+        [Used to examine libraries when file_magic_cmd begins with "file"])dnl
+])# _LT_PATH_TOOL_PREFIX
+
+# Old name:
+AU_ALIAS([AC_PATH_TOOL_PREFIX], [_LT_PATH_TOOL_PREFIX])
+dnl aclocal-1.4 backwards compatibility:
+dnl AC_DEFUN([AC_PATH_TOOL_PREFIX], [])
+
+
+# _LT_PATH_MAGIC
+# --------------
+# Find a file program which can recognize a shared library.  The name
+# qualified with $ac_tool_prefix is tried first.  When that leaves
+# lt_cv_path_MAGIC_CMD empty, the plain name is tried if a tool prefix is
+# set; without a tool prefix MAGIC_CMD falls back to `:'.  Both searches
+# look in /usr/bin before $PATH.
+m4_defun([_LT_PATH_MAGIC],
+[_LT_PATH_TOOL_PREFIX(${ac_tool_prefix}file, /usr/bin$PATH_SEPARATOR$PATH)
+if test -z "$lt_cv_path_MAGIC_CMD"; then
+  if test -n "$ac_tool_prefix"; then
+    _LT_PATH_TOOL_PREFIX(file, /usr/bin$PATH_SEPARATOR$PATH)
+  else
+    MAGIC_CMD=:
+  fi
+fi
+])# _LT_PATH_MAGIC
+
+
+# LT_PATH_LD
+# ----------
+# Find the pathname to the GNU or non-GNU linker.  When $GCC is yes the
+# compiler is asked for its linker with -print-prog-name=ld and an absolute
+# answer becomes the default for an empty $LD.  A non-empty $LD is taken as
+# is; otherwise $PATH is searched, preferring a linker whose `-v' output
+# agrees with the --with-gnu-ld setting.  The result is cached in
+# lt_cv_path_LD and stored in LD; configure stops with an error when no
+# linker is found.
+AC_DEFUN([LT_PATH_LD],
+[AC_REQUIRE([AC_PROG_CC])dnl
+AC_REQUIRE([AC_CANONICAL_HOST])dnl
+AC_REQUIRE([AC_CANONICAL_BUILD])dnl
+m4_require([_LT_DECL_SED])dnl
+m4_require([_LT_DECL_EGREP])dnl
+m4_require([_LT_PROG_ECHO_BACKSLASH])dnl
+
+AC_ARG_WITH([gnu-ld],
+    [AS_HELP_STRING([--with-gnu-ld],
+       [assume the C compiler uses GNU ld @<:@default=no@:>@])],
+    [test "$withval" = no || with_gnu_ld=yes],
+    [with_gnu_ld=no])dnl
+
+ac_prog=ld
+if test "$GCC" = yes; then
+  # Check if gcc -print-prog-name=ld gives a path.
+  AC_MSG_CHECKING([for ld used by $CC])
+  case $host in
+  *-*-mingw*)
+    # gcc leaves a trailing carriage return which upsets mingw
+    ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;;
+  *)
+    ac_prog=`($CC -print-prog-name=ld) 2>&5` ;;
+  esac
+  case $ac_prog in
+    # Accept absolute paths.
+    [[\\/]]* | ?:[[\\/]]*)
+      re_direlt='/[[^/]][[^/]]*/\.\./'
+      # Canonicalize the pathname of ld
+      ac_prog=`$ECHO "$ac_prog"| $SED 's%\\\\%/%g'`
+      # Collapse one "dir/../" component per iteration until none is left.
+      while $ECHO "$ac_prog" | $GREP "$re_direlt" > /dev/null 2>&1; do
+       ac_prog=`$ECHO $ac_prog| $SED "s%$re_direlt%/%"`
+      done
+      test -z "$LD" && LD="$ac_prog"
+      ;;
+  "")
+    # If it fails, then pretend we aren't using GCC.
+    ac_prog=ld
+    ;;
+  *)
+    # If it is relative, then search for the first ld in PATH.
+    with_gnu_ld=unknown
+    ;;
+  esac
+elif test "$with_gnu_ld" = yes; then
+  AC_MSG_CHECKING([for GNU ld])
+else
+  AC_MSG_CHECKING([for non-GNU ld])
+fi
+AC_CACHE_VAL(lt_cv_path_LD,
+[if test -z "$LD"; then
+  lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR
+  for ac_dir in $PATH; do
+    IFS="$lt_save_ifs"
+    test -z "$ac_dir" && ac_dir=.
+    if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then
+      lt_cv_path_LD="$ac_dir/$ac_prog"
+      # Check to see if the program is GNU ld.  I'd rather use --version,
+      # but apparently some variants of GNU ld only accept -v.
+      # Break only if it was the GNU/non-GNU ld that we prefer.
+      case `"$lt_cv_path_LD" -v 2>&1 </dev/null` in
+      *GNU* | *'with BFD'*)
+       test "$with_gnu_ld" != no && break
+       ;;
+      *)
+       test "$with_gnu_ld" != yes && break
+       ;;
+      esac
+    fi
+  done
+  IFS="$lt_save_ifs"
+else
+  lt_cv_path_LD="$LD" # Let the user override the test with a path.
+fi])
+LD="$lt_cv_path_LD"
+if test -n "$LD"; then
+  AC_MSG_RESULT($LD)
+else
+  AC_MSG_RESULT(no)
+fi
+test -z "$LD" && AC_MSG_ERROR([no acceptable ld found in \$PATH])
+# Record in with_gnu_ld whether the chosen linker really is GNU ld.
+_LT_PATH_LD_GNU
+AC_SUBST([LD])
+
+_LT_TAGDECL([], [LD], [1], [The linker used to build libraries])
+])# LT_PATH_LD
+
+# Old names:
+AU_ALIAS([AM_PROG_LD], [LT_PATH_LD])
+AU_ALIAS([AC_PROG_LD], [LT_PATH_LD])
+dnl aclocal-1.4 backwards compatibility:
+dnl AC_DEFUN([AM_PROG_LD], [])
+dnl AC_DEFUN([AC_PROG_LD], [])
+
+
+# _LT_PATH_LD_GNU
+# ---------------
+# Decide whether $LD is GNU ld by looking at its `-v' banner.  The answer
+# (yes or no) is cached in lt_cv_prog_gnu_ld and copied to with_gnu_ld.
+m4_defun([_LT_PATH_LD_GNU],
+[AC_CACHE_CHECK([if the linker ($LD) is GNU ld], [lt_cv_prog_gnu_ld],
+[# Probe with -v rather than --version: apparently some GNU lds only
+# accept -v.  Assume a non-GNU linker unless the banner says otherwise.
+lt_cv_prog_gnu_ld=no
+case `$LD -v 2>&1 </dev/null` in
+*GNU* | *'with BFD'*) lt_cv_prog_gnu_ld=yes ;;
+esac])
+with_gnu_ld=$lt_cv_prog_gnu_ld
+])# _LT_PATH_LD_GNU
+
+
+
+# _LT_CMD_RELOAD
+# --------------
+# Find the reload flag for the linker and build the matching reload command.
+# reload_flag is the cached flag (default -r), given a leading blank when it
+# is non-empty and has none.  reload_cmds defaults to a plain $LD
+# invocation and is overridden for some host/compiler combinations.
+#   -- PORTME Some linkers may need a different reload flag.
+m4_defun([_LT_CMD_RELOAD],
+[AC_CACHE_CHECK([for $LD option to reload object files],
+  [lt_cv_ld_reload_flag], [lt_cv_ld_reload_flag='-r'])
+reload_flag=$lt_cv_ld_reload_flag
+case $reload_flag in
+  "" | " "*)
+    ;;
+  *)
+    reload_flag=" $reload_flag"
+    ;;
+esac
+
+# Generic command; the host specific cases below replace it where needed.
+reload_cmds='$LD$reload_flag -o $output$reload_objs'
+case $host_os in
+  cygwin* | mingw* | pw32* | cegcc*)
+    # Without GCC there is no reload command on these hosts.
+    test "$GCC" = yes || reload_cmds=false
+    ;;
+  darwin*)
+    # With GCC the compiler driver performs the reload; any other compiler
+    # keeps the generic command set above.
+    if test "$GCC" = yes; then
+      reload_cmds='$LTCC $LTCFLAGS -nostdlib ${wl}-r -o $output$reload_objs'
+    fi
+    ;;
+esac
+_LT_TAGDECL([], [reload_flag], [1], [How to create reloadable object files])dnl
+_LT_TAGDECL([], [reload_cmds], [2])dnl
+])# _LT_CMD_RELOAD
+
+
+# _LT_CHECK_MAGIC_METHOD
+# ----------------------
+# How to check for library dependencies.  A per-host table selects the
+# method, cached as lt_cv_deplibs_check_method, together with the command
+# and test file used by the file_magic method (lt_cv_file_magic_cmd and
+# lt_cv_file_magic_test_file).  The macro then sets deplibs_check_method
+# and file_magic_cmd from the cached values, plus file_magic_glob and
+# want_nocaseglob.
+#  -- PORTME fill in with the dynamic library characteristics
+m4_defun([_LT_CHECK_MAGIC_METHOD],
+[m4_require([_LT_DECL_EGREP])
+m4_require([_LT_DECL_OBJDUMP])
+AC_CACHE_CHECK([how to recognize dependent libraries],
+lt_cv_deplibs_check_method,
+[lt_cv_file_magic_cmd='$MAGIC_CMD'
+lt_cv_file_magic_test_file=
+lt_cv_deplibs_check_method='unknown'
+# Need to set the preceding variable on all platforms that support
+# interlibrary dependencies.
+# 'none' -- dependencies not supported.
+# `unknown' -- same as none, but documents that we really don't know.
+# 'pass_all' -- all dependencies passed with no checks.
+# 'test_compile' -- check by making test program.
+# 'file_magic [[regex]]' -- check by looking for files in library path
+# which responds to the $file_magic_cmd with a given extended regex.
+# If you have `file' or equivalent on your system and you're not sure
+# whether `pass_all' will *always* work, you probably want this one.
+
+case $host_os in
+aix[[4-9]]*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+beos*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+bsdi[[45]]*)
+  lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (shared object|dynamic lib)'
+  lt_cv_file_magic_cmd='/usr/bin/file -L'
+  lt_cv_file_magic_test_file=/shlib/libc.so
+  ;;
+
+cygwin*)
+  # func_win32_libid is a shell function defined in ltmain.sh
+  lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL'
+  lt_cv_file_magic_cmd='func_win32_libid'
+  ;;
+
+mingw* | pw32*)
+  # Base MSYS/MinGW do not provide the 'file' command needed by
+  # func_win32_libid shell function, so use a weaker test based on 'objdump',
+  # unless we find 'file', for example because we are cross-compiling.
+  # func_win32_libid assumes BSD nm, so disallow it if using MS dumpbin.
+  if ( test "$lt_cv_nm_interface" = "BSD nm" && file / ) >/dev/null 2>&1; then
+    lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL'
+    lt_cv_file_magic_cmd='func_win32_libid'
+  else
+    # Keep this pattern in sync with the one in func_win32_libid.
+    lt_cv_deplibs_check_method='file_magic file format (pei*-i386(.*architecture: i386)?|pe-arm-wince|pe-x86-64)'
+    lt_cv_file_magic_cmd='$OBJDUMP -f'
+  fi
+  ;;
+
+cegcc*)
+  # use the weaker test based on 'objdump'. See mingw*.
+  lt_cv_deplibs_check_method='file_magic file format pe-arm-.*little(.*architecture: arm)?'
+  lt_cv_file_magic_cmd='$OBJDUMP -f'
+  ;;
+
+darwin* | rhapsody*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+freebsd* | dragonfly*)
+  if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then
+    case $host_cpu in
+    i*86 )
+      # Not sure whether the presence of OpenBSD here was a mistake.
+      # Let's accept both of them until this is cleared up.
+      lt_cv_deplibs_check_method='file_magic (FreeBSD|OpenBSD|DragonFly)/i[[3-9]]86 (compact )?demand paged shared library'
+      lt_cv_file_magic_cmd=/usr/bin/file
+      lt_cv_file_magic_test_file=`echo /usr/lib/libc.so.*`
+      ;;
+    esac
+  else
+    lt_cv_deplibs_check_method=pass_all
+  fi
+  ;;
+
+gnu*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+haiku*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+hpux10.20* | hpux11*)
+  lt_cv_file_magic_cmd=/usr/bin/file
+  case $host_cpu in
+  ia64*)
+    lt_cv_deplibs_check_method='file_magic (s[[0-9]][[0-9]][[0-9]]|ELF-[[0-9]][[0-9]]) shared object file - IA64'
+    lt_cv_file_magic_test_file=/usr/lib/hpux32/libc.so
+    ;;
+  hppa*64*)
+    [lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|ELF[ -][0-9][0-9])(-bit)?( [LM]SB)? shared object( file)?[, -]* PA-RISC [0-9]\.[0-9]']
+    lt_cv_file_magic_test_file=/usr/lib/pa20_64/libc.sl
+    ;;
+  *)
+    lt_cv_deplibs_check_method='file_magic (s[[0-9]][[0-9]][[0-9]]|PA-RISC[[0-9]]\.[[0-9]]) shared library'
+    lt_cv_file_magic_test_file=/usr/lib/libc.sl
+    ;;
+  esac
+  ;;
+
+interix[[3-9]]*)
+  # PIC code is broken on Interix 3.x, that's why |\.a not |_pic\.a here
+  lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so|\.a)$'
+  ;;
+
+irix5* | irix6* | nonstopux*)
+  # NOTE(review): libmagic is assigned here but is not read anywhere else
+  # in this macro; the method chosen below is pass_all regardless.
+  case $LD in
+  *-32|*"-32 ") libmagic=32-bit;;
+  *-n32|*"-n32 ") libmagic=N32;;
+  *-64|*"-64 ") libmagic=64-bit;;
+  *) libmagic=never-match;;
+  esac
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+# This must be glibc/ELF.
+linux* | k*bsd*-gnu | kopensolaris*-gnu)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+netbsd*)
+  if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then
+    lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|_pic\.a)$'
+  else
+    lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so|_pic\.a)$'
+  fi
+  ;;
+
+newos6*)
+  lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (executable|dynamic lib)'
+  lt_cv_file_magic_cmd=/usr/bin/file
+  lt_cv_file_magic_test_file=/usr/lib/libnls.so
+  ;;
+
+*nto* | *qnx*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+openbsd*)
+  if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
+    lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|\.so|_pic\.a)$'
+  else
+    lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|_pic\.a)$'
+  fi
+  ;;
+
+osf3* | osf4* | osf5*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+rdos*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+solaris*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+sysv4 | sysv4.3*)
+  case $host_vendor in
+  motorola)
+    lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (shared object|dynamic lib) M[[0-9]][[0-9]]* Version [[0-9]]'
+    lt_cv_file_magic_test_file=`echo /usr/lib/libc.so*`
+    ;;
+  ncr)
+    lt_cv_deplibs_check_method=pass_all
+    ;;
+  sequent)
+    lt_cv_file_magic_cmd='/bin/file'
+    lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[LM]]SB (shared object|dynamic lib )'
+    ;;
+  sni)
+    lt_cv_file_magic_cmd='/bin/file'
+    lt_cv_deplibs_check_method="file_magic ELF [[0-9]][[0-9]]*-bit [[LM]]SB dynamic lib"
+    lt_cv_file_magic_test_file=/lib/libc.so
+    ;;
+  siemens)
+    lt_cv_deplibs_check_method=pass_all
+    ;;
+  pc)
+    lt_cv_deplibs_check_method=pass_all
+    ;;
+  esac
+  ;;
+
+tpf*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+esac
+])
+
+# The two glob helpers below keep their defaults unless build and host are
+# the same and the host is mingw or pw32.
+file_magic_glob=
+want_nocaseglob=no
+if test "$build" = "$host"; then
+  case $host_os in
+  mingw* | pw32*)
+    if ( shopt | grep nocaseglob ) >/dev/null 2>&1; then
+      want_nocaseglob=yes
+    else
+      file_magic_glob=`echo aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZ | $SED -e "s/\(..\)/s\/[[\1]]\/[[\1]]\/g;/g"`
+    fi
+    ;;
+  esac
+fi
+
+file_magic_cmd=$lt_cv_file_magic_cmd
+deplibs_check_method=$lt_cv_deplibs_check_method
+test -z "$deplibs_check_method" && deplibs_check_method=unknown
+
+_LT_DECL([], [deplibs_check_method], [1],
+    [Method to check whether dependent libraries are shared objects])
+_LT_DECL([], [file_magic_cmd], [1],
+    [Command to use when deplibs_check_method = "file_magic"])
+_LT_DECL([], [file_magic_glob], [1],
+    [How to find potential files when deplibs_check_method = "file_magic"])
+_LT_DECL([], [want_nocaseglob], [1],
+    [Find potential files using nocaseglob when deplibs_check_method = "file_magic"])
+])# _LT_CHECK_MAGIC_METHOD
+
+
+# LT_PATH_NM
+# ----------
+# Find the pathname to a BSD- or MS-compatible name lister.  A non-empty
+# $NM is taken as is; otherwise each candidate nm found in $PATH or in a
+# few extra system directories is probed with -B and then -p against
+# /dev/null, and the first one accepting either flag is chosen together
+# with that flag.  When no nm is found at all, dumpbin (or "link -dump")
+# is looked up instead.  Finally the interface of the chosen tool is
+# recorded in lt_cv_nm_interface as either "BSD nm" or "MS dumpbin".
+AC_DEFUN([LT_PATH_NM],
+[AC_REQUIRE([AC_PROG_CC])dnl
+AC_CACHE_CHECK([for BSD- or MS-compatible name lister (nm)], lt_cv_path_NM,
+[if test -n "$NM"; then
+  # Let the user override the test.
+  lt_cv_path_NM="$NM"
+else
+  lt_nm_to_check="${ac_tool_prefix}nm"
+  if test -n "$ac_tool_prefix" && test "$build" = "$host"; then
+    lt_nm_to_check="$lt_nm_to_check nm"
+  fi
+  for lt_tmp_nm in $lt_nm_to_check; do
+    lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR
+    for ac_dir in $PATH /usr/ccs/bin/elf /usr/ccs/bin /usr/ucb /bin; do
+      IFS="$lt_save_ifs"
+      test -z "$ac_dir" && ac_dir=.
+      tmp_nm="$ac_dir/$lt_tmp_nm"
+      if test -f "$tmp_nm" || test -f "$tmp_nm$ac_exeext" ; then
+       # Check to see if the nm accepts a BSD-compat flag.
+       # Adding the `sed 1q' prevents false positives on HP-UX, which says:
+       #   nm: unknown option "B" ignored
+       # Tru64's nm complains that /dev/null is an invalid object file
+       case `"$tmp_nm" -B /dev/null 2>&1 | sed '1q'` in
+       */dev/null* | *'Invalid file or object type'*)
+         lt_cv_path_NM="$tmp_nm -B"
+         break
+         ;;
+       *)
+         case `"$tmp_nm" -p /dev/null 2>&1 | sed '1q'` in
+         */dev/null*)
+           lt_cv_path_NM="$tmp_nm -p"
+           break
+           ;;
+         *)
+           lt_cv_path_NM=${lt_cv_path_NM="$tmp_nm"} # keep the first match, but
+           continue # so that we can try to find one that supports BSD flags
+           ;;
+         esac
+         ;;
+       esac
+      fi
+    done
+    IFS="$lt_save_ifs"
+  done
+  : ${lt_cv_path_NM=no}
+fi])
+if test "$lt_cv_path_NM" != "no"; then
+  NM="$lt_cv_path_NM"
+else
+  # Didn't find any BSD compatible name lister, look for dumpbin.
+  if test -n "$DUMPBIN"; then :
+    # Let the user override the test.
+  else
+    AC_CHECK_TOOLS(DUMPBIN, [dumpbin "link -dump"], :)
+    case `$DUMPBIN -symbols /dev/null 2>&1 | sed '1q'` in
+    *COFF*)
+      DUMPBIN="$DUMPBIN -symbols"
+      ;;
+    *)
+      DUMPBIN=:
+      ;;
+    esac
+  fi
+  AC_SUBST([DUMPBIN])
+  if test "$DUMPBIN" != ":"; then
+    NM="$DUMPBIN"
+  fi
+fi
+test -z "$NM" && NM=nm
+AC_SUBST([NM])
+_LT_DECL([], [NM], [1], [A BSD- or MS-compatible name lister])dnl
+
+# Compile a one-variable test file and inspect the lister output for it:
+# an External entry naming the variable identifies the MS dumpbin format.
+AC_CACHE_CHECK([the name lister ($NM) interface], [lt_cv_nm_interface],
+  [lt_cv_nm_interface="BSD nm"
+  echo "int some_variable = 0;" > conftest.$ac_ext
+  (eval echo "\"\$as_me:$LINENO: $ac_compile\"" >&AS_MESSAGE_LOG_FD)
+  (eval "$ac_compile" 2>conftest.err)
+  cat conftest.err >&AS_MESSAGE_LOG_FD
+  (eval echo "\"\$as_me:$LINENO: $NM \\\"conftest.$ac_objext\\\"\"" >&AS_MESSAGE_LOG_FD)
+  (eval "$NM \"conftest.$ac_objext\"" 2>conftest.err > conftest.out)
+  cat conftest.err >&AS_MESSAGE_LOG_FD
+  (eval echo "\"\$as_me:$LINENO: output\"" >&AS_MESSAGE_LOG_FD)
+  cat conftest.out >&AS_MESSAGE_LOG_FD
+  if $GREP 'External.*some_variable' conftest.out > /dev/null; then
+    lt_cv_nm_interface="MS dumpbin"
+  fi
+  rm -f conftest*])
+])# LT_PATH_NM
+
+# Old names:
+AU_ALIAS([AM_PROG_NM], [LT_PATH_NM])
+AU_ALIAS([AC_PROG_NM], [LT_PATH_NM])
+dnl aclocal-1.4 backwards compatibility:
+dnl AC_DEFUN([AM_PROG_NM], [])
+dnl AC_DEFUN([AC_PROG_NM], [])
+
+# _LT_CHECK_SHAREDLIB_FROM_LINKLIB
+# --------------------------------
+# how to determine the name of the shared library
+# associated with a specific link library.
+# On cygwin, mingw, pw32 and cegcc hosts one of two ltmain.sh shell
+# functions is selected, depending on whether the help output of $DLLTOOL
+# mentions --identify-strict; on every other host the command is $ECHO.
+# The choice is cached in lt_cv_sharedlib_from_linklib_cmd and stored in
+# sharedlib_from_linklib_cmd.
+#  -- PORTME fill in with the dynamic library characteristics
+m4_defun([_LT_CHECK_SHAREDLIB_FROM_LINKLIB],
+[m4_require([_LT_DECL_EGREP])
+m4_require([_LT_DECL_OBJDUMP])
+m4_require([_LT_DECL_DLLTOOL])
+AC_CACHE_CHECK([how to associate runtime and link libraries],
+lt_cv_sharedlib_from_linklib_cmd,
+[lt_cv_sharedlib_from_linklib_cmd='unknown'
+
+case $host_os in
+cygwin* | mingw* | pw32* | cegcc*)
+  # two different shell functions defined in ltmain.sh
+  # decide which to use based on capabilities of $DLLTOOL
+  case `$DLLTOOL --help 2>&1` in
+  *--identify-strict*)
+    lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib
+    ;;
+  *)
+    lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib_fallback
+    ;;
+  esac
+  ;;
+*)
+  # fallback: assume linklib IS sharedlib
+  lt_cv_sharedlib_from_linklib_cmd="$ECHO"
+  ;;
+esac
+])
+sharedlib_from_linklib_cmd=$lt_cv_sharedlib_from_linklib_cmd
+# An empty result also falls back to plain echoing of the link library name.
+test -z "$sharedlib_from_linklib_cmd" && sharedlib_from_linklib_cmd=$ECHO
+
+_LT_DECL([], [sharedlib_from_linklib_cmd], [1],
+    [Command to associate shared and link libraries])
+])# _LT_CHECK_SHAREDLIB_FROM_LINKLIB
+
+
+# _LT_PATH_MANIFEST_TOOL
+# ----------------------
+# Locate the manifest tool.  The tool looked up under the name mt is run
+# with '-?' and accepted only if its standard output contains
+# "Manifest Tool"; otherwise MANIFEST_TOOL is set to `:'.
+# NOTE(review): the cache variable is spelled lt_cv_path_mainfest_tool
+# (sic) throughout this macro; renaming it would presumably invalidate
+# values kept in existing config caches, so the spelling is left alone.
+m4_defun([_LT_PATH_MANIFEST_TOOL],
+[AC_CHECK_TOOL(MANIFEST_TOOL, mt, :)
+test -z "$MANIFEST_TOOL" && MANIFEST_TOOL=mt
+AC_CACHE_CHECK([if $MANIFEST_TOOL is a manifest tool], [lt_cv_path_mainfest_tool],
+  [lt_cv_path_mainfest_tool=no
+  echo "$as_me:$LINENO: $MANIFEST_TOOL '-?'" >&AS_MESSAGE_LOG_FD
+  $MANIFEST_TOOL '-?' 2>conftest.err > conftest.out
+  cat conftest.err >&AS_MESSAGE_LOG_FD
+  if $GREP 'Manifest Tool' conftest.out > /dev/null; then
+    lt_cv_path_mainfest_tool=yes
+  fi
+  rm -f conftest*])
+if test "x$lt_cv_path_mainfest_tool" != xyes; then
+  MANIFEST_TOOL=:
+fi
+_LT_DECL([], [MANIFEST_TOOL], [1], [Manifest tool])dnl
+])# _LT_PATH_MANIFEST_TOOL
+
+
+# LT_LIB_M
+# --------
+# Check for the math library and substitute LIBM.  LIBM stays empty on the
+# hosts listed in the first case branch; on *-ncr-sysv4.3* it may hold
+# -lmw and/or -lm, depending on which library checks succeed; on all other
+# hosts it is -lm when the check for cos in libm succeeds.
+AC_DEFUN([LT_LIB_M],
+[AC_REQUIRE([AC_CANONICAL_HOST])dnl
+LIBM=
+case $host in
+*-*-beos* | *-*-cegcc* | *-*-cygwin* | *-*-haiku* | *-*-pw32* | *-*-darwin*)
+  # These systems don't have libm, or don't need it
+  ;;
+*-ncr-sysv4.3*)
+  AC_CHECK_LIB(mw, _mwvalidcheckl, LIBM="-lmw")
+  AC_CHECK_LIB(m, cos, LIBM="$LIBM -lm")
+  ;;
+*)
+  AC_CHECK_LIB(m, cos, LIBM="-lm")
+  ;;
+esac
+AC_SUBST([LIBM])
+])# LT_LIB_M
+
+# Old name:
+AU_ALIAS([AC_CHECK_LIBM], [LT_LIB_M])
+dnl aclocal-1.4 backwards compatibility:
+dnl AC_DEFUN([AC_CHECK_LIBM], [])
+
+
+# _LT_COMPILER_NO_RTTI([TAGNAME])
+# -------------------------------
+# Set the lt_prog_compiler_no_builtin_flag variable for TAGNAME.  It stays
+# empty unless $GCC is yes.  With GCC it starts as ' -fno-builtin'
+# (' -Xcompiler -fno-builtin' when $cc_basename matches nvcc*), and
+# ' -fno-rtti -fno-exceptions' is appended by the last argument of the
+# _LT_COMPILER_OPTION probe -- presumably its action on success; confirm
+# against that macro's definition.
+m4_defun([_LT_COMPILER_NO_RTTI],
+[m4_require([_LT_TAG_COMPILER])dnl
+
+_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=
+
+if test "$GCC" = yes; then
+  case $cc_basename in
+  nvcc*)
+    _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=' -Xcompiler -fno-builtin' ;;
+  *)
+    _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=' -fno-builtin' ;;
+  esac
+
+  _LT_COMPILER_OPTION([if $compiler supports -fno-rtti -fno-exceptions],
+    lt_cv_prog_compiler_rtti_exceptions,
+    [-fno-rtti -fno-exceptions], [],
+    [_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)="$_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1) -fno-rtti -fno-exceptions"])
+fi
+_LT_TAGDECL([no_builtin_flag], [lt_prog_compiler_no_builtin_flag], [1],
+       [Compiler flag to turn off builtin functions])
+])# _LT_COMPILER_NO_RTTI
+
+
+# _LT_CMD_GLOBAL_SYMBOLS
+# ----------------------
+m4_defun([_LT_CMD_GLOBAL_SYMBOLS],
+[AC_REQUIRE([AC_CANONICAL_HOST])dnl
+AC_REQUIRE([AC_PROG_CC])dnl
+AC_REQUIRE([AC_PROG_AWK])dnl
+AC_REQUIRE([LT_PATH_NM])dnl
+AC_REQUIRE([LT_PATH_LD])dnl
+m4_require([_LT_DECL_SED])dnl
+m4_require([_LT_DECL_EGREP])dnl
+m4_require([_LT_TAG_COMPILER])dnl
+
+# Check for command to grab the raw symbol name followed by C symbol from nm.
+AC_MSG_CHECKING([command to parse $NM output from $compiler object])
+AC_CACHE_VAL([lt_cv_sys_global_symbol_pipe],
+[
+# These are sane defaults that work on at least a few old systems.
+# [They come from Ultrix.  What could be older than Ultrix?!! ;)]
+
+# Character class describing NM global symbol codes.
+symcode='[[BCDEGRST]]'
+
+# Regexp to match symbols that can be accessed directly from C.
+sympat='\([[_A-Za-z]][[_A-Za-z0-9]]*\)'
+
+# Define system-specific variables.
+case $host_os in
+aix*)
+  symcode='[[BCDT]]'
+  ;;
+cygwin* | mingw* | pw32* | cegcc*)
+  symcode='[[ABCDGISTW]]'
+  ;;
+hpux*)
+  if test "$host_cpu" = ia64; then
+    symcode='[[ABCDEGRST]]'
+  fi
+  ;;
+irix* | nonstopux*)
+  symcode='[[BCDEGRST]]'
+  ;;
+osf*)
+  symcode='[[BCDEGQRST]]'
+  ;;
+solaris*)
+  symcode='[[BDRT]]'
+  ;;
+sco3.2v5*)
+  symcode='[[DT]]'
+  ;;
+sysv4.2uw2*)
+  symcode='[[DT]]'
+  ;;
+sysv5* | sco5v6* | unixware* | OpenUNIX*)
+  symcode='[[ABDT]]'
+  ;;
+sysv4)
+  symcode='[[DFNSTU]]'
+  ;;
+esac
+
+# If we're using GNU nm, then use its standard symbol codes.
+case `$NM -V 2>&1` in
+*GNU* | *'with BFD'*)
+  symcode='[[ABCDGIRSTW]]' ;;
+esac
+
+# Transform an extracted symbol line into a proper C declaration.
+# Some systems (esp. on ia64) link data and code symbols differently,
+# so use this general approach.
+lt_cv_sys_global_symbol_to_cdecl="sed -n -e 's/^T .* \(.*\)$/extern int \1();/p' -e 's/^$symcode* .* \(.*\)$/extern char \1;/p'"
+
+# Transform an extracted symbol line into symbol name and symbol address
+lt_cv_sys_global_symbol_to_c_name_address="sed -n -e 's/^: \([[^ ]]*\)[[ ]]*$/  {\\\"\1\\\", (void *) 0},/p' -e 's/^$symcode* \([[^ ]]*\) \([[^ ]]*\)$/  {\"\2\", (void *) \&\2},/p'"
+lt_cv_sys_global_symbol_to_c_name_address_lib_prefix="sed -n -e 's/^: \([[^ ]]*\)[[ ]]*$/  {\\\"\1\\\", (void *) 0},/p' -e 's/^$symcode* \([[^ ]]*\) \(lib[[^ ]]*\)$/  {\"\2\", (void *) \&\2},/p' -e 's/^$symcode* \([[^ ]]*\) \([[^ ]]*\)$/  {\"lib\2\", (void *) \&\2},/p'"
+
+# Handle CRLF in mingw tool chain
+opt_cr=
+case $build_os in
+mingw*)
+  opt_cr=`$ECHO 'x\{0,1\}' | tr x '\015'` # option cr in regexp
+  ;;
+esac
+
+# Try without a prefix underscore, then with it.
+for ac_symprfx in "" "_"; do
+
+  # Transform symcode, sympat, and symprfx into a raw symbol and a C symbol.
+  symxfrm="\\1 $ac_symprfx\\2 \\2"
+
+  # Write the raw and C identifiers.
+  if test "$lt_cv_nm_interface" = "MS dumpbin"; then
+    # Fake it for dumpbin and say T for any non-static function
+    # and D for any global variable.
+    # Also find C++ and __fastcall symbols from MSVC++,
+    # which start with @ or ?.
+    lt_cv_sys_global_symbol_pipe="$AWK ['"\
+"     {last_section=section; section=\$ 3};"\
+"     /^COFF SYMBOL TABLE/{for(i in hide) delete hide[i]};"\
+"     /Section length .*#relocs.*(pick any)/{hide[last_section]=1};"\
+"     \$ 0!~/External *\|/{next};"\
+"     / 0+ UNDEF /{next}; / UNDEF \([^|]\)*()/{next};"\
+"     {if(hide[section]) next};"\
+"     {f=0}; \$ 0~/\(\).*\|/{f=1}; {printf f ? \"T \" : \"D \"};"\
+"     {split(\$ 0, a, /\||\r/); split(a[2], s)};"\
+"     s[1]~/^[@?]/{print s[1], s[1]; next};"\
+"     s[1]~prfx {split(s[1],t,\"@\"); print t[1], substr(t[1],length(prfx))}"\
+"     ' prfx=^$ac_symprfx]"
+  else
+    lt_cv_sys_global_symbol_pipe="sed -n -e 's/^.*[[    ]]\($symcode$symcode*\)[[       ]][[    ]]*$ac_symprfx$sympat$opt_cr$/$symxfrm/p'"
+  fi
+  lt_cv_sys_global_symbol_pipe="$lt_cv_sys_global_symbol_pipe | sed '/ __gnu_lto/d'"
+
+  # Check to see that the pipe works correctly.
+  pipe_works=no
+
+  rm -f conftest*
+  cat > conftest.$ac_ext <<_LT_EOF
+#ifdef __cplusplus
+extern "C" {
+#endif
+char nm_test_var;
+void nm_test_func(void);
+void nm_test_func(void){}
+#ifdef __cplusplus
+}
+#endif
+int main(){nm_test_var='a';nm_test_func();return(0);}
+_LT_EOF
+
+  if AC_TRY_EVAL(ac_compile); then
+    # Now try to grab the symbols.
+    nlist=conftest.nm
+    if AC_TRY_EVAL(NM conftest.$ac_objext \| "$lt_cv_sys_global_symbol_pipe" \> $nlist) && test -s "$nlist"; then
+      # Try sorting and uniquifying the output.
+      if sort "$nlist" | uniq > "$nlist"T; then
+       mv -f "$nlist"T "$nlist"
+      else
+       rm -f "$nlist"T
+      fi
+
+      # Make sure that we snagged all the symbols we need.
+      if $GREP ' nm_test_var$' "$nlist" >/dev/null; then
+       if $GREP ' nm_test_func$' "$nlist" >/dev/null; then
+         cat <<_LT_EOF > conftest.$ac_ext
+/* Keep this code in sync between libtool.m4, ltmain, lt_system.h, and tests.  */
+#if defined(_WIN32) || defined(__CYGWIN__) || defined(_WIN32_WCE)
+/* DATA imports from DLLs on WIN32 con't be const, because runtime
+   relocations are performed -- see ld's documentation on pseudo-relocs.  */
+# define LT@&t@_DLSYM_CONST
+#elif defined(__osf__)
+/* This system does not cope well with relocations in const data.  */
+# define LT@&t@_DLSYM_CONST
+#else
+# define LT@&t@_DLSYM_CONST const
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+_LT_EOF
+         # Now generate the symbol file.
+         eval "$lt_cv_sys_global_symbol_to_cdecl"' < "$nlist" | $GREP -v main >> conftest.$ac_ext'
+
+         cat <<_LT_EOF >> conftest.$ac_ext
+
+/* The mapping between symbol names and symbols.  */
+LT@&t@_DLSYM_CONST struct {
+  const char *name;
+  void       *address;
+}
+lt__PROGRAM__LTX_preloaded_symbols[[]] =
+{
+  { "@PROGRAM@", (void *) 0 },
+_LT_EOF
+         $SED "s/^$symcode$symcode* \(.*\) \(.*\)$/  {\"\2\", (void *) \&\2},/" < "$nlist" | $GREP -v main >> conftest.$ac_ext
+         cat <<\_LT_EOF >> conftest.$ac_ext
+  {0, (void *) 0}
+};
+
+/* This works around a problem in FreeBSD linker */
+#ifdef FREEBSD_WORKAROUND
+static const void *lt_preloaded_setup() {
+  return lt__PROGRAM__LTX_preloaded_symbols;
+}
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+_LT_EOF
+         # Now try linking the two files.
+         mv conftest.$ac_objext conftstm.$ac_objext
+         lt_globsym_save_LIBS=$LIBS
+         lt_globsym_save_CFLAGS=$CFLAGS
+         LIBS="conftstm.$ac_objext"
+         CFLAGS="$CFLAGS$_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)"
+         if AC_TRY_EVAL(ac_link) && test -s conftest${ac_exeext}; then
+           pipe_works=yes
+         fi
+         LIBS=$lt_globsym_save_LIBS
+         CFLAGS=$lt_globsym_save_CFLAGS
+       else
+         echo "cannot find nm_test_func in $nlist" >&AS_MESSAGE_LOG_FD
+       fi
+      else
+       echo "cannot find nm_test_var in $nlist" >&AS_MESSAGE_LOG_FD
+      fi
+    else
+      echo "cannot run $lt_cv_sys_global_symbol_pipe" >&AS_MESSAGE_LOG_FD
+    fi
+  else
+    echo "$progname: failed program was:" >&AS_MESSAGE_LOG_FD
+    cat conftest.$ac_ext >&5
+  fi
+  rm -rf conftest* conftst*
+
+  # Do not use the global_symbol_pipe unless it works.
+  if test "$pipe_works" = yes; then
+    break
+  else
+    lt_cv_sys_global_symbol_pipe=
+  fi
+done
+])
+if test -z "$lt_cv_sys_global_symbol_pipe"; then
+  lt_cv_sys_global_symbol_to_cdecl=
+fi
+if test -z "$lt_cv_sys_global_symbol_pipe$lt_cv_sys_global_symbol_to_cdecl"; then
+  AC_MSG_RESULT(failed)
+else
+  AC_MSG_RESULT(ok)
+fi
+
+# Response file support.
+if test "$lt_cv_nm_interface" = "MS dumpbin"; then
+  nm_file_list_spec='@'
+elif $NM --help 2>/dev/null | grep '[[@]]FILE' >/dev/null; then
+  nm_file_list_spec='@'
+fi
+
+_LT_DECL([global_symbol_pipe], [lt_cv_sys_global_symbol_pipe], [1],
+    [Take the output of nm and produce a listing of raw symbols and C names])
+_LT_DECL([global_symbol_to_cdecl], [lt_cv_sys_global_symbol_to_cdecl], [1],
+    [Transform the output of nm in a proper C declaration])
+_LT_DECL([global_symbol_to_c_name_address],
+    [lt_cv_sys_global_symbol_to_c_name_address], [1],
+    [Transform the output of nm in a C name address pair])
+_LT_DECL([global_symbol_to_c_name_address_lib_prefix],
+    [lt_cv_sys_global_symbol_to_c_name_address_lib_prefix], [1],
+    [Transform the output of nm in a C name address pair when lib prefix is needed])
+_LT_DECL([], [nm_file_list_spec], [1],
+    [Specify filename containing input files for $NM])
+]) # _LT_CMD_GLOBAL_SYMBOLS
+
+
+# _LT_COMPILER_PIC([TAGNAME])
+# ---------------------------
+# Choose, for the language tag TAGNAME (which may be empty), the compiler
+# options stored in these three per-tag variables:
+#   lt_prog_compiler_wl      prefix for passing a linker flag through the
+#                            compiler,
+#   lt_prog_compiler_pic     additional flags for building library objects,
+#   lt_prog_compiler_static  flag to prevent dynamic linking.
+# All three are cleared first and then filled in from the tag, $GXX/$GCC,
+# $host_os, $host_cpu and $cc_basename.  Afterwards the PIC flag and the
+# static flag are each checked; a flag that does not work is cleared again,
+# and a PIC flag that does not work also sets
+# lt_prog_compiler_can_build_shared to "no".  The results are declared as
+# pic_flag, wl and link_static_flag.
+m4_defun([_LT_COMPILER_PIC],
+[m4_require([_LT_TAG_COMPILER])dnl
+_LT_TAGVAR(lt_prog_compiler_wl, $1)=
+_LT_TAGVAR(lt_prog_compiler_pic, $1)=
+_LT_TAGVAR(lt_prog_compiler_static, $1)=
+
+# The CXX tag has its own flag table; every other tag uses the second table.
+m4_if([$1], [CXX], [
+  # C++ specific cases for pic, static, wl, etc.
+  # GXX is yes: start from -Wl, and -static, then adjust per host OS.
+  if test "$GXX" = yes; then
+    _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+    _LT_TAGVAR(lt_prog_compiler_static, $1)='-static'
+
+    case $host_os in
+    aix*)
+      # All AIX code is PIC.
+      if test "$host_cpu" = ia64; then
+       # AIX 5 now supports IA64 processor
+       _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+      fi
+      ;;
+
+    amigaos*)
+      case $host_cpu in
+      powerpc)
+            # see comment about AmigaOS4 .so support
+            _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
+        ;;
+      m68k)
+            # FIXME: we need at least 68020 code to build shared libraries, but
+            # adding the `-m68020' flag to GCC prevents building anything better,
+            # like `-m68040'.
+            _LT_TAGVAR(lt_prog_compiler_pic, $1)='-m68020 -resident32 -malways-restore-a4'
+        ;;
+      esac
+      ;;
+
+    beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*)
+      # PIC is the default for these OSes.
+      ;;
+    mingw* | cygwin* | os2* | pw32* | cegcc*)
+      # This hack is so that the source file can tell whether it is being
+      # built for inclusion in a dll (and should export symbols for example).
+      # Although the cygwin gcc ignores -fPIC, still need this for old-style
+      # (--disable-auto-import) libraries
+      m4_if([$1], [GCJ], [],
+       [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT'])
+      ;;
+    darwin* | rhapsody*)
+      # PIC is the default on this platform
+      # Common symbols not allowed in MH_DYLIB files
+      _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fno-common'
+      ;;
+    *djgpp*)
+      # DJGPP does not support shared libraries at all
+      _LT_TAGVAR(lt_prog_compiler_pic, $1)=
+      ;;
+    haiku*)
+      # PIC is the default for Haiku.
+      # The "-static" flag exists, but is broken.
+      _LT_TAGVAR(lt_prog_compiler_static, $1)=
+      ;;
+    interix[[3-9]]*)
+      # Interix 3.x gcc -fpic/-fPIC options generate broken code.
+      # Instead, we relocate shared libraries at runtime.
+      ;;
+    sysv4*MP*)
+      if test -d /usr/nec; then
+       _LT_TAGVAR(lt_prog_compiler_pic, $1)=-Kconform_pic
+      fi
+      ;;
+    hpux*)
+      # PIC is the default for 64-bit PA HP-UX, but not for 32-bit
+      # PA HP-UX.  On IA64 HP-UX, PIC is the default but the pic flag
+      # sets the default TLS model and affects inlining.
+      case $host_cpu in
+      hppa*64*)
+       ;;
+      *)
+       _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
+       ;;
+      esac
+      ;;
+    *qnx* | *nto*)
+      # QNX uses GNU C++, but need to define -shared option too, otherwise
+      # it will coredump.
+      _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared'
+      ;;
+    *)
+      _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
+      ;;
+    esac
+  else
+    # GXX is not yes: pick the flags from the host OS and, within most
+    # entries, from the base name of the compiler.
+    case $host_os in
+      aix[[4-9]]*)
+       # All AIX code is PIC.
+       if test "$host_cpu" = ia64; then
+         # AIX 5 now supports IA64 processor
+         _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+       else
+         _LT_TAGVAR(lt_prog_compiler_static, $1)='-bnso -bI:/lib/syscalls.exp'
+       fi
+       ;;
+      chorus*)
+       case $cc_basename in
+       cxch68*)
+         # Green Hills C++ Compiler
+         # _LT_TAGVAR(lt_prog_compiler_static, $1)="--no_auto_instantiation -u __main -u __premain -u _abort -r $COOL_DIR/lib/libOrb.a $MVME_DIR/lib/CC/libC.a $MVME_DIR/lib/classix/libcx.s.a"
+         ;;
+       esac
+       ;;
+      mingw* | cygwin* | os2* | pw32* | cegcc*)
+       # This hack is so that the source file can tell whether it is being
+       # built for inclusion in a dll (and should export symbols for example).
+       m4_if([$1], [GCJ], [],
+         [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT'])
+       ;;
+      dgux*)
+       case $cc_basename in
+         ec++*)
+           _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
+           ;;
+         ghcx*)
+           # Green Hills C++ Compiler
+           _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic'
+           ;;
+         *)
+           ;;
+       esac
+       ;;
+      freebsd* | dragonfly*)
+       # FreeBSD uses GNU C++
+       ;;
+      hpux9* | hpux10* | hpux11*)
+       case $cc_basename in
+         CC*)
+           _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+           _LT_TAGVAR(lt_prog_compiler_static, $1)='${wl}-a ${wl}archive'
+           if test "$host_cpu" != ia64; then
+             _LT_TAGVAR(lt_prog_compiler_pic, $1)='+Z'
+           fi
+           ;;
+         aCC*)
+           _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+           _LT_TAGVAR(lt_prog_compiler_static, $1)='${wl}-a ${wl}archive'
+           case $host_cpu in
+           hppa*64*|ia64*)
+             # +Z the default
+             ;;
+           *)
+             _LT_TAGVAR(lt_prog_compiler_pic, $1)='+Z'
+             ;;
+           esac
+           ;;
+         *)
+           ;;
+       esac
+       ;;
+      interix*)
+       # This is c89, which is MS Visual C++ (no shared libs)
+       # Anyone wants to do a port?
+       ;;
+      irix5* | irix6* | nonstopux*)
+       case $cc_basename in
+         CC*)
+           _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+           _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared'
+           # CC pic flag -KPIC is the default.
+           ;;
+         *)
+           ;;
+       esac
+       ;;
+      linux* | k*bsd*-gnu | kopensolaris*-gnu)
+       case $cc_basename in
+         KCC*)
+           # KAI C++ Compiler
+           _LT_TAGVAR(lt_prog_compiler_wl, $1)='--backend -Wl,'
+           _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
+           ;;
+         ecpc* )
+           # old Intel C++ for x86_64 which still supported -KPIC.
+           _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+           _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
+           _LT_TAGVAR(lt_prog_compiler_static, $1)='-static'
+           ;;
+         icpc* )
+           # Intel C++, used to be incompatible with GCC.
+           # ICC 10 doesn't accept -KPIC any more.
+           _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+           _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
+           _LT_TAGVAR(lt_prog_compiler_static, $1)='-static'
+           ;;
+         pgCC* | pgcpp*)
+           # Portland Group C++ compiler
+           _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+           _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic'
+           _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+           ;;
+         cxx*)
+           # Compaq C++
+           # Make sure the PIC flag is empty.  It appears that all Alpha
+           # Linux and Compaq Tru64 Unix objects are PIC.
+           _LT_TAGVAR(lt_prog_compiler_pic, $1)=
+           _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared'
+           ;;
+         xlc* | xlC* | bgxl[[cC]]* | mpixl[[cC]]*)
+           # IBM XL 8.0, 9.0 on PPC and BlueGene
+           _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+           _LT_TAGVAR(lt_prog_compiler_pic, $1)='-qpic'
+           _LT_TAGVAR(lt_prog_compiler_static, $1)='-qstaticlink'
+           ;;
+         *)
+           # Unrecognized base name: look at the first five lines that
+           # the compiler prints for -V.
+           case `$CC -V 2>&1 | sed 5q` in
+           *Sun\ C*)
+             # Sun C++ 5.9
+             _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
+             _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+             _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld '
+             ;;
+           esac
+           ;;
+       esac
+       ;;
+      lynxos*)
+       ;;
+      m88k*)
+       ;;
+      mvs*)
+       case $cc_basename in
+         cxx*)
+           _LT_TAGVAR(lt_prog_compiler_pic, $1)='-W c,exportall'
+           ;;
+         *)
+           ;;
+       esac
+       ;;
+      netbsd*)
+       ;;
+      *qnx* | *nto*)
+        # QNX uses GNU C++, but need to define -shared option too, otherwise
+        # it will coredump.
+        _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared'
+        ;;
+      osf3* | osf4* | osf5*)
+       case $cc_basename in
+         KCC*)
+           _LT_TAGVAR(lt_prog_compiler_wl, $1)='--backend -Wl,'
+           ;;
+         RCC*)
+           # Rational C++ 2.4.1
+           _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic'
+           ;;
+         cxx*)
+           # Digital/Compaq C++
+           _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+           # Make sure the PIC flag is empty.  It appears that all Alpha
+           # Linux and Compaq Tru64 Unix objects are PIC.
+           _LT_TAGVAR(lt_prog_compiler_pic, $1)=
+           _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared'
+           ;;
+         *)
+           ;;
+       esac
+       ;;
+      psos*)
+       ;;
+      solaris*)
+       case $cc_basename in
+         CC* | sunCC*)
+           # Sun C++ 4.2, 5.x and Centerline C++
+           _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
+           _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+           _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld '
+           ;;
+         gcx*)
+           # Green Hills C++ Compiler
+           _LT_TAGVAR(lt_prog_compiler_pic, $1)='-PIC'
+           ;;
+         *)
+           ;;
+       esac
+       ;;
+      sunos4*)
+       case $cc_basename in
+         CC*)
+           # Sun C++ 4.x
+           _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic'
+           _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+           ;;
+         lcc*)
+           # Lucid
+           _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic'
+           ;;
+         *)
+           ;;
+       esac
+       ;;
+      sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*)
+       case $cc_basename in
+         CC*)
+           _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+           _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
+           _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+           ;;
+       esac
+       ;;
+      tandem*)
+       case $cc_basename in
+         NCC*)
+           # NonStop-UX NCC 3.20
+           _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
+           ;;
+         *)
+           ;;
+       esac
+       ;;
+      vxworks*)
+       ;;
+      *)
+       # No entry for this host OS: mark the tag as unable to build
+       # shared libraries.
+       _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no
+       ;;
+    esac
+  fi
+],
+[
+  # Every tag other than CXX.
+  # GCC is yes: start from -Wl, and -static, then adjust per host OS.
+  if test "$GCC" = yes; then
+    _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+    _LT_TAGVAR(lt_prog_compiler_static, $1)='-static'
+
+    case $host_os in
+      aix*)
+      # All AIX code is PIC.
+      if test "$host_cpu" = ia64; then
+       # AIX 5 now supports IA64 processor
+       _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+      fi
+      ;;
+
+    amigaos*)
+      case $host_cpu in
+      powerpc)
+            # see comment about AmigaOS4 .so support
+            _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
+        ;;
+      m68k)
+            # FIXME: we need at least 68020 code to build shared libraries, but
+            # adding the `-m68020' flag to GCC prevents building anything better,
+            # like `-m68040'.
+            _LT_TAGVAR(lt_prog_compiler_pic, $1)='-m68020 -resident32 -malways-restore-a4'
+        ;;
+      esac
+      ;;
+
+    beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*)
+      # PIC is the default for these OSes.
+      ;;
+
+    mingw* | cygwin* | pw32* | os2* | cegcc*)
+      # This hack is so that the source file can tell whether it is being
+      # built for inclusion in a dll (and should export symbols for example).
+      # Although the cygwin gcc ignores -fPIC, still need this for old-style
+      # (--disable-auto-import) libraries
+      m4_if([$1], [GCJ], [],
+       [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT'])
+      ;;
+
+    darwin* | rhapsody*)
+      # PIC is the default on this platform
+      # Common symbols not allowed in MH_DYLIB files
+      _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fno-common'
+      ;;
+
+    haiku*)
+      # PIC is the default for Haiku.
+      # The "-static" flag exists, but is broken.
+      _LT_TAGVAR(lt_prog_compiler_static, $1)=
+      ;;
+
+    hpux*)
+      # PIC is the default for 64-bit PA HP-UX, but not for 32-bit
+      # PA HP-UX.  On IA64 HP-UX, PIC is the default but the pic flag
+      # sets the default TLS model and affects inlining.
+      case $host_cpu in
+      hppa*64*)
+       # +Z the default
+       ;;
+      *)
+       _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
+       ;;
+      esac
+      ;;
+
+    interix[[3-9]]*)
+      # Interix 3.x gcc -fpic/-fPIC options generate broken code.
+      # Instead, we relocate shared libraries at runtime.
+      ;;
+
+    msdosdjgpp*)
+      # Just because we use GCC doesn't mean we suddenly get shared libraries
+      # on systems that don't support them.
+      _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no
+      enable_shared=no
+      ;;
+
+    *nto* | *qnx*)
+      # QNX uses GNU C++, but need to define -shared option too, otherwise
+      # it will coredump.
+      _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared'
+      ;;
+
+    sysv4*MP*)
+      if test -d /usr/nec; then
+       _LT_TAGVAR(lt_prog_compiler_pic, $1)=-Kconform_pic
+      fi
+      ;;
+
+    *)
+      _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
+      ;;
+    esac
+
+    # Override for nvcc, applied on top of the per-OS choice above: linker
+    # flags go behind -Xlinker, and a non-empty PIC flag is handed on
+    # behind -Xcompiler.
+    case $cc_basename in
+    nvcc*) # Cuda Compiler Driver 2.2
+      _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Xlinker '
+      if test -n "$_LT_TAGVAR(lt_prog_compiler_pic, $1)"; then
+        _LT_TAGVAR(lt_prog_compiler_pic, $1)="-Xcompiler $_LT_TAGVAR(lt_prog_compiler_pic, $1)"
+      fi
+      ;;
+    esac
+  else
+    # PORTME Check for flag to pass linker flags through the system compiler.
+    case $host_os in
+    aix*)
+      _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+      if test "$host_cpu" = ia64; then
+       # AIX 5 now supports IA64 processor
+       _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+      else
+       _LT_TAGVAR(lt_prog_compiler_static, $1)='-bnso -bI:/lib/syscalls.exp'
+      fi
+      ;;
+
+    mingw* | cygwin* | pw32* | os2* | cegcc*)
+      # This hack is so that the source file can tell whether it is being
+      # built for inclusion in a dll (and should export symbols for example).
+      m4_if([$1], [GCJ], [],
+       [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT'])
+      ;;
+
+    hpux9* | hpux10* | hpux11*)
+      _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+      # PIC is the default for IA64 HP-UX and 64-bit HP-UX, but
+      # not for PA HP-UX.
+      case $host_cpu in
+      hppa*64*|ia64*)
+       # +Z the default
+       ;;
+      *)
+       _LT_TAGVAR(lt_prog_compiler_pic, $1)='+Z'
+       ;;
+      esac
+      # Is there a better lt_prog_compiler_static that works with the bundled CC?
+      _LT_TAGVAR(lt_prog_compiler_static, $1)='${wl}-a ${wl}archive'
+      ;;
+
+    irix5* | irix6* | nonstopux*)
+      _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+      # PIC (with -KPIC) is the default.
+      _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared'
+      ;;
+
+    linux* | k*bsd*-gnu | kopensolaris*-gnu)
+      case $cc_basename in
+      # old Intel for x86_64 which still supported -KPIC.
+      ecc*)
+       _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+       _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
+       _LT_TAGVAR(lt_prog_compiler_static, $1)='-static'
+        ;;
+      # icc used to be incompatible with GCC.
+      # ICC 10 doesn't accept -KPIC any more.
+      icc* | ifort*)
+       _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+       _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
+       _LT_TAGVAR(lt_prog_compiler_static, $1)='-static'
+        ;;
+      # Lahey Fortran 8.1.
+      lf95*)
+       _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+       _LT_TAGVAR(lt_prog_compiler_pic, $1)='--shared'
+       _LT_TAGVAR(lt_prog_compiler_static, $1)='--static'
+       ;;
+      nagfor*)
+       # NAG Fortran compiler
+       _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,-Wl,,'
+       _LT_TAGVAR(lt_prog_compiler_pic, $1)='-PIC'
+       _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+       ;;
+      pgcc* | pgf77* | pgf90* | pgf95* | pgfortran*)
+        # Portland Group compilers (*not* the Pentium gcc compiler,
+       # which looks to be a dead project)
+       _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+       _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic'
+       _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+        ;;
+      ccc*)
+        _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+        # All Alpha code is PIC.
+        _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared'
+        ;;
+      xl* | bgxl* | bgf* | mpixl*)
+       # IBM XL C 8.0/Fortran 10.1, 11.1 on PPC and BlueGene
+       _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+       _LT_TAGVAR(lt_prog_compiler_pic, $1)='-qpic'
+       _LT_TAGVAR(lt_prog_compiler_static, $1)='-qstaticlink'
+       ;;
+      *)
+       # Unrecognized base name: look at the first five lines that the
+       # compiler prints for -V.  The first matching pattern wins, so the
+       # order of the entries below matters.
+       case `$CC -V 2>&1 | sed 5q` in
+       *Sun\ Ceres\ Fortran* | *Sun*Fortran*\ [[1-7]].* | *Sun*Fortran*\ 8.[[0-3]]*)
+         # Sun Fortran 8.3 passes all unrecognized flags to the linker
+         _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
+         _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+         _LT_TAGVAR(lt_prog_compiler_wl, $1)=''
+         ;;
+       *Sun\ F* | *Sun*Fortran*)
+         _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
+         _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+         _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld '
+         ;;
+       *Sun\ C*)
+         # Sun C 5.9
+         _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
+         _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+         _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+         ;;
+        *Intel*\ [[CF]]*Compiler*)
+         _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+         _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
+         _LT_TAGVAR(lt_prog_compiler_static, $1)='-static'
+         ;;
+       *Portland\ Group*)
+         _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+         _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic'
+         _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+         ;;
+       esac
+       ;;
+      esac
+      ;;
+
+    newsos6)
+      _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
+      _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+      ;;
+
+    *nto* | *qnx*)
+      # QNX uses GNU C++, but need to define -shared option too, otherwise
+      # it will coredump.
+      _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared'
+      ;;
+
+    osf3* | osf4* | osf5*)
+      _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+      # All OSF/1 code is PIC.
+      _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared'
+      ;;
+
+    rdos*)
+      _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared'
+      ;;
+
+    solaris*)
+      _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
+      _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+      case $cc_basename in
+      f77* | f90* | f95* | sunf77* | sunf90* | sunf95*)
+       _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ';;
+      *)
+       _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,';;
+      esac
+      ;;
+
+    sunos4*)
+      _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld '
+      _LT_TAGVAR(lt_prog_compiler_pic, $1)='-PIC'
+      _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+      ;;
+
+    sysv4 | sysv4.2uw2* | sysv4.3*)
+      _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+      _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
+      _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+      ;;
+
+    sysv4*MP*)
+      if test -d /usr/nec ;then
+       _LT_TAGVAR(lt_prog_compiler_pic, $1)='-Kconform_pic'
+       _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+      fi
+      ;;
+
+    sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*)
+      _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+      _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
+      _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+      ;;
+
+    unicos*)
+      _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+      _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no
+      ;;
+
+    uts4*)
+      _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic'
+      _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+      ;;
+
+    *)
+      # No entry for this host OS: mark the tag as unable to build shared
+      # libraries.
+      _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no
+      ;;
+    esac
+  fi
+])
+case $host_os in
+  # For platforms which do not support PIC, -DPIC is meaningless:
+  *djgpp*)
+    _LT_TAGVAR(lt_prog_compiler_pic, $1)=
+    ;;
+  *)
+    # Append -DPIC when the tag argument is empty or CXX; for any other
+    # tag nothing is appended and the flag is kept as selected above.
+    _LT_TAGVAR(lt_prog_compiler_pic, $1)="$_LT_TAGVAR(lt_prog_compiler_pic, $1)@&t@m4_if([$1],[],[ -DPIC],[m4_if([$1],[CXX],[ -DPIC],[])])"
+    ;;
+esac
+
+# Publish the selected PIC flag through the per-tag cache variable
+# lt_cv_prog_compiler_pic, then read the flag back from that variable.
+AC_CACHE_CHECK([for $compiler option to produce PIC],
+  [_LT_TAGVAR(lt_cv_prog_compiler_pic, $1)],
+  [_LT_TAGVAR(lt_cv_prog_compiler_pic, $1)=$_LT_TAGVAR(lt_prog_compiler_pic, $1)])
+_LT_TAGVAR(lt_prog_compiler_pic, $1)=$_LT_TAGVAR(lt_cv_prog_compiler_pic, $1)
+
+#
+# Check to make sure the PIC flag actually works.
+#
+# The check is skipped when the flag is empty.
+# NOTE(review): the two trailing arguments of the check look like its
+# on-success and on-failure actions -- confirm against the definition of
+# the check macro.  The first one prefixes the flag with a blank unless it
+# is empty or already starts with one; the second one clears the flag and
+# sets lt_prog_compiler_can_build_shared to no.
+if test -n "$_LT_TAGVAR(lt_prog_compiler_pic, $1)"; then
+  _LT_COMPILER_OPTION([if $compiler PIC flag $_LT_TAGVAR(lt_prog_compiler_pic, $1) works],
+    [_LT_TAGVAR(lt_cv_prog_compiler_pic_works, $1)],
+    [$_LT_TAGVAR(lt_prog_compiler_pic, $1)@&t@m4_if([$1],[],[ -DPIC],[m4_if([$1],[CXX],[ -DPIC],[])])], [],
+    [case $_LT_TAGVAR(lt_prog_compiler_pic, $1) in
+     "" | " "*) ;;
+     *) _LT_TAGVAR(lt_prog_compiler_pic, $1)=" $_LT_TAGVAR(lt_prog_compiler_pic, $1)" ;;
+     esac],
+    [_LT_TAGVAR(lt_prog_compiler_pic, $1)=
+     _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no])
+fi
+_LT_TAGDECL([pic_flag], [lt_prog_compiler_pic], [1],
+       [Additional compiler flags for building library objects])
+
+_LT_TAGDECL([wl], [lt_prog_compiler_wl], [1],
+       [How to pass a linker flag through the compiler])
+#
+# Check to make sure the static flag actually works.
+#
+# Some static flags chosen above contain a literal ${wl}; the eval below
+# expands it with wl set to the lt_prog_compiler_wl value of this tag, and
+# the expanded text is what gets checked.
+# NOTE(review): the last argument of the check, which clears the static
+# flag, looks like its on-failure action -- confirm against the definition
+# of the check macro.
+wl=$_LT_TAGVAR(lt_prog_compiler_wl, $1) eval lt_tmp_static_flag=\"$_LT_TAGVAR(lt_prog_compiler_static, $1)\"
+_LT_LINKER_OPTION([if $compiler static flag $lt_tmp_static_flag works],
+  _LT_TAGVAR(lt_cv_prog_compiler_static_works, $1),
+  $lt_tmp_static_flag,
+  [],
+  [_LT_TAGVAR(lt_prog_compiler_static, $1)=])
+_LT_TAGDECL([link_static_flag], [lt_prog_compiler_static], [1],
+       [Compiler flag to prevent dynamic linking])
+])# _LT_COMPILER_PIC
+
+
+# _LT_LINKER_SHLIBS([TAGNAME])
+# ----------------------------
+# See if the linker supports building shared libraries.
+m4_defun([_LT_LINKER_SHLIBS],
+[AC_REQUIRE([LT_PATH_LD])dnl
+AC_REQUIRE([LT_PATH_NM])dnl
+m4_require([_LT_PATH_MANIFEST_TOOL])dnl
+m4_require([_LT_FILEUTILS_DEFAULTS])dnl
+m4_require([_LT_DECL_EGREP])dnl
+m4_require([_LT_DECL_SED])dnl
+m4_require([_LT_CMD_GLOBAL_SYMBOLS])dnl
+m4_require([_LT_TAG_COMPILER])dnl
+AC_MSG_CHECKING([whether the $compiler linker ($LD) supports shared libraries])
+m4_if([$1], [CXX], [
+  _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols'
+  _LT_TAGVAR(exclude_expsyms, $1)=['_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*']
+  case $host_os in
+  aix[[4-9]]*)
+    # If we're using GNU nm, then we don't want the "-C" option.
+    # -C means demangle to AIX nm, but means don't demangle with GNU nm
+    # Also, AIX nm treats weak defined symbols like other global defined
+    # symbols, whereas GNU nm marks them as "W".
+    if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then
+      _LT_TAGVAR(export_symbols_cmds, $1)='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && ([substr](\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols'
+    else
+      _LT_TAGVAR(export_symbols_cmds, $1)='$NM -BCpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B")) && ([substr](\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols'
+    fi
+    ;;
+  pw32*)
+    _LT_TAGVAR(export_symbols_cmds, $1)="$ltdll_cmds"
+    ;;
+  cygwin* | mingw* | cegcc*)
+    case $cc_basename in
+    cl*)
+      _LT_TAGVAR(exclude_expsyms, $1)='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*'
+      ;;
+    *)
+      _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1 DATA/;s/^.*[[ ]]__nm__\([[^ ]]*\)[[ ]][[^ ]]*/\1 DATA/;/^I[[ ]]/d;/^[[AITW]][[ ]]/s/.* //'\'' | sort | uniq > $export_symbols'
+      _LT_TAGVAR(exclude_expsyms, $1)=['[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname']
+      ;;
+    esac
+    ;;
+  *)
+    _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols'
+    ;;
+  esac
+], [
+  runpath_var=
+  _LT_TAGVAR(allow_undefined_flag, $1)=
+  _LT_TAGVAR(always_export_symbols, $1)=no
+  _LT_TAGVAR(archive_cmds, $1)=
+  _LT_TAGVAR(archive_expsym_cmds, $1)=
+  _LT_TAGVAR(compiler_needs_object, $1)=no
+  _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no
+  _LT_TAGVAR(export_dynamic_flag_spec, $1)=
+  _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols'
+  _LT_TAGVAR(hardcode_automatic, $1)=no
+  _LT_TAGVAR(hardcode_direct, $1)=no
+  _LT_TAGVAR(hardcode_direct_absolute, $1)=no
+  _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=
+  _LT_TAGVAR(hardcode_libdir_separator, $1)=
+  _LT_TAGVAR(hardcode_minus_L, $1)=no
+  _LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported
+  _LT_TAGVAR(inherit_rpath, $1)=no
+  _LT_TAGVAR(link_all_deplibs, $1)=unknown
+  _LT_TAGVAR(module_cmds, $1)=
+  _LT_TAGVAR(module_expsym_cmds, $1)=
+  _LT_TAGVAR(old_archive_from_new_cmds, $1)=
+  _LT_TAGVAR(old_archive_from_expsyms_cmds, $1)=
+  _LT_TAGVAR(thread_safe_flag_spec, $1)=
+  _LT_TAGVAR(whole_archive_flag_spec, $1)=
+  # include_expsyms should be a list of space-separated symbols to be *always*
+  # included in the symbol list
+  _LT_TAGVAR(include_expsyms, $1)=
+  # exclude_expsyms can be an extended regexp of symbols to exclude
+  # it will be wrapped by ` (' and `)$', so one must not match beginning or
+  # end of line.  Example: `a|bc|.*d.*' will exclude the symbols `a' and `bc',
+  # as well as any symbol that contains `d'.
+  _LT_TAGVAR(exclude_expsyms, $1)=['_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*']
+  # Although _GLOBAL_OFFSET_TABLE_ is a valid symbol C name, most a.out
+  # platforms (ab)use it in PIC code, but their linkers get confused if
+  # the symbol is explicitly referenced.  Since portable code cannot
+  # rely on this symbol name, it's probably fine to never include it in
+  # preloaded symbol tables.
+  # Exclude shared library initialization/finalization symbols.
+dnl Note also adjust exclude_expsyms for C++ above.
+  extract_expsyms_cmds=
+
+  case $host_os in
+  cygwin* | mingw* | pw32* | cegcc*)
+    # FIXME: the MSVC++ port hasn't been tested in a loooong time
+    # When not using gcc, we currently assume that we are using
+    # Microsoft Visual C++.
+    if test "$GCC" != yes; then
+      with_gnu_ld=no
+    fi
+    ;;
+  interix*)
+    # we just hope/assume this is gcc and not c89 (= MSVC++)
+    with_gnu_ld=yes
+    ;;
+  openbsd*)
+    with_gnu_ld=no
+    ;;
+  esac
+
+  _LT_TAGVAR(ld_shlibs, $1)=yes
+
+  # On some targets, GNU ld is compatible enough with the native linker
+  # that we're better off using the native interface for both.
+  lt_use_gnu_ld_interface=no
+  if test "$with_gnu_ld" = yes; then
+    case $host_os in
+      aix*)
+       # The AIX port of GNU ld has always aspired to compatibility
+       # with the native linker.  However, as the warning in the GNU ld
+       # block says, versions before 2.19.5* couldn't really create working
+       # shared libraries, regardless of the interface used.
+       case `$LD -v 2>&1` in
+         *\ \(GNU\ Binutils\)\ 2.19.5*) ;;
+         *\ \(GNU\ Binutils\)\ 2.[[2-9]]*) ;;
+         *\ \(GNU\ Binutils\)\ [[3-9]]*) ;;
+         *)
+           lt_use_gnu_ld_interface=yes
+           ;;
+       esac
+       ;;
+      *)
+       lt_use_gnu_ld_interface=yes
+       ;;
+    esac
+  fi
+
+  if test "$lt_use_gnu_ld_interface" = yes; then
+    # If archive_cmds runs LD, not CC, wlarc should be empty
+    wlarc='${wl}'
+
+    # Set some defaults for GNU ld with shared library support. These
+    # are reset later if shared libraries are not supported. Putting them
+    # here allows them to be overridden if necessary.
+    runpath_var=LD_RUN_PATH
+    _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
+    _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic'
+    # ancient GNU ld didn't support --whole-archive et al.
+    if $LD --help 2>&1 | $GREP 'no-whole-archive' > /dev/null; then
+      _LT_TAGVAR(whole_archive_flag_spec, $1)="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive'
+    else
+      _LT_TAGVAR(whole_archive_flag_spec, $1)=
+    fi
+    supports_anon_versioning=no
+    case `$LD -v 2>&1` in
+      *GNU\ gold*) supports_anon_versioning=yes ;;
+      *\ [[01]].* | *\ 2.[[0-9]].* | *\ 2.10.*) ;; # catch versions < 2.11
+      *\ 2.11.93.0.2\ *) supports_anon_versioning=yes ;; # RH7.3 ...
+      *\ 2.11.92.0.12\ *) supports_anon_versioning=yes ;; # Mandrake 8.2 ...
+      *\ 2.11.*) ;; # other 2.11 versions
+      *) supports_anon_versioning=yes ;;
+    esac
+
+    # See if GNU ld supports shared libraries.
+    case $host_os in
+    aix[[3-9]]*)
+      # On AIX/PPC, the GNU linker is very broken
+      if test "$host_cpu" != ia64; then
+       _LT_TAGVAR(ld_shlibs, $1)=no
+       cat <<_LT_EOF 1>&2
+
+*** Warning: the GNU linker, at least up to release 2.19, is reported
+*** to be unable to reliably create shared libraries on AIX.
+*** Therefore, libtool is disabling shared libraries support.  If you
+*** really care for shared libraries, you may want to install binutils
+*** 2.20 or above, or modify your PATH so that a non-GNU linker is found.
+*** You will then need to restart the configuration process.
+
+_LT_EOF
+      fi
+      ;;
+
+    amigaos*)
+      case $host_cpu in
+      powerpc)
+            # see comment about AmigaOS4 .so support
+            _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+            _LT_TAGVAR(archive_expsym_cmds, $1)=''
+        ;;
+      m68k)
+            _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)'
+            _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir'
+            _LT_TAGVAR(hardcode_minus_L, $1)=yes
+        ;;
+      esac
+      ;;
+
+    beos*)
+      if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then
+       _LT_TAGVAR(allow_undefined_flag, $1)=unsupported
+       # Joseph Beckenbach <jrb3@best.com> says some releases of gcc
+       # support --undefined.  This deserves some investigation.  FIXME
+       _LT_TAGVAR(archive_cmds, $1)='$CC -nostart $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+      else
+       _LT_TAGVAR(ld_shlibs, $1)=no
+      fi
+      ;;
+
+    cygwin* | mingw* | pw32* | cegcc*)
+      # _LT_TAGVAR(hardcode_libdir_flag_spec, $1) is actually meaningless,
+      # as there is no search path for DLLs.
+      _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir'
+      _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-all-symbols'
+      _LT_TAGVAR(allow_undefined_flag, $1)=unsupported
+      _LT_TAGVAR(always_export_symbols, $1)=no
+      _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes
+      _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1 DATA/;s/^.*[[ ]]__nm__\([[^ ]]*\)[[ ]][[^ ]]*/\1 DATA/;/^I[[ ]]/d;/^[[AITW]][[ ]]/s/.* //'\'' | sort | uniq > $export_symbols'
+      _LT_TAGVAR(exclude_expsyms, $1)=['[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname']
+
+      if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then
+        _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib'
+       # If the export-symbols file already is a .def file (1st line
+       # is EXPORTS), use it as is; otherwise, prepend...
+       _LT_TAGVAR(archive_expsym_cmds, $1)='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then
+         cp $export_symbols $output_objdir/$soname.def;
+       else
+         echo EXPORTS > $output_objdir/$soname.def;
+         cat $export_symbols >> $output_objdir/$soname.def;
+       fi~
+       $CC -shared $output_objdir/$soname.def $libobjs $deplibs $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib'
+      else
+       _LT_TAGVAR(ld_shlibs, $1)=no
+      fi
+      ;;
+
+    haiku*)
+      _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+      _LT_TAGVAR(link_all_deplibs, $1)=yes
+      ;;
+
+    interix[[3-9]]*)
+      _LT_TAGVAR(hardcode_direct, $1)=no
+      _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
+      _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir'
+      _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E'
+      # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc.
+      # Instead, shared libraries are loaded at an image base (0x10000000 by
+      # default) and relocated if they conflict, which is a slow, very memory-
+      # consuming and fragmenting process.  To avoid this, we pick a random,
+      # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link
+      # time.  Moving up from 0x10000000 also allows more sbrk(2) space.
+      _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib'
+      _LT_TAGVAR(archive_expsym_cmds, $1)='sed "s,^,_," $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--retain-symbols-file,$output_objdir/$soname.expsym ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib'
+      ;;
+
+    gnu* | linux* | tpf* | k*bsd*-gnu | kopensolaris*-gnu)
+      tmp_diet=no
+      if test "$host_os" = linux-dietlibc; then
+       case $cc_basename in
+         diet\ *) tmp_diet=yes;;       # linux-dietlibc with static linking (!diet-dyn)
+       esac
+      fi
+      if $LD --help 2>&1 | $EGREP ': supported targets:.* elf' > /dev/null \
+        && test "$tmp_diet" = no
+      then
+       tmp_addflag=' $pic_flag'
+       tmp_sharedflag='-shared'
+       case $cc_basename,$host_cpu in
+        pgcc*)                         # Portland Group C compiler
+         _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`for conv in $convenience\"\"; do test  -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive'
+         tmp_addflag=' $pic_flag'
+         ;;
+       pgf77* | pgf90* | pgf95* | pgfortran*)
+                                       # Portland Group f77 and f90 compilers
+         _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`for conv in $convenience\"\"; do test  -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive'
+         tmp_addflag=' $pic_flag -Mnomain' ;;
+       ecc*,ia64* | icc*,ia64*)        # Intel C compiler on ia64
+         tmp_addflag=' -i_dynamic' ;;
+       efc*,ia64* | ifort*,ia64*)      # Intel Fortran compiler on ia64
+         tmp_addflag=' -i_dynamic -nofor_main' ;;
+       ifc* | ifort*)                  # Intel Fortran compiler
+         tmp_addflag=' -nofor_main' ;;
+       lf95*)                          # Lahey Fortran 8.1
+         _LT_TAGVAR(whole_archive_flag_spec, $1)=
+         tmp_sharedflag='--shared' ;;
+       xl[[cC]]* | bgxl[[cC]]* | mpixl[[cC]]*) # IBM XL C 8.0 on PPC (deal with xlf below)
+         tmp_sharedflag='-qmkshrobj'
+         tmp_addflag= ;;
+       nvcc*)  # Cuda Compiler Driver 2.2
+         _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`for conv in $convenience\"\"; do test  -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive'
+         _LT_TAGVAR(compiler_needs_object, $1)=yes
+         ;;
+       esac
+       case `$CC -V 2>&1 | sed 5q` in
+       *Sun\ C*)                       # Sun C 5.9
+         _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive'
+         _LT_TAGVAR(compiler_needs_object, $1)=yes
+         tmp_sharedflag='-G' ;;
+       *Sun\ F*)                       # Sun Fortran 8.3
+         tmp_sharedflag='-G' ;;
+       esac
+       _LT_TAGVAR(archive_cmds, $1)='$CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+
+        if test "x$supports_anon_versioning" = xyes; then
+          _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~
+           cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~
+           echo "local: *; };" >> $output_objdir/$libname.ver~
+           $CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-version-script ${wl}$output_objdir/$libname.ver -o $lib'
+        fi
+
+       case $cc_basename in
+       xlf* | bgf* | bgxlf* | mpixlf*)
+         # IBM XL Fortran 10.1 on PPC cannot create shared libs itself
+         _LT_TAGVAR(whole_archive_flag_spec, $1)='--whole-archive$convenience --no-whole-archive'
+         _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
+         _LT_TAGVAR(archive_cmds, $1)='$LD -shared $libobjs $deplibs $linker_flags -soname $soname -o $lib'
+         if test "x$supports_anon_versioning" = xyes; then
+           _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~
+             cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~
+             echo "local: *; };" >> $output_objdir/$libname.ver~
+             $LD -shared $libobjs $deplibs $linker_flags -soname $soname -version-script $output_objdir/$libname.ver -o $lib'
+         fi
+         ;;
+       esac
+      else
+        _LT_TAGVAR(ld_shlibs, $1)=no
+      fi
+      ;;
+
+    netbsd*)
+      if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then
+       _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable $libobjs $deplibs $linker_flags -o $lib'
+       wlarc=
+      else
+       _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+       _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
+      fi
+      ;;
+
+    solaris*)
+      if $LD -v 2>&1 | $GREP 'BFD 2\.8' > /dev/null; then
+       _LT_TAGVAR(ld_shlibs, $1)=no
+       cat <<_LT_EOF 1>&2
+
+*** Warning: The releases 2.8.* of the GNU linker cannot reliably
+*** create shared libraries on Solaris systems.  Therefore, libtool
+*** is disabling shared libraries support.  We urge you to upgrade GNU
+*** binutils to release 2.9.1 or newer.  Another option is to modify
+*** your PATH or compiler configuration so that the native linker is
+*** used, and then restart.
+
+_LT_EOF
+      elif $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then
+       _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+       _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
+      else
+       _LT_TAGVAR(ld_shlibs, $1)=no
+      fi
+      ;;
+
+    sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX*)
+      case `$LD -v 2>&1` in
+        *\ [[01]].* | *\ 2.[[0-9]].* | *\ 2.1[[0-5]].*)
+       _LT_TAGVAR(ld_shlibs, $1)=no
+       cat <<_LT_EOF 1>&2
+
+*** Warning: Releases of the GNU linker prior to 2.16.91.0.3 can not
+*** reliably create shared libraries on SCO systems.  Therefore, libtool
+*** is disabling shared libraries support.  We urge you to upgrade GNU
+*** binutils to release 2.16.91.0.3 or newer.  Another option is to modify
+*** your PATH or compiler configuration so that the native linker is
+*** used, and then restart.
+
+_LT_EOF
+       ;;
+       *)
+         # For security reasons, it is highly recommended that you always
+         # use absolute paths for naming shared libraries, and exclude the
+         # DT_RUNPATH tag from executables and libraries.  But doing so
+         # requires that you compile everything twice, which is a pain.
+         if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then
+           _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
+           _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+           _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
+         else
+           _LT_TAGVAR(ld_shlibs, $1)=no
+         fi
+       ;;
+      esac
+      ;;
+
+    sunos4*)
+      _LT_TAGVAR(archive_cmds, $1)='$LD -assert pure-text -Bshareable -o $lib $libobjs $deplibs $linker_flags'
+      wlarc=
+      _LT_TAGVAR(hardcode_direct, $1)=yes
+      _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
+      ;;
+
+    *)
+      if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then
+       _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+       _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
+      else
+       _LT_TAGVAR(ld_shlibs, $1)=no
+      fi
+      ;;
+    esac
+
+    if test "$_LT_TAGVAR(ld_shlibs, $1)" = no; then
+      runpath_var=
+      _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=
+      _LT_TAGVAR(export_dynamic_flag_spec, $1)=
+      _LT_TAGVAR(whole_archive_flag_spec, $1)=
+    fi
+  else
+    # PORTME fill in a description of your system's linker (not GNU ld)
+    case $host_os in
+    aix3*)
+      _LT_TAGVAR(allow_undefined_flag, $1)=unsupported
+      _LT_TAGVAR(always_export_symbols, $1)=yes
+      _LT_TAGVAR(archive_expsym_cmds, $1)='$LD -o $output_objdir/$soname $libobjs $deplibs $linker_flags -bE:$export_symbols -T512 -H512 -bM:SRE~$AR $AR_FLAGS $lib $output_objdir/$soname'
+      # Note: this linker hardcodes the directories in LIBPATH if there
+      # are no directories specified by -L.
+      _LT_TAGVAR(hardcode_minus_L, $1)=yes
+      if test "$GCC" = yes && test -z "$lt_prog_compiler_static"; then
+       # Neither direct hardcoding nor static linking is supported with a
+       # broken collect2.
+       _LT_TAGVAR(hardcode_direct, $1)=unsupported
+      fi
+      ;;
+
+    aix[[4-9]]*)
+      if test "$host_cpu" = ia64; then
+       # On IA64, the linker does run time linking by default, so we don't
+       # have to do anything special.
+       aix_use_runtimelinking=no
+       exp_sym_flag='-Bexport'
+       no_entry_flag=""
+      else
+       # If we're using GNU nm, then we don't want the "-C" option.
+       # -C means demangle to AIX nm, but means don't demangle with GNU nm
+       # Also, AIX nm treats weak defined symbols like other global
+       # defined symbols, whereas GNU nm marks them as "W".
+       if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then
+         _LT_TAGVAR(export_symbols_cmds, $1)='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && ([substr](\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols'
+       else
+         _LT_TAGVAR(export_symbols_cmds, $1)='$NM -BCpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B")) && ([substr](\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols'
+       fi
+       aix_use_runtimelinking=no
+
+       # Test if we are trying to use run time linking or normal
+       # AIX style linking. If -brtl is somewhere in LDFLAGS, we
+       # need to do runtime linking.
+       case $host_os in aix4.[[23]]|aix4.[[23]].*|aix[[5-9]]*)
+         for ld_flag in $LDFLAGS; do
+         if (test $ld_flag = "-brtl" || test $ld_flag = "-Wl,-brtl"); then
+           aix_use_runtimelinking=yes
+           break
+         fi
+         done
+         ;;
+       esac
+
+       exp_sym_flag='-bexport'
+       no_entry_flag='-bnoentry'
+      fi
+
+      # When large executables or shared objects are built, AIX ld can
+      # have problems creating the table of contents.  If linking a library
+      # or program results in "error TOC overflow" add -mminimal-toc to
+      # CXXFLAGS/CFLAGS for g++/gcc.  In the cases where that is not
+      # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS.
+
+      _LT_TAGVAR(archive_cmds, $1)=''
+      _LT_TAGVAR(hardcode_direct, $1)=yes
+      _LT_TAGVAR(hardcode_direct_absolute, $1)=yes
+      _LT_TAGVAR(hardcode_libdir_separator, $1)=':'
+      _LT_TAGVAR(link_all_deplibs, $1)=yes
+      _LT_TAGVAR(file_list_spec, $1)='${wl}-f,'
+
+      if test "$GCC" = yes; then
+       case $host_os in aix4.[[012]]|aix4.[[012]].*)
+       # We only want to do this on AIX 4.2 and lower, the check
+       # below for broken collect2 doesn't work under 4.3+
+         collect2name=`${CC} -print-prog-name=collect2`
+         if test -f "$collect2name" &&
+          strings "$collect2name" | $GREP resolve_lib_name >/dev/null
+         then
+         # We have reworked collect2
+         :
+         else
+         # We have old collect2
+         _LT_TAGVAR(hardcode_direct, $1)=unsupported
+         # It fails to find uninstalled libraries when the uninstalled
+         # path is not listed in the libpath.  Setting hardcode_minus_L
+         # to yes forces relinking
+         _LT_TAGVAR(hardcode_minus_L, $1)=yes
+         _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir'
+         _LT_TAGVAR(hardcode_libdir_separator, $1)=
+         fi
+         ;;
+       esac
+       shared_flag='-shared'
+       if test "$aix_use_runtimelinking" = yes; then
+         shared_flag="$shared_flag "'${wl}-G'
+       fi
+      else
+       # not using gcc
+       if test "$host_cpu" = ia64; then
+       # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release
+       # chokes on -Wl,-G. The following line is correct:
+         shared_flag='-G'
+       else
+         if test "$aix_use_runtimelinking" = yes; then
+           shared_flag='${wl}-G'
+         else
+           shared_flag='${wl}-bM:SRE'
+         fi
+       fi
+      fi
+
+      _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-bexpall'
+      # It seems that -bexpall does not export symbols beginning with
+      # underscore (_), so it is better to generate a list of symbols to export.
+      _LT_TAGVAR(always_export_symbols, $1)=yes
+      if test "$aix_use_runtimelinking" = yes; then
+       # Warning - without using the other runtime loading flags (-brtl),
+       # -berok will link without error, but may produce a broken library.
+       _LT_TAGVAR(allow_undefined_flag, $1)='-berok'
+        # Determine the default libpath from the value encoded in an
+        # empty executable.
+        _LT_SYS_MODULE_PATH_AIX([$1])
+        _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-blibpath:$libdir:'"$aix_libpath"
+        _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags `if test "x${allow_undefined_flag}" != "x"; then func_echo_all "${wl}${allow_undefined_flag}"; else :; fi` '"\${wl}$exp_sym_flag:\$export_symbols $shared_flag"
+      else
+       if test "$host_cpu" = ia64; then
+         _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R $libdir:/usr/lib:/lib'
+         _LT_TAGVAR(allow_undefined_flag, $1)="-z nodefs"
+         _LT_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags ${wl}${allow_undefined_flag} '"\${wl}$exp_sym_flag:\$export_symbols"
+       else
+        # Determine the default libpath from the value encoded in an
+        # empty executable.
+        _LT_SYS_MODULE_PATH_AIX([$1])
+        _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-blibpath:$libdir:'"$aix_libpath"
+         # Warning - without using the other run time loading flags,
+         # -berok will link without error, but may produce a broken library.
+         _LT_TAGVAR(no_undefined_flag, $1)=' ${wl}-bernotok'
+         _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-berok'
+         if test "$with_gnu_ld" = yes; then
+           # We only use this code for GNU lds that support --whole-archive.
+           _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive$convenience ${wl}--no-whole-archive'
+         else
+           # Exported symbols can be pulled into shared objects from archives
+           _LT_TAGVAR(whole_archive_flag_spec, $1)='$convenience'
+         fi
+         _LT_TAGVAR(archive_cmds_need_lc, $1)=yes
+         # This is similar to how AIX traditionally builds its shared libraries.
+         _LT_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs ${wl}-bnoentry $compiler_flags ${wl}-bE:$export_symbols${allow_undefined_flag}~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$soname'
+       fi
+      fi
+      ;;
+
+    amigaos*)
+      case $host_cpu in
+      powerpc)
+            # see comment about AmigaOS4 .so support
+            _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+            _LT_TAGVAR(archive_expsym_cmds, $1)=''
+        ;;
+      m68k)
+            _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)'
+            _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir'
+            _LT_TAGVAR(hardcode_minus_L, $1)=yes
+        ;;
+      esac
+      ;;
+
+    bsdi[[45]]*)
+      _LT_TAGVAR(export_dynamic_flag_spec, $1)=-rdynamic
+      ;;
+
+    cygwin* | mingw* | pw32* | cegcc*)
+      # When not using gcc, we currently assume that we are using
+      # Microsoft Visual C++.
+      # hardcode_libdir_flag_spec is actually meaningless, as there is
+      # no search path for DLLs.
+      case $cc_basename in
+      cl*)
+       # Native MSVC
+       _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=' '
+       _LT_TAGVAR(allow_undefined_flag, $1)=unsupported
+       _LT_TAGVAR(always_export_symbols, $1)=yes
+       _LT_TAGVAR(file_list_spec, $1)='@'
+       # Tell ltmain to make .lib files, not .a files.
+       libext=lib
+       # Tell ltmain to make .dll files, not .so files.
+       shrext_cmds=".dll"
+       # FIXME: Setting linknames here is a bad hack.
+       _LT_TAGVAR(archive_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-dll~linknames='
+       _LT_TAGVAR(archive_expsym_cmds, $1)='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then
+           sed -n -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' -e '1\\\!p' < $export_symbols > $output_objdir/$soname.exp;
+         else
+           sed -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' < $export_symbols > $output_objdir/$soname.exp;
+         fi~
+         $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~
+         linknames='
+       # The linker will not automatically build a static lib if we build a DLL.
+       # _LT_TAGVAR(old_archive_from_new_cmds, $1)='true'
+       _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes
+       _LT_TAGVAR(exclude_expsyms, $1)='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*'
+       _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1,DATA/'\'' | $SED -e '\''/^[[AITW]][[ ]]/s/.*[[ ]]//'\'' | sort | uniq > $export_symbols'
+       # Don't use ranlib
+       _LT_TAGVAR(old_postinstall_cmds, $1)='chmod 644 $oldlib'
+       _LT_TAGVAR(postlink_cmds, $1)='lt_outputfile="@OUTPUT@"~
+         lt_tool_outputfile="@TOOL_OUTPUT@"~
+         case $lt_outputfile in
+           *.exe|*.EXE) ;;
+           *)
+             lt_outputfile="$lt_outputfile.exe"
+             lt_tool_outputfile="$lt_tool_outputfile.exe"
+             ;;
+         esac~
+         if test "$MANIFEST_TOOL" != ":" && test -f "$lt_outputfile.manifest"; then
+           $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1;
+           $RM "$lt_outputfile.manifest";
+         fi'
+       ;;
+      *)
+       # Assume MSVC wrapper
+       _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=' '
+       _LT_TAGVAR(allow_undefined_flag, $1)=unsupported
+       # Tell ltmain to make .lib files, not .a files.
+       libext=lib
+       # Tell ltmain to make .dll files, not .so files.
+       shrext_cmds=".dll"
+       # FIXME: Setting linknames here is a bad hack.
+       _LT_TAGVAR(archive_cmds, $1)='$CC -o $lib $libobjs $compiler_flags `func_echo_all "$deplibs" | $SED '\''s/ -lc$//'\''` -link -dll~linknames='
+       # The linker will automatically build a .lib file if we build a DLL.
+       _LT_TAGVAR(old_archive_from_new_cmds, $1)='true'
+       # FIXME: Should let the user specify the lib program.
+       _LT_TAGVAR(old_archive_cmds, $1)='lib -OUT:$oldlib$oldobjs$old_deplibs'
+       _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes
+       ;;
+      esac
+      ;;
+
+    darwin* | rhapsody*)
+      _LT_DARWIN_LINKER_FEATURES($1)
+      ;;
+
+    dgux*)
+      _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+      _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir'
+      _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
+      ;;
+
+    # FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor
+    # support.  Future versions do this automatically, but an explicit c++rt0.o
+    # does not break anything, and helps significantly (at the cost of a little
+    # extra space).
+    freebsd2.2*)
+      _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags /usr/lib/c++rt0.o'
+      _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir'
+      _LT_TAGVAR(hardcode_direct, $1)=yes
+      _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
+      ;;
+
+    # Unfortunately, older versions of FreeBSD 2 do not have this feature.
+    freebsd2.*)
+      _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'
+      _LT_TAGVAR(hardcode_direct, $1)=yes
+      _LT_TAGVAR(hardcode_minus_L, $1)=yes
+      _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
+      ;;
+
+    # FreeBSD 3 and greater uses gcc -shared to do shared libraries.
+    freebsd* | dragonfly*)
+      _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags'
+      _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir'
+      _LT_TAGVAR(hardcode_direct, $1)=yes
+      _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
+      ;;
+
+    hpux9*)
+      if test "$GCC" = yes; then
+       _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$CC -shared $pic_flag ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $libobjs $deplibs $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib'
+      else
+       _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$LD -b +b $install_libdir -o $output_objdir/$soname $libobjs $deplibs $linker_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib'
+      fi
+      _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir'
+      _LT_TAGVAR(hardcode_libdir_separator, $1)=:
+      _LT_TAGVAR(hardcode_direct, $1)=yes
+
+      # hardcode_minus_L: Not really in the search PATH,
+      # but as the default location of the library.
+      _LT_TAGVAR(hardcode_minus_L, $1)=yes
+      _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E'
+      ;;
+
+    hpux10*)
+      if test "$GCC" = yes && test "$with_gnu_ld" = no; then
+       _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags'
+      else
+       _LT_TAGVAR(archive_cmds, $1)='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags'
+      fi
+      if test "$with_gnu_ld" = no; then
+       _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir'
+       _LT_TAGVAR(hardcode_libdir_separator, $1)=:
+       _LT_TAGVAR(hardcode_direct, $1)=yes
+       _LT_TAGVAR(hardcode_direct_absolute, $1)=yes
+       _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E'
+       # hardcode_minus_L: Not really in the search PATH,
+       # but as the default location of the library.
+       _LT_TAGVAR(hardcode_minus_L, $1)=yes
+      fi
+      ;;
+
+    hpux11*)
+      if test "$GCC" = yes && test "$with_gnu_ld" = no; then
+       case $host_cpu in
+       hppa*64*)
+         _LT_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}+h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags'
+         ;;
+       ia64*)
+         _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags'
+         ;;
+       *)
+         _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags'
+         ;;
+       esac
+      else
+       case $host_cpu in
+       hppa*64*)
+         _LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags'
+         ;;
+       ia64*)
+         _LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags'
+         ;;
+       *)
+       m4_if($1, [], [
+         # Older versions of the 11.00 compiler do not understand -b yet
+         # (HP92453-01 A.11.01.20 doesn't, HP92453-01 B.11.X.35175-35176.GP does)
+         _LT_LINKER_OPTION([if $CC understands -b],
+           _LT_TAGVAR(lt_cv_prog_compiler__b, $1), [-b],
+           [_LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags'],
+           [_LT_TAGVAR(archive_cmds, $1)='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags'])],
+         [_LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags'])
+         ;;
+       esac
+      fi
+      if test "$with_gnu_ld" = no; then
+       _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir'
+       _LT_TAGVAR(hardcode_libdir_separator, $1)=:
+
+       case $host_cpu in
+       hppa*64*|ia64*)
+         _LT_TAGVAR(hardcode_direct, $1)=no
+         _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
+         ;;
+       *)
+         _LT_TAGVAR(hardcode_direct, $1)=yes
+         _LT_TAGVAR(hardcode_direct_absolute, $1)=yes
+         _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E'
+
+         # hardcode_minus_L: Not really in the search PATH,
+         # but as the default location of the library.
+         _LT_TAGVAR(hardcode_minus_L, $1)=yes
+         ;;
+       esac
+      fi
+      ;;
+
+    irix5* | irix6* | nonstopux*)
+      if test "$GCC" = yes; then
+       _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
+       # Try to use the -exported_symbol ld option, if it does not
+       # work, assume that -exports_file does not work either and
+       # implicitly export all symbols.
+       # This should be the same for all languages, so no per-tag cache variable.
+       AC_CACHE_CHECK([whether the $host_os linker accepts -exported_symbol],
+         [lt_cv_irix_exported_symbol],
+         [save_LDFLAGS="$LDFLAGS"
+          LDFLAGS="$LDFLAGS -shared ${wl}-exported_symbol ${wl}foo ${wl}-update_registry ${wl}/dev/null"
+          AC_LINK_IFELSE(
+            [AC_LANG_SOURCE(
+               [AC_LANG_CASE([C], [[int foo (void) { return 0; }]],
+                             [C++], [[int foo (void) { return 0; }]],
+                             [Fortran 77], [[
+      subroutine foo
+      end]],
+                             [Fortran], [[
+      subroutine foo
+      end]])])],
+             [lt_cv_irix_exported_symbol=yes],
+             [lt_cv_irix_exported_symbol=no])
+           LDFLAGS="$save_LDFLAGS"])
+       if test "$lt_cv_irix_exported_symbol" = yes; then
+          _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations ${wl}-exports_file ${wl}$export_symbols -o $lib'
+       fi
+      else
+       _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib'
+       _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -exports_file $export_symbols -o $lib'
+      fi
+      _LT_TAGVAR(archive_cmds_need_lc, $1)='no'
+      _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
+      _LT_TAGVAR(hardcode_libdir_separator, $1)=:
+      _LT_TAGVAR(inherit_rpath, $1)=yes
+      _LT_TAGVAR(link_all_deplibs, $1)=yes
+      ;;
+
+    netbsd*)
+      if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then
+       _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'  # a.out
+      else
+       _LT_TAGVAR(archive_cmds, $1)='$LD -shared -o $lib $libobjs $deplibs $linker_flags'      # ELF
+      fi
+      _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir'
+      _LT_TAGVAR(hardcode_direct, $1)=yes
+      _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
+      ;;
+
+    newsos6)
+      _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+      _LT_TAGVAR(hardcode_direct, $1)=yes
+      _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
+      _LT_TAGVAR(hardcode_libdir_separator, $1)=:
+      _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
+      ;;
+
+    *nto* | *qnx*)
+      ;;
+
+    openbsd*)
+      if test -f /usr/libexec/ld.so; then
+       _LT_TAGVAR(hardcode_direct, $1)=yes
+       _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
+       _LT_TAGVAR(hardcode_direct_absolute, $1)=yes
+       if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
+         _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags'
+         _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-retain-symbols-file,$export_symbols'
+         _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir'
+         _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E'
+       else
+         case $host_os in
+          openbsd[[01]].* | openbsd2.[[0-7]] | openbsd2.[[0-7]].*)
+            _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'
+            _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir'
+            ;;
+          *)
+            _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags'
+            _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir'
+            ;;
+         esac
+       fi
+      else
+       _LT_TAGVAR(ld_shlibs, $1)=no
+      fi
+      ;;
+
+    os2*)
+      _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir'
+      _LT_TAGVAR(hardcode_minus_L, $1)=yes
+      _LT_TAGVAR(allow_undefined_flag, $1)=unsupported
+      _LT_TAGVAR(archive_cmds, $1)='$ECHO "LIBRARY $libname INITINSTANCE" > $output_objdir/$libname.def~$ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~echo DATA >> $output_objdir/$libname.def~echo " SINGLE NONSHARED" >> $output_objdir/$libname.def~echo EXPORTS >> $output_objdir/$libname.def~emxexp $libobjs >> $output_objdir/$libname.def~$CC -Zdll -Zcrtdll -o $lib $libobjs $deplibs $compiler_flags $output_objdir/$libname.def'
+      _LT_TAGVAR(old_archive_from_new_cmds, $1)='emximp -o $output_objdir/$libname.a $output_objdir/$libname.def'
+      ;;
+
+    osf3*)
+      if test "$GCC" = yes; then
+       _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-expect_unresolved ${wl}\*'
+       _LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
+      else
+       _LT_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*'
+       _LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib'
+      fi
+      _LT_TAGVAR(archive_cmds_need_lc, $1)='no'
+      _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
+      _LT_TAGVAR(hardcode_libdir_separator, $1)=:
+      ;;
+
+    osf4* | osf5*)     # as osf3* with the addition of -msym flag
+      if test "$GCC" = yes; then
+       _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-expect_unresolved ${wl}\*'
+       _LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $pic_flag $libobjs $deplibs $compiler_flags ${wl}-msym ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
+       _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
+      else
+       _LT_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*'
+       _LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib'
+       _LT_TAGVAR(archive_expsym_cmds, $1)='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done; printf "%s\\n" "-hidden">> $lib.exp~
+       $CC -shared${allow_undefined_flag} ${wl}-input ${wl}$lib.exp $compiler_flags $libobjs $deplibs -soname $soname `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib~$RM $lib.exp'
+
+       # Both c and cxx compiler support -rpath directly
+       _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir'
+      fi
+      _LT_TAGVAR(archive_cmds_need_lc, $1)='no'
+      _LT_TAGVAR(hardcode_libdir_separator, $1)=:
+      ;;
+
+    solaris*)
+      _LT_TAGVAR(no_undefined_flag, $1)=' -z defs'
+      if test "$GCC" = yes; then
+       wlarc='${wl}'
+       _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag ${wl}-z ${wl}text ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags'
+       _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~
+         $CC -shared $pic_flag ${wl}-z ${wl}text ${wl}-M ${wl}$lib.exp ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp'
+      else
+       case `$CC -V 2>&1` in
+       *"Compilers 5.0"*)
+         wlarc=''
+         _LT_TAGVAR(archive_cmds, $1)='$LD -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $linker_flags'
+         _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~
+         $LD -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$RM $lib.exp'
+         ;;
+       *)
+         wlarc='${wl}'
+         _LT_TAGVAR(archive_cmds, $1)='$CC -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $compiler_flags'
+         _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~
+         $CC -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp'
+         ;;
+       esac
+      fi
+      _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir'
+      _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
+      case $host_os in
+      solaris2.[[0-5]] | solaris2.[[0-5]].*) ;;
+      *)
+       # The compiler driver will combine and reorder linker options,
+       # but understands `-z linker_flag'.  GCC discards it without `$wl',
+       # but is careful enough not to reorder.
+       # Supported since Solaris 2.6 (maybe 2.5.1?)
+       if test "$GCC" = yes; then
+         _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}-z ${wl}allextract$convenience ${wl}-z ${wl}defaultextract'
+       else
+         _LT_TAGVAR(whole_archive_flag_spec, $1)='-z allextract$convenience -z defaultextract'
+       fi
+       ;;
+      esac
+      _LT_TAGVAR(link_all_deplibs, $1)=yes
+      ;;
+
+    sunos4*)
+      if test "x$host_vendor" = xsequent; then
+       # Use $CC to link under sequent, because it throws in some extra .o
+       # files that make .init and .fini sections work.
+       _LT_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h $soname -o $lib $libobjs $deplibs $compiler_flags'
+      else
+       _LT_TAGVAR(archive_cmds, $1)='$LD -assert pure-text -Bstatic -o $lib $libobjs $deplibs $linker_flags'
+      fi
+      _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir'
+      _LT_TAGVAR(hardcode_direct, $1)=yes
+      _LT_TAGVAR(hardcode_minus_L, $1)=yes
+      _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
+      ;;
+
+    sysv4)
+      case $host_vendor in
+       sni)
+         _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+         _LT_TAGVAR(hardcode_direct, $1)=yes # is this really true???
+       ;;
+       siemens)
+         ## LD is ld it makes a PLAMLIB
+         ## CC just makes a GrossModule.
+         _LT_TAGVAR(archive_cmds, $1)='$LD -G -o $lib $libobjs $deplibs $linker_flags'
+         _LT_TAGVAR(reload_cmds, $1)='$CC -r -o $output$reload_objs'
+         _LT_TAGVAR(hardcode_direct, $1)=no
+        ;;
+       motorola)
+         _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+         _LT_TAGVAR(hardcode_direct, $1)=no #Motorola manual says yes, but my tests say they lie
+       ;;
+      esac
+      runpath_var='LD_RUN_PATH'
+      _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
+      ;;
+
+    sysv4.3*)
+      _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+      _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
+      _LT_TAGVAR(export_dynamic_flag_spec, $1)='-Bexport'
+      ;;
+
+    sysv4*MP*)
+      if test -d /usr/nec; then
+       _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+       _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
+       runpath_var=LD_RUN_PATH
+       hardcode_runpath_var=yes
+       _LT_TAGVAR(ld_shlibs, $1)=yes
+      fi
+      ;;
+
+    sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[[01]].[[10]]* | unixware7* | sco3.2v5.0.[[024]]*)
+      _LT_TAGVAR(no_undefined_flag, $1)='${wl}-z,text'
+      _LT_TAGVAR(archive_cmds_need_lc, $1)=no
+      _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
+      runpath_var='LD_RUN_PATH'
+
+      if test "$GCC" = yes; then
+       _LT_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+       _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+      else
+       _LT_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+       _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+      fi
+      ;;
+
+    sysv5* | sco3.2v5* | sco5v6*)
+      # Note: We can NOT use -z defs as we might desire, because we do not
+      # link with -lc, and that would cause any symbols used from libc to
+      # always be unresolved, which means just about no library would
+      # ever link correctly.  If we're not using GNU ld we use -z text
+      # though, which does catch some bad symbols but isn't as heavy-handed
+      # as -z defs.
+      _LT_TAGVAR(no_undefined_flag, $1)='${wl}-z,text'
+      _LT_TAGVAR(allow_undefined_flag, $1)='${wl}-z,nodefs'
+      _LT_TAGVAR(archive_cmds_need_lc, $1)=no
+      _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
+      _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R,$libdir'
+      _LT_TAGVAR(hardcode_libdir_separator, $1)=':'
+      _LT_TAGVAR(link_all_deplibs, $1)=yes
+      _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-Bexport'
+      runpath_var='LD_RUN_PATH'
+
+      if test "$GCC" = yes; then
+       _LT_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+       _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+      else
+       _LT_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+       _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+      fi
+      ;;
+
+    uts4*)
+      _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+      _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir'
+      _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
+      ;;
+
+    *)
+      _LT_TAGVAR(ld_shlibs, $1)=no
+      ;;
+    esac
+
+    if test x$host_vendor = xsni; then
+      case $host in
+      sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*)
+       _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-Blargedynsym'
+       ;;
+      esac
+    fi
+  fi
+])
+AC_MSG_RESULT([$_LT_TAGVAR(ld_shlibs, $1)])
+test "$_LT_TAGVAR(ld_shlibs, $1)" = no && can_build_shared=no
+
+_LT_TAGVAR(with_gnu_ld, $1)=$with_gnu_ld
+
+_LT_DECL([], [libext], [0], [Old archive suffix (normally "a")])dnl
+_LT_DECL([], [shrext_cmds], [1], [Shared library suffix (normally ".so")])dnl
+_LT_DECL([], [extract_expsyms_cmds], [2],
+    [The commands to extract the exported symbol list from a shared archive])
+
+#
+# Do we need to explicitly link libc?
+#
+case "x$_LT_TAGVAR(archive_cmds_need_lc, $1)" in
+x|xyes)
+  # Assume -lc should be added
+  _LT_TAGVAR(archive_cmds_need_lc, $1)=yes
+
+  if test "$enable_shared" = yes && test "$GCC" = yes; then
+    case $_LT_TAGVAR(archive_cmds, $1) in
+    *'~'*)
+      # FIXME: we may have to deal with multi-command sequences.
+      ;;
+    '$CC '*)
+      # Test whether the compiler implicitly links with -lc since on some
+      # systems, -lgcc has to come before -lc. If gcc already passes -lc
+      # to ld, don't add -lc before -lgcc.
+      AC_CACHE_CHECK([whether -lc should be explicitly linked in],
+       [lt_cv_]_LT_TAGVAR(archive_cmds_need_lc, $1),
+       [$RM conftest*
+       echo "$lt_simple_compile_test_code" > conftest.$ac_ext
+
+       if AC_TRY_EVAL(ac_compile) 2>conftest.err; then
+         soname=conftest
+         lib=conftest
+         libobjs=conftest.$ac_objext
+         deplibs=
+         wl=$_LT_TAGVAR(lt_prog_compiler_wl, $1)
+         pic_flag=$_LT_TAGVAR(lt_prog_compiler_pic, $1)
+         compiler_flags=-v
+         linker_flags=-v
+         verstring=
+         output_objdir=.
+         libname=conftest
+         lt_save_allow_undefined_flag=$_LT_TAGVAR(allow_undefined_flag, $1)
+         _LT_TAGVAR(allow_undefined_flag, $1)=
+         if AC_TRY_EVAL(_LT_TAGVAR(archive_cmds, $1) 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1)
+         then
+           lt_cv_[]_LT_TAGVAR(archive_cmds_need_lc, $1)=no
+         else
+           lt_cv_[]_LT_TAGVAR(archive_cmds_need_lc, $1)=yes
+         fi
+         _LT_TAGVAR(allow_undefined_flag, $1)=$lt_save_allow_undefined_flag
+       else
+         cat conftest.err 1>&5
+       fi
+       $RM conftest*
+       ])
+      _LT_TAGVAR(archive_cmds_need_lc, $1)=$lt_cv_[]_LT_TAGVAR(archive_cmds_need_lc, $1)
+      ;;
+    esac
+  fi
+  ;;
+esac
+
+_LT_TAGDECL([build_libtool_need_lc], [archive_cmds_need_lc], [0],
+    [Whether or not to add -lc for building shared libraries])
+_LT_TAGDECL([allow_libtool_libs_with_static_runtimes],
+    [enable_shared_with_static_runtimes], [0],
+    [Whether or not to disallow shared libs when runtime libs are static])
+_LT_TAGDECL([], [export_dynamic_flag_spec], [1],
+    [Compiler flag to allow reflexive dlopens])
+_LT_TAGDECL([], [whole_archive_flag_spec], [1],
+    [Compiler flag to generate shared objects directly from archives])
+_LT_TAGDECL([], [compiler_needs_object], [1],
+    [Whether the compiler copes with passing no objects directly])
+_LT_TAGDECL([], [old_archive_from_new_cmds], [2],
+    [Create an old-style archive from a shared archive])
+_LT_TAGDECL([], [old_archive_from_expsyms_cmds], [2],
+    [Create a temporary old-style archive to link instead of a shared archive])
+_LT_TAGDECL([], [archive_cmds], [2], [Commands used to build a shared archive])
+_LT_TAGDECL([], [archive_expsym_cmds], [2])
+_LT_TAGDECL([], [module_cmds], [2],
+    [Commands used to build a loadable module if different from building
+    a shared archive.])
+_LT_TAGDECL([], [module_expsym_cmds], [2])
+_LT_TAGDECL([], [with_gnu_ld], [1],
+    [Whether we are building with GNU ld or not])
+_LT_TAGDECL([], [allow_undefined_flag], [1],
+    [Flag that allows shared libraries with undefined symbols to be built])
+_LT_TAGDECL([], [no_undefined_flag], [1],
+    [Flag that enforces no undefined symbols])
+_LT_TAGDECL([], [hardcode_libdir_flag_spec], [1],
+    [Flag to hardcode $libdir into a binary during linking.
+    This must work even if $libdir does not exist])
+_LT_TAGDECL([], [hardcode_libdir_separator], [1],
+    [Whether we need a single "-rpath" flag with a separated argument])
+_LT_TAGDECL([], [hardcode_direct], [0],
+    [Set to "yes" if using DIR/libNAME${shared_ext} during linking hardcodes
+    DIR into the resulting binary])
+_LT_TAGDECL([], [hardcode_direct_absolute], [0],
+    [Set to "yes" if using DIR/libNAME${shared_ext} during linking hardcodes
+    DIR into the resulting binary and the resulting library dependency is
+    "absolute", i.e impossible to change by setting ${shlibpath_var} if the
+    library is relocated])
+_LT_TAGDECL([], [hardcode_minus_L], [0],
+    [Set to "yes" if using the -LDIR flag during linking hardcodes DIR
+    into the resulting binary])
+_LT_TAGDECL([], [hardcode_shlibpath_var], [0],
+    [Set to "yes" if using SHLIBPATH_VAR=DIR during linking hardcodes DIR
+    into the resulting binary])
+_LT_TAGDECL([], [hardcode_automatic], [0],
+    [Set to "yes" if building a shared library automatically hardcodes DIR
+    into the library and all subsequent libraries and executables linked
+    against it])
+_LT_TAGDECL([], [inherit_rpath], [0],
+    [Set to yes if linker adds runtime paths of dependent libraries
+    to runtime path list])
+_LT_TAGDECL([], [link_all_deplibs], [0],
+    [Whether libtool must link a program against all its dependency libraries])
+_LT_TAGDECL([], [always_export_symbols], [0],
+    [Set to "yes" if exported symbols are required])
+_LT_TAGDECL([], [export_symbols_cmds], [2],
+    [The commands to list exported symbols])
+_LT_TAGDECL([], [exclude_expsyms], [1],
+    [Symbols that should not be listed in the preloaded symbols])
+_LT_TAGDECL([], [include_expsyms], [1],
+    [Symbols that must always be exported])
+_LT_TAGDECL([], [prelink_cmds], [2],
+    [Commands necessary for linking programs (against libraries) with templates])
+_LT_TAGDECL([], [postlink_cmds], [2],
+    [Commands necessary for finishing linking programs])
+_LT_TAGDECL([], [file_list_spec], [1],
+    [Specify filename containing input files])
+dnl FIXME: Not yet implemented
+dnl _LT_TAGDECL([], [thread_safe_flag_spec], [1],
+dnl    [Compiler flag to generate thread safe objects])
+])# _LT_LINKER_SHLIBS
+
+
+# _LT_LANG_C_CONFIG([TAG])
+# ------------------------
+# Ensure that the configuration variables for a C compiler are suitably
+# defined.  These variables are subsequently used by _LT_CONFIG to write
+# the compiler configuration to `libtool'.
+#
+# TAG is forwarded as $1 to every _LT_TAGVAR lookup and to each per-tag
+# helper macro invoked in the body.  $CC is saved on entry and restored
+# on exit, and the AC_LANG_PUSH(C) is balanced by the closing AC_LANG_POP.
+#
+# Shell variables assigned directly by this macro: ac_ext, objext,
+# lt_simple_compile_test_code, lt_simple_link_test_code, compiler_DEFAULT,
+# enable_shared, enable_static and, for aix3* hosts only, archive_cmds and
+# postinstall_cmds.  The helper macros it calls are defined elsewhere and
+# may set more.
m4_defun([_LT_LANG_C_CONFIG],
+[m4_require([_LT_DECL_EGREP])dnl
+# Remember the caller's CC; it is put back just before the macro ends.
+lt_save_CC="$CC"
+AC_LANG_PUSH(C)
+
+# Source file extension for C test sources.
+ac_ext=c
+
+# Object file extension for compiled C test sources.
+objext=o
+_LT_TAGVAR(objext, $1)=$objext
+
+# Code to be used in simple compile tests
+lt_simple_compile_test_code="int some_variable = 0;"
+
+# Code to be used in simple link tests
+lt_simple_link_test_code='int main(){return(0);}'
+
+_LT_TAG_COMPILER
+# Save the default compiler, since it gets overwritten when the other
+# tags are being tested, and _LT_TAGVAR(compiler, []) is a NOP.
+compiler_DEFAULT=$CC
+
+# save warnings/boilerplate of simple test code
+_LT_COMPILER_BOILERPLATE
+_LT_LINKER_BOILERPLATE
+
+# All feature probing, the shared/static decision and the final
+# configuration write-out are skipped when $compiler is empty.
+# NOTE(review): $compiler is not assigned in this macro; presumably the
+# _LT_TAG_COMPILER call above sets it -- confirm.
+if test -n "$compiler"; then
+  _LT_COMPILER_NO_RTTI($1)
+  _LT_COMPILER_PIC($1)
+  _LT_COMPILER_C_O($1)
+  _LT_COMPILER_FILE_LOCKS($1)
+  _LT_LINKER_SHLIBS($1)
+  _LT_SYS_DYNAMIC_LINKER($1)
+  _LT_LINKER_HARDCODE_LIBPATH($1)
+  LT_SYS_DLOPEN_SELF
+  _LT_CMD_STRIPLIB
+
+  # Report which library types will actually be built
+  AC_MSG_CHECKING([if libtool supports shared libraries])
+  AC_MSG_RESULT([$can_build_shared])
+
+  AC_MSG_CHECKING([whether to build shared libraries])
+  # A negative capability result overrides whatever the user requested.
+  test "$can_build_shared" = "no" && enable_shared=no
+
+  # On AIX, shared libraries and static libraries use the same namespace, and
+  # are all built from PIC.
+  case $host_os in
+  aix3*)
+    # Building shared implies not building static on this host.
+    test "$enable_shared" = yes && enable_static=no
+    # The RANLIB step is appended whether or not shared libraries are
+    # enabled.  The backslashes keep $RANLIB and $lib unexpanded inside the
+    # stored command string; `~' separates commands, as in the other
+    # multi-command *_cmds values in this file.
+    if test -n "$RANLIB"; then
+      archive_cmds="$archive_cmds~\$RANLIB \$lib"
+      postinstall_cmds='$RANLIB $lib'
+    fi
+    ;;
+
+  aix[[4-9]]*)
+    # Static libraries are dropped only for the non-ia64 case in which
+    # runtime linking is not in use.
+    if test "$host_cpu" != ia64 && test "$aix_use_runtimelinking" = no ; then
+      test "$enable_shared" = yes && enable_static=no
+    fi
+    ;;
+  esac
+  AC_MSG_RESULT([$enable_shared])
+
+  AC_MSG_CHECKING([whether to build static libraries])
+  # Make sure either enable_shared or enable_static is yes.
+  test "$enable_shared" = yes || enable_static=yes
+  AC_MSG_RESULT([$enable_static])
+
+  _LT_CONFIG($1)
+fi
+AC_LANG_POP
+# Undo the save made at the top of the macro.
+CC="$lt_save_CC"
+])# _LT_LANG_C_CONFIG
+
+
+# _LT_LANG_CXX_CONFIG([TAG])
+# --------------------------
+# Ensure that the configuration variables for a C++ compiler are suitably
+# defined.  These variables are subsequently used by _LT_CONFIG to write
+# the compiler configuration to `libtool'.
+m4_defun([_LT_LANG_CXX_CONFIG],
+[m4_require([_LT_FILEUTILS_DEFAULTS])dnl
+m4_require([_LT_DECL_EGREP])dnl
+m4_require([_LT_PATH_MANIFEST_TOOL])dnl
+if test -n "$CXX" && ( test "X$CXX" != "Xno" &&
+    ( (test "X$CXX" = "Xg++" && `g++ -v >/dev/null 2>&1` ) ||
+    (test "X$CXX" != "Xg++"))) ; then
+  AC_PROG_CXXCPP
+else
+  _lt_caught_CXX_error=yes
+fi
+
+AC_LANG_PUSH(C++)
+_LT_TAGVAR(archive_cmds_need_lc, $1)=no
+_LT_TAGVAR(allow_undefined_flag, $1)=
+_LT_TAGVAR(always_export_symbols, $1)=no
+_LT_TAGVAR(archive_expsym_cmds, $1)=
+_LT_TAGVAR(compiler_needs_object, $1)=no
+_LT_TAGVAR(export_dynamic_flag_spec, $1)=
+_LT_TAGVAR(hardcode_direct, $1)=no
+_LT_TAGVAR(hardcode_direct_absolute, $1)=no
+_LT_TAGVAR(hardcode_libdir_flag_spec, $1)=
+_LT_TAGVAR(hardcode_libdir_separator, $1)=
+_LT_TAGVAR(hardcode_minus_L, $1)=no
+_LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported
+_LT_TAGVAR(hardcode_automatic, $1)=no
+_LT_TAGVAR(inherit_rpath, $1)=no
+_LT_TAGVAR(module_cmds, $1)=
+_LT_TAGVAR(module_expsym_cmds, $1)=
+_LT_TAGVAR(link_all_deplibs, $1)=unknown
+_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds
+_LT_TAGVAR(reload_flag, $1)=$reload_flag
+_LT_TAGVAR(reload_cmds, $1)=$reload_cmds
+_LT_TAGVAR(no_undefined_flag, $1)=
+_LT_TAGVAR(whole_archive_flag_spec, $1)=
+_LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no
+
+# Source file extension for C++ test sources.
+ac_ext=cpp
+
+# Object file extension for compiled C++ test sources.
+objext=o
+_LT_TAGVAR(objext, $1)=$objext
+
+# No sense in running all these tests if we already determined that
+# the CXX compiler isn't working.  Some variables (like enable_shared)
+# are currently assumed to apply to all compilers on this platform,
+# and will be corrupted by setting them based on a non-working compiler.
+if test "$_lt_caught_CXX_error" != yes; then
+  # Code to be used in simple compile tests
+  lt_simple_compile_test_code="int some_variable = 0;"
+
+  # Code to be used in simple link tests
+  lt_simple_link_test_code='int main(int, char *[[]]) { return(0); }'
+
+  # ltmain only uses $CC for tagged configurations so make sure $CC is set.
+  _LT_TAG_COMPILER
+
+  # save warnings/boilerplate of simple test code
+  _LT_COMPILER_BOILERPLATE
+  _LT_LINKER_BOILERPLATE
+
+  # Allow CC to be a program name with arguments.
+  lt_save_CC=$CC
+  lt_save_CFLAGS=$CFLAGS
+  lt_save_LD=$LD
+  lt_save_GCC=$GCC
+  GCC=$GXX
+  lt_save_with_gnu_ld=$with_gnu_ld
+  lt_save_path_LD=$lt_cv_path_LD
+  if test -n "${lt_cv_prog_gnu_ldcxx+set}"; then
+    lt_cv_prog_gnu_ld=$lt_cv_prog_gnu_ldcxx
+  else
+    $as_unset lt_cv_prog_gnu_ld
+  fi
+  if test -n "${lt_cv_path_LDCXX+set}"; then
+    lt_cv_path_LD=$lt_cv_path_LDCXX
+  else
+    $as_unset lt_cv_path_LD
+  fi
+  test -z "${LDCXX+set}" || LD=$LDCXX
+  CC=${CXX-"c++"}
+  CFLAGS=$CXXFLAGS
+  compiler=$CC
+  _LT_TAGVAR(compiler, $1)=$CC
+  _LT_CC_BASENAME([$compiler])
+
+  if test -n "$compiler"; then
+    # We don't want -fno-exception when compiling C++ code, so set the
+    # no_builtin_flag separately
+    if test "$GXX" = yes; then
+      _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=' -fno-builtin'
+    else
+      _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=
+    fi
+
+    if test "$GXX" = yes; then
+      # Set up default GNU C++ configuration
+
+      LT_PATH_LD
+
+      # Check if GNU C++ uses GNU ld as the underlying linker, since the
+      # archiving commands below assume that GNU ld is being used.
+      if test "$with_gnu_ld" = yes; then
+        _LT_TAGVAR(archive_cmds, $1)='$CC $pic_flag -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib'
+        _LT_TAGVAR(archive_expsym_cmds, $1)='$CC $pic_flag -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
+
+        _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
+        _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic'
+
+        # If archive_cmds runs LD, not CC, wlarc should be empty
+        # XXX I think wlarc can be eliminated in ltcf-cxx, but I need to
+        #     investigate it a little bit more. (MM)
+        wlarc='${wl}'
+
+        # ancient GNU ld didn't support --whole-archive et al.
+        if eval "`$CC -print-prog-name=ld` --help 2>&1" |
+         $GREP 'no-whole-archive' > /dev/null; then
+          _LT_TAGVAR(whole_archive_flag_spec, $1)="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive'
+        else
+          _LT_TAGVAR(whole_archive_flag_spec, $1)=
+        fi
+      else
+        with_gnu_ld=no
+        wlarc=
+
+        # A generic and very simple default shared library creation
+        # command for GNU C++ for the case where it uses the native
+        # linker, instead of GNU ld.  If possible, this setting should be
+        # overridden to take advantage of the native linker features on
+        # the platform it is being used on.
+        _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib'
+      fi
+
+      # Commands to make compiler produce verbose output that lists
+      # what "hidden" libraries, object files and flags are used when
+      # linking a shared library.
+      output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"'
+
+    else
+      GXX=no
+      with_gnu_ld=no
+      wlarc=
+    fi
+
+    # PORTME: fill in a description of your system's C++ link characteristics
+    AC_MSG_CHECKING([whether the $compiler linker ($LD) supports shared libraries])
+    _LT_TAGVAR(ld_shlibs, $1)=yes
+    case $host_os in
+      aix3*)
+        # FIXME: insert proper C++ library support
+        _LT_TAGVAR(ld_shlibs, $1)=no
+        ;;
+      aix[[4-9]]*)
+        if test "$host_cpu" = ia64; then
+          # On IA64, the linker does run time linking by default, so we don't
+          # have to do anything special.
+          aix_use_runtimelinking=no
+          exp_sym_flag='-Bexport'
+          no_entry_flag=""
+        else
+          aix_use_runtimelinking=no
+
+          # Test if we are trying to use run time linking or normal
+          # AIX style linking. If -brtl is somewhere in LDFLAGS, we
+          # need to do runtime linking.
+          case $host_os in aix4.[[23]]|aix4.[[23]].*|aix[[5-9]]*)
+           for ld_flag in $LDFLAGS; do
+             case $ld_flag in
+             *-brtl*)
+               aix_use_runtimelinking=yes
+               break
+               ;;
+             esac
+           done
+           ;;
+          esac
+
+          exp_sym_flag='-bexport'
+          no_entry_flag='-bnoentry'
+        fi
+
+        # When large executables or shared objects are built, AIX ld can
+        # have problems creating the table of contents.  If linking a library
+        # or program results in "error TOC overflow" add -mminimal-toc to
+        # CXXFLAGS/CFLAGS for g++/gcc.  In the cases where that is not
+        # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS.
+
+        _LT_TAGVAR(archive_cmds, $1)=''
+        _LT_TAGVAR(hardcode_direct, $1)=yes
+        _LT_TAGVAR(hardcode_direct_absolute, $1)=yes
+        _LT_TAGVAR(hardcode_libdir_separator, $1)=':'
+        _LT_TAGVAR(link_all_deplibs, $1)=yes
+        _LT_TAGVAR(file_list_spec, $1)='${wl}-f,'
+
+        if test "$GXX" = yes; then
+          case $host_os in aix4.[[012]]|aix4.[[012]].*)
+          # We only want to do this on AIX 4.2 and lower, the check
+          # below for broken collect2 doesn't work under 4.3+
+         collect2name=`${CC} -print-prog-name=collect2`
+         if test -f "$collect2name" &&
+            strings "$collect2name" | $GREP resolve_lib_name >/dev/null
+         then
+           # We have reworked collect2
+           :
+         else
+           # We have old collect2
+           _LT_TAGVAR(hardcode_direct, $1)=unsupported
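+           # It fails to find uninstalled libraries when the uninstalled
+           # path is not listed in the libpath.  Setting hardcode_minus_L
+           # to unsupported forces relinking.
+           # NOTE(review): the assignment below sets hardcode_minus_L to yes
+           # (hardcode_direct is the variable set to unsupported above) --
+           # confirm which variable this comment is meant to describe.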
+           _LT_TAGVAR(hardcode_minus_L, $1)=yes
+           _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir'
+           _LT_TAGVAR(hardcode_libdir_separator, $1)=
+         fi
+          esac
+          shared_flag='-shared'
+         if test "$aix_use_runtimelinking" = yes; then
+           shared_flag="$shared_flag "'${wl}-G'
+         fi
+        else
+          # not using gcc
+          if test "$host_cpu" = ia64; then
+         # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release
+         # chokes on -Wl,-G. The following line is correct:
+         shared_flag='-G'
+          else
+           if test "$aix_use_runtimelinking" = yes; then
+             shared_flag='${wl}-G'
+           else
+             shared_flag='${wl}-bM:SRE'
+           fi
+          fi
+        fi
+
+        _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-bexpall'
+        # It seems that -bexpall does not export symbols beginning with
+        # underscore (_), so it is better to generate a list of symbols to
+       # export.
+        _LT_TAGVAR(always_export_symbols, $1)=yes
+        if test "$aix_use_runtimelinking" = yes; then
+          # Warning - without using the other runtime loading flags (-brtl),
+          # -berok will link without error, but may produce a broken library.
+          _LT_TAGVAR(allow_undefined_flag, $1)='-berok'
+          # Determine the default libpath from the value encoded in an empty
+          # executable.
+          _LT_SYS_MODULE_PATH_AIX([$1])
+          _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-blibpath:$libdir:'"$aix_libpath"
+
+          _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags `if test "x${allow_undefined_flag}" != "x"; then func_echo_all "${wl}${allow_undefined_flag}"; else :; fi` '"\${wl}$exp_sym_flag:\$export_symbols $shared_flag"
+        else
+          if test "$host_cpu" = ia64; then
+           _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R $libdir:/usr/lib:/lib'
+           _LT_TAGVAR(allow_undefined_flag, $1)="-z nodefs"
+           _LT_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags ${wl}${allow_undefined_flag} '"\${wl}$exp_sym_flag:\$export_symbols"
+          else
+           # Determine the default libpath from the value encoded in an
+           # empty executable.
+           _LT_SYS_MODULE_PATH_AIX([$1])
+           _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-blibpath:$libdir:'"$aix_libpath"
+           # Warning - without using the other run time loading flags,
+           # -berok will link without error, but may produce a broken library.
+           _LT_TAGVAR(no_undefined_flag, $1)=' ${wl}-bernotok'
+           _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-berok'
+           if test "$with_gnu_ld" = yes; then
+             # We only use this code for GNU lds that support --whole-archive.
+             _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive$convenience ${wl}--no-whole-archive'
+           else
+             # Exported symbols can be pulled into shared objects from archives
+             _LT_TAGVAR(whole_archive_flag_spec, $1)='$convenience'
+           fi
+           _LT_TAGVAR(archive_cmds_need_lc, $1)=yes
+           # This is similar to how AIX traditionally builds its shared
+           # libraries.
+           _LT_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs ${wl}-bnoentry $compiler_flags ${wl}-bE:$export_symbols${allow_undefined_flag}~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$soname'
+          fi
+        fi
+        ;;
+
+      beos*)
+       if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then
+         _LT_TAGVAR(allow_undefined_flag, $1)=unsupported
+         # Joseph Beckenbach <jrb3@best.com> says some releases of gcc
+         # support --undefined.  This deserves some investigation.  FIXME
+         _LT_TAGVAR(archive_cmds, $1)='$CC -nostart $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+       else
+         _LT_TAGVAR(ld_shlibs, $1)=no
+       fi
+       ;;
+
+      chorus*)
+        case $cc_basename in
+          *)
+         # FIXME: insert proper C++ library support
+         _LT_TAGVAR(ld_shlibs, $1)=no
+         ;;
+        esac
+        ;;
+
+      cygwin* | mingw* | pw32* | cegcc*)
+       case $GXX,$cc_basename in
+       ,cl* | no,cl*)
+         # Native MSVC
+         # hardcode_libdir_flag_spec is actually meaningless, as there is
+         # no search path for DLLs.
+         _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=' '
+         _LT_TAGVAR(allow_undefined_flag, $1)=unsupported
+         _LT_TAGVAR(always_export_symbols, $1)=yes
+         _LT_TAGVAR(file_list_spec, $1)='@'
+         # Tell ltmain to make .lib files, not .a files.
+         libext=lib
+         # Tell ltmain to make .dll files, not .so files.
+         shrext_cmds=".dll"
+         # FIXME: Setting linknames here is a bad hack.
+         _LT_TAGVAR(archive_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-dll~linknames='
+         _LT_TAGVAR(archive_expsym_cmds, $1)='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then
+             $SED -n -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' -e '1\\\!p' < $export_symbols > $output_objdir/$soname.exp;
+           else
+             $SED -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' < $export_symbols > $output_objdir/$soname.exp;
+           fi~
+           $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~
+           linknames='
+         # The linker will not automatically build a static lib if we build a DLL.
+         # _LT_TAGVAR(old_archive_from_new_cmds, $1)='true'
+         _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes
+         # Don't use ranlib
+         _LT_TAGVAR(old_postinstall_cmds, $1)='chmod 644 $oldlib'
+         _LT_TAGVAR(postlink_cmds, $1)='lt_outputfile="@OUTPUT@"~
+           lt_tool_outputfile="@TOOL_OUTPUT@"~
+           case $lt_outputfile in
+             *.exe|*.EXE) ;;
+             *)
+               lt_outputfile="$lt_outputfile.exe"
+               lt_tool_outputfile="$lt_tool_outputfile.exe"
+               ;;
+           esac~
+           func_to_tool_file "$lt_outputfile"~
+           if test "$MANIFEST_TOOL" != ":" && test -f "$lt_outputfile.manifest"; then
+             $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1;
+             $RM "$lt_outputfile.manifest";
+           fi'
+         ;;
+       *)
+         # g++
+         # _LT_TAGVAR(hardcode_libdir_flag_spec, $1) is actually meaningless,
+         # as there is no search path for DLLs.
+         _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir'
+         _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-all-symbols'
+         _LT_TAGVAR(allow_undefined_flag, $1)=unsupported
+         _LT_TAGVAR(always_export_symbols, $1)=no
+         _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes
+
+         if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then
+           _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib'
+           # If the export-symbols file already is a .def file (1st line
+           # is EXPORTS), use it as is; otherwise, prepend...
+           _LT_TAGVAR(archive_expsym_cmds, $1)='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then
+             cp $export_symbols $output_objdir/$soname.def;
+           else
+             echo EXPORTS > $output_objdir/$soname.def;
+             cat $export_symbols >> $output_objdir/$soname.def;
+           fi~
+           $CC -shared -nostdlib $output_objdir/$soname.def $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib'
+         else
+           _LT_TAGVAR(ld_shlibs, $1)=no
+         fi
+         ;;
+       esac
+       ;;
+      darwin* | rhapsody*)
+        _LT_DARWIN_LINKER_FEATURES($1)
+       ;;
+
+      dgux*)
+        case $cc_basename in
+          ec++*)
+           # FIXME: insert proper C++ library support
+           _LT_TAGVAR(ld_shlibs, $1)=no
+           ;;
+          ghcx*)
+           # Green Hills C++ Compiler
+           # FIXME: insert proper C++ library support
+           _LT_TAGVAR(ld_shlibs, $1)=no
+           ;;
+          *)
+           # FIXME: insert proper C++ library support
+           _LT_TAGVAR(ld_shlibs, $1)=no
+           ;;
+        esac
+        ;;
+
+      freebsd2.*)
+        # C++ shared libraries reported to be fairly broken before
+       # switch to ELF
+        _LT_TAGVAR(ld_shlibs, $1)=no
+        ;;
+
+      freebsd-elf*)
+        _LT_TAGVAR(archive_cmds_need_lc, $1)=no
+        ;;
+
+      freebsd* | dragonfly*)
+        # FreeBSD 3 and later use GNU C++ and GNU ld with standard ELF
+        # conventions
+        _LT_TAGVAR(ld_shlibs, $1)=yes
+        ;;
+
+      gnu*)
+        ;;
+
+      haiku*)
+        _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+        _LT_TAGVAR(link_all_deplibs, $1)=yes
+        ;;
+
+      hpux9*)
+        _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir'
+        _LT_TAGVAR(hardcode_libdir_separator, $1)=:
+        _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E'
+        _LT_TAGVAR(hardcode_direct, $1)=yes
+        _LT_TAGVAR(hardcode_minus_L, $1)=yes # Not in the search PATH,
+                                            # but as the default
+                                            # location of the library.
+
+        case $cc_basename in
+          CC*)
+            # FIXME: insert proper C++ library support
+            _LT_TAGVAR(ld_shlibs, $1)=no
+            ;;
+          aCC*)
+            _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$CC -b ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib'
+            # Commands to make compiler produce verbose output that lists
+            # what "hidden" libraries, object files and flags are used when
+            # linking a shared library.
+            #
+            # There doesn't appear to be a way to prevent this compiler from
+            # explicitly linking system object files so we need to strip them
+            # from the output so that they don't get included in the library
+            # dependencies.
+            output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $EGREP "\-L"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"'
+            ;;
+          *)
+            if test "$GXX" = yes; then
+              _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$CC -shared -nostdlib $pic_flag ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib'
+            else
+              # FIXME: insert proper C++ library support
+              _LT_TAGVAR(ld_shlibs, $1)=no
+            fi
+            ;;
+        esac
+        ;;
+
+      hpux10*|hpux11*)
+        # HP-UX 10.x and 11.x C++ link settings.
+        # $with_gnu_ld is quoted in every test below so that an empty or
+        # unset value compares as false instead of making "test" fail with
+        # a syntax error; this matches the quoting used by the other arms.
+        if test "$with_gnu_ld" = no; then
+         _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir'
+         _LT_TAGVAR(hardcode_libdir_separator, $1)=:
+
+          case $host_cpu in
+            hppa*64*|ia64*)
+              ;;
+            *)
+             _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E'
+              ;;
+          esac
+        fi
+        case $host_cpu in
+          hppa*64*|ia64*)
+            _LT_TAGVAR(hardcode_direct, $1)=no
+            _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
+            ;;
+          *)
+            _LT_TAGVAR(hardcode_direct, $1)=yes
+            _LT_TAGVAR(hardcode_direct_absolute, $1)=yes
+            _LT_TAGVAR(hardcode_minus_L, $1)=yes # Not in the search PATH,
+                                                # but as the default
+                                                # location of the library.
+            ;;
+        esac
+
+        # Pick the shared-library link command by compiler, then by CPU.
+        case $cc_basename in
+          CC*)
+           # FIXME: insert proper C++ library support
+           _LT_TAGVAR(ld_shlibs, $1)=no
+           ;;
+          aCC*)
+           case $host_cpu in
+             hppa*64*)
+               _LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
+               ;;
+             ia64*)
+               _LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
+               ;;
+             *)
+               _LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
+               ;;
+           esac
+           # Commands to make compiler produce verbose output that lists
+           # what "hidden" libraries, object files and flags are used when
+           # linking a shared library.
+           #
+           # There doesn't appear to be a way to prevent this compiler from
+           # explicitly linking system object files so we need to strip them
+           # from the output so that they don't get included in the library
+           # dependencies.
+           output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $GREP "\-L"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"'
+           ;;
+          *)
+           if test "$GXX" = yes; then
+             # With GNU ld no archive_cmds override is made here; whatever
+             # value was assigned earlier stays in effect.
+             if test "$with_gnu_ld" = no; then
+               case $host_cpu in
+                 hppa*64*)
+                   _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib -fPIC ${wl}+h ${wl}$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
+                   ;;
+                 ia64*)
+                   _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $pic_flag ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
+                   ;;
+                 *)
+                   _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $pic_flag ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
+                   ;;
+               esac
+             fi
+           else
+             # FIXME: insert proper C++ library support
+             _LT_TAGVAR(ld_shlibs, $1)=no
+           fi
+           ;;
+        esac
+        ;;
+
+      interix[[3-9]]*)
+       _LT_TAGVAR(hardcode_direct, $1)=no
+       _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
+       _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir'
+       _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E'
+       # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc.
+       # Instead, shared libraries are loaded at an image base (0x10000000 by
+       # default) and relocated if they conflict, which is a slow, very
+       # memory-consuming and fragmenting process.  To avoid this, we pick a random,
+       # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link
+       # time.  Moving up from 0x10000000 also allows more sbrk(2) space.
+       _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib'
+       _LT_TAGVAR(archive_expsym_cmds, $1)='sed "s,^,_," $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--retain-symbols-file,$output_objdir/$soname.expsym ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib'
+       ;;
+      irix5* | irix6*)
+        case $cc_basename in
+          CC*)
+           # SGI C++
+           _LT_TAGVAR(archive_cmds, $1)='$CC -shared -all -multigot $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib'
+
+           # Archives containing C++ object files must be created using
+           # "CC -ar", where "CC" is the IRIX C++ compiler.  This is
+           # necessary to make sure instantiated templates are included
+           # in the archive.
+           _LT_TAGVAR(old_archive_cmds, $1)='$CC -ar -WR,-u -o $oldlib $oldobjs'
+           ;;
+          *)
+           if test "$GXX" = yes; then
+             if test "$with_gnu_ld" = no; then
+               _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
+             else
+               _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` -o $lib'
+             fi
+           fi
+           _LT_TAGVAR(link_all_deplibs, $1)=yes
+           ;;
+        esac
+        _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
+        _LT_TAGVAR(hardcode_libdir_separator, $1)=:
+        _LT_TAGVAR(inherit_rpath, $1)=yes
+        ;;
+
+      linux* | k*bsd*-gnu | kopensolaris*-gnu)
+        case $cc_basename in
+          KCC*)
+           # Kuck and Associates, Inc. (KAI) C++ Compiler
+
+           # KCC will only create a shared library if the output file
+           # ends with ".so" (or ".sl" for HP-UX), so rename the library
+           # to its proper name (with version) after linking.
+           _LT_TAGVAR(archive_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib'
+           _LT_TAGVAR(archive_expsym_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib ${wl}-retain-symbols-file,$export_symbols; mv \$templib $lib'
+           # Commands to make compiler produce verbose output that lists
+           # what "hidden" libraries, object files and flags are used when
+           # linking a shared library.
+           #
+           # There doesn't appear to be a way to prevent this compiler from
+           # explicitly linking system object files so we need to strip them
+           # from the output so that they don't get included in the library
+           # dependencies.
+           output_verbose_link_cmd='templist=`$CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1 | $GREP "ld"`; rm -f libconftest$shared_ext; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"'
+
+           _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir'
+           _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic'
+
+           # Archives containing C++ object files must be created using
+           # "CC -Bstatic", where "CC" is the KAI C++ compiler.
+           _LT_TAGVAR(old_archive_cmds, $1)='$CC -Bstatic -o $oldlib $oldobjs'
+           ;;
+         icpc* | ecpc* )
+           # Intel C++
+           with_gnu_ld=yes
+           # version 8.0 and above of icpc choke on multiply defined symbols
+           # if we add $predep_objects and $postdep_objects, however 7.1 and
+           # earlier do not add the objects themselves.
+           case `$CC -V 2>&1` in
+             *"Version 7."*)
+               _LT_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib'
+               _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
+               ;;
+             *)  # Version 8.0 or newer
+               tmp_idyn=
+               case $host_cpu in
+                 ia64*) tmp_idyn=' -i_dynamic';;
+               esac
+               _LT_TAGVAR(archive_cmds, $1)='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+               _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
+               ;;
+           esac
+           _LT_TAGVAR(archive_cmds_need_lc, $1)=no
+           _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir'
+           _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic'
+           _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive$convenience ${wl}--no-whole-archive'
+           ;;
+          pgCC* | pgcpp*)
+            # Portland Group C++ compiler
+           case `$CC -V` in
+           *pgCC\ [[1-5]].* | *pgcpp\ [[1-5]].*)
+             _LT_TAGVAR(prelink_cmds, $1)='tpldir=Template.dir~
+               rm -rf $tpldir~
+               $CC --prelink_objects --instantiation_dir $tpldir $objs $libobjs $compile_deplibs~
+               compile_command="$compile_command `find $tpldir -name \*.o | sort | $NL2SP`"'
+             _LT_TAGVAR(old_archive_cmds, $1)='tpldir=Template.dir~
+               rm -rf $tpldir~
+               $CC --prelink_objects --instantiation_dir $tpldir $oldobjs$old_deplibs~
+               $AR $AR_FLAGS $oldlib$oldobjs$old_deplibs `find $tpldir -name \*.o | sort | $NL2SP`~
+               $RANLIB $oldlib'
+             _LT_TAGVAR(archive_cmds, $1)='tpldir=Template.dir~
+               rm -rf $tpldir~
+               $CC --prelink_objects --instantiation_dir $tpldir $predep_objects $libobjs $deplibs $convenience $postdep_objects~
+               $CC -shared $pic_flag $predep_objects $libobjs $deplibs `find $tpldir -name \*.o | sort | $NL2SP` $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname -o $lib'
+             _LT_TAGVAR(archive_expsym_cmds, $1)='tpldir=Template.dir~
+               rm -rf $tpldir~
+               $CC --prelink_objects --instantiation_dir $tpldir $predep_objects $libobjs $deplibs $convenience $postdep_objects~
+               $CC -shared $pic_flag $predep_objects $libobjs $deplibs `find $tpldir -name \*.o | sort | $NL2SP` $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname ${wl}-retain-symbols-file ${wl}$export_symbols -o $lib'
+             ;;
+           *) # Version 6 and above use weak symbols
+             _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname -o $lib'
+             _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname ${wl}-retain-symbols-file ${wl}$export_symbols -o $lib'
+             ;;
+           esac
+
+           _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}--rpath ${wl}$libdir'
+           _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic'
+           _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`for conv in $convenience\"\"; do test  -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive'
+            ;;
+         cxx*)
+           # Compaq C++
+           _LT_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib'
+           _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname  -o $lib ${wl}-retain-symbols-file $wl$export_symbols'
+
+           runpath_var=LD_RUN_PATH
+           _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir'
+           _LT_TAGVAR(hardcode_libdir_separator, $1)=:
+
+           # Commands to make compiler produce verbose output that lists
+           # what "hidden" libraries, object files and flags are used when
+           # linking a shared library.
+           #
+           # There doesn't appear to be a way to prevent this compiler from
+           # explicitly linking system object files so we need to strip them
+           # from the output so that they don't get included in the library
+           # dependencies.
+           output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP "ld"`; templist=`func_echo_all "$templist" | $SED "s/\(^.*ld.*\)\( .*ld .*$\)/\1/"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "X$list" | $Xsed'
+           ;;
+         xl* | mpixl* | bgxl*)
+           # IBM XL 8.0 on PPC, with GNU ld
+           _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
+           _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic'
+           _LT_TAGVAR(archive_cmds, $1)='$CC -qmkshrobj $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+           if test "x$supports_anon_versioning" = xyes; then
+             _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~
+               cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~
+               echo "local: *; };" >> $output_objdir/$libname.ver~
+               $CC -qmkshrobj $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-version-script ${wl}$output_objdir/$libname.ver -o $lib'
+           fi
+           ;;
+         *)
+           case `$CC -V 2>&1 | sed 5q` in
+           *Sun\ C*)
+             # Sun C++ 5.9
+             _LT_TAGVAR(no_undefined_flag, $1)=' -zdefs'
+             _LT_TAGVAR(archive_cmds, $1)='$CC -G${allow_undefined_flag} -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
+             _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G${allow_undefined_flag} -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-retain-symbols-file ${wl}$export_symbols'
+             _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir'
+             _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive'
+             _LT_TAGVAR(compiler_needs_object, $1)=yes
+
+             # Not sure whether something based on
+             # $CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1
+             # would be better.
+             output_verbose_link_cmd='func_echo_all'
+
+             # Archives containing C++ object files must be created using
+             # "CC -xar", where "CC" is the Sun C++ compiler.  This is
+             # necessary to make sure instantiated templates are included
+             # in the archive.
+             _LT_TAGVAR(old_archive_cmds, $1)='$CC -xar -o $oldlib $oldobjs'
+             ;;
+           esac
+           ;;
+       esac
+       ;;
+
+      lynxos*)
+        # FIXME: insert proper C++ library support
+       _LT_TAGVAR(ld_shlibs, $1)=no
+       ;;
+
+      m88k*)
+        # FIXME: insert proper C++ library support
+        _LT_TAGVAR(ld_shlibs, $1)=no
+       ;;
+
+      mvs*)
+        case $cc_basename in
+          cxx*)
+           # FIXME: insert proper C++ library support
+           _LT_TAGVAR(ld_shlibs, $1)=no
+           ;;
+         *)
+           # FIXME: insert proper C++ library support
+           _LT_TAGVAR(ld_shlibs, $1)=no
+           ;;
+       esac
+       ;;
+
+      netbsd*)
+        if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then
+         _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable  -o $lib $predep_objects $libobjs $deplibs $postdep_objects $linker_flags'
+         wlarc=
+         _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir'
+         _LT_TAGVAR(hardcode_direct, $1)=yes
+         _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
+       fi
+       # Workaround some broken pre-1.5 toolchains
+       output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP conftest.$objext | $SED -e "s:-lgcc -lc -lgcc::"'
+       ;;
+
+      *nto* | *qnx*)
+        _LT_TAGVAR(ld_shlibs, $1)=yes
+       ;;
+
+      openbsd2*)
+        # C++ shared libraries are fairly broken
+       _LT_TAGVAR(ld_shlibs, $1)=no
+       ;;
+
+      openbsd*)
+       # Shared C++ libraries are only configured when /usr/libexec/ld.so
+       # exists; otherwise they are disabled.
+       if test -f /usr/libexec/ld.so; then
+         _LT_TAGVAR(hardcode_direct, $1)=yes
+         _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
+         _LT_TAGVAR(hardcode_direct_absolute, $1)=yes
+         _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib'
+         _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir'
+         # The probe output is empty when the preprocessor replaced the
+         # __ELF__ token, i.e. when the macro is defined.  Use the
+         # configure-selected $GREP here, as the netbsd arm does for the
+         # same probe, instead of whatever grep is first in PATH.
+         if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
+           _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-retain-symbols-file,$export_symbols -o $lib'
+           _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E'
+           _LT_TAGVAR(whole_archive_flag_spec, $1)="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive'
+         fi
+         output_verbose_link_cmd=func_echo_all
+       else
+         _LT_TAGVAR(ld_shlibs, $1)=no
+       fi
+       ;;
+
+      osf3* | osf4* | osf5*)
+        case $cc_basename in
+          KCC*)
+           # Kuck and Associates, Inc. (KAI) C++ Compiler
+
+           # KCC will only create a shared library if the output file
+           # ends with ".so" (or ".sl" for HP-UX), so rename the library
+           # to its proper name (with version) after linking.
+           _LT_TAGVAR(archive_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo "$lib" | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib'
+
+           _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir'
+           _LT_TAGVAR(hardcode_libdir_separator, $1)=:
+
+           # Archives containing C++ object files must be created using
+           # the KAI C++ compiler.
+           case $host in
+             osf3*) _LT_TAGVAR(old_archive_cmds, $1)='$CC -Bstatic -o $oldlib $oldobjs' ;;
+             *) _LT_TAGVAR(old_archive_cmds, $1)='$CC -o $oldlib $oldobjs' ;;
+           esac
+           ;;
+          RCC*)
+           # Rational C++ 2.4.1
+           # FIXME: insert proper C++ library support
+           _LT_TAGVAR(ld_shlibs, $1)=no
+           ;;
+          cxx*)
+           case $host in
+             osf3*)
+               _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-expect_unresolved ${wl}\*'
+               _LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $soname `test -n "$verstring" && func_echo_all "${wl}-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib'
+               _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
+               ;;
+             *)
+               _LT_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*'
+               _LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib'
+               _LT_TAGVAR(archive_expsym_cmds, $1)='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done~
+                 echo "-hidden">> $lib.exp~
+                 $CC -shared$allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname ${wl}-input ${wl}$lib.exp  `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib~
+                 $RM $lib.exp'
+               _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir'
+               ;;
+           esac
+
+           _LT_TAGVAR(hardcode_libdir_separator, $1)=:
+
+           # Commands to make compiler produce verbose output that lists
+           # what "hidden" libraries, object files and flags are used when
+           # linking a shared library.
+           #
+           # There doesn't appear to be a way to prevent this compiler from
+           # explicitly linking system object files so we need to strip them
+           # from the output so that they don't get included in the library
+           # dependencies.
+           output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP "ld" | $GREP -v "ld:"`; templist=`func_echo_all "$templist" | $SED "s/\(^.*ld.*\)\( .*ld.*$\)/\1/"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"'
+           ;;
+         *)
+           if test "$GXX" = yes && test "$with_gnu_ld" = no; then
+             _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-expect_unresolved ${wl}\*'
+             case $host in
+               osf3*)
+                 _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib ${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
+                 ;;
+               *)
+                 _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib ${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-msym ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
+                 ;;
+             esac
+
+             _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
+             _LT_TAGVAR(hardcode_libdir_separator, $1)=:
+
+             # Commands to make compiler produce verbose output that lists
+             # what "hidden" libraries, object files and flags are used when
+             # linking a shared library.
+             output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"'
+
+           else
+             # FIXME: insert proper C++ library support
+             _LT_TAGVAR(ld_shlibs, $1)=no
+           fi
+           ;;
+        esac
+        ;;
+
+      psos*)
+        # FIXME: insert proper C++ library support
+        _LT_TAGVAR(ld_shlibs, $1)=no
+        ;;
+
+      sunos4*)
+        case $cc_basename in
+          CC*)
+           # Sun C++ 4.x
+           # FIXME: insert proper C++ library support
+           _LT_TAGVAR(ld_shlibs, $1)=no
+           ;;
+          lcc*)
+           # Lucid
+           # FIXME: insert proper C++ library support
+           _LT_TAGVAR(ld_shlibs, $1)=no
+           ;;
+          *)
+           # FIXME: insert proper C++ library support
+           _LT_TAGVAR(ld_shlibs, $1)=no
+           ;;
+        esac
+        ;;
+
+      solaris*)
+        case $cc_basename in
+          CC* | sunCC*)
+           # Sun C++ 4.2, 5.x and Centerline C++
+            _LT_TAGVAR(archive_cmds_need_lc,$1)=yes
+           _LT_TAGVAR(no_undefined_flag, $1)=' -zdefs'
+           _LT_TAGVAR(archive_cmds, $1)='$CC -G${allow_undefined_flag}  -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
+           _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~
+             $CC -G${allow_undefined_flag} ${wl}-M ${wl}$lib.exp -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp'
+
+           _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir'
+           _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
+           case $host_os in
+             solaris2.[[0-5]] | solaris2.[[0-5]].*) ;;
+             *)
+               # The compiler driver will combine and reorder linker options,
+               # but understands `-z linker_flag'.
+               # Supported since Solaris 2.6 (maybe 2.5.1?)
+               _LT_TAGVAR(whole_archive_flag_spec, $1)='-z allextract$convenience -z defaultextract'
+               ;;
+           esac
+           _LT_TAGVAR(link_all_deplibs, $1)=yes
+
+           output_verbose_link_cmd='func_echo_all'
+
+           # Archives containing C++ object files must be created using
+           # "CC -xar", where "CC" is the Sun C++ compiler.  This is
+           # necessary to make sure instantiated templates are included
+           # in the archive.
+           _LT_TAGVAR(old_archive_cmds, $1)='$CC -xar -o $oldlib $oldobjs'
+           ;;
+          gcx*)
+           # Green Hills C++ Compiler
+           _LT_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib'
+
+           # The C++ compiler must be used to create the archive.
+           _LT_TAGVAR(old_archive_cmds, $1)='$CC $LDFLAGS -archive -o $oldlib $oldobjs'
+           ;;
+          *)
+           # GNU C++ compiler with Solaris linker
+           if test "$GXX" = yes && test "$with_gnu_ld" = no; then
+             _LT_TAGVAR(no_undefined_flag, $1)=' ${wl}-z ${wl}defs'
+             if $CC --version | $GREP -v '^2\.7' > /dev/null; then
+               _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib $LDFLAGS $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib'
+               _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~
+                 $CC -shared $pic_flag -nostdlib ${wl}-M $wl$lib.exp -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp'
+
+               # Commands to make compiler produce verbose output that lists
+               # what "hidden" libraries, object files and flags are used when
+               # linking a shared library.
+               output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"'
+             else
+               # g++ 2.7 appears to require `-G' NOT `-shared' on this
+               # platform.
+               _LT_TAGVAR(archive_cmds, $1)='$CC -G -nostdlib $LDFLAGS $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib'
+               _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~
+                 $CC -G -nostdlib ${wl}-M $wl$lib.exp -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp'
+
+               # Commands to make compiler produce verbose output that lists
+               # what "hidden" libraries, object files and flags are used when
+               # linking a shared library.
+               output_verbose_link_cmd='$CC -G $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"'
+             fi
+
+             _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R $wl$libdir'
+             case $host_os in
+               solaris2.[[0-5]] | solaris2.[[0-5]].*) ;;
+               *)
+                 _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}-z ${wl}allextract$convenience ${wl}-z ${wl}defaultextract'
+                 ;;
+             esac
+           fi
+           ;;
+        esac
+        ;;
+
+    sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[[01]].[[10]]* | unixware7* | sco3.2v5.0.[[024]]*)
+      _LT_TAGVAR(no_undefined_flag, $1)='${wl}-z,text'
+      _LT_TAGVAR(archive_cmds_need_lc, $1)=no
+      _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
+      runpath_var='LD_RUN_PATH'
+
+      case $cc_basename in
+        CC*)
+         _LT_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+         _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+         ;;
+       *)
+         _LT_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+         _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+         ;;
+      esac
+      ;;
+
+      sysv5* | sco3.2v5* | sco5v6*)
+       # Note: We can NOT use -z defs as we might desire, because we do not
+       # link with -lc, and that would cause any symbols used from libc to
+       # always be unresolved, which means just about no library would
+       # ever link correctly.  If we're not using GNU ld we use -z text
+       # though, which does catch some bad symbols but isn't as heavy-handed
+       # as -z defs.
+       _LT_TAGVAR(no_undefined_flag, $1)='${wl}-z,text'
+       _LT_TAGVAR(allow_undefined_flag, $1)='${wl}-z,nodefs'
+       _LT_TAGVAR(archive_cmds_need_lc, $1)=no
+       _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
+       _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R,$libdir'
+       _LT_TAGVAR(hardcode_libdir_separator, $1)=':'
+       _LT_TAGVAR(link_all_deplibs, $1)=yes
+       _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-Bexport'
+       runpath_var='LD_RUN_PATH'
+
+       case $cc_basename in
+          CC*)
+           _LT_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+           _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+           _LT_TAGVAR(old_archive_cmds, $1)='$CC -Tprelink_objects $oldobjs~
+             '"$_LT_TAGVAR(old_archive_cmds, $1)"
+           _LT_TAGVAR(reload_cmds, $1)='$CC -Tprelink_objects $reload_objs~
+             '"$_LT_TAGVAR(reload_cmds, $1)"
+           ;;
+         *)
+           _LT_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+           _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+           ;;
+       esac
+      ;;
+
+      tandem*)
+        case $cc_basename in
+          NCC*)
+           # NonStop-UX NCC 3.20
+           # FIXME: insert proper C++ library support
+           _LT_TAGVAR(ld_shlibs, $1)=no
+           ;;
+          *)
+           # FIXME: insert proper C++ library support
+           _LT_TAGVAR(ld_shlibs, $1)=no
+           ;;
+        esac
+        ;;
+
+      vxworks*)
+        # FIXME: insert proper C++ library support
+        _LT_TAGVAR(ld_shlibs, $1)=no
+        ;;
+
+      *)
+        # FIXME: insert proper C++ library support
+        _LT_TAGVAR(ld_shlibs, $1)=no
+        ;;
+    esac
+
+    AC_MSG_RESULT([$_LT_TAGVAR(ld_shlibs, $1)])
+    test "$_LT_TAGVAR(ld_shlibs, $1)" = no && can_build_shared=no
+
+    _LT_TAGVAR(GCC, $1)="$GXX"
+    _LT_TAGVAR(LD, $1)="$LD"
+
+    ## CAVEAT EMPTOR:
+    ## There is no encapsulation within the following macros, do not change
+    ## the running order or otherwise move them around unless you know exactly
+    ## what you are doing...
+    _LT_SYS_HIDDEN_LIBDEPS($1)
+    _LT_COMPILER_PIC($1)
+    _LT_COMPILER_C_O($1)
+    _LT_COMPILER_FILE_LOCKS($1)
+    _LT_LINKER_SHLIBS($1)
+    _LT_SYS_DYNAMIC_LINKER($1)
+    _LT_LINKER_HARDCODE_LIBPATH($1)
+
+    _LT_CONFIG($1)
+  fi # test -n "$compiler"
+
+  CC=$lt_save_CC
+  CFLAGS=$lt_save_CFLAGS
+  LDCXX=$LD
+  LD=$lt_save_LD
+  GCC=$lt_save_GCC
+  with_gnu_ld=$lt_save_with_gnu_ld
+  lt_cv_path_LDCXX=$lt_cv_path_LD
+  lt_cv_path_LD=$lt_save_path_LD
+  lt_cv_prog_gnu_ldcxx=$lt_cv_prog_gnu_ld
+  lt_cv_prog_gnu_ld=$lt_save_with_gnu_ld
+fi # test "$_lt_caught_CXX_error" != yes
+
+AC_LANG_POP
+])# _LT_LANG_CXX_CONFIG
+
+
+# _LT_FUNC_STRIPNAME_CNF
+# ----------------------
+# func_stripname_cnf prefix suffix name
+# strip PREFIX and SUFFIX off of NAME.
+# PREFIX and SUFFIX must not contain globbing or regex special
+# characters, hashes, percent signs, but SUFFIX may contain a leading
+# dot (in which case that matches only a dot).
+#
+# The stripped name is not printed; it is left in the shell variable
+# func_stripname_result for the caller to read.
+#
+# This function is identical to the (non-XSI) version of func_stripname,
+# except this one can be used by m4 code that may be executed by configure,
+# rather than the libtool script.
+m4_defun([_LT_FUNC_STRIPNAME_CNF],[dnl
+AC_REQUIRE([_LT_DECL_SED])
+AC_REQUIRE([_LT_PROG_ECHO_BACKSLASH])
+func_stripname_cnf ()
+{
+  # A suffix starting with a dot gets a backslash put in front of it in
+  # the sed expression, so that the dot is matched literally instead of
+  # as the regex "any character" wildcard.
+  case ${2} in
+  .*) func_stripname_result=`$ECHO "${3}" | $SED "s%^${1}%%; s%\\\\${2}\$%%"`;;
+  *)  func_stripname_result=`$ECHO "${3}" | $SED "s%^${1}%%; s%${2}\$%%"`;;
+  esac
+} # func_stripname_cnf
+])# _LT_FUNC_STRIPNAME_CNF
+
+# _LT_SYS_HIDDEN_LIBDEPS([TAGNAME])
+# ---------------------------------
+# Figure out "hidden" library dependencies from verbose
+# compiler output when linking a shared library.
+# Parse the compiler output and extract the necessary
+# objects, libraries and library flags.
+#
+# Results are stored in the per-tag variables predep_objects,
+# postdep_objects, postdeps, compiler_lib_search_path and
+# compiler_lib_search_dirs (predeps is only reset here).
+m4_defun([_LT_SYS_HIDDEN_LIBDEPS],
+[m4_require([_LT_FILEUTILS_DEFAULTS])dnl
+AC_REQUIRE([_LT_FUNC_STRIPNAME_CNF])dnl
+# Dependencies to place before and after the object being linked:
+_LT_TAGVAR(predep_objects, $1)=
+_LT_TAGVAR(postdep_objects, $1)=
+_LT_TAGVAR(predeps, $1)=
+_LT_TAGVAR(postdeps, $1)=
+_LT_TAGVAR(compiler_lib_search_path, $1)=
+
+dnl we can't use the lt_simple_compile_test_code here,
+dnl because it contains code intended for an executable,
+dnl not a library.  It's possible we should let each
+dnl tag define a new lt_????_link_test_code variable,
+dnl but it's only used here...
+m4_if([$1], [], [cat > conftest.$ac_ext <<_LT_EOF
+int a;
+void foo (void) { a = 0; }
+_LT_EOF
+], [$1], [CXX], [cat > conftest.$ac_ext <<_LT_EOF
+class Foo
+{
+public:
+  Foo (void) { a = 0; }
+private:
+  int a;
+};
+_LT_EOF
+], [$1], [F77], [cat > conftest.$ac_ext <<_LT_EOF
+      subroutine foo
+      implicit none
+      integer*4 a
+      a=0
+      return
+      end
+_LT_EOF
+], [$1], [FC], [cat > conftest.$ac_ext <<_LT_EOF
+      subroutine foo
+      implicit none
+      integer a
+      a=0
+      return
+      end
+_LT_EOF
+], [$1], [GCJ], [cat > conftest.$ac_ext <<_LT_EOF
+public class foo {
+  private int a;
+  public void bar (void) {
+    a = 0;
+  }
+};
+_LT_EOF
+], [$1], [GO], [cat > conftest.$ac_ext <<_LT_EOF
+package foo
+func foo() {
+}
+_LT_EOF
+])
+
+_lt_libdeps_save_CFLAGS=$CFLAGS
+case "$CC $CFLAGS " in #(
+*\ -flto*\ *) CFLAGS="$CFLAGS -fno-lto" ;;
+*\ -fwhopr*\ *) CFLAGS="$CFLAGS -fno-whopr" ;;
+*\ -fuse-linker-plugin*\ *) CFLAGS="$CFLAGS -fno-use-linker-plugin" ;;
+esac
+
+dnl Parse the compiler output and extract the necessary
+dnl objects, libraries and library flags.
+if AC_TRY_EVAL(ac_compile); then
+  # Parse the compiler output and extract the necessary
+  # objects, libraries and library flags.
+
+  # Sentinel used to keep track of whether or not we are before
+  # the conftest object file.
+  pre_test_object_deps_done=no
+
+  # Start with no pending option, so that a value left over from an
+  # earlier run of this loop (for instance for another tag, when the
+  # output ended in a bare "-L" or "-R") cannot be glued onto the
+  # first word of this run.
+  prev=
+
+  for p in `eval "$output_verbose_link_cmd"`; do
+    case ${prev}${p} in
+
+    -L* | -R* | -l*)
+       # Some compilers place space between "-{L,R}" and the path.
+       # Remove the space.
+       if test $p = "-L" ||
+          test $p = "-R"; then
+        prev=$p
+        continue
+       fi
+
+       # Expand the sysroot to ease extracting the directories later.
+       if test -z "$prev"; then
+         case $p in
+         -L*) func_stripname_cnf '-L' '' "$p"; prev=-L; p=$func_stripname_result ;;
+         -R*) func_stripname_cnf '-R' '' "$p"; prev=-R; p=$func_stripname_result ;;
+         -l*) func_stripname_cnf '-l' '' "$p"; prev=-l; p=$func_stripname_result ;;
+         esac
+       fi
+       case $p in
+       =*) func_stripname_cnf '=' '' "$p"; p=$lt_sysroot$func_stripname_result ;;
+       esac
+       if test "$pre_test_object_deps_done" = no; then
+        case ${prev} in
+        -L | -R)
+          # Internal compiler library paths should come after those
+          # provided by the user.  The postdeps already come after the
+          # user supplied libs so there is no need to process them.
+          if test -z "$_LT_TAGVAR(compiler_lib_search_path, $1)"; then
+            _LT_TAGVAR(compiler_lib_search_path, $1)="${prev}${p}"
+          else
+            _LT_TAGVAR(compiler_lib_search_path, $1)="${_LT_TAGVAR(compiler_lib_search_path, $1)} ${prev}${p}"
+          fi
+          ;;
+        # The "-l" case would never come before the object being
+        # linked, so don't bother handling this case.
+        esac
+       else
+        if test -z "$_LT_TAGVAR(postdeps, $1)"; then
+          _LT_TAGVAR(postdeps, $1)="${prev}${p}"
+        else
+          _LT_TAGVAR(postdeps, $1)="${_LT_TAGVAR(postdeps, $1)} ${prev}${p}"
+        fi
+       fi
+       prev=
+       ;;
+
+    *.lto.$objext) ;; # Ignore GCC LTO objects
+    *.$objext)
+       # This assumes that the test object file only shows up
+       # once in the compiler output.
+       if test "$p" = "conftest.$objext"; then
+        pre_test_object_deps_done=yes
+        continue
+       fi
+
+       if test "$pre_test_object_deps_done" = no; then
+        if test -z "$_LT_TAGVAR(predep_objects, $1)"; then
+          _LT_TAGVAR(predep_objects, $1)="$p"
+        else
+          _LT_TAGVAR(predep_objects, $1)="$_LT_TAGVAR(predep_objects, $1) $p"
+        fi
+       else
+        if test -z "$_LT_TAGVAR(postdep_objects, $1)"; then
+          _LT_TAGVAR(postdep_objects, $1)="$p"
+        else
+          _LT_TAGVAR(postdep_objects, $1)="$_LT_TAGVAR(postdep_objects, $1) $p"
+        fi
+       fi
+       ;;
+
+    *) ;; # Ignore the rest.
+
+    esac
+  done
+
+  # Clean up.
+  rm -f a.out a.exe
+else
+  echo "libtool.m4: error: problem compiling $1 test program"
+fi
+
+# Remove the object file produced by the test compilation above and
+# put back the CFLAGS that were saved before the LTO flags were added.
+$RM -f conftest.$objext
+CFLAGS=$_lt_libdeps_save_CFLAGS
+
+# PORTME: override above test on systems where it is broken
+m4_if([$1], [CXX],
+[case $host_os in
+interix[[3-9]]*)
+  # Interix 3.5 installs completely hosed .la files for C++, so rather than
+  # hack all around it, let's just trust "g++" to DTRT.
+  _LT_TAGVAR(predep_objects,$1)=
+  _LT_TAGVAR(postdep_objects,$1)=
+  _LT_TAGVAR(postdeps,$1)=
+  ;;
+
+linux*)
+  case `$CC -V 2>&1 | sed 5q` in
+  *Sun\ C*)
+    # Sun C++ 5.9
+
+    # The more standards-conforming stlport4 library is
+    # incompatible with the Cstd library. Avoid specifying
+    # it if it's in CXXFLAGS. Ignore libCrun as
+    # -library=stlport4 depends on it.
+    case " $CXX $CXXFLAGS " in
+    *" -library=stlport4 "*)
+      solaris_use_stlport4=yes
+      ;;
+    esac
+
+    if test "$solaris_use_stlport4" != yes; then
+      _LT_TAGVAR(postdeps,$1)='-library=Cstd -library=Crun'
+    fi
+    ;;
+  esac
+  ;;
+
+solaris*)
+  case $cc_basename in
+  CC* | sunCC*)
+    # The more standards-conforming stlport4 library is
+    # incompatible with the Cstd library. Avoid specifying
+    # it if it's in CXXFLAGS. Ignore libCrun as
+    # -library=stlport4 depends on it.
+    case " $CXX $CXXFLAGS " in
+    *" -library=stlport4 "*)
+      solaris_use_stlport4=yes
+      ;;
+    esac
+
+    # Adding this requires a known-good setup of shared libraries for
+    # Sun compiler versions before 5.6, else PIC objects from an old
+    # archive will be linked into the output, leading to subtle bugs.
+    if test "$solaris_use_stlport4" != yes; then
+      _LT_TAGVAR(postdeps,$1)='-library=Cstd -library=Crun'
+    fi
+    ;;
+  esac
+  ;;
+esac
+])
+
+case " $_LT_TAGVAR(postdeps, $1) " in
+*" -lc "*) _LT_TAGVAR(archive_cmds_need_lc, $1)=no ;;
+esac
+ _LT_TAGVAR(compiler_lib_search_dirs, $1)=
+if test -n "${_LT_TAGVAR(compiler_lib_search_path, $1)}"; then
+ _LT_TAGVAR(compiler_lib_search_dirs, $1)=`echo " ${_LT_TAGVAR(compiler_lib_search_path, $1)}" | ${SED} -e 's! -L! !g' -e 's!^ !!'`
+fi
+_LT_TAGDECL([], [compiler_lib_search_dirs], [1],
+    [The directories searched by this compiler when creating a shared library])
+_LT_TAGDECL([], [predep_objects], [1],
+    [Dependencies to place before and after the objects being linked to
+    create a shared library])
+_LT_TAGDECL([], [postdep_objects], [1])
+_LT_TAGDECL([], [predeps], [1])
+_LT_TAGDECL([], [postdeps], [1])
+_LT_TAGDECL([], [compiler_lib_search_path], [1],
+    [The library search path used internally by the compiler when linking
+    a shared library])
+])# _LT_SYS_HIDDEN_LIBDEPS
+
+
+# _LT_LANG_F77_CONFIG([TAG])
+# --------------------------
+# Ensure that the configuration variables for a Fortran 77 compiler are
+# suitably defined.  These variables are subsequently used by _LT_CONFIG
+# to write the compiler configuration to `libtool'.
+#
+# All probing is skipped when $F77 is empty or `no'.  While probing, CC,
+# CFLAGS and GCC are temporarily set from $F77, $FFLAGS and $G77; the
+# previous values are saved first and put back at the end.
+m4_defun([_LT_LANG_F77_CONFIG],
+[AC_LANG_PUSH(Fortran 77)
+if test -z "$F77" || test "X$F77" = "Xno"; then
+  _lt_disable_F77=yes
+fi
+
+# Give every per-tag linker setting a neutral starting value before any
+# of the tests below are run.
+_LT_TAGVAR(archive_cmds_need_lc, $1)=no
+_LT_TAGVAR(allow_undefined_flag, $1)=
+_LT_TAGVAR(always_export_symbols, $1)=no
+_LT_TAGVAR(archive_expsym_cmds, $1)=
+_LT_TAGVAR(export_dynamic_flag_spec, $1)=
+_LT_TAGVAR(hardcode_direct, $1)=no
+_LT_TAGVAR(hardcode_direct_absolute, $1)=no
+_LT_TAGVAR(hardcode_libdir_flag_spec, $1)=
+_LT_TAGVAR(hardcode_libdir_separator, $1)=
+_LT_TAGVAR(hardcode_minus_L, $1)=no
+_LT_TAGVAR(hardcode_automatic, $1)=no
+_LT_TAGVAR(inherit_rpath, $1)=no
+_LT_TAGVAR(module_cmds, $1)=
+_LT_TAGVAR(module_expsym_cmds, $1)=
+_LT_TAGVAR(link_all_deplibs, $1)=unknown
+_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds
+_LT_TAGVAR(reload_flag, $1)=$reload_flag
+_LT_TAGVAR(reload_cmds, $1)=$reload_cmds
+_LT_TAGVAR(no_undefined_flag, $1)=
+_LT_TAGVAR(whole_archive_flag_spec, $1)=
+_LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no
+
+# Source file extension for f77 test sources.
+ac_ext=f
+
+# Object file extension for compiled f77 test sources.
+objext=o
+_LT_TAGVAR(objext, $1)=$objext
+
+# No sense in running all these tests if we already determined that
+# the F77 compiler isn't working.  Some variables (like enable_shared)
+# are currently assumed to apply to all compilers on this platform,
+# and will be corrupted by setting them based on a non-working compiler.
+if test "$_lt_disable_F77" != yes; then
+  # Code to be used in simple compile tests
+  lt_simple_compile_test_code="\
+      subroutine t
+      return
+      end
+"
+
+  # Code to be used in simple link tests
+  lt_simple_link_test_code="\
+      program t
+      end
+"
+
+  # ltmain only uses $CC for tagged configurations so make sure $CC is set.
+  _LT_TAG_COMPILER
+
+  # save warnings/boilerplate of simple test code
+  _LT_COMPILER_BOILERPLATE
+  _LT_LINKER_BOILERPLATE
+
+  # Allow CC to be a program name with arguments.
+  lt_save_CC="$CC"
+  lt_save_GCC=$GCC
+  lt_save_CFLAGS=$CFLAGS
+  # From here until the restore at the bottom, CC, CFLAGS and GCC describe
+  # the Fortran 77 compiler (falling back to "f77" if F77 is unset).
+  CC=${F77-"f77"}
+  CFLAGS=$FFLAGS
+  compiler=$CC
+  _LT_TAGVAR(compiler, $1)=$CC
+  _LT_CC_BASENAME([$compiler])
+  GCC=$G77
+  if test -n "$compiler"; then
+    AC_MSG_CHECKING([if libtool supports shared libraries])
+    AC_MSG_RESULT([$can_build_shared])
+
+    AC_MSG_CHECKING([whether to build shared libraries])
+    test "$can_build_shared" = "no" && enable_shared=no
+
+    # On AIX, shared libraries and static libraries use the same namespace, and
+    # are all built from PIC.
+    case $host_os in
+      aix3*)
+        test "$enable_shared" = yes && enable_static=no
+        if test -n "$RANLIB"; then
+          archive_cmds="$archive_cmds~\$RANLIB \$lib"
+          postinstall_cmds='$RANLIB $lib'
+        fi
+        ;;
+      aix[[4-9]]*)
+       if test "$host_cpu" != ia64 && test "$aix_use_runtimelinking" = no ; then
+         test "$enable_shared" = yes && enable_static=no
+       fi
+        ;;
+    esac
+    AC_MSG_RESULT([$enable_shared])
+
+    AC_MSG_CHECKING([whether to build static libraries])
+    # Make sure either enable_shared or enable_static is yes.
+    test "$enable_shared" = yes || enable_static=yes
+    AC_MSG_RESULT([$enable_static])
+
+    _LT_TAGVAR(GCC, $1)="$G77"
+    _LT_TAGVAR(LD, $1)="$LD"
+
+    ## CAVEAT EMPTOR:
+    ## There is no encapsulation within the following macros, do not change
+    ## the running order or otherwise move them around unless you know exactly
+    ## what you are doing...
+    _LT_COMPILER_PIC($1)
+    _LT_COMPILER_C_O($1)
+    _LT_COMPILER_FILE_LOCKS($1)
+    _LT_LINKER_SHLIBS($1)
+    _LT_SYS_DYNAMIC_LINKER($1)
+    _LT_LINKER_HARDCODE_LIBPATH($1)
+
+    _LT_CONFIG($1)
+  fi # test -n "$compiler"
+
+  # Put back the compiler settings that were saved before the override.
+  GCC=$lt_save_GCC
+  CC="$lt_save_CC"
+  CFLAGS="$lt_save_CFLAGS"
+fi # test "$_lt_disable_F77" != yes
+
+AC_LANG_POP
+])# _LT_LANG_F77_CONFIG
+
+
+# _LT_LANG_FC_CONFIG([TAG])
+# -------------------------
+# Ensure that the configuration variables for a Fortran compiler are
+# suitably defined.  These variables are subsequently used by _LT_CONFIG
+# to write the compiler configuration to `libtool'.
+#
+# All probing is skipped when $FC is empty or `no'.  While probing, CC,
+# CFLAGS and GCC are temporarily set from $FC, $FCFLAGS and
+# $ac_cv_fc_compiler_gnu; the previous values are saved first and put
+# back at the end.
+m4_defun([_LT_LANG_FC_CONFIG],
+[AC_LANG_PUSH(Fortran)
+
+if test -z "$FC" || test "X$FC" = "Xno"; then
+  _lt_disable_FC=yes
+fi
+
+# Give every per-tag linker setting a neutral starting value before any
+# of the tests below are run.
+_LT_TAGVAR(archive_cmds_need_lc, $1)=no
+_LT_TAGVAR(allow_undefined_flag, $1)=
+_LT_TAGVAR(always_export_symbols, $1)=no
+_LT_TAGVAR(archive_expsym_cmds, $1)=
+_LT_TAGVAR(export_dynamic_flag_spec, $1)=
+_LT_TAGVAR(hardcode_direct, $1)=no
+_LT_TAGVAR(hardcode_direct_absolute, $1)=no
+_LT_TAGVAR(hardcode_libdir_flag_spec, $1)=
+_LT_TAGVAR(hardcode_libdir_separator, $1)=
+_LT_TAGVAR(hardcode_minus_L, $1)=no
+_LT_TAGVAR(hardcode_automatic, $1)=no
+_LT_TAGVAR(inherit_rpath, $1)=no
+_LT_TAGVAR(module_cmds, $1)=
+_LT_TAGVAR(module_expsym_cmds, $1)=
+_LT_TAGVAR(link_all_deplibs, $1)=unknown
+_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds
+_LT_TAGVAR(reload_flag, $1)=$reload_flag
+_LT_TAGVAR(reload_cmds, $1)=$reload_cmds
+_LT_TAGVAR(no_undefined_flag, $1)=
+_LT_TAGVAR(whole_archive_flag_spec, $1)=
+_LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no
+
+# Source file extension for fc test sources.
+ac_ext=${ac_fc_srcext-f}
+
+# Object file extension for compiled fc test sources.
+objext=o
+_LT_TAGVAR(objext, $1)=$objext
+
+# No sense in running all these tests if we already determined that
+# the FC compiler isn't working.  Some variables (like enable_shared)
+# are currently assumed to apply to all compilers on this platform,
+# and will be corrupted by setting them based on a non-working compiler.
+if test "$_lt_disable_FC" != yes; then
+  # Code to be used in simple compile tests
+  lt_simple_compile_test_code="\
+      subroutine t
+      return
+      end
+"
+
+  # Code to be used in simple link tests
+  lt_simple_link_test_code="\
+      program t
+      end
+"
+
+  # ltmain only uses $CC for tagged configurations so make sure $CC is set.
+  _LT_TAG_COMPILER
+
+  # save warnings/boilerplate of simple test code
+  _LT_COMPILER_BOILERPLATE
+  _LT_LINKER_BOILERPLATE
+
+  # Allow CC to be a program name with arguments.
+  lt_save_CC="$CC"
+  lt_save_GCC=$GCC
+  lt_save_CFLAGS=$CFLAGS
+  # From here until the restore at the bottom, CC, CFLAGS and GCC describe
+  # the Fortran compiler (falling back to "f95" if FC is unset).
+  CC=${FC-"f95"}
+  CFLAGS=$FCFLAGS
+  compiler=$CC
+  GCC=$ac_cv_fc_compiler_gnu
+
+  _LT_TAGVAR(compiler, $1)=$CC
+  _LT_CC_BASENAME([$compiler])
+
+  if test -n "$compiler"; then
+    AC_MSG_CHECKING([if libtool supports shared libraries])
+    AC_MSG_RESULT([$can_build_shared])
+
+    AC_MSG_CHECKING([whether to build shared libraries])
+    test "$can_build_shared" = "no" && enable_shared=no
+
+    # On AIX, shared libraries and static libraries use the same namespace, and
+    # are all built from PIC.
+    case $host_os in
+      aix3*)
+        test "$enable_shared" = yes && enable_static=no
+        if test -n "$RANLIB"; then
+          archive_cmds="$archive_cmds~\$RANLIB \$lib"
+          postinstall_cmds='$RANLIB $lib'
+        fi
+        ;;
+      aix[[4-9]]*)
+       if test "$host_cpu" != ia64 && test "$aix_use_runtimelinking" = no ; then
+         test "$enable_shared" = yes && enable_static=no
+       fi
+        ;;
+    esac
+    AC_MSG_RESULT([$enable_shared])
+
+    AC_MSG_CHECKING([whether to build static libraries])
+    # Make sure either enable_shared or enable_static is yes.
+    test "$enable_shared" = yes || enable_static=yes
+    AC_MSG_RESULT([$enable_static])
+
+    _LT_TAGVAR(GCC, $1)="$ac_cv_fc_compiler_gnu"
+    _LT_TAGVAR(LD, $1)="$LD"
+
+    ## CAVEAT EMPTOR:
+    ## There is no encapsulation within the following macros, do not change
+    ## the running order or otherwise move them around unless you know exactly
+    ## what you are doing...
+    _LT_SYS_HIDDEN_LIBDEPS($1)
+    _LT_COMPILER_PIC($1)
+    _LT_COMPILER_C_O($1)
+    _LT_COMPILER_FILE_LOCKS($1)
+    _LT_LINKER_SHLIBS($1)
+    _LT_SYS_DYNAMIC_LINKER($1)
+    _LT_LINKER_HARDCODE_LIBPATH($1)
+
+    _LT_CONFIG($1)
+  fi # test -n "$compiler"
+
+  # Put back the compiler settings that were saved before the override.
+  GCC=$lt_save_GCC
+  CC=$lt_save_CC
+  CFLAGS=$lt_save_CFLAGS
+fi # test "$_lt_disable_FC" != yes
+
+AC_LANG_POP
+])# _LT_LANG_FC_CONFIG
+
+
+# _LT_LANG_GCJ_CONFIG([TAG])
+# --------------------------
+# Ensure that the configuration variables for the GNU Java Compiler
+# are suitably defined.  These variables are subsequently used by _LT_CONFIG
+# to write the compiler configuration to `libtool'.
+#
+# While probing, CC and CFLAGS are temporarily set from $GCJ and
+# $GCJFLAGS and GCC is forced to `yes'; the previous values are saved
+# first and put back at the end.
+m4_defun([_LT_LANG_GCJ_CONFIG],
+[AC_REQUIRE([LT_PROG_GCJ])dnl
+AC_LANG_SAVE
+
+# Source file extension for Java test sources.
+ac_ext=java
+
+# Object file extension for compiled Java test sources.
+objext=o
+_LT_TAGVAR(objext, $1)=$objext
+
+# Code to be used in simple compile tests
+lt_simple_compile_test_code="class foo {}"
+
+# Code to be used in simple link tests
+lt_simple_link_test_code='public class conftest { public static void main(String[[]] argv) {}; }'
+
+# ltmain only uses $CC for tagged configurations so make sure $CC is set.
+_LT_TAG_COMPILER
+
+# save warnings/boilerplate of simple test code
+_LT_COMPILER_BOILERPLATE
+_LT_LINKER_BOILERPLATE
+
+# Allow CC to be a program name with arguments.
+lt_save_CC=$CC
+lt_save_CFLAGS=$CFLAGS
+lt_save_GCC=$GCC
+# From here until the restore at the bottom, CC and CFLAGS describe the
+# Java compiler (falling back to "gcj" if GCJ is unset).
+GCC=yes
+CC=${GCJ-"gcj"}
+CFLAGS=$GCJFLAGS
+compiler=$CC
+_LT_TAGVAR(compiler, $1)=$CC
+_LT_TAGVAR(LD, $1)="$LD"
+_LT_CC_BASENAME([$compiler])
+
+# GCJ did not exist at the time GCC didn't implicitly link libc in.
+_LT_TAGVAR(archive_cmds_need_lc, $1)=no
+
+_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds
+_LT_TAGVAR(reload_flag, $1)=$reload_flag
+_LT_TAGVAR(reload_cmds, $1)=$reload_cmds
+
+if test -n "$compiler"; then
+  _LT_COMPILER_NO_RTTI($1)
+  _LT_COMPILER_PIC($1)
+  _LT_COMPILER_C_O($1)
+  _LT_COMPILER_FILE_LOCKS($1)
+  _LT_LINKER_SHLIBS($1)
+  _LT_LINKER_HARDCODE_LIBPATH($1)
+
+  _LT_CONFIG($1)
+fi
+
+AC_LANG_RESTORE
+
+# Put back the compiler settings that were saved before the override.
+GCC=$lt_save_GCC
+CC=$lt_save_CC
+CFLAGS=$lt_save_CFLAGS
+])# _LT_LANG_GCJ_CONFIG
+
+
+# _LT_LANG_GO_CONFIG([TAG])
+# --------------------------
+# Ensure that the configuration variables for the GNU Go compiler
+# are suitably defined.  These variables are subsequently used by _LT_CONFIG
+# to write the compiler configuration to `libtool'.
+m4_defun([_LT_LANG_GO_CONFIG],
+[AC_REQUIRE([LT_PROG_GO])dnl
+AC_LANG_SAVE
+
+# Source file extension for Go test sources.
+ac_ext=go
+
+# Object file extension for compiled Go test sources.
+objext=o
+_LT_TAGVAR(objext, $1)=$objext
+
+# Code to be used in simple compile tests
+lt_simple_compile_test_code="package main; func main() { }"
+
+# Code to be used in simple link tests
+lt_simple_link_test_code='package main; func main() { }'
+
+# ltmain only uses $CC for tagged configurations so make sure $CC is set.
+_LT_TAG_COMPILER
+
+# save warnings/boilerplate of simple test code
+_LT_COMPILER_BOILERPLATE
+_LT_LINKER_BOILERPLATE
+
+# Allow CC to be a program name with arguments.
+lt_save_CC=$CC
+lt_save_CFLAGS=$CFLAGS
+lt_save_GCC=$GCC
+GCC=yes
+CC=${GOC-"gccgo"}
+CFLAGS=$GOFLAGS
+compiler=$CC
+_LT_TAGVAR(compiler, $1)=$CC
+_LT_TAGVAR(LD, $1)="$LD"
+_LT_CC_BASENAME([$compiler])
+
+# Go did not exist at the time GCC didn't implicitly link libc in.
+_LT_TAGVAR(archive_cmds_need_lc, $1)=no
+
+_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds
+_LT_TAGVAR(reload_flag, $1)=$reload_flag
+_LT_TAGVAR(reload_cmds, $1)=$reload_cmds
+
+if test -n "$compiler"; then
+  _LT_COMPILER_NO_RTTI($1)
+  _LT_COMPILER_PIC($1)
+  _LT_COMPILER_C_O($1)
+  _LT_COMPILER_FILE_LOCKS($1)
+  _LT_LINKER_SHLIBS($1)
+  _LT_LINKER_HARDCODE_LIBPATH($1)
+
+  _LT_CONFIG($1)
+fi
+
+AC_LANG_RESTORE
+
+GCC=$lt_save_GCC
+CC=$lt_save_CC
+CFLAGS=$lt_save_CFLAGS
+])# _LT_LANG_GO_CONFIG
+
+
+# _LT_LANG_RC_CONFIG([TAG])
+# -------------------------
+# Ensure that the configuration variables for the Windows resource compiler
+# are suitably defined.  These variables are subsequently used by _LT_CONFIG
+# to write the compiler configuration to `libtool'.
+m4_defun([_LT_LANG_RC_CONFIG],
+[AC_REQUIRE([LT_PROG_RC])dnl
+AC_LANG_SAVE
+
+# Source file extension for RC test sources.
+ac_ext=rc
+
+# Object file extension for compiled RC test sources.
+objext=o
+_LT_TAGVAR(objext, $1)=$objext
+
+# Code to be used in simple compile tests
+lt_simple_compile_test_code='sample MENU { MENUITEM "&Soup", 100, CHECKED }'
+
+# Code to be used in simple link tests
+lt_simple_link_test_code="$lt_simple_compile_test_code"
+
+# ltmain only uses $CC for tagged configurations so make sure $CC is set.
+_LT_TAG_COMPILER
+
+# save warnings/boilerplate of simple test code
+_LT_COMPILER_BOILERPLATE
+_LT_LINKER_BOILERPLATE
+
+# Allow CC to be a program name with arguments.
+lt_save_CC="$CC"
+lt_save_CFLAGS=$CFLAGS
+lt_save_GCC=$GCC
+GCC=
+CC=${RC-"windres"}
+CFLAGS=
+compiler=$CC
+_LT_TAGVAR(compiler, $1)=$CC
+_LT_CC_BASENAME([$compiler])
+_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)=yes
+
+if test -n "$compiler"; then
+  :
+  _LT_CONFIG($1)
+fi
+
+GCC=$lt_save_GCC
+AC_LANG_RESTORE
+CC=$lt_save_CC
+CFLAGS=$lt_save_CFLAGS
+])# _LT_LANG_RC_CONFIG
+
+
+# LT_PROG_GCJ
+# -----------
+# Locate the GNU Java compiler.  If Autoconf provides a GCJ macro, use it;
+# else if Automake provides one, use that; otherwise look for a `gcj' tool
+# directly, default GCJFLAGS to "-g -O2" when the user has not set it, and
+# substitute GCJFLAGS.
+AC_DEFUN([LT_PROG_GCJ],
+[m4_ifdef([AC_PROG_GCJ], [AC_PROG_GCJ],
+  [m4_ifdef([A][M_PROG_GCJ], [A][M_PROG_GCJ],
+    [AC_CHECK_TOOL(GCJ, gcj,)
+      test "x${GCJFLAGS+set}" = xset || GCJFLAGS="-g -O2"
+      AC_SUBST(GCJFLAGS)])])[]dnl
+])
+
+# Old name:
+AU_ALIAS([LT_AC_PROG_GCJ], [LT_PROG_GCJ])
+dnl aclocal-1.4 backwards compatibility:
+dnl AC_DEFUN([LT_AC_PROG_GCJ], [])
+
+
+# LT_PROG_GO
+# ----------
+# Look for the GNU Go compiler (`gccgo') with AC_CHECK_TOOL and record the
+# result in GOC.  No fallback value is supplied for the not-found case.
+AC_DEFUN([LT_PROG_GO],
+[AC_CHECK_TOOL(GOC, gccgo,)
+])
+
+
+# LT_PROG_RC
+# ----------
+# Look for the Windows resource compiler (`windres') with AC_CHECK_TOOL and
+# record the result in RC.  No fallback value is supplied for the not-found
+# case.
+AC_DEFUN([LT_PROG_RC],
+[AC_CHECK_TOOL(RC, windres,)
+])
+
+# Old name:
+AU_ALIAS([LT_AC_PROG_RC], [LT_PROG_RC])
+dnl aclocal-1.4 backwards compatibility:
+dnl AC_DEFUN([LT_AC_PROG_RC], [])
+
+
+# _LT_DECL_EGREP
+# --------------
+# If we don't have a new enough Autoconf to choose the best grep
+# available, choose the one first in the user's PATH.
+m4_defun([_LT_DECL_EGREP],
+[AC_REQUIRE([AC_PROG_EGREP])dnl
+AC_REQUIRE([AC_PROG_FGREP])dnl
+test -z "$GREP" && GREP=grep
+_LT_DECL([], [GREP], [1], [A grep program that handles long lines])
+_LT_DECL([], [EGREP], [1], [An ERE matcher])
+_LT_DECL([], [FGREP], [1], [A literal string matcher])
+dnl Non-bleeding-edge autoconf doesn't subst GREP, so do it here too
+AC_SUBST([GREP])
+])
+
+
+# _LT_DECL_OBJDUMP
+# ----------------
+# If we don't have a new enough Autoconf to choose the best objdump
+# available, choose the one first in the user's PATH.
+# The result is kept in OBJDUMP, falling back to plain `objdump' if the
+# variable ends up empty, then declared to libtool and substituted.
+m4_defun([_LT_DECL_OBJDUMP],
+[AC_CHECK_TOOL(OBJDUMP, objdump, false)
+test -z "$OBJDUMP" && OBJDUMP=objdump
+_LT_DECL([], [OBJDUMP], [1], [An object symbol dumper])
+AC_SUBST([OBJDUMP])
+])
+
+# _LT_DECL_DLLTOOL
+# ----------------
+# Ensure DLLTOOL variable is set.
+m4_defun([_LT_DECL_DLLTOOL],
+[AC_CHECK_TOOL(DLLTOOL, dlltool, false)
+test -z "$DLLTOOL" && DLLTOOL=dlltool
+_LT_DECL([], [DLLTOOL], [1], [DLL creation program])
+AC_SUBST([DLLTOOL])
+])
+
+# _LT_DECL_SED
+# ------------
+# Check for a fully-functional sed program, that truncates
+# as few characters as possible.  Prefer GNU sed if found.
+m4_defun([_LT_DECL_SED],
+[AC_PROG_SED
+test -z "$SED" && SED=sed
+Xsed="$SED -e 1s/^X//"
+_LT_DECL([], [SED], [1], [A sed program that does not truncate output])
+_LT_DECL([], [Xsed], ["\$SED -e 1s/^X//"],
+    [Sed that helps us avoid accidentally triggering echo(1) options like -n])
+])# _LT_DECL_SED
+
+m4_ifndef([AC_PROG_SED], [
+# NOTE: This macro has been submitted for inclusion into   #
+#  GNU Autoconf as AC_PROG_SED.  When it is available in   #
+#  a released version of Autoconf we should remove this    #
+#  macro and use it instead.                               #
+#
+# Fallback AC_PROG_SED: collect every sed and gsed found in PATH, plus
+# /usr/xpg4/bin/sed, and pick GNU sed if one is seen; otherwise pick the
+# candidate that copes with the longest input line without truncating it.
+# The winner is cached in lt_cv_path_SED and exported as SED.
+
+m4_defun([AC_PROG_SED],
+[AC_MSG_CHECKING([for a sed that does not truncate output])
+AC_CACHE_VAL(lt_cv_path_SED,
+[# Loop through the user's path and test for sed and gsed.
+# Then use that list of sed's as ones to test for truncation.
+# Start from an empty list so a value inherited from the environment
+# cannot inject extra candidates.
+lt_ac_sed_list=
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+  for lt_ac_prog in sed gsed; do
+    for ac_exec_ext in '' $ac_executable_extensions; do
+      if $as_executable_p "$as_dir/$lt_ac_prog$ac_exec_ext"; then
+        lt_ac_sed_list="$lt_ac_sed_list $as_dir/$lt_ac_prog$ac_exec_ext"
+      fi
+    done
+  done
+done
+IFS=$as_save_IFS
+lt_ac_max=0
+lt_ac_count=0
+# Add /usr/xpg4/bin/sed as it is typically found on Solaris
+# along with /bin/sed that truncates output.
+for lt_ac_sed in $lt_ac_sed_list /usr/xpg4/bin/sed; do
+  test ! -f "$lt_ac_sed" && continue
+  lt_ac_count=0
+  echo $ECHO_N "0123456789$ECHO_C" >conftest.in
+  # Check for GNU sed and select it if it is found.
+  if "$lt_ac_sed" --version 2>&1 < /dev/null | grep 'GNU' > /dev/null; then
+    lt_cv_path_SED=$lt_ac_sed
+    break
+  fi
+  # Double the input on every pass; a sed that truncates long lines will
+  # eventually produce output that differs from its input.
+  while true; do
+    cat conftest.in conftest.in >conftest.tmp
+    mv conftest.tmp conftest.in
+    cp conftest.in conftest.nl
+    echo >>conftest.nl
+    "$lt_ac_sed" -e 's/a$//' < conftest.nl >conftest.out || break
+    cmp -s conftest.out conftest.nl || break
+    # 10000 chars as input seems more than enough
+    test $lt_ac_count -gt 10 && break
+    lt_ac_count=`expr $lt_ac_count + 1`
+    if test $lt_ac_count -gt $lt_ac_max; then
+      lt_ac_max=$lt_ac_count
+      lt_cv_path_SED=$lt_ac_sed
+    fi
+  done
+done
+# Do not leave the scratch files of the truncation test behind.
+rm -f conftest.in conftest.tmp conftest.nl conftest.out
+])
+SED=$lt_cv_path_SED
+AC_SUBST([SED])
+AC_MSG_RESULT([$SED])
+])#AC_PROG_SED
+])#m4_ifndef
+
+# Old name:
+AU_ALIAS([LT_AC_PROG_SED], [AC_PROG_SED])
+dnl aclocal-1.4 backwards compatibility:
+dnl AC_DEFUN([LT_AC_PROG_SED], [])
+
+
+# _LT_CHECK_SHELL_FEATURES
+# ------------------------
+# Find out whether the shell is Bourne or XSI compatible,
+# or has some other useful features.
+m4_defun([_LT_CHECK_SHELL_FEATURES],
+[AC_MSG_CHECKING([whether the shell understands some XSI constructs])
+# Try some XSI features
+xsi_shell=no
+( _lt_dummy="a/b/c"
+  test "${_lt_dummy##*/},${_lt_dummy%/*},${_lt_dummy#??}"${_lt_dummy%"$_lt_dummy"}, \
+      = c,a/b,b/c, \
+    && eval 'test $(( 1 + 1 )) -eq 2 \
+    && test "${#_lt_dummy}" -eq 5' ) >/dev/null 2>&1 \
+  && xsi_shell=yes
+AC_MSG_RESULT([$xsi_shell])
+_LT_CONFIG_LIBTOOL_INIT([xsi_shell='$xsi_shell'])
+
+AC_MSG_CHECKING([whether the shell understands "+="])
+lt_shell_append=no
+( foo=bar; set foo baz; eval "$[1]+=\$[2]" && test "$foo" = barbaz ) \
+    >/dev/null 2>&1 \
+  && lt_shell_append=yes
+AC_MSG_RESULT([$lt_shell_append])
+_LT_CONFIG_LIBTOOL_INIT([lt_shell_append='$lt_shell_append'])
+
+if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then
+  lt_unset=unset
+else
+  lt_unset=false
+fi
+_LT_DECL([], [lt_unset], [0], [whether the shell understands "unset"])dnl
+
+# test EBCDIC or ASCII
+case `echo X|tr X '\101'` in
+ A) # ASCII based system
+    # \n is not interpreted correctly by Solaris 8 /usr/ucb/tr
+  lt_SP2NL='tr \040 \012'
+  lt_NL2SP='tr \015\012 \040\040'
+  ;;
+ *) # EBCDIC based system
+  lt_SP2NL='tr \100 \n'
+  lt_NL2SP='tr \r\n \100\100'
+  ;;
+esac
+_LT_DECL([SP2NL], [lt_SP2NL], [1], [turn spaces into newlines])dnl
+_LT_DECL([NL2SP], [lt_NL2SP], [1], [turn newlines into spaces])dnl
+])# _LT_CHECK_SHELL_FEATURES
+
+
+# _LT_PROG_FUNCTION_REPLACE (FUNCNAME, REPLACEMENT-BODY)
+# ------------------------------------------------------
+# In `$cfgfile', look for function FUNCNAME delimited by `^FUNCNAME ()$' and
+# `^} # FUNCNAME ', and replace its body with REPLACEMENT-BODY.
+# The edited script is written to `$cfgfile.tmp' and then moved (or, failing
+# that, copied) over `$cfgfile'.  If any step fails,
+# _lt_function_replace_fail is set to `:'.
+m4_defun([_LT_PROG_FUNCTION_REPLACE],
+[dnl {
+sed -e '/^$1 ()$/,/^} # $1 /c\
+$1 ()\
+{\
+m4_bpatsubsts([$2], [$], [\\], [^\([    ]\)], [\\\1])
+} # Extended-shell $1 implementation' "$cfgfile" > "$cfgfile.tmp" \
+  && mv -f "$cfgfile.tmp" "$cfgfile" \
+    || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp")
+test 0 -eq $? || _lt_function_replace_fail=:
+])
+
+
+# _LT_PROG_REPLACE_SHELLFNS
+# -------------------------
+# Replace existing portable implementations of several shell functions with
+# equivalent extended shell implementations where those features are available.
+#
+# When xsi_shell is `yes', the path, option-splitting, arithmetic and length
+# helpers are swapped for parameter-expansion based versions.  When
+# lt_shell_append is `yes', func_append and func_append_quoted are rewritten
+# to use `+=' and literal `func_append VAR "' calls in `$cfgfile' become
+# `VAR+="'; otherwise those calls become `VAR="$VAR'.  A warning is issued
+# if any of the substitutions failed.
+m4_defun([_LT_PROG_REPLACE_SHELLFNS],
+[if test x"$xsi_shell" = xyes; then
+  _LT_PROG_FUNCTION_REPLACE([func_dirname], [dnl
+    case ${1} in
+      */*) func_dirname_result="${1%/*}${2}" ;;
+      *  ) func_dirname_result="${3}" ;;
+    esac])
+
+  _LT_PROG_FUNCTION_REPLACE([func_basename], [dnl
+    func_basename_result="${1##*/}"])
+
+  _LT_PROG_FUNCTION_REPLACE([func_dirname_and_basename], [dnl
+    case ${1} in
+      */*) func_dirname_result="${1%/*}${2}" ;;
+      *  ) func_dirname_result="${3}" ;;
+    esac
+    func_basename_result="${1##*/}"])
+
+  _LT_PROG_FUNCTION_REPLACE([func_stripname], [dnl
+    # pdksh 5.2.14 does not do ${X%$Y} correctly if both X and Y are
+    # positional parameters, so assign one to ordinary parameter first.
+    func_stripname_result=${3}
+    func_stripname_result=${func_stripname_result#"${1}"}
+    func_stripname_result=${func_stripname_result%"${2}"}])
+
+  _LT_PROG_FUNCTION_REPLACE([func_split_long_opt], [dnl
+    func_split_long_opt_name=${1%%=*}
+    func_split_long_opt_arg=${1#*=}])
+
+  _LT_PROG_FUNCTION_REPLACE([func_split_short_opt], [dnl
+    func_split_short_opt_arg=${1#??}
+    func_split_short_opt_name=${1%"$func_split_short_opt_arg"}])
+
+  _LT_PROG_FUNCTION_REPLACE([func_lo2o], [dnl
+    case ${1} in
+      *.lo) func_lo2o_result=${1%.lo}.${objext} ;;
+      *)    func_lo2o_result=${1} ;;
+    esac])
+
+  _LT_PROG_FUNCTION_REPLACE([func_xform], [    func_xform_result=${1%.*}.lo])
+
+  _LT_PROG_FUNCTION_REPLACE([func_arith], [    func_arith_result=$(( $[*] ))])
+
+  _LT_PROG_FUNCTION_REPLACE([func_len], [    func_len_result=${#1}])
+fi
+
+if test x"$lt_shell_append" = xyes; then
+  _LT_PROG_FUNCTION_REPLACE([func_append], [    eval "${1}+=\\${2}"])
+
+  _LT_PROG_FUNCTION_REPLACE([func_append_quoted], [dnl
+    func_quote_for_eval "${2}"
+dnl m4 expansion turns \\\\ into \\, and then the shell eval turns that into \
+    eval "${1}+=\\\\ \\$func_quote_for_eval_result"])
+
+  # Save a `func_append' function call where possible by direct use of '+='
+  sed -e 's%func_append \([[a-zA-Z_]]\{1,\}\) "%\1+="%g' "$cfgfile" > "$cfgfile.tmp" \
+    && mv -f "$cfgfile.tmp" "$cfgfile" \
+      || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp")
+  test 0 -eq $? || _lt_function_replace_fail=:
+else
+  # Save a `func_append' function call even when '+=' is not available
+  sed -e 's%func_append \([[a-zA-Z_]]\{1,\}\) "%\1="$\1%g' "$cfgfile" > "$cfgfile.tmp" \
+    && mv -f "$cfgfile.tmp" "$cfgfile" \
+      || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp")
+  test 0 -eq $? || _lt_function_replace_fail=:
+fi
+
+if test x"$_lt_function_replace_fail" = x":"; then
+  AC_MSG_WARN([Unable to substitute extended shell functions in $ofile])
+fi
+])
+
+# _LT_PATH_CONVERSION_FUNCTIONS
+# -----------------------------
+# Determine which file name conversion functions should be used by
+# func_to_host_file (and, implicitly, by func_to_host_path).  These are needed
+# for certain cross-compile configurations and native mingw.
+m4_defun([_LT_PATH_CONVERSION_FUNCTIONS],
+[AC_REQUIRE([AC_CANONICAL_HOST])dnl
+AC_REQUIRE([AC_CANONICAL_BUILD])dnl
+AC_MSG_CHECKING([how to convert $build file names to $host format])
+AC_CACHE_VAL(lt_cv_to_host_file_cmd,
+[case $host in
+  *-*-mingw* )
+    case $build in
+      *-*-mingw* ) # actually msys
+        lt_cv_to_host_file_cmd=func_convert_file_msys_to_w32
+        ;;
+      *-*-cygwin* )
+        lt_cv_to_host_file_cmd=func_convert_file_cygwin_to_w32
+        ;;
+      * ) # otherwise, assume *nix
+        lt_cv_to_host_file_cmd=func_convert_file_nix_to_w32
+        ;;
+    esac
+    ;;
+  *-*-cygwin* )
+    case $build in
+      *-*-mingw* ) # actually msys
+        lt_cv_to_host_file_cmd=func_convert_file_msys_to_cygwin
+        ;;
+      *-*-cygwin* )
+        lt_cv_to_host_file_cmd=func_convert_file_noop
+        ;;
+      * ) # otherwise, assume *nix
+        lt_cv_to_host_file_cmd=func_convert_file_nix_to_cygwin
+        ;;
+    esac
+    ;;
+  * ) # unhandled hosts (and "normal" native builds)
+    lt_cv_to_host_file_cmd=func_convert_file_noop
+    ;;
+esac
+])
+to_host_file_cmd=$lt_cv_to_host_file_cmd
+AC_MSG_RESULT([$lt_cv_to_host_file_cmd])
+_LT_DECL([to_host_file_cmd], [lt_cv_to_host_file_cmd],
+         [0], [convert $build file names to $host format])dnl
+
+AC_MSG_CHECKING([how to convert $build file names to toolchain format])
+AC_CACHE_VAL(lt_cv_to_tool_file_cmd,
+[#assume ordinary cross tools, or native build.
+lt_cv_to_tool_file_cmd=func_convert_file_noop
+case $host in
+  *-*-mingw* )
+    case $build in
+      *-*-mingw* ) # actually msys
+        lt_cv_to_tool_file_cmd=func_convert_file_msys_to_w32
+        ;;
+    esac
+    ;;
+esac
+])
+to_tool_file_cmd=$lt_cv_to_tool_file_cmd
+AC_MSG_RESULT([$lt_cv_to_tool_file_cmd])
+_LT_DECL([to_tool_file_cmd], [lt_cv_to_tool_file_cmd],
+         [0], [convert $build files to toolchain format])dnl
+])# _LT_PATH_CONVERSION_FUNCTIONS
+
+# Helper functions for option handling.                    -*- Autoconf -*-
+#
+#   Copyright (C) 2004, 2005, 2007, 2008, 2009 Free Software Foundation,
+#   Inc.
+#   Written by Gary V. Vaughan, 2004
+#
+# This file is free software; the Free Software Foundation gives
+# unlimited permission to copy and/or distribute it, with or without
+# modifications, as long as this notice is preserved.
+
+# serial 7 ltoptions.m4
+
+# This is to help aclocal find these macros, as it can't see m4_define.
+AC_DEFUN([LTOPTIONS_VERSION], [m4_if([1])])
+
+
+# _LT_MANGLE_OPTION(MACRO-NAME, OPTION-NAME)
+# ------------------------------------------
+# Expand to the name of the flag macro that records OPTION-NAME as set for
+# MACRO-NAME: `_LT_OPTION_' followed by `MACRO-NAME__OPTION-NAME' with every
+# character other than a letter, digit or underscore replaced by `_'.
+m4_define([_LT_MANGLE_OPTION],
+[[_LT_OPTION_]m4_bpatsubst($1__$2, [[^a-zA-Z0-9_]], [_])])
+
+
+# _LT_SET_OPTION(MACRO-NAME, OPTION-NAME)
+# ---------------------------------------
+# Set option OPTION-NAME for macro MACRO-NAME, and if there is a
+# matching handler defined, dispatch to it.  Other OPTION-NAMEs are
+# saved as a flag.
+m4_define([_LT_SET_OPTION],
+[m4_define(_LT_MANGLE_OPTION([$1], [$2]))dnl
+m4_ifdef(_LT_MANGLE_DEFUN([$1], [$2]),
+        _LT_MANGLE_DEFUN([$1], [$2]),
+    [m4_warning([Unknown $1 option `$2'])])[]dnl
+])
+
+
+# _LT_IF_OPTION(MACRO-NAME, OPTION-NAME, IF-SET, [IF-NOT-SET])
+# ------------------------------------------------------------
+# Execute IF-SET if OPTION is set, IF-NOT-SET otherwise.
+m4_define([_LT_IF_OPTION],
+[m4_ifdef(_LT_MANGLE_OPTION([$1], [$2]), [$3], [$4])])
+
+
+# _LT_UNLESS_OPTIONS(MACRO-NAME, OPTION-LIST, IF-NOT-SET)
+# -------------------------------------------------------
+# Execute IF-NOT-SET unless at least one of the options in OPTION-LIST
+# for MACRO-NAME is set.
+m4_define([_LT_UNLESS_OPTIONS],
+[m4_foreach([_LT_Option], m4_split(m4_normalize([$2])),
+           [m4_ifdef(_LT_MANGLE_OPTION([$1], _LT_Option),
+                     [m4_define([$0_found])])])[]dnl
+m4_ifdef([$0_found], [m4_undefine([$0_found])], [$3
+])[]dnl
+])
+
+
+# _LT_SET_OPTIONS(MACRO-NAME, OPTION-LIST)
+# ----------------------------------------
+# OPTION-LIST is a space-separated list of Libtool options associated
+# with MACRO-NAME.  If any OPTION has a matching handler declared with
+# LT_OPTION_DEFINE, dispatch to that macro; otherwise warn about the
+# unknown option.
+m4_defun([_LT_SET_OPTIONS],
+[# Set options
+m4_foreach([_LT_Option], m4_split(m4_normalize([$2])),
+    [_LT_SET_OPTION([$1], _LT_Option)])
+
+m4_if([$1],[LT_INIT],[
+  dnl
+  dnl Simply set some default values (i.e off) if boolean options were not
+  dnl specified:
+  _LT_UNLESS_OPTIONS([LT_INIT], [dlopen], [enable_dlopen=no
+  ])
+  _LT_UNLESS_OPTIONS([LT_INIT], [win32-dll], [enable_win32_dll=no
+  ])
+  dnl
+  dnl If no reference was made to various pairs of opposing options, then
+  dnl we run the default mode handler for the pair.  For example, if neither
+  dnl `shared' nor `disable-shared' was passed, we enable building of shared
+  dnl archives by default:
+  _LT_UNLESS_OPTIONS([LT_INIT], [shared disable-shared], [_LT_ENABLE_SHARED])
+  _LT_UNLESS_OPTIONS([LT_INIT], [static disable-static], [_LT_ENABLE_STATIC])
+  _LT_UNLESS_OPTIONS([LT_INIT], [pic-only no-pic], [_LT_WITH_PIC])
+  _LT_UNLESS_OPTIONS([LT_INIT], [fast-install disable-fast-install],
+                  [_LT_ENABLE_FAST_INSTALL])
+  ])
+])# _LT_SET_OPTIONS
+
+
+
+# _LT_MANGLE_DEFUN(MACRO-NAME, OPTION-NAME)
+# -----------------------------------------
+# Expand to the name of the handler macro for OPTION-NAME of MACRO-NAME:
+# `_LT_OPTION_DEFUN_' followed by the upper-cased `MACRO-NAME__OPTION-NAME'
+# with every character other than A-Z, 0-9 or underscore replaced by `_'.
+m4_define([_LT_MANGLE_DEFUN],
+[[_LT_OPTION_DEFUN_]m4_bpatsubst(m4_toupper([$1__$2]), [[^A-Z0-9_]], [_])])
+
+
+# LT_OPTION_DEFINE(MACRO-NAME, OPTION-NAME, CODE)
+# -----------------------------------------------
+# Register CODE as the handler for OPTION-NAME of MACRO-NAME by defining the
+# macro whose name _LT_MANGLE_DEFUN computes for that pair.
+m4_define([LT_OPTION_DEFINE],
+[m4_define(_LT_MANGLE_DEFUN([$1], [$2]), [$3])[]dnl
+])# LT_OPTION_DEFINE
+
+
+# dlopen
+# ------
+LT_OPTION_DEFINE([LT_INIT], [dlopen], [enable_dlopen=yes
+])
+
+AU_DEFUN([AC_LIBTOOL_DLOPEN],
+[_LT_SET_OPTION([LT_INIT], [dlopen])
+AC_DIAGNOSE([obsolete],
+[$0: Remove this warning and the call to _LT_SET_OPTION when you
+put the `dlopen' option into LT_INIT's first parameter.])
+])
+
+dnl aclocal-1.4 backwards compatibility:
+dnl AC_DEFUN([AC_LIBTOOL_DLOPEN], [])
+
+
+# win32-dll
+# ---------
+# Declare package support for building win32 dll's.
+LT_OPTION_DEFINE([LT_INIT], [win32-dll],
+[enable_win32_dll=yes
+
+case $host in
+*-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-cegcc*)
+  AC_CHECK_TOOL(AS, as, false)
+  AC_CHECK_TOOL(DLLTOOL, dlltool, false)
+  AC_CHECK_TOOL(OBJDUMP, objdump, false)
+  ;;
+esac
+
+test -z "$AS" && AS=as
+_LT_DECL([], [AS],      [1], [Assembler program])dnl
+
+test -z "$DLLTOOL" && DLLTOOL=dlltool
+_LT_DECL([], [DLLTOOL], [1], [DLL creation program])dnl
+
+test -z "$OBJDUMP" && OBJDUMP=objdump
+_LT_DECL([], [OBJDUMP], [1], [Object dumper program])dnl
+])# win32-dll
+
+AU_DEFUN([AC_LIBTOOL_WIN32_DLL],
+[AC_REQUIRE([AC_CANONICAL_HOST])dnl
+_LT_SET_OPTION([LT_INIT], [win32-dll])
+AC_DIAGNOSE([obsolete],
+[$0: Remove this warning and the call to _LT_SET_OPTION when you
+put the `win32-dll' option into LT_INIT's first parameter.])
+])
+
+dnl aclocal-1.4 backwards compatibility:
+dnl AC_DEFUN([AC_LIBTOOL_WIN32_DLL], [])
+
+
+# _LT_ENABLE_SHARED([DEFAULT])
+# ----------------------------
+# implement the --enable-shared flag, and supports the `shared' and
+# `disable-shared' LT_INIT options.
+# DEFAULT is either `yes' or `no'.  If omitted, it defaults to `yes'.
+m4_define([_LT_ENABLE_SHARED],
+[m4_define([_LT_ENABLE_SHARED_DEFAULT], [m4_if($1, no, no, yes)])dnl
+AC_ARG_ENABLE([shared],
+    [AS_HELP_STRING([--enable-shared@<:@=PKGS@:>@],
+       [build shared libraries @<:@default=]_LT_ENABLE_SHARED_DEFAULT[@:>@])],
+    [p=${PACKAGE-default}
+    case $enableval in
+    yes) enable_shared=yes ;;
+    no) enable_shared=no ;;
+    *)
+      enable_shared=no
+      # Look at the argument we got.  We use all the common list separators.
+      lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
+      for pkg in $enableval; do
+       IFS="$lt_save_ifs"
+       if test "X$pkg" = "X$p"; then
+         enable_shared=yes
+       fi
+      done
+      IFS="$lt_save_ifs"
+      ;;
+    esac],
+    [enable_shared=]_LT_ENABLE_SHARED_DEFAULT)
+
+    _LT_DECL([build_libtool_libs], [enable_shared], [0],
+       [Whether or not to build shared libraries])
+])# _LT_ENABLE_SHARED
+
+LT_OPTION_DEFINE([LT_INIT], [shared], [_LT_ENABLE_SHARED([yes])])
+LT_OPTION_DEFINE([LT_INIT], [disable-shared], [_LT_ENABLE_SHARED([no])])
+
+# Old names:
+AC_DEFUN([AC_ENABLE_SHARED],
+[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[shared])
+])
+
+AC_DEFUN([AC_DISABLE_SHARED],
+[_LT_SET_OPTION([LT_INIT], [disable-shared])
+])
+
+AU_DEFUN([AM_ENABLE_SHARED], [AC_ENABLE_SHARED($@)])
+AU_DEFUN([AM_DISABLE_SHARED], [AC_DISABLE_SHARED($@)])
+
+dnl aclocal-1.4 backwards compatibility:
+dnl AC_DEFUN([AM_ENABLE_SHARED], [])
+dnl AC_DEFUN([AM_DISABLE_SHARED], [])
+
+
+
+# _LT_ENABLE_STATIC([DEFAULT])
+# ----------------------------
+# implement the --enable-static flag, and support the `static' and
+# `disable-static' LT_INIT options.
+# DEFAULT is either `yes' or `no'.  If omitted, it defaults to `yes'.
+m4_define([_LT_ENABLE_STATIC],
+[m4_define([_LT_ENABLE_STATIC_DEFAULT], [m4_if($1, no, no, yes)])dnl
+AC_ARG_ENABLE([static],
+    [AS_HELP_STRING([--enable-static@<:@=PKGS@:>@],
+       [build static libraries @<:@default=]_LT_ENABLE_STATIC_DEFAULT[@:>@])],
+    [p=${PACKAGE-default}
+    case $enableval in
+    yes) enable_static=yes ;;
+    no) enable_static=no ;;
+    *)
+     enable_static=no
+      # Look at the argument we got.  We use all the common list separators.
+      lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
+      for pkg in $enableval; do
+       IFS="$lt_save_ifs"
+       if test "X$pkg" = "X$p"; then
+         enable_static=yes
+       fi
+      done
+      IFS="$lt_save_ifs"
+      ;;
+    esac],
+    [enable_static=]_LT_ENABLE_STATIC_DEFAULT)
+
+    _LT_DECL([build_old_libs], [enable_static], [0],
+       [Whether or not to build static libraries])
+])# _LT_ENABLE_STATIC
+
+LT_OPTION_DEFINE([LT_INIT], [static], [_LT_ENABLE_STATIC([yes])])
+LT_OPTION_DEFINE([LT_INIT], [disable-static], [_LT_ENABLE_STATIC([no])])
+
+# Old names:
+AC_DEFUN([AC_ENABLE_STATIC],
+[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[static])
+])
+
+AC_DEFUN([AC_DISABLE_STATIC],
+[_LT_SET_OPTION([LT_INIT], [disable-static])
+])
+
+AU_DEFUN([AM_ENABLE_STATIC], [AC_ENABLE_STATIC($@)])
+AU_DEFUN([AM_DISABLE_STATIC], [AC_DISABLE_STATIC($@)])
+
+dnl aclocal-1.4 backwards compatibility:
+dnl AC_DEFUN([AM_ENABLE_STATIC], [])
+dnl AC_DEFUN([AM_DISABLE_STATIC], [])
+
+
+
+# _LT_ENABLE_FAST_INSTALL([DEFAULT])
+# ----------------------------------
+# implement the --enable-fast-install flag, and support the `fast-install'
+# and `disable-fast-install' LT_INIT options.
+# DEFAULT is either `yes' or `no'.  If omitted, it defaults to `yes'.
+m4_define([_LT_ENABLE_FAST_INSTALL],
+[m4_define([_LT_ENABLE_FAST_INSTALL_DEFAULT], [m4_if($1, no, no, yes)])dnl
+AC_ARG_ENABLE([fast-install],
+    [AS_HELP_STRING([--enable-fast-install@<:@=PKGS@:>@],
+    [optimize for fast installation @<:@default=]_LT_ENABLE_FAST_INSTALL_DEFAULT[@:>@])],
+    [p=${PACKAGE-default}
+    case $enableval in
+    yes) enable_fast_install=yes ;;
+    no) enable_fast_install=no ;;
+    *)
+      enable_fast_install=no
+      # Look at the argument we got.  We use all the common list separators.
+      lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
+      for pkg in $enableval; do
+       IFS="$lt_save_ifs"
+       if test "X$pkg" = "X$p"; then
+         enable_fast_install=yes
+       fi
+      done
+      IFS="$lt_save_ifs"
+      ;;
+    esac],
+    [enable_fast_install=]_LT_ENABLE_FAST_INSTALL_DEFAULT)
+
+_LT_DECL([fast_install], [enable_fast_install], [0],
+        [Whether or not to optimize for fast installation])dnl
+])# _LT_ENABLE_FAST_INSTALL
+
+LT_OPTION_DEFINE([LT_INIT], [fast-install], [_LT_ENABLE_FAST_INSTALL([yes])])
+LT_OPTION_DEFINE([LT_INIT], [disable-fast-install], [_LT_ENABLE_FAST_INSTALL([no])])
+
+# Old names:
+AU_DEFUN([AC_ENABLE_FAST_INSTALL],
+[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[fast-install])
+AC_DIAGNOSE([obsolete],
+[$0: Remove this warning and the call to _LT_SET_OPTION when you put
+the `fast-install' option into LT_INIT's first parameter.])
+])
+
+AU_DEFUN([AC_DISABLE_FAST_INSTALL],
+[_LT_SET_OPTION([LT_INIT], [disable-fast-install])
+AC_DIAGNOSE([obsolete],
+[$0: Remove this warning and the call to _LT_SET_OPTION when you put
+the `disable-fast-install' option into LT_INIT's first parameter.])
+])
+
+dnl aclocal-1.4 backwards compatibility:
+dnl AC_DEFUN([AC_ENABLE_FAST_INSTALL], [])
+dnl AC_DEFUN([AC_DISABLE_FAST_INSTALL], [])
+
+
+# _LT_WITH_PIC([MODE])
+# --------------------
+# Implement the --with-pic flag, and support the `pic-only' and `no-pic'
+# LT_INIT options.
+# MODE is either `yes' or `no'.  If omitted, it defaults to `default'
+# (use both PIC and non-PIC objects).
+#
+# When --with-pic is given, `yes' and `no' are taken verbatim; any other
+# value is treated as a package list and selects `yes' only if it names
+# $PACKAGE, `default' otherwise.  When the flag is absent, pic_mode falls
+# back to MODE, so that the `pic-only' and `no-pic' options actually change
+# the default instead of being overridden by an unconditional `default'.
+m4_define([_LT_WITH_PIC],
+[AC_ARG_WITH([pic],
+    [AS_HELP_STRING([--with-pic@<:@=PKGS@:>@],
+       [try to use only PIC/non-PIC objects @<:@default=use both@:>@])],
+    [lt_p=${PACKAGE-default}
+    case $withval in
+    yes|no) pic_mode=$withval ;;
+    *)
+      pic_mode=default
+      # Look at the argument we got.  We use all the common list separators.
+      lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
+      for lt_pkg in $withval; do
+       IFS="$lt_save_ifs"
+       if test "X$lt_pkg" = "X$lt_p"; then
+         pic_mode=yes
+       fi
+      done
+      IFS="$lt_save_ifs"
+      ;;
+    esac],
+    [pic_mode=m4_default([$1], [default])])
+
+_LT_DECL([], [pic_mode], [0], [What type of objects to build])dnl
+])# _LT_WITH_PIC
+
+LT_OPTION_DEFINE([LT_INIT], [pic-only], [_LT_WITH_PIC([yes])])
+LT_OPTION_DEFINE([LT_INIT], [no-pic], [_LT_WITH_PIC([no])])
+
+# Old name:
+AU_DEFUN([AC_LIBTOOL_PICMODE],
+[_LT_SET_OPTION([LT_INIT], [pic-only])
+AC_DIAGNOSE([obsolete],
+[$0: Remove this warning and the call to _LT_SET_OPTION when you
+put the `pic-only' option into LT_INIT's first parameter.])
+])
+
+dnl aclocal-1.4 backwards compatibility:
+dnl AC_DEFUN([AC_LIBTOOL_PICMODE], [])
+
+
+m4_define([_LTDL_MODE], [])
+LT_OPTION_DEFINE([LTDL_INIT], [nonrecursive],
+                [m4_define([_LTDL_MODE], [nonrecursive])])
+LT_OPTION_DEFINE([LTDL_INIT], [recursive],
+                [m4_define([_LTDL_MODE], [recursive])])
+LT_OPTION_DEFINE([LTDL_INIT], [subproject],
+                [m4_define([_LTDL_MODE], [subproject])])
+
+m4_define([_LTDL_TYPE], [])
+LT_OPTION_DEFINE([LTDL_INIT], [installable],
+                [m4_define([_LTDL_TYPE], [installable])])
+LT_OPTION_DEFINE([LTDL_INIT], [convenience],
+                [m4_define([_LTDL_TYPE], [convenience])])
+
+# ltsugar.m4 -- libtool m4 base layer.                         -*-Autoconf-*-
+#
+# Copyright (C) 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
+# Written by Gary V. Vaughan, 2004
+#
+# This file is free software; the Free Software Foundation gives
+# unlimited permission to copy and/or distribute it, with or without
+# modifications, as long as this notice is preserved.
+
+# serial 6 ltsugar.m4
+
+# This is to help aclocal find these macros, as it can't see m4_define.
+AC_DEFUN([LTSUGAR_VERSION], [m4_if([0.1])])
+
+
+# lt_join(SEP, ARG1, [ARG2...])
+# -----------------------------
+# Produce ARG1SEPARG2...SEPARGn, omitting [] arguments and their
+# associated separator.
+# Needed until we can rely on m4_join from Autoconf 2.62, since all earlier
+# versions in m4sugar had bugs.
+m4_define([lt_join],
+[m4_if([$#], [1], [],
+       [$#], [2], [[$2]],
+       [m4_if([$2], [], [], [[$2]_])$0([$1], m4_shift(m4_shift($@)))])])
+m4_define([_lt_join],
+[m4_if([$#$2], [2], [],
+       [m4_if([$2], [], [], [[$1$2]])$0([$1], m4_shift(m4_shift($@)))])])
+
+
+# lt_car(LIST)
+# lt_cdr(LIST)
+# ------------
+# Manipulate m4 lists.
+# These macros are necessary as long as we still need to support
+# Autoconf-2.59 which quotes differently.
+m4_define([lt_car], [[$1]])
+m4_define([lt_cdr],
+[m4_if([$#], 0, [m4_fatal([$0: cannot be called without arguments])],
+       [$#], 1, [],
+       [m4_dquote(m4_shift($@))])])
+m4_define([lt_unquote], $1)
+
+
+# lt_append(MACRO-NAME, STRING, [SEPARATOR])
+# ------------------------------------------
+# Redefine MACRO-NAME to hold its former content plus `SEPARATOR'`STRING'.
+# Note that neither SEPARATOR nor STRING are expanded; they are appended
+# to MACRO-NAME as is (leaving the expansion for when MACRO-NAME is invoked).
+# No SEPARATOR is output if MACRO-NAME was previously undefined (different
+# than defined and empty).
+#
+# This macro is needed until we can rely on Autoconf 2.62, since earlier
+# versions of m4sugar mistakenly expanded SEPARATOR but not STRING.
+m4_define([lt_append],
+[m4_define([$1],
+          m4_ifdef([$1], [m4_defn([$1])[$3]])[$2])])
+
+
+
+# lt_combine(SEP, PREFIX-LIST, INFIX, SUFFIX1, [SUFFIX2...])
+# ----------------------------------------------------------
+# Produce a SEP delimited list of all paired combinations of elements of
+# PREFIX-LIST with SUFFIX1 through SUFFIXn.  Each element of the list
+# has the form PREFIXmINFIXSUFFIXn.
+# Needed until we can rely on m4_combine added in Autoconf 2.62.
+#
+# Implementation notes:
+#  - Nothing is produced unless more than three arguments are given, i.e.
+#    unless at least one SUFFIX is present.
+#  - _Lt_sep is pushed as a macro whose first expansion emits nothing and
+#    redefines _Lt_sep with the definition of lt_car; every later
+#    expansion therefore yields SEP.  This is what keeps SEP from being
+#    emitted in front of the first combination.
+#  - The outer m4_foreach walks PREFIX-LIST, the inner one walks the
+#    SUFFIX arguments.
+#  - NOTE(review): _Lt_sep is pushed with m4_pushdef but no matching
+#    m4_popdef is visible in this macro -- confirm that this is intended.
+m4_define([lt_combine],
+[m4_if(m4_eval([$# > 3]), [1],
+       [m4_pushdef([_Lt_sep], [m4_define([_Lt_sep], m4_defn([lt_car]))])]]dnl
+[[m4_foreach([_Lt_prefix], [$2],
+            [m4_foreach([_Lt_suffix],
+               ]m4_dquote(m4_dquote(m4_shift(m4_shift(m4_shift($@)))))[,
+       [_Lt_sep([$1])[]m4_defn([_Lt_prefix])[$3]m4_defn([_Lt_suffix])])])])])
+
+
+# lt_if_append_uniq(MACRO-NAME, VARNAME, [SEPARATOR], [UNIQ], [NOT-UNIQ])
+# -----------------------------------------------------------------------
+# Iff MACRO-NAME does not yet contain VARNAME, then append it (delimited
+# by SEPARATOR if supplied) and expand UNIQ, else NOT-UNIQ.
+#
+# Implementation: when MACRO-NAME is defined, its current value is wrapped
+# in SEPARATOR on both sides and searched with m4_index for VARNAME wrapped
+# the same way, so only whole SEPARATOR-delimited elements can match.  A
+# result of -1 (not found) leads to the append through lt_append.  When
+# MACRO-NAME is not defined yet, VARNAME is always appended.
+m4_define([lt_if_append_uniq],
+[m4_ifdef([$1],
+         [m4_if(m4_index([$3]m4_defn([$1])[$3], [$3$2$3]), [-1],
+                [lt_append([$1], [$2], [$3])$4],
+                [$5])],
+         [lt_append([$1], [$2], [$3])$4])])
+
+
+# lt_dict_add(DICT, KEY, VALUE)
+# -----------------------------
+# Store VALUE under KEY in DICT.  The entry is kept as an m4 macro whose
+# name is the text DICT(KEY); an existing entry is overwritten.
+m4_define([lt_dict_add],
+[m4_define([$1($2)], [$3])])
+
+
+# lt_dict_add_subkey(DICT, KEY, SUBKEY, VALUE)
+# --------------------------------------------
+# Store VALUE under the pair KEY, SUBKEY in DICT.  The entry is kept as an
+# m4 macro whose name is the text DICT(KEY:SUBKEY).
+m4_define([lt_dict_add_subkey],
+[m4_define([$1($2:$3)], [$4])])
+
+
+# lt_dict_fetch(DICT, KEY, [SUBKEY])
+# ----------------------------------
+# Expand to the value stored in DICT for KEY, or for KEY:SUBKEY when a
+# non-empty SUBKEY is given.  Expands to nothing when no such entry has
+# been defined.
+m4_define([lt_dict_fetch],
+[m4_ifval([$3],
+       m4_ifdef([$1($2:$3)], [m4_defn([$1($2:$3)])]),
+    m4_ifdef([$1($2)], [m4_defn([$1($2)])]))])
+
+
+# lt_if_dict_fetch(DICT, KEY, [SUBKEY], VALUE, IF-TRUE, [IF-FALSE])
+# -----------------------------------------------------------------
+# Look the entry up with lt_dict_fetch and compare the result with VALUE:
+# expand IF-TRUE when they are equal, IF-FALSE otherwise.
+m4_define([lt_if_dict_fetch],
+[m4_if(lt_dict_fetch([$1], [$2], [$3]), [$4],
+       [$5],
+    [$6])])
+
+
+# lt_dict_filter(DICT, [SUBKEY], VALUE, [SEPARATOR], KEY, [...])
+# --------------------------------------------------------------
+# Expand to the list of those KEYs whose entry in DICT (looked up under
+# SUBKEY when one is given) equals VALUE.  The selected keys are joined
+# with lt_join using SEPARATOR, which defaults to a comma followed by a
+# space.  Expands to nothing when no KEY is supplied.
+#
+# Implementation: the keys are the arguments from the fifth onwards; each
+# matching key is emitted followed by a space, the result is normalized
+# and split on whitespace, and the pieces are passed on to lt_join.
+m4_define([lt_dict_filter],
+[m4_if([$5], [], [],
+  [lt_join(m4_quote(m4_default([$4], [[, ]])),
+           lt_unquote(m4_split(m4_normalize(m4_foreach(_Lt_key, lt_car([m4_shiftn(4, $@)]),
+                     [lt_if_dict_fetch([$1], _Lt_key, [$2], [$3], [_Lt_key ])])))))])[]dnl
+])
+
+# ltversion.m4 -- version numbers                      -*- Autoconf -*-
+#
+#   Copyright (C) 2004 Free Software Foundation, Inc.
+#   Written by Scott James Remnant, 2004
+#
+# This file is free software; the Free Software Foundation gives
+# unlimited permission to copy and/or distribute it, with or without
+# modifications, as long as this notice is preserved.
+
+# @configure_input@
+
+# serial 3337 ltversion.m4
+# This file is part of GNU Libtool
+
+# Libtool release and revision these macros were generated from, available
+# at m4 time.
+m4_define([LT_PACKAGE_VERSION], [2.4.2])
+m4_define([LT_PACKAGE_REVISION], [1.3337])
+
+# LTVERSION_VERSION
+# -----------------
+# Set the shell variables macro_version and macro_revision to the same
+# release and revision, and declare both through _LT_DECL (defined outside
+# this file section).
+AC_DEFUN([LTVERSION_VERSION],
+[macro_version='2.4.2'
+macro_revision='1.3337'
+_LT_DECL(, macro_version, 0, [Which release of libtool.m4 was used?])
+_LT_DECL(, macro_revision, 0)
+])
+
+# lt~obsolete.m4 -- aclocal satisfying obsolete definitions.    -*-Autoconf-*-
+#
+#   Copyright (C) 2004, 2005, 2007, 2009 Free Software Foundation, Inc.
+#   Written by Scott James Remnant, 2004.
+#
+# This file is free software; the Free Software Foundation gives
+# unlimited permission to copy and/or distribute it, with or without
+# modifications, as long as this notice is preserved.
+
+# serial 5 lt~obsolete.m4
+
+# These exist entirely to fool aclocal when bootstrapping libtool.
+#
+# In the past libtool.m4 has provided macros via AC_DEFUN (or AU_DEFUN)
+# which have later been changed to m4_define as they aren't part of the
+# exported API, or moved to Autoconf or Automake where they belong.
+#
+# The trouble is, aclocal is a bit thick.  It'll see the old AC_DEFUN
+# in /usr/share/aclocal/libtool.m4 and remember it, then when it sees us
+# using a macro with the same name in our local m4/libtool.m4 it'll
+# pull the old libtool.m4 in (it doesn't see our shiny new m4_define
+# and doesn't know about Autoconf macros at all.)
+#
+# So we provide this file, which has a silly filename so it's always
+# included after everything else.  This provides aclocal with the
+# AC_DEFUNs it wants, but when m4 processes it, it doesn't do anything
+# because those macros already exist, or will be overwritten later.
+# We use AC_DEFUN over AU_DEFUN for compatibility with aclocal-1.6. 
+#
+# Anytime we withdraw an AC_DEFUN or AU_DEFUN, remember to add it here.
+# Yes, that means every name once taken will need to remain here until
+# we give up compatibility with versions before 1.7, at which point
+# we need to keep only those names which we still refer to.
+
+# This is to help aclocal find these macros, as it can't see m4_define.
+# Its body is a one-argument m4_if, which expands to nothing.
+AC_DEFUN([LTOBSOLETE_VERSION], [m4_if([1])])
+
+# Each line below defines NAME as a macro with an empty body, but only when
+# NAME is not defined yet, so a definition that already exists is left
+# untouched.
+m4_ifndef([AC_LIBTOOL_LINKER_OPTION],  [AC_DEFUN([AC_LIBTOOL_LINKER_OPTION])])
+m4_ifndef([AC_PROG_EGREP],             [AC_DEFUN([AC_PROG_EGREP])])
+m4_ifndef([_LT_AC_PROG_ECHO_BACKSLASH],        [AC_DEFUN([_LT_AC_PROG_ECHO_BACKSLASH])])
+m4_ifndef([_LT_AC_SHELL_INIT],         [AC_DEFUN([_LT_AC_SHELL_INIT])])
+m4_ifndef([_LT_AC_SYS_LIBPATH_AIX],    [AC_DEFUN([_LT_AC_SYS_LIBPATH_AIX])])
+m4_ifndef([_LT_PROG_LTMAIN],           [AC_DEFUN([_LT_PROG_LTMAIN])])
+m4_ifndef([_LT_AC_TAGVAR],             [AC_DEFUN([_LT_AC_TAGVAR])])
+m4_ifndef([AC_LTDL_ENABLE_INSTALL],    [AC_DEFUN([AC_LTDL_ENABLE_INSTALL])])
+m4_ifndef([AC_LTDL_PREOPEN],           [AC_DEFUN([AC_LTDL_PREOPEN])])
+m4_ifndef([_LT_AC_SYS_COMPILER],       [AC_DEFUN([_LT_AC_SYS_COMPILER])])
+m4_ifndef([_LT_AC_LOCK],               [AC_DEFUN([_LT_AC_LOCK])])
+m4_ifndef([AC_LIBTOOL_SYS_OLD_ARCHIVE],        [AC_DEFUN([AC_LIBTOOL_SYS_OLD_ARCHIVE])])
+m4_ifndef([_LT_AC_TRY_DLOPEN_SELF],    [AC_DEFUN([_LT_AC_TRY_DLOPEN_SELF])])
+m4_ifndef([AC_LIBTOOL_PROG_CC_C_O],    [AC_DEFUN([AC_LIBTOOL_PROG_CC_C_O])])
+m4_ifndef([AC_LIBTOOL_SYS_HARD_LINK_LOCKS], [AC_DEFUN([AC_LIBTOOL_SYS_HARD_LINK_LOCKS])])
+m4_ifndef([AC_LIBTOOL_OBJDIR],         [AC_DEFUN([AC_LIBTOOL_OBJDIR])])
+m4_ifndef([AC_LTDL_OBJDIR],            [AC_DEFUN([AC_LTDL_OBJDIR])])
+m4_ifndef([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH], [AC_DEFUN([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH])])
+m4_ifndef([AC_LIBTOOL_SYS_LIB_STRIP],  [AC_DEFUN([AC_LIBTOOL_SYS_LIB_STRIP])])
+m4_ifndef([AC_PATH_MAGIC],             [AC_DEFUN([AC_PATH_MAGIC])])
+m4_ifndef([AC_PROG_LD_GNU],            [AC_DEFUN([AC_PROG_LD_GNU])])
+m4_ifndef([AC_PROG_LD_RELOAD_FLAG],    [AC_DEFUN([AC_PROG_LD_RELOAD_FLAG])])
+m4_ifndef([AC_DEPLIBS_CHECK_METHOD],   [AC_DEFUN([AC_DEPLIBS_CHECK_METHOD])])
+m4_ifndef([AC_LIBTOOL_PROG_COMPILER_NO_RTTI], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_NO_RTTI])])
+m4_ifndef([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE], [AC_DEFUN([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE])])
+m4_ifndef([AC_LIBTOOL_PROG_COMPILER_PIC], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_PIC])])
+m4_ifndef([AC_LIBTOOL_PROG_LD_SHLIBS], [AC_DEFUN([AC_LIBTOOL_PROG_LD_SHLIBS])])
+m4_ifndef([AC_LIBTOOL_POSTDEP_PREDEP], [AC_DEFUN([AC_LIBTOOL_POSTDEP_PREDEP])])
+m4_ifndef([LT_AC_PROG_EGREP],          [AC_DEFUN([LT_AC_PROG_EGREP])])
+m4_ifndef([LT_AC_PROG_SED],            [AC_DEFUN([LT_AC_PROG_SED])])
+m4_ifndef([_LT_CC_BASENAME],           [AC_DEFUN([_LT_CC_BASENAME])])
+m4_ifndef([_LT_COMPILER_BOILERPLATE],  [AC_DEFUN([_LT_COMPILER_BOILERPLATE])])
+m4_ifndef([_LT_LINKER_BOILERPLATE],    [AC_DEFUN([_LT_LINKER_BOILERPLATE])])
+m4_ifndef([_AC_PROG_LIBTOOL],          [AC_DEFUN([_AC_PROG_LIBTOOL])])
+m4_ifndef([AC_LIBTOOL_SETUP],          [AC_DEFUN([AC_LIBTOOL_SETUP])])
+m4_ifndef([_LT_AC_CHECK_DLFCN],                [AC_DEFUN([_LT_AC_CHECK_DLFCN])])
+m4_ifndef([AC_LIBTOOL_SYS_DYNAMIC_LINKER],     [AC_DEFUN([AC_LIBTOOL_SYS_DYNAMIC_LINKER])])
+m4_ifndef([_LT_AC_TAGCONFIG],          [AC_DEFUN([_LT_AC_TAGCONFIG])])
+m4_ifndef([AC_DISABLE_FAST_INSTALL],   [AC_DEFUN([AC_DISABLE_FAST_INSTALL])])
+m4_ifndef([_LT_AC_LANG_CXX],           [AC_DEFUN([_LT_AC_LANG_CXX])])
+m4_ifndef([_LT_AC_LANG_F77],           [AC_DEFUN([_LT_AC_LANG_F77])])
+m4_ifndef([_LT_AC_LANG_GCJ],           [AC_DEFUN([_LT_AC_LANG_GCJ])])
+m4_ifndef([AC_LIBTOOL_LANG_C_CONFIG],  [AC_DEFUN([AC_LIBTOOL_LANG_C_CONFIG])])
+m4_ifndef([_LT_AC_LANG_C_CONFIG],      [AC_DEFUN([_LT_AC_LANG_C_CONFIG])])
+m4_ifndef([AC_LIBTOOL_LANG_CXX_CONFIG],        [AC_DEFUN([AC_LIBTOOL_LANG_CXX_CONFIG])])
+m4_ifndef([_LT_AC_LANG_CXX_CONFIG],    [AC_DEFUN([_LT_AC_LANG_CXX_CONFIG])])
+m4_ifndef([AC_LIBTOOL_LANG_F77_CONFIG],        [AC_DEFUN([AC_LIBTOOL_LANG_F77_CONFIG])])
+m4_ifndef([_LT_AC_LANG_F77_CONFIG],    [AC_DEFUN([_LT_AC_LANG_F77_CONFIG])])
+m4_ifndef([AC_LIBTOOL_LANG_GCJ_CONFIG],        [AC_DEFUN([AC_LIBTOOL_LANG_GCJ_CONFIG])])
+m4_ifndef([_LT_AC_LANG_GCJ_CONFIG],    [AC_DEFUN([_LT_AC_LANG_GCJ_CONFIG])])
+m4_ifndef([AC_LIBTOOL_LANG_RC_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_RC_CONFIG])])
+m4_ifndef([_LT_AC_LANG_RC_CONFIG],     [AC_DEFUN([_LT_AC_LANG_RC_CONFIG])])
+m4_ifndef([AC_LIBTOOL_CONFIG],         [AC_DEFUN([AC_LIBTOOL_CONFIG])])
+m4_ifndef([_LT_AC_FILE_LTDLL_C],       [AC_DEFUN([_LT_AC_FILE_LTDLL_C])])
+m4_ifndef([_LT_REQUIRED_DARWIN_CHECKS],        [AC_DEFUN([_LT_REQUIRED_DARWIN_CHECKS])])
+m4_ifndef([_LT_AC_PROG_CXXCPP],                [AC_DEFUN([_LT_AC_PROG_CXXCPP])])
+m4_ifndef([_LT_PREPARE_SED_QUOTE_VARS],        [AC_DEFUN([_LT_PREPARE_SED_QUOTE_VARS])])
+m4_ifndef([_LT_PROG_ECHO_BACKSLASH],   [AC_DEFUN([_LT_PROG_ECHO_BACKSLASH])])
+m4_ifndef([_LT_PROG_F77],              [AC_DEFUN([_LT_PROG_F77])])
+m4_ifndef([_LT_PROG_FC],               [AC_DEFUN([_LT_PROG_FC])])
+m4_ifndef([_LT_PROG_CXX],              [AC_DEFUN([_LT_PROG_CXX])])
+
+# Copyright (C) 2002, 2003, 2005, 2006, 2007, 2008  Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_AUTOMAKE_VERSION(VERSION)
+# ----------------------------
+# Automake X.Y traces this macro to ensure aclocal.m4 has been
+# generated from the m4 files accompanying Automake X.Y.
+# (This private macro should not be called outside this file.)
+#
+# The expansion sets the shell variable am__api_version to 1.11.  Any
+# VERSION other than 1.11.1 aborts through AC_FATAL with a message that
+# points the caller to AM_INIT_AUTOMAKE.
+AC_DEFUN([AM_AUTOMAKE_VERSION],
+[am__api_version='1.11'
+dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to
+dnl require some minimum version.  Point them to the right macro.
+m4_if([$1], [1.11.1], [],
+      [AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl
+])
+
+# _AM_AUTOCONF_VERSION(VERSION)
+# -----------------------------
+# aclocal traces this macro to find the Autoconf version.
+# This is a private macro too.  Using m4_define simplifies
+# the logic in aclocal, which can simply ignore this definition.
+# The body is empty: expanding the macro produces no output.
+m4_define([_AM_AUTOCONF_VERSION], [])
+
+# AM_SET_CURRENT_AUTOMAKE_VERSION
+# -------------------------------
+# Call AM_AUTOMAKE_VERSION and _AM_AUTOCONF_VERSION so they can be traced.
+# This function is AC_REQUIREd by AM_INIT_AUTOMAKE.
+#
+# The Automake version passed on is the literal 1.11.1.  When
+# AC_AUTOCONF_VERSION is not defined it is first created as a copy of
+# m4_PACKAGE_VERSION, and its definition is then handed to
+# _AM_AUTOCONF_VERSION.
+AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION],
+[AM_AUTOMAKE_VERSION([1.11.1])dnl
+m4_ifndef([AC_AUTOCONF_VERSION],
+  [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
+_AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))])
+
+# AM_AUX_DIR_EXPAND                                         -*- Autoconf -*-
+
+# Copyright (C) 2001, 2003, 2005  Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# For projects using AC_CONFIG_AUX_DIR([foo]), Autoconf sets
+# $ac_aux_dir to `$srcdir/foo'.  In other projects, it is set to
+# `$srcdir', `$srcdir/..', or `$srcdir/../..'.
+#
+# Of course, Automake must honor this variable whenever it calls a
+# tool from the auxiliary directory.  The problem is that $srcdir (and
+# therefore $ac_aux_dir as well) can be either absolute or relative,
+# depending on how configure is run.  This is pretty annoying, since
+# it makes $ac_aux_dir quite unusable in subdirectories: in the top
+# source directory, any form will work fine, but in subdirectories a
+# relative path needs to be adjusted first.
+#
+# $ac_aux_dir/missing
+#    fails when called from a subdirectory if $ac_aux_dir is relative
+# $top_srcdir/$ac_aux_dir/missing
+#    fails if $ac_aux_dir is absolute,
+#    fails when called from a subdirectory in a VPATH build with
+#          a relative $ac_aux_dir
+#
+# The reason for the latter failure is that $top_srcdir and $ac_aux_dir
+# are both prefixed by $srcdir.  In an in-source build this is usually
+# harmless because $srcdir is `.', but things will break when you
+# start a VPATH build or use an absolute $srcdir.
+#
+# So we could use something similar to $top_srcdir/$ac_aux_dir/missing,
+# iff we strip the leading $srcdir from $ac_aux_dir.  That would be:
+#   am_aux_dir='\$(top_srcdir)/'`expr "$ac_aux_dir" : "$srcdir//*\(.*\)"`
+# and then we would define $MISSING as
+#   MISSING="\${SHELL} $am_aux_dir/missing"
+# This will work as long as MISSING is not called from configure, because
+# unfortunately $(top_srcdir) has no meaning in configure.
+# However there are other variables, like CC, which are often used in
+# configure, and could therefore not use this "fixed" $ac_aux_dir.
+#
+# Another solution, used here, is to always expand $ac_aux_dir to an
+# absolute PATH.  The drawback is that using absolute paths prevents a
+# configured tree from being moved without reconfiguration.
+
+# AM_AUX_DIR_EXPAND
+# -----------------
+# Set the shell variable am_aux_dir to the absolute form of $ac_aux_dir,
+# obtained by changing into that directory in a command substitution and
+# printing the working directory.  The directory name is passed to `cd'
+# inside double quotes so that a name containing whitespace is not split
+# into several words by the shell.
+AC_DEFUN([AM_AUX_DIR_EXPAND],
+[dnl Rely on autoconf to set up CDPATH properly.
+AC_PREREQ([2.50])dnl
+# expand $ac_aux_dir to an absolute path
+am_aux_dir=`cd "$ac_aux_dir" && pwd`
+])
+
+# AM_CONDITIONAL                                            -*- Autoconf -*-
+
+# Copyright (C) 1997, 2000, 2001, 2003, 2004, 2005, 2006, 2008
+# Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# serial 9
+
+# AM_CONDITIONAL(NAME, SHELL-CONDITION)
+# -------------------------------------
+# Define a conditional.
+#
+# Details of the expansion:
+#  - NAME may not be TRUE or FALSE; either one aborts through AC_FATAL.
+#  - NAME_TRUE and NAME_FALSE are substituted with AC_SUBST and passed to
+#    _AM_SUBST_NOTMAKE.
+#  - SHELL-CONDITION is recorded at m4 time in _AM_COND_VALUE_NAME.
+#  - At configure time, when SHELL-CONDITION succeeds NAME_TRUE is empty
+#    and NAME_FALSE is `#'; otherwise the two values are swapped.
+#  - A check registered with AC_CONFIG_COMMANDS_PRE reports an error when
+#    both variables are empty, which is the case when the shell code above
+#    was never executed.
+AC_DEFUN([AM_CONDITIONAL],
+[AC_PREREQ(2.52)dnl
+ ifelse([$1], [TRUE],  [AC_FATAL([$0: invalid condition: $1])],
+       [$1], [FALSE], [AC_FATAL([$0: invalid condition: $1])])dnl
+AC_SUBST([$1_TRUE])dnl
+AC_SUBST([$1_FALSE])dnl
+_AM_SUBST_NOTMAKE([$1_TRUE])dnl
+_AM_SUBST_NOTMAKE([$1_FALSE])dnl
+m4_define([_AM_COND_VALUE_$1], [$2])dnl
+if $2; then
+  $1_TRUE=
+  $1_FALSE='#'
+else
+  $1_TRUE='#'
+  $1_FALSE=
+fi
+AC_CONFIG_COMMANDS_PRE(
+[if test -z "${$1_TRUE}" && test -z "${$1_FALSE}"; then
+  AC_MSG_ERROR([[conditional "$1" was never defined.
+Usually this means the macro was only invoked conditionally.]])
+fi])])
+
+# Copyright (C) 1996, 1997, 2000, 2001, 2003, 2005
+# Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# serial 8
+
+# AM_CONFIG_HEADER is obsolete.  It has been replaced by AC_CONFIG_HEADERS.
+# The obsolete name forwards all of its arguments unchanged.
+AU_DEFUN([AM_CONFIG_HEADER], [AC_CONFIG_HEADERS($@)])
+
+# Do all the work for Automake.                             -*- Autoconf -*-
+
+# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
+# 2005, 2006, 2008, 2009 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# serial 16
+
+# This macro actually does too much.  Some checks are only needed if
+# your package does certain things.  But this isn't really a big deal.
+
+# AM_INIT_AUTOMAKE(PACKAGE, VERSION, [NO-DEFINE])
+# AM_INIT_AUTOMAKE([OPTIONS])
+# -----------------------------------------------
+# The call with PACKAGE and VERSION arguments is the old style
+# call (pre autoconf-2.50), which is being phased out.  PACKAGE
+# and VERSION should now be passed to AC_INIT and removed from
+# the call to AM_INIT_AUTOMAKE.
+# We support both call styles for the transition.  After
+# the next Automake release, Autoconf can make the AC_INIT
+# arguments mandatory, and then we can depend on a new Autoconf
+# release and drop the old call support.
+#
+# Summary of the expansion, in order:
+#  - require AM_SET_CURRENT_AUTOMAKE_VERSION and AC_PROG_INSTALL;
+#  - when the source directory differs from the current directory,
+#    substitute am__isrc and stop with an error if $srcdir already holds a
+#    config.status;
+#  - unless CYGPATH_W is already set, use `cygpath -w' when cygpath runs
+#    and `echo' otherwise;
+#  - old-style call (second argument non-empty): PACKAGE and VERSION come
+#    from the first two arguments and a non-empty third argument sets the
+#    `no-define' option; new-style call: the first argument is recorded as
+#    the option list and PACKAGE/VERSION come from the AC_INIT values;
+#  - unless `no-define' is set, PACKAGE and VERSION are passed to
+#    AC_DEFINE_UNQUOTED;
+#  - set up ACLOCAL, AUTOCONF, AUTOMAKE, AUTOHEADER and MAKEINFO through
+#    AM_MISSING_PROG, require the install, strip, mkdir, awk, make and
+#    leading-dot checks, and select the tar format (`tar-ustar', `tar-pax',
+#    v7 otherwise);
+#  - unless `no-dependencies' is set, run _AM_DEPENDENCIES for CC, CXX and
+#    OBJC, either directly when the corresponding AC_PROG_* macro has been
+#    provided or by appending the call to that macro's definition;
+#  - require AM_SILENT_RULES for the `silent-rules' option and register the
+#    am__EXEEXT conditional when _AM_COMPILER_EXEEXT was provided.
+AC_DEFUN([AM_INIT_AUTOMAKE],
+[AC_PREREQ([2.62])dnl
+dnl Autoconf wants to disallow AM_ names.  We explicitly allow
+dnl the ones we care about.
+m4_pattern_allow([^AM_[A-Z]+FLAGS$])dnl
+AC_REQUIRE([AM_SET_CURRENT_AUTOMAKE_VERSION])dnl
+AC_REQUIRE([AC_PROG_INSTALL])dnl
+if test "`cd $srcdir && pwd`" != "`pwd`"; then
+  # Use -I$(srcdir) only when $(srcdir) != ., so that make's output
+  # is not polluted with repeated "-I."
+  AC_SUBST([am__isrc], [' -I$(srcdir)'])_AM_SUBST_NOTMAKE([am__isrc])dnl
+  # test to see if srcdir already configured
+  if test -f $srcdir/config.status; then
+    AC_MSG_ERROR([source directory already configured; run "make distclean" there first])
+  fi
+fi
+
+# test whether we have cygpath
+if test -z "$CYGPATH_W"; then
+  if (cygpath --version) >/dev/null 2>/dev/null; then
+    CYGPATH_W='cygpath -w'
+  else
+    CYGPATH_W=echo
+  fi
+fi
+AC_SUBST([CYGPATH_W])
+
+# Define the identity of the package.
+dnl Distinguish between old-style and new-style calls.
+m4_ifval([$2],
+[m4_ifval([$3], [_AM_SET_OPTION([no-define])])dnl
+ AC_SUBST([PACKAGE], [$1])dnl
+ AC_SUBST([VERSION], [$2])],
+[_AM_SET_OPTIONS([$1])dnl
+dnl Diagnose old-style AC_INIT with new-style AM_INIT_AUTOMAKE.
+m4_if(m4_ifdef([AC_PACKAGE_NAME], 1)m4_ifdef([AC_PACKAGE_VERSION], 1), 11,,
+  [m4_fatal([AC_INIT should be called with package and version arguments])])dnl
+ AC_SUBST([PACKAGE], ['AC_PACKAGE_TARNAME'])dnl
+ AC_SUBST([VERSION], ['AC_PACKAGE_VERSION'])])dnl
+
+_AM_IF_OPTION([no-define],,
+[AC_DEFINE_UNQUOTED(PACKAGE, "$PACKAGE", [Name of package])
+ AC_DEFINE_UNQUOTED(VERSION, "$VERSION", [Version number of package])])dnl
+
+# Some tools Automake needs.
+AC_REQUIRE([AM_SANITY_CHECK])dnl
+AC_REQUIRE([AC_ARG_PROGRAM])dnl
+AM_MISSING_PROG(ACLOCAL, aclocal-${am__api_version})
+AM_MISSING_PROG(AUTOCONF, autoconf)
+AM_MISSING_PROG(AUTOMAKE, automake-${am__api_version})
+AM_MISSING_PROG(AUTOHEADER, autoheader)
+AM_MISSING_PROG(MAKEINFO, makeinfo)
+AC_REQUIRE([AM_PROG_INSTALL_SH])dnl
+AC_REQUIRE([AM_PROG_INSTALL_STRIP])dnl
+AC_REQUIRE([AM_PROG_MKDIR_P])dnl
+# We need awk for the "check" target.  The system "awk" is bad on
+# some platforms.
+AC_REQUIRE([AC_PROG_AWK])dnl
+AC_REQUIRE([AC_PROG_MAKE_SET])dnl
+AC_REQUIRE([AM_SET_LEADING_DOT])dnl
+_AM_IF_OPTION([tar-ustar], [_AM_PROG_TAR([ustar])],
+             [_AM_IF_OPTION([tar-pax], [_AM_PROG_TAR([pax])],
+                            [_AM_PROG_TAR([v7])])])
+_AM_IF_OPTION([no-dependencies],,
+[AC_PROVIDE_IFELSE([AC_PROG_CC],
+                 [_AM_DEPENDENCIES(CC)],
+                 [define([AC_PROG_CC],
+                         defn([AC_PROG_CC])[_AM_DEPENDENCIES(CC)])])dnl
+AC_PROVIDE_IFELSE([AC_PROG_CXX],
+                 [_AM_DEPENDENCIES(CXX)],
+                 [define([AC_PROG_CXX],
+                         defn([AC_PROG_CXX])[_AM_DEPENDENCIES(CXX)])])dnl
+AC_PROVIDE_IFELSE([AC_PROG_OBJC],
+                 [_AM_DEPENDENCIES(OBJC)],
+                 [define([AC_PROG_OBJC],
+                         defn([AC_PROG_OBJC])[_AM_DEPENDENCIES(OBJC)])])dnl
+])
+_AM_IF_OPTION([silent-rules], [AC_REQUIRE([AM_SILENT_RULES])])dnl
+dnl The `parallel-tests' driver may need to know about EXEEXT, so add the
+dnl `am__EXEEXT' conditional if _AM_COMPILER_EXEEXT was seen.  This macro
+dnl is hooked onto _AC_COMPILER_EXEEXT early, see below.
+AC_CONFIG_COMMANDS_PRE(dnl
+[m4_provide_if([_AM_COMPILER_EXEEXT],
+  [AM_CONDITIONAL([am__EXEEXT], [test -n "$EXEEXT"])])])dnl
+])
+
+dnl Hook into `_AC_COMPILER_EXEEXT' early to learn its expansion.  Do not
+dnl add the conditional right here, as _AC_COMPILER_EXEEXT may be further
+dnl mangled by Autoconf and run in a shell conditional statement.
+dnl The redefinition keeps the existing body of _AC_COMPILER_EXEEXT and
+dnl appends an m4_provide of _AM_COMPILER_EXEEXT to it.
+m4_define([_AC_COMPILER_EXEEXT],
+m4_defn([_AC_COMPILER_EXEEXT])[m4_provide([_AM_COMPILER_EXEEXT])])
+
+
+# When config.status generates a header, we must update the stamp-h file.
+# This file resides in the same directory as the config header
+# that is generated.  The stamp files are numbered to have different names.
+
+# Autoconf calls _AC_AM_CONFIG_HEADER_HOOK (when defined) in the
+# loop where config.status creates the headers, so we can generate
+# our stamp files there.
+#
+# The generated shell code walks $config_headers, counting from 1, until it
+# reaches the entry equal to the header given as argument (or that header
+# followed by a colon and further text).  It then writes a one-line
+# timestamp file named stamp-hN, N being the count reached, into the
+# directory of that header.
+# NOTE(review): the extra `:' word in the loop list presumably keeps the
+# list non-empty when $config_headers is empty -- confirm.
+AC_DEFUN([_AC_AM_CONFIG_HEADER_HOOK],
+[# Compute $1's index in $config_headers.
+_am_arg=$1
+_am_stamp_count=1
+for _am_header in $config_headers :; do
+  case $_am_header in
+    $_am_arg | $_am_arg:* )
+      break ;;
+    * )
+      _am_stamp_count=`expr $_am_stamp_count + 1` ;;
+  esac
+done
+echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count])
+
+# Copyright (C) 2001, 2003, 2005, 2008  Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_PROG_INSTALL_SH
+# ------------------
+# Define $install_sh.
+#
+# install_sh is only computed when it is not already set, so that a value
+# supplied by the user is honoured.  The computed value runs the install-sh
+# script of $am_aux_dir through ${SHELL}; the script path is wrapped in
+# single quotes when the directory name contains whitespace.
+# NOTE(review): the second alternative of the case pattern is presumably
+# meant to match a tab character -- confirm the whitespace in the real file.
+AC_DEFUN([AM_PROG_INSTALL_SH],
+[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl
+dnl `${install_sh+set}' expands to `set' exactly when install_sh is set;
+dnl this is the same test AM_MISSING_HAS_RUN applies to MISSING.
+if test x"${install_sh+set}" != xset; then
+  case $am_aux_dir in
+  *\ * | *\    *)
+    install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;;
+  *)
+    install_sh="\${SHELL} $am_aux_dir/install-sh"
+  esac
+fi
+AC_SUBST(install_sh)])
+
+# Copyright (C) 2003, 2005  Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# serial 2
+
+# Check whether the underlying file-system supports filenames
+# with a leading dot.  For instance MS-DOS doesn't.
+#
+# The probe removes any existing .tst, tries to create a directory of that
+# name and checks that it exists.  am__leading_dot is set to `.' when the
+# directory could be created and to `_' otherwise; the probe directory is
+# removed again and am__leading_dot is substituted.
+AC_DEFUN([AM_SET_LEADING_DOT],
+[rm -rf .tst 2>/dev/null
+mkdir .tst 2>/dev/null
+if test -d .tst; then
+  am__leading_dot=.
+else
+  am__leading_dot=_
+fi
+rmdir .tst 2>/dev/null
+AC_SUBST([am__leading_dot])])
+
+# Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2005
+# Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# serial 5
+
+# AM_PROG_LEX
+# -----------
+# Autoconf leaves LEX=: if lex or flex can't be found.  Change that to a
+# "missing" invocation, for better error output.
+#
+# After AC_PROG_LEX has run, a LEX value of `:' is replaced by
+# ${am_missing_run} followed by `flex'.
+AC_DEFUN([AM_PROG_LEX],
+[AC_PREREQ(2.50)dnl
+AC_REQUIRE([AM_MISSING_HAS_RUN])dnl
+AC_REQUIRE([AC_PROG_LEX])dnl
+if test "$LEX" = :; then
+  LEX=${am_missing_run}flex
+fi])
+
+# Add --enable-maintainer-mode option to configure.         -*- Autoconf -*-
+# From Jim Meyering
+
+# Copyright (C) 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2005, 2008
+# Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# serial 5
+
+# AM_MAINTAINER_MODE([DEFAULT-MODE])
+# ----------------------------------
+# Control maintainer-specific portions of Makefiles.
+# Default is to disable them, unless `enable' is passed literally.
+# For symmetry, `disable' may be passed as well.  Anyway, the user
+# can override the default with the --enable/--disable switch.
+#
+# Implementation notes:
+#  - am_maintainer_other holds the opposite of DEFAULT-MODE, i.e. the
+#    action the configure switch is documented to perform.  An argument
+#    other than `enable' or `disable' is treated like `disable' and
+#    triggers a syntax warning.
+#  - USE_MAINTAINER_MODE takes the value given on the command line, or
+#    else `no' when am_maintainer_other is `enable' and `yes' otherwise.
+#  - The MAINTAINER_MODE conditional is true when USE_MAINTAINER_MODE is
+#    `yes'; MAINT is set to the value of MAINTAINER_MODE_TRUE and
+#    substituted.
+AC_DEFUN([AM_MAINTAINER_MODE],
+[m4_case(m4_default([$1], [disable]),
+       [enable], [m4_define([am_maintainer_other], [disable])],
+       [disable], [m4_define([am_maintainer_other], [enable])],
+       [m4_define([am_maintainer_other], [enable])
+        m4_warn([syntax], [unexpected argument to AM@&t@_MAINTAINER_MODE: $1])])
+AC_MSG_CHECKING([whether to am_maintainer_other maintainer-specific portions of Makefiles])
+  dnl maintainer-mode's default is 'disable' unless 'enable' is passed
+  AC_ARG_ENABLE([maintainer-mode],
+[  --][am_maintainer_other][-maintainer-mode  am_maintainer_other make rules and dependencies not useful
+                         (and sometimes confusing) to the casual installer],
+      [USE_MAINTAINER_MODE=$enableval],
+      [USE_MAINTAINER_MODE=]m4_if(am_maintainer_other, [enable], [no], [yes]))
+  AC_MSG_RESULT([$USE_MAINTAINER_MODE])
+  AM_CONDITIONAL([MAINTAINER_MODE], [test $USE_MAINTAINER_MODE = yes])
+  MAINT=$MAINTAINER_MODE_TRUE
+  AC_SUBST([MAINT])dnl
+]
+)
+
+# Obsolete name, declared with AU_DEFUN; it expands to AM_MAINTAINER_MODE
+# without arguments.
+AU_DEFUN([jm_MAINTAINER_MODE], [AM_MAINTAINER_MODE])
+
+# Fake the existence of programs that GNU maintainers use.  -*- Autoconf -*-
+
+# Copyright (C) 1997, 1999, 2000, 2001, 2003, 2004, 2005, 2008
+# Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# serial 6
+
+# AM_MISSING_PROG(NAME, PROGRAM)
+# ------------------------------
+# Give the shell variable NAME the value ${am_missing_run} followed by
+# PROGRAM, unless NAME is already set, in which case its value is kept.
+# NAME is then substituted with AC_SUBST.
+AC_DEFUN([AM_MISSING_PROG],
+[AC_REQUIRE([AM_MISSING_HAS_RUN])
+$1=${$1-"${am_missing_run}$2"}
+AC_SUBST($1)])
+
+
+# AM_MISSING_HAS_RUN
+# ------------------
+# Define MISSING if not defined so far and test if it supports --run.
+# If it does, set am_missing_run to use it, otherwise, to nothing.
+#
+# MISSING is only computed when it is not already set.  The computed value
+# runs the `missing' script of $am_aux_dir through ${SHELL}; the script
+# path is wrapped in double quotes when the directory name contains
+# whitespace.  The script is then tried with `--run true': on success
+# am_missing_run becomes "$MISSING --run ", otherwise it is left empty and
+# a warning is issued.
+AC_DEFUN([AM_MISSING_HAS_RUN],
+[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl
+AC_REQUIRE_AUX_FILE([missing])dnl
+if test x"${MISSING+set}" != xset; then
+  case $am_aux_dir in
+  *\ * | *\    *)
+    MISSING="\${SHELL} \"$am_aux_dir/missing\"" ;;
+  *)
+    MISSING="\${SHELL} $am_aux_dir/missing" ;;
+  esac
+fi
+# Use eval to expand $SHELL
+if eval "$MISSING --run true"; then
+  am_missing_run="$MISSING --run "
+else
+  am_missing_run=
+  AC_MSG_WARN([`missing' script is too old or missing])
+fi
+])
+
+# Copyright (C) 2003, 2004, 2005, 2006  Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_PROG_MKDIR_P
+# ---------------
+# Check for `mkdir -p'.
+#
+# mkdir_p is substituted with the value of $MKDIR_P.  The value is left
+# alone when it starts with a backslash, a slash, a dollar sign or a
+# drive-letter prefix; any other value that contains a slash is prefixed
+# with $(top_builddir)/.
+AC_DEFUN([AM_PROG_MKDIR_P],
+[AC_PREREQ([2.60])dnl
+AC_REQUIRE([AC_PROG_MKDIR_P])dnl
+dnl Automake 1.8 to 1.9.6 used to define mkdir_p.  We now use MKDIR_P,
+dnl while keeping a definition of mkdir_p for backward compatibility.
+dnl @MKDIR_P@ is magic: AC_OUTPUT adjusts its value for each Makefile.
+dnl However we cannot define mkdir_p as $(MKDIR_P) for the sake of
+dnl Makefile.ins that do not define MKDIR_P, so we do our own
+dnl adjustment using top_builddir (which is defined more often than
+dnl MKDIR_P).
+AC_SUBST([mkdir_p], ["$MKDIR_P"])dnl
+case $mkdir_p in
+  [[\\/$]]* | ?:[[\\/]]*) ;;
+  */*) mkdir_p="\$(top_builddir)/$mkdir_p" ;;
+esac
+])
+
+# Helper functions for option handling.                     -*- Autoconf -*-
+
+# Copyright (C) 2001, 2002, 2003, 2005, 2008  Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# serial 4
+
+# _AM_MANGLE_OPTION(NAME)
+# -----------------------
+# Expand to the name of the m4 flag macro used for option NAME: the prefix
+# _AM_OPTION_ followed by NAME with every character other than a letter,
+# a digit or an underscore replaced by an underscore.
+AC_DEFUN([_AM_MANGLE_OPTION],
+[[_AM_OPTION_]m4_bpatsubst($1, [[^a-zA-Z0-9_]], [_])])
+
+# _AM_SET_OPTION(NAME)
+# ------------------------------
+# Set option NAME.  Presently that only means defining a flag for this option.
+# The flag is the macro named by _AM_MANGLE_OPTION, defined to 1.
+AC_DEFUN([_AM_SET_OPTION],
+[m4_define(_AM_MANGLE_OPTION([$1]), 1)])
+
+# _AM_SET_OPTIONS(OPTIONS)
+# ----------------------------------
+# OPTIONS is a space-separated list of Automake options.
+# Each word of the list is passed to _AM_SET_OPTION in turn.
+AC_DEFUN([_AM_SET_OPTIONS],
+[m4_foreach_w([_AM_Option], [$1], [_AM_SET_OPTION(_AM_Option)])])
+
+# _AM_IF_OPTION(OPTION, IF-SET, [IF-NOT-SET])
+# -------------------------------------------
+# Execute IF-SET if OPTION is set, IF-NOT-SET otherwise.
+# The test is m4_ifset applied to the flag macro named by
+# _AM_MANGLE_OPTION.
+AC_DEFUN([_AM_IF_OPTION],
+[m4_ifset(_AM_MANGLE_OPTION([$1]), [$2], [$3])])
+
+# Copyright (C) 1996, 1997, 1998, 2000, 2001, 2002, 2003, 2005, 2006
+# Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# serial 5
+
+# AM_C_PROTOTYPES
+# ---------------
+# Set and substitute U and ANSI2KNR from the result cached in
+# $ac_cv_prog_cc_stdc: both are empty unless that value is `no', in which
+# case U is `_' and ANSI2KNR is `./ansi2knr'.  ANSI2KNR is also passed to
+# _AM_SUBST_NOTMAKE.  AC_HEADER_STDC is required and string.h is checked
+# for.
+AC_DEFUN([AM_C_PROTOTYPES],
+[AC_REQUIRE([AC_C_PROTOTYPES])
+if test "$ac_cv_prog_cc_stdc" != no; then
+  U= ANSI2KNR=
+else
+  U=_ ANSI2KNR=./ansi2knr
+fi
+# Ensure some checks needed by ansi2knr itself.
+AC_REQUIRE([AC_HEADER_STDC])
+AC_CHECK_HEADERS([string.h])
+AC_SUBST([U])dnl
+AC_SUBST([ANSI2KNR])dnl
+_AM_SUBST_NOTMAKE([ANSI2KNR])dnl
+])
+
+# Obsolete name, declared with AU_DEFUN; it expands to AM_C_PROTOTYPES.
+AU_DEFUN([fp_C_PROTOTYPES], [AM_C_PROTOTYPES])
+
+# Check to make sure that the build environment is sane.    -*- Autoconf -*-
+
+# Copyright (C) 1996, 1997, 2000, 2001, 2003, 2005, 2008
+# Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# serial 5
+
+# AM_SANITY_CHECK
+# ---------------
+# The check performs three steps:
+#  1. Reject a working directory or a $srcdir whose name contains one of
+#     the characters listed in the case patterns below.
+#  2. Create conftest.file and list it together with $srcdir/configure,
+#     newest first (`ls -Lt', falling back to `ls -t'); a listing that
+#     matches neither possible order is reported as a broken `ls'.
+#  3. Succeed only when conftest.file is listed first, i.e. when the file
+#     just created is not older than the distributed configure script.
+AC_DEFUN([AM_SANITY_CHECK],
+[AC_MSG_CHECKING([whether build environment is sane])
+# Just in case
+sleep 1
+echo timestamp > conftest.file
+# Reject unsafe characters in $srcdir or the absolute working directory
+# name.  Accept space and tab only in the latter.
+am_lf='
+'
+case `pwd` in
+  *[[\\\"\#\$\&\'\`$am_lf]]*)
+    AC_MSG_ERROR([unsafe absolute working directory name]);;
+esac
+case $srcdir in
+  *[[\\\"\#\$\&\'\`$am_lf\ \   ]]*)
+    AC_MSG_ERROR([unsafe srcdir value: `$srcdir']);;
+esac
+
+# Do `set' in a subshell so we don't clobber the current shell's
+# arguments.  Must try -L first in case configure is actually a
+# symlink; some systems play weird games with the mod time of symlinks
+# (eg FreeBSD returns the mod time of the symlink's containing
+# directory).
+if (
+   set X `ls -Lt "$srcdir/configure" conftest.file 2> /dev/null`
+   if test "$[*]" = "X"; then
+      # -L didn't work.
+      set X `ls -t "$srcdir/configure" conftest.file`
+   fi
+   rm -f conftest.file
+   if test "$[*]" != "X $srcdir/configure conftest.file" \
+      && test "$[*]" != "X conftest.file $srcdir/configure"; then
+
+      # If neither matched, then we have a broken ls.  This can happen
+      # if, for instance, CONFIG_SHELL is bash and it inherits a
+      # broken ls alias from the environment.  This has actually
+      # happened.  Such a system could not be considered "sane".
+      AC_MSG_ERROR([ls -t appears to fail.  Make sure there is not a broken
+alias in your environment])
+   fi
+
+   test "$[2]" = conftest.file
+   )
+then
+   # Ok.
+   :
+else
+   AC_MSG_ERROR([newly created file is older than distributed files!
+Check your system clock])
+fi
+AC_MSG_RESULT(yes)])
+
+# Copyright (C) 2001, 2003, 2005  Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_PROG_INSTALL_STRIP
+# ---------------------
+# One issue with vendor `install' (even GNU) is that you can't
+# specify the program used to strip binaries.  This is especially
+# annoying in cross-compiling environments, where the build's strip
+# is unlikely to handle the host's binaries.
+# Fortunately install-sh will honor a STRIPPROG variable, so we
+# always use install-sh in `make install-strip', and initialize
+# STRIPPROG with the value of the STRIP variable (set by the user).
+#
+# STRIP is looked up with AC_CHECK_TOOL only when $cross_compiling is not
+# `no'.  INSTALL_STRIP_PROGRAM is set to `$(install_sh) -c -s' and
+# substituted.
+AC_DEFUN([AM_PROG_INSTALL_STRIP],
+[AC_REQUIRE([AM_PROG_INSTALL_SH])dnl
+# Installed binaries are usually stripped using `strip' when the user
+# run `make install-strip'.  However `strip' might not be the right
+# tool to use in cross-compilation environments, therefore Automake
+# will honor the `STRIP' environment variable to overrule this program.
+dnl Don't test for $cross_compiling = yes, because it might be `maybe'.
+if test "$cross_compiling" != no; then
+  AC_CHECK_TOOL([STRIP], [strip], :)
+fi
+INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s"
+AC_SUBST([INSTALL_STRIP_PROGRAM])])
+
+# Copyright (C) 2006, 2008  Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# serial 2
+
+# _AM_SUBST_NOTMAKE(VARIABLE)
+# ---------------------------
+# Prevent Automake from outputting VARIABLE = @VARIABLE@ in Makefile.in.
+# This macro is traced by Automake.
+# The macro has no body: it expands to nothing.
+AC_DEFUN([_AM_SUBST_NOTMAKE])
+
+# AM_SUBST_NOTMAKE(VARIABLE)
+# ---------------------------
+# Public sister of _AM_SUBST_NOTMAKE.
+# All arguments are forwarded unchanged.
+AC_DEFUN([AM_SUBST_NOTMAKE], [_AM_SUBST_NOTMAKE($@)])
+
+# Check how to create a tarball.                            -*- Autoconf -*-
+
+# Copyright (C) 2004, 2005  Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# serial 2
+
+# _AM_PROG_TAR(FORMAT)
+# --------------------
+# Check how to create a tarball in format FORMAT.
+# FORMAT should be one of `v7', `ustar', or `pax'.
+#
+# Substitute a variable $(am__tar) that is a command
+# writing to stdout a FORMAT-tarball containing the directory
+# $tardir.
+#     tardir=directory && $(am__tar) > result.tar
+#
+# Substitute a variable $(am__untar) that extracts such
+# a tarball read from stdin.
+#     $(am__untar) < result.tar
+#
+# Implementation notes:
+#  - AMTAR is always set up through AM_MISSING_PROG.
+#  - For `v7' the commands are fixed and no probing takes place.
+#  - For `ustar' and `pax' the candidate tools are tried in order: gnutar,
+#    plaintar (ustar only), pax, cpio, and finally `none', which sets both
+#    commands to `false'.  Each candidate archives a scratch directory
+#    holding one file, unpacks the archive again and is accepted when the
+#    file content is found.
+#  - am__tar_ is the probe-time twin of am__tar; it refers to $tardir where
+#    am__tar has $$tardir.
+#  - A tool cached in am_cv_prog_tar_FORMAT restricts the candidate list to
+#    that tool and skips the probing; otherwise the tool selected by the
+#    loop is stored in that cache variable.
+AC_DEFUN([_AM_PROG_TAR],
+[# Always define AMTAR for backward compatibility.
+AM_MISSING_PROG([AMTAR], [tar])
+m4_if([$1], [v7],
+     [am__tar='${AMTAR} chof - "$$tardir"'; am__untar='${AMTAR} xf -'],
+     [m4_case([$1], [ustar],, [pax],,
+              [m4_fatal([Unknown tar format])])
+AC_MSG_CHECKING([how to create a $1 tar archive])
+# Loop over all known methods to create a tar archive until one works.
+_am_tools='gnutar m4_if([$1], [ustar], [plaintar]) pax cpio none'
+_am_tools=${am_cv_prog_tar_$1-$_am_tools}
+# Do not fold the above two line into one, because Tru64 sh and
+# Solaris sh will not grok spaces in the rhs of `-'.
+for _am_tool in $_am_tools
+do
+  case $_am_tool in
+  gnutar)
+    for _am_tar in tar gnutar gtar;
+    do
+      AM_RUN_LOG([$_am_tar --version]) && break
+    done
+    am__tar="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$$tardir"'
+    am__tar_="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$tardir"'
+    am__untar="$_am_tar -xf -"
+    ;;
+  plaintar)
+    # Must skip GNU tar: if it does not support --format= it doesn't create
+    # ustar tarball either.
+    (tar --version) >/dev/null 2>&1 && continue
+    am__tar='tar chf - "$$tardir"'
+    am__tar_='tar chf - "$tardir"'
+    am__untar='tar xf -'
+    ;;
+  pax)
+    am__tar='pax -L -x $1 -w "$$tardir"'
+    am__tar_='pax -L -x $1 -w "$tardir"'
+    am__untar='pax -r'
+    ;;
+  cpio)
+    am__tar='find "$$tardir" -print | cpio -o -H $1 -L'
+    am__tar_='find "$tardir" -print | cpio -o -H $1 -L'
+    am__untar='cpio -i -H $1 -d'
+    ;;
+  none)
+    am__tar=false
+    am__tar_=false
+    am__untar=false
+    ;;
+  esac
+
+  # If the value was cached, stop now.  We just wanted to have am__tar
+  # and am__untar set.
+  test -n "${am_cv_prog_tar_$1}" && break
+
+  # tar/untar a dummy directory, and stop if the command works
+  rm -rf conftest.dir
+  mkdir conftest.dir
+  echo GrepMe > conftest.dir/file
+  AM_RUN_LOG([tardir=conftest.dir && eval $am__tar_ >conftest.tar])
+  rm -rf conftest.dir
+  if test -s conftest.tar; then
+    AM_RUN_LOG([$am__untar <conftest.tar])
+    grep GrepMe conftest.dir/file >/dev/null 2>&1 && break
+  fi
+done
+rm -rf conftest.dir
+
+AC_CACHE_VAL([am_cv_prog_tar_$1], [am_cv_prog_tar_$1=$_am_tool])
+AC_MSG_RESULT([$am_cv_prog_tar_$1])])
+AC_SUBST([am__tar])
+AC_SUBST([am__untar])
+]) # _AM_PROG_TAR
+
+m4_include([acinclude.m4])
diff --git a/ansi2knr.1 b/ansi2knr.1

new file mode 100644 (file)

index 0000000..f9ee5a6
--- /dev/null
+++ b/ansi2knr.1
@@ -0,0 +1,36 @@
+.TH ANSI2KNR 1 "19 Jan 1996"
+.SH NAME
+ansi2knr \- convert ANSI C to Kernighan & Ritchie C
+.SH SYNOPSIS
+.I ansi2knr
+[--filename filename] [--varargs] [input_file [output_file]]
+.SH DESCRIPTION
+If no input_file is supplied, input is read from stdin; if no output_file is supplied, output goes to stdout.
+.br
+There are no error messages.
+.sp
+.I ansi2knr
+recognizes function definitions by seeing a non-keyword identifier at the left
+margin, followed by a left parenthesis, with a right parenthesis as the last
+character on the line, and with a left brace as the first token on the
+following line (ignoring possible intervening comments).  It will recognize a
+multi-line header provided that no intervening line ends with a left or right
+brace or a semicolon.  These algorithms ignore whitespace and comments, except
+that the function name must be the first thing on the line.
+.sp
+The following constructs will confuse it:
+.br
+     - Any other construct that starts at the left margin and follows the
+above syntax (such as a macro or function call).
+.br
+     - Some macros that tinker with the syntax of the function header.
+.sp
+The --varargs switch is obsolete, and is recognized only for
+backwards compatibility.  The present version of
+.I ansi2knr
+will always attempt to convert a ... argument to va_alist and va_dcl.
+.SH AUTHOR
+L. Peter Deutsch <ghost@aladdin.com> wrote the original ansi2knr and
+continues to maintain the current version; most of the code in the current
+version is his work.  ansi2knr also includes contributions by Francois
+Pinard <pinard@iro.umontreal.ca> and Jim Avera <jima@netcom.com>.
diff --git a/ansi2knr.c b/ansi2knr.c

new file mode 100644 (file)

index 0000000..b646b85
--- /dev/null
+++ b/ansi2knr.c
@@ -0,0 +1,739 @@
+/* Copyright (C) 1989, 2000 Aladdin Enterprises.  All rights reserved. */
+
+/*$Id$*/
+/* Convert ANSI C function definitions to K&R ("traditional C") syntax */
+
+/*
+ansi2knr is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY.  No author or distributor accepts responsibility to anyone for the
+consequences of using it or for whether it serves any particular purpose or
+works at all, unless he says so in writing.  Refer to the GNU General Public
+License (the "GPL") for full details.
+
+Everyone is granted permission to copy, modify and redistribute ansi2knr,
+but only under the conditions described in the GPL.  A copy of this license
+is supposed to have been given to you along with ansi2knr so you can know
+your rights and responsibilities.  It should be in a file named COPYLEFT,
+or, if there is no file named COPYLEFT, a file named COPYING.  Among other
+things, the copyright notice and this notice must be preserved on all
+copies.
+
+We explicitly state here what we believe is already implied by the GPL: if
+the ansi2knr program is distributed as a separate set of sources and a
+separate executable file which are aggregated on a storage medium together
+with another program, this in itself does not bring the other program under
+the GPL, nor does the mere fact that such a program or the procedures for
+constructing it invoke the ansi2knr executable bring any other part of the
+program under the GPL.
+*/
+
+/*
+ * Usage:
+       ansi2knr [--filename FILENAME] [INPUT_FILE [OUTPUT_FILE]]
+ * --filename provides the file name for the #line directive in the output,
+ * overriding input_file (if present).
+ * If no input_file is supplied, input is read from stdin.
+ * If no output_file is supplied, output goes to stdout.
+ * There are no error messages.
+ *
+ * ansi2knr recognizes function definitions by seeing a non-keyword
+ * identifier at the left margin, followed by a left parenthesis, with a
+ * right parenthesis as the last character on the line, and with a left
+ * brace as the first token on the following line (ignoring possible
+ * intervening comments and/or preprocessor directives), except that a line
+ * consisting of only
+ *     identifier1(identifier2)
+ * will not be considered a function definition unless identifier2 is
+ * the word "void", and a line consisting of
+ *     identifier1(identifier2, <<arbitrary>>)
+ * will not be considered a function definition.
+ * ansi2knr will recognize a multi-line header provided that no intervening
+ * line ends with a left or right brace or a semicolon.  These algorithms
+ * ignore whitespace, comments, and preprocessor directives, except that
+ * the function name must be the first thing on the line.  The following
+ * constructs will confuse it:
+ *     - Any other construct that starts at the left margin and
+ *         follows the above syntax (such as a macro or function call).
+ *     - Some macros that tinker with the syntax of function headers.
+ */
+
+/*
+ * The original and principal author of ansi2knr is L. Peter Deutsch
+ * <ghost@aladdin.com>.  Other authors are noted in the change history
+ * that follows (in reverse chronological order):
+
+       lpd 2000-04-12 backs out Eggert's changes because of bugs:
+       - concatlits didn't declare the type of its bufend argument;
+       - concatlits didn't recognize when it was inside a comment;
+       - scanstring could scan backward past the beginning of the string;
+       - the check for \ + newline in scanstring was unnecessary.
+
+       2000-03-05  Paul Eggert  <eggert@twinsun.com>
+
+       Add support for concatenated string literals.
+       * ansi2knr.c (concatlits): New decl.
+       (main): Invoke concatlits to concatenate string literals.
+       (scanstring): Handle backslash-newline correctly.  Work with
+       character constants.  Fix bug when scanning backwards through
+       backslash-quote.  Check for unterminated strings.
+       (convert1): Parse character constants, too.
+       (appendline, concatlits): New functions.
+       * ansi2knr.1: Document this.
+
+       lpd 1999-08-17 added code to allow preprocessor directives
+               wherever comments are allowed
+       lpd 1999-04-12 added minor fixes from Pavel Roskin
+               <pavel_roskin@geocities.com> for clean compilation with
+               gcc -W -Wall
+       lpd 1999-03-22 added hack to recognize lines consisting of
+               identifier1(identifier2, xxx) as *not* being procedures
+       lpd 1999-02-03 made indentation of preprocessor commands consistent
+       lpd 1999-01-28 fixed two bugs: a '/' in an argument list caused an
+               endless loop; quoted strings within an argument list
+               confused the parser
+       lpd 1999-01-24 added a check for write errors on the output,
+               suggested by Jim Meyering <meyering@ascend.com>
+       lpd 1998-11-09 added further hack to recognize identifier(void)
+               as being a procedure
+       lpd 1998-10-23 added hack to recognize lines consisting of
+               identifier1(identifier2) as *not* being procedures
+       lpd 1997-12-08 made input_file optional; only closes input and/or
+               output file if not stdin or stdout respectively; prints
+               usage message on stderr rather than stdout; adds
+               --filename switch (changes suggested by
+               <ceder@lysator.liu.se>)
+       lpd 1996-01-21 added code to cope with not HAVE_CONFIG_H and with
+               compilers that don't understand void, as suggested by
+               Tom Lane
+       lpd 1996-01-15 changed to require that the first non-comment token
+               on the line following a function header be a left brace,
+               to reduce sensitivity to macros, as suggested by Tom Lane
+               <tgl@sss.pgh.pa.us>
+       lpd 1995-06-22 removed #ifndefs whose sole purpose was to define
+               undefined preprocessor symbols as 0; changed all #ifdefs
+               for configuration symbols to #ifs
+       lpd 1995-04-05 changed copyright notice to make it clear that
+               including ansi2knr in a program does not bring the entire
+               program under the GPL
+       lpd 1994-12-18 added conditionals for systems where ctype macros
+               don't handle 8-bit characters properly, suggested by
+               Francois Pinard <pinard@iro.umontreal.ca>;
+               removed --varargs switch (this is now the default)
+       lpd 1994-10-10 removed CONFIG_BROKETS conditional
+       lpd 1994-07-16 added some conditionals to help GNU `configure',
+               suggested by Francois Pinard <pinard@iro.umontreal.ca>;
+               properly erase prototype args in function parameters,
+               contributed by Jim Avera <jima@netcom.com>;
+               correct error in writeblanks (it shouldn't erase EOLs)
+       lpd 1989-xx-xx original version
+ */
+
+/* Most of the conditionals here are to make ansi2knr work with */
+/* or without the GNU configure machinery. */
+
+#if HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <stdio.h>
+#include <ctype.h>
+
+#if HAVE_CONFIG_H
+
+/*
+   For properly autoconfiguring ansi2knr, use AC_CONFIG_HEADER(config.h).
+   This will define HAVE_CONFIG_H and so, activate the following lines.
+ */
+
+# if STDC_HEADERS || HAVE_STRING_H
+#  include <string.h>
+# else
+#  include <strings.h>
+# endif
+
+#else /* not HAVE_CONFIG_H */
+
+/* Otherwise do it the hard way */
+
+# ifdef BSD
+#  include <strings.h>
+# else
+#  ifdef VMS
+    extern int strlen(), strncmp();
+#  else
+#   include <string.h>
+#  endif
+# endif
+
+#endif /* not HAVE_CONFIG_H */
+
+#if STDC_HEADERS
+# include <stdlib.h>
+#else
+/*
+   malloc and free should be declared in stdlib.h,
+   but if you've got a K&R compiler, they probably aren't.
+ */
+# ifdef MSDOS
+#  include <malloc.h>
+# else
+#  ifdef VMS
+     extern char *malloc();
+     extern void free();
+#  else
+     extern char *malloc();
+     extern int free();
+#  endif
+# endif
+
+#endif
+
+/* Define NULL (for *very* old compilers). */
+#ifndef NULL
+# define NULL (0)
+#endif
+
+/*
+ * The ctype macros don't always handle 8-bit characters correctly.
+ * Compensate for this here: is_ascii() filters out non-ASCII values where
+ * isascii() is available, and every value handed to a ctype macro is first
+ * converted to unsigned char, because passing a negative plain char to
+ * isspace()/isalpha()/isalnum() is undefined.
+ */
+#ifdef isascii
+# undef HAVE_ISASCII           /* just in case */
+# define HAVE_ISASCII 1
+#else
+#endif
+#if STDC_HEADERS || !HAVE_ISASCII
+# define is_ascii(c) 1
+#else
+# define is_ascii(c) isascii(c)
+#endif
+
+#define is_space(c) (is_ascii(c) && isspace((unsigned char)(c)))
+#define is_alpha(c) (is_ascii(c) && isalpha((unsigned char)(c)))
+#define is_alnum(c) (is_ascii(c) && isalnum((unsigned char)(c)))
+
+/* Scanning macros: characters that may appear in / start a C identifier. */
+#define isidchar(ch) (is_alnum(ch) || (ch) == '_')
+#define isidfirstchar(ch) (is_alpha(ch) || (ch) == '_')
+
+/* Forward references */
+char *ppdirforward();
+char *ppdirbackward();
+char *skipspace();
+char *scanstring();
+int writeblanks();
+int test1();
+int convert1();
+
+/*
+ * The main program.
+ *
+ * Parses the switches and the optional input/output file names, then
+ * copies the input to the output one line at a time.  Lines that test1()
+ * classifies as function headers are rewritten by convert1(); everything
+ * else is copied unchanged.
+ *
+ * Exits with status 1 on an unrecognized switch, a file that cannot be
+ * opened, an allocation failure or an output error.  A wrong number of
+ * file arguments prints the usage message and exits with status 0.
+ */
+int
+main(argc, argv)
+    int argc;
+    char *argv[];
+{      FILE *in = stdin;
+       FILE *out = stdout;
+       char *filename = 0;
+       char *program_name = argv[0];
+       char *output_name = 0;
+#define bufsize 5000                   /* arbitrary size */
+       char *buf;
+       char *line;
+       char *more;
+       char *usage =
+         "Usage: ansi2knr [--filename FILENAME] [INPUT_FILE [OUTPUT_FILE]]\n";
+       /*
+        * In previous versions, ansi2knr recognized a --varargs switch.
+        * If this switch was supplied, ansi2knr would attempt to convert
+        * a ... argument to va_alist and va_dcl; if this switch was not
+        * supplied, ansi2knr would simply drop any such arguments.
+        * Now, ansi2knr always does this conversion, and we only
+        * check for this switch for backward compatibility.
+        */
+       int convert_varargs = 1;
+       int output_error;
+
+       while ( argc > 1 && argv[1][0] == '-' ) {
+         if ( !strcmp(argv[1], "--varargs") ) {
+           convert_varargs = 1;
+           argc--;
+           argv++;
+           continue;
+         }
+         if ( !strcmp(argv[1], "--filename") && argc > 2 ) {
+           filename = argv[2];
+           argc -= 2;
+           argv += 2;
+           continue;
+         }
+         fprintf(stderr, "%s: Unrecognized switch: %s\n", program_name,
+                 argv[1]);
+         /* usage is data, not a format string, so don't hand it to fprintf. */
+         fputs(usage, stderr);
+         exit(1);
+       }
+       switch ( argc )
+          {
+       default:
+               fputs(usage, stderr);
+               exit(0);
+       case 3:
+               output_name = argv[2];
+               out = fopen(output_name, "w");
+               if ( out == NULL ) {
+                 fprintf(stderr, "%s: Cannot open output file %s\n",
+                         program_name, output_name);
+                 exit(1);
+               }
+               /* falls through */
+       case 2:
+               in = fopen(argv[1], "r");
+               if ( in == NULL ) {
+                 fprintf(stderr, "%s: Cannot open input file %s\n",
+                         program_name, argv[1]);
+                 exit(1);
+               }
+               if ( filename == 0 )
+                 filename = argv[1];
+               /* falls through */
+       case 1:
+               break;
+          }
+       if ( filename )
+         fprintf(out, "#line 1 \"%s\"\n", filename);
+       buf = malloc(bufsize);
+       if ( buf == NULL )
+          {
+               fprintf(stderr, "Unable to allocate read buffer!\n");
+               exit(1);
+          }
+       /*
+        * buf accumulates the candidate header (possibly several lines);
+        * line is where the next fgets will store its data.
+        */
+       line = buf;
+       while ( fgets(line, (unsigned)(buf + bufsize - line), in) != NULL )
+          {
+test:          line += strlen(line);
+               switch ( test1(buf) )
+                  {
+               case 2:                 /* a function header */
+                       convert1(buf, out, 1, convert_varargs);
+                       break;
+               case 1:                 /* a function */
+                       /* Check for a { at the start of the next line. */
+                       /* The following lines are kept as a separate string */
+                       /* (more) just past the NUL that terminates buf. */
+                       more = ++line;
+f:                     if ( line >= buf + (bufsize - 1) || /* overflow check */
+                            fgets(line, (unsigned)(buf + bufsize - line), in) == NULL )
+                         {
+                           /*
+                            * Out of buffer space or out of input: emit the
+                            * candidate header unconverted, followed by any
+                            * blank/comment lines already collected after
+                            * it, so that no input is lost.  line == more
+                            * means nothing has been read into more yet.
+                            */
+                           fputs(buf, out);
+                           if ( line != more )
+                             fputs(more, out);
+                           break;
+                         }
+                       switch ( *skipspace(ppdirforward(more), 1) )
+                         {
+                         case '{':
+                           /* Definitely a function header. */
+                           convert1(buf, out, 0, convert_varargs);
+                           fputs(more, out);
+                           break;
+                         case 0:
+                           /* The next line was blank or a comment: */
+                           /* keep scanning for a non-comment. */
+                           line += strlen(line);
+                           goto f;
+                         default:
+                           /* buf isn't a function header, but */
+                           /* more might be. */
+                           fputs(buf, out);
+                           /*
+                            * Slide more down to the start of buf.  The two
+                            * regions may overlap, for which strcpy is
+                            * undefined, so copy byte by byte from the front.
+                            */
+                           {   char *dst = buf;
+                               char *src = more;
+
+                               while ( (*dst++ = *src++) != 0 )
+                                 ;
+                           }
+                           line = buf;
+                           goto test;
+                         }
+                       break;
+               case -1:                /* maybe the start of a function */
+                       if ( line != buf + (bufsize - 1) ) /* overflow check */
+                         continue;
+                       /* falls through */
+               default:                /* not a function */
+                       fputs(buf, out);
+                       break;
+                  }
+               line = buf;
+          }
+       /* Flush a partial candidate that was still pending at end of input. */
+       if ( line != buf )
+         fputs(buf, out);
+       free(buf);
+       if ( output_name ) {
+         output_error = ferror(out);
+         output_error |= fclose(out);
+       } else {                /* out == stdout */
+         fflush(out);
+         output_error = ferror(out);
+       }
+       if ( output_error ) {
+         fprintf(stderr, "%s: error writing to %s\n", program_name,
+                 (output_name ? output_name : "stdout"));
+         exit(1);
+       }
+       if ( in != stdin )
+         fclose(in);
+       return 0;
+}
+
+/*
+ * Skip forward or backward over one or more preprocessor directives.
+ */
+/*
+ * ppdirforward: while p points at a '#', step over that whole line
+ * (through its LF, CR or CR LF terminator) and look again.  Returns the
+ * first position that does not start with '#', or a pointer to the NUL
+ * if the string ends inside a directive line.
+ */
+char *
+ppdirforward(p)
+    char *p;
+{
+    while (*p == '#') {
+       /* Advance to the end-of-line character that closes this line. */
+       while (*p != '\r' && *p != '\n') {
+           if (*p == 0)
+               return p;
+           ++p;
+       }
+       /* Treat CR LF as a single terminator, then step past it. */
+       if (*p == '\r' && p[1] == '\n')
+           ++p;
+       ++p;
+    }
+    return p;
+}
+/*
+ * ppdirbackward: p points at the last character of the text to examine and
+ * limit at its first character.  Walks backward one line at a time for as
+ * long as the line begins with '#'.  Returns the position of the last
+ * character of the first line found (going backward) that does not begin
+ * with '#'; if a NUL byte is found immediately before the scan position,
+ * returns the position just after that NUL.
+ * NOTE(review): np[-1] is read without first comparing np against limit;
+ * this looks safe only when the text at limit does not start with '#' or
+ * an end-of-line character -- confirm against callers.
+ */
+char *
+ppdirbackward(p, limit)
+    char *p;
+    char *limit;
+{
+    char *np = p;
+
+    /* Each iteration examines one line; p remembers where that line ends. */
+    for (;; p = --np) {
+       /* Treat a CR LF pair as a single line terminator. */
+       if (*np == '\n' && np[-1] == '\r')
+           --np;
+       /* Back up to the first character of this line (or to limit). */
+       for (; np > limit && np[-1] != '\r' && np[-1] != '\n'; --np)
+           if (np[-1] == 0)
+               return np;
+       /* Not a directive line: its end is the answer. */
+       if (*np != '#')
+           return p;
+    }
+}
+
+/*
+ * Skip over whitespace and C comments, in either direction.
+ * dir is 1 to move forward and -1 to move backward.  Returns a pointer
+ * to the first character that is neither whitespace nor inside a comment,
+ * or a pointer to a NUL if one is met while looking for the far end of a
+ * comment (the comment presumably continues on another line).
+ */
+char *
+skipspace(p, dir)
+    char *p;
+    int dir;                   /* 1 for forward, -1 for backward */
+{
+    while ( 1 ) {
+       /* Run over plain whitespace. */
+       for ( ; is_space(*p); p += dir )
+           ;
+       /* Anything other than a comment delimiter ends the skip. */
+       if ( *p != '/' || p[dir] != '*' )
+           return p;
+       /* Step inside the comment and look for its other delimiter. */
+       for ( p += 2 * dir; *p != '*' || p[dir] != '/'; p += dir )
+           if ( *p == 0 )
+               return p;       /* multi-line comment?? */
+       p += 2 * dir;
+    }
+}
+
+/*
+ * Scan over a quoted string, in either direction.
+ * p points at one of the string's double quotes; dir is 1 to scan forward
+ * from the opening quote, or -1 to scan backward from the closing quote.
+ * Returns a pointer just beyond the matching quote in the scan direction.
+ *
+ * A quote is escaped when it is preceded IN THE TEXT by an odd number of
+ * backslashes; this is the same test whichever way we are scanning.
+ * If a NUL is reached before a matching quote (unterminated string),
+ * the scan stops and a pointer to the NUL is returned.
+ */
+char *
+scanstring(p, dir)
+    char *p;
+    int dir;
+{
+    for (p += dir; ; p += dir) {
+       char *q;
+
+       if (*p == 0)
+           return p;           /* unterminated string: stop at the NUL */
+       if (*p != '"')
+           continue;
+       /* Count the backslashes immediately before this quote. */
+       for (q = p; q[-1] == '\\'; --q)
+           ;
+       if (((p - q) & 1) == 0)
+           return p + dir;     /* even count: the quote is not escaped */
+    }
+}
+
+/*
+ * Blank out the characters in [start, end) by overwriting them with
+ * spaces.  Carriage returns and line feeds are left alone so that the
+ * line structure of the text is preserved.  Always returns 0.
+ */
+int
+writeblanks(start, end)
+    char *start;
+    char *end;
+{
+       while ( start < end ) {
+         if ( *start != '\r' && *start != '\n' )
+           *start = ' ';
+         start++;
+       }
+       return 0;
+}
+
+/*
+ * Test whether the string in buf is a function definition.
+ * The string may contain and/or end with a newline.
+ * Return as follows:
+ *     0 - definitely not a function definition;
+ *     1 - definitely a function definition;
+ *     2 - definitely a function prototype (NOT USED);
+ *     -1 - may be the beginning of a function definition,
+ *             append another line and look again.
+ * The reason we don't attempt to convert function prototypes is that
+ * Ghostscript's declaration-generating macros look too much like
+ * prototypes, and confuse the algorithms.
+ *
+ * The decision is made from the last significant character of buf
+ * (ignoring trailing preprocessor lines, whitespace and comments) and
+ * from the shape of the text that follows the leading identifier.
+ */
+int
+test1(buf)
+    char *buf;
+{      char *p = buf;
+       char *bend;
+       char *endfn;
+       int contin;
+
+       if ( !isidfirstchar(*p) )
+         return 0;             /* no name at left margin */
+       /* bend = last significant character of the accumulated text. */
+       bend = skipspace(ppdirbackward(buf + strlen(buf) - 1, buf), -1);
+       switch ( *bend )
+          {
+          /* A trailing ';' would be a prototype; reported as 0, not 2. */
+          case ';': contin = 0 /*2*/; break;
+          case ')': contin = 1; break;
+          case '{': return 0;          /* not a function */
+          case '}': return 0;          /* not a function */
+          default: contin = -1;
+          }
+       /* Step over the leading identifier; endfn marks where it ends. */
+       while ( isidchar(*p) )
+         p++;
+       endfn = p;
+       p = skipspace(p, 1);
+       if ( *p++ != '(' )
+         return 0;             /* not a function */
+       p = skipspace(p, 1);
+       if ( *p == ')' )
+         return 0;             /* no parameters */
+       /* Check that the apparent function name isn't a keyword. */
+       /* We only need to check for keywords that could be followed */
+       /* by a left parenthesis (which, unfortunately, is most of them). */
+          {    static char *words[] =
+                  {    "asm", "auto", "case", "char", "const", "double",
+                       "extern", "float", "for", "if", "int", "long",
+                       "register", "return", "short", "signed", "sizeof",
+                       "static", "switch", "typedef", "unsigned",
+                       "void", "volatile", "while", 0
+                  };
+               char **key = words;
+               char *kp;
+               unsigned len = endfn - buf;
+
+               /* Exact-length comparison so that e.g. "iffy" != "if". */
+               while ( (kp = *key) != 0 )
+                  {    if ( strlen(kp) == len && !strncmp(kp, buf, len) )
+                         return 0;     /* name is a keyword */
+                       key++;
+                  }
+          }
+          {
+              char *id = p;
+              int len;
+              /*
+               * Check for identifier1(identifier2) and not
+               * identifier1(void), or identifier1(identifier2, xxxx).
+               */
+
+              while ( isidchar(*p) )
+                  p++;
+              len = p - id;
+              p = skipspace(p, 1);
+              /* A lone identifier other than "void", or an identifier */
+              /* followed directly by a comma, is rejected. */
+              if (*p == ',' ||
+                  (*p == ')' && (len != 4 || strncmp(id, "void", 4)))
+                  )
+                  return 0;    /* not a function */
+          }
+       /*
+        * If the last significant character was a ), we need to count
+        * parentheses, because it might be part of a formal parameter
+        * that is a procedure.
+        */
+       if (contin > 0) {
+           int level = 0;
+
+           /* skipspace keeps parentheses inside comments out of the count. */
+           for (p = skipspace(buf, 1); *p; p = skipspace(p + 1, 1))
+               level += (*p == '(' ? 1 : *p == ')' ? -1 : 0);
+           /* More '(' than ')': the argument list is still open. */
+           if (level > 0)
+               contin = -1;
+       }
+       return contin;
+}
+
+/*
+ * Convert a recognized function definition or header to K&R syntax.
+ *
+ *     buf             - the text of the header; it is modified in place
+ *                       (prototype parameters are blanked out, argument
+ *                       separators are overwritten with ';').
+ *     out             - stream that receives the converted text.
+ *     header          - nonzero: emit only "name();" plus the line ends
+ *                       found in the argument text; zero: emit the
+ *                       K&R definition (name list, then declarations).
+ *     convert_varargs - nonzero: turn a trailing "..." into
+ *                       va_alist / va_dcl; zero: erase it.
+ *
+ * Returns 0 on success.  If the break table cannot be allocated, writes
+ * buf to out unchanged and returns -1.
+ * NOTE(review): the scanning loops do not test for the terminating NUL;
+ * this presumably relies on test1() having accepted buf -- confirm.
+ */
+int
+convert1(buf, out, header, convert_varargs)
+    char *buf;
+    FILE *out;
+    int header;                        /* Boolean */
+    int convert_varargs;       /* Boolean */
+{      char *endfn;
+       char *p;
+       /*
+        * The breaks table contains pointers to the beginning and end
+        * of each argument.
+        */
+       /* As filled in below: even slots hold the start of an argument's */
+       /* text, odd slots the start of the name it declares. */
+       char **breaks;
+       unsigned num_breaks = 2;        /* for testing */
+       char **btop;
+       char **bp;
+       char **ap;
+       char *vararg = 0;
+
+       /* Pre-ANSI implementations don't agree on whether strchr */
+       /* is called strchr or index, so we open-code it here. */
+       /* On exit endfn points just past the first '('. */
+       for ( endfn = buf; *(endfn++) != '('; )
+         ;
+top:   p = endfn;
+       breaks = (char **)malloc(sizeof(char *) * num_breaks * 2);
+       if ( breaks == NULL )
+          {    /* Couldn't allocate break table, give up */
+               fprintf(stderr, "Unable to allocate break table!\n");
+               fputs(buf, out);
+               return -1;
+          }
+       btop = breaks + num_breaks * 2 - 2;
+       bp = breaks;
+       /* Parse the argument list */
+       do
+          {    int level = 0;
+               char *lp = NULL;
+               char *rp = NULL;
+               char *end = NULL;
+
+               if ( bp >= btop )
+                  {    /* Filled up break table. */
+                       /* Allocate a bigger one and start over. */
+                       free((char *)breaks);
+                       num_breaks <<= 1;
+                       goto top;
+                  }
+               *bp++ = p;
+               /* Find the end of the argument */
+               /* lp/rp record the outermost '(' and the last ')' seen */
+               /* inside this argument. */
+               for ( ; end == NULL; p++ )
+                  {    switch(*p)
+                          {
+                          case ',':
+                               if ( !level ) end = p;
+                               break;
+                          case '(':
+                               if ( !level ) lp = p;
+                               level++;
+                               break;
+                          case ')':
+                               if ( --level < 0 ) end = p;
+                               else rp = p;
+                               break;
+                          case '/':
+                               if (p[1] == '*')
+                                   p = skipspace(p, 1) - 1;
+                               break;
+                          case '"':
+                              p = scanstring(p, 1) - 1;
+                              break;
+                          default:
+                               ;
+                          }
+                  }
+               /* Erase any embedded prototype parameters. */
+               if ( lp && rp )
+                 writeblanks(lp + 1, rp);
+               p--;                    /* back up over terminator */
+               /* Find the name being declared. */
+               /* This is complicated because of procedure and */
+               /* array modifiers. */
+               for ( ; ; )
+                  {    p = skipspace(p - 1, -1);
+                       switch ( *p )
+                          {
+                          case ']':    /* skip array dimension(s) */
+                          case ')':    /* skip procedure args OR name */
+                          {    int level = 1;
+                               /* Walk backward to the matching opener. */
+                               while ( level )
+                                switch ( *--p )
+                                  {
+                                  case ']': case ')':
+                                      level++;
+                                      break;
+                                  case '[': case '(':
+                                      level--;
+                                      break;
+                                  case '/':
+                                      if (p > buf && p[-1] == '*')
+                                          p = skipspace(p, -1) + 1;
+                                      break;
+                                  case '"':
+                                      p = scanstring(p, -1) + 1;
+                                      break;
+                                  default: ;
+                                  }
+                          }
+                               if ( *p == '(' && *skipspace(p + 1, 1) == '*' )
+                                  {    /* We found the name being declared */
+                                       while ( !isidfirstchar(*p) )
+                                         p = skipspace(p, 1) + 1;
+                                       goto found;
+                                  }
+                               break;
+                          default:
+                               goto found;
+                          }
+                  }
+found:         if ( *p == '.' && p[-1] == '.' && p[-2] == '.' )
+                 {     if ( convert_varargs )
+                         {     *bp++ = "va_alist";
+                               vararg = p-2;
+                         }
+                       else
+                         {     p++;
+                               if ( bp == breaks + 1 ) /* sole argument */
+                                 writeblanks(breaks[0], p);
+                               else
+                                 writeblanks(bp[-1] - 1, p);
+                               bp--;
+                         }
+                  }
+               else
+                  {    /* Back up to the first character of the name. */
+                       while ( isidchar(*p) ) p--;
+                       *bp++ = p+1;
+                  }
+               p = end;
+          }
+       while ( *p++ == ',' );
+       /* Final entry: the position just past the closing terminator. */
+       *bp = p;
+       /* Make a special check for 'void' arglist */
+       if ( bp == breaks+2 )
+          {    p = skipspace(breaks[0], 1);
+               if ( !strncmp(p, "void", 4) )
+                  {    p = skipspace(p+4, 1);
+                       if ( p == breaks[2] - 1 )
+                          {    bp = breaks;    /* yup, pretend arglist is empty */
+                               writeblanks(breaks[0], p + 1);
+                          }
+                  }
+          }
+       /* Put out the function name and left parenthesis. */
+       p = buf;
+       while ( p != endfn ) putc(*p, out), p++;
+       /* Put out the declaration. */
+       if ( header )
+         {     fputs(");", out);
+               /* Emit only the line ends, keeping the line count intact. */
+               for ( p = breaks[0]; *p; p++ )
+                 if ( *p == '\r' || *p == '\n' )
+                   putc(*p, out);
+         }
+       else
+         {     /* Odd slots are the argument names: print them as a list. */
+               for ( ap = breaks+1; ap < bp; ap += 2 )
+                 {     p = *ap;
+                       while ( isidchar(*p) )
+                         putc(*p, out), p++;
+                       if ( ap < bp - 1 )
+                         fputs(", ", out);
+                 }
+               fputs(")  ", out);
+               /* Put out the argument declarations */
+               /* Each ',' / ')' terminator in buf becomes a ';'. */
+               for ( ap = breaks+2; ap <= bp; ap += 2 )
+                 (*ap)[-1] = ';';
+               if ( vararg != 0 )
+                 {     /* Cut buf at the "..." and substitute va_dcl. */
+                       *vararg = 0;
+                       fputs(breaks[0], out);          /* any prior args */
+                       fputs("va_dcl", out);           /* the final arg */
+                       fputs(bp[0], out);
+                 }
+               else
+                 fputs(breaks[0], out);
+         }
+       free((char *)breaks);
+       return 0;
+}
diff --git a/assert.c b/assert.c

new file mode 100644 (file)

index 0000000..b9b5559
--- /dev/null
+++ b/assert.c
@@ -0,0 +1,48 @@
+/* GMP assertion failure handler.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Print the "FILE:" or "FILE:LINE: " prefix of a diagnostic to stderr.
+   Nothing is printed when FILENAME is NULL or empty, and the line number
+   part is left out when LINENUM is -1.  */
+void
+__gmp_assert_header (const char *filename, int linenum)
+{
+  if (filename == NULL || filename[0] == '\0')
+    return;
+
+  fprintf (stderr, "%s:", filename);
+  if (linenum == -1)
+    return;
+
+  fprintf (stderr, "%d: ", linenum);
+}
+
+/* Report a failed assertion on stderr -- the location prefix written by
+   __gmp_assert_header, then the text of EXPR -- and abort the program.  */
+void
+__gmp_assert_fail (const char *filename, int linenum,
+                   const char *expr)
+{
+  FILE  *errs = stderr;
+
+  __gmp_assert_header (filename, linenum);
+  fprintf (errs, "GNU MP assertion failed: %s\n", expr);
+  abort ();
+}
diff --git a/compat.c b/compat.c

new file mode 100644 (file)

index 0000000..7251bef
--- /dev/null
+++ b/compat.c
@@ -0,0 +1,49 @@
+/* Old function entrypoints retained for binary compatibility.
+
+Copyright 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Binary-compatibility entry point.  mpn_divexact_by3 was a real function
+   in gmp 3.0.1; as of gmp 3.1 the name is a macro calling
+   mpn_divexact_by3c, so this wrapper simply forwards through that macro.  */
+mp_limb_t
+__MPN (divexact_by3) (mp_ptr dst, mp_srcptr src, mp_size_t size)
+{
+  mp_limb_t  ret;
+
+  ret = mpn_divexact_by3 (dst, src, size);
+  return ret;
+}
+
+
+/* Binary-compatibility entry point.  mpn_divmod_1 was a function in gmp
+   3.0.1 and earlier (already marked obsolete in gmp 2 and 3); as of gmp 3.1
+   the name is a macro calling mpn_divrem_1, which this wrapper uses.  */
+mp_limb_t
+__MPN (divmod_1) (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor)
+{
+  mp_limb_t  ret;
+
+  ret = mpn_divmod_1 (dst, src, size, divisor);
+  return ret;
+}
+
+
+/* Binary-compatibility entry point.  mpz_legendre was a separate function
+   in gmp 3.1.1 and earlier; as of 4.0 it is a #define alias for mpz_jacobi,
+   so the old symbol just hands its arguments to mpz_jacobi.  */
+int
+__gmpz_legendre (mpz_srcptr a, mpz_srcptr b)
+{
+  int  symbol;
+
+  symbol = mpz_jacobi (a, b);
+  return symbol;
+}
diff --git a/config.guess b/config.guess

new file mode 100755 (executable)

index 0000000..d6e9acd
--- /dev/null
+++ b/config.guess
@@ -0,0 +1,926 @@
+#! /bin/sh
+#
+# GMP config.guess wrapper.
+
+
+# Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2008, 2011 Free Software
+# Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation; either version 3 of the License, or (at
+# your option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+# Usage: config.guess
+#
+# Print the host system CPU-VENDOR-OS.
+#
+# configfsf.guess is run and its guess then sharpened up to take advantage
+# of the finer grained CPU types that GMP knows.
+
+
+# Expect to find configfsf.guess in the same directory as this config.guess
+configfsf_guess="`echo \"$0\" | sed 's/config.guess$/configfsf.guess/'`"
+if test "$configfsf_guess" = "$0"; then
+  echo "Cannot derive configfsf.guess from $0" 1>&2
+  exit 1
+fi
+if test -f "$configfsf_guess"; then
+  :
+else
+  echo "$configfsf_guess not found" 1>&2
+  exit 1
+fi
+
+# Setup a $SHELL with which to run configfsf.guess, using the same
+# $CONFIG_SHELL or /bin/sh as autoconf does when running config.guess
+SHELL=${CONFIG_SHELL-/bin/sh}
+
+# Identify ourselves on --version, --help or errors
+if test $# != 0; then
+  echo "(GNU MP wrapped config.guess)"
+  $SHELL $configfsf_guess "$@"
+  exit 1
+fi
+
+guess_full=`$SHELL $configfsf_guess`
+if test $? != 0; then
+  exit 1
+fi
+
+guess_cpu=`echo "$guess_full" | sed 's/-.*$//'`
+guess_rest=`echo "$guess_full" | sed 's/^[^-]*//'`
+exact_cpu=
+
+
+# -------------------------------------------------------------------------
+# The following should look at the current guess and probe the system to
+# establish a better guess in exact_cpu.  Leave exact_cpu empty if probes
+# can't be done, or don't work.
+#
+# When a number of probes are done, test -z "$exact_cpu" can be used instead
+# of putting each probe under an "else" of the preceding one.  That can stop
+# the code getting horribly nested and marching off the right side of the
+# screen.
+
+# Note that when a compile-and-link is done in one step we need to remove .o
+# files, since lame C compilers generate these even when not asked.
+#
+
+dummy=dummy-$$
+trap 'rm -f $dummy.c $dummy.o $dummy.core $dummy ${dummy}1.s ${dummy}2.c ; exit 1' 1 2 15
+
+# Use $HOST_CC if defined. $CC may point to a cross-compiler
+if test x"$CC_FOR_BUILD" = x; then
+  if test x"$HOST_CC" != x; then
+    CC_FOR_BUILD="$HOST_CC"
+  else
+    if test x"$CC" != x; then
+      CC_FOR_BUILD="$CC"
+    else
+      echo 'dummy(){}' >$dummy.c
+      for c in cc gcc c89 c99; do
+         ($c $dummy.c -c) >/dev/null 2>&1
+         if test $? = 0; then
+             CC_FOR_BUILD="$c"; break
+         fi
+      done
+      rm -f $dummy.c $dummy.o
+      if test x"$CC_FOR_BUILD" = x; then
+       CC_FOR_BUILD=no_compiler_found
+      fi
+    fi
+  fi
+fi
+
+
+case "$guess_full" in
+
+alpha-*-*)
+  # configfsf.guess detects exact alpha cpu types for OSF and GNU/Linux, but
+  # not for *BSD and other systems.  We try to get an exact type for any
+  # plain "alpha" it leaves.
+  #
+  # configfsf.guess used to have a block of code not unlike this, but these
+  # days does its thing with Linux kernel /proc/cpuinfo or OSF psrinfo.
+  #
+  cat <<EOF >$dummy.s
+       .data
+Lformat:
+       .byte 37,100,45,37,120,10,0     # "%d-%x\n"
+       .text
+       .globl main
+       .align 4
+       .ent main
+main:
+       .frame \$30,16,\$26,0
+       ldgp \$29,0(\$27)
+       .prologue 1
+       .long 0x47e03d91 # implver \$17
+       lda \$2,-1
+       .long 0x47e20c21 # amask \$2,\$1
+       lda \$16,Lformat
+       not \$1,\$18
+       jsr \$26,printf
+       ldgp \$29,0(\$26)
+       mov 0,\$16
+       jsr \$26,exit
+       .end main
+EOF
+  $CC_FOR_BUILD $dummy.s -o $dummy 2>/dev/null
+  if test "$?" = 0 ; then
+    case `./$dummy` in
+    0-0)       exact_cpu=alpha      ;;
+    1-0)       exact_cpu=alphaev5   ;;
+    1-1)       exact_cpu=alphaev56  ;;
+    1-101)     exact_cpu=alphapca56 ;;
+    2-303)     exact_cpu=alphaev6   ;;
+    2-307)     exact_cpu=alphaev67  ;;
+    2-1307)    exact_cpu=alphaev68  ;;
+    esac
+  fi
+  rm -f $dummy.s $dummy.o $dummy
+  ;;
+
+ia64*-*-*)
+  # CPUID[3] bits 24 to 31 is the processor family.  itanium2 is documented
+  # as 0x1f, plain itanium has been seen returning 0x07 on two systems, but
+  # haven't found any documentation on it as such.
+  #
+  # Defining both getcpuid and _getcpuid lets us ignore whether the system
+  # expects underscores or not.
+  #
+  # "unsigned long long" is always 64 bits, in fact on hpux in ilp32 mode
+  # (which is the default there), it's the only 64-bit type.
+  #
+  cat >${dummy}a.s <<EOF
+       .text
+       .global _getcpuid
+       .proc   _getcpuid
+_getcpuid:
+       mov     r8 = CPUID[r32] ;;
+       br.ret.sptk.many rp ;;
+       .endp   _getcpuid
+       .global getcpuid
+       .proc   getcpuid
+getcpuid:
+       mov     r8 = CPUID[r32] ;;
+       br.ret.sptk.many rp ;;
+       .endp   getcpuid
+EOF
+  cat >${dummy}b.c <<EOF
+#include <stdio.h>
+unsigned long long getcpuid ();
+int
+main ()
+{
+  if (getcpuid(0LL) == 0x49656E69756E6547LL && getcpuid(1LL) == 0x6C65746ELL)
+    {
+      /* "GenuineIntel" */
+      switch ((getcpuid(3LL) >> 24) & 0xFF) {
+      case 0x07: puts ("itanium");  break;
+      case 0x1F: puts ("itanium2"); break; /* McKinley, Madison */
+      case 0x20: puts ("itanium2"); break; /* Montecito */
+      }
+    }
+  return 0;
+}
+EOF
+  if $CC_FOR_BUILD ${dummy}a.s ${dummy}b.c -o $dummy >/dev/null 2>&1; then
+    exact_cpu=`./$dummy`
+  fi
+  rm -f ${dummy}a.s ${dummy}a.o ${dummy}b.c ${dummy}b.o $dummy $dummy.core core
+  ;;
+
+mips-*-irix[6789]*)
+  # IRIX 6 and up always has a 64-bit mips cpu
+  exact_cpu=mips64
+  ;;
+
+m68k-*-*)
+  # NetBSD (and presumably other *BSD) "sysctl hw.model" gives for example
+  #   hw.model = Apple Macintosh Quadra 610  (68040)
+  exact_cpu=`(sysctl hw.model) 2>/dev/null | sed -n 's/^.*\(680[012346]0\).*$/m\1/p'`
+  if test -z "$exact_cpu"; then
+    # Linux kernel 2.2 gives for example "CPU: 68020" (tabs in between).
+    exact_cpu=`sed -n 's/^CPU:.*\(680[012346]0\).*$/m\1/p' /proc/cpuinfo 2>/dev/null`
+  fi
+  if test -z "$exact_cpu"; then
+    # Try: movel #0,%d0; rts
+    # This is to check the compiler and our asm code works etc, before
+    # assuming failures below indicate cpu characteristics.
+    # .byte is used to avoid problems with assembler syntax variations.
+    # For testing, provoke failures by adding "illegal" possibly as
+    # ".byte 0x4A, 0xFC"
+    cat >$dummy.s <<EOF
+       .text
+       .globl main
+       .globl _main
+main:
+_main:
+       .byte   0x70, 0x00
+       .byte   0x4e, 0x75
+EOF
+    if ($CC_FOR_BUILD $dummy.s -o $dummy && ./$dummy) >/dev/null 2>&1; then
+
+      # $SHELL -c is used to execute ./$dummy below, since (./$dummy)
+      # 2>/dev/null still prints the SIGILL message on some shells.
+      #
+        # Try: movel #0,%d0
+        #      rtd #0
+        cat >$dummy.s <<EOF
+       .text
+       .globl main
+       .globl _main
+main:
+_main:
+       .byte   0x70, 0x00
+       .byte   0x4e, 0x74, 0x00, 0x00
+EOF
+        if $CC_FOR_BUILD $dummy.s -o $dummy >/dev/null 2>&1; then
+          $SHELL -c ./$dummy >/dev/null 2>&1
+         if test $? != 0; then
+            exact_cpu=m68000    # because rtd didn't work
+          fi
+        fi
+      #
+
+      if test -z "$exact_cpu"; then
+        # Try: trapf
+        #      movel #0,%d0
+        #      rts
+        # Another possibility for identifying 68000 and 68010 is the
+        # different value stored by "movem a0,(a0)+"
+        cat >$dummy.s <<EOF
+       .text
+       .globl main
+       .globl _main
+main:
+_main:
+       .byte   0x51, 0xFC
+       .byte   0x70, 0x00
+       .byte   0x4e, 0x75
+EOF
+        if $CC_FOR_BUILD $dummy.s -o $dummy >/dev/null 2>&1; then
+          $SHELL -c ./$dummy >/dev/null 2>&1
+         if test $? != 0; then
+            exact_cpu=m68010    # because trapf didn't work
+          fi
+        fi
+      fi
+
+      if test -z "$exact_cpu"; then
+        # Try: bfffo %d1{0:31},%d0
+        #      movel #0,%d0
+        #      rts
+        cat >$dummy.s <<EOF
+       .text
+       .globl main
+       .globl _main
+main:
+_main:
+       .byte   0xED, 0xC1, 0x00, 0x1F
+       .byte   0x70, 0x00
+       .byte   0x4e, 0x75
+EOF
+        if $CC_FOR_BUILD $dummy.s -o $dummy >/dev/null 2>&1; then
+          $SHELL -c ./$dummy >/dev/null 2>&1
+         if test $? != 0; then
+            exact_cpu=m68360  # cpu32, because bfffo didn't work
+          fi
+        fi
+      fi
+
+      if test -z "$exact_cpu"; then
+        # FIXME: Now we know 68020 or up, but how to detect 030, 040 and 060?
+        exact_cpu=m68020
+      fi
+    fi
+    rm -f $dummy.s $dummy.o $dummy $dummy.core core
+  fi
+  if test -z "$exact_cpu"; then
+    case "$guess_full" in
+      *-*-next* | *-*-openstep*)  # NeXTs are 68020 or better
+        exact_cpu=m68020 ;;
+    esac
+  fi
+  ;;
+
+
+rs6000-*-* | powerpc*-*-*)
+  # Enhancement: On MacOS the "machine" command prints for instance
+  # "ppc750".  Interestingly on powerpc970-apple-darwin6.8.5 it prints
+  # "ppc970" where there's no actual #define for 970 from NXGetLocalArchInfo
+  # (as noted below).  But the man page says the command is still "under
+  # development", so it doesn't seem wise to use it just yet, not while
+  # there's an alternative.
+  #
+  # Try to read the PVR.  mfpvr is a protected instruction, NetBSD, MacOS
+  # and AIX don't allow it in user mode, but the Linux kernel does.
+  #
+  # Using explicit bytes for mfpvr avoids worrying about assembler syntax
+  # and underscores.  "char"s are used instead of "int"s to avoid worrying
+  # whether sizeof(int)==4 or if it's the right endianness.
+  #
+  # Note this is no good on AIX, since a C function there is the address of
+  # a function descriptor, not actual code.  But this doesn't matter since
+  # AIX doesn't allow mfpvr anyway.
+  #
+  cat >$dummy.c <<\EOF
+#include <stdio.h>
+struct {
+  int   n;  /* force 4-byte alignment */
+  char  a[8];
+} getpvr = {
+  0,
+  {
+    0x7c, 0x7f, 0x42, 0xa6,  /* mfpvr r3 */
+    0x4e, 0x80, 0x00, 0x20,  /* blr      */
+  }
+};
+int
+main ()
+{
+  unsigned  (*fun)();
+  unsigned  pvr;
+
+  /* a separate "fun" variable is necessary for gcc 2.95.2 on MacOS,
+     it gets a compiler error on a combined cast and call */
+  fun = (unsigned (*)()) getpvr.a;
+  pvr = (*fun) ();
+
+  switch (pvr >> 16) {
+  case 0x0001: puts ("powerpc601");  break;
+  case 0x0003: puts ("powerpc603");  break;
+  case 0x0004: puts ("powerpc604");  break;
+  case 0x0006: puts ("powerpc603e"); break;
+  case 0x0007: puts ("powerpc603e"); break;  /* 603ev */
+  case 0x0008: puts ("powerpc750");  break;
+  case 0x0009: puts ("powerpc604e"); break;
+  case 0x000a: puts ("powerpc604e"); break;  /* 604ev5 */
+  case 0x000c: puts ("powerpc7400"); break;
+  case 0x0041: puts ("powerpc630");  break;
+  case 0x0050: puts ("powerpc860");  break;
+  case 0x8000: puts ("powerpc7450"); break;
+  case 0x8001: puts ("powerpc7455"); break;
+  case 0x8002: puts ("powerpc7457"); break;
+  case 0x8003: puts ("powerpc7447"); break; /* really 7447A */
+  case 0x800c: puts ("powerpc7410"); break;
+  }
+  return 0;
+}
+EOF
+  if ($CC_FOR_BUILD $dummy.c -o $dummy) >/dev/null 2>&1; then
+    # This style construct is needed on AIX 4.3 to suppress the SIGILL error
+    # from (*fun)().  Using $SHELL -c ./$dummy 2>/dev/null doesn't work.
+    { x=`./$dummy`; } 2>/dev/null
+    if test -n "$x"; then
+      exact_cpu=$x
+    fi
+  fi
+  rm -f $dummy.c $dummy.o $dummy $dummy.core
+
+  # Grep the linux kernel /proc/cpuinfo pseudo-file.
+  # Anything unrecognised is ignored, since of course we mustn't spit out
+  # a cpu type config.sub doesn't know.
+  if test -z "$exact_cpu" && test -f /proc/cpuinfo; then
+    x=`grep "^cpu[     ]" /proc/cpuinfo | head -n 1`
+    x=`echo $x | sed -n 's/^cpu[       ]*:[    ]*\([A-Za-z0-9]*\).*/\1/p'`
+    x=`echo $x | sed 's/PPC//'`
+    case $x in
+      601)     exact_cpu="power" ;;
+      603ev)   exact_cpu="powerpc603e" ;;
+      604ev5)  exact_cpu="powerpc604e" ;;
+      603 | 603e | 604 | 604e | 750 | 821 | 860 | 970)
+        exact_cpu="powerpc$x" ;;
+      POWER[4-9])
+        exact_cpu=`echo $x | sed "s;POWER;power;"` ;;
+    esac
+  fi
+
+  if test -z "$exact_cpu"; then
+    # On AIX, try looking at _system_configuration.  This is present in
+    # version 4 at least.
+    cat >$dummy.c <<EOF
+#include <stdio.h>
+#include <sys/systemcfg.h>
+int
+main ()
+{
+  switch (_system_configuration.implementation) {
+  /* Old versions of AIX don't have all these constants,
+     use ifdef for safety. */
+#ifdef POWER_RS2
+  case POWER_RS2:    puts ("power2");     break;
+#endif
+#ifdef POWER_601
+  case POWER_601:    puts ("power");      break;
+#endif
+#ifdef POWER_603
+  case POWER_603:    puts ("powerpc603"); break;
+#endif
+#ifdef POWER_604
+  case POWER_604:    puts ("powerpc604"); break;
+#endif
+#ifdef POWER_620
+  case POWER_620:    puts ("powerpc620"); break;
+#endif
+#ifdef POWER_630
+  case POWER_630:    puts ("powerpc630"); break;
+#endif
+  /* Dunno what this is, leave it out for now.
+  case POWER_A35:    puts ("powerpca35"); break;
+  */
+  /* This is waiting for a bit more info.
+  case POWER_RS64II: puts ("powerpcrs64ii"); break;
+  */
+#ifdef POWER_4
+  case POWER_4:    puts ("power4"); break;
+#endif
+#ifdef POWER_5
+  case POWER_5:    puts ("power5"); break;
+#endif
+#ifdef POWER_6
+  case POWER_6:    puts ("power6"); break;
+#endif
+#ifdef POWER_7
+  case POWER_7:    puts ("power7"); break;
+#endif
+  default:
+    if (_system_configuration.architecture == POWER_RS)
+      puts ("power");
+    else if (_system_configuration.width == 64)
+      puts ("powerpc64");
+  }
+  return 0;
+}
+EOF
+    if ($CC_FOR_BUILD $dummy.c -o $dummy) >/dev/null 2>&1; then
+      x=`./$dummy`
+      if test -n "$x"; then
+        exact_cpu=$x
+      fi
+    fi
+    rm -f $dummy.c $dummy.o $dummy
+  fi
+
+  if test -z "$exact_cpu"; then
+    # On MacOS X (or any Mach-O presumably), NXGetLocalArchInfo cpusubtype
+    # can tell us the exact cpu.
+    cat >$dummy.c <<EOF
+#include <stdio.h>
+#include <mach-o/arch.h>
+int
+main (void)
+{
+  const NXArchInfo *a = NXGetLocalArchInfo();
+  if (a->cputype == CPU_TYPE_POWERPC)
+    {
+      switch (a->cpusubtype) {
+      /* The following known to Darwin 1.3. */
+      case CPU_SUBTYPE_POWERPC_601:   puts ("powerpc601");  break;
+      case CPU_SUBTYPE_POWERPC_602:   puts ("powerpc602");  break;
+      case CPU_SUBTYPE_POWERPC_603:   puts ("powerpc603");  break;
+      case CPU_SUBTYPE_POWERPC_603e:  puts ("powerpc603e"); break;
+      case CPU_SUBTYPE_POWERPC_603ev: puts ("powerpc603e"); break;
+      case CPU_SUBTYPE_POWERPC_604:   puts ("powerpc604");  break;
+      case CPU_SUBTYPE_POWERPC_604e:  puts ("powerpc604e"); break;
+      case CPU_SUBTYPE_POWERPC_620:   puts ("powerpc620");  break;
+      case CPU_SUBTYPE_POWERPC_750:   puts ("powerpc750");  break;
+      case CPU_SUBTYPE_POWERPC_7400:  puts ("powerpc7400"); break;
+      case CPU_SUBTYPE_POWERPC_7450:  puts ("powerpc7450"); break;
+      /* Darwin 6.8.5 doesn't define the following */
+      case 0x8001:                    puts ("powerpc7455"); break;
+      case 0x8002:                    puts ("powerpc7457"); break;
+      case 0x8003:                    puts ("powerpc7447"); break;
+      case 100:                       puts ("powerpc970");  break;
+      }
+    }
+  return 0;
+}
+EOF
+    if ($CC_FOR_BUILD $dummy.c -o $dummy) >/dev/null 2>&1; then
+      x=`./$dummy`
+      if test -n "$x"; then
+        exact_cpu=$x
+      fi
+    fi
+    rm -f $dummy.c $dummy.o $dummy
+  fi
+  ;;
+
+sparc-*-* | sparc64-*-*)
+  # If we can recognise an actual v7 then $exact_cpu is set to "sparc" so as
+  # to short-circuit subsequent tests.
+
+  # Grep the linux kernel /proc/cpuinfo pseudo-file.
+  # A typical line is "cpu\t\t: TI UltraSparc II  (BlackBird)"
+  # See arch/sparc/kernel/cpu.c and arch/sparc64/kernel/cpu.c.
+  #
+  if test -f /proc/cpuinfo; then
+    if grep 'cpu.*Cypress' /proc/cpuinfo >/dev/null; then
+      exact_cpu="sparc"   # ie. v7
+    elif grep 'cpu.*Power-UP' /proc/cpuinfo >/dev/null; then
+      exact_cpu="sparc"   # ie. v7
+    elif grep 'cpu.*HyperSparc' /proc/cpuinfo >/dev/null; then
+      exact_cpu="sparcv8"
+    elif grep 'cpu.*SuperSparc' /proc/cpuinfo >/dev/null; then
+      exact_cpu="supersparc"
+    elif grep 'cpu.*MicroSparc' /proc/cpuinfo >/dev/null; then
+      exact_cpu="microsparc"
+    elif grep 'cpu.*MB86904' /proc/cpuinfo >/dev/null; then
+      # actually MicroSPARC-II
+      exact_cpu=microsparc
+    elif grep 'cpu.*UltraSparc T3' /proc/cpuinfo >/dev/null; then
+      exact_cpu="ultrasparct3"
+    elif grep 'cpu.*UltraSparc T2' /proc/cpuinfo >/dev/null; then
+      exact_cpu="ultrasparct2"
+    elif grep 'cpu.*UltraSparc T1' /proc/cpuinfo >/dev/null; then
+      exact_cpu="ultrasparct1"
+    elif grep 'cpu.*UltraSparc III' /proc/cpuinfo >/dev/null; then
+      exact_cpu="ultrasparc3"
+    elif grep 'cpu.*UltraSparc IIi' /proc/cpuinfo >/dev/null; then
+      exact_cpu="ultrasparc2i"
+    elif grep 'cpu.*UltraSparc II' /proc/cpuinfo >/dev/null; then
+      exact_cpu="ultrasparc2"
+    elif grep 'cpu.*UltraSparc' /proc/cpuinfo >/dev/null; then
+      exact_cpu="ultrasparc"
+    fi
+  fi
+
+  # Grep the output from sysinfo on SunOS.
+  # sysinfo has been seen living in /bin or in /usr/kvm
+  #    cpu0 is a "SuperSPARC Model 41 SPARCmodule" CPU
+  #    cpu0 is a "75 MHz TI,TMS390Z55" CPU
+  #
+  if test -z "$exact_cpu"; then
+    for i in sysinfo /usr/kvm/sysinfo; do
+      if $SHELL -c $i 2>/dev/null >conftest.dat; then
+        if grep 'cpu0 is a "SuperSPARC' conftest.dat >/dev/null; then
+          exact_cpu=supersparc
+          break
+        elif grep 'cpu0 is a .*TMS390Z5.' conftest.dat >/dev/null; then
+          # TMS390Z50 and TMS390Z55
+          exact_cpu=supersparc
+          break
+        fi
+      fi
+    done
+    rm -f conftest.dat
+  fi
+
+  # Grep the output from prtconf on Solaris.
+  # Use an explicit /usr/sbin, since that directory might not be in a normal
+  # user's path.
+  #
+  #     SUNW,UltraSPARC (driver not attached)
+  #     SUNW,UltraSPARC-II (driver not attached)
+  #     SUNW,UltraSPARC-IIi (driver not attached)
+  #     SUNW,UltraSPARC-III+ (driver not attached)
+  #     Ross,RT625 (driver not attached)
+  #     TI,TMS390Z50 (driver not attached)
+  #
+  # /usr/sbin/sysdef prints similar information, but includes all loadable
+  # cpu modules, not just the real cpu.
+  #
+  # We first try a plain prtconf, since that is known to work on older systems.
+  # But for newer T1 systems, that doesn't produce any useful output, we need
+  # "prtconf -vp" there.
+  #
+  for prtconfopt in "" "-vp"; do
+    if test -z "$exact_cpu"; then
+      if $SHELL -c "/usr/sbin/prtconf $prtconfopt" 2>/dev/null >conftest.dat; then
+       if grep 'SUNW,UltraSPARC-T3' conftest.dat >/dev/null; then
+         exact_cpu=ultrasparct3
+       elif grep 'SUNW,UltraSPARC-T2' conftest.dat >/dev/null; then
+         exact_cpu=ultrasparct2
+       elif grep 'SUNW,UltraSPARC-T1' conftest.dat >/dev/null; then
+         exact_cpu=ultrasparct1
+       elif grep 'SUNW,UltraSPARC-III' conftest.dat >/dev/null; then
+         exact_cpu=ultrasparc3
+       elif grep 'SUNW,UltraSPARC-IIi' conftest.dat >/dev/null; then
+         exact_cpu=ultrasparc2i
+       elif grep 'SUNW,UltraSPARC-II' conftest.dat >/dev/null; then
+         exact_cpu=ultrasparc2
+       elif grep 'SUNW,UltraSPARC' conftest.dat >/dev/null; then
+         exact_cpu=ultrasparc
+       elif grep 'Ross,RT62.' conftest.dat >/dev/null; then
+         # RT620, RT625, RT626 hypersparcs (v8).
+         exact_cpu=sparcv8
+       elif grep 'TI,TMS390Z5.' conftest.dat >/dev/null; then
+         # TMS390Z50 and TMS390Z55
+         exact_cpu=supersparc
+       elif grep 'TI,TMS390S10' conftest.dat >/dev/null; then
+         exact_cpu=microsparc
+       elif grep 'FMI,MB86904' conftest.dat >/dev/null; then
+         # actually MicroSPARC-II
+         exact_cpu=microsparc
+       fi
+      fi
+      rm -f conftest.dat
+    fi
+  done
+
+  # Grep the output from sysctl hw.model on sparc or sparc64 *BSD.
+  # Use an explicit /sbin, since that directory might not be in a normal
+  # user's path.  Example outputs,
+  #
+  #     hw.model: Sun Microsystems UltraSparc-IIi
+  #
+  if test -z "$exact_cpu"; then
+    if $SHELL -c "/sbin/sysctl hw.model" 2>/dev/null >conftest.dat; then
+      if grep -i 'UltraSparc-T3' conftest.dat >/dev/null; then
+        exact_cpu=ultrasparct3
+      elif grep -i 'UltraSparc-T2' conftest.dat >/dev/null; then
+        exact_cpu=ultrasparct2
+      elif grep -i 'UltraSparc-T1' conftest.dat >/dev/null; then
+        exact_cpu=ultrasparct1
+      elif grep -i 'UltraSparc-III' conftest.dat >/dev/null; then
+        exact_cpu=ultrasparc3
+      elif grep -i 'UltraSparc-IIi' conftest.dat >/dev/null; then
+        exact_cpu=ultrasparc2i
+      elif grep -i 'UltraSparc-II' conftest.dat >/dev/null; then
+        exact_cpu=ultrasparc2
+      elif grep -i 'UltraSparc' conftest.dat >/dev/null; then
+        exact_cpu=ultrasparc
+      elif grep 'TMS390Z5.' conftest.dat >/dev/null; then
+        # TMS390Z50 and TMS390Z55
+        exact_cpu=supersparc
+      elif grep 'TMS390S10' conftest.dat >/dev/null; then
+        exact_cpu=microsparc
+      elif grep 'MB86904' conftest.dat >/dev/null; then
+        # actually MicroSPARC-II
+        exact_cpu=microsparc
+      elif grep 'MB86907' conftest.dat >/dev/null; then
+        exact_cpu=turbosparc
+      fi
+    fi
+    rm -f conftest.dat
+  fi
+
+  # sun4m and sun4d are v8s of some sort, sun4u is a v9 of some sort
+  #
+  if test -z "$exact_cpu"; then
+    case `uname -m` in
+      sun4[md]) exact_cpu=sparcv8 ;;
+      sun4u)    exact_cpu=sparcv9 ;;
+    esac
+  fi
+  ;;
+
+
+# Recognise x86 processors using a tricky cpuid with 4 arguments, repeating
+# arguments; for x86-64 we effectively pass the 1st in rdx and the 2nd in rcx.
+# This allows the same asm to work for both standard and Windoze calling
+# conventions.
+
+i?86-*-* | amd64-*-* | x86_64-*-*)
+  cat <<EOF >${dummy}0.s
+       .globl cpuid
+       .globl _cpuid
+cpuid:
+_cpuid:
+       pushl %esi
+       pushl %ebx
+       movl 24(%esp),%eax
+       .byte 0x0f
+       .byte 0xa2
+       movl 20(%esp),%esi
+       movl %ebx,(%esi)
+       movl %edx,4(%esi)
+       movl %ecx,8(%esi)
+       popl %ebx
+       popl %esi
+       ret
+EOF
+  cat <<EOF >${dummy}1.s
+       .globl cpuid
+       .globl _cpuid
+cpuid:
+_cpuid:
+       push    %rbx
+       mov     %rdx, %r8
+       mov     %ecx, %eax
+       .byte   0x0f
+       .byte   0xa2
+       mov     %ebx, (%r8)
+       mov     %edx, 4(%r8)
+       mov     %ecx, 8(%r8)
+       pop     %rbx
+       ret
+EOF
+  cat <<EOF >${dummy}2.c
+#include <string.h>
+#include <stdio.h>
+#define CPUID(a,b) cpuid(b,a,a,b)
+#if __cplusplus
+extern "C"
+#endif
+unsigned int cpuid (int, char *, char *, int);
+int
+main ()
+{
+  char vendor_string[13];
+  char dummy_string[12];
+  long fms;
+  int family, model, stepping;
+  const char *modelstr;
+  int cpu_64bit = 0;
+
+  CPUID (vendor_string, 0);
+  vendor_string[12] = 0;
+
+  fms = CPUID (dummy_string, 1);
+
+  family = ((fms >> 8) & 0xf) + ((fms >> 20) & 0xff);
+  model = ((fms >> 4) & 0xf) + ((fms >> 12) & 0xf0);
+  stepping = fms & 0xf;
+
+  modelstr = "$guess_cpu";
+
+  /**************************************************/
+  /*** WARNING: keep this list in sync with fat.c ***/
+  /**************************************************/
+  if (strcmp (vendor_string, "GenuineIntel") == 0)
+    {
+      switch (family)
+       {
+       case 5:
+         if (model <= 2)       modelstr = "pentium";
+         else if (model >= 4)  modelstr = "pentiummmx";
+         break;
+       case 6:
+         if (model <= 1)               modelstr = "pentiumpro";
+         else if (model <= 6)          modelstr = "pentium2";
+         else if (model <= 8)          modelstr = "pentium3";
+         else if (model <= 9)          modelstr = "pentiumm";
+         else if (model <= 0x0c)       modelstr = "pentium3";
+         else if (model <= 0x0e)       modelstr = "pentiumm";
+         else if (model <= 0x19)       cpu_64bit = 1, modelstr = "core2";
+         else if (model == 0x1a)       cpu_64bit = 1, modelstr = "coreinhm"; /* NHM Gainestown */
+         else if (model == 0x1c)       cpu_64bit = 1, modelstr = "atom";  /* Silverthorne */
+         else if (model == 0x1d)       cpu_64bit = 1, modelstr = "core2"; /* PNR Dunnington */
+         else if (model == 0x1e)       cpu_64bit = 1, modelstr = "coreinhm"; /* NHM Lynnfield/Jasper */
+         else if (model == 0x25)       cpu_64bit = 1, modelstr = "coreiwsm"; /* WSM Clarkdale/Arrandale */
+         else if (model == 0x26)       cpu_64bit = 1, modelstr = "atom";  /* Lincroft */
+         else if (model == 0x27)       cpu_64bit = 1, modelstr = "atom";  /* Saltwell */
+         else if (model == 0x2a)       cpu_64bit = 1, modelstr = "coreisbr"; /* SB */
+         else if (model == 0x2c)       cpu_64bit = 1, modelstr = "coreiwsm"; /* WSM Gulftown */
+         else if (model == 0x2d)       cpu_64bit = 1, modelstr = "coreisbr"; /* SBC-EP */
+         else if (model == 0x2e)       cpu_64bit = 1, modelstr = "coreinhm"; /* NHM Beckton */
+         else if (model == 0x2f)       cpu_64bit = 1, modelstr = "coreiwsm"; /* WSM Eagleton */
+         else cpu_64bit = 1, modelstr = "corei"; /* default */
+         break;
+       case 15:
+         cpu_64bit = 1, modelstr = "pentium4";
+         break;
+       }
+    }
+  else if (strcmp (vendor_string, "AuthenticAMD") == 0)
+    {
+      switch (family)
+       {
+       case 5:
+         if (model <= 3)       modelstr = "k5";
+         else if (model <= 7)  modelstr = "k6";
+         else if (model == 8)  modelstr = "k62";
+         else if (model == 9)  modelstr = "k63";
+         else if (model == 10) modelstr = "geode";
+         else if (model == 13) modelstr = "k63";
+         break;
+       case 6:
+         modelstr = "athlon";
+         break;
+       case 15:                /* K8, K9 */
+         cpu_64bit = 1, modelstr = "k8";
+         break;
+       case 16:                /* K10 */
+         cpu_64bit = 1, modelstr = "k10";
+         break;
+       case 17:                /* Hybrid k8/k10, claim k8 */
+         cpu_64bit = 1, modelstr = "k8";
+         break;
+       case 18:                /* Llano, uses K10 core */
+         cpu_64bit = 1, modelstr = "k10";
+         break;
+       case 19:                /* AMD Internal, assume future K10 */
+         cpu_64bit = 1, modelstr = "k10";
+         break;
+       case 20:                /* Bobcat */
+         cpu_64bit = 1, modelstr = "bobcat";
+         break;
+       case 21:                /* Bulldozer */
+         cpu_64bit = 1, modelstr = "bulldozer";
+         break;
+       case 22:                /* AMD Internal, assume future bulldozer */
+         cpu_64bit = 1, modelstr = "bulldozer";
+         break;
+       }
+    }
+  else if (strcmp (vendor_string, "CyrixInstead") == 0)
+    {
+      /* Should recognize Cyrix' processors too.  */
+    }
+  else if (strcmp (vendor_string, "CentaurHauls") == 0)
+    {
+      switch (family)
+       {
+       case 6:
+         if (model < 9)        modelstr = "viac3";
+         else if (model < 15)  modelstr = "viac32";
+         else                  cpu_64bit = 1, modelstr = "nano";
+         break;
+       }
+    }
+
+  /* If our cpuid-based exact guess is more conservative than the previous
+     guess, revert.  This is of course wrong, but it can happen in an emulator,
+     so this workaround allows for successful 64-bit builds.  */
+  if (strcmp ("$guess_cpu", "x86_64") == 0 && ! cpu_64bit)
+    modelstr = "$guess_cpu";
+
+  printf ("%s", modelstr);
+  return 0;
+}
+EOF
+
+  if ($CC_FOR_BUILD ${dummy}1.s ${dummy}2.c -o $dummy) >/dev/null 2>&1; then
+    # On 80386 and early 80486 cpuid is not available and will result in a
+    # SIGILL message, hence 2>/dev/null.
+    #
+    # On i386-unknown-freebsd4.9, "/bin/sh -c ./dummy" seems to send an
+    # "Illegal instruction (core dumped)" message to stdout, so we test $?
+    # to check if the program run was successful.
+    #
+    x=`$SHELL -c ./$dummy 2>/dev/null`
+    if test $? = 0 && test -n "$x"; then
+      exact_cpu=$x
+    fi
+  fi
+
+  if test -z "$exact_cpu"; then
+  if ($CC_FOR_BUILD ${dummy}0.s ${dummy}2.c -o $dummy) >/dev/null 2>&1; then
+    # On 80386 and early 80486 cpuid is not available and will result in a
+    # SIGILL message, hence 2>/dev/null.
+    #
+    # On i386-unknown-freebsd4.9, "/bin/sh -c ./dummy" seems to send an
+    # "Illegal instruction (core dumped)" message to stdout, so we test $?
+    # to check if the program run was successful.
+    #
+    x=`$SHELL -c ./$dummy 2>/dev/null`
+    if test $? = 0 && test -n "$x"; then
+      exact_cpu=$x
+    fi
+  fi
+  fi
+
+  # We need to remove some .o files here since lame C compilers
+  # generate these even when not asked.
+  rm -f ${dummy}0.s ${dummy}0.o ${dummy}1.s ${dummy}1.o ${dummy}2.c ${dummy}2.o $dummy
+  ;;
+
+s390*-*-*)
+  model=`grep "^processor 0: version =" /proc/cpuinfo 2>/dev/null | sed -e 's/.*machine = //'`
+  case $model in   # "machine = NNNN" field; empty if cpuinfo is unreadable
+    2064 | 2066) zcpu="z900" ;;
+    2084 | 2086) zcpu="z990" ;;
+    2094 | 2096) zcpu="z9"   ;;
+    2097 | 2098) zcpu="z10"  ;;
+    2817 | 2818 | *) zcpu="z196" ;;   # also the fallback for unknown models
+  esac
+  case "$guess_full" in
+    s390x-*-*) exact_cpu=${zcpu}    ;;
+    s390-*-*)  exact_cpu=${zcpu}esa ;;
+  esac
+  ;;
+
+esac
+
+
+
+# -------------------------------------------------------------------------
+# Use an exact cpu, if possible
+
+if test -n "$exact_cpu"; then
+  echo "$exact_cpu$guess_rest"
+else
+  echo "$guess_full"
+fi
+exit 0
+
+
+
+# Local variables:
+# fill-column: 76
+# End:
diff --git a/config.in b/config.in

new file mode 100644 (file)

index 0000000..8980159
--- /dev/null
+++ b/config.in
@@ -0,0 +1,594 @@
+/* config.in.  Generated from configure.in by autoheader.  */
+
+/*
+
+Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
+2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published
+by the Free Software Foundation; either version 3 of the License, or (at
+your option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+*/
+
+/* Define if building universal (internal helper macro) */
+#undef AC_APPLE_UNIVERSAL_BUILD
+
+/* The gmp-mparam.h file (a string) the tune program should suggest updating.
+   */
+#undef GMP_MPARAM_H_SUGGEST
+
+/* Define to 1 if you have the `alarm' function. */
+#undef HAVE_ALARM
+
+/* Define to 1 if alloca() works (via gmp-impl.h). */
+#undef HAVE_ALLOCA
+
+/* Define to 1 if you have <alloca.h> and it should be used (not on Ultrix).
+   */
+#undef HAVE_ALLOCA_H
+
+/* Define to 1 if the compiler accepts gcc style __attribute__ ((const)) */
+#undef HAVE_ATTRIBUTE_CONST
+
+/* Define to 1 if the compiler accepts gcc style __attribute__ ((malloc)) */
+#undef HAVE_ATTRIBUTE_MALLOC
+
+/* Define to 1 if the compiler accepts gcc style __attribute__ ((mode (XX)))
+   */
+#undef HAVE_ATTRIBUTE_MODE
+
+/* Define to 1 if the compiler accepts gcc style __attribute__ ((noreturn)) */
+#undef HAVE_ATTRIBUTE_NORETURN
+
+/* Define to 1 if you have the `attr_get' function. */
+#undef HAVE_ATTR_GET
+
+/* Define to 1 if tests/libtests has calling conventions checking for the CPU
+   */
+#undef HAVE_CALLING_CONVENTIONS
+
+/* Define to 1 if you have the `clock' function. */
+#undef HAVE_CLOCK
+
+/* Define to 1 if you have the `clock_gettime' function. */
+#undef HAVE_CLOCK_GETTIME
+
+/* Define to 1 if you have the `cputime' function. */
+#undef HAVE_CPUTIME
+
+/* Define to 1 if you have the declaration of `fgetc', and to 0 if you don't.
+   */
+#undef HAVE_DECL_FGETC
+
+/* Define to 1 if you have the declaration of `fscanf', and to 0 if you don't.
+   */
+#undef HAVE_DECL_FSCANF
+
+/* Define to 1 if you have the declaration of `optarg', and to 0 if you don't.
+   */
+#undef HAVE_DECL_OPTARG
+
+/* Define to 1 if you have the declaration of `sys_errlist', and to 0 if you
+   don't. */
+#undef HAVE_DECL_SYS_ERRLIST
+
+/* Define to 1 if you have the declaration of `sys_nerr', and to 0 if you
+   don't. */
+#undef HAVE_DECL_SYS_NERR
+
+/* Define to 1 if you have the declaration of `ungetc', and to 0 if you don't.
+   */
+#undef HAVE_DECL_UNGETC
+
+/* Define to 1 if you have the declaration of `vfprintf', and to 0 if you
+   don't. */
+#undef HAVE_DECL_VFPRINTF
+
+/* Define to 1 if you have the <dlfcn.h> header file. */
+#undef HAVE_DLFCN_H
+
+/* Define one of the following to 1 for the format of a `double'.
+   If your format is not among these choices, or you don't know what it is,
+   then leave all undefined.
+   IEEE_LITTLE_SWAPPED means little endian, but with the two 4-byte halves
+   swapped, as used by ARM CPUs in little endian mode.  */
+#undef HAVE_DOUBLE_IEEE_BIG_ENDIAN
+#undef HAVE_DOUBLE_IEEE_LITTLE_ENDIAN
+#undef HAVE_DOUBLE_IEEE_LITTLE_SWAPPED
+#undef HAVE_DOUBLE_VAX_D
+#undef HAVE_DOUBLE_VAX_G
+#undef HAVE_DOUBLE_CRAY_CFP
+
+/* Define to 1 if you have the <fcntl.h> header file. */
+#undef HAVE_FCNTL_H
+
+/* Define to 1 if you have the <float.h> header file. */
+#undef HAVE_FLOAT_H
+
+/* Define to 1 if you have the `getpagesize' function. */
+#undef HAVE_GETPAGESIZE
+
+/* Define to 1 if you have the `getrusage' function. */
+#undef HAVE_GETRUSAGE
+
+/* Define to 1 if you have the `getsysinfo' function. */
+#undef HAVE_GETSYSINFO
+
+/* Define to 1 if you have the `gettimeofday' function. */
+#undef HAVE_GETTIMEOFDAY
+
+/* Define one of these to 1 for the host CPU family.
+   If your CPU is not in any of these families, leave all undefined.
+   For an AMD64 chip, define "x86" in ABI=32, but not in ABI=64. */
+#undef HAVE_HOST_CPU_FAMILY_alpha
+#undef HAVE_HOST_CPU_FAMILY_m68k
+#undef HAVE_HOST_CPU_FAMILY_power
+#undef HAVE_HOST_CPU_FAMILY_powerpc
+#undef HAVE_HOST_CPU_FAMILY_x86
+#undef HAVE_HOST_CPU_FAMILY_x86_64
+
+/* Define one of the following to 1 for the host CPU, as per the output of
+   ./config.guess.  If your CPU is not listed here, leave all undefined.  */
+#undef HAVE_HOST_CPU_alphaev67
+#undef HAVE_HOST_CPU_alphaev68
+#undef HAVE_HOST_CPU_alphaev7
+#undef HAVE_HOST_CPU_m68020
+#undef HAVE_HOST_CPU_m68030
+#undef HAVE_HOST_CPU_m68040
+#undef HAVE_HOST_CPU_m68060
+#undef HAVE_HOST_CPU_m68360
+#undef HAVE_HOST_CPU_powerpc604
+#undef HAVE_HOST_CPU_powerpc604e
+#undef HAVE_HOST_CPU_powerpc750
+#undef HAVE_HOST_CPU_powerpc7400
+#undef HAVE_HOST_CPU_supersparc
+#undef HAVE_HOST_CPU_i386
+#undef HAVE_HOST_CPU_i586
+#undef HAVE_HOST_CPU_i686
+#undef HAVE_HOST_CPU_pentium
+#undef HAVE_HOST_CPU_pentiummmx
+#undef HAVE_HOST_CPU_pentiumpro
+#undef HAVE_HOST_CPU_pentium2
+#undef HAVE_HOST_CPU_pentium3
+#undef HAVE_HOST_CPU_s390_z900
+#undef HAVE_HOST_CPU_s390_z990
+#undef HAVE_HOST_CPU_s390_z9
+#undef HAVE_HOST_CPU_s390_z10
+#undef HAVE_HOST_CPU_s390_z196
+
+/* Define to 1 iff we have a s390 with 64-bit registers.  */
+#undef HAVE_HOST_CPU_s390_zarch
+
+/* Define to 1 if the system has the type `intmax_t'. */
+#undef HAVE_INTMAX_T
+
+/* Define to 1 if the system has the type `intptr_t'. */
+#undef HAVE_INTPTR_T
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+#undef HAVE_INTTYPES_H
+
+/* Define to 1 if you have the <invent.h> header file. */
+#undef HAVE_INVENT_H
+
+/* Define to 1 if you have the <langinfo.h> header file. */
+#undef HAVE_LANGINFO_H
+
+/* Define one of these to 1 for the endianness of `mp_limb_t'.
+   If the endianness is not a simple big or little, or you don't know what
+   it is, then leave both undefined. */
+#undef HAVE_LIMB_BIG_ENDIAN
+#undef HAVE_LIMB_LITTLE_ENDIAN
+
+/* Define to 1 if you have the `localeconv' function. */
+#undef HAVE_LOCALECONV
+
+/* Define to 1 if you have the <locale.h> header file. */
+#undef HAVE_LOCALE_H
+
+/* Define to 1 if the system has the type `long double'. */
+#undef HAVE_LONG_DOUBLE
+
+/* Define to 1 if the system has the type `long long'. */
+#undef HAVE_LONG_LONG
+
+/* Define to 1 if you have the <machine/hal_sysinfo.h> header file. */
+#undef HAVE_MACHINE_HAL_SYSINFO_H
+
+/* Define to 1 if you have the <memory.h> header file. */
+#undef HAVE_MEMORY_H
+
+/* Define to 1 if you have the `memset' function. */
+#undef HAVE_MEMSET
+
+/* Define to 1 if you have the `mmap' function. */
+#undef HAVE_MMAP
+
+/* Define to 1 if you have the `mprotect' function. */
+#undef HAVE_MPROTECT
+
+/* Define to 1 each of the following for which a native (ie. CPU specific)
+    implementation of the corresponding routine exists.  */
+#undef HAVE_NATIVE_mpn_add_n
+#undef HAVE_NATIVE_mpn_add_n_sub_n
+#undef HAVE_NATIVE_mpn_add_nc
+#undef HAVE_NATIVE_mpn_addaddmul_1msb0
+#undef HAVE_NATIVE_mpn_addlsh1_n
+#undef HAVE_NATIVE_mpn_addlsh2_n
+#undef HAVE_NATIVE_mpn_addlsh_n
+#undef HAVE_NATIVE_mpn_addmul_1c
+#undef HAVE_NATIVE_mpn_addmul_2
+#undef HAVE_NATIVE_mpn_addmul_3
+#undef HAVE_NATIVE_mpn_addmul_4
+#undef HAVE_NATIVE_mpn_addmul_5
+#undef HAVE_NATIVE_mpn_addmul_6
+#undef HAVE_NATIVE_mpn_addmul_7
+#undef HAVE_NATIVE_mpn_addmul_8
+#undef HAVE_NATIVE_mpn_and_n
+#undef HAVE_NATIVE_mpn_andn_n
+#undef HAVE_NATIVE_mpn_bdiv_dbm1c
+#undef HAVE_NATIVE_mpn_bdiv_q_1
+#undef HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#undef HAVE_NATIVE_mpn_com
+#undef HAVE_NATIVE_mpn_copyd
+#undef HAVE_NATIVE_mpn_copyi
+#undef HAVE_NATIVE_mpn_divexact_1
+#undef HAVE_NATIVE_mpn_divexact_by3c
+#undef HAVE_NATIVE_mpn_divrem_1
+#undef HAVE_NATIVE_mpn_divrem_1c
+#undef HAVE_NATIVE_mpn_divrem_2
+#undef HAVE_NATIVE_mpn_gcd_1
+#undef HAVE_NATIVE_mpn_hamdist
+#undef HAVE_NATIVE_mpn_invert_limb
+#undef HAVE_NATIVE_mpn_ior_n
+#undef HAVE_NATIVE_mpn_iorn_n
+#undef HAVE_NATIVE_mpn_lshift
+#undef HAVE_NATIVE_mpn_lshiftc
+#undef HAVE_NATIVE_mpn_lshsub_n
+#undef HAVE_NATIVE_mpn_mod_1
+#undef HAVE_NATIVE_mpn_mod_1_1p
+#undef HAVE_NATIVE_mpn_mod_1c
+#undef HAVE_NATIVE_mpn_mod_1s_2p
+#undef HAVE_NATIVE_mpn_mod_1s_4p
+#undef HAVE_NATIVE_mpn_mod_34lsub1
+#undef HAVE_NATIVE_mpn_modexact_1_odd
+#undef HAVE_NATIVE_mpn_modexact_1c_odd
+#undef HAVE_NATIVE_mpn_mul_1
+#undef HAVE_NATIVE_mpn_mul_1c
+#undef HAVE_NATIVE_mpn_mul_2
+#undef HAVE_NATIVE_mpn_mul_3
+#undef HAVE_NATIVE_mpn_mul_4
+#undef HAVE_NATIVE_mpn_mul_basecase
+#undef HAVE_NATIVE_mpn_nand_n
+#undef HAVE_NATIVE_mpn_nior_n
+#undef HAVE_NATIVE_mpn_popcount
+#undef HAVE_NATIVE_mpn_preinv_divrem_1
+#undef HAVE_NATIVE_mpn_preinv_mod_1
+#undef HAVE_NATIVE_mpn_redc_1
+#undef HAVE_NATIVE_mpn_redc_2
+#undef HAVE_NATIVE_mpn_rsblsh1_n
+#undef HAVE_NATIVE_mpn_rsblsh2_n
+#undef HAVE_NATIVE_mpn_rsblsh_n
+#undef HAVE_NATIVE_mpn_rsh1add_n
+#undef HAVE_NATIVE_mpn_rsh1add_nc
+#undef HAVE_NATIVE_mpn_rsh1sub_n
+#undef HAVE_NATIVE_mpn_rsh1sub_nc
+#undef HAVE_NATIVE_mpn_rshift
+#undef HAVE_NATIVE_mpn_sqr_basecase
+#undef HAVE_NATIVE_mpn_sqr_diagonal
+#undef HAVE_NATIVE_mpn_sub_n
+#undef HAVE_NATIVE_mpn_sub_nc
+#undef HAVE_NATIVE_mpn_sublsh1_n
+#undef HAVE_NATIVE_mpn_sublsh2_n
+#undef HAVE_NATIVE_mpn_sublsh_n
+#undef HAVE_NATIVE_mpn_submul_1c
+#undef HAVE_NATIVE_mpn_udiv_qrnnd
+#undef HAVE_NATIVE_mpn_udiv_qrnnd_r
+#undef HAVE_NATIVE_mpn_umul_ppmm
+#undef HAVE_NATIVE_mpn_umul_ppmm_r
+#undef HAVE_NATIVE_mpn_xor_n
+#undef HAVE_NATIVE_mpn_xnor_n
+
+/* Define to 1 if you have the `nl_langinfo' function. */
+#undef HAVE_NL_LANGINFO
+
+/* Define to 1 if you have the <nl_types.h> header file. */
+#undef HAVE_NL_TYPES_H
+
+/* Define to 1 if you have the `obstack_vprintf' function. */
+#undef HAVE_OBSTACK_VPRINTF
+
+/* Define to 1 if you have the `popen' function. */
+#undef HAVE_POPEN
+
+/* Define to 1 if you have the `processor_info' function. */
+#undef HAVE_PROCESSOR_INFO
+
+/* Define to 1 if <sys/pstat.h> `struct pst_processor' exists and contains
+   `psp_iticksperclktick'. */
+#undef HAVE_PSP_ITICKSPERCLKTICK
+
+/* Define to 1 if you have the `pstat_getprocessor' function. */
+#undef HAVE_PSTAT_GETPROCESSOR
+
+/* Define to 1 if the system has the type `ptrdiff_t'. */
+#undef HAVE_PTRDIFF_T
+
+/* Define to 1 if the system has the type `quad_t'. */
+#undef HAVE_QUAD_T
+
+/* Define to 1 if you have the `raise' function. */
+#undef HAVE_RAISE
+
+/* Define to 1 if you have the `read_real_time' function. */
+#undef HAVE_READ_REAL_TIME
+
+/* Define to 1 if you have the `sigaction' function. */
+#undef HAVE_SIGACTION
+
+/* Define to 1 if you have the `sigaltstack' function. */
+#undef HAVE_SIGALTSTACK
+
+/* Define to 1 if you have the `sigstack' function. */
+#undef HAVE_SIGSTACK
+
+/* Tune directory speed_cyclecounter (undef=none, 1=32bits, 2=64bits) */
+#undef HAVE_SPEED_CYCLECOUNTER
+
+/* Define to 1 if you have the <sstream> header file. */
+#undef HAVE_SSTREAM
+
+/* Define to 1 if the system has the type `stack_t'. */
+#undef HAVE_STACK_T
+
+/* Define to 1 if <stdarg.h> exists and works */
+#undef HAVE_STDARG
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#undef HAVE_STDINT_H
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#undef HAVE_STDLIB_H
+
+/* Define to 1 if the system has the type `std::locale'. */
+#undef HAVE_STD__LOCALE
+
+/* Define to 1 if you have the `strchr' function. */
+#undef HAVE_STRCHR
+
+/* Define to 1 if you have the `strerror' function. */
+#undef HAVE_STRERROR
+
+/* Define to 1 if cpp supports the ANSI # stringizing operator. */
+#undef HAVE_STRINGIZE
+
+/* Define to 1 if you have the <strings.h> header file. */
+#undef HAVE_STRINGS_H
+
+/* Define to 1 if you have the <string.h> header file. */
+#undef HAVE_STRING_H
+
+/* Define to 1 if you have the `strnlen' function. */
+#undef HAVE_STRNLEN
+
+/* Define to 1 if you have the `strtol' function. */
+#undef HAVE_STRTOL
+
+/* Define to 1 if you have the `strtoul' function. */
+#undef HAVE_STRTOUL
+
+/* Define to 1 if you have the `sysconf' function. */
+#undef HAVE_SYSCONF
+
+/* Define to 1 if you have the `sysctl' function. */
+#undef HAVE_SYSCTL
+
+/* Define to 1 if you have the `sysctlbyname' function. */
+#undef HAVE_SYSCTLBYNAME
+
+/* Define to 1 if you have the `syssgi' function. */
+#undef HAVE_SYSSGI
+
+/* Define to 1 if you have the <sys/attributes.h> header file. */
+#undef HAVE_SYS_ATTRIBUTES_H
+
+/* Define to 1 if you have the <sys/iograph.h> header file. */
+#undef HAVE_SYS_IOGRAPH_H
+
+/* Define to 1 if you have the <sys/mman.h> header file. */
+#undef HAVE_SYS_MMAN_H
+
+/* Define to 1 if you have the <sys/param.h> header file. */
+#undef HAVE_SYS_PARAM_H
+
+/* Define to 1 if you have the <sys/processor.h> header file. */
+#undef HAVE_SYS_PROCESSOR_H
+
+/* Define to 1 if you have the <sys/pstat.h> header file. */
+#undef HAVE_SYS_PSTAT_H
+
+/* Define to 1 if you have the <sys/resource.h> header file. */
+#undef HAVE_SYS_RESOURCE_H
+
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#undef HAVE_SYS_STAT_H
+
+/* Define to 1 if you have the <sys/sysctl.h> header file. */
+#undef HAVE_SYS_SYSCTL_H
+
+/* Define to 1 if you have the <sys/sysinfo.h> header file. */
+#undef HAVE_SYS_SYSINFO_H
+
+/* Define to 1 if you have the <sys/syssgi.h> header file. */
+#undef HAVE_SYS_SYSSGI_H
+
+/* Define to 1 if you have the <sys/systemcfg.h> header file. */
+#undef HAVE_SYS_SYSTEMCFG_H
+
+/* Define to 1 if you have the <sys/times.h> header file. */
+#undef HAVE_SYS_TIMES_H
+
+/* Define to 1 if you have the <sys/time.h> header file. */
+#undef HAVE_SYS_TIME_H
+
+/* Define to 1 if you have the <sys/types.h> header file. */
+#undef HAVE_SYS_TYPES_H
+
+/* Define to 1 if you have the `times' function. */
+#undef HAVE_TIMES
+
+/* Define to 1 if the system has the type `uint_least32_t'. */
+#undef HAVE_UINT_LEAST32_T
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#undef HAVE_UNISTD_H
+
+/* Define to 1 if you have the `vsnprintf' function and it works properly. */
+#undef HAVE_VSNPRINTF
+
+/* Assembler local label prefix */
+#undef LSYM_PREFIX
+
+/* Define to the sub-directory in which libtool stores uninstalled libraries.
+   */
+#undef LT_OBJDIR
+
+/* Name of package */
+#undef PACKAGE
+
+/* Define to the address where bug reports for this package should be sent. */
+#undef PACKAGE_BUGREPORT
+
+/* Define to the full name of this package. */
+#undef PACKAGE_NAME
+
+/* Define to the full name and version of this package. */
+#undef PACKAGE_STRING
+
+/* Define to the one symbol short name of this package. */
+#undef PACKAGE_TARNAME
+
+/* Define to the home page for this package. */
+#undef PACKAGE_URL
+
+/* Define to the version of this package. */
+#undef PACKAGE_VERSION
+
+/* Define to 1 if the C compiler supports function prototypes. */
+#undef PROTOTYPES
+
+/* Define as the return type of signal handlers (`int' or `void'). */
+#undef RETSIGTYPE
+
+/* The size of `mp_limb_t', as computed by sizeof. */
+#undef SIZEOF_MP_LIMB_T
+
+/* The size of `unsigned', as computed by sizeof. */
+#undef SIZEOF_UNSIGNED
+
+/* The size of `unsigned long', as computed by sizeof. */
+#undef SIZEOF_UNSIGNED_LONG
+
+/* The size of `unsigned short', as computed by sizeof. */
+#undef SIZEOF_UNSIGNED_SHORT
+
+/* The size of `void *', as computed by sizeof. */
+#undef SIZEOF_VOID_P
+
+/* Define to 1 if sscanf requires writable inputs */
+#undef SSCANF_WRITABLE_INPUT
+
+/* Define to 1 if you have the ANSI C header files. */
+#undef STDC_HEADERS
+
+/* Define to 1 if you can safely include both <sys/time.h> and <time.h>. */
+#undef TIME_WITH_SYS_TIME
+
+/* Maximum size the tune program can test for SQR_TOOM2_THRESHOLD */
+#undef TUNE_SQR_TOOM2_MAX
+
+/* Version number of package */
+#undef VERSION
+
+/* Define to 1 to enable ASSERT checking, per --enable-assert */
+#undef WANT_ASSERT
+
+/* Define to 1 when building a fat binary. */
+#undef WANT_FAT_BINARY
+
+/* Define to 1 to enable FFTs for multiplication, per --enable-fft */
+#undef WANT_FFT
+
+/* Define to 1 to enable old mpn_mul_fft_full for multiplication, per
+   --enable-old-fft-full */
+#undef WANT_OLD_FFT_FULL
+
+/* Define to 1 if --enable-profiling=gprof */
+#undef WANT_PROFILING_GPROF
+
+/* Define to 1 if --enable-profiling=instrument */
+#undef WANT_PROFILING_INSTRUMENT
+
+/* Define to 1 if --enable-profiling=prof */
+#undef WANT_PROFILING_PROF
+
+/* Define one of these to 1 for the desired temporary memory allocation
+   method, per --enable-alloca. */
+#undef WANT_TMP_ALLOCA
+#undef WANT_TMP_REENTRANT
+#undef WANT_TMP_NOTREENTRANT
+#undef WANT_TMP_DEBUG
+
+/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most
+   significant byte first (like Motorola and SPARC, unlike Intel). */
+#if defined AC_APPLE_UNIVERSAL_BUILD
+# if defined __BIG_ENDIAN__
+#  define WORDS_BIGENDIAN 1
+# endif
+#else
+# ifndef WORDS_BIGENDIAN
+#  undef WORDS_BIGENDIAN
+# endif
+#endif
+
+/* Define to 1 if `lex' declares `yytext' as a `char *' by default, not a
+   `char[]'. */
+#undef YYTEXT_POINTER
+
+/* Define like PROTOTYPES; this can be used by system headers. */
+#undef __PROTOTYPES
+
+/* Define to `__inline__' or `__inline' if that's what the C compiler
+   calls it, or to nothing if 'inline' is not supported under any name.  */
+#ifndef __cplusplus
+#undef inline
+#endif
+
+/* Define to the equivalent of the C99 'restrict' keyword, or to
+   nothing if this is not supported.  Do not define if restrict is
+   supported directly.  */
+#undef restrict
+/* Work around a bug in Sun C++: it does not support _Restrict or
+   __restrict__, even though the corresponding Sun C compiler ends up with
+   "#define restrict _Restrict" or "#define restrict __restrict__" in the
+   previous line.  Perhaps some future version of Sun C++ will work with
+   restrict; if so, hopefully it defines __RESTRICT like Sun C does.  */
+#if defined __SUNPRO_CC && !defined __RESTRICT
+# define _Restrict
+# define __restrict__
+#endif
+
+/* Define to empty if the keyword `volatile' does not work. Warning: valid
+   code using `volatile' can become incorrect without. Disable with care. */
+#undef volatile
diff --git a/config.sub b/config.sub

new file mode 100755 (executable)

index 0000000..4f68184
--- /dev/null
+++ b/config.sub
@@ -0,0 +1,148 @@
+#! /bin/sh
+#
+# GMP config.sub wrapper.
+
+
+# Copyright 2000, 2001, 2002, 2003, 2006 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation; either version 3 of the License, or (at
+# your option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+# Usage: config.sub CPU-VENDOR-OS
+#        config.sub ALIAS
+#
+# Validate and canonicalize the given configuration name, with special
+# handling for GMP extra CPU names.
+#
+# When the CPU isn't special the whole name is simply passed straight
+# through to configfsf.sub.
+#
+# When the CPU is a GMP extra, configfsf.sub is run on a similar CPU that it
+# will recognise.  For example "athlon-pc-freebsd3.5" is validated using
+# "i386-pc-freebsd3.5".
+#
+# Any canonicalizations made by configfsf.sub are preserved.  For example
+# given "athlon-linux", configfsf.sub is called with "i386-linux" and will
+# give back "i386-pc-linux-gnu".  "athlon" is then reinstated, so we print
+# "athlon-pc-linux-gnu".
+
+
+# Expect to find configfsf.sub in the same directory as this config.sub
+configfsf_sub="`echo \"$0\" | sed 's/config.sub$/configfsf.sub/'`"
+if test "$configfsf_sub" = "$0"; then   # sed changed nothing: $0 does not end in config.sub
+  echo "Cannot derive configfsf.sub from $0" 1>&2
+  exit 1
+fi
+if test -f "$configfsf_sub"; then
+  :
+else
+  echo "$configfsf_sub not found" 1>&2
+  exit 1
+fi
+
+# Always run configfsf.sub with $SHELL, like autoconf does for config.sub
+SHELL=${CONFIG_SHELL-/bin/sh}
+
+# Identify ourselves on --version, --help, etc
+case "$1" in
+"" | -*)
+  echo "(GNU MP wrapped config.sub)" 1>&2
+  $SHELL "$configfsf_sub" "$@"   # quoted, like the test -f above, so a path with spaces works
+  exit                           # bare exit: hand back configfsf.sub's own status
+  ;;
+esac
+
+given_full="$1"
+given_cpu=`echo "$given_full" | sed 's/-.*$//'`     # text before the first "-"
+given_rest=`echo "$given_full" | sed 's/^[^-]*//'`  # from the first "-" onwards; empty if none
+
+
+# Aliases for GMP extras
+case "$given_cpu" in
+  # configfsf.sub turns p5 into i586, instead use our exact cpu type
+  p5 | p54)   given_cpu=pentium ;;
+  p55)        given_cpu=pentiummmx ;;
+
+  # configfsf.sub turns p6, pentiumii and pentiumiii into i686, instead use
+  # our exact cpu types
+  p6)         given_cpu=pentiumpro ;;
+  pentiumii)  given_cpu=pentium2 ;;
+  pentiumiii) given_cpu=pentium3 ;;
+esac
+given_full="$given_cpu$given_rest"   # rebuild the full name with any alias applied
+
+
+# GMP extras and what to use for the config.sub test
+case "$given_cpu" in
+itanium | itanium2)
+  test_cpu=ia64 ;;
+pentium | pentiummmx | pentiumpro | pentium[234m] | k[567] | k6[23] | geode | athlon | viac3*)
+  test_cpu=i386 ;;
+athlon64 | atom | core2 | corei | coreinhm | coreiwsm | coreisbr | opteron | k[89] | k10 | bobcat | bulldozer | nano)
+  test_cpu=x86_64 ;;
+power[2-9] | power2sc)
+  test_cpu=power ;;
+powerpc401 | powerpc403 | powerpc405 | \
+powerpc505 | \
+powerpc601 | powerpc602  | \
+powerpc603 | powerpc603e | \
+powerpc604 | powerpc604e | \
+powerpc620 | powerpc630  | powerpc970  | \
+powerpc740 | powerpc7400 | powerpc7450 | powerpc750  | \
+powerpc801 | powerpc821 | powerpc823  | powerpc860 | \
+powerpc64)
+  test_cpu=powerpc ;;
+sparcv8 | supersparc | microsparc | \
+ultrasparc | ultrasparc2 | ultrasparc2i | ultrasparc3 | ultrasparct[1234])
+  test_cpu=sparc ;;
+sh2)
+  test_cpu=sh ;;
+
+z900 | z990 | z9 | z10 | z196)
+  test_cpu=s390x;;
+z900esa | z990esa | z9esa | z10esa | z196esa)
+  test_cpu=s390;;
+
+*)
+  # Don't need or want to change the given name, just run configfsf.sub
+  $SHELL "$configfsf_sub" "$given_full"
+  if test $? = 0; then
+    exit 0
+  else
+    echo "(GNU MP wrapped config.sub, testing \"$given_full\")" 1>&2   # stderr: stdout is captured by callers
+    exit 1
+  fi
+esac
+
+# Validate using the stand-in CPU, then put the GMP CPU name back in front.
+test_full="$test_cpu$given_rest"
+canonical_full=`$SHELL "$configfsf_sub" "$test_full"`
+if test $? = 0; then
+  :
+else
+  echo "(GNU MP wrapped config.sub, testing \"$given_full\" as \"$test_full\")" 1>&2
+  exit 1
+fi
+
+canonical_rest=`echo "$canonical_full" | sed 's/^[^-]*//'`   # drop the stand-in CPU, keep "-vendor-os"
+echo "$given_cpu$canonical_rest"
+exit 0
+
+
+
+# Local variables:
+# fill-column: 76
+# End:
diff --git a/configfsf.guess b/configfsf.guess

new file mode 100644 (file)

index 0000000..187cd54
--- /dev/null
+++ b/configfsf.guess
@@ -0,0 +1,1511 @@
+#! /bin/sh
+# Attempt to guess a canonical system name.
+#   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
+#   2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
+#   2011 Free Software Foundation, Inc.
+
+timestamp='2011-02-02'
+
+# This file is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
+# 02110-1301, USA.
+#
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
+
+# Originally written by Per Bothner.  Please send patches (context
+# diff format) to <config-patches@gnu.org> and include a ChangeLog
+# entry.
+#
+# This script attempts to guess a canonical system name similar to
+# config.sub.  If it succeeds, it prints the system name on stdout, and
+# exits with 0.  Otherwise, it exits with 1.
+#
+# You can get the latest version of this script from:
+# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD
+
+me=`echo "$0" | sed -e 's,.*/,,'`   # $0 with everything up to the last "/" removed
+# Text printed for --help
+usage="\
+Usage: $0 [OPTION]
+
+Output the configuration name of the system \`$me' is run on.
+
+Operation modes:
+  -h, --help         print this help, then exit
+  -t, --time-stamp   print date of last modification, then exit
+  -v, --version      print version number, then exit
+
+Report bugs and patches to <config-patches@gnu.org>."
+# Text printed for --version
+version="\
+GNU config.guess ($timestamp)
+
+Originally written by Per Bothner.
+Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
+2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free
+Software Foundation, Inc.
+
+This is free software; see the source for copying conditions.  There is NO
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
+# Hint appended to the command-line error messages; it starts with a newline
+help="
+Try \`$me --help' for more information."
+
+# Parse command line
+while test $# -gt 0 ; do
+  case $1 in
+    --time-stamp | --time* | -t )
+       echo "$timestamp" ; exit ;;
+    --version | -v )
+       echo "$version" ; exit ;;
+    --help | --h* | -h )
+       echo "$usage"; exit ;;
+    -- )     # Stop option processing
+       shift; break ;;
+    - )        # Use stdin as input.
+       break ;;
+    -* )     # Any other option is rejected
+       echo "$me: invalid option $1$help" >&2
+       exit 1 ;;
+    * )      # First non-option argument ends option parsing
+       break ;;
+  esac
+done
+# No operands are accepted: a lone "-" or any other leftover argument is an error.
+if test $# != 0; then
+  echo "$me: too many arguments$help" >&2
+  exit 1
+fi
+
+trap 'exit 1' 1 2 15
+
+# CC_FOR_BUILD -- compiler used by this script. Note that the use of a
+# compiler to aid in system detection is discouraged as it requires
+# temporary files to be created and, as you can see below, it is a
+# headache to deal with in a portable fashion.
+
+# Historically, `CC_FOR_BUILD' used to be named `HOST_CC'. We still
+# use `HOST_CC' if defined, but it is deprecated.
+
+# Portable tmp directory creation inspired by the Autoconf team.
+# Run as `eval $set_cc_for_build'; the string empties itself once it has run.
+set_cc_for_build='
+trap "exitcode=\$?; (rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null) && exit \$exitcode" 0 ;
+trap "rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null; exit 1" 1 2 13 15 ;
+: ${TMPDIR=/tmp} ;
+ { tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } ||
+ { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir $tmp) ; } ||
+ { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir $tmp) && echo "Warning: creating insecure temp directory" >&2 ; } ||
+ { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } ;
+dummy=$tmp/dummy ;
+tmpfiles="$dummy.c $dummy.o $dummy.rel $dummy" ;
+case $CC_FOR_BUILD,$HOST_CC,$CC in
+ ,,)    echo "int x;" > $dummy.c ;
+       for c in cc gcc c89 c99 ; do
+         if ($c -c -o $dummy.o $dummy.c) >/dev/null 2>&1 ; then
+            CC_FOR_BUILD="$c"; break ;
+         fi ;
+       done ;
+       if test x"$CC_FOR_BUILD" = x ; then
+         CC_FOR_BUILD=no_compiler_found ;
+       fi
+       ;;
+ ,,*)   CC_FOR_BUILD=$CC ;;
+ ,*,*)  CC_FOR_BUILD=$HOST_CC ;;
+esac ; set_cc_for_build= ;'
+
+# This is needed to find uname on a Pyramid OSx when run in the BSD universe.
+# (ghazi@noc.rutgers.edu 1994-08-24)
+if (test -f /.attbin/uname) >/dev/null 2>&1 ; then
+       PATH=$PATH:/.attbin ; export PATH
+fi
+# Each uname field falls back to "unknown" when the uname call fails.
+UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown
+UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown
+UNAME_SYSTEM=`(uname -s) 2>/dev/null`  || UNAME_SYSTEM=unknown
+UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown
+
+# Note: order is significant - the case branches are not exclusive.
+
+case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
+    *:NetBSD:*:*)
+       # NetBSD (nbsd) targets should (where applicable) match one or
+       # more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*,
+       # *-*-netbsdecoff* and *-*-netbsd*.  For targets that recently
+       # switched to ELF, *-*-netbsd* would select the old
+       # object file format.  This provides both forward
+       # compatibility and a consistent mechanism for selecting the
+       # object file format.
+       #
+       # Note: NetBSD doesn't particularly care about the vendor
+       # portion of the name.  We always set it to "unknown".
+       sysctl="sysctl -n hw.machine_arch"
+       UNAME_MACHINE_ARCH=`(/sbin/$sysctl 2>/dev/null || \
+           /usr/sbin/$sysctl 2>/dev/null || echo unknown)`
+       case "${UNAME_MACHINE_ARCH}" in
+           armeb) machine=armeb-unknown ;;
+           arm*) machine=arm-unknown ;;
+           sh3el) machine=shl-unknown ;;
+           sh3eb) machine=sh-unknown ;;
+           sh5el) machine=sh5le-unknown ;;
+           *) machine=${UNAME_MACHINE_ARCH}-unknown ;;
+       esac
+       # The Operating System including object format, if it has switched
+       # to ELF recently, or will in the future.
+       case "${UNAME_MACHINE_ARCH}" in
+           arm*|i386|m68k|ns32k|sh3*|sparc|vax)
+               eval $set_cc_for_build
+               if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \
+                       | grep -q __ELF__
+               then
+                   # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout).
+                   # Return netbsd for either.  FIX?
+                   os=netbsd
+               else
+                   os=netbsdelf
+               fi
+               ;;
+           *)
+               os=netbsd
+               ;;
+       esac
+       # The OS release
+       # Debian GNU/NetBSD machines have a different userland, and
+       # thus, need a distinct triplet. However, they do not need
+       # kernel version information, so it can be replaced with a
+       # suitable tag, in the style of linux-gnu.
+       case "${UNAME_VERSION}" in
+           Debian*)
+               release='-gnu'
+               ;;
+           *)
+               release=`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'`
+               ;;
+       esac
+       # Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM:
+       # contains redundant information, the shorter form:
+       # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used.
+       echo "${machine}-${os}${release}"
+       exit ;;
+    *:OpenBSD:*:*)
+       UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'`
+       echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE}
+       exit ;;
+    *:ekkoBSD:*:*)
+       echo ${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE}
+       exit ;;
+    *:SolidBSD:*:*)
+       echo ${UNAME_MACHINE}-unknown-solidbsd${UNAME_RELEASE}
+       exit ;;
+    macppc:MirBSD:*:*)
+       echo powerpc-unknown-mirbsd${UNAME_RELEASE}
+       exit ;;
+    *:MirBSD:*:*)
+       echo ${UNAME_MACHINE}-unknown-mirbsd${UNAME_RELEASE}
+       exit ;;
+    alpha:OSF1:*:*)
+       # Replace the uname release with a field of `sizer -v': the 3rd
+       # field on *4.0 releases, the 4th on *5.* releases.  Any other
+       # release string is left as it is.
+       case $UNAME_RELEASE in
+           *4.0) UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'` ;;
+           *5.*) UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'` ;;
+       esac
+       # /usr/sbin/psrinfo has, according to Compaq, been shipped with
+       # OSF/1 and Tru64 systems since 1995, which should cover most
+       # machines still running.  Only the first matching line survives
+       # `head -n 1', so the type of CPU 0 decides.
+       ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^  The alpha \(.*\) processor.*$/\1/p' | head -n 1`
+       # Map the reported CPU type to the GNU machine name.  A type that
+       # is not listed leaves UNAME_MACHINE unchanged.
+       case "$ALPHA_CPU_TYPE" in
+           "EV4 (21064)" | "EV4.5 (21064)" | "LCA4 (21066/21068)")
+               UNAME_MACHINE="alpha" ;;
+           "EV5 (21164)")
+               UNAME_MACHINE="alphaev5" ;;
+           "EV5.6 (21164A)")
+               UNAME_MACHINE="alphaev56" ;;
+           "EV5.6 (21164PC)")
+               UNAME_MACHINE="alphapca56" ;;
+           "EV5.7 (21164PC)")
+               UNAME_MACHINE="alphapca57" ;;
+           "EV6 (21264)")
+               UNAME_MACHINE="alphaev6" ;;
+           "EV6.7 (21264A)")
+               UNAME_MACHINE="alphaev67" ;;
+           "EV6.8CB (21264C)" | "EV6.8AL (21264B)" | "EV6.8CX (21264D)")
+               UNAME_MACHINE="alphaev68" ;;
+           "EV6.9A (21264/EV69A)")
+               UNAME_MACHINE="alphaev69" ;;
+           "EV7 (21364)")
+               UNAME_MACHINE="alphaev7" ;;
+           "EV7.9 (21364A)")
+               UNAME_MACHINE="alphaev79" ;;
+       esac
+       # Release prefixes: Pn.n is a patched version, Vn.n a released
+       # version, Tn.n a released field test version and Xn.n an
+       # unreleased experimental baselevel; 1.2 uses plain "1.2" for
+       # uname -r.  The prefix letter is dropped and the rest lower-cased.
+       echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
+       # Keep the status of the echo above, and clear the EXIT trap first
+       # so that it cannot turn a success into a spurious non-zero exit.
+       exitcode=$?
+       trap '' 0
+       exit $exitcode ;;
+    Alpha\ *:Windows_NT*:*)
+       # How do we know it's Interix rather than the generic POSIX subsystem?
+       # Should we change UNAME_MACHINE based on the output of uname instead
+       # of the specific Alpha model?
+       echo alpha-pc-interix
+       exit ;;
+    21064:Windows_NT:50:3)
+       echo alpha-dec-winnt3.5
+       exit ;;
+    Amiga*:UNIX_System_V:4.0:*)
+       echo m68k-unknown-sysv4
+       exit ;;
+    *:[Aa]miga[Oo][Ss]:*:*)
+       echo ${UNAME_MACHINE}-unknown-amigaos
+       exit ;;
+    *:[Mm]orph[Oo][Ss]:*:*)
+       echo ${UNAME_MACHINE}-unknown-morphos
+       exit ;;
+    *:OS/390:*:*)
+       echo i370-ibm-openedition
+       exit ;;
+    *:z/VM:*:*)
+       echo s390-ibm-zvmoe
+       exit ;;
+    *:OS400:*:*)
+       echo powerpc-ibm-os400
+       exit ;;
+    arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*)
+       echo arm-acorn-riscix${UNAME_RELEASE}
+       exit ;;
+    arm:riscos:*:*|arm:RISCOS:*:*)
+       echo arm-unknown-riscos
+       exit ;;
+    SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*)
+       echo hppa1.1-hitachi-hiuxmpp
+       exit ;;
+    Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*)
+       # akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE.
+       if test "`(/bin/universe) 2>/dev/null`" = att ; then
+               echo pyramid-pyramid-sysv3
+       else
+               echo pyramid-pyramid-bsd
+       fi
+       exit ;;
+    NILE*:*:*:dcosx)
+       echo pyramid-pyramid-svr4
+       exit ;;
+    DRS?6000:unix:4.0:6*)
+       echo sparc-icl-nx6
+       exit ;;
+    DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*)
+       case `/usr/bin/uname -p` in
+           sparc) echo sparc-icl-nx7; exit ;;
+       esac ;;
+    s390x:SunOS:*:*)
+       echo ${UNAME_MACHINE}-ibm-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+       exit ;;
+    sun4H:SunOS:5.*:*)
+       echo sparc-hal-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+       exit ;;
+    sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*)
+       echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+       exit ;;
+    i86pc:AuroraUX:5.*:* | i86xen:AuroraUX:5.*:*)
+       echo i386-pc-auroraux${UNAME_RELEASE}
+       exit ;;
+    i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*)
+       eval $set_cc_for_build
+       SUN_ARCH="i386"
+       # If there is a compiler, see if it is configured for 64-bit objects.
+       # Note that the Sun cc does not turn __LP64__ into 1 like gcc does.
+       # This test works for both compilers.
+       if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then
+           if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \
+               (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \
+               grep IS_64BIT_ARCH >/dev/null
+           then
+               SUN_ARCH="x86_64"
+           fi
+       fi
+       echo ${SUN_ARCH}-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+       exit ;;
+    sun4*:SunOS:6*:*)
+       # According to config.sub, this is the proper way to canonicalize
+       # SunOS6.  Hard to guess exactly what SunOS6 will be like, but
+       # it's likely to be more like Solaris than SunOS4.
+       echo sparc-sun-solaris3`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+       exit ;;
+    sun4*:SunOS:*:*)
+       case "`/usr/bin/arch -k`" in
+           Series*|S4*)
+               UNAME_RELEASE=`uname -v`
+               ;;
+       esac
+       # Japanese Language versions have a version number like `4.1.3-JL'.
+       echo sparc-sun-sunos`echo ${UNAME_RELEASE}|sed -e 's/-/_/'`
+       exit ;;
+    sun3*:SunOS:*:*)
+       echo m68k-sun-sunos${UNAME_RELEASE}
+       exit ;;
+    sun*:*:4.2BSD:*)
+       UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null`
+       test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3
+       case "`/bin/arch`" in
+           sun3)
+               echo m68k-sun-sunos${UNAME_RELEASE}
+               ;;
+           sun4)
+               echo sparc-sun-sunos${UNAME_RELEASE}
+               ;;
+       esac
+       exit ;;
+    aushp:SunOS:*:*)
+       echo sparc-auspex-sunos${UNAME_RELEASE}
+       exit ;;
+    # The situation for MiNT is a little confusing.  The machine name
+    # can be virtually everything (everything which is not
+    # "atarist" or "atariste" at least should have a processor
+    # > m68000).  The system name ranges from "MiNT" over "FreeMiNT"
+    # to the lowercase version "mint" (or "freemint").  Finally
+    # the system name "TOS" denotes a system which is actually not
+    # MiNT.  But MiNT is downward compatible to TOS, so this should
+    # be no problem.
+    atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*)
+       echo m68k-atari-mint${UNAME_RELEASE}
+       exit ;;
+    atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*)
+       echo m68k-atari-mint${UNAME_RELEASE}
+       exit ;;
+    *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*)
+       echo m68k-atari-mint${UNAME_RELEASE}
+       exit ;;
+    milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*)
+       echo m68k-milan-mint${UNAME_RELEASE}
+       exit ;;
+    hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*)
+       echo m68k-hades-mint${UNAME_RELEASE}
+       exit ;;
+    *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*)
+       echo m68k-unknown-mint${UNAME_RELEASE}
+       exit ;;
+    m68k:machten:*:*)
+       echo m68k-apple-machten${UNAME_RELEASE}
+       exit ;;
+    powerpc:machten:*:*)
+       echo powerpc-apple-machten${UNAME_RELEASE}
+       exit ;;
+    RISC*:Mach:*:*)
+       echo mips-dec-mach_bsd4.3
+       exit ;;
+    RISC*:ULTRIX:*:*)
+       echo mips-dec-ultrix${UNAME_RELEASE}
+       exit ;;
+    VAX*:ULTRIX*:*:*)
+       echo vax-dec-ultrix${UNAME_RELEASE}
+       exit ;;
+    2020:CLIX:*:* | 2430:CLIX:*:*)
+       echo clipper-intergraph-clix${UNAME_RELEASE}
+       exit ;;
+    mips:*:*:UMIPS | mips:*:*:RISCos)
+       eval $set_cc_for_build
+       sed 's/^        //' << EOF >$dummy.c
+#ifdef __cplusplus
+#include <stdio.h>  /* for printf() prototype */
+       int main (int argc, char *argv[]) {
+#else
+       int main (argc, argv) int argc; char *argv[]; {
+#endif
+       #if defined (host_mips) && defined (MIPSEB)
+       #if defined (SYSTYPE_SYSV)
+         printf ("mips-mips-riscos%ssysv\n", argv[1]); exit (0);
+       #endif
+       #if defined (SYSTYPE_SVR4)
+         printf ("mips-mips-riscos%ssvr4\n", argv[1]); exit (0);
+       #endif
+       #if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD)
+         printf ("mips-mips-riscos%sbsd\n", argv[1]); exit (0);
+       #endif
+       #endif
+         exit (-1);
+       }
+EOF
+       $CC_FOR_BUILD -o $dummy $dummy.c &&
+         dummyarg=`echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` &&
+         SYSTEM_NAME=`$dummy $dummyarg` &&
+           { echo "$SYSTEM_NAME"; exit; }
+       echo mips-mips-riscos${UNAME_RELEASE}
+       exit ;;
+    Motorola:PowerMAX_OS:*:*)
+       echo powerpc-motorola-powermax
+       exit ;;
+    Motorola:*:4.3:PL8-*)
+       echo powerpc-harris-powermax
+       exit ;;
+    Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*)
+       echo powerpc-harris-powermax
+       exit ;;
+    Night_Hawk:Power_UNIX:*:*)
+       echo powerpc-harris-powerunix
+       exit ;;
+    m88k:CX/UX:7*:*)
+       echo m88k-harris-cxux7
+       exit ;;
+    m88k:*:4*:R4*)
+       echo m88k-motorola-sysv4
+       exit ;;
+    m88k:*:3*:R3*)
+       echo m88k-motorola-sysv3
+       exit ;;
+    AViiON:dgux:*:*)
+       # DG/UX returns AViiON for all architectures, so the processor has
+       # to be taken from `uname -p' instead.
+       UNAME_PROCESSOR=`/usr/bin/uname -p`
+       # All operands are quoted: an empty or multi-word value (for
+       # example when /usr/bin/uname fails) must compare as "not equal"
+       # rather than make `[' abort with a syntax error.
+       if [ "$UNAME_PROCESSOR" = mc88100 ] || [ "$UNAME_PROCESSOR" = mc88110 ]
+       then
+           # m88k: TARGET_BINARY_INTERFACE set to m88kdguxelf, or left
+           # empty, selects the plain dgux name; any other value selects
+           # the dguxbcs name.
+           if [ "${TARGET_BINARY_INTERFACE}x" = m88kdguxelfx ] || \
+              [ "${TARGET_BINARY_INTERFACE}x" = x ]
+           then
+               echo m88k-dg-dgux${UNAME_RELEASE}
+           else
+               echo m88k-dg-dguxbcs${UNAME_RELEASE}
+           fi
+       else
+           # Every other processor is reported as i586.
+           echo i586-dg-dgux${UNAME_RELEASE}
+       fi
+       exit ;;
+    M88*:DolphinOS:*:*)        # DolphinOS (SVR3)
+       echo m88k-dolphin-sysv3
+       exit ;;
+    M88*:*:R3*:*)
+       # Delta 88k system running SVR3
+       echo m88k-motorola-sysv3
+       exit ;;
+    XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3)
+       echo m88k-tektronix-sysv3
+       exit ;;
+    Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD)
+       echo m68k-tektronix-bsd
+       exit ;;
+    *:IRIX*:*:*)
+       echo mips-sgi-irix`echo ${UNAME_RELEASE}|sed -e 's/-/_/g'`
+       exit ;;
+    ????????:AIX?:[12].1:2)   # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX.
+       echo romp-ibm-aix     # uname -m gives an 8 hex-code CPU id
+       exit ;;               # Note that: echo "'`uname -s`'" gives 'AIX '
+    i*86:AIX:*:*)
+       echo i386-ibm-aix
+       exit ;;
+    ia64:AIX:*:*)
+       # Start from ${UNAME_VERSION}.${UNAME_RELEASE} and let an
+       # executable /usr/bin/oslevel override it.
+       IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE}
+       if [ -x /usr/bin/oslevel ] ; then
+               IBM_REV=`/usr/bin/oslevel`
+       fi
+       echo ${UNAME_MACHINE}-ibm-aix${IBM_REV}
+       exit ;;
+    *:AIX:2:3)
+       if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then
+               eval $set_cc_for_build
+               sed 's/^                //' << EOF >$dummy.c
+               #include <sys/systemcfg.h>
+
+               main()
+                       {
+                       if (!__power_pc())
+                               exit(1);
+                       puts("powerpc-ibm-aix3.2.5");
+                       exit(0);
+                       }
+EOF
+               if $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy`
+               then
+                       echo "$SYSTEM_NAME"
+               else
+                       echo rs6000-ibm-aix3.2.5
+               fi
+       elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then
+               echo rs6000-ibm-aix3.2.4
+       else
+               echo rs6000-ibm-aix3.2
+       fi
+       exit ;;
+    *:AIX:*:[4567])
+       # Take the name of the first available processor device; if its
+       # attribute listing contains " POWER" the machine is reported as
+       # rs6000, otherwise as powerpc.
+       IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'`
+       IBM_ARCH=powerpc
+       if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then
+               IBM_ARCH=rs6000
+       fi
+       # Start from ${UNAME_VERSION}.${UNAME_RELEASE} and let an
+       # executable /usr/bin/oslevel override it.
+       IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE}
+       if [ -x /usr/bin/oslevel ] ; then
+               IBM_REV=`/usr/bin/oslevel`
+       fi
+       echo ${IBM_ARCH}-ibm-aix${IBM_REV}
+       exit ;;
+    *:AIX:*:*)
+       echo rs6000-ibm-aix
+       exit ;;
+    ibmrt:4.4BSD:*|romp-ibm:BSD:*)
+       echo romp-ibm-bsd4.4
+       exit ;;
+    ibmrt:*BSD:*|romp-ibm:BSD:*)            # covers RT/PC BSD and
+       echo romp-ibm-bsd${UNAME_RELEASE}   # 4.3 with uname added to
+       exit ;;                             # report: romp-ibm BSD 4.3
+    *:BOSX:*:*)
+       echo rs6000-bull-bosx
+       exit ;;
+    DPX/2?00:B.O.S.:*:*)
+       echo m68k-bull-sysv3
+       exit ;;
+    9000/[34]??:4.3bsd:1.*:*)
+       echo m68k-hp-bsd
+       exit ;;
+    hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*)
+       echo m68k-hp-bsd4.4
+       exit ;;
+    9000/[34678]??:HP-UX:*:*)
+       # HPUX_REV is the release with its leading component, the character
+       # after it, and any 0/B characters that follow removed
+       # (e.g. B.11.23 -> 11.23).
+       HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'`
+       case "${UNAME_MACHINE}" in
+           9000/31? )            HP_ARCH=m68000 ;;
+           9000/[34]?? )         HP_ARCH=m68k ;;
+           9000/[678][0-9][0-9])
+               # First choice: ask getconf for the CPU version and, for
+               # PA-RISC 2.0, the kernel width.
+               if [ -x /usr/bin/getconf ]; then
+                   sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null`
+                   sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null`
+                   case "${sc_cpu_version}" in
+                     523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0
+                     528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1
+                     532)                      # CPU_PA_RISC2_0
+                       case "${sc_kernel_bits}" in
+                         32) HP_ARCH="hppa2.0n" ;;
+                         64) HP_ARCH="hppa2.0w" ;;
+                         '') HP_ARCH="hppa2.0" ;;   # HP-UX 10.20
+                       esac ;;
+                   esac
+               fi
+               # Second choice: getconf gave no answer, so compile and run
+               # a small probe that asks sysconf() the same questions.
+               if [ "${HP_ARCH}" = "" ]; then
+                   eval $set_cc_for_build
+                   sed 's/^            //' << EOF >$dummy.c
+
+               #define _HPUX_SOURCE
+               #include <stdlib.h>
+               #include <unistd.h>
+
+               int main ()
+               {
+               #if defined(_SC_KERNEL_BITS)
+                   long bits = sysconf(_SC_KERNEL_BITS);
+               #endif
+                   long cpu  = sysconf (_SC_CPU_VERSION);
+
+                   switch (cpu)
+                       {
+                       case CPU_PA_RISC1_0: puts ("hppa1.0"); break;
+                       case CPU_PA_RISC1_1: puts ("hppa1.1"); break;
+                       case CPU_PA_RISC2_0:
+               #if defined(_SC_KERNEL_BITS)
+                           switch (bits)
+                               {
+                               case 64: puts ("hppa2.0w"); break;
+                               case 32: puts ("hppa2.0n"); break;
+                               default: puts ("hppa2.0"); break;
+                               } break;
+               #else  /* !defined(_SC_KERNEL_BITS) */
+                           puts ("hppa2.0"); break;
+               #endif
+                       default: puts ("hppa1.0"); break;
+                       }
+                   exit (0);
+               }
+EOF
+                   (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy`
+                   # Last resort when the probe could not be built or run.
+                   test -z "$HP_ARCH" && HP_ARCH=hppa
+               fi ;;
+       esac
+       # HP_ARCH is quoted here: it is empty when the machine matched the
+       # outer pattern but none of the inner ones, and an unquoted empty
+       # operand makes `[' fail with a syntax error.
+       if [ "${HP_ARCH}" = "hppa2.0w" ]
+       then
+           eval $set_cc_for_build
+
+           # hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating
+           # 32-bit code.  hppa64-hp-hpux* has the same kernel and a compiler
+           # generating 64-bit code.  GNU and HP use different nomenclature:
+           #
+           # $ CC_FOR_BUILD=cc ./config.guess
+           # => hppa2.0w-hp-hpux11.23
+           # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess
+           # => hppa64-hp-hpux11.23
+
+           # If the literal token __LP64__ survives preprocessing, the
+           # macro is not defined, so the name stays hppa2.0w; otherwise
+           # the name becomes hppa64.
+           if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) |
+               grep -q __LP64__
+           then
+               HP_ARCH="hppa2.0w"
+           else
+               HP_ARCH="hppa64"
+           fi
+       fi
+       echo ${HP_ARCH}-hp-hpux${HPUX_REV}
+       exit ;;
+    ia64:HP-UX:*:*)
+       HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'`
+       echo ia64-hp-hpux${HPUX_REV}
+       exit ;;
+    3050*:HI-UX:*:*)
+       eval $set_cc_for_build
+       sed 's/^        //' << EOF >$dummy.c
+       #include <unistd.h>
+       int
+       main ()
+       {
+         long cpu = sysconf (_SC_CPU_VERSION);
+         /* The order matters, because CPU_IS_HP_MC68K erroneously returns
+            true for CPU_PA_RISC1_0.  CPU_IS_PA_RISC returns correct
+            results, however.  */
+         if (CPU_IS_PA_RISC (cpu))
+           {
+             switch (cpu)
+               {
+                 case CPU_PA_RISC1_0: puts ("hppa1.0-hitachi-hiuxwe2"); break;
+                 case CPU_PA_RISC1_1: puts ("hppa1.1-hitachi-hiuxwe2"); break;
+                 case CPU_PA_RISC2_0: puts ("hppa2.0-hitachi-hiuxwe2"); break;
+                 default: puts ("hppa-hitachi-hiuxwe2"); break;
+               }
+           }
+         else if (CPU_IS_HP_MC68K (cpu))
+           puts ("m68k-hitachi-hiuxwe2");
+         else puts ("unknown-hitachi-hiuxwe2");
+         exit (0);
+       }
+EOF
+       $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` &&
+               { echo "$SYSTEM_NAME"; exit; }
+       echo unknown-hitachi-hiuxwe2
+       exit ;;
+    9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:* )
+       echo hppa1.1-hp-bsd
+       exit ;;
+    9000/8??:4.3bsd:*:*)
+       echo hppa1.0-hp-bsd
+       exit ;;
+    *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*)
+       echo hppa1.0-hp-mpeix
+       exit ;;
+    hp7??:OSF1:*:* | hp8?[79]:OSF1:*:* )
+       echo hppa1.1-hp-osf
+       exit ;;
+    hp8??:OSF1:*:*)
+       echo hppa1.0-hp-osf
+       exit ;;
+    i*86:OSF1:*:*)
+       if [ -x /usr/sbin/sysversion ] ; then
+           echo ${UNAME_MACHINE}-unknown-osf1mk
+       else
+           echo ${UNAME_MACHINE}-unknown-osf1
+       fi
+       exit ;;
+    parisc*:Lites*:*:*)
+       echo hppa1.1-hp-lites
+       exit ;;
+    C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*)
+       echo c1-convex-bsd
+       exit ;;
+    C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*)
+       if getsysinfo -f scalar_acc
+       then echo c32-convex-bsd
+       else echo c2-convex-bsd
+       fi
+       exit ;;
+    C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*)
+       echo c34-convex-bsd
+       exit ;;
+    C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*)
+       echo c38-convex-bsd
+       exit ;;
+    C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*)
+       echo c4-convex-bsd
+       exit ;;
+    CRAY*Y-MP:*:*:*)
+       echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
+       exit ;;
+    CRAY*[A-Z]90:*:*:*)
+       echo ${UNAME_MACHINE}-cray-unicos${UNAME_RELEASE} \
+       | sed -e 's/CRAY.*\([A-Z]90\)/\1/' \
+             -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \
+             -e 's/\.[^.]*$/.X/'
+       exit ;;
+    CRAY*TS:*:*:*)
+       echo t90-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
+       exit ;;
+    CRAY*T3E:*:*:*)
+       echo alphaev5-cray-unicosmk${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
+       exit ;;
+    CRAY*SV1:*:*:*)
+       echo sv1-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
+       exit ;;
+    *:UNICOS/mp:*:*)
+       echo craynv-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
+       exit ;;
+    F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*)
+       FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
+       FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
+       FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'`
+       echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
+       exit ;;
+    5000:UNIX_System_V:4.*:*)
+       FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
+       FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'`
+       echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
+       exit ;;
+    i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*)
+       echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE}
+       exit ;;
+    sparc*:BSD/OS:*:*)
+       echo sparc-unknown-bsdi${UNAME_RELEASE}
+       exit ;;
+    *:BSD/OS:*:*)
+       echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE}
+       exit ;;
+    *:FreeBSD:*:*)
+       # Translate the two machine names that differ from the GNU CPU
+       # name; every other value is used unchanged.
+       case ${UNAME_MACHINE} in
+           pc98)  UNAME_MACHINE=i386 ;;
+           amd64) UNAME_MACHINE=x86_64 ;;
+       esac
+       # The release is cut at the first "-" or "(".
+       echo ${UNAME_MACHINE}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`
+       exit ;;
+    i*:CYGWIN*:*)
+       echo ${UNAME_MACHINE}-pc-cygwin
+       exit ;;
+    *:MINGW*:*)
+       echo ${UNAME_MACHINE}-pc-mingw32
+       exit ;;
+    i*:windows32*:*)
+       # uname -m includes "-pc" on this system.
+       echo ${UNAME_MACHINE}-mingw32
+       exit ;;
+    i*:PW*:*)
+       echo ${UNAME_MACHINE}-pc-pw32
+       exit ;;
+    *:Interix*:*)
+       case ${UNAME_MACHINE} in
+           x86)
+               echo i586-pc-interix${UNAME_RELEASE}
+               exit ;;
+           authenticamd | genuineintel | EM64T)
+               echo x86_64-unknown-interix${UNAME_RELEASE}
+               exit ;;
+           IA64)
+               echo ia64-unknown-interix${UNAME_RELEASE}
+               exit ;;
+       esac ;;
+    [345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*)
+       echo i${UNAME_MACHINE}-pc-mks
+       exit ;;
+    8664:Windows_NT:*)
+       echo x86_64-pc-mks
+       exit ;;
+    i*:Windows_NT*:* | Pentium*:Windows_NT*:*)
+       # How do we know it's Interix rather than the generic POSIX subsystem?
+       # It also conflicts with pre-2.0 versions of AT&T UWIN. Should we
+       # change UNAME_MACHINE based on the output of uname instead of
+       # hard-coding i586?
+       echo i586-pc-interix
+       exit ;;
+    i*:UWIN*:*)
+       echo ${UNAME_MACHINE}-pc-uwin
+       exit ;;
+    amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*)
+       echo x86_64-unknown-cygwin
+       exit ;;
+    p*:CYGWIN*:*)
+       echo powerpcle-unknown-cygwin
+       exit ;;
+    prep*:SunOS:5.*:*)
+       echo powerpcle-unknown-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+       exit ;;
+    *:GNU:*:*)
+       # the GNU system
+       echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-gnu`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'`
+       exit ;;
+    *:GNU/*:*:*)
+       # other systems with GNU libc and userland
+       # The OS part is what follows the first "/" of the system name,
+       # lower-cased; the release is cut at the first "-" or "(".
+       # The alphabet is spelled out for tr, as elsewhere in this script,
+       # because the meaning of bracketed ranges such as [A-Z] depends on
+       # the tr implementation and the locale.
+       echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-gnu
+       exit ;;
+    i*86:Minix:*:*)
+       echo ${UNAME_MACHINE}-pc-minix
+       exit ;;
+    alpha:Linux:*:*)
+       # Refine UNAME_MACHINE from the "cpu model" line of /proc/cpuinfo.
+       # A model that is not listed leaves UNAME_MACHINE unchanged.
+       case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in
+         EV5)           UNAME_MACHINE=alphaev5 ;;
+         EV56)          UNAME_MACHINE=alphaev56 ;;
+         PCA56 | PCA57) UNAME_MACHINE=alphapca56 ;;
+         EV6)           UNAME_MACHINE=alphaev6 ;;
+         EV67)          UNAME_MACHINE=alphaev67 ;;
+         EV68*)         UNAME_MACHINE=alphaev68 ;;
+       esac
+       # If the private headers of /bin/sh mention ld.so.1, the name gets
+       # the "libc1" suffix; otherwise the suffix is empty.
+       if objdump --private-headers /bin/sh | grep -q ld.so.1
+       then
+           LIBC="libc1"
+       else
+           LIBC=""
+       fi
+       echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC}
+       exit ;;
+    arm*:Linux:*:*)
+       eval $set_cc_for_build
+       if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \
+           | grep -q __ARM_EABI__
+       then
+           echo ${UNAME_MACHINE}-unknown-linux-gnu
+       else
+           echo ${UNAME_MACHINE}-unknown-linux-gnueabi
+       fi
+       exit ;;
+    avr32*:Linux:*:*)
+       echo ${UNAME_MACHINE}-unknown-linux-gnu
+       exit ;;
+    cris:Linux:*:*)
+       echo cris-axis-linux-gnu
+       exit ;;
+    crisv32:Linux:*:*)
+       echo crisv32-axis-linux-gnu
+       exit ;;
+    frv:Linux:*:*)
+       echo frv-unknown-linux-gnu
+       exit ;;
+    i*86:Linux:*:*)
+       LIBC=gnu
+       eval $set_cc_for_build
+       sed 's/^        //' << EOF >$dummy.c
+       #ifdef __dietlibc__
+       LIBC=dietlibc
+       #endif
+EOF
+       eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'`
+       echo "${UNAME_MACHINE}-pc-linux-${LIBC}"
+       exit ;;
+    ia64:Linux:*:*)
+       echo ${UNAME_MACHINE}-unknown-linux-gnu
+       exit ;;
+    m32r*:Linux:*:*)
+       echo ${UNAME_MACHINE}-unknown-linux-gnu
+       exit ;;
+    m68*:Linux:*:*)
+       echo ${UNAME_MACHINE}-unknown-linux-gnu
+       exit ;;
+    mips:Linux:*:* | mips64:Linux:*:*)
+       eval $set_cc_for_build
+       sed 's/^        //' << EOF >$dummy.c
+       #undef CPU
+       #undef ${UNAME_MACHINE}
+       #undef ${UNAME_MACHINE}el
+       #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
+       CPU=${UNAME_MACHINE}el
+       #else
+       #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB)
+       CPU=${UNAME_MACHINE}
+       #else
+       CPU=
+       #endif
+       #endif
+EOF
+       eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'`
+       test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; }
+       ;;
+    or32:Linux:*:*)
+       echo or32-unknown-linux-gnu
+       exit ;;
+    padre:Linux:*:*)
+       echo sparc-unknown-linux-gnu
+       exit ;;
+    parisc64:Linux:*:* | hppa64:Linux:*:*)
+       echo hppa64-unknown-linux-gnu
+       exit ;;
+    parisc:Linux:*:* | hppa:Linux:*:*)
+       # Look for CPU level
+       case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in
+         PA7*) echo hppa1.1-unknown-linux-gnu ;;
+         PA8*) echo hppa2.0-unknown-linux-gnu ;;
+         *)    echo hppa-unknown-linux-gnu ;;
+       esac
+       exit ;;
+    ppc64:Linux:*:*)
+       echo powerpc64-unknown-linux-gnu
+       exit ;;
+    ppc:Linux:*:*)
+       echo powerpc-unknown-linux-gnu
+       exit ;;
+    s390:Linux:*:* | s390x:Linux:*:*)
+       echo ${UNAME_MACHINE}-ibm-linux
+       exit ;;
+    sh64*:Linux:*:*)
+       echo ${UNAME_MACHINE}-unknown-linux-gnu
+       exit ;;
+    sh*:Linux:*:*)
+       echo ${UNAME_MACHINE}-unknown-linux-gnu
+       exit ;;
+    sparc:Linux:*:* | sparc64:Linux:*:*)
+       echo ${UNAME_MACHINE}-unknown-linux-gnu
+       exit ;;
+    tile*:Linux:*:*)
+       echo ${UNAME_MACHINE}-tilera-linux-gnu
+       exit ;;
+    vax:Linux:*:*)
+       echo ${UNAME_MACHINE}-dec-linux-gnu
+       exit ;;
+    x86_64:Linux:*:*)
+       echo x86_64-unknown-linux-gnu
+       exit ;;
+    xtensa*:Linux:*:*)
+       echo ${UNAME_MACHINE}-unknown-linux-gnu
+       exit ;;
+    i*86:DYNIX/ptx:4*:*)
+       # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there.
+       # earlier versions are messed up and put the nodename in both
+       # sysname and nodename.
+       echo i386-sequent-sysv4
+       exit ;;
+    i*86:UNIX_SV:4.2MP:2.*)
+       # Unixware is an offshoot of SVR4, but it has its own version
+       # number series starting with 2...
+       # I am not positive that other SVR4 systems won't match this,
+       # I just have to hope.  -- rms.
+       # Use sysv4.2uw... so that sysv4* matches it.
+       echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION}
+       exit ;;
+    i*86:OS/2:*:*)
+       # If we were able to find `uname', then EMX Unix compatibility
+       # is probably installed.
+       echo ${UNAME_MACHINE}-pc-os2-emx
+       exit ;;
+    i*86:XTS-300:*:STOP)
+       echo ${UNAME_MACHINE}-unknown-stop
+       exit ;;
+    i*86:atheos:*:*)
+       echo ${UNAME_MACHINE}-unknown-atheos
+       exit ;;
+    i*86:syllable:*:*)
+       echo ${UNAME_MACHINE}-pc-syllable
+       exit ;;
+    i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*)
+       echo i386-unknown-lynxos${UNAME_RELEASE}
+       exit ;;
+    i*86:*DOS:*:*)
+       echo ${UNAME_MACHINE}-pc-msdosdjgpp
+       exit ;;
+    i*86:*:4.*:* | i*86:SYSTEM_V:4.*:*)
+       UNAME_REL=`echo ${UNAME_RELEASE} | sed 's/\/MP$//'`
+       if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then
+               echo ${UNAME_MACHINE}-univel-sysv${UNAME_REL}
+       else
+               echo ${UNAME_MACHINE}-pc-sysv${UNAME_REL}
+       fi
+       exit ;;
+    i*86:*:5:[678]*)
+       # UnixWare 7.x, OpenUNIX and OpenServer 6.
+       case `/bin/uname -X | grep "^Machine"` in
+           *486*)           UNAME_MACHINE=i486 ;;
+           *Pentium)        UNAME_MACHINE=i586 ;;
+           *Pent*|*Celeron) UNAME_MACHINE=i686 ;;
+       esac
+       echo ${UNAME_MACHINE}-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION}
+       exit ;;
+    i*86:*:3.2:*)
+       if test -f /usr/options/cb.name; then
+               UNAME_REL=`sed -n 's/.*Version //p' </usr/options/cb.name`
+               echo ${UNAME_MACHINE}-pc-isc$UNAME_REL
+       elif /bin/uname -X 2>/dev/null >/dev/null ; then
+               UNAME_REL=`(/bin/uname -X|grep Release|sed -e 's/.*= //')`
+               (/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486
+               (/bin/uname -X|grep '^Machine.*Pentium' >/dev/null) \
+                       && UNAME_MACHINE=i586
+               (/bin/uname -X|grep '^Machine.*Pent *II' >/dev/null) \
+                       && UNAME_MACHINE=i686
+               (/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \
+                       && UNAME_MACHINE=i686
+               echo ${UNAME_MACHINE}-pc-sco$UNAME_REL
+       else
+               echo ${UNAME_MACHINE}-pc-sysv32
+       fi
+       exit ;;
+    pc:*:*:*)
+       # Left here for compatibility:
+       # uname -m prints for DJGPP always 'pc', but it prints nothing about
+       # the processor, so we play safe by assuming i586.
+       # Note: whatever this is, it MUST be the same as what config.sub
+       # prints for the "djgpp" host, or else GDB configury will decide that
+       # this is a cross-build.
+       echo i586-pc-msdosdjgpp
+       exit ;;
+    Intel:Mach:3*:*)
+       echo i386-pc-mach3
+       exit ;;
+    paragon:*:*:*)
+       echo i860-intel-osf1
+       exit ;;
+    i860:*:4.*:*) # i860-SVR4
+       if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then
+         echo i860-stardent-sysv${UNAME_RELEASE} # Stardent Vistra i860-SVR4
+       else # Add other i860-SVR4 vendors below as they are discovered.
+         echo i860-unknown-sysv${UNAME_RELEASE}  # Unknown i860-SVR4
+       fi
+       exit ;;
+    mini*:CTIX:SYS*5:*)
+       # "miniframe"
+       echo m68010-convergent-sysv
+       exit ;;
+    mc68k:UNIX:SYSTEM5:3.51m)
+       echo m68k-convergent-sysv
+       exit ;;
+    M680?0:D-NIX:5.3:*)
+       echo m68k-diab-dnix
+       exit ;;
+    M68*:*:R3V[5678]*:*)
+       test -r /sysV68 && { echo 'm68k-motorola-sysv'; exit; } ;;
+    3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0)
+       OS_REL=''
+       test -r /etc/.relid \
+       && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid`
+       /bin/uname -p 2>/dev/null | grep 86 >/dev/null \
+         && { echo i486-ncr-sysv4.3${OS_REL}; exit; }
+       /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \
+         && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;;
+    3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*)
+       /bin/uname -p 2>/dev/null | grep 86 >/dev/null \
+         && { echo i486-ncr-sysv4; exit; } ;;
+    NCR*:*:4.2:* | MPRAS*:*:4.2:*)
+       OS_REL='.3'
+       test -r /etc/.relid \
+           && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid`
+       /bin/uname -p 2>/dev/null | grep 86 >/dev/null \
+           && { echo i486-ncr-sysv4.3${OS_REL}; exit; }
+       /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \
+           && { echo i586-ncr-sysv4.3${OS_REL}; exit; }
+       /bin/uname -p 2>/dev/null | /bin/grep pteron >/dev/null \
+           && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;;
+    m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*)
+       echo m68k-unknown-lynxos${UNAME_RELEASE}
+       exit ;;
+    mc68030:UNIX_System_V:4.*:*)
+       echo m68k-atari-sysv4
+       exit ;;
+    TSUNAMI:LynxOS:2.*:*)
+       echo sparc-unknown-lynxos${UNAME_RELEASE}
+       exit ;;
+    rs6000:LynxOS:2.*:*)
+       echo rs6000-unknown-lynxos${UNAME_RELEASE}
+       exit ;;
+    PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*)
+       echo powerpc-unknown-lynxos${UNAME_RELEASE}
+       exit ;;
+    SM[BE]S:UNIX_SV:*:*)
+       echo mips-dde-sysv${UNAME_RELEASE}
+       exit ;;
+    RM*:ReliantUNIX-*:*:*)
+       echo mips-sni-sysv4
+       exit ;;
+    RM*:SINIX-*:*:*)
+       echo mips-sni-sysv4
+       exit ;;
+    *:SINIX-*:*:*)
+       if uname -p 2>/dev/null >/dev/null ; then
+               UNAME_MACHINE=`(uname -p) 2>/dev/null`
+               echo ${UNAME_MACHINE}-sni-sysv4
+       else
+               echo ns32k-sni-sysv
+       fi
+       exit ;;
+    PENTIUM:*:4.0*:*)  # Unisys `ClearPath HMP IX 4000' SVR4/MP effort
+                       # says <Richard.M.Bartel@ccMail.Census.GOV>
+       echo i586-unisys-sysv4
+       exit ;;
+    *:UNIX_System_V:4*:FTX*)
+       # From Gerald Hewes <hewes@openmarket.com>.
+       # How about differentiating between stratus architectures? -djm
+       echo hppa1.1-stratus-sysv4
+       exit ;;
+    *:*:*:FTX*)
+       # From seanf@swdc.stratus.com.
+       echo i860-stratus-sysv4
+       exit ;;
+    i*86:VOS:*:*)
+       # From Paul.Green@stratus.com.
+       echo ${UNAME_MACHINE}-stratus-vos
+       exit ;;
+    *:VOS:*:*)
+       # From Paul.Green@stratus.com.
+       echo hppa1.1-stratus-vos
+       exit ;;
+    mc68*:A/UX:*:*)
+       echo m68k-apple-aux${UNAME_RELEASE}
+       exit ;;
+    news*:NEWS-OS:6*:*)
+       echo mips-sony-newsos6
+       exit ;;
+    R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*)
+       if [ -d /usr/nec ]; then
+               echo mips-nec-sysv${UNAME_RELEASE}
+       else
+               echo mips-unknown-sysv${UNAME_RELEASE}
+       fi
+       exit ;;
+    BeBox:BeOS:*:*)    # BeOS running on hardware made by Be, PPC only.
+       echo powerpc-be-beos
+       exit ;;
+    BeMac:BeOS:*:*)    # BeOS running on Mac or Mac clone, PPC only.
+       echo powerpc-apple-beos
+       exit ;;
+    BePC:BeOS:*:*)     # BeOS running on Intel PC compatible.
+       echo i586-pc-beos
+       exit ;;
+    BePC:Haiku:*:*)    # Haiku running on Intel PC compatible.
+       echo i586-pc-haiku
+       exit ;;
+    SX-4:SUPER-UX:*:*)
+       echo sx4-nec-superux${UNAME_RELEASE}
+       exit ;;
+    SX-5:SUPER-UX:*:*)
+       echo sx5-nec-superux${UNAME_RELEASE}
+       exit ;;
+    SX-6:SUPER-UX:*:*)
+       echo sx6-nec-superux${UNAME_RELEASE}
+       exit ;;
+    SX-7:SUPER-UX:*:*)
+       echo sx7-nec-superux${UNAME_RELEASE}
+       exit ;;
+    SX-8:SUPER-UX:*:*)
+       echo sx8-nec-superux${UNAME_RELEASE}
+       exit ;;
+    SX-8R:SUPER-UX:*:*)
+       echo sx8r-nec-superux${UNAME_RELEASE}
+       exit ;;
+    Power*:Rhapsody:*:*)
+       echo powerpc-apple-rhapsody${UNAME_RELEASE}
+       exit ;;
+    *:Rhapsody:*:*)
+       echo ${UNAME_MACHINE}-apple-rhapsody${UNAME_RELEASE}
+       exit ;;
+    *:Darwin:*:*)
+       UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown
+       case $UNAME_PROCESSOR in
+           i386)
+               eval $set_cc_for_build
+               if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then
+                 if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \
+                     (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \
+                     grep IS_64BIT_ARCH >/dev/null
+                 then
+                     UNAME_PROCESSOR="x86_64"
+                 fi
+               fi ;;
+           unknown) UNAME_PROCESSOR=powerpc ;;
+       esac
+       echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE}
+       exit ;;
+    *:procnto*:*:* | *:QNX:[0123456789]*:*)
+       UNAME_PROCESSOR=`uname -p`
+       if test "$UNAME_PROCESSOR" = "x86"; then
+               UNAME_PROCESSOR=i386
+               UNAME_MACHINE=pc
+       fi
+       echo ${UNAME_PROCESSOR}-${UNAME_MACHINE}-nto-qnx${UNAME_RELEASE}
+       exit ;;
+    *:QNX:*:4*)
+       echo i386-pc-qnx
+       exit ;;
+    NEO-?:NONSTOP_KERNEL:*:*)
+       echo neo-tandem-nsk${UNAME_RELEASE}
+       exit ;;
+    NSE-?:NONSTOP_KERNEL:*:*)
+       echo nse-tandem-nsk${UNAME_RELEASE}
+       exit ;;
+    NSR-?:NONSTOP_KERNEL:*:*)
+       echo nsr-tandem-nsk${UNAME_RELEASE}
+       exit ;;
+    *:NonStop-UX:*:*)
+       echo mips-compaq-nonstopux
+       exit ;;
+    BS2000:POSIX*:*:*)
+       echo bs2000-siemens-sysv
+       exit ;;
+    DS/*:UNIX_System_V:*:*)
+       echo ${UNAME_MACHINE}-${UNAME_SYSTEM}-${UNAME_RELEASE}
+       exit ;;
+    *:Plan9:*:*)
+       # "uname -m" is not consistent, so use $cputype instead. 386
+       # is converted to i386 for consistency with other x86
+       # operating systems.
+       if test "$cputype" = "386"; then
+           UNAME_MACHINE=i386
+       else
+           UNAME_MACHINE="$cputype"
+       fi
+       echo ${UNAME_MACHINE}-unknown-plan9
+       exit ;;
+    *:TOPS-10:*:*)
+       echo pdp10-unknown-tops10
+       exit ;;
+    *:TENEX:*:*)
+       echo pdp10-unknown-tenex
+       exit ;;
+    KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*)
+       echo pdp10-dec-tops20
+       exit ;;
+    XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*)
+       echo pdp10-xkl-tops20
+       exit ;;
+    *:TOPS-20:*:*)
+       echo pdp10-unknown-tops20
+       exit ;;
+    *:ITS:*:*)
+       echo pdp10-unknown-its
+       exit ;;
+    SEI:*:*:SEIUX)
+       echo mips-sei-seiux${UNAME_RELEASE}
+       exit ;;
+    *:DragonFly:*:*)
+       echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`
+       exit ;;
+    *:*VMS:*:*)
+       UNAME_MACHINE=`(uname -p) 2>/dev/null`
+       case "${UNAME_MACHINE}" in
+           A*) echo alpha-dec-vms ; exit ;;
+           I*) echo ia64-dec-vms ; exit ;;
+           V*) echo vax-dec-vms ; exit ;;
+       esac ;;
+    *:XENIX:*:SysV)
+       echo i386-pc-xenix
+       exit ;;
+    i*86:skyos:*:*)
+       echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE}` | sed -e 's/ .*$//'
+       exit ;;
+    i*86:rdos:*:*)
+       echo ${UNAME_MACHINE}-pc-rdos
+       exit ;;
+    i*86:AROS:*:*)
+       echo ${UNAME_MACHINE}-pc-aros
+       exit ;;
+esac
+
+#echo '(No uname command or uname output not recognized.)' 1>&2
+#echo "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" 1>&2
+
+eval $set_cc_for_build
+cat >$dummy.c <<EOF
+#ifdef _SEQUENT_
+# include <sys/types.h>
+# include <sys/utsname.h>
+#endif
+main ()
+{
+#if defined (sony)
+#if defined (MIPSEB)
+  /* BFD wants "bsd" instead of "newsos".  Perhaps BFD should be changed,
+     I don't know....  */
+  printf ("mips-sony-bsd\n"); exit (0);
+#else
+#include <sys/param.h>
+  printf ("m68k-sony-newsos%s\n",
+#ifdef NEWSOS4
+       "4"
+#else
+       ""
+#endif
+       ); exit (0);
+#endif
+#endif
+
+#if defined (__arm) && defined (__acorn) && defined (__unix)
+  printf ("arm-acorn-riscix\n"); exit (0);
+#endif
+
+#if defined (hp300) && !defined (hpux)
+  printf ("m68k-hp-bsd\n"); exit (0);
+#endif
+
+#if defined (NeXT)
+#if !defined (__ARCHITECTURE__)
+#define __ARCHITECTURE__ "m68k"
+#endif
+  int version;
+  version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`;
+  if (version < 4)
+    printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version);
+  else
+    printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version);
+  exit (0);
+#endif
+
+#if defined (MULTIMAX) || defined (n16)
+#if defined (UMAXV)
+  printf ("ns32k-encore-sysv\n"); exit (0);
+#else
+#if defined (CMU)
+  printf ("ns32k-encore-mach\n"); exit (0);
+#else
+  printf ("ns32k-encore-bsd\n"); exit (0);
+#endif
+#endif
+#endif
+
+#if defined (__386BSD__)
+  printf ("i386-pc-bsd\n"); exit (0);
+#endif
+
+#if defined (sequent)
+#if defined (i386)
+  printf ("i386-sequent-dynix\n"); exit (0);
+#endif
+#if defined (ns32000)
+  printf ("ns32k-sequent-dynix\n"); exit (0);
+#endif
+#endif
+
+#if defined (_SEQUENT_)
+    struct utsname un;
+
+    uname(&un);
+
+    if (strncmp(un.version, "V2", 2) == 0) {
+       printf ("i386-sequent-ptx2\n"); exit (0);
+    }
+    if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct? */
+       printf ("i386-sequent-ptx1\n"); exit (0);
+    }
+    printf ("i386-sequent-ptx\n"); exit (0);
+
+#endif
+
+#if defined (vax)
+# if !defined (ultrix)
+#  include <sys/param.h>
+#  if defined (BSD)
+#   if BSD == 43
+      printf ("vax-dec-bsd4.3\n"); exit (0);
+#   else
+#    if BSD == 199006
+      printf ("vax-dec-bsd4.3reno\n"); exit (0);
+#    else
+      printf ("vax-dec-bsd\n"); exit (0);
+#    endif
+#   endif
+#  else
+    printf ("vax-dec-bsd\n"); exit (0);
+#  endif
+# else
+    printf ("vax-dec-ultrix\n"); exit (0);
+# endif
+#endif
+
+#if defined (alliant) && defined (i860)
+  printf ("i860-alliant-bsd\n"); exit (0);
+#endif
+
+  exit (1);
+}
+EOF
+
+$CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null && SYSTEM_NAME=`$dummy` &&
+       { echo "$SYSTEM_NAME"; exit; }
+
+# Apollos put the system type in the environment.
+
+test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit; }
+
+# Convex versions that predate uname can use getsysinfo(1)
+
+if [ -x /usr/convex/getsysinfo ]
+then
+    case `getsysinfo -f cpu_type` in
+    c1*)
+       echo c1-convex-bsd
+       exit ;;
+    c2*)
+       if getsysinfo -f scalar_acc
+       then echo c32-convex-bsd
+       else echo c2-convex-bsd
+       fi
+       exit ;;
+    c34*)
+       echo c34-convex-bsd
+       exit ;;
+    c38*)
+       echo c38-convex-bsd
+       exit ;;
+    c4*)
+       echo c4-convex-bsd
+       exit ;;
+    esac
+fi
+
+cat >&2 <<EOF
+$0: unable to guess system type
+
+This script, last modified $timestamp, has failed to recognize
+the operating system you are using. It is advised that you
+download the most up to date version of the config scripts from
+
+  http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD
+and
+  http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD
+
+If the version you run ($0) is already up to date, please
+send the following data and any information you think might be
+pertinent to <config-patches@gnu.org> in order to provide the needed
+information to handle your system.
+
+config.guess timestamp = $timestamp
+
+uname -m = `(uname -m) 2>/dev/null || echo unknown`
+uname -r = `(uname -r) 2>/dev/null || echo unknown`
+uname -s = `(uname -s) 2>/dev/null || echo unknown`
+uname -v = `(uname -v) 2>/dev/null || echo unknown`
+
+/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null`
+/bin/uname -X     = `(/bin/uname -X) 2>/dev/null`
+
+hostinfo               = `(hostinfo) 2>/dev/null`
+/bin/universe          = `(/bin/universe) 2>/dev/null`
+/usr/bin/arch -k       = `(/usr/bin/arch -k) 2>/dev/null`
+/bin/arch              = `(/bin/arch) 2>/dev/null`
+/usr/bin/oslevel       = `(/usr/bin/oslevel) 2>/dev/null`
+/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null`
+
+UNAME_MACHINE = ${UNAME_MACHINE}
+UNAME_RELEASE = ${UNAME_RELEASE}
+UNAME_SYSTEM  = ${UNAME_SYSTEM}
+UNAME_VERSION = ${UNAME_VERSION}
+EOF
+
+exit 1
+
+# Local variables:
+# eval: (add-hook 'write-file-hooks 'time-stamp)
+# time-stamp-start: "timestamp='"
+# time-stamp-format: "%:y-%02m-%02d"
+# time-stamp-end: "'"
+# End:
diff --git a/configfsf.sub b/configfsf.sub

new file mode 100644 (file)

index 0000000..30fdca8
--- /dev/null
+++ b/configfsf.sub
@@ -0,0 +1,1760 @@
+#! /bin/sh
+# Configuration validation subroutine script.
+#   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
+#   2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
+#   2011 Free Software Foundation, Inc.
+
+timestamp='2011-03-23'
+
+# This file is (in principle) common to ALL GNU software.
+# The presence of a machine in this file suggests that SOME GNU software
+# can handle that machine.  It does not imply ALL GNU software can.
+#
+# This file is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
+# 02110-1301, USA.
+#
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
+
+# Please send patches to <config-patches@gnu.org>.  Submit a context
+# diff and a properly formatted GNU ChangeLog entry.
+#
+# Configuration subroutine to validate and canonicalize a configuration type.
+# Supply the specified configuration type as an argument.
+# If it is invalid, we print an error message on stderr and exit with code 1.
+# Otherwise, we print the canonical config type on stdout and succeed.
+
+# You can get the latest version of this script from:
+# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD
+
+# This file is supposed to be the same for all GNU packages
+# and recognize all the CPU types, system types and aliases
+# that are meaningful with *any* GNU software.
+# Each package is responsible for reporting which valid configurations
+# it does not support.  The user should be able to distinguish
+# a failure to support a valid configuration from a meaningless
+# configuration.
+
+# The goal of this file is to map all the various variations of a given
+# machine specification into a single specification in the form:
+#      CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM
+# or in some cases, the newer four-part form:
+#      CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM
+# It is wrong to echo any other type of specification.
+
+me=`echo "$0" | sed -e 's,.*/,,'`
+
+usage="\
+Usage: $0 [OPTION] CPU-MFR-OPSYS
+       $0 [OPTION] ALIAS
+
+Canonicalize a configuration name.
+
+Operation modes:
+  -h, --help         print this help, then exit
+  -t, --time-stamp   print date of last modification, then exit
+  -v, --version      print version number, then exit
+
+Report bugs and patches to <config-patches@gnu.org>."
+
+version="\
+GNU config.sub ($timestamp)
+
+Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
+2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free
+Software Foundation, Inc.
+
+This is free software; see the source for copying conditions.  There is NO
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
+
+help="
+Try \`$me --help' for more information."
+
+# Parse command line
+while test $# -gt 0 ; do
+  case $1 in
+    --time-stamp | --time* | -t )
+       echo "$timestamp" ; exit ;;
+    --version | -v )
+       echo "$version" ; exit ;;
+    --help | --h* | -h )
+       echo "$usage"; exit ;;
+    -- )     # Stop option processing
+       shift; break ;;
+    - )        # Use stdin as input.
+       break ;;
+    -* )
+       echo "$me: invalid option $1$help"
+       exit 1 ;;
+
+    *local*)
+       # First pass through any local machine types.
+       echo $1
+       exit ;;
+
+    * )
+       break ;;
+  esac
+done
+
+case $# in
+ 0) echo "$me: missing argument$help" >&2
+    exit 1;;
+ 1) ;;
+ *) echo "$me: too many arguments$help" >&2
+    exit 1;;
+esac
+
+# Separate what the user gave into CPU-COMPANY and OS or KERNEL-OS (if any).
+# Here we must recognize all the valid KERNEL-OS combinations.
+maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'`
+case $maybe_os in
+  nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \
+  linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \
+  knetbsd*-gnu* | netbsd*-gnu* | \
+  kopensolaris*-gnu* | \
+  storm-chaos* | os2-emx* | rtmk-nova*)
+    os=-$maybe_os
+    basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`
+    ;;
+  *)
+    basic_machine=`echo $1 | sed 's/-[^-]*$//'`
+    if [ $basic_machine != $1 ]
+    then os=`echo $1 | sed 's/.*-/-/'`
+    else os=; fi
+    ;;
+esac
+
+### Let's recognize common machines as not being operating systems so
+### that things like config.sub decstation-3100 work.  We also
+### recognize some manufacturers as not being operating systems, so we
+### can provide default operating systems below.
+### Branch order matters: the first matching pattern wins, so specific
+### patterns (e.g. -sco6, -isc) must stay ahead of the general ones
+### (-sco*, -isc*).
+case $os in
+       -sun*os*)
+               # Prevent following clause from handling this invalid input.
+               ;;
+       -dec* | -mips* | -sequent* | -encore* | -pc532* | -sgi* | -sony* | \
+       -att* | -7300* | -3300* | -delta* | -motorola* | -sun[234]* | \
+       -unicom* | -ibm* | -next | -hp | -isi* | -apollo | -altos* | \
+       -convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\
+       -c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \
+       -harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \
+       -apple | -axis | -knuth | -cray | -microblaze)
+               # The last field is a machine or manufacturer, not an OS:
+               # clear os and treat the whole argument as the machine.
+               os=
+               basic_machine=$1
+               ;;
+       -bluegene*)
+               os=-cnk
+               ;;
+       -sim | -cisco | -oki | -wec | -winbond)
+               os=
+               basic_machine=$1
+               ;;
+       -scout)
+               ;;
+       -wrs)
+               os=-vxworks
+               basic_machine=$1
+               ;;
+       -chorusos*)
+               os=-chorusos
+               basic_machine=$1
+               ;;
+       -chorusrdb)
+               os=-chorusrdb
+               basic_machine=$1
+               ;;
+       -hiux*)
+               os=-hiuxwe2
+               ;;
+       # SCO and related x86 systems: the sed expression replaces everything
+       # from "86-" to the end of $1 with "86-pc", i.e. the company field of
+       # a *86 CPU is forced to "pc".
+       -sco6)
+               os=-sco5v6
+               basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+               ;;
+       -sco5)
+               os=-sco3.2v5
+               basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+               ;;
+       -sco4)
+               os=-sco3.2v4
+               basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+               ;;
+       -sco3.2.[4-9]*)
+               os=`echo $os | sed -e 's/sco3.2./sco3.2v/'`
+               basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+               ;;
+       -sco3.2v[4-9]*)
+               # Don't forget version if it is 3.2v4 or newer.
+               basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+               ;;
+       -sco5v6*)
+               # Keep the given sco5v6 version string; only the machine
+               # part is rewritten.
+               basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+               ;;
+       -sco*)
+               # Any other SCO spelling is mapped to sco3.2v2.
+               os=-sco3.2v2
+               basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+               ;;
+       -udk*)
+               basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+               ;;
+       -isc)
+               # A bare "isc" is mapped to isc2.2.
+               os=-isc2.2
+               basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+               ;;
+       -clix*)
+               basic_machine=clipper-intergraph
+               ;;
+       -isc*)
+               basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+               ;;
+       -lynx*)
+               os=-lynxos
+               ;;
+       -ptx*)
+               # Same rewrite as above, but the company becomes "sequent".
+               basic_machine=`echo $1 | sed -e 's/86-.*/86-sequent/'`
+               ;;
+       -windowsnt*)
+               os=`echo $os | sed -e 's/windowsnt/winnt/'`
+               ;;
+       -psos*)
+               os=-psos
+               ;;
+       -mint | -mint[0-9]*)
+               basic_machine=m68k-atari
+               os=-mint
+               ;;
+esac
+
+# Decode aliases for certain CPU-COMPANY combinations.
+case $basic_machine in
+       # Recognize the basic CPU types without company name.
+       # Some are omitted here because they have special meanings below.
+       1750a | 580 \
+       | a29k \
+       | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \
+       | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \
+       | am33_2.0 \
+       | arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr | avr32 \
+       | bfin \
+       | c4x | clipper \
+       | d10v | d30v | dlx | dsp16xx \
+       | fido | fr30 | frv \
+       | h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \
+       | i370 | i860 | i960 | ia64 \
+       | ip2k | iq2000 \
+       | lm32 \
+       | m32c | m32r | m32rle | m68000 | m68k | m88k \
+       | maxq | mb | microblaze | mcore | mep | metag \
+       | mips | mipsbe | mipseb | mipsel | mipsle \
+       | mips16 \
+       | mips64 | mips64el \
+       | mips64octeon | mips64octeonel \
+       | mips64orion | mips64orionel \
+       | mips64r5900 | mips64r5900el \
+       | mips64vr | mips64vrel \
+       | mips64vr4100 | mips64vr4100el \
+       | mips64vr4300 | mips64vr4300el \
+       | mips64vr5000 | mips64vr5000el \
+       | mips64vr5900 | mips64vr5900el \
+       | mipsisa32 | mipsisa32el \
+       | mipsisa32r2 | mipsisa32r2el \
+       | mipsisa64 | mipsisa64el \
+       | mipsisa64r2 | mipsisa64r2el \
+       | mipsisa64sb1 | mipsisa64sb1el \
+       | mipsisa64sr71k | mipsisa64sr71kel \
+       | mipstx39 | mipstx39el \
+       | mn10200 | mn10300 \
+       | moxie \
+       | mt \
+       | msp430 \
+       | nds32 | nds32le | nds32be \
+       | nios | nios2 \
+       | ns16k | ns32k \
+       | open8 \
+       | or32 \
+       | pdp10 | pdp11 | pj | pjl \
+       | powerpc | powerpc64 | powerpc64le | powerpcle \
+       | pyramid \
+       | rx \
+       | score \
+       | sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \
+       | sh64 | sh64le \
+       | sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \
+       | sparcv8 | sparcv9 | sparcv9b | sparcv9v \
+       | spu \
+       | tahoe | tic4x | tic54x | tic55x | tic6x | tic80 | tron \
+       | ubicom32 \
+       | v850 | v850e \
+       | we32k \
+       | x86 | xc16x | xstormy16 | xtensa \
+       | z8k | z80)
+               basic_machine=$basic_machine-unknown
+               ;;
+       c54x)
+               basic_machine=tic54x-unknown
+               ;;
+       c55x)
+               basic_machine=tic55x-unknown
+               ;;
+       c6x)
+               basic_machine=tic6x-unknown
+               ;;
+       m6811 | m68hc11 | m6812 | m68hc12 | picochip)
+               # Motorola 68HC11/12.
+               basic_machine=$basic_machine-unknown
+               os=-none
+               ;;
+       m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | v70 | w65 | z8k)
+               ;;
+       ms1)
+               basic_machine=mt-unknown
+               ;;
+
+       strongarm | thumb | xscale)
+               basic_machine=arm-unknown
+               ;;
+
+       xscaleeb)
+               basic_machine=armeb-unknown
+               ;;
+
+       xscaleel)
+               basic_machine=armel-unknown
+               ;;
+
+       # We use `pc' rather than `unknown'
+       # because (1) that's what they normally are, and
+       # (2) the word "unknown" tends to confuse beginning users.
+       i*86 | x86_64)
+         basic_machine=$basic_machine-pc
+         ;;
+       # Object if more than one company name word.
+       *-*-*)
+               echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2
+               exit 1
+               ;;
+       # Recognize the basic CPU types with company name.
+       580-* \
+       | a29k-* \
+       | alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \
+       | alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \
+       | alphapca5[67]-* | alpha64pca5[67]-* | arc-* \
+       | arm-*  | armbe-* | armle-* | armeb-* | armv*-* \
+       | avr-* | avr32-* \
+       | bfin-* | bs2000-* \
+       | c[123]* | c30-* | [cjt]90-* | c4x-* \
+       | clipper-* | craynv-* | cydra-* \
+       | d10v-* | d30v-* | dlx-* \
+       | elxsi-* \
+       | f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \
+       | h8300-* | h8500-* \
+       | hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \
+       | i*86-* | i860-* | i960-* | ia64-* \
+       | ip2k-* | iq2000-* \
+       | lm32-* \
+       | m32c-* | m32r-* | m32rle-* \
+       | m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \
+       | m88110-* | m88k-* | maxq-* | mcore-* | metag-* | microblaze-* \
+       | mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \
+       | mips16-* \
+       | mips64-* | mips64el-* \
+       | mips64octeon-* | mips64octeonel-* \
+       | mips64orion-* | mips64orionel-* \
+       | mips64r5900-* | mips64r5900el-* \
+       | mips64vr-* | mips64vrel-* \
+       | mips64vr4100-* | mips64vr4100el-* \
+       | mips64vr4300-* | mips64vr4300el-* \
+       | mips64vr5000-* | mips64vr5000el-* \
+       | mips64vr5900-* | mips64vr5900el-* \
+       | mipsisa32-* | mipsisa32el-* \
+       | mipsisa32r2-* | mipsisa32r2el-* \
+       | mipsisa64-* | mipsisa64el-* \
+       | mipsisa64r2-* | mipsisa64r2el-* \
+       | mipsisa64sb1-* | mipsisa64sb1el-* \
+       | mipsisa64sr71k-* | mipsisa64sr71kel-* \
+       | mipstx39-* | mipstx39el-* \
+       | mmix-* \
+       | mt-* \
+       | msp430-* \
+       | nds32-* | nds32le-* | nds32be-* \
+       | nios-* | nios2-* \
+       | none-* | np1-* | ns16k-* | ns32k-* \
+       | open8-* \
+       | orion-* \
+       | pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \
+       | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* \
+       | pyramid-* \
+       | romp-* | rs6000-* | rx-* \
+       | sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \
+       | shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \
+       | sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \
+       | sparclite-* \
+       | sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | sv1-* | sx?-* \
+       | tahoe-* \
+       | tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \
+       | tile-* | tilegx-* \
+       | tron-* \
+       | ubicom32-* \
+       | v850-* | v850e-* | vax-* \
+       | we32k-* \
+       | x86-* | x86_64-* | xc16x-* | xps100-* \
+       | xstormy16-* | xtensa*-* \
+       | ymp-* \
+       | z8k-* | z80-*)
+               ;;
+       # Recognize the basic CPU types without company name, with glob match.
+       xtensa*)
+               basic_machine=$basic_machine-unknown
+               ;;
+       # Recognize the various machine names and aliases which stand
+       # for a CPU type and a company and sometimes even an OS.
+       386bsd)
+               basic_machine=i386-unknown
+               os=-bsd
+               ;;
+       3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc)
+               basic_machine=m68000-att
+               ;;
+       3b*)
+               basic_machine=we32k-att
+               ;;
+       a29khif)
+               basic_machine=a29k-amd
+               os=-udi
+               ;;
+       abacus)
+               basic_machine=abacus-unknown
+               ;;
+       adobe68k)
+               basic_machine=m68010-adobe
+               os=-scout
+               ;;
+       alliant | fx80)
+               basic_machine=fx80-alliant
+               ;;
+       altos | altos3068)
+               basic_machine=m68k-altos
+               ;;
+       am29k)
+               basic_machine=a29k-none
+               os=-bsd
+               ;;
+       amd64)
+               basic_machine=x86_64-pc
+               ;;
+       amd64-*)
+               basic_machine=x86_64-`echo $basic_machine | sed 's/^[^-]*-//'`
+               ;;
+       amdahl)
+               basic_machine=580-amdahl
+               os=-sysv
+               ;;
+       amiga | amiga-*)
+               basic_machine=m68k-unknown
+               ;;
+       amigaos | amigados)
+               basic_machine=m68k-unknown
+               os=-amigaos
+               ;;
+       amigaunix | amix)
+               basic_machine=m68k-unknown
+               os=-sysv4
+               ;;
+       apollo68)
+               basic_machine=m68k-apollo
+               os=-sysv
+               ;;
+       apollo68bsd)
+               basic_machine=m68k-apollo
+               os=-bsd
+               ;;
+       aros)
+               basic_machine=i386-pc
+               os=-aros
+               ;;
+       aux)
+               basic_machine=m68k-apple
+               os=-aux
+               ;;
+       balance)
+               basic_machine=ns32k-sequent
+               os=-dynix
+               ;;
+       blackfin)
+               basic_machine=bfin-unknown
+               os=-linux
+               ;;
+       blackfin-*)
+               basic_machine=bfin-`echo $basic_machine | sed 's/^[^-]*-//'`
+               os=-linux
+               ;;
+       bluegene*)
+               basic_machine=powerpc-ibm
+               os=-cnk
+               ;;
+       c54x-*)
+               basic_machine=tic54x-`echo $basic_machine | sed 's/^[^-]*-//'`
+               ;;
+       c55x-*)
+               basic_machine=tic55x-`echo $basic_machine | sed 's/^[^-]*-//'`
+               ;;
+       c6x-*)
+               basic_machine=tic6x-`echo $basic_machine | sed 's/^[^-]*-//'`
+               ;;
+       c90)
+               basic_machine=c90-cray
+               os=-unicos
+               ;;
+       cegcc)
+               basic_machine=arm-unknown
+               os=-cegcc
+               ;;
+       convex-c1)
+               basic_machine=c1-convex
+               os=-bsd
+               ;;
+       convex-c2)
+               basic_machine=c2-convex
+               os=-bsd
+               ;;
+       convex-c32)
+               basic_machine=c32-convex
+               os=-bsd
+               ;;
+       convex-c34)
+               basic_machine=c34-convex
+               os=-bsd
+               ;;
+       convex-c38)
+               basic_machine=c38-convex
+               os=-bsd
+               ;;
+       cray | j90)
+               basic_machine=j90-cray
+               os=-unicos
+               ;;
+       craynv)
+               basic_machine=craynv-cray
+               os=-unicosmp
+               ;;
+       cr16 | cr16-*)
+               basic_machine=cr16-unknown
+               os=-elf
+               ;;
+       crds | unos)
+               basic_machine=m68k-crds
+               ;;
+       crisv32 | crisv32-* | etraxfs*)
+               basic_machine=crisv32-axis
+               ;;
+       cris | cris-* | etrax*)
+               basic_machine=cris-axis
+               ;;
+       crx)
+               basic_machine=crx-unknown
+               os=-elf
+               ;;
+       da30 | da30-*)
+               basic_machine=m68k-da30
+               ;;
+       decstation | decstation-3100 | pmax | pmax-* | pmin | dec3100 | decstatn)
+               basic_machine=mips-dec
+               ;;
+       decsystem10* | dec10*)
+               basic_machine=pdp10-dec
+               os=-tops10
+               ;;
+       decsystem20* | dec20*)
+               basic_machine=pdp10-dec
+               os=-tops20
+               ;;
+       delta | 3300 | motorola-3300 | motorola-delta \
+             | 3300-motorola | delta-motorola)
+               basic_machine=m68k-motorola
+               ;;
+       delta88)
+               basic_machine=m88k-motorola
+               os=-sysv3
+               ;;
+       dicos)
+               basic_machine=i686-pc
+               os=-dicos
+               ;;
+       djgpp)
+               basic_machine=i586-pc
+               os=-msdosdjgpp
+               ;;
+       dpx20 | dpx20-*)
+               basic_machine=rs6000-bull
+               os=-bosx
+               ;;
+       dpx2* | dpx2*-bull)
+               basic_machine=m68k-bull
+               os=-sysv3
+               ;;
+       ebmon29k)
+               basic_machine=a29k-amd
+               os=-ebmon
+               ;;
+       elxsi)
+               basic_machine=elxsi-elxsi
+               os=-bsd
+               ;;
+       encore | umax | mmax)
+               basic_machine=ns32k-encore
+               ;;
+       es1800 | OSE68k | ose68k | ose | OSE)
+               basic_machine=m68k-ericsson
+               os=-ose
+               ;;
+       fx2800)
+               basic_machine=i860-alliant
+               ;;
+       genix)
+               basic_machine=ns32k-ns
+               ;;
+       gmicro)
+               basic_machine=tron-gmicro
+               os=-sysv
+               ;;
+       go32)
+               basic_machine=i386-pc
+               os=-go32
+               ;;
+       h3050r* | hiux*)
+               basic_machine=hppa1.1-hitachi
+               os=-hiuxwe2
+               ;;
+       h8300hms)
+               basic_machine=h8300-hitachi
+               os=-hms
+               ;;
+       h8300xray)
+               basic_machine=h8300-hitachi
+               os=-xray
+               ;;
+       h8500hms)
+               basic_machine=h8500-hitachi
+               os=-hms
+               ;;
+       harris)
+               basic_machine=m88k-harris
+               os=-sysv3
+               ;;
+       hp300-*)
+               basic_machine=m68k-hp
+               ;;
+       hp300bsd)
+               basic_machine=m68k-hp
+               os=-bsd
+               ;;
+       hp300hpux)
+               basic_machine=m68k-hp
+               os=-hpux
+               ;;
+       hp3k9[0-9][0-9] | hp9[0-9][0-9])
+               basic_machine=hppa1.0-hp
+               ;;
+       hp9k2[0-9][0-9] | hp9k31[0-9])
+               basic_machine=m68000-hp
+               ;;
+       hp9k3[2-9][0-9])
+               basic_machine=m68k-hp
+               ;;
+       hp9k6[0-9][0-9] | hp6[0-9][0-9])
+               basic_machine=hppa1.0-hp
+               ;;
+       hp9k7[0-79][0-9] | hp7[0-79][0-9])
+               basic_machine=hppa1.1-hp
+               ;;
+       hp9k78[0-9] | hp78[0-9])
+               # FIXME: really hppa2.0-hp
+               basic_machine=hppa1.1-hp
+               ;;
+       hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893)
+               # FIXME: really hppa2.0-hp
+               basic_machine=hppa1.1-hp
+               ;;
+       hp9k8[0-9][13679] | hp8[0-9][13679])
+               basic_machine=hppa1.1-hp
+               ;;
+       hp9k8[0-9][0-9] | hp8[0-9][0-9])
+               basic_machine=hppa1.0-hp
+               ;;
+       hppa-next)
+               os=-nextstep3
+               ;;
+       hppaosf)
+               basic_machine=hppa1.1-hp
+               os=-osf
+               ;;
+       hppro)
+               basic_machine=hppa1.1-hp
+               os=-proelf
+               ;;
+       i370-ibm* | ibm*)
+               basic_machine=i370-ibm
+               ;;
+# I'm not sure what "Sysv32" means.  Should this be sysv3.2?
+       i*86v32)
+               basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
+               os=-sysv32
+               ;;
+       i*86v4*)
+               basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
+               os=-sysv4
+               ;;
+       i*86v)
+               basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
+               os=-sysv
+               ;;
+       i*86sol2)
+               basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
+               os=-solaris2
+               ;;
+       i386mach)
+               basic_machine=i386-mach
+               os=-mach
+               ;;
+       i386-vsta | vsta)
+               basic_machine=i386-unknown
+               os=-vsta
+               ;;
+       iris | iris4d)
+               basic_machine=mips-sgi
+               case $os in
+                   -irix*)
+                       ;;
+                   *)
+                       os=-irix4
+                       ;;
+               esac
+               ;;
+       isi68 | isi)
+               basic_machine=m68k-isi
+               os=-sysv
+               ;;
+       m68knommu)
+               basic_machine=m68k-unknown
+               os=-linux
+               ;;
+       m68knommu-*)
+               basic_machine=m68k-`echo $basic_machine | sed 's/^[^-]*-//'`
+               os=-linux
+               ;;
+       m88k-omron*)
+               basic_machine=m88k-omron
+               ;;
+       magnum | m3230)
+               basic_machine=mips-mips
+               os=-sysv
+               ;;
+       merlin)
+               basic_machine=ns32k-utek
+               os=-sysv
+               ;;
+       microblaze)
+               basic_machine=microblaze-xilinx
+               ;;
+       mingw32)
+               basic_machine=i386-pc
+               os=-mingw32
+               ;;
+       mingw32ce)
+               basic_machine=arm-unknown
+               os=-mingw32ce
+               ;;
+       miniframe)
+               basic_machine=m68000-convergent
+               ;;
+       *mint | -mint[0-9]* | *MiNT | *MiNT[0-9]*)
+               basic_machine=m68k-atari
+               os=-mint
+               ;;
+       mips3*-*)
+               basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`
+               ;;
+       mips3*)
+               basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`-unknown
+               ;;
+       monitor)
+               basic_machine=m68k-rom68k
+               os=-coff
+               ;;
+       morphos)
+               basic_machine=powerpc-unknown
+               os=-morphos
+               ;;
+       msdos)
+               basic_machine=i386-pc
+               os=-msdos
+               ;;
+       ms1-*)
+               basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'`
+               ;;
+       mvs)
+               basic_machine=i370-ibm
+               os=-mvs
+               ;;
+       ncr3000)
+               basic_machine=i486-ncr
+               os=-sysv4
+               ;;
+       netbsd386)
+               basic_machine=i386-unknown
+               os=-netbsd
+               ;;
+       netwinder)
+               basic_machine=armv4l-rebel
+               os=-linux
+               ;;
+       news | news700 | news800 | news900)
+               basic_machine=m68k-sony
+               os=-newsos
+               ;;
+       news1000)
+               basic_machine=m68030-sony
+               os=-newsos
+               ;;
+       news-3600 | risc-news)
+               basic_machine=mips-sony
+               os=-newsos
+               ;;
+       necv70)
+               basic_machine=v70-nec
+               os=-sysv
+               ;;
+       next | m*-next )
+               basic_machine=m68k-next
+               case $os in
+                   -nextstep* )
+                       ;;
+                   -ns2*)
+                     os=-nextstep2
+                       ;;
+                   *)
+                     os=-nextstep3
+                       ;;
+               esac
+               ;;
+       nh3000)
+               basic_machine=m68k-harris
+               os=-cxux
+               ;;
+       nh[45]000)
+               basic_machine=m88k-harris
+               os=-cxux
+               ;;
+       nindy960)
+               basic_machine=i960-intel
+               os=-nindy
+               ;;
+       mon960)
+               basic_machine=i960-intel
+               os=-mon960
+               ;;
+       nonstopux)
+               basic_machine=mips-compaq
+               os=-nonstopux
+               ;;
+       np1)
+               basic_machine=np1-gould
+               ;;
+       neo-tandem)
+               basic_machine=neo-tandem
+               ;;
+       nse-tandem)
+               basic_machine=nse-tandem
+               ;;
+       nsr-tandem)
+               basic_machine=nsr-tandem
+               ;;
+       op50n-* | op60c-*)
+               basic_machine=hppa1.1-oki
+               os=-proelf
+               ;;
+       openrisc | openrisc-*)
+               basic_machine=or32-unknown
+               ;;
+       os400)
+               basic_machine=powerpc-ibm
+               os=-os400
+               ;;
+       OSE68000 | ose68000)
+               basic_machine=m68000-ericsson
+               os=-ose
+               ;;
+       os68k)
+               basic_machine=m68k-none
+               os=-os68k
+               ;;
+       pa-hitachi)
+               basic_machine=hppa1.1-hitachi
+               os=-hiuxwe2
+               ;;
+       paragon)
+               basic_machine=i860-intel
+               os=-osf
+               ;;
+       parisc)
+               basic_machine=hppa-unknown
+               os=-linux
+               ;;
+       parisc-*)
+               basic_machine=hppa-`echo $basic_machine | sed 's/^[^-]*-//'`
+               os=-linux
+               ;;
+       pbd)
+               basic_machine=sparc-tti
+               ;;
+       pbb)
+               basic_machine=m68k-tti
+               ;;
+       pc532 | pc532-*)
+               basic_machine=ns32k-pc532
+               ;;
+       pc98)
+               basic_machine=i386-pc
+               ;;
+       pc98-*)
+               basic_machine=i386-`echo $basic_machine | sed 's/^[^-]*-//'`
+               ;;
+       pentium | p5 | k5 | k6 | nexgen | viac3)
+               basic_machine=i586-pc
+               ;;
+       pentiumpro | p6 | 6x86 | athlon | athlon_*)
+               basic_machine=i686-pc
+               ;;
+       pentiumii | pentium2 | pentiumiii | pentium3)
+               basic_machine=i686-pc
+               ;;
+       pentium4)
+               basic_machine=i786-pc
+               ;;
+       pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*)
+               basic_machine=i586-`echo $basic_machine | sed 's/^[^-]*-//'`
+               ;;
+       pentiumpro-* | p6-* | 6x86-* | athlon-*)
+               basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'`
+               ;;
+       pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*)
+               basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'`
+               ;;
+       pentium4-*)
+               basic_machine=i786-`echo $basic_machine | sed 's/^[^-]*-//'`
+               ;;
+       pn)
+               basic_machine=pn-gould
+               ;;
+       power)  basic_machine=power-ibm
+               ;;
+       ppc | ppcbe)    basic_machine=powerpc-unknown
+               ;;
+       ppc-* | ppcbe-*)
+               basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'`
+               ;;
+       ppcle | powerpclittle | ppc-le | powerpc-little)
+               basic_machine=powerpcle-unknown
+               ;;
+       ppcle-* | powerpclittle-*)
+               basic_machine=powerpcle-`echo $basic_machine | sed 's/^[^-]*-//'`
+               ;;
+       ppc64)  basic_machine=powerpc64-unknown
+               ;;
+       ppc64-*) basic_machine=powerpc64-`echo $basic_machine | sed 's/^[^-]*-//'`
+               ;;
+       ppc64le | powerpc64little | ppc64-le | powerpc64-little)
+               basic_machine=powerpc64le-unknown
+               ;;
+       ppc64le-* | powerpc64little-*)
+               basic_machine=powerpc64le-`echo $basic_machine | sed 's/^[^-]*-//'`
+               ;;
+       ps2)
+               basic_machine=i386-ibm
+               ;;
+       pw32)
+               basic_machine=i586-unknown
+               os=-pw32
+               ;;
+       rdos)
+               basic_machine=i386-pc
+               os=-rdos
+               ;;
+       rom68k)
+               basic_machine=m68k-rom68k
+               os=-coff
+               ;;
+       rm[46]00)
+               basic_machine=mips-siemens
+               ;;
+       rtpc | rtpc-*)
+               basic_machine=romp-ibm
+               ;;
+       s390 | s390-*)
+               basic_machine=s390-ibm
+               ;;
+       s390x | s390x-*)
+               basic_machine=s390x-ibm
+               ;;
+       sa29200)
+               basic_machine=a29k-amd
+               os=-udi
+               ;;
+       sb1)
+               basic_machine=mipsisa64sb1-unknown
+               ;;
+       sb1el)
+               basic_machine=mipsisa64sb1el-unknown
+               ;;
+       sde)
+               basic_machine=mipsisa32-sde
+               os=-elf
+               ;;
+       sei)
+               basic_machine=mips-sei
+               os=-seiux
+               ;;
+       sequent)
+               basic_machine=i386-sequent
+               ;;
+       sh)
+               basic_machine=sh-hitachi
+               os=-hms
+               ;;
+       sh5el)
+               basic_machine=sh5le-unknown
+               ;;
+       sh64)
+               basic_machine=sh64-unknown
+               ;;
+       sparclite-wrs | simso-wrs)
+               basic_machine=sparclite-wrs
+               os=-vxworks
+               ;;
+       sps7)
+               basic_machine=m68k-bull
+               os=-sysv2
+               ;;
+       spur)
+               basic_machine=spur-unknown
+               ;;
+       st2000)
+               basic_machine=m68k-tandem
+               ;;
+       stratus)
+               basic_machine=i860-stratus
+               os=-sysv4
+               ;;
+       strongarm-* | thumb-*)
+               basic_machine=arm-`echo $basic_machine | sed 's/^[^-]*-//'`
+               ;;
+       sun2)
+               basic_machine=m68000-sun
+               ;;
+       sun2os3)
+               basic_machine=m68000-sun
+               os=-sunos3
+               ;;
+       sun2os4)
+               basic_machine=m68000-sun
+               os=-sunos4
+               ;;
+       sun3os3)
+               basic_machine=m68k-sun
+               os=-sunos3
+               ;;
+       sun3os4)
+               basic_machine=m68k-sun
+               os=-sunos4
+               ;;
+       sun4os3)
+               basic_machine=sparc-sun
+               os=-sunos3
+               ;;
+       sun4os4)
+               basic_machine=sparc-sun
+               os=-sunos4
+               ;;
+       sun4sol2)
+               basic_machine=sparc-sun
+               os=-solaris2
+               ;;
+       sun3 | sun3-*)
+               basic_machine=m68k-sun
+               ;;
+       sun4)
+               basic_machine=sparc-sun
+               ;;
+       sun386 | sun386i | roadrunner)
+               basic_machine=i386-sun
+               ;;
+       sv1)
+               basic_machine=sv1-cray
+               os=-unicos
+               ;;
+       symmetry)
+               basic_machine=i386-sequent
+               os=-dynix
+               ;;
+       t3e)
+               basic_machine=alphaev5-cray
+               os=-unicos
+               ;;
+       t90)
+               basic_machine=t90-cray
+               os=-unicos
+               ;;
+       # This must be matched before tile*.
+       tilegx*)
+               basic_machine=tilegx-unknown
+               os=-linux-gnu
+               ;;
+       tile*)
+               basic_machine=tile-unknown
+               os=-linux-gnu
+               ;;
+       tx39)
+               basic_machine=mipstx39-unknown
+               ;;
+       tx39el)
+               basic_machine=mipstx39el-unknown
+               ;;
+       toad1)
+               basic_machine=pdp10-xkl
+               os=-tops20
+               ;;
+       tower | tower-32)
+               basic_machine=m68k-ncr
+               ;;
+       tpf)
+               basic_machine=s390x-ibm
+               os=-tpf
+               ;;
+       udi29k)
+               basic_machine=a29k-amd
+               os=-udi
+               ;;
+       ultra3)
+               basic_machine=a29k-nyu
+               os=-sym1
+               ;;
+       v810 | necv810)
+               basic_machine=v810-nec
+               os=-none
+               ;;
+       vaxv)
+               basic_machine=vax-dec
+               os=-sysv
+               ;;
+       vms)
+               basic_machine=vax-dec
+               os=-vms
+               ;;
+       vpp*|vx|vx-*)
+               basic_machine=f301-fujitsu
+               ;;
+       vxworks960)
+               basic_machine=i960-wrs
+               os=-vxworks
+               ;;
+       vxworks68)
+               basic_machine=m68k-wrs
+               os=-vxworks
+               ;;
+       vxworks29k)
+               basic_machine=a29k-wrs
+               os=-vxworks
+               ;;
+       w65*)
+               basic_machine=w65-wdc
+               os=-none
+               ;;
+       w89k-*)
+               basic_machine=hppa1.1-winbond
+               os=-proelf
+               ;;
+       xbox)
+               basic_machine=i686-pc
+               os=-mingw32
+               ;;
+       xps | xps100)
+               basic_machine=xps100-honeywell
+               ;;
+       xscale-* | xscalee[bl]-*)
+               basic_machine=`echo $basic_machine | sed 's/^xscale/arm/'`
+               ;;
+       ymp)
+               basic_machine=ymp-cray
+               os=-unicos
+               ;;
+       z8k-*-coff)
+               basic_machine=z8k-unknown
+               os=-sim
+               ;;
+       z80-*-coff)
+               basic_machine=z80-unknown
+               os=-sim
+               ;;
+       none)
+               basic_machine=none-none
+               os=-none
+               ;;
+
+# Here we handle the default manufacturer of certain CPU types.  It is in
+# some cases the only manufacturer, in others, it is the most popular.
+       w89k)
+               basic_machine=hppa1.1-winbond
+               ;;
+       op50n)
+               basic_machine=hppa1.1-oki
+               ;;
+       op60c)
+               basic_machine=hppa1.1-oki
+               ;;
+       romp)
+               basic_machine=romp-ibm
+               ;;
+       mmix)
+               basic_machine=mmix-knuth
+               ;;
+       rs6000)
+               basic_machine=rs6000-ibm
+               ;;
+       vax)
+               basic_machine=vax-dec
+               ;;
+       pdp10)
+               # there are many clones, so DEC is not a safe bet
+               basic_machine=pdp10-unknown
+               ;;
+       pdp11)
+               basic_machine=pdp11-dec
+               ;;
+       we32k)
+               basic_machine=we32k-att
+               ;;
+       sh[1234] | sh[24]a | sh[24]aeb | sh[34]eb | sh[1234]le | sh[23]ele)
+               basic_machine=sh-unknown
+               ;;
+       sparc | sparcv8 | sparcv9 | sparcv9b | sparcv9v)
+               basic_machine=sparc-sun
+               ;;
+       cydra)
+               basic_machine=cydra-cydrome
+               ;;
+       orion)
+               basic_machine=orion-highlevel
+               ;;
+       orion105)
+               basic_machine=clipper-highlevel
+               ;;
+       mac | mpw | mac-mpw)
+               basic_machine=m68k-apple
+               ;;
+       pmac | pmac-mpw)
+               basic_machine=powerpc-apple
+               ;;
+       *-unknown)
+               # Make sure to match an already-canonicalized machine name.
+               ;;
+       *)
+               echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2
+               exit 1
+               ;;
+esac
+
+# Rewrite long-form manufacturer names to their canonical short tags:
+# "digital..." becomes "dec" and "commodore..." becomes "cbm".  The sed
+# expression replaces everything from the alias to the end of the string.
+# Machine names matching neither pattern pass through untouched.
+case $basic_machine in
+       *-digital*)   basic_machine=`echo $basic_machine | sed -e 's/digital.*/dec/'` ;;
+       *-commodore*) basic_machine=`echo $basic_machine | sed -e 's/commodore.*/cbm/'` ;;
+esac
+
+# Decode manufacturer-specific aliases for certain operating systems.
+
+if [ x"$os" != x"" ]
+then
+case $os in # Normalize a non-empty $os; arms are tried top to bottom and the first match wins.
+       # First match some system type aliases
+       # that might get confused with valid system types.
+       # -solaris* is a basic system type, except for -solaris1 (mapped to sunos4 below).
+       -auroraux)
+               os=-auroraux # no-op: the pattern only matches this exact string
+               ;;
+       -solaris1 | -solaris1.*) # e.g. -solaris1.1 becomes -sunos4.1
+               os=`echo $os | sed -e 's|solaris1|sunos4|'`
+               ;;
+       -solaris) # an unversioned -solaris becomes -solaris2
+               os=-solaris2
+               ;;
+       -svr4*)
+               os=-sysv4
+               ;;
+       -unixware*)
+               os=-sysv4.2uw
+               ;;
+       -gnu/linux*)
+               os=`echo $os | sed -e 's|gnu/linux|linux-gnu|'`
+               ;;
+       # Next accept the basic system types unchanged (the arm body is empty).
+       # The portable systems come first.
+       # Each alternative MUST END IN A *, to match a version number.
+       # -sysv* is not here because it comes later, after sysvr4.
+       -gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \
+             | -*vms* | -sco* | -esix* | -isc* | -aix* | -cnk* | -sunos | -sunos[34]*\
+             | -hpux* | -unos* | -osf* | -luna* | -dgux* | -auroraux* | -solaris* \
+             | -sym* | -kopensolaris* \
+             | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \
+             | -aos* | -aros* \
+             | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \
+             | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \
+             | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \
+             | -openbsd* | -solidbsd* \
+             | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \
+             | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \
+             | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \
+             | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \
+             | -chorusos* | -chorusrdb* | -cegcc* \
+             | -cygwin* | -pe* | -psos* | -moss* | -proelf* | -rtems* \
+             | -mingw32* | -linux-gnu* | -linux-android* \
+             | -linux-newlib* | -linux-uclibc* \
+             | -uxpv* | -beos* | -mpeix* | -udk* \
+             | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \
+             | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \
+             | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \
+             | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \
+             | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \
+             | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \
+             | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es*)
+       # Remember, each alternative MUST END IN *, to match a version number.
+               ;;
+       -qnx*)
+               case $basic_machine in
+                   x86-* | i*86-*) # these CPUs keep the plain -qnx name
+                       ;;
+                   *)
+                       os=-nto$os # e.g. -qnx6 becomes -nto-qnx6
+                       ;;
+               esac
+               ;;
+       -nto-qnx*) # already in canonical form
+               ;;
+       -nto*) # e.g. -nto becomes -nto-qnx
+               os=`echo $os | sed -e 's|nto|nto-qnx|'`
+               ;;
+       -sim | -es1800* | -hms* | -xray | -os68k* | -none* | -v88r* \
+             | -windows* | -osx | -abug | -netware* | -os9* | -beos* | -haiku* \
+             | -macos* | -mpw* | -magic* | -mmixware* | -mon960* | -lnews*) # accepted as-is; NOTE(review): -es1800* (via -es*), -hms*, -beos* and -haiku* are already caught by the list above
+               ;;
+       -mac*) # e.g. -mac becomes -macos
+               os=`echo $os | sed -e 's|mac|macos|'`
+               ;;
+       -linux-dietlibc)
+               os=-linux-dietlibc # no-op; listed so the -linux* arm below does not rewrite it
+               ;;
+       -linux*) # e.g. -linux becomes -linux-gnu
+               os=`echo $os | sed -e 's|linux|linux-gnu|'`
+               ;;
+       -sunos5*)
+               os=`echo $os | sed -e 's|sunos5|solaris2|'`
+               ;;
+       -sunos6*)
+               os=`echo $os | sed -e 's|sunos6|solaris3|'`
+               ;;
+       -opened*) # NOTE(review): never reached; -opened* is already accepted by the basic-type list above
+               os=-openedition
+               ;;
+       -os400*)
+               os=-os400
+               ;;
+       -wince*)
+               os=-wince
+               ;;
+       -osfrose*) # NOTE(review): never reached; -osf* in the basic-type list above matches first
+               os=-osfrose
+               ;;
+       -osf*) # NOTE(review): never reached; -osf* is already accepted by the basic-type list above
+               os=-osf
+               ;;
+       -utek*)
+               os=-bsd
+               ;;
+       -dynix*)
+               os=-bsd
+               ;;
+       -acis*)
+               os=-aos
+               ;;
+       -atheos*)
+               os=-atheos
+               ;;
+       -syllable*)
+               os=-syllable
+               ;;
+       -386bsd) # NOTE(review): never reached; -386bsd* in the basic-type list above matches first
+               os=-bsd
+               ;;
+       -ctix* | -uts*)
+               os=-sysv
+               ;;
+       -nova*)
+               os=-rtmk-nova
+               ;;
+       -ns2 )
+               os=-nextstep2
+               ;;
+       -nsk*)
+               os=-nsk
+               ;;
+       # Preserve the version number of sinix5.
+       -sinix5.*)
+               os=`echo $os | sed -e 's|sinix|sysv|'`
+               ;;
+       -sinix*)
+               os=-sysv4
+               ;;
+       -tpf*)
+               os=-tpf
+               ;;
+       -triton*)
+               os=-sysv3
+               ;;
+       -oss*)
+               os=-sysv3
+               ;;
+       -svr4) # NOTE(review): never reached; the -svr4* arm near the top matches first (same result)
+               os=-sysv4
+               ;;
+       -svr3)
+               os=-sysv3
+               ;;
+       -sysvr4)
+               os=-sysv4
+               ;;
+       # This must come after -sysvr4.
+       -sysv*)
+               ;;
+       -ose*)
+               os=-ose
+               ;;
+       -es1800*) # NOTE(review): never reached; -es* in the basic-type list above matches first
+               os=-ose
+               ;;
+       -xenix) # NOTE(review): never reached; -xenix* in the basic-type list above matches first
+               os=-xenix
+               ;;
+       -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*)
+               os=-mint
+               ;;
+       -aros*) # NOTE(review): never reached; -aros* is already accepted by the basic-type list above
+               os=-aros
+               ;;
+       -kaos*)
+               os=-kaos
+               ;;
+       -zvmoe)
+               os=-zvmoe
+               ;;
+       -dicos*)
+               os=-dicos
+               ;;
+       -nacl*)
+               ;;
+       -none) # NOTE(review): never reached; -none* in the accepted-as-is list above matches first
+               ;;
+       *) # anything not matched above is rejected with an error
+               # Get rid of the `-' at the beginning of $os.
+               os=`echo $os | sed 's/[^-]*-//'`
+               echo Invalid configuration \`$1\': system \`$os\' not recognized 1>&2
+               exit 1
+               ;;
+esac
+else
+
+# Here we handle the default operating systems that come with various machines.
+# The value should be what the vendor currently ships out the door with their
+# machine or, put another way, the most popular OS provided with the machine.
+
+# Note that if you're going to try to match "-MANUFACTURER" here (say,
+# "-sun"), then you have to tell the case statement up towards the top
+# that MANUFACTURER isn't an operating system.  Otherwise, code above
+# will signal an error saying that MANUFACTURER isn't an operating
+# system, and we'll never get to this point.
+
+# Choose a default OS from the machine name alone.  Arms are tried in order
+# and the first match wins, so the ordering notes on individual arms matter.
+# Anything not listed falls through to -none.
+case $basic_machine in
+       score-*)
+               os=-elf
+               ;;
+       spu-*)
+               os=-elf
+               ;;
+       *-acorn)
+               os=-riscix1.2
+               ;;
+       arm*-rebel)
+               os=-linux
+               ;;
+       arm*-semi)
+               os=-aout
+               ;;
+       c4x-* | tic4x-*)
+               os=-coff
+               ;;
+       tic54x-*)
+               os=-coff
+               ;;
+       tic55x-*)
+               os=-coff
+               ;;
+       tic6x-*)
+               os=-coff
+               ;;
+       # This must come before the *-dec entry.
+       pdp10-*)
+               os=-tops20
+               ;;
+       pdp11-*)
+               os=-none
+               ;;
+       *-dec | vax-*)
+               os=-ultrix4.2
+               ;;
+       m68*-apollo)
+               os=-domain
+               ;;
+       i386-sun)
+               os=-sunos4.0.2
+               ;;
+       m68000-sun)
+               os=-sunos3
+               # This also exists in the configure program, but was not the
+               # default.
+               # os=-sunos4
+               ;;
+       m68*-cisco)
+               os=-aout
+               ;;
+       mep-*)
+               os=-elf
+               ;;
+       mips*-cisco)
+               os=-elf
+               ;;
+       mips*-*)
+               os=-elf
+               ;;
+       or32-*)
+               os=-coff
+               ;;
+       *-tti)  # must be before sparc entry or we get the wrong os.
+               os=-sysv3
+               ;;
+       sparc-* | *-sun)
+               os=-sunos4.1.1
+               ;;
+       *-be)
+               os=-beos
+               ;;
+       *-haiku)
+               os=-haiku
+               ;;
+       *-ibm)
+               os=-aix
+               ;;
+       *-knuth)
+               os=-mmixware
+               ;;
+       *-wec)
+               os=-proelf
+               ;;
+       *-winbond)
+               os=-proelf
+               ;;
+       *-oki)
+               os=-proelf
+               ;;
+       *-hp)
+               os=-hpux
+               ;;
+       *-hitachi)
+               os=-hiux
+               ;;
+       i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent)
+               os=-sysv
+               ;;
+       *-cbm)
+               os=-amigaos
+               ;;
+       *-dg)
+               os=-dgux
+               ;;
+       *-dolphin)
+               os=-sysv3
+               ;;
+       m68k-ccur)
+               os=-rtu
+               ;;
+       m88k-omron*)
+               os=-luna
+               ;;
+       # This is the only *-next arm: case takes the first matching pattern,
+       # so a second *-next arm further down would be dead code.
+       *-next)
+               os=-nextstep
+               ;;
+       *-sequent)
+               os=-ptx
+               ;;
+       *-crds)
+               os=-unos
+               ;;
+       *-ns)
+               os=-genix
+               ;;
+       i370-*)
+               os=-mvs
+               ;;
+       *-gould)
+               os=-sysv
+               ;;
+       *-highlevel)
+               os=-bsd
+               ;;
+       *-encore)
+               os=-bsd
+               ;;
+       *-sgi)
+               os=-irix
+               ;;
+       *-siemens)
+               os=-sysv4
+               ;;
+       *-masscomp)
+               os=-rtu
+               ;;
+       f30[01]-fujitsu | f700-fujitsu)
+               os=-uxpv
+               ;;
+       *-rom68k)
+               os=-coff
+               ;;
+       *-*bug)
+               os=-coff
+               ;;
+       *-apple)
+               os=-macos
+               ;;
+       *-atari*)
+               os=-mint
+               ;;
+       *)
+               os=-none
+               ;;
+esac
+fi
+
+# If the manufacturer is still "unknown", infer it from operating systems
+# that imply a particular vendor.  When no OS below matches, $vendor stays
+# "unknown" and the substitution leaves the machine name as it was.
+vendor=unknown
+case $basic_machine in
+       *-unknown)
+               case $os in
+                       -riscix*) vendor=acorn ;;
+                       -sunos*) vendor=sun ;;
+                       -cnk* | -aix* | -mvs* | -opened* | -os400* | -tpf*) vendor=ibm ;;
+                       -beos*) vendor=be ;;
+                       -hpux* | -mpeix*) vendor=hp ;;
+                       -hiux* | -hms*) vendor=hitachi ;;
+                       -unos*) vendor=crds ;;
+                       -dgux*) vendor=dg ;;
+                       -luna*) vendor=omron ;;
+                       -genix*) vendor=ns ;;
+                       -ptx*) vendor=sequent ;;
+                       -vxsim* | -vxworks* | -windiss*) vendor=wrs ;;
+                       -aux* | -mpw* | -macos*) vendor=apple ;;
+                       # The wildcard-led MiNT patterns can overlap the fixed
+                       # prefixes: every arm above takes precedence over them,
+                       # and they take precedence over -vos*.
+                       -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*) vendor=atari ;;
+                       -vos*) vendor=stratus ;;
+               esac
+               basic_machine=`echo $basic_machine | sed -e "s/unknown/$vendor/"`
+               ;;
+esac
+
+# Print the canonical name; $os already starts with its own "-" separator.
+# The expansion is quoted so the value is written out verbatim, without
+# field splitting or pathname expansion being applied to it.
+echo "$basic_machine$os"
+exit
+
+# Local variables:
+# eval: (add-hook 'write-file-hooks 'time-stamp)
+# time-stamp-start: "timestamp='"
+# time-stamp-format: "%:y-%02m-%02d"
+# time-stamp-end: "'"
+# End:
diff --git a/configure b/configure

new file mode 100755 (executable)

index 0000000..e8378cc
--- /dev/null
+++ b/configure
@@ -0,0 +1,32197 @@
+#! /bin/sh
+# From configure.in Revision.
+# Guess values for system-dependent variables and create Makefiles.
+# Generated by GNU Autoconf 2.65 for GNU MP 5.0.5.
+#
+# Report bugs to <gmp-bugs@gmplib.org, see http://gmplib.org/manual/Reporting-Bugs.html>.
+#
+#
+#
+# Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
+# 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation; either version 3 of the License, or (at
+# your option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+#
+#
+#
+# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
+# 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
+# Inc.
+#
+#
+# This configure script is free software; the Free Software Foundation
+# gives unlimited permission to copy, distribute and modify it.
+## -------------------- ##
+## M4sh Initialization. ##
+## -------------------- ##
+
+# Be more Bourne compatible
+DUALCASE=1; export DUALCASE # for MKS sh
+if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then :
+  emulate sh
+  NULLCMD=:
+  # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which
+  # is contrary to our usage.  Disable this feature.
+  alias -g '${1+"$@"}'='"$@"'
+  setopt NO_GLOB_SUBST
+else
+  case `(set -o) 2>/dev/null` in #(
+  *posix*) :
+    set -o posix ;; #(
+  *) :
+     ;;
+esac
+fi
+
+
+as_nl='
+'
+export as_nl
+# Printing a long string crashes Solaris 7 /usr/bin/printf.
+as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\'
+as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo
+as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo
+# Prefer a ksh shell builtin over an external printf program on Solaris,
+# but without wasting forks for bash or zsh.
+if test -z "$BASH_VERSION$ZSH_VERSION" \
+    && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then
+  as_echo='print -r --'
+  as_echo_n='print -rn --'
+elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then
+  as_echo='printf %s\n'
+  as_echo_n='printf %s'
+else
+  if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then
+    as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"'
+    as_echo_n='/usr/ucb/echo -n'
+  else
+    as_echo_body='eval expr "X$1" : "X\\(.*\\)"'
+    as_echo_n_body='eval
+      arg=$1;
+      case $arg in #(
+      *"$as_nl"*)
+       expr "X$arg" : "X\\(.*\\)$as_nl";
+       arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;;
+      esac;
+      expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl"
+    '
+    export as_echo_n_body
+    as_echo_n='sh -c $as_echo_n_body as_echo'
+  fi
+  export as_echo_body
+  as_echo='sh -c $as_echo_body as_echo'
+fi
+
+# The user is always right: a PATH_SEPARATOR that is already set is kept untouched.
+if test "${PATH_SEPARATOR+set}" != set; then
+  PATH_SEPARATOR=:  # default, overridden below only if the probe says otherwise
+  (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && {
+    (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 ||
+      PATH_SEPARATOR=';'  # sh ran with a ';'-separated PATH but not with a ':'-separated one
+  }
+fi
+
+
+# IFS
+# We need space, tab and new line, in precisely that order.  Quoting is
+# there to prevent editors from complaining about space-tab.
+# (If _AS_PATH_WALK were called with IFS unset, it would disable word
+# splitting by setting IFS to empty value.)
+IFS=" ""	$as_nl"  # the second quoted string must begin with one literal TAB, never with spaces
+
+# Find who we are.  Look in the path if we contain no directory separator.
+case $0 in #((
+  *[\\/]* ) as_myself=$0 ;;
+  *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break
+  done
+IFS=$as_save_IFS
+
+     ;;
+esac
+# We did not find ourselves, most probably we were run as `sh COMMAND'
+# in which case we are not to be found in the path.
+if test "x$as_myself" = x; then
+  as_myself=$0
+fi
+if test ! -f "$as_myself"; then
+  $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2
+  exit 1
+fi
+
+# Unset variables that we do not need and which cause bugs (e.g. in
+# pre-3.0 UWIN ksh).  But do not cause bugs in bash 2.01; the "|| exit 1"
+# suppresses any "Segmentation fault" message there.  '((' could
+# trigger a bug in pdksh 5.2.14.
+for as_var in BASH_ENV ENV MAIL MAILPATH
+do eval test x\${$as_var+set} = xset \
+  && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || :
+done
+PS1='$ '
+PS2='> '
+PS4='+ '
+
+# NLS nuisances: force the C locale for this script and every command it runs.
+LC_ALL=C
+export LC_ALL
+LANGUAGE=C
+export LANGUAGE
+
+# CDPATH: unset it, but only after a subshell has shown that `unset CDPATH' works here.
+(unset CDPATH) >/dev/null 2>&1 && unset CDPATH
+
+if test "x$CONFIG_SHELL" = x; then
+  as_bourne_compatible="if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then :
+  emulate sh
+  NULLCMD=:
+  # Pre-4.2 versions of Zsh do word splitting on \${1+\"\$@\"}, which
+  # is contrary to our usage.  Disable this feature.
+  alias -g '\${1+\"\$@\"}'='\"\$@\"'
+  setopt NO_GLOB_SUBST
+else
+  case \`(set -o) 2>/dev/null\` in #(
+  *posix*) :
+    set -o posix ;; #(
+  *) :
+     ;;
+esac
+fi
+"
+  as_required="as_fn_return () { (exit \$1); }
+as_fn_success () { as_fn_return 0; }
+as_fn_failure () { as_fn_return 1; }
+as_fn_ret_success () { return 0; }
+as_fn_ret_failure () { return 1; }
+
+exitcode=0
+as_fn_success || { exitcode=1; echo as_fn_success failed.; }
+as_fn_failure && { exitcode=1; echo as_fn_failure succeeded.; }
+as_fn_ret_success || { exitcode=1; echo as_fn_ret_success failed.; }
+as_fn_ret_failure && { exitcode=1; echo as_fn_ret_failure succeeded.; }
+if ( set x; as_fn_ret_success y && test x = \"\$1\" ); then :
+
+else
+  exitcode=1; echo positional parameters were not saved.
+fi
+test x\$exitcode = x0 || exit 1"
+  as_suggested="  as_lineno_1=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_1a=\$LINENO
+  as_lineno_2=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_2a=\$LINENO
+  eval 'test \"x\$as_lineno_1'\$as_run'\" != \"x\$as_lineno_2'\$as_run'\" &&
+  test \"x\`expr \$as_lineno_1'\$as_run' + 1\`\" = \"x\$as_lineno_2'\$as_run'\"' || exit 1
+test \$(( 1 + 1 )) = 2 || exit 1
+
+  test -n \"\${ZSH_VERSION+set}\${BASH_VERSION+set}\" || (
+    ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\'
+    ECHO=\$ECHO\$ECHO\$ECHO\$ECHO\$ECHO
+    ECHO=\$ECHO\$ECHO\$ECHO\$ECHO\$ECHO\$ECHO
+    PATH=/empty FPATH=/empty; export PATH FPATH
+    test \"X\`printf %s \$ECHO\`\" = \"X\$ECHO\" \\
+      || test \"X\`print -r -- \$ECHO\`\" = \"X\$ECHO\" ) || exit 1"
+  if (eval "$as_required") 2>/dev/null; then :
+  as_have_required=yes
+else
+  as_have_required=no
+fi
+  if test x$as_have_required = xyes && (eval "$as_suggested") 2>/dev/null; then :
+
+else
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+as_found=false
+for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+  as_found=:
+  case $as_dir in #(
+        /*)
+          for as_base in sh bash ksh sh5; do
+            # Try only shells that exist, to save several forks.
+            as_shell=$as_dir/$as_base
+            if { test -f "$as_shell" || test -f "$as_shell.exe"; } &&
+                   { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$as_shell"; } 2>/dev/null; then :
+  CONFIG_SHELL=$as_shell as_have_required=yes
+                  if { $as_echo "$as_bourne_compatible""$as_suggested" | as_run=a "$as_shell"; } 2>/dev/null; then :
+  break 2
+fi
+fi
+          done;;
+       esac
+  as_found=false
+done
+$as_found || { if { test -f "$SHELL" || test -f "$SHELL.exe"; } &&
+             { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$SHELL"; } 2>/dev/null; then :
+  CONFIG_SHELL=$SHELL as_have_required=yes
+fi; }
+IFS=$as_save_IFS
+
+
+      if test "x$CONFIG_SHELL" != x; then :
+  # We cannot yet assume a decent shell, so we have to provide a
+       # neutralization value for shells without unset; and this also
+       # works around shells that cannot unset nonexistent variables.
+       BASH_ENV=/dev/null
+       ENV=/dev/null
+       (unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV
+       export CONFIG_SHELL
+       exec "$CONFIG_SHELL" "$as_myself" ${1+"$@"}
+fi
+
+    if test x$as_have_required = xno; then :
+  $as_echo "$0: This script requires a shell more modern than all"
+  $as_echo "$0: the shells that I found on your system."
+  if test x${ZSH_VERSION+set} = xset ; then
+    $as_echo "$0: In particular, zsh $ZSH_VERSION has bugs and should"
+    $as_echo "$0: be upgraded to zsh 4.3.4 or later."
+  else
+    $as_echo "$0: Please tell bug-autoconf@gnu.org and
+$0: gmp-bugs@gmplib.org, see
+$0: http://gmplib.org/manual/Reporting-Bugs.html about your
+$0: system, including any error possibly output before this
+$0: message. Then install a modern shell, or manually run
+$0: the script under such a shell if you do have one."
+  fi
+  exit 1
+fi
+fi
+fi
+SHELL=${CONFIG_SHELL-/bin/sh}
+export SHELL
+# Unset more variables known to interfere with behavior of common tools.
+CLICOLOR_FORCE= GREP_OPTIONS=
+unset CLICOLOR_FORCE GREP_OPTIONS
+
+## --------------------- ##
+## M4sh Shell Functions. ##
+## --------------------- ##
+# as_fn_unset VAR
+# ---------------
+# Portably unset VAR: first assign it the empty string, then unset it.
+as_fn_unset ()
+{
+  { eval $1=; unset $1;}  # $1 is the variable NAME; eval is needed to assign through it
+}
+as_unset=as_fn_unset  # as_unset holds the name of the function defined above
+
+# as_fn_set_status STATUS
+# -----------------------
+# Set $? to STATUS, without forking.
+as_fn_set_status ()
+{
+  return $1  # a function's return value becomes $? in the caller; no subshell is involved
+} # as_fn_set_status
+
+# as_fn_exit STATUS
+# -----------------
+# Exit the shell with STATUS, even in a "trap 0" or "set -e" context.
+as_fn_exit ()
+{
+  set +e  # switch errexit off so the non-zero status set on the next line cannot abort early
+  as_fn_set_status $1  # make $? equal STATUS just before exiting (presumably for an exit trap)
+  exit $1
+} # as_fn_exit
+
+# as_fn_mkdir_p
+# -------------
+# Create "$as_dir" as a directory, including parents if necessary.
+as_fn_mkdir_p ()
+{
+  # Takes no arguments: the path is read from $as_dir, which the fallback loop below overwrites.
+  case $as_dir in #(
+  -*) as_dir=./$as_dir;;  # prefix ./ so a leading dash is not taken as an option
+  esac
+  test -d "$as_dir" || eval $as_mkdir_p || {  # not there and $as_mkdir_p failed: build it by hand
+    as_dirs=  # single-quoted directories still to be created, outermost first
+    while :; do
+      case $as_dir in #(
+      *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'(
+      *) as_qdir=$as_dir;;
+      esac
+      as_dirs="'$as_qdir' $as_dirs"
+      as_dir=`$as_dirname -- "$as_dir" ||
+$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+        X"$as_dir" : 'X\(//\)[^/]' \| \
+        X"$as_dir" : 'X\(//\)$' \| \
+        X"$as_dir" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X"$as_dir" |
+    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+           s//\1/
+           q
+         }
+         /^X\(\/\/\)[^/].*/{
+           s//\1/
+           q
+         }
+         /^X\(\/\/\)$/{
+           s//\1/
+           q
+         }
+         /^X\(\/\).*/{
+           s//\1/
+           q
+         }
+         s/.*/./; q'`
+      test -d "$as_dir" && break  # stop at the first ancestor that already exists
+    done
+    test -z "$as_dirs" || eval "mkdir $as_dirs"  # one mkdir call for all collected directories
+  } || test -d "$as_dir" || as_fn_error "cannot create directory $as_dir"
+
+
+} # as_fn_mkdir_p
+# as_fn_append VAR VALUE
+# ----------------------
+# Append the text in VALUE to the end of the definition contained in VAR. Take
+# advantage of any shell optimizations that allow amortized linear growth over
+# repeated appends, instead of the typical quadratic growth present in naive
+# implementations.
+if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then :  # subshell probe: does VAR+=VALUE work?
+  eval 'as_fn_append ()
+  {
+    eval $1+=\$2
+  }'
+else  # fallback: reassign VAR to its current value followed by VALUE
+  as_fn_append ()
+  {
+    eval $1=\$$1\$2
+  }
+fi # as_fn_append
+
+# as_fn_arith ARG...
+# ------------------
+# Perform arithmetic evaluation on the ARGs, and store the result in the
+# global $as_val. Take advantage of shells that can avoid forks. The arguments
+# must be portable across $(()) and expr.
+if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then :  # subshell probe: is $(( )) arithmetic available?
+  eval 'as_fn_arith ()
+  {
+    as_val=$(( $* ))
+  }'
+else  # fallback: run the external expr utility
+  as_fn_arith ()
+  {
+    as_val=`expr "$@" || test $? -eq 1`  # expr status 1 (null or zero result) is not treated as a failure
+  }
+fi # as_fn_arith
+
+
+# as_fn_error ERROR [LINENO LOG_FD]
+# ---------------------------------
+# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are
+# provided, also output the error to LOG_FD, referencing LINENO. Then exit the
+# script with status $?, using 1 if that was 0.
+as_fn_error ()
+{
+  as_status=$?; test $as_status -eq 0 && as_status=1  # must come first: $? still holds the caller's status
+  if test "$3"; then  # a log file descriptor was supplied
+    as_lineno=${as_lineno-"$2"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+    $as_echo "$as_me:${as_lineno-$LINENO}: error: $1" >&$3
+  fi
+  $as_echo "$as_me: error: $1" >&2
+  as_fn_exit $as_status  # does not return
+} # as_fn_error
+
+if expr a : '\(a\)' >/dev/null 2>&1 &&
+   test "X`expr 00001 : '.*\(...\)'`" = X001; then
+  as_expr=expr  # expr supports \(...\) captures and returns the match as text ("001")
+else
+  as_expr=false  # any later $as_expr call then simply fails
+fi
+
+if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then
+  as_basename=basename  # basename accepts "--" and maps "/" to "/"
+else
+  as_basename=false  # any later $as_basename call then simply fails
+fi
+
+if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then
+  as_dirname=dirname  # dirname accepts "--" and maps "/" to "/"
+else
+  as_dirname=false  # any later $as_dirname call then simply fails
+fi
+
+as_me=`$as_basename -- "$0" ||
+$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \
+        X"$0" : 'X\(//\)$' \| \
+        X"$0" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X/"$0" |
+    sed '/^.*\/\([^/][^/]*\)\/*$/{
+           s//\1/
+           q
+         }
+         /^X\/\(\/\/\)$/{
+           s//\1/
+           q
+         }
+         /^X\/\(\/\).*/{
+           s//\1/
+           q
+         }
+         s/.*/./; q'`
+
+# Avoid depending upon Character Ranges.
+as_cr_letters='abcdefghijklmnopqrstuvwxyz'
+as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+as_cr_Letters=$as_cr_letters$as_cr_LETTERS
+as_cr_digits='0123456789'
+as_cr_alnum=$as_cr_Letters$as_cr_digits
+
+
+  as_lineno_1=$LINENO as_lineno_1a=$LINENO
+  as_lineno_2=$LINENO as_lineno_2a=$LINENO
+  eval 'test "x$as_lineno_1'$as_run'" != "x$as_lineno_2'$as_run'" &&
+  test "x`expr $as_lineno_1'$as_run' + 1`" = "x$as_lineno_2'$as_run'"' || {
+  # Blame Lee E. McMahon (1931-1989) for sed's syntax.  :-)
+  sed -n '
+    p
+    /[$]LINENO/=
+  ' <$as_myself |
+    sed '
+      s/[$]LINENO.*/&-/
+      t lineno
+      b
+      :lineno
+      N
+      :loop
+      s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/
+      t loop
+      s/-\n.*//
+    ' >$as_me.lineno &&
+  chmod +x "$as_me.lineno" ||
+    { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; }
+
+  # Don't try to exec as it changes $0, causing all sorts of problems
+  # (the dirname of $0 is not the place where we might find the
+  # original, and so on).  Autoconf is especially sensitive to this.
+  . "./$as_me.lineno"
+  # Exit status is that of the last command.
+  exit
+}
+
+ECHO_C= ECHO_N= ECHO_T=  # filled in below with whatever makes echo omit its trailing newline
+case `echo -n x` in #(((((
+-n*)  # echo printed "-n" literally, so the -n option is not supported
+  case `echo 'xy\c'` in
+  *c*) ECHO_T='	';;     # ECHO_T is single tab character.
+  xy)  ECHO_C='\c';;  # echo honours \c
+  *)   echo `echo ksh88 bug on AIX 6.1` > /dev/null
+       ECHO_T='	';;  # again one literal TAB between the quotes, never spaces
+  esac;;
+*)  # echo -n works
+  ECHO_N='-n';;
+esac
+
+rm -f conf$$ conf$$.exe conf$$.file
+if test -d conf$$.dir; then
+  rm -f conf$$.dir/conf$$.file
+else
+  rm -f conf$$.dir
+  mkdir conf$$.dir 2>/dev/null
+fi
+if (echo >conf$$.file) 2>/dev/null; then
+  if ln -s conf$$.file conf$$ 2>/dev/null; then
+    as_ln_s='ln -s'
+    # ... but there are two gotchas:
+    # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail.
+    # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable.
+    # In both cases, we have to default to `cp -p'.
+    ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe ||
+      as_ln_s='cp -p'
+  elif ln conf$$.file conf$$ 2>/dev/null; then
+    as_ln_s=ln
+  else
+    as_ln_s='cp -p'
+  fi
+else
+  as_ln_s='cp -p'
+fi
+rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file
+rmdir conf$$.dir 2>/dev/null
+
+if mkdir -p . 2>/dev/null; then
+  as_mkdir_p='mkdir -p "$as_dir"'  # kept as text; "$as_dir" is expanded only when the string is eval'd
+else
+  test -d ./-p && rmdir ./-p  # remove a directory literally named "-p" (presumably left by the failed probe)
+  as_mkdir_p=false  # eval'ing $as_mkdir_p then simply fails
+fi
+
+if test -x / >/dev/null 2>&1; then
+  as_test_x='test -x'
+else
+  if ls -dL / >/dev/null 2>&1; then
+    as_ls_L_option=L
+  else
+    as_ls_L_option=
+  fi
+  as_test_x='
+    eval sh -c '\''
+      if test -d "$1"; then
+       test -d "$1/.";
+      else
+       case $1 in #(
+       -*)set "./$1";;
+       esac;
+       case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in #((
+       ???[sx]*):;;*)false;;esac;fi
+    '\'' sh
+  '
+fi
+as_executable_p=$as_test_x
+
+# Sed expression to map a string onto a valid CPP name.
+as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'"
+
+# Sed expression to map a string onto a valid variable name.
+as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'"
+
+SHELL=${CONFIG_SHELL-/bin/sh}
+
+
+test -n "$DJDIR" || exec 7<&0 </dev/null  # unless DJDIR is set: copy stdin to fd 7, then read stdin from /dev/null
+exec 6>&1  # fd 6 becomes a duplicate of the current stdout
+
+# Name of the host.
+# hostname on some systems (SVR3.2, Linux) returns a bogus exit status,
+# so uname gets run too.
+ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q`  # sed 1q keeps only the first output line
+
+#
+# Initializations.
+#
+ac_default_prefix=/usr/local
+ac_clean_files=
+ac_config_libobj_dir=.
+LIBOBJS=
+cross_compiling=no
+subdirs=
+MFLAGS=
+MAKEFLAGS=
+
+# Identity of this package.
+PACKAGE_NAME='GNU MP'
+PACKAGE_TARNAME='gmp'
+PACKAGE_VERSION='5.0.5'
+PACKAGE_STRING='GNU MP 5.0.5'
+PACKAGE_BUGREPORT='gmp-bugs@gmplib.org, see http://gmplib.org/manual/Reporting-Bugs.html'
+PACKAGE_URL='http://www.gnu.org/software/gmp/'
+
+ac_unique_file="gmp-impl.h"
+# Factoring default headers for most tests.
+ac_includes_default="\
+#include <stdio.h>
+#ifdef HAVE_SYS_TYPES_H
+# include <sys/types.h>
+#endif
+#ifdef HAVE_SYS_STAT_H
+# include <sys/stat.h>
+#endif
+#ifdef STDC_HEADERS
+# include <stdlib.h>
+# include <stddef.h>
+#else
+# ifdef HAVE_STDLIB_H
+#  include <stdlib.h>
+# endif
+#endif
+#ifdef HAVE_STRING_H
+# if !defined STDC_HEADERS && defined HAVE_MEMORY_H
+#  include <memory.h>
+# endif
+# include <string.h>
+#endif
+#ifdef HAVE_STRINGS_H
+# include <strings.h>
+#endif
+#ifdef HAVE_INTTYPES_H
+# include <inttypes.h>
+#endif
+#ifdef HAVE_STDINT_H
+# include <stdint.h>
+#endif
+#ifdef HAVE_UNISTD_H
+# include <unistd.h>
+#endif"
+
+ac_subst_vars='am__EXEEXT_FALSE
+am__EXEEXT_TRUE
+LTLIBOBJS
+LIBOBJS
+LEXLIB
+LEX_OUTPUT_ROOT
+LEX
+YFLAGS
+YACC
+LIBREADLINE
+WITH_READLINE_01
+LIBCURSES
+HAVE_STACK_T_01
+HAVE_SYS_RESOURCE_H_01
+HAVE_SIGSTACK_01
+HAVE_SIGALTSTACK_01
+HAVE_SIGACTION_01
+HAVE_GETTIMEOFDAY_01
+HAVE_GETRUSAGE_01
+HAVE_CPUTIME_01
+HAVE_CLOCK_01
+TUNE_SQR_OBJ
+gmp_srclinks
+mpn_objs_in_libgmp
+mpn_objects
+mpn_objs_in_libmp
+GMP_LIMB_BITS
+M4
+TAL_OBJECT
+LIBM
+ENABLE_STATIC_FALSE
+ENABLE_STATIC_TRUE
+OTOOL64
+OTOOL
+LIPO
+NMEDIT
+DSYMUTIL
+MANIFEST_TOOL
+RANLIB
+ac_ct_AR
+LN_S
+LD
+FGREP
+SED
+LIBTOOL
+LIBGMP_DLL
+OBJDUMP
+DLLTOOL
+AS
+NM
+ac_ct_DUMPBIN
+DUMPBIN
+AR
+ASMFLAGS
+ANSI2KNR
+U
+EGREP
+GREP
+CXXCPP
+WANT_CXX_FALSE
+WANT_CXX_TRUE
+ac_ct_CXX
+CXXFLAGS
+CXX
+CCAS
+LIBM_FOR_BUILD
+U_FOR_BUILD
+EXEEXT_FOR_BUILD
+CPP_FOR_BUILD
+CC_FOR_BUILD
+CPP
+OBJEXT
+EXEEXT
+ac_ct_CC
+CPPFLAGS
+LDFLAGS
+CFLAGS
+CC
+DEFN_LONG_LONG_LIMB
+CALLING_CONVENTIONS_OBJS
+SPEED_CYCLECOUNTER_OBJ
+LIBGMPXX_LDFLAGS
+LIBGMP_LDFLAGS
+GMP_LDFLAGS
+HAVE_HOST_CPU_FAMILY_powerpc
+HAVE_HOST_CPU_FAMILY_power
+ABI
+GMP_NAIL_BITS
+WANT_MPBSD_FALSE
+WANT_MPBSD_TRUE
+MAINT
+MAINTAINER_MODE_FALSE
+MAINTAINER_MODE_TRUE
+am__untar
+am__tar
+AMTAR
+am__leading_dot
+SET_MAKE
+AWK
+mkdir_p
+MKDIR_P
+INSTALL_STRIP_PROGRAM
+STRIP
+install_sh
+MAKEINFO
+AUTOHEADER
+AUTOMAKE
+AUTOCONF
+ACLOCAL
+VERSION
+PACKAGE
+CYGPATH_W
+am__isrc
+INSTALL_DATA
+INSTALL_SCRIPT
+INSTALL_PROGRAM
+host_os
+host_vendor
+host_cpu
+host
+build_os
+build_vendor
+build_cpu
+build
+target_alias
+host_alias
+build_alias
+LIBS
+ECHO_T
+ECHO_N
+ECHO_C
+DEFS
+mandir
+localedir
+libdir
+psdir
+pdfdir
+dvidir
+htmldir
+infodir
+docdir
+oldincludedir
+includedir
+localstatedir
+sharedstatedir
+sysconfdir
+datadir
+datarootdir
+libexecdir
+sbindir
+bindir
+program_transform_name
+prefix
+exec_prefix
+PACKAGE_URL
+PACKAGE_BUGREPORT
+PACKAGE_STRING
+PACKAGE_VERSION
+PACKAGE_TARNAME
+PACKAGE_NAME
+PATH_SEPARATOR
+SHELL'
+ac_subst_files=''
+ac_user_opts='
+enable_option_checking
+enable_maintainer_mode
+enable_assert
+enable_alloca
+enable_cxx
+enable_fft
+enable_old_fft_full
+enable_mpbsd
+enable_nails
+enable_profiling
+with_readline
+enable_fat
+enable_minithres
+enable_shared
+enable_static
+with_pic
+enable_fast_install
+with_gnu_ld
+with_sysroot
+enable_libtool_lock
+'
+      ac_precious_vars='build_alias
+host_alias
+target_alias
+ABI
+CC
+CFLAGS
+LDFLAGS
+LIBS
+CPPFLAGS
+CPP
+CC_FOR_BUILD
+CPP_FOR_BUILD
+CXX
+CXXFLAGS
+CCC
+CXXCPP
+M4
+YACC
+YFLAGS'
+
+
+# Initialize some variables set by options.
+ac_init_help=
+ac_init_version=false
+ac_unrecognized_opts=
+ac_unrecognized_sep=
+# The variables have the same names as the options, with
+# dashes changed to underlines.
+cache_file=/dev/null
+exec_prefix=NONE
+no_create=
+no_recursion=
+prefix=NONE
+program_prefix=NONE
+program_suffix=NONE
+program_transform_name=s,x,x,
+silent=
+site=
+srcdir=
+verbose=
+x_includes=NONE
+x_libraries=NONE
+
+# Installation directory options.
+# These are left unexpanded so users can "make install exec_prefix=/foo"
+# and all the variables that are supposed to be based on exec_prefix
+# by default will actually change.
+# Use braces instead of parens because sh, perl, etc. also accept them.
+# (The list follows the same order as the GNU Coding Standards.)
+bindir='${exec_prefix}/bin'
+sbindir='${exec_prefix}/sbin'
+libexecdir='${exec_prefix}/libexec'
+datarootdir='${prefix}/share'
+datadir='${datarootdir}'
+sysconfdir='${prefix}/etc'
+sharedstatedir='${prefix}/com'
+localstatedir='${prefix}/var'
+includedir='${prefix}/include'
+oldincludedir='/usr/include'
+docdir='${datarootdir}/doc/${PACKAGE_TARNAME}'
+infodir='${datarootdir}/info'
+htmldir='${docdir}'
+dvidir='${docdir}'
+pdfdir='${docdir}'
+psdir='${docdir}'
+libdir='${exec_prefix}/lib'
+localedir='${datarootdir}/locale'
+mandir='${datarootdir}/man'
+
+ac_prev=
+ac_dashdash=
+for ac_option
+do
+  # If the previous option needs an argument, assign it.
+  if test -n "$ac_prev"; then
+    eval $ac_prev=\$ac_option
+    ac_prev=
+    continue
+  fi
+
+  case $ac_option in
+  *=*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;;
+  *)   ac_optarg=yes ;;
+  esac
+
+  # Accept the important Cygnus configure options, so we can diagnose typos.
+
+  case $ac_dashdash$ac_option in
+  --)
+    ac_dashdash=yes ;;
+
+  -bindir | --bindir | --bindi | --bind | --bin | --bi)
+    ac_prev=bindir ;;
+  -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*)
+    bindir=$ac_optarg ;;
+
+  -build | --build | --buil | --bui | --bu)
+    ac_prev=build_alias ;;
+  -build=* | --build=* | --buil=* | --bui=* | --bu=*)
+    build_alias=$ac_optarg ;;
+
+  -cache-file | --cache-file | --cache-fil | --cache-fi \
+  | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c)
+    ac_prev=cache_file ;;
+  -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \
+  | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*)
+    cache_file=$ac_optarg ;;
+
+  --config-cache | -C)
+    cache_file=config.cache ;;
+
+  -datadir | --datadir | --datadi | --datad)
+    ac_prev=datadir ;;
+  -datadir=* | --datadir=* | --datadi=* | --datad=*)
+    datadir=$ac_optarg ;;
+
+  -datarootdir | --datarootdir | --datarootdi | --datarootd | --dataroot \
+  | --dataroo | --dataro | --datar)
+    ac_prev=datarootdir ;;
+  -datarootdir=* | --datarootdir=* | --datarootdi=* | --datarootd=* \
+  | --dataroot=* | --dataroo=* | --dataro=* | --datar=*)
+    datarootdir=$ac_optarg ;;
+
+  -disable-* | --disable-*)
+    ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'`
+    # Reject names that are not valid shell variable names.
+    expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
+      as_fn_error "invalid feature name: $ac_useropt"
+    ac_useropt_orig=$ac_useropt
+    ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
+    case $ac_user_opts in
+      *"
+"enable_$ac_useropt"
+"*) ;;
+      *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--disable-$ac_useropt_orig"
+        ac_unrecognized_sep=', ';;
+    esac
+    eval enable_$ac_useropt=no ;;
+
+  -docdir | --docdir | --docdi | --doc | --do)
+    ac_prev=docdir ;;
+  -docdir=* | --docdir=* | --docdi=* | --doc=* | --do=*)
+    docdir=$ac_optarg ;;
+
+  -dvidir | --dvidir | --dvidi | --dvid | --dvi | --dv)
+    ac_prev=dvidir ;;
+  -dvidir=* | --dvidir=* | --dvidi=* | --dvid=* | --dvi=* | --dv=*)
+    dvidir=$ac_optarg ;;
+
+  -enable-* | --enable-*)
+    ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'`
+    # Reject names that are not valid shell variable names.
+    expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
+      as_fn_error "invalid feature name: $ac_useropt"
+    ac_useropt_orig=$ac_useropt
+    ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
+    case $ac_user_opts in
+      *"
+"enable_$ac_useropt"
+"*) ;;
+      *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--enable-$ac_useropt_orig"
+        ac_unrecognized_sep=', ';;
+    esac
+    eval enable_$ac_useropt=\$ac_optarg ;;
+
+  -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \
+  | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \
+  | --exec | --exe | --ex)
+    ac_prev=exec_prefix ;;
+  -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \
+  | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \
+  | --exec=* | --exe=* | --ex=*)
+    exec_prefix=$ac_optarg ;;
+
+  -gas | --gas | --ga | --g)
+    # Obsolete; use --with-gas.
+    with_gas=yes ;;
+
+  -help | --help | --hel | --he | -h)
+    ac_init_help=long ;;
+  -help=r* | --help=r* | --hel=r* | --he=r* | -hr*)
+    ac_init_help=recursive ;;
+  -help=s* | --help=s* | --hel=s* | --he=s* | -hs*)
+    ac_init_help=short ;;
+
+  -host | --host | --hos | --ho)
+    ac_prev=host_alias ;;
+  -host=* | --host=* | --hos=* | --ho=*)
+    host_alias=$ac_optarg ;;
+
+  -htmldir | --htmldir | --htmldi | --htmld | --html | --htm | --ht)
+    ac_prev=htmldir ;;
+  -htmldir=* | --htmldir=* | --htmldi=* | --htmld=* | --html=* | --htm=* \
+  | --ht=*)
+    htmldir=$ac_optarg ;;
+
+  -includedir | --includedir | --includedi | --included | --include \
+  | --includ | --inclu | --incl | --inc)
+    ac_prev=includedir ;;
+  -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \
+  | --includ=* | --inclu=* | --incl=* | --inc=*)
+    includedir=$ac_optarg ;;
+
+  -infodir | --infodir | --infodi | --infod | --info | --inf)
+    ac_prev=infodir ;;
+  -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*)
+    infodir=$ac_optarg ;;
+
+  -libdir | --libdir | --libdi | --libd)
+    ac_prev=libdir ;;
+  -libdir=* | --libdir=* | --libdi=* | --libd=*)
+    libdir=$ac_optarg ;;
+
+  -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \
+  | --libexe | --libex | --libe)
+    ac_prev=libexecdir ;;
+  -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \
+  | --libexe=* | --libex=* | --libe=*)
+    libexecdir=$ac_optarg ;;
+
+  -localedir | --localedir | --localedi | --localed | --locale)
+    ac_prev=localedir ;;
+  -localedir=* | --localedir=* | --localedi=* | --localed=* | --locale=*)
+    localedir=$ac_optarg ;;
+
+  -localstatedir | --localstatedir | --localstatedi | --localstated \
+  | --localstate | --localstat | --localsta | --localst | --locals)
+    ac_prev=localstatedir ;;
+  -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \
+  | --localstate=* | --localstat=* | --localsta=* | --localst=* | --locals=*)
+    localstatedir=$ac_optarg ;;
+
+  -mandir | --mandir | --mandi | --mand | --man | --ma | --m)
+    ac_prev=mandir ;;
+  -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*)
+    mandir=$ac_optarg ;;
+
+  -nfp | --nfp | --nf)
+    # Obsolete; use --without-fp.
+    with_fp=no ;;
+
+  -no-create | --no-create | --no-creat | --no-crea | --no-cre \
+  | --no-cr | --no-c | -n)
+    no_create=yes ;;
+
+  -no-recursion | --no-recursion | --no-recursio | --no-recursi \
+  | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r)
+    no_recursion=yes ;;
+
+  -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \
+  | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \
+  | --oldin | --oldi | --old | --ol | --o)
+    ac_prev=oldincludedir ;;
+  -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \
+  | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \
+  | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*)
+    oldincludedir=$ac_optarg ;;
+
+  -prefix | --prefix | --prefi | --pref | --pre | --pr | --p)
+    ac_prev=prefix ;;
+  -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*)
+    prefix=$ac_optarg ;;
+
+  -program-prefix | --program-prefix | --program-prefi | --program-pref \
+  | --program-pre | --program-pr | --program-p)
+    ac_prev=program_prefix ;;
+  -program-prefix=* | --program-prefix=* | --program-prefi=* \
+  | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*)
+    program_prefix=$ac_optarg ;;
+
+  -program-suffix | --program-suffix | --program-suffi | --program-suff \
+  | --program-suf | --program-su | --program-s)
+    ac_prev=program_suffix ;;
+  -program-suffix=* | --program-suffix=* | --program-suffi=* \
+  | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*)
+    program_suffix=$ac_optarg ;;
+
+  -program-transform-name | --program-transform-name \
+  | --program-transform-nam | --program-transform-na \
+  | --program-transform-n | --program-transform- \
+  | --program-transform | --program-transfor \
+  | --program-transfo | --program-transf \
+  | --program-trans | --program-tran \
+  | --progr-tra | --program-tr | --program-t)
+    ac_prev=program_transform_name ;;
+  -program-transform-name=* | --program-transform-name=* \
+  | --program-transform-nam=* | --program-transform-na=* \
+  | --program-transform-n=* | --program-transform-=* \
+  | --program-transform=* | --program-transfor=* \
+  | --program-transfo=* | --program-transf=* \
+  | --program-trans=* | --program-tran=* \
+  | --progr-tra=* | --program-tr=* | --program-t=*)
+    program_transform_name=$ac_optarg ;;
+
+  -pdfdir | --pdfdir | --pdfdi | --pdfd | --pdf | --pd)
+    ac_prev=pdfdir ;;
+  -pdfdir=* | --pdfdir=* | --pdfdi=* | --pdfd=* | --pdf=* | --pd=*)
+    pdfdir=$ac_optarg ;;
+
+  -psdir | --psdir | --psdi | --psd | --ps)
+    ac_prev=psdir ;;
+  -psdir=* | --psdir=* | --psdi=* | --psd=* | --ps=*)
+    psdir=$ac_optarg ;;
+
+  -q | -quiet | --quiet | --quie | --qui | --qu | --q \
+  | -silent | --silent | --silen | --sile | --sil)
+    silent=yes ;;
+
+  -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb)
+    ac_prev=sbindir ;;
+  -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \
+  | --sbi=* | --sb=*)
+    sbindir=$ac_optarg ;;
+
+  -sharedstatedir | --sharedstatedir | --sharedstatedi \
+  | --sharedstated | --sharedstate | --sharedstat | --sharedsta \
+  | --sharedst | --shareds | --shared | --share | --shar \
+  | --sha | --sh)
+    ac_prev=sharedstatedir ;;
+  -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \
+  | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \
+  | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \
+  | --sha=* | --sh=*)
+    sharedstatedir=$ac_optarg ;;
+
+  -site | --site | --sit)
+    ac_prev=site ;;
+  -site=* | --site=* | --sit=*)
+    site=$ac_optarg ;;
+
+  -srcdir | --srcdir | --srcdi | --srcd | --src | --sr)
+    ac_prev=srcdir ;;
+  -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*)
+    srcdir=$ac_optarg ;;
+
+  -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \
+  | --syscon | --sysco | --sysc | --sys | --sy)
+    ac_prev=sysconfdir ;;
+  -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \
+  | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*)
+    sysconfdir=$ac_optarg ;;
+
+  -target | --target | --targe | --targ | --tar | --ta | --t)
+    ac_prev=target_alias ;;
+  -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*)
+    target_alias=$ac_optarg ;;
+
+  -v | -verbose | --verbose | --verbos | --verbo | --verb)
+    verbose=yes ;;
+
+  -version | --version | --versio | --versi | --vers | -V)
+    ac_init_version=: ;;
+
+  -with-* | --with-*)
+    ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'`
+    # Reject names that are not valid shell variable names.
+    expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
+      as_fn_error "invalid package name: $ac_useropt"
+    ac_useropt_orig=$ac_useropt
+    ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
+    case $ac_user_opts in
+      *"
+"with_$ac_useropt"
+"*) ;;
+      *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--with-$ac_useropt_orig"
+        ac_unrecognized_sep=', ';;
+    esac
+    eval with_$ac_useropt=\$ac_optarg ;;
+
+  -without-* | --without-*)
+    ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'`
+    # Reject names that are not valid shell variable names.
+    expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
+      as_fn_error "invalid package name: $ac_useropt"
+    ac_useropt_orig=$ac_useropt
+    ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
+    case $ac_user_opts in
+      *"
+"with_$ac_useropt"
+"*) ;;
+      *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--without-$ac_useropt_orig"
+        ac_unrecognized_sep=', ';;
+    esac
+    eval with_$ac_useropt=no ;;
+
+  --x)
+    # Obsolete; use --with-x.
+    with_x=yes ;;
+
+  -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \
+  | --x-incl | --x-inc | --x-in | --x-i)
+    ac_prev=x_includes ;;
+  -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \
+  | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*)
+    x_includes=$ac_optarg ;;
+
+  -x-libraries | --x-libraries | --x-librarie | --x-librari \
+  | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l)
+    ac_prev=x_libraries ;;
+  -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \
+  | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*)
+    x_libraries=$ac_optarg ;;
+
+  -*) as_fn_error "unrecognized option: \`$ac_option'
+Try \`$0 --help' for more information."
+    ;;
+
+  *=*)
+    ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='`
+    # Reject names that are not valid shell variable names.
+    case $ac_envvar in #(
+      '' | [0-9]* | *[!_$as_cr_alnum]* )
+      as_fn_error "invalid variable name: \`$ac_envvar'" ;;
+    esac
+    eval $ac_envvar=\$ac_optarg
+    export $ac_envvar ;;
+
+  *)
+    # FIXME: should be removed in autoconf 3.0.
+    $as_echo "$as_me: WARNING: you should use --build, --host, --target" >&2
+    expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null &&
+      $as_echo "$as_me: WARNING: invalid host type: $ac_option" >&2
+    : ${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}
+    ;;
+
+  esac
+done
+
+# A non-empty ac_prev here means the last option seen was still waiting for
+# its value; rebuild the option spelling from the variable name (underscores
+# become dashes) and report it through as_fn_error.
+if test -n "$ac_prev"; then
+  ac_option=--`echo $ac_prev | sed 's/_/-/g'`
+  as_fn_error "missing argument to $ac_option"
+fi
+
+# Report the options accumulated in ac_unrecognized_opts.  The reaction is
+# selected by enable_option_checking: "no" stays silent, "fatal" goes through
+# as_fn_error, and any other value prints a warning on stderr.
+if test -n "$ac_unrecognized_opts"; then
+  case $enable_option_checking in
+    no) ;;
+    fatal) as_fn_error "unrecognized options: $ac_unrecognized_opts" ;;
+    *)     $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;;
+  esac
+fi
+
+# Check all directory arguments for consistency.
+# Each listed variable is read indirectly, stripped of trailing slashes, and
+# then required to look like an absolute path.
+for ac_var in  exec_prefix prefix bindir sbindir libexecdir datarootdir \
+               datadir sysconfdir sharedstatedir localstatedir includedir \
+               oldincludedir docdir infodir htmldir dvidir pdfdir psdir \
+               libdir localedir mandir
+do
+  eval ac_val=\$$ac_var
+  # Remove trailing slashes.
+  # The second expr alternative keeps the value as-is when it consists only
+  # of slashes (the first pattern needs at least one non-slash character).
+  case $ac_val in
+    */ )
+      ac_val=`expr "X$ac_val" : 'X\(.*[^/]\)' \| "X$ac_val" : 'X\(.*\)'`
+      eval $ac_var=\$ac_val;;
+  esac
+  # Be sure to have absolute directory names.
+  # Accepted: values starting with a slash, backslash or `$', and values of
+  # the form X:/ or X:\ .  NONE or empty is accepted only for variables whose
+  # name ends in "prefix".
+  case $ac_val in
+    [\\/$]* | ?:[\\/]* )  continue;;
+    NONE | '' ) case $ac_var in *prefix ) continue;; esac;;
+  esac
+  as_fn_error "expected an absolute directory name for --$ac_var: $ac_val"
+done
+
+# There might be people who depend on the old broken behavior: `$host'
+# used to hold the argument of --host etc.
+# FIXME: To remove some day.
+build=$build_alias
+host=$host_alias
+target=$target_alias
+
+# FIXME: To remove some day.
+# Decide cross_compiling from the aliases: with a host but no build alias it
+# becomes "maybe" (and a warning is printed on stderr); with both given and
+# different it becomes "yes".  Otherwise it is left untouched here.
+if test "x$host_alias" != x; then
+  if test "x$build_alias" = x; then
+    cross_compiling=maybe
+    $as_echo "$as_me: WARNING: If you wanted to set the --build type, don't use --host.
+    If a cross compiler is detected then cross compile mode will be used." >&2
+  elif test "x$build_alias" != "x$host_alias"; then
+    cross_compiling=yes
+  fi
+fi
+
+# Tool names are prefixed with "<host_alias>-" when a host alias was given.
+ac_tool_prefix=
+test -n "$host_alias" && ac_tool_prefix=$host_alias-
+
+# In silent mode, file descriptor 6 is redirected to /dev/null.
+test "$silent" = yes && exec 6>/dev/null
+
+
+ac_pwd=`pwd` && test -n "$ac_pwd" &&
+ac_ls_di=`ls -di .` &&
+ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` ||
+  as_fn_error "working directory cannot be determined"
+test "X$ac_ls_di" = "X$ac_pwd_ls_di" ||
+  as_fn_error "pwd does not report name of working directory"
+
+
+# Find the source files, if location was not specified.
+# ac_srcdir_defaulted records whether srcdir was guessed (yes) or supplied
+# by the user (no); it only affects the wording of the error message below.
+if test -z "$srcdir"; then
+  ac_srcdir_defaulted=yes
+  # Try the directory containing this script, then the parent directory.
+  # The directory part of $as_myself is computed with $as_dirname, falling
+  # back to $as_expr and finally to a sed script if the earlier ones fail.
+  ac_confdir=`$as_dirname -- "$as_myself" ||
+$as_expr X"$as_myself" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+        X"$as_myself" : 'X\(//\)[^/]' \| \
+        X"$as_myself" : 'X\(//\)$' \| \
+        X"$as_myself" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X"$as_myself" |
+    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+           s//\1/
+           q
+         }
+         /^X\(\/\/\)[^/].*/{
+           s//\1/
+           q
+         }
+         /^X\(\/\/\)$/{
+           s//\1/
+           q
+         }
+         /^X\(\/\).*/{
+           s//\1/
+           q
+         }
+         s/.*/./; q'`
+  srcdir=$ac_confdir
+  if test ! -r "$srcdir/$ac_unique_file"; then
+    srcdir=..
+  fi
+else
+  ac_srcdir_defaulted=no
+fi
+# $ac_unique_file must be readable in the chosen srcdir, otherwise give up.
+if test ! -r "$srcdir/$ac_unique_file"; then
+  test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .."
+  as_fn_error "cannot find sources ($ac_unique_file) in $srcdir"
+fi
+# Resolve srcdir to the name `pwd' prints after cd'ing into it (done in a
+# subshell, so the current directory of this script is not changed).
+ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work"
+ac_abs_confdir=`(
+       cd "$srcdir" && test -r "./$ac_unique_file" || as_fn_error "$ac_msg"
+       pwd)`
+# When building in place, set srcdir=.
+if test "$ac_abs_confdir" = "$ac_pwd"; then
+  srcdir=.
+fi
+# Remove unnecessary trailing slashes from srcdir.
+# Double slashes in file names in object file debugging info
+# mess up M-x gdb in Emacs.
+case $srcdir in
+*/) srcdir=`expr "X$srcdir" : 'X\(.*[^/]\)' \| "X$srcdir" : 'X\(.*\)'`;;
+esac
+for ac_var in $ac_precious_vars; do
+  eval ac_env_${ac_var}_set=\${${ac_var}+set}
+  eval ac_env_${ac_var}_value=\$${ac_var}
+  eval ac_cv_env_${ac_var}_set=\${${ac_var}+set}
+  eval ac_cv_env_${ac_var}_value=\$${ac_var}
+done
+
+#
+# Report the --help message.
+#
+if test "$ac_init_help" = "long"; then
+  # Omit some internal or obsolete options to make the list less imposing.
+  # This message is too long to be a string in the A/UX 3.1 sh.
+  cat <<_ACEOF
+\`configure' configures GNU MP 5.0.5 to adapt to many kinds of systems.
+
+Usage: $0 [OPTION]... [VAR=VALUE]...
+
+To assign environment variables (e.g., CC, CFLAGS...), specify them as
+VAR=VALUE.  See below for descriptions of some of the useful variables.
+
+Defaults for the options are specified in brackets.
+
+Configuration:
+  -h, --help              display this help and exit
+      --help=short        display options specific to this package
+      --help=recursive    display the short help of all the included packages
+  -V, --version           display version information and exit
+  -q, --quiet, --silent   do not print \`checking...' messages
+      --cache-file=FILE   cache test results in FILE [disabled]
+  -C, --config-cache      alias for \`--cache-file=config.cache'
+  -n, --no-create         do not create output files
+      --srcdir=DIR        find the sources in DIR [configure dir or \`..']
+
+Installation directories:
+  --prefix=PREFIX         install architecture-independent files in PREFIX
+                          [$ac_default_prefix]
+  --exec-prefix=EPREFIX   install architecture-dependent files in EPREFIX
+                          [PREFIX]
+
+By default, \`make install' will install all the files in
+\`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc.  You can specify
+an installation prefix other than \`$ac_default_prefix' using \`--prefix',
+for instance \`--prefix=\$HOME'.
+
+For better control, use the options below.
+
+Fine tuning of the installation directories:
+  --bindir=DIR            user executables [EPREFIX/bin]
+  --sbindir=DIR           system admin executables [EPREFIX/sbin]
+  --libexecdir=DIR        program executables [EPREFIX/libexec]
+  --sysconfdir=DIR        read-only single-machine data [PREFIX/etc]
+  --sharedstatedir=DIR    modifiable architecture-independent data [PREFIX/com]
+  --localstatedir=DIR     modifiable single-machine data [PREFIX/var]
+  --libdir=DIR            object code libraries [EPREFIX/lib]
+  --includedir=DIR        C header files [PREFIX/include]
+  --oldincludedir=DIR     C header files for non-gcc [/usr/include]
+  --datarootdir=DIR       read-only arch.-independent data root [PREFIX/share]
+  --datadir=DIR           read-only architecture-independent data [DATAROOTDIR]
+  --infodir=DIR           info documentation [DATAROOTDIR/info]
+  --localedir=DIR         locale-dependent data [DATAROOTDIR/locale]
+  --mandir=DIR            man documentation [DATAROOTDIR/man]
+  --docdir=DIR            documentation root [DATAROOTDIR/doc/gmp]
+  --htmldir=DIR           html documentation [DOCDIR]
+  --dvidir=DIR            dvi documentation [DOCDIR]
+  --pdfdir=DIR            pdf documentation [DOCDIR]
+  --psdir=DIR             ps documentation [DOCDIR]
+_ACEOF
+
+  cat <<\_ACEOF
+
+Program names:
+  --program-prefix=PREFIX            prepend PREFIX to installed program names
+  --program-suffix=SUFFIX            append SUFFIX to installed program names
+  --program-transform-name=PROGRAM   run sed PROGRAM on installed program names
+
+System types:
+  --build=BUILD     configure for building on BUILD [guessed]
+  --host=HOST       cross-compile to build programs to run on HOST [BUILD]
+_ACEOF
+fi
+
+# Package-specific help, printed for every non-empty ac_init_help value.
+# The "Configuration of ..." title line is only echoed for the short and
+# recursive modes.  The here-document delimiter is quoted, so the text is
+# emitted verbatim; the exit status of that cat is kept in ac_status.
+if test -n "$ac_init_help"; then
+  case $ac_init_help in
+     short | recursive ) echo "Configuration of GNU MP 5.0.5:";;
+   esac
+  cat <<\_ACEOF
+
+Optional Features:
+  --disable-option-checking  ignore unrecognized --enable/--with options
+  --disable-FEATURE       do not include FEATURE (same as --enable-FEATURE=no)
+  --enable-FEATURE[=ARG]  include FEATURE [ARG=yes]
+  --enable-maintainer-mode  enable make rules and dependencies not useful
+                         (and sometimes confusing) to the casual installer
+  --enable-assert         enable ASSERT checking [[default=no]]
+  --enable-alloca         how to get temp memory [[default=reentrant]]
+  --enable-cxx            enable C++ support [[default=no]]
+  --enable-fft            enable FFTs for multiplication [[default=yes]]
+  --enable-old-fft-full   enable old mpn_mul_fft_full for multiplication
+                          [[default=no]]
+  --enable-mpbsd          build Berkeley MP compatibility library
+                          [[default=no]]
+  --enable-nails          use nails on limbs [[default=no]]
+  --enable-profiling      build with profiler support [[default=no]]
+  --enable-fat            build a fat binary on systems that support it
+                          [[default=no]]
+  --enable-minithres      choose minimal thresholds for testing [[default=no]]
+  --enable-shared[=PKGS]  build shared libraries [default=yes]
+  --enable-static[=PKGS]  build static libraries [default=yes]
+  --enable-fast-install[=PKGS]
+                          optimize for fast installation [default=yes]
+  --disable-libtool-lock  avoid locking (might break parallel builds)
+
+Optional Packages:
+  --with-PACKAGE[=ARG]    use PACKAGE [ARG=yes]
+  --without-PACKAGE       do not use PACKAGE (same as --with-PACKAGE=no)
+  --with-readline         readline support in calc demo program
+                          [[default=detect]]
+  --with-pic[=PKGS]       try to use only PIC/non-PIC objects [default=use
+                          both]
+  --with-gnu-ld           assume the C compiler uses GNU ld [default=no]
+  --with-sysroot=DIR Search for dependent libraries within DIR
+                        (or the compiler's sysroot if not specified).
+
+Some influential environment variables:
+  ABI         desired ABI (for processors supporting more than one ABI)
+  CC          C compiler command
+  CFLAGS      C compiler flags
+  LDFLAGS     linker flags, e.g. -L<lib dir> if you have libraries in a
+              nonstandard directory <lib dir>
+  LIBS        libraries to pass to the linker, e.g. -l<library>
+  CPPFLAGS    (Objective) C/C++ preprocessor flags, e.g. -I<include dir> if
+              you have headers in a nonstandard directory <include dir>
+  CPP         C preprocessor
+  CC_FOR_BUILD
+              build system C compiler
+  CPP_FOR_BUILD
+              build system C preprocessor
+  CXX         C++ compiler command
+  CXXFLAGS    C++ compiler flags
+  CXXCPP      C++ preprocessor
+  M4          m4 macro processor
+  YACC        The `Yet Another C Compiler' implementation to use. Defaults to
+              the first program found out of: `bison -y', `byacc', `yacc'.
+  YFLAGS      The list of arguments that will be passed by default to $YACC.
+              This script will default YFLAGS to the empty string to avoid a
+              default value of `-d' given by some make applications.
+
+Use these variables to override the choices made by `configure' or to help
+it to find libraries and programs with nonstandard names/locations.
+
+Report bugs to <gmp-bugs@gmplib.org, see http://gmplib.org/manual/Reporting-Bugs.html>.
+GNU MP home page: <http://www.gnu.org/software/gmp/>.
+General help using GNU software: <http://www.gnu.org/gethelp/>.
+_ACEOF
+ac_status=$?
+fi
+
+# Recursive help: visit every directory named in $ac_subdirs_all and run the
+# configure script found there with --help=recursive.  Failures are recorded
+# in ac_status rather than aborting the loop (except when returning to
+# $ac_pwd fails, which breaks out of it).
+if test "$ac_init_help" = "recursive"; then
+  # If there are subdirs, report their specific --help.
+  # The leading ":" keeps the word list non-empty; it is skipped right away.
+  for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue
+    # If the directory is missing here, retry from inside $srcdir (which
+    # then becomes the new working directory and srcdir becomes ".").
+    test -d "$ac_dir" ||
+      { cd "$srcdir" && ac_pwd=`pwd` && srcdir=. && test -d "$ac_dir"; } ||
+      continue
+    ac_builddir=.
+
+# Derive the build-tree paths for $ac_dir: its suffix below the top level and
+# the relative way back up to the top (one ".." per path component).
+case "$ac_dir" in
+.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;;
+*)
+  ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'`
+  # A ".." for each directory in $ac_dir_suffix.
+  ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'`
+  case $ac_top_builddir_sub in
+  "") ac_top_builddir_sub=. ac_top_build_prefix= ;;
+  *)  ac_top_build_prefix=$ac_top_builddir_sub/ ;;
+  esac ;;
+esac
+ac_abs_top_builddir=$ac_pwd
+ac_abs_builddir=$ac_pwd$ac_dir_suffix
+# for backward compatibility:
+ac_top_builddir=$ac_top_build_prefix
+
+# Derive the matching source-tree paths, depending on whether srcdir is ".",
+# an absolute name, or a relative name.
+case $srcdir in
+  .)  # We are building in place.
+    ac_srcdir=.
+    ac_top_srcdir=$ac_top_builddir_sub
+    ac_abs_top_srcdir=$ac_pwd ;;
+  [\\/]* | ?:[\\/]* )  # Absolute name.
+    ac_srcdir=$srcdir$ac_dir_suffix;
+    ac_top_srcdir=$srcdir
+    ac_abs_top_srcdir=$srcdir ;;
+  *) # Relative name.
+    ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix
+    ac_top_srcdir=$ac_top_build_prefix$srcdir
+    ac_abs_top_srcdir=$ac_pwd/$srcdir ;;
+esac
+ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix
+
+    cd "$ac_dir" || { ac_status=$?; continue; }
+    # Look for the subdirectory's own configure script; configure.gnu is
+    # preferred over configure when both exist.
+    if test -f "$ac_srcdir/configure.gnu"; then
+      echo &&
+      $SHELL "$ac_srcdir/configure.gnu" --help=recursive
+    elif test -f "$ac_srcdir/configure"; then
+      echo &&
+      $SHELL "$ac_srcdir/configure" --help=recursive
+    else
+      $as_echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2
+    fi || ac_status=$?
+    cd "$ac_pwd" || { ac_status=$?; break; }
+  done
+fi
+
+test -n "$ac_init_help" && exit $ac_status
+if $ac_init_version; then
+  cat <<\_ACEOF
+GNU MP configure 5.0.5
+generated by GNU Autoconf 2.65
+
+Copyright (C) 2009 Free Software Foundation, Inc.
+This configure script is free software; the Free Software Foundation
+gives unlimited permission to copy, distribute and modify it.
+
+
+
+Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
+2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published
+by the Free Software Foundation; either version 3 of the License, or (at
+your option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+_ACEOF
+  exit
+fi
+
+## ------------------------ ##
+## Autoconf initialization. ##
+## ------------------------ ##
+
+# ac_fn_c_try_compile LINENO
+# --------------------------
+# Try to compile conftest.$ac_ext, and return whether this succeeded.
+# The command held in $ac_compile is echoed to fd 5 and then eval'ed with
+# stderr captured in conftest.err.  Success (status 0) requires that the
+# command exits 0, that conftest.err is empty whenever $ac_c_werror_flag is
+# non-empty, and that conftest.$ac_objext exists and is non-empty.  On
+# failure the test program is dumped to fd 5 and the status is 1.
+ac_fn_c_try_compile ()
+{
+  # Use LINENO ($1) for log messages unless as_lineno is already set, and
+  # remember the previous stack value so it can be restored before returning.
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+  rm -f conftest.$ac_objext
+  if { { ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_compile") 2>conftest.err
+  ac_status=$?
+  # Drop lines that start with optional blanks and a `+' (presumably shell
+  # trace output) from the captured stderr, and copy the rest to fd 5.
+  if test -s conftest.err; then
+    grep -v '^ *+' conftest.err >conftest.er1
+    cat conftest.er1 >&5
+    mv -f conftest.er1 conftest.err
+  fi
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; } && {
+        test -z "$ac_c_werror_flag" ||
+        test ! -s conftest.err
+       } && test -s conftest.$ac_objext; then :
+  ac_retval=0
+else
+  $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+       ac_retval=1
+fi
+  # Restore the saved as_lineno_stack; when it ends up empty, unset as_lineno.
+  eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+  as_fn_set_status $ac_retval
+
+} # ac_fn_c_try_compile
+
+# ac_fn_c_try_cpp LINENO
+# ----------------------
+# Try to preprocess conftest.$ac_ext, and return whether this succeeded.
+# Runs "$ac_cpp conftest.$ac_ext" with stdout discarded and stderr captured
+# in conftest.err.  Success (status 0) requires exit status 0 and, when
+# either $ac_c_preproc_warn_flag or $ac_c_werror_flag is non-empty, an empty
+# conftest.err.  On failure the test program is dumped to fd 5 and the
+# status is 1.
+ac_fn_c_try_cpp ()
+{
+  # Use LINENO ($1) for log messages unless as_lineno is already set, and
+  # remember the previous stack value so it can be restored before returning.
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+  if { { ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_cpp conftest.$ac_ext") 2>conftest.err
+  ac_status=$?
+  # Drop lines that start with optional blanks and a `+' (presumably shell
+  # trace output) from the captured stderr, and copy the rest to fd 5.
+  if test -s conftest.err; then
+    grep -v '^ *+' conftest.err >conftest.er1
+    cat conftest.er1 >&5
+    mv -f conftest.er1 conftest.err
+  fi
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; } >/dev/null && {
+        test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
+        test ! -s conftest.err
+       }; then :
+  ac_retval=0
+else
+  $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+    ac_retval=1
+fi
+  # Restore the saved as_lineno_stack; when it ends up empty, unset as_lineno.
+  eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+  as_fn_set_status $ac_retval
+
+} # ac_fn_c_try_cpp
+
+# ac_fn_cxx_try_compile LINENO
+# ----------------------------
+# Try to compile conftest.$ac_ext, and return whether this succeeded.
+# The command held in $ac_compile is echoed to fd 5 and then eval'ed with
+# stderr captured in conftest.err.  Success (status 0) requires that the
+# command exits 0, that conftest.err is empty whenever $ac_cxx_werror_flag
+# is non-empty, and that conftest.$ac_objext exists and is non-empty.  On
+# failure the test program is dumped to fd 5 and the status is 1.
+ac_fn_cxx_try_compile ()
+{
+  # Use LINENO ($1) for log messages unless as_lineno is already set, and
+  # remember the previous stack value so it can be restored before returning.
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+  rm -f conftest.$ac_objext
+  if { { ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_compile") 2>conftest.err
+  ac_status=$?
+  # Drop lines that start with optional blanks and a `+' (presumably shell
+  # trace output) from the captured stderr, and copy the rest to fd 5.
+  if test -s conftest.err; then
+    grep -v '^ *+' conftest.err >conftest.er1
+    cat conftest.er1 >&5
+    mv -f conftest.er1 conftest.err
+  fi
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; } && {
+        test -z "$ac_cxx_werror_flag" ||
+        test ! -s conftest.err
+       } && test -s conftest.$ac_objext; then :
+  ac_retval=0
+else
+  $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+       ac_retval=1
+fi
+  # Restore the saved as_lineno_stack; when it ends up empty, unset as_lineno.
+  eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+  as_fn_set_status $ac_retval
+
+} # ac_fn_cxx_try_compile
+
+# ac_fn_cxx_try_cpp LINENO
+# ------------------------
+# Try to preprocess conftest.$ac_ext, and return whether this succeeded.
+# Runs "$ac_cpp conftest.$ac_ext" with stdout discarded and stderr captured
+# in conftest.err.  Success (status 0) requires exit status 0 and, when
+# either $ac_cxx_preproc_warn_flag or $ac_cxx_werror_flag is non-empty, an
+# empty conftest.err.  On failure the test program is dumped to fd 5 and the
+# status is 1.
+ac_fn_cxx_try_cpp ()
+{
+  # Use LINENO ($1) for log messages unless as_lineno is already set, and
+  # remember the previous stack value so it can be restored before returning.
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+  if { { ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_cpp conftest.$ac_ext") 2>conftest.err
+  ac_status=$?
+  # Drop lines that start with optional blanks and a `+' (presumably shell
+  # trace output) from the captured stderr, and copy the rest to fd 5.
+  if test -s conftest.err; then
+    grep -v '^ *+' conftest.err >conftest.er1
+    cat conftest.er1 >&5
+    mv -f conftest.er1 conftest.err
+  fi
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; } >/dev/null && {
+        test -z "$ac_cxx_preproc_warn_flag$ac_cxx_werror_flag" ||
+        test ! -s conftest.err
+       }; then :
+  ac_retval=0
+else
+  $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+    ac_retval=1
+fi
+  # Restore the saved as_lineno_stack; when it ends up empty, unset as_lineno.
+  eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+  as_fn_set_status $ac_retval
+
+} # ac_fn_cxx_try_cpp
+
+# ac_fn_c_try_run LINENO
+# ----------------------
+# Try to link conftest.$ac_ext, and return whether this succeeded. Assumes
+# that executables *can* be run.
+# Two steps, both logged to fd 5: first the $ac_link command is eval'ed, then
+# the resulting ./conftest$ac_exeext is executed.  The status is 0 only when
+# both exit 0; otherwise it is the exit status of the step that failed, and
+# the test program is dumped to fd 5.
+ac_fn_c_try_run ()
+{
+  # Use LINENO ($1) for log messages unless as_lineno is already set, and
+  # remember the previous stack value so it can be restored before returning.
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+  if { { ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_link") 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; } && { ac_try='./conftest$ac_exeext'
+  { { case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then :
+  ac_retval=0
+else
+  $as_echo "$as_me: program exited with status $ac_status" >&5
+       $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+       ac_retval=$ac_status
+fi
+  # Remove by-products some compilers leave behind: the conftest.dSYM
+  # directory and the conftest_ipa8_conftest.oo file.
+  rm -rf conftest.dSYM conftest_ipa8_conftest.oo
+  # Restore the saved as_lineno_stack; when it ends up empty, unset as_lineno.
+  eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+  as_fn_set_status $ac_retval
+
+} # ac_fn_c_try_run
+
+# ac_fn_c_check_header_mongrel LINENO HEADER VAR INCLUDES
+# -------------------------------------------------------
+# Tests whether HEADER exists, giving a warning if it cannot be compiled using
+# the include files in INCLUDES and setting the cache variable VAR
+# accordingly.
+ac_fn_c_check_header_mongrel ()
+{
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+  if { as_var=$3; eval "test \"\${$as_var+set}\" = set"; }; then :
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
+$as_echo_n "checking for $2... " >&6; }
+if { as_var=$3; eval "test \"\${$as_var+set}\" = set"; }; then :
+  $as_echo_n "(cached) " >&6
+fi
+eval ac_res=\$$3
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+else
+  # Is the header compilable?
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 usability" >&5
+$as_echo_n "checking $2 usability... " >&6; }
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+$4
+#include <$2>
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  ac_header_compiler=yes
+else
+  ac_header_compiler=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_compiler" >&5
+$as_echo "$ac_header_compiler" >&6; }
+
+# Is the header present?
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 presence" >&5
+$as_echo_n "checking $2 presence... " >&6; }
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <$2>
+_ACEOF
+if ac_fn_c_try_cpp "$LINENO"; then :
+  ac_header_preproc=yes
+else
+  ac_header_preproc=no
+fi
+rm -f conftest.err conftest.$ac_ext
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_preproc" >&5
+$as_echo "$ac_header_preproc" >&6; }
+
+# So?  What about this header?
+case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in #((
+  yes:no: )
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&5
+$as_echo "$as_me: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&2;}
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5
+$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;}
+    ;;
+  no:yes:* )
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: present but cannot be compiled" >&5
+$as_echo "$as_me: WARNING: $2: present but cannot be compiled" >&2;}
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2:     check for missing prerequisite headers?" >&5
+$as_echo "$as_me: WARNING: $2:     check for missing prerequisite headers?" >&2;}
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: see the Autoconf documentation" >&5
+$as_echo "$as_me: WARNING: $2: see the Autoconf documentation" >&2;}
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2:     section \"Present But Cannot Be Compiled\"" >&5
+$as_echo "$as_me: WARNING: $2:     section \"Present But Cannot Be Compiled\"" >&2;}
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5
+$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;}
+( cat <<\_ASBOX
+## ssssssssssssssssssssssssssssssssss ##
+## Report this to gmp-bugs@gmplib.org ##
+## ssssssssssssssssssssssssssssssssss ##
+_ASBOX
+     ) | sed "s/^/$as_me: WARNING:     /" >&2
+    ;;
+esac
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
+$as_echo_n "checking for $2... " >&6; }
+if { as_var=$3; eval "test \"\${$as_var+set}\" = set"; }; then :
+  $as_echo_n "(cached) " >&6
+else
+  eval "$3=\$ac_header_compiler"
+fi
+eval ac_res=\$$3
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+fi
+  eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+
+} # ac_fn_c_check_header_mongrel
+
+# ac_fn_c_check_header_compile LINENO HEADER VAR INCLUDES
+# -------------------------------------------------------
+# Tests whether HEADER exists and can be compiled using the include files in
+# INCLUDES, setting the cache variable VAR accordingly.
+# When VAR is already set, no compilation is attempted and the stored value
+# is reported as "(cached)"; otherwise VAR becomes "yes" or "no".
+ac_fn_c_check_header_compile ()
+{
+  # Use LINENO ($1) for log messages unless as_lineno is already set, and
+  # remember the previous stack value so it can be restored before returning.
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
+$as_echo_n "checking for $2... " >&6; }
+if { as_var=$3; eval "test \"\${$as_var+set}\" = set"; }; then :
+  $as_echo_n "(cached) " >&6
+else
+  # Test program: confdefs.h, then INCLUDES ($4), then the header itself.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+$4
+#include <$2>
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  eval "$3=yes"
+else
+  eval "$3=no"
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+eval ac_res=\$$3
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+  # Restore the saved as_lineno_stack; when it ends up empty, unset as_lineno.
+  eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+
+} # ac_fn_c_check_header_compile
+
+# ac_fn_c_try_link LINENO
+# -----------------------
+# Try to link conftest.$ac_ext, and return whether this succeeded.
+# The command held in $ac_link is echoed to fd 5 and then eval'ed with stderr
+# captured in conftest.err.  Success (status 0) requires exit status 0, an
+# empty conftest.err whenever $ac_c_werror_flag is non-empty, a non-empty
+# conftest$ac_exeext, and -- unless cross_compiling is "yes" -- that
+# $as_test_x accepts conftest$ac_exeext.  On failure the test program is
+# dumped to fd 5 and the status is 1.
+ac_fn_c_try_link ()
+{
+  # Use LINENO ($1) for log messages unless as_lineno is already set, and
+  # remember the previous stack value so it can be restored before returning.
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+  rm -f conftest.$ac_objext conftest$ac_exeext
+  if { { ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_link") 2>conftest.err
+  ac_status=$?
+  # Drop lines that start with optional blanks and a `+' (presumably shell
+  # trace output) from the captured stderr, and copy the rest to fd 5.
+  if test -s conftest.err; then
+    grep -v '^ *+' conftest.err >conftest.er1
+    cat conftest.er1 >&5
+    mv -f conftest.er1 conftest.err
+  fi
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; } && {
+        test -z "$ac_c_werror_flag" ||
+        test ! -s conftest.err
+       } && test -s conftest$ac_exeext && {
+        test "$cross_compiling" = yes ||
+        $as_test_x conftest$ac_exeext
+       }; then :
+  ac_retval=0
+else
+  $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+       ac_retval=1
+fi
+  # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information
+  # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would
+  # interfere with the next link command; also delete a directory that is
+  # left behind by Apple's compiler.  We do this before executing the actions.
+  rm -rf conftest.dSYM conftest_ipa8_conftest.oo
+  # Restore the saved as_lineno_stack; when it ends up empty, unset as_lineno.
+  eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+  as_fn_set_status $ac_retval
+
+} # ac_fn_c_try_link
+
+# ac_fn_c_check_func LINENO FUNC VAR
+# ----------------------------------
+# Tests whether FUNC exists, setting the cache variable VAR accordingly
+# When VAR is already set, nothing is linked and the stored value is reported
+# as "(cached)".  Otherwise a small C program that calls FUNC is generated
+# and handed to ac_fn_c_try_link; VAR becomes "yes" if that succeeds and "no"
+# if it does not.
+ac_fn_c_check_func ()
+{
+  # Use LINENO ($1) for log messages unless as_lineno is already set, and
+  # remember the previous stack value so it can be restored before returning.
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
+$as_echo_n "checking for $2... " >&6; }
+if { as_var=$3; eval "test \"\${$as_var+set}\" = set"; }; then :
+  $as_echo_n "(cached) " >&6
+else
+  # The here-document delimiter is unquoted, so every $2 below is replaced by
+  # FUNC before the program is written to conftest.$ac_ext.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+/* Define $2 to an innocuous variant, in case <limits.h> declares $2.
+   For example, HP-UX 11i <limits.h> declares gettimeofday.  */
+#define $2 innocuous_$2
+
+/* System header to define __stub macros and hopefully few prototypes,
+    which can conflict with char $2 (); below.
+    Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+    <limits.h> exists even on freestanding compilers.  */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef $2
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $2 ();
+/* The GNU C library defines this for functions which it implements
+    to always fail with ENOSYS.  Some functions are actually named
+    something starting with __ and the normal name is an alias.  */
+#if defined __stub_$2 || defined __stub___$2
+choke me
+#endif
+
+int
+main ()
+{
+return $2 ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  eval "$3=yes"
+else
+  eval "$3=no"
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+eval ac_res=\$$3
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+  # Restore the saved as_lineno_stack; when it ends up empty, unset as_lineno.
+  eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+
+} # ac_fn_c_check_func
+
+# ac_fn_cxx_try_link LINENO
+# -------------------------
+# Try to link conftest.$ac_ext with $ac_link; passes 0 (success) or 1 (failure) to as_fn_set_status.
+ac_fn_cxx_try_link ()
+{
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+  rm -f conftest.$ac_objext conftest$ac_exeext  # start without outputs from an earlier test
+  if { { ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_link") 2>conftest.err  # run the link command in a subshell; stderr goes to conftest.err
+  ac_status=$?
+  if test -s conftest.err; then
+    grep -v '^ *+' conftest.err >conftest.er1  # drop lines starting with "+" (presumably shell trace output)
+    cat conftest.er1 >&5
+    mv -f conftest.er1 conftest.err
+  fi
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; } && {
+        test -z "$ac_cxx_werror_flag" ||
+        test ! -s conftest.err
+       } && test -s conftest$ac_exeext && {
+        test "$cross_compiling" = yes ||
+        $as_test_x conftest$ac_exeext
+       }; then :  # status 0, no diagnostics if the werror flag is set, non-empty output that (unless cross compiling) passes $as_test_x
+  ac_retval=0
+else
+  $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+       ac_retval=1
+fi
+  # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information
+  # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would
+  # interfere with the next link command; also delete a directory that is
+  # left behind by Apple's compiler.  We do this before executing the actions.
+  rm -rf conftest.dSYM conftest_ipa8_conftest.oo
+  eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+  as_fn_set_status $ac_retval
+
+} # ac_fn_cxx_try_link
+
+# ac_fn_f77_try_link LINENO
+# -------------------------
+# Try to link conftest.$ac_ext with $ac_link; passes 0 (success) or 1 (failure) to as_fn_set_status.
+ac_fn_f77_try_link ()
+{
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+  rm -f conftest.$ac_objext conftest$ac_exeext  # start without outputs from an earlier test
+  if { { ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_link") 2>conftest.err  # run the link command in a subshell; stderr goes to conftest.err
+  ac_status=$?
+  if test -s conftest.err; then
+    grep -v '^ *+' conftest.err >conftest.er1  # drop lines starting with "+" (presumably shell trace output)
+    cat conftest.er1 >&5
+    mv -f conftest.er1 conftest.err
+  fi
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; } && {
+        test -z "$ac_f77_werror_flag" ||
+        test ! -s conftest.err
+       } && test -s conftest$ac_exeext && {
+        test "$cross_compiling" = yes ||
+        $as_test_x conftest$ac_exeext
+       }; then :  # status 0, no diagnostics if the werror flag is set, non-empty output that (unless cross compiling) passes $as_test_x
+  ac_retval=0
+else
+  $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+       ac_retval=1
+fi
+  # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information
+  # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would
+  # interfere with the next link command; also delete a directory that is
+  # left behind by Apple's compiler.  We do this before executing the actions.
+  rm -rf conftest.dSYM conftest_ipa8_conftest.oo
+  eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+  as_fn_set_status $ac_retval
+
+} # ac_fn_f77_try_link
+
+# ac_fn_c_check_decl LINENO SYMBOL VAR INCLUDES
+# ---------------------------------------------
+# Tests whether SYMBOL is declared after including INCLUDES, setting cache variable VAR to yes or no.
+ac_fn_c_check_decl ()
+{
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $2 is declared" >&5
+$as_echo_n "checking whether $2 is declared... " >&6; }
+if { as_var=$3; eval "test \"\${$as_var+set}\" = set"; }; then :  # VAR is already set: reuse it
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+$4
+int
+main ()
+{
+#ifndef $2
+  (void) $2;
+#endif
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :  # SYMBOL is a macro, or the reference to it compiled
+  eval "$3=yes"
+else
+  eval "$3=no"
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+eval ac_res=\$$3  # read VAR back (cached or freshly set) for the result message
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+  eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+
+} # ac_fn_c_check_decl
+
+# ac_fn_c_check_type LINENO TYPE VAR INCLUDES
+# -------------------------------------------
+# Tests whether TYPE exists after having included INCLUDES, setting cache
+# variable VAR accordingly (yes only if sizeof (TYPE) compiles and sizeof ((TYPE)) does not).
+ac_fn_c_check_type ()
+{
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
+$as_echo_n "checking for $2... " >&6; }
+if { as_var=$3; eval "test \"\${$as_var+set}\" = set"; }; then :  # VAR is already set: reuse it
+  $as_echo_n "(cached) " >&6
+else
+  eval "$3=no"  # default answer; only changed to yes further down
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+$4
+int
+main ()
+{
+if (sizeof ($2))
+        return 0;
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :  # sizeof (TYPE) compiled; now rule out that TYPE is an expression
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+$4
+int
+main ()
+{
+if (sizeof (($2)))
+           return 0;
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :  # sizeof ((TYPE)) compiled as well: VAR stays no
+
+else
+  eval "$3=yes"  # first program compiled, second did not
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+eval ac_res=\$$3  # read VAR back (cached or freshly set) for the result message
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+  eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+
+} # ac_fn_c_check_type
+
+# ac_fn_c_check_member LINENO AGGR MEMBER VAR INCLUDES
+# ----------------------------------------------------
+# Tries to find if the field MEMBER exists in type AGGR, after including
+# INCLUDES, setting cache variable VAR to yes or no accordingly.
+ac_fn_c_check_member ()
+{
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2.$3" >&5
+$as_echo_n "checking for $2.$3... " >&6; }
+if { as_var=$4; eval "test \"\${$as_var+set}\" = set"; }; then :  # VAR is already set: reuse it
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+$5
+int
+main ()
+{
+static $2 ac_aggr;
+if (ac_aggr.$3)
+return 0;
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :  # first attempt: MEMBER used directly as an if condition
+  eval "$4=yes"
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+$5
+int
+main ()
+{
+static $2 ac_aggr;
+if (sizeof ac_aggr.$3)
+return 0;
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :  # second attempt: only sizeof is applied to MEMBER
+  eval "$4=yes"
+else
+  eval "$4=no"  # neither program compiled
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+eval ac_res=\$$4  # read VAR back (cached or freshly set) for the result message
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+  eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+
+} # ac_fn_c_check_member
+
+# ac_fn_cxx_check_header_mongrel LINENO HEADER VAR INCLUDES
+# ---------------------------------------------------------
+# Tests whether HEADER exists, giving a warning if it cannot be compiled using
+# the include files in INCLUDES and setting the cache variable VAR
+# accordingly.
+ac_fn_cxx_check_header_mongrel ()
+{
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+  if { as_var=$3; eval "test \"\${$as_var+set}\" = set"; }; then :
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
+$as_echo_n "checking for $2... " >&6; }
+if { as_var=$3; eval "test \"\${$as_var+set}\" = set"; }; then :
+  $as_echo_n "(cached) " >&6
+fi
+eval ac_res=\$$3
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+else
+  # Is the header compilable?
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 usability" >&5
+$as_echo_n "checking $2 usability... " >&6; }
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+$4
+#include <$2>
+_ACEOF
+if ac_fn_cxx_try_compile "$LINENO"; then :
+  ac_header_compiler=yes
+else
+  ac_header_compiler=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_compiler" >&5
+$as_echo "$ac_header_compiler" >&6; }
+
+# Is the header present?
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 presence" >&5
+$as_echo_n "checking $2 presence... " >&6; }
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <$2>
+_ACEOF
+if ac_fn_cxx_try_cpp "$LINENO"; then :
+  ac_header_preproc=yes
+else
+  ac_header_preproc=no
+fi
+rm -f conftest.err conftest.$ac_ext
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_preproc" >&5
+$as_echo "$ac_header_preproc" >&6; }
+
+# So?  What about this header?
+case $ac_header_compiler:$ac_header_preproc:$ac_cxx_preproc_warn_flag in #((
+  yes:no: )
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&5
+$as_echo "$as_me: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&2;}
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5
+$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;}
+    ;;
+  no:yes:* )
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: present but cannot be compiled" >&5
+$as_echo "$as_me: WARNING: $2: present but cannot be compiled" >&2;}
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2:     check for missing prerequisite headers?" >&5
+$as_echo "$as_me: WARNING: $2:     check for missing prerequisite headers?" >&2;}
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: see the Autoconf documentation" >&5
+$as_echo "$as_me: WARNING: $2: see the Autoconf documentation" >&2;}
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2:     section \"Present But Cannot Be Compiled\"" >&5
+$as_echo "$as_me: WARNING: $2:     section \"Present But Cannot Be Compiled\"" >&2;}
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5
+$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;}
+( cat <<\_ASBOX
+## ssssssssssssssssssssssssssssssssss ##
+## Report this to gmp-bugs@gmplib.org ##
+## ssssssssssssssssssssssssssssssssss ##
+_ASBOX
+     ) | sed "s/^/$as_me: WARNING:     /" >&2
+    ;;
+esac
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
+$as_echo_n "checking for $2... " >&6; }
+if { as_var=$3; eval "test \"\${$as_var+set}\" = set"; }; then :
+  $as_echo_n "(cached) " >&6
+else
+  eval "$3=\$ac_header_compiler"
+fi
+eval ac_res=\$$3
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+fi
+  eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+
+} # ac_fn_cxx_check_header_mongrel
+
+# ac_fn_cxx_check_type LINENO TYPE VAR INCLUDES
+# ---------------------------------------------
+# Tests whether TYPE exists after having included INCLUDES, setting cache
+# variable VAR accordingly (yes only if sizeof (TYPE) compiles and sizeof ((TYPE)) does not).
+ac_fn_cxx_check_type ()
+{
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
+$as_echo_n "checking for $2... " >&6; }
+if { as_var=$3; eval "test \"\${$as_var+set}\" = set"; }; then :  # VAR is already set: reuse it
+  $as_echo_n "(cached) " >&6
+else
+  eval "$3=no"  # default answer; only changed to yes further down
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+$4
+int
+main ()
+{
+if (sizeof ($2))
+        return 0;
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_compile "$LINENO"; then :  # sizeof (TYPE) compiled; now rule out that TYPE is an expression
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+$4
+int
+main ()
+{
+if (sizeof (($2)))
+           return 0;
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_compile "$LINENO"; then :  # sizeof ((TYPE)) compiled as well: VAR stays no
+
+else
+  eval "$3=yes"  # first program compiled, second did not
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+eval ac_res=\$$3  # read VAR back (cached or freshly set) for the result message
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+  eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+
+} # ac_fn_cxx_check_type
+
+# ac_fn_c_compute_int LINENO EXPR VAR INCLUDES
+# --------------------------------------------
+# Tries to find the compile-time value of EXPR in a program that includes
+# INCLUDES, setting VAR accordingly. Returns whether the value could be
+# computed (ac_retval 0 if so, 1 otherwise, passed to as_fn_set_status).
+ac_fn_c_compute_int ()
+{
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+  if test "$cross_compiling" = yes; then  # test programs cannot be run: use compile-only probes
+    # Depending upon the size, compute the lo and hi bounds.  Each probe declares an array whose size is negative when its condition is false.
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+$4
+int
+main ()
+{
+static int test_array [1 - 2 * !(($2) >= 0)];
+test_array [0] = 0
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :  # EXPR >= 0: look upwards for an upper bound
+  ac_lo=0 ac_mid=0
+  while :; do
+    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+$4
+int
+main ()
+{
+static int test_array [1 - 2 * !(($2) <= $ac_mid)];
+test_array [0] = 0
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  ac_hi=$ac_mid; break  # EXPR <= mid: upper bound found
+else
+  as_fn_arith $ac_mid + 1 && ac_lo=$as_val
+                       if test $ac_lo -le $ac_mid; then  # mid + 1 did not grow (presumably arithmetic wrap-around): give up
+                         ac_lo= ac_hi=
+                         break
+                       fi
+                       as_fn_arith 2 '*' $ac_mid + 1 && ac_mid=$as_val  # next candidate: 2*mid + 1
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+  done
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+$4
+int
+main ()
+{
+static int test_array [1 - 2 * !(($2) < 0)];
+test_array [0] = 0
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :  # EXPR < 0: look downwards for a lower bound
+  ac_hi=-1 ac_mid=-1
+  while :; do
+    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+$4
+int
+main ()
+{
+static int test_array [1 - 2 * !(($2) >= $ac_mid)];
+test_array [0] = 0
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  ac_lo=$ac_mid; break  # EXPR >= mid: lower bound found
+else
+  as_fn_arith '(' $ac_mid ')' - 1 && ac_hi=$as_val
+                       if test $ac_mid -le $ac_hi; then  # mid - 1 did not shrink (presumably arithmetic wrap-around): give up
+                         ac_lo= ac_hi=
+                         break
+                       fi
+                       as_fn_arith 2 '*' $ac_mid && ac_mid=$as_val  # next candidate: 2*mid
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+  done
+else
+  ac_lo= ac_hi=  # neither sign probe compiled: leave both bounds empty
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+# Binary search between lo and hi bounds (skipped when both are empty).
+while test "x$ac_lo" != "x$ac_hi"; do
+  as_fn_arith '(' $ac_hi - $ac_lo ')' / 2 + $ac_lo && ac_mid=$as_val
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+$4
+int
+main ()
+{
+static int test_array [1 - 2 * !(($2) <= $ac_mid)];
+test_array [0] = 0
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  ac_hi=$ac_mid  # EXPR <= mid: shrink the interval from above
+else
+  as_fn_arith '(' $ac_mid ')' + 1 && ac_lo=$as_val  # EXPR > mid: shrink the interval from below
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+done
+case $ac_lo in #((
+?*) eval "$3=\$ac_lo"; ac_retval=0 ;;
+'') ac_retval=1 ;;
+esac
+  else  # not cross compiling: run a program that writes the value to conftest.val
+    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+$4
+static long int longval () { return $2; }
+static unsigned long int ulongval () { return $2; }
+#include <stdio.h>
+#include <stdlib.h>
+int
+main ()
+{
+
+  FILE *f = fopen ("conftest.val", "w");
+  if (! f)
+    return 1;
+  if (($2) < 0)
+    {
+      long int i = longval ();
+      if (i != ($2))
+       return 1;
+      fprintf (f, "%ld", i);
+    }
+  else
+    {
+      unsigned long int i = ulongval ();
+      if (i != ($2))
+       return 1;
+      fprintf (f, "%lu", i);
+    }
+  /* Do not output a trailing newline, as this causes \r\n confusion
+     on some platforms.  */
+  return ferror (f) || fclose (f) != 0;
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_run "$LINENO"; then :
+  echo >>conftest.val; read $3 <conftest.val; ac_retval=0  # append a newline, then read the value into VAR
+else
+  ac_retval=1
+fi
+rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
+  conftest.$ac_objext conftest.beam conftest.$ac_ext
+rm -f conftest.val
+
+  fi
+  eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+  as_fn_set_status $ac_retval
+
+} # ac_fn_c_compute_int
+cat >config.log <<_ACEOF
+This file contains any messages produced by compilers while
+running configure, to aid debugging if configure makes a mistake.
+
+It was created by GNU MP $as_me 5.0.5, which was
+generated by GNU Autoconf 2.65.  Invocation command line was
+
+  $ $0 $@
+
+_ACEOF
+exec 5>>config.log  # from here on, file descriptor 5 appends to config.log
+{
+cat <<_ASUNAME
+## --------- ##
+## Platform. ##
+## --------- ##
+
+hostname = `(hostname || uname -n) 2>/dev/null | sed 1q`
+uname -m = `(uname -m) 2>/dev/null || echo unknown`
+uname -r = `(uname -r) 2>/dev/null || echo unknown`
+uname -s = `(uname -s) 2>/dev/null || echo unknown`
+uname -v = `(uname -v) 2>/dev/null || echo unknown`
+
+/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown`
+/bin/uname -X     = `(/bin/uname -X) 2>/dev/null     || echo unknown`
+
+/bin/arch              = `(/bin/arch) 2>/dev/null              || echo unknown`
+/usr/bin/arch -k       = `(/usr/bin/arch -k) 2>/dev/null       || echo unknown`
+/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown`
+/usr/bin/hostinfo      = `(/usr/bin/hostinfo) 2>/dev/null      || echo unknown`
+/bin/machine           = `(/bin/machine) 2>/dev/null           || echo unknown`
+/usr/bin/oslevel       = `(/usr/bin/oslevel) 2>/dev/null       || echo unknown`
+/bin/universe          = `(/bin/universe) 2>/dev/null          || echo unknown`
+
+_ASUNAME
+
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR  # split $PATH on $PATH_SEPARATOR for the loop below
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.  # an empty PATH entry is logged as "."
+    $as_echo "PATH: $as_dir"
+  done
+IFS=$as_save_IFS
+
+} >&5
+
+cat >&5 <<_ACEOF
+
+
+## ----------- ##
+## Core tests. ##
+## ----------- ##
+
+_ACEOF
+
+
+# Keep a trace of the command line in ac_configure_args.
+# Strip out --no-create and --no-recursion so they do not pile up.
+# Strip out --silent because we don't want to record it for future runs.
+# Also quote any args containing shell meta-characters.
+# Make two passes to allow for proper duplicate-argument suppression.
+ac_configure_args=
+ac_configure_args0=
+ac_configure_args1=
+ac_must_keep_next=false
+for ac_pass in 1 2
+do
+  for ac_arg  # no "in" list: iterates over the positional parameters
+  do
+    case $ac_arg in
+    -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;;
+    -q | -quiet | --quiet | --quie | --qui | --qu | --q \
+    | -silent | --silent | --silen | --sile | --sil)
+      continue ;;
+    *\'*)
+      ac_arg=`$as_echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;;
+    esac
+    case $ac_pass in
+    1) as_fn_append ac_configure_args0 " '$ac_arg'" ;;  # pass 1 only collects every kept argument
+    2)
+      as_fn_append ac_configure_args1 " '$ac_arg'"
+      if test $ac_must_keep_next = true; then
+       ac_must_keep_next=false # Got value, back to normal.
+      else
+       case $ac_arg in
+         *=* | --config-cache | -C | -disable-* | --disable-* \
+         | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \
+         | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \
+         | -with-* | --with-* | -without-* | --without-* | --x)
+           case "$ac_configure_args0 " in
+             "$ac_configure_args1"*" '$ac_arg' "* ) continue ;;  # same argument occurs again later: drop this occurrence
+           esac
+           ;;
+         -* ) ac_must_keep_next=true ;;  # any other option: keep the following argument without the duplicate check
+       esac
+      fi
+      as_fn_append ac_configure_args " '$ac_arg'"
+      ;;
+    esac
+  done
+done
+{ ac_configure_args0=; unset ac_configure_args0;}
+{ ac_configure_args1=; unset ac_configure_args1;}
+
+# When interrupted or exit'd, clean up temporary files and complete
+# config.log.  We remove comments because anyway the quotes in there
+# would cause problems or look ugly.
+# WARNING: Use '\'' to represent an apostrophe within the trap.
+# WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug.
+trap 'exit_status=$?
+  # Save into config.log some information that might help in debugging.
+  {
+    echo
+
+    cat <<\_ASBOX
+## ---------------- ##
+## Cache variables. ##
+## ---------------- ##
+_ASBOX
+    echo
+    # The following way of writing the cache mishandles newlines in values,
+(
+  for ac_var in `(set) 2>&1 | sed -n '\''s/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'\''`; do
+    eval ac_val=\$$ac_var
+    case $ac_val in #(
+    *${as_nl}*)
+      case $ac_var in #(
+      *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5
+$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;;
+      esac
+      case $ac_var in #(
+      _ | IFS | as_nl) ;; #(
+      BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #(
+      *) { eval $ac_var=; unset $ac_var;} ;;
+      esac ;;
+    esac
+  done
+  (set) 2>&1 |
+    case $as_nl`(ac_space='\'' '\''; set) 2>&1` in #(
+    *${as_nl}ac_space=\ *)
+      sed -n \
+       "s/'\''/'\''\\\\'\'''\''/g;
+         s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\''\\2'\''/p"
+      ;; #(
+    *)
+      sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p"
+      ;;
+    esac |
+    sort
+)
+    echo
+
+    cat <<\_ASBOX
+## ----------------- ##
+## Output variables. ##
+## ----------------- ##
+_ASBOX
+    echo
+    for ac_var in $ac_subst_vars
+    do
+      eval ac_val=\$$ac_var
+      case $ac_val in
+      *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;;
+      esac
+      $as_echo "$ac_var='\''$ac_val'\''"
+    done | sort
+    echo
+
+    if test -n "$ac_subst_files"; then
+      cat <<\_ASBOX
+## ------------------- ##
+## File substitutions. ##
+## ------------------- ##
+_ASBOX
+      echo
+      for ac_var in $ac_subst_files
+      do
+       eval ac_val=\$$ac_var
+       case $ac_val in
+       *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;;
+       esac
+       $as_echo "$ac_var='\''$ac_val'\''"
+      done | sort
+      echo
+    fi
+
+    if test -s confdefs.h; then
+      cat <<\_ASBOX
+## ----------- ##
+## confdefs.h. ##
+## ----------- ##
+_ASBOX
+      echo
+      cat confdefs.h
+      echo
+    fi
+    test "$ac_signal" != 0 &&
+      $as_echo "$as_me: caught signal $ac_signal"
+    $as_echo "$as_me: exit $exit_status"
+  } >&5
+  rm -f core *.core core.conftest.* &&
+    rm -f -r conftest* confdefs* conf$$* $ac_clean_files &&
+    exit $exit_status
+' 0
+for ac_signal in 1 2 13 15; do  # conventionally HUP, INT, PIPE and TERM
+  trap 'ac_signal='$ac_signal'; as_fn_exit 1' $ac_signal
+done
+ac_signal=0  # 0 means no signal was caught; the exit trap above tests for this
+
+# confdefs.h avoids OS command line length limits that DEFS can exceed.
+rm -f -r conftest* confdefs.h  # remove leftovers before writing a fresh confdefs.h
+
+$as_echo "/* confdefs.h */" > confdefs.h
+
+# Predefined preprocessor variables: one #define per PACKAGE_* shell variable.
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_NAME "$PACKAGE_NAME"
+_ACEOF
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_TARNAME "$PACKAGE_TARNAME"
+_ACEOF
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_VERSION "$PACKAGE_VERSION"
+_ACEOF
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_STRING "$PACKAGE_STRING"
+_ACEOF
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT"
+_ACEOF
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_URL "$PACKAGE_URL"
+_ACEOF
+
+
+# Let the site file select an alternate cache file if it wants to.
+# Prefer an explicitly selected file ($CONFIG_SITE) to automatically selected ones.
+ac_site_file1=NONE  # NONE marks a slot with no file; the loop below skips it
+ac_site_file2=NONE
+if test -n "$CONFIG_SITE"; then
+  ac_site_file1=$CONFIG_SITE
+elif test "x$prefix" != xNONE; then
+  ac_site_file1=$prefix/share/config.site
+  ac_site_file2=$prefix/etc/config.site
+else
+  ac_site_file1=$ac_default_prefix/share/config.site
+  ac_site_file2=$ac_default_prefix/etc/config.site
+fi
+for ac_site_file in "$ac_site_file1" "$ac_site_file2"
+do
+  test "x$ac_site_file" = xNONE && continue
+  if test /dev/null != "$ac_site_file" && test -r "$ac_site_file"; then  # ignore /dev/null and unreadable files
+    { $as_echo "$as_me:${as_lineno-$LINENO}: loading site script $ac_site_file" >&5
+$as_echo "$as_me: loading site script $ac_site_file" >&6;}
+    sed 's/^/| /' "$ac_site_file" >&5  # copy the script into the log (fd 5), each line prefixed with "| "
+    . "$ac_site_file"  # source it into this shell
+  fi
+done
+
+if test -r "$cache_file"; then
+  # Some versions of bash will fail to source /dev/null (special files
+  # actually), so we avoid doing that.  DJGPP emulates it as a regular file.
+  if test /dev/null != "$cache_file" && test -f "$cache_file"; then
+    { $as_echo "$as_me:${as_lineno-$LINENO}: loading cache $cache_file" >&5
+$as_echo "$as_me: loading cache $cache_file" >&6;}
+    case $cache_file in
+      [\\/]* | ?:[\\/]* ) . "$cache_file";;  # absolute path (leading slash/backslash or drive letter): source as given
+      *)                      . "./$cache_file";;  # relative path: source it with an explicit ./ prefix
+    esac
+  fi
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: creating cache $cache_file" >&5
+$as_echo "$as_me: creating cache $cache_file" >&6;}
+  >$cache_file  # create an empty cache file; NOTE(review): $cache_file is not quoted here
+fi
+
+# Check that the precious variables saved in the cache have kept the same
+# value.  Any mismatch sets ac_cache_corrupted, which aborts configure below.
+ac_cache_corrupted=false
+for ac_var in $ac_precious_vars; do
+  eval ac_old_set=\$ac_cv_env_${ac_var}_set
+  eval ac_new_set=\$ac_env_${ac_var}_set
+  eval ac_old_val=\$ac_cv_env_${ac_var}_value
+  eval ac_new_val=\$ac_env_${ac_var}_value
+  case $ac_old_set,$ac_new_set in
+    set,)  # set according to the cache, but not set now
+      { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5
+$as_echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;}
+      ac_cache_corrupted=: ;;
+    ,set)  # set now, but not according to the cache
+      { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was not set in the previous run" >&5
+$as_echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;}
+      ac_cache_corrupted=: ;;
+    ,);;  # unset in both: nothing to compare
+    *)
+      if test "x$ac_old_val" != "x$ac_new_val"; then
+       # differences in whitespace do not lead to failure (the unquoted echo below normalizes it).
+       ac_old_val_w=`echo x $ac_old_val`
+       ac_new_val_w=`echo x $ac_new_val`
+       if test "$ac_old_val_w" != "$ac_new_val_w"; then
+         { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' has changed since the previous run:" >&5
+$as_echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;}
+         ac_cache_corrupted=:
+       else
+         { $as_echo "$as_me:${as_lineno-$LINENO}: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&5
+$as_echo "$as_me: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&2;}
+         eval $ac_var=\$ac_old_val  # keep the value recorded in the cache
+       fi
+       { $as_echo "$as_me:${as_lineno-$LINENO}:   former value:  \`$ac_old_val'" >&5
+$as_echo "$as_me:   former value:  \`$ac_old_val'" >&2;}
+       { $as_echo "$as_me:${as_lineno-$LINENO}:   current value: \`$ac_new_val'" >&5
+$as_echo "$as_me:   current value: \`$ac_new_val'" >&2;}
+      fi;;
+  esac
+  # Pass precious variables to config.status (as VAR=value entries in ac_configure_args).
+  if test "$ac_new_set" = set; then
+    case $ac_new_val in
+    *\'*) ac_arg=$ac_var=`$as_echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;;
+    *) ac_arg=$ac_var=$ac_new_val ;;
+    esac
+    case " $ac_configure_args " in
+      *" '$ac_arg' "*) ;; # Avoid dups.  Use of quotes ensures accuracy.
+      *) as_fn_append ac_configure_args " '$ac_arg'" ;;
+    esac
+  fi
+done
+if $ac_cache_corrupted; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+  { $as_echo "$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build" >&5
+$as_echo "$as_me: error: changes in the environment can compromise the build" >&2;}
+  as_fn_error "run \`make distclean' and/or \`rm $cache_file' and start over" "$LINENO" 5
+fi
+## -------------------- ##
+## Main body of script. ##
+## -------------------- ##
+
+ac_ext=c  # settings for C test programs: source extension plus command templates
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+
+
+
+
+
+
+
+
+# If --target is not used then $target_alias is empty, but if say
+# "./configure athlon-pc-freebsd3.5" is used, then all three of
+# $build_alias, $host_alias and $target_alias are set to
+# "athlon-pc-freebsd3.5".  Only a target that differs from the host is rejected.
+#
+if test -n "$target_alias" && test "$target_alias" != "$host_alias"; then
+  as_fn_error "--target is not appropriate for GMP
+Use --build=CPU-VENDOR-OS if you need to specify your CPU and/or system
+explicitly.  Use --host if cross-compiling (see \"Installing GMP\" in the
+manual for more on this)." "$LINENO" 5
+fi
+
+gmp_configm4="config.m4"
+gmp_tmpconfigm4=cnfm4.tmp
+gmp_tmpconfigm4i=cnfm4i.tmp
+gmp_tmpconfigm4p=cnfm4p.tmp
+rm -f $gmp_tmpconfigm4 $gmp_tmpconfigm4i $gmp_tmpconfigm4p  # start with no temporary fragments
+
+# CONFIG_TOP_SRCDIR is a path from the mpn builddir to the top srcdir.
+# The pattern here tests for an absolute path the same way as
+# _AC_OUTPUT_FILES in autoconf acgeneral.m4.
+case $srcdir in
+[\\/]* | ?:[\\/]* )  tmp="$srcdir"    ;;
+*)                       tmp="../$srcdir" ;;
+esac
+echo "define(<CONFIG_TOP_SRCDIR>,<\`$tmp'>)" >>$gmp_tmpconfigm4
+
+# All CPUs use asm-defs.m4 (its include line goes into the "i" fragment).
+echo "include(CONFIG_TOP_SRCDIR\`/mpn/asm-defs.m4')" >>$gmp_tmpconfigm4i
+
+ac_aux_dir=
+for ac_dir in "$srcdir" "$srcdir/.." "$srcdir/../.."; do
+  for ac_t in install-sh install.sh shtool; do
+    if test -f "$ac_dir/$ac_t"; then
+      ac_aux_dir=$ac_dir
+      ac_install_sh="$ac_aux_dir/$ac_t -c"
+      break 2
+    fi
+  done
+done
+if test -z "$ac_aux_dir"; then
+  as_fn_error "cannot find install-sh, install.sh, or shtool in \"$srcdir\" \"$srcdir/..\" \"$srcdir/../..\"" "$LINENO" 5
+fi
+
+# These three variables are undocumented and unsupported,
+# and are intended to be withdrawn in a future Autoconf release.
+# They can cause serious problems if a builder's source tree is in a directory
+# whose full name contains unusual characters.
+ac_config_guess="$SHELL $ac_aux_dir/config.guess"  # Please don't use this var.
+ac_config_sub="$SHELL $ac_aux_dir/config.sub"  # Please don't use this var.
+ac_configure="$SHELL $ac_aux_dir/configure"  # Please don't use this var.
+
+
+# Make sure we can run config.sub.
+$SHELL "$ac_aux_dir/config.sub" sun4 >/dev/null 2>&1 ||
+  as_fn_error "cannot run $SHELL $ac_aux_dir/config.sub" "$LINENO" 5
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking build system type" >&5
+$as_echo_n "checking build system type... " >&6; }
+if test "${ac_cv_build+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_build_alias=$build_alias  # use $build_alias if given, else ask config.guess
+test "x$ac_build_alias" = x &&
+  ac_build_alias=`$SHELL "$ac_aux_dir/config.guess"`
+test "x$ac_build_alias" = x &&
+  as_fn_error "cannot guess build type; you must specify one" "$LINENO" 5
+ac_cv_build=`$SHELL "$ac_aux_dir/config.sub" $ac_build_alias` ||
+  as_fn_error "$SHELL $ac_aux_dir/config.sub $ac_build_alias failed" "$LINENO" 5
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_build" >&5
+$as_echo "$ac_cv_build" >&6; }
+case $ac_cv_build in
+*-*-*) ;;
+*) as_fn_error "invalid value of canonical build" "$LINENO" 5;;
+esac
+build=$ac_cv_build
+ac_save_IFS=$IFS; IFS='-'  # split the canonical name on dashes
+set x $ac_cv_build
+shift
+build_cpu=$1
+build_vendor=$2
+shift; shift  # whatever remains after cpu and vendor is the OS part
+# Remember, the first character of IFS is used to create $*,
+# except with old shells:
+build_os=$*
+IFS=$ac_save_IFS
+case $build_os in *\ *) build_os=`echo "$build_os" | sed 's/ /-/g'`;; esac
+
+
+# Determine the canonical host system type.  A value already present in
+# ac_cv_host is reused; otherwise the host defaults to the build type when
+# $host_alias is empty, and is $host_alias run through config.sub when not.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking host system type" >&5
+$as_echo_n "checking host system type... " >&6; }
+if test "${ac_cv_host+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test "x$host_alias" = x; then
+  ac_cv_host=$ac_cv_build
+else
+  ac_cv_host=`$SHELL "$ac_aux_dir/config.sub" $host_alias` ||
+    as_fn_error "$SHELL $ac_aux_dir/config.sub $host_alias failed" "$LINENO" 5
+fi
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_host" >&5
+$as_echo "$ac_cv_host" >&6; }
+# The canonical name must have at least three dash-separated fields.
+case $ac_cv_host in
+*-*-*) ;;
+*) as_fn_error "invalid value of canonical host" "$LINENO" 5;;
+esac
+host=$ac_cv_host
+# Split the name on "-" via IFS: the first field is the cpu, the second the
+# vendor, and everything remaining forms the os.
+ac_save_IFS=$IFS; IFS='-'
+set x $ac_cv_host
+shift
+host_cpu=$1
+host_vendor=$2
+shift; shift
+# Remember, the first character of IFS is used to create $*,
+# except with old shells:
+host_os=$*
+IFS=$ac_save_IFS
+# If $* was joined with spaces instead of dashes, put the dashes back.
+case $host_os in *\ *) host_os=`echo "$host_os" | sed 's/ /-/g'`;; esac
+
+
+
+# Automake API version; used further down to build the default
+# aclocal-<version> and automake-<version> command names.
+am__api_version='1.11'
+
+# Find a good install program.  We prefer a C program (faster),
+# so one script is as good as another.  But avoid the broken or
+# incompatible versions:
+# SysV /etc/install, /usr/sbin/install
+# SunOS /usr/etc/install
+# IRIX /sbin/install
+# AIX /bin/install
+# AmigaOS /C/install, which installs bootblocks on floppy discs
+# AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag
+# AFS /usr/afsws/bin/install, which mishandles nonexistent args
+# SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff"
+# OS/2's system install, which has a completely different semantic
+# ./install, which can be erroneously created by make from ./install.sh.
+# Reject install programs that cannot install multiple files.
+#
+# The search only runs when INSTALL is empty and ac_cv_path_install is not
+# already set; a user-supplied INSTALL is reported unchanged.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for a BSD-compatible install" >&5
+$as_echo_n "checking for a BSD-compatible install... " >&6; }
+if test -z "$INSTALL"; then
+if test "${ac_cv_path_install+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    # Account for people who put trailing slashes in PATH elements.
+case $as_dir/ in #((
+  ./ | .// | /[cC]/* | \
+  /etc/* | /usr/sbin/* | /usr/etc/* | /sbin/* | /usr/afsws/bin/* | \
+  ?:[\\/]os2[\\/]install[\\/]* | ?:[\\/]OS2[\\/]INSTALL[\\/]* | \
+  /usr/ucb/* ) ;;
+  *)
+    # OSF1 and SCO ODT 3.0 have their own names for install.
+    # Don't use installbsd from OSF since it installs stuff as root
+    # by default.
+    for ac_prog in ginstall scoinst install; do
+      for ac_exec_ext in '' $ac_executable_extensions; do
+       if { test -f "$as_dir/$ac_prog$ac_exec_ext" && $as_test_x "$as_dir/$ac_prog$ac_exec_ext"; }; then
+         if test $ac_prog = install &&
+           grep dspmsg "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then
+           # AIX install.  It has an incompatible calling convention.
+           :
+         elif test $ac_prog = install &&
+           grep pwplus "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then
+           # program-specific install script used by HP pwplus--don't use.
+           :
+         else
+           # Trial run: install two files into a directory at once.  The
+           # candidate is accepted only if both sources still exist and
+           # both copies arrived non-empty.
+           rm -rf conftest.one conftest.two conftest.dir
+           echo one > conftest.one
+           echo two > conftest.two
+           mkdir conftest.dir
+           if "$as_dir/$ac_prog$ac_exec_ext" -c conftest.one conftest.two "`pwd`/conftest.dir" &&
+             test -s conftest.one && test -s conftest.two &&
+             test -s conftest.dir/conftest.one &&
+             test -s conftest.dir/conftest.two
+           then
+             ac_cv_path_install="$as_dir/$ac_prog$ac_exec_ext -c"
+             # Stop searching: leaves the extension, program and PATH loops.
+             break 3
+           fi
+         fi
+       fi
+      done
+    done
+    ;;
+esac
+
+  done
+IFS=$as_save_IFS
+
+rm -rf conftest.one conftest.two conftest.dir
+
+fi
+  if test "${ac_cv_path_install+set}" = set; then
+    INSTALL=$ac_cv_path_install
+  else
+    # As a last resort, use the slow shell script.  Don't cache a
+    # value for INSTALL within a source directory, because that will
+    # break other packages using the cache if that directory is
+    # removed, or if the value is a relative name.
+    INSTALL=$ac_install_sh
+  fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $INSTALL" >&5
+$as_echo "$INSTALL" >&6; }
+
+# Use test -z because SunOS4 sh mishandles braces in ${var-val}.
+# It thinks the first close brace ends the variable substitution.
+# The single-quoted defaults keep ${INSTALL} unexpanded in the stored value.
+test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}'
+
+test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL}'
+
+test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644'
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether build environment is sane" >&5
+$as_echo_n "checking whether build environment is sane... " >&6; }
+# Just in case
+sleep 1
+echo timestamp > conftest.file
+# Reject unsafe characters in $srcdir or the absolute working directory
+# name.  Accept space and tab only in the latter.
+am_lf='
+'
+case `pwd` in
+  *[\\\"\#\$\&\'\`$am_lf]*)
+    as_fn_error "unsafe absolute working directory name" "$LINENO" 5;;
+esac
+case $srcdir in
+  *[\\\"\#\$\&\'\`$am_lf\ \    ]*)
+    as_fn_error "unsafe srcdir value: \`$srcdir'" "$LINENO" 5;;
+esac
+
+# Do `set' in a subshell so we don't clobber the current shell's
+# arguments.  Must try -L first in case configure is actually a
+# symlink; some systems play weird games with the mod time of symlinks
+# (eg FreeBSD returns the mod time of the symlink's containing
+# directory).
+if (
+   set X `ls -Lt "$srcdir/configure" conftest.file 2> /dev/null`
+   if test "$*" = "X"; then
+      # -L didn't work.
+      set X `ls -t "$srcdir/configure" conftest.file`
+   fi
+   rm -f conftest.file
+   if test "$*" != "X $srcdir/configure conftest.file" \
+      && test "$*" != "X conftest.file $srcdir/configure"; then
+
+      # If neither matched, then we have a broken ls.  This can happen
+      # if, for instance, CONFIG_SHELL is bash and it inherits a
+      # broken ls alias from the environment.  This has actually
+      # happened.  Such a system could not be considered "sane".
+      as_fn_error "ls -t appears to fail.  Make sure there is not a broken
+alias in your environment" "$LINENO" 5
+   fi
+
+   test "$2" = conftest.file
+   )
+then
+   # Ok.
+   :
+else
+   as_fn_error "newly created file is older than distributed files!
+Check your system clock" "$LINENO" 5
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+# Build the sed script in program_transform_name: prepend a command that
+# inserts $program_prefix at the start of a name and/or one that appends
+# $program_suffix at its end, unless the respective value is NONE.
+test "$program_prefix" != NONE &&
+  program_transform_name="s&^&$program_prefix&;$program_transform_name"
+# Use a double $ so make ignores it.
+test "$program_suffix" != NONE &&
+  program_transform_name="s&\$&$program_suffix&;$program_transform_name"
+# Double any \ or $.
+# By default was `s,x,x', remove it if useless.
+ac_script='s/[\\$]/&&/g;s/;s,x,x,$//'
+program_transform_name=`$as_echo "$program_transform_name" | sed "$ac_script"`
+
+# expand $ac_aux_dir to an absolute path
+am_aux_dir=`cd $ac_aux_dir && pwd`
+
+if test x"${MISSING+set}" != xset; then
+  case $am_aux_dir in
+  *\ * | *\    *)
+    MISSING="\${SHELL} \"$am_aux_dir/missing\"" ;;
+  *)
+    MISSING="\${SHELL} $am_aux_dir/missing" ;;
+  esac
+fi
+# Use eval to expand $SHELL
+if eval "$MISSING --run true"; then
+  am_missing_run="$MISSING --run "
+else
+  am_missing_run=
+  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: \`missing' script is too old or missing" >&5
+$as_echo "$as_me: WARNING: \`missing' script is too old or missing" >&2;}
+fi
+
+if test x"${install_sh}" != xset; then
+  case $am_aux_dir in
+  *\ * | *\    *)
+    install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;;
+  *)
+    install_sh="\${SHELL} $am_aux_dir/install-sh"
+  esac
+fi
+
+# Installed binaries are usually stripped using `strip' when the user
+# runs `make install-strip'.  However `strip' might not be the right
+# tool to use in cross-compilation environments, therefore Automake
+# will honor the `STRIP' environment variable to overrule this program.
+# Look for a strip program unless $cross_compiling is "no".  First the
+# ${ac_tool_prefix}-prefixed name is searched on $PATH (when a prefix is
+# set), then plain "strip"; if neither is found STRIP becomes ":".
+if test "$cross_compiling" != no; then
+  if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}strip", so it can be a program name with args.
+set dummy ${ac_tool_prefix}strip; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_STRIP+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$STRIP"; then
+  ac_cv_prog_STRIP="$STRIP" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_prog_STRIP="${ac_tool_prefix}strip"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+STRIP=$ac_cv_prog_STRIP
+if test -n "$STRIP"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $STRIP" >&5
+$as_echo "$STRIP" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+# Nothing found (or no tool prefix): fall back to an unprefixed "strip".
+if test -z "$ac_cv_prog_STRIP"; then
+  ac_ct_STRIP=$STRIP
+  # Extract the first word of "strip", so it can be a program name with args.
+set dummy strip; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_ac_ct_STRIP+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_STRIP"; then
+  ac_cv_prog_ac_ct_STRIP="$ac_ct_STRIP" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_prog_ac_ct_STRIP="strip"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_STRIP=$ac_cv_prog_ac_ct_STRIP
+if test -n "$ac_ct_STRIP"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_STRIP" >&5
+$as_echo "$ac_ct_STRIP" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+  if test "x$ac_ct_STRIP" = x; then
+    STRIP=":"
+  else
+    # Warn once (tracked through ac_tool_warned) when cross compiling with
+    # a tool that lacks the host-triplet prefix.
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    STRIP=$ac_ct_STRIP
+  fi
+else
+  STRIP="$ac_cv_prog_STRIP"
+fi
+
+fi
+# The backslash keeps $(install_sh) literal in the stored value.
+INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s"
+
+# Choose the command for MKDIR_P.  Unless MKDIR_P is already set, $PATH plus
+# /opt/sfw/bin is searched for a mkdir or gmkdir whose --version output
+# identifies it as GNU coreutils, coreutils, or fileutils 4.1*; when none
+# qualifies, "$ac_install_sh -d" is used instead.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for a thread-safe mkdir -p" >&5
+$as_echo_n "checking for a thread-safe mkdir -p... " >&6; }
+if test -z "$MKDIR_P"; then
+  if test "${ac_cv_path_mkdir+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH$PATH_SEPARATOR/opt/sfw/bin
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_prog in mkdir gmkdir; do
+        for ac_exec_ext in '' $ac_executable_extensions; do
+          { test -f "$as_dir/$ac_prog$ac_exec_ext" && $as_test_x "$as_dir/$ac_prog$ac_exec_ext"; } || continue
+          case `"$as_dir/$ac_prog$ac_exec_ext" --version 2>&1` in #(
+            'mkdir (GNU coreutils) '* | \
+            'mkdir (coreutils) '* | \
+            'mkdir (fileutils) '4.1*)
+              ac_cv_path_mkdir=$as_dir/$ac_prog$ac_exec_ext
+              break 3;;
+          esac
+        done
+       done
+  done
+IFS=$as_save_IFS
+
+fi
+
+  # Remove a directory literally named "--version" if one exists --
+  # presumably left behind by a mkdir that took the option as a name.
+  test -d ./--version && rmdir ./--version
+  if test "${ac_cv_path_mkdir+set}" = set; then
+    MKDIR_P="$ac_cv_path_mkdir -p"
+  else
+    # As a last resort, use the slow shell script.  Don't cache a
+    # value for MKDIR_P within a source directory, because that will
+    # break other packages using the cache if that directory is
+    # removed, or if the value is a relative name.
+    MKDIR_P="$ac_install_sh -d"
+  fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $MKDIR_P" >&5
+$as_echo "$MKDIR_P" >&6; }
+
+# mkdir_p mirrors MKDIR_P, except that a value containing a slash which is
+# neither absolute nor starting with "$" gets $(top_builddir)/ prepended.
+mkdir_p="$MKDIR_P"
+case $mkdir_p in
+  [\\/$]* | ?:[\\/]*) ;;
+  */*) mkdir_p="\$(top_builddir)/$mkdir_p" ;;
+esac
+
+for ac_prog in gawk mawk nawk awk
+do
+  # Extract the first word of "$ac_prog", so it can be a program name with args.
+set dummy $ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_AWK+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$AWK"; then
+  ac_cv_prog_AWK="$AWK" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_prog_AWK="$ac_prog"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+AWK=$ac_cv_prog_AWK
+if test -n "$AWK"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $AWK" >&5
+$as_echo "$AWK" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+  test -n "$AWK" && break
+done
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${MAKE-make} sets \$(MAKE)" >&5
+$as_echo_n "checking whether ${MAKE-make} sets \$(MAKE)... " >&6; }
+set x ${MAKE-make}
+ac_make=`$as_echo "$2" | sed 's/+/p/g; s/[^a-zA-Z0-9_]/_/g'`
+if { as_var=ac_cv_prog_make_${ac_make}_set; eval "test \"\${$as_var+set}\" = set"; }; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat >conftest.make <<\_ACEOF
+SHELL = /bin/sh
+all:
+       @echo '@@@%%%=$(MAKE)=@@@%%%'
+_ACEOF
+# GNU make sometimes prints "make[1]: Entering...", which would confuse us.
+case `${MAKE-make} -f conftest.make 2>/dev/null` in
+  *@@@%%%=?*=@@@%%%*)
+    eval ac_cv_prog_make_${ac_make}_set=yes;;
+  *)
+    eval ac_cv_prog_make_${ac_make}_set=no;;
+esac
+rm -f conftest.make
+fi
+if eval test \$ac_cv_prog_make_${ac_make}_set = yes; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+  SET_MAKE=
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+  SET_MAKE="MAKE=${MAKE-make}"
+fi
+
+# Probe whether a directory whose name starts with a dot can be created
+# here: am__leading_dot is "." when it can and "_" when it cannot.
+rm -rf .tst 2>/dev/null
+mkdir .tst 2>/dev/null
+am__leading_dot=_
+test -d .tst && am__leading_dot=.
+rmdir .tst 2>/dev/null
+
+# Detect an out-of-tree build by comparing the absolute source directory
+# with the current directory.
+if test "`cd $srcdir && pwd`" != "`pwd`"; then
+  # Use -I$(srcdir) only when $(srcdir) != ., so that make's output
+  # is not polluted with repeated "-I."
+  am__isrc=' -I$(srcdir)'
+  # test to see if srcdir already configured
+  # (a config.status file in the source directory is taken as the sign)
+  if test -f $srcdir/config.status; then
+    as_fn_error "source directory already configured; run \"make distclean\" there first" "$LINENO" 5
+  fi
+fi
+
+# test whether we have cygpath
+if test -z "$CYGPATH_W"; then
+  if (cygpath --version) >/dev/null 2>/dev/null; then
+    CYGPATH_W='cygpath -w'
+  else
+    CYGPATH_W=echo
+  fi
+fi
+
+
+# Package name and version, also appended to confdefs.h as the C macros
+# PACKAGE and VERSION.
+PACKAGE='gmp'
+VERSION='5.0.5'
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE "$PACKAGE"
+#define VERSION "$VERSION"
+_ACEOF
+
+# Some tools Automake needs.
+
+ACLOCAL=${ACLOCAL-"${am_missing_run}aclocal-${am__api_version}"}
+
+
+AUTOCONF=${AUTOCONF-"${am_missing_run}autoconf"}
+
+
+AUTOMAKE=${AUTOMAKE-"${am_missing_run}automake-${am__api_version}"}
+
+
+AUTOHEADER=${AUTOHEADER-"${am_missing_run}autoheader"}
+
+
+MAKEINFO=${MAKEINFO-"${am_missing_run}makeinfo"}
+
+# We need awk for the "check" target.  The system "awk" is bad on
+# some platforms.
+# Always define AMTAR for backward compatibility.
+
+AMTAR=${AMTAR-"${am_missing_run}tar"}
+
+am__tar='${AMTAR} chof - "$$tardir"'; am__untar='${AMTAR} xf -'
+
+
+
+
+
+ac_config_headers="$ac_config_headers config.h:config.in"
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to enable maintainer-specific portions of Makefiles" >&5
+$as_echo_n "checking whether to enable maintainer-specific portions of Makefiles... " >&6; }
+    # Check whether --enable-maintainer-mode was given.
+if test "${enable_maintainer_mode+set}" = set; then :
+  enableval=$enable_maintainer_mode; USE_MAINTAINER_MODE=$enableval
+else
+  USE_MAINTAINER_MODE=no
+fi
+
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $USE_MAINTAINER_MODE" >&5
+$as_echo "$USE_MAINTAINER_MODE" >&6; }
+   if test $USE_MAINTAINER_MODE = yes; then
+  MAINTAINER_MODE_TRUE=
+  MAINTAINER_MODE_FALSE='#'
+else
+  MAINTAINER_MODE_TRUE='#'
+  MAINTAINER_MODE_FALSE=
+fi
+
+  MAINT=$MAINTAINER_MODE_TRUE
+
+
+
+
+# Check whether --enable-assert was given.
+if test "${enable_assert+set}" = set; then :
+  enableval=$enable_assert; case $enableval in
+yes|no) ;;
+*) as_fn_error "bad value $enableval for --enable-assert, need yes or no" "$LINENO" 5 ;;
+esac
+else
+  enable_assert=no
+fi
+
+
+if test "$enable_assert" = "yes"; then
+
+$as_echo "#define WANT_ASSERT 1" >>confdefs.h
+
+  want_assert_01=1
+else
+  want_assert_01=0
+fi
+
+echo "define(<WANT_ASSERT>,$want_assert_01)" >> $gmp_tmpconfigm4
+
+
+
+# --enable-alloca: validate the requested allocation method; the default
+# when the option is absent is "reentrant".
+case ${enable_alloca+set} in
+set)
+  enableval=$enable_alloca
+  case $enableval in
+  alloca|malloc-reentrant|malloc-notreentrant|yes|no|reentrant|notreentrant|debug) ;;
+  *)
+    as_fn_error "bad value $enableval for --enable-alloca, need one of:
+yes no reentrant notreentrant alloca malloc-reentrant malloc-notreentrant debug" "$LINENO" 5 ;;
+  esac
+  ;;
+*)
+  enable_alloca=reentrant
+  ;;
+esac
+
+
+
+# IMPROVE ME: The default for C++ is disabled.  The tests currently
+# performed below for a working C++ compiler are not particularly strong,
+# and in general can't be expected to get the right setup on their own.  The
+# most significant problem is getting the ABI the same.  Defaulting CXXFLAGS
+# to CFLAGS takes only a small step towards this.  It's also probably worth
+# worrying whether the C and C++ runtimes from say gcc and a vendor C++ can
+# work together.  Some rather broken C++ installations were encountered
+# during testing, and though such things clearly aren't GMP's problem, if
+# --enable-cxx=detect were to be the default then some careful checks of
+# which, if any, C++ compiler on the system is up to scratch would be
+# wanted.
+#
+# --enable-cxx: accepts yes, no or detect; defaults to no.
+case ${enable_cxx+set} in
+set)
+  enableval=$enable_cxx
+  case $enableval in
+  yes|no|detect) ;;
+  *) as_fn_error "bad value $enableval for --enable-cxx, need yes/no/detect" "$LINENO" 5 ;;
+  esac
+  ;;
+*)
+  enable_cxx=no
+  ;;
+esac
+
+
+
+# --enable-fft: accepts yes or no; enabled by default.  WANT_FFT is added to
+# confdefs.h when the option ends up as yes.
+case ${enable_fft+set} in
+set)
+  enableval=$enable_fft
+  case $enableval in
+  yes|no) ;;
+  *) as_fn_error "bad value $enableval for --enable-fft, need yes or no" "$LINENO" 5 ;;
+  esac
+  ;;
+*)
+  enable_fft=yes
+  ;;
+esac
+
+if test "$enable_fft" = "yes"; then
+  $as_echo "#define WANT_FFT 1" >>confdefs.h
+fi
+
+
+# --enable-old-fft-full: accepts yes or no; disabled by default.
+# WANT_OLD_FFT_FULL is added to confdefs.h when the option is yes.
+case ${enable_old_fft_full+set} in
+set)
+  enableval=$enable_old_fft_full
+  case $enableval in
+  yes|no) ;;
+  *) as_fn_error "bad value $enableval for --enable-old-fft-full, need yes or no" "$LINENO" 5 ;;
+  esac
+  ;;
+*)
+  enable_old_fft_full=no
+  ;;
+esac
+
+if test "$enable_old_fft_full" = "yes"; then
+  $as_echo "#define WANT_OLD_FFT_FULL 1" >>confdefs.h
+fi
+
+
+# --enable-mpbsd: accepts yes or no; disabled by default.  The result picks
+# which of WANT_MPBSD_TRUE / WANT_MPBSD_FALSE is empty and which is '#'.
+case ${enable_mpbsd+set} in
+set)
+  enableval=$enable_mpbsd
+  case $enableval in
+  yes|no) ;;
+  *) as_fn_error "bad value $enableval for --enable-mpbsd, need yes or no" "$LINENO" 5 ;;
+  esac
+  ;;
+*)
+  enable_mpbsd=no
+  ;;
+esac
+
+# Start from the "disabled" pair and flip it only for yes.
+WANT_MPBSD_TRUE='#'
+WANT_MPBSD_FALSE=
+if test "$enable_mpbsd" = "yes"; then
+  WANT_MPBSD_TRUE=
+  WANT_MPBSD_FALSE='#'
+fi
+
+
+
+# --enable-nails: accepts yes, no, or an even number of one or two digits;
+# odd numbers and anything else are rejected.  Defaults to no.
+case ${enable_nails+set} in
+set)
+  enableval=$enable_nails
+  case $enableval in
+  yes|no|[02468]|[0-9][02468]) ;;
+  *[13579])
+    as_fn_error "bad value $enableval for --enable-nails, only even nail sizes supported" "$LINENO" 5 ;;
+  *)
+    as_fn_error "bad value $enableval for --enable-nails, need yes/no/number" "$LINENO" 5 ;;
+  esac
+  ;;
+*)
+  enable_nails=no
+  ;;
+esac
+
+# Translate the option into a bit count: yes means 2, no means 0, and a
+# number is used as given.
+if test "$enable_nails" = yes; then
+  GMP_NAIL_BITS=2
+elif test "$enable_nails" = no; then
+  GMP_NAIL_BITS=0
+else
+  GMP_NAIL_BITS=$enable_nails
+fi
+
+
+
+# --enable-profiling: accepts no, prof, gprof or instrument; defaults to no.
+case ${enable_profiling+set} in
+set)
+  enableval=$enable_profiling
+  case $enableval in
+  no|prof|gprof|instrument) ;;
+  *) as_fn_error "bad value $enableval for --enable-profiling, need no/prof/gprof/instrument" "$LINENO" 5 ;;
+  esac
+  ;;
+*)
+  enable_profiling=no
+  ;;
+esac
+
+# Add the macro matching the selected profiling method to confdefs.h;
+# "no" adds nothing.
+if test "$enable_profiling" = prof; then
+  $as_echo "#define WANT_PROFILING_PROF 1" >>confdefs.h
+elif test "$enable_profiling" = gprof; then
+  $as_echo "#define WANT_PROFILING_GPROF 1" >>confdefs.h
+elif test "$enable_profiling" = instrument; then
+  $as_echo "#define WANT_PROFILING_INSTRUMENT 1" >>confdefs.h
+fi
+
+# The m4 configuration file receives the method name itself.
+echo "define(<WANT_PROFILING>,<\`$enable_profiling'>)" >> $gmp_tmpconfigm4
+
+# -fomit-frame-pointer is incompatible with -pg on some chips
+fomit_frame_pointer="-fomit-frame-pointer"
+if test "$enable_profiling" = gprof; then
+  fomit_frame_pointer=
+fi
+
+
+
+# --with-readline: accepts yes, no or detect; defaults to detect.
+case ${with_readline+set} in
+set)
+  withval=$with_readline
+  case $withval in
+  yes|no|detect) ;;
+  *) as_fn_error "bad value $withval for --with-readline, need yes/no/detect" "$LINENO" 5 ;;
+  esac
+  ;;
+*)
+  with_readline=detect
+  ;;
+esac
+
+
+
+# --enable-fat: accepts yes or no; defaults to no.
+case ${enable_fat+set} in
+set)
+  enableval=$enable_fat
+  case $enableval in
+  yes|no) ;;
+  *) as_fn_error "bad value $enableval for --enable-fat, need yes or no" "$LINENO" 5 ;;
+  esac
+  ;;
+*)
+  enable_fat=no
+  ;;
+esac
+
+
+
+# --enable-minithres: accepts yes or no; defaults to no.
+case ${enable_minithres+set} in
+set)
+  enableval=$enable_minithres
+  case $enableval in
+  yes|no) ;;
+  *) as_fn_error "bad value $enableval for --enable-minithres, need yes or no" "$LINENO" 5 ;;
+  esac
+  ;;
+*)
+  enable_minithres=no
+  ;;
+esac
+
+
+
+
+tmp_host=`echo $host_cpu | sed 's/\./_/'`
+cat >>confdefs.h <<_ACEOF
+#define HAVE_HOST_CPU_$tmp_host 1
+_ACEOF
+
+
+echo "define_not_for_expansion(\`HAVE_HOST_CPU_$tmp_host')" >> $gmp_tmpconfigm4p
+
+
+
+
+
+
+# Table of compilers, options, and mpn paths.  This code has various related
+# purposes
+#
+#   - better default CC/CFLAGS selections than autoconf otherwise gives
+#   - default CC/CFLAGS selections for extra CPU types specific to GMP
+#   - a few tests for known bad compilers
+#   - choice of ABIs on suitable systems
+#   - selection of corresponding mpn search path
+#
+# After GMP specific searches and tests, the standard autoconf AC_PROG_CC is
+# called.  User selections of CC etc are respected.
+#
+# Care is taken not to use macros like AC_TRY_COMPILE during the GMP
+# pre-testing, since they of course depend on AC_PROG_CC, and also some of
+# them cache their results, which is not wanted.
+#
+# The ABI selection mechanism is unique to GMP.  All that reaches autoconf
+# is a different selection of CC/CFLAGS according to the best ABI the system
+# supports, and/or what the user selects.  Naturally the mpn assembler code
+# selected is very dependent on the ABI.
+#
+# The closest the standard tools come to a notion of ABI is something like
+# "sparc64" which encodes a CPU and an ABI together.  This doesn't seem to
+# scale well for GMP, where exact CPU types like "ultrasparc2" are wanted,
+# separate from the ABI used on them.
+#
+#
+# The variables set here are
+#
+#   cclist              the compiler choices
+#   xx_cflags           flags for compiler xx
+#   xx_cflags_maybe     flags for compiler xx, if they work
+#   xx_cppflags         cpp flags for compiler xx
+#   xx_cflags_optlist   list of sets of optional flags
+#   xx_cflags_yyy       set yyy of optional flags for compiler xx
+#   xx_ldflags          -Wc,-foo flags for libtool linking with compiler xx
+#   ar_flags            extra flags for $AR
+#   nm_flags            extra flags for $NM
+#   limb                limb size, can be "longlong"
+#   path                mpn search path
+#   extra_functions     extra mpn functions
+#   fat_path            fat binary mpn search path [if fat binary desired]
+#   fat_functions       fat functions
+#   fat_thresholds      fat thresholds
+#
+# Suppose xx_cflags_optlist="arch", then flags from $xx_cflags_arch are
+# tried, and the first flag that works will be used.  An optlist like "arch
+# cpu optimize" can be used to get multiple independent sets of flags tried.
+# The first that works from each will be used.  If no flag in a set works
+# then nothing from that set is added.
+#
+# For multiple ABIs, the scheme extends as follows.
+#
+#   abilist               set of ABI choices
+#   cclist_aa             compiler choices in ABI aa
+#   xx_aa_cflags          flags for xx in ABI aa
+#   xx_aa_cflags_maybe    flags for xx in ABI aa, if they work
+#   xx_aa_cppflags        cpp flags for xx in ABI aa
+#   xx_aa_cflags_optlist  list of sets of optional flags in ABI aa
+#   xx_aa_cflags_yyy      set yyy of optional flags for compiler xx in ABI aa
+#   xx_aa_ldflags         -Wc,-foo flags for libtool linking
+#   ar_aa_flags           extra flags for $AR in ABI aa
+#   nm_aa_flags           extra flags for $NM in ABI aa
+#   limb_aa               limb size in ABI aa, can be "longlong"
+#   path_aa               mpn search path in ABI aa
+#   extra_functions_aa    extra mpn functions in ABI aa
+#
+# As a convenience, the unadorned xx_cflags (etc) are used for the last ABI
+# in abilist, if an xx_aa_cflags for that ABI isn't given.  For example if
+# abilist="64 32" then $cc_64_cflags will be used for the 64-bit ABI, but
+# for the 32-bit either $cc_32_cflags or $cc_cflags is used, whichever is
+# defined.  This makes it easy to add some 64-bit compilers and flags to an
+# unadorned 32-bit set.
+#
+# limb=longlong (or limb_aa=longlong) applies to all compilers within that
+# ABI.  It won't work to have some needing long long and some not, since a
+# single instantiated gmp.h will be used by both.
+#
+# SPEED_CYCLECOUNTER, cyclecounter_size and CALLING_CONVENTIONS_OBJS are
+# also set here, with an ABI suffix.
+#
+#
+#
+# A table-driven approach like this to mapping cpu type to good compiler
+# options is a bit of a maintenance burden, but there's not much uniformity
+# between options specifications on different compilers.  Some sort of
+# separately updatable tool might be cute.
+#
+# The use of lots of variables like this, direct and indirect, tends to
+# obscure when and how various things are done, but unfortunately it's
+# pretty much the only way.  If shell subroutines were portable then actual
+# code like "if this .. do that" could be written, but attempting the same
+# with full copies of GMP_PROG_CC_WORKS etc expanded at every point would
+# hugely bloat the output.
+
+
+
+
+# abilist needs to be non-empty, "standard" is just a generic name here
+abilist="standard"
+
+# FIXME: We'd like to prefer an ANSI compiler, perhaps by preferring
+# c89 over cc here.  But note that on HP-UX c89 provides a castrated
+# environment, and would want to be excluded somehow.  Maybe
+# AC_PROG_CC_STDC already does enough to stick cc into ANSI mode and
+# we don't need to worry.
+#
+cclist="gcc cc"
+
+gcc_cflags="-O2 -pedantic"
+gcc_64_cflags="-O2 -pedantic"
+cc_cflags="-O"
+cc_64_cflags="-O"
+
+SPEED_CYCLECOUNTER_OBJ=
+cyclecounter_size=2
+
+HAVE_HOST_CPU_FAMILY_power=0
+
+HAVE_HOST_CPU_FAMILY_powerpc=0
+
+
+case $host in
+
+  a29k*-*-*)
+    path="a29k"
+    ;;
+
+
+  alpha*-*-*)
+    $as_echo "#define HAVE_HOST_CPU_FAMILY_alpha 1" >>confdefs.h
+
+    case $host_cpu in
+      alphaev5* | alphapca5*)
+       path="alpha/ev5 alpha" ;;
+      alphaev67 | alphaev68 | alphaev7*)
+        path="alpha/ev67 alpha/ev6 alpha" ;;
+      alphaev6)
+       path="alpha/ev6 alpha" ;;
+      *)
+        path="alpha" ;;
+    esac
+    extra_functions="cntlz"
+    gcc_cflags_optlist="asm cpu oldas" # need asm ahead of cpu, see below
+    gcc_cflags_oldas="-Wa,-oldas"     # see GMP_GCC_WA_OLDAS.
+
+    # gcc 2.7.2.3 doesn't know any -mcpu= for alpha, apparently.
+    # gcc 2.95 knows -mcpu= ev4, ev5, ev56, pca56, ev6.
+    # gcc 3.0 adds nothing.
+    # gcc 3.1 adds ev45, ev67 (but ev45 is the same as ev4).
+    # gcc 3.2 adds nothing.
+    #
+    # gcc version "2.9-gnupro-99r1" under "-O2 -mcpu=ev6" strikes internal
+    # compiler errors too easily and is rejected by GMP_PROG_CC_WORKS.  Each
+    # -mcpu=ev6 below has a fallback to -mcpu=ev56 for this reason.
+    #
+    case $host_cpu in
+      alpha)        gcc_cflags_cpu="-mcpu=ev4" ;;
+      alphaev5)     gcc_cflags_cpu="-mcpu=ev5" ;;
+      alphaev56)    gcc_cflags_cpu="-mcpu=ev56" ;;
+      alphapca56 | alphapca57)
+                    gcc_cflags_cpu="-mcpu=pca56" ;;
+      alphaev6)     gcc_cflags_cpu="-mcpu=ev6 -mcpu=ev56" ;;
+      alphaev67 | alphaev68 | alphaev7*)
+                    gcc_cflags_cpu="-mcpu=ev67 -mcpu=ev6 -mcpu=ev56" ;;
+    esac
+
+    # gcc version "2.9-gnupro-99r1" on alphaev68-dec-osf5.1 has been seen
+    # accepting -mcpu=ev6, but not putting the assembler in the right mode
+    # for what it produces.  We need to do this for it, and need to do it
+    # before testing the -mcpu options.
+    #
+    # On old versions of gcc, which don't know -mcpu=, we believe an
+    # explicit -Wa,-mev5 etc will be necessary to put the assembler in
+    # the right mode for our .asm files and longlong.h asm blocks.
+    #
+    # On newer versions of gcc, when -mcpu= is known, we must give a -Wa
+    # which is at least as high as the code gcc will generate.  gcc
+    # establishes what it needs with a ".arch" directive, our command line
+    # option seems to override that.
+    #
+    # gas prior to 2.14 doesn't accept -mev67, but -mev6 seems enough for
+    # ctlz and cttz (in 2.10.0 at least).
+    #
+    # OSF `as' accepts ev68 but stupidly treats it as ev4.  -arch only seems
+    # to affect insns like ldbu which are expanded as macros when necessary.
+    # Insns like ctlz which were never available as macros are always
+    # accepted and always generate their plain code.
+    #
+    case $host_cpu in
+      alpha)        gcc_cflags_asm="-Wa,-arch,ev4 -Wa,-mev4" ;;
+      alphaev5)     gcc_cflags_asm="-Wa,-arch,ev5 -Wa,-mev5" ;;
+      alphaev56)    gcc_cflags_asm="-Wa,-arch,ev56 -Wa,-mev56" ;;
+      alphapca56 | alphapca57)
+                    gcc_cflags_asm="-Wa,-arch,pca56 -Wa,-mpca56" ;;
+      alphaev6)     gcc_cflags_asm="-Wa,-arch,ev6 -Wa,-mev6" ;;
+      alphaev67 | alphaev68 | alphaev7*)
+                    gcc_cflags_asm="-Wa,-arch,ev67 -Wa,-mev67 -Wa,-arch,ev6 -Wa,-mev6" ;;
+    esac
+
+    # It might be better to ask "cc" whether it's Cray C or DEC C,
+    # instead of relying on the OS part of $host.  But it's hard to
+    # imagine either of those compilers anywhere except their native
+    # systems.
+    #
+
+echo "include_mpn(\`alpha/alpha-defs.m4')" >> $gmp_tmpconfigm4i
+
+    case $host in
+      *-cray-unicos*)
+        cc_cflags="-O"         # no -g, it silently disables all optimizations
+
+echo "include_mpn(\`alpha/unicos.m4')" >> $gmp_tmpconfigm4i
+
+        # Don't perform any assembly syntax tests on this beast.
+        gmp_asm_syntax_testing=no
+        ;;
+      *-*-osf*)
+
+echo "include_mpn(\`alpha/default.m4')" >> $gmp_tmpconfigm4i
+
+        cc_cflags=""
+        cc_cflags_optlist="opt cpu"
+
+        # not sure if -fast works on old versions, so make it optional
+       cc_cflags_opt="-fast -O2"
+
+       # DEC C V5.9-005 knows ev4, ev5, ev56, pca56, ev6.
+       # Compaq C V6.3-029 adds ev67.
+       #
+       case $host_cpu in
+         alpha)       cc_cflags_cpu="-arch~ev4~-tune~ev4" ;;
+         alphaev5)    cc_cflags_cpu="-arch~ev5~-tune~ev5" ;;
+         alphaev56)   cc_cflags_cpu="-arch~ev56~-tune~ev56" ;;
+         alphapca56 | alphapca57)
+            cc_cflags_cpu="-arch~pca56~-tune~pca56" ;;
+         alphaev6)    cc_cflags_cpu="-arch~ev6~-tune~ev6" ;;
+         alphaev67 | alphaev68 | alphaev7*)
+            cc_cflags_cpu="-arch~ev67~-tune~ev67 -arch~ev6~-tune~ev6" ;;
+       esac
+        ;;
+      *)
+
+echo "include_mpn(\`alpha/default.m4')" >> $gmp_tmpconfigm4i
+
+        ;;
+    esac
+
+    case $host in
+      *-*-unicos*)
+        # tune/alpha.asm assumes int==4bytes but unicos uses int==8bytes
+        ;;
+      *)
+        SPEED_CYCLECOUNTER_OBJ=alpha.lo
+        cyclecounter_size=1 ;;
+    esac
+    ;;
+
+
+  # Cray vector machines.
+  # This must come after alpha* so that we can recognize present and future
+  # vector processors with a wildcard.
+  *-cray-unicos*)
+    # Only Cray's own cc is tried, and no assembler syntax tests are run.
+    cclist=cc
+    path=cray
+    gmp_asm_syntax_testing=no
+    # -hscalar0 was once included here to dodge a miscompilation of
+    # mpz/import.c, but it makes the generated code disastrously slow, so it
+    # is left out in the hope that Cray fixes the compiler instead.
+    cc_cflags="-O3 -hnofastmd -htask0 -Wa,-B"
+    ;;
+
+
+  arm*-*-*)
+    # Add the ARM asm definitions to the generated m4 configuration.
+    echo "include_mpn(\`arm/arm-defs.m4')" >> $gmp_tmpconfigm4i
+
+    path=arm
+    gcc_testlist=gcc-arm-umodsi
+    gcc_cflags="$gcc_cflags $fomit_frame_pointer"
+    ;;
+
+
+  clipper*-*-*)
+    # Only the asm search path is set; nothing else is overridden here.
+    path=clipper
+    ;;
+
+
+  # Fujitsu
+  f30[01]-fujitsu-sysv*)
+    path=fujitsu
+    cclist="gcc vcc"
+    # FIXME: the right flags for vcc are still to be determined.
+    vcc_cflags=-g
+    ;;
+
+
+  hppa*-*-*)
+    # HP cc (the one sold separately) is K&R by default, but AM_C_PROTOTYPES
+    # will add "-Ae", or "-Aa -D_HPUX_SOURCE", to put it into ansi mode, if
+    # possible.
+    #
+    # gcc for hppa 2.0 can be built either for 2.0n (32-bit) or 2.0w
+    # (64-bit), but not both, so there's no option to choose the desired
+    # mode, we must instead detect which of the two it is.  This is done by
+    # checking sizeof(long), either 4 or 8 bytes respectively.  Do this in
+    # ABI=1.0 too, in case someone tries to build that with a 2.0w gcc.
+    #
+    cyclecounter_size=1
+    SPEED_CYCLECOUNTER_OBJ="hppa.lo"
+    gcc_testlist="sizeof-long-4"    # the sizeof(long) check described above
+    gcc_cflags_optlist="arch"       # candidates come from gcc_cflags_arch below
+
+    # FIXME: For hppa2.0*, path should be "pa32/hppa2_0 pa32/hppa1_1 pa32".
+    # (Can't remember why this isn't done already, have to check what .asm
+    # files are available in each and how they run on a typical 2.0 cpu.)
+    #
+    # Every variant ends with the generic pa32 directory; more specific
+    # directories are put in front of it.
+    path="pa32"
+    case $host_cpu in
+      hppa1.0*)
+        ;;
+      hppa7000*)
+        path="pa32/hppa1_1 $path"
+        ;;
+      hppa2.0* | hppa64)
+        path="pa32/hppa2_0 pa32/hppa1_1/pa7100 pa32/hppa1_1 $path"
+        ;;
+      *)
+        # default to 7100
+        path="pa32/hppa1_1/pa7100 pa32/hppa1_1 $path"
+        ;;
+    esac
+
+    # gcc 2.7.2.3 knows -mpa-risc-1-0 and -mpa-risc-1-1
+    # gcc 2.95 adds -mpa-risc-2-0, plus synonyms -march=1.0, 1.1 and 2.0
+    #
+    # We don't use -mpa-risc-2-0 in ABI=1.0 because 64-bit registers may not
+    # be saved by the kernel on an old system.  Actually gcc (as of 3.2)
+    # only adds a few float instructions with -mpa-risc-2-0, so it would
+    # probably be safe, but let's not take the chance.  In any case, a
+    # configuration like --host=hppa2.0 ABI=1.0 is far from optimal.
+    #
+    # gcc architecture flag and HP cc flags, chosen together per CPU.
+    case $host_cpu in
+      hppa1.0*)
+        gcc_cflags_arch="-mpa-risc-1-0"
+        cc_cflags="+O2"
+        ;;
+      *)
+        # everything else defaults to 7100, i.e. PA 1.1
+        gcc_cflags_arch="-mpa-risc-1-1"
+        cc_cflags="+DA1.1 +O2"
+        ;;
+    esac
+
+    # PA 2.0 hosts additionally offer the 2.0n ABI and, where the OS allows
+    # it, the 2.0w ABI; plain 1.0 stays in the list as the last choice.
+    case $host in
+      hppa2.0*-*-* | hppa64-*-*)
+       cclist_20n="gcc cc"
+        abilist="2.0n 1.0"
+        path_20n="pa64"
+       limb_20n=longlong
+        any_20n_testlist="sizeof-long-4"
+        SPEED_CYCLECOUNTER_OBJ_20n=hppa2.lo
+        cyclecounter_size_20n=2
+
+        # -mpa-risc-2-0 is only an optional flag, in case an old gcc is
+        # used.  Assembler support for 2.0 is essential though, for our asm
+        # files.
+       gcc_20n_cflags="-O2"
+       gcc_20n_cflags_optlist="arch"
+        gcc_20n_cflags_arch="-mpa-risc-2-0 -mpa-risc-1-1"
+        gcc_20n_testlist="sizeof-long-4 hppa-level-2.0"
+
+        cc_20n_cflags="+DA2.0 +e +O2 -Wl,+vnocompatwarnings"
+        cc_20n_testlist="hpc-hppa-2-0"
+
+       # ABI=2.0w is available for hppa2.0w and hppa2.0, but not for
+       # hppa2.0n, on the assumption that the latter indicates a
+       # desire for ABI=2.0n.
+       case $host in
+        hppa2.0n-*-*) ;;
+        *)
+          # HPUX 10 and earlier cannot run 2.0w.  Not sure about other
+          # systems (GNU/Linux for instance), but let's assume they're ok.
+          #
+          # Note the linux arm replaces the whole list, so 2.0n is dropped
+          # there too and only ABI=1.0 remains.
+          case $host in
+            *-*-hpux[1-9] | *-*-hpux[1-9].* | *-*-hpux10 | *-*-hpux10.*) ;;
+           *-*-linux*)  abilist="1.0" ;; # due to linux permanent kernel bug
+            *)    abilist="2.0w $abilist" ;;
+          esac
+
+          # The 2.0w settings are assigned unconditionally, even when 2.0w
+          # was not added to abilist just above.
+          cclist_20w="gcc cc"
+         gcc_20w_cflags="-O2 -mpa-risc-2-0"
+          cc_20w_cflags="+DD64 +O2"
+          cc_20w_testlist="hpc-hppa-2-0"
+          path_20w="pa64"
+         any_20w_testlist="sizeof-long-8"
+          SPEED_CYCLECOUNTER_OBJ_20w=hppa2w.lo
+          cyclecounter_size_20w=2
+         ;;
+        esac
+        ;;
+    esac
+    ;;
+
+
+  i960*-*-*)
+    # Only the asm search path is set; nothing else is overridden here.
+    path=i960
+    ;;
+
+
+  ia64*-*-* | itanium-*-* | itanium2-*-*)
+    echo "include_mpn(\`ia64/ia64-defs.m4')" >> $gmp_tmpconfigm4i
+
+    abilist="64"
+    SPEED_CYCLECOUNTER_OBJ="ia64.lo"
+
+    # A CPU-specific directory, when there is one, goes in front of the
+    # generic ia64 directory.  The -mtune values for itanium and itanium2
+    # were added in pre-release gcc 3.4.
+    path="ia64"
+    case $host_cpu in
+      itanium)
+        path="ia64/itanium  $path"
+        gcc_cflags_tune="-mtune=itanium"
+        ;;
+      itanium2)
+        path="ia64/itanium2 $path"
+        gcc_cflags_tune="-mtune=itanium2"
+        ;;
+    esac
+
+    # Both ABIs use the same gcc option group.
+    gcc_64_cflags_optlist="tune"
+    gcc_32_cflags_optlist="$gcc_64_cflags_optlist"
+
+    case $host in
+      *-*-linux*)
+        # Intel's icc is tried after gcc.  -O3 is deliberately absent from
+        # the icc choices: it is meant for "large data sets" and also
+        # miscompiles GMP.  (icc miscompiles GMP at any optimization level,
+        # only more files at the higher ones.)
+        cclist="gcc icc"
+        icc_cflags="-no-gcc"
+        icc_cflags_opt="-O2 -O1"
+        icc_cflags_optlist="opt"
+        ;;
+
+      *-*-hpux*)
+        abilist="32 64"
+
+        # ABI=32.  HP cc sometimes gets internal errors if the optimization
+        # level is too high; GMP_PROG_CC_WORKS detects that, and the "_opt"
+        # fallbacks let us use whatever level seems to work.
+        cclist_32="gcc cc"
+        limb_32=longlong
+        path_32="ia64"
+        gcc_32_cflags="-milp32 -O2"
+        cc_32_cflags=""
+        cc_32_cflags_opt="+O3 +O2 +O1"
+        cc_32_cflags_optlist="opt"
+        cyclecounter_size_32=2
+        SPEED_CYCLECOUNTER_OBJ_32=ia64.lo
+
+        # ABI=64.  +DD64 has to be in CPPFLAGS so the headers see the right
+        # __LP64__, and in CFLAGS as well because automake links with
+        # CFLAGS only, not CPPFLAGS.
+        # FIXME: cc_64_ldflags might be the better place, but that would
+        # need GMP_LDFLAGS used consistently by all the programs.
+        gcc_64_cflags="$gcc_64_cflags -mlp64"
+        cc_64_cppflags="+DD64"
+        cc_64_cflags="+DD64"
+        cc_64_cflags_opt="+O3 +O2 +O1"
+        cc_64_cflags_optlist="opt"
+        ;;
+    esac
+    ;;
+
+
+  # Motorola 68k
+  #
+  m68k-*-* | m68[0-9][0-9][0-9]-*-*)
+    $as_echo "#define HAVE_HOST_CPU_FAMILY_m68k 1" >>confdefs.h
+
+    echo "include_mpn(\`m68k/m68k-defs.m4')" >> $gmp_tmpconfigm4i
+
+    gcc_cflags_optlist="arch"
+    gcc_cflags="$gcc_cflags $fomit_frame_pointer"
+
+    # Compiler flag and asm path per CPU.
+    #
+    # gcc 2.7.2 knows -m68000, -m68020, -m68030, -m68040.
+    # gcc 2.95 adds -mcpu32, -m68060.
+    # FIXME: Maybe "-m68020 -mnobitfield" would suit cpu32 on 2.7.2.
+    #
+    # FIXME: m68k/mc68020 looks like it's ok for cpu32 (m68360), but this
+    # wants to be tested.  Will need to introduce an m68k/cpu32 if
+    # m68k/mc68020 ever uses the bitfield instructions.
+    #
+    # NOTE(review): m68060 gets -m68060 but only the generic m68k path, not
+    # m68k/mc68020 -- confirm this is intentional.
+    case $host_cpu in
+      m68020)
+        gcc_cflags_arch="-m68020"
+        path="m68k/mc68020 m68k"
+        ;;
+      m68030)
+        gcc_cflags_arch="-m68030"
+        path="m68k/mc68020 m68k"
+        ;;
+      m68040)
+        gcc_cflags_arch="-m68040"
+        path="m68k/mc68020 m68k"
+        ;;
+      m68060)
+        gcc_cflags_arch="-m68060 -m68000"
+        path="m68k"
+        ;;
+      m68360)
+        gcc_cflags_arch="-mcpu32 -m68000"
+        path="m68k/mc68020 m68k"
+        ;;
+      *)
+        gcc_cflags_arch="-m68000"
+        path="m68k"
+        ;;
+    esac
+    ;;
+
+
+  # Motorola 88k
+  m88k*-*-*)
+    # generic 88k: asm path only
+    path=m88k
+    ;;
+  m88110*-*-*)
+    # 88110: its own directory ahead of the generic one, plus the gcc flag
+    path="m88k/mc88110 m88k"
+    gcc_cflags="$gcc_cflags -m88110"
+    ;;
+
+
+  # National Semiconductor 32k
+  ns32k*-*-*)
+    # Only the asm search path is set; nothing else is overridden here.
+    path=ns32k
+    ;;
+
+
+  # IRIX 5 and earlier can only run 32-bit o32.
+  #
+  # IRIX 6 and up always has a 64-bit mips CPU and can run n32 or 64.  n32 is
+  # preferred over 64, but only because that's been the default in past
+  # versions of GMP.  The two are equally efficient.
+  #
+  # Linux kernel 2.2.13 arch/mips/kernel/irixelf.c has a comment about not
+  # supporting n32 or 64.
+  #
+  # For reference, libtool (eg. 1.5.6) recognises the n32 ABI and knows the
+  # right options to use when linking (both cc and gcc), so no need for
+  # anything special from us.
+  #
+  mips*-*-*)
+    # this suits both mips32 and mips64
+    echo "include_mpn(\`mips32/mips-defs.m4')" >> $gmp_tmpconfigm4i
+
+    # Baseline for every mips host: the o32 ABI.
+    abilist="o32"
+    path="mips32"
+    gcc_testlist="gcc-mips-o32"
+    gcc_cflags_abi="-mabi=32"
+    gcc_cflags_optlist="abi"
+    cc_cflags="-O2 -o32"   # no -g, it disables all optimizations
+
+    case $host in
+      mips64*-*-* | mips*-*-irix[6789]*)
+        # n32 is listed first, then 64, with o32 as the last choice.
+        abilist="n32 64 o32"
+
+        # ABI=n32
+        cclist_n32="gcc cc"
+        path_n32="mips64"
+        limb_n32=longlong
+        gcc_n32_cflags="-O2 -mabi=n32"
+        cc_n32_cflags="-O2 -n32"       # no -g, it disables all optimizations
+
+        # ABI=64
+        cclist_64="gcc cc"
+        path_64="mips64"
+        gcc_64_cflags="$gcc_64_cflags -mabi=64"
+        gcc_64_ldflags="-Wc,-mabi=64"
+        cc_64_cflags="-O2 -64"         # no -g, it disables all optimizations
+        cc_64_ldflags="-Wc,-64"
+        ;;
+    esac
+    ;;
+
+
+  # Darwin (powerpc-apple-darwin1.3) has its hacked gcc installed as cc.
+  # Our usual "gcc in disguise" detection means gcc_cflags etc here gets
+  # used.
+  #
+  # The darwin pre-compiling preprocessor is disabled with -no-cpp-precomp
+  # since it doesn't like "__attribute__ ((mode (SI)))" etc in gmp-impl.h,
+  # and so always ends up running the plain preprocessor anyway.  This could
+  # be done in CPPFLAGS rather than CFLAGS, but there's not many places
+  # preprocessing is done separately, and this is only a speedup, the normal
+  # preprocessor gets run if there's any problems.
+  #
+  # We used to use -Wa,-mppc with gcc, but can't remember exactly why.
+  # Presumably it was for old versions of gcc where -mpowerpc doesn't put
+  # the assembler in the right mode.  In any case -Wa,-mppc is not good, for
+  # instance -mcpu=604 makes recent gcc use -m604 to get access to the
+  # "fsel" instruction, but a -Wa,-mppc overrides that, making code that
+  # comes out with fsel fail.
+  #
+  # (Note also that the darwin assembler doesn't accept "-mppc", so any
+  # -Wa,-mppc was used only if it worked.  The right flag on darwin would be
+  # "-arch ppc" or some such, but that's already the default.)
+  #
+  powerpc*-*-* | power[3-9]-*-*)
+    $as_echo "#define HAVE_HOST_CPU_FAMILY_powerpc 1" >>confdefs.h
+
+    HAVE_HOST_CPU_FAMILY_powerpc=1
+    abilist="32"
+    cclist="gcc cc"
+    cc_cflags="-O2"
+    gcc_32_cflags="$gcc_cflags -mpowerpc"
+    gcc_cflags_optlist="precomp subtype asm cpu"
+    gcc_cflags_precomp="-no-cpp-precomp"
+    gcc_cflags_subtype="-force_cpusubtype_ALL" # for vmx on darwin
+    # The per-CPU settings below are only assigned for some CPUs, so start
+    # them out empty rather than picking up whatever might be in the
+    # environment.
+    gcc_cflags_asm=""
+    gcc_cflags_cpu=""
+    vmx_path=""
+    # cpu_path is expanded by the "for i in $cpu_path" loops that build the
+    # 64-bit paths, yet many CPUs (a plain powerpc64 host for instance)
+    # never assign it.
+    cpu_path=""
+
+    # grab this object, though it's not a true cycle counter routine
+    SPEED_CYCLECOUNTER_OBJ=powerpc.lo
+    cyclecounter_size=0
+
+    # The generic powerpc32 directory always comes last; 750 and vmx
+    # directories are put in front of it where they apply.
+    path="powerpc32"
+    case $host_cpu in
+      powerpc740 | powerpc750)
+        path="powerpc32/750 $path"
+        ;;
+      powerpc7400 | powerpc7410)
+        path="powerpc32/vmx powerpc32/750 $path"
+        ;;
+      powerpc74[45]?)
+        path="powerpc32/vmx $path"
+        ;;
+    esac
+
+    # Per-CPU compiler flags and asm subdirectories.  Where a variable lists
+    # more than one flag these appear to be alternatives in order of
+    # preference -- NOTE(review): confirm against the optlist handling,
+    # which is not visible here.  cpu_path names subdirectories of
+    # powerpc64/mode64 that are used when the 64-bit paths are built further
+    # down; vmx_path is appended to those paths too.
+    case $host_cpu in
+      powerpc401)   gcc_cflags_cpu="-mcpu=401" ;;
+      powerpc403)   gcc_cflags_cpu="-mcpu=403"
+                   xlc_cflags_arch="-qarch=403 -qarch=ppc" ;;
+      powerpc405)   gcc_cflags_cpu="-mcpu=405" ;;
+      powerpc505)   gcc_cflags_cpu="-mcpu=505" ;;
+      powerpc601)   gcc_cflags_cpu="-mcpu=601"
+                   xlc_cflags_arch="-qarch=601 -qarch=ppc" ;;
+      powerpc602)   gcc_cflags_cpu="-mcpu=602"
+                   xlc_cflags_arch="-qarch=602 -qarch=ppc" ;;
+      powerpc603)   gcc_cflags_cpu="-mcpu=603"
+                   xlc_cflags_arch="-qarch=603 -qarch=ppc" ;;
+      powerpc603e)  gcc_cflags_cpu="-mcpu=603e -mcpu=603"
+                   xlc_cflags_arch="-qarch=603 -qarch=ppc" ;;
+      powerpc604)   gcc_cflags_cpu="-mcpu=604"
+                   xlc_cflags_arch="-qarch=604 -qarch=ppc" ;;
+      powerpc604e)  gcc_cflags_cpu="-mcpu=604e -mcpu=604"
+                   xlc_cflags_arch="-qarch=604 -qarch=ppc" ;;
+      powerpc620)   gcc_cflags_cpu="-mcpu=620" ;;
+      powerpc630)   gcc_cflags_cpu="-mcpu=630"
+                   xlc_cflags_arch="-qarch=pwr3"
+                   cpu_path="p3 p3-p7" ;;
+      powerpc740)   gcc_cflags_cpu="-mcpu=740" ;;
+      powerpc7400 | powerpc7410)
+                   gcc_cflags_asm="-Wa,-maltivec"
+                   gcc_cflags_cpu="-mcpu=7400 -mcpu=750" ;;
+      powerpc74[45]?)
+                   gcc_cflags_asm="-Wa,-maltivec"
+                   gcc_cflags_cpu="-mcpu=7450" ;;
+      powerpc750)   gcc_cflags_cpu="-mcpu=750" ;;
+      powerpc801)   gcc_cflags_cpu="-mcpu=801" ;;
+      powerpc821)   gcc_cflags_cpu="-mcpu=821" ;;
+      powerpc823)   gcc_cflags_cpu="-mcpu=823" ;;
+      powerpc860)   gcc_cflags_cpu="-mcpu=860" ;;
+      powerpc970)   gcc_cflags_cpu="-mtune=970"
+                   xlc_cflags_arch="-qarch=970 -qarch=pwr3"
+                   vmx_path="powerpc64/vmx"
+                   cpu_path="p4 p3-p7" ;;
+      power4)      gcc_cflags_cpu="-mtune=power4"
+                   xlc_cflags_arch="-qarch=pwr4"
+                   cpu_path="p4 p3-p7" ;;
+      power5)      gcc_cflags_cpu="-mtune=power5 -mtune=power4"
+                   xlc_cflags_arch="-qarch=pwr5"
+                   cpu_path="p5 p4 p3-p7" ;;
+      power6)      gcc_cflags_cpu="-mtune=power6"
+                   xlc_cflags_arch="-qarch=pwr6"
+                   cpu_path="p6 p3-p7" ;;
+      power7)      gcc_cflags_cpu="-mtune=power7 -mtune=power5"
+                   xlc_cflags_arch="-qarch=pwr7 -qarch=pwr5"
+                   cpu_path="p7 p5 p4 p3-p7" ;;
+    esac
+
+    # AIX: xlc joins the compiler list, and the 32-bit object mode is made
+    # explicit for the compilers, ar and nm.
+    case $host in
+      *-*-aix*)
+        cclist="gcc xlc cc"
+        xlc_cflags="-O2 -qmaxmem=20000"
+        xlc_cflags_optlist="arch"
+        xlc_32_cflags_maybe="-q32"
+        gcc_32_cflags_maybe="-maix32"
+        ar_32_flags="-X32"
+        nm_32_flags="-X32"
+        ;;
+    esac
+
+    case $host in
+      powerpc64-*-* | powerpc64le-*-* | powerpc620-*-* | powerpc630-*-* | powerpc970-*-* | power[3-9]-*-*)
+       case $host in
+         *-*-aix*)
+           # AIX has a true 64-bit ABI; it is put ahead of the existing
+           # choices.
+           abilist="aix64 $abilist"
+           cclist_aix64="gcc xlc"
+
+           # -Wc is needed to pass the object type flags through to the
+           # linker.
+           gcc_aix64_cflags="-O2 -maix64 -mpowerpc64"
+           gcc_aix64_cflags_optlist="cpu"
+           gcc_aix64_ldflags="-Wc,-maix64"
+           xlc_aix64_cflags="-O2 -q64 -qmaxmem=20000"
+           xlc_aix64_cflags_optlist="arch"
+           xlc_aix64_ldflags="-Wc,-q64"
+
+           # ar and nm must be told the object type as well
+           ar_aix64_flags="-X64"
+           nm_aix64_flags="-X64"
+
+           # CPU-specific mode64 directories first, then the generic ones.
+           path_aix64=""
+           for i in $cpu_path
+           do
+             path_aix64="${path_aix64}powerpc64/mode64/$i "
+           done
+           path_aix64="${path_aix64}powerpc64/mode64 $vmx_path powerpc64"
+
+           # this object is grabbed even though it's not a true cycle
+           # counter routine
+           SPEED_CYCLECOUNTER_OBJ_aix64=powerpc64.lo
+           cyclecounter_size_aix64=0
+           ;;
+         *-*-darwin*)
+           # On Darwin we can use 64-bit instructions with a longlong limb,
+           # but with the chip still in 32-bit mode.
+           # In theory this can be used on any OS which knows how to save
+           # 64-bit registers in a context switch.
+           #
+           # Note that we must use -mpowerpc64 with gcc, since the
+           # longlong.h macros expect limb operands in a single 64-bit
+           # register, not two 32-bit registers as would be given for a
+           # long long without -mpowerpc64.  In theory we could detect and
+           # accommodate both styles, but the proper 64-bit registers will
+           # be fastest and are what we really want to use.
+           #
+           # One would think -mpowerpc64 would set the assembler in the right
+           # mode to handle 64-bit instructions.  But for that, also
+           # -force_cpusubtype_ALL is needed.
+           #
+           # Do not use -fast for Darwin, it actually adds options
+           # incompatible with a shared library.
+           #
+           # mode64 and mode32 are put ahead of the existing ABI choices.
+           abilist="mode64 mode32 $abilist"
+           gcc_32_cflags_maybe="-m32"
+           gcc_cflags_opt="-O3 -O2 -O1"        # will this become used?
+           # ABI=mode32: 64-bit instructions with a longlong limb.
+           cclist_mode32="gcc"
+           gcc_mode32_cflags_maybe="-m32"
+           gcc_mode32_cflags="-mpowerpc64"
+           gcc_mode32_cflags_optlist="subtype cpu opt"
+           gcc_mode32_cflags_subtype="-force_cpusubtype_ALL"
+           gcc_mode32_cflags_opt="-O3 -O2 -O1"
+           path_mode32="powerpc64/mode32 $vmx_path powerpc64"
+           limb_mode32=longlong
+           # ABI=mode64: -m64 is given unconditionally here.
+           cclist_mode64="gcc"
+           gcc_mode64_cflags="-m64"
+           gcc_mode64_cflags_optlist="cpu opt"
+           gcc_mode64_cflags_opt="-O3 -O2 -O1"
+           # CPU-specific mode64 directories first, then the generic ones.
+           path_mode64=""
+           for i in $cpu_path; do path_mode64="${path_mode64}powerpc64/mode64/$i "; done
+           path_mode64="${path_mode64}powerpc64/mode64 $vmx_path powerpc64"
+           SPEED_CYCLECOUNTER_OBJ_mode64=powerpc64.lo
+           cyclecounter_size_mode64=0
+           any_mode64_testlist="sizeof-long-8"
+           ;;
+         *-*-linux* | *-*-*bsd*)
+           # On GNU/Linux the processor is assumed to be in 64-bit mode.
+           # Some environments have a gcc which is always in 64-bit mode
+           # while others need -m64, so that flag goes in cflags_maybe; the
+           # sizeof-long-8 test verifies the mode when no option was used.
+           #
+           # -mpowerpc64 is not given for mode64 since it should be the
+           # default in 64-bit mode.  (Its effect is of course needed for
+           # the longlong.h asm macros to be right.)
+           #
+           # gcc64 was an early port of gcc to 64-bit mode and should become
+           # obsolete before too long; plain gcc is preferred when it knows
+           # 64-bits.
+           #
+           abilist="mode64 mode32 $abilist"
+           gcc_32_cflags_maybe="-m32"
+
+           # ABI=mode64
+           cclist_mode64="gcc gcc64"
+           gcc_mode64_cflags_maybe="-m64"
+           gcc_mode64_cflags_optlist="cpu opt"
+           gcc_mode64_cflags_opt="-O3 -O2 -O1"
+           any_mode64_testlist="sizeof-long-8"
+           path_mode64=""
+           for i in $cpu_path
+           do
+             path_mode64="${path_mode64}powerpc64/mode64/$i "
+           done
+           path_mode64="${path_mode64}powerpc64/mode64 $vmx_path powerpc64"
+           SPEED_CYCLECOUNTER_OBJ_mode64=powerpc64.lo
+           cyclecounter_size_mode64=0
+
+           # ABI=mode32
+           cclist_mode32="gcc"
+           limb_mode32=longlong
+           gcc_mode32_cflags="-mpowerpc64"
+           gcc_mode32_cflags_maybe="-m32"
+           gcc_mode32_cflags_optlist="cpu opt"
+           gcc_mode32_cflags_opt="-O3 -O2 -O1"
+           path_mode32="powerpc64/mode32 $vmx_path powerpc64"
+           ;;
+       esac
+       ;;
+    esac
+    ;;
+
+
+  # POWER 32-bit
+  power-*-* | power[12]-*-* | power2sc-*-*)
+    $as_echo "#define HAVE_HOST_CPU_FAMILY_power 1" >>confdefs.h
+
+    HAVE_HOST_CPU_FAMILY_power=1
+    path="power"
+    extra_functions="udiv_w_sdiv"
+
+    # gcc everywhere; on AIX xlc is offered as well.
+    case $host in
+      *-*-aix*)
+        cclist="gcc xlc"
+        xlc_cflags="-O2 -qarch=pwr -qmaxmem=20000"
+        ;;
+      *)
+        cclist="gcc"
+        ;;
+    esac
+
+    # gcc 2.7.2 knows rios1, rios2, rsc
+    #
+    # Each choice has a fallback to just -mpower, needed because
+    # -mcpu=rios2 can tickle an AIX assembler bug (see GMP_PROG_CC_WORKS).
+    #
+    gcc_cflags_optlist="cpu"
+    case $host in
+      power-*-*)
+        gcc_cflags_cpu="-mcpu=power -mpower"
+        ;;
+      power1-*-*)
+        gcc_cflags_cpu="-mcpu=rios1 -mpower"
+        ;;
+      power2-*-*)
+        gcc_cflags_cpu="-mcpu=rios2 -mpower"
+        ;;
+      power2sc-*-*)
+        gcc_cflags_cpu="-mcpu=rsc   -mpower"
+        ;;
+    esac
+    ;;
+
+
+  pyramid-*-*)
+    # Only the asm search path is set; nothing else is overridden here.
+    path=pyr
+    ;;
+
+
+  # IBM System/390 and z/Architecture
+  s390-*-* | z900esa-*-* | z990esa-*-* | z9esa-*-* | z10esa-*-* | z196esa-*-* | s390x-*-* | z900-*-* | z990-*-* | z9-*-* | z10-*-* | z196-*-*)
+    path="s390_32"
+    abilist="32"
+    extra_functions="udiv_w_sdiv"
+    gcc_cflags_optlist="arch"
+    gcc_32_cflags_maybe="-m31"
+    gcc_cflags="$gcc_cflags $fomit_frame_pointer"
+
+    # For each z/Architecture CPU: select the CPU-specific asm path and the
+    # gcc -march flag, append HAVE_HOST_CPU_s390_<cpu> and
+    # HAVE_HOST_CPU_s390_zarch to confdefs.h, and empty extra_functions.
+    # Plain s390 changes nothing.  The arms differ only in the values of
+    # cpu and gccarch.
+    case $host_cpu in
+      s390)
+       ;;
+      z900 | z900esa)
+        cpu="z900"
+        gccarch="$cpu"
+       path="s390_32/esame/$cpu s390_32/esame s390_32"
+       gcc_cflags_arch="-march=$gccarch"
+       cat >>confdefs.h <<_ACEOF
+#define HAVE_HOST_CPU_s390_$cpu 1
+_ACEOF
+
+       $as_echo "#define HAVE_HOST_CPU_s390_zarch 1" >>confdefs.h
+
+       extra_functions=""
+        ;;
+      z990 | z990esa)
+        cpu="z990"
+        gccarch="$cpu"
+       path="s390_32/esame/$cpu s390_32/esame s390_32"
+       gcc_cflags_arch="-march=$gccarch"
+       cat >>confdefs.h <<_ACEOF
+#define HAVE_HOST_CPU_s390_$cpu 1
+_ACEOF
+
+       $as_echo "#define HAVE_HOST_CPU_s390_zarch 1" >>confdefs.h
+
+       extra_functions=""
+        ;;
+      z9 | z9esa)
+        cpu="z9"
+       # the gcc name for this CPU differs from ours
+       gccarch="z9-109"
+       path="s390_32/esame/$cpu s390_32/esame s390_32"
+       gcc_cflags_arch="-march=$gccarch"
+       cat >>confdefs.h <<_ACEOF
+#define HAVE_HOST_CPU_s390_$cpu 1
+_ACEOF
+
+       $as_echo "#define HAVE_HOST_CPU_s390_zarch 1" >>confdefs.h
+
+       extra_functions=""
+        ;;
+      z10 | z10esa)
+        cpu="z10"
+       gccarch="z10"
+       path="s390_32/esame/$cpu s390_32/esame s390_32"
+       gcc_cflags_arch="-march=$gccarch"
+       cat >>confdefs.h <<_ACEOF
+#define HAVE_HOST_CPU_s390_$cpu 1
+_ACEOF
+
+       $as_echo "#define HAVE_HOST_CPU_s390_zarch 1" >>confdefs.h
+
+       extra_functions=""
+        ;;
+      z196 | z196esa)
+        cpu="z196"
+       gccarch="z196"
+       path="s390_32/esame/$cpu s390_32/esame s390_32"
+       gcc_cflags_arch="-march=$gccarch"
+       cat >>confdefs.h <<_ACEOF
+#define HAVE_HOST_CPU_s390_$cpu 1
+_ACEOF
+
+       $as_echo "#define HAVE_HOST_CPU_s390_zarch 1" >>confdefs.h
+
+       extra_functions=""
+        ;;
+      esac
+
+    # Hosts named without the "esa" suffix (and s390x) also get ABI=64,
+    # which is then preferred over ABI=32.
+    case $host in
+      s390x-*-* | z900-*-* | z990-*-* | z9-*-* | z10-*-* | z196-*-*)
+        abilist="64 32"
+        extra_functions=""
+        cclist_64="gcc"
+        path_64="s390_64/$host_cpu s390_64"
+        gcc_64_cflags="$gcc_cflags -m64"
+        gcc_64_cflags_optlist="arch"
+        ;;
+    esac
+    ;;
+
+
+  sh-*-*)
+    path="sh"
+    ;;
+  sh[2-4]-*-*)
+    # sh2 directory ahead of the generic one
+    path="sh/sh2 sh"
+    ;;
+
+
+  *sparc*-*-*)
+    # sizeof(long)==4 or 8 is tested, to ensure we get the right ABI.  We've
+    # had various bug reports where users have set CFLAGS for their desired
+    # mode, but not set our ABI.  For some reason it's sparc where this
+    # keeps coming up, presumably users there are accustomed to driving the
+    # compiler mode that way.  The effect of our testlist setting is to
+    # reject ABI=64 in favour of ABI=32 if the user has forced the flags to
+    # 32-bit mode.
+    #
+    echo "include_mpn(\`sparc32/sparc-defs.m4')" >> $gmp_tmpconfigm4i
+
+    cclist="gcc acc cc"
+    abilist="32"
+    any_testlist="sizeof-long-4"    # the sizeof(long) check described above
+
+
+    # The generic sparc32 directory always comes last; v8, supersparc and
+    # v9 directories are put in front of it where they apply.
+    path="sparc32"
+    case $host_cpu in
+      sparcv8 | microsparc | turbosparc)
+        path="sparc32/v8 $path"
+        ;;
+      supersparc)
+        path="sparc32/v8/supersparc sparc32/v8 $path"
+        ;;
+      sparc64 | sparcv9* | ultrasparc*)
+        path="sparc32/v9 sparc32/v8 $path"
+        ;;
+    esac
+
+    # gcc 2.7.2 doesn't know about v9 and doesn't pass -xarch=v8plus to the
+    # assembler.  Add it explicitly since the solaris assembler won't accept
+    # our sparc32/v9 asm code without it.  gas accepts -xarch=v8plus too, so
+    # it can be in the cflags unconditionally (though gas doesn't need it).
+    #
+    # gcc -m32 is needed to force 32-bit mode on a dual-ABI system, but past
+    # gcc doesn't know that flag, hence cflags_maybe.  Note that -m32 cannot
+    # be done through the optlist since the plain cflags would be run first
+    # and we don't want to require the default mode (whatever it is) works.
+    #
+    # Note it's gcc_32_cflags_maybe and not gcc_cflags_maybe because the
+    # latter would be used in the 64-bit ABI on systems like "*bsd" where
+    # abilist="64" only.
+    #
+    gcc_cflags_optlist="cpu"
+    gcc_32_cflags_maybe="-m32"
+    case $host_cpu in
+      sparc64 | sparcv9* | ultrasparc*)
+        # v9 family: hand -xarch=v8plus to the assembler explicitly
+        gcc_cflags="$gcc_cflags -Wa,-xarch=v8plus"
+        ;;
+      *)
+        # everything else keeps the flags as they are
+        gcc_cflags="$gcc_cflags"
+        ;;
+    esac
+
+    # gcc 2.7.2 knows -mcypress, -msupersparc, -mv8, -msparclite.
+    # gcc 2.95 knows -mcpu= v7, hypersparc, sparclite86x, f930, f934,
+    #   sparclet, tsc701, v9, ultrasparc.  A warning is given that the
+    #   plain -m forms will disappear.
+    # gcc 3.0 adds nothing.
+    # gcc 3.1 adds nothing.
+    # gcc 3.2 adds nothing.
+    # gcc 3.3 adds ultrasparc3.
+    #
+    # Note ultrasparc3 has to be matched before the ultrasparc* wildcard.
+    case $host_cpu in
+      supersparc)
+        gcc_cflags_cpu="-mcpu=supersparc -msupersparc"
+        ;;
+      sparcv8 | microsparc | turbosparc)
+        gcc_cflags_cpu="-mcpu=v8 -mv8"
+        ;;
+      sparc64 | sparcv9*)
+        gcc_cflags_cpu="-mcpu=v9 -mv8"
+        ;;
+      ultrasparc3)
+        gcc_cflags_cpu="-mcpu=ultrasparc3 -mcpu=ultrasparc -mv8"
+        ;;
+      ultrasparc*)
+        gcc_cflags_cpu="-mcpu=ultrasparc -mv8"
+        ;;
+      *)
+        gcc_cflags_cpu="-mcpu=v7 -mcypress"
+        ;;
+    esac
+
+    # SunPRO cc and acc, and SunOS bundled cc
+    # Only Solaris and SunOS hosts get these cc settings: an empty base
+    # cc_cflags plus three option groups (opt, arch, cpu) filled in below.
+    case $host in
+      *-*-solaris* | *-*-sunos*)
+       # Note no -g, it disables all optimizations.
+       cc_cflags=
+       cc_cflags_optlist="opt arch cpu"
+
+        # SunOS cc doesn't know -xO4, fallback to -O2.
+       cc_cflags_opt="-xO4 -O2"
+
+        # SunOS cc doesn't know -xarch, apparently always generating v7
+        # code, so make this optional
+       case $host_cpu in
+         sparcv8 | microsparc | supersparc | turbosparc)
+                                             cc_cflags_arch="-xarch=v8" ;;
+         sparc64 | sparcv9* | ultrasparc*)   cc_cflags_arch="-xarch=v8plus" ;;
+         *)                                  cc_cflags_arch="-xarch=v7" ;;
+       esac
+
+        # SunOS cc doesn't know -xchip and doesn't seem to have an equivalent.
+       # SunPRO cc 5 recognises -xchip=generic, old, super, super2, micro,
+       #   micro2, hyper, hyper2, powerup, ultra, ultra2, ultra2i.
+       # SunPRO cc 6 adds -xchip=ultra2e, ultra3cu.
+        #
+       # FIXME: Which of ultra, ultra2 or ultra2i is the best fallback for
+       # ultrasparc3?
+       #
+       # These patterns are exact names, so any CPU not listed (sparcv9 or
+       # sparc64 for instance) ends up with -xchip=generic.
+       case $host_cpu in
+         supersparc)   cc_cflags_cpu="-xchip=super" ;;
+         microsparc)   cc_cflags_cpu="-xchip=micro" ;;
+         turbosparc)   cc_cflags_cpu="-xchip=micro2" ;;
+         ultrasparc)   cc_cflags_cpu="-xchip=ultra" ;;
+         ultrasparc2)  cc_cflags_cpu="-xchip=ultra2" ;;
+         ultrasparc2i) cc_cflags_cpu="-xchip=ultra2i" ;;
+         ultrasparc3)  cc_cflags_cpu="-xchip=ultra3 -xchip=ultra" ;;
+         *)            cc_cflags_cpu="-xchip=generic" ;;
+       esac
+    esac
+
+    # v9-capable CPUs: decide whether ABI=64 is offered in addition to, or
+    # instead of, ABI=32, and set up the ABI=64 paths, compilers and flags.
+    case $host_cpu in
+      sparc64 | sparcv9* | ultrasparc*)
+        case $host in
+          # Solaris 6 and earlier cannot run ABI=64 since it doesn't save
+          # registers properly, so ABI=32 is left as the only choice.
+          #
+          *-*-solaris2.[0-6] | *-*-solaris2.[0-6].*) ;;
+
+          # BSD sparc64 ports are 64-bit-only systems, so ABI=64 is the only
+          # choice.  In fact they need no special compiler flags, gcc -m64
+          # is the default, but it doesn't hurt to add it.  v9 CPUs always
+          # use the sparc64 port, since the plain 32-bit sparc ports don't
+          # run on a v9.
+          #
+          *-*-*bsd*) abilist="64" ;;
+
+          # For all other systems, we try both 64 and 32.
+          #
+          # GNU/Linux sparc64 has only recently gained a 64-bit user mode.
+          # In the past sparc64 meant a v9 cpu, but there were no 64-bit
+          # operations in user mode.  We assume that if "gcc -m64" works
+          # then the system is suitable.  Hopefully even if someone attempts
+          # to put a new gcc and/or glibc on an old system it won't run.
+          #
+          *) abilist="64 32" ;;
+        esac
+
+       # Asm path for ABI=64.  The ultrasparct[1234] arm currently selects
+       # the same path as the default arm.
+       case $host_cpu in
+         ultrasparc | ultrasparc2 | ultrasparc2i)
+           path_64="sparc64/ultrasparc12 sparc64" ;;
+         ultrasparc[34])
+           path_64="sparc64/ultrasparc34 sparc64/ultrasparc1234 sparc64" ;;
+         ultrasparct[1234])
+           path_64="sparc64" ;;
+         *)
+           path_64="sparc64"
+       esac
+
+        # The ABI=64 settings are made for every host, including the old
+        # Solaris case above where 64 is not in abilist.
+        cclist_64="gcc"
+        any_64_testlist="sizeof-long-8"
+
+        # gcc -mptr64 is probably implied by -m64, but we're not sure if
+        # this was always so.  On Solaris in the past we always used both
+        # "-m64 -mptr64".
+        #
+        # gcc -Wa,-xarch=v9 is thought to be necessary in some cases on
+        # solaris, but it would seem likely that if gcc is going to generate
+        # 64-bit code it will have to add that option itself where needed.
+        # An extra copy of this option should be harmless though, but leave
+        # it until we're sure.  (Might want -xarch=v9a or -xarch=v9b for the
+        # higher cpu types instead.)
+        #
+        gcc_64_cflags="$gcc_64_cflags -m64 -mptr64"
+        gcc_64_ldflags="-Wc,-m64"
+        gcc_64_cflags_optlist="cpu"
+
+        case $host in
+          *-*-solaris*)
+            # Sun cc.
+            #
+            # We used to have -fast and some fixup options here, but it
+            # recurrently caused problems with miscompilation.  Of course,
+            # -fast is documented as miscompiling things for the sake of speed.
+            #
+            cclist_64="$cclist_64 cc"
+            cc_64_cflags="-xO3 -xarch=v9"
+            cc_64_cflags_optlist="cpu"
+            ;;
+        esac
+
+        # using the v9 %tick register
+        SPEED_CYCLECOUNTER_OBJ_32=sparcv9.lo
+        SPEED_CYCLECOUNTER_OBJ_64=sparcv9.lo
+        cyclecounter_size_32=2
+        cyclecounter_size_64=2
+        ;;
+    esac
+    ;;
+
+
+  # VAX
+  vax*-*-*)
+    # Currently gcc (version 3.0) on vax always uses a frame pointer
+    # (config/vax/vax.h FRAME_POINTER_REQUIRED=1), so -fomit-frame-pointer
+    # will be ignored.
+    #
+    gcc_cflags="$gcc_cflags $fomit_frame_pointer"
+    path="vax"
+    extra_functions="udiv_w_sdiv"
+    ;;
+
+
+  # AMD and Intel x86 configurations, including AMD64
+  #
+  # Rumour has it gcc -O2 used to give worse register allocation than just
+  # -O, but let's assume that's no longer true.
+  #
+  # -m32 forces 32-bit mode on a bi-arch 32/64 amd64 build of gcc.  -m64 is
+  # the default in such a build (we think), so -m32 is essential for ABI=32.
+  # This is, of course, done for any $host_cpu, not just x86_64, so we can
+  # get such a gcc into the right mode to cross-compile to say i486-*-*.
+  #
+  # -m32 is not available in gcc 2.95 and earlier, hence cflags_maybe to use
+  # it when it works.  We check sizeof(long)==4 to ensure we get the right
+  # mode, in case -m32 has failed not because it's an old gcc, but because
+  # it's a dual 32/64-bit gcc without a 32-bit libc, or whatever.
+  #
+  i?86*-*-* | k[5-8]*-*-* | pentium*-*-* | athlon-*-* | viac3*-*-* | geode*-*-* | athlon64-*-* | k8-*-* | k10-*-* | bobcat-*-* | bulldozer-*-* | pentium4-*-* | atom-*-* | core2-*-* | corei*-*-* | x86_64-*-* | nano-*-*)
+    abilist="32"                          # widened to "64 32" later in this arm for 64-bit capable hosts
+    cclist="gcc icc cc"
+    gcc_cflags="$gcc_cflags $fomit_frame_pointer"
+    gcc_32_cflags_maybe="-m32"            # optional: gcc 2.95 and earlier lack -m32 (see the notes above)
+    icc_cflags="-no-gcc"
+    icc_cflags_optlist="opt"
+    icc_cflags_opt="-O3 -O2 -O1"
+    any_32_testlist="sizeof-long-4"       # confirms the compiler really is in 32-bit mode
+    CALLING_CONVENTIONS_OBJS='x86call.lo x86check$U.lo'
+
+    # Availability of rdtsc is checked at run-time.
+    SPEED_CYCLECOUNTER_OBJ=pentium.lo
+
+    # gcc 2.7.2 only knows i386 and i486, using -m386 or -m486.  These
+    #     represent -mcpu= since -m486 doesn't generate 486 specific insns.
+    # gcc 2.95 adds k6, pentium and pentiumpro, and takes -march= and -mcpu=.
+    # gcc 3.0 adds athlon.
+    # gcc 3.1 adds k6-2, k6-3, pentium-mmx, pentium2, pentium3, pentium4,
+    #     athlon-tbird, athlon-4, athlon-xp, athlon-mp.
+    # gcc 3.2 adds winchip2.
+    # gcc 3.3 adds winchip-c6.
+    # gcc 3.3.1 from mandrake adds k8 and knows -mtune.
+    # gcc 3.4 adds c3, c3-2, k8, and deprecates -mcpu in favour of -mtune.
+    #
+    # In gcc 2.95.[0123], -march=pentiumpro provoked a stack slot bug in an
+    # old version of mpz/powm.c.  Seems to be fine with the current code, so
+    # no need for any restrictions on that option.
+    #
+    # -march=pentiumpro can fail if the assembler doesn't know "cmov"
+    # (eg. solaris 2.8 native "as"), so always have -march=pentium after
+    # that as a fallback.
+    #
+    # -march=pentium4 and -march=k8 enable SSE2 instructions, which may or
+    # may not be supported by the assembler and/or the OS, and is bad in gcc
+    # prior to 3.3.  The tests will reject these if no good, so fallbacks
+    # like "-march=pentium4 -mno-sse2" are given to try also without SSE2.
+    # Note the relevant -march types are listed in the optflags handling
+    # below, be sure to update there if adding new types emitting SSE2.
+    #
+    # -mtune is used at the start of each cpu option list to give something
+    # gcc 3.4 will use, thereby avoiding warnings from -mcpu.  -mcpu forms
+    # are retained for use by prior gcc.  For example pentium has
+    # "-mtune=pentium -mcpu=pentium ...", the -mtune is for 3.4 and the
+    # -mcpu for prior.  If there's a brand new choice in 3.4 for a chip,
+    # like k8 for x86_64, then it can be the -mtune at the start, no need to
+    # duplicate anything.
+    #
+    gcc_cflags_optlist="cpu arch"
+    case $host_cpu in
+      i386*)
+       gcc_cflags_cpu="-mtune=i386 -mcpu=i386 -m386"
+       gcc_cflags_arch="-march=i386"
+       path="x86"
+       ;;
+      i486*)
+       gcc_cflags_cpu="-mtune=i486 -mcpu=i486 -m486"
+       gcc_cflags_arch="-march=i486"
+       path="x86/i486 x86"
+       ;;
+      i586 | pentium)
+       gcc_cflags_cpu="-mtune=pentium -mcpu=pentium -m486"
+       gcc_cflags_arch="-march=pentium"
+       path="x86/pentium x86"
+       ;;
+      pentiummmx)
+       gcc_cflags_cpu="-mtune=pentium-mmx -mcpu=pentium-mmx -mcpu=pentium -m486"
+       gcc_cflags_arch="-march=pentium-mmx -march=pentium"
+       path="x86/pentium/mmx x86/pentium x86"
+       ;;
+      i686 | pentiumpro)
+       gcc_cflags_cpu="-mtune=pentiumpro -mcpu=pentiumpro -mcpu=i486 -m486"
+       gcc_cflags_arch="-march=pentiumpro -march=pentium"
+       path="x86/p6 x86"
+       ;;
+      pentium2)
+       gcc_cflags_cpu="-mtune=pentium2 -mcpu=pentium2 -mcpu=pentiumpro -mcpu=i486 -m486"
+       gcc_cflags_arch="-march=pentium2 -march=pentiumpro -march=pentium"
+       path="x86/p6/mmx x86/p6 x86"
+       ;;
+      pentium3)
+       gcc_cflags_cpu="-mtune=pentium3 -mcpu=pentium3 -mcpu=pentiumpro -mcpu=i486 -m486"
+       gcc_cflags_arch="-march=pentium3 -march=pentiumpro -march=pentium"
+       path="x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
+       ;;
+      pentiumm)
+       gcc_cflags_cpu="-mtune=pentium3 -mcpu=pentium3 -mcpu=pentiumpro -mcpu=i486 -m486"
+       gcc_cflags_arch="-march=pentium3 -march=pentiumpro -march=pentium"
+       path="x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
+       ;;
+      k6)
+       gcc_cflags_cpu="-mtune=k6 -mcpu=k6 -mcpu=i486 -m486"
+       gcc_cflags_arch="-march=k6"
+       path="x86/k6/mmx x86/k6 x86"
+       ;;
+      k62)
+       gcc_cflags_cpu="-mtune=k6-2 -mcpu=k6-2 -mcpu=k6 -mcpu=i486 -m486"
+       gcc_cflags_arch="-march=k6-2 -march=k6"
+       path="x86/k6/k62mmx x86/k6/mmx x86/k6 x86"
+       ;;
+      k63)
+       gcc_cflags_cpu="-mtune=k6-3 -mcpu=k6-3 -mcpu=k6 -mcpu=i486 -m486"
+       gcc_cflags_arch="-march=k6-3 -march=k6"
+       path="x86/k6/k62mmx x86/k6/mmx x86/k6 x86"
+       ;;
+      geode)
+       gcc_cflags_cpu="-mtune=k6-3 -mcpu=k6-3 -mcpu=k6 -mcpu=i486 -m486"
+       gcc_cflags_arch="-march=k6-3 -march=k6"
+       path="x86/geode x86/k6/k62mmx x86/k6/mmx x86/k6 x86"
+       ;;
+      athlon)
+       # Athlon instruction costs are close to P6 (3 cycle load latency,
+       # 4-6 cycle mul, 40 cycle div, pairable adc, etc) so if gcc doesn't
+       # know athlon (eg. 2.95.2 doesn't) then fall back on pentiumpro.
+       gcc_cflags_cpu="-mtune=athlon -mcpu=athlon -mcpu=pentiumpro -mcpu=i486 -m486"
+       gcc_cflags_arch="-march=athlon -march=pentiumpro -march=pentium"
+       path="x86/k7/mmx x86/k7 x86"
+       ;;
+      i786 | pentium4)
+       # pentiumpro is the primary fallback when gcc doesn't know pentium4.
+       # This gets us cmov to eliminate branches.  Maybe "athlon" would be
+       # a possibility on gcc 3.0.
+       #
+       gcc_cflags_cpu="-mtune=pentium4 -mcpu=pentium4 -mcpu=pentiumpro -mcpu=i486 -m486"
+       gcc_cflags_arch="-march=pentium4 -march=pentium4~-mno-sse2 -march=pentiumpro -march=pentium"
+       gcc_64_cflags_cpu="-mtune=nocona"
+       path="x86/pentium4/sse2 x86/pentium4/mmx x86/pentium4 x86"
+       path_64="x86_64/pentium4 x86_64"
+       ;;
+      viac32)
+       # Not sure of the best fallbacks here for -mcpu.
+       # c3-2 has sse and mmx, so pentium3 is good for -march.
+       gcc_cflags_cpu="-mtune=c3-2 -mcpu=c3-2 -mcpu=i486 -m486"
+       gcc_cflags_arch="-march=c3-2 -march=pentium3 -march=pentiumpro -march=pentium"
+       path="x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
+       ;;
+      viac3*)
+       # Not sure of the best fallbacks here.
+       gcc_cflags_cpu="-mtune=c3 -mcpu=c3 -mcpu=i486 -m486"
+       gcc_cflags_arch="-march=c3 -march=pentium-mmx -march=pentium"
+       path="x86/pentium/mmx x86/pentium x86"
+       ;;
+      athlon64 | k8 | x86_64)
+       gcc_cflags_cpu="-mtune=k8 -mcpu=athlon -mcpu=pentiumpro -mcpu=i486 -m486"
+       gcc_cflags_arch="-march=k8 -march=k8~-mno-sse2 -march=athlon -march=pentiumpro -march=pentium"
+       path="x86/k8 x86/k7/mmx x86/k7 x86"
+       path_64="x86_64/k8 x86_64"
+       ;;
+      k10)
+       gcc_cflags_cpu="-mtune=amdfam10 -mtune=k8"
+       gcc_cflags_arch="-march=amdfam10 -march=k8 -march=k8~-mno-sse2"
+       path="x86/k10 x86/k8 x86/k7/mmx x86/k7 x86"
+       path_64="x86_64/k10 x86_64/k8 x86_64"
+       ;;
+      bobcat)
+       gcc_cflags_cpu="-mtune=btver1 -mtune=amdfam10 -mtune=k8"
+       gcc_cflags_arch="-march=btver1 -march=amdfam10 -march=k8 -march=k8~-mno-sse2"
+       path="x86/bobcat x86/k7/mmx x86/k7 x86"
+       path_64="x86_64/bobcat x86_64/k10 x86_64/k8 x86_64"
+       ;;
+      bulldozer | bd1)
+       gcc_cflags_cpu="-mtune=bdver1 -mtune=amdfam10 -mtune=k8"
+       gcc_cflags_arch="-march=bdver1 -march=amdfam10 -march=k8 -march=k8~-mno-sse2"
+       path="x86/bd1 x86/k7/mmx x86/k7 x86"
+       path_64="x86_64/bd1 x86_64"
+       ;;
+      core2)
+       gcc_cflags_cpu="-mtune=core2 -mtune=k8"
+       gcc_cflags_arch="-march=core2 -march=core2~-mno-sse2 -march=k8 -march=k8~-mno-sse2"
+       path="x86/core2 x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
+       path_64="x86_64/core2 x86_64"
+       ;;
+      corei | coreinhm | coreiwsm)
+       gcc_cflags_cpu="-mtune=corei7 -mtune=core2 -mtune=k8"
+       gcc_cflags_arch="-march=corei7 -march=core2 -march=core2~-mno-sse2 -march=k8 -march=k8~-mno-sse2"
+       path="x86/coreinhm x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
+       path_64="x86_64/coreinhm x86_64/core2 x86_64"
+       ;;
+      coreisbr)
+       gcc_cflags_cpu="-mtune=corei7 -mtune=core2 -mtune=k8"
+       gcc_cflags_arch="-march=corei7 -march=core2 -march=core2~-mno-sse2 -march=k8 -march=k8~-mno-sse2"
+       path="x86/coreisbr x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
+       path_64="x86_64/coreisbr x86_64/coreinhm x86_64/core2 x86_64"
+       ;;
+      atom)
+       gcc_cflags_cpu="-mtune=atom -mtune=pentium3"
+       gcc_cflags_arch="-march=atom -march=pentium3"
+       path="x86/atom/sse2 x86/atom/mmx x86/atom x86"
+       path_64="x86_64/atom x86_64"
+       ;;
+      nano)
+       gcc_cflags_cpu="-mtune=nano"
+       gcc_cflags_arch="-march=nano"
+       path="x86/nano x86"
+       path_64="x86_64/nano x86_64"
+       ;;
+      *)
+       gcc_cflags_cpu="-mtune=i486 -mcpu=i486 -m486"
+       gcc_cflags_arch="-march=i486"
+       path="x86"
+       path_64="x86_64"
+       ;;
+    esac
+
+    case $host in
+      athlon64-*-* | k8-*-* | k10-*-* | bobcat-*-* | bulldozer-*-* | pentium4-*-* | atom-*-* | core2-*-* | corei*-*-* | x86_64-*-* | nano-*-*)
+       cclist_64="gcc"
+       gcc_64_cflags="$gcc_64_cflags -m64"
+       gcc_64_cflags_optlist="cpu arch"
+       CALLING_CONVENTIONS_OBJS_64='amd64call.lo amd64check$U.lo'
+       SPEED_CYCLECOUNTER_OBJ_64=x86_64.lo
+       cyclecounter_size_64=2
+       abilist="64 32"
+
+       case $host in
+         *-*-solaris*)
+           # Sun cc.
+           cclist_64="$cclist_64 cc"
+           cc_64_cflags="-xO3 -m64"
+           ;;
+         *-*-mingw* | *-*-cygwin)
+           limb_64=longlong
+           path_64=""  # Windows amd64 calling conventions are *different*
+           # Silence many pedantic warnings for w64.  FIXME.
+           gcc_64_cflags="$gcc_64_cflags -std=gnu99"
+           ;;
+       esac
+       ;;
+    esac
+    ;;
+
+
+  # FIXME: z8kx won't get through config.sub.  Could make 16 versus 32 bit
+  # limb an ABI option perhaps.
+  z8kx*-*-*)
+    path="z8000x"
+    extra_functions="udiv_w_sdiv"
+    ;;
+  z8k*-*-*)
+    path="z8000"
+    extra_functions="udiv_w_sdiv"
+    ;;
+
+
+  # Special CPU "none" selects generic C.  -DNO_ASM is used to disable gcc
+  # asm blocks in longlong.h (since they're driven by cpp pre-defined
+  # symbols like __alpha rather than the configured $host_cpu).
+  #
+  none-*-*)
+    abilist="long longlong"
+    cclist_long=$cclist
+    gcc_long_cflags=$gcc_cflags
+    gcc_long_cppflags="-DNO_ASM"
+    cc_long_cflags=$cc_cflags
+    cclist_longlong=$cclist
+    gcc_longlong_cflags=$gcc_cflags
+    gcc_longlong_cppflags="-DNO_ASM"
+    cc_longlong_cflags=$cc_cflags
+    limb_longlong=longlong
+    ;;
+
+esac
+
+# mingw can be built by the cygwin gcc if -mno-cygwin is added.  For
+# convenience add this automatically if it works.  An actual mingw gcc
+# accepts -mno-cygwin too, though there it is already the default.  mingw
+# only runs on the x86s, but allow any CPU here so as to catch "none" too.
+#
+case $host in
+  *-*-mingw*)   # matched on the OS part only, so every CPU (including "none") is covered
+    gcc_cflags_optlist="$gcc_cflags_optlist nocygwin"   # append a "nocygwin" entry to gcc's optional-flag list
+    gcc_cflags_nocygwin="-mno-cygwin"                   # candidate flag for that "nocygwin" entry
+    ;;
+esac
+
+
+CFLAGS_or_unset=${CFLAGS-'(unset)'}       # "-" rather than ":-": a set-but-empty CFLAGS is logged as empty
+CPPFLAGS_or_unset=${CPPFLAGS-'(unset)'}   # likewise for CPPFLAGS
+# Dump the user's settings and the defaults chosen so far to fd 5 (presumably config.log).
+cat >&5 <<EOF
+User:
+ABI=$ABI
+CC=$CC
+CFLAGS=$CFLAGS_or_unset
+CPPFLAGS=$CPPFLAGS_or_unset
+MPN_PATH=$MPN_PATH
+GMP:
+abilist=$abilist
+cclist=$cclist
+EOF
+
+# Note whether the user set CFLAGS/CPPFLAGS at all; the result is "set" even for an empty value.
+test_CFLAGS=${CFLAGS+set}
+test_CPPFLAGS=${CPPFLAGS+set}
+# Walk $abilist so that abi_last ends up holding its final entry.
+for abi in $abilist; do
+  abi_last="$abi"
+done
+
+# If the user specifies an ABI then it must be in $abilist, after that
+# $abilist is restricted to just that choice.
+#
+if test "x$ABI" != x; then
+  found=no
+  for abi in $abilist; do
+    case $abi in "$ABI") found=yes; break ;; esac
+  done
+  test "$found" = yes ||
+    as_fn_error "ABI=$ABI is not among the following valid choices: $abilist" "$LINENO" 5
+  # From here on only the ABI the user asked for is considered.
+  abilist="$ABI"
+fi
+
+found_compiler=no
+
+for abi in $abilist; do
+
+  echo "checking ABI=$abi"
+
+  # Suppose abilist="64 32".  Then for abi=64 we will have abi1="_64" and
+  # abi2="_64", and for abi=32 we will have abi1="_32" and abi2="".  This is
+  # how $gcc_cflags becomes a fallback for $gcc_32_cflags (32 being the last
+  # in the abilist), while there's no fallback for $gcc_64_cflags.
+  #
+  abi1=`echo _$abi | sed 's/[.]//g'`
+  if test $abi = $abi_last; then abi2=; else abi2="$abi1"; fi
+
+  # Compiler choices under this ABI
+                              eval cclist_chosen=\"\$cclist$abi1\"
+  test -n "$cclist_chosen" || eval cclist_chosen=\"\$cclist$abi2\"
+
+  # If there's a user specified $CC then don't use a list for
+  # $cclist_chosen, just a single value for $ccbase.
+  #
+  if test -n "$CC"; then
+
+    # The first word of $CC, stripped of any directory.  For instance
+    # CC="/usr/local/bin/gcc -pipe" will give "gcc".
+    #
+    for ccbase in $CC; do break; done
+    ccbase=`echo $ccbase | sed 's:.*/::'`
+
+    # If this $ccbase is in $cclist_chosen then it's a compiler we know and
+    # we can do flags defaulting with it.  If not, then $cclist_chosen is
+    # set to "unrecognised" so no default flags are used.
+    #
+    # "unrecognised" is used to avoid bad effects with eval if $ccbase has
+    # non-symbol characters.  For instance ccbase=my+cc would end up with
+    # something like cflags="$my+cc_cflags" which would give
+    # cflags="+cc_cflags" rather than the intended empty string for an
+    # unknown compiler.
+    #
+    found=unrecognised
+    for i in $cclist_chosen; do
+      if test "$ccbase" = $i; then
+        found=$ccbase
+        break
+      fi
+    done
+    cclist_chosen=$found
+  fi
+
+  for ccbase in $cclist_chosen; do
+
+    # When cross compiling, look for a compiler with the $host_alias as a
+    # prefix, the same way that AC_CHECK_TOOL does.  But don't do this to a
+    # user-selected $CC.
+    #
+    # $cross_compiling will be yes/no/maybe at this point.  Do the host
+    # prefixing for "maybe" as well as "yes".
+    #
+    if test "$cross_compiling" != no && test -z "$CC"; then
+      cross_compiling_prefix="${host_alias}-"
+    fi
+
+    for ccprefix in $cross_compiling_prefix ""; do
+
+      cc="$CC"
+      test -n "$cc" || cc="$ccprefix$ccbase"
+
+      # If the compiler is gcc but installed under another name, then change
+      # $ccbase so as to use the flags we know for gcc.  This helps for
+      # instance when specifying CC=gcc272 on Debian GNU/Linux, or the
+      # native cc which is really gcc on NeXT or MacOS-X.
+      #
+      # FIXME: There's a slight misfeature here.  If cc is actually gcc but
+      # gcc is not a known compiler under this $abi then we'll end up
+      # testing it with no flags and it'll work, but chances are it won't be
+      # in the right mode for the ABI we desire.  Let's quietly hope this
+      # doesn't happen.
+      #
+      if test $ccbase != gcc; then
+        cat >conftest.c <<EOF
+#if ! defined (__GNUC__) || defined (__INTEL_COMPILER)
+  choke me
+#endif
+EOF
+gmp_compile="$cc -c conftest.c >&5"
+if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_compile\""; } >&5
+  (eval $gmp_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  rm -f conftest*
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $cc is gcc" >&5
+$as_echo_n "checking whether $cc is gcc... " >&6; }
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+  ccbase=gcc
+else
+  rm -f conftest*
+  :
+fi
+
+      fi
+
+      # Similarly if the compiler is IBM xlc but invoked as cc or whatever
+      # then change $ccbase and make the default xlc flags available.
+      if test $ccbase != xlc; then
+        gmp_command="$cc 2>&1 | grep xlc >/dev/null"
+if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_command\""; } >&5
+  (eval $gmp_command) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $cc is xlc" >&5
+$as_echo_n "checking whether $cc is xlc... " >&6; }
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+  ccbase=xlc
+else
+  :
+fi
+
+      fi
+
+      # acc was Sun's first unbundled compiler back in the SunOS days, or
+      # something like that, but today its man page says it's not meant to
+      # be used directly (instead via /usr/ucb/cc).  The options are pretty
+      # much the same as the main SunPRO cc, so share those configs.
+      #
+      case $host in
+        *sparc*-*-solaris* | *sparc*-*-sunos*)
+          if test "$ccbase" = acc; then ccbase=cc; fi ;;
+      esac
+
+      for tmp_cflags_maybe in yes no; do
+                             eval cflags=\"\$${ccbase}${abi1}_cflags\"
+        test -n "$cflags" || eval cflags=\"\$${ccbase}${abi2}_cflags\"
+
+       if test "$tmp_cflags_maybe" = yes; then
+          # don't try cflags_maybe when the user set CFLAGS
+          if test "$test_CFLAGS" = set; then continue; fi
+                                     eval cflags_maybe=\"\$${ccbase}${abi1}_cflags_maybe\"
+          test -n "$cflags_maybe" || eval cflags_maybe=\"\$${ccbase}${abi2}_cflags_maybe\"
+          # don't try cflags_maybe if there's nothing set
+          if test -z "$cflags_maybe"; then continue; fi
+          cflags="$cflags_maybe $cflags"
+        fi
+
+        # Any user CFLAGS, even an empty string, takes precedence
+        if test "$test_CFLAGS" = set; then cflags=$CFLAGS; fi
+
+        # Any user CPPFLAGS, even an empty string, takes precedence
+                               eval cppflags=\"\$${ccbase}${abi1}_cppflags\"
+        test -n "$cppflags" || eval cppflags=\"\$${ccbase}${abi2}_cppflags\"
+        if test "$test_CPPFLAGS" = set; then cppflags=$CPPFLAGS; fi
+
+        # --enable-profiling adds -p/-pg even to user-specified CFLAGS.
+        # This is convenient, but it's perhaps a bit naughty to modify user
+        # CFLAGS.
+        case "$enable_profiling" in
+          prof)       cflags="$cflags -p" ;;
+          gprof)      cflags="$cflags -pg" ;;
+          instrument) cflags="$cflags -finstrument-functions" ;;
+        esac
+
+        { $as_echo "$as_me:${as_lineno-$LINENO}: checking compiler $cc $cflags $cppflags" >&5
+$as_echo_n "checking compiler $cc $cflags $cppflags... " >&6; }
+gmp_prog_cc_works=yes
+
+# first see that a simple "main()" works, then go on to other checks
+if test "$gmp_prog_cc_works" = yes; then
+  # remove anything that might look like compiler output to our "||" expression
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  cat >conftest.c <<EOF
+
+int main () { return 0; }
+EOF
+  echo "Test compile: " >&5
+  gmp_compile="$cc $cflags $cppflags conftest.c >&5"
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_compile\""; } >&5
+  (eval $gmp_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+    cc_works_part=yes
+    if test "$cross_compiling" = no; then
+      if { ac_try='./a.out || ./b.out || ./a.exe || ./a_out.exe || ./conftest'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then :;
+      else
+        cc_works_part=norun
+      fi
+    fi
+  else
+    cc_works_part=no
+  fi
+  if test "$cc_works_part" != yes; then
+    echo "failed program was:" >&5
+    cat conftest.c >&5
+  fi
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  case $cc_works_part in
+    yes)
+
+      ;;
+    no)
+      gmp_prog_cc_works="no"
+      ;;
+    norun)
+      gmp_prog_cc_works="no, program does not run"
+      ;;
+  esac
+fi
+
+
+
+
+if test "$gmp_prog_cc_works" = yes; then
+  # remove anything that might look like compiler output to our "||" expression
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  cat >conftest.c <<EOF
+/* The following provokes an internal error from gcc 2.95.2 -mpowerpc64
+   (without -maix64), hence detecting an unusable compiler */
+void *g() { return (void *) 0; }
+void *f() { return g(); }
+
+int main () { return 0; }
+EOF
+  echo "Test compile: function pointer return" >&5
+  gmp_compile="$cc $cflags $cppflags conftest.c >&5"
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_compile\""; } >&5
+  (eval $gmp_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+    cc_works_part=yes
+    if test "$cross_compiling" = no; then
+      if { ac_try='./a.out || ./b.out || ./a.exe || ./a_out.exe || ./conftest'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then :;
+      else
+        cc_works_part=norun
+      fi
+    fi
+  else
+    cc_works_part=no
+  fi
+  if test "$cc_works_part" != yes; then
+    echo "failed program was:" >&5
+    cat conftest.c >&5
+  fi
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  case $cc_works_part in
+    yes)
+
+      ;;
+    no)
+      gmp_prog_cc_works="no, function pointer return"
+      ;;
+    norun)
+      gmp_prog_cc_works="no, function pointer return, program does not run"
+      ;;
+  esac
+fi
+
+
+
+
+if test "$gmp_prog_cc_works" = yes; then
+  # remove anything that might look like compiler output to our "||" expression
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  cat >conftest.c <<EOF
+/* The following provokes an invalid instruction syntax from i386 gcc
+   -march=pentiumpro on Solaris 2.8.  The native sun assembler
+   requires a non-standard syntax for cmov which gcc (as of 2.95.2 at
+   least) doesn't know.  */
+int n;
+int cmov () { return (n >= 0 ? n : 0); }
+
+int main () { return 0; }
+EOF
+  echo "Test compile: cmov instruction" >&5
+  gmp_compile="$cc $cflags $cppflags conftest.c >&5"
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_compile\""; } >&5
+  (eval $gmp_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+    cc_works_part=yes
+    if test "$cross_compiling" = no; then
+      if { ac_try='./a.out || ./b.out || ./a.exe || ./a_out.exe || ./conftest'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then :;
+      else
+        cc_works_part=norun
+      fi
+    fi
+  else
+    cc_works_part=no
+  fi
+  if test "$cc_works_part" != yes; then
+    echo "failed program was:" >&5
+    cat conftest.c >&5
+  fi
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  case $cc_works_part in
+    yes)
+
+      ;;
+    no)
+      gmp_prog_cc_works="no, cmov instruction"
+      ;;
+    norun)
+      gmp_prog_cc_works="no, cmov instruction, program does not run"
+      ;;
+  esac
+fi
+
+
+
+
+if test "$gmp_prog_cc_works" = yes; then
+  # remove anything that might look like compiler output to our "||" expression
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  cat >conftest.c <<EOF
+/* The following provokes a linker invocation problem with gcc 3.0.3
+   on AIX 4.3 under "-maix64 -mpowerpc64 -mcpu=630".  The -mcpu=630
+   option causes gcc to incorrectly select the 32-bit libgcc.a, not
+   the 64-bit one, and consequently it misses out on the __fixunsdfdi
+   helper (double -> uint64 conversion).  */
+double d;
+unsigned long gcc303 () { return (unsigned long) d; }
+
+int main () { return 0; }
+EOF
+  echo "Test compile: double -> ulong conversion" >&5
+  gmp_compile="$cc $cflags $cppflags conftest.c >&5"
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_compile\""; } >&5
+  (eval $gmp_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+    cc_works_part=yes
+    if test "$cross_compiling" = no; then
+      if { ac_try='./a.out || ./b.out || ./a.exe || ./a_out.exe || ./conftest'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then :;
+      else
+        cc_works_part=norun
+      fi
+    fi
+  else
+    cc_works_part=no
+  fi
+  if test "$cc_works_part" != yes; then
+    echo "failed program was:" >&5
+    cat conftest.c >&5
+  fi
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  case $cc_works_part in
+    yes)
+
+      ;;
+    no)
+      gmp_prog_cc_works="no, double -> ulong conversion"
+      ;;
+    norun)
+      gmp_prog_cc_works="no, double -> ulong conversion, program does not run"
+      ;;
+  esac
+fi
+
+
+
+
+if test "$gmp_prog_cc_works" = yes; then
+  # remove anything that might look like compiler output to our "||" expression
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  cat >conftest.c <<EOF
+/* The following provokes an error from hppa gcc 2.95 under -mpa-risc-2-0 if
+   the assembler doesn't know hppa 2.0 instructions.  fneg is a 2.0
+   instruction, and a negation like this comes out using it.  */
+double fneg_data;
+unsigned long fneg () { return -fneg_data; }
+
+int main () { return 0; }
+EOF
+  echo "Test compile: double negation" >&5
+  gmp_compile="$cc $cflags $cppflags conftest.c >&5"
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_compile\""; } >&5
+  (eval $gmp_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+    cc_works_part=yes
+    if test "$cross_compiling" = no; then
+      if { ac_try='./a.out || ./b.out || ./a.exe || ./a_out.exe || ./conftest'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then :;
+      else
+        cc_works_part=norun
+      fi
+    fi
+  else
+    cc_works_part=no
+  fi
+  if test "$cc_works_part" != yes; then
+    echo "failed program was:" >&5
+    cat conftest.c >&5
+  fi
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  case $cc_works_part in
+    yes)
+
+      ;;
+    no)
+      gmp_prog_cc_works="no, double negation"
+      ;;
+    norun)
+      gmp_prog_cc_works="no, double negation, program does not run"
+      ;;
+  esac
+fi
+
+
+
+
+if test "$gmp_prog_cc_works" = yes; then
+  # remove anything that might look like compiler output to our "||" expression
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  cat >conftest.c <<EOF
+/* The following makes gcc 3.3 -march=pentium4 generate an SSE2 xmm insn
+   (cvtsd2ss) which will provoke an error if the assembler doesn't recognise
+   those instructions.  Not sure how much of the gmp code will come out
+   wanting sse2, but it's easiest to reject an option we know is bad.  */
+double ftod_data;
+float ftod () { return (float) ftod_data; }
+
+int main () { return 0; }
+EOF
+  echo "Test compile: double -> float conversion" >&5
+  gmp_compile="$cc $cflags $cppflags conftest.c >&5"
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_compile\""; } >&5
+  (eval $gmp_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+    cc_works_part=yes
+    if test "$cross_compiling" = no; then
+      if { ac_try='./a.out || ./b.out || ./a.exe || ./a_out.exe || ./conftest'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then :;
+      else
+        cc_works_part=norun
+      fi
+    fi
+  else
+    cc_works_part=no
+  fi
+  if test "$cc_works_part" != yes; then
+    echo "failed program was:" >&5
+    cat conftest.c >&5
+  fi
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  case $cc_works_part in
+    yes)
+
+      ;;
+    no)
+      gmp_prog_cc_works="no, double -> float conversion"
+      ;;
+    norun)
+      gmp_prog_cc_works="no, double -> float conversion, program does not run"
+      ;;
+  esac
+fi
+
+
+
+
+if test "$gmp_prog_cc_works" = yes; then
+  # remove anything that might look like compiler output to our "||" expression
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  cat >conftest.c <<EOF
+/* The following provokes an internal compiler error from gcc version
+   "2.9-gnupro-99r1" under "-O2 -mcpu=ev6", apparently relating to char
+   values being spilled into floating point registers.  The problem doesn't
+   show up all the time, but has occurred enough in GMP for us to reject
+   this compiler+flags.  */
+#include <string.h>  /* for memcpy */
+struct try_t
+{
+ char dst[2];
+ char size;
+ long d0, d1, d2, d3, d4, d5, d6;
+ char overlap;
+};
+struct try_t param[6];
+int
+param_init ()
+{
+ struct try_t *p;
+ memcpy (p, &param[ 2 ], sizeof (*p));
+ memcpy (p, &param[ 2 ], sizeof (*p));
+ p->size = 2;
+ memcpy (p, &param[ 1 ], sizeof (*p));
+ p->dst[0] = 1;
+ p->overlap = 2;
+ memcpy (p, &param[ 3 ], sizeof (*p));
+ p->dst[0] = 1;
+ p->overlap = 8;
+ memcpy (p, &param[ 4 ], sizeof (*p));
+ memcpy (p, &param[ 4 ], sizeof (*p));
+ p->overlap = 8;
+ memcpy (p, &param[ 5 ], sizeof (*p));
+ memcpy (p, &param[ 5 ], sizeof (*p));
+ memcpy (p, &param[ 5 ], sizeof (*p));
+ return 0;
+}
+
+int main () { return 0; }
+EOF
+  echo "Test compile: gnupro alpha ev6 char spilling" >&5
+  gmp_compile="$cc $cflags $cppflags conftest.c >&5"
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_compile\""; } >&5
+  (eval $gmp_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+    cc_works_part=yes
+    if test "$cross_compiling" = no; then
+      if { ac_try='./a.out || ./b.out || ./a.exe || ./a_out.exe || ./conftest'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then :;
+      else
+        cc_works_part=norun
+      fi
+    fi
+  else
+    cc_works_part=no
+  fi
+  if test "$cc_works_part" != yes; then
+    echo "failed program was:" >&5
+    cat conftest.c >&5
+  fi
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  case $cc_works_part in
+    yes)
+
+      ;;
+    no)
+      gmp_prog_cc_works="no, gnupro alpha ev6 char spilling"
+      ;;
+    norun)
+      gmp_prog_cc_works="no, gnupro alpha ev6 char spilling, program does not run"
+      ;;
+  esac
+fi
+
+
+
+
+# __builtin_alloca is not available everywhere, check it exists before
+# seeing that it works
+if test "$gmp_prog_cc_works" = yes; then
+  # remove anything that might look like compiler output to our "||" expression
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  cat >conftest.c <<EOF
+int k; int foo () { __builtin_alloca (k); }
+EOF
+  echo "Test compile: __builtin_alloca availability" >&5
+  gmp_compile="$cc $cflags $cppflags conftest.c >&5"
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_compile\""; } >&5
+  (eval $gmp_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+    cc_works_part=yes
+    if test "$cross_compiling" = no; then
+      if { ac_try='./a.out || ./b.out || ./a.exe || ./a_out.exe || ./conftest'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then :;
+      else
+        cc_works_part=norun
+      fi
+    fi
+  else
+    cc_works_part=no
+  fi
+  if test "$cc_works_part" != yes; then
+    echo "failed program was:" >&5
+    cat conftest.c >&5
+  fi
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  case $cc_works_part in
+    yes)
+      if test "$gmp_prog_cc_works" = yes; then
+  # remove anything that might look like compiler output to our "||" expression
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  cat >conftest.c <<EOF
+/* The following provokes an internal compiler error from Itanium HP-UX cc
+    under +O2 or higher.  We use this sort of code in mpn/generic/mul_fft.c. */
+int k;
+int foo ()
+{
+  int i, **a;
+  a = __builtin_alloca (k);
+  for (i = 0; i <= k; i++)
+    a[i] = __builtin_alloca (1 << i);
+}
+
+int main () { return 0; }
+EOF
+  echo "Test compile: alloca array" >&5
+  gmp_compile="$cc $cflags $cppflags conftest.c >&5"
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_compile\""; } >&5
+  (eval $gmp_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+    cc_works_part=yes
+    if test "$cross_compiling" = no; then
+      if { ac_try='./a.out || ./b.out || ./a.exe || ./a_out.exe || ./conftest'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then :;
+      else
+        cc_works_part=norun
+      fi
+    fi
+  else
+    cc_works_part=no
+  fi
+  if test "$cc_works_part" != yes; then
+    echo "failed program was:" >&5
+    cat conftest.c >&5
+  fi
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  case $cc_works_part in
+    yes)
+
+      ;;
+    no)
+      gmp_prog_cc_works="no, alloca array"
+      ;;
+    norun)
+      gmp_prog_cc_works="no, alloca array, program does not run"
+      ;;
+  esac
+fi
+
+
+
+      ;;
+    no)
+
+      ;;
+    norun)
+
+      ;;
+  esac
+fi
+
+
+if test "$gmp_prog_cc_works" = yes; then
+  # remove anything that might look like compiler output to our "||" expression
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  cat >conftest.c <<EOF
+/* The following provokes an internal error from the assembler on
+   power2-ibm-aix4.3.1.0.  gcc -mrios2 compiles to nabs+fcirz, and this
+   results in "Internal error related to the source program domain".
+
+   For reference it seems to be the combination of nabs+fcirz which is bad,
+   not either alone.  This sort of thing occurs in mpz/get_str.c with the
+   way double chars_per_bit_exactly is applied in MPN_SIZEINBASE.  Perhaps
+   if that code changes to a scaled-integer style then we won't need this
+   test.  */
+
+double fp[1];
+int x;
+int f ()
+{
+  int a;
+  a = (x >= 0 ? x : -x);
+  return a * fp[0];
+}
+
+int main () { return 0; }
+EOF
+  echo "Test compile: abs int -> double conversion" >&5
+  gmp_compile="$cc $cflags $cppflags conftest.c >&5"
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_compile\""; } >&5
+  (eval $gmp_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+    cc_works_part=yes
+    if test "$cross_compiling" = no; then
+      if { ac_try='./a.out || ./b.out || ./a.exe || ./a_out.exe || ./conftest'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then :;
+      else
+        cc_works_part=norun
+      fi
+    fi
+  else
+    cc_works_part=no
+  fi
+  if test "$cc_works_part" != yes; then
+    echo "failed program was:" >&5
+    cat conftest.c >&5
+  fi
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  case $cc_works_part in
+    yes)
+
+      ;;
+    no)
+      gmp_prog_cc_works="no, abs int -> double conversion"
+      ;;
+    norun)
+      gmp_prog_cc_works="no, abs int -> double conversion, program does not run"
+      ;;
+  esac
+fi
+
+
+
+
+if test "$gmp_prog_cc_works" = yes; then
+  # remove anything that might look like compiler output to our "||" expression
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  cat >conftest.c <<EOF
+/* The following provokes a segfault in the compiler on powerpc-apple-darwin.
+   Extracted from tests/mpn/t-iord_u.c.  Causes Apple's gcc 3.3 build 1640 and
+   1666 to segfault with e.g., -O2 -mpowerpc64.  */
+
+#if defined (__GNUC__) && ! defined (__cplusplus)
+typedef unsigned long long t1;typedef t1*t2;
+static __inline__ t1 e(t2 rp,t2 up,int n,t1 v0)
+{t1 c,x,r;int i;if(v0){c=1;for(i=1;i<n;i++){x=up[i];r=x+1;rp[i]=r;}}return c;}
+f(){static const struct{t1 n;t1 src[9];t1 want[9];}d[]={{1,{0},{1}},};t1 got[9];int i;
+for(i=0;i<1;i++){if(e(got,got,9,d[i].n)==0)h();g(i,d[i].src,d[i].n,got,d[i].want,9);if(d[i].n)h();}}
+h(){}g(){}
+#else
+int dummy;
+#endif
+
+int main () { return 0; }
+EOF
+  echo "Test compile: long long reliability test 1" >&5
+  gmp_compile="$cc $cflags $cppflags conftest.c >&5"
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_compile\""; } >&5
+  (eval $gmp_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+    cc_works_part=yes
+    if test "$cross_compiling" = no; then
+      if { ac_try='./a.out || ./b.out || ./a.exe || ./a_out.exe || ./conftest'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then :;
+      else
+        cc_works_part=norun
+      fi
+    fi
+  else
+    cc_works_part=no
+  fi
+  if test "$cc_works_part" != yes; then
+    echo "failed program was:" >&5
+    cat conftest.c >&5
+  fi
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  case $cc_works_part in
+    yes)
+
+      ;;
+    no)
+      gmp_prog_cc_works="no, long long reliability test 1"
+      ;;
+    norun)
+      gmp_prog_cc_works="no, long long reliability test 1, program does not run"
+      ;;
+  esac
+fi
+
+
+
+
+if test "$gmp_prog_cc_works" = yes; then
+  # remove anything that might look like compiler output to our "||" expression
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  cat >conftest.c <<EOF
+/* The following provokes an internal compiler error on powerpc-apple-darwin.
+   Extracted from mpz/cfdiv_q_2exp.c.  Causes Apple's gcc 3.3 build 1640 and
+   1666 to get an ICE with -O1 -mpowerpc64.  */
+
+#if defined (__GNUC__) && ! defined (__cplusplus)
+f(int u){int i;long long x;x=u?~0:0;if(x)for(i=0;i<9;i++);x&=g();if(x)g();}
+g(){}
+#else
+int dummy;
+#endif
+
+int main () { return 0; }
+EOF
+  echo "Test compile: long long reliability test 2" >&5
+  gmp_compile="$cc $cflags $cppflags conftest.c >&5"
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_compile\""; } >&5
+  (eval $gmp_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+    cc_works_part=yes
+    if test "$cross_compiling" = no; then
+      if { ac_try='./a.out || ./b.out || ./a.exe || ./a_out.exe || ./conftest'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then :;
+      else
+        cc_works_part=norun
+      fi
+    fi
+  else
+    cc_works_part=no
+  fi
+  if test "$cc_works_part" != yes; then
+    echo "failed program was:" >&5
+    cat conftest.c >&5
+  fi
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  case $cc_works_part in
+    yes)
+
+      ;;
+    no)
+      gmp_prog_cc_works="no, long long reliability test 2"
+      ;;
+    norun)
+      gmp_prog_cc_works="no, long long reliability test 2, program does not run"
+      ;;
+  esac
+fi
+
+
+
+
+if test "$gmp_prog_cc_works" = yes; then
+  # remove anything that might look like compiler output to our "||" expression
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  cat >conftest.c <<EOF
+/* The following is mis-compiled by HP ia-64 cc version
+        cc: HP aC++/ANSI C B3910B A.05.55 [Dec 04 2003]
+   under "cc +O3", both in +DD32 and +DD64 modes.  The mpn_lshift_com gets
+   inlined and its return value somehow botched to be 0 instead of 1.  This
+   arises in the real mpn_lshift_com in mul_fft.c.  A lower optimization
+   level, like +O2 seems ok.  This code needs to be run to show the problem,
+   but that's fine, the offending cc is a native-only compiler so we don't
+   have to worry about cross compiling.  */
+
+#if ! defined (__cplusplus)
+unsigned long
+lshift_com (rp, up, n, cnt)
+  unsigned long *rp;
+  unsigned long *up;
+  long n;
+  unsigned cnt;
+{
+  unsigned long retval, high_limb, low_limb;
+  unsigned tnc;
+  long i;
+  tnc = 8 * sizeof (unsigned long) - cnt;
+  low_limb = *up++;
+  retval = low_limb >> tnc;
+  high_limb = low_limb << cnt;
+  for (i = n - 1; i != 0; i--)
+    {
+      low_limb = *up++;
+      *rp++ = ~(high_limb | (low_limb >> tnc));
+      high_limb = low_limb << cnt;
+    }
+  return retval;
+}
+int
+main ()
+{
+  unsigned long cy, rp[2], up[2];
+  up[0] = ~ 0L;
+  up[1] = 0;
+  cy = lshift_com (rp, up, 2L, 1);
+  if (cy != 1L)
+    return 1;
+  return 0;
+}
+#else
+int
+main ()
+{
+  return 0;
+}
+#endif
+
+EOF
+  echo "Test compile: mpn_lshift_com optimization" >&5
+  gmp_compile="$cc $cflags $cppflags conftest.c >&5"
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_compile\""; } >&5
+  (eval $gmp_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+    cc_works_part=yes
+    if test "$cross_compiling" = no; then
+      if { ac_try='./a.out || ./b.out || ./a.exe || ./a_out.exe || ./conftest'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then :;
+      else
+        cc_works_part=norun
+      fi
+    fi
+  else
+    cc_works_part=no
+  fi
+  if test "$cc_works_part" != yes; then
+    echo "failed program was:" >&5
+    cat conftest.c >&5
+  fi
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  case $cc_works_part in
+    yes)
+
+      ;;
+    no)
+      gmp_prog_cc_works="no, mpn_lshift_com optimization"
+      ;;
+    norun)
+      gmp_prog_cc_works="no, mpn_lshift_com optimization, program does not run"
+      ;;
+  esac
+fi
+
+
+
+if test "$gmp_prog_cc_works" = yes; then
+  # remove anything that might look like compiler output to our "||" expression
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  cat >conftest.c <<EOF
+/* The following is mis-compiled by Intel ia-64 icc version 1.8 under
+    "icc -O3",  After several calls, the function writes parial garbage to
+    the result vector.  Perhaps relates to the chk.a.nc insn.  This code needs
+    to be run to show the problem, but that's fine, the offending cc is a
+    native-only compiler so we don't have to worry about cross compiling.  */
+
+#if ! defined (__cplusplus)
+#include <stdlib.h>
+void
+lshift_com (rp, up, n, cnt)
+  unsigned long *rp;
+  unsigned long *up;
+  long n;
+  unsigned cnt;
+{
+  unsigned long high_limb, low_limb;
+  unsigned tnc;
+  long i;
+  up += n;
+  rp += n;
+  tnc = 8 * sizeof (unsigned long) - cnt;
+  low_limb = *--up;
+  high_limb = low_limb << cnt;
+  for (i = n - 1; i != 0; i--)
+    {
+      low_limb = *--up;
+      *--rp = ~(high_limb | (low_limb >> tnc));
+      high_limb = low_limb << cnt;
+    }
+  *--rp = ~high_limb;
+}
+int
+main ()
+{
+  unsigned long *r, *r2;
+  unsigned long a[88 + 1];
+  long i;
+  for (i = 0; i < 88 + 1; i++)
+    a[i] = ~0L;
+  r = malloc (10000 * sizeof (unsigned long));
+  r2 = r;
+  for (i = 0; i < 528; i += 22)
+    {
+      lshift_com (r2, a,
+                 i / (8 * sizeof (unsigned long)) + 1,
+                 i % (8 * sizeof (unsigned long)));
+      r2 += 88 + 1;
+    }
+  if (r[2048] != 0 || r[2049] != 0 || r[2050] != 0 || r[2051] != 0 ||
+      r[2052] != 0 || r[2053] != 0 || r[2054] != 0)
+    abort ();
+  return 0;
+}
+#else
+int
+main ()
+{
+  return 0;
+}
+#endif
+
+EOF
+  echo "Test compile: mpn_lshift_com optimization 2" >&5
+  gmp_compile="$cc $cflags $cppflags conftest.c >&5"
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_compile\""; } >&5
+  (eval $gmp_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+    cc_works_part=yes
+    if test "$cross_compiling" = no; then
+      if { ac_try='./a.out || ./b.out || ./a.exe || ./a_out.exe || ./conftest'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then :;
+      else
+        cc_works_part=norun
+      fi
+    fi
+  else
+    cc_works_part=no
+  fi
+  if test "$cc_works_part" != yes; then
+    echo "failed program was:" >&5
+    cat conftest.c >&5
+  fi
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  case $cc_works_part in
+    yes)
+
+      ;;
+    no)
+      gmp_prog_cc_works="no, mpn_lshift_com optimization 2"
+      ;;
+    norun)
+      gmp_prog_cc_works="no, mpn_lshift_com optimization 2, program does not run"
+      ;;
+  esac
+fi
+
+
+
+
+# A certain _GLOBAL_OFFSET_TABLE_ problem in past versions of gas, tickled
+# by recent versions of gcc.
+#
+# Two steps, x86 hosts only and only when shared libraries are not disabled:
+#   1. Compile a tiny function with -fPIC -S and grep the assembly for an
+#      "addl ... _GLOBAL_OFFSET_TABLE_ ... eax" instruction.
+#   2. Only if step 1 found one: assemble that instruction between two known
+#      8-byte markers and search the "od -b" dump of the object for one
+#      specific byte sequence.  Finding it means the assembler is bad and
+#      gmp_prog_cc_works becomes "no, bad gas GOT with eax".
+#
+if test "$gmp_prog_cc_works" = yes; then
+  case $host in
+    i?86*-*-* | k[5-8]*-*-* | pentium*-*-* | athlon-*-* | viac3*-*-* | geode*-*-* | atom-*-*)
+      # this problem only arises in PIC code, so don't need to test when
+      # --disable-shared.  We don't necessarily have $enable_shared set to
+      # yes at this point, it will still be unset for the default (which is
+      # yes); hence the use of "!= no".
+      if test "$enable_shared" != no; then
+        echo "Testing gcc GOT with eax emitted" >&5
+# Step 1.  The heredoc delimiter is quoted, so the text is written verbatim.
+cat >conftest.c <<\EOF
+int foo;
+int bar () { return foo; }
+EOF
+# Default to "not emitted"; a failed compile therefore also skips step 2.
+tmp_got_emitted=no
+gmp_compile="$cc $cflags $cppflags -fPIC -S conftest.c >&5 2>&1"
+if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_compile\""; } >&5
+  (eval $gmp_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  if grep "addl.*_GLOBAL_OFFSET_TABLE_.*eax" conftest.s >/dev/null; then
+    tmp_got_emitted=yes
+  fi
+fi
+rm -f conftest.*
+echo "Result: $tmp_got_emitted" >&5
+if test "$tmp_got_emitted" = yes; then
+  echo "Testing gas GOT with eax good" >&5
+# Step 2.  The awk script keeps a sliding window of the last 21 octal byte
+# fields from "od -b" (field 1, the offset, is skipped) and prints "no" as
+# soon as the window equals want[0..20], otherwise "yes".  want[0..7] and
+# want[13..20] are the octal forms of the two .byte marker rows in conftest.s
+# below; want[8..12] is 005 002 000 000 000, the byte pattern treated as the
+# bad encoding of the addl between them.
+cat >conftest.awk <<\EOF
+BEGIN {
+  want[0]  = "001"
+  want[1]  = "043"
+  want[2]  = "105"
+  want[3]  = "147"
+  want[4]  = "211"
+  want[5]  = "253"
+  want[6]  = "315"
+  want[7]  = "357"
+
+  want[8]  = "005"
+  want[9]  = "002"
+  want[10] = "000"
+  want[11] = "000"
+  want[12] = "000"
+
+  want[13] = "376"
+  want[14] = "334"
+  want[15] = "272"
+  want[16] = "230"
+  want[17] = "166"
+  want[18] = "124"
+  want[19] = "062"
+  want[20] = "020"
+
+  result = "yes"
+}
+{
+  for (f = 2; f <= NF; f++)
+    {
+      for (i = 0; i < 20; i++)
+        got[i] = got[i+1];
+      got[20] = $f;
+
+      found = 1
+      for (i = 0; i < 21; i++)
+        if (got[i] != want[i])
+          {
+            found = 0
+            break
+          }
+      if (found)
+        {
+          result = "no"
+          exit
+        }
+    }
+}
+END {
+  print result
+}
+EOF
+# Quoted delimiter again: $_GLOBAL_OFFSET_TABLE_ must reach the assembler
+# unexpanded.
+cat >conftest.s <<\EOF
+       .text
+       .byte   1, 35, 69, 103, 137, 171, 205, 239
+       addl    $_GLOBAL_OFFSET_TABLE_, %eax
+       .byte   254, 220, 186, 152, 118, 84, 50, 16
+EOF
+# Default to "good": if the assemble step itself fails the object is never
+# inspected and the compiler is not rejected here.
+tmp_got_good=yes
+gmp_compile="$cc $cflags $cppflags -fPIC -o conftest.o -c conftest.s >&5 2>&1"
+if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_compile\""; } >&5
+  (eval $gmp_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  tmp_got_good=`od -b conftest.o | $AWK -f conftest.awk`
+fi
+rm -f conftest.*
+echo "Result: $tmp_got_good" >&5
+if test "$tmp_got_good" = no; then
+  gmp_prog_cc_works="no, bad gas GOT with eax"
+else
+  :
+fi
+
+else
+  :
+fi
+
+      fi
+      ;;
+  esac
+fi
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_prog_cc_works" >&5
+$as_echo "$gmp_prog_cc_works" >&6; }
+case $gmp_prog_cc_works in
+  yes)
+
+    ;;
+  *)
+    continue
+    ;;
+esac
+
+
+        # If we're supposed to be using a "long long" for a limb, check that
+        # it works.
+        # limb_chosen is taken from $limb$abi1, falling back to $limb$abi2
+        # when that is empty; the check only runs for the value "longlong".
+                                  eval limb_chosen=\"\$limb$abi1\"
+        test -n "$limb_chosen" || eval limb_chosen=\"\$limb$abi2\"
+        if test "$limb_chosen" = longlong; then
+          { $as_echo "$as_me:${as_lineno-$LINENO}: checking compiler $cc $cflags $cppflags has long long" >&5
+$as_echo_n "checking compiler $cc $cflags $cppflags has long long... " >&6; }
+cat >conftest.c <<EOF
+long long  foo;
+long long  bar () { return foo; }
+int main () { return 0; }
+EOF
+# Compile only (-c, no link, no run).  Start from "no" so that only a
+# successful compile yields "yes".
+gmp_prog_cc_works=no
+gmp_compile="$cc $cflags $cppflags -c conftest.c >&5"
+if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_compile\""; } >&5
+  (eval $gmp_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  gmp_prog_cc_works=yes
+else
+  echo "failed program was:" >&5
+  cat conftest.c >&5
+fi
+rm -f conftest* a.out b.out a.exe a_out.exe
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_prog_cc_works" >&5
+$as_echo "$gmp_prog_cc_works" >&6; }
+# No long long support: skip to the next iteration of the enclosing loop.
+if test $gmp_prog_cc_works = yes; then
+  :
+else
+  continue
+fi
+
+        fi
+
+        # The tests to perform on this $cc, if any
+        # Lookup order, first non-empty value wins: ${ccbase}${abi1}_testlist,
+        # ${ccbase}${abi2}_testlist, any${abi1}_testlist, any${abi2}_testlist.
+                               eval testlist=\"\$${ccbase}${abi1}_testlist\"
+        test -n "$testlist" || eval testlist=\"\$${ccbase}${abi2}_testlist\"
+        test -n "$testlist" || eval testlist=\"\$any${abi1}_testlist\"
+        test -n "$testlist" || eval testlist=\"\$any${abi2}_testlist\"
+
+        # Run the named tests in order.  A failing test sets testlist_pass=no
+        # and the check after the case statement then leaves the loop early.
+        # A name matching none of the arms below is silently ignored.
+        testlist_pass=yes
+        for tst in $testlist; do
+          case $tst in
+          # HP C compiler version gate: take the "ccom:" line from "$cc -V",
+          # split out three numeric components with sed and require them to
+          # be >= 10.32.30.  The leading letter (A/G in the examples below)
+          # is not part of the comparison.  Any result other than "yes",
+          # including "not-applicable" when no version was found, fails the
+          # test list.
+          hpc-hppa-2-0)   { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether HP compiler $cc is good for 64-bits" >&5
+$as_echo_n "checking whether HP compiler $cc is good for 64-bits... " >&6; }
+# Bad compiler output:
+#   ccom: HP92453-01 G.10.32.05 HP C Compiler
+# Good compiler output:
+#   ccom: HP92453-01 A.10.32.30 HP C Compiler
+# Let A.10.32.30 or higher be ok.
+echo >conftest.c
+gmp_tmp_vs=`$cc  -V -c -o conftest.$OBJEXT conftest.c 2>&1 | grep "^ccom:"`
+echo "Version string: $gmp_tmp_vs" >&5
+rm conftest*
+gmp_tmp_v1=`echo $gmp_tmp_vs | sed 's/.* .\.\([0-9]*\).*/\1/'`
+gmp_tmp_v2=`echo $gmp_tmp_vs | sed 's/.* .\..*\.\(.*\)\..* HP C.*/\1/'`
+gmp_tmp_v3=`echo $gmp_tmp_vs | sed 's/.* .\..*\..*\.\(.*\) HP C.*/\1/'`
+echo "Version number: $gmp_tmp_v1.$gmp_tmp_v2.$gmp_tmp_v3" >&5
+if test -z "$gmp_tmp_v1"; then
+  gmp_hpc_64bit=not-applicable
+else
+  # Component-wise "version >= 10.32.30": a later component is only looked
+  # at when all earlier ones are equal.
+  gmp_compare_ge=no
+if test -n "$gmp_tmp_v1"; then
+  if test "$gmp_tmp_v1" -gt 10; then
+    gmp_compare_ge=yes
+  else
+    if test "$gmp_tmp_v1" -eq 10; then
+      if test -n "$gmp_tmp_v2"; then
+  if test "$gmp_tmp_v2" -gt 32; then
+    gmp_compare_ge=yes
+  else
+    if test "$gmp_tmp_v2" -eq 32; then
+      if test -n "$gmp_tmp_v3" && test "$gmp_tmp_v3" -ge 30; then
+  gmp_compare_ge=yes
+fi
+
+    fi
+  fi
+fi
+
+    fi
+  fi
+fi
+
+
+  gmp_hpc_64bit=$gmp_compare_ge
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_hpc_64bit" >&5
+$as_echo "$gmp_hpc_64bit" >&6; }
+if test $gmp_hpc_64bit = yes; then
+  :
+else
+  testlist_pass=no
+fi
+ ;;
+          # ARM gcc gate: the whole output of "$cc --version" is matched, so
+          # only an output that is exactly 2.95, 2.95.1, 2.95.2 or 2.95.3 is
+          # rejected.
+          gcc-arm-umodsi) { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ARM gcc unsigned division works" >&5
+$as_echo_n "checking whether ARM gcc unsigned division works... " >&6; }
+tmp_version=`$cc --version`
+echo "$tmp_version" >&5
+case $tmp_version in
+  2.95 | 2.95.[123])
+    testlist_pass=no
+    gmp_gcc_arm_umodsi_result="no, gcc 2.95.[0123]" ;;
+  *)
+    :
+    gmp_gcc_arm_umodsi_result=yes ;;
+esac
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_gcc_arm_umodsi_result" >&5
+$as_echo "$gmp_gcc_arm_umodsi_result" >&6; }
+ ;;
+          # MIPS o32 gate: "yes" when -mabi=32 compiles, and also when the
+          # compile fails with "cc1: Invalid option `abi=32'".
+          # NOTE(review): presumably such a gcc predates the option and only
+          # generates o32 code - confirm against the macro source.
+          gcc-mips-o32)   { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether gcc supports o32" >&5
+$as_echo_n "checking whether gcc supports o32... " >&6; }
+echo 'int x;' >conftest.c
+echo "$cc -mabi=32 -c conftest.c" >&5
+if $cc -mabi=32 -c conftest.c >conftest.out 2>&1; then
+  result=yes
+else
+  cat conftest.out >&5
+  if grep "cc1: Invalid option \`abi=32'" conftest.out >/dev/null; then
+    result=yes
+  else
+    result=no
+  fi
+fi
+rm -f conftest.*
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $result" >&5
+$as_echo "$result" >&6; }
+if test $result = yes; then
+  :
+else
+  testlist_pass=no
+fi
+ ;;
+          # Assembler gate: "$cc $cflags -c" must accept a file containing
+          # only a ".level 2.0" directive.
+          hppa-level-2.0) { $as_echo "$as_me:${as_lineno-$LINENO}: checking $cc $cflags assembler knows hppa 2.0" >&5
+$as_echo_n "checking $cc $cflags assembler knows hppa 2.0... " >&6; }
+result=no
+cat >conftest.s <<EOF
+       .level 2.0
+EOF
+gmp_compile="$cc $cflags -c conftest.s >&5 2>&1"
+if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_compile\""; } >&5
+  (eval $gmp_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  result=yes
+else
+  echo "failed program was" >&5
+  cat conftest.s >&5
+fi
+rm -f conftest*
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $result" >&5
+$as_echo "$result" >&6; }
+if test "$result" = yes; then
+  :
+else
+  testlist_pass=no
+fi
+ ;;
+          # Size gate for names of the form sizeof-TYPE-N: the type and the
+          # wanted size are cut out of the test name, and the program only
+          # compiles when sizeof(TYPE) == N because a mismatch gives
+          # test_array a negative size.
+          sizeof*)       echo "configure: testlist $tst" >&5
+gmp_sizeof_type=`echo "$tst" | sed 's/sizeof-\([a-z]*\).*/\1/'`
+gmp_sizeof_want=`echo "$tst" | sed 's/sizeof-[a-z]*-\([0-9]*\).*/\1/'`
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking compiler $cc $cflags has sizeof($gmp_sizeof_type)==$gmp_sizeof_want" >&5
+$as_echo_n "checking compiler $cc $cflags has sizeof($gmp_sizeof_type)==$gmp_sizeof_want... " >&6; }
+cat >conftest.c <<EOF
+int
+main ()
+{
+  static int test_array [1 - 2 * (long) (sizeof ($gmp_sizeof_type) != $gmp_sizeof_want)];
+  test_array[0] = 0;
+  return 0;
+}
+EOF
+gmp_c_testlist_sizeof=no
+gmp_compile="$cc $cflags -c conftest.c >&5"
+if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_compile\""; } >&5
+  (eval $gmp_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  gmp_c_testlist_sizeof=yes
+fi
+rm -f conftest*
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_c_testlist_sizeof" >&5
+$as_echo "$gmp_c_testlist_sizeof" >&6; }
+if test $gmp_c_testlist_sizeof = yes; then
+  :
+else
+  testlist_pass=no
+fi
+ ;;
+          esac
+          # First failure ends the test list for this compiler.
+          if test $testlist_pass = no; then break; fi
+        done
+
+        if test $testlist_pass = yes; then
+          found_compiler=yes
+          break
+        fi
+      done
+
+      if test $found_compiler = yes; then break; fi
+    done
+
+    if test $found_compiler = yes; then break; fi
+  done
+
+  if test $found_compiler = yes; then break; fi
+done
+
+
+# If we recognised the CPU, as indicated by $path being set, then insist
+# that we have a working compiler, either from our $cclist choices or from
+# $CC.  We can't let AC_PROG_CC look around for a compiler because it might
+# find one that we've rejected (for not supporting the modes our asm code
+# demands, etc).
+#
+# If we didn't recognise the CPU (and this includes host_cpu=none), then
+# fall through and let AC_PROG_CC look around for a compiler too.  This is
+# mostly in the interests of following a standard autoconf setup, after all
+# we've already tested cc and gcc adequately (hopefully).  As of autoconf
+# 2.50 the only thing AC_PROG_CC really adds is a check for "cl" (Microsoft
+# C on MS-DOS systems).
+#
+if test $found_compiler = no && test -n "$path"; then
+  as_fn_error "could not find a working compiler, see config.log for details" "$LINENO" 5
+fi
+
+# Fat build setup for x86 and x86_64 hosts.  When $enable_fat is yes, the
+# CPU-specific gcc flag variables are emptied and the path variables are
+# replaced by the fat directories for the selected ABI.
+# NOTE(review): atom-*-* appears twice in the pattern list below, and
+# pentium4-*-* is already covered by pentium*-*-*; both are harmless.
+# NOTE(review): $enable_fat is tested unquoted, so it is assumed to have been
+# given a value before this point - confirm.
+case $host in
+  i?86*-*-* | k[5-8]*-*-* | pentium*-*-* | athlon-*-* | viac3*-*-* | geode*-*-* | atom-*-* | athlon64-*-* | k8-*-* | k10-*-* | bobcat-*-* | bulldozer-*-* | pentium4-*-* | atom-*-* | core2-*-* | corei*-*-* | x86_64-*-* | nano-*-*)
+    # If the user asked for a fat build, override the path and flags set above
+    if test $enable_fat = yes; then
+      gcc_cflags_cpu=""
+      gcc_cflags_arch=""
+
+      # 32-bit ABI: add the fat and fat_entry functions and list every x86
+      # CPU directory in fat_path.
+      if test "$abi" = 32; then
+       extra_functions="$extra_functions fat fat_entry"
+       path="x86/fat x86"
+       fat_path="x86 x86/fat x86/i486
+                 x86/k6 x86/k6/mmx x86/k6/k62mmx
+                 x86/k7 x86/k7/mmx
+                 x86/pentium x86/pentium/mmx
+                 x86/p6 x86/p6/mmx x86/p6/p3mmx x86/p6/sse2
+                 x86/pentium4 x86/pentium4/mmx x86/pentium4/sse2"
+      fi
+
+      # 64-bit ABI: same idea with the _64 variables; gcc_64_cflags is
+      # emptied as well.
+      if test "$abi" = 64; then
+       gcc_64_cflags=""
+       extra_functions_64="$extra_functions_64 fat fat_entry"
+       path_64="x86_64/fat x86_64"
+       fat_path="x86_64 x86_64/fat x86_64/pentium4 x86_64/core2 x86_64/coreinhm x86_64/coreisbr x86_64/atom x86_64/nano"
+      fi
+
+      # Function and threshold names for the fat build; both lists are only
+      # assigned here and are used outside this block.
+      fat_functions="add_n addmul_1 copyd copyi
+                    dive_1 diveby3 divrem_1 gcd_1 lshift
+                    mod_1 mod_34lsub1 mode1o mul_1 mul_basecase
+                    pre_divrem_1 pre_mod_1 rshift
+                    sqr_basecase sub_n submul_1"
+      fat_thresholds="MUL_TOOM22_THRESHOLD MUL_TOOM33_THRESHOLD
+                     SQR_TOOM2_THRESHOLD SQR_TOOM3_THRESHOLD"
+    fi
+    ;;
+esac
+
+
+if test $found_compiler = yes; then
+
+  # If we're creating CFLAGS, then look for optional additions.  If the user
+  # set CFLAGS then leave it alone.
+  #
+  if test "$test_CFLAGS" != set; then
+                          eval optlist=\"\$${ccbase}${abi1}_cflags_optlist\"
+    test -n "$optlist" || eval optlist=\"\$${ccbase}${abi2}_cflags_optlist\"
+
+    for opt in $optlist; do
+                             eval optflags=\"\$${ccbase}${abi1}_cflags_${opt}\"
+      test -n "$optflags" || eval optflags=\"\$${ccbase}${abi2}_cflags_${opt}\"
+      test -n "$optflags" || eval optflags=\"\$${ccbase}_cflags_${opt}\"
+
+      for flag in $optflags; do
+
+       # ~ represents a space in an option spec
+        flag=`echo "$flag" | tr '~' ' '`
+
+        case $flag in
+          -march=pentium4 | -march=k8)
+            # For -march settings which enable SSE2 we exclude certain bad
+            # gcc versions and we need an OS knowing how to save xmm regs.
+            #
+            # This is only for ABI=32, any 64-bit gcc is good and any OS
+            # knowing x86_64 will know xmm.
+            #
+            # -march=k8 was only introduced in gcc 3.3, so we shouldn't need
+            # the GMP_GCC_PENTIUM4_SSE2 check (for gcc 3.2 and prior).  But
+            # it doesn't hurt to run it anyway, sharing code with the
+            # pentium4 case.
+            #
+            if test "$abi" = 32; then
+              { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether gcc is good for sse2" >&5
+$as_echo_n "checking whether gcc is good for sse2... " >&6; }
+case `$cc $cflags $cppflags -dumpversion` in
+  3.[012] | 3.[012].*) result=no ;;
+  *)                     result=yes ;;
+esac
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $result" >&5
+$as_echo "$result" >&6; }
+if test "$result" = yes; then
+  :
+else
+  continue
+fi
+
+              { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the operating system supports XMM registers" >&5
+$as_echo_n "checking whether the operating system supports XMM registers... " >&6; }
+if test "${gmp_cv_os_x86_xmm+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test "$build" = "$host"; then
+  # remove anything that might look like compiler output to our "||" expression
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  cat >conftest.s <<EOF
+       .text
+main:
+_main:
+       .globl  main
+       .globl  _main
+       .byte   0x0f, 0x57, 0xc0
+       xorl    %eax, %eax
+       ret
+EOF
+  gmp_compile="$cc $cflags $cppflags conftest.s -o conftest >&5"
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_compile\""; } >&5
+  (eval $gmp_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+    if { ac_try='./a.out || ./b.out || ./a.exe || ./a_out.exe || ./conftest'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then
+      gmp_cv_os_x86_xmm=yes
+    else
+      gmp_cv_os_x86_xmm=no
+    fi
+  else
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Oops, cannot compile test program" >&5
+$as_echo "$as_me: WARNING: Oops, cannot compile test program" >&2;}
+  fi
+  rm -f conftest*
+fi
+
+if test -z "$gmp_cv_os_x86_xmm"; then
+  case $host_os in
+    freebsd[123] | freebsd[123].*)
+      gmp_cv_os_x86_xmm=no ;;
+    freebsd*)
+      gmp_cv_os_x86_xmm=yes ;;
+    *)
+      gmp_cv_os_x86_xmm=probably ;;
+  esac
+fi
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_cv_os_x86_xmm" >&5
+$as_echo "$gmp_cv_os_x86_xmm" >&6; }
+
+if test "$gmp_cv_os_x86_xmm" = probably; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Not certain of OS support for xmm when cross compiling." >&5
+$as_echo "$as_me: WARNING: Not certain of OS support for xmm when cross compiling." >&2;}
+  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Will assume it's ok, expect a SIGILL if this is wrong." >&5
+$as_echo "$as_me: WARNING: Will assume it's ok, expect a SIGILL if this is wrong." >&2;}
+fi
+
+case $gmp_cv_os_x86_xmm in
+no)
+  continue
+  ;;
+*)
+
+  ;;
+esac
+
+            fi
+            ;;
+          -no-cpp-precomp)
+            # special check, avoiding a warning
+            if test "$ccbase" = gcc; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking compiler $cc $cflags -no-cpp-precomp" >&5
+$as_echo_n "checking compiler $cc $cflags -no-cpp-precomp... " >&6; }
+  result=no
+  cat >conftest.c <<EOF
+int main () { return 0; }
+EOF
+  gmp_compile="$cc $cflags -no-cpp-precomp conftest.c >conftest.out 2>&1"
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_compile\""; } >&5
+  (eval $gmp_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+    if grep "unrecognized option.*-no-cpp-precomp" conftest.out >/dev/null; then : ;
+    else
+      result=yes
+    fi
+  fi
+  cat conftest.out >&5
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $result" >&5
+$as_echo "$result" >&6; }
+  if test "$result" = yes; then
+      cflags="$cflags $flag"
+                                   break
+  else
+      continue
+  fi
+fi
+
+            ;;
+          -Wa,-m*)
+            case $host in
+              alpha*-*-*)
+                { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler $cc $cflags $flag" >&5
+$as_echo_n "checking assembler $cc $cflags $flag... " >&6; }
+result=no
+cat >conftest.c <<EOF
+int main () {}
+EOF
+gmp_compile="$cc $cflags $flag -c conftest.c >conftest.out 2>&1"
+if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_compile\""; } >&5
+  (eval $gmp_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  if grep "Unknown CPU identifier" conftest.out >/dev/null; then : ;
+  else
+    result=yes
+  fi
+fi
+cat conftest.out >&5
+rm -f conftest*
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $result" >&5
+$as_echo "$result" >&6; }
+if test "$result" = yes; then
+  :
+else
+  continue
+fi
+
+              ;;
+            esac
+            ;;
+          -Wa,-oldas)
+            { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $cc $cflags $cppflags -Wa,-oldas" >&5
+$as_echo_n "checking for $cc $cflags $cppflags -Wa,-oldas... " >&6; }
+result=no
+cat >conftest.c <<EOF
+EOF
+echo "with empty conftest.c" >&5
+gmp_compile="$cc $cflags $cppflags -c conftest.c >&5 2>&1"
+if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_compile\""; } >&5
+  (eval $gmp_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then : ;
+else
+  # empty fails
+  gmp_compile="$cc $cflags $cppflags -Wa,-oldas -c conftest.c >&5 2>&1"
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_compile\""; } >&5
+  (eval $gmp_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+    # but with -Wa,-oldas it works
+    result=yes
+  fi
+fi
+rm -f conftest*
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $result" >&5
+$as_echo "$result" >&6; }
+if test "$result" = yes; then
+  cflags="$cflags $flag"
+                             break
+else
+  continue
+fi
+
+            ;;
+        esac
+
+        { $as_echo "$as_me:${as_lineno-$LINENO}: checking compiler $cc $cflags $cppflags $flag" >&5
+$as_echo_n "checking compiler $cc $cflags $cppflags $flag... " >&6; }
+gmp_prog_cc_works=yes
+
+# first check that a simple "main()" works, then go on to the other checks
+if test "$gmp_prog_cc_works" = yes; then
+  # remove anything that might look like compiler output to our "||" expression
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  cat >conftest.c <<EOF
+
+int main () { return 0; }
+EOF
+  echo "Test compile: " >&5
+  gmp_compile="$cc $cflags $cppflags $flag conftest.c >&5"
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_compile\""; } >&5
+  (eval $gmp_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+    cc_works_part=yes
+    if test "$cross_compiling" = no; then
+      if { ac_try='./a.out || ./b.out || ./a.exe || ./a_out.exe || ./conftest'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then :;
+      else
+        cc_works_part=norun
+      fi
+    fi
+  else
+    cc_works_part=no
+  fi
+  if test "$cc_works_part" != yes; then
+    echo "failed program was:" >&5
+    cat conftest.c >&5
+  fi
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  case $cc_works_part in
+    yes)
+
+      ;;
+    no)
+      gmp_prog_cc_works="no"
+      ;;
+    norun)
+      gmp_prog_cc_works="no, program does not run"
+      ;;
+  esac
+fi
+
+
+
+
+if test "$gmp_prog_cc_works" = yes; then
+  # remove anything that might look like compiler output to our "||" expression
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  cat >conftest.c <<EOF
+/* The following provokes an internal error from gcc 2.95.2 -mpowerpc64
+   (without -maix64), hence detecting an unusable compiler */
+void *g() { return (void *) 0; }
+void *f() { return g(); }
+
+int main () { return 0; }
+EOF
+  echo "Test compile: function pointer return" >&5
+  gmp_compile="$cc $cflags $cppflags $flag conftest.c >&5"
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_compile\""; } >&5
+  (eval $gmp_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+    cc_works_part=yes
+    if test "$cross_compiling" = no; then
+      if { ac_try='./a.out || ./b.out || ./a.exe || ./a_out.exe || ./conftest'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then :;
+      else
+        cc_works_part=norun
+      fi
+    fi
+  else
+    cc_works_part=no
+  fi
+  if test "$cc_works_part" != yes; then
+    echo "failed program was:" >&5
+    cat conftest.c >&5
+  fi
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  case $cc_works_part in
+    yes)
+
+      ;;
+    no)
+      gmp_prog_cc_works="no, function pointer return"
+      ;;
+    norun)
+      gmp_prog_cc_works="no, function pointer return, program does not run"
+      ;;
+  esac
+fi
+
+
+
+
+if test "$gmp_prog_cc_works" = yes; then
+  # remove anything that might look like compiler output to our "||" expression
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  cat >conftest.c <<EOF
+/* The following provokes an invalid instruction syntax from i386 gcc
+   -march=pentiumpro on Solaris 2.8.  The native sun assembler
+   requires a non-standard syntax for cmov which gcc (as of 2.95.2 at
+   least) doesn't know.  */
+int n;
+int cmov () { return (n >= 0 ? n : 0); }
+
+int main () { return 0; }
+EOF
+  echo "Test compile: cmov instruction" >&5
+  gmp_compile="$cc $cflags $cppflags $flag conftest.c >&5"
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_compile\""; } >&5
+  (eval $gmp_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+    cc_works_part=yes
+    if test "$cross_compiling" = no; then
+      if { ac_try='./a.out || ./b.out || ./a.exe || ./a_out.exe || ./conftest'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then :;
+      else
+        cc_works_part=norun
+      fi
+    fi
+  else
+    cc_works_part=no
+  fi
+  if test "$cc_works_part" != yes; then
+    echo "failed program was:" >&5
+    cat conftest.c >&5
+  fi
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  case $cc_works_part in
+    yes)
+
+      ;;
+    no)
+      gmp_prog_cc_works="no, cmov instruction"
+      ;;
+    norun)
+      gmp_prog_cc_works="no, cmov instruction, program does not run"
+      ;;
+  esac
+fi
+
+
+
+
+if test "$gmp_prog_cc_works" = yes; then
+  # remove anything that might look like compiler output to our "||" expression
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  cat >conftest.c <<EOF
+/* The following provokes a linker invocation problem with gcc 3.0.3
+   on AIX 4.3 under "-maix64 -mpowerpc64 -mcpu=630".  The -mcpu=630
+   option causes gcc to incorrectly select the 32-bit libgcc.a, not
+   the 64-bit one, and consequently it misses out on the __fixunsdfdi
+   helper (double -> uint64 conversion).  */
+double d;
+unsigned long gcc303 () { return (unsigned long) d; }
+
+int main () { return 0; }
+EOF
+  echo "Test compile: double -> ulong conversion" >&5
+  gmp_compile="$cc $cflags $cppflags $flag conftest.c >&5"
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_compile\""; } >&5
+  (eval $gmp_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+    cc_works_part=yes
+    if test "$cross_compiling" = no; then
+      if { ac_try='./a.out || ./b.out || ./a.exe || ./a_out.exe || ./conftest'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then :;
+      else
+        cc_works_part=norun
+      fi
+    fi
+  else
+    cc_works_part=no
+  fi
+  if test "$cc_works_part" != yes; then
+    echo "failed program was:" >&5
+    cat conftest.c >&5
+  fi
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  case $cc_works_part in
+    yes)
+
+      ;;
+    no)
+      gmp_prog_cc_works="no, double -> ulong conversion"
+      ;;
+    norun)
+      gmp_prog_cc_works="no, double -> ulong conversion, program does not run"
+      ;;
+  esac
+fi
+
+
+
+
+if test "$gmp_prog_cc_works" = yes; then
+  # remove anything that might look like compiler output to our "||" expression
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  cat >conftest.c <<EOF
+/* The following provokes an error from hppa gcc 2.95 under -mpa-risc-2-0 if
+   the assembler doesn't know hppa 2.0 instructions.  fneg is a 2.0
+   instruction, and a negation like this comes out using it.  */
+double fneg_data;
+unsigned long fneg () { return -fneg_data; }
+
+int main () { return 0; }
+EOF
+  echo "Test compile: double negation" >&5
+  gmp_compile="$cc $cflags $cppflags $flag conftest.c >&5"
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_compile\""; } >&5
+  (eval $gmp_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+    cc_works_part=yes
+    if test "$cross_compiling" = no; then
+      if { ac_try='./a.out || ./b.out || ./a.exe || ./a_out.exe || ./conftest'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then :;
+      else
+        cc_works_part=norun
+      fi
+    fi
+  else
+    cc_works_part=no
+  fi
+  if test "$cc_works_part" != yes; then
+    echo "failed program was:" >&5
+    cat conftest.c >&5
+  fi
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  case $cc_works_part in
+    yes)
+
+      ;;
+    no)
+      gmp_prog_cc_works="no, double negation"
+      ;;
+    norun)
+      gmp_prog_cc_works="no, double negation, program does not run"
+      ;;
+  esac
+fi
+
+
+
+
+if test "$gmp_prog_cc_works" = yes; then
+  # remove anything that might look like compiler output to our "||" expression
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  cat >conftest.c <<EOF
+/* The following makes gcc 3.3 -march=pentium4 generate an SSE2 xmm insn
+   (cvtsd2ss) which will provoke an error if the assembler doesn't recognise
+   those instructions.  Not sure how much of the gmp code will come out
+   wanting sse2, but it's easiest to reject an option we know is bad.  */
+double ftod_data;
+float ftod () { return (float) ftod_data; }
+
+int main () { return 0; }
+EOF
+  echo "Test compile: double -> float conversion" >&5
+  gmp_compile="$cc $cflags $cppflags $flag conftest.c >&5"
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_compile\""; } >&5
+  (eval $gmp_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+    cc_works_part=yes
+    if test "$cross_compiling" = no; then
+      if { ac_try='./a.out || ./b.out || ./a.exe || ./a_out.exe || ./conftest'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then :;
+      else
+        cc_works_part=norun
+      fi
+    fi
+  else
+    cc_works_part=no
+  fi
+  if test "$cc_works_part" != yes; then
+    echo "failed program was:" >&5
+    cat conftest.c >&5
+  fi
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  case $cc_works_part in
+    yes)
+
+      ;;
+    no)
+      gmp_prog_cc_works="no, double -> float conversion"
+      ;;
+    norun)
+      gmp_prog_cc_works="no, double -> float conversion, program does not run"
+      ;;
+  esac
+fi
+
+
+
+
+if test "$gmp_prog_cc_works" = yes; then
+  # remove anything that might look like compiler output to our "||" expression
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  cat >conftest.c <<EOF
+/* The following provokes an internal compiler error from gcc version
+   "2.9-gnupro-99r1" under "-O2 -mcpu=ev6", apparently relating to char
+   values being spilled into floating point registers.  The problem doesn't
+   show up all the time, but has occurred enough in GMP for us to reject
+   this compiler+flags.  */
+#include <string.h>  /* for memcpy */
+struct try_t
+{
+ char dst[2];
+ char size;
+ long d0, d1, d2, d3, d4, d5, d6;
+ char overlap;
+};
+struct try_t param[6];
+int
+param_init ()
+{
+ struct try_t *p;
+ memcpy (p, &param[ 2 ], sizeof (*p));
+ memcpy (p, &param[ 2 ], sizeof (*p));
+ p->size = 2;
+ memcpy (p, &param[ 1 ], sizeof (*p));
+ p->dst[0] = 1;
+ p->overlap = 2;
+ memcpy (p, &param[ 3 ], sizeof (*p));
+ p->dst[0] = 1;
+ p->overlap = 8;
+ memcpy (p, &param[ 4 ], sizeof (*p));
+ memcpy (p, &param[ 4 ], sizeof (*p));
+ p->overlap = 8;
+ memcpy (p, &param[ 5 ], sizeof (*p));
+ memcpy (p, &param[ 5 ], sizeof (*p));
+ memcpy (p, &param[ 5 ], sizeof (*p));
+ return 0;
+}
+
+int main () { return 0; }
+EOF
+  echo "Test compile: gnupro alpha ev6 char spilling" >&5
+  gmp_compile="$cc $cflags $cppflags $flag conftest.c >&5"
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_compile\""; } >&5
+  (eval $gmp_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+    cc_works_part=yes
+    if test "$cross_compiling" = no; then
+      if { ac_try='./a.out || ./b.out || ./a.exe || ./a_out.exe || ./conftest'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then :;
+      else
+        cc_works_part=norun
+      fi
+    fi
+  else
+    cc_works_part=no
+  fi
+  if test "$cc_works_part" != yes; then
+    echo "failed program was:" >&5
+    cat conftest.c >&5
+  fi
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  case $cc_works_part in
+    yes)
+
+      ;;
+    no)
+      gmp_prog_cc_works="no, gnupro alpha ev6 char spilling"
+      ;;
+    norun)
+      gmp_prog_cc_works="no, gnupro alpha ev6 char spilling, program does not run"
+      ;;
+  esac
+fi
+
+
+
+
+# __builtin_alloca is not available everywhere, check it exists before
+# seeing that it works
+if test "$gmp_prog_cc_works" = yes; then
+  # remove anything that might look like compiler output to our "||" expression
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  cat >conftest.c <<EOF
+int k; int foo () { __builtin_alloca (k); }
+EOF
+  echo "Test compile: __builtin_alloca availability" >&5
+  gmp_compile="$cc $cflags $cppflags $flag conftest.c >&5"
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_compile\""; } >&5
+  (eval $gmp_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+    cc_works_part=yes
+    if test "$cross_compiling" = no; then
+      if { ac_try='./a.out || ./b.out || ./a.exe || ./a_out.exe || ./conftest'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then :;
+      else
+        cc_works_part=norun
+      fi
+    fi
+  else
+    cc_works_part=no
+  fi
+  if test "$cc_works_part" != yes; then
+    echo "failed program was:" >&5
+    cat conftest.c >&5
+  fi
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  case $cc_works_part in
+    yes)
+      if test "$gmp_prog_cc_works" = yes; then
+  # remove anything that might look like compiler output to our "||" expression
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  cat >conftest.c <<EOF
+/* The following provokes an internal compiler error from Itanium HP-UX cc
+    under +O2 or higher.  We use this sort of code in mpn/generic/mul_fft.c. */
+int k;
+int foo ()
+{
+  int i, **a;
+  a = __builtin_alloca (k);
+  for (i = 0; i <= k; i++)
+    a[i] = __builtin_alloca (1 << i);
+}
+
+int main () { return 0; }
+EOF
+  echo "Test compile: alloca array" >&5
+  gmp_compile="$cc $cflags $cppflags $flag conftest.c >&5"
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_compile\""; } >&5
+  (eval $gmp_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+    cc_works_part=yes
+    if test "$cross_compiling" = no; then
+      if { ac_try='./a.out || ./b.out || ./a.exe || ./a_out.exe || ./conftest'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then :;
+      else
+        cc_works_part=norun
+      fi
+    fi
+  else
+    cc_works_part=no
+  fi
+  if test "$cc_works_part" != yes; then
+    echo "failed program was:" >&5
+    cat conftest.c >&5
+  fi
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  case $cc_works_part in
+    yes)
+
+      ;;
+    no)
+      gmp_prog_cc_works="no, alloca array"
+      ;;
+    norun)
+      gmp_prog_cc_works="no, alloca array, program does not run"
+      ;;
+  esac
+fi
+
+
+
+      ;;
+    no)
+
+      ;;
+    norun)
+
+      ;;
+  esac
+fi
+
+
+if test "$gmp_prog_cc_works" = yes; then
+  # remove anything that might look like compiler output to our "||" expression
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  cat >conftest.c <<EOF
+/* The following provokes an internal error from the assembler on
+   power2-ibm-aix4.3.1.0.  gcc -mrios2 compiles to nabs+fcirz, and this
+   results in "Internal error related to the source program domain".
+
+   For reference it seems to be the combination of nabs+fcirz which is bad,
+   not either alone.  This sort of thing occurs in mpz/get_str.c with the
+   way double chars_per_bit_exactly is applied in MPN_SIZEINBASE.  Perhaps
+   if that code changes to a scaled-integer style then we won't need this
+   test.  */
+
+double fp[1];
+int x;
+int f ()
+{
+  int a;
+  a = (x >= 0 ? x : -x);
+  return a * fp[0];
+}
+
+int main () { return 0; }
+EOF
+  echo "Test compile: abs int -> double conversion" >&5
+  gmp_compile="$cc $cflags $cppflags $flag conftest.c >&5"
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_compile\""; } >&5
+  (eval $gmp_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+    cc_works_part=yes
+    if test "$cross_compiling" = no; then
+      if { ac_try='./a.out || ./b.out || ./a.exe || ./a_out.exe || ./conftest'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then :;
+      else
+        cc_works_part=norun
+      fi
+    fi
+  else
+    cc_works_part=no
+  fi
+  if test "$cc_works_part" != yes; then
+    echo "failed program was:" >&5
+    cat conftest.c >&5
+  fi
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  case $cc_works_part in
+    yes)
+
+      ;;
+    no)
+      gmp_prog_cc_works="no, abs int -> double conversion"
+      ;;
+    norun)
+      gmp_prog_cc_works="no, abs int -> double conversion, program does not run"
+      ;;
+  esac
+fi
+
+
+
+
+if test "$gmp_prog_cc_works" = yes; then
+  # remove anything that might look like compiler output to our "||" expression
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  cat >conftest.c <<EOF
+/* The following provokes a segfault in the compiler on powerpc-apple-darwin.
+   Extracted from tests/mpn/t-iord_u.c.  Causes Apple's gcc 3.3 build 1640 and
+   1666 to segfault with e.g., -O2 -mpowerpc64.  */
+
+#if defined (__GNUC__) && ! defined (__cplusplus)
+typedef unsigned long long t1;typedef t1*t2;
+static __inline__ t1 e(t2 rp,t2 up,int n,t1 v0)
+{t1 c,x,r;int i;if(v0){c=1;for(i=1;i<n;i++){x=up[i];r=x+1;rp[i]=r;}}return c;}
+f(){static const struct{t1 n;t1 src[9];t1 want[9];}d[]={{1,{0},{1}},};t1 got[9];int i;
+for(i=0;i<1;i++){if(e(got,got,9,d[i].n)==0)h();g(i,d[i].src,d[i].n,got,d[i].want,9);if(d[i].n)h();}}
+h(){}g(){}
+#else
+int dummy;
+#endif
+
+int main () { return 0; }
+EOF
+  echo "Test compile: long long reliability test 1" >&5
+  gmp_compile="$cc $cflags $cppflags $flag conftest.c >&5"
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_compile\""; } >&5
+  (eval $gmp_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+    cc_works_part=yes
+    if test "$cross_compiling" = no; then
+      if { ac_try='./a.out || ./b.out || ./a.exe || ./a_out.exe || ./conftest'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then :;
+      else
+        cc_works_part=norun
+      fi
+    fi
+  else
+    cc_works_part=no
+  fi
+  if test "$cc_works_part" != yes; then
+    echo "failed program was:" >&5
+    cat conftest.c >&5
+  fi
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  case $cc_works_part in
+    yes)
+
+      ;;
+    no)
+      gmp_prog_cc_works="no, long long reliability test 1"
+      ;;
+    norun)
+      gmp_prog_cc_works="no, long long reliability test 1, program does not run"
+      ;;
+  esac
+fi
+
+
+
+
+if test "$gmp_prog_cc_works" = yes; then
+  # remove anything that might look like compiler output to our "||" expression
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  cat >conftest.c <<EOF
+/* The following provokes an internal compiler error on powerpc-apple-darwin.
+   Extracted from mpz/cfdiv_q_2exp.c.  Causes Apple's gcc 3.3 build 1640 and
+   1666 to get an ICE with -O1 -mpowerpc64.  */
+
+#if defined (__GNUC__) && ! defined (__cplusplus)
+f(int u){int i;long long x;x=u?~0:0;if(x)for(i=0;i<9;i++);x&=g();if(x)g();}
+g(){}
+#else
+int dummy;
+#endif
+
+int main () { return 0; }
+EOF
+  echo "Test compile: long long reliability test 2" >&5
+  gmp_compile="$cc $cflags $cppflags $flag conftest.c >&5"
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_compile\""; } >&5
+  (eval $gmp_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+    cc_works_part=yes
+    if test "$cross_compiling" = no; then
+      if { ac_try='./a.out || ./b.out || ./a.exe || ./a_out.exe || ./conftest'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then :;
+      else
+        cc_works_part=norun
+      fi
+    fi
+  else
+    cc_works_part=no
+  fi
+  if test "$cc_works_part" != yes; then
+    echo "failed program was:" >&5
+    cat conftest.c >&5
+  fi
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  case $cc_works_part in
+    yes)
+
+      ;;
+    no)
+      gmp_prog_cc_works="no, long long reliability test 2"
+      ;;
+    norun)
+      gmp_prog_cc_works="no, long long reliability test 2, program does not run"
+      ;;
+  esac
+fi
+
+
+
+
+if test "$gmp_prog_cc_works" = yes; then
+  # remove anything that might look like compiler output to our "||" expression
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  cat >conftest.c <<EOF
+/* The following is mis-compiled by HP ia-64 cc version
+        cc: HP aC++/ANSI C B3910B A.05.55 [Dec 04 2003]
+   under "cc +O3", both in +DD32 and +DD64 modes.  The mpn_lshift_com gets
+   inlined and its return value somehow botched to be 0 instead of 1.  This
+   arises in the real mpn_lshift_com in mul_fft.c.  A lower optimization
+   level, like +O2 seems ok.  This code needs to be run to show the problem,
+   but that's fine, the offending cc is a native-only compiler so we don't
+   have to worry about cross compiling.  */
+
+#if ! defined (__cplusplus)
+unsigned long
+lshift_com (rp, up, n, cnt)
+  unsigned long *rp;
+  unsigned long *up;
+  long n;
+  unsigned cnt;
+{
+  unsigned long retval, high_limb, low_limb;
+  unsigned tnc;
+  long i;
+  tnc = 8 * sizeof (unsigned long) - cnt;
+  low_limb = *up++;
+  retval = low_limb >> tnc;
+  high_limb = low_limb << cnt;
+  for (i = n - 1; i != 0; i--)
+    {
+      low_limb = *up++;
+      *rp++ = ~(high_limb | (low_limb >> tnc));
+      high_limb = low_limb << cnt;
+    }
+  return retval;
+}
+int
+main ()
+{
+  unsigned long cy, rp[2], up[2];
+  up[0] = ~ 0L;
+  up[1] = 0;
+  cy = lshift_com (rp, up, 2L, 1);
+  if (cy != 1L)
+    return 1;
+  return 0;
+}
+#else
+int
+main ()
+{
+  return 0;
+}
+#endif
+
+EOF
+  echo "Test compile: mpn_lshift_com optimization" >&5
+  gmp_compile="$cc $cflags $cppflags $flag conftest.c >&5"
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_compile\""; } >&5
+  (eval $gmp_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+    cc_works_part=yes
+    if test "$cross_compiling" = no; then
+      if { ac_try='./a.out || ./b.out || ./a.exe || ./a_out.exe || ./conftest'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then :;
+      else
+        cc_works_part=norun
+      fi
+    fi
+  else
+    cc_works_part=no
+  fi
+  if test "$cc_works_part" != yes; then
+    echo "failed program was:" >&5
+    cat conftest.c >&5
+  fi
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  case $cc_works_part in
+    yes)
+
+      ;;
+    no)
+      gmp_prog_cc_works="no, mpn_lshift_com optimization"
+      ;;
+    norun)
+      gmp_prog_cc_works="no, mpn_lshift_com optimization, program does not run"
+      ;;
+  esac
+fi
+
+
+
+if test "$gmp_prog_cc_works" = yes; then
+  # remove anything that might look like compiler output to our "||" expression
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  cat >conftest.c <<EOF
+/* The following is mis-compiled by Intel ia-64 icc version 1.8 under
+    "icc -O3",  After several calls, the function writes parial garbage to
+    the result vector.  Perhaps relates to the chk.a.nc insn.  This code needs
+    to be run to show the problem, but that's fine, the offending cc is a
+    native-only compiler so we don't have to worry about cross compiling.  */
+
+#if ! defined (__cplusplus)
+#include <stdlib.h>
+void
+lshift_com (rp, up, n, cnt)
+  unsigned long *rp;
+  unsigned long *up;
+  long n;
+  unsigned cnt;
+{
+  unsigned long high_limb, low_limb;
+  unsigned tnc;
+  long i;
+  up += n;
+  rp += n;
+  tnc = 8 * sizeof (unsigned long) - cnt;
+  low_limb = *--up;
+  high_limb = low_limb << cnt;
+  for (i = n - 1; i != 0; i--)
+    {
+      low_limb = *--up;
+      *--rp = ~(high_limb | (low_limb >> tnc));
+      high_limb = low_limb << cnt;
+    }
+  *--rp = ~high_limb;
+}
+int
+main ()
+{
+  unsigned long *r, *r2;
+  unsigned long a[88 + 1];
+  long i;
+  for (i = 0; i < 88 + 1; i++)
+    a[i] = ~0L;
+  r = malloc (10000 * sizeof (unsigned long));
+  r2 = r;
+  for (i = 0; i < 528; i += 22)
+    {
+      lshift_com (r2, a,
+                 i / (8 * sizeof (unsigned long)) + 1,
+                 i % (8 * sizeof (unsigned long)));
+      r2 += 88 + 1;
+    }
+  if (r[2048] != 0 || r[2049] != 0 || r[2050] != 0 || r[2051] != 0 ||
+      r[2052] != 0 || r[2053] != 0 || r[2054] != 0)
+    abort ();
+  return 0;
+}
+#else
+int
+main ()
+{
+  return 0;
+}
+#endif
+
+EOF
+  echo "Test compile: mpn_lshift_com optimization 2" >&5
+  gmp_compile="$cc $cflags $cppflags $flag conftest.c >&5"
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_compile\""; } >&5
+  (eval $gmp_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+    cc_works_part=yes
+    if test "$cross_compiling" = no; then
+      if { ac_try='./a.out || ./b.out || ./a.exe || ./a_out.exe || ./conftest'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then :;
+      else
+        cc_works_part=norun
+      fi
+    fi
+  else
+    cc_works_part=no
+  fi
+  if test "$cc_works_part" != yes; then
+    echo "failed program was:" >&5
+    cat conftest.c >&5
+  fi
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  case $cc_works_part in
+    yes)
+
+      ;;
+    no)
+      gmp_prog_cc_works="no, mpn_lshift_com optimization 2"
+      ;;
+    norun)
+      gmp_prog_cc_works="no, mpn_lshift_com optimization 2, program does not run"
+      ;;
+  esac
+fi
+
+
+
+
+# A certain _GLOBAL_OFFSET_TABLE_ problem in past versions of gas, tickled
+# by recent versions of gcc.
+#
+# Extra check, run only while $gmp_prog_cc_works is still "yes" and only for
+# the x86-family host patterns listed in the case below.
+if test "$gmp_prog_cc_works" = yes; then
+  case $host in
+    i?86*-*-* | k[5-8]*-*-* | pentium*-*-* | athlon-*-* | viac3*-*-* | geode*-*-* | atom-*-*)
+      # this problem only arises in PIC code, so don't need to test when
+      # --disable-shared.  We don't necessarily have $enable_shared set to
+      # yes at this point, it will still be unset for the default (which is
+      # yes); hence the use of "!= no".
+      if test "$enable_shared" != no; then
+        # Step 1: compile a global-variable read with -fPIC -S and look in the
+        # generated conftest.s for an addl of _GLOBAL_OFFSET_TABLE_ into eax.
+        echo "Testing gcc GOT with eax emitted" >&5
+cat >conftest.c <<\EOF
+int foo;
+int bar () { return foo; }
+EOF
+tmp_got_emitted=no
+gmp_compile="$cc $cflags $cppflags $flag -fPIC -S conftest.c >&5 2>&1"
+if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_compile\""; } >&5
+  (eval $gmp_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  if grep "addl.*_GLOBAL_OFFSET_TABLE_.*eax" conftest.s >/dev/null; then
+    tmp_got_emitted=yes
+  fi
+fi
+rm -f conftest.*
+echo "Result: $tmp_got_emitted" >&5
+# Step 2, only if the compiler does emit that instruction: assemble it
+# between two 8-byte marker sequences and inspect the resulting object code.
+if test "$tmp_got_emitted" = yes; then
+  echo "Testing gas GOT with eax good" >&5
+# conftest.awk slides a 21-field window over fields 2..NF of every input
+# line (field 1 is skipped; in "od -b" output that is the offset column)
+# and prints "no" as soon as the window equals want[], otherwise "yes".
+# want[] is the octal form of the two .byte marker runs in conftest.s with
+# the five bytes 005 002 000 000 000 between them.
+cat >conftest.awk <<\EOF
+BEGIN {
+  want[0]  = "001"
+  want[1]  = "043"
+  want[2]  = "105"
+  want[3]  = "147"
+  want[4]  = "211"
+  want[5]  = "253"
+  want[6]  = "315"
+  want[7]  = "357"
+
+  want[8]  = "005"
+  want[9]  = "002"
+  want[10] = "000"
+  want[11] = "000"
+  want[12] = "000"
+
+  want[13] = "376"
+  want[14] = "334"
+  want[15] = "272"
+  want[16] = "230"
+  want[17] = "166"
+  want[18] = "124"
+  want[19] = "062"
+  want[20] = "020"
+
+  result = "yes"
+}
+{
+  for (f = 2; f <= NF; f++)
+    {
+      for (i = 0; i < 20; i++)
+        got[i] = got[i+1];
+      got[20] = $f;
+
+      found = 1
+      for (i = 0; i < 21; i++)
+        if (got[i] != want[i])
+          {
+            found = 0
+            break
+          }
+      if (found)
+        {
+          result = "no"
+          exit
+        }
+    }
+}
+END {
+  print result
+}
+EOF
+cat >conftest.s <<\EOF
+       .text
+       .byte   1, 35, 69, 103, 137, 171, 205, 239
+       addl    $_GLOBAL_OFFSET_TABLE_, %eax
+       .byte   254, 220, 186, 152, 118, 84, 50, 16
+EOF
+# Default to "yes" so that a failed assembly leaves the verdict unchanged;
+# only a successful assembly lets the awk script overwrite it.
+tmp_got_good=yes
+gmp_compile="$cc $cflags $cppflags $flag -fPIC -o conftest.o -c conftest.s >&5 2>&1"
+if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_compile\""; } >&5
+  (eval $gmp_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  tmp_got_good=`od -b conftest.o | $AWK -f conftest.awk`
+fi
+rm -f conftest.*
+echo "Result: $tmp_got_good" >&5
+# An awk verdict of "no" is the only outcome that rejects this compiler/flag.
+if test "$tmp_got_good" = no; then
+  gmp_prog_cc_works="no, bad gas GOT with eax"
+else
+  :
+fi
+
+else
+  :
+fi
+
+      fi
+      ;;
+  esac
+fi
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_prog_cc_works" >&5
+$as_echo "$gmp_prog_cc_works" >&6; }
+case $gmp_prog_cc_works in
+  yes)
+    cflags="$cflags $flag"
+          break
+    ;;
+  *)
+
+    ;;
+esac
+
+      done
+    done
+  fi
+
+  ABI="$abi"
+  CC="$cc"
+  CFLAGS="$cflags"
+  CPPFLAGS="$cppflags"
+
+
+  # Could easily have this in config.h too, if desired.
+  ABI_nodots=`echo $ABI | sed 's/\./_/'`
+
+echo "define_not_for_expansion(\`HAVE_ABI_$ABI_nodots')" >> $gmp_tmpconfigm4p
+
+
+
+  # GMP_LDFLAGS substitution, selected according to ABI.
+  # These are needed on libgmp.la and libmp.la, but currently not on
+  # convenience libraries like tune/libspeed.la or mpz/libmpz.la.
+  #
+  # Look up ${ccbase}${abi1}_ldflags first and, when that is empty, fall back
+  # to ${ccbase}${abi2}_ldflags -- the same abi1-then-abi2 lookup used for
+  # extra_functions, the cycle counter, etc.  (The fallback previously
+  # repeated the abi1 lookup and so could never change the result.)
+  # NOTE(review): configure is generated; the same fix presumably belongs in
+  # the configure source it is generated from.
+                            eval GMP_LDFLAGS=\"\$${ccbase}${abi1}_ldflags\"
+  test -n "$GMP_LDFLAGS" || eval GMP_LDFLAGS=\"\$${ccbase}${abi2}_ldflags\"
+
+
+
+
+  # extra_functions, selected according to ABI
+                    eval tmp=\"\$extra_functions$abi1\"
+  test -n "$tmp" || eval tmp=\"\$extra_functions$abi2\"
+  extra_functions="$tmp"
+
+
+  # Cycle counter, selected according to ABI.
+  #
+                    eval tmp=\"\$SPEED_CYCLECOUNTER_OBJ$abi1\"
+  test -n "$tmp" || eval tmp=\"\$SPEED_CYCLECOUNTER_OBJ$abi2\"
+  SPEED_CYCLECOUNTER_OBJ="$tmp"
+                    eval tmp=\"\$cyclecounter_size$abi1\"
+  test -n "$tmp" || eval tmp=\"\$cyclecounter_size$abi2\"
+  cyclecounter_size="$tmp"
+
+  if test -n "$SPEED_CYCLECOUNTER_OBJ"; then
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_SPEED_CYCLECOUNTER $cyclecounter_size
+_ACEOF
+
+  fi
+
+
+
+  # Calling conventions checking, selected according to ABI.
+  #
+                    eval tmp=\"\$CALLING_CONVENTIONS_OBJS$abi1\"
+  test -n "$tmp" || eval tmp=\"\$CALLING_CONVENTIONS_OBJS$abi2\"
+  CALLING_CONVENTIONS_OBJS="$tmp"
+
+  if test -n "$CALLING_CONVENTIONS_OBJS"; then
+
+$as_echo "#define HAVE_CALLING_CONVENTIONS 1" >>confdefs.h
+
+  fi
+
+
+fi
+
+
+# If the user gave an MPN_PATH, use that verbatim, otherwise choose
+# according to the ABI and add "generic".
+#
+if test -n "$MPN_PATH"; then
+  path="$MPN_PATH"
+else
+                    eval tmp=\"\$path$abi1\"
+  test -n "$tmp" || eval tmp=\"\$path$abi2\"
+  path="$tmp generic"
+fi
+
+
+# Long long limb setup for gmp.h.
+case $limb_chosen in
+longlong) DEFN_LONG_LONG_LIMB="#define _LONG_LONG_LIMB 1"    ;;
+*)        DEFN_LONG_LONG_LIMB="/* #undef _LONG_LONG_LIMB */" ;;
+esac
+
+
+
+# The C compiler and preprocessor, put into ANSI mode if possible.
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args.
+set dummy ${ac_tool_prefix}gcc; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_CC+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$CC"; then
+  ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_prog_CC="${ac_tool_prefix}gcc"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+$as_echo "$CC" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+# Second candidate, used when ac_cv_prog_CC is still empty: search $PATH for
+# an unprefixed "gcc".  The result is kept separately in ac_cv_prog_ac_ct_CC.
+if test -z "$ac_cv_prog_CC"; then
+  ac_ct_CC=$CC
+  # Extract the first word of "gcc", so it can be a program name with args.
+set dummy gcc; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_ac_ct_CC+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_CC"; then
+  ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_prog_ac_ct_CC="gcc"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_CC=$ac_cv_prog_ac_ct_CC
+if test -n "$ac_ct_CC"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5
+$as_echo "$ac_ct_CC" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+  if test "x$ac_ct_CC" = x; then
+    CC=""
+  else
+    # The "yes:" pattern matches only when cross_compiling is "yes" and
+    # ac_tool_warned is still empty, so the warning is issued at most once.
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    CC=$ac_ct_CC
+  fi
+else
+  CC="$ac_cv_prog_CC"
+fi
+
+if test -z "$CC"; then
+          if test -n "$ac_tool_prefix"; then
+    # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args.
+set dummy ${ac_tool_prefix}cc; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_CC+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$CC"; then
+  ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_prog_CC="${ac_tool_prefix}cc"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+$as_echo "$CC" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+  fi
+fi
+if test -z "$CC"; then
+  # Extract the first word of "cc", so it can be a program name with args.
+set dummy cc; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_CC+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$CC"; then
+  ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+  ac_prog_rejected=no
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then
+       ac_prog_rejected=yes
+       continue
+     fi
+    ac_cv_prog_CC="cc"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+if test $ac_prog_rejected = yes; then
+  # We found a bogon in the path, so make sure we never use it.
+  set dummy $ac_cv_prog_CC
+  shift
+  if test $# != 0; then
+    # We chose a different compiler from the bogus one.
+    # However, it has the same basename, so the bogon will be chosen
+    # first if we set CC to just the basename; use the full file name.
+    shift
+    ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@"
+  fi
+fi
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+$as_echo "$CC" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+# Last candidate, tried only when CC is still empty: cl.exe, first with the
+# tool prefix (when one is set) and then unprefixed.
+if test -z "$CC"; then
+  if test -n "$ac_tool_prefix"; then
+  for ac_prog in cl.exe
+  do
+    # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args.
+set dummy $ac_tool_prefix$ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_CC+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$CC"; then
+  ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_prog_CC="$ac_tool_prefix$ac_prog"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+$as_echo "$CC" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+    test -n "$CC" && break
+  done
+fi
+# Unprefixed search, with the result kept in ac_cv_prog_ac_ct_CC.
+if test -z "$CC"; then
+  ac_ct_CC=$CC
+  for ac_prog in cl.exe
+do
+  # Extract the first word of "$ac_prog", so it can be a program name with args.
+set dummy $ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_ac_ct_CC+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_CC"; then
+  ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_prog_ac_ct_CC="$ac_prog"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_CC=$ac_cv_prog_ac_ct_CC
+if test -n "$ac_ct_CC"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5
+$as_echo "$ac_ct_CC" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+  test -n "$ac_ct_CC" && break
+done
+
+  if test "x$ac_ct_CC" = x; then
+    CC=""
+  else
+    # The "yes:" pattern matches only when cross_compiling is "yes" and
+    # ac_tool_warned is still empty, so the warning is issued at most once.
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    CC=$ac_ct_CC
+  fi
+fi
+
+fi
+
+
+# If every search above left CC empty, log and print the error location and
+# hand the message to as_fn_error (defined outside this section; presumably
+# it terminates configure).
+test -z "$CC" && { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error "no acceptable C compiler found in \$PATH
+See \`config.log' for more details." "$LINENO" 5; }
+
+# Provide some information about the compiler.
+# Each of several version options is tried in turn and its output is sent
+# to fd 5; the exit status of each attempt is logged but otherwise ignored.
+$as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5
+# $2 is the first word of the $ac_compile template, i.e. the literal text
+# '$CC'; it is only expanded when the command goes through eval below.
+set X $ac_compile
+ac_compiler=$2
+for ac_option in --version -v -V -qversion; do
+  { { ac_try="$ac_compiler $ac_option >&5"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_compiler $ac_option >&5") 2>conftest.err
+  ac_status=$?
+  # Copy at most the first 10 lines of captured stderr to fd 5; the sed
+  # script appends a "... deleted ..." marker after line 10 and then quits.
+  if test -s conftest.err; then
+    sed '10a\
+... rest of stderr output deleted ...
+         10q' conftest.err >conftest.er1
+    cat conftest.er1 >&5
+  fi
+  rm -f conftest.er1 conftest.err
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }
+done
+
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+ac_clean_files_save=$ac_clean_files
+ac_clean_files="$ac_clean_files a.out a.out.dSYM a.exe b.out"
+# Try to create an executable without -o first, disregard a.out.
+# It will help us diagnose broken compilers, and finding out an intuition
+# of exeext.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the C compiler works" >&5
+$as_echo_n "checking whether the C compiler works... " >&6; }
+ac_link_default=`$as_echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'`
+
+# The possible output files:
+ac_files="a.out conftest.exe conftest a.exe a_out.exe b.out conftest.*"
+
+ac_rmfiles=
+for ac_file in $ac_files
+do
+  case $ac_file in
+    *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;;
+    * ) ac_rmfiles="$ac_rmfiles $ac_file";;
+  esac
+done
+rm -f $ac_rmfiles
+
+if { { ac_try="$ac_link_default"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_link_default") 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then :
+  # Autoconf-2.13 could set the ac_cv_exeext variable to `no'.
+# So ignore a value of `no', otherwise this would lead to `EXEEXT = no'
+# in a Makefile.  We should not override ac_cv_exeext if it was cached,
+# so that the user can short-circuit this test for compilers unknown to
+# Autoconf.
+for ac_file in $ac_files ''
+do
+  test -f "$ac_file" || continue
+  case $ac_file in
+    *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj )
+       ;;
+    [ab].out )
+       # We found the default executable, but exeext='' is most
+       # certainly right.
+       break;;
+    *.* )
+       if test "${ac_cv_exeext+set}" = set && test "$ac_cv_exeext" != no;
+       then :; else
+          ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'`
+       fi
+       # We set ac_cv_exeext here because the later test for it is not
+       # safe: cross compilers may not add the suffix if given an `-o'
+       # argument, so we may need to know it at that point already.
+       # Even if this section looks crufty: it has the advantage of
+       # actually working.
+       break;;
+    * )
+       break;;
+  esac
+done
+test "$ac_cv_exeext" = no && ac_cv_exeext=
+
+else
+  ac_file=''
+fi
+if test -z "$ac_file"; then :
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+$as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+{ as_fn_set_status 77
+as_fn_error "C compiler cannot create executables
+See \`config.log' for more details." "$LINENO" 5; }; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+fi
+# Report the default output file name found by the preceding link test
+# (still held in $ac_file), then determine the executable suffix.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler default output file name" >&5
+$as_echo_n "checking for C compiler default output file name... " >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_file" >&5
+$as_echo "$ac_file" >&6; }
+ac_exeext=$ac_cv_exeext
+
+rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out
+ac_clean_files=$ac_clean_files_save
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of executables" >&5
+$as_echo_n "checking for suffix of executables... " >&6; }
+# Link again, this time with the full $ac_link command (which names the
+# output with -o), and derive the suffix from the file that appears.
+if { { ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_link") 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then :
+  # If both `conftest.exe' and `conftest' are `present' (well, observable)
+# catch `conftest.exe'.  For instance with Cygwin, `ls conftest' will
+# work properly (i.e., refer to `conftest.exe'), while it won't with
+# `rm'.
+for ac_file in conftest.exe conftest conftest.*; do
+  test -f "$ac_file" || continue
+  case $ac_file in
+    *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;;
+    # The expr pattern keeps everything from the first "." onwards.
+    *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'`
+         break;;
+    * ) break;;
+  esac
+done
+else
+  { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error "cannot compute suffix of executables: cannot compile and link
+See \`config.log' for more details." "$LINENO" 5; }
+fi
+rm -f conftest conftest$ac_cv_exeext
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5
+$as_echo "$ac_cv_exeext" >&6; }
+
+rm -f conftest.$ac_ext
+EXEEXT=$ac_cv_exeext
+ac_exeext=$EXEEXT
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <stdio.h>
+int
+main ()
+{
+FILE *f = fopen ("conftest.out", "w");
+ return ferror (f) || fclose (f) != 0;
+
+  ;
+  return 0;
+}
+_ACEOF
+ac_clean_files="$ac_clean_files conftest.out"
+# Check that the compiler produces executables we can run.  If not, either
+# the compiler is broken, or we cross compile.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are cross compiling" >&5
+$as_echo_n "checking whether we are cross compiling... " >&6; }
+if test "$cross_compiling" != yes; then
+  { { ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_link") 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }
+  if { ac_try='./conftest$ac_cv_exeext'
+  { { case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then
+    cross_compiling=no
+  else
+    if test "$cross_compiling" = maybe; then
+       cross_compiling=yes
+    else
+       { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error "cannot run C compiled programs.
+If you meant to cross compile, use \`--host'.
+See \`config.log' for more details." "$LINENO" 5; }
+    fi
+  fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5
+$as_echo "$cross_compiling" >&6; }
+
+rm -f conftest.$ac_ext conftest$ac_cv_exeext conftest.out
+ac_clean_files=$ac_clean_files_save
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5
+$as_echo_n "checking for suffix of object files... " >&6; }
+if test "${ac_cv_objext+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.o conftest.obj
+if { { ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_compile") 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then :
+  for ac_file in conftest.o conftest.obj conftest.*; do
+  test -f "$ac_file" || continue;
+  case $ac_file in
+    *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM ) ;;
+    *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'`
+       break;;
+  esac
+done
+else
+  $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error "cannot compute suffix of object files: cannot compile
+See \`config.log' for more details." "$LINENO" 5; }
+fi
+rm -f conftest.$ac_cv_objext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_objext" >&5
+$as_echo "$ac_cv_objext" >&6; }
+OBJEXT=$ac_cv_objext
+ac_objext=$OBJEXT
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C compiler" >&5
+$as_echo_n "checking whether we are using the GNU C compiler... " >&6; }
+if test "${ac_cv_c_compiler_gnu+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+#ifndef __GNUC__
+       choke me
+#endif
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  ac_compiler_gnu=yes
+else
+  ac_compiler_gnu=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ac_cv_c_compiler_gnu=$ac_compiler_gnu
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5
+$as_echo "$ac_cv_c_compiler_gnu" >&6; }
+if test $ac_compiler_gnu = yes; then
+  GCC=yes
+else
+  GCC=
+fi
+ac_test_CFLAGS=${CFLAGS+set}
+ac_save_CFLAGS=$CFLAGS
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5
+$as_echo_n "checking whether $CC accepts -g... " >&6; }
+if test "${ac_cv_prog_cc_g+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_save_c_werror_flag=$ac_c_werror_flag
+   ac_c_werror_flag=yes
+   ac_cv_prog_cc_g=no
+   CFLAGS="-g"
+   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  ac_cv_prog_cc_g=yes
+else
+  CFLAGS=""
+      cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+
+else
+  ac_c_werror_flag=$ac_save_c_werror_flag
+        CFLAGS="-g"
+        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  ac_cv_prog_cc_g=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+   ac_c_werror_flag=$ac_save_c_werror_flag
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5
+$as_echo "$ac_cv_prog_cc_g" >&6; }
+if test "$ac_test_CFLAGS" = set; then
+  CFLAGS=$ac_save_CFLAGS
+elif test $ac_cv_prog_cc_g = yes; then
+  if test "$GCC" = yes; then
+    CFLAGS="-g -O2"
+  else
+    CFLAGS="-g"
+  fi
+else
+  if test "$GCC" = yes; then
+    CFLAGS="-O2"
+  else
+    CFLAGS=
+  fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C89" >&5
+$as_echo_n "checking for $CC option to accept ISO C89... " >&6; }
+if test "${ac_cv_prog_cc_c89+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_cv_prog_cc_c89=no
+ac_save_CC=$CC
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <stdarg.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+/* Most of the following tests are stolen from RCS 5.7's src/conf.sh.  */
+struct buf { int x; };
+FILE * (*rcsopen) (struct buf *, struct stat *, int);
+static char *e (p, i)
+     char **p;
+     int i;
+{
+  return p[i];
+}
+static char *f (char * (*g) (char **, int), char **p, ...)
+{
+  char *s;
+  va_list v;
+  va_start (v,p);
+  s = g (p, va_arg (v,int));
+  va_end (v);
+  return s;
+}
+
+/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default.  It has
+   function prototypes and stuff, but not '\xHH' hex character constants.
+   These don't provoke an error unfortunately, instead are silently treated
+   as 'x'.  The following induces an error, until -std is added to get
+   proper ANSI mode.  Curiously '\x00'!='x' always comes out true, for an
+   array size at least.  It's necessary to write '\x00'==0 to get something
+   that's true only with -std.  */
+int osf4_cc_array ['\x00' == 0 ? 1 : -1];
+
+/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters
+   inside strings and character constants.  */
+#define FOO(x) 'x'
+int xlc6_cc_array[FOO(a) == 'x' ? 1 : -1];
+
+int test (int i, double x);
+struct s1 {int (*f) (int a);};
+struct s2 {int (*f) (double a);};
+int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int);
+int argc;
+char **argv;
+int
+main ()
+{
+return f (e, argv, 0) != argv[0]  ||  f (e, argv, 1) != argv[1];
+  ;
+  return 0;
+}
+_ACEOF
+for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \
+       -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__"
+do
+  CC="$ac_save_CC $ac_arg"
+  if ac_fn_c_try_compile "$LINENO"; then :
+  ac_cv_prog_cc_c89=$ac_arg
+fi
+rm -f core conftest.err conftest.$ac_objext
+  test "x$ac_cv_prog_cc_c89" != "xno" && break
+done
+rm -f conftest.$ac_ext
+CC=$ac_save_CC
+
+fi
+# AC_CACHE_VAL
+case "x$ac_cv_prog_cc_c89" in
+  x)
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5
+$as_echo "none needed" >&6; } ;;
+  xno)
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5
+$as_echo "unsupported" >&6; } ;;
+  *)
+    CC="$CC $ac_cv_prog_cc_c89"
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5
+$as_echo "$ac_cv_prog_cc_c89" >&6; } ;;
+esac
+if test "x$ac_cv_prog_cc_c89" != xno; then :
+
+fi
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+   case $ac_cv_prog_cc_stdc in #(
+  no) :
+    ac_cv_prog_cc_c99=no; ac_cv_prog_cc_c89=no ;; #(
+  *) :
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C99" >&5
+$as_echo_n "checking for $CC option to accept ISO C99... " >&6; }
+if test "${ac_cv_prog_cc_c99+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_cv_prog_cc_c99=no
+ac_save_CC=$CC
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <wchar.h>
+#include <stdio.h>
+
+// Check varargs macros.  These examples are taken from C99 6.10.3.5.
+#define debug(...) fprintf (stderr, __VA_ARGS__)
+#define showlist(...) puts (#__VA_ARGS__)
+#define report(test,...) ((test) ? puts (#test) : printf (__VA_ARGS__))
+static void
+test_varargs_macros (void)
+{
+  int x = 1234;
+  int y = 5678;
+  debug ("Flag");
+  debug ("X = %d\n", x);
+  showlist (The first, second, and third items.);
+  report (x>y, "x is %d but y is %d", x, y);
+}
+
+// Check long long types.
+#define BIG64 18446744073709551615ull
+#define BIG32 4294967295ul
+#define BIG_OK (BIG64 / BIG32 == 4294967297ull && BIG64 % BIG32 == 0)
+#if !BIG_OK
+  your preprocessor is broken;
+#endif
+#if BIG_OK
+#else
+  your preprocessor is broken;
+#endif
+static long long int bignum = -9223372036854775807LL;
+static unsigned long long int ubignum = BIG64;
+
+struct incomplete_array
+{
+  int datasize;
+  double data[];
+};
+
+struct named_init {
+  int number;
+  const wchar_t *name;
+  double average;
+};
+
+typedef const char *ccp;
+
+static inline int
+test_restrict (ccp restrict text)
+{
+  // See if C++-style comments work.
+  // Iterate through items via the restricted pointer.
+  // Also check for declarations in for loops.
+  for (unsigned int i = 0; *(text+i) != '\0'; ++i)
+    continue;
+  return 0;
+}
+
+// Check varargs and va_copy.
+static void
+test_varargs (const char *format, ...)
+{
+  va_list args;
+  va_start (args, format);
+  va_list args_copy;
+  va_copy (args_copy, args);
+
+  const char *str;
+  int number;
+  float fnumber;
+
+  while (*format)
+    {
+      switch (*format++)
+       {
+       case 's': // string
+         str = va_arg (args_copy, const char *);
+         break;
+       case 'd': // int
+         number = va_arg (args_copy, int);
+         break;
+       case 'f': // float
+         fnumber = va_arg (args_copy, double);
+         break;
+       default:
+         break;
+       }
+    }
+  va_end (args_copy);
+  va_end (args);
+}
+
+int
+main ()
+{
+
+  // Check bool.
+  _Bool success = false;
+
+  // Check restrict.
+  if (test_restrict ("String literal") == 0)
+    success = true;
+  char *restrict newvar = "Another string";
+
+  // Check varargs.
+  test_varargs ("s, d' f .", "string", 65, 34.234);
+  test_varargs_macros ();
+
+  // Check flexible array members.
+  struct incomplete_array *ia =
+    malloc (sizeof (struct incomplete_array) + (sizeof (double) * 10));
+  ia->datasize = 10;
+  for (int i = 0; i < ia->datasize; ++i)
+    ia->data[i] = i * 1.234;
+
+  // Check named initializers.
+  struct named_init ni = {
+    .number = 34,
+    .name = L"Test wide string",
+    .average = 543.34343,
+  };
+
+  ni.number = 58;
+
+  int dynamic_array[ni.number];
+  dynamic_array[ni.number - 1] = 543;
+
+  // work around unused variable warnings
+  return (!success || bignum == 0LL || ubignum == 0uLL || newvar[0] == 'x'
+         || dynamic_array[ni.number - 1] != 543);
+
+  ;
+  return 0;
+}
+_ACEOF
+for ac_arg in '' -std=gnu99 -std=c99 -c99 -AC99 -xc99=all -qlanglvl=extc99
+do
+  CC="$ac_save_CC $ac_arg"
+  if ac_fn_c_try_compile "$LINENO"; then :
+  ac_cv_prog_cc_c99=$ac_arg
+fi
+rm -f core conftest.err conftest.$ac_objext
+  test "x$ac_cv_prog_cc_c99" != "xno" && break
+done
+rm -f conftest.$ac_ext
+CC=$ac_save_CC
+
+fi
+# AC_CACHE_VAL
+case "x$ac_cv_prog_cc_c99" in
+  x)
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5
+$as_echo "none needed" >&6; } ;;
+  xno)
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5
+$as_echo "unsupported" >&6; } ;;
+  *)
+    CC="$CC $ac_cv_prog_cc_c99"
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c99" >&5
+$as_echo "$ac_cv_prog_cc_c99" >&6; } ;;
+esac
+if test "x$ac_cv_prog_cc_c99" != xno; then :
+  ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c99
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C89" >&5
+$as_echo_n "checking for $CC option to accept ISO C89... " >&6; }
+if test "${ac_cv_prog_cc_c89+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_cv_prog_cc_c89=no
+ac_save_CC=$CC
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <stdarg.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+/* Most of the following tests are stolen from RCS 5.7's src/conf.sh.  */
+struct buf { int x; };
+FILE * (*rcsopen) (struct buf *, struct stat *, int);
+static char *e (p, i)
+     char **p;
+     int i;
+{
+  return p[i];
+}
+static char *f (char * (*g) (char **, int), char **p, ...)
+{
+  char *s;
+  va_list v;
+  va_start (v,p);
+  s = g (p, va_arg (v,int));
+  va_end (v);
+  return s;
+}
+
+/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default.  It has
+   function prototypes and stuff, but not '\xHH' hex character constants.
+   These don't provoke an error unfortunately, instead are silently treated
+   as 'x'.  The following induces an error, until -std is added to get
+   proper ANSI mode.  Curiously '\x00'!='x' always comes out true, for an
+   array size at least.  It's necessary to write '\x00'==0 to get something
+   that's true only with -std.  */
+int osf4_cc_array ['\x00' == 0 ? 1 : -1];
+
+/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters
+   inside strings and character constants.  */
+#define FOO(x) 'x'
+int xlc6_cc_array[FOO(a) == 'x' ? 1 : -1];
+
+int test (int i, double x);
+struct s1 {int (*f) (int a);};
+struct s2 {int (*f) (double a);};
+int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int);
+int argc;
+char **argv;
+int
+main ()
+{
+return f (e, argv, 0) != argv[0]  ||  f (e, argv, 1) != argv[1];
+  ;
+  return 0;
+}
+_ACEOF
+for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \
+       -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__"
+do
+  CC="$ac_save_CC $ac_arg"
+  if ac_fn_c_try_compile "$LINENO"; then :
+  ac_cv_prog_cc_c89=$ac_arg
+fi
+rm -f core conftest.err conftest.$ac_objext
+  test "x$ac_cv_prog_cc_c89" != "xno" && break
+done
+rm -f conftest.$ac_ext
+CC=$ac_save_CC
+
+fi
+# AC_CACHE_VAL
+case "x$ac_cv_prog_cc_c89" in
+  x)
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5
+$as_echo "none needed" >&6; } ;;
+  xno)
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5
+$as_echo "unsupported" >&6; } ;;
+  *)
+    CC="$CC $ac_cv_prog_cc_c89"
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5
+$as_echo "$ac_cv_prog_cc_c89" >&6; } ;;
+esac
+if test "x$ac_cv_prog_cc_c89" != xno; then :
+  ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c89
+else
+  ac_cv_prog_cc_stdc=no
+fi
+
+fi
+ ;;
+esac
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO Standard C" >&5
+$as_echo_n "checking for $CC option to accept ISO Standard C... " >&6; }
+  if test "${ac_cv_prog_cc_stdc+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+fi
+
+  case $ac_cv_prog_cc_stdc in #(
+  no) :
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5
+$as_echo "unsupported" >&6; } ;; #(
+  '') :
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5
+$as_echo "none needed" >&6; } ;; #(
+  *) :
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_stdc" >&5
+$as_echo "$ac_cv_prog_cc_stdc" >&6; } ;;
+esac
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to run the C preprocessor" >&5
+$as_echo_n "checking how to run the C preprocessor... " >&6; }
+# On Suns, sometimes $CPP names a directory.
+if test -n "$CPP" && test -d "$CPP"; then
+  CPP=
+fi
+if test -z "$CPP"; then
+  if test "${ac_cv_prog_CPP+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+      # Double quotes because CPP needs to be expanded
+    for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp"
+    do
+      ac_preproc_ok=false
+for ac_c_preproc_warn_flag in '' yes
+do
+  # Use a header file that comes with gcc, so configuring glibc
+  # with a fresh cross-compiler works.
+  # Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+  # <limits.h> exists even on freestanding compilers.
+  # On the NeXT, cc -E runs the code through the compiler's parser,
+  # not just through cpp. "Syntax error" is here to catch this case.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+                    Syntax error
+_ACEOF
+if ac_fn_c_try_cpp "$LINENO"; then :
+
+else
+  # Broken: fails on valid input.
+continue
+fi
+rm -f conftest.err conftest.$ac_ext
+
+  # OK, works on sane cases.  Now check whether nonexistent headers
+  # can be detected and how.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <ac_nonexistent.h>
+_ACEOF
+if ac_fn_c_try_cpp "$LINENO"; then :
+  # Broken: success on invalid input.
+continue
+else
+  # Passes both tests.
+ac_preproc_ok=:
+break
+fi
+rm -f conftest.err conftest.$ac_ext
+
+done
+# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
+rm -f conftest.err conftest.$ac_ext
+if $ac_preproc_ok; then :
+  break
+fi
+
+    done
+    ac_cv_prog_CPP=$CPP
+
+fi
+  CPP=$ac_cv_prog_CPP
+else
+  ac_cv_prog_CPP=$CPP
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $CPP" >&5
+$as_echo "$CPP" >&6; }
+ac_preproc_ok=false
+for ac_c_preproc_warn_flag in '' yes
+do
+  # Use a header file that comes with gcc, so configuring glibc
+  # with a fresh cross-compiler works.
+  # Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+  # <limits.h> exists even on freestanding compilers.
+  # On the NeXT, cc -E runs the code through the compiler's parser,
+  # not just through cpp. "Syntax error" is here to catch this case.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+                    Syntax error
+_ACEOF
+if ac_fn_c_try_cpp "$LINENO"; then :
+
+else
+  # Broken: fails on valid input.
+continue
+fi
+rm -f conftest.err conftest.$ac_ext
+
+  # OK, works on sane cases.  Now check whether nonexistent headers
+  # can be detected and how.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <ac_nonexistent.h>
+_ACEOF
+if ac_fn_c_try_cpp "$LINENO"; then :
+  # Broken: success on invalid input.
+continue
+else
+  # Passes both tests.
+ac_preproc_ok=:
+break
+fi
+rm -f conftest.err conftest.$ac_ext
+
+done
+# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
+rm -f conftest.err conftest.$ac_ext
+if $ac_preproc_ok; then :
+
+else
+  { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error "C preprocessor \"$CPP\" fails sanity check
+See \`config.log' for more details." "$LINENO" 5; }
+fi
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+
+# Check that gmp.h will treat the selected compiler as ANSI.  Nothing is
+# done when $ac_cv_prog_cc_stdc is "no".  Otherwise gmp-h.in is compiled
+# with GMP_NAIL_BITS taken from configure and GMP_LIMB_BITS given the value
+# 123; the "die die die" line forces a compile error when
+# __GMP_HAVE_PROTOTYPES is false.  A failed compile only produces a
+# warning, configure carries on.
+case $ac_cv_prog_cc_stdc in
+  no)
+    ;;
+  *)
+    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#define __GMP_WITHIN_CONFIGURE 1   /* ignore template stuff */
+#define GMP_NAIL_BITS $GMP_NAIL_BITS
+#define GMP_LIMB_BITS 123
+$DEFN_LONG_LONG_LIMB
+#include "$srcdir/gmp-h.in"
+
+#if ! __GMP_HAVE_PROTOTYPES
+die die die
+#endif
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: gmp.h doesnt recognise compiler as ANSI, prototypes and \"const\" will be unavailable" >&5
+$as_echo "$as_me: WARNING: gmp.h doesnt recognise compiler as ANSI, prototypes and \"const\" will be unavailable" >&2;}
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+    ;;
+esac
+
+
+
+# The C compiler on the build system, and associated tests.
+
+if test -n "$CC_FOR_BUILD"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking build system compiler $CC_FOR_BUILD" >&5
+$as_echo_n "checking build system compiler $CC_FOR_BUILD... " >&6; }
+# remove anything that might look like compiler output to our "||" expression
+rm -f conftest* a.out b.out a.exe a_out.exe
+cat >conftest.c <<EOF
+int
+main ()
+{
+  exit(0);
+}
+EOF
+gmp_compile="$CC_FOR_BUILD conftest.c"
+cc_for_build_works=no
+if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_compile\""; } >&5
+  (eval $gmp_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  if (./a.out || ./b.out || ./a.exe || ./a_out.exe || ./conftest) >&5 2>&1; then
+    cc_for_build_works=yes
+  fi
+fi
+rm -f conftest* a.out b.out a.exe a_out.exe
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $cc_for_build_works" >&5
+$as_echo "$cc_for_build_works" >&6; }
+if test "$cc_for_build_works" = yes; then
+  :
+else
+  as_fn_error "Specified CC_FOR_BUILD doesn't seem to work" "$LINENO" 5
+fi
+
+elif test -n "$HOST_CC"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking build system compiler $HOST_CC" >&5
+$as_echo_n "checking build system compiler $HOST_CC... " >&6; }
+# remove anything that might look like compiler output to our "||" expression
+rm -f conftest* a.out b.out a.exe a_out.exe
+cat >conftest.c <<EOF
+int
+main ()
+{
+  exit(0);
+}
+EOF
+gmp_compile="$HOST_CC conftest.c"
+cc_for_build_works=no
+if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_compile\""; } >&5
+  (eval $gmp_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  if (./a.out || ./b.out || ./a.exe || ./a_out.exe || ./conftest) >&5 2>&1; then
+    cc_for_build_works=yes
+  fi
+fi
+rm -f conftest* a.out b.out a.exe a_out.exe
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $cc_for_build_works" >&5
+$as_echo "$cc_for_build_works" >&6; }
+if test "$cc_for_build_works" = yes; then
+  CC_FOR_BUILD=$HOST_CC
+else
+  as_fn_error "Specified HOST_CC doesn't seem to work" "$LINENO" 5
+fi
+
+else
+  for i in "$CC" "$CC $CFLAGS $CPPFLAGS" cc gcc c89 c99; do
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking build system compiler $i" >&5
+$as_echo_n "checking build system compiler $i... " >&6; }
+# remove anything that might look like compiler output to our "||" expression
+rm -f conftest* a.out b.out a.exe a_out.exe
+cat >conftest.c <<EOF
+int
+main ()
+{
+  exit(0);
+}
+EOF
+gmp_compile="$i conftest.c"
+cc_for_build_works=no
+if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_compile\""; } >&5
+  (eval $gmp_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  if (./a.out || ./b.out || ./a.exe || ./a_out.exe || ./conftest) >&5 2>&1; then
+    cc_for_build_works=yes
+  fi
+fi
+rm -f conftest* a.out b.out a.exe a_out.exe
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $cc_for_build_works" >&5
+$as_echo "$cc_for_build_works" >&6; }
+if test "$cc_for_build_works" = yes; then
+  CC_FOR_BUILD=$i
+       break
+else
+  :
+fi
+
+  done
+  if test -z "$CC_FOR_BUILD"; then
+    as_fn_error "Cannot find a build system compiler" "$LINENO" 5
+  fi
+fi
+
+
+
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for build system preprocessor" >&5
+$as_echo_n "checking for build system preprocessor... " >&6; }
+if test -z "$CPP_FOR_BUILD"; then
+  if test "${gmp_cv_prog_cpp_for_build+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat >conftest.c <<EOF
+#define FOO BAR
+EOF
+  for i in "$CC_FOR_BUILD -E" "$CC_FOR_BUILD -E -traditional-cpp" "/lib/cpp"; do
+    gmp_compile="$i conftest.c"
+    if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_compile\""; } >&5
+  (eval $gmp_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; } >&5 2>&1; then
+      gmp_cv_prog_cpp_for_build=$i
+      break
+    fi
+  done
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  if test -z "$gmp_cv_prog_cpp_for_build"; then
+    as_fn_error "Cannot find build system C preprocessor." "$LINENO" 5
+  fi
+
+fi
+
+  CPP_FOR_BUILD=$gmp_cv_prog_cpp_for_build
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $CPP_FOR_BUILD" >&5
+$as_echo "$CPP_FOR_BUILD" >&6; }
+
+
+
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for build system executable suffix" >&5
+$as_echo_n "checking for build system executable suffix... " >&6; }
+if test "${gmp_cv_prog_exeext_for_build+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat >conftest.c <<EOF
+int
+main ()
+{
+  exit (0);
+}
+EOF
+for i in .exe ,ff8 ""; do
+  gmp_compile="$CC_FOR_BUILD conftest.c -o conftest$i"
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_compile\""; } >&5
+  (eval $gmp_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+    if (./conftest) 2>&5; then
+      gmp_cv_prog_exeext_for_build=$i
+      break
+    fi
+  fi
+done
+rm -f conftest*
+if test "${gmp_cv_prog_exeext_for_build+set}" != set; then
+  as_fn_error "Cannot determine executable suffix" "$LINENO" 5
+fi
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_cv_prog_exeext_for_build" >&5
+$as_echo "$gmp_cv_prog_exeext_for_build" >&6; }
+EXEEXT_FOR_BUILD=$gmp_cv_prog_exeext_for_build
+
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether build system compiler is ANSI" >&5
+$as_echo_n "checking whether build system compiler is ANSI... " >&6; }
+if test "${gmp_cv_c_for_build_ansi+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat >conftest.c <<EOF
+int
+main (int argc, char **argv)
+{
+  exit(0);
+}
+EOF
+gmp_compile="$CC_FOR_BUILD conftest.c"
+if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_compile\""; } >&5
+  (eval $gmp_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  gmp_cv_c_for_build_ansi=yes
+else
+  gmp_cv_c_for_build_ansi=no
+fi
+rm -f conftest* a.out b.out a.exe a_out.exe
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_cv_c_for_build_ansi" >&5
+$as_echo "$gmp_cv_c_for_build_ansi" >&6; }
+if test "$gmp_cv_c_for_build_ansi" = yes; then
+  U_FOR_BUILD=
+else
+  U_FOR_BUILD=_
+
+fi
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for build system compiler math library" >&5
+$as_echo_n "checking for build system compiler math library... " >&6; }
+if test "${gmp_cv_check_libm_for_build+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat >conftest.c <<EOF
+int
+main ()
+{
+  exit(0);
+}
+double d;
+double
+foo ()
+{
+  return log (d);
+}
+EOF
+gmp_compile="$CC_FOR_BUILD conftest.c -lm"
+if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_compile\""; } >&5
+  (eval $gmp_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  gmp_cv_check_libm_for_build=-lm
+else
+  gmp_cv_check_libm_for_build=no
+fi
+rm -f conftest* a.out b.out a.exe a_out.exe
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_cv_check_libm_for_build" >&5
+$as_echo "$gmp_cv_check_libm_for_build" >&6; }
+case $gmp_cv_check_libm_for_build in
+  yes) LIBM_FOR_BUILD=-lm
+ ;;
+  no)  LIBM_FOR_BUILD= ;;
+  *)   LIBM_FOR_BUILD=$gmp_cv_check_libm_for_build ;;
+esac
+
+
+
+# How to assemble, used with CFLAGS etc, see mpn/Makeasm.am.
+# Using the compiler is a lot easier than figuring out how to invoke the
+# assembler directly.
+#
+test -n "$CCAS" || CCAS="$CC -c"
+
+
+
+# The C++ compiler, if desired.
+want_cxx=no
+if test $enable_cxx != no; then
+  test_CXXFLAGS=${CXXFLAGS+set}
+  ac_ext=cpp
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+if test -z "$CXX"; then
+  if test -n "$CCC"; then
+    CXX=$CCC
+  else
+    if test -n "$ac_tool_prefix"; then
+  for ac_prog in g++ c++ gpp aCC CC cxx cc++ cl.exe FCC KCC RCC xlC_r xlC
+  do
+    # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args.
+set dummy $ac_tool_prefix$ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_CXX+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$CXX"; then
+  ac_cv_prog_CXX="$CXX" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_prog_CXX="$ac_tool_prefix$ac_prog"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+CXX=$ac_cv_prog_CXX
+if test -n "$CXX"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CXX" >&5
+$as_echo "$CXX" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+    test -n "$CXX" && break
+  done
+fi
+if test -z "$CXX"; then
+  ac_ct_CXX=$CXX
+  for ac_prog in g++ c++ gpp aCC CC cxx cc++ cl.exe FCC KCC RCC xlC_r xlC
+do
+  # Extract the first word of "$ac_prog", so it can be a program name with args.
+set dummy $ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_ac_ct_CXX+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_CXX"; then
+  ac_cv_prog_ac_ct_CXX="$ac_ct_CXX" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_prog_ac_ct_CXX="$ac_prog"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_CXX=$ac_cv_prog_ac_ct_CXX
+if test -n "$ac_ct_CXX"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CXX" >&5
+$as_echo "$ac_ct_CXX" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+  test -n "$ac_ct_CXX" && break
+done
+
+  if test "x$ac_ct_CXX" = x; then
+    CXX="g++"
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    CXX=$ac_ct_CXX
+  fi
+fi
+
+  fi
+fi
+# Provide some information about the compiler.
+$as_echo "$as_me:${as_lineno-$LINENO}: checking for C++ compiler version" >&5
+set X $ac_compile
+ac_compiler=$2
+for ac_option in --version -v -V -qversion; do
+  { { ac_try="$ac_compiler $ac_option >&5"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_compiler $ac_option >&5") 2>conftest.err
+  ac_status=$?
+  if test -s conftest.err; then
+    sed '10a\
+... rest of stderr output deleted ...
+         10q' conftest.err >conftest.er1
+    cat conftest.er1 >&5
+  fi
+  rm -f conftest.er1 conftest.err
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }
+done
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C++ compiler" >&5
+$as_echo_n "checking whether we are using the GNU C++ compiler... " >&6; }
+if test "${ac_cv_cxx_compiler_gnu+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+#ifndef __GNUC__
+       choke me
+#endif
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_compile "$LINENO"; then :
+  ac_compiler_gnu=yes
+else
+  ac_compiler_gnu=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ac_cv_cxx_compiler_gnu=$ac_compiler_gnu
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_cxx_compiler_gnu" >&5
+$as_echo "$ac_cv_cxx_compiler_gnu" >&6; }
+if test $ac_compiler_gnu = yes; then
+  GXX=yes
+else
+  GXX=
+fi
+ac_test_CXXFLAGS=${CXXFLAGS+set}
+ac_save_CXXFLAGS=$CXXFLAGS
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CXX accepts -g" >&5
+$as_echo_n "checking whether $CXX accepts -g... " >&6; }
+if test "${ac_cv_prog_cxx_g+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_save_cxx_werror_flag=$ac_cxx_werror_flag
+   ac_cxx_werror_flag=yes
+   ac_cv_prog_cxx_g=no
+   CXXFLAGS="-g"
+   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_compile "$LINENO"; then :
+  ac_cv_prog_cxx_g=yes
+else
+  CXXFLAGS=""
+      cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_compile "$LINENO"; then :
+
+else
+  ac_cxx_werror_flag=$ac_save_cxx_werror_flag
+        CXXFLAGS="-g"
+        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_compile "$LINENO"; then :
+  ac_cv_prog_cxx_g=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+   ac_cxx_werror_flag=$ac_save_cxx_werror_flag
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cxx_g" >&5
+$as_echo "$ac_cv_prog_cxx_g" >&6; }
+if test "$ac_test_CXXFLAGS" = set; then
+  CXXFLAGS=$ac_save_CXXFLAGS
+elif test $ac_cv_prog_cxx_g = yes; then
+  if test "$GXX" = yes; then
+    CXXFLAGS="-g -O2"
+  else
+    CXXFLAGS="-g"
+  fi
+else
+  if test "$GXX" = yes; then
+    CXXFLAGS="-O2"
+  else
+    CXXFLAGS=
+  fi
+fi
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+  echo "CXXFLAGS chosen by autoconf: $CXXFLAGS" >&5
+  cxxflags_ac_prog_cxx=$CXXFLAGS
+  cxxflags_list=ac_prog_cxx
+
+  # If the user didn't specify $CXXFLAGS, then try $CFLAGS, with -g removed
+  # if AC_PROG_CXX thinks that doesn't work.  $CFLAGS stands a good chance
+  # of working, eg. on a GNU system where CC=gcc and CXX=g++.
+  #
+  if test "$test_CXXFLAGS" != set; then
+    cxxflags_cflags=$CFLAGS
+    cxxflags_list="cflags $cxxflags_list"
+    if test "$ac_prog_cxx_g" = no; then
+      cxxflags_cflags=`echo "$cxxflags_cflags" | sed -e 's/ -g //' -e 's/^-g //' -e 's/ -g$//'`
+    fi
+  fi
+
+  # See if the C++ compiler works.  If the user specified CXXFLAGS then all
+  # we're doing is checking whether AC_PROG_CXX succeeded, since it doesn't
+  # give a fatal error, just leaves CXX set to a default g++.  If on the
+  # other hand the user didn't specify CXXFLAGS then we get to try here our
+  # $cxxflags_list alternatives.
+  #
+  # Automake includes $CPPFLAGS in a C++ compile, so we do the same here.
+  #
+  for cxxflags_choice in $cxxflags_list; do
+    eval CXXFLAGS=\"\$cxxflags_$cxxflags_choice\"
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking C++ compiler $CXX $CPPFLAGS $CXXFLAGS" >&5
+$as_echo_n "checking C++ compiler $CXX $CPPFLAGS $CXXFLAGS... " >&6; }
+gmp_prog_cxx_works=yes
+
+# start with a plain "main()", then go on to further checks
+if test "$gmp_prog_cxx_works" = yes; then
+  # remove anything that might look like compiler output to our "||" expression
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  cat >conftest.cc <<EOF
+
+int main (void) { return 0; }
+EOF
+  echo "Test compile: " >&5
+  gmp_cxxcompile="$CXX $CPPFLAGS $CXXFLAGS conftest.cc >&5"
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_cxxcompile\""; } >&5
+  (eval $gmp_cxxcompile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+    if test "$cross_compiling" = no; then
+      if { ac_try='./a.out || ./b.out || ./a.exe || ./a_out.exe || ./conftest'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then :;
+      else
+        gmp_prog_cxx_works="no, program does not run"
+      fi
+    fi
+  else
+    gmp_prog_cxx_works="no"
+  fi
+  case $gmp_prog_cxx_works in
+    no*)
+      echo "failed program was:" >&5
+      cat conftest.cc >&5
+      ;;
+  esac
+  rm -f conftest* a.out b.out a.exe a_out.exe
+fi
+
+
+if test "$gmp_prog_cxx_works" = yes; then
+  # remove anything that might look like compiler output to our "||" expression
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  cat >conftest.cc <<EOF
+namespace foo { }
+using namespace foo;
+
+int main (void) { return 0; }
+EOF
+  echo "Test compile: namespace" >&5
+  gmp_cxxcompile="$CXX $CPPFLAGS $CXXFLAGS conftest.cc >&5"
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_cxxcompile\""; } >&5
+  (eval $gmp_cxxcompile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+    if test "$cross_compiling" = no; then
+      if { ac_try='./a.out || ./b.out || ./a.exe || ./a_out.exe || ./conftest'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then :;
+      else
+        gmp_prog_cxx_works="no, namespace, program does not run"
+      fi
+    fi
+  else
+    gmp_prog_cxx_works="no, namespace"
+  fi
+  case $gmp_prog_cxx_works in
+    no*)
+      echo "failed program was:" >&5
+      cat conftest.cc >&5
+      ;;
+  esac
+  rm -f conftest* a.out b.out a.exe a_out.exe
+fi
+
+
+# GMP requires the standard C++ iostream classes
+if test "$gmp_prog_cxx_works" = yes; then
+  # remove anything that might look like compiler output to our "||" expression
+  rm -f conftest* a.out b.out a.exe a_out.exe
+  cat >conftest.cc <<EOF
+/* This test rejects g++ 2.7.2 which doesn't have <iostream>, only a
+    pre-standard iostream.h. */
+#include <iostream>
+
+/* This test rejects OSF 5.1 Compaq C++ in its default pre-standard iostream
+   mode, since that mode puts cout in the global namespace, not "std".  */
+void someoutput (void) { std::cout << 123; }
+
+int main (void) { return 0; }
+EOF
+  echo "Test compile: std iostream" >&5
+  gmp_cxxcompile="$CXX $CPPFLAGS $CXXFLAGS conftest.cc >&5"
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_cxxcompile\""; } >&5
+  (eval $gmp_cxxcompile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+    if test "$cross_compiling" = no; then
+      if { ac_try='./a.out || ./b.out || ./a.exe || ./a_out.exe || ./conftest'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then :;
+      else
+        gmp_prog_cxx_works="no, std iostream, program does not run"
+      fi
+    fi
+  else
+    gmp_prog_cxx_works="no, std iostream"
+  fi
+  case $gmp_prog_cxx_works in
+    no*)
+      echo "failed program was:" >&5
+      cat conftest.cc >&5
+      ;;
+  esac
+  rm -f conftest* a.out b.out a.exe a_out.exe
+fi
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_prog_cxx_works" >&5
+$as_echo "$gmp_prog_cxx_works" >&6; }
+case $gmp_prog_cxx_works in
+  yes)
+    want_cxx=yes
+      break
+    ;;
+  *)
+
+    ;;
+esac
+
+  done
+
+  # If --enable-cxx=yes but a C++ compiler can't be found, then abort.
+  if test $want_cxx = no && test $enable_cxx = yes; then
+    as_fn_error "C++ compiler not available, see config.log for details" "$LINENO" 5
+  fi
+fi
+
+ if test $want_cxx = yes; then
+  WANT_CXX_TRUE=
+  WANT_CXX_FALSE='#'
+else
+  WANT_CXX_TRUE='#'
+  WANT_CXX_FALSE=
+fi
+
+
+# FIXME: We're not interested in CXXCPP for ourselves, but if we don't do it
+# here then AC_PROG_LIBTOOL will AC_REQUIRE it (via _LT_AC_TAGCONFIG) and
+# hence execute it unconditionally, and that will fail if there's no C++
+# compiler (and no generic /lib/cpp).
+#
+if test $want_cxx = yes; then
+  ac_ext=cpp
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to run the C++ preprocessor" >&5
+$as_echo_n "checking how to run the C++ preprocessor... " >&6; }
+if test -z "$CXXCPP"; then
+  if test "${ac_cv_prog_CXXCPP+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+      # Double quotes because CXXCPP needs to be expanded
+    for CXXCPP in "$CXX -E" "/lib/cpp"
+    do
+      ac_preproc_ok=false
+for ac_cxx_preproc_warn_flag in '' yes
+do
+  # Use a header file that comes with gcc, so configuring glibc
+  # with a fresh cross-compiler works.
+  # Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+  # <limits.h> exists even on freestanding compilers.
+  # On the NeXT, cc -E runs the code through the compiler's parser,
+  # not just through cpp. "Syntax error" is here to catch this case.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+                    Syntax error
+_ACEOF
+if ac_fn_cxx_try_cpp "$LINENO"; then :
+
+else
+  # Broken: fails on valid input.
+continue
+fi
+rm -f conftest.err conftest.$ac_ext
+
+  # OK, works on sane cases.  Now check whether nonexistent headers
+  # can be detected and how.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <ac_nonexistent.h>
+_ACEOF
+if ac_fn_cxx_try_cpp "$LINENO"; then :
+  # Broken: success on invalid input.
+continue
+else
+  # Passes both tests.
+ac_preproc_ok=:
+break
+fi
+rm -f conftest.err conftest.$ac_ext
+
+done
+# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
+rm -f conftest.err conftest.$ac_ext
+if $ac_preproc_ok; then :
+  break
+fi
+
+    done
+    ac_cv_prog_CXXCPP=$CXXCPP
+
+fi
+  CXXCPP=$ac_cv_prog_CXXCPP
+else
+  ac_cv_prog_CXXCPP=$CXXCPP
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $CXXCPP" >&5
+$as_echo "$CXXCPP" >&6; }
+ac_preproc_ok=false
+for ac_cxx_preproc_warn_flag in '' yes
+do
+  # Use a header file that comes with gcc, so configuring glibc
+  # with a fresh cross-compiler works.
+  # Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+  # <limits.h> exists even on freestanding compilers.
+  # On the NeXT, cc -E runs the code through the compiler's parser,
+  # not just through cpp. "Syntax error" is here to catch this case.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+                    Syntax error
+_ACEOF
+if ac_fn_cxx_try_cpp "$LINENO"; then :
+
+else
+  # Broken: fails on valid input.
+continue
+fi
+rm -f conftest.err conftest.$ac_ext
+
+  # OK, works on sane cases.  Now check whether nonexistent headers
+  # can be detected and how.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <ac_nonexistent.h>
+_ACEOF
+if ac_fn_cxx_try_cpp "$LINENO"; then :
+  # Broken: success on invalid input.
+continue
+else
+  # Passes both tests.
+ac_preproc_ok=:
+break
+fi
+rm -f conftest.err conftest.$ac_ext
+
+done
+# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
+rm -f conftest.err conftest.$ac_ext
+if $ac_preproc_ok; then :
+
+else
+  { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error "C++ preprocessor \"$CXXCPP\" fails sanity check
+See \`config.log' for more details." "$LINENO" 5; }
+fi
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+fi
+
+
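+# Path setups for Cray, according to IEEE or CFP.  These must come after
+# deciding the compiler, since the c90/t90 case below runs the preprocessor
+# and filters its output through $EGREP.  The grep and egrep checks that
+# follow immediately are there to provide $EGREP for that test.
+#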
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5
+$as_echo_n "checking for grep that handles long lines and -e... " >&6; }
+if test "${ac_cv_path_GREP+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -z "$GREP"; then
+  ac_path_GREP_found=false
+  # Loop through the user's path and test for each of PROGNAME-LIST
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_prog in grep ggrep; do
+    for ac_exec_ext in '' $ac_executable_extensions; do
+      ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext"
+      { test -f "$ac_path_GREP" && $as_test_x "$ac_path_GREP"; } || continue
+# Check for GNU ac_path_GREP and select it if it is found.
+  # Check for GNU $ac_path_GREP
+case `"$ac_path_GREP" --version 2>&1` in
+*GNU*)
+  ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;;
+*)
+  ac_count=0
+  $as_echo_n 0123456789 >"conftest.in"
+  while :
+  do
+    cat "conftest.in" "conftest.in" >"conftest.tmp"
+    mv "conftest.tmp" "conftest.in"
+    cp "conftest.in" "conftest.nl"
+    $as_echo 'GREP' >> "conftest.nl"
+    "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break
+    diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
+    as_fn_arith $ac_count + 1 && ac_count=$as_val
+    if test $ac_count -gt ${ac_path_GREP_max-0}; then
+      # Best one so far, save it but keep looking for a better one
+      ac_cv_path_GREP="$ac_path_GREP"
+      ac_path_GREP_max=$ac_count
+    fi
+    # 10*(2^10) chars as input seems more than enough
+    test $ac_count -gt 10 && break
+  done
+  rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
+esac
+
+      $ac_path_GREP_found && break 3
+    done
+  done
+  done
+IFS=$as_save_IFS
+  if test -z "$ac_cv_path_GREP"; then
+    as_fn_error "no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
+  fi
+else
+  ac_cv_path_GREP=$GREP
+fi
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_GREP" >&5
+$as_echo "$ac_cv_path_GREP" >&6; }
+ GREP="$ac_cv_path_GREP"
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5
+$as_echo_n "checking for egrep... " >&6; }
+if test "${ac_cv_path_EGREP+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if echo a | $GREP -E '(a|b)' >/dev/null 2>&1
+   then ac_cv_path_EGREP="$GREP -E"
+   else
+     if test -z "$EGREP"; then
+  ac_path_EGREP_found=false
+  # Loop through the user's path and test for each of PROGNAME-LIST
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_prog in egrep; do
+    for ac_exec_ext in '' $ac_executable_extensions; do
+      ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext"
+      { test -f "$ac_path_EGREP" && $as_test_x "$ac_path_EGREP"; } || continue
+# Check for GNU ac_path_EGREP and select it if it is found.
+  # Check for GNU $ac_path_EGREP
+case `"$ac_path_EGREP" --version 2>&1` in
+*GNU*)
+  ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;;
+*)
+  ac_count=0
+  $as_echo_n 0123456789 >"conftest.in"
+  while :
+  do
+    cat "conftest.in" "conftest.in" >"conftest.tmp"
+    mv "conftest.tmp" "conftest.in"
+    cp "conftest.in" "conftest.nl"
+    $as_echo 'EGREP' >> "conftest.nl"
+    "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break
+    diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
+    as_fn_arith $ac_count + 1 && ac_count=$as_val
+    if test $ac_count -gt ${ac_path_EGREP_max-0}; then
+      # Best one so far, save it but keep looking for a better one
+      ac_cv_path_EGREP="$ac_path_EGREP"
+      ac_path_EGREP_max=$ac_count
+    fi
+    # 10*(2^10) chars as input seems more than enough
+    test $ac_count -gt 10 && break
+  done
+  rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
+esac
+
+      $ac_path_EGREP_found && break 3
+    done
+  done
+  done
+IFS=$as_save_IFS
+  if test -z "$ac_cv_path_EGREP"; then
+    as_fn_error "no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
+  fi
+else
+  ac_cv_path_EGREP=$EGREP
+fi
+
+   fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_EGREP" >&5
+$as_echo "$ac_cv_path_EGREP" >&6; }
+ EGREP="$ac_cv_path_EGREP"
+
+
+# Pick the Cray specific mpn directory (add_path) and extra function for the
+# Cray CPU types handled here.  Any other $host_cpu leaves add_path and
+# extra_functions untouched.
+case $host_cpu in
+  c90 | t90)
+    # Preprocess a file that expands to "yes" only when _CRAYIEEE is
+    # defined, and look for that "yes" in the preprocessor output.
+    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#ifdef _CRAYIEEE
+yes
+#endif
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+  $EGREP "yes" >/dev/null 2>&1; then :
+  add_path="cray/ieee"
+else
+  add_path="cray/cfp"; extra_functions="mulwwc90"
+fi
+rm -f conftest*
+
+    ;;
+  j90 | sv1)
+    # No preprocessor probe for these: always the cfp directory.
+    add_path="cray/cfp"; extra_functions="mulwwj90"
+    ;;
+esac
+
+
+
+# Put the directory chosen above in front of the search path, but only when
+# MPN_PATH is empty.
+if test -z "$MPN_PATH"; then
+  path="$add_path $path"
+fi
+
+# Nail builds search a "nails" subdirectory ahead of each mpn directory,
+# except for "generic", which is kept as it is.  Nothing is done when
+# MPN_PATH is non-empty.
+#
+if test $GMP_NAIL_BITS != 0 && test -z "$MPN_PATH"; then
+  new_path=
+  for i in $path; do
+    if test "x$i" = xgeneric; then
+      new_path="$new_path $i"
+    else
+      new_path="$new_path $i/nails $i"
+    fi
+  done
+  path=$new_path
+fi
+
+
+# Put all directories into CPUVEC_list so as to get a full set of
+# CPUVEC_SETUP_$tmp_suffix defines into config.h, even if some of them are
+# empty because mmx and/or sse2 had to be dropped.
+#
+# This loop therefore runs over $fat_path before the assembler checks
+# further down get a chance to remove entries from it.  The suffix is the
+# directory name with its first component removed (only when the name
+# contains a "/") and every remaining "/" turned into "_", so an entry of
+# the form a/b/c contributes CPUVEC_SETUP_b_c.
+#
+for i in $fat_path; do
+  tmp_suffix=`echo $i | sed -e '/\//s:^[^/]*/::' -e 's:[\\/]:_:g'`
+  CPUVEC_list="$CPUVEC_list CPUVEC_SETUP_$tmp_suffix"
+done
+
+
+# If there's any sse2 or mmx in the path, check whether the assembler
+# supports it, and remove if not.
+#
+# We only need this in ABI=32, for ABI=64 on x86_64 we can assume a new
+# enough assembler.
+#
+case $host in
+  i?86*-*-* | k[5-8]*-*-* | pentium*-*-* | athlon-*-* | viac3*-*-* | geode*-*-* | atom-*-* | athlon64-*-* | k8-*-* | k10-*-* | bobcat-*-* | bulldozer-*-* | pentium4-*-* | atom-*-* | core2-*-* | corei*-*-* | x86_64-*-* | nano-*-*)
+    if test "$ABI" = 32; then
+      case "$path $fat_path" in
+        *mmx*)   { $as_echo "$as_me:${as_lineno-$LINENO}: checking if the assembler knows about MMX instructions" >&5
+$as_echo_n "checking if the assembler knows about MMX instructions... " >&6; }
+if test "${gmp_cv_asm_x86_mmx+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat >conftest.s <<EOF
+       .text
+       movq    %mm0, %mm1
+EOF
+gmp_assemble="$CCAS $CFLAGS $CPPFLAGS conftest.s >conftest.out 2>&1"
+if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_assemble\""; } >&5
+  (eval $gmp_assemble) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  cat conftest.out >&5
+  gmp_cv_asm_x86_mmx=yes
+case $host in
+*-*-solaris*)
+  if (dis conftest.$OBJEXT >conftest.out) 2>/dev/null; then
+    if grep "0f 6f c1" conftest.out >/dev/null; then
+      gmp_cv_asm_x86_mmx=movq-bug
+    fi
+  else
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: \"dis\" not available to check for \"as\" movq bug" >&5
+$as_echo "$as_me: WARNING: \"dis\" not available to check for \"as\" movq bug" >&2;}
+  fi
+esac
+else
+  cat conftest.out >&5
+  echo "configure: failed program was:" >&5
+  cat conftest.s >&5
+  gmp_cv_asm_x86_mmx=no
+fi
+rm -f conftest*
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_cv_asm_x86_mmx" >&5
+$as_echo "$gmp_cv_asm_x86_mmx" >&6; }
+
+case $gmp_cv_asm_x86_mmx in
+movq-bug)
+  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: +----------------------------------------------------------" >&5
+$as_echo "$as_me: WARNING: +----------------------------------------------------------" >&2;}
+  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: | WARNING WARNING WARNING" >&5
+$as_echo "$as_me: WARNING: | WARNING WARNING WARNING" >&2;}
+  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: | Host CPU has MMX code, but the assembler" >&5
+$as_echo "$as_me: WARNING: | Host CPU has MMX code, but the assembler" >&2;}
+  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: |     $CCAS $CFLAGS $CPPFLAGS" >&5
+$as_echo "$as_me: WARNING: |     $CCAS $CFLAGS $CPPFLAGS" >&2;}
+  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: | has the Solaris 2.6 and 2.7 bug where register to register" >&5
+$as_echo "$as_me: WARNING: | has the Solaris 2.6 and 2.7 bug where register to register" >&2;}
+  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: | movq operands are reversed." >&5
+$as_echo "$as_me: WARNING: | movq operands are reversed." >&2;}
+  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: | Non-MMX replacements will be used." >&5
+$as_echo "$as_me: WARNING: | Non-MMX replacements will be used." >&2;}
+  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: | This will be an inferior build." >&5
+$as_echo "$as_me: WARNING: | This will be an inferior build." >&2;}
+  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: +----------------------------------------------------------" >&5
+$as_echo "$as_me: WARNING: +----------------------------------------------------------" >&2;}
+  ;;
+no)
+  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: +----------------------------------------------------------" >&5
+$as_echo "$as_me: WARNING: +----------------------------------------------------------" >&2;}
+  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: | WARNING WARNING WARNING" >&5
+$as_echo "$as_me: WARNING: | WARNING WARNING WARNING" >&2;}
+  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: | Host CPU has MMX code, but it can't be assembled by" >&5
+$as_echo "$as_me: WARNING: | Host CPU has MMX code, but it can't be assembled by" >&2;}
+  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: |     $CCAS $CFLAGS $CPPFLAGS" >&5
+$as_echo "$as_me: WARNING: |     $CCAS $CFLAGS $CPPFLAGS" >&2;}
+  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: | Non-MMX replacements will be used." >&5
+$as_echo "$as_me: WARNING: | Non-MMX replacements will be used." >&2;}
+  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: | This will be an inferior build." >&5
+$as_echo "$as_me: WARNING: | This will be an inferior build." >&2;}
+  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: +----------------------------------------------------------" >&5
+$as_echo "$as_me: WARNING: +----------------------------------------------------------" >&2;}
+  ;;
+esac
+if test "$gmp_cv_asm_x86_mmx" = yes; then
+  :
+else
+  tmp_path=
+for i in $path; do
+  case $i in
+    */*mmx*) ;;
+    *) tmp_path="$tmp_path $i" ;;
+  esac
+done
+path="$tmp_path"
+
+tmp_path=
+for i in $fat_path; do
+  case $i in
+    */*mmx*) ;;
+    *) tmp_path="$tmp_path $i" ;;
+  esac
+done
+fat_path="$tmp_path"
+
+
+fi
+ ;;
+      esac
+      case "$path $fat_path" in
+        *sse2*)  { $as_echo "$as_me:${as_lineno-$LINENO}: checking if the assembler knows about SSE2 instructions" >&5
+$as_echo_n "checking if the assembler knows about SSE2 instructions... " >&6; }
+if test "${gmp_cv_asm_x86_sse2+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat >conftest.s <<EOF
+       .text
+       paddq   %mm0, %mm1
+EOF
+gmp_assemble="$CCAS $CFLAGS $CPPFLAGS conftest.s >conftest.out 2>&1"
+if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_assemble\""; } >&5
+  (eval $gmp_assemble) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  cat conftest.out >&5
+  gmp_cv_asm_x86_sse2=yes
+else
+  cat conftest.out >&5
+  echo "configure: failed program was:" >&5
+  cat conftest.s >&5
+  gmp_cv_asm_x86_sse2=no
+fi
+rm -f conftest*
+
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_cv_asm_x86_sse2" >&5
+$as_echo "$gmp_cv_asm_x86_sse2" >&6; }
+case $gmp_cv_asm_x86_sse2 in
+yes)
+  :
+  ;;
+*)
+  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: +----------------------------------------------------------" >&5
+$as_echo "$as_me: WARNING: +----------------------------------------------------------" >&2;}
+  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: | WARNING WARNING WARNING" >&5
+$as_echo "$as_me: WARNING: | WARNING WARNING WARNING" >&2;}
+  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: | Host CPU has SSE2 code, but it can't be assembled by" >&5
+$as_echo "$as_me: WARNING: | Host CPU has SSE2 code, but it can't be assembled by" >&2;}
+  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: |     $CCAS $CFLAGS $CPPFLAGS" >&5
+$as_echo "$as_me: WARNING: |     $CCAS $CFLAGS $CPPFLAGS" >&2;}
+  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: | Non-SSE2 replacements will be used." >&5
+$as_echo "$as_me: WARNING: | Non-SSE2 replacements will be used." >&2;}
+  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: | This will be an inferior build." >&5
+$as_echo "$as_me: WARNING: | This will be an inferior build." >&2;}
+  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: +----------------------------------------------------------" >&5
+$as_echo "$as_me: WARNING: +----------------------------------------------------------" >&2;}
+  tmp_path=
+for i in $path; do
+  case $i in
+    */sse2) ;;
+    *) tmp_path="$tmp_path $i" ;;
+  esac
+done
+path="$tmp_path"
+
+tmp_path=
+for i in $fat_path; do
+  case $i in
+    */sse2) ;;
+    *) tmp_path="$tmp_path $i" ;;
+  esac
+done
+fat_path="$tmp_path"
+
+
+  ;;
+esac
+ ;;
+      esac
+    fi
+    ;;
+esac
+
+
+# Record the final compiler, flag and mpn path choices on the log stream
+# (fd 5), then show a summary on standard output.
+cat >&5 <<EOF
+Decided:
+ABI=$ABI
+CC=$CC
+CFLAGS=$CFLAGS
+CPPFLAGS=$CPPFLAGS
+GMP_LDFLAGS=$GMP_LDFLAGS
+CXX=$CXX
+CXXFLAGS=$CXXFLAGS
+path=$path
+EOF
+echo "using ABI=\"$ABI\""
+echo "      CC=\"$CC\""
+echo "      CFLAGS=\"$CFLAGS\""
+echo "      CPPFLAGS=\"$CPPFLAGS\""
+# The C++ settings are only shown when C++ support was requested.
+# $want_cxx is quoted so that an empty or unset value simply compares
+# unequal instead of making "test" fail with a syntax error.
+if test "$want_cxx" = yes; then
+  echo "      CXX=\"$CXX\""
+  echo "      CXXFLAGS=\"$CXXFLAGS\""
+fi
+echo "      MPN_PATH=\"$path\""
+
+
+# Automake ansi2knr support.
+# Function prototypes are reported as available unless the cached C89
+# compiler result ($ac_cv_prog_cc_c89) is exactly "no"; when available,
+# PROTOTYPES and __PROTOTYPES are appended to confdefs.h.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for function prototypes" >&5
+$as_echo_n "checking for function prototypes... " >&6; }
+if test "$ac_cv_prog_cc_c89" = no; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+  $as_echo "#define PROTOTYPES 1" >>confdefs.h
+  $as_echo "#define __PROTOTYPES 1" >>confdefs.h
+fi
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C header files" >&5
+$as_echo_n "checking for ANSI C header files... " >&6; }
+if test "${ac_cv_header_stdc+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <float.h>
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  ac_cv_header_stdc=yes
+else
+  ac_cv_header_stdc=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+if test $ac_cv_header_stdc = yes; then
+  # SunOS 4.x string.h does not declare mem*, contrary to ANSI.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <string.h>
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+  $EGREP "memchr" >/dev/null 2>&1; then :
+
+else
+  ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+fi
+
+if test $ac_cv_header_stdc = yes; then
+  # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <stdlib.h>
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+  $EGREP "free" >/dev/null 2>&1; then :
+
+else
+  ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+fi
+
+if test $ac_cv_header_stdc = yes; then
+  # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi.
+  if test "$cross_compiling" = yes; then :
+  :
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <ctype.h>
+#include <stdlib.h>
+#if ((' ' & 0x0FF) == 0x020)
+# define ISLOWER(c) ('a' <= (c) && (c) <= 'z')
+# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c))
+#else
+# define ISLOWER(c) \
+                  (('a' <= (c) && (c) <= 'i') \
+                    || ('j' <= (c) && (c) <= 'r') \
+                    || ('s' <= (c) && (c) <= 'z'))
+# define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c))
+#endif
+
+#define XOR(e, f) (((e) && !(f)) || (!(e) && (f)))
+int
+main ()
+{
+  int i;
+  for (i = 0; i < 256; i++)
+    if (XOR (islower (i), ISLOWER (i))
+       || toupper (i) != TOUPPER (i))
+      return 2;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_run "$LINENO"; then :
+
+else
+  ac_cv_header_stdc=no
+fi
+rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
+  conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+
+fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_stdc" >&5
+$as_echo "$ac_cv_header_stdc" >&6; }
+if test $ac_cv_header_stdc = yes; then
+
+$as_echo "#define STDC_HEADERS 1" >>confdefs.h
+
+fi
+
+# On IRIX 5.3, sys/types and inttypes.h are conflicting.
+for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \
+                 inttypes.h stdint.h unistd.h
+do :
+  as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`
+ac_fn_c_check_header_compile "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default
+"
+eval as_val=\$$as_ac_Header
+   if test "x$as_val" = x""yes; then :
+  cat >>confdefs.h <<_ACEOF
+#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+
+done
+
+
+
+if test "$ac_cv_prog_cc_stdc" != no; then
+  U= ANSI2KNR=
+else
+  U=_ ANSI2KNR=./ansi2knr
+fi
+# Ensure some checks needed by ansi2knr itself.
+
+for ac_header in string.h
+do :
+  ac_fn_c_check_header_mongrel "$LINENO" "string.h" "ac_cv_header_string_h" "$ac_includes_default"
+if test "x$ac_cv_header_string_h" = x""yes; then :
+  cat >>confdefs.h <<_ACEOF
+#define HAVE_STRING_H 1
+_ACEOF
+
+fi
+
+done
+
+
+
+
+# Check whether the assembler, driven through $CC, accepts --noexecstack.
+# The answer is kept in cl_cv_as_noexecstack and reused when already set.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether assembler supports --noexecstack option" >&5
+$as_echo_n "checking whether assembler supports --noexecstack option... " >&6; }
+if test "${cl_cv_as_noexecstack+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  # Three steps, all of which must succeed for a "yes":
+  #   1. compile a trivial function to assembly with -S,
+  #   2. find .note.GNU-stack in the generated assembly,
+  #   3. assemble that output again with -Wa,--noexecstack.
+    cat > conftest.c <<EOF
+void foo() {}
+EOF
+  if { ac_try='${CC} $CFLAGS $CPPFLAGS
+                     -S -o conftest.s conftest.c >/dev/null'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; } \
+     && grep .note.GNU-stack conftest.s >/dev/null \
+     && { ac_try='${CC} $CFLAGS $CPPFLAGS -Wa,--noexecstack
+                       -c -o conftest.o conftest.s >/dev/null'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }
+  then
+    cl_cv_as_noexecstack=yes
+  else
+    cl_cv_as_noexecstack=no
+  fi
+  rm -f conftest*
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $cl_cv_as_noexecstack" >&5
+$as_echo "$cl_cv_as_noexecstack" >&6; }
+  # When supported, have the option passed along in ASMFLAGS.
+  if test "$cl_cv_as_noexecstack" = yes; then
+    ASMFLAGS="$ASMFLAGS -Wa,--noexecstack"
+  fi
+
+
+
+
+gmp_user_AR=$AR
+if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}ar", so it can be a program name with args.
+set dummy ${ac_tool_prefix}ar; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_AR+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$AR"; then
+  ac_cv_prog_AR="$AR" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_prog_AR="${ac_tool_prefix}ar"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+AR=$ac_cv_prog_AR
+if test -n "$AR"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $AR" >&5
+$as_echo "$AR" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_AR"; then
+  ac_ct_AR=$AR
+  # Extract the first word of "ar", so it can be a program name with args.
+set dummy ar; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_ac_ct_AR+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_AR"; then
+  ac_cv_prog_ac_ct_AR="$ac_ct_AR" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_prog_ac_ct_AR="ar"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_AR=$ac_cv_prog_ac_ct_AR
+if test -n "$ac_ct_AR"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_AR" >&5
+$as_echo "$ac_ct_AR" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+  if test "x$ac_ct_AR" = x; then
+    AR="ar"
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    AR=$ac_ct_AR
+  fi
+else
+  AR="$ac_cv_prog_AR"
+fi
+
+# Unless AR was already set before the ar lookup (that value is saved in
+# gmp_user_AR), append any ABI specific ar flags: $ar${abi1}_flags is tried
+# first and $ar${abi2}_flags is the fallback.
+if test -z "$gmp_user_AR"; then
+  eval arflags=\"\$ar${abi1}_flags\"
+  test -n "$arflags" || eval arflags=\"\$ar${abi2}_flags\"
+  if test -n "$arflags"; then
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for extra ar flags" >&5
+$as_echo_n "checking for extra ar flags... " >&6; }
+    AR="$AR $arflags"
+    # AR already carries the flags at this point, so the cache variables take
+    # it as it is.  Appending $arflags once more would store every flag twice
+    # and leave the cached values different from AR itself.
+    ac_cv_prog_AR=$AR
+    ac_cv_prog_ac_ct_AR=$AR
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: $arflags" >&5
+$as_echo "$arflags" >&6; }
+  fi
+fi
+# Fall back to "cq" when AR_FLAGS is empty.
+if test -z "$AR_FLAGS"; then
+  AR_FLAGS=cq
+fi
+
+
+gmp_user_NM=$NM
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for BSD- or MS-compatible name lister (nm)" >&5
+$as_echo_n "checking for BSD- or MS-compatible name lister (nm)... " >&6; }
+if test "${lt_cv_path_NM+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$NM"; then
+  # Let the user override the test.
+  lt_cv_path_NM="$NM"
+else
+  lt_nm_to_check="${ac_tool_prefix}nm"
+  if test -n "$ac_tool_prefix" && test "$build" = "$host"; then
+    lt_nm_to_check="$lt_nm_to_check nm"
+  fi
+  for lt_tmp_nm in $lt_nm_to_check; do
+    lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR
+    for ac_dir in $PATH /usr/ccs/bin/elf /usr/ccs/bin /usr/ucb /bin; do
+      IFS="$lt_save_ifs"
+      test -z "$ac_dir" && ac_dir=.
+      tmp_nm="$ac_dir/$lt_tmp_nm"
+      if test -f "$tmp_nm" || test -f "$tmp_nm$ac_exeext" ; then
+       # Check to see if the nm accepts a BSD-compat flag.
+       # Adding the `sed 1q' prevents false positives on HP-UX, which says:
+       #   nm: unknown option "B" ignored
+       # Tru64's nm complains that /dev/null is an invalid object file
+       case `"$tmp_nm" -B /dev/null 2>&1 | sed '1q'` in
+       */dev/null* | *'Invalid file or object type'*)
+         lt_cv_path_NM="$tmp_nm -B"
+         break
+         ;;
+       *)
+         case `"$tmp_nm" -p /dev/null 2>&1 | sed '1q'` in
+         */dev/null*)
+           lt_cv_path_NM="$tmp_nm -p"
+           break
+           ;;
+         *)
+           lt_cv_path_NM=${lt_cv_path_NM="$tmp_nm"} # keep the first match, but
+           continue # so that we can try to find one that supports BSD flags
+           ;;
+         esac
+         ;;
+       esac
+      fi
+    done
+    IFS="$lt_save_ifs"
+  done
+  : ${lt_cv_path_NM=no}
+fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_path_NM" >&5
+$as_echo "$lt_cv_path_NM" >&6; }
+if test "$lt_cv_path_NM" != "no"; then
+  NM="$lt_cv_path_NM"
+else
+  # Didn't find any BSD compatible name lister, look for dumpbin.
+  if test -n "$DUMPBIN"; then :
+    # Let the user override the test.
+  else
+    if test -n "$ac_tool_prefix"; then
+  for ac_prog in dumpbin "link -dump"
+  do
+    # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args.
+set dummy $ac_tool_prefix$ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_DUMPBIN+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$DUMPBIN"; then
+  ac_cv_prog_DUMPBIN="$DUMPBIN" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_prog_DUMPBIN="$ac_tool_prefix$ac_prog"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+DUMPBIN=$ac_cv_prog_DUMPBIN
+if test -n "$DUMPBIN"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $DUMPBIN" >&5
+$as_echo "$DUMPBIN" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+    test -n "$DUMPBIN" && break
+  done
+fi
+if test -z "$DUMPBIN"; then
+  ac_ct_DUMPBIN=$DUMPBIN
+  for ac_prog in dumpbin "link -dump"
+do
+  # Extract the first word of "$ac_prog", so it can be a program name with args.
+set dummy $ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_ac_ct_DUMPBIN+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_DUMPBIN"; then
+  ac_cv_prog_ac_ct_DUMPBIN="$ac_ct_DUMPBIN" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_prog_ac_ct_DUMPBIN="$ac_prog"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_DUMPBIN=$ac_cv_prog_ac_ct_DUMPBIN
+if test -n "$ac_ct_DUMPBIN"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_DUMPBIN" >&5
+$as_echo "$ac_ct_DUMPBIN" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+  test -n "$ac_ct_DUMPBIN" && break
+done
+
+  if test "x$ac_ct_DUMPBIN" = x; then
+    DUMPBIN=":"
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    DUMPBIN=$ac_ct_DUMPBIN
+  fi
+fi
+
+    case `$DUMPBIN -symbols /dev/null 2>&1 | sed '1q'` in
+    *COFF*)
+      DUMPBIN="$DUMPBIN -symbols"
+      ;;
+    *)
+      DUMPBIN=:
+      ;;
+    esac
+  fi
+
+  if test "$DUMPBIN" != ":"; then
+    NM="$DUMPBIN"
+  fi
+fi
+test -z "$NM" && NM=nm
+
+
+
+
+
+
+# Work out which output format $NM produces.  The result is kept in
+# lt_cv_nm_interface and reused when already set.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking the name lister ($NM) interface" >&5
+$as_echo_n "checking the name lister ($NM) interface... " >&6; }
+if test "${lt_cv_nm_interface+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  # "BSD nm" is the default answer.
+  lt_cv_nm_interface="BSD nm"
+  # Compile a file holding a single global variable, logging the command
+  # and anything the compiler writes to stderr.
+  echo "int some_variable = 0;" > conftest.$ac_ext
+  (eval echo "\"\$as_me:$LINENO: $ac_compile\"" >&5)
+  (eval "$ac_compile" 2>conftest.err)
+  cat conftest.err >&5
+  # Run $NM on the resulting object file, logging the command, its stderr
+  # and its output.
+  (eval echo "\"\$as_me:$LINENO: $NM \\\"conftest.$ac_objext\\\"\"" >&5)
+  (eval "$NM \"conftest.$ac_objext\"" 2>conftest.err > conftest.out)
+  cat conftest.err >&5
+  (eval echo "\"\$as_me:$LINENO: output\"" >&5)
+  cat conftest.out >&5
+  # A line with "External" ahead of the variable name switches the answer
+  # to "MS dumpbin".
+  if $GREP 'External.*some_variable' conftest.out > /dev/null; then
+    lt_cv_nm_interface="MS dumpbin"
+  fi
+  rm -f conftest*
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_nm_interface" >&5
+$as_echo "$lt_cv_nm_interface" >&6; }
+
+
+# FIXME: When cross compiling (ie. $ac_tool_prefix not empty), libtool
+# defaults to plain "nm" if a "${ac_tool_prefix}nm" is not found.  In this
+# case run it again to try the native "nm", firstly so that likely locations
+# are searched, secondly so that -B or -p are added if necessary for BSD
+# format.  This is necessary for instance on OSF with "./configure
+# --build=alphaev5-dec-osf --host=alphaev6-dec-osf".
+#
+if test -z "$gmp_user_NM" && test -n "$ac_tool_prefix" && test "$NM" = nm; then
+  $as_unset lt_cv_path_NM
+  gmp_save_ac_tool_prefix=$ac_tool_prefix
+  ac_tool_prefix=
+  NM=
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for BSD- or MS-compatible name lister (nm)" >&5
+$as_echo_n "checking for BSD- or MS-compatible name lister (nm)... " >&6; }
+if test "${lt_cv_path_NM+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$NM"; then
+  # Let the user override the test.
+  lt_cv_path_NM="$NM"
+else
+  lt_nm_to_check="${ac_tool_prefix}nm"
+  if test -n "$ac_tool_prefix" && test "$build" = "$host"; then
+    lt_nm_to_check="$lt_nm_to_check nm"
+  fi
+  for lt_tmp_nm in $lt_nm_to_check; do
+    lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR
+    for ac_dir in $PATH /usr/ccs/bin/elf /usr/ccs/bin /usr/ucb /bin; do
+      IFS="$lt_save_ifs"
+      test -z "$ac_dir" && ac_dir=.
+      tmp_nm="$ac_dir/$lt_tmp_nm"
+      if test -f "$tmp_nm" || test -f "$tmp_nm$ac_exeext" ; then
+       # Check to see if the nm accepts a BSD-compat flag.
+       # Adding the `sed 1q' prevents false positives on HP-UX, which says:
+       #   nm: unknown option "B" ignored
+       # Tru64's nm complains that /dev/null is an invalid object file
+       case `"$tmp_nm" -B /dev/null 2>&1 | sed '1q'` in
+       */dev/null* | *'Invalid file or object type'*)
+         lt_cv_path_NM="$tmp_nm -B"
+         break
+         ;;
+       *)
+         case `"$tmp_nm" -p /dev/null 2>&1 | sed '1q'` in
+         */dev/null*)
+           lt_cv_path_NM="$tmp_nm -p"
+           break
+           ;;
+         *)
+           lt_cv_path_NM=${lt_cv_path_NM="$tmp_nm"} # keep the first match, but
+           continue # so that we can try to find one that supports BSD flags
+           ;;
+         esac
+         ;;
+       esac
+      fi
+    done
+    IFS="$lt_save_ifs"
+  done
+  : ${lt_cv_path_NM=no}
+fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_path_NM" >&5
+$as_echo "$lt_cv_path_NM" >&6; }
+if test "$lt_cv_path_NM" != "no"; then
+  NM="$lt_cv_path_NM"
+else
+  # Didn't find any BSD compatible name lister, look for dumpbin.
+  if test -n "$DUMPBIN"; then :
+    # Let the user override the test.
+  else
+    if test -n "$ac_tool_prefix"; then
+  for ac_prog in dumpbin "link -dump"
+  do
+    # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args.
+set dummy $ac_tool_prefix$ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_DUMPBIN+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$DUMPBIN"; then
+  ac_cv_prog_DUMPBIN="$DUMPBIN" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_prog_DUMPBIN="$ac_tool_prefix$ac_prog"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+DUMPBIN=$ac_cv_prog_DUMPBIN
+if test -n "$DUMPBIN"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $DUMPBIN" >&5
+$as_echo "$DUMPBIN" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+    test -n "$DUMPBIN" && break
+  done
+fi
+if test -z "$DUMPBIN"; then
+  ac_ct_DUMPBIN=$DUMPBIN
+  for ac_prog in dumpbin "link -dump"
+do
+  # Extract the first word of "$ac_prog", so it can be a program name with args.
+set dummy $ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_ac_ct_DUMPBIN+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_DUMPBIN"; then
+  ac_cv_prog_ac_ct_DUMPBIN="$ac_ct_DUMPBIN" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_prog_ac_ct_DUMPBIN="$ac_prog"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_DUMPBIN=$ac_cv_prog_ac_ct_DUMPBIN
+if test -n "$ac_ct_DUMPBIN"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_DUMPBIN" >&5
+$as_echo "$ac_ct_DUMPBIN" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+  test -n "$ac_ct_DUMPBIN" && break
+done
+
+  if test "x$ac_ct_DUMPBIN" = x; then
+    DUMPBIN=":"
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    DUMPBIN=$ac_ct_DUMPBIN
+  fi
+fi
+
+    case `$DUMPBIN -symbols /dev/null 2>&1 | sed '1q'` in
+    *COFF*)
+      DUMPBIN="$DUMPBIN -symbols"
+      ;;
+    *)
+      DUMPBIN=:
+      ;;
+    esac
+  fi
+
+  if test "$DUMPBIN" != ":"; then
+    NM="$DUMPBIN"
+  fi
+fi
+test -z "$NM" && NM=nm
+
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking the name lister ($NM) interface" >&5
+$as_echo_n "checking the name lister ($NM) interface... " >&6; }
+if test "${lt_cv_nm_interface+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  lt_cv_nm_interface="BSD nm"
+  echo "int some_variable = 0;" > conftest.$ac_ext
+  (eval echo "\"\$as_me:$LINENO: $ac_compile\"" >&5)
+  (eval "$ac_compile" 2>conftest.err)
+  cat conftest.err >&5
+  (eval echo "\"\$as_me:$LINENO: $NM \\\"conftest.$ac_objext\\\"\"" >&5)
+  (eval "$NM \"conftest.$ac_objext\"" 2>conftest.err > conftest.out)
+  cat conftest.err >&5
+  (eval echo "\"\$as_me:$LINENO: output\"" >&5)
+  cat conftest.out >&5
+  if $GREP 'External.*some_variable' conftest.out > /dev/null; then
+    lt_cv_nm_interface="MS dumpbin"
+  fi
+  rm -f conftest*
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_nm_interface" >&5
+$as_echo "$lt_cv_nm_interface" >&6; }
+
+  ac_tool_prefix=$gmp_save_ac_tool_prefix
+fi
+
+# Append ABI-specific flags to $NM, but only when $gmp_user_NM is empty.
+# The flags are read from the variable named nm${abi1}_flags; if that is
+# empty, nm${abi2}_flags is tried instead.
+if test -z "$gmp_user_NM"; then
+                        eval nmflags=\"\$nm${abi1}_flags\"
+  test -n "$nmflags" || eval nmflags=\"\$nm${abi2}_flags\"
+  # Only report and modify NM when some flags were actually found.
+  if test -n "$nmflags"; then
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for extra nm flags" >&5
+$as_echo_n "checking for extra nm flags... " >&6; }
+    NM="$NM $nmflags"
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: $nmflags" >&5
+$as_echo "$nmflags" >&6; }
+  fi
+fi
+
+
+# AIX 3 and 4: default to static-only builds.
+#
+# FIXME: On these systems $libname.a is included in libtool
+# $library_names_spec, so libgmp.a becomes a symlink to libgmp.so, making
+# it impossible to build shared and static libraries simultaneously.
+# Shared libraries are therefore disabled unless the user already chose a
+# value, e.g. with --enable-shared --disable-static.
+#
+# FIXME: This $libname.a problem looks like it might apply to *-*-amigaos*
+# and *-*-os2* too, but wait for someone to test this before worrying
+# about it.  If there is a problem then of course libtool is the right
+# place to fix it.
+case $host in
+  *-*-aix[34]*)
+    # Leave any explicit user setting alone; only fill in an empty value.
+    test -n "$enable_shared" || enable_shared=no
+    ;;
+esac
+
+
+# Configs for Windows DLLs.
+
+enable_win32_dll=yes
+
+case $host in
+*-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-cegcc*)
+  if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}as", so it can be a program name with args.
+set dummy ${ac_tool_prefix}as; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_AS+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$AS"; then
+  ac_cv_prog_AS="$AS" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_prog_AS="${ac_tool_prefix}as"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+AS=$ac_cv_prog_AS
+if test -n "$AS"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $AS" >&5
+$as_echo "$AS" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_AS"; then
+  ac_ct_AS=$AS
+  # Extract the first word of "as", so it can be a program name with args.
+set dummy as; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_ac_ct_AS+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_AS"; then
+  ac_cv_prog_ac_ct_AS="$ac_ct_AS" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_prog_ac_ct_AS="as"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_AS=$ac_cv_prog_ac_ct_AS
+if test -n "$ac_ct_AS"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_AS" >&5
+$as_echo "$ac_ct_AS" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+  if test "x$ac_ct_AS" = x; then
+    AS="false"
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    AS=$ac_ct_AS
+  fi
+else
+  AS="$ac_cv_prog_AS"
+fi
+
+  if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}dlltool", so it can be a program name with args.
+set dummy ${ac_tool_prefix}dlltool; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_DLLTOOL+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$DLLTOOL"; then
+  ac_cv_prog_DLLTOOL="$DLLTOOL" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_prog_DLLTOOL="${ac_tool_prefix}dlltool"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+DLLTOOL=$ac_cv_prog_DLLTOOL
+if test -n "$DLLTOOL"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $DLLTOOL" >&5
+$as_echo "$DLLTOOL" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_DLLTOOL"; then
+  ac_ct_DLLTOOL=$DLLTOOL
+  # Extract the first word of "dlltool", so it can be a program name with args.
+set dummy dlltool; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_ac_ct_DLLTOOL+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_DLLTOOL"; then
+  ac_cv_prog_ac_ct_DLLTOOL="$ac_ct_DLLTOOL" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_prog_ac_ct_DLLTOOL="dlltool"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_DLLTOOL=$ac_cv_prog_ac_ct_DLLTOOL
+if test -n "$ac_ct_DLLTOOL"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_DLLTOOL" >&5
+$as_echo "$ac_ct_DLLTOOL" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+  if test "x$ac_ct_DLLTOOL" = x; then
+    DLLTOOL="false"
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    DLLTOOL=$ac_ct_DLLTOOL
+  fi
+else
+  DLLTOOL="$ac_cv_prog_DLLTOOL"
+fi
+
+  if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}objdump", so it can be a program name with args.
+set dummy ${ac_tool_prefix}objdump; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_OBJDUMP+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$OBJDUMP"; then
+  ac_cv_prog_OBJDUMP="$OBJDUMP" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_prog_OBJDUMP="${ac_tool_prefix}objdump"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+OBJDUMP=$ac_cv_prog_OBJDUMP
+if test -n "$OBJDUMP"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $OBJDUMP" >&5
+$as_echo "$OBJDUMP" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_OBJDUMP"; then
+  ac_ct_OBJDUMP=$OBJDUMP
+  # Extract the first word of "objdump", so it can be a program name with args.
+set dummy objdump; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_ac_ct_OBJDUMP+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_OBJDUMP"; then
+  ac_cv_prog_ac_ct_OBJDUMP="$ac_ct_OBJDUMP" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_prog_ac_ct_OBJDUMP="objdump"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_OBJDUMP=$ac_cv_prog_ac_ct_OBJDUMP
+if test -n "$ac_ct_OBJDUMP"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_OBJDUMP" >&5
+$as_echo "$ac_ct_OBJDUMP" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+  if test "x$ac_ct_OBJDUMP" = x; then
+    OBJDUMP="false"
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    OBJDUMP=$ac_ct_OBJDUMP
+  fi
+else
+  OBJDUMP="$ac_cv_prog_OBJDUMP"
+fi
+
+  ;;
+esac
+
+# Fall back to the plain tool name when AS is still empty at this point
+# (the probes above only run for the cygwin/mingw/pw32/cegcc hosts).
+test -z "$AS" && AS=as
+
+
+
+
+
+# Same fallback for DLLTOOL.
+test -z "$DLLTOOL" && DLLTOOL=dlltool
+
+
+
+
+
+# Same fallback for OBJDUMP.
+test -z "$OBJDUMP" && OBJDUMP=objdump
+
+
+
+
+
+
+
+
+LIBGMP_DLL=0
+
+case $host in
+  *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2*)
+    # By default, build only static.
+    if test -z "$enable_shared"; then
+      enable_shared=no
+    fi
+    # Don't allow both static and DLL.
+    if test "$enable_shared" != no && test "$enable_static" != no; then
+      as_fn_error "cannot build both static and DLL, since gmp.h is different for each.
+Use \"--disable-static --enable-shared\" to build just a DLL." "$LINENO" 5
+    fi
+
+    # "-no-undefined" is required when building a DLL, see documentation on
+    # AC_LIBTOOL_WIN32_DLL.
+    #
+    # "-Wl,--export-all-symbols" is a bit of a hack, it gets all libgmp and
+    # libgmpxx functions and variables exported.  This is what libtool did
+    # in the past, and it's convenient for us in the test programs.
+    #
+    # Maybe it'd be prudent to check for --export-all-symbols before using
+    # it, but it seems to have been in ld since at least 2000, and there's
+    # not really any alternative we want to take up at the moment.
+    #
+    # "-Wl,output-def" is used to get a .def file for use by MS lib to make
+    # a .lib import library, described in the manual.  libgmp-3.dll.def
+    # corresponds to the libmp-3.dll.def generated by libtool (as a result
+    # of -export-symbols on that library).
+    #
+    # Incidentally, libtool does generate an import library libgmp.dll.a,
+    # but it's "ar" format and cannot be used by the MS linker.  There
+    # doesn't seem to be any GNU tool for generating or converting to .lib.
+    #
+    # FIXME: The .def files produced by -Wl,output-def include isascii,
+    # iscsym, iscsymf and toascii, apparently because mingw ctype.h doesn't
+    # inline isascii (used in gmp).  It gives an extern inline for
+    # __isascii, but for some reason not the plain isascii.
+    #
+    # When a DLL is being built, add the DLL link options and mark the
+    # build as a DLL build (LIBGMP_DLL=1).
+    if test "$enable_shared" = yes; then
+      GMP_LDFLAGS="$GMP_LDFLAGS -no-undefined -Wl,--export-all-symbols"
+      LIBGMP_LDFLAGS="$LIBGMP_LDFLAGS -Wl,--output-def,.libs/libgmp-3.dll.def"
+      # Extend libgmpxx's own flags.  Building this value from
+      # $LIBGMP_LDFLAGS would drop any existing LIBGMPXX_LDFLAGS and hand
+      # libgmpxx a second --output-def naming libgmp-3.dll.def.
+      LIBGMPXX_LDFLAGS="$LIBGMPXX_LDFLAGS -Wl,--output-def,.libs/libgmpxx-3.dll.def"
+      LIBGMP_DLL=1
+    fi
+    ;;
+esac
+
+
+# Ensure that $CONFIG_SHELL is available for AC_LIBTOOL_SYS_MAX_CMD_LEN.
+# It's often set already by _LT_AC_PROG_ECHO_BACKSLASH or
+# _AS_LINENO_PREPARE, but not always.
+#
+# The symptom of CONFIG_SHELL unset is some "expr" errors during the test,
+# and an empty result.  This only happens when invoked as "sh configure",
+# ie. no path, and can be seen for instance on ia64-*-hpux*.
+#
+# FIXME: Newer libtool should have its own fix for this.
+#
+# Keep an existing CONFIG_SHELL; otherwise borrow the value of $SHELL.
+test -n "$CONFIG_SHELL" || CONFIG_SHELL=$SHELL
+
+# Enable CXX in libtool only if we want it, and never enable GCJ, nor RC on
+# mingw and cygwin.  Under --disable-cxx this avoids some error messages
+# from libtool arising from the fact we didn't actually run AC_PROG_CXX.
+# Notice that any user-supplied --with-tags setting takes precedence.
+#
+# FIXME: Is this the right way to get this effect?  Very possibly not, but
+# the current _LT_AC_TAGCONFIG doesn't really suggest an alternative.
+#
+# Choose the libtool tags only when the user did not supply --with-tags:
+# CXX when C++ support is wanted, otherwise no tags at all.
+if test "${with_tags+set}" != set; then
+  # Quote $want_cxx so an empty or unset value is compared as an empty
+  # string instead of making "test" fail with a syntax error.
+  if test "$want_cxx" = yes; then
+    with_tags=CXX
+  else
+    with_tags=
+  fi
+fi
+
+# The dead hand of AC_REQUIRE makes AC_PROG_LIBTOOL expand and execute
+# AC_PROG_F77, even when F77 is not in the selected with_tags.  This is
+# probably harmless, but it's unsightly and bloats our configure, so pretend
+# AC_PROG_F77 has been expanded already.
+#
+# FIXME: Rumour has it libtool will one day provide a way for a configure.in
+# to say what it wants from among supported languages etc.
+#
+
+
+case `pwd` in
+  *\ * | *\    *)
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Libtool does not cope well with whitespace in \`pwd\`" >&5
+$as_echo "$as_me: WARNING: Libtool does not cope well with whitespace in \`pwd\`" >&2;} ;;
+esac
+
+
+
+macro_version='2.4.2'
+macro_revision='1.3337'
+
+
+
+
+
+
+
+
+
+
+
+
+
+ltmain="$ac_aux_dir/ltmain.sh"
+
+# Backslashify metacharacters that are still active within
+# double-quoted strings.
+sed_quote_subst='s/\(["`$\\]\)/\\\1/g'
+
+# Same as above, but do not quote variable references.
+double_quote_subst='s/\(["`\\]\)/\\\1/g'
+
+# Sed substitution to delay expansion of an escaped shell variable in a
+# double_quote_subst'ed string.
+delay_variable_subst='s/\\\\\\\\\\\$/\\\\\\$/g'
+
+# Sed substitution to delay expansion of an escaped single quote.
+delay_single_quote_subst='s/'\''/'\'\\\\\\\'\''/g'
+
+# Sed substitution to avoid accidental globbing in evaled expressions
+no_glob_subst='s/\*/\\\*/g'
+
+ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\'
+ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO
+ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO$ECHO
+
+# Select a command that prints its argument unchanged and store it in ECHO.
+# Each probe is handed the current value of $ECHO as test data and is
+# accepted only if that exact string comes back.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to print strings" >&5
+$as_echo_n "checking how to print strings... " >&6; }
+# Test print first, because it will be a builtin if present.
+# The first test also requires that "print -r -- -n" outputs a literal -n.
+if test "X`( print -r -- -n ) 2>/dev/null`" = X-n && \
+   test "X`print -r -- $ECHO 2>/dev/null`" = "X$ECHO"; then
+  ECHO='print -r --'
+elif test "X`printf %s $ECHO 2>/dev/null`" = "X$ECHO"; then
+  ECHO='printf %s\n'
+else
+  # Use this function as a fallback that always works.
+  # It feeds its first argument ($1) to cat through a here-document; the
+  # eval keeps the here-document text inside a single-quoted string.
+  func_fallback_echo ()
+  {
+    eval 'cat <<_LTECHO_EOF
+$1
+_LTECHO_EOF'
+  }
+  ECHO='func_fallback_echo'
+fi
+
+# func_echo_all arg...
+# Invoke $ECHO with all args, space-separated.
+func_echo_all ()
+{
+    # "$*" expands to every argument joined by the first character of IFS
+    # (a space by default).  Passing a literal empty string here would
+    # discard the arguments and print only an empty line.
+    $ECHO "$*"
+}
+
+# Report which printing method ended up in $ECHO.  The printf* pattern is
+# listed before print* because a value starting with "printf" would also
+# match print*.  Anything else is reported as "cat".
+case "$ECHO" in
+  printf*) { $as_echo "$as_me:${as_lineno-$LINENO}: result: printf" >&5
+$as_echo "printf" >&6; } ;;
+  print*) { $as_echo "$as_me:${as_lineno-$LINENO}: result: print -r" >&5
+$as_echo "print -r" >&6; } ;;
+  *) { $as_echo "$as_me:${as_lineno-$LINENO}: result: cat" >&5
+$as_echo "cat" >&6; } ;;
+esac
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for a sed that does not truncate output" >&5
+$as_echo_n "checking for a sed that does not truncate output... " >&6; }
+if test "${ac_cv_path_SED+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+            ac_script=s/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb/
+     for ac_i in 1 2 3 4 5 6 7; do
+       ac_script="$ac_script$as_nl$ac_script"
+     done
+     echo "$ac_script" 2>/dev/null | sed 99q >conftest.sed
+     { ac_script=; unset ac_script;}
+     if test -z "$SED"; then
+  ac_path_SED_found=false
+  # Loop through the user's path and test for each of PROGNAME-LIST
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_prog in sed gsed; do
+    for ac_exec_ext in '' $ac_executable_extensions; do
+      ac_path_SED="$as_dir/$ac_prog$ac_exec_ext"
+      { test -f "$ac_path_SED" && $as_test_x "$ac_path_SED"; } || continue
+# Check for GNU ac_path_SED and select it if it is found.
+  # Check for GNU $ac_path_SED
+case `"$ac_path_SED" --version 2>&1` in
+*GNU*)
+  ac_cv_path_SED="$ac_path_SED" ac_path_SED_found=:;;
+*)
+  ac_count=0
+  $as_echo_n 0123456789 >"conftest.in"
+  while :
+  do
+    cat "conftest.in" "conftest.in" >"conftest.tmp"
+    mv "conftest.tmp" "conftest.in"
+    cp "conftest.in" "conftest.nl"
+    $as_echo '' >> "conftest.nl"
+    "$ac_path_SED" -f conftest.sed < "conftest.nl" >"conftest.out" 2>/dev/null || break
+    diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
+    as_fn_arith $ac_count + 1 && ac_count=$as_val
+    if test $ac_count -gt ${ac_path_SED_max-0}; then
+      # Best one so far, save it but keep looking for a better one
+      ac_cv_path_SED="$ac_path_SED"
+      ac_path_SED_max=$ac_count
+    fi
+    # 10*(2^10) chars as input seems more than enough
+    test $ac_count -gt 10 && break
+  done
+  rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
+esac
+
+      $ac_path_SED_found && break 3
+    done
+  done
+  done
+IFS=$as_save_IFS
+  if test -z "$ac_cv_path_SED"; then
+    as_fn_error "no acceptable sed could be found in \$PATH" "$LINENO" 5
+  fi
+else
+  ac_cv_path_SED=$SED
+fi
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_SED" >&5
+$as_echo "$ac_cv_path_SED" >&6; }
+ SED="$ac_cv_path_SED"
+  rm -f conftest.sed
+
+test -z "$SED" && SED=sed
+Xsed="$SED -e 1s/^X//"
+
+
+
+
+
+
+
+
+
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for fgrep" >&5
+$as_echo_n "checking for fgrep... " >&6; }
+if test "${ac_cv_path_FGREP+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if echo 'ab*c' | $GREP -F 'ab*c' >/dev/null 2>&1
+   then ac_cv_path_FGREP="$GREP -F"
+   else
+     if test -z "$FGREP"; then
+  ac_path_FGREP_found=false
+  # Loop through the user's path and test for each of PROGNAME-LIST
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_prog in fgrep; do
+    for ac_exec_ext in '' $ac_executable_extensions; do
+      ac_path_FGREP="$as_dir/$ac_prog$ac_exec_ext"
+      { test -f "$ac_path_FGREP" && $as_test_x "$ac_path_FGREP"; } || continue
+# Check for GNU ac_path_FGREP and select it if it is found.
+  # Check for GNU $ac_path_FGREP
+case `"$ac_path_FGREP" --version 2>&1` in
+*GNU*)
+  ac_cv_path_FGREP="$ac_path_FGREP" ac_path_FGREP_found=:;;
+*)
+  ac_count=0
+  $as_echo_n 0123456789 >"conftest.in"
+  while :
+  do
+    cat "conftest.in" "conftest.in" >"conftest.tmp"
+    mv "conftest.tmp" "conftest.in"
+    cp "conftest.in" "conftest.nl"
+    $as_echo 'FGREP' >> "conftest.nl"
+    "$ac_path_FGREP" FGREP < "conftest.nl" >"conftest.out" 2>/dev/null || break
+    diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
+    as_fn_arith $ac_count + 1 && ac_count=$as_val
+    if test $ac_count -gt ${ac_path_FGREP_max-0}; then
+      # Best one so far, save it but keep looking for a better one
+      ac_cv_path_FGREP="$ac_path_FGREP"
+      ac_path_FGREP_max=$ac_count
+    fi
+    # 10*(2^10) chars as input seems more than enough
+    test $ac_count -gt 10 && break
+  done
+  rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
+esac
+
+      $ac_path_FGREP_found && break 3
+    done
+  done
+  done
+IFS=$as_save_IFS
+  if test -z "$ac_cv_path_FGREP"; then
+    as_fn_error "no acceptable fgrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
+  fi
+else
+  ac_cv_path_FGREP=$FGREP
+fi
+
+   fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_FGREP" >&5
+$as_echo "$ac_cv_path_FGREP" >&6; }
+ FGREP="$ac_cv_path_FGREP"
+
+
+test -z "$GREP" && GREP=grep
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+# Check whether --with-gnu-ld was given.
+# If with_gnu_ld is set, any value other than "no" is normalised to "yes";
+# if it is not set at all, it defaults to "no".
+if test "${with_gnu_ld+set}" = set; then :
+  withval=$with_gnu_ld; test "$withval" = no || with_gnu_ld=yes
+else
+  with_gnu_ld=no
+fi
+
+ac_prog=ld
+if test "$GCC" = yes; then
+  # Check if gcc -print-prog-name=ld gives a path.
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ld used by $CC" >&5
+$as_echo_n "checking for ld used by $CC... " >&6; }
+  case $host in
+  *-*-mingw*)
+    # gcc leaves a trailing carriage return which upsets mingw
+    ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;;
+  *)
+    ac_prog=`($CC -print-prog-name=ld) 2>&5` ;;
+  esac
+  case $ac_prog in
+    # Accept absolute paths.
+    [\\/]* | ?:[\\/]*)
+      re_direlt='/[^/][^/]*/\.\./'
+      # Canonicalize the pathname of ld
+      ac_prog=`$ECHO "$ac_prog"| $SED 's%\\\\%/%g'`
+      while $ECHO "$ac_prog" | $GREP "$re_direlt" > /dev/null 2>&1; do
+       ac_prog=`$ECHO $ac_prog| $SED "s%$re_direlt%/%"`
+      done
+      test -z "$LD" && LD="$ac_prog"
+      ;;
+  "")
+    # If it fails, then pretend we aren't using GCC.
+    ac_prog=ld
+    ;;
+  *)
+    # If it is relative, then search for the first ld in PATH.
+    with_gnu_ld=unknown
+    ;;
+  esac
+elif test "$with_gnu_ld" = yes; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for GNU ld" >&5
+$as_echo_n "checking for GNU ld... " >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for non-GNU ld" >&5
+$as_echo_n "checking for non-GNU ld... " >&6; }
+fi
+if test "${lt_cv_path_LD+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -z "$LD"; then
+  lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR
+  for ac_dir in $PATH; do
+    IFS="$lt_save_ifs"
+    test -z "$ac_dir" && ac_dir=.
+    if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then
+      lt_cv_path_LD="$ac_dir/$ac_prog"
+      # Check to see if the program is GNU ld.  I'd rather use --version,
+      # but apparently some variants of GNU ld only accept -v.
+      # Break only if it was the GNU/non-GNU ld that we prefer.
+      case `"$lt_cv_path_LD" -v 2>&1 </dev/null` in
+      *GNU* | *'with BFD'*)
+       test "$with_gnu_ld" != no && break
+       ;;
+      *)
+       test "$with_gnu_ld" != yes && break
+       ;;
+      esac
+    fi
+  done
+  IFS="$lt_save_ifs"
+else
+  lt_cv_path_LD="$LD" # Let the user override the test with a path.
+fi
+fi
+
+LD="$lt_cv_path_LD"
+if test -n "$LD"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $LD" >&5
+$as_echo "$LD" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+test -z "$LD" && as_fn_error "no acceptable ld found in \$PATH" "$LINENO" 5
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if the linker ($LD) is GNU ld" >&5
+$as_echo_n "checking if the linker ($LD) is GNU ld... " >&6; }
+if test "${lt_cv_prog_gnu_ld+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  # I'd rather use --version here, but apparently some GNU lds only accept -v.
+case `$LD -v 2>&1 </dev/null` in
+*GNU* | *'with BFD'*)
+  lt_cv_prog_gnu_ld=yes
+  ;;
+*)
+  lt_cv_prog_gnu_ld=no
+  ;;
+esac
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_gnu_ld" >&5
+$as_echo "$lt_cv_prog_gnu_ld" >&6; }
+with_gnu_ld=$lt_cv_prog_gnu_ld
+
+
+
+
+
+
+
+
+
+# Report whether symbolic links are usable.  No probing happens here:
+# LN_S is copied from $as_ln_s, which is set elsewhere in the script.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ln -s works" >&5
+$as_echo_n "checking whether ln -s works... " >&6; }
+LN_S=$as_ln_s
+# "yes" is reported only when the value is exactly "ln -s"; any other
+# value is shown to the user as the substitute command.
+if test "$LN_S" = "ln -s"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no, using $LN_S" >&5
+$as_echo "no, using $LN_S" >&6; }
+fi
+
+# find the maximum length of command line arguments
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking the maximum length of command line arguments" >&5
+$as_echo_n "checking the maximum length of command line arguments... " >&6; }
+if test "${lt_cv_sys_max_cmd_len+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+    i=0
+  teststring="ABCD"
+
+  case $build_os in
+  msdosdjgpp*)
+    # On DJGPP, this test can blow up pretty badly due to problems in libc
+    # (any single argument exceeding 2000 bytes causes a buffer overrun
+    # during glob expansion).  Even if it were fixed, the result of this
+    # check would be larger than it should be.
+    lt_cv_sys_max_cmd_len=12288;    # 12K is about right
+    ;;
+
+  gnu*)
+    # Under GNU Hurd, this test is not required because there is
+    # no limit to the length of command line arguments.
+    # Libtool will interpret -1 as no limit whatsoever
+    lt_cv_sys_max_cmd_len=-1;
+    ;;
+
+  cygwin* | mingw* | cegcc*)
+    # On Win9x/ME, this test blows up -- it succeeds, but takes
+    # about 5 minutes as the teststring grows exponentially.
+    # Worse, since 9x/ME are not pre-emptively multitasking,
+    # you end up with a "frozen" computer, even though with patience
+    # the test eventually succeeds (with a max line length of 256k).
+    # Instead, let's just punt: use the minimum linelength reported by
+    # all of the supported platforms: 8192 (on NT/2K/XP).
+    lt_cv_sys_max_cmd_len=8192;
+    ;;
+
+  mint*)
+    # On MiNT this can take a long time and run out of memory.
+    lt_cv_sys_max_cmd_len=8192;
+    ;;
+
+  amigaos*)
+    # On AmigaOS with pdksh, this test takes hours, literally.
+    # So we just punt and use a minimum line length of 8192.
+    lt_cv_sys_max_cmd_len=8192;
+    ;;
+
+  netbsd* | freebsd* | openbsd* | darwin* | dragonfly*)
+    # This has been around since 386BSD, at least.  Likely further.
+    if test -x /sbin/sysctl; then
+      lt_cv_sys_max_cmd_len=`/sbin/sysctl -n kern.argmax`
+    elif test -x /usr/sbin/sysctl; then
+      lt_cv_sys_max_cmd_len=`/usr/sbin/sysctl -n kern.argmax`
+    else
+      lt_cv_sys_max_cmd_len=65536      # usable default for all BSDs
+    fi
+    # And add a safety zone
+    lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4`
+    lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3`
+    ;;
+
+  interix*)
+    # We know the value 262144 and hardcode it with a safety zone (like BSD)
+    lt_cv_sys_max_cmd_len=196608
+    ;;
+
+  os2*)
+    # The test takes a long time on OS/2.
+    lt_cv_sys_max_cmd_len=8192
+    ;;
+
+  osf*)
+    # Dr. Hans Ekkehard Plesser reports seeing a kernel panic running configure
+    # due to this test when exec_disable_arg_limit is 1 on Tru64. It is not
+    # nice to cause kernel panics so lets avoid the loop below.
+    # First set a reasonable default.
+    lt_cv_sys_max_cmd_len=16384
+    #
+    if test -x /sbin/sysconfig; then
+      case `/sbin/sysconfig -q proc exec_disable_arg_limit` in
+        *1*) lt_cv_sys_max_cmd_len=-1 ;;
+      esac
+    fi
+    ;;
+  sco3.2v5*)
+    lt_cv_sys_max_cmd_len=102400
+    ;;
+  sysv5* | sco5v6* | sysv4.2uw2*)
+    kargmax=`grep ARG_MAX /etc/conf/cf.d/stune 2>/dev/null`
+    if test -n "$kargmax"; then
+      lt_cv_sys_max_cmd_len=`echo $kargmax | sed 's/.*[         ]//'`
+    else
+      lt_cv_sys_max_cmd_len=32768
+    fi
+    ;;
+  *)
+    lt_cv_sys_max_cmd_len=`(getconf ARG_MAX) 2> /dev/null`
+    if test -n "$lt_cv_sys_max_cmd_len"; then
+      lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4`
+      lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3`
+    else
+      # Make teststring a little bigger before we do anything with it.
+      # a 1K string should be a reasonable start.
+      for i in 1 2 3 4 5 6 7 8 ; do
+        teststring=$teststring$teststring
+      done
+      SHELL=${SHELL-${CONFIG_SHELL-/bin/sh}}
+      # If test is not a shell built-in, we'll probably end up computing a
+      # maximum length that is only half of the actual maximum length, but
+      # we can't tell.
+      while { test "X"`env echo "$teststring$teststring" 2>/dev/null` \
+                = "X$teststring$teststring"; } >/dev/null 2>&1 &&
+             test $i != 17 # 1/2 MB should be enough
+      do
+        i=`expr $i + 1`
+        teststring=$teststring$teststring
+      done
+      # Only check the string length outside the loop.
+      lt_cv_sys_max_cmd_len=`expr "X$teststring" : ".*" 2>&1`
+      teststring=
+      # Add a significant safety factor because C++ compilers can tack on
+      # massive amounts of additional arguments before passing them to the
+      # linker.  It appears as though 1/2 is a usable value.
+      lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 2`
+    fi
+    ;;
+  esac
+
+fi
+# Report the detected limit; quoting lets an empty value reach the "none" branch.
+if test -n "$lt_cv_sys_max_cmd_len"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_sys_max_cmd_len" >&5
+$as_echo "$lt_cv_sys_max_cmd_len" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: none" >&5
+$as_echo "none" >&6; }
+fi
+max_cmd_len=$lt_cv_sys_max_cmd_len
+
+
+
+
+
+# Default file-handling commands; a value that is already set (even empty) is kept.
+test "${CP+set}" = set || CP="cp -f"
+test "${MV+set}" = set || MV="mv -f"
+test "${RM+set}" = set || RM="rm -f"
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the shell understands some XSI constructs" >&5
+$as_echo_n "checking whether the shell understands some XSI constructs... " >&6; }
+# In a subshell, try ${v##pat}, ${v%pat}, ${v#pat}, $(( )) and ${#v}; any failure leaves xsi_shell=no.
+xsi_shell=no
+( _lt_dummy="a/b/c"
+  test "${_lt_dummy##*/},${_lt_dummy%/*},${_lt_dummy#??}"${_lt_dummy%"$_lt_dummy"}, \
+      = c,a/b,b/c, \
+    && eval 'test $(( 1 + 1 )) -eq 2 \
+    && test "${#_lt_dummy}" -eq 5' ) >/dev/null 2>&1 \
+  && xsi_shell=yes
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $xsi_shell" >&5
+$as_echo "$xsi_shell" >&6; }
+
+# In a subshell, check that "var+=value" appends to a variable; lt_shell_append stays "no" otherwise.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the shell understands \"+=\"" >&5
+$as_echo_n "checking whether the shell understands \"+=\"... " >&6; }
+lt_shell_append=no
+( foo=bar; set foo baz; eval "$1+=\$2" && test "$foo" = barbaz ) \
+    >/dev/null 2>&1 \
+  && lt_shell_append=yes
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_shell_append" >&5
+$as_echo "$lt_shell_append" >&6; }
+
+
+# Try "unset" inside a subshell; lt_unset names the command to use for
+# unsetting variables, or stays "false" when the shell cannot do it.
+lt_unset=false
+( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1 \
+  && lt_unset=unset
+
+
+
+
+
+# Choose tr commands for space<->newline conversion; octal 101 is "A" only on ASCII systems.
+case `echo X|tr X '\101'` in
+ A) # ASCII based system: space is \040, CR is \015, LF is \012
+    # \n is not interpreted correctly by Solaris 8 /usr/ucb/tr, so octal escapes are used
+  lt_SP2NL='tr \040 \012'
+  lt_NL2SP='tr \015\012 \040\040'
+  ;;
+ *) # EBCDIC based system: \100 is translated to and from the line terminators
+  lt_SP2NL='tr \100 \n'
+  lt_NL2SP='tr \r\n \100\100'
+  ;;
+esac
+
+
+
+
+
+
+
+
+# Choose the func_convert_file_* function that converts $build file names to $host format.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to convert $build file names to $host format" >&5
+$as_echo_n "checking how to convert $build file names to $host format... " >&6; }
+if test "${lt_cv_to_host_file_cmd+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  case $host in
+  *-*-mingw* )
+    case $build in
+      *-*-mingw* ) # actually msys
+        lt_cv_to_host_file_cmd=func_convert_file_msys_to_w32
+        ;;
+      *-*-cygwin* )
+        lt_cv_to_host_file_cmd=func_convert_file_cygwin_to_w32
+        ;;
+      * ) # otherwise, assume *nix
+        lt_cv_to_host_file_cmd=func_convert_file_nix_to_w32
+        ;;
+    esac
+    ;;
+  *-*-cygwin* )
+    case $build in
+      *-*-mingw* ) # actually msys
+        lt_cv_to_host_file_cmd=func_convert_file_msys_to_cygwin
+        ;;
+      *-*-cygwin* )
+        lt_cv_to_host_file_cmd=func_convert_file_noop
+        ;;
+      * ) # otherwise, assume *nix
+        lt_cv_to_host_file_cmd=func_convert_file_nix_to_cygwin
+        ;;
+    esac
+    ;;
+  * ) # unhandled hosts (and "normal" native builds)
+    lt_cv_to_host_file_cmd=func_convert_file_noop
+    ;;
+esac
+
+fi
+
+to_host_file_cmd=$lt_cv_to_host_file_cmd
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_to_host_file_cmd" >&5
+$as_echo "$lt_cv_to_host_file_cmd" >&6; }
+
+
+
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to convert $build file names to toolchain format" >&5
+$as_echo_n "checking how to convert $build file names to toolchain format... " >&6; }
+if test "${lt_cv_to_tool_file_cmd+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  # Assume ordinary cross tools or a native build (no conversion); only msys-to-mingw overrides it.
+lt_cv_to_tool_file_cmd=func_convert_file_noop
+case $host in
+  *-*-mingw* )
+    case $build in
+      *-*-mingw* ) # actually msys
+        lt_cv_to_tool_file_cmd=func_convert_file_msys_to_w32
+        ;;
+    esac
+    ;;
+esac
+
+fi
+
+to_tool_file_cmd=$lt_cv_to_tool_file_cmd
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_to_tool_file_cmd" >&5
+$as_echo "$lt_cv_to_tool_file_cmd" >&6; }
+
+
+
+
+# Settle the $LD flag (default -r) and the command template used to reload object files.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $LD option to reload object files" >&5
+$as_echo_n "checking for $LD option to reload object files... " >&6; }
+if test "${lt_cv_ld_reload_flag+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  lt_cv_ld_reload_flag='-r'
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ld_reload_flag" >&5
+$as_echo "$lt_cv_ld_reload_flag" >&6; }
+reload_flag=$lt_cv_ld_reload_flag
+case $reload_flag in
+"" | " "*) ;;   # empty or already starts with a space: keep as is
+*) reload_flag=" $reload_flag" ;;   # prefix a space so it can follow $LD directly in reload_cmds
+esac
+reload_cmds='$LD$reload_flag -o $output$reload_objs'
+case $host_os in
+  cygwin* | mingw* | pw32* | cegcc*)
+    if test "$GCC" != yes; then
+      reload_cmds=false
+    fi
+    ;;
+  darwin*)
+    if test "$GCC" = yes; then
+      reload_cmds='$LTCC $LTCFLAGS -nostdlib ${wl}-r -o $output$reload_objs'
+    else
+      reload_cmds='$LD$reload_flag -o $output$reload_objs'
+    fi
+    ;;
+esac
+
+
+
+
+
+
+
+
+
+if test -n "$ac_tool_prefix"; then
+  # First pass: search $PATH for the first word of the host-prefixed "${ac_tool_prefix}objdump".
+set dummy ${ac_tool_prefix}objdump; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_OBJDUMP+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$OBJDUMP"; then
+  ac_cv_prog_OBJDUMP="$OBJDUMP" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_prog_OBJDUMP="${ac_tool_prefix}objdump"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+OBJDUMP=$ac_cv_prog_OBJDUMP
+if test -n "$OBJDUMP"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $OBJDUMP" >&5
+$as_echo "$OBJDUMP" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_OBJDUMP"; then
+  ac_ct_OBJDUMP=$OBJDUMP
+  # Second pass: search $PATH for the first word of the unprefixed "objdump".
+set dummy objdump; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_ac_ct_OBJDUMP+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_OBJDUMP"; then
+  ac_cv_prog_ac_ct_OBJDUMP="$ac_ct_OBJDUMP" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_prog_ac_ct_OBJDUMP="objdump"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_OBJDUMP=$ac_cv_prog_ac_ct_OBJDUMP
+if test -n "$ac_ct_OBJDUMP"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_OBJDUMP" >&5
+$as_echo "$ac_ct_OBJDUMP" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+  if test "x$ac_ct_OBJDUMP" = x; then
+    OBJDUMP="false" # neither pass produced a program name
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    OBJDUMP=$ac_ct_OBJDUMP
+  fi
+else
+  OBJDUMP="$ac_cv_prog_OBJDUMP"
+fi
+
+test -z "$OBJDUMP" && OBJDUMP=objdump
+
+
+
+
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to recognize dependent libraries" >&5
+$as_echo_n "checking how to recognize dependent libraries... " >&6; }
+if test "${lt_cv_deplibs_check_method+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  lt_cv_file_magic_cmd='$MAGIC_CMD'
+lt_cv_file_magic_test_file=
+lt_cv_deplibs_check_method='unknown'
+# lt_cv_deplibs_check_method must be set on all platforms that support
+# interlibrary dependencies.  Possible values:
+# 'none' -- dependencies not supported.
+# 'unknown' -- same as none, but documents that we really don't know.
+# 'pass_all' -- all dependencies passed with no checks.
+# 'test_compile' -- check by making test program.
+# 'file_magic [regex]' -- check by looking for files in the library path
+# that respond to the $file_magic_cmd with a given extended regex.
+# If you have 'file' or equivalent on your system and you're not sure
+# whether 'pass_all' will *always* work, you probably want this one.
+
+case $host_os in
+aix[4-9]*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+beos*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+bsdi[45]*)
+  lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (shared object|dynamic lib)'
+  lt_cv_file_magic_cmd='/usr/bin/file -L'
+  lt_cv_file_magic_test_file=/shlib/libc.so
+  ;;
+
+cygwin*)
+  # func_win32_libid is a shell function defined in ltmain.sh
+  lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL'
+  lt_cv_file_magic_cmd='func_win32_libid'
+  ;;
+
+mingw* | pw32*)
+  # Base MSYS/MinGW do not provide the 'file' command needed by
+  # func_win32_libid shell function, so use a weaker test based on 'objdump',
+  # unless we find 'file', for example because we are cross-compiling.
+  # func_win32_libid assumes BSD nm, so disallow it if using MS dumpbin.
+  if ( test "$lt_cv_nm_interface" = "BSD nm" && file / ) >/dev/null 2>&1; then
+    lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL'
+    lt_cv_file_magic_cmd='func_win32_libid'
+  else
+    # Keep this pattern in sync with the one in func_win32_libid.
+    lt_cv_deplibs_check_method='file_magic file format (pei*-i386(.*architecture: i386)?|pe-arm-wince|pe-x86-64)'
+    lt_cv_file_magic_cmd='$OBJDUMP -f'
+  fi
+  ;;
+
+cegcc*)
+  # use the weaker test based on 'objdump'. See mingw*.
+  lt_cv_deplibs_check_method='file_magic file format pe-arm-.*little(.*architecture: arm)?'
+  lt_cv_file_magic_cmd='$OBJDUMP -f'
+  ;;
+
+darwin* | rhapsody*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+freebsd* | dragonfly*)
+  if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then
+    case $host_cpu in
+    i*86 )
+      # Not sure whether the presence of OpenBSD here was a mistake.
+      # Let's accept both of them until this is cleared up.
+      lt_cv_deplibs_check_method='file_magic (FreeBSD|OpenBSD|DragonFly)/i[3-9]86 (compact )?demand paged shared library'
+      lt_cv_file_magic_cmd=/usr/bin/file
+      lt_cv_file_magic_test_file=`echo /usr/lib/libc.so.*`
+      ;;
+    esac
+  else
+    lt_cv_deplibs_check_method=pass_all
+  fi
+  ;;
+
+gnu*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+haiku*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+hpux10.20* | hpux11*)
+  lt_cv_file_magic_cmd=/usr/bin/file
+  case $host_cpu in
+  ia64*)
+    lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|ELF-[0-9][0-9]) shared object file - IA64'
+    lt_cv_file_magic_test_file=/usr/lib/hpux32/libc.so
+    ;;
+  hppa*64*)
+    lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|ELF[ -][0-9][0-9])(-bit)?( [LM]SB)? shared object( file)?[, -]* PA-RISC [0-9]\.[0-9]'
+    lt_cv_file_magic_test_file=/usr/lib/pa20_64/libc.sl
+    ;;
+  *)
+    lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|PA-RISC[0-9]\.[0-9]) shared library'
+    lt_cv_file_magic_test_file=/usr/lib/libc.sl
+    ;;
+  esac
+  ;;
+
+interix[3-9]*)
+  # PIC code is broken on Interix 3.x, that's why |\.a not |_pic\.a here
+  lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so|\.a)$'
+  ;;
+
+irix5* | irix6* | nonstopux*)
+  case $LD in # NOTE(review): libmagic is not read again in the visible part of this script
+  *-32|*"-32 ") libmagic=32-bit;;
+  *-n32|*"-n32 ") libmagic=N32;;
+  *-64|*"-64 ") libmagic=64-bit;;
+  *) libmagic=never-match;;
+  esac
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+# This must be glibc/ELF.
+linux* | k*bsd*-gnu | kopensolaris*-gnu)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+netbsd*)
+  if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then
+    lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so\.[0-9]+\.[0-9]+|_pic\.a)$'
+  else
+    lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so|_pic\.a)$'
+  fi
+  ;;
+
+newos6*)
+  lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (executable|dynamic lib)'
+  lt_cv_file_magic_cmd=/usr/bin/file
+  lt_cv_file_magic_test_file=/usr/lib/libnls.so
+  ;;
+
+*nto* | *qnx*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+openbsd*)
+  if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
+    lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so\.[0-9]+\.[0-9]+|\.so|_pic\.a)$'
+  else
+    lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so\.[0-9]+\.[0-9]+|_pic\.a)$'
+  fi
+  ;;
+
+osf3* | osf4* | osf5*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+rdos*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+solaris*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+sysv4 | sysv4.3*)
+  case $host_vendor in
+  motorola)
+    lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (shared object|dynamic lib) M[0-9][0-9]* Version [0-9]'
+    lt_cv_file_magic_test_file=`echo /usr/lib/libc.so*`
+    ;;
+  ncr)
+    lt_cv_deplibs_check_method=pass_all
+    ;;
+  sequent)
+    lt_cv_file_magic_cmd='/bin/file'
+    lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [LM]SB (shared object|dynamic lib )'
+    ;;
+  sni)
+    lt_cv_file_magic_cmd='/bin/file'
+    lt_cv_deplibs_check_method="file_magic ELF [0-9][0-9]*-bit [LM]SB dynamic lib"
+    lt_cv_file_magic_test_file=/lib/libc.so
+    ;;
+  siemens)
+    lt_cv_deplibs_check_method=pass_all
+    ;;
+  pc)
+    lt_cv_deplibs_check_method=pass_all
+    ;;
+  esac
+  ;;
+
+tpf*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+esac
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_deplibs_check_method" >&5
+$as_echo "$lt_cv_deplibs_check_method" >&6; }
+# Native mingw/pw32 only: use nocaseglob if shopt lists it, else build a sed script of [xX] classes.
+file_magic_glob=
+want_nocaseglob=no
+if test "$build" = "$host"; then
+  case $host_os in
+  mingw* | pw32*)
+    if ( shopt | grep nocaseglob ) >/dev/null 2>&1; then
+      want_nocaseglob=yes
+    else
+      file_magic_glob=`echo aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZ | $SED -e "s/\(..\)/s\/[\1]\/[\1]\/g;/g"`
+    fi
+    ;;
+  esac
+fi
+
+file_magic_cmd=$lt_cv_file_magic_cmd
+deplibs_check_method=$lt_cv_deplibs_check_method
+test -z "$deplibs_check_method" && deplibs_check_method=unknown
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+if test -n "$ac_tool_prefix"; then
+  # First pass: search $PATH for the first word of the host-prefixed "${ac_tool_prefix}dlltool".
+set dummy ${ac_tool_prefix}dlltool; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_DLLTOOL+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$DLLTOOL"; then
+  ac_cv_prog_DLLTOOL="$DLLTOOL" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_prog_DLLTOOL="${ac_tool_prefix}dlltool"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+DLLTOOL=$ac_cv_prog_DLLTOOL
+if test -n "$DLLTOOL"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $DLLTOOL" >&5
+$as_echo "$DLLTOOL" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_DLLTOOL"; then
+  ac_ct_DLLTOOL=$DLLTOOL
+  # Second pass: search $PATH for the first word of the unprefixed "dlltool".
+set dummy dlltool; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_ac_ct_DLLTOOL+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_DLLTOOL"; then
+  ac_cv_prog_ac_ct_DLLTOOL="$ac_ct_DLLTOOL" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_prog_ac_ct_DLLTOOL="dlltool"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_DLLTOOL=$ac_cv_prog_ac_ct_DLLTOOL
+if test -n "$ac_ct_DLLTOOL"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_DLLTOOL" >&5
+$as_echo "$ac_ct_DLLTOOL" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+  if test "x$ac_ct_DLLTOOL" = x; then
+    DLLTOOL="false" # neither pass produced a program name
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    DLLTOOL=$ac_ct_DLLTOOL
+  fi
+else
+  DLLTOOL="$ac_cv_prog_DLLTOOL"
+fi
+
+test -z "$DLLTOOL" && DLLTOOL=dlltool
+
+
+
+
+
+
+# Choose the command that associates a link library with its runtime library.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to associate runtime and link libraries" >&5
+$as_echo_n "checking how to associate runtime and link libraries... " >&6; }
+if test "${lt_cv_sharedlib_from_linklib_cmd+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  lt_cv_sharedlib_from_linklib_cmd='unknown'
+
+case $host_os in
+cygwin* | mingw* | pw32* | cegcc*)
+  # two different shell functions defined in ltmain.sh
+  # decide which to use based on capabilities of $DLLTOOL
+  case `$DLLTOOL --help 2>&1` in
+  *--identify-strict*)
+    lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib
+    ;;
+  *)
+    lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib_fallback
+    ;;
+  esac
+  ;;
+*)
+  # fallback: assume linklib IS sharedlib
+  lt_cv_sharedlib_from_linklib_cmd="$ECHO"
+  ;;
+esac
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_sharedlib_from_linklib_cmd" >&5
+$as_echo "$lt_cv_sharedlib_from_linklib_cmd" >&6; }
+sharedlib_from_linklib_cmd=$lt_cv_sharedlib_from_linklib_cmd
+test -z "$sharedlib_from_linklib_cmd" && sharedlib_from_linklib_cmd=$ECHO
+
+
+
+
+
+
+
+if test -n "$ac_tool_prefix"; then
+  for ac_prog in ar
+  do
+    # First pass: search $PATH for the first word of the host-prefixed "$ac_tool_prefix$ac_prog".
+set dummy $ac_tool_prefix$ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_AR+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$AR"; then
+  ac_cv_prog_AR="$AR" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_prog_AR="$ac_tool_prefix$ac_prog"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+AR=$ac_cv_prog_AR
+if test -n "$AR"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $AR" >&5
+$as_echo "$AR" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+    test -n "$AR" && break
+  done
+fi
+if test -z "$AR"; then
+  ac_ct_AR=$AR
+  for ac_prog in ar
+do
+  # Second pass: search $PATH for the first word of the unprefixed "$ac_prog".
+set dummy $ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_ac_ct_AR+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_AR"; then
+  ac_cv_prog_ac_ct_AR="$ac_ct_AR" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_prog_ac_ct_AR="$ac_prog"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_AR=$ac_cv_prog_ac_ct_AR
+if test -n "$ac_ct_AR"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_AR" >&5
+$as_echo "$ac_ct_AR" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+  test -n "$ac_ct_AR" && break
+done
+
+  if test "x$ac_ct_AR" = x; then
+    AR="false" # neither pass produced a program name
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    AR=$ac_ct_AR
+  fi
+fi
+
+: ${AR=ar}
+: ${AR_FLAGS=cru}
+
+
+
+
+
+
+
+
+
+
+# Check whether $AR accepts "@FILE", a file listing the objects: it must work, then fail once the object is gone.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for archiver @FILE support" >&5
+$as_echo_n "checking for archiver @FILE support... " >&6; }
+if test "${lt_cv_ar_at_file+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  lt_cv_ar_at_file=no
+   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  echo conftest.$ac_objext > conftest.lst
+      lt_ar_try='$AR $AR_FLAGS libconftest.a @conftest.lst >&5'
+      { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$lt_ar_try\""; } >&5
+  (eval $lt_ar_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }
+      if test "$ac_status" -eq 0; then
+       # Ensure the archiver fails upon bogus file names.
+       rm -f conftest.$ac_objext libconftest.a
+       { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$lt_ar_try\""; } >&5
+  (eval $lt_ar_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }
+       if test "$ac_status" -ne 0; then
+          lt_cv_ar_at_file=@
+        fi
+      fi
+      rm -f conftest.* libconftest.a
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ar_at_file" >&5
+$as_echo "$lt_cv_ar_at_file" >&6; }
+
+if test "x$lt_cv_ar_at_file" = xno; then
+  archiver_list_spec=
+else
+  archiver_list_spec=$lt_cv_ar_at_file
+fi
+
+
+
+
+
+
+
+if test -n "$ac_tool_prefix"; then
+  # First pass: search $PATH for the first word of the host-prefixed "${ac_tool_prefix}strip".
+set dummy ${ac_tool_prefix}strip; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_STRIP+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$STRIP"; then
+  ac_cv_prog_STRIP="$STRIP" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_prog_STRIP="${ac_tool_prefix}strip"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+STRIP=$ac_cv_prog_STRIP
+if test -n "$STRIP"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $STRIP" >&5
+$as_echo "$STRIP" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_STRIP"; then
+  ac_ct_STRIP=$STRIP
+  # Second pass: search $PATH for the first word of the unprefixed "strip".
+set dummy strip; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_ac_ct_STRIP+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_STRIP"; then
+  ac_cv_prog_ac_ct_STRIP="$ac_ct_STRIP" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_prog_ac_ct_STRIP="strip"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_STRIP=$ac_cv_prog_ac_ct_STRIP
+if test -n "$ac_ct_STRIP"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_STRIP" >&5
+$as_echo "$ac_ct_STRIP" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+  if test "x$ac_ct_STRIP" = x; then
+    STRIP=":" # neither pass produced a program name; ":" is used in its place
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    STRIP=$ac_ct_STRIP
+  fi
+else
+  STRIP="$ac_cv_prog_STRIP"
+fi
+
+test -z "$STRIP" && STRIP=:
+
+
+
+
+
+
+if test -n "$ac_tool_prefix"; then
+  # First pass: search $PATH for the first word of the host-prefixed "${ac_tool_prefix}ranlib".
+set dummy ${ac_tool_prefix}ranlib; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_RANLIB+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$RANLIB"; then
+  ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_prog_RANLIB="${ac_tool_prefix}ranlib"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+RANLIB=$ac_cv_prog_RANLIB
+if test -n "$RANLIB"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $RANLIB" >&5
+$as_echo "$RANLIB" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_RANLIB"; then
+  ac_ct_RANLIB=$RANLIB
+  # Second pass: search $PATH for the first word of the unprefixed "ranlib".
+set dummy ranlib; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_ac_ct_RANLIB+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_RANLIB"; then
+  ac_cv_prog_ac_ct_RANLIB="$ac_ct_RANLIB" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_prog_ac_ct_RANLIB="ranlib"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_RANLIB=$ac_cv_prog_ac_ct_RANLIB
+if test -n "$ac_ct_RANLIB"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_RANLIB" >&5
+$as_echo "$ac_ct_RANLIB" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+  if test "x$ac_ct_RANLIB" = x; then
+    RANLIB=":" # neither pass produced a program name; ":" is used in its place
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    RANLIB=$ac_ct_RANLIB
+  fi
+else
+  RANLIB="$ac_cv_prog_RANLIB"
+fi
+
+test -z "$RANLIB" && RANLIB=:
+
+
+
+
+
+
+# Determine commands to create old-style static archives and to fix them up after installation.
+old_archive_cmds='$AR $AR_FLAGS $oldlib$oldobjs'
+old_postinstall_cmds='chmod 644 $oldlib'
+old_postuninstall_cmds=
+# With a non-empty $RANLIB, append a $RANLIB step to both command lists (openbsd installs add -t).
+if test -n "$RANLIB"; then
+  case $host_os in
+  openbsd*)
+    old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB -t \$tool_oldlib"
+    ;;
+  *)
+    old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB \$tool_oldlib"
+    ;;
+  esac
+  old_archive_cmds="$old_archive_cmds~\$RANLIB \$tool_oldlib"
+fi
+# Only darwin hosts set lock_old_archive_extraction to yes.
+case $host_os in
+  darwin*)
+    lock_old_archive_extraction=yes ;;
+  *)
+    lock_old_archive_extraction=no ;;
+esac
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+# Fall back to $CC when LTCC has not been set.
+test "${LTCC+set}" = set || LTCC=$CC
+
+# Fall back to $CFLAGS when LTCFLAGS has not been set.
+test "${LTCFLAGS+set}" = set || LTCFLAGS=$CFLAGS
+
+# Keep the whole $CC value, which may be a program name with arguments.
+compiler=$CC
+
+
+# Check for command to grab the raw symbol name followed by C symbol from nm.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking command to parse $NM output from $compiler object" >&5
+$as_echo_n "checking command to parse $NM output from $compiler object... " >&6; }
+if test "${lt_cv_sys_global_symbol_pipe+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+
+# These are sane defaults that work on at least a few old systems.
+# [They come from Ultrix.  What could be older than Ultrix?!! ;)]
+
+# Regexp to match symbols that can be accessed directly from C
+# (one C identifier, captured as a single group).
+sympat='\([_A-Za-z][_A-Za-z0-9]*\)'
+
+# Character class describing NM global symbol codes.  Start from the
+# generic default and let the host OS override it where needed.
+symcode='[BCDEGRST]'
+case $host_os in
+aix*)                                      symcode='[BCDT]' ;;
+cygwin* | mingw* | pw32* | cegcc*)         symcode='[ABCDGISTW]' ;;
+hpux*)
+  # Only the ia64 flavour departs from the default.
+  case $host_cpu in
+  ia64) symcode='[ABCDEGRST]' ;;
+  esac
+  ;;
+irix* | nonstopux*)                        symcode='[BCDEGRST]' ;;
+osf*)                                      symcode='[BCDEGQRST]' ;;
+solaris*)                                  symcode='[BDRT]' ;;
+sco3.2v5* | sysv4.2uw2*)                   symcode='[DT]' ;;
+sysv5* | sco5v6* | unixware* | OpenUNIX*)  symcode='[ABDT]' ;;
+sysv4)                                     symcode='[DFNSTU]' ;;
+esac
+
+# GNU nm identifies itself in its -V output; when it does, its standard
+# symbol codes take precedence over the per-OS choice made above.
+case `$NM -V 2>&1` in
+*GNU* | *'with BFD'*) symcode='[ABCDGIRSTW]' ;;
+esac
+
+# Transform an extracted symbol line into a proper C declaration.
+# Some systems (esp. on ia64) link data and code symbols differently,
+# so use this general approach.
+lt_cv_sys_global_symbol_to_cdecl="sed -n -e 's/^T .* \(.*\)$/extern int \1();/p' -e 's/^$symcode* .* \(.*\)$/extern char \1;/p'"
+
+# Transform an extracted symbol line into symbol name and symbol address
+lt_cv_sys_global_symbol_to_c_name_address="sed -n -e 's/^: \([^ ]*\)[ ]*$/  {\\\"\1\\\", (void *) 0},/p' -e 's/^$symcode* \([^ ]*\) \([^ ]*\)$/  {\"\2\", (void *) \&\2},/p'"
+lt_cv_sys_global_symbol_to_c_name_address_lib_prefix="sed -n -e 's/^: \([^ ]*\)[ ]*$/  {\\\"\1\\\", (void *) 0},/p' -e 's/^$symcode* \([^ ]*\) \(lib[^ ]*\)$/  {\"\2\", (void *) \&\2},/p' -e 's/^$symcode* \([^ ]*\) \([^ ]*\)$/  {\"lib\2\", (void *) \&\2},/p'"
+
+# Handle CRLF in mingw tool chain
+opt_cr=
+case $build_os in
+mingw*)
+  opt_cr=`$ECHO 'x\{0,1\}' | tr x '\015'` # option cr in regexp
+  ;;
+esac
+
+# Try without a prefix underscore, then with it.
+for ac_symprfx in "" "_"; do
+
+  # Transform symcode, sympat, and symprfx into a raw symbol and a C symbol.
+  symxfrm="\\1 $ac_symprfx\\2 \\2"
+
+  # Write the raw and C identifiers.
+  if test "$lt_cv_nm_interface" = "MS dumpbin"; then
+    # Fake it for dumpbin and say T for any non-static function
+    # and D for any global variable.
+    # Also find C++ and __fastcall symbols from MSVC++,
+    # which start with @ or ?.
+    lt_cv_sys_global_symbol_pipe="$AWK '"\
+"     {last_section=section; section=\$ 3};"\
+"     /^COFF SYMBOL TABLE/{for(i in hide) delete hide[i]};"\
+"     /Section length .*#relocs.*(pick any)/{hide[last_section]=1};"\
+"     \$ 0!~/External *\|/{next};"\
+"     / 0+ UNDEF /{next}; / UNDEF \([^|]\)*()/{next};"\
+"     {if(hide[section]) next};"\
+"     {f=0}; \$ 0~/\(\).*\|/{f=1}; {printf f ? \"T \" : \"D \"};"\
+"     {split(\$ 0, a, /\||\r/); split(a[2], s)};"\
+"     s[1]~/^[@?]/{print s[1], s[1]; next};"\
+"     s[1]~prfx {split(s[1],t,\"@\"); print t[1], substr(t[1],length(prfx))}"\
+"     ' prfx=^$ac_symprfx"
+  else
+    lt_cv_sys_global_symbol_pipe="sed -n -e 's/^.*[     ]\($symcode$symcode*\)[         ][      ]*$ac_symprfx$sympat$opt_cr$/$symxfrm/p'"
+  fi
+  lt_cv_sys_global_symbol_pipe="$lt_cv_sys_global_symbol_pipe | sed '/ __gnu_lto/d'"
+
+  # Check to see that the pipe works correctly.
+  # Outline: compile a tiny program that defines one global variable and one
+  # global function, run $NM on the object through the candidate pipe, make
+  # sure both symbols show up, then generate a C table of the extracted
+  # symbols and link it against the object.  pipe_works becomes "yes" only
+  # if every one of these steps succeeds.
+  pipe_works=no
+
+  rm -f conftest*
+  cat > conftest.$ac_ext <<_LT_EOF
+#ifdef __cplusplus
+extern "C" {
+#endif
+char nm_test_var;
+void nm_test_func(void);
+void nm_test_func(void){}
+#ifdef __cplusplus
+}
+#endif
+int main(){nm_test_var='a';nm_test_func();return(0);}
+_LT_EOF
+
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5
+  (eval $ac_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+    # Now try to grab the symbols.
+    nlist=conftest.nm
+    if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$NM conftest.$ac_objext \| "$lt_cv_sys_global_symbol_pipe" \> $nlist\""; } >&5
+  (eval $NM conftest.$ac_objext \| "$lt_cv_sys_global_symbol_pipe" \> $nlist) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; } && test -s "$nlist"; then
+      # Try sorting and uniquifying the output.
+      # This is best effort: if it fails, the unsorted list is used as is.
+      if sort "$nlist" | uniq > "$nlist"T; then
+       mv -f "$nlist"T "$nlist"
+      else
+       rm -f "$nlist"T
+      fi
+
+      # Make sure that we snagged all the symbols we need.
+      if $GREP ' nm_test_var$' "$nlist" >/dev/null; then
+       if $GREP ' nm_test_func$' "$nlist" >/dev/null; then
+         # Start a fresh source file: preamble first, then the generated
+         # declarations, then the name/address table.
+         cat <<_LT_EOF > conftest.$ac_ext
+/* Keep this code in sync between libtool.m4, ltmain, lt_system.h, and tests.  */
+#if defined(_WIN32) || defined(__CYGWIN__) || defined(_WIN32_WCE)
+/* DATA imports from DLLs on WIN32 can't be const, because runtime
+   relocations are performed -- see ld's documentation on pseudo-relocs.  */
+# define LT_DLSYM_CONST
+#elif defined(__osf__)
+/* This system does not cope well with relocations in const data.  */
+# define LT_DLSYM_CONST
+#else
+# define LT_DLSYM_CONST const
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+_LT_EOF
+         # Now generate the symbol file.
+         # Lines containing "main" are dropped from the generated output.
+         eval "$lt_cv_sys_global_symbol_to_cdecl"' < "$nlist" | $GREP -v main >> conftest.$ac_ext'
+
+         cat <<_LT_EOF >> conftest.$ac_ext
+
+/* The mapping between symbol names and symbols.  */
+LT_DLSYM_CONST struct {
+  const char *name;
+  void       *address;
+}
+lt__PROGRAM__LTX_preloaded_symbols[] =
+{
+  { "@PROGRAM@", (void *) 0 },
+_LT_EOF
+         # One table row per extracted symbol: {"name", (void *) &name}.
+         $SED "s/^$symcode$symcode* \(.*\) \(.*\)$/  {\"\2\", (void *) \&\2},/" < "$nlist" | $GREP -v main >> conftest.$ac_ext
+         cat <<\_LT_EOF >> conftest.$ac_ext
+  {0, (void *) 0}
+};
+
+/* This works around a problem in FreeBSD linker */
+#ifdef FREEBSD_WORKAROUND
+static const void *lt_preloaded_setup() {
+  return lt__PROGRAM__LTX_preloaded_symbols;
+}
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+_LT_EOF
+         # Now try linking the two files.
+         # The first object is renamed so the link step does not overwrite
+         # it, and is passed to the linker through LIBS.  LIBS and CFLAGS
+         # are restored afterwards whatever the outcome.
+         mv conftest.$ac_objext conftstm.$ac_objext
+         lt_globsym_save_LIBS=$LIBS
+         lt_globsym_save_CFLAGS=$CFLAGS
+         LIBS="conftstm.$ac_objext"
+         CFLAGS="$CFLAGS$lt_prog_compiler_no_builtin_flag"
+         if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_link\""; } >&5
+  (eval $ac_link) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; } && test -s conftest${ac_exeext}; then
+           pipe_works=yes
+         fi
+         LIBS=$lt_globsym_save_LIBS
+         CFLAGS=$lt_globsym_save_CFLAGS
+       else
+         echo "cannot find nm_test_func in $nlist" >&5
+       fi
+      else
+       echo "cannot find nm_test_var in $nlist" >&5
+      fi
+    else
+      echo "cannot run $lt_cv_sys_global_symbol_pipe" >&5
+    fi
+  else
+    # Compilation failed: record the offending source in the log.  The line
+    # is tagged with $as_me like every other log message of this check.
+    echo "$as_me: failed program was:" >&5
+    cat conftest.$ac_ext >&5
+  fi
+  rm -rf conftest* conftst*
+
+  # Do not use the global_symbol_pipe unless it works.
+  if test "$pipe_works" = yes; then
+    break
+  else
+    lt_cv_sys_global_symbol_pipe=
+  fi
+done
+
+fi
+
+if test -z "$lt_cv_sys_global_symbol_pipe"; then
+  lt_cv_sys_global_symbol_to_cdecl=
+fi
+if test -z "$lt_cv_sys_global_symbol_pipe$lt_cv_sys_global_symbol_to_cdecl"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: failed" >&5
+$as_echo "failed" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: ok" >&5
+$as_echo "ok" >&6; }
+fi
+
+# Response file support.
+# '@' is recorded as the list-file prefix when the nm interface is MS dumpbin
+# or when the output of "$NM --help" mentions an "@FILE" argument.
+if test "$lt_cv_nm_interface" = "MS dumpbin" ||
+   $NM --help 2>/dev/null | grep '[@]FILE' >/dev/null; then
+  nm_file_list_spec='@'
+fi
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for sysroot" >&5
+$as_echo_n "checking for sysroot... " >&6; }
+
+# Check whether --with-sysroot was given.
+# When it was not, with_sysroot defaults to "no".
+if test "${with_sysroot+set}" = set; then :
+  withval=$with_sysroot;
+else
+  with_sysroot=no
+fi
+
+
+# Translate the option value into lt_sysroot:
+#   yes          ask the compiler via --print-sysroot, but only if $GCC is "yes"
+#   /abs/path    the given path, filtered through $sed_quote_subst
+#   no or empty  lt_sysroot stays empty
+#   other        rejected: the value is echoed as the result, then an error
+#                is raised
+lt_sysroot=
+case ${with_sysroot} in #(
+ yes)
+   if test "$GCC" = yes; then
+     lt_sysroot=`$CC --print-sysroot 2>/dev/null`
+   fi
+   ;; #(
+ /*)
+   lt_sysroot=`echo "$with_sysroot" | sed -e "$sed_quote_subst"`
+   ;; #(
+ no|'')
+   ;; #(
+ *)
+   { $as_echo "$as_me:${as_lineno-$LINENO}: result: ${with_sysroot}" >&5
+$as_echo "${with_sysroot}" >&6; }
+   as_fn_error "The sysroot must be an absolute path." "$LINENO" 5
+   ;;
+esac
+
+# Report the outcome; an empty lt_sysroot is shown as "no".
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: ${lt_sysroot:-no}" >&5
+$as_echo "${lt_sysroot:-no}" >&6; }
+
+
+
+
+
+# Check whether --enable-libtool-lock was given.
+case ${enable_libtool_lock+set} in
+set) enableval=$enable_libtool_lock ;;
+esac
+
+# Anything other than an explicit "no" (including unset or empty) is
+# normalised to "yes".
+case x$enable_libtool_lock in
+xno) ;;
+*)   enable_libtool_lock=yes ;;
+esac
+
+# Some flags need to be propagated to the compiler or linker for good
+# libtool support.
+case $host in
+ia64-*-hpux*)
+  # Find out which ABI we are using.
+  # A one-line source file is compiled and the resulting object is classified
+  # from the output of /usr/bin/file.  HPUX_IA64_MODE is set to "32" or "64"
+  # accordingly; it is left untouched if the compile fails or the output
+  # matches neither pattern.
+  echo 'int i;' > conftest.$ac_ext
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5
+  (eval $ac_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+    case `/usr/bin/file conftest.$ac_objext` in
+      *ELF-32*)
+       HPUX_IA64_MODE="32"
+       ;;
+      *ELF-64*)
+       HPUX_IA64_MODE="64"
+       ;;
+    esac
+  fi
+  rm -rf conftest*
+  ;;
+*-*-irix6*)
+  # Find out which ABI we are using.
+  # The test source consists of a single #line directive.  The object that
+  # the compiler produces from it is classified with /usr/bin/file, and the
+  # matching ABI switch is appended to LD (defaulting to "ld" when LD is
+  # unset).  GNU ld takes -m<emulation> names; otherwise the -32 / -n32 /
+  # -64 switches are used.
+  echo '#line '$LINENO' "configure"' > conftest.$ac_ext
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5
+  (eval $ac_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+    if test "$lt_cv_prog_gnu_ld" = yes; then
+      case `/usr/bin/file conftest.$ac_objext` in
+       *32-bit*)
+         LD="${LD-ld} -melf32bsmip"
+         ;;
+       *N32*)
+         LD="${LD-ld} -melf32bmipn32"
+         ;;
+       *64-bit*)
+         LD="${LD-ld} -melf64bmip"
+       ;;
+      esac
+    else
+      case `/usr/bin/file conftest.$ac_objext` in
+       *32-bit*)
+         LD="${LD-ld} -32"
+         ;;
+       *N32*)
+         LD="${LD-ld} -n32"
+         ;;
+       *64-bit*)
+         LD="${LD-ld} -64"
+         ;;
+      esac
+    fi
+  fi
+  # Remove the scratch files whether or not the probe succeeded.
+  rm -rf conftest*
+  ;;
+
+x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \
+s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
+  # Find out which ABI we are using.
+  # A trivial object is compiled and classified as 32-bit or 64-bit from the
+  # output of /usr/bin/file; the matching "-m <emulation>" is then appended
+  # to LD (defaulting to "ld" when LD is unset).  LD is left alone when the
+  # compile fails or when no inner pattern matches the host.
+  echo 'int i;' > conftest.$ac_ext
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5
+  (eval $ac_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+    # Inspect the object under the suffix the compile step uses
+    # ($ac_objext), as the HP-UX and IRIX branches of this case do.
+    case `/usr/bin/file conftest.$ac_objext` in
+      *32-bit*)
+       # 32-bit object built on a host whose triplet names a 64-bit CPU.
+       case $host in
+         x86_64-*kfreebsd*-gnu)
+           LD="${LD-ld} -m elf_i386_fbsd"
+           ;;
+         x86_64-*linux*)
+           LD="${LD-ld} -m elf_i386"
+           ;;
+         ppc64-*linux*|powerpc64-*linux*)
+           LD="${LD-ld} -m elf32ppclinux"
+           ;;
+         s390x-*linux*)
+           LD="${LD-ld} -m elf_s390"
+           ;;
+         sparc64-*linux*)
+           LD="${LD-ld} -m elf32_sparc"
+           ;;
+       esac
+       ;;
+      *64-bit*)
+       case $host in
+         x86_64-*kfreebsd*-gnu)
+           LD="${LD-ld} -m elf_x86_64_fbsd"
+           ;;
+         x86_64-*linux*)
+           LD="${LD-ld} -m elf_x86_64"
+           ;;
+         ppc*-*linux*|powerpc*-*linux*)
+           LD="${LD-ld} -m elf64ppc"
+           ;;
+         s390*-*linux*|s390*-*tpf*)
+           LD="${LD-ld} -m elf64_s390"
+           ;;
+         sparc*-*linux*)
+           LD="${LD-ld} -m elf64_sparc"
+           ;;
+       esac
+       ;;
+    esac
+  fi
+  rm -rf conftest*
+  ;;
+
+*-*-sco3.2v5*)
+  # On SCO OpenServer 5, we need -belf to get full-featured binaries.
+  # -belf is added to CFLAGS provisionally.  It is kept only if an empty
+  # program links with it (result cached in lt_cv_cc_needs_belf); otherwise
+  # the saved CFLAGS are restored at the end of this branch.
+  SAVE_CFLAGS="$CFLAGS"
+  CFLAGS="$CFLAGS -belf"
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the C compiler needs -belf" >&5
+$as_echo_n "checking whether the C compiler needs -belf... " >&6; }
+if test "${lt_cv_cc_needs_belf+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  # Select the C compile/link command templates for the link test.
+  ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+     # Test program: confdefs.h followed by an empty main().
+     cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  lt_cv_cc_needs_belf=yes
+else
+  lt_cv_cc_needs_belf=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+     # Set the C command templates again after the test.
+     ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_cc_needs_belf" >&5
+$as_echo "$lt_cv_cc_needs_belf" >&6; }
+  if test x"$lt_cv_cc_needs_belf" != x"yes"; then
+    # this is probably gcc 2.8.0, egcs 1.0 or newer; no need for -belf
+    CFLAGS="$SAVE_CFLAGS"
+  fi
+  ;;
+*-*solaris*)
+  # Find out which ABI we are using.
+  # A trivial object is compiled and classified from the output of
+  # /usr/bin/file.  Only a 64-bit object changes LD: GNU ld gets an
+  # "-m <emulation>" switch, any other ld gets -64 if it accepts that switch.
+  echo 'int i;' > conftest.$ac_ext
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5
+  (eval $ac_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+    # Inspect the object under the suffix the compile step uses
+    # ($ac_objext), as the HP-UX and IRIX branches of this case do.
+    case `/usr/bin/file conftest.$ac_objext` in
+    *64-bit*)
+      case $lt_cv_prog_gnu_ld in
+      yes*)
+        case $host in
+        i?86-*-solaris*)
+          LD="${LD-ld} -m elf_x86_64"
+          ;;
+        sparc*-*-solaris*)
+          LD="${LD-ld} -m elf64_sparc"
+          ;;
+        esac
+        # GNU ld 2.21 introduced _sol2 emulations.  Use them if available.
+        # LD already ends in the emulation name chosen above, so appending
+        # "_sol2" turns it into the corresponding _sol2 emulation.
+        if ${LD-ld} -V | grep _sol2 >/dev/null 2>&1; then
+          LD="${LD-ld}_sol2"
+        fi
+        ;;
+      *)
+       # Trial relocatable link: keep -64 only if this ld accepts it.
+       if ${LD-ld} -64 -r -o conftest2.o conftest.$ac_objext >/dev/null 2>&1; then
+         LD="${LD-ld} -64"
+       fi
+       ;;
+      esac
+      ;;
+    esac
+  fi
+  rm -rf conftest*
+  ;;
+esac
+
+# need_locks mirrors --enable-libtool-lock, which was normalised earlier to
+# "yes" unless the user explicitly passed "no".
+need_locks="$enable_libtool_lock"
+
+# Locate the manifest tool "mt".  When a host-tool prefix is configured the
+# prefixed name is searched for first; if that yields nothing, the bare name
+# is tried.  A MANIFEST_TOOL value that is already set overrides the search.
+if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}mt", so it can be a program name with args.
+set dummy ${ac_tool_prefix}mt; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_MANIFEST_TOOL+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$MANIFEST_TOOL"; then
+  ac_cv_prog_MANIFEST_TOOL="$MANIFEST_TOOL" # Let the user override the test.
+else
+# Walk $PATH (an empty entry means "."); the first executable match wins.
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_prog_MANIFEST_TOOL="${ac_tool_prefix}mt"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+MANIFEST_TOOL=$ac_cv_prog_MANIFEST_TOOL
+if test -n "$MANIFEST_TOOL"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $MANIFEST_TOOL" >&5
+$as_echo "$MANIFEST_TOOL" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+# Fallback: nothing found under the prefixed name (or no prefix in use), so
+# repeat the search for the unprefixed "mt".
+if test -z "$ac_cv_prog_MANIFEST_TOOL"; then
+  ac_ct_MANIFEST_TOOL=$MANIFEST_TOOL
+  # Extract the first word of "mt", so it can be a program name with args.
+set dummy mt; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_ac_ct_MANIFEST_TOOL+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_MANIFEST_TOOL"; then
+  ac_cv_prog_ac_ct_MANIFEST_TOOL="$ac_ct_MANIFEST_TOOL" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_prog_ac_ct_MANIFEST_TOOL="mt"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_MANIFEST_TOOL=$ac_cv_prog_ac_ct_MANIFEST_TOOL
+if test -n "$ac_ct_MANIFEST_TOOL"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_MANIFEST_TOOL" >&5
+$as_echo "$ac_ct_MANIFEST_TOOL" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+  if test "x$ac_ct_MANIFEST_TOOL" = x; then
+    # Neither name was found: fall back to the shell no-op ":".
+    MANIFEST_TOOL=":"
+  else
+    # An unprefixed tool is being used.  When cross compiling, warn about
+    # that once; ac_tool_warned records that the warning has been issued.
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    MANIFEST_TOOL=$ac_ct_MANIFEST_TOOL
+  fi
+else
+  MANIFEST_TOOL="$ac_cv_prog_MANIFEST_TOOL"
+fi
+
+# Verify that whatever was found really is a manifest tool: run it with
+# '-?' and look for the words "Manifest Tool" in its standard output.  Its
+# standard error is copied to the log.  If the check fails, MANIFEST_TOOL is
+# replaced by the no-op ":".
+# NOTE: the cache variable is spelled "mainfest" throughout.  The spelling is
+# consistent, and the name is what a cache file would store, so it is left
+# as is.
+test -z "$MANIFEST_TOOL" && MANIFEST_TOOL=mt
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if $MANIFEST_TOOL is a manifest tool" >&5
+$as_echo_n "checking if $MANIFEST_TOOL is a manifest tool... " >&6; }
+if test "${lt_cv_path_mainfest_tool+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  lt_cv_path_mainfest_tool=no
+  echo "$as_me:$LINENO: $MANIFEST_TOOL '-?'" >&5
+  $MANIFEST_TOOL '-?' 2>conftest.err > conftest.out
+  cat conftest.err >&5
+  if $GREP 'Manifest Tool' conftest.out > /dev/null; then
+    lt_cv_path_mainfest_tool=yes
+  fi
+  rm -f conftest*
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_path_mainfest_tool" >&5
+$as_echo "$lt_cv_path_mainfest_tool" >&6; }
+if test "x$lt_cv_path_mainfest_tool" != xyes; then
+  MANIFEST_TOOL=:
+fi
+
+
+
+
+
+
+  case $host_os in
+    rhapsody* | darwin*)
+    if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}dsymutil", so it can be a program name with args.
+set dummy ${ac_tool_prefix}dsymutil; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_DSYMUTIL+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$DSYMUTIL"; then
+  ac_cv_prog_DSYMUTIL="$DSYMUTIL" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_prog_DSYMUTIL="${ac_tool_prefix}dsymutil"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+DSYMUTIL=$ac_cv_prog_DSYMUTIL
+if test -n "$DSYMUTIL"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $DSYMUTIL" >&5
+$as_echo "$DSYMUTIL" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_DSYMUTIL"; then
+  ac_ct_DSYMUTIL=$DSYMUTIL
+  # Extract the first word of "dsymutil", so it can be a program name with args.
+set dummy dsymutil; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_ac_ct_DSYMUTIL+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_DSYMUTIL"; then
+  ac_cv_prog_ac_ct_DSYMUTIL="$ac_ct_DSYMUTIL" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_prog_ac_ct_DSYMUTIL="dsymutil"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_DSYMUTIL=$ac_cv_prog_ac_ct_DSYMUTIL
+if test -n "$ac_ct_DSYMUTIL"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_DSYMUTIL" >&5
+$as_echo "$ac_ct_DSYMUTIL" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+  if test "x$ac_ct_DSYMUTIL" = x; then
+    DSYMUTIL=":"
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    DSYMUTIL=$ac_ct_DSYMUTIL
+  fi
+else
+  DSYMUTIL="$ac_cv_prog_DSYMUTIL"
+fi
+
+    if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}nmedit", so it can be a program name with args.
+set dummy ${ac_tool_prefix}nmedit; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_NMEDIT+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$NMEDIT"; then
+  ac_cv_prog_NMEDIT="$NMEDIT" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_prog_NMEDIT="${ac_tool_prefix}nmedit"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+NMEDIT=$ac_cv_prog_NMEDIT
+if test -n "$NMEDIT"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $NMEDIT" >&5
+$as_echo "$NMEDIT" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_NMEDIT"; then
+  ac_ct_NMEDIT=$NMEDIT
+  # Extract the first word of "nmedit", so it can be a program name with args.
+set dummy nmedit; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_ac_ct_NMEDIT+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_NMEDIT"; then
+  ac_cv_prog_ac_ct_NMEDIT="$ac_ct_NMEDIT" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_prog_ac_ct_NMEDIT="nmedit"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_NMEDIT=$ac_cv_prog_ac_ct_NMEDIT
+if test -n "$ac_ct_NMEDIT"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_NMEDIT" >&5
+$as_echo "$ac_ct_NMEDIT" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+  if test "x$ac_ct_NMEDIT" = x; then
+    NMEDIT=":"
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    NMEDIT=$ac_ct_NMEDIT
+  fi
+else
+  NMEDIT="$ac_cv_prog_NMEDIT"
+fi
+
+    if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}lipo", so it can be a program name with args.
+set dummy ${ac_tool_prefix}lipo; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_LIPO+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$LIPO"; then
+  ac_cv_prog_LIPO="$LIPO" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_prog_LIPO="${ac_tool_prefix}lipo"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+LIPO=$ac_cv_prog_LIPO
+if test -n "$LIPO"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $LIPO" >&5
+$as_echo "$LIPO" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_LIPO"; then
+  ac_ct_LIPO=$LIPO
+  # Extract the first word of "lipo", so it can be a program name with args.
+set dummy lipo; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_ac_ct_LIPO+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_LIPO"; then
+  ac_cv_prog_ac_ct_LIPO="$ac_ct_LIPO" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_prog_ac_ct_LIPO="lipo"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_LIPO=$ac_cv_prog_ac_ct_LIPO
+if test -n "$ac_ct_LIPO"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_LIPO" >&5
+$as_echo "$ac_ct_LIPO" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+  if test "x$ac_ct_LIPO" = x; then
+    LIPO=":"
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    LIPO=$ac_ct_LIPO
+  fi
+else
+  LIPO="$ac_cv_prog_LIPO"
+fi
+
+    if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}otool", so it can be a program name with args.
+set dummy ${ac_tool_prefix}otool; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_OTOOL+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$OTOOL"; then
+  ac_cv_prog_OTOOL="$OTOOL" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_prog_OTOOL="${ac_tool_prefix}otool"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+OTOOL=$ac_cv_prog_OTOOL
+if test -n "$OTOOL"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $OTOOL" >&5
+$as_echo "$OTOOL" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_OTOOL"; then
+  ac_ct_OTOOL=$OTOOL
+  # Extract the first word of "otool", so it can be a program name with args.
+set dummy otool; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_ac_ct_OTOOL+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_OTOOL"; then
+  ac_cv_prog_ac_ct_OTOOL="$ac_ct_OTOOL" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_prog_ac_ct_OTOOL="otool"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_OTOOL=$ac_cv_prog_ac_ct_OTOOL
+if test -n "$ac_ct_OTOOL"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_OTOOL" >&5
+$as_echo "$ac_ct_OTOOL" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+  if test "x$ac_ct_OTOOL" = x; then
+    OTOOL=":"
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    OTOOL=$ac_ct_OTOOL
+  fi
+else
+  OTOOL="$ac_cv_prog_OTOOL"
+fi
+
+    if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}otool64", so it can be a program name with args.
+set dummy ${ac_tool_prefix}otool64; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_OTOOL64+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$OTOOL64"; then
+  ac_cv_prog_OTOOL64="$OTOOL64" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_prog_OTOOL64="${ac_tool_prefix}otool64"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+OTOOL64=$ac_cv_prog_OTOOL64
+if test -n "$OTOOL64"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $OTOOL64" >&5
+$as_echo "$OTOOL64" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_OTOOL64"; then
+  ac_ct_OTOOL64=$OTOOL64
+  # Extract the first word of "otool64", so it can be a program name with args.
+set dummy otool64; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_ac_ct_OTOOL64+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_OTOOL64"; then
+  ac_cv_prog_ac_ct_OTOOL64="$ac_ct_OTOOL64" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_prog_ac_ct_OTOOL64="otool64"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_OTOOL64=$ac_cv_prog_ac_ct_OTOOL64
+if test -n "$ac_ct_OTOOL64"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_OTOOL64" >&5
+$as_echo "$ac_ct_OTOOL64" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+  if test "x$ac_ct_OTOOL64" = x; then
+    OTOOL64=":"
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    OTOOL64=$ac_ct_OTOOL64
+  fi
+else
+  OTOOL64="$ac_cv_prog_OTOOL64"
+fi
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+    # Probe whether the linker accepts -single_module by building a small
+    # dynamic library with it.  The answer is cached in
+    # lt_cv_apple_cc_single_mod and stays "no" when LT_MULTI_MODULE is set.
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for -single_module linker flag" >&5
+$as_echo_n "checking for -single_module linker flag... " >&6; }
+if test "${lt_cv_apple_cc_single_mod+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  lt_cv_apple_cc_single_mod=no
+      if test -z "${LT_MULTI_MODULE}"; then
+       # By default we will add the -single_module flag. You can override
+       # by either setting the environment variable LT_MULTI_MODULE
+       # non-empty at configure time, or by adding -multi_module to the
+       # link flags.
+       rm -rf libconftest.dylib*
+       echo "int foo(void){return 1;}" > conftest.c
+       echo "$LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \
+-dynamiclib -Wl,-single_module conftest.c" >&5
+       $LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \
+         -dynamiclib -Wl,-single_module conftest.c 2>conftest.err
+        _lt_result=$?
+       # If there is a non-empty error log, and "single_module"
+       # appears in it, assume the flag caused a linker warning
+       # (grep is used for its exit status only; its matches are discarded
+       # so they do not land in the middle of the "checking ..." line, and
+       # the complete error log is copied to the log stream just below).
+        if test -s conftest.err && $GREP single_module conftest.err >/dev/null; then
+         cat conftest.err >&5
+       # Otherwise, if the output was created with a 0 exit code from
+       # the compiler, it worked.
+       elif test -f libconftest.dylib && test $_lt_result -eq 0; then
+         lt_cv_apple_cc_single_mod=yes
+       else
+         cat conftest.err >&5
+       fi
+       rm -rf libconftest.dylib*
+       rm -f conftest.*
+      fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_apple_cc_single_mod" >&5
+$as_echo "$lt_cv_apple_cc_single_mod" >&6; }
+
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for -exported_symbols_list linker flag" >&5
+$as_echo_n "checking for -exported_symbols_list linker flag... " >&6; }
+if test "${lt_cv_ld_exported_symbols_list+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  lt_cv_ld_exported_symbols_list=no
+      save_LDFLAGS=$LDFLAGS
+      echo "_main" > conftest.sym
+      LDFLAGS="$LDFLAGS -Wl,-exported_symbols_list,conftest.sym"
+      cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  lt_cv_ld_exported_symbols_list=yes
+else
+  lt_cv_ld_exported_symbols_list=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+       LDFLAGS="$save_LDFLAGS"
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ld_exported_symbols_list" >&5
+$as_echo "$lt_cv_ld_exported_symbols_list" >&6; }
+
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for -force_load linker flag" >&5
+$as_echo_n "checking for -force_load linker flag... " >&6; }
+if test "${lt_cv_ld_force_load+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  lt_cv_ld_force_load=no
+      cat > conftest.c << _LT_EOF
+int forced_loaded() { return 2;}
+_LT_EOF
+      echo "$LTCC $LTCFLAGS -c -o conftest.o conftest.c" >&5
+      $LTCC $LTCFLAGS -c -o conftest.o conftest.c 2>&5
+      echo "$AR cru libconftest.a conftest.o" >&5
+      $AR cru libconftest.a conftest.o 2>&5
+      echo "$RANLIB libconftest.a" >&5
+      $RANLIB libconftest.a 2>&5
+      cat > conftest.c << _LT_EOF
+int main() { return 0;}
+_LT_EOF
+      echo "$LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a" >&5
+      $LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a 2>conftest.err
+      _lt_result=$?
+      if test -s conftest.err && $GREP force_load conftest.err; then
+       cat conftest.err >&5
+      elif test -f conftest && test $_lt_result -eq 0 && $GREP forced_load conftest >/dev/null 2>&1 ; then
+       lt_cv_ld_force_load=yes
+      else
+       cat conftest.err >&5
+      fi
+        rm -f conftest.err libconftest.a conftest conftest.c
+        rm -rf conftest.dSYM
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ld_force_load" >&5
+$as_echo "$lt_cv_ld_force_load" >&6; }
+    case $host_os in
+    rhapsody* | darwin1.[012])
+      _lt_dar_allow_undefined='${wl}-undefined ${wl}suppress' ;;
+    darwin1.*)
+      _lt_dar_allow_undefined='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' ;;
+    darwin*) # darwin 5.x on
+      # if running on 10.5 or later, the deployment target defaults
+      # to the OS version, if on x86, and 10.4, the deployment
+      # target defaults to 10.4. Don't you love it?
+      case ${MACOSX_DEPLOYMENT_TARGET-10.0},$host in
+       10.0,*86*-darwin8*|10.0,*-darwin[91]*)
+         _lt_dar_allow_undefined='${wl}-undefined ${wl}dynamic_lookup' ;;
+       10.[012]*)
+         _lt_dar_allow_undefined='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' ;;
+       10.*)
+         _lt_dar_allow_undefined='${wl}-undefined ${wl}dynamic_lookup' ;;
+      esac
+    ;;
+  esac
+    if test "$lt_cv_apple_cc_single_mod" = "yes"; then
+      _lt_dar_single_mod='$single_module'
+    fi
+    if test "$lt_cv_ld_exported_symbols_list" = "yes"; then
+      _lt_dar_export_syms=' ${wl}-exported_symbols_list,$output_objdir/${libname}-symbols.expsym'
+    else
+      _lt_dar_export_syms='~$NMEDIT -s $output_objdir/${libname}-symbols.expsym ${lib}'
+    fi
+    if test "$DSYMUTIL" != ":" && test "$lt_cv_ld_force_load" = "no"; then
+      _lt_dsymutil='~$DSYMUTIL $lib || :'
+    else
+      _lt_dsymutil=
+    fi
+    ;;
+  esac
+
+for ac_header in dlfcn.h
+do :
+  ac_fn_c_check_header_compile "$LINENO" "dlfcn.h" "ac_cv_header_dlfcn_h" "$ac_includes_default
+"
+if test "x$ac_cv_header_dlfcn_h" = x""yes; then :
+  cat >>confdefs.h <<_ACEOF
+#define HAVE_DLFCN_H 1
+_ACEOF
+
+fi
+
+done
+
+# func_stripname_cnf PREFIX SUFFIX NAME
+# Remove PREFIX from the start and SUFFIX from the end of NAME with $SED and store the
+# outcome in func_stripname_result.  PREFIX and SUFFIX are spliced into the sed script as-is.
+func_stripname_cnf ()
+{
+  case ${2} in
+  .*) func_stripname_result=`$ECHO "${3}" | $SED "s%^${1}%%; s%\\\\${2}\$%%"`;;  # SUFFIX begins with a dot: put a backslash in front of it
+  *)  func_stripname_result=`$ECHO "${3}" | $SED "s%^${1}%%; s%${2}\$%%"`;;
+  esac
+} # func_stripname_cnf
+
+
+
+
+
+
+# Set options
+
+
+
+        enable_dlopen=no  # the dlopen option is initialised to "no" here
+
+
+
+            # Check whether --enable-shared was given (enable_shared is set); when it is not, the default is yes.
+if test "${enable_shared+set}" = set; then :
+  enableval=$enable_shared; p=${PACKAGE-default}  # p: name compared against each entry of a list argument
+    case $enableval in
+    yes) enable_shared=yes ;;
+    no) enable_shared=no ;;
+    *)
+      enable_shared=no
+      # Any other value is treated as a list split on the current IFS characters, $PATH_SEPARATOR or a comma; enable only if $p is listed.
+      lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
+      for pkg in $enableval; do
+       IFS="$lt_save_ifs"
+       if test "X$pkg" = "X$p"; then
+         enable_shared=yes
+       fi
+      done
+      IFS="$lt_save_ifs"
+      ;;
+    esac
+else
+  enable_shared=yes
+fi
+
+
+
+
+
+
+
+
+
+  # Check whether --enable-static was given (enable_static is set); when it is not, the default is yes.
+if test "${enable_static+set}" = set; then :
+  enableval=$enable_static; p=${PACKAGE-default}  # p: name compared against each entry of a list argument
+    case $enableval in
+    yes) enable_static=yes ;;
+    no) enable_static=no ;;
+    *)
+     enable_static=no
+      # Any other value is treated as a list split on the current IFS characters, $PATH_SEPARATOR or a comma; enable only if $p is listed.
+      lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
+      for pkg in $enableval; do
+       IFS="$lt_save_ifs"
+       if test "X$pkg" = "X$p"; then
+         enable_static=yes
+       fi
+      done
+      IFS="$lt_save_ifs"
+      ;;
+    esac
+else
+  enable_static=yes
+fi
+
+
+
+
+
+
+
+
+
+
+# Check whether --with-pic was given (with_pic is set); the result is pic_mode = yes, no or default.
+if test "${with_pic+set}" = set; then :
+  withval=$with_pic; lt_p=${PACKAGE-default}  # lt_p: name compared against each entry of a list argument
+    case $withval in
+    yes|no) pic_mode=$withval ;;
+    *)
+      pic_mode=default
+      # Any other value is treated as a list split on the current IFS characters, $PATH_SEPARATOR or a comma; pic_mode=yes only if $lt_p is listed.
+      lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
+      for lt_pkg in $withval; do
+       IFS="$lt_save_ifs"
+       if test "X$lt_pkg" = "X$lt_p"; then
+         pic_mode=yes
+       fi
+      done
+      IFS="$lt_save_ifs"
+      ;;
+    esac
+else
+  pic_mode=default
+fi
+
+
+test -z "$pic_mode" && pic_mode=default  # guard: an empty pic_mode also falls back to "default"
+
+
+
+
+
+
+
+  # Check whether --enable-fast-install was given (enable_fast_install is set); when it is not, the default is yes.
+if test "${enable_fast_install+set}" = set; then :
+  enableval=$enable_fast_install; p=${PACKAGE-default}  # p: name compared against each entry of a list argument
+    case $enableval in
+    yes) enable_fast_install=yes ;;
+    no) enable_fast_install=no ;;
+    *)
+      enable_fast_install=no
+      # Any other value is treated as a list split on the current IFS characters, $PATH_SEPARATOR or a comma; enable only if $p is listed.
+      lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
+      for pkg in $enableval; do
+       IFS="$lt_save_ifs"
+       if test "X$pkg" = "X$p"; then
+         enable_fast_install=yes
+       fi
+      done
+      IFS="$lt_save_ifs"
+      ;;
+    esac
+else
+  enable_fast_install=yes
+fi
+
+
+
+
+
+
+
+
+
+
+
+# This can be used to rebuild libtool when needed
+LIBTOOL_DEPS="$ltmain"
+
+# Always use our own libtool.
+LIBTOOL='$(SHELL) $(top_builddir)/libtool'
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+test -z "$LN_S" && LN_S="ln -s"  # fall back to "ln -s" when LN_S is unset or empty
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+if test -n "${ZSH_VERSION+set}" ; then
+   setopt NO_GLOB_SUBST
+fi
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for objdir" >&5
+$as_echo_n "checking for objdir... " >&6; }
+if test "${lt_cv_objdir+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  rm -f .libs 2>/dev/null
+mkdir .libs 2>/dev/null
+if test -d .libs; then
+  lt_cv_objdir=.libs
+else
+  # MS-DOS does not allow filenames that begin with a dot.
+  lt_cv_objdir=_libs
+fi
+rmdir .libs 2>/dev/null
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_objdir" >&5
+$as_echo "$lt_cv_objdir" >&6; }
+objdir=$lt_cv_objdir
+
+
+
+
+
+cat >>confdefs.h <<_ACEOF
+#define LT_OBJDIR "$lt_cv_objdir/"
+_ACEOF
+
+
+
+
+case $host_os in
+aix3*)
+  # AIX sometimes has problems with the GCC collect2 program.  For some
+  # reason, if we set the COLLECT_NAMES environment variable, the problems
+  # vanish in a puff of smoke.
+  if test "X${COLLECT_NAMES+set}" != Xset; then  # only when COLLECT_NAMES is not set at all; an existing value is kept
+    COLLECT_NAMES=
+    export COLLECT_NAMES
+  fi
+  ;;
+esac
+
+# Global variables:
+ofile=libtool
+can_build_shared=yes
+
+# All known linkers require a `.a' archive for static linking (except MSVC,
+# which needs '.lib').
+libext=a
+
+with_gnu_ld="$lt_cv_prog_gnu_ld"
+
+old_CC="$CC"  # snapshot of CC taken before the defaults below are applied
+old_CFLAGS="$CFLAGS"  # snapshot of CFLAGS, likewise
+
+# Set sane defaults for various variables (each applies only when the variable is unset or empty)
+test -z "$CC" && CC=cc
+test -z "$LTCC" && LTCC=$CC
+test -z "$LTCFLAGS" && LTCFLAGS=$CFLAGS
+test -z "$LD" && LD=ld
+test -z "$ac_objext" && ac_objext=o
+# cc_basename: first word of $compiler that is neither a known wrapper nor an option, minus directory and "$host_alias-" prefix.
+for cc_temp in $compiler""; do
+  case $cc_temp in
+    compile | *[\\/]compile | ccache | *[\\/]ccache ) ;;
+    distcc | *[\\/]distcc | purify | *[\\/]purify ) ;;
+    \-*) ;;  # skip words that start with a dash (options)
+    *) break;;  # anything else is taken as the compiler word
+  esac
+done
+cc_basename=`$ECHO "$cc_temp" | $SED "s%.*/%%; s%^$host_alias-%%"`
+
+
+# Only perform the check for file, if the check method requires it
+test -z "$MAGIC_CMD" && MAGIC_CMD=file
+case $deplibs_check_method in
+file_magic*)
+  if test "$file_magic_cmd" = '$MAGIC_CMD'; then
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ${ac_tool_prefix}file" >&5
+$as_echo_n "checking for ${ac_tool_prefix}file... " >&6; }
+if test "${lt_cv_path_MAGIC_CMD+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  case $MAGIC_CMD in
+[\\/*] |  ?:[\\/]*)
+  lt_cv_path_MAGIC_CMD="$MAGIC_CMD" # Let the user override the test with a path.
+  ;;
+*)
+  lt_save_MAGIC_CMD="$MAGIC_CMD"
+  lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR
+  ac_dummy="/usr/bin$PATH_SEPARATOR$PATH"
+  for ac_dir in $ac_dummy; do
+    IFS="$lt_save_ifs"
+    test -z "$ac_dir" && ac_dir=.
+    if test -f $ac_dir/${ac_tool_prefix}file; then
+      lt_cv_path_MAGIC_CMD="$ac_dir/${ac_tool_prefix}file"
+      if test -n "$file_magic_test_file"; then
+       case $deplibs_check_method in
+       "file_magic "*)
+         file_magic_regex=`expr "$deplibs_check_method" : "file_magic \(.*\)"`
+         MAGIC_CMD="$lt_cv_path_MAGIC_CMD"
+         if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null |
+           $EGREP "$file_magic_regex" > /dev/null; then
+           :
+         else
+           cat <<_LT_EOF 1>&2
+
+*** Warning: the command libtool uses to detect shared libraries,
+*** $file_magic_cmd, produces output that libtool cannot recognize.
+*** The result is that libtool may fail to recognize shared libraries
+*** as such.  This will affect the creation of libtool libraries that
+*** depend on shared libraries, but programs linked with such libtool
+*** libraries will work regardless of this problem.  Nevertheless, you
+*** may want to report the problem to your system manager and/or to
+*** bug-libtool@gnu.org
+
+_LT_EOF
+         fi ;;
+       esac
+      fi
+      break
+    fi
+  done
+  IFS="$lt_save_ifs"
+  MAGIC_CMD="$lt_save_MAGIC_CMD"
+  ;;
+esac
+fi
+
+MAGIC_CMD="$lt_cv_path_MAGIC_CMD"
+if test -n "$MAGIC_CMD"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $MAGIC_CMD" >&5
+$as_echo "$MAGIC_CMD" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+
+
+
+if test -z "$lt_cv_path_MAGIC_CMD"; then
+  if test -n "$ac_tool_prefix"; then
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for file" >&5
+$as_echo_n "checking for file... " >&6; }
+if test "${lt_cv_path_MAGIC_CMD+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  case $MAGIC_CMD in
+[\\/*] |  ?:[\\/]*)
+  lt_cv_path_MAGIC_CMD="$MAGIC_CMD" # Let the user override the test with a path.
+  ;;
+*)
+  lt_save_MAGIC_CMD="$MAGIC_CMD"
+  lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR
+  ac_dummy="/usr/bin$PATH_SEPARATOR$PATH"
+  for ac_dir in $ac_dummy; do
+    IFS="$lt_save_ifs"
+    test -z "$ac_dir" && ac_dir=.
+    if test -f $ac_dir/file; then
+      lt_cv_path_MAGIC_CMD="$ac_dir/file"
+      if test -n "$file_magic_test_file"; then
+       case $deplibs_check_method in
+       "file_magic "*)
+         file_magic_regex=`expr "$deplibs_check_method" : "file_magic \(.*\)"`
+         MAGIC_CMD="$lt_cv_path_MAGIC_CMD"
+         if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null |
+           $EGREP "$file_magic_regex" > /dev/null; then
+           :
+         else
+           cat <<_LT_EOF 1>&2
+
+*** Warning: the command libtool uses to detect shared libraries,
+*** $file_magic_cmd, produces output that libtool cannot recognize.
+*** The result is that libtool may fail to recognize shared libraries
+*** as such.  This will affect the creation of libtool libraries that
+*** depend on shared libraries, but programs linked with such libtool
+*** libraries will work regardless of this problem.  Nevertheless, you
+*** may want to report the problem to your system manager and/or to
+*** bug-libtool@gnu.org
+
+_LT_EOF
+         fi ;;
+       esac
+      fi
+      break
+    fi
+  done
+  IFS="$lt_save_ifs"
+  MAGIC_CMD="$lt_save_MAGIC_CMD"
+  ;;
+esac
+fi
+
+MAGIC_CMD="$lt_cv_path_MAGIC_CMD"
+if test -n "$MAGIC_CMD"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $MAGIC_CMD" >&5
+$as_echo "$MAGIC_CMD" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+  else
+    MAGIC_CMD=:
+  fi
+fi
+
+  fi
+  ;;
+esac
+
+# Use C for the default configuration in the libtool script
+
+lt_save_CC="$CC"
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+# Source file extension for C test sources.
+ac_ext=c
+
+# Object file extension for compiled C test sources.
+objext=o
+objext=$objext
+
+# Code to be used in simple compile tests
+lt_simple_compile_test_code="int some_variable = 0;"
+
+# Code to be used in simple link tests
+lt_simple_link_test_code='int main(){return(0);}'
+
+
+
+
+
+
+
+# If LTCC is unset, use CC (the ${var-default} form keeps an LTCC that is set but empty).
+LTCC=${LTCC-"$CC"}
+
+# If LTCFLAGS is unset, use CFLAGS (a set-but-empty LTCFLAGS is kept as is).
+LTCFLAGS=${LTCFLAGS-"$CFLAGS"}
+
+# Allow CC to be a program name with arguments.
+compiler=$CC
+
+# Save the default compiler, since it gets overwritten when the other
+# tags are being tested, and _LT_TAGVAR(compiler, []) is a NOP.
+compiler_DEFAULT=$CC
+
+# save warnings/boilerplate of simple test code
+ac_outfile=conftest.$ac_objext
+echo "$lt_simple_compile_test_code" >conftest.$ac_ext
+eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err
+_lt_compiler_boilerplate=`cat conftest.err`
+$RM conftest*
+
+ac_outfile=conftest.$ac_objext
+echo "$lt_simple_link_test_code" >conftest.$ac_ext
+eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err
+_lt_linker_boilerplate=`cat conftest.err`
+$RM -r conftest*
+
+
+if test -n "$compiler"; then
+
+lt_prog_compiler_no_builtin_flag=
+
+if test "$GCC" = yes; then
+  case $cc_basename in
+  nvcc*)
+    lt_prog_compiler_no_builtin_flag=' -Xcompiler -fno-builtin' ;;
+  *)
+    lt_prog_compiler_no_builtin_flag=' -fno-builtin' ;;
+  esac
+
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -fno-rtti -fno-exceptions" >&5
+$as_echo_n "checking if $compiler supports -fno-rtti -fno-exceptions... " >&6; }
+if test "${lt_cv_prog_compiler_rtti_exceptions+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  lt_cv_prog_compiler_rtti_exceptions=no
+   ac_outfile=conftest.$ac_objext
+   echo "$lt_simple_compile_test_code" > conftest.$ac_ext
+   lt_compiler_flag="-fno-rtti -fno-exceptions"
+   # Insert the option either (1) after the last *FLAGS variable, or
+   # (2) before a word containing "conftest.", or (3) at the end.
+   # Note that $ac_compile itself does not contain backslashes and begins
+   # with a dollar sign (not a hyphen), so the echo should work correctly.
+   # The option is referenced via a variable to avoid confusing sed.
+   lt_compile=`echo "$ac_compile" | $SED \
+   -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
+   -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
+   -e 's:$: $lt_compiler_flag:'`
+   (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5)
+   (eval "$lt_compile" 2>conftest.err)
+   ac_status=$?
+   cat conftest.err >&5
+   echo "$as_me:$LINENO: \$? = $ac_status" >&5
+   if (exit $ac_status) && test -s "$ac_outfile"; then
+     # The compiler can only warn and ignore the option if not recognized
+     # So say no if there are warnings other than the usual output.
+     $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp
+     $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2
+     if test ! -s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then
+       lt_cv_prog_compiler_rtti_exceptions=yes
+     fi
+   fi
+   $RM conftest*
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_rtti_exceptions" >&5
+$as_echo "$lt_cv_prog_compiler_rtti_exceptions" >&6; }
+
+if test x"$lt_cv_prog_compiler_rtti_exceptions" = xyes; then
+    lt_prog_compiler_no_builtin_flag="$lt_prog_compiler_no_builtin_flag -fno-rtti -fno-exceptions"
+else
+    :
+fi
+
+fi
+
+
+
+
+
+
+  lt_prog_compiler_wl=
+lt_prog_compiler_pic=
+lt_prog_compiler_static=
+
+
+  if test "$GCC" = yes; then
+    lt_prog_compiler_wl='-Wl,'
+    lt_prog_compiler_static='-static'
+
+    case $host_os in
+      aix*)
+      # All AIX code is PIC.
+      if test "$host_cpu" = ia64; then
+       # AIX 5 now supports IA64 processor
+       lt_prog_compiler_static='-Bstatic'
+      fi
+      ;;
+
+    amigaos*)
+      case $host_cpu in
+      powerpc)
+            # see comment about AmigaOS4 .so support
+            lt_prog_compiler_pic='-fPIC'
+        ;;
+      m68k)
+            # FIXME: we need at least 68020 code to build shared libraries, but
+            # adding the `-m68020' flag to GCC prevents building anything better,
+            # like `-m68040'.
+            lt_prog_compiler_pic='-m68020 -resident32 -malways-restore-a4'
+        ;;
+      esac
+      ;;
+
+    beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*)
+      # PIC is the default for these OSes.
+      ;;
+
+    mingw* | cygwin* | pw32* | os2* | cegcc*)
+      # This hack is so that the source file can tell whether it is being
+      # built for inclusion in a dll (and should export symbols for example).
+      # Although the cygwin gcc ignores -fPIC, still need this for old-style
+      # (--disable-auto-import) libraries
+      lt_prog_compiler_pic='-DDLL_EXPORT'
+      ;;
+
+    darwin* | rhapsody*)
+      # PIC is the default on this platform
+      # Common symbols not allowed in MH_DYLIB files
+      lt_prog_compiler_pic='-fno-common'
+      ;;
+
+    haiku*)
+      # PIC is the default for Haiku.
+      # The "-static" flag exists, but is broken.
+      lt_prog_compiler_static=
+      ;;
+
+    hpux*)
+      # PIC is the default for 64-bit PA HP-UX, but not for 32-bit
+      # PA HP-UX.  On IA64 HP-UX, PIC is the default but the pic flag
+      # sets the default TLS model and affects inlining.
+      case $host_cpu in
+      hppa*64*)
+       # +Z the default
+       ;;
+      *)
+       lt_prog_compiler_pic='-fPIC'
+       ;;
+      esac
+      ;;
+
+    interix[3-9]*)
+      # Interix 3.x gcc -fpic/-fPIC options generate broken code.
+      # Instead, we relocate shared libraries at runtime.
+      ;;
+
+    msdosdjgpp*)
+      # Just because we use GCC doesn't mean we suddenly get shared libraries
+      # on systems that don't support them.
+      lt_prog_compiler_can_build_shared=no
+      enable_shared=no
+      ;;
+
+    *nto* | *qnx*)
+      # QNX uses GNU C++, but need to define -shared option too, otherwise
+      # it will coredump.
+      lt_prog_compiler_pic='-fPIC -shared'
+      ;;
+
+    sysv4*MP*)
+      if test -d /usr/nec; then
+       lt_prog_compiler_pic=-Kconform_pic
+      fi
+      ;;
+
+    *)
+      lt_prog_compiler_pic='-fPIC'
+      ;;
+    esac
+
+    case $cc_basename in
+    nvcc*) # Cuda Compiler Driver 2.2
+      lt_prog_compiler_wl='-Xlinker '
+      if test -n "$lt_prog_compiler_pic"; then
+        lt_prog_compiler_pic="-Xcompiler $lt_prog_compiler_pic"
+      fi
+      ;;
+    esac
+  else
+    # PORTME Check for flag to pass linker flags through the system compiler.
+    case $host_os in
+    aix*)
+      lt_prog_compiler_wl='-Wl,'
+      if test "$host_cpu" = ia64; then
+       # AIX 5 now supports IA64 processor
+       lt_prog_compiler_static='-Bstatic'
+      else
+       lt_prog_compiler_static='-bnso -bI:/lib/syscalls.exp'
+      fi
+      ;;
+
+    mingw* | cygwin* | pw32* | os2* | cegcc*)
+      # This hack is so that the source file can tell whether it is being
+      # built for inclusion in a dll (and should export symbols for example).
+      lt_prog_compiler_pic='-DDLL_EXPORT'
+      ;;
+
+    hpux9* | hpux10* | hpux11*)
+      lt_prog_compiler_wl='-Wl,'
+      # PIC is the default for IA64 HP-UX and 64-bit HP-UX, but
+      # not for PA HP-UX.
+      case $host_cpu in
+      hppa*64*|ia64*)
+       # +Z the default
+       ;;
+      *)
+       lt_prog_compiler_pic='+Z'
+       ;;
+      esac
+      # Is there a better lt_prog_compiler_static that works with the bundled CC?
+      lt_prog_compiler_static='${wl}-a ${wl}archive'
+      ;;
+
+    irix5* | irix6* | nonstopux*)
+      lt_prog_compiler_wl='-Wl,'
+      # PIC (with -KPIC) is the default.
+      lt_prog_compiler_static='-non_shared'
+      ;;
+
+    linux* | k*bsd*-gnu | kopensolaris*-gnu)
+      case $cc_basename in
+      # old Intel for x86_64 which still supported -KPIC.
+      ecc*)
+       lt_prog_compiler_wl='-Wl,'
+       lt_prog_compiler_pic='-KPIC'
+       lt_prog_compiler_static='-static'
+        ;;
+      # icc used to be incompatible with GCC.
+      # ICC 10 doesn't accept -KPIC any more.
+      icc* | ifort*)
+       lt_prog_compiler_wl='-Wl,'
+       lt_prog_compiler_pic='-fPIC'
+       lt_prog_compiler_static='-static'
+        ;;
+      # Lahey Fortran 8.1.
+      lf95*)
+       lt_prog_compiler_wl='-Wl,'
+       lt_prog_compiler_pic='--shared'
+       lt_prog_compiler_static='--static'
+       ;;
+      nagfor*)
+       # NAG Fortran compiler
+       lt_prog_compiler_wl='-Wl,-Wl,,'
+       lt_prog_compiler_pic='-PIC'
+       lt_prog_compiler_static='-Bstatic'
+       ;;
+      pgcc* | pgf77* | pgf90* | pgf95* | pgfortran*)
+        # Portland Group compilers (*not* the Pentium gcc compiler,
+       # which looks to be a dead project)
+       lt_prog_compiler_wl='-Wl,'
+       lt_prog_compiler_pic='-fpic'
+       lt_prog_compiler_static='-Bstatic'
+        ;;
+      ccc*)
+        lt_prog_compiler_wl='-Wl,'
+        # All Alpha code is PIC.
+        lt_prog_compiler_static='-non_shared'
+        ;;
+      xl* | bgxl* | bgf* | mpixl*)
+       # IBM XL C 8.0/Fortran 10.1, 11.1 on PPC and BlueGene
+       lt_prog_compiler_wl='-Wl,'
+       lt_prog_compiler_pic='-qpic'
+       lt_prog_compiler_static='-qstaticlink'
+       ;;
+      *)
+       case `$CC -V 2>&1 | sed 5q` in
+       *Sun\ Ceres\ Fortran* | *Sun*Fortran*\ [1-7].* | *Sun*Fortran*\ 8.[0-3]*)
+         # Sun Fortran 8.3 passes all unrecognized flags to the linker
+         lt_prog_compiler_pic='-KPIC'
+         lt_prog_compiler_static='-Bstatic'
+         lt_prog_compiler_wl=''
+         ;;
+       *Sun\ F* | *Sun*Fortran*)
+         lt_prog_compiler_pic='-KPIC'
+         lt_prog_compiler_static='-Bstatic'
+         lt_prog_compiler_wl='-Qoption ld '
+         ;;
+       *Sun\ C*)
+         # Sun C 5.9
+         lt_prog_compiler_pic='-KPIC'
+         lt_prog_compiler_static='-Bstatic'
+         lt_prog_compiler_wl='-Wl,'
+         ;;
+        *Intel*\ [CF]*Compiler*)
+         lt_prog_compiler_wl='-Wl,'
+         lt_prog_compiler_pic='-fPIC'
+         lt_prog_compiler_static='-static'
+         ;;
+       *Portland\ Group*)
+         lt_prog_compiler_wl='-Wl,'
+         lt_prog_compiler_pic='-fpic'
+         lt_prog_compiler_static='-Bstatic'
+         ;;
+       esac
+       ;;
+      esac
+      ;;
+
+    newsos6)
+      lt_prog_compiler_pic='-KPIC'
+      lt_prog_compiler_static='-Bstatic'
+      ;;
+
+    *nto* | *qnx*)
+      # QNX uses GNU C++, but need to define -shared option too, otherwise
+      # it will coredump.
+      lt_prog_compiler_pic='-fPIC -shared'
+      ;;
+
+    osf3* | osf4* | osf5*)
+      lt_prog_compiler_wl='-Wl,'
+      # All OSF/1 code is PIC.
+      lt_prog_compiler_static='-non_shared'
+      ;;
+
+    rdos*)
+      lt_prog_compiler_static='-non_shared'
+      ;;
+
+    solaris*)
+      lt_prog_compiler_pic='-KPIC'
+      lt_prog_compiler_static='-Bstatic'
+      case $cc_basename in
+      f77* | f90* | f95* | sunf77* | sunf90* | sunf95*)
+       lt_prog_compiler_wl='-Qoption ld ';;
+      *)
+       lt_prog_compiler_wl='-Wl,';;
+      esac
+      ;;
+
+    sunos4*)
+      lt_prog_compiler_wl='-Qoption ld '
+      lt_prog_compiler_pic='-PIC'
+      lt_prog_compiler_static='-Bstatic'
+      ;;
+
+    sysv4 | sysv4.2uw2* | sysv4.3*)
+      lt_prog_compiler_wl='-Wl,'
+      lt_prog_compiler_pic='-KPIC'
+      lt_prog_compiler_static='-Bstatic'
+      ;;
+
+    sysv4*MP*)
+      if test -d /usr/nec ;then
+       lt_prog_compiler_pic='-Kconform_pic'
+       lt_prog_compiler_static='-Bstatic'
+      fi
+      ;;
+
+    sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*)
+      lt_prog_compiler_wl='-Wl,'
+      lt_prog_compiler_pic='-KPIC'
+      lt_prog_compiler_static='-Bstatic'
+      ;;
+
+    unicos*)
+      lt_prog_compiler_wl='-Wl,'
+      lt_prog_compiler_can_build_shared=no
+      ;;
+
+    uts4*)
+      lt_prog_compiler_pic='-pic'
+      lt_prog_compiler_static='-Bstatic'
+      ;;
+
+    *)
+      lt_prog_compiler_can_build_shared=no
+      ;;
+    esac
+  fi
+
+case $host_os in
+  # For platforms which do not support PIC, -DPIC is meaningless:
+  *djgpp*)
+    lt_prog_compiler_pic=
+    ;;
+  *)
+    lt_prog_compiler_pic="$lt_prog_compiler_pic -DPIC"
+    ;;
+esac
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $compiler option to produce PIC" >&5
+$as_echo_n "checking for $compiler option to produce PIC... " >&6; }
+if test "${lt_cv_prog_compiler_pic+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  lt_cv_prog_compiler_pic=$lt_prog_compiler_pic
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic" >&5
+$as_echo "$lt_cv_prog_compiler_pic" >&6; }
+lt_prog_compiler_pic=$lt_cv_prog_compiler_pic
+
+#
+# Check to make sure the PIC flag actually works.
+#
+if test -n "$lt_prog_compiler_pic"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler PIC flag $lt_prog_compiler_pic works" >&5
+$as_echo_n "checking if $compiler PIC flag $lt_prog_compiler_pic works... " >&6; }
+if test "${lt_cv_prog_compiler_pic_works+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  lt_cv_prog_compiler_pic_works=no
+   ac_outfile=conftest.$ac_objext
+   echo "$lt_simple_compile_test_code" > conftest.$ac_ext
+   lt_compiler_flag="$lt_prog_compiler_pic -DPIC"
+   # Insert the option either (1) after the last *FLAGS variable, or
+   # (2) before a word containing "conftest.", or (3) at the end.
+   # Note that $ac_compile itself does not contain backslashes and begins
+   # with a dollar sign (not a hyphen), so the echo should work correctly.
+   # The option is referenced via a variable to avoid confusing sed.
+   lt_compile=`echo "$ac_compile" | $SED \
+   -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
+   -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
+   -e 's:$: $lt_compiler_flag:'`
+   (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5)
+   (eval "$lt_compile" 2>conftest.err)
+   ac_status=$?
+   cat conftest.err >&5
+   echo "$as_me:$LINENO: \$? = $ac_status" >&5
+   if (exit $ac_status) && test -s "$ac_outfile"; then
+     # The compiler can only warn and ignore the option if not recognized
+     # So say no if there are warnings other than the usual output.
+     $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp
+     $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2
+     if test ! -s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then
+       lt_cv_prog_compiler_pic_works=yes
+     fi
+   fi
+   $RM conftest*
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic_works" >&5
+$as_echo "$lt_cv_prog_compiler_pic_works" >&6; }
+
+if test x"$lt_cv_prog_compiler_pic_works" = xyes; then
+    case $lt_prog_compiler_pic in
+     "" | " "*) ;;
+     *) lt_prog_compiler_pic=" $lt_prog_compiler_pic" ;;
+     esac
+else
+    lt_prog_compiler_pic=
+     lt_prog_compiler_can_build_shared=no
+fi
+
+fi
+
+
+
+
+
+
+
+
+
+
+
+#
+# Check to make sure the static flag actually works.
+#
+wl=$lt_prog_compiler_wl eval lt_tmp_static_flag=\"$lt_prog_compiler_static\"
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler static flag $lt_tmp_static_flag works" >&5
+$as_echo_n "checking if $compiler static flag $lt_tmp_static_flag works... " >&6; }
+if test "${lt_cv_prog_compiler_static_works+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  lt_cv_prog_compiler_static_works=no
+   save_LDFLAGS="$LDFLAGS"
+   LDFLAGS="$LDFLAGS $lt_tmp_static_flag"
+   echo "$lt_simple_link_test_code" > conftest.$ac_ext
+   if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then
+     # The linker can only warn and ignore the option if not recognized
+     # So say no if there are warnings
+     if test -s conftest.err; then
+       # Append any errors to the config.log.
+       cat conftest.err 1>&5
+       $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp
+       $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2
+       if diff conftest.exp conftest.er2 >/dev/null; then
+         lt_cv_prog_compiler_static_works=yes
+       fi
+     else
+       lt_cv_prog_compiler_static_works=yes
+     fi
+   fi
+   $RM -r conftest*
+   LDFLAGS="$save_LDFLAGS"
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_static_works" >&5
+$as_echo "$lt_cv_prog_compiler_static_works" >&6; }
+
+if test x"$lt_cv_prog_compiler_static_works" = xyes; then
+    :
+else
+    lt_prog_compiler_static=
+fi
+
+
+
+
+
+  # Probe whether $compiler accepts -c together with -o: compile the simple test
+  # source inside a scratch conftest/ directory, asking for out/conftest2.$ac_objext.
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5
+$as_echo_n "checking if $compiler supports -c -o file.$ac_objext... " >&6; }
+if test "${lt_cv_prog_compiler_c_o+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  lt_cv_prog_compiler_c_o=no
+   $RM -r conftest 2>/dev/null
+   mkdir conftest
+   cd conftest
+   mkdir out
+   echo "$lt_simple_compile_test_code" > conftest.$ac_ext
+   # The -o option under test; the sed script below splices it into $ac_compile.
+   lt_compiler_flag="-o out/conftest2.$ac_objext"
+   # Insert the option either (1) after the last *FLAGS variable, or
+   # (2) before a word containing "conftest.", or (3) at the end.
+   # Note that $ac_compile itself does not contain backslashes and begins
+   # with a dollar sign (not a hyphen), so the echo should work correctly.
+   lt_compile=`echo "$ac_compile" | $SED \
+   -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
+   -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
+   -e 's:$: $lt_compiler_flag:'`
+   (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5)
+   (eval "$lt_compile" 2>out/conftest.err)
+   ac_status=$?
+   cat out/conftest.err >&5
+   echo "$as_me:$LINENO: \$? = $ac_status" >&5
+   if (exit $ac_status) && test -s out/conftest2.$ac_objext
+   then
+     # A compiler may merely warn about an unrecognized option and ignore it,
+     # so accept only empty diagnostics or ones identical to the boilerplate.
+     $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp
+     $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2
+     if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then
+       lt_cv_prog_compiler_c_o=yes
+     fi
+   fi
+   chmod u+w . 2>&5
+   $RM conftest*
+   # SGI C++ compiler will create directory out/ii_files/ for
+   # template instantiation
+   test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files
+   $RM out/* && rmdir out
+   cd ..
+   $RM -r conftest
+   $RM conftest*
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o" >&5
+$as_echo "$lt_cv_prog_compiler_c_o" >&6; }
+
+
+
+
+  # Second copy of the -c -o probe.  The preceding copy always leaves
+  # lt_cv_prog_compiler_c_o set, so this one normally reports "(cached)".
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5
+$as_echo_n "checking if $compiler supports -c -o file.$ac_objext... " >&6; }
+if test "${lt_cv_prog_compiler_c_o+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  lt_cv_prog_compiler_c_o=no
+   $RM -r conftest 2>/dev/null
+   mkdir conftest
+   cd conftest
+   mkdir out
+   echo "$lt_simple_compile_test_code" > conftest.$ac_ext
+   # The -o option under test; the sed script below splices it into $ac_compile.
+   lt_compiler_flag="-o out/conftest2.$ac_objext"
+   # Insert the option either (1) after the last *FLAGS variable, or
+   # (2) before a word containing "conftest.", or (3) at the end.
+   # Note that $ac_compile itself does not contain backslashes and begins
+   # with a dollar sign (not a hyphen), so the echo should work correctly.
+   lt_compile=`echo "$ac_compile" | $SED \
+   -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
+   -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
+   -e 's:$: $lt_compiler_flag:'`
+   (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5)
+   (eval "$lt_compile" 2>out/conftest.err)
+   ac_status=$?
+   cat out/conftest.err >&5
+   echo "$as_me:$LINENO: \$? = $ac_status" >&5
+   if (exit $ac_status) && test -s out/conftest2.$ac_objext
+   then
+     # A compiler may merely warn about an unrecognized option and ignore it,
+     # so accept only empty diagnostics or ones identical to the boilerplate.
+     $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp
+     $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2
+     if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then
+       lt_cv_prog_compiler_c_o=yes
+     fi
+   fi
+   chmod u+w . 2>&5
+   $RM conftest*
+   # SGI C++ compiler will create directory out/ii_files/ for
+   # template instantiation
+   test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files
+   $RM out/* && rmdir out
+   cd ..
+   $RM -r conftest
+   $RM conftest*
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o" >&5
+$as_echo "$lt_cv_prog_compiler_c_o" >&6; }
+
+  # Decide need_locks: it becomes "no" unless lt_cv_prog_compiler_c_o is "no" and
+  # need_locks is not already "no".  Otherwise probe hard links: ln must fail for a
+  # missing source, succeed for a fresh file, and fail once the target exists.
+hard_links="nottested"
+if test "$lt_cv_prog_compiler_c_o" = no && test "$need_locks" != no; then
+  # In this branch need_locks keeps its given value unless the probe fails (then "warn").
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking if we can lock with hard links" >&5
+$as_echo_n "checking if we can lock with hard links... " >&6; }
+  hard_links=yes
+  $RM conftest*
+  ln conftest.a conftest.b 2>/dev/null && hard_links=no
+  touch conftest.a
+  ln conftest.a conftest.b 2>&5 || hard_links=no
+  ln conftest.a conftest.b 2>/dev/null && hard_links=no
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $hard_links" >&5
+$as_echo "$hard_links" >&6; }
+  if test "$hard_links" = no; then
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: \`$CC' does not support \`-c -o', so \`make -j' may be unsafe" >&5
+$as_echo "$as_me: WARNING: \`$CC' does not support \`-c -o', so \`make -j' may be unsafe" >&2;}
+    need_locks=warn
+  fi
+else
+  need_locks=no
+fi
+
+
+
+
+  # Begin the shared-library linker probe: announce the check, then reset every
+  # link-related setting to a default before the host-specific cases adjust them.
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the $compiler linker ($LD) supports shared libraries" >&5
+$as_echo_n "checking whether the $compiler linker ($LD) supports shared libraries... " >&6; }
+
+  runpath_var=
+  allow_undefined_flag=
+  always_export_symbols=no
+  archive_cmds=
+  archive_expsym_cmds=
+  compiler_needs_object=no
+  enable_shared_with_static_runtimes=no
+  export_dynamic_flag_spec=
+  export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols'
+  hardcode_automatic=no
+  hardcode_direct=no
+  hardcode_direct_absolute=no
+  hardcode_libdir_flag_spec=
+  hardcode_libdir_separator=
+  hardcode_minus_L=no
+  hardcode_shlibpath_var=unsupported
+  inherit_rpath=no
+  link_all_deplibs=unknown
+  module_cmds=
+  module_expsym_cmds=
+  old_archive_from_new_cmds=
+  old_archive_from_expsyms_cmds=
+  thread_safe_flag_spec=
+  whole_archive_flag_spec=
+  # include_expsyms should be a list of space-separated symbols to be *always*
+  # included in the symbol list
+  include_expsyms=
+  # exclude_expsyms can be an extended regexp of symbols to exclude
+  # it will be wrapped by ` (' and `)$', so one must not match beginning or
+  # end of line.  Example: `a|bc|.*d.*' will exclude the symbols `a' and `bc',
+  # as well as any symbol that contains `d'.
+  exclude_expsyms='_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*'
+  # Although _GLOBAL_OFFSET_TABLE_ is a valid symbol C name, most a.out
+  # platforms (ab)use it in PIC code, but their linkers get confused if
+  # the symbol is explicitly referenced.  Since portable code cannot
+  # rely on this symbol name, it's probably fine to never include it in
+  # preloaded symbol tables.
+  # Exclude shared library initialization/finalization symbols.
+  extract_expsyms_cmds=
+  # Override with_gnu_ld on hosts where the linker flavour is presumed, not detected.
+  case $host_os in
+  cygwin* | mingw* | pw32* | cegcc*)
+    # FIXME: the MSVC++ port hasn't been tested in a loooong time
+    # When not using gcc, we currently assume that we are using
+    # Microsoft Visual C++.
+    if test "$GCC" != yes; then
+      with_gnu_ld=no
+    fi
+    ;;
+  interix*)
+    # we just hope/assume this is gcc and not c89 (= MSVC++)
+    with_gnu_ld=yes
+    ;;
+  openbsd*)
+    with_gnu_ld=no
+    ;;
+  esac
+  # Shared-library support is assumed until a later check sets ld_shlibs=no.
+  ld_shlibs=yes
+
+  # On some targets, GNU ld is compatible enough with the native linker
+  # that we're better off using the native interface for both.
+  lt_use_gnu_ld_interface=no
+  if test "$with_gnu_ld" = yes; then
+    case $host_os in
+      aix*)
+       # The AIX port of GNU ld has always aspired to compatibility
+       # with the native linker.  However, as the warning in the GNU ld
+       # block says, versions before 2.19.5* couldn't really create working
+       # shared libraries, regardless of the interface used.
+       case `$LD -v 2>&1` in
+         *\ \(GNU\ Binutils\)\ 2.19.5*) ;;
+         *\ \(GNU\ Binutils\)\ 2.[2-9]*) ;;
+         *\ \(GNU\ Binutils\)\ [3-9]*) ;;
+         *)
+           lt_use_gnu_ld_interface=yes
+           ;;
+       esac
+       ;;
+      *)
+       lt_use_gnu_ld_interface=yes
+       ;;
+    esac
+  fi
+
+  if test "$lt_use_gnu_ld_interface" = yes; then
+    # If archive_cmds runs LD, not CC, wlarc should be empty
+    wlarc='${wl}'
+
+    # Set some defaults for GNU ld with shared library support. These
+    # are reset later if shared libraries are not supported. Putting them
+    # here allows them to be overridden if necessary.
+    runpath_var=LD_RUN_PATH
+    hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
+    export_dynamic_flag_spec='${wl}--export-dynamic'
+    # Ancient GNU ld didn't support --whole-archive et al.
+    if $LD --help 2>&1 | $GREP 'no-whole-archive' > /dev/null; then
+      whole_archive_flag_spec="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive'
+    else
+      whole_archive_flag_spec=
+    fi
+    supports_anon_versioning=no
+    case `$LD -v 2>&1` in
+      *GNU\ gold*) supports_anon_versioning=yes ;;
+      *\ [01].* | *\ 2.[0-9].* | *\ 2.10.*) ;; # catch versions < 2.11
+      *\ 2.11.93.0.2\ *) supports_anon_versioning=yes ;; # RH7.3 ...
+      *\ 2.11.92.0.12\ *) supports_anon_versioning=yes ;; # Mandrake 8.2 ...
+      *\ 2.11.*) ;; # other 2.11 versions
+      *) supports_anon_versioning=yes ;;
+    esac
+
+    # See if GNU ld supports shared libraries.
+    case $host_os in
+    aix[3-9]*)
+      # On AIX/PPC, the GNU linker is very broken
+      if test "$host_cpu" != ia64; then
+       ld_shlibs=no
+       cat <<_LT_EOF 1>&2
+
+*** Warning: the GNU linker, at least up to release 2.19, is reported
+*** to be unable to reliably create shared libraries on AIX.
+*** Therefore, libtool is disabling shared libraries support.  If you
+*** really care for shared libraries, you may want to install binutils
+*** 2.20 or above, or modify your PATH so that a non-GNU linker is found.
+*** You will then need to restart the configuration process.
+
+_LT_EOF
+      fi
+      ;;
+
+    amigaos*)
+      case $host_cpu in
+      powerpc)
+            # see comment about AmigaOS4 .so support
+            archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+            archive_expsym_cmds=''
+        ;;
+      m68k)
+            archive_cmds='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)'
+            hardcode_libdir_flag_spec='-L$libdir'
+            hardcode_minus_L=yes
+        ;;
+      esac
+      ;;
+
+    beos*)
+      if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then
+       allow_undefined_flag=unsupported
+       # Joseph Beckenbach <jrb3@best.com> says some releases of gcc
+       # support --undefined.  This deserves some investigation.  FIXME
+       archive_cmds='$CC -nostart $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+      else
+       ld_shlibs=no
+      fi
+      ;;
+
+    cygwin* | mingw* | pw32* | cegcc*)
+      # _LT_TAGVAR(hardcode_libdir_flag_spec, ) is actually meaningless,
+      # as there is no search path for DLLs.
+      hardcode_libdir_flag_spec='-L$libdir'
+      export_dynamic_flag_spec='${wl}--export-all-symbols'
+      allow_undefined_flag=unsupported
+      always_export_symbols=no
+      enable_shared_with_static_runtimes=yes
+      export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1 DATA/;s/^.*[ ]__nm__\([^ ]*\)[ ][^ ]*/\1 DATA/;/^I[ ]/d;/^[AITW][ ]/s/.* //'\'' | sort | uniq > $export_symbols'
+      exclude_expsyms='[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname'
+
+      if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then
+        archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib'
+       # If the export-symbols file already is a .def file (1st line
+       # is EXPORTS), use it as is; otherwise, prepend...
+       archive_expsym_cmds='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then
+         cp $export_symbols $output_objdir/$soname.def;
+       else
+         echo EXPORTS > $output_objdir/$soname.def;
+         cat $export_symbols >> $output_objdir/$soname.def;
+       fi~
+       $CC -shared $output_objdir/$soname.def $libobjs $deplibs $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib'
+      else
+       ld_shlibs=no
+      fi
+      ;;
+
+    haiku*)
+      archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+      link_all_deplibs=yes
+      ;;
+
+    interix[3-9]*)
+      hardcode_direct=no
+      hardcode_shlibpath_var=no
+      hardcode_libdir_flag_spec='${wl}-rpath,$libdir'
+      export_dynamic_flag_spec='${wl}-E'
+      # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc.
+      # Instead, shared libraries are loaded at an image base (0x10000000 by
+      # default) and relocated if they conflict, which is a slow, very
+      # memory-consuming and fragmenting process.  To avoid this, we pick a random,
+      # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link
+      # time.  Moving up from 0x10000000 also allows more sbrk(2) space.
+      archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib'
+      archive_expsym_cmds='sed "s,^,_," $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--retain-symbols-file,$output_objdir/$soname.expsym ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib'
+      ;;
+
+    gnu* | linux* | tpf* | k*bsd*-gnu | kopensolaris*-gnu)
+      tmp_diet=no
+      if test "$host_os" = linux-dietlibc; then
+       case $cc_basename in
+         diet\ *) tmp_diet=yes;;       # linux-dietlibc with static linking (!diet-dyn)
+       esac
+      fi
+      if $LD --help 2>&1 | $EGREP ': supported targets:.* elf' > /dev/null \
+        && test "$tmp_diet" = no
+      then
+       tmp_addflag=' $pic_flag'
+       tmp_sharedflag='-shared'
+       case $cc_basename,$host_cpu in
+        pgcc*)                         # Portland Group C compiler
+         whole_archive_flag_spec='${wl}--whole-archive`for conv in $convenience\"\"; do test  -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive'
+         tmp_addflag=' $pic_flag'
+         ;;
+       pgf77* | pgf90* | pgf95* | pgfortran*)
+                                       # Portland Group f77 and f90 compilers
+         whole_archive_flag_spec='${wl}--whole-archive`for conv in $convenience\"\"; do test  -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive'
+         tmp_addflag=' $pic_flag -Mnomain' ;;
+       ecc*,ia64* | icc*,ia64*)        # Intel C compiler on ia64
+         tmp_addflag=' -i_dynamic' ;;
+       efc*,ia64* | ifort*,ia64*)      # Intel Fortran compiler on ia64
+         tmp_addflag=' -i_dynamic -nofor_main' ;;
+       ifc* | ifort*)                  # Intel Fortran compiler
+         tmp_addflag=' -nofor_main' ;;
+       lf95*)                          # Lahey Fortran 8.1
+         whole_archive_flag_spec=
+         tmp_sharedflag='--shared' ;;
+       xl[cC]* | bgxl[cC]* | mpixl[cC]*) # IBM XL C 8.0 on PPC (deal with xlf below)
+         tmp_sharedflag='-qmkshrobj'
+         tmp_addflag= ;;
+       nvcc*)  # Cuda Compiler Driver 2.2
+         whole_archive_flag_spec='${wl}--whole-archive`for conv in $convenience\"\"; do test  -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive'
+         compiler_needs_object=yes
+         ;;
+       esac
+       case `$CC -V 2>&1 | sed 5q` in
+       *Sun\ C*)                       # Sun C 5.9
+         whole_archive_flag_spec='${wl}--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive'
+         compiler_needs_object=yes
+         tmp_sharedflag='-G' ;;
+       *Sun\ F*)                       # Sun Fortran 8.3
+         tmp_sharedflag='-G' ;;
+       esac
+       archive_cmds='$CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+
+        if test "x$supports_anon_versioning" = xyes; then
+          archive_expsym_cmds='echo "{ global:" > $output_objdir/$libname.ver~
+           cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~
+           echo "local: *; };" >> $output_objdir/$libname.ver~
+           $CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-version-script ${wl}$output_objdir/$libname.ver -o $lib'
+        fi
+
+       case $cc_basename in
+       xlf* | bgf* | bgxlf* | mpixlf*)
+         # IBM XL Fortran 10.1 on PPC cannot create shared libs itself
+         whole_archive_flag_spec='--whole-archive$convenience --no-whole-archive'
+         hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
+         archive_cmds='$LD -shared $libobjs $deplibs $linker_flags -soname $soname -o $lib'
+         if test "x$supports_anon_versioning" = xyes; then
+           archive_expsym_cmds='echo "{ global:" > $output_objdir/$libname.ver~
+             cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~
+             echo "local: *; };" >> $output_objdir/$libname.ver~
+             $LD -shared $libobjs $deplibs $linker_flags -soname $soname -version-script $output_objdir/$libname.ver -o $lib'
+         fi
+         ;;
+       esac
+      else
+        ld_shlibs=no
+      fi
+      ;;
+
+    netbsd*)
+      if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then
+       archive_cmds='$LD -Bshareable $libobjs $deplibs $linker_flags -o $lib'
+       wlarc=
+      else
+       archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+       archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
+      fi
+      ;;
+
+    solaris*)
+      if $LD -v 2>&1 | $GREP 'BFD 2\.8' > /dev/null; then
+       ld_shlibs=no
+       cat <<_LT_EOF 1>&2
+
+*** Warning: The releases 2.8.* of the GNU linker cannot reliably
+*** create shared libraries on Solaris systems.  Therefore, libtool
+*** is disabling shared libraries support.  We urge you to upgrade GNU
+*** binutils to release 2.9.1 or newer.  Another option is to modify
+*** your PATH or compiler configuration so that the native linker is
+*** used, and then restart.
+
+_LT_EOF
+      elif $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then
+       archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+       archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
+      else
+       ld_shlibs=no
+      fi
+      ;;
+
+    sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX*)
+      case `$LD -v 2>&1` in
+        *\ [01].* | *\ 2.[0-9].* | *\ 2.1[0-5].*)
+       ld_shlibs=no
+       cat <<_LT_EOF 1>&2
+
+*** Warning: Releases of the GNU linker prior to 2.16.91.0.3 can not
+*** reliably create shared libraries on SCO systems.  Therefore, libtool
+*** is disabling shared libraries support.  We urge you to upgrade GNU
+*** binutils to release 2.16.91.0.3 or newer.  Another option is to modify
+*** your PATH or compiler configuration so that the native linker is
+*** used, and then restart.
+
+_LT_EOF
+       ;;
+       *)
+         # For security reasons, it is highly recommended that you always
+         # use absolute paths for naming shared libraries, and exclude the
+         # DT_RUNPATH tag from executables and libraries.  But doing so
+         # requires that you compile everything twice, which is a pain.
+         if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then
+           hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
+           archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+           archive_expsym_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
+         else
+           ld_shlibs=no
+         fi
+       ;;
+      esac
+      ;;
+
+    sunos4*)
+      archive_cmds='$LD -assert pure-text -Bshareable -o $lib $libobjs $deplibs $linker_flags'
+      wlarc=
+      hardcode_direct=yes
+      hardcode_shlibpath_var=no
+      ;;
+
+    *)
+      if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then
+       archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+       archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
+      else
+       ld_shlibs=no
+      fi
+      ;;
+    esac
+
+    if test "$ld_shlibs" = no; then
+      runpath_var=
+      hardcode_libdir_flag_spec=
+      export_dynamic_flag_spec=
+      whole_archive_flag_spec=
+    fi
+  else
+    # PORTME fill in a description of your system's linker (not GNU ld)
+    case $host_os in
+    aix3*)
+      allow_undefined_flag=unsupported
+      always_export_symbols=yes
+      archive_expsym_cmds='$LD -o $output_objdir/$soname $libobjs $deplibs $linker_flags -bE:$export_symbols -T512 -H512 -bM:SRE~$AR $AR_FLAGS $lib $output_objdir/$soname'
+      # Note: this linker hardcodes the directories in LIBPATH if there
+      # are no directories specified by -L.
+      hardcode_minus_L=yes
+      if test "$GCC" = yes && test -z "$lt_prog_compiler_static"; then
+       # Neither direct hardcoding nor static linking is supported with a
+       # broken collect2.
+       hardcode_direct=unsupported
+      fi
+      ;;
+
+    aix[4-9]*)
+      if test "$host_cpu" = ia64; then
+       # On IA64, the linker does run time linking by default, so we don't
+       # have to do anything special.
+       aix_use_runtimelinking=no
+       exp_sym_flag='-Bexport'
+       no_entry_flag=""
+      else
+       # If we're using GNU nm, then we don't want the "-C" option.
+       # -C means demangle to AIX nm, but means don't demangle with GNU nm
+       # Also, AIX nm treats weak defined symbols like other global
+       # defined symbols, whereas GNU nm marks them as "W".
+       if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then
+         export_symbols_cmds='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && (substr(\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols'
+       else
+         export_symbols_cmds='$NM -BCpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B")) && (substr(\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols'
+       fi
+       aix_use_runtimelinking=no
+
+       # Test if we are trying to use run time linking or normal
+       # AIX style linking. If -brtl is somewhere in LDFLAGS, we
+       # need to do runtime linking.
+       case $host_os in aix4.[23]|aix4.[23].*|aix[5-9]*)
+         for ld_flag in $LDFLAGS; do
+         if (test $ld_flag = "-brtl" || test $ld_flag = "-Wl,-brtl"); then
+           aix_use_runtimelinking=yes
+           break
+         fi
+         done
+         ;;
+       esac
+
+       exp_sym_flag='-bexport'
+       no_entry_flag='-bnoentry'
+      fi
+
+      # When large executables or shared objects are built, AIX ld can
+      # have problems creating the table of contents.  If linking a library
+      # or program results in "error TOC overflow" add -mminimal-toc to
+      # CXXFLAGS/CFLAGS for g++/gcc.  In the cases where that is not
+      # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS.
+
+      archive_cmds=''
+      hardcode_direct=yes
+      hardcode_direct_absolute=yes
+      hardcode_libdir_separator=':'
+      link_all_deplibs=yes
+      file_list_spec='${wl}-f,'
+
+      if test "$GCC" = yes; then
+       case $host_os in aix4.[012]|aix4.[012].*)
+       # We only want to do this on AIX 4.2 and lower, the check
+       # below for broken collect2 doesn't work under 4.3+
+         collect2name=`${CC} -print-prog-name=collect2`
+         if test -f "$collect2name" &&
+          strings "$collect2name" | $GREP resolve_lib_name >/dev/null
+         then
+         # We have reworked collect2
+         :
+         else
+         # We have old collect2
+         hardcode_direct=unsupported
+         # It fails to find uninstalled libraries when the uninstalled
+         # path is not listed in the libpath.  Setting hardcode_minus_L
+         # to unsupported forces relinking
+         hardcode_minus_L=yes
+         hardcode_libdir_flag_spec='-L$libdir'
+         hardcode_libdir_separator=
+         fi
+         ;;
+       esac
+       shared_flag='-shared'
+       if test "$aix_use_runtimelinking" = yes; then
+         shared_flag="$shared_flag "'${wl}-G'
+       fi
+      else
+       # not using gcc
+       if test "$host_cpu" = ia64; then
+       # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release
+       # chokes on -Wl,-G. The following line is correct:
+         shared_flag='-G'
+       else
+         if test "$aix_use_runtimelinking" = yes; then
+           shared_flag='${wl}-G'
+         else
+           shared_flag='${wl}-bM:SRE'
+         fi
+       fi
+      fi
+
+      export_dynamic_flag_spec='${wl}-bexpall'
+      # It seems that -bexpall does not export symbols beginning with
+      # underscore (_), so it is better to generate a list of symbols to export.
+      always_export_symbols=yes
+      if test "$aix_use_runtimelinking" = yes; then
+       # Warning - without using the other runtime loading flags (-brtl),
+       # -berok will link without error, but may produce a broken library.
+       allow_undefined_flag='-berok'
+        # Determine the default libpath from the value encoded in an
+        # empty executable.
+        if test "${lt_cv_aix_libpath+set}" = set; then
+  aix_libpath=$lt_cv_aix_libpath
+else
+  if test "${lt_cv_aix_libpath_+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+
+  lt_aix_libpath_sed='
+      /Import File Strings/,/^$/ {
+         /^0/ {
+             s/^0  *\([^ ]*\) *$/\1/
+             p
+         }
+      }'
+  lt_cv_aix_libpath_=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"`
+  # Check for a 64-bit object if we didn't find anything.
+  if test -z "$lt_cv_aix_libpath_"; then
+    lt_cv_aix_libpath_=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"`
+  fi
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+  if test -z "$lt_cv_aix_libpath_"; then
+    lt_cv_aix_libpath_="/usr/lib:/lib"
+  fi
+
+fi
+
+  aix_libpath=$lt_cv_aix_libpath_
+fi
+
+        hardcode_libdir_flag_spec='${wl}-blibpath:$libdir:'"$aix_libpath"
+        archive_expsym_cmds='$CC -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags `if test "x${allow_undefined_flag}" != "x"; then func_echo_all "${wl}${allow_undefined_flag}"; else :; fi` '"\${wl}$exp_sym_flag:\$export_symbols $shared_flag"
+      else
+       if test "$host_cpu" = ia64; then
+         hardcode_libdir_flag_spec='${wl}-R $libdir:/usr/lib:/lib'
+         allow_undefined_flag="-z nodefs"
+         archive_expsym_cmds="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags ${wl}${allow_undefined_flag} '"\${wl}$exp_sym_flag:\$export_symbols"
+       else
+        # Determine the default libpath from the value encoded in an
+        # empty executable.
+        if test "${lt_cv_aix_libpath+set}" = set; then
+  aix_libpath=$lt_cv_aix_libpath
+else
+  if test "${lt_cv_aix_libpath_+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+
+  lt_aix_libpath_sed='
+      /Import File Strings/,/^$/ {
+         /^0/ {
+             s/^0  *\([^ ]*\) *$/\1/
+             p
+         }
+      }'
+  lt_cv_aix_libpath_=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"`
+  # Check for a 64-bit object if we didn't find anything.
+  if test -z "$lt_cv_aix_libpath_"; then
+    lt_cv_aix_libpath_=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"`
+  fi
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+  if test -z "$lt_cv_aix_libpath_"; then
+    lt_cv_aix_libpath_="/usr/lib:/lib"
+  fi
+
+fi
+
+  aix_libpath=$lt_cv_aix_libpath_
+fi
+
+        hardcode_libdir_flag_spec='${wl}-blibpath:$libdir:'"$aix_libpath"
+         # Warning - without using the other run time loading flags,
+         # -berok will link without error, but may produce a broken library.
+         no_undefined_flag=' ${wl}-bernotok'
+         allow_undefined_flag=' ${wl}-berok'
+         if test "$with_gnu_ld" = yes; then
+           # We only use this code for GNU lds that support --whole-archive.
+           whole_archive_flag_spec='${wl}--whole-archive$convenience ${wl}--no-whole-archive'
+         else
+           # Exported symbols can be pulled into shared objects from archives
+           whole_archive_flag_spec='$convenience'
+         fi
+         archive_cmds_need_lc=yes
+         # This is similar to how AIX traditionally builds its shared libraries.
+         archive_expsym_cmds="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs ${wl}-bnoentry $compiler_flags ${wl}-bE:$export_symbols${allow_undefined_flag}~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$soname'
+       fi
+      fi
+      ;;
+
+    amigaos*)
+      case $host_cpu in
+      powerpc)
+            # see comment about AmigaOS4 .so support
+            archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+            archive_expsym_cmds=''
+        ;;
+      m68k)
+            archive_cmds='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)'
+            hardcode_libdir_flag_spec='-L$libdir'
+            hardcode_minus_L=yes
+        ;;
+      esac
+      ;;
+
+    bsdi[45]*)
+      export_dynamic_flag_spec=-rdynamic
+      ;;
+
+    cygwin* | mingw* | pw32* | cegcc*)
+      # When not using gcc, we currently assume that we are using
+      # Microsoft Visual C++.
+      # hardcode_libdir_flag_spec is actually meaningless, as there is
+      # no search path for DLLs.
+      case $cc_basename in
+      cl*)
+       # Native MSVC
+       hardcode_libdir_flag_spec=' '
+       allow_undefined_flag=unsupported
+       always_export_symbols=yes
+       file_list_spec='@'
+       # Tell ltmain to make .lib files, not .a files.
+       libext=lib
+       # Tell ltmain to make .dll files, not .so files.
+       shrext_cmds=".dll"
+       # FIXME: Setting linknames here is a bad hack.
+       archive_cmds='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-dll~linknames='
+       archive_expsym_cmds='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then
+           sed -n -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' -e '1\\\!p' < $export_symbols > $output_objdir/$soname.exp;
+         else
+           sed -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' < $export_symbols > $output_objdir/$soname.exp;
+         fi~
+         $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~
+         linknames='
+       # The linker will not automatically build a static lib if we build a DLL.
+       # _LT_TAGVAR(old_archive_from_new_cmds, )='true'
+       enable_shared_with_static_runtimes=yes
+       exclude_expsyms='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*'
+       export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1,DATA/'\'' | $SED -e '\''/^[AITW][ ]/s/.*[ ]//'\'' | sort | uniq > $export_symbols'
+       # Don't use ranlib
+       old_postinstall_cmds='chmod 644 $oldlib'
+       postlink_cmds='lt_outputfile="@OUTPUT@"~
+         lt_tool_outputfile="@TOOL_OUTPUT@"~
+         case $lt_outputfile in
+           *.exe|*.EXE) ;;
+           *)
+             lt_outputfile="$lt_outputfile.exe"
+             lt_tool_outputfile="$lt_tool_outputfile.exe"
+             ;;
+         esac~
+         if test "$MANIFEST_TOOL" != ":" && test -f "$lt_outputfile.manifest"; then
+           $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1;
+           $RM "$lt_outputfile.manifest";
+         fi'
+       ;;
+      *)
+       # Assume MSVC wrapper
+       hardcode_libdir_flag_spec=' '
+       allow_undefined_flag=unsupported
+       # Tell ltmain to make .lib files, not .a files.
+       libext=lib
+       # Tell ltmain to make .dll files, not .so files.
+       shrext_cmds=".dll"
+       # FIXME: Setting linknames here is a bad hack.
+       archive_cmds='$CC -o $lib $libobjs $compiler_flags `func_echo_all "$deplibs" | $SED '\''s/ -lc$//'\''` -link -dll~linknames='
+       # The linker will automatically build a .lib file if we build a DLL.
+       old_archive_from_new_cmds='true'
+       # FIXME: Should let the user specify the lib program.
+       old_archive_cmds='lib -OUT:$oldlib$oldobjs$old_deplibs'
+       enable_shared_with_static_runtimes=yes
+       ;;
+      esac
+      ;;
+
+    darwin* | rhapsody*)
+
+
+  archive_cmds_need_lc=no
+  hardcode_direct=no
+  hardcode_automatic=yes
+  hardcode_shlibpath_var=unsupported
+  if test "$lt_cv_ld_force_load" = "yes"; then
+    whole_archive_flag_spec='`for conv in $convenience\"\"; do test  -n \"$conv\" && new_convenience=\"$new_convenience ${wl}-force_load,$conv\"; done; func_echo_all \"$new_convenience\"`'
+
+  else
+    whole_archive_flag_spec=''
+  fi
+  link_all_deplibs=yes
+  allow_undefined_flag="$_lt_dar_allow_undefined"
+  case $cc_basename in
+     ifort*) _lt_dar_can_shared=yes ;;
+     *) _lt_dar_can_shared=$GCC ;;
+  esac
+  if test "$_lt_dar_can_shared" = "yes"; then
+    output_verbose_link_cmd=func_echo_all
+    archive_cmds="\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod${_lt_dsymutil}"
+    module_cmds="\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags${_lt_dsymutil}"
+    archive_expsym_cmds="sed 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring ${_lt_dar_single_mod}${_lt_dar_export_syms}${_lt_dsymutil}"
+    module_expsym_cmds="sed -e 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags${_lt_dar_export_syms}${_lt_dsymutil}"
+
+  else
+  ld_shlibs=no
+  fi
+
+      ;;
+
+    dgux*)
+      archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+      hardcode_libdir_flag_spec='-L$libdir'
+      hardcode_shlibpath_var=no
+      ;;
+
+    # FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor
+    # support.  Future versions do this automatically, but an explicit c++rt0.o
+    # does not break anything, and helps significantly (at the cost of a little
+    # extra space).
+    freebsd2.2*)
+      archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags /usr/lib/c++rt0.o'
+      hardcode_libdir_flag_spec='-R$libdir'
+      hardcode_direct=yes
+      hardcode_shlibpath_var=no
+      ;;
+
+    # Unfortunately, older versions of FreeBSD 2 do not have this feature.
+    freebsd2.*)
+      archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'
+      hardcode_direct=yes
+      hardcode_minus_L=yes
+      hardcode_shlibpath_var=no
+      ;;
+
+    # FreeBSD 3 and greater uses gcc -shared to do shared libraries.
+    freebsd* | dragonfly*)
+      archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags'
+      hardcode_libdir_flag_spec='-R$libdir'
+      hardcode_direct=yes
+      hardcode_shlibpath_var=no
+      ;;
+
+    hpux9*)
+      if test "$GCC" = yes; then
+       archive_cmds='$RM $output_objdir/$soname~$CC -shared $pic_flag ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $libobjs $deplibs $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib'
+      else
+       archive_cmds='$RM $output_objdir/$soname~$LD -b +b $install_libdir -o $output_objdir/$soname $libobjs $deplibs $linker_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib'
+      fi
+      hardcode_libdir_flag_spec='${wl}+b ${wl}$libdir'
+      hardcode_libdir_separator=:
+      hardcode_direct=yes
+
+      # hardcode_minus_L: Not really in the search PATH,
+      # but as the default location of the library.
+      hardcode_minus_L=yes
+      export_dynamic_flag_spec='${wl}-E'
+      ;;
+
+    hpux10*)
+      if test "$GCC" = yes && test "$with_gnu_ld" = no; then
+       archive_cmds='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags'
+      else
+       archive_cmds='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags'
+      fi
+      if test "$with_gnu_ld" = no; then
+       hardcode_libdir_flag_spec='${wl}+b ${wl}$libdir'
+       hardcode_libdir_separator=:
+       hardcode_direct=yes
+       hardcode_direct_absolute=yes
+       export_dynamic_flag_spec='${wl}-E'
+       # hardcode_minus_L: Not really in the search PATH,
+       # but as the default location of the library.
+       hardcode_minus_L=yes
+      fi
+      ;;
+
+    hpux11*)
+      if test "$GCC" = yes && test "$with_gnu_ld" = no; then
+       case $host_cpu in
+       hppa*64*)
+         archive_cmds='$CC -shared ${wl}+h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags'
+         ;;
+       ia64*)
+         archive_cmds='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags'
+         ;;
+       *)
+         archive_cmds='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags'
+         ;;
+       esac
+      else
+       case $host_cpu in
+       hppa*64*)
+         archive_cmds='$CC -b ${wl}+h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags'
+         ;;
+       ia64*)
+         archive_cmds='$CC -b ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags'
+         ;;
+       *)
+
+         # Older versions of the 11.00 compiler do not understand -b yet
+         # (HP92453-01 A.11.01.20 doesn't, HP92453-01 B.11.X.35175-35176.GP does)
+         { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $CC understands -b" >&5
+$as_echo_n "checking if $CC understands -b... " >&6; }
+if test "${lt_cv_prog_compiler__b+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  lt_cv_prog_compiler__b=no
+   save_LDFLAGS="$LDFLAGS"
+   LDFLAGS="$LDFLAGS -b"
+   echo "$lt_simple_link_test_code" > conftest.$ac_ext
+   if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then
+     # The linker can only warn and ignore the option if not recognized
+     # So say no if there are warnings
+     if test -s conftest.err; then
+       # Append any errors to the config.log.
+       cat conftest.err 1>&5
+       $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp
+       $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2
+       if diff conftest.exp conftest.er2 >/dev/null; then
+         lt_cv_prog_compiler__b=yes
+       fi
+     else
+       lt_cv_prog_compiler__b=yes
+     fi
+   fi
+   $RM -r conftest*
+   LDFLAGS="$save_LDFLAGS"
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler__b" >&5
+$as_echo "$lt_cv_prog_compiler__b" >&6; }
+
+if test x"$lt_cv_prog_compiler__b" = xyes; then
+    archive_cmds='$CC -b ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags'
+else
+    archive_cmds='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags'
+fi
+
+         ;;
+       esac
+      fi
+      if test "$with_gnu_ld" = no; then
+       hardcode_libdir_flag_spec='${wl}+b ${wl}$libdir'
+       hardcode_libdir_separator=:
+
+       case $host_cpu in
+       hppa*64*|ia64*)
+         hardcode_direct=no
+         hardcode_shlibpath_var=no
+         ;;
+       *)
+         hardcode_direct=yes
+         hardcode_direct_absolute=yes
+         export_dynamic_flag_spec='${wl}-E'
+
+         # hardcode_minus_L: Not really in the search PATH,
+         # but as the default location of the library.
+         hardcode_minus_L=yes
+         ;;
+       esac
+      fi
+      ;;
+
+    irix5* | irix6* | nonstopux*)
+      if test "$GCC" = yes; then
+       archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
+       # Try to use the -exported_symbol ld option, if it does not
+       # work, assume that -exports_file does not work either and
+       # implicitly export all symbols.
+       # This should be the same for all languages, so no per-tag cache variable.
+       { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the $host_os linker accepts -exported_symbol" >&5
+$as_echo_n "checking whether the $host_os linker accepts -exported_symbol... " >&6; }
+if test "${lt_cv_irix_exported_symbol+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  save_LDFLAGS="$LDFLAGS"
+          LDFLAGS="$LDFLAGS -shared ${wl}-exported_symbol ${wl}foo ${wl}-update_registry ${wl}/dev/null"
+          cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+int foo (void) { return 0; }
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  lt_cv_irix_exported_symbol=yes
+else
+  lt_cv_irix_exported_symbol=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+           LDFLAGS="$save_LDFLAGS"
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_irix_exported_symbol" >&5
+$as_echo "$lt_cv_irix_exported_symbol" >&6; }
+       if test "$lt_cv_irix_exported_symbol" = yes; then
+          archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations ${wl}-exports_file ${wl}$export_symbols -o $lib'
+       fi
+      else
+       archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib'
+       archive_expsym_cmds='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -exports_file $export_symbols -o $lib'
+      fi
+      archive_cmds_need_lc='no'
+      hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
+      hardcode_libdir_separator=:
+      inherit_rpath=yes
+      link_all_deplibs=yes
+      ;;
+
+    netbsd*)
+      if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then
+       archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'  # a.out
+      else
+       archive_cmds='$LD -shared -o $lib $libobjs $deplibs $linker_flags'      # ELF
+      fi
+      hardcode_libdir_flag_spec='-R$libdir'
+      hardcode_direct=yes
+      hardcode_shlibpath_var=no
+      ;;
+
+    newsos6)
+      archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+      hardcode_direct=yes
+      hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
+      hardcode_libdir_separator=:
+      hardcode_shlibpath_var=no
+      ;;
+
+    *nto* | *qnx*)
+      ;;
+
+    openbsd*)
+      if test -f /usr/libexec/ld.so; then
+       hardcode_direct=yes
+       hardcode_shlibpath_var=no
+       hardcode_direct_absolute=yes
+       if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
+         archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags'
+         archive_expsym_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-retain-symbols-file,$export_symbols'
+         hardcode_libdir_flag_spec='${wl}-rpath,$libdir'
+         export_dynamic_flag_spec='${wl}-E'
+       else
+         case $host_os in
+          openbsd[01].* | openbsd2.[0-7] | openbsd2.[0-7].*)
+            archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'
+            hardcode_libdir_flag_spec='-R$libdir'
+            ;;
+          *)
+            archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags'
+            hardcode_libdir_flag_spec='${wl}-rpath,$libdir'
+            ;;
+         esac
+       fi
+      else
+       ld_shlibs=no
+      fi
+      ;;
+
+    os2*)
+      hardcode_libdir_flag_spec='-L$libdir'
+      hardcode_minus_L=yes
+      allow_undefined_flag=unsupported
+      archive_cmds='$ECHO "LIBRARY $libname INITINSTANCE" > $output_objdir/$libname.def~$ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~echo DATA >> $output_objdir/$libname.def~echo " SINGLE NONSHARED" >> $output_objdir/$libname.def~echo EXPORTS >> $output_objdir/$libname.def~emxexp $libobjs >> $output_objdir/$libname.def~$CC -Zdll -Zcrtdll -o $lib $libobjs $deplibs $compiler_flags $output_objdir/$libname.def'
+      old_archive_from_new_cmds='emximp -o $output_objdir/$libname.a $output_objdir/$libname.def'
+      ;;
+
+    osf3*)
+      if test "$GCC" = yes; then
+       allow_undefined_flag=' ${wl}-expect_unresolved ${wl}\*'
+       archive_cmds='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
+      else
+       allow_undefined_flag=' -expect_unresolved \*'
+       archive_cmds='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib'
+      fi
+      archive_cmds_need_lc='no'
+      hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
+      hardcode_libdir_separator=:
+      ;;
+
+    osf4* | osf5*)     # as osf3* with the addition of -msym flag
+      if test "$GCC" = yes; then
+       allow_undefined_flag=' ${wl}-expect_unresolved ${wl}\*'
+       archive_cmds='$CC -shared${allow_undefined_flag} $pic_flag $libobjs $deplibs $compiler_flags ${wl}-msym ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
+       hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
+      else
+       allow_undefined_flag=' -expect_unresolved \*'
+       archive_cmds='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib'
+       archive_expsym_cmds='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done; printf "%s\\n" "-hidden">> $lib.exp~
+       $CC -shared${allow_undefined_flag} ${wl}-input ${wl}$lib.exp $compiler_flags $libobjs $deplibs -soname $soname `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib~$RM $lib.exp'
+
+       # Both c and cxx compiler support -rpath directly
+       hardcode_libdir_flag_spec='-rpath $libdir'
+      fi
+      archive_cmds_need_lc='no'
+      hardcode_libdir_separator=:
+      ;;
+
+    solaris*)
+      no_undefined_flag=' -z defs'
+      if test "$GCC" = yes; then
+       wlarc='${wl}'
+       archive_cmds='$CC -shared $pic_flag ${wl}-z ${wl}text ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags'
+       archive_expsym_cmds='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~
+         $CC -shared $pic_flag ${wl}-z ${wl}text ${wl}-M ${wl}$lib.exp ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp'
+      else
+       case `$CC -V 2>&1` in
+       *"Compilers 5.0"*)
+         wlarc=''
+         archive_cmds='$LD -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $linker_flags'
+         archive_expsym_cmds='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~
+         $LD -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$RM $lib.exp'
+         ;;
+       *)
+         wlarc='${wl}'
+         archive_cmds='$CC -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $compiler_flags'
+         archive_expsym_cmds='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~
+         $CC -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp'
+         ;;
+       esac
+      fi
+      hardcode_libdir_flag_spec='-R$libdir'
+      hardcode_shlibpath_var=no
+      case $host_os in
+      solaris2.[0-5] | solaris2.[0-5].*) ;;
+      *)
+       # The compiler driver will combine and reorder linker options,
+       # but understands `-z linker_flag'.  GCC discards it without `$wl',
+       # but is careful enough not to reorder.
+       # Supported since Solaris 2.6 (maybe 2.5.1?)
+       if test "$GCC" = yes; then
+         whole_archive_flag_spec='${wl}-z ${wl}allextract$convenience ${wl}-z ${wl}defaultextract'
+       else
+         whole_archive_flag_spec='-z allextract$convenience -z defaultextract'
+       fi
+       ;;
+      esac
+      link_all_deplibs=yes
+      ;;
+
+    sunos4*)
+      if test "x$host_vendor" = xsequent; then
+       # Use $CC to link under sequent, because it throws in some extra .o
+       # files that make .init and .fini sections work.
+       archive_cmds='$CC -G ${wl}-h $soname -o $lib $libobjs $deplibs $compiler_flags'
+      else
+       archive_cmds='$LD -assert pure-text -Bstatic -o $lib $libobjs $deplibs $linker_flags'
+      fi
+      hardcode_libdir_flag_spec='-L$libdir'
+      hardcode_direct=yes
+      hardcode_minus_L=yes
+      hardcode_shlibpath_var=no
+      ;;
+
+    sysv4)
+      case $host_vendor in
+       sni)
+         archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+         hardcode_direct=yes # is this really true???
+       ;;
+       siemens)
+         ## LD is ld it makes a PLAMLIB
+         ## CC just makes a GrossModule.
+         archive_cmds='$LD -G -o $lib $libobjs $deplibs $linker_flags'
+         reload_cmds='$CC -r -o $output$reload_objs'
+         hardcode_direct=no
+        ;;
+       motorola)
+         archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+         hardcode_direct=no #Motorola manual says yes, but my tests say they lie
+       ;;
+      esac
+      runpath_var='LD_RUN_PATH'
+      hardcode_shlibpath_var=no
+      ;;
+
+    sysv4.3*)
+      archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+      hardcode_shlibpath_var=no
+      export_dynamic_flag_spec='-Bexport'
+      ;;
+
+    sysv4*MP*)
+      if test -d /usr/nec; then
+       archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+       hardcode_shlibpath_var=no
+       runpath_var=LD_RUN_PATH
+       hardcode_runpath_var=yes
+       ld_shlibs=yes
+      fi
+      ;;
+
+    sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[01].[10]* | unixware7* | sco3.2v5.0.[024]*)
+      no_undefined_flag='${wl}-z,text'
+      archive_cmds_need_lc=no
+      hardcode_shlibpath_var=no
+      runpath_var='LD_RUN_PATH'
+
+      if test "$GCC" = yes; then
+       archive_cmds='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+       archive_expsym_cmds='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+      else
+       archive_cmds='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+       archive_expsym_cmds='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+      fi
+      ;;
+
+    sysv5* | sco3.2v5* | sco5v6*)
+      # Note: We can NOT use -z defs as we might desire, because we do not
+      # link with -lc, and that would cause any symbols used from libc to
+      # always be unresolved, which means just about no library would
+      # ever link correctly.  If we're not using GNU ld we use -z text
+      # though, which does catch some bad symbols but isn't as heavy-handed
+      # as -z defs.
+      no_undefined_flag='${wl}-z,text'
+      allow_undefined_flag='${wl}-z,nodefs'
+      archive_cmds_need_lc=no
+      hardcode_shlibpath_var=no
+      hardcode_libdir_flag_spec='${wl}-R,$libdir'
+      hardcode_libdir_separator=':'
+      link_all_deplibs=yes
+      export_dynamic_flag_spec='${wl}-Bexport'
+      runpath_var='LD_RUN_PATH'
+
+      if test "$GCC" = yes; then
+       archive_cmds='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+       archive_expsym_cmds='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+      else
+       archive_cmds='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+       archive_expsym_cmds='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+      fi
+      ;;
+
+    uts4*)
+      archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+      hardcode_libdir_flag_spec='-L$libdir'
+      hardcode_shlibpath_var=no
+      ;;
+
+    *)
+      ld_shlibs=no
+      ;;
+    esac
+
+    if test x$host_vendor = xsni; then   # extra export flag for vendor "sni" only
+      case $host in   # NOTE(review): patterns below are OS-only; if $host is the full cpu-vendor-os triplet they cannot match ($host_os intended?) - confirm
+      sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*)
+       export_dynamic_flag_spec='${wl}-Blargedynsym'
+       ;;
+      esac
+    fi
+  fi
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ld_shlibs" >&5
+$as_echo "$ld_shlibs" >&6; }
+test "$ld_shlibs" = no && can_build_shared=no
+
+with_gnu_ld=$with_gnu_ld
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#
+# Do we need to explicitly link libc?  The answer is left in archive_cmds_need_lc.
+#
+case "x$archive_cmds_need_lc" in
+x|xyes)   # only probe when the value is still empty or "yes"
+  # Assume -lc should be added
+  archive_cmds_need_lc=yes
+
+  if test "$enable_shared" = yes && test "$GCC" = yes; then   # the probe below runs only for shared builds with GCC
+    case $archive_cmds in
+    *'~'*)   # archive_cmds contains '~': not probed, the "yes" set above stands
+      # FIXME: we may have to deal with multi-command sequences.
+      ;;
+    '$CC '*)   # archive_cmds begins with the literal text '$CC '
+      # Test whether the compiler implicitly links with -lc since on some
+      # systems, -lgcc has to come before -lc. If gcc already passes -lc
+      # to ld, don't add -lc before -lgcc.
+      { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether -lc should be explicitly linked in" >&5
+$as_echo_n "checking whether -lc should be explicitly linked in... " >&6; }
+if test "${lt_cv_archive_cmds_need_lc+set}" = set; then :   # cache variable already set: skip the probe
+  $as_echo_n "(cached) " >&6
+else
+  $RM conftest*
+       echo "$lt_simple_compile_test_code" > conftest.$ac_ext
+
+       if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5
+  (eval $ac_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; } 2>conftest.err; then
+         soname=conftest   # placeholder values for the variables $archive_cmds refers to (used by the eval below)
+         lib=conftest
+         libobjs=conftest.$ac_objext
+         deplibs=
+         wl=$lt_prog_compiler_wl
+         pic_flag=$lt_prog_compiler_pic
+         compiler_flags=-v   # presumably -v makes the driver print its link command for the grep below - confirm
+         linker_flags=-v
+         verstring=
+         output_objdir=.
+         libname=conftest
+         lt_save_allow_undefined_flag=$allow_undefined_flag   # saved here, restored after the probe
+         allow_undefined_flag=   # empty for the duration of the probe
+         if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$archive_cmds 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1\""; } >&5
+  (eval $archive_cmds 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }
+         then
+           lt_cv_archive_cmds_need_lc=no   # " -lc " appeared in the command output: no explicit -lc needed
+         else
+           lt_cv_archive_cmds_need_lc=yes   # " -lc " not found: -lc has to be added explicitly
+         fi
+         allow_undefined_flag=$lt_save_allow_undefined_flag
+       else
+         cat conftest.err 1>&5   # compile failed: copy its stderr to fd 5; NOTE(review): lt_cv_archive_cmds_need_lc is not assigned on this path
+       fi
+       $RM conftest*
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_archive_cmds_need_lc" >&5
+$as_echo "$lt_cv_archive_cmds_need_lc" >&6; }
+      archive_cmds_need_lc=$lt_cv_archive_cmds_need_lc   # adopt the probed (or cached) answer
+      ;;
+    esac
+  fi
+  ;;
+esac
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking dynamic linker characteristics" >&5
+$as_echo_n "checking dynamic linker characteristics... " >&6; }
+
+if test "$GCC" = yes; then   # with GCC, build sys_lib_search_path_spec from "$CC -print-search-dirs"
+  case $host_os in
+    darwin*) lt_awk_arg="/^libraries:/,/LR/" ;;   # awk range: from the "libraries:" line to a line matching LR
+    *) lt_awk_arg="/^libraries:/" ;;   # only the line(s) starting with "libraries:"
+  esac
+  case $host_os in
+    mingw* | cegcc*) lt_sed_strip_eq="s,=\([A-Za-z]:\),\1,g" ;;   # drop "=" in front of a drive letter
+    *) lt_sed_strip_eq="s,=/,/,g" ;;   # drop "=" in front of "/"
+  esac
+  lt_search_path_spec=`$CC -print-search-dirs | awk $lt_awk_arg | $SED -e "s/^libraries://" -e $lt_sed_strip_eq`
+  case $lt_search_path_spec in
+  *\;*)
+    # If the path contains ";" then we assume it to be the separator;
+    # otherwise default to the standard path separator (i.e. ":").  It is
+    # assumed that no part of a normal pathname contains ";", but that should
+    # be okay in the real world where ";" in dirpaths is itself problematic.
+    lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED 's/;/ /g'`
+    ;;
+  *)
+    lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED "s/$PATH_SEPARATOR/ /g"`
+    ;;
+  esac
+  # Ok, now we have the path, separated by spaces, we can step through it
+  # and add multilib dir if necessary.
+  lt_tmp_lt_search_path_spec=   # accumulates the directories that pass the "test -d" checks below
+  lt_multi_os_dir=`$CC $CPPFLAGS $CFLAGS $LDFLAGS -print-multi-os-directory 2>/dev/null`
+  for lt_sys_path in $lt_search_path_spec; do
+    if test -d "$lt_sys_path/$lt_multi_os_dir"; then   # prefer the multi-OS subdirectory when it exists
+      lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path/$lt_multi_os_dir"
+    else   # otherwise keep the plain directory, but only if it exists
+      test -d "$lt_sys_path" && \
+       lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path"
+    fi
+  done
+  lt_search_path_spec=`$ECHO "$lt_tmp_lt_search_path_spec" | awk '
+BEGIN {RS=" "; FS="/|\n";} {
+  lt_foo="";
+  lt_count=0;
+  for (lt_i = NF; lt_i > 0; lt_i--) {
+    if ($lt_i != "" && $lt_i != ".") {
+      if ($lt_i == "..") {
+        lt_count++;
+      } else {
+        if (lt_count == 0) {
+          lt_foo="/" $lt_i lt_foo;
+        } else {
+          lt_count--;
+        }
+      }
+    }
+  }
+  if (lt_foo != "") { lt_freq[lt_foo]++; }
+  if (lt_freq[lt_foo] == 1) { print lt_foo; }
+}'`
+  # The AWK program above drops "." and resolves ".." path components and prints each path once.
+  # It erroneously prepends '/' to C:/dos/paths, so that '/' is stripped again for these hosts.
+  case $host_os in
+    mingw* | cegcc*) lt_search_path_spec=`$ECHO "$lt_search_path_spec" |\
+      $SED 's,/\([A-Za-z]:\),\1,g'` ;;
+  esac
+  sys_lib_search_path_spec=`$ECHO "$lt_search_path_spec" | $lt_NL2SP`   # presumably $lt_NL2SP turns newlines into spaces (defined outside this chunk) - confirm
+else   # not GCC: use a fixed default list
+  sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib"
+fi
+library_names_spec=   # defaults start here; the "case $host_os" that follows overrides them per host
+libname_spec='lib$name'   # single-quoted: $name is not expanded at this point
+soname_spec=
+shrext_cmds=".so"   # default shared-library extension
+postinstall_cmds=
+postuninstall_cmds=
+finish_cmds=
+finish_eval=
+shlibpath_var=
+shlibpath_overrides_runpath=unknown
+version_type=none
+dynamic_linker="$host_os ld.so"   # double-quoted: $host_os is expanded now
+sys_lib_dlsearch_path_spec="/lib /usr/lib"
+need_lib_prefix=unknown
+hardcode_into_libs=no
+
+# When you set need_version to no, make sure it does not cause -set_version
+# flags to be left without arguments.
+need_version=unknown
+
+case $host_os in
+aix3*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  library_names_spec='${libname}${release}${shared_ext}$versuffix $libname.a'
+  shlibpath_var=LIBPATH
+
+  # AIX 3 has no versioning support, so we append a major version to the name.
+  soname_spec='${libname}${release}${shared_ext}$major'
+  ;;
+
+aix[4-9]*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  hardcode_into_libs=yes
+  if test "$host_cpu" = ia64; then
+    # AIX 5 supports IA64
+    library_names_spec='${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext}$versuffix $libname${shared_ext}'
+    shlibpath_var=LD_LIBRARY_PATH
+  else
+    # With GCC up to 2.95.x, collect2 would create an import file
+    # for dependence libraries.  The import file would start with
+    # the line `#! .'.  This would cause the generated library to
+    # depend on `.', always an invalid library.  This was fixed in
+    # development snapshots of GCC prior to 3.0.
+    case $host_os in
+      aix4 | aix4.[01] | aix4.[01].*)
+      if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)'
+          echo ' yes '
+          echo '#endif'; } | ${CC} -E - | $GREP yes > /dev/null; then
+       :
+      else
+       can_build_shared=no
+      fi
+      ;;
+    esac
+    # AIX (on Power*) has no versioning support, so currently we can not hardcode correct
+    # soname into executable. Probably we can add versioning support to
+    # collect2, so additional links can be useful in future.
+    if test "$aix_use_runtimelinking" = yes; then
+      # If using run time linking (on AIX 4.2 or later) use lib<name>.so
+      # instead of lib<name>.a to let people know that these are not
+      # typical AIX shared libraries.
+      library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+    else
+      # We preserve .a as extension for shared libraries through AIX4.2
+      # and later when we are not doing run time linking.
+      library_names_spec='${libname}${release}.a $libname.a'
+      soname_spec='${libname}${release}${shared_ext}$major'
+    fi
+    shlibpath_var=LIBPATH
+  fi
+  ;;
+
+amigaos*)
+  case $host_cpu in
+  powerpc)
+    # Since July 2007 AmigaOS4 officially supports .so libraries.
+    # When compiling the executable, add -use-dynld -Lsobjs: to the compile line.
+    library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+    ;;
+  m68k)
+    library_names_spec='$libname.ixlibrary $libname.a'
+    # For each installed *.ixlibrary: delete any old /sys/libs/${libname}_ixlibrary.a with $RM (it used to be passed to test, which never removed it), then re-create the link.
+    finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`func_echo_all "$lib" | $SED '\''s%^.*/\([^/]*\)\.ixlibrary$%\1%'\''`; $RM /sys/libs/${libname}_ixlibrary.a; $show "cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a"; cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a || exit 1; done'
+    ;;
+  esac
+  ;;
+
+beos*)
+  shlibpath_var=LIBRARY_PATH
+  dynamic_linker="$host_os ld.so"
+  library_names_spec='${libname}${shared_ext}'  # a single, unversioned name
+  ;;
+
+bsdi[45]*)
+  need_version=no
+  version_type=linux # correct to gnu/linux during the next big refactor
+  soname_spec='${libname}${release}${shared_ext}$major'
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  shlibpath_var=LD_LIBRARY_PATH
+  finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir'
+  sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib"
+  # The default ld.so.conf also lists /usr/contrib/lib and /usr/X11R6/lib
+  # (/usr/X11 is a link to /usr/X11R6); they are left out of the list below
+  # so that libtool is allowed to hard-code them into programs.
+  sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib"
+  ;;
+
+cygwin* | mingw* | pw32* | cegcc*)
+  version_type=windows
+  shrext_cmds=".dll"
+  need_version=no
+  need_lib_prefix=no
+
+  case $GCC,$cc_basename in
+  yes,*)
+    # GCC (any compiler basename); the pw32 arm further down replaces library_names_spec.
+    library_names_spec='$libname.dll.a'
+    # DLL is installed to $(libdir)/../bin by postinstall_cmds
+    postinstall_cmds='base_file=`basename \${file}`~
+      dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i; echo \$dlname'\''`~
+      dldir=$destdir/`dirname \$dlpath`~
+      test -d \$dldir || mkdir -p \$dldir~
+      $install_prog $dir/$dlname \$dldir/$dlname~
+      chmod a+x \$dldir/$dlname~
+      if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then
+        eval '\''$striplib \$dldir/$dlname'\'' || exit \$?;
+      fi'
+    postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~
+      dlpath=$dir/\$dldll~
+       $RM \$dlpath'
+    shlibpath_overrides_runpath=yes
+
+    case $host_os in
+    cygwin*)
+      # Cygwin DLLs use 'cyg' prefix rather than 'lib'
+      soname_spec='`echo ${libname} | sed -e 's/^lib/cyg/'``echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}'
+
+      sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/lib/w32api"
+      ;;
+    mingw* | cegcc*)
+      # MinGW DLLs use traditional 'lib' prefix
+      soname_spec='${libname}`echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}'
+      ;;
+    pw32*)
+      # pw32 DLLs use 'pw' prefix rather than 'lib'
+      library_names_spec='`echo ${libname} | sed -e 's/^lib/pw/'``echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}'
+      ;;
+    esac
+    dynamic_linker='Win32 ld.exe'
+    ;;
+
+  *,cl*)
+    # Native MSVC: $GCC is not "yes" and the compiler basename starts with cl.
+    libname_spec='$name'
+    soname_spec='${libname}`echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}'
+    library_names_spec='${libname}.dll.lib'
+
+    case $build_os in
+    mingw*)
+      sys_lib_search_path_spec=
+      lt_save_ifs=$IFS
+      IFS=';'  # $LIB is split on semicolons by the for loop below
+      for lt_path in $LIB
+      do
+        IFS=$lt_save_ifs  # original IFS is back in effect for the loop body
+        # Let DOS variable expansion print the short 8.3 style file name.
+        lt_path=`cd "$lt_path" 2>/dev/null && cmd //C "for %i in (".") do @echo %~si"`
+        sys_lib_search_path_spec="$sys_lib_search_path_spec $lt_path"
+      done
+      IFS=$lt_save_ifs
+      # Convert to MSYS style.
+      sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | sed -e 's|\\\\|/|g' -e 's| \\([a-zA-Z]\\):| /\\1|g' -e 's|^ ||'`
+      ;;
+    cygwin*)
+      # Convert $LIB to unix form, then to dos form, then back to unix form;
+      # the dos pass yields short names (no spaces!), so the final unix form
+      # looks like /cygdrive/c/PROGRA~1:/cygdr...
+      sys_lib_search_path_spec=`cygpath --path --unix "$LIB"`
+      sys_lib_search_path_spec=`cygpath --path --dos "$sys_lib_search_path_spec" 2>/dev/null`
+      sys_lib_search_path_spec=`cygpath --path --unix "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"`
+      ;;
+    *)
+      sys_lib_search_path_spec="$LIB"
+      if $ECHO "$sys_lib_search_path_spec" | $GREP ';[c-zC-Z]:/' >/dev/null; then
+        # It is most probably a Windows format PATH.
+        sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'`
+      else
+        sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"`
+      fi
+      # FIXME: find the short name or the path components, as spaces are
+      # common. (e.g. "Program Files" -> "PROGRA~1")
+      ;;
+    esac
+
+    # DLL is installed to $(libdir)/../bin by postinstall_cmds
+    postinstall_cmds='base_file=`basename \${file}`~
+      dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i; echo \$dlname'\''`~
+      dldir=$destdir/`dirname \$dlpath`~
+      test -d \$dldir || mkdir -p \$dldir~
+      $install_prog $dir/$dlname \$dldir/$dlname'
+    postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~
+      dlpath=$dir/\$dldll~
+       $RM \$dlpath'
+    shlibpath_overrides_runpath=yes
+    dynamic_linker='Win32 link.exe'
+    ;;
+
+  *)
+    # Anything else: assume an MSVC wrapper.
+    library_names_spec='${libname}`echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext} $libname.lib'
+    dynamic_linker='Win32 ld.exe'
+    ;;
+  esac
+  # FIXME: first we should search . and the directory the executable is in
+  shlibpath_var=PATH
+  ;;
+
+darwin* | rhapsody*)
+  version_type=darwin
+  need_version=no
+  need_lib_prefix=no
+  soname_spec='${libname}${release}${major}$shared_ext'
+  library_names_spec='${libname}${release}${major}$shared_ext ${libname}$shared_ext'
+  shrext_cmds='`test .$module = .yes && echo .so || echo .dylib`'  # .so when $module is yes, else .dylib
+  # Loader identification and search-path settings.
+  dynamic_linker="$host_os dyld"
+  shlibpath_var=DYLD_LIBRARY_PATH
+  shlibpath_overrides_runpath=yes
+  sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib'
+  sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/local/lib"
+  ;;
+
+dgux*)
+  need_version=no
+  need_lib_prefix=no
+  version_type=linux # correct to gnu/linux during the next big refactor
+  shlibpath_var=LD_LIBRARY_PATH
+  soname_spec='${libname}${release}${shared_ext}$major'  # soname ends in the major number
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname$shared_ext'
+  ;;
+
+freebsd* | dragonfly*)
+  # DragonFly does not have aout.  When/if they implement a new
+  # versioning mechanism, adjust this.
+  if test -x /usr/bin/objformat; then  # use the system tool's answer when it is present
+    objformat=`/usr/bin/objformat`
+  else
+    case $host_os in
+    freebsd[23].*) objformat=aout ;;  # no tool: FreeBSD 2.x/3.x are taken to be a.out
+    *) objformat=elf ;;
+    esac
+  fi
+  version_type=freebsd-$objformat
+  case $version_type in
+    freebsd-elf*)
+      library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}'
+      need_version=no
+      need_lib_prefix=no
+      ;;
+    freebsd-*)  # any other object format; listed after freebsd-elf* so ELF never lands here
+      library_names_spec='${libname}${release}${shared_ext}$versuffix $libname${shared_ext}$versuffix'
+      need_version=yes
+      ;;
+  esac
+  shlibpath_var=LD_LIBRARY_PATH
+  case $host_os in  # release-specific runpath and hardcoding settings; first match wins
+  freebsd2.*)
+    shlibpath_overrides_runpath=yes
+    ;;
+  freebsd3.[01]* | freebsdelf3.[01]*)
+    shlibpath_overrides_runpath=yes
+    hardcode_into_libs=yes
+    ;;
+  freebsd3.[2-9]* | freebsdelf3.[2-9]* | \
+  freebsd4.[0-5] | freebsdelf4.[0-5] | freebsd4.1.1 | freebsdelf4.1.1)
+    shlibpath_overrides_runpath=no
+    hardcode_into_libs=yes
+    ;;
+  *) # from 4.6 on, and DragonFly
+    shlibpath_overrides_runpath=yes
+    hardcode_into_libs=yes
+    ;;
+  esac
+  ;;
+
+gnu*)
+  need_version=no
+  need_lib_prefix=no
+  version_type=linux # correct to gnu/linux during the next big refactor
+  soname_spec='${libname}${release}${shared_ext}$major'
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}'
+  hardcode_into_libs=yes
+  shlibpath_overrides_runpath=no
+  shlibpath_var=LD_LIBRARY_PATH
+  ;;
+
+haiku*)
+  need_version=no
+  need_lib_prefix=no
+  version_type=linux # correct to gnu/linux during the next big refactor
+  soname_spec='${libname}${release}${shared_ext}$major'
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}'
+  dynamic_linker="$host_os runtime_loader"
+  hardcode_into_libs=yes
+  shlibpath_overrides_runpath=yes
+  shlibpath_var=LIBRARY_PATH
+  sys_lib_dlsearch_path_spec='/boot/home/config/lib /boot/common/lib /boot/system/lib'
+  ;;
+
+hpux9* | hpux10* | hpux11*)
+  # Give a soname corresponding to the major version so that dld.sl refuses to
+  # link against other versions.
+  version_type=sunos
+  need_lib_prefix=no
+  need_version=no
+  case $host_cpu in  # extension, loader name and search paths depend on the CPU
+  ia64*)
+    shrext_cmds='.so'
+    hardcode_into_libs=yes
+    dynamic_linker="$host_os dld.so"
+    shlibpath_var=LD_LIBRARY_PATH
+    shlibpath_overrides_runpath=yes # Unless +noenvvar is specified.
+    library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+    soname_spec='${libname}${release}${shared_ext}$major'
+    if test "X$HPUX_IA64_MODE" = X32; then  # 32-bit directories only when the mode is exactly 32
+      sys_lib_search_path_spec="/usr/lib/hpux32 /usr/local/lib/hpux32 /usr/local/lib"
+    else
+      sys_lib_search_path_spec="/usr/lib/hpux64 /usr/local/lib/hpux64"
+    fi
+    sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec
+    ;;
+  hppa*64*)
+    shrext_cmds='.sl'
+    hardcode_into_libs=yes
+    dynamic_linker="$host_os dld.sl"
+    shlibpath_var=LD_LIBRARY_PATH # How should we handle SHLIB_PATH
+    shlibpath_overrides_runpath=yes # Unless +noenvvar is specified.
+    library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+    soname_spec='${libname}${release}${shared_ext}$major'
+    sys_lib_search_path_spec="/usr/lib/pa20_64 /usr/ccs/lib/pa20_64"
+    sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec
+    ;;
+  *)  # every other HP-UX CPU; hardcode_into_libs and the search paths are not set here
+    shrext_cmds='.sl'
+    dynamic_linker="$host_os dld.sl"
+    shlibpath_var=SHLIB_PATH
+    shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH
+    library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+    soname_spec='${libname}${release}${shared_ext}$major'
+    ;;
+  esac
+  # HP-UX runs *really* slowly unless shared libraries are mode 555, ...
+  postinstall_cmds='chmod 555 $lib'
+  # or fails outright, so override atomically:
+  install_override_mode=555
+  ;;
+
+interix[3-9]*)
+  need_version=no
+  need_lib_prefix=no
+  version_type=linux # correct to gnu/linux during the next big refactor
+  dynamic_linker='Interix 3.x ld.so.1 (PE, like ELF)'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
+  hardcode_into_libs=yes
+  shlibpath_overrides_runpath=no
+  shlibpath_var=LD_LIBRARY_PATH
+  ;;
+
+irix5* | irix6* | nonstopux*)
+  case $host_os in
+    nonstopux*) version_type=nonstopux ;;
+    *)  # irix5*/irix6*: the version type follows the linker flavour
+       if test "$lt_cv_prog_gnu_ld" = yes; then
+               version_type=linux # correct to gnu/linux during the next big refactor
+       else
+               version_type=irix
+       fi ;;
+  esac
+  need_lib_prefix=no
+  need_version=no
+  soname_spec='${libname}${release}${shared_ext}$major'
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext} $libname${shared_ext}'
+  case $host_os in
+  irix5* | nonstopux*)
+    libsuff= shlibsuff=  # no ABI suffix on directories or on the path variable
+    ;;
+  *)
+    case $LD in # libtool.m4 will add one of these switches to LD
+    *-32|*"-32 "|*-melf32bsmip|*"-melf32bsmip ")
+      libsuff= shlibsuff= libmagic=32-bit;;
+    *-n32|*"-n32 "|*-melf32bmipn32|*"-melf32bmipn32 ")
+      libsuff=32 shlibsuff=N32 libmagic=N32;;
+    *-64|*"-64 "|*-melf64bmip|*"-melf64bmip ")
+      libsuff=64 shlibsuff=64 libmagic=64-bit;;
+    *) libsuff= shlibsuff= libmagic=never-match;;
+    esac
+    ;;
+  esac
+  shlibpath_var=LD_LIBRARY${shlibsuff}_PATH  # e.g. LD_LIBRARYN32_PATH when shlibsuff=N32
+  shlibpath_overrides_runpath=no
+  sys_lib_search_path_spec="/usr/lib${libsuff} /lib${libsuff} /usr/local/lib${libsuff}"
+  sys_lib_dlsearch_path_spec="/usr/lib${libsuff} /lib${libsuff}"
+  hardcode_into_libs=yes
+  ;;
+
+# No shared lib support for Linux oldld, aout, or coff.
+linux*oldld* | linux*aout* | linux*coff*)
+  dynamic_linker=no
+  ;;
+
+# This must be glibc/ELF.
+linux* | k*bsd*-gnu | kopensolaris*-gnu)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=no
+
+  # Some binutils ld are patched to set DT_RUNPATH: link a trivial program with libdir=/foo and look for RUNPATH in the objdump -p output.
+  if test "${lt_cv_shlibpath_overrides_runpath+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  lt_cv_shlibpath_overrides_runpath=no
+    save_LDFLAGS=$LDFLAGS  # both saved values are restored after the link test
+    save_libdir=$libdir
+    eval "libdir=/foo; wl=\"$lt_prog_compiler_wl\"; \
+        LDFLAGS=\"\$LDFLAGS $hardcode_libdir_flag_spec\""
+    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  if  ($OBJDUMP -p conftest$ac_exeext) 2>/dev/null | grep "RUNPATH.*$libdir" >/dev/null; then :
+  lt_cv_shlibpath_overrides_runpath=yes
+fi
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+    LDFLAGS=$save_LDFLAGS
+    libdir=$save_libdir
+
+fi
+
+  shlibpath_overrides_runpath=$lt_cv_shlibpath_overrides_runpath
+
+  # Historical note: hardcoding into libs implies no fast_install, and the
+  # setting was once held back until fast_install could be reworked.
+  # NOTE(review): it is enabled below regardless -- confirm the note is stale.
+  hardcode_into_libs=yes
+
+  # Append ld.so.conf contents to the search path
+  if test -f /etc/ld.so.conf; then
+    lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s 2>/dev/null", \$2)); skip = 1; } { if (!skip) print \$0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;/^[      ]*hwcap[        ]/d;s/[:,      ]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;s/"//g;/^$/d' | tr '\n' ' '`
+    sys_lib_dlsearch_path_spec="/lib /usr/lib $lt_ld_extra"
+  fi
+
+  # We used to test for /lib/ld.so.1 and disable shared libraries on
+  # powerpc, because MkLinux only supported shared libraries with the
+  # GNU dynamic linker.  Since this was broken with cross compilers,
+  # most powerpc-linux boxes support dynamic linking these days and
+  # people can always --disable-shared, the test was removed, and we
+  # assume the GNU/Linux dynamic linker is in use.
+  dynamic_linker='GNU/Linux ld.so'
+  ;;
+
+netbsd*)
+  version_type=sunos
+  need_lib_prefix=no
+  need_version=no
+  if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then  # literal __ELF__ survives preprocessing => macro not defined => a.out
+    library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix'
+    finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir'
+    dynamic_linker='NetBSD (a.out) ld.so'
+  else  # __ELF__ was expanded away by the preprocessor => ELF
+    library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
+    soname_spec='${libname}${release}${shared_ext}$major'
+    dynamic_linker='NetBSD ld.elf_so'
+  fi
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=yes
+  hardcode_into_libs=yes
+  ;;
+
+newsos6)
+  shlibpath_overrides_runpath=yes
+  shlibpath_var=LD_LIBRARY_PATH
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  version_type=linux # correct to gnu/linux during the next big refactor
+  ;;
+
+*nto* | *qnx*)
+  need_version=no
+  need_lib_prefix=no
+  version_type=qnx
+  dynamic_linker='ldqnx.so'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  hardcode_into_libs=yes
+  shlibpath_overrides_runpath=no
+  shlibpath_var=LD_LIBRARY_PATH
+  ;;
+
+openbsd*)
+  version_type=sunos
+  sys_lib_dlsearch_path_spec="/usr/lib"
+  need_lib_prefix=no
+  # Some older versions of OpenBSD (3.3 at least) *do* need versioned libs.
+  case $host_os in
+    openbsd3.3 | openbsd3.3.*) need_version=yes ;;
+    *)                         need_version=no  ;;
+  esac
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix'
+  finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir'
+  shlibpath_var=LD_LIBRARY_PATH
+  if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then  # empty grep output => __ELF__ was expanded => ELF
+    case $host_os in
+      openbsd2.[89] | openbsd2.[89].*)
+       shlibpath_overrides_runpath=no
+       ;;
+      *)
+       shlibpath_overrides_runpath=yes
+       ;;
+      esac
+  else  # __ELF__ not defined by the compiler, and not the openbsd2.8-powerpc special case
+    shlibpath_overrides_runpath=yes
+  fi
+  ;;
+
+os2*)
+  need_lib_prefix=no
+  libname_spec='$name'  # library name is the bare $name
+  shrext_cmds=".dll"
+  library_names_spec='$libname${shared_ext} $libname.a'
+  shlibpath_var=LIBPATH
+  dynamic_linker='OS/2 ld.exe'
+  ;;
+
+osf3* | osf4* | osf5*)
+  need_version=no
+  need_lib_prefix=no
+  version_type=osf
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib"
+  sys_lib_dlsearch_path_spec="$sys_lib_search_path_spec"  # copy of the list assigned just above
+  ;;
+
+rdos*)
+  dynamic_linker=no  # the check that follows this case statement then sets can_build_shared=no
+  ;;
+
+solaris*)
+  need_version=no
+  need_lib_prefix=no
+  version_type=linux # correct to gnu/linux during the next big refactor
+  soname_spec='${libname}${release}${shared_ext}$major'
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  hardcode_into_libs=yes
+  shlibpath_overrides_runpath=yes
+  shlibpath_var=LD_LIBRARY_PATH
+  # Installed libraries are made executable; ldd complains otherwise.
+  postinstall_cmds='chmod +x $lib'
+  ;;
+
+sunos4*)
+  need_version=yes
+  version_type=sunos
+  case $with_gnu_ld in
+    yes) need_lib_prefix=no ;;  # changed only when GNU ld is in use
+  esac
+  shlibpath_overrides_runpath=yes
+  shlibpath_var=LD_LIBRARY_PATH
+  finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir'
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix'
+  ;;
+
+sysv4 | sysv4.3*)
+  shlibpath_var=LD_LIBRARY_PATH
+  soname_spec='${libname}${release}${shared_ext}$major'
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  version_type=linux # correct to gnu/linux during the next big refactor
+  # Vendor-specific adjustments; other vendors keep the settings above.
+  case $host_vendor in
+    motorola)
+      sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib'
+      shlibpath_overrides_runpath=no
+      need_version=no
+      need_lib_prefix=no
+      ;;
+    siemens) need_lib_prefix=no ;;
+    sni)
+      runpath_var=LD_RUN_PATH
+      need_lib_prefix=no
+      shlibpath_overrides_runpath=no
+      ;;
+  esac
+
+  ;;
+
+sysv4*MP*)
+  if test -d /usr/nec; then  # nothing is configured unless /usr/nec exists
+    shlibpath_var=LD_LIBRARY_PATH
+    soname_spec='$libname${shared_ext}.$major'
+    library_names_spec='$libname${shared_ext}.$versuffix $libname${shared_ext}.$major $libname${shared_ext}'
+    version_type=linux # correct to gnu/linux during the next big refactor
+  fi
+  ;;
+
+sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*)
+  need_version=no
+  need_lib_prefix=no
+  version_type=freebsd-elf
+  soname_spec='${libname}${release}${shared_ext}$major'
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}'
+  hardcode_into_libs=yes
+  shlibpath_overrides_runpath=yes
+  shlibpath_var=LD_LIBRARY_PATH
+  sys_lib_dlsearch_path_spec='/usr/lib'
+  if test "$with_gnu_ld" != yes; then
+    # Not GNU ld: shorter search list, with /lib appended on sco3.2v5*.
+    sys_lib_search_path_spec='/usr/ccs/lib /usr/lib'
+    case $host_os in
+      sco3.2v5*) sys_lib_search_path_spec="$sys_lib_search_path_spec /lib" ;;
+    esac
+  else
+    # GNU ld: the longer search list.
+    sys_lib_search_path_spec='/usr/local/lib /usr/gnu/lib /usr/ccs/lib /usr/lib /lib'
+  fi
+  ;;
+
+tpf*)
+  # TPF is only ever a cross-compilation target; the preferred cross-host is GNU/Linux.
+  need_version=no
+  need_lib_prefix=no
+  version_type=linux # correct to gnu/linux during the next big refactor
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  hardcode_into_libs=yes
+  shlibpath_overrides_runpath=no
+  shlibpath_var=LD_LIBRARY_PATH
+  ;;
+
+uts4*)
+  shlibpath_var=LD_LIBRARY_PATH
+  soname_spec='${libname}${release}${shared_ext}$major'
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  version_type=linux # correct to gnu/linux during the next big refactor
+  ;;
+
+*)
+  dynamic_linker=no  # unrecognized host OS; the check after esac then sets can_build_shared=no
+  ;;
+esac
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $dynamic_linker" >&5
+$as_echo "$dynamic_linker" >&6; }
+test "$dynamic_linker" = no && can_build_shared=no
+
+variables_saved_for_relink="PATH $shlibpath_var $runpath_var"
+if test "$GCC" = yes; then
+  variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH"
+fi
+
+if test "${lt_cv_sys_lib_search_path_spec+set}" = set; then
+  sys_lib_search_path_spec="$lt_cv_sys_lib_search_path_spec"
+fi
+if test "${lt_cv_sys_lib_dlsearch_path_spec+set}" = set; then
+  sys_lib_dlsearch_path_spec="$lt_cv_sys_lib_dlsearch_path_spec"
+fi
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking how to hardcode library paths into programs" >&5
+$as_echo_n "checking how to hardcode library paths into programs... " >&6; }
+hardcode_action=
+if test -n "$hardcode_libdir_flag_spec" ||
+   test -n "$runpath_var" ||
+   test "X$hardcode_automatic" = "Xyes" ; then
+
+  # We can hardcode non-existent directories.
+  if test "$hardcode_direct" != no &&
+     # If the only mechanism to avoid hardcoding is shlibpath_var, we
+     # have to relink, otherwise we might link with an installed library
+     # when we should be linking with a yet-to-be-installed one
+     ## test "$_LT_TAGVAR(hardcode_shlibpath_var, )" != no &&
+     test "$hardcode_minus_L" != no; then
+    # Linking always hardcodes the temporary library directory.
+    hardcode_action=relink
+  else
+    # We can link without hardcoding, and we can hardcode nonexisting dirs.
+    hardcode_action=immediate
+  fi
+else
+  # We cannot hardcode anything, or else we can only hardcode existing
+  # directories.
+  hardcode_action=unsupported
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $hardcode_action" >&5
+$as_echo "$hardcode_action" >&6; }
+
+if test "$hardcode_action" = relink ||
+   test "$inherit_rpath" = yes; then
+  # Fast installation is not supported
+  enable_fast_install=no
+elif test "$shlibpath_overrides_runpath" = yes ||
+     test "$enable_shared" = no; then
+  # Fast installation is not necessary
+  enable_fast_install=needless
+fi
+
+
+
+
+
+
+  if test "x$enable_dlopen" != xyes; then
+  enable_dlopen=unknown
+  enable_dlopen_self=unknown
+  enable_dlopen_self_static=unknown
+else
+  lt_cv_dlopen=no
+  lt_cv_dlopen_libs=
+
+  case $host_os in
+  beos*)
+    lt_cv_dlopen="load_add_on"
+    lt_cv_dlopen_libs=
+    lt_cv_dlopen_self=yes
+    ;;
+
+  mingw* | pw32* | cegcc*)
+    lt_cv_dlopen="LoadLibrary"
+    lt_cv_dlopen_libs=
+    ;;
+
+  cygwin*)
+    lt_cv_dlopen="dlopen"
+    lt_cv_dlopen_libs=
+    ;;
+
+  darwin*)
+  # if libdl is installed we need to link against it
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dlopen in -ldl" >&5
+$as_echo_n "checking for dlopen in -ldl... " >&6; }
+if test "${ac_cv_lib_dl_dlopen+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-ldl  $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char dlopen ();
+int
+main ()
+{
+return dlopen ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  ac_cv_lib_dl_dlopen=yes
+else
+  ac_cv_lib_dl_dlopen=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dl_dlopen" >&5
+$as_echo "$ac_cv_lib_dl_dlopen" >&6; }
+if test "x$ac_cv_lib_dl_dlopen" = x""yes; then :
+  lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl"
+else
+
+    lt_cv_dlopen="dyld"
+    lt_cv_dlopen_libs=
+    lt_cv_dlopen_self=yes
+
+fi
+
+    ;;
+
+  *)
+    ac_fn_c_check_func "$LINENO" "shl_load" "ac_cv_func_shl_load"
+if test "x$ac_cv_func_shl_load" = x""yes; then :
+  lt_cv_dlopen="shl_load"
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for shl_load in -ldld" >&5
+$as_echo_n "checking for shl_load in -ldld... " >&6; }
+if test "${ac_cv_lib_dld_shl_load+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-ldld  $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char shl_load ();
+int
+main ()
+{
+return shl_load ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  ac_cv_lib_dld_shl_load=yes
+else
+  ac_cv_lib_dld_shl_load=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dld_shl_load" >&5
+$as_echo "$ac_cv_lib_dld_shl_load" >&6; }
+if test "x$ac_cv_lib_dld_shl_load" = x""yes; then :
+  lt_cv_dlopen="shl_load" lt_cv_dlopen_libs="-ldld"
+else
+  ac_fn_c_check_func "$LINENO" "dlopen" "ac_cv_func_dlopen"
+if test "x$ac_cv_func_dlopen" = x""yes; then :
+  lt_cv_dlopen="dlopen"
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dlopen in -ldl" >&5
+$as_echo_n "checking for dlopen in -ldl... " >&6; }
+if test "${ac_cv_lib_dl_dlopen+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-ldl  $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char dlopen ();
+int
+main ()
+{
+return dlopen ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  ac_cv_lib_dl_dlopen=yes
+else
+  ac_cv_lib_dl_dlopen=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dl_dlopen" >&5
+$as_echo "$ac_cv_lib_dl_dlopen" >&6; }
+if test "x$ac_cv_lib_dl_dlopen" = x""yes; then :
+  lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl"
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dlopen in -lsvld" >&5
+$as_echo_n "checking for dlopen in -lsvld... " >&6; }
+if test "${ac_cv_lib_svld_dlopen+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lsvld  $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char dlopen ();
+int
+main ()
+{
+return dlopen ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  ac_cv_lib_svld_dlopen=yes
+else
+  ac_cv_lib_svld_dlopen=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_svld_dlopen" >&5
+$as_echo "$ac_cv_lib_svld_dlopen" >&6; }
+if test "x$ac_cv_lib_svld_dlopen" = x""yes; then :
+  lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-lsvld"
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dld_link in -ldld" >&5
+$as_echo_n "checking for dld_link in -ldld... " >&6; }
+if test "${ac_cv_lib_dld_dld_link+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-ldld  $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char dld_link ();
+int
+main ()
+{
+return dld_link ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  ac_cv_lib_dld_dld_link=yes
+else
+  ac_cv_lib_dld_dld_link=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dld_dld_link" >&5
+$as_echo "$ac_cv_lib_dld_dld_link" >&6; }
+if test "x$ac_cv_lib_dld_dld_link" = x""yes; then :
+  lt_cv_dlopen="dld_link" lt_cv_dlopen_libs="-ldld"
+fi
+
+
+fi
+
+
+fi
+
+
+fi
+
+
+fi
+
+
+fi
+
+    ;;
+  esac
+
+  if test "x$lt_cv_dlopen" != xno; then
+    enable_dlopen=yes
+  else
+    enable_dlopen=no
+  fi
+
+  case $lt_cv_dlopen in
+  dlopen)
+    save_CPPFLAGS="$CPPFLAGS"
+    test "x$ac_cv_header_dlfcn_h" = xyes && CPPFLAGS="$CPPFLAGS -DHAVE_DLFCN_H"
+
+    save_LDFLAGS="$LDFLAGS"
+    wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $export_dynamic_flag_spec\"
+
+    save_LIBS="$LIBS"
+    LIBS="$lt_cv_dlopen_libs $LIBS"
+
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether a program can dlopen itself" >&5
+$as_echo_n "checking whether a program can dlopen itself... " >&6; }
+if test "${lt_cv_dlopen_self+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+         if test "$cross_compiling" = yes; then :
+  lt_cv_dlopen_self=cross
+else
+  lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
+  lt_status=$lt_dlunknown
+  cat > conftest.$ac_ext <<_LT_EOF
+#line $LINENO "configure"
+#include "confdefs.h"
+
+#if HAVE_DLFCN_H
+#include <dlfcn.h>
+#endif
+
+#include <stdio.h>
+
+#ifdef RTLD_GLOBAL
+#  define LT_DLGLOBAL          RTLD_GLOBAL
+#else
+#  ifdef DL_GLOBAL
+#    define LT_DLGLOBAL                DL_GLOBAL
+#  else
+#    define LT_DLGLOBAL                0
+#  endif
+#endif
+
+/* We may have to define LT_DLLAZY_OR_NOW in the command line if we
+   find out it does not work in some platform. */
+#ifndef LT_DLLAZY_OR_NOW
+#  ifdef RTLD_LAZY
+#    define LT_DLLAZY_OR_NOW           RTLD_LAZY
+#  else
+#    ifdef DL_LAZY
+#      define LT_DLLAZY_OR_NOW         DL_LAZY
+#    else
+#      ifdef RTLD_NOW
+#        define LT_DLLAZY_OR_NOW       RTLD_NOW
+#      else
+#        ifdef DL_NOW
+#          define LT_DLLAZY_OR_NOW     DL_NOW
+#        else
+#          define LT_DLLAZY_OR_NOW     0
+#        endif
+#      endif
+#    endif
+#  endif
+#endif
+
+/* When -fvisibility=hidden is used, assume the code has been annotated
+   correspondingly for the symbols needed.  */
+#if defined(__GNUC__) && (((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3))
+int fnord () __attribute__((visibility("default")));
+#endif
+
+int fnord () { return 42; }
+int main ()
+{
+  void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW);
+  int status = $lt_dlunknown;
+
+  if (self)
+    {
+      if (dlsym (self,"fnord"))       status = $lt_dlno_uscore;
+      else
+        {
+         if (dlsym( self,"_fnord"))  status = $lt_dlneed_uscore;
+          else puts (dlerror ());
+       }
+      /* dlclose (self); */
+    }
+  else
+    puts (dlerror ());
+
+  return status;
+}
+_LT_EOF
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_link\""; } >&5
+  (eval $ac_link) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; } && test -s conftest${ac_exeext} 2>/dev/null; then
+    (./conftest; exit; ) >&5 2>/dev/null
+    lt_status=$?
+    case x$lt_status in
+      x$lt_dlno_uscore) lt_cv_dlopen_self=yes ;;
+      x$lt_dlneed_uscore) lt_cv_dlopen_self=yes ;;
+      x$lt_dlunknown|x*) lt_cv_dlopen_self=no ;;
+    esac
+  else :
+    # compilation failed
+    lt_cv_dlopen_self=no
+  fi
+fi
+rm -fr conftest*
+
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_dlopen_self" >&5
+$as_echo "$lt_cv_dlopen_self" >&6; }
+
+    if test "x$lt_cv_dlopen_self" = xyes; then
+      wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $lt_prog_compiler_static\"
+      { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether a statically linked program can dlopen itself" >&5
+$as_echo_n "checking whether a statically linked program can dlopen itself... " >&6; }
+if test "${lt_cv_dlopen_self_static+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+         if test "$cross_compiling" = yes; then :
+  lt_cv_dlopen_self_static=cross
+else
+  lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
+  lt_status=$lt_dlunknown
+  cat > conftest.$ac_ext <<_LT_EOF
+#line $LINENO "configure"
+#include "confdefs.h"
+
+#if HAVE_DLFCN_H
+#include <dlfcn.h>
+#endif
+
+#include <stdio.h>
+
+#ifdef RTLD_GLOBAL
+#  define LT_DLGLOBAL          RTLD_GLOBAL
+#else
+#  ifdef DL_GLOBAL
+#    define LT_DLGLOBAL                DL_GLOBAL
+#  else
+#    define LT_DLGLOBAL                0
+#  endif
+#endif
+
+/* We may have to define LT_DLLAZY_OR_NOW in the command line if we
+   find out it does not work in some platform. */
+#ifndef LT_DLLAZY_OR_NOW
+#  ifdef RTLD_LAZY
+#    define LT_DLLAZY_OR_NOW           RTLD_LAZY
+#  else
+#    ifdef DL_LAZY
+#      define LT_DLLAZY_OR_NOW         DL_LAZY
+#    else
+#      ifdef RTLD_NOW
+#        define LT_DLLAZY_OR_NOW       RTLD_NOW
+#      else
+#        ifdef DL_NOW
+#          define LT_DLLAZY_OR_NOW     DL_NOW
+#        else
+#          define LT_DLLAZY_OR_NOW     0
+#        endif
+#      endif
+#    endif
+#  endif
+#endif
+
+/* When -fvisibility=hidden is used, assume the code has been annotated
+   correspondingly for the symbols needed.  */
+#if defined(__GNUC__) && (((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3))
+int fnord () __attribute__((visibility("default")));
+#endif
+
+int fnord () { return 42; }
+int main ()
+{
+  void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW);
+  int status = $lt_dlunknown;
+
+  if (self)
+    {
+      if (dlsym (self,"fnord"))       status = $lt_dlno_uscore;
+      else
+        {
+         if (dlsym( self,"_fnord"))  status = $lt_dlneed_uscore;
+          else puts (dlerror ());
+       }
+      /* dlclose (self); */
+    }
+  else
+    puts (dlerror ());
+
+  return status;
+}
+_LT_EOF
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_link\""; } >&5
+  (eval $ac_link) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; } && test -s conftest${ac_exeext} 2>/dev/null; then
+    (./conftest; exit; ) >&5 2>/dev/null
+    lt_status=$?
+    case x$lt_status in
+      x$lt_dlno_uscore) lt_cv_dlopen_self_static=yes ;;
+      x$lt_dlneed_uscore) lt_cv_dlopen_self_static=yes ;;
+      x$lt_dlunknown|x*) lt_cv_dlopen_self_static=no ;;
+    esac
+  else :
+    # compilation failed
+    lt_cv_dlopen_self_static=no
+  fi
+fi
+rm -fr conftest*
+
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_dlopen_self_static" >&5
+$as_echo "$lt_cv_dlopen_self_static" >&6; }
+    fi
+
+    CPPFLAGS="$save_CPPFLAGS"
+    LDFLAGS="$save_LDFLAGS"
+    LIBS="$save_LIBS"
+    ;;
+  esac
+
+  # Normalize the two self-dlopen probe results.  Only a literal "yes" or
+  # "no" is passed through; every other value (for example "cross", or an
+  # empty/unset variable) is reported as "unknown".
+  case $lt_cv_dlopen_self in
+  yes|no) enable_dlopen_self=$lt_cv_dlopen_self ;;
+  *) enable_dlopen_self=unknown ;;
+  esac
+
+  # Same mapping for the statically linked variant of the probe.
+  case $lt_cv_dlopen_self_static in
+  yes|no) enable_dlopen_self_static=$lt_cv_dlopen_self_static ;;
+  *) enable_dlopen_self_static=unknown ;;
+  esac
+fi
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+# Work out the commands used to strip libraries.  Both variables start out
+# empty and are filled in only when a usable $STRIP is recognized:
+#   - $STRIP reports "GNU strip" for -V: use --strip-unneeded / --strip-debug
+#   - host_os matches darwin* and $STRIP is set: use -x / -S
+# In every other case they stay empty and the check reports "no".
+striplib=
+old_striplib=
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether stripping libraries is possible" >&5
+$as_echo_n "checking whether stripping libraries is possible... " >&6; }
+if test -n "$STRIP" && $STRIP -V 2>&1 | $GREP "GNU strip" >/dev/null; then
+  # Both variables were cleared just above, so these -z guards always pass
+  # at this point.
+  test -z "$old_striplib" && old_striplib="$STRIP --strip-debug"
+  test -z "$striplib" && striplib="$STRIP --strip-unneeded"
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+# FIXME - insert some real tests, host_os isn't really good enough
+  case $host_os in
+  darwin*)
+    # No "GNU strip" banner was seen; on darwin* any configured $STRIP is
+    # accepted and given the -x / -S options instead.
+    if test -n "$STRIP" ; then
+      striplib="$STRIP -x"
+      old_striplib="$STRIP -S"
+      { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+    else
+      { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+    fi
+    ;;
+  *)
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+    ;;
+  esac
+fi
+
+
+
+
+
+
+
+
+
+
+
+
+  # Report which library types will actually be built
+  # First echo the toolchain capability ($can_build_shared), then settle the
+  # final values of enable_shared and enable_static and report each of them.
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking if libtool supports shared libraries" >&5
+$as_echo_n "checking if libtool supports shared libraries... " >&6; }
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $can_build_shared" >&5
+$as_echo "$can_build_shared" >&6; }
+
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to build shared libraries" >&5
+$as_echo_n "checking whether to build shared libraries... " >&6; }
+  # A toolchain that cannot build shared libraries overrides any request
+  # for them.
+  test "$can_build_shared" = "no" && enable_shared=no
+
+  # On AIX, shared libraries and static libraries use the same namespace, and
+  # are all built from PIC.
+  case $host_os in
+  aix3*)
+    # Building shared libraries switches static libraries off.
+    test "$enable_shared" = yes && enable_static=no
+    if test -n "$RANLIB"; then
+      # Append a ranlib step to archive_cmds.  The escaped \$ keeps $RANLIB
+      # and $lib unexpanded here.  NOTE(review): '~' appears to be libtool's
+      # command separator inside *_cmds strings -- confirm.
+      archive_cmds="$archive_cmds~\$RANLIB \$lib"
+      postinstall_cmds='$RANLIB $lib'
+    fi
+    ;;
+
+  aix[4-9]*)
+    # Static libraries are switched off only for non-ia64 hosts that do not
+    # use runtime linking.
+    if test "$host_cpu" != ia64 && test "$aix_use_runtimelinking" = no ; then
+      test "$enable_shared" = yes && enable_static=no
+    fi
+    ;;
+  esac
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $enable_shared" >&5
+$as_echo "$enable_shared" >&6; }
+
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to build static libraries" >&5
+$as_echo_n "checking whether to build static libraries... " >&6; }
+  # Make sure either enable_shared or enable_static is yes.
+  test "$enable_shared" = yes || enable_static=yes
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $enable_static" >&5
+$as_echo "$enable_static" >&6; }
+
+
+
+
+fi
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+CC="$lt_save_CC"
+
+      if test -n "$CXX" && ( test "X$CXX" != "Xno" &&
+    ( (test "X$CXX" = "Xg++" && `g++ -v >/dev/null 2>&1` ) ||
+    (test "X$CXX" != "Xg++"))) ; then
+  ac_ext=cpp
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to run the C++ preprocessor" >&5
+$as_echo_n "checking how to run the C++ preprocessor... " >&6; }
+if test -z "$CXXCPP"; then
+  if test "${ac_cv_prog_CXXCPP+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+      # Double quotes because CXXCPP needs to be expanded
+    for CXXCPP in "$CXX -E" "/lib/cpp"
+    do
+      ac_preproc_ok=false
+for ac_cxx_preproc_warn_flag in '' yes
+do
+  # Use a header file that comes with gcc, so configuring glibc
+  # with a fresh cross-compiler works.
+  # Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+  # <limits.h> exists even on freestanding compilers.
+  # On the NeXT, cc -E runs the code through the compiler's parser,
+  # not just through cpp. "Syntax error" is here to catch this case.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+                    Syntax error
+_ACEOF
+if ac_fn_cxx_try_cpp "$LINENO"; then :
+
+else
+  # Broken: fails on valid input.
+continue
+fi
+rm -f conftest.err conftest.$ac_ext
+
+  # OK, works on sane cases.  Now check whether nonexistent headers
+  # can be detected and how.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <ac_nonexistent.h>
+_ACEOF
+if ac_fn_cxx_try_cpp "$LINENO"; then :
+  # Broken: success on invalid input.
+continue
+else
+  # Passes both tests.
+ac_preproc_ok=:
+break
+fi
+rm -f conftest.err conftest.$ac_ext
+
+done
+# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
+rm -f conftest.err conftest.$ac_ext
+if $ac_preproc_ok; then :
+  break
+fi
+
+    done
+    ac_cv_prog_CXXCPP=$CXXCPP
+
+fi
+  CXXCPP=$ac_cv_prog_CXXCPP
+else
+  ac_cv_prog_CXXCPP=$CXXCPP
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $CXXCPP" >&5
+$as_echo "$CXXCPP" >&6; }
+ac_preproc_ok=false
+for ac_cxx_preproc_warn_flag in '' yes
+do
+  # Use a header file that comes with gcc, so configuring glibc
+  # with a fresh cross-compiler works.
+  # Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+  # <limits.h> exists even on freestanding compilers.
+  # On the NeXT, cc -E runs the code through the compiler's parser,
+  # not just through cpp. "Syntax error" is here to catch this case.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+                    Syntax error
+_ACEOF
+if ac_fn_cxx_try_cpp "$LINENO"; then :
+
+else
+  # Broken: fails on valid input.
+continue
+fi
+rm -f conftest.err conftest.$ac_ext
+
+  # OK, works on sane cases.  Now check whether nonexistent headers
+  # can be detected and how.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <ac_nonexistent.h>
+_ACEOF
+if ac_fn_cxx_try_cpp "$LINENO"; then :
+  # Broken: success on invalid input.
+continue
+else
+  # Passes both tests.
+ac_preproc_ok=:
+break
+fi
+rm -f conftest.err conftest.$ac_ext
+
+done
+# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
+rm -f conftest.err conftest.$ac_ext
+if $ac_preproc_ok; then :
+
+else
+  { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error "C++ preprocessor \"$CXXCPP\" fails sanity check
+See \`config.log' for more details." "$LINENO" 5; }
+fi
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+else
+  _lt_caught_CXX_error=yes
+fi
+
+ac_ext=cpp
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+
+archive_cmds_need_lc_CXX=no
+allow_undefined_flag_CXX=
+always_export_symbols_CXX=no
+archive_expsym_cmds_CXX=
+compiler_needs_object_CXX=no
+export_dynamic_flag_spec_CXX=
+hardcode_direct_CXX=no
+hardcode_direct_absolute_CXX=no
+hardcode_libdir_flag_spec_CXX=
+hardcode_libdir_separator_CXX=
+hardcode_minus_L_CXX=no
+hardcode_shlibpath_var_CXX=unsupported
+hardcode_automatic_CXX=no
+inherit_rpath_CXX=no
+module_cmds_CXX=
+module_expsym_cmds_CXX=
+link_all_deplibs_CXX=unknown
+old_archive_cmds_CXX=$old_archive_cmds
+reload_flag_CXX=$reload_flag
+reload_cmds_CXX=$reload_cmds
+no_undefined_flag_CXX=
+whole_archive_flag_spec_CXX=
+enable_shared_with_static_runtimes_CXX=no
+
+# Source file extension for C++ test sources.
+ac_ext=cpp
+
+# Object file extension for compiled C++ test sources.
+objext=o
+objext_CXX=$objext
+
+# No sense in running all these tests if we already determined that
+# the CXX compiler isn't working.  Some variables (like enable_shared)
+# are currently assumed to apply to all compilers on this platform,
+# and will be corrupted by setting them based on a non-working compiler.
+if test "$_lt_caught_CXX_error" != yes; then
+  # Code to be used in simple compile tests
+  lt_simple_compile_test_code="int some_variable = 0;"
+
+  # Code to be used in simple link tests
+  lt_simple_link_test_code='int main(int, char *[]) { return(0); }'
+
+  # ltmain only uses $CC for tagged configurations so make sure $CC is set.
+
+
+
+
+
+
+# If no C compiler was specified, use CC.
+LTCC=${LTCC-"$CC"}
+
+# If no C compiler flags were specified, use CFLAGS.
+LTCFLAGS=${LTCFLAGS-"$CFLAGS"}
+
+# Allow CC to be a program name with arguments.
+compiler=$CC
+
+
+  # save warnings/boilerplate of simple test code
+  ac_outfile=conftest.$ac_objext
+echo "$lt_simple_compile_test_code" >conftest.$ac_ext
+eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err
+_lt_compiler_boilerplate=`cat conftest.err`
+$RM conftest*
+
+  ac_outfile=conftest.$ac_objext
+echo "$lt_simple_link_test_code" >conftest.$ac_ext
+eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err
+_lt_linker_boilerplate=`cat conftest.err`
+$RM -r conftest*
+
+
+  # Allow CC to be a program name with arguments.
+  # Stash the current compiler/linker settings in lt_save_* variables, then
+  # overwrite CC, CFLAGS, GCC and LD with their C++ values.
+  # NOTE(review): the code that restores the lt_save_* values is not visible
+  # in this part of the script -- confirm it exists further down.
+  lt_save_CC=$CC
+  lt_save_CFLAGS=$CFLAGS
+  lt_save_LD=$LD
+  lt_save_GCC=$GCC
+  # From here on, $GCC carries the value of $GXX.
+  GCC=$GXX
+  lt_save_with_gnu_ld=$with_gnu_ld
+  lt_save_path_LD=$lt_cv_path_LD
+  # Seed the linker cache variables from their C++-specific counterparts
+  # when those are set; otherwise clear them via $as_unset (defined outside
+  # this excerpt; presumably it unsets the named variable -- confirm).
+  if test -n "${lt_cv_prog_gnu_ldcxx+set}"; then
+    lt_cv_prog_gnu_ld=$lt_cv_prog_gnu_ldcxx
+  else
+    $as_unset lt_cv_prog_gnu_ld
+  fi
+  if test -n "${lt_cv_path_LDCXX+set}"; then
+    lt_cv_path_LD=$lt_cv_path_LDCXX
+  else
+    $as_unset lt_cv_path_LD
+  fi
+  # LDCXX replaces LD only when LDCXX is set (even if set to empty).
+  test -z "${LDCXX+set}" || LD=$LDCXX
+  # "c++" is used only when CXX is unset; an empty CXX is kept as is.
+  CC=${CXX-"c++"}
+  CFLAGS=$CXXFLAGS
+  compiler=$CC
+  compiler_CXX=$CC
+  # Walk the words of $compiler, skipping the wrapper names compile, ccache,
+  # distcc and purify (with or without a directory part) and words starting
+  # with "-".  The loop stops at the first other word, left in cc_temp.
+  for cc_temp in $compiler""; do
+  case $cc_temp in
+    compile | *[\\/]compile | ccache | *[\\/]ccache ) ;;
+    distcc | *[\\/]distcc | purify | *[\\/]purify ) ;;
+    \-*) ;;
+    *) break;;
+  esac
+done
+# cc_basename is that word with everything up to the last "/" removed and
+# then a leading "$host_alias-" prefix removed.
+cc_basename=`$ECHO "$cc_temp" | $SED "s%.*/%%; s%^$host_alias-%%"`
+
+
+  if test -n "$compiler"; then
+    # We don't want -fno-exceptions when compiling C++ code, so set the
+    # no_builtin_flag separately
+    if test "$GXX" = yes; then
+      lt_prog_compiler_no_builtin_flag_CXX=' -fno-builtin'
+    else
+      lt_prog_compiler_no_builtin_flag_CXX=
+    fi
+
+    if test "$GXX" = yes; then
+      # Set up default GNU C++ configuration
+
+
+
+# Check whether --with-gnu-ld was given.
+if test "${with_gnu_ld+set}" = set; then :
+  withval=$with_gnu_ld; test "$withval" = no || with_gnu_ld=yes
+else
+  with_gnu_ld=no
+fi
+
+ac_prog=ld
+if test "$GCC" = yes; then
+  # Check if gcc -print-prog-name=ld gives a path.
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ld used by $CC" >&5
+$as_echo_n "checking for ld used by $CC... " >&6; }
+  case $host in
+  *-*-mingw*)
+    # gcc leaves a trailing carriage return which upsets mingw
+    ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;;
+  *)
+    ac_prog=`($CC -print-prog-name=ld) 2>&5` ;;
+  esac
+  # Classify the ld name held in ac_prog:
+  #   absolute path -> canonicalize it and use it as LD unless LD is set
+  #   empty         -> fall back to the plain name "ld"
+  #   anything else -> leave ac_prog alone and mark with_gnu_ld as unknown
+  case $ac_prog in
+    # Accept absolute paths.
+    [\\/]* | ?:[\\/]*)
+      # Matches one "/dir/../" path element.
+      re_direlt='/[^/][^/]*/\.\./'
+      # Canonicalize the pathname of ld
+      # First turn backslashes into forward slashes.
+      ac_prog=`$ECHO "$ac_prog"| $SED 's%\\\\%/%g'`
+      # Then collapse "/dir/../" elements one at a time.  $ac_prog is quoted
+      # here as well, so that a path containing whitespace or glob
+      # characters is passed to sed unchanged instead of being word-split or
+      # expanded by the shell.
+      while $ECHO "$ac_prog" | $GREP "$re_direlt" > /dev/null 2>&1; do
+       ac_prog=`$ECHO "$ac_prog"| $SED "s%$re_direlt%/%"`
+      done
+      test -z "$LD" && LD="$ac_prog"
+      ;;
+  "")
+    # If it fails, then pretend we aren't using GCC.
+    ac_prog=ld
+    ;;
+  *)
+    # If it is relative, then search for the first ld in PATH.
+    with_gnu_ld=unknown
+    ;;
+  esac
+elif test "$with_gnu_ld" = yes; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for GNU ld" >&5
+$as_echo_n "checking for GNU ld... " >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for non-GNU ld" >&5
+$as_echo_n "checking for non-GNU ld... " >&6; }
+fi
+# Locate the linker and cache its path in lt_cv_path_LD.  A value already
+# present in the cache is reused.  Otherwise a non-empty $LD wins, and
+# failing that every directory in $PATH is searched for $ac_prog.
+if test "${lt_cv_path_LD+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -z "$LD"; then
+  # Split $PATH on $PATH_SEPARATOR for the loop header only; the original
+  # IFS is put back at the top of each iteration and again after the loop.
+  lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR
+  for ac_dir in $PATH; do
+    IFS="$lt_save_ifs"
+    # An empty PATH element is treated as the current directory.
+    test -z "$ac_dir" && ac_dir=.
+    if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then
+      lt_cv_path_LD="$ac_dir/$ac_prog"
+      # Check to see if the program is GNU ld.  I'd rather use --version,
+      # but apparently some variants of GNU ld only accept -v.
+      # Break only if it was the GNU/non-GNU ld that we prefer.
+      # If no candidate triggers a break, lt_cv_path_LD keeps the last
+      # candidate that was found.
+      case `"$lt_cv_path_LD" -v 2>&1 </dev/null` in
+      *GNU* | *'with BFD'*)
+       test "$with_gnu_ld" != no && break
+       ;;
+      *)
+       test "$with_gnu_ld" != yes && break
+       ;;
+      esac
+    fi
+  done
+  IFS="$lt_save_ifs"
+else
+  lt_cv_path_LD="$LD" # Let the user override the test with a path.
+fi
+fi
+
+LD="$lt_cv_path_LD"
+if test -n "$LD"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $LD" >&5
+$as_echo "$LD" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+test -z "$LD" && as_fn_error "no acceptable ld found in \$PATH" "$LINENO" 5
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if the linker ($LD) is GNU ld" >&5
+$as_echo_n "checking if the linker ($LD) is GNU ld... " >&6; }
+if test "${lt_cv_prog_gnu_ld+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  # I'd rather use --version here, but apparently some GNU lds only accept -v.
+case `$LD -v 2>&1 </dev/null` in
+*GNU* | *'with BFD'*)
+  lt_cv_prog_gnu_ld=yes
+  ;;
+*)
+  lt_cv_prog_gnu_ld=no
+  ;;
+esac
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_gnu_ld" >&5
+$as_echo "$lt_cv_prog_gnu_ld" >&6; }
+with_gnu_ld=$lt_cv_prog_gnu_ld
+
+
+
+
+
+
+
+      # Check if GNU C++ uses GNU ld as the underlying linker, since the
+      # archiving commands below assume that GNU ld is being used.
+      if test "$with_gnu_ld" = yes; then
+        archive_cmds_CXX='$CC $pic_flag -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib'
+        archive_expsym_cmds_CXX='$CC $pic_flag -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
+
+        hardcode_libdir_flag_spec_CXX='${wl}-rpath ${wl}$libdir'
+        export_dynamic_flag_spec_CXX='${wl}--export-dynamic'
+
+        # If archive_cmds runs LD, not CC, wlarc should be empty
+        # XXX I think wlarc can be eliminated in ltcf-cxx, but I need to
+        #     investigate it a little bit more. (MM)
+        wlarc='${wl}'
+
+        # ancient GNU ld didn't support --whole-archive et al.
+        if eval "`$CC -print-prog-name=ld` --help 2>&1" |
+         $GREP 'no-whole-archive' > /dev/null; then
+          whole_archive_flag_spec_CXX="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive'
+        else
+          whole_archive_flag_spec_CXX=
+        fi
+      else
+        with_gnu_ld=no
+        wlarc=
+
+        # A generic and very simple default shared library creation
+        # command for GNU C++ for the case where it uses the native
+        # linker, instead of GNU ld.  If possible, this setting should
+        # be overridden to take advantage of the native linker features on
+        # the platform it is being used on.
+        archive_cmds_CXX='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib'
+      fi
+
+      # Commands to make compiler produce verbose output that lists
+      # what "hidden" libraries, object files and flags are used when
+      # linking a shared library.
+      output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"'
+
+    else
+      GXX=no
+      with_gnu_ld=no
+      wlarc=
+    fi
+
+    # PORTME: fill in a description of your system's C++ link characteristics
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the $compiler linker ($LD) supports shared libraries" >&5
+$as_echo_n "checking whether the $compiler linker ($LD) supports shared libraries... " >&6; }
+    ld_shlibs_CXX=yes
+    case $host_os in
+      aix3*)
+        # FIXME: insert proper C++ library support
+        ld_shlibs_CXX=no
+        ;;
+      aix[4-9]*)
+        if test "$host_cpu" = ia64; then
+          # On IA64, the linker does run time linking by default, so we don't
+          # have to do anything special.
+          aix_use_runtimelinking=no
+          exp_sym_flag='-Bexport'
+          no_entry_flag=""
+        else
+          aix_use_runtimelinking=no
+
+          # Test if we are trying to use run time linking or normal
+          # AIX style linking. If -brtl is somewhere in LDFLAGS, we
+          # need to do runtime linking.
+          case $host_os in aix4.[23]|aix4.[23].*|aix[5-9]*)
+           for ld_flag in $LDFLAGS; do
+             case $ld_flag in
+             *-brtl*)
+               aix_use_runtimelinking=yes
+               break
+               ;;
+             esac
+           done
+           ;;
+          esac
+
+          exp_sym_flag='-bexport'
+          no_entry_flag='-bnoentry'
+        fi
+
+        # When large executables or shared objects are built, AIX ld can
+        # have problems creating the table of contents.  If linking a library
+        # or program results in "error TOC overflow" add -mminimal-toc to
+        # CXXFLAGS/CFLAGS for g++/gcc.  In the cases where that is not
+        # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS.
+
+        archive_cmds_CXX=''
+        hardcode_direct_CXX=yes
+        hardcode_direct_absolute_CXX=yes
+        hardcode_libdir_separator_CXX=':'
+        link_all_deplibs_CXX=yes
+        file_list_spec_CXX='${wl}-f,'
+
+        if test "$GXX" = yes; then
+          case $host_os in aix4.[012]|aix4.[012].*)
+          # We only want to do this on AIX 4.2 and lower, the check
+          # below for broken collect2 doesn't work under 4.3+
+         collect2name=`${CC} -print-prog-name=collect2`
+         if test -f "$collect2name" &&
+            strings "$collect2name" | $GREP resolve_lib_name >/dev/null
+         then
+           # We have reworked collect2
+           :
+         else
+           # We have old collect2
+           hardcode_direct_CXX=unsupported
+           # It fails to find uninstalled libraries when the uninstalled
+           # path is not listed in the libpath.  Setting hardcode_minus_L
+           # to unsupported forces relinking
+           hardcode_minus_L_CXX=yes
+           hardcode_libdir_flag_spec_CXX='-L$libdir'
+           hardcode_libdir_separator_CXX=
+         fi
+          esac
+          shared_flag='-shared'
+         if test "$aix_use_runtimelinking" = yes; then
+           shared_flag="$shared_flag "'${wl}-G'
+         fi
+        else
+          # not using gcc
+          if test "$host_cpu" = ia64; then
+         # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release
+         # chokes on -Wl,-G. The following line is correct:
+         shared_flag='-G'
+          else
+           if test "$aix_use_runtimelinking" = yes; then
+             shared_flag='${wl}-G'
+           else
+             shared_flag='${wl}-bM:SRE'
+           fi
+          fi
+        fi
+
+        export_dynamic_flag_spec_CXX='${wl}-bexpall'
+        # It seems that -bexpall does not export symbols beginning with
+        # underscore (_), so it is better to generate a list of symbols to
+       # export.
+        always_export_symbols_CXX=yes
+        if test "$aix_use_runtimelinking" = yes; then
+          # Warning - without using the other runtime loading flags (-brtl),
+          # -berok will link without error, but may produce a broken library.
+          allow_undefined_flag_CXX='-berok'
+          # Determine the default libpath from the value encoded in an empty
+          # executable.
+          if test "${lt_cv_aix_libpath+set}" = set; then
+  aix_libpath=$lt_cv_aix_libpath
+else
+  if test "${lt_cv_aix_libpath__CXX+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_link "$LINENO"; then :
+
+  lt_aix_libpath_sed='
+      /Import File Strings/,/^$/ {
+         /^0/ {
+             s/^0  *\([^ ]*\) *$/\1/
+             p
+         }
+      }'
+  lt_cv_aix_libpath__CXX=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"`
+  # Check for a 64-bit object if we didn't find anything.
+  if test -z "$lt_cv_aix_libpath__CXX"; then
+    lt_cv_aix_libpath__CXX=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"`
+  fi
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+  if test -z "$lt_cv_aix_libpath__CXX"; then
+    lt_cv_aix_libpath__CXX="/usr/lib:/lib"
+  fi
+
+fi
+
+  aix_libpath=$lt_cv_aix_libpath__CXX
+fi
+
+          hardcode_libdir_flag_spec_CXX='${wl}-blibpath:$libdir:'"$aix_libpath"
+
+          archive_expsym_cmds_CXX='$CC -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags `if test "x${allow_undefined_flag}" != "x"; then func_echo_all "${wl}${allow_undefined_flag}"; else :; fi` '"\${wl}$exp_sym_flag:\$export_symbols $shared_flag"
+        else
+          if test "$host_cpu" = ia64; then
+           hardcode_libdir_flag_spec_CXX='${wl}-R $libdir:/usr/lib:/lib'
+           allow_undefined_flag_CXX="-z nodefs"
+           archive_expsym_cmds_CXX="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags ${wl}${allow_undefined_flag} '"\${wl}$exp_sym_flag:\$export_symbols"
+          else
+           # Determine the default libpath from the value encoded in an
+           # empty executable.
+           if test "${lt_cv_aix_libpath+set}" = set; then
+  aix_libpath=$lt_cv_aix_libpath
+else
+  if test "${lt_cv_aix_libpath__CXX+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_link "$LINENO"; then :
+
+  lt_aix_libpath_sed='
+      /Import File Strings/,/^$/ {
+         /^0/ {
+             s/^0  *\([^ ]*\) *$/\1/
+             p
+         }
+      }'
+  lt_cv_aix_libpath__CXX=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"`
+  # Check for a 64-bit object if we didn't find anything.
+  if test -z "$lt_cv_aix_libpath__CXX"; then
+    lt_cv_aix_libpath__CXX=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"`
+  fi
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+  if test -z "$lt_cv_aix_libpath__CXX"; then
+    lt_cv_aix_libpath__CXX="/usr/lib:/lib"
+  fi
+
+fi
+
+  aix_libpath=$lt_cv_aix_libpath__CXX
+fi
+
+           hardcode_libdir_flag_spec_CXX='${wl}-blibpath:$libdir:'"$aix_libpath"
+           # Warning - without using the other run time loading flags,
+           # -berok will link without error, but may produce a broken library.
+           no_undefined_flag_CXX=' ${wl}-bernotok'
+           allow_undefined_flag_CXX=' ${wl}-berok'
+           if test "$with_gnu_ld" = yes; then
+             # We only use this code for GNU lds that support --whole-archive.
+             whole_archive_flag_spec_CXX='${wl}--whole-archive$convenience ${wl}--no-whole-archive'
+           else
+             # Exported symbols can be pulled into shared objects from archives
+             whole_archive_flag_spec_CXX='$convenience'
+           fi
+           archive_cmds_need_lc_CXX=yes
+           # This is similar to how AIX traditionally builds its shared
+           # libraries.
+           archive_expsym_cmds_CXX="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs ${wl}-bnoentry $compiler_flags ${wl}-bE:$export_symbols${allow_undefined_flag}~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$soname'
+          fi
+        fi
+        ;;
+
+      beos*)
+       if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then
+         allow_undefined_flag_CXX=unsupported
+         # Joseph Beckenbach <jrb3@best.com> says some releases of gcc
+         # support --undefined.  This deserves some investigation.  FIXME
+         archive_cmds_CXX='$CC -nostart $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+       else
+         ld_shlibs_CXX=no
+       fi
+       ;;
+
+      chorus*)
+        case $cc_basename in
+          *)
+         # FIXME: insert proper C++ library support
+         ld_shlibs_CXX=no
+         ;;
+        esac
+        ;;
+
+      cygwin* | mingw* | pw32* | cegcc*)
+       case $GXX,$cc_basename in
+       ,cl* | no,cl*)
+         # Native MSVC
+         # hardcode_libdir_flag_spec is actually meaningless, as there is
+         # no search path for DLLs.
+         hardcode_libdir_flag_spec_CXX=' '
+         allow_undefined_flag_CXX=unsupported
+         always_export_symbols_CXX=yes
+         file_list_spec_CXX='@'
+         # Tell ltmain to make .lib files, not .a files.
+         libext=lib
+         # Tell ltmain to make .dll files, not .so files.
+         shrext_cmds=".dll"
+         # FIXME: Setting linknames here is a bad hack.
+         archive_cmds_CXX='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-dll~linknames='
+         archive_expsym_cmds_CXX='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then
+             $SED -n -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' -e '1\\\!p' < $export_symbols > $output_objdir/$soname.exp;
+           else
+             $SED -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' < $export_symbols > $output_objdir/$soname.exp;
+           fi~
+           $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~
+           linknames='
+         # The linker will not automatically build a static lib if we build a DLL.
+         # _LT_TAGVAR(old_archive_from_new_cmds, CXX)='true'
+         enable_shared_with_static_runtimes_CXX=yes
+         # Don't use ranlib
+         old_postinstall_cmds_CXX='chmod 644 $oldlib'
+         postlink_cmds_CXX='lt_outputfile="@OUTPUT@"~
+           lt_tool_outputfile="@TOOL_OUTPUT@"~
+           case $lt_outputfile in
+             *.exe|*.EXE) ;;
+             *)
+               lt_outputfile="$lt_outputfile.exe"
+               lt_tool_outputfile="$lt_tool_outputfile.exe"
+               ;;
+           esac~
+           func_to_tool_file "$lt_outputfile"~
+           if test "$MANIFEST_TOOL" != ":" && test -f "$lt_outputfile.manifest"; then
+             $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1;
+             $RM "$lt_outputfile.manifest";
+           fi'
+         ;;
+       *)
+         # g++
+         # hardcode_libdir_flag_spec_CXX is actually meaningless,
+         # as there is no search path for DLLs.
+         hardcode_libdir_flag_spec_CXX='-L$libdir'
+         export_dynamic_flag_spec_CXX='${wl}--export-all-symbols'
+         allow_undefined_flag_CXX=unsupported
+         always_export_symbols_CXX=no
+         enable_shared_with_static_runtimes_CXX=yes
+
+         if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then
+           archive_cmds_CXX='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib'
+           # If the export-symbols file is already a .def file (its first
+           # line is EXPORTS), use it as is; otherwise, prepend an EXPORTS line.
+           archive_expsym_cmds_CXX='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then
+             cp $export_symbols $output_objdir/$soname.def;
+           else
+             echo EXPORTS > $output_objdir/$soname.def;
+             cat $export_symbols >> $output_objdir/$soname.def;
+           fi~
+           $CC -shared -nostdlib $output_objdir/$soname.def $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib'
+         else
+           ld_shlibs_CXX=no
+         fi
+         ;;
+       esac
+       ;;
+      darwin* | rhapsody*)
+
+
+  archive_cmds_need_lc_CXX=no
+  hardcode_direct_CXX=no
+  hardcode_automatic_CXX=yes
+  hardcode_shlibpath_var_CXX=unsupported
+  if test "$lt_cv_ld_force_load" = "yes"; then
+    whole_archive_flag_spec_CXX='`for conv in $convenience\"\"; do test  -n \"$conv\" && new_convenience=\"$new_convenience ${wl}-force_load,$conv\"; done; func_echo_all \"$new_convenience\"`'
+
+  else
+    whole_archive_flag_spec_CXX=''
+  fi
+  link_all_deplibs_CXX=yes
+  allow_undefined_flag_CXX="$_lt_dar_allow_undefined"
+  case $cc_basename in
+     ifort*) _lt_dar_can_shared=yes ;;
+     *) _lt_dar_can_shared=$GCC ;;
+  esac
+  if test "$_lt_dar_can_shared" = "yes"; then
+    output_verbose_link_cmd=func_echo_all
+    archive_cmds_CXX="\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod${_lt_dsymutil}"
+    module_cmds_CXX="\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags${_lt_dsymutil}"
+    archive_expsym_cmds_CXX="sed 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring ${_lt_dar_single_mod}${_lt_dar_export_syms}${_lt_dsymutil}"
+    module_expsym_cmds_CXX="sed -e 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags${_lt_dar_export_syms}${_lt_dsymutil}"
+       if test "$lt_cv_apple_cc_single_mod" != "yes"; then
+      archive_cmds_CXX="\$CC -r -keep_private_externs -nostdlib -o \${lib}-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \${lib}-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring${_lt_dsymutil}"
+      archive_expsym_cmds_CXX="sed 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC -r -keep_private_externs -nostdlib -o \${lib}-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \${lib}-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring${_lt_dar_export_syms}${_lt_dsymutil}"
+    fi
+
+  else
+  ld_shlibs_CXX=no
+  fi
+
+       ;;
+
+      dgux*)
+        case $cc_basename in
+          ec++*)
+           # FIXME: insert proper C++ library support
+           ld_shlibs_CXX=no
+           ;;
+          ghcx*)
+           # Green Hills C++ Compiler
+           # FIXME: insert proper C++ library support
+           ld_shlibs_CXX=no
+           ;;
+          *)
+           # FIXME: insert proper C++ library support
+           ld_shlibs_CXX=no
+           ;;
+        esac
+        ;;
+
+      freebsd2.*)
+        # C++ shared libraries are reported to be fairly broken before
+       # the switch to ELF, so ld_shlibs_CXX is set to no here.
+        ld_shlibs_CXX=no
+        ;;
+
+      freebsd-elf*)
+        archive_cmds_need_lc_CXX=no
+        ;;
+
+      freebsd* | dragonfly*)
+        # FreeBSD 3 and later use GNU C++ and GNU ld with standard ELF
+        # conventions
+        ld_shlibs_CXX=yes
+        ;;
+
+      gnu*)
+        ;;
+
+      haiku*)
+        archive_cmds_CXX='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+        link_all_deplibs_CXX=yes
+        ;;
+
+      hpux9*)
+        hardcode_libdir_flag_spec_CXX='${wl}+b ${wl}$libdir'
+        hardcode_libdir_separator_CXX=:
+        export_dynamic_flag_spec_CXX='${wl}-E'
+        hardcode_direct_CXX=yes
+        hardcode_minus_L_CXX=yes # Not in the search PATH,
+                                            # but as the default
+                                            # location of the library.
+
+        case $cc_basename in
+          CC*)
+            # FIXME: insert proper C++ library support
+            ld_shlibs_CXX=no
+            ;;
+          aCC*)
+            archive_cmds_CXX='$RM $output_objdir/$soname~$CC -b ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib'
+            # Commands to make compiler produce verbose output that lists
+            # what "hidden" libraries, object files and flags are used when
+            # linking a shared library.
+            #
+            # There doesn't appear to be a way to prevent this compiler from
+            # explicitly linking system object files so we need to strip them
+            # from the output so that they don't get included in the library
+            # dependencies.
+            output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $EGREP "\-L"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"'
+            ;;
+          *)
+            if test "$GXX" = yes; then
+              archive_cmds_CXX='$RM $output_objdir/$soname~$CC -shared -nostdlib $pic_flag ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib'
+            else
+              # FIXME: insert proper C++ library support
+              ld_shlibs_CXX=no
+            fi
+            ;;
+        esac
+        ;;
+
+      hpux10*|hpux11*)
+        if test $with_gnu_ld = no; then
+         hardcode_libdir_flag_spec_CXX='${wl}+b ${wl}$libdir'
+         hardcode_libdir_separator_CXX=:
+
+          case $host_cpu in
+            hppa*64*|ia64*)
+              ;;
+            *)
+             export_dynamic_flag_spec_CXX='${wl}-E'
+              ;;
+          esac
+        fi
+        case $host_cpu in
+          hppa*64*|ia64*)
+            hardcode_direct_CXX=no
+            hardcode_shlibpath_var_CXX=no
+            ;;
+          *)
+            hardcode_direct_CXX=yes
+            hardcode_direct_absolute_CXX=yes
+            hardcode_minus_L_CXX=yes # Not in the search PATH,
+                                                # but as the default
+                                                # location of the library.
+            ;;
+        esac
+
+        case $cc_basename in
+          CC*)
+           # FIXME: insert proper C++ library support
+           ld_shlibs_CXX=no
+           ;;
+          aCC*)
+           case $host_cpu in
+             hppa*64*)
+               archive_cmds_CXX='$CC -b ${wl}+h ${wl}$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
+               ;;
+             ia64*)
+               archive_cmds_CXX='$CC -b ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
+               ;;
+             *)
+               archive_cmds_CXX='$CC -b ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
+               ;;
+           esac
+           # Commands to make compiler produce verbose output that lists
+           # what "hidden" libraries, object files and flags are used when
+           # linking a shared library.
+           #
+           # There doesn't appear to be a way to prevent this compiler from
+           # explicitly linking system object files so we need to strip them
+           # from the output so that they don't get included in the library
+           # dependencies.
+           output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $GREP "\-L"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"'
+           ;;
+          *)
+           if test "$GXX" = yes; then
+             if test $with_gnu_ld = no; then
+               case $host_cpu in
+                 hppa*64*)
+                   archive_cmds_CXX='$CC -shared -nostdlib -fPIC ${wl}+h ${wl}$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
+                   ;;
+                 ia64*)
+                   archive_cmds_CXX='$CC -shared -nostdlib $pic_flag ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
+                   ;;
+                 *)
+                   archive_cmds_CXX='$CC -shared -nostdlib $pic_flag ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
+                   ;;
+               esac
+             fi
+           else
+             # FIXME: insert proper C++ library support
+             ld_shlibs_CXX=no
+           fi
+           ;;
+        esac
+        ;;
+
+      interix[3-9]*)
+       hardcode_direct_CXX=no
+       hardcode_shlibpath_var_CXX=no
+       hardcode_libdir_flag_spec_CXX='${wl}-rpath,$libdir'
+       export_dynamic_flag_spec_CXX='${wl}-E'
+       # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc.
+       # Instead, shared libraries are loaded at an image base (0x10000000 by
+       # default) and relocated if they conflict, which is a slow, very memory-
+       # consuming and fragmenting process.  To avoid this, we pick a random,
+       # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link
+       # time.  Moving up from 0x10000000 also allows more sbrk(2) space.
+       archive_cmds_CXX='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib'
+       archive_expsym_cmds_CXX='sed "s,^,_," $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--retain-symbols-file,$output_objdir/$soname.expsym ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib'
+       ;;
+      irix5* | irix6*)
+        case $cc_basename in
+          CC*)
+           # SGI C++
+           archive_cmds_CXX='$CC -shared -all -multigot $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib'
+
+           # Archives containing C++ object files must be created using
+           # "CC -ar", where "CC" is the IRIX C++ compiler.  This is
+           # necessary to make sure instantiated templates are included
+           # in the archive.
+           old_archive_cmds_CXX='$CC -ar -WR,-u -o $oldlib $oldobjs'
+           ;;
+          *)
+           if test "$GXX" = yes; then
+             if test "$with_gnu_ld" = no; then
+               archive_cmds_CXX='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
+             else
+               archive_cmds_CXX='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` -o $lib'
+             fi
+           fi
+           link_all_deplibs_CXX=yes
+           ;;
+        esac
+        hardcode_libdir_flag_spec_CXX='${wl}-rpath ${wl}$libdir'
+        hardcode_libdir_separator_CXX=:
+        inherit_rpath_CXX=yes
+        ;;
+
+      linux* | k*bsd*-gnu | kopensolaris*-gnu)
+        case $cc_basename in
+          KCC*)
+           # Kuck and Associates, Inc. (KAI) C++ Compiler
+
+           # KCC will only create a shared library if the output file
+           # ends with ".so" (or ".sl" for HP-UX), so rename the library
+           # to its proper name (with version) after linking.
+           archive_cmds_CXX='tempext=`echo $shared_ext | $SED -e '\''s/\([^()0-9A-Za-z{}]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib'
+           archive_expsym_cmds_CXX='tempext=`echo $shared_ext | $SED -e '\''s/\([^()0-9A-Za-z{}]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib ${wl}-retain-symbols-file,$export_symbols; mv \$templib $lib'
+           # Commands to make compiler produce verbose output that lists
+           # what "hidden" libraries, object files and flags are used when
+           # linking a shared library.
+           #
+           # There doesn't appear to be a way to prevent this compiler from
+           # explicitly linking system object files so we need to strip them
+           # from the output so that they don't get included in the library
+           # dependencies.
+           output_verbose_link_cmd='templist=`$CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1 | $GREP "ld"`; rm -f libconftest$shared_ext; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"'
+
+           hardcode_libdir_flag_spec_CXX='${wl}-rpath,$libdir'
+           export_dynamic_flag_spec_CXX='${wl}--export-dynamic'
+
+           # Archives containing C++ object files must be created using
+           # "CC -Bstatic", where "CC" is the KAI C++ compiler.
+           old_archive_cmds_CXX='$CC -Bstatic -o $oldlib $oldobjs'
+           ;;
+         icpc* | ecpc* )
+           # Intel C++
+           with_gnu_ld=yes
+           # Versions 8.0 and above of icpc choke on multiply defined symbols
+           # if we add $predep_objects and $postdep_objects; however, 7.1 and
+           # earlier do not add the objects themselves.
+           case `$CC -V 2>&1` in
+             *"Version 7."*)
+               archive_cmds_CXX='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib'
+               archive_expsym_cmds_CXX='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
+               ;;
+             *)  # Version 8.0 or newer
+               tmp_idyn=
+               case $host_cpu in
+                 ia64*) tmp_idyn=' -i_dynamic';;
+               esac
+               archive_cmds_CXX='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+               archive_expsym_cmds_CXX='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
+               ;;
+           esac
+           archive_cmds_need_lc_CXX=no
+           hardcode_libdir_flag_spec_CXX='${wl}-rpath,$libdir'
+           export_dynamic_flag_spec_CXX='${wl}--export-dynamic'
+           whole_archive_flag_spec_CXX='${wl}--whole-archive$convenience ${wl}--no-whole-archive'
+           ;;
+          pgCC* | pgcpp*)
+            # Portland Group C++ compiler
+           case `$CC -V` in
+           *pgCC\ [1-5].* | *pgcpp\ [1-5].*)
+             prelink_cmds_CXX='tpldir=Template.dir~
+               rm -rf $tpldir~
+               $CC --prelink_objects --instantiation_dir $tpldir $objs $libobjs $compile_deplibs~
+               compile_command="$compile_command `find $tpldir -name \*.o | sort | $NL2SP`"'
+             old_archive_cmds_CXX='tpldir=Template.dir~
+               rm -rf $tpldir~
+               $CC --prelink_objects --instantiation_dir $tpldir $oldobjs$old_deplibs~
+               $AR $AR_FLAGS $oldlib$oldobjs$old_deplibs `find $tpldir -name \*.o | sort | $NL2SP`~
+               $RANLIB $oldlib'
+             archive_cmds_CXX='tpldir=Template.dir~
+               rm -rf $tpldir~
+               $CC --prelink_objects --instantiation_dir $tpldir $predep_objects $libobjs $deplibs $convenience $postdep_objects~
+               $CC -shared $pic_flag $predep_objects $libobjs $deplibs `find $tpldir -name \*.o | sort | $NL2SP` $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname -o $lib'
+             archive_expsym_cmds_CXX='tpldir=Template.dir~
+               rm -rf $tpldir~
+               $CC --prelink_objects --instantiation_dir $tpldir $predep_objects $libobjs $deplibs $convenience $postdep_objects~
+               $CC -shared $pic_flag $predep_objects $libobjs $deplibs `find $tpldir -name \*.o | sort | $NL2SP` $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname ${wl}-retain-symbols-file ${wl}$export_symbols -o $lib'
+             ;;
+           *) # Version 6 and above use weak symbols
+             archive_cmds_CXX='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname -o $lib'
+             archive_expsym_cmds_CXX='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname ${wl}-retain-symbols-file ${wl}$export_symbols -o $lib'
+             ;;
+           esac
+
+           hardcode_libdir_flag_spec_CXX='${wl}--rpath ${wl}$libdir'
+           export_dynamic_flag_spec_CXX='${wl}--export-dynamic'
+           whole_archive_flag_spec_CXX='${wl}--whole-archive`for conv in $convenience\"\"; do test  -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive'
+            ;;
+         cxx*)
+           # Compaq C++
+           archive_cmds_CXX='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib'
+           archive_expsym_cmds_CXX='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname  -o $lib ${wl}-retain-symbols-file $wl$export_symbols'
+
+           runpath_var=LD_RUN_PATH
+           hardcode_libdir_flag_spec_CXX='-rpath $libdir'
+           hardcode_libdir_separator_CXX=:
+
+           # Commands to make compiler produce verbose output that lists
+           # what "hidden" libraries, object files and flags are used when
+           # linking a shared library.
+           #
+           # There doesn't appear to be a way to prevent this compiler from
+           # explicitly linking system object files so we need to strip them
+           # from the output so that they don't get included in the library
+           # dependencies.
+           output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP "ld"`; templist=`func_echo_all "$templist" | $SED "s/\(^.*ld.*\)\( .*ld .*$\)/\1/"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "X$list" | $Xsed'
+           ;;
+         xl* | mpixl* | bgxl*)
+           # IBM XL 8.0 on PPC, with GNU ld
+           hardcode_libdir_flag_spec_CXX='${wl}-rpath ${wl}$libdir'
+           export_dynamic_flag_spec_CXX='${wl}--export-dynamic'
+           archive_cmds_CXX='$CC -qmkshrobj $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+           if test "x$supports_anon_versioning" = xyes; then
+             archive_expsym_cmds_CXX='echo "{ global:" > $output_objdir/$libname.ver~
+               cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~
+               echo "local: *; };" >> $output_objdir/$libname.ver~
+               $CC -qmkshrobj $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-version-script ${wl}$output_objdir/$libname.ver -o $lib'
+           fi
+           ;;
+         *)
+           case `$CC -V 2>&1 | sed 5q` in
+           *Sun\ C*)
+             # Sun C++ 5.9
+             no_undefined_flag_CXX=' -zdefs'
+             archive_cmds_CXX='$CC -G${allow_undefined_flag} -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
+             archive_expsym_cmds_CXX='$CC -G${allow_undefined_flag} -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-retain-symbols-file ${wl}$export_symbols'
+             hardcode_libdir_flag_spec_CXX='-R$libdir'
+             whole_archive_flag_spec_CXX='${wl}--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive'
+             compiler_needs_object_CXX=yes
+
+             # Not sure whether something based on
+             # $CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1
+             # would be better.
+             output_verbose_link_cmd='func_echo_all'
+
+             # Archives containing C++ object files must be created using
+             # "CC -xar", where "CC" is the Sun C++ compiler.  This is
+             # necessary to make sure instantiated templates are included
+             # in the archive.
+             old_archive_cmds_CXX='$CC -xar -o $oldlib $oldobjs'
+             ;;
+           esac
+           ;;
+       esac
+       ;;
+
+      lynxos*)
+        # FIXME: insert proper C++ library support
+       ld_shlibs_CXX=no
+       ;;
+
+      m88k*)
+        # FIXME: insert proper C++ library support
+        ld_shlibs_CXX=no
+       ;;
+
+      mvs*)
+        case $cc_basename in
+          cxx*)
+           # FIXME: insert proper C++ library support
+           ld_shlibs_CXX=no
+           ;;
+         *)
+           # FIXME: insert proper C++ library support
+           ld_shlibs_CXX=no
+           ;;
+       esac
+       ;;
+
+      netbsd*)
+        if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then
+         archive_cmds_CXX='$LD -Bshareable  -o $lib $predep_objects $libobjs $deplibs $postdep_objects $linker_flags'
+         wlarc=
+         hardcode_libdir_flag_spec_CXX='-R$libdir'
+         hardcode_direct_CXX=yes
+         hardcode_shlibpath_var_CXX=no
+       fi
+       # Work around some broken pre-1.5 toolchains by stripping "-lgcc -lc -lgcc" from the verbose link output.
+       output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP conftest.$objext | $SED -e "s:-lgcc -lc -lgcc::"'
+       ;;
+
+      *nto* | *qnx*)
+        ld_shlibs_CXX=yes
+       ;;
+
+      openbsd2*)
+        # C++ shared libraries are fairly broken
+       ld_shlibs_CXX=no
+       ;;
+
+      openbsd*)
+       if test -f /usr/libexec/ld.so; then
+         hardcode_direct_CXX=yes
+         hardcode_shlibpath_var_CXX=no
+         hardcode_direct_absolute_CXX=yes
+         archive_cmds_CXX='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib'
+         hardcode_libdir_flag_spec_CXX='${wl}-rpath,$libdir'
+         if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
+           archive_expsym_cmds_CXX='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-retain-symbols-file,$export_symbols -o $lib'
+           export_dynamic_flag_spec_CXX='${wl}-E'
+           whole_archive_flag_spec_CXX="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive'
+         fi
+         output_verbose_link_cmd=func_echo_all
+       else
+         ld_shlibs_CXX=no
+       fi
+       ;;
+
+      osf3* | osf4* | osf5*)
+        case $cc_basename in
+          KCC*)
+           # Kuck and Associates, Inc. (KAI) C++ Compiler
+
+           # KCC will only create a shared library if the output file
+           # ends with ".so" (or ".sl" for HP-UX), so rename the library
+           # to its proper name (with version) after linking.
+           archive_cmds_CXX='tempext=`echo $shared_ext | $SED -e '\''s/\([^()0-9A-Za-z{}]\)/\\\\\1/g'\''`; templib=`echo "$lib" | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib'
+
+           hardcode_libdir_flag_spec_CXX='${wl}-rpath,$libdir'
+           hardcode_libdir_separator_CXX=:
+
+           # Archives containing C++ object files must be created using
+           # the KAI C++ compiler.
+           case $host in
+             osf3*) old_archive_cmds_CXX='$CC -Bstatic -o $oldlib $oldobjs' ;;
+             *) old_archive_cmds_CXX='$CC -o $oldlib $oldobjs' ;;
+           esac
+           ;;
+          RCC*)
+           # Rational C++ 2.4.1
+           # FIXME: insert proper C++ library support
+           ld_shlibs_CXX=no
+           ;;
+          cxx*)
+           case $host in
+             osf3*)
+               allow_undefined_flag_CXX=' ${wl}-expect_unresolved ${wl}\*'
+               archive_cmds_CXX='$CC -shared${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $soname `test -n "$verstring" && func_echo_all "${wl}-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib'
+               hardcode_libdir_flag_spec_CXX='${wl}-rpath ${wl}$libdir'
+               ;;
+             *)
+               allow_undefined_flag_CXX=' -expect_unresolved \*'
+               archive_cmds_CXX='$CC -shared${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib'
+               archive_expsym_cmds_CXX='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done~
+                 echo "-hidden">> $lib.exp~
+                 $CC -shared$allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname ${wl}-input ${wl}$lib.exp  `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib~
+                 $RM $lib.exp'
+               hardcode_libdir_flag_spec_CXX='-rpath $libdir'
+               ;;
+           esac
+
+           hardcode_libdir_separator_CXX=:
+
+           # Commands to make compiler produce verbose output that lists
+           # what "hidden" libraries, object files and flags are used when
+           # linking a shared library.
+           #
+           # There doesn't appear to be a way to prevent this compiler from
+           # explicitly linking system object files so we need to strip them
+           # from the output so that they don't get included in the library
+           # dependencies.
+           output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP "ld" | $GREP -v "ld:"`; templist=`func_echo_all "$templist" | $SED "s/\(^.*ld.*\)\( .*ld.*$\)/\1/"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"'
+           ;;
+         *)
+           if test "$GXX" = yes && test "$with_gnu_ld" = no; then
+             allow_undefined_flag_CXX=' ${wl}-expect_unresolved ${wl}\*'
+             case $host in
+               osf3*)
+                 archive_cmds_CXX='$CC -shared -nostdlib ${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
+                 ;;
+               *)
+                 archive_cmds_CXX='$CC -shared $pic_flag -nostdlib ${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-msym ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
+                 ;;
+             esac
+
+             hardcode_libdir_flag_spec_CXX='${wl}-rpath ${wl}$libdir'
+             hardcode_libdir_separator_CXX=:
+
+             # Commands to make compiler produce verbose output that lists
+             # what "hidden" libraries, object files and flags are used when
+             # linking a shared library.
+             output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"'
+
+           else
+             # FIXME: insert proper C++ library support
+             ld_shlibs_CXX=no
+           fi
+           ;;
+        esac
+        ;;
+
+      psos*)
+        # FIXME: insert proper C++ library support
+        ld_shlibs_CXX=no
+        ;;
+
+      sunos4*)
+        case $cc_basename in
+          CC*)
+           # Sun C++ 4.x
+           # FIXME: insert proper C++ library support
+           ld_shlibs_CXX=no
+           ;;
+          lcc*)
+           # Lucid
+           # FIXME: insert proper C++ library support
+           ld_shlibs_CXX=no
+           ;;
+          *)
+           # FIXME: insert proper C++ library support
+           ld_shlibs_CXX=no
+           ;;
+        esac
+        ;;
+
+      solaris*)
+        case $cc_basename in
+          CC* | sunCC*)
+           # Sun C++ 4.2, 5.x and Centerline C++
+            archive_cmds_need_lc_CXX=yes
+           no_undefined_flag_CXX=' -zdefs'
+           archive_cmds_CXX='$CC -G${allow_undefined_flag}  -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
+           archive_expsym_cmds_CXX='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~
+             $CC -G${allow_undefined_flag} ${wl}-M ${wl}$lib.exp -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp'
+
+           hardcode_libdir_flag_spec_CXX='-R$libdir'
+           hardcode_shlibpath_var_CXX=no
+           case $host_os in
+             solaris2.[0-5] | solaris2.[0-5].*) ;;
+             *)
+               # The compiler driver will combine and reorder linker options,
+               # but understands `-z linker_flag'.
+               # Supported since Solaris 2.6 (maybe 2.5.1?)
+               whole_archive_flag_spec_CXX='-z allextract$convenience -z defaultextract'
+               ;;
+           esac
+           link_all_deplibs_CXX=yes
+
+           output_verbose_link_cmd='func_echo_all'
+
+           # Archives containing C++ object files must be created using
+           # "CC -xar", where "CC" is the Sun C++ compiler.  This is
+           # necessary to make sure instantiated templates are included
+           # in the archive.
+           old_archive_cmds_CXX='$CC -xar -o $oldlib $oldobjs'
+           ;;
+          gcx*)
+           # Green Hills C++ Compiler
+           archive_cmds_CXX='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib'
+
+           # The C++ compiler must be used to create the archive.
+           old_archive_cmds_CXX='$CC $LDFLAGS -archive -o $oldlib $oldobjs'
+           ;;
+          *)
+           # GNU C++ compiler with Solaris linker
+           if test "$GXX" = yes && test "$with_gnu_ld" = no; then
+             no_undefined_flag_CXX=' ${wl}-z ${wl}defs'
+             if $CC --version | $GREP -v '^2\.7' > /dev/null; then
+               archive_cmds_CXX='$CC -shared $pic_flag -nostdlib $LDFLAGS $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib'
+               archive_expsym_cmds_CXX='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~
+                 $CC -shared $pic_flag -nostdlib ${wl}-M $wl$lib.exp -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp'
+
+               # Commands to make compiler produce verbose output that lists
+               # what "hidden" libraries, object files and flags are used when
+               # linking a shared library.
+               output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"'
+             else
+               # g++ 2.7 appears to require `-G' NOT `-shared' on this
+               # platform.
+               archive_cmds_CXX='$CC -G -nostdlib $LDFLAGS $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib'
+               archive_expsym_cmds_CXX='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~
+                 $CC -G -nostdlib ${wl}-M $wl$lib.exp -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp'
+
+               # Commands to make compiler produce verbose output that lists
+               # what "hidden" libraries, object files and flags are used when
+               # linking a shared library.
+               output_verbose_link_cmd='$CC -G $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"'
+             fi
+
+             hardcode_libdir_flag_spec_CXX='${wl}-R $wl$libdir'
+             case $host_os in
+               solaris2.[0-5] | solaris2.[0-5].*) ;;
+               *)
+                 whole_archive_flag_spec_CXX='${wl}-z ${wl}allextract$convenience ${wl}-z ${wl}defaultextract'
+                 ;;
+             esac
+           fi
+           ;;
+        esac
+        ;;
+
+    sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[01].[10]* | unixware7* | sco3.2v5.0.[024]*)
+      no_undefined_flag_CXX='${wl}-z,text'
+      archive_cmds_need_lc_CXX=no
+      hardcode_shlibpath_var_CXX=no
+      runpath_var='LD_RUN_PATH'
+
+      case $cc_basename in
+        CC*)
+         archive_cmds_CXX='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+         archive_expsym_cmds_CXX='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+         ;;
+       *)
+         archive_cmds_CXX='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+         archive_expsym_cmds_CXX='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+         ;;
+      esac
+      ;;
+
+      sysv5* | sco3.2v5* | sco5v6*)
+       # Note: We can NOT use -z defs as we might desire, because we do not
+       # link with -lc, and that would cause any symbols used from libc to
+       # always be unresolved, which means just about no library would
+       # ever link correctly.  If we're not using GNU ld we use -z text
+       # though, which does catch some bad symbols but isn't as heavy-handed
+       # as -z defs.
+       no_undefined_flag_CXX='${wl}-z,text'
+       allow_undefined_flag_CXX='${wl}-z,nodefs'
+       archive_cmds_need_lc_CXX=no
+       hardcode_shlibpath_var_CXX=no
+       hardcode_libdir_flag_spec_CXX='${wl}-R,$libdir'
+       hardcode_libdir_separator_CXX=':'
+       link_all_deplibs_CXX=yes
+       export_dynamic_flag_spec_CXX='${wl}-Bexport'
+       runpath_var='LD_RUN_PATH'
+
+       case $cc_basename in
+          CC*)
+           archive_cmds_CXX='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+           archive_expsym_cmds_CXX='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+           old_archive_cmds_CXX='$CC -Tprelink_objects $oldobjs~
+             '"$old_archive_cmds_CXX"
+           reload_cmds_CXX='$CC -Tprelink_objects $reload_objs~
+             '"$reload_cmds_CXX"
+           ;;
+         *)
+           archive_cmds_CXX='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+           archive_expsym_cmds_CXX='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+           ;;
+       esac
+      ;;
+
+      tandem*)
+        case $cc_basename in
+          NCC*)
+           # NonStop-UX NCC 3.20
+           # FIXME: insert proper C++ library support
+           ld_shlibs_CXX=no
+           ;;
+          *)
+           # FIXME: insert proper C++ library support
+           ld_shlibs_CXX=no
+           ;;
+        esac
+        ;;
+
+      vxworks*)
+        # FIXME: insert proper C++ library support
+        ld_shlibs_CXX=no
+        ;;
+
+      *)
+        # FIXME: insert proper C++ library support
+        ld_shlibs_CXX=no
+        ;;
+    esac
+
+    # Report the outcome of the shared-library support decision for C++:
+    # a prefixed copy goes to descriptor 5, the bare value to descriptor 6.
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ld_shlibs_CXX" >&5
+$as_echo "$ld_shlibs_CXX" >&6; }
+    # A "no" for C++ switches off shared-library building as a whole.
+    test "$ld_shlibs_CXX" = no && can_build_shared=no
+
+    # Record $GXX and $LD under CXX-suffixed names.
+    GCC_CXX="$GXX"
+    LD_CXX="$LD"
+
+    ## CAVEAT EMPTOR:
+    ## There is no encapsulation within the following macros, do not change
+    ## the running order or otherwise move them around unless you know exactly
+    ## what you are doing...
+    # Dependencies to place before and after the object being linked:
+    # (all five accumulators start out empty; the scan of the verbose link
+    # output further down appends to them)
+predep_objects_CXX=
+postdep_objects_CXX=
+predeps_CXX=
+postdeps_CXX=
+compiler_lib_search_path_CXX=
+
+# Write a minimal C++ source (one class with an inline constructor) to
+# conftest.$ac_ext.  Everything up to the _LT_EOF line is file content.
+cat > conftest.$ac_ext <<_LT_EOF
+class Foo
+{
+public:
+  Foo (void) { a = 0; }
+private:
+  int a;
+};
+_LT_EOF
+
+
+# Remember CFLAGS so it can be put back after the probe, then, if the
+# compiler command or flags mention an LTO-style option, append the
+# corresponding -fno-* option.  Only the first matching pattern applies.
+_lt_libdeps_save_CFLAGS=$CFLAGS
+case "$CC $CFLAGS " in #(
+*\ -flto*\ *) CFLAGS="$CFLAGS -fno-lto" ;;
+*\ -fwhopr*\ *) CFLAGS="$CFLAGS -fno-whopr" ;;
+*\ -fuse-linker-plugin*\ *) CFLAGS="$CFLAGS -fno-use-linker-plugin" ;;
+esac
+
+# Compile the test source; on success, walk the words printed by
+# $output_verbose_link_cmd and sort them into:
+#   compiler_lib_search_path_CXX  -L/-R options seen before conftest.$objext
+#   postdeps_CXX                  -L/-R/-l options seen after it
+#   predep_objects_CXX            object files seen before it
+#   postdep_objects_CXX           object files seen after it
+# The compile command and its exit status are logged to descriptor 5.
+if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5
+  (eval $ac_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  # Parse the compiler output and extract the necessary
+  # objects, libraries and library flags.
+
+  # Sentinel used to keep track of whether or not we are before
+  # the conftest object file.
+  pre_test_object_deps_done=no
+
+  # Pending option ("-L" or "-R") whose argument is the next word.  It must
+  # start out empty: the case below matches on ${prev}${p}, so a stale value
+  # (inherited from the environment or left over from an earlier scan) would
+  # otherwise be glued onto the first word of the output.
+  prev=
+
+  for p in `eval "$output_verbose_link_cmd"`; do
+    case ${prev}${p} in
+
+    -L* | -R* | -l*)
+       # Some compilers place space between "-{L,R}" and the path.
+       # Remove the space.
+       if test "$p" = "-L" ||
+          test "$p" = "-R"; then
+        prev=$p
+        continue
+       fi
+
+       # Expand the sysroot to ease extracting the directories later.
+       # When the option and its argument arrived as one word, split them
+       # so that $prev holds the option and $p only the argument.
+       if test -z "$prev"; then
+         case $p in
+         -L*) func_stripname_cnf '-L' '' "$p"; prev=-L; p=$func_stripname_result ;;
+         -R*) func_stripname_cnf '-R' '' "$p"; prev=-R; p=$func_stripname_result ;;
+         -l*) func_stripname_cnf '-l' '' "$p"; prev=-l; p=$func_stripname_result ;;
+         esac
+       fi
+       # A leading "=" in the argument is replaced by $lt_sysroot.
+       case $p in
+       =*) func_stripname_cnf '=' '' "$p"; p=$lt_sysroot$func_stripname_result ;;
+       esac
+       if test "$pre_test_object_deps_done" = no; then
+        case ${prev} in
+        -L | -R)
+          # Internal compiler library paths should come after those
+          # provided by the user.  The postdeps already come after the
+          # user supplied libs so there is no need to process them.
+          if test -z "$compiler_lib_search_path_CXX"; then
+            compiler_lib_search_path_CXX="${prev}${p}"
+          else
+            compiler_lib_search_path_CXX="${compiler_lib_search_path_CXX} ${prev}${p}"
+          fi
+          ;;
+        # The "-l" case would never come before the object being
+        # linked, so don't bother handling this case.
+        esac
+       else
+        if test -z "$postdeps_CXX"; then
+          postdeps_CXX="${prev}${p}"
+        else
+          postdeps_CXX="${postdeps_CXX} ${prev}${p}"
+        fi
+       fi
+       # The option has been consumed; nothing is pending any more.
+       prev=
+       ;;
+
+    *.lto.$objext) ;; # Ignore GCC LTO objects
+    *.$objext)
+       # This assumes that the test object file only shows up
+       # once in the compiler output.
+       if test "$p" = "conftest.$objext"; then
+        pre_test_object_deps_done=yes
+        continue
+       fi
+
+       if test "$pre_test_object_deps_done" = no; then
+        if test -z "$predep_objects_CXX"; then
+          predep_objects_CXX="$p"
+        else
+          predep_objects_CXX="$predep_objects_CXX $p"
+        fi
+       else
+        if test -z "$postdep_objects_CXX"; then
+          postdep_objects_CXX="$p"
+        else
+          postdep_objects_CXX="$postdep_objects_CXX $p"
+        fi
+       fi
+       ;;
+
+    *) ;; # Ignore the rest.
+
+    esac
+  done
+
+  # Clean up.
+  rm -f a.out a.exe
+else
+  echo "libtool.m4: error: problem compiling CXX test program"
+fi
+
+# Remove the object file produced by the dependency probe ("conftest", not
+# "confest": the misspelt name matched no file, so the object was left
+# behind) and restore the CFLAGS value saved before the probe.
+$RM -f conftest.$objext
+CFLAGS=$_lt_libdeps_save_CFLAGS
+
+# PORTME: override above test on systems where it is broken
+# For the hosts listed here the automatically collected predep/postdep
+# values are discarded or replaced with fixed ones.
+case $host_os in
+interix[3-9]*)
+  # Interix 3.5 installs completely hosed .la files for C++, so rather than
+  # hack all around it, let's just trust "g++" to DTRT.
+  predep_objects_CXX=
+  postdep_objects_CXX=
+  postdeps_CXX=
+  ;;
+
+linux*)
+  # Look for the Sun compiler banner in the first five lines printed by
+  # `$CC -V' (stdout and stderr combined).
+  case `$CC -V 2>&1 | sed 5q` in
+  *Sun\ C*)
+    # Sun C++ 5.9
+
+    # The more standards-conforming stlport4 library is
+    # incompatible with the Cstd library. Avoid specifying
+    # it if it's in CXXFLAGS. Ignore libCrun as
+    # -library=stlport4 depends on it.
+    # NOTE(review): solaris_use_stlport4 is only ever assigned `yes' in this
+    # case statement and is never reset here; confirm it cannot arrive
+    # pre-set (e.g. from the environment).
+    case " $CXX $CXXFLAGS " in
+    *" -library=stlport4 "*)
+      solaris_use_stlport4=yes
+      ;;
+    esac
+
+    if test "$solaris_use_stlport4" != yes; then
+      postdeps_CXX='-library=Cstd -library=Crun'
+    fi
+    ;;
+  esac
+  ;;
+
+solaris*)
+  # Same stlport4 handling as the linux* branch, but selected by the
+  # compiler's base name instead of its version banner.
+  case $cc_basename in
+  CC* | sunCC*)
+    # The more standards-conforming stlport4 library is
+    # incompatible with the Cstd library. Avoid specifying
+    # it if it's in CXXFLAGS. Ignore libCrun as
+    # -library=stlport4 depends on it.
+    case " $CXX $CXXFLAGS " in
+    *" -library=stlport4 "*)
+      solaris_use_stlport4=yes
+      ;;
+    esac
+
+    # Adding this requires a known-good setup of shared libraries for
+    # Sun compiler versions before 5.6, else PIC objects from an old
+    # archive will be linked into the output, leading to subtle bugs.
+    if test "$solaris_use_stlport4" != yes; then
+      postdeps_CXX='-library=Cstd -library=Crun'
+    fi
+    ;;
+  esac
+  ;;
+esac
+
+
+# If the implicit post-dependencies already name -lc, libc does not have to
+# be added explicitly when creating archives.
+case " $postdeps_CXX " in
+  *" -lc "*)
+    archive_cmds_need_lc_CXX=no
+    ;;
+esac
+
+# Derive the list of bare directories from the collected search-path
+# options: every " -L" prefix becomes a single blank and the leading blank
+# is dropped.  The list stays empty when no search path was collected.
+compiler_lib_search_dirs_CXX=
+if test -n "$compiler_lib_search_path_CXX"; then
+  compiler_lib_search_dirs_CXX=`echo " $compiler_lib_search_path_CXX" | $SED -e 's! -L! !g' -e 's!^ !!'`
+fi
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+    # Choose three per-compiler settings for C++, all starting out empty:
+    #   lt_prog_compiler_wl_CXX      prefix for passing an option to the linker
+    #   lt_prog_compiler_pic_CXX     option(s) used when compiling PIC objects
+    #   lt_prog_compiler_static_CXX  option requesting static linking
+    # The first half of the if below handles GNU C++ ($GXX = yes) by host OS;
+    # the second half handles other compilers by host OS and compiler name.
+    lt_prog_compiler_wl_CXX=
+lt_prog_compiler_pic_CXX=
+lt_prog_compiler_static_CXX=
+
+
+  # C++ specific cases for pic, static, wl, etc.
+  if test "$GXX" = yes; then
+    # Defaults for GNU C++; individual hosts below adjust them.
+    lt_prog_compiler_wl_CXX='-Wl,'
+    lt_prog_compiler_static_CXX='-static'
+
+    case $host_os in
+    aix*)
+      # All AIX code is PIC.
+      if test "$host_cpu" = ia64; then
+       # AIX 5 now supports IA64 processor
+       lt_prog_compiler_static_CXX='-Bstatic'
+      fi
+      ;;
+
+    amigaos*)
+      case $host_cpu in
+      powerpc)
+            # see comment about AmigaOS4 .so support
+            lt_prog_compiler_pic_CXX='-fPIC'
+        ;;
+      m68k)
+            # FIXME: we need at least 68020 code to build shared libraries, but
+            # adding the `-m68020' flag to GCC prevents building anything better,
+            # like `-m68040'.
+            lt_prog_compiler_pic_CXX='-m68020 -resident32 -malways-restore-a4'
+        ;;
+      esac
+      ;;
+
+    beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*)
+      # PIC is the default for these OSes.
+      ;;
+    mingw* | cygwin* | os2* | pw32* | cegcc*)
+      # This hack is so that the source file can tell whether it is being
+      # built for inclusion in a dll (and should export symbols for example).
+      # Although the cygwin gcc ignores -fPIC, still need this for old-style
+      # (--disable-auto-import) libraries
+      lt_prog_compiler_pic_CXX='-DDLL_EXPORT'
+      ;;
+    darwin* | rhapsody*)
+      # PIC is the default on this platform
+      # Common symbols not allowed in MH_DYLIB files
+      lt_prog_compiler_pic_CXX='-fno-common'
+      ;;
+    *djgpp*)
+      # DJGPP does not support shared libraries at all
+      lt_prog_compiler_pic_CXX=
+      ;;
+    haiku*)
+      # PIC is the default for Haiku.
+      # The "-static" flag exists, but is broken.
+      lt_prog_compiler_static_CXX=
+      ;;
+    interix[3-9]*)
+      # Interix 3.x gcc -fpic/-fPIC options generate broken code.
+      # Instead, we relocate shared libraries at runtime.
+      ;;
+    sysv4*MP*)
+      if test -d /usr/nec; then
+       lt_prog_compiler_pic_CXX=-Kconform_pic
+      fi
+      ;;
+    hpux*)
+      # PIC is the default for 64-bit PA HP-UX, but not for 32-bit
+      # PA HP-UX.  On IA64 HP-UX, PIC is the default but the pic flag
+      # sets the default TLS model and affects inlining.
+      case $host_cpu in
+      hppa*64*)
+       ;;
+      *)
+       lt_prog_compiler_pic_CXX='-fPIC'
+       ;;
+      esac
+      ;;
+    *qnx* | *nto*)
+      # QNX uses GNU C++, but need to define -shared option too, otherwise
+      # it will coredump.
+      lt_prog_compiler_pic_CXX='-fPIC -shared'
+      ;;
+    *)
+      lt_prog_compiler_pic_CXX='-fPIC'
+      ;;
+    esac
+  else
+    # Not GNU C++: pick flags by host OS and, within it, by $cc_basename.
+    # Branches with an empty body leave all three settings empty.
+    case $host_os in
+      aix[4-9]*)
+       # All AIX code is PIC.
+       if test "$host_cpu" = ia64; then
+         # AIX 5 now supports IA64 processor
+         lt_prog_compiler_static_CXX='-Bstatic'
+       else
+         lt_prog_compiler_static_CXX='-bnso -bI:/lib/syscalls.exp'
+       fi
+       ;;
+      chorus*)
+       case $cc_basename in
+       cxch68*)
+         # Green Hills C++ Compiler
+         # _LT_TAGVAR(lt_prog_compiler_static, CXX)="--no_auto_instantiation -u __main -u __premain -u _abort -r $COOL_DIR/lib/libOrb.a $MVME_DIR/lib/CC/libC.a $MVME_DIR/lib/classix/libcx.s.a"
+         ;;
+       esac
+       ;;
+      mingw* | cygwin* | os2* | pw32* | cegcc*)
+       # This hack is so that the source file can tell whether it is being
+       # built for inclusion in a dll (and should export symbols for example).
+       lt_prog_compiler_pic_CXX='-DDLL_EXPORT'
+       ;;
+      dgux*)
+       case $cc_basename in
+         ec++*)
+           lt_prog_compiler_pic_CXX='-KPIC'
+           ;;
+         ghcx*)
+           # Green Hills C++ Compiler
+           lt_prog_compiler_pic_CXX='-pic'
+           ;;
+         *)
+           ;;
+       esac
+       ;;
+      freebsd* | dragonfly*)
+       # FreeBSD uses GNU C++
+       ;;
+      hpux9* | hpux10* | hpux11*)
+       case $cc_basename in
+         CC*)
+           lt_prog_compiler_wl_CXX='-Wl,'
+           lt_prog_compiler_static_CXX='${wl}-a ${wl}archive'
+           if test "$host_cpu" != ia64; then
+             lt_prog_compiler_pic_CXX='+Z'
+           fi
+           ;;
+         aCC*)
+           lt_prog_compiler_wl_CXX='-Wl,'
+           lt_prog_compiler_static_CXX='${wl}-a ${wl}archive'
+           case $host_cpu in
+           hppa*64*|ia64*)
+             # +Z the default
+             ;;
+           *)
+             lt_prog_compiler_pic_CXX='+Z'
+             ;;
+           esac
+           ;;
+         *)
+           ;;
+       esac
+       ;;
+      interix*)
+       # This is c89, which is MS Visual C++ (no shared libs)
+       # Anyone wants to do a port?
+       ;;
+      irix5* | irix6* | nonstopux*)
+       case $cc_basename in
+         CC*)
+           lt_prog_compiler_wl_CXX='-Wl,'
+           lt_prog_compiler_static_CXX='-non_shared'
+           # CC pic flag -KPIC is the default.
+           ;;
+         *)
+           ;;
+       esac
+       ;;
+      linux* | k*bsd*-gnu | kopensolaris*-gnu)
+       case $cc_basename in
+         KCC*)
+           # KAI C++ Compiler
+           lt_prog_compiler_wl_CXX='--backend -Wl,'
+           lt_prog_compiler_pic_CXX='-fPIC'
+           ;;
+         ecpc* )
+           # old Intel C++ for x86_64 which still supported -KPIC.
+           lt_prog_compiler_wl_CXX='-Wl,'
+           lt_prog_compiler_pic_CXX='-KPIC'
+           lt_prog_compiler_static_CXX='-static'
+           ;;
+         icpc* )
+           # Intel C++, used to be incompatible with GCC.
+           # ICC 10 doesn't accept -KPIC any more.
+           lt_prog_compiler_wl_CXX='-Wl,'
+           lt_prog_compiler_pic_CXX='-fPIC'
+           lt_prog_compiler_static_CXX='-static'
+           ;;
+         pgCC* | pgcpp*)
+           # Portland Group C++ compiler
+           lt_prog_compiler_wl_CXX='-Wl,'
+           lt_prog_compiler_pic_CXX='-fpic'
+           lt_prog_compiler_static_CXX='-Bstatic'
+           ;;
+         cxx*)
+           # Compaq C++
+           # Make sure the PIC flag is empty.  It appears that all Alpha
+           # Linux and Compaq Tru64 Unix objects are PIC.
+           lt_prog_compiler_pic_CXX=
+           lt_prog_compiler_static_CXX='-non_shared'
+           ;;
+         xlc* | xlC* | bgxl[cC]* | mpixl[cC]*)
+           # IBM XL 8.0, 9.0 on PPC and BlueGene
+           lt_prog_compiler_wl_CXX='-Wl,'
+           lt_prog_compiler_pic_CXX='-qpic'
+           lt_prog_compiler_static_CXX='-qstaticlink'
+           ;;
+         *)
+           # Unrecognised name: identify Sun C++ from the first five lines
+           # of `$CC -V' output; anything else keeps the empty settings.
+           case `$CC -V 2>&1 | sed 5q` in
+           *Sun\ C*)
+             # Sun C++ 5.9
+             lt_prog_compiler_pic_CXX='-KPIC'
+             lt_prog_compiler_static_CXX='-Bstatic'
+             lt_prog_compiler_wl_CXX='-Qoption ld '
+             ;;
+           esac
+           ;;
+       esac
+       ;;
+      lynxos*)
+       ;;
+      m88k*)
+       ;;
+      mvs*)
+       case $cc_basename in
+         cxx*)
+           lt_prog_compiler_pic_CXX='-W c,exportall'
+           ;;
+         *)
+           ;;
+       esac
+       ;;
+      netbsd*)
+       ;;
+      *qnx* | *nto*)
+        # QNX uses GNU C++, but need to define -shared option too, otherwise
+        # it will coredump.
+        lt_prog_compiler_pic_CXX='-fPIC -shared'
+        ;;
+      osf3* | osf4* | osf5*)
+       case $cc_basename in
+         KCC*)
+           lt_prog_compiler_wl_CXX='--backend -Wl,'
+           ;;
+         RCC*)
+           # Rational C++ 2.4.1
+           lt_prog_compiler_pic_CXX='-pic'
+           ;;
+         cxx*)
+           # Digital/Compaq C++
+           lt_prog_compiler_wl_CXX='-Wl,'
+           # Make sure the PIC flag is empty.  It appears that all Alpha
+           # Linux and Compaq Tru64 Unix objects are PIC.
+           lt_prog_compiler_pic_CXX=
+           lt_prog_compiler_static_CXX='-non_shared'
+           ;;
+         *)
+           ;;
+       esac
+       ;;
+      psos*)
+       ;;
+      solaris*)
+       case $cc_basename in
+         CC* | sunCC*)
+           # Sun C++ 4.2, 5.x and Centerline C++
+           lt_prog_compiler_pic_CXX='-KPIC'
+           lt_prog_compiler_static_CXX='-Bstatic'
+           lt_prog_compiler_wl_CXX='-Qoption ld '
+           ;;
+         gcx*)
+           # Green Hills C++ Compiler
+           lt_prog_compiler_pic_CXX='-PIC'
+           ;;
+         *)
+           ;;
+       esac
+       ;;
+      sunos4*)
+       case $cc_basename in
+         CC*)
+           # Sun C++ 4.x
+           lt_prog_compiler_pic_CXX='-pic'
+           lt_prog_compiler_static_CXX='-Bstatic'
+           ;;
+         lcc*)
+           # Lucid
+           lt_prog_compiler_pic_CXX='-pic'
+           ;;
+         *)
+           ;;
+       esac
+       ;;
+      sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*)
+       case $cc_basename in
+         CC*)
+           lt_prog_compiler_wl_CXX='-Wl,'
+           lt_prog_compiler_pic_CXX='-KPIC'
+           lt_prog_compiler_static_CXX='-Bstatic'
+           ;;
+       esac
+       ;;
+      tandem*)
+       case $cc_basename in
+         NCC*)
+           # NonStop-UX NCC 3.20
+           lt_prog_compiler_pic_CXX='-KPIC'
+           ;;
+         *)
+           ;;
+       esac
+       ;;
+      vxworks*)
+       ;;
+      *)
+       # Host not listed above: flag that shared objects cannot be built
+       # with this compiler.
+       lt_prog_compiler_can_build_shared_CXX=no
+       ;;
+    esac
+  fi
+
+# Append -DPIC to whatever PIC option was selected, except on djgpp hosts
+# where the PIC setting is forced to empty.
+case $host_os in
+  # For platforms which do not support PIC, -DPIC is meaningless:
+  *djgpp*)
+    lt_prog_compiler_pic_CXX=
+    ;;
+  *)
+    lt_prog_compiler_pic_CXX="$lt_prog_compiler_pic_CXX -DPIC"
+    ;;
+esac
+
+# Report the PIC option.  If lt_cv_prog_compiler_pic_CXX is already set
+# (even to an empty string) that value is kept and "(cached)" is printed;
+# otherwise the value computed above is stored in it.  Either way the
+# cache variable is then copied back into lt_prog_compiler_pic_CXX.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $compiler option to produce PIC" >&5
+$as_echo_n "checking for $compiler option to produce PIC... " >&6; }
+if test "${lt_cv_prog_compiler_pic_CXX+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  lt_cv_prog_compiler_pic_CXX=$lt_prog_compiler_pic_CXX
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic_CXX" >&5
+$as_echo "$lt_cv_prog_compiler_pic_CXX" >&6; }
+lt_prog_compiler_pic_CXX=$lt_cv_prog_compiler_pic_CXX
+
+#
+# Check to make sure the PIC flag actually works.
+#
+# Skipped entirely when the PIC setting is empty.  Otherwise a test source
+# is compiled with the flag; the flag counts as working only if the compile
+# succeeds, produces a non-empty object file, and prints nothing beyond the
+# compiler's usual boilerplate.  On failure the PIC setting is cleared and
+# lt_prog_compiler_can_build_shared_CXX is set to no.
+if test -n "$lt_prog_compiler_pic_CXX"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler PIC flag $lt_prog_compiler_pic_CXX works" >&5
+$as_echo_n "checking if $compiler PIC flag $lt_prog_compiler_pic_CXX works... " >&6; }
+if test "${lt_cv_prog_compiler_pic_works_CXX+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  lt_cv_prog_compiler_pic_works_CXX=no
+   ac_outfile=conftest.$ac_objext
+   echo "$lt_simple_compile_test_code" > conftest.$ac_ext
+   # NOTE(review): -DPIC is appended here as well; when the value still
+   # carries the -DPIC added by the host_os case earlier in the script, the
+   # test compile receives the define twice.
+   lt_compiler_flag="$lt_prog_compiler_pic_CXX -DPIC"
+   # Insert the option either (1) after the last *FLAGS variable, or
+   # (2) before a word containing "conftest.", or (3) at the end.
+   # Note that $ac_compile itself does not contain backslashes and begins
+   # with a dollar sign (not a hyphen), so the echo should work correctly.
+   # The option is referenced via a variable to avoid confusing sed.
+   lt_compile=`echo "$ac_compile" | $SED \
+   -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
+   -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
+   -e 's:$: $lt_compiler_flag:'`
+   # Log the command, run it with stderr captured in conftest.err, then log
+   # the captured output and the exit status.
+   (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5)
+   (eval "$lt_compile" 2>conftest.err)
+   ac_status=$?
+   cat conftest.err >&5
+   echo "$as_me:$LINENO: \$? = $ac_status" >&5
+   if (exit $ac_status) && test -s "$ac_outfile"; then
+     # The compiler can only warn and ignore the option if not recognized
+     # So say no if there are warnings other than the usual output.
+     # Both sides are compared with empty lines removed; lines starting
+     # with optional blanks and a "+" are also dropped from the captured
+     # output.
+     $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp
+     $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2
+     if test ! -s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then
+       lt_cv_prog_compiler_pic_works_CXX=yes
+     fi
+   fi
+   $RM conftest*
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic_works_CXX" >&5
+$as_echo "$lt_cv_prog_compiler_pic_works_CXX" >&6; }
+
+if test x"$lt_cv_prog_compiler_pic_works_CXX" = xyes; then
+    # Working flag: make sure a non-empty value starts with one blank.
+    case $lt_prog_compiler_pic_CXX in
+     "" | " "*) ;;
+     *) lt_prog_compiler_pic_CXX=" $lt_prog_compiler_pic_CXX" ;;
+     esac
+else
+    lt_prog_compiler_pic_CXX=
+     lt_prog_compiler_can_build_shared_CXX=no
+fi
+
+fi
+
+
+
+
+
+#
+# Check to make sure the static flag actually works.
+#
+# The static flag may reference ${wl}; it is expanded once with wl set to
+# the C++ linker-option prefix, then a test program is linked with the
+# expanded flag added to LDFLAGS.  The flag counts as working if the link
+# succeeds, the executable is non-empty, and stderr is either empty or
+# identical to the linker's usual boilerplate.  If it does not work the
+# static setting is cleared.
+wl=$lt_prog_compiler_wl_CXX eval lt_tmp_static_flag=\"$lt_prog_compiler_static_CXX\"
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler static flag $lt_tmp_static_flag works" >&5
+$as_echo_n "checking if $compiler static flag $lt_tmp_static_flag works... " >&6; }
+if test "${lt_cv_prog_compiler_static_works_CXX+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  lt_cv_prog_compiler_static_works_CXX=no
+   # LDFLAGS is modified only for the duration of this test.
+   save_LDFLAGS="$LDFLAGS"
+   LDFLAGS="$LDFLAGS $lt_tmp_static_flag"
+   echo "$lt_simple_link_test_code" > conftest.$ac_ext
+   if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then
+     # The linker can only warn and ignore the option if not recognized
+     # So say no if there are warnings
+     if test -s conftest.err; then
+       # Append any errors to the config.log.
+       cat conftest.err 1>&5
+       $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp
+       $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2
+       if diff conftest.exp conftest.er2 >/dev/null; then
+         lt_cv_prog_compiler_static_works_CXX=yes
+       fi
+     else
+       lt_cv_prog_compiler_static_works_CXX=yes
+     fi
+   fi
+   $RM -r conftest*
+   LDFLAGS="$save_LDFLAGS"
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_static_works_CXX" >&5
+$as_echo "$lt_cv_prog_compiler_static_works_CXX" >&6; }
+
+if test x"$lt_cv_prog_compiler_static_works_CXX" = xyes; then
+    :
+else
+    lt_prog_compiler_static_CXX=
+fi
+
+
+
+
+    # Determine whether the compiler accepts -c together with -o naming an
+    # object file in another directory.  The test runs inside a scratch
+    # directory "conftest" and asks for out/conftest2.$ac_objext; the
+    # answer is stored in lt_cv_prog_compiler_c_o_CXX (yes or no) unless a
+    # value is already set.
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5
+$as_echo_n "checking if $compiler supports -c -o file.$ac_objext... " >&6; }
+if test "${lt_cv_prog_compiler_c_o_CXX+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  lt_cv_prog_compiler_c_o_CXX=no
+   # Start from a fresh scratch directory with an "out" subdirectory.
+   $RM -r conftest 2>/dev/null
+   mkdir conftest
+   cd conftest
+   mkdir out
+   echo "$lt_simple_compile_test_code" > conftest.$ac_ext
+
+   lt_compiler_flag="-o out/conftest2.$ac_objext"
+   # Insert the option either (1) after the last *FLAGS variable, or
+   # (2) before a word containing "conftest.", or (3) at the end.
+   # Note that $ac_compile itself does not contain backslashes and begins
+   # with a dollar sign (not a hyphen), so the echo should work correctly.
+   lt_compile=`echo "$ac_compile" | $SED \
+   -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
+   -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
+   -e 's:$: $lt_compiler_flag:'`
+   (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5)
+   (eval "$lt_compile" 2>out/conftest.err)
+   ac_status=$?
+   cat out/conftest.err >&5
+   echo "$as_me:$LINENO: \$? = $ac_status" >&5
+   if (exit $ac_status) && test -s out/conftest2.$ac_objext
+   then
+     # The compiler can only warn and ignore the option if not recognized
+     # So say no if there are warnings
+     $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp
+     $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2
+     if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then
+       lt_cv_prog_compiler_c_o_CXX=yes
+     fi
+   fi
+   # Tear the scratch directory down again and return to the parent.
+   chmod u+w . 2>&5
+   $RM conftest*
+   # SGI C++ compiler will create directory out/ii_files/ for
+   # template instantiation
+   test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files
+   $RM out/* && rmdir out
+   cd ..
+   $RM -r conftest
+   $RM conftest*
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o_CXX" >&5
+$as_echo "$lt_cv_prog_compiler_c_o_CXX" >&6; }
+
+
+
+    # This repeats, statement for statement, the -c -o check that precedes
+    # it in this script.  That earlier check always leaves
+    # lt_cv_prog_compiler_c_o_CXX set, so here the "(cached)" branch is
+    # taken and the else branch below does not run; the net effect is a
+    # second "checking ... (cached)" line followed by the same result.
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5
+$as_echo_n "checking if $compiler supports -c -o file.$ac_objext... " >&6; }
+if test "${lt_cv_prog_compiler_c_o_CXX+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  lt_cv_prog_compiler_c_o_CXX=no
+   $RM -r conftest 2>/dev/null
+   mkdir conftest
+   cd conftest
+   mkdir out
+   echo "$lt_simple_compile_test_code" > conftest.$ac_ext
+
+   lt_compiler_flag="-o out/conftest2.$ac_objext"
+   # Insert the option either (1) after the last *FLAGS variable, or
+   # (2) before a word containing "conftest.", or (3) at the end.
+   # Note that $ac_compile itself does not contain backslashes and begins
+   # with a dollar sign (not a hyphen), so the echo should work correctly.
+   lt_compile=`echo "$ac_compile" | $SED \
+   -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
+   -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
+   -e 's:$: $lt_compiler_flag:'`
+   (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5)
+   (eval "$lt_compile" 2>out/conftest.err)
+   ac_status=$?
+   cat out/conftest.err >&5
+   echo "$as_me:$LINENO: \$? = $ac_status" >&5
+   if (exit $ac_status) && test -s out/conftest2.$ac_objext
+   then
+     # The compiler can only warn and ignore the option if not recognized
+     # So say no if there are warnings
+     $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp
+     $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2
+     if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then
+       lt_cv_prog_compiler_c_o_CXX=yes
+     fi
+   fi
+   chmod u+w . 2>&5
+   $RM conftest*
+   # SGI C++ compiler will create directory out/ii_files/ for
+   # template instantiation
+   test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files
+   $RM out/* && rmdir out
+   cd ..
+   $RM -r conftest
+   $RM conftest*
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o_CXX" >&5
+$as_echo "$lt_cv_prog_compiler_c_o_CXX" >&6; }
+
+
+
+
+# Decide the value of need_locks.  Hard links are only probed when the
+# compiler lacks -c -o support and need_locks is not already "no"; in every
+# other case need_locks is set to no and hard_links stays "nottested".
+hard_links="nottested"
+if test "$lt_cv_prog_compiler_c_o_CXX" = no && test "$need_locks" != no; then
+  # do not overwrite the value of need_locks provided by the user
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking if we can lock with hard links" >&5
+$as_echo_n "checking if we can lock with hard links... " >&6; }
+  hard_links=yes
+  $RM conftest*
+  # Three probes, each able to turn the answer to "no":
+  #  1. linking before conftest.a is created (presumably it does not exist
+  #     after the $RM above) must fail;
+  #  2. linking once conftest.a exists must succeed;
+  #  3. linking again onto the now existing conftest.b must fail.
+  ln conftest.a conftest.b 2>/dev/null && hard_links=no
+  touch conftest.a
+  ln conftest.a conftest.b 2>&5 || hard_links=no
+  ln conftest.a conftest.b 2>/dev/null && hard_links=no
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $hard_links" >&5
+$as_echo "$hard_links" >&6; }
+  if test "$hard_links" = no; then
+    # Neither -c -o nor usable hard links: warn and record need_locks=warn.
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: \`$CC' does not support \`-c -o', so \`make -j' may be unsafe" >&5
+$as_echo "$as_me: WARNING: \`$CC' does not support \`-c -o', so \`make -j' may be unsafe" >&2;}
+    need_locks=warn
+  fi
+else
+  need_locks=no
+fi
+
+
+
+    # Select the command template used to list a library's exported symbols
+    # (export_symbols_cmds_CXX) and the pattern of symbols to leave out
+    # (exclude_expsyms_CXX).  The templates are single-quoted, so the
+    # variables inside them are not expanded here.  The value reported at
+    # the end is $ld_shlibs_CXX, which this section does not modify.
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the $compiler linker ($LD) supports shared libraries" >&5
+$as_echo_n "checking whether the $compiler linker ($LD) supports shared libraries... " >&6; }
+
+  # Defaults; the case below replaces them for specific hosts.
+  export_symbols_cmds_CXX='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols'
+  exclude_expsyms_CXX='_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*'
+  case $host_os in
+  aix[4-9]*)
+    # If we're using GNU nm, then we don't want the "-C" option.
+    # -C means demangle to AIX nm, but means don't demangle with GNU nm
+    # Also, AIX nm treats weak defined symbols like other global defined
+    # symbols, whereas GNU nm marks them as "W".
+    if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then
+      export_symbols_cmds_CXX='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && (substr(\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols'
+    else
+      export_symbols_cmds_CXX='$NM -BCpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B")) && (substr(\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols'
+    fi
+    ;;
+  pw32*)
+    # Unlike the other branches this one expands $ltdll_cmds immediately.
+    export_symbols_cmds_CXX="$ltdll_cmds"
+    ;;
+  cygwin* | mingw* | cegcc*)
+    case $cc_basename in
+    cl*)
+      # Only the exclusion pattern changes; the default command is kept.
+      exclude_expsyms_CXX='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*'
+      ;;
+    *)
+      export_symbols_cmds_CXX='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1 DATA/;s/^.*[ ]__nm__\([^ ]*\)[ ][^ ]*/\1 DATA/;/^I[ ]/d;/^[AITW][ ]/s/.* //'\'' | sort | uniq > $export_symbols'
+      exclude_expsyms_CXX='[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname'
+      ;;
+    esac
+    ;;
+  *)
+    # Same command as the default assigned before the case statement.
+    export_symbols_cmds_CXX='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols'
+    ;;
+  esac
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ld_shlibs_CXX" >&5
+$as_echo "$ld_shlibs_CXX" >&6; }
+test "$ld_shlibs_CXX" = no && can_build_shared=no
+
+# Record the GNU-ld setting under a CXX-suffixed name.
+with_gnu_ld_CXX=$with_gnu_ld
+
+
+
+
+
+
+#
+# Do we need to explicitly link libc?
+#
+# Only evaluated when archive_cmds_need_lc_CXX is empty or "yes"; any other
+# value is left untouched.  The probe itself runs only for shared builds
+# with GCC and only when archive_cmds_CXX is a single command starting
+# with `$CC '.
+case "x$archive_cmds_need_lc_CXX" in
+x|xyes)
+  # Assume -lc should be added
+  archive_cmds_need_lc_CXX=yes
+
+  if test "$enable_shared" = yes && test "$GCC" = yes; then
+    case $archive_cmds_CXX in
+    *'~'*)
+      # FIXME: we may have to deal with multi-command sequences.
+      # (A `~' in archive_cmds_CXX skips the probe; the "yes" set above
+      # is kept.)
+      ;;
+    '$CC '*)
+      # Test whether the compiler implicitly links with -lc since on some
+      # systems, -lgcc has to come before -lc. If gcc already passes -lc
+      # to ld, don't add -lc before -lgcc.
+      { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether -lc should be explicitly linked in" >&5
+$as_echo_n "checking whether -lc should be explicitly linked in... " >&6; }
+if test "${lt_cv_archive_cmds_need_lc_CXX+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  $RM conftest*
+       echo "$lt_simple_compile_test_code" > conftest.$ac_ext
+
+       # Compile the simple test source; its stderr is captured in
+       # conftest.err and replayed in the else branch on failure.
+       if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5
+  (eval $ac_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; } 2>conftest.err; then
+         # Set the variables that the archive_cmds_CXX template is eval'ed
+         # with below; -v is passed as compiler and linker flags.
+         soname=conftest
+         lib=conftest
+         libobjs=conftest.$ac_objext
+         deplibs=
+         wl=$lt_prog_compiler_wl_CXX
+         pic_flag=$lt_prog_compiler_pic_CXX
+         compiler_flags=-v
+         linker_flags=-v
+         verstring=
+         output_objdir=.
+         libname=conftest
+         # allow_undefined_flag_CXX is blanked for the probe and restored
+         # afterwards.
+         lt_save_allow_undefined_flag=$allow_undefined_flag_CXX
+         allow_undefined_flag_CXX=
+         # Run the archive command and grep its combined output for " -lc ".
+         if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$archive_cmds_CXX 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1\""; } >&5
+  (eval $archive_cmds_CXX 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }
+         then
+           # " -lc " was found in the output: no explicit -lc needed.
+           lt_cv_archive_cmds_need_lc_CXX=no
+         else
+           lt_cv_archive_cmds_need_lc_CXX=yes
+         fi
+         allow_undefined_flag_CXX=$lt_save_allow_undefined_flag
+       else
+         # NOTE(review): on compile failure the cache variable is not
+         # assigned in this branch, so the result lines below may report
+         # and copy an empty value.
+         cat conftest.err 1>&5
+       fi
+       $RM conftest*
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_archive_cmds_need_lc_CXX" >&5
+$as_echo "$lt_cv_archive_cmds_need_lc_CXX" >&6; }
+      archive_cmds_need_lc_CXX=$lt_cv_archive_cmds_need_lc_CXX
+      ;;
+    esac
+  fi
+  ;;
+esac
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking dynamic linker characteristics" >&5
+$as_echo_n "checking dynamic linker characteristics... " >&6; }
+
+library_names_spec=
+libname_spec='lib$name'
+soname_spec=
+shrext_cmds=".so"
+postinstall_cmds=
+postuninstall_cmds=
+finish_cmds=
+finish_eval=
+shlibpath_var=
+shlibpath_overrides_runpath=unknown
+version_type=none
+dynamic_linker="$host_os ld.so"
+sys_lib_dlsearch_path_spec="/lib /usr/lib"
+need_lib_prefix=unknown
+hardcode_into_libs=no
+
+# when you set need_version to no, make sure it does not cause -set_version
+# flags to be left without arguments
+need_version=unknown
+
+case $host_os in
+aix3*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  library_names_spec='${libname}${release}${shared_ext}$versuffix $libname.a'
+  shlibpath_var=LIBPATH
+
+  # AIX 3 has no versioning support, so we append a major version to the name.
+  soname_spec='${libname}${release}${shared_ext}$major'
+  ;;
+
+aix[4-9]*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  hardcode_into_libs=yes
+  if test "$host_cpu" = ia64; then
+    # AIX 5 supports IA64
+    library_names_spec='${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext}$versuffix $libname${shared_ext}'
+    shlibpath_var=LD_LIBRARY_PATH
+  else
+    # With GCC up to 2.95.x, collect2 would create an import file
+    # for dependence libraries.  The import file would start with
+    # the line `#! .'.  This would cause the generated library to
+    # depend on `.', always an invalid library.  This was fixed in
+    # development snapshots of GCC prior to 3.0.
+    case $host_os in
+      aix4 | aix4.[01] | aix4.[01].*)
+      if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)'
+          echo ' yes '
+          echo '#endif'; } | ${CC} -E - | $GREP yes > /dev/null; then
+       :
+      else
+       can_build_shared=no
+      fi
+      ;;
+    esac
+    # AIX (on Power*) has no versioning support, so currently we can not hardcode correct
+    # soname into executable. Probably we can add versioning support to
+    # collect2, so additional links can be useful in future.
+    if test "$aix_use_runtimelinking" = yes; then
+      # If using run time linking (on AIX 4.2 or later) use lib<name>.so
+      # instead of lib<name>.a to let people know that these are not
+      # typical AIX shared libraries.
+      library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+    else
+      # We preserve .a as extension for shared libraries through AIX4.2
+      # and later when we are not doing run time linking.
+      library_names_spec='${libname}${release}.a $libname.a'
+      soname_spec='${libname}${release}${shared_ext}$major'
+    fi
+    shlibpath_var=LIBPATH
+  fi
+  ;;
+
+amigaos*)
+  case $host_cpu in
+  powerpc)
+    # Since July 2007 AmigaOS4 officially supports .so libraries.
+    # When compiling the executable, add -use-dynld -Lsobjs: to the compileline.
+    library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+    ;;
+  m68k)
+    library_names_spec='$libname.ixlibrary $libname.a'
+    # Create ${libname}_ixlibrary.a entries in /sys/libs.
+    finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`func_echo_all "$lib" | $SED '\''s%^.*/\([^/]*\)\.ixlibrary$%\1%'\''`; test $RM /sys/libs/${libname}_ixlibrary.a; $show "cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a"; cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a || exit 1; done'
+    ;;
+  esac
+  ;;
+
+beos*)
+  library_names_spec='${libname}${shared_ext}'
+  dynamic_linker="$host_os ld.so"
+  shlibpath_var=LIBRARY_PATH
+  ;;
+
+bsdi[45]*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir'
+  shlibpath_var=LD_LIBRARY_PATH
+  sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib"
+  sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib"
+  # the default ld.so.conf also contains /usr/contrib/lib and
+  # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow
+  # libtool to hard-code these into programs
+  ;;
+
+cygwin* | mingw* | pw32* | cegcc*)
+  version_type=windows
+  shrext_cmds=".dll"
+  need_version=no
+  need_lib_prefix=no
+
+  case $GCC,$cc_basename in
+  yes,*)
+    # gcc
+    library_names_spec='$libname.dll.a'
+    # DLL is installed to $(libdir)/../bin by postinstall_cmds
+    postinstall_cmds='base_file=`basename \${file}`~
+      dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i; echo \$dlname'\''`~
+      dldir=$destdir/`dirname \$dlpath`~
+      test -d \$dldir || mkdir -p \$dldir~
+      $install_prog $dir/$dlname \$dldir/$dlname~
+      chmod a+x \$dldir/$dlname~
+      if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then
+        eval '\''$striplib \$dldir/$dlname'\'' || exit \$?;
+      fi'
+    postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~
+      dlpath=$dir/\$dldll~
+       $RM \$dlpath'
+    shlibpath_overrides_runpath=yes
+
+    case $host_os in
+    cygwin*)
+      # Cygwin DLLs use 'cyg' prefix rather than 'lib'
+      soname_spec='`echo ${libname} | sed -e 's/^lib/cyg/'``echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}'
+
+      ;;
+    mingw* | cegcc*)
+      # MinGW DLLs use traditional 'lib' prefix
+      soname_spec='${libname}`echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}'
+      ;;
+    pw32*)
+      # pw32 DLLs use 'pw' prefix rather than 'lib'
+      library_names_spec='`echo ${libname} | sed -e 's/^lib/pw/'``echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}'
+      ;;
+    esac
+    dynamic_linker='Win32 ld.exe'
+    ;;
+
+  *,cl*)
+    # Native MSVC
+    libname_spec='$name'
+    soname_spec='${libname}`echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}'
+    library_names_spec='${libname}.dll.lib'
+
+    case $build_os in
+    mingw*)
+      sys_lib_search_path_spec=
+      lt_save_ifs=$IFS
+      IFS=';'
+      for lt_path in $LIB
+      do
+        IFS=$lt_save_ifs
+        # Let DOS variable expansion print the short 8.3 style file name.
+        lt_path=`cd "$lt_path" 2>/dev/null && cmd //C "for %i in (".") do @echo %~si"`
+        sys_lib_search_path_spec="$sys_lib_search_path_spec $lt_path"
+      done
+      IFS=$lt_save_ifs
+      # Convert to MSYS style.
+      sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | sed -e 's|\\\\|/|g' -e 's| \\([a-zA-Z]\\):| /\\1|g' -e 's|^ ||'`
+      ;;
+    cygwin*)
+      # Convert to unix form, then to dos form, then back to unix form
+      # but this time dos style (no spaces!) so that the unix form looks
+      # like /cygdrive/c/PROGRA~1:/cygdr...
+      sys_lib_search_path_spec=`cygpath --path --unix "$LIB"`
+      sys_lib_search_path_spec=`cygpath --path --dos "$sys_lib_search_path_spec" 2>/dev/null`
+      sys_lib_search_path_spec=`cygpath --path --unix "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"`
+      ;;
+    *)
+      sys_lib_search_path_spec="$LIB"
+      if $ECHO "$sys_lib_search_path_spec" | $GREP ';[c-zC-Z]:/' >/dev/null; then
+        # It is most probably a Windows format PATH.
+        sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'`
+      else
+        sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"`
+      fi
+      # FIXME: find the short name or the path components, as spaces are
+      # common. (e.g. "Program Files" -> "PROGRA~1")
+      ;;
+    esac
+
+    # DLL is installed to $(libdir)/../bin by postinstall_cmds
+    postinstall_cmds='base_file=`basename \${file}`~
+      dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i; echo \$dlname'\''`~
+      dldir=$destdir/`dirname \$dlpath`~
+      test -d \$dldir || mkdir -p \$dldir~
+      $install_prog $dir/$dlname \$dldir/$dlname'
+    postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~
+      dlpath=$dir/\$dldll~
+       $RM \$dlpath'
+    shlibpath_overrides_runpath=yes
+    dynamic_linker='Win32 link.exe'
+    ;;
+
+  *)
+    # Assume MSVC wrapper
+    library_names_spec='${libname}`echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext} $libname.lib'
+    dynamic_linker='Win32 ld.exe'
+    ;;
+  esac
+  # FIXME: first we should search . and the directory the executable is in
+  shlibpath_var=PATH
+  ;;
+
+darwin* | rhapsody*)
+  dynamic_linker="$host_os dyld"
+  version_type=darwin
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${major}$shared_ext ${libname}$shared_ext'
+  soname_spec='${libname}${release}${major}$shared_ext'
+  shlibpath_overrides_runpath=yes
+  shlibpath_var=DYLD_LIBRARY_PATH
+  shrext_cmds='`test .$module = .yes && echo .so || echo .dylib`'
+
+  sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib'
+  ;;
+
+dgux*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname$shared_ext'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  ;;
+
+freebsd* | dragonfly*)
+  # DragonFly does not have aout.  When/if they implement a new
+  # versioning mechanism, adjust this.
+  # The object format comes from /usr/bin/objformat when that program is
+  # executable; otherwise it is derived from the host OS name: freebsd2.x
+  # and freebsd3.x map to aout, everything else to elf.
+  if test -x /usr/bin/objformat; then
+    objformat=`/usr/bin/objformat`
+  else
+    case $host_os in
+    freebsd[23].*) objformat=aout ;;
+    *) objformat=elf ;;
+    esac
+  fi
+  version_type=freebsd-$objformat
+  # Library naming and the need for version numbers depend on the object
+  # format: only non-ELF formats set need_version=yes.
+  case $version_type in
+    freebsd-elf*)
+      library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}'
+      need_version=no
+      need_lib_prefix=no
+      ;;
+    freebsd-*)
+      library_names_spec='${libname}${release}${shared_ext}$versuffix $libname${shared_ext}$versuffix'
+      need_version=yes
+      ;;
+  esac
+  shlibpath_var=LD_LIBRARY_PATH
+  # Runpath behaviour by release: only 3.2-3.9, 4.0-4.5 and 4.1.1 set
+  # shlibpath_overrides_runpath=no; freebsd2.x is the only group that does
+  # not turn on hardcode_into_libs.
+  case $host_os in
+  freebsd2.*)
+    shlibpath_overrides_runpath=yes
+    ;;
+  freebsd3.[01]* | freebsdelf3.[01]*)
+    shlibpath_overrides_runpath=yes
+    hardcode_into_libs=yes
+    ;;
+  freebsd3.[2-9]* | freebsdelf3.[2-9]* | \
+  freebsd4.[0-5] | freebsdelf4.[0-5] | freebsd4.1.1 | freebsdelf4.1.1)
+    shlibpath_overrides_runpath=no
+    hardcode_into_libs=yes
+    ;;
+  *) # from 4.6 on, and DragonFly
+    shlibpath_overrides_runpath=yes
+    hardcode_into_libs=yes
+    ;;
+  esac
+  ;;
+
+gnu*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=no
+  hardcode_into_libs=yes
+  ;;
+
+haiku*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  dynamic_linker="$host_os runtime_loader"
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LIBRARY_PATH
+  shlibpath_overrides_runpath=yes
+  sys_lib_dlsearch_path_spec='/boot/home/config/lib /boot/common/lib /boot/system/lib'
+  hardcode_into_libs=yes
+  ;;
+
+hpux9* | hpux10* | hpux11*)
+  # Give a soname corresponding to the major version so that dld.sl refuses to
+  # link against other versions.
+  version_type=sunos
+  need_lib_prefix=no
+  need_version=no
+  case $host_cpu in
+  ia64*)
+    shrext_cmds='.so'
+    hardcode_into_libs=yes
+    dynamic_linker="$host_os dld.so"
+    shlibpath_var=LD_LIBRARY_PATH
+    shlibpath_overrides_runpath=yes # Unless +noenvvar is specified.
+    library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+    soname_spec='${libname}${release}${shared_ext}$major'
+    if test "X$HPUX_IA64_MODE" = X32; then
+      sys_lib_search_path_spec="/usr/lib/hpux32 /usr/local/lib/hpux32 /usr/local/lib"
+    else
+      sys_lib_search_path_spec="/usr/lib/hpux64 /usr/local/lib/hpux64"
+    fi
+    sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec
+    ;;
+  hppa*64*)
+    shrext_cmds='.sl'
+    hardcode_into_libs=yes
+    dynamic_linker="$host_os dld.sl"
+    shlibpath_var=LD_LIBRARY_PATH # How should we handle SHLIB_PATH
+    shlibpath_overrides_runpath=yes # Unless +noenvvar is specified.
+    library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+    soname_spec='${libname}${release}${shared_ext}$major'
+    sys_lib_search_path_spec="/usr/lib/pa20_64 /usr/ccs/lib/pa20_64"
+    sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec
+    ;;
+  *)
+    shrext_cmds='.sl'
+    dynamic_linker="$host_os dld.sl"
+    shlibpath_var=SHLIB_PATH
+    shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH
+    library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+    soname_spec='${libname}${release}${shared_ext}$major'
+    ;;
+  esac
+  # HP-UX runs *really* slowly unless shared libraries are mode 555, ...
+  postinstall_cmds='chmod 555 $lib'
+  # or fails outright, so override atomically:
+  install_override_mode=555
+  ;;
+
+interix[3-9]*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  dynamic_linker='Interix 3.x ld.so.1 (PE, like ELF)'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=no
+  hardcode_into_libs=yes
+  ;;
+
+irix5* | irix6* | nonstopux*)
+  case $host_os in
+    nonstopux*) version_type=nonstopux ;;
+    *)
+       if test "$lt_cv_prog_gnu_ld" = yes; then
+               version_type=linux # correct to gnu/linux during the next big refactor
+       else
+               version_type=irix
+       fi ;;
+  esac
+  need_lib_prefix=no
+  need_version=no
+  soname_spec='${libname}${release}${shared_ext}$major'
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext} $libname${shared_ext}'
+  case $host_os in
+  irix5* | nonstopux*)
+    libsuff= shlibsuff=
+    ;;
+  *)
+    case $LD in # libtool.m4 will add one of these switches to LD
+    *-32|*"-32 "|*-melf32bsmip|*"-melf32bsmip ")
+      libsuff= shlibsuff= libmagic=32-bit;;
+    *-n32|*"-n32 "|*-melf32bmipn32|*"-melf32bmipn32 ")
+      libsuff=32 shlibsuff=N32 libmagic=N32;;
+    *-64|*"-64 "|*-melf64bmip|*"-melf64bmip ")
+      libsuff=64 shlibsuff=64 libmagic=64-bit;;
+    *) libsuff= shlibsuff= libmagic=never-match;;
+    esac
+    ;;
+  esac
+  shlibpath_var=LD_LIBRARY${shlibsuff}_PATH
+  shlibpath_overrides_runpath=no
+  sys_lib_search_path_spec="/usr/lib${libsuff} /lib${libsuff} /usr/local/lib${libsuff}"
+  sys_lib_dlsearch_path_spec="/usr/lib${libsuff} /lib${libsuff}"
+  hardcode_into_libs=yes
+  ;;
+
+# No shared lib support for Linux oldld, aout, or coff.
+linux*oldld* | linux*aout* | linux*coff*)
+  dynamic_linker=no
+  ;;
+
+# This must be glibc/ELF.
+linux* | k*bsd*-gnu | kopensolaris*-gnu)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=no
+
+  # Some binutils ld are patched to set DT_RUNPATH
+  # Probe for that (cached in lt_cv_shlibpath_overrides_runpath): link a
+  # trivial program with the C++ rpath flag expanded for the dummy
+  # directory /foo, then look for a RUNPATH entry mentioning that directory
+  # in the `$OBJDUMP -p' output.  LDFLAGS and libdir are saved and restored
+  # around the probe.
+  if test "${lt_cv_shlibpath_overrides_runpath+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  lt_cv_shlibpath_overrides_runpath=no
+    save_LDFLAGS=$LDFLAGS
+    save_libdir=$libdir
+    eval "libdir=/foo; wl=\"$lt_prog_compiler_wl_CXX\"; \
+        LDFLAGS=\"\$LDFLAGS $hardcode_libdir_flag_spec_CXX\""
+    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_link "$LINENO"; then :
+  if  ($OBJDUMP -p conftest$ac_exeext) 2>/dev/null | grep "RUNPATH.*$libdir" >/dev/null; then :
+  lt_cv_shlibpath_overrides_runpath=yes
+fi
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+    LDFLAGS=$save_LDFLAGS
+    libdir=$save_libdir
+
+fi
+
+  shlibpath_overrides_runpath=$lt_cv_shlibpath_overrides_runpath
+
+  # This implies no fast_install, which is unacceptable.
+  # Some rework will be needed to allow for fast_install
+  # before this can be enabled.
+  # NOTE(review): despite the wording above, hardcode_into_libs is set to
+  # "yes" on the next line -- the comment reads as if it were still
+  # disabled; confirm which is intended.
+  hardcode_into_libs=yes
+
+  # Append ld.so.conf contents to the search path
+  # awk replaces each `include' line with the contents of the named files
+  # (cat run from /etc); sed then strips `#' comments, deletes hwcap lines,
+  # turns `:' and `,' separators into spaces, removes `=value' suffixes
+  # and double quotes, and drops empty lines; tr joins the rest with spaces.
+  if test -f /etc/ld.so.conf; then
+    lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s 2>/dev/null", \$2)); skip = 1; } { if (!skip) print \$0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;/^[      ]*hwcap[        ]/d;s/[:,      ]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;s/"//g;/^$/d' | tr '\n' ' '`
+    sys_lib_dlsearch_path_spec="/lib /usr/lib $lt_ld_extra"
+  fi
+
+  # We used to test for /lib/ld.so.1 and disable shared libraries on
+  # powerpc, because MkLinux only supported shared libraries with the
+  # GNU dynamic linker.  Since this was broken with cross compilers,
+  # most powerpc-linux boxes support dynamic linking these days and
+  # people can always --disable-shared, the test was removed, and we
+  # assume the GNU/Linux dynamic linker is in use.
+  dynamic_linker='GNU/Linux ld.so'
+  ;;
+
+netbsd*)
+  version_type=sunos
+  need_lib_prefix=no
+  need_version=no
+  # Feed the bare token __ELF__ through `$CC -E'.  If the token is still
+  # present in the output, the a.out settings are used; if it is gone,
+  # the ELF settings (with a soname) are used.
+  if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then
+    library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix'
+    finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir'
+    dynamic_linker='NetBSD (a.out) ld.so'
+  else
+    library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
+    soname_spec='${libname}${release}${shared_ext}$major'
+    dynamic_linker='NetBSD ld.elf_so'
+  fi
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=yes
+  hardcode_into_libs=yes
+  ;;
+
+newsos6)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=yes
+  ;;
+
+*nto* | *qnx*)
+  version_type=qnx
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=no
+  hardcode_into_libs=yes
+  dynamic_linker='ldqnx.so'
+  ;;
+
+openbsd*)
+  version_type=sunos
+  sys_lib_dlsearch_path_spec="/usr/lib"
+  need_lib_prefix=no
+  # Some older versions of OpenBSD (3.3 at least) *do* need versioned libs.
+  case $host_os in
+    openbsd3.3 | openbsd3.3.*) need_version=yes ;;
+    *)                         need_version=no  ;;
+  esac
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix'
+  finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir'
+  shlibpath_var=LD_LIBRARY_PATH
+  # The first branch is taken when the __ELF__ token is no longer present
+  # after `$CC -E', or when the host is exactly openbsd2.8-powerpc.  In it,
+  # only OpenBSD 2.8/2.9 get shlibpath_overrides_runpath=no; every other
+  # path through this arm sets it to yes.
+  if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
+    case $host_os in
+      openbsd2.[89] | openbsd2.[89].*)
+       shlibpath_overrides_runpath=no
+       ;;
+      *)
+       shlibpath_overrides_runpath=yes
+       ;;
+      esac
+  else
+    shlibpath_overrides_runpath=yes
+  fi
+  ;;
+
+os2*)
+  libname_spec='$name'
+  shrext_cmds=".dll"
+  need_lib_prefix=no
+  library_names_spec='$libname${shared_ext} $libname.a'
+  dynamic_linker='OS/2 ld.exe'
+  shlibpath_var=LIBPATH
+  ;;
+
+osf3* | osf4* | osf5*)
+  version_type=osf
+  need_lib_prefix=no
+  need_version=no
+  soname_spec='${libname}${release}${shared_ext}$major'
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  shlibpath_var=LD_LIBRARY_PATH
+  sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib"
+  sys_lib_dlsearch_path_spec="$sys_lib_search_path_spec"
+  ;;
+
+rdos*)
+  dynamic_linker=no
+  ;;
+
+solaris*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=yes
+  hardcode_into_libs=yes
+  # ldd complains unless libraries are executable
+  postinstall_cmds='chmod +x $lib'
+  ;;
+
+sunos4*)
+  version_type=sunos
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix'
+  finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=yes
+  if test "$with_gnu_ld" = yes; then
+    need_lib_prefix=no
+  fi
+  need_version=yes
+  ;;
+
+sysv4 | sysv4.3*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  case $host_vendor in
+    sni)
+      shlibpath_overrides_runpath=no
+      need_lib_prefix=no
+      runpath_var=LD_RUN_PATH
+      ;;
+    siemens)
+      need_lib_prefix=no
+      ;;
+    motorola)
+      need_lib_prefix=no
+      need_version=no
+      shlibpath_overrides_runpath=no
+      sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib'
+      ;;
+  esac
+  ;;
+
+sysv4*MP*)
+  if test -d /usr/nec ;then
+    version_type=linux # correct to gnu/linux during the next big refactor
+    library_names_spec='$libname${shared_ext}.$versuffix $libname${shared_ext}.$major $libname${shared_ext}'
+    soname_spec='$libname${shared_ext}.$major'
+    shlibpath_var=LD_LIBRARY_PATH
+  fi
+  ;;
+
+sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*)
+  version_type=freebsd-elf
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=yes
+  hardcode_into_libs=yes
+  if test "$with_gnu_ld" = yes; then
+    sys_lib_search_path_spec='/usr/local/lib /usr/gnu/lib /usr/ccs/lib /usr/lib /lib'
+  else
+    sys_lib_search_path_spec='/usr/ccs/lib /usr/lib'
+    case $host_os in
+      sco3.2v5*)
+        sys_lib_search_path_spec="$sys_lib_search_path_spec /lib"
+       ;;
+    esac
+  fi
+  sys_lib_dlsearch_path_spec='/usr/lib'
+  ;;
+
+tpf*)
+  # TPF is a cross-target only.  Preferred cross-host = GNU/Linux.
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=no
+  hardcode_into_libs=yes
+  ;;
+
+uts4*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  ;;
+
+*)
+  dynamic_linker=no
+  ;;
+esac
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $dynamic_linker" >&5
+$as_echo "$dynamic_linker" >&6; }
+test "$dynamic_linker" = no && can_build_shared=no
+
+variables_saved_for_relink="PATH $shlibpath_var $runpath_var"
+if test "$GCC" = yes; then
+  variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH"
+fi
+
+if test "${lt_cv_sys_lib_search_path_spec+set}" = set; then
+  sys_lib_search_path_spec="$lt_cv_sys_lib_search_path_spec"
+fi
+if test "${lt_cv_sys_lib_dlsearch_path_spec+set}" = set; then
+  sys_lib_dlsearch_path_spec="$lt_cv_sys_lib_dlsearch_path_spec"
+fi
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking how to hardcode library paths into programs" >&5
+$as_echo_n "checking how to hardcode library paths into programs... " >&6; }
+hardcode_action_CXX=
+if test -n "$hardcode_libdir_flag_spec_CXX" ||
+   test -n "$runpath_var_CXX" ||
+   test "X$hardcode_automatic_CXX" = "Xyes" ; then
+
+  # We can hardcode non-existent directories.
+  if test "$hardcode_direct_CXX" != no &&
+     # If the only mechanism to avoid hardcoding is shlibpath_var, we
+     # have to relink, otherwise we might link with an installed library
+     # when we should be linking with a yet-to-be-installed one
+     ## test "$_LT_TAGVAR(hardcode_shlibpath_var, CXX)" != no &&
+     test "$hardcode_minus_L_CXX" != no; then
+    # Linking always hardcodes the temporary library directory.
+    hardcode_action_CXX=relink
+  else
+    # We can link without hardcoding, and we can hardcode nonexisting dirs.
+    hardcode_action_CXX=immediate
+  fi
+else
+  # We cannot hardcode anything, or else we can only hardcode existing
+  # directories.
+  hardcode_action_CXX=unsupported
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $hardcode_action_CXX" >&5
+$as_echo "$hardcode_action_CXX" >&6; }
+
+if test "$hardcode_action_CXX" = relink ||
+   test "$inherit_rpath_CXX" = yes; then
+  # Fast installation is not supported
+  enable_fast_install=no
+elif test "$shlibpath_overrides_runpath" = yes ||
+     test "$enable_shared" = no; then
+  # Fast installation is not necessary
+  enable_fast_install=needless
+fi
+
+
+
+
+
+
+
+  fi # test -n "$compiler"
+
+  CC=$lt_save_CC
+  CFLAGS=$lt_save_CFLAGS
+  LDCXX=$LD
+  LD=$lt_save_LD
+  GCC=$lt_save_GCC
+  with_gnu_ld=$lt_save_with_gnu_ld
+  lt_cv_path_LDCXX=$lt_cv_path_LD
+  lt_cv_path_LD=$lt_save_path_LD
+  lt_cv_prog_gnu_ldcxx=$lt_cv_prog_gnu_ld
+  lt_cv_prog_gnu_ld=$lt_save_with_gnu_ld
+fi # test "$_lt_caught_CXX_error" != yes
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+
+      ac_ext=f
+ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5'
+ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_f77_compiler_gnu
+
+if test -z "$F77" || test "X$F77" = "Xno"; then
+  _lt_disable_F77=yes
+fi
+
+archive_cmds_need_lc_F77=no
+allow_undefined_flag_F77=
+always_export_symbols_F77=no
+archive_expsym_cmds_F77=
+export_dynamic_flag_spec_F77=
+hardcode_direct_F77=no
+hardcode_direct_absolute_F77=no
+hardcode_libdir_flag_spec_F77=
+hardcode_libdir_separator_F77=
+hardcode_minus_L_F77=no
+hardcode_automatic_F77=no
+inherit_rpath_F77=no
+module_cmds_F77=
+module_expsym_cmds_F77=
+link_all_deplibs_F77=unknown
+old_archive_cmds_F77=$old_archive_cmds
+reload_flag_F77=$reload_flag
+reload_cmds_F77=$reload_cmds
+no_undefined_flag_F77=
+whole_archive_flag_spec_F77=
+enable_shared_with_static_runtimes_F77=no
+
+# Source file extension for f77 test sources.
+ac_ext=f
+
+# Object file extension for compiled f77 test sources.
+objext=o
+objext_F77=$objext
+
+# No sense in running all these tests if we already determined that
+# the F77 compiler isn't working.  Some variables (like enable_shared)
+# are currently assumed to apply to all compilers on this platform,
+# and will be corrupted by setting them based on a non-working compiler.
+if test "$_lt_disable_F77" != yes; then
+  # Code to be used in simple compile tests
+  lt_simple_compile_test_code="\
+      subroutine t
+      return
+      end
+"
+
+  # Code to be used in simple link tests
+  lt_simple_link_test_code="\
+      program t
+      end
+"
+
+  # ltmain only uses $CC for tagged configurations so make sure $CC is set.
+
+
+
+
+
+
+# If no C compiler was specified, use CC.
+LTCC=${LTCC-"$CC"}
+
+# If no C compiler flags were specified, use CFLAGS.
+LTCFLAGS=${LTCFLAGS-"$CFLAGS"}
+
+# Allow CC to be a program name with arguments.
+compiler=$CC
+
+
+  # save warnings/boilerplate of simple test code
+  ac_outfile=conftest.$ac_objext
+echo "$lt_simple_compile_test_code" >conftest.$ac_ext
+eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err
+_lt_compiler_boilerplate=`cat conftest.err`
+$RM conftest*
+
+  ac_outfile=conftest.$ac_objext
+echo "$lt_simple_link_test_code" >conftest.$ac_ext
+eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err
+_lt_linker_boilerplate=`cat conftest.err`
+$RM -r conftest*
+
+
+  # Allow CC to be a program name with arguments.
+  #
+  # Remember the current CC, GCC and CFLAGS in lt_save_* variables, then make
+  # CC/CFLAGS stand for the Fortran 77 compiler and its flags: CC becomes $F77
+  # (or `f77' when F77 is unset) and CFLAGS becomes $FFLAGS.
+  lt_save_CC="$CC"
+  lt_save_GCC=$GCC
+  lt_save_CFLAGS=$CFLAGS
+  CC=${F77-"f77"}
+  CFLAGS=$FFLAGS
+  compiler=$CC
+  compiler_F77=$CC
+  # Walk the words of $compiler, skipping known wrapper programs (compile,
+  # ccache, distcc, purify, with or without a directory part) and any word
+  # that starts with `-'.  The loop stops at the first other word, which is
+  # left in cc_temp.
+  for cc_temp in $compiler""; do
+  case $cc_temp in
+    compile | *[\\/]compile | ccache | *[\\/]ccache ) ;;
+    distcc | *[\\/]distcc | purify | *[\\/]purify ) ;;
+    \-*) ;;
+    *) break;;
+  esac
+done
+# cc_basename is that word with everything up to the last `/' removed and a
+# leading "$host_alias-" prefix stripped.
+cc_basename=`$ECHO "$cc_temp" | $SED "s%.*/%%; s%^$host_alias-%%"`
+
+  GCC=$G77
+  if test -n "$compiler"; then
+    # Report the current value of $can_build_shared (message to fd 5 and the
+    # user-visible copy to fd 6).
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking if libtool supports shared libraries" >&5
+$as_echo_n "checking if libtool supports shared libraries... " >&6; }
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: $can_build_shared" >&5
+$as_echo "$can_build_shared" >&6; }
+
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to build shared libraries" >&5
+$as_echo_n "checking whether to build shared libraries... " >&6; }
+    # Shared libraries are turned off whenever they cannot be built.
+    test "$can_build_shared" = "no" && enable_shared=no
+
+    # On AIX, shared libraries and static libraries use the same namespace, and
+    # are all built from PIC.
+    # Hence on aix3*, and on aix[4-9]* for non-ia64 CPUs without runtime
+    # linking, enabling shared libraries switches static libraries off.
+    case $host_os in
+      aix3*)
+        test "$enable_shared" = yes && enable_static=no
+        # When a RANLIB program is set, run it on the library both as an
+        # extra archive command and after installation.
+        if test -n "$RANLIB"; then
+          archive_cmds="$archive_cmds~\$RANLIB \$lib"
+          postinstall_cmds='$RANLIB $lib'
+        fi
+        ;;
+      aix[4-9]*)
+       if test "$host_cpu" != ia64 && test "$aix_use_runtimelinking" = no ; then
+         test "$enable_shared" = yes && enable_static=no
+       fi
+        ;;
+    esac
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: $enable_shared" >&5
+$as_echo "$enable_shared" >&6; }
+
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to build static libraries" >&5
+$as_echo_n "checking whether to build static libraries... " >&6; }
+    # Make sure either enable_shared or enable_static is yes.
+    test "$enable_shared" = yes || enable_static=yes
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: $enable_static" >&5
+$as_echo "$enable_static" >&6; }
+
+    GCC_F77="$G77"
+    LD_F77="$LD"
+
+    ## CAVEAT EMPTOR:
+    ## There is no encapsulation within the following macros, do not change
+    ## the running order or otherwise move them around unless you know exactly
+    ## what you are doing...
+    lt_prog_compiler_wl_F77=
+lt_prog_compiler_pic_F77=
+lt_prog_compiler_static_F77=
+
+
+  if test "$GCC" = yes; then
+    lt_prog_compiler_wl_F77='-Wl,'
+    lt_prog_compiler_static_F77='-static'
+
+    case $host_os in
+      aix*)
+      # All AIX code is PIC.
+      if test "$host_cpu" = ia64; then
+       # AIX 5 now supports IA64 processor
+       lt_prog_compiler_static_F77='-Bstatic'
+      fi
+      ;;
+
+    amigaos*)
+      case $host_cpu in
+      powerpc)
+            # see comment about AmigaOS4 .so support
+            lt_prog_compiler_pic_F77='-fPIC'
+        ;;
+      m68k)
+            # FIXME: we need at least 68020 code to build shared libraries, but
+            # adding the `-m68020' flag to GCC prevents building anything better,
+            # like `-m68040'.
+            lt_prog_compiler_pic_F77='-m68020 -resident32 -malways-restore-a4'
+        ;;
+      esac
+      ;;
+
+    beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*)
+      # PIC is the default for these OSes.
+      ;;
+
+    mingw* | cygwin* | pw32* | os2* | cegcc*)
+      # This hack is so that the source file can tell whether it is being
+      # built for inclusion in a dll (and should export symbols for example).
+      # Although the cygwin gcc ignores -fPIC, still need this for old-style
+      # (--disable-auto-import) libraries
+      lt_prog_compiler_pic_F77='-DDLL_EXPORT'
+      ;;
+
+    darwin* | rhapsody*)
+      # PIC is the default on this platform
+      # Common symbols not allowed in MH_DYLIB files
+      lt_prog_compiler_pic_F77='-fno-common'
+      ;;
+
+    haiku*)
+      # PIC is the default for Haiku.
+      # The "-static" flag exists, but is broken.
+      lt_prog_compiler_static_F77=
+      ;;
+
+    hpux*)
+      # PIC is the default for 64-bit PA HP-UX, but not for 32-bit
+      # PA HP-UX.  On IA64 HP-UX, PIC is the default but the pic flag
+      # sets the default TLS model and affects inlining.
+      case $host_cpu in
+      hppa*64*)
+       # +Z the default
+       ;;
+      *)
+       lt_prog_compiler_pic_F77='-fPIC'
+       ;;
+      esac
+      ;;
+
+    interix[3-9]*)
+      # Interix 3.x gcc -fpic/-fPIC options generate broken code.
+      # Instead, we relocate shared libraries at runtime.
+      ;;
+
+    msdosdjgpp*)
+      # Just because we use GCC doesn't mean we suddenly get shared libraries
+      # on systems that don't support them.
+      lt_prog_compiler_can_build_shared_F77=no
+      enable_shared=no
+      ;;
+
+    *nto* | *qnx*)
+      # QNX uses GNU C++, but the -shared option must be passed too; otherwise
+      # it will coredump.
+      lt_prog_compiler_pic_F77='-fPIC -shared'
+      ;;
+
+    sysv4*MP*)
+      if test -d /usr/nec; then
+       lt_prog_compiler_pic_F77=-Kconform_pic
+      fi
+      ;;
+
+    *)
+      lt_prog_compiler_pic_F77='-fPIC'
+      ;;
+    esac
+
+    case $cc_basename in
+    nvcc*) # Cuda Compiler Driver 2.2
+      lt_prog_compiler_wl_F77='-Xlinker '
+      if test -n "$lt_prog_compiler_pic_F77"; then
+        lt_prog_compiler_pic_F77="-Xcompiler $lt_prog_compiler_pic_F77"
+      fi
+      ;;
+    esac
+  else
+    # PORTME Check for flag to pass linker flags through the system compiler.
+    case $host_os in
+    aix*)
+      lt_prog_compiler_wl_F77='-Wl,'
+      if test "$host_cpu" = ia64; then
+       # AIX 5 now supports IA64 processor
+       lt_prog_compiler_static_F77='-Bstatic'
+      else
+       lt_prog_compiler_static_F77='-bnso -bI:/lib/syscalls.exp'
+      fi
+      ;;
+
+    mingw* | cygwin* | pw32* | os2* | cegcc*)
+      # This hack is so that the source file can tell whether it is being
+      # built for inclusion in a dll (and should export symbols for example).
+      lt_prog_compiler_pic_F77='-DDLL_EXPORT'
+      ;;
+
+    hpux9* | hpux10* | hpux11*)
+      lt_prog_compiler_wl_F77='-Wl,'
+      # PIC is the default for IA64 HP-UX and 64-bit HP-UX, but
+      # not for PA HP-UX.
+      case $host_cpu in
+      hppa*64*|ia64*)
+       # +Z the default
+       ;;
+      *)
+       lt_prog_compiler_pic_F77='+Z'
+       ;;
+      esac
+      # Is there a better lt_prog_compiler_static that works with the bundled CC?
+      lt_prog_compiler_static_F77='${wl}-a ${wl}archive'
+      ;;
+
+    irix5* | irix6* | nonstopux*)
+      lt_prog_compiler_wl_F77='-Wl,'
+      # PIC (with -KPIC) is the default.
+      lt_prog_compiler_static_F77='-non_shared'
+      ;;
+
+    linux* | k*bsd*-gnu | kopensolaris*-gnu)
+      case $cc_basename in
+      # old Intel for x86_64 which still supported -KPIC.
+      ecc*)
+       lt_prog_compiler_wl_F77='-Wl,'
+       lt_prog_compiler_pic_F77='-KPIC'
+       lt_prog_compiler_static_F77='-static'
+        ;;
+      # icc used to be incompatible with GCC.
+      # ICC 10 doesn't accept -KPIC any more.
+      icc* | ifort*)
+       lt_prog_compiler_wl_F77='-Wl,'
+       lt_prog_compiler_pic_F77='-fPIC'
+       lt_prog_compiler_static_F77='-static'
+        ;;
+      # Lahey Fortran 8.1.
+      lf95*)
+       lt_prog_compiler_wl_F77='-Wl,'
+       lt_prog_compiler_pic_F77='--shared'
+       lt_prog_compiler_static_F77='--static'
+       ;;
+      nagfor*)
+       # NAG Fortran compiler
+       lt_prog_compiler_wl_F77='-Wl,-Wl,,'
+       lt_prog_compiler_pic_F77='-PIC'
+       lt_prog_compiler_static_F77='-Bstatic'
+       ;;
+      pgcc* | pgf77* | pgf90* | pgf95* | pgfortran*)
+        # Portland Group compilers (*not* the Pentium gcc compiler,
+       # which looks to be a dead project)
+       lt_prog_compiler_wl_F77='-Wl,'
+       lt_prog_compiler_pic_F77='-fpic'
+       lt_prog_compiler_static_F77='-Bstatic'
+        ;;
+      ccc*)
+        lt_prog_compiler_wl_F77='-Wl,'
+        # All Alpha code is PIC.
+        lt_prog_compiler_static_F77='-non_shared'
+        ;;
+      xl* | bgxl* | bgf* | mpixl*)
+       # IBM XL C 8.0/Fortran 10.1, 11.1 on PPC and BlueGene
+       lt_prog_compiler_wl_F77='-Wl,'
+       lt_prog_compiler_pic_F77='-qpic'
+       lt_prog_compiler_static_F77='-qstaticlink'
+       ;;
+      *)
+       case `$CC -V 2>&1 | sed 5q` in
+       *Sun\ Ceres\ Fortran* | *Sun*Fortran*\ [1-7].* | *Sun*Fortran*\ 8.[0-3]*)
+         # Sun Fortran 8.3 passes all unrecognized flags to the linker
+         lt_prog_compiler_pic_F77='-KPIC'
+         lt_prog_compiler_static_F77='-Bstatic'
+         lt_prog_compiler_wl_F77=''
+         ;;
+       *Sun\ F* | *Sun*Fortran*)
+         lt_prog_compiler_pic_F77='-KPIC'
+         lt_prog_compiler_static_F77='-Bstatic'
+         lt_prog_compiler_wl_F77='-Qoption ld '
+         ;;
+       *Sun\ C*)
+         # Sun C 5.9
+         lt_prog_compiler_pic_F77='-KPIC'
+         lt_prog_compiler_static_F77='-Bstatic'
+         lt_prog_compiler_wl_F77='-Wl,'
+         ;;
+        *Intel*\ [CF]*Compiler*)
+         lt_prog_compiler_wl_F77='-Wl,'
+         lt_prog_compiler_pic_F77='-fPIC'
+         lt_prog_compiler_static_F77='-static'
+         ;;
+       *Portland\ Group*)
+         lt_prog_compiler_wl_F77='-Wl,'
+         lt_prog_compiler_pic_F77='-fpic'
+         lt_prog_compiler_static_F77='-Bstatic'
+         ;;
+       esac
+       ;;
+      esac
+      ;;
+
+    newsos6)
+      lt_prog_compiler_pic_F77='-KPIC'
+      lt_prog_compiler_static_F77='-Bstatic'
+      ;;
+
+    *nto* | *qnx*)
+      # QNX uses GNU C++, but the -shared option must be passed too; otherwise
+      # it will coredump.
+      lt_prog_compiler_pic_F77='-fPIC -shared'
+      ;;
+
+    osf3* | osf4* | osf5*)
+      lt_prog_compiler_wl_F77='-Wl,'
+      # All OSF/1 code is PIC.
+      lt_prog_compiler_static_F77='-non_shared'
+      ;;
+
+    rdos*)
+      lt_prog_compiler_static_F77='-non_shared'
+      ;;
+
+    solaris*)
+      lt_prog_compiler_pic_F77='-KPIC'
+      lt_prog_compiler_static_F77='-Bstatic'
+      case $cc_basename in
+      f77* | f90* | f95* | sunf77* | sunf90* | sunf95*)
+       lt_prog_compiler_wl_F77='-Qoption ld ';;
+      *)
+       lt_prog_compiler_wl_F77='-Wl,';;
+      esac
+      ;;
+
+    sunos4*)
+      lt_prog_compiler_wl_F77='-Qoption ld '
+      lt_prog_compiler_pic_F77='-PIC'
+      lt_prog_compiler_static_F77='-Bstatic'
+      ;;
+
+    sysv4 | sysv4.2uw2* | sysv4.3*)
+      lt_prog_compiler_wl_F77='-Wl,'
+      lt_prog_compiler_pic_F77='-KPIC'
+      lt_prog_compiler_static_F77='-Bstatic'
+      ;;
+
+    sysv4*MP*)
+      if test -d /usr/nec ;then
+       lt_prog_compiler_pic_F77='-Kconform_pic'
+       lt_prog_compiler_static_F77='-Bstatic'
+      fi
+      ;;
+
+    sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*)
+      lt_prog_compiler_wl_F77='-Wl,'
+      lt_prog_compiler_pic_F77='-KPIC'
+      lt_prog_compiler_static_F77='-Bstatic'
+      ;;
+
+    unicos*)
+      lt_prog_compiler_wl_F77='-Wl,'
+      lt_prog_compiler_can_build_shared_F77=no
+      ;;
+
+    uts4*)
+      lt_prog_compiler_pic_F77='-pic'
+      lt_prog_compiler_static_F77='-Bstatic'
+      ;;
+
+    *)
+      lt_prog_compiler_can_build_shared_F77=no
+      ;;
+    esac
+  fi
+
+# Final adjustment of the F77 PIC flag by host OS: it is cleared on *djgpp*
+# hosts and left untouched everywhere else (the default arm assigns the
+# variable to itself, so it changes nothing).
+case $host_os in
+  # For platforms which do not support PIC, -DPIC is meaningless:
+  *djgpp*)
+    lt_prog_compiler_pic_F77=
+    ;;
+  *)
+    lt_prog_compiler_pic_F77="$lt_prog_compiler_pic_F77"
+    ;;
+esac
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $compiler option to produce PIC" >&5
+$as_echo_n "checking for $compiler option to produce PIC... " >&6; }
+if test "${lt_cv_prog_compiler_pic_F77+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  lt_cv_prog_compiler_pic_F77=$lt_prog_compiler_pic_F77
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic_F77" >&5
+$as_echo "$lt_cv_prog_compiler_pic_F77" >&6; }
+lt_prog_compiler_pic_F77=$lt_cv_prog_compiler_pic_F77
+
+#
+# Check to make sure the PIC flag actually works.
+#
+# Only runs when a PIC flag was chosen.  The trivial compile test is built
+# with the flag inserted into the compile command.  The flag is accepted when
+# the compile exits 0, the object file is non-empty, and the filtered stderr
+# is either empty or identical to the compiler boilerplate.  On failure the
+# PIC flag is cleared and lt_prog_compiler_can_build_shared_F77 is set to no.
+#
+if test -n "$lt_prog_compiler_pic_F77"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler PIC flag $lt_prog_compiler_pic_F77 works" >&5
+$as_echo_n "checking if $compiler PIC flag $lt_prog_compiler_pic_F77 works... " >&6; }
+if test "${lt_cv_prog_compiler_pic_works_F77+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  lt_cv_prog_compiler_pic_works_F77=no
+   ac_outfile=conftest.$ac_objext
+   echo "$lt_simple_compile_test_code" > conftest.$ac_ext
+   lt_compiler_flag="$lt_prog_compiler_pic_F77"
+   # Insert the option either (1) after the last *FLAGS variable, or
+   # (2) before a word containing "conftest.", or (3) at the end.
+   # Note that $ac_compile itself does not contain backslashes and begins
+   # with a dollar sign (not a hyphen), so the echo should work correctly.
+   # The option is referenced via a variable to avoid confusing sed.
+   lt_compile=`echo "$ac_compile" | $SED \
+   -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
+   -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
+   -e 's:$: $lt_compiler_flag:'`
+   # Log the command line to fd 5, run it with stderr captured in
+   # conftest.err, then log the captured stderr and the exit status.
+   (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5)
+   (eval "$lt_compile" 2>conftest.err)
+   ac_status=$?
+   cat conftest.err >&5
+   echo "$as_me:$LINENO: \$? = $ac_status" >&5
+   if (exit $ac_status) && test -s "$ac_outfile"; then
+     # The compiler can only warn and ignore the option if not recognized
+     # So say no if there are warnings other than the usual output.
+     $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp
+     $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2
+     if test ! -s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then
+       lt_cv_prog_compiler_pic_works_F77=yes
+     fi
+   fi
+   $RM conftest*
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic_works_F77" >&5
+$as_echo "$lt_cv_prog_compiler_pic_works_F77" >&6; }
+
+if test x"$lt_cv_prog_compiler_pic_works_F77" = xyes; then
+    # Flag works: make sure a non-empty flag carries exactly one leading space.
+    case $lt_prog_compiler_pic_F77 in
+     "" | " "*) ;;
+     *) lt_prog_compiler_pic_F77=" $lt_prog_compiler_pic_F77" ;;
+     esac
+else
+    # Flag rejected: drop it and record that shared libraries cannot be built.
+    lt_prog_compiler_pic_F77=
+     lt_prog_compiler_can_build_shared_F77=no
+fi
+
+fi
+
+
+
+
+
+#
+# Check to make sure the static flag actually works.
+#
+# The static flag may contain ${wl}; it is expanded here with wl set to the
+# F77 linker-flag prefix.  The trivial link test is then linked with the
+# expanded flag appended to LDFLAGS.  The flag is accepted when the link
+# succeeds, produces a non-empty executable, and stderr is either empty or,
+# after filtering, identical to the linker boilerplate.  LDFLAGS is restored
+# afterwards; a rejected flag is cleared.
+#
+wl=$lt_prog_compiler_wl_F77 eval lt_tmp_static_flag=\"$lt_prog_compiler_static_F77\"
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler static flag $lt_tmp_static_flag works" >&5
+$as_echo_n "checking if $compiler static flag $lt_tmp_static_flag works... " >&6; }
+if test "${lt_cv_prog_compiler_static_works_F77+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  lt_cv_prog_compiler_static_works_F77=no
+   save_LDFLAGS="$LDFLAGS"
+   LDFLAGS="$LDFLAGS $lt_tmp_static_flag"
+   echo "$lt_simple_link_test_code" > conftest.$ac_ext
+   if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then
+     # The linker can only warn and ignore the option if not recognized
+     # So say no if there are warnings
+     if test -s conftest.err; then
+       # Append any errors to the config.log.
+       cat conftest.err 1>&5
+       $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp
+       $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2
+       if diff conftest.exp conftest.er2 >/dev/null; then
+         lt_cv_prog_compiler_static_works_F77=yes
+       fi
+     else
+       lt_cv_prog_compiler_static_works_F77=yes
+     fi
+   fi
+   $RM -r conftest*
+   LDFLAGS="$save_LDFLAGS"
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_static_works_F77" >&5
+$as_echo "$lt_cv_prog_compiler_static_works_F77" >&6; }
+
+if test x"$lt_cv_prog_compiler_static_works_F77" = xyes; then
+    :
+else
+    lt_prog_compiler_static_F77=
+fi
+
+
+
+
+    # Check whether the compiler accepts `-c' together with `-o <file>'.
+    # The test runs inside a scratch directory conftest/ and asks for the
+    # object to be written to out/conftest2.$ac_objext.  The answer is yes
+    # when the compile exits 0, that object file is non-empty, and the
+    # filtered stderr is empty or identical to the compiler boilerplate.
+    # The scratch directory is removed afterwards.
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5
+$as_echo_n "checking if $compiler supports -c -o file.$ac_objext... " >&6; }
+if test "${lt_cv_prog_compiler_c_o_F77+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  lt_cv_prog_compiler_c_o_F77=no
+   $RM -r conftest 2>/dev/null
+   mkdir conftest
+   cd conftest
+   mkdir out
+   echo "$lt_simple_compile_test_code" > conftest.$ac_ext
+
+   lt_compiler_flag="-o out/conftest2.$ac_objext"
+   # Insert the option either (1) after the last *FLAGS variable, or
+   # (2) before a word containing "conftest.", or (3) at the end.
+   # Note that $ac_compile itself does not contain backslashes and begins
+   # with a dollar sign (not a hyphen), so the echo should work correctly.
+   lt_compile=`echo "$ac_compile" | $SED \
+   -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
+   -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
+   -e 's:$: $lt_compiler_flag:'`
+   # Log the command line to fd 5, run it with stderr captured in
+   # out/conftest.err, then log the captured stderr and the exit status.
+   (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5)
+   (eval "$lt_compile" 2>out/conftest.err)
+   ac_status=$?
+   cat out/conftest.err >&5
+   echo "$as_me:$LINENO: \$? = $ac_status" >&5
+   if (exit $ac_status) && test -s out/conftest2.$ac_objext
+   then
+     # The compiler can only warn and ignore the option if not recognized
+     # So say no if there are warnings
+     $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp
+     $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2
+     if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then
+       lt_cv_prog_compiler_c_o_F77=yes
+     fi
+   fi
+   chmod u+w . 2>&5
+   $RM conftest*
+   # SGI C++ compiler will create directory out/ii_files/ for
+   # template instantiation
+   test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files
+   $RM out/* && rmdir out
+   cd ..
+   $RM -r conftest
+   $RM conftest*
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o_F77" >&5
+$as_echo "$lt_cv_prog_compiler_c_o_F77" >&6; }
+
+
+
+    # Second copy of the `-c -o' check.  The identical check immediately
+    # before this one always leaves lt_cv_prog_compiler_c_o_F77 set, so when
+    # control reaches this point only the "(cached)" branch can run and the
+    # stored result is simply reported again; the else-branch below mirrors
+    # the first copy line for line.
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5
+$as_echo_n "checking if $compiler supports -c -o file.$ac_objext... " >&6; }
+if test "${lt_cv_prog_compiler_c_o_F77+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  lt_cv_prog_compiler_c_o_F77=no
+   $RM -r conftest 2>/dev/null
+   mkdir conftest
+   cd conftest
+   mkdir out
+   echo "$lt_simple_compile_test_code" > conftest.$ac_ext
+
+   lt_compiler_flag="-o out/conftest2.$ac_objext"
+   # Insert the option either (1) after the last *FLAGS variable, or
+   # (2) before a word containing "conftest.", or (3) at the end.
+   # Note that $ac_compile itself does not contain backslashes and begins
+   # with a dollar sign (not a hyphen), so the echo should work correctly.
+   lt_compile=`echo "$ac_compile" | $SED \
+   -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
+   -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
+   -e 's:$: $lt_compiler_flag:'`
+   (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5)
+   (eval "$lt_compile" 2>out/conftest.err)
+   ac_status=$?
+   cat out/conftest.err >&5
+   echo "$as_me:$LINENO: \$? = $ac_status" >&5
+   if (exit $ac_status) && test -s out/conftest2.$ac_objext
+   then
+     # The compiler can only warn and ignore the option if not recognized
+     # So say no if there are warnings
+     $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp
+     $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2
+     if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then
+       lt_cv_prog_compiler_c_o_F77=yes
+     fi
+   fi
+   chmod u+w . 2>&5
+   $RM conftest*
+   # SGI C++ compiler will create directory out/ii_files/ for
+   # template instantiation
+   test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files
+   $RM out/* && rmdir out
+   cd ..
+   $RM -r conftest
+   $RM conftest*
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o_F77" >&5
+$as_echo "$lt_cv_prog_compiler_c_o_F77" >&6; }
+
+
+
+
+# Decide whether file locking is needed and whether hard links can provide it.
+# The probe only runs when the compiler lacks `-c -o' support and need_locks
+# is not already `no'; in every other case need_locks is forced to `no'.
+hard_links="nottested"
+if test "$lt_cv_prog_compiler_c_o_F77" = no && test "$need_locks" != no; then
+  # do not overwrite the value of need_locks provided by the user
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking if we can lock with hard links" >&5
+$as_echo_n "checking if we can lock with hard links... " >&6; }
+  hard_links=yes
+  $RM conftest*
+  # Linking from a source that does not exist must fail.
+  ln conftest.a conftest.b 2>/dev/null && hard_links=no
+  touch conftest.a
+  # Linking from an existing source to a new name must succeed.
+  ln conftest.a conftest.b 2>&5 || hard_links=no
+  # Linking onto a name that already exists must fail.
+  ln conftest.a conftest.b 2>/dev/null && hard_links=no
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $hard_links" >&5
+$as_echo "$hard_links" >&6; }
+  if test "$hard_links" = no; then
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: \`$CC' does not support \`-c -o', so \`make -j' may be unsafe" >&5
+$as_echo "$as_me: WARNING: \`$CC' does not support \`-c -o', so \`make -j' may be unsafe" >&2;}
+    need_locks=warn
+  fi
+else
+  need_locks=no
+fi
+
+
+
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the $compiler linker ($LD) supports shared libraries" >&5
+$as_echo_n "checking whether the $compiler linker ($LD) supports shared libraries... " >&6; }
+
+  runpath_var=
+  allow_undefined_flag_F77=
+  always_export_symbols_F77=no
+  archive_cmds_F77=
+  archive_expsym_cmds_F77=
+  compiler_needs_object_F77=no
+  enable_shared_with_static_runtimes_F77=no
+  export_dynamic_flag_spec_F77=
+  export_symbols_cmds_F77='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols'
+  hardcode_automatic_F77=no
+  hardcode_direct_F77=no
+  hardcode_direct_absolute_F77=no
+  hardcode_libdir_flag_spec_F77=
+  hardcode_libdir_separator_F77=
+  hardcode_minus_L_F77=no
+  hardcode_shlibpath_var_F77=unsupported
+  inherit_rpath_F77=no
+  link_all_deplibs_F77=unknown
+  module_cmds_F77=
+  module_expsym_cmds_F77=
+  old_archive_from_new_cmds_F77=
+  old_archive_from_expsyms_cmds_F77=
+  thread_safe_flag_spec_F77=
+  whole_archive_flag_spec_F77=
+  # include_expsyms should be a list of space-separated symbols to be *always*
+  # included in the symbol list
+  include_expsyms_F77=
+  # exclude_expsyms can be an extended regexp of symbols to exclude
+  # it will be wrapped by ` (' and `)$', so one must not match beginning or
+  # end of line.  Example: `a|bc|.*d.*' will exclude the symbols `a' and `bc',
+  # as well as any symbol that contains `d'.
+  exclude_expsyms_F77='_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*'
+  # Although _GLOBAL_OFFSET_TABLE_ is a valid symbol C name, most a.out
+  # platforms (ab)use it in PIC code, but their linkers get confused if
+  # the symbol is explicitly referenced.  Since portable code cannot
+  # rely on this symbol name, it's probably fine to never include it in
+  # preloaded symbol tables.
+  # Exclude shared library initialization/finalization symbols.
+  extract_expsyms_cmds=
+
+  # Host-specific overrides of with_gnu_ld before the linker checks:
+  # forced to `no' on cygwin/mingw/pw32/cegcc when the compiler is not GCC,
+  # forced to `yes' on interix, and forced to `no' on openbsd.
+  case $host_os in
+  cygwin* | mingw* | pw32* | cegcc*)
+    # FIXME: the MSVC++ port hasn't been tested in a loooong time
+    # When not using gcc, we currently assume that we are using
+    # Microsoft Visual C++.
+    if test "$GCC" != yes; then
+      with_gnu_ld=no
+    fi
+    ;;
+  interix*)
+    # we just hope/assume this is gcc and not c89 (= MSVC++)
+    with_gnu_ld=yes
+    ;;
+  openbsd*)
+    with_gnu_ld=no
+    ;;
+  esac
+
+  ld_shlibs_F77=yes
+
+  # On some targets, GNU ld is compatible enough with the native linker
+  # that we're better off using the native interface for both.
+  #
+  # lt_use_gnu_ld_interface stays `no' unless with_gnu_ld is yes.  With GNU ld
+  # it becomes `yes' on every host except AIX, where it stays `no' when the
+  # `$LD -v' banner matches GNU Binutils 2.19.5*, 2.[2-9]* or [3-9]*.
+  lt_use_gnu_ld_interface=no
+  if test "$with_gnu_ld" = yes; then
+    case $host_os in
+      aix*)
+       # The AIX port of GNU ld has always aspired to compatibility
+       # with the native linker.  However, as the warning in the GNU ld
+       # block says, versions before 2.19.5* couldn't really create working
+       # shared libraries, regardless of the interface used.
+       case `$LD -v 2>&1` in
+         *\ \(GNU\ Binutils\)\ 2.19.5*) ;;
+         *\ \(GNU\ Binutils\)\ 2.[2-9]*) ;;
+         *\ \(GNU\ Binutils\)\ [3-9]*) ;;
+         *)
+           lt_use_gnu_ld_interface=yes
+           ;;
+       esac
+       ;;
+      *)
+       lt_use_gnu_ld_interface=yes
+       ;;
+    esac
+  fi
+
+  if test "$lt_use_gnu_ld_interface" = yes; then
+    # If archive_cmds runs LD, not CC, wlarc should be empty
+    wlarc='${wl}'
+
+    # Set some defaults for GNU ld with shared library support. These
+    # are reset later if shared libraries are not supported. Putting them
+    # here allows them to be overridden if necessary.
+    runpath_var=LD_RUN_PATH
+    hardcode_libdir_flag_spec_F77='${wl}-rpath ${wl}$libdir'
+    export_dynamic_flag_spec_F77='${wl}--export-dynamic'
+    # Ancient GNU ld didn't support --whole-archive et al.
+    if $LD --help 2>&1 | $GREP 'no-whole-archive' > /dev/null; then
+      whole_archive_flag_spec_F77="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive'
+    else
+      whole_archive_flag_spec_F77=
+    fi
+    # Set supports_anon_versioning from the `$LD -v' banner.  The patterns are
+    # tried in order: GNU gold is accepted, versions below 2.11 are rejected,
+    # two specific 2.11.9x releases are accepted, any other 2.11.* is
+    # rejected, and every remaining banner is accepted.
+    supports_anon_versioning=no
+    case `$LD -v 2>&1` in
+      *GNU\ gold*) supports_anon_versioning=yes ;;
+      *\ [01].* | *\ 2.[0-9].* | *\ 2.10.*) ;; # catch versions < 2.11
+      *\ 2.11.93.0.2\ *) supports_anon_versioning=yes ;; # RH7.3 ...
+      *\ 2.11.92.0.12\ *) supports_anon_versioning=yes ;; # Mandrake 8.2 ...
+      *\ 2.11.*) ;; # other 2.11 versions
+      *) supports_anon_versioning=yes ;;
+    esac
+
+    # See if GNU ld supports shared libraries.
+    case $host_os in
+    aix[3-9]*)
+      # On AIX/PPC, the GNU linker is very broken
+      if test "$host_cpu" != ia64; then
+       ld_shlibs_F77=no
+       cat <<_LT_EOF 1>&2
+
+*** Warning: the GNU linker, at least up to release 2.19, is reported
+*** to be unable to reliably create shared libraries on AIX.
+*** Therefore, libtool is disabling shared libraries support.  If you
+*** really care for shared libraries, you may want to install binutils
+*** 2.20 or above, or modify your PATH so that a non-GNU linker is found.
+*** You will then need to restart the configuration process.
+
+_LT_EOF
+      fi
+      ;;
+
+    amigaos*)
+      case $host_cpu in
+      powerpc)
+            # see comment about AmigaOS4 .so support
+            archive_cmds_F77='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+            archive_expsym_cmds_F77=''
+        ;;
+      m68k)
+            archive_cmds_F77='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)'
+            hardcode_libdir_flag_spec_F77='-L$libdir'
+            hardcode_minus_L_F77=yes
+        ;;
+      esac
+      ;;
+
+    beos*)
+      if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then
+       allow_undefined_flag_F77=unsupported
+       # Joseph Beckenbach <jrb3@best.com> says some releases of gcc
+       # support --undefined.  This deserves some investigation.  FIXME
+       archive_cmds_F77='$CC -nostart $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+      else
+       ld_shlibs_F77=no
+      fi
+      ;;
+
+    cygwin* | mingw* | pw32* | cegcc*)
+      # _LT_TAGVAR(hardcode_libdir_flag_spec, F77) is actually meaningless,
+      # as there is no search path for DLLs.
+      hardcode_libdir_flag_spec_F77='-L$libdir'
+      export_dynamic_flag_spec_F77='${wl}--export-all-symbols'
+      allow_undefined_flag_F77=unsupported
+      always_export_symbols_F77=no
+      enable_shared_with_static_runtimes_F77=yes
+      export_symbols_cmds_F77='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1 DATA/;s/^.*[ ]__nm__\([^ ]*\)[ ][^ ]*/\1 DATA/;/^I[ ]/d;/^[AITW][ ]/s/.* //'\'' | sort | uniq > $export_symbols'
+      exclude_expsyms_F77='[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname'
+
+      if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then
+        archive_cmds_F77='$CC -shared $libobjs $deplibs $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib'
+       # If the export-symbols file already is a .def file (1st line
+       # is EXPORTS), use it as is; otherwise, prepend...
+       archive_expsym_cmds_F77='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then
+         cp $export_symbols $output_objdir/$soname.def;
+       else
+         echo EXPORTS > $output_objdir/$soname.def;
+         cat $export_symbols >> $output_objdir/$soname.def;
+       fi~
+       $CC -shared $output_objdir/$soname.def $libobjs $deplibs $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib'
+      else
+       ld_shlibs_F77=no
+      fi
+      ;;
+
+    haiku*)
+      archive_cmds_F77='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+      link_all_deplibs_F77=yes
+      ;;
+
+    interix[3-9]*)
+      hardcode_direct_F77=no
+      hardcode_shlibpath_var_F77=no
+      hardcode_libdir_flag_spec_F77='${wl}-rpath,$libdir'
+      export_dynamic_flag_spec_F77='${wl}-E'
+      # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc.
+      # Instead, shared libraries are loaded at an image base (0x10000000 by
+      # default) and relocated if they conflict, which is a slow, very memory-
+      # consuming and fragmenting process.  To avoid this, we pick a random,
+      # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link
+      # time.  Moving up from 0x10000000 also allows more sbrk(2) space.
+      archive_cmds_F77='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib'
+      archive_expsym_cmds_F77='sed "s,^,_," $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--retain-symbols-file,$output_objdir/$soname.expsym ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib'
+      ;;
+
+    gnu* | linux* | tpf* | k*bsd*-gnu | kopensolaris*-gnu)
+      tmp_diet=no
+      if test "$host_os" = linux-dietlibc; then
+       case $cc_basename in
+         diet\ *) tmp_diet=yes;;       # linux-dietlibc with static linking (!diet-dyn)
+       esac
+      fi
+      if $LD --help 2>&1 | $EGREP ': supported targets:.* elf' > /dev/null \
+        && test "$tmp_diet" = no
+      then
+       tmp_addflag=' $pic_flag'
+       tmp_sharedflag='-shared'
+       case $cc_basename,$host_cpu in
+        pgcc*)                         # Portland Group C compiler
+         whole_archive_flag_spec_F77='${wl}--whole-archive`for conv in $convenience\"\"; do test  -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive'
+         tmp_addflag=' $pic_flag'
+         ;;
+       pgf77* | pgf90* | pgf95* | pgfortran*)
+                                       # Portland Group f77 and f90 compilers
+         whole_archive_flag_spec_F77='${wl}--whole-archive`for conv in $convenience\"\"; do test  -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive'
+         tmp_addflag=' $pic_flag -Mnomain' ;;
+       ecc*,ia64* | icc*,ia64*)        # Intel C compiler on ia64
+         tmp_addflag=' -i_dynamic' ;;
+       efc*,ia64* | ifort*,ia64*)      # Intel Fortran compiler on ia64
+         tmp_addflag=' -i_dynamic -nofor_main' ;;
+       ifc* | ifort*)                  # Intel Fortran compiler
+         tmp_addflag=' -nofor_main' ;;
+       lf95*)                          # Lahey Fortran 8.1
+         whole_archive_flag_spec_F77=
+         tmp_sharedflag='--shared' ;;
+       xl[cC]* | bgxl[cC]* | mpixl[cC]*) # IBM XL C 8.0 on PPC (deal with xlf below)
+         tmp_sharedflag='-qmkshrobj'
+         tmp_addflag= ;;
+       nvcc*)  # Cuda Compiler Driver 2.2
+         whole_archive_flag_spec_F77='${wl}--whole-archive`for conv in $convenience\"\"; do test  -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive'
+         compiler_needs_object_F77=yes
+         ;;
+       esac
+       case `$CC -V 2>&1 | sed 5q` in
+       *Sun\ C*)                       # Sun C 5.9
+         whole_archive_flag_spec_F77='${wl}--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive'
+         compiler_needs_object_F77=yes
+         tmp_sharedflag='-G' ;;
+       *Sun\ F*)                       # Sun Fortran 8.3
+         tmp_sharedflag='-G' ;;
+       esac
+       archive_cmds_F77='$CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+
+        if test "x$supports_anon_versioning" = xyes; then
+          archive_expsym_cmds_F77='echo "{ global:" > $output_objdir/$libname.ver~
+           cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~
+           echo "local: *; };" >> $output_objdir/$libname.ver~
+           $CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-version-script ${wl}$output_objdir/$libname.ver -o $lib'
+        fi
+
+       case $cc_basename in
+       xlf* | bgf* | bgxlf* | mpixlf*)
+         # IBM XL Fortran 10.1 on PPC cannot create shared libs itself
+         whole_archive_flag_spec_F77='--whole-archive$convenience --no-whole-archive'
+         hardcode_libdir_flag_spec_F77='${wl}-rpath ${wl}$libdir'
+         archive_cmds_F77='$LD -shared $libobjs $deplibs $linker_flags -soname $soname -o $lib'
+         if test "x$supports_anon_versioning" = xyes; then
+           archive_expsym_cmds_F77='echo "{ global:" > $output_objdir/$libname.ver~
+             cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~
+             echo "local: *; };" >> $output_objdir/$libname.ver~
+             $LD -shared $libobjs $deplibs $linker_flags -soname $soname -version-script $output_objdir/$libname.ver -o $lib'
+         fi
+         ;;
+       esac
+      else
+        ld_shlibs_F77=no
+      fi
+      ;;
+
+    netbsd*)
+      if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then
+       archive_cmds_F77='$LD -Bshareable $libobjs $deplibs $linker_flags -o $lib'
+       wlarc=
+      else
+       archive_cmds_F77='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+       archive_expsym_cmds_F77='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
+      fi
+      ;;
+
+    solaris*)
+      if $LD -v 2>&1 | $GREP 'BFD 2\.8' > /dev/null; then
+       ld_shlibs_F77=no
+       cat <<_LT_EOF 1>&2
+
+*** Warning: The releases 2.8.* of the GNU linker cannot reliably
+*** create shared libraries on Solaris systems.  Therefore, libtool
+*** is disabling shared libraries support.  We urge you to upgrade GNU
+*** binutils to release 2.9.1 or newer.  Another option is to modify
+*** your PATH or compiler configuration so that the native linker is
+*** used, and then restart.
+
+_LT_EOF
+      elif $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then
+       archive_cmds_F77='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+       archive_expsym_cmds_F77='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
+      else
+       ld_shlibs_F77=no
+      fi
+      ;;
+
+    sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX*)
+      case `$LD -v 2>&1` in
+        *\ [01].* | *\ 2.[0-9].* | *\ 2.1[0-5].*)
+       ld_shlibs_F77=no
+       cat <<_LT_EOF 1>&2
+
+*** Warning: Releases of the GNU linker prior to 2.16.91.0.3 can not
+*** reliably create shared libraries on SCO systems.  Therefore, libtool
+*** is disabling shared libraries support.  We urge you to upgrade GNU
+*** binutils to release 2.16.91.0.3 or newer.  Another option is to modify
+*** your PATH or compiler configuration so that the native linker is
+*** used, and then restart.
+
+_LT_EOF
+       ;;
+       *)
+         # For security reasons, it is highly recommended that you always
+         # use absolute paths for naming shared libraries, and exclude the
+         # DT_RUNPATH tag from executables and libraries.  But doing so
+         # requires that you compile everything twice, which is a pain.
+         if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then
+           hardcode_libdir_flag_spec_F77='${wl}-rpath ${wl}$libdir'
+           archive_cmds_F77='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+           archive_expsym_cmds_F77='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
+         else
+           ld_shlibs_F77=no
+         fi
+       ;;
+      esac
+      ;;
+
+    sunos4*)
+      archive_cmds_F77='$LD -assert pure-text -Bshareable -o $lib $libobjs $deplibs $linker_flags'
+      wlarc=
+      hardcode_direct_F77=yes
+      hardcode_shlibpath_var_F77=no
+      ;;
+
+    *)
+      if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then
+       archive_cmds_F77='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+       archive_expsym_cmds_F77='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
+      else
+       ld_shlibs_F77=no
+      fi
+      ;;
+    esac
+
+    if test "$ld_shlibs_F77" = no; then
+      runpath_var=
+      hardcode_libdir_flag_spec_F77=
+      export_dynamic_flag_spec_F77=
+      whole_archive_flag_spec_F77=
+    fi
+  else
+    # PORTME fill in a description of your system's linker (not GNU ld)
+    case $host_os in
+    aix3*)
+      allow_undefined_flag_F77=unsupported
+      always_export_symbols_F77=yes
+      archive_expsym_cmds_F77='$LD -o $output_objdir/$soname $libobjs $deplibs $linker_flags -bE:$export_symbols -T512 -H512 -bM:SRE~$AR $AR_FLAGS $lib $output_objdir/$soname'
+      # Note: this linker hardcodes the directories in LIBPATH if there
+      # are no directories specified by -L.
+      hardcode_minus_L_F77=yes
+      if test "$GCC" = yes && test -z "$lt_prog_compiler_static"; then
+       # Neither direct hardcoding nor static linking is supported with a
+       # broken collect2.
+       hardcode_direct_F77=unsupported
+      fi
+      ;;
+
+    aix[4-9]*)
+      if test "$host_cpu" = ia64; then
+       # On IA64, the linker does run time linking by default, so we don't
+       # have to do anything special.
+       aix_use_runtimelinking=no
+       exp_sym_flag='-Bexport'
+       no_entry_flag=""
+      else
+       # If we're using GNU nm, then we don't want the "-C" option.
+       # -C means demangle to AIX nm, but means don't demangle with GNU nm.
+       # Also, AIX nm treats weak defined symbols like other global
+       # defined symbols, whereas GNU nm marks them as "W".
+       if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then
+         export_symbols_cmds_F77='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && (substr(\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols'
+       else
+         export_symbols_cmds_F77='$NM -BCpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B")) && (substr(\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols'
+       fi
+       aix_use_runtimelinking=no
+
+       # Test if we are trying to use run time linking or normal
+       # AIX style linking. If -brtl is somewhere in LDFLAGS, we
+       # need to do runtime linking.
+       case $host_os in aix4.[23]|aix4.[23].*|aix[5-9]*)
+         for ld_flag in $LDFLAGS; do
+         if (test $ld_flag = "-brtl" || test $ld_flag = "-Wl,-brtl"); then
+           aix_use_runtimelinking=yes
+           break
+         fi
+         done
+         ;;
+       esac
+
+       exp_sym_flag='-bexport'
+       no_entry_flag='-bnoentry'
+      fi
+
+      # When large executables or shared objects are built, AIX ld can
+      # have problems creating the table of contents.  If linking a library
+      # or program results in "error TOC overflow" add -mminimal-toc to
+      # CXXFLAGS/CFLAGS for g++/gcc.  In the cases where that is not
+      # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS.
+
+      archive_cmds_F77=''
+      hardcode_direct_F77=yes
+      hardcode_direct_absolute_F77=yes
+      hardcode_libdir_separator_F77=':'
+      link_all_deplibs_F77=yes
+      file_list_spec_F77='${wl}-f,'
+
+      if test "$GCC" = yes; then
+       case $host_os in aix4.[012]|aix4.[012].*)
+       # We only want to do this on AIX 4.2 and lower; the check
+       # below for broken collect2 doesn't work under 4.3+.
+         collect2name=`${CC} -print-prog-name=collect2`
+         if test -f "$collect2name" &&
+          strings "$collect2name" | $GREP resolve_lib_name >/dev/null
+         then
+         # We have reworked collect2
+         :
+         else
+         # We have old collect2
+         hardcode_direct_F77=unsupported
+         # It fails to find uninstalled libraries when the uninstalled
+         # path is not listed in the libpath.  Setting hardcode_minus_L
+         # to unsupported forces relinking
+         hardcode_minus_L_F77=yes
+         hardcode_libdir_flag_spec_F77='-L$libdir'
+         hardcode_libdir_separator_F77=
+         fi
+         ;;
+       esac
+       shared_flag='-shared'
+       if test "$aix_use_runtimelinking" = yes; then
+         shared_flag="$shared_flag "'${wl}-G'
+       fi
+      else
+       # not using gcc
+       if test "$host_cpu" = ia64; then
+       # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release
+       # chokes on -Wl,-G. The following line is correct:
+         shared_flag='-G'
+       else
+         if test "$aix_use_runtimelinking" = yes; then
+           shared_flag='${wl}-G'
+         else
+           shared_flag='${wl}-bM:SRE'
+         fi
+       fi
+      fi
+
+      export_dynamic_flag_spec_F77='${wl}-bexpall'
+      # It seems that -bexpall does not export symbols beginning with
+      # underscore (_), so it is better to generate a list of symbols to export.
+      always_export_symbols_F77=yes
+      if test "$aix_use_runtimelinking" = yes; then
+       # Warning - without using the other runtime loading flags (-brtl),
+       # -berok will link without error, but may produce a broken library.
+       allow_undefined_flag_F77='-berok'
+        # Determine the default libpath from the value encoded in an
+        # empty executable.
+        if test "${lt_cv_aix_libpath+set}" = set; then
+  aix_libpath=$lt_cv_aix_libpath
+else
+  if test "${lt_cv_aix_libpath__F77+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat > conftest.$ac_ext <<_ACEOF
+      program main
+
+      end
+_ACEOF
+if ac_fn_f77_try_link "$LINENO"; then :
+
+  lt_aix_libpath_sed='
+      /Import File Strings/,/^$/ {
+         /^0/ {
+             s/^0  *\([^ ]*\) *$/\1/
+             p
+         }
+      }'
+  lt_cv_aix_libpath__F77=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"`
+  # Check for a 64-bit object if we didn't find anything.
+  if test -z "$lt_cv_aix_libpath__F77"; then
+    lt_cv_aix_libpath__F77=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"`
+  fi
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+  if test -z "$lt_cv_aix_libpath__F77"; then
+    lt_cv_aix_libpath__F77="/usr/lib:/lib"
+  fi
+
+fi
+
+  aix_libpath=$lt_cv_aix_libpath__F77
+fi
+
+        hardcode_libdir_flag_spec_F77='${wl}-blibpath:$libdir:'"$aix_libpath"
+        archive_expsym_cmds_F77='$CC -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags `if test "x${allow_undefined_flag}" != "x"; then func_echo_all "${wl}${allow_undefined_flag}"; else :; fi` '"\${wl}$exp_sym_flag:\$export_symbols $shared_flag"
+      else
+       if test "$host_cpu" = ia64; then
+         hardcode_libdir_flag_spec_F77='${wl}-R $libdir:/usr/lib:/lib'
+         allow_undefined_flag_F77="-z nodefs"
+         archive_expsym_cmds_F77="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags ${wl}${allow_undefined_flag} '"\${wl}$exp_sym_flag:\$export_symbols"
+       else
+        # Determine the default libpath from the value encoded in an
+        # empty executable.
+        if test "${lt_cv_aix_libpath+set}" = set; then
+  aix_libpath=$lt_cv_aix_libpath
+else
+  if test "${lt_cv_aix_libpath__F77+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat > conftest.$ac_ext <<_ACEOF
+      program main
+
+      end
+_ACEOF
+if ac_fn_f77_try_link "$LINENO"; then :
+
+  lt_aix_libpath_sed='
+      /Import File Strings/,/^$/ {
+         /^0/ {
+             s/^0  *\([^ ]*\) *$/\1/
+             p
+         }
+      }'
+  lt_cv_aix_libpath__F77=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"`
+  # Check for a 64-bit object if we didn't find anything.
+  if test -z "$lt_cv_aix_libpath__F77"; then
+    lt_cv_aix_libpath__F77=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"`
+  fi
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+  if test -z "$lt_cv_aix_libpath__F77"; then
+    lt_cv_aix_libpath__F77="/usr/lib:/lib"
+  fi
+
+fi
+
+  aix_libpath=$lt_cv_aix_libpath__F77
+fi
+
+        hardcode_libdir_flag_spec_F77='${wl}-blibpath:$libdir:'"$aix_libpath"
+         # Warning - without using the other run time loading flags,
+         # -berok will link without error, but may produce a broken library.
+         no_undefined_flag_F77=' ${wl}-bernotok'
+         allow_undefined_flag_F77=' ${wl}-berok'
+         if test "$with_gnu_ld" = yes; then
+           # We only use this code for GNU lds that support --whole-archive.
+           whole_archive_flag_spec_F77='${wl}--whole-archive$convenience ${wl}--no-whole-archive'
+         else
+           # Exported symbols can be pulled into shared objects from archives
+           whole_archive_flag_spec_F77='$convenience'
+         fi
+         archive_cmds_need_lc_F77=yes
+         # This is similar to how AIX traditionally builds its shared libraries.
+         archive_expsym_cmds_F77="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs ${wl}-bnoentry $compiler_flags ${wl}-bE:$export_symbols${allow_undefined_flag}~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$soname'
+       fi
+      fi
+      ;;
+
+    amigaos*)
+      case $host_cpu in
+      powerpc)
+            # see comment about AmigaOS4 .so support
+            archive_cmds_F77='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+            archive_expsym_cmds_F77=''
+        ;;
+      m68k)
+            archive_cmds_F77='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)'
+            hardcode_libdir_flag_spec_F77='-L$libdir'
+            hardcode_minus_L_F77=yes
+        ;;
+      esac
+      ;;
+
+    bsdi[45]*)
+      export_dynamic_flag_spec_F77=-rdynamic
+      ;;
+
+    cygwin* | mingw* | pw32* | cegcc*)
+      # When not using gcc, we currently assume that we are using
+      # Microsoft Visual C++.
+      # hardcode_libdir_flag_spec is actually meaningless, as there is
+      # no search path for DLLs.
+      case $cc_basename in
+      cl*)
+       # Native MSVC
+       hardcode_libdir_flag_spec_F77=' '
+       allow_undefined_flag_F77=unsupported
+       always_export_symbols_F77=yes
+       file_list_spec_F77='@'
+       # Tell ltmain to make .lib files, not .a files.
+       libext=lib
+       # Tell ltmain to make .dll files, not .so files.
+       shrext_cmds=".dll"
+       # FIXME: Setting linknames here is a bad hack.
+       archive_cmds_F77='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-dll~linknames='
+       archive_expsym_cmds_F77='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then
+           sed -n -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' -e '1\\\!p' < $export_symbols > $output_objdir/$soname.exp;
+         else
+           sed -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' < $export_symbols > $output_objdir/$soname.exp;
+         fi~
+         $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~
+         linknames='
+       # The linker will not automatically build a static lib if we build a DLL.
+       # _LT_TAGVAR(old_archive_from_new_cmds, F77)='true'
+       enable_shared_with_static_runtimes_F77=yes
+       exclude_expsyms_F77='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*'
+       export_symbols_cmds_F77='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1,DATA/'\'' | $SED -e '\''/^[AITW][ ]/s/.*[ ]//'\'' | sort | uniq > $export_symbols'
+       # Don't use ranlib
+       old_postinstall_cmds_F77='chmod 644 $oldlib'
+       postlink_cmds_F77='lt_outputfile="@OUTPUT@"~
+         lt_tool_outputfile="@TOOL_OUTPUT@"~
+         case $lt_outputfile in
+           *.exe|*.EXE) ;;
+           *)
+             lt_outputfile="$lt_outputfile.exe"
+             lt_tool_outputfile="$lt_tool_outputfile.exe"
+             ;;
+         esac~
+         if test "$MANIFEST_TOOL" != ":" && test -f "$lt_outputfile.manifest"; then
+           $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1;
+           $RM "$lt_outputfile.manifest";
+         fi'
+       ;;
+      *)
+       # Assume MSVC wrapper
+       hardcode_libdir_flag_spec_F77=' '
+       allow_undefined_flag_F77=unsupported
+       # Tell ltmain to make .lib files, not .a files.
+       libext=lib
+       # Tell ltmain to make .dll files, not .so files.
+       shrext_cmds=".dll"
+       # FIXME: Setting linknames here is a bad hack.
+       archive_cmds_F77='$CC -o $lib $libobjs $compiler_flags `func_echo_all "$deplibs" | $SED '\''s/ -lc$//'\''` -link -dll~linknames='
+       # The linker will automatically build a .lib file if we build a DLL.
+       old_archive_from_new_cmds_F77='true'
+       # FIXME: Should let the user specify the lib program.
+       old_archive_cmds_F77='lib -OUT:$oldlib$oldobjs$old_deplibs'
+       enable_shared_with_static_runtimes_F77=yes
+       ;;
+      esac
+      ;;
+
+    darwin* | rhapsody*)
+
+
+  archive_cmds_need_lc_F77=no
+  hardcode_direct_F77=no
+  hardcode_automatic_F77=yes
+  hardcode_shlibpath_var_F77=unsupported
+  if test "$lt_cv_ld_force_load" = "yes"; then
+    whole_archive_flag_spec_F77='`for conv in $convenience\"\"; do test  -n \"$conv\" && new_convenience=\"$new_convenience ${wl}-force_load,$conv\"; done; func_echo_all \"$new_convenience\"`'
+    compiler_needs_object_F77=yes
+  else
+    whole_archive_flag_spec_F77=''
+  fi
+  link_all_deplibs_F77=yes
+  allow_undefined_flag_F77="$_lt_dar_allow_undefined"
+  case $cc_basename in
+     ifort*) _lt_dar_can_shared=yes ;;
+     *) _lt_dar_can_shared=$GCC ;;
+  esac
+  if test "$_lt_dar_can_shared" = "yes"; then
+    output_verbose_link_cmd=func_echo_all
+    archive_cmds_F77="\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod${_lt_dsymutil}"
+    module_cmds_F77="\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags${_lt_dsymutil}"
+    archive_expsym_cmds_F77="sed 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring ${_lt_dar_single_mod}${_lt_dar_export_syms}${_lt_dsymutil}"
+    module_expsym_cmds_F77="sed -e 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags${_lt_dar_export_syms}${_lt_dsymutil}"
+
+  else
+  ld_shlibs_F77=no
+  fi
+
+      ;;
+
+    dgux*)
+      archive_cmds_F77='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+      hardcode_libdir_flag_spec_F77='-L$libdir'
+      hardcode_shlibpath_var_F77=no
+      ;;
+
+    # FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor
+    # support.  Future versions do this automatically, but an explicit c++rt0.o
+    # does not break anything, and helps significantly (at the cost of a little
+    # extra space).
+    freebsd2.2*)
+      archive_cmds_F77='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags /usr/lib/c++rt0.o'
+      hardcode_libdir_flag_spec_F77='-R$libdir'
+      hardcode_direct_F77=yes
+      hardcode_shlibpath_var_F77=no
+      ;;
+
+    # Unfortunately, older versions of FreeBSD 2 do not have this feature.
+    freebsd2.*)
+      archive_cmds_F77='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'
+      hardcode_direct_F77=yes
+      hardcode_minus_L_F77=yes
+      hardcode_shlibpath_var_F77=no
+      ;;
+
+    # FreeBSD 3 and greater uses gcc -shared to do shared libraries.
+    freebsd* | dragonfly*)
+      archive_cmds_F77='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags'
+      hardcode_libdir_flag_spec_F77='-R$libdir'
+      hardcode_direct_F77=yes
+      hardcode_shlibpath_var_F77=no
+      ;;
+
+    hpux9*)
+      if test "$GCC" = yes; then
+       archive_cmds_F77='$RM $output_objdir/$soname~$CC -shared $pic_flag ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $libobjs $deplibs $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib'
+      else
+       archive_cmds_F77='$RM $output_objdir/$soname~$LD -b +b $install_libdir -o $output_objdir/$soname $libobjs $deplibs $linker_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib'
+      fi
+      hardcode_libdir_flag_spec_F77='${wl}+b ${wl}$libdir'
+      hardcode_libdir_separator_F77=:
+      hardcode_direct_F77=yes
+
+      # hardcode_minus_L: Not really in the search PATH,
+      # but as the default location of the library.
+      hardcode_minus_L_F77=yes
+      export_dynamic_flag_spec_F77='${wl}-E'
+      ;;
+
+    hpux10*)
+      if test "$GCC" = yes && test "$with_gnu_ld" = no; then
+       archive_cmds_F77='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags'
+      else
+       archive_cmds_F77='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags'
+      fi
+      if test "$with_gnu_ld" = no; then
+       hardcode_libdir_flag_spec_F77='${wl}+b ${wl}$libdir'
+       hardcode_libdir_separator_F77=:
+       hardcode_direct_F77=yes
+       hardcode_direct_absolute_F77=yes
+       export_dynamic_flag_spec_F77='${wl}-E'
+       # hardcode_minus_L: Not really in the search PATH,
+       # but as the default location of the library.
+       hardcode_minus_L_F77=yes
+      fi
+      ;;
+
+    hpux11*)
+      if test "$GCC" = yes && test "$with_gnu_ld" = no; then
+       case $host_cpu in
+       hppa*64*)
+         archive_cmds_F77='$CC -shared ${wl}+h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags'
+         ;;
+       ia64*)
+         archive_cmds_F77='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags'
+         ;;
+       *)
+         archive_cmds_F77='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags'
+         ;;
+       esac
+      else
+       case $host_cpu in
+       hppa*64*)
+         archive_cmds_F77='$CC -b ${wl}+h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags'
+         ;;
+       ia64*)
+         archive_cmds_F77='$CC -b ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags'
+         ;;
+       *)
+       archive_cmds_F77='$CC -b ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags'
+         ;;
+       esac
+      fi
+      if test "$with_gnu_ld" = no; then
+       hardcode_libdir_flag_spec_F77='${wl}+b ${wl}$libdir'
+       hardcode_libdir_separator_F77=:
+
+       case $host_cpu in
+       hppa*64*|ia64*)
+         hardcode_direct_F77=no
+         hardcode_shlibpath_var_F77=no
+         ;;
+       *)
+         hardcode_direct_F77=yes
+         hardcode_direct_absolute_F77=yes
+         export_dynamic_flag_spec_F77='${wl}-E'
+
+         # hardcode_minus_L: Not really in the search PATH,
+         # but as the default location of the library.
+         hardcode_minus_L_F77=yes
+         ;;
+       esac
+      fi
+      ;;
+
+    irix5* | irix6* | nonstopux*)
+      if test "$GCC" = yes; then
+       archive_cmds_F77='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
+       # Try to use the -exported_symbol ld option, if it does not
+       # work, assume that -exports_file does not work either and
+       # implicitly export all symbols.
+       # This should be the same for all languages, so no per-tag cache variable.
+       { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the $host_os linker accepts -exported_symbol" >&5
+$as_echo_n "checking whether the $host_os linker accepts -exported_symbol... " >&6; }
+if test "${lt_cv_irix_exported_symbol+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  save_LDFLAGS="$LDFLAGS"
+          LDFLAGS="$LDFLAGS -shared ${wl}-exported_symbol ${wl}foo ${wl}-update_registry ${wl}/dev/null"
+          cat > conftest.$ac_ext <<_ACEOF
+
+      subroutine foo
+      end
+_ACEOF
+if ac_fn_f77_try_link "$LINENO"; then :
+  lt_cv_irix_exported_symbol=yes
+else
+  lt_cv_irix_exported_symbol=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+           LDFLAGS="$save_LDFLAGS"
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_irix_exported_symbol" >&5
+$as_echo "$lt_cv_irix_exported_symbol" >&6; }
+       if test "$lt_cv_irix_exported_symbol" = yes; then
+          archive_expsym_cmds_F77='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations ${wl}-exports_file ${wl}$export_symbols -o $lib'
+       fi
+      else
+       archive_cmds_F77='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib'
+       archive_expsym_cmds_F77='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -exports_file $export_symbols -o $lib'
+      fi
+      archive_cmds_need_lc_F77='no'
+      hardcode_libdir_flag_spec_F77='${wl}-rpath ${wl}$libdir'
+      hardcode_libdir_separator_F77=:
+      inherit_rpath_F77=yes
+      link_all_deplibs_F77=yes
+      ;;
+
+    netbsd*)
+      if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then
+       archive_cmds_F77='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'  # a.out
+      else
+       archive_cmds_F77='$LD -shared -o $lib $libobjs $deplibs $linker_flags'      # ELF
+      fi
+      hardcode_libdir_flag_spec_F77='-R$libdir'
+      hardcode_direct_F77=yes
+      hardcode_shlibpath_var_F77=no
+      ;;
+
+    newsos6)
+      archive_cmds_F77='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+      hardcode_direct_F77=yes
+      hardcode_libdir_flag_spec_F77='${wl}-rpath ${wl}$libdir'
+      hardcode_libdir_separator_F77=:
+      hardcode_shlibpath_var_F77=no
+      ;;
+
+    *nto* | *qnx*)
+      ;;
+
+    openbsd*)
+      if test -f /usr/libexec/ld.so; then
+       hardcode_direct_F77=yes
+       hardcode_shlibpath_var_F77=no
+       hardcode_direct_absolute_F77=yes
+       if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
+         archive_cmds_F77='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags'
+         archive_expsym_cmds_F77='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-retain-symbols-file,$export_symbols'
+         hardcode_libdir_flag_spec_F77='${wl}-rpath,$libdir'
+         export_dynamic_flag_spec_F77='${wl}-E'
+       else
+         case $host_os in
+          openbsd[01].* | openbsd2.[0-7] | openbsd2.[0-7].*)
+            archive_cmds_F77='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'
+            hardcode_libdir_flag_spec_F77='-R$libdir'
+            ;;
+          *)
+            archive_cmds_F77='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags'
+            hardcode_libdir_flag_spec_F77='${wl}-rpath,$libdir'
+            ;;
+         esac
+       fi
+      else
+       ld_shlibs_F77=no
+      fi
+      ;;
+
+    os2*)
+      hardcode_libdir_flag_spec_F77='-L$libdir'
+      hardcode_minus_L_F77=yes
+      allow_undefined_flag_F77=unsupported
+      archive_cmds_F77='$ECHO "LIBRARY $libname INITINSTANCE" > $output_objdir/$libname.def~$ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~echo DATA >> $output_objdir/$libname.def~echo " SINGLE NONSHARED" >> $output_objdir/$libname.def~echo EXPORTS >> $output_objdir/$libname.def~emxexp $libobjs >> $output_objdir/$libname.def~$CC -Zdll -Zcrtdll -o $lib $libobjs $deplibs $compiler_flags $output_objdir/$libname.def'
+      old_archive_from_new_cmds_F77='emximp -o $output_objdir/$libname.a $output_objdir/$libname.def'
+      ;;
+
+    osf3*)
+      if test "$GCC" = yes; then
+       allow_undefined_flag_F77=' ${wl}-expect_unresolved ${wl}\*'
+       archive_cmds_F77='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
+      else
+       allow_undefined_flag_F77=' -expect_unresolved \*'
+       archive_cmds_F77='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib'
+      fi
+      archive_cmds_need_lc_F77='no'
+      hardcode_libdir_flag_spec_F77='${wl}-rpath ${wl}$libdir'
+      hardcode_libdir_separator_F77=:
+      ;;
+
+    osf4* | osf5*)     # as osf3* with the addition of -msym flag
+      if test "$GCC" = yes; then
+       allow_undefined_flag_F77=' ${wl}-expect_unresolved ${wl}\*'
+       archive_cmds_F77='$CC -shared${allow_undefined_flag} $pic_flag $libobjs $deplibs $compiler_flags ${wl}-msym ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
+       hardcode_libdir_flag_spec_F77='${wl}-rpath ${wl}$libdir'
+      else
+       allow_undefined_flag_F77=' -expect_unresolved \*'
+       archive_cmds_F77='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib'
+       archive_expsym_cmds_F77='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done; printf "%s\\n" "-hidden">> $lib.exp~
+       $CC -shared${allow_undefined_flag} ${wl}-input ${wl}$lib.exp $compiler_flags $libobjs $deplibs -soname $soname `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib~$RM $lib.exp'
+
+       # Both c and cxx compiler support -rpath directly
+       hardcode_libdir_flag_spec_F77='-rpath $libdir'
+      fi
+      archive_cmds_need_lc_F77='no'
+      hardcode_libdir_separator_F77=:
+      ;;
+
+    solaris*)
+      no_undefined_flag_F77=' -z defs'
+      if test "$GCC" = yes; then
+       wlarc='${wl}'
+       archive_cmds_F77='$CC -shared $pic_flag ${wl}-z ${wl}text ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags'
+       archive_expsym_cmds_F77='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~
+         $CC -shared $pic_flag ${wl}-z ${wl}text ${wl}-M ${wl}$lib.exp ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp'
+      else
+       case `$CC -V 2>&1` in
+       *"Compilers 5.0"*)
+         wlarc=''
+         archive_cmds_F77='$LD -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $linker_flags'
+         archive_expsym_cmds_F77='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~
+         $LD -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$RM $lib.exp'
+         ;;
+       *)
+         wlarc='${wl}'
+         archive_cmds_F77='$CC -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $compiler_flags'
+         archive_expsym_cmds_F77='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~
+         $CC -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp'
+         ;;
+       esac
+      fi
+      hardcode_libdir_flag_spec_F77='-R$libdir'
+      hardcode_shlibpath_var_F77=no
+      case $host_os in
+      solaris2.[0-5] | solaris2.[0-5].*) ;;
+      *)
+       # The compiler driver will combine and reorder linker options,
+       # but understands `-z linker_flag'.  GCC discards it without `$wl',
+       # but is careful enough not to reorder.
+       # Supported since Solaris 2.6 (maybe 2.5.1?)
+       if test "$GCC" = yes; then
+         whole_archive_flag_spec_F77='${wl}-z ${wl}allextract$convenience ${wl}-z ${wl}defaultextract'
+       else
+         whole_archive_flag_spec_F77='-z allextract$convenience -z defaultextract'
+       fi
+       ;;
+      esac
+      link_all_deplibs_F77=yes
+      ;;
+
+    sunos4*)
+      if test "x$host_vendor" = xsequent; then
+       # Use $CC to link under sequent, because it throws in some extra .o
+       # files that make .init and .fini sections work.
+       archive_cmds_F77='$CC -G ${wl}-h $soname -o $lib $libobjs $deplibs $compiler_flags'
+      else
+       archive_cmds_F77='$LD -assert pure-text -Bstatic -o $lib $libobjs $deplibs $linker_flags'
+      fi
+      hardcode_libdir_flag_spec_F77='-L$libdir'
+      hardcode_direct_F77=yes
+      hardcode_minus_L_F77=yes
+      hardcode_shlibpath_var_F77=no
+      ;;
+
+    sysv4)
+      case $host_vendor in
+       sni)
+         archive_cmds_F77='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+         hardcode_direct_F77=yes # is this really true???
+       ;;
+       siemens)
+         ## LD is ld it makes a PLAMLIB
+         ## CC just makes a GrossModule.
+         archive_cmds_F77='$LD -G -o $lib $libobjs $deplibs $linker_flags'
+         reload_cmds_F77='$CC -r -o $output$reload_objs'
+         hardcode_direct_F77=no
+        ;;
+       motorola)
+         archive_cmds_F77='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+         hardcode_direct_F77=no #Motorola manual says yes, but my tests say they lie
+       ;;
+      esac
+      runpath_var='LD_RUN_PATH'
+      hardcode_shlibpath_var_F77=no
+      ;;
+
+    sysv4.3*)
+      archive_cmds_F77='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+      hardcode_shlibpath_var_F77=no
+      export_dynamic_flag_spec_F77='-Bexport'
+      ;;
+
+    sysv4*MP*)
+      if test -d /usr/nec; then
+       archive_cmds_F77='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+       hardcode_shlibpath_var_F77=no
+       runpath_var=LD_RUN_PATH
+       hardcode_runpath_var=yes
+       ld_shlibs_F77=yes
+      fi
+      ;;
+
+    sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[01].[10]* | unixware7* | sco3.2v5.0.[024]*)
+      no_undefined_flag_F77='${wl}-z,text'
+      archive_cmds_need_lc_F77=no
+      hardcode_shlibpath_var_F77=no
+      runpath_var='LD_RUN_PATH'
+
+      if test "$GCC" = yes; then
+       archive_cmds_F77='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+       archive_expsym_cmds_F77='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+      else
+       archive_cmds_F77='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+       archive_expsym_cmds_F77='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+      fi
+      ;;
+
+    sysv5* | sco3.2v5* | sco5v6*)
+      # Note: We can NOT use -z defs as we might desire, because we do not
+      # link with -lc, and that would cause any symbols used from libc to
+      # always be unresolved, which means just about no library would
+      # ever link correctly.  If we're not using GNU ld we use -z text
+      # though, which does catch some bad symbols but isn't as heavy-handed
+      # as -z defs.
+      no_undefined_flag_F77='${wl}-z,text'
+      allow_undefined_flag_F77='${wl}-z,nodefs'
+      archive_cmds_need_lc_F77=no
+      hardcode_shlibpath_var_F77=no
+      hardcode_libdir_flag_spec_F77='${wl}-R,$libdir'
+      hardcode_libdir_separator_F77=':'
+      link_all_deplibs_F77=yes
+      export_dynamic_flag_spec_F77='${wl}-Bexport'
+      runpath_var='LD_RUN_PATH'
+
+      if test "$GCC" = yes; then
+       archive_cmds_F77='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+       archive_expsym_cmds_F77='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+      else
+       archive_cmds_F77='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+       archive_expsym_cmds_F77='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+      fi
+      ;;
+
+    uts4*)
+      archive_cmds_F77='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+      hardcode_libdir_flag_spec_F77='-L$libdir'
+      hardcode_shlibpath_var_F77=no
+      ;;
+
+    *)
+      ld_shlibs_F77=no
+      ;;
+    esac
+
+    if test x$host_vendor = xsni; then
+      case $host in
+      sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*)
+       export_dynamic_flag_spec_F77='${wl}-Blargedynsym'
+       ;;
+      esac
+    fi
+  fi
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ld_shlibs_F77" >&5
+$as_echo "$ld_shlibs_F77" >&6; }
+test "$ld_shlibs_F77" = no && can_build_shared=no
+
+with_gnu_ld_F77=$with_gnu_ld
+
+
+
+
+
+
+#
+# Do we need to explicitly link libc?
+#
+# Only runs when archive_cmds_need_lc_F77 is empty or "yes".  The default
+# answer is "yes"; it is refined by a probe only for shared builds with GCC
+# whose archive command is a single '$CC ...' invocation.  The probe result
+# is cached in lt_cv_archive_cmds_need_lc_F77.
+#
+case "x$archive_cmds_need_lc_F77" in
+x|xyes)
+  # Assume -lc should be added
+  archive_cmds_need_lc_F77=yes
+
+  if test "$enable_shared" = yes && test "$GCC" = yes; then
+    case $archive_cmds_F77 in
+    *'~'*)
+      # FIXME: we may have to deal with multi-command sequences.
+      ;;
+    '$CC '*)
+      # Test whether the compiler implicitly links with -lc since on some
+      # systems, -lgcc has to come before -lc. If gcc already passes -lc
+      # to ld, don't add -lc before -lgcc.
+      { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether -lc should be explicitly linked in" >&5
+$as_echo_n "checking whether -lc should be explicitly linked in... " >&6; }
+if test "${lt_cv_archive_cmds_need_lc_F77+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  $RM conftest*
+       echo "$lt_simple_compile_test_code" > conftest.$ac_ext
+
+       if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5
+  (eval $ac_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; } 2>conftest.err; then
+         # Give the placeholders referenced by $archive_cmds_F77 throwaway
+         # values.  The compiler and linker flags are set to -v, and the
+         # output of the link command is searched for " -lc " below.
+         soname=conftest
+         lib=conftest
+         libobjs=conftest.$ac_objext
+         deplibs=
+         wl=$lt_prog_compiler_wl_F77
+         pic_flag=$lt_prog_compiler_pic_F77
+         compiler_flags=-v
+         linker_flags=-v
+         verstring=
+         output_objdir=.
+         libname=conftest
+         # Clear the allow-undefined flag for the duration of the probe;
+         # it is restored right after.
+         lt_save_allow_undefined_flag=$allow_undefined_flag_F77
+         allow_undefined_flag_F77=
+         # " -lc " found in the link output => the driver already adds
+         # libc, so an explicit -lc is not needed.
+         if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$archive_cmds_F77 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1\""; } >&5
+  (eval $archive_cmds_F77 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }
+         then
+           lt_cv_archive_cmds_need_lc_F77=no
+         else
+           lt_cv_archive_cmds_need_lc_F77=yes
+         fi
+         allow_undefined_flag_F77=$lt_save_allow_undefined_flag
+       else
+         # NOTE(review): when the test compile fails, the cache variable is
+         # not assigned here, so the assignment after the result message
+         # copies an unset value into archive_cmds_need_lc_F77 -- confirm
+         # this is intended.
+         cat conftest.err 1>&5
+       fi
+       $RM conftest*
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_archive_cmds_need_lc_F77" >&5
+$as_echo "$lt_cv_archive_cmds_need_lc_F77" >&6; }
+      archive_cmds_need_lc_F77=$lt_cv_archive_cmds_need_lc_F77
+      ;;
+    esac
+  fi
+  ;;
+esac
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking dynamic linker characteristics" >&5
+$as_echo_n "checking dynamic linker characteristics... " >&6; }
+
+library_names_spec=
+libname_spec='lib$name'
+soname_spec=
+shrext_cmds=".so"
+postinstall_cmds=
+postuninstall_cmds=
+finish_cmds=
+finish_eval=
+shlibpath_var=
+shlibpath_overrides_runpath=unknown
+version_type=none
+dynamic_linker="$host_os ld.so"
+sys_lib_dlsearch_path_spec="/lib /usr/lib"
+need_lib_prefix=unknown
+hardcode_into_libs=no
+
+# when you set need_version to no, make sure it does not cause -set_version
+# flags to be left without arguments
+need_version=unknown
+
+case $host_os in
+aix3*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  library_names_spec='${libname}${release}${shared_ext}$versuffix $libname.a'
+  shlibpath_var=LIBPATH
+
+  # AIX 3 has no versioning support, so we append a major version to the name.
+  soname_spec='${libname}${release}${shared_ext}$major'
+  ;;
+
+aix[4-9]*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  hardcode_into_libs=yes
+  if test "$host_cpu" = ia64; then
+    # AIX 5 supports IA64
+    library_names_spec='${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext}$versuffix $libname${shared_ext}'
+    shlibpath_var=LD_LIBRARY_PATH
+  else
+    # With GCC up to 2.95.x, collect2 would create an import file
+    # for dependence libraries.  The import file would start with
+    # the line `#! .'.  This would cause the generated library to
+    # depend on `.', always an invalid library.  This was fixed in
+    # development snapshots of GCC prior to 3.0.
+    case $host_os in
+      aix4 | aix4.[01] | aix4.[01].*)
+      if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)'
+          echo ' yes '
+          echo '#endif'; } | ${CC} -E - | $GREP yes > /dev/null; then
+       :
+      else
+       can_build_shared=no
+      fi
+      ;;
+    esac
+    # AIX (on Power*) has no versioning support, so currently we can not hardcode correct
+    # soname into executable. Probably we can add versioning support to
+    # collect2, so additional links can be useful in future.
+    if test "$aix_use_runtimelinking" = yes; then
+      # If using run time linking (on AIX 4.2 or later) use lib<name>.so
+      # instead of lib<name>.a to let people know that these are not
+      # typical AIX shared libraries.
+      library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+    else
+      # We preserve .a as extension for shared libraries through AIX4.2
+      # and later when we are not doing run time linking.
+      library_names_spec='${libname}${release}.a $libname.a'
+      soname_spec='${libname}${release}${shared_ext}$major'
+    fi
+    shlibpath_var=LIBPATH
+  fi
+  ;;
+
+amigaos*)
+  # Library naming on AmigaOS depends on the CPU; only powerpc and m68k
+  # are handled, any other CPU keeps the defaults.
+  case $host_cpu in
+  powerpc)
+    # Since July 2007 AmigaOS4 officially supports .so libraries.
+    # When compiling the executable, add -use-dynld -Lsobjs: to the compileline.
+    library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+    ;;
+  m68k)
+    library_names_spec='$libname.ixlibrary $libname.a'
+    # Create ${libname}_ixlibrary.a entries in /sys/libs.
+    # For every installed *.ixlibrary: derive its base name, remove any
+    # existing /sys/libs entry, then create the symlink (aborting on
+    # failure).  The removal runs $RM directly; the earlier form
+    # `test $RM <path>' handed the remove command's words to `test' as
+    # operands, so the old entry was never deleted.
+    finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`func_echo_all "$lib" | $SED '\''s%^.*/\([^/]*\)\.ixlibrary$%\1%'\''`; $RM /sys/libs/${libname}_ixlibrary.a; $show "cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a"; cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a || exit 1; done'
+    ;;
+  esac
+  ;;
+
+beos*)
+  library_names_spec='${libname}${shared_ext}'
+  dynamic_linker="$host_os ld.so"
+  shlibpath_var=LIBRARY_PATH
+  ;;
+
+bsdi[45]*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir'
+  shlibpath_var=LD_LIBRARY_PATH
+  sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib"
+  sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib"
+  # the default ld.so.conf also contains /usr/contrib/lib and
+  # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow
+  # libtool to hard-code these into programs
+  ;;
+
+cygwin* | mingw* | pw32* | cegcc*)
+  version_type=windows
+  shrext_cmds=".dll"
+  need_version=no
+  need_lib_prefix=no
+
+  case $GCC,$cc_basename in
+  yes,*)
+    # gcc
+    library_names_spec='$libname.dll.a'
+    # DLL is installed to $(libdir)/../bin by postinstall_cmds
+    postinstall_cmds='base_file=`basename \${file}`~
+      dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i; echo \$dlname'\''`~
+      dldir=$destdir/`dirname \$dlpath`~
+      test -d \$dldir || mkdir -p \$dldir~
+      $install_prog $dir/$dlname \$dldir/$dlname~
+      chmod a+x \$dldir/$dlname~
+      if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then
+        eval '\''$striplib \$dldir/$dlname'\'' || exit \$?;
+      fi'
+    postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~
+      dlpath=$dir/\$dldll~
+       $RM \$dlpath'
+    shlibpath_overrides_runpath=yes
+
+    case $host_os in
+    cygwin*)
+      # Cygwin DLLs use 'cyg' prefix rather than 'lib'
+      soname_spec='`echo ${libname} | sed -e 's/^lib/cyg/'``echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}'
+
+      ;;
+    mingw* | cegcc*)
+      # MinGW DLLs use traditional 'lib' prefix
+      soname_spec='${libname}`echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}'
+      ;;
+    pw32*)
+      # pw32 DLLs use 'pw' prefix rather than 'lib'
+      library_names_spec='`echo ${libname} | sed -e 's/^lib/pw/'``echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}'
+      ;;
+    esac
+    dynamic_linker='Win32 ld.exe'
+    ;;
+
+  *,cl*)
+    # Native MSVC
+    libname_spec='$name'
+    soname_spec='${libname}`echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}'
+    library_names_spec='${libname}.dll.lib'
+
+    case $build_os in
+    mingw*)
+      sys_lib_search_path_spec=
+      lt_save_ifs=$IFS
+      IFS=';'
+      for lt_path in $LIB
+      do
+        IFS=$lt_save_ifs
+        # Let DOS variable expansion print the short 8.3 style file name.
+        lt_path=`cd "$lt_path" 2>/dev/null && cmd //C "for %i in (".") do @echo %~si"`
+        sys_lib_search_path_spec="$sys_lib_search_path_spec $lt_path"
+      done
+      IFS=$lt_save_ifs
+      # Convert to MSYS style.
+      sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | sed -e 's|\\\\|/|g' -e 's| \\([a-zA-Z]\\):| /\\1|g' -e 's|^ ||'`
+      ;;
+    cygwin*)
+      # Convert to unix form, then to dos form, then back to unix form
+      # but this time dos style (no spaces!) so that the unix form looks
+      # like /cygdrive/c/PROGRA~1:/cygdr...
+      sys_lib_search_path_spec=`cygpath --path --unix "$LIB"`
+      sys_lib_search_path_spec=`cygpath --path --dos "$sys_lib_search_path_spec" 2>/dev/null`
+      sys_lib_search_path_spec=`cygpath --path --unix "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"`
+      ;;
+    *)
+      sys_lib_search_path_spec="$LIB"
+      if $ECHO "$sys_lib_search_path_spec" | $GREP ';[c-zC-Z]:/' >/dev/null; then
+        # It is most probably a Windows format PATH.
+        sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'`
+      else
+        sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"`
+      fi
+      # FIXME: find the short name or the path components, as spaces are
+      # common. (e.g. "Program Files" -> "PROGRA~1")
+      ;;
+    esac
+
+    # DLL is installed to $(libdir)/../bin by postinstall_cmds
+    postinstall_cmds='base_file=`basename \${file}`~
+      dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i; echo \$dlname'\''`~
+      dldir=$destdir/`dirname \$dlpath`~
+      test -d \$dldir || mkdir -p \$dldir~
+      $install_prog $dir/$dlname \$dldir/$dlname'
+    postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~
+      dlpath=$dir/\$dldll~
+       $RM \$dlpath'
+    shlibpath_overrides_runpath=yes
+    dynamic_linker='Win32 link.exe'
+    ;;
+
+  *)
+    # Assume MSVC wrapper
+    library_names_spec='${libname}`echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext} $libname.lib'
+    dynamic_linker='Win32 ld.exe'
+    ;;
+  esac
+  # FIXME: first we should search . and the directory the executable is in
+  shlibpath_var=PATH
+  ;;
+
+darwin* | rhapsody*)
+  dynamic_linker="$host_os dyld"
+  version_type=darwin
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${major}$shared_ext ${libname}$shared_ext'
+  soname_spec='${libname}${release}${major}$shared_ext'
+  shlibpath_overrides_runpath=yes
+  shlibpath_var=DYLD_LIBRARY_PATH
+  shrext_cmds='`test .$module = .yes && echo .so || echo .dylib`'
+
+  sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib'
+  ;;
+
+dgux*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname$shared_ext'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  ;;
+
+freebsd* | dragonfly*)
+  # DragonFly does not have aout.  When/if they implement a new
+  # versioning mechanism, adjust this.
+  #
+  # Determine the object format: ask /usr/bin/objformat when it is
+  # executable, otherwise fall back to aout for freebsd2.x/3.x host names
+  # and elf for everything else.
+  if test -x /usr/bin/objformat; then
+    objformat=`/usr/bin/objformat`
+  else
+    case $host_os in
+    freebsd[23].*) objformat=aout ;;
+    *) objformat=elf ;;
+    esac
+  fi
+  # The version scheme is keyed on the object format just determined.
+  version_type=freebsd-$objformat
+  case $version_type in
+    freebsd-elf*)
+      library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}'
+      need_version=no
+      need_lib_prefix=no
+      ;;
+    freebsd-*)
+      # Any non-ELF format: version suffix is mandatory.
+      library_names_spec='${libname}${release}${shared_ext}$versuffix $libname${shared_ext}$versuffix'
+      need_version=yes
+      ;;
+  esac
+  shlibpath_var=LD_LIBRARY_PATH
+  # Per-release run-path behaviour.  Arms are tried in order, so the
+  # final catch-all only applies to releases not listed above it.
+  case $host_os in
+  freebsd2.*)
+    shlibpath_overrides_runpath=yes
+    ;;
+  freebsd3.[01]* | freebsdelf3.[01]*)
+    shlibpath_overrides_runpath=yes
+    hardcode_into_libs=yes
+    ;;
+  freebsd3.[2-9]* | freebsdelf3.[2-9]* | \
+  freebsd4.[0-5] | freebsdelf4.[0-5] | freebsd4.1.1 | freebsdelf4.1.1)
+    shlibpath_overrides_runpath=no
+    hardcode_into_libs=yes
+    ;;
+  *) # from 4.6 on, and DragonFly
+    shlibpath_overrides_runpath=yes
+    hardcode_into_libs=yes
+    ;;
+  esac
+  ;;
+
+gnu*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=no
+  hardcode_into_libs=yes
+  ;;
+
+haiku*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  dynamic_linker="$host_os runtime_loader"
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LIBRARY_PATH
+  shlibpath_overrides_runpath=yes
+  sys_lib_dlsearch_path_spec='/boot/home/config/lib /boot/common/lib /boot/system/lib'
+  hardcode_into_libs=yes
+  ;;
+
+hpux9* | hpux10* | hpux11*)
+  # Give a soname corresponding to the major version so that dld.sl refuses to
+  # link against other versions.
+  version_type=sunos
+  need_lib_prefix=no
+  need_version=no
+  case $host_cpu in
+  ia64*)
+    shrext_cmds='.so'
+    hardcode_into_libs=yes
+    dynamic_linker="$host_os dld.so"
+    shlibpath_var=LD_LIBRARY_PATH
+    shlibpath_overrides_runpath=yes # Unless +noenvvar is specified.
+    library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+    soname_spec='${libname}${release}${shared_ext}$major'
+    if test "X$HPUX_IA64_MODE" = X32; then
+      sys_lib_search_path_spec="/usr/lib/hpux32 /usr/local/lib/hpux32 /usr/local/lib"
+    else
+      sys_lib_search_path_spec="/usr/lib/hpux64 /usr/local/lib/hpux64"
+    fi
+    sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec
+    ;;
+  hppa*64*)
+    shrext_cmds='.sl'
+    hardcode_into_libs=yes
+    dynamic_linker="$host_os dld.sl"
+    shlibpath_var=LD_LIBRARY_PATH # How should we handle SHLIB_PATH
+    shlibpath_overrides_runpath=yes # Unless +noenvvar is specified.
+    library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+    soname_spec='${libname}${release}${shared_ext}$major'
+    sys_lib_search_path_spec="/usr/lib/pa20_64 /usr/ccs/lib/pa20_64"
+    sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec
+    ;;
+  *)
+    shrext_cmds='.sl'
+    dynamic_linker="$host_os dld.sl"
+    shlibpath_var=SHLIB_PATH
+    shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH
+    library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+    soname_spec='${libname}${release}${shared_ext}$major'
+    ;;
+  esac
+  # HP-UX runs *really* slowly unless shared libraries are mode 555, ...
+  postinstall_cmds='chmod 555 $lib'
+  # or fails outright, so override atomically:
+  install_override_mode=555
+  ;;
+
+interix[3-9]*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  dynamic_linker='Interix 3.x ld.so.1 (PE, like ELF)'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=no
+  hardcode_into_libs=yes
+  ;;
+
+irix5* | irix6* | nonstopux*)
+  case $host_os in
+    nonstopux*) version_type=nonstopux ;;
+    *)
+       if test "$lt_cv_prog_gnu_ld" = yes; then
+               version_type=linux # correct to gnu/linux during the next big refactor
+       else
+               version_type=irix
+       fi ;;
+  esac
+  need_lib_prefix=no
+  need_version=no
+  soname_spec='${libname}${release}${shared_ext}$major'
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext} $libname${shared_ext}'
+  case $host_os in
+  irix5* | nonstopux*)
+    libsuff= shlibsuff=
+    ;;
+  *)
+    case $LD in # libtool.m4 will add one of these switches to LD
+    *-32|*"-32 "|*-melf32bsmip|*"-melf32bsmip ")
+      libsuff= shlibsuff= libmagic=32-bit;;
+    *-n32|*"-n32 "|*-melf32bmipn32|*"-melf32bmipn32 ")
+      libsuff=32 shlibsuff=N32 libmagic=N32;;
+    *-64|*"-64 "|*-melf64bmip|*"-melf64bmip ")
+      libsuff=64 shlibsuff=64 libmagic=64-bit;;
+    *) libsuff= shlibsuff= libmagic=never-match;;
+    esac
+    ;;
+  esac
+  shlibpath_var=LD_LIBRARY${shlibsuff}_PATH
+  shlibpath_overrides_runpath=no
+  sys_lib_search_path_spec="/usr/lib${libsuff} /lib${libsuff} /usr/local/lib${libsuff}"
+  sys_lib_dlsearch_path_spec="/usr/lib${libsuff} /lib${libsuff}"
+  hardcode_into_libs=yes
+  ;;
+
+# No shared lib support for Linux oldld, aout, or coff.
+linux*oldld* | linux*aout* | linux*coff*)
+  dynamic_linker=no
+  ;;
+
+# This must be glibc/ELF.
+linux* | k*bsd*-gnu | kopensolaris*-gnu)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=no
+
+  # Some binutils ld are patched to set DT_RUNPATH
+  if test "${lt_cv_shlibpath_overrides_runpath+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  lt_cv_shlibpath_overrides_runpath=no
+    save_LDFLAGS=$LDFLAGS
+    save_libdir=$libdir
+    eval "libdir=/foo; wl=\"$lt_prog_compiler_wl_F77\"; \
+        LDFLAGS=\"\$LDFLAGS $hardcode_libdir_flag_spec_F77\""
+    cat > conftest.$ac_ext <<_ACEOF
+      program main
+
+      end
+_ACEOF
+if ac_fn_f77_try_link "$LINENO"; then :
+  if  ($OBJDUMP -p conftest$ac_exeext) 2>/dev/null | grep "RUNPATH.*$libdir" >/dev/null; then :
+  lt_cv_shlibpath_overrides_runpath=yes
+fi
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+    LDFLAGS=$save_LDFLAGS
+    libdir=$save_libdir
+
+fi
+
+  shlibpath_overrides_runpath=$lt_cv_shlibpath_overrides_runpath
+
+  # This implies no fast_install, which is unacceptable.
+  # Some rework will be needed to allow for fast_install
+  # before this can be enabled.
+  hardcode_into_libs=yes
+
+  # Append ld.so.conf contents to the search path
+  if test -f /etc/ld.so.conf; then
+    lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s 2>/dev/null", \$2)); skip = 1; } { if (!skip) print \$0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;/^[      ]*hwcap[        ]/d;s/[:,      ]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;s/"//g;/^$/d' | tr '\n' ' '`
+    sys_lib_dlsearch_path_spec="/lib /usr/lib $lt_ld_extra"
+  fi
+
+  # We used to test for /lib/ld.so.1 and disable shared libraries on
+  # powerpc, because MkLinux only supported shared libraries with the
+  # GNU dynamic linker.  Since this was broken with cross compilers,
+  # most powerpc-linux boxes support dynamic linking these days and
+  # people can always --disable-shared, the test was removed, and we
+  # assume the GNU/Linux dynamic linker is in use.
+  dynamic_linker='GNU/Linux ld.so'
+  ;;
+
+netbsd*)
+  version_type=sunos
+  need_lib_prefix=no
+  need_version=no
+  # Preprocess the bare token __ELF__.  If grep still finds it in the
+  # output, the a.out settings are used (versioned names plus an
+  # `ldconfig -m' finish step); otherwise the ELF settings are used, which
+  # add a major-version soname.
+  if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then
+    library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix'
+    finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir'
+    dynamic_linker='NetBSD (a.out) ld.so'
+  else
+    library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
+    soname_spec='${libname}${release}${shared_ext}$major'
+    dynamic_linker='NetBSD ld.elf_so'
+  fi
+  # Common to both object formats.
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=yes
+  hardcode_into_libs=yes
+  ;;
+
+newsos6)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=yes
+  ;;
+
+*nto* | *qnx*)
+  version_type=qnx
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=no
+  hardcode_into_libs=yes
+  dynamic_linker='ldqnx.so'
+  ;;
+
+openbsd*)
+  version_type=sunos
+  sys_lib_dlsearch_path_spec="/usr/lib"
+  need_lib_prefix=no
+  # Some older versions of OpenBSD (3.3 at least) *do* need versioned libs.
+  case $host_os in
+    openbsd3.3 | openbsd3.3.*) need_version=yes ;;
+    *)                         need_version=no  ;;
+  esac
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix'
+  finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir'
+  shlibpath_var=LD_LIBRARY_PATH
+  if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
+    case $host_os in
+      openbsd2.[89] | openbsd2.[89].*)
+       shlibpath_overrides_runpath=no
+       ;;
+      *)
+       shlibpath_overrides_runpath=yes
+       ;;
+      esac
+  else
+    shlibpath_overrides_runpath=yes
+  fi
+  ;;
+
+os2*)
+  libname_spec='$name'
+  shrext_cmds=".dll"
+  need_lib_prefix=no
+  library_names_spec='$libname${shared_ext} $libname.a'
+  dynamic_linker='OS/2 ld.exe'
+  shlibpath_var=LIBPATH
+  ;;
+
+osf3* | osf4* | osf5*)
+  version_type=osf
+  need_lib_prefix=no
+  need_version=no
+  soname_spec='${libname}${release}${shared_ext}$major'
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  shlibpath_var=LD_LIBRARY_PATH
+  sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib"
+  sys_lib_dlsearch_path_spec="$sys_lib_search_path_spec"
+  ;;
+
+rdos*)
+  dynamic_linker=no
+  ;;
+
+solaris*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=yes
+  hardcode_into_libs=yes
+  # ldd complains unless libraries are executable
+  postinstall_cmds='chmod +x $lib'
+  ;;
+
+sunos4*)
+  version_type=sunos
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix'
+  finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=yes
+  if test "$with_gnu_ld" = yes; then
+    need_lib_prefix=no
+  fi
+  need_version=yes
+  ;;
+
+sysv4 | sysv4.3*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  case $host_vendor in
+    sni)
+      shlibpath_overrides_runpath=no
+      need_lib_prefix=no
+      runpath_var=LD_RUN_PATH
+      ;;
+    siemens)
+      need_lib_prefix=no
+      ;;
+    motorola)
+      need_lib_prefix=no
+      need_version=no
+      shlibpath_overrides_runpath=no
+      sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib'
+      ;;
+  esac
+  ;;
+
+sysv4*MP*)
+  if test -d /usr/nec ;then
+    version_type=linux # correct to gnu/linux during the next big refactor
+    library_names_spec='$libname${shared_ext}.$versuffix $libname${shared_ext}.$major $libname${shared_ext}'
+    soname_spec='$libname${shared_ext}.$major'
+    shlibpath_var=LD_LIBRARY_PATH
+  fi
+  ;;
+
+sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*)
+  version_type=freebsd-elf
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=yes
+  hardcode_into_libs=yes
+  if test "$with_gnu_ld" = yes; then
+    sys_lib_search_path_spec='/usr/local/lib /usr/gnu/lib /usr/ccs/lib /usr/lib /lib'
+  else
+    sys_lib_search_path_spec='/usr/ccs/lib /usr/lib'
+    case $host_os in
+      sco3.2v5*)
+        sys_lib_search_path_spec="$sys_lib_search_path_spec /lib"
+       ;;
+    esac
+  fi
+  sys_lib_dlsearch_path_spec='/usr/lib'
+  ;;
+
+tpf*)
+  # TPF is a cross-target only.  Preferred cross-host = GNU/Linux.
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=no
+  hardcode_into_libs=yes
+  ;;
+
+uts4*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  ;;
+
+*)
+  dynamic_linker=no
+  ;;
+esac
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $dynamic_linker" >&5
+$as_echo "$dynamic_linker" >&6; }
+test "$dynamic_linker" = no && can_build_shared=no
+
+variables_saved_for_relink="PATH $shlibpath_var $runpath_var"
+if test "$GCC" = yes; then
+  variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH"
+fi
+
+if test "${lt_cv_sys_lib_search_path_spec+set}" = set; then
+  sys_lib_search_path_spec="$lt_cv_sys_lib_search_path_spec"
+fi
+if test "${lt_cv_sys_lib_dlsearch_path_spec+set}" = set; then
+  sys_lib_dlsearch_path_spec="$lt_cv_sys_lib_dlsearch_path_spec"
+fi
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking how to hardcode library paths into programs" >&5
+$as_echo_n "checking how to hardcode library paths into programs... " >&6; }
+hardcode_action_F77=
+if test -n "$hardcode_libdir_flag_spec_F77" ||
+   test -n "$runpath_var_F77" ||
+   test "X$hardcode_automatic_F77" = "Xyes" ; then
+
+  # We can hardcode non-existent directories.
+  if test "$hardcode_direct_F77" != no &&
+     # If the only mechanism to avoid hardcoding is shlibpath_var, we
+     # have to relink, otherwise we might link with an installed library
+     # when we should be linking with a yet-to-be-installed one
+     ## test "$_LT_TAGVAR(hardcode_shlibpath_var, F77)" != no &&
+     test "$hardcode_minus_L_F77" != no; then
+    # Linking always hardcodes the temporary library directory.
+    hardcode_action_F77=relink
+  else
+    # We can link without hardcoding, and we can hardcode nonexisting dirs.
+    hardcode_action_F77=immediate
+  fi
+else
+  # We cannot hardcode anything, or else we can only hardcode existing
+  # directories.
+  hardcode_action_F77=unsupported
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $hardcode_action_F77" >&5
+$as_echo "$hardcode_action_F77" >&6; }
+
+if test "$hardcode_action_F77" = relink ||
+   test "$inherit_rpath_F77" = yes; then
+  # Fast installation is not supported
+  enable_fast_install=no
+elif test "$shlibpath_overrides_runpath" = yes ||
+     test "$enable_shared" = no; then
+  # Fast installation is not necessary
+  enable_fast_install=needless
+fi
+
+
+
+
+
+
+
+  fi # test -n "$compiler"
+
+  GCC=$lt_save_GCC
+  CC="$lt_save_CC"
+  CFLAGS="$lt_save_CFLAGS"
+fi # test "$_lt_disable_F77" != yes
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+
+
+
+
+
+
+
+
+
+
+
+        ac_config_commands="$ac_config_commands libtool"
+
+
+
+
+# Only expand once:
+
+
+
+# Generate an error here if attempting to build both shared and static when
+# $libname.a is in $library_names_spec (as mentioned above), rather than
+# wait for ar or ld to fail.
+#
+if test "$enable_shared" = yes && test "$enable_static" = yes; then
+  case $library_names_spec in
+    *libname.a*)
+      as_fn_error "cannot create both shared and static libraries on this system, --disable one of the two" "$LINENO" 5
+      ;;
+  esac
+fi
+
+ if test "$enable_static" = yes; then
+  ENABLE_STATIC_TRUE=
+  ENABLE_STATIC_FALSE='#'
+else
+  ENABLE_STATIC_TRUE='#'
+  ENABLE_STATIC_FALSE=
+fi
+
+
+
+# Many of these library and header checks are for the benefit of
+# supplementary programs.  libgmp doesn't use anything too weird.
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C header files" >&5
+$as_echo_n "checking for ANSI C header files... " >&6; }
+if test "${ac_cv_header_stdc+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <float.h>
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  ac_cv_header_stdc=yes
+else
+  ac_cv_header_stdc=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+if test $ac_cv_header_stdc = yes; then
+  # SunOS 4.x string.h does not declare mem*, contrary to ANSI.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <string.h>
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+  $EGREP "memchr" >/dev/null 2>&1; then :
+
+else
+  ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+fi
+
+if test $ac_cv_header_stdc = yes; then
+  # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <stdlib.h>
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+  $EGREP "free" >/dev/null 2>&1; then :
+
+else
+  ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+fi
+
+if test $ac_cv_header_stdc = yes; then
+  # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi.
+  if test "$cross_compiling" = yes; then :
+  :
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <ctype.h>
+#include <stdlib.h>
+#if ((' ' & 0x0FF) == 0x020)
+# define ISLOWER(c) ('a' <= (c) && (c) <= 'z')
+# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c))
+#else
+# define ISLOWER(c) \
+                  (('a' <= (c) && (c) <= 'i') \
+                    || ('j' <= (c) && (c) <= 'r') \
+                    || ('s' <= (c) && (c) <= 'z'))
+# define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c))
+#endif
+
+#define XOR(e, f) (((e) && !(f)) || (!(e) && (f)))
+int
+main ()
+{
+  int i;
+  for (i = 0; i < 256; i++)
+    if (XOR (islower (i), ISLOWER (i))
+       || toupper (i) != TOUPPER (i))
+      return 2;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_run "$LINENO"; then :
+
+else
+  ac_cv_header_stdc=no
+fi
+rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
+  conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+
+fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_stdc" >&5
+$as_echo "$ac_cv_header_stdc" >&6; }
+if test $ac_cv_header_stdc = yes; then
+
+$as_echo "#define STDC_HEADERS 1" >>confdefs.h
+
+fi
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether time.h and sys/time.h may both be included" >&5
+$as_echo_n "checking whether time.h and sys/time.h may both be included... " >&6; }
+if test "${ac_cv_header_time+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <sys/types.h>
+#include <sys/time.h>
+#include <time.h>
+
+int
+main ()
+{
+if ((struct tm *) 0)
+return 0;
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  ac_cv_header_time=yes
+else
+  ac_cv_header_time=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_time" >&5
+$as_echo "$ac_cv_header_time" >&6; }
+if test $ac_cv_header_time = yes; then
+
+$as_echo "#define TIME_WITH_SYS_TIME 1" >>confdefs.h
+
+fi
+
+
+# Reasons for testing:
+#   float.h - not in SunOS bundled cc
+#   invent.h - IRIX specific
+#   langinfo.h - X/Open standard only, not in djgpp for instance
+#   locale.h - old systems won't have this
+#   nl_types.h - X/Open standard only, not in djgpp for instance
+#       (usually langinfo.h gives nl_item etc, but not on netbsd 1.4.1)
+#   sys/attributes.h - IRIX specific
+#   sys/iograph.h - IRIX specific
+#   sys/mman.h - not in Cray Unicos
+#   sys/param.h - not in mingw
+#   sys/processor.h - solaris specific, though also present in macos
+#   sys/pstat.h - HPUX specific
+#   sys/resource.h - not in mingw
+#   sys/sysctl.h - not in mingw
+#   sys/sysinfo.h - OSF specific
+#   sys/syssgi.h - IRIX specific
+#   sys/systemcfg.h - AIX specific
+#   sys/time.h - autoconf suggests testing, don't know anywhere without it
+#   sys/times.h - not in mingw
+#   machine/hal_sysinfo.h - OSF specific
+#
+# inttypes.h, stdint.h, unistd.h and sys/types.h are already in the autoconf
+# default tests
+#
+for ac_header in fcntl.h float.h invent.h langinfo.h locale.h nl_types.h sys/attributes.h sys/iograph.h sys/mman.h sys/param.h sys/processor.h sys/pstat.h sys/sysinfo.h sys/syssgi.h sys/systemcfg.h sys/time.h sys/times.h
+do :
+  as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`
+ac_fn_c_check_header_mongrel "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default"
+eval as_val=\$$as_ac_Header
+   if test "x$as_val" = x""yes; then :
+  cat >>confdefs.h <<_ACEOF
+#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+
+done
+
+
+# On SunOS, sys/resource.h needs sys/time.h (for struct timeval)
+for ac_header in sys/resource.h
+do :
+  ac_fn_c_check_header_compile "$LINENO" "sys/resource.h" "ac_cv_header_sys_resource_h" "#if TIME_WITH_SYS_TIME
+# include <sys/time.h>
+# include <time.h>
+#else
+# if HAVE_SYS_TIME_H
+#  include <sys/time.h>
+# else
+#  include <time.h>
+# endif
+#endif
+"
+if test "x$ac_cv_header_sys_resource_h" = x""yes; then :
+  cat >>confdefs.h <<_ACEOF
+#define HAVE_SYS_RESOURCE_H 1
+_ACEOF
+
+fi
+
+done
+
+
+# On NetBSD and OpenBSD, sys/sysctl.h needs sys/param.h for various constants
+for ac_header in sys/sysctl.h
+do :
+  ac_fn_c_check_header_compile "$LINENO" "sys/sysctl.h" "ac_cv_header_sys_sysctl_h" "#if HAVE_SYS_PARAM_H
+# include <sys/param.h>
+#endif
+"
+if test "x$ac_cv_header_sys_sysctl_h" = x""yes; then :
+  cat >>confdefs.h <<_ACEOF
+#define HAVE_SYS_SYSCTL_H 1
+_ACEOF
+
+fi
+
+done
+
+
+# On OSF 4.0, <machine/hal_sysinfo.h> must have <sys/sysinfo.h> for ulong_t
+for ac_header in machine/hal_sysinfo.h
+do :
+  ac_fn_c_check_header_compile "$LINENO" "machine/hal_sysinfo.h" "ac_cv_header_machine_hal_sysinfo_h" "#if HAVE_SYS_SYSINFO_H
+# include <sys/sysinfo.h>
+#endif
+"
+if test "x$ac_cv_header_machine_hal_sysinfo_h" = x""yes; then :
+  cat >>confdefs.h <<_ACEOF
+#define HAVE_MACHINE_HAL_SYSINFO_H 1
+_ACEOF
+
+fi
+
+done
+
+
+# Reasons for testing:
+#   optarg - not declared in mingw
+#   fgetc, fscanf, ungetc, vfprintf - not declared in SunOS 4
+#   sys_errlist, sys_nerr - not declared in SunOS 4
+#
+# optarg should be in unistd.h and the rest in stdio.h, both of which are
+# in the autoconf default includes.
+#
+# sys_errlist and sys_nerr are supposed to be in <errno.h> on SunOS according
+# to the man page (but aren't), in glibc they're in stdio.h.
+#
+ac_fn_c_check_decl "$LINENO" "fgetc" "ac_cv_have_decl_fgetc" "$ac_includes_default"
+if test "x$ac_cv_have_decl_fgetc" = x""yes; then :
+  ac_have_decl=1
+else
+  ac_have_decl=0
+fi
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_DECL_FGETC $ac_have_decl
+_ACEOF
+ac_fn_c_check_decl "$LINENO" "fscanf" "ac_cv_have_decl_fscanf" "$ac_includes_default"
+if test "x$ac_cv_have_decl_fscanf" = x""yes; then :
+  ac_have_decl=1
+else
+  ac_have_decl=0
+fi
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_DECL_FSCANF $ac_have_decl
+_ACEOF
+ac_fn_c_check_decl "$LINENO" "optarg" "ac_cv_have_decl_optarg" "$ac_includes_default"
+if test "x$ac_cv_have_decl_optarg" = x""yes; then :
+  ac_have_decl=1
+else
+  ac_have_decl=0
+fi
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_DECL_OPTARG $ac_have_decl
+_ACEOF
+ac_fn_c_check_decl "$LINENO" "ungetc" "ac_cv_have_decl_ungetc" "$ac_includes_default"
+if test "x$ac_cv_have_decl_ungetc" = x""yes; then :
+  ac_have_decl=1
+else
+  ac_have_decl=0
+fi
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_DECL_UNGETC $ac_have_decl
+_ACEOF
+ac_fn_c_check_decl "$LINENO" "vfprintf" "ac_cv_have_decl_vfprintf" "$ac_includes_default"
+if test "x$ac_cv_have_decl_vfprintf" = x""yes; then :
+  ac_have_decl=1
+else
+  ac_have_decl=0
+fi
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_DECL_VFPRINTF $ac_have_decl
+_ACEOF
+
+ac_fn_c_check_decl "$LINENO" "sys_errlist" "ac_cv_have_decl_sys_errlist" "#include <stdio.h>
+#include <errno.h>
+"
+if test "x$ac_cv_have_decl_sys_errlist" = x""yes; then :
+  ac_have_decl=1
+else
+  ac_have_decl=0
+fi
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_DECL_SYS_ERRLIST $ac_have_decl
+_ACEOF
+ac_fn_c_check_decl "$LINENO" "sys_nerr" "ac_cv_have_decl_sys_nerr" "#include <stdio.h>
+#include <errno.h>
+"
+if test "x$ac_cv_have_decl_sys_nerr" = x""yes; then :
+  ac_have_decl=1
+else
+  ac_have_decl=0
+fi
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_DECL_SYS_NERR $ac_have_decl
+_ACEOF
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking return type of signal handlers" >&5
+$as_echo_n "checking return type of signal handlers... " >&6; }
+if test "${ac_cv_type_signal+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <sys/types.h>
+#include <signal.h>
+
+int
+main ()
+{
+return *(signal (0, 0)) (0) == 1;
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  ac_cv_type_signal=int
+else
+  ac_cv_type_signal=void
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_type_signal" >&5
+$as_echo "$ac_cv_type_signal" >&6; }
+
+cat >>confdefs.h <<_ACEOF
+#define RETSIGTYPE $ac_cv_type_signal
+_ACEOF
+
+
+
+# Reasons for testing:
+#   intmax_t       - C99
+#   intptr_t       - C99
+#   long double    - not in the HP bundled K&R cc
+#   long long      - only in reasonably recent compilers
+#   ptrdiff_t      - seems to be everywhere, maybe don't need to check this
+#   quad_t         - BSD specific
+#   uint_least32_t - C99
+#
+# the default includes are sufficient for all these types
+#
+ac_fn_c_check_type "$LINENO" "intmax_t" "ac_cv_type_intmax_t" "$ac_includes_default"
+if test "x$ac_cv_type_intmax_t" = x""yes; then :
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_INTMAX_T 1
+_ACEOF
+
+
+fi
+ac_fn_c_check_type "$LINENO" "long double" "ac_cv_type_long_double" "$ac_includes_default"
+if test "x$ac_cv_type_long_double" = x""yes; then :
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_LONG_DOUBLE 1
+_ACEOF
+
+
+fi
+ac_fn_c_check_type "$LINENO" "long long" "ac_cv_type_long_long" "$ac_includes_default"
+if test "x$ac_cv_type_long_long" = x""yes; then :
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_LONG_LONG 1
+_ACEOF
+
+
+fi
+ac_fn_c_check_type "$LINENO" "ptrdiff_t" "ac_cv_type_ptrdiff_t" "$ac_includes_default"
+if test "x$ac_cv_type_ptrdiff_t" = x""yes; then :
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_PTRDIFF_T 1
+_ACEOF
+
+
+fi
+ac_fn_c_check_type "$LINENO" "quad_t" "ac_cv_type_quad_t" "$ac_includes_default"
+if test "x$ac_cv_type_quad_t" = x""yes; then :
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_QUAD_T 1
+_ACEOF
+
+
+fi
+ac_fn_c_check_type "$LINENO" "uint_least32_t" "ac_cv_type_uint_least32_t" "$ac_includes_default"
+if test "x$ac_cv_type_uint_least32_t" = x""yes; then :
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_UINT_LEAST32_T 1
+_ACEOF
+
+
+fi
+ac_fn_c_check_type "$LINENO" "intptr_t" "ac_cv_type_intptr_t" "$ac_includes_default"
+if test "x$ac_cv_type_intptr_t" = x""yes; then :
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_INTPTR_T 1
+_ACEOF
+
+
+fi
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for preprocessor stringizing operator" >&5
+$as_echo_n "checking for preprocessor stringizing operator... " >&6; }
+if test "${ac_cv_c_stringize+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#define x(y) #y
+
+char *s = x(teststring);
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+  $EGREP "#teststring" >/dev/null 2>&1; then :
+  ac_cv_c_stringize=no
+else
+  ac_cv_c_stringize=yes
+fi
+rm -f conftest*
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_stringize" >&5
+$as_echo "$ac_cv_c_stringize" >&6; }
+if test $ac_cv_c_stringize = yes; then
+
+$as_echo "#define HAVE_STRINGIZE 1" >>confdefs.h
+
+fi
+
+
+# FIXME: Really want #ifndef __cplusplus around the #define volatile
+# replacement autoconf gives, since volatile is always available in C++.
+# But we don't use it in C++ currently.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for working volatile" >&5
+$as_echo_n "checking for working volatile... " >&6; }
+if test "${ac_cv_c_volatile+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+volatile int x;
+int * volatile y = (int *) 0;
+return !x && !y;
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  ac_cv_c_volatile=yes
+else
+  ac_cv_c_volatile=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_volatile" >&5
+$as_echo "$ac_cv_c_volatile" >&6; }
+if test $ac_cv_c_volatile = no; then
+
+$as_echo "#define volatile /**/" >>confdefs.h
+
+fi
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C/C++ restrict keyword" >&5
+$as_echo_n "checking for C/C++ restrict keyword... " >&6; }
+if test "${ac_cv_c_restrict+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_cv_c_restrict=no
+   # The order here caters to the fact that C++ does not require restrict.
+   for ac_kw in __restrict __restrict__ _Restrict restrict; do
+     cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+typedef int * int_ptr;
+       int foo (int_ptr $ac_kw ip) {
+       return ip[0];
+       }
+int
+main ()
+{
+int s[1];
+       int * $ac_kw t = s;
+       t[0] = 0;
+       return foo(t)
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  ac_cv_c_restrict=$ac_kw
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+     test "$ac_cv_c_restrict" != no && break
+   done
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_restrict" >&5
+$as_echo "$ac_cv_c_restrict" >&6; }
+
+ case $ac_cv_c_restrict in
+   restrict) ;;
+   no) $as_echo "#define restrict /**/" >>confdefs.h
+ ;;
+   *)  cat >>confdefs.h <<_ACEOF
+#define restrict $ac_cv_c_restrict
+_ACEOF
+ ;;
+ esac
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether <stdarg.h> exists and works" >&5
+$as_echo_n "checking whether <stdarg.h> exists and works... " >&6; }
+if test "${gmp_cv_c_stdarg+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <stdarg.h>
+int foo (int x, ...)
+{
+  va_list  ap;
+  int      y;
+  va_start (ap, x);
+  y = va_arg (ap, int);
+  va_end (ap);
+  return y;
+}
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  gmp_cv_c_stdarg=yes
+else
+  gmp_cv_c_stdarg=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_cv_c_stdarg" >&5
+$as_echo "$gmp_cv_c_stdarg" >&6; }
+if test $gmp_cv_c_stdarg = yes; then
+
+$as_echo "#define HAVE_STDARG 1" >>confdefs.h
+
+fi
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether gcc __attribute__ ((const)) works" >&5
+$as_echo_n "checking whether gcc __attribute__ ((const)) works... " >&6; }
+if test "${gmp_cv_c_attribute_const+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+int foo (int x) __attribute__ ((const));
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  gmp_cv_c_attribute_const=yes
+else
+  gmp_cv_c_attribute_const=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_cv_c_attribute_const" >&5
+$as_echo "$gmp_cv_c_attribute_const" >&6; }
+if test $gmp_cv_c_attribute_const = yes; then
+
+$as_echo "#define HAVE_ATTRIBUTE_CONST 1" >>confdefs.h
+
+fi
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether gcc __attribute__ ((malloc)) works" >&5
+$as_echo_n "checking whether gcc __attribute__ ((malloc)) works... " >&6; }
+if test "${gmp_cv_c_attribute_malloc+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat >conftest.c <<EOF
+void *foo (int x) __attribute__ ((malloc));
+EOF
+gmp_compile="$CC $CFLAGS $CPPFLAGS -c conftest.c >conftest.out 2>&1"
+if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_compile\""; } >&5
+  (eval $gmp_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  if grep "attribute directive ignored" conftest.out >/dev/null; then
+    gmp_cv_c_attribute_malloc=no
+  else
+    gmp_cv_c_attribute_malloc=yes
+  fi
+else
+  gmp_cv_c_attribute_malloc=no
+fi
+cat conftest.out >&5
+rm -f conftest*
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_cv_c_attribute_malloc" >&5
+$as_echo "$gmp_cv_c_attribute_malloc" >&6; }
+if test $gmp_cv_c_attribute_malloc = yes; then
+
+$as_echo "#define HAVE_ATTRIBUTE_MALLOC 1" >>confdefs.h
+
+fi
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether gcc __attribute__ ((mode (XX))) works" >&5
+$as_echo_n "checking whether gcc __attribute__ ((mode (XX))) works... " >&6; }
+if test "${gmp_cv_c_attribute_mode+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+typedef int SItype __attribute__ ((mode (SI)));
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  gmp_cv_c_attribute_mode=yes
+else
+  gmp_cv_c_attribute_mode=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_cv_c_attribute_mode" >&5
+$as_echo "$gmp_cv_c_attribute_mode" >&6; }
+if test $gmp_cv_c_attribute_mode = yes; then
+
+$as_echo "#define HAVE_ATTRIBUTE_MODE 1" >>confdefs.h
+
+fi
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether gcc __attribute__ ((noreturn)) works" >&5
+$as_echo_n "checking whether gcc __attribute__ ((noreturn)) works... " >&6; }
+if test "${gmp_cv_c_attribute_noreturn+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+void foo (int x) __attribute__ ((noreturn));
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  gmp_cv_c_attribute_noreturn=yes
+else
+  gmp_cv_c_attribute_noreturn=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_cv_c_attribute_noreturn" >&5
+$as_echo "$gmp_cv_c_attribute_noreturn" >&6; }
+if test $gmp_cv_c_attribute_noreturn = yes; then
+
+$as_echo "#define HAVE_ATTRIBUTE_NORETURN 1" >>confdefs.h
+
+fi
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for inline" >&5
+$as_echo_n "checking for inline... " >&6; }
+if test "${ac_cv_c_inline+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_cv_c_inline=no
+for ac_kw in inline __inline__ __inline; do
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#ifndef __cplusplus
+typedef int foo_t;
+static $ac_kw foo_t static_foo () {return 0; }
+$ac_kw foo_t foo () {return 0; }
+#endif
+
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  ac_cv_c_inline=$ac_kw
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+  test "$ac_cv_c_inline" != no && break
+done
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_inline" >&5
+$as_echo "$ac_cv_c_inline" >&6; }
+
+case $ac_cv_c_inline in
+  inline | yes) ;;
+  *)
+    case $ac_cv_c_inline in
+      no) ac_val=;;
+      *) ac_val=$ac_cv_c_inline;;
+    esac
+    cat >>confdefs.h <<_ACEOF
+#ifndef __cplusplus
+#define inline $ac_val
+#endif
+_ACEOF
+    ;;
+esac
+
+
+case $ac_cv_c_inline in
+no) ;;
+*)
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#define __GMP_WITHIN_CONFIGURE_INLINE 1
+#define __GMP_WITHIN_CONFIGURE 1   /* ignore template stuff */
+#define GMP_NAIL_BITS $GMP_NAIL_BITS
+#define GMP_LIMB_BITS 123
+$DEFN_LONG_LONG_LIMB
+#include "$srcdir/gmp-h.in"
+
+#ifndef __GMP_EXTERN_INLINE
+die die die
+#endif
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+
+else
+  case $ac_cv_c_inline in
+  yes) tmp_inline=inline ;;
+  *)   tmp_inline=$ac_cv_c_inline ;;
+  esac
+  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: gmp.h doesnt recognise compiler \"$tmp_inline\", inlines will be unavailable" >&5
+$as_echo "$as_me: WARNING: gmp.h doesnt recognise compiler \"$tmp_inline\", inlines will be unavailable" >&2;}
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+  ;;
+esac
+
+
+# from libtool
+LIBM=
+case $host in
+*-*-beos* | *-*-cegcc* | *-*-cygwin* | *-*-haiku* | *-*-pw32* | *-*-darwin*)
+  # These systems don't have libm, or don't need it
+  ;;
+*-ncr-sysv4.3*)
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for _mwvalidcheckl in -lmw" >&5
+$as_echo_n "checking for _mwvalidcheckl in -lmw... " >&6; }
+if test "${ac_cv_lib_mw__mwvalidcheckl+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lmw  $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char _mwvalidcheckl ();
+int
+main ()
+{
+return _mwvalidcheckl ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  ac_cv_lib_mw__mwvalidcheckl=yes
+else
+  ac_cv_lib_mw__mwvalidcheckl=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_mw__mwvalidcheckl" >&5
+$as_echo "$ac_cv_lib_mw__mwvalidcheckl" >&6; }
+if test "x$ac_cv_lib_mw__mwvalidcheckl" = x""yes; then :
+  LIBM="-lmw"
+fi
+
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for cos in -lm" >&5
+$as_echo_n "checking for cos in -lm... " >&6; }
+if test "${ac_cv_lib_m_cos+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lm  $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char cos ();
+int
+main ()
+{
+return cos ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  ac_cv_lib_m_cos=yes
+else
+  ac_cv_lib_m_cos=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_m_cos" >&5
+$as_echo "$ac_cv_lib_m_cos" >&6; }
+if test "x$ac_cv_lib_m_cos" = x""yes; then :
+  LIBM="$LIBM -lm"
+fi
+
+  ;;
+*)
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for cos in -lm" >&5
+$as_echo_n "checking for cos in -lm... " >&6; }
+if test "${ac_cv_lib_m_cos+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lm  $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char cos ();
+int
+main ()
+{
+return cos ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  ac_cv_lib_m_cos=yes
+else
+  ac_cv_lib_m_cos=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_m_cos" >&5
+$as_echo "$ac_cv_lib_m_cos" >&6; }
+if test "x$ac_cv_lib_m_cos" = x""yes; then :
+  LIBM="-lm"
+fi
+
+  ;;
+esac
+
+
+
+
+# The Ultrix 4.2 mips builtin alloca declared by alloca.h only works
+# for constant arguments.  Useless!
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for working alloca.h" >&5
+$as_echo_n "checking for working alloca.h... " >&6; }
+if test "${gmp_cv_header_alloca+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <alloca.h>
+int
+main ()
+{
+char *p = (char *) alloca (2 * sizeof (int));
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  gmp_cv_header_alloca=yes
+else
+  gmp_cv_header_alloca=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_cv_header_alloca" >&5
+$as_echo "$gmp_cv_header_alloca" >&6; }
+if test $gmp_cv_header_alloca = yes; then
+
+$as_echo "#define HAVE_ALLOCA_H 1" >>confdefs.h
+
+fi
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for alloca (via gmp-impl.h)" >&5
+$as_echo_n "checking for alloca (via gmp-impl.h)... " >&6; }
+if test "${gmp_cv_func_alloca+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#define __GMP_WITHIN_CONFIGURE 1   /* ignore template stuff */
+#define GMP_NAIL_BITS $GMP_NAIL_BITS
+#define GMP_LIMB_BITS 123
+$DEFN_LONG_LONG_LIMB
+#include "$srcdir/gmp-h.in"
+
+#include "$srcdir/gmp-impl.h"
+
+int
+main ()
+{
+char *p = (char *) alloca (1);
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  gmp_cv_func_alloca=yes
+else
+  gmp_cv_func_alloca=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_cv_func_alloca" >&5
+$as_echo "$gmp_cv_func_alloca" >&6; }
+if test $gmp_cv_func_alloca = yes; then
+
+$as_echo "#define HAVE_ALLOCA 1" >>confdefs.h
+
+fi
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to allocate temporary memory" >&5
+$as_echo_n "checking how to allocate temporary memory... " >&6; }
+if test "${gmp_cv_option_alloca+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  case $enable_alloca in
+  yes)
+    gmp_cv_option_alloca=alloca
+    ;;
+  no)
+    gmp_cv_option_alloca=malloc-reentrant
+    ;;
+  reentrant | notreentrant)
+    case $gmp_cv_func_alloca in
+    yes)  gmp_cv_option_alloca=alloca ;;
+    *)    gmp_cv_option_alloca=malloc-$enable_alloca ;;
+    esac
+    ;;
+  *)
+    gmp_cv_option_alloca=$enable_alloca
+    ;;
+esac
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_cv_option_alloca" >&5
+$as_echo "$gmp_cv_option_alloca" >&6; }
+
+
+
+case $gmp_cv_option_alloca in
+  alloca)
+    if test $gmp_cv_func_alloca = no; then
+      as_fn_error "--enable-alloca=alloca specified, but alloca not available" "$LINENO" 5
+    fi
+    $as_echo "#define WANT_TMP_ALLOCA 1" >>confdefs.h
+
+    TAL_OBJECT=tal-reent$U.lo
+    ;;
+  malloc-reentrant)
+    $as_echo "#define WANT_TMP_REENTRANT 1" >>confdefs.h
+
+    TAL_OBJECT=tal-reent$U.lo
+    ;;
+  malloc-notreentrant)
+    $as_echo "#define WANT_TMP_NOTREENTRANT 1" >>confdefs.h
+
+    TAL_OBJECT=tal-notreent$U.lo
+    ;;
+  debug)
+    $as_echo "#define WANT_TMP_DEBUG 1" >>confdefs.h
+
+    TAL_OBJECT=tal-debug$U.lo
+    ;;
+  *)
+    # checks at the start of configure.in should protect us
+    as_fn_error "unrecognised --enable-alloca=$gmp_cv_option_alloca" "$LINENO" 5
+    ;;
+esac
+
+
+
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <stdio.h>
+#define __GMP_WITHIN_CONFIGURE 1   /* ignore template stuff */
+#define GMP_NAIL_BITS $GMP_NAIL_BITS
+#define GMP_LIMB_BITS 123
+$DEFN_LONG_LONG_LIMB
+#include "$srcdir/gmp-h.in"
+
+#if ! _GMP_H_HAVE_FILE
+die die die
+#endif
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: gmp.h doesnt recognise <stdio.h>, FILE prototypes will be unavailable" >&5
+$as_echo "$as_me: WARNING: gmp.h doesnt recognise <stdio.h>, FILE prototypes will be unavailable" >&2;}
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+
+# Byte-order detection.  The result is cached in ac_cv_c_bigendian and ends
+# up as yes, no, universal or unknown.  It starts as unknown, and each probe
+# below runs only while the value is still unknown.
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether byte ordering is bigendian" >&5
+$as_echo_n "checking whether byte ordering is bigendian... " >&6; }
+if test "${ac_cv_c_bigendian+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_cv_c_bigendian=unknown
+    # See if we're dealing with a universal compiler.
+    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#ifndef __APPLE_CC__
+              not a universal capable compiler
+            #endif
+            typedef int dummy;
+
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+
+       # Check for potential -arch flags.  It is not universal unless
+       # there are at least two -arch flags with different values.
+       ac_arch=
+       ac_prev=
+       for ac_word in $CC $CFLAGS $CPPFLAGS $LDFLAGS; do
+        if test -n "$ac_prev"; then
+          case $ac_word in
+            i?86 | x86_64 | ppc | ppc64)
+              if test -z "$ac_arch" || test "$ac_arch" = "$ac_word"; then
+                ac_arch=$ac_word
+              else
+                ac_cv_c_bigendian=universal
+                break
+              fi
+              ;;
+          esac
+          ac_prev=
+        elif test "x$ac_word" = "x-arch"; then
+          ac_prev=arch
+        fi
+       done
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+    if test $ac_cv_c_bigendian = unknown; then
+      # See if sys/param.h defines the BYTE_ORDER macro.
+      cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <sys/types.h>
+            #include <sys/param.h>
+
+int
+main ()
+{
+#if ! (defined BYTE_ORDER && defined BIG_ENDIAN \
+                    && defined LITTLE_ENDIAN && BYTE_ORDER && BIG_ENDIAN \
+                    && LITTLE_ENDIAN)
+             bogus endian macros
+            #endif
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  # It does; now see whether it defined to BIG_ENDIAN or not.
+        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <sys/types.h>
+               #include <sys/param.h>
+
+int
+main ()
+{
+#if BYTE_ORDER != BIG_ENDIAN
+                not big endian
+               #endif
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  ac_cv_c_bigendian=yes
+else
+  ac_cv_c_bigendian=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+    fi
+    if test $ac_cv_c_bigendian = unknown; then
+      # See if <limits.h> defines _LITTLE_ENDIAN or _BIG_ENDIAN (e.g., Solaris).
+      cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <limits.h>
+
+int
+main ()
+{
+#if ! (defined _LITTLE_ENDIAN || defined _BIG_ENDIAN)
+             bogus endian macros
+            #endif
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  # It does; now see whether it defined to _BIG_ENDIAN or not.
+        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <limits.h>
+
+int
+main ()
+{
+#ifndef _BIG_ENDIAN
+                not big endian
+               #endif
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  ac_cv_c_bigendian=yes
+else
+  ac_cv_c_bigendian=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+    fi
+    if test $ac_cv_c_bigendian = unknown; then
+      # Compile a test program.
+      if test "$cross_compiling" = yes; then :
+  # Try to guess by grepping values from an object file.
+        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+short int ascii_mm[] =
+                 { 0x4249, 0x4765, 0x6E44, 0x6961, 0x6E53, 0x7953, 0 };
+               short int ascii_ii[] =
+                 { 0x694C, 0x5454, 0x656C, 0x6E45, 0x6944, 0x6E61, 0 };
+               int use_ascii (int i) {
+                 return ascii_mm[i] + ascii_ii[i];
+               }
+               short int ebcdic_ii[] =
+                 { 0x89D3, 0xE3E3, 0x8593, 0x95C5, 0x89C4, 0x9581, 0 };
+               short int ebcdic_mm[] =
+                 { 0xC2C9, 0xC785, 0x95C4, 0x8981, 0x95E2, 0xA8E2, 0 };
+               int use_ebcdic (int i) {
+                 return ebcdic_mm[i] + ebcdic_ii[i];
+               }
+               extern int foo;
+
+int
+main ()
+{
+return use_ascii (foo) == use_ebcdic (foo);
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  # The object file is searched for two marker strings: BIGenDianSyS alone
+  # gives yes, LiTTleEnDian alone gives no, both together leave unknown.
+  if grep BIGenDianSyS conftest.$ac_objext >/dev/null; then
+             ac_cv_c_bigendian=yes
+           fi
+           if grep LiTTleEnDian conftest.$ac_objext >/dev/null ; then
+             if test "$ac_cv_c_bigendian" = unknown; then
+               ac_cv_c_bigendian=no
+             else
+               # finding both strings is unlikely to happen, but who knows?
+               ac_cv_c_bigendian=unknown
+             fi
+           fi
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+$ac_includes_default
+int
+main ()
+{
+
+            /* Are we little or big endian?  From Harbison&Steele.  */
+            union
+            {
+              long int l;
+              char c[sizeof (long int)];
+            } u;
+            u.l = 1;
+            return u.c[sizeof (long int) - 1] == 1;
+
+  ;
+  return 0;
+}
+_ACEOF
+# The program exits 0 when the last byte of the long value 1 is not 1,
+# which is recorded as "no" (not big endian); a failing run gives "yes".
+if ac_fn_c_try_run "$LINENO"; then :
+  ac_cv_c_bigendian=no
+else
+  ac_cv_c_bigendian=yes
+fi
+rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
+  conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+
+    fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_bigendian" >&5
+$as_echo "$ac_cv_c_bigendian" >&6; }
+ # Publish the result: yes/no append HAVE_LIMB_BIG_ENDIAN or
+ # HAVE_LIMB_LITTLE_ENDIAN to confdefs.h plus a define_not_for_expansion
+ # line to $gmp_tmpconfigm4p; universal only defines
+ # AC_APPLE_UNIVERSAL_BUILD; any other value defines nothing.
+ case $ac_cv_c_bigendian in #(
+   yes)
+     $as_echo "#define HAVE_LIMB_BIG_ENDIAN 1" >>confdefs.h
+
+
+echo "define_not_for_expansion(\`HAVE_LIMB_BIG_ENDIAN')" >> $gmp_tmpconfigm4p
+;; #(
+   no)
+     $as_echo "#define HAVE_LIMB_LITTLE_ENDIAN 1" >>confdefs.h
+
+
+echo "define_not_for_expansion(\`HAVE_LIMB_LITTLE_ENDIAN')" >> $gmp_tmpconfigm4p
+
+   ;; #(
+   universal)
+
+$as_echo "#define AC_APPLE_UNIVERSAL_BUILD 1" >>confdefs.h
+
+     ;; #(
+   *)
+     : ;;
+ esac
+
+
+
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking format of \`double' floating point" >&5
+$as_echo_n "checking format of \`double' floating point... " >&6; }
+if test "${gmp_cv_c_double_format+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  gmp_cv_c_double_format=unknown
+cat >conftest.c <<\EOF
+struct foo {
+  char    before[8];
+  double  x;
+  char    after[8];
+};
+extern struct foo foo;
+struct foo foo = {
+  { '\001', '\043', '\105', '\147', '\211', '\253', '\315', '\357' },
+  -123456789.0,
+  { '\376', '\334', '\272', '\230', '\166', '\124', '\062', '\020' },
+};
+EOF
+gmp_compile="$CC $CFLAGS $CPPFLAGS -c conftest.c >&5 2>&1"
+if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_compile\""; } >&5
+  (eval $gmp_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+cat >conftest.awk <<\EOF
+
+BEGIN {
+  found = 0
+}
+
+{
+  for (f = 2; f <= NF; f++)
+    {
+      for (i = 0; i < 23; i++)
+        got[i] = got[i+1];
+      got[23] = $f;
+
+      # match the special begin and end sequences
+      if (got[0] != "001") continue
+      if (got[1] != "043") continue
+      if (got[2] != "105") continue
+      if (got[3] != "147") continue
+      if (got[4] != "211") continue
+      if (got[5] != "253") continue
+      if (got[6] != "315") continue
+      if (got[7] != "357") continue
+      if (got[16] != "376") continue
+      if (got[17] != "334") continue
+      if (got[18] != "272") continue
+      if (got[19] != "230") continue
+      if (got[20] != "166") continue
+      if (got[21] != "124") continue
+      if (got[22] != "062") continue
+      if (got[23] != "020") continue
+
+      saw = " (" got[8] " " got[9] " " got[10] " " got[11] " " got[12] " " got[13] " " got[14] " " got[15] ")"
+
+      if (got[8]  == "000" &&  \
+          got[9]  == "000" &&  \
+          got[10] == "000" &&  \
+          got[11] == "124" &&  \
+          got[12] == "064" &&  \
+          got[13] == "157" &&  \
+          got[14] == "235" &&  \
+          got[15] == "301")
+        {
+          print "IEEE little endian"
+          found = 1
+          exit
+        }
+
+      # Little endian with the two 4-byte halves swapped, as used by ARM
+      # when the chip is in little endian mode.
+      #
+      if (got[8]  == "064" &&  \
+          got[9]  == "157" &&  \
+          got[10] == "235" &&  \
+          got[11] == "301" &&  \
+          got[12] == "000" &&  \
+          got[13] == "000" &&  \
+          got[14] == "000" &&  \
+          got[15] == "124")
+        {
+          print "IEEE little endian, swapped halves"
+          found = 1
+          exit
+        }
+
+      # gcc 2.95.4 on one GNU/Linux ARM system was seen generating 000 in
+      # the last byte, whereas 124 is correct.  Not sure where the bug
+      # actually lies, but a running program didn't seem to get a full
+      # mantissa worth of working bits.
+      #
+      # We match this case explicitly so we can give a nice result message,
+      # but we deliberately exclude it from the normal IEEE double setups
+      # since it's too broken.
+      #
+      if (got[8]  == "064" &&  \
+          got[9]  == "157" &&  \
+          got[10] == "235" &&  \
+          got[11] == "301" &&  \
+          got[12] == "000" &&  \
+          got[13] == "000" &&  \
+          got[14] == "000" &&  \
+          got[15] == "000")
+        {
+          print "bad ARM software floats"
+          found = 1
+          exit
+        }
+
+      if (got[8]  == "301" &&  \
+          got[9]  == "235" &&  \
+          got[10] == "157" &&  \
+          got[11] == "064" &&  \
+          got[12] == "124" &&  \
+          got[13] == "000" &&  \
+          got[14] == "000" &&  \
+         got[15] == "000")
+        {
+          print "IEEE big endian"
+          found = 1
+          exit
+        }
+
+      if (got[8]  == "353" &&  \
+          got[9]  == "315" &&  \
+          got[10] == "242" &&  \
+          got[11] == "171" &&  \
+          got[12] == "000" &&  \
+          got[13] == "240" &&  \
+          got[14] == "000" &&  \
+          got[15] == "000")
+        {
+          print "VAX D"
+          found = 1
+          exit
+        }
+
+      if (got[8]  == "275" &&  \
+          got[9]  == "301" &&  \
+          got[10] == "064" &&  \
+          got[11] == "157" &&  \
+          got[12] == "000" &&  \
+          got[13] == "124" &&  \
+          got[14] == "000" &&  \
+          got[15] == "000")
+        {
+          print "VAX G"
+          found = 1
+          exit
+        }
+
+      if (got[8]  == "300" &&  \
+          got[9]  == "033" &&  \
+          got[10] == "353" &&  \
+          got[11] == "171" &&  \
+          got[12] == "242" &&  \
+          got[13] == "240" &&  \
+          got[14] == "000" &&  \
+          got[15] == "000")
+        {
+          print "Cray CFP"
+          found = 1
+          exit
+        }
+    }
+}
+
+END {
+  if (! found)
+    print "unknown", saw
+}
+
+EOF
+  gmp_cv_c_double_format=`od -b conftest.$OBJEXT | $AWK -f conftest.awk`
+  case $gmp_cv_c_double_format in
+  unknown*)
+    echo "cannot match anything, conftest.$OBJEXT contains" >&5
+    od -b conftest.$OBJEXT >&5
+    ;;
+  esac
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: oops, cannot compile test program" >&5
+$as_echo "$as_me: WARNING: oops, cannot compile test program" >&2;}
+fi
+rm -f conftest*
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_cv_c_double_format" >&5
+$as_echo "$gmp_cv_c_double_format" >&6; }
+
+
+
+# Turn the detected double format into a HAVE_DOUBLE_* define in confdefs.h.
+# Only the two plain IEEE cases also append a define_not_for_expansion line
+# to $gmp_tmpconfigm4p.  "bad ARM software floats" deliberately defines
+# nothing, and unknown or unexpected values only produce warnings.
+case $gmp_cv_c_double_format in
+  "IEEE big endian")
+    $as_echo "#define HAVE_DOUBLE_IEEE_BIG_ENDIAN 1" >>confdefs.h
+
+
+echo "define_not_for_expansion(\`HAVE_DOUBLE_IEEE_BIG_ENDIAN')" >> $gmp_tmpconfigm4p
+
+    ;;
+  "IEEE little endian")
+    $as_echo "#define HAVE_DOUBLE_IEEE_LITTLE_ENDIAN 1" >>confdefs.h
+
+
+echo "define_not_for_expansion(\`HAVE_DOUBLE_IEEE_LITTLE_ENDIAN')" >> $gmp_tmpconfigm4p
+
+    ;;
+  "IEEE little endian, swapped halves")
+    $as_echo "#define HAVE_DOUBLE_IEEE_LITTLE_SWAPPED 1" >>confdefs.h
+ ;;
+  "VAX D")
+    $as_echo "#define HAVE_DOUBLE_VAX_D 1" >>confdefs.h
+ ;;
+  "VAX G")
+    $as_echo "#define HAVE_DOUBLE_VAX_G 1" >>confdefs.h
+ ;;
+  "Cray CFP")
+    $as_echo "#define HAVE_DOUBLE_CRAY_CFP 1" >>confdefs.h
+ ;;
+  "bad ARM software floats")
+    ;;
+  unknown*)
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Could not determine float format." >&5
+$as_echo "$as_me: WARNING: Could not determine float format." >&2;}
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Conversions to and from \"double\" may be slow." >&5
+$as_echo "$as_me: WARNING: Conversions to and from \"double\" may be slow." >&2;}
+    ;;
+  *)
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: oops, unrecognised float format: $gmp_cv_c_double_format" >&5
+$as_echo "$as_me: WARNING: oops, unrecognised float format: $gmp_cv_c_double_format" >&2;}
+    ;;
+esac
+
+
+
+# Reasons for testing:
+#   alarm - not in mingw
+#   attr_get - IRIX specific
+#   clock_gettime - not in glibc 2.2.4, only very recent systems
+#   cputime - not in glibc
+#   getsysinfo - OSF specific
+#   getrusage - not in mingw
+#   gettimeofday - not in mingw
+#   mmap - not in mingw, djgpp
+#   nl_langinfo - X/Open standard only, not in djgpp for instance
+#   obstack_vprintf - glibc specific
+#   processor_info - solaris specific
+#   pstat_getprocessor - HPUX specific (10.x and up)
+#   raise - an ANSI-ism, though probably almost universal by now
+#   read_real_time - AIX specific
+#   sigaction - not in mingw
+#   sigaltstack - not in mingw, or old AIX (reputedly)
+#   sigstack - not in mingw
+#   strerror - not in SunOS
+#   strnlen - glibc extension (some other systems too)
+#   syssgi - IRIX specific
+#   times - not in mingw
+#
+# clock_gettime is in librt on *-*-osf5.1.  We could look for it
+# there, but that's not worth bothering with unless it has a decent
+# resolution (in a quick test clock_getres said only 1 millisecond).
+#
+# AC_FUNC_STRNLEN is not used because we don't want the AC_LIBOBJ
+# replacement setups it gives.  It detects a faulty strnlen on AIX, but
+# missing out on that test is ok since our only use of strnlen is in
+# __gmp_replacement_vsnprintf which is not required on AIX since it has a
+# vsnprintf.
+#
+# NOTE(review): the loop below also tests several functions that have no
+# entry in the list of reasons above (clock, getpagesize, localeconv,
+# memset, mprotect, popen, strchr, strtol, strtoul, sysconf, sysctl,
+# sysctlbyname).
+#
+# For each function the result lands in the cache variable
+# ac_cv_func_<name> (name passed through $as_tr_sh); when that is yes, a
+# HAVE_<NAME> define (name passed through $as_tr_cpp) is appended to
+# confdefs.h.
+for ac_func in alarm attr_get clock clock_gettime cputime getpagesize getrusage gettimeofday getsysinfo localeconv memset mmap mprotect nl_langinfo obstack_vprintf popen processor_info pstat_getprocessor raise read_real_time sigaction sigaltstack sigstack syssgi strchr strerror strnlen strtol strtoul sysconf sysctl sysctlbyname times
+do :
+  as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
+ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var"
+eval as_val=\$$as_ac_var
+   if test "x$as_val" = x""yes; then :
+  cat >>confdefs.h <<_ACEOF
+#define `$as_echo "HAVE_$ac_func" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+done
+
+
+
+ac_fn_c_check_func "$LINENO" "vsnprintf" "ac_cv_func_vsnprintf"
+if test "x$ac_cv_func_vsnprintf" = x""yes; then :
+  gmp_vsnprintf_exists=yes
+else
+  gmp_vsnprintf_exists=no
+fi
+
+if test "$gmp_vsnprintf_exists" = no; then
+  gmp_cv_func_vsnprintf=no
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether vsnprintf works" >&5
+$as_echo_n "checking whether vsnprintf works... " >&6; }
+if test "${gmp_cv_func_vsnprintf+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  gmp_cv_func_vsnprintf=yes
+   for i in 'return check ("hello world");' 'int n; return check ("%nhello world", &n);'; do
+     if test "$cross_compiling" = yes; then :
+  gmp_cv_func_vsnprintf=probably; break
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+#include <string.h>  /* for strcmp */
+#include <stdio.h>   /* for vsnprintf */
+
+#if HAVE_STDARG
+#include <stdarg.h>
+#else
+#include <varargs.h>
+#endif
+
+int
+#if HAVE_STDARG
+check (const char *fmt, ...)
+#else
+check (va_alist)
+     va_dcl
+#endif
+{
+  static char  buf[128];
+  va_list  ap;
+  int      ret;
+
+#if HAVE_STDARG
+  va_start (ap, fmt);
+#else
+  char *fmt;
+  va_start (ap);
+  fmt = va_arg (ap, char *);
+#endif
+
+  ret = vsnprintf (buf, 4, fmt, ap);
+
+  if (strcmp (buf, "hel") != 0)
+    return 1;
+
+  /* allowed return values */
+  if (ret != -1 && ret != 3 && ret != 11)
+    return 2;
+
+  return 0;
+}
+
+int
+main ()
+{
+$i
+}
+
+_ACEOF
+if ac_fn_c_try_run "$LINENO"; then :
+  :
+else
+  gmp_cv_func_vsnprintf=no; break
+fi
+rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
+  conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+
+  done
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_cv_func_vsnprintf" >&5
+$as_echo "$gmp_cv_func_vsnprintf" >&6; }
+  if test "$gmp_cv_func_vsnprintf" = probably; then
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cannot check for properly working vsnprintf when cross compiling, will assume it's ok" >&5
+$as_echo "$as_me: WARNING: cannot check for properly working vsnprintf when cross compiling, will assume it's ok" >&2;}
+  fi
+  if test "$gmp_cv_func_vsnprintf" != no; then
+
+$as_echo "#define HAVE_VSNPRINTF 1" >>confdefs.h
+
+  fi
+fi
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether sscanf needs writable input" >&5
+$as_echo_n "checking whether sscanf needs writable input... " >&6; }
+if test "${gmp_cv_func_sscanf_writable_input+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  case $host in
+  *-*-hpux9 | *-*-hpux9.*)
+     gmp_cv_func_sscanf_writable_input=yes ;;
+  *) gmp_cv_func_sscanf_writable_input=no  ;;
+esac
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_cv_func_sscanf_writable_input" >&5
+$as_echo "$gmp_cv_func_sscanf_writable_input" >&6; }
+case $gmp_cv_func_sscanf_writable_input in
+  yes)
+$as_echo "#define SSCANF_WRITABLE_INPUT 1" >>confdefs.h
+ ;;
+  no)  ;;
+  *)   as_fn_error "unrecognised \$gmp_cv_func_sscanf_writable_input" "$LINENO" 5 ;;
+esac
+
+
+# Reasons for checking:
+#   pst_processor psp_iticksperclktick - not in hpux 9
+#
+ac_fn_c_check_member "$LINENO" "struct pst_processor" "psp_iticksperclktick" "ac_cv_member_struct_pst_processor_psp_iticksperclktick" "#include <sys/pstat.h>
+"
+if test "x$ac_cv_member_struct_pst_processor_psp_iticksperclktick" = x""yes; then :
+
+$as_echo "#define HAVE_PSP_ITICKSPERCLKTICK 1" >>confdefs.h
+
+fi
+
+
+# C++ tests, when required
+#
+if test $enable_cxx = yes; then
+  ac_ext=cpp
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+
+
+  # Reasons for testing:
+  #   <sstream> - not in g++ 2.95.2
+  #   std::locale - not in g++ 2.95.4
+  #
+
+for ac_header in sstream
+do :
+  ac_fn_cxx_check_header_mongrel "$LINENO" "sstream" "ac_cv_header_sstream" "$ac_includes_default"
+if test "x$ac_cv_header_sstream" = x""yes; then :
+  cat >>confdefs.h <<_ACEOF
+#define HAVE_SSTREAM 1
+_ACEOF
+
+fi
+
+done
+
+  ac_fn_cxx_check_type "$LINENO" "std::locale" "ac_cv_type_std__locale" "#include <locale>
+"
+if test "x$ac_cv_type_std__locale" = x""yes; then :
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_STD__LOCALE 1
+_ACEOF
+
+
+fi
+
+
+  ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+fi
+
+
+# Pick the correct source files in $path and link them to mpn/.
+# $gmp_mpn_functions lists all functions we need.
+#
+# The rule is to find a file with the function name and a .asm, .S,
+# .s, or .c extension.  Certain multi-function files with special names
+# can provide some functions too.  (mpn/Makefile.am passes
+# -DOPERATION_<func> to get them to generate the right code.)
+
+# Note: $gmp_mpn_functions must have mod_1 before pre_mod_1 so the former
+#       can optionally provide the latter as an extra entrypoint.  Likewise
+#       divrem_1 and pre_divrem_1.
+
+# This list is appended at the end of gmp_mpn_functions below.
+gmp_mpn_functions_optional="umul udiv                                  \
+  invert_limb sqr_diagonal                                             \
+  mul_2 mul_3 mul_4                                                    \
+  addmul_2 addmul_3 addmul_4 addmul_5 addmul_6 addmul_7 addmul_8       \
+  addlsh1_n sublsh1_n rsblsh1_n rsh1add_n rsh1sub_n                    \
+  addlsh2_n sublsh2_n rsblsh2_n                                                \
+  addlsh_n sublsh_n rsblsh_n                                           \
+  add_n_sub_n addaddmul_1msb0"
+
+# The full list starts with $extra_functions (set outside this section) and
+# ends with the optional list defined above.
+gmp_mpn_functions="$extra_functions                                       \
+  add add_1 add_n sub sub_1 sub_n neg com mul_1 addmul_1                  \
+  submul_1 lshift rshift dive_1 diveby3 divis divrem divrem_1 divrem_2     \
+  fib2_ui mod_1 mod_34lsub1 mode1o pre_divrem_1 pre_mod_1 dump            \
+  mod_1_1 mod_1_2 mod_1_3 mod_1_4 lshiftc                                 \
+  mul mul_fft mul_n sqr mul_basecase sqr_basecase nussbaumer_mul          \
+  random random2 pow_1                                                    \
+  rootrem sqrtrem get_str set_str scan0 scan1 popcount hamdist cmp        \
+  perfsqr perfpow                                                         \
+  gcd_1 gcd gcdext_1 gcdext gcd_lehmer gcd_subdiv_step                    \
+  gcdext_lehmer gcdext_subdiv_step                                        \
+  div_q tdiv_qr jacbase get_d                                             \
+  matrix22_mul hgcd2 hgcd mullo_n mullo_basecase                          \
+  toom22_mul toom32_mul toom42_mul toom52_mul toom62_mul                  \
+  toom33_mul toom43_mul toom53_mul toom63_mul                             \
+  toom44_mul                                                              \
+  toom6h_mul toom6_sqr toom8h_mul toom8_sqr                               \
+  toom_couple_handling                                                    \
+  toom2_sqr toom3_sqr toom4_sqr                                                   \
+  toom_eval_dgr3_pm1 toom_eval_dgr3_pm2                                   \
+  toom_eval_pm1 toom_eval_pm2 toom_eval_pm2exp toom_eval_pm2rexp          \
+  toom_interpolate_5pts toom_interpolate_6pts toom_interpolate_7pts       \
+  toom_interpolate_8pts toom_interpolate_12pts toom_interpolate_16pts     \
+  invertappr invert binvert mulmod_bnm1 sqrmod_bnm1                       \
+  sbpi1_div_q sbpi1_div_qr sbpi1_divappr_q                                \
+  dcpi1_div_q dcpi1_div_qr dcpi1_divappr_q                                \
+  mu_div_qr mu_divappr_q mu_div_q                                         \
+  bdiv_q_1                                                                \
+  sbpi1_bdiv_q sbpi1_bdiv_qr                                              \
+  dcpi1_bdiv_q dcpi1_bdiv_qr                                              \
+  mu_bdiv_q mu_bdiv_qr                                                    \
+  bdiv_q bdiv_qr                                                          \
+  divexact bdiv_dbm1c redc_1 redc_2 redc_n powm powlo powm_sec subcnd_n           \
+  redc_1_sec trialdiv remove                                              \
+  and_n andn_n nand_n ior_n iorn_n nior_n xor_n xnor_n                    \
+  copyi copyd zero                                                        \
+  $gmp_mpn_functions_optional"
+
+
+
+# the list of all object files used by mpn/Makefile.in and the
+# top-level Makefile.in, respectively
+mpn_objects=
+mpn_objs_in_libgmp=
+
+# links from the sources, to be removed by "make distclean"
+gmp_srclinks=
+
+
+# mpn_relative_top_srcdir is $top_srcdir, but for use from within the mpn
+# build directory.  If $srcdir is relative then we use a relative path too,
+# so the two trees can be moved together.
+case $srcdir in
+  [\\/]* | ?:[\\/]*)  # absolute, as per autoconf
+    mpn_relative_top_srcdir=$srcdir ;;
+  *)                    # relative
+    mpn_relative_top_srcdir=../$srcdir ;;
+esac
+
+
+
+
+
+
+# Fat binary setups.
+#
+# We proceed through each $fat_path directory, and look for $fat_function
+# routines there.  Those found are incorporated in the build by generating a
+# little mpn/<foo>.asm or mpn/<foo>.c file in the build directory, with
+# suitable function renaming, and adding that to $mpn_objects (the same as a
+# normal mpn file).
+#
+# fat.h is generated with macros to let internal calls to each $fat_function
+# go directly through __gmpn_cpuvec, plus macros and declarations helping to
+# setup that structure, on a per-directory basis ready for
+# mpn/<cpu>/fat/fat.c.
+#
+# fat.h includes thresholds listed in $fat_thresholds, extracted from
+# gmp-mparam.h in each directory.  An overall maximum for each threshold is
+# established, for use in making fixed size arrays of temporary space.
+# (Eg. MUL_TOOM33_THRESHOLD_LIMIT used by mpn/generic/mul.c.)
+#
+# It'd be possible to do some of this manually, but when there's more than a
+# few functions and a few directories it becomes very tedious, and very
+# prone to having some routine accidentally omitted.  On that basis it seems
+# best to automate as much as possible, even if the code to do so is a bit
+# ugly.
+#
+
+if test -n "$fat_path"; then
+  # Usually the mpn build directory is created with mpn/Makefile
+  # instantiation, but we want to write to it sooner.
+  mkdir mpn 2>/dev/null
+
+  echo "/* fat.h - setups for fat binaries." >fat.h
+  echo "   Generated by configure - DO NOT EDIT.  */" >>fat.h
+
+
+$as_echo "#define WANT_FAT_BINARY 1" >>confdefs.h
+
+
+echo 'define(<WANT_FAT_BINARY>, <yes>)' >>$gmp_tmpconfigm4
+
+
+  # Don't want normal copies of fat functions
+  for tmp_fn in $fat_functions; do
+    remove_from_list_tmp=
+for remove_from_list_i in $gmp_mpn_functions; do
+  if test $remove_from_list_i = $tmp_fn; then :;
+  else
+     remove_from_list_tmp="$remove_from_list_tmp $remove_from_list_i"
+  fi
+done
+gmp_mpn_functions=$remove_from_list_tmp
+
+    remove_from_list_tmp=
+for remove_from_list_i in $gmp_mpn_functions_optional; do
+  if test $remove_from_list_i = $tmp_fn; then :;
+  else
+     remove_from_list_tmp="$remove_from_list_tmp $remove_from_list_i"
+  fi
+done
+gmp_mpn_functions_optional=$remove_from_list_tmp
+
+  done
+
+  # For each fat function: map the file-style name ($tmp_fn) to the mpn
+  # function base name ($tmp_fbase), then append to fat.h a macro that
+  # redirects mpn_<fbase> through __gmpn_cpuvec (unless OPERATION_<fn> is
+  # defined) plus a DECL_<fbase> line for <fbase>_init.  A matching
+  # HAVE_NATIVE_mpn_<fbase> define goes into confdefs.h.
+  for tmp_fn in $fat_functions; do
+    # Names that differ between file and function; all others map to
+    # themselves.
+    case $tmp_fn in
+  dive_1)      tmp_fbase=divexact_1 ;;
+  diveby3)     tmp_fbase=divexact_by3c ;;
+  pre_divrem_1) tmp_fbase=preinv_divrem_1 ;;
+  mode1o)      tmp_fbase=modexact_1c_odd ;;
+  pre_mod_1)   tmp_fbase=preinv_mod_1 ;;
+  *)           tmp_fbase=$tmp_fn ;;
+esac
+
+    echo "
+#ifndef OPERATION_$tmp_fn
+#undef  mpn_$tmp_fbase
+#define mpn_$tmp_fbase  (*__gmpn_cpuvec.$tmp_fbase)
+#endif
+DECL_$tmp_fbase (__MPN(${tmp_fbase}_init));" >>fat.h
+    # encourage various macros to use fat functions
+    cat >>confdefs.h <<_ACEOF
+#define HAVE_NATIVE_mpn_$tmp_fbase 1
+_ACEOF
+
+  done
+
+  echo "" >>fat.h
+  echo "/* variable thresholds */" >>fat.h
+  for tmp_tn in $fat_thresholds; do
+    echo "#undef  $tmp_tn" >>fat.h
+    echo "#define $tmp_tn  CPUVEC_THRESHOLD (`echo $tmp_tn | tr [A-Z] [a-z]`)" >>fat.h
+  done
+
+  echo "
+/* Copy all fields into __gmpn_cpuvec.
+   memcpy is not used because it might operate byte-wise (depending on its
+   implementation), and we need the function pointer writes to be atomic.
+   "volatile" discourages the compiler from trying to optimize this.  */
+#define CPUVEC_INSTALL(vec) \\
+  do { \\
+    volatile struct cpuvec_t *p = &__gmpn_cpuvec; \\" >>fat.h
+  for tmp_fn in $fat_functions; do
+    case $tmp_fn in
+  dive_1)      tmp_fbase=divexact_1 ;;
+  diveby3)     tmp_fbase=divexact_by3c ;;
+  pre_divrem_1) tmp_fbase=preinv_divrem_1 ;;
+  mode1o)      tmp_fbase=modexact_1c_odd ;;
+  pre_mod_1)   tmp_fbase=preinv_mod_1 ;;
+  *)           tmp_fbase=$tmp_fn ;;
+esac
+
+    echo "    p->$tmp_fbase = vec.$tmp_fbase; \\" >>fat.h
+  done
+  for tmp_tn in $fat_thresholds; do
+    tmp_field_name=`echo $tmp_tn | tr [A-Z] [a-z]`
+    echo "    p->$tmp_field_name = vec.$tmp_field_name; \\" >>fat.h
+  done
+  echo "  } while (0)" >>fat.h
+
+  echo "
+/* A helper to check all fields are filled. */
+#define ASSERT_CPUVEC(vec) \\
+  do { \\" >>fat.h
+  for tmp_fn in $fat_functions; do
+    case $tmp_fn in
+  dive_1)      tmp_fbase=divexact_1 ;;
+  diveby3)     tmp_fbase=divexact_by3c ;;
+  pre_divrem_1) tmp_fbase=preinv_divrem_1 ;;
+  mode1o)      tmp_fbase=modexact_1c_odd ;;
+  pre_mod_1)   tmp_fbase=preinv_mod_1 ;;
+  *)           tmp_fbase=$tmp_fn ;;
+esac
+
+    echo "    ASSERT (vec.$tmp_fbase != NULL); \\" >>fat.h
+  done
+  for tmp_tn in $fat_thresholds; do
+    tmp_field_name=`echo $tmp_tn | tr [A-Z] [a-z]`
+    echo "    ASSERT (vec.$tmp_field_name != 0); \\" >>fat.h
+  done
+  echo "  } while (0)" >>fat.h
+
+  echo "
+/* Call ITERATE(field) for each fat threshold field. */
+#define ITERATE_FAT_THRESHOLDS() \\
+  do { \\" >>fat.h
+  for tmp_tn in $fat_thresholds; do
+    tmp_field_name=`echo $tmp_tn | tr [A-Z] [a-z]`
+    echo "    ITERATE ($tmp_tn, $tmp_field_name); \\" >>fat.h
+  done
+  echo "  } while (0)" >>fat.h
+
+  for tmp_dir in $fat_path; do
+    # Per-directory accumulators: C statements for CPUVEC_SETUP_<suffix> and
+    # m4 defines prepended to each generated .asm wrapper.
+    CPUVEC_SETUP=
+    THRESH_ASM_SETUP=
+    echo "" >>fat.h
+    # Drop the leading path component (when there is a slash) and turn the
+    # remaining separators into underscores, eg. x86/pentium/mmx becomes
+    # pentium_mmx.
+    tmp_suffix=`echo $tmp_dir | sed -e '/\//s:^[^/]*/::' -e 's:[\\/]:_:g'`
+
+    # In order to keep names unique on a DOS 8.3 filesystem, use a prefix
+    # (rather than a suffix) for the generated file names, and abbreviate.
+    case $tmp_suffix in
+      pentium)       tmp_prefix=p   ;;
+      pentium_mmx)   tmp_prefix=pm  ;;
+      p6_mmx)        tmp_prefix=p2  ;;
+      p6_p3mmx)      tmp_prefix=p3  ;;
+      pentium4)      tmp_prefix=p4  ;;
+      pentium4_mmx)  tmp_prefix=p4m ;;
+      pentium4_sse2) tmp_prefix=p4s ;;
+      k6_mmx)        tmp_prefix=k6m ;;
+      k6_k62mmx)     tmp_prefix=k62 ;;
+      k7_mmx)        tmp_prefix=k7m ;;
+      *)             tmp_prefix=$tmp_suffix ;;
+    esac
+
+    # Extract desired thresholds from gmp-mparam.h file in this directory,
+    # if present.
+    tmp_mparam=$srcdir/mpn/$tmp_dir/gmp-mparam.h
+    if test -f $tmp_mparam; then
+      for tmp_tn in $fat_thresholds; do
+        tmp_thresh=`sed -n "s/^#define $tmp_tn[        ]*\\([0-9][0-9]*\\).*$/\\1/p" $tmp_mparam`
+        if test -n "$tmp_thresh"; then
+          THRESH_ASM_SETUP="${THRESH_ASM_SETUP}define($tmp_tn,$tmp_thresh)
+"
+          CPUVEC_SETUP="$CPUVEC_SETUP    decided_cpuvec.`echo $tmp_tn | tr [A-Z] [a-z]` = $tmp_thresh; \\
+"
+          eval tmp_limit=\$${tmp_tn}_LIMIT
+          if test -z "$tmp_limit"; then
+            tmp_limit=0
+          fi
+          if test $tmp_thresh -gt $tmp_limit; then
+            eval ${tmp_tn}_LIMIT=$tmp_thresh
+          fi
+        fi
+      done
+    fi
+
+    # For each fat function, look in this directory for a source file named
+    # after the function or after a multi-function file that can provide it.
+    # Every hit adds a <prefix>_<fn>.lo object, writes a small wrapper source
+    # in mpn/ (for .asm and .c only), and appends a prototype to fat.h and an
+    # assignment to CPUVEC_SETUP.
+    for tmp_fn in $fat_functions; do
+      # functions that can be provided by multi-function files
+tmp_mulfunc=
+case $tmp_fn in
+  add_n|sub_n)       tmp_mulfunc="aors_n"    ;;
+  addmul_1|submul_1) tmp_mulfunc="aorsmul_1" ;;
+  popcount|hamdist)  tmp_mulfunc="popham"    ;;
+  and_n|andn_n|nand_n | ior_n|iorn_n|nior_n | xor_n|xnor_n)
+                     tmp_mulfunc="logops_n"  ;;
+  lshift|rshift)     tmp_mulfunc="lorrshift";;
+  addlsh1_n)
+                    tmp_mulfunc="aorslsh1_n aorrlsh1_n";;
+  sublsh1_n)
+                    tmp_mulfunc="aorslsh1_n sorrlsh1_n";;
+  rsblsh1_n)
+                    tmp_mulfunc="aorrlsh1_n sorrlsh1_n";;
+  addlsh2_n)
+                    tmp_mulfunc="aorslsh2_n aorrlsh2_n";;
+  sublsh2_n)
+                    tmp_mulfunc="aorslsh2_n sorrlsh2_n";;
+  rsblsh2_n)
+                    tmp_mulfunc="aorrlsh2_n sorrlsh2_n";;
+  addlsh_n)
+                    tmp_mulfunc="aorslsh_n aorrlsh_n";;
+  sublsh_n)
+                    tmp_mulfunc="aorslsh_n sorrlsh_n";;
+  rsblsh_n)
+                    tmp_mulfunc="aorrlsh_n sorrlsh_n";;
+  rsh1add_n|rsh1sub_n)
+                    tmp_mulfunc="rsh1aors_n";;
+esac
+
+
+      # NOTE(review): there is no break after a match, so every existing
+      # base/extension combination is processed - confirm this is intended.
+      for tmp_base in $tmp_fn $tmp_mulfunc; do
+        for tmp_ext in asm S s c; do
+          tmp_file=$srcdir/mpn/$tmp_dir/$tmp_base.$tmp_ext
+          if test -f $tmp_file; then
+
+            mpn_objects="$mpn_objects ${tmp_prefix}_$tmp_fn.lo"
+            mpn_objs_in_libgmp="$mpn_objs_in_libgmp mpn/${tmp_prefix}_$tmp_fn.lo"
+
+            # Map abbreviated source file names to the function base name.
+            case $tmp_fn in
+  dive_1)      tmp_fbase=divexact_1 ;;
+  diveby3)     tmp_fbase=divexact_by3c ;;
+  pre_divrem_1) tmp_fbase=preinv_divrem_1 ;;
+  mode1o)      tmp_fbase=modexact_1c_odd ;;
+  pre_mod_1)   tmp_fbase=preinv_mod_1 ;;
+  *)           tmp_fbase=$tmp_fn ;;
+esac
+
+
+            # carry-in variant, eg. divrem_1c or modexact_1c_odd
+            case $tmp_fbase in
+              *_1*) tmp_fbasec=`echo $tmp_fbase | sed 's/_1/_1c/'` ;;
+              *)    tmp_fbasec=${tmp_fbase}c ;;
+            esac
+
+            # Create a little file doing an include from srcdir.  The
+            # OPERATION and renamings aren't all needed all the time, but
+            # they don't hurt if unused.
+            #
+            # FIXME: Should generate these via config.status commands.
+            # Would need them all in one AC_CONFIG_COMMANDS though, since
+            # that macro doesn't accept a set of separate commands generated
+            # by shell code.
+            #
+            # Only the asm and c extensions get a wrapper file here.
+            case $tmp_ext in
+              asm)
+                # hide the d-n-l from autoconf's error checking
+                tmp_d_n_l=d""nl
+                echo "$tmp_d_n_l  mpn_$tmp_fbase - from $tmp_dir directory for fat binary.
+$tmp_d_n_l  Generated by configure - DO NOT EDIT.
+
+define(OPERATION_$tmp_fn)
+define(__gmpn_$tmp_fbase, __gmpn_${tmp_fbase}_$tmp_suffix)
+define(__gmpn_$tmp_fbasec,__gmpn_${tmp_fbasec}_${tmp_suffix})
+define(__gmpn_preinv_${tmp_fbase},__gmpn_preinv_${tmp_fbase}_${tmp_suffix})
+
+$tmp_d_n_l  For k6 and k7 gcd_1 calling their corresponding mpn_modexact_1_odd
+ifdef(\`__gmpn_modexact_1_odd',,
+\`define(__gmpn_modexact_1_odd,__gmpn_modexact_1_odd_${tmp_suffix})')
+
+$THRESH_ASM_SETUP
+include($mpn_relative_top_srcdir/mpn/$tmp_dir/$tmp_base.asm)
+" >mpn/${tmp_prefix}_$tmp_fn.asm
+                ;;
+              c)
+                echo "/* mpn_$tmp_fbase - from $tmp_dir directory for fat binary.
+   Generated by configure - DO NOT EDIT. */
+
+#define OPERATION_$tmp_fn 1
+#define __gmpn_$tmp_fbase           __gmpn_${tmp_fbase}_$tmp_suffix
+#define __gmpn_$tmp_fbasec          __gmpn_${tmp_fbasec}_${tmp_suffix}
+#define __gmpn_preinv_${tmp_fbase}  __gmpn_preinv_${tmp_fbase}_${tmp_suffix}
+
+#include \"$mpn_relative_top_srcdir/mpn/$tmp_dir/$tmp_base.c\"
+" >mpn/${tmp_prefix}_$tmp_fn.c
+                ;;
+            esac
+
+            # Prototype, and append to CPUVEC_SETUP for this directory.
+            echo "DECL_$tmp_fbase (__gmpn_${tmp_fbase}_$tmp_suffix);" >>fat.h
+            CPUVEC_SETUP="$CPUVEC_SETUP    decided_cpuvec.$tmp_fbase = __gmpn_${tmp_fbase}_${tmp_suffix}; \\
+"
+            # Ditto for any preinv variant (preinv_divrem_1, preinv_mod_1).
+            # Detected by a PROLOGUE(mpn_preinv_<fn>) line in the source file.
+            if grep "^PROLOGUE(mpn_preinv_$tmp_fn)" $tmp_file >/dev/null; then
+              echo "DECL_preinv_$tmp_fbase (__gmpn_preinv_${tmp_fbase}_$tmp_suffix);" >>fat.h
+              CPUVEC_SETUP="$CPUVEC_SETUP    decided_cpuvec.preinv_$tmp_fbase = __gmpn_preinv_${tmp_fbase}_${tmp_suffix}; \\
+"
+            fi
+          fi
+        done
+      done
+    done
+
+    # Emit CPUVEC_SETUP for this directory
+    echo "" >>fat.h
+    echo "#define CPUVEC_SETUP_$tmp_suffix \\" >>fat.h
+    echo "  do { \\" >>fat.h
+    echo "$CPUVEC_SETUP  } while (0)" >>fat.h
+  done
+
+  # Emit threshold limits
+  echo "" >>fat.h
+  for tmp_tn in $fat_thresholds; do
+    eval tmp_limit=\$${tmp_tn}_LIMIT
+    echo "#define ${tmp_tn}_LIMIT  $tmp_limit" >>fat.h
+  done
+fi
+
+
+# Normal binary setups.
+#
+
+for tmp_ext in asm S s c; do
+  eval found_$tmp_ext=no
+done
+
+# For each function in $gmp_mpn_functions, search the directories in $path
+# (in order) for the first source file providing it, either under its own
+# name or as a multi-function file, trying the extensions asm, S, s, c.
+# The chosen file is registered as an object and a config link, and its
+# PROLOGUE declarations become HAVE_NATIVE_<name> defines.  A function with
+# no file is an error unless it is listed in $gmp_mpn_functions_optional.
+for tmp_fn in $gmp_mpn_functions; do
+  # Remove any stale file left in mpn/ (unless $no_create is "yes").
+  for tmp_ext in asm S s c; do
+    test "$no_create" = yes || rm -f mpn/$tmp_fn.$tmp_ext
+  done
+
+  # mpn_preinv_divrem_1 might have been provided by divrem_1.asm, likewise
+  # mpn_preinv_mod_1 by mod_1.asm.
+  case $tmp_fn in
+  pre_divrem_1)
+    if test "$HAVE_NATIVE_mpn_preinv_divrem_1" = yes; then continue; fi ;;
+  pre_mod_1)
+    if test "$HAVE_NATIVE_mpn_preinv_mod_1" = yes; then continue; fi ;;
+  esac
+
+  # functions that can be provided by multi-function files
+tmp_mulfunc=
+case $tmp_fn in
+  add_n|sub_n)       tmp_mulfunc="aors_n"    ;;
+  addmul_1|submul_1) tmp_mulfunc="aorsmul_1" ;;
+  popcount|hamdist)  tmp_mulfunc="popham"    ;;
+  and_n|andn_n|nand_n | ior_n|iorn_n|nior_n | xor_n|xnor_n)
+                     tmp_mulfunc="logops_n"  ;;
+  lshift|rshift)     tmp_mulfunc="lorrshift";;
+  addlsh1_n)
+                    tmp_mulfunc="aorslsh1_n aorrlsh1_n";;
+  sublsh1_n)
+                    tmp_mulfunc="aorslsh1_n sorrlsh1_n";;
+  rsblsh1_n)
+                    tmp_mulfunc="aorrlsh1_n sorrlsh1_n";;
+  addlsh2_n)
+                    tmp_mulfunc="aorslsh2_n aorrlsh2_n";;
+  sublsh2_n)
+                    tmp_mulfunc="aorslsh2_n sorrlsh2_n";;
+  rsblsh2_n)
+                    tmp_mulfunc="aorrlsh2_n sorrlsh2_n";;
+  addlsh_n)
+                    tmp_mulfunc="aorslsh_n aorrlsh_n";;
+  sublsh_n)
+                    tmp_mulfunc="aorslsh_n sorrlsh_n";;
+  rsblsh_n)
+                    tmp_mulfunc="aorrlsh_n sorrlsh_n";;
+  rsh1add_n|rsh1sub_n)
+                    tmp_mulfunc="rsh1aors_n";;
+esac
+
+
+  found=no
+  for tmp_dir in $path; do
+    for tmp_base in $tmp_fn $tmp_mulfunc; do
+      for tmp_ext in asm S s c; do
+        tmp_file=$srcdir/mpn/$tmp_dir/$tmp_base.$tmp_ext
+        if test -f $tmp_file; then
+
+          # For a nails build, check if the file supports our nail bits.
+          # Generic code always supports all nails.
+          #
+          # FIXME: When a multi-function file is selected to provide one of
+          # the nails-neutral routines, like logops_n for and_n, the
+          # PROLOGUE grepping will create HAVE_NATIVE_mpn_<foo> defines for
+          # all functions in that file, even if they haven't all been
+          # nailified.  Not sure what to do about this, it's only really a
+          # problem for logops_n, and it's not too terrible to insist those
+          # get nailified always.
+          #
+          if test $GMP_NAIL_BITS != 0 && test $tmp_dir != generic; then
+            case $tmp_fn in
+              and_n | ior_n | xor_n | andn_n | \
+              copyi | copyd | \
+              popcount | hamdist | \
+              udiv | udiv_w_sdiv | umul | \
+              cntlz | invert_limb)
+                # these operations are either unaffected by nails or defined
+                # to operate on full limbs
+                ;;
+              *)
+                # Each word of NAILS_SUPPORT(...) is a single value or a
+                # START-END range; accept the file if $GMP_NAIL_BITS falls
+                # within any of them, otherwise try the next extension.
+                nails=`sed -n 's/^[    ]*NAILS_SUPPORT(\(.*\))/\1/p' $tmp_file `
+                for n in $nails; do
+                  case $n in
+                  *-*)
+                    n_start=`echo "$n" | sed -n 's/\(.*\)-.*/\1/p'`
+                    n_end=`echo "$n" | sed -n 's/.*-\(.*\)/\1/p'`
+                    ;;
+                  *)
+                    n_start=$n
+                    n_end=$n
+                    ;;
+                  esac
+                  if test $GMP_NAIL_BITS -ge $n_start && test $GMP_NAIL_BITS -le $n_end; then
+                    found=yes
+                    break
+                  fi
+                done
+                if test $found != yes; then
+                  continue
+                fi
+                ;;
+            esac
+          fi
+
+          found=yes
+          eval found_$tmp_ext=yes
+
+          # C objects get a literal $U in their name; other extensions don't.
+          if test $tmp_ext = c; then
+            tmp_u='$U'
+          else
+            tmp_u=
+          fi
+
+          mpn_objects="$mpn_objects $tmp_fn$tmp_u.lo"
+          mpn_objs_in_libgmp="$mpn_objs_in_libgmp mpn/$tmp_fn$tmp_u.lo"
+          ac_config_links="$ac_config_links mpn/$tmp_fn.$tmp_ext:mpn/$tmp_dir/$tmp_base.$tmp_ext"
+
+          gmp_srclinks="$gmp_srclinks mpn/$tmp_fn.$tmp_ext"
+
+          # Duplicate AC_DEFINEs are harmless, so it doesn't matter
+          # that multi-function files get grepped here repeatedly.
+          # The PROLOGUE pattern excludes the optional second parameter.
+          gmp_ep=`
+            sed -n 's/^[       ]*MULFUNC_PROLOGUE(\(.*\))/\1/p' $tmp_file ;
+            sed -n 's/^[       ]*PROLOGUE(\([^,]*\).*)/\1/p' $tmp_file
+          `
+          for gmp_tmp in $gmp_ep; do
+            cat >>confdefs.h <<_ACEOF
+#define HAVE_NATIVE_$gmp_tmp 1
+_ACEOF
+
+            eval HAVE_NATIVE_$gmp_tmp=yes
+          done
+
+          # Remember which file supplies sqr_basecase.
+          case $tmp_fn in
+          sqr_basecase) sqr_basecase_source=$tmp_file ;;
+          esac
+
+          break
+        fi
+      done
+      if test $found = yes; then break ; fi
+    done
+    if test $found = yes; then break ; fi
+  done
+
+  # Not found anywhere: tolerated only for optional functions.
+  if test $found = no; then
+    for tmp_optional in $gmp_mpn_functions_optional; do
+      if test $tmp_optional = $tmp_fn; then
+        found=yes
+      fi
+    done
+    if test $found = no; then
+      as_fn_error "no version of $tmp_fn found in path: $path" "$LINENO" 5
+    fi
+  fi
+done
+
+# All cycle counters are .asm files currently
+if test -n "$SPEED_CYCLECOUNTER_OBJ"; then
+  found_asm=yes
+fi
+
+
+
+
+
+# Don't demand an m4 unless it's actually needed.
+if test $found_asm = yes; then
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suitable m4" >&5
+$as_echo_n "checking for suitable m4... " >&6; }
+if test "${gmp_cv_prog_m4+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$M4"; then
+  gmp_cv_prog_m4="$M4"
+else
+  cat >conftest.m4 <<\EOF
+define(dollarhash,``$#'')ifelse(dollarhash(x),1,`define(t1,Y)',
+``bad: $# not supported (SunOS /usr/bin/m4)
+'')ifelse(eval(89),89,`define(t2,Y)',
+`bad: eval() doesnt support 8 or 9 in a constant (OpenBSD 2.6 m4)
+')ifelse(t1`'t2,YY,`good
+')
+EOF
+  echo "trying m4" >&5
+  gmp_tmp_val=`(m4 conftest.m4) 2>&5`
+  echo "$gmp_tmp_val" >&5
+  if test "$gmp_tmp_val" = good; then
+    gmp_cv_prog_m4="m4"
+  else
+    IFS="${IFS=        }"; ac_save_ifs="$IFS"; IFS=":"
+    ac_dummy="$PATH:/usr/5bin"
+    for ac_dir in $ac_dummy; do
+      test -z "$ac_dir" && ac_dir=.
+      echo "trying $ac_dir/m4" >&5
+      gmp_tmp_val=`($ac_dir/m4 conftest.m4) 2>&5`
+      echo "$gmp_tmp_val" >&5
+      if test "$gmp_tmp_val" = good; then
+        gmp_cv_prog_m4="$ac_dir/m4"
+        break
+      fi
+    done
+    IFS="$ac_save_ifs"
+    if test -z "$gmp_cv_prog_m4"; then
+      as_fn_error "No usable m4 in \$PATH or /usr/5bin (see config.log for reasons)." "$LINENO" 5
+    fi
+  fi
+  rm -f conftest.m4
+fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_cv_prog_m4" >&5
+$as_echo "$gmp_cv_prog_m4" >&6; }
+M4="$gmp_cv_prog_m4"
+
+
+
+# Check whether $M4 emits anything for an m4wrap of an empty macro: the
+# test input should produce zero bytes, so any output counts as spurious.
+# The yes/no answer is cached and exported to m4 as M4WRAP_SPURIOUS.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if m4wrap produces spurious output" >&5
+$as_echo_n "checking if m4wrap produces spurious output... " >&6; }
+if test "${gmp_cv_m4_m4wrap_spurious+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  # hide the d-n-l from autoconf's error checking
+tmp_d_n_l=d""nl
+cat >conftest.m4 <<EOF
+changequote({,})define(x,)m4wrap({x})$tmp_d_n_l
+EOF
+echo test input is >&5
+cat conftest.m4 >&5
+tmp_chars=`$M4 conftest.m4 | wc -c`
+echo produces $tmp_chars chars output >&5
+rm -f conftest.m4
+if test $tmp_chars = 0; then
+  gmp_cv_m4_m4wrap_spurious=no
+else
+  gmp_cv_m4_m4wrap_spurious=yes
+fi
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_cv_m4_m4wrap_spurious" >&5
+$as_echo "$gmp_cv_m4_m4wrap_spurious" >&6; }
+
+echo "define(<M4WRAP_SPURIOUS>,<$gmp_cv_m4_m4wrap_spurious>)" >> $gmp_tmpconfigm4
+
+
+# else
+# It's unclear why this m4-not-needed stuff was ever done.
+#  if test -z "$M4" ; then
+#    M4=m4-not-needed
+#  fi
+fi
+
+# Only do the GMP_ASM checks if there's a .S or .asm wanting them, ie. skip
+# them when neither found_asm nor found_S was set to "yes" above.
+if test $found_asm = no && test $found_S = no; then
+  gmp_asm_syntax_testing=no
+fi
+
+if test "$gmp_asm_syntax_testing" != no; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking how to switch to text section" >&5
+$as_echo_n "checking how to switch to text section... " >&6; }
+if test "${gmp_cv_asm_text+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  for i in ".text" ".code" ".csect .text[PR]"; do
+  echo "trying $i" >&5
+  cat >conftest.s <<EOF
+       $i
+EOF
+gmp_assemble="$CCAS $CFLAGS $CPPFLAGS conftest.s >conftest.out 2>&1"
+if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_assemble\""; } >&5
+  (eval $gmp_assemble) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  cat conftest.out >&5
+  gmp_cv_asm_text=$i
+     rm -f conftest*
+     break
+else
+  cat conftest.out >&5
+  echo "configure: failed program was:" >&5
+  cat conftest.s >&5
+  :
+fi
+rm -f conftest*
+
+done
+if test -z "$gmp_cv_asm_text"; then
+  as_fn_error "Cannot determine text section directive" "$LINENO" 5
+fi
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_cv_asm_text" >&5
+$as_echo "$gmp_cv_asm_text" >&6; }
+echo "define(<TEXT>, <$gmp_cv_asm_text>)" >> $gmp_tmpconfigm4
+
+  # Choose the data section directive purely from $host (no assembler run):
+  # ".csect .data[RW]" on AIX, ".data" everywhere else.  Exported as DATA.
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking how to switch to data section" >&5
+$as_echo_n "checking how to switch to data section... " >&6; }
+if test "${gmp_cv_asm_data+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  case $host in
+  *-*-aix*) gmp_cv_asm_data=".csect .data[RW]" ;;
+  *)        gmp_cv_asm_data=".data" ;;
+esac
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_cv_asm_data" >&5
+$as_echo "$gmp_cv_asm_data" >&6; }
+echo "define(<DATA>, <$gmp_cv_asm_data>)" >> $gmp_tmpconfigm4
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for assembler label suffix" >&5
+$as_echo_n "checking for assembler label suffix... " >&6; }
+if test "${gmp_cv_asm_label_suffix+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  gmp_cv_asm_label_suffix=unknown
+for i in "" ":"; do
+  echo "trying $i" >&5
+  cat >conftest.s <<EOF
+       $gmp_cv_asm_text
+somelabel$i
+EOF
+gmp_assemble="$CCAS $CFLAGS $CPPFLAGS conftest.s >conftest.out 2>&1"
+if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_assemble\""; } >&5
+  (eval $gmp_assemble) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  cat conftest.out >&5
+  gmp_cv_asm_label_suffix=$i
+     rm -f conftest*
+     break
+else
+  cat conftest.out >&5
+  echo "configure: failed program was:" >&5
+  cat conftest.s >&5
+  cat conftest.out >&5
+fi
+rm -f conftest*
+
+done
+if test "$gmp_cv_asm_label_suffix" = "unknown"; then
+  as_fn_error "Cannot determine label suffix" "$LINENO" 5
+fi
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_cv_asm_label_suffix" >&5
+$as_echo "$gmp_cv_asm_label_suffix" >&6; }
+echo "define(<LABEL_SUFFIX>, <$gmp_cv_asm_label_suffix>)" >> $gmp_tmpconfigm4
+
+
+# Choose the directive that makes a symbol global, purely from $host:
+# .export on hppa, .global on ia64/itanium, .globl otherwise.
+# Exported to m4 as GLOBL.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for assembler global directive" >&5
+$as_echo_n "checking for assembler global directive... " >&6; }
+if test "${gmp_cv_asm_globl+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  case $host in
+  hppa*-*-*)     gmp_cv_asm_globl=.export ;;
+  ia64*-*-* | itanium-*-* | itanium2-*-*)  gmp_cv_asm_globl=.global ;;
+  *)             gmp_cv_asm_globl=.globl  ;;
+esac
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_cv_asm_globl" >&5
+$as_echo "$gmp_cv_asm_globl" >&6; }
+echo "define(<GLOBL>, <$gmp_cv_asm_globl>)" >> $gmp_tmpconfigm4
+
+
+# Attribute text for the global directive: ",entry" when the directive is
+# .export, empty otherwise.  Exported to m4 as GLOBL_ATTR.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for assembler global directive attribute" >&5
+$as_echo_n "checking for assembler global directive attribute... " >&6; }
+if test "${gmp_cv_asm_globl_attr+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  case $gmp_cv_asm_globl in
+  .export) gmp_cv_asm_globl_attr=",entry" ;;
+  *)       gmp_cv_asm_globl_attr="" ;;
+esac
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_cv_asm_globl_attr" >&5
+$as_echo "$gmp_cv_asm_globl_attr" >&6; }
+echo "define(<GLOBL_ATTR>, <$gmp_cv_asm_globl_attr>)" >> $gmp_tmpconfigm4
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if globals are prefixed by underscore" >&5
+$as_echo_n "checking if globals are prefixed by underscore... " >&6; }
+if test "${gmp_cv_asm_underscore+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  gmp_cv_asm_underscore="unknown"
+cat >conftest.c <<EOF
+int gurkmacka;
+EOF
+gmp_compile="$CC $CFLAGS $CPPFLAGS -c conftest.c >&5"
+if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_compile\""; } >&5
+  (eval $gmp_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  $NM conftest.$OBJEXT >conftest.out
+  if grep _gurkmacka conftest.out >/dev/null; then
+    gmp_cv_asm_underscore=yes
+  elif grep gurkmacka conftest.out >/dev/null; then
+    gmp_cv_asm_underscore=no
+  else
+    echo "configure: $NM doesn't have gurkmacka:" >&5
+    cat conftest.out >&5
+  fi
+else
+  echo "configure: failed program was:" >&5
+  cat conftest.c >&5
+fi
+rm -f conftest*
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_cv_asm_underscore" >&5
+$as_echo "$gmp_cv_asm_underscore" >&6; }
+case $gmp_cv_asm_underscore in
+  yes)
+
+echo 'define(<GSYM_PREFIX>, <_>)' >>$gmp_tmpconfigm4
+ ;;
+  no)
+
+echo 'define(<GSYM_PREFIX>, <>)' >>$gmp_tmpconfigm4
+ ;;
+  *)
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: +----------------------------------------------------------" >&5
+$as_echo "$as_me: WARNING: +----------------------------------------------------------" >&2;}
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: | Cannot determine global symbol prefix." >&5
+$as_echo "$as_me: WARNING: | Cannot determine global symbol prefix." >&2;}
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: | $NM output doesn't contain a global data symbol." >&5
+$as_echo "$as_me: WARNING: | $NM output doesn't contain a global data symbol." >&2;}
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: | Will proceed with no underscore." >&5
+$as_echo "$as_me: WARNING: | Will proceed with no underscore." >&2;}
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: | If this is wrong then you'll get link errors referring" >&5
+$as_echo "$as_me: WARNING: | If this is wrong then you'll get link errors referring" >&2;}
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: | to ___gmpn_add_n (note three underscores)." >&5
+$as_echo "$as_me: WARNING: | to ___gmpn_add_n (note three underscores)." >&2;}
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: | In this case do a fresh build with an override," >&5
+$as_echo "$as_me: WARNING: | In this case do a fresh build with an override," >&2;}
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: |     ./configure gmp_cv_asm_underscore=yes" >&5
+$as_echo "$as_me: WARNING: |     ./configure gmp_cv_asm_underscore=yes" >&2;}
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: +----------------------------------------------------------" >&5
+$as_echo "$as_me: WARNING: +----------------------------------------------------------" >&2;}
+
+echo 'define(<GSYM_PREFIX>, <>)' >>$gmp_tmpconfigm4
+
+    ;;
+esac
+
+
+
+
+
+# Determine the directive for read-only data.  The default comes from
+# $host: the data directive on the listed x86 family hosts, the text
+# directive elsewhere.  Then a const array "foo" is compiled with -S and,
+# if its label can be found in the compiler's assembly output, the last
+# section-like directive preceding the label replaces the default.
+# The result is exported to m4 as RODATA.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to switch to read-only data section" >&5
+$as_echo_n "checking how to switch to read-only data section... " >&6; }
+if test "${gmp_cv_asm_rodata+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+
+case $host in
+i?86*-*-* | k[5-8]*-*-* | pentium*-*-* | athlon-*-* | viac3*-*-* | geode*-*-* | atom-*-* | x86_64-*-*)
+  gmp_cv_asm_rodata="$gmp_cv_asm_data" ;;
+*)
+  gmp_cv_asm_rodata="$gmp_cv_asm_text" ;;
+esac
+
+cat >conftest.c <<EOF
+extern const int foo[];                /* Suppresses C++'s suppression of foo */
+const int foo[] = {1,2,3};
+EOF
+echo "Test program:" >&5
+cat conftest.c >&5
+gmp_compile="$CC $CFLAGS $CPPFLAGS -S conftest.c >&5"
+if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_compile\""; } >&5
+  (eval $gmp_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  echo "Compiler output:" >&5
+  cat conftest.s >&5
+  # Any value other than "yes" (including "unknown") means no prefix.
+  if test $gmp_cv_asm_underscore = yes; then
+    tmp_gsym_prefix=_
+  else
+    tmp_gsym_prefix=
+  fi
+  # must see our label
+  if grep "^${tmp_gsym_prefix}foo$gmp_cv_asm_label_suffix" conftest.s >/dev/null 2>&5; then
+    # take the last directive before our label (hence skipping segments
+    # getting debugging info etc)
+    tmp_match=`sed -n "/^${tmp_gsym_prefix}foo$gmp_cv_asm_label_suffix/q
+                        /^[.   ]*data/p
+                        /^[.   ]*rdata/p
+                        /^[.   ]*text/p
+                        /^[.   ]*section/p
+                        /^[.   ]*csect/p
+                        /^[.   ]*CSECT/p" conftest.s | sed -n '$p'`
+    echo "Match: $tmp_match" >&5
+    if test -n "$tmp_match"; then
+      gmp_cv_asm_rodata=$tmp_match
+    fi
+  else
+    echo "Couldn't find label: ^${tmp_gsym_prefix}foo$gmp_cv_asm_label_suffix" >&5
+  fi
+fi
+rm -f conftest*
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_cv_asm_rodata" >&5
+$as_echo "$gmp_cv_asm_rodata" >&6; }
+echo "define(<RODATA>, <$gmp_cv_asm_rodata>)" >> $gmp_tmpconfigm4
+
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for assembler .type directive" >&5
+$as_echo_n "checking for assembler .type directive... " >&6; }
+if test "${gmp_cv_asm_type+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  gmp_cv_asm_type=
+for gmp_tmp_prefix in @ \# %; do
+  cat >conftest.s <<EOF
+       .type   sym,${gmp_tmp_prefix}function
+EOF
+gmp_assemble="$CCAS $CFLAGS $CPPFLAGS conftest.s >conftest.out 2>&1"
+if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_assemble\""; } >&5
+  (eval $gmp_assemble) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  cat conftest.out >&5
+  if grep "\.type pseudo-op used outside of \.def/\.endef ignored" conftest.out >/dev/null; then : ;
+    else
+      gmp_cv_asm_type=".type   \$1,${gmp_tmp_prefix}\$2"
+      break
+    fi
+else
+  cat conftest.out >&5
+  echo "configure: failed program was:" >&5
+  cat conftest.s >&5
+  :
+fi
+rm -f conftest*
+
+done
+rm -f conftest*
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_cv_asm_type" >&5
+$as_echo "$gmp_cv_asm_type" >&6; }
+echo "define(<TYPE>, <$gmp_cv_asm_type>)" >> $gmp_tmpconfigm4
+
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for assembler .size directive" >&5
+$as_echo_n "checking for assembler .size directive... " >&6; }
+if test "${gmp_cv_asm_size+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  gmp_cv_asm_size=
+cat >conftest.s <<EOF
+       .size   sym,1
+EOF
+gmp_assemble="$CCAS $CFLAGS $CPPFLAGS conftest.s >conftest.out 2>&1"
+if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_assemble\""; } >&5
+  (eval $gmp_assemble) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  cat conftest.out >&5
+  if grep "\.size pseudo-op used outside of \.def/\.endef ignored" conftest.out >/dev/null; then : ;
+  else
+    gmp_cv_asm_size=".size     \$1,\$2"
+  fi
+else
+  cat conftest.out >&5
+  echo "configure: failed program was:" >&5
+  cat conftest.s >&5
+  :
+fi
+rm -f conftest*
+
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_cv_asm_size" >&5
+$as_echo "$gmp_cv_asm_size" >&6; }
+echo "define(<SIZE>, <$gmp_cv_asm_size>)" >> $gmp_tmpconfigm4
+
+
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for assembler local label prefix" >&5
+$as_echo_n "checking for assembler local label prefix... " >&6; }
+if test "${gmp_cv_asm_lsym_prefix+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  gmp_tmp_pre_appears=yes
+for gmp_tmp_pre in L .L $L $ L$; do
+  echo "Trying $gmp_tmp_pre" >&5
+  cat >conftest.s <<EOF
+       $gmp_cv_asm_text
+dummy${gmp_cv_asm_label_suffix}
+${gmp_tmp_pre}gurkmacka${gmp_cv_asm_label_suffix}
+EOF
+gmp_assemble="$CCAS $CFLAGS $CPPFLAGS conftest.s >conftest.out 2>&1"
+if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_assemble\""; } >&5
+  (eval $gmp_assemble) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  cat conftest.out >&5
+  if $NM conftest.$OBJEXT >conftest.nm 2>&5; then : ; else
+    cat conftest.nm >&5
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: \"$NM\" failure" >&5
+$as_echo "$as_me: WARNING: \"$NM\" failure" >&2;}
+    break
+  fi
+  cat conftest.nm >&5
+  if grep gurkmacka conftest.nm >/dev/null; then : ; else
+    # no mention of the symbol, this is good
+    echo "$gmp_tmp_pre label doesn't appear in object file at all (good)" >&5
+    gmp_cv_asm_lsym_prefix="$gmp_tmp_pre"
+    gmp_tmp_pre_appears=no
+    break
+  fi
+  if grep ' [a-zN] .*gurkmacka' conftest.nm >/dev/null; then
+    # symbol mentioned as a local, use this if nothing better
+    echo "$gmp_tmp_pre label is local but still in object file" >&5
+    if test -z "$gmp_cv_asm_lsym_prefix"; then
+      gmp_cv_asm_lsym_prefix="$gmp_tmp_pre"
+    fi
+  else
+    echo "$gmp_tmp_pre label is something unknown" >&5
+  fi
+
+else
+  cat conftest.out >&5
+  echo "configure: failed program was:" >&5
+  cat conftest.s >&5
+  :
+fi
+rm -f conftest*
+
+done
+rm -f conftest*
+if test -z "$gmp_cv_asm_lsym_prefix"; then
+  gmp_cv_asm_lsym_prefix=L
+  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cannot determine local label, using default $gmp_cv_asm_lsym_prefix" >&5
+$as_echo "$as_me: WARNING: cannot determine local label, using default $gmp_cv_asm_lsym_prefix" >&2;}
+fi
+# for development purposes, note whether we got a purely temporary local label
+echo "Local label appears in object files: $gmp_tmp_pre_appears" >&5
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_cv_asm_lsym_prefix" >&5
+$as_echo "$gmp_cv_asm_lsym_prefix" >&6; }
+echo "define(<LSYM_PREFIX>, <${gmp_cv_asm_lsym_prefix}>)" >> $gmp_tmpconfigm4
+
+cat >>confdefs.h <<_ACEOF
+#define LSYM_PREFIX "$gmp_cv_asm_lsym_prefix"
+_ACEOF
+
+
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for assembler byte directive" >&5
+$as_echo_n "checking for assembler byte directive... " >&6; }
+if test "${gmp_cv_asm_byte+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  for i in .byte data1; do
+  echo "trying $i" >&5
+  cat >conftest.s <<EOF
+       $gmp_cv_asm_data
+       $i      0
+
+EOF
+gmp_assemble="$CCAS $CFLAGS $CPPFLAGS conftest.s >conftest.out 2>&1"
+if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_assemble\""; } >&5
+  (eval $gmp_assemble) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  cat conftest.out >&5
+  gmp_cv_asm_byte=$i
+     rm -f conftest*
+     break
+else
+  cat conftest.out >&5
+  echo "configure: failed program was:" >&5
+  cat conftest.s >&5
+  cat conftest.out >&5
+fi
+rm -f conftest*
+
+done
+if test -z "$gmp_cv_asm_byte"; then
+  as_fn_error "Cannot determine how to emit a data byte" "$LINENO" 5
+fi
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_cv_asm_byte" >&5
+$as_echo "$gmp_cv_asm_byte" >&6; }
+
+
+
+
+
+
+# Determine the directive that defines a 32-bit word (gmp_cv_asm_w32).
+# Each candidate is assembled with the global label "foo" placed directly
+# after it; a candidate is accepted when the value nm reports for foo is 4.
+# The chosen directive is written to config.m4 as W32.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to define a 32-bit word" >&5
+$as_echo_n "checking how to define a 32-bit word... " >&6; }
+if test "${gmp_cv_asm_w32+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  case $host in
+  *-*-hpux*)
+    # FIXME: HPUX puts first symbol at 0x40000000, breaking our assumption
+    # that it's at 0x0.  We'll have to declare another symbol before the
+    # .long/.word and look at the distance between the two symbols.  The
+    # only problem is that the sed expression(s) barfs (on Solaris, for
+    # example) for the symbol with value 0.  For now, HPUX uses .word.
+    gmp_cv_asm_w32=".word"
+    ;;
+  *-*-*)
+    gmp_tmp_val=
+    for gmp_tmp_op in .long .word data4; do
+      cat >conftest.s <<EOF
+       $gmp_cv_asm_data
+       $gmp_cv_asm_globl       foo
+       $gmp_tmp_op     0
+foo$gmp_cv_asm_label_suffix
+       $gmp_cv_asm_byte        0
+EOF
+gmp_assemble="$CCAS $CFLAGS $CPPFLAGS conftest.s >conftest.out 2>&1"
+if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_assemble\""; } >&5
+  (eval $gmp_assemble) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  cat conftest.out >&5
+  # Reduce the nm line for foo to a bare number: the first sed expression
+  # removes a "[N]" group, the second skips everything up to the first
+  # non-zero digit and keeps the digits that follow.
+  gmp_tmp_val=`$NM conftest.$OBJEXT | grep foo | \
+          sed -e 's;[[][0-9][]]\(.*\);\1;' -e 's;[^1-9]*\([0-9]*\).*;\1;'`
+        if test "$gmp_tmp_val" = 4; then
+          gmp_cv_asm_w32="$gmp_tmp_op"
+          break
+        fi
+else
+  cat conftest.out >&5
+  echo "configure: failed program was:" >&5
+  cat conftest.s >&5
+  :
+fi
+rm -f conftest*
+
+    done
+    # Also cleans up after the "break" above, which skips the in-loop rm.
+    rm -f conftest*
+    ;;
+esac
+if test -z "$gmp_cv_asm_w32"; then
+  as_fn_error "cannot determine how to define a 32-bit word" "$LINENO" 5
+fi
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_cv_asm_w32" >&5
+$as_echo "$gmp_cv_asm_w32" >&6; }
+echo "define(<W32>, <$gmp_cv_asm_w32>)" >> $gmp_tmpconfigm4
+
+
+
+
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if .align assembly directive is logarithmic" >&5
+$as_echo_n "checking if .align assembly directive is logarithmic... " >&6; }
+if test "${gmp_cv_asm_align_log+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat >conftest.s <<EOF
+       $gmp_cv_asm_data
+       .align  4
+       $gmp_cv_asm_globl       foo
+       $gmp_cv_asm_byte        1
+       .align  4
+foo$gmp_cv_asm_label_suffix
+       $gmp_cv_asm_byte        2
+EOF
+gmp_assemble="$CCAS $CFLAGS $CPPFLAGS conftest.s >conftest.out 2>&1"
+if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_assemble\""; } >&5
+  (eval $gmp_assemble) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  cat conftest.out >&5
+  gmp_tmp_val=`$NM conftest.$OBJEXT | grep foo | \
+     sed -e 's;[[][0-9][]]\(.*\);\1;' -e 's;[^1-9]*\([0-9]*\).*;\1;'`
+  if test "$gmp_tmp_val" = "10" || test "$gmp_tmp_val" = "16"; then
+    gmp_cv_asm_align_log=yes
+  else
+    gmp_cv_asm_align_log=no
+  fi
+else
+  cat conftest.out >&5
+  echo "configure: failed program was:" >&5
+  cat conftest.s >&5
+  as_fn_error "cannot assemble alignment test" "$LINENO" 5
+fi
+rm -f conftest*
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_cv_asm_align_log" >&5
+$as_echo "$gmp_cv_asm_align_log" >&6; }
+
+
+echo "define(<ALIGN_LOGARITHMIC>,<$gmp_cv_asm_align_log>)" >> $gmp_tmpconfigm4
+
+
+
+  # Host-specific assembler checks and m4 include selection.
+  case $host in
+    hppa*-*-*)
+      # for both pa32 and pa64
+
+echo "include_mpn(\`pa32/pa-defs.m4')" >> $gmp_tmpconfigm4i
+
+      ;;
+    ia64*-*-* | itanium-*-* | itanium2-*-*)
+      # Assemble two 32-byte aligned bundles and scan the octal dump of the
+      # object file for the 48-byte sequence listed in conftest.awk.  The awk
+      # script keeps a sliding window of the last 48 dumped bytes and prints
+      # "no" as soon as the window equals that sequence, "yes" otherwise.
+      # If the test program cannot be assembled the result defaults to "yes".
+      # The result is written to config.m4 as IA64_ALIGN_OK.
+      { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether assembler .align padding is good" >&5
+$as_echo_n "checking whether assembler .align padding is good... " >&6; }
+if test "${gmp_cv_asm_ia64_align_ok+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat >conftest.awk <<\EOF
+BEGIN {
+  want[0]  = "011"
+  want[1]  = "160"
+  want[2]  = "074"
+  want[3]  = "040"
+  want[4]  = "000"
+  want[5]  = "040"
+  want[6]  = "020"
+  want[7]  = "221"
+  want[8]  = "114"
+  want[9]  = "000"
+  want[10] = "100"
+  want[11] = "200"
+  want[12] = "122"
+  want[13] = "261"
+  want[14] = "000"
+  want[15] = "200"
+
+  want[16] = "000"
+  want[17] = "004"
+  want[18] = "000"
+  want[19] = "000"
+  want[20] = "000"
+  want[21] = "000"
+  want[22] = "002"
+  want[23] = "000"
+  want[24] = "000"
+  want[25] = "000"
+  want[26] = "000"
+  want[27] = "001"
+  want[28] = "000"
+  want[29] = "000"
+  want[30] = "000"
+  want[31] = "014"
+
+  want[32] = "011"
+  want[33] = "270"
+  want[34] = "140"
+  want[35] = "062"
+  want[36] = "000"
+  want[37] = "040"
+  want[38] = "240"
+  want[39] = "331"
+  want[40] = "160"
+  want[41] = "000"
+  want[42] = "100"
+  want[43] = "240"
+  want[44] = "343"
+  want[45] = "371"
+  want[46] = "000"
+  want[47] = "200"
+
+  result = "yes"
+}
+{
+  for (f = 2; f <= NF; f++)
+    {
+      for (i = 0; i < 47; i++)
+        got[i] = got[i+1];
+      got[47] = $f;
+
+      found = 1
+      for (i = 0; i < 48; i++)
+        if (got[i] != want[i])
+          {
+            found = 0
+            break
+          }
+      if (found)
+        {
+          result = "no"
+          exit
+        }
+    }
+}
+END {
+  print result
+}
+EOF
+cat >conftest.s <<EOF
+       .text
+       .align  32
+{ .mmi;        add     r14 = r15, r16
+       add     r17 = r18, r19
+       add     r20 = r21, r22 ;; }
+       .align  32
+{ .mmi;        add     r23 = r24, r25
+       add     r26 = r27, r28
+       add     r29 = r30, r31 ;; }
+
+EOF
+gmp_assemble="$CCAS $CFLAGS $CPPFLAGS conftest.s >conftest.out 2>&1"
+if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_assemble\""; } >&5
+  (eval $gmp_assemble) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  cat conftest.out >&5
+  # Field 1 of each "od -b" line is the offset; the awk script starts at
+  # field 2 and therefore only looks at the dumped bytes.
+  gmp_cv_asm_ia64_align_ok=`od -b conftest.$OBJEXT | $AWK -f conftest.awk`
+else
+  cat conftest.out >&5
+  echo "configure: failed program was:" >&5
+  cat conftest.s >&5
+  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: oops, cannot compile test program" >&5
+$as_echo "$as_me: WARNING: oops, cannot compile test program" >&2;}
+   gmp_cv_asm_ia64_align_ok=yes
+fi
+rm -f conftest*
+
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_cv_asm_ia64_align_ok" >&5
+$as_echo "$gmp_cv_asm_ia64_align_ok" >&6; }
+
+echo "define(<IA64_ALIGN_OK>, <\`$gmp_cv_asm_ia64_align_ok'>)" >> $gmp_tmpconfigm4
+
+
+      ;;
+    m68k-*-* | m68[0-9][0-9][0-9]-*-*)
+
+# Find the m68k mnemonic and register spelling the assembler accepts.  Four
+# variants of the same add instruction are tried ("addl"/"add.l", registers
+# with or without "%"); the first that assembles is cached in
+# gmp_cv_asm_m68k_instruction and then decoded into WANT_DOT_SIZE and
+# WANT_REGISTER_PERCENT for config.m4.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler instruction and register style" >&5
+$as_echo_n "checking assembler instruction and register style... " >&6; }
+if test "${gmp_cv_asm_m68k_instruction+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  for i in "addl %d0,%d1" "add.l %d0,%d1" "addl d0,d1" "add.l d0,d1"; do
+  cat >conftest.s <<EOF
+       $gmp_cv_asm_text
+       $i
+EOF
+gmp_assemble="$CCAS $CFLAGS $CPPFLAGS conftest.s >conftest.out 2>&1"
+if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_assemble\""; } >&5
+  (eval $gmp_assemble) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  cat conftest.out >&5
+  gmp_cv_asm_m68k_instruction=$i
+    rm -f conftest*
+    break
+else
+  cat conftest.out >&5
+  echo "configure: failed program was:" >&5
+  cat conftest.s >&5
+  :
+fi
+rm -f conftest*
+
+done
+if test -z "$gmp_cv_asm_m68k_instruction"; then
+  as_fn_error "cannot determine assembler instruction and register style" "$LINENO" 5
+fi
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_cv_asm_m68k_instruction" >&5
+$as_echo "$gmp_cv_asm_m68k_instruction" >&6; }
+# Translate the accepted instruction text into the two yes/no settings.
+case $gmp_cv_asm_m68k_instruction in
+"addl d0,d1")    want_dot_size=no;  want_register_percent=no  ;;
+"addl %d0,%d1")  want_dot_size=no;  want_register_percent=yes ;;
+"add.l d0,d1")   want_dot_size=yes; want_register_percent=no  ;;
+"add.l %d0,%d1") want_dot_size=yes; want_register_percent=yes ;;
+*) as_fn_error "oops, unrecognised instruction and register style" "$LINENO" 5 ;;
+esac
+
+echo "define(<WANT_REGISTER_PERCENT>, <\`$want_register_percent'>)" >> $gmp_tmpconfigm4
+
+
+echo "define(<WANT_DOT_SIZE>, <\`$want_dot_size'>)" >> $gmp_tmpconfigm4
+
+
+
+
+# Determine the m68k addressing syntax (gmp_cv_asm_m68k_addressing).  A
+# pre-decrement store is assembled first in the "areg@-" form, which yields
+# "mit"; if that fails the "-(areg)" form is tried, which yields "motorola".
+# If neither assembles configure aborts.  The mnemonic and register spelling
+# are taken from gmp_cv_asm_m68k_instruction.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler addressing style" >&5
+$as_echo_n "checking assembler addressing style... " >&6; }
+if test "${gmp_cv_asm_m68k_addressing+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  case $gmp_cv_asm_m68k_instruction in
+addl*)  movel=movel ;;
+add.l*) movel=move.l ;;
+*) as_fn_error "oops, unrecognised gmp_cv_asm_m68k_instruction" "$LINENO" 5 ;;
+esac
+case $gmp_cv_asm_m68k_instruction in
+*"%d0,%d1") dreg=%d0; areg=%a0 ;;
+*"d0,d1")   dreg=d0;  areg=a0  ;;
+*) as_fn_error "oops, unrecognised gmp_cv_asm_m68k_instruction" "$LINENO" 5 ;;
+esac
+cat >conftest.s <<EOF
+       $gmp_cv_asm_text
+       $movel  $dreg, $areg@-
+EOF
+gmp_assemble="$CCAS $CFLAGS $CPPFLAGS conftest.s >conftest.out 2>&1"
+if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_assemble\""; } >&5
+  (eval $gmp_assemble) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  cat conftest.out >&5
+  gmp_cv_asm_m68k_addressing=mit
+else
+  cat conftest.out >&5
+  echo "configure: failed program was:" >&5
+  cat conftest.s >&5
+  cat >conftest.s <<EOF
+       $gmp_cv_asm_text
+       $movel  $dreg, -($areg)
+EOF
+gmp_assemble="$CCAS $CFLAGS $CPPFLAGS conftest.s >conftest.out 2>&1"
+if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_assemble\""; } >&5
+  (eval $gmp_assemble) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  cat conftest.out >&5
+  gmp_cv_asm_m68k_addressing=motorola
+else
+  cat conftest.out >&5
+  echo "configure: failed program was:" >&5
+  cat conftest.s >&5
+  as_fn_error "cannot determine assembler addressing style" "$LINENO" 5
+fi
+rm -f conftest*
+
+fi
+rm -f conftest*
+
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_cv_asm_m68k_addressing" >&5
+$as_echo "$gmp_cv_asm_m68k_addressing" >&6; }
+
+echo "define(<WANT_ADDRESSING>, <\`$gmp_cv_asm_m68k_addressing'>)" >> $gmp_tmpconfigm4
+
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler shortest branches" >&5
+$as_echo_n "checking assembler shortest branches... " >&6; }
+if test "${gmp_cv_asm_m68k_branches+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  for i in jra jbra bra; do
+  cat >conftest.s <<EOF
+       $gmp_cv_asm_text
+foo$gmp_cv_asm_label_suffix
+       $i      foo
+EOF
+gmp_assemble="$CCAS $CFLAGS $CPPFLAGS conftest.s >conftest.out 2>&1"
+if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_assemble\""; } >&5
+  (eval $gmp_assemble) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  cat conftest.out >&5
+  gmp_cv_asm_m68k_branches=$i
+  rm -f conftest*
+  break
+else
+  cat conftest.out >&5
+  echo "configure: failed program was:" >&5
+  cat conftest.s >&5
+  :
+fi
+rm -f conftest*
+
+done
+if test -z "$gmp_cv_asm_m68k_branches"; then
+  as_fn_error "cannot determine assembler branching style" "$LINENO" 5
+fi
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_cv_asm_m68k_branches" >&5
+$as_echo "$gmp_cv_asm_m68k_branches" >&6; }
+
+echo "define(<WANT_BRANCHES>, <\`$gmp_cv_asm_m68k_branches'>)" >> $gmp_tmpconfigm4
+
+
+      ;;
+    powerpc*-*-* | power[3-9]-*-*)
+
+# Decide whether the compiler emits PIC by default (gmp_cv_asm_powerpc_pic).
+# A function returning the address of a global is compiled to assembly.  The
+# answer starts as "yes" and becomes "no" if the output contains "foo@ha" or
+# "ha16(_foo)".  A failed compile leaves the answer at "yes".
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler output is PIC by default" >&5
+$as_echo_n "checking whether compiler output is PIC by default... " >&6; }
+if test "${gmp_cv_asm_powerpc_pic+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  gmp_cv_asm_powerpc_pic=yes
+cat >conftest.c <<EOF
+int foo;
+int *bar() { return &foo; }
+EOF
+echo "Test program:" >&5
+cat conftest.c >&5
+gmp_compile="$CC $CFLAGS $CPPFLAGS -S conftest.c >&5"
+if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_compile\""; } >&5
+  (eval $gmp_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  echo "Compiler output:" >&5
+  cat conftest.s >&5
+  if grep 'foo@ha' conftest.s >/dev/null 2>&5; then
+    gmp_cv_asm_powerpc_pic=no
+  fi
+  if grep 'ha16(_foo)' conftest.s >/dev/null 2>&5; then
+    gmp_cv_asm_powerpc_pic=no
+  fi
+fi
+rm -f conftest*
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_cv_asm_powerpc_pic" >&5
+$as_echo "$gmp_cv_asm_powerpc_pic" >&6; }
+
+echo "define(<PIC_ALWAYS>,<$gmp_cv_asm_powerpc_pic>)" >> $gmp_tmpconfigm4
+
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if the assembler needs r on registers" >&5
+$as_echo_n "checking if the assembler needs r on registers... " >&6; }
+if test "${gmp_cv_asm_powerpc_r_registers+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat >conftest.s <<EOF
+       $gmp_cv_asm_text
+       mtctr   6
+EOF
+gmp_assemble="$CCAS $CFLAGS $CPPFLAGS conftest.s >conftest.out 2>&1"
+if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_assemble\""; } >&5
+  (eval $gmp_assemble) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  cat conftest.out >&5
+  gmp_cv_asm_powerpc_r_registers=no
+else
+  cat conftest.out >&5
+  echo "configure: failed program was:" >&5
+  cat conftest.s >&5
+  cat >conftest.s <<EOF
+       $gmp_cv_asm_text
+       mtctr   r6
+EOF
+gmp_assemble="$CCAS $CFLAGS $CPPFLAGS conftest.s >conftest.out 2>&1"
+if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_assemble\""; } >&5
+  (eval $gmp_assemble) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  cat conftest.out >&5
+  gmp_cv_asm_powerpc_r_registers=yes
+else
+  cat conftest.out >&5
+  echo "configure: failed program was:" >&5
+  cat conftest.s >&5
+  as_fn_error "neither \"mtctr 6\" nor \"mtctr r6\" works" "$LINENO" 5
+fi
+rm -f conftest*
+
+fi
+rm -f conftest*
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_cv_asm_powerpc_r_registers" >&5
+$as_echo "$gmp_cv_asm_powerpc_r_registers" >&6; }
+
+
+echo "define(<WANT_R_REGISTERS>,<$gmp_cv_asm_powerpc_r_registers>)" >> $gmp_tmpconfigm4
+
+
+
+# Queue the common powerpc definitions, then one OS/ABI specific m4 file.
+echo "include_mpn(\`powerpc32/powerpc-defs.m4')" >> $gmp_tmpconfigm4i
+
+      case $host in
+        *-*-aix*)
+         case $ABI in
+           64 | aix64)
+echo "include_mpn(\`powerpc64/aix.m4')" >> $gmp_tmpconfigm4i
+ ;;
+            *)
+echo "include_mpn(\`powerpc32/aix.m4')" >> $gmp_tmpconfigm4i
+ ;;
+          esac
+          ;;
+        *-*-linux* | *-*-*bsd*)
+         # Only ABI values mode64, mode32 and 32 add an include here.
+         case $ABI in
+           mode64)
+echo "include_mpn(\`powerpc64/elf.m4')" >> $gmp_tmpconfigm4i
+ ;;
+           mode32 | 32)
+echo "include_mpn(\`powerpc32/elf.m4')" >> $gmp_tmpconfigm4i
+ ;;
+          esac
+          ;;
+        *-*-darwin*)
+         # Only ABI values mode64, mode32 and 32 add an include here.
+         case $ABI in
+           mode64)
+echo "include_mpn(\`powerpc64/darwin.m4')" >> $gmp_tmpconfigm4i
+ ;;
+           mode32 | 32)
+echo "include_mpn(\`powerpc32/darwin.m4')" >> $gmp_tmpconfigm4i
+ ;;
+          esac
+          ;;
+        *)
+         # Assume unrecognized operating system is the powerpc eABI
+
+echo "include_mpn(\`powerpc32/eabi.m4')" >> $gmp_tmpconfigm4i
+
+         ;;
+      esac
+      ;;
+    power*-*-aix*)
+      # Reached only by hosts not matched by the powerpc arm earlier in
+      # this case statement.
+
+echo "include_mpn(\`powerpc32/aix.m4')" >> $gmp_tmpconfigm4i
+
+      ;;
+    sparcv9*-*-* | ultrasparc*-*-* | sparc64-*-*)
+      case $ABI in
+        64)
+
+# For ABI 64 only: record whether the assembler accepts a ".register"
+# directive (gmp_cv_asm_sparc_register, written to config.m4 as
+# HAVE_REGISTER).  The answer is "yes" when the one-line test assembles.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if the assembler accepts \".register\"" >&5
+$as_echo_n "checking if the assembler accepts \".register\"... " >&6; }
+if test "${gmp_cv_asm_sparc_register+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat >conftest.s <<EOF
+       $gmp_cv_asm_text
+       .register       %g2,#scratch
+
+EOF
+gmp_assemble="$CCAS $CFLAGS $CPPFLAGS conftest.s >conftest.out 2>&1"
+if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_assemble\""; } >&5
+  (eval $gmp_assemble) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  cat conftest.out >&5
+  gmp_cv_asm_sparc_register=yes
+else
+  cat conftest.out >&5
+  echo "configure: failed program was:" >&5
+  cat conftest.s >&5
+  gmp_cv_asm_sparc_register=no
+fi
+rm -f conftest*
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_cv_asm_sparc_register" >&5
+$as_echo "$gmp_cv_asm_sparc_register" >&6; }
+
+
+echo "define(<HAVE_REGISTER>,<$gmp_cv_asm_sparc_register>)" >> $gmp_tmpconfigm4
+
+
+          ;;
+      esac
+      ;;
+    i?86*-*-* | k[5-8]*-*-* | pentium*-*-* | athlon-*-* | viac3*-*-* | geode*-*-* | atom-*-* | athlon64-*-* | k8-*-* | k10-*-* | bobcat-*-* | bulldozer-*-* | pentium4-*-* | atom-*-* | core2-*-* | corei*-*-* | x86_64-*-* | nano-*-*)
+
+# Decide whether ".align 4, 0x90" may be used in the text section
+# (gmp_cv_asm_align_fill_0x90, written to config.m4 as ALIGN_FILL_0x90).
+# The answer is "no" when the test fails to assemble, or when it assembles
+# but the assembler prints the "Fill parameter ignored" warning; otherwise
+# it is "yes".
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if the .align directive accepts an 0x90 fill in .text" >&5
+$as_echo_n "checking if the .align directive accepts an 0x90 fill in .text... " >&6; }
+if test "${gmp_cv_asm_align_fill_0x90+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat >conftest.s <<EOF
+       $gmp_cv_asm_text
+       .align  4, 0x90
+       .byte   0
+       .align  4, 0x90
+EOF
+gmp_assemble="$CCAS $CFLAGS $CPPFLAGS conftest.s >conftest.out 2>&1"
+if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_assemble\""; } >&5
+  (eval $gmp_assemble) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  cat conftest.out >&5
+  # A successful exit status is not enough: the captured assembler output
+  # is searched for the warning text as well.
+  if grep "Warning: Fill parameter ignored for executable section" conftest.out >/dev/null; then
+  echo "Supressing this warning by omitting 0x90" 1>&5
+  gmp_cv_asm_align_fill_0x90=no
+else
+  gmp_cv_asm_align_fill_0x90=yes
+fi
+else
+  cat conftest.out >&5
+  echo "configure: failed program was:" >&5
+  cat conftest.s >&5
+  gmp_cv_asm_align_fill_0x90=no
+fi
+rm -f conftest*
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_cv_asm_align_fill_0x90" >&5
+$as_echo "$gmp_cv_asm_align_fill_0x90" >&6; }
+
+
+echo "define(<ALIGN_FILL_0x90>,<$gmp_cv_asm_align_fill_0x90>)" >> $gmp_tmpconfigm4
+
+
+      case $ABI in
+        32)
+
+echo "include_mpn(\`x86/x86-defs.m4')" >> $gmp_tmpconfigm4i
+
+          $as_echo "#define HAVE_HOST_CPU_FAMILY_x86 1" >>confdefs.h
+
+
+
+
+
+
+# Check whether the assembler accepts the COFF ".def/.scl/.type/.endef"
+# sequence for a global symbol (gmp_cv_asm_x86_coff_type, written to
+# config.m4 as HAVE_COFF_TYPE).  The answer is "yes" when the test program
+# assembles and "no" otherwise.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for assembler COFF type directives" >&5
+$as_echo_n "checking for assembler COFF type directives... " >&6; }
+if test "${gmp_cv_asm_x86_coff_type+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  # The test program expands ${tmp_gsym_prefix}.  Derive it here from the
+  # underscore check result so this probe does not rely on an assignment
+  # made by a different check.
+  # NOTE(review): assumes gmp_cv_asm_underscore has been determined by the
+  # time this runs; if it is unset the prefix is empty.
+  if test "$gmp_cv_asm_underscore" = yes; then
+    tmp_gsym_prefix=_
+  else
+    tmp_gsym_prefix=
+  fi
+  cat >conftest.s <<EOF
+       $gmp_cv_asm_text
+       $gmp_cv_asm_globl ${tmp_gsym_prefix}foo$gmp_cv_asm_globl_attr
+       .def    ${tmp_gsym_prefix}foo
+       .scl    2
+       .type   32
+       .endef
+${tmp_gsym_prefix}foo$gmp_cv_asm_label_suffix
+
+EOF
+gmp_assemble="$CCAS $CFLAGS $CPPFLAGS conftest.s >conftest.out 2>&1"
+if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_assemble\""; } >&5
+  (eval $gmp_assemble) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  cat conftest.out >&5
+  gmp_cv_asm_x86_coff_type=yes
+else
+  cat conftest.out >&5
+  echo "configure: failed program was:" >&5
+  cat conftest.s >&5
+  gmp_cv_asm_x86_coff_type=no
+fi
+rm -f conftest*
+
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_cv_asm_x86_coff_type" >&5
+$as_echo "$gmp_cv_asm_x86_coff_type" >&6; }
+echo "define(<HAVE_COFF_TYPE>, <$gmp_cv_asm_x86_coff_type>)" >> $gmp_tmpconfigm4
+
+
+
+
+
+
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if _GLOBAL_OFFSET_TABLE_ is prefixed by underscore" >&5
+$as_echo_n "checking if _GLOBAL_OFFSET_TABLE_ is prefixed by underscore... " >&6; }
+if test "${gmp_cv_asm_x86_got_underscore+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  gmp_cv_asm_x86_got_underscore="not applicable"
+if test $gmp_cv_asm_underscore = yes; then
+  tmp_gsym_prefix=_
+else
+  tmp_gsym_prefix=
+fi
+for tmp_underscore in "" "_"; do
+  cat >conftest.s <<EOF
+       $gmp_cv_asm_text
+       $gmp_cv_asm_globl ${tmp_gsym_prefix}main$gmp_cv_asm_globl_attr
+${tmp_gsym_prefix}main$gmp_cv_asm_label_suffix
+       addl    $ ${tmp_underscore}_GLOBAL_OFFSET_TABLE_, %ebx
+EOF
+  gmp_compile="$CCAS $CFLAGS $CPPFLAGS $lt_prog_compiler_pic conftest.s >&5 && $CC $CFLAGS $CPPFLAGS $lt_prog_compiler_pic conftest.$OBJEXT >&5"
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_compile\""; } >&5
+  (eval $gmp_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+    if test "$tmp_underscore" = "_"; then
+      gmp_cv_asm_x86_got_underscore=yes
+    else
+      gmp_cv_asm_x86_got_underscore=no
+    fi
+    break
+  fi
+done
+rm -f conftest* a.out b.out a.exe a_out.exe
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_cv_asm_x86_got_underscore" >&5
+$as_echo "$gmp_cv_asm_x86_got_underscore" >&6; }
+if test "$gmp_cv_asm_x86_got_underscore" = "yes"; then
+
+echo 'define(<GOT_GSYM_PREFIX>, <_>)' >>$gmp_tmpconfigm4
+
+else
+
+echo 'define(<GOT_GSYM_PREFIX>, <>)' >>$gmp_tmpconfigm4
+
+fi
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if the assembler takes cl with shldl" >&5
+$as_echo_n "checking if the assembler takes cl with shldl... " >&6; }
+if test "${gmp_cv_asm_x86_shldl_cl+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat >conftest.s <<EOF
+       $gmp_cv_asm_text
+       shldl   %cl, %eax, %ebx
+EOF
+gmp_assemble="$CCAS $CFLAGS $CPPFLAGS conftest.s >conftest.out 2>&1"
+if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_assemble\""; } >&5
+  (eval $gmp_assemble) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  cat conftest.out >&5
+  gmp_cv_asm_x86_shldl_cl=yes
+else
+  cat conftest.out >&5
+  echo "configure: failed program was:" >&5
+  cat conftest.s >&5
+  gmp_cv_asm_x86_shldl_cl=no
+fi
+rm -f conftest*
+
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_cv_asm_x86_shldl_cl" >&5
+$as_echo "$gmp_cv_asm_x86_shldl_cl" >&6; }
+if test "$gmp_cv_asm_x86_shldl_cl" = "yes"; then
+
+echo 'define(<WANT_SHLDL_CL>, <1>)' >>$gmp_tmpconfigm4
+
+else
+
+echo 'define(<WANT_SHLDL_CL>, <0>)' >>$gmp_tmpconfigm4
+
+fi
+
+         # With prof/gprof profiling enabled, find out how the compiler calls
+         # mcount so the assembly routines can do the same.  The call line
+         # and the register used are taken from compiler output for a
+         # trivial function, separately for non-PIC (static) and PIC
+         # (shared) builds, and written to config.m4 as MCOUNT_NONPIC_REG,
+         # MCOUNT_NONPIC_CALL, MCOUNT_PIC_REG and MCOUNT_PIC_CALL.
+         case $enable_profiling in
+           prof | gprof)  # Check whether --enable-shared was given.
+if test "${enable_shared+set}" = set; then :
+  enableval=$enable_shared; p=${PACKAGE-default}
+    case $enableval in
+    yes) enable_shared=yes ;;
+    no) enable_shared=no ;;
+    *)
+      enable_shared=no
+      # Look at the argument we got.  We use all the common list separators.
+      lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
+      for pkg in $enableval; do
+       IFS="$lt_save_ifs"
+       if test "X$pkg" = "X$p"; then
+         enable_shared=yes
+       fi
+      done
+      IFS="$lt_save_ifs"
+      ;;
+    esac
+else
+  enable_shared=yes
+fi
+
+
+
+
+
+
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to call x86 mcount" >&5
+$as_echo_n "checking how to call x86 mcount... " >&6; }
+cat >conftest.c <<EOF
+foo(){bar();}
+EOF
+
+# Start from empty values.  Each pair below is only assigned when the
+# corresponding library flavour is enabled, but all four are always written
+# to config.m4, so values inherited from the environment must not leak in.
+mcount_nonpic_reg=
+mcount_nonpic_call=
+mcount_pic_reg=
+mcount_pic_call=
+
+if test "$enable_static" = yes; then
+  gmp_asmout_compile="$CC $CFLAGS $CPPFLAGS -S conftest.c 1>&5"
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_asmout_compile\""; } >&5
+  (eval $gmp_asmout_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+    # A register is only looked for when the output has a .data section;
+    # it is the destination of a movl on a line that does not mention esp.
+    if grep '\.data' conftest.s >/dev/null; then
+      mcount_nonpic_reg=`sed -n '/esp/!s/.*movl.*,\(%[a-z]*\).*$/\1/p' conftest.s`
+    else
+      mcount_nonpic_reg=
+    fi
+    mcount_nonpic_call=`grep 'call.*mcount' conftest.s`
+    if test -z "$mcount_nonpic_call"; then
+      as_fn_error "Cannot find mcount call for non-PIC" "$LINENO" 5
+    fi
+  else
+    as_fn_error "Cannot compile test program for non-PIC" "$LINENO" 5
+  fi
+fi
+
+if test "$enable_shared" = yes; then
+  gmp_asmout_compile="$CC $CFLAGS $CPPFLAGS $lt_prog_compiler_pic -S conftest.c 1>&5"
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$gmp_asmout_compile\""; } >&5
+  (eval $gmp_asmout_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+    if grep '\.data' conftest.s >/dev/null; then
+      case $lt_prog_compiler_pic in
+        *-DDLL_EXPORT*)
+          # Windows DLLs have non-PIC style mcount
+          mcount_pic_reg=`sed -n '/esp/!s/.*movl.*,\(%[a-z]*\).*$/\1/p' conftest.s`
+          ;;
+        *)
+          # Otherwise the register is taken from the line using GOTOFF.
+          mcount_pic_reg=`sed -n 's/.*GOTOFF.*,\(%[a-z]*\).*$/\1/p' conftest.s`
+          ;;
+      esac
+    else
+      mcount_pic_reg=
+    fi
+    mcount_pic_call=`grep 'call.*mcount' conftest.s`
+    if test -z "$mcount_pic_call"; then
+      as_fn_error "Cannot find mcount call for PIC" "$LINENO" 5
+    fi
+  else
+    as_fn_error "Cannot compile test program for PIC" "$LINENO" 5
+  fi
+fi
+
+
+echo "define(<MCOUNT_NONPIC_REG>, <\`$mcount_nonpic_reg'>)" >> $gmp_tmpconfigm4
+
+
+echo "define(<MCOUNT_NONPIC_CALL>,<\`$mcount_nonpic_call'>)" >> $gmp_tmpconfigm4
+
+
+echo "define(<MCOUNT_PIC_REG>,    <\`$mcount_pic_reg'>)" >> $gmp_tmpconfigm4
+
+
+echo "define(<MCOUNT_PIC_CALL>,   <\`$mcount_pic_call'>)" >> $gmp_tmpconfigm4
+
+
+rm -f conftest.*
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: determined" >&5
+$as_echo "determined" >&6; }
+ ;;
+         esac
+         # 32-bit x86 on darwin additionally pulls in x86/darwin.m4.
+         case $host in
+           *-*-darwin*)
+
+echo "include_mpn(\`x86/darwin.m4')" >> $gmp_tmpconfigm4i
+ ;;
+         esac
+          ;;
+        64)
+          # ABI 64: queue the x86_64 definitions, define the CPU family
+          # macro, and add x86_64/darwin.m4 on darwin hosts.
+
+echo "include_mpn(\`x86_64/x86_64-defs.m4')" >> $gmp_tmpconfigm4i
+
+          $as_echo "#define HAVE_HOST_CPU_FAMILY_x86_64 1" >>confdefs.h
+
+         case $host in
+           *-*-darwin*)
+
+echo "include_mpn(\`x86_64/darwin.m4')" >> $gmp_tmpconfigm4i
+ ;;
+         esac
+          ;;
+      esac
+      ;;
+  esac
+fi
+
+# For --enable-minithres, prepend "minithres" to path so that its special
+# gmp-mparam.h will be used.
+if test $enable_minithres = yes; then
+  path="minithres $path"
+fi
+
+# Create link for gmp-mparam.h.
+gmp_mparam_source=
+for gmp_mparam_dir in $path; do
+  test "$no_create" = yes || rm -f gmp-mparam.h
+  tmp_file=$srcdir/mpn/$gmp_mparam_dir/gmp-mparam.h
+  if test -f $tmp_file; then
+    ac_config_links="$ac_config_links gmp-mparam.h:mpn/$gmp_mparam_dir/gmp-mparam.h"
+
+    gmp_srclinks="$gmp_srclinks gmp-mparam.h"
+    gmp_mparam_source=$tmp_file
+    break
+  fi
+done
+if test -z "$gmp_mparam_source"; then
+  as_fn_error "no version of gmp-mparam.h found in path: $path" "$LINENO" 5
+fi
+
+# For a helpful message from tune/tuneup.c
+gmp_mparam_suggest=$gmp_mparam_source
+if test "$gmp_mparam_dir" = generic; then
+  for i in $path; do break; done
+  if test "$i" != generic; then
+    gmp_mparam_suggest="new file $srcdir/mpn/$i/gmp-mparam.h"
+  fi
+fi
+
+cat >>confdefs.h <<_ACEOF
+#define GMP_MPARAM_H_SUGGEST "$gmp_mparam_source"
+_ACEOF
+
+
+
+# Copy any SQR_TOOM2_THRESHOLD from gmp-mparam.h to config.m4.
+# Some versions of sqr_basecase.asm use this.
+# Fat binaries do this on a per-file basis, so skip in that case.
+#
+if test -z "$fat_path"; then
+  tmp_gmp_karatsuba_sqr_threshold=`sed -n 's/^#define SQR_TOOM2_THRESHOLD[     ]*\([0-9][0-9]*\).*$/\1/p' $gmp_mparam_source`
+  if test -n "$tmp_gmp_karatsuba_sqr_threshold"; then
+
+echo "define(<SQR_TOOM2_THRESHOLD>,<$tmp_gmp_karatsuba_sqr_threshold>)" >> $gmp_tmpconfigm4
+
+  fi
+fi
+
+
+# Sizes of some types, needed at preprocessing time.
+#
+# FIXME: The assumption that GMP_LIMB_BITS is 8*sizeof(mp_limb_t) might
+# be slightly rash, but it's true everywhere we know of and ought to be true
+# of any sensible system.  In a generic C build, grepping LONG_BIT out of
+# <limits.h> might be an alternative, for maximum portability.
+#
+# The cast to long int works around a bug in the HP C Compiler
+# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects
+# declarations like `int a3[(sizeof (unsigned char)) >= 0];'.
+# This bug is HP SR number 8606223364.
+#
+# Computes sizeof (void *) into ac_cv_sizeof_void_p and defines
+# SIZEOF_VOID_P.  If the size cannot be computed, configure aborts when
+# ac_cv_type_void_p is "yes" and records a size of 0 otherwise.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of void *" >&5
+$as_echo_n "checking size of void *... " >&6; }
+if test "${ac_cv_sizeof_void_p+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (void *))" "ac_cv_sizeof_void_p"        "$ac_includes_default"; then :
+
+else
+  if test "$ac_cv_type_void_p" = yes; then
+     { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+{ as_fn_set_status 77
+as_fn_error "cannot compute sizeof (void *)
+See \`config.log' for more details." "$LINENO" 5; }; }
+   else
+     ac_cv_sizeof_void_p=0
+   fi
+fi
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_void_p" >&5
+$as_echo "$ac_cv_sizeof_void_p" >&6; }
+
+
+
+cat >>confdefs.h <<_ACEOF
+#define SIZEOF_VOID_P $ac_cv_sizeof_void_p
+_ACEOF
+
+# The cast to long int works around a bug in the HP C Compiler
+# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects
+# declarations like `int a3[(sizeof (unsigned char)) >= 0];'.
+# This bug is HP SR number 8606223364.
+#
+# Computes sizeof (unsigned short) into ac_cv_sizeof_unsigned_short and
+# defines SIZEOF_UNSIGNED_SHORT.  If the size cannot be computed, configure
+# aborts when ac_cv_type_unsigned_short is "yes" and records 0 otherwise.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of unsigned short" >&5
+$as_echo_n "checking size of unsigned short... " >&6; }
+if test "${ac_cv_sizeof_unsigned_short+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (unsigned short))" "ac_cv_sizeof_unsigned_short"        "$ac_includes_default"; then :
+
+else
+  if test "$ac_cv_type_unsigned_short" = yes; then
+     { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+{ as_fn_set_status 77
+as_fn_error "cannot compute sizeof (unsigned short)
+See \`config.log' for more details." "$LINENO" 5; }; }
+   else
+     ac_cv_sizeof_unsigned_short=0
+   fi
+fi
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_unsigned_short" >&5
+$as_echo "$ac_cv_sizeof_unsigned_short" >&6; }
+
+
+
+cat >>confdefs.h <<_ACEOF
+#define SIZEOF_UNSIGNED_SHORT $ac_cv_sizeof_unsigned_short
+_ACEOF
+
+# The cast to long int works around a bug in the HP C Compiler
+# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects
+# declarations like `int a3[(sizeof (unsigned char)) >= 0];'.
+# This bug is HP SR number 8606223364.
+#
+# Computes sizeof (unsigned) into ac_cv_sizeof_unsigned and defines
+# SIZEOF_UNSIGNED.  If the size cannot be computed, configure aborts when
+# ac_cv_type_unsigned is "yes" and records 0 otherwise.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of unsigned" >&5
+$as_echo_n "checking size of unsigned... " >&6; }
+if test "${ac_cv_sizeof_unsigned+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (unsigned))" "ac_cv_sizeof_unsigned"        "$ac_includes_default"; then :
+
+else
+  if test "$ac_cv_type_unsigned" = yes; then
+     { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+{ as_fn_set_status 77
+as_fn_error "cannot compute sizeof (unsigned)
+See \`config.log' for more details." "$LINENO" 5; }; }
+   else
+     ac_cv_sizeof_unsigned=0
+   fi
+fi
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_unsigned" >&5
+$as_echo "$ac_cv_sizeof_unsigned" >&6; }
+
+
+
+cat >>confdefs.h <<_ACEOF
+#define SIZEOF_UNSIGNED $ac_cv_sizeof_unsigned
+_ACEOF
+
+# The cast to long int works around a bug in the HP C Compiler
+# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects
+# declarations like `int a3[(sizeof (unsigned char)) >= 0];'.
+# This bug is HP SR number 8606223364.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of unsigned long" >&5
+$as_echo_n "checking size of unsigned long... " >&6; }
+if test "${ac_cv_sizeof_unsigned_long+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (unsigned long))" "ac_cv_sizeof_unsigned_long"        "$ac_includes_default"; then :
+
+else
+  if test "$ac_cv_type_unsigned_long" = yes; then
+     { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+{ as_fn_set_status 77
+as_fn_error "cannot compute sizeof (unsigned long)
+See \`config.log' for more details." "$LINENO" 5; }; }
+   else
+     ac_cv_sizeof_unsigned_long=0
+   fi
+fi
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_unsigned_long" >&5
+$as_echo "$ac_cv_sizeof_unsigned_long" >&6; }
+
+
+
+cat >>confdefs.h <<_ACEOF
+#define SIZEOF_UNSIGNED_LONG $ac_cv_sizeof_unsigned_long
+_ACEOF
+
+
+# The cast to long int works around a bug in the HP C Compiler
+# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects
+# declarations like `int a3[(sizeof (unsigned char)) >= 0];'.
+# This bug is HP SR number 8606223364.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of mp_limb_t" >&5
+$as_echo_n "checking size of mp_limb_t... " >&6; }
+if test "${ac_cv_sizeof_mp_limb_t+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (mp_limb_t))" "ac_cv_sizeof_mp_limb_t"        "#define __GMP_WITHIN_CONFIGURE 1   /* ignore template stuff */
+#define GMP_NAIL_BITS $GMP_NAIL_BITS
+#define GMP_LIMB_BITS 123
+$DEFN_LONG_LONG_LIMB
+#include \"$srcdir/gmp-h.in\"
+
+"; then :
+
+else
+  if test "$ac_cv_type_mp_limb_t" = yes; then
+     { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+{ as_fn_set_status 77
+as_fn_error "cannot compute sizeof (mp_limb_t)
+See \`config.log' for more details." "$LINENO" 5; }; }
+   else
+     ac_cv_sizeof_mp_limb_t=0
+   fi
+fi
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_mp_limb_t" >&5
+$as_echo "$ac_cv_sizeof_mp_limb_t" >&6; }
+
+
+
+cat >>confdefs.h <<_ACEOF
+#define SIZEOF_MP_LIMB_T $ac_cv_sizeof_mp_limb_t
+_ACEOF
+
+
+if test "$ac_cv_sizeof_mp_limb_t" = 0; then
+  as_fn_error "Oops, mp_limb_t doesn't seem to work" "$LINENO" 5
+fi
+GMP_LIMB_BITS=`expr 8 \* $ac_cv_sizeof_mp_limb_t`
+
+
+echo "define(<SIZEOF_UNSIGNED>,<$ac_cv_sizeof_unsigned>)" >> $gmp_tmpconfigm4
+
+
+# Check compiler limb size matches gmp-mparam.h
+#
+# FIXME: Some of the cycle counter objects in the tune directory depend on
+# the size of ulong.  That could be checked here too, though a mismatch
+# probably shouldn't be fatal, since none of the libgmp assembler code
+# depends on ulong.
+#
+mparam_bits=`sed -n 's/^#define GMP_LIMB_BITS[         ][      ]*\([0-9]*\).*$/\1/p' $gmp_mparam_source`
+if test -n "$mparam_bits" && test "$mparam_bits" -ne $GMP_LIMB_BITS; then
+  if test "$test_CFLAGS" = set; then
+    as_fn_error "Oops, mp_limb_t is $GMP_LIMB_BITS bits, but the assembler code
+in this configuration expects $mparam_bits bits.
+You appear to have set \$CFLAGS, perhaps you also need to tell GMP the
+intended ABI, see \"ABI and ISA\" in the manual." "$LINENO" 5
+  else
+    as_fn_error "Oops, mp_limb_t is $GMP_LIMB_BITS bits, but the assembler code
+in this configuration expects $mparam_bits bits." "$LINENO" 5
+  fi
+fi
+
+
+echo "define(<GMP_LIMB_BITS>,$GMP_LIMB_BITS)" >> $gmp_tmpconfigm4
+
+
+echo "define(<GMP_NAIL_BITS>,$GMP_NAIL_BITS)" >> $gmp_tmpconfigm4
+
+
+echo "define(<GMP_NUMB_BITS>,eval(GMP_LIMB_BITS-GMP_NAIL_BITS))" >> $gmp_tmpconfigm4
+
+
+
+# Exclude the mpn random functions from mpbsd since that would drag in the
+# top-level rand things, all of which are unnecessary for libmp.  There are
+# other unnecessary objects too, actually, if we could be bothered figuring
+# out exactly which they are.
+#
+mpn_objs_in_libmp=
+for i in $mpn_objs_in_libgmp; do
+  case $i in
+  *random*) ;;
+  *) mpn_objs_in_libmp="$mpn_objs_in_libmp $i" ;;
+  esac
+done
+
+
+
+
+
+
+
+# A recompiled sqr_basecase for use in the tune program, if necessary.
+TUNE_SQR_OBJ=
+test -d tune || mkdir tune
+case $sqr_basecase_source in
+  *.asm)
+    sqr_max=`sed -n 's/^def...(SQR_TOOM2_THRESHOLD_MAX, *\([0-9]*\))/\1/p' $sqr_basecase_source`
+    if test -n "$sqr_max"; then
+      TUNE_SQR_OBJ=sqr_asm.o
+
+cat >>confdefs.h <<_ACEOF
+#define TUNE_SQR_TOOM2_MAX $sqr_max
+_ACEOF
+
+    fi
+    cat >tune/sqr_basecase.c <<EOF
+/* not sure that an empty file can compile, so put in a dummy */
+int sqr_basecase_dummy;
+EOF
+    ;;
+  *.c)
+    TUNE_SQR_OBJ=
+    $as_echo "#define TUNE_SQR_TOOM2_MAX SQR_TOOM2_MAX_GENERIC" >>confdefs.h
+
+    cat >tune/sqr_basecase.c <<EOF
+#define TUNE_PROGRAM_BUILD 1
+#define TUNE_PROGRAM_BUILD_SQR 1
+#include "mpn/sqr_basecase.c"
+EOF
+    ;;
+esac
+
+
+
+# Configs for demos/pexpr.c.
+#
+ac_config_files="$ac_config_files demos/pexpr-config.h:demos/pexpr-config-h.in"
+
+case $ac_cv_func_clock in
+yes) HAVE_CLOCK_01=1
+ ;;
+no)  HAVE_CLOCK_01=0 ;;
+esac
+
+case $ac_cv_func_cputime in
+yes) HAVE_CPUTIME_01=1
+ ;;
+no)  HAVE_CPUTIME_01=0 ;;
+esac
+
+case $ac_cv_func_getrusage in
+yes) HAVE_GETRUSAGE_01=1
+ ;;
+no)  HAVE_GETRUSAGE_01=0 ;;
+esac
+
+case $ac_cv_func_gettimeofday in
+yes) HAVE_GETTIMEOFDAY_01=1
+ ;;
+no)  HAVE_GETTIMEOFDAY_01=0 ;;
+esac
+
+case $ac_cv_func_sigaction in
+yes) HAVE_SIGACTION_01=1
+ ;;
+no)  HAVE_SIGACTION_01=0 ;;
+esac
+
+case $ac_cv_func_sigaltstack in
+yes) HAVE_SIGALTSTACK_01=1
+ ;;
+no)  HAVE_SIGALTSTACK_01=0 ;;
+esac
+
+case $ac_cv_func_sigstack in
+yes) HAVE_SIGSTACK_01=1
+ ;;
+no)  HAVE_SIGSTACK_01=0 ;;
+esac
+
+
+case $ac_cv_header_sys_resource_h in
+yes) HAVE_SYS_RESOURCE_H_01=1
+ ;;
+no)  HAVE_SYS_RESOURCE_H_01=0 ;;
+esac
+
+
+ac_fn_c_check_type "$LINENO" "stack_t" "ac_cv_type_stack_t" "#include <signal.h>
+"
+if test "x$ac_cv_type_stack_t" = x""yes; then :
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_STACK_T 1
+_ACEOF
+
+HAVE_STACK_T_01=1
+else
+  HAVE_STACK_T_01=0
+fi
+
+
+
+# Configs for demos/calc directory
+#
+# AC_SUBST+AC_CONFIG_FILES is used for calc-config.h, rather than AC_DEFINE+
+# AC_CONFIG_HEADERS, since with the latter automake (1.8) will then put the
+# directory (ie. demos/calc) into $(DEFAULT_INCLUDES) for every Makefile.in,
+# which would look very strange.
+#
+# -lcurses is required by libreadline.  On a typical SVR4 style system this
+# normally doesn't have to be given explicitly, since libreadline.so will
+# have a NEEDED record for it.  But if someone for some reason is using only
+# a static libreadline.a then we must give -lcurses.  Readline (as of
+# version 4.3) doesn't use libtool, so we can't rely on a .la to cover
+# necessary dependencies.
+#
+# On a couple of systems we've seen libreadline available, but the headers
+# not in the default include path, so check for readline/readline.h.  We've
+# also seen readline/history.h missing, not sure if that's just a broken
+# install or a very old version, but check that too.
+#
+ac_config_files="$ac_config_files demos/calc/calc-config.h:demos/calc/calc-config-h.in"
+
+LIBCURSES=
+if test $with_readline != no; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for tputs in -lncurses" >&5
+$as_echo_n "checking for tputs in -lncurses... " >&6; }
+if test "${ac_cv_lib_ncurses_tputs+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lncurses  $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char tputs ();
+int
+main ()
+{
+return tputs ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  ac_cv_lib_ncurses_tputs=yes
+else
+  ac_cv_lib_ncurses_tputs=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_ncurses_tputs" >&5
+$as_echo "$ac_cv_lib_ncurses_tputs" >&6; }
+if test "x$ac_cv_lib_ncurses_tputs" = x""yes; then :
+  LIBCURSES=-lncurses
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for tputs in -lcurses" >&5
+$as_echo_n "checking for tputs in -lcurses... " >&6; }
+if test "${ac_cv_lib_curses_tputs+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lcurses  $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char tputs ();
+int
+main ()
+{
+return tputs ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  ac_cv_lib_curses_tputs=yes
+else
+  ac_cv_lib_curses_tputs=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_curses_tputs" >&5
+$as_echo "$ac_cv_lib_curses_tputs" >&6; }
+if test "x$ac_cv_lib_curses_tputs" = x""yes; then :
+  LIBCURSES=-lcurses
+fi
+
+fi
+
+fi
+
+use_readline=$with_readline
+if test $with_readline = detect; then
+  use_readline=no
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for readline in -lreadline" >&5
+$as_echo_n "checking for readline in -lreadline... " >&6; }
+if test "${ac_cv_lib_readline_readline+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lreadline $LIBCURSES $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char readline ();
+int
+main ()
+{
+return readline ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  ac_cv_lib_readline_readline=yes
+else
+  ac_cv_lib_readline_readline=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_readline_readline" >&5
+$as_echo "$ac_cv_lib_readline_readline" >&6; }
+if test "x$ac_cv_lib_readline_readline" = x""yes; then :
+  ac_fn_c_check_header_mongrel "$LINENO" "readline/readline.h" "ac_cv_header_readline_readline_h" "$ac_includes_default"
+if test "x$ac_cv_header_readline_readline_h" = x""yes; then :
+  ac_fn_c_check_header_mongrel "$LINENO" "readline/history.h" "ac_cv_header_readline_history_h" "$ac_includes_default"
+if test "x$ac_cv_header_readline_history_h" = x""yes; then :
+  use_readline=yes
+fi
+
+
+fi
+
+
+fi
+
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking readline detected" >&5
+$as_echo_n "checking readline detected... " >&6; }
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $use_readline" >&5
+$as_echo "$use_readline" >&6; }
+fi
+if test $use_readline = yes; then
+  WITH_READLINE_01=1
+
+  LIBREADLINE=-lreadline
+
+else
+  WITH_READLINE_01=0
+fi
+for ac_prog in 'bison -y' byacc
+do
+  # Extract the first word of "$ac_prog", so it can be a program name with args.
+set dummy $ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_YACC+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$YACC"; then
+  ac_cv_prog_YACC="$YACC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_prog_YACC="$ac_prog"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+YACC=$ac_cv_prog_YACC
+if test -n "$YACC"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $YACC" >&5
+$as_echo "$YACC" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+  test -n "$YACC" && break
+done
+test -n "$YACC" || YACC="yacc"
+
+for ac_prog in flex lex
+do
+  # Extract the first word of "$ac_prog", so it can be a program name with args.
+set dummy $ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_LEX+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$LEX"; then
+  ac_cv_prog_LEX="$LEX" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_prog_LEX="$ac_prog"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+LEX=$ac_cv_prog_LEX
+if test -n "$LEX"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $LEX" >&5
+$as_echo "$LEX" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+  test -n "$LEX" && break
+done
+test -n "$LEX" || LEX=":"
+
+if test "x$LEX" != "x:"; then
+  cat >conftest.l <<_ACEOF
+%%
+a { ECHO; }
+b { REJECT; }
+c { yymore (); }
+d { yyless (1); }
+e { yyless (input () != 0); }
+f { unput (yytext[0]); }
+. { BEGIN INITIAL; }
+%%
+#ifdef YYTEXT_POINTER
+extern char *yytext;
+#endif
+int
+main (void)
+{
+  return ! yylex () + ! yywrap ();
+}
+_ACEOF
+{ { ac_try="$LEX conftest.l"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$LEX conftest.l") 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking lex output file root" >&5
+$as_echo_n "checking lex output file root... " >&6; }
+if test "${ac_cv_prog_lex_root+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+
+if test -f lex.yy.c; then
+  ac_cv_prog_lex_root=lex.yy
+elif test -f lexyy.c; then
+  ac_cv_prog_lex_root=lexyy
+else
+  as_fn_error "cannot find output from $LEX; giving up" "$LINENO" 5
+fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_lex_root" >&5
+$as_echo "$ac_cv_prog_lex_root" >&6; }
+LEX_OUTPUT_ROOT=$ac_cv_prog_lex_root
+
+if test -z "${LEXLIB+set}"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking lex library" >&5
+$as_echo_n "checking lex library... " >&6; }
+if test "${ac_cv_lib_lex+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+
+    ac_save_LIBS=$LIBS
+    ac_cv_lib_lex='none needed'
+    for ac_lib in '' -lfl -ll; do
+      LIBS="$ac_lib $ac_save_LIBS"
+      cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+`cat $LEX_OUTPUT_ROOT.c`
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  ac_cv_lib_lex=$ac_lib
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+      test "$ac_cv_lib_lex" != 'none needed' && break
+    done
+    LIBS=$ac_save_LIBS
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_lex" >&5
+$as_echo "$ac_cv_lib_lex" >&6; }
+  test "$ac_cv_lib_lex" != 'none needed' && LEXLIB=$ac_cv_lib_lex
+fi
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether yytext is a pointer" >&5
+$as_echo_n "checking whether yytext is a pointer... " >&6; }
+if test "${ac_cv_prog_lex_yytext_pointer+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  # POSIX says lex can declare yytext either as a pointer or an array; the
+# default is implementation-dependent.  Figure out which it is, since
+# not all implementations provide the %pointer and %array declarations.
+ac_cv_prog_lex_yytext_pointer=no
+ac_save_LIBS=$LIBS
+LIBS="$LEXLIB $ac_save_LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#define YYTEXT_POINTER 1
+`cat $LEX_OUTPUT_ROOT.c`
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  ac_cv_prog_lex_yytext_pointer=yes
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_save_LIBS
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_lex_yytext_pointer" >&5
+$as_echo "$ac_cv_prog_lex_yytext_pointer" >&6; }
+if test $ac_cv_prog_lex_yytext_pointer = yes; then
+
+$as_echo "#define YYTEXT_POINTER 1" >>confdefs.h
+
+fi
+rm -f conftest.l $LEX_OUTPUT_ROOT.c
+
+fi
+if test "$LEX" = :; then
+  LEX=${am_missing_run}flex
+fi
+
+# Configs for demos/expr directory
+#
+# Libtool already runs an AC_CHECK_TOOL for ranlib, but we give
+# AC_PROG_RANLIB anyway since automake is supposed to complain if it's not
+# called.  (Automake 1.8.4 doesn't, at least not when the only library is in
+# an EXTRA_LIBRARIES.)
+#
+if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}ranlib", so it can be a program name with args.
+set dummy ${ac_tool_prefix}ranlib; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_RANLIB+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$RANLIB"; then
+  ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_prog_RANLIB="${ac_tool_prefix}ranlib"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+RANLIB=$ac_cv_prog_RANLIB
+if test -n "$RANLIB"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $RANLIB" >&5
+$as_echo "$RANLIB" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_RANLIB"; then
+  ac_ct_RANLIB=$RANLIB
+  # Extract the first word of "ranlib", so it can be a program name with args.
+set dummy ranlib; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_ac_ct_RANLIB+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_RANLIB"; then
+  ac_cv_prog_ac_ct_RANLIB="$ac_ct_RANLIB" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_prog_ac_ct_RANLIB="ranlib"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_RANLIB=$ac_cv_prog_ac_ct_RANLIB
+if test -n "$ac_ct_RANLIB"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_RANLIB" >&5
+$as_echo "$ac_ct_RANLIB" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+  if test "x$ac_ct_RANLIB" = x; then
+    RANLIB=":"
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    RANLIB=$ac_ct_RANLIB
+  fi
+else
+  RANLIB="$ac_cv_prog_RANLIB"
+fi
+
+
+
+# Create config.m4.
+
+echo "creating $gmp_configm4"
+echo "d""nl $gmp_configm4.  Generated automatically by configure." > $gmp_configm4
+if test -f $gmp_tmpconfigm4; then
+  echo "changequote(<,>)" >> $gmp_configm4
+  echo "ifdef(<__CONFIG_M4_INCLUDED__>,,<" >> $gmp_configm4
+  cat $gmp_tmpconfigm4 >> $gmp_configm4
+  echo ">)" >> $gmp_configm4
+  echo "changequote(\`,')" >> $gmp_configm4
+  rm $gmp_tmpconfigm4
+fi
+echo "ifdef(\`__CONFIG_M4_INCLUDED__',,\`" >> $gmp_configm4
+if test -f $gmp_tmpconfigm4i; then
+  cat $gmp_tmpconfigm4i >> $gmp_configm4
+  rm $gmp_tmpconfigm4i
+fi
+if test -f $gmp_tmpconfigm4p; then
+  cat $gmp_tmpconfigm4p >> $gmp_configm4
+  rm $gmp_tmpconfigm4p
+fi
+echo "')" >> $gmp_configm4
+echo "define(\`__CONFIG_M4_INCLUDED__')" >> $gmp_configm4
+
+
+# Create Makefiles
+# FIXME: Upcoming versions of autoconf/automake may not like broken lines.
+#        Right now automake isn't accepting the new AC_CONFIG_FILES scheme.
+
+ac_config_files="$ac_config_files Makefile mpbsd/Makefile mpf/Makefile mpn/Makefile mpq/Makefile mpz/Makefile printf/Makefile scanf/Makefile cxx/Makefile tests/Makefile tests/devel/Makefile tests/mpbsd/Makefile tests/mpf/Makefile tests/mpn/Makefile tests/mpq/Makefile tests/mpz/Makefile tests/rand/Makefile tests/misc/Makefile tests/cxx/Makefile doc/Makefile tune/Makefile demos/Makefile demos/calc/Makefile demos/expr/Makefile gmp.h:gmp-h.in mp.h:mp-h.in"
+
+cat >confcache <<\_ACEOF
+# This file is a shell script that caches the results of configure
+# tests run on this system so they can be shared between configure
+# scripts and configure runs, see configure's option --config-cache.
+# It is not useful on other systems.  If it contains results you don't
+# want to keep, you may remove or edit it.
+#
+# config.status only pays attention to the cache file if you give it
+# the --recheck option to rerun configure.
+#
+# `ac_cv_env_foo' variables (set or unset) will be overridden when
+# loading this file, other *unset* `ac_cv_foo' will be assigned the
+# following values.
+
+_ACEOF
+
+# The following way of writing the cache mishandles newlines in values,
+# but we know of no workaround that is simple, portable, and efficient.
+# So, we kill variables containing newlines.
+# Ultrix sh set writes to stderr and can't be redirected directly,
+# and sets the high bit in the cache file unless we assign to the vars.
+(
+  for ac_var in `(set) 2>&1 | sed -n 's/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'`; do
+    eval ac_val=\$$ac_var
+    case $ac_val in #(
+    *${as_nl}*)
+      case $ac_var in #(
+      *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5
+$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;;
+      esac
+      case $ac_var in #(
+      _ | IFS | as_nl) ;; #(
+      BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #(
+      *) { eval $ac_var=; unset $ac_var;} ;;
+      esac ;;
+    esac
+  done
+
+  (set) 2>&1 |
+    case $as_nl`(ac_space=' '; set) 2>&1` in #(
+    *${as_nl}ac_space=\ *)
+      # `set' does not quote correctly, so add quotes: double-quote
+      # substitution turns \\\\ into \\, and sed turns \\ into \.
+      sed -n \
+       "s/'/'\\\\''/g;
+         s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p"
+      ;; #(
+    *)
+      # `set' quotes correctly as required by POSIX, so do not add quotes.
+      sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p"
+      ;;
+    esac |
+    sort
+) |
+  sed '
+     /^ac_cv_env_/b end
+     t clear
+     :clear
+     s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/
+     t end
+     s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/
+     :end' >>confcache
+if diff "$cache_file" confcache >/dev/null 2>&1; then :; else
+  if test -w "$cache_file"; then
+    test "x$cache_file" != "x/dev/null" &&
+      { $as_echo "$as_me:${as_lineno-$LINENO}: updating cache $cache_file" >&5
+$as_echo "$as_me: updating cache $cache_file" >&6;}
+    cat confcache >$cache_file
+  else
+    { $as_echo "$as_me:${as_lineno-$LINENO}: not updating unwritable cache $cache_file" >&5
+$as_echo "$as_me: not updating unwritable cache $cache_file" >&6;}
+  fi
+fi
+rm -f confcache
+
+test "x$prefix" = xNONE && prefix=$ac_default_prefix
+# Let make expand exec_prefix.
+test "x$exec_prefix" = xNONE && exec_prefix='${prefix}'
+
+DEFS=-DHAVE_CONFIG_H
+
+ac_libobjs=
+ac_ltlibobjs=
+for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue
+  # 1. Remove the extension, and $U if already installed.
+  ac_script='s/\$U\././;s/\.o$//;s/\.obj$//'
+  ac_i=`$as_echo "$ac_i" | sed "$ac_script"`
+  # 2. Prepend LIBOBJDIR.  When used with automake>=1.10 LIBOBJDIR
+  #    will be set to the directory where LIBOBJS objects are built.
+  as_fn_append ac_libobjs " \${LIBOBJDIR}$ac_i\$U.$ac_objext"
+  as_fn_append ac_ltlibobjs " \${LIBOBJDIR}$ac_i"'$U.lo'
+done
+LIBOBJS=$ac_libobjs
+
+LTLIBOBJS=$ac_ltlibobjs
+
+
+ if test -n "$EXEEXT"; then
+  am__EXEEXT_TRUE=
+  am__EXEEXT_FALSE='#'
+else
+  am__EXEEXT_TRUE='#'
+  am__EXEEXT_FALSE=
+fi
+
+if test -z "${MAINTAINER_MODE_TRUE}" && test -z "${MAINTAINER_MODE_FALSE}"; then
+  as_fn_error "conditional \"MAINTAINER_MODE\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+if test -z "${WANT_MPBSD_TRUE}" && test -z "${WANT_MPBSD_FALSE}"; then
+  as_fn_error "conditional \"WANT_MPBSD\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+if test -z "${WANT_CXX_TRUE}" && test -z "${WANT_CXX_FALSE}"; then
+  as_fn_error "conditional \"WANT_CXX\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+if test -z "${ENABLE_STATIC_TRUE}" && test -z "${ENABLE_STATIC_FALSE}"; then
+  as_fn_error "conditional \"ENABLE_STATIC\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+
+
+: ${CONFIG_STATUS=./config.status}
+ac_write_fail=0
+ac_clean_files_save=$ac_clean_files
+ac_clean_files="$ac_clean_files $CONFIG_STATUS"
+{ $as_echo "$as_me:${as_lineno-$LINENO}: creating $CONFIG_STATUS" >&5
+$as_echo "$as_me: creating $CONFIG_STATUS" >&6;}
+as_write_fail=0
+cat >$CONFIG_STATUS <<_ASEOF || as_write_fail=1
+#! $SHELL
+# Generated by $as_me.
+# Run this file to recreate the current configuration.
+# Compiler output produced by configure, useful for debugging
+# configure, is in config.log if it exists.
+
+debug=false
+ac_cs_recheck=false
+ac_cs_silent=false
+
+SHELL=\${CONFIG_SHELL-$SHELL}
+export SHELL
+_ASEOF
+cat >>$CONFIG_STATUS <<\_ASEOF || as_write_fail=1
+## -------------------- ##
+## M4sh Initialization. ##
+## -------------------- ##
+
+# Be more Bourne compatible
+DUALCASE=1; export DUALCASE # for MKS sh
+if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then :
+  emulate sh
+  NULLCMD=:
+  # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which
+  # is contrary to our usage.  Disable this feature.
+  alias -g '${1+"$@"}'='"$@"'
+  setopt NO_GLOB_SUBST
+else
+  case `(set -o) 2>/dev/null` in #(
+  *posix*) :
+    set -o posix ;; #(
+  *) :
+     ;;
+esac
+fi
+
+
+as_nl='
+'
+export as_nl
+# Printing a long string crashes Solaris 7 /usr/bin/printf.
+as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\'
+as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo
+as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo
+# Prefer a ksh shell builtin over an external printf program on Solaris,
+# but without wasting forks for bash or zsh.
+if test -z "$BASH_VERSION$ZSH_VERSION" \
+    && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then
+  as_echo='print -r --'
+  as_echo_n='print -rn --'
+elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then
+  as_echo='printf %s\n'
+  as_echo_n='printf %s'
+else
+  if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then
+    as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"'
+    as_echo_n='/usr/ucb/echo -n'
+  else
+    as_echo_body='eval expr "X$1" : "X\\(.*\\)"'
+    as_echo_n_body='eval
+      arg=$1;
+      case $arg in #(
+      *"$as_nl"*)
+       expr "X$arg" : "X\\(.*\\)$as_nl";
+       arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;;
+      esac;
+      expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl"
+    '
+    export as_echo_n_body
+    as_echo_n='sh -c $as_echo_n_body as_echo'
+  fi
+  export as_echo_body
+  as_echo='sh -c $as_echo_body as_echo'
+fi
+
+# The user is always right.
+if test "${PATH_SEPARATOR+set}" != set; then
+  PATH_SEPARATOR=:
+  (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && {
+    (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 ||
+      PATH_SEPARATOR=';'
+  }
+fi
+
+
+# IFS
+# We need space, tab and new line, in precisely that order.  Quoting is
+# there to prevent editors from complaining about space-tab.
+# (If _AS_PATH_WALK were called with IFS unset, it would disable word
+# splitting by setting IFS to empty value.)
+IFS=" ""       $as_nl"
+
+# Find who we are.  Look in the path if we contain no directory separator.
+case $0 in #((
+  *[\\/]* ) as_myself=$0 ;;
+  *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break
+  done
+IFS=$as_save_IFS
+
+     ;;
+esac
+# We did not find ourselves, most probably we were run as `sh COMMAND'
+# in which case we are not to be found in the path.
+if test "x$as_myself" = x; then
+  as_myself=$0
+fi
+if test ! -f "$as_myself"; then
+  $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2
+  exit 1
+fi
+
+# Unset variables that we do not need and which cause bugs (e.g. in
+# pre-3.0 UWIN ksh).  But do not cause bugs in bash 2.01; the "|| exit 1"
+# suppresses any "Segmentation fault" message there.  '((' could
+# trigger a bug in pdksh 5.2.14.
+for as_var in BASH_ENV ENV MAIL MAILPATH
+do eval test x\${$as_var+set} = xset \
+  && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || :
+done
+PS1='$ '
+PS2='> '
+PS4='+ '
+
+# NLS nuisances.
+LC_ALL=C
+export LC_ALL
+LANGUAGE=C
+export LANGUAGE
+
+# CDPATH.
+(unset CDPATH) >/dev/null 2>&1 && unset CDPATH
+
+
+# as_fn_error ERROR [LINENO LOG_FD]
+# ---------------------------------
+# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are
+# provided, also output the error to LOG_FD, referencing LINENO. Then exit the
+# script with status $?, using 1 if that was 0.
+as_fn_error ()
+{
+  as_status=$?; test $as_status -eq 0 && as_status=1  # capture the incoming status before anything overwrites it
+  if test "$3"; then  # third argument (log descriptor) is non-empty
+    as_lineno=${as_lineno-"$2"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+    $as_echo "$as_me:${as_lineno-$LINENO}: error: $1" >&$3
+  fi
+  $as_echo "$as_me: error: $1" >&2  # the message always goes to stderr as well
+  as_fn_exit $as_status
+} # as_fn_error
+
+
+# as_fn_set_status STATUS
+# -----------------------
+# Set $? to STATUS, without forking.
+as_fn_set_status ()
+{
+  return $1  # the function return value becomes the status seen by the caller
+} # as_fn_set_status
+
+# as_fn_exit STATUS
+# -----------------
+# Exit the shell with STATUS, even in a "trap 0" or "set -e" context.
+as_fn_exit ()
+{
+  set +e  # turn errexit off before the status is set to a possibly nonzero value
+  as_fn_set_status $1  # prime the current status with STATUS just before exiting
+  exit $1
+} # as_fn_exit
+
+# as_fn_unset VAR
+# ---------------
+# Portably unset VAR.
+as_fn_unset ()
+{
+  { eval $1=; unset $1;}  # assign the empty string first, then unset
+}
+as_unset=as_fn_unset  # variable holding the function name, for indirect calls
+# as_fn_append VAR VALUE
+# ----------------------
+# Append the text in VALUE to the end of the definition contained in VAR. Take
+# advantage of any shell optimizations that allow amortized linear growth over
+# repeated appends, instead of the typical quadratic growth present in naive
+# implementations.
+if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then :  # probe in a subshell whether += appends
+  eval 'as_fn_append ()
+  {
+    eval $1+=\$2
+  }'
+else
+  as_fn_append ()
+  {
+    eval $1=\$$1\$2  # fallback: re-assign the old value followed by the new text
+  }
+fi # as_fn_append
+
+# as_fn_arith ARG...
+# ------------------
+# Perform arithmetic evaluation on the ARGs, and store the result in the
+# global $as_val. Take advantage of shells that can avoid forks. The arguments
+# must be portable across $(()) and expr.
+if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then :  # probe in a subshell for arithmetic expansion
+  eval 'as_fn_arith ()
+  {
+    as_val=$(( $* ))
+  }'
+else
+  as_fn_arith ()
+  {
+    as_val=`expr "$@" || test $? -eq 1`  # an expr exit status of 1 is still treated as success
+  }
+fi # as_fn_arith
+
+
+if expr a : '\(a\)' >/dev/null 2>&1 &&
+   test "X`expr 00001 : '.*\(...\)'`" = X001; then
+  as_expr=expr
+else
+  as_expr=false  # callers chain with ||, so a failing stand-in moves on to the next alternative
+fi
+
+if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then
+  as_basename=basename
+else
+  as_basename=false  # same convention: unusable tool is replaced by false
+fi
+
+if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then
+  as_dirname=dirname
+else
+  as_dirname=false  # same convention: unusable tool is replaced by false
+fi
+
+as_me=`$as_basename -- "$0" ||
+$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \
+        X"$0" : 'X\(//\)$' \| \
+        X"$0" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X/"$0" |
+    sed '/^.*\/\([^/][^/]*\)\/*$/{
+           s//\1/
+           q
+         }
+         /^X\/\(\/\/\)$/{
+           s//\1/
+           q
+         }
+         /^X\/\(\/\).*/{
+           s//\1/
+           q
+         }
+         s/.*/./; q'`
+
+# Avoid depending upon Character Ranges.
+as_cr_letters='abcdefghijklmnopqrstuvwxyz'
+as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+as_cr_Letters=$as_cr_letters$as_cr_LETTERS
+as_cr_digits='0123456789'
+as_cr_alnum=$as_cr_Letters$as_cr_digits
+
+ECHO_C= ECHO_N= ECHO_T=
+case `echo -n x` in #(((((
+-n*)
+  case `echo 'xy\c'` in
+  *c*) ECHO_T='	';;     # ECHO_T is single tab character.
+  xy)  ECHO_C='\c';;
+  *)   echo `echo ksh88 bug on AIX 6.1` > /dev/null
+       ECHO_T='	';;
+  esac;;
+*)
+  ECHO_N='-n';;
+esac
+
+rm -f conf$$ conf$$.exe conf$$.file  # clear leftovers before probing
+if test -d conf$$.dir; then
+  rm -f conf$$.dir/conf$$.file
+else
+  rm -f conf$$.dir
+  mkdir conf$$.dir 2>/dev/null
+fi
+if (echo >conf$$.file) 2>/dev/null; then  # can we create files here at all
+  if ln -s conf$$.file conf$$ 2>/dev/null; then
+    as_ln_s='ln -s'
+    # ... but there are two gotchas:
+    # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail.
+    # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable.
+    # In both cases, we have to default to `cp -p'.
+    ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe ||
+      as_ln_s='cp -p'
+  elif ln conf$$.file conf$$ 2>/dev/null; then
+    as_ln_s=ln  # hard links work even though symbolic links do not
+  else
+    as_ln_s='cp -p'
+  fi
+else
+  as_ln_s='cp -p'  # probe file could not be written: use the copy fallback
+fi
+rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file
+rmdir conf$$.dir 2>/dev/null
+
+
+# as_fn_mkdir_p
+# -------------
+# Create "$as_dir" as a directory, including parents if necessary.
+as_fn_mkdir_p ()
+{
+
+  case $as_dir in #(
+  -*) as_dir=./$as_dir;;  # prefix a leading dash so the name is not read as an option
+  esac
+  test -d "$as_dir" || eval $as_mkdir_p || {  # manual walk only if the directory is missing and the fast command fails
+    as_dirs=
+    while :; do
+      case $as_dir in #(
+      *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'(
+      *) as_qdir=$as_dir;;
+      esac
+      as_dirs="'$as_qdir' $as_dirs"  # prepend, so outer directories come first in the list
+      as_dir=`$as_dirname -- "$as_dir" ||
+$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+        X"$as_dir" : 'X\(//\)[^/]' \| \
+        X"$as_dir" : 'X\(//\)$' \| \
+        X"$as_dir" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X"$as_dir" |
+    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+           s//\1/
+           q
+         }
+         /^X\(\/\/\)[^/].*/{
+           s//\1/
+           q
+         }
+         /^X\(\/\/\)$/{
+           s//\1/
+           q
+         }
+         /^X\(\/\).*/{
+           s//\1/
+           q
+         }
+         s/.*/./; q'`
+      test -d "$as_dir" && break  # stop at the first ancestor that already exists
+    done
+    test -z "$as_dirs" || eval "mkdir $as_dirs"  # one mkdir call for every collected directory
+  } || test -d "$as_dir" || as_fn_error "cannot create directory $as_dir"
+
+
+} # as_fn_mkdir_p
+if mkdir -p . 2>/dev/null; then
+  as_mkdir_p='mkdir -p "$as_dir"'  # stored as text; expanded later through eval
+else
+  test -d ./-p && rmdir ./-p  # presumably cleans up a directory literally named -p left by the failed probe
+  as_mkdir_p=false
+fi
+
+if test -x / >/dev/null 2>&1; then
+  as_test_x='test -x'
+else
+  if ls -dL / >/dev/null 2>&1; then  # check whether ls accepts the L flag
+    as_ls_L_option=L
+  else
+    as_ls_L_option=
+  fi
+  as_test_x='
+    eval sh -c '\''
+      if test -d "$1"; then
+       test -d "$1/.";
+      else
+       case $1 in #(
+       -*)set "./$1";;
+       esac;
+       case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in #((
+       ???[sx]*):;;*)false;;esac;fi
+    '\'' sh
+  '
+fi
+as_executable_p=$as_test_x  # second name for the same test command
+
+# Sed expression to map a string onto a valid CPP name.
+as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'"
+
+# Sed expression to map a string onto a valid variable name.
+as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'"
+
+
+exec 6>&1
+## ----------------------------------- ##
+## Main body of $CONFIG_STATUS script. ##
+## ----------------------------------- ##
+_ASEOF
+test $as_write_fail = 0 && chmod +x $CONFIG_STATUS || ac_write_fail=1  # make it executable only if no write failed; otherwise record the failure
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+# Save the log message, to keep $0 and so on meaningful, and to
+# report actual input values of CONFIG_FILES etc. instead of their
+# values after options handling.
+ac_log="
+This file was extended by GNU MP $as_me 5.0.5, which was
+generated by GNU Autoconf 2.65.  Invocation command line was
+
+  CONFIG_FILES    = $CONFIG_FILES
+  CONFIG_HEADERS  = $CONFIG_HEADERS
+  CONFIG_LINKS    = $CONFIG_LINKS
+  CONFIG_COMMANDS = $CONFIG_COMMANDS
+  $ $0 $@
+
+on `(hostname || uname -n) 2>/dev/null | sed 1q`
+"
+
+_ACEOF
+
+case $ac_config_files in *"
+"*) set x $ac_config_files; shift; ac_config_files=$*;;  # value contains a newline: re-split it into words and rejoin them
+esac
+
+case $ac_config_headers in *"
+"*) set x $ac_config_headers; shift; ac_config_headers=$*;;  # same flattening for the header list
+esac
+
+
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+# Files that config.status was made for.
+config_files="$ac_config_files"
+config_headers="$ac_config_headers"
+config_links="$ac_config_links"
+config_commands="$ac_config_commands"
+
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+ac_cs_usage="\
+\`$as_me' instantiates files and other configuration actions
+from templates according to the current configuration.  Unless the files
+and actions are specified as TAGs, all are instantiated by default.
+
+Usage: $0 [OPTION]... [TAG]...
+
+  -h, --help       print this help, then exit
+  -V, --version    print version number and configuration settings, then exit
+      --config     print configuration, then exit
+  -q, --quiet, --silent
+                   do not print progress messages
+  -d, --debug      don't remove temporary files
+      --recheck    update $as_me by reconfiguring in the same conditions
+      --file=FILE[:TEMPLATE]
+                   instantiate the configuration file FILE
+      --header=FILE[:TEMPLATE]
+                   instantiate the configuration header FILE
+
+Configuration files:
+$config_files
+
+Configuration headers:
+$config_headers
+
+Configuration links:
+$config_links
+
+Configuration commands:
+$config_commands
+
+Report bugs to <gmp-bugs@gmplib.org, see http://gmplib.org/manual/Reporting-Bugs.html>.
+GNU MP home page: <http://www.gnu.org/software/gmp/>.
+General help using GNU software: <http://www.gnu.org/gethelp/>."
+
+_ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
+ac_cs_version="\\
+GNU MP config.status 5.0.5
+configured by $0, generated by GNU Autoconf 2.65,
+  with options \\"\$ac_cs_config\\"
+
+Copyright (C) 2009 Free Software Foundation, Inc.
+This config.status script is free software; the Free Software Foundation
+gives unlimited permission to copy, distribute and modify it."
+
+ac_pwd='$ac_pwd'
+srcdir='$srcdir'
+INSTALL='$INSTALL'
+MKDIR_P='$MKDIR_P'
+AWK='$AWK'
+test -n "\$AWK" || AWK=awk
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+# The default lists apply if the user does not specify any file.
+ac_need_defaults=:
+while test $# != 0
+do
+  case $1 in
+  --*=*)
+    ac_option=`expr "X$1" : 'X\([^=]*\)='`
+    ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'`
+    ac_shift=:  # value was attached with an equals sign: nothing extra to consume
+    ;;
+  *)
+    ac_option=$1
+    ac_optarg=$2
+    ac_shift=shift  # a value, if the option takes one, is the following word
+    ;;
+  esac
+
+  case $ac_option in
+  # Handling of the options.
+  -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r)
+    ac_cs_recheck=: ;;
+  --version | --versio | --versi | --vers | --ver | --ve | --v | -V )
+    $as_echo "$ac_cs_version"; exit ;;
+  --config | --confi | --conf | --con | --co | --c )
+    $as_echo "$ac_cs_config"; exit ;;
+  --debug | --debu | --deb | --de | --d | -d )
+    debug=: ;;
+  --file | --fil | --fi | --f )
+    $ac_shift
+    case $ac_optarg in
+    *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;;
+    esac
+    as_fn_append CONFIG_FILES " '$ac_optarg'"  # appended single-quoted, after escaping embedded quotes
+    ac_need_defaults=false;;
+  --header | --heade | --head | --hea )
+    $ac_shift
+    case $ac_optarg in
+    *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;;
+    esac
+    as_fn_append CONFIG_HEADERS " '$ac_optarg'"  # appended single-quoted, after escaping embedded quotes
+    ac_need_defaults=false;;
+  --he | --h)
+    # Conflict between --help and --header
+    as_fn_error "ambiguous option: \`$1'
+Try \`$0 --help' for more information.";;
+  --help | --hel | -h )
+    $as_echo "$ac_cs_usage"; exit ;;
+  -q | -quiet | --quiet | --quie | --qui | --qu | --q \
+  | -silent | --silent | --silen | --sile | --sil | --si | --s)
+    ac_cs_silent=: ;;
+
+  # This is an error.
+  -*) as_fn_error "unrecognized option: \`$1'
+Try \`$0 --help' for more information." ;;
+
+  *) as_fn_append ac_config_targets " $1"  # any non-option word is collected as a target
+     ac_need_defaults=false ;;
+
+  esac
+  shift
+done
+
+ac_configure_extra_args=
+
+if $ac_cs_silent; then
+  exec 6>/dev/null  # silence descriptor 6
+  ac_configure_extra_args="$ac_configure_extra_args --silent"
+fi
+
+_ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1  # unquoted delimiter: unescaped variables in the body expand now, backslash-escaped ones are written literally
+if \$ac_cs_recheck; then
+  set X '$SHELL' '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion
+  shift
+  \$as_echo "running CONFIG_SHELL=$SHELL \$*" >&6
+  CONFIG_SHELL='$SHELL'
+  export CONFIG_SHELL
+  exec "\$@"
+fi
+
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1  # quoted delimiter: the body is written verbatim
+exec 5>>config.log
+{
+  echo
+  sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX
+## Running $as_me. ##
+_ASBOX
+  $as_echo "$ac_log"
+} >&5
+
+_ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+#
+# INIT-COMMANDS
+#
+
+
+# The HP-UX ksh and POSIX shell print the target directory to stdout
+# if CDPATH is set.
+(unset CDPATH) >/dev/null 2>&1 && unset CDPATH
+
+sed_quote_subst='$sed_quote_subst'
+double_quote_subst='$double_quote_subst'
+delay_variable_subst='$delay_variable_subst'
+NM='`$ECHO "$NM" | $SED "$delay_single_quote_subst"`'
+AS='`$ECHO "$AS" | $SED "$delay_single_quote_subst"`'
+DLLTOOL='`$ECHO "$DLLTOOL" | $SED "$delay_single_quote_subst"`'
+OBJDUMP='`$ECHO "$OBJDUMP" | $SED "$delay_single_quote_subst"`'
+macro_version='`$ECHO "$macro_version" | $SED "$delay_single_quote_subst"`'
+macro_revision='`$ECHO "$macro_revision" | $SED "$delay_single_quote_subst"`'
+enable_shared='`$ECHO "$enable_shared" | $SED "$delay_single_quote_subst"`'
+enable_static='`$ECHO "$enable_static" | $SED "$delay_single_quote_subst"`'
+pic_mode='`$ECHO "$pic_mode" | $SED "$delay_single_quote_subst"`'
+enable_fast_install='`$ECHO "$enable_fast_install" | $SED "$delay_single_quote_subst"`'
+SHELL='`$ECHO "$SHELL" | $SED "$delay_single_quote_subst"`'
+ECHO='`$ECHO "$ECHO" | $SED "$delay_single_quote_subst"`'
+PATH_SEPARATOR='`$ECHO "$PATH_SEPARATOR" | $SED "$delay_single_quote_subst"`'
+host_alias='`$ECHO "$host_alias" | $SED "$delay_single_quote_subst"`'
+host='`$ECHO "$host" | $SED "$delay_single_quote_subst"`'
+host_os='`$ECHO "$host_os" | $SED "$delay_single_quote_subst"`'
+build_alias='`$ECHO "$build_alias" | $SED "$delay_single_quote_subst"`'
+build='`$ECHO "$build" | $SED "$delay_single_quote_subst"`'
+build_os='`$ECHO "$build_os" | $SED "$delay_single_quote_subst"`'
+SED='`$ECHO "$SED" | $SED "$delay_single_quote_subst"`'
+Xsed='`$ECHO "$Xsed" | $SED "$delay_single_quote_subst"`'
+GREP='`$ECHO "$GREP" | $SED "$delay_single_quote_subst"`'
+EGREP='`$ECHO "$EGREP" | $SED "$delay_single_quote_subst"`'
+FGREP='`$ECHO "$FGREP" | $SED "$delay_single_quote_subst"`'
+LD='`$ECHO "$LD" | $SED "$delay_single_quote_subst"`'
+LN_S='`$ECHO "$LN_S" | $SED "$delay_single_quote_subst"`'
+max_cmd_len='`$ECHO "$max_cmd_len" | $SED "$delay_single_quote_subst"`'
+ac_objext='`$ECHO "$ac_objext" | $SED "$delay_single_quote_subst"`'
+exeext='`$ECHO "$exeext" | $SED "$delay_single_quote_subst"`'
+lt_unset='`$ECHO "$lt_unset" | $SED "$delay_single_quote_subst"`'
+lt_SP2NL='`$ECHO "$lt_SP2NL" | $SED "$delay_single_quote_subst"`'
+lt_NL2SP='`$ECHO "$lt_NL2SP" | $SED "$delay_single_quote_subst"`'
+lt_cv_to_host_file_cmd='`$ECHO "$lt_cv_to_host_file_cmd" | $SED "$delay_single_quote_subst"`'
+lt_cv_to_tool_file_cmd='`$ECHO "$lt_cv_to_tool_file_cmd" | $SED "$delay_single_quote_subst"`'
+reload_flag='`$ECHO "$reload_flag" | $SED "$delay_single_quote_subst"`'
+reload_cmds='`$ECHO "$reload_cmds" | $SED "$delay_single_quote_subst"`'
+deplibs_check_method='`$ECHO "$deplibs_check_method" | $SED "$delay_single_quote_subst"`'
+file_magic_cmd='`$ECHO "$file_magic_cmd" | $SED "$delay_single_quote_subst"`'
+file_magic_glob='`$ECHO "$file_magic_glob" | $SED "$delay_single_quote_subst"`'
+want_nocaseglob='`$ECHO "$want_nocaseglob" | $SED "$delay_single_quote_subst"`'
+sharedlib_from_linklib_cmd='`$ECHO "$sharedlib_from_linklib_cmd" | $SED "$delay_single_quote_subst"`'
+AR='`$ECHO "$AR" | $SED "$delay_single_quote_subst"`'
+AR_FLAGS='`$ECHO "$AR_FLAGS" | $SED "$delay_single_quote_subst"`'
+archiver_list_spec='`$ECHO "$archiver_list_spec" | $SED "$delay_single_quote_subst"`'
+STRIP='`$ECHO "$STRIP" | $SED "$delay_single_quote_subst"`'
+RANLIB='`$ECHO "$RANLIB" | $SED "$delay_single_quote_subst"`'
+old_postinstall_cmds='`$ECHO "$old_postinstall_cmds" | $SED "$delay_single_quote_subst"`'
+old_postuninstall_cmds='`$ECHO "$old_postuninstall_cmds" | $SED "$delay_single_quote_subst"`'
+old_archive_cmds='`$ECHO "$old_archive_cmds" | $SED "$delay_single_quote_subst"`'
+lock_old_archive_extraction='`$ECHO "$lock_old_archive_extraction" | $SED "$delay_single_quote_subst"`'
+CC='`$ECHO "$CC" | $SED "$delay_single_quote_subst"`'
+CFLAGS='`$ECHO "$CFLAGS" | $SED "$delay_single_quote_subst"`'
+compiler='`$ECHO "$compiler" | $SED "$delay_single_quote_subst"`'
+GCC='`$ECHO "$GCC" | $SED "$delay_single_quote_subst"`'
+lt_cv_sys_global_symbol_pipe='`$ECHO "$lt_cv_sys_global_symbol_pipe" | $SED "$delay_single_quote_subst"`'
+lt_cv_sys_global_symbol_to_cdecl='`$ECHO "$lt_cv_sys_global_symbol_to_cdecl" | $SED "$delay_single_quote_subst"`'
+lt_cv_sys_global_symbol_to_c_name_address='`$ECHO "$lt_cv_sys_global_symbol_to_c_name_address" | $SED "$delay_single_quote_subst"`'
+lt_cv_sys_global_symbol_to_c_name_address_lib_prefix='`$ECHO "$lt_cv_sys_global_symbol_to_c_name_address_lib_prefix" | $SED "$delay_single_quote_subst"`'
+nm_file_list_spec='`$ECHO "$nm_file_list_spec" | $SED "$delay_single_quote_subst"`'
+lt_sysroot='`$ECHO "$lt_sysroot" | $SED "$delay_single_quote_subst"`'
+objdir='`$ECHO "$objdir" | $SED "$delay_single_quote_subst"`'
+MAGIC_CMD='`$ECHO "$MAGIC_CMD" | $SED "$delay_single_quote_subst"`'
+lt_prog_compiler_no_builtin_flag='`$ECHO "$lt_prog_compiler_no_builtin_flag" | $SED "$delay_single_quote_subst"`'
+lt_prog_compiler_pic='`$ECHO "$lt_prog_compiler_pic" | $SED "$delay_single_quote_subst"`'
+lt_prog_compiler_wl='`$ECHO "$lt_prog_compiler_wl" | $SED "$delay_single_quote_subst"`'
+lt_prog_compiler_static='`$ECHO "$lt_prog_compiler_static" | $SED "$delay_single_quote_subst"`'
+lt_cv_prog_compiler_c_o='`$ECHO "$lt_cv_prog_compiler_c_o" | $SED "$delay_single_quote_subst"`'
+need_locks='`$ECHO "$need_locks" | $SED "$delay_single_quote_subst"`'
+MANIFEST_TOOL='`$ECHO "$MANIFEST_TOOL" | $SED "$delay_single_quote_subst"`'
+DSYMUTIL='`$ECHO "$DSYMUTIL" | $SED "$delay_single_quote_subst"`'
+NMEDIT='`$ECHO "$NMEDIT" | $SED "$delay_single_quote_subst"`'
+LIPO='`$ECHO "$LIPO" | $SED "$delay_single_quote_subst"`'
+OTOOL='`$ECHO "$OTOOL" | $SED "$delay_single_quote_subst"`'
+OTOOL64='`$ECHO "$OTOOL64" | $SED "$delay_single_quote_subst"`'
+libext='`$ECHO "$libext" | $SED "$delay_single_quote_subst"`'
+shrext_cmds='`$ECHO "$shrext_cmds" | $SED "$delay_single_quote_subst"`'
+extract_expsyms_cmds='`$ECHO "$extract_expsyms_cmds" | $SED "$delay_single_quote_subst"`'
+archive_cmds_need_lc='`$ECHO "$archive_cmds_need_lc" | $SED "$delay_single_quote_subst"`'
+enable_shared_with_static_runtimes='`$ECHO "$enable_shared_with_static_runtimes" | $SED "$delay_single_quote_subst"`'
+export_dynamic_flag_spec='`$ECHO "$export_dynamic_flag_spec" | $SED "$delay_single_quote_subst"`'
+whole_archive_flag_spec='`$ECHO "$whole_archive_flag_spec" | $SED "$delay_single_quote_subst"`'
+compiler_needs_object='`$ECHO "$compiler_needs_object" | $SED "$delay_single_quote_subst"`'
+old_archive_from_new_cmds='`$ECHO "$old_archive_from_new_cmds" | $SED "$delay_single_quote_subst"`'
+old_archive_from_expsyms_cmds='`$ECHO "$old_archive_from_expsyms_cmds" | $SED "$delay_single_quote_subst"`'
+archive_cmds='`$ECHO "$archive_cmds" | $SED "$delay_single_quote_subst"`'
+archive_expsym_cmds='`$ECHO "$archive_expsym_cmds" | $SED "$delay_single_quote_subst"`'
+module_cmds='`$ECHO "$module_cmds" | $SED "$delay_single_quote_subst"`'
+module_expsym_cmds='`$ECHO "$module_expsym_cmds" | $SED "$delay_single_quote_subst"`'
+with_gnu_ld='`$ECHO "$with_gnu_ld" | $SED "$delay_single_quote_subst"`'
+allow_undefined_flag='`$ECHO "$allow_undefined_flag" | $SED "$delay_single_quote_subst"`'
+no_undefined_flag='`$ECHO "$no_undefined_flag" | $SED "$delay_single_quote_subst"`'
+hardcode_libdir_flag_spec='`$ECHO "$hardcode_libdir_flag_spec" | $SED "$delay_single_quote_subst"`'
+hardcode_libdir_separator='`$ECHO "$hardcode_libdir_separator" | $SED "$delay_single_quote_subst"`'
+hardcode_direct='`$ECHO "$hardcode_direct" | $SED "$delay_single_quote_subst"`'
+hardcode_direct_absolute='`$ECHO "$hardcode_direct_absolute" | $SED "$delay_single_quote_subst"`'
+hardcode_minus_L='`$ECHO "$hardcode_minus_L" | $SED "$delay_single_quote_subst"`'
+hardcode_shlibpath_var='`$ECHO "$hardcode_shlibpath_var" | $SED "$delay_single_quote_subst"`'
+hardcode_automatic='`$ECHO "$hardcode_automatic" | $SED "$delay_single_quote_subst"`'
+inherit_rpath='`$ECHO "$inherit_rpath" | $SED "$delay_single_quote_subst"`'
+link_all_deplibs='`$ECHO "$link_all_deplibs" | $SED "$delay_single_quote_subst"`'
+always_export_symbols='`$ECHO "$always_export_symbols" | $SED "$delay_single_quote_subst"`'
+export_symbols_cmds='`$ECHO "$export_symbols_cmds" | $SED "$delay_single_quote_subst"`'
+exclude_expsyms='`$ECHO "$exclude_expsyms" | $SED "$delay_single_quote_subst"`'
+include_expsyms='`$ECHO "$include_expsyms" | $SED "$delay_single_quote_subst"`'
+prelink_cmds='`$ECHO "$prelink_cmds" | $SED "$delay_single_quote_subst"`'
+postlink_cmds='`$ECHO "$postlink_cmds" | $SED "$delay_single_quote_subst"`'
+file_list_spec='`$ECHO "$file_list_spec" | $SED "$delay_single_quote_subst"`'
+variables_saved_for_relink='`$ECHO "$variables_saved_for_relink" | $SED "$delay_single_quote_subst"`'
+need_lib_prefix='`$ECHO "$need_lib_prefix" | $SED "$delay_single_quote_subst"`'
+need_version='`$ECHO "$need_version" | $SED "$delay_single_quote_subst"`'
+version_type='`$ECHO "$version_type" | $SED "$delay_single_quote_subst"`'
+runpath_var='`$ECHO "$runpath_var" | $SED "$delay_single_quote_subst"`'
+shlibpath_var='`$ECHO "$shlibpath_var" | $SED "$delay_single_quote_subst"`'
+shlibpath_overrides_runpath='`$ECHO "$shlibpath_overrides_runpath" | $SED "$delay_single_quote_subst"`'
+libname_spec='`$ECHO "$libname_spec" | $SED "$delay_single_quote_subst"`'
+library_names_spec='`$ECHO "$library_names_spec" | $SED "$delay_single_quote_subst"`'
+soname_spec='`$ECHO "$soname_spec" | $SED "$delay_single_quote_subst"`'
+install_override_mode='`$ECHO "$install_override_mode" | $SED "$delay_single_quote_subst"`'
+postinstall_cmds='`$ECHO "$postinstall_cmds" | $SED "$delay_single_quote_subst"`'
+postuninstall_cmds='`$ECHO "$postuninstall_cmds" | $SED "$delay_single_quote_subst"`'
+finish_cmds='`$ECHO "$finish_cmds" | $SED "$delay_single_quote_subst"`'
+finish_eval='`$ECHO "$finish_eval" | $SED "$delay_single_quote_subst"`'
+hardcode_into_libs='`$ECHO "$hardcode_into_libs" | $SED "$delay_single_quote_subst"`'
+sys_lib_search_path_spec='`$ECHO "$sys_lib_search_path_spec" | $SED "$delay_single_quote_subst"`'
+sys_lib_dlsearch_path_spec='`$ECHO "$sys_lib_dlsearch_path_spec" | $SED "$delay_single_quote_subst"`'
+hardcode_action='`$ECHO "$hardcode_action" | $SED "$delay_single_quote_subst"`'
+enable_dlopen='`$ECHO "$enable_dlopen" | $SED "$delay_single_quote_subst"`'
+enable_dlopen_self='`$ECHO "$enable_dlopen_self" | $SED "$delay_single_quote_subst"`'
+enable_dlopen_self_static='`$ECHO "$enable_dlopen_self_static" | $SED "$delay_single_quote_subst"`'
+old_striplib='`$ECHO "$old_striplib" | $SED "$delay_single_quote_subst"`'
+striplib='`$ECHO "$striplib" | $SED "$delay_single_quote_subst"`'
+compiler_lib_search_dirs='`$ECHO "$compiler_lib_search_dirs" | $SED "$delay_single_quote_subst"`'
+predep_objects='`$ECHO "$predep_objects" | $SED "$delay_single_quote_subst"`'
+postdep_objects='`$ECHO "$postdep_objects" | $SED "$delay_single_quote_subst"`'
+predeps='`$ECHO "$predeps" | $SED "$delay_single_quote_subst"`'
+postdeps='`$ECHO "$postdeps" | $SED "$delay_single_quote_subst"`'
+compiler_lib_search_path='`$ECHO "$compiler_lib_search_path" | $SED "$delay_single_quote_subst"`'
+LD_CXX='`$ECHO "$LD_CXX" | $SED "$delay_single_quote_subst"`'
+LD_F77='`$ECHO "$LD_F77" | $SED "$delay_single_quote_subst"`'
+reload_flag_CXX='`$ECHO "$reload_flag_CXX" | $SED "$delay_single_quote_subst"`'
+reload_flag_F77='`$ECHO "$reload_flag_F77" | $SED "$delay_single_quote_subst"`'
+reload_cmds_CXX='`$ECHO "$reload_cmds_CXX" | $SED "$delay_single_quote_subst"`'
+reload_cmds_F77='`$ECHO "$reload_cmds_F77" | $SED "$delay_single_quote_subst"`'
+old_archive_cmds_CXX='`$ECHO "$old_archive_cmds_CXX" | $SED "$delay_single_quote_subst"`'
+old_archive_cmds_F77='`$ECHO "$old_archive_cmds_F77" | $SED "$delay_single_quote_subst"`'
+compiler_CXX='`$ECHO "$compiler_CXX" | $SED "$delay_single_quote_subst"`'
+compiler_F77='`$ECHO "$compiler_F77" | $SED "$delay_single_quote_subst"`'
+GCC_CXX='`$ECHO "$GCC_CXX" | $SED "$delay_single_quote_subst"`'
+GCC_F77='`$ECHO "$GCC_F77" | $SED "$delay_single_quote_subst"`'
+lt_prog_compiler_no_builtin_flag_CXX='`$ECHO "$lt_prog_compiler_no_builtin_flag_CXX" | $SED "$delay_single_quote_subst"`'
+lt_prog_compiler_no_builtin_flag_F77='`$ECHO "$lt_prog_compiler_no_builtin_flag_F77" | $SED "$delay_single_quote_subst"`'
+lt_prog_compiler_pic_CXX='`$ECHO "$lt_prog_compiler_pic_CXX" | $SED "$delay_single_quote_subst"`'
+lt_prog_compiler_pic_F77='`$ECHO "$lt_prog_compiler_pic_F77" | $SED "$delay_single_quote_subst"`'
+lt_prog_compiler_wl_CXX='`$ECHO "$lt_prog_compiler_wl_CXX" | $SED "$delay_single_quote_subst"`'
+lt_prog_compiler_wl_F77='`$ECHO "$lt_prog_compiler_wl_F77" | $SED "$delay_single_quote_subst"`'
+lt_prog_compiler_static_CXX='`$ECHO "$lt_prog_compiler_static_CXX" | $SED "$delay_single_quote_subst"`'
+lt_prog_compiler_static_F77='`$ECHO "$lt_prog_compiler_static_F77" | $SED "$delay_single_quote_subst"`'
+lt_cv_prog_compiler_c_o_CXX='`$ECHO "$lt_cv_prog_compiler_c_o_CXX" | $SED "$delay_single_quote_subst"`'
+lt_cv_prog_compiler_c_o_F77='`$ECHO "$lt_cv_prog_compiler_c_o_F77" | $SED "$delay_single_quote_subst"`'
+archive_cmds_need_lc_CXX='`$ECHO "$archive_cmds_need_lc_CXX" | $SED "$delay_single_quote_subst"`'
+archive_cmds_need_lc_F77='`$ECHO "$archive_cmds_need_lc_F77" | $SED "$delay_single_quote_subst"`'
+enable_shared_with_static_runtimes_CXX='`$ECHO "$enable_shared_with_static_runtimes_CXX" | $SED "$delay_single_quote_subst"`'
+enable_shared_with_static_runtimes_F77='`$ECHO "$enable_shared_with_static_runtimes_F77" | $SED "$delay_single_quote_subst"`'
+export_dynamic_flag_spec_CXX='`$ECHO "$export_dynamic_flag_spec_CXX" | $SED "$delay_single_quote_subst"`'
+export_dynamic_flag_spec_F77='`$ECHO "$export_dynamic_flag_spec_F77" | $SED "$delay_single_quote_subst"`'
+whole_archive_flag_spec_CXX='`$ECHO "$whole_archive_flag_spec_CXX" | $SED "$delay_single_quote_subst"`'
+whole_archive_flag_spec_F77='`$ECHO "$whole_archive_flag_spec_F77" | $SED "$delay_single_quote_subst"`'
+compiler_needs_object_CXX='`$ECHO "$compiler_needs_object_CXX" | $SED "$delay_single_quote_subst"`'
+compiler_needs_object_F77='`$ECHO "$compiler_needs_object_F77" | $SED "$delay_single_quote_subst"`'
+old_archive_from_new_cmds_CXX='`$ECHO "$old_archive_from_new_cmds_CXX" | $SED "$delay_single_quote_subst"`'
+old_archive_from_new_cmds_F77='`$ECHO "$old_archive_from_new_cmds_F77" | $SED "$delay_single_quote_subst"`'
+old_archive_from_expsyms_cmds_CXX='`$ECHO "$old_archive_from_expsyms_cmds_CXX" | $SED "$delay_single_quote_subst"`'
+old_archive_from_expsyms_cmds_F77='`$ECHO "$old_archive_from_expsyms_cmds_F77" | $SED "$delay_single_quote_subst"`'
+archive_cmds_CXX='`$ECHO "$archive_cmds_CXX" | $SED "$delay_single_quote_subst"`'
+archive_cmds_F77='`$ECHO "$archive_cmds_F77" | $SED "$delay_single_quote_subst"`'
+archive_expsym_cmds_CXX='`$ECHO "$archive_expsym_cmds_CXX" | $SED "$delay_single_quote_subst"`'
+archive_expsym_cmds_F77='`$ECHO "$archive_expsym_cmds_F77" | $SED "$delay_single_quote_subst"`'
+module_cmds_CXX='`$ECHO "$module_cmds_CXX" | $SED "$delay_single_quote_subst"`'
+module_cmds_F77='`$ECHO "$module_cmds_F77" | $SED "$delay_single_quote_subst"`'
+module_expsym_cmds_CXX='`$ECHO "$module_expsym_cmds_CXX" | $SED "$delay_single_quote_subst"`'
+module_expsym_cmds_F77='`$ECHO "$module_expsym_cmds_F77" | $SED "$delay_single_quote_subst"`'
+with_gnu_ld_CXX='`$ECHO "$with_gnu_ld_CXX" | $SED "$delay_single_quote_subst"`'
+with_gnu_ld_F77='`$ECHO "$with_gnu_ld_F77" | $SED "$delay_single_quote_subst"`'
+allow_undefined_flag_CXX='`$ECHO "$allow_undefined_flag_CXX" | $SED "$delay_single_quote_subst"`'
+allow_undefined_flag_F77='`$ECHO "$allow_undefined_flag_F77" | $SED "$delay_single_quote_subst"`'
+no_undefined_flag_CXX='`$ECHO "$no_undefined_flag_CXX" | $SED "$delay_single_quote_subst"`'
+no_undefined_flag_F77='`$ECHO "$no_undefined_flag_F77" | $SED "$delay_single_quote_subst"`'
+hardcode_libdir_flag_spec_CXX='`$ECHO "$hardcode_libdir_flag_spec_CXX" | $SED "$delay_single_quote_subst"`'
+hardcode_libdir_flag_spec_F77='`$ECHO "$hardcode_libdir_flag_spec_F77" | $SED "$delay_single_quote_subst"`'
+hardcode_libdir_separator_CXX='`$ECHO "$hardcode_libdir_separator_CXX" | $SED "$delay_single_quote_subst"`'
+hardcode_libdir_separator_F77='`$ECHO "$hardcode_libdir_separator_F77" | $SED "$delay_single_quote_subst"`'
+hardcode_direct_CXX='`$ECHO "$hardcode_direct_CXX" | $SED "$delay_single_quote_subst"`'
+hardcode_direct_F77='`$ECHO "$hardcode_direct_F77" | $SED "$delay_single_quote_subst"`'
+hardcode_direct_absolute_CXX='`$ECHO "$hardcode_direct_absolute_CXX" | $SED "$delay_single_quote_subst"`'
+hardcode_direct_absolute_F77='`$ECHO "$hardcode_direct_absolute_F77" | $SED "$delay_single_quote_subst"`'
+hardcode_minus_L_CXX='`$ECHO "$hardcode_minus_L_CXX" | $SED "$delay_single_quote_subst"`'
+hardcode_minus_L_F77='`$ECHO "$hardcode_minus_L_F77" | $SED "$delay_single_quote_subst"`'
+hardcode_shlibpath_var_CXX='`$ECHO "$hardcode_shlibpath_var_CXX" | $SED "$delay_single_quote_subst"`'
+hardcode_shlibpath_var_F77='`$ECHO "$hardcode_shlibpath_var_F77" | $SED "$delay_single_quote_subst"`'
+hardcode_automatic_CXX='`$ECHO "$hardcode_automatic_CXX" | $SED "$delay_single_quote_subst"`'
+hardcode_automatic_F77='`$ECHO "$hardcode_automatic_F77" | $SED "$delay_single_quote_subst"`'
+inherit_rpath_CXX='`$ECHO "$inherit_rpath_CXX" | $SED "$delay_single_quote_subst"`'
+inherit_rpath_F77='`$ECHO "$inherit_rpath_F77" | $SED "$delay_single_quote_subst"`'
+link_all_deplibs_CXX='`$ECHO "$link_all_deplibs_CXX" | $SED "$delay_single_quote_subst"`'
+link_all_deplibs_F77='`$ECHO "$link_all_deplibs_F77" | $SED "$delay_single_quote_subst"`'
+always_export_symbols_CXX='`$ECHO "$always_export_symbols_CXX" | $SED "$delay_single_quote_subst"`'
+always_export_symbols_F77='`$ECHO "$always_export_symbols_F77" | $SED "$delay_single_quote_subst"`'
+export_symbols_cmds_CXX='`$ECHO "$export_symbols_cmds_CXX" | $SED "$delay_single_quote_subst"`'
+export_symbols_cmds_F77='`$ECHO "$export_symbols_cmds_F77" | $SED "$delay_single_quote_subst"`'
+exclude_expsyms_CXX='`$ECHO "$exclude_expsyms_CXX" | $SED "$delay_single_quote_subst"`'
+exclude_expsyms_F77='`$ECHO "$exclude_expsyms_F77" | $SED "$delay_single_quote_subst"`'
+include_expsyms_CXX='`$ECHO "$include_expsyms_CXX" | $SED "$delay_single_quote_subst"`'
+include_expsyms_F77='`$ECHO "$include_expsyms_F77" | $SED "$delay_single_quote_subst"`'
+prelink_cmds_CXX='`$ECHO "$prelink_cmds_CXX" | $SED "$delay_single_quote_subst"`'
+prelink_cmds_F77='`$ECHO "$prelink_cmds_F77" | $SED "$delay_single_quote_subst"`'
+postlink_cmds_CXX='`$ECHO "$postlink_cmds_CXX" | $SED "$delay_single_quote_subst"`'
+postlink_cmds_F77='`$ECHO "$postlink_cmds_F77" | $SED "$delay_single_quote_subst"`'
+file_list_spec_CXX='`$ECHO "$file_list_spec_CXX" | $SED "$delay_single_quote_subst"`'
+file_list_spec_F77='`$ECHO "$file_list_spec_F77" | $SED "$delay_single_quote_subst"`'
+hardcode_action_CXX='`$ECHO "$hardcode_action_CXX" | $SED "$delay_single_quote_subst"`'
+hardcode_action_F77='`$ECHO "$hardcode_action_F77" | $SED "$delay_single_quote_subst"`'
+compiler_lib_search_dirs_CXX='`$ECHO "$compiler_lib_search_dirs_CXX" | $SED "$delay_single_quote_subst"`'
+compiler_lib_search_dirs_F77='`$ECHO "$compiler_lib_search_dirs_F77" | $SED "$delay_single_quote_subst"`'
+predep_objects_CXX='`$ECHO "$predep_objects_CXX" | $SED "$delay_single_quote_subst"`'
+predep_objects_F77='`$ECHO "$predep_objects_F77" | $SED "$delay_single_quote_subst"`'
+postdep_objects_CXX='`$ECHO "$postdep_objects_CXX" | $SED "$delay_single_quote_subst"`'
+postdep_objects_F77='`$ECHO "$postdep_objects_F77" | $SED "$delay_single_quote_subst"`'
+predeps_CXX='`$ECHO "$predeps_CXX" | $SED "$delay_single_quote_subst"`'
+predeps_F77='`$ECHO "$predeps_F77" | $SED "$delay_single_quote_subst"`'
+postdeps_CXX='`$ECHO "$postdeps_CXX" | $SED "$delay_single_quote_subst"`'
+postdeps_F77='`$ECHO "$postdeps_F77" | $SED "$delay_single_quote_subst"`'
+compiler_lib_search_path_CXX='`$ECHO "$compiler_lib_search_path_CXX" | $SED "$delay_single_quote_subst"`'
+compiler_lib_search_path_F77='`$ECHO "$compiler_lib_search_path_F77" | $SED "$delay_single_quote_subst"`'
+
+LTCC='$LTCC'
+LTCFLAGS='$LTCFLAGS'
+compiler='$compiler_DEFAULT'
+
+# A function that is used when there is no print builtin or printf.
+func_fallback_echo ()
+{
+  eval 'cat <<_LTECHO_EOF
+\$1
+_LTECHO_EOF'
+}
+
+# Quote evaled strings.
+for var in NM \
+AS \
+DLLTOOL \
+OBJDUMP \
+SHELL \
+ECHO \
+PATH_SEPARATOR \
+SED \
+GREP \
+EGREP \
+FGREP \
+LD \
+LN_S \
+lt_SP2NL \
+lt_NL2SP \
+reload_flag \
+deplibs_check_method \
+file_magic_cmd \
+file_magic_glob \
+want_nocaseglob \
+sharedlib_from_linklib_cmd \
+AR \
+AR_FLAGS \
+archiver_list_spec \
+STRIP \
+RANLIB \
+CC \
+CFLAGS \
+compiler \
+lt_cv_sys_global_symbol_pipe \
+lt_cv_sys_global_symbol_to_cdecl \
+lt_cv_sys_global_symbol_to_c_name_address \
+lt_cv_sys_global_symbol_to_c_name_address_lib_prefix \
+nm_file_list_spec \
+lt_prog_compiler_no_builtin_flag \
+lt_prog_compiler_pic \
+lt_prog_compiler_wl \
+lt_prog_compiler_static \
+lt_cv_prog_compiler_c_o \
+need_locks \
+MANIFEST_TOOL \
+DSYMUTIL \
+NMEDIT \
+LIPO \
+OTOOL \
+OTOOL64 \
+shrext_cmds \
+export_dynamic_flag_spec \
+whole_archive_flag_spec \
+compiler_needs_object \
+with_gnu_ld \
+allow_undefined_flag \
+no_undefined_flag \
+hardcode_libdir_flag_spec \
+hardcode_libdir_separator \
+exclude_expsyms \
+include_expsyms \
+file_list_spec \
+variables_saved_for_relink \
+libname_spec \
+library_names_spec \
+soname_spec \
+install_override_mode \
+finish_eval \
+old_striplib \
+striplib \
+compiler_lib_search_dirs \
+predep_objects \
+postdep_objects \
+predeps \
+postdeps \
+compiler_lib_search_path \
+LD_CXX \
+LD_F77 \
+reload_flag_CXX \
+reload_flag_F77 \
+compiler_CXX \
+compiler_F77 \
+lt_prog_compiler_no_builtin_flag_CXX \
+lt_prog_compiler_no_builtin_flag_F77 \
+lt_prog_compiler_pic_CXX \
+lt_prog_compiler_pic_F77 \
+lt_prog_compiler_wl_CXX \
+lt_prog_compiler_wl_F77 \
+lt_prog_compiler_static_CXX \
+lt_prog_compiler_static_F77 \
+lt_cv_prog_compiler_c_o_CXX \
+lt_cv_prog_compiler_c_o_F77 \
+export_dynamic_flag_spec_CXX \
+export_dynamic_flag_spec_F77 \
+whole_archive_flag_spec_CXX \
+whole_archive_flag_spec_F77 \
+compiler_needs_object_CXX \
+compiler_needs_object_F77 \
+with_gnu_ld_CXX \
+with_gnu_ld_F77 \
+allow_undefined_flag_CXX \
+allow_undefined_flag_F77 \
+no_undefined_flag_CXX \
+no_undefined_flag_F77 \
+hardcode_libdir_flag_spec_CXX \
+hardcode_libdir_flag_spec_F77 \
+hardcode_libdir_separator_CXX \
+hardcode_libdir_separator_F77 \
+exclude_expsyms_CXX \
+exclude_expsyms_F77 \
+include_expsyms_CXX \
+include_expsyms_F77 \
+file_list_spec_CXX \
+file_list_spec_F77 \
+compiler_lib_search_dirs_CXX \
+compiler_lib_search_dirs_F77 \
+predep_objects_CXX \
+predep_objects_F77 \
+postdep_objects_CXX \
+postdep_objects_F77 \
+predeps_CXX \
+predeps_F77 \
+postdeps_CXX \
+postdeps_F77 \
+compiler_lib_search_path_CXX \
+compiler_lib_search_path_F77; do
+    case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in
+    *[\\\\\\\`\\"\\\$]*)
+      eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED \\"\\\$sed_quote_subst\\"\\\`\\\\\\""
+      ;;
+    *)
+      eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\""
+      ;;
+    esac
+done
+
+# Double-quote double-evaled strings.
+for var in reload_cmds \
+old_postinstall_cmds \
+old_postuninstall_cmds \
+old_archive_cmds \
+extract_expsyms_cmds \
+old_archive_from_new_cmds \
+old_archive_from_expsyms_cmds \
+archive_cmds \
+archive_expsym_cmds \
+module_cmds \
+module_expsym_cmds \
+export_symbols_cmds \
+prelink_cmds \
+postlink_cmds \
+postinstall_cmds \
+postuninstall_cmds \
+finish_cmds \
+sys_lib_search_path_spec \
+sys_lib_dlsearch_path_spec \
+reload_cmds_CXX \
+reload_cmds_F77 \
+old_archive_cmds_CXX \
+old_archive_cmds_F77 \
+old_archive_from_new_cmds_CXX \
+old_archive_from_new_cmds_F77 \
+old_archive_from_expsyms_cmds_CXX \
+old_archive_from_expsyms_cmds_F77 \
+archive_cmds_CXX \
+archive_cmds_F77 \
+archive_expsym_cmds_CXX \
+archive_expsym_cmds_F77 \
+module_cmds_CXX \
+module_cmds_F77 \
+module_expsym_cmds_CXX \
+module_expsym_cmds_F77 \
+export_symbols_cmds_CXX \
+export_symbols_cmds_F77 \
+prelink_cmds_CXX \
+prelink_cmds_F77 \
+postlink_cmds_CXX \
+postlink_cmds_F77; do
+    case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in
+    *[\\\\\\\`\\"\\\$]*)
+      eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED -e \\"\\\$double_quote_subst\\" -e \\"\\\$sed_quote_subst\\" -e \\"\\\$delay_variable_subst\\"\\\`\\\\\\""
+      ;;
+    *)
+      eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\""
+      ;;
+    esac
+done
+
+ac_aux_dir='$ac_aux_dir'
+xsi_shell='$xsi_shell'
+lt_shell_append='$lt_shell_append'
+
+# See if we are running on zsh, and set the options which allow our
+# commands through without removal of \ escapes INIT.
+if test -n "\${ZSH_VERSION+set}" ; then
+   setopt NO_GLOB_SUBST
+fi
+
+
+    PACKAGE='$PACKAGE'
+    VERSION='$VERSION'
+    TIMESTAMP='$TIMESTAMP'
+    RM='$RM'
+    ofile='$ofile'
+
+
+
+
+
+
+
+
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+
+# Handling of arguments.
+for ac_config_target in $ac_config_targets
+do
+  case $ac_config_target in
+    "config.h") CONFIG_HEADERS="$CONFIG_HEADERS config.h:config.in" ;;
+    "libtool") CONFIG_COMMANDS="$CONFIG_COMMANDS libtool" ;;
+    "mpn/$tmp_fn.$tmp_ext") CONFIG_LINKS="$CONFIG_LINKS mpn/$tmp_fn.$tmp_ext:mpn/$tmp_dir/$tmp_base.$tmp_ext" ;;
+    "gmp-mparam.h") CONFIG_LINKS="$CONFIG_LINKS gmp-mparam.h:mpn/$gmp_mparam_dir/gmp-mparam.h" ;;
+    "demos/pexpr-config.h") CONFIG_FILES="$CONFIG_FILES demos/pexpr-config.h:demos/pexpr-config-h.in" ;;
+    "demos/calc/calc-config.h") CONFIG_FILES="$CONFIG_FILES demos/calc/calc-config.h:demos/calc/calc-config-h.in" ;;
+    "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;;
+    "mpbsd/Makefile") CONFIG_FILES="$CONFIG_FILES mpbsd/Makefile" ;;
+    "mpf/Makefile") CONFIG_FILES="$CONFIG_FILES mpf/Makefile" ;;
+    "mpn/Makefile") CONFIG_FILES="$CONFIG_FILES mpn/Makefile" ;;
+    "mpq/Makefile") CONFIG_FILES="$CONFIG_FILES mpq/Makefile" ;;
+    "mpz/Makefile") CONFIG_FILES="$CONFIG_FILES mpz/Makefile" ;;
+    "printf/Makefile") CONFIG_FILES="$CONFIG_FILES printf/Makefile" ;;
+    "scanf/Makefile") CONFIG_FILES="$CONFIG_FILES scanf/Makefile" ;;
+    "cxx/Makefile") CONFIG_FILES="$CONFIG_FILES cxx/Makefile" ;;
+    "tests/Makefile") CONFIG_FILES="$CONFIG_FILES tests/Makefile" ;;
+    "tests/devel/Makefile") CONFIG_FILES="$CONFIG_FILES tests/devel/Makefile" ;;
+    "tests/mpbsd/Makefile") CONFIG_FILES="$CONFIG_FILES tests/mpbsd/Makefile" ;;
+    "tests/mpf/Makefile") CONFIG_FILES="$CONFIG_FILES tests/mpf/Makefile" ;;
+    "tests/mpn/Makefile") CONFIG_FILES="$CONFIG_FILES tests/mpn/Makefile" ;;
+    "tests/mpq/Makefile") CONFIG_FILES="$CONFIG_FILES tests/mpq/Makefile" ;;
+    "tests/mpz/Makefile") CONFIG_FILES="$CONFIG_FILES tests/mpz/Makefile" ;;
+    "tests/rand/Makefile") CONFIG_FILES="$CONFIG_FILES tests/rand/Makefile" ;;
+    "tests/misc/Makefile") CONFIG_FILES="$CONFIG_FILES tests/misc/Makefile" ;;
+    "tests/cxx/Makefile") CONFIG_FILES="$CONFIG_FILES tests/cxx/Makefile" ;;
+    "doc/Makefile") CONFIG_FILES="$CONFIG_FILES doc/Makefile" ;;
+    "tune/Makefile") CONFIG_FILES="$CONFIG_FILES tune/Makefile" ;;
+    "demos/Makefile") CONFIG_FILES="$CONFIG_FILES demos/Makefile" ;;
+    "demos/calc/Makefile") CONFIG_FILES="$CONFIG_FILES demos/calc/Makefile" ;;
+    "demos/expr/Makefile") CONFIG_FILES="$CONFIG_FILES demos/expr/Makefile" ;;
+    "gmp.h") CONFIG_FILES="$CONFIG_FILES gmp.h:gmp-h.in" ;;
+    "mp.h") CONFIG_FILES="$CONFIG_FILES mp.h:mp-h.in" ;;
+
+  *) as_fn_error "invalid argument: \`$ac_config_target'" "$LINENO" 5;;
+  esac
+done
+
+
+# If the user did not use the arguments to specify the items to instantiate,
+# then the envvar interface is used.  Set only those that are not.
+# We use the long form for the default assignment because of an extremely
+# bizarre bug on SunOS 4.1.3.
+if $ac_need_defaults; then
+  test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files
+  test "${CONFIG_HEADERS+set}" = set || CONFIG_HEADERS=$config_headers
+  test "${CONFIG_LINKS+set}" = set || CONFIG_LINKS=$config_links
+  test "${CONFIG_COMMANDS+set}" = set || CONFIG_COMMANDS=$config_commands
+fi
+
+# Have a temporary directory for convenience.  Make it in the build tree
+# simply because there is no reason against having it here, and in addition,
+# creating and moving files from /tmp can sometimes cause problems.
+# Hook for its removal unless debugging.
+# Note that there is a small window in which the directory will not be cleaned:
+# after its creation but before its name has been assigned to `$tmp'.
+$debug ||
+{
+  tmp=
+  trap 'exit_status=$?
+  { test -z "$tmp" || test ! -d "$tmp" || rm -fr "$tmp"; } && exit $exit_status
+' 0
+  trap 'as_fn_exit 1' 1 2 13 15
+}
+# Create a (secure) tmp directory for tmp files.
+
+{
+  tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` &&
+  test -n "$tmp" && test -d "$tmp"
+}  ||
+{
+  tmp=./conf$$-$RANDOM
+  (umask 077 && mkdir "$tmp")
+} || as_fn_error "cannot create a temporary directory in ." "$LINENO" 5
+
+# Set up the scripts for CONFIG_FILES section.
+# No need to generate them if there are no CONFIG_FILES.
+# This happens for instance with `./config.status config.h'.
+if test -n "$CONFIG_FILES"; then
+
+
+ac_cr=`echo X | tr X '\015'`
+# On cygwin, bash can eat \r inside `` if the user requested igncr.
+# But we know of no other shell where ac_cr would be empty at this
+# point, so we can use a bashism as a fallback.
+if test "x$ac_cr" = x; then
+  eval ac_cr=\$\'\\r\'
+fi
+ac_cs_awk_cr=`$AWK 'BEGIN { print "a\rb" }' </dev/null 2>/dev/null`
+if test "$ac_cs_awk_cr" = "a${ac_cr}b"; then
+  ac_cs_awk_cr='\r'
+else
+  ac_cs_awk_cr=$ac_cr
+fi
+
+echo 'BEGIN {' >"$tmp/subs1.awk" &&
+_ACEOF
+
+
+{
+  echo "cat >conf$$subs.awk <<_ACEOF" &&
+  echo "$ac_subst_vars" | sed 's/.*/&!$&$ac_delim/' &&
+  echo "_ACEOF"
+} >conf$$subs.sh ||
+  as_fn_error "could not make $CONFIG_STATUS" "$LINENO" 5
+ac_delim_num=`echo "$ac_subst_vars" | grep -c '$'`
+ac_delim='%!_!# '
+for ac_last_try in false false false false false :; do
+  . ./conf$$subs.sh ||
+    as_fn_error "could not make $CONFIG_STATUS" "$LINENO" 5
+
+  ac_delim_n=`sed -n "s/.*$ac_delim\$/X/p" conf$$subs.awk | grep -c X`
+  if test $ac_delim_n = $ac_delim_num; then
+    break
+  elif $ac_last_try; then
+    as_fn_error "could not make $CONFIG_STATUS" "$LINENO" 5
+  else
+    ac_delim="$ac_delim!$ac_delim _$ac_delim!! "
+  fi
+done
+rm -f conf$$subs.sh
+
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+cat >>"\$tmp/subs1.awk" <<\\_ACAWK &&
+_ACEOF
+sed -n '
+h
+s/^/S["/; s/!.*/"]=/
+p
+g
+s/^[^!]*!//
+:repl
+t repl
+s/'"$ac_delim"'$//
+t delim
+:nl
+h
+s/\(.\{148\}\)..*/\1/
+t more1
+s/["\\]/\\&/g; s/^/"/; s/$/\\n"\\/
+p
+n
+b repl
+:more1
+s/["\\]/\\&/g; s/^/"/; s/$/"\\/
+p
+g
+s/.\{148\}//
+t nl
+:delim
+h
+s/\(.\{148\}\)..*/\1/
+t more2
+s/["\\]/\\&/g; s/^/"/; s/$/"/
+p
+b
+:more2
+s/["\\]/\\&/g; s/^/"/; s/$/"\\/
+p
+g
+s/.\{148\}//
+t delim
+' <conf$$subs.awk | sed '
+/^[^""]/{
+  N
+  s/\n//
+}
+' >>$CONFIG_STATUS || ac_write_fail=1
+rm -f conf$$subs.awk
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+_ACAWK
+cat >>"\$tmp/subs1.awk" <<_ACAWK &&
+  for (key in S) S_is_set[key] = 1
+  FS = "\a"
+
+}
+{
+  line = $ 0
+  nfields = split(line, field, "@")
+  substed = 0
+  len = length(field[1])
+  for (i = 2; i < nfields; i++) {
+    key = field[i]
+    keylen = length(key)
+    if (S_is_set[key]) {
+      value = S[key]
+      line = substr(line, 1, len) "" value "" substr(line, len + keylen + 3)
+      len += length(value) + length(field[++i])
+      substed = 1
+    } else
+      len += 1 + keylen
+  }
+
+  print line
+}
+
+_ACAWK
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+if sed "s/$ac_cr//" < /dev/null > /dev/null 2>&1; then
+  sed "s/$ac_cr\$//; s/$ac_cr/$ac_cs_awk_cr/g"
+else
+  cat
+fi < "$tmp/subs1.awk" > "$tmp/subs.awk" \
+  || as_fn_error "could not setup config files machinery" "$LINENO" 5
+_ACEOF
+
+# VPATH may cause trouble with some makes, so we remove $(srcdir),
+# ${srcdir} and @srcdir@ from VPATH if srcdir is ".", strip leading and
+# trailing colons and then remove the whole line if VPATH becomes empty
+# (actually we leave an empty line to preserve line numbers).
+if test "x$srcdir" = x.; then
+  ac_vpsub='/^[         ]*VPATH[        ]*=/{
+s/:*\$(srcdir):*/:/
+s/:*\${srcdir}:*/:/
+s/:*@srcdir@:*/:/
+s/^\([^=]*=[    ]*\):*/\1/
+s/:*$//
+s/^[^=]*=[      ]*$//
+}'
+fi
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+fi # test -n "$CONFIG_FILES"
+
+# Set up the scripts for CONFIG_HEADERS section.
+# No need to generate them if there are no CONFIG_HEADERS.
+# This happens for instance with `./config.status Makefile'.
+if test -n "$CONFIG_HEADERS"; then
+cat >"$tmp/defines.awk" <<\_ACAWK ||
+BEGIN {
+_ACEOF
+
+# Transform confdefs.h into an awk script `defines.awk', embedded as a
+# here-document in config.status, that substitutes the proper values into
+# config.h.in to produce config.h.
+
+# Create a delimiter string that does not exist in confdefs.h, to ease
+# handling of long lines.
+ac_delim='%!_!# '
+for ac_last_try in false false :; do
+  ac_t=`sed -n "/$ac_delim/p" confdefs.h`
+  if test -z "$ac_t"; then
+    break
+  elif $ac_last_try; then
+    as_fn_error "could not make $CONFIG_HEADERS" "$LINENO" 5
+  else
+    ac_delim="$ac_delim!$ac_delim _$ac_delim!! "
+  fi
+done
+
+# For the awk script, D is an array of macro values keyed by name;
+# likewise, P contains the macro parameters, if any.  Preserve
+# backslash-newline sequences.
+
+ac_word_re=[_$as_cr_Letters][_$as_cr_alnum]*
+sed -n '
+s/.\{148\}/&'"$ac_delim"'/g
+t rset
+:rset
+s/^[    ]*#[    ]*define[       ][      ]*/ /
+t def
+d
+:def
+s/\\$//
+t bsnl
+s/["\\]/\\&/g
+s/^ \('"$ac_word_re"'\)\(([^()]*)\)[    ]*\(.*\)/P["\1"]="\2"\
+D["\1"]=" \3"/p
+s/^ \('"$ac_word_re"'\)[        ]*\(.*\)/D["\1"]=" \2"/p
+d
+:bsnl
+s/["\\]/\\&/g
+s/^ \('"$ac_word_re"'\)\(([^()]*)\)[    ]*\(.*\)/P["\1"]="\2"\
+D["\1"]=" \3\\\\\\n"\\/p
+t cont
+s/^ \('"$ac_word_re"'\)[        ]*\(.*\)/D["\1"]=" \2\\\\\\n"\\/p
+t cont
+d
+:cont
+n
+s/.\{148\}/&'"$ac_delim"'/g
+t clear
+:clear
+s/\\$//
+t bsnlc
+s/["\\]/\\&/g; s/^/"/; s/$/"/p
+d
+:bsnlc
+s/["\\]/\\&/g; s/^/"/; s/$/\\\\\\n"\\/p
+b cont
+' <confdefs.h | sed '
+s/'"$ac_delim"'/"\\\
+"/g' >>$CONFIG_STATUS || ac_write_fail=1
+
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+  for (key in D) D_is_set[key] = 1
+  FS = "\a"
+}
+/^[\t ]*#[\t ]*(define|undef)[\t ]+$ac_word_re([\t (]|\$)/ {
+  line = \$ 0
+  split(line, arg, " ")
+  if (arg[1] == "#") {
+    defundef = arg[2]
+    mac1 = arg[3]
+  } else {
+    defundef = substr(arg[1], 2)
+    mac1 = arg[2]
+  }
+  split(mac1, mac2, "(") #)
+  macro = mac2[1]
+  prefix = substr(line, 1, index(line, defundef) - 1)
+  if (D_is_set[macro]) {
+    # Preserve the white space surrounding the "#".
+    print prefix "define", macro P[macro] D[macro]
+    next
+  } else {
+    # Replace #undef with comments.  This is necessary, for example,
+    # in the case of _POSIX_SOURCE, which is predefined and required
+    # on some systems where configure will not decide to define it.
+    if (defundef == "undef") {
+      print "/*", prefix defundef, macro, "*/"
+      next
+    }
+  }
+}
+{ print }
+_ACAWK
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+  as_fn_error "could not setup config headers machinery" "$LINENO" 5
+fi # test -n "$CONFIG_HEADERS"
+
+
+eval set X "  :F $CONFIG_FILES  :H $CONFIG_HEADERS  :L $CONFIG_LINKS  :C $CONFIG_COMMANDS"
+shift
+for ac_tag
+do
+  case $ac_tag in
+  :[FHLC]) ac_mode=$ac_tag; continue;;
+  esac
+  case $ac_mode$ac_tag in
+  :[FHL]*:*);;
+  :L* | :C*:*) as_fn_error "invalid tag \`$ac_tag'" "$LINENO" 5;;
+  :[FH]-) ac_tag=-:-;;
+  :[FH]*) ac_tag=$ac_tag:$ac_tag.in;;
+  esac
+  ac_save_IFS=$IFS
+  IFS=:
+  set x $ac_tag
+  IFS=$ac_save_IFS
+  shift
+  ac_file=$1
+  shift
+
+  case $ac_mode in
+  :L) ac_source=$1;;
+  :[FH])
+    ac_file_inputs=
+    for ac_f
+    do
+      case $ac_f in
+      -) ac_f="$tmp/stdin";;
+      *) # Look for the file first in the build tree, then in the source tree
+        # (if the path is not absolute).  The absolute path cannot be DOS-style,
+        # because $ac_f cannot contain `:'.
+        test -f "$ac_f" ||
+          case $ac_f in
+          [\\/$]*) false;;
+          *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";;
+          esac ||
+          as_fn_error "cannot find input file: \`$ac_f'" "$LINENO" 5;;
+      esac
+      case $ac_f in *\'*) ac_f=`$as_echo "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac
+      as_fn_append ac_file_inputs " '$ac_f'"
+    done
+
+    # Let's still pretend it is `configure' which instantiates (i.e., don't
+    # use $as_me), people would be surprised to read:
+    #    /* config.h.  Generated by config.status.  */
+    configure_input='Generated from '`
+         $as_echo "$*" | sed 's|^[^:]*/||;s|:[^:]*/|, |g'
+       `' by configure.'
+    if test x"$ac_file" != x-; then
+      configure_input="$ac_file.  $configure_input"
+      { $as_echo "$as_me:${as_lineno-$LINENO}: creating $ac_file" >&5
+$as_echo "$as_me: creating $ac_file" >&6;}
+    fi
+    # Neutralize special characters interpreted by sed in replacement strings.
+    case $configure_input in #(
+    *\&* | *\|* | *\\* )
+       ac_sed_conf_input=`$as_echo "$configure_input" |
+       sed 's/[\\\\&|]/\\\\&/g'`;; #(
+    *) ac_sed_conf_input=$configure_input;;
+    esac
+
+    case $ac_tag in
+    *:-:* | *:-) cat >"$tmp/stdin" \
+      || as_fn_error "could not create $ac_file" "$LINENO" 5 ;;
+    esac
+    ;;
+  esac
+
+  ac_dir=`$as_dirname -- "$ac_file" ||
+$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+        X"$ac_file" : 'X\(//\)[^/]' \| \
+        X"$ac_file" : 'X\(//\)$' \| \
+        X"$ac_file" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X"$ac_file" |
+    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+           s//\1/
+           q
+         }
+         /^X\(\/\/\)[^/].*/{
+           s//\1/
+           q
+         }
+         /^X\(\/\/\)$/{
+           s//\1/
+           q
+         }
+         /^X\(\/\).*/{
+           s//\1/
+           q
+         }
+         s/.*/./; q'`
+  as_dir="$ac_dir"; as_fn_mkdir_p
+  ac_builddir=.
+
+case "$ac_dir" in
+.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;;
+*)
+  ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'`
+  # A ".." for each directory in $ac_dir_suffix.
+  ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'`
+  case $ac_top_builddir_sub in
+  "") ac_top_builddir_sub=. ac_top_build_prefix= ;;
+  *)  ac_top_build_prefix=$ac_top_builddir_sub/ ;;
+  esac ;;
+esac
+ac_abs_top_builddir=$ac_pwd
+ac_abs_builddir=$ac_pwd$ac_dir_suffix
+# for backward compatibility:
+ac_top_builddir=$ac_top_build_prefix
+
+case $srcdir in
+  .)  # We are building in place.
+    ac_srcdir=.
+    ac_top_srcdir=$ac_top_builddir_sub
+    ac_abs_top_srcdir=$ac_pwd ;;
+  [\\/]* | ?:[\\/]* )  # Absolute name.
+    ac_srcdir=$srcdir$ac_dir_suffix;
+    ac_top_srcdir=$srcdir
+    ac_abs_top_srcdir=$srcdir ;;
+  *) # Relative name.
+    ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix
+    ac_top_srcdir=$ac_top_build_prefix$srcdir
+    ac_abs_top_srcdir=$ac_pwd/$srcdir ;;
+esac
+ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix
+
+
+  case $ac_mode in
+  :F)
+  #
+  # CONFIG_FILE
+  #
+
+  case $INSTALL in
+  [\\/$]* | ?:[\\/]* ) ac_INSTALL=$INSTALL ;;
+  *) ac_INSTALL=$ac_top_build_prefix$INSTALL ;;
+  esac
+  ac_MKDIR_P=$MKDIR_P
+  case $MKDIR_P in
+  [\\/$]* | ?:[\\/]* ) ;;
+  */*) ac_MKDIR_P=$ac_top_build_prefix$MKDIR_P ;;
+  esac
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+# If the template does not know about datarootdir, expand it.
+# FIXME: This hack should be removed a few years after 2.60.
+ac_datarootdir_hack=; ac_datarootdir_seen=
+ac_sed_dataroot='
+/datarootdir/ {
+  p
+  q
+}
+/@datadir@/p
+/@docdir@/p
+/@infodir@/p
+/@localedir@/p
+/@mandir@/p'
+case `eval "sed -n \"\$ac_sed_dataroot\" $ac_file_inputs"` in
+*datarootdir*) ac_datarootdir_seen=yes;;
+*@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*)
+  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5
+$as_echo "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;}
+_ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+  ac_datarootdir_hack='
+  s&@datadir@&$datadir&g
+  s&@docdir@&$docdir&g
+  s&@infodir@&$infodir&g
+  s&@localedir@&$localedir&g
+  s&@mandir@&$mandir&g
+  s&\\\${datarootdir}&$datarootdir&g' ;;
+esac
+_ACEOF
+
+# Neutralize VPATH when `$srcdir' = `.'.
+# Shell code in configure.ac might set extrasub.
+# FIXME: do we really want to maintain this feature?
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+ac_sed_extra="$ac_vpsub
+$extrasub
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+:t
+/@[a-zA-Z_][a-zA-Z_0-9]*@/!b
+s|@configure_input@|$ac_sed_conf_input|;t t
+s&@top_builddir@&$ac_top_builddir_sub&;t t
+s&@top_build_prefix@&$ac_top_build_prefix&;t t
+s&@srcdir@&$ac_srcdir&;t t
+s&@abs_srcdir@&$ac_abs_srcdir&;t t
+s&@top_srcdir@&$ac_top_srcdir&;t t
+s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t
+s&@builddir@&$ac_builddir&;t t
+s&@abs_builddir@&$ac_abs_builddir&;t t
+s&@abs_top_builddir@&$ac_abs_top_builddir&;t t
+s&@INSTALL@&$ac_INSTALL&;t t
+s&@MKDIR_P@&$ac_MKDIR_P&;t t
+$ac_datarootdir_hack
+"
+eval sed \"\$ac_sed_extra\" "$ac_file_inputs" | $AWK -f "$tmp/subs.awk" >$tmp/out \
+  || as_fn_error "could not create $ac_file" "$LINENO" 5
+
+test -z "$ac_datarootdir_hack$ac_datarootdir_seen" &&
+  { ac_out=`sed -n '/\${datarootdir}/p' "$tmp/out"`; test -n "$ac_out"; } &&
+  { ac_out=`sed -n '/^[         ]*datarootdir[  ]*:*=/p' "$tmp/out"`; test -z "$ac_out"; } &&
+  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable \`datarootdir'
+which seems to be undefined.  Please make sure it is defined." >&5
+$as_echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir'
+which seems to be undefined.  Please make sure it is defined." >&2;}
+
+  rm -f "$tmp/stdin"
+  case $ac_file in
+  -) cat "$tmp/out" && rm -f "$tmp/out";;
+  *) rm -f "$ac_file" && mv "$tmp/out" "$ac_file";;
+  esac \
+  || as_fn_error "could not create $ac_file" "$LINENO" 5
+ ;;
+  :H)
+  #
+  # CONFIG_HEADER
+  #
+  if test x"$ac_file" != x-; then
+    {
+      $as_echo "/* $configure_input  */" \
+      && eval '$AWK -f "$tmp/defines.awk"' "$ac_file_inputs"
+    } >"$tmp/config.h" \
+      || as_fn_error "could not create $ac_file" "$LINENO" 5
+    if diff "$ac_file" "$tmp/config.h" >/dev/null 2>&1; then
+      { $as_echo "$as_me:${as_lineno-$LINENO}: $ac_file is unchanged" >&5
+$as_echo "$as_me: $ac_file is unchanged" >&6;}
+    else
+      rm -f "$ac_file"
+      mv "$tmp/config.h" "$ac_file" \
+       || as_fn_error "could not create $ac_file" "$LINENO" 5
+    fi
+  else
+    $as_echo "/* $configure_input  */" \
+      && eval '$AWK -f "$tmp/defines.awk"' "$ac_file_inputs" \
+      || as_fn_error "could not create -" "$LINENO" 5
+  fi
+# Compute "$ac_file"'s index in $config_headers.
+_am_arg="$ac_file"
+_am_stamp_count=1
+for _am_header in $config_headers :; do
+  case $_am_header in
+    $_am_arg | $_am_arg:* )
+      break ;;
+    * )
+      _am_stamp_count=`expr $_am_stamp_count + 1` ;;
+  esac
+done
+echo "timestamp for $_am_arg" >`$as_dirname -- "$_am_arg" ||
+$as_expr X"$_am_arg" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+        X"$_am_arg" : 'X\(//\)[^/]' \| \
+        X"$_am_arg" : 'X\(//\)$' \| \
+        X"$_am_arg" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X"$_am_arg" |
+    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+           s//\1/
+           q
+         }
+         /^X\(\/\/\)[^/].*/{
+           s//\1/
+           q
+         }
+         /^X\(\/\/\)$/{
+           s//\1/
+           q
+         }
+         /^X\(\/\).*/{
+           s//\1/
+           q
+         }
+         s/.*/./; q'`/stamp-h$_am_stamp_count
+ ;;
+  :L)
+  #
+  # CONFIG_LINK
+  #
+
+  if test "$ac_source" = "$ac_file" && test "$srcdir" = '.'; then
+    :
+  else
+    # Prefer the file from the source tree if names are identical.
+    if test "$ac_source" = "$ac_file" || test ! -r "$ac_source"; then
+      ac_source=$srcdir/$ac_source
+    fi
+
+    { $as_echo "$as_me:${as_lineno-$LINENO}: linking $ac_source to $ac_file" >&5
+$as_echo "$as_me: linking $ac_source to $ac_file" >&6;}
+
+    if test ! -r "$ac_source"; then
+      as_fn_error "$ac_source: file not found" "$LINENO" 5
+    fi
+    rm -f "$ac_file"
+
+    # Try a relative symlink, then a hard link, then a copy.
+    case $srcdir in
+    [\\/$]* | ?:[\\/]* ) ac_rel_source=$ac_source ;;
+       *) ac_rel_source=$ac_top_build_prefix$ac_source ;;
+    esac
+    ln -s "$ac_rel_source" "$ac_file" 2>/dev/null ||
+      ln "$ac_source" "$ac_file" 2>/dev/null ||
+      cp -p "$ac_source" "$ac_file" ||
+      as_fn_error "cannot link or copy $ac_source to $ac_file" "$LINENO" 5
+  fi
+ ;;
+  :C)  { $as_echo "$as_me:${as_lineno-$LINENO}: executing $ac_file commands" >&5
+$as_echo "$as_me: executing $ac_file commands" >&6;}
+ ;;
+  esac
+
+
+  case $ac_file$ac_mode in
+    "libtool":C)
+
+    # See if we are running on zsh, and set the options which allow our
+    # commands through without removal of \ escapes.
+    if test -n "${ZSH_VERSION+set}" ; then
+      setopt NO_GLOB_SUBST
+    fi
+
+    cfgfile="${ofile}T"
+    trap "$RM \"$cfgfile\"; exit 1" 1 2 15
+    $RM "$cfgfile"
+
+    cat <<_LT_EOF >> "$cfgfile"
+#! $SHELL
+
+# `$ECHO "$ofile" | sed 's%^.*/%%'` - Provide generalized library-building support services.
+# Generated automatically by $as_me ($PACKAGE$TIMESTAMP) $VERSION
+# Libtool was configured on host `(hostname || uname -n) 2>/dev/null | sed 1q`:
+# NOTE: Changes made to this file will be lost: look at ltmain.sh.
+#
+#   Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005,
+#                 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+#                 Foundation, Inc.
+#   Written by Gordon Matzigkeit, 1996
+#
+#   This file is part of GNU Libtool.
+#
+# GNU Libtool is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2 of
+# the License, or (at your option) any later version.
+#
+# As a special exception to the GNU General Public License,
+# if you distribute this file as part of a program or library that
+# is built using GNU Libtool, you may include this file under the
+# same distribution terms that you use for the rest of that program.
+#
+# GNU Libtool is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GNU Libtool; see the file COPYING.  If not, a copy
+# can be downloaded from http://www.gnu.org/licenses/gpl.html, or
+# obtained by writing to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+
+# The names of the tagged configurations supported by this script.
+available_tags="CXX F77 "
+
+# ### BEGIN LIBTOOL CONFIG
+
+# A BSD- or MS-compatible name lister.
+NM=$lt_NM
+
+# Assembler program.
+AS=$lt_AS
+
+# DLL creation program.
+DLLTOOL=$lt_DLLTOOL
+
+# Object dumper program.
+OBJDUMP=$lt_OBJDUMP
+
+# Which release of libtool.m4 was used?
+macro_version=$macro_version
+macro_revision=$macro_revision
+
+# Whether or not to build shared libraries.
+build_libtool_libs=$enable_shared
+
+# Whether or not to build static libraries.
+build_old_libs=$enable_static
+
+# What type of objects to build.
+pic_mode=$pic_mode
+
+# Whether or not to optimize for fast installation.
+fast_install=$enable_fast_install
+
+# Shell to use when invoking shell scripts.
+SHELL=$lt_SHELL
+
+# An echo program that protects backslashes.
+ECHO=$lt_ECHO
+
+# The PATH separator for the build system.
+PATH_SEPARATOR=$lt_PATH_SEPARATOR
+
+# The host system.
+host_alias=$host_alias
+host=$host
+host_os=$host_os
+
+# The build system.
+build_alias=$build_alias
+build=$build
+build_os=$build_os
+
+# A sed program that does not truncate output.
+SED=$lt_SED
+
+# Sed that helps us avoid accidentally triggering echo(1) options like -n.
+Xsed="\$SED -e 1s/^X//"
+
+# A grep program that handles long lines.
+GREP=$lt_GREP
+
+# An ERE matcher.
+EGREP=$lt_EGREP
+
+# A literal string matcher.
+FGREP=$lt_FGREP
+
+# Whether we need soft or hard links.
+LN_S=$lt_LN_S
+
+# What is the maximum length of a command?
+max_cmd_len=$max_cmd_len
+
+# Object file suffix (normally "o").
+objext=$ac_objext
+
+# Executable file suffix (normally "").
+exeext=$exeext
+
+# whether the shell understands "unset".
+lt_unset=$lt_unset
+
+# turn spaces into newlines.
+SP2NL=$lt_lt_SP2NL
+
+# turn newlines into spaces.
+NL2SP=$lt_lt_NL2SP
+
+# convert \$build file names to \$host format.
+to_host_file_cmd=$lt_cv_to_host_file_cmd
+
+# convert \$build files to toolchain format.
+to_tool_file_cmd=$lt_cv_to_tool_file_cmd
+
+# Method to check whether dependent libraries are shared objects.
+deplibs_check_method=$lt_deplibs_check_method
+
+# Command to use when deplibs_check_method = "file_magic".
+file_magic_cmd=$lt_file_magic_cmd
+
+# How to find potential files when deplibs_check_method = "file_magic".
+file_magic_glob=$lt_file_magic_glob
+
+# Find potential files using nocaseglob when deplibs_check_method = "file_magic".
+want_nocaseglob=$lt_want_nocaseglob
+
+# Command to associate shared and link libraries.
+sharedlib_from_linklib_cmd=$lt_sharedlib_from_linklib_cmd
+
+# The archiver.
+AR=$lt_AR
+
+# Flags to create an archive.
+AR_FLAGS=$lt_AR_FLAGS
+
+# How to feed a file listing to the archiver.
+archiver_list_spec=$lt_archiver_list_spec
+
+# A symbol stripping program.
+STRIP=$lt_STRIP
+
+# Commands used to install an old-style archive.
+RANLIB=$lt_RANLIB
+old_postinstall_cmds=$lt_old_postinstall_cmds
+old_postuninstall_cmds=$lt_old_postuninstall_cmds
+
+# Whether to use a lock for old archive extraction.
+lock_old_archive_extraction=$lock_old_archive_extraction
+
+# A C compiler.
+LTCC=$lt_CC
+
+# LTCC compiler flags.
+LTCFLAGS=$lt_CFLAGS
+
+# Take the output of nm and produce a listing of raw symbols and C names.
+global_symbol_pipe=$lt_lt_cv_sys_global_symbol_pipe
+
+# Transform the output of nm into a proper C declaration.
+global_symbol_to_cdecl=$lt_lt_cv_sys_global_symbol_to_cdecl
+
+# Transform the output of nm into a C name address pair.
+global_symbol_to_c_name_address=$lt_lt_cv_sys_global_symbol_to_c_name_address
+
+# Transform the output of nm into a C name address pair when lib prefix is needed.
+global_symbol_to_c_name_address_lib_prefix=$lt_lt_cv_sys_global_symbol_to_c_name_address_lib_prefix
+
+# Specify filename containing input files for \$NM.
+nm_file_list_spec=$lt_nm_file_list_spec
+
+# The root where to search for dependent libraries, and in which our libraries should be installed.
+lt_sysroot=$lt_sysroot
+
+# The name of the directory that contains temporary libtool files.
+objdir=$objdir
+
+# Used to examine libraries when file_magic_cmd begins with "file".
+MAGIC_CMD=$MAGIC_CMD
+
+# Must we lock files when doing compilation?
+need_locks=$lt_need_locks
+
+# Manifest tool.
+MANIFEST_TOOL=$lt_MANIFEST_TOOL
+
+# Tool to manipulate archived DWARF debug symbol files on Mac OS X.
+DSYMUTIL=$lt_DSYMUTIL
+
+# Tool to change global to local symbols on Mac OS X.
+NMEDIT=$lt_NMEDIT
+
+# Tool to manipulate fat objects and archives on Mac OS X.
+LIPO=$lt_LIPO
+
+# ldd/readelf like tool for Mach-O binaries on Mac OS X.
+OTOOL=$lt_OTOOL
+
+# ldd/readelf like tool for 64 bit Mach-O binaries on Mac OS X 10.4.
+OTOOL64=$lt_OTOOL64
+
+# Old archive suffix (normally "a").
+libext=$libext
+
+# Shared library suffix (normally ".so").
+shrext_cmds=$lt_shrext_cmds
+
+# The commands to extract the exported symbol list from a shared archive.
+extract_expsyms_cmds=$lt_extract_expsyms_cmds
+
+# Variables whose values should be saved in libtool wrapper scripts and
+# restored at link time.
+variables_saved_for_relink=$lt_variables_saved_for_relink
+
+# Do we need the "lib" prefix for modules?
+need_lib_prefix=$need_lib_prefix
+
+# Do we need a version for libraries?
+need_version=$need_version
+
+# Library versioning type.
+version_type=$version_type
+
+# Shared library runtime path variable.
+runpath_var=$runpath_var
+
+# Shared library path variable.
+shlibpath_var=$shlibpath_var
+
+# Is shlibpath searched before the hard-coded library search path?
+shlibpath_overrides_runpath=$shlibpath_overrides_runpath
+
+# Format of library name prefix.
+libname_spec=$lt_libname_spec
+
+# List of archive names.  First name is the real one, the rest are links.
+# The last name is the one that the linker finds with -lNAME
+library_names_spec=$lt_library_names_spec
+
+# The coded name of the library, if different from the real name.
+soname_spec=$lt_soname_spec
+
+# Permission mode override for installation of shared libraries.
+install_override_mode=$lt_install_override_mode
+
+# Command to use after installation of a shared archive.
+postinstall_cmds=$lt_postinstall_cmds
+
+# Command to use after uninstallation of a shared archive.
+postuninstall_cmds=$lt_postuninstall_cmds
+
+# Commands used to finish a libtool library installation in a directory.
+finish_cmds=$lt_finish_cmds
+
+# As "finish_cmds", except a single script fragment to be evaled but
+# not shown.
+finish_eval=$lt_finish_eval
+
+# Whether we should hardcode library paths into libraries.
+hardcode_into_libs=$hardcode_into_libs
+
+# Compile-time system search path for libraries.
+sys_lib_search_path_spec=$lt_sys_lib_search_path_spec
+
+# Run-time system search path for libraries.
+sys_lib_dlsearch_path_spec=$lt_sys_lib_dlsearch_path_spec
+
+# Whether dlopen is supported.
+dlopen_support=$enable_dlopen
+
+# Whether dlopen of programs is supported.
+dlopen_self=$enable_dlopen_self
+
+# Whether dlopen of statically linked programs is supported.
+dlopen_self_static=$enable_dlopen_self_static
+
+# Commands to strip libraries.
+old_striplib=$lt_old_striplib
+striplib=$lt_striplib
+
+
+# The linker used to build libraries.
+LD=$lt_LD
+
+# How to create reloadable object files.
+reload_flag=$lt_reload_flag
+reload_cmds=$lt_reload_cmds
+
+# Commands used to build an old-style archive.
+old_archive_cmds=$lt_old_archive_cmds
+
+# A language specific compiler.
+CC=$lt_compiler
+
+# Is the compiler the GNU compiler?
+with_gcc=$GCC
+
+# Compiler flag to turn off builtin functions.
+no_builtin_flag=$lt_lt_prog_compiler_no_builtin_flag
+
+# Additional compiler flags for building library objects.
+pic_flag=$lt_lt_prog_compiler_pic
+
+# How to pass a linker flag through the compiler.
+wl=$lt_lt_prog_compiler_wl
+
+# Compiler flag to prevent dynamic linking.
+link_static_flag=$lt_lt_prog_compiler_static
+
+# Does compiler simultaneously support -c and -o options?
+compiler_c_o=$lt_lt_cv_prog_compiler_c_o
+
+# Whether or not to add -lc for building shared libraries.
+build_libtool_need_lc=$archive_cmds_need_lc
+
+# Whether or not to disallow shared libs when runtime libs are static.
+allow_libtool_libs_with_static_runtimes=$enable_shared_with_static_runtimes
+
+# Compiler flag to allow reflexive dlopens.
+export_dynamic_flag_spec=$lt_export_dynamic_flag_spec
+
+# Compiler flag to generate shared objects directly from archives.
+whole_archive_flag_spec=$lt_whole_archive_flag_spec
+
+# Whether the compiler copes with passing no objects directly.
+compiler_needs_object=$lt_compiler_needs_object
+
+# Create an old-style archive from a shared archive.
+old_archive_from_new_cmds=$lt_old_archive_from_new_cmds
+
+# Create a temporary old-style archive to link instead of a shared archive.
+old_archive_from_expsyms_cmds=$lt_old_archive_from_expsyms_cmds
+
+# Commands used to build a shared archive.
+archive_cmds=$lt_archive_cmds
+archive_expsym_cmds=$lt_archive_expsym_cmds
+
+# Commands used to build a loadable module if different from building
+# a shared archive.
+module_cmds=$lt_module_cmds
+module_expsym_cmds=$lt_module_expsym_cmds
+
+# Whether we are building with GNU ld or not.
+with_gnu_ld=$lt_with_gnu_ld
+
+# Flag that allows shared libraries with undefined symbols to be built.
+allow_undefined_flag=$lt_allow_undefined_flag
+
+# Flag that enforces no undefined symbols.
+no_undefined_flag=$lt_no_undefined_flag
+
+# Flag to hardcode \$libdir into a binary during linking.
+# This must work even if \$libdir does not exist
+hardcode_libdir_flag_spec=$lt_hardcode_libdir_flag_spec
+
+# Whether we need a single "-rpath" flag with a separated argument.
+hardcode_libdir_separator=$lt_hardcode_libdir_separator
+
+# Set to "yes" if using DIR/libNAME\${shared_ext} during linking hardcodes
+# DIR into the resulting binary.
+hardcode_direct=$hardcode_direct
+
+# Set to "yes" if using DIR/libNAME\${shared_ext} during linking hardcodes
+# DIR into the resulting binary and the resulting library dependency is
+# "absolute", i.e. impossible to change by setting \${shlibpath_var} if the
+# library is relocated.
+hardcode_direct_absolute=$hardcode_direct_absolute
+
+# Set to "yes" if using the -LDIR flag during linking hardcodes DIR
+# into the resulting binary.
+hardcode_minus_L=$hardcode_minus_L
+
+# Set to "yes" if using SHLIBPATH_VAR=DIR during linking hardcodes DIR
+# into the resulting binary.
+hardcode_shlibpath_var=$hardcode_shlibpath_var
+
+# Set to "yes" if building a shared library automatically hardcodes DIR
+# into the library and all subsequent libraries and executables linked
+# against it.
+hardcode_automatic=$hardcode_automatic
+
+# Set to yes if linker adds runtime paths of dependent libraries
+# to runtime path list.
+inherit_rpath=$inherit_rpath
+
+# Whether libtool must link a program against all its dependency libraries.
+link_all_deplibs=$link_all_deplibs
+
+# Set to "yes" if exported symbols are required.
+always_export_symbols=$always_export_symbols
+
+# The commands to list exported symbols.
+export_symbols_cmds=$lt_export_symbols_cmds
+
+# Symbols that should not be listed in the preloaded symbols.
+exclude_expsyms=$lt_exclude_expsyms
+
+# Symbols that must always be exported.
+include_expsyms=$lt_include_expsyms
+
+# Commands necessary for linking programs (against libraries) with templates.
+prelink_cmds=$lt_prelink_cmds
+
+# Commands necessary for finishing linking programs.
+postlink_cmds=$lt_postlink_cmds
+
+# Specify filename containing input files.
+file_list_spec=$lt_file_list_spec
+
+# How to hardcode a shared library path into an executable.
+hardcode_action=$hardcode_action
+
+# The directories searched by this compiler when creating a shared library.
+compiler_lib_search_dirs=$lt_compiler_lib_search_dirs
+
+# Dependencies to place before and after the objects being linked to
+# create a shared library.
+predep_objects=$lt_predep_objects
+postdep_objects=$lt_postdep_objects
+predeps=$lt_predeps
+postdeps=$lt_postdeps
+
+# The library search path used internally by the compiler when linking
+# a shared library.
+compiler_lib_search_path=$lt_compiler_lib_search_path
+
+# ### END LIBTOOL CONFIG
+
+_LT_EOF
+
+  case $host_os in
+  aix3*)
+    cat <<\_LT_EOF >> "$cfgfile"
+# AIX sometimes has problems with the GCC collect2 program.  For some
+# reason, if we set the COLLECT_NAMES environment variable, the problems
+# vanish in a puff of smoke.
+if test "X${COLLECT_NAMES+set}" != Xset; then
+  COLLECT_NAMES=
+  export COLLECT_NAMES
+fi
+_LT_EOF
+    ;;
+  esac
+
+
+ltmain="$ac_aux_dir/ltmain.sh"
+
+
+  # We use sed instead of cat because bash on DJGPP gets confused if
+  # it finds mixed CR/LF and LF-only lines.  Since sed operates in
+  # text mode, it properly converts lines to CR/LF.  This bash problem
+  # is reportedly fixed, but why not run on old versions too?
+  sed '$q' "$ltmain" >> "$cfgfile" \
+     || (rm -f "$cfgfile"; exit 1)
+
+  if test x"$xsi_shell" = xyes; then
+  sed -e '/^func_dirname ()$/,/^} # func_dirname /c\
+func_dirname ()\
+{\
+\    case ${1} in\
+\      */*) func_dirname_result="${1%/*}${2}" ;;\
+\      *  ) func_dirname_result="${3}" ;;\
+\    esac\
+} # Extended-shell func_dirname implementation' "$cfgfile" > $cfgfile.tmp \
+  && mv -f "$cfgfile.tmp" "$cfgfile" \
+    || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp")
+test 0 -eq $? || _lt_function_replace_fail=:
+
+
+  sed -e '/^func_basename ()$/,/^} # func_basename /c\
+func_basename ()\
+{\
+\    func_basename_result="${1##*/}"\
+} # Extended-shell func_basename implementation' "$cfgfile" > $cfgfile.tmp \
+  && mv -f "$cfgfile.tmp" "$cfgfile" \
+    || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp")
+test 0 -eq $? || _lt_function_replace_fail=:
+
+
+  sed -e '/^func_dirname_and_basename ()$/,/^} # func_dirname_and_basename /c\
+func_dirname_and_basename ()\
+{\
+\    case ${1} in\
+\      */*) func_dirname_result="${1%/*}${2}" ;;\
+\      *  ) func_dirname_result="${3}" ;;\
+\    esac\
+\    func_basename_result="${1##*/}"\
+} # Extended-shell func_dirname_and_basename implementation' "$cfgfile" > $cfgfile.tmp \
+  && mv -f "$cfgfile.tmp" "$cfgfile" \
+    || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp")
+test 0 -eq $? || _lt_function_replace_fail=:
+
+
+  sed -e '/^func_stripname ()$/,/^} # func_stripname /c\
+func_stripname ()\
+{\
+\    # pdksh 5.2.14 does not do ${X%$Y} correctly if both X and Y are\
+\    # positional parameters, so assign one to ordinary parameter first.\
+\    func_stripname_result=${3}\
+\    func_stripname_result=${func_stripname_result#"${1}"}\
+\    func_stripname_result=${func_stripname_result%"${2}"}\
+} # Extended-shell func_stripname implementation' "$cfgfile" > $cfgfile.tmp \
+  && mv -f "$cfgfile.tmp" "$cfgfile" \
+    || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp")
+test 0 -eq $? || _lt_function_replace_fail=:
+
+
+  sed -e '/^func_split_long_opt ()$/,/^} # func_split_long_opt /c\
+func_split_long_opt ()\
+{\
+\    func_split_long_opt_name=${1%%=*}\
+\    func_split_long_opt_arg=${1#*=}\
+} # Extended-shell func_split_long_opt implementation' "$cfgfile" > $cfgfile.tmp \
+  && mv -f "$cfgfile.tmp" "$cfgfile" \
+    || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp")
+test 0 -eq $? || _lt_function_replace_fail=:
+
+
+  sed -e '/^func_split_short_opt ()$/,/^} # func_split_short_opt /c\
+func_split_short_opt ()\
+{\
+\    func_split_short_opt_arg=${1#??}\
+\    func_split_short_opt_name=${1%"$func_split_short_opt_arg"}\
+} # Extended-shell func_split_short_opt implementation' "$cfgfile" > $cfgfile.tmp \
+  && mv -f "$cfgfile.tmp" "$cfgfile" \
+    || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp")
+test 0 -eq $? || _lt_function_replace_fail=:
+
+
+  sed -e '/^func_lo2o ()$/,/^} # func_lo2o /c\
+func_lo2o ()\
+{\
+\    case ${1} in\
+\      *.lo) func_lo2o_result=${1%.lo}.${objext} ;;\
+\      *)    func_lo2o_result=${1} ;;\
+\    esac\
+} # Extended-shell func_lo2o implementation' "$cfgfile" > $cfgfile.tmp \
+  && mv -f "$cfgfile.tmp" "$cfgfile" \
+    || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp")
+test 0 -eq $? || _lt_function_replace_fail=:
+
+
+  sed -e '/^func_xform ()$/,/^} # func_xform /c\
+func_xform ()\
+{\
+    func_xform_result=${1%.*}.lo\
+} # Extended-shell func_xform implementation' "$cfgfile" > $cfgfile.tmp \
+  && mv -f "$cfgfile.tmp" "$cfgfile" \
+    || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp")
+test 0 -eq $? || _lt_function_replace_fail=:
+
+
+  sed -e '/^func_arith ()$/,/^} # func_arith /c\
+func_arith ()\
+{\
+    func_arith_result=$(( $* ))\
+} # Extended-shell func_arith implementation' "$cfgfile" > $cfgfile.tmp \
+  && mv -f "$cfgfile.tmp" "$cfgfile" \
+    || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp")
+test 0 -eq $? || _lt_function_replace_fail=:
+
+
+  sed -e '/^func_len ()$/,/^} # func_len /c\
+func_len ()\
+{\
+    func_len_result=${#1}\
+} # Extended-shell func_len implementation' "$cfgfile" > $cfgfile.tmp \
+  && mv -f "$cfgfile.tmp" "$cfgfile" \
+    || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp")
+test 0 -eq $? || _lt_function_replace_fail=:
+
+fi
+
+if test x"$lt_shell_append" = xyes; then
+  sed -e '/^func_append ()$/,/^} # func_append /c\
+func_append ()\
+{\
+    eval "${1}+=\\${2}"\
+} # Extended-shell func_append implementation' "$cfgfile" > $cfgfile.tmp \
+  && mv -f "$cfgfile.tmp" "$cfgfile" \
+    || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp")
+test 0 -eq $? || _lt_function_replace_fail=:
+
+
+  sed -e '/^func_append_quoted ()$/,/^} # func_append_quoted /c\
+func_append_quoted ()\
+{\
+\    func_quote_for_eval "${2}"\
+\    eval "${1}+=\\\\ \\$func_quote_for_eval_result"\
+} # Extended-shell func_append_quoted implementation' "$cfgfile" > $cfgfile.tmp \
+  && mv -f "$cfgfile.tmp" "$cfgfile" \
+    || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp")
+test 0 -eq $? || _lt_function_replace_fail=:
+
+
+  # Save a `func_append' function call where possible by direct use of '+='
+  sed -e 's%func_append \([a-zA-Z_]\{1,\}\) "%\1+="%g' $cfgfile > $cfgfile.tmp \
+    && mv -f "$cfgfile.tmp" "$cfgfile" \
+      || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp")
+  test 0 -eq $? || _lt_function_replace_fail=:
+else
+  # Save a `func_append' function call even when '+=' is not available
+  sed -e 's%func_append \([a-zA-Z_]\{1,\}\) "%\1="$\1%g' $cfgfile > $cfgfile.tmp \
+    && mv -f "$cfgfile.tmp" "$cfgfile" \
+      || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp")
+  test 0 -eq $? || _lt_function_replace_fail=:
+fi
+
+if test x"$_lt_function_replace_fail" = x":"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Unable to substitute extended shell functions in $ofile" >&5
+$as_echo "$as_me: WARNING: Unable to substitute extended shell functions in $ofile" >&2;}
+fi
+
+
+   mv -f "$cfgfile" "$ofile" ||
+    (rm -f "$ofile" && cp "$cfgfile" "$ofile" && rm -f "$cfgfile")
+  chmod +x "$ofile"
+
+
+    cat <<_LT_EOF >> "$ofile"
+
+# ### BEGIN LIBTOOL TAG CONFIG: CXX
+
+# The linker used to build libraries.
+LD=$lt_LD_CXX
+
+# How to create reloadable object files.
+reload_flag=$lt_reload_flag_CXX
+reload_cmds=$lt_reload_cmds_CXX
+
+# Commands used to build an old-style archive.
+old_archive_cmds=$lt_old_archive_cmds_CXX
+
+# A language specific compiler.
+CC=$lt_compiler_CXX
+
+# Is the compiler the GNU compiler?
+with_gcc=$GCC_CXX
+
+# Compiler flag to turn off builtin functions.
+no_builtin_flag=$lt_lt_prog_compiler_no_builtin_flag_CXX
+
+# Additional compiler flags for building library objects.
+pic_flag=$lt_lt_prog_compiler_pic_CXX
+
+# How to pass a linker flag through the compiler.
+wl=$lt_lt_prog_compiler_wl_CXX
+
+# Compiler flag to prevent dynamic linking.
+link_static_flag=$lt_lt_prog_compiler_static_CXX
+
+# Does compiler simultaneously support -c and -o options?
+compiler_c_o=$lt_lt_cv_prog_compiler_c_o_CXX
+
+# Whether or not to add -lc for building shared libraries.
+build_libtool_need_lc=$archive_cmds_need_lc_CXX
+
+# Whether or not to disallow shared libs when runtime libs are static.
+allow_libtool_libs_with_static_runtimes=$enable_shared_with_static_runtimes_CXX
+
+# Compiler flag to allow reflexive dlopens.
+export_dynamic_flag_spec=$lt_export_dynamic_flag_spec_CXX
+
+# Compiler flag to generate shared objects directly from archives.
+whole_archive_flag_spec=$lt_whole_archive_flag_spec_CXX
+
+# Whether the compiler copes with passing no objects directly.
+compiler_needs_object=$lt_compiler_needs_object_CXX
+
+# Create an old-style archive from a shared archive.
+old_archive_from_new_cmds=$lt_old_archive_from_new_cmds_CXX
+
+# Create a temporary old-style archive to link instead of a shared archive.
+old_archive_from_expsyms_cmds=$lt_old_archive_from_expsyms_cmds_CXX
+
+# Commands used to build a shared archive.
+archive_cmds=$lt_archive_cmds_CXX
+archive_expsym_cmds=$lt_archive_expsym_cmds_CXX
+
+# Commands used to build a loadable module if different from building
+# a shared archive.
+module_cmds=$lt_module_cmds_CXX
+module_expsym_cmds=$lt_module_expsym_cmds_CXX
+
+# Whether we are building with GNU ld or not.
+with_gnu_ld=$lt_with_gnu_ld_CXX
+
+# Flag that allows shared libraries with undefined symbols to be built.
+allow_undefined_flag=$lt_allow_undefined_flag_CXX
+
+# Flag that enforces no undefined symbols.
+no_undefined_flag=$lt_no_undefined_flag_CXX
+
+# Flag to hardcode \$libdir into a binary during linking.
+# This must work even if \$libdir does not exist
+hardcode_libdir_flag_spec=$lt_hardcode_libdir_flag_spec_CXX
+
+# Whether we need a single "-rpath" flag with a separated argument.
+hardcode_libdir_separator=$lt_hardcode_libdir_separator_CXX
+
+# Set to "yes" if using DIR/libNAME\${shared_ext} during linking hardcodes
+# DIR into the resulting binary.
+hardcode_direct=$hardcode_direct_CXX
+
+# Set to "yes" if using DIR/libNAME\${shared_ext} during linking hardcodes
+# DIR into the resulting binary and the resulting library dependency is
+# "absolute", i.e. impossible to change by setting \${shlibpath_var} if the
+# library is relocated.
+hardcode_direct_absolute=$hardcode_direct_absolute_CXX
+
+# Set to "yes" if using the -LDIR flag during linking hardcodes DIR
+# into the resulting binary.
+hardcode_minus_L=$hardcode_minus_L_CXX
+
+# Set to "yes" if using SHLIBPATH_VAR=DIR during linking hardcodes DIR
+# into the resulting binary.
+hardcode_shlibpath_var=$hardcode_shlibpath_var_CXX
+
+# Set to "yes" if building a shared library automatically hardcodes DIR
+# into the library and all subsequent libraries and executables linked
+# against it.
+hardcode_automatic=$hardcode_automatic_CXX
+
+# Set to yes if linker adds runtime paths of dependent libraries
+# to runtime path list.
+inherit_rpath=$inherit_rpath_CXX
+
+# Whether libtool must link a program against all its dependency libraries.
+link_all_deplibs=$link_all_deplibs_CXX
+
+# Set to "yes" if exported symbols are required.
+always_export_symbols=$always_export_symbols_CXX
+
+# The commands to list exported symbols.
+export_symbols_cmds=$lt_export_symbols_cmds_CXX
+
+# Symbols that should not be listed in the preloaded symbols.
+exclude_expsyms=$lt_exclude_expsyms_CXX
+
+# Symbols that must always be exported.
+include_expsyms=$lt_include_expsyms_CXX
+
+# Commands necessary for linking programs (against libraries) with templates.
+prelink_cmds=$lt_prelink_cmds_CXX
+
+# Commands necessary for finishing linking programs.
+postlink_cmds=$lt_postlink_cmds_CXX
+
+# Specify filename containing input files.
+file_list_spec=$lt_file_list_spec_CXX
+
+# How to hardcode a shared library path into an executable.
+hardcode_action=$hardcode_action_CXX
+
+# The directories searched by this compiler when creating a shared library.
+compiler_lib_search_dirs=$lt_compiler_lib_search_dirs_CXX
+
+# Dependencies to place before and after the objects being linked to
+# create a shared library.
+predep_objects=$lt_predep_objects_CXX
+postdep_objects=$lt_postdep_objects_CXX
+predeps=$lt_predeps_CXX
+postdeps=$lt_postdeps_CXX
+
+# The library search path used internally by the compiler when linking
+# a shared library.
+compiler_lib_search_path=$lt_compiler_lib_search_path_CXX
+
+# ### END LIBTOOL TAG CONFIG: CXX
+_LT_EOF
+
+
+    cat <<_LT_EOF >> "$ofile"
+
+# ### BEGIN LIBTOOL TAG CONFIG: F77
+
+# The linker used to build libraries.
+LD=$lt_LD_F77
+
+# How to create reloadable object files.
+reload_flag=$lt_reload_flag_F77
+reload_cmds=$lt_reload_cmds_F77
+
+# Commands used to build an old-style archive.
+old_archive_cmds=$lt_old_archive_cmds_F77
+
+# A language specific compiler.
+CC=$lt_compiler_F77
+
+# Is the compiler the GNU compiler?
+with_gcc=$GCC_F77
+
+# Compiler flag to turn off builtin functions.
+no_builtin_flag=$lt_lt_prog_compiler_no_builtin_flag_F77
+
+# Additional compiler flags for building library objects.
+pic_flag=$lt_lt_prog_compiler_pic_F77
+
+# How to pass a linker flag through the compiler.
+wl=$lt_lt_prog_compiler_wl_F77
+
+# Compiler flag to prevent dynamic linking.
+link_static_flag=$lt_lt_prog_compiler_static_F77
+
+# Does compiler simultaneously support -c and -o options?
+compiler_c_o=$lt_lt_cv_prog_compiler_c_o_F77
+
+# Whether or not to add -lc for building shared libraries.
+build_libtool_need_lc=$archive_cmds_need_lc_F77
+
+# Whether or not to disallow shared libs when runtime libs are static.
+allow_libtool_libs_with_static_runtimes=$enable_shared_with_static_runtimes_F77
+
+# Compiler flag to allow reflexive dlopens.
+export_dynamic_flag_spec=$lt_export_dynamic_flag_spec_F77
+
+# Compiler flag to generate shared objects directly from archives.
+whole_archive_flag_spec=$lt_whole_archive_flag_spec_F77
+
+# Whether the compiler copes with passing no objects directly.
+compiler_needs_object=$lt_compiler_needs_object_F77
+
+# Create an old-style archive from a shared archive.
+old_archive_from_new_cmds=$lt_old_archive_from_new_cmds_F77
+
+# Create a temporary old-style archive to link instead of a shared archive.
+old_archive_from_expsyms_cmds=$lt_old_archive_from_expsyms_cmds_F77
+
+# Commands used to build a shared archive.
+archive_cmds=$lt_archive_cmds_F77
+archive_expsym_cmds=$lt_archive_expsym_cmds_F77
+
+# Commands used to build a loadable module if different from building
+# a shared archive.
+module_cmds=$lt_module_cmds_F77
+module_expsym_cmds=$lt_module_expsym_cmds_F77
+
+# Whether we are building with GNU ld or not.
+with_gnu_ld=$lt_with_gnu_ld_F77
+
+# Flag that allows shared libraries with undefined symbols to be built.
+allow_undefined_flag=$lt_allow_undefined_flag_F77
+
+# Flag that enforces no undefined symbols.
+no_undefined_flag=$lt_no_undefined_flag_F77
+
+# Flag to hardcode \$libdir into a binary during linking.
+# This must work even if \$libdir does not exist
+hardcode_libdir_flag_spec=$lt_hardcode_libdir_flag_spec_F77
+
+# Whether we need a single "-rpath" flag with a separated argument.
+hardcode_libdir_separator=$lt_hardcode_libdir_separator_F77
+
+# Set to "yes" if using DIR/libNAME\${shared_ext} during linking hardcodes
+# DIR into the resulting binary.
+hardcode_direct=$hardcode_direct_F77
+
+# Set to "yes" if using DIR/libNAME\${shared_ext} during linking hardcodes
+# DIR into the resulting binary and the resulting library dependency is
+# "absolute", i.e. impossible to change by setting \${shlibpath_var} if the
+# library is relocated.
+hardcode_direct_absolute=$hardcode_direct_absolute_F77
+
+# Set to "yes" if using the -LDIR flag during linking hardcodes DIR
+# into the resulting binary.
+hardcode_minus_L=$hardcode_minus_L_F77
+
+# Set to "yes" if using SHLIBPATH_VAR=DIR during linking hardcodes DIR
+# into the resulting binary.
+hardcode_shlibpath_var=$hardcode_shlibpath_var_F77
+
+# Set to "yes" if building a shared library automatically hardcodes DIR
+# into the library and all subsequent libraries and executables linked
+# against it.
+hardcode_automatic=$hardcode_automatic_F77
+
+# Set to yes if linker adds runtime paths of dependent libraries
+# to runtime path list.
+inherit_rpath=$inherit_rpath_F77
+
+# Whether libtool must link a program against all its dependency libraries.
+link_all_deplibs=$link_all_deplibs_F77
+
+# Set to "yes" if exported symbols are required.
+always_export_symbols=$always_export_symbols_F77
+
+# The commands to list exported symbols.
+export_symbols_cmds=$lt_export_symbols_cmds_F77
+
+# Symbols that should not be listed in the preloaded symbols.
+exclude_expsyms=$lt_exclude_expsyms_F77
+
+# Symbols that must always be exported.
+include_expsyms=$lt_include_expsyms_F77
+
+# Commands necessary for linking programs (against libraries) with templates.
+prelink_cmds=$lt_prelink_cmds_F77
+
+# Commands necessary for finishing linking programs.
+postlink_cmds=$lt_postlink_cmds_F77
+
+# Specify filename containing input files.
+file_list_spec=$lt_file_list_spec_F77
+
+# How to hardcode a shared library path into an executable.
+hardcode_action=$hardcode_action_F77
+
+# The directories searched by this compiler when creating a shared library.
+compiler_lib_search_dirs=$lt_compiler_lib_search_dirs_F77
+
+# Dependencies to place before and after the objects being linked to
+# create a shared library.
+predep_objects=$lt_predep_objects_F77
+postdep_objects=$lt_postdep_objects_F77
+predeps=$lt_predeps_F77
+postdeps=$lt_postdeps_F77
+
+# The library search path used internally by the compiler when linking
+# a shared library.
+compiler_lib_search_path=$lt_compiler_lib_search_path_F77
+
+# ### END LIBTOOL TAG CONFIG: F77
+_LT_EOF
+
+ ;;
+
+  esac
+done # for ac_tag
+
+
+as_fn_exit 0
+_ACEOF
+ac_clean_files=$ac_clean_files_save
+
+test $ac_write_fail = 0 ||
+  as_fn_error "write failure creating $CONFIG_STATUS" "$LINENO" 5
+
+
+# configure is writing to config.log, and then calls config.status.
+# config.status does its own redirection, appending to config.log.
+# Unfortunately, on DOS this fails, as config.log is still kept open
+# by configure, so config.status won't be able to write to it; its
+# output is simply discarded.  So we exec the FD to /dev/null,
+# effectively closing config.log, so it can be properly (re)opened and
+# appended to by config.status.  When coming back to configure, we
+# need to make the FD available again.
+if test "$no_create" != yes; then
+  ac_cs_success=:
+  ac_config_status_args=
+  test "$silent" = yes &&
+    ac_config_status_args="$ac_config_status_args --quiet"
+  exec 5>/dev/null
+  $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false
+  exec 5>>config.log
+  # Use ||, not &&, to avoid exiting from the if with $? = 1, which
+  # would make configure fail if this is the last instruction.
+  $ac_cs_success || as_fn_exit $?
+fi
+if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts" >&5
+$as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;}
+fi
+
diff --git a/configure.in b/configure.in

new file mode 100644 (file)

index 0000000..2396dac
--- /dev/null
+++ b/configure.in
@@ -0,0 +1,3477 @@
+dnl  Process this file with autoconf to produce a configure script.
+
+
+define(GMP_COPYRIGHT,[[
+
+Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
+2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published
+by the Free Software Foundation; either version 3 of the License, or (at
+your option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+]])
+
+AC_COPYRIGHT(GMP_COPYRIGHT)
+AH_TOP(/*GMP_COPYRIGHT*/)
+
+AC_REVISION($Revision$)
+AC_PREREQ(2.59)
+AC_INIT(GNU MP, GMP_VERSION, [gmp-bugs@gmplib.org, see http://gmplib.org/manual/Reporting-Bugs.html], gmp)
+AC_CONFIG_SRCDIR(gmp-impl.h)
+m4_pattern_forbid([^[ \t]*GMP_])
+m4_pattern_allow(GMP_LDFLAGS)
+m4_pattern_allow(GMP_LIMB_BITS)
+m4_pattern_allow(GMP_MPARAM_H_SUGGEST)
+m4_pattern_allow(GMP_NAIL_BITS)
+m4_pattern_allow(GMP_NUMB_BITS)
+
+# If --target is not used then $target_alias is empty, but if say
+# "./configure athlon-pc-freebsd3.5" is used, then all three of
+# $build_alias, $host_alias and $target_alias are set to
+# "athlon-pc-freebsd3.5".
+#
+if test -n "$target_alias" && test "$target_alias" != "$host_alias"; then
+  AC_MSG_ERROR([--target is not appropriate for GMP
+Use --build=CPU-VENDOR-OS if you need to specify your CPU and/or system
+explicitly.  Use --host if cross-compiling (see "Installing GMP" in the
+manual for more on this).])
+fi
+
+GMP_INIT(config.m4)
+
+AC_CANONICAL_HOST
+
+dnl  Automake "no-dependencies" is used because include file dependencies
+dnl  are not useful to us.  Pretty much everything depends just on gmp.h,
+dnl  gmp-impl.h and longlong.h, and yet only rarely does everything need to
+dnl  be rebuilt for changes to those files.
+dnl
+dnl  "no-dependencies" also helps with the way we're set up to run
+dnl  AC_PROG_CXX only conditionally.  If dependencies are used then recent
+dnl  automake (eg 1.7.2) appends an AM_CONDITIONAL to AC_PROG_CXX, and then
+dnl  gets upset if it's not actually executed.
+dnl
+dnl  Note that there's a copy of these options in the top-level Makefile.am,
+dnl  so update there too if changing anything.
+dnl
+AM_INIT_AUTOMAKE([1.8 gnu no-dependencies $(top_builddir)/ansi2knr])
+AM_CONFIG_HEADER(config.h:config.in)
+AM_MAINTAINER_MODE
+
+
+AC_ARG_ENABLE(assert,
+AC_HELP_STRING([--enable-assert],[enable ASSERT checking [[default=no]]]),
+[case $enableval in
+yes|no) ;;
+*) AC_MSG_ERROR([bad value $enableval for --enable-assert, need yes or no]) ;;
+esac],
+[enable_assert=no])
+
+if test "$enable_assert" = "yes"; then
+  AC_DEFINE(WANT_ASSERT,1,
+  [Define to 1 to enable ASSERT checking, per --enable-assert])
+  want_assert_01=1
+else
+  want_assert_01=0
+fi
+GMP_DEFINE_RAW(["define(<WANT_ASSERT>,$want_assert_01)"])
+
+
+AC_ARG_ENABLE(alloca,
+AC_HELP_STRING([--enable-alloca],[how to get temp memory [[default=reentrant]]]),
+[case $enableval in
+alloca|malloc-reentrant|malloc-notreentrant) ;;
+yes|no|reentrant|notreentrant) ;;
+debug) ;;
+*)
+  AC_MSG_ERROR([bad value $enableval for --enable-alloca, need one of:
+yes no reentrant notreentrant alloca malloc-reentrant malloc-notreentrant debug]) ;;
+esac],
+[enable_alloca=reentrant])
+
+
+# IMPROVE ME: The default for C++ is disabled.  The tests currently
+# performed below for a working C++ compiler are not particularly strong,
+# and in general can't be expected to get the right setup on their own.  The
+# most significant problem is getting the ABI the same.  Defaulting CXXFLAGS
+# to CFLAGS takes only a small step towards this.  It's also probably worth
+# worrying whether the C and C++ runtimes from say gcc and a vendor C++ can
+# work together.  Some rather broken C++ installations were encountered
+# during testing, and though such things clearly aren't GMP's problem, if
+# --enable-cxx=detect were to be the default then some careful checks of
+# which, if any, C++ compiler on the system is up to scratch would be
+# wanted.
+#
+AC_ARG_ENABLE(cxx,
+AC_HELP_STRING([--enable-cxx],[enable C++ support [[default=no]]]),
+[case $enableval in
+yes|no|detect) ;;
+*) AC_MSG_ERROR([bad value $enableval for --enable-cxx, need yes/no/detect]) ;;
+esac],
+[enable_cxx=no])
+
+
+AC_ARG_ENABLE(fft,
+AC_HELP_STRING([--enable-fft],[enable FFTs for multiplication [[default=yes]]]),
+[case $enableval in
+yes|no) ;;
+*) AC_MSG_ERROR([bad value $enableval for --enable-fft, need yes or no]) ;;
+esac],
+[enable_fft=yes])
+
+if test "$enable_fft" = "yes"; then
+  AC_DEFINE(WANT_FFT,1,
+  [Define to 1 to enable FFTs for multiplication, per --enable-fft])
+fi
+
+
+AC_ARG_ENABLE(old-fft-full,
+AC_HELP_STRING([--enable-old-fft-full],[enable old mpn_mul_fft_full for multiplication [[default=no]]]),
+[case $enableval in
+yes|no) ;;
+*) AC_MSG_ERROR([bad value $enableval for --enable-old-fft-full, need yes or no]) ;;
+esac],
+[enable_old_fft_full=no])
+
+if test "$enable_old_fft_full" = "yes"; then
+  AC_DEFINE(WANT_OLD_FFT_FULL,1,
+  [Define to 1 to enable old mpn_mul_fft_full for multiplication, per --enable-old-fft-full])
+fi
+
+
+AC_ARG_ENABLE(mpbsd,
+AC_HELP_STRING([--enable-mpbsd],
+               [build Berkeley MP compatibility library [[default=no]]]),
+[case $enableval in
+yes|no) ;;
+*) AC_MSG_ERROR([bad value $enableval for --enable-mpbsd, need yes or no]) ;;
+esac],
+[enable_mpbsd=no])
+AM_CONDITIONAL(WANT_MPBSD, test "$enable_mpbsd" = "yes")
+
+
+AC_ARG_ENABLE(nails,
+AC_HELP_STRING([--enable-nails],[use nails on limbs [[default=no]]]),
+[case $enableval in
+[yes|no|[02468]|[0-9][02468]]) ;;
+[*[13579]])
+  AC_MSG_ERROR([bad value $enableval for --enable-nails, only even nail sizes supported]) ;;
+*)
+  AC_MSG_ERROR([bad value $enableval for --enable-nails, need yes/no/number]) ;;
+esac],
+[enable_nails=no])
+
+case $enable_nails in
+yes) GMP_NAIL_BITS=2 ;;
+no)  GMP_NAIL_BITS=0 ;;
+*)   GMP_NAIL_BITS=$enable_nails ;;
+esac
+AC_SUBST(GMP_NAIL_BITS)
+
+
+AC_ARG_ENABLE(profiling,
+AC_HELP_STRING([--enable-profiling],
+               [build with profiler support [[default=no]]]),
+[case $enableval in
+no|prof|gprof|instrument) ;;
+*) AC_MSG_ERROR([bad value $enableval for --enable-profiling, need no/prof/gprof/instrument]) ;;
+esac],
+[enable_profiling=no])
+
+case $enable_profiling in
+  prof)
+    AC_DEFINE(WANT_PROFILING_PROF, 1,
+              [Define to 1 if --enable-profiling=prof])
+    ;;
+  gprof)
+    AC_DEFINE(WANT_PROFILING_GPROF, 1,
+              [Define to 1 if --enable-profiling=gprof])
+    ;;
+  instrument)
+    AC_DEFINE(WANT_PROFILING_INSTRUMENT, 1,
+              [Define to 1 if --enable-profiling=instrument])
+    ;;
+esac
+
+GMP_DEFINE_RAW(["define(<WANT_PROFILING>,<\`$enable_profiling'>)"])
+
+# -fomit-frame-pointer is incompatible with -pg on some chips
+if test "$enable_profiling" = gprof; then
+  fomit_frame_pointer=
+else
+  fomit_frame_pointer="-fomit-frame-pointer"
+fi
+
+
+AC_ARG_WITH(readline,
+AC_HELP_STRING([--with-readline],
+               [readline support in calc demo program [[default=detect]]]),
+[case $withval in
+yes|no|detect) ;;
+*) AC_MSG_ERROR([bad value $withval for --with-readline, need yes/no/detect]) ;;
+esac],
+[with_readline=detect])
+
+
+AC_ARG_ENABLE(fat,
+AC_HELP_STRING([--enable-fat],
+               [build a fat binary on systems that support it [[default=no]]]),
+[case $enableval in
+yes|no) ;;
+*) AC_MSG_ERROR([bad value $enableval for --enable-fat, need yes or no]) ;;
+esac],
+[enable_fat=no])
+
+
+AC_ARG_ENABLE(minithres,
+AC_HELP_STRING([--enable-minithres],
+               [choose minimal thresholds for testing [[default=no]]]),
+[case $enableval in
+yes|no) ;;
+*) AC_MSG_ERROR([bad value $enableval for --enable-minithres, need yes or no]) ;;
+esac],
+[enable_minithres=no])
+
+
+
+tmp_host=`echo $host_cpu | sed 's/\./_/'`
+AC_DEFINE_UNQUOTED(HAVE_HOST_CPU_$tmp_host)
+GMP_DEFINE_RAW("define_not_for_expansion(\`HAVE_HOST_CPU_$tmp_host')", POST)
+
+dnl  The HAVE_HOST_CPU_ list here only needs to have entries for those which
+dnl  are going to be tested, not everything that can possibly be selected.
+dnl
+dnl  The HAVE_HOST_CPU_FAMILY_ list similarly, and note that the AC_DEFINEs
+dnl  for these are under the cpu specific setups below.
+
+AH_VERBATIM([HAVE_HOST_CPU_1],
+[/* Define one of these to 1 for the host CPU family.
+   If your CPU is not in any of these families, leave all undefined.
+   For an AMD64 chip, define "x86" in ABI=32, but not in ABI=64. */
+#undef HAVE_HOST_CPU_FAMILY_alpha
+#undef HAVE_HOST_CPU_FAMILY_m68k
+#undef HAVE_HOST_CPU_FAMILY_power
+#undef HAVE_HOST_CPU_FAMILY_powerpc
+#undef HAVE_HOST_CPU_FAMILY_x86
+#undef HAVE_HOST_CPU_FAMILY_x86_64
+
+/* Define one of the following to 1 for the host CPU, as per the output of
+   ./config.guess.  If your CPU is not listed here, leave all undefined.  */
+#undef HAVE_HOST_CPU_alphaev67
+#undef HAVE_HOST_CPU_alphaev68
+#undef HAVE_HOST_CPU_alphaev7
+#undef HAVE_HOST_CPU_m68020
+#undef HAVE_HOST_CPU_m68030
+#undef HAVE_HOST_CPU_m68040
+#undef HAVE_HOST_CPU_m68060
+#undef HAVE_HOST_CPU_m68360
+#undef HAVE_HOST_CPU_powerpc604
+#undef HAVE_HOST_CPU_powerpc604e
+#undef HAVE_HOST_CPU_powerpc750
+#undef HAVE_HOST_CPU_powerpc7400
+#undef HAVE_HOST_CPU_supersparc
+#undef HAVE_HOST_CPU_i386
+#undef HAVE_HOST_CPU_i586
+#undef HAVE_HOST_CPU_i686
+#undef HAVE_HOST_CPU_pentium
+#undef HAVE_HOST_CPU_pentiummmx
+#undef HAVE_HOST_CPU_pentiumpro
+#undef HAVE_HOST_CPU_pentium2
+#undef HAVE_HOST_CPU_pentium3
+#undef HAVE_HOST_CPU_s390_z900
+#undef HAVE_HOST_CPU_s390_z990
+#undef HAVE_HOST_CPU_s390_z9
+#undef HAVE_HOST_CPU_s390_z10
+#undef HAVE_HOST_CPU_s390_z196
+
+/* Define to 1 iff we have a s390 with 64-bit registers.  */
+#undef HAVE_HOST_CPU_s390_zarch])
+
+
+# Table of compilers, options, and mpn paths.  This code has various related
+# purposes
+#
+#   - better default CC/CFLAGS selections than autoconf otherwise gives
+#   - default CC/CFLAGS selections for extra CPU types specific to GMP
+#   - a few tests for known bad compilers
+#   - choice of ABIs on suitable systems
+#   - selection of corresponding mpn search path
+#
+# After GMP specific searches and tests, the standard autoconf AC_PROG_CC is
+# called.  User selections of CC etc are respected.
+#
+# Care is taken not to use macros like AC_TRY_COMPILE during the GMP
+# pre-testing, since they of course depend on AC_PROG_CC, and also some of
+# them cache their results, which is not wanted.
+#
+# The ABI selection mechanism is unique to GMP.  All that reaches autoconf
+# is a different selection of CC/CFLAGS according to the best ABI the system
+# supports, and/or what the user selects.  Naturally the mpn assembler code
+# selected is very dependent on the ABI.
+#
+# The closest the standard tools come to a notion of ABI is something like
+# "sparc64" which encodes a CPU and an ABI together.  This doesn't seem to
+# scale well for GMP, where exact CPU types like "ultrasparc2" are wanted,
+# separate from the ABI used on them.
+#
+#
+# The variables set here are
+#
+#   cclist              the compiler choices
+#   xx_cflags           flags for compiler xx
+#   xx_cflags_maybe     flags for compiler xx, if they work
+#   xx_cppflags         cpp flags for compiler xx
+#   xx_cflags_optlist   list of sets of optional flags
+#   xx_cflags_yyy       set yyy of optional flags for compiler xx
+#   xx_ldflags          -Wc,-foo flags for libtool linking with compiler xx
+#   ar_flags            extra flags for $AR
+#   nm_flags            extra flags for $NM
+#   limb                limb size, can be "longlong"
+#   path                mpn search path
+#   extra_functions     extra mpn functions
+#   fat_path            fat binary mpn search path [if fat binary desired]
+#   fat_functions       fat functions
+#   fat_thresholds      fat thresholds
+#
+# Suppose xx_cflags_optlist="arch", then flags from $xx_cflags_arch are
+# tried, and the first flag that works will be used.  An optlist like "arch
+# cpu optimize" can be used to get multiple independent sets of flags tried.
+# The first that works from each will be used.  If no flag in a set works
+# then nothing from that set is added.
+#
+# For multiple ABIs, the scheme extends as follows.
+#
+#   abilist               set of ABI choices
+#   cclist_aa             compiler choices in ABI aa
+#   xx_aa_cflags          flags for xx in ABI aa
+#   xx_aa_cflags_maybe    flags for xx in ABI aa, if they work
+#   xx_aa_cppflags        cpp flags for xx in ABI aa
+#   xx_aa_cflags_optlist  list of sets of optional flags in ABI aa
+#   xx_aa_cflags_yyy      set yyy of optional flags for compiler xx in ABI aa
+#   xx_aa_ldflags         -Wc,-foo flags for libtool linking
+#   ar_aa_flags           extra flags for $AR in ABI aa
+#   nm_aa_flags           extra flags for $NM in ABI aa
+#   limb_aa               limb size in ABI aa, can be "longlong"
+#   path_aa               mpn search path in ABI aa
+#   extra_functions_aa    extra mpn functions in ABI aa
+#
+# As a convenience, the unadorned xx_cflags (etc) are used for the last ABI
+# in abilist, if an xx_aa_cflags for that ABI isn't given.  For example if
+# abilist="64 32" then $cc_64_cflags will be used for the 64-bit ABI, but
+# for the 32-bit either $cc_32_cflags or $cc_cflags is used, whichever is
+# defined.  This makes it easy to add some 64-bit compilers and flags to an
+# unadorned 32-bit set.
+#
+# limb=longlong (or limb_aa=longlong) applies to all compilers within that
+# ABI.  It won't work to have some needing long long and some not, since a
+# single instantiated gmp.h will be used by both.
+#
+# SPEED_CYCLECOUNTER, cyclecounter_size and CALLING_CONVENTIONS_OBJS are
+# also set here, with an ABI suffix.
+#
+#
+#
+# A table-driven approach like this to mapping cpu type to good compiler
+# options is a bit of a maintenance burden, but there's not much uniformity
+# between options specifications on different compilers.  Some sort of
+# separately updatable tool might be cute.
+#
+# The use of lots of variables like this, direct and indirect, tends to
+# obscure when and how various things are done, but unfortunately it's
+# pretty much the only way.  If shell subroutines were portable then actual
+# code like "if this .. do that" could be written, but attempting the same
+# with full copies of GMP_PROG_CC_WORKS etc expanded at every point would
+# hugely bloat the output.
+
+
+AC_ARG_VAR(ABI, [desired ABI (for processors supporting more than one ABI)])
+
+# abilist needs to be non-empty, "standard" is just a generic name here
+abilist="standard"
+
+# FIXME: We'd like to prefer an ANSI compiler, perhaps by preferring
+# c89 over cc here.  But note that on HP-UX c89 provides a castrated
+# environment, and would want to be excluded somehow.  Maybe
+# AC_PROG_CC_STDC already does enough to stick cc into ANSI mode and
+# we don't need to worry.
+#
+cclist="gcc cc"
+
+gcc_cflags="-O2 -pedantic"
+gcc_64_cflags="-O2 -pedantic"
+cc_cflags="-O"
+cc_64_cflags="-O"
+
+SPEED_CYCLECOUNTER_OBJ=
+cyclecounter_size=2
+
+AC_SUBST(HAVE_HOST_CPU_FAMILY_power,  0)
+AC_SUBST(HAVE_HOST_CPU_FAMILY_powerpc,0)
+
+case $host in
+
+  a29k*-*-*)
+    path="a29k"
+    ;;
+
+
+  alpha*-*-*)
+    AC_DEFINE(HAVE_HOST_CPU_FAMILY_alpha)
+    case $host_cpu in
+      alphaev5* | alphapca5*)
+       path="alpha/ev5 alpha" ;;
+      alphaev67 | alphaev68 | alphaev7*)
+        path="alpha/ev67 alpha/ev6 alpha" ;;
+      alphaev6)
+       path="alpha/ev6 alpha" ;;
+      *)
+        path="alpha" ;;
+    esac
+    extra_functions="cntlz"
+    gcc_cflags_optlist="asm cpu oldas" # need asm ahead of cpu, see below
+    gcc_cflags_oldas="-Wa,-oldas"     # see GMP_GCC_WA_OLDAS.
+
+    # gcc 2.7.2.3 doesn't know any -mcpu= for alpha, apparently.
+    # gcc 2.95 knows -mcpu= ev4, ev5, ev56, pca56, ev6.
+    # gcc 3.0 adds nothing.
+    # gcc 3.1 adds ev45, ev67 (but ev45 is the same as ev4).
+    # gcc 3.2 adds nothing.
+    #
+    # gcc version "2.9-gnupro-99r1" under "-O2 -mcpu=ev6" strikes internal
+    # compiler errors too easily and is rejected by GMP_PROG_CC_WORKS.  Each
+    # -mcpu=ev6 below has a fallback to -mcpu=ev56 for this reason.
+    #
+    case $host_cpu in
+      alpha)        gcc_cflags_cpu="-mcpu=ev4" ;;
+      alphaev5)     gcc_cflags_cpu="-mcpu=ev5" ;;
+      alphaev56)    gcc_cflags_cpu="-mcpu=ev56" ;;
+      alphapca56 | alphapca57)
+                    gcc_cflags_cpu="-mcpu=pca56" ;;
+      alphaev6)     gcc_cflags_cpu="-mcpu=ev6 -mcpu=ev56" ;;
+      alphaev67 | alphaev68 | alphaev7*)
+                    gcc_cflags_cpu="-mcpu=ev67 -mcpu=ev6 -mcpu=ev56" ;;
+    esac
+
+    # gcc version "2.9-gnupro-99r1" on alphaev68-dec-osf5.1 has been seen
+    # accepting -mcpu=ev6, but not putting the assembler in the right mode
+    # for what it produces.  We need to do this for it, and need to do it
+    # before testing the -mcpu options.
+    #
+    # On old versions of gcc, which don't know -mcpu=, we believe an
+    # explicit -Wa,-mev5 etc will be necessary to put the assembler in
+    # the right mode for our .asm files and longlong.h asm blocks.
+    #
+    # On newer versions of gcc, when -mcpu= is known, we must give a -Wa
+    # which is at least as high as the code gcc will generate.  gcc
+    # establishes what it needs with a ".arch" directive, our command line
+    # option seems to override that.
+    #
+    # gas prior to 2.14 doesn't accept -mev67, but -mev6 seems enough for
+    # ctlz and cttz (in 2.10.0 at least).
+    #
+    # OSF `as' accepts ev68 but stupidly treats it as ev4.  -arch only seems
+    # to affect insns like ldbu which are expanded as macros when necessary.
+    # Insns like ctlz which were never available as macros are always
+    # accepted and always generate their plain code.
+    #
+    case $host_cpu in
+      alpha)        gcc_cflags_asm="-Wa,-arch,ev4 -Wa,-mev4" ;;
+      alphaev5)     gcc_cflags_asm="-Wa,-arch,ev5 -Wa,-mev5" ;;
+      alphaev56)    gcc_cflags_asm="-Wa,-arch,ev56 -Wa,-mev56" ;;
+      alphapca56 | alphapca57)
+                    gcc_cflags_asm="-Wa,-arch,pca56 -Wa,-mpca56" ;;
+      alphaev6)     gcc_cflags_asm="-Wa,-arch,ev6 -Wa,-mev6" ;;
+      alphaev67 | alphaev68 | alphaev7*)
+                    gcc_cflags_asm="-Wa,-arch,ev67 -Wa,-mev67 -Wa,-arch,ev6 -Wa,-mev6" ;;
+    esac
+
+    # It might be better to ask "cc" whether it's Cray C or DEC C,
+    # instead of relying on the OS part of $host.  But it's hard to
+    # imagine either of those compilers anywhere except their native
+    # systems.
+    #
+    GMP_INCLUDE_MPN(alpha/alpha-defs.m4)
+    case $host in
+      *-cray-unicos*)
+        cc_cflags="-O"         # no -g, it silently disables all optimizations
+        GMP_INCLUDE_MPN(alpha/unicos.m4)
+        # Don't perform any assembly syntax tests on this beast.
+        gmp_asm_syntax_testing=no
+        ;;
+      *-*-osf*)
+        GMP_INCLUDE_MPN(alpha/default.m4)
+        cc_cflags=""
+        cc_cflags_optlist="opt cpu"
+
+        # not sure if -fast works on old versions, so make it optional
+       cc_cflags_opt="-fast -O2"
+
+       # DEC C V5.9-005 knows ev4, ev5, ev56, pca56, ev6.
+       # Compaq C V6.3-029 adds ev67.
+       #
+       case $host_cpu in
+         alpha)       cc_cflags_cpu="-arch~ev4~-tune~ev4" ;;
+         alphaev5)    cc_cflags_cpu="-arch~ev5~-tune~ev5" ;;
+         alphaev56)   cc_cflags_cpu="-arch~ev56~-tune~ev56" ;;
+         alphapca56 | alphapca57)
+            cc_cflags_cpu="-arch~pca56~-tune~pca56" ;;
+         alphaev6)    cc_cflags_cpu="-arch~ev6~-tune~ev6" ;;
+         alphaev67 | alphaev68 | alphaev7*)
+            cc_cflags_cpu="-arch~ev67~-tune~ev67 -arch~ev6~-tune~ev6" ;;
+       esac
+        ;;
+      *)
+        GMP_INCLUDE_MPN(alpha/default.m4)
+        ;;
+    esac
+
+    case $host in
+      *-*-unicos*)
+        # tune/alpha.asm assumes int==4bytes but unicos uses int==8bytes
+        ;;
+      *)
+        SPEED_CYCLECOUNTER_OBJ=alpha.lo
+        cyclecounter_size=1 ;;
+    esac
+    ;;
+
+
+  # Cray vector machines.
+  # This must come after alpha* so that we can recognize present and future
+  # vector processors with a wildcard.
+  *-cray-unicos*)
+    gmp_asm_syntax_testing=no
+    cclist="cc"
+    # We used to have -hscalar0 here as a workaround for miscompilation of
+    # mpz/import.c, but let's hope Cray fixes their bugs instead, since
+    # -hscalar0 causes disastrously poor code to be generated.
+    cc_cflags="-O3 -hnofastmd -htask0 -Wa,-B"
+    path="cray"
+    ;;
+
+
+  arm*-*-*)
+    path="arm"
+    gcc_cflags="$gcc_cflags $fomit_frame_pointer"
+    gcc_testlist="gcc-arm-umodsi"
+    GMP_INCLUDE_MPN(arm/arm-defs.m4)
+    ;;
+
+
+  clipper*-*-*)
+    path="clipper"
+    ;;
+
+
+  # Fujitsu
+  [f30[01]-fujitsu-sysv*])
+    cclist="gcc vcc"
+    # FIXME: flags for vcc?
+    vcc_cflags="-g"
+    path="fujitsu"
+    ;;
+
+
+  hppa*-*-*)
+    # HP cc (the one sold separately) is K&R by default, but AM_C_PROTOTYPES
+    # will add "-Ae", or "-Aa -D_HPUX_SOURCE", to put it into ansi mode, if
+    # possible.
+    #
+    # gcc for hppa 2.0 can be built either for 2.0n (32-bit) or 2.0w
+    # (64-bit), but not both, so there's no option to choose the desired
+    # mode, we must instead detect which of the two it is.  This is done by
+    # checking sizeof(long), either 4 or 8 bytes respectively.  Do this in
+    # ABI=1.0 too, in case someone tries to build that with a 2.0w gcc.
+    #
+    gcc_cflags_optlist="arch"
+    gcc_testlist="sizeof-long-4"
+    SPEED_CYCLECOUNTER_OBJ=hppa.lo
+    cyclecounter_size=1
+
+    # FIXME: For hppa2.0*, path should be "pa32/hppa2_0 pa32/hppa1_1 pa32".
+    # (Can't remember why this isn't done already, have to check what .asm
+    # files are available in each and how they run on a typical 2.0 cpu.)
+    #
+    case $host_cpu in
+      hppa1.0*)    path="pa32" ;;
+      hppa7000*)   path="pa32/hppa1_1 pa32" ;;
+      hppa2.0* | hppa64)
+                   path="pa32/hppa2_0 pa32/hppa1_1/pa7100 pa32/hppa1_1 pa32" ;;
+      *)           # default to 7100
+                   path="pa32/hppa1_1/pa7100 pa32/hppa1_1 pa32" ;;
+    esac
+
+    # gcc 2.7.2.3 knows -mpa-risc-1-0 and -mpa-risc-1-1
+    # gcc 2.95 adds -mpa-risc-2-0, plus synonyms -march=1.0, 1.1 and 2.0
+    #
+    # We don't use -mpa-risc-2-0 in ABI=1.0 because 64-bit registers may not
+    # be saved by the kernel on an old system.  Actually gcc (as of 3.2)
+    # only adds a few float instructions with -mpa-risc-2-0, so it would
+    # probably be safe, but let's not take the chance.  In any case, a
+    # configuration like --host=hppa2.0 ABI=1.0 is far from optimal.
+    #
+    case $host_cpu in
+      hppa1.0*)           gcc_cflags_arch="-mpa-risc-1-0" ;;
+      *)                  # default to 7100
+                          gcc_cflags_arch="-mpa-risc-1-1" ;;
+    esac
+
+    case $host_cpu in
+      hppa1.0*)    cc_cflags="+O2" ;;
+      *)           # default to 7100
+                   cc_cflags="+DA1.1 +O2" ;;
+    esac
+
+    case $host in
+      hppa2.0*-*-* | hppa64-*-*)
+       cclist_20n="gcc cc"
+        abilist="2.0n 1.0"
+        path_20n="pa64"
+       limb_20n=longlong
+        any_20n_testlist="sizeof-long-4"
+        SPEED_CYCLECOUNTER_OBJ_20n=hppa2.lo
+        cyclecounter_size_20n=2
+
+        # -mpa-risc-2-0 is only an optional flag, in case an old gcc is
+        # used.  Assembler support for 2.0 is essential though, for our asm
+        # files.
+       gcc_20n_cflags="-O2"
+       gcc_20n_cflags_optlist="arch"
+        gcc_20n_cflags_arch="-mpa-risc-2-0 -mpa-risc-1-1"
+        gcc_20n_testlist="sizeof-long-4 hppa-level-2.0"
+
+        cc_20n_cflags="+DA2.0 +e +O2 -Wl,+vnocompatwarnings"
+        cc_20n_testlist="hpc-hppa-2-0"
+
+       # ABI=2.0w is available for hppa2.0w and hppa2.0, but not for
+       # hppa2.0n, on the assumption that the latter indicates a
+       # desire for ABI=2.0n.
+       case $host in
+        hppa2.0n-*-*) ;;
+        *)
+          # HPUX 10 and earlier cannot run 2.0w.  Not sure about other
+          # systems (GNU/Linux for instance), but let's assume they're ok.
+          case $host in
+            [*-*-hpux[1-9] | *-*-hpux[1-9].* | *-*-hpux10 | *-*-hpux10.*]) ;;
+           [*-*-linux*])  abilist="1.0" ;; # due to linux permanent kernel bug
+            *)    abilist="2.0w $abilist" ;;
+          esac
+
+          cclist_20w="gcc cc"
+         gcc_20w_cflags="-O2 -mpa-risc-2-0"
+          cc_20w_cflags="+DD64 +O2"
+          cc_20w_testlist="hpc-hppa-2-0"
+          path_20w="pa64"
+         any_20w_testlist="sizeof-long-8"
+          SPEED_CYCLECOUNTER_OBJ_20w=hppa2w.lo
+          cyclecounter_size_20w=2
+         ;;
+        esac
+        ;;
+    esac
+    ;;
+
+
+  i960*-*-*)
+    path="i960"
+    ;;
+
+
+  IA64_PATTERN)
+    abilist="64"
+    GMP_INCLUDE_MPN(ia64/ia64-defs.m4)
+    SPEED_CYCLECOUNTER_OBJ=ia64.lo
+
+    case $host_cpu in
+      itanium)   path="ia64/itanium  ia64" ;;
+      itanium2)  path="ia64/itanium2 ia64" ;;
+      *)         path="ia64" ;;
+    esac
+
+    gcc_64_cflags_optlist="tune"
+    gcc_32_cflags_optlist=$gcc_64_cflags_optlist
+
+    # gcc pre-release 3.4 adds -mtune itanium and itanium2
+    case $host_cpu in
+      itanium)   gcc_cflags_tune="-mtune=itanium" ;;
+      itanium2)  gcc_cflags_tune="-mtune=itanium2" ;;
+    esac
+
+    case $host in
+      *-*-linux*)
+       cclist="gcc icc"
+       icc_cflags="-no-gcc"
+       icc_cflags_optlist="opt"
+       # Don't use -O3, it is for "large data sets" and also miscompiles GMP.
+       # But icc miscompiles GMP at any optimization level, at higher levels
+       # it miscompiles more files...
+       icc_cflags_opt="-O2 -O1"
+       ;;
+
+      *-*-hpux*)
+        # HP cc sometimes gets internal errors if the optimization level is
+        # too high.  GMP_PROG_CC_WORKS detects this, the "_opt" fallbacks
+        # let us use whatever seems to work.
+        #
+        abilist="32 64"
+
+        cclist_32="gcc cc"
+        path_32="ia64"
+        cc_32_cflags=""
+        cc_32_cflags_optlist="opt"
+        cc_32_cflags_opt="+O3 +O2 +O1"
+        gcc_32_cflags="-milp32 -O2"
+        limb_32=longlong
+        SPEED_CYCLECOUNTER_OBJ_32=ia64.lo
+        cyclecounter_size_32=2
+
+        # Must have +DD64 in CPPFLAGS to get the right __LP64__ for headers,
+        # but also need it in CFLAGS for linking programs, since automake
+        # only uses CFLAGS when linking, not CPPFLAGS.
+        # FIXME: Maybe should use cc_64_ldflags for this, but that would
+        # need GMP_LDFLAGS used consistently by all the programs.
+        #
+        cc_64_cflags="+DD64"
+        cc_64_cppflags="+DD64"
+        cc_64_cflags_optlist="opt"
+        cc_64_cflags_opt="+O3 +O2 +O1"
+        gcc_64_cflags="$gcc_64_cflags -mlp64"
+        ;;
+    esac
+    ;;
+
+
+  # Motorola 68k
+  #
+  M68K_PATTERN)
+    AC_DEFINE(HAVE_HOST_CPU_FAMILY_m68k)
+    GMP_INCLUDE_MPN(m68k/m68k-defs.m4)
+    gcc_cflags="$gcc_cflags $fomit_frame_pointer"
+    gcc_cflags_optlist="arch"
+
+    # gcc 2.7.2 knows -m68000, -m68020, -m68030, -m68040.
+    # gcc 2.95 adds -mcpu32, -m68060.
+    # FIXME: Maybe "-m68020 -mnobitfield" would suit cpu32 on 2.7.2.
+    #
+    case $host_cpu in
+    m68020)  gcc_cflags_arch="-m68020" ;;
+    m68030)  gcc_cflags_arch="-m68030" ;;
+    m68040)  gcc_cflags_arch="-m68040" ;;
+    m68060)  gcc_cflags_arch="-m68060 -m68000" ;;
+    m68360)  gcc_cflags_arch="-mcpu32 -m68000" ;;
+    *)       gcc_cflags_arch="-m68000" ;;
+    esac
+
+    # FIXME: m68k/mc68020 looks like it's ok for cpu32, but this wants to be
+    # tested.  Will need to introduce an m68k/cpu32 if m68k/mc68020 ever uses
+    # the bitfield instructions.
+    case $host_cpu in
+    [m680[234]0 | m68360])  path="m68k/mc68020 m68k" ;;
+    *)                      path="m68k" ;;
+    esac
+    ;;
+
+
+  # Motorola 88k
+  m88k*-*-*)
+    path="m88k"
+    ;;
+  m88110*-*-*)
+    gcc_cflags="$gcc_cflags -m88110"
+    path="m88k/mc88110 m88k"
+    ;;
+
+
+  # National Semiconductor 32k
+  ns32k*-*-*)
+    path="ns32k"
+    ;;
+
+
+  # IRIX 5 and earlier can only run 32-bit o32.
+  #
+  # IRIX 6 and up always has a 64-bit mips CPU and can run n32 or 64.  n32 is
+  # preferred over 64, but only because that's been the default in past
+  # versions of GMP.  The two are equally efficient.
+  #
+  # Linux kernel 2.2.13 arch/mips/kernel/irixelf.c has a comment about not
+  # supporting n32 or 64.
+  #
+  # For reference, libtool (eg. 1.5.6) recognises the n32 ABI and knows the
+  # right options to use when linking (both cc and gcc), so no need for
+  # anything special from us.
+  #
+  mips*-*-*)
+    abilist="o32"
+    gcc_cflags_optlist="abi"
+    gcc_cflags_abi="-mabi=32"
+    gcc_testlist="gcc-mips-o32"
+    path="mips32"
+    cc_cflags="-O2 -o32"   # no -g, it disables all optimizations
+    # this suits both mips32 and mips64
+    GMP_INCLUDE_MPN(mips32/mips-defs.m4)
+
+    case $host in
+      [mips64*-*-* | mips*-*-irix[6789]*])
+        abilist="n32 64 o32"
+
+        cclist_n32="gcc cc"
+        gcc_n32_cflags="-O2 -mabi=n32"
+        cc_n32_cflags="-O2 -n32"       # no -g, it disables all optimizations
+        limb_n32=longlong
+        path_n32="mips64"
+
+        cclist_64="gcc cc"
+        gcc_64_cflags="$gcc_64_cflags -mabi=64"
+        gcc_64_ldflags="-Wc,-mabi=64"
+        cc_64_cflags="-O2 -64"         # no -g, it disables all optimizations
+        cc_64_ldflags="-Wc,-64"
+        path_64="mips64"
+        ;;
+    esac
+    ;;
+
+
+  # Darwin (powerpc-apple-darwin1.3) has its hacked gcc installed as cc.
+  # Our usual "gcc in disguise" detection means gcc_cflags etc here gets
+  # used.
+  #
+  # The darwin pre-compiling preprocessor is disabled with -no-cpp-precomp
+  # since it doesn't like "__attribute__ ((mode (SI)))" etc in gmp-impl.h,
+  # and so always ends up running the plain preprocessor anyway.  This could
+  # be done in CPPFLAGS rather than CFLAGS, but there's not many places
+  # preprocessing is done separately, and this is only a speedup, the normal
+  # preprocessor gets run if there's any problems.
+  #
+  # We used to use -Wa,-mppc with gcc, but can't remember exactly why.
+  # Presumably it was for old versions of gcc where -mpowerpc doesn't put
+  # the assembler in the right mode.  In any case -Wa,-mppc is not good, for
+  # instance -mcpu=604 makes recent gcc use -m604 to get access to the
+  # "fsel" instruction, but a -Wa,-mppc overrides that, making code that
+  # comes out with fsel fail.
+  #
+  # (Note also that the darwin assembler doesn't accept "-mppc", so any
+  # -Wa,-mppc was used only if it worked.  The right flag on darwin would be
+  # "-arch ppc" or some such, but that's already the default.)
+  #
+  [powerpc*-*-* | power[3-9]-*-*])
+    AC_DEFINE(HAVE_HOST_CPU_FAMILY_powerpc)
+    HAVE_HOST_CPU_FAMILY_powerpc=1
+    abilist="32"
+    cclist="gcc cc"
+    cc_cflags="-O2"
+    gcc_32_cflags="$gcc_cflags -mpowerpc"
+    gcc_cflags_optlist="precomp subtype asm cpu"
+    gcc_cflags_precomp="-no-cpp-precomp"
+    gcc_cflags_subtype="-force_cpusubtype_ALL" # for vmx on darwin
+    gcc_cflags_asm=""
+    gcc_cflags_cpu=""
+    vmx_path=""
+
+    # grab this object, though it's not a true cycle counter routine
+    SPEED_CYCLECOUNTER_OBJ=powerpc.lo
+    cyclecounter_size=0
+
+    case $host_cpu in
+      powerpc740 | powerpc750)
+        path="powerpc32/750 powerpc32" ;;
+      powerpc7400 | powerpc7410)
+        path="powerpc32/vmx powerpc32/750 powerpc32" ;;
+      [powerpc74[45]?])
+        path="powerpc32/vmx powerpc32" ;;
+      *)
+        path="powerpc32" ;;
+    esac
+
+    case $host_cpu in
+      powerpc401)   gcc_cflags_cpu="-mcpu=401" ;;
+      powerpc403)   gcc_cflags_cpu="-mcpu=403"
+                   xlc_cflags_arch="-qarch=403 -qarch=ppc" ;;
+      powerpc405)   gcc_cflags_cpu="-mcpu=405" ;;
+      powerpc505)   gcc_cflags_cpu="-mcpu=505" ;;
+      powerpc601)   gcc_cflags_cpu="-mcpu=601"
+                   xlc_cflags_arch="-qarch=601 -qarch=ppc" ;;
+      powerpc602)   gcc_cflags_cpu="-mcpu=602"
+                   xlc_cflags_arch="-qarch=602 -qarch=ppc" ;;
+      powerpc603)   gcc_cflags_cpu="-mcpu=603"
+                   xlc_cflags_arch="-qarch=603 -qarch=ppc" ;;
+      powerpc603e)  gcc_cflags_cpu="-mcpu=603e -mcpu=603"
+                   xlc_cflags_arch="-qarch=603 -qarch=ppc" ;;
+      powerpc604)   gcc_cflags_cpu="-mcpu=604"
+                   xlc_cflags_arch="-qarch=604 -qarch=ppc" ;;
+      powerpc604e)  gcc_cflags_cpu="-mcpu=604e -mcpu=604"
+                   xlc_cflags_arch="-qarch=604 -qarch=ppc" ;;
+      powerpc620)   gcc_cflags_cpu="-mcpu=620" ;;
+      powerpc630)   gcc_cflags_cpu="-mcpu=630"
+                   xlc_cflags_arch="-qarch=pwr3"
+                   cpu_path="p3 p3-p7" ;;
+      powerpc740)   gcc_cflags_cpu="-mcpu=740" ;;
+      powerpc7400 | powerpc7410)
+                   gcc_cflags_asm="-Wa,-maltivec"
+                   gcc_cflags_cpu="-mcpu=7400 -mcpu=750" ;;
+      [powerpc74[45]?])
+                   gcc_cflags_asm="-Wa,-maltivec"
+                   gcc_cflags_cpu="-mcpu=7450" ;;
+      powerpc750)   gcc_cflags_cpu="-mcpu=750" ;;
+      powerpc801)   gcc_cflags_cpu="-mcpu=801" ;;
+      powerpc821)   gcc_cflags_cpu="-mcpu=821" ;;
+      powerpc823)   gcc_cflags_cpu="-mcpu=823" ;;
+      powerpc860)   gcc_cflags_cpu="-mcpu=860" ;;
+      powerpc970)   gcc_cflags_cpu="-mtune=970"
+                   xlc_cflags_arch="-qarch=970 -qarch=pwr3"
+                   vmx_path="powerpc64/vmx"
+                   cpu_path="p4 p3-p7" ;;
+      power4)      gcc_cflags_cpu="-mtune=power4"
+                   xlc_cflags_arch="-qarch=pwr4"
+                   cpu_path="p4 p3-p7" ;;
+      power5)      gcc_cflags_cpu="-mtune=power5 -mtune=power4"
+                   xlc_cflags_arch="-qarch=pwr5"
+                   cpu_path="p5 p4 p3-p7" ;;
+      power6)      gcc_cflags_cpu="-mtune=power6"
+                   xlc_cflags_arch="-qarch=pwr6"
+                   cpu_path="p6 p3-p7" ;;
+      power7)      gcc_cflags_cpu="-mtune=power7 -mtune=power5"
+                   xlc_cflags_arch="-qarch=pwr7 -qarch=pwr5"
+                   cpu_path="p7 p5 p4 p3-p7" ;;
+    esac
+
+    case $host in
+      *-*-aix*)
+       cclist="gcc xlc cc"
+       gcc_32_cflags_maybe="-maix32"
+       xlc_cflags="-O2 -qmaxmem=20000"
+       xlc_cflags_optlist="arch"
+       xlc_32_cflags_maybe="-q32"
+       ar_32_flags="-X32"
+       nm_32_flags="-X32"
+    esac
+
+    case $host in
+      POWERPC64_PATTERN)
+       case $host in
+         *-*-aix*)
+           # On AIX a true 64-bit ABI is available.
+           # Need -Wc to pass object type flags through to the linker.
+           abilist="aix64 $abilist"
+           cclist_aix64="gcc xlc"
+           gcc_aix64_cflags="-O2 -maix64 -mpowerpc64"
+           gcc_aix64_cflags_optlist="cpu"
+           gcc_aix64_ldflags="-Wc,-maix64"
+           xlc_aix64_cflags="-O2 -q64 -qmaxmem=20000"
+           xlc_aix64_cflags_optlist="arch"
+           xlc_aix64_ldflags="-Wc,-q64"
+           # Must indicate object type to ar and nm
+           ar_aix64_flags="-X64"
+           nm_aix64_flags="-X64"
+           path_aix64=""
+           for i in $cpu_path; do path_aix64="${path_aix64}powerpc64/mode64/$i "; done
+           path_aix64="${path_aix64}powerpc64/mode64 $vmx_path powerpc64"
+           # grab this object, though it's not a true cycle counter routine
+           SPEED_CYCLECOUNTER_OBJ_aix64=powerpc64.lo
+           cyclecounter_size_aix64=0
+           ;;
+         *-*-darwin*)
+           # On Darwin we can use 64-bit instructions with a longlong limb,
+           # but the chip is still in 32-bit mode.
+           # In theory this can be used on any OS which knows how to save
+           # 64-bit registers in a context switch.
+           #
+           # Note that we must use -mpowerpc64 with gcc, since the
+           # longlong.h macros expect limb operands in a single 64-bit
+           # register, not two 32-bit registers as would be given for a
+           # long long without -mpowerpc64.  In theory we could detect and
+           # accommodate both styles, but the proper 64-bit registers will
+           # be fastest and are what we really want to use.
+           #
+           # One would think -mpowerpc64 would set the assembler in the right
+           # mode to handle 64-bit instructions.  But for that, also
+           # -force_cpusubtype_ALL is needed.
+           #
+           # Do not use -fast for Darwin, it actually adds options
+           # incompatible with a shared library.
+           #
+           abilist="mode64 mode32 $abilist"
+           gcc_32_cflags_maybe="-m32"
+           gcc_cflags_opt="-O3 -O2 -O1"        # will this become used?
+           cclist_mode32="gcc"
+           gcc_mode32_cflags_maybe="-m32"
+           gcc_mode32_cflags="-mpowerpc64"
+           gcc_mode32_cflags_optlist="subtype cpu opt"
+           gcc_mode32_cflags_subtype="-force_cpusubtype_ALL"
+           gcc_mode32_cflags_opt="-O3 -O2 -O1"
+           path_mode32="powerpc64/mode32 $vmx_path powerpc64"
+           limb_mode32=longlong
+           cclist_mode64="gcc"
+           gcc_mode64_cflags="-m64"
+           gcc_mode64_cflags_optlist="cpu opt"
+           gcc_mode64_cflags_opt="-O3 -O2 -O1"
+           path_mode64=""
+           for i in $cpu_path; do path_mode64="${path_mode64}powerpc64/mode64/$i "; done
+           path_mode64="${path_mode64}powerpc64/mode64 $vmx_path powerpc64"
+           SPEED_CYCLECOUNTER_OBJ_mode64=powerpc64.lo
+           cyclecounter_size_mode64=0
+           any_mode64_testlist="sizeof-long-8"
+           ;;
+         *-*-linux* | *-*-*bsd*)
+           # On GNU/Linux, assume the processor is in 64-bit mode.  Some
+           # environments have a gcc that is always in 64-bit mode, while
+           # others require -m64, hence the use of cflags_maybe.  The
+           # sizeof-long-8 test checks the mode is right (for the no option
+           # case).
+           #
+           # -mpowerpc64 is not used, since it should be the default in
+           # 64-bit mode.  (We need its effect for the various longlong.h
+           # asm macros to be right of course.)
+           #
+           # gcc64 was an early port of gcc to 64-bit mode, but should be
+           # obsolete before too long.  We prefer plain gcc when it knows
+           # 64-bits.
+           #
+           abilist="mode64 mode32 $abilist"
+           gcc_32_cflags_maybe="-m32"
+           cclist_mode32="gcc"
+           gcc_mode32_cflags_maybe="-m32"
+           gcc_mode32_cflags="-mpowerpc64"
+           gcc_mode32_cflags_optlist="cpu opt"
+           gcc_mode32_cflags_opt="-O3 -O2 -O1"
+           path_mode32="powerpc64/mode32 $vmx_path powerpc64"
+           limb_mode32=longlong
+           cclist_mode64="gcc gcc64"
+           gcc_mode64_cflags_maybe="-m64"
+           gcc_mode64_cflags_optlist="cpu opt"
+           gcc_mode64_cflags_opt="-O3 -O2 -O1"
+           path_mode64=""
+           for i in $cpu_path; do path_mode64="${path_mode64}powerpc64/mode64/$i "; done
+           path_mode64="${path_mode64}powerpc64/mode64 $vmx_path powerpc64"
+           SPEED_CYCLECOUNTER_OBJ_mode64=powerpc64.lo
+           cyclecounter_size_mode64=0
+           any_mode64_testlist="sizeof-long-8"
+           ;;
+       esac
+       ;;
+    esac
+    ;;
+
+
+  # POWER 32-bit
+  [power-*-* | power[12]-*-* | power2sc-*-*])
+    AC_DEFINE(HAVE_HOST_CPU_FAMILY_power)
+    HAVE_HOST_CPU_FAMILY_power=1
+    cclist="gcc"
+    extra_functions="udiv_w_sdiv"
+    path="power"
+
+    # gcc 2.7.2 knows rios1, rios2, rsc
+    #
+    # -mcpu=rios2 can tickle an AIX assembler bug (see GMP_PROG_CC_WORKS) so
+    # there needs to be a fallback to just -mpower.
+    #
+    gcc_cflags_optlist="cpu"
+    case $host in
+      power-*-*)    gcc_cflags_cpu="-mcpu=power -mpower" ;;
+      power1-*-*)   gcc_cflags_cpu="-mcpu=rios1 -mpower" ;;
+      power2-*-*)   gcc_cflags_cpu="-mcpu=rios2 -mpower" ;;
+      power2sc-*-*) gcc_cflags_cpu="-mcpu=rsc   -mpower" ;;
+    esac
+    case $host in
+    *-*-aix*)
+      cclist="gcc xlc"
+      xlc_cflags="-O2 -qarch=pwr -qmaxmem=20000"
+      ;;
+    esac
+    ;;
+
+
+  pyramid-*-*)
+    path="pyr"
+    ;;
+
+
+  # IBM System/390 and z/Architecture
+  S390_PATTERN | S390X_PATTERN)
+    abilist="32"
+    gcc_cflags="$gcc_cflags $fomit_frame_pointer"
+    gcc_cflags_optlist="arch"
+    path="s390_32"
+    extra_functions="udiv_w_sdiv"
+    gcc_32_cflags_maybe="-m31"
+
+    case $host_cpu in
+      s390)
+       ;;
+      z900 | z900esa)
+        cpu="z900"
+        gccarch="$cpu"
+       path="s390_32/esame/$cpu s390_32/esame s390_32"
+       gcc_cflags_arch="-march=$gccarch"
+       AC_DEFINE_UNQUOTED(HAVE_HOST_CPU_s390_$cpu)
+       AC_DEFINE(HAVE_HOST_CPU_s390_zarch)
+       extra_functions=""
+        ;;
+      z990 | z990esa)
+        cpu="z990"
+        gccarch="$cpu"
+       path="s390_32/esame/$cpu s390_32/esame s390_32"
+       gcc_cflags_arch="-march=$gccarch"
+       AC_DEFINE_UNQUOTED(HAVE_HOST_CPU_s390_$cpu)
+       AC_DEFINE(HAVE_HOST_CPU_s390_zarch)
+       extra_functions=""
+        ;;
+      z9 | z9esa)
+        cpu="z9"
+       gccarch="z9-109"
+       path="s390_32/esame/$cpu s390_32/esame s390_32"
+       gcc_cflags_arch="-march=$gccarch"
+       AC_DEFINE_UNQUOTED(HAVE_HOST_CPU_s390_$cpu)
+       AC_DEFINE(HAVE_HOST_CPU_s390_zarch)
+       extra_functions=""
+        ;;
+      z10 | z10esa)
+        cpu="z10"
+       gccarch="z10"
+       path="s390_32/esame/$cpu s390_32/esame s390_32"
+       gcc_cflags_arch="-march=$gccarch"
+       AC_DEFINE_UNQUOTED(HAVE_HOST_CPU_s390_$cpu)
+       AC_DEFINE(HAVE_HOST_CPU_s390_zarch)
+       extra_functions=""
+        ;;
+      z196 | z196esa)
+        cpu="z196"
+       gccarch="z196"
+       path="s390_32/esame/$cpu s390_32/esame s390_32"
+       gcc_cflags_arch="-march=$gccarch"
+       AC_DEFINE_UNQUOTED(HAVE_HOST_CPU_s390_$cpu)
+       AC_DEFINE(HAVE_HOST_CPU_s390_zarch)
+       extra_functions=""
+        ;;
+      esac
+
+    case $host in
+      S390X_PATTERN)
+       abilist="64 32"
+       cclist_64="gcc"
+       gcc_64_cflags_optlist="arch"
+       gcc_64_cflags="$gcc_cflags -m64"
+       path_64="s390_64/$host_cpu s390_64"
+       extra_functions=""
+       ;;
+      esac
+    ;;
+
+
+  sh-*-*)   path="sh" ;;
+  [sh[2-4]-*-*])  path="sh/sh2 sh" ;;
+
+
+  *sparc*-*-*)
+    # sizeof(long)==4 or 8 is tested, to ensure we get the right ABI.  We've
+    # had various bug reports where users have set CFLAGS for their desired
+    # mode, but not set our ABI.  For some reason it's sparc where this
+    # keeps coming up, presumably users there are accustomed to driving the
+    # compiler mode that way.  The effect of our testlist setting is to
+    # reject ABI=64 in favour of ABI=32 if the user has forced the flags to
+    # 32-bit mode.
+    #
+    abilist="32"
+    cclist="gcc acc cc"
+    any_testlist="sizeof-long-4"
+    GMP_INCLUDE_MPN(sparc32/sparc-defs.m4)
+
+    case $host_cpu in
+      sparcv8 | microsparc | turbosparc)
+        path="sparc32/v8 sparc32" ;;
+      supersparc)
+        path="sparc32/v8/supersparc sparc32/v8 sparc32" ;;
+      sparc64 | sparcv9* | ultrasparc*)
+        path="sparc32/v9 sparc32/v8 sparc32" ;;
+      *)
+        path="sparc32" ;;
+    esac
+
+    # gcc 2.7.2 doesn't know about v9 and doesn't pass -xarch=v8plus to the
+    # assembler.  Add it explicitly since the solaris assembler won't accept
+    # our sparc32/v9 asm code without it.  gas accepts -xarch=v8plus too, so
+    # it can be in the cflags unconditionally (though gas doesn't need it).
+    #
+    # gcc -m32 is needed to force 32-bit mode on a dual-ABI system, but older
+    # gcc doesn't know that flag, hence cflags_maybe.  Note that -m32 cannot
+    # be done through the optlist since the plain cflags would be run first
+    # and we don't want to require the default mode (whatever it is) works.
+    #
+    # Note it's gcc_32_cflags_maybe and not gcc_cflags_maybe because the
+    # latter would be used in the 64-bit ABI on systems like "*bsd" where
+    # abilist="64" only.
+    #
+    case $host_cpu in
+      sparc64 | sparcv9* | ultrasparc*)
+        gcc_cflags="$gcc_cflags -Wa,-xarch=v8plus" ;;
+      *)
+        gcc_cflags="$gcc_cflags" ;;
+    esac
+    gcc_32_cflags_maybe="-m32"
+    gcc_cflags_optlist="cpu"
+
+    # gcc 2.7.2 knows -mcypress, -msupersparc, -mv8, -msparclite.
+    # gcc 2.95 knows -mcpu= v7, hypersparc, sparclite86x, f930, f934,
+    #   sparclet, tsc701, v9, ultrasparc.  A warning is given that the
+    #   plain -m forms will disappear.
+    # gcc 3.0 adds nothing.
+    # gcc 3.1 adds nothing.
+    # gcc 3.2 adds nothing.
+    # gcc 3.3 adds ultrasparc3.
+    #
+    case $host_cpu in
+      supersparc)           gcc_cflags_cpu="-mcpu=supersparc -msupersparc" ;;
+      sparcv8 | microsparc | turbosparc)
+                           gcc_cflags_cpu="-mcpu=v8 -mv8" ;;
+      sparc64 | sparcv9*)   gcc_cflags_cpu="-mcpu=v9 -mv8" ;;
+      ultrasparc3)          gcc_cflags_cpu="-mcpu=ultrasparc3 -mcpu=ultrasparc -mv8" ;;
+      ultrasparc*)          gcc_cflags_cpu="-mcpu=ultrasparc -mv8" ;;
+      *)                    gcc_cflags_cpu="-mcpu=v7 -mcypress" ;;
+    esac
+
+    # SunPRO cc and acc, and SunOS bundled cc
+    case $host in
+      *-*-solaris* | *-*-sunos*)
+       # Note no -g, it disables all optimizations.
+       cc_cflags=
+       cc_cflags_optlist="opt arch cpu"
+
+        # SunOS cc doesn't know -xO4, fallback to -O2.
+       cc_cflags_opt="-xO4 -O2"
+
+        # SunOS cc doesn't know -xarch, apparently always generating v7
+        # code, so make this optional
+       case $host_cpu in
+         sparcv8 | microsparc | supersparc | turbosparc)
+                                             cc_cflags_arch="-xarch=v8" ;;
+         sparc64 | sparcv9* | ultrasparc*)   cc_cflags_arch="-xarch=v8plus" ;;
+         *)                                  cc_cflags_arch="-xarch=v7" ;;
+       esac
+
+        # SunOS cc doesn't know -xchip and doesn't seem to have an equivalent.
+       # SunPRO cc 5 recognises -xchip=generic, old, super, super2, micro,
+       #   micro2, hyper, hyper2, powerup, ultra, ultra2, ultra2i.
+       # SunPRO cc 6 adds -xchip=ultra2e, ultra3cu.
+        #
+       # FIXME: Which of ultra, ultra2 or ultra2i is the best fallback for
+       # ultrasparc3?
+       #
+       case $host_cpu in
+         supersparc)   cc_cflags_cpu="-xchip=super" ;;
+         microsparc)   cc_cflags_cpu="-xchip=micro" ;;
+         turbosparc)   cc_cflags_cpu="-xchip=micro2" ;;
+         ultrasparc)   cc_cflags_cpu="-xchip=ultra" ;;
+         ultrasparc2)  cc_cflags_cpu="-xchip=ultra2" ;;
+         ultrasparc2i) cc_cflags_cpu="-xchip=ultra2i" ;;
+         ultrasparc3)  cc_cflags_cpu="-xchip=ultra3 -xchip=ultra" ;;
+         *)            cc_cflags_cpu="-xchip=generic" ;;
+       esac
+    esac
+
+    case $host_cpu in
+      sparc64 | sparcv9* | ultrasparc*)
+        case $host in
+          # Solaris 6 and earlier cannot run ABI=64 since it doesn't save
+          # registers properly, so ABI=32 is left as the only choice.
+          #
+          [*-*-solaris2.[0-6] | *-*-solaris2.[0-6].*]) ;;
+
+          # BSD sparc64 ports are 64-bit-only systems, so ABI=64 is the only
+          # choice.  In fact they need no special compiler flags, gcc -m64
+          # is the default, but it doesn't hurt to add it.  v9 CPUs always
+          # use the sparc64 port, since the plain 32-bit sparc ports don't
+          # run on a v9.
+          #
+          *-*-*bsd*) abilist="64" ;;
+
+          # For all other systems, we try both 64 and 32.
+          #
+          # GNU/Linux sparc64 has only recently gained a 64-bit user mode.
+          # In the past sparc64 meant a v9 cpu, but there were no 64-bit
+          # operations in user mode.  We assume that if "gcc -m64" works
+          # then the system is suitable.  Hopefully even if someone attempts
+          # to put a new gcc and/or glibc on an old system it won't run.
+          #
+          *) abilist="64 32" ;;
+        esac
+
+       case $host_cpu in
+         ultrasparc | ultrasparc2 | ultrasparc2i)
+           path_64="sparc64/ultrasparc12 sparc64" ;;
+         [ultrasparc[34]])
+           path_64="sparc64/ultrasparc34 sparc64/ultrasparc1234 sparc64" ;;
+         [ultrasparct[1234]])
+           path_64="sparc64" ;;
+         *)
+           path_64="sparc64"
+       esac
+
+        cclist_64="gcc"
+        any_64_testlist="sizeof-long-8"
+
+        # gcc -mptr64 is probably implied by -m64, but we're not sure if
+        # this was always so.  On Solaris in the past we always used both
+        # "-m64 -mptr64".
+        #
+        # gcc -Wa,-xarch=v9 is thought to be necessary in some cases on
+        # solaris, but it would seem likely that if gcc is going to generate
+        # 64-bit code it will have to add that option itself where needed.
+        # An extra copy of this option should be harmless though, but leave
+        # it until we're sure.  (Might want -xarch=v9a or -xarch=v9b for the
+        # higher cpu types instead.)
+        #
+        gcc_64_cflags="$gcc_64_cflags -m64 -mptr64"
+        gcc_64_ldflags="-Wc,-m64"
+        gcc_64_cflags_optlist="cpu"
+
+        case $host in
+          *-*-solaris*)
+            # Sun cc.
+            #
+            # We used to have -fast and some fixup options here, but it
+            # recurrently caused problems with miscompilation.  Of course,
+            # -fast is documented as miscompiling things for the sake of speed.
+            #
+            cclist_64="$cclist_64 cc"
+            cc_64_cflags="-xO3 -xarch=v9"
+            cc_64_cflags_optlist="cpu"
+            ;;
+        esac
+
+        # using the v9 %tick register
+        SPEED_CYCLECOUNTER_OBJ_32=sparcv9.lo
+        SPEED_CYCLECOUNTER_OBJ_64=sparcv9.lo
+        cyclecounter_size_32=2
+        cyclecounter_size_64=2
+        ;;
+    esac
+    ;;
+
+
+  # VAX
+  vax*-*-*)
+    # Currently gcc (version 3.0) on vax always uses a frame pointer
+    # (config/vax/vax.h FRAME_POINTER_REQUIRED=1), so -fomit-frame-pointer
+    # will be ignored.
+    #
+    gcc_cflags="$gcc_cflags $fomit_frame_pointer"
+    path="vax"
+    extra_functions="udiv_w_sdiv"
+    ;;
+
+
+  # AMD and Intel x86 configurations, including AMD64
+  #
+  # Rumour has it gcc -O2 used to give worse register allocation than just
+  # -O, but let's assume that's no longer true.
+  #
+  # -m32 forces 32-bit mode on a bi-arch 32/64 amd64 build of gcc.  -m64 is
+  # the default in such a build (we think), so -m32 is essential for ABI=32.
+  # This is, of course, done for any $host_cpu, not just x86_64, so we can
+  # get such a gcc into the right mode to cross-compile to say i486-*-*.
+  #
+  # -m32 is not available in gcc 2.95 and earlier, hence cflags_maybe to use
+  # it when it works.  We check sizeof(long)==4 to ensure we get the right
+  # mode, in case -m32 has failed not because it's an old gcc, but because
+  # it's a dual 32/64-bit gcc without a 32-bit libc, or whatever.
+  #
+  X86_PATTERN | X86_64_PATTERN)
+    abilist="32"
+    cclist="gcc icc cc"
+    gcc_cflags="$gcc_cflags $fomit_frame_pointer"
+    gcc_32_cflags_maybe="-m32"
+    icc_cflags="-no-gcc"
+    icc_cflags_optlist="opt"
+    icc_cflags_opt="-O3 -O2 -O1"
+    any_32_testlist="sizeof-long-4"
+    CALLING_CONVENTIONS_OBJS='x86call.lo x86check$U.lo'
+
+    # Availability of rdtsc is checked at run-time.
+    SPEED_CYCLECOUNTER_OBJ=pentium.lo
+
+    # gcc 2.7.2 only knows i386 and i486, using -m386 or -m486.  These
+    #     represent -mcpu= since -m486 doesn't generate 486 specific insns.
+    # gcc 2.95 adds k6, pentium and pentiumpro, and takes -march= and -mcpu=.
+    # gcc 3.0 adds athlon.
+    # gcc 3.1 adds k6-2, k6-3, pentium-mmx, pentium2, pentium3, pentium4,
+    #     athlon-tbird, athlon-4, athlon-xp, athlon-mp.
+    # gcc 3.2 adds winchip2.
+    # gcc 3.3 adds winchip-c6.
+    # gcc 3.3.1 from mandrake adds k8 and knows -mtune.
+    # gcc 3.4 adds c3, c3-2, k8, and deprecates -mcpu in favour of -mtune.
+    #
+    # In gcc 2.95.[0123], -march=pentiumpro provoked a stack slot bug in an
+    # old version of mpz/powm.c.  Seems to be fine with the current code, so
+    # no need for any restrictions on that option.
+    #
+    # -march=pentiumpro can fail if the assembler doesn't know "cmov"
+    # (eg. solaris 2.8 native "as"), so always have -march=pentium after
+    # that as a fallback.
+    #
+    # -march=pentium4 and -march=k8 enable SSE2 instructions, which may or
+    # may not be supported by the assembler and/or the OS, and is bad in gcc
+    # prior to 3.3.  The tests will reject these if no good, so fallbacks
+    # like "-march=pentium4 -mno-sse2" are given to try also without SSE2.
+    # Note the relevant -march types are listed in the optflags handling
+    # below, be sure to update there if adding new types emitting SSE2.
+    #
+    # -mtune is used at the start of each cpu option list to give something
+    # gcc 3.4 will use, thereby avoiding warnings from -mcpu.  -mcpu forms
+    # are retained for use by prior gcc.  For example pentium has
+    # "-mtune=pentium -mcpu=pentium ...", the -mtune is for 3.4 and the
+    # -mcpu for prior.  If there's a brand new choice in 3.4 for a chip,
+    # like k8 for x86_64, then it can be the -mtune at the start, no need to
+    # duplicate anything.
+    #
+    gcc_cflags_optlist="cpu arch"
+    case $host_cpu in
+      i386*)
+       gcc_cflags_cpu="-mtune=i386 -mcpu=i386 -m386"
+       gcc_cflags_arch="-march=i386"
+       path="x86"
+       ;;
+      i486*)
+       gcc_cflags_cpu="-mtune=i486 -mcpu=i486 -m486"
+       gcc_cflags_arch="-march=i486"
+       path="x86/i486 x86"
+       ;;
+      i586 | pentium)
+       gcc_cflags_cpu="-mtune=pentium -mcpu=pentium -m486"
+       gcc_cflags_arch="-march=pentium"
+       path="x86/pentium x86"
+       ;;
+      pentiummmx)
+       gcc_cflags_cpu="-mtune=pentium-mmx -mcpu=pentium-mmx -mcpu=pentium -m486"
+       gcc_cflags_arch="-march=pentium-mmx -march=pentium"
+       path="x86/pentium/mmx x86/pentium x86"
+       ;;
+      i686 | pentiumpro)
+       gcc_cflags_cpu="-mtune=pentiumpro -mcpu=pentiumpro -mcpu=i486 -m486"
+       gcc_cflags_arch="-march=pentiumpro -march=pentium"
+       path="x86/p6 x86"
+       ;;
+      pentium2)
+       gcc_cflags_cpu="-mtune=pentium2 -mcpu=pentium2 -mcpu=pentiumpro -mcpu=i486 -m486"
+       gcc_cflags_arch="-march=pentium2 -march=pentiumpro -march=pentium"
+       path="x86/p6/mmx x86/p6 x86"
+       ;;
+      pentium3)
+       gcc_cflags_cpu="-mtune=pentium3 -mcpu=pentium3 -mcpu=pentiumpro -mcpu=i486 -m486"
+       gcc_cflags_arch="-march=pentium3 -march=pentiumpro -march=pentium"
+       path="x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
+       ;;
+      pentiumm)
+       gcc_cflags_cpu="-mtune=pentium3 -mcpu=pentium3 -mcpu=pentiumpro -mcpu=i486 -m486"
+       gcc_cflags_arch="-march=pentium3 -march=pentiumpro -march=pentium"
+       path="x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
+       ;;
+      k6)
+       gcc_cflags_cpu="-mtune=k6 -mcpu=k6 -mcpu=i486 -m486"
+       gcc_cflags_arch="-march=k6"
+       path="x86/k6/mmx x86/k6 x86"
+       ;;
+      k62)
+       gcc_cflags_cpu="-mtune=k6-2 -mcpu=k6-2 -mcpu=k6 -mcpu=i486 -m486"
+       gcc_cflags_arch="-march=k6-2 -march=k6"
+       path="x86/k6/k62mmx x86/k6/mmx x86/k6 x86"
+       ;;
+      k63)
+       gcc_cflags_cpu="-mtune=k6-3 -mcpu=k6-3 -mcpu=k6 -mcpu=i486 -m486"
+       gcc_cflags_arch="-march=k6-3 -march=k6"
+       path="x86/k6/k62mmx x86/k6/mmx x86/k6 x86"
+       ;;
+      geode)
+       gcc_cflags_cpu="-mtune=k6-3 -mcpu=k6-3 -mcpu=k6 -mcpu=i486 -m486"
+       gcc_cflags_arch="-march=k6-3 -march=k6"
+       path="x86/geode x86/k6/k62mmx x86/k6/mmx x86/k6 x86"
+       ;;
+      athlon)
+       # Athlon instruction costs are close to P6 (3 cycle load latency,
+       # 4-6 cycle mul, 40 cycle div, pairable adc, etc) so if gcc doesn't
+       # know athlon (eg. 2.95.2 doesn't) then fall back on pentiumpro.
+       gcc_cflags_cpu="-mtune=athlon -mcpu=athlon -mcpu=pentiumpro -mcpu=i486 -m486"
+       gcc_cflags_arch="-march=athlon -march=pentiumpro -march=pentium"
+       path="x86/k7/mmx x86/k7 x86"
+       ;;
+      i786 | pentium4)
+       # pentiumpro is the primary fallback when gcc doesn't know pentium4.
+       # This gets us cmov to eliminate branches.  Maybe "athlon" would be
+       # a possibility on gcc 3.0.
+       #
+       gcc_cflags_cpu="-mtune=pentium4 -mcpu=pentium4 -mcpu=pentiumpro -mcpu=i486 -m486"
+       gcc_cflags_arch="-march=pentium4 -march=pentium4~-mno-sse2 -march=pentiumpro -march=pentium"
+       gcc_64_cflags_cpu="-mtune=nocona"
+       path="x86/pentium4/sse2 x86/pentium4/mmx x86/pentium4 x86"
+       path_64="x86_64/pentium4 x86_64"
+       ;;
+      viac32)
+       # Not sure of the best fallbacks here for -mcpu.
+       # c3-2 has sse and mmx, so pentium3 is good for -march.
+       gcc_cflags_cpu="-mtune=c3-2 -mcpu=c3-2 -mcpu=i486 -m486"
+       gcc_cflags_arch="-march=c3-2 -march=pentium3 -march=pentiumpro -march=pentium"
+       path="x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
+       ;;
+      viac3*)
+       # Not sure of the best fallbacks here.
+       gcc_cflags_cpu="-mtune=c3 -mcpu=c3 -mcpu=i486 -m486"
+       gcc_cflags_arch="-march=c3 -march=pentium-mmx -march=pentium"
+       path="x86/pentium/mmx x86/pentium x86"
+       ;;
+      athlon64 | k8 | x86_64)
+       gcc_cflags_cpu="-mtune=k8 -mcpu=athlon -mcpu=pentiumpro -mcpu=i486 -m486"
+       gcc_cflags_arch="-march=k8 -march=k8~-mno-sse2 -march=athlon -march=pentiumpro -march=pentium"
+       path="x86/k8 x86/k7/mmx x86/k7 x86"
+       path_64="x86_64/k8 x86_64"
+       ;;
+      k10)
+       gcc_cflags_cpu="-mtune=amdfam10 -mtune=k8"
+       gcc_cflags_arch="-march=amdfam10 -march=k8 -march=k8~-mno-sse2"
+       path="x86/k10 x86/k8 x86/k7/mmx x86/k7 x86"
+       path_64="x86_64/k10 x86_64/k8 x86_64"
+       ;;
+      bobcat)
+       gcc_cflags_cpu="-mtune=btver1 -mtune=amdfam10 -mtune=k8"
+       gcc_cflags_arch="-march=btver1 -march=amdfam10 -march=k8 -march=k8~-mno-sse2"
+       path="x86/bobcat x86/k7/mmx x86/k7 x86"
+       path_64="x86_64/bobcat x86_64/k10 x86_64/k8 x86_64"
+       ;;
+      bulldozer | bd1)
+       gcc_cflags_cpu="-mtune=bdver1 -mtune=amdfam10 -mtune=k8"
+       gcc_cflags_arch="-march=bdver1 -march=amdfam10 -march=k8 -march=k8~-mno-sse2"
+       path="x86/bd1 x86/k7/mmx x86/k7 x86"
+       path_64="x86_64/bd1 x86_64"
+       ;;
+      core2)
+       gcc_cflags_cpu="-mtune=core2 -mtune=k8"
+       gcc_cflags_arch="-march=core2 -march=core2~-mno-sse2 -march=k8 -march=k8~-mno-sse2"
+       path="x86/core2 x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
+       path_64="x86_64/core2 x86_64"
+       ;;
+      corei | coreinhm | coreiwsm)
+       gcc_cflags_cpu="-mtune=corei7 -mtune=core2 -mtune=k8"
+       gcc_cflags_arch="-march=corei7 -march=core2 -march=core2~-mno-sse2 -march=k8 -march=k8~-mno-sse2"
+       path="x86/coreinhm x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
+       path_64="x86_64/coreinhm x86_64/core2 x86_64"
+       ;;
+      coreisbr)
+       gcc_cflags_cpu="-mtune=corei7 -mtune=core2 -mtune=k8"
+       gcc_cflags_arch="-march=corei7 -march=core2 -march=core2~-mno-sse2 -march=k8 -march=k8~-mno-sse2"
+       path="x86/coreisbr x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
+       path_64="x86_64/coreisbr x86_64/coreinhm x86_64/core2 x86_64"
+       ;;
+      atom)
+       gcc_cflags_cpu="-mtune=atom -mtune=pentium3"
+       gcc_cflags_arch="-march=atom -march=pentium3"
+       path="x86/atom/sse2 x86/atom/mmx x86/atom x86"
+       path_64="x86_64/atom x86_64"
+       ;;
+      nano)
+       gcc_cflags_cpu="-mtune=nano"
+       gcc_cflags_arch="-march=nano"
+       path="x86/nano x86"
+       path_64="x86_64/nano x86_64"
+       ;;
+      *)
+       gcc_cflags_cpu="-mtune=i486 -mcpu=i486 -m486"
+       gcc_cflags_arch="-march=i486"
+       path="x86"
+       path_64="x86_64"
+       ;;
+    esac
+
+    case $host in
+      X86_64_PATTERN)
+       cclist_64="gcc"
+       gcc_64_cflags="$gcc_64_cflags -m64"
+       gcc_64_cflags_optlist="cpu arch"
+       CALLING_CONVENTIONS_OBJS_64='amd64call.lo amd64check$U.lo'
+       SPEED_CYCLECOUNTER_OBJ_64=x86_64.lo
+       cyclecounter_size_64=2
+       abilist="64 32"
+
+       case $host in
+         *-*-solaris*)
+           # Sun cc.
+           cclist_64="$cclist_64 cc"
+           cc_64_cflags="-xO3 -m64"
+           ;;
+         *-*-mingw* | *-*-cygwin)
+           limb_64=longlong
+           path_64=""  # Windows amd64 calling conventions are *different*
+           # Silence many pedantic warnings for w64.  FIXME.
+           gcc_64_cflags="$gcc_64_cflags -std=gnu99"
+           ;;
+       esac
+       ;;
+    esac
+    ;;
+
+
+  # FIXME: z8kx won't get through config.sub.  Could make 16 versus 32 bit
+  # limb an ABI option perhaps.
+  z8kx*-*-*)
+    path="z8000x"
+    extra_functions="udiv_w_sdiv"
+    ;;
+  z8k*-*-*)
+    path="z8000"
+    extra_functions="udiv_w_sdiv"
+    ;;
+
+
+  # Special CPU "none" selects generic C.  -DNO_ASM is used to disable gcc
+  # asm blocks in longlong.h (since they're driven by cpp pre-defined
+  # symbols like __alpha rather than the configured $host_cpu).
+  #
+  none-*-*)
+    abilist="long longlong"
+    cclist_long=$cclist
+    gcc_long_cflags=$gcc_cflags
+    gcc_long_cppflags="-DNO_ASM"
+    cc_long_cflags=$cc_cflags
+    cclist_longlong=$cclist
+    gcc_longlong_cflags=$gcc_cflags
+    gcc_longlong_cppflags="-DNO_ASM"
+    cc_longlong_cflags=$cc_cflags
+    limb_longlong=longlong
+    ;;
+
+esac
+
+# mingw can be built by the cygwin gcc if -mno-cygwin is added.  For
+# convenience add this automatically if it works.  An actual mingw gcc
+# accepts -mno-cygwin too, though there that mode is of course already the
+# default.  mingw only runs on the x86s, but allow any CPU here so as to
+# catch the special "none" CPU too.
+case $host in
+  *-*-mingw*)
+    gcc_cflags_optlist="$gcc_cflags_optlist nocygwin"   # register one more option group, named nocygwin
+    gcc_cflags_nocygwin="-mno-cygwin"                   # the flag offered for that group
+    ;;
+esac
+
+
+CFLAGS_or_unset=${CFLAGS-'(unset)'}       # value of CFLAGS, or the text (unset) when the variable is not set at all
+CPPFLAGS_or_unset=${CPPFLAGS-'(unset)'}   # likewise for CPPFLAGS; a set-but-empty value stays empty
+
+cat >&AC_FD_CC <<EOF   # write the incoming settings and the per-host abilist and cclist to the AC_FD_CC descriptor
+User:
+ABI=$ABI
+CC=$CC
+CFLAGS=$CFLAGS_or_unset
+CPPFLAGS=$CPPFLAGS_or_unset
+MPN_PATH=$MPN_PATH
+GMP:
+abilist=$abilist
+cclist=$cclist
+EOF
+
+
+test_CFLAGS=${CFLAGS+set}
+test_CPPFLAGS=${CPPFLAGS+set}
+
+for abi in $abilist; do
+  abi_last="$abi"
+done
+
+# If the user specifies an ABI then it must be in $abilist; after that
+# $abilist is restricted to just that choice.  An empty or unset ABI skips
+# the check and leaves the whole of $abilist to be tried.
+if test -n "$ABI"; then
+  found=no                                              # pessimistic default until a match is seen
+  for abi in $abilist; do
+    if test $abi = "$ABI"; then found=yes; break; fi    # exact string comparison against each known ABI
+  done
+  if test $found = no; then
+    AC_MSG_ERROR([ABI=$ABI is not among the following valid choices: $abilist])
+  fi
+  abilist="$ABI"                                        # abi_last, computed earlier, is deliberately left as it was
+fi
+
+found_compiler=no   # NOTE(review): presumably changed by the ABI search loop that follows - its end is outside this excerpt, confirm
+
+for abi in $abilist; do
+
+  echo "checking ABI=$abi"
+
+  # Suppose abilist="64 32", then for abi=64, will have abi1="_64" and
+  # abi2="_64".  For abi=32, will have abi1="_32" and abi2="".  This is how
+  # $gcc_cflags becomes a fallback for $gcc_32_cflags (the last in the
+  # abilist), but there's no fallback for $gcc_64_cflags.
+  #
+  abi1=[`echo _$abi | sed 's/[.]//g'`]
+  if test $abi = $abi_last; then abi2=; else abi2="$abi1"; fi
+
+  # Compiler choices under this ABI
+                              eval cclist_chosen=\"\$cclist$abi1\"
+  test -n "$cclist_chosen" || eval cclist_chosen=\"\$cclist$abi2\"
+
+  # If there's a user specified $CC then don't use a list for
+  # $cclist_chosen, just a single value for $ccbase.
+  #
+  if test -n "$CC"; then
+
+    # The first word of $CC, stripped of any directory.  For instance
+    # CC="/usr/local/bin/gcc -pipe" will give "gcc".
+    #
+    for ccbase in $CC; do break; done
+    ccbase=`echo $ccbase | sed 's:.*/::'`
+
+    # If this $ccbase is in $cclist_chosen then it's a compiler we know and
+    # we can do flags defaulting with it.  If not, then $cclist_chosen is
+    # set to "unrecognised" so no default flags are used.
+    #
+    # "unrecognised" is used to avoid bad effects with eval if $ccbase has
+    # non-symbol characters.  For instance ccbase=my+cc would end up with
+    # something like cflags="$my+cc_cflags" which would give
+    # cflags="+cc_cflags" rather than the intended empty string for an
+    # unknown compiler.
+    #
+    found=unrecognised
+    for i in $cclist_chosen; do
+      if test "$ccbase" = $i; then
+        found=$ccbase
+        break
+      fi
+    done
+    cclist_chosen=$found
+  fi
+
+  for ccbase in $cclist_chosen; do
+
+    # When cross compiling, look for a compiler with the $host_alias as a
+    # prefix, the same way that AC_CHECK_TOOL does.  But don't do this to a
+    # user-selected $CC.
+    #
+    # $cross_compiling will be yes/no/maybe at this point.  Do the host
+    # prefixing for "maybe" as well as "yes".
+    #
+    if test "$cross_compiling" != no && test -z "$CC"; then
+      cross_compiling_prefix="${host_alias}-"
+    fi
+
+    for ccprefix in $cross_compiling_prefix ""; do
+
+      cc="$CC"
+      test -n "$cc" || cc="$ccprefix$ccbase"
+
+      # If the compiler is gcc but installed under another name, then change
+      # $ccbase so as to use the flags we know for gcc.  This helps for
+      # instance when specifying CC=gcc272 on Debian GNU/Linux, or the
+      # native cc which is really gcc on NeXT or MacOS-X.
+      #
+      # FIXME: There's a slight misfeature here.  If cc is actually gcc but
+      # gcc is not a known compiler under this $abi then we'll end up
+      # testing it with no flags and it'll work, but chances are it won't be
+      # in the right mode for the ABI we desire.  Let's quietly hope this
+      # doesn't happen.
+      #
+      if test $ccbase != gcc; then
+        GMP_PROG_CC_IS_GNU($cc,ccbase=gcc)
+      fi
+
+      # Similarly if the compiler is IBM xlc but invoked as cc or whatever
+      # then change $ccbase and make the default xlc flags available.
+      if test $ccbase != xlc; then
+        GMP_PROG_CC_IS_XLC($cc,ccbase=xlc)
+      fi
+
+      # acc was Sun's first unbundled compiler back in the SunOS days, or
+      # something like that, but today its man page says it's not meant to
+      # be used directly (instead via /usr/ucb/cc).  The options are pretty
+      # much the same as the main SunPRO cc, so share those configs.
+      #
+      case $host in
+        *sparc*-*-solaris* | *sparc*-*-sunos*)
+          if test "$ccbase" = acc; then ccbase=cc; fi ;;
+      esac
+
+      for tmp_cflags_maybe in yes no; do
+                             eval cflags=\"\$${ccbase}${abi1}_cflags\"
+        test -n "$cflags" || eval cflags=\"\$${ccbase}${abi2}_cflags\"
+
+       if test "$tmp_cflags_maybe" = yes; then
+          # don't try cflags_maybe when the user set CFLAGS
+          if test "$test_CFLAGS" = set; then continue; fi
+                                     eval cflags_maybe=\"\$${ccbase}${abi1}_cflags_maybe\"
+          test -n "$cflags_maybe" || eval cflags_maybe=\"\$${ccbase}${abi2}_cflags_maybe\"
+          # don't try cflags_maybe if there's nothing set
+          if test -z "$cflags_maybe"; then continue; fi
+          cflags="$cflags_maybe $cflags"
+        fi
+
+        # Any user CFLAGS, even an empty string, takes precedence
+        if test "$test_CFLAGS" = set; then cflags=$CFLAGS; fi
+
+        # Any user CPPFLAGS, even an empty string, takes precedence
+                               eval cppflags=\"\$${ccbase}${abi1}_cppflags\"
+        test -n "$cppflags" || eval cppflags=\"\$${ccbase}${abi2}_cppflags\"
+        if test "$test_CPPFLAGS" = set; then cppflags=$CPPFLAGS; fi
+
+        # --enable-profiling adds -p/-pg even to user-specified CFLAGS.
+        # This is convenient, but it's perhaps a bit naughty to modify user
+        # CFLAGS.
+        case "$enable_profiling" in
+          prof)       cflags="$cflags -p" ;;
+          gprof)      cflags="$cflags -pg" ;;
+          instrument) cflags="$cflags -finstrument-functions" ;;
+        esac
+
+        GMP_PROG_CC_WORKS($cc $cflags $cppflags,,continue)
+
+        # If we're supposed to be using a "long long" for a limb, check that
+        # it works.
+                                  eval limb_chosen=\"\$limb$abi1\"
+        test -n "$limb_chosen" || eval limb_chosen=\"\$limb$abi2\"
+        if test "$limb_chosen" = longlong; then
+          GMP_PROG_CC_WORKS_LONGLONG($cc $cflags $cppflags,,continue)
+        fi
+
+        # The tests to perform on this $cc, if any
+                               eval testlist=\"\$${ccbase}${abi1}_testlist\"
+        test -n "$testlist" || eval testlist=\"\$${ccbase}${abi2}_testlist\"
+        test -n "$testlist" || eval testlist=\"\$any${abi1}_testlist\"
+        test -n "$testlist" || eval testlist=\"\$any${abi2}_testlist\"
+
+        testlist_pass=yes
+        for tst in $testlist; do
+          case $tst in
+          hpc-hppa-2-0)   GMP_HPC_HPPA_2_0($cc,,testlist_pass=no) ;;
+          gcc-arm-umodsi) GMP_GCC_ARM_UMODSI($cc,,testlist_pass=no) ;;
+          gcc-mips-o32)   GMP_GCC_MIPS_O32($cc,,testlist_pass=no) ;;
+          hppa-level-2.0) GMP_HPPA_LEVEL_20($cc $cflags,,testlist_pass=no) ;;
+          sizeof*)       GMP_C_TEST_SIZEOF($cc $cflags,$tst,,testlist_pass=no) ;;
+          esac
+          if test $testlist_pass = no; then break; fi
+        done
+
+        if test $testlist_pass = yes; then
+          found_compiler=yes
+          break
+        fi
+      done
+
+      if test $found_compiler = yes; then break; fi
+    done
+
+    if test $found_compiler = yes; then break; fi
+  done
+
+  if test $found_compiler = yes; then break; fi
+done
+
+
+# If we recognised the CPU, as indicated by $path being set, then insist
+# that we have a working compiler, either from our $cclist choices or from
+# $CC.  We can't let AC_PROG_CC look around for a compiler because it might
+# find one that we've rejected (for not supporting the modes our asm code
+# demands, etc).
+#
+# If we didn't recognise the CPU (and this includes host_cpu=none), then
+# fall through and let AC_PROG_CC look around for a compiler too.  This is
+# mostly in the interests of following a standard autoconf setup, after all
+# we've already tested cc and gcc adequately (hopefully).  As of autoconf
+# 2.50 the only thing AC_PROG_CC really adds is a check for "cl" (Microsoft
+# C on MS-DOS systems).
+#
+if test $found_compiler = no && test -n "$path"; then
+  AC_MSG_ERROR([could not find a working compiler, see config.log for details])
+fi
+
+case $host in
+  X86_PATTERN | X86_64_PATTERN)
+    # If the user asked for a fat build, override the path and flags set above
+    if test $enable_fat = yes; then
+      gcc_cflags_cpu=""
+      gcc_cflags_arch=""
+
+      if test "$abi" = 32; then
+       extra_functions="$extra_functions fat fat_entry"
+       path="x86/fat x86"
+       fat_path="x86 x86/fat x86/i486
+                 x86/k6 x86/k6/mmx x86/k6/k62mmx
+                 x86/k7 x86/k7/mmx
+                 x86/pentium x86/pentium/mmx
+                 x86/p6 x86/p6/mmx x86/p6/p3mmx x86/p6/sse2
+                 x86/pentium4 x86/pentium4/mmx x86/pentium4/sse2"
+      fi
+
+      if test "$abi" = 64; then
+       gcc_64_cflags=""
+       extra_functions_64="$extra_functions_64 fat fat_entry"
+       path_64="x86_64/fat x86_64"
+       fat_path="x86_64 x86_64/fat x86_64/pentium4 x86_64/core2 x86_64/coreinhm x86_64/coreisbr x86_64/atom x86_64/nano"
+      fi
+
+      fat_functions="add_n addmul_1 copyd copyi
+                    dive_1 diveby3 divrem_1 gcd_1 lshift
+                    mod_1 mod_34lsub1 mode1o mul_1 mul_basecase
+                    pre_divrem_1 pre_mod_1 rshift
+                    sqr_basecase sub_n submul_1"
+      fat_thresholds="MUL_TOOM22_THRESHOLD MUL_TOOM33_THRESHOLD
+                     SQR_TOOM2_THRESHOLD SQR_TOOM3_THRESHOLD"
+    fi
+    ;;
+esac
+
+
+if test $found_compiler = yes; then
+
+  # If we're creating CFLAGS, then look for optional additions.  If the user
+  # set CFLAGS then leave it alone.
+  #
+  if test "$test_CFLAGS" != set; then
+                          eval optlist=\"\$${ccbase}${abi1}_cflags_optlist\"
+    test -n "$optlist" || eval optlist=\"\$${ccbase}${abi2}_cflags_optlist\"
+
+    for opt in $optlist; do
+                             eval optflags=\"\$${ccbase}${abi1}_cflags_${opt}\"
+      test -n "$optflags" || eval optflags=\"\$${ccbase}${abi2}_cflags_${opt}\"
+      test -n "$optflags" || eval optflags=\"\$${ccbase}_cflags_${opt}\"
+
+      for flag in $optflags; do
+
+       # ~ represents a space in an option spec
+        flag=`echo "$flag" | tr '~' ' '`
+
+        case $flag in
+          -march=pentium4 | -march=k8)
+            # For -march settings which enable SSE2 we exclude certain bad
+            # gcc versions and we need an OS knowing how to save xmm regs.
+            #
+            # This is only for ABI=32, any 64-bit gcc is good and any OS
+            # knowing x86_64 will know xmm.
+            #
+            # -march=k8 was only introduced in gcc 3.3, so we shouldn't need
+            # the GMP_GCC_PENTIUM4_SSE2 check (for gcc 3.2 and prior).  But
+            # it doesn't hurt to run it anyway, sharing code with the
+            # pentium4 case.
+            #
+            if test "$abi" = 32; then
+              GMP_GCC_PENTIUM4_SSE2($cc $cflags $cppflags,, continue)
+              GMP_OS_X86_XMM($cc $cflags $cppflags,, continue)
+            fi
+            ;;
+          -no-cpp-precomp)
+            # special check, avoiding a warning
+            GMP_GCC_NO_CPP_PRECOMP($ccbase,$cc,$cflags,
+                                   [cflags="$cflags $flag"
+                                   break],
+                                   [continue])
+            ;;
+          -Wa,-m*)
+            case $host in
+              alpha*-*-*)
+                GMP_GCC_WA_MCPU($cc $cflags, $flag, , [continue])
+              ;;
+            esac
+            ;;
+          -Wa,-oldas)
+            GMP_GCC_WA_OLDAS($cc $cflags $cppflags,
+                             [cflags="$cflags $flag"
+                             break],
+                             [continue])
+            ;;
+        esac
+
+        GMP_PROG_CC_WORKS($cc $cflags $cppflags $flag,
+          [cflags="$cflags $flag"
+          break])
+      done
+    done
+  fi
+
+  ABI="$abi"
+  CC="$cc"
+  CFLAGS="$cflags"
+  CPPFLAGS="$cppflags"
+
+
+  # Could easily have this in config.h too, if desired.
+  ABI_nodots=`echo $ABI | sed 's/\./_/'`
+  GMP_DEFINE_RAW("define_not_for_expansion(\`HAVE_ABI_$ABI_nodots')", POST)
+
+
+  # GMP_LDFLAGS substitution, selected according to ABI, with the plain
+  # ${ccbase}_ldflags as the fallback for the last ABI in the abilist.
+  # These are needed on libgmp.la and libmp.la, but currently not on
+  # convenience libraries like tune/libspeed.la or mpz/libmpz.la.
+                            eval GMP_LDFLAGS=\"\$${ccbase}${abi1}_ldflags\"
+  test -n "$GMP_LDFLAGS" || eval GMP_LDFLAGS=\"\$${ccbase}${abi2}_ldflags\"
+  AC_SUBST(GMP_LDFLAGS)
+  AC_SUBST(LIBGMP_LDFLAGS)
+  AC_SUBST(LIBGMPXX_LDFLAGS)
+
+  # extra_functions, selected according to ABI
+                    eval tmp=\"\$extra_functions$abi1\"
+  test -n "$tmp" || eval tmp=\"\$extra_functions$abi2\"
+  extra_functions="$tmp"
+
+
+  # Cycle counter, selected according to ABI.
+  #
+                    eval tmp=\"\$SPEED_CYCLECOUNTER_OBJ$abi1\"
+  test -n "$tmp" || eval tmp=\"\$SPEED_CYCLECOUNTER_OBJ$abi2\"
+  SPEED_CYCLECOUNTER_OBJ="$tmp"
+                    eval tmp=\"\$cyclecounter_size$abi1\"
+  test -n "$tmp" || eval tmp=\"\$cyclecounter_size$abi2\"
+  cyclecounter_size="$tmp"
+
+  if test -n "$SPEED_CYCLECOUNTER_OBJ"; then
+    AC_DEFINE_UNQUOTED(HAVE_SPEED_CYCLECOUNTER, $cyclecounter_size,
+    [Tune directory speed_cyclecounter, undef=none, 1=32bits, 2=64bits)])
+  fi
+  AC_SUBST(SPEED_CYCLECOUNTER_OBJ)
+
+
+  # Calling conventions checking, selected according to ABI.
+  #
+                    eval tmp=\"\$CALLING_CONVENTIONS_OBJS$abi1\"
+  test -n "$tmp" || eval tmp=\"\$CALLING_CONVENTIONS_OBJS$abi2\"
+  CALLING_CONVENTIONS_OBJS="$tmp"
+
+  if test -n "$CALLING_CONVENTIONS_OBJS"; then
+    AC_DEFINE(HAVE_CALLING_CONVENTIONS,1,
+    [Define to 1 if tests/libtests has calling conventions checking for the CPU])
+  fi
+  AC_SUBST(CALLING_CONVENTIONS_OBJS)
+
+fi
+
+
+# If the user gave an MPN_PATH, use that verbatim, otherwise choose
+# according to the ABI and add "generic".
+#
+if test -n "$MPN_PATH"; then
+  path="$MPN_PATH"
+else
+                    eval tmp=\"\$path$abi1\"
+  test -n "$tmp" || eval tmp=\"\$path$abi2\"
+  path="$tmp generic"
+fi
+
+
+# Long long limb setup for gmp.h.
+case $limb_chosen in
+longlong) DEFN_LONG_LONG_LIMB="#define _LONG_LONG_LIMB 1"    ;;
+*)        DEFN_LONG_LONG_LIMB="/* #undef _LONG_LONG_LIMB */" ;;
+esac
+AC_SUBST(DEFN_LONG_LONG_LIMB)
+
+
+# The C compiler and preprocessor, put into ANSI mode if possible.
+AC_PROG_CC
+AC_PROG_CC_STDC
+AC_PROG_CPP
+GMP_H_ANSI
+
+
+# The C compiler on the build system, and associated tests.
+GMP_PROG_CC_FOR_BUILD
+GMP_PROG_CPP_FOR_BUILD
+GMP_PROG_EXEEXT_FOR_BUILD
+GMP_C_FOR_BUILD_ANSI
+GMP_CHECK_LIBM_FOR_BUILD
+
+
+# How to assemble, used with CFLAGS etc, see mpn/Makeasm.am.
+# Using the compiler is a lot easier than figuring out how to invoke the
+# assembler directly.
+#
+test -n "$CCAS" || CCAS="$CC -c"
+AC_SUBST(CCAS)
+
+
+# The C++ compiler, if desired.
+want_cxx=no
+if test $enable_cxx != no; then
+  test_CXXFLAGS=${CXXFLAGS+set}
+  AC_PROG_CXX
+
+  echo "CXXFLAGS chosen by autoconf: $CXXFLAGS" >&AC_FD_CC
+  cxxflags_ac_prog_cxx=$CXXFLAGS
+  cxxflags_list=ac_prog_cxx
+
+  # If the user didn't specify $CXXFLAGS, then try $CFLAGS, with -g removed
+  # if AC_PROG_CXX thinks that doesn't work.  $CFLAGS stands a good chance
+  # of working, eg. on a GNU system where CC=gcc and CXX=g++.
+  # NOTE(review): autoconf's cache variable is ac_cv_prog_cxx_g - confirm.
+  if test "$test_CXXFLAGS" != set; then
+    cxxflags_cflags=$CFLAGS
+    cxxflags_list="cflags $cxxflags_list"
+    if test "$ac_prog_cxx_g" = no; then
+      cxxflags_cflags=`echo "$cxxflags_cflags" | sed -e 's/^-g$//' -e 's/ -g / /' -e 's/^-g //' -e 's/ -g$//'`
+    fi
+  fi
+
+  # See if the C++ compiler works.  If the user specified CXXFLAGS then all
+  # we're doing is checking whether AC_PROG_CXX succeeded, since it doesn't
+  # give a fatal error, just leaves CXX set to a default g++.  If on the
+  # other hand the user didn't specify CXXFLAGS then we get to try here our
+  # $cxxflags_list alternatives.
+  #
+  # Automake includes $CPPFLAGS in a C++ compile, so we do the same here.
+  #
+  for cxxflags_choice in $cxxflags_list; do
+    eval CXXFLAGS=\"\$cxxflags_$cxxflags_choice\"
+    GMP_PROG_CXX_WORKS($CXX $CPPFLAGS $CXXFLAGS,
+      [want_cxx=yes
+      break])
+  done
+
+  # If --enable-cxx=yes but a C++ compiler can't be found, then abort.
+  if test $want_cxx = no && test $enable_cxx = yes; then
+    AC_MSG_ERROR([C++ compiler not available, see config.log for details])
+  fi
+fi
+
+AM_CONDITIONAL(WANT_CXX, test $want_cxx = yes)
+
+# FIXME: We're not interested in CXXCPP for ourselves, but if we don't do it
+# here then AC_PROG_LIBTOOL will AC_REQUIRE it (via _LT_AC_TAGCONFIG) and
+# hence execute it unconditionally, and that will fail if there's no C++
+# compiler (and no generic /lib/cpp).
+#
+if test $want_cxx = yes; then
+  AC_PROG_CXXCPP
+fi
+
+
+# Path setups for Cray, according to IEEE or CFP.  These must come after
+# deciding the compiler.
+#
+GMP_CRAY_OPTIONS(
+  [add_path="cray/ieee"],
+  [add_path="cray/cfp"; extra_functions="mulwwc90"],
+  [add_path="cray/cfp"; extra_functions="mulwwj90"])
+
+
+if test -z "$MPN_PATH"; then
+  path="$add_path $path"
+fi
+
+# For a nail build, also look in "nails" subdirectories.
+#
+if test $GMP_NAIL_BITS != 0 && test -z "$MPN_PATH"; then
+  new_path=
+  for i in $path; do
+    case $i in
+    generic) new_path="$new_path $i" ;;
+    *)       new_path="$new_path $i/nails $i" ;;
+    esac
+  done
+  path=$new_path
+fi
+
+
+# Put all directories into CPUVEC_list so as to get a full set of
+# CPUVEC_SETUP_$tmp_suffix defines into config.h, even if some of them are
+# empty because mmx and/or sse2 had to be dropped.
+#
+for i in $fat_path; do
+  GMP_FAT_SUFFIX(tmp_suffix, $i)
+  CPUVEC_list="$CPUVEC_list CPUVEC_SETUP_$tmp_suffix"
+done
+
+
+# If there's any sse2 or mmx in the path, check whether the assembler
+# supports it, and remove if not.
+#
+# We only need this in ABI=32, for ABI=64 on x86_64 we can assume a new
+# enough assembler.
+#
+case $host in
+  X86_PATTERN | X86_64_PATTERN)
+    if test "$ABI" = 32; then
+      case "$path $fat_path" in
+        *mmx*)   GMP_ASM_X86_MMX( , [GMP_STRIP_PATH(*mmx*)]) ;;
+      esac
+      case "$path $fat_path" in
+        *sse2*)  GMP_ASM_X86_SSE2( , [GMP_STRIP_PATH(sse2)]) ;;
+      esac
+    fi
+    ;;
+esac
+
+
+cat >&AC_FD_CC <<EOF
+Decided:
+ABI=$ABI
+CC=$CC
+CFLAGS=$CFLAGS
+CPPFLAGS=$CPPFLAGS
+GMP_LDFLAGS=$GMP_LDFLAGS
+CXX=$CXX
+CXXFLAGS=$CXXFLAGS
+path=$path
+EOF
+echo "using ABI=\"$ABI\""
+echo "      CC=\"$CC\""
+echo "      CFLAGS=\"$CFLAGS\""
+echo "      CPPFLAGS=\"$CPPFLAGS\""
+if test $want_cxx = yes; then
+  echo "      CXX=\"$CXX\""
+  echo "      CXXFLAGS=\"$CXXFLAGS\""
+fi
+echo "      MPN_PATH=\"$path\""
+
+
+# Automake ansi2knr support.
+AM_C_PROTOTYPES
+
+CL_AS_NOEXECSTACK
+
+GMP_PROG_AR
+GMP_PROG_NM
+
+case $host in
+  # FIXME: On AIX 3 and 4, $libname.a is included in libtool
+  # $library_names_spec, so libgmp.a becomes a symlink to libgmp.so, making
+  # it impossible to build shared and static libraries simultaneously.
+  # Disable shared libraries by default, but let the user override with
+  # --enable-shared --disable-static.
+  #
+  # FIXME: This $libname.a problem looks like it might apply to *-*-amigaos*
+  # and *-*-os2* too, but wait for someone to test this before worrying
+  # about it.  If there is a problem then of course libtool is the right
+  # place to fix it.
+  #
+  [*-*-aix[34]*])
+    if test -z "$enable_shared"; then enable_shared=no; fi ;;
+esac
+
+
+# Configs for Windows DLLs.
+
+AC_LIBTOOL_WIN32_DLL
+
+AC_SUBST(LIBGMP_DLL,0)
+case $host in
+  *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2*)
+    # By default, build only static.
+    if test -z "$enable_shared"; then
+      enable_shared=no
+    fi
+    # Don't allow both static and DLL.
+    if test "$enable_shared" != no && test "$enable_static" != no; then
+      AC_MSG_ERROR([cannot build both static and DLL, since gmp.h is different for each.
+Use "--disable-static --enable-shared" to build just a DLL.])
+    fi
+
+    # "-no-undefined" is required when building a DLL, see documentation on
+    # AC_LIBTOOL_WIN32_DLL.
+    #
+    # "-Wl,--export-all-symbols" is a bit of a hack, it gets all libgmp and
+    # libgmpxx functions and variables exported.  This is what libtool did
+    # in the past, and it's convenient for us in the test programs.
+    #
+    # Maybe it'd be prudent to check for --export-all-symbols before using
+    # it, but it seems to have been in ld since at least 2000, and there's
+    # not really any alternative we want to take up at the moment.
+    #
+    # "-Wl,--output-def" is used to get a .def file for use by MS lib to make
+    # a .lib import library, described in the manual.  libgmp-3.dll.def
+    # corresponds to the libmp-3.dll.def generated by libtool (as a result
+    # of -export-symbols on that library).
+    #
+    # Incidentally, libtool does generate an import library libgmp.dll.a,
+    # but it's "ar" format and cannot be used by the MS linker.  There
+    # doesn't seem to be any GNU tool for generating or converting to .lib.
+    #
+    # FIXME: The .def files produced by -Wl,--output-def include isascii,
+    # iscsym, iscsymf and toascii, apparently because mingw ctype.h doesn't
+    # inline isascii (used in gmp).  It gives an extern inline for
+    # __isascii, but for some reason not the plain isascii.
+    #
+    if test "$enable_shared" = yes; then
+      GMP_LDFLAGS="$GMP_LDFLAGS -no-undefined -Wl,--export-all-symbols"
+      LIBGMP_LDFLAGS="$LIBGMP_LDFLAGS -Wl,--output-def,.libs/libgmp-3.dll.def"
+      LIBGMPXX_LDFLAGS="$LIBGMPXX_LDFLAGS -Wl,--output-def,.libs/libgmpxx-3.dll.def"
+      LIBGMP_DLL=1
+    fi
+    ;;
+esac
+
+
+# Ensure that $CONFIG_SHELL is available for AC_LIBTOOL_SYS_MAX_CMD_LEN.
+# It's often set already by _LT_AC_PROG_ECHO_BACKSLASH or
+# _AS_LINENO_PREPARE, but not always.
+#
+# The symptom of CONFIG_SHELL unset is some "expr" errors during the test,
+# and an empty result.  This only happens when invoked as "sh configure",
+# ie. no path, and can be seen for instance on ia64-*-hpux*.
+#
+# FIXME: Newer libtool should have its own fix for this.
+#
+if test -z "$CONFIG_SHELL"; then
+  CONFIG_SHELL=$SHELL
+fi
+
+# Enable CXX in libtool only if we want it, and never enable GCJ, nor RC on
+# mingw and cygwin.  Under --disable-cxx this avoids some error messages
+# from libtool arising from the fact we didn't actually run AC_PROG_CXX.
+# Notice that any user-supplied --with-tags setting takes precedence.
+#
+# FIXME: Is this the right way to get this effect?  Very possibly not, but
+# the current _LT_AC_TAGCONFIG doesn't really suggest an alternative.
+#
+if test "${with_tags+set}" != set; then
+  if test $want_cxx = yes; then
+    with_tags=CXX
+  else
+    with_tags=
+  fi
+fi
+
+# The dead hand of AC_REQUIRE makes AC_PROG_LIBTOOL expand and execute
+# AC_PROG_F77, even when F77 is not in the selected with_tags.  This is
+# probably harmless, but it's unsightly and bloats our configure, so pretend
+# AC_PROG_F77 has been expanded already.
+#
+# FIXME: Rumour has it libtool will one day provide a way for a configure.in
+# to say what it wants from among supported languages etc.
+#
+AC_PROVIDE([AC_PROG_F77])
+
+AC_PROG_LIBTOOL
+
+# Generate an error here if attempting to build both shared and static when
+# $libname.a is in $library_names_spec (as mentioned above), rather than
+# wait for ar or ld to fail.
+#
+if test "$enable_shared" = yes && test "$enable_static" = yes; then
+  case $library_names_spec in
+    *libname.a*)
+      AC_MSG_ERROR([cannot create both shared and static libraries on this system, --disable one of the two])
+      ;;
+  esac
+fi
+
+AM_CONDITIONAL(ENABLE_STATIC, test "$enable_static" = yes)
+
+
+# Many of these library and header checks are for the benefit of
+# supplementary programs.  libgmp doesn't use anything too weird.
+
+AC_HEADER_STDC
+AC_HEADER_TIME
+
+# Reasons for testing:
+#   float.h - not in SunOS bundled cc
+#   invent.h - IRIX specific
+#   langinfo.h - X/Open standard only, not in djgpp for instance
+#   locale.h - old systems won't have this
+#   nl_types.h - X/Open standard only, not in djgpp for instance
+#       (usually langinfo.h gives nl_item etc, but not on netbsd 1.4.1)
+#   sys/attributes.h - IRIX specific
+#   sys/iograph.h - IRIX specific
+#   sys/mman.h - not in Cray Unicos
+#   sys/param.h - not in mingw
+#   sys/processor.h - solaris specific, though also present in macos
+#   sys/pstat.h - HPUX specific
+#   sys/resource.h - not in mingw
+#   sys/sysctl.h - not in mingw
+#   sys/sysinfo.h - OSF specific
+#   sys/syssgi.h - IRIX specific
+#   sys/systemcfg.h - AIX specific
+#   sys/time.h - autoconf suggests testing, don't know anywhere without it
+#   sys/times.h - not in mingw
+#   machine/hal_sysinfo.h - OSF specific
+#
+# inttypes.h, stdint.h, unistd.h and sys/types.h are already in the autoconf
+# default tests
+#
+AC_CHECK_HEADERS(fcntl.h float.h invent.h langinfo.h locale.h nl_types.h sys/attributes.h sys/iograph.h sys/mman.h sys/param.h sys/processor.h sys/pstat.h sys/sysinfo.h sys/syssgi.h sys/systemcfg.h sys/time.h sys/times.h)
+
+# On SunOS, sys/resource.h needs sys/time.h (for struct timeval)
+AC_CHECK_HEADERS(sys/resource.h,,,
+[#if TIME_WITH_SYS_TIME
+# include <sys/time.h>
+# include <time.h>
+#else
+# if HAVE_SYS_TIME_H
+#  include <sys/time.h>
+# else
+#  include <time.h>
+# endif
+#endif])
+
+# On NetBSD and OpenBSD, sys/sysctl.h needs sys/param.h for various constants
+AC_CHECK_HEADERS(sys/sysctl.h,,,
+[#if HAVE_SYS_PARAM_H
+# include <sys/param.h>
+#endif])
+
+# On OSF 4.0, <machine/hal_sysinfo.h> must have <sys/sysinfo.h> for ulong_t
+AC_CHECK_HEADERS(machine/hal_sysinfo.h,,,
+[#if HAVE_SYS_SYSINFO_H
+# include <sys/sysinfo.h>
+#endif])
+
+# Reasons for testing:
+#   optarg - not declared in mingw
+#   fgetc, fscanf, ungetc, vfprintf - not declared in SunOS 4
+#   sys_errlist, sys_nerr - not declared in SunOS 4
+#
+# optarg should be in unistd.h and the rest in stdio.h, both of which are
+# in the autoconf default includes.
+#
+# sys_errlist and sys_nerr are supposed to be in <errno.h> on SunOS according
+# to the man page (but aren't), in glibc they're in stdio.h.
+#
+AC_CHECK_DECLS([fgetc, fscanf, optarg, ungetc, vfprintf])
+AC_CHECK_DECLS([sys_errlist, sys_nerr], , ,
+[#include <stdio.h>
+#include <errno.h>])
+
+AC_TYPE_SIGNAL
+
+# Reasons for testing:
+#   intmax_t       - C99
+#   long double    - not in the HP bundled K&R cc
+#   long long      - only in reasonably recent compilers
+#   ptrdiff_t      - seems to be everywhere, maybe don't need to check this
+#   quad_t         - BSD specific
+#   uint_least32_t - C99
+#
+# the default includes are sufficient for all these types
+#
+AC_CHECK_TYPES([intmax_t, long double, long long, ptrdiff_t, quad_t,
+               uint_least32_t, intptr_t])
+
+AC_C_STRINGIZE
+
+# FIXME: Really want #ifndef __cplusplus around the #define volatile
+# replacement autoconf gives, since volatile is always available in C++.
+# But we don't use it in C++ currently.
+AC_C_VOLATILE
+
+AC_C_RESTRICT
+
+GMP_C_STDARG
+GMP_C_ATTRIBUTE_CONST
+GMP_C_ATTRIBUTE_MALLOC
+GMP_C_ATTRIBUTE_MODE
+GMP_C_ATTRIBUTE_NORETURN
+
+GMP_H_EXTERN_INLINE
+
+# from libtool
+AC_CHECK_LIBM
+AC_SUBST(LIBM)
+
+GMP_FUNC_ALLOCA
+GMP_OPTION_ALLOCA
+
+GMP_H_HAVE_FILE
+
+AC_C_BIGENDIAN(
+  [AC_DEFINE(HAVE_LIMB_BIG_ENDIAN, 1)
+   GMP_DEFINE_RAW("define_not_for_expansion(\`HAVE_LIMB_BIG_ENDIAN')", POST)],
+  [AC_DEFINE(HAVE_LIMB_LITTLE_ENDIAN, 1)
+   GMP_DEFINE_RAW("define_not_for_expansion(\`HAVE_LIMB_LITTLE_ENDIAN')", POST)
+  ], [:])
+AH_VERBATIM([HAVE_LIMB],
+[/* Define one of these to 1 for the endianness of `mp_limb_t'.
+   If the endianness is not a simple big or little, or you don't know what
+   it is, then leave both undefined. */
+#undef HAVE_LIMB_BIG_ENDIAN
+#undef HAVE_LIMB_LITTLE_ENDIAN])
+
+GMP_C_DOUBLE_FORMAT
+
+
+# Reasons for testing:
+#   alarm - not in mingw
+#   attr_get - IRIX specific
+#   clock_gettime - not in glibc 2.2.4, only very recent systems
+#   cputime - not in glibc
+#   getsysinfo - OSF specific
+#   getrusage - not in mingw
+#   gettimeofday - not in mingw
+#   mmap - not in mingw, djgpp
+#   nl_langinfo - X/Open standard only, not in djgpp for instance
+#   obstack_vprintf - glibc specific
+#   processor_info - solaris specific
+#   pstat_getprocessor - HPUX specific (10.x and up)
+#   raise - an ANSI-ism, though probably almost universal by now
+#   read_real_time - AIX specific
+#   sigaction - not in mingw
+#   sigaltstack - not in mingw, or old AIX (reputedly)
+#   sigstack - not in mingw
+#   strerror - not in SunOS
+#   strnlen - glibc extension (some other systems too)
+#   syssgi - IRIX specific
+#   times - not in mingw
+#
+# clock_gettime is in librt on *-*-osf5.1.  We could look for it
+# there, but that's not worth bothering with unless it has a decent
+# resolution (in a quick test clock_getres said only 1 millisecond).
+#
+# AC_FUNC_STRNLEN is not used because we don't want the AC_LIBOBJ
+# replacement setups it gives.  It detects a faulty strnlen on AIX, but
+# missing out on that test is ok since our only use of strnlen is in
+# __gmp_replacement_vsnprintf which is not required on AIX since it has a
+# vsnprintf.
+#
+AC_CHECK_FUNCS(alarm attr_get clock clock_gettime cputime getpagesize getrusage gettimeofday getsysinfo localeconv memset mmap mprotect nl_langinfo obstack_vprintf popen processor_info pstat_getprocessor raise read_real_time sigaction sigaltstack sigstack syssgi strchr strerror strnlen strtol strtoul sysconf sysctl sysctlbyname times)
+
+GMP_FUNC_VSNPRINTF
+GMP_FUNC_SSCANF_WRITABLE_INPUT
+
+# Reasons for checking:
+#   pst_processor psp_iticksperclktick - not in hpux 9
+#
+AC_CHECK_MEMBER(struct pst_processor.psp_iticksperclktick,
+                [AC_DEFINE(HAVE_PSP_ITICKSPERCLKTICK, 1,
+[Define to 1 if <sys/pstat.h> `struct pst_processor' exists
+and contains `psp_iticksperclktick'.])],,
+                [#include <sys/pstat.h>])
+
+# C++ tests, whenever a C++ compiler is going to be used
+#
+if test $want_cxx = yes; then
+  AC_LANG_PUSH(C++)
+
+  # Reasons for testing:
+  #   <sstream> - not in g++ 2.95.2
+  #   std::locale - not in g++ 2.95.4
+  #
+  AC_CHECK_HEADERS([sstream])
+  AC_CHECK_TYPES([std::locale],,,[#include <locale>])
+
+  AC_LANG_POP(C++)
+fi
+
+
+# Pick the correct source files in $path and link them to mpn/.
+# $gmp_mpn_functions lists all functions we need.
+#
+# The rule is to find a file with the function name and a .asm, .S,
+# .s, or .c extension.  Certain multi-function files with special names
+# can provide some functions too.  (mpn/Makefile.am passes
+# -DOPERATION_<func> to get them to generate the right code.)
+
+# Note: $gmp_mpn_functions must have mod_1 before pre_mod_1 so the former
+#       can optionally provide the latter as an extra entrypoint.  Likewise
+#       divrem_1 and pre_divrem_1.
+
+gmp_mpn_functions_optional="umul udiv                                  \
+  invert_limb sqr_diagonal                                             \
+  mul_2 mul_3 mul_4                                                    \
+  addmul_2 addmul_3 addmul_4 addmul_5 addmul_6 addmul_7 addmul_8       \
+  addlsh1_n sublsh1_n rsblsh1_n rsh1add_n rsh1sub_n                    \
+  addlsh2_n sublsh2_n rsblsh2_n                                                \
+  addlsh_n sublsh_n rsblsh_n                                           \
+  add_n_sub_n addaddmul_1msb0"
+
+gmp_mpn_functions="$extra_functions                                       \
+  add add_1 add_n sub sub_1 sub_n neg com mul_1 addmul_1                  \
+  submul_1 lshift rshift dive_1 diveby3 divis divrem divrem_1 divrem_2     \
+  fib2_ui mod_1 mod_34lsub1 mode1o pre_divrem_1 pre_mod_1 dump            \
+  mod_1_1 mod_1_2 mod_1_3 mod_1_4 lshiftc                                 \
+  mul mul_fft mul_n sqr mul_basecase sqr_basecase nussbaumer_mul          \
+  random random2 pow_1                                                    \
+  rootrem sqrtrem get_str set_str scan0 scan1 popcount hamdist cmp        \
+  perfsqr perfpow                                                         \
+  gcd_1 gcd gcdext_1 gcdext gcd_lehmer gcd_subdiv_step                    \
+  gcdext_lehmer gcdext_subdiv_step                                        \
+  div_q tdiv_qr jacbase get_d                                             \
+  matrix22_mul hgcd2 hgcd mullo_n mullo_basecase                          \
+  toom22_mul toom32_mul toom42_mul toom52_mul toom62_mul                  \
+  toom33_mul toom43_mul toom53_mul toom63_mul                             \
+  toom44_mul                                                              \
+  toom6h_mul toom6_sqr toom8h_mul toom8_sqr                               \
+  toom_couple_handling                                                    \
+  toom2_sqr toom3_sqr toom4_sqr                                                   \
+  toom_eval_dgr3_pm1 toom_eval_dgr3_pm2                                   \
+  toom_eval_pm1 toom_eval_pm2 toom_eval_pm2exp toom_eval_pm2rexp          \
+  toom_interpolate_5pts toom_interpolate_6pts toom_interpolate_7pts       \
+  toom_interpolate_8pts toom_interpolate_12pts toom_interpolate_16pts     \
+  invertappr invert binvert mulmod_bnm1 sqrmod_bnm1                       \
+  sbpi1_div_q sbpi1_div_qr sbpi1_divappr_q                                \
+  dcpi1_div_q dcpi1_div_qr dcpi1_divappr_q                                \
+  mu_div_qr mu_divappr_q mu_div_q                                         \
+  bdiv_q_1                                                                \
+  sbpi1_bdiv_q sbpi1_bdiv_qr                                              \
+  dcpi1_bdiv_q dcpi1_bdiv_qr                                              \
+  mu_bdiv_q mu_bdiv_qr                                                    \
+  bdiv_q bdiv_qr                                                          \
+  divexact bdiv_dbm1c redc_1 redc_2 redc_n powm powlo powm_sec subcnd_n           \
+  redc_1_sec trialdiv remove                                              \
+  and_n andn_n nand_n ior_n iorn_n nior_n xor_n xnor_n                    \
+  copyi copyd zero                                                        \
+  $gmp_mpn_functions_optional"
+
+define(GMP_MULFUNC_CHOICES,
+[# functions that can be provided by multi-function files
+tmp_mulfunc=
+case $tmp_fn in
+  add_n|sub_n)       tmp_mulfunc="aors_n"    ;;
+  addmul_1|submul_1) tmp_mulfunc="aorsmul_1" ;;
+  popcount|hamdist)  tmp_mulfunc="popham"    ;;
+  and_n|andn_n|nand_n | ior_n|iorn_n|nior_n | xor_n|xnor_n)
+                     tmp_mulfunc="logops_n"  ;;
+  lshift|rshift)     tmp_mulfunc="lorrshift";;
+  addlsh1_n)
+                    tmp_mulfunc="aorslsh1_n aorrlsh1_n";;
+  sublsh1_n)
+                    tmp_mulfunc="aorslsh1_n sorrlsh1_n";;
+  rsblsh1_n)
+                    tmp_mulfunc="aorrlsh1_n sorrlsh1_n";;
+  addlsh2_n)
+                    tmp_mulfunc="aorslsh2_n aorrlsh2_n";;
+  sublsh2_n)
+                    tmp_mulfunc="aorslsh2_n sorrlsh2_n";;
+  rsblsh2_n)
+                    tmp_mulfunc="aorrlsh2_n sorrlsh2_n";;
+  addlsh_n)
+                    tmp_mulfunc="aorslsh_n aorrlsh_n";;
+  sublsh_n)
+                    tmp_mulfunc="aorslsh_n sorrlsh_n";;
+  rsblsh_n)
+                    tmp_mulfunc="aorrlsh_n sorrlsh_n";;
+  rsh1add_n|rsh1sub_n)
+                    tmp_mulfunc="rsh1aors_n";;
+esac
+])
+
+# the list of all object files used by mpn/Makefile.in and the
+# top-level Makefile.in, respectively
+mpn_objects=
+mpn_objs_in_libgmp=
+
+# links from the sources, to be removed by "make distclean"
+gmp_srclinks=
+
+
+# mpn_relative_top_srcdir is $top_srcdir, but for use from within the mpn
+# build directory.  If $srcdir is relative then we use a relative path too,
+# so the two trees can be moved together.
+case $srcdir in
+  [[\\/]* | ?:[\\/]*])  # absolute, as per autoconf
+    mpn_relative_top_srcdir=$srcdir ;;
+  *)                    # relative
+    mpn_relative_top_srcdir=../$srcdir ;;
+esac
+
+
+define(MPN_SUFFIXES,[asm S s c])
+
+dnl  Usage: GMP_FILE_TO_FUNCTION(func,file)
+dnl
+dnl  Set $func to the function base name for $file, eg. dive_1 gives
+dnl  divexact_1.
+dnl
+define(GMP_FILE_TO_FUNCTION,
+[case $$2 in
+  dive_1)      $1=divexact_1 ;;
+  diveby3)     $1=divexact_by3c ;;
+  pre_divrem_1) $1=preinv_divrem_1 ;;
+  mode1o)      $1=modexact_1c_odd ;;
+  pre_mod_1)   $1=preinv_mod_1 ;;
+  *)           $1=$$2 ;;
+esac
+])
+
+# Fat binary setups.
+#
+# We proceed through each $fat_path directory, and look for $fat_function
+# routines there.  Those found are incorporated in the build by generating a
+# little mpn/<foo>.asm or mpn/<foo>.c file in the build directory, with
+# suitable function renaming, and adding that to $mpn_objects (the same as a
+# normal mpn file).
+#
+# fat.h is generated with macros to let internal calls to each $fat_function
+# go directly through __gmpn_cpuvec, plus macros and declarations helping to
+# setup that structure, on a per-directory basis ready for
+# mpn/<cpu>/fat/fat.c.
+#
+# fat.h includes thresholds listed in $fat_thresholds, extracted from
+# gmp-mparam.h in each directory.  An overall maximum for each threshold is
+# established, for use in making fixed size arrays of temporary space.
+# (Eg. MUL_TOOM33_THRESHOLD_LIMIT used by mpn/generic/mul.c.)
+#
+# It'd be possible to do some of this manually, but when there's more than a
+# few functions and a few directories it becomes very tedious, and very
+# prone to having some routine accidentally omitted.  On that basis it seems
+# best to automate as much as possible, even if the code to do so is a bit
+# ugly.
+#
+
+if test -n "$fat_path"; then
+  # Usually the mpn build directory is created with mpn/Makefile
+  # instantiation, but we want to write to it sooner.
+  mkdir mpn 2>/dev/null
+
+  echo "/* fat.h - setups for fat binaries." >fat.h
+  echo "   Generated by configure - DO NOT EDIT.  */" >>fat.h
+
+  AC_DEFINE(WANT_FAT_BINARY, 1, [Define to 1 when building a fat binary.])
+  GMP_DEFINE(WANT_FAT_BINARY, yes)
+
+  # Don't want normal copies of fat functions
+  for tmp_fn in $fat_functions; do
+    GMP_REMOVE_FROM_LIST(gmp_mpn_functions, $tmp_fn)
+    GMP_REMOVE_FROM_LIST(gmp_mpn_functions_optional, $tmp_fn)
+  done
+
+  for tmp_fn in $fat_functions; do
+    GMP_FILE_TO_FUNCTION(tmp_fbase,tmp_fn)
+    echo "
+#ifndef OPERATION_$tmp_fn
+#undef  mpn_$tmp_fbase
+#define mpn_$tmp_fbase  (*__gmpn_cpuvec.$tmp_fbase)
+#endif
+DECL_$tmp_fbase (__MPN(${tmp_fbase}_init));" >>fat.h
+    # encourage various macros to use fat functions
+    AC_DEFINE_UNQUOTED(HAVE_NATIVE_mpn_$tmp_fbase)
+  done
+
+  # Redefine each threshold as CPUVEC_THRESHOLD of its lower-case field
+  # name.  The doubled brackets are m4 quoting; one level is stripped so
+  # the shell sees bracketed ranges, the same as the other threshold loops
+  # in this section, and the quotes keep the shell from globbing them.
+  echo "" >>fat.h
+  echo "/* variable thresholds */" >>fat.h
+  for tmp_tn in $fat_thresholds; do
+    tmp_field_name=`echo $tmp_tn | tr '[[A-Z]]' '[[a-z]]'`
+    echo "#undef  $tmp_tn" >>fat.h
+    echo "#define $tmp_tn  CPUVEC_THRESHOLD ($tmp_field_name)" >>fat.h
+  done
+
+  echo "
+/* Copy all fields into __gmpn_cpuvec.
+   memcpy is not used because it might operate byte-wise (depending on its
+   implementation), and we need the function pointer writes to be atomic.
+   "volatile" discourages the compiler from trying to optimize this.  */
+#define CPUVEC_INSTALL(vec) \\
+  do { \\
+    volatile struct cpuvec_t *p = &__gmpn_cpuvec; \\" >>fat.h
+  for tmp_fn in $fat_functions; do
+    GMP_FILE_TO_FUNCTION(tmp_fbase,tmp_fn)
+    echo "    p->$tmp_fbase = vec.$tmp_fbase; \\" >>fat.h
+  done
+  for tmp_tn in $fat_thresholds; do
+    tmp_field_name=`echo $tmp_tn | tr [[A-Z]] [[a-z]]`
+    echo "    p->$tmp_field_name = vec.$tmp_field_name; \\" >>fat.h
+  done
+  echo "  } while (0)" >>fat.h
+
+  echo "
+/* A helper to check all fields are filled. */
+#define ASSERT_CPUVEC(vec) \\
+  do { \\" >>fat.h
+  for tmp_fn in $fat_functions; do
+    GMP_FILE_TO_FUNCTION(tmp_fbase,tmp_fn)
+    echo "    ASSERT (vec.$tmp_fbase != NULL); \\" >>fat.h
+  done
+  for tmp_tn in $fat_thresholds; do
+    tmp_field_name=`echo $tmp_tn | tr [[A-Z]] [[a-z]]`
+    echo "    ASSERT (vec.$tmp_field_name != 0); \\" >>fat.h
+  done
+  echo "  } while (0)" >>fat.h
+
+  # Emit ITERATE_FAT_THRESHOLDS, which invokes a caller-supplied ITERATE
+  # macro once per threshold with the macro name and lower-case field name.
+  echo "
+/* Call ITERATE(name,field) for each fat threshold name and its field. */
+#define ITERATE_FAT_THRESHOLDS() \\
+  do { \\" >>fat.h
+  for tmp_tn in $fat_thresholds; do
+    tmp_field_name=`echo $tmp_tn | tr [[A-Z]] [[a-z]]`
+    echo "    ITERATE ($tmp_tn, $tmp_field_name); \\" >>fat.h
+  done
+  echo "  } while (0)" >>fat.h
+
+  for tmp_dir in $fat_path; do
+    CPUVEC_SETUP=
+    THRESH_ASM_SETUP=
+    echo "" >>fat.h
+    GMP_FAT_SUFFIX(tmp_suffix, $tmp_dir)
+
+    # In order to keep names unique on a DOS 8.3 filesystem, use a prefix
+    # (rather than a suffix) for the generated file names, and abbreviate.
+    case $tmp_suffix in
+      pentium)       tmp_prefix=p   ;;
+      pentium_mmx)   tmp_prefix=pm  ;;
+      p6_mmx)        tmp_prefix=p2  ;;
+      p6_p3mmx)      tmp_prefix=p3  ;;
+      pentium4)      tmp_prefix=p4  ;;
+      pentium4_mmx)  tmp_prefix=p4m ;;
+      pentium4_sse2) tmp_prefix=p4s ;;
+      k6_mmx)        tmp_prefix=k6m ;;
+      k6_k62mmx)     tmp_prefix=k62 ;;
+      k7_mmx)        tmp_prefix=k7m ;;
+      *)             tmp_prefix=$tmp_suffix ;;
+    esac
+
+    # Extract desired thresholds from gmp-mparam.h file in this directory,
+    # if present.
+    tmp_mparam=$srcdir/mpn/$tmp_dir/gmp-mparam.h
+    if test -f $tmp_mparam; then
+      for tmp_tn in $fat_thresholds; do
+        tmp_thresh=`sed -n "s/^#define $tmp_tn[        ]*\\([0-9][0-9]*\\).*$/\\1/p" $tmp_mparam`
+        if test -n "$tmp_thresh"; then
+          THRESH_ASM_SETUP=["${THRESH_ASM_SETUP}define($tmp_tn,$tmp_thresh)
+"]
+          CPUVEC_SETUP="$CPUVEC_SETUP    decided_cpuvec.`echo $tmp_tn | tr [[A-Z]] [[a-z]]` = $tmp_thresh; \\
+"
+          eval tmp_limit=\$${tmp_tn}_LIMIT
+          if test -z "$tmp_limit"; then
+            tmp_limit=0
+          fi
+          if test $tmp_thresh -gt $tmp_limit; then
+            eval ${tmp_tn}_LIMIT=$tmp_thresh
+          fi
+        fi
+      done
+    fi
+
+    for tmp_fn in $fat_functions; do
+      GMP_MULFUNC_CHOICES
+
+      for tmp_base in $tmp_fn $tmp_mulfunc; do
+        for tmp_ext in MPN_SUFFIXES; do
+          tmp_file=$srcdir/mpn/$tmp_dir/$tmp_base.$tmp_ext
+          if test -f $tmp_file; then
+
+            mpn_objects="$mpn_objects ${tmp_prefix}_$tmp_fn.lo"
+            mpn_objs_in_libgmp="$mpn_objs_in_libgmp mpn/${tmp_prefix}_$tmp_fn.lo"
+
+            GMP_FILE_TO_FUNCTION(tmp_fbase,tmp_fn)
+
+            # carry-in variant, eg. divrem_1c or modexact_1c_odd
+            case $tmp_fbase in
+              *_1*) tmp_fbasec=`echo $tmp_fbase | sed 's/_1/_1c/'` ;;
+              *)    tmp_fbasec=${tmp_fbase}c ;;
+            esac
+
+            # Create a little file doing an include from srcdir.  The
+            # OPERATION and renamings aren't all needed all the time, but
+            # they don't hurt if unused.
+            #
+            # FIXME: Should generate these via config.status commands.
+            # Would need them all in one AC_CONFIG_COMMANDS though, since
+            # that macro doesn't accept a set of separate commands generated
+            # by shell code.
+            #
+            case $tmp_ext in
+              asm)
+                # hide the d-n-l from autoconf's error checking
+                tmp_d_n_l=d""nl
+                echo ["$tmp_d_n_l  mpn_$tmp_fbase - from $tmp_dir directory for fat binary.
+$tmp_d_n_l  Generated by configure - DO NOT EDIT.
+
+define(OPERATION_$tmp_fn)
+define(__gmpn_$tmp_fbase, __gmpn_${tmp_fbase}_$tmp_suffix)
+define(__gmpn_$tmp_fbasec,__gmpn_${tmp_fbasec}_${tmp_suffix})
+define(__gmpn_preinv_${tmp_fbase},__gmpn_preinv_${tmp_fbase}_${tmp_suffix})
+
+$tmp_d_n_l  For k6 and k7 gcd_1 calling their corresponding mpn_modexact_1_odd
+ifdef(\`__gmpn_modexact_1_odd',,
+\`define(__gmpn_modexact_1_odd,__gmpn_modexact_1_odd_${tmp_suffix})')
+
+$THRESH_ASM_SETUP
+include][($mpn_relative_top_srcdir/mpn/$tmp_dir/$tmp_base.asm)
+"] >mpn/${tmp_prefix}_$tmp_fn.asm
+                ;;
+              c)
+                echo ["/* mpn_$tmp_fbase - from $tmp_dir directory for fat binary.
+   Generated by configure - DO NOT EDIT. */
+
+#define OPERATION_$tmp_fn 1
+#define __gmpn_$tmp_fbase           __gmpn_${tmp_fbase}_$tmp_suffix
+#define __gmpn_$tmp_fbasec          __gmpn_${tmp_fbasec}_${tmp_suffix}
+#define __gmpn_preinv_${tmp_fbase}  __gmpn_preinv_${tmp_fbase}_${tmp_suffix}
+
+#include \"$mpn_relative_top_srcdir/mpn/$tmp_dir/$tmp_base.c\"
+"] >mpn/${tmp_prefix}_$tmp_fn.c
+                ;;
+            esac
+
+            # Prototype, and append to CPUVEC_SETUP for this directory.
+            echo "DECL_$tmp_fbase (__gmpn_${tmp_fbase}_$tmp_suffix);" >>fat.h
+            CPUVEC_SETUP="$CPUVEC_SETUP    decided_cpuvec.$tmp_fbase = __gmpn_${tmp_fbase}_${tmp_suffix}; \\
+"
+            # Ditto for any preinv variant (preinv_divrem_1, preinv_mod_1).
+            if grep "^PROLOGUE(mpn_preinv_$tmp_fn)" $tmp_file >/dev/null; then
+              echo "DECL_preinv_$tmp_fbase (__gmpn_preinv_${tmp_fbase}_$tmp_suffix);" >>fat.h
+              CPUVEC_SETUP="$CPUVEC_SETUP    decided_cpuvec.preinv_$tmp_fbase = __gmpn_preinv_${tmp_fbase}_${tmp_suffix}; \\
+"
+            fi
+          fi
+        done
+      done
+    done
+
+    # Emit CPUVEC_SETUP for this directory
+    echo "" >>fat.h
+    echo "#define CPUVEC_SETUP_$tmp_suffix \\" >>fat.h
+    echo "  do { \\" >>fat.h
+    echo "$CPUVEC_SETUP  } while (0)" >>fat.h
+  done
+
+  # Emit threshold limits
+  echo "" >>fat.h
+  for tmp_tn in $fat_thresholds; do
+    eval tmp_limit=\$${tmp_tn}_LIMIT
+    echo "#define ${tmp_tn}_LIMIT  $tmp_limit" >>fat.h
+  done
+fi
+
+
+# Normal binary setups.
+#
+
+for tmp_ext in MPN_SUFFIXES; do
+  eval found_$tmp_ext=no
+done
+
+for tmp_fn in $gmp_mpn_functions; do
+  for tmp_ext in MPN_SUFFIXES; do
+    test "$no_create" = yes || rm -f mpn/$tmp_fn.$tmp_ext
+  done
+
+  # mpn_preinv_divrem_1 might have been provided by divrem_1.asm, likewise
+  # mpn_preinv_mod_1 by mod_1.asm.
+  case $tmp_fn in
+  pre_divrem_1)
+    if test "$HAVE_NATIVE_mpn_preinv_divrem_1" = yes; then continue; fi ;;
+  pre_mod_1)
+    if test "$HAVE_NATIVE_mpn_preinv_mod_1" = yes; then continue; fi ;;
+  esac
+
+  GMP_MULFUNC_CHOICES
+
+  found=no
+  for tmp_dir in $path; do
+    for tmp_base in $tmp_fn $tmp_mulfunc; do
+      for tmp_ext in MPN_SUFFIXES; do
+        tmp_file=$srcdir/mpn/$tmp_dir/$tmp_base.$tmp_ext
+        if test -f $tmp_file; then
+
+          # For a nails build, check if the file supports our nail bits.
+          # Generic code always supports all nails.
+          #
+          # FIXME: When a multi-function file is selected to provide one of
+          # the nails-neutral routines, like logops_n for and_n, the
+          # PROLOGUE grepping will create HAVE_NATIVE_mpn_<foo> defines for
+          # all functions in that file, even if they haven't all been
+          # nailified.  Not sure what to do about this, it's only really a
+          # problem for logops_n, and it's not too terrible to insist those
+          # get nailified always.
+          #
+          if test $GMP_NAIL_BITS != 0 && test $tmp_dir != generic; then
+            case $tmp_fn in
+              and_n | ior_n | xor_n | andn_n | \
+              copyi | copyd | \
+              popcount | hamdist | \
+              udiv | udiv_w_sdiv | umul | \
+              cntlz | invert_limb)
+                # these operations are either unaffected by nails or defined
+                # to operate on full limbs
+                ;;
+              *)
+                nails=[`sed -n 's/^[   ]*NAILS_SUPPORT(\(.*\))/\1/p' $tmp_file `]
+                for n in $nails; do
+                  case $n in
+                  *-*)
+                    n_start=`echo "$n" | sed -n 's/\(.*\)-.*/\1/p'`
+                    n_end=`echo "$n" | sed -n 's/.*-\(.*\)/\1/p'`
+                    ;;
+                  *)
+                    n_start=$n
+                    n_end=$n
+                    ;;
+                  esac
+                  if test $GMP_NAIL_BITS -ge $n_start && test $GMP_NAIL_BITS -le $n_end; then
+                    found=yes
+                    break
+                  fi
+                done
+                if test $found != yes; then
+                  continue
+                fi
+                ;;
+            esac
+          fi
+
+          found=yes
+          eval found_$tmp_ext=yes
+
+          if test $tmp_ext = c; then
+            tmp_u='$U'
+          else
+            tmp_u=
+          fi
+
+          mpn_objects="$mpn_objects $tmp_fn$tmp_u.lo"
+          mpn_objs_in_libgmp="$mpn_objs_in_libgmp mpn/$tmp_fn$tmp_u.lo"
+          AC_CONFIG_LINKS(mpn/$tmp_fn.$tmp_ext:mpn/$tmp_dir/$tmp_base.$tmp_ext)
+          gmp_srclinks="$gmp_srclinks mpn/$tmp_fn.$tmp_ext"
+
+          # Duplicate AC_DEFINEs are harmless, so it doesn't matter
+          # that multi-function files get grepped here repeatedly.
+          # The PROLOGUE pattern excludes the optional second parameter.
+          gmp_ep=[`
+            sed -n 's/^[       ]*MULFUNC_PROLOGUE(\(.*\))/\1/p' $tmp_file ;
+            sed -n 's/^[       ]*PROLOGUE(\([^,]*\).*)/\1/p' $tmp_file
+          `]
+          for gmp_tmp in $gmp_ep; do
+            AC_DEFINE_UNQUOTED(HAVE_NATIVE_$gmp_tmp)
+            eval HAVE_NATIVE_$gmp_tmp=yes
+          done
+
+          case $tmp_fn in
+          sqr_basecase) sqr_basecase_source=$tmp_file ;;
+          esac
+
+          break
+        fi
+      done
+      if test $found = yes; then break ; fi
+    done
+    if test $found = yes; then break ; fi
+  done
+
+  if test $found = no; then
+    for tmp_optional in $gmp_mpn_functions_optional; do
+      if test $tmp_optional = $tmp_fn; then
+        found=yes
+      fi
+    done
+    if test $found = no; then
+      AC_MSG_ERROR([no version of $tmp_fn found in path: $path])
+    fi
+  fi
+done
+
+# All cycle counters are .asm files currently
+if test -n "$SPEED_CYCLECOUNTER_OBJ"; then
+  found_asm=yes
+fi
+
+dnl  The following list only needs to have templates for those defines which
+dnl  are going to be tested by the code, there's no need to have every
+dnl  possible mpn routine.
+
+AH_VERBATIM([HAVE_NATIVE],
+[/* Define to 1 each of the following for which a native (ie. CPU specific)
+    implementation of the corresponding routine exists.  */
+#undef HAVE_NATIVE_mpn_add_n
+#undef HAVE_NATIVE_mpn_add_n_sub_n
+#undef HAVE_NATIVE_mpn_add_nc
+#undef HAVE_NATIVE_mpn_addaddmul_1msb0
+#undef HAVE_NATIVE_mpn_addlsh1_n
+#undef HAVE_NATIVE_mpn_addlsh2_n
+#undef HAVE_NATIVE_mpn_addlsh_n
+#undef HAVE_NATIVE_mpn_addmul_1c
+#undef HAVE_NATIVE_mpn_addmul_2
+#undef HAVE_NATIVE_mpn_addmul_3
+#undef HAVE_NATIVE_mpn_addmul_4
+#undef HAVE_NATIVE_mpn_addmul_5
+#undef HAVE_NATIVE_mpn_addmul_6
+#undef HAVE_NATIVE_mpn_addmul_7
+#undef HAVE_NATIVE_mpn_addmul_8
+#undef HAVE_NATIVE_mpn_and_n
+#undef HAVE_NATIVE_mpn_andn_n
+#undef HAVE_NATIVE_mpn_bdiv_dbm1c
+#undef HAVE_NATIVE_mpn_bdiv_q_1
+#undef HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#undef HAVE_NATIVE_mpn_com
+#undef HAVE_NATIVE_mpn_copyd
+#undef HAVE_NATIVE_mpn_copyi
+#undef HAVE_NATIVE_mpn_divexact_1
+#undef HAVE_NATIVE_mpn_divexact_by3c
+#undef HAVE_NATIVE_mpn_divrem_1
+#undef HAVE_NATIVE_mpn_divrem_1c
+#undef HAVE_NATIVE_mpn_divrem_2
+#undef HAVE_NATIVE_mpn_gcd_1
+#undef HAVE_NATIVE_mpn_hamdist
+#undef HAVE_NATIVE_mpn_invert_limb
+#undef HAVE_NATIVE_mpn_ior_n
+#undef HAVE_NATIVE_mpn_iorn_n
+#undef HAVE_NATIVE_mpn_lshift
+#undef HAVE_NATIVE_mpn_lshiftc
+#undef HAVE_NATIVE_mpn_lshsub_n
+#undef HAVE_NATIVE_mpn_mod_1
+#undef HAVE_NATIVE_mpn_mod_1_1p
+#undef HAVE_NATIVE_mpn_mod_1c
+#undef HAVE_NATIVE_mpn_mod_1s_2p
+#undef HAVE_NATIVE_mpn_mod_1s_4p
+#undef HAVE_NATIVE_mpn_mod_34lsub1
+#undef HAVE_NATIVE_mpn_modexact_1_odd
+#undef HAVE_NATIVE_mpn_modexact_1c_odd
+#undef HAVE_NATIVE_mpn_mul_1
+#undef HAVE_NATIVE_mpn_mul_1c
+#undef HAVE_NATIVE_mpn_mul_2
+#undef HAVE_NATIVE_mpn_mul_3
+#undef HAVE_NATIVE_mpn_mul_4
+#undef HAVE_NATIVE_mpn_mul_basecase
+#undef HAVE_NATIVE_mpn_nand_n
+#undef HAVE_NATIVE_mpn_nior_n
+#undef HAVE_NATIVE_mpn_popcount
+#undef HAVE_NATIVE_mpn_preinv_divrem_1
+#undef HAVE_NATIVE_mpn_preinv_mod_1
+#undef HAVE_NATIVE_mpn_redc_1
+#undef HAVE_NATIVE_mpn_redc_2
+#undef HAVE_NATIVE_mpn_rsblsh1_n
+#undef HAVE_NATIVE_mpn_rsblsh2_n
+#undef HAVE_NATIVE_mpn_rsblsh_n
+#undef HAVE_NATIVE_mpn_rsh1add_n
+#undef HAVE_NATIVE_mpn_rsh1add_nc
+#undef HAVE_NATIVE_mpn_rsh1sub_n
+#undef HAVE_NATIVE_mpn_rsh1sub_nc
+#undef HAVE_NATIVE_mpn_rshift
+#undef HAVE_NATIVE_mpn_sqr_basecase
+#undef HAVE_NATIVE_mpn_sqr_diagonal
+#undef HAVE_NATIVE_mpn_sub_n
+#undef HAVE_NATIVE_mpn_sub_nc
+#undef HAVE_NATIVE_mpn_sublsh1_n
+#undef HAVE_NATIVE_mpn_sublsh2_n
+#undef HAVE_NATIVE_mpn_sublsh_n
+#undef HAVE_NATIVE_mpn_submul_1c
+#undef HAVE_NATIVE_mpn_udiv_qrnnd
+#undef HAVE_NATIVE_mpn_udiv_qrnnd_r
+#undef HAVE_NATIVE_mpn_umul_ppmm
+#undef HAVE_NATIVE_mpn_umul_ppmm_r
+#undef HAVE_NATIVE_mpn_xor_n
+#undef HAVE_NATIVE_mpn_xnor_n])
+
+
+# Don't demand an m4 unless it's actually needed.
+if test $found_asm = yes; then
+  GMP_PROG_M4
+  GMP_M4_M4WRAP_SPURIOUS
+# else
+# It's unclear why this m4-not-needed stuff was ever done.
+#  if test -z "$M4" ; then
+#    M4=m4-not-needed
+#  fi
+fi
+
+# Only do the GMP_ASM checks if there's a .S or .asm wanting them.
+if test $found_asm = no && test $found_S = no; then
+  gmp_asm_syntax_testing=no
+fi
+
+if test "$gmp_asm_syntax_testing" != no; then
+  GMP_ASM_TEXT
+  GMP_ASM_DATA
+  GMP_ASM_LABEL_SUFFIX
+  GMP_ASM_GLOBL
+  GMP_ASM_GLOBL_ATTR
+  GMP_ASM_UNDERSCORE
+  GMP_ASM_RODATA
+  GMP_ASM_TYPE
+  GMP_ASM_SIZE
+  GMP_ASM_LSYM_PREFIX
+  GMP_ASM_W32
+  GMP_ASM_ALIGN_LOG
+
+  case $host in
+    hppa*-*-*)
+      # for both pa32 and pa64
+      GMP_INCLUDE_MPN(pa32/pa-defs.m4)
+      ;;
+    IA64_PATTERN)
+      GMP_ASM_IA64_ALIGN_OK
+      ;;
+    M68K_PATTERN)
+      GMP_ASM_M68K_INSTRUCTION
+      GMP_ASM_M68K_ADDRESSING
+      GMP_ASM_M68K_BRANCHES
+      ;;
+    [powerpc*-*-* | power[3-9]-*-*])
+      GMP_ASM_POWERPC_PIC_ALWAYS
+      GMP_ASM_POWERPC_R_REGISTERS
+      GMP_INCLUDE_MPN(powerpc32/powerpc-defs.m4)
+      case $host in
+        *-*-aix*)
+         case $ABI in
+           64 | aix64)  GMP_INCLUDE_MPN(powerpc64/aix.m4) ;;
+            *)           GMP_INCLUDE_MPN(powerpc32/aix.m4) ;;
+          esac
+          ;;
+        *-*-linux* | *-*-*bsd*)
+         case $ABI in
+           mode64)      GMP_INCLUDE_MPN(powerpc64/elf.m4) ;;
+           mode32 | 32) GMP_INCLUDE_MPN(powerpc32/elf.m4) ;;
+          esac
+          ;;
+        *-*-darwin*)
+         case $ABI in
+           mode64)      GMP_INCLUDE_MPN(powerpc64/darwin.m4) ;;
+           mode32 | 32) GMP_INCLUDE_MPN(powerpc32/darwin.m4) ;;
+          esac
+          ;;
+        *)
+         # Assume unrecognized operating system is the powerpc eABI
+          GMP_INCLUDE_MPN(powerpc32/eabi.m4)
+         ;;
+      esac
+      ;;
+    power*-*-aix*)
+      GMP_INCLUDE_MPN(powerpc32/aix.m4)
+      ;;
+    sparcv9*-*-* | ultrasparc*-*-* | sparc64-*-*)
+      case $ABI in
+        64)
+          GMP_ASM_SPARC_REGISTER
+          ;;
+      esac
+      ;;
+    X86_PATTERN | X86_64_PATTERN)
+      GMP_ASM_ALIGN_FILL_0x90
+      case $ABI in
+        32)
+          GMP_INCLUDE_MPN(x86/x86-defs.m4)
+          AC_DEFINE(HAVE_HOST_CPU_FAMILY_x86)
+          GMP_ASM_COFF_TYPE
+          GMP_ASM_X86_GOT_UNDERSCORE
+          GMP_ASM_X86_SHLDL_CL
+         case $enable_profiling in
+           prof | gprof)  GMP_ASM_X86_MCOUNT ;;
+         esac
+         case $host in
+           *-*-darwin*)
+             GMP_INCLUDE_MPN(x86/darwin.m4) ;;
+         esac
+          ;;
+        64)
+          GMP_INCLUDE_MPN(x86_64/x86_64-defs.m4)
+          AC_DEFINE(HAVE_HOST_CPU_FAMILY_x86_64)
+         case $host in
+           *-*-darwin*)
+             GMP_INCLUDE_MPN(x86_64/darwin.m4) ;;
+         esac
+          ;;
+      esac
+      ;;
+  esac
+fi
+
+# For --enable-minithres, prepend "minithres" to path so that its special
+# gmp-mparam.h will be used.
+if test $enable_minithres = yes; then
+  path="minithres $path"
+fi
+
+# Create link for gmp-mparam.h.
+#
+# Walk $path in order and link the first mpn/<dir>/gmp-mparam.h that exists
+# in the source tree.  gmp_mparam_source records the file chosen, and when
+# the loop breaks gmp_mparam_dir still holds the directory it came from.
+# It is a fatal error if no directory in the path provides the file.
+gmp_mparam_source=
+for gmp_mparam_dir in $path; do
+  # Remove any existing gmp-mparam.h, unless $no_create is "yes".
+  test "$no_create" = yes || rm -f gmp-mparam.h
+  tmp_file=$srcdir/mpn/$gmp_mparam_dir/gmp-mparam.h
+  if test -f $tmp_file; then
+    AC_CONFIG_LINKS(gmp-mparam.h:mpn/$gmp_mparam_dir/gmp-mparam.h)
+    gmp_srclinks="$gmp_srclinks gmp-mparam.h"
+    gmp_mparam_source=$tmp_file
+    break
+  fi
+done
+if test -z "$gmp_mparam_source"; then
+  AC_MSG_ERROR([no version of gmp-mparam.h found in path: $path])
+fi
+
+# For a helpful message from tune/tuneup.c: normally suggest the
+# gmp-mparam.h in use, but when only the generic one was found and the
+# first directory in the path is not generic, suggest a new file there.
+gmp_mparam_suggest=$gmp_mparam_source
+if test "$gmp_mparam_dir" = generic; then
+  # leave the first element of the path in i
+  for i in $path; do break; done
+  if test "$i" != generic; then
+    gmp_mparam_suggest="new file $srcdir/mpn/$i/gmp-mparam.h"
+  fi
+fi
+AC_DEFINE_UNQUOTED(GMP_MPARAM_H_SUGGEST, "$gmp_mparam_suggest",
+[The gmp-mparam.h file (a string) the tune program should suggest updating.])
+
+
+# Copy any SQR_TOOM2_THRESHOLD from gmp-mparam.h to config.m4.
+# Some versions of sqr_basecase.asm use this.
+# Fat binaries do this on a per-file basis, so skip in that case.
+#
+if test -z "$fat_path"; then
+  tmp_gmp_karatsuba_sqr_threshold=`sed -n 's/^#define SQR_TOOM2_THRESHOLD[     ]*\([0-9][0-9]*\).*$/\1/p' $gmp_mparam_source`
+  if test -n "$tmp_gmp_karatsuba_sqr_threshold"; then
+    GMP_DEFINE_RAW(["define(<SQR_TOOM2_THRESHOLD>,<$tmp_gmp_karatsuba_sqr_threshold>)"])
+  fi
+fi
+
+
+# Sizes of some types, needed at preprocessing time.
+#
+# FIXME: The assumption that GMP_LIMB_BITS is 8*sizeof(mp_limb_t) might
+# be slightly rash, but it's true everywhere we know of and ought to be true
+# of any sensible system.  In a generic C build, grepping LONG_BIT out of
+# <limits.h> might be an alternative, for maximum portability.
+#
+AC_CHECK_SIZEOF(void *)
+AC_CHECK_SIZEOF(unsigned short)
+AC_CHECK_SIZEOF(unsigned)
+AC_CHECK_SIZEOF(unsigned long)
+AC_CHECK_SIZEOF(mp_limb_t, , GMP_INCLUDE_GMP_H)
+if test "$ac_cv_sizeof_mp_limb_t" = 0; then
+  AC_MSG_ERROR([Oops, mp_limb_t doesn't seem to work])
+fi
+AC_SUBST(GMP_LIMB_BITS, `expr 8 \* $ac_cv_sizeof_mp_limb_t`)
+GMP_DEFINE_RAW(["define(<SIZEOF_UNSIGNED>,<$ac_cv_sizeof_unsigned>)"])
+
+# Check compiler limb size matches gmp-mparam.h
+#
+# FIXME: Some of the cycle counter objects in the tune directory depend on
+# the size of ulong, it'd be possible to check that here, though a mismatch
+# probably wouldn't want to be fatal, none of the libgmp assembler code
+# depends on ulong.
+#
+mparam_bits=[`sed -n 's/^#define GMP_LIMB_BITS[        ][      ]*\([0-9]*\).*$/\1/p' $gmp_mparam_source`]
+if test -n "$mparam_bits" && test "$mparam_bits" -ne $GMP_LIMB_BITS; then
+  if test "$test_CFLAGS" = set; then
+    AC_MSG_ERROR([Oops, mp_limb_t is $GMP_LIMB_BITS bits, but the assembler code
+in this configuration expects $mparam_bits bits.
+You appear to have set \$CFLAGS, perhaps you also need to tell GMP the
+intended ABI, see "ABI and ISA" in the manual.])
+  else
+    AC_MSG_ERROR([Oops, mp_limb_t is $GMP_LIMB_BITS bits, but the assembler code
+in this configuration expects $mparam_bits bits.])
+  fi
+fi
+
+GMP_DEFINE_RAW(["define(<GMP_LIMB_BITS>,$GMP_LIMB_BITS)"])
+GMP_DEFINE_RAW(["define(<GMP_NAIL_BITS>,$GMP_NAIL_BITS)"])
+GMP_DEFINE_RAW(["define(<GMP_NUMB_BITS>,eval(GMP_LIMB_BITS-GMP_NAIL_BITS))"])
+
+
+# Exclude the mpn random functions from mpbsd since that would drag in the
+# top-level rand things, all of which are unnecessary for libmp.  There's
+# other unnecessary objects too actually, if we could be bothered figuring
+# out exactly which they are.
+#
+mpn_objs_in_libmp=
+for i in $mpn_objs_in_libgmp; do
+  # drop any object whose name mentions "random", pass the rest through
+  case $i in
+    *random*) continue ;;
+  esac
+  mpn_objs_in_libmp="$mpn_objs_in_libmp $i"
+done
+AC_SUBST(mpn_objs_in_libmp)
+
+AC_SUBST(mpn_objects)
+AC_SUBST(mpn_objs_in_libgmp)
+AC_SUBST(gmp_srclinks)
+
+
+# A recompiled sqr_basecase for use in the tune program, if necessary.
+TUNE_SQR_OBJ=
+test -d tune || mkdir tune
+case $sqr_basecase_source in
+  *.asm)
+    sqr_max=[`sed -n 's/^def...(SQR_TOOM2_THRESHOLD_MAX, *\([0-9]*\))/\1/p' $sqr_basecase_source`]
+    if test -n "$sqr_max"; then
+      TUNE_SQR_OBJ=sqr_asm.o
+      AC_DEFINE_UNQUOTED(TUNE_SQR_TOOM2_MAX,$sqr_max,
+      [Maximum size the tune program can test for SQR_TOOM2_THRESHOLD])
+    fi
+    cat >tune/sqr_basecase.c <<EOF
+/* not sure that an empty file can compile, so put in a dummy */
+int sqr_basecase_dummy;
+EOF
+    ;;
+  *.c)
+    TUNE_SQR_OBJ=
+    AC_DEFINE(TUNE_SQR_TOOM2_MAX,SQR_TOOM2_MAX_GENERIC)
+    cat >tune/sqr_basecase.c <<EOF
+#define TUNE_PROGRAM_BUILD 1
+#define TUNE_PROGRAM_BUILD_SQR 1
+#include "mpn/sqr_basecase.c"
+EOF
+    ;;
+esac
+AC_SUBST(TUNE_SQR_OBJ)
+
+
+# Configs for demos/pexpr.c.
+#
+AC_CONFIG_FILES(demos/pexpr-config.h:demos/pexpr-config-h.in)
+GMP_SUBST_CHECK_FUNCS(clock, cputime, getrusage, gettimeofday, sigaction, sigaltstack, sigstack)
+GMP_SUBST_CHECK_HEADERS(sys/resource.h)
+AC_CHECK_TYPES([stack_t], HAVE_STACK_T_01=1, HAVE_STACK_T_01=0,
+               [#include <signal.h>])
+AC_SUBST(HAVE_STACK_T_01)
+
+# Configs for demos/calc directory
+#
+# AC_SUBST+AC_CONFIG_FILES is used for calc-config.h, rather than AC_DEFINE+
+# AC_CONFIG_HEADERS, since with the latter automake (1.8) will then put the
+# directory (ie. demos/calc) into $(DEFAULT_INCLUDES) for every Makefile.in,
+# which would look very strange.
+#
+# -lcurses is required by libreadline.  On a typical SVR4 style system this
+# normally doesn't have to be given explicitly, since libreadline.so will
+# have a NEEDED record for it.  But if someone for some reason is using only
+# a static libreadline.a then we must give -lcurses.  Readline (as of
+# version 4.3) doesn't use libtool, so we can't rely on a .la to cover
+# necessary dependencies.
+#
+# On a couple of systems we've seen libreadline available, but the headers
+# not in the default include path, so check for readline/readline.h.  We've
+# also seen readline/history.h missing, not sure if that's just a broken
+# install or a very old version, but check that too.
+#
+AC_CONFIG_FILES(demos/calc/calc-config.h:demos/calc/calc-config-h.in)
+LIBCURSES=
+if test $with_readline != no; then
+  AC_CHECK_LIB(ncurses, tputs, [LIBCURSES=-lncurses],
+    [AC_CHECK_LIB(curses, tputs, [LIBCURSES=-lcurses])])
+fi
+AC_SUBST(LIBCURSES)
+use_readline=$with_readline
+if test $with_readline = detect; then
+  use_readline=no
+  AC_CHECK_LIB(readline, readline,
+    [AC_CHECK_HEADER(readline/readline.h,
+      [AC_CHECK_HEADER(readline/history.h, use_readline=yes)])],
+    , $LIBCURSES)
+  AC_MSG_CHECKING(readline detected)
+  AC_MSG_RESULT($use_readline)
+fi
+if test $use_readline = yes; then
+  AC_SUBST(WITH_READLINE_01, 1)
+  AC_SUBST(LIBREADLINE, -lreadline)
+else
+  WITH_READLINE_01=0
+fi
+AC_PROG_YACC
+AM_PROG_LEX
+
+# Configs for demos/expr directory
+#
+# Libtool already runs an AC_CHECK_TOOL for ranlib, but we give
+# AC_PROG_RANLIB anyway since automake is supposed to complain if it's not
+# called.  (Automake 1.8.4 doesn't, at least not when the only library is in
+# an EXTRA_LIBRARIES.)
+#
+AC_PROG_RANLIB
+
+
+# Create config.m4.
+GMP_FINISH
+
+# Create Makefiles
+# FIXME: Upcoming version of autoconf/automake may not like broken lines.
+#        Right now automake isn't accepting the new AC_CONFIG_FILES scheme.
+
+AC_OUTPUT(Makefile                                                     \
+  mpbsd/Makefile mpf/Makefile mpn/Makefile mpq/Makefile                        \
+  mpz/Makefile printf/Makefile scanf/Makefile cxx/Makefile             \
+  tests/Makefile tests/devel/Makefile tests/mpbsd/Makefile             \
+  tests/mpf/Makefile tests/mpn/Makefile tests/mpq/Makefile             \
+  tests/mpz/Makefile tests/rand/Makefile tests/misc/Makefile           \
+  tests/cxx/Makefile                                                   \
+  doc/Makefile tune/Makefile                                           \
+  demos/Makefile demos/calc/Makefile demos/expr/Makefile               \
+  gmp.h:gmp-h.in mp.h:mp-h.in)
diff --git a/cxx/Makefile.am b/cxx/Makefile.am

new file mode 100644 (file)

index 0000000..c4a42dc
--- /dev/null
+++ b/cxx/Makefile.am
@@ -0,0 +1,29 @@
+## Process this file with automake to generate Makefile.in
+
+# Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+# Everything in this directory is compiled with __GMP_WITHIN_GMPXX defined
+# and with the top-level source directory on the include path.
+INCLUDES = -D__GMP_WITHIN_GMPXX -I$(top_srcdir)
+
+# libcxx.la is a noinst libtool library and is only requested when the
+# WANT_CXX conditional is true.
+if WANT_CXX
+noinst_LTLIBRARIES = libcxx.la
+endif
+
+# The source list is deliberately outside the conditional, so it is the same
+# whether or not the library is being built.
+libcxx_la_SOURCES = \
+  isfuns.cc ismpf.cc ismpq.cc ismpz.cc ismpznw.cc \
+  osdoprnti.cc osfuns.cc osmpf.cc osmpq.cc osmpz.cc
diff --git a/cxx/Makefile.in b/cxx/Makefile.in

new file mode 100644 (file)

index 0000000..a6e278f
--- /dev/null
+++ b/cxx/Makefile.in
@@ -0,0 +1,526 @@
+# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation,
+# Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+# Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+ANSI2KNR = $(top_builddir)/ansi2knr
+subdir = cxx
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
+       $(top_srcdir)/configure.in
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+       $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+LTLIBRARIES = $(noinst_LTLIBRARIES)
+libcxx_la_LIBADD =
+am_libcxx_la_OBJECTS = isfuns.lo ismpf.lo ismpq.lo ismpz.lo ismpznw.lo \
+       osdoprnti.lo osfuns.lo osmpf.lo osmpq.lo osmpz.lo
+libcxx_la_OBJECTS = $(am_libcxx_la_OBJECTS)
+@WANT_CXX_TRUE@am_libcxx_la_rpath =
+DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
+depcomp =
+am__depfiles_maybe =
+CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+       $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
+LTCXXCOMPILE = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+       $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
+CXXLD = $(CXX)
+CXXLINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=link $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
+       $(LDFLAGS) -o $@
+SOURCES = $(libcxx_la_SOURCES)
+DIST_SOURCES = $(libcxx_la_SOURCES)
+ETAGS = etags
+CTAGS = ctags
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ABI = @ABI@
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AS = @AS@
+ASMFLAGS = @ASMFLAGS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CALLING_CONVENTIONS_OBJS = @CALLING_CONVENTIONS_OBJS@
+CC = @CC@
+CCAS = @CCAS@
+CC_FOR_BUILD = @CC_FOR_BUILD@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CPP_FOR_BUILD = @CPP_FOR_BUILD@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFN_LONG_LONG_LIMB = @DEFN_LONG_LONG_LIMB@
+DEFS = @DEFS@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+EXEEXT_FOR_BUILD = @EXEEXT_FOR_BUILD@
+FGREP = @FGREP@
+GMP_LDFLAGS = @GMP_LDFLAGS@
+GMP_LIMB_BITS = @GMP_LIMB_BITS@
+GMP_NAIL_BITS = @GMP_NAIL_BITS@
+GREP = @GREP@
+HAVE_CLOCK_01 = @HAVE_CLOCK_01@
+HAVE_CPUTIME_01 = @HAVE_CPUTIME_01@
+HAVE_GETRUSAGE_01 = @HAVE_GETRUSAGE_01@
+HAVE_GETTIMEOFDAY_01 = @HAVE_GETTIMEOFDAY_01@
+HAVE_HOST_CPU_FAMILY_power = @HAVE_HOST_CPU_FAMILY_power@
+HAVE_HOST_CPU_FAMILY_powerpc = @HAVE_HOST_CPU_FAMILY_powerpc@
+HAVE_SIGACTION_01 = @HAVE_SIGACTION_01@
+HAVE_SIGALTSTACK_01 = @HAVE_SIGALTSTACK_01@
+HAVE_SIGSTACK_01 = @HAVE_SIGSTACK_01@
+HAVE_STACK_T_01 = @HAVE_STACK_T_01@
+HAVE_SYS_RESOURCE_H_01 = @HAVE_SYS_RESOURCE_H_01@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LEX = @LEX@
+LEXLIB = @LEXLIB@
+LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
+LIBCURSES = @LIBCURSES@
+LIBGMPXX_LDFLAGS = @LIBGMPXX_LDFLAGS@
+LIBGMP_DLL = @LIBGMP_DLL@
+LIBGMP_LDFLAGS = @LIBGMP_LDFLAGS@
+LIBM = @LIBM@
+LIBM_FOR_BUILD = @LIBM_FOR_BUILD@
+LIBOBJS = @LIBOBJS@
+LIBREADLINE = @LIBREADLINE@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+M4 = @M4@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
+STRIP = @STRIP@
+TAL_OBJECT = @TAL_OBJECT@
+TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
+U = @U@
+U_FOR_BUILD = @U_FOR_BUILD@
+VERSION = @VERSION@
+WITH_READLINE_01 = @WITH_READLINE_01@
+YACC = @YACC@
+YFLAGS = @YFLAGS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__leading_dot = @am__leading_dot@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+gmp_srclinks = @gmp_srclinks@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+mpn_objects = @mpn_objects@
+mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
+mpn_objs_in_libmp = @mpn_objs_in_libmp@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+INCLUDES = -D__GMP_WITHIN_GMPXX -I$(top_srcdir)
+@WANT_CXX_TRUE@noinst_LTLIBRARIES = libcxx.la
+libcxx_la_SOURCES = \
+  isfuns.cc ismpf.cc ismpq.cc ismpz.cc ismpznw.cc \
+  osdoprnti.cc osfuns.cc osmpf.cc osmpq.cc osmpz.cc
+
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .cc .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps)
+       @for dep in $?; do \
+         case '$(am__configure_deps)' in \
+           *$$dep*) \
+             ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+               && { if test -f $@; then exit 0; else break; fi; }; \
+             exit 1;; \
+         esac; \
+       done; \
+       echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps cxx/Makefile'; \
+       $(am__cd) $(top_srcdir) && \
+         $(AUTOMAKE) --gnu --ignore-deps cxx/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+       @case '$?' in \
+         *config.status*) \
+           cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+         *) \
+           echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+           cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+       esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+clean-noinstLTLIBRARIES:
+       -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES)
+       @list='$(noinst_LTLIBRARIES)'; for p in $$list; do \
+         dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \
+         test "$$dir" != "$$p" || dir=.; \
+         echo "rm -f \"$${dir}/so_locations\""; \
+         rm -f "$${dir}/so_locations"; \
+       done
+libcxx.la: $(libcxx_la_OBJECTS) $(libcxx_la_DEPENDENCIES) 
+       $(CXXLINK) $(am_libcxx_la_rpath) $(libcxx_la_OBJECTS) $(libcxx_la_LIBADD) $(LIBS)
+
+mostlyclean-compile:
+       -rm -f *.$(OBJEXT)
+
+distclean-compile:
+       -rm -f *.tab.c
+$(top_builddir)/ansi2knr:
+       $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
+
+mostlyclean-kr:
+       -test "$U" = "" || rm -f *_.c
+
+.cc.o:
+       $(CXXCOMPILE) -c -o $@ $<
+
+.cc.obj:
+       $(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.cc.lo:
+       $(LTCXXCOMPILE) -c -o $@ $<
+
+mostlyclean-libtool:
+       -rm -f *.lo
+
+clean-libtool:
+       -rm -rf .libs _libs
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+       list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       mkid -fID $$unique
+tags: TAGS
+
+TAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+               $(TAGS_FILES) $(LISP)
+       set x; \
+       here=`pwd`; \
+       list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       shift; \
+       if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+         test -n "$$unique" || unique=$$empty_fix; \
+         if test $$# -gt 0; then \
+           $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+             "$$@" $$unique; \
+         else \
+           $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+             $$unique; \
+         fi; \
+       fi
+ctags: CTAGS
+CTAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+               $(TAGS_FILES) $(LISP)
+       list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       test -z "$(CTAGS_ARGS)$$unique" \
+         || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+            $$unique
+
+GTAGS:
+       here=`$(am__cd) $(top_builddir) && pwd` \
+         && $(am__cd) $(top_srcdir) \
+         && gtags -i $(GTAGS_ARGS) "$$here"
+
+distclean-tags:
+       -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+       @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+       topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+       list='$(DISTFILES)'; \
+         dist_files=`for file in $$list; do echo $$file; done | \
+         sed -e "s|^$$srcdirstrip/||;t" \
+             -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+       case $$dist_files in \
+         */*) $(MKDIR_P) `echo "$$dist_files" | \
+                          sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+                          sort -u` ;; \
+       esac; \
+       for file in $$dist_files; do \
+         if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+         if test -d $$d/$$file; then \
+           dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+           if test -d "$(distdir)/$$file"; then \
+             find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+           fi; \
+           if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+             cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+             find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+           fi; \
+           cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+         else \
+           test -f "$(distdir)/$$file" \
+           || cp -p $$d/$$file "$(distdir)/$$file" \
+           || exit 1; \
+         fi; \
+       done
+check-am: all-am
+check: check-am
+all-am: Makefile $(LTLIBRARIES)
+installdirs:
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+       @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+       $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+         install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+         `test -z '$(STRIP)' || \
+           echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+       -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+       -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+       @echo "This command is intended for maintainers to use"
+       @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \
+       mostlyclean-am
+
+distclean: distclean-am
+       -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+       distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+       -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+       mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: $(top_builddir)/ansi2knr install-am install-strip
+
+.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
+       clean-libtool clean-noinstLTLIBRARIES ctags distclean \
+       distclean-compile distclean-generic distclean-libtool \
+       distclean-tags distdir dvi dvi-am html html-am info info-am \
+       install install-am install-data install-data-am install-dvi \
+       install-dvi-am install-exec install-exec-am install-html \
+       install-html-am install-info install-info-am install-man \
+       install-pdf install-pdf-am install-ps install-ps-am \
+       install-strip installcheck installcheck-am installdirs \
+       maintainer-clean maintainer-clean-generic mostlyclean \
+       mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+       mostlyclean-libtool pdf pdf-am ps ps-am tags uninstall \
+       uninstall-am
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/cxx/dummy.cc b/cxx/dummy.cc

new file mode 100644 (file)

index 0000000..00e470c
--- /dev/null
+++ b/cxx/dummy.cc
@@ -0,0 +1,22 @@
+/* Dummy file to make automake treat libgmpxx.la as C++.
+
+Copyright 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+/* some compilers reputedly dislike completely empty files */
+typedef int  foo;
diff --git a/cxx/isfuns.cc b/cxx/isfuns.cc

new file mode 100644 (file)

index 0000000..e3ea5ae
--- /dev/null
+++ b/cxx/isfuns.cc
@@ -0,0 +1,105 @@
+/* Auxiliary functions for C++-style input of GMP types.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <cctype>
+#include <iostream>
+#include <string>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+using namespace std;
+
+
+// Decide which base the number about to be read from i is written in.
+//
+// c holds the character most recently read from i and is advanced past any
+// "0" / "0x" / "0X" prefix that gets consumed.  showbase is set when the
+// base had to be deduced from such a prefix because the stream has no single
+// dec, hex or oct flag selected.  zero is set when a leading "0" was
+// consumed as an octal prefix, so that the caller can still treat it as the
+// value 0 if no further digit follows.  Returns 8, 10 or 16.
+int
+__gmp_istream_set_base (istream &i, char &c, bool &zero, bool &showbase)
+{
+  zero = false;
+  showbase = false;
+
+  const ios::fmtflags base_flags = i.flags() & ios::basefield;
+
+  // An explicit dec, hex or oct flag settles the matter without looking at
+  // the input at all.
+  if (base_flags == ios::dec)
+    return 10;
+  if (base_flags == ios::hex)
+    return 16;
+  if (base_flags == ios::oct)
+    return 8;
+
+  // No single base selected: look for initial "0" or "0x" or "0X"
+  showbase = true;
+  if (c != '0')
+    return 10;
+
+  if (! i.get(c))
+    c = 0; // reset or we might loop indefinitely
+
+  if (c == 'x' || c == 'X')
+    {
+      i.get(c);
+      return 16;
+    }
+
+  zero = true; // if no other digit is read, the "0" counts
+  return 8;
+}
+
+// Append to s the run of digits valid in the given base, starting with the
+// character already held in c and continuing with characters read from i.
+//
+// On return c holds the first character that was not accepted, unless the
+// stream ran out first.  ok is set to true as soon as one digit has been
+// accepted and is otherwise left as the caller passed it.  Only bases 8, 10
+// and 16 are recognised; for any other base nothing is read.
+void
+__gmp_istream_set_digits (string &s, istream &i, char &c, bool &ok, int base)
+{
+  if (base != 8 && base != 10 && base != 16)
+    return;
+
+  for (;;)
+    {
+      // The <cctype> classifiers are only defined for arguments that are
+      // representable as unsigned char (or are EOF).  A plain char holding
+      // a byte >= 0x80 can be negative, so convert before classifying.
+      const int uc = (unsigned char) c;
+
+      bool is_digit;
+      if (base == 16)
+        is_digit = isxdigit(uc) != 0;
+      else
+        is_digit = isdigit(uc) != 0
+          && (base == 10 || (c != '8' && c != '9'));
+
+      if (! is_digit)
+        break;
+
+      ok = true; // at least a valid digit was read
+      s += c;
+      if (! i.get(c))
+        break;
+    }
+}
diff --git a/cxx/ismpf.cc b/cxx/ismpf.cc

new file mode 100644 (file)

index 0000000..bfe4dc8
--- /dev/null
+++ b/cxx/ismpf.cc
@@ -0,0 +1,134 @@
+/* operator>> -- C++-style input of mpf_t.
+
+Copyright 2001, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <cctype>
+#include <iostream>
+#include <string>
+#include <clocale>    // for localeconv
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+using namespace std;
+
+
+// For g++ libstdc++ parsing see num_get<chartype,initer>::_M_extract_float
+// in include/bits/locale_facets.tcc.
+//
+// There are no plans to accept hex or octal floats, not unless the standard
+// C++ library does so.  Although such formats might be of use, it's
+// considered more important to be compatible with what the normal
+// operator>> does on "double"s etc.
+
+// Read a decimal floating point number from i and store it in f.
+//
+// Accepted input is an optional sign, digits, an optional radix point
+// followed by digits, and an optional exponent made of 'e' or 'E', an
+// optional sign and at least one digit.  At least one digit is needed before
+// any exponent.  Leading whitespace is skipped when the stream has skipws
+// set.  The base is always 10.
+//
+// If the stream is still good after parsing, the last character read is put
+// back.  If parsing stopped at end of input after a valid number, the stream
+// state is cleared.  When no valid number was read, failbit is set and
+// mpf_set_str is not called.  Returns i.
+istream &
+operator>> (istream &i, mpf_ptr f)
+{
+  int base;
+  char c = 0;
+  string s;
+  bool ok = false;
+
+  // C decimal point, as expected by mpf_set_str
+  const char *lconv_point = localeconv()->decimal_point;
+
+  // C++ decimal point
+#if HAVE_STD__LOCALE
+  const locale& loc = i.getloc();
+  char point_char = use_facet< numpunct<char> >(loc).decimal_point();
+#else
+  const char *point = lconv_point;
+  char point_char = *point;
+#endif
+
+  i.get(c); // start reading
+
+  if (i.flags() & ios::skipws) // skip initial whitespace
+    {
+      // C++ isspace
+#if HAVE_STD__LOCALE
+      const ctype<char>& ct = use_facet< ctype<char> >(loc);
+#define cxx_isspace(c)  (ct.is(ctype_base::space,(c)))
+#else
+      // plain isspace is only defined for unsigned char values (or EOF), so
+      // a possibly negative char must be converted first
+#define cxx_isspace(c)  isspace((unsigned char) (c))
+#endif
+
+      while (cxx_isspace(c) && i.get(c))
+        ;
+    }
+
+  if (c == '-' || c == '+') // sign
+    {
+      if (c == '-')
+        s = "-";
+      i.get(c);
+    }
+
+  base = 10;
+  __gmp_istream_set_digits(s, i, c, ok, base); // read the number
+
+  // look for the C++ radix point, but put the C one in for mpf_set_str
+  if (c == point_char)
+    {
+#if HAVE_STD__LOCALE
+      i.get(c);
+#else // lconv point can be multi-char
+      for (;;)
+        {
+          i.get(c);
+          point++;
+          if (*point == '\0')
+            break;
+          if (c != *point)
+            goto fail;
+        }
+#endif
+      s += lconv_point;
+      __gmp_istream_set_digits(s, i, c, ok, base); // read the mantissa
+    }
+
+  if (ok && (c == 'e' || c == 'E')) // exponent
+    {
+      s += c;
+      i.get(c);
+      ok = false; // exponent is mandatory
+
+      if (c == '-' || c == '+') // sign
+        {
+          s += c;
+          i.get(c);
+        }
+
+      __gmp_istream_set_digits(s, i, c, ok, base); // read the exponent
+    }
+
+  if (i.good()) // last character read was non-numeric
+    i.putback(c);
+  else if (i.eof() && ok) // stopped just before eof
+    i.clear();
+
+  if (ok)
+    ASSERT_NOCARRY (mpf_set_str(f, s.c_str(), base)); // extract the number
+  else
+    {
+    fail:
+      i.setstate(ios::failbit); // read failed
+    }
+
+  return i;
+}
diff --git a/cxx/ismpq.cc b/cxx/ismpq.cc

new file mode 100644 (file)

index 0000000..23eec76
--- /dev/null
+++ b/cxx/ismpq.cc
@@ -0,0 +1,56 @@
+/* operator>> -- C++-style input of mpq_t.
+
+Copyright 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <cctype>
+#include <iostream>
+#include <string>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+using namespace std;
+
+
+// Read a rational from i into q, written either as "num" or as "num/den".
+//
+// The numerator goes through the mpz operator>>.  If a '/' follows
+// immediately, the denominator is read by __gmpz_operator_in_nowhite and
+// that call's stream result is returned.  Otherwise the denominator is set
+// to 1 and the character that was looked at is put back, or the stream
+// state is cleared if the look-ahead hit end of input.  Returns i.
+istream &
+operator>> (istream &i, mpq_ptr q)
+{
+  i >> mpq_numref(q);
+  if (! i)
+    return i;
+
+  char  next = 0;
+  i.get(next); // start reading
+
+  if (next != '/')
+    {
+      // no denominator, set 1
+      q->_mp_den._mp_size = 1;
+      q->_mp_den._mp_d[0] = 1;
+
+      if (! i.good())
+        {
+          if (i.eof())
+            i.clear();
+        }
+      else
+        i.putback(next);
+
+      return i;
+    }
+
+  // skip slash, read denominator
+  i.get(next);
+  return __gmpz_operator_in_nowhite (i, mpq_denref(q), next);
+}
diff --git a/cxx/ismpz.cc b/cxx/ismpz.cc

new file mode 100644 (file)

index 0000000..3afa836
--- /dev/null
+++ b/cxx/ismpz.cc
@@ -0,0 +1,52 @@
+/* operator>> -- C++-style input of mpz_t.
+
+Copyright 2001, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <cctype>
+#include <iostream>
+#include <string>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+using namespace std;
+
+
+// For g++ libstdc++ parsing see num_get<chartype,initer>::_M_extract_int in
+// include/bits/locale_facets.tcc.
+
+// Read an integer from i into z.
+//
+// When the stream has skipws set, leading whitespace is discarded first.
+// The number itself is parsed by __gmpz_operator_in_nowhite, which is handed
+// the first character left over, and that call's stream result is returned.
+istream &
+operator>> (istream &i, mpz_ptr z)
+{
+  char c = 0;
+  i.get(c); // start reading
+
+  if (i.flags() & ios::skipws) // skip initial whitespace
+    {
+#if HAVE_STD__LOCALE
+      const ctype<char>& ct = use_facet< ctype<char> >(i.getloc());
+#define cxx_isspace(c)  (ct.is(ctype_base::space,(c)))
+#else
+      // plain isspace is only defined for unsigned char values (or EOF), so
+      // a possibly negative char must be converted first
+#define cxx_isspace(c)  isspace((unsigned char) (c))
+#endif
+
+      while (cxx_isspace(c) && i.get(c))
+        ;
+    }
+
+  return __gmpz_operator_in_nowhite (i, z, c);
+}
diff --git a/cxx/ismpznw.cc b/cxx/ismpznw.cc

new file mode 100644 (file)

index 0000000..387d092
--- /dev/null
+++ b/cxx/ismpznw.cc
@@ -0,0 +1,62 @@
+/* __gmpz_operator_in_nowhite -- C++-style input of mpz_t, no whitespace skip.
+
+Copyright 2001, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <cctype>
+#include <iostream>
+#include <string>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+using namespace std;
+
+
+// For g++ libstdc++ parsing see num_get<chartype,initer>::_M_extract_int in
+// include/bits/locale_facets.tcc.
+
+// Parse an integer into z without skipping any whitespace.  c is the first
+// character of the number, already taken from i by the caller.
+//
+// An optional sign is followed by digits in the base chosen by
+// __gmp_istream_set_base.  If the stream is still good afterwards, the last
+// character read is put back.  If parsing stopped at end of input after a
+// usable number, the stream state is cleared.  A lone "0" that was consumed
+// as a base prefix gives the value 0.  With no usable number, failbit is set
+// and z is not assigned.  Returns i.
+istream &
+__gmpz_operator_in_nowhite (istream &i, mpz_ptr z, char c)
+{
+  string digits;
+  bool got_digit = false, lone_zero, showbase;
+
+  switch (c) // sign
+    {
+    case '-': // mpz_set_str doesn't accept '+', so only '-' is recorded
+      digits = "-";
+      // fall through
+    case '+':
+      i.get(c);
+      break;
+    default:
+      break;
+    }
+
+  // select the base, then read the number
+  const int base = __gmp_istream_set_base(i, c, lone_zero, showbase);
+  __gmp_istream_set_digits(digits, i, c, got_digit, base);
+
+  if (! i.good())
+    {
+      // stopped just before eof
+      if (i.eof() && (got_digit || lone_zero))
+        i.clear();
+    }
+  else
+    i.putback(c); // last character read was non-numeric
+
+  if (! got_digit && ! lone_zero)
+    i.setstate(ios::failbit); // read failed
+  else if (got_digit)
+    ASSERT_NOCARRY (mpz_set_str (z, digits.c_str(), base)); // extract the number
+  else
+    mpz_set_ui(z, 0);
+
+  return i;
+}
diff --git a/cxx/osdoprnti.cc b/cxx/osdoprnti.cc

new file mode 100644 (file)

index 0000000..e5dea4e
--- /dev/null
+++ b/cxx/osdoprnti.cc
@@ -0,0 +1,57 @@
+/* __gmp_doprnt_integer_ostream -- integer formatted output to an ostream.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <iostream>
+#include <cstdarg>    /* for va_list and hence doprnt_funs_t */
+#include <cstring>    /* for strlen */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+using namespace std;
+
+
+/* The gmp_asprintf support routines never give an error, so
+   __gmp_doprnt_integer shouldn't fail and its return can just be checked
+   with an ASSERT.  */
+
+ostream&
+__gmp_doprnt_integer_ostream (ostream &o, struct doprnt_params_t *p,
+                              char *s)
+{
+  struct gmp_asprintf_t   d;
+  char  *result;
+  int   ret;
+
+  /* don't show leading zeros the way printf does */
+  p->prec = -1;
+
+  GMP_ASPRINTF_T_INIT (d, &result);
+  ret = __gmp_doprnt_integer (&__gmp_asprintf_funs_noformat, &d, p, s);
+  ASSERT (ret != -1);
+  __gmp_asprintf_final (&d);
+  (*__gmp_free_func) (s, strlen(s)+1);
+
+  gmp_allocated_string  t (result);
+  return o.write (t.str, t.len);
+}
diff --git a/cxx/osfuns.cc b/cxx/osfuns.cc

new file mode 100644 (file)

index 0000000..d4a2443
--- /dev/null
+++ b/cxx/osfuns.cc
@@ -0,0 +1,113 @@
+/* Support for operator<< routines.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <iostream>
+#include <stdarg.h>    /* for va_list and hence doprnt_funs_t */
+#include <string.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+using namespace std;
+
+
+/* Don't need "format" for operator<< routines, just "memory" and "reps".
+   Omitting gmp_asprintf_format lets us avoid dragging vsnprintf into the
+   link.  __gmp_asprintf_final will be called directly and doesn't need to
+   be in the struct.  */
+
+const struct doprnt_funs_t  __gmp_asprintf_funs_noformat = {
+  NULL,                                     /* "format" deliberately left out */
+  (doprnt_memory_t) __gmp_asprintf_memory,  /* "memory" */
+  (doprnt_reps_t)   __gmp_asprintf_reps,    /* "reps" */
+  NULL                                      /* presumably the "final" slot;
+                                               __gmp_asprintf_final is called
+                                               directly instead */
+};
+
+
+void
+__gmp_doprnt_params_from_ios (struct doprnt_params_t *p, ios &o)
+{
+  if ((o.flags() & ios::basefield) == ios::hex)
+    {
+      p->expfmt = "@%c%02d";
+      p->base = (o.flags() & ios::uppercase ? -16 : 16);
+    }
+  else
+    {
+      p->expfmt = (o.flags() & ios::uppercase ? "E%c%02d" : "e%c%02d");
+      if ((o.flags() & ios::basefield) == ios::oct)
+        p->base = 8;
+      else
+        p->base = 10;
+    }
+
+  /* "general" if none or more than one bit set */
+  if ((o.flags() & ios::floatfield) == ios::fixed)
+    p->conv = DOPRNT_CONV_FIXED;
+  else if ((o.flags() & ios::floatfield) == ios::scientific)
+    p->conv = DOPRNT_CONV_SCIENTIFIC;
+  else
+    p->conv = DOPRNT_CONV_GENERAL;
+
+  p->exptimes4 = 0;
+
+  p->fill = o.fill();
+
+  /* "right" if more than one bit set */
+  if ((o.flags() & ios::adjustfield) == ios::left)
+    p->justify = DOPRNT_JUSTIFY_LEFT;
+  else if ((o.flags() & ios::adjustfield) == ios::internal)
+    p->justify = DOPRNT_JUSTIFY_INTERNAL;
+  else
+    p->justify = DOPRNT_JUSTIFY_RIGHT;
+
+  /* ios::fixed allows prec==0, others take 0 as the default 6.
+     Don't allow negatives (they do bad things to __gmp_doprnt_float_cxx).  */
+  p->prec = MAX (0, o.precision());
+  if (p->prec == 0 && p->conv != DOPRNT_CONV_FIXED)
+    p->prec = 6;
+
+  /* for hex showbase is always, for octal only non-zero */
+  if (o.flags() & ios::showbase)
+    p->showbase = ((o.flags() & ios::basefield) == ios::hex
+                   ? DOPRNT_SHOWBASE_YES : DOPRNT_SHOWBASE_NONZERO);
+  else
+    p->showbase = DOPRNT_SHOWBASE_NO;
+
+  p->showpoint = ((o.flags() & ios::showpoint) != 0);
+
+  /* in fixed and scientific always show trailing zeros, in general format
+     show them if showpoint is set (or so it seems) */
+  if ((o.flags() & ios::floatfield) == ios::fixed
+      || (o.flags() & ios::floatfield) == ios::scientific)
+    p->showtrailing = 1;
+  else
+    p->showtrailing = p->showpoint;
+
+  p->sign = (o.flags() & ios::showpos ? '+' : '\0');
+
+  p->width = o.width();
+
+  /* reset on each output */
+  o.width (0);
+}
diff --git a/cxx/osmpf.cc b/cxx/osmpf.cc

new file mode 100644 (file)

index 0000000..402a570
--- /dev/null
+++ b/cxx/osmpf.cc
@@ -0,0 +1,60 @@
+/* operator<< -- mpf formatted output to an ostream.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <clocale>
+#include <iostream>
+#include <stdarg.h>    /* for va_list and hence doprnt_funs_t */
+#include <string.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+using namespace std;
+
+
+/* The gmp_asprintf support routines never give an error, so
+   __gmp_doprnt_mpf shouldn't fail and its return can just be checked with
+   an ASSERT.  */
+
+ostream&
+operator<< (ostream &o, mpf_srcptr f)
+{
+  struct doprnt_params_t  param;
+  struct gmp_asprintf_t   d;
+  char  *result;
+  int   ret;
+
+  __gmp_doprnt_params_from_ios (&param, o);
+
+#if HAVE_STD__LOCALE
+  char  point[2];
+  point[0] = use_facet< numpunct<char> >(o.getloc()).decimal_point();
+  point[1] = '\0';
+#else
+  const char *point = localeconv()->decimal_point;
+#endif
+
+  GMP_ASPRINTF_T_INIT (d, &result);
+  ret = __gmp_doprnt_mpf (&__gmp_asprintf_funs_noformat, &d, &param, point, f);
+  ASSERT (ret != -1);
+  __gmp_asprintf_final (&d);
+
+  gmp_allocated_string  t (result);
+  return o.write (t.str, t.len);
+}
diff --git a/cxx/osmpq.cc b/cxx/osmpq.cc

new file mode 100644 (file)

index 0000000..92679f8
--- /dev/null
+++ b/cxx/osmpq.cc
@@ -0,0 +1,37 @@
+/* operator<< -- mpq formatted output to an ostream.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <iostream>
+#include <stdarg.h>    /* for va_list and hence doprnt_funs_t */
+#include <string.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+using namespace std;
+
+
+/* Write Q to O: convert it to a string in the base selected by O's
+   flags, then let __gmp_doprnt_integer_ostream apply the remaining
+   formatting and do the output.  */
+
+ostream&
+operator<< (ostream &o, mpq_srcptr q)
+{
+  struct doprnt_params_t  fmt;
+  __gmp_doprnt_params_from_ios (&fmt, o);
+
+  /* NULL asks mpq_get_str for a newly allocated string; it is handed over
+     to __gmp_doprnt_integer_ostream */
+  char  *digits = mpq_get_str (NULL, fmt.base, q);
+  return __gmp_doprnt_integer_ostream (o, &fmt, digits);
+}
diff --git a/cxx/osmpz.cc b/cxx/osmpz.cc

new file mode 100644 (file)

index 0000000..d7d0b5c
--- /dev/null
+++ b/cxx/osmpz.cc
@@ -0,0 +1,37 @@
+/* operator<< -- mpz formatted output to an ostream.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <iostream>
+#include <stdarg.h>    /* for va_list and hence doprnt_funs_t */
+#include <string.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+using namespace std;
+
+
+/* Write Z to O: convert it to a string in the base selected by O's
+   flags, then let __gmp_doprnt_integer_ostream apply the remaining
+   formatting and do the output.  */
+
+ostream&
+operator<< (ostream &o, mpz_srcptr z)
+{
+  struct doprnt_params_t  fmt;
+  __gmp_doprnt_params_from_ios (&fmt, o);
+
+  /* NULL asks mpz_get_str for a newly allocated string; it is handed over
+     to __gmp_doprnt_integer_ostream */
+  char  *digits = mpz_get_str (NULL, fmt.base, z);
+  return __gmp_doprnt_integer_ostream (o, &fmt, digits);
+}
diff --git a/demos/Makefile.am b/demos/Makefile.am

new file mode 100644 (file)

index 0000000..97cb3ef
--- /dev/null
+++ b/demos/Makefile.am
@@ -0,0 +1,39 @@
+## Process this file with automake to generate Makefile.in
+
+# Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+SUBDIRS = calc expr
+EXTRA_DIST = perl
+
+INCLUDES = -I$(top_srcdir)
+LDADD = $(top_builddir)/libgmp.la
+
+qcn_LDADD = $(LDADD) $(LIBM)
+primes_LDADD = $(LDADD) $(LIBM)
+
+# None of these programs are built by default, but "make <whatever>" will
+# build them once libgmp.la is built.
+#
+EXTRA_PROGRAMS = factorize isprime pexpr primes qcn
+
+CLEANFILES = $(EXTRA_PROGRAMS)
+
+# Build every demo program in this directory, then the ones in the calc
+# and expr subdirectories.  "&&" is used rather than ";" so that a failed
+# cd stops the rule instead of running "make allprogs" here again.
+allprogs: $(EXTRA_PROGRAMS)
+       cd calc && $(MAKE) $(AM_MAKEFLAGS) allprogs
+       cd expr && $(MAKE) $(AM_MAKEFLAGS) allprogs
diff --git a/demos/Makefile.in b/demos/Makefile.in

new file mode 100644 (file)

index 0000000..9e7a690
--- /dev/null
+++ b/demos/Makefile.in
@@ -0,0 +1,718 @@
+# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation,
+# Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+# Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+ANSI2KNR = $(top_builddir)/ansi2knr
+EXTRA_PROGRAMS = factorize$(EXEEXT) isprime$(EXEEXT) pexpr$(EXEEXT) \
+       primes$(EXEEXT) qcn$(EXEEXT)
+subdir = demos
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in \
+       $(srcdir)/pexpr-config-h.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
+       $(top_srcdir)/configure.in
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+       $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES = pexpr-config.h
+CONFIG_CLEAN_VPATH_FILES =
+factorize_SOURCES = factorize.c
+factorize_OBJECTS = factorize$U.$(OBJEXT)
+factorize_LDADD = $(LDADD)
+factorize_DEPENDENCIES = $(top_builddir)/libgmp.la
+isprime_SOURCES = isprime.c
+isprime_OBJECTS = isprime$U.$(OBJEXT)
+isprime_LDADD = $(LDADD)
+isprime_DEPENDENCIES = $(top_builddir)/libgmp.la
+pexpr_SOURCES = pexpr.c
+pexpr_OBJECTS = pexpr$U.$(OBJEXT)
+pexpr_LDADD = $(LDADD)
+pexpr_DEPENDENCIES = $(top_builddir)/libgmp.la
+primes_SOURCES = primes.c
+primes_OBJECTS = primes$U.$(OBJEXT)
+am__DEPENDENCIES_1 =
+primes_DEPENDENCIES = $(LDADD) $(am__DEPENDENCIES_1)
+qcn_SOURCES = qcn.c
+qcn_OBJECTS = qcn$U.$(OBJEXT)
+qcn_DEPENDENCIES = $(LDADD) $(am__DEPENDENCIES_1)
+DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
+depcomp =
+am__depfiles_maybe =
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+       $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+       $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
+       $(LDFLAGS) -o $@
+SOURCES = factorize.c isprime.c pexpr.c primes.c qcn.c
+DIST_SOURCES = factorize.c isprime.c pexpr.c primes.c qcn.c
+RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \
+       html-recursive info-recursive install-data-recursive \
+       install-dvi-recursive install-exec-recursive \
+       install-html-recursive install-info-recursive \
+       install-pdf-recursive install-ps-recursive install-recursive \
+       installcheck-recursive installdirs-recursive pdf-recursive \
+       ps-recursive uninstall-recursive
+RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive        \
+  distclean-recursive maintainer-clean-recursive
+AM_RECURSIVE_TARGETS = $(RECURSIVE_TARGETS:-recursive=) \
+       $(RECURSIVE_CLEAN_TARGETS:-recursive=) tags TAGS ctags CTAGS \
+       distdir
+ETAGS = etags
+CTAGS = ctags
+DIST_SUBDIRS = $(SUBDIRS)
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+am__relativize = \
+  dir0=`pwd`; \
+  sed_first='s,^\([^/]*\)/.*$$,\1,'; \
+  sed_rest='s,^[^/]*/*,,'; \
+  sed_last='s,^.*/\([^/]*\)$$,\1,'; \
+  sed_butlast='s,/*[^/]*$$,,'; \
+  while test -n "$$dir1"; do \
+    first=`echo "$$dir1" | sed -e "$$sed_first"`; \
+    if test "$$first" != "."; then \
+      if test "$$first" = ".."; then \
+        dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \
+        dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \
+      else \
+        first2=`echo "$$dir2" | sed -e "$$sed_first"`; \
+        if test "$$first2" = "$$first"; then \
+          dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \
+        else \
+          dir2="../$$dir2"; \
+        fi; \
+        dir0="$$dir0"/"$$first"; \
+      fi; \
+    fi; \
+    dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \
+  done; \
+  reldir="$$dir2"
+ABI = @ABI@
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AS = @AS@
+ASMFLAGS = @ASMFLAGS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CALLING_CONVENTIONS_OBJS = @CALLING_CONVENTIONS_OBJS@
+CC = @CC@
+CCAS = @CCAS@
+CC_FOR_BUILD = @CC_FOR_BUILD@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CPP_FOR_BUILD = @CPP_FOR_BUILD@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFN_LONG_LONG_LIMB = @DEFN_LONG_LONG_LIMB@
+DEFS = @DEFS@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+EXEEXT_FOR_BUILD = @EXEEXT_FOR_BUILD@
+FGREP = @FGREP@
+GMP_LDFLAGS = @GMP_LDFLAGS@
+GMP_LIMB_BITS = @GMP_LIMB_BITS@
+GMP_NAIL_BITS = @GMP_NAIL_BITS@
+GREP = @GREP@
+HAVE_CLOCK_01 = @HAVE_CLOCK_01@
+HAVE_CPUTIME_01 = @HAVE_CPUTIME_01@
+HAVE_GETRUSAGE_01 = @HAVE_GETRUSAGE_01@
+HAVE_GETTIMEOFDAY_01 = @HAVE_GETTIMEOFDAY_01@
+HAVE_HOST_CPU_FAMILY_power = @HAVE_HOST_CPU_FAMILY_power@
+HAVE_HOST_CPU_FAMILY_powerpc = @HAVE_HOST_CPU_FAMILY_powerpc@
+HAVE_SIGACTION_01 = @HAVE_SIGACTION_01@
+HAVE_SIGALTSTACK_01 = @HAVE_SIGALTSTACK_01@
+HAVE_SIGSTACK_01 = @HAVE_SIGSTACK_01@
+HAVE_STACK_T_01 = @HAVE_STACK_T_01@
+HAVE_SYS_RESOURCE_H_01 = @HAVE_SYS_RESOURCE_H_01@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LEX = @LEX@
+LEXLIB = @LEXLIB@
+LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
+LIBCURSES = @LIBCURSES@
+LIBGMPXX_LDFLAGS = @LIBGMPXX_LDFLAGS@
+LIBGMP_DLL = @LIBGMP_DLL@
+LIBGMP_LDFLAGS = @LIBGMP_LDFLAGS@
+LIBM = @LIBM@
+LIBM_FOR_BUILD = @LIBM_FOR_BUILD@
+LIBOBJS = @LIBOBJS@
+LIBREADLINE = @LIBREADLINE@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+M4 = @M4@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
+STRIP = @STRIP@
+TAL_OBJECT = @TAL_OBJECT@
+TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
+U = @U@
+U_FOR_BUILD = @U_FOR_BUILD@
+VERSION = @VERSION@
+WITH_READLINE_01 = @WITH_READLINE_01@
+YACC = @YACC@
+YFLAGS = @YFLAGS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__leading_dot = @am__leading_dot@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+gmp_srclinks = @gmp_srclinks@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+mpn_objects = @mpn_objects@
+mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
+mpn_objs_in_libmp = @mpn_objs_in_libmp@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+SUBDIRS = calc expr
+EXTRA_DIST = perl
+INCLUDES = -I$(top_srcdir)
+LDADD = $(top_builddir)/libgmp.la
+qcn_LDADD = $(LDADD) $(LIBM)
+primes_LDADD = $(LDADD) $(LIBM)
+CLEANFILES = $(EXTRA_PROGRAMS)
+all: all-recursive
+
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps)
+       @for dep in $?; do \
+         case '$(am__configure_deps)' in \
+           *$$dep*) \
+             ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+               && { if test -f $@; then exit 0; else break; fi; }; \
+             exit 1;; \
+         esac; \
+       done; \
+       echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps demos/Makefile'; \
+       $(am__cd) $(top_srcdir) && \
+         $(AUTOMAKE) --gnu --ignore-deps demos/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+       @case '$?' in \
+         *config.status*) \
+           cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+         *) \
+           echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+           cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+       esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+pexpr-config.h: $(top_builddir)/config.status $(srcdir)/pexpr-config-h.in
+       cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@
+factorize$(EXEEXT): $(factorize_OBJECTS) $(factorize_DEPENDENCIES) 
+       @rm -f factorize$(EXEEXT)
+       $(LINK) $(factorize_OBJECTS) $(factorize_LDADD) $(LIBS)
+isprime$(EXEEXT): $(isprime_OBJECTS) $(isprime_DEPENDENCIES) 
+       @rm -f isprime$(EXEEXT)
+       $(LINK) $(isprime_OBJECTS) $(isprime_LDADD) $(LIBS)
+pexpr$(EXEEXT): $(pexpr_OBJECTS) $(pexpr_DEPENDENCIES) 
+       @rm -f pexpr$(EXEEXT)
+       $(LINK) $(pexpr_OBJECTS) $(pexpr_LDADD) $(LIBS)
+primes$(EXEEXT): $(primes_OBJECTS) $(primes_DEPENDENCIES) 
+       @rm -f primes$(EXEEXT)
+       $(LINK) $(primes_OBJECTS) $(primes_LDADD) $(LIBS)
+qcn$(EXEEXT): $(qcn_OBJECTS) $(qcn_DEPENDENCIES) 
+       @rm -f qcn$(EXEEXT)
+       $(LINK) $(qcn_OBJECTS) $(qcn_LDADD) $(LIBS)
+
+mostlyclean-compile:
+       -rm -f *.$(OBJEXT)
+
+distclean-compile:
+       -rm -f *.tab.c
+$(top_builddir)/ansi2knr:
+       $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
+
+mostlyclean-kr:
+       -test "$U" = "" || rm -f *_.c
+
+.c.o:
+       $(COMPILE) -c $<
+
+.c.obj:
+       $(COMPILE) -c `$(CYGPATH_W) '$<'`
+
+.c.lo:
+       $(LTCOMPILE) -c -o $@ $<
+factorize_.c: factorize.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/factorize.c; then echo $(srcdir)/factorize.c; else echo factorize.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+isprime_.c: isprime.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/isprime.c; then echo $(srcdir)/isprime.c; else echo isprime.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+pexpr_.c: pexpr.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/pexpr.c; then echo $(srcdir)/pexpr.c; else echo pexpr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+primes_.c: primes.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/primes.c; then echo $(srcdir)/primes.c; else echo primes.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+qcn_.c: qcn.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/qcn.c; then echo $(srcdir)/qcn.c; else echo qcn.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+factorize_.$(OBJEXT) factorize_.lo isprime_.$(OBJEXT) isprime_.lo \
+pexpr_.$(OBJEXT) pexpr_.lo primes_.$(OBJEXT) primes_.lo qcn_.$(OBJEXT) \
+qcn_.lo : $(ANSI2KNR)
+
+mostlyclean-libtool:
+       -rm -f *.lo
+
+clean-libtool:
+       -rm -rf .libs _libs
+
+# This directory's subdirectories are mostly independent; you can cd
+# into them and run `make' without going through this Makefile.
+# To change the values of `make' variables: instead of editing Makefiles,
+# (1) if the variable is set in `config.status', edit `config.status'
+#     (which will cause the Makefiles to be regenerated when you run `make');
+# (2) otherwise, pass the desired values on the `make' command line.
+$(RECURSIVE_TARGETS):
+       @fail= failcom='exit 1'; \
+       for f in x $$MAKEFLAGS; do \
+         case $$f in \
+           *=* | --[!k]*);; \
+           *k*) failcom='fail=yes';; \
+         esac; \
+       done; \
+       dot_seen=no; \
+       target=`echo $@ | sed s/-recursive//`; \
+       list='$(SUBDIRS)'; for subdir in $$list; do \
+         echo "Making $$target in $$subdir"; \
+         if test "$$subdir" = "."; then \
+           dot_seen=yes; \
+           local_target="$$target-am"; \
+         else \
+           local_target="$$target"; \
+         fi; \
+         ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
+         || eval $$failcom; \
+       done; \
+       if test "$$dot_seen" = "no"; then \
+         $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
+       fi; test -z "$$fail"
+
+$(RECURSIVE_CLEAN_TARGETS):
+       @fail= failcom='exit 1'; \
+       for f in x $$MAKEFLAGS; do \
+         case $$f in \
+           *=* | --[!k]*);; \
+           *k*) failcom='fail=yes';; \
+         esac; \
+       done; \
+       dot_seen=no; \
+       case "$@" in \
+         distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \
+         *) list='$(SUBDIRS)' ;; \
+       esac; \
+       rev=''; for subdir in $$list; do \
+         if test "$$subdir" = "."; then :; else \
+           rev="$$subdir $$rev"; \
+         fi; \
+       done; \
+       rev="$$rev ."; \
+       target=`echo $@ | sed s/-recursive//`; \
+       for subdir in $$rev; do \
+         echo "Making $$target in $$subdir"; \
+         if test "$$subdir" = "."; then \
+           local_target="$$target-am"; \
+         else \
+           local_target="$$target"; \
+         fi; \
+         ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
+         || eval $$failcom; \
+       done && test -z "$$fail"
+tags-recursive:
+       list='$(SUBDIRS)'; for subdir in $$list; do \
+         test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \
+       done
+ctags-recursive:
+       list='$(SUBDIRS)'; for subdir in $$list; do \
+         test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \
+       done
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+       list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       mkid -fID $$unique
+tags: TAGS
+
+TAGS: tags-recursive $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+               $(TAGS_FILES) $(LISP)
+       set x; \
+       here=`pwd`; \
+       if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \
+         include_option=--etags-include; \
+         empty_fix=.; \
+       else \
+         include_option=--include; \
+         empty_fix=; \
+       fi; \
+       list='$(SUBDIRS)'; for subdir in $$list; do \
+         if test "$$subdir" = .; then :; else \
+           test ! -f $$subdir/TAGS || \
+             set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \
+         fi; \
+       done; \
+       list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       shift; \
+       if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+         test -n "$$unique" || unique=$$empty_fix; \
+         if test $$# -gt 0; then \
+           $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+             "$$@" $$unique; \
+         else \
+           $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+             $$unique; \
+         fi; \
+       fi
+ctags: CTAGS
+CTAGS: ctags-recursive $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+               $(TAGS_FILES) $(LISP)
+       list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       test -z "$(CTAGS_ARGS)$$unique" \
+         || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+            $$unique
+
+GTAGS:
+       here=`$(am__cd) $(top_builddir) && pwd` \
+         && $(am__cd) $(top_srcdir) \
+         && gtags -i $(GTAGS_ARGS) "$$here"
+
+distclean-tags:
+       -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+       @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+       topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+       list='$(DISTFILES)'; \
+         dist_files=`for file in $$list; do echo $$file; done | \
+         sed -e "s|^$$srcdirstrip/||;t" \
+             -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+       case $$dist_files in \
+         */*) $(MKDIR_P) `echo "$$dist_files" | \
+                          sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+                          sort -u` ;; \
+       esac; \
+       for file in $$dist_files; do \
+         if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+         if test -d $$d/$$file; then \
+           dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+           if test -d "$(distdir)/$$file"; then \
+             find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+           fi; \
+           if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+             cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+             find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+           fi; \
+           cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+         else \
+           test -f "$(distdir)/$$file" \
+           || cp -p $$d/$$file "$(distdir)/$$file" \
+           || exit 1; \
+         fi; \
+       done
+       @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
+         if test "$$subdir" = .; then :; else \
+           test -d "$(distdir)/$$subdir" \
+           || $(MKDIR_P) "$(distdir)/$$subdir" \
+           || exit 1; \
+         fi; \
+       done
+       @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
+         if test "$$subdir" = .; then :; else \
+           dir1=$$subdir; dir2="$(distdir)/$$subdir"; \
+           $(am__relativize); \
+           new_distdir=$$reldir; \
+           dir1=$$subdir; dir2="$(top_distdir)"; \
+           $(am__relativize); \
+           new_top_distdir=$$reldir; \
+           echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \
+           echo "     am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \
+           ($(am__cd) $$subdir && \
+             $(MAKE) $(AM_MAKEFLAGS) \
+               top_distdir="$$new_top_distdir" \
+               distdir="$$new_distdir" \
+               am__remove_distdir=: \
+               am__skip_length_check=: \
+               am__skip_mode_fix=: \
+               distdir) \
+             || exit 1; \
+         fi; \
+       done
+check-am: all-am
+check: check-recursive
+all-am: Makefile
+installdirs: installdirs-recursive
+installdirs-am:
+install: install-recursive
+install-exec: install-exec-recursive
+install-data: install-data-recursive
+uninstall: uninstall-recursive
+
+install-am: all-am
+       @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-recursive
+install-strip:
+       $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+         install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+         `test -z '$(STRIP)' || \
+           echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+       -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES)
+
+distclean-generic:
+       -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+       -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+       @echo "This command is intended for maintainers to use"
+       @echo "it deletes files that may require special tools to rebuild."
+clean: clean-recursive
+
+clean-am: clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-recursive
+       -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+       distclean-tags
+
+dvi: dvi-recursive
+
+dvi-am:
+
+html: html-recursive
+
+html-am:
+
+info: info-recursive
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-recursive
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-recursive
+
+install-html-am:
+
+install-info: install-info-recursive
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-recursive
+
+install-pdf-am:
+
+install-ps: install-ps-recursive
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-recursive
+       -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-recursive
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+       mostlyclean-libtool
+
+pdf: pdf-recursive
+
+pdf-am:
+
+ps: ps-recursive
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) \
+       $(top_builddir)/ansi2knr ctags-recursive install-am \
+       install-strip tags-recursive
+
+.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \
+       all all-am check check-am clean clean-generic clean-libtool \
+       ctags ctags-recursive distclean distclean-compile \
+       distclean-generic distclean-libtool distclean-tags distdir dvi \
+       dvi-am html html-am info info-am install install-am \
+       install-data install-data-am install-dvi install-dvi-am \
+       install-exec install-exec-am install-html install-html-am \
+       install-info install-info-am install-man install-pdf \
+       install-pdf-am install-ps install-ps-am install-strip \
+       installcheck installcheck-am installdirs installdirs-am \
+       maintainer-clean maintainer-clean-generic mostlyclean \
+       mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+       mostlyclean-libtool pdf pdf-am ps ps-am tags tags-recursive \
+       uninstall uninstall-am
+
+
+# Build the optional demo programs in this directory and in the calc and expr
+# subdirectories.  "&&" is used so that a failed cd does not go on to run
+# "$(MAKE) allprogs" in the current directory, which would re-enter this rule.
+allprogs: $(EXTRA_PROGRAMS)
+       cd calc && $(MAKE) $(AM_MAKEFLAGS) allprogs
+       cd expr && $(MAKE) $(AM_MAKEFLAGS) allprogs
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/demos/calc/Makefile.am b/demos/calc/Makefile.am

new file mode 100644 (file)

index 0000000..da64907
--- /dev/null
+++ b/demos/calc/Makefile.am
@@ -0,0 +1,36 @@
+## Process this file with automake to generate Makefile.in
+
+# Copyright 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+# Search the top-level source directory for headers.
+INCLUDES = -I$(top_srcdir)
+
+# $(LEXLIB) is not actually needed for flex (which means the distributed
+# calclex.c), but it's included here for the benefit of anyone rebuilding
+# with some other lex.
+#
+LDADD = $(top_builddir)/libgmp.la $(LIBREADLINE) $(LIBCURSES) $(LEXLIB)
+
+# calc is listed only in EXTRA_PROGRAMS, so it is built on request (through
+# the allprogs target below, or "make calc") rather than by a plain "make".
+EXTRA_PROGRAMS = calc
+# -d asks yacc to write a header as well as the parser; for calc.y that header
+# is calc.h, which is listed in BUILT_SOURCES.
+AM_YFLAGS = -d
+calc_SOURCES = calc.y calclex.l calcread.c calc-common.h
+BUILT_SOURCES = calc.h
+
+# "make clean" removes the built program.
+CLEANFILES = $(EXTRA_PROGRAMS)
+
+allprogs: $(EXTRA_PROGRAMS)
diff --git a/demos/calc/Makefile.in b/demos/calc/Makefile.in

new file mode 100644 (file)

index 0000000..9053e0f
--- /dev/null
+++ b/demos/calc/Makefile.in
@@ -0,0 +1,564 @@
+# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation,
+# Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+# Copyright 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+ANSI2KNR = $(top_builddir)/ansi2knr
+EXTRA_PROGRAMS = calc$(EXEEXT)
+subdir = demos/calc
+DIST_COMMON = README $(srcdir)/Makefile.am $(srcdir)/Makefile.in \
+       $(srcdir)/calc-config-h.in calc.c calc.h calclex.c
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
+       $(top_srcdir)/configure.in
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+       $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES = calc-config.h
+CONFIG_CLEAN_VPATH_FILES =
+am_calc_OBJECTS = calc$U.$(OBJEXT) calclex$U.$(OBJEXT) \
+       calcread$U.$(OBJEXT)
+calc_OBJECTS = $(am_calc_OBJECTS)
+calc_LDADD = $(LDADD)
+am__DEPENDENCIES_1 =
+calc_DEPENDENCIES = $(top_builddir)/libgmp.la $(am__DEPENDENCIES_1) \
+       $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1)
+DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
+depcomp =
+am__depfiles_maybe =
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+       $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+       $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
+       $(LDFLAGS) -o $@
+@MAINTAINER_MODE_FALSE@am__skiplex = test -f $@ ||
+LEXCOMPILE = $(LEX) $(LFLAGS) $(AM_LFLAGS)
+LTLEXCOMPILE = $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=compile $(LEX) $(LFLAGS) $(AM_LFLAGS)
+YLWRAP = $(top_srcdir)/ylwrap
+@MAINTAINER_MODE_FALSE@am__skipyacc = test -f $@ ||
+YACCCOMPILE = $(YACC) $(YFLAGS) $(AM_YFLAGS)
+LTYACCCOMPILE = $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=compile $(YACC) $(YFLAGS) $(AM_YFLAGS)
+SOURCES = $(calc_SOURCES)
+DIST_SOURCES = $(calc_SOURCES)
+ETAGS = etags
+CTAGS = ctags
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ABI = @ABI@
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AS = @AS@
+ASMFLAGS = @ASMFLAGS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CALLING_CONVENTIONS_OBJS = @CALLING_CONVENTIONS_OBJS@
+CC = @CC@
+CCAS = @CCAS@
+CC_FOR_BUILD = @CC_FOR_BUILD@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CPP_FOR_BUILD = @CPP_FOR_BUILD@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFN_LONG_LONG_LIMB = @DEFN_LONG_LONG_LIMB@
+DEFS = @DEFS@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+EXEEXT_FOR_BUILD = @EXEEXT_FOR_BUILD@
+FGREP = @FGREP@
+GMP_LDFLAGS = @GMP_LDFLAGS@
+GMP_LIMB_BITS = @GMP_LIMB_BITS@
+GMP_NAIL_BITS = @GMP_NAIL_BITS@
+GREP = @GREP@
+HAVE_CLOCK_01 = @HAVE_CLOCK_01@
+HAVE_CPUTIME_01 = @HAVE_CPUTIME_01@
+HAVE_GETRUSAGE_01 = @HAVE_GETRUSAGE_01@
+HAVE_GETTIMEOFDAY_01 = @HAVE_GETTIMEOFDAY_01@
+HAVE_HOST_CPU_FAMILY_power = @HAVE_HOST_CPU_FAMILY_power@
+HAVE_HOST_CPU_FAMILY_powerpc = @HAVE_HOST_CPU_FAMILY_powerpc@
+HAVE_SIGACTION_01 = @HAVE_SIGACTION_01@
+HAVE_SIGALTSTACK_01 = @HAVE_SIGALTSTACK_01@
+HAVE_SIGSTACK_01 = @HAVE_SIGSTACK_01@
+HAVE_STACK_T_01 = @HAVE_STACK_T_01@
+HAVE_SYS_RESOURCE_H_01 = @HAVE_SYS_RESOURCE_H_01@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LEX = @LEX@
+LEXLIB = @LEXLIB@
+LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
+LIBCURSES = @LIBCURSES@
+LIBGMPXX_LDFLAGS = @LIBGMPXX_LDFLAGS@
+LIBGMP_DLL = @LIBGMP_DLL@
+LIBGMP_LDFLAGS = @LIBGMP_LDFLAGS@
+LIBM = @LIBM@
+LIBM_FOR_BUILD = @LIBM_FOR_BUILD@
+LIBOBJS = @LIBOBJS@
+LIBREADLINE = @LIBREADLINE@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+M4 = @M4@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
+STRIP = @STRIP@
+TAL_OBJECT = @TAL_OBJECT@
+TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
+U = @U@
+U_FOR_BUILD = @U_FOR_BUILD@
+VERSION = @VERSION@
+WITH_READLINE_01 = @WITH_READLINE_01@
+YACC = @YACC@
+YFLAGS = @YFLAGS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__leading_dot = @am__leading_dot@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+gmp_srclinks = @gmp_srclinks@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+mpn_objects = @mpn_objects@
+mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
+mpn_objs_in_libmp = @mpn_objs_in_libmp@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+INCLUDES = -I$(top_srcdir)
+
+# $(LEXLIB) is not actually needed for flex (which means the distributed
+# calclex.c), but it's included here for the benefit of anyone rebuilding
+# with some other lex.
+#
+LDADD = $(top_builddir)/libgmp.la $(LIBREADLINE) $(LIBCURSES) $(LEXLIB)
+AM_YFLAGS = -d
+calc_SOURCES = calc.y calclex.l calcread.c calc-common.h
+BUILT_SOURCES = calc.h
+CLEANFILES = $(EXTRA_PROGRAMS)
+all: $(BUILT_SOURCES)
+       $(MAKE) $(AM_MAKEFLAGS) all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .l .lo .o .obj .y
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps)
+       @for dep in $?; do \
+         case '$(am__configure_deps)' in \
+           *$$dep*) \
+             ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+               && { if test -f $@; then exit 0; else break; fi; }; \
+             exit 1;; \
+         esac; \
+       done; \
+       echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps demos/calc/Makefile'; \
+       $(am__cd) $(top_srcdir) && \
+         $(AUTOMAKE) --gnu --ignore-deps demos/calc/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+       @case '$?' in \
+         *config.status*) \
+           cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+         *) \
+           echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+           cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+       esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+calc-config.h: $(top_builddir)/config.status $(srcdir)/calc-config-h.in
+       cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@
+# calc.h is produced as a by-product of generating calc.c (the .y.c rule
+# passes $*.h to ylwrap).  If calc.h is missing while calc.c is still present,
+# remove calc.c and re-make it so that the header gets regenerated as well.
+calc.h: calc.c
+       @if test ! -f $@; then \
+         rm -f calc.c; \
+         $(MAKE) $(AM_MAKEFLAGS) calc.c; \
+       else :; fi
+calc$(EXEEXT): $(calc_OBJECTS) $(calc_DEPENDENCIES) 
+       @rm -f calc$(EXEEXT)
+       $(LINK) $(calc_OBJECTS) $(calc_LDADD) $(LIBS)
+
+mostlyclean-compile:
+       -rm -f *.$(OBJEXT)
+
+distclean-compile:
+       -rm -f *.tab.c
+$(top_builddir)/ansi2knr:
+       $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
+
+mostlyclean-kr:
+       -test "$U" = "" || rm -f *_.c
+
+.c.o:
+       $(COMPILE) -c $<
+
+.c.obj:
+       $(COMPILE) -c `$(CYGPATH_W) '$<'`
+
+.c.lo:
+       $(LTCOMPILE) -c -o $@ $<
+calc_.c: calc.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/calc.c; then echo $(srcdir)/calc.c; else echo calc.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+calclex_.c: calclex.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/calclex.c; then echo $(srcdir)/calclex.c; else echo calclex.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+calcread_.c: calcread.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/calcread.c; then echo $(srcdir)/calcread.c; else echo calcread.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+calc_.$(OBJEXT) calc_.lo calclex_.$(OBJEXT) calclex_.lo \
+calcread_.$(OBJEXT) calcread_.lo : $(ANSI2KNR)
+
+# Generate a .c file from a .l file by running $(LEXCOMPILE) through the
+# ylwrap script, which is given $(LEX_OUTPUT_ROOT).c and the target name.
+# $(am__skiplex) is only defined when maintainer mode is off, where it expands
+# to "test -f $@ ||" so that an already existing target is not regenerated.
+.l.c:
+       $(am__skiplex) $(SHELL) $(YLWRAP) $< $(LEX_OUTPUT_ROOT).c $@ -- $(LEXCOMPILE)
+
+# Generate a .c file from a .y file by running $(YACCCOMPILE) through the
+# ylwrap script, which is given the name pairs y.tab.c/$@, y.tab.h/$*.h and
+# y.output/$*.output.  $(am__skipyacc) is only defined when maintainer mode is
+# off, where it expands to "test -f $@ ||" so that an already existing target
+# is not regenerated.
+.y.c:
+       $(am__skipyacc) $(SHELL) $(YLWRAP) $< y.tab.c $@ y.tab.h $*.h y.output $*.output -- $(YACCCOMPILE)
+
+mostlyclean-libtool:
+       -rm -f *.lo
+
+clean-libtool:
+       -rm -rf .libs _libs
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+       list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       mkid -fID $$unique
+tags: TAGS
+
+TAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+               $(TAGS_FILES) $(LISP)
+       set x; \
+       here=`pwd`; \
+       list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       shift; \
+       if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+         test -n "$$unique" || unique=$$empty_fix; \
+         if test $$# -gt 0; then \
+           $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+             "$$@" $$unique; \
+         else \
+           $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+             $$unique; \
+         fi; \
+       fi
+ctags: CTAGS
+CTAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+               $(TAGS_FILES) $(LISP)
+       list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       test -z "$(CTAGS_ARGS)$$unique" \
+         || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+            $$unique
+
+GTAGS:
+       here=`$(am__cd) $(top_builddir) && pwd` \
+         && $(am__cd) $(top_srcdir) \
+         && gtags -i $(GTAGS_ARGS) "$$here"
+
+distclean-tags:
+       -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+       @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+       topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+       list='$(DISTFILES)'; \
+         dist_files=`for file in $$list; do echo $$file; done | \
+         sed -e "s|^$$srcdirstrip/||;t" \
+             -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+       case $$dist_files in \
+         */*) $(MKDIR_P) `echo "$$dist_files" | \
+                          sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+                          sort -u` ;; \
+       esac; \
+       for file in $$dist_files; do \
+         if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+         if test -d $$d/$$file; then \
+           dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+           if test -d "$(distdir)/$$file"; then \
+             find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+           fi; \
+           if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+             cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+             find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+           fi; \
+           cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+         else \
+           test -f "$(distdir)/$$file" \
+           || cp -p $$d/$$file "$(distdir)/$$file" \
+           || exit 1; \
+         fi; \
+       done
+check-am: all-am
+check: $(BUILT_SOURCES)
+       $(MAKE) $(AM_MAKEFLAGS) check-am
+all-am: Makefile
+installdirs:
+install: $(BUILT_SOURCES)
+       $(MAKE) $(AM_MAKEFLAGS) install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+       @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+       $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+         install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+         `test -z '$(STRIP)' || \
+           echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+       -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES)
+
+distclean-generic:
+       -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+       -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+# calc.c, calc.h and calclex.c are yacc/lex output that is shipped with the
+# distribution (see DIST_COMMON), so they are removed only by this
+# maintainer-level clean and not by clean or distclean.
+maintainer-clean-generic:
+       @echo "This command is intended for maintainers to use"
+       @echo "it deletes files that may require special tools to rebuild."
+       -rm -f calc.c
+       -rm -f calc.h
+       -rm -f calclex.c
+       -test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES)
+clean: clean-am
+
+clean-am: clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-am
+       -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+       distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+       -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+       mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: $(top_builddir)/ansi2knr all check install install-am \
+       install-strip
+
+.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
+       clean-libtool ctags distclean distclean-compile \
+       distclean-generic distclean-libtool distclean-tags distdir dvi \
+       dvi-am html html-am info info-am install install-am \
+       install-data install-data-am install-dvi install-dvi-am \
+       install-exec install-exec-am install-html install-html-am \
+       install-info install-info-am install-man install-pdf \
+       install-pdf-am install-ps install-ps-am install-strip \
+       installcheck installcheck-am installdirs maintainer-clean \
+       maintainer-clean-generic mostlyclean mostlyclean-compile \
+       mostlyclean-generic mostlyclean-kr mostlyclean-libtool pdf \
+       pdf-am ps ps-am tags uninstall uninstall-am
+
+
+allprogs: $(EXTRA_PROGRAMS)
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/demos/calc/README b/demos/calc/README

new file mode 100644 (file)

index 0000000..f2f6130
--- /dev/null
+++ b/demos/calc/README
@@ -0,0 +1,65 @@
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+PARTICULAR PURPOSE.  See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program.  If not, see http://www.gnu.org/licenses/.
+
+
+
+
+                   DEMONSTRATION CALCULATOR PROGRAM
+
+
+This is a simple program, meant only to show one way to use GMP with yacc
+and lex to make a calculator.  Usage and comments on the implementation can
+be found in calc.y.
+
+Within a GMP build tree, the generated Makefile can be used to build the
+program,
+
+       make calc
+
+(or on a DOS system, "make calc.exe").
+
+Elsewhere, once GMP has been installed, the program can be compiled with, for
+instance,
+
+       gcc calc.c calclex.c -lgmp -o calc
+
+Or if GNU readline is used then
+
+       gcc calc.c calclex.c calcread.c -lgmp -lreadline -o calc
+
+(again, on a DOS system "-o calc.exe").
+
+Readline support can be enabled or disabled in calc-config.h.  That file is
+created by the GMP ./configure based on the --with-readline option.  The
+default is --with-readline=detect, which means to use readline if available.
+"yes" can be used to force it to be used, or "no" to not use it.
+
+The supplied calc.c was generated by GNU bison, but a standard yacc should
+work too.
+
+The supplied calclex.c was generated by GNU flex, but a standard lex should
+work too.  The readline support may or may not work with a standard lex (see
+comments with input() in calcread.c).  Note also that a standard lex will
+require its library "-ll" on the compile command line.  "./configure" sets
+this up in the GMP build tree Makefile.
+
+
+
+----------------
+Local variables:
+mode: text
+fill-column: 76
+End:
diff --git a/demos/calc/calc-common.h b/demos/calc/calc-common.h

new file mode 100644 (file)

index 0000000..c9ca8a6
--- /dev/null
+++ b/demos/calc/calc-common.h
@@ -0,0 +1,35 @@
+/* Prototypes etc for calc program.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+PARTICULAR PURPOSE.  See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program.  If not, see http://www.gnu.org/licenses/.  */
+
+/* Include guard: a second inclusion would otherwise redefine
+   struct calc_keywords_t.  */
+#ifndef CALC_COMMON_H
+#define CALC_COMMON_H
+
+#include <stddef.h>  /* for size_t */
+
+/* A file that must not pull in calc.h can define NO_CALC_H before including
+   this header.  */
+#ifndef NO_CALC_H
+#include "calc.h"
+#endif
+#include "calc-config.h"
+
+/* One entry of the calc_keywords table: a name and the integer value that
+   goes with it (presumably a parser token code -- confirm against calc.y).  */
+struct calc_keywords_t {
+  char  *name;
+  int   value;
+};
+
+extern int  calc_option_readline;
+extern int  calc_more_input;
+extern const struct calc_keywords_t  calc_keywords[];
+
+/* NOTE(review): presumably stores up to max_size bytes of input in buf and
+   returns the count stored -- confirm against calcread.c.  */
+int calc_input (char *buf, size_t max_size);
+void calc_init_readline (void);
+
+#endif /* CALC_COMMON_H */
diff --git a/demos/calc/calc-config-h.in b/demos/calc/calc-config-h.in

new file mode 100644 (file)

index 0000000..6ecf43e
--- /dev/null
+++ b/demos/calc/calc-config-h.in
@@ -0,0 +1,21 @@
+/* Templates for calc program configuration.   -*- mode:c -*-
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+PARTICULAR PURPOSE.  See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program.  If not, see http://www.gnu.org/licenses/.  */
+
+
+/* Define if GNU readline should be used. */
+#define WITH_READLINE @WITH_READLINE_01@
diff --git a/demos/calc/calc.c b/demos/calc/calc.c

new file mode 100644 (file)

index 0000000..40ed02e
--- /dev/null
+++ b/demos/calc/calc.c
@@ -0,0 +1,2308 @@
+/* A Bison parser, made by GNU Bison 2.5.  */
+
+/* Bison implementation for Yacc-like parsers in C
+   
+      Copyright (C) 1984, 1989-1990, 2000-2011 Free Software Foundation, Inc.
+   
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+   
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+   
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+/* As a special exception, you may create a larger work that contains
+   part or all of the Bison parser skeleton and distribute that work
+   under terms of your choice, so long as that work isn't itself a
+   parser generator using the skeleton or a modified version thereof
+   as a parser skeleton.  Alternatively, if you modify or redistribute
+   the parser skeleton itself, you may (at your option) remove this
+   special exception, which will cause the skeleton and the resulting
+   Bison output files to be licensed under the GNU General Public
+   License without this special exception.
+   
+   This special exception was added by the Free Software Foundation in
+   version 2.2 of Bison.  */
+
+/* C LALR(1) parser skeleton written by Richard Stallman, by
+   simplifying the original so-called "semantic" parser.  */
+
+/* All symbols defined below should begin with yy or YY, to avoid
+   infringing on user name space.  This should be done even for local
+   variables, as they might otherwise be expanded by user macros.
+   There are some unavoidable exceptions within include files to
+   define necessary library symbols; they are noted "INFRINGES ON
+   USER NAME SPACE" below.  */
+
+/* Identify Bison output.  */
+#define YYBISON 1
+
+/* Bison version.  */
+#define YYBISON_VERSION "2.5"
+
+/* Skeleton name.  */
+#define YYSKELETON_NAME "yacc.c"
+
+/* Pure parsers.  */
+#define YYPURE 0
+
+/* Push parsers.  */
+#define YYPUSH 0
+
+/* Pull parsers.  */
+#define YYPULL 1
+
+/* Using locations.  */
+#define YYLSP_NEEDED 0
+
+
+
+/* Copy the first part of user declarations.  */
+
+/* Line 268 of yacc.c  */
+#line 1 "calc.y"
+
+/* A simple integer desk calculator using yacc and gmp.
+
+Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+PARTICULAR PURPOSE.  See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program.  If not, see http://www.gnu.org/licenses/.  */
+
+
+/* This is a simple program, meant only to show one way to use GMP for this
+   sort of thing.  There are few features, and error checking is minimal.
+   Standard input is read, calc_help() below shows the inputs accepted.
+
+   Expressions are evaluated as they're read.  If user defined functions
+   were wanted it'd be necessary to build a parse tree like pexpr.c does, or
+   a list of operations for a stack based evaluator.  That would also make
+   it possible to detect and optimize evaluations "mod m" like pexpr.c does.
+
+   A stack is used for intermediate values in the expression evaluation,
+   separate from the yacc parser stack.  This is simple, makes error
+   recovery easy, minimizes the junk around mpz calls in the rules, and
+   saves initializing or clearing "mpz_t"s during a calculation.  A
+   disadvantage though is that variables must be copied to the stack to be
+   worked on.  A more sophisticated calculator or language system might be
+   able to avoid that when executing a compiled or semi-compiled form.
+
+   Avoiding repeated initializing and clearing of "mpz_t"s is important.  In
+   this program the time spent parsing is obviously much greater than any
+   possible saving from this, but a proper calculator or language should
+   take some trouble over it.  Don't be surprised if an init/clear takes 3
+   or more times as long as a 10 limb addition, depending on the system (see
+   the mpz_init_realloc_clear example in tune/README).  */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "gmp.h"
+#define NO_CALC_H /* because it conflicts with normal calc.c stuff */
+#include "calc-common.h"
+
+
+#define numberof(x)  (sizeof (x) / sizeof ((x)[0]))
+
+/* Print a summary of the accepted input syntax to stdout.  */
+void
+calc_help (void)
+{
+  printf ("Examples:\n");
+  printf ("    2+3*4        expressions are evaluated\n");
+  printf ("    x=5^6        variables a to z can be set and used\n");
+  printf ("Operators:\n");
+  printf ("    + - *        arithmetic\n");
+  printf ("    / %%          division and remainder (rounding towards negative infinity)\n");
+  printf ("    ^            exponentiation\n");
+  printf ("    !            factorial\n");
+  printf ("    << >>        left and right shifts\n");
+  printf ("    <= >= >      \\ comparisons, giving 1 if true, 0 if false\n");
+  printf ("    == != <      /\n");
+  printf ("    && ||        logical and/or, giving 1 if true, 0 if false\n");
+  printf ("Functions:\n");
+  printf ("    abs(n)       absolute value\n");
+  printf ("    bin(n,m)     binomial coefficient\n");
+  printf ("    fib(n)       fibonacci number\n");
+  printf ("    gcd(a,b,..)  greatest common divisor\n");
+  printf ("    kron(a,b)    kronecker symbol\n");
+  printf ("    lcm(a,b,..)  least common multiple\n");
+  printf ("    lucnum(n)    lucas number\n");
+  printf ("    nextprime(n) next prime after n\n");
+  printf ("    powm(b,e,m)  modulo powering, b^e%%m\n");
+  printf ("    root(n,r)    r-th root\n");
+  printf ("    sqrt(n)      square root\n");
+  printf ("Other:\n");
+  printf ("    hex          \\ set hex or decimal for input and output\n");
+  printf ("    decimal      /   (\"0x\" can be used for hex too)\n");
+  printf ("    quit         exit program (EOF works too)\n");
+  printf ("    ;            statements are separated with a ; or newline\n");
+  printf ("    \\            continue expressions with \\ before newline\n");
+  printf ("    # xxx        comments are # through to newline\n");
+  printf ("Hex numbers must be entered in upper case, to distinguish them from the\n");
+  printf ("variables a to f (like in bc).\n");
+}
+
+
+int  ibase = 0;
+int  obase = 10;
+
+
+/* The stack is a fixed size, which means there's a limit on the nesting
+   allowed in expressions.  A more sophisticated program could let it grow
+   dynamically.  */
+
+mpz_t    stack[100];
+mpz_ptr  sp = stack[0];
+
+#define CHECK_OVERFLOW()                                                  \
+  if (sp >= stack[numberof(stack)])    /* FIXME */                     \
+    {                                                                     \
+      fprintf (stderr,                                                    \
+               "Value stack overflow, too much nesting in expression\n"); \
+      YYERROR;                                                            \
+    }
+
+#define CHECK_EMPTY()                                                   \
+  if (sp != stack[0])                                                   \
+    {                                                                   \
+      fprintf (stderr, "Oops, expected the value stack to be empty\n"); \
+      sp = stack[0];                                                    \
+    }
+
+
+mpz_t  variable[26];
+
+#define CHECK_VARIABLE(var)                                             \
+  if ((var) < 0 || (var) >= numberof (variable))                        \
+    {                                                                   \
+      fprintf (stderr, "Oops, bad variable somehow: %d\n", var);        \
+      YYERROR;                                                          \
+    }
+
+
+#define CHECK_UI(name,z)                        \
+  if (! mpz_fits_ulong_p (z))                   \
+    {                                           \
+      fprintf (stderr, "%s too big\n", name);   \
+      YYERROR;                                  \
+    }
+
+
+
+/* Line 268 of yacc.c  */
+#line 213 "calc.c"
+
+/* Enabling traces.  */
+#ifndef YYDEBUG
+# define YYDEBUG 0
+#endif
+
+/* Enabling verbose error messages.  */
+#ifdef YYERROR_VERBOSE
+# undef YYERROR_VERBOSE
+# define YYERROR_VERBOSE 1
+#else
+# define YYERROR_VERBOSE 0
+#endif
+
+/* Enabling the token table.  */
+#ifndef YYTOKEN_TABLE
+# define YYTOKEN_TABLE 0
+#endif
+
+
+/* Tokens.  */
+#ifndef YYTOKENTYPE
+# define YYTOKENTYPE
+   /* Put the tokens into the symbol table, so that GDB and other debuggers
+      know about them.  */
+   enum yytokentype {
+     EOS = 258,
+     BAD = 259,
+     HELP = 260,
+     HEX = 261,
+     DECIMAL = 262,
+     QUIT = 263,
+     ABS = 264,
+     BIN = 265,
+     FIB = 266,
+     GCD = 267,
+     KRON = 268,
+     LCM = 269,
+     LUCNUM = 270,
+     NEXTPRIME = 271,
+     POWM = 272,
+     ROOT = 273,
+     SQRT = 274,
+     NUMBER = 275,
+     VARIABLE = 276,
+     LOR = 277,
+     LAND = 278,
+     GE = 279,
+     LE = 280,
+     NE = 281,
+     EQ = 282,
+     RSHIFT = 283,
+     LSHIFT = 284,
+     UMINUS = 285
+   };
+#endif
+/* Tokens.  */
+#define EOS 258
+#define BAD 259
+#define HELP 260
+#define HEX 261
+#define DECIMAL 262
+#define QUIT 263
+#define ABS 264
+#define BIN 265
+#define FIB 266
+#define GCD 267
+#define KRON 268
+#define LCM 269
+#define LUCNUM 270
+#define NEXTPRIME 271
+#define POWM 272
+#define ROOT 273
+#define SQRT 274
+#define NUMBER 275
+#define VARIABLE 276
+#define LOR 277
+#define LAND 278
+#define GE 279
+#define LE 280
+#define NE 281
+#define EQ 282
+#define RSHIFT 283
+#define LSHIFT 284
+#define UMINUS 285
+
+
+
+
+#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
+typedef union YYSTYPE
+{
+
+/* Line 293 of yacc.c  */
+#line 142 "calc.y"
+
+  char  *str;
+  int   var;
+
+
+
+/* Line 293 of yacc.c  */
+#line 316 "calc.c"
+} YYSTYPE;
+# define YYSTYPE_IS_TRIVIAL 1
+# define yystype YYSTYPE /* obsolescent; will be withdrawn */
+# define YYSTYPE_IS_DECLARED 1
+#endif
+
+
+/* Copy the second part of user declarations.  */
+
+
+/* Line 343 of yacc.c  */
+#line 328 "calc.c"
+
+#ifdef short
+# undef short
+#endif
+
+#ifdef YYTYPE_UINT8
+typedef YYTYPE_UINT8 yytype_uint8;
+#else
+typedef unsigned char yytype_uint8;
+#endif
+
+#ifdef YYTYPE_INT8
+typedef YYTYPE_INT8 yytype_int8;
+#elif (defined __STDC__ || defined __C99__FUNC__ \
+     || defined __cplusplus || defined _MSC_VER)
+typedef signed char yytype_int8;
+#else
+typedef short int yytype_int8;
+#endif
+
+#ifdef YYTYPE_UINT16
+typedef YYTYPE_UINT16 yytype_uint16;
+#else
+typedef unsigned short int yytype_uint16;
+#endif
+
+#ifdef YYTYPE_INT16
+typedef YYTYPE_INT16 yytype_int16;
+#else
+typedef short int yytype_int16;
+#endif
+
+#ifndef YYSIZE_T
+# ifdef __SIZE_TYPE__
+#  define YYSIZE_T __SIZE_TYPE__
+# elif defined size_t
+#  define YYSIZE_T size_t
+# elif ! defined YYSIZE_T && (defined __STDC__ || defined __C99__FUNC__ \
+     || defined __cplusplus || defined _MSC_VER)
+#  include <stddef.h> /* INFRINGES ON USER NAME SPACE */
+#  define YYSIZE_T size_t
+# else
+#  define YYSIZE_T unsigned int
+# endif
+#endif
+
+#define YYSIZE_MAXIMUM ((YYSIZE_T) -1)
+
+#ifndef YY_
+# if defined YYENABLE_NLS && YYENABLE_NLS
+#  if ENABLE_NLS
+#   include <libintl.h> /* INFRINGES ON USER NAME SPACE */
+#   define YY_(msgid) dgettext ("bison-runtime", msgid)
+#  endif
+# endif
+# ifndef YY_
+#  define YY_(msgid) msgid
+# endif
+#endif
+
+/* Suppress unused-variable warnings by "using" E.  */
+#if ! defined lint || defined __GNUC__
+# define YYUSE(e) ((void) (e))
+#else
+# define YYUSE(e) /* empty */
+#endif
+
+/* Identity function, used to suppress warnings about constant conditions.  */
+#ifndef lint
+# define YYID(n) (n)
+#else
+#if (defined __STDC__ || defined __C99__FUNC__ \
+     || defined __cplusplus || defined _MSC_VER)
+static int
+YYID (int yyi)
+#else
+static int
+YYID (yyi)
+    int yyi;
+#endif
+{
+  return yyi;
+}
+#endif
+
+#if ! defined yyoverflow || YYERROR_VERBOSE
+
+/* The parser invokes alloca or malloc; define the necessary symbols.  */
+
+# ifdef YYSTACK_USE_ALLOCA
+#  if YYSTACK_USE_ALLOCA
+#   ifdef __GNUC__
+#    define YYSTACK_ALLOC __builtin_alloca
+#   elif defined __BUILTIN_VA_ARG_INCR
+#    include <alloca.h> /* INFRINGES ON USER NAME SPACE */
+#   elif defined _AIX
+#    define YYSTACK_ALLOC __alloca
+#   elif defined _MSC_VER
+#    include <malloc.h> /* INFRINGES ON USER NAME SPACE */
+#    define alloca _alloca
+#   else
+#    define YYSTACK_ALLOC alloca
+#    if ! defined _ALLOCA_H && ! defined EXIT_SUCCESS && (defined __STDC__ || defined __C99__FUNC__ \
+     || defined __cplusplus || defined _MSC_VER)
+#     include <stdlib.h> /* INFRINGES ON USER NAME SPACE */
+#     ifndef EXIT_SUCCESS
+#      define EXIT_SUCCESS 0
+#     endif
+#    endif
+#   endif
+#  endif
+# endif
+
+# ifdef YYSTACK_ALLOC
+   /* Pacify GCC's `empty if-body' warning.  */
+#  define YYSTACK_FREE(Ptr) do { /* empty */; } while (YYID (0))
+#  ifndef YYSTACK_ALLOC_MAXIMUM
+    /* The OS might guarantee only one guard page at the bottom of the stack,
+       and a page size can be as small as 4096 bytes.  So we cannot safely
+       invoke alloca (N) if N exceeds 4096.  Use a slightly smaller number
+       to allow for a few compiler-allocated temporary stack slots.  */
+#   define YYSTACK_ALLOC_MAXIMUM 4032 /* reasonable circa 2006 */
+#  endif
+# else
+#  define YYSTACK_ALLOC YYMALLOC
+#  define YYSTACK_FREE YYFREE
+#  ifndef YYSTACK_ALLOC_MAXIMUM
+#   define YYSTACK_ALLOC_MAXIMUM YYSIZE_MAXIMUM
+#  endif
+#  if (defined __cplusplus && ! defined EXIT_SUCCESS \
+       && ! ((defined YYMALLOC || defined malloc) \
+            && (defined YYFREE || defined free)))
+#   include <stdlib.h> /* INFRINGES ON USER NAME SPACE */
+#   ifndef EXIT_SUCCESS
+#    define EXIT_SUCCESS 0
+#   endif
+#  endif
+#  ifndef YYMALLOC
+#   define YYMALLOC malloc
+#   if ! defined malloc && ! defined EXIT_SUCCESS && (defined __STDC__ || defined __C99__FUNC__ \
+     || defined __cplusplus || defined _MSC_VER)
+void *malloc (YYSIZE_T); /* INFRINGES ON USER NAME SPACE */
+#   endif
+#  endif
+#  ifndef YYFREE
+#   define YYFREE free
+#   if ! defined free && ! defined EXIT_SUCCESS && (defined __STDC__ || defined __C99__FUNC__ \
+     || defined __cplusplus || defined _MSC_VER)
+void free (void *); /* INFRINGES ON USER NAME SPACE */
+#   endif
+#  endif
+# endif
+#endif /* ! defined yyoverflow || YYERROR_VERBOSE */
+
+
+#if (! defined yyoverflow \
+     && (! defined __cplusplus \
+        || (defined YYSTYPE_IS_TRIVIAL && YYSTYPE_IS_TRIVIAL)))
+
+/* A type that is properly aligned for any stack member.  */
+union yyalloc
+{
+  yytype_int16 yyss_alloc;
+  YYSTYPE yyvs_alloc;
+};
+
+/* The size of the maximum gap between one aligned stack and the next.  */
+# define YYSTACK_GAP_MAXIMUM (sizeof (union yyalloc) - 1)
+
+/* The size of an array large enough to hold all stacks, each with
+   N elements.  */
+# define YYSTACK_BYTES(N) \
+     ((N) * (sizeof (yytype_int16) + sizeof (YYSTYPE)) \
+      + YYSTACK_GAP_MAXIMUM)
+
+# define YYCOPY_NEEDED 1
+
+/* Relocate STACK from its old location to the new one.  The
+   local variables YYSIZE and YYSTACKSIZE give the old and new number of
+   elements in the stack, and YYPTR gives the new location of the
+   stack.  Advance YYPTR to a properly aligned location for the next
+   stack.  */
+# define YYSTACK_RELOCATE(Stack_alloc, Stack)                          \
+    do                                                                 \
+      {                                                                        \
+       YYSIZE_T yynewbytes;                                            \
+       YYCOPY (&yyptr->Stack_alloc, Stack, yysize);                    \
+       Stack = &yyptr->Stack_alloc;                                    \
+       yynewbytes = yystacksize * sizeof (*Stack) + YYSTACK_GAP_MAXIMUM; \
+       yyptr += yynewbytes / sizeof (*yyptr);                          \
+      }                                                                        \
+    while (YYID (0))
+
+#endif
+
+#if defined YYCOPY_NEEDED && YYCOPY_NEEDED
+/* Copy COUNT objects from FROM to TO.  The source and destination do
+   not overlap.  */
+# ifndef YYCOPY
+#  if defined __GNUC__ && 1 < __GNUC__
+#   define YYCOPY(To, From, Count) \
+      __builtin_memcpy (To, From, (Count) * sizeof (*(From)))
+#  else
+#   define YYCOPY(To, From, Count)             \
+      do                                       \
+       {                                       \
+         YYSIZE_T yyi;                         \
+         for (yyi = 0; yyi < (Count); yyi++)   \
+           (To)[yyi] = (From)[yyi];            \
+       }                                       \
+      while (YYID (0))
+#  endif
+# endif
+#endif /* !YYCOPY_NEEDED */
+
+/* YYFINAL -- State number of the termination state.  */
+#define YYFINAL  41
+/* YYLAST -- Last index in YYTABLE.  */
+#define YYLAST   552
+
+/* YYNTOKENS -- Number of terminals.  */
+#define YYNTOKENS  44
+/* YYNNTS -- Number of nonterminals.  */
+#define YYNNTS  7
+/* YYNRULES -- Number of rules.  */
+#define YYNRULES  49
+/* YYNSTATES -- Number of states.  */
+#define YYNSTATES  118
+
+/* YYTRANSLATE(YYLEX) -- Bison symbol number corresponding to YYLEX.  */
+#define YYUNDEFTOK  2
+#define YYMAXUTOK   285
+
+#define YYTRANSLATE(YYX)                                               \
+  ((unsigned int) (YYX) <= YYMAXUTOK ? yytranslate[YYX] : YYUNDEFTOK)
+
+/* YYTRANSLATE[YYLEX] -- Bison symbol number corresponding to YYLEX.  */
+static const yytype_uint8 yytranslate[] =
+{
+       0,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,    39,     2,     2,     2,    36,     2,     2,
+      41,    42,    34,    32,    43,    33,     2,    35,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+      24,    40,    25,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,    38,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     1,     2,     3,     4,
+       5,     6,     7,     8,     9,    10,    11,    12,    13,    14,
+      15,    16,    17,    18,    19,    20,    21,    22,    23,    26,
+      27,    28,    29,    30,    31,    37
+};
+
+#if YYDEBUG
+/* YYPRHS[YYN] -- Index of the first RHS symbol of rule number YYN in
+   YYRHS.  */
+static const yytype_uint8 yyprhs[] =
+{
+       0,     0,     3,     5,     8,    11,    15,    18,    19,    21,
+      25,    27,    29,    31,    33,    37,    41,    45,    49,    53,
+      57,    61,    65,    69,    72,    75,    79,    83,    87,    91,
+      95,    99,   103,   107,   112,   119,   124,   129,   136,   141,
+     146,   151,   160,   167,   172,   174,   176,   178,   182,   184
+};
+
+/* YYRHS -- A `-1'-separated list of the rules' RHS.  */
+static const yytype_int8 yyrhs[] =
+{
+      45,     0,    -1,    47,    -1,    46,    47,    -1,    47,     3,
+      -1,    46,    47,     3,    -1,     1,     3,    -1,    -1,    48,
+      -1,    21,    40,    48,    -1,     5,    -1,     6,    -1,     7,
+      -1,     8,    -1,    41,    48,    42,    -1,    48,    32,    48,
+      -1,    48,    33,    48,    -1,    48,    34,    48,    -1,    48,
+      35,    48,    -1,    48,    36,    48,    -1,    48,    38,    48,
+      -1,    48,    31,    48,    -1,    48,    30,    48,    -1,    48,
+      39,    -1,    33,    48,    -1,    48,    24,    48,    -1,    48,
+      27,    48,    -1,    48,    29,    48,    -1,    48,    28,    48,
+      -1,    48,    26,    48,    -1,    48,    25,    48,    -1,    48,
+      23,    48,    -1,    48,    22,    48,    -1,     9,    41,    48,
+      42,    -1,    10,    41,    48,    43,    48,    42,    -1,    11,
+      41,    48,    42,    -1,    12,    41,    49,    42,    -1,    13,
+      41,    48,    43,    48,    42,    -1,    14,    41,    50,    42,
+      -1,    15,    41,    48,    42,    -1,    16,    41,    48,    42,
+      -1,    17,    41,    48,    43,    48,    43,    48,    42,    -1,
+      18,    41,    48,    43,    48,    42,    -1,    19,    41,    48,
+      42,    -1,    21,    -1,    20,    -1,    48,    -1,    49,    43,
+      48,    -1,    48,    -1,    50,    43,    48,    -1
+};
+
+/* YYRLINE[YYN] -- source line where rule number YYN was defined.  */
+static const yytype_uint16 yyrline[] =
+{
+       0,   167,   167,   168,   171,   172,   173,   175,   177,   182,
+     188,   189,   190,   191,   197,   198,   199,   200,   201,   202,
+     203,   205,   207,   209,   211,   213,   214,   215,   216,   217,
+     218,   220,   221,   223,   224,   226,   228,   229,   231,   232,
+     234,   235,   236,   238,   240,   246,   257,   258,   261,   262
+};
+#endif
+
+#if YYDEBUG || YYERROR_VERBOSE || YYTOKEN_TABLE
+/* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM.
+   First, the terminals, then, starting at YYNTOKENS, nonterminals.  */
+static const char *const yytname[] =
+{
+  "$end", "error", "$undefined", "EOS", "BAD", "HELP", "HEX", "DECIMAL",
+  "QUIT", "ABS", "BIN", "FIB", "GCD", "KRON", "LCM", "LUCNUM", "NEXTPRIME",
+  "POWM", "ROOT", "SQRT", "NUMBER", "VARIABLE", "LOR", "LAND", "'<'",
+  "'>'", "GE", "LE", "NE", "EQ", "RSHIFT", "LSHIFT", "'+'", "'-'", "'*'",
+  "'/'", "'%'", "UMINUS", "'^'", "'!'", "'='", "'('", "')'", "','",
+  "$accept", "top", "statements", "statement", "e", "gcdlist", "lcmlist", 0
+};
+#endif
+
+# ifdef YYPRINT
+/* YYTOKNUM[YYLEX-NUM] -- Internal token number corresponding to
+   token YYLEX-NUM.  */
+static const yytype_uint16 yytoknum[] =
+{
+       0,   256,   257,   258,   259,   260,   261,   262,   263,   264,
+     265,   266,   267,   268,   269,   270,   271,   272,   273,   274,
+     275,   276,   277,   278,    60,    62,   279,   280,   281,   282,
+     283,   284,    43,    45,    42,    47,    37,   285,    94,    33,
+      61,    40,    41,    44
+};
+# endif
+
+/* YYR1[YYN] -- Symbol number of symbol that rule YYN derives.  */
+static const yytype_uint8 yyr1[] =
+{
+       0,    44,    45,    45,    46,    46,    46,    47,    47,    47,
+      47,    47,    47,    47,    48,    48,    48,    48,    48,    48,
+      48,    48,    48,    48,    48,    48,    48,    48,    48,    48,
+      48,    48,    48,    48,    48,    48,    48,    48,    48,    48,
+      48,    48,    48,    48,    48,    48,    49,    49,    50,    50
+};
+
+/* YYR2[YYN] -- Number of symbols composing right hand side of rule YYN.  */
+static const yytype_uint8 yyr2[] =
+{
+       0,     2,     1,     2,     2,     3,     2,     0,     1,     3,
+       1,     1,     1,     1,     3,     3,     3,     3,     3,     3,
+       3,     3,     3,     2,     2,     3,     3,     3,     3,     3,
+       3,     3,     3,     4,     6,     4,     4,     6,     4,     4,
+       4,     8,     6,     4,     1,     1,     1,     3,     1,     3
+};
+
+/* YYDEFACT[STATE-NAME] -- Default reduction number in state STATE-NUM.
+   Performed when YYTABLE doesn't specify something else to do.  Zero
+   means the default is an error.  */
+static const yytype_uint8 yydefact[] =
+{
+       0,     0,    10,    11,    12,    13,     0,     0,     0,     0,
+       0,     0,     0,     0,     0,     0,     0,    45,    44,     0,
+       0,     0,     7,     2,     8,     6,     0,     0,     0,     0,
+       0,     0,     0,     0,     0,     0,     0,     0,    44,    24,
+       0,     1,     3,     4,     0,     0,     0,     0,     0,     0,
+       0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
+      23,     0,     0,     0,    46,     0,     0,    48,     0,     0,
+       0,     0,     0,     0,     9,    14,     5,    32,    31,    25,
+      30,    29,    26,    28,    27,    22,    21,    15,    16,    17,
+      18,    19,    20,    33,     0,    35,    36,     0,     0,    38,
+       0,    39,    40,     0,     0,    43,     0,    47,     0,    49,
+       0,     0,    34,    37,     0,    42,     0,    41
+};
+
+/* YYDEFGOTO[NTERM-NUM].  */
+static const yytype_int8 yydefgoto[] =
+{
+      -1,    21,    22,    23,    24,    65,    68
+};
+
+/* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing
+   STATE-NUM.  */
+#define YYPACT_NINF -39
+static const yytype_int16 yypact[] =
+{
+      41,     3,   -39,   -39,   -39,   -39,     2,     4,    27,    32,
+      35,    36,    39,    42,    45,    46,    47,   -39,   -18,   124,
+     124,    89,    91,    87,   464,   -39,   124,   124,   124,   124,
+     124,   124,   124,   124,   124,   124,   124,   124,   -39,   -36,
+     254,   -39,    88,   -39,   124,   124,   124,   124,   124,   124,
+     124,   124,   124,   124,   124,   124,   124,   124,   124,   124,
+     -39,   275,   144,   296,   464,   -38,   166,   464,    29,   317,
+     338,   188,   210,   359,   464,   -39,   -39,   481,   497,   513,
+     513,   513,   513,   513,   513,    31,    31,   -15,   -15,   -36,
+     -36,   -36,   -36,   -39,   124,   -39,   -39,   124,   124,   -39,
+     124,   -39,   -39,   124,   124,   -39,   380,   464,   401,   464,
+     232,   422,   -39,   -39,   124,   -39,   443,   -39
+};
+
+/* YYPGOTO[NTERM-NUM].  */
+static const yytype_int8 yypgoto[] =
+{
+     -39,   -39,   -39,    70,   -19,   -39,   -39
+};
+
+/* YYTABLE[YYPACT[STATE-NUM]].  What to do in state STATE-NUM.  If
+   positive, shift that token.  If negative, reduce the rule which
+   number is the opposite.  If YYTABLE_NINF, syntax error.  */
+#define YYTABLE_NINF -8
+static const yytype_int8 yytable[] =
+{
+      39,    40,    59,    60,    96,    97,    25,    61,    62,    63,
+      64,    66,    67,    69,    70,    71,    72,    73,    74,    56,
+      57,    58,    37,    59,    60,    77,    78,    79,    80,    81,
+      82,    83,    84,    85,    86,    87,    88,    89,    90,    91,
+      92,    -7,     1,    26,    -7,    27,     2,     3,     4,     5,
+       6,     7,     8,     9,    10,    11,    12,    13,    14,    15,
+      16,    17,    18,    54,    55,    56,    57,    58,    28,    59,
+      60,    99,   100,    29,    19,   106,    30,    31,   107,   108,
+      32,   109,    20,    33,   110,   111,    34,    35,    36,    41,
+      43,    76,    42,     0,     0,   116,     2,     3,     4,     5,
+       6,     7,     8,     9,    10,    11,    12,    13,    14,    15,
+      16,    17,    18,     0,     0,     0,     0,     0,     0,     0,
+       0,     0,     0,     0,    19,     0,     0,     0,     0,     0,
+       0,     0,    20,     6,     7,     8,     9,    10,    11,    12,
+      13,    14,    15,    16,    17,    38,     0,     0,     0,     0,
+       0,     0,     0,     0,     0,     0,     0,    19,     0,     0,
+       0,     0,     0,     0,     0,    20,    44,    45,    46,    47,
+      48,    49,    50,    51,    52,    53,    54,    55,    56,    57,
+      58,     0,    59,    60,     0,     0,     0,    94,    44,    45,
+      46,    47,    48,    49,    50,    51,    52,    53,    54,    55,
+      56,    57,    58,     0,    59,    60,     0,     0,     0,    98,
+      44,    45,    46,    47,    48,    49,    50,    51,    52,    53,
+      54,    55,    56,    57,    58,     0,    59,    60,     0,     0,
+       0,   103,    44,    45,    46,    47,    48,    49,    50,    51,
+      52,    53,    54,    55,    56,    57,    58,     0,    59,    60,
+       0,     0,     0,   104,    44,    45,    46,    47,    48,    49,
+      50,    51,    52,    53,    54,    55,    56,    57,    58,     0,
+      59,    60,     0,     0,     0,   114,    44,    45,    46,    47,
+      48,    49,    50,    51,    52,    53,    54,    55,    56,    57,
+      58,     0,    59,    60,     0,     0,    75,    44,    45,    46,
+      47,    48,    49,    50,    51,    52,    53,    54,    55,    56,
+      57,    58,     0,    59,    60,     0,     0,    93,    44,    45,
+      46,    47,    48,    49,    50,    51,    52,    53,    54,    55,
+      56,    57,    58,     0,    59,    60,     0,     0,    95,    44,
+      45,    46,    47,    48,    49,    50,    51,    52,    53,    54,
+      55,    56,    57,    58,     0,    59,    60,     0,     0,   101,
+      44,    45,    46,    47,    48,    49,    50,    51,    52,    53,
+      54,    55,    56,    57,    58,     0,    59,    60,     0,     0,
+     102,    44,    45,    46,    47,    48,    49,    50,    51,    52,
+      53,    54,    55,    56,    57,    58,     0,    59,    60,     0,
+       0,   105,    44,    45,    46,    47,    48,    49,    50,    51,
+      52,    53,    54,    55,    56,    57,    58,     0,    59,    60,
+       0,     0,   112,    44,    45,    46,    47,    48,    49,    50,
+      51,    52,    53,    54,    55,    56,    57,    58,     0,    59,
+      60,     0,     0,   113,    44,    45,    46,    47,    48,    49,
+      50,    51,    52,    53,    54,    55,    56,    57,    58,     0,
+      59,    60,     0,     0,   115,    44,    45,    46,    47,    48,
+      49,    50,    51,    52,    53,    54,    55,    56,    57,    58,
+       0,    59,    60,     0,     0,   117,    44,    45,    46,    47,
+      48,    49,    50,    51,    52,    53,    54,    55,    56,    57,
+      58,     0,    59,    60,    45,    46,    47,    48,    49,    50,
+      51,    52,    53,    54,    55,    56,    57,    58,     0,    59,
+      60,    46,    47,    48,    49,    50,    51,    52,    53,    54,
+      55,    56,    57,    58,     0,    59,    60,    -8,    -8,    -8,
+      -8,    -8,    -8,    52,    53,    54,    55,    56,    57,    58,
+       0,    59,    60
+};
+
+#define yypact_value_is_default(yystate) \
+  ((yystate) == (-39))
+
+#define yytable_value_is_error(yytable_value) \
+  ((yytable_value) == (-8))
+
+/* YYCHECK[I] -- the symbol number for which YYTABLE[I] is a valid
+   entry.  A lookup at index yypact[state] + token uses YYTABLE only
+   when YYCHECK at that same index equals the token; otherwise the
+   state's default action is taken.  */
+static const yytype_int8 yycheck[] =
+{
+      19,    20,    38,    39,    42,    43,     3,    26,    27,    28,
+      29,    30,    31,    32,    33,    34,    35,    36,    37,    34,
+      35,    36,    40,    38,    39,    44,    45,    46,    47,    48,
+      49,    50,    51,    52,    53,    54,    55,    56,    57,    58,
+      59,     0,     1,    41,     3,    41,     5,     6,     7,     8,
+       9,    10,    11,    12,    13,    14,    15,    16,    17,    18,
+      19,    20,    21,    32,    33,    34,    35,    36,    41,    38,
+      39,    42,    43,    41,    33,    94,    41,    41,    97,    98,
+      41,   100,    41,    41,   103,   104,    41,    41,    41,     0,
+       3,     3,    22,    -1,    -1,   114,     5,     6,     7,     8,
+       9,    10,    11,    12,    13,    14,    15,    16,    17,    18,
+      19,    20,    21,    -1,    -1,    -1,    -1,    -1,    -1,    -1,
+      -1,    -1,    -1,    -1,    33,    -1,    -1,    -1,    -1,    -1,
+      -1,    -1,    41,     9,    10,    11,    12,    13,    14,    15,
+      16,    17,    18,    19,    20,    21,    -1,    -1,    -1,    -1,
+      -1,    -1,    -1,    -1,    -1,    -1,    -1,    33,    -1,    -1,
+      -1,    -1,    -1,    -1,    -1,    41,    22,    23,    24,    25,
+      26,    27,    28,    29,    30,    31,    32,    33,    34,    35,
+      36,    -1,    38,    39,    -1,    -1,    -1,    43,    22,    23,
+      24,    25,    26,    27,    28,    29,    30,    31,    32,    33,
+      34,    35,    36,    -1,    38,    39,    -1,    -1,    -1,    43,
+      22,    23,    24,    25,    26,    27,    28,    29,    30,    31,
+      32,    33,    34,    35,    36,    -1,    38,    39,    -1,    -1,
+      -1,    43,    22,    23,    24,    25,    26,    27,    28,    29,
+      30,    31,    32,    33,    34,    35,    36,    -1,    38,    39,
+      -1,    -1,    -1,    43,    22,    23,    24,    25,    26,    27,
+      28,    29,    30,    31,    32,    33,    34,    35,    36,    -1,
+      38,    39,    -1,    -1,    -1,    43,    22,    23,    24,    25,
+      26,    27,    28,    29,    30,    31,    32,    33,    34,    35,
+      36,    -1,    38,    39,    -1,    -1,    42,    22,    23,    24,
+      25,    26,    27,    28,    29,    30,    31,    32,    33,    34,
+      35,    36,    -1,    38,    39,    -1,    -1,    42,    22,    23,
+      24,    25,    26,    27,    28,    29,    30,    31,    32,    33,
+      34,    35,    36,    -1,    38,    39,    -1,    -1,    42,    22,
+      23,    24,    25,    26,    27,    28,    29,    30,    31,    32,
+      33,    34,    35,    36,    -1,    38,    39,    -1,    -1,    42,
+      22,    23,    24,    25,    26,    27,    28,    29,    30,    31,
+      32,    33,    34,    35,    36,    -1,    38,    39,    -1,    -1,
+      42,    22,    23,    24,    25,    26,    27,    28,    29,    30,
+      31,    32,    33,    34,    35,    36,    -1,    38,    39,    -1,
+      -1,    42,    22,    23,    24,    25,    26,    27,    28,    29,
+      30,    31,    32,    33,    34,    35,    36,    -1,    38,    39,
+      -1,    -1,    42,    22,    23,    24,    25,    26,    27,    28,
+      29,    30,    31,    32,    33,    34,    35,    36,    -1,    38,
+      39,    -1,    -1,    42,    22,    23,    24,    25,    26,    27,
+      28,    29,    30,    31,    32,    33,    34,    35,    36,    -1,
+      38,    39,    -1,    -1,    42,    22,    23,    24,    25,    26,
+      27,    28,    29,    30,    31,    32,    33,    34,    35,    36,
+      -1,    38,    39,    -1,    -1,    42,    22,    23,    24,    25,
+      26,    27,    28,    29,    30,    31,    32,    33,    34,    35,
+      36,    -1,    38,    39,    23,    24,    25,    26,    27,    28,
+      29,    30,    31,    32,    33,    34,    35,    36,    -1,    38,
+      39,    24,    25,    26,    27,    28,    29,    30,    31,    32,
+      33,    34,    35,    36,    -1,    38,    39,    24,    25,    26,
+      27,    28,    29,    30,    31,    32,    33,    34,    35,    36,
+      -1,    38,    39
+};
+
+/* YYSTOS[STATE-NUM] -- The (internal number of the) accessing
+   symbol of state STATE-NUM.
+
+   NOTE(review): no reader of this table appears between here and the
+   start of the grammar actions; presumably it is consumed by the
+   error-recovery / cleanup code later in yyparse -- confirm.  */
+static const yytype_uint8 yystos[] =
+{
+       0,     1,     5,     6,     7,     8,     9,    10,    11,    12,
+      13,    14,    15,    16,    17,    18,    19,    20,    21,    33,
+      41,    45,    46,    47,    48,     3,    41,    41,    41,    41,
+      41,    41,    41,    41,    41,    41,    41,    40,    21,    48,
+      48,     0,    47,     3,    22,    23,    24,    25,    26,    27,
+      28,    29,    30,    31,    32,    33,    34,    35,    36,    38,
+      39,    48,    48,    48,    48,    49,    48,    48,    50,    48,
+      48,    48,    48,    48,    48,    42,     3,    48,    48,    48,
+      48,    48,    48,    48,    48,    48,    48,    48,    48,    48,
+      48,    48,    48,    42,    43,    42,    42,    43,    43,    42,
+      43,    42,    42,    43,    43,    42,    48,    48,    48,    48,
+      48,    48,    42,    42,    43,    42,    48,    42
+};
+
+/* Macros for use in semantic actions.  They expand to statements that
+   touch yyparse's locals and labels, so they only make sense inside
+   yyparse.  */
+
+/* yyerrok: clear yyerrstatus, the count of tokens that must still be
+   shifted before error messages are enabled again.
+   yyclearin: discard the current lookahead; the parser reads a fresh
+   token the next time it needs one.
+   YYEMPTY: value of yychar meaning "no lookahead has been read".
+   YYEOF: token number for end of input; any yychar <= YYEOF is
+   treated as end of input.  */
+#define yyerrok                (yyerrstatus = 0)
+#define yyclearin      (yychar = YYEMPTY)
+#define YYEMPTY                (-2)
+#define YYEOF          0
+
+/* Leave the normal parse flow by jumping to the matching label in
+   yyparse (the labels themselves lie outside this excerpt).  */
+#define YYACCEPT       goto yyacceptlab
+#define YYABORT                goto yyabortlab
+#define YYERROR                goto yyerrorlab
+
+
+/* Like YYERROR except do call yyerror.  This remains here temporarily
+   to ease the transition to the new meaning of YYERROR, for GCC.
+   Once GCC version 2 has supplanted version 1, this can go.  However,
+   YYFAIL appears to be in use.  Nevertheless, it is formally deprecated
+   in Bison 2.4.2's NEWS entry, where a plan to phase it out is
+   discussed.  */
+
+/* yyerrlab is the same label the parser itself jumps to when the
+   tables report a syntax error.  */
+#define YYFAIL         goto yyerrlab
+#if defined YYFAIL
+  /* This is here to suppress warnings from the GCC cpp's
+     -Wunused-macros.  Normally we don't worry about that warning, but
+     some users do, and we want to make it easy for users to remove
+     YYFAIL uses, which will produce warnings from Bison 2.5.  */
+#endif
+
+/* Evaluates to 1 while yyerrstatus is nonzero, else 0.  */
+#define YYRECOVERING()  (!!yyerrstatus)
+
+/* YYBACKUP (Token, Value) -- for use in a semantic action: install
+   TOKEN (with semantic value VALUE) as the lookahead, pop one entry
+   from the parser stacks and resume at yybackup.  This is only done
+   when no lookahead has been read (yychar == YYEMPTY) and the rule
+   being reduced has exactly one right-hand-side symbol (yylen == 1);
+   otherwise yyerror is called with "syntax error: cannot back up" and
+   YYERROR is taken.  */
+#define YYBACKUP(Token, Value)                                 \
+do                                                             \
+  if (yychar == YYEMPTY && yylen == 1)                         \
+    {                                                          \
+      yychar = (Token);                                                \
+      yylval = (Value);                                                \
+      YYPOPSTACK (1);                                          \
+      goto yybackup;                                           \
+    }                                                          \
+  else                                                         \
+    {                                                          \
+      yyerror (YY_("syntax error: cannot back up")); \
+      YYERROR;                                                 \
+    }                                                          \
+while (YYID (0))
+
+
+/* YYTERROR: internal symbol number that yysyntax_error leaves out of
+   its "expecting ..." list (presumably the `error' token -- confirm).
+   YYERRCODE: NOTE(review): not referenced in the visible part of this
+   file; presumably the external token code of `error' -- confirm.  */
+#define YYTERROR       1
+#define YYERRCODE      256
+
+
+/* YYLLOC_DEFAULT -- Set CURRENT to span from RHS[1] to RHS[N].
+   If N is 0, then set CURRENT to the empty location which ends
+   the previous symbol: RHS[0] (always defined).  */
+
+/* YYRHSLOC (Rhs, K) -- the K-th element of the location array RHS.  */
+#define YYRHSLOC(Rhs, K) ((Rhs)[K])
+#ifndef YYLLOC_DEFAULT
+# define YYLLOC_DEFAULT(Current, Rhs, N)                               \
+    do                                                                 \
+      if (YYID (N))                                                    \
+       {                                                               \
+         (Current).first_line   = YYRHSLOC (Rhs, 1).first_line;        \
+         (Current).first_column = YYRHSLOC (Rhs, 1).first_column;      \
+         (Current).last_line    = YYRHSLOC (Rhs, N).last_line;         \
+         (Current).last_column  = YYRHSLOC (Rhs, N).last_column;       \
+       }                                                               \
+      else                                                             \
+       {                                                               \
+         (Current).first_line   = (Current).last_line   =              \
+           YYRHSLOC (Rhs, 0).last_line;                                \
+         (Current).first_column = (Current).last_column =              \
+           YYRHSLOC (Rhs, 0).last_column;                              \
+       }                                                               \
+    while (YYID (0))
+#endif
+
+
+/* This macro is provided for backward compatibility. */
+
+/* Unless the user supplies a definition, printing a location is a
+   no-op.  */
+#ifndef YY_LOCATION_PRINT
+# define YY_LOCATION_PRINT(File, Loc) ((void) 0)
+#endif
+
+
+/* YYLEX -- calling `yylex' with the right arguments.  */
+
+/* When YYLEX_PARAM is defined it is passed as yylex's only argument;
+   otherwise yylex is called with no arguments.  */
+#ifdef YYLEX_PARAM
+# define YYLEX yylex (YYLEX_PARAM)
+#else
+# define YYLEX yylex ()
+#endif
+
+/* Enable debugging if requested.  */
+#if YYDEBUG
+
+/* Trace output goes through YYFPRINTF, which defaults to fprintf
+   unless the user has already defined it.  */
+# ifndef YYFPRINTF
+#  include <stdio.h> /* INFRINGES ON USER NAME SPACE */
+#  define YYFPRINTF fprintf
+# endif
+
+/* YYDPRINTF (Args) -- ARGS is a complete parenthesized argument list,
+   e.g. YYDPRINTF ((stderr, "Starting parse\n")).  It is printed only
+   when yydebug is nonzero.  */
+# define YYDPRINTF(Args)                       \
+do {                                           \
+  if (yydebug)                                 \
+    YYFPRINTF Args;                            \
+} while (YYID (0))
+
+/* YY_SYMBOL_PRINT -- when yydebug is nonzero, print TITLE followed by
+   a description of the symbol TYPE / VALUE on stderr.  The LOCATION
+   argument is accepted but never expanded.  */
+# define YY_SYMBOL_PRINT(Title, Type, Value, Location)                   \
+do {                                                                     \
+  if (yydebug)                                                           \
+    {                                                                    \
+      YYFPRINTF (stderr, "%s ", Title);                                          \
+      yy_symbol_print (stderr,                                           \
+                 Type, Value); \
+      YYFPRINTF (stderr, "\n");                                                  \
+    }                                                                    \
+} while (YYID (0))
+
+
+/*----------------------------------------.
+| Print this symbol's value on YYOUTPUT.  |
+`----------------------------------------*/
+
+/* YYTYPE is the internal symbol number and YYVALUEP points to its
+   semantic value (may be null, in which case nothing is printed).
+   In this parser the switch below has no per-symbol cases, so output
+   is produced only when the user defines YYPRINT, and then only for
+   tokens.  */
+/*ARGSUSED*/
+#if (defined __STDC__ || defined __C99__FUNC__ \
+     || defined __cplusplus || defined _MSC_VER)
+static void
+yy_symbol_value_print (FILE *yyoutput, int yytype, YYSTYPE const * const yyvaluep)
+#else
+static void
+yy_symbol_value_print (yyoutput, yytype, yyvaluep)
+    FILE *yyoutput;
+    int yytype;
+    YYSTYPE const * const yyvaluep;
+#endif
+{
+  if (!yyvaluep)
+    return;
+# ifdef YYPRINT
+  /* Symbol numbers below YYNTOKENS are tokens; YYPRINT receives the
+     yytoknum[] entry rather than the internal number.  */
+  if (yytype < YYNTOKENS)
+    YYPRINT (yyoutput, yytoknum[yytype], *yyvaluep);
+# else
+  /* Keep the otherwise unused parameter referenced.  */
+  YYUSE (yyoutput);
+# endif
+  /* No symbol-specific printing: every type takes the empty default
+     (presumably where Bison would emit %printer code -- confirm).  */
+  switch (yytype)
+    {
+      default:
+       break;
+    }
+}
+
+
+/*--------------------------------.
+| Print this symbol on YYOUTPUT.  |
+`--------------------------------*/
+
+/* Write "token NAME (" or "nterm NAME (" to YYOUTPUT, depending on
+   whether YYTYPE is below YYNTOKENS, then the symbol's value via
+   yy_symbol_value_print, then the closing ")".  */
+#if (defined __STDC__ || defined __C99__FUNC__ \
+     || defined __cplusplus || defined _MSC_VER)
+static void
+yy_symbol_print (FILE *yyoutput, int yytype, YYSTYPE const * const yyvaluep)
+#else
+static void
+yy_symbol_print (yyoutput, yytype, yyvaluep)
+    FILE *yyoutput;
+    int yytype;
+    YYSTYPE const * const yyvaluep;
+#endif
+{
+  /* Symbol numbers from YYNTOKENS upwards are nonterminals.  */
+  if (YYNTOKENS <= yytype)
+    YYFPRINTF (yyoutput, "nterm %s (", yytname[yytype]);
+  else
+    YYFPRINTF (yyoutput, "token %s (", yytname[yytype]);
+
+  yy_symbol_value_print (yyoutput, yytype, yyvaluep);
+
+  YYFPRINTF (yyoutput, ")");
+}
+
+/*------------------------------------------------------------------.
+| yy_stack_print -- Print the state stack from its BOTTOM up to its |
+| TOP (included).                                                   |
+`------------------------------------------------------------------*/
+
+/* Output on stderr is "Stack now", then " N" for each state number in
+   [YYBOTTOM, YYTOP], then a newline.  */
+#if (defined __STDC__ || defined __C99__FUNC__ \
+     || defined __cplusplus || defined _MSC_VER)
+static void
+yy_stack_print (yytype_int16 *yybottom, yytype_int16 *yytop)
+#else
+static void
+yy_stack_print (yybottom, yytop)
+    yytype_int16 *yybottom;
+    yytype_int16 *yytop;
+#endif
+{
+  yytype_int16 *yycur = yybottom;
+
+  YYFPRINTF (stderr, "Stack now");
+  while (yycur <= yytop)
+    {
+      /* Widen to int explicitly so the value matches the %d format.  */
+      YYFPRINTF (stderr, " %d", (int) *yycur);
+      yycur++;
+    }
+  YYFPRINTF (stderr, "\n");
+}
+
+# define YY_STACK_PRINT(Bottom, Top)                           \
+do {                                                           \
+  if (yydebug)                                                 \
+    yy_stack_print ((Bottom), (Top));                          \
+} while (YYID (0))
+
+
+/*------------------------------------------------.
+| Report that the YYRULE is going to be reduced.  |
+`------------------------------------------------*/
+
+/* Print a header with the rule number (YYRULE - 1) and the line
+   recorded for it in yyrline[], then one "$K = <symbol>" line for each
+   of the rule's right-hand-side symbols.  YYVSP is the top of the
+   semantic value stack, so $K sits at yyvsp[K - <rhs length>].  */
+#if (defined __STDC__ || defined __C99__FUNC__ \
+     || defined __cplusplus || defined _MSC_VER)
+static void
+yy_reduce_print (YYSTYPE *yyvsp, int yyrule)
+#else
+static void
+yy_reduce_print (yyvsp, yyrule)
+    YYSTYPE *yyvsp;
+    int yyrule;
+#endif
+{
+  const int yyrhs_count = yyr2[yyrule];
+  const unsigned long int yysrc_line = yyrline[yyrule];
+  int yyk;
+
+  YYFPRINTF (stderr, "Reducing stack by rule %d (line %lu):\n",
+            yyrule - 1, yysrc_line);
+
+  /* The symbols being reduced, numbered from 1 as in the grammar.  */
+  for (yyk = 1; yyk <= yyrhs_count; yyk++)
+    {
+      YYFPRINTF (stderr, "   $%d = ", yyk);
+      yy_symbol_print (stderr, yyrhs[yyprhs[yyrule] + yyk - 1],
+                      &yyvsp[yyk - yyrhs_count]);
+      YYFPRINTF (stderr, "\n");
+    }
+}
+
+# define YY_REDUCE_PRINT(Rule)         \
+do {                                   \
+  if (yydebug)                         \
+    yy_reduce_print (yyvsp, Rule); \
+} while (YYID (0))
+
+/* Nonzero means print parse trace.  It is left uninitialized so that
+   multiple parsers can coexist.  */
+int yydebug;
+#else /* !YYDEBUG */
+/* Tracing compiled out: each debug macro expands to nothing, so none
+   of its arguments is evaluated.  */
+# define YYDPRINTF(Args)
+# define YY_SYMBOL_PRINT(Title, Type, Value, Location)
+# define YY_STACK_PRINT(Bottom, Top)
+# define YY_REDUCE_PRINT(Rule)
+#endif /* !YYDEBUG */
+
+
+/* YYINITDEPTH -- initial size of the parser's stacks.  It is the
+   element count of the automatic arrays yyssa[] and yyvsa[] that
+   yyparse starts out with.  */
+#ifndef        YYINITDEPTH
+# define YYINITDEPTH 200
+#endif
+
+/* YYMAXDEPTH -- maximum size the stacks can grow to (effective only
+   if the built-in stack extension method is used).  The built-in
+   method doubles the stack size each time, clamps it to YYMAXDEPTH,
+   and gives up once the stack is already that large.
+
+   Do not make this value too large; the results are undefined if
+   YYSTACK_ALLOC_MAXIMUM < YYSTACK_BYTES (YYMAXDEPTH)
+   evaluated with infinite-precision integer arithmetic.  */
+
+#ifndef YYMAXDEPTH
+# define YYMAXDEPTH 10000
+#endif
+
+
+#if YYERROR_VERBOSE
+
+# ifndef yystrlen
+#  if defined __GLIBC__ && defined _STRING_H
+#   define yystrlen strlen
+#  else
+/* Count the characters of YYSTR that precede its terminating '\0'.  */
+#if (defined __STDC__ || defined __C99__FUNC__ \
+     || defined __cplusplus || defined _MSC_VER)
+static YYSIZE_T
+yystrlen (const char *yystr)
+#else
+static YYSIZE_T
+yystrlen (yystr)
+    const char *yystr;
+#endif
+{
+  const char *yyend = yystr;
+
+  /* Walk to the terminator; the distance covered is the length.  */
+  while (*yyend)
+    yyend++;
+  return (YYSIZE_T) (yyend - yystr);
+}
+#  endif
+# endif
+
+# ifndef yystpcpy
+#  if defined __GLIBC__ && defined _STRING_H && defined _GNU_SOURCE
+#   define yystpcpy stpcpy
+#  else
+/* Copy the string YYSRC, terminator included, into YYDEST and return
+   a pointer to the '\0' just written in YYDEST.  */
+#if (defined __STDC__ || defined __C99__FUNC__ \
+     || defined __cplusplus || defined _MSC_VER)
+static char *
+yystpcpy (char *yydest, const char *yysrc)
+#else
+static char *
+yystpcpy (yydest, yysrc)
+    char *yydest;
+    const char *yysrc;
+#endif
+{
+  char *yyout = yydest;
+  const char *yyin = yysrc;
+
+  /* Copy everything up to, but not including, the terminator ...  */
+  for (; *yyin != '\0'; yyin++, yyout++)
+    *yyout = *yyin;
+
+  /* ... then terminate; YYOUT is left pointing at the '\0'.  */
+  *yyout = '\0';
+  return yyout;
+}
+#  endif
+# endif
+
+# ifndef yytnamerr
+/* Copy to YYRES the contents of YYSTR after stripping away unnecessary
+   quotes and backslashes, so that it's suitable for yyerror.  The
+   heuristic is that double-quoting is unnecessary unless the string
+   contains an apostrophe, a comma, or backslash (other than
+   backslash-backslash).  YYSTR is taken from yytname.  If YYRES is
+   null, do not copy; instead, return the length of what the result
+   would have been.
+
+   The returned length does not count the terminating '\0'.
+
+   NOTE(review): the scan has no case for '\0', so a name that opens
+   with a double quote is assumed to contain a closing one -- confirm
+   that yytname guarantees this.  */
+static YYSIZE_T
+yytnamerr (char *yyres, const char *yystr)
+{
+  /* Only names written with a leading double quote are candidates for
+     stripping; everything else is copied verbatim below.  */
+  if (*yystr == '"')
+    {
+      YYSIZE_T yyn = 0;
+      char const *yyp = yystr;
+
+      for (;;)
+       switch (*++yyp)
+         {
+         /* These characters make the quotes necessary: abandon the
+            stripped copy and fall back to the verbatim one.  */
+         case '\'':
+         case ',':
+           goto do_not_strip_quotes;
+
+         /* A backslash is acceptable only as the pair "\\", which is
+            emitted as a single backslash by the default case.  */
+         case '\\':
+           if (*++yyp != '\\')
+             goto do_not_strip_quotes;
+           /* Fall through.  */
+         default:
+           if (yyres)
+             yyres[yyn] = *yyp;
+           yyn++;
+           break;
+
+         /* Closing quote: terminate the result and report its length.  */
+         case '"':
+           if (yyres)
+             yyres[yyn] = '\0';
+           return yyn;
+         }
+    do_not_strip_quotes: ;
+    }
+
+  /* Verbatim path: length only when there is no destination ...  */
+  if (! yyres)
+    return yystrlen (yystr);
+
+  /* ... otherwise copy and derive the length from the end pointer.  */
+  return yystpcpy (yyres, yystr) - yyres;
+}
+# endif
+
+/* Copy into *YYMSG, which is of size *YYMSG_ALLOC, an error message
+   about the unexpected token YYTOKEN for the state stack whose top is
+   YYSSP.  YYTOKEN may be YYEMPTY (no lookahead available), in which
+   case the message is the plain "syntax error".
+
+   Return 0 if *YYMSG was successfully written.  Return 1 if *YYMSG is
+   not large enough to hold the message.  In that case, also set
+   *YYMSG_ALLOC to the required number of bytes.  Return 2 if the
+   required number of bytes is too large to store.  */
+static int
+yysyntax_error (YYSIZE_T *yymsg_alloc, char **yymsg,
+                yytype_int16 *yyssp, int yytoken)
+{
+  /* Length of the unexpected token's name.  It stays 0 when there is
+     no lookahead: YYEMPTY is negative, so yytname[YYEMPTY] would be
+     read from before the start of the table.  The length is therefore
+     computed only after YYTOKEN is known to be a real symbol.  */
+  YYSIZE_T yysize0 = 0;
+  YYSIZE_T yysize = yysize0;
+  YYSIZE_T yysize1;
+  enum { YYERROR_VERBOSE_ARGS_MAXIMUM = 5 };
+  /* Internationalized format string. */
+  const char *yyformat = 0;
+  /* Arguments of yyformat. */
+  char const *yyarg[YYERROR_VERBOSE_ARGS_MAXIMUM];
+  /* Number of reported tokens (one for the "unexpected", one per
+     "expected"). */
+  int yycount = 0;
+
+  /* There are many possibilities here to consider:
+     - Assume YYFAIL is not used.  It's too flawed to consider.  See
+       <http://lists.gnu.org/archive/html/bison-patches/2009-12/msg00024.html>
+       for details.  YYERROR is fine as it does not invoke this
+       function.
+     - If this state is a consistent state with a default action, then
+       the only way this function was invoked is if the default action
+       is an error action.  In that case, don't check for expected
+       tokens because there are none.
+     - The only way there can be no lookahead present (in yychar) is if
+       this state is a consistent state with a default action.  Thus,
+       detecting the absence of a lookahead is sufficient to determine
+       that there is no unexpected or expected token to report.  In that
+       case, just report a simple "syntax error".
+     - Don't assume there isn't a lookahead just because this state is a
+       consistent state with a default action.  There might have been a
+       previous inconsistent state, consistent state with a non-default
+       action, or user semantic action that manipulated yychar.
+     - Of course, the expected token list depends on states to have
+       correct lookahead information, and it depends on the parser not
+       to perform extra reductions after fetching a lookahead from the
+       scanner and before detecting a syntax error.  Thus, state merging
+       (from LALR or IELR) and default reductions corrupt the expected
+       token list.  However, the list is correct for canonical LR with
+       one exception: it will still contain any token that will not be
+       accepted due to an error action in a later state.
+  */
+  if (yytoken != YYEMPTY)
+    {
+      int yyn = yypact[*yyssp];
+      yysize0 = yytnamerr (0, yytname[yytoken]);
+      yysize = yysize0;
+      yyarg[yycount++] = yytname[yytoken];
+      if (!yypact_value_is_default (yyn))
+        {
+          /* Start YYX at -YYN if negative to avoid negative indexes in
+             YYCHECK.  In other words, skip the first -YYN actions for
+             this state because they are default actions.  */
+          int yyxbegin = yyn < 0 ? -yyn : 0;
+          /* Stay within bounds of both yycheck and yytname.  */
+          int yychecklim = YYLAST - yyn + 1;
+          int yyxend = yychecklim < YYNTOKENS ? yychecklim : YYNTOKENS;
+          int yyx;
+
+          for (yyx = yyxbegin; yyx < yyxend; ++yyx)
+            if (yycheck[yyx + yyn] == yyx && yyx != YYTERROR
+                && !yytable_value_is_error (yytable[yyx + yyn]))
+              {
+                /* Too many candidates to list: fall back to reporting
+                   only the unexpected token.  */
+                if (yycount == YYERROR_VERBOSE_ARGS_MAXIMUM)
+                  {
+                    yycount = 1;
+                    yysize = yysize0;
+                    break;
+                  }
+                yyarg[yycount++] = yytname[yyx];
+                yysize1 = yysize + yytnamerr (0, yytname[yyx]);
+                /* Reject on wrap-around or when over the limit.  */
+                if (! (yysize <= yysize1
+                       && yysize1 <= YYSTACK_ALLOC_MAXIMUM))
+                  return 2;
+                yysize = yysize1;
+              }
+        }
+    }
+
+  /* Pick the format whose number of "%s" matches YYCOUNT.  */
+  switch (yycount)
+    {
+# define YYCASE_(N, S)                      \
+      case N:                               \
+        yyformat = S;                       \
+      break
+      YYCASE_(0, YY_("syntax error"));
+      YYCASE_(1, YY_("syntax error, unexpected %s"));
+      YYCASE_(2, YY_("syntax error, unexpected %s, expecting %s"));
+      YYCASE_(3, YY_("syntax error, unexpected %s, expecting %s or %s"));
+      YYCASE_(4, YY_("syntax error, unexpected %s, expecting %s or %s or %s"));
+      YYCASE_(5, YY_("syntax error, unexpected %s, expecting %s or %s or %s or %s"));
+# undef YYCASE_
+    }
+
+  yysize1 = yysize + yystrlen (yyformat);
+  if (! (yysize <= yysize1 && yysize1 <= YYSTACK_ALLOC_MAXIMUM))
+    return 2;
+  yysize = yysize1;
+
+  if (*yymsg_alloc < yysize)
+    {
+      /* Ask for twice what is needed, clamped to the maximum.  */
+      *yymsg_alloc = 2 * yysize;
+      if (! (yysize <= *yymsg_alloc
+             && *yymsg_alloc <= YYSTACK_ALLOC_MAXIMUM))
+        *yymsg_alloc = YYSTACK_ALLOC_MAXIMUM;
+      return 1;
+    }
+
+  /* Avoid sprintf, as that infringes on the user's name space.
+     Don't have undefined behavior even if the translation
+     produced a string with the wrong number of "%s"s.  */
+  {
+    char *yyp = *yymsg;
+    int yyi = 0;
+    while ((*yyp = *yyformat) != '\0')
+      if (*yyp == '%' && yyformat[1] == 's' && yyi < yycount)
+        {
+          yyp += yytnamerr (yyp, yyarg[yyi++]);
+          yyformat += 2;
+        }
+      else
+        {
+          yyp++;
+          yyformat++;
+        }
+  }
+  return 0;
+}
+#endif /* YYERROR_VERBOSE */
+
+/*-----------------------------------------------.
+| Release the memory associated to this symbol.  |
+`-----------------------------------------------*/
+
+/* YYMSG is a label for the debug trace (a null pointer selects
+   "Deleting"), YYTYPE the internal symbol number and YYVALUEP its
+   semantic value.  In this parser the switch below has no per-symbol
+   cases, so nothing is actually released; the only visible effect is
+   the trace line emitted when debugging is enabled.  */
+/*ARGSUSED*/
+#if (defined __STDC__ || defined __C99__FUNC__ \
+     || defined __cplusplus || defined _MSC_VER)
+static void
+yydestruct (const char *yymsg, int yytype, YYSTYPE *yyvaluep)
+#else
+static void
+yydestruct (yymsg, yytype, yyvaluep)
+    const char *yymsg;
+    int yytype;
+    YYSTYPE *yyvaluep;
+#endif
+{
+  YYUSE (yyvaluep);
+
+  if (!yymsg)
+    yymsg = "Deleting";
+  /* yylocationp is not declared in this function.  That is harmless
+     because neither definition of YY_SYMBOL_PRINT expands its
+     Location argument.  */
+  YY_SYMBOL_PRINT (yymsg, yytype, yyvaluep, yylocationp);
+
+  switch (yytype)
+    {
+
+      default:
+       break;
+    }
+}
+
+
+/* Prevent warnings from -Wmissing-prototypes.  */
+/* With YYPARSE_PARAM defined, yyparse takes a single `void *'
+   parameter of that name; otherwise it takes no arguments.  Compilers
+   that define neither __STDC__ nor __cplusplus get an old-style
+   declaration without a parameter list.  */
+#ifdef YYPARSE_PARAM
+#if defined __STDC__ || defined __cplusplus
+int yyparse (void *YYPARSE_PARAM);
+#else
+int yyparse ();
+#endif
+#else /* ! YYPARSE_PARAM */
+#if defined __STDC__ || defined __cplusplus
+int yyparse (void);
+#else
+int yyparse ();
+#endif
+#endif /* ! YYPARSE_PARAM */
+
+
+/* The lookahead symbol.  yyparse sets it to YYEMPTY whenever no token
+   is pending, including at the start of every parse.  */
+int yychar;
+
+/* The semantic value of the lookahead symbol.  It is copied onto the
+   value stack when the lookahead is shifted.  */
+YYSTYPE yylval;
+
+/* Number of syntax errors so far.  Reset to 0 at the start of every
+   parse.  */
+int yynerrs;
+
+
+/*----------.
+| yyparse.  |
+`----------*/
+
+#ifdef YYPARSE_PARAM
+#if (defined __STDC__ || defined __C99__FUNC__ \
+     || defined __cplusplus || defined _MSC_VER)
+int
+yyparse (void *YYPARSE_PARAM)
+#else
+int
+yyparse (YYPARSE_PARAM)
+    void *YYPARSE_PARAM;
+#endif
+#else /* ! YYPARSE_PARAM */
+#if (defined __STDC__ || defined __C99__FUNC__ \
+     || defined __cplusplus || defined _MSC_VER)
+int
+yyparse (void)
+#else
+int
+yyparse ()
+
+#endif
+#endif
+{
+    int yystate;
+    /* Number of tokens to shift before error messages enabled.  */
+    int yyerrstatus;
+
+    /* The stacks and their tools:
+       `yyss': related to states.
+       `yyvs': related to semantic values.
+
+       Refer to the stacks thru separate pointers, to allow yyoverflow
+       to reallocate them elsewhere.  */
+
+    /* The state stack.  */
+    yytype_int16 yyssa[YYINITDEPTH];
+    yytype_int16 *yyss;
+    yytype_int16 *yyssp;
+
+    /* The semantic value stack.  */
+    YYSTYPE yyvsa[YYINITDEPTH];
+    YYSTYPE *yyvs;
+    YYSTYPE *yyvsp;
+
+    YYSIZE_T yystacksize;
+
+  int yyn;
+  int yyresult;
+  /* Lookahead token as an internal (translated) token number.  */
+  int yytoken;
+  /* The variables used to return semantic value and location from the
+     action routines.  */
+  YYSTYPE yyval;
+
+#if YYERROR_VERBOSE
+  /* Buffer for error messages, and its allocated size.  */
+  char yymsgbuf[128];
+  char *yymsg = yymsgbuf;
+  YYSIZE_T yymsg_alloc = sizeof yymsgbuf;
+#endif
+
+#define YYPOPSTACK(N)   (yyvsp -= (N), yyssp -= (N))
+
+  /* The number of symbols on the RHS of the reduced rule.
+     Keep to zero when no symbol should be popped.  */
+  int yylen = 0;
+
+  yytoken = 0;
+  yyss = yyssa;
+  yyvs = yyvsa;
+  yystacksize = YYINITDEPTH;
+
+  YYDPRINTF ((stderr, "Starting parse\n"));
+
+  yystate = 0;
+  yyerrstatus = 0;
+  yynerrs = 0;
+  yychar = YYEMPTY; /* Cause a token to be read.  */
+
+  /* Initialize stack pointers.
+     Waste one element of value and location stack
+     so that they stay on the same level as the state stack.
+     The wasted elements are never initialized.  */
+  yyssp = yyss;
+  yyvsp = yyvs;
+
+  goto yysetstate;
+
+/*------------------------------------------------------------.
+| yynewstate -- Push a new state, which is found in yystate.  |
+`------------------------------------------------------------*/
+ yynewstate:
+  /* In all cases, when you get here, the value and location stacks
+     have just been pushed.  So pushing a state here evens the stacks.  */
+  yyssp++;
+
+ yysetstate:
+  *yyssp = yystate;
+
+  if (yyss + yystacksize - 1 <= yyssp)
+    {
+      /* Get the current used size of the three stacks, in elements.  */
+      YYSIZE_T yysize = yyssp - yyss + 1;
+
+#ifdef yyoverflow
+      {
+       /* Give user a chance to reallocate the stack.  Use copies of
+          these so that the &'s don't force the real ones into
+          memory.  */
+       YYSTYPE *yyvs1 = yyvs;
+       yytype_int16 *yyss1 = yyss;
+
+       /* Each stack pointer address is followed by the size of the
+          data in use in that stack, in bytes.  This used to be a
+          conditional around just the two extra args, but that might
+          be undefined if yyoverflow is a macro.  */
+       yyoverflow (YY_("memory exhausted"),
+                   &yyss1, yysize * sizeof (*yyssp),
+                   &yyvs1, yysize * sizeof (*yyvsp),
+                   &yystacksize);
+
+       yyss = yyss1;
+       yyvs = yyvs1;
+      }
+#else /* no yyoverflow */
+# ifndef YYSTACK_RELOCATE
+      goto yyexhaustedlab;
+# else
+      /* Extend the stack our own way.  */
+      if (YYMAXDEPTH <= yystacksize)
+       goto yyexhaustedlab;
+      yystacksize *= 2;
+      if (YYMAXDEPTH < yystacksize)
+       yystacksize = YYMAXDEPTH;
+
+      {
+       yytype_int16 *yyss1 = yyss;
+       union yyalloc *yyptr =
+         (union yyalloc *) YYSTACK_ALLOC (YYSTACK_BYTES (yystacksize));
+       if (! yyptr)
+         goto yyexhaustedlab;
+       YYSTACK_RELOCATE (yyss_alloc, yyss);
+       YYSTACK_RELOCATE (yyvs_alloc, yyvs);
+#  undef YYSTACK_RELOCATE
+       if (yyss1 != yyssa)
+         YYSTACK_FREE (yyss1);
+      }
+# endif
+#endif /* no yyoverflow */
+
+      yyssp = yyss + yysize - 1;
+      yyvsp = yyvs + yysize - 1;
+
+      YYDPRINTF ((stderr, "Stack size increased to %lu\n",
+                 (unsigned long int) yystacksize));
+
+      if (yyss + yystacksize - 1 <= yyssp)
+       YYABORT;
+    }
+
+  YYDPRINTF ((stderr, "Entering state %d\n", yystate));
+
+  if (yystate == YYFINAL)
+    YYACCEPT;
+
+  goto yybackup;
+
+/*-----------.
+| yybackup.  |
+`-----------*/
+yybackup:
+
+  /* Do appropriate processing given the current state.  Read a
+     lookahead token if we need one and don't already have one.  */
+
+  /* First try to decide what to do without reference to lookahead token.  */
+  yyn = yypact[yystate];
+  if (yypact_value_is_default (yyn))
+    goto yydefault;
+
+  /* Not known => get a lookahead token if don't already have one.  */
+
+  /* YYCHAR is either YYEMPTY or YYEOF or a valid lookahead symbol.  */
+  if (yychar == YYEMPTY)
+    {
+      YYDPRINTF ((stderr, "Reading a token: "));
+      yychar = YYLEX;
+    }
+
+  if (yychar <= YYEOF)
+    {
+      yychar = yytoken = YYEOF;
+      YYDPRINTF ((stderr, "Now at end of input.\n"));
+    }
+  else
+    {
+      yytoken = YYTRANSLATE (yychar);
+      YY_SYMBOL_PRINT ("Next token is", yytoken, &yylval, &yylloc);
+    }
+
+  /* If the proper action on seeing token YYTOKEN is to reduce or to
+     detect an error, take that action.  */
+  yyn += yytoken;
+  if (yyn < 0 || YYLAST < yyn || yycheck[yyn] != yytoken)
+    goto yydefault;
+  yyn = yytable[yyn];
+  if (yyn <= 0)
+    {
+      if (yytable_value_is_error (yyn))
+        goto yyerrlab;
+      yyn = -yyn;
+      goto yyreduce;
+    }
+
+  /* Count tokens shifted since error; after three, turn off error
+     status.  */
+  if (yyerrstatus)
+    yyerrstatus--;
+
+  /* Shift the lookahead token.  */
+  YY_SYMBOL_PRINT ("Shifting", yytoken, &yylval, &yylloc);
+
+  /* Discard the shifted token.  */
+  yychar = YYEMPTY;
+
+  yystate = yyn;
+  *++yyvsp = yylval;
+
+  goto yynewstate;
+
+
+/*-----------------------------------------------------------.
+| yydefault -- do the default action for the current state.  |
+`-----------------------------------------------------------*/
+yydefault:
+  yyn = yydefact[yystate];
+  if (yyn == 0)
+    goto yyerrlab;
+  goto yyreduce;
+
+
+/*-----------------------------.
+| yyreduce -- Do a reduction.  |
+`-----------------------------*/
+yyreduce:
+  /* yyn is the number of a rule to reduce with.  */
+  yylen = yyr2[yyn];
+
+  /* If YYLEN is nonzero, implement the default value of the action:
+     `$$ = $1'.
+
+     Otherwise, the following line sets YYVAL to garbage.
+     This behavior is undocumented and Bison
+     users should not rely upon it.  Assigning to YYVAL
+     unconditionally makes the parser a bit smaller, and it avoids a
+     GCC warning that YYVAL may be used uninitialized.  */
+  yyval = yyvsp[1-yylen];
+
+
+  YY_REDUCE_PRINT (yyn);
+  switch (yyn)
+    {
+        case 6:
+
+/* Line 1806 of yacc.c  */
+#line 173 "calc.y"
+    { sp = stack[0]; yyerrok; }
+    break;
+
+  case 8:
+
+/* Line 1806 of yacc.c  */
+#line 177 "calc.y"
+    {
+      mpz_out_str (stdout, obase, sp); putchar ('\n');
+      sp--;
+      CHECK_EMPTY ();
+    }
+    break;
+
+  case 9:
+
+/* Line 1806 of yacc.c  */
+#line 182 "calc.y"
+    {
+      CHECK_VARIABLE ((yyvsp[(1) - (3)].var));
+      mpz_swap (variable[(yyvsp[(1) - (3)].var)], sp);
+      sp--;
+      CHECK_EMPTY ();
+    }
+    break;
+
+  case 10:
+
+/* Line 1806 of yacc.c  */
+#line 188 "calc.y"
+    { calc_help (); }
+    break;
+
+  case 11:
+
+/* Line 1806 of yacc.c  */
+#line 189 "calc.y"
+    { ibase = 16; obase = -16; }
+    break;
+
+  case 12:
+
+/* Line 1806 of yacc.c  */
+#line 190 "calc.y"
+    { ibase = 0;  obase = 10; }
+    break;
+
+  case 13:
+
+/* Line 1806 of yacc.c  */
+#line 191 "calc.y"
+    { exit (0); }
+    break;
+
+  case 15:
+
+/* Line 1806 of yacc.c  */
+#line 198 "calc.y"
+    { sp--; mpz_add    (sp, sp, sp+1); }
+    break;
+
+  case 16:
+
+/* Line 1806 of yacc.c  */
+#line 199 "calc.y"
+    { sp--; mpz_sub    (sp, sp, sp+1); }
+    break;
+
+  case 17:
+
+/* Line 1806 of yacc.c  */
+#line 200 "calc.y"
+    { sp--; mpz_mul    (sp, sp, sp+1); }
+    break;
+
+  case 18:
+
+/* Line 1806 of yacc.c  */
+#line 201 "calc.y"
+    { sp--; mpz_fdiv_q (sp, sp, sp+1); }
+    break;
+
+  case 19:
+
+/* Line 1806 of yacc.c  */
+#line 202 "calc.y"
+    { sp--; mpz_fdiv_r (sp, sp, sp+1); }
+    break;
+
+  case 20:
+
+/* Line 1806 of yacc.c  */
+#line 203 "calc.y"
+    { CHECK_UI ("Exponent", sp);
+                    sp--; mpz_pow_ui (sp, sp, mpz_get_ui (sp+1)); }
+    break;
+
+  case 21:
+
+/* Line 1806 of yacc.c  */
+#line 205 "calc.y"
+    { CHECK_UI ("Shift count", sp);
+                    sp--; mpz_mul_2exp (sp, sp, mpz_get_ui (sp+1)); }
+    break;
+
+  case 22:
+
+/* Line 1806 of yacc.c  */
+#line 207 "calc.y"
+    { CHECK_UI ("Shift count", sp);
+                    sp--; mpz_fdiv_q_2exp (sp, sp, mpz_get_ui (sp+1)); }
+    break;
+
+  case 23:
+
+/* Line 1806 of yacc.c  */
+#line 209 "calc.y"
+    { CHECK_UI ("Factorial", sp);
+                    mpz_fac_ui (sp, mpz_get_ui (sp)); }
+    break;
+
+  case 24:
+
+/* Line 1806 of yacc.c  */
+#line 211 "calc.y"
+    { mpz_neg (sp, sp); }
+    break;
+
+  case 25:
+
+/* Line 1806 of yacc.c  */
+#line 213 "calc.y"
+    { sp--; mpz_set_ui (sp, mpz_cmp (sp, sp+1) <  0); }
+    break;
+
+  case 26:
+
+/* Line 1806 of yacc.c  */
+#line 214 "calc.y"
+    { sp--; mpz_set_ui (sp, mpz_cmp (sp, sp+1) <= 0); }
+    break;
+
+  case 27:
+
+/* Line 1806 of yacc.c  */
+#line 215 "calc.y"
+    { sp--; mpz_set_ui (sp, mpz_cmp (sp, sp+1) == 0); }
+    break;
+
+  case 28:
+
+/* Line 1806 of yacc.c  */
+#line 216 "calc.y"
+    { sp--; mpz_set_ui (sp, mpz_cmp (sp, sp+1) != 0); }
+    break;
+
+  case 29:
+
+/* Line 1806 of yacc.c  */
+#line 217 "calc.y"
+    { sp--; mpz_set_ui (sp, mpz_cmp (sp, sp+1) >= 0); }
+    break;
+
+  case 30:
+
+/* Line 1806 of yacc.c  */
+#line 218 "calc.y"
+    { sp--; mpz_set_ui (sp, mpz_cmp (sp, sp+1) >  0); }
+    break;
+
+  case 31:
+
+/* Line 1806 of yacc.c  */
+#line 220 "calc.y"
+    { sp--; mpz_set_ui (sp, mpz_sgn (sp) && mpz_sgn (sp+1)); }
+    break;
+
+  case 32:
+
+/* Line 1806 of yacc.c  */
+#line 221 "calc.y"
+    { sp--; mpz_set_ui (sp, mpz_sgn (sp) || mpz_sgn (sp+1)); }
+    break;
+
+  case 33:
+
+/* Line 1806 of yacc.c  */
+#line 223 "calc.y"
+    { mpz_abs (sp, sp); }
+    break;
+
+  case 34:
+
+/* Line 1806 of yacc.c  */
+#line 224 "calc.y"
+    { sp--; CHECK_UI ("Binomial base", sp+1);
+                                   mpz_bin_ui (sp, sp, mpz_get_ui (sp+1)); }
+    break;
+
+  case 35:
+
+/* Line 1806 of yacc.c  */
+#line 226 "calc.y"
+    { CHECK_UI ("Fibonacci", sp);
+                                   mpz_fib_ui (sp, mpz_get_ui (sp)); }
+    break;
+
+  case 37:
+
+/* Line 1806 of yacc.c  */
+#line 229 "calc.y"
+    { sp--; mpz_set_si (sp,
+                                         mpz_kronecker (sp, sp+1)); }
+    break;
+
+  case 39:
+
+/* Line 1806 of yacc.c  */
+#line 232 "calc.y"
+    { CHECK_UI ("Lucas number", sp);
+                                   mpz_lucnum_ui (sp, mpz_get_ui (sp)); }
+    break;
+
+  case 40:
+
+/* Line 1806 of yacc.c  */
+#line 234 "calc.y"
+    { mpz_nextprime (sp, sp); }
+    break;
+
+  case 41:
+
+/* Line 1806 of yacc.c  */
+#line 235 "calc.y"
+    { sp -= 2; mpz_powm (sp, sp, sp+1, sp+2); }
+    break;
+
+  case 42:
+
+/* Line 1806 of yacc.c  */
+#line 236 "calc.y"
+    { sp--; CHECK_UI ("Nth-root", sp+1);
+                                   mpz_root (sp, sp, mpz_get_ui (sp+1)); }
+    break;
+
+  case 43:
+
+/* Line 1806 of yacc.c  */
+#line 238 "calc.y"
+    { mpz_sqrt (sp, sp); }
+    break;
+
+  case 44:
+
+/* Line 1806 of yacc.c  */
+#line 240 "calc.y"
+    {
+        sp++;
+        CHECK_OVERFLOW ();
+        CHECK_VARIABLE ((yyvsp[(1) - (1)].var));
+        mpz_set (sp, variable[(yyvsp[(1) - (1)].var)]);
+      }
+    break;
+
+  case 45:
+
+/* Line 1806 of yacc.c  */
+#line 246 "calc.y"
+    {
+        sp++;
+        CHECK_OVERFLOW ();
+        if (mpz_set_str (sp, (yyvsp[(1) - (1)].str), ibase) != 0)
+          {
+            fprintf (stderr, "Invalid number: %s\n", (yyvsp[(1) - (1)].str));
+            YYERROR;
+          }
+      }
+    break;
+
+  case 47:
+
+/* Line 1806 of yacc.c  */
+#line 258 "calc.y"
+    { sp--; mpz_gcd (sp, sp, sp+1); }
+    break;
+
+  case 49:
+
+/* Line 1806 of yacc.c  */
+#line 262 "calc.y"
+    { sp--; mpz_lcm (sp, sp, sp+1); }
+    break;
+
+
+
+/* Line 1806 of yacc.c  */
+#line 2022 "calc.c"
+      default: break;
+    }
+  /* User semantic actions sometimes alter yychar, and that requires
+     that yytoken be updated with the new translation.  We take the
+     approach of translating immediately before every use of yytoken.
+     One alternative is translating here after every semantic action,
+     but that translation would be missed if the semantic action invokes
+     YYABORT, YYACCEPT, or YYERROR immediately after altering yychar or
+     if it invokes YYBACKUP.  In the case of YYABORT or YYACCEPT, an
+     incorrect destructor might then be invoked immediately.  In the
+     case of YYERROR or YYBACKUP, subsequent parser actions might lead
+     to an incorrect destructor call or verbose syntax error message
+     before the lookahead is translated.  */
+  YY_SYMBOL_PRINT ("-> $$ =", yyr1[yyn], &yyval, &yyloc);
+
+  YYPOPSTACK (yylen);
+  yylen = 0;
+  YY_STACK_PRINT (yyss, yyssp);
+
+  *++yyvsp = yyval;
+
+  /* Now `shift' the result of the reduction.  Determine what state
+     that goes to, based on the state we popped back to and the rule
+     number reduced by.  */
+
+  yyn = yyr1[yyn];
+
+  yystate = yypgoto[yyn - YYNTOKENS] + *yyssp;
+  if (0 <= yystate && yystate <= YYLAST && yycheck[yystate] == *yyssp)
+    yystate = yytable[yystate];
+  else
+    yystate = yydefgoto[yyn - YYNTOKENS];
+
+  goto yynewstate;
+
+
+/*------------------------------------.
+| yyerrlab -- here on detecting error |
+`------------------------------------*/
+yyerrlab:
+  /* Make sure we have latest lookahead translation.  See comments at
+     user semantic actions for why this is necessary.  */
+  yytoken = yychar == YYEMPTY ? YYEMPTY : YYTRANSLATE (yychar);
+
+  /* If not already recovering from an error, report this error.  */
+  if (!yyerrstatus)
+    {
+      ++yynerrs;
+#if ! YYERROR_VERBOSE
+      yyerror (YY_("syntax error"));
+#else
+# define YYSYNTAX_ERROR yysyntax_error (&yymsg_alloc, &yymsg, \
+                                        yyssp, yytoken)
+      {
+        char const *yymsgp = YY_("syntax error");
+        int yysyntax_error_status;
+        yysyntax_error_status = YYSYNTAX_ERROR;
+        if (yysyntax_error_status == 0)
+          yymsgp = yymsg;
+        else if (yysyntax_error_status == 1)
+          {
+            if (yymsg != yymsgbuf)
+              YYSTACK_FREE (yymsg);
+            yymsg = (char *) YYSTACK_ALLOC (yymsg_alloc);
+            if (!yymsg)
+              {
+                yymsg = yymsgbuf;
+                yymsg_alloc = sizeof yymsgbuf;
+                yysyntax_error_status = 2;
+              }
+            else
+              {
+                yysyntax_error_status = YYSYNTAX_ERROR;
+                yymsgp = yymsg;
+              }
+          }
+        yyerror (yymsgp);
+        if (yysyntax_error_status == 2)
+          goto yyexhaustedlab;
+      }
+# undef YYSYNTAX_ERROR
+#endif
+    }
+
+
+
+  if (yyerrstatus == 3)
+    {
+      /* If just tried and failed to reuse lookahead token after an
+        error, discard it.  */
+
+      if (yychar <= YYEOF)
+       {
+         /* Return failure if at end of input.  */
+         if (yychar == YYEOF)
+           YYABORT;
+       }
+      else
+       {
+         yydestruct ("Error: discarding",
+                     yytoken, &yylval);
+         yychar = YYEMPTY;
+       }
+    }
+
+  /* Else will try to reuse lookahead token after shifting the error
+     token.  */
+  goto yyerrlab1;
+
+
+/*---------------------------------------------------.
+| yyerrorlab -- error raised explicitly by YYERROR.  |
+`---------------------------------------------------*/
+yyerrorlab:
+
+  /* Pacify compilers like GCC when the user code never invokes
+     YYERROR and the label yyerrorlab therefore never appears in user
+     code.  */
+  if (/*CONSTCOND*/ 0)
+     goto yyerrorlab;
+
+  /* Do not reclaim the symbols of the rule which action triggered
+     this YYERROR.  */
+  YYPOPSTACK (yylen);
+  yylen = 0;
+  YY_STACK_PRINT (yyss, yyssp);
+  yystate = *yyssp;
+  goto yyerrlab1;
+
+
+/*-------------------------------------------------------------.
+| yyerrlab1 -- common code for both syntax error and YYERROR.  |
+`-------------------------------------------------------------*/
+yyerrlab1:
+  yyerrstatus = 3;     /* Each real token shifted decrements this.  */
+
+  for (;;)
+    {
+      yyn = yypact[yystate];
+      if (!yypact_value_is_default (yyn))
+       {
+         yyn += YYTERROR;
+         if (0 <= yyn && yyn <= YYLAST && yycheck[yyn] == YYTERROR)
+           {
+             yyn = yytable[yyn];
+             if (0 < yyn)
+               break;
+           }
+       }
+
+      /* Pop the current state because it cannot handle the error token.  */
+      if (yyssp == yyss)
+       YYABORT;
+
+
+      yydestruct ("Error: popping",
+                 yystos[yystate], yyvsp);
+      YYPOPSTACK (1);
+      yystate = *yyssp;
+      YY_STACK_PRINT (yyss, yyssp);
+    }
+
+  *++yyvsp = yylval;
+
+
+  /* Shift the error token.  */
+  YY_SYMBOL_PRINT ("Shifting", yystos[yyn], yyvsp, yylsp);
+
+  yystate = yyn;
+  goto yynewstate;
+
+
+/*-------------------------------------.
+| yyacceptlab -- YYACCEPT comes here.  |
+`-------------------------------------*/
+yyacceptlab:
+  yyresult = 0;
+  goto yyreturn;
+
+/*-----------------------------------.
+| yyabortlab -- YYABORT comes here.  |
+`-----------------------------------*/
+yyabortlab:
+  yyresult = 1;
+  goto yyreturn;
+
+#if !defined(yyoverflow) || YYERROR_VERBOSE
+/*-------------------------------------------------.
+| yyexhaustedlab -- memory exhaustion comes here.  |
+`-------------------------------------------------*/
+yyexhaustedlab:
+  yyerror (YY_("memory exhausted"));
+  yyresult = 2;
+  /* Fall through.  */
+#endif
+
+yyreturn:
+  if (yychar != YYEMPTY)
+    {
+      /* Make sure we have latest lookahead translation.  See comments at
+         user semantic actions for why this is necessary.  */
+      yytoken = YYTRANSLATE (yychar);
+      yydestruct ("Cleanup: discarding lookahead",
+                  yytoken, &yylval);
+    }
+  /* Do not reclaim the symbols of the rule which action triggered
+     this YYABORT or YYACCEPT.  */
+  YYPOPSTACK (yylen);
+  YY_STACK_PRINT (yyss, yyssp);
+  while (yyssp != yyss)
+    {
+      yydestruct ("Cleanup: popping",
+                 yystos[*yyssp], yyvsp);
+      YYPOPSTACK (1);
+    }
+#ifndef yyoverflow
+  if (yyss != yyssa)
+    YYSTACK_FREE (yyss);
+#endif
+#if YYERROR_VERBOSE
+  if (yymsg != yymsgbuf)
+    YYSTACK_FREE (yymsg);
+#endif
+  /* Make sure YYID is used.  */
+  return YYID (yyresult);
+}
+
+
+
+/* Line 2067 of yacc.c  */
+#line 264 "calc.y"
+
+int yyerror (char *s)  /* parser error hook: print message S on stderr */
+{
+  fprintf (stderr, "%s\n", s);
+  return 0;                     /* value is ignored by the generated parser */
+}
+
+int calc_option_readline = -1;
+/* Entry point: read the options, set up the GMP values, run the parser.  */
+int
+main (int argc, char *argv[])
+{
+  int  i;
+  /* Options: any argument other than the three known ones is fatal.  */
+  for (i = 1; i < argc; i++)
+    {
+      if (strcmp (argv[i], "--readline") == 0)
+        calc_option_readline = 1;
+      else if (strcmp (argv[i], "--noreadline") == 0)
+        calc_option_readline = 0;
+      else if (strcmp (argv[i], "--help") == 0)
+        {
+          printf ("Usage: calc [--option]...\n");
+          printf ("  --readline    use readline\n");
+          printf ("  --noreadline  don't use readline\n");
+          printf ("  --help        this message\n");
+          printf ("Readline is only available when compiled in,\n");
+          printf ("and in that case it's the default on a tty.\n");
+          exit (0);
+        }
+      else
+        {
+          fprintf (stderr, "Unrecognised option: %s\n", argv[i]);
+          exit (1);
+        }
+    }
+  /* Without readline compiled in, an explicit --readline is refused.  */
+#if WITH_READLINE
+  calc_init_readline ();
+#else
+  if (calc_option_readline == 1)
+    {
+      fprintf (stderr, "Readline support not available\n");
+      exit (1);
+    }
+#endif
+  /* Every variable and every value-stack slot is initialized once, here.  */
+  for (i = 0; i < numberof (variable); i++)
+    mpz_init (variable[i]);
+
+  for (i = 0; i < numberof (stack); i++)
+    mpz_init (stack[i]);
+
+  return yyparse ();            /* the parser's result is the exit status */
+}
+
diff --git a/demos/calc/calc.h b/demos/calc/calc.h

new file mode 100644 (file)

index 0000000..2afcf0f
--- /dev/null
+++ b/demos/calc/calc.h
@@ -0,0 +1,125 @@
+/* A Bison parser, made by GNU Bison 2.5.  */
+
+/* Bison interface for Yacc-like parsers in C
+   
+      Copyright (C) 1984, 1989-1990, 2000-2011 Free Software Foundation, Inc.
+   
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+   
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+   
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+/* As a special exception, you may create a larger work that contains
+   part or all of the Bison parser skeleton and distribute that work
+   under terms of your choice, so long as that work isn't itself a
+   parser generator using the skeleton or a modified version thereof
+   as a parser skeleton.  Alternatively, if you modify or redistribute
+   the parser skeleton itself, you may (at your option) remove this
+   special exception, which will cause the skeleton and the resulting
+   Bison output files to be licensed under the GNU General Public
+   License without this special exception.
+   
+   This special exception was added by the Free Software Foundation in
+   version 2.2 of Bison.  */
+
+
+/* Token codes, declared as an enum unless YYTOKENTYPE is already defined.  */
+#ifndef YYTOKENTYPE
+# define YYTOKENTYPE
+   /* Put the tokens into the symbol table, so that GDB and other debuggers
+      know about them.  */
+   enum yytokentype {
+     EOS = 258,
+     BAD = 259,
+     HELP = 260,
+     HEX = 261,
+     DECIMAL = 262,
+     QUIT = 263,
+     ABS = 264,
+     BIN = 265,
+     FIB = 266,
+     GCD = 267,
+     KRON = 268,
+     LCM = 269,
+     LUCNUM = 270,
+     NEXTPRIME = 271,
+     POWM = 272,
+     ROOT = 273,
+     SQRT = 274,
+     NUMBER = 275,
+     VARIABLE = 276,
+     LOR = 277,
+     LAND = 278,
+     GE = 279,
+     LE = 280,
+     NE = 281,
+     EQ = 282,
+     RSHIFT = 283,
+     LSHIFT = 284,
+     UMINUS = 285
+   };
+#endif
+/* Tokens.  */
+#define EOS 258
+#define BAD 259
+#define HELP 260
+#define HEX 261
+#define DECIMAL 262
+#define QUIT 263
+#define ABS 264
+#define BIN 265
+#define FIB 266
+#define GCD 267
+#define KRON 268
+#define LCM 269
+#define LUCNUM 270
+#define NEXTPRIME 271
+#define POWM 272
+#define ROOT 273
+#define SQRT 274
+#define NUMBER 275
+#define VARIABLE 276
+#define LOR 277
+#define LAND 278
+#define GE 279
+#define LE 280
+#define NE 281
+#define EQ 282
+#define RSHIFT 283
+#define LSHIFT 284
+#define UMINUS 285
+
+
+
+
+#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
+typedef union YYSTYPE
+{
+
+/* Line 2068 of yacc.c  */
+#line 142 "calc.y"
+
+  char  *str;   /* value of a NUMBER token: its text, for mpz_set_str */
+  int   var;    /* value of a VARIABLE token: index into variable[] */
+
+
+
+/* Line 2068 of yacc.c  */
+#line 117 "calc.h"
+} YYSTYPE;
+# define YYSTYPE_IS_TRIVIAL 1
+# define yystype YYSTYPE /* obsolescent; will be withdrawn */
+# define YYSTYPE_IS_DECLARED 1
+#endif
+
+extern YYSTYPE yylval;
+
+
diff --git a/demos/calc/calc.y b/demos/calc/calc.y

new file mode 100644 (file)

index 0000000..0f9b1ba
--- /dev/null
+++ b/demos/calc/calc.y
@@ -0,0 +1,318 @@
+%{
+/* A simple integer desk calculator using yacc and gmp.
+
+Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+PARTICULAR PURPOSE.  See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program.  If not, see http://www.gnu.org/licenses/.  */
+
+
+/* This is a simple program, meant only to show one way to use GMP for this
+   sort of thing.  There are few features, and error checking is minimal.
+   Standard input is read, calc_help() below shows the inputs accepted.
+
+   Expressions are evaluated as they're read.  If user defined functions
+   were wanted it'd be necessary to build a parse tree like pexpr.c does, or
+   a list of operations for a stack based evaluator.  That would also make
+   it possible to detect and optimize evaluations "mod m" like pexpr.c does.
+
+   A stack is used for intermediate values in the expression evaluation,
+   separate from the yacc parser stack.  This is simple, makes error
+   recovery easy, minimizes the junk around mpz calls in the rules, and
+   saves initializing or clearing "mpz_t"s during a calculation.  A
+   disadvantage though is that variables must be copied to the stack to be
+   worked on.  A more sophisticated calculator or language system might be
+   able to avoid that when executing a compiled or semi-compiled form.
+
+   Avoiding repeated initializing and clearing of "mpz_t"s is important.  In
+   this program the time spent parsing is obviously much greater than any
+   possible saving from this, but a proper calculator or language should
+   take some trouble over it.  Don't be surprised if an init/clear takes 3
+   or more times as long as a 10 limb addition, depending on the system (see
+   the mpz_init_realloc_clear example in tune/README).  */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "gmp.h"
+#define NO_CALC_H /* because it conflicts with normal calc.c stuff */
+#include "calc-common.h"
+
+
+#define numberof(x)  (sizeof (x) / sizeof ((x)[0]))
+
+/* Print the help text on stdout: examples, operators, functions, commands.  */
+void
+calc_help (void)
+{
+  printf ("Examples:\n");
+  printf ("    2+3*4        expressions are evaluated\n");
+  printf ("    x=5^6        variables a to z can be set and used\n");
+  printf ("Operators:\n");
+  printf ("    + - *        arithmetic\n");
+  printf ("    / %%          division and remainder (rounding towards negative infinity)\n");
+  printf ("    ^            exponentiation\n");
+  printf ("    !            factorial\n");
+  printf ("    << >>        left and right shifts\n");
+  printf ("    <= >= >      \\ comparisons, giving 1 if true, 0 if false\n");
+  printf ("    == != <      /\n");
+  printf ("    && ||        logical and/or, giving 1 if true, 0 if false\n");
+  printf ("Functions:\n");
+  printf ("    abs(n)       absolute value\n");
+  printf ("    bin(n,m)     binomial coefficient\n");
+  printf ("    fib(n)       fibonacci number\n");
+  printf ("    gcd(a,b,..)  greatest common divisor\n");
+  printf ("    kron(a,b)    kronecker symbol\n");
+  printf ("    lcm(a,b,..)  least common multiple\n");
+  printf ("    lucnum(n)    lucas number\n");
+  printf ("    nextprime(n) next prime after n\n");
+  printf ("    powm(b,e,m)  modulo powering, b^e%%m\n");
+  printf ("    root(n,r)    r-th root\n");
+  printf ("    sqrt(n)      square root\n");
+  printf ("Other:\n");
+  printf ("    hex          \\ set hex or decimal for input and output\n");
+  printf ("    decimal      /   (\"0x\" can be used for hex too)\n");
+  printf ("    quit         exit program (EOF works too)\n");
+  printf ("    ;            statements are separated with a ; or newline\n");
+  printf ("    \\            continue expressions with \\ before newline\n");
+  printf ("    # xxx        comments are # through to newline\n");
+  printf ("Hex numbers must be entered in upper case, to distinguish them from the\n");
+  printf ("variables a to f (like in bc).\n");
+}
+
+
+int  ibase = 0;    /* base passed to mpz_set_str for input; "hex" sets 16 */
+int  obase = 10;   /* base passed to mpz_out_str for output; "hex" sets -16 */
+
+
+/* The stack is a fixed size, which means there's a limit on the nesting
+   allowed in expressions.  A more sophisticated program could let it grow
+   dynamically.  "sp" points at the top value and sp == stack[0] means the
+   stack is empty: a push does sp++ before storing, a pop does sp--.  */
+mpz_t    stack[100];
+mpz_ptr  sp = stack[0];
+/* For use right after "sp++": complain and YYERROR once sp is past the end.  */
+#define CHECK_OVERFLOW()                                                  \
+  if (sp >= stack[numberof(stack)])    /* FIXME */                     \
+    {                                                                     \
+      fprintf (stderr,                                                    \
+               "Value stack overflow, too much nesting in expression\n"); \
+      YYERROR;                                                            \
+    }
+/* After a statement sp should be back at stack[0]; if not, warn and reset.  */
+#define CHECK_EMPTY()                                                   \
+  if (sp != stack[0])                                                   \
+    {                                                                   \
+      fprintf (stderr, "Oops, expected the value stack to be empty\n"); \
+      sp = stack[0];                                                    \
+    }
+
+/* One value per variable; the help text offers the names a to z.  */
+mpz_t  variable[26];
+/* Report and YYERROR if VAR is not a valid index into variable[].  */
+#define CHECK_VARIABLE(var)                                             \
+  if ((var) < 0 || (var) >= numberof (variable))                        \
+    {                                                                   \
+      fprintf (stderr, "Oops, bad variable somehow: %d\n", var);        \
+      YYERROR;                                                          \
+    }
+
+/* Print "NAME too big" and YYERROR unless Z passes mpz_fits_ulong_p.  */
+#define CHECK_UI(name,z)                        \
+  if (! mpz_fits_ulong_p (z))                   \
+    {                                           \
+      fprintf (stderr, "%s too big\n", name);   \
+      YYERROR;                                  \
+    }
+
+%}
+
+%union {
+  char  *str;   /* value of a NUMBER token: its text, for mpz_set_str */
+  int   var;    /* value of a VARIABLE token: index into variable[] */
+}
+
+%token EOS BAD
+%token HELP HEX DECIMAL QUIT
+%token ABS BIN FIB GCD KRON LCM LUCNUM NEXTPRIME POWM ROOT SQRT
+%token <str> NUMBER
+%token <var> VARIABLE
+
+/* operators, increasing precedence */
+%left     LOR
+%left     LAND
+%nonassoc '<' '>' EQ NE LE GE
+%left     LSHIFT RSHIFT
+%left     '+' '-'
+%left     '*' '/' '%'
+%nonassoc UMINUS
+%right    '^'
+%nonassoc '!'
+
+%%
+/* Input is a list of statements separated by EOS; the last needs no EOS.  */
+top:
+  statement
+  | statements statement;
+/* After a syntax error, skip to the next EOS and empty the value stack.  */
+statements:
+  statement EOS
+  | statements statement EOS
+  | error EOS { sp = stack[0]; yyerrok; };
+/* A statement is empty, an expression to print, an assignment or a command.  */
+statement:
+  /* empty */
+  | e {
+      mpz_out_str (stdout, obase, sp); putchar ('\n');
+      sp--;
+      CHECK_EMPTY ();
+    }
+  | VARIABLE '=' e {
+      CHECK_VARIABLE ($1);
+      mpz_swap (variable[$1], sp);
+      sp--;
+      CHECK_EMPTY ();
+    }
+  | HELP    { calc_help (); }
+  | HEX     { ibase = 16; obase = -16; }
+  | DECIMAL { ibase = 0;  obase = 10; }
+  | QUIT    { exit (0); };
+
+/* "e" leaves its value on the top of the mpz stack.  A rule like "e '+' e"
+   will have done a reduction for the first "e" first and the second "e"
+   second, so the code receives the values in that order on the stack.  */
+e:
+    '(' e ')'     /* value on stack */
+    | e '+' e     { sp--; mpz_add    (sp, sp, sp+1); }
+    | e '-' e     { sp--; mpz_sub    (sp, sp, sp+1); }
+    | e '*' e     { sp--; mpz_mul    (sp, sp, sp+1); }
+    | e '/' e     { sp--; mpz_fdiv_q (sp, sp, sp+1); }
+    | e '%' e     { sp--; mpz_fdiv_r (sp, sp, sp+1); }
+    | e '^' e     { CHECK_UI ("Exponent", sp);
+                    sp--; mpz_pow_ui (sp, sp, mpz_get_ui (sp+1)); }
+    | e LSHIFT e  { CHECK_UI ("Shift count", sp);
+                    sp--; mpz_mul_2exp (sp, sp, mpz_get_ui (sp+1)); }
+    | e RSHIFT e  { CHECK_UI ("Shift count", sp);
+                    sp--; mpz_fdiv_q_2exp (sp, sp, mpz_get_ui (sp+1)); }
+    | e '!'       { CHECK_UI ("Factorial", sp);
+                    mpz_fac_ui (sp, mpz_get_ui (sp)); }
+    | '-' e %prec UMINUS   { mpz_neg (sp, sp); }
+    /* Comparisons replace both operands with 1 if true, 0 if false.  */
+    | e '<' e     { sp--; mpz_set_ui (sp, mpz_cmp (sp, sp+1) <  0); }
+    | e LE  e     { sp--; mpz_set_ui (sp, mpz_cmp (sp, sp+1) <= 0); }
+    | e EQ  e     { sp--; mpz_set_ui (sp, mpz_cmp (sp, sp+1) == 0); }
+    | e NE  e     { sp--; mpz_set_ui (sp, mpz_cmp (sp, sp+1) != 0); }
+    | e GE  e     { sp--; mpz_set_ui (sp, mpz_cmp (sp, sp+1) >= 0); }
+    | e '>' e     { sp--; mpz_set_ui (sp, mpz_cmp (sp, sp+1) >  0); }
+    /* Logical operators: an operand counts as true when it is nonzero.  */
+    | e LAND e    { sp--; mpz_set_ui (sp, mpz_sgn (sp) && mpz_sgn (sp+1)); }
+    | e LOR e     { sp--; mpz_set_ui (sp, mpz_sgn (sp) || mpz_sgn (sp+1)); }
+    /* Functions: the arguments are on the stack, the last one at sp.  */
+    | ABS '(' e ')'              { mpz_abs (sp, sp); }
+    | BIN '(' e ',' e ')'        { sp--; CHECK_UI ("Binomial base", sp+1);
+                                   mpz_bin_ui (sp, sp, mpz_get_ui (sp+1)); }
+    | FIB '(' e ')'              { CHECK_UI ("Fibonacci", sp);
+                                   mpz_fib_ui (sp, mpz_get_ui (sp)); }
+    | GCD '(' gcdlist ')'        /* value on stack */
+    | KRON '(' e ',' e ')'       { sp--; mpz_set_si (sp,
+                                         mpz_kronecker (sp, sp+1)); }
+    | LCM '(' lcmlist ')'        /* value on stack */
+    | LUCNUM '(' e ')'           { CHECK_UI ("Lucas number", sp);
+                                   mpz_lucnum_ui (sp, mpz_get_ui (sp)); }
+    | NEXTPRIME '(' e ')'        { mpz_nextprime (sp, sp); }
+    | POWM '(' e ',' e ',' e ')' { sp -= 2; mpz_powm (sp, sp, sp+1, sp+2); }
+    | ROOT '(' e ',' e ')'       { sp--; CHECK_UI ("Nth-root", sp+1);
+                                   mpz_root (sp, sp, mpz_get_ui (sp+1)); }
+    | SQRT '(' e ')'             { mpz_sqrt (sp, sp); }
+    /* Operands: push the variable's value or the converted number.  */
+    | VARIABLE {
+        sp++;
+        CHECK_OVERFLOW ();
+        CHECK_VARIABLE ($1);
+        mpz_set (sp, variable[$1]);
+      }
+    | NUMBER {
+        sp++;
+        CHECK_OVERFLOW ();
+        if (mpz_set_str (sp, $1, ibase) != 0)
+          {
+            fprintf (stderr, "Invalid number: %s\n", $1);
+            YYERROR;
+          }
+      };
+/* Each further argument is folded into the gcd of those before it.  */
+gcdlist:
+    e                /* value on stack */
+    | gcdlist ',' e  { sp--; mpz_gcd (sp, sp, sp+1); };
+/* Each further argument is folded into the lcm of those before it.  */
+lcmlist:
+    e                /* value on stack */
+    | lcmlist ',' e  { sp--; mpz_lcm (sp, sp, sp+1); };
+
+%%
+int yyerror (char *s)  /* parser error hook: print message S on stderr */
+{
+  fprintf (stderr, "%s\n", s);
+  return 0;                     /* value is ignored by the generated parser */
+}
+
+int calc_option_readline = -1;
+/* Entry point: read the options, set up the GMP values, run the parser.  */
+int
+main (int argc, char *argv[])
+{
+  int  i;
+  /* Options: any argument other than the three known ones is fatal.  */
+  for (i = 1; i < argc; i++)
+    {
+      if (strcmp (argv[i], "--readline") == 0)
+        calc_option_readline = 1;
+      else if (strcmp (argv[i], "--noreadline") == 0)
+        calc_option_readline = 0;
+      else if (strcmp (argv[i], "--help") == 0)
+        {
+          printf ("Usage: calc [--option]...\n");
+          printf ("  --readline    use readline\n");
+          printf ("  --noreadline  don't use readline\n");
+          printf ("  --help        this message\n");
+          printf ("Readline is only available when compiled in,\n");
+          printf ("and in that case it's the default on a tty.\n");
+          exit (0);
+        }
+      else
+        {
+          fprintf (stderr, "Unrecognised option: %s\n", argv[i]);
+          exit (1);
+        }
+    }
+  /* Without readline compiled in, an explicit --readline is refused.  */
+#if WITH_READLINE
+  calc_init_readline ();
+#else
+  if (calc_option_readline == 1)
+    {
+      fprintf (stderr, "Readline support not available\n");
+      exit (1);
+    }
+#endif
+  /* Every variable and every value-stack slot is initialized once, here.  */
+  for (i = 0; i < numberof (variable); i++)
+    mpz_init (variable[i]);
+
+  for (i = 0; i < numberof (stack); i++)
+    mpz_init (stack[i]);
+
+  return yyparse ();            /* the parser's result is the exit status */
+}
diff --git a/demos/calc/calclex.c b/demos/calc/calclex.c

new file mode 100644 (file)

index 0000000..df6f331
--- /dev/null
+++ b/demos/calc/calclex.c
@@ -0,0 +1,1920 @@
+
+#line 3 "calclex.c"
+
+#define  YY_INT_ALIGNED short int
+
+/* A lexical scanner generated by flex */
+
+#define FLEX_SCANNER
+#define YY_FLEX_MAJOR_VERSION 2
+#define YY_FLEX_MINOR_VERSION 5
+#define YY_FLEX_SUBMINOR_VERSION 35
+#if YY_FLEX_SUBMINOR_VERSION > 0
+#define FLEX_BETA
+#endif
+
+/* First, we deal with  platform-specific or compiler-specific issues. */
+
+/* begin standard C headers. */
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <stdlib.h>
+
+/* end standard C headers. */
+
+/* flex integer type definitions */
+
+#ifndef FLEXINT_H
+#define FLEXINT_H
+
+/* C99 systems have <inttypes.h>. Non-C99 systems may or may not. */
+
+#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+
+/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h,
+ * if you want the limit (max/min) macros for int types. 
+ */
+#ifndef __STDC_LIMIT_MACROS
+#define __STDC_LIMIT_MACROS 1
+#endif
+
+#include <inttypes.h>
+typedef int8_t flex_int8_t;
+typedef uint8_t flex_uint8_t;
+typedef int16_t flex_int16_t;
+typedef uint16_t flex_uint16_t;
+typedef int32_t flex_int32_t;
+typedef uint32_t flex_uint32_t;
+#else
+typedef signed char flex_int8_t;
+typedef short int flex_int16_t;
+typedef int flex_int32_t;
+typedef unsigned char flex_uint8_t; 
+typedef unsigned short int flex_uint16_t;
+typedef unsigned int flex_uint32_t;
+#endif /* ! C99 */
+
+/* Limits of integral types. */
+#ifndef INT8_MIN
+#define INT8_MIN               (-128)
+#endif
+#ifndef INT16_MIN
+#define INT16_MIN              (-32767-1)
+#endif
+#ifndef INT32_MIN
+#define INT32_MIN              (-2147483647-1)
+#endif
+#ifndef INT8_MAX
+#define INT8_MAX               (127)
+#endif
+#ifndef INT16_MAX
+#define INT16_MAX              (32767)
+#endif
+#ifndef INT32_MAX
+#define INT32_MAX              (2147483647)
+#endif
+#ifndef UINT8_MAX
+#define UINT8_MAX              (255U)
+#endif
+#ifndef UINT16_MAX
+#define UINT16_MAX             (65535U)
+#endif
+#ifndef UINT32_MAX
+#define UINT32_MAX             (4294967295U)
+#endif
+
+#endif /* ! FLEXINT_H */
+
+#ifdef __cplusplus
+
+/* The "const" storage-class-modifier is valid. */
+#define YY_USE_CONST
+
+#else  /* ! __cplusplus */
+
+/* C99 requires __STDC__ to be defined as 1. */
+#if defined (__STDC__)
+
+#define YY_USE_CONST
+
+#endif /* defined (__STDC__) */
+#endif /* ! __cplusplus */
+
+#ifdef YY_USE_CONST
+#define yyconst const
+#else
+#define yyconst
+#endif
+
+/* Returned upon end-of-file. */
+#define YY_NULL 0
+
+/* Promotes a possibly negative, possibly signed char to an unsigned
+ * integer for use as an array index.  If the signed char is negative,
+ * we want to instead treat it as an 8-bit unsigned char, hence the
+ * double cast.
+ */
+#define YY_SC_TO_UI(c) ((unsigned int) (unsigned char) c)
+
+/* Enter a start condition.  This macro really ought to take a parameter,
+ * but we do it the disgusting crufty way forced on us by the ()-less
+ * definition of BEGIN.
+ */
+#define BEGIN (yy_start) = 1 + 2 *
+
+/* Translate the current start state into a value that can be later handed
+ * to BEGIN to return to the state.  The YYSTATE alias is for lex
+ * compatibility.
+ */
+#define YY_START (((yy_start) - 1) / 2)
+#define YYSTATE YY_START
+
+/* Action number for EOF rule of a given start state. */
+#define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1)
+
+/* Special action meaning "start processing a new file". */
+#define YY_NEW_FILE yyrestart(yyin  )
+
+#define YY_END_OF_BUFFER_CHAR 0
+
+/* Size of default input buffer. */
+#ifndef YY_BUF_SIZE
+#define YY_BUF_SIZE 16384
+#endif
+
+/* The state buf must be large enough to hold one state per character in the main buffer.
+ */
+#define YY_STATE_BUF_SIZE   ((YY_BUF_SIZE + 2) * sizeof(yy_state_type))
+
+#ifndef YY_TYPEDEF_YY_BUFFER_STATE
+#define YY_TYPEDEF_YY_BUFFER_STATE
+typedef struct yy_buffer_state *YY_BUFFER_STATE;
+#endif
+
+extern int yyleng;
+
+extern FILE *yyin, *yyout;
+
+#define EOB_ACT_CONTINUE_SCAN 0
+#define EOB_ACT_END_OF_FILE 1
+#define EOB_ACT_LAST_MATCH 2
+
+    #define YY_LESS_LINENO(n)
+    
+/* Return all but the first "n" matched characters back to the input stream. */
+#define yyless(n) \
+       do \
+               { \
+               /* Undo effects of setting up yytext. */ \
+        int yyless_macro_arg = (n); \
+        YY_LESS_LINENO(yyless_macro_arg);\
+               *yy_cp = (yy_hold_char); \
+               YY_RESTORE_YY_MORE_OFFSET \
+               (yy_c_buf_p) = yy_cp = yy_bp + yyless_macro_arg - YY_MORE_ADJ; \
+               YY_DO_BEFORE_ACTION; /* set up yytext again */ \
+               } \
+       while ( 0 )
+
+#define unput(c) yyunput( c, (yytext_ptr)  )
+
+#ifndef YY_TYPEDEF_YY_SIZE_T
+#define YY_TYPEDEF_YY_SIZE_T
+typedef size_t yy_size_t;
+#endif
+
+#ifndef YY_STRUCT_YY_BUFFER_STATE
+#define YY_STRUCT_YY_BUFFER_STATE
+/* Bookkeeping for one scanner input buffer.  YY_BUFFER_STATE is a pointer
+ * to this structure, and the buffer stack holds such pointers.
+ */
+struct yy_buffer_state
+       {
+       /* Stream associated with this buffer; the scanner stores yyin here
+        * when it starts on a buffer whose status is YY_BUFFER_NEW.
+        */
+       FILE *yy_input_file;
+
+       char *yy_ch_buf;                /* input buffer */
+       char *yy_buf_pos;               /* current position in input buffer */
+
+       /* Size of input buffer in bytes, not including room for EOB
+        * characters.
+        */
+       yy_size_t yy_buf_size;
+
+       /* Number of characters read into yy_ch_buf, not including EOB
+        * characters.
+        */
+       int yy_n_chars;
+
+       /* Whether we "own" the buffer - i.e., we know we created it,
+        * and can realloc() it to grow it, and should free() it to
+        * delete it.
+        */
+       int yy_is_our_buffer;
+
+       /* Whether this is an "interactive" input source; if so, and
+        * if we're using stdio for input, then we want to use getc()
+        * instead of fread(), to make sure we stop fetching input after
+        * each newline.
+        */
+       int yy_is_interactive;
+
+       /* Whether we're considered to be at the beginning of a line.
+        * If so, '^' rules will be active on the next match, otherwise
+        * not.
+        */
+       int yy_at_bol;
+
+    int yy_bs_lineno; /**< The line count. */
+    int yy_bs_column; /**< The column count. */
+    
+       /* Whether to try to fill the input buffer when we reach the
+        * end of it.
+        */
+       int yy_fill_buffer;
+
+       /* One of the YY_BUFFER_* values defined just below. */
+       int yy_buffer_status;
+
+#define YY_BUFFER_NEW 0
+#define YY_BUFFER_NORMAL 1
+       /* When an EOF's been seen but there's still some text to process
+        * then we mark the buffer as YY_EOF_PENDING, to indicate that we
+        * shouldn't try reading from the input source any more.  We might
+        * still have a bunch of tokens to match, though, because of
+        * possible backing-up.
+        *
+        * When we actually see the EOF, we change the status to "new"
+        * (via yyrestart()), so that the user can continue scanning by
+        * just pointing yyin at a new input file.
+        */
+#define YY_BUFFER_EOF_PENDING 2
+
+       };
+#endif /* !YY_STRUCT_YY_BUFFER_STATE */
+
+/* Stack of input buffers. */
+static size_t yy_buffer_stack_top = 0; /**< index of top of stack. */
+static size_t yy_buffer_stack_max = 0; /**< capacity of stack. */
+static YY_BUFFER_STATE * yy_buffer_stack = 0; /**< Stack as an array. */
+
+/* We provide macros for accessing buffer states in case in the
+ * future we want to put the buffer states in a more general
+ * "scanner state".
+ *
+ * Returns the top of the stack, or NULL.
+ */
+#define YY_CURRENT_BUFFER ( (yy_buffer_stack) \
+                          ? (yy_buffer_stack)[(yy_buffer_stack_top)] \
+                          : NULL)
+
+/* Same as previous macro, but useful when we know that the buffer stack is not
+ * NULL or when we need an lvalue. For internal use only.
+ */
+#define YY_CURRENT_BUFFER_LVALUE (yy_buffer_stack)[(yy_buffer_stack_top)]
+
+/* yy_hold_char holds the character lost when yytext is formed. */
+static char yy_hold_char;
+static int yy_n_chars;         /* number of characters read into yy_ch_buf */
+int yyleng;
+
+/* Points to current character in buffer. */
+static char *yy_c_buf_p = (char *) 0;
+static int yy_init = 0;                /* whether we need to initialize */
+static int yy_start = 0;       /* start state number */
+
+/* Flag which is used to allow yywrap()'s to do buffer switches
+ * instead of setting up a fresh yyin.  A bit of a hack ...
+ */
+static int yy_did_buffer_switch_on_eof;
+
+void yyrestart (FILE *input_file  );
+void yy_switch_to_buffer (YY_BUFFER_STATE new_buffer  );
+YY_BUFFER_STATE yy_create_buffer (FILE *file,int size  );
+void yy_delete_buffer (YY_BUFFER_STATE b  );
+void yy_flush_buffer (YY_BUFFER_STATE b  );
+void yypush_buffer_state (YY_BUFFER_STATE new_buffer  );
+void yypop_buffer_state (void );
+
+static void yyensure_buffer_stack (void );
+static void yy_load_buffer_state (void );
+static void yy_init_buffer (YY_BUFFER_STATE b,FILE *file  );
+
+#define YY_FLUSH_BUFFER yy_flush_buffer(YY_CURRENT_BUFFER )
+
+YY_BUFFER_STATE yy_scan_buffer (char *base,yy_size_t size  );
+YY_BUFFER_STATE yy_scan_string (yyconst char *yy_str  );
+YY_BUFFER_STATE yy_scan_bytes (yyconst char *bytes,int len  );
+
+void *yyalloc (yy_size_t  );
+void *yyrealloc (void *,yy_size_t  );
+void yyfree (void *  );
+
+#define yy_new_buffer yy_create_buffer
+
+/* Set the yy_is_interactive flag of the current buffer.  If there is no
+ * current buffer yet, the buffer stack is set up and a buffer of
+ * YY_BUF_SIZE bytes reading from yyin is created first.
+ */
+#define yy_set_interactive(is_interactive) \
+       { \
+       if ( ! YY_CURRENT_BUFFER ){ \
+        yyensure_buffer_stack (); \
+               YY_CURRENT_BUFFER_LVALUE =    \
+            yy_create_buffer(yyin,YY_BUF_SIZE ); \
+       } \
+       YY_CURRENT_BUFFER_LVALUE->yy_is_interactive = is_interactive; \
+       }
+
+/* Set the yy_at_bol (beginning-of-line) flag of the current buffer.  If
+ * there is no current buffer yet, the buffer stack is set up and a buffer
+ * of YY_BUF_SIZE bytes reading from yyin is created first.
+ */
+#define yy_set_bol(at_bol) \
+       { \
+       if ( ! YY_CURRENT_BUFFER ){\
+        yyensure_buffer_stack (); \
+               YY_CURRENT_BUFFER_LVALUE =    \
+            yy_create_buffer(yyin,YY_BUF_SIZE ); \
+       } \
+       YY_CURRENT_BUFFER_LVALUE->yy_at_bol = at_bol; \
+       }
+
+#define YY_AT_BOL() (YY_CURRENT_BUFFER_LVALUE->yy_at_bol)
+
+/* Begin user sect3 */
+
+typedef unsigned char YY_CHAR;
+
+FILE *yyin = (FILE *) 0, *yyout = (FILE *) 0;
+
+typedef int yy_state_type;
+
+extern int yylineno;
+
+int yylineno = 1;
+
+extern char *yytext;
+#define yytext_ptr yytext
+
+static yy_state_type yy_get_previous_state (void );
+static yy_state_type yy_try_NUL_trans (yy_state_type current_state  );
+static int yy_get_next_buffer (void );
+static void yy_fatal_error (yyconst char msg[]  );
+
+/* Done after the current pattern has been matched and before the
+ * corresponding action - sets up yytext.
+ */
+#define YY_DO_BEFORE_ACTION \
+       (yytext_ptr) = yy_bp; \
+       yyleng = (size_t) (yy_cp - yy_bp); \
+       (yy_hold_char) = *yy_cp; \
+       *yy_cp = '\0'; \
+       (yy_c_buf_p) = yy_cp;
+
+#define YY_NUM_RULES 19
+#define YY_END_OF_BUFFER 20
+/* This struct is not used in this scanner,
+   but its presence is necessary. */
+struct yy_trans_info
+       {
+       flex_int32_t yy_verify;
+       flex_int32_t yy_nxt;
+       };
+static yyconst flex_int16_t yy_accept[39] =
+    {   0,
+        0,    0,   20,   18,    1,    2,    7,    6,    7,   18,
+       16,   16,    2,    7,    7,    7,   16,   17,   18,   18,
+       11,    6,    5,    6,   14,   16,    0,   12,    8,   10,
+        9,   13,   16,   17,    3,   15,    4,    0
+    } ;
+
+static yyconst flex_int32_t yy_ec[256] =
+    {   0,
+        1,    1,    1,    1,    1,    1,    1,    1,    2,    3,
+        1,    2,    1,    1,    1,    1,    1,    1,    1,    1,
+        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
+        1,    2,    4,    1,    5,    1,    6,    7,    1,    6,
+        6,    6,    6,    6,    6,    1,    6,    8,    9,    9,
+        9,    9,    9,    9,    9,    9,    9,    1,   10,   11,
+       12,   13,    1,    1,   14,   14,   14,   14,   14,   14,
+       15,   15,   15,   15,   15,   15,   15,   15,   15,   15,
+       15,   15,   15,   15,   15,   15,   15,   16,   15,   15,
+        1,   17,    1,    6,    1,    1,   15,   15,   15,   15,
+
+       15,   15,   15,   15,   15,   15,   15,   15,   15,   15,
+       15,   15,   15,   15,   15,   15,   15,   15,   15,   16,
+       15,   15,    1,   18,    1,    1,    1,    1,    1,    1,
+        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
+        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
+        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
+        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
+        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
+        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
+        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
+
+        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
+        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
+        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
+        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
+        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
+        1,    1,    1,    1,    1
+    } ;
+
+static yyconst flex_int32_t yy_meta[19] =
+    {   0,
+        1,    1,    1,    1,    1,    1,    1,    2,    2,    1,
+        1,    1,    1,    2,    3,    2,    1,    1
+    } ;
+
+static yyconst flex_int16_t yy_base[43] =
+    {   0,
+        0,    0,   39,   49,   49,   49,   26,   16,   49,   30,
+       20,   19,   49,    9,   22,   10,    9,    0,   29,   13,
+       49,   23,   49,   24,   49,    0,    0,   49,   49,   49,
+       49,   49,   13,    0,   49,   49,   49,   49,   41,   28,
+       43,   45
+    } ;
+
+static yyconst flex_int16_t yy_def[43] =
+    {   0,
+       38,    1,   38,   38,   38,   38,   38,   39,   38,   38,
+       40,   40,   38,   38,   38,   38,   41,   42,   38,   38,
+       38,   39,   38,   39,   38,   12,   12,   38,   38,   38,
+       38,   38,   41,   42,   38,   38,   38,    0,   38,   38,
+       38,   38
+    } ;
+
+static yyconst flex_int16_t yy_nxt[68] =
+    {   0,
+        4,    5,    6,    7,    8,    9,   10,   11,   12,   13,
+       14,   15,   16,   17,   18,   18,   19,   20,   23,   28,
+       29,   31,   32,   34,   34,   23,   37,   34,   34,   26,
+       36,   35,   24,   30,   38,   27,   25,   21,   38,   24,
+       24,   22,   22,   22,   33,   33,   34,   34,    3,   38,
+       38,   38,   38,   38,   38,   38,   38,   38,   38,   38,
+       38,   38,   38,   38,   38,   38,   38
+    } ;
+
+static yyconst flex_int16_t yy_chk[68] =
+    {   0,
+        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
+        1,    1,    1,    1,    1,    1,    1,    1,    8,   14,
+       14,   16,   16,   17,   17,   22,   24,   33,   33,   40,
+       20,   19,    8,   15,   12,   11,   10,    7,    3,   22,
+       24,   39,   39,   39,   41,   41,   42,   42,   38,   38,
+       38,   38,   38,   38,   38,   38,   38,   38,   38,   38,
+       38,   38,   38,   38,   38,   38,   38
+    } ;
+
+static yy_state_type yy_last_accepting_state;
+static char *yy_last_accepting_cpos;
+
+extern int yy_flex_debug;
+int yy_flex_debug = 0;
+
+/* The intent behind this definition is that it'll catch
+ * any uses of REJECT which flex missed.
+ */
+#define REJECT reject_used_but_not_detected
+#define yymore() yymore_used_but_not_detected
+#define YY_MORE_ADJ 0
+#define YY_RESTORE_YY_MORE_OFFSET
+char *yytext;
+#line 1 "calclex.l"
+/* Lexical analyzer for calc program.
+
+Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+PARTICULAR PURPOSE.  See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program.  If not, see http://www.gnu.org/licenses/.  */
+#line 20 "calclex.l"
+#include <string.h>
+#include "calc-common.h"
+
+
+#if WITH_READLINE
+/* Let GNU flex use readline.  See the calcread.c redefined input() for a
+   way that might work for a standard lex too.  */
+#define YY_INPUT(buf,result,max_size)   \
+  result = calc_input (buf, max_size);
+#endif
+
+
+/* Non-zero when reading the second or subsequent line of an expression,
+   used to give a different prompt when using readline.  */
+int  calc_more_input = 0;
+
+
+/* Keyword spellings and the parser token each one stands for.  The scanner
+   compares an identifier against the names one by one with strcmp, so the
+   order of the entries does not matter, and stops at the final entry,
+   whose name is NULL.  */
+const struct calc_keywords_t  calc_keywords[] = {
+  { "abs",       ABS },
+  { "bin",       BIN },
+  { "decimal",   DECIMAL },
+  { "fib",       FIB },
+  { "hex",       HEX },
+  { "help",      HELP },
+  { "gcd",       GCD },
+  { "kron",      KRON },
+  { "lcm",       LCM },
+  { "lucnum",    LUCNUM },
+  { "nextprime", NEXTPRIME },
+  { "powm",      POWM },
+  { "quit",      QUIT },
+  { "root",      ROOT },
+  { "sqrt",      SQRT },
+  { NULL }  /* sentinel: terminates the lookup loop */
+};
+#line 526 "calclex.c"
+
+#define INITIAL 0
+
+#ifndef YY_NO_UNISTD_H
+/* Special case for "unistd.h", since it is non-ANSI. We include it way
+ * down here because we want the user's section 1 to have been scanned first.
+ * The user has a chance to override it with an option.
+ */
+#include <unistd.h>
+#endif
+
+#ifndef YY_EXTRA_TYPE
+#define YY_EXTRA_TYPE void *
+#endif
+
+static int yy_init_globals (void );
+
+/* Accessor methods to globals.
+   These are made visible to non-reentrant scanners for convenience. */
+
+int yylex_destroy (void );
+
+int yyget_debug (void );
+
+void yyset_debug (int debug_flag  );
+
+YY_EXTRA_TYPE yyget_extra (void );
+
+void yyset_extra (YY_EXTRA_TYPE user_defined  );
+
+FILE *yyget_in (void );
+
+void yyset_in  (FILE * in_str  );
+
+FILE *yyget_out (void );
+
+void yyset_out  (FILE * out_str  );
+
+int yyget_leng (void );
+
+char *yyget_text (void );
+
+int yyget_lineno (void );
+
+void yyset_lineno (int line_number  );
+
+/* Macros after this point can all be overridden by user definitions in
+ * section 1.
+ */
+
+#ifndef YY_SKIP_YYWRAP
+#ifdef __cplusplus
+extern "C" int yywrap (void );
+#else
+extern int yywrap (void );
+#endif
+#endif
+
+    static void yyunput (int c,char *buf_ptr  );
+    
+#ifndef yytext_ptr
+static void yy_flex_strncpy (char *,yyconst char *,int );
+#endif
+
+#ifdef YY_NEED_STRLEN
+static int yy_flex_strlen (yyconst char * );
+#endif
+
+#ifndef YY_NO_INPUT
+
+#ifdef __cplusplus
+static int yyinput (void );
+#else
+static int input (void );
+#endif
+
+#endif
+
+/* Amount of stuff to slurp up with each read. */
+#ifndef YY_READ_BUF_SIZE
+#define YY_READ_BUF_SIZE 8192
+#endif
+
+/* Copy whatever the last rule matched to the standard output. */
+#ifndef ECHO
+/* This used to be an fputs(), but since the string might contain NUL's,
+ * we now use fwrite().
+ */
+#define ECHO fwrite( yytext, yyleng, 1, yyout )
+#endif
+
+/* Gets input and stuffs it into "buf".  The number of characters read, or
+ * YY_NULL, is returned in "result".
+ */
+#ifndef YY_INPUT
+#define YY_INPUT(buf,result,max_size) \
+       if ( YY_CURRENT_BUFFER_LVALUE->yy_is_interactive ) \
+               { \
+               int c = '*'; \
+               int n; \
+               for ( n = 0; n < max_size && \
+                            (c = getc( yyin )) != EOF && c != '\n'; ++n ) \
+                       buf[n] = (char) c; \
+               if ( c == '\n' ) \
+                       buf[n++] = (char) c; \
+               if ( c == EOF && ferror( yyin ) ) \
+                       YY_FATAL_ERROR( "input in flex scanner failed" ); \
+               result = n; \
+               } \
+       else \
+               { \
+               errno=0; \
+               while ( (result = fread(buf, 1, max_size, yyin))==0 && ferror(yyin)) \
+                       { \
+                       if( errno != EINTR) \
+                               { \
+                               YY_FATAL_ERROR( "input in flex scanner failed" ); \
+                               break; \
+                               } \
+                       errno=0; \
+                       clearerr(yyin); \
+                       } \
+               }\
+\
+
+#endif
+
+/* No semi-colon after return; correct usage is to write "yyterminate();" -
+ * we don't want an extra ';' after the "return" because that will cause
+ * some compilers to complain about unreachable statements.
+ */
+#ifndef yyterminate
+#define yyterminate() return YY_NULL
+#endif
+
+/* Number of entries by which start-condition stack grows. */
+#ifndef YY_START_STACK_INCR
+#define YY_START_STACK_INCR 25
+#endif
+
+/* Report a fatal error. */
+#ifndef YY_FATAL_ERROR
+#define YY_FATAL_ERROR(msg) yy_fatal_error( msg )
+#endif
+
+/* end tables serialization structures and prototypes */
+
+/* Default declaration of generated scanner - a define so the user can
+ * easily add parameters.
+ */
+#ifndef YY_DECL
+#define YY_DECL_IS_OURS 1
+
+extern int yylex (void);
+
+#define YY_DECL int yylex (void)
+#endif /* !YY_DECL */
+
+/* Code executed at the beginning of each rule, after yytext and yyleng
+ * have been set up.
+ */
+#ifndef YY_USER_ACTION
+#define YY_USER_ACTION
+#endif
+
+/* Code executed at the end of each rule. */
+#ifndef YY_BREAK
+#define YY_BREAK break;
+#endif
+
+#define YY_RULE_SETUP \
+       YY_USER_ACTION
+
+/** The main scanner function which does all the work.
+ *
+ * Each pass of the loop starts at yy_c_buf_p, runs the table-driven matcher
+ * over the input and executes the action of the rule that matched.  Actions
+ * that return hand a token code to the caller; actions that end in YY_BREAK
+ * go round the loop again for the next token.  At end of input the
+ * YY_STATE_EOF(INITIAL) case finishes through yyterminate().
+ */
+YY_DECL
+{
+       register yy_state_type yy_current_state;
+       register char *yy_cp, *yy_bp;
+       register int yy_act;
+    
+#line 57 "calclex.l"
+
+
+#line 711 "calclex.c"
+
+       /* One-time setup on the first call: default start state, default
+        * yyin/yyout streams, and an input buffer if none exists yet.
+        */
+       if ( !(yy_init) )
+               {
+               (yy_init) = 1;
+
+#ifdef YY_USER_INIT
+               YY_USER_INIT;
+#endif
+
+               if ( ! (yy_start) )
+                       (yy_start) = 1; /* first start state */
+
+               if ( ! yyin )
+                       yyin = stdin;
+
+               if ( ! yyout )
+                       yyout = stdout;
+
+               if ( ! YY_CURRENT_BUFFER ) {
+                       yyensure_buffer_stack ();
+                       YY_CURRENT_BUFFER_LVALUE =
+                               yy_create_buffer(yyin,YY_BUF_SIZE );
+               }
+
+               yy_load_buffer_state( );
+               }
+
+       while ( 1 )             /* loops until end-of-file is reached */
+               {
+               yy_cp = (yy_c_buf_p);
+
+               /* Support of yytext. */
+               /* Put back the character that the previous match replaced
+                * with a NUL when it set up yytext.
+                */
+               *yy_cp = (yy_hold_char);
+
+               /* yy_bp points to the position in yy_ch_buf of the start of
+                * the current run.
+                */
+               yy_bp = yy_cp;
+
+               yy_current_state = (yy_start);
+yy_match:
+               do
+                       {
+                       /* Map the input byte to its equivalence class. */
+                       register YY_CHAR yy_c = yy_ec[YY_SC_TO_UI(*yy_cp)];
+                       /* Remember the most recent accepting state and
+                        * position, so the scanner can back up to them.
+                        */
+                       if ( yy_accept[yy_current_state] )
+                               {
+                               (yy_last_accepting_state) = yy_current_state;
+                               (yy_last_accepting_cpos) = yy_cp;
+                               }
+                       /* The check table does not own this slot for the
+                        * current state: fall back along the default-state
+                        * chain until an owning state is found.
+                        */
+                       while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
+                               {
+                               yy_current_state = (int) yy_def[yy_current_state];
+                               if ( yy_current_state >= 39 )
+                                       yy_c = yy_meta[(unsigned int) yy_c];
+                               }
+                       yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
+                       ++yy_cp;
+                       }
+               /* 49 is the stop value flex generated for this table set. */
+               while ( yy_base[yy_current_state] != 49 );
+
+yy_find_action:
+               yy_act = yy_accept[yy_current_state];
+               if ( yy_act == 0 )
+                       { /* have to back up */
+                       yy_cp = (yy_last_accepting_cpos);
+                       yy_current_state = (yy_last_accepting_state);
+                       yy_act = yy_accept[yy_current_state];
+                       }
+
+               YY_DO_BEFORE_ACTION;
+
+do_action:     /* This label is used only to access EOF actions. */
+
+               switch ( yy_act )
+       { /* beginning of action switch */
+                       case 0: /* must back up */
+                       /* undo the effects of YY_DO_BEFORE_ACTION */
+                       *yy_cp = (yy_hold_char);
+                       yy_cp = (yy_last_accepting_cpos);
+                       yy_current_state = (yy_last_accepting_state);
+                       goto yy_find_action;
+
+case 1:
+YY_RULE_SETUP
+#line 59 "calclex.l"
+{ /* white space is skipped */ }
+       YY_BREAK
+case 2:
+/* rule 2 can match eol */
+YY_RULE_SETUP
+#line 61 "calclex.l"
+{ /* semicolon or newline separates statements */
+          calc_more_input = 0;
+          return EOS; }
+       YY_BREAK
+case 3:
+/* rule 3 can match eol */
+YY_RULE_SETUP
+#line 64 "calclex.l"
+{ /* escaped newlines are skipped */ }
+       YY_BREAK
+case 4:
+/* rule 4 can match eol */
+YY_RULE_SETUP
+#line 67 "calclex.l"
+{
+            /* comment through to escaped newline is skipped */ }
+       YY_BREAK
+case 5:
+/* rule 5 can match eol */
+YY_RULE_SETUP
+#line 69 "calclex.l"
+{ /* comment through to newline is a separator */
+            calc_more_input = 0;
+            return EOS; }
+       YY_BREAK
+case 6:
+YY_RULE_SETUP
+#line 72 "calclex.l"
+{   /* comment through to EOF skipped */ }
+       YY_BREAK
+case 7:
+YY_RULE_SETUP
+#line 75 "calclex.l"
+/* the first matched character is itself the token code */
+{ return yytext[0]; }
+       YY_BREAK
+case 8:
+YY_RULE_SETUP
+#line 76 "calclex.l"
+{ return LE; }
+       YY_BREAK
+case 9:
+YY_RULE_SETUP
+#line 77 "calclex.l"
+{ return GE; }
+       YY_BREAK
+case 10:
+YY_RULE_SETUP
+#line 78 "calclex.l"
+{ return EQ; }
+       YY_BREAK
+case 11:
+YY_RULE_SETUP
+#line 79 "calclex.l"
+{ return NE; }
+       YY_BREAK
+case 12:
+YY_RULE_SETUP
+#line 80 "calclex.l"
+{ return LSHIFT; }
+       YY_BREAK
+case 13:
+YY_RULE_SETUP
+#line 81 "calclex.l"
+{ return RSHIFT; }
+       YY_BREAK
+case 14:
+YY_RULE_SETUP
+#line 82 "calclex.l"
+{ return LAND; }
+       YY_BREAK
+case 15:
+YY_RULE_SETUP
+#line 83 "calclex.l"
+{ return LOR; }
+       YY_BREAK
+case 16:
+YY_RULE_SETUP
+#line 85 "calclex.l"
+{
+        /* yytext points into the scanner's own buffer, and its terminating
+           NUL is replaced by the held character at the start of the next
+           scan, so the parser must use the string before then.  */
+        yylval.str = yytext;
+        return NUMBER; }
+       YY_BREAK
+case 17:
+YY_RULE_SETUP
+#line 89 "calclex.l"
+{
+        int  i;
+
+        /* A keyword?  The table ends at the entry whose name is NULL.  */
+        for (i = 0; calc_keywords[i].name != NULL; i++)
+          if (strcmp (yytext, calc_keywords[i].name) == 0)
+            return calc_keywords[i].value;
+
+        /* Otherwise exactly one lower-case letter is a variable, passed to
+           the parser as an index counted from 0 for 'a'.  */
+        if (yytext[0] >= 'a' && yytext[0] <= 'z' && yytext[1] == '\0')
+          {
+            yylval.var = yytext[0] - 'a';
+            return VARIABLE;
+          }
+
+        /* Neither a keyword nor a variable name.  */
+        return BAD;
+}
+       YY_BREAK
+case 18:
+YY_RULE_SETUP
+#line 105 "calclex.l"
+{ return BAD; }
+       YY_BREAK
+case 19:
+YY_RULE_SETUP
+#line 107 "calclex.l"
+ECHO;
+       YY_BREAK
+#line 914 "calclex.c"
+case YY_STATE_EOF(INITIAL):
+       yyterminate();
+
+       case YY_END_OF_BUFFER:
+               {
+               /* Amount of text matched not including the EOB char. */
+               int yy_amount_of_matched_text = (int) (yy_cp - (yytext_ptr)) - 1;
+
+               /* Undo the effects of YY_DO_BEFORE_ACTION. */
+               *yy_cp = (yy_hold_char);
+               YY_RESTORE_YY_MORE_OFFSET
+
+               if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_NEW )
+                       {
+                       /* We're scanning a new file or input source.  It's
+                        * possible that this happened because the user
+                        * just pointed yyin at a new source and called
+                        * yylex().  If so, then we have to assure
+                        * consistency between YY_CURRENT_BUFFER and our
+                        * globals.  Here is the right place to do so, because
+                        * this is the first action (other than possibly a
+                        * back-up) that will match for the new input source.
+                        */
+                       (yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_n_chars;
+                       YY_CURRENT_BUFFER_LVALUE->yy_input_file = yyin;
+                       YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = YY_BUFFER_NORMAL;
+                       }
+
+               /* Note that here we test for yy_c_buf_p "<=" to the position
+                * of the first EOB in the buffer, since yy_c_buf_p will
+                * already have been incremented past the NUL character
+                * (since all states make transitions on EOB to the
+                * end-of-buffer state).  Contrast this with the test
+                * in input().
+                */
+               if ( (yy_c_buf_p) <= &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] )
+                       { /* This was really a NUL. */
+                       yy_state_type yy_next_state;
+
+                       (yy_c_buf_p) = (yytext_ptr) + yy_amount_of_matched_text;
+
+                       yy_current_state = yy_get_previous_state(  );
+
+                       /* Okay, we're now positioned to make the NUL
+                        * transition.  We couldn't have
+                        * yy_get_previous_state() go ahead and do it
+                        * for us because it doesn't know how to deal
+                        * with the possibility of jamming (and we don't
+                        * want to build jamming into it because then it
+                        * will run more slowly).
+                        */
+
+                       yy_next_state = yy_try_NUL_trans( yy_current_state );
+
+                       yy_bp = (yytext_ptr) + YY_MORE_ADJ;
+
+                       if ( yy_next_state )
+                               {
+                               /* Consume the NUL. */
+                               yy_cp = ++(yy_c_buf_p);
+                               yy_current_state = yy_next_state;
+                               goto yy_match;
+                               }
+
+                       else
+                               {
+                               yy_cp = (yy_c_buf_p);
+                               goto yy_find_action;
+                               }
+                       }
+
+               /* Otherwise the buffer really is exhausted: try to refill it
+                * and act on the code that comes back.
+                */
+               else switch ( yy_get_next_buffer(  ) )
+                       {
+                       case EOB_ACT_END_OF_FILE:
+                               {
+                               (yy_did_buffer_switch_on_eof) = 0;
+
+                               if ( yywrap( ) )
+                                       {
+                                       /* Note: because we've taken care in
+                                        * yy_get_next_buffer() to have set up
+                                        * yytext, we can now set up
+                                        * yy_c_buf_p so that if some total
+                                        * hoser (like flex itself) wants to
+                                        * call the scanner after we return the
+                                        * YY_NULL, it'll still work - another
+                                        * YY_NULL will get returned.
+                                        */
+                                       (yy_c_buf_p) = (yytext_ptr) + YY_MORE_ADJ;
+
+                                       yy_act = YY_STATE_EOF(YY_START);
+                                       goto do_action;
+                                       }
+
+                               else
+                                       {
+                                       if ( ! (yy_did_buffer_switch_on_eof) )
+                                               YY_NEW_FILE;
+                                       }
+                               break;
+                               }
+
+                       case EOB_ACT_CONTINUE_SCAN:
+                               (yy_c_buf_p) =
+                                       (yytext_ptr) + yy_amount_of_matched_text;
+
+                               yy_current_state = yy_get_previous_state(  );
+
+                               yy_cp = (yy_c_buf_p);
+                               yy_bp = (yytext_ptr) + YY_MORE_ADJ;
+                               goto yy_match;
+
+                       case EOB_ACT_LAST_MATCH:
+                               (yy_c_buf_p) =
+                               &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)];
+
+                               yy_current_state = yy_get_previous_state(  );
+
+                               yy_cp = (yy_c_buf_p);
+                               yy_bp = (yytext_ptr) + YY_MORE_ADJ;
+                               goto yy_find_action;
+                       }
+               break;
+               }
+
+       default:
+               YY_FATAL_ERROR(
+                       "fatal flex scanner internal error--no action found" );
+       } /* end of action switch */
+               } /* end of scanning one token */
+} /* end of yylex */
+
+/* yy_get_next_buffer - try to read in a new buffer
+ *
+ * Moves the text that is still pending (from yytext_ptr onwards) to the
+ * start of the current buffer, then asks YY_INPUT for more data to put
+ * behind it, growing the buffer first when it is full and owned by the
+ * scanner.
+ *
+ * Returns a code representing an action:
+ *     EOB_ACT_LAST_MATCH - no more input, but text matched before the
+ *                          EOB still has to be processed
+ *     EOB_ACT_CONTINUE_SCAN - continue scanning from current position
+ *     EOB_ACT_END_OF_FILE - end of file
+ */
+static int yy_get_next_buffer (void)
+{
+       register char *dest = YY_CURRENT_BUFFER_LVALUE->yy_ch_buf;
+       register char *source = (yytext_ptr);
+       register int number_to_move, i;
+       int ret_val;
+
+       /* yy_c_buf_p may be just past either of the two EOB slots, never
+        * further.
+        */
+       if ( (yy_c_buf_p) > &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars) + 1] )
+               YY_FATAL_ERROR(
+               "fatal flex scanner internal error--end of buffer missed" );
+
+       if ( YY_CURRENT_BUFFER_LVALUE->yy_fill_buffer == 0 )
+               { /* Don't try to fill the buffer, so this is an EOF. */
+               if ( (yy_c_buf_p) - (yytext_ptr) - YY_MORE_ADJ == 1 )
+                       {
+                       /* We matched a single character, the EOB, so
+                        * treat this as a final EOF.
+                        */
+                       return EOB_ACT_END_OF_FILE;
+                       }
+
+               else
+                       {
+                       /* We matched some text prior to the EOB, first
+                        * process it.
+                        */
+                       return EOB_ACT_LAST_MATCH;
+                       }
+               }
+
+       /* Try to read more data. */
+
+       /* First move last chars to start of buffer.  The "- 1" leaves out
+        * the EOB character that ended the scan.
+        */
+       number_to_move = (int) ((yy_c_buf_p) - (yytext_ptr)) - 1;
+
+       for ( i = 0; i < number_to_move; ++i )
+               *(dest++) = *(source++);
+
+       if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_EOF_PENDING )
+               /* don't do the read, it's not guaranteed to return an EOF,
+                * just force an EOF
+                */
+               YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars) = 0;
+
+       else
+               {
+                       /* Room left after the moved text, keeping one slot back. */
+                       int num_to_read =
+                       YY_CURRENT_BUFFER_LVALUE->yy_buf_size - number_to_move - 1;
+
+               while ( num_to_read <= 0 )
+                       { /* Not enough room in the buffer - grow it. */
+
+                       /* just a shorter name for the current buffer */
+                       YY_BUFFER_STATE b = YY_CURRENT_BUFFER;
+
+                       /* Remember the position as an offset: the realloc
+                        * below may move the buffer.
+                        */
+                       int yy_c_buf_p_offset =
+                               (int) ((yy_c_buf_p) - b->yy_ch_buf);
+
+                       if ( b->yy_is_our_buffer )
+                               {
+                               int new_size = b->yy_buf_size * 2;
+
+                               /* A non-positive doubled size presumably means
+                                * the int overflowed; grow by an eighth instead.
+                                */
+                               if ( new_size <= 0 )
+                                       b->yy_buf_size += b->yy_buf_size / 8;
+                               else
+                                       b->yy_buf_size *= 2;
+
+                               b->yy_ch_buf = (char *)
+                                       /* Include room in for 2 EOB chars. */
+                                       yyrealloc((void *) b->yy_ch_buf,b->yy_buf_size + 2  );
+                               }
+                       else
+                               /* Can't grow it, we don't own it. */
+                               b->yy_ch_buf = 0;
+
+                       if ( ! b->yy_ch_buf )
+                               YY_FATAL_ERROR(
+                               "fatal error - scanner input buffer overflow" );
+
+                       (yy_c_buf_p) = &b->yy_ch_buf[yy_c_buf_p_offset];
+
+                       num_to_read = YY_CURRENT_BUFFER_LVALUE->yy_buf_size -
+                                               number_to_move - 1;
+
+                       }
+
+               if ( num_to_read > YY_READ_BUF_SIZE )
+                       num_to_read = YY_READ_BUF_SIZE;
+
+               /* Read in more data. */
+               YY_INPUT( (&YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move]),
+                       (yy_n_chars), (size_t) num_to_read );
+
+               YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars);
+               }
+
+       if ( (yy_n_chars) == 0 )
+               {
+               /* Nothing was read: decide between a final EOF and one last
+                * match on the text that was carried over.
+                */
+               if ( number_to_move == YY_MORE_ADJ )
+                       {
+                       ret_val = EOB_ACT_END_OF_FILE;
+                       yyrestart(yyin  );
+                       }
+
+               else
+                       {
+                       ret_val = EOB_ACT_LAST_MATCH;
+                       YY_CURRENT_BUFFER_LVALUE->yy_buffer_status =
+                               YY_BUFFER_EOF_PENDING;
+                       }
+               }
+
+       else
+               ret_val = EOB_ACT_CONTINUE_SCAN;
+
+       /* If the text now held exceeds yy_buf_size, enlarge the array before
+        * the two EOB markers are written below.
+        */
+       if ((yy_size_t) ((yy_n_chars) + number_to_move) > YY_CURRENT_BUFFER_LVALUE->yy_buf_size) {
+               /* Extend the array by 50%, plus the number we really need. */
+               yy_size_t new_size = (yy_n_chars) + number_to_move + ((yy_n_chars) >> 1);
+               YY_CURRENT_BUFFER_LVALUE->yy_ch_buf = (char *) yyrealloc((void *) YY_CURRENT_BUFFER_LVALUE->yy_ch_buf,new_size  );
+               if ( ! YY_CURRENT_BUFFER_LVALUE->yy_ch_buf )
+                       YY_FATAL_ERROR( "out of dynamic memory in yy_get_next_buffer()" );
+       }
+
+       (yy_n_chars) += number_to_move;
+       YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] = YY_END_OF_BUFFER_CHAR;
+       YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars) + 1] = YY_END_OF_BUFFER_CHAR;
+
+       /* The pending text now lives at the front of the buffer. */
+       (yytext_ptr) = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[0];
+
+       return ret_val;
+}
+
+/* yy_get_previous_state - get the state just before the EOB char was reached
+ *
+ * Starts from yy_start and replays the transition tables over the text
+ * between yytext_ptr + YY_MORE_ADJ and yy_c_buf_p, recording the last
+ * accepting state and its position on the way.  Returns the state reached.
+ */
+
+    static yy_state_type yy_get_previous_state (void)
+{
+       register yy_state_type yy_current_state;
+       register char *yy_cp;
+    
+       yy_current_state = (yy_start);
+
+       for ( yy_cp = (yytext_ptr) + YY_MORE_ADJ; yy_cp < (yy_c_buf_p); ++yy_cp )
+               {
+               /* A NUL byte is given class 1; any other byte is looked up
+                * in yy_ec.
+                */
+               register YY_CHAR yy_c = (*yy_cp ? yy_ec[YY_SC_TO_UI(*yy_cp)] : 1);
+               if ( yy_accept[yy_current_state] )
+                       {
+                       (yy_last_accepting_state) = yy_current_state;
+                       (yy_last_accepting_cpos) = yy_cp;
+                       }
+               /* Follow the default chain until a table entry belonging to
+                * the current state is found.  The constant 39 is generator
+                * output; presumably states >= 39 are templates that need the
+                * yy_meta mapping - TODO confirm against the tables.
+                */
+               while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
+                       {
+                       yy_current_state = (int) yy_def[yy_current_state];
+                       if ( yy_current_state >= 39 )
+                               yy_c = yy_meta[(unsigned int) yy_c];
+                       }
+               yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
+               }
+
+       return yy_current_state;
+}
+
+/* yy_try_NUL_trans - try to make a transition on the NUL character
+ *
+ * synopsis
+ *     next_state = yy_try_NUL_trans( current_state );
+ *
+ * Performs one table step using class 1 (the class used for NUL).
+ * Returns 0 when the step ends in state 38, which is treated as the jam
+ * state, and the new state otherwise.
+ */
+    static yy_state_type yy_try_NUL_trans  (yy_state_type yy_current_state )
+{
+       register int yy_is_jam;
+       register char *yy_cp = (yy_c_buf_p);
+
+       register YY_CHAR yy_c = 1;
+       /* Record the incoming state as a fallback if it is accepting. */
+       if ( yy_accept[yy_current_state] )
+               {
+               (yy_last_accepting_state) = yy_current_state;
+               (yy_last_accepting_cpos) = yy_cp;
+               }
+       /* Same default-chain walk as in yy_get_previous_state(). */
+       while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
+               {
+               yy_current_state = (int) yy_def[yy_current_state];
+               if ( yy_current_state >= 39 )
+                       yy_c = yy_meta[(unsigned int) yy_c];
+               }
+       yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
+       yy_is_jam = (yy_current_state == 38);
+
+       return yy_is_jam ? 0 : yy_current_state;
+}
+
+/* yyunput - put character c back in front of the current scan position
+ *
+ * yy_bp is the start of the current token text; it is shifted together
+ * with the scan pointer when the buffer contents have to be moved up to
+ * make room, and becomes yytext_ptr on return.
+ */
+    static void yyunput (int c, register char * yy_bp )
+{
+       register char *yy_cp;
+    
+    yy_cp = (yy_c_buf_p);
+
+       /* undo effects of setting up yytext */
+       *yy_cp = (yy_hold_char);
+
+       if ( yy_cp < YY_CURRENT_BUFFER_LVALUE->yy_ch_buf + 2 )
+               { /* need to shift things up to make room */
+               /* +2 for EOB chars. */
+               register int number_to_move = (yy_n_chars) + 2;
+               register char *dest = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[
+                                       YY_CURRENT_BUFFER_LVALUE->yy_buf_size + 2];
+               register char *source =
+                               &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move];
+
+               /* Copy backwards, from the end, so that the contents finish
+                * at the very end of the allocated array.
+                */
+               while ( source > YY_CURRENT_BUFFER_LVALUE->yy_ch_buf )
+                       *--dest = *--source;
+
+               /* dest - source is the distance everything was moved by. */
+               yy_cp += (int) (dest - source);
+               yy_bp += (int) (dest - source);
+               YY_CURRENT_BUFFER_LVALUE->yy_n_chars =
+                       (yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_buf_size;
+
+               if ( yy_cp < YY_CURRENT_BUFFER_LVALUE->yy_ch_buf + 2 )
+                       YY_FATAL_ERROR( "flex scanner push-back overflow" );
+               }
+
+       *--yy_cp = (char) c;
+
+       (yytext_ptr) = yy_bp;
+       (yy_hold_char) = *yy_cp;
+       (yy_c_buf_p) = yy_cp;
+}
+
+#ifndef YY_NO_INPUT
+/* input (yyinput when compiled as C++) - take the next character directly
+ * from the buffer, refilling it through yy_get_next_buffer() when the
+ * end-of-buffer marker is reached.
+ *
+ * Returns the character as an unsigned char value, or EOF once yywrap()
+ * reports that there is no more input.
+ */
+#ifdef __cplusplus
+    static int yyinput (void)
+#else
+    static int input  (void)
+#endif
+
+{
+       int c;
+    
+       *(yy_c_buf_p) = (yy_hold_char);
+
+       if ( *(yy_c_buf_p) == YY_END_OF_BUFFER_CHAR )
+               {
+               /* yy_c_buf_p now points to the character we want to return.
+                * If this occurs *before* the EOB characters, then it's a
+                * valid NUL; if not, then we've hit the end of the buffer.
+                */
+               if ( (yy_c_buf_p) < &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] )
+                       /* This was really a NUL. */
+                       *(yy_c_buf_p) = '\0';
+
+               else
+                       { /* need more input */
+                       /* Keep the position relative to yytext_ptr, which
+                        * yy_get_next_buffer() may change.
+                        */
+                       int offset = (yy_c_buf_p) - (yytext_ptr);
+                       ++(yy_c_buf_p);
+
+                       switch ( yy_get_next_buffer(  ) )
+                               {
+                               case EOB_ACT_LAST_MATCH:
+                                       /* This happens because yy_g_n_b()
+                                        * sees that we've accumulated a
+                                        * token and flags that we need to
+                                        * try matching the token before
+                                        * proceeding.  But for input(),
+                                        * there's no matching to consider.
+                                        * So convert the EOB_ACT_LAST_MATCH
+                                        * to EOB_ACT_END_OF_FILE.
+                                        */
+
+                                       /* Reset buffer status. */
+                                       yyrestart(yyin );
+
+                                       /*FALLTHROUGH*/
+
+                               case EOB_ACT_END_OF_FILE:
+                                       {
+                                       if ( yywrap( ) )
+                                               return EOF;
+
+                                       if ( ! (yy_did_buffer_switch_on_eof) )
+                                               YY_NEW_FILE;
+                                       /* yywrap() said there is more input: try
+                                        * again from the top.
+                                        */
+#ifdef __cplusplus
+                                       return yyinput();
+#else
+                                       return input();
+#endif
+                                       }
+
+                               case EOB_ACT_CONTINUE_SCAN:
+                                       (yy_c_buf_p) = (yytext_ptr) + offset;
+                                       break;
+                               }
+                       }
+               }
+
+       c = *(unsigned char *) (yy_c_buf_p);    /* cast for 8-bit char's */
+       *(yy_c_buf_p) = '\0';   /* preserve yytext */
+       (yy_hold_char) = *++(yy_c_buf_p);
+
+       return c;
+}
+#endif /* ifndef YY_NO_INPUT */
+
+/** Immediately switch to a different input stream.
+ * The current buffer is reinitialised for the new stream; a buffer is
+ * created first if none exists yet.
+ * @param input_file A readable stream.
+ *
+ * @note This function does not reset the start condition to @c INITIAL .
+ */
+    void yyrestart  (FILE * input_file )
+{
+    
+       if ( ! YY_CURRENT_BUFFER ){
+        yyensure_buffer_stack ();
+               /* The new buffer is created for yyin; it is pointed at
+                * input_file by the yy_init_buffer() call below.
+                */
+               YY_CURRENT_BUFFER_LVALUE =
+            yy_create_buffer(yyin,YY_BUF_SIZE );
+       }
+
+       yy_init_buffer(YY_CURRENT_BUFFER,input_file );
+       yy_load_buffer_state( );
+}
+
+/** Switch to a different input buffer.
+ * Does nothing when @a new_buffer is already the current buffer.
+ * Otherwise the scan position of the old buffer is saved in it and the
+ * top stack slot is replaced by @a new_buffer.
+ * @param new_buffer The new input buffer.
+ *
+ */
+    void yy_switch_to_buffer  (YY_BUFFER_STATE  new_buffer )
+{
+    
+       /* TODO. We should be able to replace this entire function body
+        * with
+        *              yypop_buffer_state();
+        *              yypush_buffer_state(new_buffer);
+     */
+       yyensure_buffer_stack ();
+       if ( YY_CURRENT_BUFFER == new_buffer )
+               return;
+
+       if ( YY_CURRENT_BUFFER )
+               {
+               /* Flush out information for old buffer. */
+               *(yy_c_buf_p) = (yy_hold_char);
+               YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = (yy_c_buf_p);
+               YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars);
+               }
+
+       YY_CURRENT_BUFFER_LVALUE = new_buffer;
+       yy_load_buffer_state( );
+
+       /* We don't actually know whether we did this switch during
+        * EOF (yywrap()) processing, but the only time this flag
+        * is looked at is after yywrap() is called, so it's safe
+        * to go ahead and always set it.
+        */
+       (yy_did_buffer_switch_on_eof) = 1;
+}
+
+/* Copy the saved scan state of the current buffer (character count,
+ * position, input file and the character under the position) into the
+ * scanner's working variables.
+ */
+static void yy_load_buffer_state  (void)
+{
+       (yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_n_chars;
+       (yytext_ptr) = (yy_c_buf_p) = YY_CURRENT_BUFFER_LVALUE->yy_buf_pos;
+       yyin = YY_CURRENT_BUFFER_LVALUE->yy_input_file;
+       (yy_hold_char) = *(yy_c_buf_p);
+}
+
+/** Allocate and initialize an input buffer state.
+ * Both the state structure and its character array come from yyalloc();
+ * a failed allocation is reported through YY_FATAL_ERROR.
+ * @param file A readable stream.
+ * @param size The character buffer size in bytes. When in doubt, use @c YY_BUF_SIZE.
+ *
+ * @return the allocated buffer state.
+ */
+    YY_BUFFER_STATE yy_create_buffer  (FILE * file, int  size )
+{
+       YY_BUFFER_STATE b;
+    
+       b = (YY_BUFFER_STATE) yyalloc(sizeof( struct yy_buffer_state )  );
+       if ( ! b )
+               YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" );
+
+       b->yy_buf_size = size;
+
+       /* yy_ch_buf has to be 2 characters longer than the size given because
+        * we need to put in 2 end-of-buffer characters.
+        */
+       b->yy_ch_buf = (char *) yyalloc(b->yy_buf_size + 2  );
+       if ( ! b->yy_ch_buf )
+               YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" );
+
+       /* Marks the character array as ours, so yy_delete_buffer() frees it. */
+       b->yy_is_our_buffer = 1;
+
+       yy_init_buffer(b,file );
+
+       return b;
+}
+
+/** Destroy the buffer.
+ * A null @a b is ignored.  If @a b is the current buffer the current
+ * stack slot is cleared.  The character array is freed only when the
+ * scanner owns it (yy_is_our_buffer).
+ * @param b a buffer created with yy_create_buffer()
+ *
+ */
+    void yy_delete_buffer (YY_BUFFER_STATE  b )
+{
+    
+       if ( ! b )
+               return;
+
+       if ( b == YY_CURRENT_BUFFER ) /* Not sure if we should pop here. */
+               YY_CURRENT_BUFFER_LVALUE = (YY_BUFFER_STATE) 0;
+
+       if ( b->yy_is_our_buffer )
+               yyfree((void *) b->yy_ch_buf  );
+
+       yyfree((void *) b  );
+}
+
+#ifndef __cplusplus
+extern int isatty (int );
+#endif /* __cplusplus */
+    
+/* Initializes or reinitializes a buffer.
+ * This function is sometimes called more than once on the same buffer,
+ * such as during a yyrestart() or at EOF.
+ *
+ * The buffer is flushed, attached to FILE and marked as refillable.  The
+ * value of errno on entry is put back before returning.
+ */
+    static void yy_init_buffer  (YY_BUFFER_STATE  b, FILE * file )
+
+{
+       /* Saved so that the calls below cannot disturb the caller's errno. */
+       int oerrno = errno;
+    
+       yy_flush_buffer(b );
+
+       b->yy_input_file = file;
+       b->yy_fill_buffer = 1;
+
+    /* If b is the current buffer, then yy_init_buffer was _probably_
+     * called from yyrestart() or through yy_get_next_buffer.
+     * In that case, we don't want to reset the lineno or column.
+     */
+    if (b != YY_CURRENT_BUFFER){
+        b->yy_bs_lineno = 1;
+        b->yy_bs_column = 0;
+    }
+
+        /* Interactive only when there is a file and it is a terminal. */
+        b->yy_is_interactive = file ? (isatty( fileno(file) ) > 0) : 0;
+    
+       errno = oerrno;
+}
+
+/** Discard all buffered characters. On the next scan, YY_INPUT will be called.
+ * A null @a b is ignored.  When @a b is the current buffer the scanner's
+ * working variables are reloaded from it afterwards.
+ * @param b the buffer state to be flushed, usually @c YY_CURRENT_BUFFER.
+ *
+ */
+    void yy_flush_buffer (YY_BUFFER_STATE  b )
+{
+       if ( ! b )
+               return;
+
+       b->yy_n_chars = 0;
+
+       /* We always need two end-of-buffer characters.  The first causes
+        * a transition to the end-of-buffer state.  The second causes
+        * a jam in that state.
+        */
+       b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR;
+       b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR;
+
+       b->yy_buf_pos = &b->yy_ch_buf[0];
+
+       b->yy_at_bol = 1;
+       b->yy_buffer_status = YY_BUFFER_NEW;
+
+       if ( b == YY_CURRENT_BUFFER )
+               yy_load_buffer_state( );
+}
+
+/** Pushes the new state onto the stack. The new state becomes
+ *  the current state. This function will allocate the stack
+ *  if necessary.  A NULL @a new_buffer is ignored.
+ *  @param new_buffer The new state.
+ *
+ */
+void yypush_buffer_state (YY_BUFFER_STATE new_buffer )
+{
+       if (new_buffer == NULL)
+               return;
+
+       yyensure_buffer_stack();
+
+       /* This block is copied from yy_switch_to_buffer. */
+       if ( YY_CURRENT_BUFFER )
+               {
+               /* Flush out information for old buffer. */
+               *(yy_c_buf_p) = (yy_hold_char);
+               YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = (yy_c_buf_p);
+               YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars);
+               }
+
+       /* Only push if top exists. Otherwise, replace top. */
+       if (YY_CURRENT_BUFFER)
+               (yy_buffer_stack_top)++;
+       YY_CURRENT_BUFFER_LVALUE = new_buffer;
+
+       /* copied from yy_switch_to_buffer. */
+       yy_load_buffer_state( );
+       (yy_did_buffer_switch_on_eof) = 1;
+}
+
+/** Removes and deletes the top of the stack, if present.
+ *  The next element becomes the new top, and its saved scan state is
+ *  loaded if it is non-null.
+ *
+ */
+void yypop_buffer_state (void)
+{
+       if (!YY_CURRENT_BUFFER)
+               return;
+
+       yy_delete_buffer(YY_CURRENT_BUFFER );
+       YY_CURRENT_BUFFER_LVALUE = NULL;
+       /* The top index never goes below zero. */
+       if ((yy_buffer_stack_top) > 0)
+               --(yy_buffer_stack_top);
+
+       if (YY_CURRENT_BUFFER) {
+               yy_load_buffer_state( );
+               (yy_did_buffer_switch_on_eof) = 1;
+       }
+}
+
+/* Allocates the stack if it does not exist.
+ *  Guarantees space for at least one push.
+ *  Allocation failure is reported through YY_FATAL_ERROR.
+ */
+static void yyensure_buffer_stack (void)
+{
+       int num_to_alloc;
+    
+       if (!(yy_buffer_stack)) {
+
+               /* First allocation is for a single element, since we don't know
+                * if this scanner will even need a stack.  With only one slot,
+                * the capacity check further down grows the stack on the next
+                * call.
+                */
+               num_to_alloc = 1;
+               (yy_buffer_stack) = (struct yy_buffer_state**)yyalloc
+                                                               (num_to_alloc * sizeof(struct yy_buffer_state*)
+                                                               );
+               if ( ! (yy_buffer_stack) )
+                       YY_FATAL_ERROR( "out of dynamic memory in yyensure_buffer_stack()" );
+                                                                 
+               memset((yy_buffer_stack), 0, num_to_alloc * sizeof(struct yy_buffer_state*));
+                               
+               (yy_buffer_stack_max) = num_to_alloc;
+               (yy_buffer_stack_top) = 0;
+               return;
+       }
+
+       /* Grow once the top index has reached the last allocated slot. */
+       if ((yy_buffer_stack_top) >= ((yy_buffer_stack_max)) - 1){
+
+               /* Increase the buffer to prepare for a possible push. */
+               int grow_size = 8 /* arbitrary grow size */;
+
+               num_to_alloc = (yy_buffer_stack_max) + grow_size;
+               (yy_buffer_stack) = (struct yy_buffer_state**)yyrealloc
+                                                               ((yy_buffer_stack),
+                                                               num_to_alloc * sizeof(struct yy_buffer_state*)
+                                                               );
+               if ( ! (yy_buffer_stack) )
+                       YY_FATAL_ERROR( "out of dynamic memory in yyensure_buffer_stack()" );
+
+               /* zero only the new slots.*/
+               memset((yy_buffer_stack) + (yy_buffer_stack_max), 0, grow_size * sizeof(struct yy_buffer_state*));
+               (yy_buffer_stack_max) = num_to_alloc;
+       }
+}
+
+/** Setup the input buffer state to scan directly from a user-specified character buffer.
+ * The buffer is used in place, not copied, and is marked as not owned by
+ * the scanner.  Its last two bytes must already be YY_END_OF_BUFFER_CHAR.
+ * On success the new buffer state becomes the current one.
+ * @param base the character buffer
+ * @param size the size in bytes of the character buffer
+ *
+ * @return the newly allocated buffer state object, or 0 when @a size is
+ *         below 2 or the two end-of-buffer bytes are missing.
+ */
+YY_BUFFER_STATE yy_scan_buffer  (char * base, yy_size_t  size )
+{
+       YY_BUFFER_STATE b;
+    
+       if ( size < 2 ||
+            base[size-2] != YY_END_OF_BUFFER_CHAR ||
+            base[size-1] != YY_END_OF_BUFFER_CHAR )
+               /* They forgot to leave room for the EOB's. */
+               return 0;
+
+       b = (YY_BUFFER_STATE) yyalloc(sizeof( struct yy_buffer_state )  );
+       if ( ! b )
+               YY_FATAL_ERROR( "out of dynamic memory in yy_scan_buffer()" );
+
+       b->yy_buf_size = size - 2;      /* "- 2" to take care of EOB's */
+       b->yy_buf_pos = b->yy_ch_buf = base;
+       b->yy_is_our_buffer = 0;
+       b->yy_input_file = 0;
+       b->yy_n_chars = b->yy_buf_size;
+       b->yy_is_interactive = 0;
+       b->yy_at_bol = 1;
+       /* Nothing to refill from: running off the end is an EOF. */
+       b->yy_fill_buffer = 0;
+       b->yy_buffer_status = YY_BUFFER_NEW;
+
+       yy_switch_to_buffer(b  );
+
+       return b;
+}
+
+/** Setup the input buffer state to scan a string. The next call to yylex() will
+ * scan from a @e copy of @a str.
+ * This is yy_scan_bytes() applied to the strlen() of the string.
+ * @param yystr a NUL-terminated string to scan
+ *
+ * @return the newly allocated buffer state object.
+ * @note If you want to scan bytes that may contain NUL values, then use
+ *       yy_scan_bytes() instead.
+ */
+YY_BUFFER_STATE yy_scan_string (yyconst char * yystr )
+{
+    
+       return yy_scan_bytes(yystr,strlen(yystr) );
+}
+
+/** Setup the input buffer state to scan the given bytes. The next call to yylex() will
+ * scan from a @e copy of @a bytes.
+ * The copy is allocated with yyalloc() and handed to yy_scan_buffer();
+ * failures in either step are reported through YY_FATAL_ERROR.
+ * @param yybytes the byte buffer to scan
+ * @param _yybytes_len the number of bytes in the buffer pointed to by @a yybytes.
+ *
+ * @return the newly allocated buffer state object.
+ */
+YY_BUFFER_STATE yy_scan_bytes  (yyconst char * yybytes, int  _yybytes_len )
+{
+       YY_BUFFER_STATE b;
+       char *buf;
+       yy_size_t n;
+       int i;
+    
+       /* Get memory for full buffer, including space for trailing EOB's. */
+       n = _yybytes_len + 2;
+       buf = (char *) yyalloc(n  );
+       if ( ! buf )
+               YY_FATAL_ERROR( "out of dynamic memory in yy_scan_bytes()" );
+
+       for ( i = 0; i < _yybytes_len; ++i )
+               buf[i] = yybytes[i];
+
+       /* The two markers yy_scan_buffer() checks for. */
+       buf[_yybytes_len] = buf[_yybytes_len+1] = YY_END_OF_BUFFER_CHAR;
+
+       b = yy_scan_buffer(buf,n );
+       if ( ! b )
+               YY_FATAL_ERROR( "bad buffer in yy_scan_bytes()" );
+
+       /* It's okay to grow etc. this buffer, and we should throw it
+        * away when we're done.
+        */
+       b->yy_is_our_buffer = 1;
+
+       return b;
+}
+
+#ifndef YY_EXIT_FAILURE
+#define YY_EXIT_FAILURE 2
+#endif
+
+/* Print MSG and a newline on stderr, then terminate the program with
+ * exit status YY_EXIT_FAILURE.  Does not return.
+ */
+static void yy_fatal_error (yyconst char* msg )
+{
+       (void) fprintf( stderr, "%s\n", msg );
+       exit( YY_EXIT_FAILURE );
+}
+
+/* Redefine yyless() so it works in section 3 code.
+ *
+ * yyless(n) cuts the current token down to its first n characters: the
+ * character hidden by the terminating NUL is restored, the scan position
+ * is set to yytext + n, a new NUL is planted there (its old contents go
+ * to yy_hold_char) and yyleng becomes n.  The argument is evaluated once.
+ */
+
+#undef yyless
+#define yyless(n) \
+       do \
+               { \
+               /* Undo effects of setting up yytext. */ \
+        int yyless_macro_arg = (n); \
+        YY_LESS_LINENO(yyless_macro_arg);\
+               yytext[yyleng] = (yy_hold_char); \
+               (yy_c_buf_p) = yytext + yyless_macro_arg; \
+               (yy_hold_char) = *(yy_c_buf_p); \
+               *(yy_c_buf_p) = '\0'; \
+               yyleng = yyless_macro_arg; \
+               } \
+       while ( 0 )
+
+/* Accessor  methods (get/set functions) to struct members. */
+
+/** Get the current line number.
+ * @return the value of the global yylineno.
+ */
+int yyget_lineno  (void)
+{
+        
+    return yylineno;
+}
+
+/** Get the input stream.
+ * @return the value of the global yyin.
+ */
+FILE *yyget_in  (void)
+{
+        return yyin;
+}
+
+/** Get the output stream.
+ * @return the value of the global yyout.
+ */
+FILE *yyget_out  (void)
+{
+        return yyout;
+}
+
+/** Get the length of the current token.
+ * @return the value of the global yyleng.
+ */
+int yyget_leng  (void)
+{
+        return yyleng;
+}
+
+/** Get the current token.
+ * @return the global yytext pointer itself, not a copy of the text.
+ */
+
+char *yyget_text  (void)
+{
+        return yytext;
+}
+
+/** Set the current line number.
+ * @param line_number the value to store in the global yylineno
+ *
+ */
+void yyset_lineno (int  line_number )
+{
+    
+    yylineno = line_number;
+}
+
+/** Set the input stream. This does not discard the current
+ * input buffer; only the global yyin is assigned.
+ * @param in_str A readable stream.
+ *
+ * @see yy_switch_to_buffer
+ */
+void yyset_in (FILE *  in_str )
+{
+        yyin = in_str ;
+}
+
+/** Set the output stream.
+ * @param out_str the stream to store in the global yyout
+ */
+void yyset_out (FILE *  out_str )
+{
+        yyout = out_str ;
+}
+
+/** Get the debug flag.
+ * @return the value of the global yy_flex_debug.
+ */
+int yyget_debug  (void)
+{
+        return yy_flex_debug;
+}
+
+/** Set the debug flag.
+ * @param bdebug the value to store in the global yy_flex_debug
+ */
+void yyset_debug (int  bdebug )
+{
+        yy_flex_debug = bdebug ;
+}
+
+/* Put the scanner's global state back to its initial values: no buffer
+ * stack, no scan position, yy_init and yy_start zero, and yyin/yyout
+ * either stdin/stdout (with YY_STDINIT) or null.  Always returns 0.
+ */
+static int yy_init_globals (void)
+{
+        /* Initialization is the same as for the non-reentrant scanner.
+     * This function is called from yylex_destroy(), so don't allocate here.
+     */
+
+    (yy_buffer_stack) = 0;
+    (yy_buffer_stack_top) = 0;
+    (yy_buffer_stack_max) = 0;
+    (yy_c_buf_p) = (char *) 0;
+    (yy_init) = 0;
+    (yy_start) = 0;
+
+/* Defined in main.c */
+#ifdef YY_STDINIT
+    yyin = stdin;
+    yyout = stdout;
+#else
+    yyin = (FILE *) 0;
+    yyout = (FILE *) 0;
+#endif
+
+    /* For future reference: Set errno on error, since we are called by
+     * yylex_init()
+     */
+    return 0;
+}
+
+/* yylex_destroy is for both reentrant and non-reentrant scanners.
+ *
+ * Deletes every buffer still on the stack, frees the stack itself and
+ * resets the globals through yy_init_globals().  Always returns 0.
+ */
+int yylex_destroy  (void)
+{
+    
+    /* Pop the buffer stack, destroying each element. */
+       while(YY_CURRENT_BUFFER){
+               yy_delete_buffer(YY_CURRENT_BUFFER  );
+               YY_CURRENT_BUFFER_LVALUE = NULL;
+               /* The slot is already empty here, so this call returns
+                * without touching the stack; the loop ends because the
+                * current slot is now null.
+                */
+               yypop_buffer_state();
+       }
+
+       /* Destroy the stack itself. */
+       yyfree((yy_buffer_stack) );
+       (yy_buffer_stack) = NULL;
+
+    /* Reset the globals. This is important in a non-reentrant scanner so the next time
+     * yylex() is called, initialization will occur. */
+    yy_init_globals( );
+
+    return 0;
+}
+
+/*
+ * Internal utility routines.
+ */
+
+#ifndef yytext_ptr
+/* Copy exactly n bytes from s2 to s1.  Unlike strncpy() this neither
+ * stops at a NUL nor pads; nothing is copied when n is not positive.
+ */
+static void yy_flex_strncpy (char* s1, yyconst char * s2, int n )
+{
+       register char *to = s1;
+       register yyconst char *from = s2;
+       register int left;
+
+       for ( left = n; left > 0; --left )
+               *to++ = *from++;
+}
+#endif
+
+#ifdef YY_NEED_STRLEN
+/* Return the number of characters in s before its terminating NUL. */
+static int yy_flex_strlen (yyconst char * s )
+{
+       register yyconst char *end = s;
+
+       while ( *end )
+               ++end;
+
+       return (int) (end - s);
+}
+#endif
+
+/* Allocate size bytes for the scanner; a plain wrapper around malloc(),
+ * so the result is a null pointer when malloc() fails.
+ */
+void *yyalloc (yy_size_t  size )
+{
+       return (void *) malloc( size );
+}
+
+/* Resize a block obtained from yyalloc(); a plain wrapper around
+ * realloc().
+ */
+void *yyrealloc  (void * ptr, yy_size_t  size )
+{
+       /* The cast to (char *) in the following accommodates both
+        * implementations that use char* generic pointers, and those
+        * that use void* generic pointers.  It works with the latter
+        * because both ANSI C and C++ allow castless assignment from
+        * any pointer type to void*, and deal with argument conversions
+        * as though doing an assignment.
+        */
+       return (void *) realloc( (char *) ptr, size );
+}
+
+/* Release a block obtained from yyalloc() or yyrealloc(); a plain
+ * wrapper around free().
+ */
+void yyfree (void * ptr )
+{
+       free( (char *) ptr );   /* see yyrealloc() for (char *) cast */
+}
+
+#define YYTABLES_NAME "yytables"
+
+#line 107 "calclex.l"
+
+
+
+/* Called by the scanner when the input runs out.  Always returning 1
+   makes the scanner treat that as the final end of file.  */
+int
+yywrap ()
+{
+  return 1;
+}
+
diff --git a/demos/calc/calclex.l b/demos/calc/calclex.l

new file mode 100644 (file)

index 0000000..32d4fc2
--- /dev/null
+++ b/demos/calc/calclex.l
@@ -0,0 +1,113 @@
+/* Lexical analyzer for calc program.
+
+Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+PARTICULAR PURPOSE.  See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program.  If not, see http://www.gnu.org/licenses/.  */
+
+%{
+#include <string.h>
+#include "calc-common.h"
+
+
+#if WITH_READLINE
+/* Let GNU flex use readline.  See the calcread.c redefined input() for a
+   way that might work for a standard lex too.  */
+#define YY_INPUT(buf,result,max_size)   \
+  result = calc_input (buf, max_size);
+#endif
+
+
+/* Non-zero when reading the second or subsequent line of an expression,
+   used to give a different prompt when using readline.  The rules below
+   only ever clear it, at each statement separator.  */
+int  calc_more_input = 0;
+
+
+/* Reserved words and their token codes, ended by an entry with a NULL
+   name.  The identifier rule below searches this table linearly with
+   strcmp, so the order of the entries does not matter for lookup.  */
+const struct calc_keywords_t  calc_keywords[] = {
+  { "abs",       ABS },
+  { "bin",       BIN },
+  { "decimal",   DECIMAL },
+  { "fib",       FIB },
+  { "hex",       HEX },
+  { "help",      HELP },
+  { "gcd",       GCD },
+  { "kron",      KRON },
+  { "lcm",       LCM },
+  { "lucnum",    LUCNUM },
+  { "nextprime", NEXTPRIME },
+  { "powm",      POWM },
+  { "quit",      QUIT },
+  { "root",      ROOT },
+  { "sqrt",      SQRT },
+  { NULL }
+};
+%}
+
+%%
+
+[ \t\f] { /* white space is skipped */ }
+
+[;\n]   { /* semicolon or newline separates statements */
+          calc_more_input = 0;
+          return EOS; }
+\\\n    { /* escaped newlines are skipped */ }
+
+
+#(([^\\\n]*)\\)+\n {
+            /* comment through to escaped newline is skipped */ }
+#[^\n]*\n { /* comment through to newline is a separator */
+            calc_more_input = 0;
+            return EOS; }
+#[^\n]* {   /* comment through to EOF skipped */ }
+
+
+[-+*/%()<>^!=,] { /* a single-character operator is its own token code */
+                  return yytext[0]; }
+"<="    { return LE; }
+">="    { return GE; }
+"=="    { return EQ; }
+"!="    { return NE; }
+"<<"    { return LSHIFT; }
+">>"    { return RSHIFT; }
+"&&"    { return LAND; }
+"||"    { return LOR; }
+
+(0[xX])?[0-9A-F]+ {
+        /* Digits with an optional 0x/0X prefix; only upper case A-F are
+           taken as digits here.
+           NOTE(review): yytext is handed over as is, not copied, so the
+           parser presumably has to use it before the next token is
+           scanned - confirm in the grammar.  */
+        yylval.str = yytext;
+        return NUMBER; }
+
+[a-zA-Z][a-zA-Z0-9]* {
+        int  i;
+
+        /* a reserved word, matched exactly */
+        for (i = 0; calc_keywords[i].name != NULL; i++)
+          if (strcmp (yytext, calc_keywords[i].name) == 0)
+            return calc_keywords[i].value;
+
+        /* a single lower case letter is a variable, numbered from 'a' = 0 */
+        if (yytext[0] >= 'a' && yytext[0] <= 'z' && yytext[1] == '\0')
+          {
+            yylval.var = yytext[0] - 'a';
+            return VARIABLE;
+          }
+
+        /* any other identifier is rejected */
+        return BAD;
+}
+
+. { /* any character not matched above */
+    return BAD; }
+
+%%
+
+/* Called by the scanner when the input runs out.  Always returning 1
+   makes the scanner treat that as the final end of file.  Declared with
+   a (void) prototype rather than an old-style empty parameter list.  */
+int
+yywrap (void)
+{
+  return 1;
+}
diff --git a/demos/calc/calcread.c b/demos/calc/calcread.c

new file mode 100644 (file)

index 0000000..53291ff
--- /dev/null
+++ b/demos/calc/calcread.c
@@ -0,0 +1,146 @@
+/* Readline support for calc program.
+
+Copyright 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+PARTICULAR PURPOSE.  See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "calc-common.h"
+
+#if WITH_READLINE
+#include <stdio.h>   /* for FILE for old versions of readline/readline.h */
+#include <stdlib.h>  /* for free */
+#include <string.h>  /* for strdup */
+#include <unistd.h>  /* for isatty */
+#include <readline/readline.h>
+#include <readline/history.h>
+
+#include "gmp.h"
+
+
+/* change this to "#define TRACE(x) x" for a few diagnostics */
+#define TRACE(x)
+
+
+#define MIN(x,y) ((x) < (y) ? (x) : (y))
+
+/* Completion generator, installed as rl_completion_entry_function.
+
+   STATE is zero on the first call of a completion attempt, at which point
+   the scan of calc_keywords is restarted and the length of TEXT recorded.
+   Each call returns a strdup'ed copy of the next keyword having TEXT as a
+   prefix, or NULL once the table is exhausted.  The scan position and
+   length are kept in statics between calls.  */
+char *
+calc_completion_entry (const char *text, int state)
+{
+  static int  index, len;
+  char  *name;
+
+  if (!state)
+    {
+      index = 0;
+      len = strlen (text);
+    }
+  TRACE (printf ("calc_completion_entry %s %d, index=%d len=%d\n",
+                text, state, index, len));
+  while ((name = calc_keywords[index].name) != NULL)
+    {
+      index++;
+      /* strncmp stops at the terminating NUL of NAME, whereas memcmp
+         would examine LEN bytes even when TEXT is longer than the
+         keyword, reading past the end of the keyword string.  */
+      if (strncmp (name, text, len) == 0)
+       return (strdup (name));
+    }
+  return NULL;
+}
+
+/* Settle whether readline is to be used and, when it is, print the
+   start-up banner and install keyword completion.  */
+void
+calc_init_readline (void)
+{
+  /* An option value of -1 is resolved here: readline is used exactly when
+     stdin is a tty.  Making the behaviour depend on where the input comes
+     from is a bit contrary to the GNU interface conventions, but it's
+     pretty convenient.  */
+  if (calc_option_readline == -1)
+    {
+      calc_option_readline = isatty (fileno (stdin));
+      TRACE (printf ("calc_option_readline %d\n", calc_option_readline));
+    }
+
+  if (! calc_option_readline)
+    return;
+
+  rl_readline_name = "gmp-calc";
+  rl_completion_entry_function = calc_completion_entry;
+
+  printf ("GNU MP demo calculator program, gmp version %s\n", gmp_version);
+  printf ("Type \"help\" for help.\n");
+}
+
+
+/* Input hook for the scanner: store up to MAX_SIZE bytes in BUF and
+   return how many were stored, or 0 at end of input.
+
+   That 0 stands for YY_NULL.  The constant is only in calclex.c and we
+   don't want to clutter calclex.l with this readline stuff, so it's hard
+   coded here instead.  0 is its defined value on unix anyway.  */
+
+int
+calc_input (char *buf, size_t max_size)
+{
+  static char    *line = NULL;   /* line most recently got from readline */
+  static size_t  line_size = 0;  /* bytes in line, counting the added \n */
+  static size_t  upto = 0;       /* bytes of line already handed out */
+  size_t         copy_size;
+
+  if (! calc_option_readline)
+    {
+      /* not readline, take bytes straight from stdin */
+      return fread (buf, 1, max_size, stdin);
+    }
+
+  /* Once the previous line has been delivered in full, get another.  */
+  if (upto >= line_size)
+    {
+      const char  *prompt = (calc_more_input ? "more> " : "> ");
+
+      if (line != NULL)
+        free (line);
+
+      line = readline (prompt);
+      calc_more_input = 1;
+      if (line == NULL)
+        return 0;
+      TRACE (printf ("readline: %s\n", line));
+
+      /* empty lines are kept out of the history */
+      if (*line != '\0')
+        add_history (line);
+
+      /* The terminating NUL is overwritten by a newline, so the scanner
+         sees an end of line.  From here on the buffer is counted, not
+         NUL-terminated.  */
+      line_size = strlen (line);
+      line[line_size++] = '\n';
+      upto = 0;
+    }
+
+  /* Hand over as much of the remainder as the caller has room for.  */
+  copy_size = line_size - upto;
+  if (copy_size > max_size)
+    copy_size = max_size;
+  memcpy (buf, line + upto, copy_size);
+  upto += copy_size;
+  return copy_size;
+}
+
+
+/* This redefined input() might let a traditional lex use the readline
+   support here.  Apparently POSIX doesn't specify whether an override like
+   this will work, so maybe it'll work or maybe it won't.  This function is
+   also not particularly efficient, but don't worry about that, since flex
+   is the preferred parser.
+
+   Returns the next input byte as a non-negative value, or EOF when
+   calc_input delivers nothing.  */
+
+int
+input (void)
+{
+  char  c;
+  if (calc_input (&c, 1) != 1)
+    return EOF;
+  else
+    {
+      /* Go through unsigned char, the same as getc does.  Where plain
+         char is signed a byte such as 0xFF would otherwise come back as
+         -1 and be mistaken for EOF.  */
+      return (int) (unsigned char) c;
+    }
+}
+
+#endif /* WITH_READLINE */
diff --git a/demos/expr/Makefile.am b/demos/expr/Makefile.am

new file mode 100644 (file)

index 0000000..333cacc
--- /dev/null
+++ b/demos/expr/Makefile.am
@@ -0,0 +1,43 @@
+## Process this file with automake to generate Makefile.in
+
+# Copyright 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/tests
+
+# FIXME: This is a workaround for a bug in automake 1.8.4.  When the only
+# library is in EXTRA_LIBRARIES, $(ARFLAGS) is used but no default setting
+# for that variable is established.  We give an explicit ARFLAGS=cru the
+# same as generated for lib_LIBRARIES or noinst_LIBRARIES.
+#
+ARFLAGS = cru
+
+EXTRA_LIBRARIES = libexpr.a
+libexpr_a_SOURCES = expr.h expr-impl.h \
+  expr.c exprv.c exprz.c exprza.c exprq.c exprqa.c exprf.c exprfa.c
+
+EXTRA_PROGRAMS = run-expr t-expr
+LDADD = libexpr.a $(top_builddir)/libgmp.la
+t_expr_LDADD = $(top_builddir)/tests/libtests.la $(LDADD)
+
+CLEANFILES = $(EXTRA_PROGRAMS) $(EXTRA_LIBRARIES)
+
+allprogs: $(EXTRA_PROGRAMS)
+
+# Build the test support library in its own directory.  "&&" rather than
+# ";" so that $(MAKE) is not run in the wrong directory if the cd fails.
+$(top_builddir)/tests/libtests.la:
+       cd $(top_builddir)/tests && $(MAKE) $(AM_MAKEFLAGS) libtests.la
diff --git a/demos/expr/Makefile.in b/demos/expr/Makefile.in

new file mode 100644 (file)

index 0000000..5f95947
--- /dev/null
+++ b/demos/expr/Makefile.in
@@ -0,0 +1,570 @@
+# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation,
+# Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+# Copyright 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+ANSI2KNR = $(top_builddir)/ansi2knr
+EXTRA_PROGRAMS = run-expr$(EXEEXT) t-expr$(EXEEXT)
+subdir = demos/expr
+DIST_COMMON = README $(srcdir)/Makefile.am $(srcdir)/Makefile.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
+       $(top_srcdir)/configure.in
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+       $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+libexpr_a_AR = $(AR) $(ARFLAGS)
+libexpr_a_LIBADD =
+am_libexpr_a_OBJECTS = expr$U.$(OBJEXT) exprv$U.$(OBJEXT) \
+       exprz$U.$(OBJEXT) exprza$U.$(OBJEXT) exprq$U.$(OBJEXT) \
+       exprqa$U.$(OBJEXT) exprf$U.$(OBJEXT) exprfa$U.$(OBJEXT)
+libexpr_a_OBJECTS = $(am_libexpr_a_OBJECTS)
+run_expr_SOURCES = run-expr.c
+run_expr_OBJECTS = run-expr$U.$(OBJEXT)
+run_expr_LDADD = $(LDADD)
+run_expr_DEPENDENCIES = libexpr.a $(top_builddir)/libgmp.la
+t_expr_SOURCES = t-expr.c
+t_expr_OBJECTS = t-expr$U.$(OBJEXT)
+t_expr_DEPENDENCIES = $(top_builddir)/tests/libtests.la $(LDADD)
+DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
+depcomp =
+am__depfiles_maybe =
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+       $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+       $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
+       $(LDFLAGS) -o $@
+SOURCES = $(libexpr_a_SOURCES) run-expr.c t-expr.c
+DIST_SOURCES = $(libexpr_a_SOURCES) run-expr.c t-expr.c
+ETAGS = etags
+CTAGS = ctags
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ABI = @ABI@
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AS = @AS@
+ASMFLAGS = @ASMFLAGS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CALLING_CONVENTIONS_OBJS = @CALLING_CONVENTIONS_OBJS@
+CC = @CC@
+CCAS = @CCAS@
+CC_FOR_BUILD = @CC_FOR_BUILD@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CPP_FOR_BUILD = @CPP_FOR_BUILD@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFN_LONG_LONG_LIMB = @DEFN_LONG_LONG_LIMB@
+DEFS = @DEFS@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+EXEEXT_FOR_BUILD = @EXEEXT_FOR_BUILD@
+FGREP = @FGREP@
+GMP_LDFLAGS = @GMP_LDFLAGS@
+GMP_LIMB_BITS = @GMP_LIMB_BITS@
+GMP_NAIL_BITS = @GMP_NAIL_BITS@
+GREP = @GREP@
+HAVE_CLOCK_01 = @HAVE_CLOCK_01@
+HAVE_CPUTIME_01 = @HAVE_CPUTIME_01@
+HAVE_GETRUSAGE_01 = @HAVE_GETRUSAGE_01@
+HAVE_GETTIMEOFDAY_01 = @HAVE_GETTIMEOFDAY_01@
+HAVE_HOST_CPU_FAMILY_power = @HAVE_HOST_CPU_FAMILY_power@
+HAVE_HOST_CPU_FAMILY_powerpc = @HAVE_HOST_CPU_FAMILY_powerpc@
+HAVE_SIGACTION_01 = @HAVE_SIGACTION_01@
+HAVE_SIGALTSTACK_01 = @HAVE_SIGALTSTACK_01@
+HAVE_SIGSTACK_01 = @HAVE_SIGSTACK_01@
+HAVE_STACK_T_01 = @HAVE_STACK_T_01@
+HAVE_SYS_RESOURCE_H_01 = @HAVE_SYS_RESOURCE_H_01@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LEX = @LEX@
+LEXLIB = @LEXLIB@
+LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
+LIBCURSES = @LIBCURSES@
+LIBGMPXX_LDFLAGS = @LIBGMPXX_LDFLAGS@
+LIBGMP_DLL = @LIBGMP_DLL@
+LIBGMP_LDFLAGS = @LIBGMP_LDFLAGS@
+LIBM = @LIBM@
+LIBM_FOR_BUILD = @LIBM_FOR_BUILD@
+LIBOBJS = @LIBOBJS@
+LIBREADLINE = @LIBREADLINE@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+M4 = @M4@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
+STRIP = @STRIP@
+TAL_OBJECT = @TAL_OBJECT@
+TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
+U = @U@
+U_FOR_BUILD = @U_FOR_BUILD@
+VERSION = @VERSION@
+WITH_READLINE_01 = @WITH_READLINE_01@
+YACC = @YACC@
+YFLAGS = @YFLAGS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__leading_dot = @am__leading_dot@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+gmp_srclinks = @gmp_srclinks@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+mpn_objects = @mpn_objects@
+mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
+mpn_objs_in_libmp = @mpn_objs_in_libmp@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/tests
+
+# FIXME: This is a workaround for a bug in automake 1.8.4.  When the only
+# library is in EXTRA_LIBRARIES, $(ARFLAGS) is used but no default setting
+# for that variable is established.  We give an explicit ARFLAGS=cru the
+# same as generated for lib_LIBRARIES or noinst_LIBRARIES.
+#
+ARFLAGS = cru
+EXTRA_LIBRARIES = libexpr.a
+libexpr_a_SOURCES = expr.h expr-impl.h \
+  expr.c exprv.c exprz.c exprza.c exprq.c exprqa.c exprf.c exprfa.c
+
+LDADD = libexpr.a $(top_builddir)/libgmp.la
+t_expr_LDADD = $(top_builddir)/tests/libtests.la $(LDADD)
+CLEANFILES = $(EXTRA_PROGRAMS) $(EXTRA_LIBRARIES)
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps)
+       @for dep in $?; do \
+         case '$(am__configure_deps)' in \
+           *$$dep*) \
+             ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+               && { if test -f $@; then exit 0; else break; fi; }; \
+             exit 1;; \
+         esac; \
+       done; \
+       echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps demos/expr/Makefile'; \
+       $(am__cd) $(top_srcdir) && \
+         $(AUTOMAKE) --gnu --ignore-deps demos/expr/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+       @case '$?' in \
+         *config.status*) \
+           cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+         *) \
+           echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+           cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+       esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+libexpr.a: $(libexpr_a_OBJECTS) $(libexpr_a_DEPENDENCIES) 
+       -rm -f libexpr.a
+       $(libexpr_a_AR) libexpr.a $(libexpr_a_OBJECTS) $(libexpr_a_LIBADD)
+       $(RANLIB) libexpr.a
+run-expr$(EXEEXT): $(run_expr_OBJECTS) $(run_expr_DEPENDENCIES) 
+       @rm -f run-expr$(EXEEXT)
+       $(LINK) $(run_expr_OBJECTS) $(run_expr_LDADD) $(LIBS)
+t-expr$(EXEEXT): $(t_expr_OBJECTS) $(t_expr_DEPENDENCIES) 
+       @rm -f t-expr$(EXEEXT)
+       $(LINK) $(t_expr_OBJECTS) $(t_expr_LDADD) $(LIBS)
+
+mostlyclean-compile:
+       -rm -f *.$(OBJEXT)
+
+distclean-compile:
+       -rm -f *.tab.c
+$(top_builddir)/ansi2knr:
+       $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
+
+mostlyclean-kr:
+       -test "$U" = "" || rm -f *_.c
+
+.c.o:
+       $(COMPILE) -c $<
+
+.c.obj:
+       $(COMPILE) -c `$(CYGPATH_W) '$<'`
+
+.c.lo:
+       $(LTCOMPILE) -c -o $@ $<
+expr_.c: expr.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/expr.c; then echo $(srcdir)/expr.c; else echo expr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+exprf_.c: exprf.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/exprf.c; then echo $(srcdir)/exprf.c; else echo exprf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+exprfa_.c: exprfa.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/exprfa.c; then echo $(srcdir)/exprfa.c; else echo exprfa.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+exprq_.c: exprq.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/exprq.c; then echo $(srcdir)/exprq.c; else echo exprq.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+exprqa_.c: exprqa.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/exprqa.c; then echo $(srcdir)/exprqa.c; else echo exprqa.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+exprv_.c: exprv.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/exprv.c; then echo $(srcdir)/exprv.c; else echo exprv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+exprz_.c: exprz.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/exprz.c; then echo $(srcdir)/exprz.c; else echo exprz.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+exprza_.c: exprza.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/exprza.c; then echo $(srcdir)/exprza.c; else echo exprza.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+run-expr_.c: run-expr.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/run-expr.c; then echo $(srcdir)/run-expr.c; else echo run-expr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-expr_.c: t-expr.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-expr.c; then echo $(srcdir)/t-expr.c; else echo t-expr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+expr_.$(OBJEXT) expr_.lo exprf_.$(OBJEXT) exprf_.lo exprfa_.$(OBJEXT) \
+exprfa_.lo exprq_.$(OBJEXT) exprq_.lo exprqa_.$(OBJEXT) exprqa_.lo \
+exprv_.$(OBJEXT) exprv_.lo exprz_.$(OBJEXT) exprz_.lo \
+exprza_.$(OBJEXT) exprza_.lo run-expr_.$(OBJEXT) run-expr_.lo \
+t-expr_.$(OBJEXT) t-expr_.lo : $(ANSI2KNR)
+
+mostlyclean-libtool:
+       -rm -f *.lo
+
+clean-libtool:
+       -rm -rf .libs _libs
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+       list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       mkid -fID $$unique
+tags: TAGS
+
+TAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+               $(TAGS_FILES) $(LISP)
+       set x; \
+       here=`pwd`; \
+       list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       shift; \
+       if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+         test -n "$$unique" || unique=$$empty_fix; \
+         if test $$# -gt 0; then \
+           $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+             "$$@" $$unique; \
+         else \
+           $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+             $$unique; \
+         fi; \
+       fi
+ctags: CTAGS
+CTAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+               $(TAGS_FILES) $(LISP)
+       list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       test -z "$(CTAGS_ARGS)$$unique" \
+         || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+            $$unique
+
+GTAGS:
+       here=`$(am__cd) $(top_builddir) && pwd` \
+         && $(am__cd) $(top_srcdir) \
+         && gtags -i $(GTAGS_ARGS) "$$here"
+
+distclean-tags:
+       -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+       @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+       topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+       list='$(DISTFILES)'; \
+         dist_files=`for file in $$list; do echo $$file; done | \
+         sed -e "s|^$$srcdirstrip/||;t" \
+             -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+       case $$dist_files in \
+         */*) $(MKDIR_P) `echo "$$dist_files" | \
+                          sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+                          sort -u` ;; \
+       esac; \
+       for file in $$dist_files; do \
+         if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+         if test -d $$d/$$file; then \
+           dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+           if test -d "$(distdir)/$$file"; then \
+             find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+           fi; \
+           if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+             cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+             find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+           fi; \
+           cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+         else \
+           test -f "$(distdir)/$$file" \
+           || cp -p $$d/$$file "$(distdir)/$$file" \
+           || exit 1; \
+         fi; \
+       done
+check-am: all-am
+check: check-am
+all-am: Makefile
+installdirs:
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+       @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+       $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+         install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+         `test -z '$(STRIP)' || \
+           echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+       -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES)
+
+distclean-generic:
+       -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+       -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+       @echo "This command is intended for maintainers to use"
+       @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-am
+       -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+       distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+       -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+       mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: $(top_builddir)/ansi2knr install-am install-strip
+
+.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
+       clean-libtool ctags distclean distclean-compile \
+       distclean-generic distclean-libtool distclean-tags distdir dvi \
+       dvi-am html html-am info info-am install install-am \
+       install-data install-data-am install-dvi install-dvi-am \
+       install-exec install-exec-am install-html install-html-am \
+       install-info install-info-am install-man install-pdf \
+       install-pdf-am install-ps install-ps-am install-strip \
+       installcheck installcheck-am installdirs maintainer-clean \
+       maintainer-clean-generic mostlyclean mostlyclean-compile \
+       mostlyclean-generic mostlyclean-kr mostlyclean-libtool pdf \
+       pdf-am ps ps-am tags uninstall uninstall-am
+
+
+allprogs: $(EXTRA_PROGRAMS)
+
+# Build the test support library in its own directory.  "&&" rather than
+# ";" so that $(MAKE) is not run in the wrong directory if the cd fails.
+$(top_builddir)/tests/libtests.la:
+       cd $(top_builddir)/tests && $(MAKE) $(AM_MAKEFLAGS) libtests.la
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/demos/expr/README b/demos/expr/README

new file mode 100644 (file)

index 0000000..f80b4d7
--- /dev/null
+++ b/demos/expr/README
@@ -0,0 +1,490 @@
+Copyright 2001, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+
+
+
+
+                    GMP EXPRESSION EVALUATION
+                    -------------------------
+
+
+
+THIS CODE IS PRELIMINARY AND MAY BE SUBJECT TO INCOMPATIBLE CHANGES IN
+FUTURE VERSIONS OF GMP.
+
+
+
+The files in this directory implement a simple scheme of string based
+expression parsing and evaluation, supporting mpz, mpq and mpf.
+
+This will be slower than direct GMP library calls, but may be convenient in
+various circumstances, such as while prototyping, or for letting a user
+enter values in symbolic form.  "2**5723-7" for example is a lot easier to
+enter or maintain than the equivalent written out in decimal.
+
+
+
+BUILDING
+
+Nothing in this directory is a normal part of libgmp, and nothing is built
+or installed, but various Makefile rules are available to compile
+everything.
+
+All the functions are available through a little library (there's no shared
+library since upward binary compatibility is not guaranteed).
+
+       make libexpr.a
+
+In a program, prototypes are available using
+
+       #include "expr.h"
+
+run-expr.c is a sample program doing evaluations from the command line.
+
+       make run-expr
+       ./run-expr '1+2*3'
+
+t-expr.c is a self-test program; it prints nothing if successful.
+
+       make t-expr
+       ./t-expr
+
+The expr*.c sources don't depend on gmp-impl.h and can be compiled with just
+a standard installed GMP.  This isn't true of t-expr though, since it uses
+some of the internal tests/libtests.la.
+
+
+
+SIMPLE USAGE
+
+int mpz_expr (mpz_t res, int base, const char *e, ...);
+int mpq_expr (mpq_t res, int base, const char *e, ...);
+int mpf_expr (mpf_t res, int base, const char *e, ...);
+
+These functions evaluate simple arithmetic expressions.  For example,
+
+       mpz_expr (result, 0, "123+456", NULL);
+
+Numbers are parsed by mpz_expr and mpq_expr the same as mpz_set_str with the
+given base.  mpf_expr follows mpf_set_str, but also supports a "0x" prefix
+for hex when base==0.
+
+       mpz_expr (result, 0, "0xAAAA * 0x5555", NULL);
+
+White space, as indicated by <ctype.h> isspace(), is ignored except for the
+purpose of separating tokens.
+
+Variables can be included in expressions by putting them in the varargs list
+after the string.  "a", "b", "c" etc in the expression string designate
+those values.  For example,
+
+        mpq_t  foo, bar;
+        ...
+       mpq_expr (q, 10, "2/3 + 1/a + b/2", foo, bar, NULL);
+
+Here "a" will be the value from foo and "b" from bar.  Up to 26 variables
+can be included this way.  The NULL must be present to indicate the end of
+the list.
+
+Variables can also be written "$a", "$b" etc.  This is necessary when using
+bases greater than 10 since plain "a", "b" etc will otherwise be interpreted
+as numbers.  For example,
+
+        mpf_t  quux;
+        mpf_expr (f, 16, "F00F@-6 * $a", quux, NULL);
+
+All the standard C operators are available, with the usual precedences, plus
+"**" for exponentiation at the highest precedence (and right associative).
+
+        Operators      Precedence
+         **              220
+         ~ ! - (unary)   210
+         * / %           200
+         + -             190
+         << >>           180
+         <= < >= >       170
+         == !=           160
+         &               150
+         ^               140
+         |               130
+         &&              120
+         ||              110
+         ? :             100/101
+
+Currently only mpz_expr has the bitwise ~ % & ^ and | operators.  The
+precedence numbers are of interest in the advanced usage described below.
+
+Various functions are available too.  For example,
+
+        mpz_expr (res, 10, "gcd(123,456,789) * abs(a)", var, NULL);
+
+The following is the full set of functions,
+
+        mpz_expr
+            abs bin clrbit cmp cmpabs congruent_p divisible_p even_p fib fac
+            gcd hamdist invert jacobi kronecker lcm lucnum max min nextprime
+            odd_p perfect_power_p perfect_square_p popcount powm
+            probab_prime_p root scan0 scan1 setbit sgn sqrt
+
+        mpq_expr
+            abs, cmp, den, max, min, num, sgn
+
+        mpf_expr
+            abs, ceil, cmp, eq, floor, integer_p, max, min, reldiff, sgn,
+            sqrt, trunc
+
+All these are the same as the GMP library functions, except that min and max
+don't exist in the library.  Note also that min, max, gcd and lcm take any
+number of arguments, not just two.
+
+mpf_expr does all calculations to the precision of the destination variable.
+
+
+Expression parsing can succeed or fail.  The return value indicates this,
+and will be one of the following
+
+       MPEXPR_RESULT_OK
+       MPEXPR_RESULT_BAD_VARIABLE
+       MPEXPR_RESULT_BAD_TABLE
+       MPEXPR_RESULT_PARSE_ERROR
+       MPEXPR_RESULT_NOT_UI
+
+BAD_VARIABLE is when a variable is referenced that hasn't been provided.
+For example if "c" is used when only two parameters have been passed.
+BAD_TABLE is applicable to the advanced usage described below.
+
+PARSE_ERROR is a general syntax error, returned for any malformed input
+string.
+
+NOT_UI is returned when an attempt is made to use an operand that's bigger
+than an "unsigned long" with a function that's restricted to that range.
+For example "fib" is mpz_fib_ui and only accepts an "unsigned long".
+
+
+
+
+ADVANCED USAGE
+
+int mpz_expr_a (const struct mpexpr_operator_t *table,
+                mpz_ptr res, int base, const char *e, size_t elen,
+                mpz_srcptr var[26])
+int mpq_expr_a (const struct mpexpr_operator_t *table,
+                mpq_ptr res, int base, const char *e, size_t elen,
+                mpq_srcptr var[26])
+int mpf_expr_a (const struct mpexpr_operator_t *table,
+                mpf_ptr res, int base, unsigned long prec,
+                const char *e, size_t elen,
+                mpf_srcptr var[26])
+
+These functions are an advanced interface to expression parsing.
+
+The string is taken as a pointer and a length.  This makes it possible to
+parse an expression embedded in a larger string without copying and null
+terminating it.
+
+Variables are an array of 26 pointers to the appropriate operands, or NULL
+for variables that are not available.  Any combination of variables can be
+given, for example just "x" and "y" (var[23] and var[24]) could be set.
+
+Operators and functions are specified with a table.  This makes it possible
+to provide additional operators or functions, or to completely change the
+syntax.  The standard tables used by the simple functions above are
+available as
+
+       const struct mpexpr_operator_t * const mpz_expr_standard_table;
+       const struct mpexpr_operator_t * const mpq_expr_standard_table;
+       const struct mpexpr_operator_t * const mpf_expr_standard_table;
+
+struct mpexpr_operator_t is the following
+
+       struct mpexpr_operator_t {
+         const char    *name;
+         mpexpr_fun_t  fun;
+         int           type;
+         int           precedence;
+       };
+
+        typedef void (*mpexpr_fun_t) (void);
+
+As an example, the standard mpz_expr table entry for multiplication is as
+follows.  See the source code for the full set of standard entries.
+
+       { "*", (mpexpr_fun_t) mpz_mul, MPEXPR_TYPE_BINARY, 200 },
+
+"name" is the string to parse, "fun" is the function to call for it, "type"
+indicates what parameters the function takes (among other things), and
+"precedence" sets its operator precedence.
+
+A NULL for "name" indicates the end of the table, so for example an mpf
+table with nothing but addition could be
+
+        struct mpexpr_operator_t  table[] = {
+          { "+", (mpexpr_fun_t) mpf_add, MPEXPR_TYPE_BINARY, 190 },
+          { NULL }
+        };
+
+A special type MPEXPR_TYPE_NEW_TABLE makes it possible to chain from one
+table to another.  For example the following would add a "mod" operator to
+the standard mpz table,
+
+        struct mpexpr_operator_t  table[] = {
+        { "mod", (mpexpr_fun_t) mpz_fdiv_r, MPEXPR_TYPE_BINARY, 125 },
+        { (const char *) mpz_expr_standard_table, NULL, MPEXPR_TYPE_NEW_TABLE }
+        };
+
+Notice the low precedence on "mod", so that for instance "45+26 mod 7"
+parses as "(45+26)mod7".
+
+
+Functions are designated by a precedence of 0.  They always occur as
+"foo(expr)" and so have no need for a precedence level.  mpq_abs in the
+standard mpq table is
+
+       { "abs", (mpexpr_fun_t) mpq_abs, MPEXPR_TYPE_UNARY },
+
+Functions expecting no arguments as in "foo()" can be given with
+MPEXPR_TYPE_0ARY, or actual constants to be parsed as just "foo" are
+MPEXPR_TYPE_CONSTANT.  For example if a "void mpf_const_pi(mpf_t f)"
+function existed (which it doesn't) it could be,
+
+       { "pi", (mpexpr_fun_t) mpf_const_pi, MPEXPR_TYPE_CONSTANT },
+
+
+Parsing of operator names is done by seeking the table entry with the
+longest matching name.  So for instance operators "<" and "<=" exist, and
+when presented with "x <= y" the parser matches "<=" because it's longer.
+
+Parsing of function names, on the other hand, is done by requiring a whole
+alphanumeric word to match.  For example presented with "fib2zz(5)" the
+parser will attempt to find a function called "fib2zz".  A function "fib"
+wouldn't be used because it doesn't match the whole word.
+
+The flag MPEXPR_TYPE_WHOLEWORD can be ORed into an operator type to override
+the default parsing style.  Similarly MPEXPR_TYPE_OPERATOR into a function.
+
+
+Binary operators are left associative by default, meaning they're evaluated
+from left to right, so for example "1+2+3" is treated as "(1+2)+3".
+MPEXPR_TYPE_RIGHTASSOC can be ORed into the operator type to work from right
+to left as in "1+(2+3)".  This is generally what's wanted for
+exponentiation, and for example the standard mpz table has
+
+        { "**", (mpexpr_fun_t) mpz_pow_ui,
+          MPEXPR_TYPE_BINARY_UI | MPEXPR_TYPE_RIGHTASSOC, 220 }
+
+Unary operators are postfix by default.  For example a factorial to be used
+as "123!" might be
+
+       { "!", (mpexpr_fun_t) mpz_fac_ui, MPEXPR_TYPE_UNARY_UI, 215 }
+
+MPEXPR_TYPE_PREFIX can be ORed into the type to get a prefix operator.  For
+instance negation (unary minus) in the standard mpf table is
+
+       { "-", (mpexpr_fun_t) mpf_neg,
+          MPEXPR_TYPE_UNARY | MPEXPR_TYPE_PREFIX, 210 },
+
+
+The same operator can exist as a prefix unary and a binary, or as a prefix
+and postfix unary, simply by putting two entries in the table.  While
+parsing the context determines which style is sought.  But note that the
+same operator can't be both a postfix unary and a binary, since the parser
+doesn't try to look ahead to decide which ought to be used.
+
+When there are two entries for an operator, both prefix or both postfix (or
+binary), then the first in the table will be used.  This makes it possible
+to override an entry in a standard table, for example to change the function
+it calls, or perhaps its precedence level.  The following would change mpz
+division from tdiv to cdiv,
+
+        struct mpexpr_operator_t  table[] = {
+          { "/", (mpexpr_fun_t) mpz_cdiv_q, MPEXPR_TYPE_BINARY, 200 },
+          { "%", (mpexpr_fun_t) mpz_cdiv_r, MPEXPR_TYPE_BINARY, 200 },
+          { (const char *) mpz_expr_standard_table, NULL, MPEXPR_TYPE_NEW_TABLE }
+        };
+
+
+The type field indicates what parameters the given function expects.  The
+following styles of functions are supported.  mpz_t is shown, but of course
+this is mpq_t for mpq_expr_a, mpf_t for mpf_expr_a, etc.
+
+    MPEXPR_TYPE_CONSTANT     void func (mpz_t result);
+
+    MPEXPR_TYPE_0ARY         void func (mpz_t result);
+    MPEXPR_TYPE_I_0ARY       int func (void);
+
+    MPEXPR_TYPE_UNARY        void func (mpz_t result, mpz_t op);
+    MPEXPR_TYPE_UNARY_UI     void func (mpz_t result, unsigned long op);
+    MPEXPR_TYPE_I_UNARY      int func (mpz_t op);
+    MPEXPR_TYPE_I_UNARY_UI   int func (unsigned long op);
+
+    MPEXPR_TYPE_BINARY       void func (mpz_t result, mpz_t op1, mpz_t op2);
+    MPEXPR_TYPE_BINARY_UI    void func (mpz_t result,
+                                        mpz_t op1, unsigned long op2);
+    MPEXPR_TYPE_I_BINARY     int func (mpz_t op1, mpz_t op2);
+    MPEXPR_TYPE_I_BINARY_UI  int func (mpz_t op1, unsigned long op2);
+
+    MPEXPR_TYPE_TERNARY      void func (mpz_t result,
+                                        mpz_t op1, mpz_t op2, mpz_t op3);
+    MPEXPR_TYPE_TERNARY_UI   void func (mpz_t result, mpz_t op1, mpz_t op2,
+                                        unsigned long op3);
+    MPEXPR_TYPE_I_TERNARY    int func (mpz_t op1, mpz_t op2, mpz_t op3);
+    MPEXPR_TYPE_I_TERNARY_UI int func (mpz_t op1, mpz_t op2,
+                                       unsigned long op3);
+
+Notice the pattern of "UI" for the last parameter as an unsigned long, or
+"I" for the result as an "int" return value.
+
+It's important that the declared type for an operator or function matches
+the function pointer given.  Any mismatch will have unpredictable results.
+
+For binary functions, a further type attribute is MPEXPR_TYPE_PAIRWISE which
+indicates that any number of arguments should be accepted, and evaluated by
+applying the given binary function to them pairwise.  This is used by gcd,
+lcm, min and max.  For example the standard mpz gcd is
+
+       { "gcd", (mpexpr_fun_t) mpz_gcd,
+         MPEXPR_TYPE_BINARY | MPEXPR_TYPE_PAIRWISE },
+
+Some special types exist for comparison operators (or functions).
+MPEXPR_TYPE_CMP_LT through MPEXPR_TYPE_CMP_GE expect an MPEXPR_TYPE_I_BINARY
+function, returning positive, negative or zero like mpz_cmp and similar.
+For example the standard mpf "!=" operator is
+
+       { "!=", (mpexpr_fun_t) mpf_cmp, MPEXPR_TYPE_CMP_NE, 160 },
+
+But there's no obligation to use these types, for instance the standard mpq
+table just uses a plain MPEXPR_TYPE_I_BINARY and mpq_equal for "==".
+
+Further special types MPEXPR_TYPE_MIN and MPEXPR_TYPE_MAX exist to implement
+the min and max functions, and they take a function like mpf_cmp similarly.
+The standard mpf max function is
+
+       { "max",  (mpexpr_fun_t) mpf_cmp,
+          MPEXPR_TYPE_MAX | MPEXPR_TYPE_PAIRWISE },
+
+These can be used as operators too, for instance the following would be the
+>? operator which is a feature of GNU C++,
+
+       { ">?", (mpexpr_fun_t) mpf_cmp, MPEXPR_TYPE_MAX, 175 },
+
+Other special types are used to define "(" ")" parentheses, "," function
+argument separator, "!" through "||" logical booleans, ternary "?"  ":", and
+the "$" which introduces variables.  See the sources for how they should be
+used.
+
+
+User definable operator tables will have various uses.  For example,
+
+  - a subset of the C operators, to be rid of infrequently used things
+  - a more mathematical syntax like "." for multiply, "^" for powering,
+    and "!" for factorial
+  - a boolean evaluator with "^" for AND, "v" for OR
+  - variables introduced with "%" instead of "$"
+  - brackets as "[" and "]" instead of "(" and ")"
+
+The only fixed parts of the parsing are the treatment of numbers, whitespace
+and the two styles of operator/function name recognition.
+
+As a final example, the following would be a complete mpz table implementing
+some operators with a more mathematical syntax.  Notice there's no need to
+preserve the standard precedence values, anything can be used so long as
+they're in the desired relation to each other.  There's also no need to have
+entries in precedence order, but it's convenient to do so to show what comes
+where.
+
+        static const struct mpexpr_operator_t  table[] = {
+         { "^",   (mpexpr_fun_t) mpz_pow_ui,
+            MPEXPR_TYPE_BINARY_UI | MPEXPR_TYPE_RIGHTASSOC,           9 },
+
+          { "!",   (mpexpr_fun_t) mpz_fac_ui, MPEXPR_TYPE_UNARY_UI,   8 },
+          { "-",   (mpexpr_fun_t) mpz_neg,
+            MPEXPR_TYPE_UNARY | MPEXPR_TYPE_PREFIX,                   7 },
+
+          { "*",   (mpexpr_fun_t) mpz_mul,    MPEXPR_TYPE_BINARY,     6 },
+          { "/",   (mpexpr_fun_t) mpz_fdiv_q, MPEXPR_TYPE_BINARY,     6 },
+
+          { "+",   (mpexpr_fun_t) mpz_add,    MPEXPR_TYPE_BINARY,     5 },
+          { "-",   (mpexpr_fun_t) mpz_sub,    MPEXPR_TYPE_BINARY,     5 },
+
+          { "mod", (mpexpr_fun_t) mpz_mod,    MPEXPR_TYPE_BINARY,     6 },
+
+          { ")",   NULL,                      MPEXPR_TYPE_CLOSEPAREN, 4 },
+          { "(",   NULL,                      MPEXPR_TYPE_OPENPAREN,  3 },
+          { ",",   NULL,                      MPEXPR_TYPE_ARGSEP,     2 },
+
+          { "$",   NULL,                      MPEXPR_TYPE_VARIABLE,   1 },
+          { NULL }
+        };
+
+
+
+
+INTERNALS
+
+Operator precedence is implemented using a control and data stack, there's
+no C recursion.  When an expression like 1+2*3 is read the "+" is held on
+the control stack and 1 on the data stack until "*" has been parsed and
+applied to 2 and 3.  This happens any time a higher precedence operator
+follows a lower one, or when a right-associative operator like "**" is
+repeated.
+
+Parentheses are handled by making "(" a special prefix unary with a low
+precedence so a whole following expression is read.  The special operator
+")" knows to discard the pending "(".  Function arguments are handled
+similarly, with the function pretending to be a low precedence prefix unary
+operator, and with "," allowed within functions.  The same special ")"
+operator recognises a pending function and will invoke it appropriately.
+
+The ternary "? :" operator is also handled using precedences.  ":" is one
+level higher than "?", so when a valid a?b:c is parsed the ":" finds a "?"
+on the control stack.  It's a parse error for ":" to find anything else.
+
+
+
+FUTURE
+
+The ternary "?:" operator evaluates the "false" side of its pair, which is
+wasteful, though it ought to be harmless.  It'd be better if it could
+evaluate only the "true" side.  Similarly for the logical booleans "&&" and
+"||" if they know their result already.
+
+Functions like MPEXPR_TYPE_BINARY could return a status indicating operand
+out of range or whatever, to get an error back through mpz_expr etc.  That
+would want to be just an option, since plain mpz_add etc have no such
+return.
+
+Could have assignments like "a = b*c" modifying the input variables.
+Assignment could be an operator attribute, making it expect an lvalue.
+There would want to be a standard table without assignments available
+though, so user input could be safely parsed.
+
+The closing parenthesis table entry could specify the type of open paren it
+expects, so that "(" and ")" could match and "[" and "]" match but not a
+mixture of the two.  Currently "[" and "]" can be added, but there's no
+error on writing a mixed expression like "2*(3+4]".  Maybe also there could
+be a way to say that functions can only be written with one or the other
+style of parens.
+
+
+
+----------------
+Local variables:
+mode: text
+fill-column: 76
+End:
diff --git a/demos/expr/expr-impl.h b/demos/expr/expr-impl.h

new file mode 100644 (file)

index 0000000..e6050c5
--- /dev/null
+++ b/demos/expr/expr-impl.h
@@ -0,0 +1,135 @@
+/* Implementation specifics for expression evaluation.
+
+Copyright 2000, 2001, 2002, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+/* Same tests as gmp.h: use <stdarg.h> if the compiler is known to have it. */
+#if  defined (__STDC__)                                 \
+  || defined (__cplusplus)                              \
+  || defined (_AIX)                                     \
+  || defined (__DECC)                                   \
+  || (defined (__mips) && defined (_SYSTYPE_SVR4))      \
+  || defined (_MSC_VER)                                 \
+  || defined (_WIN32)
+#define HAVE_STDARG 1
+#include <stdarg.h>
+#else  /* none of the above: fall back to the old-style <varargs.h> */
+#define HAVE_STDARG 0
+#include <varargs.h>
+#endif
+
+#include "expr.h"
+
+
+#define isasciidigit(c)   (isascii (c) && isdigit (c))  /* ASCII digit; (c) is evaluated more than once */
+#define isasciicsym(c)    (isascii (c) && (isalnum(c) || (c) == '_'))  /* ASCII letter, digit or '_' */
+
+#define isasciidigit_in_base(c,base)                    \
+  (isascii (c)                                          \
+   && ((isdigit (c) && (c)-'0' < (base))                \
+       || (isupper (c) && (c)-'A'+10 < (base))          \
+       || (islower (c) && (c)-'a'+10 < (base))))  /* letters of either case are digits 10 upward */
+
+
+union mpX_t {  /* one operand, held as whichever of the three GMP types is in use */
+  mpz_t   z;  /* integer */
+  mpq_t   q;  /* rational */
+  mpf_t   f;  /* float */
+};
+
+typedef union mpX_t *mpX_ptr;  /* writable operand */
+typedef __gmp_const union mpX_t *mpX_srcptr;  /* read-only operand */
+
+typedef void (*mpexpr_fun_one_t) __GMP_PROTO ((mpX_ptr));
+typedef unsigned long (*mpexpr_fun_ui_one_t) __GMP_PROTO ((mpX_ptr));
+
+typedef void (*mpexpr_fun_0ary_t) __GMP_PROTO ((mpX_ptr));  /* result only, no operands */
+typedef int  (*mpexpr_fun_i_0ary_t) __GMP_PROTO ((void));  /* "i" forms return int instead of storing a result */
+
+typedef void (*mpexpr_fun_unary_t) __GMP_PROTO ((mpX_ptr, mpX_srcptr));  /* result, one operand */
+typedef void (*mpexpr_fun_unary_ui_t) __GMP_PROTO ((mpX_ptr, unsigned long));  /* "ui" forms take unsigned long as the last operand */
+typedef int  (*mpexpr_fun_i_unary_t) __GMP_PROTO ((mpX_srcptr));
+typedef int  (*mpexpr_fun_i_unary_ui_t) __GMP_PROTO ((unsigned long));
+
+typedef void (*mpexpr_fun_binary_t) __GMP_PROTO ((mpX_ptr, mpX_srcptr, mpX_srcptr));  /* result, two operands */
+typedef void (*mpexpr_fun_binary_ui_t) __GMP_PROTO ((mpX_ptr, mpX_srcptr, unsigned long));
+typedef int  (*mpexpr_fun_i_binary_t) __GMP_PROTO ((mpX_srcptr, mpX_srcptr));
+typedef int  (*mpexpr_fun_i_binary_ui_t) __GMP_PROTO ((mpX_srcptr, unsigned long));
+
+typedef void (*mpexpr_fun_ternary_t)
+     __GMP_PROTO ((mpX_ptr, mpX_srcptr, mpX_srcptr, mpX_srcptr));  /* result, three operands */
+typedef void (*mpexpr_fun_ternary_ui_t)
+     __GMP_PROTO ((mpX_ptr, mpX_srcptr, mpX_srcptr, unsigned long));
+typedef int (*mpexpr_fun_i_ternary_t)
+     __GMP_PROTO ((mpX_srcptr, mpX_srcptr, mpX_srcptr));
+typedef int (*mpexpr_fun_i_ternary_ui_t)
+     __GMP_PROTO ((mpX_srcptr, mpX_srcptr, unsigned long));
+
+typedef size_t (*mpexpr_fun_number_t)
+     __GMP_PROTO ((mpX_ptr, __gmp_const char *str, size_t len, int base));  /* NOTE(review): return presumably counts characters consumed -- confirm */
+typedef void (*mpexpr_fun_swap_t) __GMP_PROTO ((mpX_ptr, mpX_ptr));
+typedef unsigned long (*mpexpr_fun_get_ui_t) __GMP_PROTO ((mpX_srcptr));
+typedef void (*mpexpr_fun_set_si_t) __GMP_PROTO ((mpX_srcptr, long));  /* NOTE(review): a setter's destination is declared read-only; mpX_ptr looks intended -- confirm against callers */
+
+struct mpexpr_control_t {  /* element type of mpexpr_parse_t.control_stack */
+  __gmp_const struct mpexpr_operator_t  *op;  /* table entry of the operator or function */
+  int                                   argcount;  /* presumably arguments collected so far -- confirm in expr.c */
+};
+
+#define MPEXPR_VARIABLES  26  /* variables "a" through "z" */
+
+struct mpexpr_parse_t {  /* state handed to mpexpr_evaluate() */
+  __gmp_const struct mpexpr_operator_t  *table;  /* operator/function table */
+
+  mpX_ptr                               res;  /* where the result goes */
+  int                                   base;  /* base for numbers in the string */
+  unsigned long                         prec;  /* precision (an mpf_expr_a parameter) */
+  __gmp_const char                      *e;  /* expression text ... */
+  size_t                                elen;  /* ... and its length */
+  mpX_srcptr                            *var;  /* variable operands, NULL where not supplied */
+  int                                   error_code;  /* presumably an MPEXPR_RESULT_* value -- confirm */
+
+  int                                   token;
+  __gmp_const struct mpexpr_operator_t  *token_op;
+
+  union mpX_t                           *data_stack;  /* operand stack */
+  int                                   data_top;
+  int                                   data_alloc;
+  int                                   data_inited;
+
+  struct mpexpr_control_t               *control_stack;  /* operator/function stack */
+  int                                   control_top;
+  int                                   control_alloc;
+
+
+  mpexpr_fun_0ary_t                     mpX_clear;  /* from here on: operations for the operand type in use */
+  mpexpr_fun_i_unary_t                  mpX_ulong_p;
+  mpexpr_fun_get_ui_t                   mpX_get_ui;
+  mpexpr_fun_unary_ui_t                 mpX_init;
+  mpexpr_fun_number_t                   mpX_number;
+  mpexpr_fun_unary_t                    mpX_set;
+  mpexpr_fun_unary_t                    mpX_set_or_swap;
+  mpexpr_fun_set_si_t                   mpX_set_si;
+  mpexpr_fun_swap_t                     mpX_swap;
+};
+
+
+int mpexpr_evaluate __GMP_PROTO ((struct mpexpr_parse_t *p));  /* shared evaluator, see expr.c */
+int mpexpr_va_to_var __GMP_PROTO ((void *var[], va_list ap));  /* va_list comes from the <stdarg.h>/<varargs.h> include in this header */
+size_t mpexpr_mpz_number __GMP_PROTO ((mpz_ptr res,
+                                  __gmp_const char *e, size_t elen, int base));
diff --git a/demos/expr/expr.c b/demos/expr/expr.c

new file mode 100644 (file)

index 0000000..f78c321
--- /dev/null
+++ b/demos/expr/expr.c
@@ -0,0 +1,823 @@
+/* mpexpr_evaluate -- shared code for simple expression evaluation
+
+Copyright 2000, 2001, 2002, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <ctype.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "gmp.h"
+#include "expr-impl.h"
+
+
+/* Change this to "#define TRACE(x) x" to get some traces.  The trace
+   printfs junk up the code a bit, but it's very hard to tell what's going
+   on without them.  Set MPX_TRACE to a suitable output function for the
+   mpz/mpq/mpf being run (if you have the wrong trace function it'll
+   probably segv).
+
+   As defined here TRACE(x) expands to nothing, so whatever is written
+   inside a TRACE(), including the MPX_TRACE calls, is dropped by the
+   preprocessor.  */
+
+#define TRACE(x)
+#define MPX_TRACE  mpz_trace
+
+
+/* A few helper macros copied from gmp-impl.h.  Each one calls through a
+   variable named allocate_func, reallocate_func or free_func, which must be
+   in scope where the macro is used.  Sizes are given as element counts and
+   multiplied by sizeof(type) here.  */
+#define ALLOCATE_FUNC_TYPE(n,type) \
+  ((type *) (*allocate_func) ((n) * sizeof (type)))
+#define ALLOCATE_FUNC_LIMBS(n)   ALLOCATE_FUNC_TYPE (n, mp_limb_t)
+#define REALLOCATE_FUNC_TYPE(p, old_size, new_size, type) \
+  ((type *) (*reallocate_func)                            \
+   (p, (old_size) * sizeof (type), (new_size) * sizeof (type)))
+#define REALLOCATE_FUNC_LIMBS(p, old_size, new_size) \
+  REALLOCATE_FUNC_TYPE(p, old_size, new_size, mp_limb_t)
+#define FREE_FUNC_TYPE(p,n,type) (*free_func) (p, (n) * sizeof (type))
+#define FREE_FUNC_LIMBS(p,n)     FREE_FUNC_TYPE (p, n, mp_limb_t)
+/* Assertions expand to nothing in this file.  */
+#define ASSERT(x)
+
+
+
+/* All the error strings are just for diagnostic traces.  Only the error
+   code is actually returned.  */
+#define ERROR(str,code)                 \
+  {                                     \
+    TRACE (printf ("%s\n", str));       \
+    p->error_code = (code);             \
+    goto done;                          \
+  }
+
+
+/* Grow the array "ptr" of "type" from "alloc" elements to alloc+incr
+   elements, updating both "ptr" and "alloc" in place.  Needs a
+   reallocate_func variable in scope (see REALLOCATE_FUNC_TYPE).  */
+#define REALLOC(ptr, alloc, incr, type)                         \
+  do {                                                          \
+    int  new_alloc = (alloc) + (incr);                          \
+    ptr = REALLOCATE_FUNC_TYPE (ptr, alloc, new_alloc, type);   \
+    (alloc) = new_alloc;                                        \
+  } while (0)
+
+
+/* data stack top element, ie. the address of p->data_stack[p->data_top] */
+#define SP   (p->data_stack + p->data_top)
+
+/* Make sure there's room for another data element above current top, ie.
+   that slot data_top+1 is both allocated and initialized.  The array is
+   grown 20 elements at a time.  A slot is run through mpX_init (with
+   p->prec) only the first time it is reached, and data_inited counts how
+   many slots have been set up that way.
+   reallocate_func is fetched for when this macro is used in lookahead(). */
+#define DATA_SPACE()                                                    \
+  do {                                                                  \
+    if (p->data_top + 1 >= p->data_alloc)                               \
+      {                                                                 \
+       void *(*reallocate_func) (void *, size_t, size_t);              \
+       mp_get_memory_functions (NULL, &reallocate_func, NULL);         \
+       TRACE (printf ("grow stack from %d\n", p->data_alloc));         \
+       REALLOC (p->data_stack, p->data_alloc, 20, union mpX_t);        \
+      }                                                                 \
+    ASSERT (p->data_top + 1 <= p->data_inited);                         \
+    if (p->data_top + 1 == p->data_inited)                              \
+      {                                                                 \
+       TRACE (printf ("initialize %d\n", p->data_top + 1));            \
+       (*p->mpX_init) (&p->data_stack[p->data_top + 1], p->prec);      \
+       p->data_inited++;                                               \
+      }                                                                 \
+  } while (0)
+
+/* Make the slot above the current top the new top.  The slot must already
+   have been prepared with DATA_SPACE(); nothing is allocated or
+   initialized here.  */
+#define DATA_PUSH()                             \
+  do {                                          \
+    p->data_top++;                              \
+    ASSERT (p->data_top < p->data_alloc);       \
+    ASSERT (p->data_top < p->data_inited);      \
+  } while (0)
+
+/* Drop n elements from the top of the data stack.  The elements stay
+   initialized and are reused by later pushes.
+   the last stack entry is never popped, so top>=0 will be true */
+#define DATA_POP(n)             \
+  do {                          \
+    p->data_top -= (n);         \
+    ASSERT (p->data_top >= 0);  \
+  } while (0)
+
+
+/* lookahead() parses the next token.  Return 1 if successful, with some
+   extra data.  Return 0 if fail, with reason in p->error_code.
+
+   "prefix" is MPEXPR_TYPE_PREFIX if an operator with that attribute is
+   preferred, or 0 if an operator without is preferred. */
+
+/* Token codes stored in p->token by lookahead().  */
+#define TOKEN_EOF         -1   /* no extra data */
+#define TOKEN_VALUE       -2   /* pushed onto data stack */
+#define TOKEN_OPERATOR    -3   /* stored in p->token_op */
+#define TOKEN_FUNCTION    -4   /* stored in p->token_op */
+
+/* Printable name of a token code, for use in traces.  */
+#define TOKEN_NAME(n)                           \
+  ((n) == TOKEN_EOF ? "TOKEN_EOF"               \
+   : (n) == TOKEN_VALUE ? "TOKEN_VALUE"         \
+   : (n) == TOKEN_OPERATOR ? "TOKEN_OPERATOR"   \
+   : (n) == TOKEN_FUNCTION ? "TOKEN_FUNCTION"   \
+   : "UNKNOWN TOKEN")
+
+/* Functions default to being parsed as whole words, operators to match just
+   at the start of the string.  The type flags override this.
+
+   An entry with precedence 0 is a function: it is a whole word unless
+   flagged MPEXPR_TYPE_OPERATOR.  Any other entry is an operator: it is a
+   whole word only if flagged MPEXPR_TYPE_WHOLEWORD.  */
+#define WHOLEWORD(op)                             \
+  ((op)->precedence == 0                          \
+   ? (! ((op)->type & MPEXPR_TYPE_OPERATOR))      \
+   :   ((op)->type & MPEXPR_TYPE_WHOLEWORD))
+
+/* isascii is tested first, so isspace is only called on ASCII values. */
+#define isasciispace(c)   (isascii (c) && isspace (c))
+
+/* Read the next token from p->e, advancing p->e and p->elen past it, and
+   store its code in p->token.  Operators and functions also set
+   p->token_op; numbers and variables are pushed onto the data stack.
+
+   The longest matching table entry wins.  Between entries of equal length
+   the one whose MPEXPR_TYPE_PREFIX bit equals "prefix" is preferred.  If no
+   table entry matches, a number is tried, then a single letter variable.
+
+   Returns 1 on success, or 0 with the reason in p->error_code.  */
+static int
+lookahead (struct mpexpr_parse_t *p, int prefix)
+{
+  __gmp_const struct mpexpr_operator_t  *op, *op_found;
+  size_t  oplen, oplen_found, wlen;
+  int     i;
+
+  /* skip white space */
+  while (p->elen > 0 && isasciispace (*p->e))
+    p->e++, p->elen--;
+
+  if (p->elen == 0)
+    {
+      TRACE (printf ("lookahead EOF\n"));
+      p->token = TOKEN_EOF;
+      return 1;
+    }
+
+  /* Any value token found below is written to SP+1, so get that slot
+     ready before doing anything else. */
+  DATA_SPACE ();
+
+  /* Get extent of whole word. */
+  for (wlen = 0; wlen < p->elen; wlen++)
+    if (! isasciicsym (p->e[wlen]))
+      break;
+
+  TRACE (printf ("lookahead at: \"%.*s\" length %u, word %u\n",
+                (int) p->elen, p->e, (unsigned) p->elen, (unsigned) wlen));
+
+  op_found = NULL;
+  oplen_found = 0;
+  for (op = p->table; op->name != NULL; op++)
+    {
+      if (op->type == MPEXPR_TYPE_NEW_TABLE)
+       {
+         /* The name field of a NEW_TABLE entry holds the address of the
+            next table.  Step to one before it so the loop's op++ lands on
+            its first entry.  This message is diagnostic only, so it goes
+            through TRACE and never reaches stdout in a normal build. */
+         TRACE (printf ("new\n"));
+         op = (__gmp_const struct mpexpr_operator_t *) op->name - 1;
+         continue;
+       }
+
+      oplen = strlen (op->name);
+      if (! ((WHOLEWORD (op) ? wlen == oplen : p->elen >= oplen)
+            && memcmp (p->e, op->name, oplen) == 0))
+       continue;
+
+      /* Shorter matches don't replace longer previous ones. */
+      if (op_found && oplen < oplen_found)
+       continue;
+
+      /* On a match of equal length to a previous one, the old match isn't
+        replaced if it has the preferred prefix, and if it doesn't then
+        it's not replaced if the new one also doesn't.  */
+      if (op_found && oplen == oplen_found
+         && ((op_found->type & MPEXPR_TYPE_PREFIX) == prefix
+             || (op->type & MPEXPR_TYPE_PREFIX) != prefix))
+       continue;
+
+      /* This is now either the first match seen, or a longer than previous
+        match, or an equal to previous one but with a preferred prefix. */
+      op_found = op;
+      oplen_found = oplen;
+    }
+
+  if (op_found)
+    {
+      p->e += oplen_found, p->elen -= oplen_found;
+
+      if (op_found->type == MPEXPR_TYPE_VARIABLE)
+       {
+         /* The matched entry only introduces a variable, the letter
+            naming it comes next. */
+         if (p->elen == 0)
+           ERROR ("end of string expecting a variable",
+                  MPEXPR_RESULT_PARSE_ERROR);
+         i = p->e[0] - 'a';
+         if (i < 0 || i >= MPEXPR_VARIABLES)
+           ERROR ("bad variable name", MPEXPR_RESULT_BAD_VARIABLE);
+         goto variable;
+       }
+
+      /* Precedence 0 marks a function, anything else is an operator. */
+      if (op_found->precedence == 0)
+       {
+         TRACE (printf ("lookahead function: %s\n", op_found->name));
+         p->token = TOKEN_FUNCTION;
+         p->token_op = op_found;
+         return 1;
+       }
+      else
+       {
+         TRACE (printf ("lookahead operator: %s\n", op_found->name));
+         p->token = TOKEN_OPERATOR;
+         p->token_op = op_found;
+         return 1;
+       }
+    }
+
+  /* Not in the table, try for a number.  mpX_number returns how many
+     characters it used, 0 meaning there's no number here. */
+  oplen = (*p->mpX_number) (SP+1, p->e, p->elen, p->base);
+  if (oplen != 0)
+    {
+      p->e += oplen, p->elen -= oplen;
+      p->token = TOKEN_VALUE;
+      DATA_PUSH ();
+      TRACE (MPX_TRACE ("lookahead number", SP));
+      return 1;
+    }
+
+  /* Maybe an unprefixed one character variable */
+  i = p->e[0] - 'a';
+  if (wlen == 1 && i >= 0 && i < MPEXPR_VARIABLES)
+    {
+    variable:
+      p->e++, p->elen--;
+      if (p->var[i] == NULL)
+       ERROR ("NULL variable", MPEXPR_RESULT_BAD_VARIABLE);
+      TRACE (printf ("lookahead variable: var[%d] = ", i);
+            MPX_TRACE ("", p->var[i]));
+      p->token = TOKEN_VALUE;
+      DATA_PUSH ();
+      (*p->mpX_set) (SP, p->var[i]);
+      return 1;
+    }
+
+  ERROR ("no token matched", MPEXPR_RESULT_PARSE_ERROR);
+
+ done:
+  return 0;
+}
+
+
+/* control stack current top element, ie. the address of
+   p->control_stack[p->control_top] */
+#define CP   (p->control_stack + p->control_top)
+
+/* make sure there's room for another control element above current top,
+   growing the array by 20 elements when slot control_top+1 would be outside
+   it.  Needs a reallocate_func variable in scope (see REALLOC).  */
+#define CONTROL_SPACE()                                                    \
+  do {                                                                     \
+    if (p->control_top + 1 >= p->control_alloc)                            \
+      {                                                                    \
+       TRACE (printf ("grow control stack from %d\n", p->control_alloc)); \
+       REALLOC (p->control_stack, p->control_alloc, 20,                   \
+                struct mpexpr_control_t);                                 \
+      }                                                                    \
+  } while (0)
+
+/* Push an operator on the control stack, claiming currently to have the
+   given number of args ready.  Local variable "op" is used in case opptr is
+   a reference through CP: opptr is read before CONTROL_SPACE() runs, since
+   that may reallocate the stack and so move what CP points at.  */
+#define CONTROL_PUSH(opptr,args)                        \
+  do {                                                  \
+    __gmp_const struct mpexpr_operator_t *op = opptr;   \
+    struct mpexpr_control_t *cp;                        \
+    CONTROL_SPACE ();                                   \
+    p->control_top++;                                   \
+    ASSERT (p->control_top < p->control_alloc);         \
+    cp = CP;                                            \
+    cp->op = op;                                        \
+    cp->argcount = (args);                              \
+    TRACE_CONTROL("control stack push:");               \
+  } while (0)
+
+/* Discard the top control stack entry.
+   The special operator_done is never popped, so top>=0 will hold. */
+#define CONTROL_POP()                           \
+  do {                                          \
+    p->control_top--;                           \
+    ASSERT (p->control_top >= 0);               \
+    TRACE_CONTROL ("control stack pop:");       \
+  } while (0)
+
+#define TRACE_CONTROL(str)                              \
+  TRACE ({                                              \
+    int  i;                                             \
+    printf ("%s depth %d:", str, p->control_top);       \
+    for (i = 0; i <= p->control_top; i++)               \
+      printf (" \"%s\"(%d)",                            \
+             p->control_stack[i].op->name,             \
+             p->control_stack[i].argcount);            \
+    printf ("\n");                                      \
+  });
+
+
+/* Fetch the next token with lookahead(), jumping to the enclosing
+   function's "done" label if that fails (p->error_code has then already
+   been set by lookahead).  */
+#define LOOKAHEAD(prefix)               \
+  do {                                  \
+    if (! lookahead (p, prefix))        \
+      goto done;                        \
+  } while (0)
+
+/* Fail with MPEXPR_RESULT_NOT_UI unless the mpX_ulong_p hook returns
+   non-zero for operand n.  Used ahead of each mpX_get_ui call.  */
+#define CHECK_UI(n)                                                     \
+  do {                                                                  \
+    if (! (*p->mpX_ulong_p) (n))                                        \
+      ERROR ("operand doesn't fit ulong", MPEXPR_RESULT_NOT_UI);        \
+  } while (0)
+
+/* Fail with MPEXPR_RESULT_PARSE_ERROR unless the control stack top entry
+   has collected exactly n arguments.  str only appears in the trace.  */
+#define CHECK_ARGCOUNT(str,n)                                              \
+  do {                                                                     \
+    if (CP->argcount != (n))                                               \
+      {                                                                    \
+       TRACE (printf ("wrong number of arguments for %s, got %d want %d", \
+                      str, CP->argcount, n));                             \
+       ERROR ("", MPEXPR_RESULT_PARSE_ERROR);                             \
+      }                                                                    \
+  } while (0)
+
+
+/* There are two basic states here.  In both p->token is the next token.
+
+   "another_expr" is when a whole expression should be parsed.  This means a
+   literal or variable value possibly followed by an operator, or a function
+   or prefix operator followed by a further whole expression.
+
+   "another_operator" is when an expression has been parsed and its value is
+   on the top of the data stack (SP) and an optional further postfix or
+   infix operator should be parsed.
+
+   In "another_operator" precedences determine whether to push the operator
+   onto the control stack, or instead go to "apply_control" to reduce the
+   operator currently on top of the control stack.
+
+   When an operator has both a prefix and postfix/infix form, a LOOKAHEAD()
+   for "another_expr" will seek the prefix form, a LOOKAHEAD() for
+   "another_operator" will seek the postfix/infix form.  The grammar is
+   simple enough that the next state is known before reading the next token.
+
+   Argument count checking guards against functions consuming the wrong
+   number of operands from the data stack.  The same checks are applied to
+   operators, but will always pass since a UNARY or BINARY will only ever
+   parse with the correct operands.  */
+
+int
+mpexpr_evaluate (struct mpexpr_parse_t *p)
+{
+  void *(*allocate_func) (size_t);
+  void *(*reallocate_func) (void *, size_t, size_t);
+  void (*free_func) (void *, size_t);
+
+  mp_get_memory_functions (&allocate_func, &reallocate_func, &free_func);
+
+  TRACE (printf ("mpexpr_evaluate() base %d \"%.*s\"\n",
+                p->base, (int) p->elen, p->e));
+
+  /* "done" is a special sentinel at the bottom of the control stack,
+     precedence -1 is lower than any normal operator.  */
+  {
+    static __gmp_const struct mpexpr_operator_t  operator_done
+      = { "DONE", NULL, MPEXPR_TYPE_DONE, -1 };
+
+    p->control_alloc = 20;
+    p->control_stack = ALLOCATE_FUNC_TYPE (p->control_alloc,
+                                          struct mpexpr_control_t);
+    p->control_top = 0;
+    CP->op = &operator_done;
+    CP->argcount = 1;
+  }
+
+  p->data_inited = 0;
+  p->data_alloc = 20;
+  p->data_stack = ALLOCATE_FUNC_TYPE (p->data_alloc, union mpX_t);
+  p->data_top = -1;
+
+  p->error_code = MPEXPR_RESULT_OK;
+
+
+ another_expr_lookahead:
+  LOOKAHEAD (MPEXPR_TYPE_PREFIX);
+  TRACE (printf ("another expr\n"));
+
+  /*another_expr:*/
+  switch (p->token) {
+  case TOKEN_VALUE:
+    goto another_operator_lookahead;
+
+  case TOKEN_OPERATOR:
+    TRACE (printf ("operator %s\n", p->token_op->name));
+    if (! (p->token_op->type & MPEXPR_TYPE_PREFIX))
+      ERROR ("expected a prefix operator", MPEXPR_RESULT_PARSE_ERROR);
+
+    CONTROL_PUSH (p->token_op, 1);
+    goto another_expr_lookahead;
+
+  case TOKEN_FUNCTION:
+    CONTROL_PUSH (p->token_op, 1);
+
+    if (p->token_op->type & MPEXPR_TYPE_CONSTANT)
+      goto apply_control_lookahead;
+
+    LOOKAHEAD (MPEXPR_TYPE_PREFIX);
+    if (! (p->token == TOKEN_OPERATOR
+          && p->token_op->type == MPEXPR_TYPE_OPENPAREN))
+      ERROR ("expected open paren for function", MPEXPR_RESULT_PARSE_ERROR);
+
+    TRACE (printf ("open paren for function \"%s\"\n", CP->op->name));
+
+    if ((CP->op->type & MPEXPR_TYPE_MASK_ARGCOUNT) == MPEXPR_TYPE_NARY(0))
+      {
+       LOOKAHEAD (0);
+       if (! (p->token == TOKEN_OPERATOR
+              && p->token_op->type == MPEXPR_TYPE_CLOSEPAREN))
+         ERROR ("expected close paren for 0ary function",
+                MPEXPR_RESULT_PARSE_ERROR);
+       goto apply_control_lookahead;
+      }
+
+    goto another_expr_lookahead;
+  }
+  ERROR ("unrecognised start of expression", MPEXPR_RESULT_PARSE_ERROR);
+
+
+ another_operator_lookahead:
+  LOOKAHEAD (0);
+ another_operator:
+  TRACE (printf ("another operator maybe: %s\n", TOKEN_NAME(p->token)));
+
+  switch (p->token) {
+  case TOKEN_EOF:
+    goto apply_control;
+
+  case TOKEN_OPERATOR:
+    /* The next operator is compared to the one on top of the control stack.
+       If the next is lower precedence, or the same precedence and not
+       right-associative, then reduce using the control stack and look at
+       the next operator again later.  */
+
+#define PRECEDENCE_TEST_REDUCE(tprec,cprec,ttype,ctype)                 \
+    ((tprec) < (cprec)                                                  \
+     || ((tprec) == (cprec) && ! ((ttype) & MPEXPR_TYPE_RIGHTASSOC)))
+
+    if (PRECEDENCE_TEST_REDUCE (p->token_op->precedence, CP->op->precedence,
+                               p->token_op->type,       CP->op->type))
+      {
+       TRACE (printf ("defer operator: %s (prec %d vs %d, type 0x%X)\n",
+                      p->token_op->name,
+                      p->token_op->precedence, CP->op->precedence,
+                      p->token_op->type));
+       goto apply_control;
+      }
+
+    /* An argsep is a binary operator, but is never pushed on the control
+       stack, it just accumulates an extra argument for a function. */
+    if (p->token_op->type == MPEXPR_TYPE_ARGSEP)
+      {
+       if (CP->op->precedence != 0)
+         ERROR ("ARGSEP not in a function call", MPEXPR_RESULT_PARSE_ERROR);
+
+       TRACE (printf ("argsep for function \"%s\"(%d)\n",
+                      CP->op->name, CP->argcount));
+
+#define IS_PAIRWISE(type)                                               \
+       (((type) & (MPEXPR_TYPE_MASK_ARGCOUNT | MPEXPR_TYPE_PAIRWISE))  \
+        == (MPEXPR_TYPE_BINARY | MPEXPR_TYPE_PAIRWISE))
+
+       if (IS_PAIRWISE (CP->op->type) && CP->argcount >= 2)
+         {
+           TRACE (printf ("    will reduce pairwise now\n"));
+           CP->argcount--;
+           CONTROL_PUSH (CP->op, 2);
+           goto apply_control;
+         }
+
+       CP->argcount++;
+       goto another_expr_lookahead;
+      }
+
+    switch (p->token_op->type & MPEXPR_TYPE_MASK_ARGCOUNT) {
+    case MPEXPR_TYPE_NARY(1):
+      /* Postfix unary operators can always be applied immediately.  The
+        easiest way to do this is just push it on the control stack and go
+        to the normal control stack reduction code. */
+
+      TRACE (printf ("postfix unary operator: %s\n", p->token_op->name));
+      if (p->token_op->type & MPEXPR_TYPE_PREFIX)
+       ERROR ("prefix unary operator used postfix",
+              MPEXPR_RESULT_PARSE_ERROR);
+      CONTROL_PUSH (p->token_op, 1);
+      goto apply_control_lookahead;
+
+    case MPEXPR_TYPE_NARY(2):
+      CONTROL_PUSH (p->token_op, 2);
+      goto another_expr_lookahead;
+
+    case MPEXPR_TYPE_NARY(3):
+      CONTROL_PUSH (p->token_op, 1);
+      goto another_expr_lookahead;
+    }
+
+    TRACE (printf ("unrecognised operator \"%s\" type: 0x%X",
+                  CP->op->name, CP->op->type));
+    ERROR ("", MPEXPR_RESULT_PARSE_ERROR);
+    break;
+
+  default:
+    TRACE (printf ("expecting an operator, got token %d", p->token));
+    ERROR ("", MPEXPR_RESULT_PARSE_ERROR);
+  }
+
+
+ apply_control_lookahead:
+  LOOKAHEAD (0);
+ apply_control:
+  /* Apply the top element CP of the control stack.  Data values are SP,
+     SP-1, etc.  Result is left as stack top SP after popping consumed
+     values.
+
+     The use of sp as a duplicate of SP will help compilers that can't
+     otherwise recognise the various uses of SP as common subexpressions.  */
+
+  TRACE (printf ("apply control: nested %d, \"%s\" 0x%X, %d args\n",
+                p->control_top, CP->op->name, CP->op->type, CP->argcount));
+
+  TRACE (printf ("apply 0x%X-ary\n",
+                CP->op->type & MPEXPR_TYPE_MASK_ARGCOUNT));
+  switch (CP->op->type & MPEXPR_TYPE_MASK_ARGCOUNT) {
+  case MPEXPR_TYPE_NARY(0):
+    {
+      mpX_ptr  sp;
+      DATA_SPACE ();
+      DATA_PUSH ();
+      sp = SP;
+      switch (CP->op->type & MPEXPR_TYPE_MASK_ARGSTYLE) {
+      case 0:
+       (* (mpexpr_fun_0ary_t) CP->op->fun) (sp);
+       break;
+      case MPEXPR_TYPE_RESULT_INT:
+       (*p->mpX_set_si) (sp, (long) (* (mpexpr_fun_i_0ary_t) CP->op->fun) ());
+       break;
+      default:
+       ERROR ("unrecognised 0ary argument calling style",
+              MPEXPR_RESULT_BAD_TABLE);
+      }
+    }
+    break;
+
+  case MPEXPR_TYPE_NARY(1):
+    {
+      mpX_ptr  sp = SP;
+      CHECK_ARGCOUNT ("unary", 1);
+      TRACE (MPX_TRACE ("before", sp));
+
+      switch (CP->op->type & MPEXPR_TYPE_MASK_SPECIAL) {
+      case 0:
+       /* not a special */
+       break;
+
+      case MPEXPR_TYPE_DONE & MPEXPR_TYPE_MASK_SPECIAL:
+       TRACE (printf ("special done\n"));
+       goto done;
+
+      case MPEXPR_TYPE_LOGICAL_NOT & MPEXPR_TYPE_MASK_SPECIAL:
+       TRACE (printf ("special logical not\n"));
+       (*p->mpX_set_si)
+         (sp, (long) ((* (mpexpr_fun_i_unary_t) CP->op->fun) (sp) == 0));
+       goto apply_control_done;
+
+      case MPEXPR_TYPE_CLOSEPAREN & MPEXPR_TYPE_MASK_SPECIAL:
+       CONTROL_POP ();
+       if (CP->op->type == MPEXPR_TYPE_OPENPAREN)
+         {
+           TRACE (printf ("close paren matching open paren\n"));
+           CONTROL_POP ();
+           goto another_operator;
+         }
+       if (CP->op->precedence == 0)
+         {
+           TRACE (printf ("close paren for function\n"));
+           goto apply_control;
+         }
+       ERROR ("unexpected close paren", MPEXPR_RESULT_PARSE_ERROR);
+
+      default:
+       TRACE (printf ("unrecognised special unary operator 0x%X",
+                      CP->op->type & MPEXPR_TYPE_MASK_SPECIAL));
+       ERROR ("", MPEXPR_RESULT_BAD_TABLE);
+      }
+
+      switch (CP->op->type & MPEXPR_TYPE_MASK_ARGSTYLE) {
+      case 0:
+       (* (mpexpr_fun_unary_t) CP->op->fun) (sp, sp);
+       break;
+      case MPEXPR_TYPE_LAST_UI:
+       CHECK_UI (sp);
+       (* (mpexpr_fun_unary_ui_t) CP->op->fun)
+         (sp, (*p->mpX_get_ui) (sp));
+       break;
+      case MPEXPR_TYPE_RESULT_INT:
+       (*p->mpX_set_si)
+         (sp, (long) (* (mpexpr_fun_i_unary_t) CP->op->fun) (sp));
+       break;
+      case MPEXPR_TYPE_RESULT_INT | MPEXPR_TYPE_LAST_UI:
+       CHECK_UI (sp);
+       (*p->mpX_set_si)
+         (sp,
+          (long) (* (mpexpr_fun_i_unary_ui_t) CP->op->fun)
+          ((*p->mpX_get_ui) (sp)));
+       break;
+      default:
+       ERROR ("unrecognised unary argument calling style",
+              MPEXPR_RESULT_BAD_TABLE);
+      }
+    }
+    break;
+
+  case MPEXPR_TYPE_NARY(2):
+    {
+      mpX_ptr  sp;
+
+      /* pairwise functions are allowed to have just one argument */
+      if ((CP->op->type & MPEXPR_TYPE_PAIRWISE)
+         && CP->op->precedence == 0
+         && CP->argcount == 1)
+       goto apply_control_done;
+
+      CHECK_ARGCOUNT ("binary", 2);
+      DATA_POP (1);
+      sp = SP;
+      TRACE (MPX_TRACE ("lhs", sp);
+            MPX_TRACE ("rhs", sp+1));
+
+      if (CP->op->type & MPEXPR_TYPE_MASK_CMP)
+       {
+         int  type = CP->op->type;
+         int  cmp = (* (mpexpr_fun_i_binary_t) CP->op->fun)
+           (sp, sp+1);
+         (*p->mpX_set_si)
+           (sp,
+            (long)
+            ((  (cmp  < 0) & ((type & MPEXPR_TYPE_MASK_CMP_LT) != 0))
+             | ((cmp == 0) & ((type & MPEXPR_TYPE_MASK_CMP_EQ) != 0))
+             | ((cmp  > 0) & ((type & MPEXPR_TYPE_MASK_CMP_GT) != 0))));
+         goto apply_control_done;
+       }
+
+      switch (CP->op->type & MPEXPR_TYPE_MASK_SPECIAL) {
+      case 0:
+       /* not a special */
+       break;
+
+      case MPEXPR_TYPE_QUESTION & MPEXPR_TYPE_MASK_SPECIAL:
+       ERROR ("'?' without ':'", MPEXPR_RESULT_PARSE_ERROR);
+
+      case MPEXPR_TYPE_COLON & MPEXPR_TYPE_MASK_SPECIAL:
+       TRACE (printf ("special colon\n"));
+       CONTROL_POP ();
+       if (CP->op->type != MPEXPR_TYPE_QUESTION)
+         ERROR ("':' without '?'", MPEXPR_RESULT_PARSE_ERROR);
+
+       CP->argcount--;
+       DATA_POP (1);
+       sp--;
+       TRACE (MPX_TRACE ("query", sp);
+              MPX_TRACE ("true",  sp+1);
+              MPX_TRACE ("false", sp+2));
+       (*p->mpX_set)
+         (sp, (* (mpexpr_fun_i_unary_t) CP->op->fun) (sp)
+          ? sp+1 : sp+2);
+       goto apply_control_done;
+
+      case MPEXPR_TYPE_LOGICAL_AND & MPEXPR_TYPE_MASK_SPECIAL:
+       TRACE (printf ("special logical and\n"));
+       (*p->mpX_set_si)
+         (sp,
+          (long)
+          ((* (mpexpr_fun_i_unary_t) CP->op->fun) (sp)
+           && (* (mpexpr_fun_i_unary_t) CP->op->fun) (sp+1)));
+       goto apply_control_done;
+
+      case MPEXPR_TYPE_LOGICAL_OR & MPEXPR_TYPE_MASK_SPECIAL:
+       TRACE (printf ("special logical and\n"));
+       (*p->mpX_set_si)
+         (sp,
+          (long)
+          ((* (mpexpr_fun_i_unary_t) CP->op->fun) (sp)
+           || (* (mpexpr_fun_i_unary_t) CP->op->fun) (sp+1)));
+       goto apply_control_done;
+
+      case MPEXPR_TYPE_MAX & MPEXPR_TYPE_MASK_SPECIAL:
+       TRACE (printf ("special max\n"));
+       if ((* (mpexpr_fun_i_binary_t) CP->op->fun) (sp, sp+1) < 0)
+         (*p->mpX_swap) (sp, sp+1);
+       goto apply_control_done;
+      case MPEXPR_TYPE_MIN & MPEXPR_TYPE_MASK_SPECIAL:
+       TRACE (printf ("special min\n"));
+       if ((* (mpexpr_fun_i_binary_t) CP->op->fun) (sp, sp+1) > 0)
+         (*p->mpX_swap) (sp, sp+1);
+       goto apply_control_done;
+
+      default:
+       ERROR ("unrecognised special binary operator",
+              MPEXPR_RESULT_BAD_TABLE);
+      }
+
+      switch (CP->op->type & MPEXPR_TYPE_MASK_ARGSTYLE) {
+      case 0:
+       (* (mpexpr_fun_binary_t) CP->op->fun) (sp, sp, sp+1);
+       break;
+      case MPEXPR_TYPE_LAST_UI:
+       CHECK_UI (sp+1);
+       (* (mpexpr_fun_binary_ui_t) CP->op->fun)
+         (sp, sp, (*p->mpX_get_ui) (sp+1));
+       break;
+      case MPEXPR_TYPE_RESULT_INT:
+       (*p->mpX_set_si)
+         (sp,
+          (long) (* (mpexpr_fun_i_binary_t) CP->op->fun) (sp, sp+1));
+       break;
+      case MPEXPR_TYPE_LAST_UI | MPEXPR_TYPE_RESULT_INT:
+       CHECK_UI (sp+1);
+       (*p->mpX_set_si)
+         (sp,
+          (long) (* (mpexpr_fun_i_binary_ui_t) CP->op->fun)
+          (sp, (*p->mpX_get_ui) (sp+1)));
+       break;
+      default:
+       ERROR ("unrecognised binary argument calling style",
+              MPEXPR_RESULT_BAD_TABLE);
+      }
+    }
+    break;
+
+  case MPEXPR_TYPE_NARY(3):
+    {
+      mpX_ptr  sp;
+
+      CHECK_ARGCOUNT ("ternary", 3);
+      DATA_POP (2);
+      sp = SP;
+      TRACE (MPX_TRACE ("arg1", sp);
+            MPX_TRACE ("arg2", sp+1);
+            MPX_TRACE ("arg3", sp+1));
+
+      switch (CP->op->type & MPEXPR_TYPE_MASK_ARGSTYLE) {
+      case 0:
+       (* (mpexpr_fun_ternary_t) CP->op->fun) (sp, sp, sp+1, sp+2);
+       break;
+      case MPEXPR_TYPE_LAST_UI:
+       CHECK_UI (sp+2);
+       (* (mpexpr_fun_ternary_ui_t) CP->op->fun)
+         (sp, sp, sp+1, (*p->mpX_get_ui) (sp+2));
+       break;
+      case MPEXPR_TYPE_RESULT_INT:
+       (*p->mpX_set_si)
+         (sp,
+          (long) (* (mpexpr_fun_i_ternary_t) CP->op->fun)
+          (sp, sp+1, sp+2));
+       break;
+      case MPEXPR_TYPE_LAST_UI | MPEXPR_TYPE_RESULT_INT:
+       CHECK_UI (sp+2);
+       (*p->mpX_set_si)
+         (sp,
+          (long) (* (mpexpr_fun_i_ternary_ui_t) CP->op->fun)
+          (sp, sp+1, (*p->mpX_get_ui) (sp+2)));
+       break;
+      default:
+       ERROR ("unrecognised binary argument calling style",
+              MPEXPR_RESULT_BAD_TABLE);
+      }
+    }
+    break;
+
+  default:
+    TRACE (printf ("unrecognised operator type: 0x%X\n", CP->op->type));
+    ERROR ("", MPEXPR_RESULT_PARSE_ERROR);
+  }
+
+ apply_control_done:
+  TRACE (MPX_TRACE ("result", SP));
+  CONTROL_POP ();
+  goto another_operator;
+
+ done:
+  if (p->error_code == MPEXPR_RESULT_OK)
+    {
+      if (p->data_top != 0)
+       {
+         TRACE (printf ("data stack want top at 0, got %d\n", p->data_top));
+         p->error_code = MPEXPR_RESULT_PARSE_ERROR;
+       }
+      else
+       (*p->mpX_set_or_swap) (p->res, SP);
+    }
+
+  {
+    int  i;
+    for (i = 0; i < p->data_inited; i++)
+      {
+       TRACE (printf ("clear %d\n", i));
+       (*p->mpX_clear) (p->data_stack+i);
+      }
+  }
+
+  FREE_FUNC_TYPE (p->data_stack, p->data_alloc, union mpX_t);
+  FREE_FUNC_TYPE (p->control_stack, p->control_alloc, struct mpexpr_control_t);
+
+  return p->error_code;
+}
diff --git a/demos/expr/expr.h b/demos/expr/expr.h

new file mode 100644 (file)

index 0000000..c3525b6
--- /dev/null
+++ b/demos/expr/expr.h
@@ -0,0 +1,129 @@
+/* Header for expression evaluation.
+
+Copyright 2000, 2001, 2002, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#ifndef __EXPR_H__
+#define __EXPR_H__
+
+#define MPEXPR_RESULT_OK            0  /* evaluation succeeded */
+#define MPEXPR_RESULT_BAD_VARIABLE  1  /* e.g. too many variables passed */
+#define MPEXPR_RESULT_BAD_TABLE     2  /* unusable operator table entry */
+#define MPEXPR_RESULT_PARSE_ERROR   3  /* expression is malformed */
+#define MPEXPR_RESULT_NOT_UI        4  /* presumably: operand not a ulong */
+
+
+/* basic types: the argument count, held in bits 8 to 11 of the type */
+#define MPEXPR_TYPE_NARY(n)       ((n) * 0x0100)
+#define MPEXPR_TYPE_MASK_ARGCOUNT MPEXPR_TYPE_NARY(0xF)
+#define MPEXPR_TYPE_0ARY          MPEXPR_TYPE_NARY(0)
+#define MPEXPR_TYPE_UNARY         MPEXPR_TYPE_NARY(1)
+#define MPEXPR_TYPE_BINARY        MPEXPR_TYPE_NARY(2)
+#define MPEXPR_TYPE_TERNARY       MPEXPR_TYPE_NARY(3)
+
+/* options for all: calling style, selected by MPEXPR_TYPE_MASK_ARGSTYLE */
+#define MPEXPR_TYPE_LAST_UI       0x0010  /* last argument is passed as a ui */
+#define MPEXPR_TYPE_RESULT_INT    0x0020  /* function returns its result */
+#define MPEXPR_TYPE_MASK_ARGSTYLE 0x0030
+/* shorthands: an argument count combined with the calling style options */
+#define MPEXPR_TYPE_UNARY_UI     (MPEXPR_TYPE_UNARY   | MPEXPR_TYPE_LAST_UI)
+#define MPEXPR_TYPE_I_UNARY      (MPEXPR_TYPE_UNARY   | MPEXPR_TYPE_RESULT_INT)
+#define MPEXPR_TYPE_I_UNARY_UI   (MPEXPR_TYPE_I_UNARY | MPEXPR_TYPE_LAST_UI)
+#define MPEXPR_TYPE_BINARY_UI    (MPEXPR_TYPE_BINARY  | MPEXPR_TYPE_LAST_UI)
+#define MPEXPR_TYPE_I_BINARY     (MPEXPR_TYPE_BINARY  | MPEXPR_TYPE_RESULT_INT)
+#define MPEXPR_TYPE_I_BINARY_UI  (MPEXPR_TYPE_I_BINARY| MPEXPR_TYPE_LAST_UI)
+#define MPEXPR_TYPE_TERNARY_UI   (MPEXPR_TYPE_TERNARY | MPEXPR_TYPE_LAST_UI)
+#define MPEXPR_TYPE_I_TERNARY    (MPEXPR_TYPE_TERNARY | MPEXPR_TYPE_RESULT_INT)
+#define MPEXPR_TYPE_I_TERNARY_UI (MPEXPR_TYPE_I_TERNARY|MPEXPR_TYPE_LAST_UI)
+
+/* 0ary with options */
+#define MPEXPR_TYPE_CONSTANT      (MPEXPR_TYPE_0ARY | 0x0040)
+
+/* unary options (bit 0x0040 is reused, its meaning depends on the arity) */
+#define MPEXPR_TYPE_PREFIX        0x0040
+
+/* binary options */
+#define MPEXPR_TYPE_RIGHTASSOC    0x0040
+#define MPEXPR_TYPE_PAIRWISE      0x0080
+
+#define MPEXPR_TYPE_MASK_SPECIAL  0x000F  /* low 4 bits: special operator code */
+
+/* unary specials */
+#define MPEXPR_TYPE_NEW_TABLE     (MPEXPR_TYPE_UNARY | 0x001)
+#define MPEXPR_TYPE_DONE          (MPEXPR_TYPE_UNARY | 0x002)
+#define MPEXPR_TYPE_VARIABLE      (MPEXPR_TYPE_UNARY | 0x003)
+#define MPEXPR_TYPE_LOGICAL_NOT   (MPEXPR_TYPE_UNARY | 0x004)
+#define MPEXPR_TYPE_CLOSEPAREN    (MPEXPR_TYPE_UNARY | 0x005)
+#define MPEXPR_TYPE_OPENPAREN     (MPEXPR_TYPE_CLOSEPAREN | MPEXPR_TYPE_PREFIX)
+
+/* binary specials */
+#define MPEXPR_TYPE_LOGICAL_AND   (MPEXPR_TYPE_BINARY | 0x001)
+#define MPEXPR_TYPE_LOGICAL_OR    (MPEXPR_TYPE_BINARY | 0x002)
+#define MPEXPR_TYPE_ARGSEP        (MPEXPR_TYPE_BINARY | 0x003)
+#define MPEXPR_TYPE_QUESTION      (MPEXPR_TYPE_BINARY | 0x004)
+#define MPEXPR_TYPE_COLON         (MPEXPR_TYPE_BINARY | 0x005)
+#define MPEXPR_TYPE_MAX           (MPEXPR_TYPE_BINARY | 0x006)
+#define MPEXPR_TYPE_MIN           (MPEXPR_TYPE_BINARY | 0x007)
+#define MPEXPR_TYPE_MASK_CMP      0x008  /* comparison; LT/EQ/GT bits combine */
+#define MPEXPR_TYPE_MASK_CMP_LT   0x001
+#define MPEXPR_TYPE_MASK_CMP_EQ   0x002
+#define MPEXPR_TYPE_MASK_CMP_GT   0x004
+#define MPEXPR_TYPE_CMP_LT       (MPEXPR_TYPE_BINARY | MPEXPR_TYPE_MASK_CMP \
+                                 | MPEXPR_TYPE_MASK_CMP_LT)
+#define MPEXPR_TYPE_CMP_EQ       (MPEXPR_TYPE_BINARY | MPEXPR_TYPE_MASK_CMP \
+                                 | MPEXPR_TYPE_MASK_CMP_EQ)
+#define MPEXPR_TYPE_CMP_GT       (MPEXPR_TYPE_BINARY | MPEXPR_TYPE_MASK_CMP \
+                                 | MPEXPR_TYPE_MASK_CMP_GT)
+#define MPEXPR_TYPE_CMP_LE       (MPEXPR_TYPE_CMP_LT | MPEXPR_TYPE_MASK_CMP_EQ)
+#define MPEXPR_TYPE_CMP_NE       (MPEXPR_TYPE_CMP_LT | MPEXPR_TYPE_MASK_CMP_GT)
+#define MPEXPR_TYPE_CMP_GE       (MPEXPR_TYPE_CMP_GT | MPEXPR_TYPE_MASK_CMP_EQ)
+
+/* parse options */
+#define MPEXPR_TYPE_WHOLEWORD      0x1000
+#define MPEXPR_TYPE_OPERATOR       0x2000
+
+
+typedef void (*mpexpr_fun_t) __GMP_PROTO ((void));  /* cast before calling */
+
+struct mpexpr_operator_t {
+  __gmp_const char  *name;       /* operator or function name, e.g. "**" */
+  mpexpr_fun_t      fun;         /* implementation; NULL in some entries */
+  int               type;        /* MPEXPR_TYPE_ bits */
+  int               precedence;  /* standard tables: larger binds tighter */
+};
+
+
+int mpf_expr_a __GMP_PROTO ((__gmp_const struct mpexpr_operator_t *table,
+                       mpf_ptr res, int base, unsigned long prec,
+                       __gmp_const char *e, size_t elen,
+                       mpf_srcptr var[26]));  /* explicit table, length, vars */
+int mpf_expr __GMP_PROTO ((mpf_ptr res, int base, __gmp_const char *e, ...));
+/* The non-"_a" forms take the variables as a NULL-terminated argument list. */
+int mpq_expr_a __GMP_PROTO ((__gmp_const struct mpexpr_operator_t *table,
+                       mpq_ptr res, int base,
+                       __gmp_const char *e, size_t elen,
+                       mpq_srcptr var[26]));
+int mpq_expr __GMP_PROTO ((mpq_ptr res, int base, __gmp_const char *e, ...));
+/* All of these return an MPEXPR_RESULT_ code. */
+int mpz_expr_a __GMP_PROTO ((__gmp_const struct mpexpr_operator_t *table,
+                       mpz_ptr res, int base,
+                       __gmp_const char *e, size_t elen,
+                       mpz_srcptr var[26]));
+int mpz_expr __GMP_PROTO ((mpz_ptr res, int base, __gmp_const char *e, ...));
+
+#endif
diff --git a/demos/expr/exprf.c b/demos/expr/exprf.c

new file mode 100644 (file)

index 0000000..c67ee51
--- /dev/null
+++ b/demos/expr/exprf.c
@@ -0,0 +1,127 @@
+/* mpf expression evaluation
+
+Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <string.h>
+#include "gmp.h"
+#include "expr-impl.h"
+
+
+/* Change this to "#define TRACE(x) x" to get some traces. */
+#define TRACE(x)
+
+
+static int   /* function form of mpf_sgn, for use in the operator table */
+e_mpf_sgn (mpf_srcptr x)
+{
+  return mpf_sgn (x);
+}
+
+
+static __gmp_const struct mpexpr_operator_t  _mpf_expr_standard_table[] = {
+  /* operators: { name, function, type, precedence }, highest precedence first */
+  { "**",  (mpexpr_fun_t) mpf_pow_ui,
+    MPEXPR_TYPE_BINARY_UI | MPEXPR_TYPE_RIGHTASSOC,                   220 },
+
+  { "!",   (mpexpr_fun_t) e_mpf_sgn,
+    MPEXPR_TYPE_LOGICAL_NOT | MPEXPR_TYPE_PREFIX,                     210 },
+  { "-",   (mpexpr_fun_t) mpf_neg,
+    MPEXPR_TYPE_UNARY | MPEXPR_TYPE_PREFIX,                           210 },
+
+  { "*",   (mpexpr_fun_t) mpf_mul,           MPEXPR_TYPE_BINARY,      200 },
+  { "/",   (mpexpr_fun_t) mpf_div,           MPEXPR_TYPE_BINARY,      200 },
+
+  { "+",   (mpexpr_fun_t) mpf_add,           MPEXPR_TYPE_BINARY,      190 },
+  { "-",   (mpexpr_fun_t) mpf_sub,           MPEXPR_TYPE_BINARY,      190 },
+
+  { "<<",  (mpexpr_fun_t) mpf_mul_2exp,      MPEXPR_TYPE_BINARY_UI,   180 },
+  { ">>",  (mpexpr_fun_t) mpf_div_2exp,      MPEXPR_TYPE_BINARY_UI,   180 },
+
+  { "<=",  (mpexpr_fun_t) mpf_cmp,           MPEXPR_TYPE_CMP_LE,      170 },
+  { "<",   (mpexpr_fun_t) mpf_cmp,           MPEXPR_TYPE_CMP_LT,      170 },
+  { ">=",  (mpexpr_fun_t) mpf_cmp,           MPEXPR_TYPE_CMP_GE,      170 },
+  { ">",   (mpexpr_fun_t) mpf_cmp,           MPEXPR_TYPE_CMP_GT,      170 },
+
+  { "==",  (mpexpr_fun_t) mpf_cmp,           MPEXPR_TYPE_CMP_EQ,      160 },
+  { "!=",  (mpexpr_fun_t) mpf_cmp,           MPEXPR_TYPE_CMP_NE,      160 },
+
+  { "&&",  (mpexpr_fun_t) e_mpf_sgn,         MPEXPR_TYPE_LOGICAL_AND, 120 },
+  { "||",  (mpexpr_fun_t) e_mpf_sgn,         MPEXPR_TYPE_LOGICAL_OR,  110 },
+
+  { ":",   NULL,                             MPEXPR_TYPE_COLON,       101 },
+  { "?",   (mpexpr_fun_t) e_mpf_sgn,         MPEXPR_TYPE_QUESTION,    100 },
+
+  { ")",   NULL,                             MPEXPR_TYPE_CLOSEPAREN,    4 },
+  { "(",   NULL,                             MPEXPR_TYPE_OPENPAREN,     3 },
+  { ",",   NULL,                             MPEXPR_TYPE_ARGSEP,        2 },
+  { "$",   NULL,                             MPEXPR_TYPE_VARIABLE,      1 },
+  /* named functions; the precedence field is omitted, so it is 0 */
+  { "abs",      (mpexpr_fun_t) mpf_abs,          MPEXPR_TYPE_UNARY        },
+  { "ceil",     (mpexpr_fun_t) mpf_ceil,         MPEXPR_TYPE_UNARY        },
+  { "cmp",      (mpexpr_fun_t) mpf_cmp,          MPEXPR_TYPE_I_BINARY     },
+  { "eq",       (mpexpr_fun_t) mpf_eq,           MPEXPR_TYPE_I_TERNARY_UI },
+  { "floor",    (mpexpr_fun_t) mpf_floor,        MPEXPR_TYPE_UNARY        },
+  { "integer_p",(mpexpr_fun_t) mpf_integer_p,    MPEXPR_TYPE_I_UNARY      },
+  { "max",   (mpexpr_fun_t) mpf_cmp, MPEXPR_TYPE_MAX | MPEXPR_TYPE_PAIRWISE },
+  { "min",   (mpexpr_fun_t) mpf_cmp, MPEXPR_TYPE_MIN | MPEXPR_TYPE_PAIRWISE },
+  { "reldiff",  (mpexpr_fun_t) mpf_reldiff,      MPEXPR_TYPE_BINARY       },
+  { "sgn",      (mpexpr_fun_t) e_mpf_sgn,        MPEXPR_TYPE_I_UNARY      },
+  { "sqrt",     (mpexpr_fun_t) mpf_sqrt,         MPEXPR_TYPE_UNARY        },
+  { "trunc",    (mpexpr_fun_t) mpf_trunc,        MPEXPR_TYPE_UNARY        },
+
+  { NULL }  /* NULL name; presumably the end-of-table marker */
+};
+/* The table is exported only through this pointer. */
+__gmp_const struct mpexpr_operator_t * __gmp_const mpf_expr_standard_table
+= _mpf_expr_standard_table;
+
+
+int   /* returns an MPEXPR_RESULT_ code; the value is stored in res */
+#if HAVE_STDARG
+mpf_expr (mpf_ptr res, int base, __gmp_const char *e, ...)
+#else
+mpf_expr (va_alist)
+     va_dcl
+#endif
+{
+  mpf_srcptr  var[MPEXPR_VARIABLES];  /* filled from the variable arguments */
+  va_list     ap;
+  int         ret;
+#if HAVE_STDARG
+  va_start (ap, e);
+#else
+  mpf_ptr           res;   /* pre-ANSI: fixed arguments come off the va_list */
+  int               base;
+  __gmp_const char  *e;
+  va_start (ap);
+  res  = va_arg (ap, mpf_ptr);
+  base = va_arg (ap, int);
+  e    = va_arg (ap, __gmp_const char *);
+#endif
+
+  TRACE (printf ("mpf_expr(): base %d, %s\n", base, e));
+  ret = mpexpr_va_to_var ((void **) var, ap);
+  va_end (ap);
+
+  if (ret != MPEXPR_RESULT_OK)
+    return ret;
+  /* standard operators; the precision of res is passed as prec */
+  return mpf_expr_a (mpf_expr_standard_table, res, base,
+                    mpf_get_prec (res), e, strlen(e), var);
+}
diff --git a/demos/expr/exprfa.c b/demos/expr/exprfa.c

new file mode 100644 (file)

index 0000000..1cc00e0
--- /dev/null
+++ b/demos/expr/exprfa.c
@@ -0,0 +1,180 @@
+/* mpf expression evaluation
+
+Copyright 2000, 2001, 2002, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+/* Future: Bitwise "&", "|" and "^" could be done, if desired.  Not sure
+   those functions would be much value though.  */
+
+
+#include <ctype.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "gmp.h"
+#include "expr-impl.h"
+
+
+/* Change this to "#define TRACE(x) x" to get some traces. */
+#define TRACE(x)
+
+
+static size_t   /* chars of e consumed, "0x" included; 0 if mpf_set_str fails */
+e_mpf_number (mpf_ptr res, __gmp_const char *e, size_t elen, int base)
+{
+  char    *edup;
+  size_t  i, ret, extra=0;
+  int     mant_base, exp_base;
+  void    *(*allocate_func) (size_t);
+  void    (*free_func) (void *, size_t);
+
+  TRACE (printf ("mpf_number base=%d \"%.*s\"\n", base, (int) elen, e));
+
+  /* mpf_set_str doesn't currently accept 0x for hex in base==0, so do it
+     here instead.  FIXME: Would prefer to let mpf_set_str handle this.  */
+  if (base == 0 && elen >= 2 && e[0] == '0' && (e[1] == 'x' || e[1] == 'X'))
+    {
+      base = 16;
+      extra = 2;
+      e += extra;
+      elen -= extra;
+    }
+
+  if (base == 0)
+    mant_base = 10;
+  else if (base < 0)
+    mant_base = -base;
+  else
+    mant_base = base;
+
+  /* exponent in decimal if base is negative */
+  if (base < 0)
+    exp_base = 10;
+  else if (base == 0)
+    exp_base = 10;
+  else
+    exp_base = base;
+
+#define IS_EXPONENT(c) \
+  ((c) == '@' || (base <= 10 && base >= -10 && ((c) == 'e' || (c) == 'E')))
+  /* integer part of the mantissa; i never reads at or beyond elen */
+  i = 0;
+  for (;;)
+    {
+      if (i >= elen)
+        goto parsed;
+      if (e[i] == '.')
+        break;
+      if (IS_EXPONENT (e[i]))
+        goto exponent;
+      if (! isasciidigit_in_base (e[i], mant_base))
+        goto parsed;
+      i++;
+    }
+
+  /* fraction */
+  i++;
+  for (;;)
+    {
+      if (i >= elen)
+        goto parsed;
+      if (IS_EXPONENT (e[i]))
+        goto exponent;
+      if (! isasciidigit_in_base (e[i], mant_base))
+        goto parsed;
+      i++;
+    }
+
+ exponent:
+  i++;
+  if (i >= elen)
+    goto parsed;
+  if (e[i] == '-')   /* optional sign: only '-' is recognised here */
+    i++;
+  for (;;)
+    {
+      if (i >= elen)
+        goto parsed;
+      if (! isasciidigit_in_base (e[i], exp_base))
+        break;
+      i++;
+    }
+
+ parsed:
+  TRACE (printf ("  parsed i=%lu \"%.*s\"\n", (unsigned long) i, (int) i, e));
+  /* mpf_set_str is given a NUL-terminated copy of the i chars scanned */
+  mp_get_memory_functions (&allocate_func, NULL, &free_func);
+  edup = (*allocate_func) (i+1);
+  memcpy (edup, e, i);
+  edup[i] = '\0';
+
+  if (mpf_set_str (res, edup, base) == 0)
+    ret = i + extra;
+  else
+    ret = 0;
+
+  (*free_func) (edup, i+1);
+  return ret;
+}
+
+static int   /* 1 if f is an integer that fits an unsigned long, else 0 */
+e_mpf_ulong_p (mpf_srcptr f)
+{
+  return mpf_integer_p (f) ? (mpf_fits_ulong_p (f) != 0) : 0;
+}
+
+/* Store x in w without changing the precision of w.  A real swap (which
+   leaves the old w in x) is only done when the two precisions are equal.  */
+static void
+e_mpf_set_or_swap (mpf_ptr w, mpf_ptr x)
+{
+  if (mpf_get_prec (w) != mpf_get_prec (x))
+    mpf_set (w, x);
+  else
+    mpf_swap (w, x);
+}
+
+
+int
+mpf_expr_a (__gmp_const struct mpexpr_operator_t *table,
+            mpf_ptr res, int base, unsigned long prec,
+            __gmp_const char *e, size_t elen,
+            mpf_srcptr var[26])
+{
+  struct mpexpr_parse_t  p;   /* parse state handed to mpexpr_evaluate */
+
+  p.mpX_init        = (mpexpr_fun_unary_ui_t) mpf_init2;
+  p.mpX_clear       = (mpexpr_fun_one_t)      mpf_clear;
+  p.mpX_number      = (mpexpr_fun_number_t)   e_mpf_number;
+  p.mpX_ulong_p     = (mpexpr_fun_i_unary_t)  e_mpf_ulong_p;
+  p.mpX_get_ui      = (mpexpr_fun_get_ui_t)   mpf_get_ui;
+  p.mpX_set         = (mpexpr_fun_unary_t)    mpf_set;
+  p.mpX_set_si      = (mpexpr_fun_set_si_t)   mpf_set_si;
+  p.mpX_set_or_swap = (mpexpr_fun_unary_t)    e_mpf_set_or_swap;
+  p.mpX_swap        = (mpexpr_fun_swap_t)     mpf_swap;
+
+  p.e = e;                      /* the expression text and its length */
+  p.elen = elen;
+  p.base = base;
+  p.prec = prec;
+  p.table = table;
+  p.var = (mpX_srcptr *) var;
+  p.res = (mpX_ptr) res;        /* where the final value goes */
+
+  return mpexpr_evaluate (&p);
+}
diff --git a/demos/expr/exprq.c b/demos/expr/exprq.c

new file mode 100644 (file)

index 0000000..af91b00
--- /dev/null
+++ b/demos/expr/exprq.c
@@ -0,0 +1,159 @@
+/* mpq expression evaluation
+
+Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <string.h>
+#include "gmp.h"
+#include "expr-impl.h"
+
+
+/* Change this to "#define TRACE(x) x" to get some traces. */
+#define TRACE(x)
+
+
+static void   /* r = b^e, powering numerator and denominator separately */
+e_mpq_pow_ui (mpq_ptr r, mpq_srcptr b, unsigned long e)
+{
+  mpz_pow_ui (mpq_numref(r), mpq_numref(b), e);
+  mpz_pow_ui (mpq_denref(r), mpq_denref(b), e);
+}
+
+/* Wrapped because mpq_sgn is a macro, and the table holds functions. */
+static int
+e_mpq_sgn (mpq_srcptr x)
+{
+  return mpq_sgn (x);
+}
+
+/* mpq_equal only promises some non-zero value when the operands are equal,
+   but == and != must produce exactly 1 or 0, hence the wrappers. */
+static int
+e_mpq_equal (mpq_srcptr x, mpq_srcptr y)
+{
+  return mpq_equal (x, y) ? 1 : 0;
+}
+static int   /* 1 if x and y differ, 0 if they are equal */
+e_mpq_notequal (mpq_srcptr x, mpq_srcptr y)
+{
+  return mpq_equal (x, y) == 0;
+}
+
+static void   /* w = numerator of x, over a denominator of 1 */
+e_mpq_num (mpq_ptr w, mpq_srcptr x)
+{
+  mpz_set_ui (mpq_denref(w), 1L);
+  if (w != x)   /* when w == x the numerator is already in place */
+    mpz_set (mpq_numref(w), mpq_numref(x));
+}
+static void   /* w = denominator of x, over a denominator of 1 */
+e_mpq_den (mpq_ptr w, mpq_srcptr x)
+{
+  if (w != x)
+    mpz_set (mpq_numref(w), mpq_denref(x));
+  else
+    mpz_swap (mpq_numref(w), mpq_denref(w));   /* in place: den becomes num */
+  mpz_set_ui (mpq_denref(w), 1L);
+}
+
+
+static __gmp_const struct mpexpr_operator_t  _mpq_expr_standard_table[] = {
+  /* operators: { name, function, type, precedence }, highest precedence first */
+  { "**",  (mpexpr_fun_t) e_mpq_pow_ui,
+    MPEXPR_TYPE_BINARY_UI | MPEXPR_TYPE_RIGHTASSOC,                   220 },
+
+  { "!",   (mpexpr_fun_t) e_mpq_sgn,
+    MPEXPR_TYPE_LOGICAL_NOT | MPEXPR_TYPE_PREFIX,                     210 },
+  { "-",   (mpexpr_fun_t) mpq_neg,
+    MPEXPR_TYPE_UNARY | MPEXPR_TYPE_PREFIX,                           210 },
+
+  { "*",   (mpexpr_fun_t) mpq_mul,           MPEXPR_TYPE_BINARY,      200 },
+  { "/",   (mpexpr_fun_t) mpq_div,           MPEXPR_TYPE_BINARY,      200 },
+
+  { "+",   (mpexpr_fun_t) mpq_add,           MPEXPR_TYPE_BINARY,      190 },
+  { "-",   (mpexpr_fun_t) mpq_sub,           MPEXPR_TYPE_BINARY,      190 },
+
+  { "<<",  (mpexpr_fun_t) mpq_mul_2exp,      MPEXPR_TYPE_BINARY_UI,   180 },
+  { ">>",  (mpexpr_fun_t) mpq_div_2exp,      MPEXPR_TYPE_BINARY_UI,   180 },
+
+  { "<=",  (mpexpr_fun_t) mpq_cmp,           MPEXPR_TYPE_CMP_LE,      170 },
+  { "<",   (mpexpr_fun_t) mpq_cmp,           MPEXPR_TYPE_CMP_LT,      170 },
+  { ">=",  (mpexpr_fun_t) mpq_cmp,           MPEXPR_TYPE_CMP_GE,      170 },
+  { ">",   (mpexpr_fun_t) mpq_cmp,           MPEXPR_TYPE_CMP_GT,      170 },
+
+  { "==",  (mpexpr_fun_t) e_mpq_equal,       MPEXPR_TYPE_I_BINARY,    160 },
+  { "!=",  (mpexpr_fun_t) e_mpq_notequal,    MPEXPR_TYPE_I_BINARY,    160 },
+
+  { "&&",  (mpexpr_fun_t) e_mpq_sgn,         MPEXPR_TYPE_LOGICAL_AND, 120 },
+  { "||",  (mpexpr_fun_t) e_mpq_sgn,         MPEXPR_TYPE_LOGICAL_OR,  110 },
+
+  { ":",   NULL,                             MPEXPR_TYPE_COLON,       101 },
+  { "?",   (mpexpr_fun_t) e_mpq_sgn,         MPEXPR_TYPE_QUESTION,    100 },
+  /* NOTE(review): the mpf and mpz tables have NULL for ")", "(", "," -- confirm */
+  { ")",   (mpexpr_fun_t) e_mpq_sgn,         MPEXPR_TYPE_CLOSEPAREN,    4 },
+  { "(",   (mpexpr_fun_t) e_mpq_sgn,         MPEXPR_TYPE_OPENPAREN,     3 },
+  { ",",   (mpexpr_fun_t) e_mpq_sgn,         MPEXPR_TYPE_ARGSEP,        2 },
+  { "$",   NULL,                             MPEXPR_TYPE_VARIABLE,      1 },
+  /* named functions; the precedence field is omitted, so it is 0 */
+  { "abs",  (mpexpr_fun_t) mpq_abs,          MPEXPR_TYPE_UNARY            },
+  { "cmp",  (mpexpr_fun_t) mpq_cmp,          MPEXPR_TYPE_I_BINARY         },
+  { "den",  (mpexpr_fun_t) e_mpq_den,        MPEXPR_TYPE_UNARY            },
+  { "max",  (mpexpr_fun_t) mpq_cmp,  MPEXPR_TYPE_MAX | MPEXPR_TYPE_PAIRWISE },
+  { "min",  (mpexpr_fun_t) mpq_cmp,  MPEXPR_TYPE_MIN | MPEXPR_TYPE_PAIRWISE },
+  { "num",  (mpexpr_fun_t) e_mpq_num,        MPEXPR_TYPE_UNARY            },
+  { "sgn",  (mpexpr_fun_t) e_mpq_sgn,        MPEXPR_TYPE_I_UNARY          },
+
+  { NULL }  /* NULL name; presumably the end-of-table marker */
+};
+/* The table is exported only through this pointer. */
+__gmp_const struct mpexpr_operator_t * __gmp_const mpq_expr_standard_table
+= _mpq_expr_standard_table;
+
+
+int   /* returns an MPEXPR_RESULT_ code; the value is stored in res */
+#if HAVE_STDARG
+mpq_expr (mpq_ptr res, int base, __gmp_const char *e, ...)
+#else
+mpq_expr (va_alist)
+     va_dcl
+#endif
+{
+  mpq_srcptr  var[MPEXPR_VARIABLES];  /* filled from the variable arguments */
+  va_list     ap;
+  int         ret;
+#if HAVE_STDARG
+  va_start (ap, e);
+#else
+  mpq_ptr           res;   /* pre-ANSI: fixed arguments come off the va_list */
+  int               base;
+  __gmp_const char  *e;
+  va_start (ap);
+  res  = va_arg (ap, mpq_ptr);
+  base = va_arg (ap, int);
+  e    = va_arg (ap, __gmp_const char *);
+#endif
+
+  TRACE (printf ("mpq_expr(): base %d, %s\n", base, e));
+  ret = mpexpr_va_to_var ((void **) var, ap);
+  va_end (ap);
+
+  if (ret != MPEXPR_RESULT_OK)
+    return ret;
+  /* evaluate the whole NUL-terminated string with the standard operators */
+  return mpq_expr_a (mpq_expr_standard_table, res, base, e, strlen(e), var);
+}
diff --git a/demos/expr/exprqa.c b/demos/expr/exprqa.c

new file mode 100644 (file)

index 0000000..c56f1ca
--- /dev/null
+++ b/demos/expr/exprqa.c
@@ -0,0 +1,89 @@
+/* mpq expression evaluation
+
+Copyright 2000, 2001, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include <stdio.h>
+#include "gmp.h"
+#include "expr-impl.h"
+
+
+static int   /* 1 if q has denominator 1 and a numerator fitting a ulong */
+e_mpq_ulong_p (mpq_srcptr q)
+{
+  return mpz_cmp_ui (mpq_denref (q), 1L) == 0
+    && mpz_fits_ulong_p (mpq_numref (q));
+}
+
+/* get value as a ui, on the assumption it fits; a ulong, as mpz_get_ui gives */
+static unsigned long
+e_mpq_get_ui_fits (mpq_srcptr q)
+{
+  return mpz_get_ui (mpq_numref (q));
+}
+
+static void   /* q = num/1 */
+e_mpq_set_si1 (mpq_ptr q, long num)
+{
+  mpq_set_si (q, num, 1L);
+}
+
+/* The same as mpz, but putting the result in the numerator.  Negatives and
+   fractions aren't parsed here because '-' and '/' are operators. */
+static size_t
+e_mpq_number (mpq_ptr res, __gmp_const char *e, size_t elen, int base)
+{
+  mpz_set_ui (mpq_denref (res), 1L);   /* the parsed value is an integer */
+  return mpexpr_mpz_number (mpq_numref (res), e, elen, base);
+}
+
+
+/* mpq_init in the form wanted for mpX_init, ignoring prec */
+static void
+e_mpq_init (mpq_ptr q, unsigned long prec)
+{
+  mpq_init (q);
+}
+
+int
+mpq_expr_a (__gmp_const struct mpexpr_operator_t *table,
+            mpq_ptr res, int base,
+            __gmp_const char *e, size_t elen,
+            mpq_srcptr var[26])
+{
+  struct mpexpr_parse_t  p;   /* parse state handed to mpexpr_evaluate */
+
+  p.table = table;
+  p.res = (mpX_ptr) res;
+  p.base = base;
+  p.e = e;
+  p.elen = elen;
+  p.var = (mpX_srcptr *) var;
+  p.prec = 0;  /* e_mpq_init ignores prec, but don't leave it indeterminate */
+  p.mpX_clear       = (mpexpr_fun_one_t)      mpq_clear;
+  p.mpX_ulong_p     = (mpexpr_fun_i_unary_t)  e_mpq_ulong_p;
+  p.mpX_get_ui      = (mpexpr_fun_get_ui_t)   e_mpq_get_ui_fits;
+  p.mpX_init        = (mpexpr_fun_unary_ui_t) e_mpq_init;
+  p.mpX_number      = (mpexpr_fun_number_t)   e_mpq_number;
+  p.mpX_set         = (mpexpr_fun_unary_t)    mpq_set;
+  p.mpX_set_or_swap = (mpexpr_fun_unary_t)    mpq_swap;
+  p.mpX_set_si      = (mpexpr_fun_set_si_t)   e_mpq_set_si1;
+  p.mpX_swap        = (mpexpr_fun_swap_t)     mpq_swap;
+
+  return mpexpr_evaluate (&p);
+}
diff --git a/demos/expr/exprv.c b/demos/expr/exprv.c

new file mode 100644 (file)

index 0000000..b53682e
--- /dev/null
+++ b/demos/expr/exprv.c
@@ -0,0 +1,46 @@
+/* mpexpr_va_to_var -- variable argument list to array, for mp*_expr */
+
+/*
+Copyright 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include "gmp.h"
+#include "expr-impl.h"
+
+
+int
+mpexpr_va_to_var (void *var[], va_list ap)
+{
+  int   count;
+  void  *arg;
+
+  /* Collect pointers up to the NULL terminator.  Having more than
+     MPEXPR_VARIABLES of them is an error.  */
+  for (count = 0; (arg = va_arg (ap, void *)) != NULL; count++)
+    {
+      if (count >= MPEXPR_VARIABLES)
+        return MPEXPR_RESULT_BAD_VARIABLE;
+      var[count] = arg;
+    }
+
+  /* Slots with no variable supplied are set to NULL.  */
+  for ( ; count < MPEXPR_VARIABLES; count++)
+    var[count] = NULL;
+
+  return MPEXPR_RESULT_OK;
+}
diff --git a/demos/expr/exprz.c b/demos/expr/exprz.c

new file mode 100644 (file)

index 0000000..7c01b9d
--- /dev/null
+++ b/demos/expr/exprz.c
@@ -0,0 +1,210 @@
+/* mpz expression evaluation, simple part
+
+Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <ctype.h>
+#include <stdio.h>
+#include <string.h>
+#include "gmp.h"
+#include "expr-impl.h"
+
+
+/* Change this to "#define TRACE(x) x" to get some traces. */
+#define TRACE(x)
+
+
+/* mpz_sgn, mpz_odd_p and mpz_even_p are macros, so need function wrappers. */
+static int
+e_mpz_sgn (mpz_srcptr x)
+{
+  return mpz_sgn (x);
+}
+static int   /* function form of the mpz_odd_p macro */
+e_mpz_odd_p (mpz_srcptr x)
+{
+  return mpz_odd_p (x);
+}
+static int   /* function form of the mpz_even_p macro */
+e_mpz_even_p (mpz_srcptr x)
+{
+  return mpz_even_p (x);
+}
+
+/* These are wrapped because MPEXPR_TYPE_I_ functions are expected to return
+   "int", whereas these return "unsigned long"; the value is stored in w.  */
+static void
+e_mpz_hamdist (mpz_ptr w, mpz_srcptr x, mpz_srcptr y)
+{
+  mpz_set_ui (w, mpz_hamdist (x, y));
+}
+static void   /* w = mpz_popcount (x) */
+e_mpz_popcount (mpz_ptr w, mpz_srcptr x)
+{
+  mpz_set_ui (w, mpz_popcount (x));
+}
+static void   /* w = mpz_scan0 (x, start) */
+e_mpz_scan0 (mpz_ptr w, mpz_srcptr x, unsigned long start)
+{
+  mpz_set_ui (w, mpz_scan0 (x, start));
+}
+static void   /* w = mpz_scan1 (x, start) */
+e_mpz_scan1 (mpz_ptr w, mpz_srcptr x, unsigned long start)
+{
+  mpz_set_ui (w, mpz_scan1 (x, start));
+}
+
+/* These are wrapped because they're in-place whereas MPEXPR_TYPE_BINARY_UI
+   expects a separate source and destination.  Actually the parser will
+   normally pass w==x anyway.  */
+static void
+e_mpz_setbit (mpz_ptr w, mpz_srcptr x, unsigned long n)
+{
+  if (w != x)
+    mpz_set (w, x);   /* copy first, then modify the copy in place */
+  mpz_setbit (w, n);
+}
+static void   /* w = x with bit n cleared */
+e_mpz_clrbit (mpz_ptr w, mpz_srcptr x, unsigned long n)
+{
+  if (w != x)
+    mpz_set (w, x);   /* copy first, then modify the copy in place */
+  mpz_clrbit (w, n);
+}
+
+static __gmp_const struct mpexpr_operator_t  _mpz_expr_standard_table[] = {
+  /* operators: { name, function, type, precedence }, highest precedence first */
+  { "**",  (mpexpr_fun_t) mpz_pow_ui,
+    MPEXPR_TYPE_BINARY_UI | MPEXPR_TYPE_RIGHTASSOC,                  220 },
+
+  { "~",   (mpexpr_fun_t) mpz_com,
+    MPEXPR_TYPE_UNARY | MPEXPR_TYPE_PREFIX,                          210 },
+  { "!",   (mpexpr_fun_t) e_mpz_sgn,
+    MPEXPR_TYPE_LOGICAL_NOT | MPEXPR_TYPE_PREFIX,                    210 },
+  { "-",   (mpexpr_fun_t) mpz_neg,
+    MPEXPR_TYPE_UNARY | MPEXPR_TYPE_PREFIX,                          210 },
+
+  { "*",   (mpexpr_fun_t) mpz_mul,          MPEXPR_TYPE_BINARY,      200 },
+  { "/",   (mpexpr_fun_t) mpz_tdiv_q,       MPEXPR_TYPE_BINARY,      200 },
+  { "%",   (mpexpr_fun_t) mpz_tdiv_r,       MPEXPR_TYPE_BINARY,      200 },
+
+  { "+",   (mpexpr_fun_t) mpz_add,          MPEXPR_TYPE_BINARY,      190 },
+  { "-",   (mpexpr_fun_t) mpz_sub,          MPEXPR_TYPE_BINARY,      190 },
+
+  { "<<",  (mpexpr_fun_t) mpz_mul_2exp,     MPEXPR_TYPE_BINARY_UI,   180 },
+  { ">>",  (mpexpr_fun_t) mpz_tdiv_q_2exp,  MPEXPR_TYPE_BINARY_UI,   180 },
+
+  { "<=",  (mpexpr_fun_t) mpz_cmp,          MPEXPR_TYPE_CMP_LE,      170 },
+  { "<",   (mpexpr_fun_t) mpz_cmp,          MPEXPR_TYPE_CMP_LT,      170 },
+  { ">=",  (mpexpr_fun_t) mpz_cmp,          MPEXPR_TYPE_CMP_GE,      170 },
+  { ">",   (mpexpr_fun_t) mpz_cmp,          MPEXPR_TYPE_CMP_GT,      170 },
+
+  { "==",  (mpexpr_fun_t) mpz_cmp,          MPEXPR_TYPE_CMP_EQ,      160 },
+  { "!=",  (mpexpr_fun_t) mpz_cmp,          MPEXPR_TYPE_CMP_NE,      160 },
+
+  { "&",   (mpexpr_fun_t) mpz_and,          MPEXPR_TYPE_BINARY,      150 },
+  { "^",   (mpexpr_fun_t) mpz_xor,          MPEXPR_TYPE_BINARY,      140 },
+  { "|",   (mpexpr_fun_t) mpz_ior,          MPEXPR_TYPE_BINARY,      130 },
+  { "&&",  (mpexpr_fun_t) e_mpz_sgn, MPEXPR_TYPE_LOGICAL_AND, 120 },
+  { "||",  (mpexpr_fun_t) e_mpz_sgn, MPEXPR_TYPE_LOGICAL_OR,  110 },
+
+  { ":",   NULL,                            MPEXPR_TYPE_COLON,       101 },
+  { "?",   (mpexpr_fun_t) e_mpz_sgn, MPEXPR_TYPE_QUESTION,    100 },
+
+  { ")",   NULL,                            MPEXPR_TYPE_CLOSEPAREN,   4 },
+  { "(",   NULL,                            MPEXPR_TYPE_OPENPAREN,    3 },
+  { ",",   NULL,                            MPEXPR_TYPE_ARGSEP,       2 },
+  { "$",   NULL,                            MPEXPR_TYPE_VARIABLE,     1 },
+  /* named functions; the precedence field is omitted, so it is 0 */
+  { "abs",       (mpexpr_fun_t) mpz_abs,           MPEXPR_TYPE_UNARY         },
+  { "bin",       (mpexpr_fun_t) mpz_bin_ui,        MPEXPR_TYPE_BINARY_UI     },
+  { "clrbit",    (mpexpr_fun_t) e_mpz_clrbit,      MPEXPR_TYPE_BINARY_UI     },
+  { "cmp",       (mpexpr_fun_t) mpz_cmp,           MPEXPR_TYPE_I_BINARY      },
+  { "cmpabs",    (mpexpr_fun_t) mpz_cmpabs,        MPEXPR_TYPE_I_BINARY      },
+  { "congruent_p",(mpexpr_fun_t)mpz_congruent_p,   MPEXPR_TYPE_I_TERNARY     },
+  { "divisible_p",(mpexpr_fun_t)mpz_divisible_p,   MPEXPR_TYPE_I_BINARY      },
+  { "even_p",    (mpexpr_fun_t) e_mpz_even_p,      MPEXPR_TYPE_I_UNARY       },
+  { "fib",       (mpexpr_fun_t) mpz_fib_ui,        MPEXPR_TYPE_UNARY_UI      },
+  { "fac",       (mpexpr_fun_t) mpz_fac_ui,        MPEXPR_TYPE_UNARY_UI      },
+  { "gcd",       (mpexpr_fun_t) mpz_gcd,           MPEXPR_TYPE_BINARY
+                                                  | MPEXPR_TYPE_PAIRWISE    },
+  { "hamdist",   (mpexpr_fun_t) e_mpz_hamdist,     MPEXPR_TYPE_BINARY        },
+  { "invert",    (mpexpr_fun_t) mpz_invert,        MPEXPR_TYPE_BINARY        },
+  { "jacobi",    (mpexpr_fun_t) mpz_jacobi,        MPEXPR_TYPE_I_BINARY      },
+  { "kronecker", (mpexpr_fun_t) mpz_kronecker,     MPEXPR_TYPE_I_BINARY      },
+  { "lcm",       (mpexpr_fun_t) mpz_lcm,           MPEXPR_TYPE_BINARY
+                                                  | MPEXPR_TYPE_PAIRWISE    },
+  { "lucnum",    (mpexpr_fun_t) mpz_lucnum_ui,     MPEXPR_TYPE_UNARY_UI      },
+  { "max",       (mpexpr_fun_t) mpz_cmp,           MPEXPR_TYPE_MAX
+                                                  | MPEXPR_TYPE_PAIRWISE    },
+  { "min",       (mpexpr_fun_t) mpz_cmp,           MPEXPR_TYPE_MIN
+                                                  | MPEXPR_TYPE_PAIRWISE    },
+  { "nextprime", (mpexpr_fun_t) mpz_nextprime,     MPEXPR_TYPE_UNARY         },
+  { "odd_p",     (mpexpr_fun_t) e_mpz_odd_p,       MPEXPR_TYPE_I_UNARY       },
+  { "perfect_power_p", (mpexpr_fun_t)mpz_perfect_power_p, MPEXPR_TYPE_I_UNARY},
+  { "perfect_square_p",(mpexpr_fun_t)mpz_perfect_square_p,MPEXPR_TYPE_I_UNARY},
+  { "popcount",  (mpexpr_fun_t) e_mpz_popcount,    MPEXPR_TYPE_UNARY         },
+  { "powm",      (mpexpr_fun_t) mpz_powm,          MPEXPR_TYPE_TERNARY       },
+  { "probab_prime_p",  (mpexpr_fun_t)mpz_probab_prime_p,  MPEXPR_TYPE_I_UNARY},
+  { "root",      (mpexpr_fun_t) mpz_root,          MPEXPR_TYPE_BINARY_UI     },
+  { "scan0",     (mpexpr_fun_t) e_mpz_scan0,       MPEXPR_TYPE_BINARY_UI     },
+  { "scan1",     (mpexpr_fun_t) e_mpz_scan1,       MPEXPR_TYPE_BINARY_UI     },
+  { "setbit",    (mpexpr_fun_t) e_mpz_setbit,      MPEXPR_TYPE_BINARY_UI     },
+  { "tstbit",    (mpexpr_fun_t) mpz_tstbit,        MPEXPR_TYPE_I_BINARY_UI   },
+  { "sgn",       (mpexpr_fun_t) e_mpz_sgn,         MPEXPR_TYPE_I_UNARY       },
+  { "sqrt",      (mpexpr_fun_t) mpz_sqrt,          MPEXPR_TYPE_UNARY         },
+  { NULL }  /* NULL name; presumably the end-of-table marker */
+};
+
+/* The table is available globally only through a pointer, so the table size
+   can change without breaking binary compatibility. */
+__gmp_const struct mpexpr_operator_t * __gmp_const mpz_expr_standard_table
+= _mpz_expr_standard_table;
+
+
+int   /* returns an MPEXPR_RESULT_ code; the value is stored in res */
+#if HAVE_STDARG
+mpz_expr (mpz_ptr res, int base, __gmp_const char *e, ...)
+#else
+mpz_expr (va_alist)
+     va_dcl
+#endif
+{
+  mpz_srcptr  var[MPEXPR_VARIABLES];  /* filled from the variable arguments */
+  va_list     ap;
+  int         ret;
+#if HAVE_STDARG
+  va_start (ap, e);
+#else
+  mpz_ptr           res;   /* pre-ANSI: fixed arguments come off the va_list */
+  int               base;
+  __gmp_const char  *e;
+  va_start (ap);
+  res  = va_arg (ap, mpz_ptr);
+  base = va_arg (ap, int);
+  e    = va_arg (ap, __gmp_const char *);
+#endif
+
+  TRACE (printf ("mpz_expr(): base %d, %s\n", base, e));
+  ret = mpexpr_va_to_var ((void **) var, ap);
+  va_end (ap);
+
+  if (ret != MPEXPR_RESULT_OK)
+    return ret;
+  /* evaluate the whole NUL-terminated string with the standard operators */
+  return mpz_expr_a (mpz_expr_standard_table, res, base, e, strlen(e), var);
+}
diff --git a/demos/expr/exprza.c b/demos/expr/exprza.c

new file mode 100644 (file)

index 0000000..81e16ce
--- /dev/null
+++ b/demos/expr/exprza.c
@@ -0,0 +1,97 @@
+/* mpz expression evaluation
+
+Copyright 2000, 2001, 2002, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <ctype.h>
+#include <stdio.h>
+#include <string.h>
+#include "gmp.h"
+#include "expr-impl.h"
+
+
+/* Parse an integer from the start of the elen characters at e into res.
+   Returns the number of characters consumed, or 0 if mpz_set_str rejects
+   the text.
+
+   A leading "0", optionally followed by 'x' or 'b', is stepped over without
+   a digit check; after that characters are taken for as long as they are
+   digits in the effective base (base 0 is scanned as base 10).  The text
+   found is copied to a NUL-terminated temporary and handed to mpz_set_str
+   with the caller's original base.
+
+   No need to parse '-' since that's handled as an operator.
+   This function is also used by mpq_expr_a, so it's not static.  */
+size_t
+mpexpr_mpz_number (mpz_ptr res, __gmp_const char *e, size_t elen, int base)
+{
+  char    *edup;
+  size_t  i, ret;
+  int     base_effective = (base == 0 ? 10 : base);
+  void    *(*allocate_func) (size_t);
+  void    (*free_func) (void *, size_t);
+
+  /* Every read of e[] is bounded by elen: the caller gives a length, so the
+     text need not be NUL-terminated and may be empty.  This also keeps i
+     from running past elen, which would make the memcpy below over-read.  */
+  i = 0;
+  if (i < elen && e[i] == '0')
+    {
+      i++;
+      if (i < elen && (e[i] == 'x' || e[i] == 'b'))
+        i++;
+    }
+
+  for ( ; i < elen; i++)
+    if (! isasciidigit_in_base (e[i], base_effective))
+      break;
+
+  /* mpz_set_str wants a NUL-terminated string, so work on a copy obtained
+     from the current GMP allocation functions */
+  mp_get_memory_functions (&allocate_func, NULL, &free_func);
+  edup = (*allocate_func) (i+1);
+  memcpy (edup, e, i);
+  edup[i] = '\0';
+
+  if (mpz_set_str (res, edup, base) == 0)
+    ret = i;
+  else
+    ret = 0;
+
+  (*free_func) (edup, i+1);
+  return ret;
+}
+
+/* Initialisation hook in the form the expression parser calls it.  Integers
+   carry no precision, so prec is accepted and deliberately not used.  */
+static void
+e_mpz_init (mpz_ptr z, unsigned long prec)
+{
+  (void) prec;
+  mpz_init (z);
+}
+
+/* Evaluate the elen characters at e, read in the given base, using the
+   operators in table and the variables in var, with the mpz routines
+   installed as the type-specific hooks.  The value is left in res and the
+   return value is that of mpexpr_evaluate.  */
+int
+mpz_expr_a (__gmp_const struct mpexpr_operator_t *table,
+            mpz_ptr res, int base,
+            __gmp_const char *e, size_t elen,
+            mpz_srcptr var[26])
+{
+  struct mpexpr_parse_t  parse;
+
+  /* hooks through which the generic evaluator handles mpz values */
+  parse.mpX_init        = (mpexpr_fun_unary_ui_t) e_mpz_init;
+  parse.mpX_clear       = (mpexpr_fun_one_t)      mpz_clear;
+  parse.mpX_number      = (mpexpr_fun_number_t)   mpexpr_mpz_number;
+  parse.mpX_set         = (mpexpr_fun_unary_t)    mpz_set;
+  parse.mpX_set_si      = (mpexpr_fun_set_si_t)   mpz_set_si;
+  parse.mpX_set_or_swap = (mpexpr_fun_unary_t)    mpz_swap;
+  parse.mpX_swap        = (mpexpr_fun_swap_t)     mpz_swap;
+  parse.mpX_ulong_p     = (mpexpr_fun_i_unary_t)  mpz_fits_ulong_p;
+  parse.mpX_get_ui      = (mpexpr_fun_get_ui_t)   mpz_get_ui;
+
+  /* what to evaluate, with what, and where the answer goes */
+  parse.table = table;
+  parse.base = base;
+  parse.e = e;
+  parse.elen = elen;
+  parse.var = (mpX_srcptr *) var;
+  parse.res = (mpX_ptr) res;
+
+  return mpexpr_evaluate (&parse);
+}
diff --git a/demos/expr/run-expr.c b/demos/expr/run-expr.c

new file mode 100644 (file)

index 0000000..169f5a8
--- /dev/null
+++ b/demos/expr/run-expr.c
@@ -0,0 +1,231 @@
+/* Demo program to run expression evaluation.
+
+Copyright 2000, 2001, 2002, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+/* Usage: ./run-expr [-z] [-q] [-f] [-p prec] [-b base] expression...
+
+   Evaluate each argument as a simple expression.  By default this is in mpz
+   integers, but -q selects mpq or -f selects mpf.  For mpf the float
+   precision can be set with -p.  In all cases the input base can be set
+   with -b, or the default is "0" meaning decimal with "0x" allowed.
+
+   This is a pretty trivial program, it's just an easy way to experiment
+   with the evaluation functions.  */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "expr.h"
+
+
+/* Evaluate str as a single expression and print the outcome on stdout.
+
+   type selects the arithmetic: 'q' for mpq, 'f' for mpf, and 'z' or
+   anything else for mpz.  base is handed to the evaluator unchanged; the
+   result is printed in that base, or in base 10 when base is 0.  prec is
+   used only as the precision of the mpf result.  Two operands, 55 and 99,
+   are passed to the evaluator as its variables.  A failed evaluation is
+   reported together with its return code.  */
+void
+run_expr (int type, int base, unsigned long prec, char *str)
+{
+  int  outbase = (base == 0 ? 10 : base);
+  int  ret;
+
+  switch (type) {
+  case 'z':
+  default:
+    {
+      /* integers; also the fallback for an unknown type */
+      mpz_t  res, var_a, var_b;
+
+      mpz_init (res);
+      mpz_init_set_ui (var_a, 55L);
+      mpz_init_set_ui (var_b, 99L);
+
+      ret = mpz_expr (res, base, str, var_a, var_b, NULL);
+      printf ("\"%s\" base %d: ", str, base);
+      if (ret == MPEXPR_RESULT_OK)
+        {
+          printf ("result ");
+          mpz_out_str (stdout, outbase, res);
+          printf ("\n");
+        }
+      else
+        printf ("invalid (return code %d)\n", ret);
+
+      mpz_clear (res);
+      mpz_clear (var_a);
+      mpz_clear (var_b);
+    }
+    break;
+
+  case 'q':
+    {
+      /* rationals; the variables are the integers 55/1 and 99/1 */
+      mpq_t  res, var_a, var_b;
+
+      mpq_init (res);
+      mpq_init (var_a);
+      mpq_init (var_b);
+
+      mpq_set_ui (var_a, 55L, 1);
+      mpq_set_ui (var_b, 99L, 1);
+
+      ret = mpq_expr (res, base, str, var_a, var_b, NULL);
+      printf ("\"%s\" base %d: ", str, base);
+      if (ret == MPEXPR_RESULT_OK)
+        {
+          printf ("result ");
+          mpq_out_str (stdout, outbase, res);
+          printf ("\n");
+        }
+      else
+        printf ("invalid (return code %d)\n", ret);
+
+      mpq_clear (res);
+      mpq_clear (var_a);
+      mpq_clear (var_b);
+    }
+    break;
+
+  case 'f':
+    {
+      /* floats; only the result is given the requested precision */
+      mpf_t  res, var_a, var_b;
+
+      mpf_init2 (res, prec);
+      mpf_init_set_ui (var_a, 55L);
+      mpf_init_set_ui (var_b, 99L);
+
+      ret = mpf_expr (res, base, str, var_a, var_b, NULL);
+      printf ("\"%s\" base %d: ", str, base);
+      if (ret == MPEXPR_RESULT_OK)
+        {
+          printf ("result ");
+          mpf_out_str (stdout, outbase, (size_t) 0, res);
+          printf ("\n");
+        }
+      else
+        printf ("invalid (return code %d)\n", ret);
+
+      mpf_clear (res);
+      mpf_clear (var_a);
+      mpf_clear (var_b);
+    }
+    break;
+  }
+}
+
+/* Walk the command line.  Option arguments update type, base and prec for
+   the expressions that follow them; every other argument is evaluated with
+   run_expr; "--" turns all remaining arguments into expressions.  Exits
+   with status 1 on an unrecognised option, on a missing option argument,
+   or when no expression was given at all.  */
+int
+main (int argc, char *argv[])
+{
+  /* argv is advanced during the scan and points at the terminating NULL by
+     the time the usage message is printed, so keep the program name here */
+  const char     *progname = argv[0];
+  int            type = 'z';
+  int            base = 0;
+  unsigned long  prec = 64;
+  int            seen_expr = 0;
+  int            opt;
+  char           *arg;
+
+  for (;;)
+    {
+      argv++;
+      arg = argv[0];
+      if (arg == NULL)
+        break;
+
+      if (arg[0] == '-')
+        {
+          /* several option letters may be bundled in one argument */
+          for (;;)
+            {
+              arg++;
+              opt = arg[0];
+
+              switch (opt) {
+              case '\0':
+                goto end_opt;
+
+              case 'f':
+              case 'q':
+              case 'z':
+                type = opt;
+                break;
+
+              case 'b':
+                /* value is the rest of this argument, or else the next one */
+                arg++;
+                if (arg[0] == '\0')
+                  {
+                    argv++;
+                    arg = argv[0];
+                    if (arg == NULL)
+                      {
+                      need_arg:
+                        fprintf (stderr, "Need argument for -%c\n", opt);
+                        exit (1);
+                      }
+                  }
+                base = atoi (arg);
+                goto end_opt;
+
+              case 'p':
+                arg++;
+                if (arg[0] == '\0')
+                  {
+                    argv++;
+                    arg = argv[0];
+                    if (arg == NULL)
+                      goto need_arg;
+                  }
+                prec = atoi (arg);
+                goto end_opt;
+
+              case '-':
+                arg++;
+                if (arg[0] != '\0')
+                  {
+                    /* no "--foo" options */
+                    fprintf (stderr, "Unrecognised option --%s\n", arg);
+                    exit (1);
+                  }
+                /* stop option interpretation at "--" */
+                for (;;)
+                  {
+                    argv++;
+                    arg = argv[0];
+                    if (arg == NULL)
+                      goto done;
+                    run_expr (type, base, prec, arg);
+                    seen_expr = 1;
+                  }
+
+              default:
+                fprintf (stderr, "Unrecognised option -%c\n", opt);
+                exit (1);
+              }
+            }
+        end_opt:
+          ;
+        }
+      else
+        {
+          run_expr (type, base, prec, arg);
+          seen_expr = 1;
+        }
+    }
+
+ done:
+  if (! seen_expr)
+    {
+      printf ("Usage: %s [-z] [-q] [-f] [-p prec] [-b base] expression...\n", progname);
+      exit (1);
+    }
+
+  return 0;
+}
diff --git a/demos/expr/t-expr.c b/demos/expr/t-expr.c

new file mode 100644 (file)

index 0000000..195350e
--- /dev/null
+++ b/demos/expr/t-expr.c
@@ -0,0 +1,499 @@
+/* Test expression evaluation (print nothing and exit 0 if successful).
+
+Copyright 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "tests.h"
+#include "expr-impl.h"
+
+
+int  option_trace = 0;
+
+
+struct data_t {
+  int         base;
+  const char  *expr;
+  const char  *want;
+};
+
+#define numberof(x)  (sizeof (x) / sizeof ((x)[0]))
+
+
+/* These data_xxx[] arrays are tables to be tested with one or more of the
+   mp?_t types.  z=mpz_t, q=mpq_t, f=mpf_t.  */
+
+struct data_t  data_zqf[] = {
+
+  /* various deliberately wrong expressions */
+  { 0, "", NULL },
+  { 0, "1+", NULL },
+  { 0, "+2", NULL },
+  { 0, "1,2", NULL },
+  { 0, "foo(1,2)", NULL },
+  { 0, "1+foo", NULL },
+  { 10, "0fff", NULL },
+  { 0, "!", NULL },
+  { 0, "10!", NULL },
+  { 0, "-10!", NULL },
+  { 0, "gcd((4,6))", NULL },
+  { 0, "()", NULL },
+  { 0, "fac(2**1000)", NULL },
+  { 0, "$", NULL },
+  { 0, "$-", NULL },
+
+  /* some basics */
+  { 10, "123", "123" },
+  { 10, "-123", "-123" },
+  { 10, "1+2", "3" },
+  { 10, "1+2+3", "6" },
+  { 10, "1+2*3", "7" },
+  { 10, "3*2+1", "7" },
+  { 10, "$a", "55" },
+  { 10, "b", "99" },
+  { 16, "b", "11" },
+  { 10, "4**3 * 2 + 1", "129" },
+  { 10, "1<2", "1" },
+  { 10, "1>2", "0" },
+
+  { 10, "(123)", "123" },
+
+  { 10, "sgn(-123)", "-1" },
+  { 10, "5-7", "-2" },
+
+  { 0, "cmp(0,0)", "0" },
+  { 0, "cmp(1,0)", "1" },
+  { 0, "cmp(0,1)", "-1" },
+  { 0, "cmp(-1,0)", "-1" },
+  { 0, "cmp(0,-1)", "1" },
+
+  { 10, "0 ? 123 : 456", "456" },
+  { 10, "1 ? 4+5 : 6+7", "9" },
+
+  { 10, "(123)", "123" },
+  { 10, "(2+3)", "5" },
+  { 10, "(4+5)*(5+6)", "99" },
+
+  { 0, "1 << 16", "65536" },
+  { 0, "256 >> 4", "16" },
+  { 0, "-256 >> 4", "-16" },
+
+  { 0, "!1", "0" },
+  { 0, "!9", "0" },
+  { 0, "!0", "1" },
+
+  { 0, "2**2**2", "16" },
+  { 0, "-2**2**2", "-16" },
+
+  { 0, "0x100", "256" },
+  { 10, "0x100", NULL },
+  { 10, "0x 100", NULL },
+
+  { 0, " max ( 1, 2, 3, 4, 5, 6, 7, 8)", "8" },
+  { 0, " max ( 1, 9, 2, 3, 4, 5, 6, 7, 8)", "9" },
+  { 0, " min ( 1, 9, 2, 3, 4, 5, 6, 7, 8)", "1" },
+
+  { 10, "abs(123)",  "123" },
+  { 10, "abs(-123)", "123" },
+  { 10, "abs(0)",    "0" },
+
+  /* filling data stack */
+  { 0, "1+(1+(1+(1+(1+(1+(1+(1+(1+(1+(1+(1+(1+(1+(1+1))))))))))))))", "16" },
+
+  /* filling control stack */
+  { 0, "----------------------------------------------------1", "1" },
+};
+
+
+const struct data_t  data_z[] = {
+  { 0, "divisible_p(333,3)", "1" },
+  { 0, "congruent_p(7,1,3)", "1" },
+
+  { 0, "cmpabs(0,0)", "0" },
+  { 0, "cmpabs(1,0)", "1" },
+  { 0, "cmpabs(0,1)", "-1" },
+  { 0, "cmpabs(-1,0)", "1" },
+  { 0, "cmpabs(0,-1)", "-1" },
+
+  { 0, "odd_p(1)", "1" },
+  { 0, "odd_p(0)", "0" },
+  { 0, "odd_p(-1)", "1" },
+
+  { 0, "even_p(1)", "0" },
+  { 0, "even_p(0)", "1" },
+  { 0, "even_p(-1)", "0" },
+
+  { 0, "fac(0)",  "1" },
+  { 0, "fac(1)",  "1" },
+  { 0, "fac(2)",  "2" },
+  { 0, "fac(3)",  "6" },
+  { 0, "fac(10)", "3628800" },
+
+  { 10, "root(81,4)", "3" },
+
+  { 10, "gcd(4,6)", "2" },
+  { 10, "gcd(4,6,9)", "1" },
+
+  { 10, "powm(3,2,9)", "0" },
+  { 10, "powm(3,2,8)", "1" },
+
+  /* filling data stack */
+  { 0, "1 ? 1 : 1 || 1 && 1 | 1 ^ 1 & 1 == 1 >= 1 << 1 - 1 * 1 ** 1", "1" },
+
+  /* filling control stack */
+  { 0, "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~1", "1" },
+
+  { 0, "fib(10)", "55" },
+
+  { 0, "setbit(0,5)", "32" },
+  { 0, "clrbit(32,5)", "0" },
+  { 0, "tstbit(32,5)", "1" },
+  { 0, "tstbit(32,4)", "0" },
+  { 0, "scan0(7,0)", "3" },
+  { 0, "scan1(7,0)", "0" },
+};
+
+const struct data_t  data_zq[] = {
+  /* expecting failure */
+  { 0, "1.2", NULL },
+};
+
+const struct data_t  data_q[] = {
+  { 10,  "(1/2 + 1/3 + 1/4 + 1/5 + 1/6)*20", "29" },
+  { 0, "num(5/9)", "5" },
+  { 0, "den(5/9)", "9" },
+};
+
+const struct data_t  data_zf[] = {
+  { 10, "sqrt ( 49 )", "7" },
+  { 10, "sqrt ( 49 ) + 1", "8" },
+  { 10, "sqrt((49))", "7" },
+  { 10, "sqrt((((((((49))))))))", "7" },
+};
+
+const struct data_t  data_f[] = {
+  { 0, "1@10",    "10000000000" },
+  { 0, "1.5@10",  "15000000000" },
+  { 0, "1000@-1", "100" },
+  { 0, "10.00@-1", "1" },
+
+  { 0, "1e10",     "10000000000" },
+  { 0, "1.5e10",   "15000000000" },
+  { 0, "1000e-1",  "100" },
+  { 0, "10.00e-1", "1" },
+
+  { 16, "1@9",  "68719476736" },
+
+  { 16,  "1@10", "18446744073709551616" },
+  { -16, "1@10", "1099511627776" },
+
+  { 0, "ceil(0)",           "0" },
+  { 0, "ceil(0.25)",        "1" },
+  { 0, "ceil(0.5)",         "1" },
+  { 0, "ceil(1.5)",         "2" },
+  { 0, "ceil(-0.5)",        "0" },
+  { 0, "ceil(-1.5)",        "-1" },
+
+  /* only simple cases because mpf_eq currently only works on whole limbs */
+  { 0, "eq(0xFFFFFFFFFFFFFFFF1111111111111111,0xFFFFFFFFFFFFFFFF2222222222222222,64)", "1" },
+  { 0, "eq(0xFFFFFFFFFFFFFFFF1111111111111111,0xFFFFFFFFFFFFFFFF2222222222222222,128)", "0" },
+
+  { 0, "floor(0)",           "0" },
+  { 0, "floor(0.25)",        "0" },
+  { 0, "floor(0.5)",         "0" },
+  { 0, "floor(1.5)",         "1" },
+  { 0, "floor(-0.5)",        "-1" },
+  { 0, "floor(-1.5)",        "-2" },
+
+  { 0, "integer_p(1)",   "1" },
+  { 0, "integer_p(0.5)", "0" },
+
+  { 0, "trunc(0)",           "0" },
+  { 0, "trunc(0.25)",        "0" },
+  { 0, "trunc(0.5)",         "0" },
+  { 0, "trunc(1.5)",         "1" },
+  { 0, "trunc(-0.5)",        "0" },
+  { 0, "trunc(-1.5)",        "-1" },
+};
+
+struct datalist_t {
+  const struct data_t  *data;
+  int                  num;
+};
+
+#define DATALIST(data)  { data, numberof (data) }
+
+struct datalist_t  list_z[] = {
+  DATALIST (data_z),
+  DATALIST (data_zq),
+  DATALIST (data_zf),
+  DATALIST (data_zqf),
+};
+
+struct datalist_t  list_q[] = {
+  DATALIST (data_q),
+  DATALIST (data_zq),
+  DATALIST (data_zqf),
+};
+
+struct datalist_t  list_f[] = {
+  DATALIST (data_zf),
+  DATALIST (data_zqf),
+  DATALIST (data_f),
+};
+
+
+/* Run every case in list_z through mpz_expr, with 55 and 99 as the two
+   variable operands.  A case whose want is NULL must be rejected by the
+   evaluator; any other case must succeed and give exactly the value that
+   want parses to.  The first case that fails is printed and the program
+   aborts.  */
+void
+check_z (void)
+{
+  const struct data_t  *data;
+  mpz_t  a, b, got, want;
+  int    l, i, ret;
+
+  mpz_init (got);
+  mpz_init (want);
+  mpz_init_set_ui (a, 55);
+  mpz_init_set_ui (b, 99);
+
+  for (l = 0; l < numberof (list_z); l++)
+    {
+      data = list_z[l].data;
+
+      for (i = 0; i < list_z[l].num; i++)
+        {
+          if (option_trace)
+            printf ("mpz_expr \"%s\"\n", data[i].expr);
+
+          ret = mpz_expr (got, data[i].base, data[i].expr, a, b, NULL);
+
+          if (data[i].want == NULL)
+            {
+              /* expect to fail */
+              if (ret == MPEXPR_RESULT_OK)
+                {
+                  printf ("mpz_expr wrong return value, got %d, expected failure\n", ret);
+                  goto error;
+                }
+            }
+          else
+            {
+              /* a bad want string is a mistake in the table, not in mpz_expr */
+              if (mpz_set_str (want, data[i].want, 0) != 0)
+                {
+                  printf ("Cannot parse wanted value string\n");
+                  goto error;
+                }
+              if (ret != MPEXPR_RESULT_OK)
+                {
+                  printf ("mpz_expr failed unexpectedly\n");
+                  printf ("   return value %d\n", ret);
+                  goto error;
+                }
+              if (mpz_cmp (got, want) != 0)
+                {
+                  printf ("mpz_expr wrong result\n");
+                  printf ("   got  "); mpz_out_str (stdout, 10, got);
+                  printf ("\n");
+                  printf ("   want "); mpz_out_str (stdout, 10, want);
+                  printf ("\n");
+                  goto error;
+                }
+            }
+        }
+    }
+  mpz_clear (a);
+  mpz_clear (b);
+  mpz_clear (got);
+  mpz_clear (want);
+  return;
+
+  /* data and i still identify the failing case here */
+ error:
+  printf ("   base %d\n", data[i].base);
+  printf ("   expr \"%s\"\n", data[i].expr);
+  if (data[i].want != NULL)
+    printf ("   want \"%s\"\n", data[i].want);
+  abort ();
+}
+
+/* Run every case in list_q through mpq_expr, with 55/1 and 99/1 as the two
+   variable operands.  A case whose want is NULL must be rejected by the
+   evaluator; any other case must succeed and give exactly the integer that
+   want parses to.  The first case that fails is printed and the program
+   aborts.  */
+void
+check_q (void)
+{
+  const struct data_t  *data;
+  mpq_t  a, b, got, want;
+  int    l, i, ret;
+
+  mpq_init (got);
+  mpq_init (want);
+  mpq_init (a);
+  mpq_init (b);
+
+  mpq_set_ui (a, 55, 1);
+  mpq_set_ui (b, 99, 1);
+
+  for (l = 0; l < numberof (list_q); l++)
+    {
+      data = list_q[l].data;
+
+      for (i = 0; i < list_q[l].num; i++)
+        {
+          if (option_trace)
+            printf ("mpq_expr \"%s\"\n", data[i].expr);
+
+          ret = mpq_expr (got, data[i].base, data[i].expr, a, b, NULL);
+
+          if (data[i].want == NULL)
+            {
+              /* expect to fail */
+              if (ret == MPEXPR_RESULT_OK)
+                {
+                  printf ("mpq_expr wrong return value, got %d, expected failure\n", ret);
+                  goto error;
+                }
+            }
+          else
+            {
+              /* want strings are whole numbers: parse into the numerator
+                 and force the denominator to 1 */
+              if (mpz_set_str (mpq_numref(want), data[i].want, 0) != 0)
+                {
+                  printf ("Cannot parse wanted value string\n");
+                  goto error;
+                }
+              mpz_set_ui (mpq_denref(want), 1);
+
+              if (ret != MPEXPR_RESULT_OK)
+                {
+                  printf ("mpq_expr failed unexpectedly\n");
+                  printf ("   return value %d\n", ret);
+                  goto error;
+                }
+              if (mpq_cmp (got, want) != 0)
+                {
+                  printf ("mpq_expr wrong result\n");
+                  printf ("   got  "); mpq_out_str (stdout, 10, got);
+                  printf ("\n");
+                  printf ("   want "); mpq_out_str (stdout, 10, want);
+                  printf ("\n");
+                  goto error;
+                }
+            }
+        }
+    }
+  mpq_clear (a);
+  mpq_clear (b);
+  mpq_clear (got);
+  mpq_clear (want);
+  return;
+
+  /* data and i still identify the failing case here */
+ error:
+  printf ("   base %d\n", data[i].base);
+  printf ("   expr \"%s\"\n", data[i].expr);
+  if (data[i].want != NULL)
+    printf ("   want \"%s\"\n", data[i].want);
+  abort ();
+}
+
+/* Run every case in list_f through mpf_expr, with 55 and 99 as the two
+   variable operands and a default precision of 200 bits.  A case whose
+   want is NULL must be rejected by the evaluator; any other case must
+   succeed and compare equal to the value that want parses to.  The first
+   case that fails is printed and the program aborts.  */
+void
+check_f (void)
+{
+  const struct data_t  *data;
+  mpf_t  a, b, got, want;
+  int    l, i, ret;
+
+  /* must come before the mpf_init calls below, which use the default */
+  mpf_set_default_prec (200L);
+
+  mpf_init (got);
+  mpf_init (want);
+  mpf_init_set_ui (a, 55);
+  mpf_init_set_ui (b, 99);
+
+  for (l = 0; l < numberof (list_f); l++)
+    {
+      data = list_f[l].data;
+
+      for (i = 0; i < list_f[l].num; i++)
+        {
+          if (option_trace)
+            printf ("mpf_expr \"%s\"\n", data[i].expr);
+
+          ret = mpf_expr (got, data[i].base, data[i].expr, a, b, NULL);
+
+          if (data[i].want == NULL)
+            {
+              /* expect to fail */
+              if (ret == MPEXPR_RESULT_OK)
+                {
+                  printf ("mpf_expr wrong return value, got %d, expected failure\n", ret);
+                  goto error;
+                }
+            }
+          else
+            {
+              /* a bad want string is a mistake in the table, not in mpf_expr */
+              if (mpf_set_str (want, data[i].want, 0) != 0)
+                {
+                  printf ("Cannot parse wanted value string\n");
+                  goto error;
+                }
+
+              if (ret != MPEXPR_RESULT_OK)
+                {
+                  printf ("mpf_expr failed unexpectedly\n");
+                  printf ("   return value %d\n", ret);
+                  goto error;
+                }
+              if (mpf_cmp (got, want) != 0)
+                {
+                  printf ("mpf_expr wrong result\n");
+                  printf ("   got  "); mpf_out_str (stdout, 10, 20, got);
+                  printf ("\n");
+                  printf ("   want "); mpf_out_str (stdout, 10, 20, want);
+                  printf ("\n");
+                  goto error;
+                }
+            }
+        }
+    }
+  mpf_clear (a);
+  mpf_clear (b);
+  mpf_clear (got);
+  mpf_clear (want);
+  return;
+
+  /* data and i still identify the failing case here */
+ error:
+  printf ("   base %d\n", data[i].base);
+  printf ("   expr \"%s\"\n", data[i].expr);
+  if (data[i].want != NULL)
+    printf ("   want \"%s\"\n", data[i].want);
+  abort ();
+}
+
+
+/* Run the mpz, mpq and mpf checks in that order.  Giving any command-line
+   argument turns on tracing, which prints each expression before it is
+   evaluated.  */
+int
+main (int argc, char *argv[])
+{
+  tests_start ();
+
+  /* the argument's content is not looked at, only its presence */
+  if (argc > 1)
+    option_trace = 1;
+
+  check_z ();
+  check_q ();
+  check_f ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/demos/factorize.c b/demos/factorize.c

new file mode 100644 (file)

index 0000000..67cf0cc
--- /dev/null
+++ b/demos/factorize.c
@@ -0,0 +1,375 @@
+/* Factoring with Pollard's rho method.
+
+Copyright 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2005, 2009
+Free Software Foundation, Inc.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+PARTICULAR PURPOSE.  See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "gmp.h"
+
+int flag_verbose = 0;
+
+static unsigned add[] = {4, 2, 4, 2, 4, 6, 2, 6};
+
+/* Remove small prime factors from t by trial division, printing each one
+   found followed by a space.  t is left holding the unfactored remainder.
+
+   The factors 2, 3 and 5 are removed first.  Further candidates start at 7
+   and advance by the increments in add[], which visits 7, 11, 13, 17, 19,
+   23, 29, 31, 37, ... and so skips every multiple of 2, 3 and 5.  The
+   search stops when t reaches 1, when the quotient drops below the next
+   candidate, or after more than limit candidates in a row have failed to
+   divide t.
+
+   NOTE(review): t == 0 is not handled here (mpz_scan1 finds no set bit);
+   factor() filters that case out before calling.  */
+void
+factor_using_division (mpz_t t, unsigned int limit)
+{
+  mpz_t q, r;
+  unsigned long int f;
+  int ai;
+  unsigned *addv = add;
+  unsigned int failures;
+
+  if (flag_verbose > 0)
+    {
+      printf ("[trial division (%u)] ", limit);
+      fflush (stdout);
+    }
+
+  mpz_init (q);
+  mpz_init (r);
+
+  /* the index of the lowest set bit is the number of factors of 2 */
+  f = mpz_scan1 (t, 0);
+  mpz_div_2exp (t, t, f);
+  while (f)
+    {
+      printf ("2 ");
+      fflush (stdout);
+      --f;
+    }
+
+  for (;;)
+    {
+      mpz_tdiv_qr_ui (q, r, t, 3);
+      if (mpz_cmp_ui (r, 0) != 0)
+       break;
+      mpz_set (t, q);
+      printf ("3 ");
+      fflush (stdout);
+    }
+
+  for (;;)
+    {
+      mpz_tdiv_qr_ui (q, r, t, 5);
+      if (mpz_cmp_ui (r, 0) != 0)
+       break;
+      mpz_set (t, q);
+      printf ("5 ");
+      fflush (stdout);
+    }
+
+  failures = 0;
+  f = 7;
+  ai = 0;
+  while (mpz_cmp_ui (t, 1) != 0)
+    {
+      mpz_tdiv_qr_ui (q, r, t, f);
+      if (mpz_cmp_ui (r, 0) != 0)
+       {
+         /* f does not divide t: step to the next candidate.  Note q is
+            still t divided by the candidate that just failed.  */
+         f += addv[ai];
+         if (mpz_cmp_ui (q, f) < 0)
+           break;
+         ai = (ai + 1) & 7;
+         failures++;
+         if (failures > limit)
+           break;
+       }
+      else
+       {
+         /* f divides t: keep the quotient and try the same f again */
+         mpz_swap (t, q);
+         printf ("%lu ", f);
+         fflush (stdout);
+         failures = 0;
+       }
+    }
+
+  mpz_clears (q, r, NULL);
+}
+
+/* Remove from t every factor of the form 2*k*p + 1 for k = 1 .. limit-1,
+   printing each one found followed by a space.  t is left holding the
+   unfactored remainder.  */
+void
+factor_using_division_2kp (mpz_t t, unsigned int limit, unsigned long p)
+{
+  mpz_t r;
+  mpz_t f;
+  unsigned int k;
+
+  if (flag_verbose > 0)
+    {
+      printf ("[trial division (%u)] ", limit);
+      fflush (stdout);
+    }
+
+  mpz_init (r);
+  /* first candidate is 2p+1; each pass of the loop adds another 2p */
+  mpz_init_set_ui (f, 2 * p);
+  mpz_add_ui (f, f, 1);
+  for (k = 1; k < limit; k++)
+    {
+      mpz_tdiv_r (r, t, f);
+      while (mpz_cmp_ui (r, 0) == 0)
+       {
+         mpz_tdiv_q (t, t, f);
+         mpz_tdiv_r (r, t, f);
+         /* flush only once the separator is written, so a reader of the
+            stream always sees complete "N " tokens, as it does from
+            factor_using_division */
+         mpz_out_str (stdout, 10, f);
+         fputc (' ', stdout);
+         fflush (stdout);
+       }
+      mpz_add_ui (f, f, 2 * p);
+    }
+
+  mpz_clears (f, r, NULL);
+}
+
+/* Factor n with Pollard's rho method, printing each prime factor found
+   followed by a space.  n is consumed in the process.
+
+   The sequence is x -> x^2 + a (mod n), or x -> x^p + a (mod n) when p is
+   non-zero.  P accumulates the product of the differences x1 - x modulo n;
+   its gcd with n is examined when k % 32 == 1 and at the end of each
+   round, and the round length is doubled every time.  Once that gcd is not
+   1, the sequence is replayed from y one step at a time to isolate a
+   factor.  A factor that is not a probable prime is factored recursively
+   with a random non-zero a.  */
+void
+factor_using_pollard_rho (mpz_t n, unsigned long a, unsigned long p)
+{
+  mpz_t x, x1, y, P;
+  mpz_t t1, t2;
+  unsigned long long k, l, i;
+
+  if (flag_verbose > 0)
+    {
+      printf ("[pollard-rho (%lu)] ", a);
+      fflush (stdout);
+    }
+
+  mpz_inits (t1, t2, NULL);
+  mpz_init_set_si (y, 2);
+  mpz_init_set_si (x, 2);
+  mpz_init_set_si (x1, 2);
+  mpz_init_set_ui (P, 1);
+  k = 1;
+  l = 1;
+
+  while (mpz_cmp_ui (n, 1) != 0)
+    {
+      for (;;)
+       {
+         do
+           {
+             /* advance x one step */
+             if (p != 0)
+               {
+                 mpz_powm_ui (x, x, p, n);
+                 mpz_add_ui (x, x, a);
+               }
+             else
+               {
+                 mpz_mul (t1, x, x);
+                 mpz_mod (x, t1, n);
+                 mpz_add_ui (x, x, a);
+               }
+
+             /* P = P * (x1 - x) mod n */
+             mpz_sub (t1, x1, x);
+             mpz_mul (t2, P, t1);
+             mpz_mod (P, t2, n);
+
+             if (k % 32 == 1)
+               {
+                 mpz_gcd (t1, P, n);
+                 if (mpz_cmp_ui (t1, 1) != 0)
+                   goto factor_found;
+                 /* y marks where the replay at factor_found starts from */
+                 mpz_set (y, x);
+               }
+           }
+         while (--k != 0);
+
+         mpz_gcd (t1, P, n);
+         if (mpz_cmp_ui (t1, 1) != 0)
+           goto factor_found;
+
+         /* start a round twice as long, first stepping x forward k times
+            without accumulating into P */
+         mpz_set (x1, x);
+         k = l;
+         l = 2 * l;
+         for (i = 0; i < k; i++)
+           {
+             if (p != 0)
+               {
+                 mpz_powm_ui (x, x, p, n);
+                 mpz_add_ui (x, x, a);
+               }
+             else
+               {
+                 mpz_mul (t1, x, x);
+                 mpz_mod (x, t1, n);
+                 mpz_add_ui (x, x, a);
+               }
+           }
+         mpz_set (y, x);
+       }
+
+    factor_found:
+      /* step y forward until gcd (x1 - y, n) is no longer 1 */
+      do
+       {
+         if (p != 0)
+           {
+             mpz_powm_ui (y, y, p, n); mpz_add_ui (y, y, a);
+           }
+         else
+           {
+             mpz_mul (t1, y, y);
+             mpz_mod (y, t1, n);
+             mpz_add_ui (y, y, a);
+           }
+         mpz_sub (t1, x1, y);
+         mpz_gcd (t1, t1, n);
+       }
+      while (mpz_cmp_ui (t1, 1) == 0);
+
+      mpz_divexact (n, n, t1); /* divide by t1, before t1 is overwritten */
+
+      if (!mpz_probab_prime_p (t1, 25))
+       {
+         /* pick a fresh non-zero constant for the recursive attempt */
+         do
+           {
+             mp_limb_t a_limb;
+             mpn_random (&a_limb, (mp_size_t) 1);
+             a = a_limb;
+           }
+         while (a == 0);
+
+         if (flag_verbose > 0)
+           {
+             printf ("[composite factor--restarting pollard-rho] ");
+             fflush (stdout);
+           }
+         factor_using_pollard_rho (t1, a, p);
+       }
+      else
+       {
+         /* flush only once the separator is written, so a reader of the
+            stream always sees complete "N " tokens */
+         mpz_out_str (stdout, 10, t1);
+         fputc (' ', stdout);
+         fflush (stdout);
+       }
+      /* n has shrunk; bring the sequence values back into range */
+      mpz_mod (x, x, n);
+      mpz_mod (x1, x1, n);
+      mpz_mod (y, y, n);
+      if (mpz_probab_prime_p (n, 25))
+       {
+         mpz_out_str (stdout, 10, n);
+         fputc (' ', stdout);
+         fflush (stdout);
+         break;
+       }
+    }
+
+  mpz_clears (P, t2, t1, x1, x, y, NULL);
+}
+
+/* Print the prime factors of t on stdout, separated by spaces.  t is
+   consumed in the process.  Zero produces no output.  A negative t is
+   reported as -1 followed by the factors of |t|.
+
+   Trial division runs first, with a limit derived from the bit size of t:
+   the 2kp+1 form when p is non-zero, the plain form otherwise.  Whatever
+   remains is printed if it is a probable prime and is otherwise handed to
+   the rho stage, along with p.  */
+void
+factor (mpz_t t, unsigned long p)
+{
+  unsigned int division_limit;
+
+  if (mpz_sgn (t) == 0)
+    return;
+
+  /* Work on the magnitude.  Trial division would reduce a negative t to
+     -1, which is neither 1 nor a probable prime, and the rho stage never
+     finds a factor of it, so the program would not terminate.  */
+  if (mpz_sgn (t) < 0)
+    {
+      printf ("-1 ");
+      mpz_neg (t, t);
+    }
+
+  /* Set the trial division limit according to the size of t.  */
+  division_limit = mpz_sizeinbase (t, 2);
+  if (division_limit > 1000)
+    division_limit = 1000 * 1000;
+  else
+    division_limit = division_limit * division_limit;
+
+  if (p != 0)
+    factor_using_division_2kp (t, division_limit / 10, p);
+  else
+    factor_using_division (t, division_limit);
+
+  if (mpz_cmp_ui (t, 1) != 0)
+    {
+      if (flag_verbose > 0)
+       {
+         printf ("[is number prime?] ");
+         fflush (stdout);
+       }
+      if (mpz_probab_prime_p (t, 25))
+       mpz_out_str (stdout, 10, t);
+      else
+       factor_using_pollard_rho (t, 1L, p);
+    }
+}
+
+/* Factor the numbers given as arguments, or read from stdin when there are
+   none.  A leading -v makes the output verbose; a leading -q suppresses
+   the "N = " echo in stdin mode.  Among the arguments, -MpN factors
+   2^N - 1 and -2kpN sets p for the numbers that follow.  One line of
+   output is written per number factored; zero is shown as "-".  */
+int
+main (int argc, char *argv[])
+{
+  mpz_t t;
+  unsigned long p;
+  int i;
+
+  if (argc > 1 && !strcmp (argv[1], "-v"))
+    {
+      flag_verbose = 1;
+      argv++;
+      argc--;
+    }
+  if (argc > 1 && !strcmp (argv[1], "-q"))
+    {
+      flag_verbose = -1;
+      argv++;
+      argc--;
+    }
+
+  mpz_init (t);
+  if (argc > 1)
+    {
+      p = 0;
+      for (i = 1; i < argc; i++)
+       {
+         if (!strncmp (argv[i], "-Mp", 3))
+           {
+             p = atoi (argv[i] + 3);
+             mpz_set_ui (t, 1);
+             mpz_mul_2exp (t, t, p);
+             mpz_sub_ui (t, t, 1);
+           }
+         else if (!strncmp (argv[i], "-2kp", 4))
+           {
+             p = atoi (argv[i] + 4);
+             continue;
+           }
+         else if (mpz_set_str (t, argv[i], 0) != 0)
+           {
+             /* not a number: say so and move on, rather than factoring
+                whatever t happened to hold */
+             fprintf (stderr, "factorize: invalid number: %s\n", argv[i]);
+             continue;
+           }
+
+         if (mpz_cmp_ui (t, 0) == 0)
+           puts ("-");
+         else
+           {
+             factor (t, p);
+             puts ("");
+           }
+       }
+    }
+  else
+    {
+      for (;;)
+       {
+         /* mpz_inp_str returns 0 at end of input and on unparsable text.
+            Testing that, rather than feof, still factors a final number
+            that has no newline after it, and never factors a stale t.  */
+         if (mpz_inp_str (t, stdin, 0) == 0)
+           break;
+         if (flag_verbose >= 0)
+           {
+             mpz_out_str (stdout, 10, t); printf (" = ");
+           }
+         factor (t, 0);
+         puts ("");
+       }
+    }
+
+  exit (0);
+}
diff --git a/demos/isprime.c b/demos/isprime.c

new file mode 100644 (file)

index 0000000..083866b
--- /dev/null
+++ b/demos/isprime.c
@@ -0,0 +1,68 @@
+/* Classify numbers as probable primes, primes or composites.
+   With -q return true if the following argument is a (probable) prime.
+
+Copyright 1999, 2000, 2002, 2005 Free Software Foundation, Inc.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+PARTICULAR PURPOSE.  See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include "gmp.h"
+
+char *progname;
+
+void
+print_usage_and_exit ()        /* write both invocation forms to stderr, then exit; never returns */
+{
+  fprintf (stderr, "usage: %s -q nnn\n", progname);    /* form handled by the -q branch of main */
+  fprintf (stderr, "usage: %s nnn ...\n", progname);   /* form that classifies each argument */
+  exit (-1);
+}
+
+int
+main (int argc, char **argv)   /* isprime -q nnn   or   isprime nnn ... */
+{
+  mpz_t n;
+  int i;
+
+  progname = argv[0];          /* print_usage_and_exit reports this name */
+
+  if (argc < 2)
+    print_usage_and_exit ();
+
+  mpz_init (n);
+
+  if (argc == 3 && strcmp (argv[1], "-q") == 0)
+    {
+      if (mpz_set_str (n, argv[2], 0) != 0)
+       print_usage_and_exit ();        /* non-zero return is treated as bad input */
+      exit (mpz_probab_prime_p (n, 25) == 0);  /* status 0: (probable) prime, 1: composite; prints nothing */
+    }
+
+  for (i = 1; i < argc; i++)
+    {
+      int class;               /* result of mpz_probab_prime_p: 0, 1 or 2 */
+      if (mpz_set_str (n, argv[i], 0) != 0)
+       print_usage_and_exit ();        /* stops at the first argument that fails to parse */
+      class = mpz_probab_prime_p (n, 25);
+      mpz_out_str (stdout, 10, n);     /* echo the number in decimal before the verdict */
+      if (class == 0)
+       puts (" is composite");
+      else if (class == 1)
+       puts (" is a probable prime");
+      else /* class == 2 */
+       puts (" is a prime");
+    }
+  exit (0);
+}
diff --git a/demos/perl/GMP.pm b/demos/perl/GMP.pm

new file mode 100644 (file)

index 0000000..3dc6b32
--- /dev/null
+++ b/demos/perl/GMP.pm
@@ -0,0 +1,649 @@
+# GMP perl module
+
+# Copyright 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation; either version 3 of the License, or (at
+# your option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+# [Note: The above copyright notice is repeated in the documentation section
+# below, in order to get it into man pages etc generated by the various pod
+# conversions.  When changing, be sure to update below too.]
+
+
+# This code is designed to work with perl 5.005, so it and the sub-packages
+# aren't as modern as they could be.
+
+package GMP;
+
+require Symbol;
+require Exporter;
+require DynaLoader;
+@ISA = qw(Exporter DynaLoader);
+
+@EXPORT = qw();                 # nothing is exported by default
+@EXPORT_OK = qw(version);       # version is importable by name only; it is not in the 'all' tag
+%EXPORT_TAGS = ('all' => [qw(
+                             get_d get_d_2exp get_si get_str integer_p
+                             printf sgn sprintf)],
+               'constants' => [()]);   # tag is defined but selects no functions
+Exporter::export_ok_tags('all');       # add the names in the 'all' tag to @EXPORT_OK
+
+$VERSION = '2.00';
+bootstrap GMP $VERSION;                # bootstrap comes from DynaLoader (see @ISA)
+
+
+# The format string is cut up into "%" specifiers so GMP types can be
+# passed to GMP::sprintf_internal.  Any "*"s are interpolated before
+# calling sprintf_internal, which saves worrying about variable
+# argument lists there.
+#
+# Because sprintf_internal is only called after the conversion and
+# operand have been checked there won't be any crashes from a bad
+# format string.
+#
+sub sprintf {    # GMP-aware sprintf: returns the formatted string
+  my $fmt = shift;    # format string; the remaining @_ are the operands
+  my $out = '';
+  my ($pre, $dummy, $pat, $rest);    # literal prefix, unused inner group, one "%" spec, remainder
+
+  while (($pre, $dummy, $pat, $rest) = ($fmt =~ /^((%%|[^%])*)(%[- +#.*hlLqv\d]*[bcdfeEgGinopsuxX])(.*)$/s)) {
+
+    $out .= $pre;    # NOTE(review): a "%%" inside $pre is copied as-is, not collapsed to "%" -- confirm intended
+
+    my $pat2 = $pat;    # $pat with "*"s expanded
+    my @params = ();    # arguments per "*"s
+    while ($pat2 =~ /[*]/) {
+      my $arg = shift;    # each "*" consumes the next operand from @_
+      $pat2 =~ s/[*]/$arg/;
+      push @params, $arg;
+    }
+
+    if (UNIVERSAL::isa($_[0],"GMP::Mpz")) {
+      if ($pat2 !~ /[dioxX]$/) {
+       die "GMP::sprintf: unsupported output format for mpz: $pat2\n";
+      }
+      $pat2 =~ s/(.)$/Z$1/;    # put the type letter before the conversion, e.g. "%d" becomes "%Zd"
+      $out .= sprintf_internal ($pat2, shift);
+
+    } elsif (UNIVERSAL::isa($_[0],"GMP::Mpq")) {
+      if ($pat2 !~ /[dioxX]$/) {
+       die "GMP::sprintf: unsupported output format for mpq: $pat2\n";
+      }
+      $pat2 =~ s/(.)$/Q$1/;    # e.g. "%x" becomes "%Qx"
+      $out .= sprintf_internal ($pat2, shift);
+
+    } elsif (UNIVERSAL::isa($_[0],"GMP::Mpf")) {
+      if ($pat2 !~ /[eEfgG]$/) {
+       die "GMP::sprintf: unsupported output format for mpf: $pat2\n";
+      }
+      $pat2 =~ s/(.)$/F$1/;    # e.g. "%.2f" becomes "%.2Ff"
+      $out .= sprintf_internal ($pat2, shift);
+
+    } elsif ($pat =~ /n$/) {
+      # do it this way so h, l or V type modifiers are respected, and use a
+      # dummy variable to avoid a warning about discarding the value
+      my $dummy = sprintf "%s$pat", $out, $_[0];    # $out is formatted first, then the %n spec is applied to $_[0]
+      shift;
+
+    } else {
+      $out .= sprintf $pat, @params, shift;    # non-GMP operand: original spec, "*" values passed as arguments
+    }
+
+    $fmt = $rest;    # carry on with the text after this spec
+  }
+  $out .= $fmt;    # the loop ended, so no further spec matched; append the rest unchanged
+  return $out;
+}
+
+sub printf {    # print the GMP::sprintf result to a given handle, or to STDOUT
+  if (ref($_[0]) eq 'GLOB') {    # first argument is a glob reference: treat it as the output handle
+    my $h = Symbol::qualify_to_ref(shift, caller);    # remove the handle from @_ before formatting
+    print $h GMP::sprintf(@_);
+  } else {
+    print STDOUT GMP::sprintf(@_);
+  }
+}
+
+1;
+__END__
+
+
+
+=head1 NAME
+
+GMP - Perl interface to the GNU Multiple Precision Arithmetic Library
+
+=head1 SYNOPSIS
+
+    use GMP;
+    use GMP::Mpz;
+    use GMP::Mpq;
+    use GMP::Mpf;
+    use GMP::Rand;
+
+=head1 DESCRIPTION
+
+This module provides access to GNU MP arbitrary precision integers,
+rationals and floating point.
+
+No functions are exported from these packages by default, but can be
+selected in the usual way, or the tag :all for everything.
+
+    use GMP::Mpz qw(gcd lcm);    # just these functions
+    use GMP::Mpq qw(:all);       # everything in mpq
+
+=head2 GMP::Mpz
+
+This class provides arbitrary precision integers.  A new mpz can be
+constructed with C<mpz>.  The initial value can be an integer, float,
+string, mpz, mpq or mpf.  Floats, mpq and mpf will be automatically
+truncated to an integer.
+
+    use GMP::Mpz qw(:all);
+    my $a = mpz(123);
+    my $b = mpz("0xFFFF");
+    my $c = mpz(1.5);       # truncated
+
+The following overloaded operators are available, and corresponding
+assignment forms like C<+=>,
+
+=over 4
+
+=item
+
++ - * / % E<lt>E<lt> E<gt>E<gt> ** & | ^ ! E<lt> E<lt>= == != E<gt> E<gt>=
+E<lt>=E<gt> abs not sqrt
+
+=back
+
+C</> and C<%> round towards zero (as per the C<tdiv> functions in GMP).
+
+The following functions are available, behaving the same as the
+corresponding GMP mpz functions,
+
+=over 4
+
+=item
+
+bin, cdiv, cdiv_2exp, clrbit, combit, congruent_p, congruent_2exp_p,
+divexact, divisible_p, divisible_2exp_p, even_p, fac, fdiv, fdiv_2exp, fib,
+fib2, gcd, gcdext, hamdist, invert, jacobi, kronecker, lcm, lucnum, lucnum2,
+mod, mpz_export, mpz_import, nextprime, odd_p, perfect_power_p,
+perfect_square_p, popcount, powm, probab_prime_p, realloc, remove, root,
+roote, scan0, scan1, setbit, sizeinbase, sqrtrem, tdiv, tdiv_2exp, tstbit
+
+=back
+
+C<cdiv>, C<fdiv> and C<tdiv> and their C<2exp> variants return a
+quotient/remainder pair.  C<fib2> returns a pair F[n] and F[n-1], similarly
+C<lucnum2>.  C<gcd> and C<lcm> accept a variable number of arguments (one or
+more).  C<gcdext> returns a triplet of gcd and two cofactors, for example
+
+    use GMP::Mpz qw(:all);
+    $a = 7257;
+    $b = 10701;
+    ($g, $x, $y) = gcdext ($a, $b);
+    print "gcd($a,$b) is $g, and $g == $a*$x + $b*$y\n";
+
+C<mpz_import> and C<mpz_export> are so named to avoid the C<import> keyword.
+Their parameters are as follows,
+
+    $z = mpz_import ($order, $size, $endian, $nails, $string);
+    $string = mpz_export ($order, $size, $endian, $nails, $z);
+
+The order, size, endian and nails parameters are as per the corresponding C
+functions.  The string input for C<mpz_import> is interpreted as byte data
+and must be a multiple of $size bytes.  C<mpz_export> conversely returns a
+string of byte data, which will be a multiple of $size bytes.
+
+C<invert> returns the inverse, or undef if it doesn't exist.  C<remove>
+returns a remainder/multiplicity pair.  C<root> returns the nth root, and
+C<roote> returns a root/bool pair, the bool indicating whether the root is
+exact.  C<sqrtrem> and C<rootrem> return a root/remainder pair.
+
+C<clrbit>, C<combit> and C<setbit> expect a variable which they can modify,
+it doesn't make sense to pass a literal constant.  Only the given variable
+is modified, if other variables are referencing the same mpz object then a
+new copy is made of it.  If the variable isn't an mpz it will be coerced to
+one.  For instance,
+
+    use GMP::Mpz qw(setbit);
+    setbit (123, 0);  # wrong, don't pass a constant
+    $a = mpz(6);
+    $b = $a;
+    setbit ($a, 0);   # $a becomes 7, $b stays at 6
+
+C<scan0> and C<scan1> return ~0 if no 0 or 1 bit respectively is found.
+
+=head2 GMP::Mpq
+
+This class provides rationals with arbitrary precision numerators and
+denominators.  A new mpq can be constructed with C<mpq>.  The initial value
+can be an integer, float, string, mpz, mpq or mpf, or a pair of integers or
+mpz's.  No precision is lost when converting a float or mpf, the exact value
+is retained.
+
+    use GMP::Mpq qw(:all);
+    $a = mpq();              # zero
+    $b = mpq(0.5);           # gives 1/2
+    $b = mpq(14);            # integer 14
+    $b = mpq(3,4);           # fraction 3/4
+    $b = mpq("7/12");        # fraction 7/12
+    $b = mpq("0xFF/0x100");  # fraction 255/256
+
+When a fraction is given, it should be in the canonical form specified in
+the GMP manual, which is denominator positive, no common factors, and zero
+always represented as 0/1.  If not then C<canonicalize> can be called to put
+it in that form.  For example,
+
+    use GMP::Mpq qw(:all);
+    $q = mpq(21,15);   # eek! common factor 3
+    canonicalize($q);  # get rid of it
+
+The following overloaded operators are available, and corresponding
+assignment forms like C<+=>,
+
+=over 4
+
+=item
+
++ - * / E<lt>E<lt> E<gt>E<gt> ** ! E<lt> E<lt>= == != E<gt> E<gt>=
+E<lt>=E<gt> abs not
+
+=back
+
+The following functions are available,
+
+=over 4
+
+=item
+
+den, inv, num
+
+=back
+
+C<inv> calculates 1/q, as per the corresponding GMP function.  C<num> and
+C<den> return an mpz copy of the numerator or denominator respectively.  In
+the future C<num> and C<den> might give lvalues so the original mpq can be
+modified through them, but this is not done currently.
+
+=head2 GMP::Mpf
+
+This class provides arbitrary precision floating point numbers.  The
+mantissa is an arbitrary user-selected precision and the exponent is a fixed
+size (one machine word).
+
+A new mpf can be constructed with C<mpf>.  The initial value can be an
+integer, float, string, mpz, mpq or mpf.  The second argument specifies the
+desired precision in bits, or if omitted then the default precision is used.
+
+    use GMP::Mpf qw(:all);
+    $a = mpf();         # zero
+    $b = mpf(-7.5);     # default precision
+    $c = mpf(1.5, 500); # 500 bits precision
+    $d = mpf("1.0000000000000001");
+
+The following overloaded operators are available, with the corresponding
+assignment forms like C<+=>,
+
+=over 4
+
+=item
+
++ - * / E<lt>E<lt> E<gt>E<gt> ** ! E<lt> E<lt>= == != E<gt> E<gt>=
+E<lt>=E<gt> abs not sqrt
+
+=back
+
+The following functions are available, behaving the same as the
+corresponding GMP mpf functions,
+
+=over 4
+
+=item
+
+ceil, floor, get_default_prec, get_prec, mpf_eq, set_default_prec, set_prec,
+trunc
+
+=back
+
+C<mpf_eq> is so named to avoid clashing with the perl C<eq> operator.
+
+C<set_prec> expects a variable which it can modify, it doesn't make sense to
+pass a literal constant.  Only the given variable is modified, if other
+variables are referencing the same mpf object then a new copy is made of it.
+If the variable isn't an mpf it will be coerced to one.
+
+Results are the same precision as inputs, or if two mpf's are given to a
+binary operator then the precision of the first is used.  For example,
+
+    use GMP::Mpf qw(mpf);
+    $a = mpf(2.0, 100);
+    $b = mpf(2.0, 500);
+    $c = $a + $b;         # gives 100 bits precision
+
+Mpf to string conversion via "" or the usual string contexts uses C<$#> the
+same as normal float to string conversions, or defaults to C<%.g> if C<$#>
+is not defined.  C<%.g> means all significant digits in the selected
+precision.
+
+=head2 GMP class
+
+The following functions are available in the GMP class,
+
+=over 4
+
+=item
+
+fits_slong_p, get_d, get_d_2exp, get_si, get_str, integer_p, printf, sgn,
+sprintf, version
+
+=back
+
+C<get_d_2exp> accepts any integer, string, float, mpz, mpq or mpf operands
+and returns a float and an integer exponent,
+
+    ($dbl, $exp) = get_d_2exp (mpf ("3.0"));
+    # dbl is 0.75, exp is 2
+
+C<get_str> takes an optional second argument which is the base, defaulting
+to decimal.  A negative base means upper case, as per the C functions.  For
+integer, integer string, mpz or mpq operands a string is returned.
+
+    use GMP qw(:all);
+    use GMP::Mpq qw(:all);
+    print get_str(mpq(-5,8)),"\n";      # -5/8
+    print get_str(255,16),"\n";         # ff
+
+For float, float strings or mpf operands, C<get_str> accepts an optional
+third parameter being how many digits to produce, defaulting to 0 which
+means all digits.  (Only as many digits as can be accurately represented by
+the float precision are ever produced though.)  A string/exponent pair is
+returned, as per the C mpf_get_str function.  For example,
+
+    use GMP qw(:all);
+    use GMP::Mpf qw(:all);
+    ($s, $e) = get_str(111.111111111, 10, 4);
+    print ".${s}e$e\n";                 # .1111e3
+    ($s, $e) = get_str(1.625, 10);
+    print "0.$s*10^$e\n";               # 0.1625*10^1
+    ($s, $e) = get_str(mpf(2)**20, 16);
+    printf ".%s@%x\n", $s, $e;          # .1@14
+
+C<printf> and C<sprintf> allow formatted output of GMP types.  mpz and mpq
+values can be used with integer conversions (d, o, x, X) and mpf with float
+conversions (f, e, E, g, G).  All the standard perl printf features are
+available too.  For example,
+
+    use GMP::Mpz qw(mpz);
+    use GMP::Mpf qw(mpf);
+    GMP::printf ("%d %d %s", 123, mpz(2)**128, 'foo');
+    GMP::printf STDERR "%.40f", mpf(1.234);
+
+In perl 5.6.1 it doesn't seem to work to export C<printf>, the plain builtin
+C<printf> is reached unless calls are C<&printf()> style.  Explicit use of
+C<GMP::printf> is suggested.  C<sprintf> doesn't suffer this problem.
+
+    use GMP qw(sprintf);
+    use GMP::Mpq qw(mpq);
+    $s = sprintf "%x", mpq(15,16);
+
+C<version> is not exported by default or by tag :all, calling it as
+C<GMP::version()> is recommended.  It returns the GMP library version
+string, which is not to be confused with the module version number.
+
+The other GMP module functions behave as per the corresponding GMP routines,
+and accept any integer, string, float, mpz, mpq or mpf.  For example,
+
+    use GMP qw(:all);
+    use GMP::Mpz qw(mpz);
+    $z = mpz(123);
+    print sgn($z);    # gives 1
+
+Because each of GMP::Mpz, GMP::Mpq and GMP::Mpf is a sub-class of GMP,
+C<-E<gt>> style calls work too.
+
+    use GMP qw(:all);
+    use GMP::Mpq qw(mpq);
+    $q = mpq(-5,7);
+    if ($q->integer_p())   # false
+      ...
+
+=head2 GMP::Rand
+
+This class provides objects holding an algorithm and state for random number
+generation.  C<randstate> creates a new object, for example,
+
+    use GMP::Rand qw(randstate);
+    $r = randstate();
+    $r = randstate('lc_2exp_size', 64);
+    $r = randstate('lc_2exp', 43840821, 1, 32);
+    $r = randstate('mt');
+    $r = randstate($another_r);
+
+With no parameters this corresponds to the C function
+C<gmp_randinit_default>, and is a compromise between speed and randomness.
+'lc_2exp_size' corresponds to C<gmp_randinit_lc_2exp_size>, 'lc_2exp'
+corresponds to C<gmp_randinit_lc_2exp>, and 'mt' corresponds to
+C<gmp_randinit_mt>.  Or when passed another randstate object, a copy of that
+object is made.
+
+'lc_2exp_size' can fail if the requested size is bigger than the internal
+table provides for, in which case undef is returned.  The maximum size
+currently supported is 128.  The other forms always succeed.
+
+A randstate can be seeded with an integer or mpz, using the C<seed> method.
+/dev/random might be a good source of randomness, or time() or
+Time::HiRes::time() might be adequate, depending on the application.
+
+    $r->seed(time());
+
+Random numbers can be generated with the following functions,
+
+=over 4
+
+=item
+
+mpf_urandomb, mpz_rrandomb, mpz_urandomb, mpz_urandomm,
+gmp_urandomb_ui, gmp_urandomm_ui
+
+=back
+
+Each returns a new mpz or mpf (the C<_ui> forms return a plain integer),
+with a distribution per the corresponding GMP function.  For example,
+
+    use GMP::Rand qw(:all);
+    $r = randstate();
+    $a = mpz_urandomb($r,256);         # uniform mpz, 256 bits
+    $b = mpz_urandomm($r,mpz(3)**100); # uniform mpz, 0 to 3**100-1
+    $c = mpz_rrandomb($r,1024);        # special mpz, 1024 bits
+    $f = mpf_urandomb($r,128);         # uniform mpf, 128 bits, 0<=$f<1
+    $f = gmp_urandomm_ui($r,56);       # uniform int, 0 to 55
+
+=head2 Coercion
+
+Arguments to operators and functions are converted as necessary to the
+appropriate type.  For instance C<**> requires an unsigned integer exponent,
+and an mpq argument will be converted, so long as it's an integer in the
+appropriate range.
+
+    use GMP::Mpz qw(mpz);
+    use GMP::Mpq qw(mpq);
+    $p = mpz(3) ** mpq(45);   # allowed, 45 is an integer
+
+It's an error if a conversion to an integer or mpz would cause any
+truncation.  For example,
+
+    use GMP::Mpz qw(mpz);
+    $p = mpz(3) + 1.25;       # not allowed
+    $p = mpz(3) + mpz(1.25);  # allowed, explicit truncation
+
+Comparisons, however, accept any combination of operands and are always done
+exactly.  For example,
+
+    use GMP::Mpz qw(mpz);
+    print mpz(3) < 3.1;       # true
+
+Variables used on the left of an assignment operator like C<+=> are subject
+to coercion too.  An integer, float or string will change type when an mpz,
+mpq or mpf is applied to it.  For example,
+
+    use GMP::Mpz qw(mpz);
+    $a = 1;
+    $a += mpz(1234);   # $a becomes an mpz
+
+=head2 Overloading
+
+The rule for binary operators in the C<overload> mechanism is that if both
+operands are class objects then the method from the first is used.  This
+determines the result type when mixing GMP classes.  For example,
+
+    use GMP::Mpz qw(mpz);
+    use GMP::Mpq qw(mpq);
+    use GMP::Mpf qw(mpf);
+    $z = mpz(123);
+    $q = mpq(3,2);
+    $f = mpf(1.375);
+    print $q+$f;     # gives an mpq
+    print $f+$z;     # gives an mpf
+    print $z+$f;     # not allowed, would lose precision
+
+=head2 Constants
+
+A special tag C<:constants> is recognised in the module exports list.  It
+doesn't select any functions, but indicates that perl constants should be
+GMP objects.  This can only be used on one of GMP::Mpz, GMP::Mpq or GMP::Mpf
+at any one time, since they apply different rules.
+
+GMP::Mpz will treat constants as mpz's if they're integers, or ordinary
+floats if not.  For example,
+
+    use GMP::Mpz qw(:constants);
+    print 764861287634126387126378128,"\n";   # an mpz
+    print 1.25,"\n";                          # a float
+
+GMP::Mpq is similar, treating integers as mpq's and leaving floats to the
+normal perl handling.  Something like 3/4 is read as two integer mpq's and a
+division, but that's fine since it gives the intended fraction.
+
+    use GMP::Mpq qw(:constants);
+    print 3/4,"\n";    # an mpq
+    print 1.25,"\n";   # a float
+
+GMP::Mpf will treat all constants as mpf's using the default precision.
+BEGIN blocks can be used to set that precision while the code is parsed.
+For example,
+
+    use GMP::Mpf qw(:constants);
+    BEGIN { GMP::Mpf::set_default_prec(256); }
+    print 1/3;
+    BEGIN { GMP::Mpf::set_default_prec(64); }
+    print 5/7;
+
+A similar special tag :noconstants is recognised to turn off the constants
+feature.  For example,
+
+    use GMP::Mpz qw(:constants);
+    print 438249738748174928193,"\n";   # an mpz
+    use GMP::Mpz qw(:noconstants);
+    print 438249738748174928193,"\n";   # now a float
+
+All three 'integer', 'binary' and 'float' constant methods are captured.
+'float' is captured even for GMP::Mpz and GMP::Mpq since perl by default
+treats integer strings as floats if they don't fit a plain integer.
+
+=head1 SEE ALSO
+
+GMP manual, L<perl>, L<overload>.
+
+=head1 BUGS
+
+In perl 5.005_03 on i386 FreeBSD, the overloaded constants sometimes provoke
+seg faults.  Don't know if that's a perl bug or a GMP module bug, though it
+does seem to go bad before reaching anything in GMP.xs.
+
+There's no way to specify an arbitrary base when converting a string to an
+mpz (or mpq or mpf), only hex or octal with 0x or 0 (for mpz and mpq, but
+not for mpf).
+
+These modules are not reentrant or thread safe, due to the implementation of
+the XSUBs.
+
+Returning a new object from the various functions is convenient, but
+assignment versions could avoid creating new objects.  Perhaps they could be
+named after the C language functions, eg. mpq_inv($q,$q);
+
+It'd be good if C<num> and C<den> gave lvalues so the underlying mpq could
+be manipulated.
+
+C<printf> could usefully accept %b for mpz, mpq and mpf, and perhaps %x for
+mpf too.
+
+C<get_str> returning different style values for integer versus float is a
+bit unfortunate.  With mpz, mpq and mpf objects there's no doubt what it
+will do, but on a plain scalar its action depends on whether the scalar was
+promoted to a float at any stage, and then on the GMP module rules about
+using the integer or float part.
+
+=head1 INTERNALS
+
+In usual perl object style, an mpz is a reference to an object blessed into
+class C<GMP::Mpz>.  The object holds a pointer to the C language C<mpz_t>
+structure.  Similarly for mpq, mpf and randstate.
+
+A free list of mpz and mpq values is kept to avoid repeated initializing and
+clearing when objects are created and destroyed.  This aims to help speed,
+but it's not clear whether it's really needed.
+
+mpf doesn't use a free list because the precision of new objects can be
+different each time.
+
+No interface to C<mpf_set_prec_raw> is provided.  It wouldn't be very useful
+since there's no way to make an operation store its result in a particular
+object.  The plain C<set_prec> is useful though, for truncating to a lower
+precision, or as a sort of directive that subsequent calculations involving
+that variable should use a higher precision.
+
+The overheads of perl dynamic typing (operator dispatch, operand type
+checking or coercion) will mean this interface is slower than using C
+directly.
+
+Some assertion checking is available as a compile-time option.
+
+=head1 COPYRIGHT
+
+Copyright 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published
+by the Free Software Foundation; either version 3 of the License, or (at
+your option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+=cut
+
+# Local variables:
+# perl-indent-level: 2
+# fill-column: 76
+# End:
diff --git a/demos/perl/GMP.xs b/demos/perl/GMP.xs

new file mode 100644 (file)

index 0000000..2282c89
--- /dev/null
+++ b/demos/perl/GMP.xs
@@ -0,0 +1,3201 @@
+/* GMP module external subroutines.
+
+Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+/* Notes:
+
+   Routines are grouped with the alias feature and a table of function
+   pointers where possible, since each xsub routine ends up with quite a bit
+   of code size.  Different combinations of arguments and return values have
+   to be separate though.
+
+   The "INTERFACE:" feature isn't available in perl 5.005 and so isn't used.
+   "ALIAS:" requires a table lookup with CvXSUBANY(cv).any_i32 (which is
+   "ix") whereas "INTERFACE:" would have CvXSUBANY(cv).any_dptr as the
+   function pointer immediately.
+
+   Mixed-type swapped-order assignments like "$a = 123; $a += mpz(456);"
+   invoke the plain overloaded "+", not "+=", which makes life easier.
+
+   mpz_assume etc types are used with the overloaded operators since such
+   operators are always called with a class object as the first argument, we
+   don't need an sv_derived_from() lookup to check.  There's assert()s in
+   MPX_ASSUME() for this though.
+
+   The overload_constant routines reached via overload::constant get 4
+   arguments in perl 5.6, not the 3 as documented.  This is apparently a
+   bug, using "..." lets us ignore the extra one.
+
+   There's only a few "si" functions in gmp, so usually SvIV values get
+   handled with an mpz_set_si into a temporary and then a full precision mpz
+   routine.  This is reasonably efficient.
+
+   Argument types are checked, with a view to preserving all bits in the
+   operand.  Perl is a bit looser in its arithmetic, allowing rounding or
+   truncation to an intended operand type (IV, UV or NV).
+
+   Bugs:
+
+   The memory leak detection attempted in GMP::END() doesn't work when mpz's
+   are created as constants because END() is called before they're
+   destroyed.  What's the right place to hook such a check?
+
+   See the bugs section of GMP.pm too.  */
+
+
+/* Comment this out to get assertion checking. */
+#define NDEBUG
+
+/* Change this to "#define TRACE(x) x" for some diagnostics. */
+#define TRACE(x)
+
+
+#include <assert.h>
+#include <float.h>
+
+#include "EXTERN.h"
+#include "perl.h"
+#include "XSUB.h"
+#include "patchlevel.h"
+
+#include "gmp.h"
+
+
+/* Perl 5.005 doesn't have SvIsUV, only 5.6 and up.
+   Perl 5.8 has SvUOK, but not 5.6, so we don't use that.  */
+#ifndef SvIsUV
+#define SvIsUV(sv)  0
+#endif
+#ifndef SvUVX
+#define SvUVX(sv)  (croak("GMP: oops, shouldn't be using SvUVX"), 0)
+#endif
+
+
+/* Code which doesn't check anything itself, but exists to support other
+   assert()s.  */
+#ifdef NDEBUG
+#define assert_support(x)
+#else
+#define assert_support(x) x
+#endif
+
+/* LONG_MAX + 1 and ULONG_MAX + 1, as doubles */
+#define LONG_MAX_P1_AS_DOUBLE   ((double) ((unsigned long) LONG_MAX + 1))
+#define ULONG_MAX_P1_AS_DOUBLE  (2.0 * (double) ((unsigned long) ULONG_MAX/2 + 1))
+
+/* Check for perl version "major.minor".
+   Perl 5.004 doesn't have PERL_REVISION and PERL_VERSION, but that's ok,
+   we're only interested in tests above that.  */
+#if defined (PERL_REVISION) && defined (PERL_VERSION)
+#define PERL_GE(major,minor)                                    \
+    (PERL_REVISION > (major)                                    \
+     || ((major) == PERL_REVISION && PERL_VERSION >= (minor)))
+#else
+#define PERL_GE(major,minor)  (0)
+#endif
+#define PERL_LT(major,minor)  (! PERL_GE(major,minor))
+
+/* sv_derived_from etc in 5.005 took "char *" rather than "const char *".
+   Avoid some compiler warnings by using const only where it works.  */
+#if PERL_LT (5,6)
+#define classconst
+#else
+#define classconst const
+#endif
+
+/* In a MINGW or Cygwin DLL build of gmp, the various gmp functions are
+   given with dllimport directives, which prevents them being used as
+   initializers for constant data.  We give function tables as
+   "static_functable const ...", which is normally "static const", but for
+   mingw expands to just "const" making the table an automatic with a
+   run-time initializer.
+
+   In gcc 3.3.1, the function tables initialized like this end up getting
+   all the __imp__foo values fetched, even though just one or two will be
+   used.  This is wasteful, but probably not too bad.  */
+
+#if defined (__MINGW32__) || defined (__CYGWIN__)
+#define static_functable
+#else
+#define static_functable  static
+#endif
+
+#define GMP_MALLOC_ID  42
+
+static classconst char mpz_class[]  = "GMP::Mpz";
+static classconst char mpq_class[]  = "GMP::Mpq";
+static classconst char mpf_class[]  = "GMP::Mpf";
+static classconst char rand_class[] = "GMP::Rand";
+
+static HV *mpz_class_hv;
+static HV *mpq_class_hv;
+static HV *mpf_class_hv;
+
+assert_support (static long mpz_count = 0;)
+assert_support (static long mpq_count = 0;)
+assert_support (static long mpf_count = 0;)
+assert_support (static long rand_count = 0;)
+
+#define TRACE_ACTIVE()                                                   \
+  assert_support                                                         \
+  (TRACE (printf ("  active %ld mpz, %ld mpq, %ld mpf, %ld randstate\n", \
+                  mpz_count, mpq_count, mpf_count, rand_count)))
+
+
+/* Each "struct mpz_elem" etc is an mpz_t with a link field tacked on the
+   end so they can be held on a linked list.  */
+
+#define CREATE_MPX(type)                                \
+                                                        \
+  /* must have mpz_t etc first, for sprintf below */    \
+  struct type##_elem {                                  \
+    type##_t            m;                              \
+    struct type##_elem  *next;                          \
+  };                                                    \
+  typedef struct type##_elem  *type;                    \
+  typedef struct type##_elem  *type##_assume;           \
+  typedef type##_ptr          type##_coerce;            \
+                                                        \
+  static type type##_freelist = NULL;                   \
+                                                        \
+  static type                                           \
+  new_##type (void)                                     \
+  {                                                     \
+    type p;                                             \
+    TRACE (printf ("new %s\n", type##_class));          \
+    if (type##_freelist != NULL)                        \
+      {                                                 \
+        p = type##_freelist;                            \
+        type##_freelist = type##_freelist->next;        \
+      }                                                 \
+    else                                                \
+      {                                                 \
+        New (GMP_MALLOC_ID, p, 1, struct type##_elem);  \
+        type##_init (p->m);                             \
+      }                                                 \
+    TRACE (printf ("  p=%p\n", p));                     \
+    assert_support (type##_count++);                    \
+    TRACE_ACTIVE ();                                    \
+    return p;                                           \
+  }                                                     \
+
+CREATE_MPX (mpz)
+CREATE_MPX (mpq)
+
+typedef mpf_ptr  mpf;
+typedef mpf_ptr  mpf_assume;
+typedef mpf_ptr  mpf_coerce_st0;
+typedef mpf_ptr  mpf_coerce_def;
+
+
+static mpf
+new_mpf (unsigned long prec)
+{
+  mpf p;
+  New (GMP_MALLOC_ID, p, 1, __mpf_struct);
+  mpf_init2 (p, prec);
+  TRACE (printf ("  mpf p=%p\n", p));
+  assert_support (mpf_count++);
+  TRACE_ACTIVE ();
+  return p;
+}
+
+
+/* tmp_mpf_t records an allocated precision with an mpf_t so changes of
+   precision can be done with just an mpf_set_prec_raw.  */
+
+struct tmp_mpf_struct {
+  mpf_t          m;
+  unsigned long  allocated_prec;
+};
+typedef const struct tmp_mpf_struct  *tmp_mpf_srcptr;
+typedef struct tmp_mpf_struct        *tmp_mpf_ptr;
+typedef struct tmp_mpf_struct        tmp_mpf_t[1];
+
+#define tmp_mpf_init(f)                         \
+  do {                                          \
+    mpf_init (f->m);                            \
+    f->allocated_prec = mpf_get_prec (f->m);    \
+  } while (0)
+
+static void
+tmp_mpf_grow (tmp_mpf_ptr f, unsigned long prec)
+{
+  mpf_set_prec_raw (f->m, f->allocated_prec);
+  mpf_set_prec (f->m, prec);
+  f->allocated_prec = mpf_get_prec (f->m);
+}
+
+#define tmp_mpf_shrink(f)  tmp_mpf_grow (f, 1L)
+
+#define tmp_mpf_set_prec(f,prec)        \
+  do {                                  \
+    if (prec > f->allocated_prec)       \
+      tmp_mpf_grow (f, prec);           \
+    else                                \
+      mpf_set_prec_raw (f->m, prec);    \
+  } while (0)
+
+
+static mpz_t  tmp_mpz_0, tmp_mpz_1, tmp_mpz_2;
+static mpq_t  tmp_mpq_0, tmp_mpq_1;
+static tmp_mpf_t tmp_mpf_0, tmp_mpf_1;
+
+/* for GMP::Mpz::export */
+#define tmp_mpz_4  tmp_mpz_2
+
+
+#define FREE_MPX_FREELIST(p,type)               \
+  do {                                          \
+    TRACE (printf ("free %s\n", type##_class)); \
+    p->next = type##_freelist;                  \
+    type##_freelist = p;                        \
+    assert_support (type##_count--);            \
+    TRACE_ACTIVE ();                            \
+    assert (type##_count >= 0);                 \
+  } while (0)
+
+/* this version for comparison, if desired */
+#define FREE_MPX_NOFREELIST(p,type)             \
+  do {                                          \
+    TRACE (printf ("free %s\n", type##_class)); \
+    type##_clear (p->m);                        \
+    Safefree (p);                               \
+    assert_support (type##_count--);            \
+    TRACE_ACTIVE ();                            \
+    assert (type##_count >= 0);                 \
+  } while (0)
+
+#define free_mpz(z)    FREE_MPX_FREELIST (z, mpz)
+#define free_mpq(q)    FREE_MPX_FREELIST (q, mpq)
+
+
+/* Return a new mortal SV holding the given mpx_ptr pointer.
+   class_hv should be one of mpz_class_hv etc.  */
+#define MPX_NEWMORTAL(mpx_ptr, class_hv)                                \
+    sv_bless (sv_setref_pv (sv_newmortal(), NULL, mpx_ptr), class_hv)
+
+/* Aliases for use in typemaps */
+typedef char           *malloced_string;
+typedef const char     *const_string;
+typedef const char     *const_string_assume;
+typedef char           *string;
+typedef SV             *order_noswap;
+typedef SV             *dummy;
+typedef SV             *SV_copy_0;
+typedef unsigned long  ulong_coerce;
+typedef __gmp_randstate_struct *randstate;
+typedef UV             gmp_UV;
+
+#define SvMPX(s,type)  ((type) SvIV((SV*) SvRV(s)))
+#define SvMPZ(s)       SvMPX(s,mpz)
+#define SvMPQ(s)       SvMPX(s,mpq)
+#define SvMPF(s)       SvMPX(s,mpf)
+#define SvRANDSTATE(s) SvMPX(s,randstate)
+
+#define MPX_ASSUME(x,sv,type)                           \
+  do {                                                  \
+    assert (sv_derived_from (sv, type##_class));        \
+    x = SvMPX(sv,type);                                 \
+  } while (0)
+
+#define MPZ_ASSUME(z,sv)    MPX_ASSUME(z,sv,mpz)
+#define MPQ_ASSUME(q,sv)    MPX_ASSUME(q,sv,mpq)
+#define MPF_ASSUME(f,sv)    MPX_ASSUME(f,sv,mpf)
+
+#define numberof(x)  (sizeof (x) / sizeof ((x)[0]))
+#define SGN(x)       ((x)<0 ? -1 : (x) != 0)
+#define ABS(x)       ((x)>=0 ? (x) : -(x))
+#define double_integer_p(d)  (floor (d) == (d))
+
+#define x_mpq_integer_p(q) \
+  (mpz_cmp_ui (mpq_denref(q), 1L) == 0)
+
+#define assert_table(ix)  assert (ix >= 0 && ix < numberof (table))
+
+#define SV_PTR_SWAP(x,y) \
+  do { SV *__tmp = (x); (x) = (y); (y) = __tmp; } while (0)
+#define MPF_PTR_SWAP(x,y) \
+  do { mpf_ptr __tmp = (x); (x) = (y); (y) = __tmp; } while (0)
+
+
+static void
+class_or_croak (SV *sv, classconst char *cl)
+{
+  if (! sv_derived_from (sv, cl))
+    croak("not type %s", cl);
+}
+
+
+/* These are macros, wrap them in functions. */
+static int
+x_mpz_odd_p (mpz_srcptr z)
+{
+  return mpz_odd_p (z);
+}
+static int
+x_mpz_even_p (mpz_srcptr z)
+{
+  return mpz_even_p (z);
+}
+
+static void
+x_mpq_pow_ui (mpq_ptr r, mpq_srcptr b, unsigned long e)
+{
+  mpz_pow_ui (mpq_numref(r), mpq_numref(b), e);
+  mpz_pow_ui (mpq_denref(r), mpq_denref(b), e);
+}
+
+/* GMP allocate hook (installed in BOOT): n bytes via perl's New().  */
+static void *
+my_gmp_alloc (size_t n)
+{
+  void *p;
+  TRACE (printf ("my_gmp_alloc %lu\n", (unsigned long) n));
+  New (GMP_MALLOC_ID, p, n, char);
+  TRACE (printf ("  p=%p\n", p));
+  return p;
+}
+
+static void *
+my_gmp_realloc (void *p, size_t oldsize, size_t newsize)
+{ /* GMP reallocate hook (installed in BOOT); oldsize is only traced.  */
+  TRACE (printf ("my_gmp_realloc %p, %lu to %lu\n", p, (unsigned long) oldsize, (unsigned long) newsize));
+  Renew (p, newsize, char);
+  TRACE (printf ("  p=%p\n", p));
+  return p;
+}
+
+static void
+my_gmp_free (void *p, size_t n)
+{ /* GMP free hook (installed in BOOT); n is only traced.  */
+  TRACE (printf ("my_gmp_free %p %lu\n", p, (unsigned long) n));
+  Safefree (p);
+}
+
+
+#define my_mpx_set_svstr(type)                                  \
+  static void                                                   \
+  my_##type##_set_svstr (type##_ptr x, SV *sv)                  \
+  {                                                             \
+    const char  *str;                                           \
+    STRLEN      len;                                            \
+    TRACE (printf ("  my_" #type "_set_svstr\n"));              \
+    assert (SvPOK(sv) || SvPOKp(sv));                           \
+    str = SvPV (sv, len);                                       \
+    TRACE (printf ("  str \"%s\"\n", str));                     \
+    if (type##_set_str (x, str, 0) != 0)                        \
+      croak ("%s: invalid string: %s", type##_class, str);      \
+  }
+
+my_mpx_set_svstr(mpz)
+my_mpx_set_svstr(mpq)
+my_mpx_set_svstr(mpf)
+
+
+/* Compare x with yn/yd using a scratch mpq from new_mpq()/free_mpq().  */
+static int
+x_mpq_cmp_si (mpq_srcptr x, long yn, unsigned long yd)
+{
+  mpq  rhs = new_mpq ();
+  int  cmp;
+
+  mpq_set_si (rhs->m, yn, yd);
+  cmp = mpq_cmp (x, rhs->m);
+  free_mpq (rhs);
+  return cmp;
+}
+
+static int
+x_mpq_fits_slong_p (mpq_srcptr q)
+{
+  return ! (x_mpq_cmp_si (q, LONG_MIN, 1L) < 0      /* below LONG_MIN */
+            || mpq_cmp_ui (q, LONG_MAX, 1L) > 0);   /* above LONG_MAX */
+}
+
+static int
+x_mpz_cmp_q (mpz_ptr x, mpq_srcptr y)
+{
+  int  ret;
+  mpz_set_ui (mpq_denref(tmp_mpq_0), 1L);
+  mpz_swap (mpq_numref(tmp_mpq_0), x);
+  ret = mpq_cmp (tmp_mpq_0, y);
+  mpz_swap (mpq_numref(tmp_mpq_0), x);
+  return ret;
+}
+
+static int
+x_mpz_cmp_f (mpz_srcptr x, mpf_srcptr y)
+{
+  tmp_mpf_set_prec (tmp_mpf_0, mpz_sizeinbase (x, 2));
+  mpf_set_z (tmp_mpf_0->m, x);
+  return mpf_cmp (tmp_mpf_0->m, y);
+}
+
+
+#define USE_UNKNOWN  0
+#define USE_IVX      1
+#define USE_UVX      2
+#define USE_NVX      3
+#define USE_PVX      4
+#define USE_MPZ      5
+#define USE_MPQ      6
+#define USE_MPF      7
+
+/* mg_get is called every time we get a value, even if the private flags are
+   still set from a previous such call.  This is the same as SvIV and
+   friends do.
+
+   When POK, we use the PV, even if there's an IV or NV available.  This is
+   because it's hard to be sure there wasn't any rounding in establishing
+   the IV and/or NV.  Cases of overflow, where the PV should definitely be
+   used, are easy enough to spot, but rounding is hard.  So although IV or
+   NV would be more efficient, we must use the PV to be sure of getting all
+   the data.  Applications should convert once to mpz, mpq or mpf when using
+   a value repeatedly.
+
+   Zany dual-type scalars like $! where the IV is an error code and the PV
+   is an error description string won't work with this preference for PV,
+   but that's too bad.  Such scalars should be rare, and unlikely to be used
+   in bignum calculations.
+
+   When IOK and NOK are both set, we would prefer to use the IV since it can
+   be converted more efficiently, and because on a 64-bit system the NV may
+   have less bits than the IV.  The following rules are applied,
+
+   - If the NV is not an integer, then we must use that NV, since clearly
+     the IV was merely established by rounding and is not the full value.
+
+   - In perl prior to 5.8, an NV too big for an IV leaves an overflow value
+     0xFFFFFFFF.  If the NV is too big to fit an IV then clearly it's the NV
+     which is the true value and must be used.
+
+   - In perl 5.8 and up, such an overflow doesn't set IOK, so that test is
+     unnecessary.  However when coming from get-magic, IOKp _is_ set, and we
+     must check for overflow the same as in older perl.
+
+   FIXME:
+
+   We'd like to call mg_get just once, but unfortunately sv_derived_from()
+   will call it for each of our checks.  We could do a string compare like
+   sv_isa ourselves, but that only tests the exact class, it doesn't
+   recognise subclassing.  There doesn't seem to be a public interface to
+   the subclassing tests (in the internal isa_lookup() function).  */
+
+int
+use_sv (SV *sv)
+{
+  double  d;
+
+  if (SvGMAGICAL(sv))
+    {
+      mg_get(sv);
+
+      if (SvPOKp(sv))
+        return USE_PVX;
+
+      if (SvIOKp(sv))
+        {
+          if (SvIsUV(sv))
+            {
+              if (SvNOKp(sv))
+                goto u_or_n;
+              return USE_UVX;
+            }
+          else
+            {
+              if (SvNOKp(sv))
+                goto i_or_n;
+              return USE_IVX;
+            }
+        }
+
+      if (SvNOKp(sv))
+        return USE_NVX;
+
+      goto rok_or_unknown;
+    }
+
+  if (SvPOK(sv))
+    return USE_PVX;
+
+  if (SvIOK(sv))
+    {
+      if (SvIsUV(sv))
+        {
+          if (SvNOK(sv))
+            {
+              if (PERL_LT (5, 8))
+                {
+                u_or_n:
+                  d = SvNVX(sv);
+                  if (d >= ULONG_MAX_P1_AS_DOUBLE || d < 0.0)
+                    return USE_NVX;
+                }
+              d = SvNVX(sv);
+              if (d != floor (d))
+                return USE_NVX;
+            }
+          return USE_UVX;
+        }
+      else
+        {
+          if (SvNOK(sv))
+            {
+              if (PERL_LT (5, 8))
+                {
+                i_or_n:
+                  d = SvNVX(sv);
+                  if (d >= LONG_MAX_P1_AS_DOUBLE || d < (double) LONG_MIN)
+                    return USE_NVX;
+                }
+              d = SvNVX(sv);
+              if (d != floor (d))
+                return USE_NVX;
+            }
+          return USE_IVX;
+        }
+    }
+
+  if (SvNOK(sv))
+    return USE_NVX;
+
+ rok_or_unknown:
+  if (SvROK(sv))
+    {
+      if (sv_derived_from (sv, mpz_class))
+        return USE_MPZ;
+      if (sv_derived_from (sv, mpq_class))
+        return USE_MPQ;
+      if (sv_derived_from (sv, mpf_class))
+        return USE_MPF;
+    }
+
+  return USE_UNKNOWN;
+}
+
+
+/* Coerce sv to an mpz.  Use tmp to hold the converted value if sv isn't
+   already an mpz (or an mpq of which the numerator can be used).  Return
+   the chosen mpz (tmp or the contents of sv).  */
+
+static mpz_ptr
+coerce_mpz_using (mpz_ptr tmp, SV *sv, int use)
+{
+  switch (use) {
+  case USE_IVX:
+    mpz_set_si (tmp, SvIVX(sv));
+    return tmp;
+
+  case USE_UVX:
+    mpz_set_ui (tmp, SvUVX(sv));
+    return tmp;
+
+  case USE_NVX:
+    {
+      double d;
+      d = SvNVX(sv);
+      if (! double_integer_p (d))
+        croak ("cannot coerce non-integer double to mpz");
+      mpz_set_d (tmp, d);
+      return tmp;
+    }
+
+  case USE_PVX:
+    my_mpz_set_svstr (tmp, sv);
+    return tmp;
+
+  case USE_MPZ:
+    return SvMPZ(sv)->m;
+
+  case USE_MPQ:
+    {
+      mpq q = SvMPQ(sv);
+      if (! x_mpq_integer_p (q->m))
+        croak ("cannot coerce non-integer mpq to mpz");
+      return mpq_numref(q->m);
+    }
+
+  case USE_MPF:
+    {
+      mpf f = SvMPF(sv);
+      if (! mpf_integer_p (f))
+        croak ("cannot coerce non-integer mpf to mpz");
+      mpz_set_f (tmp, f);
+      return tmp;
+    }
+
+  default:
+    croak ("cannot coerce to mpz");
+  }
+}
+static mpz_ptr
+coerce_mpz (mpz_ptr tmp, SV *sv)
+{
+  return coerce_mpz_using (tmp, sv, use_sv (sv));
+}
+
+
+/* Coerce sv to an mpq.  If sv is an mpq then just return that, otherwise
+   use tmp to hold the converted value and return that.  */
+
+static mpq_ptr
+coerce_mpq_using (mpq_ptr tmp, SV *sv, int use)
+{
+  TRACE (printf ("coerce_mpq_using %p %d\n", tmp, use));
+  switch (use) {
+  case USE_IVX:
+    mpq_set_si (tmp, SvIVX(sv), 1L);
+    return tmp;
+
+  case USE_UVX:
+    mpq_set_ui (tmp, SvUVX(sv), 1L);
+    return tmp;
+
+  case USE_NVX:
+    mpq_set_d (tmp, SvNVX(sv));
+    return tmp;
+
+  case USE_PVX:
+    my_mpq_set_svstr (tmp, sv);
+    return tmp;
+
+  case USE_MPZ:
+    mpq_set_z (tmp, SvMPZ(sv)->m);
+    return tmp;
+
+  case USE_MPQ:
+    return SvMPQ(sv)->m;
+
+  case USE_MPF:
+    mpq_set_f (tmp, SvMPF(sv));
+    return tmp;
+
+  default:
+    croak ("cannot coerce to mpq");
+  }
+}
+static mpq_ptr
+coerce_mpq (mpq_ptr tmp, SV *sv)
+{
+  return coerce_mpq_using (tmp, sv, use_sv (sv));
+}
+
+
+static void
+my_mpf_set_sv_using (mpf_ptr f, SV *sv, int use)
+{
+  switch (use) {
+  case USE_IVX:
+    mpf_set_si (f, SvIVX(sv));
+    break;
+
+  case USE_UVX:
+    mpf_set_ui (f, SvUVX(sv));
+    break;
+
+  case USE_NVX:
+    mpf_set_d (f, SvNVX(sv));
+    break;
+
+  case USE_PVX:
+    my_mpf_set_svstr (f, sv);
+    break;
+
+  case USE_MPZ:
+    mpf_set_z (f, SvMPZ(sv)->m);
+    break;
+
+  case USE_MPQ:
+    mpf_set_q (f, SvMPQ(sv)->m);
+    break;
+
+  case USE_MPF:
+    mpf_set (f, SvMPF(sv));
+    break;
+
+  default:
+    croak ("cannot coerce to mpf");
+  }
+}
+
+/* Coerce sv to an mpf.  If sv is an mpf then just return that, otherwise
+   use tmp to hold the converted value (with prec precision).  */
+static mpf_ptr
+coerce_mpf_using (tmp_mpf_ptr tmp, SV *sv, unsigned long prec, int use)
+{
+  if (use == USE_MPF)
+    return SvMPF(sv);
+
+  tmp_mpf_set_prec (tmp, prec);
+  my_mpf_set_sv_using (tmp->m, sv, use);
+  return tmp->m;
+}
+static mpf_ptr
+coerce_mpf (tmp_mpf_ptr tmp, SV *sv, unsigned long prec)
+{
+  return coerce_mpf_using (tmp, sv, prec, use_sv (sv));
+}
+
+
+/* Coerce xv to an mpf and store the pointer in *xp, ditto for yv to *yp.  If
+   one of xv or yv is an mpf then use it for the precision, otherwise use
+   the default precision.  */
+unsigned long
+coerce_mpf_pair (mpf *xp, SV *xv, mpf *yp, SV *yv)
+{
+  int x_use = use_sv (xv);
+  int y_use = use_sv (yv);
+  unsigned long  prec;
+  mpf  x, y;
+
+  if (x_use == USE_MPF)
+    {
+      x = SvMPF(xv);
+      prec = mpf_get_prec (x);
+      y = coerce_mpf_using (tmp_mpf_0, yv, prec, y_use);
+    }
+  else
+    {
+      y = coerce_mpf_using (tmp_mpf_0, yv, mpf_get_default_prec(), y_use);
+      prec = mpf_get_prec (y);
+      x = coerce_mpf_using (tmp_mpf_1, xv, prec, x_use);
+    }
+  *xp = x;
+  *yp = y;
+  return prec;
+}
+
+
+/* Note that SvUV is not used, since it merely treats the signed IV as if it
+   was unsigned.  We get an IV and check its sign. */
+static unsigned long
+coerce_ulong (SV *sv)
+{
+  long  n;
+
+  switch (use_sv (sv)) {
+  case USE_IVX:
+    n = SvIVX(sv);
+  negative_check:
+    if (n < 0)
+      goto range_error;
+    return n;
+
+  case USE_UVX:
+    return SvUVX(sv);
+
+  case USE_NVX:
+    {
+      double d;
+      d = SvNVX(sv);
+      if (! double_integer_p (d))
+        goto integer_error;
+      n = SvIV(sv);
+    }
+    goto negative_check;
+
+  case USE_PVX:
+    /* FIXME: Check the string is an integer. */
+    n = SvIV(sv);
+    goto negative_check;
+
+  case USE_MPZ:
+    {
+      mpz z = SvMPZ(sv);
+      if (! mpz_fits_ulong_p (z->m))
+        goto range_error;
+      return mpz_get_ui (z->m);
+    }
+
+  case USE_MPQ:
+    {
+      mpq q = SvMPQ(sv);
+      if (! x_mpq_integer_p (q->m))
+        goto integer_error;
+      if (! mpz_fits_ulong_p (mpq_numref (q->m)))
+        goto range_error;
+      return mpz_get_ui (mpq_numref (q->m));
+    }
+
+  case USE_MPF:
+    {
+      mpf f = SvMPF(sv);
+      if (! mpf_integer_p (f))
+        goto integer_error;
+      if (! mpf_fits_ulong_p (f))
+        goto range_error;
+      return mpf_get_ui (f);
+    }
+
+  default:
+    croak ("cannot coerce to ulong");
+  }
+
+ integer_error:
+  croak ("not an integer");
+
+ range_error:
+  croak ("out of range for ulong");
+}
+
+/* Coerce sv to a long, or croak.  The NV and PV cases go through SvIV.  */
+static long
+coerce_long (SV *sv)
+{
+  switch (use_sv (sv)) {
+  case USE_IVX:
+    return SvIVX(sv);
+
+  case USE_UVX:
+    {
+      UV u = SvUVX(sv);
+      if (u > (UV) LONG_MAX)    /* too big for a signed long */
+        goto range_error;
+      return u;
+    }
+
+  case USE_NVX:
+    {
+      double d = SvNVX(sv);
+      if (! double_integer_p (d))
+        goto integer_error;
+      return SvIV(sv);          /* NOTE(review): no range check here */
+    }
+
+  case USE_PVX:
+    /* FIXME: Check the string is an integer. */
+    return SvIV(sv);
+
+  case USE_MPZ:
+    {
+      mpz z = SvMPZ(sv);
+      if (! mpz_fits_slong_p (z->m))
+        goto range_error;
+      return mpz_get_si (z->m);
+    }
+
+  case USE_MPQ:
+    {
+      mpq q = SvMPQ(sv);
+      if (! x_mpq_integer_p (q->m))     /* denominator must be 1 */
+        goto integer_error;
+      if (! mpz_fits_slong_p (mpq_numref (q->m)))
+        goto range_error;
+      return mpz_get_si (mpq_numref (q->m));
+    }
+
+  case USE_MPF:
+    {
+      mpf f = SvMPF(sv);
+      if (! mpf_integer_p (f))
+        goto integer_error;
+      if (! mpf_fits_slong_p (f))
+        goto range_error;
+      return mpf_get_si (f);
+    }
+
+  default:
+    croak ("cannot coerce to long");
+  }
+
+ integer_error:
+  croak ("not an integer");
+
+ range_error:
+  croak ("out of range for long");  /* was "ulong", copied from coerce_ulong */
+}
+
+
+/* ------------------------------------------------------------------------- */
+
+MODULE = GMP         PACKAGE = GMP
+
+BOOT:
+    TRACE (printf ("GMP boot\n"));
+    mp_set_memory_functions (my_gmp_alloc, my_gmp_realloc, my_gmp_free);
+    mpz_init (tmp_mpz_0);
+    mpz_init (tmp_mpz_1);
+    mpz_init (tmp_mpz_2);
+    mpq_init (tmp_mpq_0);
+    mpq_init (tmp_mpq_1);
+    tmp_mpf_init (tmp_mpf_0);
+    tmp_mpf_init (tmp_mpf_1);
+    mpz_class_hv = gv_stashpv (mpz_class, 1);
+    mpq_class_hv = gv_stashpv (mpq_class, 1);
+    mpf_class_hv = gv_stashpv (mpf_class, 1);
+
+
+void
+END()
+CODE:
+    TRACE (printf ("GMP end\n"));
+    TRACE_ACTIVE ();
+    /* These are not always true, see Bugs at the top of the file. */
+    /* assert (mpz_count == 0); */
+    /* assert (mpq_count == 0); */
+    /* assert (mpf_count == 0); */
+    /* assert (rand_count == 0); */
+
+
+const_string
+version()
+CODE:
+    RETVAL = gmp_version;
+OUTPUT:
+    RETVAL
+
+
+bool
+fits_slong_p (sv)
+    SV *sv
+CODE:
+    switch (use_sv (sv)) {
+    case USE_IVX:
+      RETVAL = 1;
+      break;
+
+    case USE_UVX:
+      {
+        UV u = SvUVX(sv);
+        RETVAL = (u <= LONG_MAX);
+      }
+      break;
+
+    case USE_NVX:
+      {
+        double  d = SvNVX(sv);
+        RETVAL = (d >= (double) LONG_MIN && d < LONG_MAX_P1_AS_DOUBLE);
+      }
+      break;
+
+    case USE_PVX:
+      {
+        STRLEN len;
+        const char *str = SvPV (sv, len);
+        if (mpq_set_str (tmp_mpq_0, str, 0) == 0)
+          RETVAL = x_mpq_fits_slong_p (tmp_mpq_0);
+        else
+          {
+            /* enough precision for a long */
+            tmp_mpf_set_prec (tmp_mpf_0, 2*mp_bits_per_limb);
+            if (mpf_set_str (tmp_mpf_0->m, str, 10) != 0)
+              croak ("GMP::fits_slong_p invalid string format");
+            RETVAL = mpf_fits_slong_p (tmp_mpf_0->m);
+          }
+      }
+      break;
+
+    case USE_MPZ:
+      RETVAL = mpz_fits_slong_p (SvMPZ(sv)->m);
+      break;
+
+    case USE_MPQ:
+      RETVAL = x_mpq_fits_slong_p (SvMPQ(sv)->m);
+      break;
+
+    case USE_MPF:
+      RETVAL = mpf_fits_slong_p (SvMPF(sv));
+      break;
+
+    default:
+      croak ("GMP::fits_slong_p invalid argument");
+    }
+OUTPUT:
+    RETVAL
+
+
+double
+get_d (sv)
+    SV *sv
+CODE:
+    switch (use_sv (sv)) {
+    case USE_IVX:
+      RETVAL = (double) SvIVX(sv);
+      break;
+
+    case USE_UVX:
+      RETVAL = (double) SvUVX(sv);
+      break;
+
+    case USE_NVX:
+      RETVAL = SvNVX(sv);
+      break;
+
+    case USE_PVX:
+      {
+        STRLEN len;
+        RETVAL = atof(SvPV(sv, len));
+      }
+      break;
+
+    case USE_MPZ:
+      RETVAL = mpz_get_d (SvMPZ(sv)->m);
+      break;
+
+    case USE_MPQ:
+      RETVAL = mpq_get_d (SvMPQ(sv)->m);
+      break;
+
+    case USE_MPF:
+      RETVAL = mpf_get_d (SvMPF(sv));
+      break;
+
+    default:
+      croak ("GMP::get_d invalid argument");
+    }
+OUTPUT:
+    RETVAL
+
+
+void
+get_d_2exp (sv)
+    SV *sv
+PREINIT:
+    double ret;   /* fraction part, pushed first as an NV */
+    long   exp;   /* binary exponent, pushed second as an IV */
+PPCODE:
+    switch (use_sv (sv)) {
+    case USE_IVX:
+      ret = (double) SvIVX(sv);
+      goto use_frexp;
+
+    case USE_UVX:
+      ret = (double) SvUVX(sv);
+      goto use_frexp;
+
+    case USE_NVX:
+      {
+        int i_exp;
+        ret = SvNVX(sv);
+      use_frexp:
+        ret = frexp (ret, &i_exp);      /* frexp wants an int, widen after */
+        exp = i_exp;
+      }
+      break;
+
+    case USE_PVX:
+      /* put strings through mpf to give full exp range */
+      tmp_mpf_set_prec (tmp_mpf_0, DBL_MANT_DIG);
+      my_mpf_set_svstr (tmp_mpf_0->m, sv);
+      ret = mpf_get_d_2exp (&exp, tmp_mpf_0->m);
+      break;
+
+    case USE_MPZ:
+      ret = mpz_get_d_2exp (&exp, SvMPZ(sv)->m);
+      break;
+
+    case USE_MPQ:
+      tmp_mpf_set_prec (tmp_mpf_0, DBL_MANT_DIG);
+      mpf_set_q (tmp_mpf_0->m, SvMPQ(sv)->m);
+      ret = mpf_get_d_2exp (&exp, tmp_mpf_0->m);
+      break;
+
+    case USE_MPF:
+      ret = mpf_get_d_2exp (&exp, SvMPF(sv));
+      break;
+
+    default:
+      croak ("GMP::get_d_2exp invalid argument");
+    }
+    /* XPUSHs, not PUSHs: two values go back but only one argument came in */
+    XPUSHs (sv_2mortal (newSVnv (ret)));
+    XPUSHs (sv_2mortal (newSViv (exp)));
+
+long
+get_si (sv)
+    SV *sv
+CODE:
+    switch (use_sv (sv)) {
+    case USE_IVX:
+      RETVAL = SvIVX(sv);
+      break;
+
+    case USE_UVX:
+      RETVAL = SvUVX(sv);
+      break;
+
+    case USE_NVX:
+      RETVAL = (long) SvNVX(sv);
+      break;
+
+    case USE_PVX:
+      RETVAL = SvIV(sv);
+      break;
+
+    case USE_MPZ:
+      RETVAL = mpz_get_si (SvMPZ(sv)->m);
+      break;
+
+    case USE_MPQ:
+      mpz_set_q (tmp_mpz_0, SvMPQ(sv)->m);
+      RETVAL = mpz_get_si (tmp_mpz_0);
+      break;
+
+    case USE_MPF:
+      RETVAL = mpf_get_si (SvMPF(sv));
+      break;
+
+    default:
+      croak ("GMP::get_si invalid argument");
+    }
+OUTPUT:
+    RETVAL
+
+
+void
+get_str (sv, ...)
+    SV *sv
+PREINIT:
+    char      *str;
+    mp_exp_t  exp;
+    mpz_ptr   z;
+    mpq_ptr   q;
+    mpf       f;
+    int       base;
+    int       ndigits;
+PPCODE:
+    TRACE (printf ("GMP::get_str\n"));
+    /* Optional args: ST(1) is the base (default 10), ST(2) a digit count. */
+    if (items >= 2)
+      base = coerce_long (ST(1));
+    else
+      base = 10;
+    TRACE (printf (" base=%d\n", base));
+    /* NOTE(review): ndigits is validated but never passed to mpf_get_str. */
+    if (items >= 3)
+      ndigits = coerce_long (ST(2));
+    else
+      ndigits = 10;
+    TRACE (printf (" ndigits=%d\n", ndigits));
+
+    EXTEND (SP, 2);
+    switch (use_sv (sv)) {
+    case USE_IVX:
+      mpz_set_si (tmp_mpz_0, SvIVX(sv));
+    get_tmp_mpz_0:
+      z = tmp_mpz_0;
+      goto get_mpz;
+
+    case USE_UVX:
+      mpz_set_ui (tmp_mpz_0, SvUVX(sv));
+      goto get_tmp_mpz_0;
+
+    case USE_NVX:
+      /* only digits in the original double, not in the coerced form */
+      if (ndigits == 0)
+        ndigits = DBL_DIG;
+      mpf_set_d (tmp_mpf_0->m, SvNVX(sv));
+      f = tmp_mpf_0->m;
+      goto get_mpf;
+
+    case USE_PVX:
+      {
+        /* get_str on a string is not much more than a base conversion */
+        STRLEN len;
+        str = SvPV (sv, len);
+        if (mpz_set_str (tmp_mpz_0, str, 0) == 0)
+          {
+            z = tmp_mpz_0;
+            goto get_mpz;
+          }
+        else if (mpq_set_str (tmp_mpq_0, str, 0) == 0)
+          {
+            q = tmp_mpq_0;
+            goto get_mpq;
+          }
+        else
+          {
+            /* FIXME: Would like perhaps a precision equivalent to the
+               number of significant digits of the string, in its given
+               base.  */
+            tmp_mpf_set_prec (tmp_mpf_0, strlen(str));
+            if (mpf_set_str (tmp_mpf_0->m, str, 10) == 0)
+              {
+                f = tmp_mpf_0->m;
+                goto get_mpf;
+              }
+            else
+              croak ("GMP::get_str invalid string format");
+          }
+      }
+      break;
+
+    case USE_MPZ:
+      z = SvMPZ(sv)->m;
+    get_mpz:
+      str = mpz_get_str (NULL, base, z);
+    push_str:
+      PUSHs (sv_2mortal (newSVpv (str, 0)));
+      my_gmp_free (str, strlen (str) + 1);  /* newSVpv copied it; don't leak */
+      break;
+
+    case USE_MPQ:
+      q = SvMPQ(sv)->m;
+    get_mpq:
+      str = mpq_get_str (NULL, base, q);
+      goto push_str;
+
+    case USE_MPF:
+      f = SvMPF(sv);
+    get_mpf:
+      str = mpf_get_str (NULL, &exp, base, 0, f);
+      PUSHs (sv_2mortal (newSVpv (str, 0)));
+      my_gmp_free (str, strlen (str) + 1);  /* newSVpv copied it; don't leak */
+      PUSHs (sv_2mortal (newSViv (exp)));
+      break;
+
+    default:
+      croak ("GMP::get_str invalid argument");
+    }
+
+bool
+integer_p (sv)
+    SV *sv
+CODE:
+    /* Report whether the operand holds an integral value.  Croaks on an
+       unsupported operand class or an unparsable string.  */
+    switch (use_sv (sv)) {
+    case USE_IVX:
+    case USE_UVX:
+    case USE_MPZ:
+      /* integer representations: always true */
+      RETVAL = 1;
+      break;
+
+    case USE_NVX:
+      RETVAL = double_integer_p (SvNVX(sv));
+      break;
+
+    case USE_MPQ:
+      RETVAL = x_mpq_integer_p (SvMPQ(sv)->m);
+      break;
+
+    case USE_MPF:
+      RETVAL = mpf_integer_p (SvMPF(sv));
+      break;
+
+    case USE_PVX:
+      {
+        /* FIXME: Maybe this should be done by parsing the string, not by an
+           actual conversion.  */
+        STRLEN len;
+        const char *text = SvPV (sv, len);
+        if (mpq_set_str (tmp_mpq_0, text, 0) != 0)
+          {
+            /* not accepted as a rational: retry as a float with enough
+               precision for all digits of the string */
+            tmp_mpf_set_prec (tmp_mpf_0, strlen(text)+64);
+            if (mpf_set_str (tmp_mpf_0->m, text, 10) != 0)
+              croak ("GMP::integer_p invalid string format");
+            RETVAL = mpf_integer_p (tmp_mpf_0->m);
+          }
+        else
+          RETVAL = x_mpq_integer_p (tmp_mpq_0);
+      }
+      break;
+
+    default:
+      croak ("GMP::integer_p invalid argument");
+    }
+OUTPUT:
+    RETVAL
+
+
+int
+sgn (sv)
+    SV *sv
+CODE:
+    /* Compute the sign of the operand using the sign primitive that
+       matches its representation class.  Croaks on an unsupported class
+       or an unparsable string.  */
+    switch (use_sv (sv)) {
+    case USE_MPZ:
+      RETVAL = mpz_sgn (SvMPZ(sv)->m);
+      break;
+
+    case USE_MPQ:
+      RETVAL = mpq_sgn (SvMPQ(sv)->m);
+      break;
+
+    case USE_MPF:
+      RETVAL = mpf_sgn (SvMPF(sv));
+      break;
+
+    case USE_IVX:
+      RETVAL = SGN (SvIVX(sv));
+      break;
+
+    case USE_UVX:
+      /* unsigned: the result is 0 or 1 only */
+      RETVAL = (SvUVX(sv) > 0);
+      break;
+
+    case USE_NVX:
+      RETVAL = SGN (SvNVX(sv));
+      break;
+
+    case USE_PVX:
+      {
+        /* FIXME: Maybe this should be done by parsing the string, not by an
+           actual conversion.  */
+        STRLEN len;
+        const char *text = SvPV (sv, len);
+        if (mpq_set_str (tmp_mpq_0, text, 0) != 0)
+          {
+            /* not accepted as a rational: retry as a float with enough
+               precision for all digits of the string */
+            tmp_mpf_set_prec (tmp_mpf_0, strlen(text)+64);
+            if (mpf_set_str (tmp_mpf_0->m, text, 10) != 0)
+              croak ("GMP::sgn invalid string format");
+            RETVAL = mpf_sgn (tmp_mpf_0->m);
+          }
+        else
+          RETVAL = mpq_sgn (tmp_mpq_0);
+      }
+      break;
+
+    default:
+      croak ("GMP::sgn invalid argument");
+    }
+OUTPUT:
+    RETVAL
+
+
+# currently undocumented
+void
+shrink ()
+CODE:
+    /* Reset every shared temporary to zero and reallocate it down to a
+       single limb.  Each helper macro expands to two statements, so it
+       must only be used as a full statement, as it is below.  */
+#define x_mpz_shrink(z) \
+    mpz_set_ui (z, 0L); _mpz_realloc (z, 1)
+#define x_mpq_shrink(q) \
+    x_mpz_shrink (mpq_numref(q)); x_mpz_shrink (mpq_denref(q))
+
+    x_mpz_shrink (tmp_mpz_0);
+    x_mpz_shrink (tmp_mpz_1);
+    x_mpz_shrink (tmp_mpz_2);
+    x_mpq_shrink (tmp_mpq_0);
+    x_mpq_shrink (tmp_mpq_1);
+    tmp_mpf_shrink (tmp_mpf_0);
+    tmp_mpf_shrink (tmp_mpf_1);
+
+
+
+malloced_string
+sprintf_internal (fmt, sv)
+    const_string fmt
+    SV           *sv
+CODE:
+    /* Format one GMP object with gmp_asprintf.  The asserts require fmt
+       to be at least three characters long, with its second-to-last
+       character ('Z', 'Q' or 'F') matching the class of sv.  */
+    assert (strlen (fmt) >= 3);
+    assert (SvROK(sv));
+    assert ((sv_derived_from (sv, mpz_class)    && fmt[strlen(fmt)-2] == 'Z')
+            || (sv_derived_from (sv, mpq_class) && fmt[strlen(fmt)-2] == 'Q')
+            || (sv_derived_from (sv, mpf_class) && fmt[strlen(fmt)-2] == 'F'));
+    TRACE (printf ("GMP::sprintf_internal\n");
+           printf ("  fmt  |%s|\n", fmt);
+           printf ("  sv   |%p|\n", SvMPZ(sv)));
+
+    /* cheat a bit here, SvMPZ works for mpq and mpf too */
+    gmp_asprintf (&RETVAL, fmt, SvMPZ(sv));
+
+    TRACE (printf ("  result |%s|\n", RETVAL));
+OUTPUT:
+    RETVAL
+
+
+
+#------------------------------------------------------------------------------
+
+MODULE = GMP         PACKAGE = GMP::Mpz
+
+mpz
+mpz (...)
+ALIAS:
+    GMP::Mpz::new = 1
+PREINIT:
+    SV *sv;
+CODE:
+    /* Construct a new mpz: zero when called without arguments, otherwise
+       converted from the single argument according to use_sv().
+       Croaks on more than one argument or an unsupported operand.  */
+    TRACE (printf ("%s new, ix=%ld, items=%d\n",
+                   mpz_class, (long) ix, (int) items));
+
+    /* Reject a bad argument count before allocating, so that croak()
+       cannot strand a freshly allocated object.  */
+    if (items > 1)
+      croak ("%s new: invalid arguments", mpz_class);
+
+    RETVAL = new_mpz();
+
+    switch (items) {
+    case 0:
+      mpz_set_ui (RETVAL->m, 0L);
+      break;
+
+    case 1:
+      sv = ST(0);
+      TRACE (printf ("  use %d\n", use_sv (sv)));
+      switch (use_sv (sv)) {
+      case USE_IVX:
+        mpz_set_si (RETVAL->m, SvIVX(sv));
+        break;
+
+      case USE_UVX:
+        mpz_set_ui (RETVAL->m, SvUVX(sv));
+        break;
+
+      case USE_NVX:
+        mpz_set_d (RETVAL->m, SvNVX(sv));
+        break;
+
+      case USE_PVX:
+        my_mpz_set_svstr (RETVAL->m, sv);
+        break;
+
+      case USE_MPZ:
+        mpz_set (RETVAL->m, SvMPZ(sv)->m);
+        break;
+
+      case USE_MPQ:
+        mpz_set_q (RETVAL->m, SvMPQ(sv)->m);
+        break;
+
+      case USE_MPF:
+        mpz_set_f (RETVAL->m, SvMPF(sv));
+        break;
+
+      default:
+        /* release the object before croak() unwinds past us */
+        free_mpz (RETVAL);
+        croak ("%s new: invalid arguments", mpz_class);
+      }
+      break;
+    }
+OUTPUT:
+    RETVAL
+
+
+void
+overload_constant (str, pv, d1, ...)
+    const_string_assume str
+    SV                  *pv
+    dummy               d1
+PREINIT:
+    mpz z;
+PPCODE:
+    /* Try to parse the constant text as an mpz.  When parsing fails the
+       new object is released and pv is pushed back unchanged.  */
+    TRACE (printf ("%s constant: %s\n", mpz_class, str));
+    z = new_mpz();
+    if (mpz_set_str (z->m, str, 0) != 0)
+      {
+        free_mpz (z);
+        PUSHs(pv);
+      }
+    else
+      {
+        PUSHs (MPX_NEWMORTAL (z, mpz_class_hv));
+      }
+
+
+mpz
+overload_copy (z, d1, d2)
+    mpz_assume z
+    dummy      d1
+    dummy      d2
+CODE:
+    /* Return a fresh mpz holding the same value as z; d1 and d2 are not
+       referenced in the body.  */
+    RETVAL = new_mpz();
+    mpz_set (RETVAL->m, z->m);
+OUTPUT:
+    RETVAL
+
+
+void
+DESTROY (z)
+    mpz_assume z
+CODE:
+    /* Destructor: hands the mpz back through free_mpz.  */
+    TRACE (printf ("%s DESTROY %p\n", mpz_class, z));
+    free_mpz (z);
+
+
+malloced_string
+overload_string (z, d1, d2)
+    mpz_assume z
+    dummy      d1
+    dummy      d2
+CODE:
+    /* Stringify z in base 10.  The buffer is allocated by mpz_get_str
+       and returned through the malloced_string typemap.  */
+    TRACE (printf ("%s overload_string %p\n", mpz_class, z));
+    RETVAL = mpz_get_str (NULL, 10, z->m);
+OUTPUT:
+    RETVAL
+
+
+mpz
+overload_add (xv, yv, order)
+    SV *xv
+    SV *yv
+    SV *order
+ALIAS:
+    GMP::Mpz::overload_sub = 1
+    GMP::Mpz::overload_mul = 2
+    GMP::Mpz::overload_div = 3
+    GMP::Mpz::overload_rem = 4
+    GMP::Mpz::overload_and = 5
+    GMP::Mpz::overload_ior = 6
+    GMP::Mpz::overload_xor = 7
+PREINIT:
+    /* one entry per ALIAS index, in the same order as the ALIAS list */
+    static_functable const struct {
+      void (*op) (mpz_ptr, mpz_srcptr, mpz_srcptr);
+    } table[] = {
+      { mpz_add    }, /* 0 */
+      { mpz_sub    }, /* 1 */
+      { mpz_mul    }, /* 2 */
+      { mpz_tdiv_q }, /* 3 */
+      { mpz_tdiv_r }, /* 4 */
+      { mpz_and    }, /* 5 */
+      { mpz_ior    }, /* 6 */
+      { mpz_xor    }, /* 7 */
+    };
+CODE:
+    /* Binary operator returning a new mpz.  ix selects the operation;
+       when order is &PL_sv_yes the two operands are swapped first.
+       Each operand is coerced using its own temporary.  */
+    assert_table (ix);
+    if (order == &PL_sv_yes)
+      SV_PTR_SWAP (xv, yv);
+    RETVAL = new_mpz();
+    (*table[ix].op) (RETVAL->m,
+                     coerce_mpz (tmp_mpz_0, xv),
+                     coerce_mpz (tmp_mpz_1, yv));
+OUTPUT:
+    RETVAL
+
+
+void
+overload_addeq (x, y, o)
+    mpz_assume   x
+    mpz_coerce   y
+    order_noswap o
+ALIAS:
+    GMP::Mpz::overload_subeq = 1
+    GMP::Mpz::overload_muleq = 2
+    GMP::Mpz::overload_diveq = 3
+    GMP::Mpz::overload_remeq = 4
+    GMP::Mpz::overload_andeq = 5
+    GMP::Mpz::overload_ioreq = 6
+    GMP::Mpz::overload_xoreq = 7
+PREINIT:
+    /* one entry per ALIAS index, in the same order as the ALIAS list */
+    static_functable const struct {
+      void (*op) (mpz_ptr, mpz_srcptr, mpz_srcptr);
+    } table[] = {
+      { mpz_add    }, /* 0 */
+      { mpz_sub    }, /* 1 */
+      { mpz_mul    }, /* 2 */
+      { mpz_tdiv_q }, /* 3 */
+      { mpz_tdiv_r }, /* 4 */
+      { mpz_and    }, /* 5 */
+      { mpz_ior    }, /* 6 */
+      { mpz_xor    }, /* 7 */
+    };
+PPCODE:
+    /* Assignment operator: x is updated in place with the operation
+       selected by ix, then the first argument SV is pushed back as the
+       result.  */
+    assert_table (ix);
+    (*table[ix].op) (x->m, x->m, y);
+    XPUSHs (ST(0));
+
+
+mpz
+overload_lshift (zv, nv, order)
+    SV *zv
+    SV *nv
+    SV *order
+ALIAS:
+    GMP::Mpz::overload_rshift   = 1
+    GMP::Mpz::overload_pow      = 2
+PREINIT:
+    /* one entry per ALIAS index, in the same order as the ALIAS list */
+    static_functable const struct {
+      void (*op) (mpz_ptr, mpz_srcptr, unsigned long);
+    } table[] = {
+      { mpz_mul_2exp }, /* 0 */
+      { mpz_div_2exp }, /* 1 */
+      { mpz_pow_ui   }, /* 2 */
+    };
+CODE:
+    /* Operator taking an mpz and an unsigned long, returning a new mpz.
+       Operands are swapped when order is &PL_sv_yes.  The result object
+       itself is offered to coerce_mpz as the scratch destination.  */
+    assert_table (ix);
+    if (order == &PL_sv_yes)
+      SV_PTR_SWAP (zv, nv);
+    RETVAL = new_mpz();
+    (*table[ix].op) (RETVAL->m, coerce_mpz (RETVAL->m, zv), coerce_ulong (nv));
+OUTPUT:
+    RETVAL
+
+
+void
+overload_lshifteq (z, n, o)
+    mpz_assume   z
+    ulong_coerce n
+    order_noswap o
+ALIAS:
+    GMP::Mpz::overload_rshifteq   = 1
+    GMP::Mpz::overload_poweq      = 2
+PREINIT:
+    /* one entry per ALIAS index, in the same order as the ALIAS list */
+    static_functable const struct {
+      void (*op) (mpz_ptr, mpz_srcptr, unsigned long);
+    } table[] = {
+      { mpz_mul_2exp }, /* 0 */
+      { mpz_div_2exp }, /* 1 */
+      { mpz_pow_ui   }, /* 2 */
+    };
+PPCODE:
+    /* In-place form: z is updated with the operation selected by ix and
+       the first argument SV is pushed back as the result.  */
+    assert_table (ix);
+    (*table[ix].op) (z->m, z->m, n);
+    XPUSHs(ST(0));
+
+
+mpz
+overload_abs (z, d1, d2)
+    mpz_assume z
+    dummy      d1
+    dummy      d2
+ALIAS:
+    GMP::Mpz::overload_neg  = 1
+    GMP::Mpz::overload_com  = 2
+    GMP::Mpz::overload_sqrt = 3
+PREINIT:
+    /* one entry per ALIAS index, in the same order as the ALIAS list */
+    static_functable const struct {
+      void (*op) (mpz_ptr w, mpz_srcptr x);
+    } table[] = {
+      { mpz_abs  }, /* 0 */
+      { mpz_neg  }, /* 1 */
+      { mpz_com  }, /* 2 */
+      { mpz_sqrt }, /* 3 */
+    };
+CODE:
+    /* Unary operator: applies the function selected by ix to z and
+       returns the result in a new mpz.  */
+    assert_table (ix);
+    RETVAL = new_mpz();
+    (*table[ix].op) (RETVAL->m, z->m);
+OUTPUT:
+    RETVAL
+
+
+void
+overload_inc (z, d1, d2)
+    mpz_assume z
+    dummy      d1
+    dummy      d2
+ALIAS:
+    GMP::Mpz::overload_dec = 1
+PREINIT:
+    /* index 0 adds, index 1 subtracts */
+    static_functable const struct {
+      void (*op) (mpz_ptr w, mpz_srcptr x, unsigned long y);
+    } table[] = {
+      { mpz_add_ui }, /* 0 */
+      { mpz_sub_ui }, /* 1 */
+    };
+CODE:
+    /* Increment or decrement z in place by one, as selected by ix.  */
+    assert_table (ix);
+    (*table[ix].op) (z->m, z->m, 1L);
+
+
+int
+overload_spaceship (xv, yv, order)
+    SV *xv
+    SV *yv
+    SV *order
+PREINIT:
+    mpz x;
+CODE:
+    /* Three-way comparison of an mpz against any supported operand.
+       The raw comparison result is reduced to its sign, then negated
+       when order is &PL_sv_yes.  */
+    TRACE (printf ("%s overload_spaceship\n", mpz_class));
+    MPZ_ASSUME (x, xv);
+    switch (use_sv (yv)) {
+    case USE_MPZ:
+      RETVAL = mpz_cmp (x->m, SvMPZ(yv)->m);
+      break;
+    case USE_MPQ:
+      RETVAL = x_mpz_cmp_q (x->m, SvMPQ(yv)->m);
+      break;
+    case USE_MPF:
+      RETVAL = x_mpz_cmp_f (x->m, SvMPF(yv));
+      break;
+    case USE_IVX:
+      RETVAL = mpz_cmp_si (x->m, SvIVX(yv));
+      break;
+    case USE_UVX:
+      RETVAL = mpz_cmp_ui (x->m, SvUVX(yv));
+      break;
+    case USE_NVX:
+      RETVAL = mpz_cmp_d (x->m, SvNVX(yv));
+      break;
+    case USE_PVX:
+      /* strings are coerced into a temporary mpz first */
+      RETVAL = mpz_cmp (x->m, coerce_mpz (tmp_mpz_0, yv));
+      break;
+    default:
+      croak ("%s <=>: invalid operand", mpz_class);
+    }
+    RETVAL = SGN (RETVAL);
+    if (order == &PL_sv_yes)
+      RETVAL = -RETVAL;
+OUTPUT:
+    RETVAL
+
+
+bool
+overload_bool (z, d1, d2)
+    mpz_assume z
+    dummy      d1
+    dummy      d2
+ALIAS:
+    GMP::Mpz::overload_not = 1
+CODE:
+    /* Truth value of z (non-zero is true); the xor with ix inverts the
+       answer for the overload_not alias.  */
+    RETVAL = (mpz_sgn (z->m) != 0) ^ ix;
+OUTPUT:
+    RETVAL
+
+
+mpz
+bin (n, k)
+    mpz_coerce   n
+    ulong_coerce k
+ALIAS:
+    GMP::Mpz::root = 1
+PREINIT:
+    /* mpz_root returns an int, hence the cast */
+    static_functable const struct {
+      void (*op) (mpz_ptr, mpz_srcptr, unsigned long);
+    } table[] = {
+      {                                                mpz_bin_ui }, /* 0 */
+      { (void (*)(mpz_ptr, mpz_srcptr, unsigned long)) mpz_root   }, /* 1 */
+    };
+CODE:
+    /* Apply mpz_bin_ui (ix 0) or mpz_root (ix 1) to n and k, returning
+       the result in a new mpz.  The int result of mpz_root is not used
+       on this path.  */
+    assert_table (ix);
+    RETVAL = new_mpz();
+    (*table[ix].op) (RETVAL->m, n, k);
+OUTPUT:
+    RETVAL
+
+
+void
+cdiv (a, d)
+    mpz_coerce a
+    mpz_coerce d
+ALIAS:
+    GMP::Mpz::fdiv = 1
+    GMP::Mpz::tdiv = 2
+PREINIT:
+    /* one entry per ALIAS index, in the same order as the ALIAS list */
+    static_functable const struct {
+      void (*op) (mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr);
+    } table[] = {
+      { mpz_cdiv_qr }, /* 0 */
+      { mpz_fdiv_qr }, /* 1 */
+      { mpz_tdiv_qr }, /* 2 */
+    };
+    mpz q, r;
+PPCODE:
+    /* Division returning two values: pushes the quotient and then the
+       remainder as new mortal mpz objects.  */
+    assert_table (ix);
+    q = new_mpz();
+    r = new_mpz();
+    (*table[ix].op) (q->m, r->m, a, d);
+    EXTEND (SP, 2);
+    PUSHs (MPX_NEWMORTAL (q, mpz_class_hv));
+    PUSHs (MPX_NEWMORTAL (r, mpz_class_hv));
+
+
+void
+cdiv_2exp (a, d)
+    mpz_coerce   a
+    ulong_coerce d
+ALIAS:
+    GMP::Mpz::fdiv_2exp = 1
+    GMP::Mpz::tdiv_2exp = 2
+PREINIT:
+    /* quotient and remainder functions, paired per ALIAS index */
+    static_functable const struct {
+      void (*q) (mpz_ptr, mpz_srcptr, unsigned long);
+      void (*r) (mpz_ptr, mpz_srcptr, unsigned long);
+    } table[] = {
+      { mpz_cdiv_q_2exp, mpz_cdiv_r_2exp }, /* 0 */
+      { mpz_fdiv_q_2exp, mpz_fdiv_r_2exp }, /* 1 */
+      { mpz_tdiv_q_2exp, mpz_tdiv_r_2exp }, /* 2 */
+    };
+    mpz q, r;
+PPCODE:
+    /* Computes quotient and remainder with two separate calls and
+       pushes them, quotient first, as new mortal mpz objects.  */
+    assert_table (ix);
+    q = new_mpz();
+    r = new_mpz();
+    (*table[ix].q) (q->m, a, d);
+    (*table[ix].r) (r->m, a, d);
+    EXTEND (SP, 2);
+    PUSHs (MPX_NEWMORTAL (q, mpz_class_hv));
+    PUSHs (MPX_NEWMORTAL (r, mpz_class_hv));
+
+
+bool
+congruent_p (a, c, d)
+    mpz_coerce a
+    mpz_coerce c
+    mpz_coerce d
+CODE:
+    /* Thin wrapper: returns the result of mpz_congruent_p on the three
+       coerced operands.  */
+    RETVAL = mpz_congruent_p (a, c, d);
+OUTPUT:
+    RETVAL
+
+
+bool
+congruent_2exp_p (a, c, d)
+    mpz_coerce   a
+    mpz_coerce   c
+    ulong_coerce d
+CODE:
+    /* Thin wrapper: returns the result of mpz_congruent_2exp_p, with d
+       passed as an unsigned long.  */
+    RETVAL = mpz_congruent_2exp_p (a, c, d);
+OUTPUT:
+    RETVAL
+
+
+mpz
+divexact (a, d)
+    mpz_coerce a
+    mpz_coerce d
+ALIAS:
+    GMP::Mpz::mod = 1
+PREINIT:
+    /* index 0 is divexact, index 1 is mod */
+    static_functable const struct {
+      void (*op) (mpz_ptr, mpz_srcptr, mpz_srcptr);
+    } table[] = {
+      { mpz_divexact }, /* 0 */
+      { mpz_mod      }, /* 1 */
+    };
+CODE:
+    /* Applies the function selected by ix to a and d and returns the
+       result in a new mpz.  */
+    assert_table (ix);
+    RETVAL = new_mpz();
+    (*table[ix].op) (RETVAL->m, a, d);
+OUTPUT:
+    RETVAL
+
+
+bool
+divisible_p (a, d)
+    mpz_coerce a
+    mpz_coerce d
+CODE:
+    /* Thin wrapper around mpz_divisible_p on the coerced operands.  */
+    RETVAL = mpz_divisible_p (a, d);
+OUTPUT:
+    RETVAL
+
+
+bool
+divisible_2exp_p (a, d)
+    mpz_coerce   a
+    ulong_coerce d
+CODE:
+    /* Thin wrapper around mpz_divisible_2exp_p; d is an unsigned long.  */
+    RETVAL = mpz_divisible_2exp_p (a, d);
+OUTPUT:
+    RETVAL
+
+
+bool
+even_p (z)
+    mpz_coerce z
+ALIAS:
+    GMP::Mpz::odd_p            = 1
+    GMP::Mpz::perfect_square_p = 2
+    GMP::Mpz::perfect_power_p  = 3
+PREINIT:
+    /* one predicate per ALIAS index, in the same order as the ALIAS list */
+    static_functable const struct {
+      int (*op) (mpz_srcptr z);
+    } table[] = {
+      { x_mpz_even_p         }, /* 0 */
+      { x_mpz_odd_p          }, /* 1 */
+      { mpz_perfect_square_p }, /* 2 */
+      { mpz_perfect_power_p  }, /* 3 */
+    };
+CODE:
+    /* Evaluates the predicate selected by ix on z.  */
+    assert_table (ix);
+    RETVAL = (*table[ix].op) (z);
+OUTPUT:
+    RETVAL
+
+
+mpz
+fac (n)
+    ulong_coerce n
+ALIAS:
+    GMP::Mpz::fib    = 1
+    GMP::Mpz::lucnum = 2
+PREINIT:
+    /* one entry per ALIAS index, in the same order as the ALIAS list */
+    static_functable const struct {
+      void (*op) (mpz_ptr r, unsigned long n);
+    } table[] = {
+      { mpz_fac_ui },    /* 0 */
+      { mpz_fib_ui },    /* 1 */
+      { mpz_lucnum_ui }, /* 2 */
+    };
+CODE:
+    /* Computes the sequence value selected by ix for the unsigned long
+       n and returns it in a new mpz.  */
+    assert_table (ix);
+    RETVAL = new_mpz();
+    (*table[ix].op) (RETVAL->m, n);
+OUTPUT:
+    RETVAL
+
+
+void
+fib2 (n)
+    ulong_coerce n
+ALIAS:
+    GMP::Mpz::lucnum2 = 1
+PREINIT:
+    /* index 0 is fib2, index 1 is lucnum2 */
+    static_functable const struct {
+      void (*op) (mpz_ptr r, mpz_ptr r2, unsigned long n);
+    } table[] = {
+      { mpz_fib2_ui },    /* 0 */
+      { mpz_lucnum2_ui }, /* 1 */
+    };
+    mpz  r, r2;
+PPCODE:
+    /* Two-result form: pushes both outputs of the selected function, in
+       the order the function fills them, as new mortal mpz objects.  */
+    assert_table (ix);
+    r = new_mpz();
+    r2 = new_mpz();
+    (*table[ix].op) (r->m, r2->m, n);
+    EXTEND (SP, 2);
+    PUSHs (MPX_NEWMORTAL (r,  mpz_class_hv));
+    PUSHs (MPX_NEWMORTAL (r2, mpz_class_hv));
+
+
+mpz
+gcd (x, ...)
+    mpz_coerce x
+ALIAS:
+    GMP::Mpz::lcm = 1
+PREINIT:
+    static_functable const struct {
+      void (*op) (mpz_ptr w, mpz_srcptr x, mpz_srcptr y);
+      void (*op_ui) (mpz_ptr w, mpz_srcptr x, unsigned long y);
+    } table[] = {
+      /* cast to ignore ulong return from mpz_gcd_ui */
+      { mpz_gcd,
+        (void (*) (mpz_ptr, mpz_srcptr, unsigned long)) mpz_gcd_ui }, /* 0 */
+      { mpz_lcm, mpz_lcm_ui },                                        /* 1 */
+    };
+    int  i;
+    SV   *yv;
+CODE:
+    /* Variadic gcd (ix 0) or lcm (ix 1).  With a single argument the
+       result is a copy of x; otherwise the remaining arguments are
+       folded into the result one at a time.  */
+    assert_table (ix);
+    RETVAL = new_mpz();
+    if (items == 1)
+      mpz_set (RETVAL->m, x);
+    else
+      {
+        for (i = 1; i < items; i++)
+          {
+            yv = ST(i);
+            /* plain perl integers take the _ui variant on the absolute
+               value.  NOTE(review): ABS(SvIVX(yv)) looks unsuitable for
+               a UV above IV_MAX or an IV wider than unsigned long --
+               confirm whether such operands can reach this branch.  */
+            if (SvIOK(yv))
+              (*table[ix].op_ui) (RETVAL->m, x, ABS(SvIVX(yv)));
+            else
+              (*table[ix].op) (RETVAL->m, x, coerce_mpz (tmp_mpz_1, yv));
+            /* the running result is the left operand from now on */
+            x = RETVAL->m;
+          }
+      }
+OUTPUT:
+    RETVAL
+
+
+void
+gcdext (a, b)
+    mpz_coerce a
+    mpz_coerce b
+PREINIT:
+    mpz g, x, y;
+PPCODE:
+    /* Extended gcd: pushes the three outputs of mpz_gcdext, in the
+       order g, x, y, as new mortal mpz objects.  */
+    g = new_mpz();
+    x = new_mpz();
+    y = new_mpz();
+    mpz_gcdext (g->m, x->m, y->m, a, b);
+    EXTEND (SP, 3);
+    PUSHs (MPX_NEWMORTAL (g, mpz_class_hv));
+    PUSHs (MPX_NEWMORTAL (x, mpz_class_hv));
+    PUSHs (MPX_NEWMORTAL (y, mpz_class_hv));
+
+
+unsigned long
+hamdist (x, y)
+    mpz_coerce x
+    mpz_coerce y
+CODE:
+    /* Thin wrapper around mpz_hamdist on the coerced operands.  */
+    RETVAL = mpz_hamdist (x, y);
+OUTPUT:
+    RETVAL
+
+
+mpz
+invert (a, m)
+    mpz_coerce a
+    mpz_coerce m
+CODE:
+    /* Returns the result of mpz_invert in a new mpz, or undef (after
+       releasing the new object) when mpz_invert reports failure by
+       returning zero.  */
+    RETVAL = new_mpz();
+    if (mpz_invert (RETVAL->m, a, m) == 0)
+      {
+        free_mpz (RETVAL);
+        XSRETURN_UNDEF;
+      }
+OUTPUT:
+    RETVAL
+
+
+int
+jacobi (a, b)
+    mpz_coerce a
+    mpz_coerce b
+CODE:
+    /* Thin wrapper around mpz_jacobi on the coerced operands.  */
+    RETVAL = mpz_jacobi (a, b);
+OUTPUT:
+    RETVAL
+
+
+int
+kronecker (a, b)
+    SV *a
+    SV *b
+CODE:
+    /* Picks the kronecker variant from which operand is a plain perl
+       integer: b is checked first, then a, and otherwise both operands
+       are coerced to mpz.  */
+    if (! SvIOK(b))
+      {
+        if (SvIOK(a))
+          RETVAL = mpz_si_kronecker (SvIVX(a), coerce_mpz(tmp_mpz_0,b));
+        else
+          RETVAL = mpz_kronecker (coerce_mpz(tmp_mpz_0,a),
+                                  coerce_mpz(tmp_mpz_1,b));
+      }
+    else
+      RETVAL = mpz_kronecker_si (coerce_mpz(tmp_mpz_0,a), SvIVX(b));
+OUTPUT:
+    RETVAL
+
+
+void
+mpz_export (order, size, endian, nails, z)
+    int        order
+    size_t     size
+    int        endian
+    size_t     nails
+    mpz_coerce z
+PREINIT:
+    size_t  numb, count, bytes, actual_count;
+    char    *data;
+    SV      *sv;
+PPCODE:
+    /* Export z as a perl string of words, each of the given size in
+       bytes.  Croaks when size and nails leave no value bits per word.  */
+
+    /* numb is the number of value bits per word; it must be positive,
+       otherwise the subtraction wraps or the division below is by
+       zero.  */
+    if (size == 0 || nails >= 8*size)
+      croak ("%s mpz_export: invalid size or nails", mpz_class);
+    numb = 8*size - nails;
+
+    /* upper bound on the number of words, used to size the buffer */
+    count = (mpz_sizeinbase (z, 2) + numb-1) / numb;
+    New (GMP_MALLOC_ID, data, count * size + 1, char);
+    mpz_export (data, &actual_count, order, size, endian, nails, z);
+
+    /* For zero, mpz_sizeinbase reports one bit but mpz_export writes no
+       words at all, so the string length must come from the count that
+       was actually written rather than from the estimate.  */
+    assert (actual_count <= count);
+    bytes = actual_count * size;
+    data[bytes] = '\0';
+    sv = sv_newmortal(); sv_usepvn_mg (sv, data, bytes); PUSHs(sv);
+
+
+mpz
+mpz_import (order, size, endian, nails, sv)
+    int     order
+    size_t  size
+    int     endian
+    size_t  nails
+    SV      *sv
+PREINIT:
+    size_t      count;
+    const char  *data;
+    STRLEN      len;
+CODE:
+    /* Build an mpz from the bytes of a perl string, taken as words of
+       the given size.  Croaks when size is zero or when the string
+       length is not a whole number of words.  */
+    data = SvPV (sv, len);
+    /* a zero word size would make the modulus and division below
+       divide by zero */
+    if (size == 0)
+      croak ("%s mpz_import: size must be non-zero", mpz_class);
+    if ((len % size) != 0)
+      croak ("%s mpz_import: string not a multiple of the given size",
+             mpz_class);
+    count = len / size;
+    RETVAL = new_mpz();
+    mpz_import (RETVAL->m, count, order, size, endian, nails, data);
+OUTPUT:
+    RETVAL
+
+
+mpz
+nextprime (z)
+    mpz_coerce z
+CODE:
+    /* Returns the result of mpz_nextprime on z in a new mpz.  */
+    RETVAL = new_mpz();
+    mpz_nextprime (RETVAL->m, z);
+OUTPUT:
+    RETVAL
+
+
+unsigned long
+popcount (x)
+    mpz_coerce x
+CODE:
+    /* Thin wrapper around mpz_popcount on the coerced operand.  */
+    RETVAL = mpz_popcount (x);
+OUTPUT:
+    RETVAL
+
+
+mpz
+powm (b, e, m)
+    mpz_coerce b
+    mpz_coerce e
+    mpz_coerce m
+CODE:
+    /* Returns the result of mpz_powm (base b, exponent e, modulus m) in
+       a new mpz.  */
+    RETVAL = new_mpz();
+    mpz_powm (RETVAL->m, b, e, m);
+OUTPUT:
+    RETVAL
+
+
+bool
+probab_prime_p (z, n)
+    mpz_coerce   z
+    ulong_coerce n
+CODE:
+    /* Returns the mpz_probab_prime_p result through the bool typemap,
+       so only zero versus non-zero is visible to the caller.  */
+    RETVAL = mpz_probab_prime_p (z, n);
+OUTPUT:
+    RETVAL
+
+
+# No attempt to coerce here, only an mpz makes sense.
+void
+realloc (z, limbs)
+    mpz z
+    int limbs
+CODE:
+    /* Passes the requested limb count straight to _mpz_realloc for the
+       given mpz object.  */
+    _mpz_realloc (z->m, limbs);
+
+
+void
+remove (z, f)
+    mpz_coerce z
+    mpz_coerce f
+PREINIT:
+    mpz            rem;
+    unsigned long  mult;
+PPCODE:
+    /* Pushes two values: the mpz produced by mpz_remove, then the count
+       that mpz_remove returned.  */
+    rem = new_mpz();
+    mult = mpz_remove (rem->m, z, f);
+    EXTEND (SP, 2);
+    PUSHs (MPX_NEWMORTAL (rem, mpz_class_hv));
+    PUSHs (sv_2mortal (newSViv (mult)));
+
+
+void
+roote (z, n)
+    mpz_coerce   z
+    ulong_coerce n
+PREINIT:
+    mpz root;
+    int exact;
+PPCODE:
+    /* Pushes two values: the root computed by mpz_root as a new mortal
+       mpz, then a perl boolean reflecting mpz_root's return value.  */
+    root = new_mpz();
+    exact = mpz_root (root->m, z, n);
+    EXTEND (SP, 2);
+    PUSHs (MPX_NEWMORTAL (root, mpz_class_hv));
+    /* PL_sv_yes and PL_sv_no are interpreter-owned immortals, so they
+       are pushed as they are and must not be made mortal.  */
+    PUSHs (exact ? &PL_sv_yes : &PL_sv_no);
+
+
+void
+rootrem (z, n)
+    mpz_coerce   z
+    ulong_coerce n
+PREINIT:
+    mpz root;
+    mpz rem;
+PPCODE:
+    /* Pushes the two outputs of mpz_rootrem, root first and then
+       remainder, as new mortal mpz objects.  */
+    root = new_mpz();
+    rem = new_mpz();
+    mpz_rootrem (root->m, rem->m, z, n);
+    EXTEND (SP, 2);
+    PUSHs (MPX_NEWMORTAL (root, mpz_class_hv));
+    PUSHs (MPX_NEWMORTAL (rem,  mpz_class_hv));
+
+
+# In the past scan0 and scan1 were described as returning ULONG_MAX which
+# could be obtained in perl with ~0.  That wasn't true on 64-bit systems
+# (eg. alpha) with perl 5.005, since in that version IV and UV were still
+# 32-bits.
+#
+# We changed in gmp 4.2 to just say ~0 for the not-found return.  It's
+# likely most people have used ~0 rather than POSIX::ULONG_MAX(), so this
+# change should match existing usage.  It only actually makes a difference
+# in old perl, since recent versions have gone to 64-bits for IV and UV, the
+# same as a ulong.
+#
+# In perl 5.005 we explicitly mask the mpz return down to 32-bits to get ~0.
+# UV_MAX is no good, it reflects the size of the UV type (64-bits), rather
+# than the size of the values one ought to be storing in an SV (32-bits).
+
+gmp_UV
+scan0 (z, start)
+    mpz_coerce   z
+    ulong_coerce start
+ALIAS:
+    GMP::Mpz::scan1 = 1
+PREINIT:
+    /* index 0 is scan0, index 1 is scan1 */
+    static_functable const struct {
+      unsigned long (*op) (mpz_srcptr, unsigned long);
+    } table[] = {
+      { mpz_scan0  }, /* 0 */
+      { mpz_scan1  }, /* 1 */
+    };
+CODE:
+    /* Runs the scan selected by ix from bit position start.  On perl
+       older than 5.6 the result is masked down to 32 bits.  */
+    assert_table (ix);
+    RETVAL = (*table[ix].op) (z, start);
+    if (PERL_LT (5,6))
+      RETVAL &= 0xFFFFFFFF;
+OUTPUT:
+    RETVAL
+
+
+void
+setbit (sv, bit)
+    SV           *sv
+    ulong_coerce bit
+ALIAS:
+    GMP::Mpz::clrbit = 1
+    GMP::Mpz::combit = 2
+PREINIT:
+    /* one entry per ALIAS index, in the same order as the ALIAS list */
+    static_functable const struct {
+      void (*op) (mpz_ptr, unsigned long);
+    } table[] = {
+      { mpz_setbit }, /* 0 */
+      { mpz_clrbit }, /* 1 */
+      { mpz_combit }, /* 2 */
+    };
+    int  use;
+CODE:
+    /* Set, clear or complement one bit of the caller's variable, as
+       selected by ix.  The variable is either modified directly or
+       replaced by a new mpz object, depending on the test below.  */
+    use = use_sv (sv);
+    if (use == USE_MPZ && SvREFCNT(SvRV(sv)) == 1 && ! SvSMAGICAL(sv))
+      {
+        /* our operand is a non-magical mpz with a reference count of 1, so
+           we can just modify it */
+        (*table[ix].op) (SvMPZ(sv)->m, bit);
+      }
+    else
+      {
+        /* otherwise we need to make a new mpz, from whatever we have, and
+           operate on that, possibly invoking magic when storing back */
+        SV   *new_sv;
+        mpz  z = new_mpz ();
+        mpz_ptr  coerce_ptr = coerce_mpz_using (z->m, sv, use);
+        if (coerce_ptr != z->m)
+          mpz_set (z->m, coerce_ptr);
+        (*table[ix].op) (z->m, bit);
+        new_sv = sv_bless (sv_setref_pv (sv_newmortal(), NULL, z),
+                           mpz_class_hv);
+        SvSetMagicSV (sv, new_sv);
+      }
+
+
+void
+sqrtrem (z)
+    mpz_coerce z
+PREINIT:
+    mpz root;
+    mpz rem;
+PPCODE:
+    /* Pushes the two outputs of mpz_sqrtrem, root first and then
+       remainder, as new mortal mpz objects.  */
+    root = new_mpz();
+    rem = new_mpz();
+    mpz_sqrtrem (root->m, rem->m, z);
+    EXTEND (SP, 2);
+    PUSHs (MPX_NEWMORTAL (root, mpz_class_hv));
+    PUSHs (MPX_NEWMORTAL (rem,  mpz_class_hv));
+
+
+size_t
+sizeinbase (z, base)
+    mpz_coerce z
+    int        base
+CODE:
+    /* Thin wrapper around mpz_sizeinbase; base is passed through
+       unchecked.  */
+    RETVAL = mpz_sizeinbase (z, base);
+OUTPUT:
+    RETVAL
+
+
+int
+tstbit (z, bit)
+    mpz_coerce   z
+    ulong_coerce bit
+CODE:
+    /* Thin wrapper around mpz_tstbit for the given bit index.  */
+    RETVAL = mpz_tstbit (z, bit);
+OUTPUT:
+    RETVAL
+
+
+
+#------------------------------------------------------------------------------
+
+MODULE = GMP         PACKAGE = GMP::Mpq
+
+
+mpq
+mpq (...)
+ALIAS:
+    GMP::Mpq::new = 1
+CODE:
+    /* Construct a new mpq: zero with no arguments, a coerced copy of
+       one argument, or numerator and denominator from two arguments.
+       Croaks on more than two arguments.  The two-argument form stores
+       the parts as given; no canonicalization call is made here.  */
+    TRACE (printf ("%s new, ix=%ld, items=%d\n",
+                   mpq_class, (long) ix, (int) items));
+
+    /* Check the argument count before allocating, as GMP::Mpf::new
+       does, so that croak() cannot strand a freshly allocated object.  */
+    if (items > 2)
+      croak ("%s new: invalid arguments", mpq_class);
+
+    RETVAL = new_mpq();
+    switch (items) {
+    case 0:
+      mpq_set_ui (RETVAL->m, 0L, 1L);
+      break;
+    case 1:
+      {
+        /* coerce_mpq may return either the destination or some other
+           object; copy only in the latter case */
+        mpq_ptr rp = RETVAL->m;
+        mpq_ptr cp = coerce_mpq (rp, ST(0));
+        if (cp != rp)
+          mpq_set (rp, cp);
+      }
+      break;
+    case 2:
+      {
+        mpz_ptr rp, cp;
+        rp = mpq_numref (RETVAL->m);
+        cp = coerce_mpz (rp, ST(0));
+        if (cp != rp)
+          mpz_set (rp, cp);
+        rp = mpq_denref (RETVAL->m);
+        cp = coerce_mpz (rp, ST(1));
+        if (cp != rp)
+          mpz_set (rp, cp);
+      }
+      break;
+    }
+OUTPUT:
+    RETVAL
+
+
+void
+overload_constant (str, pv, d1, ...)
+    const_string_assume str
+    SV                  *pv
+    dummy               d1
+PREINIT:
+    SV  *sv;
+    mpq q;
+PPCODE:
+    /* Try to parse the constant text as an mpq.  When parsing fails the
+       new object is released and pv is pushed back unchanged.  */
+    TRACE (printf ("%s constant: %s\n", mpq_class, str));
+    q = new_mpq();
+    if (mpq_set_str (q->m, str, 0) != 0)
+      {
+        free_mpq (q);
+        sv = pv;
+      }
+    else
+      {
+        sv = sv_bless (sv_setref_pv (sv_newmortal(), NULL, q), mpq_class_hv);
+      }
+    XPUSHs(sv);
+
+
+mpq
+overload_copy (q, d1, d2)
+    mpq_assume q
+    dummy      d1
+    dummy      d2
+CODE:
+    /* Return a fresh mpq holding the same value as q; d1 and d2 are not
+       referenced in the body.  */
+    RETVAL = new_mpq();
+    mpq_set (RETVAL->m, q->m);
+OUTPUT:
+    RETVAL
+
+
+void
+DESTROY (q)
+    mpq_assume q
+CODE:
+    /* Destructor: hands the mpq back through free_mpq.  */
+    TRACE (printf ("%s DESTROY %p\n", mpq_class, q));
+    free_mpq (q);
+
+
+malloced_string
+overload_string (q, d1, d2)
+    mpq_assume q
+    dummy      d1
+    dummy      d2
+CODE:
+    /* Stringify q in base 10.  The buffer is allocated by mpq_get_str
+       and returned through the malloced_string typemap.  */
+    TRACE (printf ("%s overload_string %p\n", mpq_class, q));
+    RETVAL = mpq_get_str (NULL, 10, q->m);
+OUTPUT:
+    RETVAL
+
+
+mpq
+overload_add (xv, yv, order)
+    SV *xv
+    SV *yv
+    SV *order
+ALIAS:
+    GMP::Mpq::overload_sub   = 1
+    GMP::Mpq::overload_mul   = 2
+    GMP::Mpq::overload_div   = 3
+PREINIT:
+    /* one entry per ALIAS index, in the same order as the ALIAS list */
+    static_functable const struct {
+      void (*op) (mpq_ptr, mpq_srcptr, mpq_srcptr);
+    } table[] = {
+      { mpq_add }, /* 0 */
+      { mpq_sub }, /* 1 */
+      { mpq_mul }, /* 2 */
+      { mpq_div }, /* 3 */
+    };
+CODE:
+    /* Binary operator returning a new mpq.  ix selects the operation;
+       when order is &PL_sv_yes the two operands are swapped first.
+       Each operand is coerced using its own temporary.  */
+    TRACE (printf ("%s binary\n", mpq_class));
+    assert_table (ix);
+    if (order == &PL_sv_yes)
+      SV_PTR_SWAP (xv, yv);
+    RETVAL = new_mpq();
+    (*table[ix].op) (RETVAL->m,
+                     coerce_mpq (tmp_mpq_0, xv),
+                     coerce_mpq (tmp_mpq_1, yv));
+OUTPUT:
+    RETVAL
+
+
+void
+overload_addeq (x, y, o)
+    mpq_assume   x
+    mpq_coerce   y
+    order_noswap o
+ALIAS:
+    GMP::Mpq::overload_subeq = 1
+    GMP::Mpq::overload_muleq = 2
+    GMP::Mpq::overload_diveq = 3
+PREINIT:
+    /* one entry per ALIAS index, in the same order as the ALIAS list */
+    static_functable const struct {
+      void (*op) (mpq_ptr, mpq_srcptr, mpq_srcptr);
+    } table[] = {
+      { mpq_add    }, /* 0 */
+      { mpq_sub    }, /* 1 */
+      { mpq_mul    }, /* 2 */
+      { mpq_div    }, /* 3 */
+    };
+PPCODE:
+    /* Assignment operator: x is updated in place with the operation
+       selected by ix, then the first argument SV is pushed back as the
+       result.  */
+    assert_table (ix);
+    (*table[ix].op) (x->m, x->m, y);
+    XPUSHs(ST(0));
+
+
+mpq
+overload_lshift (qv, nv, order)
+    SV *qv
+    SV *nv
+    SV *order
+ALIAS:
+    GMP::Mpq::overload_rshift   = 1
+    GMP::Mpq::overload_pow      = 2
+PREINIT:
+    /* one entry per ALIAS index, in the same order as the ALIAS list */
+    static_functable const struct {
+      void (*op) (mpq_ptr, mpq_srcptr, unsigned long);
+    } table[] = {
+      { mpq_mul_2exp }, /* 0 */
+      { mpq_div_2exp }, /* 1 */
+      { x_mpq_pow_ui }, /* 2 */
+    };
+CODE:
+    /* Operator taking an mpq and an unsigned long, returning a new mpq.
+       Operands are swapped when order is &PL_sv_yes.  The result object
+       itself is offered to coerce_mpq as the scratch destination.  */
+    assert_table (ix);
+    if (order == &PL_sv_yes)
+      SV_PTR_SWAP (qv, nv);
+    RETVAL = new_mpq();
+    (*table[ix].op) (RETVAL->m, coerce_mpq (RETVAL->m, qv), coerce_ulong (nv));
+OUTPUT:
+    RETVAL
+
+
+void
+overload_lshifteq (q, n, o)
+    mpq_assume   q
+    ulong_coerce n
+    order_noswap o
+ALIAS:
+    GMP::Mpq::overload_rshifteq   = 1
+    GMP::Mpq::overload_poweq      = 2
+PREINIT:
+    /* one entry per ALIAS index, in the same order as the ALIAS list */
+    static_functable const struct {
+      void (*op) (mpq_ptr, mpq_srcptr, unsigned long);
+    } table[] = {
+      { mpq_mul_2exp }, /* 0 */
+      { mpq_div_2exp }, /* 1 */
+      { x_mpq_pow_ui }, /* 2 */
+    };
+PPCODE:
+    /* In-place form: q is updated with the operation selected by ix and
+       the first argument SV is pushed back as the result.  */
+    assert_table (ix);
+    (*table[ix].op) (q->m, q->m, n);
+    XPUSHs(ST(0));
+
+
+void
+overload_inc (q, d1, d2)
+    mpq_assume q
+    dummy      d1
+    dummy      d2
+ALIAS:
+    GMP::Mpq::overload_dec = 1
+PREINIT:
+    /* index 0 adds, index 1 subtracts */
+    static_functable const struct {
+      void (*op) (mpz_ptr, mpz_srcptr, mpz_srcptr);
+    } table[] = {
+      { mpz_add }, /* 0 */
+      { mpz_sub }, /* 1 */
+    };
+CODE:
+    /* Adds the denominator to the numerator (or subtracts it) in place,
+       which changes the value of q by exactly one.  */
+    assert_table (ix);
+    (*table[ix].op) (mpq_numref(q->m), mpq_numref(q->m), mpq_denref(q->m));
+
+
+mpq
+overload_abs (q, d1, d2)
+    mpq_assume q
+    dummy      d1
+    dummy      d2
+ALIAS:
+    GMP::Mpq::overload_neg = 1
+PREINIT:
+    /* index 0 is abs, index 1 is neg */
+    static_functable const struct {
+      void (*op) (mpq_ptr w, mpq_srcptr x);
+    } table[] = {
+      { mpq_abs }, /* 0 */
+      { mpq_neg }, /* 1 */
+    };
+CODE:
+    /* Unary operator: applies the function selected by ix to q and
+       returns the result in a new mpq.  */
+    assert_table (ix);
+    RETVAL = new_mpq();
+    (*table[ix].op) (RETVAL->m, q->m);
+OUTPUT:
+    RETVAL
+
+
+int
+overload_spaceship (x, y, order)
+    mpq_assume x
+    mpq_coerce y
+    SV         *order
+CODE:
+    /* Three-way comparison: the mpq_cmp result is reduced to its sign
+       and negated when order is &PL_sv_yes.  */
+    RETVAL = mpq_cmp (x->m, y);
+    RETVAL = SGN (RETVAL);
+    if (order == &PL_sv_yes)
+      RETVAL = -RETVAL;
+OUTPUT:
+    RETVAL
+
+
+bool
+overload_bool (q, d1, d2)
+    mpq_assume q
+    dummy      d1
+    dummy      d2
+ALIAS:
+    GMP::Mpq::overload_not = 1
+CODE:
+    /* Truth value of q (non-zero is true); the xor with ix inverts the
+       answer for the overload_not alias.  */
+    RETVAL = (mpq_sgn (q->m) != 0) ^ ix;
+OUTPUT:
+    RETVAL
+
+
+bool
+overload_eq (x, yv, d)
+    mpq_assume x
+    SV         *yv
+    dummy      d
+ALIAS:
+    GMP::Mpq::overload_ne = 1
+PREINIT:
+    int  use;
+CODE:
+    /* Equality test of an mpq against any supported operand; the final
+       xor with ix inverts the answer for the overload_ne alias.
+       Integer operands can only be equal when x is itself an integer,
+       in which case comparing against its numerator is sufficient.  */
+    use = use_sv (yv);
+    switch (use) {
+    case USE_IVX:
+      RETVAL = (x_mpq_integer_p (x->m)
+                && mpz_cmp_si (mpq_numref(x->m), SvIVX(yv)) == 0);
+      break;
+
+    case USE_UVX:
+      RETVAL = (x_mpq_integer_p (x->m)
+                && mpz_cmp_ui (mpq_numref(x->m), SvUVX(yv)) == 0);
+      break;
+
+    case USE_MPZ:
+      RETVAL = (x_mpq_integer_p (x->m)
+                && mpz_cmp (mpq_numref(x->m), SvMPZ(yv)->m) == 0);
+      break;
+
+    case USE_MPQ:
+      RETVAL = (mpq_equal (x->m, SvMPQ(yv)->m) != 0);
+      break;
+
+    default:
+      /* every other class is coerced to an mpq first */
+      RETVAL = (mpq_equal (x->m, coerce_mpq_using (tmp_mpq_0, yv, use)) != 0);
+      break;
+    }
+    RETVAL ^= ix;
+OUTPUT:
+    RETVAL
+
+
+void
+canonicalize (q)
+    mpq q
+CODE:
+    /* Calls mpq_canonicalize on the given mpq object in place.  */
+    mpq_canonicalize (q->m);
+
+
+mpq
+inv (q)
+    mpq_coerce q
+CODE:
+    /* Returns the result of mpq_inv on q in a new mpq.  */
+    RETVAL = new_mpq();
+    mpq_inv (RETVAL->m, q);
+OUTPUT:
+    RETVAL
+
+
+mpz
+num (q)
+    mpq q
+ALIAS:
+    GMP::Mpq::den = 1
+CODE:
+    /* Returns a copy of the numerator (ix 0) or of the denominator
+       (any other ix) in a new mpz.  */
+    RETVAL = new_mpz();
+    if (ix == 0)
+      mpz_set (RETVAL->m, mpq_numref(q->m));
+    else
+      mpz_set (RETVAL->m, mpq_denref(q->m));
+OUTPUT:
+    RETVAL
+
+
+
+#------------------------------------------------------------------------------
+
+MODULE = GMP         PACKAGE = GMP::Mpf
+
+
+mpf
+mpf (...)
+ALIAS:
+    GMP::Mpf::new = 1
+PREINIT:
+    unsigned long  prec;
+CODE:
+    /* Constructor: mpf(), mpf(value) or mpf(value, prec).  With no prec
+       argument the current GMP default precision is used.  More than two
+       arguments is an error.  */
+    TRACE (printf ("%s new\n", mpf_class));
+    if (items > 2)
+      croak ("%s new: invalid arguments", mpf_class);
+
+    if (items == 2)
+      prec = coerce_ulong (ST(1));
+    else
+      prec = mpf_get_default_prec();
+
+    RETVAL = new_mpf (prec);
+    if (items >= 1)
+      {
+        /* initial value given, convert it according to its perl type */
+        SV *value = ST(0);
+        my_mpf_set_sv_using (RETVAL, value, use_sv(value));
+      }
+OUTPUT:
+    RETVAL
+
+
+mpf
+overload_constant (sv, d1, d2, ...)
+    SV     *sv
+    dummy  d1
+    dummy  d2
+CODE:
+    /* Handler installed by GMP::Mpf's ":constants" import: turn the source
+       text of a numeric literal (sv, asserted to be a string) into a new
+       mpf at the default precision.  d1, d2 and any further arguments are
+       ignored.  */
+    assert (SvPOK (sv));
+    /* trace under this package's own class name, not mpq's */
+    TRACE (printf ("%s constant: %s\n", mpf_class, SvPVX(sv)));
+    RETVAL = new_mpf (mpf_get_default_prec());
+    my_mpf_set_svstr (RETVAL, sv);
+OUTPUT:
+    RETVAL
+
+
+mpf
+overload_copy (f, d1, d2)
+    mpf_assume f
+    dummy      d1
+    dummy      d2
+CODE:
+    /* Copy handler (bound to '=' in GMP/Mpf.pm): allocate a new mpf with
+       the same precision as f and copy the value into it.  d1 and d2 are
+       unused.  */
+    TRACE (printf ("%s copy\n", mpf_class));
+    RETVAL = new_mpf (mpf_get_prec (f));
+    mpf_set (RETVAL, f);
+OUTPUT:
+    RETVAL
+
+
+void
+DESTROY (f)
+    mpf_assume f
+CODE:
+    /* Destructor for an mpf object: release the GMP storage with mpf_clear
+       first, then free the structure itself; f must not be used after
+       Safefree.  mpf_count is decremented through assert_support
+       (presumably bookkeeping that only exists in assert-enabled builds --
+       confirm against the macro definition).  */
+    TRACE (printf ("%s DESTROY %p\n", mpf_class, f));
+    mpf_clear (f);
+    Safefree (f);
+    assert_support (mpf_count--);
+    TRACE_ACTIVE ();
+
+
+mpf
+overload_add (x, y, order)
+    mpf_assume     x
+    mpf_coerce_st0 y
+    SV             *order
+ALIAS:
+    GMP::Mpf::overload_sub   = 1
+    GMP::Mpf::overload_mul   = 2
+    GMP::Mpf::overload_div   = 3
+PREINIT:
+    static_functable const struct {
+      void (*op) (mpf_ptr, mpf_srcptr, mpf_srcptr);
+    } table[] = {
+      { mpf_add }, /* 0 */
+      { mpf_sub }, /* 1 */
+      { mpf_mul }, /* 2 */
+      { mpf_div }, /* 3 */
+    };
+CODE:
+    /* Binary +, -, * and / for mpf; ix picks the GMP function from table[].
+       The result gets the precision of x, the operand known to be an mpf.
+       When order is &PL_sv_yes the operands reached us swapped, so they are
+       handed to the operation in the opposite order.  */
+    assert_table (ix);
+    RETVAL = new_mpf (mpf_get_prec (x));
+    if (order == &PL_sv_yes)
+      (*table[ix].op) (RETVAL, y, x);
+    else
+      (*table[ix].op) (RETVAL, x, y);
+OUTPUT:
+    RETVAL
+
+
+void
+overload_addeq (x, y, o)
+    mpf_assume     x
+    mpf_coerce_st0 y
+    order_noswap   o
+ALIAS:
+    GMP::Mpf::overload_subeq = 1
+    GMP::Mpf::overload_muleq = 2
+    GMP::Mpf::overload_diveq = 3
+PREINIT:
+    static_functable const struct {
+      void (*op) (mpf_ptr, mpf_srcptr, mpf_srcptr);
+    } table[] = {
+      { mpf_add }, /* 0 */
+      { mpf_sub }, /* 1 */
+      { mpf_mul }, /* 2 */
+      { mpf_div }, /* 3 */
+    };
+PPCODE:
+    /* In-place +=, -=, *= and /= for mpf; ix picks the GMP function from
+       table[].  The result is written back into x, and the original first
+       argument ST(0) is pushed as the return value, so the caller gets the
+       same object back rather than a new one.  */
+    assert_table (ix);
+    (*table[ix].op) (x, x, y);
+    XPUSHs(ST(0));
+
+
+mpf
+overload_lshift (fv, nv, order)
+    SV *fv
+    SV *nv
+    SV *order
+ALIAS:
+    GMP::Mpf::overload_rshift = 1
+    GMP::Mpf::overload_pow    = 2
+PREINIT:
+    static_functable const struct {
+      void (*op) (mpf_ptr, mpf_srcptr, unsigned long);
+    } table[] = {
+      { mpf_mul_2exp }, /* 0 */
+      { mpf_div_2exp }, /* 1 */
+      { mpf_pow_ui   }, /* 2 */
+    };
+    mpf f;
+    unsigned long prec;
+CODE:
+    /* Binary <<, >> and ** for mpf; ix picks the GMP function from table[].
+       On entry fv is taken to be the mpf operand (MPF_ASSUME) and its
+       precision is read before any swap, so the result precision always
+       comes from that mpf.  When order is &PL_sv_yes the two SVs are
+       exchanged: the value operated on is then coerced from the other
+       operand at that same precision, and the unsigned long count or
+       exponent is coerced from what was originally the mpf.  */
+    assert_table (ix);
+    MPF_ASSUME (f, fv);
+    prec = mpf_get_prec (f);
+    if (order == &PL_sv_yes)
+      SV_PTR_SWAP (fv, nv);
+    f = coerce_mpf (tmp_mpf_0, fv, prec);
+    RETVAL = new_mpf (prec);
+    (*table[ix].op) (RETVAL, f, coerce_ulong (nv));
+OUTPUT:
+    RETVAL
+
+
+void
+overload_lshifteq (f, n, o)
+    mpf_assume   f
+    ulong_coerce n
+    order_noswap o
+ALIAS:
+    GMP::Mpf::overload_rshifteq   = 1
+    GMP::Mpf::overload_poweq      = 2
+PREINIT:
+    static_functable const struct {
+      void (*op) (mpf_ptr, mpf_srcptr, unsigned long);
+    } table[] = {
+      { mpf_mul_2exp }, /* 0 */
+      { mpf_div_2exp }, /* 1 */
+      { mpf_pow_ui   }, /* 2 */
+    };
+PPCODE:
+    /* In-place <<=, >>= and **= for mpf; ix picks the GMP function from
+       table[].  f is updated in place and the original first argument
+       ST(0) is pushed back as the return value.  */
+    assert_table (ix);
+    (*table[ix].op) (f, f, n);
+    XPUSHs(ST(0));
+
+
+mpf
+overload_abs (f, d1, d2)
+    mpf_assume f
+    dummy      d1
+    dummy      d2
+ALIAS:
+    GMP::Mpf::overload_neg   = 1
+    GMP::Mpf::overload_sqrt  = 2
+PREINIT:
+    static_functable const struct {
+      void (*op) (mpf_ptr w, mpf_srcptr x);
+    } table[] = {
+      { mpf_abs  }, /* 0 */
+      { mpf_neg  }, /* 1 */
+      { mpf_sqrt }, /* 2 */
+    };
+CODE:
+    /* Unary abs, neg and sqrt for mpf; ix picks the GMP function from
+       table[].  The result is a new mpf with the same precision as f.
+       d1 and d2 are unused.  */
+    assert_table (ix);
+    RETVAL = new_mpf (mpf_get_prec (f));
+    (*table[ix].op) (RETVAL, f);
+OUTPUT:
+    RETVAL
+
+
+void
+overload_inc (f, d1, d2)
+    mpf_assume f
+    dummy      d1
+    dummy      d2
+ALIAS:
+    GMP::Mpf::overload_dec = 1
+PREINIT:
+    static_functable const struct {
+      void (*op) (mpf_ptr w, mpf_srcptr x, unsigned long y);
+    } table[] = {
+      { mpf_add_ui }, /* 0 */
+      { mpf_sub_ui }, /* 1 */
+    };
+CODE:
+    /* '++' (ix 0) and '--' (ix 1) for mpf: add or subtract 1 in place.
+       Nothing is returned.  d1 and d2 are unused.  */
+    assert_table (ix);
+    (*table[ix].op) (f, f, 1L);
+
+
+int
+overload_spaceship (xv, yv, order)
+    SV *xv
+    SV *yv
+    SV *order
+PREINIT:
+    mpf x;
+CODE:
+    /* Three-way comparison for the mpf '<=>' overload.  xv is taken to be
+       the mpf operand; yv is compared according to its perl type, using a
+       direct GMP comparison where one exists and falling back to comparing
+       both sides as rationals otherwise.  The result is reduced to
+       -1/0/+1 with SGN and negated when order is &PL_sv_yes (operands
+       swapped).  Croaks if a string operand cannot be parsed as a base 10
+       number.  */
+    MPF_ASSUME (x, xv);
+    switch (use_sv (yv)) {
+    case USE_IVX:
+      RETVAL = mpf_cmp_si (x, SvIVX(yv));
+      break;
+    case USE_UVX:
+      RETVAL = mpf_cmp_ui (x, SvUVX(yv));
+      break;
+    case USE_NVX:
+      RETVAL = mpf_cmp_d (x, SvNVX(yv));
+      break;
+    case USE_PVX:
+      {
+        STRLEN len;
+        const char *str = SvPV (yv, len);
+        /* Enough for all digits of the string.  Precision is counted in
+           bits and a decimal digit carries log2(10) < 4 bits, so allow 4
+           bits per character (plus some slack); one bit per character
+           would truncate long strings before the comparison.  */
+        tmp_mpf_set_prec (tmp_mpf_0, 4 * (unsigned long) len + 64);
+        if (mpf_set_str (tmp_mpf_0->m, str, 10) != 0)
+          croak ("%s <=>: invalid string format", mpf_class);
+        RETVAL = mpf_cmp (x, tmp_mpf_0->m);
+      }
+      break;
+    case USE_MPZ:
+      /* helper compares z against f, so flip the sign to get x versus y */
+      RETVAL = - x_mpz_cmp_f (SvMPZ(yv)->m, x);
+      break;
+    case USE_MPF:
+      RETVAL = mpf_cmp (x, SvMPF(yv));
+      break;
+    default:
+      RETVAL = mpq_cmp (coerce_mpq (tmp_mpq_0, xv),
+                        coerce_mpq (tmp_mpq_1, yv));
+      break;
+    }
+    RETVAL = SGN (RETVAL);
+    if (order == &PL_sv_yes)
+      RETVAL = -RETVAL;
+OUTPUT:
+    RETVAL
+
+
+bool
+overload_bool (f, d1, d2)
+    mpf_assume f
+    dummy      d1
+    dummy      d2
+ALIAS:
+    GMP::Mpf::overload_not = 1
+CODE:
+    /* Truth test on an mpf.  ix is 0 for overload_bool (true when f is
+       non-zero) and 1 for the overload_not alias (true when f is zero).
+       d1 and d2 are the unused extra overload arguments.  */
+    if (ix)
+      RETVAL = (mpf_sgn (f) == 0);
+    else
+      RETVAL = (mpf_sgn (f) != 0);
+OUTPUT:
+    RETVAL
+
+
+mpf
+ceil (f)
+    mpf_coerce_def f
+ALIAS:
+    GMP::Mpf::floor = 1
+    GMP::Mpf::trunc = 2
+PREINIT:
+    static_functable const struct {
+      void (*op) (mpf_ptr w, mpf_srcptr x);
+    } table[] = {
+      { mpf_ceil  }, /* 0 */
+      { mpf_floor }, /* 1 */
+      { mpf_trunc }, /* 2 */
+    };
+CODE:
+    /* ceil, floor and trunc, selected by ix from table[].  The result is a
+       new mpf with the same precision as the (coerced) argument f.  */
+    assert_table (ix);
+    RETVAL = new_mpf (mpf_get_prec (f));
+    (*table[ix].op) (RETVAL, f);
+OUTPUT:
+    RETVAL
+
+
+unsigned long
+get_default_prec ()
+CODE:
+    /* Return GMP's current default mpf precision.  */
+    RETVAL = mpf_get_default_prec();
+OUTPUT:
+    RETVAL
+
+
+unsigned long
+get_prec (f)
+    mpf_coerce_def f
+CODE:
+    /* Return the precision of f as reported by mpf_get_prec.  f goes
+       through the mpf_coerce_def typemap first, so for a non-mpf argument
+       this is the precision of the coerced value.  */
+    RETVAL = mpf_get_prec (f);
+OUTPUT:
+    RETVAL
+
+
+bool
+mpf_eq (xv, yv, bits)
+    SV           *xv
+    SV           *yv
+    ulong_coerce bits
+PREINIT:
+    mpf  x, y;
+CODE:
+    /* Wrapper for GMP's mpf_eq: both operands are converted to mpf by
+       coerce_mpf_pair (its return value is not needed here) and then
+       compared with the given bits argument.  */
+    TRACE (printf ("%s eq\n", mpf_class));
+    coerce_mpf_pair (&x,xv, &y,yv);
+    RETVAL = mpf_eq (x, y, bits);
+OUTPUT:
+    RETVAL
+
+
+mpf
+reldiff (xv, yv)
+    SV *xv
+    SV *yv
+PREINIT:
+    mpf  x, y;
+    unsigned long prec;
+CODE:
+    /* Wrapper for GMP's mpf_reldiff.  coerce_mpf_pair converts both
+       operands to mpf and returns the precision used for the new result
+       object.  */
+    TRACE (printf ("%s reldiff\n", mpf_class));
+    prec = coerce_mpf_pair (&x,xv, &y,yv);
+    RETVAL = new_mpf (prec);
+    mpf_reldiff (RETVAL, x, y);
+OUTPUT:
+    RETVAL
+
+
+void
+set_default_prec (prec)
+    ulong_coerce prec
+CODE:
+    /* Set GMP's default mpf precision to prec.  */
+    TRACE (printf ("%s set_default_prec %lu\n", mpf_class, prec));
+    mpf_set_default_prec (prec);
+
+
+void
+set_prec (sv, prec)
+    SV           *sv
+    ulong_coerce prec
+PREINIT:
+    mpf_ptr  old_f, new_f;
+    int      use;
+CODE:
+    /* Change the precision of the value held in sv, modifying sv itself.
+       An mpf with a reference count of 1 is re-precisioned in place.  An
+       mpf referenced from elsewhere too is left alone, and sv is pointed
+       at a fresh copy with the new precision.  Any other kind of value is
+       converted to a new mpf of that precision, which sv is then made to
+       refer to.  */
+    TRACE (printf ("%s set_prec to %lu\n", mpf_class, prec));
+    use = use_sv (sv);
+    if (use == USE_MPF && SvREFCNT(SvRV(sv)) == 1)
+      {
+        mpf_set_prec (SvMPF(sv), prec);
+      }
+    else
+      {
+        if (use == USE_MPF)
+          {
+            old_f = SvMPF(sv);
+            TRACE (printf ("  fork new mpf\n"));
+            new_f = new_mpf (prec);
+            mpf_set (new_f, old_f);
+          }
+        else
+          {
+            TRACE (printf ("  coerce to mpf\n"));
+            new_f = new_mpf (prec);
+            my_mpf_set_sv_using (new_f, sv, use);
+          }
+        /* rebind sv to the new object, blessed into the mpf class */
+        sv_bless (sv_setref_pv (sv, NULL, new_f), mpf_class_hv);
+      }
+
+
+
+#------------------------------------------------------------------------------
+
+MODULE = GMP         PACKAGE = GMP::Rand
+
+randstate
+new (...)
+ALIAS:
+    GMP::Rand::randstate = 1
+CODE:
+    /* Constructor for a random state.  Accepted forms:
+         new ()                          default algorithm
+         new (randstate)                 copy of an existing state
+         new ("lc_2exp", a, c, m2exp)    gmp_randinit_lc_2exp
+         new ("lc_2exp_size", size)      gmp_randinit_lc_2exp_size, giving
+                                         undef if GMP rejects the size
+         new ("mt")                      gmp_randinit_mt
+       Anything else croaks.  The state structure is allocated up front, so
+       every failure exit below must release it again and undo the
+       rand_count bookkeeping, since no object is ever created for DESTROY
+       to do that.  */
+    TRACE (printf ("%s new\n", rand_class));
+    New (GMP_MALLOC_ID, RETVAL, 1, __gmp_randstate_struct);
+    TRACE (printf ("  RETVAL %p\n", RETVAL));
+    assert_support (rand_count++);
+    TRACE_ACTIVE ();
+
+    if (items == 0)
+      {
+        gmp_randinit_default (RETVAL);
+      }
+    else
+      {
+        if (SvROK (ST(0)) && sv_derived_from (ST(0), rand_class))
+          {
+            if (items != 1)
+              goto invalid;
+            gmp_randinit_set (RETVAL, SvRANDSTATE (ST(0)));
+          }
+        else
+          {
+            STRLEN      len;
+            const char  *method = SvPV (ST(0), len);
+            assert (len == strlen (method));
+            if (strcmp (method, "lc_2exp") == 0)
+              {
+                if (items != 4)
+                  goto invalid;
+                gmp_randinit_lc_2exp (RETVAL,
+                                      coerce_mpz (tmp_mpz_0, ST(1)),
+                                      coerce_ulong (ST(2)),
+                                      coerce_ulong (ST(3)));
+              }
+            else if (strcmp (method, "lc_2exp_size") == 0)
+              {
+                if (items != 2)
+                  goto invalid;
+                if (! gmp_randinit_lc_2exp_size (RETVAL, coerce_ulong (ST(1))))
+                  {
+                    Safefree (RETVAL);
+                    assert_support (rand_count--);
+                    XSRETURN_UNDEF;
+                  }
+              }
+            else if (strcmp (method, "mt") == 0)
+              {
+                if (items != 1)
+                  goto invalid;
+                gmp_randinit_mt (RETVAL);
+              }
+            else
+              {
+              invalid:
+                /* No gmp_randinit_* call has run on any path reaching
+                   here, so only the raw allocation needs releasing.  */
+                Safefree (RETVAL);
+                assert_support (rand_count--);
+                croak ("%s new: invalid arguments", rand_class);
+              }
+          }
+      }
+OUTPUT:
+    RETVAL
+
+
+void
+DESTROY (r)
+    randstate r
+CODE:
+    /* Destructor for a random state: gmp_randclear releases GMP's data,
+       then Safefree releases the structure allocated in new; r must not be
+       used afterwards.  rand_count is decremented through assert_support
+       to mirror the increment done in new.  */
+    TRACE (printf ("%s DESTROY\n", rand_class));
+    gmp_randclear (r);
+    Safefree (r);
+    assert_support (rand_count--);
+    TRACE_ACTIVE ();
+
+
+void
+seed (r, z)
+    randstate  r
+    mpz_coerce z
+CODE:
+    /* Seed the random state r with z via gmp_randseed; z is converted by
+       the mpz_coerce typemap.  */
+    gmp_randseed (r, z);
+
+
+mpz
+mpz_urandomb (r, bits)
+    randstate    r
+    ulong_coerce bits
+ALIAS:
+    GMP::Rand::mpz_rrandomb = 1
+PREINIT:
+    static_functable const struct {
+      void (*fun) (mpz_ptr, gmp_randstate_t r, unsigned long bits);
+    } table[] = {
+      { mpz_urandomb }, /* 0 */
+      { mpz_rrandomb }, /* 1 */
+    };
+CODE:
+    /* Return a new random mpz generated from state r with the given bits
+       argument; ix selects mpz_urandomb (0) or mpz_rrandomb (1).  */
+    assert_table (ix);
+    RETVAL = new_mpz();
+    (*table[ix].fun) (RETVAL->m, r, bits);
+OUTPUT:
+    RETVAL
+
+
+mpz
+mpz_urandomm (r, m)
+    randstate  r
+    mpz_coerce m
+CODE:
+    /* Return a new mpz filled in by GMP's mpz_urandomm from state r and
+       the bound m (converted by the mpz_coerce typemap).  */
+    RETVAL = new_mpz();
+    mpz_urandomm (RETVAL->m, r, m);
+OUTPUT:
+    RETVAL
+
+
+mpf
+mpf_urandomb (r, bits)
+    randstate    r
+    ulong_coerce bits
+CODE:
+    /* Return a new random mpf from state r.  bits is used both as the
+       precision of the new object and as the bit count passed to GMP's
+       mpf_urandomb.  */
+    RETVAL = new_mpf (bits);
+    mpf_urandomb (RETVAL, r, bits);
+OUTPUT:
+    RETVAL
+
+
+unsigned long
+gmp_urandomb_ui (r, bits)
+    randstate    r
+    ulong_coerce bits
+ALIAS:
+    GMP::Rand::gmp_urandomm_ui = 1
+PREINIT:
+    static_functable const struct {
+      unsigned long (*fun) (gmp_randstate_t r, unsigned long bits);
+    } table[] = {
+      { gmp_urandomb_ui }, /* 0 */
+      { gmp_urandomm_ui }, /* 1 */
+    };
+CODE:
+    /* Return a random unsigned long from state r; ix selects
+       gmp_urandomb_ui (0) or gmp_urandomm_ui (1).  The second argument is
+       passed through unchanged in both cases.  NOTE(review): for the
+       gmp_urandomm_ui alias it is presumably an upper bound rather than a
+       bit count, despite the parameter name -- confirm against the GMP
+       manual.  */
+    assert_table (ix);
+    RETVAL = (*table[ix].fun) (r, bits);
+OUTPUT:
+    RETVAL
diff --git a/demos/perl/GMP/Mpf.pm b/demos/perl/GMP/Mpf.pm

new file mode 100644 (file)

index 0000000..ea7b28b
--- /dev/null
+++ b/demos/perl/GMP/Mpf.pm
@@ -0,0 +1,95 @@
+# GMP mpf module.
+
+# Copyright 2001, 2003 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation; either version 3 of the License, or (at
+# your option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+package GMP::Mpf;
+
+require GMP;
+require Exporter;
+@ISA = qw(GMP Exporter);
+
+# Nothing is exported by default; every public function is available on
+# request, individually or through the ':all' tag.
+@EXPORT = qw();
+@EXPORT_OK = qw();
+
+# The 'constants' and 'noconstants' tags are deliberately empty lists (copies
+# of the empty @EXPORT).  They are acted on by import() below; presumably
+# they are declared here so Exporter accepts the tag names -- confirm.
+%EXPORT_TAGS = ('all' => [qw(
+                            ceil floor get_default_prec get_prec mpf mpf_eq
+                            reldiff set_default_prec set_prec trunc)],
+               'constants'   => [@EXPORT],
+               'noconstants' => [@EXPORT]);
+Exporter::export_ok_tags('all');
+
+# Operator overloads.  Every handler named here except overload_string
+# (defined below in this file) is expected to come from outside this file.
+use overload
+    '+'   => \&overload_add,     '+='  => \&overload_addeq,
+    '-'   => \&overload_sub,     '-='  => \&overload_subeq,
+    '*'   => \&overload_mul,     '*='  => \&overload_muleq,
+    '/'   => \&overload_div,     '/='  => \&overload_diveq,
+    '**'  => \&overload_pow,     '**=' => \&overload_poweq,
+    '<<'  => \&overload_lshift,  '<<=' => \&overload_lshifteq,
+    '>>'  => \&overload_rshift,  '>>=' => \&overload_rshifteq,
+
+    'bool' => \&overload_bool,
+    'not'  => \&overload_not,
+    '!'    => \&overload_not,
+    '<=>'  => \&overload_spaceship,
+    '++'   => \&overload_inc,
+    '--'   => \&overload_dec,
+    'abs'  => \&overload_abs,
+    'neg'  => \&overload_neg,
+    'sqrt' => \&overload_sqrt,
+    '='    => \&overload_copy,
+    '""'   => \&overload_string;
+
+# Custom import: ':constants' installs overload_constant as the handler for
+# integer, binary and float literals, ':noconstants' removes it again.  All
+# arguments are then passed on, unchanged, to Exporter's own import.
+sub import {
+  my @handlers = map { ($_ => \&overload_constant) } qw(integer binary float);
+
+  for my $arg (@_) {
+    if ($arg eq ':constants') {
+      overload::constant (@handlers);
+    }
+    elsif ($arg eq ':noconstants') {
+      overload::remove_constant (@handlers);
+    }
+  }
+
+  # goto (rather than a call) so Exporter sees the original @_ and caller
+  goto &Exporter::import;
+}
+
+
+# Stringification handler for the '""' overload.
+#
+# If perl's $# (output format for numbers) is defined, it is validated and
+# used as the format, with an 'F' inserted in front of its conversion letter;
+# otherwise the format is '%.Fg'.  The value is then formatted by
+# GMP::sprintf_internal.  Dies if $# is not a single e/E/f/g/G conversion
+# with optional flag, width and precision characters and surrounding literal
+# text.
+#
+# NOTE(review): $# was deprecated long ago and later removed from perl;
+# confirm how this sub behaves (or whether it still compiles) on the perl
+# versions that need to be supported.
+sub overload_string {
+  my $fmt;
+  # warnings are switched off at compile time around the uses of $# ...
+  BEGIN { $^W = 0; }
+  if (defined ($#)) {
+    $fmt = $#;
+    # ... and switched back on from here
+    BEGIN { $^W = 1; }
+    # protect against calling sprintf_internal with a bad format
+    if ($fmt !~ /^((%%|[^%])*%[-+ .\d]*)([eEfgG](%%|[^%])*)$/) {
+      die "GMP::Mpf: invalid \$# format: $#\n";
+    }
+    # $1 is everything up to the conversion letter, $3 the letter onwards
+    $fmt = $1 . 'F' . $3;
+  } else {
+    $fmt = '%.Fg';
+  }
+  GMP::sprintf_internal ($fmt, $_[0]);
+}
+
+1;
+__END__
+
+
+# Local variables:
+# perl-indent-level: 2
+# End:
diff --git a/demos/perl/GMP/Mpq.pm b/demos/perl/GMP/Mpq.pm

new file mode 100644 (file)

index 0000000..cd60cbe
--- /dev/null
+++ b/demos/perl/GMP/Mpq.pm
@@ -0,0 +1,78 @@
+# GMP mpq module.
+
+# Copyright 2001 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation; either version 3 of the License, or (at
+# your option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+package GMP::Mpq;
+
+require GMP;
+require Exporter;
+@ISA = qw(GMP Exporter);
+
+# Nothing is exported by default; the public functions are available on
+# request, individually or through the ':all' tag.
+@EXPORT = qw();
+@EXPORT_OK = qw();
+
+# The 'constants' and 'noconstants' tags are deliberately empty lists (copies
+# of the empty @EXPORT).  They are acted on by import() below; presumably
+# they are declared here so Exporter accepts the tag names -- confirm.
+%EXPORT_TAGS = ('all' => [qw(canonicalize den inv mpq num)],
+               'constants'   => [@EXPORT],
+               'noconstants' => [@EXPORT] );
+Exporter::export_ok_tags('all');
+
+# Operator overloads.  None of the handlers named here is defined in this
+# file; they are expected to come from the GMP extension.
+use overload
+    '+'   => \&overload_add,     '+='  => \&overload_addeq,
+    '-'   => \&overload_sub,     '-='  => \&overload_subeq,
+    '*'   => \&overload_mul,     '*='  => \&overload_muleq,
+    '/'   => \&overload_div,     '/='  => \&overload_diveq,
+    '**'  => \&overload_pow,     '**=' => \&overload_poweq,
+    '<<'  => \&overload_lshift,  '<<=' => \&overload_lshifteq,
+    '>>'  => \&overload_rshift,  '>>=' => \&overload_rshifteq,
+
+    'bool' => \&overload_bool,
+    'not'  => \&overload_not,
+    '!'    => \&overload_not,
+    '=='   => \&overload_eq,
+    '!='   => \&overload_ne,
+    '<=>'  => \&overload_spaceship,
+    '++'   => \&overload_inc,
+    '--'   => \&overload_dec,
+    'abs'  => \&overload_abs,
+    'neg'  => \&overload_neg,
+    '='    => \&overload_copy,
+    '""'   => \&overload_string;
+
+# Custom import: ':constants' installs overload_constant as the handler for
+# integer, binary and float literals, ':noconstants' removes it again.  All
+# arguments are then passed on, unchanged, to Exporter's own import.
+sub import {
+  my @handlers = map { ($_ => \&overload_constant) } qw(integer binary float);
+
+  for my $arg (@_) {
+    if ($arg eq ':constants') {
+      overload::constant (@handlers);
+    }
+    elsif ($arg eq ':noconstants') {
+      overload::remove_constant (@handlers);
+    }
+  }
+
+  # goto (rather than a call) so Exporter sees the original @_ and caller
+  goto &Exporter::import;
+}
+
+1;
+__END__
+
+
+# Local variables:
+# perl-indent-level: 2
+# End:
diff --git a/demos/perl/GMP/Mpz.pm b/demos/perl/GMP/Mpz.pm

new file mode 100644 (file)

index 0000000..a86d4ab
--- /dev/null
+++ b/demos/perl/GMP/Mpz.pm
@@ -0,0 +1,90 @@
+# GMP mpz module.
+
+# Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation; either version 3 of the License, or (at
+# your option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+package GMP::Mpz;
+
+require GMP;
+require Exporter;
+@ISA = qw(GMP Exporter);
+
+# Nothing is exported by default; every public function is available on
+# request, individually or through the ':all' tag.
+@EXPORT = qw();
+@EXPORT_OK = qw();
+
+# The 'constants' and 'noconstants' tags are deliberately empty lists (copies
+# of the empty @EXPORT).  They are acted on by import() below; presumably
+# they are declared here so Exporter accepts the tag names -- confirm.
+%EXPORT_TAGS = ('all' => [qw(
+                            bin cdiv cdiv_2exp clrbit combit congruent_p
+                            congruent_2exp_p divexact divisible_p
+                            divisible_2exp_p even_p fac fdiv fdiv_2exp fib
+                            fib2 gcd gcdext hamdist invert jacobi kronecker
+                            lcm lucnum lucnum2 mod mpz mpz_export
+                            mpz_import nextprime odd_p perfect_power_p
+                            perfect_square_p popcount powm probab_prime_p
+                            realloc remove root roote rootrem scan0 scan1
+                            setbit sizeinbase sqrtrem tdiv tdiv_2exp
+                            tstbit)],
+               'constants'   => [@EXPORT],
+               'noconstants' => [@EXPORT]);
+Exporter::export_ok_tags('all');
+
+# Operator overloads.  None of the handlers named here is defined in this
+# file; they are expected to come from the GMP extension.
+use overload
+    '+'    => \&overload_add,     '+='   => \&overload_addeq,
+    '-'    => \&overload_sub,     '-='   => \&overload_subeq,
+    '*'    => \&overload_mul,     '*='   => \&overload_muleq,
+    '/'    => \&overload_div,     '/='   => \&overload_diveq,
+    '%'    => \&overload_rem,     '%='   => \&overload_remeq,
+    '<<'   => \&overload_lshift,  '<<='  => \&overload_lshifteq,
+    '>>'   => \&overload_rshift,  '>>='  => \&overload_rshifteq,
+    '**'   => \&overload_pow,     '**='  => \&overload_poweq,
+    '&'    => \&overload_and,     '&='   => \&overload_andeq,
+    '|'    => \&overload_ior,     '|='   => \&overload_ioreq,
+    '^'    => \&overload_xor,     '^='   => \&overload_xoreq,
+
+    'bool' => \&overload_bool,
+    'not'  => \&overload_not,
+    '!'    => \&overload_not,
+    '~'    => \&overload_com,
+    '<=>'  => \&overload_spaceship,
+    '++'   => \&overload_inc,
+    '--'   => \&overload_dec,
+    '='    => \&overload_copy,
+    'abs'  => \&overload_abs,
+    'neg'  => \&overload_neg,
+    'sqrt' => \&overload_sqrt,
+    '""'   => \&overload_string;
+
+# Custom import: ':constants' installs overload_constant as the handler for
+# integer, binary and float literals, ':noconstants' removes it again.  All
+# arguments are then passed on, unchanged, to Exporter's own import.
+sub import {
+  my @handlers = map { ($_ => \&overload_constant) } qw(integer binary float);
+
+  for my $arg (@_) {
+    if ($arg eq ':constants') {
+      overload::constant (@handlers);
+    }
+    elsif ($arg eq ':noconstants') {
+      overload::remove_constant (@handlers);
+    }
+  }
+
+  # goto (rather than a call) so Exporter sees the original @_ and caller
+  goto &Exporter::import;
+}
+
+1;
+__END__
+
+
+# Local variables:
+# perl-indent-level: 2
+# End:
diff --git a/demos/perl/GMP/Rand.pm b/demos/perl/GMP/Rand.pm

new file mode 100644 (file)

index 0000000..049c729
--- /dev/null
+++ b/demos/perl/GMP/Rand.pm
@@ -0,0 +1,33 @@
+# GMP random numbers module.
+
+# Copyright 2001, 2003 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation; either version 3 of the License, or (at
+# your option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+package GMP::Rand;
+
+require GMP;
+require Exporter;
+@ISA = qw(GMP Exporter);
+
+# Nothing is exported by default.  export_ok_tags below makes every name in
+# the ':all' tag importable on request.
+@EXPORT = qw();
+%EXPORT_TAGS = ('all' => [qw(
+                            randstate mpf_urandomb mpz_rrandomb
+                            mpz_urandomb mpz_urandomm gmp_urandomb_ui
+                            gmp_urandomm_ui)]);
+Exporter::export_ok_tags('all');
+1;
+__END__
diff --git a/demos/perl/INSTALL b/demos/perl/INSTALL

new file mode 100644 (file)

index 0000000..e503c79
--- /dev/null
+++ b/demos/perl/INSTALL
@@ -0,0 +1,77 @@
+Copyright 2001, 2003, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+
+
+
+                  GMP PERL MODULE INSTALLATION
+
+
+This module can be compiled within the GMP source directory or moved
+elsewhere and compiled.  An installed GMP can be used, or a specified
+GMP build tree.  Both static and shared GMP builds will work.
+
+The simplest case is when GMP has been installed to a standard system
+location
+
+       perl Makefile.PL
+       make
+
+If GMP is not yet installed then the top-level GMP build directory must
+be specified
+
+       perl Makefile.PL GMP_BUILDDIR=/my/gmp/build
+       make
+
+In any case, with the module built, the sample program provided can be
+run
+
+       perl -Iblib/arch sample.pl
+
+If you built a shared version of libgmp but haven't yet installed it,
+then it might be necessary to add a run-time path to it.  For example
+
+       LD_LIBRARY_PATH=/my/gmp/build/.libs perl -Iblib/arch sample.pl
+
+Documentation is provided in pod format in GMP.pm, and will have been
+"man"-ified in the module build
+
+       man -l blib/man3/GMP.3pm
+or
+       man -M`pwd`/blib GMP
+
+A test script is provided, running a large number of more or less
+trivial checks
+
+       make test
+
+The module and its documentation can be installed in the usual way
+
+       make install
+
+This will be into /usr/local or wherever the perl Config module
+directs, but that can be controlled back at the Makefile.PL stage with
+the usual ExtUtils::MakeMaker options.
+
+Once installed, programs using the GMP module can be run simply as
+
+       perl sample.pl
+
+And the documentation can be read directly too
+
+       man GMP
diff --git a/demos/perl/Makefile.PL b/demos/perl/Makefile.PL

new file mode 100644 (file)

index 0000000..c30b503
--- /dev/null
+++ b/demos/perl/Makefile.PL
@@ -0,0 +1,71 @@
+# Makefile for GMP perl module.
+
+# Copyright 2001, 2003, 2004 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation; either version 3 of the License, or (at
+# your option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+# Bugs:
+#
+# When the generated Makefile re-runs "perl Makefile.PL" the GMP_BUILDDIR
+# parameter is lost.
+
+
+use ExtUtils::MakeMaker;
+
+
+# Take our own GMP_BUILDDIR=... parameter out of @ARGV, remembering its
+# value and leaving every other argument for MakeMaker.
+@ARGV = grep {
+  my $keep = 1;
+  if (/^GMP_BUILDDIR=(.*)/) {
+    $GMP_BUILDDIR = $1;
+    $keep = 0;
+  }
+  $keep;
+} @ARGV;
+
+# Defaults, suitable for a GMP installed in a standard system location.
+$OBJECT = "GMP.o";
+$LIBS = "-lgmp";
+$INC = "";
+
+# With a build tree given, compile against its headers and link against the
+# library libtool has left in its .libs subdirectory.
+if (defined $GMP_BUILDDIR) {
+  -f "$GMP_BUILDDIR/libgmp.la"
+    or die "$GMP_BUILDDIR doesn't contain libgmp.la\n" .
+       "if it's really a gmp build directory then go there and run \"make libgmp.la\"\n";
+  $LIBS = "-L$GMP_BUILDDIR/.libs $LIBS";
+  $INC = "-I$GMP_BUILDDIR $INC";
+}
+
+WriteMakefile(
+  NAME    => 'GMP',
+  VERSION => '2.00',
+  LIBS    => [$LIBS],
+  OBJECT  => $OBJECT,
+  INC     => $INC,
+  clean   => { FILES => 'test.tmp' },
+  PM      => {
+    'GMP.pm'      => '$(INST_LIBDIR)/GMP.pm',
+    'GMP/Mpz.pm'  => '$(INST_LIBDIR)/GMP/Mpz.pm',
+    'GMP/Mpq.pm'  => '$(INST_LIBDIR)/GMP/Mpq.pm',
+    'GMP/Mpf.pm'  => '$(INST_LIBDIR)/GMP/Mpf.pm',
+    'GMP/Rand.pm' => '$(INST_LIBDIR)/GMP/Rand.pm',
+  },
+);
+
+
+# Local variables:
+# perl-indent-level: 2
+# End:
diff --git a/demos/perl/sample.pl b/demos/perl/sample.pl

new file mode 100644 (file)

index 0000000..20ca5a7
--- /dev/null
+++ b/demos/perl/sample.pl
@@ -0,0 +1,43 @@
+#!/usr/bin/perl -w
+
+# Some sample GMP module operations
+
+# Copyright 2001, 2004 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation; either version 3 of the License, or (at
+# your option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+use strict;
+
+
+# Report the versions of the perl module and of the GMP library in use.
+use GMP;
+print "using GMP module $GMP::VERSION and GMP library ",GMP::version(),"\n";
+
+
+# Integer functions, imported through the ':all' tag.
+use GMP::Mpz qw(:all);
+print "the 200th fibonacci number is ", fib(200), "\n";
+print "next prime after 10**30 is (probably) ", nextprime(mpz(10)**30), "\n";
+
+
+# With ':constants' in effect the numeric literals below are handed to
+# GMP::Mpq's constant handler; ':noconstants' switches that off again for
+# the rest of the file.
+use GMP::Mpq qw(:constants);
+print "the 7th harmonic number is ", 1+1/2+1/3+1/4+1/5+1/6+1/7, "\n";
+use GMP::Mpq qw(:noconstants);
+
+
+# Floats: mpf(value, precision), then the overloaded '>>=' and '+='
+# operators, and stringification through interpolation.
+use GMP::Mpf qw(mpf);
+my $f = mpf(1,180);
+$f >>= 180;
+$f += 1;
+print "a sample mpf is $f\n";
diff --git a/demos/perl/test.pl b/demos/perl/test.pl

new file mode 100644 (file)

index 0000000..40a9033
--- /dev/null
+++ b/demos/perl/test.pl
@@ -0,0 +1,2168 @@
+#!/usr/bin/perl -w
+
+# GMP perl module tests
+
+# Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation; either version 3 of the License, or (at
+# your option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+# These tests aim to exercise the many possible combinations of operands
+# etc, and to run all functions at least once, which if nothing else will
+# check everything intended is in the :all list.
+#
+# Use the following in .emacs to match test failure messages.
+#
+# ;; perl "Test" module error messages
+# (eval-after-load "compile"
+#   '(add-to-list
+#     'compilation-error-regexp-alist
+#     '("^.*Failed test [0-9]+ in \\([^ ]+\\) at line \\([0-9]+\\)" 1 2)))
+
+
+use strict;
+use Test;
+
+BEGIN {   # declare the test plan at compile time, before any ok() call runs
+  plan tests => 123,   # NOTE(review): count looks stale -- the jacobi/kronecker loop alone issues 18*2*7 = 252 ok() calls; confirm the real total
+  onfail => sub { print "there were failures\n" },   # hook handed to the Test module for failed runs
+}
+
+use GMP qw(:all);
+use GMP::Mpz qw(:all);
+use GMP::Mpq qw(:all);
+use GMP::Mpf qw(:all);
+use GMP::Rand qw(:all);
+
+use GMP::Mpz qw(:constants);
+use GMP::Mpz qw(:noconstants);
+use GMP::Mpq qw(:constants);
+use GMP::Mpq qw(:noconstants);
+use GMP::Mpf qw(:constants);
+use GMP::Mpf qw(:noconstants);
+
+package Mytie;               # tied-scalar helper: one shared value plus access counters
+use Exporter;                # NOTE(review): nothing is exported in the visible code -- possibly unneeded
+use vars  qw($val $fetched $stored);
+$val = 0;                    # value returned by FETCH and replaced by STORE (shared by every tie)
+$fetched = 0;                # FETCH calls since the most recent tie
+$stored = 0;                 # STORE calls since the most recent tie
+sub TIESCALAR {              # tie constructor: record $newval and reset both counters
+  my ($class, $newval) = @_;
+  my $var = 'mytie dummy refed var';   # placeholder to bless; real state lives in the package variables
+  $val = $newval;
+  $fetched = 0;
+  $stored = 0;
+  return bless \$var, $class;
+}
+sub FETCH {                  # read hook: count the access, hand back the shared value
+  my ($self) = @_;
+  $fetched++;
+  return $val;
+}
+sub STORE {                  # write hook: replace the shared value, count the access
+  my ($self, $newval) = @_;
+  $val = $newval;
+  $stored++;
+}
+package main;
+
+# check Mytie does what it should before relying on it in the tests below
+{ tie my $t, 'Mytie', 123;
+  ok ($Mytie::val == 123);     # TIESCALAR recorded the initial value
+  $Mytie::val = 456;
+  ok ($t == 456);              # reading $t goes through FETCH, which returns $Mytie::val
+  $t = 789;
+  ok ($Mytie::val == 789);     # assigning to $t goes through STORE
+}
+
+
+# Usage: str(x)
+# Return x forced to a string, not a PVIV: the argument is interpolated
+# into a new string and then concatenated with "" before being returned.
+sub str {
+  my $s = "$_[0]" . "";
+  return $s;
+}
+
+my $ivnv_2p128 = 65536.0 * 65536.0 * 65536.0 * 65536.0     # 65536**8 = 2**128, built as a float
+               * 65536.0 * 65536.0 * 65536.0 * 65536.0;
+kill (0, $ivnv_2p128);   # NOTE(review): presumably only makes perl read the float as an integer too (the "ivnv" in the name) -- confirm
+my $str_2p128 = '340282366920938463463374607431768211456';   # decimal digits of 2**128, as a string
+
+my $uv_max = ~ 0;                    # bitwise complement of 0: every bit set
+my $uv_max_str = ~ 0;                # same value, turned into a string by the next two lines
+$uv_max_str = "$uv_max_str";
+$uv_max_str = "" . "$uv_max_str";
+
+
+#------------------------------------------------------------------------------
+# GMP::version
+
+use GMP qw(version);
+print '$GMP::VERSION ',$GMP::VERSION,' GMP::version() ',version(),"\n";
+
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::new
+
+ok (mpz(0) == 0);
+ok (mpz('0') == 0);
+ok (mpz(substr('101',1,1)) == 0);
+ok (mpz(0.0) == 0);
+ok (mpz(mpz(0)) == 0);
+ok (mpz(mpq(0)) == 0);
+ok (mpz(mpf(0)) == 0);
+
+{ tie my $t, 'Mytie', 0;
+  ok (mpz($t) == 0);
+  ok ($Mytie::fetched > 0);
+}
+{ tie my $t, 'Mytie', '0';
+  ok (mpz($t) == 0);
+  ok ($Mytie::fetched > 0);
+}
+{ tie my $t, 'Mytie', substr('101',1,1); ok (mpz($t) == 0); }
+{ tie my $t, 'Mytie', 0.0; ok (mpz($t) == 0); }
+{ tie my $t, 'Mytie', mpz(0); ok (mpz($t) == 0); }
+{ tie my $t, 'Mytie', mpq(0); ok (mpz($t) == 0); }
+{ tie my $t, 'Mytie', mpf(0); ok (mpz($t) == 0); }
+
+ok (mpz(-123) == -123);
+ok (mpz('-123') == -123);
+ok (mpz(substr('1-1231',1,4)) == -123);
+ok (mpz(-123.0) == -123);
+ok (mpz(mpz(-123)) == -123);
+ok (mpz(mpq(-123)) == -123);
+ok (mpz(mpf(-123)) == -123);
+
+{ tie my $t, 'Mytie', -123; ok (mpz($t) == -123); }
+{ tie my $t, 'Mytie', '-123'; ok (mpz($t) == -123); }
+{ tie my $t, 'Mytie', substr('1-1231',1,4); ok (mpz($t) == -123); }
+{ tie my $t, 'Mytie', -123.0; ok (mpz($t) == -123); }
+{ tie my $t, 'Mytie', mpz(-123); ok (mpz($t) == -123); }
+{ tie my $t, 'Mytie', mpq(-123); ok (mpz($t) == -123); }
+{ tie my $t, 'Mytie', mpf(-123); ok (mpz($t) == -123); }
+
+ok (mpz($ivnv_2p128) == $str_2p128);
+{ tie my $t, 'Mytie', $ivnv_2p128; ok (mpz($t) == $str_2p128); }
+
+ok (mpz($uv_max) > 0);
+ok (mpz($uv_max) == mpz($uv_max_str));
+{ tie my $t, 'Mytie', $uv_max; ok (mpz($t) > 0); }
+{ tie my $t, 'Mytie', $uv_max; ok (mpz($t) == mpz($uv_max_str)); }
+
+{ my $s = '999999999999999999999999999999';
+  kill (0, $s);
+  ok (mpz($s) == '999999999999999999999999999999');
+  tie my $t, 'Mytie', $s;
+  ok (mpz($t) == '999999999999999999999999999999');
+}
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::overload_abs
+
+ok (abs(mpz(0)) == 0);
+ok (abs(mpz(123)) == 123);
+ok (abs(mpz(-123)) == 123);
+
+{ my $x = mpz(-123); $x = abs($x); ok ($x == 123); }
+{ my $x = mpz(0);    $x = abs($x); ok ($x == 0);   }
+{ my $x = mpz(123);  $x = abs($x); ok ($x == 123); }
+
+{ tie my $t, 'Mytie', mpz(0); ok (abs($t) == 0); }
+{ tie my $t, 'Mytie', mpz(123); ok (abs($t) == 123); }
+{ tie my $t, 'Mytie', mpz(-123); ok (abs($t) == 123); }
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::overload_add
+
+ok (mpz(0) + 1 == 1);
+ok (mpz(-1) + 1 == 0);
+ok (1 + mpz(0) == 1);
+ok (1 + mpz(-1) == 0);
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::overload_addeq
+
+{ my $a = mpz(7); $a += 1; ok ($a == 8); }
+{ my $a = mpz(7); my $b = $a; $a += 1; ok ($a == 8); ok ($b == 7); }
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::overload_and
+
+ok ((mpz(3) & 1) == 1);
+ok ((mpz(3) & 4) == 0);
+
+{ my $a = mpz(3); $a &= 1; ok ($a == 1); }
+{ my $a = mpz(3); $a &= 4; ok ($a == 0); }
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::overload_bool
+
+if (mpz(0))   { ok (0); } else { ok (1); }
+if (mpz(123)) { ok (1); } else { ok (0); }
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::overload_com
+
+ok (~ mpz(0) == -1);
+ok (~ mpz(1) == -2);
+ok (~ mpz(-2) == 1);
+ok (~ mpz(0xFF) == -0x100);
+ok (~ mpz(-0x100) == 0xFF);
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::overload_dec
+
+{ my $a = mpz(0); ok ($a-- == 0); ok ($a == -1); }
+{ my $a = mpz(0); ok (--$a == -1); }
+
+{ my $a = mpz(0); my $b = $a; $a--; ok ($a == -1); ok ($b == 0); }
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::overload_div
+
+ok (mpz(6) / 2 == 3);
+ok (mpz(-6) / 2 == -3);
+ok (mpz(6) / -2 == -3);
+ok (mpz(-6) / -2 == 3);
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::overload_diveq
+
+{ my $a = mpz(21); $a /= 3; ok ($a == 7); }
+{ my $a = mpz(21); my $b = $a; $a /= 3; ok ($a == 7); ok ($b == 21); }
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::overload_eq
+
+{ my $a = mpz(0);
+  my $b = $a;
+  $a = mpz(1);
+  ok ($a == 1);
+  ok ($b == 0); }
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::overload_inc
+
+{ my $a = mpz(0); ok ($a++ == 0); ok ($a == 1); }
+{ my $a = mpz(0); ok (++$a == 1); }
+
+{ my $a = mpz(0); my $b = $a; $a++; ok ($a == 1); ok ($b == 0); }
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::overload_ior
+
+ok ((mpz(3) | 1) == 3);
+ok ((mpz(3) | 4) == 7);
+
+{ my $a = mpz(3); $a |= 1; ok ($a == 3); }
+{ my $a = mpz(3); $a |= 4; ok ($a == 7); }
+
+ok ((mpz("0xAA") | mpz("0x55")) == mpz("0xFF"));
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::overload_lshift
+
+{ my $a = mpz(7) << 1; ok ($a == 14); }
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::overload_lshifteq
+
+{ my $a = mpz(7); $a <<= 1; ok ($a == 14); }
+{ my $a = mpz(7); my $b = $a; $a <<= 1; ok ($a == 14); ok ($b == 7); }
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::overload_mul
+
+ok (mpz(2) * 3 == 6);
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::overload_muleq
+
+{ my $a = mpz(7); $a *= 3;  ok ($a == 21); }
+{ my $a = mpz(7); my $b = $a; $a *= 3;  ok ($a == 21); ok ($b == 7); }
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::overload_neg
+
+ok (- mpz(0) == 0);
+ok (- mpz(123) == -123);
+ok (- mpz(-123) == 123);
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::overload_not
+
+if (not mpz(0))   { ok (1); } else { ok (0); }
+if (not mpz(123)) { ok (0); } else { ok (1); }
+
+ok ((! mpz(0)) == 1);
+ok ((! mpz(123)) == 0);
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::overload_pow
+
+ok (mpz(0) ** 1 == 0);
+ok (mpz(1) ** 1 == 1);
+ok (mpz(2) ** 0 == 1);
+ok (mpz(2) ** 1 == 2);
+ok (mpz(2) ** 2 == 4);
+ok (mpz(2) ** 3 == 8);
+ok (mpz(2) ** 4 == 16);
+
+ok (mpz(0) ** mpz(1) == 0);
+ok (mpz(1) ** mpz(1) == 1);
+ok (mpz(2) ** mpz(0) == 1);
+ok (mpz(2) ** mpz(1) == 2);
+ok (mpz(2) ** mpz(2) == 4);
+ok (mpz(2) ** mpz(3) == 8);
+ok (mpz(2) ** mpz(4) == 16);
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::overload_poweq
+
+{ my $a = mpz(3); $a **= 4; ok ($a == 81); }
+{ my $a = mpz(3); my $b = $a; $a **= 4; ok ($a == 81); ok ($b == 3); }
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::overload_rem
+
+ok (mpz(-8) % 3 == -2);
+ok (mpz(-7) % 3 == -1);
+ok (mpz(-6) % 3 == 0);
+ok (mpz(6) % 3 == 0);
+ok (mpz(7) % 3 == 1);
+ok (mpz(8) % 3 == 2);
+
+{ my $a = mpz(24); $a %= 7; ok ($a == 3); }
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::overload_rshift
+
+{ my $a = mpz(32) >> 1; ok ($a == 16); }
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::overload_rshifteq
+
+{ my $a = mpz(32); $a >>= 1; ok ($a == 16); }
+{ my $a = mpz(32); my $b = $a; $a >>= 1; ok ($a == 16); ok ($b == 32); }
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::overload_spaceship
+
+ok (mpz(0) < 1);
+ok (mpz(0) > -1);
+
+ok (mpz(0) != 1);
+ok (mpz(0) != -1);
+ok (mpz(1) != 0);
+ok (mpz(1) != -1);
+ok (mpz(-1) != 0);
+ok (mpz(-1) != 1);
+
+ok (mpz(0) < 1.0);
+ok (mpz(0) < '1');
+ok (mpz(0) < substr('-1',1,1));
+ok (mpz(0) < mpz(1));
+ok (mpz(0) < mpq(1));
+ok (mpz(0) < mpf(1));
+ok (mpz(0) < $uv_max);
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::overload_sqrt
+
+ok (sqrt(mpz(0)) == 0);
+ok (sqrt(mpz(1)) == 1);
+ok (sqrt(mpz(4)) == 2);
+ok (sqrt(mpz(81)) == 9);
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::overload_string
+
+{ my $x = mpz(0);    ok("$x" eq "0"); }
+{ my $x = mpz(123);  ok("$x" eq "123"); }
+{ my $x = mpz(-123); ok("$x" eq "-123"); }
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::overload_sub
+
+ok (mpz(0) - 1 == -1);
+ok (mpz(1) - 1 == 0);
+ok (1 - mpz(0) == 1);
+ok (1 - mpz(1) == 0);
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::overload_subeq
+
+{ my $a = mpz(7); $a -= 1; ok ($a == 6); }
+{ my $a = mpz(7); my $b = $a; $a -= 1; ok ($a == 6); ok ($b == 7); }
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::overload_xor
+
+ok ((mpz(3) ^ 1) == 2);
+ok ((mpz(3) ^ 4) == 7);
+
+{ my $a = mpz(3); $a ^= 1; ok ($a == 2); }
+{ my $a = mpz(3); $a ^= 4; ok ($a == 7); }
+
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::bin
+
+ok (bin(2,0) == 1);
+ok (bin(2,1) == 2);
+ok (bin(2,2) == 1);
+
+ok (bin(3,0) == 1);
+ok (bin(3,1) == 3);
+ok (bin(3,2) == 3);
+ok (bin(3,3) == 1);
+
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::cdiv
+
+{ my ($q, $r);
+  ($q, $r) = cdiv (16, 3);
+  ok ($q == 6);
+  ok ($r == -2);
+  ($q, $r) = cdiv (16, -3);
+  ok ($q == -5);
+  ok ($r == 1);
+  ($q, $r) = cdiv (-16, 3);
+  ok ($q == -5);
+  ok ($r == -1);
+  ($q, $r) = cdiv (-16, -3);
+  ok ($q == 6);
+  ok ($r == 2);
+}
+
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::cdiv_2exp
+
+{ my ($q, $r);
+  ($q, $r) = cdiv_2exp (23, 2);
+  ok ($q == 6);
+  ok ($r == -1);
+  ($q, $r) = cdiv_2exp (-23, 2);
+  ok ($q == -5);
+  ok ($r == -3);
+}
+
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::clrbit
+
+{ my $a = mpz(3); clrbit ($a, 1); ok ($a == 1);
+  ok (UNIVERSAL::isa($a,"GMP::Mpz")); }
+{ my $a = mpz(3); clrbit ($a, 2); ok ($a == 3);
+  ok (UNIVERSAL::isa($a,"GMP::Mpz")); }
+
+{ my $a = 3; clrbit ($a, 1); ok ($a == 1);
+  ok (UNIVERSAL::isa($a,"GMP::Mpz")); }
+{ my $a = 3; clrbit ($a, 2); ok ($a == 3);
+  ok (UNIVERSAL::isa($a,"GMP::Mpz")); }
+
+# mutate only given variable
+{ my $a = mpz(3);
+  my $b = $a;
+  clrbit ($a, 0);
+  ok ($a == 2);
+  ok ($b == 3);
+}
+{ my $a = 3;
+  my $b = $a;
+  clrbit ($a, 0);
+  ok ($a == 2);
+  ok ($b == 3);
+}
+
+{ tie my $a, 'Mytie', mpz(3);
+  clrbit ($a, 1);
+  ok ($Mytie::fetched > 0);    # used fetch
+  ok ($Mytie::stored > 0);     # used store
+  ok ($a == 1);                # expected result
+  ok (UNIVERSAL::isa($a,"GMP::Mpz"));
+  ok (tied($a));               # still tied
+}
+{ tie my $a, 'Mytie', 3;
+  clrbit ($a, 1);
+  ok ($Mytie::fetched > 0);    # used fetch
+  ok ($Mytie::stored > 0);     # used store
+  ok ($a == 1);                # expected result
+  ok (UNIVERSAL::isa($a,"GMP::Mpz"));
+  ok (tied($a));               # still tied
+}
+
+{ my $b = mpz(3);
+  tie my $a, 'Mytie', $b;
+  clrbit ($a, 0);
+  ok ($a == 2);
+  ok ($b == 3);
+  ok (tied($a));
+}
+{ my $b = 3;
+  tie my $a, 'Mytie', $b;
+  clrbit ($a, 0);
+  ok ($a == 2);
+  ok ($b == 3);
+  ok (tied($a));
+}
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::combit
+
+{ my $a = mpz(3); combit ($a, 1); ok ($a == 1);
+  ok (UNIVERSAL::isa($a,"GMP::Mpz")); }
+{ my $a = mpz(3); combit ($a, 2); ok ($a == 7);
+  ok (UNIVERSAL::isa($a,"GMP::Mpz")); }
+
+{ my $a = 3; combit ($a, 1); ok ($a == 1);
+  ok (UNIVERSAL::isa($a,"GMP::Mpz")); }
+{ my $a = 3; combit ($a, 2); ok ($a == 7);
+  ok (UNIVERSAL::isa($a,"GMP::Mpz")); }
+
+# mutate only given variable
+{ my $a = mpz(3);
+  my $b = $a;
+  combit ($a, 0);
+  ok ($a == 2);
+  ok ($b == 3);
+}
+{ my $a = 3;
+  my $b = $a;
+  combit ($a, 0);
+  ok ($a == 2);
+  ok ($b == 3);
+}
+
+{ tie my $a, 'Mytie', mpz(3);
+  combit ($a, 2);
+  ok ($Mytie::fetched > 0);    # used fetch
+  ok ($Mytie::stored > 0);     # used store
+  ok ($a == 7);                # expected result
+  ok (UNIVERSAL::isa($a,"GMP::Mpz"));
+  ok (tied($a));               # still tied
+}
+{ tie my $a, 'Mytie', 3;
+  combit ($a, 2);
+  ok ($Mytie::fetched > 0);    # used fetch
+  ok ($Mytie::stored > 0);     # used store
+  ok ($a == 7);                # expected result
+  ok (UNIVERSAL::isa($a,"GMP::Mpz"));
+  ok (tied($a));               # still tied
+}
+
+{ my $b = mpz(3);
+  tie my $a, 'Mytie', $b;
+  combit ($a, 0);
+  ok ($a == 2);
+  ok ($b == 3);
+  ok (tied($a));
+}
+{ my $b = 3;
+  tie my $a, 'Mytie', $b;
+  combit ($a, 0);
+  ok ($a == 2);
+  ok ($b == 3);
+  ok (tied($a));
+}
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::congruent_p
+
+ok (  congruent_p (21, 0, 7));
+ok (! congruent_p (21, 1, 7));
+ok (  congruent_p (21, 5, 8));
+ok (! congruent_p (21, 6, 8));
+
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::congruent_2exp_p
+
+ok (  congruent_2exp_p (20, 0, 2));
+ok (! congruent_2exp_p (21, 0, 2));
+ok (! congruent_2exp_p (20, 1, 2));
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::divexact
+
+ok (divexact(27,3) == 9);
+ok (divexact(27,-3) == -9);
+ok (divexact(-27,3) == -9);
+ok (divexact(-27,-3) == 9);
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::divisible_p
+
+ok (  divisible_p (21, 7));
+ok (! divisible_p (21, 8));
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::divisible_2exp_p
+
+ok (  divisible_2exp_p (20, 2));
+ok (! divisible_2exp_p (21, 2));
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::even_p
+
+ok (! even_p(mpz(-3)));
+ok (  even_p(mpz(-2)));
+ok (! even_p(mpz(-1)));
+ok (  even_p(mpz(0)));
+ok (! even_p(mpz(1)));
+ok (  even_p(mpz(2)));
+ok (! even_p(mpz(3)));
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::export
+
+{ my $s = mpz_export (1, 2, 1, 0, "0x61626364");
+  ok ($s eq 'abcd'); }
+{ my $s = mpz_export (-1, 2, 1, 0, "0x61626364");
+  ok ($s eq 'cdab'); }
+{ my $s = mpz_export (1, 2, -1, 0, "0x61626364");
+  ok ($s eq 'badc'); }
+{ my $s = mpz_export (-1, 2, -1, 0, "0x61626364");
+  ok ($s eq 'dcba'); }
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::fac
+
+ok (fac(0) == 1);
+ok (fac(1) == 1);
+ok (fac(2) == 2);
+ok (fac(3) == 6);
+ok (fac(4) == 24);
+ok (fac(5) == 120);
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::fdiv
+
+{ my ($q, $r);
+  ($q, $r) = fdiv (16, 3);
+  ok ($q == 5);
+  ok ($r == 1);
+  ($q, $r) = fdiv (16, -3);
+  ok ($q == -6);
+  ok ($r == -2);
+  ($q, $r) = fdiv (-16, 3);
+  ok ($q == -6);
+  ok ($r == 2);
+  ($q, $r) = fdiv (-16, -3);
+  ok ($q == 5);
+  ok ($r == -1);
+}
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::fdiv_2exp
+
+{ my ($q, $r);
+  ($q, $r) = fdiv_2exp (23, 2);
+  ok ($q == 5);
+  ok ($r == 3);
+  ($q, $r) = fdiv_2exp (-23, 2);
+  ok ($q == -6);
+  ok ($r == 1);
+}
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::fib
+
+ok (fib(0) == 0);
+ok (fib(1) == 1);
+ok (fib(2) == 1);
+ok (fib(3) == 2);
+ok (fib(4) == 3);
+ok (fib(5) == 5);
+ok (fib(6) == 8);
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::fib2
+
+{ my ($a, $b) = fib2(0); ok($a==0); ok($b==1); }
+{ my ($a, $b) = fib2(1); ok($a==1); ok($b==0); }
+{ my ($a, $b) = fib2(2); ok($a==1); ok($b==1); }
+{ my ($a, $b) = fib2(3); ok($a==2); ok($b==1); }
+{ my ($a, $b) = fib2(4); ok($a==3); ok($b==2); }
+{ my ($a, $b) = fib2(5); ok($a==5); ok($b==3); }
+{ my ($a, $b) = fib2(6); ok($a==8); ok($b==5); }
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::gcd
+
+ok (gcd (21) == 21);
+ok (gcd (21,15) == 3);
+ok (gcd (21,15,30,57) == 3);
+ok (gcd (21,-15) == 3);
+ok (gcd (-21,15) == 3);
+ok (gcd (-21,-15) == 3);
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::gcdext
+
+{
+  my ($g, $x, $y) = gcdext (3,5);
+  ok ($g == 1);
+  ok ($x == 2);
+  ok ($y == -1);
+}
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::hamdist
+
+ok (hamdist(5,7) == 1);
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::import
+
+{ my $z = mpz_import (1, 2, 1, 0, 'abcd');
+  ok ($z == 0x61626364); }
+{ my $z = mpz_import (-1, 2, 1, 0, 'abcd');
+  ok ($z == 0x63646162); }
+{ my $z = mpz_import (1, 2, -1, 0, 'abcd');
+  ok ($z == 0x62616463); }
+{ my $z = mpz_import (-1, 2, -1, 0, 'abcd');
+  ok ($z == 0x64636261); }
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::invert
+
+ok (invert(1,123) == 1);
+ok (invert(6,7) == 6);
+ok (! defined invert(2,8));
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::jacobi, GMP::Mpz::kronecker
+
+foreach my $i ([  1, 19,  1 ],
+              [  4, 19,  1 ],
+              [  5, 19,  1 ],
+              [  6, 19,  1 ],
+              [  7, 19,  1 ],
+              [  9, 19,  1 ],
+              [ 11, 19,  1 ],
+              [ 16, 19,  1 ],
+              [ 17, 19,  1 ],
+              [  2, 19, -1 ],
+              [  3, 19, -1 ],
+              [  8, 19, -1 ],
+              [ 10, 19, -1 ],
+              [ 12, 19, -1 ],
+              [ 13, 19, -1 ],
+              [ 14, 19, -1 ],
+              [ 15, 19, -1 ],
+              [ 18, 19, -1 ]) {
+  foreach my $fun (\&jacobi, \&kronecker) {
+    ok (&$fun ($$i[0], $$i[1]) == $$i[2]);
+
+    ok (&$fun ($$i[0],      str($$i[1])) == $$i[2]);
+    ok (&$fun (str($$i[0]),     $$i[1])  == $$i[2]);
+    ok (&$fun (str($$i[0]), str($$i[1])) == $$i[2]);
+
+    ok (&$fun ($$i[0],      mpz($$i[1])) == $$i[2]);
+    ok (&$fun (mpz($$i[0]), $$i[1]) == $$i[2]);
+    ok (&$fun (mpz($$i[0]), mpz($$i[1])) == $$i[2]);
+  }
+}
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::lcm
+
+ok (lcm (2) == 2);
+ok (lcm (0) == 0);
+ok (lcm (0,0) == 0);
+ok (lcm (0,0,0) == 0);
+ok (lcm (0,0,0,0) == 0);
+ok (lcm (2,0) == 0);
+ok (lcm (-2,0) == 0);
+ok (lcm (2,3) == 6);
+ok (lcm (2,3,4) == 12);
+ok (lcm (2,-3) == 6);
+ok (lcm (-2,3) == 6);
+ok (lcm (-2,-3) == 6);
+ok (lcm (mpz(2)**512,1) == mpz(2)**512);
+ok (lcm (mpz(2)**512,-1) == mpz(2)**512);
+ok (lcm (-mpz(2)**512,1) == mpz(2)**512);
+ok (lcm (-mpz(2)**512,-1) == mpz(2)**512);
+ok (lcm (mpz(2)**512,mpz(2)**512) == mpz(2)**512);
+ok (lcm (mpz(2)**512,-mpz(2)**512) == mpz(2)**512);
+ok (lcm (-mpz(2)**512,mpz(2)**512) == mpz(2)**512);
+ok (lcm (-mpz(2)**512,-mpz(2)**512) == mpz(2)**512);
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::lucnum
+
+ok (lucnum(0) == 2);
+ok (lucnum(1) == 1);
+ok (lucnum(2) == 3);
+ok (lucnum(3) == 4);
+ok (lucnum(4) == 7);
+ok (lucnum(5) == 11);
+ok (lucnum(6) == 18);
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::lucnum2
+
+{ my ($a, $b) = lucnum2(0); ok($a==2);  ok($b==-1); }
+{ my ($a, $b) = lucnum2(1); ok($a==1);  ok($b==2); }
+{ my ($a, $b) = lucnum2(2); ok($a==3);  ok($b==1); }
+{ my ($a, $b) = lucnum2(3); ok($a==4);  ok($b==3); }
+{ my ($a, $b) = lucnum2(4); ok($a==7);  ok($b==4); }
+{ my ($a, $b) = lucnum2(5); ok($a==11); ok($b==7); }
+{ my ($a, $b) = lucnum2(6); ok($a==18); ok($b==11); }
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::nextprime
+
+ok (nextprime(2) == 3);
+ok (nextprime(3) == 5);
+ok (nextprime(5) == 7);
+ok (nextprime(7) == 11);
+ok (nextprime(11) == 13);
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::perfect_power_p
+
+# ok (  perfect_power_p(mpz(-27)));
+# ok (! perfect_power_p(mpz(-9)));
+# ok (! perfect_power_p(mpz(-1)));
+ok (  perfect_power_p(mpz(0)));
+ok (  perfect_power_p(mpz(1)));
+ok (! perfect_power_p(mpz(2)));
+ok (! perfect_power_p(mpz(3)));
+ok (  perfect_power_p(mpz(4)));
+ok (  perfect_power_p(mpz(9)));
+ok (  perfect_power_p(mpz(27)));
+ok (  perfect_power_p(mpz(81)));
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::perfect_square_p
+
+ok (! perfect_square_p(mpz(-9)));
+ok (! perfect_square_p(mpz(-1)));
+ok (  perfect_square_p(mpz(0)));
+ok (  perfect_square_p(mpz(1)));
+ok (! perfect_square_p(mpz(2)));
+ok (! perfect_square_p(mpz(3)));
+ok (  perfect_square_p(mpz(4)));
+ok (  perfect_square_p(mpz(9)));
+ok (! perfect_square_p(mpz(27)));
+ok (  perfect_square_p(mpz(81)));
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::popcount
+
+ok (popcount(7) == 3);
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::powm
+
+ok (powm (3,2,8) == 1);
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::probab_prime_p
+
+ok (  probab_prime_p(89,1));
+ok (! probab_prime_p(81,1));
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::realloc
+
+{ my $z = mpz(123);
+  realloc ($z, 512); }
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::remove
+
+{
+  my ($rem, $mult);
+  ($rem, $mult) = remove(12,3);
+  ok ($rem == 4);
+  ok ($mult == 1);
+  ($rem, $mult) = remove(12,2);
+  ok ($rem == 3);
+  ok ($mult == 2);
+}
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::root
+
+ok (root(0,2) == 0);
+ok (root(8,3) == 2);
+ok (root(-8,3) == -2);
+ok (root(81,4) == 3);
+ok (root(243,5) == 3);
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::roote
+
+{ my ($r,$e);
+  ($r, $e) = roote(0,2);
+  ok ($r == 0);
+  ok ($e);
+  ($r, $e) = roote(81,4);
+  ok ($r == 3);
+  ok ($e);
+  ($r, $e) = roote(85,4);
+  ok ($r == 3);
+  ok (! $e);
+}
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::rootrem
+
+{ my ($root, $rem) = rootrem (mpz(0), 1);
+  ok ($root == 0); ok ($rem == 0); }
+{ my ($root, $rem) = rootrem (mpz(0), 2);
+  ok ($root == 0); ok ($rem == 0); }
+{ my ($root, $rem) = rootrem (mpz(64), 2);
+  ok ($root == 8); ok ($rem == 0); }
+{ my ($root, $rem) = rootrem (mpz(64), 3);
+  ok ($root == 4); ok ($rem == 0); }
+{ my ($root, $rem) = rootrem (mpz(65), 3);
+  ok ($root == 4); ok ($rem == 1); }
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::scan0
+
+ok (scan0 (0, 0) == 0);
+ok (scan0 (1, 0) == 1);
+ok (scan0 (3, 0) == 2);
+ok (scan0 (-1, 0) == ~0);
+ok (scan0 (-2, 1) == ~0);
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::scan1
+
+ok (scan1 (1, 0) == 0);
+ok (scan1 (2, 0) == 1);
+ok (scan1 (4, 0) == 2);
+ok (scan1 (0, 0) == ~0);
+ok (scan1 (3, 2) == ~0);
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::setbit
+
+{ my $a = mpz(3); setbit ($a, 1); ok ($a == 3); }
+{ my $a = mpz(3); setbit ($a, 2); ok ($a == 7); }
+
+{ my $a = 3; setbit ($a, 1); ok ($a == 3); }
+{ my $a = 3; setbit ($a, 2); ok ($a == 7); }
+
+# mutate only given variable
+{ my $a = mpz(0);
+  my $b = $a;
+  setbit ($a, 0);
+  ok ($a == 1);
+  ok ($b == 0);
+}
+{ my $a = 0;
+  my $b = $a;
+  setbit ($a, 0);
+  ok ($a == 1);
+  ok ($b == 0);
+}
+
+{ tie my $a, 'Mytie', mpz(3);
+  setbit ($a, 2);
+  ok ($Mytie::fetched > 0);    # used fetch
+  ok ($Mytie::stored > 0);     # used store
+  ok ($a == 7);                # expected result
+  ok (UNIVERSAL::isa($a,"GMP::Mpz"));
+  ok (tied($a));               # still tied
+}
+{ tie my $a, 'Mytie', 3;
+  setbit ($a, 2);
+  ok ($Mytie::fetched > 0);    # used fetch
+  ok ($Mytie::stored > 0);     # used store
+  ok ($a == 7);                # expected result
+  ok (UNIVERSAL::isa($a,"GMP::Mpz"));
+  ok (tied($a));               # still tied
+}
+
+{ my $b = mpz(2);
+  tie my $a, 'Mytie', $b;
+  setbit ($a, 0);
+  ok ($a == 3);
+  ok ($b == 2);
+  ok (tied($a));
+}
+{ my $b = 2;
+  tie my $a, 'Mytie', $b;
+  setbit ($a, 0);
+  ok ($a == 3);
+  ok ($b == 2);
+  ok (tied($a));
+}
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::sizeinbase
+
+ok (sizeinbase(1,10) == 1);
+ok (sizeinbase(100,10) == 3);
+ok (sizeinbase(9999,10) == 5);
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::sqrtrem
+
+{
+  my ($root, $rem) = sqrtrem(mpz(0));
+  ok ($root == 0);
+  ok ($rem == 0);
+}
+{
+  my ($root, $rem) = sqrtrem(mpz(1));
+  ok ($root == 1);
+  ok ($rem == 0);
+}
+{
+  my ($root, $rem) = sqrtrem(mpz(2));
+  ok ($root == 1);
+  ok ($rem == 1);
+}
+{
+  my ($root, $rem) = sqrtrem(mpz(9));
+  ok ($root == 3);
+  ok ($rem == 0);
+}
+{
+  my ($root, $rem) = sqrtrem(mpz(35));
+  ok ($root == 5);
+  ok ($rem == 10);
+}
+{
+  my ($root, $rem) = sqrtrem(mpz(0));
+  ok ($root == 0);
+  ok ($rem == 0);
+}
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::tdiv
+
+{ my ($q, $r);
+  ($q, $r) = tdiv (16, 3);
+  ok ($q == 5);
+  ok ($r == 1);
+  ($q, $r) = tdiv (16, -3);
+  ok ($q == -5);
+  ok ($r == 1);
+  ($q, $r) = tdiv (-16, 3);
+  ok ($q == -5);
+  ok ($r == -1);
+  ($q, $r) = tdiv (-16, -3);
+  ok ($q == 5);
+  ok ($r == -1);
+}
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::tdiv_2exp
+
+{ my ($q, $r);
+  ($q, $r) = tdiv_2exp (23, 2);
+  ok ($q == 5);
+  ok ($r == 3);
+  ($q, $r) = tdiv_2exp (-23, 2);
+  ok ($q == -5);
+  ok ($r == -3);
+}
+
+#------------------------------------------------------------------------------
+# GMP::Mpz::tstbit
+
+ok (tstbit (6, 0) == 0);
+ok (tstbit (6, 1) == 1);
+ok (tstbit (6, 2) == 1);
+ok (tstbit (6, 3) == 0);
+
+
+
+
+#------------------------------------------------------------------------------
+# GMP::Mpq
+
+#------------------------------------------------------------------------------
+# GMP::Mpq::new
+
+ok (mpq(0) == 0);
+ok (mpq('0') == 0);
+ok (mpq(substr('101',1,1)) == 0);
+ok (mpq(0.0) == 0);
+ok (mpq(mpz(0)) == 0);
+ok (mpq(mpq(0)) == 0);
+ok (mpq(mpf(0)) == 0);
+
+{ tie my $t, 'Mytie', 0; ok (mpq($t) == 0); }
+{ tie my $t, 'Mytie', '0'; ok (mpq($t) == 0); }
+{ tie my $t, 'Mytie', substr('101',1,1); ok (mpq($t) == 0); }
+{ tie my $t, 'Mytie', 0.0; ok (mpq($t) == 0); }
+{ tie my $t, 'Mytie', mpz(0); ok (mpq($t) == 0); }
+{ tie my $t, 'Mytie', mpq(0); ok (mpq($t) == 0); }
+{ tie my $t, 'Mytie', mpf(0); ok (mpq($t) == 0); }
+
+ok (mpq(-123) == -123);
+ok (mpq('-123') == -123);
+ok (mpq(substr('1-1231',1,4)) == -123);
+ok (mpq(-123.0) == -123);
+ok (mpq(mpz(-123)) == -123);
+ok (mpq(mpq(-123)) == -123);
+ok (mpq(mpf(-123)) == -123);
+
+{ tie my $t, 'Mytie', -123; ok (mpq($t) == -123); }
+{ tie my $t, 'Mytie', '-123'; ok (mpq($t) == -123); }
+{ tie my $t, 'Mytie', substr('1-1231',1,4); ok (mpq($t) == -123); }
+{ tie my $t, 'Mytie', -123.0; ok (mpq($t) == -123); }
+{ tie my $t, 'Mytie', mpz(-123); ok (mpq($t) == -123); }
+{ tie my $t, 'Mytie', mpq(-123); ok (mpq($t) == -123); }
+{ tie my $t, 'Mytie', mpf(-123); ok (mpq($t) == -123); }
+
+ok (mpq($ivnv_2p128) == $str_2p128);
+{ tie my $t, 'Mytie', $ivnv_2p128; ok (mpq($t) == $str_2p128); }
+
+ok (mpq('3/2') == mpq(3,2));
+ok (mpq('3/1') == mpq(3,1));
+ok (mpq('-3/2') == mpq(-3,2));
+ok (mpq('-3/1') == mpq(-3,1));
+ok (mpq('0x3') == mpq(3,1));
+ok (mpq('0b111') == mpq(7,1));
+ok (mpq('0b0') == mpq(0,1));
+
+ok (mpq($uv_max) > 0);
+ok (mpq($uv_max) == mpq($uv_max_str));
+{ tie my $t, 'Mytie', $uv_max; ok (mpq($t) > 0); }
+{ tie my $t, 'Mytie', $uv_max; ok (mpq($t) == mpq($uv_max_str)); }
+
+{ my $x = 123.5;
+  kill (0, $x);
+  ok (mpq($x) == 123.5);
+  tie my $t, 'Mytie', $x;
+  ok (mpq($t) == 123.5);
+}
+
+#------------------------------------------------------------------------------
+# GMP::Mpq::overload_abs
+
+ok (abs(mpq(0)) == 0);
+ok (abs(mpq(123)) == 123);
+ok (abs(mpq(-123)) == 123);
+
+{ my $x = mpq(-123); $x = abs($x); ok ($x == 123); }
+{ my $x = mpq(0);    $x = abs($x); ok ($x == 0);   }
+{ my $x = mpq(123);  $x = abs($x); ok ($x == 123); }
+
+{ tie my $t, 'Mytie', mpq(0); ok (abs($t) == 0); }
+{ tie my $t, 'Mytie', mpq(123); ok (abs($t) == 123); }
+{ tie my $t, 'Mytie', mpq(-123); ok (abs($t) == 123); }
+
+#------------------------------------------------------------------------------
+# GMP::Mpq::overload_add
+
+ok (mpq(0) + 1 == 1);
+ok (mpq(-1) + 1 == 0);
+ok (1 + mpq(0) == 1);
+ok (1 + mpq(-1) == 0);
+
+ok (mpq(1,2)+mpq(1,3) == mpq(5,6));
+ok (mpq(1,2)+mpq(-1,3) == mpq(1,6));
+ok (mpq(-1,2)+mpq(1,3) == mpq(-1,6));
+ok (mpq(-1,2)+mpq(-1,3) == mpq(-5,6));
+
+#------------------------------------------------------------------------------
+# GMP::Mpq::overload_addeq
+
+{ my $a = mpq(7); $a += 1; ok ($a == 8); }
+{ my $a = mpq(7); my $b = $a; $a += 1; ok ($a == 8); ok ($b == 7); }
+
+#------------------------------------------------------------------------------
+# GMP::Mpq::overload_bool
+
+if (mpq(0))   { ok (0); } else { ok (1); }
+if (mpq(123)) { ok (1); } else { ok (0); }
+
+#------------------------------------------------------------------------------
+# GMP::Mpq::overload_dec
+
+{ my $a = mpq(0); ok ($a-- == 0); ok ($a == -1); }
+{ my $a = mpq(0); ok (--$a == -1); }
+
+{ my $a = mpq(0); my $b = $a; $a--; ok ($a == -1); ok ($b == 0); }
+
+#------------------------------------------------------------------------------
+# GMP::Mpq::overload_div
+
+ok (mpq(6) / 2 == 3);
+ok (mpq(-6) / 2 == -3);
+ok (mpq(6) / -2 == -3);
+ok (mpq(-6) / -2 == 3);
+
+#------------------------------------------------------------------------------
+# GMP::Mpq::overload_diveq
+
+{ my $a = mpq(21); $a /= 3; ok ($a == 7); }
+{ my $a = mpq(21); my $b = $a; $a /= 3; ok ($a == 7); ok ($b == 21); }
+
+#------------------------------------------------------------------------------
+# GMP::Mpq::overload_eq
+
+{ my $a = mpq(0);
+  my $b = $a;
+  $a = mpq(1);
+  ok ($a == 1);
+  ok ($b == 0); }
+
+#------------------------------------------------------------------------------
+# GMP::Mpq::overload_inc
+
+{ my $a = mpq(0); ok ($a++ == 0); ok ($a == 1); }
+{ my $a = mpq(0); ok (++$a == 1); }
+
+{ my $a = mpq(0); my $b = $a; $a++; ok ($a == 1); ok ($b == 0); }
+
+#------------------------------------------------------------------------------
+# GMP::Mpq::overload_lshift
+
+{ my $a = mpq(7) << 1; ok ($a == 14); }
+
+#------------------------------------------------------------------------------
+# GMP::Mpq::overload_lshifteq
+
+{ my $a = mpq(7); $a <<= 1; ok ($a == 14); }
+{ my $a = mpq(7); my $b = $a; $a <<= 1; ok ($a == 14); ok ($b == 7); }
+
+#------------------------------------------------------------------------------
+# GMP::Mpq::overload_mul
+
+ok (mpq(2) * 3 == 6);
+
+#------------------------------------------------------------------------------
+# GMP::Mpq::overload_muleq
+
+{ my $a = mpq(7); $a *= 3;  ok ($a == 21); }
+{ my $a = mpq(7); my $b = $a; $a *= 3;  ok ($a == 21); ok ($b == 7); }
+
+#------------------------------------------------------------------------------
+# GMP::Mpq::overload_neg
+
+ok (- mpq(0) == 0);
+ok (- mpq(123) == -123);
+ok (- mpq(-123) == 123);
+
+#------------------------------------------------------------------------------
+# GMP::Mpq::overload_not
+
+if (not mpq(0))   { ok (1); } else { ok (0); }
+if (not mpq(123)) { ok (0); } else { ok (1); }
+
+ok ((! mpq(0)) == 1);
+ok ((! mpq(123)) == 0);
+
+#------------------------------------------------------------------------------
+# GMP::Mpq::overload_pow
+
+ok (mpq(0) ** 1 == 0);
+ok (mpq(1) ** 1 == 1);
+ok (mpq(2) ** 0 == 1);
+ok (mpq(2) ** 1 == 2);
+ok (mpq(2) ** 2 == 4);
+ok (mpq(2) ** 3 == 8);
+ok (mpq(2) ** 4 == 16);
+
+ok (mpq(0) ** mpq(1) == 0);
+ok (mpq(1) ** mpq(1) == 1);
+ok (mpq(2) ** mpq(0) == 1);
+ok (mpq(2) ** mpq(1) == 2);
+ok (mpq(2) ** mpq(2) == 4);
+ok (mpq(2) ** mpq(3) == 8);
+ok (mpq(2) ** mpq(4) == 16);
+
+#------------------------------------------------------------------------------
+# GMP::Mpq::overload_poweq
+
+{ my $a = mpq(3); $a **= 4; ok ($a == 81); }
+{ my $a = mpq(3); my $b = $a; $a **= 4; ok ($a == 81); ok ($b == 3); }
+
+#------------------------------------------------------------------------------
+# GMP::Mpq::overload_rshift
+
+{ my $a = mpq(32) >> 1; ok ($a == 16); }
+
+#------------------------------------------------------------------------------
+# GMP::Mpq::overload_rshifteq
+
+{ my $a = mpq(32); $a >>= 1; ok ($a == 16); }
+{ my $a = mpq(32); my $b = $a; $a >>= 1; ok ($a == 16); ok ($b == 32); }
+
+#------------------------------------------------------------------------------
+# GMP::Mpq::overload_spaceship
+
+ok (mpq(0) < 1);
+ok (mpq(0) > -1);
+
+ok (mpq(0) != 1);
+ok (mpq(0) != -1);
+ok (mpq(1) != 0);
+ok (mpq(1) != -1);
+ok (mpq(-1) != 0);
+ok (mpq(-1) != 1);
+
+ok (mpq(3,2) > 1);
+ok (mpq(3,2) < 2);
+
+ok (mpq(0) < 1.0);
+ok (mpq(0) < '1');
+ok (mpq(0) < substr('-1',1,1));
+ok (mpq(0) < mpz(1));
+ok (mpq(0) < mpq(1));
+ok (mpq(0) < mpf(1));
+ok (mpq(0) < $uv_max);
+
+#------------------------------------------------------------------------------
+# GMP::Mpq::overload_string
+
+{ my $x = mpq(0);    ok("$x" eq "0"); }
+{ my $x = mpq(123);  ok("$x" eq "123"); }
+{ my $x = mpq(-123); ok("$x" eq "-123"); }
+
+{ my $q = mpq(5,7);  ok("$q" eq "5/7"); }
+{ my $q = mpq(-5,7); ok("$q" eq "-5/7"); }
+
+#------------------------------------------------------------------------------
+# GMP::Mpq::overload_sub
+
+ok (mpq(0) - 1 == -1);
+ok (mpq(1) - 1 == 0);
+ok (1 - mpq(0) == 1);
+ok (1 - mpq(1) == 0);
+
+ok (mpq(1,2)-mpq(1,3) == mpq(1,6));
+ok (mpq(1,2)-mpq(-1,3) == mpq(5,6));
+ok (mpq(-1,2)-mpq(1,3) == mpq(-5,6));
+ok (mpq(-1,2)-mpq(-1,3) == mpq(-1,6));
+
+#------------------------------------------------------------------------------
+# GMP::Mpq::overload_subeq
+
+{ my $a = mpq(7); $a -= 1; ok ($a == 6); }
+{ my $a = mpq(7); my $b = $a; $a -= 1; ok ($a == 6); ok ($b == 7); }
+
+#------------------------------------------------------------------------------
+# GMP::Mpq::canonicalize
+
+{ my $q = mpq(21,15); canonicalize($q);
+  ok (num($q) == 7);
+  ok (den($q) == 5);
+}
+
+#------------------------------------------------------------------------------
+# GMP::Mpq::den
+
+{ my $q = mpq(5,9); ok (den($q) == 9); }
+
+#------------------------------------------------------------------------------
+# GMP::Mpq::num
+
+{ my $q = mpq(5,9); ok (num($q) == 5); }
+
+
+
+
+#------------------------------------------------------------------------------
+# GMP::Mpf
+
+#------------------------------------------------------------------------------
+# GMP::Mpf::new
+
+ok (mpf(0) == 0);
+ok (mpf('0') == 0);
+ok (mpf(substr('101',1,1)) == 0);
+ok (mpf(0.0) == 0);
+ok (mpf(mpz(0)) == 0);
+ok (mpf(mpq(0)) == 0);
+ok (mpf(mpf(0)) == 0);
+
+{ tie my $t, 'Mytie', 0; ok (mpf($t) == 0); }
+{ tie my $t, 'Mytie', '0'; ok (mpf($t) == 0); }
+{ tie my $t, 'Mytie', substr('101',1,1); ok (mpf($t) == 0); }
+{ tie my $t, 'Mytie', 0.0; ok (mpf($t) == 0); }
+{ tie my $t, 'Mytie', mpz(0); ok (mpf($t) == 0); }
+{ tie my $t, 'Mytie', mpq(0); ok (mpf($t) == 0); }
+{ tie my $t, 'Mytie', mpf(0); ok (mpf($t) == 0); }
+
+ok (mpf(-123) == -123);
+ok (mpf('-123') == -123);
+ok (mpf(substr('1-1231',1,4)) == -123);
+ok (mpf(-123.0) == -123);
+ok (mpf(mpz(-123)) == -123);
+ok (mpf(mpq(-123)) == -123);
+ok (mpf(mpf(-123)) == -123);
+
+{ tie my $t, 'Mytie', -123; ok (mpf($t) == -123); }
+{ tie my $t, 'Mytie', '-123'; ok (mpf($t) == -123); }
+{ tie my $t, 'Mytie', substr('1-1231',1,4); ok (mpf($t) == -123); }
+{ tie my $t, 'Mytie', -123.0; ok (mpf($t) == -123); }
+{ tie my $t, 'Mytie', mpz(-123); ok (mpf($t) == -123); }
+{ tie my $t, 'Mytie', mpq(-123); ok (mpf($t) == -123); }
+{ tie my $t, 'Mytie', mpf(-123); ok (mpf($t) == -123); }
+
+ok (mpf($ivnv_2p128) == $str_2p128);
+{ tie my $t, 'Mytie', $ivnv_2p128; ok (mpf($t) == $str_2p128); }
+
+ok (mpf(-1.5) == -1.5);
+ok (mpf(-1.0) == -1.0);
+ok (mpf(-0.5) == -0.5);
+ok (mpf(0) == 0);
+ok (mpf(0.5) == 0.5);
+ok (mpf(1.0) == 1.0);
+ok (mpf(1.5) == 1.5);
+
+ok (mpf("-1.5") == -1.5);
+ok (mpf("-1.0") == -1.0);
+ok (mpf("-0.5") == -0.5);
+ok (mpf("0") == 0);
+ok (mpf("0.5") == 0.5);
+ok (mpf("1.0") == 1.0);
+ok (mpf("1.5") == 1.5);
+
+ok (mpf($uv_max) > 0);
+ok (mpf($uv_max) == mpf($uv_max_str));
+{ tie my $t, 'Mytie', $uv_max; ok (mpf($t) > 0); }
+{ tie my $t, 'Mytie', $uv_max; ok (mpf($t) == mpf($uv_max_str)); }
+
+{ my $x = 123.5;   # a non-integer double must convert to mpf exactly
+  kill (0, $x);   # signal 0 sends nothing; NOTE(review): presumably used so perl also caches an integer value in $x -- confirm
+  ok (mpf($x) == 123.5);
+  tie my $t, 'Mytie', $x;   # repeat the check through a tied scalar
+  ok (mpf($t) == 123.5);
+}
+
+#------------------------------------------------------------------------------
+# GMP::Mpf::overload_abs
+
+ok (abs(mpf(0)) == 0);
+ok (abs(mpf(123)) == 123);
+ok (abs(mpf(-123)) == 123);
+
+{ my $x = mpf(-123); $x = abs($x); ok ($x == 123); }
+{ my $x = mpf(0);    $x = abs($x); ok ($x == 0);   }
+{ my $x = mpf(123);  $x = abs($x); ok ($x == 123); }
+
+{ tie my $t, 'Mytie', mpf(0); ok (abs($t) == 0); }
+{ tie my $t, 'Mytie', mpf(123); ok (abs($t) == 123); }
+{ tie my $t, 'Mytie', mpf(-123); ok (abs($t) == 123); }
+
+#------------------------------------------------------------------------------
+# GMP::Mpf::overload_add
+
+ok (mpf(0) + 1 == 1);
+ok (mpf(-1) + 1 == 0);
+ok (1 + mpf(0) == 1);
+ok (1 + mpf(-1) == 0);
+
+#------------------------------------------------------------------------------
+# GMP::Mpf::overload_addeq
+
+{ my $a = mpf(7); $a += 1; ok ($a == 8); }
+{ my $a = mpf(7); my $b = $a; $a += 1; ok ($a == 8); ok ($b == 7); }
+
+#------------------------------------------------------------------------------
+# GMP::Mpf::overload_bool
+
+if (mpf(0))   { ok (0); } else { ok (1); }
+if (mpf(123)) { ok (1); } else { ok (0); }
+
+#------------------------------------------------------------------------------
+# GMP::Mpf::overload_dec
+
+{ my $a = mpf(0); ok ($a-- == 0); ok ($a == -1); }
+{ my $a = mpf(0); ok (--$a == -1); }
+
+{ my $a = mpf(0); my $b = $a; $a--; ok ($a == -1); ok ($b == 0); }
+
+#------------------------------------------------------------------------------
+# GMP::Mpf::overload_div
+
+ok (mpf(6) / 2 == 3);
+ok (mpf(-6) / 2 == -3);
+ok (mpf(6) / -2 == -3);
+ok (mpf(-6) / -2 == 3);
+
+#------------------------------------------------------------------------------
+# GMP::Mpf::overload_diveq
+
+{ my $a = mpf(21); $a /= 3; ok ($a == 7); }
+{ my $a = mpf(21); my $b = $a; $a /= 3; ok ($a == 7); ok ($b == 21); }
+
+#------------------------------------------------------------------------------
+# GMP::Mpf::overload_eq
+
+{ my $a = mpf(0);
+  my $b = $a;
+  $a = mpf(1);
+  ok ($a == 1);
+  ok ($b == 0); }
+
+#------------------------------------------------------------------------------
+# GMP::Mpf::overload_inc
+
+{ my $a = mpf(0); ok ($a++ == 0); ok ($a == 1); }
+{ my $a = mpf(0); ok (++$a == 1); }
+
+{ my $a = mpf(0); my $b = $a; $a++; ok ($a == 1); ok ($b == 0); }
+
+#------------------------------------------------------------------------------
+# GMP::Mpf::overload_lshift
+
+{ my $a = mpf(7) << 1; ok ($a == 14); }
+
+#------------------------------------------------------------------------------
+# GMP::Mpf::overload_lshifteq
+
+{ my $a = mpf(7); $a <<= 1; ok ($a == 14); }
+{ my $a = mpf(7); my $b = $a; $a <<= 1; ok ($a == 14); ok ($b == 7); }
+
+#------------------------------------------------------------------------------
+# GMP::Mpf::overload_mul
+
+ok (mpf(2) * 3 == 6);
+
+#------------------------------------------------------------------------------
+# GMP::Mpf::overload_muleq
+
+{ my $a = mpf(7); $a *= 3;  ok ($a == 21); }
+{ my $a = mpf(7); my $b = $a; $a *= 3;  ok ($a == 21); ok ($b == 7); }
+
+#------------------------------------------------------------------------------
+# GMP::Mpf::overload_neg
+
+ok (- mpf(0) == 0);
+ok (- mpf(123) == -123);
+ok (- mpf(-123) == 123);
+
+#------------------------------------------------------------------------------
+# GMP::Mpf::overload_not
+
+if (not mpf(0))   { ok (1); } else { ok (0); }
+if (not mpf(123)) { ok (0); } else { ok (1); }
+
+ok ((! mpf(0)) == 1);
+ok ((! mpf(123)) == 0);
+
+#------------------------------------------------------------------------------
+# GMP::Mpf::overload_pow
+
+ok (mpf(0) ** 1 == 0);
+ok (mpf(1) ** 1 == 1);
+ok (mpf(2) ** 0 == 1);
+ok (mpf(2) ** 1 == 2);
+ok (mpf(2) ** 2 == 4);
+ok (mpf(2) ** 3 == 8);
+ok (mpf(2) ** 4 == 16);
+
+ok (mpf(0) ** mpf(1) == 0);
+ok (mpf(1) ** mpf(1) == 1);
+ok (mpf(2) ** mpf(0) == 1);
+ok (mpf(2) ** mpf(1) == 2);
+ok (mpf(2) ** mpf(2) == 4);
+ok (mpf(2) ** mpf(3) == 8);
+ok (mpf(2) ** mpf(4) == 16);
+
+#------------------------------------------------------------------------------
+# GMP::Mpf::overload_poweq
+
+{ my $a = mpf(3); $a **= 4; ok ($a == 81); }
+{ my $a = mpf(3); my $b = $a; $a **= 4; ok ($a == 81); ok ($b == 3); }
+
+#------------------------------------------------------------------------------
+# GMP::Mpf::overload_rshift
+
+{ my $a = mpf(32) >> 1; ok ($a == 16); }
+
+#------------------------------------------------------------------------------
+# GMP::Mpf::overload_rshifteq
+
+{ my $a = mpf(32); $a >>= 1; ok ($a == 16); }
+{ my $a = mpf(32); my $b = $a; $a >>= 1; ok ($a == 16); ok ($b == 32); }
+
+#------------------------------------------------------------------------------
+# GMP::Mpf::overload_sqrt
+
+ok (sqrt(mpf(0)) == 0);
+ok (sqrt(mpf(1)) == 1);
+ok (sqrt(mpf(4)) == 2);
+ok (sqrt(mpf(81)) == 9);
+
+ok (sqrt(mpf(0.25)) == 0.5);
+
+#------------------------------------------------------------------------------
+# GMP::Mpf::overload_spaceship
+
+ok (mpf(0) < 1);
+ok (mpf(0) > -1);
+
+ok (mpf(0) != 1);
+ok (mpf(0) != -1);
+ok (mpf(1) != 0);
+ok (mpf(1) != -1);
+ok (mpf(-1) != 0);
+ok (mpf(-1) != 1);
+
+ok (mpf(0) < 1.0);
+ok (mpf(0) < '1');
+ok (mpf(0) < substr('-1',1,1));
+ok (mpf(0) < mpz(1));
+ok (mpf(0) < mpq(1));
+ok (mpf(0) < mpf(1));
+ok (mpf(0) < $uv_max);
+
+#------------------------------------------------------------------------------
+# GMP::Mpf::overload_string
+
+{ my $x = mpf(0);    ok ("$x" eq "0"); }
+{ my $x = mpf(123);  ok ("$x" eq "123"); }
+{ my $x = mpf(-123); ok ("$x" eq "-123"); }
+
+{ my $f = mpf(0.25);            ok ("$f" eq "0.25"); }
+{ my $f = mpf(-0.25);           ok ("$f" eq "-0.25"); }
+{ my $f = mpf(1.25);            ok ("$f" eq "1.25"); }
+{ my $f = mpf(-1.25);           ok ("$f" eq "-1.25"); }
+{ my $f = mpf(1000000);         ok ("$f" eq "1000000"); }
+{ my $f = mpf(-1000000); ok ("$f" eq "-1000000"); }
+
+#------------------------------------------------------------------------------
+# GMP::Mpf::overload_sub
+
+ok (mpf(0) - 1 == -1);
+ok (mpf(1) - 1 == 0);
+ok (1 - mpf(0) == 1);
+ok (1 - mpf(1) == 0);
+
+#------------------------------------------------------------------------------
+# GMP::Mpf::overload_subeq
+
+{ my $a = mpf(7); $a -= 1; ok ($a == 6); }
+{ my $a = mpf(7); my $b = $a; $a -= 1; ok ($a == 6); ok ($b == 7); }
+
+
+#------------------------------------------------------------------------------
+# GMP::Mpf::ceil
+
+ok (ceil (mpf(-7.5)) == -7.0);
+ok (ceil (mpf(7.5)) == 8.0);
+
+#------------------------------------------------------------------------------
+# GMP::Mpf::floor
+
+ok (floor(mpf(-7.5)) == -8.0);
+ok (floor(mpf(7.5)) == 7.0);
+
+#------------------------------------------------------------------------------
+# GMP::Mpf::mpf_eq
+
+{ my $old_prec = get_default_prec();
+  set_default_prec(128);
+
+  ok (  mpf_eq (mpz("0x10000000000000001"), mpz("0x10000000000000002"), 1));
+  ok (! mpf_eq (mpz("0x11"), mpz("0x12"), 128));
+
+  set_default_prec($old_prec);
+}
+
+#------------------------------------------------------------------------------
+# GMP::Mpf::get_default_prec
+
+get_default_prec();
+
+#------------------------------------------------------------------------------
+# GMP::Mpf::get_prec
+
+{ my $x = mpf(1.0, 512);
+  ok (get_prec ($x) == 512);
+}
+
+#------------------------------------------------------------------------------
+# GMP::Mpf::reldiff
+
+ok (reldiff (2,4) == 1);
+ok (reldiff (4,2) == 0.5);
+
+#------------------------------------------------------------------------------
+# GMP::Mpf::set_default_prec
+
+{ my $old_prec = get_default_prec();
+
+  set_default_prec(512);
+  ok (get_default_prec () == 512);
+
+  set_default_prec($old_prec);
+}
+
+#------------------------------------------------------------------------------
+# GMP::Mpf::set_prec
+
+{ my $x = mpf(1.0, 512);
+  my $y = $x;
+  set_prec ($x, 1024);
+  ok (get_prec ($x) == 1024);
+  ok (get_prec ($y) == 512);
+}
+
+#------------------------------------------------------------------------------
+# GMP::Mpf::trunc
+
+ok (trunc(mpf(-7.5)) == -7.0);
+ok (trunc(mpf(7.5)) == 7.0);
+
+
+
+#------------------------------------------------------------------------------
+# GMP::Rand
+
+#------------------------------------------------------------------------------
+# GMP::Rand::new
+
+{ my $r = randstate();                          ok (defined $r); }
+{ my $r = randstate('lc_2exp', 1, 2, 3);        ok (defined $r); }
+{ my $r = randstate('lc_2exp_size', 64);        ok (defined $r); }
+{ my $r = randstate('lc_2exp_size', 999999999); ok (! defined $r); }
+{ my $r = randstate('mt');                      ok (defined $r); }
+
+{ # copying a randstate results in same sequence
+  my $r1 = randstate('lc_2exp_size', 64);
+  $r1->seed(123);
+  my $r2 = randstate($r1);
+  for (1 .. 20) {
+    my $z1 = mpz_urandomb($r1, 20);
+    my $z2 = mpz_urandomb($r2, 20);
+    ok ($z1 == $z2);
+  }
+}
+
+#------------------------------------------------------------------------------
+# GMP::Rand::seed
+
+{ my $r = randstate();
+  $r->seed(123);
+  $r->seed(time());
+}
+
+#------------------------------------------------------------------------------
+# GMP::Rand::mpf_urandomb
+
+{ my $r = randstate();
+  my $f = mpf_urandomb($r,1024);
+  ok (UNIVERSAL::isa($f,"GMP::Mpf")); }
+
+#------------------------------------------------------------------------------
+# GMP::Rand::mpz_urandomb
+
+{ my $r = randstate();
+  my $z = mpz_urandomb($r, 1024);
+  ok (UNIVERSAL::isa($z,"GMP::Mpz")); }
+
+#------------------------------------------------------------------------------
+# GMP::Rand::mpz_rrandomb
+
+{ my $r = randstate();
+  my $z = mpz_rrandomb($r, 1024);
+  ok (UNIVERSAL::isa($z,"GMP::Mpz")); }
+
+#------------------------------------------------------------------------------
+# GMP::Rand::mpz_urandomm
+
+{ my $r = randstate();
+  my $z = mpz_urandomm($r, mpz(3)**100);
+  ok (UNIVERSAL::isa($z,"GMP::Mpz")); }
+
+#------------------------------------------------------------------------------
+# GMP::Rand::gmp_urandomb_ui
+
+{ my $r = randstate();
+  foreach (1 .. 20) {
+    my $u = gmp_urandomb_ui($r,8);
+    ok ($u >= 0);
+    ok ($u < 256);
+  }
+}
+
+#------------------------------------------------------------------------------
+# GMP::Rand::gmp_urandomm_ui
+
+{ my $r = randstate();
+  foreach (1 .. 20) {
+    my $u = gmp_urandomm_ui($r,8);
+    ok ($u >= 0);
+    ok ($u < 8);
+  }
+}
+
+
+
+
+#------------------------------------------------------------------------------
+# GMP module
+
+#------------------------------------------------------------------------------
+# GMP::fits_slong_p
+
+ok (GMP::fits_slong_p(0));
+
+# in perl 5.005 uv_max is only 32-bits on a 64-bit system, so won't exceed a
+# long
+# ok (! GMP::fits_slong_p($uv_max));
+
+ok (GMP::fits_slong_p(0.0));
+
+ok (GMP::fits_slong_p('0'));
+
+ok (GMP::fits_slong_p(substr('999999999999999999999999999999',1,1)));
+
+ok (! mpz("-9999999999999999999999999999999999999999999")->fits_slong_p());
+ok (  mpz(-123)->fits_slong_p());
+ok (  mpz(0)->fits_slong_p());
+ok (  mpz(123)->fits_slong_p());
+ok (! mpz("9999999999999999999999999999999999999999999")->fits_slong_p());
+
+ok (! mpq("-9999999999999999999999999999999999999999999")->fits_slong_p());
+ok (  mpq(-123)->fits_slong_p());
+ok (  mpq(0)->fits_slong_p());
+ok (  mpq(123)->fits_slong_p());
+ok (! mpq("9999999999999999999999999999999999999999999")->fits_slong_p());
+
+ok (! mpf("-9999999999999999999999999999999999999999999")->fits_slong_p());
+ok (  mpf(-123)->fits_slong_p());
+ok (  mpf(0)->fits_slong_p());
+ok (  mpf(123)->fits_slong_p());
+ok (! mpf("9999999999999999999999999999999999999999999")->fits_slong_p());
+
+#------------------------------------------------------------------------------
+# GMP::get_d
+
+ok (GMP::get_d(123) == 123.0);
+
+ok (GMP::get_d($uv_max) > 0);
+
+ok (GMP::get_d(123.0) == 123.0);
+
+ok (GMP::get_d('123') == 123.0);
+
+ok (GMP::get_d(mpz(123)) == 123.0);
+
+ok (GMP::get_d(mpq(123)) == 123.0);
+
+ok (GMP::get_d(mpf(123)) == 123.0);
+
+#------------------------------------------------------------------------------
+# GMP::get_d_2exp
+
+{ my ($dbl, $exp) = get_d_2exp (0);
+  ok ($dbl == 0); ok ($exp == 0); }
+{ my ($dbl, $exp) = get_d_2exp (1);
+  ok ($dbl == 0.5); ok ($exp == 1); }
+
+{ my ($dbl, $exp) = get_d_2exp ($uv_max);
+  ok ($dbl > 0.0); ok ($exp > 0); }
+
+{ my ($dbl, $exp) = get_d_2exp (0.5);
+  ok ($dbl == 0.5); ok ($exp == 0); }
+{ my ($dbl, $exp) = get_d_2exp (0.25);
+  ok ($dbl == 0.5); ok ($exp == -1); }
+
+{ my ($dbl, $exp) = get_d_2exp ("1.0");
+  ok ($dbl == 0.5); ok ($exp == 1); }
+
+{ my ($dbl, $exp) = get_d_2exp (mpz ("256"));
+  ok ($dbl == 0.5); ok ($exp == 9); }
+
+{ my ($dbl, $exp) = get_d_2exp (mpq ("1/16"));
+  ok ($dbl == 0.5); ok ($exp == -3); }
+
+{ my ($dbl, $exp) = get_d_2exp (mpf ("1.5"));
+  ok ($dbl == 0.75); ok ($exp == 1); }
+{ my ($dbl, $exp) = get_d_2exp (mpf ("3.0"));
+  ok ($dbl == 0.75); ok ($exp == 2); }
+
+#------------------------------------------------------------------------------
+# GMP::get_str
+
+ok (get_str(-123) eq '-123');
+ok (get_str('-123') eq '-123');
+ok (get_str(substr('x-123x',1,4)) eq '-123');
+ok (get_str(mpz(-123)) eq '-123');
+ok (get_str(mpq(-123)) eq '-123');
+
+ok (get_str(-123,10) eq '-123');
+ok (get_str('-123',10) eq '-123');
+ok (get_str(substr('x-123x',1,4),10) eq '-123');
+ok (get_str(mpz(-123),10) eq '-123');
+ok (get_str(mpq(-123),10) eq '-123');
+
+ok (get_str(-123,16) eq '-7b');
+ok (get_str('-123',16) eq '-7b');
+ok (get_str(substr('x-123x',1,4),16) eq '-7b');
+ok (get_str(mpz(-123),16) eq '-7b');
+ok (get_str(mpq(-123),16) eq '-7b');
+
+ok (get_str(-123,-16) eq '-7B');
+ok (get_str('-123',-16) eq '-7B');
+ok (get_str(substr('x-123x',1,4),-16) eq '-7B');
+ok (get_str(mpz(-123),-16) eq '-7B');
+ok (get_str(mpq(-123),-16) eq '-7B');
+
+# $uv_max is a float in past versions of perl without a UV type
+{ my ($str, $exp) = get_str($uv_max);
+  ok ($str eq $uv_max_str); }
+
+ok (get_str(mpq(5/8)) eq "5/8");
+ok (get_str(mpq(-5/8)) eq "-5/8");
+ok (get_str(mpq(255/256),16) eq "ff/100");
+ok (get_str(mpq(255/256),-16) eq "FF/100");
+ok (get_str(mpq(-255/256),16) eq "-ff/100");
+ok (get_str(mpq(-255/256),-16) eq "-FF/100");
+
+{ my ($s,$e) = get_str(1.5, 10);      ok ($s eq '15'); ok ($e == 1); }
+{ my ($s,$e) = get_str(mpf(1.5), 10); ok ($s eq '15'); ok ($e == 1); }
+
+{ my ($s,$e) = get_str(-1.5, 10);      ok ($s eq '-15'); ok ($e == 1); }
+{ my ($s,$e) = get_str(mpf(-1.5), 10); ok ($s eq '-15'); ok ($e == 1); }
+
+{ my ($s,$e) = get_str(1.5, 16);      ok ($s eq '18'); ok ($e == 1); }
+{ my ($s,$e) = get_str(mpf(1.5), 16); ok ($s eq '18'); ok ($e == 1); }
+
+{ my ($s,$e) = get_str(-1.5, 16);      ok ($s eq '-18'); ok ($e == 1); }
+{ my ($s,$e) = get_str(mpf(-1.5), 16); ok ($s eq '-18'); ok ($e == 1); }
+
+{ my ($s,$e) = get_str(65536.0, 16);      ok ($s eq '1'); ok ($e == 5); }
+{ my ($s,$e) = get_str(mpf(65536.0), 16); ok ($s eq '1'); ok ($e == 5); }
+
+{ my ($s,$e) = get_str(1.625, 16);      ok ($s eq '1a'); ok ($e == 1); }
+{ my ($s,$e) = get_str(mpf(1.625), 16); ok ($s eq '1a'); ok ($e == 1); }
+
+{ my ($s,$e) = get_str(1.625, -16);      ok ($s eq '1A'); ok ($e == 1); }
+{ my ($s,$e) = get_str(mpf(1.625), -16); ok ($s eq '1A'); ok ($e == 1); }
+
+{ my ($s, $e) = get_str(255.0,16,0);      ok ($s eq "ff"); ok ($e == 2); }
+{ my ($s, $e) = get_str(mpf(255.0),16,0); ok ($s eq "ff"); ok ($e == 2); }
+
+{ my ($s, $e) = get_str(255.0,-16,0);      ok ($s eq "FF"); ok ($e == 2); }
+{ my ($s, $e) = get_str(mpf(255.0),-16,0); ok ($s eq "FF"); ok ($e == 2); }
+
+#------------------------------------------------------------------------------
+# GMP::get_si
+
+ok (GMP::get_si(123) == 123.0);
+
+# better not assume anything about the relative sizes of long and UV
+ok (GMP::get_si($uv_max) != 0);
+
+ok (GMP::get_si(123.0) == 123.0);
+
+ok (GMP::get_si('123') == 123.0);
+
+ok (GMP::get_si(mpz(123)) == 123.0);
+
+ok (GMP::get_si(mpq(123)) == 123.0);
+
+ok (GMP::get_si(mpf(123)) == 123.0);
+
+#------------------------------------------------------------------------------
+# GMP::integer_p
+
+ok (  GMP::integer_p (0));
+ok (  GMP::integer_p (123));
+ok (  GMP::integer_p (-123));
+
+ok (  GMP::integer_p ($uv_max));
+
+ok (  GMP::integer_p (0.0));
+ok (  GMP::integer_p (123.0));
+ok (  GMP::integer_p (-123.0));
+ok (! GMP::integer_p (0.5));
+ok (! GMP::integer_p (123.5));
+ok (! GMP::integer_p (-123.5));
+
+ok (  GMP::integer_p ('0'));
+ok (  GMP::integer_p ('123'));
+ok (  GMP::integer_p ('-123'));
+ok (! GMP::integer_p ('0.5'));
+ok (! GMP::integer_p ('123.5'));
+ok (! GMP::integer_p ('-123.5'));
+ok (! GMP::integer_p ('5/8'));
+
+ok (  GMP::integer_p (mpz(1)));
+
+ok (  GMP::integer_p (mpq(1)));
+ok (! GMP::integer_p (mpq(1,2)));
+
+ok (  GMP::integer_p (mpf(1.0)));
+ok (! GMP::integer_p (mpf(1.5)));
+
+#------------------------------------------------------------------------------
+# GMP::odd_p
+
+ok (! odd_p(0));
+ok (  odd_p(1));
+ok (! odd_p(2));
+
+ok (  odd_p($uv_max));
+
+ok (  odd_p(mpz(-3)));
+ok (! odd_p(mpz(-2)));
+ok (  odd_p(mpz(-1)));
+ok (! odd_p(mpz(0)));
+ok (  odd_p(mpz(1)));
+ok (! odd_p(mpz(2)));
+ok (  odd_p(mpz(3)));
+
+#------------------------------------------------------------------------------
+# GMP::printf
+
+GMP::printf ("hello world\n");
+
+sub via_printf {                  # format @_ with GMP::printf via a temp file, return the text
+  my $s;
+  open TEMP, ">test.tmp" or die;  # scratch file in the current directory
+  GMP::printf TEMP @_;            # first element of @_ is the format, the rest its arguments
+  close TEMP or die;              # close before reading so the output is flushed to the file
+  open TEMP, "<test.tmp" or die;
+  read (TEMP, $s, 1024);          # reads at most 1024 characters; ample for these short outputs
+  close TEMP or die;
+  unlink 'test.tmp';              # remove the scratch file; failure to delete is ignored
+  return $s;
+}
+
+ok (sprintf ("%d", mpz(123)) eq '123');
+ok (sprintf ("%d %d %d", 456, mpz(123), 789) eq '456 123 789');
+ok (sprintf ("%d", mpq(15,16)) eq '15/16');
+ok (sprintf ("%f", mpf(1.5)) eq '1.500000');
+ok (sprintf ("%.2f", mpf(1.5)) eq '1.50');
+
+ok (sprintf ("%*d", 6, 123) eq '   123');
+ok (sprintf ("%*d", 6, mpz(123))  eq '   123');
+ok (sprintf ("%*d", 6, mpq(15,16))  eq ' 15/16');
+
+ok (sprintf ("%x", 123) eq '7b');
+ok (sprintf ("%x", mpz(123))  eq '7b');
+ok (sprintf ("%X", 123) eq '7B');
+ok (sprintf ("%X", mpz(123))  eq '7B');
+ok (sprintf ("%#x", 123) eq '0x7b');
+ok (sprintf ("%#x", mpz(123))  eq '0x7b');
+ok (sprintf ("%#X", 123) eq '0X7B');
+ok (sprintf ("%#X", mpz(123))  eq '0X7B');
+
+ok (sprintf ("%x", mpq(15,16))  eq 'f/10');
+ok (sprintf ("%X", mpq(15,16))  eq 'F/10');
+ok (sprintf ("%#x", mpq(15,16))  eq '0xf/0x10');
+ok (sprintf ("%#X", mpq(15,16))  eq '0XF/0X10');
+
+ok (sprintf ("%*.*f", 10, 3, 1.25) eq '     1.250');
+ok (sprintf ("%*.*f", 10, 3, mpf(1.5))   eq '     1.500');
+
+ok (via_printf ("%d", mpz(123)) eq '123');
+ok (via_printf ("%d %d %d", 456, mpz(123), 789) eq '456 123 789');
+ok (via_printf ("%d", mpq(15,16)) eq '15/16');
+ok (via_printf ("%f", mpf(1.5)) eq '1.500000');
+ok (via_printf ("%.2f", mpf(1.5)) eq '1.50');
+
+ok (via_printf ("%*d", 6, 123) eq '   123');
+ok (via_printf ("%*d", 6, mpz(123))  eq '   123');
+ok (via_printf ("%*d", 6, mpq(15,16))  eq ' 15/16');
+
+ok (via_printf ("%x", 123) eq '7b');
+ok (via_printf ("%x", mpz(123))  eq '7b');
+ok (via_printf ("%X", 123) eq '7B');
+ok (via_printf ("%X", mpz(123))  eq '7B');
+ok (via_printf ("%#x", 123) eq '0x7b');
+ok (via_printf ("%#x", mpz(123))  eq '0x7b');
+ok (via_printf ("%#X", 123) eq '0X7B');
+ok (via_printf ("%#X", mpz(123))  eq '0X7B');
+
+ok (via_printf ("%x", mpq(15,16))  eq 'f/10');
+ok (via_printf ("%X", mpq(15,16))  eq 'F/10');
+ok (via_printf ("%#x", mpq(15,16))  eq '0xf/0x10');
+ok (via_printf ("%#X", mpq(15,16))  eq '0XF/0X10');
+
+ok (via_printf ("%*.*f", 10, 3, 1.25) eq '     1.250');
+ok (via_printf ("%*.*f", 10, 3, mpf(1.5))   eq '     1.500');
+
+#------------------------------------------------------------------------------
+# GMP::sgn
+
+ok (sgn(-123) == -1);
+ok (sgn(0)    == 0);
+ok (sgn(123)  == 1);
+
+ok (sgn($uv_max) == 1);
+
+ok (sgn(-123.0) == -1);
+ok (sgn(0.0)    == 0);
+ok (sgn(123.0)  == 1);
+
+ok (sgn('-123') == -1);
+ok (sgn('0')    == 0);
+ok (sgn('123')  == 1);
+ok (sgn('-123.0') == -1);
+ok (sgn('0.0')    == 0);
+ok (sgn('123.0')  == 1);
+
+ok (sgn(substr('x-123x',1,4)) == -1);
+ok (sgn(substr('x0x',1,1))    == 0);
+ok (sgn(substr('x123x',1,3))  == 1);
+
+ok (mpz(-123)->sgn() == -1);
+ok (mpz(0)   ->sgn() == 0);
+ok (mpz(123) ->sgn() == 1);
+
+ok (mpq(-123)->sgn() == -1);
+ok (mpq(0)   ->sgn() == 0);
+ok (mpq(123) ->sgn() == 1);
+
+ok (mpf(-123)->sgn() == -1);
+ok (mpf(0)   ->sgn() == 0);
+ok (mpf(123) ->sgn() == 1);
+
+
+
+#------------------------------------------------------------------------------
+# overloaded constants
+
+if ($] > 5.00503) {
+  if (! do 'test2.pl') {
+    die "Cannot run test2.pl\n";
+  }
+}
+
+
+
+
+#------------------------------------------------------------------------------
+# $# stuff
+#
+# For some reason "local $#" doesn't leave $# back at its default undefined
+# state when exiting the block.
+
+{ local $# = 'hi %.0f there';
+  my $f = mpf(123);
+  ok ("$f" eq 'hi 123 there'); }
+
+
+
+# Local variables:
+# perl-indent-level: 2
+# End:
diff --git a/demos/perl/test2.pl b/demos/perl/test2.pl

new file mode 100644 (file)

index 0000000..645d3cf
--- /dev/null
+++ b/demos/perl/test2.pl
@@ -0,0 +1,64 @@
+# GMP perl module tests (part 2)
+
+# Copyright 2001 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation; either version 3 of the License, or (at
+# your option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+# The following uses of :constants seem to provoke segvs in perl 5.005_03,
+# so they're kept in a separate file to be run only on suitable perl versions.
+
+
+use GMP::Mpz qw(:constants);
+{
+  my $a = 123;
+  ok (UNIVERSAL::isa ($a, "GMP::Mpz"));
+}
+use GMP::Mpz qw(:noconstants);
+
+use GMP::Mpq qw(:constants);
+{
+  my $a = 123;
+  ok (UNIVERSAL::isa ($a, "GMP::Mpq"));
+}
+use GMP::Mpq qw(:noconstants);
+
+use GMP::Mpf qw(:constants);
+{
+  my $a = 123;
+  ok (UNIVERSAL::isa ($a, "GMP::Mpf"));
+}
+use GMP::Mpf qw(:noconstants);
+
+
+# compiled constants unchanged by clrbit etc when re-executed
+foreach (0, 1, 2) {
+  use GMP::Mpz qw(:constants);
+  my $a = 15;
+  my $b = 6;
+  use GMP::Mpz qw(:noconstants);
+  clrbit ($a, 0);
+  ok ($a == 14);
+  setbit ($b, 0);
+  ok ($b == 7);
+}
+
+1;
+
+
+# Local variables:
+# perl-indent-level: 2
+# End:
diff --git a/demos/perl/typemap b/demos/perl/typemap

new file mode 100644 (file)

index 0000000..06dc96d
--- /dev/null
+++ b/demos/perl/typemap
@@ -0,0 +1,97 @@
+# GMP module external subroutine type mappings.
+
+# Copyright 2001, 2003 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation; either version 3 of the License, or (at
+# your option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+TYPEMAP
+const_string           T_PV
+const_string_assume    CONST_STRING_ASSUME
+mpz                    MPZ
+mpq                    MPQ
+mpf                    MPF
+mpz_assume             MPZ_ASSUME
+mpq_assume             MPQ_ASSUME
+mpf_assume             MPF_ASSUME
+mpz_coerce             MPZ_COERCE
+mpq_coerce             MPQ_COERCE
+mpf_coerce_st0         MPF_COERCE_ST0
+mpf_coerce_def         MPF_COERCE_DEF
+randstate              RANDSTATE
+ulong_coerce           ULONG_COERCE
+malloced_string                MALLOCED_STRING
+order_noswap           ORDER_NOSWAP
+dummy                  DUMMY
+# perl 5.005 doesn't have UV in its standard typemap, so use this instead
+gmp_UV                 GMP_UV
+
+
+INPUT
+MPZ
+       class_or_croak ($arg, mpz_class); $var = SvMPZ($arg);
+MPQ
+       class_or_croak ($arg, mpq_class); $var = SvMPQ($arg);
+MPF
+       class_or_croak ($arg, mpf_class); $var = SvMPF($arg);
+MPZ_ASSUME
+        MPZ_ASSUME ($var, $arg)
+MPQ_ASSUME
+        MPQ_ASSUME ($var, $arg)
+MPF_ASSUME
+        MPF_ASSUME ($var, $arg)
+MPZ_COERCE
+       $var = coerce_mpz (tmp_mpz_${(my $stnum=$arg)=~s/[^0-9]//g;\$stnum}, $arg)
+MPQ_COERCE
+       $var = coerce_mpq (tmp_mpq_${(my $stnum=$arg)=~s/[^0-9]//g;\$stnum}, $arg)
+MPF_COERCE_ST0
+        /* precision follows ST(0) */
+        assert (sv_derived_from (ST(0), mpf_class));
+       $var = coerce_mpf (tmp_mpf_${(my $stnum=$arg)=~s/[^0-9]//g;\$stnum},
+                          $arg, mpf_get_prec (SvMPF(ST(0))))
+MPF_COERCE_DEF
+        /* default precision used */
+       $var = coerce_mpf (tmp_mpf_${(my $stnum=$arg)=~s/[^0-9]//g;\$stnum},
+                          $arg, mpf_get_default_prec())
+RANDSTATE
+       class_or_croak ($arg, rand_class); $var = SvRANDSTATE($arg);
+ULONG_COERCE
+       $var = coerce_ulong ($arg)
+ORDER_NOSWAP
+       assert ($arg != &PL_sv_yes);
+DUMMY
+       /* dummy $var */
+CONST_STRING_ASSUME
+        /* No need to check for SvPOKp and use SvPV, this mapping is
+           only used for overload_constant, which always gets literal
+           strings.  */
+       assert (SvPOK ($arg));
+       $var = SvPVX ($arg);
+
+
+OUTPUT
+MPZ
+    sv_bless (sv_setref_pv ($arg, NULL, $var), mpz_class_hv);
+MPQ
+    sv_bless (sv_setref_pv ($arg, NULL, $var), mpq_class_hv);
+MPF
+    sv_bless (sv_setref_pv ($arg, NULL, $var), mpf_class_hv);
+RANDSTATE
+    sv_setref_pv ($arg, rand_class, $var);
+MALLOCED_STRING
+    sv_usepvn_mg ($arg, $var, strlen($var));
+GMP_UV
+    sv_setuv ($arg, (UV) ($var));
diff --git a/demos/pexpr-config-h.in b/demos/pexpr-config-h.in

new file mode 100644 (file)

index 0000000..77f767f
--- /dev/null
+++ b/demos/pexpr-config-h.in
@@ -0,0 +1,45 @@
+/* Templates for pexpr program configuration.   -*- mode:c -*-
+
+Copyright 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+PARTICULAR PURPOSE.  See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program.  If not, see http://www.gnu.org/licenses/.  */
+
+
+/* Define if you have the <sys/resource.h> header file. */
+#define HAVE_SYS_RESOURCE_H  @HAVE_SYS_RESOURCE_H_01@
+
+/* Define if you have the `clock' function. */
+#define HAVE_CLOCK  @HAVE_CLOCK_01@
+
+/* Define if you have the `cputime' function. */
+#define HAVE_CPUTIME  @HAVE_CPUTIME_01@
+
+/* Define if you have the `getrusage' function. */
+#define HAVE_GETRUSAGE  @HAVE_GETRUSAGE_01@
+
+/* Define if you have the `gettimeofday' function. */
+#define HAVE_GETTIMEOFDAY  @HAVE_GETTIMEOFDAY_01@
+
+/* Define if you have the `sigaction' function. */
+#define HAVE_SIGACTION  @HAVE_SIGACTION_01@
+
+/* Define if you have the `sigaltstack' function. */
+#define HAVE_SIGALTSTACK  @HAVE_SIGALTSTACK_01@
+
+/* Define if you have the `sigstack' function. */
+#define HAVE_SIGSTACK  @HAVE_SIGSTACK_01@
+
+/* Define if the system has the type `stack_t'. */
+#define HAVE_STACK_T  @HAVE_STACK_T_01@
diff --git a/demos/pexpr.c b/demos/pexpr.c

new file mode 100644 (file)

index 0000000..ec4eecb
--- /dev/null
+++ b/demos/pexpr.c
@@ -0,0 +1,1378 @@
+/* Program for computing integer expressions using the GNU Multiple Precision
+   Arithmetic Library.
+
+Copyright 1997, 1999, 2000, 2001, 2002, 2005 Free Software Foundation, Inc.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+PARTICULAR PURPOSE.  See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program.  If not, see http://www.gnu.org/licenses/.  */
+
+
+/* This expression evaluator works by building an expression tree (using a
+   recursive descent parser) which is then evaluated.  The expression tree is
+   useful since we want to optimize certain expressions (like a^b % c).
+
+   Usage: pexpr [options] expr ...
+   (Assuming you called the executable `pexpr' of course.)
+
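+   Command line options:
+
+   -b        print output in binary
+   -o        print output in octal
+   -d        print output in decimal (the default)
+   -x        print output in hexadecimal
+   -X        print output in hexadecimal using upper-case digits
+   -b<NUM>   print output in base NUM (2 to 62)
+   -t        print timing information
+   -v        print the version of the GMP library in use
+   -html     output html
+   -wml      output wml
+   -split    split long lines each 80th digit
+   -noprint  do not print the result, only its approximate size in digits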
+*/
+
+/* Define LIMIT_RESOURCE_USAGE if you want to make sure the program doesn't
+   use up extensive resources (cpu, memory).  Useful for the GMP demo on the
+   GMP web site, since we cannot load the server too much.  */
+
+#include "pexpr-config.h"
+
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <setjmp.h>
+#include <signal.h>
+#include <ctype.h>
+
+#include <time.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#if HAVE_SYS_RESOURCE_H
+#include <sys/resource.h>
+#endif
+
+#include "gmp.h"
+
+/* SunOS 4 and HPUX 9 don't define a canonical SIGSTKSZ, use a default. */
+#ifndef SIGSTKSZ
+#define SIGSTKSZ  4096
+#endif
+
+
+/* TIME(t, func): execute the statement FUNC once and store in T the
+   difference between the cputime() readings taken immediately before and
+   after it.  Both readings and the difference are held in `int'.  The
+   do/while(0) wrapper makes the macro usable as a single statement.  */
+#define TIME(t,func)                                                   \
+  do { int __t0, __tmp;                                                        \
+    __t0 = cputime ();                                                 \
+    {func;}                                                            \
+    __tmp = cputime () - __t0;                                         \
+    (t) = __tmp;                                                       \
+  } while (0)
+
+/* GMP version 1.x compatibility.  */
+#if ! (__GNU_MP_VERSION >= 2)
+typedef MP_INT __mpz_struct;
+typedef __mpz_struct mpz_t[1];
+typedef __mpz_struct *mpz_ptr;
+#define mpz_fdiv_q     mpz_div
+#define mpz_fdiv_r     mpz_mod
+#define mpz_tdiv_q_2exp        mpz_div_2exp
+#define mpz_sgn(Z) ((Z)->size < 0 ? -1 : (Z)->size > 0)
+#endif
+
+/* GMP version 2.0 compatibility.  */
+#if ! (__GNU_MP_VERSION > 2 || __GNU_MP_VERSION_MINOR >= 1)
+#define mpz_swap(a,b) \
+  do { __mpz_struct __t; __t = *a; *a = *b; *b = __t;} while (0)
+#endif
+
+jmp_buf errjmpbuf;
+
+/* Operation codes stored in expression tree nodes.  LIT marks a leaf that
+   holds a number; NOP is used only as the terminator of the fns[] table.
+   The remaining codes name unary or binary operations.  */
+enum op_t {NOP, LIT, NEG, NOT, PLUS, MINUS, MULT, DIV, MOD, REM, INVMOD, POW,
+          AND, IOR, XOR, SLL, SRA, POPCNT, HAMDIST, GCD, LCM, SQRT, ROOT, FAC,
+          LOG, LOG2, FERMAT, MERSENNE, FIBONACCI, RANDOM, NEXTPRIME, BINOM,
+          TIMING};
+
+/* Type for the expression tree.  */
+/* A node is either a literal (op == LIT, value in operands.val) or an
+   operation (any other op, operands in operands.ops).  For unary operations
+   operands.ops.rhs is NULL.  The two union members overlap, so `op' must be
+   checked before either one is accessed.  */
+struct expr
+{
+  enum op_t op;
+  union
+  {
+    struct {struct expr *lhs, *rhs;} ops;
+    mpz_t val;
+  } operands;
+};
+
+/* Expression trees are always handled through pointers to nodes.  */
+typedef struct expr *expr_t;
+
+void cleanup_and_exit __GMP_PROTO ((int));
+
+char *skipspace __GMP_PROTO ((char *));
+void makeexp __GMP_PROTO ((expr_t *, enum op_t, expr_t, expr_t));
+void free_expr __GMP_PROTO ((expr_t));
+char *expr __GMP_PROTO ((char *, expr_t *));
+char *term __GMP_PROTO ((char *, expr_t *));
+char *power __GMP_PROTO ((char *, expr_t *));
+char *factor __GMP_PROTO ((char *, expr_t *));
+int match __GMP_PROTO ((char *, char *));
+int matchp __GMP_PROTO ((char *, char *));
+int cputime __GMP_PROTO ((void));
+
+void mpz_eval_expr __GMP_PROTO ((mpz_ptr, expr_t));
+void mpz_eval_mod_expr __GMP_PROTO ((mpz_ptr, expr_t, mpz_ptr));
+
+char *error;
+int flag_print = 1;
+int print_timing = 0;
+int flag_html = 0;
+int flag_wml = 0;
+int flag_splitup_output = 0;
+char *newline = "";
+gmp_randstate_t rstate;
+
+
+
+/* Fallback definitions of cputime(), used only when the system does not
+   already supply one.  The value is a CPU time reading in milliseconds:
+   user time from getrusage() when that is available, otherwise the
+   processor time reported by clock(), otherwise a constant 0.  */
+#if ! HAVE_CPUTIME
+#if HAVE_GETRUSAGE
+int
+cputime (void)
+{
+  struct rusage usage;
+
+  /* 0 selects the calling process.  */
+  getrusage (0, &usage);
+  return usage.ru_utime.tv_usec / 1000 + usage.ru_utime.tv_sec * 1000;
+}
+#elif HAVE_CLOCK
+int
+cputime (void)
+{
+  /* With a coarse clock multiply first to keep precision; with a fine
+     clock divide the tick rate first to keep the product from growing.  */
+  if (CLOCKS_PER_SEC >= 100000)
+    return clock () / (CLOCKS_PER_SEC / 1000);
+  return clock () * 1000 / CLOCKS_PER_SEC;
+}
+#else
+int
+cputime (void)
+{
+  /* No timing source available.  */
+  return 0;
+}
+#endif
+#endif
+
+
+/* Return non-zero when a local variable of this function lies at a lower
+   address than XP, which the caller points at one of its own locals.  */
+int
+stack_downwards_helper (char *xp)
+{
+  char  callee_local;
+  char  *callee_addr = &callee_local;
+
+  return callee_addr < xp;
+}
+
+/* Return non-zero when the stack appears to grow towards lower addresses,
+   judged by comparing the address of a local here with that of a local in
+   a called function.  */
+int
+stack_downwards_p (void)
+{
+  char  caller_local;
+  int   downwards;
+
+  downwards = stack_downwards_helper (&caller_local);
+  return downwards;
+}
+
+
+/* Arrange for cleanup_and_exit to be called on SIGILL, SIGSEGV, SIGBUS
+   (where it exists), SIGFPE and SIGABRT, and additionally on SIGXCPU when
+   LIMIT_RESOURCE_USAGE is defined.
+
+   Where the system supports it, the handlers are asked to run on a
+   separately allocated stack, so that a stack overflow can still be
+   reported.  If that stack cannot be allocated or installed, a diagnostic
+   is printed and the handlers are installed without SA_ONSTACK.
+
+   When LIMIT_RESOURCE_USAGE is defined, limits on core dump size, CPU
+   time, data segment size and stack size are set as well.
+
+   Note that the SIGNAL macro defined here remains defined for the rest of
+   the translation unit.  */
+void
+setup_error_handler (void)
+{
+#if HAVE_SIGACTION
+  struct sigaction act;
+  act.sa_handler = cleanup_and_exit;
+  sigemptyset (&(act.sa_mask));
+#define SIGNAL(sig)  sigaction (sig, &act, NULL)
+#else
+  /* Dummy object so that the sa_flags assignments below still compile.  */
+  struct { int sa_flags; } act;
+#define SIGNAL(sig)  signal (sig, cleanup_and_exit)
+#endif
+  act.sa_flags = 0;
+
+  /* Set up a stack for signal handling.  A typical cause of error is stack
+     overflow, and in such situation a signal can not be delivered on the
+     overflown stack.  */
+#if HAVE_SIGALTSTACK
+  {
+    /* AIX uses stack_t, MacOS uses struct sigaltstack, various other
+       systems have both. */
+#if HAVE_STACK_T
+    stack_t s;
+#else
+    struct sigaltstack s;
+#endif
+    void *base = malloc (SIGSTKSZ);
+
+    if (base == NULL)
+      perror ("malloc");
+    else
+      {
+        s.ss_sp = base;
+        s.ss_size = SIGSTKSZ;
+        s.ss_flags = 0;
+        if (sigaltstack (&s, NULL) != 0)
+          {
+            perror("sigaltstack");
+            free (base);
+          }
+        else
+          /* Only ask for the alternate stack once it really exists.  */
+          act.sa_flags = SA_ONSTACK;
+      }
+  }
+#else
+#if HAVE_SIGSTACK
+  {
+    struct sigstack s;
+    void *base = malloc (SIGSTKSZ);
+
+    if (base == NULL)
+      perror ("malloc");
+    else
+      {
+        s.ss_sp = base;
+        /* sigstack wants the initial stack pointer, which is the high end
+           of the area when the stack grows downwards.  */
+        if (stack_downwards_p ())
+          s.ss_sp += SIGSTKSZ;
+        s.ss_onstack = 0;
+        if (sigstack (&s, NULL) != 0)
+          {
+            perror("sigstack");
+            free (base);
+          }
+        else
+          act.sa_flags = SA_ONSTACK;
+      }
+  }
+#endif
+#endif
+
+#ifdef LIMIT_RESOURCE_USAGE
+  {
+    struct rlimit limit;
+
+    /* No core dumps.  */
+    limit.rlim_cur = limit.rlim_max = 0;
+    setrlimit (RLIMIT_CORE, &limit);
+
+    /* CPU time: soft limit 3, hard limit 4.  */
+    limit.rlim_cur = 3;
+    limit.rlim_max = 4;
+    setrlimit (RLIMIT_CPU, &limit);
+
+    /* Data segment: 16 Mbytes.  */
+    limit.rlim_cur = limit.rlim_max = 16 * 1024 * 1024;
+    setrlimit (RLIMIT_DATA, &limit);
+
+    /* Stack: soft limit 4 Mbytes, hard limit left as it was.  */
+    getrlimit (RLIMIT_STACK, &limit);
+    limit.rlim_cur = 4 * 1024 * 1024;
+    setrlimit (RLIMIT_STACK, &limit);
+
+    SIGNAL (SIGXCPU);
+  }
+#endif /* LIMIT_RESOURCE_USAGE */
+
+  SIGNAL (SIGILL);
+  SIGNAL (SIGSEGV);
+#ifdef SIGBUS /* not in mingw */
+  SIGNAL (SIGBUS);
+#endif
+  SIGNAL (SIGFPE);
+  SIGNAL (SIGABRT);
+}
+
+/* Print the expression ARG on stderr and, unless HTML output was requested,
+   a line with a `^' marker under the character at offset POS.  A negative
+   POS puts the marker under the first character.  */
+static void
+show_error_position (const char *arg, int pos)
+{
+  fprintf (stderr, "       %s%s\n", arg, newline);
+  if (! flag_html)
+    {
+      /* ??? Dunno how to align expression position with arrow in
+         HTML ??? */
+      fprintf (stderr, "       ");
+      while (--pos >= 0)
+        putc (' ', stderr);
+      fprintf (stderr, "^\n");
+    }
+}
+
+/* Parse the command line options, then parse, evaluate and print each
+   remaining argument as an expression.
+
+   The exit status is the OR of 1 (at least one expression failed to parse)
+   and 2 (at least one expression failed to evaluate).  Invalid options and
+   memory exhaustion exit with -1.  */
+int
+main (int argc, char **argv)
+{
+  struct expr *e;
+  int i;
+  mpz_t r;
+  int errcode = 0;
+  char *str;
+  int base = 10;
+
+  setup_error_handler ();
+
+  gmp_randinit (rstate, GMP_RAND_ALG_LC, 128);
+
+  /* Seed the random state from the current time.  */
+  {
+#if HAVE_GETTIMEOFDAY
+    struct timeval tv;
+    gettimeofday (&tv, NULL);
+    gmp_randseed_ui (rstate, tv.tv_sec + tv.tv_usec);
+#else
+    time_t t;
+    time (&t);
+    gmp_randseed_ui (rstate, t);
+#endif
+  }
+
+  mpz_init (r);
+
+  while (argc > 1 && argv[1][0] == '-')
+    {
+      char *arg = argv[1];
+
+      /* A leading minus followed by a digit starts an expression, not an
+         option.  */
+      if (arg[1] >= '0' && arg[1] <= '9')
+        break;
+
+      if (arg[1] == 't')
+        print_timing = 1;
+      else if (arg[1] == 'b' && arg[2] >= '0' && arg[2] <= '9')
+        {
+          base = atoi (arg + 2);
+          if (base < 2 || base > 62)
+            {
+              fprintf (stderr, "error: invalid output base\n");
+              exit (-1);
+            }
+        }
+      else if (arg[1] == 'b' && arg[2] == 0)
+        base = 2;
+      else if (arg[1] == 'x' && arg[2] == 0)
+        base = 16;
+      else if (arg[1] == 'X' && arg[2] == 0)
+        base = -16;     /* negative base: upper-case digits */
+      else if (arg[1] == 'o' && arg[2] == 0)
+        base = 8;
+      else if (arg[1] == 'd' && arg[2] == 0)
+        base = 10;
+      else if (arg[1] == 'v' && arg[2] == 0)
+        {
+          printf ("pexpr linked to gmp %s\n", __gmp_version);
+        }
+      else if (strcmp (arg, "-html") == 0)
+        {
+          flag_html = 1;
+          newline = "<br>";
+        }
+      else if (strcmp (arg, "-wml") == 0)
+        {
+          flag_wml = 1;
+          newline = "<br/>";
+        }
+      else if (strcmp (arg, "-split") == 0)
+        {
+          flag_splitup_output = 1;
+        }
+      else if (strcmp (arg, "-noprint") == 0)
+        {
+          flag_print = 0;
+        }
+      else
+        {
+          fprintf (stderr, "error: unknown option `%s'\n", arg);
+          exit (-1);
+        }
+      argv++;
+      argc--;
+    }
+
+  for (i = 1; i < argc; i++)
+    {
+      int jmpval;
+
+      /* Set up error handler for parsing expression.  The parser passes
+         the error position through longjmp as a pointer squeezed into an
+         int; subtracting the start of the argument and narrowing the
+         result to int recovers the offset.  */
+      jmpval = setjmp (errjmpbuf);
+      if (jmpval != 0)
+        {
+          fprintf (stderr, "error: %s%s\n", error, newline);
+          show_error_position (argv[i], (int) (jmpval - (long) argv[i]));
+
+          errcode |= 1;
+          continue;
+        }
+
+      str = expr (argv[i], &e);
+
+      if (str[0] != 0)
+        {
+          fprintf (stderr,
+                   "error: garbage where end of expression expected%s\n",
+                   newline);
+          show_error_position (argv[i], (int) (str - argv[i]));
+
+          errcode |= 1;
+          free_expr (e);
+          continue;
+        }
+
+      /* Set up error handler for evaluating expression.  */
+      if (setjmp (errjmpbuf))
+        {
+          fprintf (stderr, "error: %s%s\n", error, newline);
+          show_error_position (argv[i], (int) (str - argv[i]));
+
+          errcode |= 2;
+          /* E was completely built before this setjmp and has not been
+             reassigned since, so it can be released here.  */
+          free_expr (e);
+          continue;
+        }
+
+      if (print_timing)
+        {
+          int t;
+          TIME (t, mpz_eval_expr (r, e));
+          printf ("computation took %d ms%s\n", t, newline);
+        }
+      else
+        mpz_eval_expr (r, e);
+
+      if (flag_print)
+        {
+          size_t out_len;
+          char *tmp, *s;
+
+          /* Room for the digits, a possible minus sign and the
+             terminating null.  */
+          out_len = mpz_sizeinbase (r, base >= 0 ? base : -base) + 2;
+#ifdef LIMIT_RESOURCE_USAGE
+          if (out_len > 100000)
+            {
+              printf ("result is about %ld digits, not printing it%s\n",
+                      (long) out_len - 3, newline);
+              exit (-2);
+            }
+#endif
+          tmp = malloc (out_len);
+          if (tmp == NULL)
+            {
+              fprintf (stderr, "error: out of memory%s\n", newline);
+              exit (-1);
+            }
+
+          if (print_timing)
+            {
+              int t;
+              printf ("output conversion ");
+              TIME (t, mpz_get_str (tmp, base, r));
+              printf ("took %d ms%s\n", t, newline);
+            }
+          else
+            mpz_get_str (tmp, base, r);
+
+          out_len = strlen (tmp);
+          if (flag_splitup_output)
+            {
+              /* Emit full 80-character lines, then the remainder.  */
+              for (s = tmp; out_len > 80; s += 80)
+                {
+                  fwrite (s, 1, 80, stdout);
+                  printf ("%s\n", newline);
+                  out_len -= 80;
+                }
+
+              fwrite (s, 1, out_len, stdout);
+            }
+          else
+            {
+              fwrite (tmp, 1, out_len, stdout);
+            }
+
+          free (tmp);
+          printf ("%s\n", newline);
+        }
+      else
+        {
+          printf ("result is approximately %ld digits%s\n",
+                  (long) mpz_sizeinbase (r, base >= 0 ? base : -base),
+                  newline);
+        }
+
+      free_expr (e);
+    }
+
+  exit (errcode);
+}
+
+/* Parse an expression: an optional unary `+', `-' or `~' applied to the
+   first term, followed by any number of further terms joined by `+', `-',
+   `plus' or `minus'.  Binary operators associate to the left.  The tree is
+   stored through E and the return value points at the first character that
+   is not part of the expression.  */
+char *
+expr (char *str, expr_t *e)
+{
+  expr_t rhs;
+
+  str = skipspace (str);
+  switch (str[0])
+    {
+    case '+':
+      str = term (str + 1, e);
+      break;
+    case '-':
+      str = term (str + 1, e);
+      makeexp (e, NEG, *e, NULL);
+      break;
+    case '~':
+      str = term (str + 1, e);
+      makeexp (e, NOT, *e, NULL);
+      break;
+    default:
+      str = term (str, e);
+      break;
+    }
+
+  while (1)
+    {
+      enum op_t op;
+      int oplen;
+
+      str = skipspace (str);
+      if (str[0] == '+')
+        {
+          op = PLUS;
+          oplen = 1;
+        }
+      else if (str[0] == '-')
+        {
+          op = MINUS;
+          oplen = 1;
+        }
+      else if (str[0] == 'p' && match ("plus", str))
+        {
+          op = PLUS;
+          oplen = 4;
+        }
+      else if (str[0] == 'm' && match ("minus", str))
+        {
+          op = MINUS;
+          oplen = 5;
+        }
+      else
+        return str;
+
+      /* Fold the next term into the tree built so far.  */
+      str = term (str + oplen, &rhs);
+      makeexp (e, op, *e, rhs);
+    }
+}
+
+/* Parse a term: a power followed by any number of further powers joined by
+   a multiplicative operator.  The operators are
+
+     `*', `mul', `times'          multiplication
+     `/', `div', `thru', `through' division
+     `%', `mod'                   modulo
+     `rem'                        remainder
+     `invmod'                     modular inverse (GMP 2 and later)
+
+   Operators associate to the left.  The tree is stored through E and the
+   return value points at the first character that is not part of the
+   term.  */
+char *
+term (char *str, expr_t *e)
+{
+  expr_t e2;
+
+  str = power (str, e);
+  for (;;)
+    {
+      str = skipspace (str);
+      switch (str[0])
+        {
+        case 'm':
+          if (match ("mul", str))
+            {
+              str = power (str + 3, &e2);
+              makeexp (e, MULT, *e, e2);
+              break;
+            }
+          if (match ("mod", str))
+            {
+              str = power (str + 3, &e2);
+              makeexp (e, MOD, *e, e2);
+              break;
+            }
+          /* Possibly `minus'; that is for the caller to handle.  */
+          return str;
+        case 'd':
+          if (match ("div", str))
+            {
+              str = power (str + 3, &e2);
+              makeexp (e, DIV, *e, e2);
+              break;
+            }
+          return str;
+        case 'r':
+          if (match ("rem", str))
+            {
+              str = power (str + 3, &e2);
+              makeexp (e, REM, *e, e2);
+              break;
+            }
+          return str;
+#if __GNU_MP_VERSION >= 2
+        /* Same version condition as the "invmod" entry of the function
+           table and the INVMOD case of the evaluator.  */
+        case 'i':
+          if (match ("invmod", str))
+            {
+              str = power (str + 6, &e2);
+              makeexp (e, INVMOD, *e, e2);
+              break;
+            }
+          return str;
+#endif
+        case 't':
+          if (match ("times", str))
+            {
+              str = power (str + 5, &e2);
+              makeexp (e, MULT, *e, e2);
+              break;
+            }
+          /* `through' has to be tried before its prefix `thru'.  */
+          if (match ("through", str))
+            {
+              str = power (str + 7, &e2);
+              makeexp (e, DIV, *e, e2);
+              break;
+            }
+          if (match ("thru", str))
+            {
+              str = power (str + 4, &e2);
+              makeexp (e, DIV, *e, e2);
+              break;
+            }
+          return str;
+        case '*':
+          str = power (str + 1, &e2);
+          makeexp (e, MULT, *e, e2);
+          break;
+        case '/':
+          str = power (str + 1, &e2);
+          makeexp (e, DIV, *e, e2);
+          break;
+        case '%':
+          str = power (str + 1, &e2);
+          makeexp (e, MOD, *e, e2);
+          break;
+        default:
+          return str;
+        }
+    }
+}
+
+/* Parse a factor, any `!' characters that follow it directly (each one
+   wraps the tree in a FAC node), and an optional `^' exponent.  The
+   exponent is parsed by a recursive call, which makes `^' associate to the
+   right.  The tree is stored through E and the return value points past
+   what was parsed.  */
+char *
+power (char *str, expr_t *e)
+{
+  expr_t exponent;
+
+  for (str = factor (str, e); str[0] == '!'; str++)
+    makeexp (e, FAC, *e, NULL);
+
+  str = skipspace (str);
+  if (str[0] != '^')
+    return str;
+
+  str = power (str + 1, &exponent);
+  makeexp (e, POW, *e, exponent);
+  return str;
+}
+
+/* Test whether STR begins with the word S.  Return 0 if it does not;
+   otherwise return the length of S plus the number of spaces that follow
+   it in STR.  */
+int
+match (char *s, char *str)
+{
+  char *p = str;
+
+  for (; *s != 0; s++, p++)
+    if (*p != *s)
+      return 0;
+
+  return skipspace (p) - str;
+}
+
+/* Test whether STR begins with the word S followed, after optional spaces,
+   by an opening parenthesis.  Return 0 if it does not; otherwise return
+   the number of characters up to and including that parenthesis.  */
+int
+matchp (char *s, char *str)
+{
+  char *p = str;
+
+  while (*s != 0)
+    if (*p++ != *s++)
+      return 0;
+
+  p = skipspace (p);
+  if (p[0] != '(')
+    return 0;
+  return p - str + 1;
+}
+
+/* One entry of the table of functions callable as NAME(ARG, ...).  */
+struct functions
+{
+  char *spelling;      /* name as written in an expression */
+  enum op_t op;                /* operation code for the resulting tree node */
+  int arity; /* 1 or 2 means real arity; 0 means arbitrary.  */
+};
+
+/* Table of known functions, ended by an entry whose op is NOP.  Several
+   spellings may map to the same operation.  Some entries are present only
+   for sufficiently recent GMP versions, as selected by the preprocessor
+   conditions.  */
+struct functions fns[] =
+{
+  {"sqrt", SQRT, 1},
+#if __GNU_MP_VERSION >= 2
+  {"root", ROOT, 2},
+  {"popc", POPCNT, 1},
+  {"hamdist", HAMDIST, 2},
+#endif
+  {"gcd", GCD, 0},
+#if __GNU_MP_VERSION > 2 || __GNU_MP_VERSION_MINOR >= 1
+  {"lcm", LCM, 0},
+#endif
+  {"and", AND, 0},
+  {"ior", IOR, 0},
+#if __GNU_MP_VERSION > 2 || __GNU_MP_VERSION_MINOR >= 1
+  {"xor", XOR, 0},
+#endif
+  {"plus", PLUS, 0},
+  {"pow", POW, 2},
+  {"minus", MINUS, 2},
+  {"mul", MULT, 0},
+  {"div", DIV, 2},
+  {"mod", MOD, 2},
+  {"rem", REM, 2},
+#if __GNU_MP_VERSION >= 2
+  {"invmod", INVMOD, 2},
+#endif
+  {"log", LOG, 2},
+  {"log2", LOG2, 1},
+  {"F", FERMAT, 1},
+  {"M", MERSENNE, 1},
+  {"fib", FIBONACCI, 1},
+  {"Fib", FIBONACCI, 1},
+  {"random", RANDOM, 1},
+  {"nextprime", NEXTPRIME, 1},
+  {"binom", BINOM, 2},
+  {"binomial", BINOM, 2},
+  {"fac", FAC, 1},
+  {"fact", FAC, 1},
+  {"factorial", FAC, 1},
+  {"time", TIMING, 1},
+  {"", NOP, 0}
+};
+
+/* Parse a factor, which is one of
+
+     NAME ( expr )              call of a one-argument function from fns[]
+     NAME ( expr , expr ... )   call of a function of two or more arguments
+     ( expr )                   parenthesized expression
+     NUMBER                     literal, with the base prefixes accepted by
+                                mpz_set_str for base 0
+
+   The tree is stored through E and the return value points past the
+   factor.  On a syntax error, including a malformed number, `error' is set
+   and control is transferred to errjmpbuf with the error position.  */
+char *
+factor (char *str, expr_t *e)
+{
+  expr_t e1, e2;
+
+  str = skipspace (str);
+
+  /* The <ctype.h> functions need a value in the unsigned char range.  */
+  if (isalpha ((unsigned char) str[0]))
+    {
+      int i;
+      int cnt;
+
+      /* First try the functions that take exactly one argument.  */
+      for (i = 0; fns[i].op != NOP; i++)
+        {
+          if (fns[i].arity == 1)
+            {
+              cnt = matchp (fns[i].spelling, str);
+              if (cnt != 0)
+                {
+                  str = expr (str + cnt, &e1);
+                  str = skipspace (str);
+                  if (str[0] != ')')
+                    {
+                      error = "expected `)'";
+                      longjmp (errjmpbuf, (int) (long) str);
+                    }
+                  makeexp (e, fns[i].op, e1, NULL);
+                  return str + 1;
+                }
+            }
+        }
+
+      /* Then the functions that take two or more arguments.  */
+      for (i = 0; fns[i].op != NOP; i++)
+        {
+          if (fns[i].arity != 1)
+            {
+              cnt = matchp (fns[i].spelling, str);
+              if (cnt != 0)
+                {
+                  str = expr (str + cnt, &e1);
+                  str = skipspace (str);
+
+                  if (str[0] != ',')
+                    {
+                      error = "expected `,' and another operand";
+                      longjmp (errjmpbuf, (int) (long) str);
+                    }
+
+                  str = skipspace (str + 1);
+                  str = expr (str, &e2);
+                  str = skipspace (str);
+
+                  if (fns[i].arity == 0)
+                    {
+                      /* Arbitrary arity: fold further arguments into a
+                         left-leaning chain of binary nodes.  */
+                      while (str[0] == ',')
+                        {
+                          makeexp (&e1, fns[i].op, e1, e2);
+                          str = skipspace (str + 1);
+                          str = expr (str, &e2);
+                          str = skipspace (str);
+                        }
+                    }
+
+                  if (str[0] != ')')
+                    {
+                      error = "expected `)'";
+                      longjmp (errjmpbuf, (int) (long) str);
+                    }
+
+                  makeexp (e, fns[i].op, e1, e2);
+                  return str + 1;
+                }
+            }
+        }
+    }
+
+  if (str[0] == '(')
+    {
+      str = expr (str + 1, e);
+      str = skipspace (str);
+      if (str[0] != ')')
+        {
+          error = "expected `)'";
+          longjmp (errjmpbuf, (int) (long) str);
+        }
+      str++;
+    }
+  else if (str[0] >= '0' && str[0] <= '9')
+    {
+      expr_t res;
+      char *s, *sc;
+      int bad;
+
+      /* Copy the run of alphanumeric characters into a null-terminated
+         buffer for mpz_set_str.  */
+      s = str;
+      while (isalnum ((unsigned char) str[0]))
+        str++;
+      sc = malloc (str - s + 1);
+      memcpy (sc, s, str - s);
+      sc[str - s] = 0;
+
+      res = malloc (sizeof (struct expr));
+      res -> op = LIT;
+      mpz_init (res->operands.val);
+
+      /* mpz_set_str returns non-zero when the string is not a valid
+         number in the selected base.  */
+      bad = mpz_set_str (res->operands.val, sc, 0) != 0;
+      free (sc);
+      if (bad)
+        {
+          mpz_clear (res->operands.val);
+          free (res);
+          error = "invalid number";
+          longjmp (errjmpbuf, (int) (long) s);
+        }
+      *e = res;
+    }
+  else
+    {
+      error = "operand expected";
+      longjmp (errjmpbuf, (int) (long) str);
+    }
+  return str;
+}
+
+/* Return a pointer to the first character of STR that is not a space.
+   Only the space character itself is skipped, not tabs or newlines.  */
+char *
+skipspace (char *str)
+{
+  for (; str[0] == ' '; str++)
+    ;
+  return str;
+}
+
+/* Allocate a new tree node for operation OP with left operand LHS and
+   right operand RHS, and store a pointer to it through R.  Callers pass
+   NULL as RHS for unary operations.  */
+void
+makeexp (expr_t *r, enum op_t op, expr_t lhs, expr_t rhs)
+{
+  expr_t node = malloc (sizeof (struct expr));
+
+  node->operands.ops.rhs = rhs;
+  node->operands.ops.lhs = lhs;
+  node->op = op;
+  *r = node;
+}
+
+/* Free the memory used by expression E: the operand subtrees of an
+   operation node, the number held by a literal node, and finally the node
+   itself.  E must not be NULL and must not be used afterwards.  */
+void
+free_expr (expr_t e)
+{
+  if (e->op != LIT)
+    {
+      free_expr (e->operands.ops.lhs);
+      /* Unary operations have no right operand.  */
+      if (e->operands.ops.rhs != NULL)
+        free_expr (e->operands.ops.rhs);
+    }
+  else
+    {
+      mpz_clear (e->operands.val);
+    }
+
+  /* Every node is obtained from malloc, so give the node back as well.  */
+  free (e);
+}
+
+/* Evaluate the expression E and put the result in R.  */
+void
+mpz_eval_expr (mpz_ptr r, expr_t e)
+{
+  mpz_t lhs, rhs;
+
+  switch (e->op)
+    {
+    case LIT:
+      mpz_set (r, e->operands.val);
+      return;
+    case PLUS:
+      mpz_init (lhs); mpz_init (rhs);
+      mpz_eval_expr (lhs, e->operands.ops.lhs);
+      mpz_eval_expr (rhs, e->operands.ops.rhs);
+      mpz_add (r, lhs, rhs);
+      mpz_clear (lhs); mpz_clear (rhs);
+      return;
+    case MINUS:
+      mpz_init (lhs); mpz_init (rhs);
+      mpz_eval_expr (lhs, e->operands.ops.lhs);
+      mpz_eval_expr (rhs, e->operands.ops.rhs);
+      mpz_sub (r, lhs, rhs);
+      mpz_clear (lhs); mpz_clear (rhs);
+      return;
+    case MULT:
+      mpz_init (lhs); mpz_init (rhs);
+      mpz_eval_expr (lhs, e->operands.ops.lhs);
+      mpz_eval_expr (rhs, e->operands.ops.rhs);
+      mpz_mul (r, lhs, rhs);
+      mpz_clear (lhs); mpz_clear (rhs);
+      return;
+    case DIV:
+      mpz_init (lhs); mpz_init (rhs);
+      mpz_eval_expr (lhs, e->operands.ops.lhs);
+      mpz_eval_expr (rhs, e->operands.ops.rhs);
+      mpz_fdiv_q (r, lhs, rhs);
+      mpz_clear (lhs); mpz_clear (rhs);
+      return;
+    case MOD:
+      mpz_init (rhs);
+      mpz_eval_expr (rhs, e->operands.ops.rhs);
+      mpz_abs (rhs, rhs);
+      mpz_eval_mod_expr (r, e->operands.ops.lhs, rhs);
+      mpz_clear (rhs);
+      return;
+    case REM:
+      /* Check if lhs operand is POW expression and optimize for that case.  */
+      if (e->operands.ops.lhs->op == POW)
+       {
+         mpz_t powlhs, powrhs;
+         mpz_init (powlhs);
+         mpz_init (powrhs);
+         mpz_init (rhs);
+         mpz_eval_expr (powlhs, e->operands.ops.lhs->operands.ops.lhs);
+         mpz_eval_expr (powrhs, e->operands.ops.lhs->operands.ops.rhs);
+         mpz_eval_expr (rhs, e->operands.ops.rhs);
+         mpz_powm (r, powlhs, powrhs, rhs);
+         if (mpz_cmp_si (rhs, 0L) < 0)
+           mpz_neg (r, r);
+         mpz_clear (powlhs);
+         mpz_clear (powrhs);
+         mpz_clear (rhs);
+         return;
+       }
+
+      mpz_init (lhs); mpz_init (rhs);
+      mpz_eval_expr (lhs, e->operands.ops.lhs);
+      mpz_eval_expr (rhs, e->operands.ops.rhs);
+      mpz_fdiv_r (r, lhs, rhs);
+      mpz_clear (lhs); mpz_clear (rhs);
+      return;
+#if __GNU_MP_VERSION >= 2
+    case INVMOD:
+      mpz_init (lhs); mpz_init (rhs);
+      mpz_eval_expr (lhs, e->operands.ops.lhs);
+      mpz_eval_expr (rhs, e->operands.ops.rhs);
+      mpz_invert (r, lhs, rhs);
+      mpz_clear (lhs); mpz_clear (rhs);
+      return;
+#endif
+    case POW:
+      mpz_init (lhs); mpz_init (rhs);
+      mpz_eval_expr (lhs, e->operands.ops.lhs);
+      if (mpz_cmpabs_ui (lhs, 1) <= 0)
+       {
+         /* For 0^rhs and 1^rhs, we just need to verify that
+            rhs is well-defined.  For (-1)^rhs we need to
+            determine (rhs mod 2).  For simplicity, compute
+            (rhs mod 2) for all three cases.  */
+         expr_t two, et;
+         two = malloc (sizeof (struct expr));
+         two -> op = LIT;
+         mpz_init_set_ui (two->operands.val, 2L);
+         makeexp (&et, MOD, e->operands.ops.rhs, two);
+         e->operands.ops.rhs = et;
+       }
+
+      mpz_eval_expr (rhs, e->operands.ops.rhs);
+      if (mpz_cmp_si (rhs, 0L) == 0)
+       /* x^0 is 1 */
+       mpz_set_ui (r, 1L);
+      else if (mpz_cmp_si (lhs, 0L) == 0)
+       /* 0^y (where y != 0) is 0 */
+       mpz_set_ui (r, 0L);
+      else if (mpz_cmp_ui (lhs, 1L) == 0)
+       /* 1^y is 1 */
+       mpz_set_ui (r, 1L);
+      else if (mpz_cmp_si (lhs, -1L) == 0)
+       /* (-1)^y just depends on whether y is even or odd */
+       mpz_set_si (r, (mpz_get_ui (rhs) & 1) ? -1L : 1L);
+      else if (mpz_cmp_si (rhs, 0L) < 0)
+       /* x^(-n) is 0 */
+       mpz_set_ui (r, 0L);
+      else
+       {
+         unsigned long int cnt;
+         unsigned long int y;
+         /* error if exponent does not fit into an unsigned long int.  */
+         if (mpz_cmp_ui (rhs, ~(unsigned long int) 0) > 0)
+           goto pow_err;
+
+         y = mpz_get_ui (rhs);
+         /* x^y == (x/(2^c))^y * 2^(c*y) */
+#if __GNU_MP_VERSION >= 2
+         cnt = mpz_scan1 (lhs, 0);
+#else
+         cnt = 0;
+#endif
+         if (cnt != 0)
+           {
+             if (y * cnt / cnt != y)
+               goto pow_err;
+             mpz_tdiv_q_2exp (lhs, lhs, cnt);
+             mpz_pow_ui (r, lhs, y);
+             mpz_mul_2exp (r, r, y * cnt);
+           }
+         else
+           mpz_pow_ui (r, lhs, y);
+       }
+      mpz_clear (lhs); mpz_clear (rhs);
+      return;
+    pow_err:
+      error = "result of `pow' operator too large";
+      mpz_clear (lhs); mpz_clear (rhs);
+      longjmp (errjmpbuf, 1);
+    case GCD:
+      mpz_init (lhs); mpz_init (rhs);
+      mpz_eval_expr (lhs, e->operands.ops.lhs);
+      mpz_eval_expr (rhs, e->operands.ops.rhs);
+      mpz_gcd (r, lhs, rhs);
+      mpz_clear (lhs); mpz_clear (rhs);
+      return;
+#if __GNU_MP_VERSION > 2 || __GNU_MP_VERSION_MINOR >= 1
+    case LCM:
+      mpz_init (lhs); mpz_init (rhs);
+      mpz_eval_expr (lhs, e->operands.ops.lhs);
+      mpz_eval_expr (rhs, e->operands.ops.rhs);
+      mpz_lcm (r, lhs, rhs);
+      mpz_clear (lhs); mpz_clear (rhs);
+      return;
+#endif
+    case AND:
+      mpz_init (lhs); mpz_init (rhs);
+      mpz_eval_expr (lhs, e->operands.ops.lhs);
+      mpz_eval_expr (rhs, e->operands.ops.rhs);
+      mpz_and (r, lhs, rhs);
+      mpz_clear (lhs); mpz_clear (rhs);
+      return;
+    case IOR:
+      mpz_init (lhs); mpz_init (rhs);
+      mpz_eval_expr (lhs, e->operands.ops.lhs);
+      mpz_eval_expr (rhs, e->operands.ops.rhs);
+      mpz_ior (r, lhs, rhs);
+      mpz_clear (lhs); mpz_clear (rhs);
+      return;
+#if __GNU_MP_VERSION > 2 || __GNU_MP_VERSION_MINOR >= 1
+    case XOR:
+      mpz_init (lhs); mpz_init (rhs);
+      mpz_eval_expr (lhs, e->operands.ops.lhs);
+      mpz_eval_expr (rhs, e->operands.ops.rhs);
+      mpz_xor (r, lhs, rhs);
+      mpz_clear (lhs); mpz_clear (rhs);
+      return;
+#endif
+    case NEG:
+      mpz_eval_expr (r, e->operands.ops.lhs);
+      mpz_neg (r, r);
+      return;
+    case NOT:
+      mpz_eval_expr (r, e->operands.ops.lhs);
+      mpz_com (r, r);
+      return;
+    case SQRT:
+      mpz_init (lhs);
+      mpz_eval_expr (lhs, e->operands.ops.lhs);
+      if (mpz_sgn (lhs) < 0)
+       {
+         error = "cannot take square root of negative numbers";
+         mpz_clear (lhs);
+         longjmp (errjmpbuf, 1);
+       }
+      mpz_sqrt (r, lhs);
+      return;
+#if __GNU_MP_VERSION > 2 || __GNU_MP_VERSION_MINOR >= 1
+    case ROOT:
+      mpz_init (lhs); mpz_init (rhs);
+      mpz_eval_expr (lhs, e->operands.ops.lhs);
+      mpz_eval_expr (rhs, e->operands.ops.rhs);
+      if (mpz_sgn (rhs) <= 0)
+       {
+         error = "cannot take non-positive root orders";
+         mpz_clear (lhs); mpz_clear (rhs);
+         longjmp (errjmpbuf, 1);
+       }
+      if (mpz_sgn (lhs) < 0 && (mpz_get_ui (rhs) & 1) == 0)
+       {
+         error = "cannot take even root orders of negative numbers";
+         mpz_clear (lhs); mpz_clear (rhs);
+         longjmp (errjmpbuf, 1);
+       }
+
+      {
+       unsigned long int nth = mpz_get_ui (rhs);
+       if (mpz_cmp_ui (rhs, ~(unsigned long int) 0) > 0)
+         {
+           /* If we are asked to take an awfully large root order, cheat and
+              ask for the largest order we can pass to mpz_root.  This saves
+              some error prone special cases.  */
+           nth = ~(unsigned long int) 0;
+         }
+       mpz_root (r, lhs, nth);
+      }
+      mpz_clear (lhs); mpz_clear (rhs);
+      return;
+#endif
+    case FAC:
+      mpz_eval_expr (r, e->operands.ops.lhs);
+      if (mpz_size (r) > 1)
+       {
+         error = "result of `!' operator too large";
+         longjmp (errjmpbuf, 1);
+       }
+      mpz_fac_ui (r, mpz_get_ui (r));
+      return;
+#if __GNU_MP_VERSION >= 2
+    case POPCNT:
+      mpz_eval_expr (r, e->operands.ops.lhs);
+      { long int cnt;
+       cnt = mpz_popcount (r);
+       mpz_set_si (r, cnt);
+      }
+      return;
+    case HAMDIST:
+      { long int cnt;
+       mpz_init (lhs); mpz_init (rhs);
+       mpz_eval_expr (lhs, e->operands.ops.lhs);
+       mpz_eval_expr (rhs, e->operands.ops.rhs);
+       cnt = mpz_hamdist (lhs, rhs);
+       mpz_clear (lhs); mpz_clear (rhs);
+       mpz_set_si (r, cnt);
+      }
+      return;
+#endif
+    case LOG2:
+      mpz_eval_expr (r, e->operands.ops.lhs);
+      { unsigned long int cnt;
+       if (mpz_sgn (r) <= 0)
+         {
+           error = "logarithm of non-positive number";
+           longjmp (errjmpbuf, 1);
+         }
+       cnt = mpz_sizeinbase (r, 2);
+       mpz_set_ui (r, cnt - 1);
+      }
+      return;
+    case LOG:
+      { unsigned long int cnt;
+       mpz_init (lhs); mpz_init (rhs);
+       mpz_eval_expr (lhs, e->operands.ops.lhs);
+       mpz_eval_expr (rhs, e->operands.ops.rhs);
+       if (mpz_sgn (lhs) <= 0)
+         {
+           error = "logarithm of non-positive number";
+           mpz_clear (lhs); mpz_clear (rhs);
+           longjmp (errjmpbuf, 1);
+         }
+       if (mpz_cmp_ui (rhs, 256) >= 0)
+         {
+           error = "logarithm base too large";
+           mpz_clear (lhs); mpz_clear (rhs);
+           longjmp (errjmpbuf, 1);
+         }
+       cnt = mpz_sizeinbase (lhs, mpz_get_ui (rhs));
+       mpz_set_ui (r, cnt - 1);
+       mpz_clear (lhs); mpz_clear (rhs);
+      }
+      return;
+    case FERMAT:
+      {
+       unsigned long int t;
+       mpz_init (lhs);
+       mpz_eval_expr (lhs, e->operands.ops.lhs);
+       t = (unsigned long int) 1 << mpz_get_ui (lhs);
+       if (mpz_cmp_ui (lhs, ~(unsigned long int) 0) > 0 || t == 0)
+         {
+           error = "too large Mersenne number index";
+           mpz_clear (lhs);
+           longjmp (errjmpbuf, 1);
+         }
+       mpz_set_ui (r, 1);
+       mpz_mul_2exp (r, r, t);
+       mpz_add_ui (r, r, 1);
+       mpz_clear (lhs);
+      }
+      return;
+    case MERSENNE:
+      mpz_init (lhs);
+      mpz_eval_expr (lhs, e->operands.ops.lhs);
+      if (mpz_cmp_ui (lhs, ~(unsigned long int) 0) > 0)
+       {
+         error = "too large Mersenne number index";
+         mpz_clear (lhs);
+         longjmp (errjmpbuf, 1);
+       }
+      mpz_set_ui (r, 1);
+      mpz_mul_2exp (r, r, mpz_get_ui (lhs));
+      mpz_sub_ui (r, r, 1);
+      mpz_clear (lhs);
+      return;
+    case FIBONACCI:
+      { mpz_t t;
+       unsigned long int n, i;
+       mpz_init (lhs);
+       mpz_eval_expr (lhs, e->operands.ops.lhs);
+       if (mpz_sgn (lhs) <= 0 || mpz_cmp_si (lhs, 1000000000) > 0)
+         {
+           error = "Fibonacci index out of range";
+           mpz_clear (lhs);
+           longjmp (errjmpbuf, 1);
+         }
+       n = mpz_get_ui (lhs);
+       mpz_clear (lhs);
+
+#if __GNU_MP_VERSION > 2 || __GNU_MP_VERSION_MINOR >= 1
+       mpz_fib_ui (r, n);
+#else
+       mpz_init_set_ui (t, 1);
+       mpz_set_ui (r, 1);
+
+       if (n <= 2)
+         mpz_set_ui (r, 1);
+       else
+         {
+           for (i = 3; i <= n; i++)
+             {
+               mpz_add (t, t, r);
+               mpz_swap (t, r);
+             }
+         }
+       mpz_clear (t);
+#endif
+      }
+      return;
+    case RANDOM:
+      {
+       unsigned long int n;
+       mpz_init (lhs);
+       mpz_eval_expr (lhs, e->operands.ops.lhs);
+       if (mpz_sgn (lhs) <= 0 || mpz_cmp_si (lhs, 1000000000) > 0)
+         {
+           error = "random number size out of range";
+           mpz_clear (lhs);
+           longjmp (errjmpbuf, 1);
+         }
+       n = mpz_get_ui (lhs);
+       mpz_clear (lhs);
+       mpz_urandomb (r, rstate, n);
+      }
+      return;
+    case NEXTPRIME:
+      {
+       mpz_eval_expr (r, e->operands.ops.lhs);
+       mpz_nextprime (r, r);
+      }
+      return;
+    case BINOM:
+      mpz_init (lhs); mpz_init (rhs);
+      mpz_eval_expr (lhs, e->operands.ops.lhs);
+      mpz_eval_expr (rhs, e->operands.ops.rhs);
+      {
+       unsigned long int k;
+       if (mpz_cmp_ui (rhs, ~(unsigned long int) 0) > 0)
+         {
+           error = "k too large in (n over k) expression";
+           mpz_clear (lhs); mpz_clear (rhs);
+           longjmp (errjmpbuf, 1);
+         }
+       k = mpz_get_ui (rhs);
+       mpz_bin_ui (r, lhs, k);
+      }
+      mpz_clear (lhs); mpz_clear (rhs);
+      return;
+    case TIMING:
+      {
+       int t0;
+       t0 = cputime ();
+       mpz_eval_expr (r, e->operands.ops.lhs);
+       printf ("time: %d\n", cputime () - t0);
+      }
+      return;
+    default:
+      abort ();
+    }
+}
+
+/* Compute the expression E reduced modulo MOD and store the result in R.
+
+   POW, PLUS, MINUS and MULT nodes reduce their operands recursively (the
+   exponent of POW is evaluated in full and handed to mpz_powm).  Any other
+   node is evaluated with mpz_eval_expr and the value is then reduced with
+   mpz_mod.  */
+void
+mpz_eval_mod_expr (mpz_ptr r, expr_t e, mpz_ptr mod)
+{
+  mpz_t a, b;
+
+  if (e->op != POW && e->op != PLUS && e->op != MINUS && e->op != MULT)
+    {
+      /* No modular shortcut for this operator.  */
+      mpz_init (a);
+      mpz_eval_expr (a, e);
+      mpz_mod (r, a, mod);
+      mpz_clear (a);
+      return;
+    }
+
+  mpz_init (a);
+  mpz_init (b);
+  mpz_eval_mod_expr (a, e->operands.ops.lhs, mod);
+
+  if (e->op == POW)
+    {
+      mpz_eval_expr (b, e->operands.ops.rhs);
+      mpz_powm (r, a, b, mod);
+    }
+  else
+    {
+      mpz_eval_mod_expr (b, e->operands.ops.rhs, mod);
+      if (e->op == MULT)
+        {
+          mpz_mul (r, a, b);
+          mpz_mod (r, r, mod);
+        }
+      else
+        {
+          if (e->op == PLUS)
+            mpz_add (r, a, b);
+          else
+            mpz_sub (r, a, b);
+
+          /* A single correction step brings the sum/difference back.  */
+          if (mpz_sgn (r) < 0)
+            mpz_add (r, r, mod);
+          else if (mpz_cmp (r, mod) >= 0)
+            mpz_sub (r, r, mod);
+        }
+    }
+
+  mpz_clear (a);
+  mpz_clear (b);
+}
+
+/* Print a diagnostic selected by SIG and terminate with exit status -2.
+   SIGFPE is reported as a division by zero and, when LIMIT_RESOURCE_USAGE
+   is defined, SIGXCPU as a time-out.  Any other value is reported as the
+   expression needing too much memory.  */
+void
+cleanup_and_exit (int sig)
+{
+  if (sig == SIGFPE)
+    printf ("divide by zero%s\n", newline);
+#ifdef LIMIT_RESOURCE_USAGE
+  else if (sig == SIGXCPU)
+    printf ("expression took too long to evaluate%s\n", newline);
+#endif
+  else
+    printf ("expression required too much memory to evaluate%s\n", newline);
+
+  exit (-2);
+}
diff --git a/demos/primes.c b/demos/primes.c

new file mode 100644 (file)

index 0000000..5e078bd
--- /dev/null
+++ b/demos/primes.c
@@ -0,0 +1,387 @@
+/* List and count primes.
+   Written by tege while on holiday in Rodupp, August 2001.
+   Between 10 and 500 times faster than previous program.
+
+Copyright 2001, 2002, 2006 Free Software Foundation, Inc.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+PARTICULAR PURPOSE.  See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <assert.h>
+
+/* IDEAS:
+ * Do not fill primes[] with real primes when the range [fr,to] is small,
+   when fr,to are relatively large.  Fill primes[] with odd numbers instead.
+   [Probably a bad idea, since the primes[] array would become very large.]
+ * Separate small primes and large primes when sieving.  Either the Montgomery
+   way (i.e., having a large array a multiple of L1 cache size), or just
+   separate loops for primes <= S and primes > S.  The latter primes do not
+   require an inner loop, since they will touch the sieving array at most once.
+ * Pre-fill sieving array with an appropriately aligned ...00100100... pattern,
+   then omit 3 from primes array.  (May require similar special handling of 3
+   as we now have for 2.)
+ * A large SIEVE_LIMIT currently implies very large memory usage, mainly due
+   to the sieving array in make_primelist, but also because of the primes[]
+   array.  We might want to stage the program, using sieve_region/find_primes
+   to build primes[].  Make report() a function pointer, as part of achieving
+   this.
+ * Store primes[] as two arrays, one array with primes represented as delta
+   values using just 8 bits (if gaps are too big, store bogus primes!)
+   and one array with "rem" values.  The latter needs 32-bit values.
+ * A new entry point, mpz_probab_prime_likely_p, would be useful.
+ * Improve command line syntax and versatility.  "primes -f FROM -t TO",
+   allow either to be omitted for open interval.  (But disallow
+   "primes -c -f FROM" since that would be infinity.)  Allow printing a
+   limited *number* of primes using syntax like "primes -f FROM -n NUMBER".
+ * When looking for maxgaps, we should not perform any primality testing until
+   we find possible record gaps.  Should speed up the searches tremendously.
+ */
+
+#include "gmp.h"
+
+struct primes
+{
+  unsigned int prime;
+  int rem;
+};
+
+struct primes *primes;
+unsigned long n_primes;
+
+void find_primes __GMP_PROTO ((unsigned char *, mpz_t, unsigned long, mpz_t));
+void sieve_region __GMP_PROTO ((unsigned char *, mpz_t, unsigned long));
+void make_primelist __GMP_PROTO ((unsigned long));
+
+int flag_print = 1;
+int flag_count = 0;
+int flag_maxgap = 0;
+unsigned long maxgap = 0;
+unsigned long total_primes = 0;
+
+/* Record PRIME as found: bump total_primes, print PRIME in decimal when
+   flag_print is set and, when flag_maxgap is set, raise maxgap to the
+   distance from the prime seen by the previous gap-tracking call.  Only the
+   value returned by mpz_get_ui takes part in the gap arithmetic.  */
+void
+report (mpz_t prime)
+{
+  static unsigned long last_low = 0;    /* 0 means "no prime seen yet" */
+  unsigned long low;
+
+  total_primes++;
+
+  if (flag_print)
+    {
+      mpz_out_str (stdout, 10, prime);
+      printf ("\n");
+    }
+
+  if (!flag_maxgap)
+    return;
+
+  low = mpz_get_ui (prime);
+  if (last_low != 0 && maxgap < low - last_low)
+    maxgap = low - last_low;
+  last_low = low;
+}
+
+int
+main (int argc, char *argv[])
+{
+  char *progname = argv[0];
+  mpz_t fr, to;
+  mpz_t fr2, to2;
+  unsigned long sieve_lim;
+  unsigned long est_n_primes;
+  unsigned char *s;
+  mpz_t tmp;
+  mpz_t siev_sqr_lim;
+
+  while (argc != 1)
+    {
+      if (strcmp (argv[1], "-c") == 0)
+       {
+         flag_count = 1;
+         argv++;
+         argc--;
+       }
+      else if (strcmp (argv[1], "-p") == 0)
+       {
+         flag_print = 2;
+         argv++;
+         argc--;
+       }
+      else if (strcmp (argv[1], "-g") == 0)
+       {
+         flag_maxgap = 1;
+         argv++;
+         argc--;
+       }
+      else
+       break;
+    }
+
+  if (flag_count || flag_maxgap)
+    flag_print--;              /* clear unless an explicit -p  */
+
+  mpz_init (fr);
+  mpz_init (to);
+  mpz_init (fr2);
+  mpz_init (to2);
+
+  if (argc == 3)
+    {
+      mpz_set_str (fr, argv[1], 0);
+      if (argv[2][0] == '+')
+       {
+         mpz_set_str (to, argv[2] + 1, 0);
+         mpz_add (to, to, fr);
+       }
+      else
+       mpz_set_str (to, argv[2], 0);
+    }
+  else if (argc == 2)
+    {
+      mpz_set_ui (fr, 0);
+      mpz_set_str (to, argv[1], 0);
+    }
+  else
+    {
+      fprintf (stderr, "usage: %s [-c] [-p] [-g] [from [+]]to\n", progname);
+      exit (1);
+    }
+
+  mpz_set (fr2, fr);
+  if (mpz_cmp_ui (fr2, 3) < 0)
+    {
+      mpz_set_ui (fr2, 2);
+      report (fr2);
+      mpz_set_ui (fr2, 3);
+    }
+  mpz_setbit (fr2, 0);                         /* make odd */
+  mpz_sub_ui (to2, to, 1);
+  mpz_setbit (to2, 0);                         /* make odd */
+
+  mpz_init (tmp);
+  mpz_init (siev_sqr_lim);
+
+  mpz_sqrt (tmp, to2);
+#define SIEVE_LIMIT 10000000
+  if (mpz_cmp_ui (tmp, SIEVE_LIMIT) < 0)
+    {
+      sieve_lim = mpz_get_ui (tmp);
+    }
+  else
+    {
+      sieve_lim = SIEVE_LIMIT;
+      mpz_sub (tmp, to2, fr2);
+      if (mpz_cmp_ui (tmp, sieve_lim) < 0)
+       sieve_lim = mpz_get_ui (tmp);   /* limit sieving for small ranges */
+    }
+  mpz_set_ui (siev_sqr_lim, sieve_lim + 1);
+  mpz_mul_ui (siev_sqr_lim, siev_sqr_lim, sieve_lim + 1);
+
+  est_n_primes = (size_t) (sieve_lim / log((double) sieve_lim) * 1.13) + 10;
+  primes = malloc (est_n_primes * sizeof primes[0]);
+  make_primelist (sieve_lim);
+  assert (est_n_primes >= n_primes);
+
+#if DEBUG
+  printf ("sieve_lim = %lu\n", sieve_lim);
+  printf ("n_primes = %lu (3..%u)\n",
+         n_primes, primes[n_primes - 1].prime);
+#endif
+
+#define S (1 << 15)            /* FIXME: Figure out L1 cache size */
+  s = malloc (S/2);
+  while (mpz_cmp (fr2, to2) <= 0)
+    {
+      unsigned long rsize;
+      rsize = S;
+      mpz_add_ui (tmp, fr2, rsize);
+      if (mpz_cmp (tmp, to2) > 0)
+       {
+         mpz_sub (tmp, to2, fr2);
+         rsize = mpz_get_ui (tmp) + 2;
+       }
+#if DEBUG
+      printf ("Sieving region ["); mpz_out_str (stdout, 10, fr2);
+      printf (","); mpz_add_ui (tmp, fr2, rsize - 2);
+      mpz_out_str (stdout, 10, tmp); printf ("]\n");
+#endif
+      sieve_region (s, fr2, rsize);
+      find_primes (s, fr2, rsize / 2, siev_sqr_lim);
+
+      mpz_add_ui (fr2, fr2, S);
+    }
+  free (s);
+
+  if (flag_count)
+    printf ("Pi(interval) = %lu\n", total_primes);
+
+  if (flag_maxgap)
+    printf ("max gap: %lu\n", maxgap);
+
+  return 0;
+}
+
+/* Sieve the region [fr,fr+rsize) with the primes in the global primes[]
+   array.  Requires that fr is odd and that rsize is even.  The sieving
+   array s should be aligned for "long int" and have rsize/2 entries, rounded
+   up to the nearest multiple of sizeof (long).
+
+   Every entry of s is first set to all ones; entries are then cleared at a
+   stride of one prime, starting from an offset computed per prime.  The
+   entries still non-zero on return are the candidates (find_primes maps
+   entry i to the number fr + 2*i).  primes[i].rem is left holding the loop
+   index reached past the end of this region, which a later call uses as its
+   starting entry -- this presumably assumes the next region starts exactly
+   rsize after this one; verify against the caller.  */
+void
+sieve_region (unsigned char *s, mpz_t fr, unsigned long rsize)
+{
+  unsigned long ssize = rsize / 2;
+  unsigned long start, start2, prime;
+  unsigned long i;
+  mpz_t tmp;
+
+  mpz_init (tmp);
+
+#if 0
+  /* initialize sieving array */
+  for (ii = 0; ii < (ssize + sizeof (long) - 1) / sizeof (long); ii++)
+    ((long *) s) [ii] = ~0L;
+#else
+  {
+    /* Same initialization as the disabled loop above, but indexing backwards
+       from se, which points just past ssize rounded up to whole longs; k
+       runs from -(number of longs) up to -1.  */
+    long k;
+    long *se = (long *) (s + ((ssize + sizeof (long) - 1) & -sizeof (long)));
+    for (k = -((ssize + sizeof (long) - 1) / sizeof (long)); k < 0; k++)
+      se[k] = ~0L;
+  }
+#endif
+
+  for (i = 0; i < n_primes; i++)
+    {
+      prime = primes[i].prime;
+
+      if (primes[i].rem >= 0)
+       {
+         /* A previous call already worked out where to continue.  */
+         start2 = primes[i].rem;
+       }
+      else
+       {
+         /* rem is still negative (make_primelist stores -1): compute the
+            first offset from fr to sieve at.  tmp = prime^2.  */
+         mpz_set_ui (tmp, prime);
+         mpz_mul_ui (tmp, tmp, prime);
+         if (mpz_cmp (fr, tmp) <= 0)
+           {
+             /* prime^2 is at or beyond fr: start at prime^2, or stop the
+                whole loop if that lies past this region.  */
+             mpz_sub (tmp, tmp, fr);
+             if (mpz_cmp_ui (tmp, 2 * ssize) > 0)
+               break;          /* avoid overflow at next line, also speedup */
+             start = mpz_get_ui (tmp);
+           }
+         else
+           {
+             /* prime^2 is below fr: distance from fr up to the next
+                multiple of prime.  */
+             start = (prime - mpz_tdiv_ui (fr, prime)) % prime;
+             if (start % 2 != 0)
+               start += prime;         /* adjust if even divisible */
+           }
+         /* Entries are 2 apart in value, so halve the (even) offset.  */
+         start2 = start / 2;
+       }
+
+#if 0
+      for (ii = start2; ii < ssize; ii += prime)
+       s[ii] = 0;
+      primes[i].rem = ii - ssize;
+#else
+      {
+       /* Same as the disabled loop above, with the index biased by -ssize
+          so that the loop test is a comparison against zero.  */
+       long k;
+       unsigned char *se = s + ssize; /* point just beyond sieving range */
+       for (k = start2 - ssize; k < 0; k += prime)
+         se[k] = 0;
+       primes[i].rem = k;
+      }
+#endif
+    }
+  mpz_clear (tmp);
+}
+
+/* Report the primes among the candidates left in the sieved array S.
+   Entry i of S stands for the number fr + 2*i and SSIZE is the number of
+   entries to look at.  A candidate below SIEV_SQR_LIM is reported straight
+   away; a larger one only if mpz_probab_prime_p (called with 10) accepts
+   it.  */
+void
+find_primes (unsigned char *s, mpz_t  fr, unsigned long ssize,
+            mpz_t siev_sqr_lim)
+{
+  const unsigned long nwords = (ssize + sizeof (long) - 1) / sizeof (long);
+  unsigned long w, b, idx;
+  mpz_t cand;
+
+  mpz_init (cand);
+  for (w = 0; w < nwords; w++)
+    {
+      /* Skip a whole long of cleared entries at once.  */
+      if (((long *) s) [w] == 0)
+        continue;
+
+      for (b = 0; b < sizeof (long); b++)
+        {
+          idx = w * sizeof (long) + b;
+          if (s[idx] == 0)
+            continue;
+          if (idx >= ssize)
+            {
+              /* Set entry in the padding past the region: we are done.  */
+              mpz_clear (cand);
+              return;
+            }
+          mpz_add_ui (cand, fr, idx * 2);
+          if (mpz_cmp (cand, siev_sqr_lim) < 0
+              || mpz_probab_prime_p (cand, 10))
+            report (cand);
+        }
+    }
+  mpz_clear (cand);
+}
+
+/* Generate the list of odd primes <= MAXPRIME and store it in the global
+   array primes[], setting n_primes to the number of entries written.  The
+   caller must have allocated primes[] large enough.  Each entry gets
+   rem = -1, which sieve_region takes to mean "start offset not computed
+   yet".  For MAXPRIME < 3 the list is empty.  Exits with status 1 if the
+   temporary sieve cannot be allocated.  */
+void
+make_primelist (unsigned long maxprime)
+{
+#if 1
+  unsigned char *s;
+  unsigned long ssize = maxprime / 2;
+  unsigned long i, ii, j;
+
+  /* s[k] stands for the odd number 2*k + 3.  Ask for at least one byte so
+     that a NULL return always means failure, even when ssize is 0.  */
+  s = malloc (ssize != 0 ? ssize : 1);
+  if (s == NULL)
+    {
+      fprintf (stderr, "make_primelist: out of memory\n");
+      exit (1);
+    }
+  memset (s, ~0, ssize);
+  for (i = 3; ; i += 2)
+    {
+      unsigned long isqr = i * i;
+      if (isqr >= maxprime)
+        break;
+      /* i*i/2 - 1 is the entry for i*i; it has already been cleared exactly
+         when i has a smaller odd prime factor.  */
+      if (s[i * i / 2 - 1] == 0)
+        continue;                               /* only sieve with primes */
+      for (ii = i * i / 2 - 1; ii < ssize; ii += i)
+        s[ii] = 0;
+    }
+  n_primes = 0;
+  for (j = 0; j < ssize; j++)
+    {
+      if (s[j] != 0)
+        {
+          primes[n_primes].prime = j * 2 + 3;
+          primes[n_primes].rem = -1;
+          n_primes++;
+        }
+    }
+  /* The last entry of s stands for maxprime + 1 when maxprime is even, so
+     the final prime collected may exceed maxprime; drop it.  The list can
+     be empty (maxprime < 2), in which case there is nothing to inspect.
+     FIXME: This should not be needed if fencepost errors were fixed... */
+  if (n_primes > 0 && primes[n_primes - 1].prime > maxprime)
+    n_primes--;
+  free (s);
+#else
+  /* Disabled alternative: no sieving, just keep odd numbers that are not
+     multiples of 3, 5 or 7 (so the list may contain composites).  */
+  unsigned long i;
+  n_primes = 0;
+  for (i = 3; i <= maxprime; i += 2)
+    {
+      if (i < 7 || (i % 3 != 0 && i % 5 != 0 && i % 7 != 0))
+        {
+          primes[n_primes].prime = i;
+          primes[n_primes].rem = -1;
+          n_primes++;
+        }
+    }
+#endif
+}
diff --git a/demos/qcn.c b/demos/qcn.c

new file mode 100644 (file)

index 0000000..b0c7e63
--- /dev/null
+++ b/demos/qcn.c
@@ -0,0 +1,172 @@
+/* Use mpz_kronecker_ui() to calculate an estimate for the quadratic
+   class number h(d), for a given negative fundamental discriminant, using
+   Dirichlet's analytic formula.
+
+Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+This program is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3 of the License, or (at your option)
+any later version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+more details.
+
+You should have received a copy of the GNU General Public License along with
+this program.  If not, see http://www.gnu.org/licenses/.  */
+
+
+/* Usage: qcn [-p limit] <discriminant>...
+
+   A fundamental discriminant means one of the form D or 4*D with D
+   square-free.  Each argument is checked to see it's congruent to 0 or 1
+   mod 4 (as all discriminants must be), and that it's negative, but there's
+   no check on D being square-free.
+
+   This program is a bit of a toy, there are better methods for calculating
+   the class number and class group structure.
+
+   Reference:
+
+   Daniel Shanks, "Class Number, A Theory of Factorization, and Genera",
+   Proc. Symp. Pure Math., vol 20, 1970, pages 415-440.
+
+*/
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "gmp.h"
+
+#ifndef M_PI
+#define M_PI  3.14159265358979323846
+#endif
+
+
+/* Trial-division primality test: returns 1 if N is prime, 0 otherwise.
+   Simple but slow -- every odd divisor up to floor(sqrt(N)) is tried.  */
+int
+prime_p (unsigned long n)
+{
+  unsigned long  d, root;
+
+  if (n < 2)
+    return 0;
+  if ((n & 1) == 0)
+    return n == 2;              /* 2 is the only even prime */
+
+  root = (unsigned long) floor (sqrt ((double) n));
+  d = 3;
+  while (d <= root)
+    {
+      if (n % d == 0)
+        return 0;
+      d += 2;
+    }
+
+  return 1;
+}
+
+
+/* The formula is as follows, with d < 0.
+
+              w * sqrt(-d)      inf      p
+       h(d) = ------------ *  product --------
+                 2 * pi         p=2   p - (d/p)
+
+
+   (d/p) is the Kronecker symbol and the product is over primes p.  w is 6
+   when d=-3, 4 when d=-4, or 2 otherwise.
+
+   Calculating the product up to p=infinity would take a long time, so for
+   the estimate primes up to 132,000 are used.  Shanks found this giving an
+   accuracy of about 1 part in 1000, in normal cases.  */
+
+unsigned long  p_limit = 132000;
+
+/* Return the estimate of h(d) for the negative discriminant D: the
+   sqrt(-d)/pi factor times the p=2 term, scaled for d=-3 and d=-4, times
+   the terms p/(p - (d/p)) for the odd primes p <= p_limit.  */
+double
+qcn_estimate (mpz_t d)
+{
+  double  est;
+  unsigned long  q;
+
+  /* leading factor (with w=2) together with the p=2 term */
+  est = sqrt (-mpz_get_d (d)) / M_PI
+    * 2.0 / (2.0 - mpz_kronecker_ui (d, 2));
+
+  /* w is 6 for d=-3 and 4 for d=-4 instead of 2 */
+  if (mpz_cmp_si (d, -3) == 0)
+    est *= 3;
+  else if (mpz_cmp_si (d, -4) == 0)
+    est *= 2;
+
+  for (q = 3; q <= p_limit; q += 2)
+    {
+      if (! prime_p (q))
+        continue;
+      est *= (double) q / (double) (q - mpz_kronecker_ui (d, q));
+    }
+
+  return est;
+}
+
+
+/* Convert the string NUM to an integer (mpz_init_set_str with base 0) and
+   print one line to stdout: a complaint if the value is not negative or is
+   not 0 or 1 mod 4, otherwise the class number estimate for it.  */
+void
+qcn_str (char *num)
+{
+  mpz_t  disc;
+
+  mpz_init_set_str (disc, num, 0);
+
+  if (mpz_sgn (disc) < 0)
+    {
+      unsigned long  r = mpz_fdiv_ui (disc, 4);
+
+      if (r == 0 || r == 1)
+        {
+          printf ("h(");
+          mpz_out_str (stdout, 0, disc);
+          printf (") approx %.1f\n", qcn_estimate (disc));
+        }
+      else
+        {
+          mpz_out_str (stdout, 0, disc);
+          printf (" is not a discriminant (must == 0 or 1 mod 4)\n");
+        }
+    }
+  else
+    {
+      mpz_out_str (stdout, 0, disc);
+      printf (" is not supported (negatives only)\n");
+    }
+
+  mpz_clear (disc);
+}
+
+
+/* Process the command line left to right: "-p N" sets p_limit to N (via
+   atoi), any other argument is handed to qcn_str.  When no number was
+   given, print estimates for a few built-in discriminants instead.  */
+int
+main (int argc, char *argv[])
+{
+  int  idx = 1;
+  int  n_done = 0;
+
+  while (idx < argc)
+    {
+      char *arg = argv[idx++];
+
+      if (strcmp (arg, "-p") != 0)
+        {
+          qcn_str (arg);
+          n_done = 1;
+          continue;
+        }
+
+      /* -p needs a value */
+      if (idx >= argc)
+        {
+          fprintf (stderr, "Missing argument to -p\n");
+          exit (1);
+        }
+      p_limit = atoi (argv[idx++]);
+    }
+
+  if (n_done == 0)
+    {
+      /* some default output */
+      qcn_str ("-85702502803");           /* is 16259   */
+      qcn_str ("-328878692999");          /* is 1499699 */
+      qcn_str ("-928185925902146563");    /* is 52739552 */
+      qcn_str ("-84148631888752647283");  /* is 496652272 */
+    }
+
+  return 0;
+}
diff --git a/doc/Makefile.am b/doc/Makefile.am

new file mode 100644 (file)

index 0000000..2bc34dd
--- /dev/null
+++ b/doc/Makefile.am
@@ -0,0 +1,25 @@
+## Process this file with automake to generate Makefile.in
+
+
+# Copyright 2003 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+# Additional files from this directory to include in the distribution.
+EXTRA_DIST = configuration isa_abi_headache projects.html tasks.html
+
+# The Texinfo manual, and the extra Texinfo file listed as belonging to it.
+info_TEXINFOS = gmp.texi
+gmp_TEXINFOS = fdl-1.3.texi
diff --git a/doc/Makefile.in b/doc/Makefile.in

new file mode 100644 (file)

index 0000000..1e92f65
--- /dev/null
+++ b/doc/Makefile.in
@@ -0,0 +1,710 @@
+# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation,
+# Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+# Copyright 2003 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+subdir = doc
+DIST_COMMON = $(gmp_TEXINFOS) $(srcdir)/Makefile.am \
+       $(srcdir)/Makefile.in $(srcdir)/stamp-vti \
+       $(srcdir)/version.texi mdate-sh texinfo.tex
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
+       $(top_srcdir)/configure.in
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+       $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+depcomp =
+am__depfiles_maybe =
+SOURCES =
+DIST_SOURCES =
+INFO_DEPS = $(srcdir)/gmp.info
+am__TEXINFO_TEX_DIR = $(srcdir)
+DVIS = gmp.dvi
+PDFS = gmp.pdf
+PSS = gmp.ps
+HTMLS = gmp.html
+TEXINFOS = gmp.texi
+TEXI2DVI = texi2dvi
+TEXI2PDF = $(TEXI2DVI) --pdf --batch
+MAKEINFOHTML = $(MAKEINFO) --html
+AM_MAKEINFOHTMLFLAGS = $(AM_MAKEINFOFLAGS)
+DVIPS = dvips
+am__installdirs = "$(DESTDIR)$(infodir)"
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+    $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+    *) f=$$p;; \
+  esac;
+am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
+am__install_max = 40
+am__nobase_strip_setup = \
+  srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
+am__nobase_strip = \
+  for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
+am__nobase_list = $(am__nobase_strip_setup); \
+  for p in $$list; do echo "$$p $$p"; done | \
+  sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
+  $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
+    if (++n[$$2] == $(am__install_max)) \
+      { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
+    END { for (dir in files) print dir, files[dir] }'
+am__base_list = \
+  sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
+  sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ABI = @ABI@
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AS = @AS@
+ASMFLAGS = @ASMFLAGS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CALLING_CONVENTIONS_OBJS = @CALLING_CONVENTIONS_OBJS@
+CC = @CC@
+CCAS = @CCAS@
+CC_FOR_BUILD = @CC_FOR_BUILD@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CPP_FOR_BUILD = @CPP_FOR_BUILD@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFN_LONG_LONG_LIMB = @DEFN_LONG_LONG_LIMB@
+DEFS = @DEFS@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+EXEEXT_FOR_BUILD = @EXEEXT_FOR_BUILD@
+FGREP = @FGREP@
+GMP_LDFLAGS = @GMP_LDFLAGS@
+GMP_LIMB_BITS = @GMP_LIMB_BITS@
+GMP_NAIL_BITS = @GMP_NAIL_BITS@
+GREP = @GREP@
+HAVE_CLOCK_01 = @HAVE_CLOCK_01@
+HAVE_CPUTIME_01 = @HAVE_CPUTIME_01@
+HAVE_GETRUSAGE_01 = @HAVE_GETRUSAGE_01@
+HAVE_GETTIMEOFDAY_01 = @HAVE_GETTIMEOFDAY_01@
+HAVE_HOST_CPU_FAMILY_power = @HAVE_HOST_CPU_FAMILY_power@
+HAVE_HOST_CPU_FAMILY_powerpc = @HAVE_HOST_CPU_FAMILY_powerpc@
+HAVE_SIGACTION_01 = @HAVE_SIGACTION_01@
+HAVE_SIGALTSTACK_01 = @HAVE_SIGALTSTACK_01@
+HAVE_SIGSTACK_01 = @HAVE_SIGSTACK_01@
+HAVE_STACK_T_01 = @HAVE_STACK_T_01@
+HAVE_SYS_RESOURCE_H_01 = @HAVE_SYS_RESOURCE_H_01@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LEX = @LEX@
+LEXLIB = @LEXLIB@
+LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
+LIBCURSES = @LIBCURSES@
+LIBGMPXX_LDFLAGS = @LIBGMPXX_LDFLAGS@
+LIBGMP_DLL = @LIBGMP_DLL@
+LIBGMP_LDFLAGS = @LIBGMP_LDFLAGS@
+LIBM = @LIBM@
+LIBM_FOR_BUILD = @LIBM_FOR_BUILD@
+LIBOBJS = @LIBOBJS@
+LIBREADLINE = @LIBREADLINE@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+M4 = @M4@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
+STRIP = @STRIP@
+TAL_OBJECT = @TAL_OBJECT@
+TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
+U = @U@
+U_FOR_BUILD = @U_FOR_BUILD@
+VERSION = @VERSION@
+WITH_READLINE_01 = @WITH_READLINE_01@
+YACC = @YACC@
+YFLAGS = @YFLAGS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__leading_dot = @am__leading_dot@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+gmp_srclinks = @gmp_srclinks@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+mpn_objects = @mpn_objects@
+mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
+mpn_objs_in_libmp = @mpn_objs_in_libmp@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+EXTRA_DIST = configuration isa_abi_headache projects.html tasks.html
+info_TEXINFOS = gmp.texi
+gmp_TEXINFOS = fdl-1.3.texi
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .dvi .html .info .pdf .ps .texi
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps)
+       @for dep in $?; do \
+         case '$(am__configure_deps)' in \
+           *$$dep*) \
+             ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+               && { if test -f $@; then exit 0; else break; fi; }; \
+             exit 1;; \
+         esac; \
+       done; \
+       echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps doc/Makefile'; \
+       $(am__cd) $(top_srcdir) && \
+         $(AUTOMAKE) --gnu --ignore-deps doc/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+       @case '$?' in \
+         *config.status*) \
+           cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+         *) \
+           echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+           cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+       esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+mostlyclean-libtool:
+       -rm -f *.lo
+
+clean-libtool:
+       -rm -rf .libs _libs
+
+.texi.info:
+       restore=: && backupdir="$(am__leading_dot)am$$$$" && \
+       am__cwd=`pwd` && $(am__cd) $(srcdir) && \
+       rm -rf $$backupdir && mkdir $$backupdir && \
+       if ($(MAKEINFO) --version) >/dev/null 2>&1; then \
+         for f in $@ $@-[0-9] $@-[0-9][0-9] $(@:.info=).i[0-9] $(@:.info=).i[0-9][0-9]; do \
+           if test -f $$f; then mv $$f $$backupdir; restore=mv; else :; fi; \
+         done; \
+       else :; fi && \
+       cd "$$am__cwd"; \
+       if $(MAKEINFO) $(AM_MAKEINFOFLAGS) $(MAKEINFOFLAGS) -I $(srcdir) \
+        -o $@ $<; \
+       then \
+         rc=0; \
+         $(am__cd) $(srcdir); \
+       else \
+         rc=$$?; \
+         $(am__cd) $(srcdir) && \
+         $$restore $$backupdir/* `echo "./$@" | sed 's|[^/]*$$||'`; \
+       fi; \
+       rm -rf $$backupdir; exit $$rc
+
+.texi.dvi:
+       TEXINPUTS="$(am__TEXINFO_TEX_DIR)$(PATH_SEPARATOR)$$TEXINPUTS" \
+       MAKEINFO='$(MAKEINFO) $(AM_MAKEINFOFLAGS) $(MAKEINFOFLAGS) -I $(srcdir)' \
+       $(TEXI2DVI) $<
+
+.texi.pdf:
+       TEXINPUTS="$(am__TEXINFO_TEX_DIR)$(PATH_SEPARATOR)$$TEXINPUTS" \
+       MAKEINFO='$(MAKEINFO) $(AM_MAKEINFOFLAGS) $(MAKEINFOFLAGS) -I $(srcdir)' \
+       $(TEXI2PDF) $<
+
+.texi.html:
+       rm -rf $(@:.html=.htp)
+       if $(MAKEINFOHTML) $(AM_MAKEINFOHTMLFLAGS) $(MAKEINFOFLAGS) -I $(srcdir) \
+        -o $(@:.html=.htp) $<; \
+       then \
+         rm -rf $@; \
+         if test ! -d $(@:.html=.htp) && test -d $(@:.html=); then \
+           mv $(@:.html=) $@; else mv $(@:.html=.htp) $@; fi; \
+       else \
+         if test ! -d $(@:.html=.htp) && test -d $(@:.html=); then \
+           rm -rf $(@:.html=); else rm -Rf $(@:.html=.htp) $@; fi; \
+         exit 1; \
+       fi
+$(srcdir)/gmp.info: gmp.texi $(srcdir)/version.texi $(gmp_TEXINFOS)
+gmp.dvi: gmp.texi $(srcdir)/version.texi $(gmp_TEXINFOS)
+gmp.pdf: gmp.texi $(srcdir)/version.texi $(gmp_TEXINFOS)
+gmp.html: gmp.texi $(srcdir)/version.texi $(gmp_TEXINFOS)
+$(srcdir)/version.texi: @MAINTAINER_MODE_TRUE@ $(srcdir)/stamp-vti
+$(srcdir)/stamp-vti: gmp.texi $(top_srcdir)/configure
+       @(dir=.; test -f ./gmp.texi || dir=$(srcdir); \
+       set `$(SHELL) $(srcdir)/mdate-sh $$dir/gmp.texi`; \
+       echo "@set UPDATED $$1 $$2 $$3"; \
+       echo "@set UPDATED-MONTH $$2 $$3"; \
+       echo "@set EDITION $(VERSION)"; \
+       echo "@set VERSION $(VERSION)") > vti.tmp
+       @cmp -s vti.tmp $(srcdir)/version.texi \
+         || (echo "Updating $(srcdir)/version.texi"; \
+             cp vti.tmp $(srcdir)/version.texi)
+       -@rm -f vti.tmp
+       @cp $(srcdir)/version.texi $@
+
+mostlyclean-vti:
+       -rm -f vti.tmp
+
+maintainer-clean-vti:
+@MAINTAINER_MODE_TRUE@ -rm -f $(srcdir)/stamp-vti $(srcdir)/version.texi
+.dvi.ps:
+       TEXINPUTS="$(am__TEXINFO_TEX_DIR)$(PATH_SEPARATOR)$$TEXINPUTS" \
+       $(DVIPS) -o $@ $<
+
+uninstall-dvi-am:
+       @$(NORMAL_UNINSTALL)
+       @list='$(DVIS)'; test -n "$(dvidir)" || list=; \
+       for p in $$list; do \
+         $(am__strip_dir) \
+         echo " rm -f '$(DESTDIR)$(dvidir)/$$f'"; \
+         rm -f "$(DESTDIR)$(dvidir)/$$f"; \
+       done
+
+uninstall-html-am:
+       @$(NORMAL_UNINSTALL)
+       @list='$(HTMLS)'; test -n "$(htmldir)" || list=; \
+       for p in $$list; do \
+         $(am__strip_dir) \
+         echo " rm -rf '$(DESTDIR)$(htmldir)/$$f'"; \
+         rm -rf "$(DESTDIR)$(htmldir)/$$f"; \
+       done
+
+uninstall-info-am:
+       @$(PRE_UNINSTALL)
+       @if test -d '$(DESTDIR)$(infodir)' && \
+           (install-info --version && \
+            install-info --version 2>&1 | sed 1q | grep -i -v debian) >/dev/null 2>&1; then \
+         list='$(INFO_DEPS)'; \
+         for file in $$list; do \
+           relfile=`echo "$$file" | sed 's|^.*/||'`; \
+           echo " install-info --info-dir='$(DESTDIR)$(infodir)' --remove '$(DESTDIR)$(infodir)/$$relfile'"; \
+           if install-info --info-dir="$(DESTDIR)$(infodir)" --remove "$(DESTDIR)$(infodir)/$$relfile"; \
+           then :; else test ! -f "$(DESTDIR)$(infodir)/$$relfile" || exit 1; fi; \
+         done; \
+       else :; fi
+       @$(NORMAL_UNINSTALL)
+       @list='$(INFO_DEPS)'; \
+       for file in $$list; do \
+         relfile=`echo "$$file" | sed 's|^.*/||'`; \
+         relfile_i=`echo "$$relfile" | sed 's|\.info$$||;s|$$|.i|'`; \
+         (if test -d "$(DESTDIR)$(infodir)" && cd "$(DESTDIR)$(infodir)"; then \
+            echo " cd '$(DESTDIR)$(infodir)' && rm -f $$relfile $$relfile-[0-9] $$relfile-[0-9][0-9] $$relfile_i[0-9] $$relfile_i[0-9][0-9]"; \
+            rm -f $$relfile $$relfile-[0-9] $$relfile-[0-9][0-9] $$relfile_i[0-9] $$relfile_i[0-9][0-9]; \
+          else :; fi); \
+       done
+
+uninstall-pdf-am:
+       @$(NORMAL_UNINSTALL)
+       @list='$(PDFS)'; test -n "$(pdfdir)" || list=; \
+       for p in $$list; do \
+         $(am__strip_dir) \
+         echo " rm -f '$(DESTDIR)$(pdfdir)/$$f'"; \
+         rm -f "$(DESTDIR)$(pdfdir)/$$f"; \
+       done
+
+uninstall-ps-am:
+       @$(NORMAL_UNINSTALL)
+       @list='$(PSS)'; test -n "$(psdir)" || list=; \
+       for p in $$list; do \
+         $(am__strip_dir) \
+         echo " rm -f '$(DESTDIR)$(psdir)/$$f'"; \
+         rm -f "$(DESTDIR)$(psdir)/$$f"; \
+       done
+
+dist-info: $(INFO_DEPS)
+       @srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; \
+       list='$(INFO_DEPS)'; \
+       for base in $$list; do \
+         case $$base in \
+           $(srcdir)/*) base=`echo "$$base" | sed "s|^$$srcdirstrip/||"`;; \
+         esac; \
+         if test -f $$base; then d=.; else d=$(srcdir); fi; \
+         base_i=`echo "$$base" | sed 's|\.info$$||;s|$$|.i|'`; \
+         for file in $$d/$$base $$d/$$base-[0-9] $$d/$$base-[0-9][0-9] $$d/$$base_i[0-9] $$d/$$base_i[0-9][0-9]; do \
+           if test -f $$file; then \
+             relfile=`expr "$$file" : "$$d/\(.*\)"`; \
+             test -f "$(distdir)/$$relfile" || \
+               cp -p $$file "$(distdir)/$$relfile"; \
+           else :; fi; \
+         done; \
+       done
+
+mostlyclean-aminfo:
+       -rm -rf gmp.aux gmp.cp gmp.cps gmp.fn gmp.fns gmp.ky gmp.kys gmp.log gmp.pg \
+         gmp.pgs gmp.tmp gmp.toc gmp.tp gmp.vr gmp.vrs
+
+clean-aminfo:
+       -test -z "gmp.dvi gmp.pdf gmp.ps gmp.html" \
+       || rm -rf gmp.dvi gmp.pdf gmp.ps gmp.html
+
+maintainer-clean-aminfo:
+       @list='$(INFO_DEPS)'; for i in $$list; do \
+         i_i=`echo "$$i" | sed 's|\.info$$||;s|$$|.i|'`; \
+         echo " rm -f $$i $$i-[0-9] $$i-[0-9][0-9] $$i_i[0-9] $$i_i[0-9][0-9]"; \
+         rm -f $$i $$i-[0-9] $$i-[0-9][0-9] $$i_i[0-9] $$i_i[0-9][0-9]; \
+       done
+tags: TAGS
+TAGS:
+
+ctags: CTAGS
+CTAGS:
+
+
+distdir: $(DISTFILES)
+       @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+       topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+       list='$(DISTFILES)'; \
+         dist_files=`for file in $$list; do echo $$file; done | \
+         sed -e "s|^$$srcdirstrip/||;t" \
+             -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+       case $$dist_files in \
+         */*) $(MKDIR_P) `echo "$$dist_files" | \
+                          sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+                          sort -u` ;; \
+       esac; \
+       for file in $$dist_files; do \
+         if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+         if test -d $$d/$$file; then \
+           dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+           if test -d "$(distdir)/$$file"; then \
+             find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+           fi; \
+           if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+             cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+             find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+           fi; \
+           cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+         else \
+           test -f "$(distdir)/$$file" \
+           || cp -p $$d/$$file "$(distdir)/$$file" \
+           || exit 1; \
+         fi; \
+       done
+       $(MAKE) $(AM_MAKEFLAGS) \
+         top_distdir="$(top_distdir)" distdir="$(distdir)" \
+         dist-info
+check-am: all-am
+check: check-am
+all-am: Makefile $(INFO_DEPS)
+installdirs:
+       for dir in "$(DESTDIR)$(infodir)"; do \
+         test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+       done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+       @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+       $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+         install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+         `test -z '$(STRIP)' || \
+           echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+       -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+       -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+       @echo "This command is intended for maintainers to use"
+       @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-aminfo clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-am
+       -rm -f Makefile
+distclean-am: clean-am distclean-generic
+
+dvi: dvi-am
+
+dvi-am: $(DVIS)
+
+html: html-am
+
+html-am: $(HTMLS)
+
+info: info-am
+
+info-am: $(INFO_DEPS)
+
+install-data-am: install-info-am
+
+install-dvi: install-dvi-am
+
+install-dvi-am: $(DVIS)
+       @$(NORMAL_INSTALL)
+       test -z "$(dvidir)" || $(MKDIR_P) "$(DESTDIR)$(dvidir)"
+       @list='$(DVIS)'; test -n "$(dvidir)" || list=; \
+       for p in $$list; do \
+         if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
+         echo "$$d$$p"; \
+       done | $(am__base_list) | \
+       while read files; do \
+         echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(dvidir)'"; \
+         $(INSTALL_DATA) $$files "$(DESTDIR)$(dvidir)" || exit $$?; \
+       done
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am: $(HTMLS)
+       @$(NORMAL_INSTALL)
+       test -z "$(htmldir)" || $(MKDIR_P) "$(DESTDIR)$(htmldir)"
+       @list='$(HTMLS)'; list2=; test -n "$(htmldir)" || list=; \
+       for p in $$list; do \
+         if test -f "$$p" || test -d "$$p"; then d=; else d="$(srcdir)/"; fi; \
+         $(am__strip_dir) \
+         if test -d "$$d$$p"; then \
+           echo " $(MKDIR_P) '$(DESTDIR)$(htmldir)/$$f'"; \
+           $(MKDIR_P) "$(DESTDIR)$(htmldir)/$$f" || exit 1; \
+           echo " $(INSTALL_DATA) '$$d$$p'/* '$(DESTDIR)$(htmldir)/$$f'"; \
+           $(INSTALL_DATA) "$$d$$p"/* "$(DESTDIR)$(htmldir)/$$f" || exit $$?; \
+         else \
+           list2="$$list2 $$d$$p"; \
+         fi; \
+       done; \
+       test -z "$$list2" || { echo "$$list2" | $(am__base_list) | \
+       while read files; do \
+         echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(htmldir)'"; \
+         $(INSTALL_DATA) $$files "$(DESTDIR)$(htmldir)" || exit $$?; \
+       done; }
+install-info: install-info-am
+
+install-info-am: $(INFO_DEPS)
+       @$(NORMAL_INSTALL)
+       test -z "$(infodir)" || $(MKDIR_P) "$(DESTDIR)$(infodir)"
+       @srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; \
+       list='$(INFO_DEPS)'; test -n "$(infodir)" || list=; \
+       for file in $$list; do \
+         case $$file in \
+           $(srcdir)/*) file=`echo "$$file" | sed "s|^$$srcdirstrip/||"`;; \
+         esac; \
+         if test -f $$file; then d=.; else d=$(srcdir); fi; \
+         file_i=`echo "$$file" | sed 's|\.info$$||;s|$$|.i|'`; \
+         for ifile in $$d/$$file $$d/$$file-[0-9] $$d/$$file-[0-9][0-9] \
+                      $$d/$$file_i[0-9] $$d/$$file_i[0-9][0-9] ; do \
+           if test -f $$ifile; then \
+             echo "$$ifile"; \
+           else : ; fi; \
+         done; \
+       done | $(am__base_list) | \
+       while read files; do \
+         echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(infodir)'"; \
+         $(INSTALL_DATA) $$files "$(DESTDIR)$(infodir)" || exit $$?; done
+       @$(POST_INSTALL)
+       @if (install-info --version && \
+            install-info --version 2>&1 | sed 1q | grep -i -v debian) >/dev/null 2>&1; then \
+         list='$(INFO_DEPS)'; test -n "$(infodir)" || list=; \
+         for file in $$list; do \
+           relfile=`echo "$$file" | sed 's|^.*/||'`; \
+           echo " install-info --info-dir='$(DESTDIR)$(infodir)' '$(DESTDIR)$(infodir)/$$relfile'";\
+           install-info --info-dir="$(DESTDIR)$(infodir)" "$(DESTDIR)$(infodir)/$$relfile" || :;\
+         done; \
+       else : ; fi
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am: $(PDFS)
+       @$(NORMAL_INSTALL)
+       test -z "$(pdfdir)" || $(MKDIR_P) "$(DESTDIR)$(pdfdir)"
+       @list='$(PDFS)'; test -n "$(pdfdir)" || list=; \
+       for p in $$list; do \
+         if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
+         echo "$$d$$p"; \
+       done | $(am__base_list) | \
+       while read files; do \
+         echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(pdfdir)'"; \
+         $(INSTALL_DATA) $$files "$(DESTDIR)$(pdfdir)" || exit $$?; done
+install-ps: install-ps-am
+
+install-ps-am: $(PSS)
+       @$(NORMAL_INSTALL)
+       test -z "$(psdir)" || $(MKDIR_P) "$(DESTDIR)$(psdir)"
+       @list='$(PSS)'; test -n "$(psdir)" || list=; \
+       for p in $$list; do \
+         if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
+         echo "$$d$$p"; \
+       done | $(am__base_list) | \
+       while read files; do \
+         echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(psdir)'"; \
+         $(INSTALL_DATA) $$files "$(DESTDIR)$(psdir)" || exit $$?; done
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+       -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-aminfo \
+       maintainer-clean-generic maintainer-clean-vti
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-aminfo mostlyclean-generic \
+       mostlyclean-libtool mostlyclean-vti
+
+pdf: pdf-am
+
+pdf-am: $(PDFS)
+
+ps: ps-am
+
+ps-am: $(PSS)
+
+uninstall-am: uninstall-dvi-am uninstall-html-am uninstall-info-am \
+       uninstall-pdf-am uninstall-ps-am
+
+.MAKE: install-am install-strip
+
+.PHONY: all all-am check check-am clean clean-aminfo clean-generic \
+       clean-libtool dist-info distclean distclean-generic \
+       distclean-libtool distdir dvi dvi-am html html-am info info-am \
+       install install-am install-data install-data-am install-dvi \
+       install-dvi-am install-exec install-exec-am install-html \
+       install-html-am install-info install-info-am install-man \
+       install-pdf install-pdf-am install-ps install-ps-am \
+       install-strip installcheck installcheck-am installdirs \
+       maintainer-clean maintainer-clean-aminfo \
+       maintainer-clean-generic maintainer-clean-vti mostlyclean \
+       mostlyclean-aminfo mostlyclean-generic mostlyclean-libtool \
+       mostlyclean-vti pdf pdf-am ps ps-am uninstall uninstall-am \
+       uninstall-dvi-am uninstall-html-am uninstall-info-am \
+       uninstall-pdf-am uninstall-ps-am
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/doc/configuration b/doc/configuration

new file mode 100644 (file)

index 0000000..b28e217
--- /dev/null
+++ b/doc/configuration
@@ -0,0 +1,422 @@
+/* doc/configuration (in Emacs -*-outline-*- format). */
+
+Copyright 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+
+* Adding a new file
+
+** Adding a top-level file
+
+  i) Add it to libgmp_la_SOURCES in Makefile.am.
+
+  ii) If libmp.la needs it (usually doesn't), then add it to
+      libmp_la_SOURCES too.
+
+** Adding a subdirectory file
+
+For instance for mpz,
+
+  i) Add file.c to libmpz_la_SOURCES in mpz/Makefile.am.
+
+  ii) Add mpz/file$U.lo to MPZ_OBJECTS in the top-level Makefile.am.
+
+  iii) If for some reason libmp.la needs it (usually doesn't) then add
+       mpz/file$U.lo to libmp_la_DEPENDENCIES in the top-level
+       Makefile.am too.
+
+The same applies to mpf, mpq, scanf and printf.
+
+** Adding an mpn file
+
+The way we build libmpn (in the `mpn' subdirectory) is quite special.
+
+Currently only mpn/mp_bases.c is truly generic and included in every
+configuration.  All other files are linked at build time into the mpn
+build directory from one of the CPU specific sub-directories, or from
+the mpn/generic directory.
+
+There are four types of mpn source files.
+
+  .asm   Assembly code preprocessed with m4
+  .S     Assembly code preprocessed with cpp
+  .s     Assembly code not preprocessed at all
+  .c     C code
+
+There are two types of .asm files.
+
+  i) ``Normal'' files containing one function, though possibly with
+     more than one entry point.
+
+  ii) Multi-function files that generate one of a set of functions
+      according to build options.
+
+To add a new implementation of an existing function,
+
+  i) Put it in the appropriate CPU-specific mpn subdirectory; it'll be
+     detected and used.
+
+  ii) Any entrypoints tested by HAVE_NATIVE_func in other code must
+      have PROLOGUE(func) for configure to grep.  This is normal for
+      .asm or .S files, but for .c files a dummy comment like the
+      following will be needed.
+
+              /*
+              PROLOGUE(func)
+              */
+
+To add a new implementation using a multi-function file, in addition
+do the following,
+
+  i) Use a MULFUNC_PROLOGUE(func1 func2 ...) in the .asm, declaring
+     all the functions implemented, including carry-in variants.
+
+     If there's a separate PROLOGUE(func) for each possible function
+     (but this is usually not the case), then MULFUNC_PROLOGUE isn't
+     necessary.
+
+To add a new style of multi-function file, in addition do the
+following,
+
+  i) Add to the GMP_MULFUNC_CHOICES "case" statement in configure.in
+     which lists each multi-function filename and what function files
+     it can provide.
+
+To add a completely new mpn function file, do the following,
+
+  i) Ensure the filename is a valid C identifier, due to the
+     -DOPERATION_$* used to support multi-function files.  This means
+     "-" can't be used (but "_" can).
+
+  ii) Add it to configure.in under one of the following
+
+      a) `gmp_mpn_functions' if it exists for every target.  This
+         means there must be a C version in mpn/generic.  (Eg. mul_1)
+
+      b) `gmp_mpn_functions_optional' if it's a standard function, but
+         doesn't need to exist for every target.  Code wanting to use
+         this will test HAVE_NATIVE_func to see if it's available.
+         (Eg. copyi)
+
+      c) `extra_functions' for some targets, if it's a special
+         function that only ever needs to exist for certain targets.
+         Code wanting to use it can test either HAVE_NATIVE_func or
+         HAVE_HOST_CPU_foo, as desired.
+
+  iii) If HAVE_NATIVE_func is going to be used, then add a #undef to
+       the AH_VERBATIM([HAVE_NATIVE] block in configure.in.
+
+  iv) Add file.c to nodist_libdummy_la_SOURCES in mpn/Makefile.am (in
+      order to get an ansi2knr rule).  If the file is only in
+      assembler then this step is unnecessary, but do it anyway so as
+      not to forget if later a .c version is added.
+
+  v) If the function can be provided by a multi-function file, then
+     add to the "case" statement in configure.in which lists each
+     multi-function filename and what function files it can provide.
+
+
+** Adding a test program
+
+  i) Tests to be run early in the testing can be added to the main
+     "tests" sub-directory.
+
+  ii) Tests for mpn, mpz, mpq and mpf can be added under the
+      corresponding tests subdirectory.
+
+  iii) Generic tests for late in the testing can be added to
+       "tests/misc".  printf and scanf tests currently live there too.
+
+  iv) Random number function tests can be added to "tests/rand".  That
+      directory has some development-time programs too.
+
+  v) C++ test programs can be added to "tests/cxx".  A line like the
+     following must be added for each, since by default automake looks
+     for a .c file.
+
+             t_foo_SOURCES = t-foo.cc
+
+In all cases the name of the program should be added to check_PROGRAMS
+in the Makefile.am.  TESTS is equal to check_PROGRAMS, so all those
+programs get run.
+
+"tests/devel" has a number of programs which are only for development
+purposes and are not for use in "make check".  These should be listed
+in EXTRA_PROGRAMS to get Makefile rules created, but they're never
+built or run unless an explicit "make someprog" is used.
+
+
+* Adding a new CPU
+
+In general it's policy to use proper names for each CPU type
+supported.  If two CPUs are quite similar and perhaps don't have any
+actual differences in GMP then they're still given separate names, for
+example alphaev67 and alphaev68.
+
+Canonical names:
+
+  i) Decide the canonical CPU names GMP will accept.
+
+  ii) Add these to the config.sub wrapper if configfsf.sub doesn't
+      already accept them.
+
+  iii) Document the names in gmp.texi.
+
+Aliases (optional):
+
+  i) Any aliases can be added to the config.sub wrapper, unless
+     configfsf.sub already does the right thing with them.
+
+  ii) Leave configure.in and everywhere else using only the canonical
+      names.  Aliases shouldn't appear anywhere except config.sub.
+
+  iii) Document in gmp.texi, if desired.  Usually this isn't a good
+       idea, better encourage users to know just the canonical
+       names.
+
+Configure:
+
+  i) Add patterns to configure.in for the new CPU names.  Include the
+     following (see configure.in for the variables to set up),
+
+     a) ABI choices (if any).
+     b) Compiler choices.
+     c) mpn path for CPU specific code.
+     d) Good default CFLAGS for each likely compiler.
+     e) Any special tests necessary on the compiler or assembler
+        capabilities.
+
+  ii) M4 macros to be shared by asm files in a CPU family are by
+      convention in a foo-defs.m4 like mpn/x86/x86-defs.m4.  They're
+      likely to use settings from config.m4 generated by configure.
+
+Fat binaries:
+
+  i) In configure.in, add CPU specific directory(s) to fat_path.
+
+  ii) In mpn/<cpu>/fat.c, identify the CPU at runtime and use suitable
+      CPUVEC_SETUP_subdir macros to select the function pointers for it.
+
+  iii) For the x86s, add to the "$tmp_prefix" setups in configure.in
+       which abbreviates subdirectory names to fit an 8.3 filesystem.
+       (No need to restrict to 8.3, just ensure uniqueness when
+       truncated.)
+
+
+* The configure system
+
+** Installing tools
+
+The current versions of automake, autoconf and libtool in use can be
+checked in the ChangeLog.  Look for "Update to ...".  Patches may have
+been applied; look for "Regenerate ...".
+
+The GMP build system is in places somewhat dependent on the internals
+of the build tools.  Obviously that's avoided as much as possible, but
+where it can't, it creates a problem when upgrading or attempting to
+use different tool versions.
+
+** Updating gmp
+
+The following files need to be updated when going to a new version of
+the build tools.  Unfortunately the tools generally don't identify
+when an out-of-date version is present.
+
+aclocal.m4 is updated by running "aclocal".  (Only needed for a new
+automake or libtool.)
+
+INSTALL.autoconf can be copied from INSTALL in autoconf.
+
+ltmain.sh comes from libtool.  Remove it and run "libtoolize --copy",
+or just copy the file by hand.
+
+ansi2knr.c, ansi2knr.1, install-sh and doc/mdate-sh come from automake
+and can be updated by copying or by removing and running "automake
+--add-missing --copy".
+
+texinfo.tex can be updated from ftp.gnu.org.  Check it still works
+with "make gmp.dvi", "make gmp.ps" and "make gmp.pdf".
+
+configfsf.guess and configfsf.sub can be updated from ftp.gnu.org (or
+from the "config" cvs module at subversions.gnu.org).  The gmp
+config.guess and config.sub wrappers are supposed to make such an
+update fairly painless.
+
+depcomp from automake is not needed because configure.in specifies
+automake with "no-dependencies".
+
+** How it works
+
+During development:
+
+    Input files                       Tool       Output files
+    ---------------------------------------------------------
+
+                                     aclocal
+    $prefix/share/aclocal*/*.m4 ----------------> aclocal.m4
+
+
+    configure.in \                   autoconf
+    aclocal.m4   / -----------------------------> configure
+
+
+    */Makefile.am \                  automake
+    configure.in  | ----------------------------> Makefile.in
+    aclocal.m4    /
+
+    configure.in \                  autoheader
+    aclocal.m4   / -----------------------------> config.in
+
+At build time:
+
+    Input files          Tool       Output files
+    --------------------------------------------
+
+    */Makefile.in  \   configure    / */Makefile
+    config.in      | -------------> | config.h
+    gmp-h.in       |                | config.m4
+    mp-h.in        /                | gmp.h
+                                    | mp.h
+                                    \ fat.h  (fat binary build only)
+
+When configured with --enable-maintainer-mode the Makefiles include
+rules to re-run the necessary tools if the input files are changed.
+This can end up running a lot more things than are really necessary.
+
+If a build tree is in too much of a mess for those rules to work
+properly then a bootstrap can be done from the source directory with
+
+       aclocal
+       autoconf
+       automake
+       autoheader
+
+The autom4te.cache directory is created by autoconf to save some work
+in subsequent automake or autoheader runs.  It's recreated
+automatically if removed, and it doesn't get distributed.
+
+** C++ configuration
+
+It's intended that the contents of libgmp.la won't vary according to
+whether --enable-cxx is selected.  This means that if C++ shared
+libraries don't work properly then a shared+static with --disable-cxx
+can be done for the C parts, then a static-only with --enable-cxx to
+get libgmpxx.
+
+libgmpxx.la uses some internals from libgmp.la, in order to share code
+between C and C++.  It's intended that libgmpxx can only be expected
+to work with libgmp from the same version of GMP.  If some of the
+shared internals change their interface, then it's proposed to rename
+them, for instance __gmp_doprint2 or the like, so as to provoke link
+errors rather than mysterious failures from a mismatch.
+
+* Development setups
+
+** General
+
+--disable-shared will make builds go much faster, though of course
+shared or shared+static should be tested too.
+
+--enable-mpbsd grabs various bits of mpz, which might need to be
+adjusted if things in those routines are changed.  Building mpbsd all
+the time doesn't cost much.
+
+--prefix to a dummy directory followed by "make install" will show
+what's installed.
+
+"make check" acts on the libgmp just built, and will ignore any other
+/usr/lib/libgmp, or at least it should do.  Libtool does various hairy
+things to ensure it hits the just-built library.
+
+** Long long limb testing
+
+On systems where gcc supports long long, but a limb is normally just a
+long, the following can be used to force long long for testing
+purposes.  It will probably run quite slowly.
+
+       ./configure --host=none ABI=longlong
+
+** Function argument conversions
+
+When using gcc, configuring with something like
+
+       ./configure CFLAGS="-g -Wall -Wconversion -Wno-sign-compare"
+
+can show where function parameters are being converted due to having
+function prototypes available, which won't happen in a K&R compiler.
+Doing this in combination with the long long limb setups above is
+good.
+
+Conversions between int and long aren't warned about by gcc when
+they're the same size, which is unfortunate because casts should be
+used in such cases, for the benefit of K&R compilers with int!=long
+and where the difference matters in function calls.
+
+** K&R support
+
+Function definitions must be in the GNU stylized form to work.  See
+the ansi2knr.1 man page (included in the GMP sources).
+
+__GMP_PROTO is used for function prototypes, other ANSI / K&R
+differences are conditionalized in various places.
+
+Proper testing of the K&R support requires a compiler which gives an
+error for ANSI-isms.  Configuring with --host=none is a good idea, to
+test all the generic C code.
+
+When using an ANSI compiler, the ansi2knr setups can be partially
+tested with
+
+       ./configure am_cv_prog_cc_stdc=no ac_cv_prog_cc_stdc=no
+
+This will test the use of $U and the like in the makefiles, but not
+much else.
+
+Forcing the cache variables can be used with a compiler like HP C
+which is K&R by default but to which configure normally adds ANSI mode
+flags.  This then should be a good full K&R test.
+
+* Other Notes
+
+** Compatibility
+
+compat.c is the home of functions retained for binary compatibility,
+    but now done by other means (like a macro).
+
+struct __mpz_struct etc - this must be retained for C++ compatibility.
+    C++ applications defining functions taking mpz_t etc parameters
+    will get this in the mangled name because C++ "sees through" the
+    typedef mpz_t to the underlying struct.
+
+    Incidentally, this probably means for C++ that our mp.h is not
+    compatible with an original BSD mp.h, since we use struct
+    __mpz_struct for MINT in ours.  Maybe we could change to whatever
+    the original did, but it seems unlikely anyone would be using C++
+    with mp.h.
+
+__gmpn - note that glibc defines some __mpn symbols, old versions of
+    some mpn routines, which it uses for floating point printfs.
+
+
+
+
+Local variables:
+mode: outline
+fill-column: 70
+End:
+/* eof doc/configuration */
diff --git a/doc/fdl-1.3.texi b/doc/fdl-1.3.texi

new file mode 100644 (file)

index 0000000..8805f1a
--- /dev/null
+++ b/doc/fdl-1.3.texi
@@ -0,0 +1,506 @@
+@c The GNU Free Documentation License.
+@center Version 1.3, 3 November 2008
+
+@c This file is intended to be included within another document,
+@c hence no sectioning command or @node.
+
+@display
+Copyright @copyright{} 2000, 2001, 2002, 2007, 2008 Free Software Foundation, Inc.
+@uref{http://fsf.org/}
+
+Everyone is permitted to copy and distribute verbatim copies
+of this license document, but changing it is not allowed.
+@end display
+
+@enumerate 0
+@item
+PREAMBLE
+
+The purpose of this License is to make a manual, textbook, or other
+functional and useful document @dfn{free} in the sense of freedom: to
+assure everyone the effective freedom to copy and redistribute it,
+with or without modifying it, either commercially or noncommercially.
+Secondarily, this License preserves for the author and publisher a way
+to get credit for their work, while not being considered responsible
+for modifications made by others.
+
+This License is a kind of ``copyleft'', which means that derivative
+works of the document must themselves be free in the same sense.  It
+complements the GNU General Public License, which is a copyleft
+license designed for free software.
+
+We have designed this License in order to use it for manuals for free
+software, because free software needs free documentation: a free
+program should come with manuals providing the same freedoms that the
+software does.  But this License is not limited to software manuals;
+it can be used for any textual work, regardless of subject matter or
+whether it is published as a printed book.  We recommend this License
+principally for works whose purpose is instruction or reference.
+
+@item
+APPLICABILITY AND DEFINITIONS
+
+This License applies to any manual or other work, in any medium, that
+contains a notice placed by the copyright holder saying it can be
+distributed under the terms of this License.  Such a notice grants a
+world-wide, royalty-free license, unlimited in duration, to use that
+work under the conditions stated herein.  The ``Document'', below,
+refers to any such manual or work.  Any member of the public is a
+licensee, and is addressed as ``you''.  You accept the license if you
+copy, modify or distribute the work in a way requiring permission
+under copyright law.
+
+A ``Modified Version'' of the Document means any work containing the
+Document or a portion of it, either copied verbatim, or with
+modifications and/or translated into another language.
+
+A ``Secondary Section'' is a named appendix or a front-matter section
+of the Document that deals exclusively with the relationship of the
+publishers or authors of the Document to the Document's overall
+subject (or to related matters) and contains nothing that could fall
+directly within that overall subject.  (Thus, if the Document is in
+part a textbook of mathematics, a Secondary Section may not explain
+any mathematics.)  The relationship could be a matter of historical
+connection with the subject or with related matters, or of legal,
+commercial, philosophical, ethical or political position regarding
+them.
+
+The ``Invariant Sections'' are certain Secondary Sections whose titles
+are designated, as being those of Invariant Sections, in the notice
+that says that the Document is released under this License.  If a
+section does not fit the above definition of Secondary then it is not
+allowed to be designated as Invariant.  The Document may contain zero
+Invariant Sections.  If the Document does not identify any Invariant
+Sections then there are none.
+
+The ``Cover Texts'' are certain short passages of text that are listed,
+as Front-Cover Texts or Back-Cover Texts, in the notice that says that
+the Document is released under this License.  A Front-Cover Text may
+be at most 5 words, and a Back-Cover Text may be at most 25 words.
+
+A ``Transparent'' copy of the Document means a machine-readable copy,
+represented in a format whose specification is available to the
+general public, that is suitable for revising the document
+straightforwardly with generic text editors or (for images composed of
+pixels) generic paint programs or (for drawings) some widely available
+drawing editor, and that is suitable for input to text formatters or
+for automatic translation to a variety of formats suitable for input
+to text formatters.  A copy made in an otherwise Transparent file
+format whose markup, or absence of markup, has been arranged to thwart
+or discourage subsequent modification by readers is not Transparent.
+An image format is not Transparent if used for any substantial amount
+of text.  A copy that is not ``Transparent'' is called ``Opaque''.
+
+Examples of suitable formats for Transparent copies include plain
+@sc{ascii} without markup, Texinfo input format, La@TeX{} input
+format, @acronym{SGML} or @acronym{XML} using a publicly available
+@acronym{DTD}, and standard-conforming simple @acronym{HTML},
+PostScript or @acronym{PDF} designed for human modification.  Examples
+of transparent image formats include @acronym{PNG}, @acronym{XCF} and
+@acronym{JPG}.  Opaque formats include proprietary formats that can be
+read and edited only by proprietary word processors, @acronym{SGML} or
+@acronym{XML} for which the @acronym{DTD} and/or processing tools are
+not generally available, and the machine-generated @acronym{HTML},
+PostScript or @acronym{PDF} produced by some word processors for
+output purposes only.
+
+The ``Title Page'' means, for a printed book, the title page itself,
+plus such following pages as are needed to hold, legibly, the material
+this License requires to appear in the title page.  For works in
+formats which do not have any title page as such, ``Title Page'' means
+the text near the most prominent appearance of the work's title,
+preceding the beginning of the body of the text.
+
+The ``publisher'' means any person or entity that distributes copies
+of the Document to the public.
+
+A section ``Entitled XYZ'' means a named subunit of the Document whose
+title either is precisely XYZ or contains XYZ in parentheses following
+text that translates XYZ in another language.  (Here XYZ stands for a
+specific section name mentioned below, such as ``Acknowledgements'',
+``Dedications'', ``Endorsements'', or ``History''.)  To ``Preserve the Title''
+of such a section when you modify the Document means that it remains a
+section ``Entitled XYZ'' according to this definition.
+
+The Document may include Warranty Disclaimers next to the notice which
+states that this License applies to the Document.  These Warranty
+Disclaimers are considered to be included by reference in this
+License, but only as regards disclaiming warranties: any other
+implication that these Warranty Disclaimers may have is void and has
+no effect on the meaning of this License.
+
+@item
+VERBATIM COPYING
+
+You may copy and distribute the Document in any medium, either
+commercially or noncommercially, provided that this License, the
+copyright notices, and the license notice saying this License applies
+to the Document are reproduced in all copies, and that you add no other
+conditions whatsoever to those of this License.  You may not use
+technical measures to obstruct or control the reading or further
+copying of the copies you make or distribute.  However, you may accept
+compensation in exchange for copies.  If you distribute a large enough
+number of copies you must also follow the conditions in section 3.
+
+You may also lend copies, under the same conditions stated above, and
+you may publicly display copies.
+
+@item
+COPYING IN QUANTITY
+
+If you publish printed copies (or copies in media that commonly have
+printed covers) of the Document, numbering more than 100, and the
+Document's license notice requires Cover Texts, you must enclose the
+copies in covers that carry, clearly and legibly, all these Cover
+Texts: Front-Cover Texts on the front cover, and Back-Cover Texts on
+the back cover.  Both covers must also clearly and legibly identify
+you as the publisher of these copies.  The front cover must present
+the full title with all words of the title equally prominent and
+visible.  You may add other material on the covers in addition.
+Copying with changes limited to the covers, as long as they preserve
+the title of the Document and satisfy these conditions, can be treated
+as verbatim copying in other respects.
+
+If the required texts for either cover are too voluminous to fit
+legibly, you should put the first ones listed (as many as fit
+reasonably) on the actual cover, and continue the rest onto adjacent
+pages.
+
+If you publish or distribute Opaque copies of the Document numbering
+more than 100, you must either include a machine-readable Transparent
+copy along with each Opaque copy, or state in or with each Opaque copy
+a computer-network location from which the general network-using
+public has access to download using public-standard network protocols
+a complete Transparent copy of the Document, free of added material.
+If you use the latter option, you must take reasonably prudent steps,
+when you begin distribution of Opaque copies in quantity, to ensure
+that this Transparent copy will remain thus accessible at the stated
+location until at least one year after the last time you distribute an
+Opaque copy (directly or through your agents or retailers) of that
+edition to the public.
+
+It is requested, but not required, that you contact the authors of the
+Document well before redistributing any large number of copies, to give
+them a chance to provide you with an updated version of the Document.
+
+@item
+MODIFICATIONS
+
+You may copy and distribute a Modified Version of the Document under
+the conditions of sections 2 and 3 above, provided that you release
+the Modified Version under precisely this License, with the Modified
+Version filling the role of the Document, thus licensing distribution
+and modification of the Modified Version to whoever possesses a copy
+of it.  In addition, you must do these things in the Modified Version:
+
+@enumerate A
+@item
+Use in the Title Page (and on the covers, if any) a title distinct
+from that of the Document, and from those of previous versions
+(which should, if there were any, be listed in the History section
+of the Document).  You may use the same title as a previous version
+if the original publisher of that version gives permission.
+
+@item
+List on the Title Page, as authors, one or more persons or entities
+responsible for authorship of the modifications in the Modified
+Version, together with at least five of the principal authors of the
+Document (all of its principal authors, if it has fewer than five),
+unless they release you from this requirement.
+
+@item
+State on the Title page the name of the publisher of the
+Modified Version, as the publisher.
+
+@item
+Preserve all the copyright notices of the Document.
+
+@item
+Add an appropriate copyright notice for your modifications
+adjacent to the other copyright notices.
+
+@item
+Include, immediately after the copyright notices, a license notice
+giving the public permission to use the Modified Version under the
+terms of this License, in the form shown in the Addendum below.
+
+@item
+Preserve in that license notice the full lists of Invariant Sections
+and required Cover Texts given in the Document's license notice.
+
+@item
+Include an unaltered copy of this License.
+
+@item
+Preserve the section Entitled ``History'', Preserve its Title, and add
+to it an item stating at least the title, year, new authors, and
+publisher of the Modified Version as given on the Title Page.  If
+there is no section Entitled ``History'' in the Document, create one
+stating the title, year, authors, and publisher of the Document as
+given on its Title Page, then add an item describing the Modified
+Version as stated in the previous sentence.
+
+@item
+Preserve the network location, if any, given in the Document for
+public access to a Transparent copy of the Document, and likewise
+the network locations given in the Document for previous versions
+it was based on.  These may be placed in the ``History'' section.
+You may omit a network location for a work that was published at
+least four years before the Document itself, or if the original
+publisher of the version it refers to gives permission.
+
+@item
+For any section Entitled ``Acknowledgements'' or ``Dedications'', Preserve
+the Title of the section, and preserve in the section all the
+substance and tone of each of the contributor acknowledgements and/or
+dedications given therein.
+
+@item
+Preserve all the Invariant Sections of the Document,
+unaltered in their text and in their titles.  Section numbers
+or the equivalent are not considered part of the section titles.
+
+@item
+Delete any section Entitled ``Endorsements''.  Such a section
+may not be included in the Modified Version.
+
+@item
+Do not retitle any existing section to be Entitled ``Endorsements'' or
+to conflict in title with any Invariant Section.
+
+@item
+Preserve any Warranty Disclaimers.
+@end enumerate
+
+If the Modified Version includes new front-matter sections or
+appendices that qualify as Secondary Sections and contain no material
+copied from the Document, you may at your option designate some or all
+of these sections as invariant.  To do this, add their titles to the
+list of Invariant Sections in the Modified Version's license notice.
+These titles must be distinct from any other section titles.
+
+You may add a section Entitled ``Endorsements'', provided it contains
+nothing but endorsements of your Modified Version by various
+parties---for example, statements of peer review or that the text has
+been approved by an organization as the authoritative definition of a
+standard.
+
+You may add a passage of up to five words as a Front-Cover Text, and a
+passage of up to 25 words as a Back-Cover Text, to the end of the list
+of Cover Texts in the Modified Version.  Only one passage of
+Front-Cover Text and one of Back-Cover Text may be added by (or
+through arrangements made by) any one entity.  If the Document already
+includes a cover text for the same cover, previously added by you or
+by arrangement made by the same entity you are acting on behalf of,
+you may not add another; but you may replace the old one, on explicit
+permission from the previous publisher that added the old one.
+
+The author(s) and publisher(s) of the Document do not by this License
+give permission to use their names for publicity for or to assert or
+imply endorsement of any Modified Version.
+
+@item
+COMBINING DOCUMENTS
+
+You may combine the Document with other documents released under this
+License, under the terms defined in section 4 above for modified
+versions, provided that you include in the combination all of the
+Invariant Sections of all of the original documents, unmodified, and
+list them all as Invariant Sections of your combined work in its
+license notice, and that you preserve all their Warranty Disclaimers.
+
+The combined work need only contain one copy of this License, and
+multiple identical Invariant Sections may be replaced with a single
+copy.  If there are multiple Invariant Sections with the same name but
+different contents, make the title of each such section unique by
+adding at the end of it, in parentheses, the name of the original
+author or publisher of that section if known, or else a unique number.
+Make the same adjustment to the section titles in the list of
+Invariant Sections in the license notice of the combined work.
+
+In the combination, you must combine any sections Entitled ``History''
+in the various original documents, forming one section Entitled
+``History''; likewise combine any sections Entitled ``Acknowledgements'',
+and any sections Entitled ``Dedications''.  You must delete all
+sections Entitled ``Endorsements.''
+
+@item
+COLLECTIONS OF DOCUMENTS
+
+You may make a collection consisting of the Document and other documents
+released under this License, and replace the individual copies of this
+License in the various documents with a single copy that is included in
+the collection, provided that you follow the rules of this License for
+verbatim copying of each of the documents in all other respects.
+
+You may extract a single document from such a collection, and distribute
+it individually under this License, provided you insert a copy of this
+License into the extracted document, and follow this License in all
+other respects regarding verbatim copying of that document.
+
+@item
+AGGREGATION WITH INDEPENDENT WORKS
+
+A compilation of the Document or its derivatives with other separate
+and independent documents or works, in or on a volume of a storage or
+distribution medium, is called an ``aggregate'' if the copyright
+resulting from the compilation is not used to limit the legal rights
+of the compilation's users beyond what the individual works permit.
+When the Document is included in an aggregate, this License does not
+apply to the other works in the aggregate which are not themselves
+derivative works of the Document.
+
+If the Cover Text requirement of section 3 is applicable to these
+copies of the Document, then if the Document is less than one half of
+the entire aggregate, the Document's Cover Texts may be placed on
+covers that bracket the Document within the aggregate, or the
+electronic equivalent of covers if the Document is in electronic form.
+Otherwise they must appear on printed covers that bracket the whole
+aggregate.
+
+@item
+TRANSLATION
+
+Translation is considered a kind of modification, so you may
+distribute translations of the Document under the terms of section 4.
+Replacing Invariant Sections with translations requires special
+permission from their copyright holders, but you may include
+translations of some or all Invariant Sections in addition to the
+original versions of these Invariant Sections.  You may include a
+translation of this License, and all the license notices in the
+Document, and any Warranty Disclaimers, provided that you also include
+the original English version of this License and the original versions
+of those notices and disclaimers.  In case of a disagreement between
+the translation and the original version of this License or a notice
+or disclaimer, the original version will prevail.
+
+If a section in the Document is Entitled ``Acknowledgements'',
+``Dedications'', or ``History'', the requirement (section 4) to Preserve
+its Title (section 1) will typically require changing the actual
+title.
+
+@item
+TERMINATION
+
+You may not copy, modify, sublicense, or distribute the Document
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense, or distribute it is void, and
+will automatically terminate your rights under this License.
+
+However, if you cease all violation of this License, then your license
+from a particular copyright holder is reinstated (a) provisionally,
+unless and until the copyright holder explicitly and finally
+terminates your license, and (b) permanently, if the copyright holder
+fails to notify you of the violation by some reasonable means prior to
+60 days after the cessation.
+
+Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License.  If your rights have been terminated and not permanently
+reinstated, receipt of a copy of some or all of the same material does
+not give you any rights to use it.
+
+@item
+FUTURE REVISIONS OF THIS LICENSE
+
+The Free Software Foundation may publish new, revised versions
+of the GNU Free Documentation License from time to time.  Such new
+versions will be similar in spirit to the present version, but may
+differ in detail to address new problems or concerns.  See
+@uref{http://www.gnu.org/copyleft/}.
+
+Each version of the License is given a distinguishing version number.
+If the Document specifies that a particular numbered version of this
+License ``or any later version'' applies to it, you have the option of
+following the terms and conditions either of that specified version or
+of any later version that has been published (not as a draft) by the
+Free Software Foundation.  If the Document does not specify a version
+number of this License, you may choose any version ever published (not
+as a draft) by the Free Software Foundation.  If the Document
+specifies that a proxy can decide which future versions of this
+License can be used, that proxy's public statement of acceptance of a
+version permanently authorizes you to choose that version for the
+Document.
+
+@item
+RELICENSING
+
+``Massive Multiauthor Collaboration Site'' (or ``MMC Site'') means any
+World Wide Web server that publishes copyrightable works and also
+provides prominent facilities for anybody to edit those works.  A
+public wiki that anybody can edit is an example of such a server.  A
+``Massive Multiauthor Collaboration'' (or ``MMC'') contained in the
+site means any set of copyrightable works thus published on the MMC
+site.
+
+``CC-BY-SA'' means the Creative Commons Attribution-Share Alike 3.0
+license published by Creative Commons Corporation, a not-for-profit
+corporation with a principal place of business in San Francisco,
+California, as well as future copyleft versions of that license
+published by that same organization.
+
+``Incorporate'' means to publish or republish a Document, in whole or
+in part, as part of another Document.
+
+An MMC is ``eligible for relicensing'' if it is licensed under this
+License, and if all works that were first published under this License
+somewhere other than this MMC, and subsequently incorporated in whole
+or in part into the MMC, (1) had no cover texts or invariant sections,
+and (2) were thus incorporated prior to November 1, 2008.
+
+The operator of an MMC Site may republish an MMC contained in the site
+under CC-BY-SA on the same site at any time before August 1, 2009,
+provided the MMC is eligible for relicensing.
+
+@end enumerate
+
+@page
+@heading ADDENDUM: How to use this License for your documents
+
+To use this License in a document you have written, include a copy of
+the License in the document and put the following copyright and
+license notices just after the title page:
+
+@smallexample
+@group
+  Copyright (C)  @var{year}  @var{your name}.
+  Permission is granted to copy, distribute and/or modify this document
+  under the terms of the GNU Free Documentation License, Version 1.3
+  or any later version published by the Free Software Foundation;
+  with no Invariant Sections, no Front-Cover Texts, and no Back-Cover
+  Texts.  A copy of the license is included in the section entitled ``GNU
+  Free Documentation License''.
+@end group
+@end smallexample
+
+If you have Invariant Sections, Front-Cover Texts and Back-Cover Texts,
+replace the ``with@dots{}Texts.'' line with this:
+
+@smallexample
+@group
+    with the Invariant Sections being @var{list their titles}, with
+    the Front-Cover Texts being @var{list}, and with the Back-Cover Texts
+    being @var{list}.
+@end group
+@end smallexample
+
+If you have Invariant Sections without Cover Texts, or some other
+combination of the three, merge those two alternatives to suit the
+situation.
+
+If your document contains nontrivial examples of program code, we
+recommend releasing these examples in parallel under your choice of
+free software license, such as the GNU General Public License,
+to permit their use in free software.
+
+@c Local Variables:
+@c ispell-local-pdict: "ispell-dict"
+@c End:
+
diff --git a/doc/gmp.info b/doc/gmp.info

new file mode 100644 (file)

index 0000000..d11b20b
--- /dev/null
+++ b/doc/gmp.info
@@ -0,0 +1,184 @@
+This is ../../gmp/doc/gmp.info, produced by makeinfo version 4.13 from
+../../gmp/doc/gmp.texi.
+
+This manual describes how to install and use the GNU multiple precision
+arithmetic library, version 5.0.5.
+
+   Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
+2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
+Free Software Foundation, Inc.
+
+   Permission is granted to copy, distribute and/or modify this
+document under the terms of the GNU Free Documentation License, Version
+1.3 or any later version published by the Free Software Foundation;
+with no Invariant Sections, with the Front-Cover Texts being "A GNU
+Manual", and with the Back-Cover Texts being "You have freedom to copy
+and modify this GNU Manual, like GNU software".  A copy of the license
+is included in *note GNU Free Documentation License::.
+
+INFO-DIR-SECTION GNU libraries
+START-INFO-DIR-ENTRY
+* gmp: (gmp).                   GNU Multiple Precision Arithmetic Library.
+END-INFO-DIR-ENTRY
+
+\1f
+Indirect:
+gmp.info-1: 991
+gmp.info-2: 299801
+\1f
+Tag Table:
+(Indirect)
+Node: Top\7f991
+Node: Copying\7f3233
+Node: Introduction to GMP\7f5084
+Node: Installing GMP\7f7795
+Node: Build Options\7f8527
+Node: ABI and ISA\7f24620
+Node: Notes for Package Builds\7f34306
+Node: Notes for Particular Systems\7f37393
+Node: Known Build Problems\7f43950
+Node: Performance optimization\7f47484
+Node: GMP Basics\7f48613
+Node: Headers and Libraries\7f49261
+Node: Nomenclature and Types\7f50685
+Node: Function Classes\7f52681
+Node: Variable Conventions\7f54374
+Node: Parameter Conventions\7f55983
+Node: Memory Management\7f58039
+Node: Reentrancy\7f59167
+Node: Useful Macros and Constants\7f61040
+Node: Compatibility with older versions\7f62038
+Node: Demonstration Programs\7f62999
+Node: Efficiency\7f64864
+Node: Debugging\7f72488
+Node: Profiling\7f79386
+Node: Autoconf\7f83377
+Node: Emacs\7f85156
+Node: Reporting Bugs\7f85762
+Node: Integer Functions\7f88305
+Node: Initializing Integers\7f89081
+Node: Assigning Integers\7f91228
+Node: Simultaneous Integer Init & Assign\7f92815
+Node: Converting Integers\7f94440
+Node: Integer Arithmetic\7f97364
+Node: Integer Division\7f98950
+Node: Integer Exponentiation\7f105260
+Node: Integer Roots\7f106700
+Node: Number Theoretic Functions\7f108374
+Node: Integer Comparisons\7f115063
+Node: Integer Logic and Bit Fiddling\7f116441
+Node: I/O of Integers\7f118988
+Node: Integer Random Numbers\7f121957
+Node: Integer Import and Export\7f124568
+Node: Miscellaneous Integer Functions\7f128578
+Node: Integer Special Functions\7f130438
+Node: Rational Number Functions\7f133525
+Node: Initializing Rationals\7f134718
+Node: Rational Conversions\7f137179
+Node: Rational Arithmetic\7f138911
+Node: Comparing Rationals\7f140215
+Node: Applying Integer Functions\7f141582
+Node: I/O of Rationals\7f143065
+Node: Floating-point Functions\7f145107
+Node: Initializing Floats\7f147992
+Node: Assigning Floats\7f152079
+Node: Simultaneous Float Init & Assign\7f154646
+Node: Converting Floats\7f156174
+Node: Float Arithmetic\7f159424
+Node: Float Comparison\7f161437
+Node: I/O of Floats\7f163018
+Node: Miscellaneous Float Functions\7f165701
+Node: Low-level Functions\7f167643
+Node: Random Number Functions\7f192196
+Node: Random State Initialization\7f193264
+Node: Random State Seeding\7f196123
+Node: Random State Miscellaneous\7f197512
+Node: Formatted Output\7f198154
+Node: Formatted Output Strings\7f198399
+Node: Formatted Output Functions\7f203778
+Node: C++ Formatted Output\7f207853
+Node: Formatted Input\7f210535
+Node: Formatted Input Strings\7f210771
+Node: Formatted Input Functions\7f215423
+Node: C++ Formatted Input\7f218392
+Node: C++ Class Interface\7f220295
+Node: C++ Interface General\7f221296
+Node: C++ Interface Integers\7f224366
+Node: C++ Interface Rationals\7f227711
+Node: C++ Interface Floats\7f231298
+Node: C++ Interface Random Numbers\7f237000
+Node: C++ Interface Limitations\7f239404
+Node: BSD Compatible Functions\7f242224
+Node: Custom Allocation\7f246935
+Node: Language Bindings\7f251295
+Node: Algorithms\7f255250
+Node: Multiplication Algorithms\7f255950
+Node: Basecase Multiplication\7f257039
+Node: Karatsuba Multiplication\7f258947
+Node: Toom 3-Way Multiplication\7f262573
+Node: Toom 4-Way Multiplication\7f268992
+Node: Higher degree Toom'n'half\7f270371
+Node: FFT Multiplication\7f271656
+Node: Other Multiplication\7f276991
+Node: Unbalanced Multiplication\7f279465
+Node: Division Algorithms\7f280253
+Node: Single Limb Division\7f280632
+Node: Basecase Division\7f283522
+Node: Divide and Conquer Division\7f284725
+Node: Block-Wise Barrett Division\7f286794
+Node: Exact Division\7f287446
+Node: Exact Remainder\7f290611
+Node: Small Quotient Division\7f292838
+Node: Greatest Common Divisor Algorithms\7f294436
+Node: Binary GCD\7f294733
+Node: Lehmer's Algorithm\7f297582
+Node: Subquadratic GCD\7f299801
+Node: Extended GCD\7f302258
+Node: Jacobi Symbol\7f303570
+Node: Powering Algorithms\7f304486
+Node: Normal Powering Algorithm\7f304749
+Node: Modular Powering Algorithm\7f305277
+Node: Root Extraction Algorithms\7f306057
+Node: Square Root Algorithm\7f306372
+Node: Nth Root Algorithm\7f308513
+Node: Perfect Square Algorithm\7f309298
+Node: Perfect Power Algorithm\7f311385
+Node: Radix Conversion Algorithms\7f312006
+Node: Binary to Radix\7f312382
+Node: Radix to Binary\7f316312
+Node: Other Algorithms\7f318400
+Node: Prime Testing Algorithm\7f318752
+Node: Factorial Algorithm\7f319936
+Node: Binomial Coefficients Algorithm\7f321339
+Node: Fibonacci Numbers Algorithm\7f322233
+Node: Lucas Numbers Algorithm\7f324707
+Node: Random Number Algorithms\7f325428
+Node: Assembly Coding\7f327549
+Node: Assembly Code Organisation\7f328509
+Node: Assembly Basics\7f329476
+Node: Assembly Carry Propagation\7f330626
+Node: Assembly Cache Handling\7f332457
+Node: Assembly Functional Units\7f334618
+Node: Assembly Floating Point\7f336231
+Node: Assembly SIMD Instructions\7f340009
+Node: Assembly Software Pipelining\7f340991
+Node: Assembly Loop Unrolling\7f342053
+Node: Assembly Writing Guide\7f344268
+Node: Internals\7f347033
+Node: Integer Internals\7f347545
+Node: Rational Internals\7f349801
+Node: Float Internals\7f351039
+Node: Raw Output Internals\7f358453
+Node: C++ Interface Internals\7f359647
+Node: Contributors\7f362933
+Node: References\7f367884
+Node: GNU Free Documentation License\7f373639
+Node: Concept Index\7f398808
+Node: Function Index\7f445058
+\1f
+End Tag Table
+
+\1f
+Local Variables:
+coding: iso-8859-1
+End:
diff --git a/doc/gmp.info-1 b/doc/gmp.info-1

new file mode 100644 (file)

index 0000000..e8d7b72
--- /dev/null
+++ b/doc/gmp.info-1
@@ -0,0 +1,7070 @@
+This is ../../gmp/doc/gmp.info, produced by makeinfo version 4.13 from
+../../gmp/doc/gmp.texi.
+
+This manual describes how to install and use the GNU multiple precision
+arithmetic library, version 5.0.5.
+
+   Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
+2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
+Free Software Foundation, Inc.
+
+   Permission is granted to copy, distribute and/or modify this
+document under the terms of the GNU Free Documentation License, Version
+1.3 or any later version published by the Free Software Foundation;
+with no Invariant Sections, with the Front-Cover Texts being "A GNU
+Manual", and with the Back-Cover Texts being "You have freedom to copy
+and modify this GNU Manual, like GNU software".  A copy of the license
+is included in *note GNU Free Documentation License::.
+
+INFO-DIR-SECTION GNU libraries
+START-INFO-DIR-ENTRY
+* gmp: (gmp).                   GNU Multiple Precision Arithmetic Library.
+END-INFO-DIR-ENTRY
+
+\1f
+File: gmp.info,  Node: Top,  Next: Copying,  Prev: (dir),  Up: (dir)
+
+GNU MP
+******
+
+   This manual describes how to install and use the GNU multiple
+precision arithmetic library, version 5.0.5.
+
+   Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
+2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
+Free Software Foundation, Inc.
+
+   Permission is granted to copy, distribute and/or modify this
+document under the terms of the GNU Free Documentation License, Version
+1.3 or any later version published by the Free Software Foundation;
+with no Invariant Sections, with the Front-Cover Texts being "A GNU
+Manual", and with the Back-Cover Texts being "You have freedom to copy
+and modify this GNU Manual, like GNU software".  A copy of the license
+is included in *note GNU Free Documentation License::.
+
+
+* Menu:
+
+* Copying::                    GMP Copying Conditions (LGPL).
+* Introduction to GMP::        Brief introduction to GNU MP.
+* Installing GMP::             How to configure and compile the GMP library.
+* GMP Basics::                 What every GMP user should know.
+* Reporting Bugs::             How to usefully report bugs.
+* Integer Functions::          Functions for arithmetic on signed integers.
+* Rational Number Functions::  Functions for arithmetic on rational numbers.
+* Floating-point Functions::   Functions for arithmetic on floats.
+* Low-level Functions::        Fast functions for natural numbers.
+* Random Number Functions::    Functions for generating random numbers.
+* Formatted Output::           `printf' style output.
+* Formatted Input::            `scanf' style input.
+* C++ Class Interface::        Class wrappers around GMP types.
+* BSD Compatible Functions::   All functions found in BSD MP.
+* Custom Allocation::          How to customize the internal allocation.
+* Language Bindings::          Using GMP from other languages.
+* Algorithms::                 What happens behind the scenes.
+* Internals::                  How values are represented behind the scenes.
+
+* Contributors::               Who brings you this library?
+* References::                 Some useful papers and books to read.
+* GNU Free Documentation License::
+* Concept Index::
+* Function Index::
+
+\1f
+File: gmp.info,  Node: Copying,  Next: Introduction to GMP,  Prev: Top,  Up: Top
+
+GNU MP Copying Conditions
+*************************
+
+This library is "free"; this means that everyone is free to use it and
+free to redistribute it on a free basis.  The library is not in the
+public domain; it is copyrighted and there are restrictions on its
+distribution, but these restrictions are designed to permit everything
+that a good cooperating citizen would want to do.  What is not allowed
+is to try to prevent others from further sharing any version of this
+library that they might get from you.
+
+   Specifically, we want to make sure that you have the right to give
+away copies of the library, that you receive source code or else can
+get it if you want it, that you can change this library or use pieces
+of it in new free programs, and that you know you can do these things.
+
+   To make sure that everyone has such rights, we have to forbid you to
+deprive anyone else of these rights.  For example, if you distribute
+copies of the GNU MP library, you must give the recipients all the
+rights that you have.  You must make sure that they, too, receive or
+can get the source code.  And you must tell them their rights.
+
+   Also, for our own protection, we must make certain that everyone
+finds out that there is no warranty for the GNU MP library.  If it is
+modified by someone else and passed on, we want their recipients to
+know that what they have is not what we distributed, so that any
+problems introduced by others will not reflect on our reputation.
+
+   The precise conditions of the license for the GNU MP library are
+found in the Lesser General Public License version 3 that accompanies
+the source code, see `COPYING.LIB'.  Certain demonstration programs are
+provided under the terms of the plain General Public License version 3,
+see `COPYING'.
+
+\1f
+File: gmp.info,  Node: Introduction to GMP,  Next: Installing GMP,  Prev: Copying,  Up: Top
+
+1 Introduction to GNU MP
+************************
+
+GNU MP is a portable library written in C for arbitrary precision
+arithmetic on integers, rational numbers, and floating-point numbers.
+It aims to provide the fastest possible arithmetic for all applications
+that need higher precision than is directly supported by the basic C
+types.
+
+   Many applications use just a few hundred bits of precision; but some
+applications may need thousands or even millions of bits.  GMP is
+designed to give good performance for both, by choosing algorithms
+based on the sizes of the operands, and by carefully keeping the
+overhead at a minimum.
+
+   The speed of GMP is achieved by using fullwords as the basic
+arithmetic type, by using sophisticated algorithms, by including
+carefully optimized assembly code for the most common inner loops for
+many different CPUs, and by a general emphasis on speed (as opposed to
+simplicity or elegance).
+
+   There is assembly code for these CPUs: ARM, DEC Alpha 21064, 21164,
+and 21264, AMD 29000, AMD K6, K6-2, Athlon, and Athlon64, Hitachi
+SuperH and SH-2, HPPA 1.0, 1.1 and 2.0, Intel Pentium, Pentium
+Pro/II/III, Pentium 4, generic x86, Intel IA-64, i960, Motorola
+MC68000, MC68020, MC88100, and MC88110, Motorola/IBM PowerPC 32 and 64,
+National NS32000, IBM POWER, MIPS R3000, R4000, SPARCv7, SuperSPARC,
+generic SPARCv8, UltraSPARC, DEC VAX, and Zilog Z8000.  There are also
+some optimizations for Cray vector systems, Clipper, IBM ROMP (RT), and
+Pyramid AP/XP.
+
+For up-to-date information on GMP, please see the GMP web pages at
+
+     `http://gmplib.org/'
+
+The latest version of the library is available at
+
+     `ftp://ftp.gnu.org/gnu/gmp/'
+
+   Many sites around the world mirror `ftp.gnu.org'; please use a mirror
+near you.  See `http://www.gnu.org/order/ftp.html' for a full list.
+
+   There are three public mailing lists of interest.  One for release
+announcements, one for general questions and discussions about usage of
+the GMP library and one for bug reports.  For more information, see
+
+     `http://gmplib.org/mailman/listinfo/'.
+
+   The proper place for bug reports is <gmp-bugs@gmplib.org>.  See
+*note Reporting Bugs:: for information about reporting bugs.
+
+
+1.1 How to use this Manual
+==========================
+
+Everyone should read *note GMP Basics::.  If you need to install the
+library yourself, then read *note Installing GMP::.  If you have a
+system with multiple ABIs, then read *note ABI and ISA::, for the
+compiler options that must be used on applications.
+
+   The rest of the manual can be used for later reference, although it
+is probably a good idea to glance through it.
+
+\1f
+File: gmp.info,  Node: Installing GMP,  Next: GMP Basics,  Prev: Introduction to GMP,  Up: Top
+
+2 Installing GMP
+****************
+
+GMP has an autoconf/automake/libtool based configuration system.  On a
+Unix-like system a basic build can be done with
+
+     ./configure
+     make
+
+Some self-tests can be run with
+
+     make check
+
+And you can install (under `/usr/local' by default) with
+
+     make install
+
+   If you experience problems, please report them to
+<gmp-bugs@gmplib.org>.  See *note Reporting Bugs::, for information on
+what to include in useful bug reports.
+
+* Menu:
+
+* Build Options::
+* ABI and ISA::
+* Notes for Package Builds::
+* Notes for Particular Systems::
+* Known Build Problems::
+* Performance optimization::
+
+\1f
+File: gmp.info,  Node: Build Options,  Next: ABI and ISA,  Prev: Installing GMP,  Up: Installing GMP
+
+2.1 Build Options
+=================
+
+All the usual autoconf configure options are available; run `./configure
+--help' for a summary.  The file `INSTALL.autoconf' has some generic
+installation information too.
+
+Tools
+     `configure' requires various Unix-like tools.  See *note Notes for
+     Particular Systems::, for some options on non-Unix systems.
+
+     It might be possible to build without the help of `configure',
+     certainly all the code is there, but unfortunately you'll be on
+     your own.
+
+Build Directory
+     To compile in a separate build directory, `cd' to that directory,
+     and prefix the configure command with the path to the GMP source
+     directory.  For example
+
+          cd /my/build/dir
+          /my/sources/gmp-5.0.5/configure
+
+     Not all `make' programs have the necessary features (`VPATH') to
+     support this.  In particular, SunOS and Solaris `make' have bugs
+     that make them unable to build in a separate directory.  Use GNU
+     `make' instead.
+
+`--prefix' and `--exec-prefix'
+     The `--prefix' option can be used in the normal way to direct GMP
+     to install under a particular tree.  The default is `/usr/local'.
+
+     `--exec-prefix' can be used to direct architecture-dependent files
+     like `libgmp.a' to a different location.  This can be used to share
+     architecture-independent parts like the documentation, but
+     separate the dependent parts.  Note however that `gmp.h' and
+     `mp.h' are architecture-dependent since they encode certain
+     aspects of `libgmp', so it will be necessary to ensure both
+     `$prefix/include' and `$exec_prefix/include' are available to the
+     compiler.
+
+`--disable-shared', `--disable-static'
+     By default both shared and static libraries are built (where
+     possible), but one or other can be disabled.  Shared libraries
+     result in smaller executables and permit code sharing between
+     separate running processes, but on some CPUs are slightly slower,
+     having a small cost on each function call.
+
+Native Compilation, `--build=CPU-VENDOR-OS'
+     For normal native compilation, the system can be specified with
+     `--build'.  By default `./configure' uses the output from running
+     `./config.guess'.  On some systems `./config.guess' can determine
+     the exact CPU type, on others it will be necessary to give it
+     explicitly.  For example,
+
+          ./configure --build=ultrasparc-sun-solaris2.7
+
+     In all cases the `OS' part is important, since it controls how
+     libtool generates shared libraries.  Running `./config.guess' is
+     the simplest way to see what it should be, if you don't know
+     already.
+
+Cross Compilation, `--host=CPU-VENDOR-OS'
+     When cross-compiling, the system used for compiling is given by
+     `--build' and the system where the library will run is given by
+     `--host'.  For example when using a FreeBSD Athlon system to build
+     GNU/Linux m68k binaries,
+
+          ./configure --build=athlon-pc-freebsd3.5 --host=m68k-mac-linux-gnu
+
+     Compiler tools are sought first with the host system type as a
+     prefix.  For example `m68k-mac-linux-gnu-ranlib' is tried, then
+     plain `ranlib'.  This makes it possible for a set of
+     cross-compiling tools to co-exist with native tools.  The prefix
+     is the argument to `--host', and this can be an alias, such as
+     `m68k-linux'.  But note that tools don't have to be set up this
+     way, it's enough to just have a `PATH' with a suitable
+     cross-compiling `cc' etc.
+
+     Compiling for a different CPU in the same family as the build
+     system is a form of cross-compilation, though very possibly this
+     would merely be special options on a native compiler.  In any case
+     `./configure' avoids depending on being able to run code on the
+     build system, which is important when creating binaries for a
+     newer CPU since they very possibly won't run on the build system.
+
+     In all cases the compiler must be able to produce an executable
+     (of whatever format) from a standard C `main'.  Although only
+     object files will go to make up `libgmp', `./configure' uses
+     linking tests for various purposes, such as determining what
+     functions are available on the host system.
+
+     Currently a warning is given unless an explicit `--build' is used
+     when cross-compiling, because it may not be possible to correctly
+     guess the build system type if the `PATH' has only a
+     cross-compiling `cc'.
+
+     Note that the `--target' option is not appropriate for GMP.  It's
+     for use when building compiler tools, with `--host' being where
+     they will run, and `--target' what they'll produce code for.
+     Ordinary programs or libraries like GMP are only interested in the
+     `--host' part, being where they'll run.  (Some past versions of
+     GMP used `--target' incorrectly.)
+
+CPU types
+     In general, if you want a library that runs as fast as possible,
+     you should configure GMP for the exact CPU type your system uses.
+     However, this may mean the binaries won't run on older members of
+     the family, and might run slower on other members, older or newer.
+     The best idea is always to build GMP for the exact machine type
+     you intend to run it on.
+
+     The following CPUs have specific support.  See `configure.in' for
+     details of what code and compiler options they select.
+
+        * Alpha: alpha, alphaev5, alphaev56, alphapca56, alphapca57,
+          alphaev6, alphaev67, alphaev68, alphaev7
+
+        * Cray: c90, j90, t90, sv1
+
+        * HPPA: hppa1.0, hppa1.1, hppa2.0, hppa2.0n, hppa2.0w, hppa64
+
+        * IA-64: ia64, itanium, itanium2
+
+        * MIPS: mips, mips3, mips64
+
+        * Motorola: m68k, m68000, m68010, m68020, m68030, m68040,
+          m68060, m68302, m68360, m88k, m88110
+
+        * POWER: power, power1, power2, power2sc
+
+        * PowerPC: powerpc, powerpc64, powerpc401, powerpc403,
+          powerpc405, powerpc505, powerpc601, powerpc602, powerpc603,
+          powerpc603e, powerpc604, powerpc604e, powerpc620, powerpc630,
+          powerpc740, powerpc7400, powerpc7450, powerpc750, powerpc801,
+          powerpc821, powerpc823, powerpc860, powerpc970
+
+        * SPARC: sparc, sparcv8, microsparc, supersparc, sparcv9,
+          ultrasparc, ultrasparc2, ultrasparc2i, ultrasparc3, sparc64
+
+        * x86 family: i386, i486, i586, pentium, pentiummmx, pentiumpro,
+          pentium2, pentium3, pentium4, k6, k62, k63, athlon, amd64,
+          viac3, viac32
+
+        * Other: a29k, arm, clipper, i960, ns32k, pyramid, sh, sh2, vax,
+          z8k
+
+     CPUs not listed will use generic C code.
+
+Generic C Build
+     If some of the assembly code causes problems, or if otherwise
+     desired, the generic C code can be selected with CPU `none'.  For
+     example,
+
+          ./configure --host=none-unknown-freebsd3.5
+
+     Note that this will run quite slowly, but it should be portable
+     and should at least make it possible to get something running if
+     all else fails.
+
+Fat binary, `--enable-fat'
+     Using `--enable-fat' selects a "fat binary" build on x86, where
+     optimized low level subroutines are chosen at runtime according to
+     the CPU detected.  This means more code, but gives good
+     performance on all x86 chips.  (This option might become available
+     for more architectures in the future.)
+
+`ABI'
+     On some systems GMP supports multiple ABIs (application binary
+     interfaces), meaning data type sizes and calling conventions.  By
+     default GMP chooses the best ABI available, but a particular ABI
+     can be selected.  For example
+
+          ./configure --host=mips64-sgi-irix6 ABI=n32
+
+     See *note ABI and ISA::, for the available choices on relevant
+     CPUs, and what applications need to do.
+
+`CC', `CFLAGS'
+     By default the C compiler used is chosen from among some likely
+     candidates, with `gcc' normally preferred if it's present.  The
+     usual `CC=whatever' can be passed to `./configure' to choose
+     something different.
+
+     For various systems, default compiler flags are set based on the
+     CPU and compiler.  The usual `CFLAGS="-whatever"' can be passed to
+     `./configure' to use something different or to set good flags for
+     systems GMP doesn't otherwise know.
+
+     The `CC' and `CFLAGS' used are printed during `./configure', and
+     can be found in each generated `Makefile'.  This is the easiest way
+     to check the defaults when considering changing or adding
+     something.
+
+     Note that when `CC' and `CFLAGS' are specified on a system
+     supporting multiple ABIs it's important to give an explicit
+     `ABI=whatever', since GMP can't determine the ABI just from the
+     flags and won't be able to select the correct assembly code.
+
+     If just `CC' is selected then normal default `CFLAGS' for that
+     compiler will be used (if GMP recognises it).  For example
+     `CC=gcc' can be used to force the use of GCC, with default flags
+     (and default ABI).
+
+`CPPFLAGS'
+     Any flags like `-D' defines or `-I' includes required by the
+     preprocessor should be set in `CPPFLAGS' rather than `CFLAGS'.
+     Compiling is done with both `CPPFLAGS' and `CFLAGS', but
+     preprocessing uses just `CPPFLAGS'.  This distinction is because
+     most preprocessors won't accept all the flags the compiler does.
+     Preprocessing is done separately in some configure tests, and in
+     the `ansi2knr' support for K&R compilers.
+
+`CC_FOR_BUILD'
+     Some build-time programs are compiled and run to generate
+     host-specific data tables.  `CC_FOR_BUILD' is the compiler used
+     for this.  It doesn't need to be in any particular ABI or mode, it
+     merely needs to generate executables that can run.  The default is
+     to try the selected `CC' and some likely candidates such as `cc'
+     and `gcc', looking for something that works.
+
+     No flags are used with `CC_FOR_BUILD' because a simple invocation
+     like `cc foo.c' should be enough.  If some particular options are
+     required they can be included as for instance `CC_FOR_BUILD="cc
+     -whatever"'.
+
+C++ Support, `--enable-cxx'
+     C++ support in GMP can be enabled with `--enable-cxx', in which
+     case a C++ compiler will be required.  As a convenience
+     `--enable-cxx=detect' can be used to enable C++ support only if a
+     compiler can be found.  The C++ support consists of a library
+     `libgmpxx.la' and header file `gmpxx.h' (*note Headers and
+     Libraries::).
+
+     A separate `libgmpxx.la' has been adopted rather than having C++
+     objects within `libgmp.la' in order to ensure dynamic linked C
+     programs aren't bloated by a dependency on the C++ standard
+     library, and to avoid any chance that the C++ compiler could be
+     required when linking plain C programs.
+
+     `libgmpxx.la' will use certain internals from `libgmp.la' and can
+     only be expected to work with `libgmp.la' from the same GMP
+     version.  Future changes to the relevant internals will be
+     accompanied by renaming, so a mismatch will cause unresolved
+     symbols rather than perhaps mysterious misbehaviour.
+
+     In general `libgmpxx.la' will be usable only with the C++ compiler
+     that built it, since name mangling and runtime support are usually
+     incompatible between different compilers.
+
+`CXX', `CXXFLAGS'
+     When C++ support is enabled, the C++ compiler and its flags can be
+     set with variables `CXX' and `CXXFLAGS' in the usual way.  The
+     default for `CXX' is the first compiler that works from a list of
+     likely candidates, with `g++' normally preferred when available.
+     The default for `CXXFLAGS' is to try `CFLAGS', `CFLAGS' without
+     `-g', then for `g++' either `-g -O2' or `-O2', or for other
+     compilers `-g' or nothing.  Trying `CFLAGS' this way is convenient
+     when using `gcc' and `g++' together, since the flags for `gcc' will
+     usually suit `g++'.
+
+     It's important that the C and C++ compilers match, meaning their
+     startup and runtime support routines are compatible and that they
+     generate code in the same ABI (if there's a choice of ABIs on the
+     system).  `./configure' isn't currently able to check these things
+     very well itself, so for that reason `--disable-cxx' is the
+     default, to avoid a build failure due to a compiler mismatch.
+     Perhaps this will change in the future.
+
+     Incidentally, it's normally not good enough to set `CXX' to the
+     same as `CC'.  Although `gcc' for instance recognises `foo.cc' as
+     C++ code, only `g++' will invoke the linker the right way when
+     building an executable or shared library from C++ object files.
+
+Temporary Memory, `--enable-alloca=<choice>'
+     GMP allocates temporary workspace using one of the following three
+     methods, which can be selected with for instance
+     `--enable-alloca=malloc-reentrant'.
+
+        * `alloca' - C library or compiler builtin.
+
+        * `malloc-reentrant' - the heap, in a re-entrant fashion.
+
+        * `malloc-notreentrant' - the heap, with global variables.
+
+     For convenience, the following choices are also available.
+     `--disable-alloca' is the same as `no'.
+
+        * `yes' - a synonym for `alloca'.
+
+        * `no' - a synonym for `malloc-reentrant'.
+
+        * `reentrant' - `alloca' if available, otherwise
+          `malloc-reentrant'.  This is the default.
+
+        * `notreentrant' - `alloca' if available, otherwise
+          `malloc-notreentrant'.
+
+     `alloca' is reentrant and fast, and is recommended.  It actually
+     allocates just small blocks on the stack; larger ones use
+     malloc-reentrant.
+
+     `malloc-reentrant' is, as the name suggests, reentrant and thread
+     safe, but `malloc-notreentrant' is faster and should be used if
+     reentrancy is not required.
+
+     The two malloc methods in fact use the memory allocation functions
+     selected by `mp_set_memory_functions', these being `malloc' and
+     friends by default.  *Note Custom Allocation::.
+
+     An additional choice `--enable-alloca=debug' is available, to help
+     when debugging memory related problems (*note Debugging::).
+
+FFT Multiplication, `--disable-fft'
+     By default multiplications are done using Karatsuba, 3-way Toom,
+     higher degree Toom, and Fermat FFT.  The FFT is only used on large
+     to very large operands and can be disabled to save code size if
+     desired.
+
+Berkeley MP, `--enable-mpbsd'
+     The Berkeley MP compatibility library (`libmp') and header file
+     (`mp.h') are built and installed only if `--enable-mpbsd' is used.
+     *Note BSD Compatible Functions::.
+
+Assertion Checking, `--enable-assert'
+     This option enables some consistency checking within the library.
+     This can be of use while debugging, *note Debugging::.
+
+Execution Profiling, `--enable-profiling=prof/gprof/instrument'
+     Enable profiling support, in one of various styles, *note
+     Profiling::.
+
+`MPN_PATH'
+     Various assembly versions of each mpn subroutine are provided.
+     For a given CPU, a search is made through a path to choose a
+     version of each.  For example `sparcv8' has
+
+          MPN_PATH="sparc32/v8 sparc32 generic"
+
+     which means look first for v8 code, then plain sparc32 (which is
+     v7), and finally fall back on generic C.  Knowledgeable users with
+     special requirements can specify a different path.  Normally this
+     is completely unnecessary.
+
+Documentation
+     The source for the document you're now reading is `doc/gmp.texi',
+     in Texinfo format, see *note Texinfo: (texinfo)Top.
+
+     Info format `doc/gmp.info' is included in the distribution.  The
+     usual automake targets are available to make PostScript, DVI, PDF
+     and HTML (these will require various TeX and Texinfo tools).
+
+     DocBook and XML can be generated by the Texinfo `makeinfo' program
+     too, see *note Options for `makeinfo': (texinfo)makeinfo options.
+
+     Some supplementary notes can also be found in the `doc'
+     subdirectory.
+
+
+\1f
+File: gmp.info,  Node: ABI and ISA,  Next: Notes for Package Builds,  Prev: Build Options,  Up: Installing GMP
+
+2.2 ABI and ISA
+===============
+
+ABI (Application Binary Interface) refers to the calling conventions
+between functions, meaning what registers are used and what sizes the
+various C data types are.  ISA (Instruction Set Architecture) refers to
+the instructions and registers a CPU has available.
+
+   Some 64-bit ISA CPUs have both a 64-bit ABI and a 32-bit ABI
+defined, the latter for compatibility with older CPUs in the family.
+GMP supports some CPUs like this in both ABIs.  In fact within GMP
+`ABI' means a combination of chip ABI, plus how GMP chooses to use it.
+For example in some 32-bit ABIs, GMP may support a limb as either a
+32-bit `long' or a 64-bit `long long'.
+
+   By default GMP chooses the best ABI available for a given system,
+and this generally gives significantly greater speed.  But an ABI can
+be chosen explicitly to make GMP compatible with other libraries, or
+particular application requirements.  For example,
+
+     ./configure ABI=32
+
+   In all cases it's vital that all object code used in a given program
+is compiled for the same ABI.
+
+   Usually a limb is implemented as a `long'.  When a `long long' limb
+is used this is encoded in the generated `gmp.h'.  This is convenient
+for applications, but it does mean that `gmp.h' will vary, and can't be
+just copied around.  `gmp.h' remains compiler independent though, since
+all compilers for a particular ABI will be expected to use the same
+limb type.
+
+   Currently no attempt is made to follow whatever conventions a system
+has for installing library or header files built for a particular ABI.
+This will probably only matter when installing multiple builds of GMP,
+and it might be as simple as configuring with a special `libdir', or it
+might require more than that.  Note that builds for different ABIs need
+to be done separately, with a fresh `./configure' and `make' each.
+
+
+AMD64 (`x86_64')
+     On AMD64 systems supporting both 32-bit and 64-bit modes for
+     applications, the following ABI choices are available.
+
+    `ABI=64'
+          The 64-bit ABI uses 64-bit limbs and pointers and makes full
+          use of the chip architecture.  This is the default.
+          Applications will usually not need special compiler flags,
+          but for reference the option is
+
+               gcc  -m64
+
+    `ABI=32'
+          The 32-bit ABI is the usual i386 conventions.  This will be
+          slower, and is not recommended except for inter-operating
+          with other code not yet 64-bit capable.  Applications must be
+          compiled with
+
+               gcc  -m32
+
+          (In GCC 2.95 and earlier there's no `-m32' option, it's the
+          only mode.)
+
+
+HPPA 2.0 (`hppa2.0*', `hppa64')
+
+    `ABI=2.0w'
+          The 2.0w ABI uses 64-bit limbs and pointers and is available
+          on HP-UX 11 or up.  Applications must be compiled with
+
+               gcc [built for 2.0w]
+               cc  +DD64
+
+    `ABI=2.0n'
+          The 2.0n ABI means the 32-bit HPPA 1.0 ABI and all its normal
+          calling conventions, but with 64-bit instructions permitted
+          within functions.  GMP uses a 64-bit `long long' for a limb.
+          This ABI is available on hppa64 GNU/Linux and on HP-UX 10 or
+          higher.  Applications must be compiled with
+
+               gcc [built for 2.0n]
+               cc  +DA2.0 +e
+
+          Note that current versions of GCC (eg. 3.2) don't generate
+          64-bit instructions for `long long' operations and so may be
+          slower than for 2.0w.  (The GMP assembly code is the same
+          though.)
+
+    `ABI=1.0'
+          HPPA 2.0 CPUs can run all HPPA 1.0 and 1.1 code in the 32-bit
+          HPPA 1.0 ABI.  No special compiler options are needed for
+          applications.
+
+     All three ABIs are available for CPU types `hppa2.0w', `hppa2.0'
+     and `hppa64', but for CPU type `hppa2.0n' only 2.0n or 1.0 are
+     considered.
+
+     Note that GCC on HP-UX has no options to choose between 2.0n and
+     2.0w modes, unlike HP `cc'.  Instead it must be built for one or
+     the other ABI.  GMP will detect how it was built, and skip to the
+     corresponding `ABI'.
+
+
+IA-64 under HP-UX (`ia64*-*-hpux*', `itanium*-*-hpux*')
+     HP-UX supports two ABIs for IA-64.  GMP performance is the same in
+     both.
+
+    `ABI=32'
+          In the 32-bit ABI, pointers, `int's and `long's are 32 bits
+          and GMP uses a 64 bit `long long' for a limb.  Applications
+          can be compiled without any special flags since this ABI is
+          the default in both HP C and GCC, but for reference the flags
+          are
+
+               gcc  -milp32
+               cc   +DD32
+
+    `ABI=64'
+          In the 64-bit ABI, `long's and pointers are 64 bits and GMP
+          uses a `long' for a limb.  Applications must be compiled with
+
+               gcc  -mlp64
+               cc   +DD64
+
+     On other IA-64 systems, GNU/Linux for instance, `ABI=64' is the
+     only choice.
+
+
+MIPS under IRIX 6 (`mips*-*-irix[6789]')
+     IRIX 6 always has a 64-bit MIPS 3 or better CPU, and supports ABIs
+     o32, n32, and 64.  n32 or 64 are recommended, and GMP performance
+     will be the same in each.  The default is n32.
+
+    `ABI=o32'
+          The o32 ABI is 32-bit pointers and integers, and no 64-bit
+          operations.  GMP will be slower than in n32 or 64, this
+          option only exists to support old compilers, eg. GCC 2.7.2.
+          Applications can be compiled with no special flags on an old
+          compiler, or on a newer compiler with
+
+               gcc  -mabi=32
+               cc   -32
+
+    `ABI=n32'
+          The n32 ABI is 32-bit pointers and integers, but with a
+          64-bit limb using a `long long'.  Applications must be
+          compiled with
+
+               gcc  -mabi=n32
+               cc   -n32
+
+    `ABI=64'
+          The 64-bit ABI is 64-bit pointers and integers.  Applications
+          must be compiled with
+
+               gcc  -mabi=64
+               cc   -64
+
+     Note that MIPS GNU/Linux, as of kernel version 2.2, doesn't have
+     the necessary support for n32 or 64 and so only gets a 32-bit limb
+     and the MIPS 2 code.
+
+
+PowerPC 64 (`powerpc64', `powerpc620', `powerpc630', `powerpc970', `power4', `power5')
+
+    `ABI=aix64'
+          The AIX 64 ABI uses 64-bit limbs and pointers and is the
+          default on PowerPC 64 `*-*-aix*' systems.  Applications must
+          be compiled with
+
+               gcc  -maix64
+               xlc  -q64
+
+    `ABI=mode64'
+          The `mode64' ABI uses 64-bit limbs and pointers, and is the
+          default on 64-bit GNU/Linux, BSD, and Mac OS X/Darwin
+          systems.  Applications must be compiled with
+
+               gcc  -m64
+
+    `ABI=mode32'
+          The `mode32' ABI uses a 64-bit `long long' limb but with the
+          chip still in 32-bit mode and using 32-bit calling
+          conventions.  This is the default for systems where the true
+          64-bit ABI is unavailable.  No special compiler options are
+          typically needed for applications.
+
+    `ABI=32'
+          This is the basic 32-bit PowerPC ABI, with a 32-bit limb.  No
+          special compiler options are needed for applications.
+
+     GMP's speed is greatest for `aix64' and `mode64'.  In `ABI=32'
+     only the 32-bit ISA is used and this doesn't make full use of a
+     64-bit chip.  On a suitable system we could perhaps use more of
+     the ISA, but there are no plans to do so.
+
+
+Sparc V9 (`sparc64', `sparcv9', `ultrasparc*')
+
+    `ABI=64'
+          The 64-bit V9 ABI is available on the various BSD sparc64
+          ports, recent versions of Sparc64 GNU/Linux, and Solaris 2.7
+          and up (when the kernel is in 64-bit mode).  GCC 3.2 or
+          higher, or Sun `cc' is required.  On GNU/Linux, depending on
+          the default `gcc' mode, applications must be compiled with
+
+               gcc  -m64
+
+          On Solaris applications must be compiled with
+
+               gcc  -m64 -mptr64 -Wa,-xarch=v9 -mcpu=v9
+               cc   -xarch=v9
+
+          On the BSD sparc64 systems no special options are required,
+          since 64-bits is the only ABI available.
+
+    `ABI=32'
+          For the basic 32-bit ABI, GMP still uses as much of the V9
+          ISA as it can.  In the Sun documentation this combination is
+          known as "v8plus".  On GNU/Linux, depending on the default
+          `gcc' mode, applications may need to be compiled with
+
+               gcc  -m32
+
+          On Solaris, no special compiler options are required for
+          applications, though using something like the following is
+          recommended.  (`gcc' 2.8 and earlier only support `-mv8'
+          though.)
+
+               gcc  -mv8plus
+               cc   -xarch=v8plus
+
+     GMP speed is greatest in `ABI=64', so it's the default where
+     available.  The speed is partly because there are extra registers
+     available and partly because 64-bits is considered the more
+     important case and has therefore had better code written for it.
+
+     Don't be confused by the names of the `-m' and `-x' compiler
+     options, they're called `arch' but effectively control both ABI
+     and ISA.
+
+     On Solaris 2.6 and earlier, only `ABI=32' is available since the
+     kernel doesn't save all registers.
+
+     On Solaris 2.7 with the kernel in 32-bit mode, a normal native
+     build will reject `ABI=64' because the resulting executables won't
+     run.  `ABI=64' can still be built if desired by making it look
+     like a cross-compile, for example
+
+          ./configure --build=none --host=sparcv9-sun-solaris2.7 ABI=64
+
+\1f
+File: gmp.info,  Node: Notes for Package Builds,  Next: Notes for Particular Systems,  Prev: ABI and ISA,  Up: Installing GMP
+
+2.3 Notes for Package Builds
+============================
+
+GMP should present no great difficulties for packaging in a binary
+distribution.
+
+   Libtool is used to build the library and `-version-info' is set
+appropriately, having started from `3:0:0' in GMP 3.0 (*note Library
+interface versions: (libtool)Versioning.).
+
+   The GMP 4 series will be upwardly binary compatible in each release
+and will be upwardly binary compatible with all of the GMP 3 series.
+Additional function interfaces may be added in each release, so on
+systems where libtool versioning is not fully checked by the loader an
+auxiliary mechanism may be needed to express that a dynamic linked
+application depends on a new enough GMP.
+
+   An auxiliary mechanism may also be needed to express that
+`libgmpxx.la' (from `--enable-cxx', *note Build Options::) requires
+`libgmp.la' from the same GMP version, since this is not done by the
+libtool versioning, nor otherwise.  A mismatch will result in
+unresolved symbols from the linker, or perhaps the loader.
+
+   When building a package for a CPU family, care should be taken to use
+`--host' (or `--build') to choose the least common denominator among
+the CPUs which might use the package.  For example this might mean plain
+`sparc' (meaning V7) for SPARCs.
+
+   For x86s, `--enable-fat' sets things up for a fat binary build,
+making a runtime selection of optimized low level routines.  This is a
+good choice for packaging to run on a range of x86 chips.
+
+   Users who care about speed will want GMP built for their exact CPU
+type, to make best use of the available optimizations.  Providing a way
+to suitably rebuild a package may be useful.  This could be as simple
+as making it possible for a user to omit `--build' (and `--host') so
+`./config.guess' will detect the CPU.  But a way to manually specify a
+`--build' will be wanted for systems where `./config.guess' is inexact.
+
+   On systems with multiple ABIs, a packaged build will need to decide
+which among the choices is to be provided, see *note ABI and ISA::.  A
+given run of `./configure' etc will only build one ABI.  If a second
+ABI is also required then a second run of `./configure' etc must be
+made, starting from a clean directory tree (`make distclean').
+
+   As noted under "ABI and ISA", currently no attempt is made to follow
+system conventions for install locations that vary with ABI, such as
+`/usr/lib/sparcv9' for `ABI=64' as opposed to `/usr/lib' for `ABI=32'.
+A package build can override `libdir' and other standard variables as
+necessary.
+
+   Note that `gmp.h' is a generated file, and will be architecture and
+ABI dependent.  When attempting to install two ABIs simultaneously it
+will be important that an application compile gets the correct `gmp.h'
+for its desired ABI.  If compiler include paths don't vary with ABI
+options then it might be necessary to create a `/usr/include/gmp.h'
+which tests preprocessor symbols and chooses the correct actual `gmp.h'.
+
+\1f
+File: gmp.info,  Node: Notes for Particular Systems,  Next: Known Build Problems,  Prev: Notes for Package Builds,  Up: Installing GMP
+
+2.4 Notes for Particular Systems
+================================
+
+AIX 3 and 4
+     On systems `*-*-aix[34]*' shared libraries are disabled by
+     default, since some versions of the native `ar' fail on the
+     convenience libraries used.  A shared build can be attempted with
+
+          ./configure --enable-shared --disable-static
+
+     Note that the `--disable-static' is necessary because in a shared
+     build libtool makes `libgmp.a' a symlink to `libgmp.so',
+     apparently for the benefit of old versions of `ld' which only
+     recognise `.a', but unfortunately this is done even if a fully
+     functional `ld' is available.
+
+ARM
+     On systems `arm*-*-*', versions of GCC up to and including 2.95.3
+     have a bug in unsigned division, giving wrong results for some
+     operands.  GMP `./configure' will demand GCC 2.95.4 or later.
+
+Compaq C++
+     Compaq C++ on OSF 5.1 has two flavours of `iostream', a standard
+     one and an old pre-standard one (see `man iostream_intro').  GMP
+     can only use the standard one, which unfortunately is not the
+     default but must be selected by defining `__USE_STD_IOSTREAM'.
+     Configure with for instance
+
+          ./configure --enable-cxx CPPFLAGS=-D__USE_STD_IOSTREAM
+
+Floating Point Mode
+     On some systems, the hardware floating point has a control mode
+     which can set all operations to be done in a particular precision,
+     for instance single, double or extended on x86 systems (x87
+     floating point).  The GMP functions involving a `double' cannot be
+     expected to operate to their full precision when the hardware is
+     in single precision mode.  Of course this affects all code,
+     including application code, not just GMP.
+
+MS-DOS and MS Windows
+     On an MS-DOS system DJGPP can be used to build GMP, and on an MS
+     Windows system Cygwin, DJGPP and MINGW can be used.  All three are
+     excellent ports of GCC and the various GNU tools.
+
+          `http://www.cygwin.com/'
+          `http://www.delorie.com/djgpp/'
+          `http://www.mingw.org/'
+
+     Microsoft also publishes an Interix "Services for Unix" which can
+     be used to build GMP on Windows (with a normal `./configure'), but
+     it's not free software.
+
+MS Windows DLLs
+     On systems `*-*-cygwin*', `*-*-mingw*' and `*-*-pw32*' by default
+     GMP builds only a static library, but a DLL can be built instead
+     using
+
+          ./configure --disable-static --enable-shared
+
+     Static and DLL libraries can't both be built, since certain export
+     directives in `gmp.h' must be different.
+
+     A MINGW DLL build of GMP can be used with Microsoft C.  Libtool
+     doesn't install a `.lib' format import library, but it can be
+     created with MS `lib' as follows, and copied to the install
+     directory.  Similarly for `libmp' and `libgmpxx'.
+
+          cd .libs
+          lib /def:libgmp-3.dll.def /out:libgmp-3.lib
+
+     MINGW uses the C runtime library `msvcrt.dll' for I/O, so
+     applications wanting to use the GMP I/O routines must be compiled
+     with `cl /MD' to do the same.  If one of the other C runtime
+     library choices provided by MS C is desired then the suggestion is
+     to use the GMP string functions and confine I/O to the application.
+
+Motorola 68k CPU Types
+     `m68k' is taken to mean 68000.  `m68020' or higher will give a
+     performance boost on applicable CPUs.  `m68360' can be used for
+     CPU32 series chips.  `m68302' can be used for "Dragonball" series
+     chips, though this is merely a synonym for `m68000'.
+
+OpenBSD 2.6
+     `m4' in this release of OpenBSD has a bug in `eval' that makes it
+     unsuitable for `.asm' file processing.  `./configure' will detect
+     the problem and either abort or choose another m4 in the `PATH'.
+     The bug is fixed in OpenBSD 2.7, so either upgrade or use GNU m4.
+
+Power CPU Types
+     In GMP, CPU types `power*' and `powerpc*' will each use
+     instructions not available on the other, so it's important to
+     choose the right one for the CPU that will be used.  Currently GMP
+     has no assembly code support for using just the common instruction
+     subset.  To get executables that run on both, the current
+     suggestion is to use the generic C code (CPU `none'), possibly
+     with appropriate compiler options (like `-mcpu=common' for `gcc').
+     CPU `rs6000' (which is not a CPU but a family of workstations) is
+     accepted by `config.sub', but is currently equivalent to `none'.
+
+Sparc CPU Types
+     `sparcv8' or `supersparc' on relevant systems will give a
+     significant performance increase over the V7 code selected by plain
+     `sparc'.
+
+Sparc App Regs
+     The GMP assembly code for both 32-bit and 64-bit Sparc clobbers the
+     "application registers" `g2', `g3' and `g4', the same way that the
+     GCC default `-mapp-regs' does (*note SPARC Options: (gcc)SPARC
+     Options.).
+
+     This makes that code unsuitable for use with the special V9
+     `-mcmodel=embmedany' (which uses `g4' as a data segment pointer),
+     and for applications wanting to use those registers for special
+     purposes.  In these cases the only suggestion currently is to
+     build GMP with CPU `none' to avoid the assembly code.
+
+SunOS 4
+     `/usr/bin/m4' lacks various features needed to process `.asm'
+     files, and instead `./configure' will automatically use
+     `/usr/5bin/m4', which we believe is always available (if not then
+     use GNU m4).
+
+x86 CPU Types
+     `i586', `pentium' or `pentiummmx' code is good for its intended P5
+     Pentium chips, but quite slow when run on Intel P6 class chips
+     (PPro, P-II, P-III).  `i386' is a better choice when making
+     binaries that must run on both.
+
+x86 MMX and SSE2 Code
+     If the CPU selected has MMX code but the assembler doesn't support
+     it, a warning is given and non-MMX code is used instead.  This
+     will be an inferior build, since the MMX code that's present is
+     there because it's faster than the corresponding plain integer
+     code.  The same applies to SSE2.
+
+     Old versions of `gas' don't support MMX instructions, in particular
+     version 1.92.3 that comes with FreeBSD 2.2.8 or the more recent
+     OpenBSD 3.1 doesn't.
+
+     Solaris 2.6 and 2.7 `as' generate incorrect object code for
+     register to register `movq' instructions, and so can't be used for
+     MMX code.  Install a recent `gas' if MMX code is wanted on these
+     systems.
+
+\1f
+File: gmp.info,  Node: Known Build Problems,  Next: Performance optimization,  Prev: Notes for Particular Systems,  Up: Installing GMP
+
+2.5 Known Build Problems
+========================
+
+You might find more up-to-date information at `http://gmplib.org/'.
+
+Compiler link options
+     The version of libtool currently in use rather aggressively strips
+     compiler options when linking a shared library.  This will
+     hopefully be relaxed in the future, but for now if this is a
+     problem the suggestion is to create a little script to hide them,
+     and for instance configure with
+
+          ./configure CC=gcc-with-my-options
+
+DJGPP (`*-*-msdosdjgpp*')
+     The DJGPP port of `bash' 2.03 is unable to run the `configure'
+     script, it exits silently, having died writing a preamble to
+     `config.log'.  Use `bash' 2.04 or higher.
+
+     `make all' was found to run out of memory during the final
+     `libgmp.la' link on one system tested, despite having 64Mb
+     available.  Running `make libgmp.la' directly helped, perhaps
+     recursing into the various subdirectories uses up memory.
+
+GNU binutils `strip' prior to 2.12
+     `strip' from GNU binutils 2.11 and earlier should not be used on
+     the static libraries `libgmp.a' and `libmp.a' since it will
+     discard all but the last of multiple archive members with the same
+     name, like the three versions of `init.o' in `libgmp.a'.  Binutils
+     2.12 or higher can be used successfully.
+
+     The shared libraries `libgmp.so' and `libmp.so' are not affected by
+     this and any version of `strip' can be used on them.
+
+`make' syntax error
+     On certain versions of SCO OpenServer 5 and IRIX 6.5 the native
+     `make' is unable to handle the long dependencies list for
+     `libgmp.la'.  The symptom is a "syntax error" on the following
+     line of the top-level `Makefile'.
+
+          libgmp.la: $(libgmp_la_OBJECTS) $(libgmp_la_DEPENDENCIES)
+
+     Either use GNU Make, or as a workaround remove
+     `$(libgmp_la_DEPENDENCIES)' from that line (which will make the
+     initial build work, but if any recompiling is done `libgmp.la'
+     might not be rebuilt).
+
+MacOS X (`*-*-darwin*')
+     Libtool currently only knows how to create shared libraries on
+     MacOS X using the native `cc' (which is a modified GCC), not a
+     plain GCC.  A static-only build should work though
+     (`--disable-shared').
+
+NeXT prior to 3.3
+     The system compiler on old versions of NeXT was a massacred and
+     old GCC, even if it called itself `cc'.  This compiler cannot be
+     used to build GMP, you need to get a real GCC, and install that.
+     (NeXT may have fixed this in release 3.3 of their system.)
+
+POWER and PowerPC
+     Bugs in GCC 2.7.2 (and 2.6.3) mean it can't be used to compile GMP
+     on POWER or PowerPC.  If you want to use GCC for these machines,
+     get GCC 2.7.2.1 (or later).
+
+Sequent Symmetry
+     Use the GNU assembler instead of the system assembler, since the
+     latter has serious bugs.
+
+Solaris 2.6
+     The system `sed' prints an error "Output line too long" when
+     libtool builds `libgmp.la'.  This doesn't seem to cause any
+     obvious ill effects, but GNU `sed' is recommended, to avoid any
+     doubt.
+
+Sparc Solaris 2.7 with gcc 2.95.2 in `ABI=32'
+     A shared library build of GMP seems to fail in this combination,
+     it builds but then fails the tests, apparently due to some
+     incorrect data relocations within `gmp_randinit_lc_2exp_size'.
+     The exact cause is unknown, `--disable-shared' is recommended.
+
+\1f
+File: gmp.info,  Node: Performance optimization,  Prev: Known Build Problems,  Up: Installing GMP
+
+2.6 Performance optimization
+============================
+
+For optimal performance, build GMP for the exact CPU type of the target
+computer, see *note Build Options::.
+
+   Unlike what is the case for most other programs, the compiler
+typically doesn't matter much, since GMP uses assembly language for the
+most critical operations.
+
+   In particular for long-running GMP applications, and applications
+demanding extremely large numbers, building and running the `tuneup'
+program in the `tune' subdirectory, can be important.  For example,
+
+     cd tune
+     make tuneup
+     ./tuneup
+
+   will generate better contents for the `gmp-mparam.h' parameter file.
+
+   To use the results, put the output in the file indicated in the
+`Parameters for ...' header.  Then recompile from scratch.
+
+   The `tuneup' program takes one useful parameter, `-f NNN', which
+instructs the program how long to check FFT multiply parameters.  If
+you're going to use GMP for extremely large numbers, you may want to
+run `tuneup' with a large NNN value.
+
+\1f
+File: gmp.info,  Node: GMP Basics,  Next: Reporting Bugs,  Prev: Installing GMP,  Up: Top
+
+3 GMP Basics
+************
+
+*Using functions, macros, data types, etc. not documented in this
+manual is strongly discouraged.  If you do so your application is
+guaranteed to be incompatible with future versions of GMP.*
+
+* Menu:
+
+* Headers and Libraries::
+* Nomenclature and Types::
+* Function Classes::
+* Variable Conventions::
+* Parameter Conventions::
+* Memory Management::
+* Reentrancy::
+* Useful Macros and Constants::
+* Compatibility with older versions::
+* Demonstration Programs::
+* Efficiency::
+* Debugging::
+* Profiling::
+* Autoconf::
+* Emacs::
+
+\1f
+File: gmp.info,  Node: Headers and Libraries,  Next: Nomenclature and Types,  Prev: GMP Basics,  Up: GMP Basics
+
+3.1 Headers and Libraries
+=========================
+
+All declarations needed to use GMP are collected in the include file
+`gmp.h'.  It is designed to work with both C and C++ compilers.
+
+     #include <gmp.h>
+
+   Note however that prototypes for GMP functions with `FILE *'
+parameters are only provided if `<stdio.h>' is included too.
+
+     #include <stdio.h>
+     #include <gmp.h>
+
+   Likewise `<stdarg.h>' (or `<varargs.h>') is required for prototypes
+with `va_list' parameters, such as `gmp_vprintf'.  And `<obstack.h>'
+for prototypes with `struct obstack' parameters, such as
+`gmp_obstack_printf', when available.
+
+   All programs using GMP must link against the `libgmp' library.  On a
+typical Unix-like system this can be done with `-lgmp', for example
+
+     gcc myprogram.c -lgmp
+
+   GMP C++ functions are in a separate `libgmpxx' library.  This is
+built and installed if C++ support has been enabled (*note Build
+Options::).  For example,
+
+     g++ mycxxprog.cc -lgmpxx -lgmp
+
+   GMP is built using Libtool and an application can use that to link
+if desired, *note GNU Libtool: (libtool)Top.
+
+   If GMP has been installed to a non-standard location then it may be
+necessary to use `-I' and `-L' compiler options to point to the right
+directories, and some sort of run-time path for a shared library.
+
+\1f
+File: gmp.info,  Node: Nomenclature and Types,  Next: Function Classes,  Prev: Headers and Libraries,  Up: GMP Basics
+
+3.2 Nomenclature and Types
+==========================
+
+In this manual, "integer" usually means a multiple precision integer, as
+defined by the GMP library.  The C data type for such integers is
+`mpz_t'.  Here are some examples of how to declare such integers:
+
+     mpz_t sum;
+
+     struct foo { mpz_t x, y; };
+
+     mpz_t vec[20];
+
+   "Rational number" means a multiple precision fraction.  The C data
+type for these fractions is `mpq_t'.  For example:
+
+     mpq_t quotient;
+
+   "Floating point number" or "Float" for short, is an arbitrary
+precision mantissa with a limited precision exponent.  The C data type
+for such objects is `mpf_t'.  For example:
+
+     mpf_t fp;
+
+   The floating point functions accept and return exponents in the C
+type `mp_exp_t'.  Currently this is usually a `long', but on some
+systems it's an `int' for efficiency.
+
+   A "limb" means the part of a multi-precision number that fits in a
+single machine word.  (We chose this word because a limb of the human
+body is analogous to a digit, only larger, and containing several
+digits.)  Normally a limb is 32 or 64 bits.  The C data type for a limb
+is `mp_limb_t'.
+
+   Counts of limbs of a multi-precision number are represented in the C
+type `mp_size_t'.  Currently this is normally a `long', but on some
+systems it's an `int' for efficiency, and on some systems it will be
+`long long' in the future.
+
+   Counts of bits of a multi-precision number are represented in the C
+type `mp_bitcnt_t'.  Currently this is always an `unsigned long', but on
+some systems it will be an `unsigned long long' in the future.
+
+   "Random state" means an algorithm selection and current state data.
+The C data type for such objects is `gmp_randstate_t'.  For example:
+
+     gmp_randstate_t rstate;
+
+   Also, in general `mp_bitcnt_t' is used for bit counts and ranges, and
+`size_t' is used for byte or character counts.
+
+\1f
+File: gmp.info,  Node: Function Classes,  Next: Variable Conventions,  Prev: Nomenclature and Types,  Up: GMP Basics
+
+3.3 Function Classes
+====================
+
+There are six classes of functions in the GMP library:
+
+  1. Functions for signed integer arithmetic, with names beginning with
+     `mpz_'.  The associated type is `mpz_t'.  There are about 150
+     functions in this class.  (*note Integer Functions::)
+
+  2. Functions for rational number arithmetic, with names beginning with
+     `mpq_'.  The associated type is `mpq_t'.  There are about 40
+     functions in this class, but the integer functions can be used for
+     arithmetic on the numerator and denominator separately.  (*note
+     Rational Number Functions::)
+
+  3. Functions for floating-point arithmetic, with names beginning with
+     `mpf_'.  The associated type is `mpf_t'.  There are about 60
+     functions in this class.  (*note Floating-point Functions::)
+
+  4. Functions compatible with Berkeley MP, such as `itom', `madd', and
+     `mult'.  The associated type is `MINT'.  (*note BSD Compatible
+     Functions::)
+
+  5. Fast low-level functions that operate on natural numbers.  These
+     are used by the functions in the preceding groups, and you can
+     also call them directly from very time-critical user programs.
+     These functions' names begin with `mpn_'.  The associated type is
+     array of `mp_limb_t'.  There are about 30 (hard-to-use) functions
+     in this class.  (*note Low-level Functions::)
+
+  6. Miscellaneous functions.  Functions for setting up custom
+     allocation and functions for generating random numbers.  (*note
+     Custom Allocation::, and *note Random Number Functions::)
+
+\1f
+File: gmp.info,  Node: Variable Conventions,  Next: Parameter Conventions,  Prev: Function Classes,  Up: GMP Basics
+
+3.4 Variable Conventions
+========================
+
+GMP functions generally have output arguments before input arguments.
+This notation is by analogy with the assignment operator.  The BSD MP
+compatibility functions are exceptions, having the output arguments
+last.
+
+   GMP lets you use the same variable for both input and output in one
+call.  For example, the main function for integer multiplication,
+`mpz_mul', can be used to square `x' and put the result back in `x' with
+
+     mpz_mul (x, x, x);
+
+   Before you can assign to a GMP variable, you need to initialize it
+by calling one of the special initialization functions.  When you're
+done with a variable, you need to clear it out, using one of the
+functions for that purpose.  Which function to use depends on the type
+of variable.  See the chapters on integer functions, rational number
+functions, and floating-point functions for details.
+
+   A variable should only be initialized once, or at least cleared
+between each initialization.  After a variable has been initialized, it
+may be assigned to any number of times.
+
+   For efficiency reasons, avoid excessive initializing and clearing.
+In general, initialize near the start of a function and clear near the
+end.  For example,
+
+     void
+     foo (void)
+     {
+       mpz_t  n;
+       int    i;
+       mpz_init (n);
+       for (i = 1; i < 100; i++)
+         {
+           mpz_mul (n, ...);
+           mpz_fdiv_q (n, ...);
+           ...
+         }
+       mpz_clear (n);
+     }
+
+\1f
+File: gmp.info,  Node: Parameter Conventions,  Next: Memory Management,  Prev: Variable Conventions,  Up: GMP Basics
+
+3.5 Parameter Conventions
+=========================
+
+When a GMP variable is used as a function parameter, it's effectively a
+call-by-reference, meaning if the function stores a value there it will
+change the original in the caller.  Parameters which are input-only can
+be designated `const' to provoke a compiler error or warning on
+attempting to modify them.
+
+   When a function is going to return a GMP result, it should designate
+a parameter that it sets, like the library functions do.  More than one
+value can be returned by having more than one output parameter, again
+like the library functions.  A `return' of an `mpz_t' etc doesn't
+return the object, only a pointer, and this is almost certainly not
+what's wanted.
+
+   Here's an example accepting an `mpz_t' parameter, doing a
+calculation, and storing the result to the indicated parameter.
+
+     void
+     foo (mpz_t result, const mpz_t param, unsigned long n)
+     {
+       unsigned long  i;
+       mpz_mul_ui (result, param, n);
+       for (i = 1; i < n; i++)
+         mpz_add_ui (result, result, i*7);
+     }
+
+     int
+     main (void)
+     {
+       mpz_t  r, n;
+       mpz_init (r);
+       mpz_init_set_str (n, "123456", 0);
+       foo (r, n, 20L);
+       gmp_printf ("%Zd\n", r);
+       return 0;
+     }
+
+   `foo' works even if the mainline passes the same variable for
+`param' and `result', just like the library functions.  But sometimes
+it's tricky to make that work, and an application might not want to
+bother supporting that sort of thing.
+
+   For interest, the GMP types `mpz_t' etc are implemented as
+one-element arrays of certain structures.  This is why declaring a
+variable creates an object with the fields GMP needs, but then using it
+as a parameter passes a pointer to the object.  Note that the actual
+fields in each `mpz_t' etc are for internal use only and should not be
+accessed directly by code that expects to be compatible with future GMP
+releases.
+
+\1f
+File: gmp.info,  Node: Memory Management,  Next: Reentrancy,  Prev: Parameter Conventions,  Up: GMP Basics
+
+3.6 Memory Management
+=====================
+
+The GMP types like `mpz_t' are small, containing only a couple of sizes,
+and pointers to allocated data.  Once a variable is initialized, GMP
+takes care of all space allocation.  Additional space is allocated
+whenever a variable doesn't have enough.
+
+   `mpz_t' and `mpq_t' variables never reduce their allocated space.
+Normally this is the best policy, since it avoids frequent reallocation.
+Applications that need to return memory to the heap at some particular
+point can use `mpz_realloc2', or clear variables no longer needed.
+
+   `mpf_t' variables, in the current implementation, use a fixed amount
+of space, determined by the chosen precision and allocated at
+initialization, so their size doesn't change.
+
+   All memory is allocated using `malloc' and friends by default, but
+this can be changed, see *note Custom Allocation::.  Temporary memory
+on the stack is also used (via `alloca'), but this can be changed at
+build-time if desired, see *note Build Options::.
+
+\1f
+File: gmp.info,  Node: Reentrancy,  Next: Useful Macros and Constants,  Prev: Memory Management,  Up: GMP Basics
+
+3.7 Reentrancy
+==============
+
+GMP is reentrant and thread-safe, with some exceptions:
+
+   * If configured with `--enable-alloca=malloc-notreentrant' (or with
+     `--enable-alloca=notreentrant' when `alloca' is not available),
+     then naturally GMP is not reentrant.
+
+   * `mpf_set_default_prec' and `mpf_init' use a global variable for the
+     selected precision.  `mpf_init2' can be used instead, and in the
+     C++ interface an explicit precision to the `mpf_class' constructor.
+
+   * `mpz_random' and the other old random number functions use a global
+     random state and are hence not reentrant.  The newer random number
+     functions that accept a `gmp_randstate_t' parameter can be used
+     instead.
+
+   * `gmp_randinit' (obsolete) returns an error indication through a
+     global variable, which is not thread safe.  Applications are
+     advised to use `gmp_randinit_default' or `gmp_randinit_lc_2exp'
+     instead.
+
+   * `mp_set_memory_functions' uses global variables to store the
+     selected memory allocation functions.
+
+   * If the memory allocation functions set by a call to
+     `mp_set_memory_functions' (or `malloc' and friends by default) are
+     not reentrant, then GMP will not be reentrant either.
+
+   * If the standard I/O functions such as `fwrite' are not reentrant
+     then the GMP I/O functions using them will not be reentrant either.
+
+   * It's safe for two threads to read from the same GMP variable
+     simultaneously, but it's not safe for one to read while another
+     might be writing, nor for two threads to write
+     simultaneously.  It's not safe for two threads to generate a
+     random number from the same `gmp_randstate_t' simultaneously,
+     since this involves an update of that variable.
+
+\1f
+File: gmp.info,  Node: Useful Macros and Constants,  Next: Compatibility with older versions,  Prev: Reentrancy,  Up: GMP Basics
+
+3.8 Useful Macros and Constants
+===============================
+
+ -- Global Constant: const int mp_bits_per_limb
+     The number of bits per limb.
+
+ -- Macro: __GNU_MP_VERSION
+ -- Macro: __GNU_MP_VERSION_MINOR
+ -- Macro: __GNU_MP_VERSION_PATCHLEVEL
+     The major and minor GMP version, and patch level, respectively, as
+     integers.  For GMP i.j, these numbers will be i, j, and 0,
+     respectively.  For GMP i.j.k, these numbers will be i, j, and k,
+     respectively.
+
+ -- Global Constant: const char * const gmp_version
+     The GMP version number, as a null-terminated string, in the form
+     "i.j.k".  This release is "5.0.5".  Note that before version 4.3.0
+     the format "i.j" was used when k was zero.
+
+ -- Macro: __GMP_CC
+ -- Macro: __GMP_CFLAGS
+     The compiler and compiler flags, respectively, used when compiling
+     GMP, as strings.
+
+\1f
+File: gmp.info,  Node: Compatibility with older versions,  Next: Demonstration Programs,  Prev: Useful Macros and Constants,  Up: GMP Basics
+
+3.9 Compatibility with older versions
+=====================================
+
+This version of GMP is upwardly binary compatible with all 4.x and 3.x
+versions, and upwardly compatible at the source level with all 2.x
+versions, with the following exceptions.
+
+   * `mpn_gcd' had its source arguments swapped as of GMP 3.0, for
+     consistency with other `mpn' functions.
+
+   * `mpf_get_prec' counted precision slightly differently in GMP 3.0
+     and 3.0.1, but in 3.1 reverted to the 2.x style.
+
+   There are a number of compatibility issues between GMP 1 and GMP 2
+that of course also apply when porting applications from GMP 1 to GMP
+5.  Please see the GMP 2 manual for details.
+
+   The Berkeley MP compatibility library (*note BSD Compatible
+Functions::) is source and binary compatible with the standard `libmp'.
+
+\1f
+File: gmp.info,  Node: Demonstration Programs,  Next: Efficiency,  Prev: Compatibility with older versions,  Up: GMP Basics
+
+3.10 Demonstration programs
+===========================
+
+The `demos' subdirectory has some sample programs using GMP.  These
+aren't built or installed, but there's a `Makefile' with rules for them.
+For instance,
+
+     make pexpr
+     ./pexpr 68^975+10
+
+The following programs are provided
+
+   * `pexpr' is an expression evaluator, the program used on the GMP
+     web page.
+
+   * The `calc' subdirectory has a similar but simpler evaluator using
+     `lex' and `yacc'.
+
+   * The `expr' subdirectory is yet another expression evaluator, a
+     library designed for ease of use within a C program.  See
+     `demos/expr/README' for more information.
+
+   * `factorize' is a Pollard-Rho factorization program.
+
+   * `isprime' is a command-line interface to the `mpz_probab_prime_p'
+     function.
+
+   * `primes' counts or lists primes in an interval, using a sieve.
+
+   * `qcn' is an example use of `mpz_kronecker_ui' to estimate quadratic
+     class numbers.
+
+   * The `perl' subdirectory is a comprehensive perl interface to GMP.
+     See `demos/perl/INSTALL' for more information.  Documentation is
+     in POD format in `demos/perl/GMP.pm'.
+
+   As an aside, consideration has been given at various times to some
+sort of expression evaluation within the main GMP library.  Going
+beyond something minimal quickly leads to matters like user-defined
+functions, looping, fixnums for control variables, etc, which are
+considered outside the scope of GMP (much closer to language
+interpreters or compilers, *Note Language Bindings::.)  Something
+simple for program input convenience may yet be a possibility, a
+combination of the `expr' demo and the `pexpr' tree back-end perhaps.
+But for now the above evaluators are offered as illustrations.
+
+\1f
+File: gmp.info,  Node: Efficiency,  Next: Debugging,  Prev: Demonstration Programs,  Up: GMP Basics
+
+3.11 Efficiency
+===============
+
+Small Operands
+     On small operands, the time for function call overheads and memory
+     allocation can be significant in comparison to actual calculation.
+     This is unavoidable in a general purpose variable precision
+     library, although GMP attempts to be as efficient as it can on
+     both large and small operands.
+
+Static Linking
+     On some CPUs, in particular the x86s, the static `libgmp.a' should
+     be used for maximum speed, since the PIC code in the shared
+     `libgmp.so' will have a small overhead on each function call and
+     global data address.  For many programs this will be
+     insignificant, but for long calculations there's a gain to be had.
+
+Initializing and Clearing
+     Avoid excessive initializing and clearing of variables, since this
+     can be quite time consuming, especially in comparison to otherwise
+     fast operations like addition.
+
+     A language interpreter might want to keep a free list or stack of
+     initialized variables ready for use.  It should be possible to
+     integrate something like that with a garbage collector too.
+
+Reallocations
+     An `mpz_t' or `mpq_t' variable used to hold successively increasing
+     values will have its memory repeatedly `realloc'ed, which could be
+     quite slow or could fragment memory, depending on the C library.
+     If an application can estimate the final size then `mpz_init2' or
+     `mpz_realloc2' can be called to allocate the necessary space from
+     the beginning (*note Initializing Integers::).
+
+     It doesn't matter if a size set with `mpz_init2' or `mpz_realloc2'
+     is too small, since all functions will do a further reallocation
+     if necessary.  Badly overestimating memory required will waste
+     space though.
+
+`2exp' Functions
+     It's up to an application to call functions like `mpz_mul_2exp'
+     when appropriate.  General purpose functions like `mpz_mul' make
+     no attempt to identify powers of two or other special forms,
+     because such inputs will usually be very rare and testing every
+     time would be wasteful.
+
+`ui' and `si' Functions
+     The `ui' functions and the small number of `si' functions exist for
+     convenience and should be used where applicable.  But if for
+     example an `mpz_t' contains a value that fits in an `unsigned
+     long' there's no need to extract it and call a `ui' function, just
+     use the regular `mpz' function.
+
+In-Place Operations
+     `mpz_abs', `mpq_abs', `mpf_abs', `mpz_neg', `mpq_neg' and
+     `mpf_neg' are fast when used for in-place operations like
+     `mpz_abs(x,x)', since in the current implementation only a single
+     field of `x' needs changing.  On suitable compilers (GCC for
+     instance) this is inlined too.
+
+     `mpz_add_ui', `mpz_sub_ui', `mpf_add_ui' and `mpf_sub_ui' benefit
+     from an in-place operation like `mpz_add_ui(x,x,y)', since usually
+     only one or two limbs of `x' will need to be changed.  The same
+     applies to the full precision `mpz_add' etc if `y' is small.  If
+     `y' is big then cache locality may be helped, but that's all.
+
+     `mpz_mul' is currently the opposite, a separate destination is
+     slightly better.  A call like `mpz_mul(x,x,y)' will, unless `y' is
+     only one limb, make a temporary copy of `x' before forming the
+     result.  Normally that copying will only be a tiny fraction of the
+     time for the multiply, so this is not a particularly important
+     consideration.
+
+     `mpz_set', `mpq_set', `mpq_set_num', `mpf_set', etc, make no
+     attempt to recognise a copy of something to itself, so a call like
+     `mpz_set(x,x)' will be wasteful.  Naturally that would never be
+     written deliberately, but if it might arise from two pointers to
+     the same object then a test to avoid it might be desirable.
+
+          if (x != y)
+            mpz_set (x, y);
+
+     Note that it's never worth introducing extra `mpz_set' calls just
+     to get in-place operations.  If a result should go to a particular
+     variable then just direct it there and let GMP take care of data
+     movement.
+
+Divisibility Testing (Small Integers)
+     `mpz_divisible_ui_p' and `mpz_congruent_ui_p' are the best
+     functions for testing whether an `mpz_t' is divisible by an
+     individual small integer.  They use an algorithm which is faster
+     than `mpz_tdiv_ui', but which gives no useful information about
+     the actual remainder, only whether it's zero (or a particular
+     value).
+
+     However when testing divisibility by several small integers, it's
+     best to take a remainder modulo their product, to save
+     multi-precision operations.  For instance to test whether a number
+     is divisible by any of 23, 29 or 31 take a remainder modulo
+     23*29*31 = 20677 and then test that.
+
+     The division functions like `mpz_tdiv_q_ui' which give a quotient
+     as well as a remainder are generally a little slower than the
+     remainder-only functions like `mpz_tdiv_ui'.  If the quotient is
+     only rarely wanted then it's probably best to just take a
+     remainder and then go back and calculate the quotient if and when
+     it's wanted (`mpz_divexact_ui' can be used if the remainder is
+     zero).
+
+Rational Arithmetic
+     The `mpq' functions operate on `mpq_t' values with no common
+     factors in the numerator and denominator.  Common factors are
+     checked-for and cast out as necessary.  In general, cancelling
+     factors every time is the best approach since it minimizes the
+     sizes for subsequent operations.
+
+     However, applications that know something about the factorization
+     of the values they're working with might be able to avoid some of
+     the GCDs used for canonicalization, or swap them for divisions.
+     For example when multiplying by a prime it's enough to check for
+     factors of it in the denominator instead of doing a full GCD.  Or
+     when forming a big product it might be known that very little
+     cancellation will be possible, and so canonicalization can be left
+     to the end.
+
+     The `mpq_numref' and `mpq_denref' macros give access to the
+     numerator and denominator to do things outside the scope of the
+     supplied `mpq' functions.  *Note Applying Integer Functions::.
+
+     The canonical form for rationals allows mixed-type `mpq_t' and
+     integer additions or subtractions to be done directly with
+     multiples of the denominator.  This will be somewhat faster than
+     `mpq_add'.  For example,
+
+          /* mpq increment */
+          mpz_add (mpq_numref(q), mpq_numref(q), mpq_denref(q));
+
+          /* mpq += unsigned long */
+          mpz_addmul_ui (mpq_numref(q), mpq_denref(q), 123UL);
+
+          /* mpq -= mpz */
+          mpz_submul (mpq_numref(q), mpq_denref(q), z);
+
+Number Sequences
+     Functions like `mpz_fac_ui', `mpz_fib_ui' and `mpz_bin_uiui' are
+     designed for calculating isolated values.  If a range of values is
+     wanted it's probably best to call one of them to get a starting
+     point and iterate from there.
+
+Text Input/Output
+     Hexadecimal or octal are suggested for input or output in text
+     form.  Power-of-2 bases like these can be converted much more
+     efficiently than other bases, like decimal.  For big numbers
+     there's usually nothing of particular interest to be seen in the
+     digits, so the base doesn't matter much.
+
+     Maybe we can hope octal will one day become the normal base for
+     everyday use, as proposed by King Charles XII of Sweden and later
+     reformers.
+
+\1f
+File: gmp.info,  Node: Debugging,  Next: Profiling,  Prev: Efficiency,  Up: GMP Basics
+
+3.12 Debugging
+==============
+
+Stack Overflow
+     Depending on the system, a segmentation violation or bus error
+     might be the only indication of stack overflow.  See
+     `--enable-alloca' choices in *note Build Options::, for how to
+     address this.
+
+     In new enough versions of GCC, `-fstack-check' may be able to
+     ensure an overflow is recognised by the system before too much
+     damage is done, or `-fstack-limit-symbol' or
+     `-fstack-limit-register' may be able to add checking if the system
+     itself doesn't do any (*note Options for Code Generation:
+     (gcc)Code Gen Options.).  These options must be added to the
+     `CFLAGS' used in the GMP build (*note Build Options::), adding
+     them just to an application will have no effect.  Note also
+     they're a slowdown, adding overhead to each function call and each
+     stack allocation.
+
+Heap Problems
+     The most likely cause of application problems with GMP is heap
+     corruption.  Failing to `init' GMP variables will have
+     unpredictable effects, and corruption arising elsewhere in a
+     program may well affect GMP.  Initializing GMP variables more than
+     once or failing to clear them will cause memory leaks.
+
+     In all such cases a `malloc' debugger is recommended.  On a GNU or
+     BSD system the standard C library `malloc' has some diagnostic
+     facilities, see *note Allocation Debugging: (libc)Allocation
+     Debugging, or `man 3 malloc'.  Other possibilities, in no
+     particular order, include
+
+          `http://www.inf.ethz.ch/personal/biere/projects/ccmalloc/'
+          `http://dmalloc.com/'
+          `http://www.perens.com/FreeSoftware/'  (electric fence)
+          `http://packages.debian.org/stable/devel/fda'
+          `http://www.gnupdate.org/components/leakbug/'
+          `http://people.redhat.com/~otaylor/memprof/'
+          `http://www.cbmamiga.demon.co.uk/mpatrol/'
+
+     The GMP default allocation routines in `memory.c' also have a
+     simple sentinel scheme which can be enabled with `#define DEBUG'
+     in that file.  This is mainly designed for detecting buffer
+     overruns during GMP development, but might find other uses.
+
+Stack Backtraces
+     On some systems the compiler options GMP uses by default can
+     interfere with debugging.  In particular on x86 and 68k systems
+     `-fomit-frame-pointer' is used and this generally inhibits stack
+     backtracing.  Recompiling without such options may help while
+     debugging, though the usual caveats about it potentially moving a
+     memory problem or hiding a compiler bug will apply.
+
+GDB, the GNU Debugger
+     A sample `.gdbinit' is included in the distribution, showing how
+     to call some undocumented dump functions to print GMP variables
+     from within GDB.  Note that these functions shouldn't be used in
+     final application code since they're undocumented and may be
+     subject to incompatible changes in future versions of GMP.
+
+Source File Paths
+     GMP has multiple source files with the same name, in different
+     directories.  For example `mpz', `mpq' and `mpf' each have an
+     `init.c'.  If the debugger can't already determine the right one
+     it may help to build with absolute paths on each C file.  One way
+     to do that is to use a separate object directory with an absolute
+     path to the source directory.
+
+          cd /my/build/dir
+          /my/source/dir/gmp-5.0.5/configure
+
+     This works via `VPATH', and might require GNU `make'.  Alternately
+     it might be possible to change the `.c.lo' rules appropriately.
+
+Assertion Checking
+     The build option `--enable-assert' is available to add some
+     consistency checks to the library (see *note Build Options::).
+     These are likely to be of limited value to most applications.
+     Assertion failures are just as likely to indicate memory
+     corruption as a library or compiler bug.
+
+     Applications using the low-level `mpn' functions, however, will
+     benefit from `--enable-assert' since it adds checks on the
+     parameters of most such functions, many of which have subtle
+     restrictions on their usage.  Note however that only the generic C
+     code has checks, not the assembly code, so CPU `none' should be
+     used for maximum checking.
+
+Temporary Memory Checking
+     The build option `--enable-alloca=debug' arranges that each block
+     of temporary memory in GMP is allocated with a separate call to
+     `malloc' (or the allocation function set with
+     `mp_set_memory_functions').
+
+     This can help a malloc debugger detect accesses outside the
+     intended bounds, or detect memory not released.  In a normal
+     build, on the other hand, temporary memory is allocated in blocks
+     which GMP divides up for its own use, or may be allocated with a
+     compiler builtin `alloca' which will go nowhere near any malloc
+     debugger hooks.
+
+Maximum Debuggability
+     To summarize the above, a GMP build for maximum debuggability
+     would be
+
+          ./configure --disable-shared --enable-assert \
+            --enable-alloca=debug --host=none CFLAGS=-g
+
+     For C++, add `--enable-cxx CXXFLAGS=-g'.
+
+Checker
+     The GCC checker (`http://savannah.nongnu.org/projects/checker/')
+     can be used with GMP.  It contains a stub library which means GMP
+     applications compiled with checker can use a normal GMP build.
+
+     A build of GMP with checking within GMP itself can be made.  This
+     will run very very slowly.  On GNU/Linux for example,
+
+          ./configure --host=none-pc-linux-gnu CC=checkergcc
+
+     `--host=none' must be used, since the GMP assembly code doesn't
+     support the checking scheme.  The GMP C++ features cannot be used,
+     since current versions of checker (0.9.9.1) don't yet support the
+     standard C++ library.
+
+Valgrind
+     The valgrind program (`http://valgrind.org/') is a memory checker
+     for x86s.  It translates and emulates machine instructions to do
+     strong checks for uninitialized data (at the level of individual
+     bits), memory accesses through bad pointers, and memory leaks.
+
+     Recent versions of Valgrind are getting support for MMX and
+     SSE/SSE2 instructions, for past versions GMP will need to be
+     configured not to use those, i.e. for an x86 without them (for
+     instance plain `i486').
+
+     GMP's assembly code sometimes promotes a read of the limbs to some
+     larger size, for efficiency.  GMP will do this even at the start
+     and end of a multilimb operand, using naturally aligned operations
+     on the larger type.  This may lead to benign reads outside of
+     allocated areas, triggering complaints from Valgrind.
+
+Other Problems
+     Any suspected bug in GMP itself should be isolated to make sure
+     it's not an application problem, see *note Reporting Bugs::.
+
+\1f
+File: gmp.info,  Node: Profiling,  Next: Autoconf,  Prev: Debugging,  Up: GMP Basics
+
+3.13 Profiling
+==============
+
+Running a program under a profiler is a good way to find where it's
+spending most time and where improvements can be best sought.  The
+profiling choices for a GMP build are as follows.
+
+`--disable-profiling'
+     The default is to add nothing special for profiling.
+
+     It should be possible to just compile the mainline of a program
+     with `-p' and use `prof' to get a profile consisting of
+     timer-based sampling of the program counter.  Most of the GMP
+     assembly code has the necessary symbol information.
+
+     This approach has the advantage of minimizing interference with
+     normal program operation, but on most systems the resolution of
+     the sampling is quite low (10 milliseconds for instance),
+     requiring long runs to get accurate information.
+
+`--enable-profiling=prof'
+     Build with support for the system `prof', which means `-p' added
+     to the `CFLAGS'.
+
+     This provides call counting in addition to program counter
+     sampling, which allows the most frequently called routines to be
+     identified, and an average time spent in each routine to be
+     determined.
+
+     The x86 assembly code has support for this option, but on other
+     processors the assembly routines will be as if compiled without
+     `-p' and therefore won't appear in the call counts.
+
+     On some systems, such as GNU/Linux, `-p' in fact means `-pg' and in
+     this case `--enable-profiling=gprof' described below should be used
+     instead.
+
+`--enable-profiling=gprof'
+     Build with support for `gprof', which means `-pg' added to the
+     `CFLAGS'.
+
+     This provides call graph construction in addition to call counting
+     and program counter sampling, which makes it possible to count
+     calls coming from different locations.  For example the number of
+     calls to `mpn_mul' from `mpz_mul' versus the number from
+     `mpf_mul'.  The program counter sampling is still flat though, so
+     only a total time in `mpn_mul' would be accumulated, not a
+     separate amount for each call site.
+
+     The x86 assembly code has support for this option, but on other
+     processors the assembly routines will be as if compiled without
+     `-pg' and therefore not be included in the call counts.
+
+     On x86 and m68k systems `-pg' and `-fomit-frame-pointer' are
+     incompatible, so the latter is omitted from the default flags in
+     that case, which might result in poorer code generation.
+
+     Incidentally, it should be possible to use the `gprof' program
+     with a plain `--enable-profiling=prof' build.  But in that case
+     only the `gprof -p' flat profile and call counts can be expected
+     to be valid, not the `gprof -q' call graph.
+
+`--enable-profiling=instrument'
+     Build with the GCC option `-finstrument-functions' added to the
+     `CFLAGS' (*note Options for Code Generation: (gcc)Code Gen
+     Options.).
+
+     This inserts special instrumenting calls at the start and end of
+     each function, allowing exact timing and full call graph
+     construction.
+
+     This instrumenting is not normally a standard system feature and
+     will require support from an external library, such as
+
+          `http://sourceforge.net/projects/fnccheck/'
+
+     This should be included in `LIBS' during the GMP configure so that
+     test programs will link.  For example,
+
+          ./configure --enable-profiling=instrument LIBS=-lfc
+
+     On a GNU system the C library provides dummy instrumenting
+     functions, so programs compiled with this option will link.  In
+     this case it's only necessary to ensure the correct library is
+     added when linking an application.
+
+     The x86 assembly code supports this option, but on other
+     processors the assembly routines will be as if compiled without
+     `-finstrument-functions' meaning time spent in them will
+     effectively be attributed to their caller.
+
+\1f
+File: gmp.info,  Node: Autoconf,  Next: Emacs,  Prev: Profiling,  Up: GMP Basics
+
+3.14 Autoconf
+=============
+
+Autoconf based applications can easily check whether GMP is installed.
+The only thing to be noted is that GMP library symbols from version 3
+onwards have prefixes like `__gmpz'.  The following therefore would be
+a simple test,
+
+     AC_CHECK_LIB(gmp, __gmpz_init)
+
+   This just uses the default `AC_CHECK_LIB' actions for found or not
+found, but an application that must have GMP would want to generate an
+error if not found.  For example,
+
+     AC_CHECK_LIB(gmp, __gmpz_init, ,
+       [AC_MSG_ERROR([GNU MP not found, see http://gmplib.org/])])
+
+   If functions added in some particular version of GMP are required,
+then one of those can be used when checking.  For example `mpz_mul_si'
+was added in GMP 3.1,
+
+     AC_CHECK_LIB(gmp, __gmpz_mul_si, ,
+       [AC_MSG_ERROR(
+       [GNU MP not found, or not 3.1 or up, see http://gmplib.org/])])
+
+   An alternative would be to test the version number in `gmp.h' using
+say `AC_EGREP_CPP'.  That would make it possible to test the exact
+version, if some particular sub-minor release is known to be necessary.
+
+   In general it's recommended that applications should simply demand a
+new enough GMP rather than trying to provide supplements for features
+not available in past versions.
+
+   Occasionally an application will need or want to know the size of a
+type at configuration or preprocessing time, not just with `sizeof' in
+the code.  This can be done in the normal way with `mp_limb_t' etc, but
+GMP 4.0 or up is best for this, since prior versions needed certain
+`-D' defines on systems using a `long long' limb.  The following would
+suit Autoconf 2.50 or up,
+
+     AC_CHECK_SIZEOF(mp_limb_t, , [#include <gmp.h>])
+
+\1f
+File: gmp.info,  Node: Emacs,  Prev: Autoconf,  Up: GMP Basics
+
+3.15 Emacs
+==========
+
+<C-h C-i> (`info-lookup-symbol') is a good way to find documentation on
+C functions while editing (*note Info Documentation Lookup: (emacs)Info
+Lookup.).
+
+   The GMP manual can be included in such lookups by putting the
+following in your `.emacs',
+
+     (eval-after-load "info-look"
+       '(let ((mode-value (assoc 'c-mode (assoc 'symbol info-lookup-alist))))
+          (setcar (nthcdr 3 mode-value)
+                  (cons '("(gmp)Function Index" nil "^ -.* " "\\>")
+                        (nth 3 mode-value)))))
+
+\1f
+File: gmp.info,  Node: Reporting Bugs,  Next: Integer Functions,  Prev: GMP Basics,  Up: Top
+
+4 Reporting Bugs
+****************
+
+If you think you have found a bug in the GMP library, please
+investigate it and report it.  We have made this library available to
+you, and it is not too much to ask you to report the bugs you find.
+
+   Before you report a bug, check it's not already addressed in *note
+Known Build Problems::, or perhaps *note Notes for Particular
+Systems::.  You may also want to check `http://gmplib.org/' for patches
+for this release.
+
+   Please include the following in any report,
+
+   * The GMP version number, and if pre-packaged or patched then say so.
+
+   * A test program that makes it possible for us to reproduce the bug.
+     Include instructions on how to run the program.
+
+   * A description of what is wrong.  If the results are incorrect, in
+     what way.  If you get a crash, say so.
+
+   * If you get a crash, include a stack backtrace from the debugger if
+     it's informative (`where' in `gdb', or `$C' in `adb').
+
+   * Please do not send core dumps, executables or `strace's.
+
+   * The configuration options you used when building GMP, if any.
+
+   * The name of the compiler and its version.  For `gcc', get the
+     version with `gcc -v', otherwise perhaps `what `which cc`', or
+     similar.
+
+   * The output from running `uname -a'.
+
+   * The output from running `./config.guess', and from running
+     `./configfsf.guess' (might be the same).
+
+   * If the bug is related to `configure', then the compressed contents
+     of `config.log'.
+
+   * If the bug is related to an `asm' file not assembling, then the
+     contents of `config.m4' and the offending line or lines from the
+     temporary `mpn/tmp-<file>.s'.
+
+   Please make an effort to produce a self-contained report, with
+something definite that can be tested or debugged.  Vague queries or
+piecemeal messages are difficult to act on and don't help the
+development effort.
+
+   It is not uncommon that an observed problem is actually due to a bug
+in the compiler; the GMP code tends to explore interesting corners in
+compilers.
+
+   If your bug report is good, we will do our best to help you get a
+corrected version of the library; if the bug report is poor, we won't
+do anything about it (except maybe ask you to send a better report).
+
+   Send your report to: <gmp-bugs@gmplib.org>.
+
+   If you think something in this manual is unclear, or downright
+incorrect, or if the language needs to be improved, please send a note
+to the same address.
+
+\1f
+File: gmp.info,  Node: Integer Functions,  Next: Rational Number Functions,  Prev: Reporting Bugs,  Up: Top
+
+5 Integer Functions
+*******************
+
+This chapter describes the GMP functions for performing integer
+arithmetic.  These functions start with the prefix `mpz_'.
+
+   GMP integers are stored in objects of type `mpz_t'.
+
+* Menu:
+
+* Initializing Integers::
+* Assigning Integers::
+* Simultaneous Integer Init & Assign::
+* Converting Integers::
+* Integer Arithmetic::
+* Integer Division::
+* Integer Exponentiation::
+* Integer Roots::
+* Number Theoretic Functions::
+* Integer Comparisons::
+* Integer Logic and Bit Fiddling::
+* I/O of Integers::
+* Integer Random Numbers::
+* Integer Import and Export::
+* Miscellaneous Integer Functions::
+* Integer Special Functions::
+
+\1f
+File: gmp.info,  Node: Initializing Integers,  Next: Assigning Integers,  Prev: Integer Functions,  Up: Integer Functions
+
+5.1 Initialization Functions
+============================
+
+The functions for integer arithmetic assume that all integer objects are
+initialized.  You do that by calling the function `mpz_init'.  For
+example,
+
+     {
+       mpz_t integ;
+       mpz_init (integ);
+       ...
+       mpz_add (integ, ...);
+       ...
+       mpz_sub (integ, ...);
+
+       /* Unless the program is about to exit, do ... */
+       mpz_clear (integ);
+     }
+
+   As you can see, you can store new values any number of times, once an
+object is initialized.
+
+ -- Function: void mpz_init (mpz_t X)
+     Initialize X, and set its value to 0.
+
+ -- Function: void mpz_inits (mpz_t X, ...)
+     Initialize a NULL-terminated list of `mpz_t' variables, and set
+     their values to 0.
+
+ -- Function: void mpz_init2 (mpz_t X, mp_bitcnt_t N)
+     Initialize X, with space for N-bit numbers, and set its value to 0.
+     Calling this function instead of `mpz_init' or `mpz_inits' is never
+     necessary; reallocation is handled automatically by GMP when
+     needed.
+
+     N is only the initial space, X will grow automatically in the
+     normal way, if necessary, for subsequent values stored.
+     `mpz_init2' makes it possible to avoid such reallocations if a
+     maximum size is known in advance.
+
+ -- Function: void mpz_clear (mpz_t X)
+     Free the space occupied by X.  Call this function for all `mpz_t'
+     variables when you are done with them.
+
+ -- Function: void mpz_clears (mpz_t X, ...)
+     Free the space occupied by a NULL-terminated list of `mpz_t'
+     variables.
+
+ -- Function: void mpz_realloc2 (mpz_t X, mp_bitcnt_t N)
+     Change the space allocated for X to N bits.  The value in X is
+     preserved if it fits, or is set to 0 if not.
+
+     Calling this function is never necessary; reallocation is handled
+     automatically by GMP when needed.  But this function can be used
+     to increase the space for a variable in order to avoid repeated
+     automatic reallocations, or to decrease it to give memory back to
+     the heap.
+
+\1f
+File: gmp.info,  Node: Assigning Integers,  Next: Simultaneous Integer Init & Assign,  Prev: Initializing Integers,  Up: Integer Functions
+
+5.2 Assignment Functions
+========================
+
+These functions assign new values to already initialized integers
+(*note Initializing Integers::).
+
+ -- Function: void mpz_set (mpz_t ROP, mpz_t OP)
+ -- Function: void mpz_set_ui (mpz_t ROP, unsigned long int OP)
+ -- Function: void mpz_set_si (mpz_t ROP, signed long int OP)
+ -- Function: void mpz_set_d (mpz_t ROP, double OP)
+ -- Function: void mpz_set_q (mpz_t ROP, mpq_t OP)
+ -- Function: void mpz_set_f (mpz_t ROP, mpf_t OP)
+     Set the value of ROP from OP.
+
+     `mpz_set_d', `mpz_set_q' and `mpz_set_f' truncate OP to make it an
+     integer.
+
+ -- Function: int mpz_set_str (mpz_t ROP, char *STR, int BASE)
+     Set the value of ROP from STR, a null-terminated C string in base
+     BASE.  White space is allowed in the string, and is simply ignored.
+
+     The BASE may vary from 2 to 62, or if BASE is 0, then the leading
+     characters are used: `0x' and `0X' for hexadecimal, `0b' and `0B'
+     for binary, `0' for octal, or decimal otherwise.
+
+     For bases up to 36, case is ignored; upper-case and lower-case
+     letters have the same value.  For bases 37 to 62, upper-case
+     letters represent the usual 10..35 while lower-case letters
+     represent 36..61.
+
+     This function returns 0 if the entire string is a valid number in
+     base BASE.  Otherwise it returns -1.
+
+ -- Function: void mpz_swap (mpz_t ROP1, mpz_t ROP2)
+     Swap the values ROP1 and ROP2 efficiently.
+
+\1f
+File: gmp.info,  Node: Simultaneous Integer Init & Assign,  Next: Converting Integers,  Prev: Assigning Integers,  Up: Integer Functions
+
+5.3 Combined Initialization and Assignment Functions
+====================================================
+
+For convenience, GMP provides a parallel series of initialize-and-set
+functions which initialize the output and then store the value there.
+These functions' names have the form `mpz_init_set...'
+
+   Here is an example of using one:
+
+     {
+       mpz_t pie;
+       mpz_init_set_str (pie, "3141592653589793238462643383279502884", 10);
+       ...
+       mpz_sub (pie, ...);
+       ...
+       mpz_clear (pie);
+     }
+
+Once the integer has been initialized by any of the `mpz_init_set...'
+functions, it can be used as the source or destination operand for the
+ordinary integer functions.  Don't use an initialize-and-set function
+on a variable already initialized!
+
+ -- Function: void mpz_init_set (mpz_t ROP, mpz_t OP)
+ -- Function: void mpz_init_set_ui (mpz_t ROP, unsigned long int OP)
+ -- Function: void mpz_init_set_si (mpz_t ROP, signed long int OP)
+ -- Function: void mpz_init_set_d (mpz_t ROP, double OP)
+     Initialize ROP with limb space and set the initial numeric value
+     from OP.
+
+ -- Function: int mpz_init_set_str (mpz_t ROP, char *STR, int BASE)
+     Initialize ROP and set its value like `mpz_set_str' (see its
+     documentation above for details).
+
+     If the string is a correct base BASE number, the function returns
+     0; if an error occurs it returns -1.  ROP is initialized even if
+     an error occurs.  (I.e., you have to call `mpz_clear' for it.)
+
+\1f
+File: gmp.info,  Node: Converting Integers,  Next: Integer Arithmetic,  Prev: Simultaneous Integer Init & Assign,  Up: Integer Functions
+
+5.4 Conversion Functions
+========================
+
+This section describes functions for converting GMP integers to
+standard C types.  Functions for converting _to_ GMP integers are
+described in *note Assigning Integers:: and *note I/O of Integers::.
+
+ -- Function: unsigned long int mpz_get_ui (mpz_t OP)
+     Return the value of OP as an `unsigned long'.
+
+     If OP is too big to fit an `unsigned long' then just the least
+     significant bits that do fit are returned.  The sign of OP is
+     ignored, only the absolute value is used.
+
+ -- Function: signed long int mpz_get_si (mpz_t OP)
+     If OP fits into a `signed long int' return the value of OP.
+     Otherwise return the least significant part of OP, with the same
+     sign as OP.
+
+     If OP is too big to fit in a `signed long int', the returned
+     result is probably not very useful.  To find out if the value will
+     fit, use the function `mpz_fits_slong_p'.
+
+ -- Function: double mpz_get_d (mpz_t OP)
+     Convert OP to a `double', truncating if necessary (i.e. rounding
+     towards zero).
+
+     If the exponent from the conversion is too big, the result is
+     system dependent.  An infinity is returned where available.  A
+     hardware overflow trap may or may not occur.
+
+ -- Function: double mpz_get_d_2exp (signed long int *EXP, mpz_t OP)
+     Convert OP to a `double', truncating if necessary (i.e. rounding
+     towards zero), and returning the exponent separately.
+
+     The return value is in the range 0.5<=abs(D)<1 and the exponent is
+     stored to `*EXP'.  D * 2^EXP is the (truncated) OP value.  If OP
+     is zero, the return is 0.0 and 0 is stored to `*EXP'.
+
+     This is similar to the standard C `frexp' function (*note
+     Normalization Functions: (libc)Normalization Functions.).
+
+ -- Function: char * mpz_get_str (char *STR, int BASE, mpz_t OP)
+     Convert OP to a string of digits in base BASE.  The base argument
+     may vary from 2 to 62 or from -2 to -36.
+
+     For BASE in the range 2..36, digits and lower-case letters are
+     used; for -2..-36, digits and upper-case letters are used; for
+     37..62, digits, upper-case letters, and lower-case letters (in
+     that significance order) are used.
+
+     If STR is `NULL', the result string is allocated using the current
+     allocation function (*note Custom Allocation::).  The block will be
+     `strlen(str)+1' bytes, that being exactly enough for the string and
+     null-terminator.
+
+     If STR is not `NULL', it should point to a block of storage large
+     enough for the result, that being `mpz_sizeinbase (OP, BASE) + 2'.
+     The two extra bytes are for a possible minus sign, and the
+     null-terminator.
+
+     A pointer to the result string is returned, being either the
+     allocated block, or the given STR.
+
+\1f
+File: gmp.info,  Node: Integer Arithmetic,  Next: Integer Division,  Prev: Converting Integers,  Up: Integer Functions
+
+5.5 Arithmetic Functions
+========================
+
+ -- Function: void mpz_add (mpz_t ROP, mpz_t OP1, mpz_t OP2)
+ -- Function: void mpz_add_ui (mpz_t ROP, mpz_t OP1, unsigned long int
+          OP2)
+     Set ROP to OP1 + OP2.
+
+ -- Function: void mpz_sub (mpz_t ROP, mpz_t OP1, mpz_t OP2)
+ -- Function: void mpz_sub_ui (mpz_t ROP, mpz_t OP1, unsigned long int
+          OP2)
+ -- Function: void mpz_ui_sub (mpz_t ROP, unsigned long int OP1, mpz_t
+          OP2)
+     Set ROP to OP1 - OP2.
+
+ -- Function: void mpz_mul (mpz_t ROP, mpz_t OP1, mpz_t OP2)
+ -- Function: void mpz_mul_si (mpz_t ROP, mpz_t OP1, long int OP2)
+ -- Function: void mpz_mul_ui (mpz_t ROP, mpz_t OP1, unsigned long int
+          OP2)
+     Set ROP to OP1 times OP2.
+
+ -- Function: void mpz_addmul (mpz_t ROP, mpz_t OP1, mpz_t OP2)
+ -- Function: void mpz_addmul_ui (mpz_t ROP, mpz_t OP1, unsigned long
+          int OP2)
+     Set ROP to ROP + OP1 times OP2.
+
+ -- Function: void mpz_submul (mpz_t ROP, mpz_t OP1, mpz_t OP2)
+ -- Function: void mpz_submul_ui (mpz_t ROP, mpz_t OP1, unsigned long
+          int OP2)
+     Set ROP to ROP - OP1 times OP2.
+
+ -- Function: void mpz_mul_2exp (mpz_t ROP, mpz_t OP1, mp_bitcnt_t OP2)
+     Set ROP to OP1 times 2 raised to OP2.  This operation can also be
+     defined as a left shift by OP2 bits.
+
+ -- Function: void mpz_neg (mpz_t ROP, mpz_t OP)
+     Set ROP to -OP.
+
+ -- Function: void mpz_abs (mpz_t ROP, mpz_t OP)
+     Set ROP to the absolute value of OP.
+
+\1f
+File: gmp.info,  Node: Integer Division,  Next: Integer Exponentiation,  Prev: Integer Arithmetic,  Up: Integer Functions
+
+5.6 Division Functions
+======================
+
+Division is undefined if the divisor is zero.  Passing a zero divisor
+to the division or modulo functions (including the modular powering
+functions `mpz_powm' and `mpz_powm_ui'), will cause an intentional
+division by zero.  This lets a program handle arithmetic exceptions in
+these functions the same way as for normal C `int' arithmetic.
+
+ -- Function: void mpz_cdiv_q (mpz_t Q, mpz_t N, mpz_t D)
+ -- Function: void mpz_cdiv_r (mpz_t R, mpz_t N, mpz_t D)
+ -- Function: void mpz_cdiv_qr (mpz_t Q, mpz_t R, mpz_t N, mpz_t D)
+ -- Function: unsigned long int mpz_cdiv_q_ui (mpz_t Q, mpz_t N,
+          unsigned long int D)
+ -- Function: unsigned long int mpz_cdiv_r_ui (mpz_t R, mpz_t N,
+          unsigned long int D)
+ -- Function: unsigned long int mpz_cdiv_qr_ui (mpz_t Q, mpz_t R,
+          mpz_t N, unsigned long int D)
+ -- Function: unsigned long int mpz_cdiv_ui (mpz_t N,
+          unsigned long int D)
+ -- Function: void mpz_cdiv_q_2exp (mpz_t Q, mpz_t N, mp_bitcnt_t B)
+ -- Function: void mpz_cdiv_r_2exp (mpz_t R, mpz_t N, mp_bitcnt_t B)
+
+ -- Function: void mpz_fdiv_q (mpz_t Q, mpz_t N, mpz_t D)
+ -- Function: void mpz_fdiv_r (mpz_t R, mpz_t N, mpz_t D)
+ -- Function: void mpz_fdiv_qr (mpz_t Q, mpz_t R, mpz_t N, mpz_t D)
+ -- Function: unsigned long int mpz_fdiv_q_ui (mpz_t Q, mpz_t N,
+          unsigned long int D)
+ -- Function: unsigned long int mpz_fdiv_r_ui (mpz_t R, mpz_t N,
+          unsigned long int D)
+ -- Function: unsigned long int mpz_fdiv_qr_ui (mpz_t Q, mpz_t R,
+          mpz_t N, unsigned long int D)
+ -- Function: unsigned long int mpz_fdiv_ui (mpz_t N,
+          unsigned long int D)
+ -- Function: void mpz_fdiv_q_2exp (mpz_t Q, mpz_t N, mp_bitcnt_t B)
+ -- Function: void mpz_fdiv_r_2exp (mpz_t R, mpz_t N, mp_bitcnt_t B)
+
+ -- Function: void mpz_tdiv_q (mpz_t Q, mpz_t N, mpz_t D)
+ -- Function: void mpz_tdiv_r (mpz_t R, mpz_t N, mpz_t D)
+ -- Function: void mpz_tdiv_qr (mpz_t Q, mpz_t R, mpz_t N, mpz_t D)
+ -- Function: unsigned long int mpz_tdiv_q_ui (mpz_t Q, mpz_t N,
+          unsigned long int D)
+ -- Function: unsigned long int mpz_tdiv_r_ui (mpz_t R, mpz_t N,
+          unsigned long int D)
+ -- Function: unsigned long int mpz_tdiv_qr_ui (mpz_t Q, mpz_t R,
+          mpz_t N, unsigned long int D)
+ -- Function: unsigned long int mpz_tdiv_ui (mpz_t N,
+          unsigned long int D)
+ -- Function: void mpz_tdiv_q_2exp (mpz_t Q, mpz_t N, mp_bitcnt_t B)
+ -- Function: void mpz_tdiv_r_2exp (mpz_t R, mpz_t N, mp_bitcnt_t B)
+
+     Divide N by D, forming a quotient Q and/or remainder R.  For the
+     `2exp' functions, D=2^B.  The rounding is in three styles, each
+     suiting different applications.
+
+        * `cdiv' rounds Q up towards +infinity, and R will have the
+          opposite sign to D.  The `c' stands for "ceil".
+
+        * `fdiv' rounds Q down towards -infinity, and R will have the
+          same sign as D.  The `f' stands for "floor".
+
+        * `tdiv' rounds Q towards zero, and R will have the same sign
+          as N.  The `t' stands for "truncate".
+
+     In all cases Q and R will satisfy N=Q*D+R, and R will satisfy
+     0<=abs(R)<abs(D).
+
+     The `q' functions calculate only the quotient, the `r' functions
+     only the remainder, and the `qr' functions calculate both.  Note
+     that for `qr' the same variable cannot be passed for both Q and R,
+     or results will be unpredictable.
+
+     For the `ui' variants the return value is the remainder, and in
+     fact returning the remainder is all the `div_ui' functions do.  For
+     `tdiv' and `cdiv' the remainder can be negative, so for those the
+     return value is the absolute value of the remainder.
+
+     For the `2exp' variants the divisor is 2^B.  These functions are
+     implemented as right shifts and bit masks, but of course they
+     round the same as the other functions.
+
+     For positive N both `mpz_fdiv_q_2exp' and `mpz_tdiv_q_2exp' are
+     simple bitwise right shifts.  For negative N, `mpz_fdiv_q_2exp' is
+     effectively an arithmetic right shift treating N as twos complement
+     the same as the bitwise logical functions do, whereas
+     `mpz_tdiv_q_2exp' effectively treats N as sign and magnitude.
+
+ -- Function: void mpz_mod (mpz_t R, mpz_t N, mpz_t D)
+ -- Function: unsigned long int mpz_mod_ui (mpz_t R, mpz_t N,
+          unsigned long int D)
+     Set R to N `mod' D.  The sign of the divisor is ignored; the
+     result is always non-negative.
+
+     `mpz_mod_ui' is identical to `mpz_fdiv_r_ui' above, returning the
+     remainder as well as setting R.  See `mpz_fdiv_ui' above if only
+     the return value is wanted.
+
+ -- Function: void mpz_divexact (mpz_t Q, mpz_t N, mpz_t D)
+ -- Function: void mpz_divexact_ui (mpz_t Q, mpz_t N, unsigned long D)
+     Set Q to N/D.  These functions produce correct results only when
+     it is known in advance that D divides N.
+
+     These routines are much faster than the other division functions,
+     and are the best choice when exact division is known to occur, for
+     example reducing a rational to lowest terms.
+
+ -- Function: int mpz_divisible_p (mpz_t N, mpz_t D)
+ -- Function: int mpz_divisible_ui_p (mpz_t N, unsigned long int D)
+ -- Function: int mpz_divisible_2exp_p (mpz_t N, mp_bitcnt_t B)
+     Return non-zero if N is exactly divisible by D, or in the case of
+     `mpz_divisible_2exp_p' by 2^B.
+
+     N is divisible by D if there exists an integer Q satisfying N =
+     Q*D.  Unlike the other division functions, D=0 is accepted and
+     following the rule it can be seen that only 0 is considered
+     divisible by 0.
+
+ -- Function: int mpz_congruent_p (mpz_t N, mpz_t C, mpz_t D)
+ -- Function: int mpz_congruent_ui_p (mpz_t N, unsigned long int C,
+          unsigned long int D)
+ -- Function: int mpz_congruent_2exp_p (mpz_t N, mpz_t C, mp_bitcnt_t B)
+     Return non-zero if N is congruent to C modulo D, or in the case of
+     `mpz_congruent_2exp_p' modulo 2^B.
+
+     N is congruent to C mod D if there exists an integer Q satisfying
+     N = C + Q*D.  Unlike the other division functions, D=0 is accepted
+     and following the rule it can be seen that N and C are considered
+     congruent mod 0 only when exactly equal.
+
+\1f
+File: gmp.info,  Node: Integer Exponentiation,  Next: Integer Roots,  Prev: Integer Division,  Up: Integer Functions
+
+5.7 Exponentiation Functions
+============================
+
+ -- Function: void mpz_powm (mpz_t ROP, mpz_t BASE, mpz_t EXP, mpz_t
+          MOD)
+ -- Function: void mpz_powm_ui (mpz_t ROP, mpz_t BASE, unsigned long
+          int EXP, mpz_t MOD)
+     Set ROP to (BASE raised to EXP) modulo MOD.
+
+     Negative EXP is supported if an inverse BASE^-1 mod MOD exists
+     (see `mpz_invert' in *note Number Theoretic Functions::).  If an
+     inverse doesn't exist then a divide by zero is raised.
+
+ -- Function: void mpz_powm_sec (mpz_t ROP, mpz_t BASE, mpz_t EXP,
+          mpz_t MOD)
+     Set ROP to (BASE raised to EXP) modulo MOD.
+
+     It is required that EXP > 0 and that MOD is odd.
+
+     This function is designed to take the same time and have the same
+     cache access patterns for any two same-size arguments, assuming
+     that function arguments are placed at the same position and that
+     the machine state is identical upon function entry.  This function
+     is intended for cryptographic purposes, where resilience to
+     side-channel attacks is desired.
+
+ -- Function: void mpz_pow_ui (mpz_t ROP, mpz_t BASE, unsigned long int
+          EXP)
+ -- Function: void mpz_ui_pow_ui (mpz_t ROP, unsigned long int BASE,
+          unsigned long int EXP)
+     Set ROP to BASE raised to EXP.  The case 0^0 yields 1.
+
+\1f
+File: gmp.info,  Node: Integer Roots,  Next: Number Theoretic Functions,  Prev: Integer Exponentiation,  Up: Integer Functions
+
+5.8 Root Extraction Functions
+=============================
+
+ -- Function: int mpz_root (mpz_t ROP, mpz_t OP, unsigned long int N)
+     Set ROP to the truncated integer part of the Nth root of OP.
+     Return non-zero if the computation was exact, i.e., if OP is ROP
+     to the Nth power.
+
+ -- Function: void mpz_rootrem (mpz_t ROOT, mpz_t REM, mpz_t U,
+          unsigned long int N)
+     Set ROOT to the truncated integer part of the Nth root of U.  Set
+     REM to the remainder, U-ROOT**N.
+
+ -- Function: void mpz_sqrt (mpz_t ROP, mpz_t OP)
+     Set ROP to the truncated integer part of the square root of OP.
+
+ -- Function: void mpz_sqrtrem (mpz_t ROP1, mpz_t ROP2, mpz_t OP)
+     Set ROP1 to the truncated integer part of the square root of OP,
+     like `mpz_sqrt'.  Set ROP2 to the remainder OP-ROP1*ROP1, which
+     will be zero if OP is a perfect square.
+
+     If ROP1 and ROP2 are the same variable, the results are undefined.
+
+ -- Function: int mpz_perfect_power_p (mpz_t OP)
+     Return non-zero if OP is a perfect power, i.e., if there exist
+     integers A and B, with B>1, such that OP equals A raised to the
+     power B.
+
+     Under this definition both 0 and 1 are considered to be perfect
+     powers.  Negative values of OP are accepted, but of course can
+     only be odd perfect powers.
+
+ -- Function: int mpz_perfect_square_p (mpz_t OP)
+     Return non-zero if OP is a perfect square, i.e., if the square
+     root of OP is an integer.  Under this definition both 0 and 1 are
+     considered to be perfect squares.
+
+\1f
+File: gmp.info,  Node: Number Theoretic Functions,  Next: Integer Comparisons,  Prev: Integer Roots,  Up: Integer Functions
+
+5.9 Number Theoretic Functions
+==============================
+
+ -- Function: int mpz_probab_prime_p (mpz_t N, int REPS)
+     Determine whether N is prime.  Return 2 if N is definitely prime,
+     return 1 if N is probably prime (without being certain), or return
+     0 if N is definitely composite.
+
+     This function does some trial divisions, then some Miller-Rabin
+     probabilistic primality tests.  REPS controls how many such tests
+     are done, 5 to 10 is a reasonable number, more will reduce the
+     chances of a composite being returned as "probably prime".
+
+     Miller-Rabin and similar tests can be more properly called
+     compositeness tests.  Numbers which fail are known to be composite
+     but those which pass might be prime or might be composite.  Only a
+     few composites pass, hence those which pass are considered
+     probably prime.
+
+ -- Function: void mpz_nextprime (mpz_t ROP, mpz_t OP)
+     Set ROP to the next prime greater than OP.
+
+     This function uses a probabilistic algorithm to identify primes.
+     For practical purposes it's adequate, the chance of a composite
+     passing will be extremely small.
+
+ -- Function: void mpz_gcd (mpz_t ROP, mpz_t OP1, mpz_t OP2)
+     Set ROP to the greatest common divisor of OP1 and OP2.  The result
+     is always positive even if one or both input operands are negative,
+     except if both inputs are zero; then this function defines
+     gcd(0,0) = 0.
+
+ -- Function: unsigned long int mpz_gcd_ui (mpz_t ROP, mpz_t OP1,
+          unsigned long int OP2)
+     Compute the greatest common divisor of OP1 and OP2.  If ROP is not
+     `NULL', store the result there.
+
+     If the result is small enough to fit in an `unsigned long int', it
+     is returned.  If the result does not fit, 0 is returned, and the
+     result is equal to the argument OP1.  Note that the result will
+     always fit if OP2 is non-zero.
+
+ -- Function: void mpz_gcdext (mpz_t G, mpz_t S, mpz_t T, mpz_t A,
+          mpz_t B)
+     Set G to the greatest common divisor of A and B, and in addition
+     set S and T to coefficients satisfying A*S + B*T = G.  The value
+     in G is always positive, even if one or both of A and B are
+     negative (or zero if both inputs are zero).  The values in S and T
+     are chosen such that normally, abs(S) < abs(B) / (2 G) and abs(T)
+     < abs(A) / (2 G), and these relations define S and T uniquely.
+     There are a few exceptional cases:
+
+     If abs(A) = abs(B), then S = 0, T = sgn(B).
+
+     Otherwise, S = sgn(A) if B = 0 or abs(B) = 2 G, and T = sgn(B) if
+     A = 0 or abs(A) = 2 G.
+
+     In all cases, S = 0 if and only if G = abs(B), i.e., if B divides
+     A or A = B = 0.
+
+     If T is `NULL' then that value is not computed.
+
+ -- Function: void mpz_lcm (mpz_t ROP, mpz_t OP1, mpz_t OP2)
+ -- Function: void mpz_lcm_ui (mpz_t ROP, mpz_t OP1, unsigned long OP2)
+     Set ROP to the least common multiple of OP1 and OP2.  ROP is
+     always positive, irrespective of the signs of OP1 and OP2.  ROP
+     will be zero if either OP1 or OP2 is zero.
+
+ -- Function: int mpz_invert (mpz_t ROP, mpz_t OP1, mpz_t OP2)
+     Compute the inverse of OP1 modulo OP2 and put the result in ROP.
+     If the inverse exists, the return value is non-zero and ROP will
+     satisfy 0 < ROP < abs(OP2).  If an inverse doesn't exist the
+     return value is zero and ROP is undefined.  The behaviour of this
+     function is undefined when OP2 is zero.
+
+ -- Function: int mpz_jacobi (mpz_t A, mpz_t B)
+     Calculate the Jacobi symbol (A/B).  This is defined only for B odd.
+
+ -- Function: int mpz_legendre (mpz_t A, mpz_t P)
+     Calculate the Legendre symbol (A/P).  This is defined only for P
+     an odd positive prime, and for such P it's identical to the Jacobi
+     symbol.
+
+ -- Function: int mpz_kronecker (mpz_t A, mpz_t B)
+ -- Function: int mpz_kronecker_si (mpz_t A, long B)
+ -- Function: int mpz_kronecker_ui (mpz_t A, unsigned long B)
+ -- Function: int mpz_si_kronecker (long A, mpz_t B)
+ -- Function: int mpz_ui_kronecker (unsigned long A, mpz_t B)
+     Calculate the Jacobi symbol (A/B) with the Kronecker extension
+     (a/2)=(2/a) when a odd, or (a/2)=0 when a even.
+
+     When B is odd the Jacobi symbol and Kronecker symbol are
+     identical, so `mpz_kronecker_ui' etc can be used for mixed
+     precision Jacobi symbols too.
+
+     For more information see Henri Cohen section 1.4.2 (*note
+     References::), or any number theory textbook.  See also the
+     example program `demos/qcn.c' which uses `mpz_kronecker_ui'.
+
+ -- Function: mp_bitcnt_t mpz_remove (mpz_t ROP, mpz_t OP, mpz_t F)
+     Remove all occurrences of the factor F from OP and store the
+     result in ROP.  The return value is how many such occurrences were
+     removed.
+
+ -- Function: void mpz_fac_ui (mpz_t ROP, unsigned long int OP)
+     Set ROP to OP!, the factorial of OP.
+
+ -- Function: void mpz_bin_ui (mpz_t ROP, mpz_t N, unsigned long int K)
+ -- Function: void mpz_bin_uiui (mpz_t ROP, unsigned long int N,
+          unsigned long int K)
+     Compute the binomial coefficient N over K and store the result in
+     ROP.  Negative values of N are supported by `mpz_bin_ui', using
+     the identity bin(-n,k) = (-1)^k * bin(n+k-1,k), see Knuth volume 1
+     section 1.2.6 part G.
+
+ -- Function: void mpz_fib_ui (mpz_t FN, unsigned long int N)
+ -- Function: void mpz_fib2_ui (mpz_t FN, mpz_t FNSUB1, unsigned long
+          int N)
+     `mpz_fib_ui' sets FN to F[n], the N'th Fibonacci number.
+     `mpz_fib2_ui' sets FN to F[n], and FNSUB1 to F[n-1].
+
+     These functions are designed for calculating isolated Fibonacci
+     numbers.  When a sequence of values is wanted it's best to start
+     with `mpz_fib2_ui' and iterate the defining F[n+1]=F[n]+F[n-1] or
+     similar.
+
+ -- Function: void mpz_lucnum_ui (mpz_t LN, unsigned long int N)
+ -- Function: void mpz_lucnum2_ui (mpz_t LN, mpz_t LNSUB1, unsigned
+          long int N)
+     `mpz_lucnum_ui' sets LN to L[n], the N'th Lucas number.
+     `mpz_lucnum2_ui' sets LN to L[n], and LNSUB1 to L[n-1].
+
+     These functions are designed for calculating isolated Lucas
+     numbers.  When a sequence of values is wanted it's best to start
+     with `mpz_lucnum2_ui' and iterate the defining L[n+1]=L[n]+L[n-1]
+     or similar.
+
+     The Fibonacci numbers and Lucas numbers are related sequences, so
+     it's never necessary to call both `mpz_fib2_ui' and
+     `mpz_lucnum2_ui'.  The formulas for going from Fibonacci to Lucas
+     can be found in *note Lucas Numbers Algorithm::, the reverse is
+     straightforward too.
+
+\1f
+File: gmp.info,  Node: Integer Comparisons,  Next: Integer Logic and Bit Fiddling,  Prev: Number Theoretic Functions,  Up: Integer Functions
+
+5.10 Comparison Functions
+=========================
+
+ -- Function: int mpz_cmp (mpz_t OP1, mpz_t OP2)
+ -- Function: int mpz_cmp_d (mpz_t OP1, double OP2)
+ -- Macro: int mpz_cmp_si (mpz_t OP1, signed long int OP2)
+ -- Macro: int mpz_cmp_ui (mpz_t OP1, unsigned long int OP2)
+     Compare OP1 and OP2.  Return a positive value if OP1 > OP2, zero
+     if OP1 = OP2, or a negative value if OP1 < OP2.
+
+     `mpz_cmp_ui' and `mpz_cmp_si' are macros and will evaluate their
+     arguments more than once.  `mpz_cmp_d' can be called with an
+     infinity, but results are undefined for a NaN.
+
+ -- Function: int mpz_cmpabs (mpz_t OP1, mpz_t OP2)
+ -- Function: int mpz_cmpabs_d (mpz_t OP1, double OP2)
+ -- Function: int mpz_cmpabs_ui (mpz_t OP1, unsigned long int OP2)
+     Compare the absolute values of OP1 and OP2.  Return a positive
+     value if abs(OP1) > abs(OP2), zero if abs(OP1) = abs(OP2), or a
+     negative value if abs(OP1) < abs(OP2).
+
+     `mpz_cmpabs_d' can be called with an infinity, but results are
+     undefined for a NaN.
+
+ -- Macro: int mpz_sgn (mpz_t OP)
+     Return +1 if OP > 0, 0 if OP = 0, and -1 if OP < 0.
+
+     This function is actually implemented as a macro.  It evaluates
+     its argument multiple times.
+
+\1f
+File: gmp.info,  Node: Integer Logic and Bit Fiddling,  Next: I/O of Integers,  Prev: Integer Comparisons,  Up: Integer Functions
+
+5.11 Logical and Bit Manipulation Functions
+===========================================
+
+These functions behave as if two's complement arithmetic were used
+(although sign-magnitude is the actual implementation).  The least
+significant bit is number 0.
+
+ -- Function: void mpz_and (mpz_t ROP, mpz_t OP1, mpz_t OP2)
+     Set ROP to OP1 bitwise-and OP2.
+
+ -- Function: void mpz_ior (mpz_t ROP, mpz_t OP1, mpz_t OP2)
+     Set ROP to OP1 bitwise inclusive-or OP2.
+
+ -- Function: void mpz_xor (mpz_t ROP, mpz_t OP1, mpz_t OP2)
+     Set ROP to OP1 bitwise exclusive-or OP2.
+
+ -- Function: void mpz_com (mpz_t ROP, mpz_t OP)
+     Set ROP to the one's complement of OP.
+
+ -- Function: mp_bitcnt_t mpz_popcount (mpz_t OP)
+     If OP>=0, return the population count of OP, which is the number
+     of 1 bits in the binary representation.  If OP<0, the number of 1s
+     is infinite, and the return value is the largest possible
+     `mp_bitcnt_t'.
+
+ -- Function: mp_bitcnt_t mpz_hamdist (mpz_t OP1, mpz_t OP2)
+     If OP1 and OP2 are both >=0 or both <0, return the hamming
+     distance between the two operands, which is the number of bit
+     positions where OP1 and OP2 have different bit values.  If one
+     operand is >=0 and the other <0 then the number of bits different
+     is infinite, and the return value is the largest possible
+     `mp_bitcnt_t'.
+
+ -- Function: mp_bitcnt_t mpz_scan0 (mpz_t OP, mp_bitcnt_t STARTING_BIT)
+ -- Function: mp_bitcnt_t mpz_scan1 (mpz_t OP, mp_bitcnt_t STARTING_BIT)
+     Scan OP, starting from bit STARTING_BIT, towards more significant
+     bits, until the first 0 or 1 bit (respectively) is found.  Return
+     the index of the found bit.
+
+     If the bit at STARTING_BIT is already what's sought, then
+     STARTING_BIT is returned.
+
+     If there's no bit found, then the largest possible `mp_bitcnt_t' is
+     returned.  This will happen in `mpz_scan0' past the end of a
+     negative number, or `mpz_scan1' past the end of a nonnegative
+     number.
+
+ -- Function: void mpz_setbit (mpz_t ROP, mp_bitcnt_t BIT_INDEX)
+     Set bit BIT_INDEX in ROP.
+
+ -- Function: void mpz_clrbit (mpz_t ROP, mp_bitcnt_t BIT_INDEX)
+     Clear bit BIT_INDEX in ROP.
+
+ -- Function: void mpz_combit (mpz_t ROP, mp_bitcnt_t BIT_INDEX)
+     Complement bit BIT_INDEX in ROP.
+
+ -- Function: int mpz_tstbit (mpz_t OP, mp_bitcnt_t BIT_INDEX)
+     Test bit BIT_INDEX in OP and return 0 or 1 accordingly.
+
+\1f
+File: gmp.info,  Node: I/O of Integers,  Next: Integer Random Numbers,  Prev: Integer Logic and Bit Fiddling,  Up: Integer Functions
+
+5.12 Input and Output Functions
+===============================
+
+Functions that perform input from a stdio stream, and functions that
+output to a stdio stream, of `mpz' numbers.  Passing a `NULL' pointer
+for a STREAM argument to any of these functions will make them read from
+`stdin' and write to `stdout', respectively.
+
+   When using any of these functions, it is a good idea to include
+`stdio.h' before `gmp.h', since that will allow `gmp.h' to define
+prototypes for these functions.
+
+   See also *note Formatted Output:: and *note Formatted Input::.
+
+ -- Function: size_t mpz_out_str (FILE *STREAM, int BASE, mpz_t OP)
+     Output OP on stdio stream STREAM, as a string of digits in base
+     BASE.  The base argument may vary from 2 to 62 or from -2 to -36.
+
+     For BASE in the range 2..36, digits and lower-case letters are
+     used; for -2..-36, digits and upper-case letters are used; for
+     37..62, digits, upper-case letters, and lower-case letters (in
+     that significance order) are used.
+
+     Return the number of bytes written, or if an error occurred,
+     return 0.
+
+ -- Function: size_t mpz_inp_str (mpz_t ROP, FILE *STREAM, int BASE)
+     Input a possibly white-space preceded string in base BASE from
+     stdio stream STREAM, and put the read integer in ROP.
+
+     The BASE may vary from 2 to 62, or if BASE is 0, then the leading
+     characters are used: `0x' and `0X' for hexadecimal, `0b' and `0B'
+     for binary, `0' for octal, or decimal otherwise.
+
+     For bases up to 36, case is ignored; upper-case and lower-case
+     letters have the same value.  For bases 37 to 62, upper-case
+     letters represent the usual 10..35 while lower-case letters
+     represent 36..61.
+
+     Return the number of bytes read, or if an error occurred, return 0.
+
+ -- Function: size_t mpz_out_raw (FILE *STREAM, mpz_t OP)
+     Output OP on stdio stream STREAM, in raw binary format.  The
+     integer is written in a portable format, with 4 bytes of size
+     information, and that many bytes of limbs.  Both the size and the
+     limbs are written in decreasing significance order (i.e., in
+     big-endian).
+
+     The output can be read with `mpz_inp_raw'.
+
+     Return the number of bytes written, or if an error occurred,
+     return 0.
+
+     The output of this can not be read by `mpz_inp_raw' from GMP 1,
+     because of changes necessary for compatibility between 32-bit and
+     64-bit machines.
+
+ -- Function: size_t mpz_inp_raw (mpz_t ROP, FILE *STREAM)
+     Input from stdio stream STREAM in the format written by
+     `mpz_out_raw', and put the result in ROP.  Return the number of
+     bytes read, or if an error occurred, return 0.
+
+     This routine can read the output from `mpz_out_raw' also from GMP
+     1, in spite of changes necessary for compatibility between 32-bit
+     and 64-bit machines.
+
+\1f
+File: gmp.info,  Node: Integer Random Numbers,  Next: Integer Import and Export,  Prev: I/O of Integers,  Up: Integer Functions
+
+5.13 Random Number Functions
+============================
+
+The random number functions of GMP come in two groups; older functions
+that rely on a global state, and newer functions that accept a state
+parameter that is read and modified.  Please see the *note Random
+Number Functions:: for more information on how to use and not to use
+random number functions.
+
+ -- Function: void mpz_urandomb (mpz_t ROP, gmp_randstate_t STATE,
+          mp_bitcnt_t N)
+     Generate a uniformly distributed random integer in the range 0 to
+     2^N-1, inclusive.
+
+     The variable STATE must be initialized by calling one of the
+     `gmp_randinit' functions (*note Random State Initialization::)
+     before invoking this function.
+
+ -- Function: void mpz_urandomm (mpz_t ROP, gmp_randstate_t STATE,
+          mpz_t N)
+     Generate a uniform random integer in the range 0 to N-1, inclusive.
+
+     The variable STATE must be initialized by calling one of the
+     `gmp_randinit' functions (*note Random State Initialization::)
+     before invoking this function.
+
+ -- Function: void mpz_rrandomb (mpz_t ROP, gmp_randstate_t STATE,
+          mp_bitcnt_t N)
+     Generate a random integer with long strings of zeros and ones in
+     the binary representation.  Useful for testing functions and
+     algorithms, since random numbers of this kind have proven to be
+     more likely to trigger corner-case bugs.  The random number will
+     be in the range 0 to 2^N-1, inclusive.
+
+     The variable STATE must be initialized by calling one of the
+     `gmp_randinit' functions (*note Random State Initialization::)
+     before invoking this function.
+
+ -- Function: void mpz_random (mpz_t ROP, mp_size_t MAX_SIZE)
+     Generate a random integer of at most MAX_SIZE limbs.  The generated
+     random number doesn't satisfy any particular requirements of
+     randomness.  Negative random numbers are generated when MAX_SIZE
+     is negative.
+
+     This function is obsolete.  Use `mpz_urandomb' or `mpz_urandomm'
+     instead.
+
+ -- Function: void mpz_random2 (mpz_t ROP, mp_size_t MAX_SIZE)
+     Generate a random integer of at most MAX_SIZE limbs, with long
+     strings of zeros and ones in the binary representation.  Useful
+     for testing functions and algorithms, since random numbers of this
+     kind have proven to be more likely to trigger corner-case bugs.
+     Negative random numbers are generated when MAX_SIZE is negative.
+
+     This function is obsolete.  Use `mpz_rrandomb' instead.
+
+\1f
+File: gmp.info,  Node: Integer Import and Export,  Next: Miscellaneous Integer Functions,  Prev: Integer Random Numbers,  Up: Integer Functions
+
+5.14 Integer Import and Export
+==============================
+
+`mpz_t' variables can be converted to and from arbitrary words of binary
+data with the following functions.
+
+ -- Function: void mpz_import (mpz_t ROP, size_t COUNT, int ORDER,
+          size_t SIZE, int ENDIAN, size_t NAILS, const void *OP)
+     Set ROP from an array of word data at OP.
+
+     The parameters specify the format of the data.  COUNT many words
+     are read, each SIZE bytes.  ORDER can be 1 for most significant
+     word first or -1 for least significant first.  Within each word
+     ENDIAN can be 1 for most significant byte first, -1 for least
+     significant first, or 0 for the native endianness of the host CPU.
+     The most significant NAILS bits of each word are skipped, this can
+     be 0 to use the full words.
+
+     There is no sign taken from the data, ROP will simply be a positive
+     integer.  An application can handle any sign itself, and apply it
+     for instance with `mpz_neg'.
+
+     There are no data alignment restrictions on OP, any address is
+     allowed.
+
+     Here's an example converting an array of `unsigned long' data, most
+     significant element first, and host byte order within each value.
+
+          unsigned long  a[20];
+          /* Initialize Z and A */
+          mpz_import (z, 20, 1, sizeof(a[0]), 0, 0, a);
+
+     This example assumes the full `sizeof' bytes are used for data in
+     the given type, which is usually true, and certainly true for
+     `unsigned long' everywhere we know of.  However on Cray vector
+     systems it may be noted that `short' and `int' are always stored
+     in 8 bytes (and with `sizeof' indicating that) but use only 32 or
+     46 bits.  The NAILS feature can account for this, by passing for
+     instance `8*sizeof(int)-INT_BIT'.
+
+ -- Function: void * mpz_export (void *ROP, size_t *COUNTP, int ORDER,
+          size_t SIZE, int ENDIAN, size_t NAILS, mpz_t OP)
+     Fill ROP with word data from OP.
+
+     The parameters specify the format of the data produced.  Each word
+     will be SIZE bytes and ORDER can be 1 for most significant word
+     first or -1 for least significant first.  Within each word ENDIAN
+     can be 1 for most significant byte first, -1 for least significant
+     first, or 0 for the native endianness of the host CPU.  The most
+     significant NAILS bits of each word are unused and set to zero,
+     this can be 0 to produce full words.
+
+     The number of words produced is written to `*COUNTP', or COUNTP
+     can be `NULL' to discard the count.  ROP must have enough space
+     for the data, or if ROP is `NULL' then a result array of the
+     necessary size is allocated using the current GMP allocation
+     function (*note Custom Allocation::).  In either case the return
+     value is the destination used, either ROP or the allocated block.
+
+     If OP is non-zero then the most significant word produced will be
+     non-zero.  If OP is zero then the count returned will be zero and
+     nothing written to ROP.  If ROP is `NULL' in this case, no block
+     is allocated, just `NULL' is returned.
+
+     The sign of OP is ignored, just the absolute value is exported.  An
+     application can use `mpz_sgn' to get the sign and handle it as
+     desired.  (*note Integer Comparisons::)
+
+     There are no data alignment restrictions on ROP, any address is
+     allowed.
+
+     When an application is allocating space itself the required size
+     can be determined with a calculation like the following.  Since
+     `mpz_sizeinbase' always returns at least 1, `count' here will be
+     at least one, which avoids any portability problems with
+     `malloc(0)', though if `z' is zero no space at all is actually
+     needed (or written).
+
+          numb = 8*size - nail;
+          count = (mpz_sizeinbase (z, 2) + numb-1) / numb;
+          p = malloc (count * size);
+
+\1f
+File: gmp.info,  Node: Miscellaneous Integer Functions,  Next: Integer Special Functions,  Prev: Integer Import and Export,  Up: Integer Functions
+
+5.15 Miscellaneous Functions
+============================
+
+ -- Function: int mpz_fits_ulong_p (mpz_t OP)
+ -- Function: int mpz_fits_slong_p (mpz_t OP)
+ -- Function: int mpz_fits_uint_p (mpz_t OP)
+ -- Function: int mpz_fits_sint_p (mpz_t OP)
+ -- Function: int mpz_fits_ushort_p (mpz_t OP)
+ -- Function: int mpz_fits_sshort_p (mpz_t OP)
+     Return non-zero iff the value of OP fits in an `unsigned long int',
+     `signed long int', `unsigned int', `signed int', `unsigned short
+     int', or `signed short int', respectively.  Otherwise, return zero.
+
+ -- Macro: int mpz_odd_p (mpz_t OP)
+ -- Macro: int mpz_even_p (mpz_t OP)
+     Determine whether OP is odd or even, respectively.  Return
+     non-zero if yes, zero if no.  These macros evaluate their argument
+     more than once.
+
+ -- Function: size_t mpz_sizeinbase (mpz_t OP, int BASE)
+     Return the size of OP measured in number of digits in the given
+     BASE.  BASE can vary from 2 to 62.  The sign of OP is ignored,
+     just the absolute value is used.  The result will be either exact
+     or 1 too big.  If BASE is a power of 2, the result is always
+     exact.  If OP is zero the return value is always 1.
+
+     This function can be used to determine the space required when
+     converting OP to a string.  The right amount of allocation is
+     normally two more than the value returned by `mpz_sizeinbase', one
+     extra for a minus sign and one for the null-terminator.
+
+     It will be noted that `mpz_sizeinbase(OP,2)' can be used to locate
+     the most significant 1 bit in OP, counting from 1.  (Unlike the
+     bitwise functions which start from 0, *Note Logical and Bit
+     Manipulation Functions: Integer Logic and Bit Fiddling.)
+
+\1f
+File: gmp.info,  Node: Integer Special Functions,  Prev: Miscellaneous Integer Functions,  Up: Integer Functions
+
+5.16 Special Functions
+======================
+
+The functions in this section are for various special purposes.  Most
+applications will not need them.
+
+ -- Function: void mpz_array_init (mpz_t INTEGER_ARRAY, mp_size_t
+          ARRAY_SIZE, mp_size_t FIXED_NUM_BITS)
+     This is a special type of initialization.  *Fixed* space of
+     FIXED_NUM_BITS is allocated to each of the ARRAY_SIZE integers in
+     INTEGER_ARRAY.  There is no way to free the storage allocated by
+     this function.  Don't call `mpz_clear'!
+
+     The INTEGER_ARRAY parameter is the first `mpz_t' in the array.  For
+     example,
+
+          mpz_t  arr[20000];
+          mpz_array_init (arr[0], 20000, 512);
+
+     This function is only intended for programs that create a large
+     number of integers and need to reduce memory usage by avoiding the
+     overheads of allocating and reallocating lots of small blocks.  In
+     normal programs this function is not recommended.
+
+     The space allocated to each integer by this function will not be
+     automatically increased, unlike the normal `mpz_init', so an
+     application must ensure it is sufficient for any value stored.
+     The following space requirements apply to various routines,
+
+        * `mpz_abs', `mpz_neg', `mpz_set', `mpz_set_si' and
+          `mpz_set_ui' need room for the value they store.
+
+        * `mpz_add', `mpz_add_ui', `mpz_sub' and `mpz_sub_ui' need room
+          for the larger of the two operands, plus an extra
+          `mp_bits_per_limb'.
+
+        * `mpz_mul', `mpz_mul_ui' and `mpz_mul_si' need room for the sum
+          of the number of bits in their operands, but each rounded up
+          to a multiple of `mp_bits_per_limb'.
+
+        * `mpz_swap' can be used between two array variables, but not
+          between an array and a normal variable.
+
+     For other functions, or if in doubt, the suggestion is to
+     calculate in a regular `mpz_init' variable and copy the result to
+     an array variable with `mpz_set'.
+
+ -- Function: void * _mpz_realloc (mpz_t INTEGER, mp_size_t NEW_ALLOC)
+     Change the space for INTEGER to NEW_ALLOC limbs.  The value in
+     INTEGER is preserved if it fits, or is set to 0 if not.  The return
+     value is not useful to applications and should be ignored.
+
+     `mpz_realloc2' is the preferred way to accomplish allocation
+     changes like this.  `mpz_realloc2' and `_mpz_realloc' are the same
+     except that `_mpz_realloc' takes its size in limbs.
+
+ -- Function: mp_limb_t mpz_getlimbn (mpz_t OP, mp_size_t N)
+     Return limb number N from OP.  The sign of OP is ignored, just the
+     absolute value is used.  The least significant limb is number 0.
+
+     `mpz_size' can be used to find how many limbs make up OP.
+     `mpz_getlimbn' returns zero if N is outside the range 0 to
+     `mpz_size(OP)-1'.
+
+ -- Function: size_t mpz_size (mpz_t OP)
+     Return the size of OP measured in number of limbs.  If OP is zero,
+     the returned value will be zero.
+
+\1f
+File: gmp.info,  Node: Rational Number Functions,  Next: Floating-point Functions,  Prev: Integer Functions,  Up: Top
+
+6 Rational Number Functions
+***************************
+
+This chapter describes the GMP functions for performing arithmetic on
+rational numbers.  These functions start with the prefix `mpq_'.
+
+   Rational numbers are stored in objects of type `mpq_t'.
+
+   All rational arithmetic functions assume operands have a canonical
+form, and canonicalize their result.  The canonical form means that the
+denominator and the numerator have no common factors, and that the
+denominator is positive.  Zero has the unique representation 0/1.
+
+   Pure assignment functions do not canonicalize the assigned variable.
+It is the responsibility of the user to canonicalize the assigned
+variable before any arithmetic operations are performed on that
+variable.
+
+ -- Function: void mpq_canonicalize (mpq_t OP)
+     Remove any factors that are common to the numerator and
+     denominator of OP, and make the denominator positive.
+
+* Menu:
+
+* Initializing Rationals::
+* Rational Conversions::
+* Rational Arithmetic::
+* Comparing Rationals::
+* Applying Integer Functions::
+* I/O of Rationals::
+
+\1f
+File: gmp.info,  Node: Initializing Rationals,  Next: Rational Conversions,  Prev: Rational Number Functions,  Up: Rational Number Functions
+
+6.1 Initialization and Assignment Functions
+===========================================
+
+ -- Function: void mpq_init (mpq_t X)
+     Initialize X and set it to 0/1.  Each variable should normally
+     only be initialized once, or at least cleared out (using the
+     function `mpq_clear') between each initialization.
+
+ -- Function: void mpq_inits (mpq_t X, ...)
+     Initialize a NULL-terminated list of `mpq_t' variables, and set
+     their values to 0/1.
+
+ -- Function: void mpq_clear (mpq_t X)
+     Free the space occupied by X.  Make sure to call this function for
+     all `mpq_t' variables when you are done with them.
+
+ -- Function: void mpq_clears (mpq_t X, ...)
+     Free the space occupied by a NULL-terminated list of `mpq_t'
+     variables.
+
+ -- Function: void mpq_set (mpq_t ROP, mpq_t OP)
+ -- Function: void mpq_set_z (mpq_t ROP, mpz_t OP)
+     Assign ROP from OP.
+
+ -- Function: void mpq_set_ui (mpq_t ROP, unsigned long int OP1,
+          unsigned long int OP2)
+ -- Function: void mpq_set_si (mpq_t ROP, signed long int OP1, unsigned
+          long int OP2)
+     Set the value of ROP to OP1/OP2.  Note that if OP1 and OP2 have
+     common factors, ROP has to be passed to `mpq_canonicalize' before
+     any operations are performed on ROP.
+
+ -- Function: int mpq_set_str (mpq_t ROP, char *STR, int BASE)
+     Set ROP from a null-terminated string STR in the given BASE.
+
+     The string can be an integer like "41" or a fraction like
+     "41/152".  The fraction must be in canonical form (*note Rational
+     Number Functions::), or if not then `mpq_canonicalize' must be
+     called.
+
+     The numerator and optional denominator are parsed the same as in
+     `mpz_set_str' (*note Assigning Integers::).  White space is
+     allowed in the string, and is simply ignored.  The BASE can vary
+     from 2 to 62, or if BASE is 0 then the leading characters are
+     used: `0x' or `0X' for hex, `0b' or `0B' for binary, `0' for
+     octal, or decimal otherwise.  Note that this is done separately
+     for the numerator and denominator, so for instance `0xEF/100' is
+     239/100, whereas `0xEF/0x100' is 239/256.
+
+     The return value is 0 if the entire string is a valid number, or
+     -1 if not.
+
+ -- Function: void mpq_swap (mpq_t ROP1, mpq_t ROP2)
+     Swap the values ROP1 and ROP2 efficiently.
+
+\1f
+File: gmp.info,  Node: Rational Conversions,  Next: Rational Arithmetic,  Prev: Initializing Rationals,  Up: Rational Number Functions
+
+6.2 Conversion Functions
+========================
+
+ -- Function: double mpq_get_d (mpq_t OP)
+     Convert OP to a `double', truncating if necessary (i.e. rounding
+     towards zero).
+
+     If the exponent from the conversion is too big or too small to fit
+     a `double' then the result is system dependent.  For too big an
+     infinity is returned when available.  For too small 0.0 is
+     normally returned.  Hardware overflow, underflow and denorm traps
+     may or may not occur.
+
+ -- Function: void mpq_set_d (mpq_t ROP, double OP)
+ -- Function: void mpq_set_f (mpq_t ROP, mpf_t OP)
+     Set ROP to the value of OP.  There is no rounding, this conversion
+     is exact.
+
+ -- Function: char * mpq_get_str (char *STR, int BASE, mpq_t OP)
+     Convert OP to a string of digits in base BASE.  The base may vary
+     from 2 to 36.  The string will be of the form `num/den', or if the
+     denominator is 1 then just `num'.
+
+     If STR is `NULL', the result string is allocated using the current
+     allocation function (*note Custom Allocation::).  The block will be
+     `strlen(str)+1' bytes, that being exactly enough for the string and
+     null-terminator.
+
+     If STR is not `NULL', it should point to a block of storage large
+     enough for the result, that being
+
+          mpz_sizeinbase (mpq_numref(OP), BASE)
+          + mpz_sizeinbase (mpq_denref(OP), BASE) + 3
+
+     The three extra bytes are for a possible minus sign, possible
+     slash, and the null-terminator.
+
+     A pointer to the result string is returned, being either the
+     allocated block, or the given STR.
+
+\1f
+File: gmp.info,  Node: Rational Arithmetic,  Next: Comparing Rationals,  Prev: Rational Conversions,  Up: Rational Number Functions
+
+6.3 Arithmetic Functions
+========================
+
+ -- Function: void mpq_add (mpq_t SUM, mpq_t ADDEND1, mpq_t ADDEND2)
+     Set SUM to ADDEND1 + ADDEND2.
+
+ -- Function: void mpq_sub (mpq_t DIFFERENCE, mpq_t MINUEND, mpq_t
+          SUBTRAHEND)
+     Set DIFFERENCE to MINUEND - SUBTRAHEND.
+
+ -- Function: void mpq_mul (mpq_t PRODUCT, mpq_t MULTIPLIER, mpq_t
+          MULTIPLICAND)
+     Set PRODUCT to MULTIPLIER times MULTIPLICAND.
+
+ -- Function: void mpq_mul_2exp (mpq_t ROP, mpq_t OP1, mp_bitcnt_t OP2)
+     Set ROP to OP1 times 2 raised to OP2.
+
+ -- Function: void mpq_div (mpq_t QUOTIENT, mpq_t DIVIDEND, mpq_t
+          DIVISOR)
+     Set QUOTIENT to DIVIDEND/DIVISOR.
+
+ -- Function: void mpq_div_2exp (mpq_t ROP, mpq_t OP1, mp_bitcnt_t OP2)
+     Set ROP to OP1 divided by 2 raised to OP2.
+
+ -- Function: void mpq_neg (mpq_t NEGATED_OPERAND, mpq_t OPERAND)
+     Set NEGATED_OPERAND to -OPERAND.
+
+ -- Function: void mpq_abs (mpq_t ROP, mpq_t OP)
+     Set ROP to the absolute value of OP.
+
+ -- Function: void mpq_inv (mpq_t INVERTED_NUMBER, mpq_t NUMBER)
+     Set INVERTED_NUMBER to 1/NUMBER.  If the new denominator is zero,
+     this routine will divide by zero.
+
+\1f
+File: gmp.info,  Node: Comparing Rationals,  Next: Applying Integer Functions,  Prev: Rational Arithmetic,  Up: Rational Number Functions
+
+6.4 Comparison Functions
+========================
+
+ -- Function: int mpq_cmp (mpq_t OP1, mpq_t OP2)
+     Compare OP1 and OP2.  Return a positive value if OP1 > OP2, zero
+     if OP1 = OP2, and a negative value if OP1 < OP2.
+
+     To determine if two rationals are equal, `mpq_equal' is faster than
+     `mpq_cmp'.
+
+ -- Macro: int mpq_cmp_ui (mpq_t OP1, unsigned long int NUM2, unsigned
+          long int DEN2)
+ -- Macro: int mpq_cmp_si (mpq_t OP1, long int NUM2, unsigned long int
+          DEN2)
+     Compare OP1 and NUM2/DEN2.  Return a positive value if OP1 >
+     NUM2/DEN2, zero if OP1 = NUM2/DEN2, and a negative value if OP1 <
+     NUM2/DEN2.
+
+     NUM2 and DEN2 are allowed to have common factors.
+
+     These functions are implemented as macros and evaluate their
+     arguments multiple times.
+
+ -- Macro: int mpq_sgn (mpq_t OP)
+     Return +1 if OP > 0, 0 if OP = 0, and -1 if OP < 0.
+
+     This function is actually implemented as a macro.  It evaluates its
+     argument multiple times.
+
+ -- Function: int mpq_equal (mpq_t OP1, mpq_t OP2)
+     Return non-zero if OP1 and OP2 are equal, zero if they are
+     non-equal.  Although `mpq_cmp' can be used for the same purpose,
+     this function is much faster.
+
+\1f
+File: gmp.info,  Node: Applying Integer Functions,  Next: I/O of Rationals,  Prev: Comparing Rationals,  Up: Rational Number Functions
+
+6.5 Applying Integer Functions to Rationals
+===========================================
+
+The set of `mpq' functions is quite small.  In particular, there are few
+functions for either input or output.  The following functions give
+direct access to the numerator and denominator of an `mpq_t'.
+
+   Note that if an assignment to the numerator and/or denominator could
+take an `mpq_t' out of the canonical form described at the start of
+this chapter (*note Rational Number Functions::) then
+`mpq_canonicalize' must be called before any other `mpq' functions are
+applied to that `mpq_t'.
+
+ -- Macro: mpz_t mpq_numref (mpq_t OP)
+ -- Macro: mpz_t mpq_denref (mpq_t OP)
+     Return a reference to the numerator and denominator of OP,
+     respectively.  The `mpz' functions can be used on the result of
+     these macros.
+
+ -- Function: void mpq_get_num (mpz_t NUMERATOR, mpq_t RATIONAL)
+ -- Function: void mpq_get_den (mpz_t DENOMINATOR, mpq_t RATIONAL)
+ -- Function: void mpq_set_num (mpq_t RATIONAL, mpz_t NUMERATOR)
+ -- Function: void mpq_set_den (mpq_t RATIONAL, mpz_t DENOMINATOR)
+     Get or set the numerator or denominator of a rational.  These
+     functions are equivalent to calling `mpz_set' with an appropriate
+     `mpq_numref' or `mpq_denref'.  Direct use of `mpq_numref' or
+     `mpq_denref' is recommended instead of these functions.
+
+\1f
+File: gmp.info,  Node: I/O of Rationals,  Prev: Applying Integer Functions,  Up: Rational Number Functions
+
+6.6 Input and Output Functions
+==============================
+
+Functions that perform input from a stdio stream, and functions that
+output to a stdio stream, of `mpq' numbers.  Passing a `NULL' pointer
+for a STREAM argument to any of these functions will make them read from
+`stdin' and write to `stdout', respectively.
+
+   When using any of these functions, it is a good idea to include
+`stdio.h' before `gmp.h', since that will allow `gmp.h' to define
+prototypes for these functions.
+
+   See also *note Formatted Output:: and *note Formatted Input::.
+
+ -- Function: size_t mpq_out_str (FILE *STREAM, int BASE, mpq_t OP)
+     Output OP on stdio stream STREAM, as a string of digits in base
+     BASE.  The base may vary from 2 to 36.  Output is in the form
+     `num/den' or if the denominator is 1 then just `num'.
+
+     Return the number of bytes written, or if an error occurred,
+     return 0.
+
+ -- Function: size_t mpq_inp_str (mpq_t ROP, FILE *STREAM, int BASE)
+     Read a string of digits from STREAM and convert them to a rational
+     in ROP.  Any initial white-space characters are read and
+     discarded.  Return the number of characters read (including white
+     space), or 0 if a rational could not be read.
+
+     The input can be a fraction like `17/63' or just an integer like
+     `123'.  Reading stops at the first character not in this form, and
+     white space is not permitted within the string.  If the input
+     might not be in canonical form, then `mpq_canonicalize' must be
+     called (*note Rational Number Functions::).
+
+     The BASE can be between 2 and 36, or can be 0 in which case the
+     leading characters of the string determine the base, `0x' or `0X'
+     for hexadecimal, `0' for octal, or decimal otherwise.  The leading
+     characters are examined separately for the numerator and
+     denominator of a fraction, so for instance `0x10/11' is 16/11,
+     whereas `0x10/0x11' is 16/17.
+
+\1f
+File: gmp.info,  Node: Floating-point Functions,  Next: Low-level Functions,  Prev: Rational Number Functions,  Up: Top
+
+7 Floating-point Functions
+**************************
+
+GMP floating point numbers are stored in objects of type `mpf_t' and
+functions operating on them have an `mpf_' prefix.
+
+   The mantissa of each float has a user-selectable precision, limited
+only by available memory.  Each variable has its own precision, and
+that can be increased or decreased at any time.
+
+   The exponent of each float is a fixed precision, one machine word on
+most systems.  In the current implementation the exponent is a count of
+limbs, so for example on a 32-bit system this means a range of roughly
+2^-68719476768 to 2^68719476736, or on a 64-bit system this will be
+greater.  Note however `mpf_get_str' can only return an exponent which
+fits an `mp_exp_t' and currently `mpf_set_str' doesn't accept exponents
+bigger than a `long'.
+
+   Each variable keeps a size for the mantissa data actually in use.
+This means that if a float is exactly represented in only a few bits
+then only those bits will be used in a calculation, even if the
+selected precision is high.
+
+   All calculations are performed to the precision of the destination
+variable.  Each function is defined to calculate with "infinite
+precision" followed by a truncation to the destination precision, but
+of course the work done is only what's needed to determine a result
+under that definition.
+
+   The precision selected for a variable is a minimum value, GMP may
+increase it a little to facilitate efficient calculation.  Currently
+this means rounding up to a whole limb, and then sometimes having a
+further partial limb, depending on the high limb of the mantissa.  But
+applications shouldn't be concerned by such details.
+
+   The mantissa is stored in binary, as might be imagined from the fact
+precisions are expressed in bits.  One consequence of this is that
+decimal fractions like 0.1 cannot be represented exactly.  The same is
+true of plain IEEE `double' floats.  This makes both highly unsuitable
+for calculations involving money or other values that should be exact
+decimal fractions.  (Suitably scaled integers, or perhaps rationals,
+are better choices.)
+
+   `mpf' functions and variables have no special notion of infinity or
+not-a-number, and applications must take care not to overflow the
+exponent or results will be unpredictable.  This might change in a
+future release.
+
+   Note that the `mpf' functions are _not_ intended as a smooth
+extension to IEEE P754 arithmetic.  In particular results obtained on
+one computer often differ from the results on a computer with a
+different word size.
+
+* Menu:
+
+* Initializing Floats::
+* Assigning Floats::
+* Simultaneous Float Init & Assign::
+* Converting Floats::
+* Float Arithmetic::
+* Float Comparison::
+* I/O of Floats::
+* Miscellaneous Float Functions::
+
+\1f
+File: gmp.info,  Node: Initializing Floats,  Next: Assigning Floats,  Prev: Floating-point Functions,  Up: Floating-point Functions
+
+7.1 Initialization Functions
+============================
+
+ -- Function: void mpf_set_default_prec (mp_bitcnt_t PREC)
+     Set the default precision to be *at least* PREC bits.  All
+     subsequent calls to `mpf_init' will use this precision, but
+     previously initialized variables are unaffected.
+
+ -- Function: mp_bitcnt_t mpf_get_default_prec (void)
+     Return the default precision actually used.
+
+   An `mpf_t' object must be initialized before storing the first value
+in it.  The functions `mpf_init' and `mpf_init2' are used for that
+purpose.
+
+ -- Function: void mpf_init (mpf_t X)
+     Initialize X to 0.  Normally, a variable should be initialized
+     once only or at least be cleared, using `mpf_clear', between
+     initializations.  The precision of X is undefined unless a default
+     precision has already been established by a call to
+     `mpf_set_default_prec'.
+
+ -- Function: void mpf_init2 (mpf_t X, mp_bitcnt_t PREC)
+     Initialize X to 0 and set its precision to be *at least* PREC
+     bits.  Normally, a variable should be initialized once only or at
+     least be cleared, using `mpf_clear', between initializations.
+
+ -- Function: void mpf_inits (mpf_t X, ...)
+     Initialize a NULL-terminated list of `mpf_t' variables, and set
+     their values to 0.  The precision of the initialized variables is
+     undefined unless a default precision has already been established
+     by a call to `mpf_set_default_prec'.
+
+ -- Function: void mpf_clear (mpf_t X)
+     Free the space occupied by X.  Make sure to call this function for
+     all `mpf_t' variables when you are done with them.
+
+ -- Function: void mpf_clears (mpf_t X, ...)
+     Free the space occupied by a NULL-terminated list of `mpf_t'
+     variables.
+
+   Here is an example of how to initialize floating-point variables:
+     {
+       mpf_t x, y;
+       mpf_init (x);           /* use default precision */
+       mpf_init2 (y, 256);     /* precision _at least_ 256 bits */
+       ...
+       /* Unless the program is about to exit, do ... */
+       mpf_clear (x);
+       mpf_clear (y);
+     }
+
+   The following three functions are useful for changing the precision
+during a calculation.  A typical use would be for adjusting the
+precision gradually in iterative algorithms like Newton-Raphson, making
+the computation precision closely match the actual accurate part of the
+numbers.
+
+ -- Function: mp_bitcnt_t mpf_get_prec (mpf_t OP)
+     Return the current precision of OP, in bits.
+
+ -- Function: void mpf_set_prec (mpf_t ROP, mp_bitcnt_t PREC)
+     Set the precision of ROP to be *at least* PREC bits.  The value in
+     ROP will be truncated to the new precision.
+
+     This function requires a call to `realloc', and so should not be
+     used in a tight loop.
+
+ -- Function: void mpf_set_prec_raw (mpf_t ROP, mp_bitcnt_t PREC)
+     Set the precision of ROP to be *at least* PREC bits, without
+     changing the memory allocated.
+
+     PREC must be no more than the allocated precision for ROP, that
+     being the precision when ROP was initialized, or in the most recent
+     `mpf_set_prec'.
+
+     The value in ROP is unchanged, and in particular if it had a higher
+     precision than PREC it will retain that higher precision.  New
+     values written to ROP will use the new PREC.
+
+     Before calling `mpf_clear' or the full `mpf_set_prec', another
+     `mpf_set_prec_raw' call must be made to restore ROP to its original
+     allocated precision.  Failing to do so will have unpredictable
+     results.
+
+     `mpf_get_prec' can be used before `mpf_set_prec_raw' to get the
+     original allocated precision.  After `mpf_set_prec_raw' it
+     reflects the PREC value set.
+
+     `mpf_set_prec_raw' is an efficient way to use an `mpf_t' variable
+     at different precisions during a calculation, perhaps to gradually
+     increase precision in an iteration, or just to use various
+     different precisions for different purposes during a calculation.
+
+\1f
+File: gmp.info,  Node: Assigning Floats,  Next: Simultaneous Float Init & Assign,  Prev: Initializing Floats,  Up: Floating-point Functions
+
+7.2 Assignment Functions
+========================
+
+These functions assign new values to already initialized floats (*note
+Initializing Floats::).
+
+ -- Function: void mpf_set (mpf_t ROP, mpf_t OP)
+ -- Function: void mpf_set_ui (mpf_t ROP, unsigned long int OP)
+ -- Function: void mpf_set_si (mpf_t ROP, signed long int OP)
+ -- Function: void mpf_set_d (mpf_t ROP, double OP)
+ -- Function: void mpf_set_z (mpf_t ROP, mpz_t OP)
+ -- Function: void mpf_set_q (mpf_t ROP, mpq_t OP)
+     Set the value of ROP from OP.
+
+ -- Function: int mpf_set_str (mpf_t ROP, char *STR, int BASE)
+     Set the value of ROP from the string in STR.  The string is of the
+     form `M@N' or, if the base is 10 or less, alternatively `MeN'.
+     `M' is the mantissa and `N' is the exponent.  The mantissa is
+     always in the specified base.  The exponent is either in the
+     specified base or, if BASE is negative, in decimal.  The decimal
+     point expected is taken from the current locale, on systems
+     providing `localeconv'.
+
+     The argument BASE may be in the ranges 2 to 62, or -62 to -2.
+     Negative values are used to specify that the exponent is in
+     decimal.
+
+     For bases up to 36, case is ignored; upper-case and lower-case
+     letters have the same value; for bases 37 to 62, upper-case letters
+     represent the usual 10..35 while lower-case letters represent
+     36..61.
+
+     Unlike the corresponding `mpz' function, the base will not be
+     determined from the leading characters of the string if BASE is 0.
+     This is so that numbers like `0.23' are not interpreted as octal.
+
+     White space is allowed in the string, and is simply ignored.
+     [This is not really true; white-space is ignored in the beginning
+     of the string and within the mantissa, but not in other places,
+     such as after a minus sign or in the exponent.  We are considering
+     changing the definition of this function, making it fail when
+     there is any white-space in the input, since that makes a lot of
+     sense.  Please tell us your opinion about this change.  Do you
+     really want it to accept "3 14" as meaning 314 as it does now?]
+
+     This function returns 0 if the entire string is a valid number in
+     base BASE.  Otherwise it returns -1.
+
+ -- Function: void mpf_swap (mpf_t ROP1, mpf_t ROP2)
+     Swap ROP1 and ROP2 efficiently.  Both the values and the
+     precisions of the two variables are swapped.
+
+\1f
+File: gmp.info,  Node: Simultaneous Float Init & Assign,  Next: Converting Floats,  Prev: Assigning Floats,  Up: Floating-point Functions
+
+7.3 Combined Initialization and Assignment Functions
+====================================================
+
+For convenience, GMP provides a parallel series of initialize-and-set
+functions which initialize the output and then store the value there.
+These functions' names have the form `mpf_init_set...'
+
+   Once the float has been initialized by any of the `mpf_init_set...'
+functions, it can be used as the source or destination operand for the
+ordinary float functions.  Don't use an initialize-and-set function on
+a variable already initialized!
+
+ -- Function: void mpf_init_set (mpf_t ROP, mpf_t OP)
+ -- Function: void mpf_init_set_ui (mpf_t ROP, unsigned long int OP)
+ -- Function: void mpf_init_set_si (mpf_t ROP, signed long int OP)
+ -- Function: void mpf_init_set_d (mpf_t ROP, double OP)
+     Initialize ROP and set its value from OP.
+
+     The precision of ROP will be taken from the active default
+     precision, as set by `mpf_set_default_prec'.
+
+ -- Function: int mpf_init_set_str (mpf_t ROP, char *STR, int BASE)
+     Initialize ROP and set its value from the string in STR.  See
+     `mpf_set_str' above for details on the assignment operation.
+
+     Note that ROP is initialized even if an error occurs.  (I.e., you
+     have to call `mpf_clear' for it.)
+
+     The precision of ROP will be taken from the active default
+     precision, as set by `mpf_set_default_prec'.
+
+\1f
+File: gmp.info,  Node: Converting Floats,  Next: Float Arithmetic,  Prev: Simultaneous Float Init & Assign,  Up: Floating-point Functions
+
+7.4 Conversion Functions
+========================
+
+ -- Function: double mpf_get_d (mpf_t OP)
+     Convert OP to a `double', truncating if necessary (i.e. rounding
+     towards zero).
+
+     If the exponent in OP is too big or too small to fit a `double'
+     then the result is system dependent.  For too big an infinity is
+     returned when available.  For too small 0.0 is normally returned.
+     Hardware overflow, underflow and denorm traps may or may not occur.
+
+ -- Function: double mpf_get_d_2exp (signed long int *EXP, mpf_t OP)
+     Convert OP to a `double', truncating if necessary (i.e. rounding
+     towards zero), and with an exponent returned separately.
+
+     The return value is in the range 0.5<=abs(D)<1 and the exponent is
+     stored to `*EXP'.  D * 2^EXP is the (truncated) OP value.  If OP
+     is zero, the return is 0.0 and 0 is stored to `*EXP'.
+
+     This is similar to the standard C `frexp' function (*note
+     Normalization Functions: (libc)Normalization Functions.).
+
+ -- Function: long mpf_get_si (mpf_t OP)
+ -- Function: unsigned long mpf_get_ui (mpf_t OP)
+     Convert OP to a `long' or `unsigned long', truncating any fraction
+     part.  If OP is too big for the return type, the result is
+     undefined.
+
+     See also `mpf_fits_slong_p' and `mpf_fits_ulong_p' (*note
+     Miscellaneous Float Functions::).
+
+ -- Function: char * mpf_get_str (char *STR, mp_exp_t *EXPPTR, int
+          BASE, size_t N_DIGITS, mpf_t OP)
+     Convert OP to a string of digits in base BASE.  The base argument
+     may vary from 2 to 62 or from -2 to -36.  Up to N_DIGITS digits
+     will be generated.  Trailing zeros are not returned.  No more
+     digits than can be accurately represented by OP are ever
+     generated.  If N_DIGITS is 0 then that accurate maximum number of
+     digits are generated.
+
+     For BASE in the range 2..36, digits and lower-case letters are
+     used; for -2..-36, digits and upper-case letters are used; for
+     37..62, digits, upper-case letters, and lower-case letters (in
+     that significance order) are used.
+
+     If STR is `NULL', the result string is allocated using the current
+     allocation function (*note Custom Allocation::).  The block will be
+     `strlen(str)+1' bytes, that being exactly enough for the string and
+     null-terminator.
+
+     If STR is not `NULL', it should point to a block of N_DIGITS + 2
+     bytes, that being enough for the mantissa, a possible minus sign,
+     and a null-terminator.  When N_DIGITS is 0 to get all significant
+     digits, an application won't be able to know the space required,
+     and STR should be `NULL' in that case.
+
+     The generated string is a fraction, with an implicit radix point
+     immediately to the left of the first digit.  The applicable
+     exponent is written through the EXPPTR pointer.  For example, the
+     number 3.1416 would be returned as string "31416" and exponent 1.
+
+     When OP is zero, an empty string is produced and the exponent
+     returned is 0.
+
+     A pointer to the result string is returned, being either the
+     allocated block or the given STR.
+
+\1f
+File: gmp.info,  Node: Float Arithmetic,  Next: Float Comparison,  Prev: Converting Floats,  Up: Floating-point Functions
+
+7.5 Arithmetic Functions
+========================
+
+ -- Function: void mpf_add (mpf_t ROP, mpf_t OP1, mpf_t OP2)
+ -- Function: void mpf_add_ui (mpf_t ROP, mpf_t OP1, unsigned long int
+          OP2)
+     Set ROP to OP1 + OP2.
+
+ -- Function: void mpf_sub (mpf_t ROP, mpf_t OP1, mpf_t OP2)
+ -- Function: void mpf_ui_sub (mpf_t ROP, unsigned long int OP1, mpf_t
+          OP2)
+ -- Function: void mpf_sub_ui (mpf_t ROP, mpf_t OP1, unsigned long int
+          OP2)
+     Set ROP to OP1 - OP2.
+
+ -- Function: void mpf_mul (mpf_t ROP, mpf_t OP1, mpf_t OP2)
+ -- Function: void mpf_mul_ui (mpf_t ROP, mpf_t OP1, unsigned long int
+          OP2)
+     Set ROP to OP1 times OP2.
+
+   Division is undefined if the divisor is zero, and passing a zero
+divisor to the divide functions will make these functions intentionally
+divide by zero.  This lets the user handle arithmetic exceptions in
+these functions in the same manner as other arithmetic exceptions.
+
+ -- Function: void mpf_div (mpf_t ROP, mpf_t OP1, mpf_t OP2)
+ -- Function: void mpf_ui_div (mpf_t ROP, unsigned long int OP1, mpf_t
+          OP2)
+ -- Function: void mpf_div_ui (mpf_t ROP, mpf_t OP1, unsigned long int
+          OP2)
+     Set ROP to OP1/OP2.
+
+ -- Function: void mpf_sqrt (mpf_t ROP, mpf_t OP)
+ -- Function: void mpf_sqrt_ui (mpf_t ROP, unsigned long int OP)
+     Set ROP to the square root of OP.
+
+ -- Function: void mpf_pow_ui (mpf_t ROP, mpf_t OP1, unsigned long int
+          OP2)
+     Set ROP to OP1 raised to the power OP2.
+
+ -- Function: void mpf_neg (mpf_t ROP, mpf_t OP)
+     Set ROP to -OP.
+
+ -- Function: void mpf_abs (mpf_t ROP, mpf_t OP)
+     Set ROP to the absolute value of OP.
+
+ -- Function: void mpf_mul_2exp (mpf_t ROP, mpf_t OP1, mp_bitcnt_t OP2)
+     Set ROP to OP1 times 2 raised to OP2.
+
+ -- Function: void mpf_div_2exp (mpf_t ROP, mpf_t OP1, mp_bitcnt_t OP2)
+     Set ROP to OP1 divided by 2 raised to OP2.
+
+\1f
+File: gmp.info,  Node: Float Comparison,  Next: I/O of Floats,  Prev: Float Arithmetic,  Up: Floating-point Functions
+
+7.6 Comparison Functions
+========================
+
+ -- Function: int mpf_cmp (mpf_t OP1, mpf_t OP2)
+ -- Function: int mpf_cmp_d (mpf_t OP1, double OP2)
+ -- Function: int mpf_cmp_ui (mpf_t OP1, unsigned long int OP2)
+ -- Function: int mpf_cmp_si (mpf_t OP1, signed long int OP2)
+     Compare OP1 and OP2.  Return a positive value if OP1 > OP2, zero
+     if OP1 = OP2, and a negative value if OP1 < OP2.
+
+     `mpf_cmp_d' can be called with an infinity, but results are
+     undefined for a NaN.
+
+ -- Function: int mpf_eq (mpf_t OP1, mpf_t OP2, mp_bitcnt_t OP3)
+     Return non-zero if the first OP3 bits of OP1 and OP2 are equal,
+     zero otherwise.  I.e., test if OP1 and OP2 are approximately equal.
+
+     Caution 1: All versions of GMP up to version 4.2.4 compared just
+     whole limbs, meaning sometimes more than OP3 bits, sometimes fewer.
+
+     Caution 2: This function will consider XXX11...111 and XX100...000
+     different, even if ... is replaced by a semi-infinite number of
+     bits.  Such numbers are really just one ulp off, and should be
+     considered equal.
+
+ -- Function: void mpf_reldiff (mpf_t ROP, mpf_t OP1, mpf_t OP2)
+     Compute the relative difference between OP1 and OP2 and store the
+     result in ROP.  This is abs(OP1-OP2)/OP1.
+
+ -- Macro: int mpf_sgn (mpf_t OP)
+     Return +1 if OP > 0, 0 if OP = 0, and -1 if OP < 0.
+
+     This function is actually implemented as a macro.  It evaluates
+     its argument multiple times.
+
+\1f
+File: gmp.info,  Node: I/O of Floats,  Next: Miscellaneous Float Functions,  Prev: Float Comparison,  Up: Floating-point Functions
+
+7.7 Input and Output Functions
+==============================
+
+Functions that perform input from a stdio stream, and functions that
+output to a stdio stream, of `mpf' numbers.  Passing a `NULL' pointer
+for a STREAM argument to any of these functions will make them read from
+`stdin' and write to `stdout', respectively.
+
+   When using any of these functions, it is a good idea to include
+`stdio.h' before `gmp.h', since that will allow `gmp.h' to define
+prototypes for these functions.
+
+   See also *note Formatted Output:: and *note Formatted Input::.
+
+ -- Function: size_t mpf_out_str (FILE *STREAM, int BASE, size_t
+          N_DIGITS, mpf_t OP)
+     Print OP to STREAM, as a string of digits.  Return the number of
+     bytes written, or if an error occurred, return 0.
+
+     The mantissa is prefixed with an `0.' and is in the given BASE,
+     which may vary from 2 to 62 or from -2 to -36.  An exponent is
+     then printed, separated by an `e', or if the base is greater than
+     10 then by an `@'.  The exponent is always in decimal.  The
+     decimal point follows the current locale, on systems providing
+     `localeconv'.
+
+     For BASE in the range 2..36, digits and lower-case letters are
+     used; for -2..-36, digits and upper-case letters are used; for
+     37..62, digits, upper-case letters, and lower-case letters (in
+     that significance order) are used.
+
+     Up to N_DIGITS digits will be printed from the mantissa, except
+     that no more digits than are accurately representable by OP will
+     be printed.  N_DIGITS can be 0 to select that accurate maximum.
+
+ -- Function: size_t mpf_inp_str (mpf_t ROP, FILE *STREAM, int BASE)
+     Read a string in base BASE from STREAM, and put the read float in
+     ROP.  The string is of the form `M@N' or, if the base is 10 or
+     less, alternatively `MeN'.  `M' is the mantissa and `N' is the
+     exponent.  The mantissa is always in the specified base.  The
+     exponent is either in the specified base or, if BASE is negative,
+     in decimal.  The decimal point expected is taken from the current
+     locale, on systems providing `localeconv'.
+
+     The argument BASE may be in the ranges 2 to 36, or -36 to -2.
+     Negative values are used to specify that the exponent is in
+     decimal.
+
+     Unlike the corresponding `mpz' function, the base will not be
+     determined from the leading characters of the string if BASE is 0.
+     This is so that numbers like `0.23' are not interpreted as octal.
+
+     Return the number of bytes read, or if an error occurred, return 0.
+
+\1f
+File: gmp.info,  Node: Miscellaneous Float Functions,  Prev: I/O of Floats,  Up: Floating-point Functions
+
+7.8 Miscellaneous Functions
+===========================
+
+ -- Function: void mpf_ceil (mpf_t ROP, mpf_t OP)
+ -- Function: void mpf_floor (mpf_t ROP, mpf_t OP)
+ -- Function: void mpf_trunc (mpf_t ROP, mpf_t OP)
+     Set ROP to OP rounded to an integer.  `mpf_ceil' rounds to the
+     next higher integer, `mpf_floor' to the next lower, and `mpf_trunc'
+     to the integer towards zero.
+
+ -- Function: int mpf_integer_p (mpf_t OP)
+     Return non-zero if OP is an integer.
+
+ -- Function: int mpf_fits_ulong_p (mpf_t OP)
+ -- Function: int mpf_fits_slong_p (mpf_t OP)
+ -- Function: int mpf_fits_uint_p (mpf_t OP)
+ -- Function: int mpf_fits_sint_p (mpf_t OP)
+ -- Function: int mpf_fits_ushort_p (mpf_t OP)
+ -- Function: int mpf_fits_sshort_p (mpf_t OP)
+     Return non-zero if OP would fit in the respective C data type, when
+     truncated to an integer.
+
+ -- Function: void mpf_urandomb (mpf_t ROP, gmp_randstate_t STATE,
+          mp_bitcnt_t NBITS)
+     Generate a uniformly distributed random float in ROP, such that 0
+     <= ROP < 1, with NBITS significant bits in the mantissa or less if
+     the precision of ROP is smaller.
+
+     The variable STATE must be initialized by calling one of the
+     `gmp_randinit' functions (*note Random State Initialization::)
+     before invoking this function.
+
+ -- Function: void mpf_random2 (mpf_t ROP, mp_size_t MAX_SIZE, mp_exp_t
+          EXP)
+     Generate a random float of at most MAX_SIZE limbs, with long
+     strings of zeros and ones in the binary representation.  The
+     exponent of the number is in the interval -EXP to EXP (in limbs).
+     This function is useful for testing functions and algorithms,
+     since this kind of random number has proven to be more likely
+     to trigger corner-case bugs.  Negative random numbers are
+     generated when MAX_SIZE is negative.
+
+\1f
+File: gmp.info,  Node: Low-level Functions,  Next: Random Number Functions,  Prev: Floating-point Functions,  Up: Top
+
+8 Low-level Functions
+*********************
+
+This chapter describes low-level GMP functions, used to implement the
+high-level GMP functions, but also intended for time-critical user code.
+
+   These functions start with the prefix `mpn_'.
+
+   The `mpn' functions are designed to be as fast as possible, *not* to
+provide a coherent calling interface.  The different functions have
+somewhat similar interfaces, but there are variations that make them
+hard to use.  These functions do as little as possible apart from the
+real multiple precision computation, so that no time is spent on things
+that not all callers need.
+
+   A source operand is specified by a pointer to the least significant
+limb and a limb count.  A destination operand is specified by just a
+pointer.  It is the responsibility of the caller to ensure that the
+destination has enough space for storing the result.
+
+   With this way of specifying operands, it is possible to perform
+computations on subranges of an argument, and store the result into a
+subrange of a destination.
+
+   A common requirement for all functions is that each source area
+needs at least one limb.  No size argument may be zero.  Unless
+otherwise stated, in-place operations are allowed where source and
+destination are the same, but not where they only partly overlap.
+
+   The `mpn' functions are the base for the implementation of the
+`mpz_', `mpf_', and `mpq_' functions.
+
+   This example adds the number beginning at S1P and the number
+beginning at S2P and writes the sum at DESTP.  All areas have N limbs.
+
+     cy = mpn_add_n (destp, s1p, s2p, n)
+
+   It should be noted that the `mpn' functions make no attempt to
+identify high or low zero limbs on their operands, or other special
+forms.  On random data such cases will be unlikely and it'd be wasteful
+for every function to check every time.  An application knowing
+something about its data can take steps to trim or perhaps split its
+calculations.
+
+
+In the notation used below, a source operand is identified by the
+pointer to the least significant limb, and the limb count in braces.
+For example, {S1P, S1N}.
+
+ -- Function: mp_limb_t mpn_add_n (mp_limb_t *RP, const mp_limb_t *S1P,
+          const mp_limb_t *S2P, mp_size_t N)
+     Add {S1P, N} and {S2P, N}, and write the N least significant limbs
+     of the result to RP.  Return carry, either 0 or 1.
+
+     This is the lowest-level function for addition.  It is the
+     preferred function for addition, since it is written in assembly
+     for most CPUs.  For addition of a variable to itself (i.e., S1P
+     equals S2P) use `mpn_lshift' with a count of 1 for optimal speed.
+
+ -- Function: mp_limb_t mpn_add_1 (mp_limb_t *RP, const mp_limb_t *S1P,
+          mp_size_t N, mp_limb_t S2LIMB)
+     Add {S1P, N} and S2LIMB, and write the N least significant limbs
+     of the result to RP.  Return carry, either 0 or 1.
+
+ -- Function: mp_limb_t mpn_add (mp_limb_t *RP, const mp_limb_t *S1P,
+          mp_size_t S1N, const mp_limb_t *S2P, mp_size_t S2N)
+     Add {S1P, S1N} and {S2P, S2N}, and write the S1N least significant
+     limbs of the result to RP.  Return carry, either 0 or 1.
+
+     This function requires that S1N is greater than or equal to S2N.
+
+ -- Function: mp_limb_t mpn_sub_n (mp_limb_t *RP, const mp_limb_t *S1P,
+          const mp_limb_t *S2P, mp_size_t N)
+     Subtract {S2P, N} from {S1P, N}, and write the N least significant
+     limbs of the result to RP.  Return borrow, either 0 or 1.
+
+     This is the lowest-level function for subtraction.  It is the
+     preferred function for subtraction, since it is written in
+     assembly for most CPUs.
+
+ -- Function: mp_limb_t mpn_sub_1 (mp_limb_t *RP, const mp_limb_t *S1P,
+          mp_size_t N, mp_limb_t S2LIMB)
+     Subtract S2LIMB from {S1P, N}, and write the N least significant
+     limbs of the result to RP.  Return borrow, either 0 or 1.
+
+ -- Function: mp_limb_t mpn_sub (mp_limb_t *RP, const mp_limb_t *S1P,
+          mp_size_t S1N, const mp_limb_t *S2P, mp_size_t S2N)
+     Subtract {S2P, S2N} from {S1P, S1N}, and write the S1N least
+     significant limbs of the result to RP.  Return borrow, either 0 or
+     1.
+
+     This function requires that S1N is greater than or equal to S2N.
+
+ -- Function: mp_limb_t mpn_neg (mp_limb_t *RP, const mp_limb_t *SP,
+          mp_size_t N)
+     Perform the negation of {SP, N}, and write the result to {RP, N}.
+     Return carry-out.
+
+ -- Function: void mpn_mul_n (mp_limb_t *RP, const mp_limb_t *S1P,
+          const mp_limb_t *S2P, mp_size_t N)
+     Multiply {S1P, N} and {S2P, N}, and write the 2*N-limb result to
+     RP.
+
+     The destination has to have space for 2*N limbs, even if the
+     product's most significant limb is zero.  No overlap is permitted
+     between the destination and either source.
+
+     If the two input operands are the same, use `mpn_sqr'.
+
+ -- Function: mp_limb_t mpn_mul (mp_limb_t *RP, const mp_limb_t *S1P,
+          mp_size_t S1N, const mp_limb_t *S2P, mp_size_t S2N)
+     Multiply {S1P, S1N} and {S2P, S2N}, and write the (S1N+S2N)-limb
+     result to RP.  Return the most significant limb of the result.
+
+     The destination has to have space for S1N + S2N limbs, even if the
+     product's most significant limb is zero.  No overlap is permitted
+     between the destination and either source.
+
+     This function requires that S1N is greater than or equal to S2N.
+
+ -- Function: void mpn_sqr (mp_limb_t *RP, const mp_limb_t *S1P,
+          mp_size_t N)
+     Compute the square of {S1P, N} and write the 2*N-limb result to RP.
+
+     The destination has to have space for 2*N limbs, even if the
+     result's most significant limb is zero.  No overlap is permitted
+     between the destination and the source.
+
+ -- Function: mp_limb_t mpn_mul_1 (mp_limb_t *RP, const mp_limb_t *S1P,
+          mp_size_t N, mp_limb_t S2LIMB)
+     Multiply {S1P, N} by S2LIMB, and write the N least significant
+     limbs of the product to RP.  Return the most significant limb of
+     the product.  {S1P, N} and {RP, N} are allowed to overlap provided
+     RP <= S1P.
+
+     This is a low-level function that is a building block for general
+     multiplication as well as other operations in GMP.  It is written
+     in assembly for most CPUs.
+
+     Don't call this function if S2LIMB is a power of 2; use
+     `mpn_lshift' with a count equal to the logarithm of S2LIMB
+     instead, for optimal speed.
+
+ -- Function: mp_limb_t mpn_addmul_1 (mp_limb_t *RP, const mp_limb_t
+          *S1P, mp_size_t N, mp_limb_t S2LIMB)
+     Multiply {S1P, N} and S2LIMB, and add the N least significant
+     limbs of the product to {RP, N} and write the result to RP.
+     Return the most significant limb of the product, plus carry-out
+     from the addition.
+
+     This is a low-level function that is a building block for general
+     multiplication as well as other operations in GMP.  It is written
+     in assembly for most CPUs.
+
+ -- Function: mp_limb_t mpn_submul_1 (mp_limb_t *RP, const mp_limb_t
+          *S1P, mp_size_t N, mp_limb_t S2LIMB)
+     Multiply {S1P, N} and S2LIMB, and subtract the N least significant
+     limbs of the product from {RP, N} and write the result to RP.
+     Return the most significant limb of the product, plus borrow-out
+     from the subtraction.
+
+     This is a low-level function that is a building block for general
+     multiplication and division as well as other operations in GMP.
+     It is written in assembly for most CPUs.
+
+ -- Function: void mpn_tdiv_qr (mp_limb_t *QP, mp_limb_t *RP, mp_size_t
+          QXN, const mp_limb_t *NP, mp_size_t NN, const mp_limb_t *DP,
+          mp_size_t DN)
+     Divide {NP, NN} by {DP, DN} and put the quotient at {QP, NN-DN+1}
+     and the remainder at {RP, DN}.  The quotient is rounded towards 0.
+
+     No overlap is permitted between arguments, except that NP might
+     equal RP.  The dividend size NN must be greater than or equal to
+     divisor size DN.  The most significant limb of the divisor must be
+     non-zero.  The QXN operand must be zero.
+
+ -- Function: mp_limb_t mpn_divrem (mp_limb_t *R1P, mp_size_t QXN,
+          mp_limb_t *RS2P, mp_size_t RS2N, const mp_limb_t *S3P,
+          mp_size_t S3N)
+     [This function is obsolete.  Please call `mpn_tdiv_qr' instead for
+     best performance.]
+
+     Divide {RS2P, RS2N} by {S3P, S3N}, and write the quotient at R1P,
+     with the exception of the most significant limb, which is
+     returned.  The remainder replaces the dividend at RS2P; it will be
+     S3N limbs long (i.e., as many limbs as the divisor).
+
+     In addition to an integer quotient, QXN fraction limbs are
+     developed, and stored after the integral limbs.  For most usages,
+     QXN will be zero.
+
+     It is required that RS2N is greater than or equal to S3N.  It is
+     required that the most significant bit of the divisor is set.
+
+     If the quotient is not needed, pass RS2P + S3N as R1P.  Aside from
+     that special case, no overlap between arguments is permitted.
+
+     Return the most significant limb of the quotient, either 0 or 1.
+
+     The area at R1P needs to be RS2N - S3N + QXN limbs large.
+
+ -- Function: mp_limb_t mpn_divrem_1 (mp_limb_t *R1P, mp_size_t QXN,
+          mp_limb_t *S2P, mp_size_t S2N, mp_limb_t S3LIMB)
+ -- Macro: mp_limb_t mpn_divmod_1 (mp_limb_t *R1P, mp_limb_t *S2P,
+          mp_size_t S2N, mp_limb_t S3LIMB)
+     Divide {S2P, S2N} by S3LIMB, and write the quotient at R1P.
+     Return the remainder.
+
+     The integer quotient is written to {R1P+QXN, S2N} and in addition
+     QXN fraction limbs are developed and written to {R1P, QXN}.
+     Either or both S2N and QXN can be zero.  For most usages, QXN will
+     be zero.
+
+     `mpn_divmod_1' exists for upward source compatibility and is
+     simply a macro calling `mpn_divrem_1' with a QXN of 0.
+
+     The areas at R1P and S2P have to be identical or completely
+     separate, not partially overlapping.
+
+ -- Function: mp_limb_t mpn_divmod (mp_limb_t *R1P, mp_limb_t *RS2P,
+          mp_size_t RS2N, const mp_limb_t *S3P, mp_size_t S3N)
+     [This function is obsolete.  Please call `mpn_tdiv_qr' instead for
+     best performance.]
+
+ -- Macro: mp_limb_t mpn_divexact_by3 (mp_limb_t *RP, mp_limb_t *SP,
+          mp_size_t N)
+ -- Function: mp_limb_t mpn_divexact_by3c (mp_limb_t *RP, mp_limb_t
+          *SP, mp_size_t N, mp_limb_t CARRY)
+     Divide {SP, N} by 3, expecting it to divide exactly, and writing
+     the result to {RP, N}.  If 3 divides exactly, the return value is
+     zero and the result is the quotient.  If not, the return value is
+     non-zero and the result won't be anything useful.
+
+     `mpn_divexact_by3c' takes an initial carry parameter, which can be
+     the return value from a previous call, so a large calculation can
+     be done piece by piece from low to high.  `mpn_divexact_by3' is
+     simply a macro calling `mpn_divexact_by3c' with a 0 carry
+     parameter.
+
+     These routines use a multiply-by-inverse and will be faster than
+     `mpn_divrem_1' on CPUs with fast multiplication but slow division.
+
+     The source a, result q, size n, initial carry i, and return value
+     c satisfy c*b^n + a-i = 3*q, where b=2^GMP_NUMB_BITS.  The return
+     c is always 0, 1 or 2, and the initial carry i must also be 0, 1
+     or 2 (these are both borrows really).  When c=0 clearly q=(a-i)/3.
+     When c!=0, the remainder (a-i) mod 3 is given by 3-c, because b ==
+     1 mod 3 (when `mp_bits_per_limb' is even, which is always so
+     currently).
+
+ -- Function: mp_limb_t mpn_mod_1 (const mp_limb_t *S1P, mp_size_t S1N,
+          mp_limb_t S2LIMB)
+     Divide {S1P, S1N} by S2LIMB, and return the remainder.  S1N can be
+     zero.
+
+ -- Function: mp_limb_t mpn_lshift (mp_limb_t *RP, const mp_limb_t *SP,
+          mp_size_t N, unsigned int COUNT)
+     Shift {SP, N} left by COUNT bits, and write the result to {RP, N}.
+     The bits shifted out at the left are returned in the least
+     significant COUNT bits of the return value (the rest of the return
+     value is zero).
+
+     COUNT must be in the range 1 to mp_bits_per_limb-1.  The regions
+     {SP, N} and {RP, N} may overlap, provided RP >= SP.
+
+     This function is written in assembly for most CPUs.
+
+ -- Function: mp_limb_t mpn_rshift (mp_limb_t *RP, const mp_limb_t *SP,
+          mp_size_t N, unsigned int COUNT)
+     Shift {SP, N} right by COUNT bits, and write the result to {RP,
+     N}.  The bits shifted out at the right are returned in the most
+     significant COUNT bits of the return value (the rest of the return
+     value is zero).
+
+     COUNT must be in the range 1 to mp_bits_per_limb-1.  The regions
+     {SP, N} and {RP, N} may overlap, provided RP <= SP.
+
+     This function is written in assembly for most CPUs.
+
+ -- Function: int mpn_cmp (const mp_limb_t *S1P, const mp_limb_t *S2P,
+          mp_size_t N)
+     Compare {S1P, N} and {S2P, N} and return a positive value if S1 >
+     S2, 0 if they are equal, or a negative value if S1 < S2.
+
+ -- Function: mp_size_t mpn_gcd (mp_limb_t *RP, mp_limb_t *XP,
+          mp_size_t XN, mp_limb_t *YP, mp_size_t YN)
+     Set {RP, RETVAL} to the greatest common divisor of {XP, XN} and
+     {YP, YN}.  The result can be up to YN limbs, the return value is
+     the actual number produced.  Both source operands are destroyed.
+
+     {XP, XN} must have at least as many bits as {YP, YN}.  {YP, YN}
+     must be odd.  Both operands must have non-zero most significant
+     limbs.  No overlap is permitted between {XP, XN} and {YP, YN}.
+
+ -- Function: mp_limb_t mpn_gcd_1 (const mp_limb_t *XP, mp_size_t XN,
+          mp_limb_t YLIMB)
+     Return the greatest common divisor of {XP, XN} and YLIMB.  Both
+     operands must be non-zero.
+
+ -- Function: mp_size_t mpn_gcdext (mp_limb_t *GP, mp_limb_t *SP,
+          mp_size_t *SN, mp_limb_t *UP, mp_size_t UN, mp_limb_t *VP,
+          mp_size_t VN)
+     Let U be defined by {UP, UN} and let V be defined by {VP, VN}.
+
+     Compute the greatest common divisor G of U and V.  Compute a
+     cofactor S such that G = US + VT.  The second cofactor T is not
+     computed but can easily be obtained from (G - U*S) / V (the
+     division will be exact).  It is required that UN >= VN > 0, and
+     the most significant limb of {VP, VN} must be non-zero.
+
+     S satisfies S = 1 or abs(S) < V / (2 G). S = 0 if and only if V
+     divides U (i.e., G = V).
+
+     Store G at GP and let the return value define its limb count.
+     Store S at SP and let |*SN| define its limb count.  S can be
+     negative; when this happens *SN will be negative.  The area at GP
+     should have room for VN limbs and the area at SP should have room
+     for VN+1 limbs.
+
+     Both source operands are destroyed.
+
+     Compatibility notes: GMP 4.3.0 and 4.3.1 defined S less strictly.
+     Earlier as well as later GMP releases define S as described here.
+     GMP releases before GMP 4.3.0 required additional space for both
+     input and output areas. More precisely, the areas {UP, UN+1} and
+     {VP, VN+1} were destroyed (i.e. the operands plus an extra limb
+     past the end of each), and the areas pointed to by GP and SP
+     should each have room for UN+1 limbs.
+
+ -- Function: mp_size_t mpn_sqrtrem (mp_limb_t *R1P, mp_limb_t *R2P,
+          const mp_limb_t *SP, mp_size_t N)
+     Compute the square root of {SP, N} and put the result at {R1P,
+     ceil(N/2)} and the remainder at {R2P, RETVAL}.  R2P needs space
+     for N limbs, but the return value indicates how many are produced.
+
+     The most significant limb of {SP, N} must be non-zero.  The areas
+     {R1P, ceil(N/2)} and {SP, N} must be completely separate.  The
+     areas {R2P, N} and {SP, N} must be either identical or completely
+     separate.
+
+     If the remainder is not wanted then R2P can be `NULL', and in this
+     case the return value is zero or non-zero according to whether the
+     remainder would have been zero or non-zero.
+
+     A return value of zero indicates a perfect square.  See also
+     `mpz_perfect_square_p'.
+
+ -- Function: mp_size_t mpn_get_str (unsigned char *STR, int BASE,
+          mp_limb_t *S1P, mp_size_t S1N)
+     Convert {S1P, S1N} to a raw unsigned char array at STR in base
+     BASE, and return the number of characters produced.  There may be
+     leading zeros in the string.  The string is not in ASCII; to
+     convert it to printable format, add the ASCII codes for `0' or
+     `A', depending on the base and range.  BASE can vary from 2 to 256.
+
+     The most significant limb of the input {S1P, S1N} must be
+     non-zero.  The input {S1P, S1N} is clobbered, except when BASE is
+     a power of 2, in which case it's unchanged.
+
+     The area at STR has to have space for the largest possible number
+     represented by an S1N-limb array, plus one extra character.
+
+ -- Function: mp_size_t mpn_set_str (mp_limb_t *RP, const unsigned char
+          *STR, size_t STRSIZE, int BASE)
+     Convert bytes {STR,STRSIZE} in the given BASE to limbs at RP.
+
+     STR[0] is the most significant byte and STR[STRSIZE-1] is the
+     least significant.  Each byte should be a value in the range 0 to
+     BASE-1, not an ASCII character.  BASE can vary from 2 to 256.
+
+     The return value is the number of limbs written to RP.  If the most
+     significant input byte is non-zero then the high limb at RP will be
+     non-zero, and only that exact number of limbs will be required
+     there.
+
+     If the most significant input byte is zero then there may be high
+     zero limbs written to RP and included in the return value.
+
+     STRSIZE must be at least 1, and no overlap is permitted between
+     {STR,STRSIZE} and the result at RP.
+
+ -- Function: mp_bitcnt_t mpn_scan0 (const mp_limb_t *S1P, mp_bitcnt_t
+          BIT)
+     Scan S1P from bit position BIT for the next clear bit.
+
+     It is required that there be a clear bit within the area at S1P at
+     or beyond bit position BIT, so that the function has something to
+     return.
+
+ -- Function: mp_bitcnt_t mpn_scan1 (const mp_limb_t *S1P, mp_bitcnt_t
+          BIT)
+     Scan S1P from bit position BIT for the next set bit.
+
+     It is required that there be a set bit within the area at S1P at or
+     beyond bit position BIT, so that the function has something to
+     return.
+
+ -- Function: void mpn_random (mp_limb_t *R1P, mp_size_t R1N)
+ -- Function: void mpn_random2 (mp_limb_t *R1P, mp_size_t R1N)
+     Generate a random number of length R1N and store it at R1P.  The
+     most significant limb is always non-zero.  `mpn_random' generates
+     uniformly distributed limb data, `mpn_random2' generates long
+     strings of zeros and ones in the binary representation.
+
+     `mpn_random2' is intended for testing the correctness of the `mpn'
+     routines.
+
+ -- Function: mp_bitcnt_t mpn_popcount (const mp_limb_t *S1P, mp_size_t
+          N)
+     Count the number of set bits in {S1P, N}.
+
+ -- Function: mp_bitcnt_t mpn_hamdist (const mp_limb_t *S1P, const
+          mp_limb_t *S2P, mp_size_t N)
+     Compute the hamming distance between {S1P, N} and {S2P, N}, which
+     is the number of bit positions where the two operands have
+     different bit values.
+
+ -- Function: int mpn_perfect_square_p (const mp_limb_t *S1P, mp_size_t
+          N)
+     Return non-zero iff {S1P, N} is a perfect square.  The most
+     significant limb of the input {S1P, N} must be non-zero.
+
+ -- Function: void mpn_and_n (mp_limb_t *RP, const mp_limb_t *S1P,
+          const mp_limb_t *S2P, mp_size_t N)
+     Perform the bitwise logical and of {S1P, N} and {S2P, N}, and
+     write the result to {RP, N}.
+
+ -- Function: void mpn_ior_n (mp_limb_t *RP, const mp_limb_t *S1P,
+          const mp_limb_t *S2P, mp_size_t N)
+     Perform the bitwise logical inclusive or of {S1P, N} and {S2P, N},
+     and write the result to {RP, N}.
+
+ -- Function: void mpn_xor_n (mp_limb_t *RP, const mp_limb_t *S1P,
+          const mp_limb_t *S2P, mp_size_t N)
+     Perform the bitwise logical exclusive or of {S1P, N} and {S2P, N},
+     and write the result to {RP, N}.
+
+ -- Function: void mpn_andn_n (mp_limb_t *RP, const mp_limb_t *S1P,
+          const mp_limb_t *S2P, mp_size_t N)
+     Perform the bitwise logical and of {S1P, N} and the bitwise
+     complement of {S2P, N}, and write the result to {RP, N}.
+
+ -- Function: void mpn_iorn_n (mp_limb_t *RP, const mp_limb_t *S1P,
+          const mp_limb_t *S2P, mp_size_t N)
+     Perform the bitwise logical inclusive or of {S1P, N} and the
+     bitwise complement of {S2P, N}, and write the result to {RP, N}.
+
+ -- Function: void mpn_nand_n (mp_limb_t *RP, const mp_limb_t *S1P,
+          const mp_limb_t *S2P, mp_size_t N)
+     Perform the bitwise logical and of {S1P, N} and {S2P, N}, and
+     write the bitwise complement of the result to {RP, N}.
+
+ -- Function: void mpn_nior_n (mp_limb_t *RP, const mp_limb_t *S1P,
+          const mp_limb_t *S2P, mp_size_t N)
+     Perform the bitwise logical inclusive or of {S1P, N} and {S2P, N},
+     and write the bitwise complement of the result to {RP, N}.
+
+ -- Function: void mpn_xnor_n (mp_limb_t *RP, const mp_limb_t *S1P,
+          const mp_limb_t *S2P, mp_size_t N)
+     Perform the bitwise logical exclusive or of {S1P, N} and {S2P, N},
+     and write the bitwise complement of the result to {RP, N}.
+
+ -- Function: void mpn_com (mp_limb_t *RP, const mp_limb_t *SP,
+          mp_size_t N)
+     Perform the bitwise complement of {SP, N}, and write the result to
+     {RP, N}.
+
+ -- Function: void mpn_copyi (mp_limb_t *RP, const mp_limb_t *S1P,
+          mp_size_t N)
+     Copy from {S1P, N} to {RP, N}, increasingly.
+
+ -- Function: void mpn_copyd (mp_limb_t *RP, const mp_limb_t *S1P,
+          mp_size_t N)
+     Copy from {S1P, N} to {RP, N}, decreasingly.
+
+ -- Function: void mpn_zero (mp_limb_t *RP, mp_size_t N)
+     Zero {RP, N}.
+
+
+8.1 Nails
+=========
+
+*Everything in this section is highly experimental and may disappear or
+be subject to incompatible changes in a future version of GMP.*
+
+   Nails are an experimental feature whereby a few bits are left unused
+at the top of each `mp_limb_t'.  This can significantly improve carry
+handling on some processors.
+
+   All the `mpn' functions accepting limb data will expect the nail
+bits to be zero on entry, and will return data with the nails similarly
+all zero.  This applies both to limb vectors and to single limb
+arguments.
+
+   Nails can be enabled by configuring with `--enable-nails'.  By
+default the number of bits will be chosen according to what suits the
+host processor, but a particular number can be selected with
+`--enable-nails=N'.
+
+   At the mpn level, a nail build is neither source nor binary
+compatible with a non-nail build, strictly speaking.  But programs
+acting on limbs only through the mpn functions are likely to work
+equally well with either build, and judicious use of the definitions
+below should make any program compatible with either build, at the
+source level.
+
+   For the higher level routines, meaning `mpz' etc, a nail build
+should be fully source and binary compatible with a non-nail build.
+
+ -- Macro: GMP_NAIL_BITS
+ -- Macro: GMP_NUMB_BITS
+ -- Macro: GMP_LIMB_BITS
+     `GMP_NAIL_BITS' is the number of nail bits, or 0 when nails are
+     not in use.  `GMP_NUMB_BITS' is the number of data bits in a limb.
+     `GMP_LIMB_BITS' is the total number of bits in an `mp_limb_t'.  In
+     all cases
+
+          GMP_LIMB_BITS == GMP_NAIL_BITS + GMP_NUMB_BITS
+
+ -- Macro: GMP_NAIL_MASK
+ -- Macro: GMP_NUMB_MASK
+     Bit masks for the nail and number parts of a limb.
+     `GMP_NAIL_MASK' is 0 when nails are not in use.
+
+     `GMP_NAIL_MASK' is not often needed, since the nail part can be
+     obtained with `x >> GMP_NUMB_BITS', and that means one less large
+     constant, which can help various RISC chips.
+
+ -- Macro: GMP_NUMB_MAX
+     The maximum value that can be stored in the number part of a limb.
+     This is the same as `GMP_NUMB_MASK', but can be used for clarity
+     when doing comparisons rather than bit-wise operations.
+
+   The term "nails" comes from finger or toe nails, which are at the
+ends of a limb (arm or leg).  "numb" is short for number, but is also
+how the developers felt after trying for a long time to come up with
+sensible names for these things.
+
+   In the future (the distant future most likely) a non-zero nail might
+be permitted, giving non-unique representations for numbers in a limb
+vector.  This would help vector processors since carries would only
+ever need to propagate one or two limbs.
+
+\1f
+File: gmp.info,  Node: Random Number Functions,  Next: Formatted Output,  Prev: Low-level Functions,  Up: Top
+
+9 Random Number Functions
+*************************
+
+Sequences of pseudo-random numbers in GMP are generated using a
+variable of type `gmp_randstate_t', which holds an algorithm selection
+and a current state.  Such a variable must be initialized by a call to
+one of the `gmp_randinit' functions, and can be seeded with one of the
+`gmp_randseed' functions.
+
+   The functions actually generating random numbers are described in
+*note Integer Random Numbers::, and *note Miscellaneous Float
+Functions::.
+
+   The older style random number functions don't accept a
+`gmp_randstate_t' parameter but instead share a global variable of that
+type.  They use a default algorithm and are currently not seeded
+(though perhaps that will change in the future).  The new functions
+accepting a `gmp_randstate_t' are recommended for applications that
+care about randomness.
+
+* Menu:
+
+* Random State Initialization::
+* Random State Seeding::
+* Random State Miscellaneous::
+
+\1f
+File: gmp.info,  Node: Random State Initialization,  Next: Random State Seeding,  Prev: Random Number Functions,  Up: Random Number Functions
+
+9.1 Random State Initialization
+===============================
+
+ -- Function: void gmp_randinit_default (gmp_randstate_t STATE)
+     Initialize STATE with a default algorithm.  This will be a
+     compromise between speed and randomness, and is recommended for
+     applications with no special requirements.  Currently this is
+     `gmp_randinit_mt'.
+
+ -- Function: void gmp_randinit_mt (gmp_randstate_t STATE)
+     Initialize STATE for a Mersenne Twister algorithm.  This algorithm
+     is fast and has good randomness properties.
+
+ -- Function: void gmp_randinit_lc_2exp (gmp_randstate_t STATE, mpz_t
+          A, unsigned long C, mp_bitcnt_t M2EXP)
+     Initialize STATE with a linear congruential algorithm X = (A*X +
+     C) mod 2^M2EXP.
+
+     The low bits of X in this algorithm are not very random.  The least
+     significant bit will have a period no more than 2, and the second
+     bit no more than 4, etc.  For this reason only the high half of
+     each X is actually used.
+
+     When a random number of more than M2EXP/2 bits is to be generated,
+     multiple iterations of the recurrence are used and the results
+     concatenated.
+
+ -- Function: int gmp_randinit_lc_2exp_size (gmp_randstate_t STATE,
+          mp_bitcnt_t SIZE)
+     Initialize STATE for a linear congruential algorithm as per
+     `gmp_randinit_lc_2exp'.  A, C and M2EXP are selected from a table,
+     chosen so that SIZE bits (or more) of each X will be used, i.e.
+     M2EXP/2 >= SIZE.
+
+     If successful the return value is non-zero.  If SIZE is bigger
+     than the table data provides then the return value is zero.  The
+     maximum SIZE currently supported is 128.
+
+ -- Function: void gmp_randinit_set (gmp_randstate_t ROP,
+          gmp_randstate_t OP)
+     Initialize ROP with a copy of the algorithm and state from OP.
+
+ -- Function: void gmp_randinit (gmp_randstate_t STATE,
+          gmp_randalg_t ALG, ...)
+     *This function is obsolete.*
+
+     Initialize STATE with an algorithm selected by ALG.  The only
+     choice is `GMP_RAND_ALG_LC', which is `gmp_randinit_lc_2exp_size'
+     described above.  A third parameter of type `unsigned long' is
+     required, this is the SIZE for that function.
+     `GMP_RAND_ALG_DEFAULT' or 0 are the same as `GMP_RAND_ALG_LC'.
+
+     `gmp_randinit' sets bits in the global variable `gmp_errno' to
+     indicate an error: `GMP_ERROR_UNSUPPORTED_ARGUMENT' if ALG is
+     unsupported, or `GMP_ERROR_INVALID_ARGUMENT' if the SIZE parameter
+     is too big.  It may be noted this error reporting is not thread
+     safe (a good reason to use `gmp_randinit_lc_2exp_size' instead).
+
+ -- Function: void gmp_randclear (gmp_randstate_t STATE)
+     Free all memory occupied by STATE.
+
+\1f
+File: gmp.info,  Node: Random State Seeding,  Next: Random State Miscellaneous,  Prev: Random State Initialization,  Up: Random Number Functions
+
+9.2 Random State Seeding
+========================
+
+ -- Function: void gmp_randseed (gmp_randstate_t STATE, mpz_t SEED)
+ -- Function: void gmp_randseed_ui (gmp_randstate_t STATE,
+          unsigned long int SEED)
+     Set an initial seed value into STATE.
+
+     The size of a seed determines how many different sequences of
+     random numbers it's possible to generate.  The "quality" of
+     the seed is the randomness of a given seed compared to the
+     previous seed used, and this affects the randomness of separate
+     number sequences.  The method for choosing a seed is critical if
+     the generated numbers are to be used for important applications,
+     such as generating cryptographic keys.
+
+     Traditionally the system time has been used to seed, but care
+     needs to be taken with this.  If an application seeds often and
+     the resolution of the system clock is low, then the same sequence
+     of numbers might be repeated.  Also, the system time is quite easy
+     to guess, so if unpredictability is required then it should
+     definitely not be the only source for the seed value.  On some
+     systems there's a special device `/dev/random' which provides
+     random data better suited for use as a seed.
+
+\1f
+File: gmp.info,  Node: Random State Miscellaneous,  Prev: Random State Seeding,  Up: Random Number Functions
+
+9.3 Random State Miscellaneous
+==============================
+
+ -- Function: unsigned long gmp_urandomb_ui (gmp_randstate_t STATE,
+          unsigned long N)
+     Return a uniformly distributed random number of N bits, i.e. in the
+     range 0 to 2^N-1 inclusive.  N must be less than or equal to the
+     number of bits in an `unsigned long'.
+
+ -- Function: unsigned long gmp_urandomm_ui (gmp_randstate_t STATE,
+          unsigned long N)
+     Return a uniformly distributed random number in the range 0 to
+     N-1, inclusive.
+
+\1f
+File: gmp.info,  Node: Formatted Output,  Next: Formatted Input,  Prev: Random Number Functions,  Up: Top
+
+10 Formatted Output
+*******************
+
+* Menu:
+
+* Formatted Output Strings::
+* Formatted Output Functions::
+* C++ Formatted Output::
+
+\1f
+File: gmp.info,  Node: Formatted Output Strings,  Next: Formatted Output Functions,  Prev: Formatted Output,  Up: Formatted Output
+
+10.1 Format Strings
+===================
+
+`gmp_printf' and friends accept format strings similar to the standard C
+`printf' (*note Formatted Output: (libc)Formatted Output.).  A format
+specification is of the form
+
+     % [flags] [width] [.[precision]] [type] conv
+
+   GMP adds types `Z', `Q' and `F' for `mpz_t', `mpq_t' and `mpf_t'
+respectively, `M' for `mp_limb_t', and `N' for an `mp_limb_t' array.
+`Z', `Q', `M' and `N' behave like integers.  `Q' will print a `/' and a
+denominator, if needed.  `F' behaves like a float.  For example,
+
+     mpz_t z;
+     gmp_printf ("%s is an mpz %Zd\n", "here", z);
+
+     mpq_t q;
+     gmp_printf ("a hex rational: %#40Qx\n", q);
+
+     mpf_t f;
+     int   n;
+     gmp_printf ("fixed point mpf %.*Ff with %d digits\n", n, f, n);
+
+     mp_limb_t l;
+     gmp_printf ("limb %Mu\n", l);
+
+     const mp_limb_t *ptr;
+     mp_size_t       size;
+     gmp_printf ("limb array %Nx\n", ptr, size);
+
+   For `N' the limbs are expected least significant first, as per the
+`mpn' functions (*note Low-level Functions::).  A negative size can be
+given to print the value as a negative.
+
+   All the standard C `printf' types behave the same as the C library
+`printf', and can be freely intermixed with the GMP extensions.  In the
+current implementation the standard parts of the format string are
+simply handed to `printf' and only the GMP extensions handled directly.
+
+   The flags accepted are as follows.  GLIBC style ' is only for the
+standard C types (not the GMP types), and only if the C library
+supports it.
+
+     0         pad with zeros (rather than spaces)
+     #         show the base with `0x', `0X' or `0'
+     +         always show a sign
+     (space)   show a space or a `-' sign
+     '         group digits, GLIBC style (not GMP types)
+
+   The optional width and precision can be given as a number within the
+format string, or as a `*' to take an extra parameter of type `int', the
+same as the standard `printf'.
+
+   The standard types accepted are as follows.  `h' and `l' are
+portable, the rest will depend on the compiler (or include files) for
+the type and the C library for the output.
+
+     h         short
+     hh        char
+     j         intmax_t or uintmax_t
+     l         long or wchar_t
+     ll        long long
+     L         long double
+     q         quad_t or u_quad_t
+     t         ptrdiff_t
+     z         size_t
+
+The GMP types are
+
+     F         mpf_t, float conversions
+     Q         mpq_t, integer conversions
+     M         mp_limb_t, integer conversions
+     N         mp_limb_t array, integer conversions
+     Z         mpz_t, integer conversions
+
+   The conversions accepted are as follows.  `a' and `A' are always
+supported for `mpf_t' but depend on the C library for standard C float
+types.  `m' and `p' depend on the C library.
+
+     a A       hex floats, C99 style
+     c         character
+     d         decimal integer
+     e E       scientific format float
+     f         fixed point float
+     i         same as d
+     g G       fixed or scientific float
+     m         `strerror' string, GLIBC style
+     n         store characters written so far
+     o         octal integer
+     p         pointer
+     s         string
+     u         unsigned integer
+     x X       hex integer
+
+   `o', `x' and `X' are unsigned for the standard C types, but for
+types `Z', `Q' and `N' they are signed.  `u' is not meaningful for `Z',
+`Q' and `N'.
+
+   `M' is a proxy for the C library `l' or `L', according to the size
+of `mp_limb_t'.  Unsigned conversions will be usual, but a signed
+conversion can be used and will interpret the value as a two's complement
+negative.
+
+   `n' can be used with any type, even the GMP types.
+
+   Other types or conversions that might be accepted by the C library
+`printf' cannot be used through `gmp_printf', this includes for
+instance extensions registered with GLIBC `register_printf_function'.
+Also currently there's no support for POSIX `$' style numbered arguments
+(perhaps this will be added in the future).
+
+   The precision field has its usual meaning for integer `Z' and float
+`F' types, but is currently undefined for `Q' and should not be used
+with that.
+
+   `mpf_t' conversions only ever generate as many digits as can be
+accurately represented by the operand, the same as `mpf_get_str' does.
+Zeros will be used if necessary to pad to the requested precision.  This
+happens even for an `f' conversion of an `mpf_t' which is an integer,
+for instance 2^1024 in an `mpf_t' of 128 bits precision will only
+produce about 40 digits, then pad with zeros to the decimal point.  An
+empty precision field like `%.Fe' or `%.Ff' can be used to specifically
+request just the significant digits.  Without any dot and thus no
+precision field, a precision value of 6 will be used.  Note that these
+rules mean that `%Ff', `%.Ff', and `%.0Ff' will all be different.
+
+   The decimal point character (or string) is taken from the current
+locale settings on systems which provide `localeconv' (*note Locales
+and Internationalization: (libc)Locales.).  The C library will normally
+do the same for standard float output.
+
+   The format string is only interpreted as plain `char's, multibyte
+characters are not recognised.  Perhaps this will change in the future.
+
+\1f
+File: gmp.info,  Node: Formatted Output Functions,  Next: C++ Formatted Output,  Prev: Formatted Output Strings,  Up: Formatted Output
+
+10.2 Functions
+==============
+
+Each of the following functions is similar to the corresponding C
+library function.  The basic `printf' forms take a variable argument
+list.  The `vprintf' forms take an argument pointer, see *note Variadic
+Functions: (libc)Variadic Functions, or `man 3 va_start'.
+
+   It should be emphasised that if a format string is invalid, or the
+arguments don't match what the format specifies, then the behaviour of
+any of these functions will be unpredictable.  GCC format string
+checking is not available, since it doesn't recognise the GMP
+extensions.
+
+   The file based functions `gmp_printf' and `gmp_fprintf' will return
+-1 to indicate a write error.  Output is not "atomic", so partial
+output may be produced if a write error occurs.  All the functions can
+return -1 if the C library `printf' variant in use returns -1, but this
+shouldn't normally occur.
+
+ -- Function: int gmp_printf (const char *FMT, ...)
+ -- Function: int gmp_vprintf (const char *FMT, va_list AP)
+     Print to the standard output `stdout'.  Return the number of
+     characters written, or -1 if an error occurred.
+
+ -- Function: int gmp_fprintf (FILE *FP, const char *FMT, ...)
+ -- Function: int gmp_vfprintf (FILE *FP, const char *FMT, va_list AP)
+     Print to the stream FP.  Return the number of characters written,
+     or -1 if an error occurred.
+
+ -- Function: int gmp_sprintf (char *BUF, const char *FMT, ...)
+ -- Function: int gmp_vsprintf (char *BUF, const char *FMT, va_list AP)
+     Form a null-terminated string in BUF.  Return the number of
+     characters written, excluding the terminating null.
+
+     No overlap is permitted between the space at BUF and the string
+     FMT.
+
+     These functions are not recommended, since there's no protection
+     against exceeding the space available at BUF.
+
+ -- Function: int gmp_snprintf (char *BUF, size_t SIZE, const char
+          *FMT, ...)
+ -- Function: int gmp_vsnprintf (char *BUF, size_t SIZE, const char
+          *FMT, va_list AP)
+     Form a null-terminated string in BUF.  No more than SIZE bytes
+     will be written.  To get the full output, SIZE must be enough for
+     the string and null-terminator.
+
+     The return value is the total number of characters which ought to
+     have been produced, excluding the terminating null.  If RETVAL >=
+     SIZE then the actual output has been truncated to the first SIZE-1
+     characters, and a null appended.
+
+     No overlap is permitted between the region {BUF,SIZE} and the FMT
+     string.
+
+     Notice the return value is in ISO C99 `snprintf' style.  This is
+     so even if the C library `vsnprintf' is the older GLIBC 2.0.x
+     style.
+
+ -- Function: int gmp_asprintf (char **PP, const char *FMT, ...)
+ -- Function: int gmp_vasprintf (char **PP, const char *FMT, va_list AP)
+     Form a null-terminated string in a block of memory obtained from
+     the current memory allocation function (*note Custom
+     Allocation::).  The block will be the size of the string and
+     null-terminator.  The address of the block is stored to *PP.  The
+     return value is the number of characters produced, excluding the
+     null-terminator.
+
+     Unlike the C library `asprintf', `gmp_asprintf' doesn't return -1
+     if there's no more memory available, it lets the current allocation
+     function handle that.
+
+ -- Function: int gmp_obstack_printf (struct obstack *OB, const char
+          *FMT, ...)
+ -- Function: int gmp_obstack_vprintf (struct obstack *OB, const char
+          *FMT, va_list AP)
+     Append to the current object in OB.  The return value is the
+     number of characters written.  A null-terminator is not written.
+
+     FMT cannot be within the current object in OB, since that object
+     might move as it grows.
+
+     These functions are available only when the C library provides the
+     obstack feature, which probably means only on GNU systems, see
+     *note Obstacks: (libc)Obstacks.
+
+\1f
+File: gmp.info,  Node: C++ Formatted Output,  Prev: Formatted Output Functions,  Up: Formatted Output
+
+10.3 C++ Formatted Output
+=========================
+
+The following functions are provided in `libgmpxx' (*note Headers and
+Libraries::), which is built if C++ support is enabled (*note Build
+Options::).  Prototypes are available from `<gmp.h>'.
+
+ -- Function: ostream& operator<< (ostream& STREAM, mpz_t OP)
+     Print OP to STREAM, using its `ios' formatting settings.
+     `ios::width' is reset to 0 after output, the same as the standard
+     `ostream operator<<' routines do.
+
+     In hex or octal, OP is printed as a signed number, the same as for
+     decimal.  This is unlike the standard `operator<<' routines on
+     `int' etc, which instead give twos complement.
+
+ -- Function: ostream& operator<< (ostream& STREAM, mpq_t OP)
+     Print OP to STREAM, using its `ios' formatting settings.
+     `ios::width' is reset to 0 after output, the same as the standard
+     `ostream operator<<' routines do.
+
+     Output will be a fraction like `5/9', or if the denominator is 1
+     then just a plain integer like `123'.
+
+     In hex or octal, OP is printed as a signed value, the same as for
+     decimal.  If `ios::showbase' is set then a base indicator is shown
+     on both the numerator and denominator (if the denominator is
+     required).
+
+ -- Function: ostream& operator<< (ostream& STREAM, mpf_t OP)
+     Print OP to STREAM, using its `ios' formatting settings.
+     `ios::width' is reset to 0 after output, the same as the standard
+     `ostream operator<<' routines do.
+
+     The decimal point follows the standard library float `operator<<',
+     which on recent systems means the `std::locale' imbued on STREAM.
+
+     Hex and octal are supported, unlike the standard `operator<<' on
+     `double'.  The mantissa will be in hex or octal, the exponent will
+     be in decimal.  For hex the exponent delimiter is an `@'.  This is
+     as per `mpf_out_str'.
+
+     `ios::showbase' is supported, and will put a base on the mantissa,
+     for example hex `0x1.8' or `0x0.8', or octal `01.4' or `00.4'.
+     This last form is slightly strange, but at least differentiates
+     itself from decimal.
+
+   These operators mean that GMP types can be printed in the usual C++
+way, for example,
+
+     mpz_t  z;
+     int    n;
+     ...
+     cout << "iteration " << n << " value " << z << "\n";
+
+   But note that `ostream' output (and `istream' input, *note C++
+Formatted Input::) is the only overloading available for the GMP types
+and that for instance using `+' with an `mpz_t' will have unpredictable
+results.  For classes with overloading, see *note C++ Class Interface::.
+
+\1f
+File: gmp.info,  Node: Formatted Input,  Next: C++ Class Interface,  Prev: Formatted Output,  Up: Top
+
+11 Formatted Input
+******************
+
+* Menu:
+
+* Formatted Input Strings::
+* Formatted Input Functions::
+* C++ Formatted Input::
+
+\1f
+File: gmp.info,  Node: Formatted Input Strings,  Next: Formatted Input Functions,  Prev: Formatted Input,  Up: Formatted Input
+
+11.1 Formatted Input Strings
+============================
+
+`gmp_scanf' and friends accept format strings similar to the standard C
+`scanf' (*note Formatted Input: (libc)Formatted Input.).  A format
+specification is of the form
+
+     % [flags] [width] [type] conv
+
+   GMP adds types `Z', `Q' and `F' for `mpz_t', `mpq_t' and `mpf_t'
+respectively.  `Z' and `Q' behave like integers.  `Q' will read a `/'
+and a denominator, if present.  `F' behaves like a float.
+
+   GMP variables don't require an `&' when passed to `gmp_scanf', since
+they're already "call-by-reference".  For example,
+
+     /* to read say "a(5) = 1234" */
+     int   n;
+     mpz_t z;
+     gmp_scanf ("a(%d) = %Zd\n", &n, z);
+
+     mpq_t q1, q2;
+     gmp_sscanf ("0377 + 0x10/0x11", "%Qi + %Qi", q1, q2);
+
+     /* to read say "topleft (1.55,-2.66)" */
+     mpf_t x, y;
+     char  buf[32];
+     gmp_scanf ("%31s (%Ff,%Ff)", buf, x, y);
+
+   All the standard C `scanf' types behave the same as in the C library
+`scanf', and can be freely intermixed with the GMP extensions.  In the
+current implementation the standard parts of the format string are
+simply handed to `scanf' and only the GMP extensions handled directly.
+
+   The flags accepted are as follows.  `a' and `'' will depend on
+support from the C library, and `'' cannot be used with GMP types.
+
+     *         read but don't store
+     a         allocate a buffer (string conversions)
+     '         grouped digits, GLIBC style (not GMP
+               types)
+
+   The standard types accepted are as follows.  `h' and `l' are
+portable, the rest will depend on the compiler (or include files) for
+the type and the C library for the input.
+
+     h         short
+     hh        char
+     j         intmax_t or uintmax_t
+     l         long int, double or wchar_t
+     ll        long long
+     L         long double
+     q         quad_t or u_quad_t
+     t         ptrdiff_t
+     z         size_t
+
+The GMP types are
+
+     F         mpf_t, float conversions
+     Q         mpq_t, integer conversions
+     Z         mpz_t, integer conversions
+
+   The conversions accepted are as follows.  `p' and `[' will depend on
+support from the C library, the rest are standard.
+
+     c         character or characters
+     d         decimal integer
+     e E f g G float
+     i         integer with base indicator
+     n         characters read so far
+     o         octal integer
+     p         pointer
+     s         string of non-whitespace characters
+     u         decimal integer
+     x X       hex integer
+     [         string of characters in a set
+
+   `e', `E', `f', `g' and `G' are identical, they all read either fixed
+point or scientific format, and either upper or lower case `e' for the
+exponent in scientific format.
+
+   C99 style hex float format (`printf %a', *note Formatted Output
+Strings::) is always accepted for `mpf_t', but for the standard float
+types it will depend on the C library.
+
+   `x' and `X' are identical, both accept both upper and lower case
+hexadecimal.
+
+   `o', `u', `x' and `X' all read positive or negative values.  For the
+standard C types these are described as "unsigned" conversions, but
+that merely affects certain overflow handling, negatives are still
+allowed (per `strtoul', *note Parsing of Integers: (libc)Parsing of
+Integers.).  For GMP types there are no overflows, so `d' and `u' are
+identical.
+
+   `Q' type reads the numerator and (optional) denominator as given.
+If the value might not be in canonical form then `mpq_canonicalize'
+must be called before using it in any calculations (*note Rational
+Number Functions::).
+
+   `Qi' will read a base specification separately for the numerator and
+denominator.  For example `0x10/11' would be 16/11, whereas `0x10/0x11'
+would be 16/17.
+
+   `n' can be used with any of the types above, even the GMP types.
+`*' to suppress assignment is allowed, though in that case it would do
+nothing at all.
+
+   Other conversions or types that might be accepted by the C library
+`scanf' cannot be used through `gmp_scanf'.
+
+   Whitespace is read and discarded before a field, except for `c' and
+`[' conversions.
+
+   For float conversions, the decimal point character (or string)
+expected is taken from the current locale settings on systems which
+provide `localeconv' (*note Locales and Internationalization:
+(libc)Locales.).  The C library will normally do the same for standard
+float input.
+
+   The format string is only interpreted as plain `char's, multibyte
+characters are not recognised.  Perhaps this will change in the future.
+
+\1f
+File: gmp.info,  Node: Formatted Input Functions,  Next: C++ Formatted Input,  Prev: Formatted Input Strings,  Up: Formatted Input
+
+11.2 Formatted Input Functions
+==============================
+
+Each of the following functions is similar to the corresponding C
+library function.  The plain `scanf' forms take a variable argument
+list.  The `vscanf' forms take an argument pointer, see *note Variadic
+Functions: (libc)Variadic Functions, or `man 3 va_start'.
+
+   It should be emphasised that if a format string is invalid, or the
+arguments don't match what the format specifies, then the behaviour of
+any of these functions will be unpredictable.  GCC format string
+checking is not available, since it doesn't recognise the GMP
+extensions.
+
+   No overlap is permitted between the FMT string and any of the results
+produced.
+
+ -- Function: int gmp_scanf (const char *FMT, ...)
+ -- Function: int gmp_vscanf (const char *FMT, va_list AP)
+     Read from the standard input `stdin'.
+
+ -- Function: int gmp_fscanf (FILE *FP, const char *FMT, ...)
+ -- Function: int gmp_vfscanf (FILE *FP, const char *FMT, va_list AP)
+     Read from the stream FP.
+
+ -- Function: int gmp_sscanf (const char *S, const char *FMT, ...)
+ -- Function: int gmp_vsscanf (const char *S, const char *FMT, va_list
+          AP)
+     Read from a null-terminated string S.
+
+   The return value from each of these functions is the same as the
+standard C99 `scanf', namely the number of fields successfully parsed
+and stored.  `%n' fields and fields read but suppressed by `*' don't
+count towards the return value.
+
+   If end of input (or a file error) is reached before a character for
+a field or a literal, and if no previous non-suppressed fields have
+matched, then the return value is `EOF' instead of 0.  A whitespace
+character in the format string is only an optional match and doesn't
+induce an `EOF' in this fashion.  Leading whitespace read and discarded
+for a field doesn't count as characters for that field.
+
+   For the GMP types, input parsing follows C99 rules, namely one
+character of lookahead is used and characters are read while they
+continue to meet the format requirements.  If this doesn't provide a
+complete number then the function terminates, with that field not
+stored nor counted towards the return value.  For instance with `mpf_t'
+an input `1.23e-XYZ' would be read up to the `X' and that character
+pushed back since it's not a digit.  The string `1.23e-' would then be
+considered invalid since an `e' must be followed by at least one digit.
+
+   For the standard C types, in the current implementation GMP calls
+the C library `scanf' functions, which might have looser rules about
+what constitutes a valid input.
+
+   Note that `gmp_sscanf' is the same as `gmp_fscanf' and only does one
+character of lookahead when parsing.  Although clearly it could look at
+its entire input, it is deliberately made identical to `gmp_fscanf',
+the same way C99 `sscanf' is the same as `fscanf'.
+
+\1f
+File: gmp.info,  Node: C++ Formatted Input,  Prev: Formatted Input Functions,  Up: Formatted Input
+
+11.3 C++ Formatted Input
+========================
+
+The following functions are provided in `libgmpxx' (*note Headers and
+Libraries::), which is built only if C++ support is enabled (*note
+Build Options::).  Prototypes are available from `<gmp.h>'.
+
+ -- Function: istream& operator>> (istream& STREAM, mpz_t ROP)
+     Read ROP from STREAM, using its `ios' formatting settings.
+
+ -- Function: istream& operator>> (istream& STREAM, mpq_t ROP)
+     An integer like `123' will be read, or a fraction like `5/9'.  No
+     whitespace is allowed around the `/'.  If the fraction is not in
+     canonical form then `mpq_canonicalize' must be called (*note
+     Rational Number Functions::) before operating on it.
+
+     As per integer input, a `0' or `0x' base indicator is read when
+     none of `ios::dec', `ios::oct' or `ios::hex' are set.  This is
+     done separately for numerator and denominator, so that for instance
+     `0x10/11' is 16/11 and `0x10/0x11' is 16/17.
+
+ -- Function: istream& operator>> (istream& STREAM, mpf_t ROP)
+     Read ROP from STREAM, using its `ios' formatting settings.
+
+     Hex or octal floats are not supported, but might be in the future,
+     or perhaps it's best to accept only what the standard float
+     `operator>>' does.
+
+   Note that digit grouping specified by the `istream' locale is
+currently not accepted.  Perhaps this will change in the future.
+
+
+   These operators mean that GMP types can be read in the usual C++
+way, for example,
+
+     mpz_t  z;
+     ...
+     cin >> z;
+
+   But note that `istream' input (and `ostream' output, *note C++
+Formatted Output::) is the only overloading available for the GMP types
+and that for instance using `+' with an `mpz_t' will have unpredictable
+results.  For classes with overloading, see *note C++ Class Interface::.
+
+\1f
+File: gmp.info,  Node: C++ Class Interface,  Next: BSD Compatible Functions,  Prev: Formatted Input,  Up: Top
+
+12 C++ Class Interface
+**********************
+
+This chapter describes the C++ class based interface to GMP.
+
+   All GMP C language types and functions can be used in C++ programs,
+since `gmp.h' has `extern "C"' qualifiers, but the class interface
+offers overloaded functions and operators which may be more convenient.
+
+   Due to the implementation of this interface, a reasonably recent C++
+compiler is required, one supporting namespaces, partial specialization
+of templates and member templates.  For GCC this means version 2.91 or
+later.
+
+   *Everything described in this chapter is to be considered preliminary
+and might be subject to incompatible changes if some unforeseen
+difficulty reveals itself.*
+
+* Menu:
+
+* C++ Interface General::
+* C++ Interface Integers::
+* C++ Interface Rationals::
+* C++ Interface Floats::
+* C++ Interface Random Numbers::
+* C++ Interface Limitations::
+
+\1f
+File: gmp.info,  Node: C++ Interface General,  Next: C++ Interface Integers,  Prev: C++ Class Interface,  Up: C++ Class Interface
+
+12.1 C++ Interface General
+==========================
+
+All the C++ classes and functions are available with
+
+     #include <gmpxx.h>
+
+   Programs should be linked with the `libgmpxx' and `libgmp'
+libraries.  For example,
+
+     g++ mycxxprog.cc -lgmpxx -lgmp
+
+The classes defined are
+
+ -- Class: mpz_class
+ -- Class: mpq_class
+ -- Class: mpf_class
+
+   The standard operators and various standard functions are overloaded
+to allow arithmetic with these classes.  For example,
+
+     int
+     main (void)
+     {
+       mpz_class a, b, c;
+
+       a = 1234;
+       b = "-5678";
+       c = a+b;
+       cout << "sum is " << c << "\n";
+       cout << "absolute value is " << abs(c) << "\n";
+
+       return 0;
+     }
+
+   An important feature of the implementation is that an expression like
+`a=b+c' results in a single call to the corresponding `mpz_add',
+without using a temporary for the `b+c' part.  Expressions which by
+their nature imply intermediate values, like `a=b*c+d*e', still use
+temporaries though.
+
+   The classes can be freely intermixed in expressions, as can the
+classes and the standard types `long', `unsigned long' and `double'.
+Smaller types like `int' or `float' can also be intermixed, since C++
+will promote them.
+
+   Note that `bool' is not accepted directly, but must be explicitly
+cast to an `int' first.  This is because C++ will automatically convert
+any pointer to a `bool', so if GMP accepted `bool' it would make all
+sorts of invalid class and pointer combinations compile but almost
+certainly not do anything sensible.
+
+   Conversions back from the classes to standard C++ types aren't done
+automatically, instead member functions like `get_si' are provided (see
+the following sections for details).
+
+   Also there are no automatic conversions from the classes to the
+corresponding GMP C types, instead a reference to the underlying C
+object can be obtained with the following functions,
+
+ -- Function: mpz_t mpz_class::get_mpz_t ()
+ -- Function: mpq_t mpq_class::get_mpq_t ()
+ -- Function: mpf_t mpf_class::get_mpf_t ()
+
+   These can be used to call a C function which doesn't have a C++ class
+interface.  For example to set `a' to the GCD of `b' and `c',
+
+     mpz_class a, b, c;
+     ...
+     mpz_gcd (a.get_mpz_t(), b.get_mpz_t(), c.get_mpz_t());
+
+   In the other direction, a class can be initialized from the
+corresponding GMP C type, or assigned to if an explicit constructor is
+used.  In both cases this makes a copy of the value, it doesn't create
+any sort of association.  For example,
+
+     mpz_t z;
+     // ... init and calculate z ...
+     mpz_class x(z);
+     mpz_class y;
+     y = mpz_class (z);
+
+   There are no namespace setups in `gmpxx.h', all types and functions
+are simply put into the global namespace.  This is what `gmp.h' has
+done in the past, and continues to do for compatibility.  The extras
+provided by `gmpxx.h' follow GMP naming conventions and are unlikely to
+clash with anything.
+
+\1f
+File: gmp.info,  Node: C++ Interface Integers,  Next: C++ Interface Rationals,  Prev: C++ Interface General,  Up: C++ Class Interface
+
+12.2 C++ Interface Integers
+===========================
+
+ -- Function:  mpz_class::mpz_class (type N)
+     Construct an `mpz_class'.  All the standard C++ types may be used,
+     except `long long' and `long double', and all the GMP C++ classes
+     can be used.  Any necessary conversion follows the corresponding C
+     function, for example `double' follows `mpz_set_d' (*note
+     Assigning Integers::).
+
+ -- Function: explicit mpz_class::mpz_class (mpz_t Z)
+     Construct an `mpz_class' from an `mpz_t'.  The value in Z is
+     copied into the new `mpz_class', there won't be any permanent
+     association between it and Z.
+
+ -- Function: explicit mpz_class::mpz_class (const char *S, int BASE =
+          0)
+ -- Function: explicit mpz_class::mpz_class (const string& S, int BASE
+          = 0)
+     Construct an `mpz_class' converted from a string using
+     `mpz_set_str' (*note Assigning Integers::).
+
+     If the string is not a valid integer, an `std::invalid_argument'
+     exception is thrown.  The same applies to `operator='.
+
+ -- Function: mpz_class operator/ (mpz_class A, mpz_class D)
+ -- Function: mpz_class operator% (mpz_class A, mpz_class D)
+     Divisions involving `mpz_class' round towards zero, as per the
+     `mpz_tdiv_q' and `mpz_tdiv_r' functions (*note Integer Division::).
+     This is the same as the C99 `/' and `%' operators.
+
+     The `mpz_fdiv...' or `mpz_cdiv...' functions can always be called
+     directly if desired.  For example,
+
+          mpz_class q, a, d;
+          ...
+          mpz_fdiv_q (q.get_mpz_t(), a.get_mpz_t(), d.get_mpz_t());
+
+ -- Function: mpz_class abs (mpz_class OP1)
+ -- Function: int cmp (mpz_class OP1, type OP2)
+ -- Function: int cmp (type OP1, mpz_class OP2)
+ -- Function: bool mpz_class::fits_sint_p (void)
+ -- Function: bool mpz_class::fits_slong_p (void)
+ -- Function: bool mpz_class::fits_sshort_p (void)
+ -- Function: bool mpz_class::fits_uint_p (void)
+ -- Function: bool mpz_class::fits_ulong_p (void)
+ -- Function: bool mpz_class::fits_ushort_p (void)
+ -- Function: double mpz_class::get_d (void)
+ -- Function: long mpz_class::get_si (void)
+ -- Function: string mpz_class::get_str (int BASE = 10)
+ -- Function: unsigned long mpz_class::get_ui (void)
+ -- Function: int mpz_class::set_str (const char *STR, int BASE)
+ -- Function: int mpz_class::set_str (const string& STR, int BASE)
+ -- Function: int sgn (mpz_class OP)
+ -- Function: mpz_class sqrt (mpz_class OP)
+     These functions provide a C++ class interface to the corresponding
+     GMP C routines.
+
+     `cmp' can be used with any of the classes or the standard C++
+     types, except `long long' and `long double'.
+
+
+   Overloaded operators for combinations of `mpz_class' and `double'
+are provided for completeness, but it should be noted that if the given
+`double' is not an integer then the way any rounding is done is
+currently unspecified.  The rounding might take place at the start, in
+the middle, or at the end of the operation, and it might change in the
+future.
+
+   Conversions between `mpz_class' and `double', however, are defined
+to follow the corresponding C functions `mpz_get_d' and `mpz_set_d'.
+And comparisons are always made exactly, as per `mpz_cmp_d'.
+
+\1f
+File: gmp.info,  Node: C++ Interface Rationals,  Next: C++ Interface Floats,  Prev: C++ Interface Integers,  Up: C++ Class Interface
+
+12.3 C++ Interface Rationals
+============================
+
+In all the following constructors, if a fraction is given then it
+should be in canonical form, or if not then `mpq_class::canonicalize'
+should be called.
+
+ -- Function:  mpq_class::mpq_class (type OP)
+ -- Function:  mpq_class::mpq_class (integer NUM, integer DEN)
+     Construct an `mpq_class'.  The initial value can be a single value
+     of any type, or a pair of integers (`mpz_class' or standard C++
+     integer types) representing a fraction, except that `long long'
+     and `long double' are not supported.  For example,
+
+          mpq_class q (99);
+          mpq_class q (1.75);
+          mpq_class q (1, 3);
+
+ -- Function: explicit mpq_class::mpq_class (mpq_t Q)
+     Construct an `mpq_class' from an `mpq_t'.  The value in Q is
+     copied into the new `mpq_class', there won't be any permanent
+     association between it and Q.
+
+ -- Function: explicit mpq_class::mpq_class (const char *S, int BASE =
+          0)
+ -- Function: explicit mpq_class::mpq_class (const string& S, int BASE
+          = 0)
+     Construct an `mpq_class' converted from a string using
+     `mpq_set_str' (*note Initializing Rationals::).
+
+     If the string is not a valid rational, an `std::invalid_argument'
+     exception is thrown.  The same applies to `operator='.
+
+ -- Function: void mpq_class::canonicalize ()
+     Put an `mpq_class' into canonical form, as per *note Rational
+     Number Functions::.  All arithmetic operators require their
+     operands in canonical form, and will return results in canonical
+     form.
+
+ -- Function: mpq_class abs (mpq_class OP)
+ -- Function: int cmp (mpq_class OP1, type OP2)
+ -- Function: int cmp (type OP1, mpq_class OP2)
+ -- Function: double mpq_class::get_d (void)
+ -- Function: string mpq_class::get_str (int BASE = 10)
+ -- Function: int mpq_class::set_str (const char *STR, int BASE)
+ -- Function: int mpq_class::set_str (const string& STR, int BASE)
+ -- Function: int sgn (mpq_class OP)
+     These functions provide a C++ class interface to the corresponding
+     GMP C routines.
+
+     `cmp' can be used with any of the classes or the standard C++
+     types, except `long long' and `long double'.
+
+ -- Function: mpz_class& mpq_class::get_num ()
+ -- Function: mpz_class& mpq_class::get_den ()
+     Get a reference to an `mpz_class' which is the numerator or
+     denominator of an `mpq_class'.  This can be used both for read and
+     write access.  If the object returned is modified, it modifies the
+     original `mpq_class'.
+
+     If direct manipulation might produce a non-canonical value, then
+     `mpq_class::canonicalize' must be called before further operations.
+
+ -- Function: mpz_t mpq_class::get_num_mpz_t ()
+ -- Function: mpz_t mpq_class::get_den_mpz_t ()
+     Get a reference to the underlying `mpz_t' numerator or denominator
+     of an `mpq_class'.  This can be passed to C functions expecting an
+     `mpz_t'.  Any modifications made to the `mpz_t' will modify the
+     original `mpq_class'.
+
+     If direct manipulation might produce a non-canonical value, then
+     `mpq_class::canonicalize' must be called before further operations.
+
+ -- Function: istream& operator>> (istream& STREAM, mpq_class& ROP);
+     Read ROP from STREAM, using its `ios' formatting settings, the
+     same as `mpq_t operator>>' (*note C++ Formatted Input::).
+
+     If the ROP read might not be in canonical form then
+     `mpq_class::canonicalize' must be called.
+
+\1f
+File: gmp.info,  Node: C++ Interface Floats,  Next: C++ Interface Random Numbers,  Prev: C++ Interface Rationals,  Up: C++ Class Interface
+
+12.4 C++ Interface Floats
+=========================
+
+When an expression requires the use of temporary intermediate
+`mpf_class' values, like `f=g*h+x*y', those temporaries will have the
+same precision as the destination `f'.  Explicit constructors can be
+used if this doesn't suit.
+
+ -- Function:  mpf_class::mpf_class (type OP)
+ -- Function:  mpf_class::mpf_class (type OP, mp_bitcnt_t PREC)
+     Construct an `mpf_class'.  Any standard C++ type can be used,
+     except `long long' and `long double', and any of the GMP C++
+     classes can be used.
+
+     If PREC is given, the initial precision is that value, in bits.  If
+     PREC is not given, then the initial precision is determined by the
+     type of OP given.  An `mpz_class', `mpq_class', or C++ builtin
+     type will give the default `mpf' precision (*note Initializing
+     Floats::).  An `mpf_class' or expression will give the precision
+     of that value.  The precision of a binary expression is the higher
+     of the two operands.
+
+          mpf_class f(1.5);        // default precision
+          mpf_class f(1.5, 500);   // 500 bits (at least)
+          mpf_class f(x);          // precision of x
+          mpf_class f(abs(x));     // precision of x
+          mpf_class f(-g, 1000);   // 1000 bits (at least)
+          mpf_class f(x+y);        // greater of precisions of x and y
+
+ -- Function: explicit mpf_class::mpf_class (mpf_t F)
+ -- Function:  mpf_class::mpf_class (mpf_t F, mp_bitcnt_t PREC)
+     Construct an `mpf_class' from an `mpf_t'.  The value in F is
+     copied into the new `mpf_class', there won't be any permanent
+     association between it and F.
+
+     If PREC is given, the initial precision is that value, in bits.  If
+     PREC is not given, then the initial precision is that of F.
+
+ -- Function: explicit mpf_class::mpf_class (const char *S)
+ -- Function:  mpf_class::mpf_class (const char *S, mp_bitcnt_t PREC,
+          int BASE = 0)
+ -- Function: explicit mpf_class::mpf_class (const string& S)
+ -- Function:  mpf_class::mpf_class (const string& S, mp_bitcnt_t PREC,
+          int BASE = 0)
+     Construct an `mpf_class' converted from a string using
+     `mpf_set_str' (*note Assigning Floats::).  If PREC is given, the
+     initial precision is that value, in bits.  If not, the default
+     `mpf' precision (*note Initializing Floats::) is used.
+
+     If the string is not a valid float, an `std::invalid_argument'
+     exception is thrown.  The same applies to `operator='.
+
+ -- Function: mpf_class& mpf_class::operator= (type OP)
+     Convert and store the given OP value to an `mpf_class' object.  The
+     same types are accepted as for the constructors above.
+
+     Note that `operator=' only stores a new value, it doesn't copy or
+     change the precision of the destination, instead the value is
+     truncated if necessary.  This is the same as `mpf_set' etc.  Note
+     in particular this means for `mpf_class' a copy constructor is not
+     the same as a default constructor plus assignment.
+
+          mpf_class x (y);   // x created with precision of y
+
+          mpf_class x;       // x created with default precision
+          x = y;             // value truncated to that precision
+
+     Applications using templated code may need to be careful about the
+     assumptions the code makes in this area, when working with
+     `mpf_class' values of various different or non-default precisions.
+     For instance implementations of the standard `complex' template
+     have been seen in both styles above, though of course `complex' is
+     normally only actually specified for use with the builtin float
+     types.
+
+ -- Function: mpf_class abs (mpf_class OP)
+ -- Function: mpf_class ceil (mpf_class OP)
+ -- Function: int cmp (mpf_class OP1, type OP2)
+ -- Function: int cmp (type OP1, mpf_class OP2)
+ -- Function: bool mpf_class::fits_sint_p (void)
+ -- Function: bool mpf_class::fits_slong_p (void)
+ -- Function: bool mpf_class::fits_sshort_p (void)
+ -- Function: bool mpf_class::fits_uint_p (void)
+ -- Function: bool mpf_class::fits_ulong_p (void)
+ -- Function: bool mpf_class::fits_ushort_p (void)
+ -- Function: mpf_class floor (mpf_class OP)
+ -- Function: mpf_class hypot (mpf_class OP1, mpf_class OP2)
+ -- Function: double mpf_class::get_d (void)
+ -- Function: long mpf_class::get_si (void)
+ -- Function: string mpf_class::get_str (mp_exp_t& EXP, int BASE = 10,
+          size_t DIGITS = 0)
+ -- Function: unsigned long mpf_class::get_ui (void)
+ -- Function: int mpf_class::set_str (const char *STR, int BASE)
+ -- Function: int mpf_class::set_str (const string& STR, int BASE)
+ -- Function: int sgn (mpf_class OP)
+ -- Function: mpf_class sqrt (mpf_class OP)
+ -- Function: mpf_class trunc (mpf_class OP)
+     These functions provide a C++ class interface to the corresponding
+     GMP C routines.
+
+     `cmp' can be used with any of the classes or the standard C++
+     types, except `long long' and `long double'.
+
+     The accuracy provided by `hypot' is not currently guaranteed.
+
+ -- Function: mp_bitcnt_t mpf_class::get_prec ()
+ -- Function: void mpf_class::set_prec (mp_bitcnt_t PREC)
+ -- Function: void mpf_class::set_prec_raw (mp_bitcnt_t PREC)
+     Get or set the current precision of an `mpf_class'.
+
+     The restrictions described for `mpf_set_prec_raw' (*note
+     Initializing Floats::) apply to `mpf_class::set_prec_raw'.  Note
+     in particular that the `mpf_class' must be restored to its
+     allocated precision before being destroyed.  This must be done by
+     application code; there's no automatic mechanism for it.
+
+\1f
+File: gmp.info,  Node: C++ Interface Random Numbers,  Next: C++ Interface Limitations,  Prev: C++ Interface Floats,  Up: C++ Class Interface
+
+12.5 C++ Interface Random Numbers
+=================================
+
+ -- Class: gmp_randclass
+     The C++ class interface to the GMP random number functions uses
+     `gmp_randclass' to hold an algorithm selection and current state,
+     as per `gmp_randstate_t'.
+
+ -- Function:  gmp_randclass::gmp_randclass (void (*RANDINIT)
+          (gmp_randstate_t, ...), ...)
+     Construct a `gmp_randclass', using a call to the given RANDINIT
+     function (*note Random State Initialization::).  The arguments
+     expected are the same as RANDINIT, but with `mpz_class' instead of
+     `mpz_t'.  For example,
+
+          gmp_randclass r1 (gmp_randinit_default);
+          gmp_randclass r2 (gmp_randinit_lc_2exp_size, 32);
+          gmp_randclass r3 (gmp_randinit_lc_2exp, a, c, m2exp);
+          gmp_randclass r4 (gmp_randinit_mt);
+
+     `gmp_randinit_lc_2exp_size' will fail if the size requested is too
+     big; an `std::length_error' exception is thrown in that case.
+
+ -- Function:  gmp_randclass::gmp_randclass (gmp_randalg_t ALG, ...)
+     Construct a `gmp_randclass' using the same parameters as
+     `gmp_randinit' (*note Random State Initialization::).  This
+     function is obsolete and the above RANDINIT style should be
+     preferred.
+
+ -- Function: void gmp_randclass::seed (unsigned long int S)
+ -- Function: void gmp_randclass::seed (mpz_class S)
+     Seed a random number generator.  See *note Random Number
+     Functions::, for how to choose a good seed.
+
+ -- Function: mpz_class gmp_randclass::get_z_bits (unsigned long BITS)
+ -- Function: mpz_class gmp_randclass::get_z_bits (mpz_class BITS)
+     Generate a random integer with a specified number of bits.
+
+ -- Function: mpz_class gmp_randclass::get_z_range (mpz_class N)
+     Generate a random integer in the range 0 to N-1 inclusive.
+
+ -- Function: mpf_class gmp_randclass::get_f ()
+ -- Function: mpf_class gmp_randclass::get_f (mp_bitcnt_t PREC)
+     Generate a random float F in the range 0 <= F < 1.  F will be to
+     PREC bits precision, or if PREC is not given then to the precision
+     of the destination.  For example,
+
+          gmp_randclass  r;
+          ...
+          mpf_class  f (0, 512);   // 512 bits precision
+          f = r.get_f();           // random number, 512 bits
+
+\1f
+File: gmp.info,  Node: C++ Interface Limitations,  Prev: C++ Interface Random Numbers,  Up: C++ Class Interface
+
+12.6 C++ Interface Limitations
+==============================
+
+`mpq_class' and Templated Reading
+     A generic piece of template code probably won't know that
+     `mpq_class' requires a `canonicalize' call if inputs read with
+     `operator>>' might be non-canonical.  This can lead to incorrect
+     results.
+
+     `operator>>' behaves as it does for reasons of efficiency.  A
+     canonicalize can be quite time consuming on large operands, and is
+     best avoided if it's not necessary.
+
+     But this potential difficulty reduces the usefulness of
+     `mpq_class'.  Perhaps a mechanism to tell `operator>>' what to do
+     will be adopted in the future, maybe a preprocessor define, a
+     global flag, or an `ios' flag pressed into service.  Or maybe, at
+     the risk of inconsistency, the `mpq_class' `operator>>' could
+     canonicalize and leave `mpq_t' `operator>>' not doing so, for use
+     on those occasions when that's acceptable.  Send feedback or
+     alternate ideas to <gmp-bugs@gmplib.org>.
+
+Subclassing
+     Subclassing the GMP C++ classes works, but is not currently
+     recommended.
+
+     Expressions involving subclasses resolve correctly (or seem to),
+     but in normal C++ fashion the subclass doesn't inherit
+     constructors and assignments.  There are many of those in the GMP
+     classes, and a good way to reestablish them in a subclass is not
+     yet provided.
+
+Templated Expressions
+     A subtle difficulty exists when using expressions together with
+     application-defined template functions.  Consider the following,
+     with `T' intended to be some numeric type,
+
+          template <class T>
+          T fun (const T &, const T &);
+
+     When used with, say, plain `mpz_class' variables, it works fine:
+     `T' is resolved as `mpz_class'.
+
+          mpz_class f(1), g(2);
+          fun (f, g);    // Good
+
+     But when one of the arguments is an expression, it doesn't work.
+
+          mpz_class f(1), g(2), h(3);
+          fun (f, g+h);  // Bad
+
+     This is because `g+h' ends up being a certain expression template
+     type internal to `gmpxx.h', which the C++ template resolution
+     rules are unable to automatically convert to `mpz_class'.  The
+     workaround is simply to add an explicit cast.
+
+          mpz_class f(1), g(2), h(3);
+          fun (f, mpz_class(g+h));  // Good
+
+     Similarly, within `fun' it may be necessary to cast an expression
+     to type `T' when calling a templated `fun2'.
+
+          template <class T>
+          void fun (T f, T g)
+          {
+            fun2 (f, f+g);     // Bad
+          }
+
+          template <class T>
+          void fun (T f, T g)
+          {
+            fun2 (f, T(f+g));  // Good
+          }
+
+\1f
+File: gmp.info,  Node: BSD Compatible Functions,  Next: Custom Allocation,  Prev: C++ Class Interface,  Up: Top
+
+13 Berkeley MP Compatible Functions
+***********************************
+
+These functions are intended to be fully compatible with the Berkeley MP
+library which is available on many BSD derived U*ix systems.  The
+`--enable-mpbsd' option must be used when building GNU MP to make these
+available (*note Installing GMP::).
+
+   The original Berkeley MP library has a usage restriction: you cannot
+use the same variable as both source and destination in a single
+function call.  The compatible functions in GNU MP do not share this
+restriction--inputs and outputs may overlap.
+
+   It is not recommended that new programs are written using these
+functions.  Apart from the incomplete set of functions, the interface
+for initializing `MINT' objects is more error prone, and the `pow'
+function collides with `pow' in `libm.a'.
+
+   Include the header `mp.h' to get the definition of the necessary
+types and functions.  If you are on a BSD derived system, make sure to
+include GNU `mp.h' if you are going to link the GNU `libmp.a' to your
+program.  This means that you probably need to give the `-I<dir>'
+option to the compiler, where `<dir>' is the directory where you have
+GNU `mp.h'.
+
+ -- Function: MINT * itom (signed short int INITIAL_VALUE)
+     Allocate an integer consisting of a `MINT' object and dynamic limb
+     space.  Initialize the integer to INITIAL_VALUE.  Return a pointer
+     to the `MINT' object.
+
+ -- Function: MINT * xtom (char *INITIAL_VALUE)
+     Allocate an integer consisting of a `MINT' object and dynamic limb
+     space.  Initialize the integer from INITIAL_VALUE, a hexadecimal,
+     null-terminated C string.  Return a pointer to the `MINT' object.
+
+ -- Function: void move (MINT *SRC, MINT *DEST)
+     Set DEST to SRC by copying.  Both variables must be previously
+     initialized.
+
+ -- Function: void madd (MINT *SRC_1, MINT *SRC_2, MINT *DESTINATION)
+     Add SRC_1 and SRC_2 and put the sum in DESTINATION.
+
+ -- Function: void msub (MINT *SRC_1, MINT *SRC_2, MINT *DESTINATION)
+     Subtract SRC_2 from SRC_1 and put the difference in DESTINATION.
+
+ -- Function: void mult (MINT *SRC_1, MINT *SRC_2, MINT *DESTINATION)
+     Multiply SRC_1 and SRC_2 and put the product in DESTINATION.
+
+ -- Function: void mdiv (MINT *DIVIDEND, MINT *DIVISOR, MINT *QUOTIENT,
+          MINT *REMAINDER)
+ -- Function: void sdiv (MINT *DIVIDEND, signed short int DIVISOR, MINT
+          *QUOTIENT, signed short int *REMAINDER)
+     Set QUOTIENT to DIVIDEND/DIVISOR, and REMAINDER to DIVIDEND mod
+     DIVISOR.  The quotient is rounded towards zero; the remainder has
+     the same sign as the dividend unless it is zero.
+
+     Some implementations of these functions work differently--or not
+     at all--for negative arguments.
+
+ -- Function: void msqrt (MINT *OP, MINT *ROOT, MINT *REMAINDER)
+     Set ROOT to the truncated integer part of the square root of OP,
+     like `mpz_sqrt'.  Set REMAINDER to OP-ROOT*ROOT, i.e.  zero if OP
+     is a perfect square.
+
+     If ROOT and REMAINDER are the same variable, the results are
+     undefined.
+
+ -- Function: void pow (MINT *BASE, MINT *EXP, MINT *MOD, MINT *DEST)
+     Set DEST to (BASE raised to EXP) modulo MOD.
+
+     Note that the name `pow' clashes with `pow' from the standard C
+     math library (*note Exponentiation and Logarithms: (libc)Exponents
+     and Logarithms.).  An application will only be able to use one or
+     the other.
+
+ -- Function: void rpow (MINT *BASE, signed short int EXP, MINT *DEST)
+     Set DEST to BASE raised to EXP.
+
+ -- Function: void gcd (MINT *OP1, MINT *OP2, MINT *RES)
+     Set RES to the greatest common divisor of OP1 and OP2.
+
+ -- Function: int mcmp (MINT *OP1, MINT *OP2)
+     Compare OP1 and OP2.  Return a positive value if OP1 > OP2, zero
+     if OP1 = OP2, and a negative value if OP1 < OP2.
+
+ -- Function: void min (MINT *DEST)
+     Input a decimal string from `stdin', and put the read integer in
+     DEST.  SPC and TAB are allowed in the number string, and are
+     ignored.
+
+ -- Function: void mout (MINT *SRC)
+     Output SRC to `stdout', as a decimal string.  Also output a
+     newline.
+
+ -- Function: char * mtox (MINT *OP)
+     Convert OP to a hexadecimal string, and return a pointer to the
+     string.  The returned string is allocated using the default memory
+     allocation function, `malloc' by default.  It will be
+     `strlen(str)+1' bytes, that being exactly enough for the string
+     and null-terminator.
+
+ -- Function: void mfree (MINT *OP)
+     De-allocate the space used by OP.  *This function should only be
+     passed a value returned by `itom' or `xtom'.*
+
+\1f
+File: gmp.info,  Node: Custom Allocation,  Next: Language Bindings,  Prev: BSD Compatible Functions,  Up: Top
+
+14 Custom Allocation
+********************
+
+By default GMP uses `malloc', `realloc' and `free' for memory
+allocation, and if they fail GMP prints a message to the standard error
+output and terminates the program.
+
+   Alternate functions can be specified, to allocate memory in a
+different way or to have a different error action on running out of
+memory.
+
+   This feature is available in the Berkeley compatibility library
+(*note BSD Compatible Functions::) as well as the main GMP library.
+
+ -- Function: void mp_set_memory_functions (
+          void *(*ALLOC_FUNC_PTR) (size_t),
+          void *(*REALLOC_FUNC_PTR) (void *, size_t, size_t),
+          void (*FREE_FUNC_PTR) (void *, size_t))
+     Replace the current allocation functions from the arguments.  If
+     an argument is `NULL', the corresponding default function is used.
+
+     These functions will be used for all memory allocation done by
+     GMP, apart from temporary space from `alloca' if that function is
+     available and GMP is configured to use it (*note Build Options::).
+
+     *Be sure to call `mp_set_memory_functions' only when there are no
+     active GMP objects allocated using the previous memory functions!
+     Usually that means calling it before any other GMP function.*
+
+   The functions supplied should fit the following declarations:
+
+ -- Function: void * allocate_function (size_t ALLOC_SIZE)
+     Return a pointer to newly allocated space with at least ALLOC_SIZE
+     bytes.
+
+ -- Function: void * reallocate_function (void *PTR, size_t OLD_SIZE,
+          size_t NEW_SIZE)
+     Resize a previously allocated block PTR of OLD_SIZE bytes to be
+     NEW_SIZE bytes.
+
+     The block may be moved if necessary or if desired, and in that
+     case the smaller of OLD_SIZE and NEW_SIZE bytes must be copied to
+     the new location.  The return value is a pointer to the resized
+     block, that being the new location if moved or just PTR if not.
+
+     PTR is never `NULL', it's always a previously allocated block.
+     NEW_SIZE may be bigger or smaller than OLD_SIZE.
+
+ -- Function: void free_function (void *PTR, size_t SIZE)
+     De-allocate the space pointed to by PTR.
+
+     PTR is never `NULL', it's always a previously allocated block of
+     SIZE bytes.
+
+   A "byte" here means the unit used by the `sizeof' operator.
+
+   The REALLOCATE_FUNCTION parameter OLD_SIZE and the FREE_FUNCTION
+parameter SIZE are passed for convenience, but of course they can be
+ignored if not needed by an implementation.  The default functions
+using `malloc' and friends for instance don't use them.
+
+   No error return is allowed from any of these functions, if they
+return then they must have performed the specified operation.  In
+particular note that ALLOCATE_FUNCTION or REALLOCATE_FUNCTION mustn't
+return `NULL'.
+
+   Getting a different fatal error action is a good use for custom
+allocation functions, for example giving a graphical dialog rather than
+the default print to `stderr'.  How much is possible when genuinely out
+of memory is another question though.
+
+   There's currently no defined way for the allocation functions to
+recover from an error such as out of memory, they must terminate
+program execution.  A `longjmp' or throwing a C++ exception will have
+undefined results.  This may change in the future.
+
+   GMP may use allocated blocks to hold pointers to other allocated
+blocks.  This will limit the assumptions a conservative garbage
+collection scheme can make.
+
+   Since the default GMP allocation uses `malloc' and friends, those
+functions will be linked in even if the first thing a program does is an
+`mp_set_memory_functions'.  It's necessary to change the GMP sources if
+this is a problem.
+
+
+ -- Function: void mp_get_memory_functions (
+          void *(**ALLOC_FUNC_PTR) (size_t),
+          void *(**REALLOC_FUNC_PTR) (void *, size_t, size_t),
+          void (**FREE_FUNC_PTR) (void *, size_t))
+     Get the current allocation functions, storing function pointers to
+     the locations given by the arguments.  If an argument is `NULL',
+     that function pointer is not stored.
+
+     For example, to get just the current free function,
+
+          void (*freefunc) (void *, size_t);
+
+          mp_get_memory_functions (NULL, NULL, &freefunc);
+
+\1f
+File: gmp.info,  Node: Language Bindings,  Next: Algorithms,  Prev: Custom Allocation,  Up: Top
+
+15 Language Bindings
+********************
+
+The following packages and projects offer access to GMP from languages
+other than C, though perhaps with varying levels of functionality and
+efficiency.
+
+
+C++
+        * GMP C++ class interface, *note C++ Class Interface::
+          Straightforward interface, expression templates to eliminate
+          temporaries.
+
+        * ALP `http://www-sop.inria.fr/saga/logiciels/ALP/'
+          Linear algebra and polynomials using templates.
+
+        * Arithmos `http://cant.ua.ac.be/old/arithmos/'
+          Rationals with infinities and square roots.
+
+        * CLN `http://www.ginac.de/CLN/'
+          High level classes for arithmetic.
+
+        * LiDIA `http://www.cdc.informatik.tu-darmstadt.de/TI/LiDIA/'
+          A C++ library for computational number theory.
+
+        * Linbox `http://www.linalg.org/'
+          Sparse vectors and matrices.
+
+        * NTL `http://www.shoup.net/ntl/'
+          A C++ number theory library.
+
+Eiffel
+        * Eiffelroom `http://www.eiffelroom.org/node/442'
+
+Fortran
+        * Omni F77 `http://phase.hpcc.jp/Omni/home.html'
+          Arbitrary precision floats.
+
+Haskell
+        * Glasgow Haskell Compiler `http://www.haskell.org/ghc/'
+
+Java
+        * Kaffe `http://www.kaffe.org/'
+
+        * Kissme `http://kissme.sourceforge.net/'
+
+Lisp
+        * GNU Common Lisp `http://www.gnu.org/software/gcl/gcl.html'
+
+        * Librep `http://librep.sourceforge.net/'
+
+        * XEmacs (21.5.18 beta and up) `http://www.xemacs.org'
+          Optional big integers, rationals and floats using GMP.
+
+M4
+        * GNU m4 betas `http://www.seindal.dk/rene/gnu/'
+          Optionally provides an arbitrary precision `mpeval'.
+
+ML
+        * MLton compiler `http://mlton.org/'
+
+Objective Caml
+        * MLGMP `http://www.di.ens.fr/~monniaux/programmes.html.en'
+
+        * Numerix `http://pauillac.inria.fr/~quercia/'
+          Optionally using GMP.
+
+Oz
+        * Mozart `http://www.mozart-oz.org/'
+
+Pascal
+        * GNU Pascal Compiler `http://www.gnu-pascal.de/'
+          GMP unit.
+
+        * Numerix `http://pauillac.inria.fr/~quercia/'
+          For Free Pascal, optionally using GMP.
+
+Perl
+        * GMP module, see `demos/perl' in the GMP sources (*note
+          Demonstration Programs::).
+
+        * Math::GMP `http://www.cpan.org/'
+          Compatible with Math::BigInt, but not as many functions as
+          the GMP module above.
+
+        * Math::BigInt::GMP `http://www.cpan.org/'
+          Plug Math::GMP into normal Math::BigInt operations.
+
+Pike
+        * mpz module in the standard distribution,
+          `http://pike.ida.liu.se/'
+
+Prolog
+        * SWI Prolog `http://www.swi-prolog.org/'
+          Arbitrary precision floats.
+
+Python
+        * GMPY `http://code.google.com/p/gmpy/'
+
+Ruby
+        * gmp gem `http://rubygems.org/gems/gmp'
+
+Scheme
+        * GNU Guile (upcoming 1.8)
+          `http://www.gnu.org/software/guile/guile.html'
+
+        * RScheme `http://www.rscheme.org/'
+
+        * STklos `http://www.stklos.org/'
+
+Smalltalk
+        * GNU Smalltalk
+          `http://www.smalltalk.org/versions/GNUSmalltalk.html'
+
+Other
+        * Axiom `http://savannah.nongnu.org/projects/axiom'
+          Computer algebra using GCL.
+
+        * DrGenius `http://drgenius.seul.org/'
+          Geometry system and mathematical programming language.
+
+        * GiNaC `http://www.ginac.de/'
+          C++ computer algebra using CLN.
+
+        * GOO `http://www.googoogaga.org/'
+          Dynamic object oriented language.
+
+        * Maxima `http://www.ma.utexas.edu/users/wfs/maxima.html'
+          Macsyma computer algebra using GCL.
+
+        * Q `http://q-lang.sourceforge.net/'
+          Equational programming system.
+
+        * Regina `http://regina.sourceforge.net/'
+          Topological calculator.
+
+        * Yacas `http://yacas.sourceforge.net/'
+          Yet another computer algebra system.
+
+
+\1f
+File: gmp.info,  Node: Algorithms,  Next: Internals,  Prev: Language Bindings,  Up: Top
+
+16 Algorithms
+*************
+
+This chapter is an introduction to some of the algorithms used for
+various GMP operations.  The code is likely to be hard to understand
+without knowing something about the algorithms.
+
+   Some GMP internals are mentioned, but applications that expect to be
+compatible with future GMP releases should take care to use only the
+documented functions.
+
+* Menu:
+
+* Multiplication Algorithms::
+* Division Algorithms::
+* Greatest Common Divisor Algorithms::
+* Powering Algorithms::
+* Root Extraction Algorithms::
+* Radix Conversion Algorithms::
+* Other Algorithms::
+* Assembly Coding::
+
+\1f
+File: gmp.info,  Node: Multiplication Algorithms,  Next: Division Algorithms,  Prev: Algorithms,  Up: Algorithms
+
+16.1 Multiplication
+===================
+
+NxN limb multiplications and squares are done using one of seven
+algorithms, as the size N increases.
+
+     Algorithm      Threshold
+     Basecase       (none)
+     Karatsuba      `MUL_TOOM22_THRESHOLD'
+     Toom-3         `MUL_TOOM33_THRESHOLD'
+     Toom-4         `MUL_TOOM44_THRESHOLD'
+     Toom-6.5       `MUL_TOOM6H_THRESHOLD'
+     Toom-8.5       `MUL_TOOM8H_THRESHOLD'
+     FFT            `MUL_FFT_THRESHOLD'
+
+   Similarly for squaring, with the `SQR' thresholds.
+
+   NxM multiplications of operands with different sizes above
+`MUL_TOOM22_THRESHOLD' are currently done by special Toom-inspired
+algorithms or directly with FFT, depending on operand size (*note
+Unbalanced Multiplication::).
+
+* Menu:
+
+* Basecase Multiplication::
+* Karatsuba Multiplication::
+* Toom 3-Way Multiplication::
+* Toom 4-Way Multiplication::
+* Higher degree Toom'n'half::
+* FFT Multiplication::
+* Other Multiplication::
+* Unbalanced Multiplication::
+
+\1f
+File: gmp.info,  Node: Basecase Multiplication,  Next: Karatsuba Multiplication,  Prev: Multiplication Algorithms,  Up: Multiplication Algorithms
+
+16.1.1 Basecase Multiplication
+------------------------------
+
+Basecase NxM multiplication is a straightforward rectangular set of
+cross-products, the same as long multiplication done by hand and for
+that reason sometimes known as the schoolbook or grammar school method.
+This is an O(N*M) algorithm.  See Knuth section 4.3.1 algorithm M
+(*note References::), and the `mpn/generic/mul_basecase.c' code.
+
+   Assembly implementations of `mpn_mul_basecase' are essentially the
+same as the generic C code, but have all the usual assembly tricks and
+obscurities introduced for speed.
+
+   A square can be done in roughly half the time of a multiply, by
+using the fact that the cross products above and below the diagonal are
+the same.  A triangle of products below the diagonal is formed, doubled
+(left shift by one bit), and then the products on the diagonal added.
+This can be seen in `mpn/generic/sqr_basecase.c'.  Again the assembly
+implementations take essentially the same approach.
+
+          u0  u1  u2  u3  u4
+        +---+---+---+---+---+
+     u0 | d |   |   |   |   |
+        +---+---+---+---+---+
+     u1 |   | d |   |   |   |
+        +---+---+---+---+---+
+     u2 |   |   | d |   |   |
+        +---+---+---+---+---+
+     u3 |   |   |   | d |   |
+        +---+---+---+---+---+
+     u4 |   |   |   |   | d |
+        +---+---+---+---+---+
+
+   In practice squaring isn't a full 2x faster than multiplying, it's
+usually around 1.5x.  Less than 1.5x probably indicates
+`mpn_sqr_basecase' wants improving on that CPU.
+
+   On some CPUs `mpn_mul_basecase' can be faster than the generic C
+`mpn_sqr_basecase' on some small sizes.  `SQR_BASECASE_THRESHOLD' is
+the size at which to use `mpn_sqr_basecase', this will be zero if that
+routine should be used always.
+
+\1f
+File: gmp.info,  Node: Karatsuba Multiplication,  Next: Toom 3-Way Multiplication,  Prev: Basecase Multiplication,  Up: Multiplication Algorithms
+
+16.1.2 Karatsuba Multiplication
+-------------------------------
+
+The Karatsuba multiplication algorithm is described in Knuth section
+4.3.3 part A, and various other textbooks.  A brief description is
+given here.
+
+   The inputs x and y are treated as each split into two parts of equal
+length (or the most significant part one limb shorter if N is odd).
+
+      high              low
+     +----------+----------+
+     |    x1    |    x0    |
+     +----------+----------+
+
+     +----------+----------+
+     |    y1    |    y0    |
+     +----------+----------+
+
+   Let b be the power of 2 where the split occurs, i.e. if x0 is k
+limbs (y0 the same) then b=2^(k*mp_bits_per_limb).  With that x=x1*b+x0
+and y=y1*b+y0, and the following holds,
+
+     x*y = (b^2+b)*x1*y1 - b*(x1-x0)*(y1-y0) + (b+1)*x0*y0
+
+   This formula means doing only three multiplies of (N/2)x(N/2) limbs,
+whereas a basecase multiply of NxN limbs is equivalent to four
+multiplies of (N/2)x(N/2).  The factors (b^2+b) etc represent the
+positions where the three products must be added.
+
+      high                              low
+     +--------+--------+ +--------+--------+
+     |      x1*y1      | |      x0*y0      |
+     +--------+--------+ +--------+--------+
+               +--------+--------+
+           add |      x1*y1      |
+               +--------+--------+
+               +--------+--------+
+           add |      x0*y0      |
+               +--------+--------+
+               +--------+--------+
+           sub | (x1-x0)*(y1-y0) |
+               +--------+--------+
+
+   The term (x1-x0)*(y1-y0) is best calculated as an absolute value,
+and the sign used to choose to add or subtract.  Notice the sum
+high(x0*y0)+low(x1*y1) occurs twice, so it's possible to do 5*k limb
+additions, rather than 6*k, but in GMP extra function call overheads
+outweigh the saving.
+
+   Squaring is similar to multiplying, but with x=y the formula reduces
+to an equivalent with three squares,
+
+     x^2 = (b^2+b)*x1^2 - b*(x1-x0)^2 + (b+1)*x0^2
+
+   The final result is accumulated from those three squares the same
+way as for the three multiplies above.  The middle term (x1-x0)^2 is now
+always non-negative.
+
+   A similar formula for both multiplying and squaring can be
+constructed with a middle term (x1+x0)*(y1+y0).  But those sums can
+exceed k limbs, leading to more carry handling and additions than the
+form above.
+
+   Karatsuba multiplication is asymptotically an O(N^1.585) algorithm,
+the exponent being log(3)/log(2), representing 3 multiplies each 1/2
+the size of the inputs.  This is a big improvement over the basecase
+multiply at O(N^2) and the advantage soon overcomes the extra additions
+Karatsuba performs.  `MUL_TOOM22_THRESHOLD' can be as little as 10
+limbs.  The `SQR' threshold is usually about twice the `MUL'.
+
+   The basecase algorithm will take a time of the form M(N) = a*N^2 +
+b*N + c and the Karatsuba algorithm K(N) = 3*M(N/2) + d*N + e, which
+expands to K(N) = 3/4*a*N^2 + 3/2*b*N + 3*c + d*N + e.  The factor 3/4
+for a means per-crossproduct speedups in the basecase code will
+increase the threshold since they benefit M(N) more than K(N).  And
+conversely the 3/2 for b means linear style speedups of b will decrease
+the threshold since they benefit K(N) more than M(N).  The latter can
+be seen for instance when adding an optimized `mpn_sqr_diagonal' to
+`mpn_sqr_basecase'.  Of course all speedups reduce total time, and in
+that sense the algorithm thresholds are merely of academic interest.
+
+\1f
+File: gmp.info,  Node: Toom 3-Way Multiplication,  Next: Toom 4-Way Multiplication,  Prev: Karatsuba Multiplication,  Up: Multiplication Algorithms
+
+16.1.3 Toom 3-Way Multiplication
+--------------------------------
+
+The Karatsuba formula is the simplest case of a general approach to
+splitting inputs that leads to both Toom and FFT algorithms.  A
+description of Toom can be found in Knuth section 4.3.3, with an
+example 3-way calculation after Theorem A.  The 3-way form used in GMP
+is described here.
+
+   The operands are each considered split into 3 pieces of equal length
+(or the most significant part 1 or 2 limbs shorter than the other two).
+
+      high                         low
+     +----------+----------+----------+
+     |    x2    |    x1    |    x0    |
+     +----------+----------+----------+
+
+     +----------+----------+----------+
+     |    y2    |    y1    |    y0    |
+     +----------+----------+----------+
+
+These parts are treated as the coefficients of two polynomials
+
+     X(t) = x2*t^2 + x1*t + x0
+     Y(t) = y2*t^2 + y1*t + y0
+
+   Let b equal the power of 2 which is the size of the x0, x1, y0 and
+y1 pieces, i.e. if they're k limbs each then b=2^(k*mp_bits_per_limb).
+With this x=X(b) and y=Y(b).
+
+   Let a polynomial W(t)=X(t)*Y(t) and suppose its coefficients are
+
+     W(t) = w4*t^4 + w3*t^3 + w2*t^2 + w1*t + w0
+
+   The w[i] are going to be determined, and when they are they'll give
+the final result using w=W(b), since x*y=X(b)*Y(b)=W(b).  The
+coefficients will be roughly b^2 each, and the final W(b) will be an
+addition like,
+
+      high                                        low
+     +-------+-------+
+     |       w4      |
+     +-------+-------+
+            +--------+-------+
+            |        w3      |
+            +--------+-------+
+                    +--------+-------+
+                    |        w2      |
+                    +--------+-------+
+                            +--------+-------+
+                            |        w1      |
+                            +--------+-------+
+                                     +-------+-------+
+                                     |       w0      |
+                                     +-------+-------+
+
+   The w[i] coefficients could be formed by a simple set of cross
+products, like w4=x2*y2, w3=x2*y1+x1*y2, w2=x2*y0+x1*y1+x0*y2 etc, but
+this would need all nine x[i]*y[j] for i,j=0,1,2, and would be
+equivalent merely to a basecase multiply.  Instead the following
+approach is used.
+
+   X(t) and Y(t) are evaluated and multiplied at 5 points, giving
+values of W(t) at those points.  In GMP the following points are used,
+
+     Point    Value
+     t=0      x0 * y0, which gives w0 immediately
+     t=1      (x2+x1+x0) * (y2+y1+y0)
+     t=-1     (x2-x1+x0) * (y2-y1+y0)
+     t=2      (4*x2+2*x1+x0) * (4*y2+2*y1+y0)
+     t=inf    x2 * y2, which gives w4 immediately
+
+   At t=-1 the values can be negative and that's handled using the
+absolute values and tracking the sign separately.  At t=inf the value
+is actually X(t)*Y(t)/t^4 in the limit as t approaches infinity, but
+it's much easier to think of as simply x2*y2 giving w4 immediately
+(much like x0*y0 at t=0 gives w0 immediately).
+
+   Each of the points substituted into W(t)=w4*t^4+...+w0 gives a
+linear combination of the w[i] coefficients, and the value of those
+combinations has just been calculated.
+
+     W(0)   =                              w0
+     W(1)   =    w4 +   w3 +   w2 +   w1 + w0
+     W(-1)  =    w4 -   w3 +   w2 -   w1 + w0
+     W(2)   = 16*w4 + 8*w3 + 4*w2 + 2*w1 + w0
+     W(inf) =    w4
+
+   This is a set of five equations in five unknowns, and some
+elementary linear algebra quickly isolates each w[i].  This involves
+adding or subtracting one W(t) value from another, and a couple of
+divisions by powers of 2 and one division by 3, the latter using the
+special `mpn_divexact_by3' (*note Exact Division::).
+
+   The conversion of W(t) values to the coefficients is interpolation.
+A polynomial of degree 4 like W(t) is uniquely determined by values
+known at 5 different points.  The points are arbitrary and can be
+chosen to make the linear equations come out with a convenient set of
+steps for quickly isolating the w[i].
+
+   Squaring follows the same procedure as multiplication, but there's
+only one X(t) and it's evaluated at the 5 points, and those values
+squared to give values of W(t).  The interpolation is then identical,
+and in fact the same `toom_interpolate_5pts' subroutine is used for
+both squaring and multiplying.
+
+   Toom-3 is asymptotically O(N^1.465), the exponent being
+log(5)/log(3), representing 5 recursive multiplies of 1/3 the original
+size each.  This is an improvement over Karatsuba at O(N^1.585), though
+Toom does more work in the evaluation and interpolation and so it only
+realizes its advantage above a certain size.
+
+   Near the crossover between Toom-3 and Karatsuba there's generally a
+range of sizes where the difference between the two is small.
+`MUL_TOOM33_THRESHOLD' is a somewhat arbitrary point in that range and
+successive runs of the tune program can give different values due to
+small variations in measuring.  A graph of time versus size for the two
+shows the effect, see `tune/README'.
+
+   At the fairly small sizes where the Toom-3 thresholds occur it's
+worth remembering that the asymptotic behaviour for Karatsuba and
+Toom-3 can't be expected to make accurate predictions, due of course to
+the big influence of all sorts of overheads, and the fact that only a
+few recursions of each are being performed.  Even at large sizes
+there's a good chance machine dependent effects like cache architecture
+will mean actual performance deviates from what might be predicted.
+
+   The formula given for the Karatsuba algorithm (*note Karatsuba
+Multiplication::) has an equivalent for Toom-3 involving only five
+multiplies, but this would be complicated and unenlightening.
+
+   An alternate view of Toom-3 can be found in Zuras (*note
+References::), using a vector to represent the x and y splits and a
+matrix multiplication for the evaluation and interpolation stages.  The
+matrix inverses are not meant to be actually used, and they have
+elements with values much greater than in fact arise in the
+interpolation steps.  The diagram shown for the 3-way is attractive,
+but again doesn't have to be implemented that way and for example with
+a bit of rearrangement just one division by 6 can be done.
+
+\1f
+File: gmp.info,  Node: Toom 4-Way Multiplication,  Next: Higher degree Toom'n'half,  Prev: Toom 3-Way Multiplication,  Up: Multiplication Algorithms
+
+16.1.4 Toom 4-Way Multiplication
+--------------------------------
+
+Karatsuba and Toom-3 split the operands into 2 and 3 coefficients,
+respectively.  Toom-4 analogously splits the operands into 4
+coefficients.  Using the notation from the section on Toom-3
+multiplication, we form two polynomials:
+
+     X(t) = x3*t^3 + x2*t^2 + x1*t + x0
+     Y(t) = y3*t^3 + y2*t^2 + y1*t + y0
+
+   X(t) and Y(t) are evaluated and multiplied at 7 points, giving
+values of W(t) at those points.  In GMP the following points are used,
+
+     Point    Value
+     t=0      x0 * y0, which gives w0 immediately
+     t=1/2    (x3+2*x2+4*x1+8*x0) * (y3+2*y2+4*y1+8*y0)
+     t=-1/2   (-x3+2*x2-4*x1+8*x0) * (-y3+2*y2-4*y1+8*y0)
+     t=1      (x3+x2+x1+x0) * (y3+y2+y1+y0)
+     t=-1     (-x3+x2-x1+x0) * (-y3+y2-y1+y0)
+     t=2      (8*x3+4*x2+2*x1+x0) * (8*y3+4*y2+2*y1+y0)
+     t=inf    x3 * y3, which gives w6 immediately
+
+   The number of additions and subtractions for Toom-4 is much larger
+than for Toom-3.  But several subexpressions occur multiple times, for
+example x2+x0 occurs for both t=1 and t=-1.
+
+   Toom-4 is asymptotically O(N^1.404), the exponent being
+log(7)/log(4), representing 7 recursive multiplies of 1/4 the original
+size each.
+
+\1f
+File: gmp.info,  Node: Higher degree Toom'n'half,  Next: FFT Multiplication,  Prev: Toom 4-Way Multiplication,  Up: Multiplication Algorithms
+
+16.1.5 Higher degree Toom'n'half
+--------------------------------
+
+The Toom algorithms described above (*note Toom 3-Way Multiplication::,
+*note Toom 4-Way Multiplication::) generalize to split into an
+arbitrary number of pieces. In general a split of two equally long
+operands into r pieces leads to evaluations and pointwise
+multiplications done at 2*r-1 points. To fully exploit symmetries it
+would be better to have a multiple of 4 points, which is why
+Toom'n'half is used for higher degrees.
+
+   Toom'n'half means that the existence of one more piece is considered
+for a single operand. It can be virtual, i.e. zero, or real, when the
+two operands are not exactly balanced. By choosing an even r, Toom-r+1/2
+requires 2r points, a multiple of four.
+
+   The four-plets of points include 0, inf, +1, -1 and +-2^i, +-2^-i.
+Each of them gives shortcuts for the evaluation phase and for some
+steps in the interpolation phase. Further tricks are used to reduce the
+memory footprint of the whole multiplication algorithm to a memory
+buffer equal in size to the result of the product.
+
+   Current GMP uses both Toom-6'n'half and Toom-8'n'half.
+
+\1f
+File: gmp.info,  Node: FFT Multiplication,  Next: Other Multiplication,  Prev: Higher degree Toom'n'half,  Up: Multiplication Algorithms
+
+16.1.6 FFT Multiplication
+-------------------------
+
+At large to very large sizes a Fermat style FFT multiplication is used,
+following Schönhage and Strassen (*note References::).  Descriptions of
+FFTs in various forms can be found in many textbooks, for instance
+Knuth section 4.3.3 part C or Lipson chapter IX.  A brief description
+of the form used in GMP is given here.
+
+   The multiplication done is x*y mod 2^N+1, for a given N.  A full
+product x*y is obtained by choosing N>=bits(x)+bits(y) and padding x
+and y with high zero limbs.  The modular product is the native form for
+the algorithm, so padding to get a full product is unavoidable.
+
+   The algorithm follows a split, evaluate, pointwise multiply,
+interpolate and combine similar to that described above for Karatsuba
+and Toom-3.  A k parameter controls the split, with an FFT-k splitting
+into 2^k pieces of M=N/2^k bits each.  N must be a multiple of
+(2^k)*mp_bits_per_limb so the split falls on limb boundaries, avoiding
+bit shifts in the split and combine stages.
+
+   The evaluations, pointwise multiplications, and interpolation, are
+all done modulo 2^N'+1 where N' is 2M+k+3 rounded up to a multiple of
+2^k and of `mp_bits_per_limb'.  The results of interpolation will be
+the following negacyclic convolution of the input pieces, and the
+choice of N' ensures these sums aren't truncated.
+
+                ---
+                \         b
+     w[n] =     /     (-1) * x[i] * y[j]
+                ---
+            i+j==b*2^k+n
+               b=0,1
+
+   The points used for the evaluation are g^i for i=0 to 2^k-1 where
+g=2^(2N'/2^k).  g is a 2^k'th root of unity mod 2^N'+1, which produces
+necessary cancellations at the interpolation stage, and it's also a
+power of 2 so the fast Fourier transforms used for the evaluation and
+interpolation do only shifts, adds and negations.
+
+   The pointwise multiplications are done modulo 2^N'+1 and either
+recurse into a further FFT or use a plain multiplication (Toom-3,
+Karatsuba or basecase), whichever is optimal at the size N'.  The
+interpolation is an inverse fast Fourier transform.  The resulting set
+of sums of x[i]*y[j] are added at appropriate offsets to give the final
+result.
+
+   Squaring is the same, but x is the only input so it's one transform
+at the evaluate stage and the pointwise multiplies are squares.  The
+interpolation is the same.
+
+   For a mod 2^N+1 product, an FFT-k is an O(N^(k/(k-1))) algorithm,
+the exponent representing 2^k recursed modular multiplies each
+1/2^(k-1) the size of the original.  Each successive k is an asymptotic
+improvement, but overheads mean each is only faster at bigger and
+bigger sizes.  In the code, `MUL_FFT_TABLE' and `SQR_FFT_TABLE' are the
+thresholds where each k is used.  Each new k effectively swaps some
+multiplying for some shifts, adds and overheads.
+
+   A mod 2^N+1 product can be formed with a normal NxN->2N bit multiply
+plus a subtraction, so an FFT and Toom-3 etc can be compared directly.
+A k=4 FFT at O(N^1.333) can be expected to be the first faster than
+Toom-3 at O(N^1.465).  In practice this is what's found, with
+`MUL_FFT_MODF_THRESHOLD' and `SQR_FFT_MODF_THRESHOLD' being between 300
+and 1000 limbs, depending on the CPU.  So far it's been found that only
+very large FFTs recurse into pointwise multiplies above these sizes.
+
+   When an FFT is to give a full product, the change of N to 2N doesn't
+alter the theoretical complexity for a given k, but for the purposes of
+considering where an FFT might be first used it can be assumed that the
+FFT is recursing into a normal multiply and that on that basis it's
+doing 2^k recursed multiplies each 1/2^(k-2) the size of the inputs,
+making it O(N^(k/(k-2))).  This would mean k=7 at O(N^1.4) would be the
+first FFT faster than Toom-3.  In practice `MUL_FFT_THRESHOLD' and
+`SQR_FFT_THRESHOLD' have been found to be in the k=8 range, somewhere
+between 3000 and 10000 limbs.
+
+   The way N is split into 2^k pieces and then 2M+k+3 is rounded up to
+a multiple of 2^k and `mp_bits_per_limb' means that when
+2^k>=mp_bits_per_limb the effective N is a multiple of 2^(2k-1) bits.
+The +k+3 means some values of N just under such a multiple will be
+rounded to the next.  The complexity calculations above assume that a
+favourable size is used, meaning one which isn't padded through
+rounding, and it's also assumed that the extra +k+3 bits are negligible
+at typical FFT sizes.
+
+   The practical effect of the 2^(2k-1) constraint is to introduce a
+step-effect into measured speeds.  For example k=8 will round N up to a
+multiple of 32768 bits, so for a 32-bit limb there'll be 512 limb
+groups of sizes for which `mpn_mul_n' runs at the same speed.  Or for
+k=9 groups of 2048 limbs, k=10 groups of 8192 limbs, etc.  In practice
+it's been found each k is used at quite small multiples of its size
+constraint and so the step effect is quite noticeable in a time versus
+size graph.
+
+   The threshold determinations currently measure at the mid-points of
+size steps, but this is sub-optimal since at the start of a new step it
+can happen that it's better to go back to the previous k for a while.
+Something more sophisticated for `MUL_FFT_TABLE' and `SQR_FFT_TABLE'
+will be needed.
+
+\1f
+File: gmp.info,  Node: Other Multiplication,  Next: Unbalanced Multiplication,  Prev: FFT Multiplication,  Up: Multiplication Algorithms
+
+16.1.7 Other Multiplication
+---------------------------
+
+The Toom algorithms described above (*note Toom 3-Way Multiplication::,
+*note Toom 4-Way Multiplication::) generalize to split into an
+arbitrary number of pieces, as per Knuth section 4.3.3 algorithm C.
+This is not currently used.  The notes here are merely for interest.
+
+   In general a split into r+1 pieces is made, and evaluations and
+pointwise multiplications done at 2*r+1 points.  A 4-way split does 7
+pointwise multiplies, 5-way does 9, etc.  Asymptotically an (r+1)-way
+algorithm is O(N^(log(2*r+1)/log(r+1))).  Only the pointwise
+multiplications count towards big-O complexity, but the time spent in
+the evaluate and interpolate stages grows with r and has a significant
+practical impact, with the asymptotic advantage of each r realized only
+at bigger and bigger sizes.  The overheads grow as O(N*r), whereas in
+an r=2^k FFT they grow only as O(N*log(r)).
+
+   Knuth algorithm C evaluates at points 0,1,2,...,2*r, but exercise 4
+uses -r,...,0,...,r and the latter saves some small multiplies in the
+evaluate stage (or rather trades them for additions), and has a further
+saving of nearly half the interpolate steps.  The idea is to separate
+odd and even final coefficients and then perform algorithm C steps C7
+and C8 on them separately.  The divisors at step C7 become j^2 and the
+multipliers at C8 become 2*t*j-j^2.
+
+   Splitting odd and even parts through positive and negative points
+can be thought of as using -1 as a square root of unity.  If a 4th root
+of unity was available then a further split and speedup would be
+possible, but no such root exists for plain integers.  Going to complex
+integers with i=sqrt(-1) doesn't help, essentially because in Cartesian
+form it takes three real multiplies to do a complex multiply.  The
+existence of 2^k'th roots of unity in a suitable ring or field lets the
+fast Fourier transform keep splitting and get to O(N*log(r)).
+
+   Floating point FFTs use complex numbers approximating Nth roots of
+unity.  Some processors have special support for such FFTs.  But these
+are not used in GMP since it's very difficult to guarantee an exact
+result (to some number of bits).  An occasional difference of 1 in the
+last bit might not matter to a typical signal processing algorithm, but
+is of course of vital importance to GMP.
+
+\1f
+File: gmp.info,  Node: Unbalanced Multiplication,  Prev: Other Multiplication,  Up: Multiplication Algorithms
+
+16.1.8 Unbalanced Multiplication
+--------------------------------
+
+Multiplication of operands with different sizes, both below
+`MUL_TOOM22_THRESHOLD', is done with plain schoolbook multiplication
+(*note Basecase Multiplication::).
+
+   For really large operands, we invoke FFT directly.
+
+   For operands between these sizes, we use Toom inspired algorithms
+suggested by Alberto Zanoni and Marco Bodrato.  The idea is to split
+the operands into polynomials of different degree.  GMP currently
+splits the smaller operand into 2 coefficients, i.e., a polynomial of
+degree 1, but the larger operand can be split into 2, 3, or 4
+coefficients, i.e., a polynomial of degree 1 to 3.
+
+\1f
+File: gmp.info,  Node: Division Algorithms,  Next: Greatest Common Divisor Algorithms,  Prev: Multiplication Algorithms,  Up: Algorithms
+
+16.2 Division Algorithms
+========================
+
+* Menu:
+
+* Single Limb Division::
+* Basecase Division::
+* Divide and Conquer Division::
+* Block-Wise Barrett Division::
+* Exact Division::
+* Exact Remainder::
+* Small Quotient Division::
+
+\1f
+File: gmp.info,  Node: Single Limb Division,  Next: Basecase Division,  Prev: Division Algorithms,  Up: Division Algorithms
+
+16.2.1 Single Limb Division
+---------------------------
+
+Nx1 division is implemented using repeated 2x1 divisions from high to
+low, either with a hardware divide instruction or a multiplication by
+inverse, whichever is best on a given CPU.
+
+   The multiply by inverse follows "Improved division by invariant
+integers" by Möller and Granlund (*note References::) and is
+implemented as `udiv_qrnnd_preinv' in `gmp-impl.h'.  The idea is to
+have a fixed-point approximation to 1/d (see `invert_limb') and then
+multiply by the high limb (plus one bit) of the dividend to get a
+quotient q.  With d normalized (high bit set), q is no more than 1 too
+small.  Subtracting q*d from the dividend gives a remainder, and
+reveals whether q or q-1 is correct.
+
+   The result is a division done with two multiplications and four or
+five arithmetic operations.  On CPUs with low latency multipliers this
+can be much faster than a hardware divide, though the cost of
+calculating the inverse at the start may mean it's only better on
+inputs bigger than say 4 or 5 limbs.
+
+   When a divisor must be normalized, either for the generic C
+`__udiv_qrnnd_c' or the multiply by inverse, the division performed is
+actually a*2^k by d*2^k where a is the dividend and k is the power
+necessary to have the high bit of d*2^k set.  The bit shifts for the
+dividend are usually accomplished "on the fly" meaning by extracting
+the appropriate bits at each step.  Done this way the quotient limbs
+come out aligned ready to store.  When only the remainder is wanted, an
+alternative is to take the dividend limbs unshifted and calculate r = a
+mod d*2^k followed by an extra final step r*2^k mod d*2^k.  This can
+help on CPUs with poor bit shifts or few registers.
+
+   The multiply by inverse can be done two limbs at a time.  The
+calculation is basically the same, but the inverse is two limbs and the
+divisor treated as if padded with a low zero limb.  This means more
+work, since the inverse will need a 2x2 multiply, but the four 1x1s to
+do that are independent and can therefore be done partly or wholly in
+parallel.  Likewise for a 2x1 calculating q*d.  The net effect is to
+process two limbs with roughly the same two multiplies worth of latency
+that one limb at a time gives.  This extends to 3 or 4 limbs at a time,
+though the extra work to apply the inverse will almost certainly soon
+reach the limits of multiplier throughput.
+
+   A similar approach in reverse can be taken to process just half a
+limb at a time if the divisor is only a half limb.  In this case the
+1x1 multiply for the inverse effectively becomes two (1/2)x1 for each
+limb, which can be a saving on CPUs with a fast half limb multiply, or
+in fact if the only multiply is a half limb, and especially if it's not
+pipelined.
+
+\1f
+File: gmp.info,  Node: Basecase Division,  Next: Divide and Conquer Division,  Prev: Single Limb Division,  Up: Division Algorithms
+
+16.2.2 Basecase Division
+------------------------
+
+Basecase NxM division is like long division done by hand, but in base
+2^mp_bits_per_limb.  See Knuth section 4.3.1 algorithm D, and
+`mpn/generic/sb_divrem_mn.c'.
+
+   Briefly stated, while the dividend remains larger than the divisor,
+a high quotient limb is formed and the Nx1 product q*d subtracted at
+the top end of the dividend.  With a normalized divisor (most
+significant bit set), each quotient limb can be formed with a 2x1
+division and a 1x1 multiplication plus some subtractions.  The 2x1
+division is by the high limb of the divisor and is done either with a
+hardware divide or a multiply by inverse (the same as in *note Single
+Limb Division::) whichever is faster.  Such a quotient is sometimes one
+too big, requiring an addback of the divisor, but that happens rarely.
+
+   With Q=N-M being the number of quotient limbs, this is an O(Q*M)
+algorithm and will run at a speed similar to a basecase QxM
+multiplication, differing in fact only in the extra multiply and divide
+for each of the Q quotient limbs.
+
+\1f
+File: gmp.info,  Node: Divide and Conquer Division,  Next: Block-Wise Barrett Division,  Prev: Basecase Division,  Up: Division Algorithms
+
+16.2.3 Divide and Conquer Division
+----------------------------------
+
+For divisors larger than `DC_DIV_QR_THRESHOLD', division is done by
+dividing.  Or to be precise by a recursive divide and conquer algorithm
+based on work by Moenck and Borodin, Jebelean, and Burnikel and Ziegler
+(*note References::).
+
+   The algorithm consists essentially of recognising that a 2NxN
+division can be done with the basecase division algorithm (*note
+Basecase Division::), but using N/2 limbs as a base, not just a single
+limb.  This way the multiplications that arise are (N/2)x(N/2) and can
+take advantage of Karatsuba and higher multiplication algorithms (*note
+Multiplication Algorithms::).  The two "digits" of the quotient are
+formed by recursive Nx(N/2) divisions.
+
+   If the (N/2)x(N/2) multiplies are done with a basecase multiplication
+then the work is about the same as a basecase division, but with more
+function call overheads and with some subtractions separated from the
+multiplies.  These overheads mean that it's only when N/2 is above
+`MUL_TOOM22_THRESHOLD' that divide and conquer is of use.
+
+   `DC_DIV_QR_THRESHOLD' is based on the divisor size N, so it will be
+somewhere above twice `MUL_TOOM22_THRESHOLD', but how much above
+depends on the CPU.  An optimized `mpn_mul_basecase' can lower
+`DC_DIV_QR_THRESHOLD' a little by offering a ready-made advantage over
+repeated `mpn_submul_1' calls.
+
+   Divide and conquer is asymptotically O(M(N)*log(N)) where M(N) is
+the time for an NxN multiplication done with FFTs.  The actual time is
+a sum over multiplications of the recursed sizes, as can be seen near
+the end of section 2.2 of Burnikel and Ziegler.  For example, within
+the Toom-3 range, divide and conquer is 2.63*M(N).  With higher
+algorithms the M(N) term improves and the multiplier tends to log(N).
+In practice, at moderate to large sizes, a 2NxN division is about 2 to
+4 times slower than an NxN multiplication.
+
+\1f
+File: gmp.info,  Node: Block-Wise Barrett Division,  Next: Exact Division,  Prev: Divide and Conquer Division,  Up: Division Algorithms
+
+16.2.4 Block-Wise Barrett Division
+----------------------------------
+
+For the largest divisions, a block-wise Barrett division algorithm is
+used.  Here, the divisor is inverted to a precision determined by the
+relative size of the dividend and divisor.  Blocks of quotient limbs
+are then generated by multiplying blocks from the dividend by the
+inverse.
+
+   Our block-wise algorithm computes a smaller inverse than in the
+plain Barrett algorithm.  For a 2n/n division, the inverse will be just
+ceil(n/2) limbs.
+
+\1f
+File: gmp.info,  Node: Exact Division,  Next: Exact Remainder,  Prev: Block-Wise Barrett Division,  Up: Division Algorithms
+
+16.2.5 Exact Division
+---------------------
+
+A so-called exact division is when the dividend is known to be an exact
+multiple of the divisor.  Jebelean's exact division algorithm uses this
+knowledge to make some significant optimizations (*note References::).
+
+   The idea can be illustrated in decimal for example with 368154
+divided by 543.  Because the low digit of the dividend is 4, the low
+digit of the quotient must be 8.  This is arrived at from 4*7 mod 10,
+using the fact 7 is the modular inverse of 3 (the low digit of the
+divisor), since 3*7 == 1 mod 10.  So 8*543=4344 can be subtracted from
+the dividend leaving 363810.  Notice the low digit has become zero.
+
+   The procedure is repeated at the second digit, with the next
+quotient digit 7 (7 == 1*7 mod 10), subtracting 7*543=3801, leaving
+325800.  And finally at the third digit with quotient digit 6 (8*7 mod
+10), subtracting 6*543=3258 leaving 0.  So the quotient is 678.
+
+   Notice however that the multiplies and subtractions don't need to
+extend past the low three digits of the dividend, since that's enough
+to determine the three quotient digits.  For the last quotient digit no
+subtraction is needed at all.  On a 2NxN division like this one, only
+about half the work of a normal basecase division is necessary.
+
+   For an NxM exact division producing Q=N-M quotient limbs, the saving
+over a normal basecase division is in two parts.  Firstly, each of the
+Q quotient limbs needs only one multiply, not a 2x1 divide and
+multiply.  Secondly, the crossproducts are reduced when Q>M to
+Q*M-M*(M+1)/2, or when Q<=M to Q*(Q-1)/2.  Notice the savings are
+complementary.  If Q is big then many divisions are saved, or if Q is
+small then the crossproducts reduce to a small number.
+
+   The modular inverse used is calculated efficiently by `binvert_limb'
+in `gmp-impl.h'.  This does four multiplies for a 32-bit limb, or six
+for a 64-bit limb.  `tune/modlinv.c' has some alternate implementations
+that might suit processors better at bit twiddling than multiplying.
+
+   The sub-quadratic exact division described by Jebelean in "Exact
+Division with Karatsuba Complexity" is not currently implemented.  It
+uses a rearrangement similar to the divide and conquer for normal
+division (*note Divide and Conquer Division::), but operating from low
+to high.  A further possibility not currently implemented is
+"Bidirectional Exact Integer Division" by Krandick and Jebelean which
+forms quotient limbs from both the high and low ends of the dividend,
+and can halve once more the number of crossproducts needed in a 2NxN
+division.
+
+   A special case exact division by 3 exists in `mpn_divexact_by3',
+supporting Toom-3 multiplication and `mpq' canonicalizations.  It forms
+quotient digits with a multiply by the modular inverse of 3 (which is
+`0xAA..AAB') and uses two comparisons to determine a borrow for the next
+limb.  The multiplications don't need to be on the dependent chain, as
+long as the effect of the borrows is applied, which can help chips with
+pipelined multipliers.
+
+\1f
+File: gmp.info,  Node: Exact Remainder,  Next: Small Quotient Division,  Prev: Exact Division,  Up: Division Algorithms
+
+16.2.6 Exact Remainder
+----------------------
+
+If the exact division algorithm is done with a full subtraction at each
+stage and the dividend isn't a multiple of the divisor, then low zero
+limbs are produced but with a remainder in the high limbs.  For
+dividend a, divisor d, quotient q, and b = 2^mp_bits_per_limb, this
+remainder r is of the form
+
+     a = q*d + r*b^n
+
+   n represents the number of zero limbs produced by the subtractions,
+that being the number of limbs produced for q.  r will be in the range
+0<=r<d and can be viewed as a remainder, but one shifted up by a factor
+of b^n.
+
+   Carrying out full subtractions at each stage means the same number
+of cross products must be done as a normal division, but there's still
+some single limb divisions saved.  When d is a single limb some
+simplifications arise, providing good speedups on a number of
+processors.
+
+   `mpn_divexact_by3', `mpn_modexact_1_odd' and the `mpn_redc_X'
+functions differ subtly in how they return r, leading to some negations
+in the above formula, but all are essentially the same.
+
+   Clearly r is zero when a is a multiple of d, and this leads to
+divisibility or congruence tests which are potentially more efficient
+than a normal division.
+
+   The factor of b^n on r can be ignored in a GCD when d is odd, hence
+the use of `mpn_modexact_1_odd' by `mpn_gcd_1' and `mpz_kronecker_ui'
+etc (*note Greatest Common Divisor Algorithms::).
+
+   Montgomery's REDC method for modular multiplications uses operands
+of the form of x*b^-n and y*b^-n and on calculating (x*b^-n)*(y*b^-n)
+uses the factor of b^n in the exact remainder to reach a product in the
+same form (x*y)*b^-n (*note Modular Powering Algorithm::).
+
+   Notice that r generally gives no useful information about the
+ordinary remainder a mod d since b^n mod d could be anything.  If
+however b^n == 1 mod d, then r is the negative of the ordinary
+remainder.  This occurs whenever d is a factor of b^n-1, as for example
+with 3 in `mpn_divexact_by3'.  For a 32 or 64 bit limb other such
+factors include 5, 17 and 257, but no particular use has been found for
+this.
+
+\1f
+File: gmp.info,  Node: Small Quotient Division,  Prev: Exact Remainder,  Up: Division Algorithms
+
+16.2.7 Small Quotient Division
+------------------------------
+
+An NxM division where the number of quotient limbs Q=N-M is small can
+be optimized somewhat.
+
+   An ordinary basecase division normalizes the divisor by shifting it
+to make the high bit set, shifting the dividend accordingly, and
+shifting the remainder back down at the end of the calculation.  This
+is wasteful if only a few quotient limbs are to be formed.  Instead a
+division of just the top 2*Q limbs of the dividend by the top Q limbs
+of the divisor can be used to form a trial quotient.  This requires
+only those limbs normalized, not the whole of the divisor and dividend.
+
+   A multiply and subtract then applies the trial quotient to the M-Q
+unused limbs of the divisor and N-Q dividend limbs (which includes Q
+limbs remaining from the trial quotient division).  The starting trial
+quotient can be 1 or 2 too big, but all cases of 2 too big and most
+cases of 1 too big are detected by first comparing the most significant
+limbs that will arise from the subtraction.  An addback is done if the
+quotient still turns out to be 1 too big.
+
+   This whole procedure is essentially the same as one step of the
+basecase algorithm done in a Q limb base, though with the trial
+quotient test done only with the high limbs, not an entire Q limb
+"digit" product.  The correctness of this weaker test can be
+established by following the argument of Knuth section 4.3.1 exercise
+20 but with the v2*q>b*r+u2 condition appropriately relaxed.
+
+\1f
+File: gmp.info,  Node: Greatest Common Divisor Algorithms,  Next: Powering Algorithms,  Prev: Division Algorithms,  Up: Algorithms
+
+16.3 Greatest Common Divisor
+============================
+
+* Menu:
+
+* Binary GCD::
+* Lehmer's Algorithm::
+* Subquadratic GCD::
+* Extended GCD::
+* Jacobi Symbol::
+
+\1f
+File: gmp.info,  Node: Binary GCD,  Next: Lehmer's Algorithm,  Prev: Greatest Common Divisor Algorithms,  Up: Greatest Common Divisor Algorithms
+
+16.3.1 Binary GCD
+-----------------
+
+At small sizes GMP uses an O(N^2) binary style GCD.  This is described
+in many textbooks, for example Knuth section 4.5.2 algorithm B.  It
+simply consists of successively reducing odd operands a and b using
+
+     a,b = abs(a-b),min(a,b)
+     strip factors of 2 from a
+
+   The Euclidean GCD algorithm, as per Knuth algorithms E and A,
+repeatedly computes the quotient q = floor(a/b) and replaces a,b by
+b, a - q*b.  The binary algorithm has so far been found to be faster
+than the Euclidean algorithm everywhere.  One reason the binary method
+does well is that the implied quotient at each step is usually small, so
+often only one or two subtractions are needed to get the same effect as
+a division.  Quotients 1, 2 and 3 for example occur 67.7% of the time,
+see Knuth section 4.5.3 Theorem E.
+
+   When the implied quotient is large, meaning b is much smaller than
+a, then a division is worthwhile.  This is the basis for the initial a
+mod b reductions in `mpn_gcd' and `mpn_gcd_1' (the latter for both Nx1
+and 1x1 cases).  But after that initial reduction, big quotients occur
+too rarely to make it worth checking for them.
+
+
+   The final 1x1 GCD in `mpn_gcd_1' is done in the generic C code as
+described above.  For two N-bit operands, the algorithm takes about
+0.68 iterations per bit.  For optimum performance some attention needs
+to be paid to the way the factors of 2 are stripped from a.
+
+   Firstly it may be noted that in two's complement the number of low
+zero bits on a-b is the same as b-a, so counting or testing can begin on
+a-b without waiting for abs(a-b) to be determined.
+
+   A loop stripping low zero bits tends not to branch predict well,
+since the condition is data dependent.  But on average there's only a
+few low zeros, so an option is to strip one or two bits arithmetically
+then loop for more (as done for AMD K6).  Or use a lookup table to get
+a count for several bits then loop for more (as done for AMD K7).  An
+alternative approach is to keep just one of a or b odd and iterate
+
+     a,b = abs(a-b), min(a,b)
+     a = a/2 if even
+     b = b/2 if even
+
+   This requires about 1.25 iterations per bit, but stripping of a
+single bit at each step avoids any branching.  Repeating the bit strip
+reduces to about 0.9 iterations per bit, which may be a worthwhile
+tradeoff.
+
+   Generally with the above approaches a speed of perhaps 6 cycles per
+bit can be achieved, which is still not terribly fast with for instance
+a 64-bit GCD taking nearly 400 cycles.  It's this sort of time which
+means it's not usually advantageous to combine a set of divisibility
+tests into a GCD.
+
+   Currently, the binary algorithm is used for GCD only when N < 3.
+
+\1f
+File: gmp.info,  Node: Lehmer's Algorithm,  Next: Subquadratic GCD,  Prev: Binary GCD,  Up: Greatest Common Divisor Algorithms
+
+16.3.2 Lehmer's algorithm
+-------------------------
+
+Lehmer's improvement of the Euclidean algorithm is based on the
+observation that the initial part of the quotient sequence depends only
+on the most significant parts of the inputs. The variant of Lehmer's
+algorithm used in GMP splits off the most significant two limbs, as
+suggested, e.g., in "A Double-Digit Lehmer-Euclid Algorithm" by
+Jebelean (*note References::). The quotients of two double-limb inputs
+are collected as a 2 by 2 matrix with single-limb elements. This is
+done by the function `mpn_hgcd2'. The resulting matrix is applied to
+the inputs using `mpn_mul_1' and `mpn_submul_1'. Each iteration usually
+reduces the inputs by almost one limb. In the rare case of a large
+quotient, no progress can be made by examining just the most
+significant two limbs, and the quotient is computed using plain
+division.
+
+   The resulting algorithm is asymptotically O(N^2), just like the
+Euclidean algorithm and the binary algorithm. The quadratic part of the
+work is the calls to `mpn_mul_1' and `mpn_submul_1'. For small sizes,
+the linear work is also significant. There are roughly N calls to the
+`mpn_hgcd2' function. This function uses a couple of important
+optimizations:
+
+   * It uses the same relaxed notion of correctness as `mpn_hgcd' (see
+     next section). This means that when called with the most
+     significant two limbs of two large numbers, the returned matrix
+     does not always correspond exactly to the initial quotient
+     sequence for the two large numbers; the final quotient may
+     sometimes be one off.
+
+   * It takes advantage of the fact the quotients are usually small.
+     The division operator is not used, since the corresponding
+     assembler instruction is very slow on most architectures. (This
+     code could probably be improved further; it uses many branches
+     that are unfriendly to prediction.)
+
+   * It switches from double-limb calculations to single-limb
+     calculations half-way through, when the input numbers have been
+     reduced in size from two limbs to one and a half.
+
+
+
+\1f
+Local Variables:
+coding: iso-8859-1
+End:
diff --git a/doc/gmp.info-2 b/doc/gmp.info-2

new file mode 100644 (file)

index 0000000..0536cc9
--- /dev/null
+++ b/doc/gmp.info-2
@@ -0,0 +1,3598 @@
+This is ../../gmp/doc/gmp.info, produced by makeinfo version 4.13 from
+../../gmp/doc/gmp.texi.
+
+This manual describes how to install and use the GNU multiple precision
+arithmetic library, version 5.0.5.
+
+   Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
+2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
+Free Software Foundation, Inc.
+
+   Permission is granted to copy, distribute and/or modify this
+document under the terms of the GNU Free Documentation License, Version
+1.3 or any later version published by the Free Software Foundation;
+with no Invariant Sections, with the Front-Cover Texts being "A GNU
+Manual", and with the Back-Cover Texts being "You have freedom to copy
+and modify this GNU Manual, like GNU software".  A copy of the license
+is included in *note GNU Free Documentation License::.
+
+INFO-DIR-SECTION GNU libraries
+START-INFO-DIR-ENTRY
+* gmp: (gmp).                   GNU Multiple Precision Arithmetic Library.
+END-INFO-DIR-ENTRY
+
+\1f
+File: gmp.info,  Node: Subquadratic GCD,  Next: Extended GCD,  Prev: Lehmer's Algorithm,  Up: Greatest Common Divisor Algorithms
+
+16.3.3 Subquadratic GCD
+-----------------------
+
+For inputs larger than `GCD_DC_THRESHOLD', GCD is computed via the HGCD
+(Half GCD) function, as a generalization of Lehmer's algorithm.
+
+   Let the inputs a,b be of size N limbs each. Put S = floor(N/2) + 1.
+Then HGCD(a,b) returns a transformation matrix T with non-negative
+elements, and reduced numbers (c;d) = T^-1 (a;b). The reduced numbers
+c,d must be larger than S limbs, while their difference abs(c-d) must
+fit in S limbs. The matrix elements will also be of size roughly N/2.
+
+   The HGCD base case uses Lehmer's algorithm, but with the above stop
+condition that returns reduced numbers and the corresponding
+transformation matrix half-way through. For inputs larger than
+`HGCD_THRESHOLD', HGCD is computed recursively, using the divide and
+conquer algorithm in "On Schönhage's algorithm and subquadratic integer
+GCD computation" by Möller (*note References::). The recursive
+algorithm consists of these main steps.
+
+   * Call HGCD recursively, on the most significant N/2 limbs. Apply the
+     resulting matrix T_1 to the full numbers, reducing them to a size
+     just above 3N/4.
+
+   * Perform a small number of division or subtraction steps to reduce
+     the numbers to size below 3N/4. This is essential mainly for the
+     unlikely case of large quotients.
+
+   * Call HGCD recursively, on the most significant N/2 limbs of the
+     reduced numbers. Apply the resulting matrix T_2 to the full
+     numbers, reducing them to a size just above N/2.
+
+   * Compute T = T_1 T_2.
+
+   * Perform a small number of division and subtraction steps to
+     satisfy the requirements, and return.
+
+   GCD is then implemented as a loop around HGCD, similarly to Lehmer's
+algorithm. Where Lehmer repeatedly chops off the top two limbs, calls
+`mpn_hgcd2', and applies the resulting matrix to the full numbers, the
+subquadratic GCD chops off the most significant third of the limbs (the
+proportion is a tuning parameter, and 1/3 seems to be more efficient
+than, e.g., 1/2), calls `mpn_hgcd', and applies the resulting matrix.
+Once the input numbers are reduced to size below `GCD_DC_THRESHOLD',
+Lehmer's algorithm is used for the rest of the work.
+
+   The asymptotic running time of both HGCD and GCD is O(M(N)*log(N)),
+where M(N) is the time for multiplying two N-limb numbers.
+
+\1f
+File: gmp.info,  Node: Extended GCD,  Next: Jacobi Symbol,  Prev: Subquadratic GCD,  Up: Greatest Common Divisor Algorithms
+
+16.3.4 Extended GCD
+-------------------
+
+The extended GCD function, or GCDEXT, calculates gcd(a,b) and also
+cofactors x and y satisfying a*x+b*y=gcd(a,b). All the algorithms used
+for plain GCD are extended to handle this case. The binary algorithm is
+used only for single-limb GCDEXT.  Lehmer's algorithm is used for sizes
+up to `GCDEXT_DC_THRESHOLD'. Above this threshold, GCDEXT is
+implemented as a loop around HGCD, but with more book-keeping to keep
+track of the cofactors. This gives the same asymptotic running time as
+for GCD and HGCD, O(M(N)*log(N)).
+
+   One difference to plain GCD is that while the inputs a and b are
+reduced as the algorithm proceeds, the cofactors x and y grow in size.
+This makes the tuning of the chopping-point more difficult. The current
+code chops off the most significant half of the inputs for the call to
+HGCD in the first iteration, and the most significant two thirds for
+the remaining calls. This strategy could surely be improved. Also the
+stop condition for the loop, where Lehmer's algorithm is invoked once
+the inputs are reduced below `GCDEXT_DC_THRESHOLD', could maybe be
+improved by taking into account the current size of the cofactors.
+
+\1f
+File: gmp.info,  Node: Jacobi Symbol,  Prev: Extended GCD,  Up: Greatest Common Divisor Algorithms
+
+16.3.5 Jacobi Symbol
+--------------------
+
+`mpz_jacobi' and `mpz_kronecker' are currently implemented with a
+simple binary algorithm similar to that described for the GCDs (*note
+Binary GCD::).  They're not very fast when both inputs are large.
+Lehmer's multi-step improvement or a binary based multi-step algorithm
+is likely to be better.
+
+   When one operand fits a single limb, and that includes
+`mpz_kronecker_ui' and friends, an initial reduction is done with
+either `mpn_mod_1' or `mpn_modexact_1_odd', followed by the binary
+algorithm on a single limb.  The binary algorithm is well suited to a
+single limb, and the whole calculation in this case is quite efficient.
+
+   In all the routines sign changes for the result are accumulated
+using some bit twiddling, avoiding table lookups or conditional jumps.
+
+\1f
+File: gmp.info,  Node: Powering Algorithms,  Next: Root Extraction Algorithms,  Prev: Greatest Common Divisor Algorithms,  Up: Algorithms
+
+16.4 Powering Algorithms
+========================
+
+* Menu:
+
+* Normal Powering Algorithm::
+* Modular Powering Algorithm::
+
+\1f
+File: gmp.info,  Node: Normal Powering Algorithm,  Next: Modular Powering Algorithm,  Prev: Powering Algorithms,  Up: Powering Algorithms
+
+16.4.1 Normal Powering
+----------------------
+
+Normal `mpz' or `mpf' powering uses a simple binary algorithm,
+successively squaring and then multiplying by the base when a 1 bit is
+seen in the exponent, as per Knuth section 4.6.3.  The "left to right"
+variant described there is used rather than algorithm A, since it's
+just as easy and can be done with somewhat less temporary memory.
+
+\1f
+File: gmp.info,  Node: Modular Powering Algorithm,  Prev: Normal Powering Algorithm,  Up: Powering Algorithms
+
+16.4.2 Modular Powering
+-----------------------
+
+Modular powering is implemented using a 2^k-ary sliding window
+algorithm, as per "Handbook of Applied Cryptography" algorithm 14.85
+(*note References::).  k is chosen according to the size of the
+exponent.  Larger exponents use larger values of k, the choice being
+made to minimize the average number of multiplications that must
+supplement the squaring.
+
+   The modular multiplies and squares use either a simple division or
+the REDC method by Montgomery (*note References::).  REDC is a little
+faster, essentially saving N single limb divisions in a fashion similar
+to an exact remainder (*note Exact Remainder::).
+
+\1f
+File: gmp.info,  Node: Root Extraction Algorithms,  Next: Radix Conversion Algorithms,  Prev: Powering Algorithms,  Up: Algorithms
+
+16.5 Root Extraction Algorithms
+===============================
+
+* Menu:
+
+* Square Root Algorithm::
+* Nth Root Algorithm::
+* Perfect Square Algorithm::
+* Perfect Power Algorithm::
+
+\1f
+File: gmp.info,  Node: Square Root Algorithm,  Next: Nth Root Algorithm,  Prev: Root Extraction Algorithms,  Up: Root Extraction Algorithms
+
+16.5.1 Square Root
+------------------
+
+Square roots are taken using the "Karatsuba Square Root" algorithm by
+Paul Zimmermann (*note References::).
+
+   An input n is split into four parts of k bits each, so with b=2^k we
+have n = a3*b^3 + a2*b^2 + a1*b + a0.  Part a3 must be "normalized" so
+that either the high or second highest bit is set.  In GMP, k is kept
+on a limb boundary and the input is left shifted (by an even number of
+bits) to normalize.
+
+   The square root of the high two parts is taken, by recursive
+application of the algorithm (bottoming out in a one-limb Newton's
+method),
+
+     s1,r1 = sqrtrem (a3*b + a2)
+
+   This is an approximation to the desired root and is extended by a
+division to give s,r,
+
+     q,u = divrem (r1*b + a1, 2*s1)
+     s = s1*b + q
+     r = u*b + a0 - q^2
+
+   The normalization requirement on a3 means at this point s is either
+correct or 1 too big.  r is negative in the latter case, so
+
+     if r < 0 then
+       r = r + 2*s - 1
+       s = s - 1
+
+   The algorithm is expressed in a divide and conquer form, but as
+noted in the paper it can also be viewed as a discrete variant of
+Newton's method, or as a variation on the schoolboy method (no longer
+taught) for square roots two digits at a time.
+
+   If the remainder r is not required then usually only a few high limbs
+of r and u need to be calculated to determine whether an adjustment to
+s is required.  This optimization is not currently implemented.
+
+   In the Karatsuba multiplication range this algorithm is
+O(1.5*M(N/2)), where M(n) is the time to multiply two numbers of n
+limbs.  In the FFT multiplication range this grows to a bound of
+O(6*M(N/2)).  In practice a factor of about 1.5 to 1.8 is found in the
+Karatsuba and Toom-3 ranges, growing to 2 or 3 in the FFT range.
+
+   The algorithm does all its calculations in integers and the resulting
+`mpn_sqrtrem' is used for both `mpz_sqrt' and `mpf_sqrt'.  The extended
+precision given by `mpf_sqrt_ui' is obtained by padding with zero limbs.
+
+\1f
+File: gmp.info,  Node: Nth Root Algorithm,  Next: Perfect Square Algorithm,  Prev: Square Root Algorithm,  Up: Root Extraction Algorithms
+
+16.5.2 Nth Root
+---------------
+
+Integer Nth roots are taken using Newton's method with the following
+iteration, where A is the input and n is the root to be taken.
+
+              1         A
+     a[i+1] = - * ( --------- + (n-1)*a[i] )
+              n     a[i]^(n-1)
+
+   The initial approximation a[1] is generated bitwise by successively
+powering a trial root with or without new 1 bits, aiming to be just
+above the true root.  The iteration converges quadratically when
+started from a good approximation.  When n is large more initial bits
+are needed to get good convergence.  The current implementation is not
+particularly well optimized.
+
+\1f
+File: gmp.info,  Node: Perfect Square Algorithm,  Next: Perfect Power Algorithm,  Prev: Nth Root Algorithm,  Up: Root Extraction Algorithms
+
+16.5.3 Perfect Square
+---------------------
+
+A significant fraction of non-squares can be quickly identified by
+checking whether the input is a quadratic residue modulo small integers.
+
+   `mpz_perfect_square_p' first tests the input mod 256, which means
+just examining the low byte.  Only 44 different values occur for
+squares mod 256, so 82.8% of inputs can be immediately identified as
+non-squares.
+
+   On a 32-bit system similar tests are done mod 9, 5, 7, 13 and 17,
+for a total 99.25% of inputs identified as non-squares.  On a 64-bit
+system 97 is tested too, for a total 99.62%.
+
+   These moduli are chosen because they're factors of 2^24-1 (or 2^48-1
+for 64-bits), and such a remainder can be quickly taken just using
+additions (see `mpn_mod_34lsub1').
+
+   When nails are in use moduli are instead selected by the `gen-psqr.c'
+program and applied with an `mpn_mod_1'.  The same 2^24-1 or 2^48-1
+could be done with nails using some extra bit shifts, but this is not
+currently implemented.
+
+   In any case each modulus is applied to the `mpn_mod_34lsub1' or
+`mpn_mod_1' remainder and a table lookup identifies non-squares.  By
+using a "modexact" style calculation, and suitably permuted tables,
+just one multiply each is required, see the code for details.  Moduli
+are also combined to save operations, so long as the lookup tables
+don't become too big.  `gen-psqr.c' does all the pre-calculations.
+
+   A square root must still be taken for any value that passes these
+tests, to verify it's really a square and not one of the small fraction
+of non-squares that get through (i.e. a pseudo-square to all the tested
+bases).
+
+   Clearly more residue tests could be done, `mpz_perfect_square_p' only
+uses a compact and efficient set.  Big inputs would probably benefit
+from more residue testing, small inputs might be better off with less.
+The assumed distribution of squares versus non-squares in the input
+would affect such considerations.
+
+\1f
+File: gmp.info,  Node: Perfect Power Algorithm,  Prev: Perfect Square Algorithm,  Up: Root Extraction Algorithms
+
+16.5.4 Perfect Power
+--------------------
+
+Detecting perfect powers is required by some factorization algorithms.
+Currently `mpz_perfect_power_p' is implemented using repeated Nth root
+extractions, though naturally only prime roots need to be considered.
+(*Note Nth Root Algorithm::.)
+
+   If a prime divisor p with multiplicity e can be found, then only
+roots which are divisors of e need to be considered, much reducing the
+work necessary.  To this end divisibility by a set of small primes is
+checked.
+
+\1f
+File: gmp.info,  Node: Radix Conversion Algorithms,  Next: Other Algorithms,  Prev: Root Extraction Algorithms,  Up: Algorithms
+
+16.6 Radix Conversion
+=====================
+
+Radix conversions are less important than other algorithms.  A program
+dominated by conversions should probably use a different data
+representation.
+
+* Menu:
+
+* Binary to Radix::
+* Radix to Binary::
+
+\1f
+File: gmp.info,  Node: Binary to Radix,  Next: Radix to Binary,  Prev: Radix Conversion Algorithms,  Up: Radix Conversion Algorithms
+
+16.6.1 Binary to Radix
+----------------------
+
+Conversions from binary to a power-of-2 radix use a simple and fast
+O(N) bit extraction algorithm.
+
+   Conversions from binary to other radices use one of two algorithms.
+Sizes below `GET_STR_PRECOMPUTE_THRESHOLD' use a basic O(N^2) method.
+Repeated divisions by b^n are made, where b is the radix and n is the
+biggest power that fits in a limb.  But instead of simply using the
+remainder r from such divisions, an extra divide step is done to give a
+fractional limb representing r/b^n.  The digits of r can then be
+extracted using multiplications by b rather than divisions.  Special
+case code is provided for decimal, allowing multiplications by 10 to
+optimize to shifts and adds.
+
+   Above `GET_STR_PRECOMPUTE_THRESHOLD' a sub-quadratic algorithm is
+used.  For an input t, powers b^(n*2^i) of the radix are calculated,
+until a power between t and sqrt(t) is reached.  t is then divided by
+that largest power, giving a quotient which is the digits above that
+power, and a remainder which is those below.  These two parts are in
+turn divided by the second highest power, and so on recursively.  When
+a piece has been divided down to less than `GET_STR_DC_THRESHOLD'
+limbs, the basecase algorithm described above is used.
+
+   The advantage of this algorithm is that big divisions can make use
+of the sub-quadratic divide and conquer division (*note Divide and
+Conquer Division::), and big divisions tend to have less overheads than
+lots of separate single limb divisions anyway.  But in any case the
+cost of calculating the powers b^(n*2^i) must first be overcome.
+
+   `GET_STR_PRECOMPUTE_THRESHOLD' and `GET_STR_DC_THRESHOLD' represent
+the same basic thing, the point where it becomes worth doing a big
+division to cut the input in half.  `GET_STR_PRECOMPUTE_THRESHOLD'
+includes the cost of calculating the radix power required, whereas
+`GET_STR_DC_THRESHOLD' assumes that's already available, which is the
+case when recursing.
+
+   Since the base case produces digits from least to most significant
+but they want to be stored from most to least, it's necessary to
+calculate in advance how many digits there will be, or at least be sure
+not to underestimate that.  For GMP the number of input bits is
+multiplied by `chars_per_bit_exactly' from `mp_bases', rounding up.
+The result is either correct or one too big.
+
+   Examining some of the high bits of the input could increase the
+chance of getting the exact number of digits, but an exact result every
+time would not be practical, since in general the difference between
+numbers 100... and 99... is only in the last few bits and the work to
+identify 99...  might well be almost as much as a full conversion.
+
+   `mpf_get_str' doesn't currently use the algorithm described here, it
+multiplies or divides by a power of b to move the radix point to
+just above the highest non-zero digit (or at worst one above that
+location), then multiplies by b^n to bring out digits.  This is O(N^2)
+and is certainly not optimal.
+
+   The r/b^n scheme described above for using multiplications to bring
+out digits might be useful for more than a single limb.  Some brief
+experiments with it on the base case when recursing didn't give a
+noticeable improvement, but perhaps that was only due to the
+implementation.  Something similar would work for the sub-quadratic
+divisions too, though there would be the cost of calculating a bigger
+radix power.
+
+   Another possible improvement for the sub-quadratic part would be to
+arrange for radix powers that balanced the sizes of quotient and
+remainder produced, i.e. the highest power would be a b^(n*k)
+approximately equal to sqrt(t), not restricted to a 2^i factor.  That
+ought to smooth out a graph of times against sizes, but may or may not
+be a net speedup.
+
+\1f
+File: gmp.info,  Node: Radix to Binary,  Prev: Binary to Radix,  Up: Radix Conversion Algorithms
+
+16.6.2 Radix to Binary
+----------------------
+
+*This section needs to be rewritten, it currently describes the
+algorithms used before GMP 4.3.*
+
+   Conversions from a power-of-2 radix into binary use a simple and fast
+O(N) bitwise concatenation algorithm.
+
+   Conversions from other radices use one of two algorithms.  Sizes
+below `SET_STR_PRECOMPUTE_THRESHOLD' use a basic O(N^2) method.  Groups
+of n digits are converted to limbs, where n is the biggest power of the
+base b which will fit in a limb, then those groups are accumulated into
+the result by multiplying by b^n and adding.  This saves
+multi-precision operations, as per Knuth section 4.4 part E (*note
+References::).  Some special case code is provided for decimal, giving
+the compiler a chance to optimize multiplications by 10.
+
+   Above `SET_STR_PRECOMPUTE_THRESHOLD' a sub-quadratic algorithm is
+used.  First groups of n digits are converted into limbs.  Then adjacent
+limbs are combined into limb pairs with x*b^n+y, where x and y are the
+limbs.  Adjacent limb pairs are combined into quads similarly with
+x*b^(2n)+y.  This continues until a single block remains, that being
+the result.
+
+   The advantage of this method is that the multiplications for each x
+are big blocks, allowing Karatsuba and higher algorithms to be used.
+But the cost of calculating the powers b^(n*2^i) must be overcome.
+`SET_STR_PRECOMPUTE_THRESHOLD' usually ends up quite big, around 5000
+digits, and on some processors much bigger still.
+
+   `SET_STR_PRECOMPUTE_THRESHOLD' is based on the input digits (and
+tuned for decimal), though it might be better based on a limb count, so
+as to be independent of the base.  But that sort of count isn't used by
+the base case and so would need some sort of initial calculation or
+estimate.
+
+   The main reason `SET_STR_PRECOMPUTE_THRESHOLD' is so much bigger
+than the corresponding `GET_STR_PRECOMPUTE_THRESHOLD' is that
+`mpn_mul_1' is much faster than `mpn_divrem_1' (often by a factor of 5,
+or more).
+
+\1f
+File: gmp.info,  Node: Other Algorithms,  Next: Assembly Coding,  Prev: Radix Conversion Algorithms,  Up: Algorithms
+
+16.7 Other Algorithms
+=====================
+
+* Menu:
+
+* Prime Testing Algorithm::
+* Factorial Algorithm::
+* Binomial Coefficients Algorithm::
+* Fibonacci Numbers Algorithm::
+* Lucas Numbers Algorithm::
+* Random Number Algorithms::
+
+\1f
+File: gmp.info,  Node: Prime Testing Algorithm,  Next: Factorial Algorithm,  Prev: Other Algorithms,  Up: Other Algorithms
+
+16.7.1 Prime Testing
+--------------------
+
+The primality testing in `mpz_probab_prime_p' (*note Number Theoretic
+Functions::) first does some trial division by small factors and then
+uses the Miller-Rabin probabilistic primality testing algorithm, as
+described in Knuth section 4.5.4 algorithm P (*note References::).
+
+   For an odd input n, and with n = q*2^k+1 where q is odd, this
+algorithm selects a random base x and tests whether x^q mod n is 1 or
+-1, or some x^(q*2^j) mod n is -1, for 1<=j<k.  If so then n is probably
+prime, if not then n is definitely composite.
+
+   Any prime n will pass the test, but some composites do too.  Such
+composites are known as strong pseudoprimes to base x.  No n is a
+strong pseudoprime to more than 1/4 of all bases (see Knuth exercise
+22), hence with x chosen at random there's no more than a 1/4 chance a
+"probable prime" will in fact be composite.
+
+   In fact strong pseudoprimes are quite rare, making the test much more
+powerful than this analysis would suggest, but 1/4 is all that's proven
+for an arbitrary n.
+
+\1f
+File: gmp.info,  Node: Factorial Algorithm,  Next: Binomial Coefficients Algorithm,  Prev: Prime Testing Algorithm,  Up: Other Algorithms
+
+16.7.2 Factorial
+----------------
+
+Factorials are calculated by a combination of removal of twos,
+powering, and binary splitting.  The procedure can be best illustrated
+with an example,
+
+     23! = 1.2.3.4.5.6.7.8.9.10.11.12.13.14.15.16.17.18.19.20.21.22.23
+
+has factors of two removed,
+
+     23! = 2^19.1.1.3.1.5.3.7.1.9.5.11.3.13.7.15.1.17.9.19.5.21.11.23
+
+and the resulting terms collected up according to their multiplicity,
+
+     23! = 2^19.(3.5)^3.(7.9.11)^2.(13.15.17.19.21.23)
+
+   Each sequence such as 13.15.17.19.21.23 is evaluated by splitting
+into every second term, as for instance (13.17.21).(15.19.23), and the
+same recursively on each half.  This is implemented iteratively using
+some bit twiddling.
+
+   Such splitting is more efficient than repeated Nx1 multiplies since
+it forms big multiplies, allowing Karatsuba and higher algorithms to be
+used.  And even below the Karatsuba threshold a big block of work can
+be more efficient for the basecase algorithm.
+
+   Splitting into subsequences of every second term keeps the resulting
+products more nearly equal in size than would the simpler approach of
+say taking the first half and second half of the sequence.  Nearly
+equal products are more efficient for the current multiply
+implementation.
+
+\1f
+File: gmp.info,  Node: Binomial Coefficients Algorithm,  Next: Fibonacci Numbers Algorithm,  Prev: Factorial Algorithm,  Up: Other Algorithms
+
+16.7.3 Binomial Coefficients
+----------------------------
+
+Binomial coefficients C(n,k) are calculated by first arranging k <= n/2
+using C(n,k) = C(n,n-k) if necessary, and then evaluating the following
+product simply from i=2 to i=k.
+
+                           k  (n-k+i)
+     C(n,k) =  (n-k+1) * prod -------
+                          i=2    i
+
+   It's easy to show that each denominator i will divide the product so
+far, so the exact division algorithm is used (*note Exact Division::).
+
+   The numerators n-k+i and denominators i are first accumulated into
+as many as fit in a limb, to save multi-precision operations, though for
+`mpz_bin_ui' this applies only to the divisors, since n is an `mpz_t'
+and n-k+i in general won't fit in a limb at all.
+
+\1f
+File: gmp.info,  Node: Fibonacci Numbers Algorithm,  Next: Lucas Numbers Algorithm,  Prev: Binomial Coefficients Algorithm,  Up: Other Algorithms
+
+16.7.4 Fibonacci Numbers
+------------------------
+
+The Fibonacci functions `mpz_fib_ui' and `mpz_fib2_ui' are designed for
+calculating isolated F[n] or F[n],F[n-1] values efficiently.
+
+   For small n, a table of single limb values in `__gmp_fib_table' is
+used.  On a 32-bit limb this goes up to F[47], or on a 64-bit limb up
+to F[93].  For convenience the table starts at F[-1].
+
+   Beyond the table, values are generated with a binary powering
+algorithm, calculating a pair F[n] and F[n-1] working from high to low
+across the bits of n.  The formulas used are
+
+     F[2k+1] = 4*F[k]^2 - F[k-1]^2 + 2*(-1)^k
+     F[2k-1] =   F[k]^2 + F[k-1]^2
+
+     F[2k] = F[2k+1] - F[2k-1]
+
+   At each step, k is the high b bits of n.  If the next bit of n is 0
+then F[2k],F[2k-1] is used, or if it's a 1 then F[2k+1],F[2k] is used,
+and the process repeated until all bits of n are incorporated.  Notice
+these formulas require just two squares per bit of n.
+
+   It'd be possible to handle the first few n above the single limb
+table with simple additions, using the defining Fibonacci recurrence
+F[k+1]=F[k]+F[k-1], but this is not done since it usually turns out to
+be faster for only about 10 or 20 values of n, and including a block of
+code for just those doesn't seem worthwhile.  If they really mattered
+it'd be better to extend the data table.
+
+   Using a table avoids lots of calculations on small numbers, and
+makes small n go fast.  A bigger table would make more small n go fast,
+it's just a question of balancing size against desired speed.  For GMP
+the code is kept compact, with the emphasis primarily on a good
+powering algorithm.
+
+   `mpz_fib2_ui' returns both F[n] and F[n-1], but `mpz_fib_ui' is only
+interested in F[n].  In this case the last step of the algorithm can
+become one multiply instead of two squares.  One of the following two
+formulas is used, according as n is odd or even.
+
+     F[2k]   = F[k]*(F[k]+2F[k-1])
+
+     F[2k+1] = (2F[k]+F[k-1])*(2F[k]-F[k-1]) + 2*(-1)^k
+
+   F[2k+1] here is the same as above, just rearranged to be a multiply.
+For interest, the 2*(-1)^k term both here and above can be applied just
+to the low limb of the calculation, without a carry or borrow into
+further limbs, which saves some code size.  See comments with
+`mpz_fib_ui' and the internal `mpn_fib2_ui' for how this is done.
+
+\1f
+File: gmp.info,  Node: Lucas Numbers Algorithm,  Next: Random Number Algorithms,  Prev: Fibonacci Numbers Algorithm,  Up: Other Algorithms
+
+16.7.5 Lucas Numbers
+--------------------
+
+`mpz_lucnum2_ui' derives a pair of Lucas numbers from a pair of
+Fibonacci numbers with the following simple formulas.
+
+     L[k]   =   F[k] + 2*F[k-1]
+     L[k-1] = 2*F[k] -   F[k-1]
+
+   `mpz_lucnum_ui' is only interested in L[n], and some work can be
+saved.  Trailing zero bits on n can be handled with a single square
+each.
+
+     L[2k] = L[k]^2 - 2*(-1)^k
+
+   And the lowest 1 bit can be handled with one multiply of a pair of
+Fibonacci numbers, similar to what `mpz_fib_ui' does.
+
+     L[2k+1] = 5*F[k-1]*(2*F[k]+F[k-1]) - 4*(-1)^k
+
+\1f
+File: gmp.info,  Node: Random Number Algorithms,  Prev: Lucas Numbers Algorithm,  Up: Other Algorithms
+
+16.7.6 Random Numbers
+---------------------
+
+For the `urandomb' functions, random numbers are generated simply by
+concatenating bits produced by the generator.  As long as the generator
+has good randomness properties this will produce well-distributed N bit
+numbers.
+
+   For the `urandomm' functions, random numbers in a range 0<=R<N are
+generated by taking values R of ceil(log2(N)) bits each until one
+satisfies R<N.  This will normally require only one or two attempts,
+but the attempts are limited in case the generator is somehow
+degenerate and produces only 1 bits or similar.
+
+   The Mersenne Twister generator is by Matsumoto and Nishimura (*note
+References::).  It has a non-repeating period of 2^19937-1, which is a
+Mersenne prime, hence the name of the generator.  The state is 624
+words of 32-bits each, which is iterated with one XOR and shift for each
+32-bit word generated, making the algorithm very fast.  Randomness
+properties are also very good and this is the default algorithm used by
+GMP.
+
+   Linear congruential generators are described in many text books, for
+instance Knuth volume 2 (*note References::).  With a modulus M and
+parameters A and C, an integer state S is iterated by the formula S <-
+A*S+C mod M.  At each step the new state is a linear function of the
+previous, mod M, hence the name of the generator.
+
+   In GMP only moduli of the form 2^N are supported, and the current
+implementation is not as well optimized as it could be.  Overheads are
+significant when N is small, and when N is large clearly the multiply
+at each step will become slow.  This is not a big concern, since the
+Mersenne Twister generator is better in every respect and is therefore
+recommended for all normal applications.
+
+   For both generators the current state can be deduced by observing
+enough output and applying some linear algebra (over GF(2) in the case
+of the Mersenne Twister).  This generally means raw output is
+unsuitable for cryptographic applications without further hashing or
+the like.
+
+\1f
+File: gmp.info,  Node: Assembly Coding,  Prev: Other Algorithms,  Up: Algorithms
+
+16.8 Assembly Coding
+====================
+
+The assembly subroutines in GMP are the most significant source of
+speed at small to moderate sizes.  At larger sizes algorithm selection
+becomes more important, but of course speedups in low level routines
+will still speed up everything proportionally.
+
+   Carry handling and widening multiplies that are important for GMP
+can't be easily expressed in C.  GCC `asm' blocks help a lot and are
+provided in `longlong.h', but hand coding low level routines invariably
+offers a speedup over generic C by a factor of anything from 2 to 10.
+
+* Menu:
+
+* Assembly Code Organisation::
+* Assembly Basics::
+* Assembly Carry Propagation::
+* Assembly Cache Handling::
+* Assembly Functional Units::
+* Assembly Floating Point::
+* Assembly SIMD Instructions::
+* Assembly Software Pipelining::
+* Assembly Loop Unrolling::
+* Assembly Writing Guide::
+
+\1f
+File: gmp.info,  Node: Assembly Code Organisation,  Next: Assembly Basics,  Prev: Assembly Coding,  Up: Assembly Coding
+
+16.8.1 Code Organisation
+------------------------
+
+The various `mpn' subdirectories contain machine-dependent code, written
+in C or assembly.  The `mpn/generic' subdirectory contains default code,
+used when there's no machine-specific version of a particular file.
+
+   Each `mpn' subdirectory is for an ISA family.  Generally 32-bit and
+64-bit variants in a family cannot share code and have separate
+directories.  Within a family further subdirectories may exist for CPU
+variants.
+
+   In each directory a `nails' subdirectory may exist, holding code with
+nails support for that CPU variant.  A `NAILS_SUPPORT' directive in each
+file indicates the nails values the code handles.  Nails code only
+exists where it's faster, or promises to be faster, than plain code.
+There's no effort put into nails if they're not going to enhance a
+given CPU.
+
+\1f
+File: gmp.info,  Node: Assembly Basics,  Next: Assembly Carry Propagation,  Prev: Assembly Code Organisation,  Up: Assembly Coding
+
+16.8.2 Assembly Basics
+----------------------
+
+`mpn_addmul_1' and `mpn_submul_1' are the most important routines for
+overall GMP performance.  All multiplications and divisions come down to
+repeated calls to these.  `mpn_add_n', `mpn_sub_n', `mpn_lshift' and
+`mpn_rshift' are next most important.
+
+   On some CPUs assembly versions of the internal functions
+`mpn_mul_basecase' and `mpn_sqr_basecase' give significant speedups,
+mainly through avoiding function call overheads.  They can also
+potentially make better use of a wide superscalar processor, as can
+bigger primitives like `mpn_addmul_2' or `mpn_addmul_4'.
+
+   The restrictions on overlaps between sources and destinations (*note
+Low-level Functions::) are designed to facilitate a variety of
+implementations.  For example, knowing `mpn_add_n' won't have partly
+overlapping sources and destination means reading can be done far ahead
+of writing on superscalar processors, and loops can be vectorized on a
+vector processor, depending on the carry handling.
+
+\1f
+File: gmp.info,  Node: Assembly Carry Propagation,  Next: Assembly Cache Handling,  Prev: Assembly Basics,  Up: Assembly Coding
+
+16.8.3 Carry Propagation
+------------------------
+
+The problem that presents most challenges in GMP is propagating carries
+from one limb to the next.  In functions like `mpn_addmul_1' and
+`mpn_add_n', carries are the only dependencies between limb operations.
+
+   On processors with carry flags, a straightforward CISC style `adc' is
+generally best.  AMD K6 `mpn_addmul_1' however is an example of an
+unusual set of circumstances where a branch works out better.
+
+   On RISC processors generally an add and compare for overflow is
+used.  This sort of thing can be seen in `mpn/generic/aors_n.c'.  Some
+carry propagation schemes require 4 instructions, meaning at least 4
+cycles per limb, but other schemes may use just 1 or 2.  On wide
+superscalar processors performance may be completely determined by the
+number of dependent instructions between carry-in and carry-out for
+each limb.
+
+   On vector processors good use can be made of the fact that a carry
+bit only very rarely propagates more than one limb.  When adding a
+single bit to a limb, there's only a carry out if that limb was
+`0xFF...FF' which on random data will be only 1 in 2^mp_bits_per_limb.
+`mpn/cray/add_n.c' is an example of this; it adds all limbs in
+parallel, adds one set of carry bits in parallel and then only rarely
+needs to fall through to a loop propagating further carries.
+
+   On the x86s, GCC (as of version 2.95.2) doesn't generate
+particularly good code for the RISC style idioms that are necessary to
+handle carry bits in C.  Often conditional jumps are generated where
+`adc' or `sbb' forms would be better.  And so unfortunately almost any
+loop involving carry bits needs to be coded in assembly for best
+results.
+
+\1f
+File: gmp.info,  Node: Assembly Cache Handling,  Next: Assembly Functional Units,  Prev: Assembly Carry Propagation,  Up: Assembly Coding
+
+16.8.4 Cache Handling
+---------------------
+
+GMP aims to perform well both on operands that fit entirely in L1 cache
+and those which don't.
+
+   Basic routines like `mpn_add_n' or `mpn_lshift' are often used on
+large operands, so L2 and main memory performance is important for them.
+`mpn_mul_1' and `mpn_addmul_1' are mostly used for multiply and square
+basecases, so L1 performance matters most for them, unless assembly
+versions of `mpn_mul_basecase' and `mpn_sqr_basecase' exist, in which
+case the remaining uses are mostly for larger operands.
+
+   For L2 or main memory operands, memory access times will almost
+certainly be more than the calculation time.  The aim therefore is to
+maximize memory throughput, by starting a load of the next cache line
+while processing the contents of the previous one.  Clearly this is
+only possible if the chip has a lock-up free cache or some sort of
+prefetch instruction.  Most current chips have both these features.
+
+   Prefetching sources combines well with loop unrolling, since a
+prefetch can be initiated once per unrolled loop (or more than once if
+the loop covers more than one cache line).
+
+   On CPUs without write-allocate caches, prefetching destinations will
+ensure individual stores don't go further down the cache hierarchy,
+limiting bandwidth.  Of course for calculations which are slow anyway,
+like `mpn_divrem_1', write-throughs might be fine.
+
+   The distance ahead to prefetch will be determined by memory latency
+versus throughput.  The aim of course is to have data arriving
+continuously, at peak throughput.  Some CPUs have limits on the number
+of fetches or prefetches in progress.
+
+   If a special prefetch instruction doesn't exist then a plain load
+can be used, but in that case care must be taken not to attempt to read
+past the end of an operand, since that might produce a segmentation
+violation.
+
+   Some CPUs or systems have hardware that detects sequential memory
+accesses and initiates suitable cache movements automatically, making
+life easy.
+
+\1f
+File: gmp.info,  Node: Assembly Functional Units,  Next: Assembly Floating Point,  Prev: Assembly Cache Handling,  Up: Assembly Coding
+
+16.8.5 Functional Units
+-----------------------
+
+When choosing an approach for an assembly loop, consideration is given
+to what operations can execute simultaneously and what throughput can
+thereby be achieved.  In some cases an algorithm can be tweaked to
+accommodate available resources.
+
+   Loop control will generally require a counter and pointer updates,
+costing as much as 5 instructions, plus any delays a branch introduces.
+CPU addressing modes might reduce pointer updates, perhaps by allowing
+just one updating pointer and others expressed as offsets from it, or
+on CISC chips with all addressing done with the loop counter as a
+scaled index.
+
+   The final loop control cost can be amortised by processing several
+limbs in each iteration (*note Assembly Loop Unrolling::).  This at
+least ensures loop control isn't a big fraction of the work done.
+
+   Memory throughput is always a limit.  If perhaps only one load or
+one store can be done per cycle then 3 cycles/limb will be the top speed
+for "binary" operations like `mpn_add_n', and any code achieving that
+is optimal.
+
+   Integer resources can be freed up by having the loop counter in a
+float register, or by pressing the float units into use for some
+multiplying, perhaps doing every second limb on the float side (*note
+Assembly Floating Point::).
+
+   Float resources can be freed up by doing carry propagation on the
+integer side, or even by doing integer to float conversions in integers
+using bit twiddling.
+
+\1f
+File: gmp.info,  Node: Assembly Floating Point,  Next: Assembly SIMD Instructions,  Prev: Assembly Functional Units,  Up: Assembly Coding
+
+16.8.6 Floating Point
+---------------------
+
+Floating point arithmetic is used in GMP for multiplications on CPUs
+with poor integer multipliers.  It's mostly useful for `mpn_mul_1',
+`mpn_addmul_1' and `mpn_submul_1' on 64-bit machines, and
+`mpn_mul_basecase' on both 32-bit and 64-bit machines.
+
+   With IEEE 53-bit double precision floats, integer multiplications
+producing up to 53 bits will give exact results.  Breaking a 64x64
+multiplication into eight 16x32->48 bit pieces is convenient.  With
+some care though six 21x32->53 bit products can be used, if one of the
+lower two 21-bit pieces also uses the sign bit.
+
+   For the `mpn_mul_1' family of functions on a 64-bit machine, the
+invariant single limb is split at the start, into 3 or 4 pieces.
+Inside the loop, the bignum operand is split into 32-bit pieces.  Fast
+conversion of these unsigned 32-bit pieces to floating point is highly
+machine-dependent.  In some cases, reading the data into the integer
+unit, zero-extending to 64 bits, then transferring back through memory
+to the floating point unit is the only option.
+
+   Converting partial products back to 64-bit limbs is usually best
+done as a signed conversion.  Since all values are smaller than 2^53,
+signed and unsigned are the same, but most processors lack unsigned
+conversions.
+
+
+
+   Here is a diagram showing 16x32 bit products for an `mpn_mul_1' or
+`mpn_addmul_1' with a 64-bit limb.  The single limb operand V is split
+into four 16-bit parts.  The multi-limb operand U is split in the loop
+into two 32-bit parts.
+
+                     +---+---+---+---+
+                     |v48|v32|v16|v00|    V operand
+                     +---+---+---+---+
+
+                     +-------+-------+
+                 x   |  u32  |  u00  |    U operand (one limb)
+                     +---------------+
+
+     ---------------------------------
+
+                         +-----------+
+                         | u00 x v00 |    p00    48-bit products
+                         +-----------+
+                     +-----------+
+                     | u00 x v16 |        p16
+                     +-----------+
+                 +-----------+
+                 | u00 x v32 |            p32
+                 +-----------+
+             +-----------+
+             | u00 x v48 |                p48
+             +-----------+
+                 +-----------+
+                 | u32 x v00 |            r32
+                 +-----------+
+             +-----------+
+             | u32 x v16 |                r48
+             +-----------+
+         +-----------+
+         | u32 x v32 |                    r64
+         +-----------+
+     +-----------+
+     | u32 x v48 |                        r80
+     +-----------+
+
+   p32 and r32 can be summed using floating-point addition, and
+likewise p48 and r48.  p00 and p16 can be summed with r64 and r80 from
+the previous iteration.
+
+   For each loop then, four 49-bit quantities are transferred to the
+integer unit, aligned as follows,
+
+     |-----64bits----|-----64bits----|
+                        +------------+
+                        | p00 + r64' |    i00
+                        +------------+
+                    +------------+
+                    | p16 + r80' |        i16
+                    +------------+
+                +------------+
+                | p32 + r32  |            i32
+                +------------+
+            +------------+
+            | p48 + r48  |                i48
+            +------------+
+
+   The challenge then is to sum these efficiently and add in a carry
+limb, generating a low 64-bit result limb and a high 33-bit carry limb
+(i48 extends 33 bits into the high half).
+
+\1f
+File: gmp.info,  Node: Assembly SIMD Instructions,  Next: Assembly Software Pipelining,  Prev: Assembly Floating Point,  Up: Assembly Coding
+
+16.8.7 SIMD Instructions
+------------------------
+
+The single-instruction multiple-data support in current microprocessors
+is aimed at signal processing algorithms where each data point can be
+treated more or less independently.  There's generally not much support
+for propagating the sort of carries that arise in GMP.
+
+   SIMD multiplications of say four 16x16 bit multiplies only do as much
+work as one 32x32 from GMP's point of view, and need some shifts and
+adds besides.  But of course if say the SIMD form is fully pipelined
+and uses less instruction decoding then it may still be worthwhile.
+
+   On the x86 chips, MMX has so far found a use in `mpn_rshift' and
+`mpn_lshift', and is used in a special case for 16-bit multipliers in
+the P55 `mpn_mul_1'.  SSE2 is used for Pentium 4 `mpn_mul_1',
+`mpn_addmul_1', and `mpn_submul_1'.
+
+\1f
+File: gmp.info,  Node: Assembly Software Pipelining,  Next: Assembly Loop Unrolling,  Prev: Assembly SIMD Instructions,  Up: Assembly Coding
+
+16.8.8 Software Pipelining
+--------------------------
+
+Software pipelining consists of scheduling instructions around the
+branch point in a loop.  For example a loop might issue a load not for
+use in the present iteration but the next, thereby allowing extra
+cycles for the data to arrive from memory.
+
+   Naturally this is wanted only when doing things like loads or
+multiplies that take several cycles to complete, and only where a CPU
+has multiple functional units so that other work can be done in the
+meantime.
+
+   A pipeline with several stages will have a data value in progress at
+each stage and each loop iteration moves them along one stage.  This is
+like juggling.
+
+   If the latency of some instruction is greater than the loop time
+then it will be necessary to unroll, so one register has a result ready
+to use while another (or multiple others) are still in progress.
+(*note Assembly Loop Unrolling::).
+
+\1f
+File: gmp.info,  Node: Assembly Loop Unrolling,  Next: Assembly Writing Guide,  Prev: Assembly Software Pipelining,  Up: Assembly Coding
+
+16.8.9 Loop Unrolling
+---------------------
+
+Loop unrolling consists of replicating code so that several limbs are
+processed in each loop.  At a minimum this reduces loop overheads by a
+corresponding factor, but it can also allow better register usage, for
+example alternately using one register combination and then another.
+Judicious use of `m4' macros can help avoid lots of duplication in the
+source code.
+
+   Any amount of unrolling can be handled with a loop counter that's
+decremented by N each time, stopping when the remaining count is less
+than the further N the loop will process.  Or by subtracting N at the
+start, the termination condition becomes when the counter C is less
+than 0 (and the count of remaining limbs is C+N).
+
+   Alternatively, for a power of 2 unroll, the loop count and remainder can
+be established with a shift and mask.  This is convenient if also
+making a computed jump into the middle of a large loop.
+
+   The limbs not a multiple of the unrolling can be handled in various
+ways, for example
+
+   * A simple loop at the end (or the start) to process the excess.
+     Care will be wanted that it isn't too much slower than the
+     unrolled part.
+
+   * A set of binary tests, for example after an 8-limb unrolling, test
+     for 4 more limbs to process, then a further 2 more or not, and
+     finally 1 more or not.  This will probably take more code space
+     than a simple loop.
+
+   * A `switch' statement, providing separate code for each possible
+     excess, for example an 8-limb unrolling would have separate code
+     for 0 remaining, 1 remaining, etc, up to 7 remaining.  This might
+     take a lot of code, but may be the best way to optimize all cases
+     in combination with a deep pipelined loop.
+
+   * A computed jump into the middle of the loop, thus making the first
+     iteration handle the excess.  This should make times smoothly
+     increase with size, which is attractive, but setups for the jump
+     and adjustments for pointers can be tricky and could become quite
+     difficult in combination with deep pipelining.
+
+\1f
+File: gmp.info,  Node: Assembly Writing Guide,  Prev: Assembly Loop Unrolling,  Up: Assembly Coding
+
+16.8.10 Writing Guide
+---------------------
+
+This is a guide to writing software pipelined loops for processing limb
+vectors in assembly.
+
+   First determine the algorithm and which instructions are needed.
+Code it without unrolling or scheduling, to make sure it works.  On a
+3-operand CPU try to write each new value to a new register, this will
+greatly simplify later steps.
+
+   Then note for each instruction the functional unit and/or issue port
+requirements.  If an instruction can use either of two units, like U0
+or U1 then make a category "U0/U1".  Count the total using each unit
+(or combined unit), and count all instructions.
+
+   Figure out from those counts the best possible loop time.  The goal
+will be to find a perfect schedule where instruction latencies are
+completely hidden.  The total instruction count might be the limiting
+factor, or perhaps a particular functional unit.  It might be possible
+to tweak the instructions to help the limiting factor.
+
+   Suppose the loop time is N, then make N issue buckets, with the
+final loop branch at the end of the last.  Now fill the buckets with
+dummy instructions using the functional units desired.  Run this to
+make sure the intended speed is reached.
+
+   Now replace the dummy instructions with the real instructions from
+the slow but correct loop you started with.  The first will typically
+be a load instruction.  Then the instruction using that value is placed
+in a bucket an appropriate distance down.  Run the loop again, to check
+it still runs at target speed.
+
+   Keep placing instructions, frequently measuring the loop.  After a
+few you will need to wrap around from the last bucket back to the top
+of the loop.  If you used the new-register for new-value strategy above
+then there will be no register conflicts.  If not then take care not to
+clobber something already in use.  Changing registers at this time is
+very error prone.
+
+   The loop will overlap two or more of the original loop iterations,
+and the computation of one vector element result will be started in one
+iteration of the new loop, and completed one or several iterations
+later.
+
+   The final step is to create feed-in and wind-down code for the loop.
+A good way to do this is to make a copy (or copies) of the loop at the
+start and delete those instructions which don't have valid antecedents,
+and at the end replicate and delete those whose results are unwanted
+(including any further loads).
+
+   The loop will have a minimum number of limbs loaded and processed,
+so the feed-in code must test if the request size is smaller and skip
+either to a suitable part of the wind-down or to special code for small
+sizes.
+
+\1f
+File: gmp.info,  Node: Internals,  Next: Contributors,  Prev: Algorithms,  Up: Top
+
+17 Internals
+************
+
+*This chapter is provided only for informational purposes and the
+various internals described here may change in future GMP releases.
+Applications expecting to be compatible with future releases should use
+only the documented interfaces described in previous chapters.*
+
+* Menu:
+
+* Integer Internals::
+* Rational Internals::
+* Float Internals::
+* Raw Output Internals::
+* C++ Interface Internals::
+
+\1f
+File: gmp.info,  Node: Integer Internals,  Next: Rational Internals,  Prev: Internals,  Up: Internals
+
+17.1 Integer Internals
+======================
+
+`mpz_t' variables represent integers using sign and magnitude, in space
+dynamically allocated and reallocated.  The fields are as follows.
+
+`_mp_size'
+     The number of limbs, or the negative of that when representing a
+     negative integer.  Zero is represented by `_mp_size' set to zero,
+     in which case the `_mp_d' data is unused.
+
+`_mp_d'
+     A pointer to an array of limbs which is the magnitude.  These are
+     stored "little endian" as per the `mpn' functions, so `_mp_d[0]'
+     is the least significant limb and `_mp_d[ABS(_mp_size)-1]' is the
+     most significant.  Whenever `_mp_size' is non-zero, the most
+     significant limb is non-zero.
+
+     Currently there's always at least one limb allocated, so for
+     instance `mpz_set_ui' never needs to reallocate, and `mpz_get_ui'
+     can fetch `_mp_d[0]' unconditionally (though its value is then
+     only wanted if `_mp_size' is non-zero).
+
+`_mp_alloc'
+     `_mp_alloc' is the number of limbs currently allocated at `_mp_d',
+     and naturally `_mp_alloc >= ABS(_mp_size)'.  When an `mpz' routine
+     is about to (or might be about to) increase `_mp_size', it checks
+     `_mp_alloc' to see whether there's enough space, and reallocates
+     if not.  `MPZ_REALLOC' is generally used for this.
+
+   The various bitwise logical functions like `mpz_and' behave as if
+negative values were twos complement.  But sign and magnitude is always
+used internally, and necessary adjustments are made during the
+calculations.  Sometimes this isn't pretty, but sign and magnitude are
+best for other routines.
+
+   Some internal temporary variables are set up with `MPZ_TMP_INIT' and
+these have `_mp_d' space obtained from `TMP_ALLOC' rather than the
+memory allocation functions.  Care is taken to ensure that these are
+big enough that no reallocation is necessary (since it would have
+unpredictable consequences).
+
+   `_mp_size' and `_mp_alloc' are `int', although `mp_size_t' is
+usually a `long'.  This is done to make the fields just 32 bits on some
+64-bit systems, thereby saving a few bytes of data space but still
+providing plenty of range.
+
+\1f
+File: gmp.info,  Node: Rational Internals,  Next: Float Internals,  Prev: Integer Internals,  Up: Internals
+
+17.2 Rational Internals
+=======================
+
+`mpq_t' variables represent rationals using an `mpz_t' numerator and
+denominator (*note Integer Internals::).
+
+   The canonical form adopted is denominator positive (and non-zero),
+no common factors between numerator and denominator, and zero uniquely
+represented as 0/1.
+
+   It's believed that casting out common factors at each stage of a
+calculation is best in general.  A GCD is an O(N^2) operation so it's
+better to do a few small ones immediately than to delay and have to do
+a big one later.  Knowing the numerator and denominator have no common
+factors can be used for example in `mpq_mul' to make only two cross
+GCDs necessary, not four.
+
+   This general approach to common factors is badly sub-optimal in the
+presence of simple factorizations or little prospect for cancellation,
+but GMP has no way to know when this will occur.  As per *note
+Efficiency::, that's left to applications.  The `mpq_t' framework might
+still suit, with `mpq_numref' and `mpq_denref' for direct access to the
+numerator and denominator, or of course `mpz_t' variables can be used
+directly.
+
+\1f
+File: gmp.info,  Node: Float Internals,  Next: Raw Output Internals,  Prev: Rational Internals,  Up: Internals
+
+17.3 Float Internals
+====================
+
+Efficient calculation is the primary aim of GMP floats and the use of
+whole limbs and simple rounding facilitates this.
+
+   `mpf_t' floats have a variable precision mantissa and a single
+machine word signed exponent.  The mantissa is represented using sign
+and magnitude.
+
+        most                   least
+     significant            significant
+        limb                   limb
+
+                                 _mp_d
+      |---- _mp_exp --->           |
+       _____ _____ _____ _____ _____
+      |_____|_____|_____|_____|_____|
+                        . <------------ radix point
+
+       <-------- _mp_size --------->
+
+The fields are as follows.
+
+`_mp_size'
+     The number of limbs currently in use, or the negative of that when
+     representing a negative value.  Zero is represented by `_mp_size'
+     and `_mp_exp' both set to zero, and in that case the `_mp_d' data
+     is unused.  (In the future `_mp_exp' might be undefined when
+     representing zero.)
+
+`_mp_prec'
+     The precision of the mantissa, in limbs.  In any calculation the
+     aim is to produce `_mp_prec' limbs of result (the most significant
+     being non-zero).
+
+`_mp_d'
+     A pointer to the array of limbs which is the absolute value of the
+     mantissa.  These are stored "little endian" as per the `mpn'
+     functions, so `_mp_d[0]' is the least significant limb and
+     `_mp_d[ABS(_mp_size)-1]' the most significant.
+
+     The most significant limb is always non-zero, but there are no
+     other restrictions on its value, in particular the highest 1 bit
+     can be anywhere within the limb.
+
+     `_mp_prec+1' limbs are allocated to `_mp_d', the extra limb being
+     for convenience (see below).  There are no reallocations during a
+     calculation, only in a change of precision with `mpf_set_prec'.
+
+`_mp_exp'
+     The exponent, in limbs, determining the location of the implied
+     radix point.  Zero means the radix point is just above the most
+     significant limb.  Positive values mean a radix point offset
+     towards the lower limbs and hence a value >= 1, as for example in
+     the diagram above.  Negative exponents mean a radix point further
+     above the highest limb.
+
+     Naturally the exponent can be any value, it doesn't have to fall
+     within the limbs as the diagram shows, it can be a long way above
+     or a long way below.  Limbs other than those included in the
+     `{_mp_d,_mp_size}' data are treated as zero.
+
+   The `_mp_size' and `_mp_prec' fields are `int', although the
+`mp_size_t' type is usually a `long'.  The `_mp_exp' field is usually
+`long'.  This is done to make some fields just 32 bits on some 64-bit
+systems, thereby saving a few bytes of data space but still providing
+plenty of precision and a very large range.
+
+
+The following various points should be noted.
+
+Low Zeros
+     The least significant limbs `_mp_d[0]' etc can be zero, though
+     such low zeros can always be ignored.  Routines likely to produce
+     low zeros check and avoid them to save time in subsequent
+     calculations, but for most routines they're quite unlikely and
+     aren't checked.
+
+Mantissa Size Range
+     The `_mp_size' count of limbs in use can be less than `_mp_prec' if
+     the value can be represented in less.  This means low precision
+     values or small integers stored in a high precision `mpf_t' can
+     still be operated on efficiently.
+
+     `_mp_size' can also be greater than `_mp_prec'.  Firstly a value is
+     allowed to use all of the `_mp_prec+1' limbs available at `_mp_d',
+     and secondly when `mpf_set_prec_raw' lowers `_mp_prec' it leaves
+     `_mp_size' unchanged and so the size can be arbitrarily bigger than
+     `_mp_prec'.
+
+Rounding
+     All rounding is done on limb boundaries.  Calculating `_mp_prec'
+     limbs with the high non-zero will ensure the application requested
+     minimum precision is obtained.
+
+     The use of simple "trunc" rounding towards zero is efficient,
+     since there's no need to examine extra limbs and increment or
+     decrement.
+
+Bit Shifts
+     Since the exponent is in limbs, there are no bit shifts in basic
+     operations like `mpf_add' and `mpf_mul'.  When differing exponents
+     are encountered all that's needed is to adjust pointers to line up
+     the relevant limbs.
+
+     Of course `mpf_mul_2exp' and `mpf_div_2exp' will require bit
+     shifts, but the choice is between an exponent in limbs which
+     requires shifts there, or one in bits which requires them almost
+     everywhere else.
+
+Use of `_mp_prec+1' Limbs
+     The extra limb on `_mp_d' (`_mp_prec+1' rather than just
+     `_mp_prec') helps when an `mpf' routine might get a carry from its
+     operation.  `mpf_add' for instance will do an `mpn_add' of
+     `_mp_prec' limbs.  If there's no carry then that's the result, but
+     if there is a carry then it's stored in the extra limb of space and
+     `_mp_size' becomes `_mp_prec+1'.
+
+     Whenever `_mp_prec+1' limbs are held in a variable, the low limb
+     is not needed for the intended precision, only the `_mp_prec' high
+     limbs.  But zeroing it out or moving the rest down is unnecessary.
+     Subsequent routines reading the value will simply take the high
+     limbs they need, and this will be `_mp_prec' if their target has
+     that same precision.  This is no more than a pointer adjustment,
+     and must be checked anyway since the destination precision can be
+     different from the sources.
+
+     Copy functions like `mpf_set' will retain a full `_mp_prec+1' limbs
+     if available.  This ensures that a variable which has `_mp_size'
+     equal to `_mp_prec+1' will get its full exact value copied.
+     Strictly speaking this is unnecessary since only `_mp_prec' limbs
+     are needed for the application's requested precision, but it's
+     considered that an `mpf_set' from one variable into another of the
+     same precision ought to produce an exact copy.
+
+Application Precisions
+     `__GMPF_BITS_TO_PREC' converts an application requested precision
+     to an `_mp_prec'.  The value in bits is rounded up to a whole limb
+     then an extra limb is added since the most significant limb of
+     `_mp_d' is only non-zero and therefore might contain only one bit.
+
+     `__GMPF_PREC_TO_BITS' does the reverse conversion, and removes the
+     extra limb from `_mp_prec' before converting to bits.  The net
+     effect of reading back with `mpf_get_prec' is simply the precision
+     rounded up to a multiple of `mp_bits_per_limb'.
+
+     Note that the extra limb added here for the high only being
+     non-zero is in addition to the extra limb allocated to `_mp_d'.
+     For example with a 32-bit limb, an application request for 250
+     bits will be rounded up to 8 limbs, then an extra added for the
+     high being only non-zero, giving an `_mp_prec' of 9.  `_mp_d' then
+     gets 10 limbs allocated.  Reading back with `mpf_get_prec' will
+     take `_mp_prec', subtract 1 limb and multiply by 32, giving 256
+     bits.
+
+     Strictly speaking, the fact the high limb has at least one bit
+     means that a float with, say, 3 limbs of 32-bits each will be
+     holding at least 65 bits, but for the purposes of `mpf_t' it's
+     considered simply to be 64 bits, a nice multiple of the limb size.
+
+\1f
+File: gmp.info,  Node: Raw Output Internals,  Next: C++ Interface Internals,  Prev: Float Internals,  Up: Internals
+
+17.4 Raw Output Internals
+=========================
+
+`mpz_out_raw' uses the following format.
+
+     +------+------------------------+
+     | size |       data bytes       |
+     +------+------------------------+
+
+   The size is 4 bytes written most significant byte first, being the
+number of subsequent data bytes, or the twos complement negative of
+that when a negative integer is represented.  The data bytes are the
+absolute value of the integer, written most significant byte first.
+
+   The most significant data byte is always non-zero, so the output is
+the same on all systems, irrespective of limb size.
+
+   In GMP 1, leading zero bytes were written to pad the data bytes to a
+multiple of the limb size.  `mpz_inp_raw' will still accept this, for
+compatibility.
+
+   The use of "big endian" for both the size and data fields is
+deliberate, it makes the data easy to read in a hex dump of a file.
+Unfortunately it also means that the limb data must be reversed when
+reading or writing, so neither a big endian nor little endian system
+can just read and write `_mp_d'.
+
+\1f
+File: gmp.info,  Node: C++ Interface Internals,  Prev: Raw Output Internals,  Up: Internals
+
+17.5 C++ Interface Internals
+============================
+
+A system of expression templates is used to ensure something like
+`a=b+c' turns into a simple call to `mpz_add' etc.  For `mpf_class' the
+scheme also ensures the precision of the final destination is used for
+any temporaries within a statement like `f=w*x+y*z'.  These are
+important features which a naive implementation cannot provide.
+
+   A simplified description of the scheme follows.  The true scheme is
+complicated by the fact that expressions have different return types.
+For detailed information, refer to the source code.
+
+   To perform an operation, say, addition, we first define a "function
+object" evaluating it,
+
+     struct __gmp_binary_plus
+     {
+       static void eval(mpf_t f, mpf_t g, mpf_t h) { mpf_add(f, g, h); }
+     };
+
+And an "additive expression" object,
+
+     __gmp_expr<__gmp_binary_expr<mpf_class, mpf_class, __gmp_binary_plus> >
+     operator+(const mpf_class &f, const mpf_class &g)
+     {
+       return __gmp_expr
+         <__gmp_binary_expr<mpf_class, mpf_class, __gmp_binary_plus> >(f, g);
+     }
+
+   The seemingly redundant `__gmp_expr<__gmp_binary_expr<...>>' is used
+to encapsulate any possible kind of expression into a single template
+type.  In fact even `mpf_class' etc are `typedef' specializations of
+`__gmp_expr'.
+
+   Next we define assignment of `__gmp_expr' to `mpf_class'.
+
+     template <class T>
+     mpf_class & mpf_class::operator=(const __gmp_expr<T> &expr)
+     {
+       expr.eval(this->get_mpf_t(), this->precision());
+       return *this;
+     }
+
+     template <class Op>
+     void __gmp_expr<__gmp_binary_expr<mpf_class, mpf_class, Op> >::eval
+     (mpf_t f, mp_bitcnt_t precision)
+     {
+       Op::eval(f, expr.val1.get_mpf_t(), expr.val2.get_mpf_t());
+     }
+
+   where `expr.val1' and `expr.val2' are references to the expression's
+operands (here `expr' is the `__gmp_binary_expr' stored within the
+`__gmp_expr').
+
+   This way, the expression is actually evaluated only at the time of
+assignment, when the required precision (that of `f') is known.
+Furthermore the target `mpf_t' is now available, thus we can call
+`mpf_add' directly with `f' as the output argument.
+
+   Compound expressions are handled by defining operators taking
+subexpressions as their arguments, like this:
+
+     template <class T, class U>
+     __gmp_expr
+     <__gmp_binary_expr<__gmp_expr<T>, __gmp_expr<U>, __gmp_binary_plus> >
+     operator+(const __gmp_expr<T> &expr1, const __gmp_expr<U> &expr2)
+     {
+       return __gmp_expr
+         <__gmp_binary_expr<__gmp_expr<T>, __gmp_expr<U>, __gmp_binary_plus> >
+         (expr1, expr2);
+     }
+
+   And the corresponding specializations of `__gmp_expr::eval':
+
+     template <class T, class U, class Op>
+     void __gmp_expr
+     <__gmp_binary_expr<__gmp_expr<T>, __gmp_expr<U>, Op> >::eval
+     (mpf_t f, mp_bitcnt_t precision)
+     {
+       // declare two temporaries
+       mpf_class temp1(expr.val1, precision), temp2(expr.val2, precision);
+       Op::eval(f, temp1.get_mpf_t(), temp2.get_mpf_t());
+     }
+
+   The expression is thus recursively evaluated to any level of
+complexity and all subexpressions are evaluated to the precision of `f'.
+
+\1f
+File: gmp.info,  Node: Contributors,  Next: References,  Prev: Internals,  Up: Top
+
+Appendix A Contributors
+***********************
+
+Torbjörn Granlund wrote the original GMP library and is still the main
+developer.  Code not explicitly attributed to others was contributed by
+Torbjörn.  Several other individuals and organizations have contributed
+to GMP.  Here is a list in chronological order of first contribution:
+
+   Gunnar Sjödin and Hans Riesel helped with mathematical problems in
+early versions of the library.
+
+   Richard Stallman helped with the interface design and revised the
+first version of this manual.
+
+   Brian Beuning and Doug Lea helped with testing of early versions of
+the library and made creative suggestions.
+
+   John Amanatides of York University in Canada contributed the function
+`mpz_probab_prime_p'.
+
+   Paul Zimmermann wrote the REDC-based mpz_powm code, the
+Schönhage-Strassen FFT multiply code, and the Karatsuba square root
+code.  He also improved the Toom3 code for GMP 4.2.  Paul sparked the
+development of GMP 2, with his comparisons between bignum packages.
+The ECMNET project Paul is organizing was a driving force behind many
+of the optimizations in GMP 3.  Paul also wrote the new GMP 4.3 nth
+root code (with Torbjörn).
+
+   Ken Weber (Kent State University, Universidade Federal do Rio Grande
+do Sul) contributed now defunct versions of `mpz_gcd', `mpz_divexact',
+`mpn_gcd', and `mpn_bdivmod', partially supported by CNPq (Brazil)
+grant 301314194-2.
+
+   Per Bothner of Cygnus Support helped to set up GMP to use Cygnus'
+configure.  He has also made valuable suggestions and tested numerous
+intermediary releases.
+
+   Joachim Hollman was involved in the design of the `mpf' interface,
+and in the `mpz' design revisions for version 2.
+
+   Bennet Yee contributed the initial versions of `mpz_jacobi' and
+`mpz_legendre'.
+
+   Andreas Schwab contributed the files `mpn/m68k/lshift.S' and
+`mpn/m68k/rshift.S' (now in `.asm' form).
+
+   Robert Harley of Inria, France and David Seal of ARM, England,
+suggested clever improvements for population count.  Robert also wrote
+highly optimized Karatsuba and 3-way Toom multiplication functions for
+GMP 3, and contributed the ARM assembly code.
+
+   Torsten Ekedahl of the Mathematical department of Stockholm
+University provided significant inspiration during several phases of
+the GMP development.  His mathematical expertise helped improve several
+algorithms.
+
+   Linus Nordberg wrote the new configure system based on autoconf and
+implemented the new random functions.
+
+   Kevin Ryde worked on a large number of things: optimized x86 code,
+m4 asm macros, parameter tuning, speed measuring, the configure system,
+function inlining, divisibility tests, bit scanning, Jacobi symbols,
+Fibonacci and Lucas number functions, printf and scanf functions, perl
+interface, demo expression parser, the algorithms chapter in the
+manual, `gmpasm-mode.el', and various miscellaneous improvements
+elsewhere.
+
+   Kent Boortz made the Mac OS 9 port.
+
+   Steve Root helped write the optimized alpha 21264 assembly code.
+
+   Gerardo Ballabio wrote the `gmpxx.h' C++ class interface and the C++
+`istream' input routines.
+
+   Jason Moxham rewrote `mpz_fac_ui'.
+
+   Pedro Gimeno implemented the Mersenne Twister and made other random
+number improvements.
+
+   Niels Möller wrote the sub-quadratic GCD and extended GCD code, the
+quadratic Hensel division code, and (with Torbjörn) the new divide and
+conquer division code for GMP 4.3.  Niels also helped implement the new
+Toom multiply code for GMP 4.3 and implemented helper functions to
+simplify Toom evaluations for GMP 5.0.  He wrote the original version
+of mpn_mulmod_bnm1.
+
+   Alberto Zanoni and Marco Bodrato suggested the unbalanced multiply
+strategy, and found the optimal strategies for evaluation and
+interpolation in Toom multiplication.
+
+   Marco Bodrato helped implement the new Toom multiply code for GMP
+4.3 and implemented most of the new Toom multiply and squaring code for
+5.0.  He is the main author of the current mpn_mulmod_bnm1 and
+mpn_mullo_n.  Marco also wrote the functions mpn_invert and
+mpn_invertappr.
+
+   David Harvey suggested the internal function `mpn_bdiv_dbm1',
+implementing division relevant to Toom multiplication.  He also worked
+on fast assembly sequences, in particular on a fast AMD64
+`mpn_mul_basecase'.
+
+   Martin Boij wrote `mpn_perfect_power_p'.
+
+   (This list is chronological, not ordered by significance.  If you
+have contributed to GMP but are not listed above, please tell
+<gmp-devel@gmplib.org> about the omission!)
+
+   The development of the floating point functions of GNU MP 2 was
+supported in part by the ESPRIT-BRA (Basic Research Activities) 6846
+project POSSO (POlynomial System SOlving).
+
+   The development of GMP 2, 3, and 4 was supported in part by the IDA
+Center for Computing Sciences.
+
+   Thanks go to Hans Thorsen for donating an SGI system for the GMP
+test system environment.
+
+\1f
+File: gmp.info,  Node: References,  Next: GNU Free Documentation License,  Prev: Contributors,  Up: Top
+
+Appendix B References
+*********************
+
+B.1 Books
+=========
+
+   * Jonathan M. Borwein and Peter B. Borwein, "Pi and the AGM: A Study
+     in Analytic Number Theory and Computational Complexity", Wiley,
+     1998.
+
+   * Richard Crandall and Carl Pomerance, "Prime Numbers: A
+     Computational Perspective", 2nd edition, Springer-Verlag, 2005.
+     `http://www.math.dartmouth.edu/~carlp/'
+
+   * Henri Cohen, "A Course in Computational Algebraic Number Theory",
+     Graduate Texts in Mathematics number 138, Springer-Verlag, 1993.
+     `http://www.math.u-bordeaux.fr/~cohen/'
+
+   * Donald E. Knuth, "The Art of Computer Programming", volume 2,
+     "Seminumerical Algorithms", 3rd edition, Addison-Wesley, 1998.
+     `http://www-cs-faculty.stanford.edu/~knuth/taocp.html'
+
+   * John D. Lipson, "Elements of Algebra and Algebraic Computing", The
+     Benjamin Cummings Publishing Company Inc, 1981.
+
+   * Alfred J. Menezes, Paul C. van Oorschot and Scott A. Vanstone,
+     "Handbook of Applied Cryptography",
+     `http://www.cacr.math.uwaterloo.ca/hac/'
+
+   * Richard M. Stallman and the GCC Developer Community, "Using the
+     GNU Compiler Collection", Free Software Foundation, 2008,
+     available online `http://gcc.gnu.org/onlinedocs/', and in the GCC
+     package `ftp://ftp.gnu.org/gnu/gcc/'
+
+B.2 Papers
+==========
+
+   * Yves Bertot, Nicolas Magaud and Paul Zimmermann, "A Proof of GMP
+     Square Root", Journal of Automated Reasoning, volume 29, 2002, pp.
+     225-252.  Also available online as INRIA Research Report 4475,
+     June 2002, `http://hal.inria.fr/docs/00/07/21/13/PDF/RR-4475.pdf'
+
+   * Christoph Burnikel and Joachim Ziegler, "Fast Recursive Division",
+     Max-Planck-Institut fuer Informatik Research Report MPI-I-98-1-022,
+     `http://data.mpi-sb.mpg.de/internet/reports.nsf/NumberView/1998-1-022'
+
+   * Torbjörn Granlund and Peter L. Montgomery, "Division by Invariant
+     Integers using Multiplication", in Proceedings of the SIGPLAN
+     PLDI'94 Conference, June 1994.  Also available
+     `http://gmplib.org/~tege/divcnst-pldi94.pdf'.
+
+   * Niels Möller and Torbjörn Granlund, "Improved division by invariant
+     integers", IEEE Transactions on Computers, 11 June 2010.
+     `http://gmplib.org/~tege/division-paper.pdf'
+
+   * Torbjörn Granlund and Niels Möller, "Division of integers large and
+     small", to appear.
+
+   * Tudor Jebelean, "An algorithm for exact division", Journal of
+     Symbolic Computation, volume 15, 1993, pp. 169-180.  Research
+     report version available
+     `ftp://ftp.risc.uni-linz.ac.at/pub/techreports/1992/92-35.ps.gz'
+
+   * Tudor Jebelean, "Exact Division with Karatsuba Complexity -
+     Extended Abstract", RISC-Linz technical report 96-31,
+     `ftp://ftp.risc.uni-linz.ac.at/pub/techreports/1996/96-31.ps.gz'
+
+   * Tudor Jebelean, "Practical Integer Division with Karatsuba
+     Complexity", ISSAC 97, pp. 339-341.  Technical report available
+     `ftp://ftp.risc.uni-linz.ac.at/pub/techreports/1996/96-29.ps.gz'
+
+   * Tudor Jebelean, "A Generalization of the Binary GCD Algorithm",
+     ISSAC 93, pp. 111-116.  Technical report version available
+     `ftp://ftp.risc.uni-linz.ac.at/pub/techreports/1993/93-01.ps.gz'
+
+   * Tudor Jebelean, "A Double-Digit Lehmer-Euclid Algorithm for
+     Finding the GCD of Long Integers", Journal of Symbolic
+     Computation, volume 19, 1995, pp. 145-157.  Technical report
+     version also available
+     `ftp://ftp.risc.uni-linz.ac.at/pub/techreports/1992/92-69.ps.gz'
+
+   * Werner Krandick and Tudor Jebelean, "Bidirectional Exact Integer
+     Division", Journal of Symbolic Computation, volume 21, 1996, pp.
+     441-455.  Early technical report version also available
+     `ftp://ftp.risc.uni-linz.ac.at/pub/techreports/1994/94-50.ps.gz'
+
+   * Makoto Matsumoto and Takuji Nishimura, "Mersenne Twister: A
+     623-dimensionally equidistributed uniform pseudorandom number
+     generator", ACM Transactions on Modelling and Computer Simulation,
+     volume 8, January 1998, pp. 3-30.  Available online
+     `http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/ARTICLES/mt.ps.gz'
+     (or .pdf)
+
+   * R. Moenck and A. Borodin, "Fast Modular Transforms via Division",
+     Proceedings of the 13th Annual IEEE Symposium on Switching and
+     Automata Theory, October 1972, pp. 90-96.  Reprinted as "Fast
+     Modular Transforms", Journal of Computer and System Sciences,
+     volume 8, number 3, June 1974, pp. 366-386.
+
+   * Niels Möller, "On Schönhage's algorithm and subquadratic integer
+     GCD computation", in Mathematics of Computation, volume 77,
+     January 2008, pp. 589-607.
+
+   * Peter L. Montgomery, "Modular Multiplication Without Trial
+     Division", in Mathematics of Computation, volume 44, number 170,
+     April 1985.
+
+   * Arnold Schönhage and Volker Strassen, "Schnelle Multiplikation
+     grosser Zahlen", Computing 7, 1971, pp. 281-292.
+
+   * Kenneth Weber, "The accelerated integer GCD algorithm", ACM
+     Transactions on Mathematical Software, volume 21, number 1, March
+     1995, pp. 111-122.
+
+   * Paul Zimmermann, "Karatsuba Square Root", INRIA Research Report
+     3805, November 1999,
+     `http://hal.inria.fr/inria-00072854/PDF/RR-3805.pdf'
+
+   * Paul Zimmermann, "A Proof of GMP Fast Division and Square Root
+     Implementations",
+     `http://www.loria.fr/~zimmerma/papers/proof-div-sqrt.ps.gz'
+
+   * Dan Zuras, "On Squaring and Multiplying Large Integers", ARITH-11:
+     IEEE Symposium on Computer Arithmetic, 1993, pp. 260-271.
+     Reprinted as "More on Multiplying and Squaring Large Integers",
+     IEEE Transactions on Computers, volume 43, number 8, August 1994,
+     pp. 899-908.
+
+\1f
+File: gmp.info,  Node: GNU Free Documentation License,  Next: Concept Index,  Prev: References,  Up: Top
+
+Appendix C GNU Free Documentation License
+*****************************************
+
+                     Version 1.3, 3 November 2008
+
+     Copyright (C) 2000, 2001, 2002, 2007, 2008 Free Software Foundation, Inc.
+     `http://fsf.org/'
+
+     Everyone is permitted to copy and distribute verbatim copies
+     of this license document, but changing it is not allowed.
+
+  0. PREAMBLE
+
+     The purpose of this License is to make a manual, textbook, or other
+     functional and useful document "free" in the sense of freedom: to
+     assure everyone the effective freedom to copy and redistribute it,
+     with or without modifying it, either commercially or
+     noncommercially.  Secondarily, this License preserves for the
+     author and publisher a way to get credit for their work, while not
+     being considered responsible for modifications made by others.
+
+     This License is a kind of "copyleft", which means that derivative
+     works of the document must themselves be free in the same sense.
+     It complements the GNU General Public License, which is a copyleft
+     license designed for free software.
+
+     We have designed this License in order to use it for manuals for
+     free software, because free software needs free documentation: a
+     free program should come with manuals providing the same freedoms
+     that the software does.  But this License is not limited to
+     software manuals; it can be used for any textual work, regardless
+     of subject matter or whether it is published as a printed book.
+     We recommend this License principally for works whose purpose is
+     instruction or reference.
+
+  1. APPLICABILITY AND DEFINITIONS
+
+     This License applies to any manual or other work, in any medium,
+     that contains a notice placed by the copyright holder saying it
+     can be distributed under the terms of this License.  Such a notice
+     grants a world-wide, royalty-free license, unlimited in duration,
+     to use that work under the conditions stated herein.  The
+     "Document", below, refers to any such manual or work.  Any member
+     of the public is a licensee, and is addressed as "you".  You
+     accept the license if you copy, modify or distribute the work in a
+     way requiring permission under copyright law.
+
+     A "Modified Version" of the Document means any work containing the
+     Document or a portion of it, either copied verbatim, or with
+     modifications and/or translated into another language.
+
+     A "Secondary Section" is a named appendix or a front-matter section
+     of the Document that deals exclusively with the relationship of the
+     publishers or authors of the Document to the Document's overall
+     subject (or to related matters) and contains nothing that could
+     fall directly within that overall subject.  (Thus, if the Document
+     is in part a textbook of mathematics, a Secondary Section may not
+     explain any mathematics.)  The relationship could be a matter of
+     historical connection with the subject or with related matters, or
+     of legal, commercial, philosophical, ethical or political position
+     regarding them.
+
+     The "Invariant Sections" are certain Secondary Sections whose
+     titles are designated, as being those of Invariant Sections, in
+     the notice that says that the Document is released under this
+     License.  If a section does not fit the above definition of
+     Secondary then it is not allowed to be designated as Invariant.
+     The Document may contain zero Invariant Sections.  If the Document
+     does not identify any Invariant Sections then there are none.
+
+     The "Cover Texts" are certain short passages of text that are
+     listed, as Front-Cover Texts or Back-Cover Texts, in the notice
+     that says that the Document is released under this License.  A
+     Front-Cover Text may be at most 5 words, and a Back-Cover Text may
+     be at most 25 words.
+
+     A "Transparent" copy of the Document means a machine-readable copy,
+     represented in a format whose specification is available to the
+     general public, that is suitable for revising the document
+     straightforwardly with generic text editors or (for images
+     composed of pixels) generic paint programs or (for drawings) some
+     widely available drawing editor, and that is suitable for input to
+     text formatters or for automatic translation to a variety of
+     formats suitable for input to text formatters.  A copy made in an
+     otherwise Transparent file format whose markup, or absence of
+     markup, has been arranged to thwart or discourage subsequent
+     modification by readers is not Transparent.  An image format is
+     not Transparent if used for any substantial amount of text.  A
+     copy that is not "Transparent" is called "Opaque".
+
+     Examples of suitable formats for Transparent copies include plain
+     ASCII without markup, Texinfo input format, LaTeX input format,
+     SGML or XML using a publicly available DTD, and
+     standard-conforming simple HTML, PostScript or PDF designed for
+     human modification.  Examples of transparent image formats include
+     PNG, XCF and JPG.  Opaque formats include proprietary formats that
+     can be read and edited only by proprietary word processors, SGML or
+     XML for which the DTD and/or processing tools are not generally
+     available, and the machine-generated HTML, PostScript or PDF
+     produced by some word processors for output purposes only.
+
+     The "Title Page" means, for a printed book, the title page itself,
+     plus such following pages as are needed to hold, legibly, the
+     material this License requires to appear in the title page.  For
+     works in formats which do not have any title page as such, "Title
+     Page" means the text near the most prominent appearance of the
+     work's title, preceding the beginning of the body of the text.
+
+     The "publisher" means any person or entity that distributes copies
+     of the Document to the public.
+
+     A section "Entitled XYZ" means a named subunit of the Document
+     whose title either is precisely XYZ or contains XYZ in parentheses
+     following text that translates XYZ in another language.  (Here XYZ
+     stands for a specific section name mentioned below, such as
+     "Acknowledgements", "Dedications", "Endorsements", or "History".)
+     To "Preserve the Title" of such a section when you modify the
+     Document means that it remains a section "Entitled XYZ" according
+     to this definition.
+
+     The Document may include Warranty Disclaimers next to the notice
+     which states that this License applies to the Document.  These
+     Warranty Disclaimers are considered to be included by reference in
+     this License, but only as regards disclaiming warranties: any other
+     implication that these Warranty Disclaimers may have is void and
+     has no effect on the meaning of this License.
+
+  2. VERBATIM COPYING
+
+     You may copy and distribute the Document in any medium, either
+     commercially or noncommercially, provided that this License, the
+     copyright notices, and the license notice saying this License
+     applies to the Document are reproduced in all copies, and that you
+     add no other conditions whatsoever to those of this License.  You
+     may not use technical measures to obstruct or control the reading
+     or further copying of the copies you make or distribute.  However,
+     you may accept compensation in exchange for copies.  If you
+     distribute a large enough number of copies you must also follow
+     the conditions in section 3.
+
+     You may also lend copies, under the same conditions stated above,
+     and you may publicly display copies.
+
+  3. COPYING IN QUANTITY
+
+     If you publish printed copies (or copies in media that commonly
+     have printed covers) of the Document, numbering more than 100, and
+     the Document's license notice requires Cover Texts, you must
+     enclose the copies in covers that carry, clearly and legibly, all
+     these Cover Texts: Front-Cover Texts on the front cover, and
+     Back-Cover Texts on the back cover.  Both covers must also clearly
+     and legibly identify you as the publisher of these copies.  The
+     front cover must present the full title with all words of the
+     title equally prominent and visible.  You may add other material
+     on the covers in addition.  Copying with changes limited to the
+     covers, as long as they preserve the title of the Document and
+     satisfy these conditions, can be treated as verbatim copying in
+     other respects.
+
+     If the required texts for either cover are too voluminous to fit
+     legibly, you should put the first ones listed (as many as fit
+     reasonably) on the actual cover, and continue the rest onto
+     adjacent pages.
+
+     If you publish or distribute Opaque copies of the Document
+     numbering more than 100, you must either include a
+     machine-readable Transparent copy along with each Opaque copy, or
+     state in or with each Opaque copy a computer-network location from
+     which the general network-using public has access to download
+     using public-standard network protocols a complete Transparent
+     copy of the Document, free of added material.  If you use the
+     latter option, you must take reasonably prudent steps, when you
+     begin distribution of Opaque copies in quantity, to ensure that
+     this Transparent copy will remain thus accessible at the stated
+     location until at least one year after the last time you
+     distribute an Opaque copy (directly or through your agents or
+     retailers) of that edition to the public.
+
+     It is requested, but not required, that you contact the authors of
+     the Document well before redistributing any large number of
+     copies, to give them a chance to provide you with an updated
+     version of the Document.
+
+  4. MODIFICATIONS
+
+     You may copy and distribute a Modified Version of the Document
+     under the conditions of sections 2 and 3 above, provided that you
+     release the Modified Version under precisely this License, with
+     the Modified Version filling the role of the Document, thus
+     licensing distribution and modification of the Modified Version to
+     whoever possesses a copy of it.  In addition, you must do these
+     things in the Modified Version:
+
+       A. Use in the Title Page (and on the covers, if any) a title
+          distinct from that of the Document, and from those of
+          previous versions (which should, if there were any, be listed
+          in the History section of the Document).  You may use the
+          same title as a previous version if the original publisher of
+          that version gives permission.
+
+       B. List on the Title Page, as authors, one or more persons or
+          entities responsible for authorship of the modifications in
+          the Modified Version, together with at least five of the
+          principal authors of the Document (all of its principal
+          authors, if it has fewer than five), unless they release you
+          from this requirement.
+
+       C. State on the Title page the name of the publisher of the
+          Modified Version, as the publisher.
+
+       D. Preserve all the copyright notices of the Document.
+
+       E. Add an appropriate copyright notice for your modifications
+          adjacent to the other copyright notices.
+
+       F. Include, immediately after the copyright notices, a license
+          notice giving the public permission to use the Modified
+          Version under the terms of this License, in the form shown in
+          the Addendum below.
+
+       G. Preserve in that license notice the full lists of Invariant
+          Sections and required Cover Texts given in the Document's
+          license notice.
+
+       H. Include an unaltered copy of this License.
+
+       I. Preserve the section Entitled "History", Preserve its Title,
+          and add to it an item stating at least the title, year, new
+          authors, and publisher of the Modified Version as given on
+          the Title Page.  If there is no section Entitled "History" in
+          the Document, create one stating the title, year, authors,
+          and publisher of the Document as given on its Title Page,
+          then add an item describing the Modified Version as stated in
+          the previous sentence.
+
+       J. Preserve the network location, if any, given in the Document
+          for public access to a Transparent copy of the Document, and
+          likewise the network locations given in the Document for
+          previous versions it was based on.  These may be placed in
+          the "History" section.  You may omit a network location for a
+          work that was published at least four years before the
+          Document itself, or if the original publisher of the version
+          it refers to gives permission.
+
+       K. For any section Entitled "Acknowledgements" or "Dedications",
+          Preserve the Title of the section, and preserve in the
+          section all the substance and tone of each of the contributor
+          acknowledgements and/or dedications given therein.
+
+       L. Preserve all the Invariant Sections of the Document,
+          unaltered in their text and in their titles.  Section numbers
+          or the equivalent are not considered part of the section
+          titles.
+
+       M. Delete any section Entitled "Endorsements".  Such a section
+          may not be included in the Modified Version.
+
+       N. Do not retitle any existing section to be Entitled
+          "Endorsements" or to conflict in title with any Invariant
+          Section.
+
+       O. Preserve any Warranty Disclaimers.
+
+     If the Modified Version includes new front-matter sections or
+     appendices that qualify as Secondary Sections and contain no
+     material copied from the Document, you may at your option
+     designate some or all of these sections as invariant.  To do this,
+     add their titles to the list of Invariant Sections in the Modified
+     Version's license notice.  These titles must be distinct from any
+     other section titles.
+
+     You may add a section Entitled "Endorsements", provided it contains
+     nothing but endorsements of your Modified Version by various
+     parties--for example, statements of peer review or that the text
+     has been approved by an organization as the authoritative
+     definition of a standard.
+
+     You may add a passage of up to five words as a Front-Cover Text,
+     and a passage of up to 25 words as a Back-Cover Text, to the end
+     of the list of Cover Texts in the Modified Version.  Only one
+     passage of Front-Cover Text and one of Back-Cover Text may be
+     added by (or through arrangements made by) any one entity.  If the
+     Document already includes a cover text for the same cover,
+     previously added by you or by arrangement made by the same entity
+     you are acting on behalf of, you may not add another; but you may
+     replace the old one, on explicit permission from the previous
+     publisher that added the old one.
+
+     The author(s) and publisher(s) of the Document do not by this
+     License give permission to use their names for publicity for or to
+     assert or imply endorsement of any Modified Version.
+
+  5. COMBINING DOCUMENTS
+
+     You may combine the Document with other documents released under
+     this License, under the terms defined in section 4 above for
+     modified versions, provided that you include in the combination
+     all of the Invariant Sections of all of the original documents,
+     unmodified, and list them all as Invariant Sections of your
+     combined work in its license notice, and that you preserve all
+     their Warranty Disclaimers.
+
+     The combined work need only contain one copy of this License, and
+     multiple identical Invariant Sections may be replaced with a single
+     copy.  If there are multiple Invariant Sections with the same name
+     but different contents, make the title of each such section unique
+     by adding at the end of it, in parentheses, the name of the
+     original author or publisher of that section if known, or else a
+     unique number.  Make the same adjustment to the section titles in
+     the list of Invariant Sections in the license notice of the
+     combined work.
+
+     In the combination, you must combine any sections Entitled
+     "History" in the various original documents, forming one section
+     Entitled "History"; likewise combine any sections Entitled
+     "Acknowledgements", and any sections Entitled "Dedications".  You
+     must delete all sections Entitled "Endorsements."
+
+  6. COLLECTIONS OF DOCUMENTS
+
+     You may make a collection consisting of the Document and other
+     documents released under this License, and replace the individual
+     copies of this License in the various documents with a single copy
+     that is included in the collection, provided that you follow the
+     rules of this License for verbatim copying of each of the
+     documents in all other respects.
+
+     You may extract a single document from such a collection, and
+     distribute it individually under this License, provided you insert
+     a copy of this License into the extracted document, and follow
+     this License in all other respects regarding verbatim copying of
+     that document.
+
+  7. AGGREGATION WITH INDEPENDENT WORKS
+
+     A compilation of the Document or its derivatives with other
+     separate and independent documents or works, in or on a volume of
+     a storage or distribution medium, is called an "aggregate" if the
+     copyright resulting from the compilation is not used to limit the
+     legal rights of the compilation's users beyond what the individual
+     works permit.  When the Document is included in an aggregate, this
+     License does not apply to the other works in the aggregate which
+     are not themselves derivative works of the Document.
+
+     If the Cover Text requirement of section 3 is applicable to these
+     copies of the Document, then if the Document is less than one half
+     of the entire aggregate, the Document's Cover Texts may be placed
+     on covers that bracket the Document within the aggregate, or the
+     electronic equivalent of covers if the Document is in electronic
+     form.  Otherwise they must appear on printed covers that bracket
+     the whole aggregate.
+
+  8. TRANSLATION
+
+     Translation is considered a kind of modification, so you may
+     distribute translations of the Document under the terms of section
+     4.  Replacing Invariant Sections with translations requires special
+     permission from their copyright holders, but you may include
+     translations of some or all Invariant Sections in addition to the
+     original versions of these Invariant Sections.  You may include a
+     translation of this License, and all the license notices in the
+     Document, and any Warranty Disclaimers, provided that you also
+     include the original English version of this License and the
+     original versions of those notices and disclaimers.  In case of a
+     disagreement between the translation and the original version of
+     this License or a notice or disclaimer, the original version will
+     prevail.
+
+     If a section in the Document is Entitled "Acknowledgements",
+     "Dedications", or "History", the requirement (section 4) to
+     Preserve its Title (section 1) will typically require changing the
+     actual title.
+
+  9. TERMINATION
+
+     You may not copy, modify, sublicense, or distribute the Document
+     except as expressly provided under this License.  Any attempt
+     otherwise to copy, modify, sublicense, or distribute it is void,
+     and will automatically terminate your rights under this License.
+
+     However, if you cease all violation of this License, then your
+     license from a particular copyright holder is reinstated (a)
+     provisionally, unless and until the copyright holder explicitly
+     and finally terminates your license, and (b) permanently, if the
+     copyright holder fails to notify you of the violation by some
+     reasonable means prior to 60 days after the cessation.
+
+     Moreover, your license from a particular copyright holder is
+     reinstated permanently if the copyright holder notifies you of the
+     violation by some reasonable means, this is the first time you have
+     received notice of violation of this License (for any work) from
+     that copyright holder, and you cure the violation prior to 30 days
+     after your receipt of the notice.
+
+     Termination of your rights under this section does not terminate
+     the licenses of parties who have received copies or rights from
+     you under this License.  If your rights have been terminated and
+     not permanently reinstated, receipt of a copy of some or all of
+     the same material does not give you any rights to use it.
+
+ 10. FUTURE REVISIONS OF THIS LICENSE
+
+     The Free Software Foundation may publish new, revised versions of
+     the GNU Free Documentation License from time to time.  Such new
+     versions will be similar in spirit to the present version, but may
+     differ in detail to address new problems or concerns.  See
+     `http://www.gnu.org/copyleft/'.
+
+     Each version of the License is given a distinguishing version
+     number.  If the Document specifies that a particular numbered
+     version of this License "or any later version" applies to it, you
+     have the option of following the terms and conditions either of
+     that specified version or of any later version that has been
+     published (not as a draft) by the Free Software Foundation.  If
+     the Document does not specify a version number of this License,
+     you may choose any version ever published (not as a draft) by the
+     Free Software Foundation.  If the Document specifies that a proxy
+     can decide which future versions of this License can be used, that
+     proxy's public statement of acceptance of a version permanently
+     authorizes you to choose that version for the Document.
+
+ 11. RELICENSING
+
+     "Massive Multiauthor Collaboration Site" (or "MMC Site") means any
+     World Wide Web server that publishes copyrightable works and also
+     provides prominent facilities for anybody to edit those works.  A
+     public wiki that anybody can edit is an example of such a server.
+     A "Massive Multiauthor Collaboration" (or "MMC") contained in the
+     site means any set of copyrightable works thus published on the MMC
+     site.
+
+     "CC-BY-SA" means the Creative Commons Attribution-Share Alike 3.0
+     license published by Creative Commons Corporation, a not-for-profit
+     corporation with a principal place of business in San Francisco,
+     California, as well as future copyleft versions of that license
+     published by that same organization.
+
+     "Incorporate" means to publish or republish a Document, in whole or
+     in part, as part of another Document.
+
+     An MMC is "eligible for relicensing" if it is licensed under this
+     License, and if all works that were first published under this
+     License somewhere other than this MMC, and subsequently
+     incorporated in whole or in part into the MMC, (1) had no cover
+     texts or invariant sections, and (2) were thus incorporated prior
+     to November 1, 2008.
+
+     The operator of an MMC Site may republish an MMC contained in the
+     site under CC-BY-SA on the same site at any time before August 1,
+     2009, provided the MMC is eligible for relicensing.
+
+
+ADDENDUM: How to use this License for your documents
+====================================================
+
+To use this License in a document you have written, include a copy of
+the License in the document and put the following copyright and license
+notices just after the title page:
+
+       Copyright (C)  YEAR  YOUR NAME.
+       Permission is granted to copy, distribute and/or modify this document
+       under the terms of the GNU Free Documentation License, Version 1.3
+       or any later version published by the Free Software Foundation;
+       with no Invariant Sections, no Front-Cover Texts, and no Back-Cover
+       Texts.  A copy of the license is included in the section entitled ``GNU
+       Free Documentation License''.
+
+   If you have Invariant Sections, Front-Cover Texts and Back-Cover
+Texts, replace the "with...Texts." line with this:
+
+         with the Invariant Sections being LIST THEIR TITLES, with
+         the Front-Cover Texts being LIST, and with the Back-Cover Texts
+         being LIST.
+
+   If you have Invariant Sections without Cover Texts, or some other
+combination of the three, merge those two alternatives to suit the
+situation.
+
+   If your document contains nontrivial examples of program code, we
+recommend releasing these examples in parallel under your choice of
+free software license, such as the GNU General Public License, to
+permit their use in free software.
+
+\1f
+File: gmp.info,  Node: Concept Index,  Next: Function Index,  Prev: GNU Free Documentation License,  Up: Top
+
+Concept Index
+*************
+
+\0\b[index\0\b]
+* Menu:
+
+* #include:                              Headers and Libraries.
+                                                              (line   6)
+* --build:                               Build Options.       (line  52)
+* --disable-fft:                         Build Options.       (line 317)
+* --disable-shared:                      Build Options.       (line  45)
+* --disable-static:                      Build Options.       (line  45)
+* --enable-alloca:                       Build Options.       (line 278)
+* --enable-assert:                       Build Options.       (line 328)
+* --enable-cxx:                          Build Options.       (line 230)
+* --enable-fat:                          Build Options.       (line 164)
+* --enable-mpbsd:                        Build Options.       (line 323)
+* --enable-profiling <1>:                Profiling.           (line   6)
+* --enable-profiling:                    Build Options.       (line 332)
+* --exec-prefix:                         Build Options.       (line  32)
+* --host:                                Build Options.       (line  66)
+* --prefix:                              Build Options.       (line  32)
+* -finstrument-functions:                Profiling.           (line  66)
+* 2exp functions:                        Efficiency.          (line  43)
+* 68000:                                 Notes for Particular Systems.
+                                                              (line  80)
+* 80x86:                                 Notes for Particular Systems.
+                                                              (line 126)
+* ABI <1>:                               Build Options.       (line 171)
+* ABI:                                   ABI and ISA.         (line   6)
+* About this manual:                     Introduction to GMP. (line  58)
+* AC_CHECK_LIB:                          Autoconf.            (line  11)
+* AIX <1>:                               ABI and ISA.         (line 169)
+* AIX:                                   Notes for Particular Systems.
+                                                              (line   7)
+* Algorithms:                            Algorithms.          (line   6)
+* alloca:                                Build Options.       (line 278)
+* Allocation of memory:                  Custom Allocation.   (line   6)
+* AMD64:                                 ABI and ISA.         (line  44)
+* Anonymous FTP of latest version:       Introduction to GMP. (line  38)
+* Application Binary Interface:          ABI and ISA.         (line   6)
+* Arithmetic functions <1>:              Float Arithmetic.    (line   6)
+* Arithmetic functions <2>:              Integer Arithmetic.  (line   6)
+* Arithmetic functions:                  Rational Arithmetic. (line   6)
+* ARM:                                   Notes for Particular Systems.
+                                                              (line  20)
+* Assembly cache handling:               Assembly Cache Handling.
+                                                              (line   6)
+* Assembly carry propagation:            Assembly Carry Propagation.
+                                                              (line   6)
+* Assembly code organisation:            Assembly Code Organisation.
+                                                              (line   6)
+* Assembly coding:                       Assembly Coding.     (line   6)
+* Assembly floating point:               Assembly Floating Point.
+                                                              (line   6)
+* Assembly loop unrolling:               Assembly Loop Unrolling.
+                                                              (line   6)
+* Assembly SIMD:                         Assembly SIMD Instructions.
+                                                              (line   6)
+* Assembly software pipelining:          Assembly Software Pipelining.
+                                                              (line   6)
+* Assembly writing guide:                Assembly Writing Guide.
+                                                              (line   6)
+* Assertion checking <1>:                Debugging.           (line  79)
+* Assertion checking:                    Build Options.       (line 328)
+* Assignment functions <1>:              Assigning Integers.  (line   6)
+* Assignment functions <2>:              Simultaneous Float Init & Assign.
+                                                              (line   6)
+* Assignment functions <3>:              Assigning Floats.    (line   6)
+* Assignment functions <4>:              Initializing Rationals.
+                                                              (line   6)
+* Assignment functions:                  Simultaneous Integer Init & Assign.
+                                                              (line   6)
+* Autoconf:                              Autoconf.            (line   6)
+* Basics:                                GMP Basics.          (line   6)
+* Berkeley MP compatible functions <1>:  Build Options.       (line 323)
+* Berkeley MP compatible functions:      BSD Compatible Functions.
+                                                              (line   6)
+* Binomial coefficient algorithm:        Binomial Coefficients Algorithm.
+                                                              (line   6)
+* Binomial coefficient functions:        Number Theoretic Functions.
+                                                              (line 113)
+* Binutils strip:                        Known Build Problems.
+                                                              (line  28)
+* Bit manipulation functions:            Integer Logic and Bit Fiddling.
+                                                              (line   6)
+* Bit scanning functions:                Integer Logic and Bit Fiddling.
+                                                              (line  38)
+* Bit shift left:                        Integer Arithmetic.  (line  35)
+* Bit shift right:                       Integer Division.    (line  53)
+* Bits per limb:                         Useful Macros and Constants.
+                                                              (line   7)
+* BSD MP compatible functions <1>:       BSD Compatible Functions.
+                                                              (line   6)
+* BSD MP compatible functions:           Build Options.       (line 323)
+* Bug reporting:                         Reporting Bugs.      (line   6)
+* Build directory:                       Build Options.       (line  19)
+* Build notes for binary packaging:      Notes for Package Builds.
+                                                              (line   6)
+* Build notes for particular systems:    Notes for Particular Systems.
+                                                              (line   6)
+* Build options:                         Build Options.       (line   6)
+* Build problems known:                  Known Build Problems.
+                                                              (line   6)
+* Build system:                          Build Options.       (line  52)
+* Building GMP:                          Installing GMP.      (line   6)
+* Bus error:                             Debugging.           (line   7)
+* C compiler:                            Build Options.       (line 182)
+* C++ compiler:                          Build Options.       (line 254)
+* C++ interface:                         C++ Class Interface. (line   6)
+* C++ interface internals:               C++ Interface Internals.
+                                                              (line   6)
+* C++ istream input:                     C++ Formatted Input. (line   6)
+* C++ ostream output:                    C++ Formatted Output.
+                                                              (line   6)
+* C++ support:                           Build Options.       (line 230)
+* CC:                                    Build Options.       (line 182)
+* CC_FOR_BUILD:                          Build Options.       (line 217)
+* CFLAGS:                                Build Options.       (line 182)
+* Checker:                               Debugging.           (line 115)
+* checkergcc:                            Debugging.           (line 122)
+* Code organisation:                     Assembly Code Organisation.
+                                                              (line   6)
+* Compaq C++:                            Notes for Particular Systems.
+                                                              (line  25)
+* Comparison functions <1>:              Float Comparison.    (line   6)
+* Comparison functions <2>:              Integer Comparisons. (line   6)
+* Comparison functions:                  Comparing Rationals. (line   6)
+* Compatibility with older versions:     Compatibility with older versions.
+                                                              (line   6)
+* Conditions for copying GNU MP:         Copying.             (line   6)
+* Configuring GMP:                       Installing GMP.      (line   6)
+* Congruence algorithm:                  Exact Remainder.     (line  29)
+* Congruence functions:                  Integer Division.    (line 124)
+* Constants:                             Useful Macros and Constants.
+                                                              (line   6)
+* Contributors:                          Contributors.        (line   6)
+* Conventions for parameters:            Parameter Conventions.
+                                                              (line   6)
+* Conventions for variables:             Variable Conventions.
+                                                              (line   6)
+* Conversion functions <1>:              Rational Conversions.
+                                                              (line   6)
+* Conversion functions <2>:              Converting Integers. (line   6)
+* Conversion functions:                  Converting Floats.   (line   6)
+* Copying conditions:                    Copying.             (line   6)
+* CPPFLAGS:                              Build Options.       (line 208)
+* CPU types <1>:                         Introduction to GMP. (line  24)
+* CPU types:                             Build Options.       (line 108)
+* Cross compiling:                       Build Options.       (line  66)
+* Custom allocation:                     Custom Allocation.   (line   6)
+* CXX:                                   Build Options.       (line 254)
+* CXXFLAGS:                              Build Options.       (line 254)
+* Cygwin:                                Notes for Particular Systems.
+                                                              (line  43)
+* Darwin:                                Known Build Problems.
+                                                              (line  51)
+* Debugging:                             Debugging.           (line   6)
+* Demonstration programs:                Demonstration Programs.
+                                                              (line   6)
+* Digits in an integer:                  Miscellaneous Integer Functions.
+                                                              (line  23)
+* Divisibility algorithm:                Exact Remainder.     (line  29)
+* Divisibility functions:                Integer Division.    (line 112)
+* Divisibility testing:                  Efficiency.          (line  91)
+* Division algorithms:                   Division Algorithms. (line   6)
+* Division functions <1>:                Float Arithmetic.    (line  33)
+* Division functions <2>:                Rational Arithmetic. (line  22)
+* Division functions:                    Integer Division.    (line   6)
+* DJGPP <1>:                             Notes for Particular Systems.
+                                                              (line  43)
+* DJGPP:                                 Known Build Problems.
+                                                              (line  18)
+* DLLs:                                  Notes for Particular Systems.
+                                                              (line  56)
+* DocBook:                               Build Options.       (line 355)
+* Documentation formats:                 Build Options.       (line 348)
+* Documentation license:                 GNU Free Documentation License.
+                                                              (line   6)
+* DVI:                                   Build Options.       (line 351)
+* Efficiency:                            Efficiency.          (line   6)
+* Emacs:                                 Emacs.               (line   6)
+* Exact division functions:              Integer Division.    (line 102)
+* Exact remainder:                       Exact Remainder.     (line   6)
+* Example programs:                      Demonstration Programs.
+                                                              (line   6)
+* Exec prefix:                           Build Options.       (line  32)
+* Execution profiling <1>:               Profiling.           (line   6)
+* Execution profiling:                   Build Options.       (line 332)
+* Exponentiation functions <1>:          Integer Exponentiation.
+                                                              (line   6)
+* Exponentiation functions:              Float Arithmetic.    (line  41)
+* Export:                                Integer Import and Export.
+                                                              (line  45)
+* Expression parsing demo:               Demonstration Programs.
+                                                              (line  15)
+* Extended GCD:                          Number Theoretic Functions.
+                                                              (line  47)
+* Factor removal functions:              Number Theoretic Functions.
+                                                              (line 103)
+* Factorial algorithm:                   Factorial Algorithm. (line   6)
+* Factorial functions:                   Number Theoretic Functions.
+                                                              (line 108)
+* Factorization demo:                    Demonstration Programs.
+                                                              (line  25)
+* Fast Fourier Transform:                FFT Multiplication.  (line   6)
+* Fat binary:                            Build Options.       (line 164)
+* FFT multiplication <1>:                Build Options.       (line 317)
+* FFT multiplication:                    FFT Multiplication.  (line   6)
+* Fibonacci number algorithm:            Fibonacci Numbers Algorithm.
+                                                              (line   6)
+* Fibonacci sequence functions:          Number Theoretic Functions.
+                                                              (line 121)
+* Float arithmetic functions:            Float Arithmetic.    (line   6)
+* Float assignment functions <1>:        Simultaneous Float Init & Assign.
+                                                              (line   6)
+* Float assignment functions:            Assigning Floats.    (line   6)
+* Float comparison functions:            Float Comparison.    (line   6)
+* Float conversion functions:            Converting Floats.   (line   6)
+* Float functions:                       Floating-point Functions.
+                                                              (line   6)
+* Float initialization functions <1>:    Simultaneous Float Init & Assign.
+                                                              (line   6)
+* Float initialization functions:        Initializing Floats. (line   6)
+* Float input and output functions:      I/O of Floats.       (line   6)
+* Float internals:                       Float Internals.     (line   6)
+* Float miscellaneous functions:         Miscellaneous Float Functions.
+                                                              (line   6)
+* Float random number functions:         Miscellaneous Float Functions.
+                                                              (line  27)
+* Float rounding functions:              Miscellaneous Float Functions.
+                                                              (line   9)
+* Float sign tests:                      Float Comparison.    (line  33)
+* Floating point mode:                   Notes for Particular Systems.
+                                                              (line  34)
+* Floating-point functions:              Floating-point Functions.
+                                                              (line   6)
+* Floating-point number:                 Nomenclature and Types.
+                                                              (line  21)
+* fnccheck:                              Profiling.           (line  77)
+* Formatted input:                       Formatted Input.     (line   6)
+* Formatted output:                      Formatted Output.    (line   6)
+* Free Documentation License:            GNU Free Documentation License.
+                                                              (line   6)
+* frexp <1>:                             Converting Floats.   (line  23)
+* frexp:                                 Converting Integers. (line  42)
+* FTP of latest version:                 Introduction to GMP. (line  38)
+* Function classes:                      Function Classes.    (line   6)
+* FunctionCheck:                         Profiling.           (line  77)
+* GCC Checker:                           Debugging.           (line 115)
+* GCD algorithms:                        Greatest Common Divisor Algorithms.
+                                                              (line   6)
+* GCD extended:                          Number Theoretic Functions.
+                                                              (line  47)
+* GCD functions:                         Number Theoretic Functions.
+                                                              (line  30)
+* GDB:                                   Debugging.           (line  58)
+* Generic C:                             Build Options.       (line 153)
+* GMP Perl module:                       Demonstration Programs.
+                                                              (line  35)
+* GMP version number:                    Useful Macros and Constants.
+                                                              (line  12)
+* gmp.h:                                 Headers and Libraries.
+                                                              (line   6)
+* gmpxx.h:                               C++ Interface General.
+                                                              (line   8)
+* GNU Debugger:                          Debugging.           (line  58)
+* GNU Free Documentation License:        GNU Free Documentation License.
+                                                              (line   6)
+* GNU strip:                             Known Build Problems.
+                                                              (line  28)
+* gprof:                                 Profiling.           (line  41)
+* Greatest common divisor algorithms:    Greatest Common Divisor Algorithms.
+                                                              (line   6)
+* Greatest common divisor functions:     Number Theoretic Functions.
+                                                              (line  30)
+* Hardware floating point mode:          Notes for Particular Systems.
+                                                              (line  34)
+* Headers:                               Headers and Libraries.
+                                                              (line   6)
+* Heap problems:                         Debugging.           (line  24)
+* Home page:                             Introduction to GMP. (line  34)
+* Host system:                           Build Options.       (line  66)
+* HP-UX:                                 ABI and ISA.         (line 107)
+* HPPA:                                  ABI and ISA.         (line  68)
+* I/O functions <1>:                     I/O of Floats.       (line   6)
+* I/O functions <2>:                     I/O of Integers.     (line   6)
+* I/O functions:                         I/O of Rationals.    (line   6)
+* i386:                                  Notes for Particular Systems.
+                                                              (line 126)
+* IA-64:                                 ABI and ISA.         (line 107)
+* Import:                                Integer Import and Export.
+                                                              (line  11)
+* In-place operations:                   Efficiency.          (line  57)
+* Include files:                         Headers and Libraries.
+                                                              (line   6)
+* info-lookup-symbol:                    Emacs.               (line   6)
+* Initialization functions <1>:          Initializing Integers.
+                                                              (line   6)
+* Initialization functions <2>:          Random State Initialization.
+                                                              (line   6)
+* Initialization functions <3>:          Initializing Rationals.
+                                                              (line   6)
+* Initialization functions <4>:          Initializing Floats. (line   6)
+* Initialization functions <5>:          Simultaneous Float Init & Assign.
+                                                              (line   6)
+* Initialization functions:              Simultaneous Integer Init & Assign.
+                                                              (line   6)
+* Initializing and clearing:             Efficiency.          (line  21)
+* Input functions <1>:                   I/O of Floats.       (line   6)
+* Input functions <2>:                   I/O of Rationals.    (line   6)
+* Input functions <3>:                   I/O of Integers.     (line   6)
+* Input functions:                       Formatted Input Functions.
+                                                              (line   6)
+* Install prefix:                        Build Options.       (line  32)
+* Installing GMP:                        Installing GMP.      (line   6)
+* Instruction Set Architecture:          ABI and ISA.         (line   6)
+* instrument-functions:                  Profiling.           (line  66)
+* Integer:                               Nomenclature and Types.
+                                                              (line   6)
+* Integer arithmetic functions:          Integer Arithmetic.  (line   6)
+* Integer assignment functions <1>:      Assigning Integers.  (line   6)
+* Integer assignment functions:          Simultaneous Integer Init & Assign.
+                                                              (line   6)
+* Integer bit manipulation functions:    Integer Logic and Bit Fiddling.
+                                                              (line   6)
+* Integer comparison functions:          Integer Comparisons. (line   6)
+* Integer conversion functions:          Converting Integers. (line   6)
+* Integer division functions:            Integer Division.    (line   6)
+* Integer exponentiation functions:      Integer Exponentiation.
+                                                              (line   6)
+* Integer export:                        Integer Import and Export.
+                                                              (line  45)
+* Integer functions:                     Integer Functions.   (line   6)
+* Integer import:                        Integer Import and Export.
+                                                              (line  11)
+* Integer initialization functions <1>:  Initializing Integers.
+                                                              (line   6)
+* Integer initialization functions:      Simultaneous Integer Init & Assign.
+                                                              (line   6)
+* Integer input and output functions:    I/O of Integers.     (line   6)
+* Integer internals:                     Integer Internals.   (line   6)
+* Integer logical functions:             Integer Logic and Bit Fiddling.
+                                                              (line   6)
+* Integer miscellaneous functions:       Miscellaneous Integer Functions.
+                                                              (line   6)
+* Integer random number functions:       Integer Random Numbers.
+                                                              (line   6)
+* Integer root functions:                Integer Roots.       (line   6)
+* Integer sign tests:                    Integer Comparisons. (line  28)
+* Integer special functions:             Integer Special Functions.
+                                                              (line   6)
+* Interix:                               Notes for Particular Systems.
+                                                              (line  51)
+* Internals:                             Internals.           (line   6)
+* Introduction:                          Introduction to GMP. (line   6)
+* Inverse modulo functions:              Number Theoretic Functions.
+                                                              (line  72)
+* IRIX <1>:                              Known Build Problems.
+                                                              (line  38)
+* IRIX:                                  ABI and ISA.         (line 132)
+* ISA:                                   ABI and ISA.         (line   6)
+* istream input:                         C++ Formatted Input. (line   6)
+* Jacobi symbol algorithm:               Jacobi Symbol.       (line   6)
+* Jacobi symbol functions:               Number Theoretic Functions.
+                                                              (line  79)
+* Karatsuba multiplication:              Karatsuba Multiplication.
+                                                              (line   6)
+* Karatsuba square root algorithm:       Square Root Algorithm.
+                                                              (line   6)
+* Kronecker symbol functions:            Number Theoretic Functions.
+                                                              (line  91)
+* Language bindings:                     Language Bindings.   (line   6)
+* Latest version of GMP:                 Introduction to GMP. (line  38)
+* LCM functions:                         Number Theoretic Functions.
+                                                              (line  67)
+* Least common multiple functions:       Number Theoretic Functions.
+                                                              (line  67)
+* Legendre symbol functions:             Number Theoretic Functions.
+                                                              (line  82)
+* libgmp:                                Headers and Libraries.
+                                                              (line  22)
+* libgmpxx:                              Headers and Libraries.
+                                                              (line  27)
+* Libraries:                             Headers and Libraries.
+                                                              (line  22)
+* Libtool:                               Headers and Libraries.
+                                                              (line  33)
+* Libtool versioning:                    Notes for Package Builds.
+                                                              (line   9)
+* License conditions:                    Copying.             (line   6)
+* Limb:                                  Nomenclature and Types.
+                                                              (line  31)
+* Limb size:                             Useful Macros and Constants.
+                                                              (line   7)
+* Linear congruential algorithm:         Random Number Algorithms.
+                                                              (line  25)
+* Linear congruential random numbers:    Random State Initialization.
+                                                              (line  18)
+* Linking:                               Headers and Libraries.
+                                                              (line  22)
+* Logical functions:                     Integer Logic and Bit Fiddling.
+                                                              (line   6)
+* Low-level functions:                   Low-level Functions. (line   6)
+* Lucas number algorithm:                Lucas Numbers Algorithm.
+                                                              (line   6)
+* Lucas number functions:                Number Theoretic Functions.
+                                                              (line 132)
+* MacOS X:                               Known Build Problems.
+                                                              (line  51)
+* Mailing lists:                         Introduction to GMP. (line  45)
+* Malloc debugger:                       Debugging.           (line  30)
+* Malloc problems:                       Debugging.           (line  24)
+* Memory allocation:                     Custom Allocation.   (line   6)
+* Memory management:                     Memory Management.   (line   6)
+* Mersenne twister algorithm:            Random Number Algorithms.
+                                                              (line  17)
+* Mersenne twister random numbers:       Random State Initialization.
+                                                              (line  13)
+* MINGW:                                 Notes for Particular Systems.
+                                                              (line  43)
+* MIPS:                                  ABI and ISA.         (line 132)
+* Miscellaneous float functions:         Miscellaneous Float Functions.
+                                                              (line   6)
+* Miscellaneous integer functions:       Miscellaneous Integer Functions.
+                                                              (line   6)
+* MMX:                                   Notes for Particular Systems.
+                                                              (line 132)
+* Modular inverse functions:             Number Theoretic Functions.
+                                                              (line  72)
+* Most significant bit:                  Miscellaneous Integer Functions.
+                                                              (line  34)
+* mp.h:                                  BSD Compatible Functions.
+                                                              (line  21)
+* MPN_PATH:                              Build Options.       (line 336)
+* MS Windows:                            Notes for Particular Systems.
+                                                              (line  43)
+* MS-DOS:                                Notes for Particular Systems.
+                                                              (line  43)
+* Multi-threading:                       Reentrancy.          (line   6)
+* Multiplication algorithms:             Multiplication Algorithms.
+                                                              (line   6)
+* Nails:                                 Low-level Functions. (line 485)
+* Native compilation:                    Build Options.       (line  52)
+* NeXT:                                  Known Build Problems.
+                                                              (line  57)
+* Next prime function:                   Number Theoretic Functions.
+                                                              (line  23)
+* Nomenclature:                          Nomenclature and Types.
+                                                              (line   6)
+* Non-Unix systems:                      Build Options.       (line  11)
+* Nth root algorithm:                    Nth Root Algorithm.  (line   6)
+* Number sequences:                      Efficiency.          (line 147)
+* Number theoretic functions:            Number Theoretic Functions.
+                                                              (line   6)
+* Numerator and denominator:             Applying Integer Functions.
+                                                              (line   6)
+* obstack output:                        Formatted Output Functions.
+                                                              (line  81)
+* OpenBSD:                               Notes for Particular Systems.
+                                                              (line  86)
+* Optimizing performance:                Performance optimization.
+                                                              (line   6)
+* ostream output:                        C++ Formatted Output.
+                                                              (line   6)
+* Other languages:                       Language Bindings.   (line   6)
+* Output functions <1>:                  I/O of Integers.     (line   6)
+* Output functions <2>:                  I/O of Rationals.    (line   6)
+* Output functions <3>:                  Formatted Output Functions.
+                                                              (line   6)
+* Output functions:                      I/O of Floats.       (line   6)
+* Packaged builds:                       Notes for Package Builds.
+                                                              (line   6)
+* Parameter conventions:                 Parameter Conventions.
+                                                              (line   6)
+* Parsing expressions demo:              Demonstration Programs.
+                                                              (line  21)
+* Particular systems:                    Notes for Particular Systems.
+                                                              (line   6)
+* Past GMP versions:                     Compatibility with older versions.
+                                                              (line   6)
+* PDF:                                   Build Options.       (line 351)
+* Perfect power algorithm:               Perfect Power Algorithm.
+                                                              (line   6)
+* Perfect power functions:               Integer Roots.       (line  27)
+* Perfect square algorithm:              Perfect Square Algorithm.
+                                                              (line   6)
+* Perfect square functions:              Integer Roots.       (line  36)
+* perl:                                  Demonstration Programs.
+                                                              (line  35)
+* Perl module:                           Demonstration Programs.
+                                                              (line  35)
+* Postscript:                            Build Options.       (line 351)
+* Power/PowerPC <1>:                     Known Build Problems.
+                                                              (line  63)
+* Power/PowerPC:                         Notes for Particular Systems.
+                                                              (line  92)
+* Powering algorithms:                   Powering Algorithms. (line   6)
+* Powering functions <1>:                Float Arithmetic.    (line  41)
+* Powering functions:                    Integer Exponentiation.
+                                                              (line   6)
+* PowerPC:                               ABI and ISA.         (line 167)
+* Precision of floats:                   Floating-point Functions.
+                                                              (line   6)
+* Precision of hardware floating point:  Notes for Particular Systems.
+                                                              (line  34)
+* Prefix:                                Build Options.       (line  32)
+* Prime testing algorithms:              Prime Testing Algorithm.
+                                                              (line   6)
+* Prime testing functions:               Number Theoretic Functions.
+                                                              (line   7)
+* printf formatted output:               Formatted Output.    (line   6)
+* Probable prime testing functions:      Number Theoretic Functions.
+                                                              (line   7)
+* prof:                                  Profiling.           (line  24)
+* Profiling:                             Profiling.           (line   6)
+* Radix conversion algorithms:           Radix Conversion Algorithms.
+                                                              (line   6)
+* Random number algorithms:              Random Number Algorithms.
+                                                              (line   6)
+* Random number functions <1>:           Random Number Functions.
+                                                              (line   6)
+* Random number functions <2>:           Miscellaneous Float Functions.
+                                                              (line  27)
+* Random number functions:               Integer Random Numbers.
+                                                              (line   6)
+* Random number seeding:                 Random State Seeding.
+                                                              (line   6)
+* Random number state:                   Random State Initialization.
+                                                              (line   6)
+* Random state:                          Nomenclature and Types.
+                                                              (line  46)
+* Rational arithmetic:                   Efficiency.          (line 113)
+* Rational arithmetic functions:         Rational Arithmetic. (line   6)
+* Rational assignment functions:         Initializing Rationals.
+                                                              (line   6)
+* Rational comparison functions:         Comparing Rationals. (line   6)
+* Rational conversion functions:         Rational Conversions.
+                                                              (line   6)
+* Rational initialization functions:     Initializing Rationals.
+                                                              (line   6)
+* Rational input and output functions:   I/O of Rationals.    (line   6)
+* Rational internals:                    Rational Internals.  (line   6)
+* Rational number:                       Nomenclature and Types.
+                                                              (line  16)
+* Rational number functions:             Rational Number Functions.
+                                                              (line   6)
+* Rational numerator and denominator:    Applying Integer Functions.
+                                                              (line   6)
+* Rational sign tests:                   Comparing Rationals. (line  27)
+* Raw output internals:                  Raw Output Internals.
+                                                              (line   6)
+* Reallocations:                         Efficiency.          (line  30)
+* Reentrancy:                            Reentrancy.          (line   6)
+* References:                            References.          (line   6)
+* Remove factor functions:               Number Theoretic Functions.
+                                                              (line 103)
+* Reporting bugs:                        Reporting Bugs.      (line   6)
+* Root extraction algorithm:             Nth Root Algorithm.  (line   6)
+* Root extraction algorithms:            Root Extraction Algorithms.
+                                                              (line   6)
+* Root extraction functions <1>:         Float Arithmetic.    (line  37)
+* Root extraction functions:             Integer Roots.       (line   6)
+* Root testing functions:                Integer Roots.       (line  27)
+* Rounding functions:                    Miscellaneous Float Functions.
+                                                              (line   9)
+* Sample programs:                       Demonstration Programs.
+                                                              (line   6)
+* Scan bit functions:                    Integer Logic and Bit Fiddling.
+                                                              (line  38)
+* scanf formatted input:                 Formatted Input.     (line   6)
+* SCO:                                   Known Build Problems.
+                                                              (line  38)
+* Seeding random numbers:                Random State Seeding.
+                                                              (line   6)
+* Segmentation violation:                Debugging.           (line   7)
+* Sequent Symmetry:                      Known Build Problems.
+                                                              (line  68)
+* Services for Unix:                     Notes for Particular Systems.
+                                                              (line  51)
+* Shared library versioning:             Notes for Package Builds.
+                                                              (line   9)
+* Sign tests <1>:                        Integer Comparisons. (line  28)
+* Sign tests <2>:                        Comparing Rationals. (line  27)
+* Sign tests:                            Float Comparison.    (line  33)
+* Size in digits:                        Miscellaneous Integer Functions.
+                                                              (line  23)
+* Small operands:                        Efficiency.          (line   7)
+* Solaris <1>:                           Known Build Problems.
+                                                              (line  78)
+* Solaris:                               ABI and ISA.         (line 201)
+* Sparc:                                 Notes for Particular Systems.
+                                                              (line 103)
+* Sparc V9:                              ABI and ISA.         (line 201)
+* Special integer functions:             Integer Special Functions.
+                                                              (line   6)
+* Square root algorithm:                 Square Root Algorithm.
+                                                              (line   6)
+* SSE2:                                  Notes for Particular Systems.
+                                                              (line 132)
+* Stack backtrace:                       Debugging.           (line  50)
+* Stack overflow <1>:                    Build Options.       (line 278)
+* Stack overflow:                        Debugging.           (line   7)
+* Static linking:                        Efficiency.          (line  14)
+* stdarg.h:                              Headers and Libraries.
+                                                              (line  17)
+* stdio.h:                               Headers and Libraries.
+                                                              (line  11)
+* Stripped libraries:                    Known Build Problems.
+                                                              (line  28)
+* Sun:                                   ABI and ISA.         (line 201)
+* SunOS:                                 Notes for Particular Systems.
+                                                              (line 120)
+* Systems:                               Notes for Particular Systems.
+                                                              (line   6)
+* Temporary memory:                      Build Options.       (line 278)
+* Texinfo:                               Build Options.       (line 348)
+* Text input/output:                     Efficiency.          (line 153)
+* Thread safety:                         Reentrancy.          (line   6)
+* Toom multiplication <1>:               Other Multiplication.
+                                                              (line   6)
+* Toom multiplication <2>:               Toom 3-Way Multiplication.
+                                                              (line   6)
+* Toom multiplication <3>:               Toom 4-Way Multiplication.
+                                                              (line   6)
+* Toom multiplication:                   Higher degree Toom'n'half.
+                                                              (line   6)
+* Types:                                 Nomenclature and Types.
+                                                              (line   6)
+* ui and si functions:                   Efficiency.          (line  50)
+* Unbalanced multiplication:             Unbalanced Multiplication.
+                                                              (line   6)
+* Upward compatibility:                  Compatibility with older versions.
+                                                              (line   6)
+* Useful macros and constants:           Useful Macros and Constants.
+                                                              (line   6)
+* User-defined precision:                Floating-point Functions.
+                                                              (line   6)
+* Valgrind:                              Debugging.           (line 130)
+* Variable conventions:                  Variable Conventions.
+                                                              (line   6)
+* Version number:                        Useful Macros and Constants.
+                                                              (line  12)
+* Web page:                              Introduction to GMP. (line  34)
+* Windows:                               Notes for Particular Systems.
+                                                              (line  43)
+* x86:                                   Notes for Particular Systems.
+                                                              (line 126)
+* x87:                                   Notes for Particular Systems.
+                                                              (line  34)
+* XML:                                   Build Options.       (line 355)
+
+\1f
+File: gmp.info,  Node: Function Index,  Prev: Concept Index,  Up: Top
+
+Function and Type Index
+***********************
+
+\0\b[index\0\b]
+* Menu:
+
+* __GMP_CC:                              Useful Macros and Constants.
+                                                              (line  23)
+* __GMP_CFLAGS:                          Useful Macros and Constants.
+                                                              (line  24)
+* __GNU_MP_VERSION:                      Useful Macros and Constants.
+                                                              (line  10)
+* __GNU_MP_VERSION_MINOR:                Useful Macros and Constants.
+                                                              (line  11)
+* __GNU_MP_VERSION_PATCHLEVEL:           Useful Macros and Constants.
+                                                              (line  12)
+* _mpz_realloc:                          Integer Special Functions.
+                                                              (line  51)
+* abs <1>:                               C++ Interface Floats.
+                                                              (line  79)
+* abs <2>:                               C++ Interface Rationals.
+                                                              (line  43)
+* abs:                                   C++ Interface Integers.
+                                                              (line  42)
+* ceil:                                  C++ Interface Floats.
+                                                              (line  80)
+* cmp <1>:                               C++ Interface Floats.
+                                                              (line  81)
+* cmp <2>:                               C++ Interface Integers.
+                                                              (line  43)
+* cmp <3>:                               C++ Interface Floats.
+                                                              (line  82)
+* cmp <4>:                               C++ Interface Rationals.
+                                                              (line  45)
+* cmp:                                   C++ Interface Integers.
+                                                              (line  44)
+* floor:                                 C++ Interface Floats.
+                                                              (line  89)
+* gcd:                                   BSD Compatible Functions.
+                                                              (line  82)
+* gmp_asprintf:                          Formatted Output Functions.
+                                                              (line  65)
+* gmp_errno:                             Random State Initialization.
+                                                              (line  55)
+* GMP_ERROR_INVALID_ARGUMENT:            Random State Initialization.
+                                                              (line  55)
+* GMP_ERROR_UNSUPPORTED_ARGUMENT:        Random State Initialization.
+                                                              (line  55)
+* gmp_fprintf:                           Formatted Output Functions.
+                                                              (line  29)
+* gmp_fscanf:                            Formatted Input Functions.
+                                                              (line  25)
+* GMP_LIMB_BITS:                         Low-level Functions. (line 515)
+* GMP_NAIL_BITS:                         Low-level Functions. (line 513)
+* GMP_NAIL_MASK:                         Low-level Functions. (line 523)
+* GMP_NUMB_BITS:                         Low-level Functions. (line 514)
+* GMP_NUMB_MASK:                         Low-level Functions. (line 524)
+* GMP_NUMB_MAX:                          Low-level Functions. (line 532)
+* gmp_obstack_printf:                    Formatted Output Functions.
+                                                              (line  79)
+* gmp_obstack_vprintf:                   Formatted Output Functions.
+                                                              (line  81)
+* gmp_printf:                            Formatted Output Functions.
+                                                              (line  24)
+* GMP_RAND_ALG_DEFAULT:                  Random State Initialization.
+                                                              (line  49)
+* GMP_RAND_ALG_LC:                       Random State Initialization.
+                                                              (line  49)
+* gmp_randclass:                         C++ Interface Random Numbers.
+                                                              (line   7)
+* gmp_randclass::get_f:                  C++ Interface Random Numbers.
+                                                              (line  45)
+* gmp_randclass::get_z_bits:             C++ Interface Random Numbers.
+                                                              (line  38)
+* gmp_randclass::get_z_range:            C++ Interface Random Numbers.
+                                                              (line  42)
+* gmp_randclass::gmp_randclass:          C++ Interface Random Numbers.
+                                                              (line  27)
+* gmp_randclass::seed:                   C++ Interface Random Numbers.
+                                                              (line  34)
+* gmp_randclear:                         Random State Initialization.
+                                                              (line  62)
+* gmp_randinit:                          Random State Initialization.
+                                                              (line  47)
+* gmp_randinit_default:                  Random State Initialization.
+                                                              (line   7)
+* gmp_randinit_lc_2exp:                  Random State Initialization.
+                                                              (line  18)
+* gmp_randinit_lc_2exp_size:             Random State Initialization.
+                                                              (line  32)
+* gmp_randinit_mt:                       Random State Initialization.
+                                                              (line  13)
+* gmp_randinit_set:                      Random State Initialization.
+                                                              (line  43)
+* gmp_randseed:                          Random State Seeding.
+                                                              (line   7)
+* gmp_randseed_ui:                       Random State Seeding.
+                                                              (line   9)
+* gmp_randstate_t:                       Nomenclature and Types.
+                                                              (line  46)
+* gmp_scanf:                             Formatted Input Functions.
+                                                              (line  21)
+* gmp_snprintf:                          Formatted Output Functions.
+                                                              (line  46)
+* gmp_sprintf:                           Formatted Output Functions.
+                                                              (line  34)
+* gmp_sscanf:                            Formatted Input Functions.
+                                                              (line  29)
+* gmp_urandomb_ui:                       Random State Miscellaneous.
+                                                              (line   8)
+* gmp_urandomm_ui:                       Random State Miscellaneous.
+                                                              (line  14)
+* gmp_vasprintf:                         Formatted Output Functions.
+                                                              (line  66)
+* gmp_version:                           Useful Macros and Constants.
+                                                              (line  18)
+* gmp_vfprintf:                          Formatted Output Functions.
+                                                              (line  30)
+* gmp_vfscanf:                           Formatted Input Functions.
+                                                              (line  26)
+* gmp_vprintf:                           Formatted Output Functions.
+                                                              (line  25)
+* gmp_vscanf:                            Formatted Input Functions.
+                                                              (line  22)
+* gmp_vsnprintf:                         Formatted Output Functions.
+                                                              (line  48)
+* gmp_vsprintf:                          Formatted Output Functions.
+                                                              (line  35)
+* gmp_vsscanf:                           Formatted Input Functions.
+                                                              (line  31)
+* hypot:                                 C++ Interface Floats.
+                                                              (line  90)
+* itom:                                  BSD Compatible Functions.
+                                                              (line  29)
+* madd:                                  BSD Compatible Functions.
+                                                              (line  43)
+* mcmp:                                  BSD Compatible Functions.
+                                                              (line  85)
+* mdiv:                                  BSD Compatible Functions.
+                                                              (line  53)
+* mfree:                                 BSD Compatible Functions.
+                                                              (line 105)
+* min:                                   BSD Compatible Functions.
+                                                              (line  89)
+* MINT:                                  BSD Compatible Functions.
+                                                              (line  21)
+* mout:                                  BSD Compatible Functions.
+                                                              (line  94)
+* move:                                  BSD Compatible Functions.
+                                                              (line  39)
+* mp_bitcnt_t:                           Nomenclature and Types.
+                                                              (line  42)
+* mp_bits_per_limb:                      Useful Macros and Constants.
+                                                              (line   7)
+* mp_exp_t:                              Nomenclature and Types.
+                                                              (line  27)
+* mp_get_memory_functions:               Custom Allocation.   (line  93)
+* mp_limb_t:                             Nomenclature and Types.
+                                                              (line  31)
+* mp_set_memory_functions:               Custom Allocation.   (line  21)
+* mp_size_t:                             Nomenclature and Types.
+                                                              (line  37)
+* mpf_abs:                               Float Arithmetic.    (line  47)
+* mpf_add:                               Float Arithmetic.    (line   7)
+* mpf_add_ui:                            Float Arithmetic.    (line   9)
+* mpf_ceil:                              Miscellaneous Float Functions.
+                                                              (line   7)
+* mpf_class:                             C++ Interface General.
+                                                              (line  20)
+* mpf_class::fits_sint_p:                C++ Interface Floats.
+                                                              (line  83)
+* mpf_class::fits_slong_p:               C++ Interface Floats.
+                                                              (line  84)
+* mpf_class::fits_sshort_p:              C++ Interface Floats.
+                                                              (line  85)
+* mpf_class::fits_uint_p:                C++ Interface Floats.
+                                                              (line  86)
+* mpf_class::fits_ulong_p:               C++ Interface Floats.
+                                                              (line  87)
+* mpf_class::fits_ushort_p:              C++ Interface Floats.
+                                                              (line  88)
+* mpf_class::get_d:                      C++ Interface Floats.
+                                                              (line  91)
+* mpf_class::get_mpf_t:                  C++ Interface General.
+                                                              (line  66)
+* mpf_class::get_prec:                   C++ Interface Floats.
+                                                              (line 109)
+* mpf_class::get_si:                     C++ Interface Floats.
+                                                              (line  92)
+* mpf_class::get_str:                    C++ Interface Floats.
+                                                              (line  94)
+* mpf_class::get_ui:                     C++ Interface Floats.
+                                                              (line  95)
+* mpf_class::mpf_class:                  C++ Interface Floats.
+                                                              (line  12)
+* mpf_class::operator=:                  C++ Interface Floats.
+                                                              (line  56)
+* mpf_class::set_prec:                   C++ Interface Floats.
+                                                              (line 110)
+* mpf_class::set_prec_raw:               C++ Interface Floats.
+                                                              (line 111)
+* mpf_class::set_str:                    C++ Interface Floats.
+                                                              (line  97)
+* mpf_clear:                             Initializing Floats. (line  37)
+* mpf_clears:                            Initializing Floats. (line  41)
+* mpf_cmp:                               Float Comparison.    (line   7)
+* mpf_cmp_d:                             Float Comparison.    (line   8)
+* mpf_cmp_si:                            Float Comparison.    (line  10)
+* mpf_cmp_ui:                            Float Comparison.    (line   9)
+* mpf_div:                               Float Arithmetic.    (line  29)
+* mpf_div_2exp:                          Float Arithmetic.    (line  53)
+* mpf_div_ui:                            Float Arithmetic.    (line  33)
+* mpf_eq:                                Float Comparison.    (line  17)
+* mpf_fits_sint_p:                       Miscellaneous Float Functions.
+                                                              (line  20)
+* mpf_fits_slong_p:                      Miscellaneous Float Functions.
+                                                              (line  18)
+* mpf_fits_sshort_p:                     Miscellaneous Float Functions.
+                                                              (line  22)
+* mpf_fits_uint_p:                       Miscellaneous Float Functions.
+                                                              (line  19)
+* mpf_fits_ulong_p:                      Miscellaneous Float Functions.
+                                                              (line  17)
+* mpf_fits_ushort_p:                     Miscellaneous Float Functions.
+                                                              (line  21)
+* mpf_floor:                             Miscellaneous Float Functions.
+                                                              (line   8)
+* mpf_get_d:                             Converting Floats.   (line   7)
+* mpf_get_d_2exp:                        Converting Floats.   (line  16)
+* mpf_get_default_prec:                  Initializing Floats. (line  12)
+* mpf_get_prec:                          Initializing Floats. (line  62)
+* mpf_get_si:                            Converting Floats.   (line  27)
+* mpf_get_str:                           Converting Floats.   (line  37)
+* mpf_get_ui:                            Converting Floats.   (line  28)
+* mpf_init:                              Initializing Floats. (line  19)
+* mpf_init2:                             Initializing Floats. (line  26)
+* mpf_init_set:                          Simultaneous Float Init & Assign.
+                                                              (line  16)
+* mpf_init_set_d:                        Simultaneous Float Init & Assign.
+                                                              (line  19)
+* mpf_init_set_si:                       Simultaneous Float Init & Assign.
+                                                              (line  18)
+* mpf_init_set_str:                      Simultaneous Float Init & Assign.
+                                                              (line  25)
+* mpf_init_set_ui:                       Simultaneous Float Init & Assign.
+                                                              (line  17)
+* mpf_inits:                             Initializing Floats. (line  31)
+* mpf_inp_str:                           I/O of Floats.       (line  39)
+* mpf_integer_p:                         Miscellaneous Float Functions.
+                                                              (line  14)
+* mpf_mul:                               Float Arithmetic.    (line  19)
+* mpf_mul_2exp:                          Float Arithmetic.    (line  50)
+* mpf_mul_ui:                            Float Arithmetic.    (line  21)
+* mpf_neg:                               Float Arithmetic.    (line  44)
+* mpf_out_str:                           I/O of Floats.       (line  19)
+* mpf_pow_ui:                            Float Arithmetic.    (line  41)
+* mpf_random2:                           Miscellaneous Float Functions.
+                                                              (line  37)
+* mpf_reldiff:                           Float Comparison.    (line  29)
+* mpf_set:                               Assigning Floats.    (line  10)
+* mpf_set_d:                             Assigning Floats.    (line  13)
+* mpf_set_default_prec:                  Initializing Floats. (line   7)
+* mpf_set_prec:                          Initializing Floats. (line  65)
+* mpf_set_prec_raw:                      Initializing Floats. (line  72)
+* mpf_set_q:                             Assigning Floats.    (line  15)
+* mpf_set_si:                            Assigning Floats.    (line  12)
+* mpf_set_str:                           Assigning Floats.    (line  18)
+* mpf_set_ui:                            Assigning Floats.    (line  11)
+* mpf_set_z:                             Assigning Floats.    (line  14)
+* mpf_sgn:                               Float Comparison.    (line  33)
+* mpf_sqrt:                              Float Arithmetic.    (line  36)
+* mpf_sqrt_ui:                           Float Arithmetic.    (line  37)
+* mpf_sub:                               Float Arithmetic.    (line  12)
+* mpf_sub_ui:                            Float Arithmetic.    (line  16)
+* mpf_swap:                              Assigning Floats.    (line  52)
+* mpf_t:                                 Nomenclature and Types.
+                                                              (line  21)
+* mpf_trunc:                             Miscellaneous Float Functions.
+                                                              (line   9)
+* mpf_ui_div:                            Float Arithmetic.    (line  31)
+* mpf_ui_sub:                            Float Arithmetic.    (line  14)
+* mpf_urandomb:                          Miscellaneous Float Functions.
+                                                              (line  27)
+* mpn_add:                               Low-level Functions. (line  69)
+* mpn_add_1:                             Low-level Functions. (line  64)
+* mpn_add_n:                             Low-level Functions. (line  54)
+* mpn_addmul_1:                          Low-level Functions. (line 148)
+* mpn_and_n:                             Low-level Functions. (line 427)
+* mpn_andn_n:                            Low-level Functions. (line 442)
+* mpn_cmp:                               Low-level Functions. (line 284)
+* mpn_com:                               Low-level Functions. (line 467)
+* mpn_copyd:                             Low-level Functions. (line 476)
+* mpn_copyi:                             Low-level Functions. (line 472)
+* mpn_divexact_by3:                      Low-level Functions. (line 229)
+* mpn_divexact_by3c:                     Low-level Functions. (line 231)
+* mpn_divmod:                            Low-level Functions. (line 224)
+* mpn_divmod_1:                          Low-level Functions. (line 208)
+* mpn_divrem:                            Low-level Functions. (line 182)
+* mpn_divrem_1:                          Low-level Functions. (line 206)
+* mpn_gcd:                               Low-level Functions. (line 289)
+* mpn_gcd_1:                             Low-level Functions. (line 299)
+* mpn_gcdext:                            Low-level Functions. (line 305)
+* mpn_get_str:                           Low-level Functions. (line 352)
+* mpn_hamdist:                           Low-level Functions. (line 416)
+* mpn_ior_n:                             Low-level Functions. (line 432)
+* mpn_iorn_n:                            Low-level Functions. (line 447)
+* mpn_lshift:                            Low-level Functions. (line 260)
+* mpn_mod_1:                             Low-level Functions. (line 255)
+* mpn_mul:                               Low-level Functions. (line 114)
+* mpn_mul_1:                             Low-level Functions. (line 133)
+* mpn_mul_n:                             Low-level Functions. (line 103)
+* mpn_nand_n:                            Low-level Functions. (line 452)
+* mpn_neg:                               Low-level Functions. (line  98)
+* mpn_nior_n:                            Low-level Functions. (line 457)
+* mpn_perfect_square_p:                  Low-level Functions. (line 422)
+* mpn_popcount:                          Low-level Functions. (line 412)
+* mpn_random:                            Low-level Functions. (line 401)
+* mpn_random2:                           Low-level Functions. (line 402)
+* mpn_rshift:                            Low-level Functions. (line 272)
+* mpn_scan0:                             Low-level Functions. (line 386)
+* mpn_scan1:                             Low-level Functions. (line 394)
+* mpn_set_str:                           Low-level Functions. (line 367)
+* mpn_sqr:                               Low-level Functions. (line 125)
+* mpn_sqrtrem:                           Low-level Functions. (line 334)
+* mpn_sub:                               Low-level Functions. (line  90)
+* mpn_sub_1:                             Low-level Functions. (line  85)
+* mpn_sub_n:                             Low-level Functions. (line  76)
+* mpn_submul_1:                          Low-level Functions. (line 159)
+* mpn_tdiv_qr:                           Low-level Functions. (line 171)
+* mpn_xnor_n:                            Low-level Functions. (line 462)
+* mpn_xor_n:                             Low-level Functions. (line 437)
+* mpn_zero:                              Low-level Functions. (line 479)
+* mpq_abs:                               Rational Arithmetic. (line  31)
+* mpq_add:                               Rational Arithmetic. (line   7)
+* mpq_canonicalize:                      Rational Number Functions.
+                                                              (line  22)
+* mpq_class:                             C++ Interface General.
+                                                              (line  19)
+* mpq_class::canonicalize:               C++ Interface Rationals.
+                                                              (line  37)
+* mpq_class::get_d:                      C++ Interface Rationals.
+                                                              (line  46)
+* mpq_class::get_den:                    C++ Interface Rationals.
+                                                              (line  58)
+* mpq_class::get_den_mpz_t:              C++ Interface Rationals.
+                                                              (line  68)
+* mpq_class::get_mpq_t:                  C++ Interface General.
+                                                              (line  65)
+* mpq_class::get_num:                    C++ Interface Rationals.
+                                                              (line  57)
+* mpq_class::get_num_mpz_t:              C++ Interface Rationals.
+                                                              (line  67)
+* mpq_class::get_str:                    C++ Interface Rationals.
+                                                              (line  47)
+* mpq_class::mpq_class:                  C++ Interface Rationals.
+                                                              (line  30)
+* mpq_class::set_str:                    C++ Interface Rationals.
+                                                              (line  48)
+* mpq_clear:                             Initializing Rationals.
+                                                              (line  16)
+* mpq_clears:                            Initializing Rationals.
+                                                              (line  20)
+* mpq_cmp:                               Comparing Rationals. (line   7)
+* mpq_cmp_si:                            Comparing Rationals. (line  17)
+* mpq_cmp_ui:                            Comparing Rationals. (line  15)
+* mpq_denref:                            Applying Integer Functions.
+                                                              (line  18)
+* mpq_div:                               Rational Arithmetic. (line  22)
+* mpq_div_2exp:                          Rational Arithmetic. (line  25)
+* mpq_equal:                             Comparing Rationals. (line  33)
+* mpq_get_d:                             Rational Conversions.
+                                                              (line   7)
+* mpq_get_den:                           Applying Integer Functions.
+                                                              (line  24)
+* mpq_get_num:                           Applying Integer Functions.
+                                                              (line  23)
+* mpq_get_str:                           Rational Conversions.
+                                                              (line  22)
+* mpq_init:                              Initializing Rationals.
+                                                              (line   7)
+* mpq_inits:                             Initializing Rationals.
+                                                              (line  12)
+* mpq_inp_str:                           I/O of Rationals.    (line  26)
+* mpq_inv:                               Rational Arithmetic. (line  34)
+* mpq_mul:                               Rational Arithmetic. (line  15)
+* mpq_mul_2exp:                          Rational Arithmetic. (line  18)
+* mpq_neg:                               Rational Arithmetic. (line  28)
+* mpq_numref:                            Applying Integer Functions.
+                                                              (line  17)
+* mpq_out_str:                           I/O of Rationals.    (line  18)
+* mpq_set:                               Initializing Rationals.
+                                                              (line  24)
+* mpq_set_d:                             Rational Conversions.
+                                                              (line  17)
+* mpq_set_den:                           Applying Integer Functions.
+                                                              (line  26)
+* mpq_set_f:                             Rational Conversions.
+                                                              (line  18)
+* mpq_set_num:                           Applying Integer Functions.
+                                                              (line  25)
+* mpq_set_si:                            Initializing Rationals.
+                                                              (line  31)
+* mpq_set_str:                           Initializing Rationals.
+                                                              (line  36)
+* mpq_set_ui:                            Initializing Rationals.
+                                                              (line  29)
+* mpq_set_z:                             Initializing Rationals.
+                                                              (line  25)
+* mpq_sgn:                               Comparing Rationals. (line  27)
+* mpq_sub:                               Rational Arithmetic. (line  11)
+* mpq_swap:                              Initializing Rationals.
+                                                              (line  56)
+* mpq_t:                                 Nomenclature and Types.
+                                                              (line  16)
+* mpz_abs:                               Integer Arithmetic.  (line  42)
+* mpz_add:                               Integer Arithmetic.  (line   7)
+* mpz_add_ui:                            Integer Arithmetic.  (line   9)
+* mpz_addmul:                            Integer Arithmetic.  (line  25)
+* mpz_addmul_ui:                         Integer Arithmetic.  (line  27)
+* mpz_and:                               Integer Logic and Bit Fiddling.
+                                                              (line  11)
+* mpz_array_init:                        Integer Special Functions.
+                                                              (line  11)
+* mpz_bin_ui:                            Number Theoretic Functions.
+                                                              (line 111)
+* mpz_bin_uiui:                          Number Theoretic Functions.
+                                                              (line 113)
+* mpz_cdiv_q:                            Integer Division.    (line  13)
+* mpz_cdiv_q_2exp:                       Integer Division.    (line  24)
+* mpz_cdiv_q_ui:                         Integer Division.    (line  17)
+* mpz_cdiv_qr:                           Integer Division.    (line  15)
+* mpz_cdiv_qr_ui:                        Integer Division.    (line  21)
+* mpz_cdiv_r:                            Integer Division.    (line  14)
+* mpz_cdiv_r_2exp:                       Integer Division.    (line  25)
+* mpz_cdiv_r_ui:                         Integer Division.    (line  19)
+* mpz_cdiv_ui:                           Integer Division.    (line  23)
+* mpz_class:                             C++ Interface General.
+                                                              (line  18)
+* mpz_class::fits_sint_p:                C++ Interface Integers.
+                                                              (line  45)
+* mpz_class::fits_slong_p:               C++ Interface Integers.
+                                                              (line  46)
+* mpz_class::fits_sshort_p:              C++ Interface Integers.
+                                                              (line  47)
+* mpz_class::fits_uint_p:                C++ Interface Integers.
+                                                              (line  48)
+* mpz_class::fits_ulong_p:               C++ Interface Integers.
+                                                              (line  49)
+* mpz_class::fits_ushort_p:              C++ Interface Integers.
+                                                              (line  50)
+* mpz_class::get_d:                      C++ Interface Integers.
+                                                              (line  51)
+* mpz_class::get_mpz_t:                  C++ Interface General.
+                                                              (line  64)
+* mpz_class::get_si:                     C++ Interface Integers.
+                                                              (line  52)
+* mpz_class::get_str:                    C++ Interface Integers.
+                                                              (line  53)
+* mpz_class::get_ui:                     C++ Interface Integers.
+                                                              (line  54)
+* mpz_class::mpz_class:                  C++ Interface Integers.
+                                                              (line  20)
+* mpz_class::set_str:                    C++ Interface Integers.
+                                                              (line  55)
+* mpz_clear:                             Initializing Integers.
+                                                              (line  44)
+* mpz_clears:                            Initializing Integers.
+                                                              (line  48)
+* mpz_clrbit:                            Integer Logic and Bit Fiddling.
+                                                              (line  54)
+* mpz_cmp:                               Integer Comparisons. (line   7)
+* mpz_cmp_d:                             Integer Comparisons. (line   8)
+* mpz_cmp_si:                            Integer Comparisons. (line   9)
+* mpz_cmp_ui:                            Integer Comparisons. (line  10)
+* mpz_cmpabs:                            Integer Comparisons. (line  18)
+* mpz_cmpabs_d:                          Integer Comparisons. (line  19)
+* mpz_cmpabs_ui:                         Integer Comparisons. (line  20)
+* mpz_com:                               Integer Logic and Bit Fiddling.
+                                                              (line  20)
+* mpz_combit:                            Integer Logic and Bit Fiddling.
+                                                              (line  57)
+* mpz_congruent_2exp_p:                  Integer Division.    (line 124)
+* mpz_congruent_p:                       Integer Division.    (line 121)
+* mpz_congruent_ui_p:                    Integer Division.    (line 123)
+* mpz_divexact:                          Integer Division.    (line 101)
+* mpz_divexact_ui:                       Integer Division.    (line 102)
+* mpz_divisible_2exp_p:                  Integer Division.    (line 112)
+* mpz_divisible_p:                       Integer Division.    (line 110)
+* mpz_divisible_ui_p:                    Integer Division.    (line 111)
+* mpz_even_p:                            Miscellaneous Integer Functions.
+                                                              (line  18)
+* mpz_export:                            Integer Import and Export.
+                                                              (line  45)
+* mpz_fac_ui:                            Number Theoretic Functions.
+                                                              (line 108)
+* mpz_fdiv_q:                            Integer Division.    (line  27)
+* mpz_fdiv_q_2exp:                       Integer Division.    (line  38)
+* mpz_fdiv_q_ui:                         Integer Division.    (line  31)
+* mpz_fdiv_qr:                           Integer Division.    (line  29)
+* mpz_fdiv_qr_ui:                        Integer Division.    (line  35)
+* mpz_fdiv_r:                            Integer Division.    (line  28)
+* mpz_fdiv_r_2exp:                       Integer Division.    (line  39)
+* mpz_fdiv_r_ui:                         Integer Division.    (line  33)
+* mpz_fdiv_ui:                           Integer Division.    (line  37)
+* mpz_fib2_ui:                           Number Theoretic Functions.
+                                                              (line 121)
+* mpz_fib_ui:                            Number Theoretic Functions.
+                                                              (line 119)
+* mpz_fits_sint_p:                       Miscellaneous Integer Functions.
+                                                              (line  10)
+* mpz_fits_slong_p:                      Miscellaneous Integer Functions.
+                                                              (line   8)
+* mpz_fits_sshort_p:                     Miscellaneous Integer Functions.
+                                                              (line  12)
+* mpz_fits_uint_p:                       Miscellaneous Integer Functions.
+                                                              (line   9)
+* mpz_fits_ulong_p:                      Miscellaneous Integer Functions.
+                                                              (line   7)
+* mpz_fits_ushort_p:                     Miscellaneous Integer Functions.
+                                                              (line  11)
+* mpz_gcd:                               Number Theoretic Functions.
+                                                              (line  30)
+* mpz_gcd_ui:                            Number Theoretic Functions.
+                                                              (line  37)
+* mpz_gcdext:                            Number Theoretic Functions.
+                                                              (line  47)
+* mpz_get_d:                             Converting Integers. (line  27)
+* mpz_get_d_2exp:                        Converting Integers. (line  35)
+* mpz_get_si:                            Converting Integers. (line  18)
+* mpz_get_str:                           Converting Integers. (line  46)
+* mpz_get_ui:                            Converting Integers. (line  11)
+* mpz_getlimbn:                          Integer Special Functions.
+                                                              (line  60)
+* mpz_hamdist:                           Integer Logic and Bit Fiddling.
+                                                              (line  29)
+* mpz_import:                            Integer Import and Export.
+                                                              (line  11)
+* mpz_init:                              Initializing Integers.
+                                                              (line  26)
+* mpz_init2:                             Initializing Integers.
+                                                              (line  33)
+* mpz_init_set:                          Simultaneous Integer Init & Assign.
+                                                              (line  27)
+* mpz_init_set_d:                        Simultaneous Integer Init & Assign.
+                                                              (line  30)
+* mpz_init_set_si:                       Simultaneous Integer Init & Assign.
+                                                              (line  29)
+* mpz_init_set_str:                      Simultaneous Integer Init & Assign.
+                                                              (line  34)
+* mpz_init_set_ui:                       Simultaneous Integer Init & Assign.
+                                                              (line  28)
+* mpz_inits:                             Initializing Integers.
+                                                              (line  29)
+* mpz_inp_raw:                           I/O of Integers.     (line  61)
+* mpz_inp_str:                           I/O of Integers.     (line  30)
+* mpz_invert:                            Number Theoretic Functions.
+                                                              (line  72)
+* mpz_ior:                               Integer Logic and Bit Fiddling.
+                                                              (line  14)
+* mpz_jacobi:                            Number Theoretic Functions.
+                                                              (line  79)
+* mpz_kronecker:                         Number Theoretic Functions.
+                                                              (line  87)
+* mpz_kronecker_si:                      Number Theoretic Functions.
+                                                              (line  88)
+* mpz_kronecker_ui:                      Number Theoretic Functions.
+                                                              (line  89)
+* mpz_lcm:                               Number Theoretic Functions.
+                                                              (line  66)
+* mpz_lcm_ui:                            Number Theoretic Functions.
+                                                              (line  67)
+* mpz_legendre:                          Number Theoretic Functions.
+                                                              (line  82)
+* mpz_lucnum2_ui:                        Number Theoretic Functions.
+                                                              (line 132)
+* mpz_lucnum_ui:                         Number Theoretic Functions.
+                                                              (line 130)
+* mpz_mod:                               Integer Division.    (line  91)
+* mpz_mod_ui:                            Integer Division.    (line  93)
+* mpz_mul:                               Integer Arithmetic.  (line  19)
+* mpz_mul_2exp:                          Integer Arithmetic.  (line  35)
+* mpz_mul_si:                            Integer Arithmetic.  (line  20)
+* mpz_mul_ui:                            Integer Arithmetic.  (line  22)
+* mpz_neg:                               Integer Arithmetic.  (line  39)
+* mpz_nextprime:                         Number Theoretic Functions.
+                                                              (line  23)
+* mpz_odd_p:                             Miscellaneous Integer Functions.
+                                                              (line  17)
+* mpz_out_raw:                           I/O of Integers.     (line  45)
+* mpz_out_str:                           I/O of Integers.     (line  18)
+* mpz_perfect_power_p:                   Integer Roots.       (line  27)
+* mpz_perfect_square_p:                  Integer Roots.       (line  36)
+* mpz_popcount:                          Integer Logic and Bit Fiddling.
+                                                              (line  23)
+* mpz_pow_ui:                            Integer Exponentiation.
+                                                              (line  31)
+* mpz_powm:                              Integer Exponentiation.
+                                                              (line   8)
+* mpz_powm_sec:                          Integer Exponentiation.
+                                                              (line  18)
+* mpz_powm_ui:                           Integer Exponentiation.
+                                                              (line  10)
+* mpz_probab_prime_p:                    Number Theoretic Functions.
+                                                              (line   7)
+* mpz_random:                            Integer Random Numbers.
+                                                              (line  42)
+* mpz_random2:                           Integer Random Numbers.
+                                                              (line  51)
+* mpz_realloc2:                          Initializing Integers.
+                                                              (line  52)
+* mpz_remove:                            Number Theoretic Functions.
+                                                              (line 103)
+* mpz_root:                              Integer Roots.       (line   7)
+* mpz_rootrem:                           Integer Roots.       (line  13)
+* mpz_rrandomb:                          Integer Random Numbers.
+                                                              (line  31)
+* mpz_scan0:                             Integer Logic and Bit Fiddling.
+                                                              (line  37)
+* mpz_scan1:                             Integer Logic and Bit Fiddling.
+                                                              (line  38)
+* mpz_set:                               Assigning Integers.  (line  10)
+* mpz_set_d:                             Assigning Integers.  (line  13)
+* mpz_set_f:                             Assigning Integers.  (line  15)
+* mpz_set_q:                             Assigning Integers.  (line  14)
+* mpz_set_si:                            Assigning Integers.  (line  12)
+* mpz_set_str:                           Assigning Integers.  (line  21)
+* mpz_set_ui:                            Assigning Integers.  (line  11)
+* mpz_setbit:                            Integer Logic and Bit Fiddling.
+                                                              (line  51)
+* mpz_sgn:                               Integer Comparisons. (line  28)
+* mpz_si_kronecker:                      Number Theoretic Functions.
+                                                              (line  90)
+* mpz_size:                              Integer Special Functions.
+                                                              (line  68)
+* mpz_sizeinbase:                        Miscellaneous Integer Functions.
+                                                              (line  23)
+* mpz_sqrt:                              Integer Roots.       (line  17)
+* mpz_sqrtrem:                           Integer Roots.       (line  20)
+* mpz_sub:                               Integer Arithmetic.  (line  12)
+* mpz_sub_ui:                            Integer Arithmetic.  (line  14)
+* mpz_submul:                            Integer Arithmetic.  (line  30)
+* mpz_submul_ui:                         Integer Arithmetic.  (line  32)
+* mpz_swap:                              Assigning Integers.  (line  37)
+* mpz_t:                                 Nomenclature and Types.
+                                                              (line   6)
+* mpz_tdiv_q:                            Integer Division.    (line  41)
+* mpz_tdiv_q_2exp:                       Integer Division.    (line  52)
+* mpz_tdiv_q_ui:                         Integer Division.    (line  45)
+* mpz_tdiv_qr:                           Integer Division.    (line  43)
+* mpz_tdiv_qr_ui:                        Integer Division.    (line  49)
+* mpz_tdiv_r:                            Integer Division.    (line  42)
+* mpz_tdiv_r_2exp:                       Integer Division.    (line  53)
+* mpz_tdiv_r_ui:                         Integer Division.    (line  47)
+* mpz_tdiv_ui:                           Integer Division.    (line  51)
+* mpz_tstbit:                            Integer Logic and Bit Fiddling.
+                                                              (line  60)
+* mpz_ui_kronecker:                      Number Theoretic Functions.
+                                                              (line  91)
+* mpz_ui_pow_ui:                         Integer Exponentiation.
+                                                              (line  33)
+* mpz_ui_sub:                            Integer Arithmetic.  (line  16)
+* mpz_urandomb:                          Integer Random Numbers.
+                                                              (line  14)
+* mpz_urandomm:                          Integer Random Numbers.
+                                                              (line  23)
+* mpz_xor:                               Integer Logic and Bit Fiddling.
+                                                              (line  17)
+* msqrt:                                 BSD Compatible Functions.
+                                                              (line  63)
+* msub:                                  BSD Compatible Functions.
+                                                              (line  46)
+* mtox:                                  BSD Compatible Functions.
+                                                              (line  98)
+* mult:                                  BSD Compatible Functions.
+                                                              (line  49)
+* operator%:                             C++ Interface Integers.
+                                                              (line  30)
+* operator/:                             C++ Interface Integers.
+                                                              (line  29)
+* operator<<:                            C++ Formatted Output.
+                                                              (line  11)
+* operator>> <1>:                        C++ Formatted Input. (line  11)
+* operator>> <2>:                        C++ Interface Rationals.
+                                                              (line  77)
+* operator>>:                            C++ Formatted Input. (line  14)
+* pow:                                   BSD Compatible Functions.
+                                                              (line  71)
+* rpow:                                  BSD Compatible Functions.
+                                                              (line  79)
+* sdiv:                                  BSD Compatible Functions.
+                                                              (line  55)
+* sgn <1>:                               C++ Interface Rationals.
+                                                              (line  50)
+* sgn <2>:                               C++ Interface Integers.
+                                                              (line  57)
+* sgn:                                   C++ Interface Floats.
+                                                              (line  98)
+* sqrt <1>:                              C++ Interface Floats.
+                                                              (line  99)
+* sqrt:                                  C++ Interface Integers.
+                                                              (line  58)
+* trunc:                                 C++ Interface Floats.
+                                                              (line 100)
+* xtom:                                  BSD Compatible Functions.
+                                                              (line  34)
+
+
+
+\1f
+Local Variables:
+coding: iso-8859-1
+End:
diff --git a/doc/gmp.texi b/doc/gmp.texi

new file mode 100644 (file)

index 0000000..933df39
--- /dev/null
+++ b/doc/gmp.texi
@@ -0,0 +1,10668 @@
+\input texinfo    @c -*-texinfo-*-
+@c %**start of header
+@setfilename gmp.info
+@documentencoding ISO-8859-1
+@include version.texi
+@settitle GNU MP @value{VERSION}
+@synindex tp fn
+@iftex
+@afourpaper
+@end iftex
+@comment %**end of header
+
+@copying
+This manual describes how to install and use the GNU multiple precision
+arithmetic library, version @value{VERSION}.
+
+Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software
+Foundation, Inc.
+
+Permission is granted to copy, distribute and/or modify this document under
+the terms of the GNU Free Documentation License, Version 1.3 or any later
+version published by the Free Software Foundation; with no Invariant Sections,
+with the Front-Cover Texts being ``A GNU Manual'', and with the Back-Cover
+Texts being ``You have freedom to copy and modify this GNU Manual, like GNU
+software''.  A copy of the license is included in
+@ref{GNU Free Documentation License}.
+@end copying
+@c  Note the @ref above must be on one line, a line break in an @ref within
+@c  @copying will bomb in recent texinfo.tex (eg. 2004-04-07.08 which comes
+@c  with texinfo 4.7), with messages about missing @endcsname.
+
+
+@c  Texinfo version 4.2 or up will be needed to process this file.
+@c
+@c  The version number and edition number are taken from version.texi provided
+@c  by automake (note that it's regenerated only if you configure with
+@c  --enable-maintainer-mode).
+@c
+@c  Notes discussing the present version number of GMP in relation to previous
+@c  ones (for instance in the "Compatibility" section) must be updated
+@c  manually though.
+@c
+@c  @cindex entries have been made for function categories and programming
+@c  topics.  The "mpn" section is not included in this, because a beginner
+@c  looking for "GCD" or something is only going to be confused by pointers to
+@c  low level routines.
+@c
+@c  @cindex entries are present for processors and systems when there's
+@c  particular notes concerning them, but not just for everything GMP
+@c  supports.
+@c
+@c  Index entries for files use @code rather than @file, @samp or @option,
+@c  since the latter come out with quotes in TeX, which are nice in the text
+@c  but don't look so good in index columns.
+@c
+@c  Tex:
+@c
+@c  A suitable texinfo.tex is supplied, a newer one should work equally well.
+@c
+@c  HTML:
+@c
+@c  Nothing special is done for links to external manuals, they just come out
+@c  in the usual makeinfo style, eg. "../libc/Locales.html".  If you have
+@c  local copies of such manuals then this is a good thing, if not then you
+@c  may want to search-and-replace to some online source.
+@c
+
+@dircategory GNU libraries
+@direntry
+* gmp: (gmp).                   GNU Multiple Precision Arithmetic Library.
+@end direntry
+
+@c  html <meta name="description" content="...">
+@documentdescription
+How to install and use the GNU multiple precision arithmetic library, version @value{VERSION}.
+@end documentdescription
+
+@c smallbook
+@finalout
+@setchapternewpage on
+
+@ifnottex
+@node Top, Copying, (dir), (dir)
+@top GNU MP
+@end ifnottex
+
+@iftex
+@titlepage
+@title GNU MP
+@subtitle The GNU Multiple Precision Arithmetic Library
+@subtitle Edition @value{EDITION}
+@subtitle @value{UPDATED}
+
+@author by Torbj@"orn Granlund and the GMP development team
+@c @email{tg@@gmplib.org}
+
+@c Include the Distribution inside the titlepage so
+@c that headings are turned off.
+
+@tex
+\global\parindent=0pt
+\global\parskip=8pt
+\global\baselineskip=13pt
+@end tex
+
+@page
+@vskip 0pt plus 1filll
+@end iftex
+
+@insertcopying
+@ifnottex
+@sp 1
+@end ifnottex
+
+@iftex
+@end titlepage
+@headings double
+@end iftex
+
+@c  Don't bother with contents for html, the menus seem adequate.
+@ifnothtml
+@contents
+@end ifnothtml
+
+@menu
+* Copying::                    GMP Copying Conditions (LGPL).
+* Introduction to GMP::        Brief introduction to GNU MP.
+* Installing GMP::             How to configure and compile the GMP library.
+* GMP Basics::                 What every GMP user should know.
+* Reporting Bugs::             How to usefully report bugs.
+* Integer Functions::          Functions for arithmetic on signed integers.
+* Rational Number Functions::  Functions for arithmetic on rational numbers.
+* Floating-point Functions::   Functions for arithmetic on floats.
+* Low-level Functions::        Fast functions for natural numbers.
+* Random Number Functions::    Functions for generating random numbers.
+* Formatted Output::           @code{printf} style output.
+* Formatted Input::            @code{scanf} style input.
+* C++ Class Interface::        Class wrappers around GMP types.
+* BSD Compatible Functions::   All functions found in BSD MP.
+* Custom Allocation::          How to customize the internal allocation.
+* Language Bindings::          Using GMP from other languages.
+* Algorithms::                 What happens behind the scenes.
+* Internals::                  How values are represented behind the scenes.
+
+* Contributors::               Who brings you this library?
+* References::                 Some useful papers and books to read.
+* GNU Free Documentation License::
+* Concept Index::
+* Function Index::
+@end menu
+
+
+@c  @m{T,N} is $T$ in tex or @math{N} otherwise.  This is an easy way to give
+@c  different forms for math in tex and info.  Commas in N or T don't work,
+@c  but @C{} can be used instead.  \, works in info but not in tex.
+@iftex
+@macro m {T,N}
+@tex$\T\$@end tex
+@end macro
+@end iftex
+@ifnottex
+@macro m {T,N}
+@math{\N\}
+@end macro
+@end ifnottex
+
+@macro C {}
+,
+@end macro
+
+@c  @ms{V,N} is $V_N$ in tex or just vn otherwise.  This suits simple
+@c  subscripts like @ms{x,0}.
+@iftex
+@macro ms {V,N}
+@tex$\V\_{\N\}$@end tex
+@end macro
+@end iftex
+@ifnottex
+@macro ms {V,N}
+\V\\N\
+@end macro
+@end ifnottex
+
+@c  @nicode{S} is plain S in info, or @code{S} elsewhere.  This can be used
+@c  when the quotes that @code{} gives in info aren't wanted, but the
+@c  fontification in tex or html is wanted.  Doesn't work as @nicode{'\\0'}
+@c  though (gives two backslashes in tex).
+@ifinfo
+@macro nicode {S}
+\S\
+@end macro
+@end ifinfo
+@ifnotinfo
+@macro nicode {S}
+@code{\S\}
+@end macro
+@end ifnotinfo
+
+@c  @nisamp{S} is plain S in info, or @samp{S} elsewhere.  This can be used
+@c  when the quotes that @samp{} gives in info aren't wanted, but the
+@c  fontification in tex or html is wanted.
+@ifinfo
+@macro nisamp {S}
+\S\
+@end macro
+@end ifinfo
+@ifnotinfo
+@macro nisamp {S}
+@samp{\S\}
+@end macro
+@end ifnotinfo
+
+@c  Usage: @GMPtimes{}
+@c  Give either \times or the word "times".
+@tex
+\gdef\GMPtimes{\times}
+@end tex
+@ifnottex
+@macro GMPtimes
+times
+@end macro
+@end ifnottex
+
+@c  Usage: @GMPmultiply{}
+@c  Give * in info, or nothing in tex.
+@tex
+\gdef\GMPmultiply{}
+@end tex
+@ifnottex
+@macro GMPmultiply
+*
+@end macro
+@end ifnottex
+
+@c  Usage: @GMPabs{x}
+@c  Give either |x| in tex, or abs(x) in info or html.
+@tex
+\gdef\GMPabs#1{|#1|}
+@end tex
+@ifnottex
+@macro GMPabs {X}
+@abs{}(\X\)
+@end macro
+@end ifnottex
+
+@c  Usage: @GMPfloor{x}
+@c  Give either \lfloor x\rfloor in tex, or floor(x) in info or html.
+@tex
+\gdef\GMPfloor#1{\lfloor #1\rfloor}
+@end tex
+@ifnottex
+@macro GMPfloor {X}
+floor(\X\)
+@end macro
+@end ifnottex
+
+@c  Usage: @GMPceil{x}
+@c  Give either \lceil x\rceil in tex, or ceil(x) in info or html.
+@tex
+\gdef\GMPceil#1{\lceil #1 \rceil}
+@end tex
+@ifnottex
+@macro GMPceil {X}
+ceil(\X\)
+@end macro
+@end ifnottex
+
+@c  Math operators already available in tex, made available in info too.
+@c  For example @bmod{} can be used in both tex and info.
+@ifnottex
+@macro bmod
+mod
+@end macro
+@macro gcd
+gcd
+@end macro
+@macro ge
+>=
+@end macro
+@macro le
+<=
+@end macro
+@macro log
+log
+@end macro
+@macro min
+min
+@end macro
+@macro leftarrow
+<-
+@end macro
+@macro rightarrow
+->
+@end macro
+@end ifnottex
+
+@c  New math operators.
+@c  @abs{} can be used in both tex and info, or just \abs in tex.
+@tex
+\gdef\abs{\mathop{\rm abs}}
+@end tex
+@ifnottex
+@macro abs
+abs
+@end macro
+@end ifnottex
+
+@c  @cross{} is a \times symbol in tex, or an "x" in info.  In tex it works
+@c  inside or outside $ $.
+@tex
+\gdef\cross{\ifmmode\times\else$\times$\fi}
+@end tex
+@ifnottex
+@macro cross
+x
+@end macro
+@end ifnottex
+
+@c  @times{} made available as a "*" in info and html (already works in tex).
+@ifnottex
+@macro times
+*
+@end macro
+@end ifnottex
+
+@c  Usage: @W{text}
+@c  Like @w{} but working in math mode too.
+@tex
+\gdef\W#1{\ifmmode{#1}\else\w{#1}\fi}
+@end tex
+@ifnottex
+@macro W {S}
+@w{\S\}
+@end macro
+@end ifnottex
+
+@c  Usage: \GMPdisplay{text}
+@c  Put the given text in an @display style indent, but without turning off
+@c  paragraph reflow etc.
+@tex
+\gdef\GMPdisplay#1{%
+\noindent
+\advance\leftskip by \lispnarrowing
+#1\par}
+@end tex
+
+@c  Usage: \GMPhat
+@c  A new \hat that will work in math mode, unlike the texinfo redefined
+@c  version.
+@tex
+\gdef\GMPhat{\mathaccent"705E}
+@end tex
+
+@c  Usage: \GMPraise{text}
+@c  For use in a $ $ math expression as an alternative to "^".  This is good
+@c  for @code{} in an exponent, since there seems to be no superscript font
+@c  for that.
+@tex
+\gdef\GMPraise#1{\mskip0.5\thinmuskip\hbox{\raise0.8ex\hbox{#1}}}
+@end tex
+
+@c  Usage: @texlinebreak{}
+@c  A line break as per @*, but only in tex.
+@iftex
+@macro texlinebreak
+@*
+@end macro
+@end iftex
+@ifnottex
+@macro texlinebreak
+@end macro
+@end ifnottex
+
+@c  Usage: @maybepagebreak
+@c  Allow tex to insert a page break, if it feels the urge.
+@c  Normally blocks of @deftypefun/funx are kept together, which can lead to
+@c  some poor page break positioning if it's a big block, like the sets of
+@c  division functions etc.
+@tex
+\gdef\maybepagebreak{\penalty0}
+@end tex
+@ifnottex
+@macro maybepagebreak
+@end macro
+@end ifnottex
+
+@c  Usage: @GMPreftop{info,title}
+@c  Usage: @GMPpxreftop{info,title}
+@c
+@c  Like @ref{} and @pxref{}, but designed for a reference to the top of a
+@c  document, not a particular section.  The TeX output for plain @ref insists
+@c  on printing a particular section, GMPreftop gives just the title.
+@c
+@c  The texinfo manual recommends putting a likely section name in references
+@c  like this, eg. "Introduction", but it seems better to just give the title.
+@c
+@iftex
+@macro GMPreftop{info,title}
+@i{\title\}
+@end macro
+@macro GMPpxreftop{info,title}
+see @i{\title\}
+@end macro
+@end iftex
+@c
+@ifnottex
+@macro GMPreftop{info,title}
+@ref{Top,\title\,\title\,\info\,\title\}
+@end macro
+@macro GMPpxreftop{info,title}
+@pxref{Top,\title\,\title\,\info\,\title\}
+@end macro
+@end ifnottex
+
+
+@node Copying, Introduction to GMP, Top, Top
+@comment  node-name, next, previous,  up
+@unnumbered GNU MP Copying Conditions
+@cindex Copying conditions
+@cindex Conditions for copying GNU MP
+@cindex License conditions
+
+This library is @dfn{free}; this means that everyone is free to use it and
+free to redistribute it on a free basis.  The library is not in the public
+domain; it is copyrighted and there are restrictions on its distribution, but
+these restrictions are designed to permit everything that a good cooperating
+citizen would want to do.  What is not allowed is to try to prevent others
+from further sharing any version of this library that they might get from
+you.@refill
+
+Specifically, we want to make sure that you have the right to give away copies
+of the library, that you receive source code or else can get it if you want
+it, that you can change this library or use pieces of it in new free programs,
+and that you know you can do these things.@refill
+
+To make sure that everyone has such rights, we have to forbid you to deprive
+anyone else of these rights.  For example, if you distribute copies of the GNU
+MP library, you must give the recipients all the rights that you have.  You
+must make sure that they, too, receive or can get the source code.  And you
+must tell them their rights.@refill
+
+Also, for our own protection, we must make certain that everyone finds out
+that there is no warranty for the GNU MP library.  If it is modified by
+someone else and passed on, we want their recipients to know that what they
+have is not what we distributed, so that any problems introduced by others
+will not reflect on our reputation.@refill
+
+The precise conditions of the license for the GNU MP library are found in the
+Lesser General Public License version 3 that accompanies the source code,
+see @file{COPYING.LIB}.  Certain demonstration programs are provided under the
+terms of the plain General Public License version 3, see @file{COPYING}.
+
+
+@node Introduction to GMP, Installing GMP, Copying, Top
+@comment  node-name,  next,  previous,  up
+@chapter Introduction to GNU MP
+@cindex Introduction
+
+GNU MP is a portable library written in C for arbitrary precision arithmetic
+on integers, rational numbers, and floating-point numbers.  It aims to provide
+the fastest possible arithmetic for all applications that need higher
+precision than is directly supported by the basic C types.
+
+Many applications use just a few hundred bits of precision; but some
+applications may need thousands or even millions of bits.  GMP is designed to
+give good performance for both, by choosing algorithms based on the sizes of
+the operands, and by carefully keeping the overhead at a minimum.
+
+The speed of GMP is achieved by using fullwords as the basic arithmetic type,
+by using sophisticated algorithms, by including carefully optimized assembly
+code for the most common inner loops for many different CPUs, and by a general
+emphasis on speed (as opposed to simplicity or elegance).
+
+There is assembly code for these CPUs:
+@cindex CPU types
+ARM,
+DEC Alpha 21064, 21164, and 21264,
+AMD 29000,
+AMD K6, K6-2, Athlon, and Athlon64,
+Hitachi SuperH and SH-2,
+HPPA 1.0, 1.1 and 2.0,
+Intel Pentium, Pentium Pro/II/III, Pentium 4, generic x86,
+Intel IA-64, i960,
+Motorola MC68000, MC68020, MC88100, and MC88110,
+Motorola/IBM PowerPC 32 and 64,
+National NS32000,
+IBM POWER,
+MIPS R3000, R4000,
+SPARCv7, SuperSPARC, generic SPARCv8, UltraSPARC,
+DEC VAX,
+and
+Zilog Z8000.
+There are also some optimizations for
+Cray vector systems,
+Clipper,
+IBM ROMP (RT),
+and
+Pyramid AP/XP.
+
+@cindex Home page
+@cindex Web page
+@noindent
+For up-to-date information on GMP, please see the GMP web pages at
+
+@display
+@uref{http://gmplib.org/}
+@end display
+
+@cindex Latest version of GMP
+@cindex Anonymous FTP of latest version
+@cindex FTP of latest version
+@noindent
+The latest version of the library is available at
+
+@display
+@uref{ftp://ftp.gnu.org/gnu/gmp/}
+@end display
+
+Many sites around the world mirror @samp{ftp.gnu.org}; please use a mirror
+near you.  See @uref{http://www.gnu.org/order/ftp.html} for a full list.
+
+@cindex Mailing lists
+There are three public mailing lists of interest: one for release
+announcements, one for general questions and discussions about usage of the GMP
+library, and one for bug reports.  For more information, see
+
+@display
+@uref{http://gmplib.org/mailman/listinfo/}.
+@end display
+
+The proper place for bug reports is @email{gmp-bugs@@gmplib.org}.  See
+@ref{Reporting Bugs}, for information about reporting bugs.
+
+@sp 1
+@section How to use this Manual
+@cindex About this manual
+
+Everyone should read @ref{GMP Basics}.  If you need to install the library
+yourself, then read @ref{Installing GMP}.  If you have a system with multiple
+ABIs, then read @ref{ABI and ISA}, for the compiler options that must be used
+on applications.
+
+The rest of the manual can be used for later reference, although it is
+probably a good idea to glance through it.
+
+
+@node Installing GMP, GMP Basics, Introduction to GMP, Top
+@comment  node-name,  next,  previous,  up
+@chapter Installing GMP
+@cindex Installing GMP
+@cindex Configuring GMP
+@cindex Building GMP
+
+GMP has an autoconf/automake/libtool based configuration system.  On a
+Unix-like system a basic build can be done with
+
+@example
+./configure
+make
+@end example
+
+@noindent
+Some self-tests can be run with
+
+@example
+make check
+@end example
+
+@noindent
+And you can install (under @file{/usr/local} by default) with
+
+@example
+make install
+@end example
+
+If you experience problems, please report them to @email{gmp-bugs@@gmplib.org}.
+See @ref{Reporting Bugs}, for information on what to include in useful bug
+reports.
+
+@menu
+* Build Options::
+* ABI and ISA::
+* Notes for Package Builds::
+* Notes for Particular Systems::
+* Known Build Problems::
+* Performance optimization::
+@end menu
+
+
+@node Build Options, ABI and ISA, Installing GMP, Installing GMP
+@section Build Options
+@cindex Build options
+
+All the usual autoconf configure options are available, run @samp{./configure
+--help} for a summary.  The file @file{INSTALL.autoconf} has some generic
+installation information too.
+
+@table @asis
+@item Tools
+@cindex Non-Unix systems
+@samp{configure} requires various Unix-like tools.  See @ref{Notes for
+Particular Systems}, for some options on non-Unix systems.
+
+It might be possible to build without the help of @samp{configure}, certainly
+all the code is there, but unfortunately you'll be on your own.
+
+@item Build Directory
+@cindex Build directory
+To compile in a separate build directory, @command{cd} to that directory, and
+prefix the configure command with the path to the GMP source directory.  For
+example
+
+@example
+cd /my/build/dir
+/my/sources/gmp-@value{VERSION}/configure
+@end example
+
+Not all @samp{make} programs have the necessary features (@code{VPATH}) to
+support this.  In particular, SunOS and Solaris @command{make} have bugs that
+make them unable to build in a separate directory.  Use GNU @command{make}
+instead.
+
+@item @option{--prefix} and @option{--exec-prefix}
+@cindex Prefix
+@cindex Exec prefix
+@cindex Install prefix
+@cindex @code{--prefix}
+@cindex @code{--exec-prefix}
+The @option{--prefix} option can be used in the normal way to direct GMP to
+install under a particular tree.  The default is @samp{/usr/local}.
+
+@option{--exec-prefix} can be used to direct architecture-dependent files like
+@file{libgmp.a} to a different location.  This can be used to share
+architecture-independent parts like the documentation, but separate the
+dependent parts.  Note however that @file{gmp.h} and @file{mp.h} are
+architecture-dependent since they encode certain aspects of @file{libgmp}, so
+it will be necessary to ensure both @file{$prefix/include} and
+@file{$exec_prefix/include} are available to the compiler.
+
+@item @option{--disable-shared}, @option{--disable-static}
+@cindex @code{--disable-shared}
+@cindex @code{--disable-static}
+By default both shared and static libraries are built (where possible), but
+one or other can be disabled.  Shared libraries result in smaller executables
+and permit code sharing between separate running processes, but on some CPUs
+are slightly slower, having a small cost on each function call.
+
+@item Native Compilation, @option{--build=CPU-VENDOR-OS}
+@cindex Native compilation
+@cindex Build system
+@cindex @code{--build}
+For normal native compilation, the system can be specified with
+@samp{--build}.  By default @samp{./configure} uses the output from running
+@samp{./config.guess}.  On some systems @samp{./config.guess} can determine
+the exact CPU type, on others it will be necessary to give it explicitly.  For
+example,
+
+@example
+./configure --build=ultrasparc-sun-solaris2.7
+@end example
+
+In all cases the @samp{OS} part is important, since it controls how libtool
+generates shared libraries.  Running @samp{./config.guess} is the simplest way
+to see what it should be, if you don't know already.
+
+@item Cross Compilation, @option{--host=CPU-VENDOR-OS}
+@cindex Cross compiling
+@cindex Host system
+@cindex @code{--host}
+When cross-compiling, the system used for compiling is given by @samp{--build}
+and the system where the library will run is given by @samp{--host}.  For
+example when using a FreeBSD Athlon system to build GNU/Linux m68k binaries,
+
+@example
+./configure --build=athlon-pc-freebsd3.5 --host=m68k-mac-linux-gnu
+@end example
+
+Compiler tools are sought first with the host system type as a prefix.  For
+example @command{m68k-mac-linux-gnu-ranlib} is tried, then plain
+@command{ranlib}.  This makes it possible for a set of cross-compiling tools
+to co-exist with native tools.  The prefix is the argument to @samp{--host},
+and this can be an alias, such as @samp{m68k-linux}.  But note that tools
+don't have to be set up this way, it's enough to just have a @env{PATH} with a
+suitable cross-compiling @command{cc} etc.
+
+Compiling for a different CPU in the same family as the build system is a form
+of cross-compilation, though very possibly this would merely be special
+options on a native compiler.  In any case @samp{./configure} avoids depending
+on being able to run code on the build system, which is important when
+creating binaries for a newer CPU since they very possibly won't run on the
+build system.
+
+In all cases the compiler must be able to produce an executable (of whatever
+format) from a standard C @code{main}.  Although only object files will go to
+make up @file{libgmp}, @samp{./configure} uses linking tests for various
+purposes, such as determining what functions are available on the host system.
+
+Currently a warning is given unless an explicit @samp{--build} is used when
+cross-compiling, because it may not be possible to correctly guess the build
+system type if the @env{PATH} has only a cross-compiling @command{cc}.
+
+Note that the @samp{--target} option is not appropriate for GMP@.  It's for use
+when building compiler tools, with @samp{--host} being where they will run,
+and @samp{--target} what they'll produce code for.  Ordinary programs or
+libraries like GMP are only interested in the @samp{--host} part, being where
+they'll run.  (Some past versions of GMP used @samp{--target} incorrectly.)
+
+@item CPU types
+@cindex CPU types
+In general, if you want a library that runs as fast as possible, you should
+configure GMP for the exact CPU type your system uses.  However, this may mean
+the binaries won't run on older members of the family, and might run slower on
+other members, older or newer.  The best idea is always to build GMP for the
+exact machine type you intend to run it on.
+
+The following CPUs have specific support.  See @file{configure.in} for details
+of what code and compiler options they select.
+
+@itemize @bullet
+
+@c Keep this formatting, it's easy to read and it can be grepped to
+@c automatically test that CPUs listed get through ./config.sub
+
+@item
+Alpha:
+@nisamp{alpha},
+@nisamp{alphaev5},
+@nisamp{alphaev56},
+@nisamp{alphapca56},
+@nisamp{alphapca57},
+@nisamp{alphaev6},
+@nisamp{alphaev67},
+@nisamp{alphaev68},
+@nisamp{alphaev7}
+
+@item
+Cray:
+@nisamp{c90},
+@nisamp{j90},
+@nisamp{t90},
+@nisamp{sv1}
+
+@item
+HPPA:
+@nisamp{hppa1.0},
+@nisamp{hppa1.1},
+@nisamp{hppa2.0},
+@nisamp{hppa2.0n},
+@nisamp{hppa2.0w},
+@nisamp{hppa64}
+
+@item
+IA-64:
+@nisamp{ia64},
+@nisamp{itanium},
+@nisamp{itanium2}
+
+@item
+MIPS:
+@nisamp{mips},
+@nisamp{mips3},
+@nisamp{mips64}
+
+@item
+Motorola:
+@nisamp{m68k},
+@nisamp{m68000},
+@nisamp{m68010},
+@nisamp{m68020},
+@nisamp{m68030},
+@nisamp{m68040},
+@nisamp{m68060},
+@nisamp{m68302},
+@nisamp{m68360},
+@nisamp{m88k},
+@nisamp{m88110}
+
+@item
+POWER:
+@nisamp{power},
+@nisamp{power1},
+@nisamp{power2},
+@nisamp{power2sc}
+
+@item
+PowerPC:
+@nisamp{powerpc},
+@nisamp{powerpc64},
+@nisamp{powerpc401},
+@nisamp{powerpc403},
+@nisamp{powerpc405},
+@nisamp{powerpc505},
+@nisamp{powerpc601},
+@nisamp{powerpc602},
+@nisamp{powerpc603},
+@nisamp{powerpc603e},
+@nisamp{powerpc604},
+@nisamp{powerpc604e},
+@nisamp{powerpc620},
+@nisamp{powerpc630},
+@nisamp{powerpc740},
+@nisamp{powerpc7400},
+@nisamp{powerpc7450},
+@nisamp{powerpc750},
+@nisamp{powerpc801},
+@nisamp{powerpc821},
+@nisamp{powerpc823},
+@nisamp{powerpc860},
+@nisamp{powerpc970}
+
+@item
+SPARC:
+@nisamp{sparc},
+@nisamp{sparcv8},
+@nisamp{microsparc},
+@nisamp{supersparc},
+@nisamp{sparcv9},
+@nisamp{ultrasparc},
+@nisamp{ultrasparc2},
+@nisamp{ultrasparc2i},
+@nisamp{ultrasparc3},
+@nisamp{sparc64}
+
+@item
+x86 family:
+@nisamp{i386},
+@nisamp{i486},
+@nisamp{i586},
+@nisamp{pentium},
+@nisamp{pentiummmx},
+@nisamp{pentiumpro},
+@nisamp{pentium2},
+@nisamp{pentium3},
+@nisamp{pentium4},
+@nisamp{k6},
+@nisamp{k62},
+@nisamp{k63},
+@nisamp{athlon},
+@nisamp{amd64},
+@nisamp{viac3},
+@nisamp{viac32}
+
+@item
+Other:
+@nisamp{a29k},
+@nisamp{arm},
+@nisamp{clipper},
+@nisamp{i960},
+@nisamp{ns32k},
+@nisamp{pyramid},
+@nisamp{sh},
+@nisamp{sh2},
+@nisamp{vax},
+@nisamp{z8k}
+@end itemize
+
+CPUs not listed will use generic C code.
+
+@item Generic C Build
+@cindex Generic C
+If some of the assembly code causes problems, or if otherwise desired, the
+generic C code can be selected with CPU @samp{none}.  For example,
+
+@example
+./configure --host=none-unknown-freebsd3.5
+@end example
+
+Note that this will run quite slowly, but it should be portable and should at
+least make it possible to get something running if all else fails.
+
+@item Fat binary, @option{--enable-fat}
+@cindex Fat binary
+@cindex @option{--enable-fat}
+Using @option{--enable-fat} selects a ``fat binary'' build on x86, where
+optimized low level subroutines are chosen at runtime according to the CPU
+detected.  This means more code, but gives good performance on all x86 chips.
+(This option might become available for more architectures in the future.)
+
+@item @option{ABI}
+@cindex ABI
+On some systems GMP supports multiple ABIs (application binary interfaces),
+meaning data type sizes and calling conventions.  By default GMP chooses the
+best ABI available, but a particular ABI can be selected.  For example
+
+@example
+./configure --host=mips64-sgi-irix6 ABI=n32
+@end example
+
+See @ref{ABI and ISA}, for the available choices on relevant CPUs, and what
+applications need to do.
+
+@item @option{CC}, @option{CFLAGS}
+@cindex C compiler
+@cindex @code{CC}
+@cindex @code{CFLAGS}
+By default the C compiler used is chosen from among some likely candidates,
+with @command{gcc} normally preferred if it's present.  The usual
+@samp{CC=whatever} can be passed to @samp{./configure} to choose something
+different.
+
+For various systems, default compiler flags are set based on the CPU and
+compiler.  The usual @samp{CFLAGS="-whatever"} can be passed to
+@samp{./configure} to use something different or to set good flags for systems
+GMP doesn't otherwise know.
+
+The @samp{CC} and @samp{CFLAGS} used are printed during @samp{./configure},
+and can be found in each generated @file{Makefile}.  This is the easiest way
+to check the defaults when considering changing or adding something.
+
+Note that when @samp{CC} and @samp{CFLAGS} are specified on a system
+supporting multiple ABIs it's important to give an explicit
+@samp{ABI=whatever}, since GMP can't determine the ABI just from the flags and
+won't be able to select the correct assembly code.
+
+If just @samp{CC} is selected then normal default @samp{CFLAGS} for that
+compiler will be used (if GMP recognises it).  For example @samp{CC=gcc} can
+be used to force the use of GCC, with default flags (and default ABI).
+
+@item @option{CPPFLAGS}
+@cindex @code{CPPFLAGS}
+Any flags like @samp{-D} defines or @samp{-I} includes required by the
+preprocessor should be set in @samp{CPPFLAGS} rather than @samp{CFLAGS}.
+Compiling is done with both @samp{CPPFLAGS} and @samp{CFLAGS}, but
+preprocessing uses just @samp{CPPFLAGS}.  This distinction is because most
+preprocessors won't accept all the flags the compiler does.  Preprocessing is
+done separately in some configure tests, and in the @samp{ansi2knr} support
+for K&R compilers.
+
+@item @option{CC_FOR_BUILD}
+@cindex @code{CC_FOR_BUILD}
+Some build-time programs are compiled and run to generate host-specific data
+tables.  @samp{CC_FOR_BUILD} is the compiler used for this.  It doesn't need
+to be in any particular ABI or mode, it merely needs to generate executables
+that can run.  The default is to try the selected @samp{CC} and some likely
+candidates such as @samp{cc} and @samp{gcc}, looking for something that works.
+
+No flags are used with @samp{CC_FOR_BUILD} because a simple invocation like
+@samp{cc foo.c} should be enough.  If some particular options are required
+they can be included as for instance @samp{CC_FOR_BUILD="cc -whatever"}.
+
+@item C++ Support, @option{--enable-cxx}
+@cindex C++ support
+@cindex @code{--enable-cxx}
+C++ support in GMP can be enabled with @samp{--enable-cxx}, in which case a
+C++ compiler will be required.  As a convenience @samp{--enable-cxx=detect}
+can be used to enable C++ support only if a compiler can be found.  The C++
+support consists of a library @file{libgmpxx.la} and header file
+@file{gmpxx.h} (@pxref{Headers and Libraries}).
+
+A separate @file{libgmpxx.la} has been adopted rather than having C++ objects
+within @file{libgmp.la} in order to ensure dynamic linked C programs aren't
+bloated by a dependency on the C++ standard library, and to avoid any chance
+that the C++ compiler could be required when linking plain C programs.
+
+@file{libgmpxx.la} will use certain internals from @file{libgmp.la} and can
+only be expected to work with @file{libgmp.la} from the same GMP version.
+Future changes to the relevant internals will be accompanied by renaming, so a
+mismatch will cause unresolved symbols rather than perhaps mysterious
+misbehaviour.
+
+In general @file{libgmpxx.la} will be usable only with the C++ compiler that
+built it, since name mangling and runtime support are usually incompatible
+between different compilers.
+
+@item @option{CXX}, @option{CXXFLAGS}
+@cindex C++ compiler
+@cindex @code{CXX}
+@cindex @code{CXXFLAGS}
+When C++ support is enabled, the C++ compiler and its flags can be set with
+variables @samp{CXX} and @samp{CXXFLAGS} in the usual way.  The default for
+@samp{CXX} is the first compiler that works from a list of likely candidates,
+with @command{g++} normally preferred when available.  The default for
+@samp{CXXFLAGS} is to try @samp{CFLAGS}, @samp{CFLAGS} without @samp{-g}, then
+for @command{g++} either @samp{-g -O2} or @samp{-O2}, or for other compilers
+@samp{-g} or nothing.  Trying @samp{CFLAGS} this way is convenient when using
+@samp{gcc} and @samp{g++} together, since the flags for @samp{gcc} will
+usually suit @samp{g++}.
+
+It's important that the C and C++ compilers match, meaning their startup and
+runtime support routines are compatible and that they generate code in the
+same ABI (if there's a choice of ABIs on the system).  @samp{./configure}
+isn't currently able to check these things very well itself, so for that
+reason @samp{--disable-cxx} is the default, to avoid a build failure due to a
+compiler mismatch.  Perhaps this will change in the future.
+
+Incidentally, it's normally not good enough to set @samp{CXX} to the same as
+@samp{CC}.  Although @command{gcc} for instance recognises @file{foo.cc} as
+C++ code, only @command{g++} will invoke the linker the right way when
+building an executable or shared library from C++ object files.
+
+@item Temporary Memory, @option{--enable-alloca=<choice>}
+@cindex Temporary memory
+@cindex Stack overflow
+@cindex @code{alloca}
+@cindex @code{--enable-alloca}
+GMP allocates temporary workspace using one of the following three methods,
+which can be selected with for instance
+@samp{--enable-alloca=malloc-reentrant}.
+
+@itemize @bullet
+@item
+@samp{alloca} - C library or compiler builtin.
+@item
+@samp{malloc-reentrant} - the heap, in a re-entrant fashion.
+@item
+@samp{malloc-notreentrant} - the heap, with global variables.
+@end itemize
+
+For convenience, the following choices are also available.
+@samp{--disable-alloca} is the same as @samp{no}.
+
+@itemize @bullet
+@item
+@samp{yes} - a synonym for @samp{alloca}.
+@item
+@samp{no} - a synonym for @samp{malloc-reentrant}.
+@item
+@samp{reentrant} - @code{alloca} if available, otherwise
+@samp{malloc-reentrant}.  This is the default.
+@item
+@samp{notreentrant} - @code{alloca} if available, otherwise
+@samp{malloc-notreentrant}.
+@end itemize
+
+@code{alloca} is reentrant and fast, and is recommended.  It actually allocates
+just small blocks on the stack; larger ones use malloc-reentrant.
+
+@samp{malloc-reentrant} is, as the name suggests, reentrant and thread safe,
+but @samp{malloc-notreentrant} is faster and should be used if reentrancy is
+not required.
+
+The two malloc methods in fact use the memory allocation functions selected by
+@code{mp_set_memory_functions}, these being @code{malloc} and friends by
+default.  @xref{Custom Allocation}.
+
+An additional choice @samp{--enable-alloca=debug} is available, to help when
+debugging memory related problems (@pxref{Debugging}).
+
+@item FFT Multiplication, @option{--disable-fft}
+@cindex FFT multiplication
+@cindex @code{--disable-fft}
+By default multiplications are done using Karatsuba, 3-way Toom, higher degree
+Toom, and Fermat FFT@.  The FFT is only used on large to very large operands
+and can be disabled to save code size if desired.
+
+@item Berkeley MP, @option{--enable-mpbsd}
+@cindex Berkeley MP compatible functions
+@cindex BSD MP compatible functions
+@cindex @code{--enable-mpbsd}
+The Berkeley MP compatibility library (@file{libmp}) and header file
+(@file{mp.h}) are built and installed only if @option{--enable-mpbsd} is used.
+@xref{BSD Compatible Functions}.
+
+@item Assertion Checking, @option{--enable-assert}
+@cindex Assertion checking
+@cindex @code{--enable-assert}
+This option enables some consistency checking within the library.  This can be
+of use while debugging, @pxref{Debugging}.
+
+@item Execution Profiling, @option{--enable-profiling=prof/gprof/instrument}
+@cindex Execution profiling
+@cindex @code{--enable-profiling}
+Enable profiling support, in one of various styles, @pxref{Profiling}.
+
+@item @option{MPN_PATH}
+@cindex @code{MPN_PATH}
+Various assembly versions of each mpn subroutine are provided.  For a given
+CPU, a search is made through a path to choose a version of each.  For example
+@samp{sparcv8} has
+
+@example
+MPN_PATH="sparc32/v8 sparc32 generic"
+@end example
+
+which means look first for v8 code, then plain sparc32 (which is v7), and
+finally fall back on generic C@.  Knowledgeable users with special requirements
+can specify a different path.  Normally this is completely unnecessary.
+
+@item Documentation
+@cindex Documentation formats
+@cindex Texinfo
+The source for the document you're now reading is @file{doc/gmp.texi}, in
+Texinfo format, see @GMPreftop{texinfo, Texinfo}.
+
+@cindex Postscript
+@cindex DVI
+@cindex PDF
+Info format @samp{doc/gmp.info} is included in the distribution.  The usual
+automake targets are available to make PostScript, DVI, PDF and HTML (these
+will require various @TeX{} and Texinfo tools).
+
+@cindex DocBook
+@cindex XML
+DocBook and XML can be generated by the Texinfo @command{makeinfo} program
+too, see @ref{makeinfo options,, Options for @command{makeinfo}, texinfo,
+Texinfo}.
+
+Some supplementary notes can also be found in the @file{doc} subdirectory.
+
+@end table
+
+
+@need 2000
+@node ABI and ISA, Notes for Package Builds, Build Options, Installing GMP
+@section ABI and ISA
+@cindex ABI
+@cindex Application Binary Interface
+@cindex ISA
+@cindex Instruction Set Architecture
+
+ABI (Application Binary Interface) refers to the calling conventions between
+functions, meaning what registers are used and what sizes the various C data
+types are.  ISA (Instruction Set Architecture) refers to the instructions and
+registers a CPU has available.
+
+Some 64-bit ISA CPUs have both a 64-bit ABI and a 32-bit ABI defined, the
+latter for compatibility with older CPUs in the family.  GMP supports some
+CPUs like this in both ABIs.  In fact within GMP @samp{ABI} means a
+combination of chip ABI, plus how GMP chooses to use it.  For example in some
+32-bit ABIs, GMP may support a limb as either a 32-bit @code{long} or a 64-bit
+@code{long long}.
+
+By default GMP chooses the best ABI available for a given system, and this
+generally gives significantly greater speed.  But an ABI can be chosen
+explicitly to make GMP compatible with other libraries, or particular
+application requirements.  For example,
+
+@example
+./configure ABI=32
+@end example
+
+In all cases it's vital that all object code used in a given program is
+compiled for the same ABI.
+
+Usually a limb is implemented as a @code{long}.  When a @code{long long} limb
+is used this is encoded in the generated @file{gmp.h}.  This is convenient for
+applications, but it does mean that @file{gmp.h} will vary, and can't be just
+copied around.  @file{gmp.h} remains compiler independent though, since all
+compilers for a particular ABI will be expected to use the same limb type.
+
+Currently no attempt is made to follow whatever conventions a system has for
+installing library or header files built for a particular ABI@.  This will
+probably only matter when installing multiple builds of GMP, and it might be
+as simple as configuring with a special @samp{libdir}, or it might require
+more than that.  Note that builds for different ABIs need to be done separately,
+with a fresh @command{./configure} and @command{make} each.
+
+@sp 1
+@table @asis
+@need 1000
+@item AMD64 (@samp{x86_64})
+@cindex AMD64
+On AMD64 systems supporting both 32-bit and 64-bit modes for applications, the
+following ABI choices are available.
+
+@table @asis
+@item @samp{ABI=64}
+The 64-bit ABI uses 64-bit limbs and pointers and makes full use of the chip
+architecture.  This is the default.  Applications will usually not need
+special compiler flags, but for reference the option is
+
+@example
+gcc  -m64
+@end example
+
+@item @samp{ABI=32}
+The 32-bit ABI is the usual i386 conventions.  This will be slower, and is not
+recommended except for inter-operating with other code not yet 64-bit capable.
+Applications must be compiled with
+
+@example
+gcc  -m32
+@end example
+
+(In GCC 2.95 and earlier there's no @samp{-m32} option, it's the only mode.)
+@end table
+
+@sp 1
+@need 1000
+@item HPPA 2.0 (@samp{hppa2.0*}, @samp{hppa64})
+@cindex HPPA
+@cindex HP-UX
+@table @asis
+@item @samp{ABI=2.0w}
+The 2.0w ABI uses 64-bit limbs and pointers and is available on HP-UX 11 or
+up.  Applications must be compiled with
+
+@example
+gcc [built for 2.0w]
+cc  +DD64
+@end example
+
+@item @samp{ABI=2.0n}
+The 2.0n ABI means the 32-bit HPPA 1.0 ABI and all its normal calling
+conventions, but with 64-bit instructions permitted within functions.  GMP
+uses a 64-bit @code{long long} for a limb.  This ABI is available on hppa64
+GNU/Linux and on HP-UX 10 or higher.  Applications must be compiled with
+
+@example
+gcc [built for 2.0n]
+cc  +DA2.0 +e
+@end example
+
+Note that current versions of GCC (eg.@: 3.2) don't generate 64-bit
+instructions for @code{long long} operations and so may be slower than for
+2.0w.  (The GMP assembly code is the same though.)
+
+@item @samp{ABI=1.0}
+HPPA 2.0 CPUs can run all HPPA 1.0 and 1.1 code in the 32-bit HPPA 1.0 ABI@.
+No special compiler options are needed for applications.
+@end table
+
+All three ABIs are available for CPU types @samp{hppa2.0w}, @samp{hppa2.0} and
+@samp{hppa64}, but for CPU type @samp{hppa2.0n} only 2.0n or 1.0 are
+considered.
+
+Note that GCC on HP-UX has no options to choose between 2.0n and 2.0w modes,
+unlike HP @command{cc}.  Instead it must be built for one or the other ABI@.
+GMP will detect how it was built, and skip to the corresponding @samp{ABI}.
+
+@sp 1
+@need 1500
+@item IA-64 under HP-UX (@samp{ia64*-*-hpux*}, @samp{itanium*-*-hpux*})
+@cindex IA-64
+@cindex HP-UX
+HP-UX supports two ABIs for IA-64.  GMP performance is the same in both.
+
+@table @asis
+@item @samp{ABI=32}
+In the 32-bit ABI, pointers, @code{int}s and @code{long}s are 32 bits and GMP
+uses a 64 bit @code{long long} for a limb.  Applications can be compiled
+without any special flags since this ABI is the default in both HP C and GCC,
+but for reference the flags are
+
+@example
+gcc  -milp32
+cc   +DD32
+@end example
+
+@item @samp{ABI=64}
+In the 64-bit ABI, @code{long}s and pointers are 64 bits and GMP uses a
+@code{long} for a limb.  Applications must be compiled with
+
+@example
+gcc  -mlp64
+cc   +DD64
+@end example
+@end table
+
+On other IA-64 systems, GNU/Linux for instance, @samp{ABI=64} is the only
+choice.
+
+@sp 1
+@need 1000
+@item MIPS under IRIX 6 (@samp{mips*-*-irix[6789]})
+@cindex MIPS
+@cindex IRIX
+IRIX 6 always has a 64-bit MIPS 3 or better CPU, and supports ABIs o32, n32,
+and 64.  n32 or 64 are recommended, and GMP performance will be the same in
+each.  The default is n32.
+
+@table @asis
+@item @samp{ABI=o32}
+The o32 ABI is 32-bit pointers and integers, and no 64-bit operations.  GMP
+will be slower than in n32 or 64, this option only exists to support old
+compilers, eg.@: GCC 2.7.2.  Applications can be compiled with no special
+flags on an old compiler, or on a newer compiler with
+
+@example
+gcc  -mabi=32
+cc   -32
+@end example
+
+@item @samp{ABI=n32}
+The n32 ABI is 32-bit pointers and integers, but with a 64-bit limb using a
+@code{long long}.  Applications must be compiled with
+
+@example
+gcc  -mabi=n32
+cc   -n32
+@end example
+
+@item @samp{ABI=64}
+The 64-bit ABI is 64-bit pointers and integers.  Applications must be compiled
+with
+
+@example
+gcc  -mabi=64
+cc   -64
+@end example
+@end table
+
+Note that MIPS GNU/Linux, as of kernel version 2.2, doesn't have the necessary
+support for n32 or 64 and so only gets a 32-bit limb and the MIPS 2 code.
+
+@sp 1
+@need 1000
+@item PowerPC 64 (@samp{powerpc64}, @samp{powerpc620}, @samp{powerpc630}, @samp{powerpc970}, @samp{power4}, @samp{power5})
+@cindex PowerPC
+@table @asis
+@item @samp{ABI=aix64}
+@cindex AIX
+The AIX 64 ABI uses 64-bit limbs and pointers and is the default on PowerPC 64
+@samp{*-*-aix*} systems.  Applications must be compiled with
+
+@example
+gcc  -maix64
+xlc  -q64
+@end example
+
+@item @samp{ABI=mode64}
+The @samp{mode64} ABI uses 64-bit limbs and pointers, and is the default on
+64-bit GNU/Linux, BSD, and Mac OS X/Darwin systems.  Applications must be
+compiled with
+
+@example
+gcc  -m64
+@end example
+
+@item @samp{ABI=mode32}
+@cindex AIX
+The @samp{mode32} ABI uses a 64-bit @code{long long} limb but with the chip
+still in 32-bit mode and using 32-bit calling conventions.  This is the default
+for systems where the true 64-bit ABI is unavailable.  No special compiler
+options are typically needed for applications.
+
+@item @samp{ABI=32}
+This is the basic 32-bit PowerPC ABI, with a 32-bit limb.  No special compiler
+options are needed for applications.
+@end table
+
+GMP's speed is greatest for @samp{aix64} and @samp{mode64}.  In @samp{ABI=32}
+only the 32-bit ISA is used and this doesn't make full use of a 64-bit chip.
+On a suitable system we could perhaps use more of the ISA, but there are no
+plans to do so.
+
+@sp 1
+@need 1000
+@item Sparc V9 (@samp{sparc64}, @samp{sparcv9}, @samp{ultrasparc*})
+@cindex Sparc V9
+@cindex Solaris
+@cindex Sun
+@table @asis
+@item @samp{ABI=64}
+The 64-bit V9 ABI is available on the various BSD sparc64 ports, recent
+versions of Sparc64 GNU/Linux, and Solaris 2.7 and up (when the kernel is in
+64-bit mode).  GCC 3.2 or higher, or Sun @command{cc} is required.  On
+GNU/Linux, depending on the default @command{gcc} mode, applications must be
+compiled with
+
+@example
+gcc  -m64
+@end example
+
+On Solaris applications must be compiled with
+
+@example
+gcc  -m64 -mptr64 -Wa,-xarch=v9 -mcpu=v9
+cc   -xarch=v9
+@end example
+
+On the BSD sparc64 systems no special options are required, since 64-bits is
+the only ABI available.
+
+@item @samp{ABI=32}
+For the basic 32-bit ABI, GMP still uses as much of the V9 ISA as it can.  In
+the Sun documentation this combination is known as ``v8plus''.  On GNU/Linux,
+depending on the default @command{gcc} mode, applications may need to be
+compiled with
+
+@example
+gcc  -m32
+@end example
+
+On Solaris, no special compiler options are required for applications, though
+using something like the following is recommended.  (@command{gcc} 2.8 and
+earlier only support @samp{-mv8} though.)
+
+@example
+gcc  -mv8plus
+cc   -xarch=v8plus
+@end example
+@end table
+
+GMP speed is greatest in @samp{ABI=64}, so it's the default where available.
+The speed is partly because there are extra registers available and partly
+because 64-bits is considered the more important case and has therefore had
+better code written for it.
+
+Don't be confused by the names of the @samp{-m} and @samp{-x} compiler
+options, they're called @samp{arch} but effectively control both ABI and ISA@.
+
+On Solaris 2.6 and earlier, only @samp{ABI=32} is available since the kernel
+doesn't save all registers.
+
+On Solaris 2.7 with the kernel in 32-bit mode, a normal native build will
+reject @samp{ABI=64} because the resulting executables won't run.
+@samp{ABI=64} can still be built if desired by making it look like a
+cross-compile, for example
+
+@example
+./configure --build=none --host=sparcv9-sun-solaris2.7 ABI=64
+@end example
+@end table
+
+
+@need 2000
+@node Notes for Package Builds, Notes for Particular Systems, ABI and ISA, Installing GMP
+@section Notes for Package Builds
+@cindex Build notes for binary packaging
+@cindex Packaged builds
+
+GMP should present no great difficulties for packaging in a binary
+distribution.
+
+@cindex Libtool versioning
+@cindex Shared library versioning
+Libtool is used to build the library and @samp{-version-info} is set
+appropriately, having started from @samp{3:0:0} in GMP 3.0 (@pxref{Versioning,
+Library interface versions, Library interface versions, libtool, GNU
+Libtool}).
+
+The GMP 4 series will be upwardly binary compatible in each release and will
+be upwardly binary compatible with all of the GMP 3 series.  Additional
+function interfaces may be added in each release, so on systems where libtool
+versioning is not fully checked by the loader an auxiliary mechanism may be
+needed to express that a dynamic linked application depends on a new enough
+GMP.
+
+An auxiliary mechanism may also be needed to express that @file{libgmpxx.la}
+(from @option{--enable-cxx}, @pxref{Build Options}) requires @file{libgmp.la}
+from the same GMP version, since this is not done by the libtool versioning,
+nor otherwise.  A mismatch will result in unresolved symbols from the linker,
+or perhaps the loader.
+
+When building a package for a CPU family, care should be taken to use
+@samp{--host} (or @samp{--build}) to choose the least common denominator among
+the CPUs which might use the package.  For example this might mean plain
+@samp{sparc} (meaning V7) for SPARCs.
+
+For x86s, @option{--enable-fat} sets things up for a fat binary build, making a
+runtime selection of optimized low level routines.  This is a good choice for
+packaging to run on a range of x86 chips.
+
+Users who care about speed will want GMP built for their exact CPU type, to
+make best use of the available optimizations.  Providing a way to suitably
+rebuild a package may be useful.  This could be as simple as making it
+possible for a user to omit @samp{--build} (and @samp{--host}) so
+@samp{./config.guess} will detect the CPU@.  But a way to manually specify a
+@samp{--build} will be wanted for systems where @samp{./config.guess} is
+inexact.
+
+On systems with multiple ABIs, a packaged build will need to decide which
+among the choices is to be provided, see @ref{ABI and ISA}.  A given run of
+@samp{./configure} etc will only build one ABI@.  If a second ABI is also
+required then a second run of @samp{./configure} etc must be made, starting
+from a clean directory tree (@samp{make distclean}).
+
+As noted under ``ABI and ISA'', currently no attempt is made to follow system
+conventions for install locations that vary with ABI, such as
+@file{/usr/lib/sparcv9} for @samp{ABI=64} as opposed to @file{/usr/lib} for
+@samp{ABI=32}.  A package build can override @samp{libdir} and other standard
+variables as necessary.
+
+Note that @file{gmp.h} is a generated file, and will be architecture and ABI
+dependent.  When attempting to install two ABIs simultaneously it will be
+important that an application compile gets the correct @file{gmp.h} for its
+desired ABI@.  If compiler include paths don't vary with ABI options then it
+might be necessary to create a @file{/usr/include/gmp.h} which tests
+preprocessor symbols and chooses the correct actual @file{gmp.h}.
+
+
+@need 2000
+@node Notes for Particular Systems, Known Build Problems, Notes for Package Builds, Installing GMP
+@section Notes for Particular Systems
+@cindex Build notes for particular systems
+@cindex Particular systems
+@cindex Systems
+@table @asis
+
+@c This section is more or less meant for notes about performance or about
+@c build problems that have been worked around but might leave a user
+@c scratching their head.  Fun with different ABIs on a system belongs in the
+@c above section.
+
+@item AIX 3 and 4
+@cindex AIX
+On systems @samp{*-*-aix[34]*} shared libraries are disabled by default, since
+some versions of the native @command{ar} fail on the convenience libraries
+used.  A shared build can be attempted with
+
+@example
+./configure --enable-shared --disable-static
+@end example
+
+Note that the @samp{--disable-static} is necessary because in a shared build
+libtool makes @file{libgmp.a} a symlink to @file{libgmp.so}, apparently for
+the benefit of old versions of @command{ld} which only recognise @file{.a},
+but unfortunately this is done even if a fully functional @command{ld} is
+available.
+
+@item ARM
+@cindex ARM
+On systems @samp{arm*-*-*}, versions of GCC up to and including 2.95.3 have a
+bug in unsigned division, giving wrong results for some operands.  GMP
+@samp{./configure} will demand GCC 2.95.4 or later.
+
+@item Compaq C++
+@cindex Compaq C++
+Compaq C++ on OSF 5.1 has two flavours of @code{iostream}, a standard one and
+an old pre-standard one (see @samp{man iostream_intro}).  GMP can only use the
+standard one, which unfortunately is not the default but must be selected by
+defining @code{__USE_STD_IOSTREAM}.  Configure with for instance
+
+@example
+./configure --enable-cxx CPPFLAGS=-D__USE_STD_IOSTREAM
+@end example
+
+@item Floating Point Mode
+@cindex Floating point mode
+@cindex Hardware floating point mode
+@cindex Precision of hardware floating point
+@cindex x87
+On some systems, the hardware floating point has a control mode which can set
+all operations to be done in a particular precision, for instance single,
+double or extended on x86 systems (x87 floating point).  The GMP functions
+involving a @code{double} cannot be expected to operate to their full
+precision when the hardware is in single precision mode.  Of course this
+affects all code, including application code, not just GMP.
+
+@item MS-DOS and MS Windows
+@cindex MS-DOS
+@cindex MS Windows
+@cindex Windows
+@cindex Cygwin
+@cindex DJGPP
+@cindex MINGW
+On an MS-DOS system DJGPP can be used to build GMP, and on an MS Windows
+system Cygwin, DJGPP and MINGW can be used.  All three are excellent ports of
+GCC and the various GNU tools.
+
+@display
+@uref{http://www.cygwin.com/}
+@uref{http://www.delorie.com/djgpp/}
+@uref{http://www.mingw.org/}
+@end display
+
+@cindex Interix
+@cindex Services for Unix
+Microsoft also publishes an Interix ``Services for Unix'' which can be used to
+build GMP on Windows (with a normal @samp{./configure}), but it's not free
+software.
+
+@item MS Windows DLLs
+@cindex DLLs
+@cindex MS Windows
+@cindex Windows
+On systems @samp{*-*-cygwin*}, @samp{*-*-mingw*} and @samp{*-*-pw32*} by
+default GMP builds only a static library, but a DLL can be built instead using
+
+@example
+./configure --disable-static --enable-shared
+@end example
+
+Static and DLL libraries can't both be built, since certain export directives
+in @file{gmp.h} must be different.
+
+A MINGW DLL build of GMP can be used with Microsoft C@.  Libtool doesn't
+install a @file{.lib} format import library, but it can be created with MS
+@command{lib} as follows, and copied to the install directory.  Similarly for
+@file{libmp} and @file{libgmpxx}.
+
+@example
+cd .libs
+lib /def:libgmp-3.dll.def /out:libgmp-3.lib
+@end example
+
+MINGW uses the C runtime library @samp{msvcrt.dll} for I/O, so applications
+wanting to use the GMP I/O routines must be compiled with @samp{cl /MD} to do
+the same.  If one of the other C runtime library choices provided by MS C is
+desired then the suggestion is to use the GMP string functions and confine I/O
+to the application.
+
+@item Motorola 68k CPU Types
+@cindex 68000
+@samp{m68k} is taken to mean 68000.  @samp{m68020} or higher will give a
+performance boost on applicable CPUs.  @samp{m68360} can be used for CPU32
+series chips.  @samp{m68302} can be used for ``Dragonball'' series chips,
+though this is merely a synonym for @samp{m68000}.
+
+@item OpenBSD 2.6
+@cindex OpenBSD
+@command{m4} in this release of OpenBSD has a bug in @code{eval} that makes it
+unsuitable for @file{.asm} file processing.  @samp{./configure} will detect
+the problem and either abort or choose another m4 in the @env{PATH}.  The bug
+is fixed in OpenBSD 2.7, so either upgrade or use GNU m4.
+
+@item Power CPU Types
+@cindex Power/PowerPC
+In GMP, CPU types @samp{power*} and @samp{powerpc*} will each use instructions
+not available on the other, so it's important to choose the right one for the
+CPU that will be used.  Currently GMP has no assembly code support for using
+just the common instruction subset.  To get executables that run on both, the
+current suggestion is to use the generic C code (CPU @samp{none}), possibly
+with appropriate compiler options (like @samp{-mcpu=common} for
+@command{gcc}).  CPU @samp{rs6000} (which is not a CPU but a family of
+workstations) is accepted by @file{config.sub}, but is currently equivalent to
+@samp{none}.
+
+@item Sparc CPU Types
+@cindex Sparc
+@samp{sparcv8} or @samp{supersparc} on relevant systems will give a
+significant performance increase over the V7 code selected by plain
+@samp{sparc}.
+
+@item Sparc App Regs
+@cindex Sparc
+The GMP assembly code for both 32-bit and 64-bit Sparc clobbers the
+``application registers'' @code{g2}, @code{g3} and @code{g4}, the same way
+that the GCC default @samp{-mapp-regs} does (@pxref{SPARC Options,, SPARC
+Options, gcc, Using the GNU Compiler Collection (GCC)}).
+
+This makes that code unsuitable for use with the special V9
+@samp{-mcmodel=embmedany} (which uses @code{g4} as a data segment pointer), and
+for applications wanting to use those registers for special purposes.  In these
+cases the only suggestion currently is to build GMP with CPU @samp{none} to
+avoid the assembly code.
+
+@item SunOS 4
+@cindex SunOS
+@command{/usr/bin/m4} lacks various features needed to process @file{.asm}
+files, and instead @samp{./configure} will automatically use
+@command{/usr/5bin/m4}, which we believe is always available (if not then use
+GNU m4).
+
+@item x86 CPU Types
+@cindex x86
+@cindex 80x86
+@cindex i386
+@samp{i586}, @samp{pentium} or @samp{pentiummmx} code is good for its intended
+P5 Pentium chips, but quite slow when run on Intel P6 class chips (PPro, P-II,
+P-III)@.  @samp{i386} is a better choice when making binaries that must run on
+both.
+
+@item x86 MMX and SSE2 Code
+@cindex MMX
+@cindex SSE2
+If the CPU selected has MMX code but the assembler doesn't support it, a
+warning is given and non-MMX code is used instead.  This will be an inferior
+build, since the MMX code that's present is there because it's faster than the
+corresponding plain integer code.  The same applies to SSE2.
+
+Old versions of @samp{gas} don't support MMX instructions, in particular
+version 1.92.3 that comes with FreeBSD 2.2.8 or the more recent OpenBSD 3.1
+doesn't.
+
+Solaris 2.6 and 2.7 @command{as} generate incorrect object code for register
+to register @code{movq} instructions, and so can't be used for MMX code.
+Install a recent @command{gas} if MMX code is wanted on these systems.
+@end table
+
+
+@need 2000
+@node Known Build Problems, Performance optimization, Notes for Particular Systems, Installing GMP
+@section Known Build Problems
+@cindex Build problems known
+
+@c This section is more or less meant for known build problems that are not
+@c otherwise worked around and require some sort of manual intervention.
+
+You might find more up-to-date information at @uref{http://gmplib.org/}.
+
+@table @asis
+@item Compiler link options
+The version of libtool currently in use rather aggressively strips compiler
+options when linking a shared library.  This will hopefully be relaxed in the
+future, but for now if this is a problem the suggestion is to create a little
+script to hide them, and for instance configure with
+
+@example
+./configure CC=gcc-with-my-options
+@end example
+
+@item DJGPP (@samp{*-*-msdosdjgpp*})
+@cindex DJGPP
+The DJGPP port of @command{bash} 2.03 is unable to run the @samp{configure}
+script, it exits silently, having died writing a preamble to
+@file{config.log}.  Use @command{bash} 2.04 or higher.
+
+@samp{make all} was found to run out of memory during the final
+@file{libgmp.la} link on one system tested, despite having 64Mb available.
+Running @samp{make libgmp.la} directly helped, perhaps recursing into the
+various subdirectories uses up memory.
+
+@item GNU binutils @command{strip} prior to 2.12
+@cindex Stripped libraries
+@cindex Binutils @command{strip}
+@cindex GNU @command{strip}
+@command{strip} from GNU binutils 2.11 and earlier should not be used on the
+static libraries @file{libgmp.a} and @file{libmp.a} since it will discard all
+but the last of multiple archive members with the same name, like the three
+versions of @file{init.o} in @file{libgmp.a}.  Binutils 2.12 or higher can be
+used successfully.
+
+The shared libraries @file{libgmp.so} and @file{libmp.so} are not affected by
+this and any version of @command{strip} can be used on them.
+
+@item @command{make} syntax error
+@cindex SCO
+@cindex IRIX
+On certain versions of SCO OpenServer 5 and IRIX 6.5 the native @command{make}
+is unable to handle the long dependencies list for @file{libgmp.la}.  The
+symptom is a ``syntax error'' on the following line of the top-level
+@file{Makefile}.
+
+@example
+libgmp.la: $(libgmp_la_OBJECTS) $(libgmp_la_DEPENDENCIES)
+@end example
+
+Either use GNU Make, or as a workaround remove
+@code{$(libgmp_la_DEPENDENCIES)} from that line (which will make the initial
+build work, but if any recompiling is done @file{libgmp.la} might not be
+rebuilt).
+
+@item MacOS X (@samp{*-*-darwin*})
+@cindex MacOS X
+@cindex Darwin
+Libtool currently only knows how to create shared libraries on MacOS X using
+the native @command{cc} (which is a modified GCC), not a plain GCC@.  A
+static-only build should work though (@samp{--disable-shared}).
+
+@item NeXT prior to 3.3
+@cindex NeXT
+The system compiler on old versions of NeXT was a massacred and old GCC, even
+if it called itself @file{cc}.  This compiler cannot be used to build GMP, you
+need to get a real GCC, and install that.  (NeXT may have fixed this in
+release 3.3 of their system.)
+
+@item POWER and PowerPC
+@cindex Power/PowerPC
+Bugs in GCC 2.7.2 (and 2.6.3) mean it can't be used to compile GMP on POWER or
+PowerPC@.  If you want to use GCC for these machines, get GCC 2.7.2.1 (or
+later).
+
+@item Sequent Symmetry
+@cindex Sequent Symmetry
+Use the GNU assembler instead of the system assembler, since the latter has
+serious bugs.
+
+@item Solaris 2.6
+@cindex Solaris
+The system @command{sed} prints an error ``Output line too long'' when libtool
+builds @file{libgmp.la}.  This doesn't seem to cause any obvious ill effects,
+but GNU @command{sed} is recommended, to avoid any doubt.
+
+@item Sparc Solaris 2.7 with gcc 2.95.2 in @samp{ABI=32}
+@cindex Solaris
+A shared library build of GMP seems to fail in this combination, it builds but
+then fails the tests, apparently due to some incorrect data relocations within
+@code{gmp_randinit_lc_2exp_size}.  The exact cause is unknown,
+@samp{--disable-shared} is recommended.
+@end table
+
+
+@need 2000
+@node Performance optimization, , Known Build Problems, Installing GMP
+@section Performance optimization
+@cindex Optimizing performance
+
+@c At some point, this should perhaps move to a separate chapter on optimizing
+@c performance.
+
+For optimal performance, build GMP for the exact CPU type of the target
+computer, see @ref{Build Options}.
+
+Unlike what is the case for most other programs, the compiler typically
+doesn't matter much, since GMP uses assembly language for the most critical
+operations.
+
+In particular for long-running GMP applications, and applications demanding
+extremely large numbers, building and running the @code{tuneup} program in the
+@file{tune} subdirectory, can be important.  For example,
+
+@example
+cd tune
+make tuneup
+./tuneup
+@end example
+
+will generate better contents for the @file{gmp-mparam.h} parameter file.
+
+To use the results, put the output in the file indicated in the
+@samp{Parameters for ...} header.  Then recompile from scratch.
+
+The @code{tuneup} program takes one useful parameter, @samp{-f NNN}, which
+instructs the program how long to check FFT multiply parameters.  If you're
+going to use GMP for extremely large numbers, you may want to run @code{tuneup}
+with a large NNN value.
+
+
+@node GMP Basics, Reporting Bugs, Installing GMP, Top
+@comment  node-name,  next,  previous,  up
+@chapter GMP Basics
+@cindex Basics
+
+@strong{Using functions, macros, data types, etc.@: not documented in this
+manual is strongly discouraged.  If you do so your application is guaranteed
+to be incompatible with future versions of GMP.}
+
+@menu
+* Headers and Libraries::
+* Nomenclature and Types::
+* Function Classes::
+* Variable Conventions::
+* Parameter Conventions::
+* Memory Management::
+* Reentrancy::
+* Useful Macros and Constants::
+* Compatibility with older versions::
+* Demonstration Programs::
+* Efficiency::
+* Debugging::
+* Profiling::
+* Autoconf::
+* Emacs::
+@end menu
+
+@node Headers and Libraries, Nomenclature and Types, GMP Basics, GMP Basics
+@section Headers and Libraries
+@cindex Headers
+
+@cindex @file{gmp.h}
+@cindex Include files
+@cindex @code{#include}
+All declarations needed to use GMP are collected in the include file
+@file{gmp.h}.  It is designed to work with both C and C++ compilers.
+
+@example
+#include <gmp.h>
+@end example
+
+@cindex @code{stdio.h}
+Note however that prototypes for GMP functions with @code{FILE *} parameters
+are only provided if @code{<stdio.h>} is included too.
+
+@example
+#include <stdio.h>
+#include <gmp.h>
+@end example
+
+@cindex @code{stdarg.h}
+Likewise @code{<stdarg.h>} (or @code{<varargs.h>}) is required for prototypes
+with @code{va_list} parameters, such as @code{gmp_vprintf}.  And
+@code{<obstack.h>} for prototypes with @code{struct obstack} parameters, such
+as @code{gmp_obstack_printf}, when available.
+
+@cindex Libraries
+@cindex Linking
+@cindex @code{libgmp}
+All programs using GMP must link against the @file{libgmp} library.  On a
+typical Unix-like system this can be done with @samp{-lgmp}, for example
+
+@example
+gcc myprogram.c -lgmp
+@end example
+
+@cindex @code{libgmpxx}
+GMP C++ functions are in a separate @file{libgmpxx} library.  This is built
+and installed if C++ support has been enabled (@pxref{Build Options}).  For
+example,
+
+@example
+g++ mycxxprog.cc -lgmpxx -lgmp
+@end example
+
+@cindex Libtool
+GMP is built using Libtool and an application can use that to link if desired,
+@GMPpxreftop{libtool, GNU Libtool}.
+
+If GMP has been installed to a non-standard location then it may be necessary
+to use @samp{-I} and @samp{-L} compiler options to point to the right
+directories, and some sort of run-time path for a shared library.
+
+
+@node Nomenclature and Types, Function Classes, Headers and Libraries, GMP Basics
+@section Nomenclature and Types
+@cindex Nomenclature
+@cindex Types
+
+@cindex Integer
+@tindex @code{mpz_t}
+In this manual, @dfn{integer} usually means a multiple precision integer, as
+defined by the GMP library.  The C data type for such integers is @code{mpz_t}.
+Here are some examples of how to declare such integers:
+
+@example
+mpz_t sum;
+
+struct foo @{ mpz_t x, y; @};
+
+mpz_t vec[20];
+@end example
+
+@cindex Rational number
+@tindex @code{mpq_t}
+@dfn{Rational number} means a multiple precision fraction.  The C data type
+for these fractions is @code{mpq_t}.  For example:
+
+@example
+mpq_t quotient;
+@end example
+
+@cindex Floating-point number
+@tindex @code{mpf_t}
+@dfn{Floating point number} or @dfn{Float} for short, is an arbitrary precision
+mantissa with a limited precision exponent.  The C data type for such objects
+is @code{mpf_t}.  For example:
+
+@example
+mpf_t fp;
+@end example
+
+@tindex @code{mp_exp_t}
+The floating point functions accept and return exponents in the C type
+@code{mp_exp_t}.  Currently this is usually a @code{long}, but on some systems
+it's an @code{int} for efficiency.
+
+@cindex Limb
+@tindex @code{mp_limb_t}
+A @dfn{limb} means the part of a multi-precision number that fits in a single
+machine word.  (We chose this word because a limb of the human body is
+analogous to a digit, only larger, and containing several digits.)  Normally a
+limb is 32 or 64 bits.  The C data type for a limb is @code{mp_limb_t}.
+
+@tindex @code{mp_size_t}
+Counts of limbs of a multi-precision number are represented in the C type
+@code{mp_size_t}.  Currently this is normally a @code{long}, but on some
+systems it's an @code{int} for efficiency, and on some systems it will be
+@code{long long} in the future.
+
+@tindex @code{mp_bitcnt_t}
+Counts of bits of a multi-precision number are represented in the C type
+@code{mp_bitcnt_t}.  Currently this is always an @code{unsigned long}, but on
+some systems it will be an @code{unsigned long long} in the future.
+
+@cindex Random state
+@tindex @code{gmp_randstate_t}
+@dfn{Random state} means an algorithm selection and current state data.  The C
+data type for such objects is @code{gmp_randstate_t}.  For example:
+
+@example
+gmp_randstate_t rstate;
+@end example
+
+Also, in general @code{mp_bitcnt_t} is used for bit counts and ranges, and
+@code{size_t} is used for byte or character counts.
+
+
+@node Function Classes, Variable Conventions, Nomenclature and Types, GMP Basics
+@section Function Classes
+@cindex Function classes
+
+There are six classes of functions in the GMP library:
+
+@enumerate
+@item
+Functions for signed integer arithmetic, with names beginning with
+@code{mpz_}.  The associated type is @code{mpz_t}.  There are about 150
+functions in this class.  (@pxref{Integer Functions})
+
+@item
+Functions for rational number arithmetic, with names beginning with
+@code{mpq_}.  The associated type is @code{mpq_t}.  There are about 40
+functions in this class, but the integer functions can be used for arithmetic
+on the numerator and denominator separately.  (@pxref{Rational Number
+Functions})
+
+@item
+Functions for floating-point arithmetic, with names beginning with
+@code{mpf_}.  The associated type is @code{mpf_t}.  There are about 60
+functions in this class.  (@pxref{Floating-point Functions})
+
+@item
+Functions compatible with Berkeley MP, such as @code{itom}, @code{madd}, and
+@code{mult}.  The associated type is @code{MINT}.  (@pxref{BSD Compatible
+Functions})
+
+@item
+Fast low-level functions that operate on natural numbers.  These are used by
+the functions in the preceding groups, and you can also call them directly
+from very time-critical user programs.  These functions' names begin with
+@code{mpn_}.  The associated type is array of @code{mp_limb_t}.  There are
+about 30 (hard-to-use) functions in this class.  (@pxref{Low-level Functions})
+
+@item
+Miscellaneous functions.  Functions for setting up custom allocation and
+functions for generating random numbers.  (@pxref{Custom Allocation}, and
+@pxref{Random Number Functions})
+@end enumerate
+
+
+@node Variable Conventions, Parameter Conventions, Function Classes, GMP Basics
+@section Variable Conventions
+@cindex Variable conventions
+@cindex Conventions for variables
+
+GMP functions generally have output arguments before input arguments.  This
+notation is by analogy with the assignment operator.  The BSD MP compatibility
+functions are exceptions, having the output arguments last.
+
+GMP lets you use the same variable for both input and output in one call.  For
+example, the main function for integer multiplication, @code{mpz_mul}, can be
+used to square @code{x} and put the result back in @code{x} with
+
+@example
+mpz_mul (x, x, x);
+@end example
+
+Before you can assign to a GMP variable, you need to initialize it by calling
+one of the special initialization functions.  When you're done with a
+variable, you need to clear it out, using one of the functions for that
+purpose.  Which function to use depends on the type of variable.  See the
+chapters on integer functions, rational number functions, and floating-point
+functions for details.
+
+A variable should only be initialized once, or at least cleared between each
+initialization.  After a variable has been initialized, it may be assigned to
+any number of times.
+
+For efficiency reasons, avoid excessive initializing and clearing.  In
+general, initialize near the start of a function and clear near the end.  For
+example,
+
+@example
+void
+foo (void)
+@{
+  mpz_t  n;
+  int    i;
+  mpz_init (n);
+  for (i = 1; i < 100; i++)
+    @{
+      mpz_mul (n, @dots{});
+      mpz_fdiv_q (n, @dots{});
+      @dots{}
+    @}
+  mpz_clear (n);
+@}
+@end example
+
+
+@node Parameter Conventions, Memory Management, Variable Conventions, GMP Basics
+@section Parameter Conventions
+@cindex Parameter conventions
+@cindex Conventions for parameters
+
+When a GMP variable is used as a function parameter, it's effectively a
+call-by-reference, meaning if the function stores a value there it will change
+the original in the caller.  Parameters which are input-only can be designated
+@code{const} to provoke a compiler error or warning on attempting to modify
+them.
+
+When a function is going to return a GMP result, it should designate a
+parameter that it sets, like the library functions do.  More than one value
+can be returned by having more than one output parameter, again like the
+library functions.  A @code{return} of an @code{mpz_t} etc doesn't return the
+object, only a pointer, and this is almost certainly not what's wanted.
+
+Here's an example accepting an @code{mpz_t} parameter, doing a calculation,
+and storing the result to the indicated parameter.
+
+@example
+void
+foo (mpz_t result, const mpz_t param, unsigned long n)
+@{
+  unsigned long  i;
+  mpz_mul_ui (result, param, n);
+  for (i = 1; i < n; i++)
+    mpz_add_ui (result, result, i*7);
+@}
+
+int
+main (void)
+@{
+  mpz_t  r, n;
+  mpz_init (r);
+  mpz_init_set_str (n, "123456", 0);
+  foo (r, n, 20L);
+  gmp_printf ("%Zd\n", r);
+  return 0;
+@}
+@end example
+
+@code{foo} works even if the mainline passes the same variable for
+@code{param} and @code{result}, just like the library functions.  But
+sometimes it's tricky to make that work, and an application might not want to
+bother supporting that sort of thing.
+
+For interest, the GMP types @code{mpz_t} etc are implemented as one-element
+arrays of certain structures.  This is why declaring a variable creates an
+object with the fields GMP needs, but then using it as a parameter passes a
+pointer to the object.  Note that the actual fields in each @code{mpz_t} etc
+are for internal use only and should not be accessed directly by code that
+expects to be compatible with future GMP releases.
+
+
+@need 1000
+@node Memory Management, Reentrancy, Parameter Conventions, GMP Basics
+@section Memory Management
+@cindex Memory management
+
+The GMP types like @code{mpz_t} are small, containing only a couple of sizes,
+and pointers to allocated data.  Once a variable is initialized, GMP takes
+care of all space allocation.  Additional space is allocated whenever a
+variable doesn't have enough.
+
+@code{mpz_t} and @code{mpq_t} variables never reduce their allocated space.
+Normally this is the best policy, since it avoids frequent reallocation.
+Applications that need to return memory to the heap at some particular point
+can use @code{mpz_realloc2}, or clear variables no longer needed.
+
+@code{mpf_t} variables, in the current implementation, use a fixed amount of
+space, determined by the chosen precision and allocated at initialization, so
+their size doesn't change.
+
+All memory is allocated using @code{malloc} and friends by default, but this
+can be changed, see @ref{Custom Allocation}.  Temporary memory on the stack is
+also used (via @code{alloca}), but this can be changed at build-time if
+desired, see @ref{Build Options}.
+
+
+@node Reentrancy, Useful Macros and Constants, Memory Management, GMP Basics
+@section Reentrancy
+@cindex Reentrancy
+@cindex Thread safety
+@cindex Multi-threading
+
+@noindent
+GMP is reentrant and thread-safe, with some exceptions:
+
+@itemize @bullet
+@item
+If configured with @option{--enable-alloca=malloc-notreentrant} (or with
+@option{--enable-alloca=notreentrant} when @code{alloca} is not available),
+then naturally GMP is not reentrant.
+
+@item
+@code{mpf_set_default_prec} and @code{mpf_init} use a global variable for the
+selected precision.  @code{mpf_init2} can be used instead, and in the C++
+interface an explicit precision to the @code{mpf_class} constructor.
+
+@item
+@code{mpz_random} and the other old random number functions use a global
+random state and are hence not reentrant.  The newer random number functions
+that accept a @code{gmp_randstate_t} parameter can be used instead.
+
+@item
+@code{gmp_randinit} (obsolete) returns an error indication through a global
+variable, which is not thread safe.  Applications are advised to use
+@code{gmp_randinit_default} or @code{gmp_randinit_lc_2exp} instead.
+
+@item
+@code{mp_set_memory_functions} uses global variables to store the selected
+memory allocation functions.
+
+@item
+If the memory allocation functions set by a call to
+@code{mp_set_memory_functions} (or @code{malloc} and friends by default) are
+not reentrant, then GMP will not be reentrant either.
+
+@item
+If the standard I/O functions such as @code{fwrite} are not reentrant then the
+GMP I/O functions using them will not be reentrant either.
+
+@item
+It's safe for two threads to read from the same GMP variable simultaneously,
+but it's not safe for one to read while another might be writing, nor for
+two threads to write simultaneously.  It's not safe for two threads to
+generate a random number from the same @code{gmp_randstate_t} simultaneously,
+since this involves an update of that variable.
+@end itemize
+
+
+@need 2000
+@node Useful Macros and Constants, Compatibility with older versions, Reentrancy, GMP Basics
+@section Useful Macros and Constants
+@cindex Useful macros and constants
+@cindex Constants
+
+@deftypevr {Global Constant} {const int} mp_bits_per_limb
+@findex mp_bits_per_limb
+@cindex Bits per limb
+@cindex Limb size
+The number of bits per limb.
+@end deftypevr
+
+@defmac __GNU_MP_VERSION
+@defmacx __GNU_MP_VERSION_MINOR
+@defmacx __GNU_MP_VERSION_PATCHLEVEL
+@cindex Version number
+@cindex GMP version number
+The major and minor GMP version, and patch level, respectively, as integers.
+For GMP i.j, these numbers will be i, j, and 0, respectively.
+For GMP i.j.k, these numbers will be i, j, and k, respectively.
+@end defmac
+
+@deftypevr {Global Constant} {const char * const} gmp_version
+@findex gmp_version
+The GMP version number, as a null-terminated string, in the form ``i.j.k''.
+This release is @nicode{"@value{VERSION}"}.  Note that the format ``i.j'' was
+used, when k was zero, before version 4.3.0.
+@end deftypevr
+
+@defmac __GMP_CC
+@defmacx __GMP_CFLAGS
+The compiler and compiler flags, respectively, used when compiling GMP, as
+strings.
+@end defmac
+
+
+@node Compatibility with older versions, Demonstration Programs, Useful Macros and Constants, GMP Basics
+@section Compatibility with older versions
+@cindex Compatibility with older versions
+@cindex Past GMP versions
+@cindex Upward compatibility
+
+This version of GMP is upwardly binary compatible with all 4.x and 3.x
+versions, and upwardly compatible at the source level with all 2.x versions,
+with the following exceptions.
+
+@itemize @bullet
+@item
+@code{mpn_gcd} had its source arguments swapped as of GMP 3.0, for consistency
+with other @code{mpn} functions.
+
+@item
+@code{mpf_get_prec} counted precision slightly differently in GMP 3.0 and
+3.0.1, but in 3.1 reverted to the 2.x style.
+@end itemize
+
+There are a number of compatibility issues between GMP 1 and GMP 2 that of
+course also apply when porting applications from GMP 1 to GMP 4.  Please
+see the GMP 2 manual for details.
+
+The Berkeley MP compatibility library (@pxref{BSD Compatible Functions}) is
+source and binary compatible with the standard @file{libmp}.
+
+@c @enumerate
+@c @item Integer division functions round the result differently.  The obsolete
+@c functions (@code{mpz_div}, @code{mpz_divmod}, @code{mpz_mdiv},
+@c @code{mpz_mdivmod}, etc) now all use floor rounding (i.e., they round the
+@c quotient towards
+@c @ifinfo
+@c @minus{}infinity).
+@c @end ifinfo
+@c @iftex
+@c @tex
+@c $-\infty$).
+@c @end tex
+@c @end iftex
+@c There are a lot of functions for integer division, giving the user better
+@c control over the rounding.
+
+@c @item The function @code{mpz_mod} now compute the true @strong{mod} function.
+
+@c @item The functions @code{mpz_powm} and @code{mpz_powm_ui} now use
+@c @strong{mod} for reduction.
+
+@c @item The assignment functions for rational numbers do no longer canonicalize
+@c their results.  In the case a non-canonical result could arise from an
+@c assignment, the user need to insert an explicit call to
+@c @code{mpq_canonicalize}.  This change was made for efficiency.
+
+@c @item Output generated by @code{mpz_out_raw} in this release cannot be read
+@c by @code{mpz_inp_raw} in previous releases.  This change was made for making
+@c the file format truly portable between machines with different word sizes.
+
+@c @item Several @code{mpn} functions have changed.  But they were intentionally
+@c undocumented in previous releases.
+
+@c @item The functions @code{mpz_cmp_ui}, @code{mpz_cmp_si}, and @code{mpq_cmp_ui}
+@c are now implemented as macros, and thereby sometimes evaluate their
+@c arguments multiple times.
+
+@c @item The functions @code{mpz_pow_ui} and @code{mpz_ui_pow_ui} now yield 1
+@c for 0^0.  (In version 1, they yielded 0.)
+
+@c In version 1 of the library, @code{mpq_set_den} handled negative
+@c denominators by copying the sign to the numerator.  That is no longer done.
+
+@c Pure assignment functions do not canonicalize the assigned variable.  It is
+@c the responsibility of the user to canonicalize the assigned variable before
+@c any arithmetic operations are performed on that variable.
+@c Note that this is an incompatible change from version 1 of the library.
+
+@c @end enumerate
+
+
+@need 1000
+@node Demonstration Programs, Efficiency, Compatibility with older versions, GMP Basics
+@section Demonstration programs
+@cindex Demonstration programs
+@cindex Example programs
+@cindex Sample programs
+The @file{demos} subdirectory has some sample programs using GMP@.  These
+aren't built or installed, but there's a @file{Makefile} with rules for them.
+For instance,
+
+@example
+make pexpr
+./pexpr 68^975+10
+@end example
+
+@noindent
+The following programs are provided
+
+@itemize @bullet
+@item
+@cindex Expression parsing demo
+@cindex Parsing expressions demo
+@samp{pexpr} is an expression evaluator, the program used on the GMP web page.
+@item
+@cindex Expression parsing demo
+@cindex Parsing expressions demo
+The @samp{calc} subdirectory has a similar but simpler evaluator using
+@command{lex} and @command{yacc}.
+@item
+@cindex Expression parsing demo
+@cindex Parsing expressions demo
+The @samp{expr} subdirectory is yet another expression evaluator, a library
+designed for ease of use within a C program.  See @file{demos/expr/README} for
+more information.
+@item
+@cindex Factorization demo
+@samp{factorize} is a Pollard-Rho factorization program.
+@item
+@samp{isprime} is a command-line interface to the @code{mpz_probab_prime_p}
+function.
+@item
+@samp{primes} counts or lists primes in an interval, using a sieve.
+@item
+@samp{qcn} is an example use of @code{mpz_kronecker_ui} to estimate quadratic
+class numbers.
+@item
+@cindex @code{perl}
+@cindex GMP Perl module
+@cindex Perl module
+The @samp{perl} subdirectory is a comprehensive perl interface to GMP@.  See
+@file{demos/perl/INSTALL} for more information.  Documentation is in POD
+format in @file{demos/perl/GMP.pm}.
+@end itemize
+
+As an aside, consideration has been given at various times to some sort of
+expression evaluation within the main GMP library.  Going beyond something
+minimal quickly leads to matters like user-defined functions, looping, fixnums
+for control variables, etc, which are considered outside the scope of GMP
+(much closer to language interpreters or compilers, @pxref{Language Bindings}).
+Something simple for program input convenience may yet be a possibility, a
+combination of the @file{expr} demo and the @file{pexpr} tree back-end
+perhaps.  But for now the above evaluators are offered as illustrations.
+
+
+@need 1000
+@node Efficiency, Debugging, Demonstration Programs, GMP Basics
+@section Efficiency
+@cindex Efficiency
+
+@table @asis
+@item Small Operands
+@cindex Small operands
+On small operands, the time for function call overheads and memory allocation
+can be significant in comparison to actual calculation.  This is unavoidable
+in a general purpose variable precision library, although GMP attempts to be
+as efficient as it can on both large and small operands.
+
+@item Static Linking
+@cindex Static linking
+On some CPUs, in particular the x86s, the static @file{libgmp.a} should be
+used for maximum speed, since the PIC code in the shared @file{libgmp.so} will
+have a small overhead on each function call and global data address.  For many
+programs this will be insignificant, but for long calculations there's a gain
+to be had.
+
+@item Initializing and Clearing
+@cindex Initializing and clearing
+Avoid excessive initializing and clearing of variables, since this can be
+quite time consuming, especially in comparison to otherwise fast operations
+like addition.
+
+A language interpreter might want to keep a free list or stack of
+initialized variables ready for use.  It should be possible to integrate
+something like that with a garbage collector too.
+
+@item Reallocations
+@cindex Reallocations
+An @code{mpz_t} or @code{mpq_t} variable used to hold successively increasing
+values will have its memory repeatedly @code{realloc}ed, which could be quite
+slow or could fragment memory, depending on the C library.  If an application
+can estimate the final size then @code{mpz_init2} or @code{mpz_realloc2} can
+be called to allocate the necessary space from the beginning
+(@pxref{Initializing Integers}).
+
+It doesn't matter if a size set with @code{mpz_init2} or @code{mpz_realloc2}
+is too small, since all functions will do a further reallocation if necessary.
+Badly overestimating memory required will waste space though.
+
+@item @code{2exp} Functions
+@cindex @code{2exp} functions
+It's up to an application to call functions like @code{mpz_mul_2exp} when
+appropriate.  General purpose functions like @code{mpz_mul} make no attempt to
+identify powers of two or other special forms, because such inputs will
+usually be very rare and testing every time would be wasteful.
+
+@item @code{ui} and @code{si} Functions
+@cindex @code{ui} and @code{si} functions
+The @code{ui} functions and the small number of @code{si} functions exist for
+convenience and should be used where applicable.  But if for example an
+@code{mpz_t} contains a value that fits in an @code{unsigned long} there's no
+need to extract it and call a @code{ui} function, just use the regular
+@code{mpz} function.
+
+@item In-Place Operations
+@cindex In-place operations
+@code{mpz_abs}, @code{mpq_abs}, @code{mpf_abs}, @code{mpz_neg}, @code{mpq_neg}
+and @code{mpf_neg} are fast when used for in-place operations like
+@code{mpz_abs(x,x)}, since in the current implementation only a single field
+of @code{x} needs changing.  On suitable compilers (GCC for instance) this is
+inlined too.
+
+@code{mpz_add_ui}, @code{mpz_sub_ui}, @code{mpf_add_ui} and @code{mpf_sub_ui}
+benefit from an in-place operation like @code{mpz_add_ui(x,x,y)}, since
+usually only one or two limbs of @code{x} will need to be changed.  The same
+applies to the full precision @code{mpz_add} etc if @code{y} is small.  If
+@code{y} is big then cache locality may be helped, but that's all.
+
+@code{mpz_mul} is currently the opposite, a separate destination is slightly
+better.  A call like @code{mpz_mul(x,x,y)} will, unless @code{y} is only one
+limb, make a temporary copy of @code{x} before forming the result.  Normally
+that copying will only be a tiny fraction of the time for the multiply, so
+this is not a particularly important consideration.
+
+@code{mpz_set}, @code{mpq_set}, @code{mpq_set_num}, @code{mpf_set}, etc, make
+no attempt to recognise a copy of something to itself, so a call like
+@code{mpz_set(x,x)} will be wasteful.  Naturally that would never be written
+deliberately, but if it might arise from two pointers to the same object then
+a test to avoid it might be desirable.
+
+@example
+if (x != y)
+  mpz_set (x, y);
+@end example
+
+Note that it's never worth introducing extra @code{mpz_set} calls just to get
+in-place operations.  If a result should go to a particular variable then just
+direct it there and let GMP take care of data movement.
+
+@item Divisibility Testing (Small Integers)
+@cindex Divisibility testing
+@code{mpz_divisible_ui_p} and @code{mpz_congruent_ui_p} are the best functions
+for testing whether an @code{mpz_t} is divisible by an individual small
+integer.  They use an algorithm which is faster than @code{mpz_tdiv_ui}, but
+which gives no useful information about the actual remainder, only whether
+it's zero (or a particular value).
+
+However when testing divisibility by several small integers, it's best to take
+a remainder modulo their product, to save multi-precision operations.  For
+instance to test whether a number is divisible by any of 23, 29 or 31 take a
+remainder modulo @math{23@times{}29@times{}31 = 20677} and then test that.
+
+The division functions like @code{mpz_tdiv_q_ui} which give a quotient as well
+as a remainder are generally a little slower than the remainder-only functions
+like @code{mpz_tdiv_ui}.  If the quotient is only rarely wanted then it's
+probably best to just take a remainder and then go back and calculate the
+quotient if and when it's wanted (@code{mpz_divexact_ui} can be used if the
+remainder is zero).
+
+@item Rational Arithmetic
+@cindex Rational arithmetic
+The @code{mpq} functions operate on @code{mpq_t} values with no common factors
+in the numerator and denominator.  Common factors are checked-for and cast out
+as necessary.  In general, cancelling factors every time is the best approach
+since it minimizes the sizes for subsequent operations.
+
+However, applications that know something about the factorization of the
+values they're working with might be able to avoid some of the GCDs used for
+canonicalization, or swap them for divisions.  For example when multiplying by
+a prime it's enough to check for factors of it in the denominator instead of
+doing a full GCD@.  Or when forming a big product it might be known that very
+little cancellation will be possible, and so canonicalization can be left to
+the end.
+
+The @code{mpq_numref} and @code{mpq_denref} macros give access to the
+numerator and denominator to do things outside the scope of the supplied
+@code{mpq} functions.  @xref{Applying Integer Functions}.
+
+The canonical form for rationals allows mixed-type @code{mpq_t} and integer
+additions or subtractions to be done directly with multiples of the
+denominator.  This will be somewhat faster than @code{mpq_add}.  For example,
+
+@example
+/* mpq increment */
+mpz_add (mpq_numref(q), mpq_numref(q), mpq_denref(q));
+
+/* mpq += unsigned long */
+mpz_addmul_ui (mpq_numref(q), mpq_denref(q), 123UL);
+
+/* mpq -= mpz */
+mpz_submul (mpq_numref(q), mpq_denref(q), z);
+@end example
+
+@item Number Sequences
+@cindex Number sequences
+Functions like @code{mpz_fac_ui}, @code{mpz_fib_ui} and @code{mpz_bin_uiui}
+are designed for calculating isolated values.  If a range of values is wanted
+it's probably best to call one of them to get a starting point and iterate
+from there.
+
+@item Text Input/Output
+@cindex Text input/output
+Hexadecimal or octal are suggested for input or output in text form.
+Power-of-2 bases like these can be converted much more efficiently than other
+bases, like decimal.  For big numbers there's usually nothing of particular
+interest to be seen in the digits, so the base doesn't matter much.
+
+Maybe we can hope octal will one day become the normal base for everyday use,
+as proposed by King Charles XII of Sweden and later reformers.
+@c Reference: Knuth volume 2 section 4.1, page 184 of second edition.  :-)
+@end table
+
+
+@node Debugging, Profiling, Efficiency, GMP Basics
+@section Debugging
+@cindex Debugging
+
+@table @asis
+@item Stack Overflow
+@cindex Stack overflow
+@cindex Segmentation violation
+@cindex Bus error
+Depending on the system, a segmentation violation or bus error might be the
+only indication of stack overflow.  See @samp{--enable-alloca} choices in
+@ref{Build Options}, for how to address this.
+
+In new enough versions of GCC, @samp{-fstack-check} may be able to ensure an
+overflow is recognised by the system before too much damage is done, or
+@samp{-fstack-limit-symbol} or @samp{-fstack-limit-register} may be able to
+add checking if the system itself doesn't do any (@pxref{Code Gen Options,,
+Options for Code Generation, gcc, Using the GNU Compiler Collection (GCC)}).
+These options must be added to the @samp{CFLAGS} used in the GMP build
+(@pxref{Build Options}), adding them just to an application will have no
+effect.  Note also they're a slowdown, adding overhead to each function call
+and each stack allocation.
+
+@item Heap Problems
+@cindex Heap problems
+@cindex Malloc problems
+The most likely cause of application problems with GMP is heap corruption.
+Failing to @code{init} GMP variables will have unpredictable effects, and
+corruption arising elsewhere in a program may well affect GMP@.  Initializing
+GMP variables more than once or failing to clear them will cause memory leaks.
+
+@cindex Malloc debugger
+In all such cases a @code{malloc} debugger is recommended.  On a GNU or BSD
+system the standard C library @code{malloc} has some diagnostic facilities,
+see @ref{Allocation Debugging,, Allocation Debugging, libc, The GNU C Library
+Reference Manual}, or @samp{man 3 malloc}.  Other possibilities, in no
+particular order, include
+
+@display
+@uref{http://www.inf.ethz.ch/personal/biere/projects/ccmalloc/}
+@uref{http://dmalloc.com/}
+@uref{http://www.perens.com/FreeSoftware/} @ (electric fence)
+@uref{http://packages.debian.org/stable/devel/fda}
+@uref{http://www.gnupdate.org/components/leakbug/}
+@uref{http://people.redhat.com/~otaylor/memprof/}
+@uref{http://www.cbmamiga.demon.co.uk/mpatrol/}
+@end display
+
+The GMP default allocation routines in @file{memory.c} also have a simple
+sentinel scheme which can be enabled with @code{#define DEBUG} in that file.
+This is mainly designed for detecting buffer overruns during GMP development,
+but might find other uses.
+
+@item Stack Backtraces
+@cindex Stack backtrace
+On some systems the compiler options GMP uses by default can interfere with
+debugging.  In particular on x86 and 68k systems @samp{-fomit-frame-pointer}
+is used and this generally inhibits stack backtracing.  Recompiling without
+such options may help while debugging, though the usual caveats about it
+potentially moving a memory problem or hiding a compiler bug will apply.
+
+@item GDB, the GNU Debugger
+@cindex GDB
+@cindex GNU Debugger
+A sample @file{.gdbinit} is included in the distribution, showing how to call
+some undocumented dump functions to print GMP variables from within GDB@.  Note
+that these functions shouldn't be used in final application code since they're
+undocumented and may be subject to incompatible changes in future versions of
+GMP.
+
+@item Source File Paths
+GMP has multiple source files with the same name, in different directories.
+For example @file{mpz}, @file{mpq} and @file{mpf} each have an
+@file{init.c}.  If the debugger can't already determine the right one it may
+help to build with absolute paths on each C file.  One way to do that is to
+use a separate object directory with an absolute path to the source directory.
+
+@example
+cd /my/build/dir
+/my/source/dir/gmp-@value{VERSION}/configure
+@end example
+
+This works via @code{VPATH}, and might require GNU @command{make}.
+Alternately it might be possible to change the @code{.c.lo} rules
+appropriately.
+
+@item Assertion Checking
+@cindex Assertion checking
+The build option @option{--enable-assert} is available to add some consistency
+checks to the library (see @ref{Build Options}).  These are likely to be of
+limited value to most applications.  Assertion failures are just as likely to
+indicate memory corruption as a library or compiler bug.
+
+Applications using the low-level @code{mpn} functions, however, will benefit
+from @option{--enable-assert} since it adds checks on the parameters of most
+such functions, many of which have subtle restrictions on their usage.  Note
+however that only the generic C code has checks, not the assembly code, so
+CPU @samp{none} should be used for maximum checking.
+
+@item Temporary Memory Checking
+The build option @option{--enable-alloca=debug} arranges that each block of
+temporary memory in GMP is allocated with a separate call to @code{malloc} (or
+the allocation function set with @code{mp_set_memory_functions}).
+
+This can help a malloc debugger detect accesses outside the intended bounds,
+or detect memory not released.  In a normal build, on the other hand,
+temporary memory is allocated in blocks which GMP divides up for its own use,
+or may be allocated with a compiler builtin @code{alloca} which will go
+nowhere near any malloc debugger hooks.
+
+@item Maximum Debuggability
+To summarize the above, a GMP build for maximum debuggability would be
+
+@example
+./configure --disable-shared --enable-assert \
+  --enable-alloca=debug --host=none CFLAGS=-g
+@end example
+
+For C++, add @samp{--enable-cxx CXXFLAGS=-g}.
+
+@item Checker
+@cindex Checker
+@cindex GCC Checker
+The GCC checker (@uref{http://savannah.nongnu.org/projects/checker/}) can be
+used with GMP@.  It contains a stub library which means GMP applications
+compiled with checker can use a normal GMP build.
+
+A build of GMP with checking within GMP itself can be made.  This will run
+very very slowly.  On GNU/Linux for example,
+
+@cindex @command{checkergcc}
+@example
+./configure --host=none-pc-linux-gnu CC=checkergcc
+@end example
+
+@samp{--host=none} must be used, since the GMP assembly code doesn't support
+the checking scheme.  The GMP C++ features cannot be used, since current
+versions of checker (0.9.9.1) don't yet support the standard C++ library.
+
+@item Valgrind
+@cindex Valgrind
+The valgrind program (@uref{http://valgrind.org/}) is a memory
+checker for x86s.  It translates and emulates machine instructions to do
+strong checks for uninitialized data (at the level of individual bits), memory
+accesses through bad pointers, and memory leaks.
+
+Recent versions of Valgrind are getting support for MMX and SSE/SSE2
+instructions; for past versions GMP will need to be configured not to use
+those, i.e.@: for an x86 without them (for instance plain @samp{i486}).
+
+GMP's assembly code sometimes promotes a read of the limbs to some larger size,
+for efficiency.  GMP will do this even at the start and end of a multilimb
+operand, using naturally aligned operations on the larger type.  This may lead
+to benign reads outside of allocated areas, triggering complaints from Valgrind.
+
+@item Other Problems
+Any suspected bug in GMP itself should be isolated to make sure it's not an
+application problem, see @ref{Reporting Bugs}.
+@end table
+
+
+@node Profiling, Autoconf, Debugging, GMP Basics
+@section Profiling
+@cindex Profiling
+@cindex Execution profiling
+@cindex @code{--enable-profiling}
+
+Running a program under a profiler is a good way to find where it's spending
+most time and where improvements can be best sought.  The profiling choices
+for a GMP build are as follows.
+
+@table @asis
+@item @samp{--disable-profiling}
+The default is to add nothing special for profiling.
+
+It should be possible to just compile the mainline of a program with @code{-p}
+and use @command{prof} to get a profile consisting of timer-based sampling of
+the program counter.  Most of the GMP assembly code has the necessary symbol
+information.
+
+This approach has the advantage of minimizing interference with normal program
+operation, but on most systems the resolution of the sampling is quite low (10
+milliseconds for instance), requiring long runs to get accurate information.
+
+@item @samp{--enable-profiling=prof}
+@cindex @code{prof}
+Build with support for the system @command{prof}, which means @samp{-p} added
+to the @samp{CFLAGS}.
+
+This provides call counting in addition to program counter sampling, which
+allows the most frequently called routines to be identified, and an average
+time spent in each routine to be determined.
+
+The x86 assembly code has support for this option, but on other processors
+the assembly routines will be as if compiled without @samp{-p} and therefore
+won't appear in the call counts.
+
+On some systems, such as GNU/Linux, @samp{-p} in fact means @samp{-pg} and in
+this case @samp{--enable-profiling=gprof} described below should be used
+instead.
+
+@item @samp{--enable-profiling=gprof}
+@cindex @code{gprof}
+Build with support for @command{gprof}, which means @samp{-pg} added to the
+@samp{CFLAGS}.
+
+This provides call graph construction in addition to call counting and program
+counter sampling, which makes it possible to count calls coming from different
+locations.  For example the number of calls to @code{mpn_mul} from
+@code{mpz_mul} versus the number from @code{mpf_mul}.  The program counter
+sampling is still flat though, so only a total time in @code{mpn_mul} would be
+accumulated, not a separate amount for each call site.
+
+The x86 assembly code has support for this option, but on other processors
+the assembly routines will be as if compiled without @samp{-pg} and therefore
+not be included in the call counts.
+
+On x86 and m68k systems @samp{-pg} and @samp{-fomit-frame-pointer} are
+incompatible, so the latter is omitted from the default flags in that case,
+which might result in poorer code generation.
+
+Incidentally, it should be possible to use the @command{gprof} program with a
+plain @samp{--enable-profiling=prof} build.  But in that case only the
+@samp{gprof -p} flat profile and call counts can be expected to be valid, not
+the @samp{gprof -q} call graph.
+
+@item @samp{--enable-profiling=instrument}
+@cindex @code{-finstrument-functions}
+@cindex @code{instrument-functions}
+Build with the GCC option @samp{-finstrument-functions} added to the
+@samp{CFLAGS} (@pxref{Code Gen Options,, Options for Code Generation, gcc,
+Using the GNU Compiler Collection (GCC)}).
+
+This inserts special instrumenting calls at the start and end of each
+function, allowing exact timing and full call graph construction.
+
+This instrumenting is not normally a standard system feature and will require
+support from an external library, such as
+
+@cindex FunctionCheck
+@cindex fnccheck
+@display
+@uref{http://sourceforge.net/projects/fnccheck/}
+@end display
+
+This should be included in @samp{LIBS} during the GMP configure so that test
+programs will link.  For example,
+
+@example
+./configure --enable-profiling=instrument LIBS=-lfc
+@end example
+
+On a GNU system the C library provides dummy instrumenting functions, so
+programs compiled with this option will link.  In this case it's only
+necessary to ensure the correct library is added when linking an application.
+
+The x86 assembly code supports this option, but on other processors the
+assembly routines will be as if compiled without
+@samp{-finstrument-functions} meaning time spent in them will effectively be
+attributed to their caller.
+@end table
+
+
+@node Autoconf, Emacs, Profiling, GMP Basics
+@section Autoconf
+@cindex Autoconf
+
+Autoconf based applications can easily check whether GMP is installed.  The
+only thing to be noted is that GMP library symbols from version 3 onwards have
+prefixes like @code{__gmpz}.  The following therefore would be a simple test,
+
+@cindex @code{AC_CHECK_LIB}
+@example
+AC_CHECK_LIB(gmp, __gmpz_init)
+@end example
+
+This just uses the default @code{AC_CHECK_LIB} actions for found or not found,
+but an application that must have GMP would want to generate an error if not
+found.  For example,
+
+@example
+AC_CHECK_LIB(gmp, __gmpz_init, ,
+  [AC_MSG_ERROR([GNU MP not found, see http://gmplib.org/])])
+@end example
+
+If functions added in some particular version of GMP are required, then one of
+those can be used when checking.  For example @code{mpz_mul_si} was added in
+GMP 3.1,
+
+@example
+AC_CHECK_LIB(gmp, __gmpz_mul_si, ,
+  [AC_MSG_ERROR(
+  [GNU MP not found, or not 3.1 or up, see http://gmplib.org/])])
+@end example
+
+An alternative would be to test the version number in @file{gmp.h} using say
+@code{AC_EGREP_CPP}.  That would make it possible to test the exact version,
+if some particular sub-minor release is known to be necessary.
+
+In general it's recommended that applications should simply demand a new
+enough GMP rather than trying to provide supplements for features not
+available in past versions.
+
+Occasionally an application will need or want to know the size of a type at
+configuration or preprocessing time, not just with @code{sizeof} in the code.
+This can be done in the normal way with @code{mp_limb_t} etc, but GMP 4.0 or
+up is best for this, since prior versions needed certain @samp{-D} defines on
+systems using a @code{long long} limb.  The following would suit Autoconf 2.50
+or up,
+
+@example
+AC_CHECK_SIZEOF(mp_limb_t, , [#include <gmp.h>])
+@end example
+
+
+@node Emacs,  , Autoconf, GMP Basics
+@section Emacs
+@cindex Emacs
+@cindex @code{info-lookup-symbol}
+
+@key{C-h C-i} (@code{info-lookup-symbol}) is a good way to find documentation
+on C functions while editing (@pxref{Info Lookup, , Info Documentation Lookup,
+emacs, The Emacs Editor}).
+
+The GMP manual can be included in such lookups by putting the following in
+your @file{.emacs},
+
+@c  This isn't pretty, but there doesn't seem to be a better way (in emacs
+@c  21.2 at least).  info-lookup->mode-value could be used for the "assoc"s,
+@c  but that function isn't documented, whereas info-lookup-alist is.
+@c
+@example
+(eval-after-load "info-look"
+  '(let ((mode-value (assoc 'c-mode (assoc 'symbol info-lookup-alist))))
+     (setcar (nthcdr 3 mode-value)
+             (cons '("(gmp)Function Index" nil "^ -.* " "\\>")
+                   (nth 3 mode-value)))))
+@end example
+
+
+@node Reporting Bugs, Integer Functions, GMP Basics, Top
+@comment  node-name,  next,  previous,  up
+@chapter Reporting Bugs
+@cindex Reporting bugs
+@cindex Bug reporting
+
+If you think you have found a bug in the GMP library, please investigate it
+and report it.  We have made this library available to you, and it is not too
+much to ask you to report the bugs you find.
+
+Before you report a bug, check it's not already addressed in @ref{Known Build
+Problems}, or perhaps @ref{Notes for Particular Systems}.  You may also want
+to check @uref{http://gmplib.org/} for patches for this release.
+
+Please include the following in any report,
+
+@itemize @bullet
+@item
+The GMP version number, and if pre-packaged or patched then say so.
+
+@item
+A test program that makes it possible for us to reproduce the bug.  Include
+instructions on how to run the program.
+
+@item
+A description of what is wrong.  If the results are incorrect, in what way.
+If you get a crash, say so.
+
+@item
+If you get a crash, include a stack backtrace from the debugger if it's
+informative (@samp{where} in @command{gdb}, or @samp{$C} in @command{adb}).
+
+@item
+Please do not send core dumps, executables or @command{strace}s.
+
+@item
+The configuration options you used when building GMP, if any.
+
+@item
+The name of the compiler and its version.  For @command{gcc}, get the version
+with @samp{gcc -v}, otherwise perhaps @samp{what `which cc`}, or similar.
+
+@item
+The output from running @samp{uname -a}.
+
+@item
+The output from running @samp{./config.guess}, and from running
+@samp{./configfsf.guess} (might be the same).
+
+@item
+If the bug is related to @samp{configure}, then the compressed contents of
+@file{config.log}.
+
+@item
+If the bug is related to an @file{asm} file not assembling, then the contents
+of @file{config.m4} and the offending line or lines from the temporary
+@file{mpn/tmp-<file>.s}.
+@end itemize
+
+Please make an effort to produce a self-contained report, with something
+definite that can be tested or debugged.  Vague queries or piecemeal messages
+are difficult to act on and don't help the development effort.
+
+It is not uncommon that an observed problem is actually due to a bug in the
+compiler; the GMP code tends to explore interesting corners in compilers.
+
+If your bug report is good, we will do our best to help you get a corrected
+version of the library; if the bug report is poor, we won't do anything about
+it (except maybe ask you to send a better report).
+
+Send your report to: @email{gmp-bugs@@gmplib.org}.
+
+If you think something in this manual is unclear, or downright incorrect, or if
+the language needs to be improved, please send a note to the same address.
+
+
+@node Integer Functions, Rational Number Functions, Reporting Bugs, Top
+@comment  node-name,  next,  previous,  up
+@chapter Integer Functions
+@cindex Integer functions
+
+This chapter describes the GMP functions for performing integer arithmetic.
+These functions start with the prefix @code{mpz_}.
+
+GMP integers are stored in objects of type @code{mpz_t}.
+
+@menu
+* Initializing Integers::
+* Assigning Integers::
+* Simultaneous Integer Init & Assign::
+* Converting Integers::
+* Integer Arithmetic::
+* Integer Division::
+* Integer Exponentiation::
+* Integer Roots::
+* Number Theoretic Functions::
+* Integer Comparisons::
+* Integer Logic and Bit Fiddling::
+* I/O of Integers::
+* Integer Random Numbers::
+* Integer Import and Export::
+* Miscellaneous Integer Functions::
+* Integer Special Functions::
+@end menu
+
+@node Initializing Integers, Assigning Integers, Integer Functions, Integer Functions
+@comment  node-name,  next,  previous,  up
+@section Initialization Functions
+@cindex Integer initialization functions
+@cindex Initialization functions
+
+The functions for integer arithmetic assume that all integer objects are
+initialized.  You do that by calling the function @code{mpz_init}.  For
+example,
+
+@example
+@{
+  mpz_t integ;
+  mpz_init (integ);
+  @dots{}
+  mpz_add (integ, @dots{});
+  @dots{}
+  mpz_sub (integ, @dots{});
+
+  /* Unless the program is about to exit, do ... */
+  mpz_clear (integ);
+@}
+@end example
+
+As you can see, you can store new values any number of times, once an
+object is initialized.
+
+@deftypefun void mpz_init (mpz_t @var{x})
+Initialize @var{x}, and set its value to 0.
+@end deftypefun
+
+@deftypefun void mpz_inits (mpz_t @var{x}, ...)
+Initialize a NULL-terminated list of @code{mpz_t} variables, and set their
+values to 0.
+@end deftypefun
+
+@deftypefun void mpz_init2 (mpz_t @var{x}, mp_bitcnt_t @var{n})
+Initialize @var{x}, with space for @var{n}-bit numbers, and set its value to 0.
+Calling this function instead of @code{mpz_init} or @code{mpz_inits} is never
+necessary; reallocation is handled automatically by GMP when needed.
+
+@var{n} is only the initial space, @var{x} will grow automatically in
+the normal way, if necessary, for subsequent values stored.  @code{mpz_init2}
+makes it possible to avoid such reallocations if a maximum size is known in
+advance.
+@end deftypefun
+
+@deftypefun void mpz_clear (mpz_t @var{x})
+Free the space occupied by @var{x}.  Call this function for all @code{mpz_t}
+variables when you are done with them.
+@end deftypefun
+
+@deftypefun void mpz_clears (mpz_t @var{x}, ...)
+Free the space occupied by a NULL-terminated list of @code{mpz_t} variables.
+@end deftypefun
+
+@deftypefun void mpz_realloc2 (mpz_t @var{x}, mp_bitcnt_t @var{n})
+Change the space allocated for @var{x} to @var{n} bits.  The value in @var{x}
+is preserved if it fits, or is set to 0 if not.
+
+Calling this function is never necessary; reallocation is handled automatically
+by GMP when needed.  But this function can be used to increase the space for a
+variable in order to avoid repeated automatic reallocations, or to decrease it
+to give memory back to the heap.
+@end deftypefun
+
+
+@node Assigning Integers, Simultaneous Integer Init & Assign, Initializing Integers, Integer Functions
+@comment  node-name,  next,  previous,  up
+@section Assignment Functions
+@cindex Integer assignment functions
+@cindex Assignment functions
+
+These functions assign new values to already initialized integers
+(@pxref{Initializing Integers}).
+
+@deftypefun void mpz_set (mpz_t @var{rop}, mpz_t @var{op})
+@deftypefunx void mpz_set_ui (mpz_t @var{rop}, unsigned long int @var{op})
+@deftypefunx void mpz_set_si (mpz_t @var{rop}, signed long int @var{op})
+@deftypefunx void mpz_set_d (mpz_t @var{rop}, double @var{op})
+@deftypefunx void mpz_set_q (mpz_t @var{rop}, mpq_t @var{op})
+@deftypefunx void mpz_set_f (mpz_t @var{rop}, mpf_t @var{op})
+Set the value of @var{rop} from @var{op}.
+
+@code{mpz_set_d}, @code{mpz_set_q} and @code{mpz_set_f} truncate @var{op} to
+make it an integer.
+@end deftypefun
+
+@deftypefun int mpz_set_str (mpz_t @var{rop}, char *@var{str}, int @var{base})
+Set the value of @var{rop} from @var{str}, a null-terminated C string in base
+@var{base}.  White space is allowed in the string, and is simply ignored.
+
+The @var{base} may vary from 2 to 62, or if @var{base} is 0, then the leading
+characters are used: @code{0x} and @code{0X} for hexadecimal, @code{0b} and
+@code{0B} for binary, @code{0} for octal, or decimal otherwise.
+
+For bases up to 36, case is ignored; upper-case and lower-case letters have
+the same value.  For bases 37 to 62, upper-case letters represent the usual
+10..35 while lower-case letters represent 36..61.
+
+This function returns 0 if the entire string is a valid number in base
+@var{base}.  Otherwise it returns @minus{}1.
+@c
+@c  It turns out that it is not entirely true that this function ignores
+@c  white-space.  It does ignore it between digits, but not after a minus sign
+@c  or within or after ``0x''.  Some thought was given to disallowing all
+@c  whitespace, but that would be an incompatible change, whitespace has been
+@c  documented as ignored ever since GMP 1.
+@c
+@end deftypefun
+
+@deftypefun void mpz_swap (mpz_t @var{rop1}, mpz_t @var{rop2})
+Swap the values @var{rop1} and @var{rop2} efficiently.
+@end deftypefun
+
+
+@node Simultaneous Integer Init & Assign, Converting Integers, Assigning Integers, Integer Functions
+@comment  node-name,  next,  previous,  up
+@section Combined Initialization and Assignment Functions
+@cindex Integer assignment functions
+@cindex Assignment functions
+@cindex Integer initialization functions
+@cindex Initialization functions
+
+For convenience, GMP provides a parallel series of initialize-and-set functions
+which initialize the output and then store the value there.  These functions'
+names have the form @code{mpz_init_set@dots{}}
+
+Here is an example of using one:
+
+@example
+@{
+  mpz_t pie;
+  mpz_init_set_str (pie, "3141592653589793238462643383279502884", 10);
+  @dots{}
+  mpz_sub (pie, @dots{});
+  @dots{}
+  mpz_clear (pie);
+@}
+@end example
+
+@noindent
+Once the integer has been initialized by any of the @code{mpz_init_set@dots{}}
+functions, it can be used as the source or destination operand for the ordinary
+integer functions.  Don't use an initialize-and-set function on a variable
+already initialized!
+
+@deftypefun void mpz_init_set (mpz_t @var{rop}, mpz_t @var{op})
+@deftypefunx void mpz_init_set_ui (mpz_t @var{rop}, unsigned long int @var{op})
+@deftypefunx void mpz_init_set_si (mpz_t @var{rop}, signed long int @var{op})
+@deftypefunx void mpz_init_set_d (mpz_t @var{rop}, double @var{op})
+Initialize @var{rop} with limb space and set the initial numeric value from
+@var{op}.
+@end deftypefun
+
+@deftypefun int mpz_init_set_str (mpz_t @var{rop}, char *@var{str}, int @var{base})
+Initialize @var{rop} and set its value like @code{mpz_set_str} (see its
+documentation above for details).
+
+If the string is a correct base @var{base} number, the function returns 0;
+if an error occurs it returns @minus{}1.  @var{rop} is initialized even if
+an error occurs.  (I.e., you have to call @code{mpz_clear} for it.)
+@end deftypefun
+
+
+@node Converting Integers, Integer Arithmetic, Simultaneous Integer Init & Assign, Integer Functions
+@comment  node-name,  next,  previous,  up
+@section Conversion Functions
+@cindex Integer conversion functions
+@cindex Conversion functions
+
+This section describes functions for converting GMP integers to standard C
+types.  Functions for converting @emph{to} GMP integers are described in
+@ref{Assigning Integers} and @ref{I/O of Integers}.
+
+@deftypefun {unsigned long int} mpz_get_ui (mpz_t @var{op})
+Return the value of @var{op} as an @code{unsigned long}.
+
+If @var{op} is too big to fit an @code{unsigned long} then just the least
+significant bits that do fit are returned.  The sign of @var{op} is ignored,
+only the absolute value is used.
+@end deftypefun
+
+@deftypefun {signed long int} mpz_get_si (mpz_t @var{op})
+If @var{op} fits into a @code{signed long int} return the value of @var{op}.
+Otherwise return the least significant part of @var{op}, with the same sign
+as @var{op}.
+
+If @var{op} is too big to fit in a @code{signed long int}, the returned
+result is probably not very useful.  To find out if the value will fit, use
+the function @code{mpz_fits_slong_p}.
+@end deftypefun
+
+@deftypefun double mpz_get_d (mpz_t @var{op})
+Convert @var{op} to a @code{double}, truncating if necessary (i.e.@: rounding
+towards zero).
+
+If the exponent from the conversion is too big, the result is system
+dependent.  An infinity is returned where available.  A hardware overflow trap
+may or may not occur.
+@end deftypefun
+
+@deftypefun double mpz_get_d_2exp (signed long int *@var{exp}, mpz_t @var{op})
+Convert @var{op} to a @code{double}, truncating if necessary (i.e.@: rounding
+towards zero), and returning the exponent separately.
+
+The return value is in the range @math{0.5@le{}@GMPabs{@var{d}}<1} and the
+exponent is stored to @code{*@var{exp}}.  @m{@var{d} * 2^{exp}, @var{d} *
+2^@var{exp}} is the (truncated) @var{op} value.  If @var{op} is zero, the
+return is @math{0.0} and 0 is stored to @code{*@var{exp}}.
+
+@cindex @code{frexp}
+This is similar to the standard C @code{frexp} function (@pxref{Normalization
+Functions,,, libc, The GNU C Library Reference Manual}).
+@end deftypefun
+
+@deftypefun {char *} mpz_get_str (char *@var{str}, int @var{base}, mpz_t @var{op})
+Convert @var{op} to a string of digits in base @var{base}.  The base argument
+may vary from 2 to 62 or from @minus{}2 to @minus{}36.
+
+For @var{base} in the range 2..36, digits and lower-case letters are used; for
+@minus{}2..@minus{}36, digits and upper-case letters are used; for 37..62,
+digits, upper-case letters, and lower-case letters (in that significance order)
+are used.
+
+If @var{str} is @code{NULL}, the result string is allocated using the current
+allocation function (@pxref{Custom Allocation}).  The block will be
+@code{strlen(@var{result})+1} bytes, where @var{result} is the returned
+string, that being exactly enough for the string and null-terminator.
+
+If @var{str} is not @code{NULL}, it should point to a block of storage large
+enough for the result, that being @code{mpz_sizeinbase (@var{op}, @var{base})
++ 2}.  The two extra bytes are for a possible minus sign, and the
+null-terminator.
+
+A pointer to the result string is returned, being either the allocated block,
+or the given @var{str}.
+@end deftypefun
+
+
+@need 2000
+@node Integer Arithmetic, Integer Division, Converting Integers, Integer Functions
+@comment  node-name,  next,  previous,  up
+@section Arithmetic Functions
+@cindex Integer arithmetic functions
+@cindex Arithmetic functions
+
+@deftypefun void mpz_add (mpz_t @var{rop}, mpz_t @var{op1}, mpz_t @var{op2})
+@deftypefunx void mpz_add_ui (mpz_t @var{rop}, mpz_t @var{op1}, unsigned long int @var{op2})
+Set @var{rop} to @math{@var{op1} + @var{op2}}.
+@end deftypefun
+
+@deftypefun void mpz_sub (mpz_t @var{rop}, mpz_t @var{op1}, mpz_t @var{op2})
+@deftypefunx void mpz_sub_ui (mpz_t @var{rop}, mpz_t @var{op1}, unsigned long int @var{op2})
+@deftypefunx void mpz_ui_sub (mpz_t @var{rop}, unsigned long int @var{op1}, mpz_t @var{op2})
+Set @var{rop} to @var{op1} @minus{} @var{op2}.
+@end deftypefun
+
+@deftypefun void mpz_mul (mpz_t @var{rop}, mpz_t @var{op1}, mpz_t @var{op2})
+@deftypefunx void mpz_mul_si (mpz_t @var{rop}, mpz_t @var{op1}, long int @var{op2})
+@deftypefunx void mpz_mul_ui (mpz_t @var{rop}, mpz_t @var{op1}, unsigned long int @var{op2})
+Set @var{rop} to @math{@var{op1} @GMPtimes{} @var{op2}}.
+@end deftypefun
+
+@deftypefun void mpz_addmul (mpz_t @var{rop}, mpz_t @var{op1}, mpz_t @var{op2})
+@deftypefunx void mpz_addmul_ui (mpz_t @var{rop}, mpz_t @var{op1}, unsigned long int @var{op2})
+Set @var{rop} to @math{@var{rop} + @var{op1} @GMPtimes{} @var{op2}}.
+@end deftypefun
+
+@deftypefun void mpz_submul (mpz_t @var{rop}, mpz_t @var{op1}, mpz_t @var{op2})
+@deftypefunx void mpz_submul_ui (mpz_t @var{rop}, mpz_t @var{op1}, unsigned long int @var{op2})
+Set @var{rop} to @math{@var{rop} - @var{op1} @GMPtimes{} @var{op2}}.
+@end deftypefun
+
+@deftypefun void mpz_mul_2exp (mpz_t @var{rop}, mpz_t @var{op1}, mp_bitcnt_t @var{op2})
+@cindex Bit shift left
+Set @var{rop} to @m{@var{op1} \times 2^{op2}, @var{op1} times 2 raised to
+@var{op2}}.  This operation can also be defined as a left shift by @var{op2}
+bits.
+@end deftypefun
+
+@deftypefun void mpz_neg (mpz_t @var{rop}, mpz_t @var{op})
+Set @var{rop} to @minus{}@var{op}.
+@end deftypefun
+
+@deftypefun void mpz_abs (mpz_t @var{rop}, mpz_t @var{op})
+Set @var{rop} to the absolute value of @var{op}.
+@end deftypefun
+
+
+@need 2000
+@node Integer Division, Integer Exponentiation, Integer Arithmetic, Integer Functions
+@section Division Functions
+@cindex Integer division functions
+@cindex Division functions
+
+Division is undefined if the divisor is zero.  Passing a zero divisor to the
+division or modulo functions (including the modular powering functions
+@code{mpz_powm} and @code{mpz_powm_ui}) will cause an intentional division by
+zero.  This lets a program handle arithmetic exceptions in these functions the
+same way as for normal C @code{int} arithmetic.
+
+@c  Separate deftypefun groups for cdiv, fdiv and tdiv produce a blank line
+@c  between each, and seem to let tex do a better job of page breaks than an
+@c  @sp 1 in the middle of one big set.
+
+@deftypefun void mpz_cdiv_q (mpz_t @var{q}, mpz_t @var{n}, mpz_t @var{d})
+@deftypefunx void mpz_cdiv_r (mpz_t @var{r}, mpz_t @var{n}, mpz_t @var{d})
+@deftypefunx void mpz_cdiv_qr (mpz_t @var{q}, mpz_t @var{r}, mpz_t @var{n}, mpz_t @var{d})
+@maybepagebreak
+@deftypefunx {unsigned long int} mpz_cdiv_q_ui (mpz_t @var{q}, mpz_t @var{n}, @w{unsigned long int @var{d}})
+@deftypefunx {unsigned long int} mpz_cdiv_r_ui (mpz_t @var{r}, mpz_t @var{n}, @w{unsigned long int @var{d}})
+@deftypefunx {unsigned long int} mpz_cdiv_qr_ui (mpz_t @var{q}, mpz_t @var{r}, @w{mpz_t @var{n}}, @w{unsigned long int @var{d}})
+@deftypefunx {unsigned long int} mpz_cdiv_ui (mpz_t @var{n}, @w{unsigned long int @var{d}})
+@maybepagebreak
+@deftypefunx void mpz_cdiv_q_2exp (mpz_t @var{q}, mpz_t @var{n}, @w{mp_bitcnt_t @var{b}})
+@deftypefunx void mpz_cdiv_r_2exp (mpz_t @var{r}, mpz_t @var{n}, @w{mp_bitcnt_t @var{b}})
+@end deftypefun
+
+@deftypefun void mpz_fdiv_q (mpz_t @var{q}, mpz_t @var{n}, mpz_t @var{d})
+@deftypefunx void mpz_fdiv_r (mpz_t @var{r}, mpz_t @var{n}, mpz_t @var{d})
+@deftypefunx void mpz_fdiv_qr (mpz_t @var{q}, mpz_t @var{r}, mpz_t @var{n}, mpz_t @var{d})
+@maybepagebreak
+@deftypefunx {unsigned long int} mpz_fdiv_q_ui (mpz_t @var{q}, mpz_t @var{n}, @w{unsigned long int @var{d}})
+@deftypefunx {unsigned long int} mpz_fdiv_r_ui (mpz_t @var{r}, mpz_t @var{n}, @w{unsigned long int @var{d}})
+@deftypefunx {unsigned long int} mpz_fdiv_qr_ui (mpz_t @var{q}, mpz_t @var{r}, @w{mpz_t @var{n}}, @w{unsigned long int @var{d}})
+@deftypefunx {unsigned long int} mpz_fdiv_ui (mpz_t @var{n}, @w{unsigned long int @var{d}})
+@maybepagebreak
+@deftypefunx void mpz_fdiv_q_2exp (mpz_t @var{q}, mpz_t @var{n}, @w{mp_bitcnt_t @var{b}})
+@deftypefunx void mpz_fdiv_r_2exp (mpz_t @var{r}, mpz_t @var{n}, @w{mp_bitcnt_t @var{b}})
+@end deftypefun
+
+@deftypefun void mpz_tdiv_q (mpz_t @var{q}, mpz_t @var{n}, mpz_t @var{d})
+@deftypefunx void mpz_tdiv_r (mpz_t @var{r}, mpz_t @var{n}, mpz_t @var{d})
+@deftypefunx void mpz_tdiv_qr (mpz_t @var{q}, mpz_t @var{r}, mpz_t @var{n}, mpz_t @var{d})
+@maybepagebreak
+@deftypefunx {unsigned long int} mpz_tdiv_q_ui (mpz_t @var{q}, mpz_t @var{n}, @w{unsigned long int @var{d}})
+@deftypefunx {unsigned long int} mpz_tdiv_r_ui (mpz_t @var{r}, mpz_t @var{n}, @w{unsigned long int @var{d}})
+@deftypefunx {unsigned long int} mpz_tdiv_qr_ui (mpz_t @var{q}, mpz_t @var{r}, @w{mpz_t @var{n}}, @w{unsigned long int @var{d}})
+@deftypefunx {unsigned long int} mpz_tdiv_ui (mpz_t @var{n}, @w{unsigned long int @var{d}})
+@maybepagebreak
+@deftypefunx void mpz_tdiv_q_2exp (mpz_t @var{q}, mpz_t @var{n}, @w{mp_bitcnt_t @var{b}})
+@deftypefunx void mpz_tdiv_r_2exp (mpz_t @var{r}, mpz_t @var{n}, @w{mp_bitcnt_t @var{b}})
+@cindex Bit shift right
+
+@sp 1
+Divide @var{n} by @var{d}, forming a quotient @var{q} and/or remainder
+@var{r}.  For the @code{2exp} functions, @m{@var{d}=2^b, @var{d}=2^@var{b}}.
+The rounding is in three styles, each suiting different applications.
+
+@itemize @bullet
+@item
+@code{cdiv} rounds @var{q} up towards @m{+\infty, +infinity}, and @var{r} will
+have the opposite sign to @var{d}.  The @code{c} stands for ``ceil''.
+
+@item
+@code{fdiv} rounds @var{q} down towards @m{-\infty, @minus{}infinity}, and
+@var{r} will have the same sign as @var{d}.  The @code{f} stands for
+``floor''.
+
+@item
+@code{tdiv} rounds @var{q} towards zero, and @var{r} will have the same sign
+as @var{n}.  The @code{t} stands for ``truncate''.
+@end itemize
+
+In all cases @var{q} and @var{r} will satisfy
+@m{@var{n}=@var{q}@var{d}+@var{r}, @var{n}=@var{q}*@var{d}+@var{r}}, and
+@var{r} will satisfy @math{0@le{}@GMPabs{@var{r}}<@GMPabs{@var{d}}}.
+
+The @code{q} functions calculate only the quotient, the @code{r} functions
+only the remainder, and the @code{qr} functions calculate both.  Note that for
+@code{qr} the same variable cannot be passed for both @var{q} and @var{r}, or
+results will be unpredictable.
+
+For the @code{ui} variants the return value is the remainder, and in fact
+returning the remainder is all the @code{div_ui} functions do.  For
+@code{tdiv} and @code{cdiv} the remainder can be negative, so for those the
+return value is the absolute value of the remainder.
+
+For the @code{2exp} variants the divisor is @m{2^b,2^@var{b}}.  These
+functions are implemented as right shifts and bit masks, but of course they
+round the same as the other functions.
+
+For positive @var{n} both @code{mpz_fdiv_q_2exp} and @code{mpz_tdiv_q_2exp}
+are simple bitwise right shifts.  For negative @var{n}, @code{mpz_fdiv_q_2exp}
+is effectively an arithmetic right shift treating @var{n} as twos complement
+the same as the bitwise logical functions do, whereas @code{mpz_tdiv_q_2exp}
+effectively treats @var{n} as sign and magnitude.
+@end deftypefun
+
+@deftypefun void mpz_mod (mpz_t @var{r}, mpz_t @var{n}, mpz_t @var{d})
+@deftypefunx {unsigned long int} mpz_mod_ui (mpz_t @var{r}, mpz_t @var{n}, @w{unsigned long int @var{d}})
+Set @var{r} to @var{n} @code{mod} @var{d}.  The sign of the divisor is
+ignored; the result is always non-negative.
+
+@code{mpz_mod_ui} is identical to @code{mpz_fdiv_r_ui} above, returning the
+remainder as well as setting @var{r}.  See @code{mpz_fdiv_ui} above if only
+the return value is wanted.
+@end deftypefun
+
+@deftypefun void mpz_divexact (mpz_t @var{q}, mpz_t @var{n}, mpz_t @var{d})
+@deftypefunx void mpz_divexact_ui (mpz_t @var{q}, mpz_t @var{n}, unsigned long @var{d})
+@cindex Exact division functions
+Set @var{q} to @var{n}/@var{d}.  These functions produce correct results only
+when it is known in advance that @var{d} divides @var{n}.
+
+These routines are much faster than the other division functions, and are the
+best choice when exact division is known to occur, for example reducing a
+rational to lowest terms.
+@end deftypefun
+
+@deftypefun int mpz_divisible_p (mpz_t @var{n}, mpz_t @var{d})
+@deftypefunx int mpz_divisible_ui_p (mpz_t @var{n}, unsigned long int @var{d})
+@deftypefunx int mpz_divisible_2exp_p (mpz_t @var{n}, mp_bitcnt_t @var{b})
+@cindex Divisibility functions
+Return non-zero if @var{n} is exactly divisible by @var{d}, or in the case of
+@code{mpz_divisible_2exp_p} by @m{2^b,2^@var{b}}.
+
+@var{n} is divisible by @var{d} if there exists an integer @var{q} satisfying
+@math{@var{n} = @var{q}@GMPmultiply{}@var{d}}.  Unlike the other division
+functions, @math{@var{d}=0} is accepted and following the rule it can be seen
+that only 0 is considered divisible by 0.
+@end deftypefun
+
+@deftypefun int mpz_congruent_p (mpz_t @var{n}, mpz_t @var{c}, mpz_t @var{d})
+@deftypefunx int mpz_congruent_ui_p (mpz_t @var{n}, unsigned long int @var{c}, unsigned long int @var{d})
+@deftypefunx int mpz_congruent_2exp_p (mpz_t @var{n}, mpz_t @var{c}, mp_bitcnt_t @var{b})
+@cindex Divisibility functions
+@cindex Congruence functions
+Return non-zero if @var{n} is congruent to @var{c} modulo @var{d}, or in the
+case of @code{mpz_congruent_2exp_p} modulo @m{2^b,2^@var{b}}.
+
+@var{n} is congruent to @var{c} mod @var{d} if there exists an integer @var{q}
+satisfying @math{@var{n} = @var{c} + @var{q}@GMPmultiply{}@var{d}}.  Unlike
+the other division functions, @math{@var{d}=0} is accepted and following the
+rule it can be seen that @var{n} and @var{c} are considered congruent mod 0
+only when exactly equal.
+@end deftypefun
+
+
+@need 2000
+@node Integer Exponentiation, Integer Roots, Integer Division, Integer Functions
+@section Exponentiation Functions
+@cindex Integer exponentiation functions
+@cindex Exponentiation functions
+@cindex Powering functions
+
+@deftypefun void mpz_powm (mpz_t @var{rop}, mpz_t @var{base}, mpz_t @var{exp}, mpz_t @var{mod})
+@deftypefunx void mpz_powm_ui (mpz_t @var{rop}, mpz_t @var{base}, unsigned long int @var{exp}, mpz_t @var{mod})
+Set @var{rop} to @m{base^{exp} \bmod mod, (@var{base} raised to @var{exp})
+modulo @var{mod}}.
+
+Negative @var{exp} is supported if an inverse @math{@var{base}^@W{-1} @bmod
+@var{mod}} exists (see @code{mpz_invert} in @ref{Number Theoretic Functions}).
+If an inverse doesn't exist then a divide by zero is raised.
+@end deftypefun
+
+@deftypefun void mpz_powm_sec (mpz_t @var{rop}, mpz_t @var{base}, mpz_t @var{exp}, mpz_t @var{mod})
+Set @var{rop} to @m{base^{exp} \bmod mod, (@var{base} raised to @var{exp})
+modulo @var{mod}}.
+
+It is required that @math{@var{exp} > 0} and that @var{mod} is odd.
+
+This function is designed to take the same time and have the same cache access
+patterns for any two same-size arguments, assuming that function arguments are
+placed at the same position and that the machine state is identical upon
+function entry.  This function is intended for cryptographic purposes, where
+resilience to side-channel attacks is desired.
+@end deftypefun
+
+@deftypefun void mpz_pow_ui (mpz_t @var{rop}, mpz_t @var{base}, unsigned long int @var{exp})
+@deftypefunx void mpz_ui_pow_ui (mpz_t @var{rop}, unsigned long int @var{base}, unsigned long int @var{exp})
+Set @var{rop} to @m{base^{exp}, @var{base} raised to @var{exp}}.  The case
+@math{0^0} yields 1.
+@end deftypefun
+
+
+@need 2000
+@node Integer Roots, Number Theoretic Functions, Integer Exponentiation, Integer Functions
+@section Root Extraction Functions
+@cindex Integer root functions
+@cindex Root extraction functions
+
+@deftypefun int mpz_root (mpz_t @var{rop}, mpz_t @var{op}, unsigned long int @var{n})
+Set @var{rop} to @m{\lfloor\root n \of {op}\rfloor@C{},} the truncated integer
+part of the @var{n}th root of @var{op}.  Return non-zero if the computation
+was exact, i.e., if @var{op} is @var{rop} to the @var{n}th power.
+@end deftypefun
+
+@deftypefun void mpz_rootrem (mpz_t @var{root}, mpz_t @var{rem}, mpz_t @var{u}, unsigned long int @var{n})
+Set @var{root} to @m{\lfloor\root n \of {u}\rfloor@C{},} the truncated
+integer part of the @var{n}th root of @var{u}.  Set @var{rem} to the
+remainder, @m{(@var{u} - @var{root}^n),
+@var{u}@minus{}@var{root}**@var{n}}.
+@end deftypefun
+
+@deftypefun void mpz_sqrt (mpz_t @var{rop}, mpz_t @var{op})
+Set @var{rop} to @m{\lfloor\sqrt{@var{op}}\rfloor@C{},} the truncated
+integer part of the square root of @var{op}.
+@end deftypefun
+
+@deftypefun void mpz_sqrtrem (mpz_t @var{rop1}, mpz_t @var{rop2}, mpz_t @var{op})
+Set @var{rop1} to @m{\lfloor\sqrt{@var{op}}\rfloor, the truncated integer part
+of the square root of @var{op}}, like @code{mpz_sqrt}.  Set @var{rop2} to the
+remainder @m{(@var{op} - @var{rop1}^2),
+@var{op}@minus{}@var{rop1}*@var{rop1}}, which will be zero if @var{op} is a
+perfect square.
+
+If @var{rop1} and @var{rop2} are the same variable, the results are
+undefined.
+@end deftypefun
+
+@deftypefun int mpz_perfect_power_p (mpz_t @var{op})
+@cindex Perfect power functions
+@cindex Root testing functions
+Return non-zero if @var{op} is a perfect power, i.e., if there exist integers
+@m{a,@var{a}} and @m{b,@var{b}}, with @m{b>1, @var{b}>1}, such that
+@m{@var{op}=a^b, @var{op} equals @var{a} raised to the power @var{b}}.
+
+Under this definition both 0 and 1 are considered to be perfect powers.
+Negative values of @var{op} are accepted, but of course can only be odd
+perfect powers.
+@end deftypefun
+
+@deftypefun int mpz_perfect_square_p (mpz_t @var{op})
+@cindex Perfect square functions
+@cindex Root testing functions
+Return non-zero if @var{op} is a perfect square, i.e., if the square root of
+@var{op} is an integer.  Under this definition both 0 and 1 are considered to
+be perfect squares.
+@end deftypefun
+
+
+@need 2000
+@node Number Theoretic Functions, Integer Comparisons, Integer Roots, Integer Functions
+@section Number Theoretic Functions
+@cindex Number theoretic functions
+
+@deftypefun int mpz_probab_prime_p (mpz_t @var{n}, int @var{reps})
+@cindex Prime testing functions
+@cindex Probable prime testing functions
+Determine whether @var{n} is prime.  Return 2 if @var{n} is definitely prime,
+return 1 if @var{n} is probably prime (without being certain), or return 0 if
+@var{n} is definitely composite.
+
+This function does some trial divisions, then some Miller-Rabin probabilistic
+primality tests.  @var{reps} controls how many such tests are done, 5 to 10 is
+a reasonable number, more will reduce the chances of a composite being
+returned as ``probably prime''.
+
+Miller-Rabin and similar tests can be more properly called compositeness
+tests.  Numbers which fail are known to be composite but those which pass
+might be prime or might be composite.  Only a few composites pass, hence those
+which pass are considered probably prime.
+@end deftypefun
+
+@deftypefun void mpz_nextprime (mpz_t @var{rop}, mpz_t @var{op})
+@cindex Next prime function
+Set @var{rop} to the next prime greater than @var{op}.
+
+This function uses a probabilistic algorithm to identify primes.  For
+practical purposes it's adequate, the chance of a composite passing will be
+extremely small.
+@end deftypefun
+
+@c mpz_prime_p not implemented as of gmp 3.0.
+
+@c @deftypefun int mpz_prime_p (mpz_t @var{n})
+@c Return non-zero if @var{n} is prime and zero if @var{n} is a non-prime.
+@c This function is far slower than @code{mpz_probab_prime_p}, but then it
+@c never returns non-zero for composite numbers.
+
+@c (For practical purposes, using @code{mpz_probab_prime_p} is adequate.
+@c The likelihood of a programming error or hardware malfunction is orders
+@c of magnitudes greater than the likelihood for a composite to pass as a
+@c prime, if the @var{reps} argument is in the suggested range.)
+@c @end deftypefun
+
+@deftypefun void mpz_gcd (mpz_t @var{rop}, mpz_t @var{op1}, mpz_t @var{op2})
+@cindex Greatest common divisor functions
+@cindex GCD functions
+Set @var{rop} to the greatest common divisor of @var{op1} and @var{op2}.  The
+result is always positive even if one or both input operands are negative,
+except if both inputs are zero; then this function defines @math{gcd(0,0) = 0}.
+@end deftypefun
+
+@deftypefun {unsigned long int} mpz_gcd_ui (mpz_t @var{rop}, mpz_t @var{op1}, unsigned long int @var{op2})
+Compute the greatest common divisor of @var{op1} and @var{op2}.  If
+@var{rop} is not @code{NULL}, store the result there.
+
+If the result is small enough to fit in an @code{unsigned long int}, it is
+returned.  If the result does not fit, 0 is returned, and the result is equal
+to the argument @var{op1}.  Note that the result will always fit if @var{op2}
+is non-zero.
+@end deftypefun
+
+@deftypefun void mpz_gcdext (mpz_t @var{g}, mpz_t @var{s}, mpz_t @var{t}, mpz_t @var{a}, mpz_t @var{b})
+@cindex Extended GCD
+@cindex GCD extended
+Set @var{g} to the greatest common divisor of @var{a} and @var{b}, and in
+addition set @var{s} and @var{t} to coefficients satisfying
+@math{@var{a}@GMPmultiply{}@var{s} + @var{b}@GMPmultiply{}@var{t} = @var{g}}.
+The value in @var{g} is always positive, even if one or both of @var{a} and
+@var{b} are negative (or zero if both inputs are zero).  The values in @var{s}
+and @var{t} are chosen such that normally, @math{@GMPabs{@var{s}} <
+@GMPabs{@var{b}} / (2 @var{g})} and @math{@GMPabs{@var{t}} < @GMPabs{@var{a}}
+/ (2 @var{g})}, and these relations define @var{s} and @var{t} uniquely. There
+are a few exceptional cases:
+
+If @math{@GMPabs{@var{a}} = @GMPabs{@var{b}}}, then @math{@var{s} = 0},
+@math{@var{t} = sgn(@var{b})}.
+
+Otherwise, @math{@var{s} = sgn(@var{a})} if @math{@var{b} = 0} or
+@math{@GMPabs{@var{b}} = 2 @var{g}}, and @math{@var{t} = sgn(@var{b})} if
+@math{@var{a} = 0} or @math{@GMPabs{@var{a}} = 2 @var{g}}.
+
+In all cases, @math{@var{s} = 0} if and only if @math{@var{g} =
+@GMPabs{@var{b}}}, i.e., if @var{b} divides @var{a} or @math{@var{a} = @var{b}
+= 0}.
+
+If @var{t} is @code{NULL} then that value is not computed.
+@end deftypefun
+
+@deftypefun void mpz_lcm (mpz_t @var{rop}, mpz_t @var{op1}, mpz_t @var{op2})
+@deftypefunx void mpz_lcm_ui (mpz_t @var{rop}, mpz_t @var{op1}, unsigned long @var{op2})
+@cindex Least common multiple functions
+@cindex LCM functions
+Set @var{rop} to the least common multiple of @var{op1} and @var{op2}.
+@var{rop} is always positive, irrespective of the signs of @var{op1} and
+@var{op2}.  @var{rop} will be zero if either @var{op1} or @var{op2} is zero.
+@end deftypefun
+
+@deftypefun int mpz_invert (mpz_t @var{rop}, mpz_t @var{op1}, mpz_t @var{op2})
+@cindex Modular inverse functions
+@cindex Inverse modulo functions
+Compute the inverse of @var{op1} modulo @var{op2} and put the result in
+@var{rop}.  If the inverse exists, the return value is non-zero and @var{rop}
+will satisfy @math{0 < @var{rop} < @GMPabs{@var{op2}}}.  If an inverse doesn't
+exist the return value is zero and @var{rop} is undefined.  The behaviour of
+this function is undefined when @var{op2} is zero.
+@end deftypefun
+
+@deftypefun int mpz_jacobi (mpz_t @var{a}, mpz_t @var{b})
+@cindex Jacobi symbol functions
+Calculate the Jacobi symbol @m{\left(a \over b\right),
+(@var{a}/@var{b})}.  This is defined only for @var{b} odd.
+@end deftypefun
+
+@deftypefun int mpz_legendre (mpz_t @var{a}, mpz_t @var{p})
+@cindex Legendre symbol functions
+Calculate the Legendre symbol @m{\left(a \over p\right),
+(@var{a}/@var{p})}.  This is defined only for @var{p} an odd positive
+prime, and for such @var{p} it's identical to the Jacobi symbol.
+@end deftypefun
+
+@deftypefun int mpz_kronecker (mpz_t @var{a}, mpz_t @var{b})
+@deftypefunx int mpz_kronecker_si (mpz_t @var{a}, long @var{b})
+@deftypefunx int mpz_kronecker_ui (mpz_t @var{a}, unsigned long @var{b})
+@deftypefunx int mpz_si_kronecker (long @var{a}, mpz_t @var{b})
+@deftypefunx int mpz_ui_kronecker (unsigned long @var{a}, mpz_t @var{b})
+@cindex Kronecker symbol functions
+Calculate the Jacobi symbol @m{\left(a \over b\right),
+(@var{a}/@var{b})} with the Kronecker extension @m{\left(a \over
+2\right) = \left(2 \over a\right), (a/2)=(2/a)} when @math{a} is odd, or
+@m{\left(a \over 2\right) = 0, (a/2)=0} when @math{a} is even.
+
+When @var{b} is odd the Jacobi symbol and Kronecker symbol are
+identical, so @code{mpz_kronecker_ui} etc can be used for mixed
+precision Jacobi symbols too.
+
+For more information see Henri Cohen section 1.4.2 (@pxref{References}),
+or any number theory textbook.  See also the example program
+@file{demos/qcn.c} which uses @code{mpz_kronecker_ui}.
+@end deftypefun
+
+@deftypefun {mp_bitcnt_t} mpz_remove (mpz_t @var{rop}, mpz_t @var{op}, mpz_t @var{f})
+@cindex Remove factor functions
+@cindex Factor removal functions
+Remove all occurrences of the factor @var{f} from @var{op} and store the
+result in @var{rop}.  The return value is how many such occurrences were
+removed.
+@end deftypefun
+
+@deftypefun void mpz_fac_ui (mpz_t @var{rop}, unsigned long int @var{op})
+@cindex Factorial functions
+Set @var{rop} to @var{op}!, the factorial of @var{op}.
+@end deftypefun
+
+@deftypefun void mpz_bin_ui (mpz_t @var{rop}, mpz_t @var{n}, unsigned long int @var{k})
+@deftypefunx void mpz_bin_uiui (mpz_t @var{rop}, unsigned long int @var{n}, @w{unsigned long int @var{k}})
+@cindex Binomial coefficient functions
+Compute the binomial coefficient @m{\left({n}\atop{k}\right), @var{n} over
+@var{k}} and store the result in @var{rop}.  Negative values of @var{n} are
+supported by @code{mpz_bin_ui}, using the identity
+@m{\left({-n}\atop{k}\right) = (-1)^k \left({n+k-1}\atop{k}\right),
+bin(-n@C{}k) = (-1)^k * bin(n+k-1@C{}k)}, see Knuth volume 1 section 1.2.6
+part G.
+@end deftypefun
+
+@deftypefun void mpz_fib_ui (mpz_t @var{fn}, unsigned long int @var{n})
+@deftypefunx void mpz_fib2_ui (mpz_t @var{fn}, mpz_t @var{fnsub1}, unsigned long int @var{n})
+@cindex Fibonacci sequence functions
+@code{mpz_fib_ui} sets @var{fn} to @m{F_n,F[n]}, the @var{n}'th Fibonacci
+number.  @code{mpz_fib2_ui} sets @var{fn} to @m{F_n,F[n]}, and @var{fnsub1} to
+@m{F_{n-1},F[n-1]}.
+
+These functions are designed for calculating isolated Fibonacci numbers.  When
+a sequence of values is wanted it's best to start with @code{mpz_fib2_ui} and
+iterate the defining @m{F_{n+1} = F_n + F_{n-1}, F[n+1]=F[n]+F[n-1]} or
+similar.
+@end deftypefun
+
+@deftypefun void mpz_lucnum_ui (mpz_t @var{ln}, unsigned long int @var{n})
+@deftypefunx void mpz_lucnum2_ui (mpz_t @var{ln}, mpz_t @var{lnsub1}, unsigned long int @var{n})
+@cindex Lucas number functions
+@code{mpz_lucnum_ui} sets @var{ln} to @m{L_n,L[n]}, the @var{n}'th Lucas
+number.  @code{mpz_lucnum2_ui} sets @var{ln} to @m{L_n,L[n]}, and @var{lnsub1}
+to @m{L_{n-1},L[n-1]}.
+
+These functions are designed for calculating isolated Lucas numbers.  When a
+sequence of values is wanted it's best to start with @code{mpz_lucnum2_ui} and
+iterate the defining @m{L_{n+1} = L_n + L_{n-1}, L[n+1]=L[n]+L[n-1]} or
+similar.
+
+The Fibonacci numbers and Lucas numbers are related sequences, so it's never
+necessary to call both @code{mpz_fib2_ui} and @code{mpz_lucnum2_ui}.  The
+formulas for going from Fibonacci to Lucas can be found in @ref{Lucas Numbers
+Algorithm}, the reverse is straightforward too.
+@end deftypefun
+
+
+@node Integer Comparisons, Integer Logic and Bit Fiddling, Number Theoretic Functions, Integer Functions
+@comment  node-name,  next,  previous,  up
+@section Comparison Functions
+@cindex Integer comparison functions
+@cindex Comparison functions
+
+@deftypefn Function int mpz_cmp (mpz_t @var{op1}, mpz_t @var{op2})
+@deftypefnx Function int mpz_cmp_d (mpz_t @var{op1}, double @var{op2})
+@deftypefnx Macro int mpz_cmp_si (mpz_t @var{op1}, signed long int @var{op2})
+@deftypefnx Macro int mpz_cmp_ui (mpz_t @var{op1}, unsigned long int @var{op2})
+Compare @var{op1} and @var{op2}.  Return a positive value if @math{@var{op1} >
+@var{op2}}, zero if @math{@var{op1} = @var{op2}}, or a negative value if
+@math{@var{op1} < @var{op2}}.
+
+@code{mpz_cmp_ui} and @code{mpz_cmp_si} are macros and will evaluate their
+arguments more than once.  @code{mpz_cmp_d} can be called with an infinity,
+but results are undefined for a NaN.
+@end deftypefn
+
+@deftypefn Function int mpz_cmpabs (mpz_t @var{op1}, mpz_t @var{op2})
+@deftypefnx Function int mpz_cmpabs_d (mpz_t @var{op1}, double @var{op2})
+@deftypefnx Function int mpz_cmpabs_ui (mpz_t @var{op1}, unsigned long int @var{op2})
+Compare the absolute values of @var{op1} and @var{op2}.  Return a positive
+value if @math{@GMPabs{@var{op1}} > @GMPabs{@var{op2}}}, zero if
+@math{@GMPabs{@var{op1}} = @GMPabs{@var{op2}}}, or a negative value if
+@math{@GMPabs{@var{op1}} < @GMPabs{@var{op2}}}.
+
+@code{mpz_cmpabs_d} can be called with an infinity, but results are undefined
+for a NaN.
+@end deftypefn
+
+@deftypefn Macro int mpz_sgn (mpz_t @var{op})
+@cindex Sign tests
+@cindex Integer sign tests
+Return @math{+1} if @math{@var{op} > 0}, 0 if @math{@var{op} = 0}, and
+@math{-1} if @math{@var{op} < 0}.
+
+This function is actually implemented as a macro.  It evaluates its argument
+multiple times.
+@end deftypefn
+
+
+@node Integer Logic and Bit Fiddling, I/O of Integers, Integer Comparisons, Integer Functions
+@comment  node-name,  next,  previous,  up
+@section Logical and Bit Manipulation Functions
+@cindex Logical functions
+@cindex Bit manipulation functions
+@cindex Integer logical functions
+@cindex Integer bit manipulation functions
+
+These functions behave as if twos complement arithmetic were used (although
+sign-magnitude is the actual implementation).  The least significant bit is
+number 0.
+
+@deftypefun void mpz_and (mpz_t @var{rop}, mpz_t @var{op1}, mpz_t @var{op2})
+Set @var{rop} to @var{op1} bitwise-and @var{op2}.
+@end deftypefun
+
+@deftypefun void mpz_ior (mpz_t @var{rop}, mpz_t @var{op1}, mpz_t @var{op2})
+Set @var{rop} to @var{op1} bitwise inclusive-or @var{op2}.
+@end deftypefun
+
+@deftypefun void mpz_xor (mpz_t @var{rop}, mpz_t @var{op1}, mpz_t @var{op2})
+Set @var{rop} to @var{op1} bitwise exclusive-or @var{op2}.
+@end deftypefun
+
+@deftypefun void mpz_com (mpz_t @var{rop}, mpz_t @var{op})
+Set @var{rop} to the one's complement of @var{op}.
+@end deftypefun
+
+@deftypefun {mp_bitcnt_t} mpz_popcount (mpz_t @var{op})
+If @math{@var{op}@ge{}0}, return the population count of @var{op}, which is the
+number of 1 bits in the binary representation.  If @math{@var{op}<0}, the
+number of 1s is infinite, and the return value is the largest possible
+@code{mp_bitcnt_t}.
+@end deftypefun
+
+@deftypefun {mp_bitcnt_t} mpz_hamdist (mpz_t @var{op1}, mpz_t @var{op2})
+If @var{op1} and @var{op2} are both @math{@ge{}0} or both @math{<0}, return the
+hamming distance between the two operands, which is the number of bit positions
+where @var{op1} and @var{op2} have different bit values.  If one operand is
+@math{@ge{}0} and the other @math{<0} then the number of bits different is
+infinite, and the return value is the largest possible @code{mp_bitcnt_t}.
+@end deftypefun
+
+@deftypefun {mp_bitcnt_t} mpz_scan0 (mpz_t @var{op}, mp_bitcnt_t @var{starting_bit})
+@deftypefunx {mp_bitcnt_t} mpz_scan1 (mpz_t @var{op}, mp_bitcnt_t @var{starting_bit})
+@cindex Bit scanning functions
+@cindex Scan bit functions
+Scan @var{op}, starting from bit @var{starting_bit}, towards more significant
+bits, until the first 0 or 1 bit (respectively) is found.  Return the index of
+the found bit.
+
+If the bit at @var{starting_bit} is already what's sought, then
+@var{starting_bit} is returned.
+
+If there's no bit found, then the largest possible @code{mp_bitcnt_t} is
+returned.  This will happen in @code{mpz_scan0} past the end of a negative
+number, or @code{mpz_scan1} past the end of a nonnegative number.
+@end deftypefun
+
+@deftypefun void mpz_setbit (mpz_t @var{rop}, mp_bitcnt_t @var{bit_index})
+Set bit @var{bit_index} in @var{rop}.
+@end deftypefun
+
+@deftypefun void mpz_clrbit (mpz_t @var{rop}, mp_bitcnt_t @var{bit_index})
+Clear bit @var{bit_index} in @var{rop}.
+@end deftypefun
+
+@deftypefun void mpz_combit (mpz_t @var{rop}, mp_bitcnt_t @var{bit_index})
+Complement bit @var{bit_index} in @var{rop}.
+@end deftypefun
+
+@deftypefun int mpz_tstbit (mpz_t @var{op}, mp_bitcnt_t @var{bit_index})
+Test bit @var{bit_index} in @var{op} and return 0 or 1 accordingly.
+@end deftypefun
+
+@node I/O of Integers, Integer Random Numbers, Integer Logic and Bit Fiddling, Integer Functions
+@comment  node-name,  next,  previous,  up
+@section Input and Output Functions
+@cindex Integer input and output functions
+@cindex Input functions
+@cindex Output functions
+@cindex I/O functions
+
+Functions that perform input from a stdio stream, and functions that output to
+a stdio stream, of @code{mpz} numbers.  Passing a @code{NULL} pointer for a
+@var{stream} argument to any of these functions will make them read from
+@code{stdin} and write to @code{stdout}, respectively.
+
+When using any of these functions, it is a good idea to include @file{stdio.h}
+before @file{gmp.h}, since that will allow @file{gmp.h} to define prototypes
+for these functions.
+
+See also @ref{Formatted Output} and @ref{Formatted Input}.
+
+@deftypefun size_t mpz_out_str (FILE *@var{stream}, int @var{base}, mpz_t @var{op})
+Output @var{op} on stdio stream @var{stream}, as a string of digits in base
+@var{base}.  The base argument may vary from 2 to 62 or from @minus{}2 to
+@minus{}36.
+
+For @var{base} in the range 2..36, digits and lower-case letters are used; for
+@minus{}2..@minus{}36, digits and upper-case letters are used; for 37..62,
+digits, upper-case letters, and lower-case letters (in that significance order)
+are used.
+
+Return the number of bytes written, or if an error occurred, return 0.
+@end deftypefun
+
+@deftypefun size_t mpz_inp_str (mpz_t @var{rop}, FILE *@var{stream}, int @var{base})
+Input a possibly white-space preceded string in base @var{base} from stdio
+stream @var{stream}, and put the read integer in @var{rop}.
+
+The @var{base} may vary from 2 to 62, or if @var{base} is 0, then the leading
+characters are used: @code{0x} and @code{0X} for hexadecimal, @code{0b} and
+@code{0B} for binary, @code{0} for octal, or decimal otherwise.
+
+For bases up to 36, case is ignored; upper-case and lower-case letters have
+the same value.  For bases 37 to 62, upper-case letters represent the usual
+10..35 while lower-case letters represent 36..61.
+
+Return the number of bytes read, or if an error occurred, return 0.
+@end deftypefun
+
+@deftypefun size_t mpz_out_raw (FILE *@var{stream}, mpz_t @var{op})
+Output @var{op} on stdio stream @var{stream}, in raw binary format.  The
+integer is written in a portable format, with 4 bytes of size information, and
+that many bytes of limbs.  Both the size and the limbs are written in
+decreasing significance order (i.e., in big-endian).
+
+The output can be read with @code{mpz_inp_raw}.
+
+Return the number of bytes written, or if an error occurred, return 0.
+
+The output of this cannot be read by @code{mpz_inp_raw} from GMP 1, because
+of changes necessary for compatibility between 32-bit and 64-bit machines.
+@end deftypefun
+
+@deftypefun size_t mpz_inp_raw (mpz_t @var{rop}, FILE *@var{stream})
+Input from stdio stream @var{stream} in the format written by
+@code{mpz_out_raw}, and put the result in @var{rop}.  Return the number of
+bytes read, or if an error occurred, return 0.
+
+This routine can read the output from @code{mpz_out_raw} also from GMP 1, in
+spite of changes necessary for compatibility between 32-bit and 64-bit
+machines.
+@end deftypefun
+
+
+@need 2000
+@node Integer Random Numbers, Integer Import and Export, I/O of Integers, Integer Functions
+@comment  node-name,  next,  previous,  up
+@section Random Number Functions
+@cindex Integer random number functions
+@cindex Random number functions
+
+The random number functions of GMP come in two groups; older functions
+that rely on a global state, and newer functions that accept a state
+parameter that is read and modified.  Please see the @ref{Random Number
+Functions} for more information on how to use and not to use random
+number functions.
+
+@deftypefun void mpz_urandomb (mpz_t @var{rop}, gmp_randstate_t @var{state}, mp_bitcnt_t @var{n})
+Generate a uniformly distributed random integer in the range 0 to @m{2^n-1,
+2^@var{n}@minus{}1}, inclusive.
+
+The variable @var{state} must be initialized by calling one of the
+@code{gmp_randinit} functions (@ref{Random State Initialization}) before
+invoking this function.
+@end deftypefun
+
+@deftypefun void mpz_urandomm (mpz_t @var{rop}, gmp_randstate_t @var{state}, mpz_t @var{n})
+Generate a uniform random integer in the range 0 to @math{@var{n}-1},
+inclusive.
+
+The variable @var{state} must be initialized by calling one of the
+@code{gmp_randinit} functions (@ref{Random State Initialization})
+before invoking this function.
+@end deftypefun
+
+@deftypefun void mpz_rrandomb (mpz_t @var{rop}, gmp_randstate_t @var{state}, mp_bitcnt_t @var{n})
+Generate a random integer with long strings of zeros and ones in the
+binary representation.  Useful for testing functions and algorithms,
+since random numbers of this kind have proven to be more likely to
+trigger corner-case bugs.  The random number will be in the range
+0 to @m{2^n-1, 2^@var{n}@minus{}1}, inclusive.
+
+The variable @var{state} must be initialized by calling one of the
+@code{gmp_randinit} functions (@ref{Random State Initialization})
+before invoking this function.
+@end deftypefun
+
+@deftypefun void mpz_random (mpz_t @var{rop}, mp_size_t @var{max_size})
+Generate a random integer of at most @var{max_size} limbs.  The generated
+random number doesn't satisfy any particular requirements of randomness.
+Negative random numbers are generated when @var{max_size} is negative.
+
+This function is obsolete.  Use @code{mpz_urandomb} or
+@code{mpz_urandomm} instead.
+@end deftypefun
+
+@deftypefun void mpz_random2 (mpz_t @var{rop}, mp_size_t @var{max_size})
+Generate a random integer of at most @var{max_size} limbs, with long strings
+of zeros and ones in the binary representation.  Useful for testing functions
+and algorithms, since random numbers of this kind have proven to be more
+likely to trigger corner-case bugs.  Negative random numbers are generated
+when @var{max_size} is negative.
+
+This function is obsolete.  Use @code{mpz_rrandomb} instead.
+@end deftypefun
+
+
+@node Integer Import and Export, Miscellaneous Integer Functions, Integer Random Numbers, Integer Functions
+@section Integer Import and Export
+
+@code{mpz_t} variables can be converted to and from arbitrary words of binary
+data with the following functions.
+
+@deftypefun void mpz_import (mpz_t @var{rop}, size_t @var{count}, int @var{order}, size_t @var{size}, int @var{endian}, size_t @var{nails}, const void *@var{op})
+@cindex Integer import
+@cindex Import
+Set @var{rop} from an array of word data at @var{op}.
+
+The parameters specify the format of the data.  @var{count} many words are
+read, each @var{size} bytes.  @var{order} can be 1 for most significant word
+first or -1 for least significant first.  Within each word @var{endian} can be
+1 for most significant byte first, -1 for least significant first, or 0 for
+the native endianness of the host CPU@.  The most significant @var{nails} bits
+of each word are skipped, this can be 0 to use the full words.
+
+There is no sign taken from the data, @var{rop} will simply be a positive
+integer.  An application can handle any sign itself, and apply it for instance
+with @code{mpz_neg}.
+
+There are no data alignment restrictions on @var{op}, any address is allowed.
+
+Here's an example converting an array of @code{unsigned long} data, most
+significant element first, and host byte order within each value.
+
+@example
+unsigned long  a[20];
+/* Initialize @var{z} and @var{a} */
+mpz_import (z, 20, 1, sizeof(a[0]), 0, 0, a);
+@end example
+
+This example assumes the full @code{sizeof} bytes are used for data in the
+given type, which is usually true, and certainly true for @code{unsigned long}
+everywhere we know of.  However on Cray vector systems it may be noted that
+@code{short} and @code{int} are always stored in 8 bytes (and with
+@code{sizeof} indicating that) but use only 32 or 46 bits.  The @var{nails}
+feature can account for this, by passing for instance
+@code{8*sizeof(int)-INT_BIT}.
+@end deftypefun
+
+@deftypefun {void *} mpz_export (void *@var{rop}, size_t *@var{countp}, int @var{order}, size_t @var{size}, int @var{endian}, size_t @var{nails}, mpz_t @var{op})
+@cindex Integer export
+@cindex Export
+Fill @var{rop} with word data from @var{op}.
+
+The parameters specify the format of the data produced.  Each word will be
+@var{size} bytes and @var{order} can be 1 for most significant word first or
+-1 for least significant first.  Within each word @var{endian} can be 1 for
+most significant byte first, -1 for least significant first, or 0 for the
+native endianness of the host CPU@.  The most significant @var{nails} bits of
+each word are unused and set to zero, this can be 0 to produce full words.
+
+The number of words produced is written to @code{*@var{countp}}, or
+@var{countp} can be @code{NULL} to discard the count.  @var{rop} must have
+enough space for the data, or if @var{rop} is @code{NULL} then a result array
+of the necessary size is allocated using the current GMP allocation function
+(@pxref{Custom Allocation}).  In either case the return value is the
+destination used, either @var{rop} or the allocated block.
+
+If @var{op} is non-zero then the most significant word produced will be
+non-zero.  If @var{op} is zero then the count returned will be zero and
+nothing written to @var{rop}.  If @var{rop} is @code{NULL} in this case, no
+block is allocated, just @code{NULL} is returned.
+
+The sign of @var{op} is ignored, just the absolute value is exported.  An
+application can use @code{mpz_sgn} to get the sign and handle it as desired.
+(@pxref{Integer Comparisons})
+
+There are no data alignment restrictions on @var{rop}, any address is allowed.
+
+When an application is allocating space itself the required size can be
+determined with a calculation like the following.  Since @code{mpz_sizeinbase}
+always returns at least 1, @code{count} here will be at least one, which
+avoids any portability problems with @code{malloc(0)}, though if @code{z} is
+zero no space at all is actually needed (or written).
+
+@example
+numb = 8*size - nail;
+count = (mpz_sizeinbase (z, 2) + numb-1) / numb;
+p = malloc (count * size);
+@end example
+@end deftypefun
+
+
+@need 2000
+@node Miscellaneous Integer Functions, Integer Special Functions, Integer Import and Export, Integer Functions
+@comment  node-name,  next,  previous,  up
+@section Miscellaneous Functions
+@cindex Miscellaneous integer functions
+@cindex Integer miscellaneous functions
+
+@deftypefun int mpz_fits_ulong_p (mpz_t @var{op})
+@deftypefunx int mpz_fits_slong_p (mpz_t @var{op})
+@deftypefunx int mpz_fits_uint_p (mpz_t @var{op})
+@deftypefunx int mpz_fits_sint_p (mpz_t @var{op})
+@deftypefunx int mpz_fits_ushort_p (mpz_t @var{op})
+@deftypefunx int mpz_fits_sshort_p (mpz_t @var{op})
+Return non-zero iff the value of @var{op} fits in an @code{unsigned long int},
+@code{signed long int}, @code{unsigned int}, @code{signed int}, @code{unsigned
+short int}, or @code{signed short int}, respectively.  Otherwise, return zero.
+@end deftypefun
+
+@deftypefn Macro int mpz_odd_p (mpz_t @var{op})
+@deftypefnx Macro int mpz_even_p (mpz_t @var{op})
+Determine whether @var{op} is odd or even, respectively.  Return non-zero if
+yes, zero if no.  These macros evaluate their argument more than once.
+@end deftypefn
+
+@deftypefun size_t mpz_sizeinbase (mpz_t @var{op}, int @var{base})
+@cindex Size in digits
+@cindex Digits in an integer
+Return the size of @var{op} measured in number of digits in the given
+@var{base}.  @var{base} can vary from 2 to 62.  The sign of @var{op} is
+ignored, just the absolute value is used.  The result will be either exact or
+1 too big.  If @var{base} is a power of 2, the result is always exact.  If
+@var{op} is zero the return value is always 1.
+
+This function can be used to determine the space required when converting
+@var{op} to a string.  The right amount of allocation is normally two more
+than the value returned by @code{mpz_sizeinbase}, one extra for a minus sign
+and one for the null-terminator.
+
+@cindex Most significant bit
+It will be noted that @code{mpz_sizeinbase(@var{op},2)} can be used to locate
+the most significant 1 bit in @var{op}, counting from 1.  (Unlike the bitwise
+functions which start from 0, @xref{Integer Logic and Bit Fiddling,, Logical
+and Bit Manipulation Functions}.)
+@end deftypefun
+
+
+@node Integer Special Functions,  , Miscellaneous Integer Functions, Integer Functions
+@section Special Functions
+@cindex Special integer functions
+@cindex Integer special functions
+
+The functions in this section are for various special purposes.  Most
+applications will not need them.
+
+@deftypefun void mpz_array_init (mpz_t @var{integer_array}, mp_size_t @var{array_size}, @w{mp_size_t @var{fixed_num_bits}})
+This is a special type of initialization.  @strong{Fixed} space of
+@var{fixed_num_bits} is allocated to each of the @var{array_size} integers in
+@var{integer_array}.  There is no way to free the storage allocated by this
+function.  Don't call @code{mpz_clear}!
+
+The @var{integer_array} parameter is the first @code{mpz_t} in the array.  For
+example,
+
+@example
+mpz_t  arr[20000];
+mpz_array_init (arr[0], 20000, 512);
+@end example
+
+@c  In case anyone's wondering, yes this parameter style is a bit anomalous,
+@c  it'd probably be nicer if it was "arr" instead of "arr[0]".  Obviously the
+@c  two differ only in the declaration, not the pointer value, but changing is
+@c  not possible since it'd provoke warnings or errors in existing sources.
+
+This function is only intended for programs that create a large number
+of integers and need to reduce memory usage by avoiding the overheads of
+allocating and reallocating lots of small blocks.  In normal programs this
+function is not recommended.
+
+The space allocated to each integer by this function will not be automatically
+increased, unlike the normal @code{mpz_init}, so an application must ensure it
+is sufficient for any value stored.  The following space requirements apply to
+various routines,
+
+@itemize @bullet
+@item
+@code{mpz_abs}, @code{mpz_neg}, @code{mpz_set}, @code{mpz_set_si} and
+@code{mpz_set_ui} need room for the value they store.
+
+@item
+@code{mpz_add}, @code{mpz_add_ui}, @code{mpz_sub} and @code{mpz_sub_ui} need
+room for the larger of the two operands, plus an extra
+@code{mp_bits_per_limb}.
+
+@item
+@code{mpz_mul}, @code{mpz_mul_ui} and @code{mpz_mul_si} need room for the sum
+of the number of bits in their operands, but each rounded up to a multiple of
+@code{mp_bits_per_limb}.
+
+@item
+@code{mpz_swap} can be used between two array variables, but not between an
+array and a normal variable.
+@end itemize
+
+For other functions, or if in doubt, the suggestion is to calculate in a
+regular @code{mpz_init} variable and copy the result to an array variable with
+@code{mpz_set}.
+@end deftypefun
+
+@deftypefun {void *} _mpz_realloc (mpz_t @var{integer}, mp_size_t @var{new_alloc})
+Change the space for @var{integer} to @var{new_alloc} limbs.  The value in
+@var{integer} is preserved if it fits, or is set to 0 if not.  The return
+value is not useful to applications and should be ignored.
+
+@code{mpz_realloc2} is the preferred way to accomplish allocation changes like
+this.  @code{mpz_realloc2} and @code{_mpz_realloc} are the same except that
+@code{_mpz_realloc} takes its size in limbs.
+@end deftypefun
+
+@deftypefun mp_limb_t mpz_getlimbn (mpz_t @var{op}, mp_size_t @var{n})
+Return limb number @var{n} from @var{op}.  The sign of @var{op} is ignored,
+just the absolute value is used.  The least significant limb is number 0.
+
+@code{mpz_size} can be used to find how many limbs make up @var{op}.
+@code{mpz_getlimbn} returns zero if @var{n} is outside the range 0 to
+@code{mpz_size(@var{op})-1}.
+@end deftypefun
+
+@deftypefun size_t mpz_size (mpz_t @var{op})
+Return the size of @var{op} measured in number of limbs.  If @var{op} is zero,
+the returned value will be zero.
+@c (@xref{Nomenclature}, for an explanation of the concept @dfn{limb}.)
+@end deftypefun
+
+
+
+@node Rational Number Functions, Floating-point Functions, Integer Functions, Top
+@comment  node-name,  next,  previous,  up
+@chapter Rational Number Functions
+@cindex Rational number functions
+
+This chapter describes the GMP functions for performing arithmetic on rational
+numbers.  These functions start with the prefix @code{mpq_}.
+
+Rational numbers are stored in objects of type @code{mpq_t}.
+
+All rational arithmetic functions assume operands have a canonical form, and
+canonicalize their result.  The canonical form means that the denominator and
+the numerator have no common factors, and that the denominator is positive.
+Zero has the unique representation 0/1.
+
+Pure assignment functions do not canonicalize the assigned variable.  It is
+the responsibility of the user to canonicalize the assigned variable before
+any arithmetic operations are performed on that variable.
+
+@deftypefun void mpq_canonicalize (mpq_t @var{op})
+Remove any factors that are common to the numerator and denominator of
+@var{op}, and make the denominator positive.
+@end deftypefun
+
+@menu
+* Initializing Rationals::
+* Rational Conversions::
+* Rational Arithmetic::
+* Comparing Rationals::
+* Applying Integer Functions::
+* I/O of Rationals::
+@end menu
+
+@node Initializing Rationals, Rational Conversions, Rational Number Functions, Rational Number Functions
+@comment  node-name,  next,  previous,  up
+@section Initialization and Assignment Functions
+@cindex Rational assignment functions
+@cindex Assignment functions
+@cindex Rational initialization functions
+@cindex Initialization functions
+
+@deftypefun void mpq_init (mpq_t @var{x})
+Initialize @var{x} and set it to 0/1.  Each variable should normally only be
+initialized once, or at least cleared out (using the function @code{mpq_clear})
+between each initialization.
+@end deftypefun
+
+@deftypefun void mpq_inits (mpq_t @var{x}, ...)
+Initialize a NULL-terminated list of @code{mpq_t} variables, and set their
+values to 0/1.
+@end deftypefun
+
+@deftypefun void mpq_clear (mpq_t @var{x})
+Free the space occupied by @var{x}.  Make sure to call this function for all
+@code{mpq_t} variables when you are done with them.
+@end deftypefun
+
+@deftypefun void mpq_clears (mpq_t @var{x}, ...)
+Free the space occupied by a NULL-terminated list of @code{mpq_t} variables.
+@end deftypefun
+
+@deftypefun void mpq_set (mpq_t @var{rop}, mpq_t @var{op})
+@deftypefunx void mpq_set_z (mpq_t @var{rop}, mpz_t @var{op})
+Assign @var{rop} from @var{op}.
+@end deftypefun
+
+@deftypefun void mpq_set_ui (mpq_t @var{rop}, unsigned long int @var{op1}, unsigned long int @var{op2})
+@deftypefunx void mpq_set_si (mpq_t @var{rop}, signed long int @var{op1}, unsigned long int @var{op2})
+Set the value of @var{rop} to @var{op1}/@var{op2}.  Note that if @var{op1} and
+@var{op2} have common factors, @var{rop} has to be passed to
+@code{mpq_canonicalize} before any operations are performed on @var{rop}.
+@end deftypefun
+
+@deftypefun int mpq_set_str (mpq_t @var{rop}, char *@var{str}, int @var{base})
+Set @var{rop} from a null-terminated string @var{str} in the given @var{base}.
+
+The string can be an integer like ``41'' or a fraction like ``41/152''.  The
+fraction must be in canonical form (@pxref{Rational Number Functions}), or if
+not then @code{mpq_canonicalize} must be called.
+
+The numerator and optional denominator are parsed the same as in
+@code{mpz_set_str} (@pxref{Assigning Integers}).  White space is allowed in
+the string, and is simply ignored.  The @var{base} can vary from 2 to 62, or
+if @var{base} is 0 then the leading characters are used: @code{0x} or @code{0X} for hex,
+@code{0b} or @code{0B} for binary,
+@code{0} for octal, or decimal otherwise.  Note that this is done separately
+for the numerator and denominator, so for instance @code{0xEF/100} is 239/100,
+whereas @code{0xEF/0x100} is 239/256.
+
+The return value is 0 if the entire string is a valid number, or @minus{}1 if
+not.
+@end deftypefun
+
+@deftypefun void mpq_swap (mpq_t @var{rop1}, mpq_t @var{rop2})
+Swap the values @var{rop1} and @var{rop2} efficiently.
+@end deftypefun
+
+
+@need 2000
+@node Rational Conversions, Rational Arithmetic, Initializing Rationals, Rational Number Functions
+@comment  node-name,  next,  previous,  up
+@section Conversion Functions
+@cindex Rational conversion functions
+@cindex Conversion functions
+
+@deftypefun double mpq_get_d (mpq_t @var{op})
+Convert @var{op} to a @code{double}, truncating if necessary (i.e.@: rounding
+towards zero).
+
+If the exponent from the conversion is too big or too small to fit a
+@code{double} then the result is system dependent.  For too big an infinity is
+returned when available.  For too small @math{0.0} is normally returned.
+Hardware overflow, underflow and denorm traps may or may not occur.
+@end deftypefun
+
+@deftypefun void mpq_set_d (mpq_t @var{rop}, double @var{op})
+@deftypefunx void mpq_set_f (mpq_t @var{rop}, mpf_t @var{op})
+Set @var{rop} to the value of @var{op}.  There is no rounding, this conversion
+is exact.
+@end deftypefun
+
+@deftypefun {char *} mpq_get_str (char *@var{str}, int @var{base}, mpq_t @var{op})
+Convert @var{op} to a string of digits in base @var{base}.  The base may vary
+from 2 to 36.  The string will be of the form @samp{num/den}, or if the
+denominator is 1 then just @samp{num}.
+
+If @var{str} is @code{NULL}, the result string is allocated using the current
+allocation function (@pxref{Custom Allocation}).  The block will be
+@code{strlen(str)+1} bytes, that being exactly enough for the string and
+null-terminator.
+
+If @var{str} is not @code{NULL}, it should point to a block of storage large
+enough for the result, that being
+
+@example
+mpz_sizeinbase (mpq_numref(@var{op}), @var{base})
++ mpz_sizeinbase (mpq_denref(@var{op}), @var{base}) + 3
+@end example
+
+The three extra bytes are for a possible minus sign, possible slash, and the
+null-terminator.
+
+A pointer to the result string is returned, being either the allocated block,
+or the given @var{str}.
+@end deftypefun
+
+
+@node Rational Arithmetic, Comparing Rationals, Rational Conversions, Rational Number Functions
+@comment  node-name,  next,  previous,  up
+@section Arithmetic Functions
+@cindex Rational arithmetic functions
+@cindex Arithmetic functions
+
+@deftypefun void mpq_add (mpq_t @var{sum}, mpq_t @var{addend1}, mpq_t @var{addend2})
+Set @var{sum} to @var{addend1} + @var{addend2}.
+@end deftypefun
+
+@deftypefun void mpq_sub (mpq_t @var{difference}, mpq_t @var{minuend}, mpq_t @var{subtrahend})
+Set @var{difference} to @var{minuend} @minus{} @var{subtrahend}.
+@end deftypefun
+
+@deftypefun void mpq_mul (mpq_t @var{product}, mpq_t @var{multiplier}, mpq_t @var{multiplicand})
+Set @var{product} to @math{@var{multiplier} @GMPtimes{} @var{multiplicand}}.
+@end deftypefun
+
+@deftypefun void mpq_mul_2exp (mpq_t @var{rop}, mpq_t @var{op1}, mp_bitcnt_t @var{op2})
+Set @var{rop} to @m{@var{op1} \times 2^{op2}, @var{op1} times 2 raised to
+@var{op2}}.
+@end deftypefun
+
+@deftypefun void mpq_div (mpq_t @var{quotient}, mpq_t @var{dividend}, mpq_t @var{divisor})
+@cindex Division functions
+Set @var{quotient} to @var{dividend}/@var{divisor}.
+@end deftypefun
+
+@deftypefun void mpq_div_2exp (mpq_t @var{rop}, mpq_t @var{op1}, mp_bitcnt_t @var{op2})
+Set @var{rop} to @m{@var{op1}/2^{op2}, @var{op1} divided by 2 raised to
+@var{op2}}.
+@end deftypefun
+
+@deftypefun void mpq_neg (mpq_t @var{negated_operand}, mpq_t @var{operand})
+Set @var{negated_operand} to @minus{}@var{operand}.
+@end deftypefun
+
+@deftypefun void mpq_abs (mpq_t @var{rop}, mpq_t @var{op})
+Set @var{rop} to the absolute value of @var{op}.
+@end deftypefun
+
+@deftypefun void mpq_inv (mpq_t @var{inverted_number}, mpq_t @var{number})
+Set @var{inverted_number} to 1/@var{number}.  If the new denominator is
+zero, this routine will divide by zero.
+@end deftypefun
+
+@node Comparing Rationals, Applying Integer Functions, Rational Arithmetic, Rational Number Functions
+@comment  node-name,  next,  previous,  up
+@section Comparison Functions
+@cindex Rational comparison functions
+@cindex Comparison functions
+
+@deftypefun int mpq_cmp (mpq_t @var{op1}, mpq_t @var{op2})
+Compare @var{op1} and @var{op2}.  Return a positive value if @math{@var{op1} >
+@var{op2}}, zero if @math{@var{op1} = @var{op2}}, and a negative value if
+@math{@var{op1} < @var{op2}}.
+
+To determine if two rationals are equal, @code{mpq_equal} is faster than
+@code{mpq_cmp}.
+@end deftypefun
+
+@deftypefn Macro int mpq_cmp_ui (mpq_t @var{op1}, unsigned long int @var{num2}, unsigned long int @var{den2})
+@deftypefnx Macro int mpq_cmp_si (mpq_t @var{op1}, long int @var{num2}, unsigned long int @var{den2})
+Compare @var{op1} and @var{num2}/@var{den2}.  Return a positive value if
+@math{@var{op1} > @var{num2}/@var{den2}}, zero if @math{@var{op1} =
+@var{num2}/@var{den2}}, and a negative value if @math{@var{op1} <
+@var{num2}/@var{den2}}.
+
+@var{num2} and @var{den2} are allowed to have common factors.
+
+These functions are implemented as macros and evaluate their arguments
+multiple times.
+@end deftypefn
+
+@deftypefn Macro int mpq_sgn (mpq_t @var{op})
+@cindex Sign tests
+@cindex Rational sign tests
+Return @math{+1} if @math{@var{op} > 0}, 0 if @math{@var{op} = 0}, and
+@math{-1} if @math{@var{op} < 0}.
+
+This function is actually implemented as a macro.  It evaluates its
+argument multiple times.
+@end deftypefn
+
+@deftypefun int mpq_equal (mpq_t @var{op1}, mpq_t @var{op2})
+Return non-zero if @var{op1} and @var{op2} are equal, zero if they are
+non-equal.  Although @code{mpq_cmp} can be used for the same purpose, this
+function is much faster.
+@end deftypefun
+
+@node Applying Integer Functions, I/O of Rationals, Comparing Rationals, Rational Number Functions
+@comment  node-name,  next,  previous,  up
+@section Applying Integer Functions to Rationals
+@cindex Rational numerator and denominator
+@cindex Numerator and denominator
+
+The set of @code{mpq} functions is quite small.  In particular, there are few
+functions for either input or output.  The following functions give direct
+access to the numerator and denominator of an @code{mpq_t}.
+
+Note that if an assignment to the numerator and/or denominator could take an
+@code{mpq_t} out of the canonical form described at the start of this chapter
+(@pxref{Rational Number Functions}) then @code{mpq_canonicalize} must be
+called before any other @code{mpq} functions are applied to that @code{mpq_t}.
+
+@deftypefn Macro mpz_t mpq_numref (mpq_t @var{op})
+@deftypefnx Macro mpz_t mpq_denref (mpq_t @var{op})
+Return a reference to the numerator and denominator of @var{op}, respectively.
+The @code{mpz} functions can be used on the result of these macros.
+@end deftypefn
+
+@deftypefun void mpq_get_num (mpz_t @var{numerator}, mpq_t @var{rational})
+@deftypefunx void mpq_get_den (mpz_t @var{denominator}, mpq_t @var{rational})
+@deftypefunx void mpq_set_num (mpq_t @var{rational}, mpz_t @var{numerator})
+@deftypefunx void mpq_set_den (mpq_t @var{rational}, mpz_t @var{denominator})
+Get or set the numerator or denominator of a rational.  These functions are
+equivalent to calling @code{mpz_set} with an appropriate @code{mpq_numref} or
+@code{mpq_denref}.  Direct use of @code{mpq_numref} or @code{mpq_denref} is
+recommended instead of these functions.
+@end deftypefun
+
+
+@need 2000
+@node I/O of Rationals,  , Applying Integer Functions, Rational Number Functions
+@comment  node-name,  next,  previous,  up
+@section Input and Output Functions
+@cindex Rational input and output functions
+@cindex Input functions
+@cindex Output functions
+@cindex I/O functions
+
+Functions that perform input from a stdio stream, and functions that output to
+a stdio stream, of @code{mpq} numbers.  Passing a @code{NULL} pointer for a
+@var{stream} argument to any of these functions will make them read from
+@code{stdin} and write to @code{stdout}, respectively.
+
+When using any of these functions, it is a good idea to include @file{stdio.h}
+before @file{gmp.h}, since that will allow @file{gmp.h} to define prototypes
+for these functions.
+
+See also @ref{Formatted Output} and @ref{Formatted Input}.
+
+@deftypefun size_t mpq_out_str (FILE *@var{stream}, int @var{base}, mpq_t @var{op})
+Output @var{op} on stdio stream @var{stream}, as a string of digits in base
+@var{base}.  The base may vary from 2 to 36.  Output is in the form
+@samp{num/den} or if the denominator is 1 then just @samp{num}.
+
+Return the number of bytes written, or if an error occurred, return 0.
+@end deftypefun
+
+@deftypefun size_t mpq_inp_str (mpq_t @var{rop}, FILE *@var{stream}, int @var{base})
+Read a string of digits from @var{stream} and convert them to a rational in
+@var{rop}.  Any initial white-space characters are read and discarded.  Return
+the number of characters read (including white space), or 0 if a rational
+could not be read.
+
+The input can be a fraction like @samp{17/63} or just an integer like
+@samp{123}.  Reading stops at the first character not in this form, and white
+space is not permitted within the string.  If the input might not be in
+canonical form, then @code{mpq_canonicalize} must be called (@pxref{Rational
+Number Functions}).
+
+The @var{base} can be between 2 and 36, or can be 0 in which case the leading
+characters of the string determine the base, @samp{0x} or @samp{0X} for
+hexadecimal, @samp{0} for octal, or decimal otherwise.  The leading characters
+are examined separately for the numerator and denominator of a fraction, so
+for instance @samp{0x10/11} is @math{16/11}, whereas @samp{0x10/0x11} is
+@math{16/17}.
+@end deftypefun
+
+
+@node Floating-point Functions, Low-level Functions, Rational Number Functions, Top
+@comment  node-name,  next,  previous,  up
+@chapter Floating-point Functions
+@cindex Floating-point functions
+@cindex Float functions
+@cindex User-defined precision
+@cindex Precision of floats
+
+GMP floating point numbers are stored in objects of type @code{mpf_t} and
+functions operating on them have an @code{mpf_} prefix.
+
+The mantissa of each float has a user-selectable precision, limited only by
+available memory.  Each variable has its own precision, and that can be
+increased or decreased at any time.
+
+The exponent of each float has a fixed precision, one machine word on most
+systems.  In the current implementation the exponent is a count of limbs, so
+for example on a 32-bit system this means a range of roughly
+@math{2^@W{-68719476768}} to @math{2^@W{68719476736}}, or on a 64-bit system
+this will be greater.  Note however @code{mpf_get_str} can only return an
+exponent which fits an @code{mp_exp_t} and currently @code{mpf_set_str}
+doesn't accept exponents bigger than a @code{long}.
+
+Each variable keeps a size for the mantissa data actually in use.  This means
+that if a float is exactly represented in only a few bits then only those bits
+will be used in a calculation, even if the selected precision is high.
+
+All calculations are performed to the precision of the destination variable.
+Each function is defined to calculate with ``infinite precision'' followed by
+a truncation to the destination precision, but of course the work done is only
+what's needed to determine a result under that definition.
+
+The precision selected for a variable is a minimum value, GMP may increase it
+a little to facilitate efficient calculation.  Currently this means rounding
+up to a whole limb, and then sometimes having a further partial limb,
+depending on the high limb of the mantissa.  But applications shouldn't be
+concerned by such details.
+
+The mantissa is stored in binary, as might be imagined from the fact that
+precisions are expressed in bits.  One consequence of this is that decimal
+fractions like @math{0.1} cannot be represented exactly.  The same is true of
+plain IEEE @code{double} floats.  This makes both highly unsuitable for
+calculations involving money or other values that should be exact decimal
+fractions.  (Suitably scaled integers, or perhaps rationals, are better
+choices.)
+
+@code{mpf} functions and variables have no special notion of infinity or
+not-a-number, and applications must take care not to overflow the exponent or
+results will be unpredictable.  This might change in a future release.
+
+Note that the @code{mpf} functions are @emph{not} intended as a smooth
+extension to IEEE P754 arithmetic.  In particular results obtained on one
+computer often differ from the results on a computer with a different word
+size.
+
+@menu
+* Initializing Floats::
+* Assigning Floats::
+* Simultaneous Float Init & Assign::
+* Converting Floats::
+* Float Arithmetic::
+* Float Comparison::
+* I/O of Floats::
+* Miscellaneous Float Functions::
+@end menu
+
+@node Initializing Floats, Assigning Floats, Floating-point Functions, Floating-point Functions
+@comment  node-name,  next,  previous,  up
+@section Initialization Functions
+@cindex Float initialization functions
+@cindex Initialization functions
+
+@deftypefun void mpf_set_default_prec (mp_bitcnt_t @var{prec})
+Set the default precision to be @strong{at least} @var{prec} bits.  All
+subsequent calls to @code{mpf_init} will use this precision, but previously
+initialized variables are unaffected.
+@end deftypefun
+
+@deftypefun {mp_bitcnt_t} mpf_get_default_prec (void)
+Return the default precision actually used.
+@end deftypefun
+
+An @code{mpf_t} object must be initialized before storing the first value in
+it.  The functions @code{mpf_init} and @code{mpf_init2} are used for that
+purpose.
+
+@deftypefun void mpf_init (mpf_t @var{x})
+Initialize @var{x} to 0.  Normally, a variable should be initialized once only
+or at least be cleared, using @code{mpf_clear}, between initializations.  The
+precision of @var{x} is undefined unless a default precision has already been
+established by a call to @code{mpf_set_default_prec}.
+@end deftypefun
+
+@deftypefun void mpf_init2 (mpf_t @var{x}, mp_bitcnt_t @var{prec})
+Initialize @var{x} to 0 and set its precision to be @strong{at least}
+@var{prec} bits.  Normally, a variable should be initialized once only or at
+least be cleared, using @code{mpf_clear}, between initializations.
+@end deftypefun
+
+@deftypefun void mpf_inits (mpf_t @var{x}, ...)
+Initialize a NULL-terminated list of @code{mpf_t} variables, and set their
+values to 0.  The precision of the initialized variables is undefined unless a
+default precision has already been established by a call to
+@code{mpf_set_default_prec}.
+@end deftypefun
+
+@deftypefun void mpf_clear (mpf_t @var{x})
+Free the space occupied by @var{x}.  Make sure to call this function for all
+@code{mpf_t} variables when you are done with them.
+@end deftypefun
+
+@deftypefun void mpf_clears (mpf_t @var{x}, ...)
+Free the space occupied by a NULL-terminated list of @code{mpf_t} variables.
+@end deftypefun
+
+@need 2000
+Here is an example of how to initialize floating-point variables:
+@example
+@{
+  mpf_t x, y;
+  mpf_init (x);           /* use default precision */
+  mpf_init2 (y, 256);     /* precision @emph{at least} 256 bits */
+  @dots{}
+  /* Unless the program is about to exit, do ... */
+  mpf_clear (x);
+  mpf_clear (y);
+@}
+@end example
+
+The following three functions are useful for changing the precision during a
+calculation.  A typical use would be for adjusting the precision gradually in
+iterative algorithms like Newton-Raphson, making the computation precision
+closely match the actual accurate part of the numbers.
+
+@deftypefun {mp_bitcnt_t} mpf_get_prec (mpf_t @var{op})
+Return the current precision of @var{op}, in bits.
+@end deftypefun
+
+@deftypefun void mpf_set_prec (mpf_t @var{rop}, mp_bitcnt_t @var{prec})
+Set the precision of @var{rop} to be @strong{at least} @var{prec} bits.  The
+value in @var{rop} will be truncated to the new precision.
+
+This function requires a call to @code{realloc}, and so should not be used in
+a tight loop.
+@end deftypefun
+
+@deftypefun void mpf_set_prec_raw (mpf_t @var{rop}, mp_bitcnt_t @var{prec})
+Set the precision of @var{rop} to be @strong{at least} @var{prec} bits,
+without changing the memory allocated.
+
+@var{prec} must be no more than the allocated precision for @var{rop}, that
+being the precision when @var{rop} was initialized, or in the most recent
+@code{mpf_set_prec}.
+
+The value in @var{rop} is unchanged, and in particular if it had a higher
+precision than @var{prec} it will retain that higher precision.  New values
+written to @var{rop} will use the new @var{prec}.
+
+Before calling @code{mpf_clear} or the full @code{mpf_set_prec}, another
+@code{mpf_set_prec_raw} call must be made to restore @var{rop} to its original
+allocated precision.  Failing to do so will have unpredictable results.
+
+@code{mpf_get_prec} can be used before @code{mpf_set_prec_raw} to get the
+original allocated precision.  After @code{mpf_set_prec_raw} it reflects the
+@var{prec} value set.
+
+@code{mpf_set_prec_raw} is an efficient way to use an @code{mpf_t} variable at
+different precisions during a calculation, perhaps to gradually increase
+precision in an iteration, or just to use various different precisions for
+different purposes during a calculation.
+@end deftypefun
+
+
+@need 2000
+@node Assigning Floats, Simultaneous Float Init & Assign, Initializing Floats, Floating-point Functions
+@comment  node-name,  next,  previous,  up
+@section Assignment Functions
+@cindex Float assignment functions
+@cindex Assignment functions
+
+These functions assign new values to already initialized floats
+(@pxref{Initializing Floats}).
+
+@deftypefun void mpf_set (mpf_t @var{rop}, mpf_t @var{op})
+@deftypefunx void mpf_set_ui (mpf_t @var{rop}, unsigned long int @var{op})
+@deftypefunx void mpf_set_si (mpf_t @var{rop}, signed long int @var{op})
+@deftypefunx void mpf_set_d (mpf_t @var{rop}, double @var{op})
+@deftypefunx void mpf_set_z (mpf_t @var{rop}, mpz_t @var{op})
+@deftypefunx void mpf_set_q (mpf_t @var{rop}, mpq_t @var{op})
+Set the value of @var{rop} from @var{op}.
+@end deftypefun
+
+@deftypefun int mpf_set_str (mpf_t @var{rop}, char *@var{str}, int @var{base})
+Set the value of @var{rop} from the string in @var{str}.  The string is of the
+form @samp{M@@N} or, if the base is 10 or less, alternatively @samp{MeN}.
+@samp{M} is the mantissa and @samp{N} is the exponent.  The mantissa is always
+in the specified base.  The exponent is either in the specified base or, if
+@var{base} is negative, in decimal.  The decimal point expected is taken from
+the current locale, on systems providing @code{localeconv}.
+
+The argument @var{base} may be in the ranges 2 to 62, or @minus{}62 to
+@minus{}2.  Negative values are used to specify that the exponent is in
+decimal.
+
+For bases up to 36, case is ignored; upper-case and lower-case letters have
+the same value; for bases 37 to 62, upper-case letters represent the usual
+10..35 while lower-case letters represent 36..61.
+
+Unlike the corresponding @code{mpz} function, the base will not be determined
+from the leading characters of the string if @var{base} is 0.  This is so that
+numbers like @samp{0.23} are not interpreted as octal.
+
+White space is allowed in the string, and is simply ignored.  [This is not
+really true; white-space is ignored in the beginning of the string and within
+the mantissa, but not in other places, such as after a minus sign or in the
+exponent.  We are considering changing the definition of this function, making
+it fail when there is any white-space in the input, since that makes a lot of
+sense.  Please tell us your opinion about this change.  Do you really want it
+to accept @nicode{"3 14"} as meaning 314 as it does now?]
+
+This function returns 0 if the entire string is a valid number in base
+@var{base}.  Otherwise it returns @minus{}1.
+@end deftypefun
+
+@deftypefun void mpf_swap (mpf_t @var{rop1}, mpf_t @var{rop2})
+Swap @var{rop1} and @var{rop2} efficiently.  Both the values and the
+precisions of the two variables are swapped.
+@end deftypefun
+
+
+@node Simultaneous Float Init & Assign, Converting Floats, Assigning Floats, Floating-point Functions
+@comment  node-name,  next,  previous,  up
+@section Combined Initialization and Assignment Functions
+@cindex Float assignment functions
+@cindex Assignment functions
+@cindex Float initialization functions
+@cindex Initialization functions
+
+For convenience, GMP provides a parallel series of initialize-and-set functions
+which initialize the output and then store the value there.  These functions'
+names have the form @code{mpf_init_set@dots{}}
+
+Once the float has been initialized by any of the @code{mpf_init_set@dots{}}
+functions, it can be used as the source or destination operand for the ordinary
+float functions.  Don't use an initialize-and-set function on a variable
+already initialized!
+
+@deftypefun void mpf_init_set (mpf_t @var{rop}, mpf_t @var{op})
+@deftypefunx void mpf_init_set_ui (mpf_t @var{rop}, unsigned long int @var{op})
+@deftypefunx void mpf_init_set_si (mpf_t @var{rop}, signed long int @var{op})
+@deftypefunx void mpf_init_set_d (mpf_t @var{rop}, double @var{op})
+Initialize @var{rop} and set its value from @var{op}.
+
+The precision of @var{rop} will be taken from the active default precision, as
+set by @code{mpf_set_default_prec}.
+@end deftypefun
+
+@deftypefun int mpf_init_set_str (mpf_t @var{rop}, char *@var{str}, int @var{base})
+Initialize @var{rop} and set its value from the string in @var{str}.  See
+@code{mpf_set_str} above for details on the assignment operation.
+
+Note that @var{rop} is initialized even if an error occurs.  (I.e., you have to
+call @code{mpf_clear} for it.)
+
+The precision of @var{rop} will be taken from the active default precision, as
+set by @code{mpf_set_default_prec}.
+@end deftypefun
+
+
+@node Converting Floats, Float Arithmetic, Simultaneous Float Init & Assign, Floating-point Functions
+@comment  node-name,  next,  previous,  up
+@section Conversion Functions
+@cindex Float conversion functions
+@cindex Conversion functions
+
+@deftypefun double mpf_get_d (mpf_t @var{op})
+Convert @var{op} to a @code{double}, truncating if necessary (i.e.@: rounding
+towards zero).
+
+If the exponent in @var{op} is too big or too small to fit a @code{double}
+then the result is system dependent.  If too big, an infinity is returned when
+available.  If too small, @math{0.0} is normally returned.  Hardware overflow,
+underflow and denorm traps may or may not occur.
+@end deftypefun
+
+@deftypefun double mpf_get_d_2exp (signed long int *@var{exp}, mpf_t @var{op})
+Convert @var{op} to a @code{double}, truncating if necessary (i.e.@: rounding
+towards zero), and with an exponent returned separately.
+
+The return value is in the range @math{0.5@le{}@GMPabs{@var{d}}<1} and the
+exponent is stored to @code{*@var{exp}}.  @m{@var{d} * 2^{exp}, @var{d} *
+2^@var{exp}} is the (truncated) @var{op} value.  If @var{op} is zero, the
+return is @math{0.0} and 0 is stored to @code{*@var{exp}}.
+
+@cindex @code{frexp}
+This is similar to the standard C @code{frexp} function (@pxref{Normalization
+Functions,,, libc, The GNU C Library Reference Manual}).
+@end deftypefun
+
+@deftypefun long mpf_get_si (mpf_t @var{op})
+@deftypefunx {unsigned long} mpf_get_ui (mpf_t @var{op})
+Convert @var{op} to a @code{long} or @code{unsigned long}, truncating any
+fraction part.  If @var{op} is too big for the return type, the result is
+undefined.
+
+See also @code{mpf_fits_slong_p} and @code{mpf_fits_ulong_p}
+(@pxref{Miscellaneous Float Functions}).
+@end deftypefun
+
+@deftypefun {char *} mpf_get_str (char *@var{str}, mp_exp_t *@var{expptr}, int @var{base}, size_t @var{n_digits}, mpf_t @var{op})
+Convert @var{op} to a string of digits in base @var{base}.  The base argument
+may vary from 2 to 62 or from @minus{}2 to @minus{}36.  Up to @var{n_digits}
+digits will be generated.  Trailing zeros are not returned.  No more digits
+than can be accurately represented by @var{op} are ever generated.  If
+@var{n_digits} is 0 then that accurate maximum number of digits is generated.
+
+For @var{base} in the range 2..36, digits and lower-case letters are used; for
+@minus{}2..@minus{}36, digits and upper-case letters are used; for 37..62,
+digits, upper-case letters, and lower-case letters (in that significance order)
+are used.
+
+If @var{str} is @code{NULL}, the result string is allocated using the current
+allocation function (@pxref{Custom Allocation}).  The block will be
+@code{strlen(str)+1} bytes, that being exactly enough for the string and
+null-terminator.
+
+If @var{str} is not @code{NULL}, it should point to a block of
+@math{@var{n_digits} + 2} bytes, that being enough for the mantissa, a
+possible minus sign, and a null-terminator.  When @var{n_digits} is 0 to get
+all significant digits, an application won't be able to know the space
+required, and @var{str} should be @code{NULL} in that case.
+
+The generated string is a fraction, with an implicit radix point immediately
+to the left of the first digit.  The applicable exponent is written through
+the @var{expptr} pointer.  For example, the number 3.1416 would be returned as
+string @nicode{"31416"} and exponent 1.
+
+When @var{op} is zero, an empty string is produced and the exponent returned
+is 0.
+
+A pointer to the result string is returned, being either the allocated block
+or the given @var{str}.
+@end deftypefun
+
+
+@node Float Arithmetic, Float Comparison, Converting Floats, Floating-point Functions
+@comment  node-name,  next,  previous,  up
+@section Arithmetic Functions
+@cindex Float arithmetic functions
+@cindex Arithmetic functions
+
+@deftypefun void mpf_add (mpf_t @var{rop}, mpf_t @var{op1}, mpf_t @var{op2})
+@deftypefunx void mpf_add_ui (mpf_t @var{rop}, mpf_t @var{op1}, unsigned long int @var{op2})
+Set @var{rop} to @math{@var{op1} + @var{op2}}.
+@end deftypefun
+
+@deftypefun void mpf_sub (mpf_t @var{rop}, mpf_t @var{op1}, mpf_t @var{op2})
+@deftypefunx void mpf_ui_sub (mpf_t @var{rop}, unsigned long int @var{op1}, mpf_t @var{op2})
+@deftypefunx void mpf_sub_ui (mpf_t @var{rop}, mpf_t @var{op1}, unsigned long int @var{op2})
+Set @var{rop} to @var{op1} @minus{} @var{op2}.
+@end deftypefun
+
+@deftypefun void mpf_mul (mpf_t @var{rop}, mpf_t @var{op1}, mpf_t @var{op2})
+@deftypefunx void mpf_mul_ui (mpf_t @var{rop}, mpf_t @var{op1}, unsigned long int @var{op2})
+Set @var{rop} to @math{@var{op1} @GMPtimes{} @var{op2}}.
+@end deftypefun
+
+Division is undefined if the divisor is zero, and passing a zero divisor to the
+divide functions will make these functions intentionally divide by zero.  This
+lets the user handle arithmetic exceptions in these functions in the same
+manner as other arithmetic exceptions.
+
+@deftypefun void mpf_div (mpf_t @var{rop}, mpf_t @var{op1}, mpf_t @var{op2})
+@deftypefunx void mpf_ui_div (mpf_t @var{rop}, unsigned long int @var{op1}, mpf_t @var{op2})
+@deftypefunx void mpf_div_ui (mpf_t @var{rop}, mpf_t @var{op1}, unsigned long int @var{op2})
+@cindex Division functions
+Set @var{rop} to @var{op1}/@var{op2}.
+@end deftypefun
+
+@deftypefun void mpf_sqrt (mpf_t @var{rop}, mpf_t @var{op})
+@deftypefunx void mpf_sqrt_ui (mpf_t @var{rop}, unsigned long int @var{op})
+@cindex Root extraction functions
+Set @var{rop} to @m{\sqrt{@var{op}}, the square root of @var{op}}.
+@end deftypefun
+
+@deftypefun void mpf_pow_ui (mpf_t @var{rop}, mpf_t @var{op1}, unsigned long int @var{op2})
+@cindex Exponentiation functions
+@cindex Powering functions
+Set @var{rop} to @m{@var{op1}^{op2}, @var{op1} raised to the power @var{op2}}.
+@end deftypefun
+
+@deftypefun void mpf_neg (mpf_t @var{rop}, mpf_t @var{op})
+Set @var{rop} to @minus{}@var{op}.
+@end deftypefun
+
+@deftypefun void mpf_abs (mpf_t @var{rop}, mpf_t @var{op})
+Set @var{rop} to the absolute value of @var{op}.
+@end deftypefun
+
+@deftypefun void mpf_mul_2exp (mpf_t @var{rop}, mpf_t @var{op1}, mp_bitcnt_t @var{op2})
+Set @var{rop} to @m{@var{op1} \times 2^{op2}, @var{op1} times 2 raised to
+@var{op2}}.
+@end deftypefun
+
+@deftypefun void mpf_div_2exp (mpf_t @var{rop}, mpf_t @var{op1}, mp_bitcnt_t @var{op2})
+Set @var{rop} to @m{@var{op1}/2^{op2}, @var{op1} divided by 2 raised to
+@var{op2}}.
+@end deftypefun
+
+@node Float Comparison, I/O of Floats, Float Arithmetic, Floating-point Functions
+@comment  node-name,  next,  previous,  up
+@section Comparison Functions
+@cindex Float comparison functions
+@cindex Comparison functions
+
+@deftypefun int mpf_cmp (mpf_t @var{op1}, mpf_t @var{op2})
+@deftypefunx int mpf_cmp_d (mpf_t @var{op1}, double @var{op2})
+@deftypefunx int mpf_cmp_ui (mpf_t @var{op1}, unsigned long int @var{op2})
+@deftypefunx int mpf_cmp_si (mpf_t @var{op1}, signed long int @var{op2})
+Compare @var{op1} and @var{op2}.  Return a positive value if @math{@var{op1} >
+@var{op2}}, zero if @math{@var{op1} = @var{op2}}, and a negative value if
+@math{@var{op1} < @var{op2}}.
+
+@code{mpf_cmp_d} can be called with an infinity, but results are undefined for
+a NaN.
+@end deftypefun
+
+@deftypefun int mpf_eq (mpf_t @var{op1}, mpf_t @var{op2}, mp_bitcnt_t @var{op3})
+Return non-zero if the first @var{op3} bits of @var{op1} and @var{op2} are
+equal, zero otherwise.  I.e., test if @var{op1} and @var{op2} are approximately
+equal.
+
+Caution 1: All versions of GMP up to version 4.2.4 compared just whole limbs,
+meaning sometimes more than @var{op3} bits, sometimes fewer.
+
+Caution 2: This function will consider XXX11...111 and XX100...000 different,
+even if ... is replaced by a semi-infinite number of bits.  Such numbers are
+really just one ulp off, and should be considered equal.
+@end deftypefun
+
+@deftypefun void mpf_reldiff (mpf_t @var{rop}, mpf_t @var{op1}, mpf_t @var{op2})
+Compute the relative difference between @var{op1} and @var{op2} and store the
+result in @var{rop}.  This is @math{@GMPabs{@var{op1}-@var{op2}}/@var{op1}}.
+@end deftypefun
+
+@deftypefn Macro int mpf_sgn (mpf_t @var{op})
+@cindex Sign tests
+@cindex Float sign tests
+Return @math{+1} if @math{@var{op} > 0}, 0 if @math{@var{op} = 0}, and
+@math{-1} if @math{@var{op} < 0}.
+
+This function is actually implemented as a macro.  It evaluates its argument
+multiple times.
+@end deftypefn
+
+@node I/O of Floats, Miscellaneous Float Functions, Float Comparison, Floating-point Functions
+@comment  node-name,  next,  previous,  up
+@section Input and Output Functions
+@cindex Float input and output functions
+@cindex Input functions
+@cindex Output functions
+@cindex I/O functions
+
+Functions that perform input from a stdio stream, and functions that output to
+a stdio stream, of @code{mpf} numbers.  Passing a @code{NULL} pointer for a
+@var{stream} argument to any of these functions will make them read from
+@code{stdin} and write to @code{stdout}, respectively.
+
+When using any of these functions, it is a good idea to include @file{stdio.h}
+before @file{gmp.h}, since that will allow @file{gmp.h} to define prototypes
+for these functions.
+
+See also @ref{Formatted Output} and @ref{Formatted Input}.
+
+@deftypefun size_t mpf_out_str (FILE *@var{stream}, int @var{base}, size_t @var{n_digits}, mpf_t @var{op})
+Print @var{op} to @var{stream}, as a string of digits.  Return the number of
+bytes written, or if an error occurred, return 0.
+
+The mantissa is prefixed with an @samp{0.} and is in the given @var{base},
+which may vary from 2 to 62 or from @minus{}2 to @minus{}36.  An exponent is
+then printed, separated by an @samp{e}, or if the base is greater than 10 then
+by an @samp{@@}.  The exponent is always in decimal.  The decimal point follows
+the current locale, on systems providing @code{localeconv}.
+
+For @var{base} in the range 2..36, digits and lower-case letters are used; for
+@minus{}2..@minus{}36, digits and upper-case letters are used; for 37..62,
+digits, upper-case letters, and lower-case letters (in that significance order)
+are used.
+
+Up to @var{n_digits} digits will be printed from the mantissa, except that no
+more digits than are accurately representable by @var{op} will be printed.
+@var{n_digits} can be 0 to select that accurate maximum.
+@end deftypefun
+
+@deftypefun size_t mpf_inp_str (mpf_t @var{rop}, FILE *@var{stream}, int @var{base})
+Read a string in base @var{base} from @var{stream}, and put the read float in
+@var{rop}.  The string is of the form @samp{M@@N} or, if the base is 10 or
+less, alternatively @samp{MeN}.  @samp{M} is the mantissa and @samp{N} is the
+exponent.  The mantissa is always in the specified base.  The exponent is
+either in the specified base or, if @var{base} is negative, in decimal.  The
+decimal point expected is taken from the current locale, on systems providing
+@code{localeconv}.
+
+The argument @var{base} may be in the ranges 2 to 36, or @minus{}36 to
+@minus{}2.  Negative values are used to specify that the exponent is in
+decimal.
+
+Unlike the corresponding @code{mpz} function, the base will not be determined
+from the leading characters of the string if @var{base} is 0.  This is so that
+numbers like @samp{0.23} are not interpreted as octal.
+
+Return the number of bytes read, or if an error occurred, return 0.
+@end deftypefun
+
+@c @deftypefun void mpf_out_raw (FILE *@var{stream}, mpf_t @var{float})
+@c Output @var{float} on stdio stream @var{stream}, in raw binary
+@c format.  The float is written in a portable format, with 4 bytes of
+@c size information, and that many bytes of limbs.  Both the size and the
+@c limbs are written in decreasing significance order.
+@c @end deftypefun
+
+@c @deftypefun void mpf_inp_raw (mpf_t @var{float}, FILE *@var{stream})
+@c Input from stdio stream @var{stream} in the format written by
+@c @code{mpf_out_raw}, and put the result in @var{float}.
+@c @end deftypefun
+
+
+@node Miscellaneous Float Functions,  , I/O of Floats, Floating-point Functions
+@comment  node-name,  next,  previous,  up
+@section Miscellaneous Functions
+@cindex Miscellaneous float functions
+@cindex Float miscellaneous functions
+
+@deftypefun void mpf_ceil (mpf_t @var{rop}, mpf_t @var{op})
+@deftypefunx void mpf_floor (mpf_t @var{rop}, mpf_t @var{op})
+@deftypefunx void mpf_trunc (mpf_t @var{rop}, mpf_t @var{op})
+@cindex Rounding functions
+@cindex Float rounding functions
+Set @var{rop} to @var{op} rounded to an integer.  @code{mpf_ceil} rounds to the
+next higher integer, @code{mpf_floor} to the next lower, and @code{mpf_trunc}
+to the integer towards zero.
+@end deftypefun
+
+@deftypefun int mpf_integer_p (mpf_t @var{op})
+Return non-zero if @var{op} is an integer.
+@end deftypefun
+
+@deftypefun int mpf_fits_ulong_p (mpf_t @var{op})
+@deftypefunx int mpf_fits_slong_p (mpf_t @var{op})
+@deftypefunx int mpf_fits_uint_p (mpf_t @var{op})
+@deftypefunx int mpf_fits_sint_p (mpf_t @var{op})
+@deftypefunx int mpf_fits_ushort_p (mpf_t @var{op})
+@deftypefunx int mpf_fits_sshort_p (mpf_t @var{op})
+Return non-zero if @var{op} would fit in the respective C data type, when
+truncated to an integer.
+@end deftypefun
+
+@deftypefun void mpf_urandomb (mpf_t @var{rop}, gmp_randstate_t @var{state}, mp_bitcnt_t @var{nbits})
+@cindex Random number functions
+@cindex Float random number functions
+Generate a uniformly distributed random float in @var{rop}, such that @math{0
+@le{} @var{rop} < 1}, with @var{nbits} significant bits in the mantissa or
+less if the precision of @var{rop} is smaller.
+
+The variable @var{state} must be initialized by calling one of the
+@code{gmp_randinit} functions (@ref{Random State Initialization}) before
+invoking this function.
+@end deftypefun
+
+@deftypefun void mpf_random2 (mpf_t @var{rop}, mp_size_t @var{max_size}, mp_exp_t @var{exp})
+Generate a random float of at most @var{max_size} limbs, with long strings of
+zeros and ones in the binary representation.  The exponent of the number is in
+the interval @minus{}@var{exp} to @var{exp} (in limbs).  This function is
+useful for testing functions and algorithms, since these kinds of random
+numbers have proven to be more likely to trigger corner-case bugs.  Negative
+random numbers are generated when @var{max_size} is negative.
+@end deftypefun
+
+@c @deftypefun size_t mpf_size (mpf_t @var{op})
+@c Return the size of @var{op} measured in number of limbs.  If @var{op} is
+@c zero, the returned value will be zero.  (@xref{Nomenclature}, for an
+@c explanation of the concept @dfn{limb}.)
+@c
+@c @strong{This function is obsolete.  It will disappear from future GMP
+@c releases.}
+@c @end deftypefun
+
+
+@node Low-level Functions, Random Number Functions, Floating-point Functions, Top
+@comment  node-name,  next,  previous,  up
+@chapter Low-level Functions
+@cindex Low-level functions
+
+This chapter describes low-level GMP functions, used to implement the
+high-level GMP functions, but also intended for time-critical user code.
+
+These functions start with the prefix @code{mpn_}.
+
+@c 1. Some of these functions clobber input operands.
+@c
+
+The @code{mpn} functions are designed to be as fast as possible, @strong{not}
+to provide a coherent calling interface.  The different functions have somewhat
+similar interfaces, but there are variations that make them hard to use.  These
+functions do as little as possible apart from the real multiple precision
+computation, so that no time is spent on things that not all callers need.
+
+A source operand is specified by a pointer to the least significant limb and a
+limb count.  A destination operand is specified by just a pointer.  It is the
+responsibility of the caller to ensure that the destination has enough space
+for storing the result.
+
+With this way of specifying operands, it is possible to perform computations on
+subranges of an argument, and store the result into a subrange of a
+destination.
+
+A common requirement for all functions is that each source area needs at least
+one limb.  No size argument may be zero.  Unless otherwise stated, in-place
+operations are allowed where source and destination are the same, but not where
+they only partly overlap.
+
+The @code{mpn} functions are the base for the implementation of the
+@code{mpz_}, @code{mpf_}, and @code{mpq_} functions.
+
+This example adds the number beginning at @var{s1p} and the number beginning at
+@var{s2p} and writes the sum at @var{destp}.  All areas have @var{n} limbs.
+
+@example
+cy = mpn_add_n (destp, s1p, s2p, n)
+@end example
+
+It should be noted that the @code{mpn} functions make no attempt to identify
+high or low zero limbs on their operands, or other special forms.  On random
+data such cases will be unlikely and it'd be wasteful for every function to
+check every time.  An application knowing something about its data can take
+steps to trim or perhaps split its calculations.
+@c
+@c  For reference, within gmp mpz_t operands never have high zero limbs, and
+@c  we rate low zero limbs as unlikely too (or something an application should
+@c  handle).  This is a prime motivation for not stripping zero limbs in say
+@c  mpn_mul_n etc.
+@c
+@c  Other applications doing variable-length calculations will quite likely do
+@c  something similar to mpz.  And even if not then it's highly likely zero
+@c  limb stripping can be done at just a few judicious points, which will be
+@c  more efficient than having lots of mpn functions checking every time.
+
+@sp 1
+@noindent
+In the notation used below, a source operand is identified by the pointer to
+the least significant limb, and the limb count in braces.  For example,
+@{@var{s1p}, @var{s1n}@}.
+
+@deftypefun mp_limb_t mpn_add_n (mp_limb_t *@var{rp}, const mp_limb_t *@var{s1p}, const mp_limb_t *@var{s2p}, mp_size_t @var{n})
+Add @{@var{s1p}, @var{n}@} and @{@var{s2p}, @var{n}@}, and write the @var{n}
+least significant limbs of the result to @var{rp}.  Return carry, either 0 or
+1.
+
+This is the lowest-level function for addition.  It is the preferred function
+for addition, since it is written in assembly for most CPUs.  For addition of
+a variable to itself (i.e., @var{s1p} equals @var{s2p}) use @code{mpn_lshift}
+with a count of 1 for optimal speed.
+@end deftypefun
+
+@deftypefun mp_limb_t mpn_add_1 (mp_limb_t *@var{rp}, const mp_limb_t *@var{s1p}, mp_size_t @var{n}, mp_limb_t @var{s2limb})
+Add @{@var{s1p}, @var{n}@} and @var{s2limb}, and write the @var{n} least
+significant limbs of the result to @var{rp}.  Return carry, either 0 or 1.
+@end deftypefun
+
+@deftypefun mp_limb_t mpn_add (mp_limb_t *@var{rp}, const mp_limb_t *@var{s1p}, mp_size_t @var{s1n}, const mp_limb_t *@var{s2p}, mp_size_t @var{s2n})
+Add @{@var{s1p}, @var{s1n}@} and @{@var{s2p}, @var{s2n}@}, and write the
+@var{s1n} least significant limbs of the result to @var{rp}.  Return carry,
+either 0 or 1.
+
+This function requires that @var{s1n} is greater than or equal to @var{s2n}.
+@end deftypefun
+
+@deftypefun mp_limb_t mpn_sub_n (mp_limb_t *@var{rp}, const mp_limb_t *@var{s1p}, const mp_limb_t *@var{s2p}, mp_size_t @var{n})
+Subtract @{@var{s2p}, @var{n}@} from @{@var{s1p}, @var{n}@}, and write the
+@var{n} least significant limbs of the result to @var{rp}.  Return borrow,
+either 0 or 1.
+
+This is the lowest-level function for subtraction.  It is the preferred
+function for subtraction, since it is written in assembly for most CPUs.
+@end deftypefun
+
+@deftypefun mp_limb_t mpn_sub_1 (mp_limb_t *@var{rp}, const mp_limb_t *@var{s1p}, mp_size_t @var{n}, mp_limb_t @var{s2limb})
+Subtract @var{s2limb} from @{@var{s1p}, @var{n}@}, and write the @var{n} least
+significant limbs of the result to @var{rp}.  Return borrow, either 0 or 1.
+@end deftypefun
+
+@deftypefun mp_limb_t mpn_sub (mp_limb_t *@var{rp}, const mp_limb_t *@var{s1p}, mp_size_t @var{s1n}, const mp_limb_t *@var{s2p}, mp_size_t @var{s2n})
+Subtract @{@var{s2p}, @var{s2n}@} from @{@var{s1p}, @var{s1n}@}, and write the
+@var{s1n} least significant limbs of the result to @var{rp}.  Return borrow,
+either 0 or 1.
+
+This function requires that @var{s1n} is greater than or equal to
+@var{s2n}.
+@end deftypefun
+
+@deftypefun mp_limb_t mpn_neg (mp_limb_t *@var{rp}, const mp_limb_t *@var{sp}, mp_size_t @var{n})
+Perform the negation of @{@var{sp}, @var{n}@}, and write the result to
+@{@var{rp}, @var{n}@}.  Return carry-out.
+@end deftypefun
+
+@deftypefun void mpn_mul_n (mp_limb_t *@var{rp}, const mp_limb_t *@var{s1p}, const mp_limb_t *@var{s2p}, mp_size_t @var{n})
+Multiply @{@var{s1p}, @var{n}@} and @{@var{s2p}, @var{n}@}, and write the
+2*@var{n}-limb result to @var{rp}.
+
+The destination has to have space for 2*@var{n} limbs, even if the product's
+most significant limb is zero.  No overlap is permitted between the
+destination and either source.
+
+If the two input operands are the same, use @code{mpn_sqr}.
+@end deftypefun
+
+@deftypefun mp_limb_t mpn_mul (mp_limb_t *@var{rp}, const mp_limb_t *@var{s1p}, mp_size_t @var{s1n}, const mp_limb_t *@var{s2p}, mp_size_t @var{s2n})
+Multiply @{@var{s1p}, @var{s1n}@} and @{@var{s2p}, @var{s2n}@}, and write the
+(@var{s1n}+@var{s2n})-limb result to @var{rp}.  Return the most significant
+limb of the result.
+
+The destination has to have space for @var{s1n} + @var{s2n} limbs, even if the
+product's most significant limb is zero.  No overlap is permitted between the
+destination and either source.
+
+This function requires that @var{s1n} is greater than or equal to @var{s2n}.
+@end deftypefun
+
+@deftypefun void mpn_sqr (mp_limb_t *@var{rp}, const mp_limb_t *@var{s1p}, mp_size_t @var{n})
+Compute the square of @{@var{s1p}, @var{n}@} and write the 2*@var{n}-limb
+result to @var{rp}.
+
+The destination has to have space for 2*@var{n} limbs, even if the result's
+most significant limb is zero.  No overlap is permitted between the
+destination and the source.
+@end deftypefun
+
+@deftypefun mp_limb_t mpn_mul_1 (mp_limb_t *@var{rp}, const mp_limb_t *@var{s1p}, mp_size_t @var{n}, mp_limb_t @var{s2limb})
+Multiply @{@var{s1p}, @var{n}@} by @var{s2limb}, and write the @var{n} least
+significant limbs of the product to @var{rp}.  Return the most significant
+limb of the product.  @{@var{s1p}, @var{n}@} and @{@var{rp}, @var{n}@} are
+allowed to overlap provided @math{@var{rp} @le{} @var{s1p}}.
+
+This is a low-level function that is a building block for general
+multiplication as well as other operations in GMP@.  It is written in assembly
+for most CPUs.
+
+Don't call this function if @var{s2limb} is a power of 2; use @code{mpn_lshift}
+with a count equal to the logarithm of @var{s2limb} instead, for optimal speed.
+@end deftypefun
+
+@deftypefun mp_limb_t mpn_addmul_1 (mp_limb_t *@var{rp}, const mp_limb_t *@var{s1p}, mp_size_t @var{n}, mp_limb_t @var{s2limb})
+Multiply @{@var{s1p}, @var{n}@} and @var{s2limb}, and add the @var{n} least
+significant limbs of the product to @{@var{rp}, @var{n}@} and write the result
+to @var{rp}.  Return the most significant limb of the product, plus carry-out
+from the addition.
+
+This is a low-level function that is a building block for general
+multiplication as well as other operations in GMP@.  It is written in assembly
+for most CPUs.
+@end deftypefun
+
+@deftypefun mp_limb_t mpn_submul_1 (mp_limb_t *@var{rp}, const mp_limb_t *@var{s1p}, mp_size_t @var{n}, mp_limb_t @var{s2limb})
+Multiply @{@var{s1p}, @var{n}@} and @var{s2limb}, and subtract the @var{n}
+least significant limbs of the product from @{@var{rp}, @var{n}@} and write the
+result to @var{rp}.  Return the most significant limb of the product, plus
+borrow-out from the subtraction.
+
+This is a low-level function that is a building block for general
+multiplication and division as well as other operations in GMP@.  It is written
+in assembly for most CPUs.
+@end deftypefun
+
+@deftypefun void mpn_tdiv_qr (mp_limb_t *@var{qp}, mp_limb_t *@var{rp}, mp_size_t @var{qxn}, const mp_limb_t *@var{np}, mp_size_t @var{nn}, const mp_limb_t *@var{dp}, mp_size_t @var{dn})
+Divide @{@var{np}, @var{nn}@} by @{@var{dp}, @var{dn}@} and put the quotient
+at @{@var{qp}, @var{nn}@minus{}@var{dn}+1@} and the remainder at @{@var{rp},
+@var{dn}@}.  The quotient is rounded towards 0.
+
+No overlap is permitted between arguments, except that @var{np} might equal
+@var{rp}.  The dividend size @var{nn} must be greater than or equal to divisor
+size @var{dn}.  The most significant limb of the divisor must be non-zero.  The
+@var{qxn} operand must be zero.
+@end deftypefun
+
+@deftypefun mp_limb_t mpn_divrem (mp_limb_t *@var{r1p}, mp_size_t @var{qxn}, mp_limb_t *@var{rs2p}, mp_size_t @var{rs2n}, const mp_limb_t *@var{s3p}, mp_size_t @var{s3n})
+[This function is obsolete.  Please call @code{mpn_tdiv_qr} instead for best
+performance.]
+
+Divide @{@var{rs2p}, @var{rs2n}@} by @{@var{s3p}, @var{s3n}@}, and write the
+quotient at @var{r1p}, with the exception of the most significant limb, which
+is returned.  The remainder replaces the dividend at @var{rs2p}; it will be
+@var{s3n} limbs long (i.e., as many limbs as the divisor).
+
+In addition to an integer quotient, @var{qxn} fraction limbs are developed, and
+stored after the integral limbs.  For most usages, @var{qxn} will be zero.
+
+It is required that @var{rs2n} is greater than or equal to @var{s3n}.  It is
+required that the most significant bit of the divisor is set.
+
+If the quotient is not needed, pass @var{rs2p} + @var{s3n} as @var{r1p}.  Aside
+from that special case, no overlap between arguments is permitted.
+
+Return the most significant limb of the quotient, either 0 or 1.
+
+The area at @var{r1p} needs to be @var{rs2n} @minus{} @var{s3n} + @var{qxn}
+limbs large.
+@end deftypefun
+
+@deftypefn Function mp_limb_t mpn_divrem_1 (mp_limb_t *@var{r1p}, mp_size_t @var{qxn}, @w{mp_limb_t *@var{s2p}}, mp_size_t @var{s2n}, mp_limb_t @var{s3limb})
+@deftypefnx Macro mp_limb_t mpn_divmod_1 (mp_limb_t *@var{r1p}, mp_limb_t *@var{s2p}, @w{mp_size_t @var{s2n}}, @w{mp_limb_t @var{s3limb}})
+Divide @{@var{s2p}, @var{s2n}@} by @var{s3limb}, and write the quotient at
+@var{r1p}.  Return the remainder.
+
+The integer quotient is written to @{@var{r1p}+@var{qxn}, @var{s2n}@} and in
+addition @var{qxn} fraction limbs are developed and written to @{@var{r1p},
+@var{qxn}@}.  Either or both @var{s2n} and @var{qxn} can be zero.  For most
+usages, @var{qxn} will be zero.
+
+@code{mpn_divmod_1} exists for upward source compatibility and is simply a
+macro calling @code{mpn_divrem_1} with a @var{qxn} of 0.
+
+The areas at @var{r1p} and @var{s2p} have to be identical or completely
+separate, not partially overlapping.
+@end deftypefn
+
+@deftypefun mp_limb_t mpn_divmod (mp_limb_t *@var{r1p}, mp_limb_t *@var{rs2p}, mp_size_t @var{rs2n}, const mp_limb_t *@var{s3p}, mp_size_t @var{s3n})
+[This function is obsolete.  Please call @code{mpn_tdiv_qr} instead for best
+performance.]
+@end deftypefun
+
+@deftypefn Macro mp_limb_t mpn_divexact_by3 (mp_limb_t *@var{rp}, mp_limb_t *@var{sp}, @w{mp_size_t @var{n}})
+@deftypefnx Function mp_limb_t mpn_divexact_by3c (mp_limb_t *@var{rp}, mp_limb_t *@var{sp}, @w{mp_size_t @var{n}}, mp_limb_t @var{carry})
+Divide @{@var{sp}, @var{n}@} by 3, expecting it to divide exactly, and writing
+the result to @{@var{rp}, @var{n}@}.  If 3 divides exactly, the return value is
+zero and the result is the quotient.  If not, the return value is non-zero and
+the result won't be anything useful.
+
+@code{mpn_divexact_by3c} takes an initial carry parameter, which can be the
+return value from a previous call, so a large calculation can be done piece by
+piece from low to high.  @code{mpn_divexact_by3} is simply a macro calling
+@code{mpn_divexact_by3c} with a 0 carry parameter.
+
+These routines use a multiply-by-inverse and will be faster than
+@code{mpn_divrem_1} on CPUs with fast multiplication but slow division.
+
+The source @math{a}, result @math{q}, size @math{n}, initial carry @math{i},
+and return value @math{c} satisfy @m{cb^n+a-i=3q, c*b^n + a-i = 3*q}, where
+@m{b=2\GMPraise{@code{GMP\_NUMB\_BITS}}, b=2^GMP_NUMB_BITS}.  The
+return @math{c} is always 0, 1 or 2, and the initial carry @math{i} must also
+be 0, 1 or 2 (these are both borrows really).  When @math{c=0} clearly
+@math{q=(a-i)/3}.  When @m{c \neq 0, c!=0}, the remainder @math{(a-i) @bmod{}
+3} is given by @math{3-c}, because @math{b @equiv{} 1 @bmod{} 3} (when
+@code{mp_bits_per_limb} is even, which is always so currently).
+@end deftypefn
+
+@deftypefun mp_limb_t mpn_mod_1 (const mp_limb_t *@var{s1p}, mp_size_t @var{s1n}, mp_limb_t @var{s2limb})
+Divide @{@var{s1p}, @var{s1n}@} by @var{s2limb}, and return the remainder.
+@var{s1n} can be zero.
+@end deftypefun
+
+@deftypefun mp_limb_t mpn_lshift (mp_limb_t *@var{rp}, const mp_limb_t *@var{sp}, mp_size_t @var{n}, unsigned int @var{count})
+Shift @{@var{sp}, @var{n}@} left by @var{count} bits, and write the result to
+@{@var{rp}, @var{n}@}.  The bits shifted out at the left are returned in the
+least significant @var{count} bits of the return value (the rest of the return
+value is zero).
+
+@var{count} must be in the range 1 to @nicode{mp_bits_per_limb}@minus{}1.  The
+regions @{@var{sp}, @var{n}@} and @{@var{rp}, @var{n}@} may overlap, provided
+@math{@var{rp} @ge{} @var{sp}}.
+
+This function is written in assembly for most CPUs.
+@end deftypefun
+
+@deftypefun mp_limb_t mpn_rshift (mp_limb_t *@var{rp}, const mp_limb_t *@var{sp}, mp_size_t @var{n}, unsigned int @var{count})
+Shift @{@var{sp}, @var{n}@} right by @var{count} bits, and write the result to
+@{@var{rp}, @var{n}@}.  The bits shifted out at the right are returned in the
+most significant @var{count} bits of the return value (the rest of the return
+value is zero).
+
+@var{count} must be in the range 1 to @nicode{mp_bits_per_limb}@minus{}1.  The
+regions @{@var{sp}, @var{n}@} and @{@var{rp}, @var{n}@} may overlap, provided
+@math{@var{rp} @le{} @var{sp}}.
+
+This function is written in assembly for most CPUs.
+@end deftypefun
+
+@deftypefun int mpn_cmp (const mp_limb_t *@var{s1p}, const mp_limb_t *@var{s2p}, mp_size_t @var{n})
+Compare @{@var{s1p}, @var{n}@} and @{@var{s2p}, @var{n}@} and return a
+positive value if @math{@var{s1} > @var{s2}}, 0 if they are equal, or a
+negative value if @math{@var{s1} < @var{s2}}.
+@end deftypefun
+
+@deftypefun mp_size_t mpn_gcd (mp_limb_t *@var{rp}, mp_limb_t *@var{xp}, mp_size_t @var{xn}, mp_limb_t *@var{yp}, mp_size_t @var{yn})
+Set @{@var{rp}, @var{retval}@} to the greatest common divisor of @{@var{xp},
+@var{xn}@} and @{@var{yp}, @var{yn}@}.  The result can be up to @var{yn} limbs;
+the return value is the actual number produced.  Both source operands are
+destroyed.
+
+@{@var{xp}, @var{xn}@} must have at least as many bits as @{@var{yp},
+@var{yn}@}.  @{@var{yp}, @var{yn}@} must be odd.  Both operands must have
+non-zero most significant limbs.  No overlap is permitted between @{@var{xp},
+@var{xn}@} and @{@var{yp}, @var{yn}@}.
+@end deftypefun
+
+@deftypefun mp_limb_t mpn_gcd_1 (const mp_limb_t *@var{xp}, mp_size_t @var{xn}, mp_limb_t @var{ylimb})
+Return the greatest common divisor of @{@var{xp}, @var{xn}@} and @var{ylimb}.
+Both operands must be non-zero.
+@end deftypefun
+
+@deftypefun mp_size_t mpn_gcdext (mp_limb_t *@var{gp}, mp_limb_t *@var{sp}, mp_size_t *@var{sn}, mp_limb_t *@var{up}, mp_size_t @var{un}, mp_limb_t *@var{vp}, mp_size_t @var{vn})
+Let @m{U,@var{U}} be defined by @{@var{up}, @var{un}@} and let @m{V,@var{V}} be
+defined by @{@var{vp}, @var{vn}@}.
+
+Compute the greatest common divisor @math{G} of @math{U} and @math{V}.  Compute
+a cofactor @math{S} such that @math{G = US + VT}.  The second cofactor @var{T}
+is not computed but can easily be obtained from @m{(G - US) / V, (@var{G} -
+@var{U}*@var{S}) / @var{V}} (the division will be exact).  It is required that
+@math{@var{un} @ge{} @var{vn} > 0}, and the most significant
+limb of @{@var{vp}, @var{vn}@} must be non-zero.
+
+@math{S} satisfies @math{S = 1} or @math{@GMPabs{S} < V / (2 G)}. @math{S =
+0} if and only if @math{V} divides @math{U} (i.e., @math{G = V}).
+
+Store @math{G} at @var{gp} and let the return value define its limb count.
+Store @math{S} at @var{sp} and let |*@var{sn}| define its limb count.  @math{S}
+can be negative; when this happens *@var{sn} will be negative.  The area at
+@var{gp} should have room for @var{vn} limbs and the area at @var{sp} should
+have room for @math{@var{vn}+1} limbs.
+
+Both source operands are destroyed.
+
+Compatibility notes: GMP 4.3.0 and 4.3.1 defined @math{S} less strictly.
+Earlier as well as later GMP releases define @math{S} as described here.
+GMP releases before GMP 4.3.0 required additional space for both input and output
+areas. More precisely, the areas @{@var{up}, @math{@var{un}+1}@} and
+@{@var{vp}, @math{@var{vn}+1}@} were destroyed (i.e.@: the operands plus an
+extra limb past the end of each), and the areas pointed to by @var{gp} and
+@var{sp} should each have room for @math{@var{un}+1} limbs.
+@end deftypefun
+
+@deftypefun mp_size_t mpn_sqrtrem (mp_limb_t *@var{r1p}, mp_limb_t *@var{r2p}, const mp_limb_t *@var{sp}, mp_size_t @var{n})
+Compute the square root of @{@var{sp}, @var{n}@} and put the result at
+@{@var{r1p}, @math{@GMPceil{@var{n}/2}}@} and the remainder at @{@var{r2p},
+@var{retval}@}.  @var{r2p} needs space for @var{n} limbs, but the return value
+indicates how many are produced.
+
+The most significant limb of @{@var{sp}, @var{n}@} must be non-zero.  The
+areas @{@var{r1p}, @math{@GMPceil{@var{n}/2}}@} and @{@var{sp}, @var{n}@} must
+be completely separate.  The areas @{@var{r2p}, @var{n}@} and @{@var{sp},
+@var{n}@} must be either identical or completely separate.
+
+If the remainder is not wanted then @var{r2p} can be @code{NULL}, and in this
+case the return value is zero or non-zero according to whether the remainder
+would have been zero or non-zero.
+
+A return value of zero indicates a perfect square.  See also
+@code{mpz_perfect_square_p}.
+@end deftypefun
+
+@deftypefun mp_size_t mpn_get_str (unsigned char *@var{str}, int @var{base}, mp_limb_t *@var{s1p}, mp_size_t @var{s1n})
+Convert @{@var{s1p}, @var{s1n}@} to a raw unsigned char array at @var{str} in
+base @var{base}, and return the number of characters produced.  There may be
+leading zeros in the string.  The string is not in ASCII; to convert it to
+printable format, add the ASCII codes for @samp{0} or @samp{A}, depending on
+the base and range.  @var{base} can vary from 2 to 256.
+
+The most significant limb of the input @{@var{s1p}, @var{s1n}@} must be
+non-zero.  The input @{@var{s1p}, @var{s1n}@} is clobbered, except when
+@var{base} is a power of 2, in which case it's unchanged.
+
+The area at @var{str} has to have space for the largest possible number
+represented by a @var{s1n} long limb array, plus one extra character.
+@end deftypefun
+
+@deftypefun mp_size_t mpn_set_str (mp_limb_t *@var{rp}, const unsigned char *@var{str}, size_t @var{strsize}, int @var{base})
+Convert bytes @{@var{str},@var{strsize}@} in the given @var{base} to limbs at
+@var{rp}.
+
+@math{@var{str}[0]} is the most significant byte and
+@math{@var{str}[@var{strsize}-1]} is the least significant.  Each byte should
+be a value in the range 0 to @math{@var{base}-1}, not an ASCII character.
+@var{base} can vary from 2 to 256.
+
+The return value is the number of limbs written to @var{rp}.  If the most
+significant input byte is non-zero then the high limb at @var{rp} will be
+non-zero, and only that exact number of limbs will be required there.
+
+If the most significant input byte is zero then there may be high zero limbs
+written to @var{rp} and included in the return value.
+
+@var{strsize} must be at least 1, and no overlap is permitted between
+@{@var{str},@var{strsize}@} and the result at @var{rp}.
+@end deftypefun
+
+@deftypefun {mp_bitcnt_t} mpn_scan0 (const mp_limb_t *@var{s1p}, mp_bitcnt_t @var{bit})
+Scan @var{s1p} from bit position @var{bit} for the next clear bit.
+
+It is required that there be a clear bit within the area at @var{s1p} at or
+beyond bit position @var{bit}, so that the function has something to return.
+@end deftypefun
+
+@deftypefun {mp_bitcnt_t} mpn_scan1 (const mp_limb_t *@var{s1p}, mp_bitcnt_t @var{bit})
+Scan @var{s1p} from bit position @var{bit} for the next set bit.
+
+It is required that there be a set bit within the area at @var{s1p} at or
+beyond bit position @var{bit}, so that the function has something to return.
+@end deftypefun
+
+@deftypefun void mpn_random (mp_limb_t *@var{r1p}, mp_size_t @var{r1n})
+@deftypefunx void mpn_random2 (mp_limb_t *@var{r1p}, mp_size_t @var{r1n})
+Generate a random number of length @var{r1n} and store it at @var{r1p}.  The
+most significant limb is always non-zero.  @code{mpn_random} generates
+uniformly distributed limb data, @code{mpn_random2} generates long strings of
+zeros and ones in the binary representation.
+
+@code{mpn_random2} is intended for testing the correctness of the @code{mpn}
+routines.
+@end deftypefun
+
+@deftypefun {mp_bitcnt_t} mpn_popcount (const mp_limb_t *@var{s1p}, mp_size_t @var{n})
+Count the number of set bits in @{@var{s1p}, @var{n}@}.
+@end deftypefun
+
+@deftypefun {mp_bitcnt_t} mpn_hamdist (const mp_limb_t *@var{s1p}, const mp_limb_t *@var{s2p}, mp_size_t @var{n})
+Compute the Hamming distance between @{@var{s1p}, @var{n}@} and @{@var{s2p},
+@var{n}@}, which is the number of bit positions where the two operands have
+different bit values.
+@end deftypefun
+
+@deftypefun int mpn_perfect_square_p (const mp_limb_t *@var{s1p}, mp_size_t @var{n})
+Return non-zero iff @{@var{s1p}, @var{n}@} is a perfect square.
+The most significant limb of the input @{@var{s1p}, @var{n}@} must be
+non-zero.
+@end deftypefun
+
+@deftypefun void mpn_and_n (mp_limb_t *@var{rp}, const mp_limb_t *@var{s1p}, const mp_limb_t *@var{s2p}, mp_size_t @var{n})
+Perform the bitwise logical and of @{@var{s1p}, @var{n}@} and @{@var{s2p},
+@var{n}@}, and write the result to @{@var{rp}, @var{n}@}.
+@end deftypefun
+
+@deftypefun void mpn_ior_n (mp_limb_t *@var{rp}, const mp_limb_t *@var{s1p}, const mp_limb_t *@var{s2p}, mp_size_t @var{n})
+Perform the bitwise logical inclusive or of @{@var{s1p}, @var{n}@} and
+@{@var{s2p}, @var{n}@}, and write the result to @{@var{rp}, @var{n}@}.
+@end deftypefun
+
+@deftypefun void mpn_xor_n (mp_limb_t *@var{rp}, const mp_limb_t *@var{s1p}, const mp_limb_t *@var{s2p}, mp_size_t @var{n})
+Perform the bitwise logical exclusive or of @{@var{s1p}, @var{n}@} and
+@{@var{s2p}, @var{n}@}, and write the result to @{@var{rp}, @var{n}@}.
+@end deftypefun
+
+@deftypefun void mpn_andn_n (mp_limb_t *@var{rp}, const mp_limb_t *@var{s1p}, const mp_limb_t *@var{s2p}, mp_size_t @var{n})
+Perform the bitwise logical and of @{@var{s1p}, @var{n}@} and the bitwise
+complement of @{@var{s2p}, @var{n}@}, and write the result to @{@var{rp}, @var{n}@}.
+@end deftypefun
+
+@deftypefun void mpn_iorn_n (mp_limb_t *@var{rp}, const mp_limb_t *@var{s1p}, const mp_limb_t *@var{s2p}, mp_size_t @var{n})
+Perform the bitwise logical inclusive or of @{@var{s1p}, @var{n}@} and the bitwise
+complement of @{@var{s2p}, @var{n}@}, and write the result to @{@var{rp}, @var{n}@}.
+@end deftypefun
+
+@deftypefun void mpn_nand_n (mp_limb_t *@var{rp}, const mp_limb_t *@var{s1p}, const mp_limb_t *@var{s2p}, mp_size_t @var{n})
+Perform the bitwise logical and of @{@var{s1p}, @var{n}@} and @{@var{s2p},
+@var{n}@}, and write the bitwise complement of the result to @{@var{rp}, @var{n}@}.
+@end deftypefun
+
+@deftypefun void mpn_nior_n (mp_limb_t *@var{rp}, const mp_limb_t *@var{s1p}, const mp_limb_t *@var{s2p}, mp_size_t @var{n})
+Perform the bitwise logical inclusive or of @{@var{s1p}, @var{n}@} and
+@{@var{s2p}, @var{n}@}, and write the bitwise complement of the result to
+@{@var{rp}, @var{n}@}.
+@end deftypefun
+
+@deftypefun void mpn_xnor_n (mp_limb_t *@var{rp}, const mp_limb_t *@var{s1p}, const mp_limb_t *@var{s2p}, mp_size_t @var{n})
+Perform the bitwise logical exclusive or of @{@var{s1p}, @var{n}@} and
+@{@var{s2p}, @var{n}@}, and write the bitwise complement of the result to
+@{@var{rp}, @var{n}@}.
+@end deftypefun
+
+@deftypefun void mpn_com (mp_limb_t *@var{rp}, const mp_limb_t *@var{sp}, mp_size_t @var{n})
+Perform the bitwise complement of @{@var{sp}, @var{n}@}, and write the result
+to @{@var{rp}, @var{n}@}.
+@end deftypefun
+
+@deftypefun void mpn_copyi (mp_limb_t *@var{rp}, const mp_limb_t *@var{s1p}, mp_size_t @var{n})
+Copy from @{@var{s1p}, @var{n}@} to @{@var{rp}, @var{n}@}, increasingly.
+@end deftypefun
+
+@deftypefun void mpn_copyd (mp_limb_t *@var{rp}, const mp_limb_t *@var{s1p}, mp_size_t @var{n})
+Copy from @{@var{s1p}, @var{n}@} to @{@var{rp}, @var{n}@}, decreasingly.
+@end deftypefun
+
+@deftypefun void mpn_zero (mp_limb_t *@var{rp}, mp_size_t @var{n})
+Zero @{@var{rp}, @var{n}@}.
+@end deftypefun
+
+@sp 1
+@section Nails
+@cindex Nails
+
+@strong{Everything in this section is highly experimental and may disappear or
+be subject to incompatible changes in a future version of GMP.}
+
+Nails are an experimental feature whereby a few bits are left unused at the
+top of each @code{mp_limb_t}.  This can significantly improve carry handling
+on some processors.
+
+All the @code{mpn} functions accepting limb data will expect the nail bits to
+be zero on entry, and will return data with the nails similarly all zero.
+This applies both to limb vectors and to single limb arguments.
+
+Nails can be enabled by configuring with @samp{--enable-nails}.  By default
+the number of bits will be chosen according to what suits the host processor,
+but a particular number can be selected with @samp{--enable-nails=N}.
+
+At the mpn level, a nail build is neither source nor binary compatible with a
+non-nail build, strictly speaking.  But programs acting on limbs only through
+the mpn functions are likely to work equally well with either build, and
+judicious use of the definitions below should make any program compatible with
+either build, at the source level.
+
+For the higher level routines, meaning @code{mpz} etc, a nail build should be
+fully source and binary compatible with a non-nail build.
+
+@defmac GMP_NAIL_BITS
+@defmacx GMP_NUMB_BITS
+@defmacx GMP_LIMB_BITS
+@code{GMP_NAIL_BITS} is the number of nail bits, or 0 when nails are not in
+use.  @code{GMP_NUMB_BITS} is the number of data bits in a limb.
+@code{GMP_LIMB_BITS} is the total number of bits in an @code{mp_limb_t}.  In
+all cases
+
+@example
+GMP_LIMB_BITS == GMP_NAIL_BITS + GMP_NUMB_BITS
+@end example
+@end defmac
+
+@defmac GMP_NAIL_MASK
+@defmacx GMP_NUMB_MASK
+Bit masks for the nail and number parts of a limb.  @code{GMP_NAIL_MASK} is 0
+when nails are not in use.
+
+@code{GMP_NAIL_MASK} is not often needed, since the nail part can be obtained
+with @code{x >> GMP_NUMB_BITS}, and that means one less large constant, which
+can help various RISC chips.
+@end defmac
+
+@defmac GMP_NUMB_MAX
+The maximum value that can be stored in the number part of a limb.  This is
+the same as @code{GMP_NUMB_MASK}, but can be used for clarity when doing
+comparisons rather than bit-wise operations.
+@end defmac
+
+The term ``nails'' comes from finger or toe nails, which are at the ends of a
+limb (arm or leg).  ``numb'' is short for number, but is also how the
+developers felt after trying for a long time to come up with sensible names
+for these things.
+
+In the future (the distant future most likely) a non-zero nail might be
+permitted, giving non-unique representations for numbers in a limb vector.
+This would help vector processors since carries would only ever need to
+propagate one or two limbs.
+
+
+@node Random Number Functions, Formatted Output, Low-level Functions, Top
+@chapter Random Number Functions
+@cindex Random number functions
+
+Sequences of pseudo-random numbers in GMP are generated using a variable of
+type @code{gmp_randstate_t}, which holds an algorithm selection and a current
+state.  Such a variable must be initialized by a call to one of the
+@code{gmp_randinit} functions, and can be seeded with one of the
+@code{gmp_randseed} functions.
+
+The functions actually generating random numbers are described in @ref{Integer
+Random Numbers}, and @ref{Miscellaneous Float Functions}.
+
+The older style random number functions don't accept a @code{gmp_randstate_t}
+parameter but instead share a global variable of that type.  They use a
+default algorithm and are currently not seeded (though perhaps that will
+change in the future).  The new functions accepting a @code{gmp_randstate_t}
+are recommended for applications that care about randomness.
+
+@menu
+* Random State Initialization::
+* Random State Seeding::
+* Random State Miscellaneous::
+@end menu
+
+@node Random State Initialization, Random State Seeding, Random Number Functions, Random Number Functions
+@section Random State Initialization
+@cindex Random number state
+@cindex Initialization functions
+
+@deftypefun void gmp_randinit_default (gmp_randstate_t @var{state})
+Initialize @var{state} with a default algorithm.  This will be a compromise
+between speed and randomness, and is recommended for applications with no
+special requirements.  Currently this is @code{gmp_randinit_mt}.
+@end deftypefun
+
+@deftypefun void gmp_randinit_mt (gmp_randstate_t @var{state})
+@cindex Mersenne twister random numbers
+Initialize @var{state} for a Mersenne Twister algorithm.  This algorithm is
+fast and has good randomness properties.
+@end deftypefun
+
+@deftypefun void gmp_randinit_lc_2exp (gmp_randstate_t @var{state}, mpz_t @var{a}, @w{unsigned long @var{c}}, @w{mp_bitcnt_t @var{m2exp}})
+@cindex Linear congruential random numbers
+Initialize @var{state} with a linear congruential algorithm @m{X = (@var{a}X +
+@var{c}) @bmod 2^{m2exp}, X = (@var{a}*X + @var{c}) mod 2^@var{m2exp}}.
+
+The low bits of @math{X} in this algorithm are not very random.  The least
+significant bit will have a period no more than 2, and the second bit no more
+than 4, etc.  For this reason only the high half of each @math{X} is actually
+used.
+
+When a random number of more than @math{@var{m2exp}/2} bits is to be
+generated, multiple iterations of the recurrence are used and the results
+concatenated.
+@end deftypefun
+
+@deftypefun int gmp_randinit_lc_2exp_size (gmp_randstate_t @var{state}, mp_bitcnt_t @var{size})
+@cindex Linear congruential random numbers
+Initialize @var{state} for a linear congruential algorithm as per
+@code{gmp_randinit_lc_2exp}.  @var{a}, @var{c} and @var{m2exp} are selected
+from a table, chosen so that @var{size} bits (or more) of each @math{X} will
+be used, i.e.@: @math{@var{m2exp}/2 @ge{} @var{size}}.
+
+If successful the return value is non-zero.  If @var{size} is bigger than the
+table data provides then the return value is zero.  The maximum @var{size}
+currently supported is 128.
+@end deftypefun
+
+@deftypefun void gmp_randinit_set (gmp_randstate_t @var{rop}, gmp_randstate_t @var{op})
+Initialize @var{rop} with a copy of the algorithm and state from @var{op}.
+@end deftypefun
+
+@c  Although gmp_randinit, gmp_errno and related constants are obsolete, we
+@c  still put @findex entries for them, since they're still documented and
+@c  someone might be looking them up when perusing old application code.
+
+@deftypefun void gmp_randinit (gmp_randstate_t @var{state}, @w{gmp_randalg_t @var{alg}}, @dots{})
+@strong{This function is obsolete.}
+
+@findex GMP_RAND_ALG_LC
+@findex GMP_RAND_ALG_DEFAULT
+Initialize @var{state} with an algorithm selected by @var{alg}.  The only
+choice is @code{GMP_RAND_ALG_LC}, which is @code{gmp_randinit_lc_2exp_size}
+described above.  A third parameter of type @code{unsigned long} is required;
+this is the @var{size} for that function.  @code{GMP_RAND_ALG_DEFAULT} or 0
+are the same as @code{GMP_RAND_ALG_LC}.
+
+@c  For reference, this is the only place gmp_errno has been documented, and
+@c  due to being non thread safe we won't be adding to its uses.
+@findex gmp_errno
+@findex GMP_ERROR_UNSUPPORTED_ARGUMENT
+@findex GMP_ERROR_INVALID_ARGUMENT
+@code{gmp_randinit} sets bits in the global variable @code{gmp_errno} to
+indicate an error.  @code{GMP_ERROR_UNSUPPORTED_ARGUMENT} if @var{alg} is
+unsupported, or @code{GMP_ERROR_INVALID_ARGUMENT} if the @var{size} parameter
+is too big.  It may be noted this error reporting is not thread safe (a good
+reason to use @code{gmp_randinit_lc_2exp_size} instead).
+@end deftypefun
+
+@deftypefun void gmp_randclear (gmp_randstate_t @var{state})
+Free all memory occupied by @var{state}.
+@end deftypefun
+
+
+@node Random State Seeding, Random State Miscellaneous, Random State Initialization, Random Number Functions
+@section Random State Seeding
+@cindex Random number seeding
+@cindex Seeding random numbers
+
+@deftypefun void gmp_randseed (gmp_randstate_t @var{state}, mpz_t @var{seed})
+@deftypefunx void gmp_randseed_ui (gmp_randstate_t @var{state}, @w{unsigned long int @var{seed}})
+Set an initial seed value into @var{state}.
+
+The size of a seed determines how many different sequences of random numbers
+it's possible to generate.  The ``quality'' of the seed is the randomness
+of a given seed compared to the previous seed used, and this affects the
+randomness of separate number sequences.  The method for choosing a seed is
+critical if the generated numbers are to be used for important applications,
+such as generating cryptographic keys.
+
+Traditionally the system time has been used to seed, but care needs to be
+taken with this.  If an application seeds often and the resolution of the
+system clock is low, then the same sequence of numbers might be repeated.
+Also, the system time is quite easy to guess, so if unpredictability is
+required then it should definitely not be the only source for the seed value.
+On some systems there's a special device @file{/dev/random} which provides
+random data better suited for use as a seed.
+@end deftypefun
+
+
+@node Random State Miscellaneous,  , Random State Seeding, Random Number Functions
+@section Random State Miscellaneous
+
+@deftypefun {unsigned long} gmp_urandomb_ui (gmp_randstate_t @var{state}, unsigned long @var{n})
+Return a uniformly distributed random number of @var{n} bits, i.e.@: in the
+range 0 to @m{2^n-1,2^@var{n}-1} inclusive.  @var{n} must be less than or
+equal to the number of bits in an @code{unsigned long}.
+@end deftypefun
+
+@deftypefun {unsigned long} gmp_urandomm_ui (gmp_randstate_t @var{state}, unsigned long @var{n})
+Return a uniformly distributed random number in the range 0 to
+@math{@var{n}-1}, inclusive.
+@end deftypefun
+
+
+@node Formatted Output, Formatted Input, Random Number Functions, Top
+@chapter Formatted Output
+@cindex Formatted output
+@cindex @code{printf} formatted output
+
+@menu
+* Formatted Output Strings::
+* Formatted Output Functions::
+* C++ Formatted Output::
+@end menu
+
+@node Formatted Output Strings, Formatted Output Functions, Formatted Output, Formatted Output
+@section Format Strings
+
+@code{gmp_printf} and friends accept format strings similar to the standard C
+@code{printf} (@pxref{Formatted Output,, Formatted Output, libc, The GNU C
+Library Reference Manual}).  A format specification is of the form
+
+@example
+% [flags] [width] [.[precision]] [type] conv
+@end example
+
+GMP adds types @samp{Z}, @samp{Q} and @samp{F} for @code{mpz_t}, @code{mpq_t}
+and @code{mpf_t} respectively, @samp{M} for @code{mp_limb_t}, and @samp{N} for
+an @code{mp_limb_t} array.  @samp{Z}, @samp{Q}, @samp{M} and @samp{N} behave
+like integers.  @samp{Q} will print a @samp{/} and a denominator, if needed.
+@samp{F} behaves like a float.  For example,
+
+@example
+mpz_t z;
+gmp_printf ("%s is an mpz %Zd\n", "here", z);
+
+mpq_t q;
+gmp_printf ("a hex rational: %#40Qx\n", q);
+
+mpf_t f;
+int   n;
+gmp_printf ("fixed point mpf %.*Ff with %d digits\n", n, f, n);
+
+mp_limb_t l;
+gmp_printf ("limb %Mu\n", l);
+
+const mp_limb_t *ptr;
+mp_size_t       size;
+gmp_printf ("limb array %Nx\n", ptr, size);
+@end example
+
+For @samp{N} the limbs are expected least significant first, as per the
+@code{mpn} functions (@pxref{Low-level Functions}).  A negative size can be
+given to print the value as a negative.
+
+All the standard C @code{printf} types behave the same as the C library
+@code{printf}, and can be freely intermixed with the GMP extensions.  In the
+current implementation the standard parts of the format string are simply
+handed to @code{printf} and only the GMP extensions handled directly.
+
+The flags accepted are as follows.  GLIBC style @nisamp{'} is only for the
+standard C types (not the GMP types), and only if the C library supports it.
+
+@quotation
+@multitable {(space)} {MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM}
+@item @nicode{0} @tab pad with zeros (rather than spaces)
+@item @nicode{#} @tab show the base with @samp{0x}, @samp{0X} or @samp{0}
+@item @nicode{+} @tab always show a sign
+@item (space)    @tab show a space or a @samp{-} sign
+@item @nicode{'} @tab group digits, GLIBC style (not GMP types)
+@end multitable
+@end quotation
+
+The optional width and precision can be given as a number within the format
+string, or as a @samp{*} to take an extra parameter of type @code{int}, the
+same as the standard @code{printf}.
+
+The standard types accepted are as follows.  @samp{h} and @samp{l} are
+portable, the rest will depend on the compiler (or include files) for the type
+and the C library for the output.
+
+@quotation
+@multitable {(space)} {MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM}
+@item @nicode{h}  @tab @nicode{short}
+@item @nicode{hh} @tab @nicode{char}
+@item @nicode{j}  @tab @nicode{intmax_t} or @nicode{uintmax_t}
+@item @nicode{l}  @tab @nicode{long} or @nicode{wchar_t}
+@item @nicode{ll} @tab @nicode{long long}
+@item @nicode{L}  @tab @nicode{long double}
+@item @nicode{q}  @tab @nicode{quad_t} or @nicode{u_quad_t}
+@item @nicode{t}  @tab @nicode{ptrdiff_t}
+@item @nicode{z}  @tab @nicode{size_t}
+@end multitable
+@end quotation
+
+@noindent
+The GMP types are
+
+@quotation
+@multitable {(space)} {MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM}
+@item @nicode{F}  @tab @nicode{mpf_t}, float conversions
+@item @nicode{Q}  @tab @nicode{mpq_t}, integer conversions
+@item @nicode{M}  @tab @nicode{mp_limb_t}, integer conversions
+@item @nicode{N}  @tab @nicode{mp_limb_t} array, integer conversions
+@item @nicode{Z}  @tab @nicode{mpz_t}, integer conversions
+@end multitable
+@end quotation
+
+The conversions accepted are as follows.  @samp{a} and @samp{A} are always
+supported for @code{mpf_t} but depend on the C library for standard C float
+types.  @samp{m} and @samp{p} depend on the C library.
+
+@quotation
+@multitable {(space)} {MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM}
+@item @nicode{a} @nicode{A} @tab hex floats, C99 style
+@item @nicode{c}            @tab character
+@item @nicode{d}            @tab decimal integer
+@item @nicode{e} @nicode{E} @tab scientific format float
+@item @nicode{f}            @tab fixed point float
+@item @nicode{i}            @tab same as @nicode{d}
+@item @nicode{g} @nicode{G} @tab fixed or scientific float
+@item @nicode{m}            @tab @code{strerror} string, GLIBC style
+@item @nicode{n}            @tab store characters written so far
+@item @nicode{o}            @tab octal integer
+@item @nicode{p}            @tab pointer
+@item @nicode{s}            @tab string
+@item @nicode{u}            @tab unsigned integer
+@item @nicode{x} @nicode{X} @tab hex integer
+@end multitable
+@end quotation
+
+@samp{o}, @samp{x} and @samp{X} are unsigned for the standard C types, but for
+types @samp{Z}, @samp{Q} and @samp{N} they are signed.  @samp{u} is not
+meaningful for @samp{Z}, @samp{Q} and @samp{N}.
+
+@samp{M} is a proxy for the C library @samp{l} or @samp{L}, according to the
+size of @code{mp_limb_t}.  Unsigned conversions will be usual, but a signed
+conversion can be used and will interpret the value as a two's complement
+negative.
+
+@samp{n} can be used with any type, even the GMP types.
+
+Other types or conversions that might be accepted by the C library
+@code{printf} cannot be used through @code{gmp_printf}; this includes for
+instance extensions registered with GLIBC @code{register_printf_function}.
+Also currently there's no support for POSIX @samp{$} style numbered arguments
+(perhaps this will be added in the future).
+
+The precision field has its usual meaning for integer @samp{Z} and float
+@samp{F} types, but is currently undefined for @samp{Q} and should not be used
+with that.
+
+@code{mpf_t} conversions only ever generate as many digits as can be
+accurately represented by the operand, the same as @code{mpf_get_str} does.
+Zeros will be used if necessary to pad to the requested precision.  This
+happens even for an @samp{f} conversion of an @code{mpf_t} which is an
+integer, for instance @math{2^@W{1024}} in an @code{mpf_t} of 128 bits
+precision will only produce about 40 digits, then pad with zeros to the
+decimal point.  An empty precision field like @samp{%.Fe} or @samp{%.Ff} can
+be used to specifically request just the significant digits.  Without any dot
+and thus no precision field, a precision value of 6 will be used.  Note that
+these rules mean that @samp{%Ff}, @samp{%.Ff}, and @samp{%.0Ff} will all be
+different.
+
+The decimal point character (or string) is taken from the current locale
+settings on systems which provide @code{localeconv} (@pxref{Locales,, Locales
+and Internationalization, libc, The GNU C Library Reference Manual}).  The C
+library will normally do the same for standard float output.
+
+The format string is only interpreted as plain @code{char}s, multibyte
+characters are not recognised.  Perhaps this will change in the future.
+
+
+@node Formatted Output Functions, C++ Formatted Output, Formatted Output Strings, Formatted Output
+@section Functions
+@cindex Output functions
+
+Each of the following functions is similar to the corresponding C library
+function.  The basic @code{printf} forms take a variable argument list.  The
+@code{vprintf} forms take an argument pointer, see @ref{Variadic Functions,,
+Variadic Functions, libc, The GNU C Library Reference Manual}, or @samp{man 3
+va_start}.
+
+It should be emphasised that if a format string is invalid, or the arguments
+don't match what the format specifies, then the behaviour of any of these
+functions will be unpredictable.  GCC format string checking is not available,
+since it doesn't recognise the GMP extensions.
+
+The file based functions @code{gmp_printf} and @code{gmp_fprintf} will return
+@math{-1} to indicate a write error.  Output is not ``atomic'', so partial
+output may be produced if a write error occurs.  All the functions can return
+@math{-1} if the C library @code{printf} variant in use returns @math{-1}, but
+this shouldn't normally occur.
+
+@deftypefun int gmp_printf (const char *@var{fmt}, @dots{})
+@deftypefunx int gmp_vprintf (const char *@var{fmt}, va_list @var{ap})
+Print to the standard output @code{stdout}.  Return the number of characters
+written, or @math{-1} if an error occurred.
+@end deftypefun
+
+@deftypefun int gmp_fprintf (FILE *@var{fp}, const char *@var{fmt}, @dots{})
+@deftypefunx int gmp_vfprintf (FILE *@var{fp}, const char *@var{fmt}, va_list @var{ap})
+Print to the stream @var{fp}.  Return the number of characters written, or
+@math{-1} if an error occurred.
+@end deftypefun
+
+@deftypefun int gmp_sprintf (char *@var{buf}, const char *@var{fmt}, @dots{})
+@deftypefunx int gmp_vsprintf (char *@var{buf}, const char *@var{fmt}, va_list @var{ap})
+Form a null-terminated string in @var{buf}.  Return the number of characters
+written, excluding the terminating null.
+
+No overlap is permitted between the space at @var{buf} and the string
+@var{fmt}.
+
+These functions are not recommended, since there's no protection against
+exceeding the space available at @var{buf}.
+@end deftypefun
+
+@deftypefun int gmp_snprintf (char *@var{buf}, size_t @var{size}, const char *@var{fmt}, @dots{})
+@deftypefunx int gmp_vsnprintf (char *@var{buf}, size_t @var{size}, const char *@var{fmt}, va_list @var{ap})
+Form a null-terminated string in @var{buf}.  No more than @var{size} bytes
+will be written.  To get the full output, @var{size} must be enough for the
+string and null-terminator.
+
+The return value is the total number of characters which ought to have been
+produced, excluding the terminating null.  If @math{@var{retval} @ge{}
+@var{size}} then the actual output has been truncated to the first
+@math{@var{size}-1} characters, and a null appended.
+
+No overlap is permitted between the region @{@var{buf},@var{size}@} and the
+@var{fmt} string.
+
+Notice the return value is in ISO C99 @code{snprintf} style.  This is so even
+if the C library @code{vsnprintf} is the older GLIBC 2.0.x style.
+@end deftypefun
+
+@deftypefun int gmp_asprintf (char **@var{pp}, const char *@var{fmt}, @dots{})
+@deftypefunx int gmp_vasprintf (char **@var{pp}, const char *@var{fmt}, va_list @var{ap})
+Form a null-terminated string in a block of memory obtained from the current
+memory allocation function (@pxref{Custom Allocation}).  The block will be the
+size of the string and null-terminator.  The address of the block is stored to
+*@var{pp}.  The return value is the number of characters produced, excluding
+the null-terminator.
+
+Unlike the C library @code{asprintf}, @code{gmp_asprintf} doesn't return
+@math{-1} if there's no more memory available, it lets the current allocation
+function handle that.
+@end deftypefun
+
+@deftypefun int gmp_obstack_printf (struct obstack *@var{ob}, const char *@var{fmt}, @dots{})
+@deftypefunx int gmp_obstack_vprintf (struct obstack *@var{ob}, const char *@var{fmt}, va_list @var{ap})
+@cindex @code{obstack} output
+Append to the current object in @var{ob}.  The return value is the number of
+characters written.  A null-terminator is not written.
+
+@var{fmt} cannot be within the current object in @var{ob}, since that object
+might move as it grows.
+
+These functions are available only when the C library provides the obstack
+feature, which probably means only on GNU systems, see @ref{Obstacks,,
+Obstacks, libc, The GNU C Library Reference Manual}.
+@end deftypefun
+
+
+@node C++ Formatted Output,  , Formatted Output Functions, Formatted Output
+@section C++ Formatted Output
+@cindex C++ @code{ostream} output
+@cindex @code{ostream} output
+
+The following functions are provided in @file{libgmpxx} (@pxref{Headers and
+Libraries}), which is built if C++ support is enabled (@pxref{Build Options}).
+Prototypes are available from @code{<gmp.h>}.
+
+@deftypefun ostream& operator<< (ostream& @var{stream}, mpz_t @var{op})
+Print @var{op} to @var{stream}, using its @code{ios} formatting settings.
+@code{ios::width} is reset to 0 after output, the same as the standard
+@code{ostream operator<<} routines do.
+
+In hex or octal, @var{op} is printed as a signed number, the same as for
+decimal.  This is unlike the standard @code{operator<<} routines on @code{int}
+etc, which instead give two's complement.
+@end deftypefun
+
+@deftypefun ostream& operator<< (ostream& @var{stream}, mpq_t @var{op})
+Print @var{op} to @var{stream}, using its @code{ios} formatting settings.
+@code{ios::width} is reset to 0 after output, the same as the standard
+@code{ostream operator<<} routines do.
+
+Output will be a fraction like @samp{5/9}, or if the denominator is 1 then
+just a plain integer like @samp{123}.
+
+In hex or octal, @var{op} is printed as a signed value, the same as for
+decimal.  If @code{ios::showbase} is set then a base indicator is shown on
+both the numerator and denominator (if the denominator is required).
+@end deftypefun
+
+@deftypefun ostream& operator<< (ostream& @var{stream}, mpf_t @var{op})
+Print @var{op} to @var{stream}, using its @code{ios} formatting settings.
+@code{ios::width} is reset to 0 after output, the same as the standard
+@code{ostream operator<<} routines do.
+
+The decimal point follows the standard library float @code{operator<<}, which
+on recent systems means the @code{std::locale} imbued on @var{stream}.
+
+Hex and octal are supported, unlike the standard @code{operator<<} on
+@code{double}.  The mantissa will be in hex or octal, the exponent will be in
+decimal.  For hex the exponent delimiter is an @samp{@@}.  This is as per
+@code{mpf_out_str}.
+
+@code{ios::showbase} is supported, and will put a base on the mantissa, for
+example hex @samp{0x1.8} or @samp{0x0.8}, or octal @samp{01.4} or @samp{00.4}.
+This last form is slightly strange, but at least differentiates itself from
+decimal.
+@end deftypefun
+
+These operators mean that GMP types can be printed in the usual C++ way, for
+example,
+
+@example
+mpz_t  z;
+int    n;
+...
+cout << "iteration " << n << " value " << z << "\n";
+@end example
+
+But note that @code{ostream} output (and @code{istream} input, @pxref{C++
+Formatted Input}) is the only overloading available for the GMP types and that
+for instance using @code{+} with an @code{mpz_t} will have unpredictable
+results.  For classes with overloading, see @ref{C++ Class Interface}.
+
+
+@node Formatted Input, C++ Class Interface, Formatted Output, Top
+@chapter Formatted Input
+@cindex Formatted input
+@cindex @code{scanf} formatted input
+
+@menu
+* Formatted Input Strings::
+* Formatted Input Functions::
+* C++ Formatted Input::
+@end menu
+
+
+@node Formatted Input Strings, Formatted Input Functions, Formatted Input, Formatted Input
+@section Formatted Input Strings
+
+@code{gmp_scanf} and friends accept format strings similar to the standard C
+@code{scanf} (@pxref{Formatted Input,, Formatted Input, libc, The GNU C
+Library Reference Manual}).  A format specification is of the form
+
+@example
+% [flags] [width] [type] conv
+@end example
+
+GMP adds types @samp{Z}, @samp{Q} and @samp{F} for @code{mpz_t}, @code{mpq_t}
+and @code{mpf_t} respectively.  @samp{Z} and @samp{Q} behave like integers.
+@samp{Q} will read a @samp{/} and a denominator, if present.  @samp{F} behaves
+like a float.
+
+GMP variables don't require an @code{&} when passed to @code{gmp_scanf}, since
+they're already ``call-by-reference''.  For example,
+
+@example
+/* to read say "a(5) = 1234" */
+int   n;
+mpz_t z;
+gmp_scanf ("a(%d) = %Zd\n", &n, z);
+
+mpq_t q1, q2;
+gmp_sscanf ("0377 + 0x10/0x11", "%Qi + %Qi", q1, q2);
+
+/* to read say "topleft (1.55,-2.66)" */
+mpf_t x, y;
+char  buf[32];
+gmp_scanf ("%31s (%Ff,%Ff)", buf, x, y);
+@end example
+
+All the standard C @code{scanf} types behave the same as in the C library
+@code{scanf}, and can be freely intermixed with the GMP extensions.  In the
+current implementation the standard parts of the format string are simply
+handed to @code{scanf} and only the GMP extensions handled directly.
+
+The flags accepted are as follows.  @samp{a} and @samp{'} will depend on
+support from the C library, and @samp{'} cannot be used with GMP types.
+
+@quotation
+@multitable {(space)} {MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM}
+@item @nicode{*} @tab read but don't store
+@item @nicode{a} @tab allocate a buffer (string conversions)
+@item @nicode{'} @tab grouped digits, GLIBC style (not GMP types)
+@end multitable
+@end quotation
+
+The standard types accepted are as follows.  @samp{h} and @samp{l} are
+portable, the rest will depend on the compiler (or include files) for the type
+and the C library for the input.
+
+@quotation
+@multitable {(space)} {MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM}
+@item @nicode{h}  @tab @nicode{short}
+@item @nicode{hh} @tab @nicode{char}
+@item @nicode{j}  @tab @nicode{intmax_t} or @nicode{uintmax_t}
+@item @nicode{l}  @tab @nicode{long int}, @nicode{double} or @nicode{wchar_t}
+@item @nicode{ll} @tab @nicode{long long}
+@item @nicode{L}  @tab @nicode{long double}
+@item @nicode{q}  @tab @nicode{quad_t} or @nicode{u_quad_t}
+@item @nicode{t}  @tab @nicode{ptrdiff_t}
+@item @nicode{z}  @tab @nicode{size_t}
+@end multitable
+@end quotation
+
+@noindent
+The GMP types are
+
+@quotation
+@multitable {(space)} {MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM}
+@item @nicode{F}  @tab @nicode{mpf_t}, float conversions
+@item @nicode{Q}  @tab @nicode{mpq_t}, integer conversions
+@item @nicode{Z}  @tab @nicode{mpz_t}, integer conversions
+@end multitable
+@end quotation
+
+The conversions accepted are as follows.  @samp{p} and @samp{[} will depend on
+support from the C library, the rest are standard.
+
+@quotation
+@multitable {(space)} {MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM}
+@item @nicode{c}            @tab character or characters
+@item @nicode{d}            @tab decimal integer
+@item @nicode{e} @nicode{E} @nicode{f} @nicode{g} @nicode{G}
+                            @tab float
+@item @nicode{i}            @tab integer with base indicator
+@item @nicode{n}            @tab characters read so far
+@item @nicode{o}            @tab octal integer
+@item @nicode{p}            @tab pointer
+@item @nicode{s}            @tab string of non-whitespace characters
+@item @nicode{u}            @tab decimal integer
+@item @nicode{x} @nicode{X} @tab hex integer
+@item @nicode{[}            @tab string of characters in a set
+@end multitable
+@end quotation
+
+@samp{e}, @samp{E}, @samp{f}, @samp{g} and @samp{G} are identical, they all
+read either fixed point or scientific format, and either upper or lower case
+@samp{e} for the exponent in scientific format.
+
+C99 style hex float format (@code{printf %a}, @pxref{Formatted Output
+Strings}) is always accepted for @code{mpf_t}, but for the standard float
+types it will depend on the C library.
+
+@samp{x} and @samp{X} are identical, both accept both upper and lower case
+hexadecimal.
+
+@samp{o}, @samp{u}, @samp{x} and @samp{X} all read positive or negative
+values.  For the standard C types these are described as ``unsigned''
+conversions, but that merely affects certain overflow handling, negatives are
+still allowed (per @code{strtoul}, @pxref{Parsing of Integers,, Parsing of
+Integers, libc, The GNU C Library Reference Manual}).  For GMP types there are
+no overflows, so @samp{d} and @samp{u} are identical.
+
+@samp{Q} type reads the numerator and (optional) denominator as given.  If the
+value might not be in canonical form then @code{mpq_canonicalize} must be
+called before using it in any calculations (@pxref{Rational Number
+Functions}).
+
+@samp{Qi} will read a base specification separately for the numerator and
+denominator.  For example @samp{0x10/11} would be 16/11, whereas
+@samp{0x10/0x11} would be 16/17.
+
+@samp{n} can be used with any of the types above, even the GMP types.
+@samp{*} to suppress assignment is allowed, though in that case it would do
+nothing at all.
+
+Other conversions or types that might be accepted by the C library
+@code{scanf} cannot be used through @code{gmp_scanf}.
+
+Whitespace is read and discarded before a field, except for @samp{c} and
+@samp{[} conversions.
+
+For float conversions, the decimal point character (or string) expected is
+taken from the current locale settings on systems which provide
+@code{localeconv} (@pxref{Locales,, Locales and Internationalization, libc,
+The GNU C Library Reference Manual}).  The C library will normally do the same
+for standard float input.
+
+The format string is only interpreted as plain @code{char}s, multibyte
+characters are not recognised.  Perhaps this will change in the future.
+
+
+@node Formatted Input Functions, C++ Formatted Input, Formatted Input Strings, Formatted Input
+@section Formatted Input Functions
+@cindex Input functions
+
+Each of the following functions is similar to the corresponding C library
+function.  The plain @code{scanf} forms take a variable argument list.  The
+@code{vscanf} forms take an argument pointer, see @ref{Variadic Functions,,
+Variadic Functions, libc, The GNU C Library Reference Manual}, or @samp{man 3
+va_start}.
+
+It should be emphasised that if a format string is invalid, or the arguments
+don't match what the format specifies, then the behaviour of any of these
+functions will be unpredictable.  GCC format string checking is not available,
+since it doesn't recognise the GMP extensions.
+
+No overlap is permitted between the @var{fmt} string and any of the results
+produced.
+
+@deftypefun int gmp_scanf (const char *@var{fmt}, @dots{})
+@deftypefunx int gmp_vscanf (const char *@var{fmt}, va_list @var{ap})
+Read from the standard input @code{stdin}.
+@end deftypefun
+
+@deftypefun int gmp_fscanf (FILE *@var{fp}, const char *@var{fmt}, @dots{})
+@deftypefunx int gmp_vfscanf (FILE *@var{fp}, const char *@var{fmt}, va_list @var{ap})
+Read from the stream @var{fp}.
+@end deftypefun
+
+@deftypefun int gmp_sscanf (const char *@var{s}, const char *@var{fmt}, @dots{})
+@deftypefunx int gmp_vsscanf (const char *@var{s}, const char *@var{fmt}, va_list @var{ap})
+Read from a null-terminated string @var{s}.
+@end deftypefun
+
+The return value from each of these functions is the same as the standard C99
+@code{scanf}, namely the number of fields successfully parsed and stored.
+@samp{%n} fields and fields read but suppressed by @samp{*} don't count
+towards the return value.
+
+If end of input (or a file error) is reached before a character for a field or
+a literal, and if no previous non-suppressed fields have matched, then the
+return value is @code{EOF} instead of 0.  A whitespace character in the format
+string is only an optional match and doesn't induce an @code{EOF} in this
+fashion.  Leading whitespace read and discarded for a field doesn't count as
+characters for that field.
+
+For the GMP types, input parsing follows C99 rules, namely one character of
+lookahead is used and characters are read while they continue to meet the
+format requirements.  If this doesn't provide a complete number then the
+function terminates, with that field not stored nor counted towards the return
+value.  For instance with @code{mpf_t} an input @samp{1.23e-XYZ} would be read
+up to the @samp{X} and that character pushed back since it's not a digit.  The
+string @samp{1.23e-} would then be considered invalid since an @samp{e} must
+be followed by at least one digit.
+
+For the standard C types, in the current implementation GMP calls the C
+library @code{scanf} functions, which might have looser rules about what
+constitutes a valid input.
+
+Note that @code{gmp_sscanf} is the same as @code{gmp_fscanf} and only does one
+character of lookahead when parsing.  Although clearly it could look at its
+entire input, it is deliberately made identical to @code{gmp_fscanf}, the same
+way C99 @code{sscanf} is the same as @code{fscanf}.
+
+
+@node C++ Formatted Input,  , Formatted Input Functions, Formatted Input
+@section C++ Formatted Input
+@cindex C++ @code{istream} input
+@cindex @code{istream} input
+
+The following functions are provided in @file{libgmpxx} (@pxref{Headers and
+Libraries}), which is built only if C++ support is enabled (@pxref{Build
+Options}).  Prototypes are available from @code{<gmp.h>}.
+
+@deftypefun istream& operator>> (istream& @var{stream}, mpz_t @var{rop})
+Read @var{rop} from @var{stream}, using its @code{ios} formatting settings.
+@end deftypefun
+
+@deftypefun istream& operator>> (istream& @var{stream}, mpq_t @var{rop})
+An integer like @samp{123} will be read, or a fraction like @samp{5/9}.  No
+whitespace is allowed around the @samp{/}.  If the fraction is not in
+canonical form then @code{mpq_canonicalize} must be called (@pxref{Rational
+Number Functions}) before operating on it.
+
+As per integer input, a @samp{0} or @samp{0x} base indicator is read when
+none of @code{ios::dec}, @code{ios::oct} or @code{ios::hex} are set.  This is
+done separately for numerator and denominator, so that for instance
+@samp{0x10/11} is @math{16/11} and @samp{0x10/0x11} is @math{16/17}.
+@end deftypefun
+
+@deftypefun istream& operator>> (istream& @var{stream}, mpf_t @var{rop})
+Read @var{rop} from @var{stream}, using its @code{ios} formatting settings.
+
+Hex or octal floats are not supported, but might be in the future, or perhaps
+it's best to accept only what the standard float @code{operator>>} does.
+@end deftypefun
+
+Note that digit grouping specified by the @code{istream} locale is currently
+not accepted.  Perhaps this will change in the future.
+
+@sp 1
+These operators mean that GMP types can be read in the usual C++ way, for
+example,
+
+@example
+mpz_t  z;
+...
+cin >> z;
+@end example
+
+But note that @code{istream} input (and @code{ostream} output, @pxref{C++
+Formatted Output}) is the only overloading available for the GMP types and
+that for instance using @code{+} with an @code{mpz_t} will have unpredictable
+results.  For classes with overloading, see @ref{C++ Class Interface}.
+
+
+
+@node C++ Class Interface, BSD Compatible Functions, Formatted Input, Top
+@chapter C++ Class Interface
+@cindex C++ interface
+
+This chapter describes the C++ class based interface to GMP.
+
+All GMP C language types and functions can be used in C++ programs, since
+@file{gmp.h} has @code{extern "C"} qualifiers, but the class interface offers
+overloaded functions and operators which may be more convenient.
+
+Due to the implementation of this interface, a reasonably recent C++ compiler
+is required, one supporting namespaces, partial specialization of templates
+and member templates.  For GCC this means version 2.91 or later.
+
+@strong{Everything described in this chapter is to be considered preliminary
+and might be subject to incompatible changes if some unforeseen difficulty
+reveals itself.}
+
+@menu
+* C++ Interface General::
+* C++ Interface Integers::
+* C++ Interface Rationals::
+* C++ Interface Floats::
+* C++ Interface Random Numbers::
+* C++ Interface Limitations::
+@end menu
+
+
+@node C++ Interface General, C++ Interface Integers, C++ Class Interface, C++ Class Interface
+@section C++ Interface General
+
+@noindent
+All the C++ classes and functions are available with
+
+@cindex @code{gmpxx.h}
+@example
+#include <gmpxx.h>
+@end example
+
+Programs should be linked with the @file{libgmpxx} and @file{libgmp}
+libraries.  For example,
+
+@example
+g++ mycxxprog.cc -lgmpxx -lgmp
+@end example
+
+@noindent
+The classes defined are
+
+@deftp Class mpz_class
+@deftpx Class mpq_class
+@deftpx Class mpf_class
+@end deftp
+
+The standard operators and various standard functions are overloaded to allow
+arithmetic with these classes.  For example,
+
+@example
+int
+main (void)
+@{
+  mpz_class a, b, c;
+
+  a = 1234;
+  b = "-5678";
+  c = a+b;
+  cout << "sum is " << c << "\n";
+  cout << "absolute value is " << abs(c) << "\n";
+
+  return 0;
+@}
+@end example
+
+An important feature of the implementation is that an expression like
+@code{a=b+c} results in a single call to the corresponding @code{mpz_add},
+without using a temporary for the @code{b+c} part.  Expressions which by their
+nature imply intermediate values, like @code{a=b*c+d*e}, still use temporaries
+though.
+
+The classes can be freely intermixed in expressions, as can the classes and
+the standard types @code{long}, @code{unsigned long} and @code{double}.
+Smaller types like @code{int} or @code{float} can also be intermixed, since
+C++ will promote them.
+
+Note that @code{bool} is not accepted directly, but must be explicitly cast to
+an @code{int} first.  This is because C++ will automatically convert any
+pointer to a @code{bool}, so if GMP accepted @code{bool} it would make all
+sorts of invalid class and pointer combinations compile but almost certainly
+not do anything sensible.
+
+Conversions back from the classes to standard C++ types aren't done
+automatically, instead member functions like @code{get_si} are provided (see
+the following sections for details).
+
+Also there are no automatic conversions from the classes to the corresponding
+GMP C types, instead a reference to the underlying C object can be obtained
+with the following functions,
+
+@deftypefun mpz_t mpz_class::get_mpz_t ()
+@deftypefunx mpq_t mpq_class::get_mpq_t ()
+@deftypefunx mpf_t mpf_class::get_mpf_t ()
+@end deftypefun
+
+These can be used to call a C function which doesn't have a C++ class
+interface.  For example to set @code{a} to the GCD of @code{b} and @code{c},
+
+@example
+mpz_class a, b, c;
+...
+mpz_gcd (a.get_mpz_t(), b.get_mpz_t(), c.get_mpz_t());
+@end example
+
+In the other direction, a class can be initialized from the corresponding GMP
+C type, or assigned to if an explicit constructor is used.  In both cases this
+makes a copy of the value, it doesn't create any sort of association.  For
+example,
+
+@example
+mpz_t z;
+// ... init and calculate z ...
+mpz_class x(z);
+mpz_class y;
+y = mpz_class (z);
+@end example
+
+There are no namespace setups in @file{gmpxx.h}, all types and functions are
+simply put into the global namespace.  This is what @file{gmp.h} has done in
+the past, and continues to do for compatibility.  The extras provided by
+@file{gmpxx.h} follow GMP naming conventions and are unlikely to clash with
+anything.
+
+
+@node C++ Interface Integers, C++ Interface Rationals, C++ Interface General, C++ Class Interface
+@section C++ Interface Integers
+
+@deftypefun {} mpz_class::mpz_class (type @var{n})
+Construct an @code{mpz_class}.  All the standard C++ types may be used, except
+@code{long long} and @code{long double}, and all the GMP C++ classes can be
+used.  Any necessary conversion follows the corresponding C function, for
+example @code{double} follows @code{mpz_set_d} (@pxref{Assigning Integers}).
+@end deftypefun
+
+@deftypefun explicit mpz_class::mpz_class (mpz_t @var{z})
+Construct an @code{mpz_class} from an @code{mpz_t}.  The value in @var{z} is
+copied into the new @code{mpz_class}, there won't be any permanent association
+between it and @var{z}.
+@end deftypefun
+
+@deftypefun explicit mpz_class::mpz_class (const char *@var{s}, int @var{base} = 0)
+@deftypefunx explicit mpz_class::mpz_class (const string& @var{s}, int @var{base} = 0)
+Construct an @code{mpz_class} converted from a string using @code{mpz_set_str}
+(@pxref{Assigning Integers}).
+
+If the string is not a valid integer, an @code{std::invalid_argument}
+exception is thrown.  The same applies to @code{operator=}.
+@end deftypefun
+
+@deftypefun mpz_class operator/ (mpz_class @var{a}, mpz_class @var{d})
+@deftypefunx mpz_class operator% (mpz_class @var{a}, mpz_class @var{d})
+Divisions involving @code{mpz_class} round towards zero, as per the
+@code{mpz_tdiv_q} and @code{mpz_tdiv_r} functions (@pxref{Integer Division}).
+This is the same as the C99 @code{/} and @code{%} operators.
+
+The @code{mpz_fdiv@dots{}} or @code{mpz_cdiv@dots{}} functions can always be called
+directly if desired.  For example,
+
+@example
+mpz_class q, a, d;
+...
+mpz_fdiv_q (q.get_mpz_t(), a.get_mpz_t(), d.get_mpz_t());
+@end example
+@end deftypefun
+
+@deftypefun mpz_class abs (mpz_class @var{op1})
+@deftypefunx int cmp (mpz_class @var{op1}, type @var{op2})
+@deftypefunx int cmp (type @var{op1}, mpz_class @var{op2})
+@maybepagebreak
+@deftypefunx bool mpz_class::fits_sint_p (void)
+@deftypefunx bool mpz_class::fits_slong_p (void)
+@deftypefunx bool mpz_class::fits_sshort_p (void)
+@maybepagebreak
+@deftypefunx bool mpz_class::fits_uint_p (void)
+@deftypefunx bool mpz_class::fits_ulong_p (void)
+@deftypefunx bool mpz_class::fits_ushort_p (void)
+@maybepagebreak
+@deftypefunx double mpz_class::get_d (void)
+@deftypefunx long mpz_class::get_si (void)
+@deftypefunx string mpz_class::get_str (int @var{base} = 10)
+@deftypefunx {unsigned long} mpz_class::get_ui (void)
+@maybepagebreak
+@deftypefunx int mpz_class::set_str (const char *@var{str}, int @var{base})
+@deftypefunx int mpz_class::set_str (const string& @var{str}, int @var{base})
+@deftypefunx int sgn (mpz_class @var{op})
+@deftypefunx mpz_class sqrt (mpz_class @var{op})
+These functions provide a C++ class interface to the corresponding GMP C
+routines.
+
+@code{cmp} can be used with any of the classes or the standard C++ types,
+except @code{long long} and @code{long double}.
+@end deftypefun
+
+@sp 1
+Overloaded operators for combinations of @code{mpz_class} and @code{double}
+are provided for completeness, but it should be noted that if the given
+@code{double} is not an integer then the way any rounding is done is currently
+unspecified.  The rounding might take place at the start, in the middle, or at
+the end of the operation, and it might change in the future.
+
+Conversions between @code{mpz_class} and @code{double}, however, are defined
+to follow the corresponding C functions @code{mpz_get_d} and @code{mpz_set_d}.
+And comparisons are always made exactly, as per @code{mpz_cmp_d}.
+
+
+@node C++ Interface Rationals, C++ Interface Floats, C++ Interface Integers, C++ Class Interface
+@section C++ Interface Rationals
+
+In all the following constructors, if a fraction is given then it should be in
+canonical form, or if not then @code{mpq_class::canonicalize} should be called.
+
+@deftypefun {} mpq_class::mpq_class (type @var{op})
+@deftypefunx {} mpq_class::mpq_class (integer @var{num}, integer @var{den})
+Construct an @code{mpq_class}.  The initial value can be a single value of any
+type, or a pair of integers (@code{mpz_class} or standard C++ integer types)
+representing a fraction, except that @code{long long} and @code{long double}
+are not supported.  For example,
+
+@example
+mpq_class q (99);
+mpq_class q (1.75);
+mpq_class q (1, 3);
+@end example
+@end deftypefun
+
+@deftypefun explicit mpq_class::mpq_class (mpq_t @var{q})
+Construct an @code{mpq_class} from an @code{mpq_t}.  The value in @var{q} is
+copied into the new @code{mpq_class}, there won't be any permanent association
+between it and @var{q}.
+@end deftypefun
+
+@deftypefun explicit mpq_class::mpq_class (const char *@var{s}, int @var{base} = 0)
+@deftypefunx explicit mpq_class::mpq_class (const string& @var{s}, int @var{base} = 0)
+Construct an @code{mpq_class} converted from a string using @code{mpq_set_str}
+(@pxref{Initializing Rationals}).
+
+If the string is not a valid rational, an @code{std::invalid_argument}
+exception is thrown.  The same applies to @code{operator=}.
+@end deftypefun
+
+@deftypefun void mpq_class::canonicalize ()
+Put an @code{mpq_class} into canonical form, as per @ref{Rational Number
+Functions}.  All arithmetic operators require their operands in canonical
+form, and will return results in canonical form.
+@end deftypefun
+
+@deftypefun mpq_class abs (mpq_class @var{op})
+@deftypefunx int cmp (mpq_class @var{op1}, type @var{op2})
+@deftypefunx int cmp (type @var{op1}, mpq_class @var{op2})
+@maybepagebreak
+@deftypefunx double mpq_class::get_d (void)
+@deftypefunx string mpq_class::get_str (int @var{base} = 10)
+@maybepagebreak
+@deftypefunx int mpq_class::set_str (const char *@var{str}, int @var{base})
+@deftypefunx int mpq_class::set_str (const string& @var{str}, int @var{base})
+@deftypefunx int sgn (mpq_class @var{op})
+These functions provide a C++ class interface to the corresponding GMP C
+routines.
+
+@code{cmp} can be used with any of the classes or the standard C++ types,
+except @code{long long} and @code{long double}.
+@end deftypefun
+
+@deftypefun {mpz_class&} mpq_class::get_num ()
+@deftypefunx {mpz_class&} mpq_class::get_den ()
+Get a reference to an @code{mpz_class} which is the numerator or denominator
+of an @code{mpq_class}.  This can be used both for read and write access.  If
+the object returned is modified, it modifies the original @code{mpq_class}.
+
+If direct manipulation might produce a non-canonical value, then
+@code{mpq_class::canonicalize} must be called before further operations.
+@end deftypefun
+
+@deftypefun mpz_t mpq_class::get_num_mpz_t ()
+@deftypefunx mpz_t mpq_class::get_den_mpz_t ()
+Get a reference to the underlying @code{mpz_t} numerator or denominator of an
+@code{mpq_class}.  This can be passed to C functions expecting an
+@code{mpz_t}.  Any modifications made to the @code{mpz_t} will modify the
+original @code{mpq_class}.
+
+If direct manipulation might produce a non-canonical value, then
+@code{mpq_class::canonicalize} must be called before further operations.
+@end deftypefun
+
+@deftypefun istream& operator>> (istream& @var{stream}, mpq_class& @var{rop})
+Read @var{rop} from @var{stream}, using its @code{ios} formatting settings,
+the same as @code{mpq_t operator>>} (@pxref{C++ Formatted Input}).
+
+If the @var{rop} read might not be in canonical form then
+@code{mpq_class::canonicalize} must be called.
+@end deftypefun
+
+
+@node C++ Interface Floats, C++ Interface Random Numbers, C++ Interface Rationals, C++ Class Interface
+@section C++ Interface Floats
+
+When an expression requires the use of temporary intermediate @code{mpf_class}
+values, like @code{f=g*h+x*y}, those temporaries will have the same precision
+as the destination @code{f}.  Explicit constructors can be used if this
+doesn't suit.
+
+@deftypefun {} mpf_class::mpf_class (type @var{op})
+@deftypefunx {} mpf_class::mpf_class (type @var{op}, mp_bitcnt_t @var{prec})
+Construct an @code{mpf_class}.  Any standard C++ type can be used, except
+@code{long long} and @code{long double}, and any of the GMP C++ classes can be
+used.
+
+If @var{prec} is given, the initial precision is that value, in bits.  If
+@var{prec} is not given, then the initial precision is determined by the type
+of @var{op} given.  An @code{mpz_class}, @code{mpq_class}, or C++
+builtin type will give the default @code{mpf} precision (@pxref{Initializing
+Floats}).  An @code{mpf_class} or expression will give the precision of that
+value.  The precision of a binary expression is the higher of the two
+operands.
+
+@example
+mpf_class f(1.5);        // default precision
+mpf_class f(1.5, 500);   // 500 bits (at least)
+mpf_class f(x);          // precision of x
+mpf_class f(abs(x));     // precision of x
+mpf_class f(-g, 1000);   // 1000 bits (at least)
+mpf_class f(x+y);        // greater of precisions of x and y
+@end example
+@end deftypefun
+
+@deftypefun explicit mpf_class::mpf_class (mpf_t @var{f})
+@deftypefunx {} mpf_class::mpf_class (mpf_t @var{f}, mp_bitcnt_t @var{prec})
+Construct an @code{mpf_class} from an @code{mpf_t}.  The value in @var{f} is
+copied into the new @code{mpf_class}, there won't be any permanent association
+between it and @var{f}.
+
+If @var{prec} is given, the initial precision is that value, in bits.  If
+@var{prec} is not given, then the initial precision is that of @var{f}.
+@end deftypefun
+
+@deftypefun explicit mpf_class::mpf_class (const char *@var{s})
+@deftypefunx {} mpf_class::mpf_class (const char *@var{s}, mp_bitcnt_t @var{prec}, int @var{base} = 0)
+@deftypefunx explicit mpf_class::mpf_class (const string& @var{s})
+@deftypefunx {} mpf_class::mpf_class (const string& @var{s}, mp_bitcnt_t @var{prec}, int @var{base} = 0)
+Construct an @code{mpf_class} converted from a string using @code{mpf_set_str}
+(@pxref{Assigning Floats}).  If @var{prec} is given, the initial precision is
+that value, in bits.  If not, the default @code{mpf} precision
+(@pxref{Initializing Floats}) is used.
+
+If the string is not a valid float, an @code{std::invalid_argument} exception
+is thrown.  The same applies to @code{operator=}.
+@end deftypefun
+
+@deftypefun {mpf_class&} mpf_class::operator= (type @var{op})
+Convert and store the given @var{op} value to an @code{mpf_class} object.  The
+same types are accepted as for the constructors above.
+
+Note that @code{operator=} only stores a new value, it doesn't copy or change
+the precision of the destination, instead the value is truncated if necessary.
+This is the same as @code{mpf_set} etc.  Note in particular this means for
+@code{mpf_class} a copy constructor is not the same as a default constructor
+plus assignment.
+
+@example
+mpf_class x (y);   // x created with precision of y
+
+mpf_class x;       // x created with default precision
+x = y;             // value truncated to that precision
+@end example
+
+Applications using templated code may need to be careful about the assumptions
+the code makes in this area, when working with @code{mpf_class} values of
+various different or non-default precisions.  For instance implementations of
+the standard @code{complex} template have been seen in both styles above,
+though of course @code{complex} is normally only actually specified for use
+with the builtin float types.
+@end deftypefun
+
+@deftypefun mpf_class abs (mpf_class @var{op})
+@deftypefunx mpf_class ceil (mpf_class @var{op})
+@deftypefunx int cmp (mpf_class @var{op1}, type @var{op2})
+@deftypefunx int cmp (type @var{op1}, mpf_class @var{op2})
+@maybepagebreak
+@deftypefunx bool mpf_class::fits_sint_p (void)
+@deftypefunx bool mpf_class::fits_slong_p (void)
+@deftypefunx bool mpf_class::fits_sshort_p (void)
+@maybepagebreak
+@deftypefunx bool mpf_class::fits_uint_p (void)
+@deftypefunx bool mpf_class::fits_ulong_p (void)
+@deftypefunx bool mpf_class::fits_ushort_p (void)
+@maybepagebreak
+@deftypefunx mpf_class floor (mpf_class @var{op})
+@deftypefunx mpf_class hypot (mpf_class @var{op1}, mpf_class @var{op2})
+@maybepagebreak
+@deftypefunx double mpf_class::get_d (void)
+@deftypefunx long mpf_class::get_si (void)
+@deftypefunx string mpf_class::get_str (mp_exp_t& @var{exp}, int @var{base} = 10, size_t @var{digits} = 0)
+@deftypefunx {unsigned long} mpf_class::get_ui (void)
+@maybepagebreak
+@deftypefunx int mpf_class::set_str (const char *@var{str}, int @var{base})
+@deftypefunx int mpf_class::set_str (const string& @var{str}, int @var{base})
+@deftypefunx int sgn (mpf_class @var{op})
+@deftypefunx mpf_class sqrt (mpf_class @var{op})
+@deftypefunx mpf_class trunc (mpf_class @var{op})
+These functions provide a C++ class interface to the corresponding GMP C
+routines.
+
+@code{cmp} can be used with any of the classes or the standard C++ types,
+except @code{long long} and @code{long double}.
+
+The accuracy provided by @code{hypot} is not currently guaranteed.
+@end deftypefun
+
+@deftypefun {mp_bitcnt_t} mpf_class::get_prec ()
+@deftypefunx void mpf_class::set_prec (mp_bitcnt_t @var{prec})
+@deftypefunx void mpf_class::set_prec_raw (mp_bitcnt_t @var{prec})
+Get or set the current precision of an @code{mpf_class}.
+
+The restrictions described for @code{mpf_set_prec_raw} (@pxref{Initializing
+Floats}) apply to @code{mpf_class::set_prec_raw}.  Note in particular that the
+@code{mpf_class} must be restored to its allocated precision before being
+destroyed.  This must be done by application code, there's no automatic
+mechanism for it.
+@end deftypefun
+
+
+@node C++ Interface Random Numbers, C++ Interface Limitations, C++ Interface Floats, C++ Class Interface
+@section C++ Interface Random Numbers
+
+@deftp Class gmp_randclass
+The C++ class interface to the GMP random number functions uses
+@code{gmp_randclass} to hold an algorithm selection and current state, as per
+@code{gmp_randstate_t}.
+@end deftp
+
+@deftypefun {} gmp_randclass::gmp_randclass (void (*@var{randinit}) (gmp_randstate_t, @dots{}), @dots{})
+Construct a @code{gmp_randclass}, using a call to the given @var{randinit}
+function (@pxref{Random State Initialization}).  The arguments expected are
+the same as @var{randinit}, but with @code{mpz_class} instead of @code{mpz_t}.
+For example,
+
+@example
+gmp_randclass r1 (gmp_randinit_default);
+gmp_randclass r2 (gmp_randinit_lc_2exp_size, 32);
+gmp_randclass r3 (gmp_randinit_lc_2exp, a, c, m2exp);
+gmp_randclass r4 (gmp_randinit_mt);
+@end example
+
+@code{gmp_randinit_lc_2exp_size} will fail if the size requested is too big,
+an @code{std::length_error} exception is thrown in that case.
+@end deftypefun
+
+@deftypefun {} gmp_randclass::gmp_randclass (gmp_randalg_t @var{alg}, @dots{})
+Construct a @code{gmp_randclass} using the same parameters as
+@code{gmp_randinit} (@pxref{Random State Initialization}).  This function is
+obsolete and the above @var{randinit} style should be preferred.
+@end deftypefun
+
+@deftypefun void gmp_randclass::seed (unsigned long int @var{s})
+@deftypefunx void gmp_randclass::seed (mpz_class @var{s})
+Seed a random number generator.  @xref{Random Number Functions}, for how
+to choose a good seed.
+@end deftypefun
+
+@deftypefun mpz_class gmp_randclass::get_z_bits (unsigned long @var{bits})
+@deftypefunx mpz_class gmp_randclass::get_z_bits (mpz_class @var{bits})
+Generate a random integer with a specified number of bits.
+@end deftypefun
+
+@deftypefun mpz_class gmp_randclass::get_z_range (mpz_class @var{n})
+Generate a random integer in the range 0 to @math{@var{n}-1} inclusive.
+@end deftypefun
+
+@deftypefun mpf_class gmp_randclass::get_f ()
+@deftypefunx mpf_class gmp_randclass::get_f (mp_bitcnt_t @var{prec})
+Generate a random float @var{f} in the range @math{0 <= @var{f} < 1}.  @var{f}
+will be to @var{prec} bits precision, or if @var{prec} is not given then to
+the precision of the destination.  For example,
+
+@example
+gmp_randclass  r;
+...
+mpf_class  f (0, 512);   // 512 bits precision
+f = r.get_f();           // random number, 512 bits
+@end example
+@end deftypefun
+
+
+
+@node C++ Interface Limitations,  , C++ Interface Random Numbers, C++ Class Interface
+@section C++ Interface Limitations
+
+@table @asis
+@item @code{mpq_class} and Templated Reading
+A generic piece of template code probably won't know that @code{mpq_class}
+requires a @code{canonicalize} call if inputs read with @code{operator>>}
+might be non-canonical.  This can lead to incorrect results.
+
+@code{operator>>} behaves as it does for reasons of efficiency.  A
+canonicalize can be quite time consuming on large operands, and is best
+avoided if it's not necessary.
+
+But this potential difficulty reduces the usefulness of @code{mpq_class}.
+Perhaps a mechanism to tell @code{operator>>} what to do will be adopted in
+the future, maybe a preprocessor define, a global flag, or an @code{ios} flag
+pressed into service.  Or maybe, at the risk of inconsistency, the
+@code{mpq_class} @code{operator>>} could canonicalize and leave @code{mpq_t}
+@code{operator>>} not doing so, for use on those occasions when that's
+acceptable.  Send feedback or alternate ideas to @email{gmp-bugs@@gmplib.org}.
+
+@item Subclassing
+Subclassing the GMP C++ classes works, but is not currently recommended.
+
+Expressions involving subclasses resolve correctly (or seem to), but in normal
+C++ fashion the subclass doesn't inherit constructors and assignments.
+There are many of those in the GMP classes, and a good way to reestablish them
+in a subclass is not yet provided.
+
+@item Templated Expressions
+A subtle difficulty exists when using expressions together with
+application-defined template functions.  Consider the following, with @code{T}
+intended to be some numeric type,
+
+@example
+template <class T>
+T fun (const T &, const T &);
+@end example
+
+@noindent
+When used with, say, plain @code{mpz_class} variables, it works fine: @code{T}
+is resolved as @code{mpz_class}.
+
+@example
+mpz_class f(1), g(2);
+fun (f, g);    // Good
+@end example
+
+@noindent
+But when one of the arguments is an expression, it doesn't work.
+
+@example
+mpz_class f(1), g(2), h(3);
+fun (f, g+h);  // Bad
+@end example
+
+This is because @code{g+h} ends up being a certain expression template type
+internal to @code{gmpxx.h}, which the C++ template resolution rules are unable
+to automatically convert to @code{mpz_class}.  The workaround is simply to add
+an explicit cast.
+
+@example
+mpz_class f(1), g(2), h(3);
+fun (f, mpz_class(g+h));  // Good
+@end example
+
+Similarly, within @code{fun} it may be necessary to cast an expression to type
+@code{T} when calling a templated @code{fun2}.
+
+@example
+template <class T>
+void fun (T f, T g)
+@{
+  fun2 (f, f+g);     // Bad
+@}
+
+template <class T>
+void fun (T f, T g)
+@{
+  fun2 (f, T(f+g));  // Good
+@}
+@end example
+@end table
+
+
+@node BSD Compatible Functions, Custom Allocation, C++ Class Interface, Top
+@comment  node-name,  next,  previous,  up
+@chapter Berkeley MP Compatible Functions
+@cindex Berkeley MP compatible functions
+@cindex BSD MP compatible functions
+
+These functions are intended to be fully compatible with the Berkeley MP
+library which is available on many BSD derived U*ix systems.  The
+@samp{--enable-mpbsd} option must be used when building GNU MP to make these
+available (@pxref{Installing GMP}).
+
+The original Berkeley MP library has a usage restriction: you cannot use the
+same variable as both source and destination in a single function call.  The
+compatible functions in GNU MP do not share this restriction---inputs and
+outputs may overlap.
+
+It is not recommended that new programs be written using these functions.
+Apart from the incomplete set of functions, the interface for initializing
+@code{MINT} objects is more error prone, and the @code{pow} function collides
+with @code{pow} in @file{libm.a}.
+
+@cindex @code{mp.h}
+@tindex MINT
+Include the header @file{mp.h} to get the definition of the necessary types and
+functions.  If you are on a BSD derived system, make sure to include GNU
+@file{mp.h} if you are going to link the GNU @file{libmp.a} to your program.
+This means that you probably need to give the @samp{-I<dir>} option to the
+compiler, where @samp{<dir>} is the directory where you have GNU @file{mp.h}.
+
+@deftypefun {MINT *} itom (signed short int @var{initial_value})
+Allocate an integer consisting of a @code{MINT} object and dynamic limb space.
+Initialize the integer to @var{initial_value}.  Return a pointer to the
+@code{MINT} object.
+@end deftypefun
+
+@deftypefun {MINT *} xtom (char *@var{initial_value})
+Allocate an integer consisting of a @code{MINT} object and dynamic limb space.
+Initialize the integer from @var{initial_value}, a hexadecimal,
+null-terminated C string.  Return a pointer to the @code{MINT} object.
+@end deftypefun
+
+@deftypefun void move (MINT *@var{src}, MINT *@var{dest})
+Set @var{dest} to @var{src} by copying.  Both variables must be previously
+initialized.
+@end deftypefun
+
+@deftypefun void madd (MINT *@var{src_1}, MINT *@var{src_2}, MINT *@var{destination})
+Add @var{src_1} and @var{src_2} and put the sum in @var{destination}.
+@end deftypefun
+
+@deftypefun void msub (MINT *@var{src_1}, MINT *@var{src_2}, MINT *@var{destination})
+Subtract @var{src_2} from @var{src_1} and put the difference in
+@var{destination}.
+@end deftypefun
+
+@deftypefun void mult (MINT *@var{src_1}, MINT *@var{src_2}, MINT *@var{destination})
+Multiply @var{src_1} and @var{src_2} and put the product in @var{destination}.
+@end deftypefun
+
+@deftypefun void mdiv (MINT *@var{dividend}, MINT *@var{divisor}, MINT *@var{quotient}, MINT *@var{remainder})
+@deftypefunx void sdiv (MINT *@var{dividend}, signed short int @var{divisor}, MINT *@var{quotient}, signed short int *@var{remainder})
+Set @var{quotient} to @var{dividend}/@var{divisor}, and @var{remainder} to
+@var{dividend} mod @var{divisor}.  The quotient is rounded towards zero; the
+remainder has the same sign as the dividend unless it is zero.
+
+Some implementations of these functions work differently---or not at all---for
+negative arguments.
+@end deftypefun
+
+@deftypefun void msqrt (MINT *@var{op}, MINT *@var{root}, MINT *@var{remainder})
+Set @var{root} to @m{\lfloor\sqrt{@var{op}}\rfloor, the truncated integer part
+of the square root of @var{op}}, like @code{mpz_sqrt}.  Set @var{remainder} to
+@m{(@var{op} - @var{root}^2), @var{op}@minus{}@var{root}*@var{root}}, i.e.
+zero if @var{op} is a perfect square.
+
+If @var{root} and @var{remainder} are the same variable, the results are
+undefined.
+@end deftypefun
+
+@deftypefun void pow (MINT *@var{base}, MINT *@var{exp}, MINT *@var{mod}, MINT *@var{dest})
+Set @var{dest} to (@var{base} raised to @var{exp}) modulo @var{mod}.
+
+Note that the name @code{pow} clashes with @code{pow} from the standard C math
+library (@pxref{Exponents and Logarithms,, Exponentiation and Logarithms,
+libc, The GNU C Library Reference Manual}).  An application will only be able
+to use one or the other.
+@end deftypefun
+
+@deftypefun void rpow (MINT *@var{base}, signed short int @var{exp}, MINT *@var{dest})
+Set @var{dest} to @var{base} raised to @var{exp}.
+@end deftypefun
+
+@deftypefun void gcd (MINT *@var{op1}, MINT *@var{op2}, MINT *@var{res})
+Set @var{res} to the greatest common divisor of @var{op1} and @var{op2}.
+@end deftypefun
+
+@deftypefun int mcmp (MINT *@var{op1}, MINT *@var{op2})
+Compare @var{op1} and @var{op2}.  Return a positive value if @var{op1} >
+@var{op2}, zero if @var{op1} = @var{op2}, and a negative value if @var{op1} <
+@var{op2}.
+@end deftypefun
+
+@deftypefun void min (MINT *@var{dest})
+Input a decimal string from @code{stdin}, and put the read integer in
+@var{dest}.  SPC and TAB are allowed in the number string, and are ignored.
+@end deftypefun
+
+@deftypefun void mout (MINT *@var{src})
+Output @var{src} to @code{stdout}, as a decimal string.  Also output a newline.
+@end deftypefun
+
+@deftypefun {char *} mtox (MINT *@var{op})
+Convert @var{op} to a hexadecimal string, and return a pointer to the string.
+The returned string is allocated using the default memory allocation function,
+@code{malloc} by default.  It will be @code{strlen(str)+1} bytes, that being
+exactly enough for the string and null-terminator.
+@end deftypefun
+
+@deftypefun void mfree (MINT *@var{op})
+De-allocate the space used by @var{op}.  @strong{This function should only be
+passed a value returned by @code{itom} or @code{xtom}.}
+@end deftypefun
+
+
+@node Custom Allocation, Language Bindings, BSD Compatible Functions, Top
+@comment  node-name,  next,  previous,  up
+@chapter Custom Allocation
+@cindex Custom allocation
+@cindex Memory allocation
+@cindex Allocation of memory
+
+By default GMP uses @code{malloc}, @code{realloc} and @code{free} for memory
+allocation, and if they fail GMP prints a message to the standard error output
+and terminates the program.
+
+Alternate functions can be specified, to allocate memory in a different way or
+to have a different error action on running out of memory.
+
+This feature is available in the Berkeley compatibility library (@pxref{BSD
+Compatible Functions}) as well as the main GMP library.
+
+@deftypefun void mp_set_memory_functions (@* void *(*@var{alloc_func_ptr}) (size_t), @* void *(*@var{realloc_func_ptr}) (void *, size_t, size_t), @* void (*@var{free_func_ptr}) (void *, size_t))
+Replace the current allocation functions with those given by the arguments.
+If an argument is @code{NULL}, the corresponding default function is used.
+
+These functions will be used for all memory allocation done by GMP, apart from
+temporary space from @code{alloca} if that function is available and GMP is
+configured to use it (@pxref{Build Options}).
+
+@strong{Be sure to call @code{mp_set_memory_functions} only when there are no
+active GMP objects allocated using the previous memory functions!  Usually
+that means calling it before any other GMP function.}
+@end deftypefun
+
+The functions supplied should fit the following declarations:
+
+@deftypevr Function {void *} allocate_function (size_t @var{alloc_size})
+Return a pointer to newly allocated space with at least @var{alloc_size}
+bytes.
+@end deftypevr
+
+@deftypevr Function {void *} reallocate_function (void *@var{ptr}, size_t @var{old_size}, size_t @var{new_size})
+Resize a previously allocated block @var{ptr} of @var{old_size} bytes to be
+@var{new_size} bytes.
+
+The block may be moved if necessary or if desired, and in that case the
+smaller of @var{old_size} and @var{new_size} bytes must be copied to the new
+location.  The return value is a pointer to the resized block, that being the
+new location if moved or just @var{ptr} if not.
+
+@var{ptr} is never @code{NULL}, it's always a previously allocated block.
+@var{new_size} may be bigger or smaller than @var{old_size}.
+@end deftypevr
+
+@deftypevr Function void free_function (void *@var{ptr}, size_t @var{size})
+De-allocate the space pointed to by @var{ptr}.
+
+@var{ptr} is never @code{NULL}, it's always a previously allocated block of
+@var{size} bytes.
+@end deftypevr
+
+A @dfn{byte} here means the unit used by the @code{sizeof} operator.
+
+The @var{reallocate_function} parameter @var{old_size} and the
+@var{free_function} parameter @var{size} are passed for convenience, but of
+course they can be ignored if not needed by an implementation.  The default
+functions using @code{malloc} and friends for instance don't use them.
+
+No error return is allowed from any of these functions, if they return then
+they must have performed the specified operation.  In particular note that
+@var{allocate_function} or @var{reallocate_function} mustn't return
+@code{NULL}.
+
+Getting a different fatal error action is a good use for custom allocation
+functions, for example giving a graphical dialog rather than the default print
+to @code{stderr}.  How much is possible when genuinely out of memory is
+another question though.
+
+There's currently no defined way for the allocation functions to recover from
+an error such as out of memory, they must terminate program execution.  A
+@code{longjmp} or throwing a C++ exception will have undefined results.  This
+may change in the future.
+
+GMP may use allocated blocks to hold pointers to other allocated blocks.  This
+will limit the assumptions a conservative garbage collection scheme can make.
+
+Since the default GMP allocation uses @code{malloc} and friends, those
+functions will be linked in even if the first thing a program does is an
+@code{mp_set_memory_functions}.  It's necessary to change the GMP sources if
+this is a problem.
+
+@sp 1
+@deftypefun void mp_get_memory_functions (@* void *(**@var{alloc_func_ptr}) (size_t), @* void *(**@var{realloc_func_ptr}) (void *, size_t, size_t), @* void (**@var{free_func_ptr}) (void *, size_t))
+Get the current allocation functions, storing function pointers to the
+locations given by the arguments.  If an argument is @code{NULL}, that
+function pointer is not stored.
+
+@need 1000
+For example, to get just the current free function,
+
+@example
+void (*freefunc) (void *, size_t);
+
+mp_get_memory_functions (NULL, NULL, &freefunc);
+@end example
+@end deftypefun
+
+@node Language Bindings, Algorithms, Custom Allocation, Top
+@chapter Language Bindings
+@cindex Language bindings
+@cindex Other languages
+
+The following packages and projects offer access to GMP from languages other
+than C, though perhaps with varying levels of functionality and efficiency.
+
+@c  @spaceuref{U} is the same as @uref{U}, but with a couple of extra spaces
+@c  in tex, just to separate the URL from the preceding text a bit.
+@iftex
+@macro spaceuref {U}
+@ @ @uref{\U\}
+@end macro
+@end iftex
+@ifnottex
+@macro spaceuref {U}
+@uref{\U\}
+@end macro
+@end ifnottex
+
+@sp 1
+@table @asis
+@item C++
+@itemize @bullet
+@item
+GMP C++ class interface, @pxref{C++ Class Interface} @* Straightforward
+interface, expression templates to eliminate temporaries.
+@item
+ALP @spaceuref{http://www-sop.inria.fr/saga/logiciels/ALP/} @* Linear algebra and
+polynomials using templates.
+@item
+Arithmos @spaceuref{http://cant.ua.ac.be/old/arithmos/} @* Rationals
+with infinities and square roots.
+@item
+CLN @spaceuref{http://www.ginac.de/CLN/} @* High level classes for arithmetic.
+@item
+LiDIA @spaceuref{http://www.cdc.informatik.tu-darmstadt.de/TI/LiDIA/} @* A C++
+library for computational number theory.
+@item
+Linbox @spaceuref{http://www.linalg.org/} @* Sparse vectors and matrices.
+@item
+NTL @spaceuref{http://www.shoup.net/ntl/} @* A C++ number theory library.
+@end itemize
+
+@c @item D
+@c @itemize @bullet
+@c @item
+@c gmp-d @spaceuref{http://home.comcast.net/~benhinkle/gmp-d/}
+@c @end itemize
+
+@item Eiffel
+@itemize @bullet
+@item
+Eiffelroom @spaceuref{http://www.eiffelroom.org/node/442}
+@end itemize
+
+@item Fortran
+@itemize @bullet
+@item
+Omni F77 @spaceuref{http://phase.hpcc.jp/Omni/home.html} @* Arbitrary
+precision floats.
+@end itemize
+
+@item Haskell
+@itemize @bullet
+@item
+Glasgow Haskell Compiler @spaceuref{http://www.haskell.org/ghc/}
+@end itemize
+
+@item Java
+@itemize @bullet
+@item
+Kaffe @spaceuref{http://www.kaffe.org/}
+@item
+Kissme @spaceuref{http://kissme.sourceforge.net/}
+@end itemize
+
+@item Lisp
+@itemize @bullet
+@item
+GNU Common Lisp @spaceuref{http://www.gnu.org/software/gcl/gcl.html}
+@item
+Librep @spaceuref{http://librep.sourceforge.net/}
+@item
+@c  FIXME: When there's a stable release with gmp support, just refer to it
+@c  rather than bothering to talk about betas.
+XEmacs (21.5.18 beta and up) @spaceuref{http://www.xemacs.org} @* Optional
+big integers, rationals and floats using GMP.
+@end itemize
+
+@item M4
+@itemize @bullet
+@item
+@c  FIXME: When there's a stable release with gmp support, just refer to it
+@c  rather than bothering to talk about betas.
+GNU m4 betas @spaceuref{http://www.seindal.dk/rene/gnu/} @* Optionally provides
+an arbitrary precision @code{mpeval}.
+@end itemize
+
+@item ML
+@itemize @bullet
+@item
+MLton compiler @spaceuref{http://mlton.org/}
+@end itemize
+
+@item Objective Caml
+@itemize @bullet
+@item
+MLGMP @spaceuref{http://www.di.ens.fr/~monniaux/programmes.html.en}
+@item
+Numerix @spaceuref{http://pauillac.inria.fr/~quercia/} @* Optionally using
+GMP.
+@end itemize
+
+@item Oz
+@itemize @bullet
+@item
+Mozart @spaceuref{http://www.mozart-oz.org/}
+@end itemize
+
+@item Pascal
+@itemize @bullet
+@item
+GNU Pascal Compiler @spaceuref{http://www.gnu-pascal.de/} @* GMP unit.
+@item
+Numerix @spaceuref{http://pauillac.inria.fr/~quercia/} @* For Free Pascal,
+optionally using GMP.
+@end itemize
+
+@item Perl
+@itemize @bullet
+@item
+GMP module, see @file{demos/perl} in the GMP sources (@pxref{Demonstration
+Programs}).
+@item
+Math::GMP @spaceuref{http://www.cpan.org/} @* Compatible with Math::BigInt, but
+not as many functions as the GMP module above.
+@item
+Math::BigInt::GMP @spaceuref{http://www.cpan.org/} @* Plug Math::GMP into
+normal Math::BigInt operations.
+@end itemize
+
+@need 1000
+@item Pike
+@itemize @bullet
+@item
+mpz module in the standard distribution, @uref{http://pike.ida.liu.se/}
+@end itemize
+
+@need 500
+@item Prolog
+@itemize @bullet
+@item
+SWI Prolog @spaceuref{http://www.swi-prolog.org/} @*
+Arbitrary precision floats.
+@end itemize
+
+@item Python
+@itemize @bullet
+@item
+GMPY @uref{http://code.google.com/p/gmpy/}
+@end itemize
+
+@item Ruby
+@itemize @bullet
+@item
+@uref{http://rubygems.org/gems/gmp}
+@end itemize
+
+@item Scheme
+@itemize @bullet
+@item
+GNU Guile (upcoming 1.8) @spaceuref{http://www.gnu.org/software/guile/guile.html}
+@item
+RScheme @spaceuref{http://www.rscheme.org/}
+@item
+STklos @spaceuref{http://www.stklos.org/}
+@c
+@c  For reference, MzScheme uses some of gmp, but (as of version 205) it only
+@c  has copies of some of the generic C code, and we don't consider that a
+@c  language binding to gmp.
+@c
+@end itemize
+
+@item Smalltalk
+@itemize @bullet
+@item
+GNU Smalltalk @spaceuref{http://www.smalltalk.org/versions/GNUSmalltalk.html}
+@end itemize
+
+@item Other
+@itemize @bullet
+@item
+Axiom @uref{http://savannah.nongnu.org/projects/axiom} @* Computer algebra
+using GCL.
+@item
+DrGenius @spaceuref{http://drgenius.seul.org/} @* Geometry system and
+mathematical programming language.
+@item
+GiNaC @spaceuref{http://www.ginac.de/} @* C++ computer algebra using CLN.
+@item
+GOO @spaceuref{http://www.googoogaga.org/} @* Dynamic object oriented
+language.
+@item
+Maxima @uref{http://www.ma.utexas.edu/users/wfs/maxima.html} @* Macsyma
+computer algebra using GCL.
+@item
+Q @spaceuref{http://q-lang.sourceforge.net/} @* Equational programming system.
+@item
+Regina @spaceuref{http://regina.sourceforge.net/} @* Topological calculator.
+@item
+Yacas @spaceuref{http://yacas.sourceforge.net/} @* Yet another computer algebra system.
+@end itemize
+
+@end table
+
+
+@node Algorithms, Internals, Language Bindings, Top
+@chapter Algorithms
+@cindex Algorithms
+
+This chapter is an introduction to some of the algorithms used for various GMP
+operations.  The code is likely to be hard to understand without knowing
+something about the algorithms.
+
+Some GMP internals are mentioned, but applications that expect to be
+compatible with future GMP releases should take care to use only the
+documented functions.
+
+@menu
+* Multiplication Algorithms::
+* Division Algorithms::
+* Greatest Common Divisor Algorithms::
+* Powering Algorithms::
+* Root Extraction Algorithms::
+* Radix Conversion Algorithms::
+* Other Algorithms::
+* Assembly Coding::
+@end menu
+
+
+@node Multiplication Algorithms, Division Algorithms, Algorithms, Algorithms
+@section Multiplication
+@cindex Multiplication algorithms
+
+N@cross{}N limb multiplications and squares are done using one of seven
+algorithms, as the size N increases.
+
+@quotation
+@multitable {KaratsubaMMM} {MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM}
+@item Algorithm @tab Threshold
+@item Basecase  @tab (none)
+@item Karatsuba @tab @code{MUL_TOOM22_THRESHOLD}
+@item Toom-3    @tab @code{MUL_TOOM33_THRESHOLD}
+@item Toom-4    @tab @code{MUL_TOOM44_THRESHOLD}
+@item Toom-6.5  @tab @code{MUL_TOOM6H_THRESHOLD}
+@item Toom-8.5  @tab @code{MUL_TOOM8H_THRESHOLD}
+@item FFT       @tab @code{MUL_FFT_THRESHOLD}
+@end multitable
+@end quotation
+
+Similarly for squaring, with the @code{SQR} thresholds.
+
+N@cross{}M multiplications of operands with different sizes above
+@code{MUL_TOOM22_THRESHOLD} are currently done by special Toom-inspired
+algorithms or directly with FFT, depending on operand size (@pxref{Unbalanced
+Multiplication}).
+
+@menu
+* Basecase Multiplication::
+* Karatsuba Multiplication::
+* Toom 3-Way Multiplication::
+* Toom 4-Way Multiplication::
+* Higher degree Toom'n'half::
+* FFT Multiplication::
+* Other Multiplication::
+* Unbalanced Multiplication::
+@end menu
+
+
+@node Basecase Multiplication, Karatsuba Multiplication, Multiplication Algorithms, Multiplication Algorithms
+@subsection Basecase Multiplication
+
+Basecase N@cross{}M multiplication is a straightforward rectangular set of
+cross-products, the same as long multiplication done by hand and for that
+reason sometimes known as the schoolbook or grammar school method.  This is an
+@m{O(NM),O(N*M)} algorithm.  See Knuth section 4.3.1 algorithm M
+(@pxref{References}), and the @file{mpn/generic/mul_basecase.c} code.
+
+Assembly implementations of @code{mpn_mul_basecase} are essentially the same
+as the generic C code, but have all the usual assembly tricks and
+obscurities introduced for speed.
+
+A square can be done in roughly half the time of a multiply, by using the fact
+that the cross products above and below the diagonal are the same.  A triangle
+of products below the diagonal is formed, doubled (left shift by one bit), and
+then the products on the diagonal added.  This can be seen in
+@file{mpn/generic/sqr_basecase.c}.  Again the assembly implementations take
+essentially the same approach.
+
+@tex
+\def\GMPline#1#2#3#4#5#6{%
+  \hbox {%
+    \vrule height 2.5ex depth 1ex
+           \hbox to 2em {\hfil{#2}\hfil}%
+    \vrule \hbox to 2em {\hfil{#3}\hfil}%
+    \vrule \hbox to 2em {\hfil{#4}\hfil}%
+    \vrule \hbox to 2em {\hfil{#5}\hfil}%
+    \vrule \hbox to 2em {\hfil{#6}\hfil}%
+    \vrule}}
+\GMPdisplay{
+  \hbox{%
+    \vbox{%
+      \hbox to 1.5em {\vrule height 2.5ex depth 1ex width 0pt}%
+      \hbox {\vrule height 2.5ex depth 1ex width 0pt u0\hfil}%
+      \hbox {\vrule height 2.5ex depth 1ex width 0pt u1\hfil}%
+      \hbox {\vrule height 2.5ex depth 1ex width 0pt u2\hfil}%
+      \hbox {\vrule height 2.5ex depth 1ex width 0pt u3\hfil}%
+      \hbox {\vrule height 2.5ex depth 1ex width 0pt u4\hfil}%
+      \vfill}%
+    \vbox{%
+      \hbox{%
+        \hbox to 2em {\hfil u0\hfil}%
+        \hbox to 2em {\hfil u1\hfil}%
+        \hbox to 2em {\hfil u2\hfil}%
+        \hbox to 2em {\hfil u3\hfil}%
+        \hbox to 2em {\hfil u4\hfil}}%
+      \vskip 0.7ex
+      \hrule
+      \GMPline{u0}{d}{}{}{}{}%
+      \hrule
+      \GMPline{u1}{}{d}{}{}{}%
+      \hrule
+      \GMPline{u2}{}{}{d}{}{}%
+      \hrule
+      \GMPline{u3}{}{}{}{d}{}%
+      \hrule
+      \GMPline{u4}{}{}{}{}{d}%
+      \hrule}}}
+@end tex
+@ifnottex
+@example
+@group
+     u0  u1  u2  u3  u4
+   +---+---+---+---+---+
+u0 | d |   |   |   |   |
+   +---+---+---+---+---+
+u1 |   | d |   |   |   |
+   +---+---+---+---+---+
+u2 |   |   | d |   |   |
+   +---+---+---+---+---+
+u3 |   |   |   | d |   |
+   +---+---+---+---+---+
+u4 |   |   |   |   | d |
+   +---+---+---+---+---+
+@end group
+@end example
+@end ifnottex
+
+In practice squaring isn't a full 2@cross{} faster than multiplying, it's
+usually around 1.5@cross{}.  Less than 1.5@cross{} probably indicates
+@code{mpn_sqr_basecase} wants improving on that CPU.
+
+On some CPUs @code{mpn_mul_basecase} can be faster than the generic C
+@code{mpn_sqr_basecase} on some small sizes.  @code{SQR_BASECASE_THRESHOLD} is
+the size at which to use @code{mpn_sqr_basecase}, this will be zero if that
+routine should be used always.
+
+
+@node Karatsuba Multiplication, Toom 3-Way Multiplication, Basecase Multiplication, Multiplication Algorithms
+@subsection Karatsuba Multiplication
+@cindex Karatsuba multiplication
+
+The Karatsuba multiplication algorithm is described in Knuth section 4.3.3
+part A, and various other textbooks.  A brief description is given here.
+
+The inputs @math{x} and @math{y} are treated as each split into two parts of
+equal length (or the most significant part one limb shorter if N is odd).
+
+@tex
+% GMPboxwidth used for all the multiplication pictures
+\global\newdimen\GMPboxwidth \global\GMPboxwidth=5em
+% GMPboxdepth and GMPboxheight are also used for the float pictures
+\global\newdimen\GMPboxdepth  \global\GMPboxdepth=1ex
+\global\newdimen\GMPboxheight \global\GMPboxheight=2ex
+\gdef\GMPvrule{\vrule height \GMPboxheight depth \GMPboxdepth}
+\def\GMPbox#1#2{%
+  \vbox {%
+    \hrule
+    \hbox to 2\GMPboxwidth{%
+      \GMPvrule \hfil $#1$\hfil \vrule \hfil $#2$\hfil \vrule}%
+    \hrule}}
+\GMPdisplay{%
+\vbox{%
+  \hbox to 2\GMPboxwidth {high \hfil low}
+  \vskip 0.7ex
+  \GMPbox{x_1}{x_0}
+  \vskip 0.5ex
+  \GMPbox{y_1}{y_0}
+}}
+@end tex
+@ifnottex
+@example
+@group
+ high              low
++----------+----------+
+|    x1    |    x0    |
++----------+----------+
+
++----------+----------+
+|    y1    |    y0    |
++----------+----------+
+@end group
+@end example
+@end ifnottex
+
+Let @math{b} be the power of 2 where the split occurs, i.e.@: if @ms{x,0} is
+@math{k} limbs (@ms{y,0} the same) then
+@m{b=2\GMPraise{$k*$@code{mp\_bits\_per\_limb}}, b=2^(k*mp_bits_per_limb)}.
+With that @m{x=x_1b+x_0,x=x1*b+x0} and @m{y=y_1b+y_0,y=y1*b+y0}, and the
+following holds,
+
+@display
+@m{xy = (b^2+b)x_1y_1 - b(x_1-x_0)(y_1-y_0) + (b+1)x_0y_0,
+  x*y = (b^2+b)*x1*y1 - b*(x1-x0)*(y1-y0) + (b+1)*x0*y0}
+@end display
+
+This formula means doing only three multiplies of (N/2)@cross{}(N/2) limbs,
+whereas a basecase multiply of N@cross{}N limbs is equivalent to four
+multiplies of (N/2)@cross{}(N/2).  The factors @math{(b^2+b)} etc represent
+the positions where the three products must be added.
+
+@tex
+\def\GMPboxA#1#2{%
+  \vbox{%
+    \hrule
+    \hbox{%
+      \GMPvrule
+      \hbox to 2\GMPboxwidth {\hfil\hbox{$#1$}\hfil}%
+      \vrule
+      \hbox to 2\GMPboxwidth {\hfil\hbox{$#2$}\hfil}%
+      \vrule}
+    \hrule}}
+\def\GMPboxB#1#2{%
+  \hbox{%
+    \raise \GMPboxdepth \hbox to \GMPboxwidth {\hfil #1\hskip 0.5em}%
+    \vbox{%
+      \hrule
+      \hbox{%
+        \GMPvrule
+        \hbox to 2\GMPboxwidth {\hfil\hbox{$#2$}\hfil}%
+        \vrule}%
+      \hrule}}}
+\GMPdisplay{%
+\vbox{%
+  \hbox to 4\GMPboxwidth {high \hfil low}
+  \vskip 0.7ex
+  \GMPboxA{x_1y_1}{x_0y_0}
+  \vskip 0.5ex
+  \GMPboxB{$+$}{x_1y_1}
+  \vskip 0.5ex
+  \GMPboxB{$+$}{x_0y_0}
+  \vskip 0.5ex
+  \GMPboxB{$-$}{(x_1-x_0)(y_1-y_0)}
+}}
+@end tex
+@ifnottex
+@example
+@group
+ high                              low
++--------+--------+ +--------+--------+
+|      x1*y1      | |      x0*y0      |
++--------+--------+ +--------+--------+
+          +--------+--------+
+      add |      x1*y1      |
+          +--------+--------+
+          +--------+--------+
+      add |      x0*y0      |
+          +--------+--------+
+          +--------+--------+
+      sub | (x1-x0)*(y1-y0) |
+          +--------+--------+
+@end group
+@end example
+@end ifnottex
+
+The term @m{(x_1-x_0)(y_1-y_0),(x1-x0)*(y1-y0)} is best calculated as an
+absolute value, and the sign used to choose to add or subtract.  Notice the
+sum @m{\mathop{\rm high}(x_0y_0)+\mathop{\rm low}(x_1y_1),
+high(x0*y0)+low(x1*y1)} occurs twice, so it's possible to do @m{5k,5*k} limb
+additions, rather than @m{6k,6*k}, but in GMP extra function call overheads
+outweigh the saving.
+
+Squaring is similar to multiplying, but with @math{x=y} the formula reduces to
+an equivalent with three squares,
+
+@display
+@m{x^2 = (b^2+b)x_1^2 - b(x_1-x_0)^2 + (b+1)x_0^2,
+   x^2 = (b^2+b)*x1^2 - b*(x1-x0)^2 + (b+1)*x0^2}
+@end display
+
+The final result is accumulated from those three squares the same way as for
+the three multiplies above.  The middle term @m{(x_1-x_0)^2,(x1-x0)^2} is now
+never negative.
+
+A similar formula for both multiplying and squaring can be constructed with a
+middle term @m{(x_1+x_0)(y_1+y_0),(x1+x0)*(y1+y0)}.  But those sums can exceed
+@math{k} limbs, leading to more carry handling and additions than the form
+above.
+
+Karatsuba multiplication is asymptotically an @math{O(N^@W{1.585})} algorithm,
+the exponent being @m{\log3/\log2,log(3)/log(2)}, representing 3 multiplies
+each @math{1/2} the size of the inputs.  This is a big improvement over the
+basecase multiply at @math{O(N^2)} and the advantage soon overcomes the extra
+additions Karatsuba performs.  @code{MUL_TOOM22_THRESHOLD} can be as little
+as 10 limbs.  The @code{SQR} threshold is usually about twice the @code{MUL}.
+
+The basecase algorithm will take a time of the form @m{M(N) = aN^2 + bN + c,
+M(N) = a*N^2 + b*N + c} and the Karatsuba algorithm @m{K(N) = 3M(N/2) + dN +
+e, K(N) = 3*M(N/2) + d*N + e}, which expands to @m{K(N) = {3\over4} aN^2 +
+{3\over2} bN + 3c + dN + e, K(N) = 3/4*a*N^2 + 3/2*b*N + 3*c + d*N + e}.  The
+factor @m{3\over4, 3/4} for @math{a} means per-crossproduct speedups in the
+basecase code will increase the threshold since they benefit @math{M(N)} more
+than @math{K(N)}.  And conversely the @m{3\over2, 3/2} for @math{b} means
+linear style speedups of @math{b} will decrease the threshold since they
+benefit @math{K(N)} more than @math{M(N)}.  The latter can be seen for
+instance when adding an optimized @code{mpn_sqr_diagonal} to
+@code{mpn_sqr_basecase}.  Of course all speedups reduce total time, and in
+that sense the algorithm thresholds are merely of academic interest.
+
+
+@node Toom 3-Way Multiplication, Toom 4-Way Multiplication, Karatsuba Multiplication, Multiplication Algorithms
+@subsection Toom 3-Way Multiplication
+@cindex Toom multiplication
+
+The Karatsuba formula is the simplest case of a general approach to splitting
+inputs that leads to both Toom and FFT algorithms.  A description of
+Toom can be found in Knuth section 4.3.3, with an example 3-way
+calculation after Theorem A@.  The 3-way form used in GMP is described here.
+
+The operands are each considered split into 3 pieces of equal length (or the
+most significant part 1 or 2 limbs shorter than the other two).
+
+@tex
+\def\GMPbox#1#2#3{%
+  \vbox{%
+    \hrule \vfil
+    \hbox to 3\GMPboxwidth {%
+      \GMPvrule
+      \hfil$#1$\hfil
+      \vrule
+      \hfil$#2$\hfil
+      \vrule
+      \hfil$#3$\hfil
+      \vrule}%
+    \vfil \hrule
+}}
+\GMPdisplay{%
+\vbox{%
+  \hbox to 3\GMPboxwidth {high \hfil low}
+  \vskip 0.7ex
+  \GMPbox{x_2}{x_1}{x_0}
+  \vskip 0.5ex
+  \GMPbox{y_2}{y_1}{y_0}
+  \vskip 0.5ex
+}}
+@end tex
+@ifnottex
+@example
+@group
+ high                         low
++----------+----------+----------+
+|    x2    |    x1    |    x0    |
++----------+----------+----------+
+
++----------+----------+----------+
+|    y2    |    y1    |    y0    |
++----------+----------+----------+
+@end group
+@end example
+@end ifnottex
+
+@noindent
+These parts are treated as the coefficients of two polynomials
+
+@display
+@group
+@m{X(t) = x_2t^2 + x_1t + x_0,
+   X(t) = x2*t^2 + x1*t + x0}
+@m{Y(t) = y_2t^2 + y_1t + y_0,
+   Y(t) = y2*t^2 + y1*t + y0}
+@end group
+@end display
+
+Let @math{b} equal the power of 2 which is the size of the @ms{x,0}, @ms{x,1},
+@ms{y,0} and @ms{y,1} pieces, i.e.@: if they're @math{k} limbs each then
+@m{b=2\GMPraise{$k*$@code{mp\_bits\_per\_limb}}, b=2^(k*mp_bits_per_limb)}.
+With this @math{x=X(b)} and @math{y=Y(b)}.
+
+Let a polynomial @m{W(t)=X(t)Y(t),W(t)=X(t)*Y(t)} and suppose its coefficients
+are
+
+@display
+@m{W(t) = w_4t^4 + w_3t^3 + w_2t^2 + w_1t + w_0,
+   W(t) = w4*t^4 + w3*t^3 + w2*t^2 + w1*t + w0}
+@end display
+
+The @m{w_i,w[i]} are going to be determined, and when they are they'll give
+the final result using @math{w=W(b)}, since
+@m{xy=X(b)Y(b),x*y=X(b)*Y(b)=W(b)}.  The coefficients will be roughly
+@math{b^2} each, and the final @math{W(b)} will be an addition like,
+
+@tex
+\def\GMPbox#1#2{%
+  \moveright #1\GMPboxwidth
+  \vbox{%
+    \hrule
+    \hbox{%
+      \GMPvrule
+      \hbox to 2\GMPboxwidth {\hfil$#2$\hfil}%
+      \vrule}%
+    \hrule
+}}
+\GMPdisplay{%
+\vbox{%
+  \hbox to 6\GMPboxwidth {high \hfil low}%
+  \vskip 0.7ex
+  \GMPbox{0}{w_4}
+  \vskip 0.5ex
+  \GMPbox{1}{w_3}
+  \vskip 0.5ex
+  \GMPbox{2}{w_2}
+  \vskip 0.5ex
+  \GMPbox{3}{w_1}
+  \vskip 0.5ex
+  \GMPbox{4}{w_0}
+}}
+@end tex
+@ifnottex
+@example
+@group
+ high                                        low
++-------+-------+
+|       w4      |
++-------+-------+
+       +--------+-------+
+       |        w3      |
+       +--------+-------+
+               +--------+-------+
+               |        w2      |
+               +--------+-------+
+                       +--------+-------+
+                       |        w1      |
+                       +--------+-------+
+                                +-------+-------+
+                                |       w0      |
+                                +-------+-------+
+@end group
+@end example
+@end ifnottex
+
+The @m{w_i,w[i]} coefficients could be formed by a simple set of cross
+products, like @m{w_4=x_2y_2,w4=x2*y2}, @m{w_3=x_2y_1+x_1y_2,w3=x2*y1+x1*y2},
+@m{w_2=x_2y_0+x_1y_1+x_0y_2,w2=x2*y0+x1*y1+x0*y2} etc, but this would need all
+nine @m{x_iy_j,x[i]*y[j]} for @math{i,j=0,1,2}, and would be equivalent merely
+to a basecase multiply.  Instead the following approach is used.
+
+@math{X(t)} and @math{Y(t)} are evaluated and multiplied at 5 points, giving
+values of @math{W(t)} at those points.  In GMP the following points are used,
+
+@quotation
+@multitable {@m{t=\infty,t=inf}M} {MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM}
+@item Point                 @tab Value
+@item @math{t=0}            @tab @m{x_0y_0,x0 * y0}, which gives @ms{w,0} immediately
+@item @math{t=1}            @tab @m{(x_2+x_1+x_0)(y_2+y_1+y_0),(x2+x1+x0) * (y2+y1+y0)}
+@item @math{t=-1}           @tab @m{(x_2-x_1+x_0)(y_2-y_1+y_0),(x2-x1+x0) * (y2-y1+y0)}
+@item @math{t=2}            @tab @m{(4x_2+2x_1+x_0)(4y_2+2y_1+y_0),(4*x2+2*x1+x0) * (4*y2+2*y1+y0)}
+@item @m{t=\infty,t=inf}    @tab @m{x_2y_2,x2 * y2}, which gives @ms{w,4} immediately
+@end multitable
+@end quotation
+
+At @math{t=-1} the values can be negative and that's handled using the
+absolute values and tracking the sign separately.  At @m{t=\infty,t=inf} the
+value is actually @m{\lim_{t\to\infty} {X(t)Y(t)\over t^4}, X(t)*Y(t)/t^4 in
+the limit as t approaches infinity}, but it's much easier to think of as
+simply @m{x_2y_2,x2*y2} giving @ms{w,4} immediately (much like
+@m{x_0y_0,x0*y0} at @math{t=0} gives @ms{w,0} immediately).
+
+Each of the points substituted into
+@m{W(t)=w_4t^4+\cdots+w_0,W(t)=w4*t^4+@dots{}+w0} gives a linear combination
+of the @m{w_i,w[i]} coefficients, and the value of those combinations has just
+been calculated.
+
+@tex
+\GMPdisplay{%
+$\matrix{%
+W(0)      & = &       &   &      &   &      &   &      &   & w_0 \cr
+W(1)      & = &   w_4 & + &  w_3 & + &  w_2 & + &  w_1 & + & w_0 \cr
+W(-1)     & = &   w_4 & - &  w_3 & + &  w_2 & - &  w_1 & + & w_0 \cr
+W(2)      & = & 16w_4 & + & 8w_3 & + & 4w_2 & + & 2w_1 & + & w_0 \cr
+W(\infty) & = &   w_4 \cr
+}$}
+@end tex
+@ifnottex
+@example
+@group
+W(0)   =                              w0
+W(1)   =    w4 +   w3 +   w2 +   w1 + w0
+W(-1)  =    w4 -   w3 +   w2 -   w1 + w0
+W(2)   = 16*w4 + 8*w3 + 4*w2 + 2*w1 + w0
+W(inf) =    w4
+@end group
+@end example
+@end ifnottex
+
+This is a set of five equations in five unknowns, and some elementary linear
+algebra quickly isolates each @m{w_i,w[i]}.  This involves adding or
+subtracting one @math{W(t)} value from another, and a couple of divisions by
+powers of 2 and one division by 3, the latter using the special
+@code{mpn_divexact_by3} (@pxref{Exact Division}).
+
+The conversion of @math{W(t)} values to the coefficients is interpolation.  A
+polynomial of degree 4 like @math{W(t)} is uniquely determined by values known
+at 5 different points.  The points are arbitrary and can be chosen to make the
+linear equations come out with a convenient set of steps for quickly isolating
+the @m{w_i,w[i]}.
+
+Squaring follows the same procedure as multiplication, but there's only one
+@math{X(t)} and it's evaluated at the 5 points, and those values squared to
+give values of @math{W(t)}.  The interpolation is then identical, and in fact
+the same @code{toom_interpolate_5pts} subroutine is used for both squaring and
+multiplying.
+
+Toom-3 is asymptotically @math{O(N^@W{1.465})}, the exponent being
+@m{\log5/\log3,log(5)/log(3)}, representing 5 recursive multiplies of 1/3 the
+original size each.  This is an improvement over Karatsuba at
+@math{O(N^@W{1.585})}, though Toom does more work in the evaluation and
+interpolation and so it only realizes its advantage above a certain size.
+
+Near the crossover between Toom-3 and Karatsuba there's generally a range of
+sizes where the difference between the two is small.
+@code{MUL_TOOM33_THRESHOLD} is a somewhat arbitrary point in that range and
+successive runs of the tune program can give different values due to small
+variations in measuring.  A graph of time versus size for the two shows the
+effect, see @file{tune/README}.
+
+At the fairly small sizes where the Toom-3 thresholds occur it's worth
+remembering that the asymptotic behaviour for Karatsuba and Toom-3 can't be
+expected to make accurate predictions, due of course to the big influence of
+all sorts of overheads, and the fact that only a few recursions of each are
+being performed.  Even at large sizes there's a good chance machine dependent
+effects like cache architecture will mean actual performance deviates from
+what might be predicted.
+
+The formula given for the Karatsuba algorithm (@pxref{Karatsuba
+Multiplication}) has an equivalent for Toom-3 involving only five multiplies,
+but this would be complicated and unenlightening.
+
+An alternate view of Toom-3 can be found in Zuras (@pxref{References}), using
+a vector to represent the @math{x} and @math{y} splits and a matrix
+multiplication for the evaluation and interpolation stages.  The matrix
+inverses are not meant to be actually used, and they have elements with values
+much greater than in fact arise in the interpolation steps.  The diagram shown
+for the 3-way is attractive, but again doesn't have to be implemented that way
+and for example with a bit of rearrangement just one division by 6 can be
+done.
+
+
+@node Toom 4-Way Multiplication, Higher degree Toom'n'half, Toom 3-Way Multiplication, Multiplication Algorithms
+@subsection Toom 4-Way Multiplication
+@cindex Toom multiplication
+
+Karatsuba and Toom-3 split the operands into 2 and 3 coefficients,
+respectively.  Toom-4 analogously splits the operands into 4 coefficients.
+Using the notation from the section on Toom-3 multiplication, we form two
+polynomials:
+
+@display
+@group
+@m{X(t) = x_3t^3 + x_2t^2 + x_1t + x_0,
+   X(t) = x3*t^3 + x2*t^2 + x1*t + x0}
+@m{Y(t) = y_3t^3 + y_2t^2 + y_1t + y_0,
+   Y(t) = y3*t^3 + y2*t^2 + y1*t + y0}
+@end group
+@end display
+
+@math{X(t)} and @math{Y(t)} are evaluated and multiplied at 7 points, giving
+values of @math{W(t)} at those points.  In GMP the following points are used,
+
+@quotation
+@multitable {@m{t=-1/2,t=inf}M} {MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM}
+@item Point              @tab Value
+@item @math{t=0}         @tab @m{x_0y_0,x0 * y0}, which gives @ms{w,0} immediately
+@item @math{t=1/2}       @tab @m{(x_3+2x_2+4x_1+8x_0)(y_3+2y_2+4y_1+8y_0),(x3+2*x2+4*x1+8*x0) * (y3+2*y2+4*y1+8*y0)}
+@item @math{t=-1/2}      @tab @m{(-x_3+2x_2-4x_1+8x_0)(-y_3+2y_2-4y_1+8y_0),(-x3+2*x2-4*x1+8*x0) * (-y3+2*y2-4*y1+8*y0)}
+@item @math{t=1}         @tab @m{(x_3+x_2+x_1+x_0)(y_3+y_2+y_1+y_0),(x3+x2+x1+x0) * (y3+y2+y1+y0)}
+@item @math{t=-1}        @tab @m{(-x_3+x_2-x_1+x_0)(-y_3+y_2-y_1+y_0),(-x3+x2-x1+x0) * (-y3+y2-y1+y0)}
+@item @math{t=2}         @tab @m{(8x_3+4x_2+2x_1+x_0)(8y_3+4y_2+2y_1+y_0),(8*x3+4*x2+2*x1+x0) * (8*y3+4*y2+2*y1+y0)}
+@item @m{t=\infty,t=inf} @tab @m{x_3y_3,x3 * y3}, which gives @ms{w,6} immediately
+@end multitable
+@end quotation
+
+The number of additions and subtractions for Toom-4 is much larger than for Toom-3.
+But several subexpressions occur multiple times, for example @m{x_2+x_0,x2+x0}
+occurs for both @math{t=1} and @math{t=-1}.
+
+Toom-4 is asymptotically @math{O(N^@W{1.404})}, the exponent being
+@m{\log7/\log4,log(7)/log(4)}, representing 7 recursive multiplies of 1/4 the
+original size each.
+
+
+@node Higher degree Toom'n'half, FFT Multiplication, Toom 4-Way Multiplication, Multiplication Algorithms
+@subsection Higher degree Toom'n'half
+@cindex Toom multiplication
+
+The Toom algorithms described above (@pxref{Toom 3-Way Multiplication},
+@pxref{Toom 4-Way Multiplication}) generalize to split into an arbitrary
+number of pieces.  In general a split of two equally long operands into
+@math{r} pieces leads to evaluations and pointwise multiplications done at
+@m{2r-1,2*r-1} points.  To fully exploit symmetries it would be better to have
+a multiple of 4 points, which is why Toom'n'half is used for higher degrees.
+
+Toom'n'half means that the existence of one more piece is considered for a
+single operand.  It can be virtual, i.e.@: zero, or real, when the two operands
+are not exactly balanced.  By choosing an even @math{r},
+Toom-@m{r{1\over2},r+1/2} requires @math{2r} points, a multiple of four.
+
+The four-plets of points include 0, @m{\infty,inf}, +1, -1 and
+@m{\pm2^i,+-2^i}, @m{\pm2^{-i},+-2^-i}, each of them giving shortcuts for the
+evaluation phase and for some steps in the interpolation phase.  Further tricks
+are used to reduce the memory footprint of the whole multiplication algorithm
+to a memory buffer equal in size to the result of the product.
+
+Current GMP uses both Toom-6'n'half and Toom-8'n'half.
+
+
+@node FFT Multiplication, Other Multiplication, Higher degree Toom'n'half, Multiplication Algorithms
+@subsection FFT Multiplication
+@cindex FFT multiplication
+@cindex Fast Fourier Transform
+
+At large to very large sizes a Fermat style FFT multiplication is used,
+following Sch@"onhage and Strassen (@pxref{References}).  Descriptions of FFTs
+in various forms can be found in many textbooks, for instance Knuth section
+4.3.3 part C or Lipson chapter IX@.  A brief description of the form used in
+GMP is given here.
+
+The multiplication done is @m{xy \bmod 2^N+1, x*y mod 2^N+1}, for a given
+@math{N}.  A full product @m{xy,x*y} is obtained by choosing @m{N \ge
+\mathop{\rm bits}(x)+\mathop{\rm bits}(y), N>=bits(x)+bits(y)} and padding
+@math{x} and @math{y} with high zero limbs.  The modular product is the native
+form for the algorithm, so padding to get a full product is unavoidable.
+
+The algorithm follows a split, evaluate, pointwise multiply, interpolate and
+combine similar to that described above for Karatsuba and Toom-3.  A @math{k}
+parameter controls the split, with an FFT-@math{k} splitting into @math{2^k}
+pieces of @math{M=N/2^k} bits each.  @math{N} must be a multiple of
+@m{2^k\times@code{mp\_bits\_per\_limb}, (2^k)*@nicode{mp_bits_per_limb}} so
+the split falls on limb boundaries, avoiding bit shifts in the split and
+combine stages.
+
+The evaluations, pointwise multiplications, and interpolation, are all done
+modulo @m{2^{N'}+1, 2^N'+1} where @math{N'} is @math{2M+k+3} rounded up to a
+multiple of @math{2^k} and of @code{mp_bits_per_limb}.  The results of
+interpolation will be the following negacyclic convolution of the input
+pieces, and the choice of @math{N'} ensures these sums aren't truncated.
+@tex
+$$ w_n = \sum_{{i+j = b2^k+n}\atop{b=0,1}} (-1)^b x_i y_j $$
+@end tex
+@ifnottex
+
+@example
+           ---
+           \         b
+w[n] =     /     (-1) * x[i] * y[j]
+           ---
+       i+j==b*2^k+n
+          b=0,1
+@end example
+
+@end ifnottex
+The points used for the evaluation are @math{g^i} for @math{i=0} to
+@math{2^k-1} where @m{g=2^{2N'/2^k}, g=2^(2N'/2^k)}.  @math{g} is a
+@m{2^k,2^k'}th root of unity mod @m{2^{N'}+1,2^N'+1}, which produces necessary
+cancellations at the interpolation stage, and it's also a power of 2 so the
+fast Fourier transforms used for the evaluation and interpolation do only
+shifts, adds and negations.
+
+The pointwise multiplications are done modulo @m{2^{N'}+1, 2^N'+1} and either
+recurse into a further FFT or use a plain multiplication (Toom-3, Karatsuba or
+basecase), whichever is optimal at the size @math{N'}.  The interpolation is
+an inverse fast Fourier transform.  The resulting set of sums of @m{x_iy_j,
+x[i]*y[j]} are added at appropriate offsets to give the final result.
+
+Squaring is the same, but @math{x} is the only input so it's one transform at
+the evaluate stage and the pointwise multiplies are squares.  The
+interpolation is the same.
+
+For a mod @math{2^N+1} product, an FFT-@math{k} is an @m{O(N^{k/(k-1)}),
+O(N^(k/(k-1)))} algorithm, the exponent representing @math{2^k} recursed
+modular multiplies each @m{1/2^{k-1},1/2^(k-1)} the size of the original.
+Each successive @math{k} is an asymptotic improvement, but overheads mean each
+is only faster at bigger and bigger sizes.  In the code, @code{MUL_FFT_TABLE}
+and @code{SQR_FFT_TABLE} are the thresholds where each @math{k} is used.  Each
+new @math{k} effectively swaps some multiplying for some shifts, adds and
+overheads.
+
+A mod @math{2^N+1} product can be formed with a normal
+@math{N@cross{}N@rightarrow{}2N} bit multiply plus a subtraction, so an FFT
+and Toom-3 etc can be compared directly.  A @math{k=4} FFT at
+@math{O(N^@W{1.333})} can be expected to be the first faster than Toom-3 at
+@math{O(N^@W{1.465})}.  In practice this is what's found, with
+@code{MUL_FFT_MODF_THRESHOLD} and @code{SQR_FFT_MODF_THRESHOLD} being between
+300 and 1000 limbs, depending on the CPU@.  So far it's been found that only
+very large FFTs recurse into pointwise multiplies above these sizes.
+
+When an FFT is to give a full product, the change of @math{N} to @math{2N}
+doesn't alter the theoretical complexity for a given @math{k}, but for the
+purposes of considering where an FFT might be first used it can be assumed
+that the FFT is recursing into a normal multiply and that on that basis it's
+doing @math{2^k} recursed multiplies each @m{1/2^{k-2},1/2^(k-2)} the size of
+the inputs, making it @m{O(N^{k/(k-2)}), O(N^(k/(k-2)))}.  This would mean
+@math{k=7} at @math{O(N^@W{1.4})} would be the first FFT faster than Toom-3.
+In practice @code{MUL_FFT_THRESHOLD} and @code{SQR_FFT_THRESHOLD} have been
+found to be in the @math{k=8} range, somewhere between 3000 and 10000 limbs.
+
+The way @math{N} is split into @math{2^k} pieces and then @math{2M+k+3} is
+rounded up to a multiple of @math{2^k} and @code{mp_bits_per_limb} means that
+when @math{2^k@ge{}@nicode{mp\_bits\_per\_limb}} the effective @math{N} is a
+multiple of @m{2^{2k-1},2^(2k-1)} bits.  The @math{+k+3} means some values of
+@math{N} just under such a multiple will be rounded to the next.  The
+complexity calculations above assume that a favourable size is used, meaning
+one which isn't padded through rounding, and it's also assumed that the extra
+@math{+k+3} bits are negligible at typical FFT sizes.
+
+The practical effect of the @m{2^{2k-1},2^(2k-1)} constraint is to introduce a
+step-effect into measured speeds.  For example @math{k=8} will round @math{N}
+up to a multiple of 32768 bits, so for a 32-bit limb there'll be 512 limb
+groups of sizes for which @code{mpn_mul_n} runs at the same speed.  Or for
+@math{k=9} groups of 2048 limbs, @math{k=10} groups of 8192 limbs, etc.  In
+practice it's been found each @math{k} is used at quite small multiples of its
+size constraint and so the step effect is quite noticeable in a time versus
+size graph.
+
+The threshold determinations currently measure at the mid-points of size
+steps, but this is sub-optimal since at the start of a new step it can happen
+that it's better to go back to the previous @math{k} for a while.  Something
+more sophisticated for @code{MUL_FFT_TABLE} and @code{SQR_FFT_TABLE} will be
+needed.
+
+
+@node Other Multiplication, Unbalanced Multiplication, FFT Multiplication, Multiplication Algorithms
+@subsection Other Multiplication
+@cindex Toom multiplication
+
+The Toom algorithms described above (@pxref{Toom 3-Way Multiplication},
+@pxref{Toom 4-Way Multiplication}) generalize to split into an arbitrary
+number of pieces, as per Knuth section 4.3.3 algorithm C@.  This is not
+currently used.  The notes here are merely for interest.
+
+In general a split into @math{r+1} pieces is made, and evaluations and
+pointwise multiplications done at @m{2r+1,2*r+1} points.  A 4-way split does 7
+pointwise multiplies, 5-way does 9, etc.  Asymptotically an @math{(r+1)}-way
+algorithm is @m{O(N^{log(2r+1)/log(r+1)}), O(N^(log(2*r+1)/log(r+1)))}.  Only
+the pointwise multiplications count towards big-@math{O} complexity, but the
+time spent in the evaluate and interpolate stages grows with @math{r} and has
+a significant practical impact, with the asymptotic advantage of each @math{r}
+realized only at bigger and bigger sizes.  The overheads grow as
+@m{O(Nr),O(N*r)}, whereas in an @math{r=2^k} FFT they grow only as @m{O(N \log
+r), O(N*log(r))}.
+
+Knuth algorithm C evaluates at points 0,1,2,@dots{},@m{2r,2*r}, but exercise 4
+uses @math{-r},@dots{},0,@dots{},@math{r} and the latter saves some small
+multiplies in the evaluate stage (or rather trades them for additions), and
+has a further saving of nearly half the interpolate steps.  The idea is to
+separate odd and even final coefficients and then perform algorithm C steps C7
+and C8 on them separately.  The divisors at step C7 become @math{j^2} and the
+multipliers at C8 become @m{2tj-j^2,2*t*j-j^2}.
+
+Splitting odd and even parts through positive and negative points can be
+thought of as using @math{-1} as a square root of unity.  If a 4th root of
+unity was available then a further split and speedup would be possible, but no
+such root exists for plain integers.  Going to complex integers with
+@m{i=\sqrt{-1}, i=sqrt(-1)} doesn't help, essentially because in Cartesian
+form it takes three real multiplies to do a complex multiply.  The existence
+of @m{2^k,2^k'}th roots of unity in a suitable ring or field lets the fast
+Fourier transform keep splitting and get to @m{O(N \log r), O(N*log(r))}.
+
+Floating point FFTs use complex numbers approximating Nth roots of unity.
+Some processors have special support for such FFTs.  But these are not used in
+GMP since it's very difficult to guarantee an exact result (to some number of
+bits).  An occasional difference of 1 in the last bit might not matter to a
+typical signal processing algorithm, but is of course of vital importance to
+GMP.
+
+
+@node Unbalanced Multiplication,  , Other Multiplication, Multiplication Algorithms
+@subsection Unbalanced Multiplication
+@cindex Unbalanced multiplication
+
+Multiplication of operands with different sizes, both below
+@code{MUL_TOOM22_THRESHOLD}, is done with plain schoolbook multiplication
+(@pxref{Basecase Multiplication}).
+
+For really large operands, we invoke FFT directly.
+
+For operands between these sizes, we use Toom inspired algorithms suggested by
+Alberto Zanoni and Marco Bodrato.  The idea is to split the operands into
+polynomials of different degree.  GMP currently splits the smaller operand
+into 2 coefficients, i.e., a polynomial of degree 1, but the larger operand
+can be split into 2, 3, or 4 coefficients, i.e., a polynomial of degree 1 to
+3.
+
+@c FIXME: This is mighty ugly, but a cleaner @need triggers texinfo bugs that
+@c screw up layout here and there in the rest of the manual.
+@c @tex
+@c \goodbreak
+@c @end tex
+@node Division Algorithms, Greatest Common Divisor Algorithms, Multiplication Algorithms, Algorithms
+@section Division Algorithms
+@cindex Division algorithms
+
+@menu
+* Single Limb Division::
+* Basecase Division::
+* Divide and Conquer Division::
+* Block-Wise Barrett Division::
+* Exact Division::
+* Exact Remainder::
+* Small Quotient Division::
+@end menu
+
+
+@node Single Limb Division, Basecase Division, Division Algorithms, Division Algorithms
+@subsection Single Limb Division
+
+N@cross{}1 division is implemented using repeated 2@cross{}1 divisions from
+high to low, either with a hardware divide instruction or a multiplication by
+inverse, whichever is best on a given CPU.
+
+The multiply by inverse follows ``Improved division by invariant integers'' by
+M@"oller and Granlund (@pxref{References}) and is implemented as
+@code{udiv_qrnnd_preinv} in @file{gmp-impl.h}.  The idea is to have a
+fixed-point approximation to @math{1/d} (see @code{invert_limb}) and then
+multiply by the high limb (plus one bit) of the dividend to get a quotient
+@math{q}.  With @math{d} normalized (high bit set), @math{q} is no more than 1
+too small.  Subtracting @m{qd,q*d} from the dividend gives a remainder, and
+reveals whether @math{q} or @math{q-1} is correct.
+
+The result is a division done with two multiplications and four or five
+arithmetic operations.  On CPUs with low latency multipliers this can be much
+faster than a hardware divide, though the cost of calculating the inverse at
+the start may mean it's only better on inputs bigger than say 4 or 5 limbs.
+
+When a divisor must be normalized, either for the generic C
+@code{__udiv_qrnnd_c} or the multiply by inverse, the division performed is
+actually @m{a2^k,a*2^k} by @m{d2^k,d*2^k} where @math{a} is the dividend and
+@math{k} is the power necessary to have the high bit of @m{d2^k,d*2^k} set.
+The bit shifts for the dividend are usually accomplished ``on the fly''
+meaning by extracting the appropriate bits at each step.  Done this way the
+quotient limbs come out aligned ready to store.  When only the remainder is
+wanted, an alternative is to take the dividend limbs unshifted and calculate
+@m{r = a \bmod d2^k, r = a mod d*2^k} followed by an extra final step @m{r2^k
+\bmod d2^k, r*2^k mod d*2^k}.  This can help on CPUs with poor bit shifts or
+few registers.
+
+The multiply by inverse can be done two limbs at a time.  The calculation is
+basically the same, but the inverse is two limbs and the divisor treated as if
+padded with a low zero limb.  This means more work, since the inverse will
+need a 2@cross{}2 multiply, but the four 1@cross{}1s to do that are
+independent and can therefore be done partly or wholly in parallel.  Likewise
+for a 2@cross{}1 calculating @m{qd,q*d}.  The net effect is to process two
+limbs with roughly the same two multiplies worth of latency that one limb at a
+time gives.  This extends to 3 or 4 limbs at a time, though the extra work to
+apply the inverse will almost certainly soon reach the limits of multiplier
+throughput.
+
+A similar approach in reverse can be taken to process just half a limb at a
+time if the divisor is only a half limb.  In this case the 1@cross{}1 multiply
+for the inverse effectively becomes two @m{{1\over2}\times1, (1/2)x1} for each
+limb, which can be a saving on CPUs with a fast half limb multiply, or in fact
+if the only multiply is a half limb, and especially if it's not pipelined.
+
+
+@node Basecase Division, Divide and Conquer Division, Single Limb Division, Division Algorithms
+@subsection Basecase Division
+
+Basecase N@cross{}M division is like long division done by hand, but in base
+@m{2\GMPraise{@code{mp\_bits\_per\_limb}}, 2^mp_bits_per_limb}.  See Knuth
+section 4.3.1 algorithm D, and @file{mpn/generic/sb_divrem_mn.c}.
+
+Briefly stated, while the dividend remains larger than the divisor, a high
+quotient limb is formed and the M@cross{}1 product @m{qd,q*d} subtracted at
+the top end of the dividend.  With a normalized divisor (most significant bit
+set), each quotient limb can be formed with a 2@cross{}1 division and a
+1@cross{}1 multiplication plus some subtractions.  The 2@cross{}1 division is
+by the high limb of the divisor and is done either with a hardware divide or a
+multiply by inverse (the same as in @ref{Single Limb Division}) whichever is
+faster.  Such a quotient is sometimes one too big, requiring an addback of the
+divisor, but that happens rarely.
+
+With Q=N@minus{}M being the number of quotient limbs, this is an
+@m{O(QM),O(Q*M)} algorithm and will run at a speed similar to a basecase
+Q@cross{}M multiplication, differing in fact only in the extra multiply and
+divide for each of the Q quotient limbs.
+
+
+@node Divide and Conquer Division, Block-Wise Barrett Division, Basecase Division, Division Algorithms
+@subsection Divide and Conquer Division
+
+For divisors larger than @code{DC_DIV_QR_THRESHOLD}, division is done by dividing.
+Or to be precise by a recursive divide and conquer algorithm based on work by
+Moenck and Borodin, Jebelean, and Burnikel and Ziegler (@pxref{References}).
+
+The algorithm consists essentially of recognising that a 2N@cross{}N division
+can be done with the basecase division algorithm (@pxref{Basecase Division}),
+but using N/2 limbs as a base, not just a single limb.  This way the
+multiplications that arise are (N/2)@cross{}(N/2) and can take advantage of
+Karatsuba and higher multiplication algorithms (@pxref{Multiplication
+Algorithms}).  The two ``digits'' of the quotient are formed by recursive
+N@cross{}(N/2) divisions.
+
+If the (N/2)@cross{}(N/2) multiplies are done with a basecase multiplication
+then the work is about the same as a basecase division, but with more function
+call overheads and with some subtractions separated from the multiplies.
+These overheads mean that it's only when N/2 is above
+@code{MUL_TOOM22_THRESHOLD} that divide and conquer is of use.
+
+@code{DC_DIV_QR_THRESHOLD} is based on the divisor size N, so it will be somewhere
+above twice @code{MUL_TOOM22_THRESHOLD}, but how much above depends on the
+CPU@.  An optimized @code{mpn_mul_basecase} can lower @code{DC_DIV_QR_THRESHOLD} a
+little by offering a ready-made advantage over repeated @code{mpn_submul_1}
+calls.
+
+Divide and conquer is asymptotically @m{O(M(N)\log N),O(M(N)*log(N))} where
+@math{M(N)} is the time for an N@cross{}N multiplication done with FFTs.  The
+actual time is a sum over multiplications of the recursed sizes, as can be
+seen near the end of section 2.2 of Burnikel and Ziegler.  For example, within
+the Toom-3 range, divide and conquer is @m{2.63M(N), 2.63*M(N)}.  With higher
+algorithms the @math{M(N)} term improves and the multiplier tends to @m{\log
+N, log(N)}.  In practice, at moderate to large sizes, a 2N@cross{}N division
+is about 2 to 4 times slower than an N@cross{}N multiplication.
+
+
+@node Block-Wise Barrett Division, Exact Division, Divide and Conquer Division, Division Algorithms
+@subsection Block-Wise Barrett Division
+
+For the largest divisions, a block-wise Barrett division algorithm is used.
+Here, the divisor is inverted to a precision determined by the relative size of
+the dividend and divisor.  Blocks of quotient limbs are then generated by
+multiplying blocks from the dividend by the inverse.
+
+Our block-wise algorithm computes a smaller inverse than in the plain Barrett
+algorithm.  For a @math{2n/n} division, the inverse will be just @m{\lceil n/2
+\rceil, ceil(n/2)} limbs.
+
+
+@node Exact Division, Exact Remainder, Block-Wise Barrett Division, Division Algorithms
+@subsection Exact Division
+
+
+A so-called exact division is when the dividend is known to be an exact
+multiple of the divisor.  Jebelean's exact division algorithm uses this
+knowledge to make some significant optimizations (@pxref{References}).
+
+The idea can be illustrated in decimal for example with 368154 divided by
+543.  Because the low digit of the dividend is 4, the low digit of the
+quotient must be 8.  This is arrived at from @m{4 \mathord{\times} 7 \bmod 10,
+4*7 mod 10}, using the fact 7 is the modular inverse of 3 (the low digit of
+the divisor), since @m{3 \mathord{\times} 7 \mathop{\equiv} 1 \bmod 10, 3*7
+@equiv{} 1 mod 10}.  So @m{8\mathord{\times}543 = 4344,8*543=4344} can be
+subtracted from the dividend leaving 363810.  Notice the low digit has become
+zero.
+
+The procedure is repeated at the second digit, with the next quotient digit 7
+(@m{1 \mathord{\times} 7 \bmod 10, 1*7 mod 10}), subtracting
+@m{7\mathord{\times}543 = 3801,7*543=3801}, leaving 325800.  And finally at
+the third digit with quotient digit 6 (@m{8 \mathord{\times} 7 \bmod 10, 8*7
+mod 10}), subtracting @m{6\mathord{\times}543 = 3258,6*543=3258} leaving 0.
+So the quotient is 678.
+
+Notice however that the multiplies and subtractions don't need to extend past
+the low three digits of the dividend, since that's enough to determine the
+three quotient digits.  For the last quotient digit no subtraction is needed
+at all.  On a 2N@cross{}N division like this one, only about half the work of
+a normal basecase division is necessary.
+
+For an N@cross{}M exact division producing Q=N@minus{}M quotient limbs, the
+saving over a normal basecase division is in two parts.  Firstly, each of the
+Q quotient limbs needs only one multiply, not a 2@cross{}1 divide and
+multiply.  Secondly, the crossproducts are reduced when @math{Q>M} to
+@m{QM-M(M+1)/2,Q*M-M*(M+1)/2}, or when @math{Q@le{}M} to @m{Q(Q-1)/2,
+Q*(Q-1)/2}.  Notice the savings are complementary.  If Q is big then many
+divisions are saved, or if Q is small then the crossproducts reduce to a small
+number.
+
+The modular inverse used is calculated efficiently by @code{binvert_limb} in
+@file{gmp-impl.h}.  This does four multiplies for a 32-bit limb, or six for a
+64-bit limb.  @file{tune/modlinv.c} has some alternate implementations that
+might suit processors better at bit twiddling than multiplying.
+
+The sub-quadratic exact division described by Jebelean in ``Exact Division
+with Karatsuba Complexity'' is not currently implemented.  It uses a
+rearrangement similar to the divide and conquer for normal division
+(@pxref{Divide and Conquer Division}), but operating from low to high.  A
+further possibility not currently implemented is ``Bidirectional Exact Integer
+Division'' by Krandick and Jebelean which forms quotient limbs from both the
+high and low ends of the dividend, and can halve once more the number of
+crossproducts needed in a 2N@cross{}N division.
+
+A special case exact division by 3 exists in @code{mpn_divexact_by3},
+supporting Toom-3 multiplication and @code{mpq} canonicalizations.  It forms
+quotient digits with a multiply by the modular inverse of 3 (which is
+@code{0xAA..AAB}) and uses two comparisons to determine a borrow for the next
+limb.  The multiplications don't need to be on the dependent chain, as long as
+the effect of the borrows is applied, which can help chips with pipelined
+multipliers.
+
+
+@node Exact Remainder, Small Quotient Division, Exact Division, Division Algorithms
+@subsection Exact Remainder
+@cindex Exact remainder
+
+If the exact division algorithm is done with a full subtraction at each stage
+and the dividend isn't a multiple of the divisor, then low zero limbs are
+produced but with a remainder in the high limbs.  For dividend @math{a},
+divisor @math{d}, quotient @math{q}, and @m{b = 2
+\GMPraise{@code{mp\_bits\_per\_limb}}, b = 2^mp_bits_per_limb}, this remainder
+@math{r} is of the form
+@tex
+$$ a = qd + r b^n $$
+@end tex
+@ifnottex
+
+@example
+a = q*d + r*b^n
+@end example
+
+@end ifnottex
+@math{n} represents the number of zero limbs produced by the subtractions,
+that being the number of limbs produced for @math{q}.  @math{r} will be in the
+range @math{0@le{}r<d} and can be viewed as a remainder, but one shifted up by
+a factor of @math{b^n}.
+
+Carrying out full subtractions at each stage means the same number of cross
+products must be done as a normal division, but there's still some single limb
+divisions saved.  When @math{d} is a single limb some simplifications arise,
+providing good speedups on a number of processors.
+
+@code{mpn_divexact_by3}, @code{mpn_modexact_1_odd} and the @code{mpn_redc_X}
+functions differ subtly in how they return @math{r}, leading to some negations
+in the above formula, but all are essentially the same.
+
+@cindex Divisibility algorithm
+@cindex Congruence algorithm
+Clearly @math{r} is zero when @math{a} is a multiple of @math{d}, and this
+leads to divisibility or congruence tests which are potentially more efficient
+than a normal division.
+
+The factor of @math{b^n} on @math{r} can be ignored in a GCD when @math{d} is
+odd, hence the use of @code{mpn_modexact_1_odd} by @code{mpn_gcd_1} and
+@code{mpz_kronecker_ui} etc (@pxref{Greatest Common Divisor Algorithms}).
+
+Montgomery's REDC method for modular multiplications uses operands of the form
+of @m{xb^{-n}, x*b^-n} and @m{yb^{-n}, y*b^-n} and on calculating @m{(xb^{-n})
+(yb^{-n}), (x*b^-n)*(y*b^-n)} uses the factor of @math{b^n} in the exact
+remainder to reach a product in the same form @m{(xy)b^{-n}, (x*y)*b^-n}
+(@pxref{Modular Powering Algorithm}).
+
+Notice that @math{r} generally gives no useful information about the ordinary
+remainder @math{a @bmod d} since @math{b^n @bmod d} could be anything.  If
+however @math{b^n @equiv{} 1 @bmod d}, then @math{r} is the negative of the
+ordinary remainder.  This occurs whenever @math{d} is a factor of
+@math{b^n-1}, as for example with 3 in @code{mpn_divexact_by3}.  For a 32 or
+64 bit limb other such factors include 5, 17 and 257, but no particular use
+has been found for this.
+
+
+@node Small Quotient Division,  , Exact Remainder, Division Algorithms
+@subsection Small Quotient Division
+
+An N@cross{}M division where the number of quotient limbs Q=N@minus{}M is
+small can be optimized somewhat.
+
+An ordinary basecase division normalizes the divisor by shifting it to make
+the high bit set, shifting the dividend accordingly, and shifting the
+remainder back down at the end of the calculation.  This is wasteful if only a
+few quotient limbs are to be formed.  Instead a division of just the top
+@m{\rm2Q,2*Q} limbs of the dividend by the top Q limbs of the divisor can be
+used to form a trial quotient.  This requires only those limbs normalized, not
+the whole of the divisor and dividend.
+
+A multiply and subtract then applies the trial quotient to the M@minus{}Q
+unused limbs of the divisor and N@minus{}Q dividend limbs (which includes Q
+limbs remaining from the trial quotient division).  The starting trial
+quotient can be 1 or 2 too big, but all cases of 2 too big and most cases of 1
+too big are detected by first comparing the most significant limbs that will
+arise from the subtraction.  An addback is done if the quotient still turns
+out to be 1 too big.
+
+This whole procedure is essentially the same as one step of the basecase
+algorithm done in a Q limb base, though with the trial quotient test done only
+with the high limbs, not an entire Q limb ``digit'' product.  The correctness
+of this weaker test can be established by following the argument of Knuth
+section 4.3.1 exercise 20 but with the @m{v_2 \GMPhat q > b \GMPhat r
++ u_2, v2*q>b*r+u2} condition appropriately relaxed.
+
+
+@need 1000
+@node Greatest Common Divisor Algorithms, Powering Algorithms, Division Algorithms, Algorithms
+@section Greatest Common Divisor
+@cindex Greatest common divisor algorithms
+@cindex GCD algorithms
+
+@menu
+* Binary GCD::
+* Lehmer's Algorithm::
+* Subquadratic GCD::
+* Extended GCD::
+* Jacobi Symbol::
+@end menu
+
+
+@node Binary GCD, Lehmer's Algorithm, Greatest Common Divisor Algorithms, Greatest Common Divisor Algorithms
+@subsection Binary GCD
+
+At small sizes GMP uses an @math{O(N^2)} binary style GCD@.  This is described
+in many textbooks, for example Knuth section 4.5.2 algorithm B@.  It simply
+consists of successively reducing odd operands @math{a} and @math{b} using
+
+@quotation
+@math{a,b = @abs{}(a-b),@min{}(a,b)} @*
+strip factors of 2 from @math{a}
+@end quotation
+
+The Euclidean GCD algorithm, as per Knuth algorithms E and A, repeatedly
+computes the quotient @m{q = \lfloor a/b \rfloor, q = floor(a/b)} and replaces
+@math{a,b} by @math{b, a - q b}.  The binary algorithm has so far been found to
+be faster than the Euclidean algorithm everywhere.  One reason the binary
+method does well is that the implied quotient at each step is usually small,
+so often only one or two subtractions are needed to get the same effect as a
+division.  Quotients 1, 2 and 3 for example occur 67.7% of the time, see Knuth
+section 4.5.3 Theorem E.
+
+When the implied quotient is large, meaning @math{b} is much smaller than
+@math{a}, then a division is worthwhile.  This is the basis for the initial
+@math{a @bmod b} reductions in @code{mpn_gcd} and @code{mpn_gcd_1} (the latter
+for both N@cross{}1 and 1@cross{}1 cases).  But after that initial reduction,
+big quotients occur too rarely to make it worth checking for them.
+
+@sp 1
+The final @math{1@cross{}1} GCD in @code{mpn_gcd_1} is done in the generic C
+code as described above.  For two N-bit operands, the algorithm takes about
+0.68 iterations per bit.  For optimum performance some attention needs to be
+paid to the way the factors of 2 are stripped from @math{a}.
+
+Firstly it may be noted that in twos complement the number of low zero bits on
+@math{a-b} is the same as @math{b-a}, so counting or testing can begin on
+@math{a-b} without waiting for @math{@abs{}(a-b)} to be determined.
+
+A loop stripping low zero bits tends not to branch predict well, since the
+condition is data dependent.  But on average there's only a few low zeros, so
+an option is to strip one or two bits arithmetically then loop for more (as
+done for AMD K6).  Or use a lookup table to get a count for several bits then
+loop for more (as done for AMD K7).  An alternative approach is to keep just
+one of @math{a} or @math{b} odd and iterate
+
+@quotation
+@math{a,b = @abs{}(a-b), @min{}(a,b)} @*
+@math{a = a/2} if even @*
+@math{b = b/2} if even
+@end quotation
+
+This requires about 1.25 iterations per bit, but stripping of a single bit at
+each step avoids any branching.  Repeating the bit strip reduces to about 0.9
+iterations per bit, which may be a worthwhile tradeoff.
+
+Generally with the above approaches a speed of perhaps 6 cycles per bit can be
+achieved, which is still not terribly fast with for instance a 64-bit GCD
+taking nearly 400 cycles.  It's this sort of time which means it's not usually
+advantageous to combine a set of divisibility tests into a GCD.
+
+Currently, the binary algorithm is used for GCD only when @math{N < 3}.
+
+@node Lehmer's Algorithm, Subquadratic GCD, Binary GCD, Greatest Common Divisor Algorithms
+@comment  node-name,  next,  previous,  up
+@subsection Lehmer's algorithm
+
+Lehmer's improvement of the Euclidean algorithm is based on the observation
+that the initial part of the quotient sequence depends only on the most
+significant parts of the inputs. The variant of Lehmer's algorithm used in GMP
+splits off the most significant two limbs, as suggested, e.g., in ``A
+Double-Digit Lehmer-Euclid Algorithm'' by Jebelean (@pxref{References}). The
+quotients of two double-limb inputs are collected as a 2 by 2 matrix with
+single-limb elements. This is done by the function @code{mpn_hgcd2}. The
+resulting matrix is applied to the inputs using @code{mpn_mul_1} and
+@code{mpn_submul_1}. Each iteration usually reduces the inputs by almost one
+limb. In the rare case of a large quotient, no progress can be made by
+examining just the most significant two limbs, and the quotient is computed
+using plain division.
+
+The resulting algorithm is asymptotically @math{O(N^2)}, just as the Euclidean
+algorithm and the binary algorithm. The quadratic part of the work is
+the calls to @code{mpn_mul_1} and @code{mpn_submul_1}. For small sizes, the
+linear work is also significant. There are roughly @math{N} calls to the
+@code{mpn_hgcd2} function. This function uses a couple of important
+optimizations:
+
+@itemize
+@item
+It uses the same relaxed notion of correctness as @code{mpn_hgcd} (see next
+section). This means that when called with the most significant two limbs of
+two large numbers, the returned matrix does not always correspond exactly to
+the initial quotient sequence for the two large numbers; the final quotient
+may sometimes be one off.
+
+@item
+It takes advantage of the fact the quotients are usually small. The division
+operator is not used, since the corresponding assembler instruction is very
+slow on most architectures. (This code could probably be improved further; it
+uses many branches that are unfriendly to prediction.)
+
+@item
+It switches from double-limb calculations to single-limb calculations half-way
+through, when the input numbers have been reduced in size from two limbs to
+one and a half.
+
+@end itemize
+
+@node Subquadratic GCD, Extended GCD, Lehmer's Algorithm, Greatest Common Divisor Algorithms
+@subsection Subquadratic GCD
+
+For inputs larger than @code{GCD_DC_THRESHOLD}, GCD is computed via the HGCD
+(Half GCD) function, as a generalization of Lehmer's algorithm.
+
+Let the inputs @math{a,b} be of size @math{N} limbs each. Put @m{S=\lfloor N/2
+\rfloor + 1, S = floor(N/2) + 1}. Then HGCD(a,b) returns a transformation
+matrix @math{T} with non-negative elements, and reduced numbers @math{(c;d) =
+T^{-1} (a;b)}. The reduced numbers @math{c,d} must be larger than @math{S}
+limbs, while their difference @math{abs(c-d)} must fit in @math{S} limbs. The
+matrix elements will also be of size roughly @math{N/2}.
+
+The HGCD base case uses Lehmer's algorithm, but with the above stop condition
+that returns reduced numbers and the corresponding transformation matrix
+half-way through. For inputs larger than @code{HGCD_THRESHOLD}, HGCD is
+computed recursively, using the divide and conquer algorithm in ``On
+Sch@"onhage's algorithm and subquadratic integer GCD computation'' by M@"oller
+(@pxref{References}). The recursive algorithm consists of these main
+steps.
+
+@itemize
+
+@item
+Call HGCD recursively, on the most significant @math{N/2} limbs. Apply the
+resulting matrix @math{T_1} to the full numbers, reducing them to a size just
+above @math{3N/4}.
+
+@item
+Perform a small number of division or subtraction steps to reduce the numbers
+to size below @math{3N/4}. This is essential mainly for the unlikely case of
+large quotients.
+
+@item
+Call HGCD recursively, on the most significant @math{N/2} limbs of the reduced
+numbers. Apply the resulting matrix @math{T_2} to the full numbers, reducing
+them to a size just above @math{N/2}.
+
+@item
+Compute @math{T = T_1 T_2}.
+
+@item
+Perform a small number of division and subtraction steps to satisfy the
+requirements, and return.
+@end itemize
+
+GCD is then implemented as a loop around HGCD, similarly to Lehmer's
+algorithm. Where Lehmer repeatedly chops off the top two limbs, calls
+@code{mpn_hgcd2}, and applies the resulting matrix to the full numbers, the
+subquadratic GCD chops off the most significant third of the limbs (the
+proportion is a tuning parameter, and @math{1/3} seems to be more efficient
+than, e.g., @math{1/2}), calls @code{mpn_hgcd}, and applies the resulting
+matrix. Once the input numbers are reduced to size below
+@code{GCD_DC_THRESHOLD}, Lehmer's algorithm is used for the rest of the work.
+
+The asymptotic running time of both HGCD and GCD is @m{O(M(N)\log N),O(M(N)*log(N))},
+where @math{M(N)} is the time for multiplying two @math{N}-limb numbers.
+
+@comment  node-name,  next,  previous,  up
+
+@node Extended GCD, Jacobi Symbol, Subquadratic GCD, Greatest Common Divisor Algorithms
+@subsection Extended GCD
+
+The extended GCD function, or GCDEXT, calculates @math{@gcd{}(a,b)} and also
+cofactors @math{x} and @math{y} satisfying @m{ax+by=\gcd(a@C{}b),
+a*x+b*y=gcd(a@C{}b)}. All the algorithms used for plain GCD are extended to
+handle this case. The binary algorithm is used only for single-limb GCDEXT.
+Lehmer's algorithm is used for sizes up to @code{GCDEXT_DC_THRESHOLD}. Above
+this threshold, GCDEXT is implemented as a loop around HGCD, but with more
+book-keeping to keep track of the cofactors. This gives the same asymptotic
+running time as for GCD and HGCD, @m{O(M(N)\log N),O(M(N)*log(N))}.
+
+One difference to plain GCD is that while the inputs @math{a} and @math{b} are
+reduced as the algorithm proceeds, the cofactors @math{x} and @math{y} grow in
+size. This makes the tuning of the chopping-point more difficult. The current
+code chops off the most significant half of the inputs for the call to HGCD in
+the first iteration, and the most significant two thirds for the remaining
+calls. This strategy could surely be improved. Also the stop condition for the
+loop, where Lehmer's algorithm is invoked once the inputs are reduced below
+@code{GCDEXT_DC_THRESHOLD}, could maybe be improved by taking into account the
+current size of the cofactors.
+
+@node Jacobi Symbol,  , Extended GCD, Greatest Common Divisor Algorithms
+@subsection Jacobi Symbol
+@cindex Jacobi symbol algorithm
+
+@code{mpz_jacobi} and @code{mpz_kronecker} are currently implemented with a
+simple binary algorithm similar to that described for the GCDs (@pxref{Binary
+GCD}).  They're not very fast when both inputs are large.  Lehmer's multi-step
+improvement or a binary based multi-step algorithm is likely to be better.
+
+When one operand fits a single limb, and that includes @code{mpz_kronecker_ui}
+and friends, an initial reduction is done with either @code{mpn_mod_1} or
+@code{mpn_modexact_1_odd}, followed by the binary algorithm on a single limb.
+The binary algorithm is well suited to a single limb, and the whole
+calculation in this case is quite efficient.
+
+In all the routines sign changes for the result are accumulated using some bit
+twiddling, avoiding table lookups or conditional jumps.
+
+
+@need 1000
+@node Powering Algorithms, Root Extraction Algorithms, Greatest Common Divisor Algorithms, Algorithms
+@section Powering Algorithms
+@cindex Powering algorithms
+
+@menu
+* Normal Powering Algorithm::
+* Modular Powering Algorithm::
+@end menu
+
+
+@node Normal Powering Algorithm, Modular Powering Algorithm, Powering Algorithms, Powering Algorithms
+@subsection Normal Powering
+
+Normal @code{mpz} or @code{mpf} powering uses a simple binary algorithm,
+successively squaring and then multiplying by the base when a 1 bit is seen in
+the exponent, as per Knuth section 4.6.3.  The ``left to right''
+variant described there is used rather than algorithm A, since it's just as
+easy and can be done with somewhat less temporary memory.
+
+
+@node Modular Powering Algorithm,  , Normal Powering Algorithm, Powering Algorithms
+@subsection Modular Powering
+
+Modular powering is implemented using a @math{2^k}-ary sliding window
+algorithm, as per ``Handbook of Applied Cryptography'' algorithm 14.85
+(@pxref{References}).  @math{k} is chosen according to the size of the
+exponent.  Larger exponents use larger values of @math{k}, the choice being
+made to minimize the average number of multiplications that must supplement
+the squaring.
+
+The modular multiplies and squares use either a simple division or the REDC
+method by Montgomery (@pxref{References}).  REDC is a little faster,
+essentially saving N single limb divisions in a fashion similar to an exact
+remainder (@pxref{Exact Remainder}).
+
+
+@node Root Extraction Algorithms, Radix Conversion Algorithms, Powering Algorithms, Algorithms
+@section Root Extraction Algorithms
+@cindex Root extraction algorithms
+
+@menu
+* Square Root Algorithm::
+* Nth Root Algorithm::
+* Perfect Square Algorithm::
+* Perfect Power Algorithm::
+@end menu
+
+
+@node Square Root Algorithm, Nth Root Algorithm, Root Extraction Algorithms, Root Extraction Algorithms
+@subsection Square Root
+@cindex Square root algorithm
+@cindex Karatsuba square root algorithm
+
+Square roots are taken using the ``Karatsuba Square Root'' algorithm by Paul
+Zimmermann (@pxref{References}).
+
+An input @math{n} is split into four parts of @math{k} bits each, so with
+@math{b=2^k} we have @m{n = a_3b^3 + a_2b^2 + a_1b + a_0, n = a3*b^3 + a2*b^2
++ a1*b + a0}.  Part @ms{a,3} must be ``normalized'' so that either the high or
+second highest bit is set.  In GMP, @math{k} is kept on a limb boundary and
+the input is left shifted (by an even number of bits) to normalize.
+
+The square root of the high two parts is taken, by recursive application of
+the algorithm (bottoming out in a one-limb Newton's method),
+@tex
+$$ s',r' = \mathop{\rm sqrtrem} \> (a_3b + a_2) $$
+@end tex
+@ifnottex
+
+@example
+s1,r1 = sqrtrem (a3*b + a2)
+@end example
+
+@end ifnottex
+This is an approximation to the desired root and is extended by a division to
+give @math{s},@math{r},
+@tex
+$$\eqalign{
+q,u &= \mathop{\rm divrem} \> (r'b + a_1, 2s') \cr
+s &= s'b + q \cr
+r &= ub + a_0 - q^2
+}$$
+@end tex
+@ifnottex
+
+@example
+q,u = divrem (r1*b + a1, 2*s1)
+s = s1*b + q
+r = u*b + a0 - q^2
+@end example
+
+@end ifnottex
+The normalization requirement on @ms{a,3} means at this point @math{s} is
+either correct or 1 too big.  @math{r} is negative in the latter case, so
+@tex
+$$\eqalign{
+\mathop{\rm if} \; r &< 0 \; \mathop{\rm then} \cr
+r &\leftarrow r + 2s - 1 \cr
+s &\leftarrow s - 1
+}$$
+@end tex
+@ifnottex
+
+@example
+if r < 0 then
+  r = r + 2*s - 1
+  s = s - 1
+@end example
+
+@end ifnottex
+The algorithm is expressed in a divide and conquer form, but as noted in the
+paper it can also be viewed as a discrete variant of Newton's method, or as a
+variation on the schoolboy method (no longer taught) for square roots two
+digits at a time.
+
+If the remainder @math{r} is not required then usually only a few high limbs
+of @math{r} and @math{u} need to be calculated to determine whether an
+adjustment to @math{s} is required.  This optimization is not currently
+implemented.
+
+In the Karatsuba multiplication range this algorithm is @m{O({3\over2}
+M(N/2)),O(1.5*M(N/2))}, where @math{M(n)} is the time to multiply two numbers
+of @math{n} limbs.  In the FFT multiplication range this grows to a bound of
+@m{O(6 M(N/2)),O(6*M(N/2))}.  In practice a factor of about 1.5 to 1.8 is
+found in the Karatsuba and Toom-3 ranges, growing to 2 or 3 in the FFT range.
+
+The algorithm does all its calculations in integers and the resulting
+@code{mpn_sqrtrem} is used for both @code{mpz_sqrt} and @code{mpf_sqrt}.
+The extended precision given by @code{mpf_sqrt_ui} is obtained by
+padding with zero limbs.
+
+
+@node Nth Root Algorithm, Perfect Square Algorithm, Square Root Algorithm, Root Extraction Algorithms
+@subsection Nth Root
+@cindex Root extraction algorithm
+@cindex Nth root algorithm
+
+Integer Nth roots are taken using Newton's method with the following
+iteration, where @math{A} is the input and @math{n} is the root to be taken.
+@tex
+$$a_{i+1} = {1\over n} \left({A \over a_i^{n-1}} + (n-1)a_i \right)$$
+@end tex
+@ifnottex
+
+@example
+         1         A
+a[i+1] = - * ( --------- + (n-1)*a[i] )
+         n     a[i]^(n-1)
+@end example
+
+@end ifnottex
+The initial approximation @m{a_1,a[1]} is generated bitwise by successively
+powering a trial root with or without new 1 bits, aiming to be just above the
+true root.  The iteration converges quadratically when started from a good
+approximation.  When @math{n} is large more initial bits are needed to get
+good convergence.  The current implementation is not particularly well
+optimized.
+
+
+@node Perfect Square Algorithm, Perfect Power Algorithm, Nth Root Algorithm, Root Extraction Algorithms
+@subsection Perfect Square
+@cindex Perfect square algorithm
+
+A significant fraction of non-squares can be quickly identified by checking
+whether the input is a quadratic residue modulo small integers.
+
+@code{mpz_perfect_square_p} first tests the input mod 256, which means just
+examining the low byte.  Only 44 different values occur for squares mod 256,
+so 82.8% of inputs can be immediately identified as non-squares.
+
+On a 32-bit system similar tests are done mod 9, 5, 7, 13 and 17, for a total
+99.25% of inputs identified as non-squares.  On a 64-bit system 97 is tested
+too, for a total 99.62%.
+
+These moduli are chosen because they're factors of @math{2^@W{24}-1} (or
+@math{2^@W{48}-1} for 64-bits), and such a remainder can be quickly taken just
+using additions (see @code{mpn_mod_34lsub1}).
+
+When nails are in use moduli are instead selected by the @file{gen-psqr.c}
+program and applied with an @code{mpn_mod_1}.  The same @math{2^@W{24}-1} or
+@math{2^@W{48}-1} could be done with nails using some extra bit shifts, but
+this is not currently implemented.
+
+In any case each modulus is applied to the @code{mpn_mod_34lsub1} or
+@code{mpn_mod_1} remainder and a table lookup identifies non-squares.  By
+using a ``modexact'' style calculation, and suitably permuted tables, just one
+multiply each is required, see the code for details.  Moduli are also combined
+to save operations, so long as the lookup tables don't become too big.
+@file{gen-psqr.c} does all the pre-calculations.
+
+A square root must still be taken for any value that passes these tests, to
+verify it's really a square and not one of the small fraction of non-squares
+that get through (i.e.@: a pseudo-square to all the tested bases).
+
+Clearly more residue tests could be done, @code{mpz_perfect_square_p} only
+uses a compact and efficient set.  Big inputs would probably benefit from more
+residue testing, small inputs might be better off with less.  The assumed
+distribution of squares versus non-squares in the input would affect such
+considerations.
+
+
+@node Perfect Power Algorithm,  , Perfect Square Algorithm, Root Extraction Algorithms
+@subsection Perfect Power
+@cindex Perfect power algorithm
+
+Detecting perfect powers is required by some factorization algorithms.
+Currently @code{mpz_perfect_power_p} is implemented using repeated Nth root
+extractions, though naturally only prime roots need to be considered.
+(@xref{Nth Root Algorithm}.)
+
+If a prime divisor @math{p} with multiplicity @math{e} can be found, then only
+roots which are divisors of @math{e} need to be considered, much reducing the
+work necessary.  To this end divisibility by a set of small primes is checked.
+
+
+@node Radix Conversion Algorithms, Other Algorithms, Root Extraction Algorithms, Algorithms
+@section Radix Conversion
+@cindex Radix conversion algorithms
+
+Radix conversions are less important than other algorithms.  A program
+dominated by conversions should probably use a different data representation.
+
+@menu
+* Binary to Radix::
+* Radix to Binary::
+@end menu
+
+
+@node Binary to Radix, Radix to Binary, Radix Conversion Algorithms, Radix Conversion Algorithms
+@subsection Binary to Radix
+
+Conversions from binary to a power-of-2 radix use a simple and fast
+@math{O(N)} bit extraction algorithm.
+
+Conversions from binary to other radices use one of two algorithms.  Sizes
+below @code{GET_STR_PRECOMPUTE_THRESHOLD} use a basic @math{O(N^2)} method.
+Repeated divisions by @math{b^n} are made, where @math{b} is the radix and
+@math{n} is the biggest power that fits in a limb.  But instead of simply
+using the remainder @math{r} from such divisions, an extra divide step is done
+to give a fractional limb representing @math{r/b^n}.  The digits of @math{r}
+can then be extracted using multiplications by @math{b} rather than divisions.
+Special case code is provided for decimal, allowing multiplications by 10 to
+optimize to shifts and adds.
+
+Above @code{GET_STR_PRECOMPUTE_THRESHOLD} a sub-quadratic algorithm is used.
+For an input @math{t}, powers @m{b^{n2^i},b^(n*2^i)} of the radix are
+calculated, until a power between @math{t} and @m{\sqrt{t},sqrt(t)} is
+reached.  @math{t} is then divided by that largest power, giving a quotient
+which is the digits above that power, and a remainder which is those below.
+These two parts are in turn divided by the second highest power, and so on
+recursively.  When a piece has been divided down to less than
+@code{GET_STR_DC_THRESHOLD} limbs, the basecase algorithm described above is
+used.
+
+The advantage of this algorithm is that big divisions can make use of the
+sub-quadratic divide and conquer division (@pxref{Divide and Conquer
+Division}), and big divisions tend to have less overheads than lots of
+separate single limb divisions anyway.  But in any case the cost of
+calculating the powers @m{b^{n2^i},b^(n*2^i)} must first be overcome.
+
+@code{GET_STR_PRECOMPUTE_THRESHOLD} and @code{GET_STR_DC_THRESHOLD} represent
+the same basic thing, the point where it becomes worth doing a big division to
+cut the input in half.  @code{GET_STR_PRECOMPUTE_THRESHOLD} includes the cost
+of calculating the radix power required, whereas @code{GET_STR_DC_THRESHOLD}
+assumes that's already available, which is the case when recursing.
+
+Since the base case produces digits from least to most significant but they
+want to be stored from most to least, it's necessary to calculate in advance
+how many digits there will be, or at least be sure not to underestimate that.
+For GMP the number of input bits is multiplied by @code{chars_per_bit_exactly}
+from @code{mp_bases}, rounding up.  The result is either correct or one too
+big.
+
+Examining some of the high bits of the input could increase the chance of
+getting the exact number of digits, but an exact result every time would not
+be practical, since in general the difference between numbers 100@dots{} and
+99@dots{} is only in the last few bits and the work to identify 99@dots{}
+might well be almost as much as a full conversion.
+
+@code{mpf_get_str} doesn't currently use the algorithm described here, it
+multiplies or divides by a power of @math{b} to move the radix point to
+just above the highest non-zero digit (or at worst one above that location),
+then multiplies by @math{b^n} to bring out digits.  This is @math{O(N^2)} and
+is certainly not optimal.
+
+The @math{r/b^n} scheme described above for using multiplications to bring out
+digits might be useful for more than a single limb.  Some brief experiments
+with it on the base case when recursing didn't give a noticeable improvement,
+but perhaps that was only due to the implementation.  Something similar would
+work for the sub-quadratic divisions too, though there would be the cost of
+calculating a bigger radix power.
+
+Another possible improvement for the sub-quadratic part would be to arrange
+for radix powers that balanced the sizes of quotient and remainder produced,
+i.e.@: the highest power would be an @m{b^{nk},b^(n*k)} approximately equal to
+@m{\sqrt{t},sqrt(t)}, not restricted to a @math{2^i} factor.  That ought to
+smooth out a graph of times against sizes, but may or may not be a net
+speedup.
+
+
+@node Radix to Binary,  , Binary to Radix, Radix Conversion Algorithms
+@subsection Radix to Binary
+
+@strong{This section needs to be rewritten, it currently describes the
+algorithms used before GMP 4.3.}
+
+Conversions from a power-of-2 radix into binary use a simple and fast
+@math{O(N)} bitwise concatenation algorithm.
+
+Conversions from other radices use one of two algorithms.  Sizes below
+@code{SET_STR_PRECOMPUTE_THRESHOLD} use a basic @math{O(N^2)} method.  Groups
+of @math{n} digits are converted to limbs, where @math{n} is the biggest
+power of the base @math{b} which will fit in a limb, then those groups are
+accumulated into the result by multiplying by @math{b^n} and adding.  This
+saves multi-precision operations, as per Knuth section 4.4 part E
+(@pxref{References}).  Some special case code is provided for decimal, giving
+the compiler a chance to optimize multiplications by 10.
+
+Above @code{SET_STR_PRECOMPUTE_THRESHOLD} a sub-quadratic algorithm is used.
+First groups of @math{n} digits are converted into limbs.  Then adjacent
+limbs are combined into limb pairs with @m{xb^n+y,x*b^n+y}, where @math{x}
+and @math{y} are the limbs.  Adjacent limb pairs are combined into quads
+similarly with @m{xb^{2n}+y,x*b^(2n)+y}.  This continues until a single block
+remains, that being the result.
+
+The advantage of this method is that the multiplications for each @math{x} are
+big blocks, allowing Karatsuba and higher algorithms to be used.  But the cost
+of calculating the powers @m{b^{n2^i},b^(n*2^i)} must be overcome.
+@code{SET_STR_PRECOMPUTE_THRESHOLD} usually ends up quite big, around 5000 digits, and on
+some processors much bigger still.
+
+@code{SET_STR_PRECOMPUTE_THRESHOLD} is based on the input digits (and tuned
+for decimal), though it might be better based on a limb count, so as to be
+independent of the base.  But that sort of count isn't used by the base case
+and so would need some sort of initial calculation or estimate.
+
+The main reason @code{SET_STR_PRECOMPUTE_THRESHOLD} is so much bigger than the
+corresponding @code{GET_STR_PRECOMPUTE_THRESHOLD} is that @code{mpn_mul_1} is
+much faster than @code{mpn_divrem_1} (often by a factor of 5, or more).
+
+
+@need 1000
+@node Other Algorithms, Assembly Coding, Radix Conversion Algorithms, Algorithms
+@section Other Algorithms
+
+@menu
+* Prime Testing Algorithm::
+* Factorial Algorithm::
+* Binomial Coefficients Algorithm::
+* Fibonacci Numbers Algorithm::
+* Lucas Numbers Algorithm::
+* Random Number Algorithms::
+@end menu
+
+
+@node Prime Testing Algorithm, Factorial Algorithm, Other Algorithms, Other Algorithms
+@subsection Prime Testing
+@cindex Prime testing algorithms
+
+The primality testing in @code{mpz_probab_prime_p} (@pxref{Number Theoretic
+Functions}) first does some trial division by small factors and then uses the
+Miller-Rabin probabilistic primality testing algorithm, as described in Knuth
+section 4.5.4 algorithm P (@pxref{References}).
+
+For an odd input @math{n}, and with @math{n = q@GMPmultiply{}2^k+1} where
+@math{q} is odd, this algorithm selects a random base @math{x} and tests
+whether @math{x^q @bmod{} n} is 1 or @math{-1}, or an @m{x^{q2^j} \bmod n,
+x^(q*2^j) mod n} is @math{-1}, for @math{1@le{}j<k}.  If so then @math{n}
+is probably prime, if not then @math{n} is definitely composite.
+
+Any prime @math{n} will pass the test, but some composites do too.  Such
+composites are known as strong pseudoprimes to base @math{x}.  No @math{n} is
+a strong pseudoprime to more than @math{1/4} of all bases (see Knuth exercise
+22), hence with @math{x} chosen at random there's no more than a @math{1/4}
+chance a ``probable prime'' will in fact be composite.
+
+In fact strong pseudoprimes are quite rare, making the test much more
+powerful than this analysis would suggest, but @math{1/4} is all that's proven
+for an arbitrary @math{n}.
+
+
+@node Factorial Algorithm, Binomial Coefficients Algorithm, Prime Testing Algorithm, Other Algorithms
+@subsection Factorial
+@cindex Factorial algorithm
+
+Factorials are calculated by a combination of removal of twos, powering, and
+binary splitting.  The procedure can be best illustrated with an example,
+
+@quotation
+@math{23! = 1.2.3.4.5.6.7.8.9.10.11.12.13.14.15.16.17.18.19.20.21.22.23}
+@end quotation
+
+@noindent
+has factors of two removed,
+
+@quotation
+@math{23! = 2^{19}.1.1.3.1.5.3.7.1.9.5.11.3.13.7.15.1.17.9.19.5.21.11.23}
+@end quotation
+
+@noindent
+and the resulting terms collected up according to their multiplicity,
+
+@quotation
+@math{23! = 2^{19}.(3.5)^3.(7.9.11)^2.(13.15.17.19.21.23)}
+@end quotation
+
+Each sequence such as @math{13.15.17.19.21.23} is evaluated by splitting into
+every second term, as for instance @math{(13.17.21).(15.19.23)}, and the same
+recursively on each half.  This is implemented iteratively using some bit
+twiddling.
+
+Such splitting is more efficient than repeated N@cross{}1 multiplies since it
+forms big multiplies, allowing Karatsuba and higher algorithms to be used.
+And even below the Karatsuba threshold a big block of work can be more
+efficient for the basecase algorithm.
+
+Splitting into subsequences of every second term keeps the resulting products
+more nearly equal in size than would the simpler approach of say taking the
+first half and second half of the sequence.  Nearly equal products are more
+efficient for the current multiply implementation.
+
+
+@node Binomial Coefficients Algorithm, Fibonacci Numbers Algorithm, Factorial Algorithm, Other Algorithms
+@subsection Binomial Coefficients
+@cindex Binomial coefficient algorithm
+
+Binomial coefficients @m{\left({n}\atop{k}\right), C(n@C{}k)} are calculated
+by first arranging @math{k @le{} n/2} using @m{\left({n}\atop{k}\right) =
+\left({n}\atop{n-k}\right), C(n@C{}k) = C(n@C{}n-k)} if necessary, and then
+evaluating the following product simply from @math{i=2} to @math{i=k}.
+@tex
+$$ \left({n}\atop{k}\right) = (n-k+1) \prod_{i=2}^{k} {{n-k+i} \over i} $$
+@end tex
+@ifnottex
+
+@example
+                      k  (n-k+i)
+C(n,k) =  (n-k+1) * prod -------
+                     i=2    i
+@end example
+
+@end ifnottex
+It's easy to show that each denominator @math{i} will divide the product so
+far, so the exact division algorithm is used (@pxref{Exact Division}).
+
+The numerators @math{n-k+i} and denominators @math{i} are first accumulated
+into as many as fit in a limb, to save multi-precision operations, though for
+@code{mpz_bin_ui} this applies only to the divisors, since @math{n} is an
+@code{mpz_t} and @math{n-k+i} in general won't fit in a limb at all.
+
+
+@node Fibonacci Numbers Algorithm, Lucas Numbers Algorithm, Binomial Coefficients Algorithm, Other Algorithms
+@subsection Fibonacci Numbers
+@cindex Fibonacci number algorithm
+
+The Fibonacci functions @code{mpz_fib_ui} and @code{mpz_fib2_ui} are designed
+for calculating isolated @m{F_n,F[n]} or @m{F_n,F[n]},@m{F_{n-1},F[n-1]}
+values efficiently.
+
+For small @math{n}, a table of single limb values in @code{__gmp_fib_table} is
+used.  On a 32-bit limb this goes up to @m{F_{47},F[47]}, or on a 64-bit limb
+up to @m{F_{93},F[93]}.  For convenience the table starts at @m{F_{-1},F[-1]}.
+
+Beyond the table, values are generated with a binary powering algorithm,
+calculating a pair @m{F_n,F[n]} and @m{F_{n-1},F[n-1]} working from high to
+low across the bits of @math{n}.  The formulas used are
+@tex
+$$\eqalign{
+  F_{2k+1} &= 4F_k^2 - F_{k-1}^2 + 2(-1)^k \cr
+  F_{2k-1} &=  F_k^2 + F_{k-1}^2           \cr
+  F_{2k}   &= F_{2k+1} - F_{2k-1}
+}$$
+@end tex
+@ifnottex
+
+@example
+F[2k+1] = 4*F[k]^2 - F[k-1]^2 + 2*(-1)^k
+F[2k-1] =   F[k]^2 + F[k-1]^2
+
+F[2k] = F[2k+1] - F[2k-1]
+@end example
+
+@end ifnottex
+At each step, @math{k} is the high @math{b} bits of @math{n}.  If the next bit
+of @math{n} is 0 then @m{F_{2k},F[2k]},@m{F_{2k-1},F[2k-1]} is used, or if
+it's a 1 then @m{F_{2k+1},F[2k+1]},@m{F_{2k},F[2k]} is used, and the process
+repeated until all bits of @math{n} are incorporated.  Notice these formulas
+require just two squares per bit of @math{n}.
+
+It'd be possible to handle the first few @math{n} above the single limb table
+with simple additions, using the defining Fibonacci recurrence @m{F_{k+1} =
+F_k + F_{k-1}, F[k+1]=F[k]+F[k-1]}, but this is not done since it usually
+turns out to be faster for only about 10 or 20 values of @math{n}, and
+including a block of code for just those doesn't seem worthwhile.  If they
+really mattered it'd be better to extend the data table.
+
+Using a table avoids lots of calculations on small numbers, and makes small
+@math{n} go fast.  A bigger table would make more small @math{n} go fast, it's
+just a question of balancing size against desired speed.  For GMP the code is
+kept compact, with the emphasis primarily on a good powering algorithm.
+
+@code{mpz_fib2_ui} returns both @m{F_n,F[n]} and @m{F_{n-1},F[n-1]}, but
+@code{mpz_fib_ui} is only interested in @m{F_n,F[n]}.  In this case the last
+step of the algorithm can become one multiply instead of two squares.  One of
+the following two formulas is used, according as @math{n} is odd or even.
+@tex
+$$\eqalign{
+  F_{2k}   &= F_k (F_k + 2F_{k-1}) \cr
+  F_{2k+1} &= (2F_k + F_{k-1}) (2F_k - F_{k-1}) + 2(-1)^k
+}$$
+@end tex
+@ifnottex
+
+@example
+F[2k]   = F[k]*(F[k]+2F[k-1])
+
+F[2k+1] = (2F[k]+F[k-1])*(2F[k]-F[k-1]) + 2*(-1)^k
+@end example
+
+@end ifnottex
+@m{F_{2k+1},F[2k+1]} here is the same as above, just rearranged to be a
+multiply.  For interest, the @m{2(-1)^k, 2*(-1)^k} term both here and above
+can be applied just to the low limb of the calculation, without a carry or
+borrow into further limbs, which saves some code size.  See comments with
+@code{mpz_fib_ui} and the internal @code{mpn_fib2_ui} for how this is done.
+
+
+@node Lucas Numbers Algorithm, Random Number Algorithms, Fibonacci Numbers Algorithm, Other Algorithms
+@subsection Lucas Numbers
+@cindex Lucas number algorithm
+
+@code{mpz_lucnum2_ui} derives a pair of Lucas numbers from a pair of Fibonacci
+numbers with the following simple formulas.
+@tex
+$$\eqalign{
+  L_k     &=  F_k + 2F_{k-1} \cr
+  L_{k-1} &= 2F_k -  F_{k-1}
+}$$
+@end tex
+@ifnottex
+
+@example
+L[k]   =   F[k] + 2*F[k-1]
+L[k-1] = 2*F[k] -   F[k-1]
+@end example
+
+@end ifnottex
+@code{mpz_lucnum_ui} is only interested in @m{L_n,L[n]}, and some work can be
+saved.  Trailing zero bits on @math{n} can be handled with a single square
+each.
+@tex
+$$ L_{2k} = L_k^2 - 2(-1)^k $$
+@end tex
+@ifnottex
+
+@example
+L[2k] = L[k]^2 - 2*(-1)^k
+@end example
+
+@end ifnottex
+And the lowest 1 bit can be handled with one multiply of a pair of Fibonacci
+numbers, similar to what @code{mpz_fib_ui} does.
+@tex
+$$ L_{2k+1} = 5F_{k-1} (2F_k + F_{k-1}) - 4(-1)^k $$
+@end tex
+@ifnottex
+
+@example
+L[2k+1] = 5*F[k-1]*(2*F[k]+F[k-1]) - 4*(-1)^k
+@end example
+
+@end ifnottex
+
+
+@node Random Number Algorithms,  , Lucas Numbers Algorithm, Other Algorithms
+@subsection Random Numbers
+@cindex Random number algorithms
+
+For the @code{urandomb} functions, random numbers are generated simply by
+concatenating bits produced by the generator.  As long as the generator has
+good randomness properties this will produce well-distributed @math{N} bit
+numbers.
+
+For the @code{urandomm} functions, random numbers in a range @math{0@le{}R<N}
+are generated by taking values @math{R} of @m{\lceil \log_2 N \rceil,
+ceil(log2(N))} bits each until one satisfies @math{R<N}.  This will normally
+require only one or two attempts, but the attempts are limited in case the
+generator is somehow degenerate and produces only 1 bits or similar.
+
+@cindex Mersenne twister algorithm
+The Mersenne Twister generator is by Matsumoto and Nishimura
+(@pxref{References}).  It has a non-repeating period of @math{2^@W{19937}-1},
+which is a Mersenne prime, hence the name of the generator.  The state is 624
+words of 32-bits each, which is iterated with one XOR and shift for each
+32-bit word generated, making the algorithm very fast.  Randomness properties
+are also very good and this is the default algorithm used by GMP.
+
+@cindex Linear congruential algorithm
+Linear congruential generators are described in many text books, for instance
+Knuth volume 2 (@pxref{References}).  With a modulus @math{M} and parameters
+@math{A} and @math{C}, an integer state @math{S} is iterated by the formula
+@math{S @leftarrow{} A@GMPmultiply{}S+C @bmod{} M}.  At each step the new
+state is a linear function of the previous, mod @math{M}, hence the name of
+the generator.
+
+In GMP only moduli of the form @math{2^N} are supported, and the current
+implementation is not as well optimized as it could be.  Overheads are
+significant when @math{N} is small, and when @math{N} is large clearly the
+multiply at each step will become slow.  This is not a big concern, since the
+Mersenne Twister generator is better in every respect and is therefore
+recommended for all normal applications.
+
+For both generators the current state can be deduced by observing enough
+output and applying some linear algebra (over GF(2) in the case of the
+Mersenne Twister).  This generally means raw output is unsuitable for
+cryptographic applications without further hashing or the like.
+
+
+@node Assembly Coding,  , Other Algorithms, Algorithms
+@section Assembly Coding
+@cindex Assembly coding
+
+The assembly subroutines in GMP are the most significant source of speed at
+small to moderate sizes.  At larger sizes algorithm selection becomes more
+important, but of course speedups in low level routines will still speed up
+everything proportionally.
+
+Carry handling and widening multiplies that are important for GMP can't be
+easily expressed in C@.  GCC @code{asm} blocks help a lot and are provided in
+@file{longlong.h}, but hand coding low level routines invariably offers a
+speedup over generic C by a factor of anything from 2 to 10.
+
+@menu
+* Assembly Code Organisation::
+* Assembly Basics::
+* Assembly Carry Propagation::
+* Assembly Cache Handling::
+* Assembly Functional Units::
+* Assembly Floating Point::
+* Assembly SIMD Instructions::
+* Assembly Software Pipelining::
+* Assembly Loop Unrolling::
+* Assembly Writing Guide::
+@end menu
+
+
+@node Assembly Code Organisation, Assembly Basics, Assembly Coding, Assembly Coding
+@subsection Code Organisation
+@cindex Assembly code organisation
+@cindex Code organisation
+
+The various @file{mpn} subdirectories contain machine-dependent code, written
+in C or assembly.  The @file{mpn/generic} subdirectory contains default code,
+used when there's no machine-specific version of a particular file.
+
+Each @file{mpn} subdirectory is for an ISA family.  Generally 32-bit and
+64-bit variants in a family cannot share code and have separate directories.
+Within a family further subdirectories may exist for CPU variants.
+
+In each directory a @file{nails} subdirectory may exist, holding code with
+nails support for that CPU variant.  A @code{NAILS_SUPPORT} directive in each
+file indicates the nails values the code handles.  Nails code only exists
+where it's faster, or promises to be faster, than plain code.  There's no
+effort put into nails if they're not going to enhance a given CPU.
+
+
+@node Assembly Basics, Assembly Carry Propagation, Assembly Code Organisation, Assembly Coding
+@subsection Assembly Basics
+
+@code{mpn_addmul_1} and @code{mpn_submul_1} are the most important routines
+for overall GMP performance.  All multiplications and divisions come down to
+repeated calls to these.  @code{mpn_add_n}, @code{mpn_sub_n},
+@code{mpn_lshift} and @code{mpn_rshift} are next most important.
+
+On some CPUs assembly versions of the internal functions
+@code{mpn_mul_basecase} and @code{mpn_sqr_basecase} give significant speedups,
+mainly through avoiding function call overheads.  They can also potentially
+make better use of a wide superscalar processor, as can bigger primitives like
+@code{mpn_addmul_2} or @code{mpn_addmul_4}.
+
+The restrictions on overlaps between sources and destinations
+(@pxref{Low-level Functions}) are designed to facilitate a variety of
+implementations.  For example, knowing @code{mpn_add_n} won't have partly
+overlapping sources and destination means reading can be done far ahead of
+writing on superscalar processors, and loops can be vectorized on a vector
+processor, depending on the carry handling.
+
+
+@node Assembly Carry Propagation, Assembly Cache Handling, Assembly Basics, Assembly Coding
+@subsection Carry Propagation
+@cindex Assembly carry propagation
+
+The problem that presents most challenges in GMP is propagating carries from
+one limb to the next.  In functions like @code{mpn_addmul_1} and
+@code{mpn_add_n}, carries are the only dependencies between limb operations.
+
+On processors with carry flags, a straightforward CISC style @code{adc} is
+generally best.  AMD K6 @code{mpn_addmul_1} however is an example of an
+unusual set of circumstances where a branch works out better.
+
+On RISC processors generally an add and compare for overflow is used.  This
+sort of thing can be seen in @file{mpn/generic/aors_n.c}.  Some carry
+propagation schemes require 4 instructions, meaning at least 4 cycles per
+limb, but other schemes may use just 1 or 2.  On wide superscalar processors
+performance may be completely determined by the number of dependent
+instructions between carry-in and carry-out for each limb.
+
+On vector processors good use can be made of the fact that a carry bit only
+very rarely propagates more than one limb.  When adding a single bit to a
+limb, there's only a carry out if that limb was @code{0xFF@dots{}FF} which on
+random data will be only 1 in @m{2\GMPraise{@code{mp\_bits\_per\_limb}},
+2^mp_bits_per_limb}.  @file{mpn/cray/add_n.c} is an example of this, it adds
+all limbs in parallel, adds one set of carry bits in parallel and then only
+rarely needs to fall through to a loop propagating further carries.
+
+On the x86s, GCC (as of version 2.95.2) doesn't generate particularly good code
+for the RISC style idioms that are necessary to handle carry bits in
+C@.  Often conditional jumps are generated where @code{adc} or @code{sbb} forms
+would be better.  And so unfortunately almost any loop involving carry bits
+needs to be coded in assembly for best results.
+
+
+@node Assembly Cache Handling, Assembly Functional Units, Assembly Carry Propagation, Assembly Coding
+@subsection Cache Handling
+@cindex Assembly cache handling
+
+GMP aims to perform well both on operands that fit entirely in L1 cache and
+those which don't.
+
+Basic routines like @code{mpn_add_n} or @code{mpn_lshift} are often used on
+large operands, so L2 and main memory performance is important for them.
+@code{mpn_mul_1} and @code{mpn_addmul_1} are mostly used for multiply and
+square basecases, so L1 performance matters most for them, unless assembly
+versions of @code{mpn_mul_basecase} and @code{mpn_sqr_basecase} exist, in
+which case the remaining uses are mostly for larger operands.
+
+For L2 or main memory operands, memory access times will almost certainly be
+more than the calculation time.  The aim therefore is to maximize memory
+throughput, by starting a load of the next cache line while processing the
+contents of the previous one.  Clearly this is only possible if the chip has a
+lock-up free cache or some sort of prefetch instruction.  Most current chips
+have both these features.
+
+Prefetching sources combines well with loop unrolling, since a prefetch can be
+initiated once per unrolled loop (or more than once if the loop covers more
+than one cache line).
+
+On CPUs without write-allocate caches, prefetching destinations will ensure
+individual stores don't go further down the cache hierarchy, limiting
+bandwidth.  Of course for calculations which are slow anyway, like
+@code{mpn_divrem_1}, write-throughs might be fine.
+
+The distance ahead to prefetch will be determined by memory latency versus
+throughput.  The aim of course is to have data arriving continuously, at peak
+throughput.  Some CPUs have limits on the number of fetches or prefetches in
+progress.
+
+If a special prefetch instruction doesn't exist then a plain load can be used,
+but in that case care must be taken not to attempt to read past the end of an
+operand, since that might produce a segmentation violation.
+
+Some CPUs or systems have hardware that detects sequential memory accesses and
+initiates suitable cache movements automatically, making life easy.
+
+
+@node Assembly Functional Units, Assembly Floating Point, Assembly Cache Handling, Assembly Coding
+@subsection Functional Units
+
+When choosing an approach for an assembly loop, consideration is given to
+what operations can execute simultaneously and what throughput can thereby be
+achieved.  In some cases an algorithm can be tweaked to accommodate available
+resources.
+
+Loop control will generally require a counter and pointer updates, costing as
+much as 5 instructions, plus any delays a branch introduces.  CPU addressing
+modes might reduce pointer updates, perhaps by allowing just one updating
+pointer and others expressed as offsets from it, or on CISC chips with all
+addressing done with the loop counter as a scaled index.
+
+The final loop control cost can be amortised by processing several limbs in
+each iteration (@pxref{Assembly Loop Unrolling}).  This at least ensures loop
+control isn't a big fraction of the work done.
+
+Memory throughput is always a limit.  If perhaps only one load or one store
+can be done per cycle then 3 cycles/limb will be the top speed for ``binary''
+operations like @code{mpn_add_n}, and any code achieving that is optimal.
+
+Integer resources can be freed up by having the loop counter in a float
+register, or by pressing the float units into use for some multiplying,
+perhaps doing every second limb on the float side (@pxref{Assembly Floating
+Point}).
+
+Float resources can be freed up by doing carry propagation on the integer
+side, or even by doing integer to float conversions in integers using bit
+twiddling.
+
+
+@node Assembly Floating Point, Assembly SIMD Instructions, Assembly Functional Units, Assembly Coding
+@subsection Floating Point
+@cindex Assembly floating point
+
+Floating point arithmetic is used in GMP for multiplications on CPUs with poor
+integer multipliers.  It's mostly useful for @code{mpn_mul_1},
+@code{mpn_addmul_1} and @code{mpn_submul_1} on 64-bit machines, and
+@code{mpn_mul_basecase} on both 32-bit and 64-bit machines.
+
+With IEEE 53-bit double precision floats, integer multiplications producing up
+to 53 bits will give exact results.  Breaking a 64@cross{}64 multiplication
+into eight 16@cross{}@math{32@rightarrow{}48} bit pieces is convenient.  With
+some care though six 21@cross{}@math{32@rightarrow{}53} bit products can be
+used, if one of the lower two 21-bit pieces also uses the sign bit.
+
+For the @code{mpn_mul_1} family of functions on a 64-bit machine, the
+invariant single limb is split at the start, into 3 or 4 pieces.  Inside the
+loop, the bignum operand is split into 32-bit pieces.  Fast conversion of
+these unsigned 32-bit pieces to floating point is highly machine-dependent.
+In some cases, reading the data into the integer unit, zero-extending to
+64-bits, then transferring to the floating point unit via memory is the
+only option.
+
+Converting partial products back to 64-bit limbs is usually best done as a
+signed conversion.  Since all values are smaller than @m{2^{53},2^53}, signed
+and unsigned are the same, but most processors lack unsigned conversions.
+
+@sp 2
+
+Here is a diagram showing 16@cross{}32 bit products for an @code{mpn_mul_1} or
+@code{mpn_addmul_1} with a 64-bit limb.  The single limb operand V is split
+into four 16-bit parts.  The multi-limb operand U is split in the loop into
+two 32-bit parts.
+
+@tex
+\global\newdimen\GMPbits      \global\GMPbits=0.18em
+\def\GMPbox#1#2#3{%
+  \hbox{%
+    \hbox to 128\GMPbits{\hfil
+      \vbox{%
+        \hrule
+        \hbox to 48\GMPbits {\GMPvrule \hfil$#2$\hfil \vrule}%
+        \hrule}%
+      \hskip #1\GMPbits}%
+    \raise \GMPboxdepth \hbox{\hskip 2em #3}}}
+%
+\GMPdisplay{%
+  \vbox{%
+    \hbox{%
+      \hbox to 128\GMPbits {\hfil
+        \vbox{%
+          \hrule
+          \hbox to 64\GMPbits{%
+            \GMPvrule \hfil$v48$\hfil
+            \vrule    \hfil$v32$\hfil
+            \vrule    \hfil$v16$\hfil
+            \vrule    \hfil$v00$\hfil
+            \vrule}
+          \hrule}}%
+       \raise \GMPboxdepth \hbox{\hskip 2em V Operand}}
+    \vskip 0.5ex
+    \hbox{%
+      \hbox to 128\GMPbits {\hfil
+        \raise \GMPboxdepth \hbox{$\times$\hskip 1.5em}%
+        \vbox{%
+          \hrule
+          \hbox to 64\GMPbits {%
+            \GMPvrule \hfil$u32$\hfil
+            \vrule \hfil$u00$\hfil
+            \vrule}%
+          \hrule}}%
+       \raise \GMPboxdepth \hbox{\hskip 2em U Operand (one limb)}}%
+    \vskip 0.5ex
+    \hbox{\vbox to 2ex{\hrule width 128\GMPbits}}%
+    \GMPbox{0}{u00 \times v00}{$p00$\hskip 1.5em 48-bit products}%
+    \vskip 0.5ex
+    \GMPbox{16}{u00 \times v16}{$p16$}
+    \vskip 0.5ex
+    \GMPbox{32}{u00 \times v32}{$p32$}
+    \vskip 0.5ex
+    \GMPbox{48}{u00 \times v48}{$p48$}
+    \vskip 0.5ex
+    \GMPbox{32}{u32 \times v00}{$r32$}
+    \vskip 0.5ex
+    \GMPbox{48}{u32 \times v16}{$r48$}
+    \vskip 0.5ex
+    \GMPbox{64}{u32 \times v32}{$r64$}
+    \vskip 0.5ex
+    \GMPbox{80}{u32 \times v48}{$r80$}
+}}
+@end tex
+@ifnottex
+@example
+@group
+                +---+---+---+---+
+                |v48|v32|v16|v00|    V operand
+                +---+---+---+---+
+
+                +-------+-------+
+            x   |  u32  |  u00  |    U operand (one limb)
+                +-------+-------+
+
+---------------------------------
+
+                    +-----------+
+                    | u00 x v00 |    p00    48-bit products
+                    +-----------+
+                +-----------+
+                | u00 x v16 |        p16
+                +-----------+
+            +-----------+
+            | u00 x v32 |            p32
+            +-----------+
+        +-----------+
+        | u00 x v48 |                p48
+        +-----------+
+            +-----------+
+            | u32 x v00 |            r32
+            +-----------+
+        +-----------+
+        | u32 x v16 |                r48
+        +-----------+
+    +-----------+
+    | u32 x v32 |                    r64
+    +-----------+
++-----------+
+| u32 x v48 |                        r80
++-----------+
+@end group
+@end example
+@end ifnottex
+
+@math{p32} and @math{r32} can be summed using floating-point addition, and
+likewise @math{p48} and @math{r48}.  @math{p00} and @math{p16} can be summed
+with @math{r64} and @math{r80} from the previous iteration.
+
+For each loop then, four 49-bit quantities are transferred to the integer unit,
+aligned as follows,
+
+@tex
+% GMPbox here should be 49 bits wide, but use 51 to better show p16+r80'
+% crossing into the upper 64 bits.
+\def\GMPbox#1#2#3{%
+  \hbox{%
+    \hbox to 128\GMPbits {%
+      \hfil
+      \vbox{%
+        \hrule
+        \hbox to 51\GMPbits {\GMPvrule \hfil$#2$\hfil \vrule}%
+        \hrule}%
+      \hskip #1\GMPbits}%
+    \raise \GMPboxdepth \hbox{\hskip 1.5em $#3$\hfil}%
+}}
+\newbox\b \setbox\b\hbox{64 bits}%
+\newdimen\bw \bw=\wd\b \advance\bw by 2em
+\newdimen\x \x=128\GMPbits
+\advance\x by -2\bw
+\divide\x by4
+\GMPdisplay{%
+  \vbox{%
+    \hbox to 128\GMPbits {%
+      \GMPvrule
+      \raise 0.5ex \vbox{\hrule \hbox to \x {}}%
+      \hfil 64 bits\hfil
+      \raise 0.5ex \vbox{\hrule \hbox to \x {}}%
+      \vrule
+      \raise 0.5ex \vbox{\hrule \hbox to \x {}}%
+      \hfil 64 bits\hfil
+      \raise 0.5ex \vbox{\hrule \hbox to \x {}}%
+      \vrule}%
+    \vskip 0.7ex
+    \GMPbox{0}{p00+r64'}{i00}
+    \vskip 0.5ex
+    \GMPbox{16}{p16+r80'}{i16}
+    \vskip 0.5ex
+    \GMPbox{32}{p32+r32}{i32}
+    \vskip 0.5ex
+    \GMPbox{48}{p48+r48}{i48}
+}}
+@end tex
+@ifnottex
+@example
+@group
+|-----64bits----|-----64bits----|
+                   +------------+
+                   | p00 + r64' |    i00
+                   +------------+
+               +------------+
+               | p16 + r80' |        i16
+               +------------+
+           +------------+
+           | p32 + r32  |            i32
+           +------------+
+       +------------+
+       | p48 + r48  |                i48
+       +------------+
+@end group
+@end example
+@end ifnottex
+
+The challenge then is to sum these efficiently and add in a carry limb,
+generating a low 64-bit result limb and a high 33-bit carry limb (@math{i48}
+extends 33 bits into the high half).
+
+
+@node Assembly SIMD Instructions, Assembly Software Pipelining, Assembly Floating Point, Assembly Coding
+@subsection SIMD Instructions
+@cindex Assembly SIMD
+
+The single-instruction multiple-data support in current microprocessors is
+aimed at signal processing algorithms where each data point can be treated
+more or less independently.  There's generally not much support for
+propagating the sort of carries that arise in GMP.
+
+SIMD multiplications of say four 16@cross{}16 bit multiplies only do as much
+work as one 32@cross{}32 from GMP's point of view, and need some shifts and
+adds besides.  But of course if say the SIMD form is fully pipelined and uses
+less instruction decoding then it may still be worthwhile.
+
+On the x86 chips, MMX has so far found a use in @code{mpn_rshift} and
+@code{mpn_lshift}, and is used in a special case for 16-bit multipliers in the
+P55 @code{mpn_mul_1}.  SSE2 is used for Pentium 4 @code{mpn_mul_1},
+@code{mpn_addmul_1}, and @code{mpn_submul_1}.
+
+
+@node Assembly Software Pipelining, Assembly Loop Unrolling, Assembly SIMD Instructions, Assembly Coding
+@subsection Software Pipelining
+@cindex Assembly software pipelining
+
+Software pipelining consists of scheduling instructions around the branch
+point in a loop.  For example a loop might issue a load not for use in the
+present iteration but the next, thereby allowing extra cycles for the data to
+arrive from memory.
+
+Naturally this is wanted only when doing things like loads or multiplies that
+take several cycles to complete, and only where a CPU has multiple functional
+units so that other work can be done in the meantime.
+
+A pipeline with several stages will have a data value in progress at each
+stage and each loop iteration moves them along one stage.  This is like
+juggling.
+
+If the latency of some instruction is greater than the loop time then it will
+be necessary to unroll, so one register has a result ready to use while
+another (or multiple others) are still in progress (@pxref{Assembly Loop
+Unrolling}).
+
+
+@node Assembly Loop Unrolling, Assembly Writing Guide, Assembly Software Pipelining, Assembly Coding
+@subsection Loop Unrolling
+@cindex Assembly loop unrolling
+
+Loop unrolling consists of replicating code so that several limbs are
+processed in each loop.  At a minimum this reduces loop overheads by a
+corresponding factor, but it can also allow better register usage, for example
+alternately using one register combination and then another.  Judicious use of
+@command{m4} macros can help avoid lots of duplication in the source code.
+
+Any amount of unrolling can be handled with a loop counter that's decremented
+by @math{N} each time, stopping when the remaining count is less than the
+further @math{N} the loop will process.  Or by subtracting @math{N} at the
+start, the termination condition becomes when the counter @math{C} is less
+than 0 (and the count of remaining limbs is @math{C+N}).
+
+Alternately for a power of 2 unroll the loop count and remainder can be
+established with a shift and mask.  This is convenient if also making a
+computed jump into the middle of a large loop.
+
+The limbs not a multiple of the unrolling can be handled in various ways, for
+example
+
+@itemize @bullet
+@item
+A simple loop at the end (or the start) to process the excess.  Care will be
+wanted that it isn't too much slower than the unrolled part.
+
+@item
+A set of binary tests, for example after an 8-limb unrolling, test for 4 more
+limbs to process, then a further 2 more or not, and finally 1 more or not.
+This will probably take more code space than a simple loop.
+
+@item
+A @code{switch} statement, providing separate code for each possible excess,
+for example an 8-limb unrolling would have separate code for 0 remaining, 1
+remaining, etc, up to 7 remaining.  This might take a lot of code, but may be
+the best way to optimize all cases in combination with a deep pipelined loop.
+
+@item
+A computed jump into the middle of the loop, thus making the first iteration
+handle the excess.  This should make times smoothly increase with size, which
+is attractive, but setups for the jump and adjustments for pointers can be
+tricky and could become quite difficult in combination with deep pipelining.
+@end itemize
+
+
+@node Assembly Writing Guide,  , Assembly Loop Unrolling, Assembly Coding
+@subsection Writing Guide
+@cindex Assembly writing guide
+
+This is a guide to writing software pipelined loops for processing limb
+vectors in assembly.
+
+First determine the algorithm and which instructions are needed.  Code it
+without unrolling or scheduling, to make sure it works.  On a 3-operand CPU
+try to write each new value to a new register; this will greatly simplify later
+steps.
+
+Then note for each instruction the functional unit and/or issue port
+requirements.  If an instruction can use either of two units, like U0 or U1,
+then make a category ``U0/U1''.  Count the total using each unit (or combined
+unit), and count all instructions.
+
+Figure out from those counts the best possible loop time.  The goal will be to
+find a perfect schedule where instruction latencies are completely hidden.
+The total instruction count might be the limiting factor, or perhaps a
+particular functional unit.  It might be possible to tweak the instructions to
+help the limiting factor.
+
+Suppose the loop time is @math{N}, then make @math{N} issue buckets, with the
+final loop branch at the end of the last.  Now fill the buckets with dummy
+instructions using the functional units desired.  Run this to make sure the
+intended speed is reached.
+
+Now replace the dummy instructions with the real instructions from the slow
+but correct loop you started with.  The first will typically be a load
+instruction.  Then the instruction using that value is placed in a bucket an
+appropriate distance down.  Run the loop again, to check it still runs at
+target speed.
+
+Keep placing instructions, frequently measuring the loop.  After a few you
+will need to wrap around from the last bucket back to the top of the loop.  If
+you used the new-register for new-value strategy above then there will be no
+register conflicts.  If not then take care not to clobber something already in
+use.  Changing registers at this time is very error prone.
+
+The loop will overlap two or more of the original loop iterations, and the
+computation of one vector element result will be started in one iteration of
+the new loop, and completed one or several iterations later.
+
+The final step is to create feed-in and wind-down code for the loop.  A good
+way to do this is to make a copy (or copies) of the loop at the start and
+delete those instructions which don't have valid antecedents, and at the end
+replicate and delete those whose results are unwanted (including any further
+loads).
+
+The loop will have a minimum number of limbs loaded and processed, so the
+feed-in code must test if the request size is smaller and skip either to a
+suitable part of the wind-down or to special code for small sizes.
+
+
+@node Internals, Contributors, Algorithms, Top
+@chapter Internals
+@cindex Internals
+
+@strong{This chapter is provided only for informational purposes and the
+various internals described here may change in future GMP releases.
+Applications expecting to be compatible with future releases should use only
+the documented interfaces described in previous chapters.}
+
+@menu
+* Integer Internals::
+* Rational Internals::
+* Float Internals::
+* Raw Output Internals::
+* C++ Interface Internals::
+@end menu
+
+@node Integer Internals, Rational Internals, Internals, Internals
+@section Integer Internals
+@cindex Integer internals
+
+@code{mpz_t} variables represent integers using sign and magnitude, in space
+dynamically allocated and reallocated.  The fields are as follows.
+
+@table @asis
+@item @code{_mp_size}
+The number of limbs, or the negative of that when representing a negative
+integer.  Zero is represented by @code{_mp_size} set to zero, in which case
+the @code{_mp_d} data is unused.
+
+@item @code{_mp_d}
+A pointer to an array of limbs which is the magnitude.  These are stored
+``little endian'' as per the @code{mpn} functions, so @code{_mp_d[0]} is the
+least significant limb and @code{_mp_d[ABS(_mp_size)-1]} is the most
+significant.  Whenever @code{_mp_size} is non-zero, the most significant limb
+is non-zero.
+
+Currently there's always at least one limb allocated, so for instance
+@code{mpz_set_ui} never needs to reallocate, and @code{mpz_get_ui} can fetch
+@code{_mp_d[0]} unconditionally (though its value is then only wanted if
+@code{_mp_size} is non-zero).
+
+@item @code{_mp_alloc}
+@code{_mp_alloc} is the number of limbs currently allocated at @code{_mp_d},
+and naturally @code{_mp_alloc >= ABS(_mp_size)}.  When an @code{mpz} routine
+is about to (or might be about to) increase @code{_mp_size}, it checks
+@code{_mp_alloc} to see whether there's enough space, and reallocates if not.
+@code{MPZ_REALLOC} is generally used for this.
+@end table
+
+The various bitwise logical functions like @code{mpz_and} behave as if
+negative values were twos complement.  But sign and magnitude is always used
+internally, and necessary adjustments are made during the calculations.
+Sometimes this isn't pretty, but sign and magnitude are best for other
+routines.
+
+Some internal temporary variables are set up with @code{MPZ_TMP_INIT} and these
+have @code{_mp_d} space obtained from @code{TMP_ALLOC} rather than the memory
+allocation functions.  Care is taken to ensure that these are big enough that
+no reallocation is necessary (since it would have unpredictable consequences).
+
+@code{_mp_size} and @code{_mp_alloc} are @code{int}, although @code{mp_size_t}
+is usually a @code{long}.  This is done to make the fields just 32 bits on
+some 64-bit systems, thereby saving a few bytes of data space but still
+providing plenty of range.
+
+
+@node Rational Internals, Float Internals, Integer Internals, Internals
+@section Rational Internals
+@cindex Rational internals
+
+@code{mpq_t} variables represent rationals using an @code{mpz_t} numerator and
+denominator (@pxref{Integer Internals}).
+
+The canonical form adopted is denominator positive (and non-zero), no common
+factors between numerator and denominator, and zero uniquely represented as
+0/1.
+
+It's believed that casting out common factors at each stage of a calculation
+is best in general.  A GCD is an @math{O(N^2)} operation so it's better to do
+a few small ones immediately than to delay and have to do a big one later.
+Knowing the numerator and denominator have no common factors can be used for
+example in @code{mpq_mul} to make only two cross GCDs necessary, not four.
+
+This general approach to common factors is badly sub-optimal in the presence
+of simple factorizations or little prospect for cancellation, but GMP has no
+way to know when this will occur.  As per @ref{Efficiency}, that's left to
+applications.  The @code{mpq_t} framework might still suit, with
+@code{mpq_numref} and @code{mpq_denref} for direct access to the numerator and
+denominator, or of course @code{mpz_t} variables can be used directly.
+
+
+@node Float Internals, Raw Output Internals, Rational Internals, Internals
+@section Float Internals
+@cindex Float internals
+
+Efficient calculation is the primary aim of GMP floats and the use of whole
+limbs and simple rounding facilitates this.
+
+@code{mpf_t} floats have a variable precision mantissa and a single machine
+word signed exponent.  The mantissa is represented using sign and magnitude.
+
+@c FIXME: The arrow heads don't join to the lines exactly.
+@tex
+\global\newdimen\GMPboxwidth \GMPboxwidth=5em
+\global\newdimen\GMPboxheight \GMPboxheight=3ex
+\def\centreline{\hbox{\raise 0.8ex \vbox{\hrule \hbox{\hfil}}}}
+\GMPdisplay{%
+\vbox{%
+  \hbox to 5\GMPboxwidth {most significant limb \hfil least significant limb}
+  \vskip 0.7ex
+  \def\GMPcentreline#1{\hbox{\raise 0.5 ex \vbox{\hrule \hbox to #1 {}}}}
+  \hbox {
+    \hbox to 3\GMPboxwidth {%
+      \setbox 0 = \hbox{@code{\_mp\_exp}}%
+      \dimen0=3\GMPboxwidth
+      \advance\dimen0 by -\wd0
+      \divide\dimen0 by 2
+      \advance\dimen0 by -1em
+      \setbox1 = \hbox{$\rightarrow$}%
+      \dimen1=\dimen0
+      \advance\dimen1 by -\wd1
+      \GMPcentreline{\dimen0}%
+      \hfil
+      \box0%
+      \hfil
+      \GMPcentreline{\dimen1{}}%
+      \box1}
+    \hbox to 2\GMPboxwidth {\hfil @code{\_mp\_d}}}
+  \vskip 0.5ex
+  \vbox {%
+    \hrule
+    \hbox{%
+      \vrule height 2ex depth 1ex
+      \hbox to \GMPboxwidth {}%
+      \vrule
+      \hbox to \GMPboxwidth {}%
+      \vrule
+      \hbox to \GMPboxwidth {}%
+      \vrule
+      \hbox to \GMPboxwidth {}%
+      \vrule
+      \hbox to \GMPboxwidth {}%
+      \vrule}
+    \hrule
+  }
+  \hbox {%
+    \hbox to 0.8 pt {}
+    \hbox to 3\GMPboxwidth {%
+      \hfil $\cdot$} \hbox {$\leftarrow$ radix point\hfil}}
+  \hbox to 5\GMPboxwidth{%
+    \setbox 0 = \hbox{@code{\_mp\_size}}%
+    \dimen0 = 5\GMPboxwidth
+    \advance\dimen0 by -\wd0
+    \divide\dimen0 by 2
+    \advance\dimen0 by -1em
+    \dimen1 = \dimen0
+    \setbox1 = \hbox{$\leftarrow$}%
+    \setbox2 = \hbox{$\rightarrow$}%
+    \advance\dimen0 by -\wd1
+    \advance\dimen1 by -\wd2
+    \hbox to 0.3 em {}%
+    \box1
+    \GMPcentreline{\dimen0}%
+    \hfil
+    \box0
+    \hfil
+    \GMPcentreline{\dimen1}%
+    \box2}
+}}
+@end tex
+@ifnottex
+@example
+   most                   least
+significant            significant
+   limb                   limb
+
+                            _mp_d
+ |---- _mp_exp --->           |
+  _____ _____ _____ _____ _____
+ |_____|_____|_____|_____|_____|
+                   . <------------ radix point
+
+  <-------- _mp_size --------->
+@sp 1
+@end example
+@end ifnottex
+
+@noindent
+The fields are as follows.
+
+@table @asis
+@item @code{_mp_size}
+The number of limbs currently in use, or the negative of that when
+representing a negative value.  Zero is represented by @code{_mp_size} and
+@code{_mp_exp} both set to zero, and in that case the @code{_mp_d} data is
+unused.  (In the future @code{_mp_exp} might be undefined when representing
+zero.)
+
+@item @code{_mp_prec}
+The precision of the mantissa, in limbs.  In any calculation the aim is to
+produce @code{_mp_prec} limbs of result (the most significant being non-zero).
+
+@item @code{_mp_d}
+A pointer to the array of limbs which is the absolute value of the mantissa.
+These are stored ``little endian'' as per the @code{mpn} functions, so
+@code{_mp_d[0]} is the least significant limb and
+@code{_mp_d[ABS(_mp_size)-1]} the most significant.
+
+The most significant limb is always non-zero, but there are no other
+restrictions on its value, in particular the highest 1 bit can be anywhere
+within the limb.
+
+@code{_mp_prec+1} limbs are allocated to @code{_mp_d}, the extra limb being
+for convenience (see below).  There are no reallocations during a calculation,
+only in a change of precision with @code{mpf_set_prec}.
+
+@item @code{_mp_exp}
+The exponent, in limbs, determining the location of the implied radix point.
+Zero means the radix point is just above the most significant limb.  Positive
+values mean a radix point offset towards the lower limbs and hence a value
+@math{@ge{} 1}, as for example in the diagram above.  Negative exponents mean
+a radix point further above the highest limb.
+
+Naturally the exponent can be any value, it doesn't have to fall within the
+limbs as the diagram shows, it can be a long way above or a long way below.
+Limbs other than those included in the @code{@{_mp_d,_mp_size@}} data
+are treated as zero.
+@end table
+
+The @code{_mp_size} and @code{_mp_prec} fields are @code{int}, although the
+@code{mp_size_t} type is usually a @code{long}.  The @code{_mp_exp} field is
+usually @code{long}.  This is done to make some fields just 32 bits on some
+64-bit systems, thereby saving a few bytes of data space but still providing
+plenty of precision and a very large range.
+
+
+@sp 1
+@noindent
+The following various points should be noted.
+
+@table @asis
+@item Low Zeros
+The least significant limbs @code{_mp_d[0]} etc can be zero, though such low
+zeros can always be ignored.  Routines likely to produce low zeros check and
+avoid them to save time in subsequent calculations, but for most routines
+they're quite unlikely and aren't checked.
+
+@item Mantissa Size Range
+The @code{_mp_size} count of limbs in use can be less than @code{_mp_prec} if
+the value can be represented in less.  This means low precision values or
+small integers stored in a high precision @code{mpf_t} can still be operated
+on efficiently.
+
+@code{_mp_size} can also be greater than @code{_mp_prec}.  Firstly a value is
+allowed to use all of the @code{_mp_prec+1} limbs available at @code{_mp_d},
+and secondly when @code{mpf_set_prec_raw} lowers @code{_mp_prec} it leaves
+@code{_mp_size} unchanged and so the size can be arbitrarily bigger than
+@code{_mp_prec}.
+
+@item Rounding
+All rounding is done on limb boundaries.  Calculating @code{_mp_prec} limbs
+with the high non-zero will ensure the application requested minimum precision
+is obtained.
+
+The use of simple ``trunc'' rounding towards zero is efficient, since there's
+no need to examine extra limbs and increment or decrement.
+
+@item Bit Shifts
+Since the exponent is in limbs, there are no bit shifts in basic operations
+like @code{mpf_add} and @code{mpf_mul}.  When differing exponents are
+encountered all that's needed is to adjust pointers to line up the relevant
+limbs.
+
+Of course @code{mpf_mul_2exp} and @code{mpf_div_2exp} will require bit shifts,
+but the choice is between an exponent in limbs which requires shifts there, or
+one in bits which requires them almost everywhere else.
+
+@item Use of @code{_mp_prec+1} Limbs
+The extra limb on @code{_mp_d} (@code{_mp_prec+1} rather than just
+@code{_mp_prec}) helps when an @code{mpf} routine might get a carry from its
+operation.  @code{mpf_add} for instance will do an @code{mpn_add} of
+@code{_mp_prec} limbs.  If there's no carry then that's the result, but if
+there is a carry then it's stored in the extra limb of space and
+@code{_mp_size} becomes @code{_mp_prec+1}.
+
+Whenever @code{_mp_prec+1} limbs are held in a variable, the low limb is not
+needed for the intended precision, only the @code{_mp_prec} high limbs.  But
+zeroing it out or moving the rest down is unnecessary.  Subsequent routines
+reading the value will simply take the high limbs they need, and this will be
+@code{_mp_prec} if their target has that same precision.  This is no more than
+a pointer adjustment, and must be checked anyway since the destination
+precision can be different from the sources.
+
+Copy functions like @code{mpf_set} will retain a full @code{_mp_prec+1} limbs
+if available.  This ensures that a variable which has @code{_mp_size} equal to
+@code{_mp_prec+1} will get its full exact value copied.  Strictly speaking
+this is unnecessary since only @code{_mp_prec} limbs are needed for the
+application's requested precision, but it's considered that an @code{mpf_set}
+from one variable into another of the same precision ought to produce an exact
+copy.
+
+@item Application Precisions
+@code{__GMPF_BITS_TO_PREC} converts an application requested precision to an
+@code{_mp_prec}.  The value in bits is rounded up to a whole limb, then an
+extra limb is added since the most significant limb of @code{_mp_d} is only
+guaranteed to be non-zero and therefore might contain only one bit.
+
+@code{__GMPF_PREC_TO_BITS} does the reverse conversion, and removes the extra
+limb from @code{_mp_prec} before converting to bits.  The net effect of
+reading back with @code{mpf_get_prec} is simply the precision rounded up to a
+multiple of @code{mp_bits_per_limb}.
+
+Note that the extra limb added here for the high only being non-zero is in
+addition to the extra limb allocated to @code{_mp_d}.  For example with a
+32-bit limb, an application request for 250 bits will be rounded up to 8
+limbs, then an extra added for the high being only non-zero, giving an
+@code{_mp_prec} of 9.  @code{_mp_d} then gets 10 limbs allocated.  Reading
+back with @code{mpf_get_prec} will take @code{_mp_prec} subtract 1 limb and
+multiply by 32, giving 256 bits.
+
+Strictly speaking, the fact the high limb has at least one bit means that a
+float with, say, 3 limbs of 32 bits each will be holding at least 65 bits, but
+for the purposes of @code{mpf_t} it's considered simply to be 64 bits, a nice
+multiple of the limb size.
+@end table
+
+
+@node Raw Output Internals, C++ Interface Internals, Float Internals, Internals
+@section Raw Output Internals
+@cindex Raw output internals
+
+@noindent
+@code{mpz_out_raw} uses the following format.
+
+@tex
+\global\newdimen\GMPboxwidth \GMPboxwidth=5em
+\global\newdimen\GMPboxheight \GMPboxheight=3ex
+\def\centreline{\hbox{\raise 0.8ex \vbox{\hrule \hbox{\hfil}}}}
+\GMPdisplay{%
+\vbox{%
+  \def\GMPcentreline#1{\hbox{\raise 0.5 ex \vbox{\hrule \hbox to #1 {}}}}
+  \vbox {%
+    \hrule
+    \hbox{%
+      \vrule height 2.5ex depth 1.5ex
+      \hbox to \GMPboxwidth {\hfil size\hfil}%
+      \vrule
+      \hbox to 3\GMPboxwidth {\hfil data bytes\hfil}%
+      \vrule}
+    \hrule}
+}}
+@end tex
+@ifnottex
+@example
++------+------------------------+
+| size |       data bytes       |
++------+------------------------+
+@end example
+@end ifnottex
+
+The size is 4 bytes written most significant byte first, being the number of
+subsequent data bytes, or the twos complement negative of that when a negative
+integer is represented.  The data bytes are the absolute value of the integer,
+written most significant byte first.
+
+The most significant data byte is always non-zero, so the output is the same
+on all systems, irrespective of limb size.
+
+In GMP 1, leading zero bytes were written to pad the data bytes to a multiple
+of the limb size.  @code{mpz_inp_raw} will still accept this, for
+compatibility.
+
+The use of ``big endian'' for both the size and data fields is deliberate, it
+makes the data easy to read in a hex dump of a file.  Unfortunately it also
+means that the limb data must be reversed when reading or writing, so neither
+a big endian nor little endian system can just read and write @code{_mp_d}.
+
+
+@node C++ Interface Internals,  , Raw Output Internals, Internals
+@section C++ Interface Internals
+@cindex C++ interface internals
+
+A system of expression templates is used to ensure something like @code{a=b+c}
+turns into a simple call to @code{mpz_add} etc.  For @code{mpf_class}
+the scheme also ensures the precision of the final
+destination is used for any temporaries within a statement like
+@code{f=w*x+y*z}.  These are important features which a naive implementation
+cannot provide.
+
+A simplified description of the scheme follows.  The true scheme is
+complicated by the fact that expressions have different return types.  For
+detailed information, refer to the source code.
+
+To perform an operation, say, addition, we first define a ``function object''
+evaluating it,
+
+@example
+struct __gmp_binary_plus
+@{
+  static void eval(mpf_t f, mpf_t g, mpf_t h) @{ mpf_add(f, g, h); @}
+@};
+@end example
+
+@noindent
+And an ``additive expression'' object,
+
+@example
+__gmp_expr<__gmp_binary_expr<mpf_class, mpf_class, __gmp_binary_plus> >
+operator+(const mpf_class &f, const mpf_class &g)
+@{
+  return __gmp_expr
+    <__gmp_binary_expr<mpf_class, mpf_class, __gmp_binary_plus> >(f, g);
+@}
+@end example
+
+The seemingly redundant @code{__gmp_expr<__gmp_binary_expr<@dots{}>>} is used to
+encapsulate any possible kind of expression into a single template type.  In
+fact even @code{mpf_class} etc are @code{typedef} specializations of
+@code{__gmp_expr}.
+
+Next we define assignment of @code{__gmp_expr} to @code{mpf_class}.
+
+@example
+template <class T>
+mpf_class & mpf_class::operator=(const __gmp_expr<T> &expr)
+@{
+  expr.eval(this->get_mpf_t(), this->precision());
+  return *this;
+@}
+
+template <class Op>
+void __gmp_expr<__gmp_binary_expr<mpf_class, mpf_class, Op> >::eval
+(mpf_t f, mp_bitcnt_t precision)
+@{
+  Op::eval(f, expr.val1.get_mpf_t(), expr.val2.get_mpf_t());
+@}
+@end example
+
+where @code{expr.val1} and @code{expr.val2} are references to the expression's
+operands (here @code{expr} is the @code{__gmp_binary_expr} stored within the
+@code{__gmp_expr}).
+
+This way, the expression is actually evaluated only at the time of assignment,
+when the required precision (that of @code{f}) is known.  Furthermore the
+target @code{mpf_t} is now available, thus we can call @code{mpf_add} directly
+with @code{f} as the output argument.
+
+Compound expressions are handled by defining operators taking subexpressions
+as their arguments, like this:
+
+@example
+template <class T, class U>
+__gmp_expr
+<__gmp_binary_expr<__gmp_expr<T>, __gmp_expr<U>, __gmp_binary_plus> >
+operator+(const __gmp_expr<T> &expr1, const __gmp_expr<U> &expr2)
+@{
+  return __gmp_expr
+    <__gmp_binary_expr<__gmp_expr<T>, __gmp_expr<U>, __gmp_binary_plus> >
+    (expr1, expr2);
+@}
+@end example
+
+And the corresponding specializations of @code{__gmp_expr::eval}:
+
+@example
+template <class T, class U, class Op>
+void __gmp_expr
+<__gmp_binary_expr<__gmp_expr<T>, __gmp_expr<U>, Op> >::eval
+(mpf_t f, mp_bitcnt_t precision)
+@{
+  // declare two temporaries
+  mpf_class temp1(expr.val1, precision), temp2(expr.val2, precision);
+  Op::eval(f, temp1.get_mpf_t(), temp2.get_mpf_t());
+@}
+@end example
+
+The expression is thus recursively evaluated to any level of complexity and
+all subexpressions are evaluated to the precision of @code{f}.
+
+
+@node Contributors, References, Internals, Top
+@comment  node-name,  next,  previous,  up
+@appendix Contributors
+@cindex Contributors
+
+Torbj@"orn Granlund wrote the original GMP library and is still the main
+developer.  Code not explicitly attributed to others was contributed by
+Torbj@"orn.  Several other individuals and organizations have contributed to
+GMP.  Here is a list in chronological order of first contribution:
+
+Gunnar Sj@"odin and Hans Riesel helped with mathematical problems in early
+versions of the library.
+
+Richard Stallman helped with the interface design and revised the first
+version of this manual.
+
+Brian Beuning and Doug Lea helped with testing of early versions of the
+library and made creative suggestions.
+
+John Amanatides of York University in Canada contributed the function
+@code{mpz_probab_prime_p}.
+
+Paul Zimmermann wrote the REDC-based @code{mpz_powm} code, the Sch@"onhage-Strassen
+FFT multiply code, and the Karatsuba square root code.  He also improved the
+Toom3 code for GMP 4.2.  Paul sparked the development of GMP 2, with his
+comparisons between bignum packages.  The ECMNET project Paul is organizing
+was a driving force behind many of the optimizations in GMP 3.  Paul also
+wrote the new GMP 4.3 nth root code (with Torbj@"orn).
+
+Ken Weber (Kent State University, Universidade Federal do Rio Grande do Sul)
+contributed now defunct versions of @code{mpz_gcd}, @code{mpz_divexact},
+@code{mpn_gcd}, and @code{mpn_bdivmod}, partially supported by CNPq (Brazil)
+grant 301314194-2.
+
+Per Bothner of Cygnus Support helped to set up GMP to use Cygnus' configure.
+He has also made valuable suggestions and tested numerous intermediary
+releases.
+
+Joachim Hollman was involved in the design of the @code{mpf} interface, and in
+the @code{mpz} design revisions for version 2.
+
+Bennet Yee contributed the initial versions of @code{mpz_jacobi} and
+@code{mpz_legendre}.
+
+Andreas Schwab contributed the files @file{mpn/m68k/lshift.S} and
+@file{mpn/m68k/rshift.S} (now in @file{.asm} form).
+
+Robert Harley of Inria, France and David Seal of ARM, England, suggested clever
+improvements for population count.  Robert also wrote highly optimized
+Karatsuba and 3-way Toom multiplication functions for GMP 3, and contributed
+the ARM assembly code.
+
+Torsten Ekedahl of the Mathematical department of Stockholm University provided
+significant inspiration during several phases of the GMP development.  His
+mathematical expertise helped improve several algorithms.
+
+Linus Nordberg wrote the new configure system based on autoconf and
+implemented the new random functions.
+
+Kevin Ryde worked on a large number of things: optimized x86 code, m4 asm
+macros, parameter tuning, speed measuring, the configure system, function
+inlining, divisibility tests, bit scanning, Jacobi symbols, Fibonacci and Lucas
+number functions, printf and scanf functions, perl interface, demo expression
+parser, the algorithms chapter in the manual, @file{gmpasm-mode.el}, and
+various miscellaneous improvements elsewhere.
+
+Kent Boortz made the Mac OS 9 port.
+
+Steve Root helped write the optimized alpha 21264 assembly code.
+
+Gerardo Ballabio wrote the @file{gmpxx.h} C++ class interface and the C++
+@code{istream} input routines.
+
+Jason Moxham rewrote @code{mpz_fac_ui}.
+
+Pedro Gimeno implemented the Mersenne Twister and made other random number
+improvements.
+
+Niels M@"oller wrote the sub-quadratic GCD and extended GCD code, the
+quadratic Hensel division code, and (with Torbj@"orn) the new divide and
+conquer division code for GMP 4.3.  Niels also helped implement the new Toom
+multiply code for GMP 4.3 and implemented helper functions to simplify Toom
+evaluations for GMP 5.0.  He wrote the original version of @code{mpn_mulmod_bnm1}.
+
+Alberto Zanoni and Marco Bodrato suggested the unbalanced multiply strategy,
+and found the optimal strategies for evaluation and interpolation in Toom
+multiplication.
+
+Marco Bodrato helped implement the new Toom multiply code for GMP 4.3 and
+implemented most of the new Toom multiply and squaring code for 5.0.
+He is the main author of the current @code{mpn_mulmod_bnm1} and
+@code{mpn_mullo_n}.  Marco also wrote the functions @code{mpn_invert} and
+@code{mpn_invertappr}.
+
+David Harvey suggested the internal function @code{mpn_bdiv_dbm1}, implementing
+division relevant to Toom multiplication.  He also worked on fast assembly
+sequences, in particular on a fast AMD64 @code{mpn_mul_basecase}.
+
+Martin Boij wrote @code{mpn_perfect_power_p}.
+
+(This list is chronological, not ordered by significance.  If you have
+contributed to GMP but are not listed above, please tell
+@email{gmp-devel@@gmplib.org} about the omission!)
+
+The development of floating point functions of GNU MP 2 was supported in part
+by the ESPRIT-BRA (Basic Research Activities) 6846 project POSSO (POlynomial
+System SOlving).
+
+The development of GMP 2, 3, and 4 was supported in part by the IDA Center for
+Computing Sciences.
+
+Thanks go to Hans Thorsen for donating an SGI system for the GMP test system
+environment.
+
+@node References, GNU Free Documentation License, Contributors, Top
+@comment  node-name,  next,  previous,  up
+@appendix References
+@cindex References
+
+@c  FIXME: In tex, the @uref's are unhyphenated, which is good for clarity,
+@c  but being long words they upset paragraph formatting (the preceding line
+@c  can get badly stretched).  Would like a conditional @* style line break
+@c  if the uref is too long to fit on the last line of the paragraph, but it's
+@c  not clear how to do that.  For now explicit @texlinebreak{}s are used on
+@c  paragraphs that come out bad.
+
+@section Books
+
+@itemize @bullet
+@item
+Jonathan M. Borwein and Peter B. Borwein, ``Pi and the AGM: A Study in
+Analytic Number Theory and Computational Complexity'', Wiley, 1998.
+
+@item
+Richard Crandall and Carl Pomerance, ``Prime Numbers: A Computational
+Perspective'', 2nd edition, Springer-Verlag, 2005.
+@texlinebreak{} @uref{http://www.math.dartmouth.edu/~carlp/}
+
+@item
+Henri Cohen, ``A Course in Computational Algebraic Number Theory'', Graduate
+Texts in Mathematics number 138, Springer-Verlag, 1993.
+@texlinebreak{} @uref{http://www.math.u-bordeaux.fr/~cohen/}
+
+@item
+Donald E. Knuth, ``The Art of Computer Programming'', volume 2,
+``Seminumerical Algorithms'', 3rd edition, Addison-Wesley, 1998.
+@texlinebreak{} @uref{http://www-cs-faculty.stanford.edu/~knuth/taocp.html}
+
+@item
+John D. Lipson, ``Elements of Algebra and Algebraic Computing'',
+The Benjamin Cummings Publishing Company Inc, 1981.
+
+@item
+Alfred J. Menezes, Paul C. van Oorschot and Scott A. Vanstone, ``Handbook of
+Applied Cryptography'', @uref{http://www.cacr.math.uwaterloo.ca/hac/}
+
+@item
+Richard M. Stallman and the GCC Developer Community, ``Using the GNU Compiler
+Collection'', Free Software Foundation, 2008, available online
+@uref{http://gcc.gnu.org/onlinedocs/}, and in the GCC package
+@uref{ftp://ftp.gnu.org/gnu/gcc/}
+@end itemize
+
+@section Papers
+
+@itemize @bullet
+@item
+Yves Bertot, Nicolas Magaud and Paul Zimmermann, ``A Proof of GMP Square
+Root'', Journal of Automated Reasoning, volume 29, 2002, pp.@: 225-252.  Also
+available online as INRIA Research Report 4475, June 2002,
+@uref{http://hal.inria.fr/docs/00/07/21/13/PDF/RR-4475.pdf}
+
+@item
+Christoph Burnikel and Joachim Ziegler, ``Fast Recursive Division'',
+Max-Planck-Institut fuer Informatik Research Report MPI-I-98-1-022,
+@texlinebreak{} @uref{http://data.mpi-sb.mpg.de/internet/reports.nsf/NumberView/1998-1-022}
+
+@item
+Torbj@"orn Granlund and Peter L. Montgomery, ``Division by Invariant Integers
+using Multiplication'', in Proceedings of the SIGPLAN PLDI'94 Conference, June
+1994.  Also available @uref{http://gmplib.org/~tege/divcnst-pldi94.pdf}.
+
+@item
+Niels M@"oller and Torbj@"orn Granlund, ``Improved division by invariant
+integers'', IEEE Transactions on Computers, 11 June 2010.
+@uref{http://gmplib.org/~tege/division-paper.pdf}
+
+@item
+Torbj@"orn Granlund and Niels M@"oller, ``Division of integers large and
+small'', to appear.
+
+@item
+Tudor Jebelean,
+``An algorithm for exact division'',
+Journal of Symbolic Computation,
+volume 15, 1993, pp.@: 169-180.
+Research report version available @texlinebreak{}
+@uref{ftp://ftp.risc.uni-linz.ac.at/pub/techreports/1992/92-35.ps.gz}
+
+@item
+Tudor Jebelean, ``Exact Division with Karatsuba Complexity - Extended
+Abstract'', RISC-Linz technical report 96-31, @texlinebreak{}
+@uref{ftp://ftp.risc.uni-linz.ac.at/pub/techreports/1996/96-31.ps.gz}
+
+@item
+Tudor Jebelean, ``Practical Integer Division with Karatsuba Complexity'',
+ISSAC 97, pp.@: 339-341.  Technical report available @texlinebreak{}
+@uref{ftp://ftp.risc.uni-linz.ac.at/pub/techreports/1996/96-29.ps.gz}
+
+@item
+Tudor Jebelean, ``A Generalization of the Binary GCD Algorithm'', ISSAC 93,
+pp.@: 111-116.  Technical report version available @texlinebreak{}
+@uref{ftp://ftp.risc.uni-linz.ac.at/pub/techreports/1993/93-01.ps.gz}
+
+@item
+Tudor Jebelean, ``A Double-Digit Lehmer-Euclid Algorithm for Finding the GCD
+of Long Integers'', Journal of Symbolic Computation, volume 19, 1995,
+pp.@: 145-157.  Technical report version also available @texlinebreak{}
+@uref{ftp://ftp.risc.uni-linz.ac.at/pub/techreports/1992/92-69.ps.gz}
+
+@item
+Werner Krandick and Tudor Jebelean, ``Bidirectional Exact Integer Division'',
+Journal of Symbolic Computation, volume 21, 1996, pp.@: 441-455.  Early
+technical report version also available
+@uref{ftp://ftp.risc.uni-linz.ac.at/pub/techreports/1994/94-50.ps.gz}
+
+@item
+Makoto Matsumoto and Takuji Nishimura, ``Mersenne Twister: A 623-dimensionally
+equidistributed uniform pseudorandom number generator'', ACM Transactions on
+Modeling and Computer Simulation, volume 8, January 1998, pp.@: 3-30.
+Available online @texlinebreak{}
+@uref{http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/ARTICLES/mt.ps.gz} (or .pdf)
+
+@item
+R. Moenck and A. Borodin, ``Fast Modular Transforms via Division'',
+Proceedings of the 13th Annual IEEE Symposium on Switching and Automata
+Theory, October 1972, pp.@: 90-96.  Reprinted as ``Fast Modular Transforms'',
+Journal of Computer and System Sciences, volume 8, number 3, June 1974,
+pp.@: 366-386.
+
+@item
+Niels M@"oller, ``On Sch@"onhage's algorithm and subquadratic integer GCD
+  computation'', in Mathematics of Computation, volume 77, January 2008, pp.@:
+  589-607.
+
+@item
+Peter L. Montgomery, ``Modular Multiplication Without Trial Division'', in
+Mathematics of Computation, volume 44, number 170, April 1985.
+
+@item
+Arnold Sch@"onhage and Volker Strassen, ``Schnelle Multiplikation grosser
+Zahlen'', Computing 7, 1971, pp.@: 281-292.
+
+@item
+Kenneth Weber, ``The accelerated integer GCD algorithm'',
+ACM Transactions on Mathematical Software,
+volume 21, number 1, March 1995, pp.@: 111-122.
+
+@item
+Paul Zimmermann, ``Karatsuba Square Root'', INRIA Research Report 3805,
+November 1999, @uref{http://hal.inria.fr/inria-00072854/PDF/RR-3805.pdf}
+
+@item
+Paul Zimmermann, ``A Proof of GMP Fast Division and Square Root
+Implementations'', @texlinebreak{}
+@uref{http://www.loria.fr/~zimmerma/papers/proof-div-sqrt.ps.gz}
+
+@item
+Dan Zuras, ``On Squaring and Multiplying Large Integers'', ARITH-11: IEEE
+Symposium on Computer Arithmetic, 1993, pp.@: 260 to 271.  Reprinted as ``More
+on Multiplying and Squaring Large Integers'', IEEE Transactions on Computers,
+volume 43, number 8, August 1994, pp.@: 899-908.
+@end itemize
+
+
+@node GNU Free Documentation License, Concept Index, References, Top
+@appendix GNU Free Documentation License
+@cindex GNU Free Documentation License
+@cindex Free Documentation License
+@cindex Documentation license
+@include fdl-1.3.texi
+
+
+@node Concept Index, Function Index, GNU Free Documentation License, Top
+@comment  node-name,  next,  previous,  up
+@unnumbered Concept Index
+@printindex cp
+
+@node Function Index,  , Concept Index, Top
+@comment  node-name,  next,  previous,  up
+@unnumbered Function and Type Index
+@printindex fn
+
+@bye
+
+@c Local variables:
+@c fill-column: 78
+@c compile-command: "make gmp.info"
+@c End:
diff --git a/doc/isa_abi_headache b/doc/isa_abi_headache

new file mode 100644 (file)

index 0000000..753589d
--- /dev/null
+++ b/doc/isa_abi_headache
@@ -0,0 +1,117 @@
+Copyright 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+
+
+Terms Used In This Document:
+  ISA = Instruction Set Architecture.   The instructions the current
+        processor provides.
+  ABI = Application Binary Interface.  Specifies calling convention,
+        type sizes, etc.
+  AR64 = Arithmetic operations are 64-bit using 64-bit instructions
+        (E.g., addition, subtraction, load, store, of 64-bit integer types
+        are done with single instructions, not 32 bits at a time.)
+  Environment = The operating system and compiler.
+
+GMP is a very complex package to build since its speed is very
+sensitive to the ISA and ABI.  For example, if the ISA provides 64-bit
+instructions, it is crucial that GMP is configured to use them.
+
+Most environments that run on a 64-bit ISA provide more than one ABI.
+Typically one of the supported ABI's is a backward compatible 32-bit
+ABI, and one ABI provides 64-bit addressing and `long' (sometimes
+known as LP64).  But a few environments (IRIX, HP-UX) provide
+intermediate ABI's using 32-bit addressing but allow efficient 64-bit
+operations through a `long long' type.  For the latter to be useful to
+GMP, the ABI must allow operations using the native 64-bit
+instructions provided by the ISA, and allow passing of 64-bit
+quantities atomically.
+
+The ABI is typically chosen by means of command line options to the
+compiler tools (gcc, cc, c89, nm, ar, ld, as).  Different environments
+use different defaults, but as of this writing (May 2000) the
+dominating default is to the plain 32-bit ABI in its most arcane form.
+
+The GMP 3.0.x approach was to compile using the ABI that gives the
+best performance.  That places the burden on users to pass special
+options to the compiler when they compile their GMP applications.
+That approach has its advantages and disadvantages.  The main
+advantage is that users don't unknowingly get bad GMP performance.
+The main disadvantage is that users' compiles (actually links) will
+fail unless they pass special compiler options.
+
+** SPARC
+
+System vendors often confuse ABI, ISA, and implementation.  The worst
+case is Solaris, where the unbundled compiler confuses ISA and ABI, and
+the options have very confusing names.
+
+     option            interpretation
+     ======            ==============
+cc   -xarch=v8plus     ISA=sparcv9, ABI=V8plus (PTR=32, see below)
+gcc  -mv8plus          ISA=sparcv9, ABI=V8plus (see below)
+cc   -xarch=v9         ISA=sparcv9, ABI=V9 (implying AR=64, PTR=64)
+
+It's hard to believe, but the option v8plus really means ISA=V9!
+
+Solaris releases prior to version 7 running on a V9 CPU fail to
+save/restore the upper 32 bits of the `i' and `l' registers.  The
+`v8plus' option generates code that uses as many V9 features as
+possible under such circumstances.
+
+** MIPS
+
+The IRIX 6 compilers get things right.  They have a clear
+understanding of the differences between ABI and ISA.  The option
+names are descriptive.
+
+     option            interpretation
+     ======            ==============
+cc   -n32              ABI=n32 (implying AR=64, PTR=32)
+gcc  -mabi=n32         ABI=n32 (implying AR=64, PTR=32)
+cc   -64               ABI=64 (implying AR=64, PTR=64)
+gcc  -mabi=64          ABI=64 (implying AR=64, PTR=64)
+cc   -mips3            ISA=mips3
+gcc  -mips3            ISA=mips3
+cc   -mips4            ISA=mips4
+gcc  -mips4            ISA=mips4
+
+** HP-PA
+
+HP-UX is somewhat weird, but not as broken as Solaris.
+
+     option            interpretation
+     ======            ==============
+cc   +DA2.0            ABI=32bit (implying AR=64, PTR=32)
+cc   +DD64             ABI=64bit (implying AR=64, PTR=64)
+
+Code performing 64-bit arithmetic in the HP-UX 32-bit ABI is not
+compatible with the 64-bit ABI; the former has a calling convention
+that passes/returns 64-bit integer quantities as two 32-bit chunks.
+
+** PowerPC
+
+While the PowerPC ABI's are capable of supporting 64-bit
+registers/operations, the compilers under AIX are similar to Solaris'
+cc in that they don't currently provide any 32-bit addressing with
+64-bit arithmetic.
+
+     option                    interpretation
+     ======                    ==============
+cc   -q64                      ABI=64bit (implying AR=64, PTR=64)
+gcc  -maix64 -mpowerpc64       ABI=64bit (implying AR=64, PTR=64)
diff --git a/doc/mdate-sh b/doc/mdate-sh

new file mode 100755 (executable)

index 0000000..8941738
--- /dev/null
+++ b/doc/mdate-sh
@@ -0,0 +1,176 @@
+#!/bin/sh
+# Print the modification date of a file or directory as "DAY MONTH YEAR".
+
+scriptversion=2003-11-09.00
+
+# Copyright (C) 1995, 1996, 1997, 2003  Free Software Foundation, Inc.
+# written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, June 1995
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
+# This file is maintained in Automake, please report
+# bugs to <bug-automake@gnu.org> or send patches to
+# <automake-patches@gnu.org>.
+
+case $1 in
+  '')
+     echo "$0: No file.  Try \`$0 --help' for more information." 1>&2
+     exit 1;
+     ;;
+  -h | --h*)
+    cat <<\EOF
+Usage: mdate-sh [--help] [--version] FILE
+
+Pretty-print the modification time of FILE.
+
+Report bugs to <bug-automake@gnu.org>.
+EOF
+    exit 0
+    ;;
+  -v | --v*)
+    echo "mdate-sh $scriptversion"
+    exit 0
+    ;;
+esac
+
+# Force the C locale so that ls and date report English month abbreviations.
+LANG=C
+export LANG
+LC_ALL=C
+export LC_ALL
+LC_TIME=C
+export LC_TIME
+
+# GNU ls changes its time format in response to the TIME_STYLE variable, but
+# we cannot unset it since the V7 shell did not have an "unset" command.
+# Instead, when it is already set, override it with "posix-long-iso", which
+# the documentation says is the default; when it is unset, leave it alone.
+test "${TIME_STYLE+set}" = set && TIME_STYLE=posix-long-iso
+
+save_arg1="$1"
+
+# Pick the long-listing ls command: add -L when this ls accepts that option.
+if ls -L /dev/null 1>/dev/null 2>&1; then
+  ls_command='ls -L -l -d'
+else
+  ls_command='ls -l -d'
+fi
+
+# A `ls -l' line looks as follows on OS/2.
+#  drwxrwx---        0 Aug 11  2001 foo
+# This differs from Unix, which adds ownership information.
+#  drwxrwx---   2 root  root      4096 Aug 11  2001 foo
+#
+# To find the date, we split the line on spaces and iterate on words
+# until we find a month.  This cannot work with files whose owner is a
+# user named `Jan', or `Feb', etc.  However, it's unlikely that `/'
+# will be owned by a user whose name is a month.  So we first look at
+# the extended ls output of the root directory to decide how many
+# words should be skipped to get the date.
+
+# On HPUX /bin/sh, "set" interprets "-rw-r--r--" as options, so the "x" below.
+set - x`$ls_command /`
+
+# Scan the words of that listing until one is a month abbreviation.
+month=
+command=
+until test $month
+do
+  shift
+  # Record one more shift, to be replayed later on the target file's listing.
+  command="$command shift;"
+  case $1 in
+    Jan) month=January; nummonth=1;;
+    Feb) month=February; nummonth=2;;
+    Mar) month=March; nummonth=3;;
+    Apr) month=April; nummonth=4;;
+    May) month=May; nummonth=5;;
+    Jun) month=June; nummonth=6;;
+    Jul) month=July; nummonth=7;;
+    Aug) month=August; nummonth=8;;
+    Sep) month=September; nummonth=9;;
+    Oct) month=October; nummonth=10;;
+    Nov) month=November; nummonth=11;;
+    Dec) month=December; nummonth=12;;
+  esac
+done
+
+# Get the extended ls output of the file or directory.
+set - x`eval "$ls_command \"\$save_arg1\""`
+
+# Replay the recorded shifts so that $1 is the month word of this listing.
+eval $command
+
+# Get the month.  Next argument is day, followed by the year or time.
+case $1 in
+  Jan) month=January; nummonth=1;;
+  Feb) month=February; nummonth=2;;
+  Mar) month=March; nummonth=3;;
+  Apr) month=April; nummonth=4;;
+  May) month=May; nummonth=5;;
+  Jun) month=June; nummonth=6;;
+  Jul) month=July; nummonth=7;;
+  Aug) month=August; nummonth=8;;
+  Sep) month=September; nummonth=9;;
+  Oct) month=October; nummonth=10;;
+  Nov) month=November; nummonth=11;;
+  Dec) month=December; nummonth=12;;
+esac
+
+day=$2
+
+# Here we have to deal with the problem that the ls output gives either
+# the time of day or the year.
+case $3 in
+  *:*) set `date`; eval year=\$$#
+       case $2 in
+        Jan) nummonthtod=1;;
+        Feb) nummonthtod=2;;
+        Mar) nummonthtod=3;;
+        Apr) nummonthtod=4;;
+        May) nummonthtod=5;;
+        Jun) nummonthtod=6;;
+        Jul) nummonthtod=7;;
+        Aug) nummonthtod=8;;
+        Sep) nummonthtod=9;;
+        Oct) nummonthtod=10;;
+        Nov) nummonthtod=11;;
+        Dec) nummonthtod=12;;
+       esac
+       # For the first six months of the year the time notation can also
+       # be used for files modified in the last year.
+       if (expr $nummonth \> $nummonthtod) > /dev/null;
+       then
+        year=`expr $year - 1`
+       fi;;
+  *) year=$3;;
+esac
+
+# The result: day of month, full month name, and year.
+echo $day $month $year
+
+# Local Variables:
+# mode: shell-script
+# sh-indentation: 2
+# eval: (add-hook 'write-file-hooks 'time-stamp)
+# time-stamp-start: "scriptversion="
+# time-stamp-format: "%:y-%02m-%02d.%02H"
+# time-stamp-end: "$"
+# End:
diff --git a/doc/projects.html b/doc/projects.html

new file mode 100644 (file)

index 0000000..79e5aa2
--- /dev/null
+++ b/doc/projects.html
@@ -0,0 +1,606 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html>
+<head>
+  <title>GMP Development Projects</title>
+  <link rel="shortcut icon" href="favicon.ico">
+  <link rel="stylesheet" href="gmp.css">
+  <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+</head>
+
+<center>
+  <h1>
+    GMP Development Projects
+  </h1>
+</center>
+
+<font size=-1>
+<pre>
+Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2008, 2009 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published
+by the Free Software Foundation; either version 3 of the License, or (at
+your option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+</pre>
+</font>
+
+<hr>
+<!-- NB. timestamp updated automatically by emacs -->
+  This file current as of 15 Nov 2009.  An up-to-date version is available at
+  <a href="http://gmplib.org/projects.html">http://gmplib.org/projects.html</a>.
+  Please send comments about this page to gmp-devel<font>@</font>gmplib.org.
+
+<p> This file lists projects suitable for volunteers.  Please see the
+    <a href="tasks.html">tasks file</a> for smaller tasks.
+
+<p> If you want to work on any of the projects below, please let
+    gmp-devel<font>@</font>gmplib.org know.  If you want to help with a project
+    that somebody else is already working on, you will get in touch through
+    gmp-devel<font>@</font>gmplib.org.  (There are no email addresses of
+    volunteers below, due to spamming problems.)
+
+<ul>
+<li> <strong>Faster multiplication</strong>
+
+  <p> The current multiplication code uses Karatsuba, 3-way and 4-way Toom, and
+      Fermat FFT.  Several new developments are desirable:
+
+  <ol>
+
+    <li> Write more toom multiply functions for unbalanced operands.  We now have
+        toom22, toom32, toom42, toom62, toom33, toom53, and toom44.  Most
+        desirable is toom43, which will require a new toom_interpolate_6pts
+        function.  Writing toom52 will then be straightforward.  See also
+        <a href="http://bodrato.it/software/toom.html">Marco Bodrato's
+        site</a>
+
+    <li> Perhaps consider N-way Toom, N > 4.  See Knuth's Seminumerical
+        Algorithms for details on the method, as well as Bodrato's site.  Code
+        implementing it exists.  This is asymptotically inferior to FFTs, but
+        is finer grained.
+
+    <li> The mpn_mul call now (from GMP 4.3) uses toom22, toom32, and toom42
+        for unbalanced operations.  We don't use any of the other new toom
+        functions currently.  Write new clever code for choosing the best toom
+        function from an m-limb and an n-limb operand.
+
+    <li> Implement an FFT variant computing the coefficients mod m different
+        limb size primes of the form l*2^k+1. i.e., compute m separate FFTs.
+        The wanted coefficients will at the end be found by lifting with CRT
+        (Chinese Remainder Theorem).  If we let m = 3, i.e., use 3 primes, we
+        can split the operands into coefficients at limb boundaries, and if
+        our machine uses b-bit limbs, we can multiply numbers with close to
+        2^b limbs without coefficient overflow.  For smaller multiplication,
+        we might perhaps let m = 1, and instead of splitting our operands at
+        limb boundaries, split them in much smaller pieces.  We might also use
+        4 or more primes, and split operands into bigger than b-bit chunks.
+        By using more primes, we gain in shorter transform length, but lose
+        in having to do more FFTs, but that is a slight total save.  We then
+        lose in more expensive CRT. <br><br>
+
+        <p> [We now have two implementations of this algorithm, one by Tommy
+        Färnqvist and one by Niels Möller.]
+
+    <li> Add support for short products, either a given number of low limbs, a
+        given number of high limbs, or perhaps the middle limbs of the result.
+        High short product can be used by <code>mpf_mul</code>, by
+        left-to-right Newton approximations, and for quotient approximation.
+        Low half short product can be of use in sub-quadratic REDC and for
+        right-to-left Newton approximations.  On small sizes a short product
+        will be faster simply through fewer cross-products, similar to the way
+        squaring is faster.  But work by Thom Mulders shows that for Karatsuba
+        and higher order algorithms the advantage is progressively lost, so
+        for large sizes short products turn out to be no faster.
+
+  </ol>
+
+  <p> Another possibility would be an optimized cube.  In the basecase that
+      should definitely be able to save cross-products in a similar fashion to
+      squaring, but some investigation might be needed for how best to adapt
+      the higher-order algorithms.  Not sure whether cubing or further small
+      powers have any particularly important uses though.
+
+
+<li> <strong>Assembly routines</strong>
+
+  <p> Write new and improve existing assembly routines.  The tests/devel
+      programs and the tune/speed.c and tune/many.pl programs are useful for
+      testing and timing the routines you write.  See the README files in those
+      directories for more information.
+
+  <p> Please make sure your new routines are fast for these three situations:
+      <ol>
+       <li> Operands that fit into the cache.
+       <li> Small operands of less than, say, 10 limbs.
+       <li> Huge operands that do not fit into the cache.
+      </ol>
+
+  <p> The most important routines are mpn_addmul_1, mpn_mul_basecase and
+      mpn_sqr_basecase.  The latter two don't exist for all machines, while
+      mpn_addmul_1 exists for almost all machines.
+
+  <p> Standard techniques for these routines are unrolling, software
+      pipelining, and specialization for common operand values.  For machines
+      with poor integer multiplication, it is sometimes possible to remedy the
+      situation using floating-point operations or SIMD operations such as MMX
+      (x86), SSE (x86), VMX (PowerPC), VIS (Sparc).
+
+  <p> Using floating-point operations is interesting but somewhat tricky.
+      Since IEEE double has 53 bits of mantissa, one has to split the operands
+      in small pieces, so that no intermediates are greater than 2^53.  For
+      32-bit computers, splitting one operand into 16-bit pieces works.  For
+      64-bit machines, one operand can be split into 21-bit pieces and the
+      other into 32-bit pieces.  (A 64-bit operand can be split into just three
+      21-bit pieces if one allows the split operands to be negative!)
+
+
+<li> <strong>Math functions for the mpf layer</strong>
+
+  <p> Implement the functions of math.h for the GMP mpf layer! Check the book
+      "Pi and the AGM" by Borwein and Borwein for ideas how to do this.  These
+      functions are desirable: acos, acosh, asin, asinh, atan, atanh, atan2,
+      cos, cosh, exp, log, log10, pow, sin, sinh, tan, tanh.
+
+  <p> Note that the <a href="http://mpfr.org">mpfr</a> functions already
+  provide these functions, and that we usually recommend new programs to use
+  mpfr instead of mpf.
+
+
+<li> <strong>Faster sqrt</strong>
+
+  <p> The current code uses divisions, which are reasonably fast, but it'd be
+      possible to use only multiplications by computing 1/sqrt(A) using this
+      iteration:
+      <pre>
+                                   2
+                  x   = x  (3 &minus; A x )/2
+                   i+1   i         i  </pre>
+      The square root can then be computed like this:
+      <pre>
+                    sqrt(A) = A x
+                                 n  </pre>
+  <p> That final multiply might be the full size of the input (though it might
+      only need the high half of that), so there may or may not be any speedup
+      overall.
+
+  <p> We should probably allow a special exponent-like parameter, to speed
+      computations of a precise square root of a small number in mpf and mpfr.
+
+
+<li> <strong>Nth root</strong>
+
+  <p> Improve mpn_rootrem.  The current code is not too bad, but its average
+      time complexity is a function of the input, while it is possible to
+      make it a function of the output.
+
+
+<li> <strong>Exceptions</strong>
+
+  <p> Some sort of scheme for exception handling would be desirable.
+      Presently the only thing documented is that divide by zero in GMP
+      functions provokes a deliberate machine divide by zero (on those systems
+      where such a thing exists at least).  The global <code>gmp_errno</code>
+      is not actually documented, except for the old <code>gmp_randinit</code>
+      function.  Being currently just a plain global means it's not
+      thread-safe.
+
+  <p> The basic choices for exceptions are returning an error code or having a
+      handler function to be called.  The disadvantage of error returns is they
+      have to be checked, leading to tedious and rarely executed code, and
+      strictly speaking such a scheme wouldn't be source or binary compatible.
+      The disadvantage of a handler function is that a <code>longjmp</code> or
+      similar recovery from it may be difficult.  A combination would be
+      possible, for instance by allowing the handler to return an error code.
+
+  <p> Divide-by-zero, sqrt-of-negative, and similar operand range errors can
+      normally be detected at the start of functions, so exception handling
+      would have a clean state.  What's worth considering though is that the
+      GMP function detecting the exception may have been called via some third
+      party library or self contained application module, and hence have
+      various bits of state to be cleaned up above it.  It'd be highly
+      desirable for an exceptions scheme to allow for such cleanups.
+
+  <p> The C++ destructor mechanism could help with cleanups both internally and
+      externally, but being a plain C library we don't want to depend on that.
+
+  <p> A C++ <code>throw</code> might be a good optional extra exceptions
+      mechanism, perhaps under a build option.  For
+      GCC <code>-fexceptions</code> will add the necessary frame information to
+      plain C code, or GMP could be compiled as C++.
+
+  <p> Out-of-memory exceptions are expected to be handled by the
+      <code>mp_set_memory_functions</code> routines, rather than being a
+      prospective part of divide-by-zero etc.  Some similar considerations
+      apply but what differs is that out-of-memory can arise deep within GMP
+      internals.  Even fundamental routines like <code>mpn_add_n</code> and
+      <code>mpn_addmul_1</code> can use temporary memory (for instance on Cray
+      vector systems).  Allowing for an error code return would require an
+      awful lot of checking internally.  Perhaps it'd still be worthwhile, but
+      it'd be a lot of changes and the extra code would probably be rather
+      rarely executed in normal usages.
+
+  <p> A <code>longjmp</code> recovery for out-of-memory will currently, in
+      general, lead to memory leaks and may leave GMP variables operated on in
+      inconsistent states.  Maybe it'd be possible to record recovery
+      information for use by the relevant allocate or reallocate function, but
+      that too would be a lot of changes.
+
+  <p> One scheme for out-of-memory would be to note that all GMP allocations go
+      through the <code>mp_set_memory_functions</code> routines.  So if the
+      application has an intended <code>setjmp</code> recovery point it can
+      record memory activity by GMP and abandon space allocated and variables
+      initialized after that point.  This might be as simple as directing the
+      allocation functions to a separate pool, but in general would have the
+      disadvantage of needing application-level bookkeeping on top of the
+      normal system <code>malloc</code>.  An advantage however is that it needs
+      nothing from GMP itself and on that basis doesn't burden applications not
+      needing recovery.  Note that there are probably some details to be worked
+      out here about reallocs of existing variables, and perhaps about copying
+      or swapping between "permanent" and "temporary" variables.
+
+  <p> Applications desiring a fine-grained error control, for instance a
+      language interpreter, would very possibly not be well served by a scheme
+      requiring <code>longjmp</code>.  Wrapping every GMP function call with a
+      <code>setjmp</code> would be very inconvenient.
+
+  <p> Another option would be to let <code>mpz_t</code> etc hold a sort of NaN,
+      a special value indicating an out-of-memory or other failure.  This would
+      be similar to NaNs in mpfr.  Unfortunately such a scheme could only be
+      used by programs prepared to handle such special values, since for
+      instance a program waiting for some condition to be satisfied could
+      become an infinite loop if it wasn't also watching for NaNs.  The work to
+      implement this would be significant too, lots of checking of inputs and
+      intermediate results.  And if <code>mpn</code> routines were to
+      participate in this (which they would have to internally) a lot of new
+      return values would need to be added, since of course there's no
+      <code>mpz_t</code> etc structure for them to indicate failure in.
+
+  <p> Stack overflow is another possible exception, but perhaps not one that
+      can be easily detected in general.  On i386 GNU/Linux for instance GCC
+      normally doesn't generate stack probes for an <code>alloca</code>, but
+      merely adjusts <code>%esp</code>.  A big enough <code>alloca</code> can
+      miss the stack redzone and hit arbitrary data.  GMP stack usage is
+      normally a function of operand size, which might be enough for some
+      applications to know they'll be safe.  Otherwise a fixed maximum usage
+      can probably be obtained by building with
+      <code>--enable-alloca=malloc-reentrant</code> (or
+      <code>notreentrant</code>).  Arranging the default to be
+      <code>alloca</code> only on blocks up to a certain size and
+      <code>malloc</code> thereafter might be a better approach and would have
+      the advantage of not having calculations limited by available stack.
+
+  <p> Actually recovering from stack overflow is of course another problem.  It
+      might be possible to catch a <code>SIGSEGV</code> in the stack redzone
+      and do something in a <code>sigaltstack</code>, on systems which have
+      that, but recovery might otherwise not be possible.  This is worth
+      bearing in mind because there's no point worrying about tight and careful
+      out-of-memory recovery if an out-of-stack is fatal.
+
+  <p> Operand overflow is another exception to be addressed.  It's easy for
+      instance to ask <code>mpz_pow_ui</code> for a result bigger than an
+      <code>mpz_t</code> can possibly represent.  Currently overflows in limb
+      or byte count calculations will go undetected.  Often they'll still end
+      up asking the memory functions for blocks bigger than available memory,
+      but that's by no means certain and results are unpredictable in general.
+      It'd be desirable to tighten up such size calculations.  Probably only
+      selected routines would need checks, if it's assumed say that no input
+      will be more than half of all memory and hence size additions like say
+      <code>mpz_mul</code> won't overflow.
+
+
+<li> <strong>Performance Tool</strong>
+
+  <p> It'd be nice to have some sort of tool for getting an overview of
+      performance.  Clearly a great many things could be done, but some primary
+      uses would be,
+
+      <ol>
+       <li> Checking speed variations between compilers.
+       <li> Checking relative performance between systems or CPUs.
+      </ol>
+
+  <p> A combination of measuring some fundamental routines and some
+      representative application routines might satisfy these.
+
+  <p> The tune/time.c routines would be the easiest way to get good accurate
+      measurements on lots of different systems.  The high level
+      <code>speed_measure</code> may or may not suit, but the basic
+      <code>speed_starttime</code> and <code>speed_endtime</code> would cover
+      lots of portability and accuracy questions.
+
+
+<li> <strong>Using <code>restrict</code></strong>
+
+  <p> There might be some value in judicious use of C99 style
+      <code>restrict</code> on various pointers, but this would need some
+      careful thought about what it implies for the various operand overlaps
+      permitted in GMP.
+
+  <p> Rumour has it some pre-C99 compilers had <code>restrict</code>, but
+      expressing tighter (or perhaps looser) requirements.  Might be worth
+      investigating that before using <code>restrict</code> unconditionally.
+
+  <p> Loops are presumably where the greatest benefit would be had, by allowing
+      the compiler to advance reads ahead of writes, perhaps as part of loop
+      unrolling.  However critical loops are generally coded in assembler, so
+      there might not be very much to gain.  And on Cray systems the explicit
+      use of <code>_Pragma</code> gives an equivalent effect.
+
+  <p> One thing to note is that Microsoft C headers (on ia64 at least) contain
+      <code>__declspec(restrict)</code>, so a <code>#define</code> of
+      <code>restrict</code> should be avoided.  It might be wisest to set up a
+      <code>gmp_restrict</code>.
+
+
+<li> <strong>Nx1 Division</strong>
+
+  <p> The limb-by-limb dependencies in the existing Nx1 division (and
+      remainder) code mean that chips with multiple execution units or
+      pipelined multipliers are not fully utilized.
+
+  <p> One possibility is to follow the current preinv method but taking two
+      limbs at a time.  That means a 2x2-&gt;4 and a 2x1-&gt;2 multiply for
+      each two limbs processed, and because the 2x2 and 2x1 can each be done in
+      parallel the latency will be not much more than 2 multiplies for two
+      limbs, whereas the single limb method has a 2 multiply latency for just
+      one limb.  A version of <code>mpn_divrem_1</code> doing this has been
+      written in C, but not yet tested on likely chips.  Clearly this scheme
+      would extend to 3x3-&gt;9 and 3x1-&gt;3 etc, though with diminishing
+      returns.
+
+  <p> For <code>mpn_mod_1</code>, Peter L. Montgomery proposes the following
+      scheme.  For a limb R=2^<code>bits_per_mp_limb</code>, pre-calculate
+      values R mod N, R^2 mod N, R^3 mod N, R^4 mod N.  Then take dividend
+      limbs and multiply them by those values, thereby reducing them (moving
+      them down) by the corresponding factor.  The products can be added to
+      produce an intermediate remainder of 2 or 3 limbs to be similarly
+      included in the next step.  The point is that such multiplies can be done
+      in parallel, meaning as little as 1 multiply worth of latency for 4
+      limbs.  If the modulus N is less than R/4 (or is it R/5?) the summed
+      products will fit in 2 limbs, otherwise 3 will be required, but with the
+      high only being small.  Clearly this extends to as many factors of R as a
+      chip can efficiently apply.
+
+  <p> The logical conclusion for powers R^i is a whole array "p[i] = R^i mod N"
+      for i up to k, the size of the dividend.  This could then be applied at
+      multiplier throughput speed like an inner product.  If the powers took
+      roughly k divide steps to calculate then there'd be an advantage any time
+      the same N was used three or more times.  Suggested by Victor Shoup in
+      connection with Chinese-remainder style decompositions, but perhaps with
+      other uses.
+
+  <p> <code>mpn_modexact_1_odd</code> calculates an x in the range 0&lt;=x&lt;d
+      satisfying a = q*d + x*b^n, where b=2^bits_per_limb.  The factor b^n
+      needed to get the true remainder r could be calculated by a powering
+      algorithm, allowing <code>mpn_modexact_1_odd</code> to be pressed into
+      service for an <code>mpn_mod_1</code>.  <code>modexact_1</code> is
+      simpler and on some chips can run noticeably faster than plain
+      <code>mod_1</code>, on Athlon for instance 11 cycles/limb instead of 17.
+      Such a difference could soon overcome the time to calculate b^n.  The
+      requirement for an odd divisor in <code>modexact</code> can be handled by
+      some shifting on-the-fly, or perhaps by an extra partial-limb step at the
+      end.
+
+
+<li> <strong>Factorial</strong>
+
+  <p> The removal of twos in the current code could be extended to factors of 3
+      or 5.  Taking this to its logical conclusion would be a complete
+      decomposition into powers of primes.  The power for a prime p is of
+      course floor(n/p)+floor(n/p^2)+...  Conrad Curry found this is quite fast
+      (using simultaneous powering as per Handbook of Applied Cryptography
+      algorithm 14.88).
+
+  <p> A difficulty with using all primes is that quite large n can be
+      calculated on a system with enough memory, larger than we'd probably want
+      for a table of primes, so some sort of sieving would be wanted.  Perhaps
+      just taking out the factors of 3 and 5 would give most of the speedup
+      that a prime decomposition can offer.
+
+
+<li> <strong>Binomial Coefficients</strong>
+
+  <p> An obvious improvement to the current code would be to strip factors of 2
+      from each multiplier and divisor and count them separately, to be applied
+      with a bit shift at the end.  Factors of 3 and perhaps 5 could even be
+      handled similarly.
+
+  <p> Conrad Curry reports a big speedup for binomial coefficients using a
+      prime powering scheme, at least for k near n/2.  Of course this is only
+      practical for moderate size n since again it requires primes up to n.
+
+  <p> When k is small the current (n-k+1)...n/1...k will be fastest.  Some sort
+      of rule would be needed for when to use this or when to use prime
+      powering.  Such a rule will be a function of both n and k.  Some
+      investigation is needed to see what sort of shape the crossover line will
+      have, the usual parameter tuning can of course find machine dependent
+      constants to fill in where necessary.
+
+  <p> An easier possibility also reported by Conrad Curry is that it may be
+      faster not to divide out the denominator (1...k) one limb at a time, but
+      do one big division at the end.  Is this because a big divisor in
+      <code>mpn_bdivmod</code> trades the latency of
+      <code>mpn_divexact_1</code> for the throughput of
+      <code>mpn_submul_1</code>?  Overheads must hurt though.
+
+  <p> Another reason a big divisor might help is that
+      <code>mpn_divexact_1</code> won't be getting a full limb in
+      <code>mpz_bin_uiui</code>.  It's called when the n accumulator is full
+      but the k may be far from full.  Perhaps the two could be decoupled so k
+      is applied when full.  It'd be necessary to delay consideration of k
+      terms until the corresponding n terms had been applied though, since
+      otherwise the division won't be exact.
+
+
+<li> <strong>Perfect Power Testing</strong>
+
+  <p> <code>mpz_perfect_power_p</code> could be improved in a number of ways,
+      for instance p-adic arithmetic to find possible roots.
+
+  <p> Non-powers can be quickly identified by checking for Nth power residues
+      modulo small primes, like <code>mpn_perfect_square_p</code> does for
+      squares.  The residues to each power N for a given remainder could be
+      grouped into a bit mask, the masks for the remainders to each divisor
+      would then be "and"ed together to hopefully leave only a few candidate
+      powers.  Need to think about how wide to make such masks, ie. how many
+      powers to examine in this way.
+
+  <p> Any zero remainders found in residue testing reveal factors which can be
+      divided out, with the multiplicity restricting the powers that need to be
+      considered, as per the current code.  Further prime dividing should be
+      grouped into limbs like <code>PP</code>.  Need to think about how much
+      dividing to do like that, probably more for bigger inputs, less for
+      smaller inputs.
+
+  <p> <code>mpn_gcd_1</code> would probably be better than the current private
+      GCD routine.  The use it's put to isn't time-critical, and it might help
+      ensure correctness to just use the main GCD routine.
+
+  <p> [There is work-in-progress with a very fast function.]
+
+
+<li> <strong>Prime Testing</strong>
+
+  <p> GMP is not really a number theory library and probably shouldn't have
+      large amounts of code dedicated to sophisticated prime testing
+      algorithms, but basic things well-implemented would suit.  Tests offering
+      certainty are probably all too big or too slow (or both!) to justify
+      inclusion in the main library.  Demo programs showing some possibilities
+      would be good though.
+
+  <p> The present "repetitions" argument to <code>mpz_probab_prime_p</code> is
+      rather specific to the Miller-Rabin tests of the current implementation.
+      Better would be some sort of parameter asking perhaps for a maximum
+      chance 1/2^x of a probable prime in fact being composite.  If
+      applications follow the advice that the present reps gives 1/4^reps
+      chance then perhaps such a change is unnecessary, but an explicitly
+      described 1/2^x would allow for changes in the implementation or even for
+      new proofs about the theory.
+
+  <p> <code>mpz_probab_prime_p</code> always initializes a new
+      <code>gmp_randstate_t</code> for randomized tests, which unfortunately
+      means it's not really very random and in particular always runs the same
+      tests for a given input.  Perhaps a new interface could accept an rstate
+      to use, so successive tests could increase confidence in the result.
+
+  <p> <code>mpn_mod_34lsub1</code> is an obvious and easy improvement to the
+      trial divisions.  And since the various prime factors are constants, the
+      remainder can be tested with something like
+<pre>
+#define MP_LIMB_DIVISIBLE_7_P(n) \
+  ((n) * MODLIMB_INVERSE_7 &lt;= MP_LIMB_T_MAX/7)
+</pre>
+      Which would help compilers that don't know how to optimize divisions by
+      constants, and is even an improvement on current gcc 3.2 code.  This
+      technique works for any modulus, see Granlund and Montgomery "Division by
+      Invariant Integers" section 9.
+
+  <p> The trial divisions are done with primes generated and grouped at
+      runtime.  This could instead be a table of data, with pre-calculated
+      inverses too.  Storing deltas, ie. amounts to add, rather than actual
+      primes would save space.  <code>udiv_qrnnd_preinv</code> style inverses
+      can be made to exist by adding dummy factors of 2 if necessary.  Some
+      thought needs to be given as to how big such a table should be, based on
+      how much dividing would be profitable for what sort of size inputs.  The
+      data could be shared by the perfect power testing.
+
+  <p> Jason Moxham points out that if a sqrt(-1) mod N exists then any factor
+      of N must be == 1 mod 4, saving half the work in trial dividing.  (If
+      x^2==-1 mod N then for a prime factor p we have x^2==-1 mod p and so the
+      jacobi symbol (-1/p)=1.  But also (-1/p)=(-1)^((p-1)/2), hence must have
+      p==1 mod 4.)  But knowing whether sqrt(-1) mod N exists is not too easy.
+      A strong pseudoprime test can reveal one, so perhaps such a test could be
+      inserted part way through the dividing.
+
+  <p> Jon Grantham "Frobenius Pseudoprimes" (www.pseudoprime.com) describes a
+      quadratic pseudoprime test taking about 3x longer than a plain test, but
+      with only a 1/7710 chance of error (whereas 3 plain Miller-Rabin tests
+      would offer only (1/4)^3 == 1/64).  Such a test needs completely random
+      parameters to satisfy the theory, though single-limb values would run
+      faster.  It's probably best to do at least one plain Miller-Rabin before
+      any quadratic tests, since that can identify composites in less total
+      time.
+
+  <p> Some thought needs to be given to the structure of which tests (trial
+      division, Miller-Rabin, quadratic) and how many are done, based on what
+      sort of inputs we expect, with a view to minimizing average time.
+
+  <p> It might be a good idea to break out subroutines for the various tests,
+      so that an application can combine them in ways it prefers, if sensible
+      defaults in <code>mpz_probab_prime_p</code> don't suit.  In particular
+      this would let applications skip tests they know would be unprofitable,
+      like trial dividing when an input is already known to have no small
+      factors.
+
+  <p> For small inputs, combinations of theory and explicit search make it
+      relatively easy to offer certainty.  For instance numbers up to 2^32
+      could be handled with a strong pseudoprime test and table lookup.  But
+      it's rather doubtful whether a smallnum prime test belongs in a bignum
+      library.  Perhaps if it had other internal uses.
+
+  <p> An <code>mpz_nthprime</code> might be cute, but is almost certainly
+      impractical for anything but small n.
+
+
+<li> <strong>Intra-Library Calls</strong>
+
+  <p> On various systems, calls within libgmp still go through the PLT, TOC or
+      other mechanism, which makes the code bigger and slower than it needs to
+      be.
+
+  <p> The theory would be to have all GMP intra-library calls resolved directly
+      to the routines in the library.  An application wouldn't be able to
+      replace a routine, the way it can normally, but there seems no good
+      reason to do that, in normal circumstances.
+
+  <p> The <code>visibility</code> attribute in recent gcc is good for this,
+      because it lets gcc omit unnecessary GOT pointer setups or whatever if it
+      finds all calls are local and there's no global data references.
+      Documented entrypoints would be <code>protected</code>, and purely
+      internal things not wanted by test programs or anything can be
+      <code>internal</code>.
+
+  <p> Unfortunately, on i386 it seems <code>protected</code> ends up causing
+      text segment relocations within libgmp.so, meaning the library code can't
+      be shared between processes, defeating the purpose of a shared library.
+      Perhaps this is just a gremlin in binutils (debian packaged
+      2.13.90.0.16-1).
+
+  <p> The linker can be told directly (with a link script, or options) to do
+      the same sort of thing.  This doesn't change the code emitted by gcc of
+      course, but it does mean calls are resolved directly to their targets,
+      avoiding a PLT entry.
+
+  <p> Keeping symbols private to libgmp.so is probably a good thing in general
+      too, to stop anyone even attempting to access them.  But some
+      undocumented things will need or want to be kept visible, for use by
+      mpfr, or the test and tune programs.  Libtool has a standard option for
+      selecting public symbols (used now for libmp).
+
+
+</ul>
+<hr>
+
+</body>
+</html>
+
+<!--
+Local variables:
+eval: (add-hook 'write-file-hooks 'time-stamp)
+time-stamp-start: "This file current as of "
+time-stamp-format: "%:d %3b %:y"
+time-stamp-end: "\\."
+time-stamp-line-limit: 50
+End:
+-->
diff --git a/doc/stamp-vti b/doc/stamp-vti

new file mode 100644 (file)

index 0000000..bcfbdca
--- /dev/null
+++ b/doc/stamp-vti
@@ -0,0 +1,4 @@
+@set UPDATED 6 May 2012
+@set UPDATED-MONTH May 2012
+@set EDITION 5.0.5
+@set VERSION 5.0.5
diff --git a/doc/tasks.html b/doc/tasks.html

new file mode 100644 (file)

index 0000000..d86e794
--- /dev/null
+++ b/doc/tasks.html
@@ -0,0 +1,910 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html>
+<head>
+  <title>GMP Itemized Development Tasks</title>
+  <link rel="shortcut icon" href="favicon.ico">
+  <link rel="stylesheet" href="gmp.css">
+  <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+</head>
+
+<center>
+  <h1>
+    GMP Itemized Development Tasks
+  </h1>
+</center>
+
+<font size=-1>
+<pre>
+Copyright 2000, 2001, 2002, 2003, 2004, 2006, 2008, 2009 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published
+by the Free Software Foundation; either version 3 of the License, or (at
+your option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+</pre>
+</font>
+
+<hr>
+<!-- NB. timestamp updated automatically by emacs -->
+  This file current as of 28 Dec 2009.  An up-to-date version is available at
+  <a href="http://gmplib.org/tasks.html">http://gmplib.org/tasks.html</a>.
+  Please send comments about this page to gmp-devel<font>@</font>gmplib.org.
+
+<p> These are itemized GMP development tasks.  Not all the tasks
+    listed here are suitable for volunteers, but many of them are.
+    Please see the <a href="projects.html">projects file</a> for more
+    sizeable projects.
+
+<p> CAUTION: This file needs updating.  Many of the tasks here have
+either already been taken care of, or have become irrelevant.
+
+<h4>Correctness and Completeness</h4>
+<ul>
+<li> <code>_LONG_LONG_LIMB</code> in gmp.h is not namespace clean.  Reported
+     by Patrick Pelissier.
+     <br>
+     We sort of mentioned <code>_LONG_LONG_LIMB</code> in past releases, so
+     need to be careful about changing it.  It used to be a define
+     applications had to set for long long limb systems, but that in
+     particular is no longer relevant now that it's established automatically.
+<li> The various reuse.c tests need to force reallocation by calling
+     <code>_mpz_realloc</code> with a small (1 limb) size.
+<li> One reuse case is missing from mpX/tests/reuse.c:
+     <code>mpz_XXX(a,a,a)</code>.
+<li> When printing <code>mpf_t</code> numbers with exponents &gt;2^53 on
+     machines with 64-bit <code>mp_exp_t</code>, the precision of
+     <code>__mp_bases[base].chars_per_bit_exactly</code> is insufficient and
+     <code>mpf_get_str</code> aborts.  Detect and compensate.  Alternately,
+     think seriously about using some sort of fixed-point integer value.
+     Avoiding unnecessary floating point is probably a good thing in general,
+     and it might be faster on some CPUs.
+<li> Make the string reading functions allow the `0x' prefix when the base is
+     explicitly 16.  They currently only allow that prefix when the base is
+     unspecified (zero).
+<li> <code>mpf_eq</code> is not always correct, when one operand is
+     1000000000... and the other operand is 0111111111..., i.e., extremely
+     close.  There is a special case in <code>mpf_sub</code> for this
+     situation; put similar code in <code>mpf_eq</code>.  [In progress.]
+<li> <code>mpf_eq</code> doesn't implement what gmp.texi specifies.  It should
+     not use just whole limbs, but partial limbs.  [In progress.]
+<li> <code>mpf_set_str</code> doesn't validate its exponent, for instance
+     garbage 123.456eX789X is accepted (and an exponent 0 used), and overflow
+     of a <code>long</code> is not detected.
+<li> <code>mpf_add</code> doesn't check for a carry from truncated portions of
+     the inputs, and in that respect doesn't implement the "infinite precision
+     followed by truncate" specified in the manual.
+<li> Windows DLLs: tests/mpz/reuse.c and tests/mpf/reuse.c initialize global
+     variables with pointers to <code>mpz_add</code> etc, which doesn't work
+     when those routines are coming from a DLL (because they're effectively
+     function pointer global variables themselves).  Need to rearrange perhaps
+     to a set of calls to a test function rather than iterating over an array.
+<li> <code>mpz_pow_ui</code>: Detect when the result would be more memory than
+     a <code>size_t</code> can represent and raise some suitable exception,
+     probably an alloc call asking for <code>SIZE_T_MAX</code>, and if that
+     somehow succeeds then an <code>abort</code>.  Various size overflows of
+     this kind are not handled gracefully, probably resulting in segvs.
+     <br>
+     In <code>mpz_n_pow_ui</code>, detect when the count of low zero bits
+     exceeds an <code>unsigned long</code>.  There's a (small) chance of this
+     happening but still having enough memory to represent the value.
+     Reported by Winfried Dreckmann in for instance <code>mpz_ui_pow_ui (x,
+     4UL, 1431655766UL)</code>.
+<li> <code>mpf</code>: Detect exponent overflow and raise some exception.
+     It'd be nice to allow the full <code>mp_exp_t</code> range since that's
+     how it's been in the past, but maybe dropping one bit would make it
+     easier to test if e1+e2 goes out of bounds.
+</ul>
+
+
+
+<h4>Machine Independent Optimization</h4>
+<ul>
+<li> <code>mpf_cmp</code>: For better cache locality, don't test for low zero
+     limbs until the high limbs fail to give an ordering.  Reduce code size by
+     turning the three <code>mpn_cmp</code>'s into a single loop stopping when
+     the end of one operand is reached (and then looking for a non-zero in the
+     rest of the other).
+<li> <code>mpf_mul_2exp</code>, <code>mpf_div_2exp</code>: The use of
+     <code>mpn_lshift</code> for any size&lt;=prec means repeated
+     <code>mul_2exp</code> and <code>div_2exp</code> calls accumulate low zero
+     limbs until size==prec+1 is reached.  Those zeros will slow down
+     subsequent operations, especially if the value is otherwise only small.
+     If low bits of the low limb are zero, use <code>mpn_rshift</code> so as
+     to not increase the size.
+<li> <code>mpn_dc_sqrtrem</code>: Don't use <code>mpn_addmul_1</code> with
+     multiplier==2, instead either <code>mpn_addlsh1_n</code> when available,
+     or <code>mpn_lshift</code>+<code>mpn_add_n</code> if not.
+<li> <code>mpn_dc_sqrtrem</code>, <code>mpn_sqrtrem2</code>: Don't use
+     <code>mpn_add_1</code> and <code>mpn_sub_1</code> for 1 limb operations,
+     instead <code>ADDC_LIMB</code> and <code>SUBC_LIMB</code>.
+<li> <code>mpn_sqrtrem2</code>: Use plain variables for <code>sp[0]</code> and
+     <code>rp[0]</code> calculations, so the compiler needn't worry about
+     aliasing between <code>sp</code> and <code>rp</code>.
+<li> <code>mpn_sqrtrem</code>: Some work can be saved in the last step when
+     the remainder is not required, as noted in Paul's paper.
+<li> <code>mpq_add</code>, <code>mpq_sub</code>: The division "op1.den / gcd"
+     is done twice, where of course only once is necessary.  Reported by Larry
+     Lambe.
+<li> <code>mpq_add</code>, <code>mpq_sub</code>: The gcd fits a single limb
+     with high probability and in this case <code>modlimb_invert</code> could
+     be used to calculate the inverse just once for the two exact divisions
+     "op1.den / gcd" and "op2.den / gcd", rather than letting
+     <code>mpn_divexact_1</code> do it each time.  This would require a new
+     <code>mpn_preinv_divexact_1</code> interface.  Not sure if it'd be worth
+     the trouble.
+<li> <code>mpq_add</code>, <code>mpq_sub</code>: The use of
+     <code>mpz_mul(x,y,x)</code> causes temp allocation or copying in
+     <code>mpz_mul</code> which can probably be avoided.  A rewrite using
+     <code>mpn</code> might be best.
+<li> <code>mpn_gcdext</code>: Don't test <code>count_leading_zeros</code> for
+     zero, instead check the high bit of the operand and avoid invoking
+     <code>count_leading_zeros</code>.  This is an optimization on all
+     machines, and significant on machines with slow
+     <code>count_leading_zeros</code>, though it's possible an already
+     normalized operand might not be encountered very often.
+<li> Rewrite <code>umul_ppmm</code> to use floating-point for generating the
+     most significant limb (if <code>GMP_LIMB_BITS</code> &lt;= 52 bits).
+     (Peter Montgomery has some ideas on this subject.)
+<li> Improve the default <code>umul_ppmm</code> code in longlong.h: Add partial
+     products with fewer operations.
+<li> Consider inlining <code>mpz_set_ui</code>.  This would be both small and
+     fast, especially for compile-time constants, but would make application
+     binaries depend on having 1 limb allocated to an <code>mpz_t</code>,
+     preventing the "lazy" allocation scheme below.
+<li> Consider inlining <code>mpz_[cft]div_ui</code> and maybe
+     <code>mpz_[cft]div_r_ui</code>.  A <code>__gmp_divide_by_zero</code>
+     would be needed for the divide by zero test, unless that could be left to
+     <code>mpn_mod_1</code> (not sure currently whether all the risc chips
+     provoke the right exception there if using mul-by-inverse).
+<li> Consider inlining: <code>mpz_fits_s*_p</code>.  The setups for
+     <code>LONG_MAX</code> etc would need to go into gmp.h, and on Cray it
+     might, unfortunately, be necessary to forcibly include &lt;limits.h&gt;
+     since there's no apparent way to get <code>SHRT_MAX</code> with an
+     expression (since <code>short</code> and <code>unsigned short</code> can
+     be different sizes).
+<li> <code>mpz_powm</code> and <code>mpz_powm_ui</code> aren't very
+     fast on one or two limb moduli, due to a lot of function call
+     overheads.  These could perhaps be handled as special cases.
+<li> <code>mpz_powm</code> and <code>mpz_powm_ui</code> want better
+     algorithm selection, and the latter should use REDC.  Both could
+     change to use an <code>mpn_powm</code> and <code>mpn_redc</code>.
+<li> <code>mpz_powm</code> REDC should do multiplications by <code>g[]</code>
+     using the division method when they're small, since the REDC form of a
+     small multiplier is normally a full size product.  Probably would need a
+     new tuned parameter to say what size multiplier is "small", as a function
+     of the size of the modulus.
+<li> <code>mpz_powm</code> REDC should handle even moduli if possible.  Maybe
+     this would mean for m=n*2^k doing mod n using REDC and an auxiliary
+     calculation mod 2^k, then putting them together at the end.
+<li> <code>mpn_gcd</code> might be able to be sped up on small to
+     moderate sizes by improving <code>find_a</code>, possibly just by
+     providing an alternate implementation for CPUs with slowish
+     <code>count_leading_zeros</code>.
+<li> Toom3 could use a low to high cache localized evaluate and interpolate.
+     The necessary <code>mpn_divexact_by3c</code> exists.
+<li> <code>mpf_set_str</code> produces low zero limbs when a string has a
+     fraction but is exactly representable, eg. 0.5 in decimal.  These could be
+     stripped to save work in later operations.
+<li> <code>mpz_and</code>, <code>mpz_ior</code> and <code>mpz_xor</code> should
+     use <code>mpn_and_n</code> etc for the benefit of the small number of
+     targets with native versions of those routines.  Need to be careful not to
+     pass size==0.  Is some code sharing possible between the <code>mpz</code>
+     routines?
+<li> <code>mpf_add</code>: Don't do a copy to avoid overlapping operands
+     unless it's really necessary (currently only sizes are tested, not
+     whether r really is u or v).
+<li> <code>mpf_add</code>: Under the check for v having no effect on the
+     result, perhaps test for r==u and do nothing in that case, rather than
+     the <code>MPN_COPY_INCR</code> which it looks like is currently done to
+     reduce prec+1 limbs to prec.
+<li> <code>mpf_div_ui</code>: Instead of padding with low zeros, call
+     <code>mpn_divrem_1</code> asking for fractional quotient limbs.
+<li> <code>mpf_div_ui</code>: Eliminate <code>TMP_ALLOC</code>.  When r!=u
+     there's no overlap and the division can be called on those operands.
+     When r==u and is prec+1 limbs, then it's an in-place division.  If r==u
+     and not prec+1 limbs, then move the available limbs up to prec+1 and do
+     an in-place there.
+<li> <code>mpf_div_ui</code>: Whether the high quotient limb is zero can be
+     determined by testing the dividend for high&lt;divisor.  When non-zero, the
+     division can be done on prec dividend limbs instead of prec+1.  The result
+     size is also known before the division, so that can be a tail call (once
+     the <code>TMP_ALLOC</code> is eliminated).
+<li> <code>mpn_divrem_2</code> could usefully accept unnormalized divisors and
+     shift the dividend on-the-fly, since this should cost nothing on
+     superscalar processors and avoid the need for temporary copying in
+     <code>mpn_tdiv_qr</code>.
+<li> <code>mpf_sqrt</code>: If r!=u, and if u doesn't need to be padded with
+     zeros, then there's no need for the tp temporary.
+<li> <code>mpq_cmp_ui</code> could form the <code>num1*den2</code> and
+     <code>num2*den1</code> products limb-by-limb from high to low and look at
+     each step for values differing by more than the possible carry bit from
+     the uncalculated portion.
+<li> <code>mpq_cmp</code> could do the same high-to-low progressive multiply
+     and compare.  The benefits of karatsuba and higher multiplication
+     algorithms are lost, but if it's assumed only a few high limbs will be
+     needed to determine an order then that's fine.
+<li> <code>mpn_add_1</code>, <code>mpn_sub_1</code>, <code>mpn_add</code>,
+     <code>mpn_sub</code>: Internally use <code>__GMPN_ADD_1</code> etc
+     instead of the functions, so they get inlined on all compilers, not just
+     gcc and others with <code>inline</code> recognised in gmp.h.
+     <code>__GMPN_ADD_1</code> etc are meant mostly to support application
+     inline <code>mpn_add_1</code> etc and if they don't come out good for
+     internal uses then special forms can be introduced, for instance many
+     internal uses are in-place.  Sometimes a block of code is executed based
+     on the carry-out, rather than using it arithmetically, and those places
+     might want to do their own loops entirely.
+<li> <code>__gmp_extract_double</code> on 64-bit systems could use just one
+     bitfield for the mantissa extraction, not two, when endianness permits.
+     Might depend on the compiler allowing <code>long long</code> bit fields
+     when that's the only actual 64-bit type.
+<li> tal-notreent.c could keep a block of memory permanently allocated.
+     Currently the last nested <code>TMP_FREE</code> releases all memory, so
+     there's an allocate and free every time a top-level function using
+     <code>TMP</code> is called.  Would need
+     <code>mp_set_memory_functions</code> to tell tal-notreent.c to release
+     any cached memory when changing allocation functions though.
+<li> <code>__gmp_tmp_alloc</code> from tal-notreent.c could be partially
+     inlined.  If the current chunk has enough room then a couple of pointers
+     can be updated.  Only if more space is required then a call to some sort
+     of <code>__gmp_tmp_increase</code> would be needed.  The requirement that
+     <code>TMP_ALLOC</code> is an expression might make the implementation a
+     bit ugly and/or a bit sub-optimal.
+<pre>
+#define TMP_ALLOC(n) \
+  ((ROUND_UP(n) &gt; current-&gt;end - current-&gt;point ? \
+     __gmp_tmp_increase (ROUND_UP (n)) : 0), \
+     current-&gt;point += ROUND_UP (n), \
+     current-&gt;point - ROUND_UP (n))
+</pre>
+<li> <code>__mp_bases</code> has a lot of data for bases which are pretty much
+     never used.  Perhaps the table should just go up to base 16, and have
+     code to generate data above that, if and when required.  Naturally this
+     assumes the code would be smaller than the data saved.
+<li> <code>__mp_bases</code> field <code>big_base_inverted</code> is only used
+     if <code>USE_PREINV_DIVREM_1</code> is true, and could be omitted
+     otherwise, to save space.
+<li> <code>mpz_get_str</code>, <code>mtox</code>: For power-of-2 bases, which
+     are of course fast, it seems a little silly to make a second pass over
+     the <code>mpn_get_str</code> output to convert to ASCII.  Perhaps combine
+     that with the bit extractions.
+<li> <code>mpz_gcdext</code>: If the caller requests only the S cofactor (of
+     A), and A&lt;B, then the code ends up generating the cofactor T (of B) and
+     deriving S from that.  Perhaps it'd be possible to arrange to get S in
+     the first place by calling <code>mpn_gcdext</code> with A+B,B.  This
+     might only be an advantage if A and B are about the same size.
+<li> <code>mpz_n_pow_ui</code> does a good job with small bases and stripping
+     powers of 2, but it's perhaps a bit too complicated for what it gains.
+     The simpler <code>mpn_pow_1</code> is a little faster on small exponents.
+     (Note some of the ugliness in <code>mpz_n_pow_ui</code> is due to
+     supporting <code>mpn_mul_2</code>.)
+     <br>
+     Perhaps the stripping of 2s in <code>mpz_n_pow_ui</code> should be
+     confined to single limb operands for simplicity and since that's where
+     the greatest gain would be.
+     <br>
+     Ideally <code>mpn_pow_1</code> and <code>mpz_n_pow_ui</code> would be
+     merged.  The reason <code>mpz_n_pow_ui</code> writes to an
+     <code>mpz_t</code> is that its callers leave it to make a good estimate
+     of the result size.  Callers of <code>mpn_pow_1</code> already know the
+     size by separate means (<code>mp_bases</code>).
+<li> <code>mpz_invert</code> should call <code>mpn_gcdext</code> directly.
+</ul>
+
+
+<h4>Machine Dependent Optimization</h4>
+<ul>
+<li> <code>invert_limb</code> on various processors might benefit from the
+     little Newton iteration done for alpha and ia64.
+<li> Alpha 21264: <code>mpn_addlsh1_n</code> could be implemented with
+     <code>mpn_addmul_1</code>, since that code at 3.5 is a touch faster than
+     a separate <code>lshift</code> and <code>add_n</code> at
+     1.75+2.125=3.875.  Or very likely some specific <code>addlsh1_n</code>
+     code could beat both.
+<li> Alpha 21264: Improve feed-in code for <code>mpn_mul_1</code>,
+     <code>mpn_addmul_1</code>, and <code>mpn_submul_1</code>.
+<li> Alpha 21164: Rewrite <code>mpn_mul_1</code>, <code>mpn_addmul_1</code>,
+     and <code>mpn_submul_1</code> for the 21164.  This should use both integer
+     multiplies and floating-point multiplies.  For the floating-point
+     operations, the single-limb multiplier should be split into three 21-bit
+     chunks, or perhaps even better in four 16-bit chunks.  Probably possible
+     to reach 9 cycles/limb.
+<li> Alpha: GCC 3.4 will introduce <code>__builtin_ctzl</code>,
+     <code>__builtin_clzl</code> and <code>__builtin_popcountl</code> using
+     the corresponding CIX <code>ct</code> instructions, and
+     <code>__builtin_alpha_cmpbge</code>.  These should give GCC more
+     information about scheduling etc than the <code>asm</code> blocks
+     currently used in longlong.h and gmp-impl.h.
+<li> Alpha Unicos: Apparently there's no <code>alloca</code> on this system,
+     making <code>configure</code> choose the slower
+     <code>malloc-reentrant</code> allocation method.  Is there a better way?
+     Maybe variable-length arrays per notes below.
+<li> Alpha Unicos 21164, 21264: <code>.align</code> is not used since it pads
+     with garbage.  Does the code get the intended slotting required for the
+     claimed speeds?  <code>.align</code> at the start of a function would
+     presumably be safe no matter how it pads.
+<li> ARM V5: <code>count_leading_zeros</code> can use the <code>clz</code>
+     instruction.  For GCC 3.4 and up, do this via <code>__builtin_clzl</code>
+     since then gcc knows it's "predicable".
+<li> Itanium: GCC 3.4 introduces <code>__builtin_popcount</code> which can be
+     used instead of an <code>asm</code> block.  The builtin should give gcc
+     more opportunities for scheduling, bundling and predication.
+     <code>__builtin_ctz</code> similarly (it just uses popcount as per
+     current longlong.h).
+<li> UltraSPARC/64: Optimize <code>mpn_mul_1</code>, <code>mpn_addmul_1</code>,
+     for s2 &lt; 2^32 (or perhaps for any zero 16-bit s2 chunk).  Not sure how
+     much this can improve the speed, though, since the symmetry that we rely
+     on is lost.  Perhaps we can just gain cycles when s2 &lt; 2^16, or more
+     accurately, when two 16-bit s2 chunks which are 16 bits apart are zero.
+<li> UltraSPARC/64: Write native <code>mpn_submul_1</code>, analogous to
+     <code>mpn_addmul_1</code>.
+<li> UltraSPARC/64: Write <code>umul_ppmm</code>.  Using four
+     "<code>mulx</code>"s either with an asm block or via the generic C code is
+     about 90 cycles.  Try using fp operations, and also try using karatsuba
+     for just three "<code>mulx</code>"s.
+<li> UltraSPARC/32: Rewrite <code>mpn_lshift</code>, <code>mpn_rshift</code>.
+     Will give 2 cycles/limb.  Trivial modifications of mpn/sparc64 should do.
+<li> UltraSPARC/32: Write special mpn_Xmul_1 loops for s2 &lt; 2^16.
+<li> UltraSPARC/32: Use <code>mulx</code> for <code>umul_ppmm</code> if
+     possible (see commented out code in longlong.h).  This is unlikely to
+     save more than a couple of cycles, so perhaps isn't worth bothering with.
+<li> UltraSPARC/32: On Solaris gcc doesn't give us <code>__sparc_v9__</code>
+     or anything to indicate V9 support when -mcpu=v9 is selected.  See
+     gcc/config/sol2-sld-64.h.  Will need to pass something through from
+     ./configure to select the right code in longlong.h.  (Currently nothing
+     is lost because <code>mulx</code> for multiplying is commented out.)
+<li> UltraSPARC/32: <code>mpn_divexact_1</code> and
+     <code>mpn_modexact_1c_odd</code> can use a 64-bit inverse and take
+     64-bits at a time from the dividend, as per the 32-bit divisor case in
+     mpn/sparc64/mode1o.c.  This must be done in assembler, since the full
+     64-bit registers (<code>%gN</code>) are not available from C.
+<li> UltraSPARC/32: <code>mpn_divexact_by3c</code> can work 64-bits at a time
+     using <code>mulx</code>, in assembler.  This would be the same as for
+     sparc64.
+<li> UltraSPARC: <code>modlimb_invert</code> might save a few cycles from
+     masking down to just the useful bits at each point in the calculation,
+     since <code>mulx</code> speed depends on the highest bit set.  Either
+     explicit masks or small types like <code>short</code> and
+     <code>int</code> ought to work.
+<li> Sparc64 HAL R1 <code>popc</code>: This chip reputedly implements
+     <code>popc</code> properly (see gcc sparc.md).  Would need to recognise
+     it as <code>sparchalr1</code> or something in configure / config.sub /
+     config.guess.  <code>popc_limb</code> in gmp-impl.h could use this (per
+     commented out code).  <code>count_trailing_zeros</code> could use it too.
+<li> PA64: Improve <code>mpn_addmul_1</code>, <code>mpn_submul_1</code>, and
+     <code>mpn_mul_1</code>.  The current code runs at 11 cycles/limb.  It
+     should be possible to saturate the cache, which will happen at 8
+     cycles/limb (7.5 for mpn_mul_1).  Write special loops for s2 &lt; 2^32;
+     it should be possible to make them run at about 5 cycles/limb.
+<li> PPC601: See which of the power or powerpc32 code runs better.  Currently
+     the powerpc32 is used, but only because it's the default for
+     <code>powerpc*</code>.
+<li> PPC630: Rewrite <code>mpn_addmul_1</code>, <code>mpn_submul_1</code>, and
+     <code>mpn_mul_1</code>.  Use both integer and floating-point operations,
+     possibly two floating-point and one integer limb per loop.  Split operands
+     into four 16-bit chunks for fast fp operations.  Should easily reach 9
+     cycles/limb (using one int + one fp), but perhaps even 7 cycles/limb
+     (using one int + two fp).
+<li> PPC630: <code>mpn_rshift</code> could do the same sort of unrolled loop
+     as <code>mpn_lshift</code>.  Some judicious use of m4 might let the two
+     share source code, or with a register to control the loop direction
+     perhaps even share object code.
+<li> Implement <code>mpn_mul_basecase</code> and <code>mpn_sqr_basecase</code>
+     for important machines.  Helping the generic sqr_basecase.c with an
+     <code>mpn_sqr_diagonal</code> might be enough for some of the RISCs.
+<li> POWER2/POWER2SC: Schedule <code>mpn_lshift</code>/<code>mpn_rshift</code>.
+     Will bring time from 1.75 to 1.25 cycles/limb.
+<li> X86: Optimize non-MMX <code>mpn_lshift</code> for shifts by 1.  (See
+     Pentium code.)
+<li> X86: Good authority has it that in the past an inline <code>rep
+     movs</code> would upset GCC register allocation for the whole function.
+     Is this still true in GCC 3?  It uses <code>rep movs</code> itself for
+     <code>__builtin_memcpy</code>.  Examine the code for some simple and
+     complex functions to find out.  Inlining <code>rep movs</code> would be
+     desirable, it'd be both smaller and faster.
+<li> Pentium P54: <code>mpn_lshift</code> and <code>mpn_rshift</code> can come
+     down from 6.0 c/l to 5.5 or 5.375 by paying attention to pairing after
+     <code>shrdl</code> and <code>shldl</code>, see mpn/x86/pentium/README.
+<li> Pentium P55 MMX: <code>mpn_lshift</code> and <code>mpn_rshift</code>
+     might benefit from some destination prefetching.
+<li> PentiumPro: <code>mpn_divrem_1</code> might be able to use a
+     mul-by-inverse, hoping for maybe 30 c/l.
+<li> K7: <code>mpn_lshift</code> and <code>mpn_rshift</code> might be able to
+     do something branch-free for unaligned startups, and shaving one insn
+     from the loop with alternative indexing might save a cycle.
+<li> PPC32: Try using fewer registers in the current <code>mpn_lshift</code>.
+     The pipeline is now extremely deep, perhaps unnecessarily deep.
+<li> Fujitsu VPP: Vectorize main functions, perhaps in assembly language.
+<li> Fujitsu VPP: Write <code>mpn_mul_basecase</code> and
+     <code>mpn_sqr_basecase</code>.  This should use a "vertical multiplication
+     method", to avoid carry propagation, splitting one of the operands into
+     11-bit chunks.
+<li> Pentium: <code>mpn_lshift</code> by 31 should use the special rshift
+     by 1 code, and vice versa <code>mpn_rshift</code> by 31 should use the
+     special lshift by 1.  This would be best as a jump across to the other
+     routine, could let both live in lshift.asm and omit rshift.asm on finding
+     <code>mpn_rshift</code> already provided.
+<li> Cray T3E: Experiment with optimization options.  In particular,
+     -hpipeline3 seems promising.  We should at least up -O to -O2 or -O3.
+<li> Cray: <code>mpn_com</code> and <code>mpn_and_n</code> etc very probably
+     want a pragma like <code>MPN_COPY_INCR</code>.
+<li> Cray vector systems: <code>mpn_lshift</code>, <code>mpn_rshift</code>,
+     <code>mpn_popcount</code> and <code>mpn_hamdist</code> are nice and small
+     and could be inlined to avoid function calls.
+<li> Cray: Variable length arrays seem to be faster than the tal-notreent.c
+     scheme.  Not sure why, maybe they merely give the compiler more
+     information about aliasing (or the lack thereof).  Would like to modify
+     <code>TMP_ALLOC</code> to use them, or introduce a new scheme.  Memory
+     blocks wanted unconditionally are easy enough, those wanted only
+     sometimes are a problem.  Perhaps a special size calculation to ask for a
+     dummy length 1 when unwanted, or perhaps an inlined subroutine
+     duplicating code under each conditional.  Don't really want to turn
+     everything into a dog's dinner just because Cray don't offer an
+     <code>alloca</code>.
+<li> Cray: <code>mpn_get_str</code> on power-of-2 bases ought to vectorize.
+     Does it?  <code>bits_per_digit</code> and the inner loop over bits in a
+     limb might prevent it.  Perhaps special cases for binary, octal and hex
+     would be worthwhile (very possibly for all processors too).
+<li> S390: <code>BSWAP_LIMB_FETCH</code> looks like it could be done with
+     <code>lrvg</code>, as per glibc sysdeps/s390/s390-64/bits/byteswap.h.
+     This is only for 64-bit mode or something is it, since 32-bit mode has
+     other code?  Also, is it worth using for <code>BSWAP_LIMB</code> too, or
+     would that mean a store and re-fetch?  Presumably that's what comes out
+     in glibc.
+<li> Improve <code>count_leading_zeros</code> for 64-bit machines:
+  <pre>
+          if ((x &gt;&gt; 32) == 0) { x &lt;&lt;= 32; cnt += 32; }
+          if ((x &gt;&gt; 48) == 0) { x &lt;&lt;= 16; cnt += 16; }
+          ... </pre>
+<li> IRIX 6 MIPSpro compiler has an <code>__inline</code> which could perhaps
+     be used in <code>__GMP_EXTERN_INLINE</code>.  What would be the right way
+     to identify suitable versions of that compiler?
+<li> IRIX <code>cc</code> is rumoured to have an <code>_int_mult_upper</code>
+     (in <code>&lt;intrinsics.h&gt;</code> like Cray), but it didn't seem to
+     exist on some IRIX 6.5 systems tried.  If it does actually exist
+     somewhere it would very likely be an improvement over a function call to
+     umul.asm.
+<li> <code>mpn_get_str</code> final divisions by the base with
+     <code>udiv_qrnd_unnorm</code> could use some sort of multiply-by-inverse
+     on suitable machines.  This ends up happening for decimal by presenting
+     the compiler with a run-time constant, but the same for other bases would
+     be good.  Perhaps use could be made of the fact base&lt;256.
+<li> <code>mpn_umul_ppmm</code>, <code>mpn_udiv_qrnnd</code>: Return a
+     structure like <code>div_t</code> to avoid going through memory, in
+     particular helping RISCs that don't do store-to-load forwarding.  Clearly
+     this is only possible if the ABI returns a structure of two
+     <code>mp_limb_t</code>s in registers.
+     <br>
+     On PowerPC, structures are returned in memory on AIX and Darwin.  In SVR4
+     they're returned in registers, except that draft SVR4 had said memory, so
+     it'd be prudent to check which is done.  We can jam the compiler into the
+     right mode if we know how, since all this is purely internal to libgmp.
+     (gcc has an option, though of course gcc doesn't matter since we use
+     inline asm there.)
+</ul>
+
+<h4>New Functionality</h4>
+<ul>
+<li> Maybe add <code>mpz_crr</code> (Chinese Remainder Reconstruction).
+<li> Let `0b' and `0B' mean binary input everywhere.
+<li> <code>mpz_init</code> and <code>mpq_init</code> could do lazy allocation.
+     Set <code>ALLOC(var)</code> to 0 to indicate nothing allocated, and let
+     <code>_mpz_realloc</code> do the initial alloc.  Set
+     <code>z-&gt;_mp_d</code> to a dummy that <code>mpz_get_ui</code> and
+     similar can unconditionally fetch from.  Niels Möller has had a go at
+     this.
+     <br>
+     The advantages of the lazy scheme would be:
+     <ul>
+     <li> Initial allocate would be the size required for the first value
+          stored, rather than getting 1 limb in <code>mpz_init</code> and then
+          more or less immediately reallocating.
+     <li> <code>mpz_init</code> would only store magic values in the
+          <code>mpz_t</code> fields, and could be inlined.
+     <li> A fixed initializer could even be used by applications, like
+          <code>mpz_t z = MPZ_INITIALIZER;</code>, which might be convenient
+          for globals.
+     </ul>
+     The advantages of the current scheme are:
+     <ul>
+     <li> <code>mpz_set_ui</code> and other similar routines needn't check the
+          size allocated and can just store unconditionally.
+     <li> <code>mpz_set_ui</code> and perhaps others like
+          <code>mpz_tdiv_r_ui</code> and a prospective
+          <code>mpz_set_ull</code> could be inlined.
+     </ul>
+<li> Add <code>mpf_out_raw</code> and <code>mpf_inp_raw</code>.  Make sure
+     format is portable between 32-bit and 64-bit machines, and between
+     little-endian and big-endian machines.  A format which MPFR can use too
+     would be good.
+<li> <code>mpn_and_n</code> ... <code>mpn_copyd</code>: Perhaps make the mpn
+     logops and copys available in gmp.h, either as library functions or
+     inlines, with the availability of library functions instantiated in the
+     generated gmp.h at build time.
+<li> <code>mpz_set_str</code> etc variants taking string lengths rather than
+     null-terminators.
+<li> <code>mpz_andn</code>, <code>mpz_iorn</code>, <code>mpz_nand</code>,
+     <code>mpz_nior</code>, <code>mpz_xnor</code> might be useful additions,
+     if they could share code with the current such functions (which should be
+     possible).
+<li> <code>mpz_and_ui</code> etc might be of use sometimes.  Suggested by
+     Niels Möller.
+<li> <code>mpf_set_str</code> and <code>mpf_inp_str</code> could usefully
+     accept 0x, 0b etc when base==0.  Perhaps the exponent could default to
+     decimal in this case, with a further 0x, 0b etc allowed there.
+     Eg. 0xFFAA@0x5A.  A leading "0" for octal would match the integers, but
+     probably something like "0.123" ought not mean octal.
+<li> <code>GMP_LONG_LONG_LIMB</code> or some such could become a documented
+     feature of gmp.h, so applications could know whether to
+     <code>printf</code> a limb using <code>%lu</code> or <code>%Lu</code>.
+<li> <code>GMP_PRIdMP_LIMB</code> and similar defines following C99
+     &lt;inttypes.h&gt; might be of use to applications printing limbs.  But
+     if <code>GMP_LONG_LONG_LIMB</code> or whatever is added then perhaps this
+     can easily enough be left to applications.
+<li> <code>gmp_printf</code> could accept <code>%b</code> for binary output.
+     It'd be nice if it worked for plain <code>int</code> etc too, not just
+     <code>mpz_t</code> etc.
+<li> <code>gmp_printf</code> in fact could usefully accept an arbitrary base,
+     for both integer and float conversions.  A base either in the format
+     string or as a parameter with <code>*</code> should be allowed.  Maybe
+     <code>%13b</code> (b for base) or something like that.
+<li> <code>gmp_printf</code> could perhaps accept <code>mpq_t</code> for float
+     conversions, eg. <code>"%.4Qf"</code>.  This would be merely for
+     convenience, but still might be useful.  Rounding would be the same as
+     for an <code>mpf_t</code> (ie. currently round-to-nearest, but not
+     actually documented).  Alternately, perhaps a separate
+     <code>mpq_get_str_point</code> or some such might be more use.  Suggested
+     by Pedro Gimeno.
+<li> <code>mpz_rscan0</code> or <code>mpz_revscan0</code> or some such
+     searching towards the low end of an integer might match
+     <code>mpz_scan0</code> nicely.  Likewise for <code>scan1</code>.
+     Suggested by Roberto Bagnara.
+<li> <code>mpz_bit_subset</code> or some such to test whether one integer is a
+     bitwise subset of another might be of use.  Some sort of return value
+     indicating whether it's a proper or non-proper subset would be good and
+     wouldn't cost anything in the implementation.  Suggested by Roberto
+     Bagnara.
+<li> <code>mpf_get_ld</code>, <code>mpf_set_ld</code>: Conversions between
+     <code>mpf_t</code> and <code>long double</code>, suggested by Dan
+     Christensen.  Other <code>long double</code> routines might be desirable
+     too, but <code>mpf</code> would be a start.
+     <br>
+     <code>long double</code> is an ANSI-ism, so everything involving it would
+     need to be suppressed on a K&amp;R compiler.
+     <br>
+     There'd be some work to be done by <code>configure</code> to recognise
+     the format in use, MPFR has a start on this.  Often <code>long
+     double</code> is the same as <code>double</code>, which is easy but
+     pretty pointless.  A single float format detector macro could look at
+     <code>double</code> then <code>long double</code>.
+     <br>
+     Sometimes there's a compiler option for the size of a <code>long
+     double</code>, eg. xlc on AIX can use either 64-bit or 128-bit.  It's
+     probably simplest to regard this as a compiler compatibility issue, and
+     leave it to users or sysadmins to ensure application and library code is
+     built the same.
+<li> <code>mpz_sqrt_if_perfect_square</code>: When
+     <code>mpz_perfect_square_p</code> does its tests it calculates a square
+     root and then discards it.  For some applications it might be useful to
+     return that root.  Suggested by Jason Moxham.
+<li> <code>mpz_get_ull</code>, <code>mpz_set_ull</code>,
+     <code>mpz_get_sll</code>, <code>mpz_set_sll</code>: Conversions for
+     <code>long long</code>.  These would aid interoperability, though a
+     mixture of GMP and <code>long long</code> would probably not be too
+     common.  Since <code>long long</code> is not always available (it's in
+     C99 and GCC though), disadvantages of using <code>long long</code> in
+     libgmp.a would be
+     <ul>
+     <li> Library contents vary according to the build compiler.
+     <li> gmp.h would need an ugly <code>#ifdef</code> block to decide if the
+          application compiler could take the <code>long long</code>
+          prototypes.
+     <li> Some sort of <code>LIBGMP_HAS_LONGLONG</code> might be wanted to
+          indicate whether the functions are available.  (Applications using
+          autoconf could probe the library too.)
+     </ul>
+     It'd be possible to defer the need for <code>long long</code> to
+     application compile time, by having something like
+     <code>mpz_set_2ui</code> called with two halves of a <code>long
+     long</code>.  Disadvantages of this would be,
+     <ul>
+     <li> Bigger code in the application, though perhaps not if a <code>long
+          long</code> is normally passed as two halves anyway.
+     <li> <code>mpz_get_ull</code> would be a rather big inline, or would have
+          to be two function calls.
+     <li> <code>mpz_get_sll</code> would be a worse inline, and would put the
+          treatment of <code>-0x10..00</code> into applications (see
+          <code>mpz_get_si</code> correctness above).
+     <li> Although having libgmp.a independent of the build compiler is nice,
+          it sort of sacrifices the capabilities of a good compiler to
+          uniformity with inferior ones.
+     </ul>
+     Plain use of <code>long long</code> is probably the lesser evil, if only
+     because it makes best use of gcc.  In fact perhaps it would suffice to
+     guarantee <code>long long</code> conversions only when using GCC for both
+     application and library.  That would cover free software, and we can
+     worry about selected vendor compilers later.
+     <br>
+     In C++ the situation is probably clearer, we demand fairly recent C++ so
+     <code>long long</code> should be available always.  We'd probably prefer
+     to have the C and C++ the same in respect of <code>long long</code>
+     support, but it would be possible to have it unconditionally in gmpxx.h,
+     by some means or another.
+<li> <code>mpz_strtoz</code> parsing the same as <code>strtol</code>.
+     Suggested by Alexander Kruppa.
+</ul>
+
+
+<h4>Configuration</h4>
+
+<ul>
+<li> Alpha ev7, ev79: Add code to config.guess to detect these.  Believe ev7
+     will be "3-1307" in the current switch, but need to verify that.  (On
+     OSF, current configfsf.guess identifies ev7 using psrinfo, we need to do
+     it ourselves for other systems.)
+<li> Alpha OSF: Libtool (version 1.5) doesn't seem to recognise this system is
+     "pic always" and ends up running gcc twice with the same options.  This
+     is wasteful, but harmless.  Perhaps a newer libtool will be better.
+<li> ARM: <code>umul_ppmm</code> in longlong.h always uses <code>umull</code>,
+     but is that available only for M series chips or some such?  Perhaps it
+     should be configured in some way.
+<li> HPPA: config.guess should recognize 7000, 7100, 7200, and 8x00.
+<li> HPPA: gcc 3.2 introduces a <code>-mschedule=7200</code> etc parameter,
+     which could be driven by an exact hppa cpu type.
+<li> Mips: config.guess should say mipsr3000, mipsr4000, mipsr10000, etc.
+     "hinv -c processor" gives lots of information on Irix.  Standard
+     config.guess appends "el" to indicate endianness, but
+     <code>AC_C_BIGENDIAN</code> seems the best way to handle that for GMP.
+<li> PowerPC: The function descriptor nonsense for AIX is currently driven by
+     <code>*-*-aix*</code>.  It might be more reliable to do some sort of
+     feature test, examining the compiler output perhaps.  It might also be
+     nice to merge the aix.m4 files into powerpc-defs.m4.
+<li> config.m4 is generated only by the configure script, it won't be
+     regenerated by config.status.  Creating it as an <code>AC_OUTPUT</code>
+     would work, but it might upset "make" to have things like <code>L$</code>
+     get into the Makefiles through <code>AC_SUBST</code>.
+     <code>AC_CONFIG_COMMANDS</code> would be the alternative.  With some
+     careful m4 quoting the <code>changequote</code> calls might not be
+     needed, which might free up the order in which things had to be output.
+<li> Automake: Latest automake has a <code>CCAS</code>, <code>CCASFLAGS</code>
+     scheme.  Though we probably wouldn't be using its assembler support we
+     could try to use those variables in compatible ways.
+<li> <code>GMP_LDFLAGS</code> could probably be done with plain
+     <code>LDFLAGS</code> already used by automake for all linking.  But with
+     a bit of luck the next libtool will pass pretty much all
+     <code>CFLAGS</code> through to the compiler when linking, making
+     <code>GMP_LDFLAGS</code> unnecessary.
+<li> mpn/Makeasm.am uses <code>-c</code> and <code>-o</code> together in the
+     .S and .asm rules, but apparently that isn't completely portable (there's
+     an autoconf <code>AC_PROG_CC_C_O</code> test for it).  So far we've not
+     had problems, but perhaps the rules could be rewritten to use "foo.s" as
+     the temporary, or to do a suitable "mv" of the result.  The only danger
+     from using foo.s would be if a compile failed and the temporary foo.s
+     then looked like the primary source.  Hopefully if the
+     <code>SUFFIXES</code> are ordered to have .S and .asm ahead of .s that
+     wouldn't happen.  Might need to check.
+</ul>
+
+
+<h4>Random Numbers</h4>
+<ul>
+<li> <code>_gmp_rand</code> is not particularly fast on the linear
+     congruential algorithm and could stand various improvements.
+     <ul>
+     <li> Make a second seed area within <code>gmp_randstate_t</code> (or
+          <code>_mp_algdata</code> rather) to save some copying.
+     <li> Make a special case for a single limb <code>2exp</code> modulus, to
+          avoid <code>mpn_mul</code> calls.  Perhaps the same for two limbs.
+     <li> Inline the <code>lc</code> code, to avoid a function call and
+          <code>TMP_ALLOC</code> for every chunk.
+     <li> Perhaps the <code>2exp</code> and general LC cases should be split,
+          for clarity (if the general case is retained).
+     </ul>
+<li> <code>gmp_randstate_t</code> used for parameters perhaps should become
+     <code>gmp_randstate_ptr</code> the same as other types.
+<li> Some of the empirical randomness tests could be included in a "make
+     check".  They ought to work everywhere, for a given seed at least.
+</ul>
+
+
+<h4>C++</h4>
+<ul>
+<li> <code>mpz_class(string)</code>, etc: Use the C++ global locale to
+     identify whitespace.
+     <br>
+     <code>mpf_class(string)</code>: Use the C++ global locale decimal point,
+     rather than the C one.
+     <br>
+     Consider making these variant <code>mpz_set_str</code> etc forms
+     available for <code>mpz_t</code> too, not just <code>mpz_class</code>
+     etc.
+<li> <code>mpq_class operator+=</code>: Don't emit an unnecessary
+     <code>mpq_set(q,q)</code> before <code>mpz_addmul</code> etc.
+<li> Put various bits of gmpxx.h into libgmpxx, to avoid excessive inlining.
+     Candidates for this would be,
+     <ul>
+     <li> <code>mpz_class(const char *)</code>, etc: since they're normally
+          not fast anyway, and we can hide the exception <code>throw</code>.
+     <li> <code>mpz_class(string)</code>, etc: to hide the <code>cstr</code>
+          needed to get to the C conversion function.
+     <li> <code>mpz_class string, char*</code> etc constructors: likewise to
+          hide the throws and conversions.
+     <li> <code>mpz_class::get_str</code>, etc: to hide the <code>char*</code>
+          to <code>string</code> conversion and free.  Perhaps
+          <code>mpz_get_str</code> can write directly into a
+          <code>string</code>, to avoid copying.
+          <br>
+          Consider making such <code>string</code> returning variants
+          available for use with plain <code>mpz_t</code> etc too.
+     </ul>
+</ul>
+
+<h4>Miscellaneous</h4>
+<ul>
+<li> <code>mpz_gcdext</code> and <code>mpn_gcdext</code> ought to document
+     what range of values the generated cofactors can take, and preferably
+     ensure the definition uniquely specifies the cofactors for given inputs.
+     A basic extended Euclidean algorithm or multi-step variant leads to
+     |x|&lt;|b| and |y|&lt;|a| or something like that, but there's probably
+     two solutions under just those restrictions.
+<li> demos/factorize.c: use <code>mpz_divisible_ui_p</code> rather than
+     <code>mpz_tdiv_qr_ui</code>.  (Of course dividing multiple primes at a
+     time would be better still.)
+<li> The various test programs use quite a bit of the main
+     <code>libgmp</code>.  This establishes good cross-checks, but it might be
+     better to use simple reference routines where possible.  Where it's not
+     possible some attention could be paid to the order of the tests, so a
+     <code>libgmp</code> routine is only used for tests once it seems to be
+     good.
+<li> <code>MUL_FFT_THRESHOLD</code> etc: the FFT thresholds should allow a
+     return to a previous k at certain sizes.  This arises basically due to
+     the step effect caused by size multiples effectively used for each k.
+     Looking at a graph makes it fairly clear.
+<li> <code>__gmp_doprnt_mpf</code> does a rather unattractive round-to-nearest
+     on the string returned by <code>mpf_get_str</code>.  Perhaps some variant
+     of <code>mpf_get_str</code> could be made which would better suit.
+</ul>
+
+
+<h4>Aids to Development</h4>
+<ul>
+<li> Add <code>ASSERT</code>s at the start of each user-visible mpz/mpq/mpf
+     function to check the validity of each <code>mp?_t</code> parameter, in
+     particular to check they've been <code>mp?_init</code>ed.  This might
+     catch elementary mistakes in user programs.  Care would need to be taken
+     over <code>MPZ_TMP_INIT</code>ed variables used internally.  If nothing
+     else then consistency checks like size&lt;=alloc, ptr not
+     <code>NULL</code> and ptr+size not wrapping around the address space,
+     would be possible.  A more sophisticated scheme could track
+     <code>_mp_d</code> pointers and ensure only a valid one is used.  Such a
+     scheme probably wouldn't be reentrant, not without some help from the
+     system.
+<li> tune/time.c could try to determine at runtime whether
+     <code>getrusage</code> and <code>gettimeofday</code> are reliable.
+     Currently we pretend in configure that the dodgy m68k netbsd 1.4.1
+     <code>getrusage</code> doesn't exist.  If a test might take a long time
+     to run then perhaps cache the result in a file somewhere.
+<li> tune/time.c could choose the default precision based on the
+     <code>speed_unittime</code> determined, independent of the method in use.
+<li> Cray vector systems: CPU frequency could be determined from
+     <code>sysconf(_SC_CLK_TCK)</code>, since it seems to be clock cycle
+     based.  Is this true for all Cray systems?  Would like some documentation
+     or something to confirm.
+</ul>
+
+
+<h4>Documentation</h4>
+<ul>
+<li> <code>mpz_inp_str</code> (etc) doesn't say when it stops reading digits.
+<li> <code>mpn_get_str</code> isn't terribly clear about how many digits it
+     produces.  It'd probably be possible to say at most one leading zero,
+     which is what both it and <code>mpz_get_str</code> currently do.  But
+     want to be careful not to bind ourselves to something that might not suit
+     another implementation.
+<li> <code>va_arg</code> doesn't do the right thing with <code>mpz_t</code>
+     etc directly, but instead needs a pointer type like <code>MP_INT*</code>.
+     It'd be good to show how to do this, but we'd either need to document
+     <code>mpz_ptr</code> and friends, or perhaps fall back on something
+     slightly nasty with <code>void*</code>.
+</ul>
+
+
+<h4>Bright Ideas</h4>
+
+<p> The following may or may not be feasible, and aren't likely to get done in the
+near future, but are at least worth thinking about.
+
+<ul>
+<li> Reorganize longlong.h so that we can inline the operations even for the
+     system compiler.  When there is no such compiler feature, make calls to
+     stub functions.  Write such stub functions for as many machines as
+     possible.
+<li> longlong.h could declare when it's using, or would like to use,
+     <code>mpn_umul_ppmm</code>, and the corresponding umul.asm file could be
+     included in libgmp only in that case, the same as is effectively done for
+     <code>__clz_tab</code>.  Likewise udiv.asm and perhaps cntlz.asm.  This
+     would only be a very small space saving, so perhaps not worth the
+     complexity.
+<li> longlong.h could be built at configure time by concatenating or
+     #including fragments from each directory in the mpn path.  This would
+     select CPU specific macros the same way as CPU specific assembler code.
+     Code used would no longer depend on cpp predefines, and the current
+     nested conditionals could be flattened out.
+<li> <code>mpz_get_si</code> returns 0x80000000 for -0x100000000, whereas it's
+     sort of supposed to return the low 31 (or 63) bits.  But this is
+     undocumented, and perhaps not too important.
+<li> <code>mpz_init_set*</code> and <code>mpz_realloc</code> could allocate
+     say an extra 16 limbs over what's needed, so as to reduce the chance of
+     having to do a reallocate if the <code>mpz_t</code> grows a bit more.
+     This could only be an option, since it'd badly bloat memory usage in
+     applications using many small values.
+<li> <code>mpq</code> functions could perhaps check for numerator or
+     denominator equal to 1, on the assumption that integers or
+     denominator-only values might be expected to occur reasonably often.
+<li> <code>count_trailing_zeros</code> is used on more or less uniformly
+     distributed numbers in a couple of places.  For some CPUs
+     <code>count_trailing_zeros</code> is slow and it's probably worth handling
+     the frequently occurring 0 to 2 trailing zeros cases specially.
+<li> <code>mpf_t</code> might like to let the exponent be undefined when
+     size==0, instead of requiring it 0 as now.  It should be possible to do
+     size==0 tests before paying attention to the exponent.  The advantage is
+     not needing to set exp in the various places a zero result can arise,
+     which avoids some tedium but is otherwise perhaps not too important.
+     Currently <code>mpz_set_f</code> and <code>mpf_cmp_ui</code> depend on
+     exp==0, maybe elsewhere too.
+<li> <code>__gmp_allocate_func</code>: Could use GCC <code>__attribute__
+     ((malloc))</code> on this, though don't know if it'd do much.  GCC 3.0
+     allows that attribute on functions, but not function pointers (see info
+     node "Attribute Syntax"), so would need a new autoconf test.  This can
+     wait until there's a GCC that supports it.
+<li> <code>mpz_add_ui</code> contains two <code>__GMPN_COPY</code>s, one from
+     <code>mpn_add_1</code> and one from <code>mpn_sub_1</code>.  If those two
+     routines were opened up a bit maybe that code could be shared.  When a
+     copy needs to be done there's no carry to append for the add, and if the
+     copy is non-empty no high zero for the sub.
+</ul>
+
+
+<h4>Old and Obsolete Stuff</h4>
+
+<p> The following tasks apply to chips or systems that are old and/or obsolete.
+It's unlikely anything will be done about them unless anyone is actively using
+them.
+
+<ul>
+<li> Sparc32: The integer based udiv_nfp.asm used to be selected by
+     <code>configure --nfp</code> but that option is gone now that autoconf is
+     used.  The file could go somewhere suitable in the mpn search if any
+     chips might benefit from it, though it's possible we don't currently
+     differentiate enough exact cpu types to do this properly.
+<li> VAX D and G format <code>double</code> floats are straightforward and
+     could perhaps be handled directly in <code>__gmp_extract_double</code>
+     and maybe in <code>mpn_get_d</code>, rather than falling back on the
+     generic code.  (Both formats are detected by <code>configure</code>.)
+</ul>
+
+
+<hr>
+
+</body>
+</html>
+
+<!--
+Local variables:
+eval: (add-hook 'write-file-hooks 'time-stamp)
+time-stamp-start: "This file current as of "
+time-stamp-format: "%:d %3b %:y"
+time-stamp-end: "\\."
+time-stamp-line-limit: 50
+End:
+-->
diff --git a/doc/texinfo.tex b/doc/texinfo.tex

new file mode 100644 (file)

index 0000000..9f14cc5
--- /dev/null
+++ b/doc/texinfo.tex
@@ -0,0 +1,8997 @@
+% texinfo.tex -- TeX macros to handle Texinfo files.
+%
+% Load plain if necessary, i.e., if running under initex.
+\expandafter\ifx\csname fmtname\endcsname\relax\input plain\fi
+%
+\def\texinfoversion{2009-11-15.11}
+%
+% Copyright (C) 1985, 1986, 1988, 1990, 1991, 1992, 1993, 1994, 1995,
+% 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
+% 2007, 2008 Free Software Foundation, Inc.
+%
+% This texinfo.tex file is free software: you can redistribute it and/or
+% modify it under the terms of the GNU General Public License as
+% published by the Free Software Foundation, either version 3 of the
+% License, or (at your option) any later version.
+%
+% This texinfo.tex file is distributed in the hope that it will be
+% useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+% of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+% General Public License for more details.
+%
+% You should have received a copy of the GNU General Public License
+% along with this program.  If not, see <http://www.gnu.org/licenses/>.
+%
+% As a special exception, when this file is read by TeX when processing
+% a Texinfo source document, you may use the result without
+% restriction.  (This has been our intent since Texinfo was invented.)
+%
+% Please try the latest version of texinfo.tex before submitting bug
+% reports; you can get the latest version from:
+%   http://www.gnu.org/software/texinfo/ (the Texinfo home page), or
+%   ftp://tug.org/tex/texinfo.tex
+%     (and all CTAN mirrors, see http://www.ctan.org).
+% The texinfo.tex in any given distribution could well be out
+% of date, so if that's what you're using, please check.
+%
+% Send bug reports to bug-texinfo@gnu.org.  Please include a
+% complete document in each bug report with which we can reproduce the
+% problem.  Patches are, of course, greatly appreciated.
+%
+% To process a Texinfo manual with TeX, it's most reliable to use the
+% texi2dvi shell script that comes with the distribution.  For a simple
+% manual foo.texi, however, you can get away with this:
+%   tex foo.texi
+%   texindex foo.??
+%   tex foo.texi
+%   tex foo.texi
+%   dvips foo.dvi -o  # or whatever; this makes foo.ps.
+% The extra TeX runs get the cross-reference information correct.
+% Sometimes one run after texindex suffices, and sometimes you need more
+% than two; texi2dvi does it as many times as necessary.
+%
+% It is possible to adapt texinfo.tex for other languages, to some
+% extent.  You can get the existing language-specific files from the
+% full Texinfo distribution.
+%
+% The GNU Texinfo home page is http://www.gnu.org/software/texinfo.
+
+
+\message{Loading texinfo [version \texinfoversion]:}
+
+% If in a .fmt file, print the version number
+% and turn on active characters that we couldn't do earlier because
+% they might have appeared in the input file name.
+\everyjob{\message{[Texinfo version \texinfoversion]}%
+  \catcode`+=\active \catcode`\_=\active}
+
+
+\chardef\other=12
+
+% We never want plain's \outer definition of \+ in Texinfo.
+% For @tex, we can use \tabalign.
+\let\+ = \relax
+
+% Save some plain tex macros whose names we will redefine.
+\let\ptexb=\b
+\let\ptexbullet=\bullet
+\let\ptexc=\c
+\let\ptexcomma=\,
+\let\ptexdot=\.
+\let\ptexdots=\dots
+\let\ptexend=\end
+\let\ptexequiv=\equiv
+\let\ptexexclam=\!
+\let\ptexfootnote=\footnote
+\let\ptexgtr=>
+\let\ptexhat=^
+\let\ptexi=\i
+\let\ptexindent=\indent
+\let\ptexinsert=\insert
+\let\ptexlbrace=\{
+\let\ptexless=<
+\let\ptexnewwrite\newwrite
+\let\ptexnoindent=\noindent
+\let\ptexplus=+
+\let\ptexrbrace=\}
+\let\ptexslash=\/
+\let\ptexstar=\*
+\let\ptext=\t
+\let\ptextop=\top
+
+% If this character appears in an error message or help string, it
+% starts a new line in the output.
+\newlinechar = `^^J
+
+% Define \linenumber to expand to `l.<current input line>: ', using TeX
+% 3.0's \inputlineno, for better error messages.  If \inputlineno is
+% undefined (an old version of TeX), \linenumber is empty instead.
+\ifx\inputlineno\thisisundefined
+  \let\linenumber = \empty % Pre-3.0.
+\else
+  \def\linenumber{l.\the\inputlineno:\space}
+\fi
+
+% Set up fixed words for English if not already set.
+\ifx\putwordAppendix\undefined  \gdef\putwordAppendix{Appendix}\fi
+\ifx\putwordChapter\undefined   \gdef\putwordChapter{Chapter}\fi
+\ifx\putwordfile\undefined      \gdef\putwordfile{file}\fi
+\ifx\putwordin\undefined        \gdef\putwordin{in}\fi
+\ifx\putwordIndexIsEmpty\undefined     \gdef\putwordIndexIsEmpty{(Index is empty)}\fi
+\ifx\putwordIndexNonexistent\undefined \gdef\putwordIndexNonexistent{(Index is nonexistent)}\fi
+\ifx\putwordInfo\undefined      \gdef\putwordInfo{Info}\fi
+\ifx\putwordInstanceVariableof\undefined \gdef\putwordInstanceVariableof{Instance Variable of}\fi
+\ifx\putwordMethodon\undefined  \gdef\putwordMethodon{Method on}\fi
+\ifx\putwordNoTitle\undefined   \gdef\putwordNoTitle{No Title}\fi
+\ifx\putwordof\undefined        \gdef\putwordof{of}\fi
+\ifx\putwordon\undefined        \gdef\putwordon{on}\fi
+\ifx\putwordpage\undefined      \gdef\putwordpage{page}\fi
+\ifx\putwordsection\undefined   \gdef\putwordsection{section}\fi
+\ifx\putwordSection\undefined   \gdef\putwordSection{Section}\fi
+\ifx\putwordsee\undefined       \gdef\putwordsee{see}\fi
+\ifx\putwordSee\undefined       \gdef\putwordSee{See}\fi
+\ifx\putwordShortTOC\undefined  \gdef\putwordShortTOC{Short Contents}\fi
+\ifx\putwordTOC\undefined       \gdef\putwordTOC{Table of Contents}\fi
+%
+\ifx\putwordMJan\undefined \gdef\putwordMJan{January}\fi
+\ifx\putwordMFeb\undefined \gdef\putwordMFeb{February}\fi
+\ifx\putwordMMar\undefined \gdef\putwordMMar{March}\fi
+\ifx\putwordMApr\undefined \gdef\putwordMApr{April}\fi
+\ifx\putwordMMay\undefined \gdef\putwordMMay{May}\fi
+\ifx\putwordMJun\undefined \gdef\putwordMJun{June}\fi
+\ifx\putwordMJul\undefined \gdef\putwordMJul{July}\fi
+\ifx\putwordMAug\undefined \gdef\putwordMAug{August}\fi
+\ifx\putwordMSep\undefined \gdef\putwordMSep{September}\fi
+\ifx\putwordMOct\undefined \gdef\putwordMOct{October}\fi
+\ifx\putwordMNov\undefined \gdef\putwordMNov{November}\fi
+\ifx\putwordMDec\undefined \gdef\putwordMDec{December}\fi
+%
+\ifx\putwordDefmac\undefined    \gdef\putwordDefmac{Macro}\fi
+\ifx\putwordDefspec\undefined   \gdef\putwordDefspec{Special Form}\fi
+\ifx\putwordDefvar\undefined    \gdef\putwordDefvar{Variable}\fi
+\ifx\putwordDefopt\undefined    \gdef\putwordDefopt{User Option}\fi
+\ifx\putwordDeffunc\undefined   \gdef\putwordDeffunc{Function}\fi
+
+% The catcode of space is not known here; \spaceisspace sets it to 10.
+\chardef\spacecat = 10
+\def\spaceisspace{\catcode`\ =\spacecat}
+
+% sometimes characters are active, so we need control sequences.
+\chardef\colonChar = `\:
+\chardef\commaChar = `\,
+\chardef\dashChar  = `\-
+\chardef\dotChar   = `\.
+\chardef\exclamChar= `\!
+\chardef\lquoteChar= `\`
+\chardef\questChar = `\?
+\chardef\rquoteChar= `\'
+\chardef\semiChar  = `\;
+\chardef\underChar = `\_
+
+% Ignore one argument: a single token or a whole brace-delimited group.
+%
+\def\gobble#1{}
+
+% \makecsname{NAME} yields \NAME, left unexpanded; used inside several \edef's.
+\def\makecsname#1{\expandafter\noexpand\csname#1\endcsname}
+
+% Hyphenation fixes.
+\hyphenation{
+  Flor-i-da Ghost-script Ghost-view Mac-OS Post-Script
+  ap-pen-dix bit-map bit-maps
+  data-base data-bases eshell fall-ing half-way long-est man-u-script
+  man-u-scripts mini-buf-fer mini-buf-fers over-view par-a-digm
+  par-a-digms rath-er rec-tan-gu-lar ro-bot-ics se-vere-ly set-up spa-ces
+  spell-ing spell-ings
+  stand-alone strong-est time-stamp time-stamps which-ever white-space
+  wide-spread wrap-around
+}
+
+% Margin to add to right of even pages, to left of odd pages.
+\newdimen\bindingoffset
+\newdimen\normaloffset
+\newdimen\pagewidth \newdimen\pageheight
+
+% For a final copy, take out the rectangles
+% that mark overfull boxes (in case you have decided
+% that the text looks ok even though it passes the margin).
+%
+\def\finalout{\overfullrule=0pt}
+
+% @| inserts a changebar to the left of the current line.  It should
+% surround any changed text.  This approach does *not* work if the
+% change spans more than two lines of output.  To handle that, we would
+% have to adopt a much more difficult approach (putting marks into the
+% main vertical list for the beginning and end of each change).
+%
+\def\|{%
+  % \vadjust can only be used in horizontal mode.
+  \leavevmode
+  %
+  % Append this vertical mode material after the current line in the output.
+  \vadjust{%
+    % We want to insert a rule as tall as the current leading, so first
+    % back up by \baselineskip (the height given to the \vrule below).
+    \vskip-\baselineskip
+    %
+    % \vadjust-items are inserted at the left edge of the type.  So
+    % the \llap here moves out into the left-hand margin.
+    \llap{%
+      %
+      % For a thicker or thinner bar, change the `1pt'.
+      \vrule height\baselineskip width1pt
+      %
+      % This is the space between the bar and the text.
+      \hskip 12pt
+    }%
+  }%
+}
+
+% Sometimes it is convenient to have everything in the transcript file
+% and nothing on the terminal.  We don't just call \tracingall here,
+% since that produces some useless output on the terminal.  We also make
+% some effort to order the tracing commands to reduce output in the log
+% file; cf. trace.sty in LaTeX.
+%
+\def\gloggingall{\begingroup \globaldefs = 1 \loggingall \endgroup}%
+\def\loggingall{%
+  \tracingstats2
+  \tracingpages1
+  \tracinglostchars2  % 2 gives us more in etex
+  \tracingparagraphs1
+  \tracingoutput1
+  \tracingmacros2
+  \tracingrestores1
+  \showboxbreadth\maxdimen \showboxdepth\maxdimen
+  \ifx\eTeXversion\undefined\else % etex gives us more logging
+    \tracingscantokens1
+    \tracingifs1
+    \tracinggroups1
+    \tracingnesting2
+    \tracingassigns1
+  \fi
+  \tracingcommands3  % 3 gives us more in etex
+  \errorcontextlines16
+}%
+
+% Plain's \smallbreak, \medbreak and \bigbreak with a \lastpenalty check
+% added: if the last thing we did was a \nobreak (penalty 10000), we
+% don't want to insert more space, so nothing at all is done then.
+\def\smallbreak{\ifnum\lastpenalty<10000\par\ifdim\lastskip<\smallskipamount
+  \removelastskip\penalty-50\smallskip\fi\fi}
+\def\medbreak{\ifnum\lastpenalty<10000\par\ifdim\lastskip<\medskipamount
+  \removelastskip\penalty-100\medskip\fi\fi}
+\def\bigbreak{\ifnum\lastpenalty<10000\par\ifdim\lastskip<\bigskipamount
+  \removelastskip\penalty-200\bigskip\fi\fi}
+
+% For @cropmarks command.
+% Do @cropmarks to get crop marks.
+%
+\newif\ifcropmarks
+\let\cropmarks = \cropmarkstrue
+%
+% Dimensions to add cropmarks at corners.
+% Added by P. A. MacKay, 12 Nov. 1986
+%
+\newdimen\outerhsize \newdimen\outervsize % set by the paper size routines
+\newdimen\cornerlong  \cornerlong=1pc
+\newdimen\cornerthick \cornerthick=.3pt
+\newdimen\topandbottommargin \topandbottommargin=.75in
+
+% Output a mark which sets \thischapter, \thissection and \thiscolor.
+% We dump everything together because we only have one kind of mark.
+% This works because we only use \botmark / \topmark, not \firstmark.
+%
+% A mark contains a subexpression of the \ifcase ... \fi construct.
+% \get*marks macros below extract the needed part using \ifcase.
+%
+% Another complication is to let the user choose whether \thischapter
+% (\thissection) refers to the chapter (section) in effect at the top
+% of a page, or that at the bottom of a page.  The solution is
+% described on page 260 of The TeXbook.  It involves outputting two
+% marks for the sectioning macros, one before the section break, and
+% one after.  I won't pretend I can describe this better than DEK...
+\def\domark{%
+  \toks0=\expandafter{\lastchapterdefs}%
+  \toks2=\expandafter{\lastsectiondefs}%
+  \toks4=\expandafter{\prevchapterdefs}%
+  \toks6=\expandafter{\prevsectiondefs}%
+  \toks8=\expandafter{\lastcolordefs}%
+  \mark{%
+                   \the\toks0 \the\toks2
+      \noexpand\or \the\toks4 \the\toks6
+    \noexpand\else \the\toks8
+  }%
+}
+% \topmark doesn't work for the very first chapter (after the title
+% page or the contents), so we use \firstmark there -- this gets us
+% the mark with the chapter defs, unless the user sneaks in, e.g.,
+% @setcolor (or @url, or @link, etc.) between @contents and the very
+% first @chapter.
+\def\gettopheadingmarks{%
+  \ifcase0\topmark\fi
+  \ifx\thischapter\empty \ifcase0\firstmark\fi \fi
+}
+\def\getbottomheadingmarks{\ifcase1\botmark\fi}
+\def\getcolormarks{\ifcase2\topmark\fi}
+
+% Avoid "undefined control sequence" errors.
+\def\lastchapterdefs{}
+\def\lastsectiondefs{}
+\def\prevchapterdefs{}
+\def\prevsectiondefs{}
+\def\lastcolordefs{}
+
+% Main output routine.
+\chardef\PAGE = 255
+\output = {\onepageout{\pagecontents\PAGE}}
+
+\newbox\headlinebox
+\newbox\footlinebox
+
+% \onepageout takes a vbox as an argument.  Note that \pagecontents
+% does insertions, but you have to call it yourself.
+\def\onepageout#1{%
+  \ifcropmarks \hoffset=0pt \else \hoffset=\normaloffset \fi
+  %
+  \ifodd\pageno  \advance\hoffset by \bindingoffset
+  \else \advance\hoffset by -\bindingoffset\fi
+  %
+  % Do this outside of the \shipout so @code etc. will be expanded in
+  % the headline as they should be, not taken literally (outputting ''code).
+  \ifodd\pageno \getoddheadingmarks \else \getevenheadingmarks \fi
+  \setbox\headlinebox = \vbox{\let\hsize=\pagewidth \makeheadline}%
+  \ifodd\pageno \getoddfootingmarks \else \getevenfootingmarks \fi
+  \setbox\footlinebox = \vbox{\let\hsize=\pagewidth \makefootline}%
+  %
+  {%
+    % Have to do this stuff outside the \shipout because we want it to
+    % take effect in \write's, yet the group defined by the \vbox ends
+    % before the \shipout runs.
+    %
+    \indexdummies         % don't expand commands in the output.
+    \normalturnoffactive  % \ in index entries must not stay \, e.g., if
+               % the page break happens to be in the middle of an example.
+               % We don't want .vr (or whatever) entries like this:
+               % \entry{{\tt \indexbackslash }acronym}{32}{\code {\acronym}}
+               % "\acronym" won't work when it's read back in;
+               % it needs to be 
+               % {\code {{\tt \backslashcurfont }acronym}
+    \shipout\vbox{%
+      % Do this early so pdf references go to the beginning of the page.
+      \ifpdfmakepagedest \pdfdest name{\the\pageno} xyz\fi
+      %
+      \ifcropmarks \vbox to \outervsize\bgroup
+        \hsize = \outerhsize
+        \vskip-\topandbottommargin
+        \vtop to0pt{%
+          \line{\ewtop\hfil\ewtop}%
+          \nointerlineskip
+          \line{%
+            \vbox{\moveleft\cornerthick\nstop}%
+            \hfill
+            \vbox{\moveright\cornerthick\nstop}%
+          }%
+          \vss}%
+        \vskip\topandbottommargin
+        \line\bgroup
+          \hfil % center the page within the outer (page) hsize.
+          \ifodd\pageno\hskip\bindingoffset\fi
+          \vbox\bgroup
+      \fi
+      %
+      \unvbox\headlinebox
+      \pagebody{#1}%
+      \ifdim\ht\footlinebox > 0pt
+        % Only leave this space if the footline is nonempty.
+        % (We lessened \vsize for it in \oddfootingyyy.)
+        % The \baselineskip=24pt in plain's \makefootline has no effect.
+        \vskip 24pt
+        \unvbox\footlinebox
+      \fi
+      %
+      \ifcropmarks
+          \egroup % end of \vbox\bgroup
+        \hfil\egroup % end of (centering) \line\bgroup
+        \vskip\topandbottommargin plus1fill minus1fill
+        \boxmaxdepth = \cornerthick
+        \vbox to0pt{\vss
+          \line{%
+            \vbox{\moveleft\cornerthick\nsbot}%
+            \hfill
+            \vbox{\moveright\cornerthick\nsbot}%
+          }%
+          \nointerlineskip
+          \line{\ewbot\hfil\ewbot}%
+        }%
+      \egroup % \vbox from first cropmarks clause
+      \fi
+    }% end of \shipout\vbox
+  }% end of group with \indexdummies
+  \advancepageno
+  \ifnum\outputpenalty>-20000 \else\dosupereject\fi
+}
+
+\newinsert\margin \dimen\margin=\maxdimen
+
+\def\pagebody#1{\vbox to\pageheight{\boxmaxdepth=\maxdepth #1}}
+{\catcode`\@ =11
+\gdef\pagecontents#1{\ifvoid\topins\else\unvbox\topins\fi
+% marginal hacks, juha@viisa.uucp (Juha Takala)
+\ifvoid\margin\else % marginal info is present
+  \rlap{\kern\hsize\vbox to\z@{\kern1pt\box\margin \vss}}\fi
+\dimen@=\dp#1\relax \unvbox#1\relax
+\ifvoid\footins\else\vskip\skip\footins\footnoterule \unvbox\footins\fi
+\ifr@ggedbottom \kern-\dimen@ \vfil \fi}
+}
+
+% Here are the rules for the cropmarks.  Note that they are
+% offset so that the space between them is truly \outerhsize or \outervsize
+% (P. A. MacKay, 12 November, 1986)
+%
+\def\ewtop{\vrule height\cornerthick depth0pt width\cornerlong}
+\def\nstop{\vbox
+  {\hrule height\cornerthick depth\cornerlong width\cornerthick}}
+\def\ewbot{\vrule height0pt depth\cornerthick width\cornerlong}
+\def\nsbot{\vbox
+  {\hrule height\cornerlong depth\cornerthick width\cornerthick}}
+
+% \parsearg\foo: read the rest of the input line (minus any trailing
+% comment) and pass it to \foo as an ordinary undelimited TeX argument.
+% \parseargusing{SETUP}\foo does the same, but first executes SETUP
+% inside the group in which the line is read.
+\def\parsearg{\parseargusing{}}
+\def\parseargusing#1#2{%
+  \def\argtorun{#2}%
+  \begingroup
+    \obeylines
+    \spaceisspace
+    #1%
+    \parseargline\empty% Insert the \empty token, see \finishparsearg below.
+}
+
+{\obeylines %
+  \gdef\parseargline#1^^M{%
+    \endgroup % End of the group started in \parsearg.
+    \argremovecomment #1\comment\ArgTerm%
+  }%
+}
+
+% First remove any @comment, then any @c comment.
+\def\argremovecomment#1\comment#2\ArgTerm{\argremovec #1\c\ArgTerm}
+\def\argremovec#1\c#2\ArgTerm{\argcheckspaces#1\^^M\ArgTerm}
+
+% Each occurrence of `\^^M' or `<space>\^^M' is replaced by a single space.
+%
+% \argremovec might leave us with trailing space, e.g.,
+%    @end itemize  @c foo
+% This space token undergoes the same procedure and is eventually removed
+% by \finishparsearg.
+%
+\def\argcheckspaces#1\^^M{\argcheckspacesX#1\^^M \^^M}
+\def\argcheckspacesX#1 \^^M{\argcheckspacesY#1\^^M}
+\def\argcheckspacesY#1\^^M#2\^^M#3\ArgTerm{%
+  \def\temp{#3}%
+  \ifx\temp\empty
+    % Do not use \next, perhaps the caller of \parsearg uses it; reuse \temp:
+    \let\temp\finishparsearg
+  \else
+    \let\temp\argcheckspaces
+  \fi
+  % Put the space token in:
+  \temp#1 #3\ArgTerm
+}
+
+% If a _delimited_ argument is enclosed in braces, they get stripped; so
+% to get _exactly_ the rest of the line, we had to prevent such a situation.
+% We prepended an \empty token at the very beginning and we expand it now,
+% just before passing control to \argtorun.
+% (Similarly, we have to think about #3 of \argcheckspacesY above: it is
+% either the null string, or it ends with \^^M---thus there is no danger
+% that a pair of braces would be stripped.)
+%
+% But first, we have to remove the trailing space token.
+%
+\def\finishparsearg#1 \ArgTerm{\expandafter\argtorun\expandafter{#1}}
+
+% \parseargdef\foo{...}
+%      is roughly equivalent to
+% \def\foo{\parsearg\Xfoo}
+% \def\Xfoo#1{...}
+%
+% Actually, I use \csname\string\foo\endcsname, i.e., \\foo, instead of
+% \Xfoo, as it is my favourite TeX trick.  --kasal, 16nov03
+
+\def\parseargdef#1{%
+  \expandafter \doparseargdef \csname\string#1\endcsname #1%
+}
+\def\doparseargdef#1#2{%
+  \def#2{\parsearg#1}%
+  \def#1##1%
+}
+
+% Several utility definitions with active space:
+{
+  \obeyspaces
+  \gdef\obeyedspace{ }
+
+  % Make each space character in the input produce a normal interword
+  % space in the output.  Don't allow a line break at this space, as this
+  % is used only in environments like @example, where each line of input
+  % should produce a line of output anyway.
+  %
+  \gdef\sepspaces{\obeyspaces\let =\tie}
+
+  % If an index command is used in an @example environment, any spaces
+  % therein should become regular spaces in the raw index file, not the
+  % expansion of \tie (\leavevmode \penalty \@M \ ).
+  \gdef\unsepspaces{\let =\space}
+}
+
+
+\def\flushcr{\ifx\par\lisppar \def\next##1{}\else \let\next=\relax \fi \next}
+
+% Define the framework for environments in texinfo.tex.  It's used like this:
+%
+%   \envdef\foo{...}
+%   \def\Efoo{...}
+%
+% It's the responsibility of \envdef to insert \begingroup before the
+% actual body; @end closes the group after calling \Efoo.  \envdef also
+% defines \thisenv, so the current environment is known; @end checks
+% whether the environment name matches.  The \checkenv macro can also be
+% used to check whether the current environment is the one expected.
+%
+% Non-false conditionals (@iftex, @ifset) don't fit into this, so they
+% are not treated as environments; they don't open a group.  (The
+% implementation of @end takes care not to call \endgroup in this
+% special case.)
+
+
+% At run-time, environments start with this:
+\def\startenvironment#1{\begingroup\def\thisenv{#1}}
+% initialize
+\let\thisenv\empty
+
+% ... but they get defined via ``\envdef\foo{...}'':
+\long\def\envdef#1#2{\def#1{\startenvironment#1#2}}
+\def\envparseargdef#1#2{\parseargdef#1{\startenvironment#1#2}}
+
+% Check that we are in environment #1; otherwise complain via \badenverr:
+\def\checkenv#1{%
+  \def\temp{#1}%
+  \ifx\thisenv\temp
+  \else
+    \badenverr
+  \fi
+}
+
+% Environment mismatch: \temp (as set by \checkenv) was expected, not \thisenv.
+\def\badenverr{%
+  \errhelp = \EMsimple
+  \errmessage{This command can appear only \inenvironment\temp,
+    not \inenvironment\thisenv}%
+}
+\def\inenvironment#1{%
+  \ifx#1\empty
+    out of any environment%
+  \else
+    in environment \expandafter\string#1%
+  \fi
+}
+
+% @end foo checks (with \checkenv) that we are inside environment foo,
+% executes the definition of \Efoo and closes the group.  Nothing is done
+% when the `iscond.foo' control sequence expands to 1.
+\parseargdef\end{%
+  \if 1\csname iscond.#1\endcsname
+  \else
+    % The general wording of \badenverr may not be ideal, but... --kasal, 06nov03
+    \expandafter\checkenv\csname#1\endcsname
+    \csname E#1\endcsname
+    \endgroup
+  \fi
+}
+
+\newhelp\EMsimple{Press RETURN to continue.}
+
+
+%% Simple single-character @ commands
+
+% @@ prints an @
+% Kludge this until the fonts are right (grr).
+\def\@{{\tt\char64}}
+
+% This is turned off because it was never documented
+% and you can use @w{...} around a quote to suppress ligatures.
+%% Define @` and @' to be the same as ` and '
+%% but suppressing ligatures.
+%\def\`{{`}}
+%\def\'{{'}}
+
+% Used to generate quoted braces.
+\def\mylbrace {{\tt\char123}}
+\def\myrbrace {{\tt\char125}}
+\let\{=\mylbrace
+\let\}=\myrbrace
+\begingroup
+  % Definitions to produce \{ and \} commands for indices,
+  % and @{ and @} for the aux/toc files.
+  \catcode`\{ = \other \catcode`\} = \other
+  \catcode`\[ = 1 \catcode`\] = 2
+  \catcode`\! = 0 \catcode`\\ = \other
+  !gdef!lbracecmd[\{]%
+  !gdef!rbracecmd[\}]%
+  !gdef!lbraceatcmd[@{]%
+  !gdef!rbraceatcmd[@}]%
+!endgroup
+
+% @comma{} to avoid , parsing problems.
+\let\comma = ,
+
+% Accents: @, @dotaccent @ringaccent @tieaccent @ubaraccent @udotaccent
+% Others are defined by plain TeX: @` @' @" @^ @~ @= @u @v @H.
+\let\, = \c
+\let\dotaccent = \.
+\def\ringaccent#1{{\accent23 #1}}
+\let\tieaccent = \t
+\let\ubaraccent = \b
+\let\udotaccent = \d
+
+% Other special characters: @questiondown @exclamdown @ordf @ordm
+% Plain TeX defines: @AA @AE @O @OE @L (plus lowercase versions) @ss.
+\def\questiondown{?`}
+\def\exclamdown{!`}
+\def\ordf{\leavevmode\raise1ex\hbox{\selectfonts\lllsize \underbar{a}}}
+\def\ordm{\leavevmode\raise1ex\hbox{\selectfonts\lllsize \underbar{o}}}
+
+% Dotless i and dotless j, used for accents.
+\def\imacro{i}
+\def\jmacro{j}
+\def\dotless#1{%
+  \def\temp{#1}%
+  \ifx\temp\imacro \ifmmode\imath \else\ptexi \fi
+  \else\ifx\temp\jmacro \ifmmode\jmath \else\j \fi
+  \else \errmessage{@dotless can be used only with i or j}%
+  \fi\fi
+}
+
+% The \TeX{} logo, as in plain, but resetting the spacing so that a
+% period following counts as ending a sentence.  (Idea found in latex.)
+%
+\edef\TeX{\TeX \spacefactor=1000 }
+
+% @LaTeX{} logo.  Not quite the same results as the definition in
+% latex.ltx, since we use a different font for the raised A; it's most
+% convenient for us to use an explicitly smaller font, rather than using
+% the \scriptstyle font (since we don't reset \scriptstyle and
+% \scriptscriptstyle).
+%
+\def\LaTeX{%
+  L\kern-.36em
+  {\setbox0=\hbox{T}%
+   \vbox to \ht0{\hbox{\selectfonts\lllsize A}\vss}}%
+  \kern-.15em
+  \TeX
+}
+
+% Be sure we're in horizontal mode when doing a tie, since we make space
+% equivalent to this in @example-like environments. Otherwise, a space
+% at the beginning of a line will start with \penalty -- and
+% since \penalty is valid in vertical mode, we'd end up putting the
+% penalty on the vertical list instead of in the new paragraph.
+{\catcode`@ = 11
+ % Avoid using \@M directly, because that causes trouble
+ % if the definition is written into an index file.
+ \global\let\tiepenalty = \@M
+ \gdef\tie{\leavevmode\penalty\tiepenalty\ }
+}
+
+% @: forces normal size whitespace following.
+\def\:{\spacefactor=1000 }
+
+% @* forces a line break.
+\def\*{\hfil\break\hbox{}\ignorespaces}
+
+% @/ allows a line break.
+\let\/=\allowbreak
+
+% @. is an end-of-sentence period.
+\def\.{.\spacefactor=\endofsentencespacefactor\space}
+
+% @! is an end-of-sentence bang.
+\def\!{!\spacefactor=\endofsentencespacefactor\space}
+
+% @? is an end-of-sentence query.
+\def\?{?\spacefactor=\endofsentencespacefactor\space}
+
+% @frenchspacing on|off  says whether to put extra space after punctuation.
+% 
+\def\onword{on}
+\def\offword{off}
+%
+\parseargdef\frenchspacing{%
+  \def\temp{#1}%
+  \ifx\temp\onword \plainfrenchspacing
+  \else\ifx\temp\offword \plainnonfrenchspacing
+  \else
+    \errhelp = \EMsimple
+    \errmessage{Unknown @frenchspacing option `\temp', must be on/off}%
+  \fi\fi
+}
+
+% @w prevents a word break.  Without the \leavevmode, @w at the
+% beginning of a paragraph, when TeX is still in vertical mode, would
+% produce a whole line of output instead of starting the paragraph.
+\def\w#1{\leavevmode\hbox{#1}}
+
+% @group ... @end group forces ... to be all on one page, by enclosing
+% it in a TeX vbox.  We use \vtop instead of \vbox to construct the box
+% to keep its height that of a normal line.  According to the rules for
+% \topskip (p.114 of the TeXbook), the glue inserted is
+% max (\topskip - \ht (first item), 0).  If that height is large,
+% therefore, no glue is inserted, and the space between the headline and
+% the text is small, which looks bad.
+%
+% Another complication is that the group might be very large.  This can
+% cause the glue on the previous page to be unduly stretched, because it
+% does not have much material.  In this case, it's better to add an
+% explicit \vfill so that the extra space is at the bottom.  \vfilllimit
+% is the threshold used for that decision in \Egroup; it is a fraction
+% of \pageheight (0.7, i.e. 70 percent) and can be changed inside of @tex.
+%
+\newbox\groupbox
+\def\vfilllimit{0.7}
+%
+\envdef\group{%
+  % Complain unless end-of-line is currently an active character, i.e.
+  % unless each input line produces an output line.
+  \ifnum\catcode`\^^M=\active \else
+    \errhelp = \groupinvalidhelp
+    \errmessage{@group invalid in context where filling is enabled}%
+  \fi
+  \startsavinginserts
+  %
+  % Collect everything up to \Egroup in \groupbox; the matching \egroup
+  % is in \Egroup.
+  \setbox\groupbox = \vtop\bgroup
+    % Do @comment since we are called inside an environment such as
+    % @example, where each end-of-line in the input causes an
+    % end-of-line in the output.  We don't want the end-of-line after
+    % the `@group' to put extra space in the output.  Since @group
+    % should appear on a line by itself (according to the Texinfo
+    % manual), we don't worry about eating any user text.
+    \comment
+}
+%
+% \Egroup: executed at @end group.  Closes the \vtop opened by \group,
+% decides whether to start a new page first, and then outputs the box.
+%
+% The \vtop produces a box with normal height and large depth; thus, TeX puts
+% \baselineskip glue before it, and (when the next line of text is done)
+% \lineskip glue after it.  Thus, space below is not quite equal to space
+% above.  But it's pretty close.
+\def\Egroup{%
+    % To get correct interline space between the last line of the group
+    % and the first line afterwards, we have to propagate \prevdepth.
+    \endgraf % Not \par, as it may have been set to \lisppar.
+    % \global, so the value survives the \egroup that follows.
+    \global\dimen1 = \prevdepth
+  \egroup           % End the \vtop.
+  % \dimen0 is the vertical size of the group's box.
+  \dimen0 = \ht\groupbox  \advance\dimen0 by \dp\groupbox
+  % \dimen2 is how much space is left on the page (more or less).
+  \dimen2 = \pageheight   \advance\dimen2 by -\pagetotal
+  % If the group doesn't fit in the space left on the current page, and
+  % the material already on the page is less than \vfilllimit times
+  % \pageheight, force a page break with \page (whose \vfill puts the
+  % extra space at the bottom of the page).
+  \ifdim \dimen0 > \dimen2
+    \ifdim \pagetotal < \vfilllimit\pageheight
+      \page
+    \fi
+  \fi
+  \box\groupbox
+  \prevdepth = \dimen1
+  \checkinserts
+}
+%
+%
+% Help text installed in \errhelp by \group when it is used where
+% end-of-line is not active.
+%
+% TeX puts in an \escapechar (i.e., `@') at the beginning of the help
+% message, so this ends up printing `@group can only ...'.
+%
+\newhelp\groupinvalidhelp{%
+group can only be used in environments such as @example,^^J%
+where each line of input produces a line of output.}
+
+% @need space-in-mils
+% forces a page break if there is not space-in-mils remaining.
+
+% \mil is the unit of @need's argument: one thousandth of an inch.
+\newdimen\mil  \mil=0.001in
+
+% Old definition--didn't work.
+%\parseargdef\need{\par %
+%% This method tries to make TeX break the page naturally
+%% if the depth of the box does not fit.
+%{\baselineskip=0pt%
+%\vtop to #1\mil{\vfil}\kern -#1\mil\nobreak
+%\prevdepth=-1000pt
+%}}
+
+% Current definition: put an empty box of the requested size on the
+% page so TeX can see whether it fits, then kern back over it.
+\parseargdef\need{%
+  % Ensure vertical mode, so we don't make a big box in the middle of a
+  % paragraph.
+  \par
+  %
+  % If the @need value is less than one line space, it's useless.
+  % (\dimen0 is the requested space, \dimen2 the total size of a strut.)
+  \dimen0 = #1\mil
+  \dimen2 = \ht\strutbox
+  \advance\dimen2 by \dp\strutbox
+  \ifdim\dimen0 > \dimen2
+    %
+    % Do a \strut just to make the height of this box be normal, so the
+    % normal leading is inserted relative to the preceding line.
+    % And a page break here is fine.
+    \vtop to #1\mil{\strut\vfil}%
+    %
+    % TeX does not even consider page breaks if a penalty added to the
+    % main vertical list is 10000 or more.  But in order to see if the
+    % empty box we just added fits on the page, we must make it consider
+    % page breaks.  On the other hand, we don't want to actually break the
+    % page after the empty box.  So we use a penalty of 9999.
+    %
+    % There is an extremely small chance that TeX will actually break the
+    % page at this \penalty, if there are no other feasible breakpoints in
+    % sight.  (If the user is using lots of big @group commands, which
+    % almost-but-not-quite fill up a page, TeX will have a hard time doing
+    % good page breaking, for example.)  However, I could not construct an
+    % example where a page broke at this \penalty; if it happens in a real
+    % document, then we can reconsider our strategy.
+    \penalty9999
+    %
+    % Back up by the size of the box, whether we did a page break or not.
+    \kern -#1\mil
+    %
+    % Do not allow a page break right after this kern.
+    \nobreak
+  \fi
+}
+
+% @br   forces paragraph break (and is undocumented).
+% It is simply another name for \par.
+
+\let\br = \par
+
+% @page forces the start of a new page: end the paragraph, fill the
+% rest of the page with \vfill, and eject it with \supereject.
+%
+\def\page{\par\vfill\supereject}
+
+% @exdent text....
+% outputs text on separate line in roman font, starting at standard page margin
+
+% This records the amount of indent in the innermost environment.
+% That's how much \exdent should take out.
+\newskip\exdentamount
+
+% This defn is used inside fill environments such as @defun.
+% It breaks the line before and after the text and kerns the text back
+% by \exdentamount inside an \hbox.
+\parseargdef\exdent{\hfil\break\hbox{\kern -\exdentamount{\rm#1}}\hfil\break}
+
+% This defn is used inside nofill environments such as @example.
+% \leftskip is reduced by \exdentamount only inside the extra brace
+% group, so the change does not leak out.
+\parseargdef\nofillexdent{{\advance \leftskip by -\exdentamount
+  \leftline{\hskip\leftskip{\rm#1}}}}
+
+% @inmargin{WHICH}{TEXT} puts TEXT in the WHICH margin next to the current
+% paragraph.  For more general purposes, use the \margin insertion
+% class.  WHICH is `l' or `r'.
+%
+% \inmarginspacing is the gap between the text block and the margin text.
+\newskip\inmarginspacing \inmarginspacing=1cm
+\def\strutdepth{\dp\strutbox}
+%
+% \doinmargin WHICH TEXT: the material is attached to the current line
+% with \vadjust, in a \vtop whose size is \strutdepth and which is
+% preceded by a kern of -\strutdepth.
+\def\doinmargin#1#2{\strut\vadjust{%
+  \nobreak
+  \kern-\strutdepth
+  \vtop to \strutdepth{%
+    \baselineskip=\strutdepth
+    \vss
+    % if you have multiple lines of stuff to put here, you'll need to
+    % make the vbox yourself of the appropriate size.
+    % `l': \llap sets TEXT to the left of the line start; anything else:
+    % \rlap sets it \hsize + \inmarginspacing to the right.
+    \ifx#1l%
+      \llap{\ignorespaces #2\hskip\inmarginspacing}%
+    \else
+      \rlap{\hskip\hsize \hskip\inmarginspacing \ignorespaces #2}%
+    \fi
+    \null
+  }%
+}}
+% Convenience entry points with WHICH already supplied.
+\def\inleftmargin{\doinmargin l}
+\def\inrightmargin{\doinmargin r}
+%
+% @inmargin{TEXT [, RIGHT-TEXT]}
+% (if RIGHT-TEXT is given, use TEXT for left page, RIGHT-TEXT for right;
+% else use TEXT for both).
+%
+% The two appended commas guarantee that \parseinmargin's delimited
+% parameters always match, even when the argument has no comma.
+\def\inmargin#1{\parseinmargin #1,,\finish}
+\def\parseinmargin#1,#2,#3\finish{% not perfect, but better than nothing.
+  % RIGHT-TEXT counts as present if it typesets to a nonzero width.
+  \setbox0 = \hbox{\ignorespaces #2}%
+  \ifdim\wd0 > 0pt
+    \def\lefttext{#1}%  have both texts
+    \def\righttext{#2}%
+  \else
+    \def\lefttext{#1}%  have only one text
+    \def\righttext{#1}%
+  \fi
+  %
+  % The side is chosen from \pageno at the time this macro runs.
+  \ifodd\pageno
+    \def\temp{\inrightmargin\righttext}% odd page -> outside is right margin
+  \else
+    \def\temp{\inleftmargin\lefttext}%
+  \fi
+  \temp
+}
+
+% @include FILE -- \input text of FILE.
+%
+% The argument is read with the catcodes set by \filenamecatcodes, then
+% passed to \includezzz.
+\def\include{\parseargusing\filenamecatcodes\includezzz}
+\def\includezzz#1{%
+  % Remember the current \thisfile so it can be restored afterwards.
+  \pushthisfilestack
+  \def\thisfile{#1}%
+  {%
+    \makevalueexpandable  % we want to expand any @value in FILE.
+    \turnoffactive        % and allow special characters in the expansion
+    \edef\temp{\noexpand\input #1 }%
+    %
+    % This trickery is to read FILE outside of a group, in case it makes
+    % definitions, etc.
+    % (\expandafter expands \temp before the closing brace ends the
+    % group, so the \input tokens are left after the group.)
+    \expandafter
+  }\temp
+  \popthisfilestack
+}
+% Make the characters below ordinary (catcode \other) while a file name
+% is being read.
+\def\filenamecatcodes{%
+  \catcode`\\=\other
+  \catcode`~=\other
+  \catcode`^=\other
+  \catcode`_=\other
+  \catcode`|=\other
+  \catcode`<=\other
+  \catcode`>=\other
+  \catcode`+=\other
+  \catcode`-=\other
+}
+
+% A stack of file names, kept inside the definition of \popthisfilestack.
+%
+% \pushthisfilestack expands the current body of \popthisfilestack and
+% then the current \thisfile (one level each), and hands both to
+% \pushthisfilestackY, which globally redefines \popthisfilestack so that
+% calling it restores \thisfile and the previous \popthisfilestack.
+\def\pushthisfilestack{%
+  \expandafter\pushthisfilestackX\popthisfilestack\StackTerm
+}
+\def\pushthisfilestackX{%
+  \expandafter\pushthisfilestackY\thisfile\StackTerm
+}
+% #1 is the saved \thisfile, #2 the saved body of \popthisfilestack.
+\def\pushthisfilestackY #1\StackTerm #2\StackTerm {%
+  \gdef\popthisfilestack{\gdef\thisfile{#1}\gdef\popthisfilestack{#2}}%
+}
+
+% Bottom of the stack: popping with nothing pushed is an internal error.
+\def\popthisfilestack{\errthisfilestackempty}
+\def\errthisfilestackempty{\errmessage{Internal error:
+  the stack of filenames is empty.}}
+
+% Initially no file name is recorded.
+\def\thisfile{}
+
+% @center line
+% outputs that line, centered.
+%
+% In horizontal mode the work is done by \centerH, otherwise by \centerV;
+% either one receives the text already wrapped in \hfil ... \hfil.
+\parseargdef\center{%
+  \ifhmode
+    \let\next\centerH
+  \else
+    \let\next\centerV
+  \fi
+  \next{\hfil \ignorespaces#1\unskip \hfil}%
+}
+% Horizontal-mode case: end the current line, set the text in a \line
+% with \hsize reduced by \leftskip and \rightskip (locally, inside the
+% brace group), and break again.
+\def\centerH#1{%
+  {%
+    \hfil\break
+    \advance\hsize by -\leftskip
+    \advance\hsize by -\rightskip
+    \line{#1}%
+    \break
+  }%
+}
+% Other modes: a \line with kerns equal to \leftskip and \rightskip at
+% its two ends.
+\def\centerV#1{\line{\kern\leftskip #1\kern\rightskip}}
+
+% @sp n   outputs n lines of vertical space
+% (a \vskip of n times the current \baselineskip).
+
+\parseargdef\sp{\vskip #1\baselineskip}
+
+% @comment ...line which is ignored...
+% @c is the same as @comment
+% @ignore ... @end ignore  is another way to write a comment
+
+% \comment opens a group in which end-of-line, @, { and } are all
+% ordinary characters, then lets \commentxxx swallow the rest of the line.
+\def\comment{\begingroup \catcode`\^^M=\other%
+\catcode`\@=\other \catcode`\{=\other \catcode`\}=\other%
+\commentxxx}
+% \commentxxx is defined while end-of-line has catcode \other so that it
+% can serve as the argument delimiter; it discards its argument and
+% closes the group opened by \comment.
+{\catcode`\^^M=\other \gdef\commentxxx#1^^M{\endgroup}}
+
+\let\c=\comment
+
+% @paragraphindent NCHARS
+% NCHARS is taken as a number of ems (close enough to characters), or
+% is one of the keywords `none' (no indentation) or `asis'.
+% `asis' cannot feasibly be implemented, so it leaves the default alone.
+%
+\def\asisword{asis} % no translation, these are keywords
+\def\noneword{none}
+%
+\parseargdef\paragraphindent{%
+  \def\temp{#1}%
+  \ifx\temp\noneword
+    \defaultparindent = 0pt
+  \else
+    \ifx\temp\asisword
+      % nothing to change: keep the current \defaultparindent.
+    \else
+      \defaultparindent = #1em
+    \fi
+  \fi
+  % In every case, make the (possibly unchanged) default take effect.
+  \parindent = \defaultparindent
+}
+
+% @exampleindent NCHARS
+% Sets \lispnarrowing.  As with @paragraphindent, NCHARS is measured in
+% ems; `none' means 0pt.  `asis' is accepted only for symmetry with
+% @paragraphindent and changes nothing.
+\parseargdef\exampleindent{%
+  \def\temp{#1}%
+  \ifx\temp\noneword
+    \lispnarrowing = 0pt
+  \else
+    \ifx\temp\asisword
+      % nothing to change: keep the current \lispnarrowing.
+    \else
+      \lispnarrowing = #1em
+    \fi
+  \fi
+}
+
+% @firstparagraphindent WORD
+% WORD `none' suppresses indentation of the first paragraph after a
+% section heading; WORD `insert' indents such paragraphs.  Anything
+% else is an error.
+%
+% The sectioning commands call \suppressfirstparagraphindent; this
+% command just switches that name between the real suppression macro
+% and \relax.  Suppression is the default.
+%
+\def\suppressfirstparagraphindent{\dosuppressfirstparagraphindent}
+\def\insertword{insert}
+%
+\parseargdef\firstparagraphindent{%
+  \def\temp{#1}%
+  \ifx\temp\insertword
+    \let\suppressfirstparagraphindent = \relax
+  \else
+    \ifx\temp\noneword
+      \let\suppressfirstparagraphindent = \dosuppressfirstparagraphindent
+    \else
+      \errhelp = \EMsimple
+      \errmessage{Unknown @firstparagraphindent option `\temp'}%
+    \fi
+  \fi
+}
+
+% Here is how we actually suppress indentation.  Redefine \everypar to
+% \kern backwards by \parindent, and then reset itself to empty.
+%
+% We also make \indent itself not actually do anything until the next
+% paragraph.
+%
+% All redefinitions here are global.  The temporary \indent and
+% \noindent first undo the suppression (restoring the meanings saved in
+% \ptexindent and \ptexnoindent) and then run the restored command.
+\gdef\dosuppressfirstparagraphindent{%
+  \gdef\indent{%
+    \restorefirstparagraphindent
+    \indent
+  }%
+  \gdef\noindent{%
+    \restorefirstparagraphindent
+    \noindent
+  }%
+  \global\everypar = {%
+    \kern -\parindent
+    \restorefirstparagraphindent
+  }%
+}
+
+% Undo \dosuppressfirstparagraphindent: restore \indent and \noindent
+% and empty \everypar, all globally.
+\gdef\restorefirstparagraphindent{%
+  \global \let \indent = \ptexindent
+  \global \let \noindent = \ptexnoindent
+  \global \everypar = {}%
+}
+
+
+% @asis just yields its argument, unchanged.  Used with @table, for
+% example.
+%
+\def\asis#1{#1}
+
+% @math outputs its argument in math mode.
+%
+% One complication: _ usually means subscripts, but it could also mean
+% an actual _ character, as in @math{@var{some_variable} + 1}.  So make
+% _ active, and distinguish by seeing if the current family is \slfam,
+% which is what @var uses.
+% (The definition is made inside a group where _ is already active, so
+% that the `_' token in \mathunderscore's body is the active one.)
+{
+  \catcode`\_ = \active
+  \gdef\mathunderscore{%
+    \catcode`\_=\active
+    \def_{\ifnum\fam=\slfam \_\else\sb\fi}%
+  }
+}
+% Another complication: we want \\ (and @\) to output a \ character.
+% FYI, plain.tex uses \\ as a temporary control sequence (why?), but
+% this is not advertised and we don't care.  Texinfo does not
+% otherwise define @\.
+%
+% The \mathchar is class=0=ordinary, family=7=ttfam, position=5C=\.
+% It is used only when the current family is \ttfam; otherwise
+% \backslash is used.
+\def\mathbackslash{\ifnum\fam=\ttfam \mathchar"075C \else\backslash \fi}
+%
+% \math sets everything up, enters math mode with `$', and lets
+% \finishmath grab the braced argument.
+\def\math{%
+  \tex
+  \mathunderscore
+  \let\\ = \mathbackslash
+  \mathactive
+  % make the texinfo accent commands work in math mode
+  \let\"=\ddot
+  \let\'=\acute
+  \let\==\bar
+  \let\^=\hat
+  \let\`=\grave
+  \let\u=\breve
+  \let\v=\check
+  \let\~=\tilde
+  \let\dotaccent=\dot
+  $\finishmath
+}
+\def\finishmath#1{#1$\endgroup}  % Close the group opened by \tex.
+
+% Some active characters (such as <) are spaced differently in math.
+% We have to reset their definitions in case the @math was an argument
+% to a command which sets the catcodes (such as @item or @section).
+%
+% \mathactive gives the active ^ < > + the meanings saved in \ptexhat,
+% \ptexless, \ptexgtr and \ptexplus.  It is defined inside a group where
+% those four characters are active, so the body refers to the active
+% tokens.
+{
+  \catcode`^ = \active
+  \catcode`< = \active
+  \catcode`> = \active
+  \catcode`+ = \active
+  \gdef\mathactive{%
+    \let^ = \ptexhat
+    \let< = \ptexless
+    \let> = \ptexgtr
+    \let+ = \ptexplus
+  }
+}
+
+% Some math mode symbols.
+% \bullet always wraps the symbol in $...$; \geq, \leq and \minus work
+% both inside math mode and in text (where they add the $...$ themselves).
+\def\bullet{$\ptexbullet$}
+\def\geq{\ifmmode \ge\else $\ge$\fi}
+\def\leq{\ifmmode \le\else $\le$\fi}
+\def\minus{\ifmmode -\else $-$\fi}
+
+% @dots{} outputs an ellipsis using the current font.
+% The box is at least 1.5em wide (.5em per period), which matches three
+% real periods in the cm typewriter fonts.  In other typewriter fonts
+% three periods are wider than that, so the natural width of `...' is
+% used whenever it is the larger of the two.
+%
+\def\dots{%
+  \leavevmode
+  \setbox0=\hbox{...}% three periods at their natural width
+  \dimen0 = 1.5em
+  \ifdim\wd0 > \dimen0
+    \dimen0 = \wd0
+  \fi
+  \hbox to \dimen0{%
+    \hskip 0pt plus.25fil
+    .\hskip 0pt plus1fil
+    .\hskip 0pt plus1fil
+    .\hskip 0pt plus.5fil
+  }%
+}
+
+% @enddots{} is an end-of-sentence ellipsis: \dots followed by the
+% end-of-sentence space factor.
+%
+\def\enddots{%
+  \dots
+  \spacefactor=\endofsentencespacefactor
+}
+
+% @comma{} is so commas can be inserted into text without messing up
+% Texinfo's parsing.  \comma is \let to the comma character itself.
+%
+\let\comma = ,
+
+% @refill is a no-op.
+\let\refill=\relax
+
+% If working on a large document in chapters, it is convenient to
+% be able to disable indexing, cross-referencing, and contents, for test runs.
+% This is done with @novalidate (before @setfilename).
+%
+% \novalidate is simply \linksfalse; \setfilename consults \iflinks.
+\newif\iflinks \linkstrue % by default we want the aux files.
+\let\novalidate = \linksfalse
+
+% @setfilename is done at the beginning of every texinfo file.
+% So open here the files we need to have open while reading the input.
+% This makes it possible to make a .fmt file for texinfo.
+\def\setfilename{%
+   \fixbackslash  % Turn off hack to swallow `\input texinfo'.
+   % Only touch the aux file when links are enabled (see @novalidate).
+   \iflinks
+     \tryauxfile
+     % Open the new aux file.  TeX will close it automatically at exit.
+     \immediate\openout\auxfile=\jobname.aux
+   \fi % \openindices needs to do some work in any case.
+   \openindices
+   \let\setfilename=\comment % Ignore extra @setfilename cmds.
+   %
+   % If texinfo.cnf is present on the system, read it.
+   % Useful for site-wide @afourpaper, etc.
+   % (Presence is tested by opening it on input stream 1 and checking
+   % \ifeof.)
+   \openin 1 texinfo.cnf
+   \ifeof 1 \else \input texinfo.cnf \fi
+   \closein 1
+   %
+   \comment % Ignore the actual filename.
+}
+
+% Called from \setfilename.
+% Declares the predefined indices: cp with \newindex, and fn, vr, tp,
+% ky, pg with \newcodeindex.
+%
+\def\openindices{%
+  \newindex{cp}%
+  \newcodeindex{fn}%
+  \newcodeindex{vr}%
+  \newcodeindex{tp}%
+  \newcodeindex{ky}%
+  \newcodeindex{pg}%
+}
+
+% @bye.
+% Runs \pagealignmacro, turns on \tracingstats, and then runs \ptexend.
+\outer\def\bye{\pagealignmacro\tracingstats=1\ptexend}
+
+
+\message{pdf,}
+% adobe `portable' document format
+% Registers and conditionals used by the pdf support below.
+\newcount\tempnum
+\newcount\lnkcount
+\newtoks\filename
+\newcount\filenamelength
+\newcount\pgn
+\newtoks\toksA
+\newtoks\toksB
+\newtoks\toksC
+\newtoks\toksD
+\newbox\boxA
+\newcount\countA
+\newif\ifpdf
+\newif\ifpdfmakepagedest
+
+% when pdftex is run in dvi mode, \pdfoutput is defined (so \pdfoutput=1
+% can be set).  So we test for \relax and 0 as well as \undefined,
+% borrowed from ifpdf.sty.
+% \pdftrue is set only when \pdfoutput is defined, is not \relax, and
+% has a value other than 0 (the \ifcase falls through to its \else).
+\ifx\pdfoutput\undefined
+\else
+  \ifx\pdfoutput\relax
+  \else
+    \ifcase\pdfoutput
+    \else
+      \pdftrue
+    \fi
+  \fi
+\fi
+
+% PDF uses PostScript string constants for the names of xref targets,
+% for display in the outlines, and in other places.  Thus, we have to
+% double any backslashes.  Otherwise, a name like "\node" will be
+% interpreted as a newline (\n), followed by o, d, e.  Not good.
+% http://www.ntg.nl/pipermail/ntg-pdftex/2004-July/000654.html
+% (and related messages, the final outcome is that it is up to the TeX
+% user to double the backslashes and otherwise make the string valid, so
+% that's what we do).
+
+% \activebackslashdouble: make backslash an active character whose
+% meaning is \doublebackslash.  Inside this group @ is the escape
+% character and \ is active, which is why the definition is written
+% with @-commands.
+%
+{\catcode`\@=0 \catcode`\\=\active
+ @gdef@activebackslashdouble{%
+   @catcode`@\=@active
+   @let\=@doublebackslash}
+}
+
+% To handle parens, we must adopt a different approach, since parens are
+% not active characters.  hyperref.dtx (which has the same problem as
+% us) handles it with this amazing macro to replace tokens, with minor
+% changes for Texinfo.  It is included here under the GPL by permission
+% from the author, Heiko Oberdiek.
+%
+% #1 is the tokens to replace.
+% #2 is the replacement.
+% #3 is the control sequence with the string.
+%
+% How it works: \HyPsdReplace is (re)defined with #1 as its argument
+% delimiter.  It outputs the text before the first #1; if nothing
+% follows (the `\ifx\\##2\\' emptiness test), it stops, otherwise it
+% outputs #2 and recurses on the remainder.  The final \xdef appends one
+% extra #1 so that the delimiter is always found, and stores the result
+% globally back into #3.
+\def\HyPsdSubst#1#2#3{%
+  \def\HyPsdReplace##1#1##2\END{%
+    ##1%
+    \ifx\\##2\\%
+    \else
+      #2%
+      \HyReturnAfterFi{%
+        \HyPsdReplace##2\END
+      }%
+    \fi
+  }%
+  \xdef#3{\expandafter\HyPsdReplace#3#1\END}%
+}
+% Close the conditional first (\fi), then deliver the argument, so the
+% recursion in \HyPsdReplace happens outside the \ifx.
+\long\def\HyReturnAfterFi#1\fi{\fi#1}
+
+% \backslashparens: put \realbackslash in front of every ( and ) in a
+% string.
+% #1 is a control sequence in which to do the replacements.
+% The result is stored globally (\xdef, here and in \HyPsdSubst).
+\def\backslashparens#1{%
+  \xdef#1{#1}% redefine it as its expansion; the definition is simply
+             % \lastnode when called from \setref -> \pdfmkdest.
+  \HyPsdSubst{(}{\realbackslash(}{#1}%
+  \HyPsdSubst{)}{\realbackslash)}{#1}%
+}
+
+% Help text installed in \errhelp by \dopdfimage when no image file with
+% a supported extension can be opened.
+\newhelp\nopdfimagehelp{Texinfo supports .png, .jpg, .jpeg, and .pdf images
+with PDF output, and none of those formats could be found.  (.eps cannot
+be supported due to the design of the PDF format; use regular TeX (DVI
+output) for that.)}
+
+\ifpdf
+  %
+  % Color manipulation macros based on pdfcolor.tex.
+  % Colors are given as four CMYK components.
+  \def\cmykDarkRed{0.28 1 1 0.35}
+  \def\cmykBlack{0 0 0 1}
+  %
+  % Emit the color in #1 into the page as a pdf literal with the `k'
+  % operator.
+  \def\pdfsetcolor#1{\pdfliteral{#1 k}}
+  % Set color, and create a mark which defines \thiscolor accordingly,
+  % so that \makeheadline knows which color to restore.
+  \def\setcolor#1{%
+    \xdef\lastcolordefs{\gdef\noexpand\thiscolor{#1}}%
+    \domark
+    \pdfsetcolor{#1}%
+  }
+  %
+  % Start out in the main color, with no color recorded for the marks.
+  \def\maincolor{\cmykBlack}
+  \pdfsetcolor{\maincolor}
+  \edef\thiscolor{\maincolor}
+  \def\lastcolordefs{}
+  %
+  % Footline for pdf output: always typeset in \maincolor.
+  \def\makefootline{%
+    \baselineskip24pt
+    \line{\pdfsetcolor{\maincolor}\the\footline}%
+  }
+  %
+  % Headline for pdf output: typeset in \maincolor, then switch back to
+  % whatever color the page body is using (\thiscolor).
+  \def\makeheadline{%
+    \vbox to 0pt{%
+      \vskip-22.5pt
+      \line{%
+        \vbox to8.5pt{}%
+        % Extract \thiscolor definition from the marks.
+        \getcolormarks
+        % Typeset the headline with \maincolor, then restore the color.
+        \pdfsetcolor{\maincolor}\the\headline\pdfsetcolor{\thiscolor}%
+      }%
+      \vss
+    }%
+    \nointerlineskip
+  }
+  %
+  % Put /PageMode /UseOutlines into the PDF catalog.
+  %
+  \pdfcatalog{/PageMode /UseOutlines}
+  %
+  % #1 is image name, #2 width (might be empty/whitespace), #3 height (ditto).
+  % Boxes 0 and 2 hold the typeset width and height arguments; a zero box
+  % width later means the corresponding argument was effectively empty.
+  \def\dopdfimage#1#2#3{%
+    \def\imagewidth{#2}\setbox0 = \hbox{\ignorespaces #2}%
+    \def\imageheight{#3}\setbox2 = \hbox{\ignorespaces #3}%
+    %
+    % pdftex (and the PDF format) support .png, .jpg, .pdf (among
+    % others).  Try the extensions in this order: .png, .jpg, .jpeg,
+    % .JPG, .pdf, .PDF.  The first file that can be opened determines
+    % \pdfimgext (set with \gdef, since we are inside a group); if none
+    % can be opened, an error is raised and \pdfimgext stays empty.
+    \let\pdfimgext=\empty
+    \begingroup
+      \openin 1 #1.png \ifeof 1
+        \openin 1 #1.jpg \ifeof 1
+          \openin 1 #1.jpeg \ifeof 1
+            \openin 1 #1.JPG \ifeof 1
+              \openin 1 #1.pdf \ifeof 1
+                \openin 1 #1.PDF \ifeof 1
+                  \errhelp = \nopdfimagehelp
+                  \errmessage{Could not find image file #1 for pdf}%
+                \else \gdef\pdfimgext{PDF}%
+                \fi
+              \else \gdef\pdfimgext{pdf}%
+              \fi
+            \else \gdef\pdfimgext{JPG}%
+            \fi
+          \else \gdef\pdfimgext{jpeg}%
+          \fi
+        \else \gdef\pdfimgext{jpg}%
+        \fi
+      \else \gdef\pdfimgext{png}%
+      \fi
+      \closein 1
+    \endgroup
+    %
+    % without \immediate, ancient pdftex seg faults when the same image is
+    % included twice.  (Version 3.14159-pre-1.0-unofficial-20010704.)
+    % pdftex versions before 14 use \pdfimage; later ones use \pdfximage
+    % followed by \pdfrefximage.
+    \ifnum\pdftexversion < 14
+      \immediate\pdfimage
+    \else
+      \immediate\pdfximage
+    \fi
+      \ifdim \wd0 >0pt width \imagewidth \fi
+      \ifdim \wd2 >0pt height \imageheight \fi
+      \ifnum\pdftexversion<13
+         #1.\pdfimgext
+       \else
+         {#1.\pdfimgext}%
+       \fi
+    \ifnum\pdftexversion < 14 \else
+      \pdfrefximage \pdflastximage
+    \fi}
+  %
+  % \pdfmkdest{NAME}: create a named pdf destination.  All the setup is
+  % confined to the extra brace group.
+  \def\pdfmkdest#1{{%
+    % We have to set dummies so commands such as @code, and characters
+    % such as \, aren't expanded when present in a section title.
+    \indexnofonts
+    \turnoffactive
+    \activebackslashdouble
+    \makevalueexpandable
+    \def\pdfdestname{#1}%
+    % Escape parentheses, since the name ends up in a pdf string.
+    \backslashparens\pdfdestname
+    \safewhatsit{\pdfdest name{\pdfdestname} xyz}%
+  }}
+  %
+  % used to mark target names; must be expandable.
+  \def\pdfmkpgn#1{#1}
+  %
+  % by default, use a color that is dark enough to print on paper as
+  % nearly black, but still distinguishable for online viewing.
+  \def\urlcolor{\cmykDarkRed}
+  \def\linkcolor{\cmykDarkRed}
+  % End a link: switch back to \maincolor, then close the link.
+  \def\endlink{\setcolor{\maincolor}\pdfendlink}
+  %
+  % Adding outlines to PDF; macros for calculating structure of outlines
+  % come from Petr Olsak
+  %
+  % \expnumber{NAME} expands to the value stored in the control sequence
+  % named NAME, or to 0 if that control sequence is not yet defined.
+  \def\expnumber#1{\expandafter\ifx\csname#1\endcsname\relax 0%
+    \else \csname#1\endcsname \fi}
+  % \advancenumber{NAME} globally redefines the control sequence named
+  % NAME to its old value (per \expnumber) plus one.
+  \def\advancenumber#1{\tempnum=\expnumber{#1}\relax
+    \advance\tempnum by 1
+    \expandafter\xdef\csname#1\endcsname{\the\tempnum}}
+  %
+  % \dopdfoutline: emit one pdf outline (bookmark) entry.
+  %
+  % #1 is the section text, which is what will be displayed in the
+  % outline by the pdf viewer.  #2 is the pdf expression for the number
+  % of subentries (or empty, for subsubsections).  #3 is the node text,
+  % which might be empty if this toc entry had no corresponding node.
+  % #4 is the page number
+  %
+  \def\dopdfoutline#1#2#3#4{%
+    % Generate a link to the node text if that exists; else, use the
+    % page number.  We could generate a destination for the section
+    % text in the case where a section has no node, but it doesn't
+    % seem worth the trouble, since most documents are normally structured.
+    \def\pdfoutlinedest{#3}%
+    \ifx\pdfoutlinedest\empty
+      \def\pdfoutlinedest{#4}%
+    \else
+      % Doubled backslashes in the name.
+      % (\xdef, so the result survives the brace group that confines
+      % \activebackslashdouble.)
+      {\activebackslashdouble \xdef\pdfoutlinedest{#3}%
+       \backslashparens\pdfoutlinedest}%
+    \fi
+    %
+    % Also double the backslashes in the display string.
+    {\activebackslashdouble \xdef\pdfoutlinetext{#1}%
+     \backslashparens\pdfoutlinetext}%
+    %
+    \pdfoutline goto name{\pdfmkpgn{\pdfoutlinedest}}#2{\pdfoutlinetext}%
+  }
+  %
+  % \pdfmakeoutlines: build the pdf outline tree from the toc file, in
+  % two passes.  The first pass only counts, for every chapter, section
+  % and subsection, how many direct subentries it has; the second pass
+  % emits the outline entries using those counts.
+  \def\pdfmakeoutlines{%
+    \begingroup
+      % Thanh's hack / proper braces in bookmarks
+      \edef\mylbrace{\iftrue \string{\else}\fi}\let\{=\mylbrace
+      \edef\myrbrace{\iffalse{\else\string}\fi}\let\}=\myrbrace
+      %
+      % Read toc silently, to get counts of subentries for \pdfoutline.
+      % Each entry remembers its own number (##2) and bumps the counter
+      % named after its parent.
+      \def\numchapentry##1##2##3##4{%
+       \def\thischapnum{##2}%
+       \def\thissecnum{0}%
+       \def\thissubsecnum{0}%
+      }%
+      \def\numsecentry##1##2##3##4{%
+       \advancenumber{chap\thischapnum}%
+       \def\thissecnum{##2}%
+       \def\thissubsecnum{0}%
+      }%
+      \def\numsubsecentry##1##2##3##4{%
+       \advancenumber{sec\thissecnum}%
+       \def\thissubsecnum{##2}%
+      }%
+      \def\numsubsubsecentry##1##2##3##4{%
+       \advancenumber{subsec\thissubsecnum}%
+      }%
+      \def\thischapnum{0}%
+      \def\thissecnum{0}%
+      \def\thissubsecnum{0}%
+      %
+      % use \def rather than \let here because we redefine \chapentry et
+      % al. a second time, below.
+      % Appendix and unnumbered entries are handled exactly like the
+      % numbered ones.
+      \def\appentry{\numchapentry}%
+      \def\appsecentry{\numsecentry}%
+      \def\appsubsecentry{\numsubsecentry}%
+      \def\appsubsubsecentry{\numsubsubsecentry}%
+      \def\unnchapentry{\numchapentry}%
+      \def\unnsecentry{\numsecentry}%
+      \def\unnsubsecentry{\numsubsecentry}%
+      \def\unnsubsubsecentry{\numsubsubsecentry}%
+      \readdatafile{toc}%
+      %
+      % Read toc second time, this time actually producing the outlines.
+      % The `-' means take the \expnumber as the absolute number of
+      % subentries, which we calculated on our first read of the .toc above.
+      %
+      % We use the node names as the destinations.
+      \def\numchapentry##1##2##3##4{%
+        \dopdfoutline{##1}{count-\expnumber{chap##2}}{##3}{##4}}%
+      \def\numsecentry##1##2##3##4{%
+        \dopdfoutline{##1}{count-\expnumber{sec##2}}{##3}{##4}}%
+      \def\numsubsecentry##1##2##3##4{%
+        \dopdfoutline{##1}{count-\expnumber{subsec##2}}{##3}{##4}}%
+      \def\numsubsubsecentry##1##2##3##4{% count is always zero
+        \dopdfoutline{##1}{}{##3}{##4}}%
+      %
+      % PDF outlines are displayed using system fonts, instead of
+      % document fonts.  Therefore we cannot use special characters,
+      % since the encoding is unknown.  For example, the eogonek from
+      % Latin 2 (0xea) gets translated to a | character.  Info from
+      % Staszek Wawrykiewicz, 19 Jan 2004 04:09:24 +0100.
+      %
+      % xx to do this right, we have to translate 8-bit characters to
+      % their "best" equivalent, based on the @documentencoding.  Right
+      % now, I guess we'll just let the pdf reader have its way.
+      \indexnofonts
+      \setupdatafile
+      \catcode`\\=\active \otherbackslash
+      \input \tocreadfilename
+    \endgroup
+  }
+  %
+  % \skipspaces TOKENS| -- append TOKENS, one at a time, to the \filename
+  % token register, counting them in \filenamelength, and stop at the
+  % `|' sentinel supplied by \getfilename.
+  %
+  % No explicit test for blanks is needed: TeX skips space tokens when it
+  % scans an undelimited argument, so #1 is never a bare space.  (An
+  % earlier version wrapped the two lines of the \else branch in
+  % `\ifx\p\space\else ... \fi'; \p was never defined, so that test was
+  % always false and would only have misbehaved if a document happened
+  % to define \p.)
+  \def\skipspaces#1{\def\PP{#1}\def\D{|}%
+    \ifx\PP\D\let\nextsp\relax
+    \else\let\nextsp\skipspaces
+      \addtokens{\filename}{\PP}%
+      \advance\filenamelength by 1
+    \fi
+    \nextsp}
+  % \getfilename{NAME}: reset \filenamelength, then run \skipspaces over
+  % NAME (with its first token expanded once) followed by the sentinel.
+  \def\getfilename#1{\filenamelength=0\expandafter\skipspaces#1|\relax}
+  % \startlink is the link-opening primitive: \pdfannotlink for pdftex
+  % versions before 14, \pdfstartlink otherwise.
+  \ifnum\pdftexversion < 14
+    \let \startlink \pdfannotlink
+  \else
+    \let \startlink \pdfstartlink
+  \fi
+  % make a live url in pdf output.
+  % The group is closed again before the link text is typeset; only the
+  % color change and the link start happen inside it.
+  \def\pdfurl#1{%
+    \begingroup
+      % it seems we really need yet another set of dummies; have not
+      % tried to figure out what each command should do in the context
+      % of @url.  for now, just make @/ a no-op, that's the only one
+      % people have actually reported a problem with.
+      %
+      \normalturnoffactive
+      \def\@{@}%
+      \let\/=\empty
+      \makevalueexpandable
+      \leavevmode\setcolor{\urlcolor}%
+      \startlink attr{/Border [0 0 0]}%
+        user{/Subtype /Link /A << /S /URI /URI (#1) >>}%
+    \endgroup}
+  % \pdfgettoks TEXT. -- scan TEXT (up to the first period) one token at
+  % a time and leave in \toksA a copy in which every run of digits is
+  % wrapped in \pdflink{...}.  The scanning happens inside a box
+  % assignment (\boxA).  Registers: \toksA = input still to scan,
+  % \toksB = output built so far, \toksC = digits of the current run,
+  % \toksD = the token just popped, \countA = 1 while inside a digit run.
+  \def\pdfgettoks#1.{\setbox\boxA=\hbox{\toksA={#1.}\toksB={}\maketoks}}
+  % Append #2 (expanded by \edef) to token register #1.
+  \def\addtokens#1#2{\edef\addtoks{\noexpand#1={\the#1#2}}\addtoks}
+  % Add digit #1 to the current run and keep scanning.
+  \def\adn#1{\addtokens{\toksC}{#1}\global\countA=1\let\next=\maketoks}
+  % Split off the first token of the input: \first and \toksD get it,
+  % \toksA keeps the rest.
+  \def\poptoks#1#2|ENDTOKS|{\let\first=#1\toksD={#1}\toksA={#2}}
+  % One scanning step.  A digit goes to \adn; anything else first
+  % flushes a pending digit run with \makelink, then either finishes (on
+  % `.') or is copied to \toksB (a comma gets a space appended).
+  \def\maketoks{%
+    \expandafter\poptoks\the\toksA|ENDTOKS|\relax
+    \ifx\first0\adn0
+    \else\ifx\first1\adn1 \else\ifx\first2\adn2 \else\ifx\first3\adn3
+    \else\ifx\first4\adn4 \else\ifx\first5\adn5 \else\ifx\first6\adn6
+    \else\ifx\first7\adn7 \else\ifx\first8\adn8 \else\ifx\first9\adn9
+    \else
+      \ifnum0=\countA\else\makelink\fi
+      \ifx\first.\let\next=\done\else
+        \let\next=\maketoks
+        \addtokens{\toksB}{\the\toksD}
+        \ifx\first,\addtokens{\toksB}{\space}\fi
+      \fi
+    \fi\fi\fi\fi\fi\fi\fi\fi\fi\fi
+    \next}
+  % Move the collected digit run into the output as \pdflink{DIGITS} and
+  % reset the run state.
+  \def\makelink{\addtokens{\toksB}%
+    {\noexpand\pdflink{\the\toksC}}\toksC={}\global\countA=0}
+  % Typeset #1 in \linkcolor as a link to the destination named #1.
+  \def\pdflink#1{%
+    \startlink attr{/Border [0 0 0]} goto name{\pdfmkpgn{#1}}
+    \setcolor{\linkcolor}#1\endlink}
+  % Finish: copy the output globally into \toksA.
+  \def\done{\edef\st{\global\noexpand\toksA={\the\toksB}}\st}
+\else
+  % Not producing pdf: give the pdf hooks used elsewhere in the file
+  % stand-in meanings (\gobble or \relax).
+  \let\pdfmkdest = \gobble
+  \let\pdfurl = \gobble
+  \let\endlink = \relax
+  \let\setcolor = \gobble
+  \let\pdfsetcolor = \gobble
+  \let\pdfmakeoutlines = \relax
+\fi  % \ifpdf
+
+
+\message{fonts,}
+
+% Change the current font style to #1, remembering it in \curfontstyle.
+% For now, we do not accumulate font styles: @b{@i{foo}} prints foo in
+% italics, not bold italics.
+%
+\def\setfontstyle#1{%
+  \def\curfontstyle{#1}% not as a control sequence, because we are \edef'd.
+  \csname ten#1\endcsname  % change the current font
+}
+
+% Select #1 fonts with the current style.
+% That is: run the macro named `#1fonts', then the command named by
+% \curfontstyle.
+%
+\def\selectfonts#1{\csname #1fonts\endcsname \csname\curfontstyle\endcsname}
+
+% The style commands: each sets the math family and then the text font
+% via \setfontstyle.
+\def\rm{\fam=0 \setfontstyle{rm}}
+\def\it{\fam=\itfam \setfontstyle{it}}
+\def\sl{\fam=\slfam \setfontstyle{sl}}
+\def\bf{\fam=\bffam \setfontstyle{bf}}\def\bfstylename{bf}
+\def\tt{\fam=\ttfam \setfontstyle{tt}}
+
+% Texinfo sort of supports the sans serif font style, which plain TeX does not.
+% So we set up a \sf.
+\newfam\sffam
+\def\sf{\fam=\sffam \setfontstyle{sf}}
+\let\li = \sf % Sometimes we call it \li, not \sf.
+
+% We don't need math for this font style.
+% (So, unlike the commands above, \ttsl does not set \fam.)
+\def\ttsl{\setfontstyle{ttsl}}
+
+
+% Default leading.
+\newdimen\textleading  \textleading = 13.2pt
+
+% Set the baselineskip to #1, and the lineskip and strut size
+% correspondingly.  There is no deep meaning behind these magic numbers
+% used as factors; they just match (closely enough) what Knuth defined.
+%
+% \lineskipfactor scales \normalbaselineskip to get \normallineskip;
+% the two strut factors give the strut's height and depth as parts of
+% \baselineskip.
+\def\lineskipfactor{.08333}
+\def\strutheightpercent{.70833}
+\def\strutdepthpercent {.29167}
+%
+% can get a sort of poor man's double spacing by redefining this.
+\def\baselinefactor{1}
+%
+\def\setleading#1{%
+  \dimen0 = #1\relax
+  \normalbaselineskip = \baselinefactor\dimen0
+  \normallineskip = \lineskipfactor\normalbaselineskip
+  \normalbaselines
+  % Rebuild \strutbox as an invisible (zero-width) rule sized from the
+  % new \baselineskip.
+  \setbox\strutbox =\hbox{%
+    \vrule width0pt height\strutheightpercent\baselineskip
+                    depth \strutdepthpercent \baselineskip
+  }%
+}
+
+% PDF CMaps.  See also LaTeX's t1.cmap.
+%
+% do nothing with this by default.
+% (The names contain digits, hence \csname; all three start out \let to
+% \gobble.)
+\expandafter\let\csname cmapOT1\endcsname\gobble
+\expandafter\let\csname cmapOT1IT\endcsname\gobble
+\expandafter\let\csname cmapOT1TT\endcsname\gobble
+
+% if we are producing pdf, and we have \pdffontattr, then define cmaps.
+% (\pdffontattr was introduced many years ago, but people still run
+% older pdftex's; it's easy to conditionalize, so we do.)
+\ifpdf \ifx\pdffontattr\undefined \else
+  \begingroup
+    \catcode`\^^M=\active \def^^M{^^J}% Output line endings as the ^^J char.
+    \catcode`\%=12 \immediate\pdfobj stream {%!PS-Adobe-3.0 Resource-CMap
+%%DocumentNeededResources: ProcSet (CIDInit)
+%%IncludeResource: ProcSet (CIDInit)
+%%BeginResource: CMap (TeX-OT1-0)
+%%Title: (TeX-OT1-0 TeX OT1 0)
+%%Version: 1.000
+%%EndComments
+/CIDInit /ProcSet findresource begin
+12 dict begin
+begincmap
+/CIDSystemInfo
+<< /Registry (TeX)
+/Ordering (OT1)
+/Supplement 0
+>> def
+/CMapName /TeX-OT1-0 def
+/CMapType 2 def
+1 begincodespacerange
+<00> <7F>
+endcodespacerange
+8 beginbfrange
+<00> <01> <0393>
+<09> <0A> <03A8>
+<23> <26> <0023>
+<28> <3B> <0028>
+<3F> <5B> <003F>
+<5D> <5E> <005D>
+<61> <7A> <0061>
+<7B> <7C> <2013>
+endbfrange
+40 beginbfchar
+<02> <0398>
+<03> <039B>
+<04> <039E>
+<05> <03A0>
+<06> <03A3>
+<07> <03D2>
+<08> <03A6>
+<0B> <00660066>
+<0C> <00660069>
+<0D> <0066006C>
+<0E> <006600660069>
+<0F> <00660066006C>
+<10> <0131>
+<11> <0237>
+<12> <0060>
+<13> <00B4>
+<14> <02C7>
+<15> <02D8>
+<16> <00AF>
+<17> <02DA>
+<18> <00B8>
+<19> <00DF>
+<1A> <00E6>
+<1B> <0153>
+<1C> <00F8>
+<1D> <00C6>
+<1E> <0152>
+<1F> <00D8>
+<21> <0021>
+<22> <201D>
+<27> <2019>
+<3C> <00A1>
+<3D> <003D>
+<3E> <00BF>
+<5C> <201C>
+<5F> <02D9>
+<60> <2018>
+<7D> <02DD>
+<7E> <007E>
+<7F> <00A8>
+endbfchar
+endcmap
+CMapName currentdict /CMap defineresource pop
+end
+end
+%%EndResource
+%%EOF
+    }\endgroup
+  % \cmapOT1 FONT: give FONT a /ToUnicode attribute that refers to the
+  % CMap stream object written just above.  \edef is used so that
+  % \the\pdflastobj is expanded now, storing that object's number in the
+  % definition.
+  \expandafter\edef\csname cmapOT1\endcsname#1{%
+    \pdffontattr#1{/ToUnicode \the\pdflastobj\space 0 R}%
+  }%
+%
+% \cmapOT1IT
+  \begingroup
+    \catcode`\^^M=\active \def^^M{^^J}% Output line endings as the ^^J char.
+    \catcode`\%=12 \immediate\pdfobj stream {%!PS-Adobe-3.0 Resource-CMap
+%%DocumentNeededResources: ProcSet (CIDInit)
+%%IncludeResource: ProcSet (CIDInit)
+%%BeginResource: CMap (TeX-OT1IT-0)
+%%Title: (TeX-OT1IT-0 TeX OT1IT 0)
+%%Version: 1.000
+%%EndComments
+/CIDInit /ProcSet findresource begin
+12 dict begin
+begincmap
+/CIDSystemInfo
+<< /Registry (TeX)
+/Ordering (OT1IT)
+/Supplement 0
+>> def
+/CMapName /TeX-OT1IT-0 def
+/CMapType 2 def
+1 begincodespacerange
+<00> <7F>
+endcodespacerange
+8 beginbfrange
+<00> <01> <0393>
+<09> <0A> <03A8>
+<25> <26> <0025>
+<28> <3B> <0028>
+<3F> <5B> <003F>
+<5D> <5E> <005D>
+<61> <7A> <0061>
+<7B> <7C> <2013>
+endbfrange
+42 beginbfchar
+<02> <0398>
+<03> <039B>
+<04> <039E>
+<05> <03A0>
+<06> <03A3>
+<07> <03D2>
+<08> <03A6>
+<0B> <00660066>
+<0C> <00660069>
+<0D> <0066006C>
+<0E> <006600660069>
+<0F> <00660066006C>
+<10> <0131>
+<11> <0237>
+<12> <0060>
+<13> <00B4>
+<14> <02C7>
+<15> <02D8>
+<16> <00AF>
+<17> <02DA>
+<18> <00B8>
+<19> <00DF>
+<1A> <00E6>
+<1B> <0153>
+<1C> <00F8>
+<1D> <00C6>
+<1E> <0152>
+<1F> <00D8>
+<21> <0021>
+<22> <201D>
+<23> <0023>
+<24> <00A3>
+<27> <2019>
+<3C> <00A1>
+<3D> <003D>
+<3E> <00BF>
+<5C> <201C>
+<5F> <02D9>
+<60> <2018>
+<7D> <02DD>
+<7E> <007E>
+<7F> <00A8>
+endbfchar
+endcmap
+CMapName currentdict /CMap defineresource pop
+end
+end
+%%EndResource
+%%EOF
+    }\endgroup
+  \expandafter\edef\csname cmapOT1IT\endcsname#1{%
+    \pdffontattr#1{/ToUnicode \the\pdflastobj\space 0 R}%
+  }%
+%
+% \cmapOT1TT
+  \begingroup
+    \catcode`\^^M=\active \def^^M{^^J}% Output line endings as the ^^J char.
+    \catcode`\%=12 \immediate\pdfobj stream {%!PS-Adobe-3.0 Resource-CMap
+%%DocumentNeededResources: ProcSet (CIDInit)
+%%IncludeResource: ProcSet (CIDInit)
+%%BeginResource: CMap (TeX-OT1TT-0)
+%%Title: (TeX-OT1TT-0 TeX OT1TT 0)
+%%Version: 1.000
+%%EndComments
+/CIDInit /ProcSet findresource begin
+12 dict begin
+begincmap
+/CIDSystemInfo
+<< /Registry (TeX)
+/Ordering (OT1TT)
+/Supplement 0
+>> def
+/CMapName /TeX-OT1TT-0 def
+/CMapType 2 def
+1 begincodespacerange
+<00> <7F>
+endcodespacerange
+5 beginbfrange
+<00> <01> <0393>
+<09> <0A> <03A8>
+<21> <26> <0021>
+<28> <5F> <0028>
+<61> <7E> <0061>
+endbfrange
+32 beginbfchar
+<02> <0398>
+<03> <039B>
+<04> <039E>
+<05> <03A0>
+<06> <03A3>
+<07> <03D2>
+<08> <03A6>
+<0B> <2191>
+<0C> <2193>
+<0D> <0027>
+<0E> <00A1>
+<0F> <00BF>
+<10> <0131>
+<11> <0237>
+<12> <0060>
+<13> <00B4>
+<14> <02C7>
+<15> <02D8>
+<16> <00AF>
+<17> <02DA>
+<18> <00B8>
+<19> <00DF>
+<1A> <00E6>
+<1B> <0153>
+<1C> <00F8>
+<1D> <00C6>
+<1E> <0152>
+<1F> <00D8>
+<20> <2423>
+<27> <2019>
+<60> <2018>
+<7F> <00A8>
+endbfchar
+endcmap
+CMapName currentdict /CMap defineresource pop
+end
+end
+%%EndResource
+%%EOF
+    }\endgroup
+  \expandafter\edef\csname cmapOT1TT\endcsname#1{%
+    \pdffontattr#1{/ToUnicode \the\pdflastobj\space 0 R}%
+  }%
+\fi\fi
+
+
+% Set the font macro #1 to the font named #2, adding on the
+% specified font prefix (normally `cm').
+% #3 is the font's design size, #4 is a scale factor, #5 is the CMap
+% encoding (currently only OT1, OT1IT and OT1TT are allowed, pass
+% empty to omit).
+%
+% The \cmapOT1, \cmapOT1IT and \cmapOT1TT macros only exist when we are
+% producing PDF with a pdftex that has \pdffontattr.  When they do not
+% exist, \csname would quietly turn the name into \relax, and the font
+% token #1 left behind it would then be executed, selecting the newly
+% loaded font as a side effect.  So we call \cmap#5 only when it is
+% really defined.  (An empty #5 gives \cmap, which is defined.)
+\def\setfont#1#2#3#4#5{%
+  \font#1=\fontprefix#2#3 scaled #4
+  \expandafter\ifx\csname cmap#5\endcsname\relax \else
+    \csname cmap#5\endcsname#1%
+  \fi
+}
+% This is what gets called when #5 of \setfont is empty.
+\let\cmap\gobble
+% emacs-page end of cmaps
+
+% Use cm as the default font prefix.
+% To specify the font prefix, you must define \fontprefix
+% before you read in texinfo.tex.
+\ifx\fontprefix\undefined
+\def\fontprefix{cm}
+\fi
+% Support font families that don't use the same naming scheme as CM.
+\def\rmshape{r}
+\def\rmbshape{bx}               %where the normal face is bold
+\def\bfshape{b}
+\def\bxshape{bx}
+\def\ttshape{tt}
+\def\ttbshape{tt}
+\def\ttslshape{sltt}
+\def\itshape{ti}
+\def\itbshape{bxti}
+\def\slshape{sl}
+\def\slbshape{bxsl}
+\def\sfshape{ss}
+\def\sfbshape{ss}
+\def\scshape{csc}
+\def\scbshape{csc}
+
+% Definitions for a main text size of 11pt.  This is the default in
+% Texinfo.
+% 
+\def\definetextfontsizexi{%
+% Text fonts (10.95pt, magstephalf).
+\def\textnominalsize{11pt}
+\edef\mainmagstep{\magstephalf}
+\setfont\textrm\rmshape{10}{\mainmagstep}{OT1}
+\setfont\texttt\ttshape{10}{\mainmagstep}{OT1TT}
+\setfont\textbf\bfshape{10}{\mainmagstep}{OT1}
+\setfont\textit\itshape{10}{\mainmagstep}{OT1IT}
+\setfont\textsl\slshape{10}{\mainmagstep}{OT1}
+\setfont\textsf\sfshape{10}{\mainmagstep}{OT1}
+\setfont\textsc\scshape{10}{\mainmagstep}{OT1}
+\setfont\textttsl\ttslshape{10}{\mainmagstep}{OT1TT}
+\font\texti=cmmi10 scaled \mainmagstep
+\font\textsy=cmsy10 scaled \mainmagstep
+\def\textecsize{1095}
+
+% A few fonts for @defun names and args.
+\setfont\defbf\bfshape{10}{\magstep1}{OT1}
+\setfont\deftt\ttshape{10}{\magstep1}{OT1TT}
+\setfont\defttsl\ttslshape{10}{\magstep1}{OT1TT}
+\def\df{\let\tentt=\deftt \let\tenbf = \defbf \let\tenttsl=\defttsl \bf}
+
+% Fonts for indices, footnotes, small examples (9pt).
+\def\smallnominalsize{9pt}
+\setfont\smallrm\rmshape{9}{1000}{OT1}
+\setfont\smalltt\ttshape{9}{1000}{OT1TT}
+\setfont\smallbf\bfshape{10}{900}{OT1}
+\setfont\smallit\itshape{9}{1000}{OT1IT}
+\setfont\smallsl\slshape{9}{1000}{OT1}
+\setfont\smallsf\sfshape{9}{1000}{OT1}
+\setfont\smallsc\scshape{10}{900}{OT1}
+\setfont\smallttsl\ttslshape{10}{900}{OT1TT}
+\font\smalli=cmmi9
+\font\smallsy=cmsy9
+\def\smallecsize{0900}
+
+% Fonts for small examples (8pt).
+\def\smallernominalsize{8pt}
+\setfont\smallerrm\rmshape{8}{1000}{OT1}
+\setfont\smallertt\ttshape{8}{1000}{OT1TT}
+\setfont\smallerbf\bfshape{10}{800}{OT1}
+\setfont\smallerit\itshape{8}{1000}{OT1IT}
+\setfont\smallersl\slshape{8}{1000}{OT1}
+\setfont\smallersf\sfshape{8}{1000}{OT1}
+\setfont\smallersc\scshape{10}{800}{OT1}
+\setfont\smallerttsl\ttslshape{10}{800}{OT1TT}
+\font\smalleri=cmmi8
+\font\smallersy=cmsy8
+\def\smallerecsize{0800}
+
+% Fonts for title page (20.4pt):
+\def\titlenominalsize{20pt}
+\setfont\titlerm\rmbshape{12}{\magstep3}{OT1}
+\setfont\titleit\itbshape{10}{\magstep4}{OT1IT}
+\setfont\titlesl\slbshape{10}{\magstep4}{OT1}
+\setfont\titlett\ttbshape{12}{\magstep3}{OT1TT}
+\setfont\titlettsl\ttslshape{10}{\magstep4}{OT1TT}
+\setfont\titlesf\sfbshape{17}{\magstep1}{OT1}
+\let\titlebf=\titlerm
+\setfont\titlesc\scbshape{10}{\magstep4}{OT1}
+\font\titlei=cmmi12 scaled \magstep3
+\font\titlesy=cmsy10 scaled \magstep4
+\def\authorrm{\secrm}
+\def\authortt{\sectt}
+\def\titleecsize{2074}
+
+% Chapter (and unnumbered) fonts (17.28pt).
+\def\chapnominalsize{17pt}
+\setfont\chaprm\rmbshape{12}{\magstep2}{OT1}
+\setfont\chapit\itbshape{10}{\magstep3}{OT1IT}
+\setfont\chapsl\slbshape{10}{\magstep3}{OT1}
+\setfont\chaptt\ttbshape{12}{\magstep2}{OT1TT}
+\setfont\chapttsl\ttslshape{10}{\magstep3}{OT1TT}
+\setfont\chapsf\sfbshape{17}{1000}{OT1}
+\let\chapbf=\chaprm
+\setfont\chapsc\scbshape{10}{\magstep3}{OT1}
+\font\chapi=cmmi12 scaled \magstep2
+\font\chapsy=cmsy10 scaled \magstep3
+\def\chapecsize{1728}
+
+% Section fonts (14.4pt).
+\def\secnominalsize{14pt}
+\setfont\secrm\rmbshape{12}{\magstep1}{OT1}
+\setfont\secit\itbshape{10}{\magstep2}{OT1IT}
+\setfont\secsl\slbshape{10}{\magstep2}{OT1}
+\setfont\sectt\ttbshape{12}{\magstep1}{OT1TT}
+\setfont\secttsl\ttslshape{10}{\magstep2}{OT1TT}
+\setfont\secsf\sfbshape{12}{\magstep1}{OT1}
+\let\secbf\secrm
+\setfont\secsc\scbshape{10}{\magstep2}{OT1}
+\font\seci=cmmi12 scaled \magstep1
+\font\secsy=cmsy10 scaled \magstep2
+\def\sececsize{1440}
+
+% Subsection fonts (13.15pt).
+\def\ssecnominalsize{13pt}
+\setfont\ssecrm\rmbshape{12}{\magstephalf}{OT1}
+\setfont\ssecit\itbshape{10}{1315}{OT1IT}
+\setfont\ssecsl\slbshape{10}{1315}{OT1}
+\setfont\ssectt\ttbshape{12}{\magstephalf}{OT1TT}
+\setfont\ssecttsl\ttslshape{10}{1315}{OT1TT}
+\setfont\ssecsf\sfbshape{12}{\magstephalf}{OT1}
+\let\ssecbf\ssecrm
+\setfont\ssecsc\scbshape{10}{1315}{OT1}
+\font\sseci=cmmi12 scaled \magstephalf
+\font\ssecsy=cmsy10 scaled 1315
+\def\ssececsize{1200}
+
+% Reduced fonts for @acro in text (10pt).
+\def\reducednominalsize{10pt}
+\setfont\reducedrm\rmshape{10}{1000}{OT1}
+\setfont\reducedtt\ttshape{10}{1000}{OT1TT}
+\setfont\reducedbf\bfshape{10}{1000}{OT1}
+\setfont\reducedit\itshape{10}{1000}{OT1IT}
+\setfont\reducedsl\slshape{10}{1000}{OT1}
+\setfont\reducedsf\sfshape{10}{1000}{OT1}
+\setfont\reducedsc\scshape{10}{1000}{OT1}
+\setfont\reducedttsl\ttslshape{10}{1000}{OT1TT}
+\font\reducedi=cmmi10
+\font\reducedsy=cmsy10
+\def\reducedecsize{1000}
+
+% reset the current fonts
+\textfonts
+\rm
+} % end of 11pt text font size definitions
+
+
+% Definitions to make the main text be 10pt Computer Modern, with
+% section, chapter, etc., sizes following suit.  This is for the GNU
+% Press printing of the Emacs 22 manual.  Maybe other manuals in the
+% future.  Used with @smallbook, which sets the leading to 12pt.
+% 
+\def\definetextfontsizex{%
+% Text fonts (10pt).
+\def\textnominalsize{10pt}
+\edef\mainmagstep{1000}
+\setfont\textrm\rmshape{10}{\mainmagstep}{OT1}
+\setfont\texttt\ttshape{10}{\mainmagstep}{OT1TT}
+\setfont\textbf\bfshape{10}{\mainmagstep}{OT1}
+\setfont\textit\itshape{10}{\mainmagstep}{OT1IT}
+\setfont\textsl\slshape{10}{\mainmagstep}{OT1}
+\setfont\textsf\sfshape{10}{\mainmagstep}{OT1}
+\setfont\textsc\scshape{10}{\mainmagstep}{OT1}
+\setfont\textttsl\ttslshape{10}{\mainmagstep}{OT1TT}
+\font\texti=cmmi10 scaled \mainmagstep
+\font\textsy=cmsy10 scaled \mainmagstep
+\def\textecsize{1000}
+
+% A few fonts for @defun names and args.
+\setfont\defbf\bfshape{10}{\magstephalf}{OT1}
+\setfont\deftt\ttshape{10}{\magstephalf}{OT1TT}
+\setfont\defttsl\ttslshape{10}{\magstephalf}{OT1TT}
+\def\df{\let\tentt=\deftt \let\tenbf = \defbf \let\tenttsl=\defttsl \bf}
+
+% Fonts for indices, footnotes, small examples (9pt).
+\def\smallnominalsize{9pt}
+\setfont\smallrm\rmshape{9}{1000}{OT1}
+\setfont\smalltt\ttshape{9}{1000}{OT1TT}
+\setfont\smallbf\bfshape{10}{900}{OT1}
+\setfont\smallit\itshape{9}{1000}{OT1IT}
+\setfont\smallsl\slshape{9}{1000}{OT1}
+\setfont\smallsf\sfshape{9}{1000}{OT1}
+\setfont\smallsc\scshape{10}{900}{OT1}
+\setfont\smallttsl\ttslshape{10}{900}{OT1TT}
+\font\smalli=cmmi9
+\font\smallsy=cmsy9
+\def\smallecsize{0900}
+
+% Fonts for small examples (8pt).
+\def\smallernominalsize{8pt}
+\setfont\smallerrm\rmshape{8}{1000}{OT1}
+\setfont\smallertt\ttshape{8}{1000}{OT1TT}
+\setfont\smallerbf\bfshape{10}{800}{OT1}
+\setfont\smallerit\itshape{8}{1000}{OT1IT}
+\setfont\smallersl\slshape{8}{1000}{OT1}
+\setfont\smallersf\sfshape{8}{1000}{OT1}
+\setfont\smallersc\scshape{10}{800}{OT1}
+\setfont\smallerttsl\ttslshape{10}{800}{OT1TT}
+\font\smalleri=cmmi8
+\font\smallersy=cmsy8
+\def\smallerecsize{0800}
+
+% Fonts for title page (20.4pt):
+\def\titlenominalsize{20pt}
+\setfont\titlerm\rmbshape{12}{\magstep3}{OT1}
+\setfont\titleit\itbshape{10}{\magstep4}{OT1IT}
+\setfont\titlesl\slbshape{10}{\magstep4}{OT1}
+\setfont\titlett\ttbshape{12}{\magstep3}{OT1TT}
+\setfont\titlettsl\ttslshape{10}{\magstep4}{OT1TT}
+\setfont\titlesf\sfbshape{17}{\magstep1}{OT1}
+\let\titlebf=\titlerm
+\setfont\titlesc\scbshape{10}{\magstep4}{OT1}
+\font\titlei=cmmi12 scaled \magstep3
+\font\titlesy=cmsy10 scaled \magstep4
+\def\authorrm{\secrm}
+\def\authortt{\sectt}
+\def\titleecsize{2074}
+
+% Chapter fonts (14.4pt).
+\def\chapnominalsize{14pt}
+\setfont\chaprm\rmbshape{12}{\magstep1}{OT1}
+\setfont\chapit\itbshape{10}{\magstep2}{OT1IT}
+\setfont\chapsl\slbshape{10}{\magstep2}{OT1}
+\setfont\chaptt\ttbshape{12}{\magstep1}{OT1TT}
+\setfont\chapttsl\ttslshape{10}{\magstep2}{OT1TT}
+\setfont\chapsf\sfbshape{12}{\magstep1}{OT1}
+\let\chapbf\chaprm
+\setfont\chapsc\scbshape{10}{\magstep2}{OT1}
+\font\chapi=cmmi12 scaled \magstep1
+\font\chapsy=cmsy10 scaled \magstep2
+\def\chapecsize{1440}
+
+% Section fonts (12pt).
+\def\secnominalsize{12pt}
+\setfont\secrm\rmbshape{12}{1000}{OT1}
+\setfont\secit\itbshape{10}{\magstep1}{OT1IT}
+\setfont\secsl\slbshape{10}{\magstep1}{OT1}
+\setfont\sectt\ttbshape{12}{1000}{OT1TT}
+\setfont\secttsl\ttslshape{10}{\magstep1}{OT1TT}
+\setfont\secsf\sfbshape{12}{1000}{OT1}
+\let\secbf\secrm
+\setfont\secsc\scbshape{10}{\magstep1}{OT1}
+\font\seci=cmmi12 
+\font\secsy=cmsy10 scaled \magstep1
+\def\sececsize{1200}
+
+% Subsection fonts (10pt).
+\def\ssecnominalsize{10pt}
+\setfont\ssecrm\rmbshape{10}{1000}{OT1}
+\setfont\ssecit\itbshape{10}{1000}{OT1IT}
+\setfont\ssecsl\slbshape{10}{1000}{OT1}
+\setfont\ssectt\ttbshape{10}{1000}{OT1TT}
+\setfont\ssecttsl\ttslshape{10}{1000}{OT1TT}
+\setfont\ssecsf\sfbshape{10}{1000}{OT1}
+\let\ssecbf\ssecrm
+\setfont\ssecsc\scbshape{10}{1000}{OT1}
+\font\sseci=cmmi10
+\font\ssecsy=cmsy10
+\def\ssececsize{1000}
+
+% Reduced fonts for @acro in text (9pt).
+\def\reducednominalsize{9pt}
+\setfont\reducedrm\rmshape{9}{1000}{OT1}
+\setfont\reducedtt\ttshape{9}{1000}{OT1TT}
+\setfont\reducedbf\bfshape{10}{900}{OT1}
+\setfont\reducedit\itshape{9}{1000}{OT1IT}
+\setfont\reducedsl\slshape{9}{1000}{OT1}
+\setfont\reducedsf\sfshape{9}{1000}{OT1}
+\setfont\reducedsc\scshape{10}{900}{OT1}
+\setfont\reducedttsl\ttslshape{10}{900}{OT1TT}
+\font\reducedi=cmmi9
+\font\reducedsy=cmsy9
+\def\reducedecsize{0900}
+
+% reduce space between paragraphs
+\divide\parskip by 2
+
+% reset the current fonts
+\textfonts
+\rm
+} % end of 10pt text font size definitions
+
+
+% We provide the user-level command
+%   @fonttextsize 10
+% (or 11) to redefine the text font size.  pt is assumed.
+% 
+\def\xword{10}
+\def\xiword{11}
+%
+% #1 is the requested main text size as typed by the user.  It is saved
+% in \textsizearg, logged, and then compared with \xword (`10') and
+% \xiword (`11') to pick \definetextfontsizex or \definetextfontsizexi;
+% any other value is reported as an error.
+\parseargdef\fonttextsize{%
+  \def\textsizearg{#1}%
+  \wlog{doing @fonttextsize \textsizearg}%
+  %
+  % Set \globaldefs so that documents can use this inside @tex, since
+  % makeinfo 4.8 does not support it, but we need it nonetheless.
+  % The group confines the \globaldefs setting itself to this command.
+  %
+ \begingroup \globaldefs=1
+  \ifx\textsizearg\xword \definetextfontsizex
+  \else \ifx\textsizearg\xiword \definetextfontsizexi
+  \else
+    \errhelp=\EMsimple
+    \errmessage{@fonttextsize only supports `10' or `11', not `\textsizearg'}
+  \fi\fi
+ \endgroup
+}
+
+
+% In order for the font changes to affect most math symbols and letters,
+% we have to define the \textfont of the standard families.  Since
+% texinfo doesn't allow for producing subscripts and superscripts except
+% in the main text, we don't bother to reset \scriptfont and
+% \scriptscriptfont (which would also require loading a lot more fonts).
+%
+\def\resetmathfonts{%
+  % The three numbered families, taken from the current \ten... fonts.
+  \textfont0=\tenrm
+  \textfont1=\teni
+  \textfont2=\tensy
+  % The named text families.
+  \textfont\itfam=\tenit
+  \textfont\slfam=\tensl
+  \textfont\bffam=\tenbf
+  \textfont\ttfam=\tentt
+  \textfont\sffam=\tensf
+}
+
+% The font-changing commands redefine the meanings of \tenSTYLE, instead
+% of just \STYLE.  We do this because \STYLE needs to also set the
+% current \fam for math mode.  Our \STYLE (e.g., \rm) commands hardwire
+% \tenSTYLE to set the current font.
+%
+% Each font-changing command also sets the names \lsize (one size lower)
+% and \lllsize (three sizes lower).  These relative commands are used in
+% the LaTeX logo and acronyms.
+%
+% This all needs generalizing, badly.
+%
+\def\textfonts{%
+  \let\tenrm=\textrm \let\tenit=\textit \let\tensl=\textsl
+  \let\tenbf=\textbf \let\tentt=\texttt \let\smallcaps=\textsc
+  \let\tensf=\textsf \let\teni=\texti \let\tensy=\textsy
+  \let\tenttsl=\textttsl
+  \def\curfontsize{text}%
+  \def\lsize{reduced}\def\lllsize{smaller}%
+  \resetmathfonts \setleading{\textleading}}
+\def\titlefonts{%
+  \let\tenrm=\titlerm \let\tenit=\titleit \let\tensl=\titlesl
+  \let\tenbf=\titlebf \let\tentt=\titlett \let\smallcaps=\titlesc
+  \let\tensf=\titlesf \let\teni=\titlei \let\tensy=\titlesy
+  \let\tenttsl=\titlettsl
+  \def\curfontsize{title}%
+  \def\lsize{chap}\def\lllsize{subsec}%
+  \resetmathfonts \setleading{25pt}}
+\def\titlefont#1{{\titlefonts\rm #1}}
+\def\chapfonts{%
+  \let\tenrm=\chaprm \let\tenit=\chapit \let\tensl=\chapsl
+  \let\tenbf=\chapbf \let\tentt=\chaptt \let\smallcaps=\chapsc
+  \let\tensf=\chapsf \let\teni=\chapi \let\tensy=\chapsy
+  \let\tenttsl=\chapttsl
+  \def\curfontsize{chap}%
+  \def\lsize{sec}\def\lllsize{text}%
+  \resetmathfonts \setleading{19pt}}
+\def\secfonts{%
+  \let\tenrm=\secrm \let\tenit=\secit \let\tensl=\secsl
+  \let\tenbf=\secbf \let\tentt=\sectt \let\smallcaps=\secsc
+  \let\tensf=\secsf \let\teni=\seci \let\tensy=\secsy
+  \let\tenttsl=\secttsl
+  \def\curfontsize{sec}%
+  \def\lsize{subsec}\def\lllsize{reduced}%
+  \resetmathfonts \setleading{16pt}}
+\def\subsecfonts{%
+  \let\tenrm=\ssecrm \let\tenit=\ssecit \let\tensl=\ssecsl
+  \let\tenbf=\ssecbf \let\tentt=\ssectt \let\smallcaps=\ssecsc
+  \let\tensf=\ssecsf \let\teni=\sseci \let\tensy=\ssecsy
+  \let\tenttsl=\ssecttsl
+  \def\curfontsize{ssec}%
+  \def\lsize{text}\def\lllsize{small}%
+  \resetmathfonts \setleading{15pt}}
+\let\subsubsecfonts = \subsecfonts
+% Switch to the `reduced' fonts (what \textfonts names as its \lsize).
+% As in every other \...fonts command, the \tenSTYLE names and
+% \smallcaps are rebound to the fonts of this size, so that @sc follows
+% the size change too, and \curfontsize, \lsize and \lllsize are
+% updated before the math fonts and the leading are reset.
+\def\reducedfonts{%
+  \let\tenrm=\reducedrm \let\tenit=\reducedit \let\tensl=\reducedsl
+  \let\tenbf=\reducedbf \let\tentt=\reducedtt \let\smallcaps=\reducedsc
+  \let\tensf=\reducedsf \let\teni=\reducedi \let\tensy=\reducedsy
+  \let\tenttsl=\reducedttsl
+  \def\curfontsize{reduced}%
+  \def\lsize{small}\def\lllsize{smaller}%
+  \resetmathfonts \setleading{10.5pt}}
+\def\smallfonts{%
+  \let\tenrm=\smallrm \let\tenit=\smallit \let\tensl=\smallsl
+  \let\tenbf=\smallbf \let\tentt=\smalltt \let\smallcaps=\smallsc
+  \let\tensf=\smallsf \let\teni=\smalli \let\tensy=\smallsy
+  \let\tenttsl=\smallttsl
+  \def\curfontsize{small}%
+  \def\lsize{smaller}\def\lllsize{smaller}%
+  \resetmathfonts \setleading{10.5pt}}
+\def\smallerfonts{%
+  \let\tenrm=\smallerrm \let\tenit=\smallerit \let\tensl=\smallersl
+  \let\tenbf=\smallerbf \let\tentt=\smallertt \let\smallcaps=\smallersc
+  \let\tensf=\smallersf \let\teni=\smalleri \let\tensy=\smallersy
+  \let\tenttsl=\smallerttsl
+  \def\curfontsize{smaller}%
+  \def\lsize{smaller}\def\lllsize{smaller}%
+  \resetmathfonts \setleading{9.5pt}}
+
+% Set the fonts to use with the @small... environments.
+\let\smallexamplefonts = \smallfonts
+
+% About \smallexamplefonts.  If we use \smallfonts (9pt), @smallexample
+% can fit this many characters:
+%   8.5x11=86   smallbook=72  a4=90  a5=69
+% If we use \smallerfonts (8pt), then we can fit this many characters:
+%   8.5x11=90+  smallbook=80  a4=90+  a5=77
+% For me, subjectively, the few extra characters that fit aren't worth
+% the additional smallness of 8pt.  So I'm making the default 9pt.
+%
+% By the way, for comparison, here's what fits with @example (10pt):
+%   8.5x11=71  smallbook=60  a4=75  a5=58
+%
+% I wish the USA used A4 paper.
+% --karl, 24jan03.
+
+
+% Set up the default fonts, so we can use them for creating boxes.
+%
+\definetextfontsizexi
+
+% Define these so they can be easily changed for other fonts.
+\def\angleleft{$\langle$}
+\def\angleright{$\rangle$}
+
+% Count depth in font-changes, for error checks
+\newcount\fontdepth \fontdepth=0
+
+% Fonts for short table of contents.
+\setfont\shortcontrm\rmshape{12}{1000}{OT1}
+\setfont\shortcontbf\bfshape{10}{\magstep1}{OT1}  % no cmb12
+\setfont\shortcontsl\slshape{12}{1000}{OT1}
+\setfont\shortconttt\ttshape{12}{1000}{OT1TT}
+
+%% Add scribe-like font environments, plus @l for inline lisp (usually sans
+%% serif) and @ii for TeX italic
+
+% \smartitalic{ARG} outputs arg in italics, followed by an italic correction
+% unless the following character is such as not to need one.
+% Run once \futurelet has stored the upcoming token in \next: add the
+% italic correction (\ptexslash) unless that token is a comma, a hyphen
+% or a period.
+\def\smartitalicx{%
+  \ifx\next,\else
+    \ifx\next-\else
+      \ifx\next.\else
+        \ptexslash
+      \fi
+    \fi
+  \fi
+}
+\def\smartslanted#1{{\ifusingtt\ttsl\sl #1}\futurelet\next\smartitalicx}
+\def\smartitalic#1{{\ifusingtt\ttsl\it #1}\futurelet\next\smartitalicx}
+
+% like \smartslanted except unconditionally uses \ttsl.
+% @var is set to this for defun arguments.
+\def\ttslanted#1{{\ttsl #1}\futurelet\next\smartitalicx}
+
+% like \smartslanted except unconditionally use \sl.  We never want
+% ttsl for book titles, do we?
+\def\cite#1{{\sl #1}\futurelet\next\smartitalicx}
+
+\let\i=\smartitalic
+\let\slanted=\smartslanted
+\let\var=\smartslanted
+\let\dfn=\smartslanted
+\let\emph=\smartitalic
+
+% @b, explicit bold.
+\def\b#1{{\bf #1}}
+\let\strong=\b
+
+% @sansserif, explicit sans.
+\def\sansserif#1{{\sf #1}}
+
+% We can't just use \exhyphenpenalty, because that only has effect at
+% the end of a paragraph.  Restore normal hyphenation at the end of the
+% group within which \nohyphenation is presumably called.
+%
+\def\nohyphenation{\hyphenchar\font = -1  \aftergroup\restorehyphenation}
+\def\restorehyphenation{\hyphenchar\font = `- }
+
+% Set sfcode to normal for the chars that usually have another value.
+% Can't use plain's \frenchspacing because it uses the `\x notation, and
+% sometimes \x has an active definition that messes things up.
+%
+\catcode`@=11
+  \def\plainfrenchspacing{%
+    \sfcode\dotChar  =\@m \sfcode\questChar=\@m \sfcode\exclamChar=\@m
+    \sfcode\colonChar=\@m \sfcode\semiChar =\@m \sfcode\commaChar =\@m
+    \def\endofsentencespacefactor{1000}% for @. and friends
+  }
+  \def\plainnonfrenchspacing{%
+    \sfcode`\.3000\sfcode`\?3000\sfcode`\!3000
+    \sfcode`\:2000\sfcode`\;1500\sfcode`\,1250
+    \def\endofsentencespacefactor{3000}% for @. and friends
+  }
+\catcode`@=\other
+\def\endofsentencespacefactor{3000}% default
+
+\def\t#1{%
+  {\tt \rawbackslash \plainfrenchspacing #1}%
+  \null
+}
+\def\samp#1{`\tclose{#1}'\null}
+% @key.  Two definitions of \key follow, and the second \def replaces
+% the first, so the box-drawing (lozenge) version is not the one left
+% in effect.  \keyrm and \keysy are the fonts that lozenge version uses.
+\setfont\keyrm\rmshape{8}{1000}{OT1}
+\font\keysy=cmsy9
+% Lozenge version: #1 in \keyrm between \angleleft and \angleright,
+% with rules above and below.
+\def\key#1{{\keyrm\textfont2=\keysy \leavevmode\hbox{%
+  \raise0.4pt\hbox{\angleleft}\kern-.08em\vtop{%
+    \vbox{\hrule\kern-0.4pt
+     \hbox{\raise0.4pt\hbox{\vphantom{\angleleft}}#1}}%
+    \kern-0.4pt\hrule}%
+  \kern-.06em\raise0.4pt\hbox{\angleright}}}}
+% The definition actually in effect: #1 is uppercased, with hyphenation
+% turned off, and no font change is made.
+\def\key #1{{\nohyphenation \uppercase{#1}}\null}
+% The old definition, with no lozenge:
+%\def\key #1{{\ttsl \nohyphenation \uppercase{#1}}\null}
+\def\ctrl #1{{\tt \rawbackslash \hat}#1}
+
+% @file, @option are the same as @samp.
+\let\file=\samp
+\let\option=\samp
+
+% @code is a modification of @t,
+% which makes spaces the same size as normal in the surrounding text.
+\def\tclose#1{%
+  {%
+    % Change normal interword space to be same as for the current font.
+    \spaceskip = \fontdimen2\font
+    %
+    % Switch to typewriter.
+    \tt
+    %
+    % But `\ ' produces the large typewriter interword space.
+    \def\ {{\spaceskip = 0pt{} }}%
+    %
+    % Turn off hyphenation.
+    \nohyphenation
+    %
+    \rawbackslash
+    \plainfrenchspacing
+    #1%
+  }%
+  \null
+}
+
+% We *must* turn on hyphenation at `-' and `_' in @code.
+% Otherwise, it is too hard to avoid overfull hboxes
+% in the Emacs manual, the Library manual, etc.
+
+% Unfortunately, TeX uses one parameter (\hyphenchar) to control
+% both hyphenation at - and hyphenation within words.
+% We must therefore turn them both off (\tclose does that)
+% and arrange explicitly to hyphenate at a dash.
+%  -- rms.
+{
+  \catcode`\-=\active \catcode`\_=\active
+  \catcode`\'=\active \catcode`\`=\active
+  %
+  \global\def\code{\begingroup
+    \catcode\rquoteChar=\active \catcode\lquoteChar=\active
+    \let'\codequoteright \let`\codequoteleft
+    %
+    \catcode\dashChar=\active  \catcode\underChar=\active
+    \ifallowcodebreaks
+     \let-\codedash
+     \let_\codeunder
+    \else
+     \let-\realdash
+     \let_\realunder
+    \fi
+    \codex
+  }
+}
+
+\def\realdash{-}
+\def\codedash{-\discretionary{}{}{}}
+\def\codeunder{%
+  % this is all so @math{@code{var_name}+1} can work.  In math mode, _
+  % is "active" (mathcode"8000) and \normalunderscore (or \char95, etc.)
+  % will therefore expand the active definition of _, which is us
+  % (inside @code that is), therefore an endless loop.
+  \ifusingtt{\ifmmode
+               \mathchar"075F % class 0=ordinary, family 7=ttfam, pos 0x5F=_.
+             \else\normalunderscore \fi
+             \discretionary{}{}{}}%
+            {\_}%
+}
+\def\codex #1{\tclose{#1}\endgroup}
+
+% An additional complication: the above will allow breaks after, e.g.,
+% each of the four underscores in __typeof__.  This is undesirable in
+% some manuals, especially if they don't have long identifiers in
+% general.  @allowcodebreaks provides a way to control this.
+% 
+\newif\ifallowcodebreaks  \allowcodebreakstrue
+
+\def\keywordtrue{true}
+\def\keywordfalse{false}
+
+\parseargdef\allowcodebreaks{%
+  \def\txiarg{#1}%
+  \ifx\txiarg\keywordtrue
+    \allowcodebreakstrue
+  \else\ifx\txiarg\keywordfalse
+    \allowcodebreaksfalse
+  \else
+    \errhelp = \EMsimple
+    \errmessage{Unknown @allowcodebreaks option `\txiarg'}%
+  \fi\fi
+}
+
+% @kbd is like @code, except that if the argument is just one @key command,
+% then @kbd has no effect.
+
+% @kbdinputstyle -- arg is `distinct' (@kbd uses slanted tty font always),
+%   `example' (@kbd uses ttsl only inside of @example and friends),
+%   or `code' (@kbd uses normal tty font always).
+\parseargdef\kbdinputstyle{%
+  \def\txiarg{#1}%
+  \ifx\txiarg\worddistinct
+    \gdef\kbdexamplefont{\ttsl}\gdef\kbdfont{\ttsl}%
+  \else\ifx\txiarg\wordexample
+    \gdef\kbdexamplefont{\ttsl}\gdef\kbdfont{\tt}%
+  \else\ifx\txiarg\wordcode
+    \gdef\kbdexamplefont{\tt}\gdef\kbdfont{\tt}%
+  \else
+    \errhelp = \EMsimple
+    \errmessage{Unknown @kbdinputstyle option `\txiarg'}%
+  \fi\fi\fi
+}
+\def\worddistinct{distinct}
+\def\wordexample{example}
+\def\wordcode{code}
+
+% Default is `distinct.'
+\kbdinputstyle distinct
+
+% \kbdfoo is the worker for @kbd, which saves its argument in \look and
+% then calls
+%   \kbdfoo <argument>??\par
+% #1 and #2 are the first two items of the argument and #3 is everything
+% after them up to \par.  If #1 is \key and #3 is exactly the `??'
+% sentinel, the argument was a single @key command, so we just typeset
+% \key{#2}.  In every other case the saved argument \look is typeset
+% with \tclose in \kbdfont.
+% \xkey holds \key inside a macro so that \ifx can compare it with \one.
+\def\xkey{\key}
+\def\kbdfoo#1#2#3\par{\def\one{#1}\def\three{#3}\def\threex{??}%
+\ifx\one\xkey\ifx\threex\three \key{#2}%
+\else{\tclose{\kbdfont\look}}\fi
+\else{\tclose{\kbdfont\look}}\fi}
+
+% For @indicateurl, @env, @command quotes seem unnecessary, so use \code.
+\let\indicateurl=\code
+\let\env=\code
+\let\command=\code
+
+% @clicksequence{File @click{} Open ...}
+\def\clicksequence#1{\begingroup #1\endgroup}
+
+% @clickstyle @arrow   (by default)
+\parseargdef\clickstyle{\def\click{#1}}
+\def\click{\arrow}
+
+% @uref (abbreviation for `urlref') takes an optional (comma-separated)
+% second argument specifying the text to display and an optional third
+% arg as text to display instead of (rather than in addition to) the url
+% itself.  First (mandatory) arg is the url.  Perhaps eventually put in
+% a hypertex \special here.
+%
+\def\uref#1{\douref #1,,,\finish}
+\def\douref#1,#2,#3,#4\finish{\begingroup
+  \unsepspaces
+  \pdfurl{#1}%
+  \setbox0 = \hbox{\ignorespaces #3}%
+  \ifdim\wd0 > 0pt
+    \unhbox0 % third arg given, show only that
+  \else
+    \setbox0 = \hbox{\ignorespaces #2}%
+    \ifdim\wd0 > 0pt
+      \ifpdf
+        \unhbox0             % PDF: 2nd arg given, show only it
+      \else
+        \unhbox0\ (\code{#1})% DVI: 2nd arg given, show both it and url
+      \fi
+    \else
+      \code{#1}% only url given, so show it
+    \fi
+  \fi
+  \endlink
+\endgroup}
+
+% @url synonym for @uref, since that's how everyone uses it.
+%
+\let\url=\uref
+
+% rms does not like angle brackets --karl, 17may97.
+% So now @email is just like @uref, unless we are pdf.
+%
+%\def\email#1{\angleleft{\tt #1}\angleright}
+\ifpdf
+  \def\email#1{\doemail#1,,\finish}
+  \def\doemail#1,#2,#3\finish{\begingroup
+    \unsepspaces
+    \pdfurl{mailto:#1}%
+    \setbox0 = \hbox{\ignorespaces #2}%
+    \ifdim\wd0>0pt\unhbox0\else\code{#1}\fi
+    \endlink
+  \endgroup}
+\else
+  \let\email=\uref
+\fi
+
+% Check if we are currently using a typewriter font.  Since all the
+% Computer Modern typewriter fonts have zero interword stretch (and
+% shrink), and it is reasonable to expect all typewriter fonts to have
+% this property, we can check that font parameter.
+%
+\def\ifmonospace{\ifdim\fontdimen3\font=0pt }
+
+% Typeset a dimension, e.g., `in' or `pt'.  The only reason for the
+% argument is to make the input look right: @dmn{pt} instead of @dmn{}pt.
+%
+\def\dmn#1{\thinspace #1}
+
+\def\kbd#1{\def\look{#1}\expandafter\kbdfoo\look??\par}
+
+% @l was never documented to mean ``switch to the Lisp font'',
+% and it is not used as such in any manual I can find.  We need it for
+% Polish suppressed-l.  --karl, 22sep96.
+%\def\l#1{{\li #1}\null}
+
+% Explicit font changes: @r, @sc, undocumented @ii.
+\def\r#1{{\rm #1}}              % roman font
+\def\sc#1{{\smallcaps#1}}       % smallcaps font
+\def\ii#1{{\it #1}}             % italic font
+
+% @acronym for "FBI", "NATO", and the like.
+% We print this one point size smaller, since it's intended for
+% all-uppercase.
+% 
+% \acronym appends `,,\finish' so that the delimited parameters of
+% \doacronym always match: #1 is the acronym itself, #2 is the optional
+% expansion after the first comma (empty when none was given), and #3
+% soaks up whatever remains before \finish.
+\def\acronym#1{\doacronym #1,,\finish}
+\def\doacronym#1,#2,#3\finish{%
+  % The acronym, set in the \lsize fonts inside its own group.
+  {\selectfonts\lsize #1}%
+  \def\temp{#2}%
+  % Only when an expansion was given: a space, then the expansion in
+  % parentheses.
+  \ifx\temp\empty \else
+    \space ({\unsepspaces \ignorespaces \temp \unskip})%
+  \fi
+}
+
+% @abbr for "Comput. J." and the like.
+% No font change, but don't do end-of-sentence spacing.
+% 
+\def\abbr#1{\doabbr #1,,\finish}
+\def\doabbr#1,#2,#3\finish{%
+  {\plainfrenchspacing #1}%
+  \def\temp{#2}%
+  \ifx\temp\empty \else
+    \space ({\unsepspaces \ignorespaces \temp \unskip})%
+  \fi
+}
+
+% @pounds{} is a sterling sign, which Knuth put in the CM italic font.
+%
+\def\pounds{{\it\$}}
+
+% @euro{} comes from a separate font, depending on the current style.
+% We use the free feym* fonts from the eurosym package by Henrik
+% Theiling, which support regular, slanted, bold and bold slanted (and
+% "outlined" (blackboard bold, sort of) versions, which we don't need).
+% It is available from http://www.ctan.org/tex-archive/fonts/eurosym.
+% 
+% Although only regular is the truly official Euro symbol, we ignore
+% that.  The Euro is designed to be slightly taller than the regular
+% font height.
+% 
+% feymr - regular
+% feymo - slanted
+% feybr - bold
+% feybo - bold slanted
+% 
+% There is no good (free) typewriter version, to my knowledge.
+% A feymr10 euro is ~7.3pt wide, while a normal cmtt10 char is ~5.25pt wide.
+% Hmm.
+% 
+% Also doesn't work in math.  Do we need to do math with euro symbols?
+% Hope not.
+% 
+% 
+\def\euro{{\eurofont e}}
+\def\eurofont{%
+  % We set the font at each command, rather than predefining it in
+  % \textfonts and the other font-switching commands, so that
+  % installations which never need the symbol don't have to have the
+  % font installed.
+  % 
+  % There is only one designed size (nominal 10pt), so we always scale
+  % that to the current nominal size.
+  % 
+  % By the way, simply using "at 1em" works for cmr10 and the like, but
+  % does not work for cmbx10 and other extended/shrunken fonts.
+  % 
+  \def\eurosize{\csname\curfontsize nominalsize\endcsname}%
+  %
+  \ifx\curfontstyle\bfstylename 
+    % bold:
+    \font\thiseurofont = \ifusingit{feybo10}{feybr10} at \eurosize
+  \else 
+    % regular:
+    \font\thiseurofont = \ifusingit{feymo10}{feymr10} at \eurosize
+  \fi
+  \thiseurofont
+}
+
+% Hacks for glyphs from the EC fonts similar to \euro.  We don't
+% use \let for the aliases, because sometimes we redefine the original
+% macro, and the alias should reflect the redefinition.
+\def\guillemetleft{{\ecfont \char"13}}
+\def\guillemotleft{\guillemetleft}
+\def\guillemetright{{\ecfont \char"14}}
+\def\guillemotright{\guillemetright}
+\def\guilsinglleft{{\ecfont \char"0E}}
+\def\guilsinglright{{\ecfont \char"0F}}
+\def\quotedblbase{{\ecfont \char"12}}
+\def\quotesinglbase{{\ecfont \char"0D}}
+%
+\def\ecfont{%
+  % We can't distinguish serif/sanserif and italic/slanted, but this
+  % is used for crude hacks anyway (like adding French and German
+  % quotes to documents typeset with CM, where we lose kerning), so
+  % hopefully nobody will notice/care.
+  \edef\ecsize{\csname\curfontsize ecsize\endcsname}%
+  \edef\nominalsize{\csname\curfontsize nominalsize\endcsname}%
+  \ifx\curfontstyle\bfstylename
+    % bold:
+    \font\thisecfont = ecb\ifusingit{i}{x}\ecsize \space at \nominalsize
+  \else
+    % regular:
+    \font\thisecfont = ec\ifusingit{ti}{rm}\ecsize \space at \nominalsize
+  \fi
+  \thisecfont
+}
+
+% @registeredsymbol - R in a circle.  The font for the R should really
+% be smaller yet, but lllsize is the best we can do for now.
+% Adapted from the plain.tex definition of \copyright.
+%
+% The symbol is a math superscript: \ooalign overlays a slightly raised
+% `R' (set with \selectfonts\lllsize) on the circle \Orb.
+\def\registeredsymbol{%
+  $^{{\ooalign{\hfil\raise.07ex\hbox{\selectfonts\lllsize R}%
+               \hfil\crcr\Orb}}%
+    }$%
+}
+
+% @textdegree - the normal degrees sign.
+%
+\def\textdegree{$^\circ$}
+
+% Laurent Siebenmann reports \Orb undefined with:
+%  Textures 1.7.7 (preloaded format=plain 93.10.14)  (68K)  16 APR 2004 02:38
+% so we'll define it if necessary.
+% 
+\ifx\Orb\undefined
+\def\Orb{\mathhexbox20D}
+\fi
+
+% Quotes.
+\chardef\quotedblleft="5C
+\chardef\quotedblright=`\"
+\chardef\quoteleft=`\`
+\chardef\quoteright=`\'
+
+
+\message{page headings,}
+
+\newskip\titlepagetopglue \titlepagetopglue = 1.5in
+\newskip\titlepagebottomglue \titlepagebottomglue = 2pc
+
+% First the title page.  Must do @settitle before @titlepage.
+\newif\ifseenauthor
+\newif\iffinishedtitlepage
+
+% Do an implicit @contents or @shortcontents after @end titlepage if the
+% user says @setcontentsaftertitlepage or @setshortcontentsaftertitlepage.
+%
+\newif\ifsetcontentsaftertitlepage
+ \let\setcontentsaftertitlepage = \setcontentsaftertitlepagetrue
+\newif\ifsetshortcontentsaftertitlepage
+ \let\setshortcontentsaftertitlepage = \setshortcontentsaftertitlepagetrue
+
+% @shorttitlepage TITLE: inside a group, skip 1.5in below an empty box,
+% center TITLE in \chaprm, then eject that page and a second page that
+% holds only an empty box.
+\parseargdef\shorttitlepage{\begingroup\hbox{}\vskip 1.5in \chaprm \centerline{#1}%
+        \endgroup\page\hbox{}\page}
+
+% @titlepage: begin the title page environment.  Opens an extra group
+% (closed inside \Etitlepage), adds \titlepagetopglue, and wraps \page
+% so that the first @page inside the environment first draws the
+% closing rule if one is still pending, then restores the saved \page,
+% breaks the page, and leaves a \null on the new one.
+\envdef\titlepage{%
+  % Open one extra group, as we want to close it in the middle of \Etitlepage.
+  \begingroup
+    \parindent=0pt \textfonts
+    % Leave some space at the very top of the page.
+    \vglue\titlepagetopglue
+    % No rule at page bottom unless we print one at the top with @title.
+    \finishedtitlepagetrue
+    %
+    % Most title ``pages'' are actually two pages long, with space
+    % at the top of the second.  We don't want the ragged left on the second.
+    \let\oldpage = \page
+    \def\page{%
+      \iffinishedtitlepage\else
+        \finishtitlepage
+      \fi
+      \let\page = \oldpage
+      \page
+      \null
+    }%
+}
+
+% @end titlepage: draw the closing rule if it is still pending, eject
+% the page with the saved \oldpage, close the group opened by
+% \titlepage, turn headings on, and emit any table of contents that was
+% requested to follow the title page.
+\def\Etitlepage{%
+    \iffinishedtitlepage\else
+       \finishtitlepage
+    \fi
+    % It is important to do the page break before ending the group,
+    % because the headline and footline are only empty inside the group.
+    % If we use the new definition of \page, we always get a blank page
+    % after the title page, which we certainly don't want.
+    \oldpage
+  \endgroup
+  %
+  % Need this before the \...aftertitlepage checks so that if they are
+  % in effect the toc pages will come out with page numbers.
+  \HEADINGSon
+  %
+  % If they want short, they certainly want long too.
+  \ifsetshortcontentsaftertitlepage
+    \shortcontents
+    \contents
+    % Disable both commands globally so a later explicit use is a no-op.
+    \global\let\shortcontents = \relax
+    \global\let\contents = \relax
+  \fi
+  %
+  % If the branch above already ran, \contents is \relax by now and
+  % nothing is printed a second time.
+  \ifsetcontentsaftertitlepage
+    \contents
+    \global\let\contents = \relax
+    \global\let\shortcontents = \relax
+  \fi
+}
+
+% Draw the closing 2pt rule across \hsize, add \titlepagebottomglue
+% below it, and record that the title page is finished so the rule is
+% not drawn again.
+\def\finishtitlepage{%
+  \vskip4pt \hrule height 2pt width \hsize
+  \vskip\titlepagebottomglue
+  \finishedtitlepagetrue
+}
+
+%%% Macros to be used within @titlepage:
+
+\let\subtitlerm=\tenrm
+\def\subtitlefont{\subtitlerm \normalbaselineskip = 13pt \normalbaselines}
+
+\def\authorfont{\authorrm \normalbaselineskip = 16pt \normalbaselines
+               \let\tt=\authortt}
+
+% @title TEXT: only valid inside @titlepage (\checkenv).  Sets TEXT
+% flush left in the title fonts and draws a 4pt rule under it.
+% Clearing \iffinishedtitlepage requests the matching rule that
+% \finishtitlepage draws later.
+\parseargdef\title{%
+  \checkenv\titlepage
+  \leftline{\titlefonts\rm #1}
+  % print a rule at the page bottom also.
+  \finishedtitlepagefalse
+  \vskip4pt \hrule height 4pt width \hsize \vskip4pt
+}
+
+\parseargdef\subtitle{%
+  \checkenv\titlepage
+  {\subtitlefont \rightline{#1}}%
+}
+
+% @author should come last, but may come many times.
+% It can also be used inside @quotation.
+%
+% When the current environment (\thisenv) is \quotation, the name is
+% only stored in \quotationauthor.  Otherwise @author must be inside
+% @titlepage; the first @author there also inserts stretchable glue
+% (plus 1filll) above itself, and every name is set flush left in
+% \authorfont.
+\parseargdef\author{%
+  \def\temp{\quotation}%
+  \ifx\thisenv\temp
+    \def\quotationauthor{#1}% printed in \Equotation.
+  \else
+    \checkenv\titlepage
+    \ifseenauthor\else \vskip 0pt plus 1filll \seenauthortrue \fi
+    {\authorfont \leftline{#1}}%
+  \fi
+}
+
+
+%%% Set up page headings and footings.
+
+\let\thispage=\folio
+
+\newtoks\evenheadline    % headline on even pages
+\newtoks\oddheadline     % headline on odd pages
+\newtoks\evenfootline    % footline on even pages
+\newtoks\oddfootline     % footline on odd pages
+
+% Now make TeX use those variables
+\headline={{\textfonts\rm \ifodd\pageno \the\oddheadline
+                            \else \the\evenheadline \fi}}
+\footline={{\textfonts\rm \ifodd\pageno \the\oddfootline
+                            \else \the\evenfootline \fi}\HEADINGShook}
+\let\HEADINGShook=\relax
+
+% Commands to set those variables.
+% For example, this is what  @headings on  does
+% @evenheading @thistitle|@thispage|@thischapter
+% @oddheading @thischapter|@thispage|@thistitle
+% @evenfooting @thisfile||
+% @oddfooting ||@thisfile
+
+
+% @evenheading, @oddheading, @everyheading.
+% The argument line holds up to three fields, LEFT\|CENTER\|RIGHT,
+% separated by the control symbol \|.  The ...xxx macros append four
+% extra \| so that omitted fields come out empty (#4 soaks up whatever
+% is left over); the ...yyy macros store the result globally in the
+% matching token register.  CENTER is set in a zero-width \rlap, with
+% LEFT and RIGHT pushed apart by \hfil in a \line on top of it.
+\def\evenheading{\parsearg\evenheadingxxx}
+\def\evenheadingxxx #1{\evenheadingyyy #1\|\|\|\|\finish}
+\def\evenheadingyyy #1\|#2\|#3\|#4\finish{%
+\global\evenheadline={\rlap{\centerline{#2}}\line{#1\hfil#3}}}
+
+\def\oddheading{\parsearg\oddheadingxxx}
+\def\oddheadingxxx #1{\oddheadingyyy #1\|\|\|\|\finish}
+\def\oddheadingyyy #1\|#2\|#3\|#4\finish{%
+\global\oddheadline={\rlap{\centerline{#2}}\line{#1\hfil#3}}}
+
+% @everyheading applies the same line to odd and even pages.
+\parseargdef\everyheading{\oddheadingxxx{#1}\evenheadingxxx{#1}}%
+
+% @evenfooting, @oddfooting, @everyfooting.
+% The argument line holds up to three fields separated by the control
+% symbol \|; the ...xxx macros pad it with four extra \| so that
+% omitted fields come out empty, and the ...yyy macros store the
+% result globally in \evenfootline or \oddfootline.
+% Note that only \oddfootingyyy shrinks the page; every call to it
+% takes another 12pt off \pageheight and \vsize.
+\def\evenfooting{\parsearg\evenfootingxxx}
+\def\evenfootingxxx #1{\evenfootingyyy #1\|\|\|\|\finish}
+\def\evenfootingyyy #1\|#2\|#3\|#4\finish{%
+\global\evenfootline={\rlap{\centerline{#2}}\line{#1\hfil#3}}}
+
+\def\oddfooting{\parsearg\oddfootingxxx}
+\def\oddfootingxxx #1{\oddfootingyyy #1\|\|\|\|\finish}
+\def\oddfootingyyy #1\|#2\|#3\|#4\finish{%
+  \global\oddfootline = {\rlap{\centerline{#2}}\line{#1\hfil#3}}%
+  %
+  % Leave some space for the footline.  Hopefully ok to assume
+  % @evenfooting will not be used by itself.
+  \global\advance\pageheight by -12pt
+  \global\advance\vsize by -12pt
+}
+
+% @everyfooting applies the same line to odd and even pages.
+\parseargdef\everyfooting{\oddfootingxxx{#1}\evenfootingxxx{#1}}
+
+% @evenheadingmarks top     \thischapter <- chapter at the top of a page
+% @evenheadingmarks bottom  \thischapter <- chapter at the bottom of a page
+%
+% The same set of arguments for:
+%
+% @oddheadingmarks
+% @evenfootingmarks
+% @oddfootingmarks
+% @everyheadingmarks
+% @everyfootingmarks
+
+% The four specific commands pre-fill #1 and #2 of \headingmarks and
+% leave #3 (top or bottom) to be read from the input.
+\def\evenheadingmarks{\headingmarks{even}{heading}}
+\def\oddheadingmarks{\headingmarks{odd}{heading}}
+\def\evenfootingmarks{\headingmarks{even}{footing}}
+\def\oddfootingmarks{\headingmarks{odd}{footing}}
+% The `every' forms read the word up to the next space and apply it to
+% both parities.  The line end after each {#1} below becomes the space
+% that delimits #3 of \headingmarks.
+\def\everyheadingmarks#1 {\headingmarks{even}{heading}{#1}
+                          \headingmarks{odd}{heading}{#1} }
+\def\everyfootingmarks#1 {\headingmarks{even}{footing}{#1}
+                          \headingmarks{odd}{footing}{#1} }
+% #1 = even/odd, #2 = heading/footing, #3 = top/bottom.
+% Globally makes \get#1#2marks a copy of \get#3headingmarks, going
+% through \temp because both names have to be built with \csname.
+\def\headingmarks#1#2#3 {%
+  \expandafter\let\expandafter\temp \csname get#3headingmarks\endcsname
+  \global\expandafter\let\csname get#1#2marks\endcsname \temp
+}
+
+\everyheadingmarks bottom
+\everyfootingmarks bottom
+
+% @headings double      turns headings on for double-sided printing.
+% @headings single      turns headings on for single-sided printing.
+% @headings off         turns them off.
+% @headings on          same as @headings double, retained for compatibility.
+% @headings after       turns on double-sided headings after this page.
+% @headings doubleafter turns on double-sided headings after this page.
+% @headings singleafter turns on single-sided headings after this page.
+% By default, they are off at the start of a document,
+% and turned `on' after @end titlepage.
+
+% @headings WORD dispatches to the macro named \HEADINGS<WORD>; the
+% word is delimited by the following space.
+\def\headings #1 {\csname HEADINGS#1\endcsname}
+
+% Blank out all four headline/footline token lists (globally).
+\def\HEADINGSoff{%
+\global\evenheadline={\hfil} \global\evenfootline={\hfil}
+\global\oddheadline={\hfil} \global\oddfootline={\hfil}}
+% Headings start out off.
+\HEADINGSoff
+% When we turn headings on, set the page number to 1.
+% For double-sided printing, put current file name in lower left corner,
+% chapter name on inside top of right hand pages, document
+% title on inside top of left hand pages, and page numbers on outside top
+% edge of all pages.
+% Double-sided headings: reset the page number to 1, empty footlines,
+% `folio ... title' on even pages and `chapter ... folio' on odd pages.
+% Also selects \chapoddpage as \contentsalignmacro.
+\def\HEADINGSdouble{%
+\global\pageno=1
+\global\evenfootline={\hfil}
+\global\oddfootline={\hfil}
+\global\evenheadline={\line{\folio\hfil\thistitle}}
+\global\oddheadline={\line{\thischapter\hfil\folio}}
+\global\let\contentsalignmacro = \chapoddpage
+}
+% Initial value, used until one of the \HEADINGS... macros changes it.
+\let\contentsalignmacro = \chappager
+
+% For single-sided printing, chapter title goes across top left of page,
+% page number on top right.
+% Single-sided headings: reset the page number to 1, empty footlines,
+% and the same `chapter ... folio' headline on odd and even pages.
+% Also selects \chappager as \contentsalignmacro.
+\def\HEADINGSsingle{%
+\global\pageno=1
+\global\evenfootline={\hfil}
+\global\oddfootline={\hfil}
+\global\evenheadline={\line{\thischapter\hfil\folio}}
+\global\oddheadline={\line{\thischapter\hfil\folio}}
+\global\let\contentsalignmacro = \chappager
+}
+% @headings on is the double-sided layout.
+\def\HEADINGSon{\HEADINGSdouble}
+
+% The `after' variants do not change the headings right away; they
+% install a macro as \HEADINGShook, which is executed at the end of
+% \footline.  The ...x macros set the same headlines and footlines as
+% \HEADINGSdouble and \HEADINGSsingle, except that they leave \pageno
+% alone.
+\def\HEADINGSafter{\let\HEADINGShook=\HEADINGSdoublex}
+\let\HEADINGSdoubleafter=\HEADINGSafter
+\def\HEADINGSdoublex{%
+\global\evenfootline={\hfil}
+\global\oddfootline={\hfil}
+\global\evenheadline={\line{\folio\hfil\thistitle}}
+\global\oddheadline={\line{\thischapter\hfil\folio}}
+\global\let\contentsalignmacro = \chapoddpage
+}
+
+\def\HEADINGSsingleafter{\let\HEADINGShook=\HEADINGSsinglex}
+\def\HEADINGSsinglex{%
+\global\evenfootline={\hfil}
+\global\oddfootline={\hfil}
+\global\evenheadline={\line{\thischapter\hfil\folio}}
+\global\oddheadline={\line{\thischapter\hfil\folio}}
+\global\let\contentsalignmacro = \chappager
+}
+
+% Subroutines used in generating headings
+% This produces Day Month Year style of output.
+% Only define if not already defined, in case a txi-??.tex file has set
+% up a different format (e.g., txi-cs.tex does this).
+\ifx\today\undefined
+\def\today{%
+  \number\day\space
+  \ifcase\month
+  \or\putwordMJan\or\putwordMFeb\or\putwordMMar\or\putwordMApr
+  \or\putwordMMay\or\putwordMJun\or\putwordMJul\or\putwordMAug
+  \or\putwordMSep\or\putwordMOct\or\putwordMNov\or\putwordMDec
+  \fi
+  \space\number\year}
+\fi
+
+% @settitle line...  specifies the title of the document, for headings.
+% It generates no output of its own.
+\def\thistitle{\putwordNoTitle}
+\def\settitle{\parsearg{\gdef\thistitle}}
+
+
+\message{tables,}
+% Tables -- @table, @ftable, @vtable, @item(x).
+
+% default indentation of table text
+\newdimen\tableindent \tableindent=.8in
+% default indentation of @itemize and @enumerate text
+\newdimen\itemindent  \itemindent=.3in
+% margin between end of table item and start of table text.
+\newdimen\itemmargin  \itemmargin=.1in
+
+% used internally for \itemindent minus \itemmargin
+\newdimen\itemmax
+
+% Note @table, @ftable, and @vtable define @item, @itemx, etc., with
+% these defs.
+% They also define \itemindex
+% to index the item name in whatever manner is desired (perhaps none).
+
+\newif\ifitemxneedsnegativevskip
+
+\def\itemxpar{\par\ifitemxneedsnegativevskip\nobreak\vskip-\parskip\nobreak\fi}
+
+\def\internalBitem{\smallbreak \parsearg\itemzzz}
+\def\internalBitemx{\itemxpar \parsearg\itemzzz}
+
+% \itemzzz{TEXT}: typeset the label of one @item/@itemx in a table.
+% The label, wrapped in \itemindicate, is measured in box 0 and passed
+% to \itemindex.  If it is wider than \itemmax it gets a line of its
+% own; otherwise it is placed in the left margin and the following text
+% continues on the same line.  \ifitemxneedsnegativevskip records which
+% case happened, for the benefit of a following @itemx (see \itemxpar).
+\def\itemzzz #1{\begingroup %
+  \advance\hsize by -\rightskip
+  \advance\hsize by -\tableindent
+  \setbox0=\hbox{\itemindicate{#1}}%
+  \itemindex{#1}%
+  \nobreak % This prevents a break before @itemx.
+  %
+  % If the item text does not fit in the space we have, put it on a line
+  % by itself, and do not allow a page break either before or after that
+  % line.  We do not start a paragraph here because then if the next
+  % command is, e.g., @kindex, the whatsit would get put into the
+  % horizontal list on a line by itself, resulting in extra blank space.
+  \ifdim \wd0>\itemmax
+    %
+    % Make this a paragraph so we get the \parskip glue and wrapping,
+    % but leave it ragged-right.
+    \begingroup
+      \advance\leftskip by-\tableindent
+      \advance\hsize by\tableindent
+      \advance\rightskip by0pt plus1fil
+      \leavevmode\unhbox0\par
+    \endgroup
+    %
+    % We're going to be starting a paragraph, but we don't want the
+    % \parskip glue -- logically it's part of the @item we just started.
+    \nobreak \vskip-\parskip
+    %
+    % Stop a page break at the \parskip glue coming up.  However, if
+    % what follows is an environment such as @example, there will be no
+    % \parskip glue; then the negative vskip we just inserted would
+    % cause the example and the item to crash together.  So we use this
+    % bizarre value of 10001 as a signal to \aboveenvbreak to insert
+    % \parskip glue after all.  Section titles are handled this way also.
+    % 
+    \penalty 10001
+    \endgroup
+    \itemxneedsnegativevskipfalse
+  \else
+    % The item text fits into the space.  Start a paragraph, so that the
+    % following text (if any) will end up on the same line.
+    \noindent
+    % Do this with kerns and \unhbox so that if there is a footnote in
+    % the item text, it can migrate to the main vertical list and
+    % eventually be printed.
+    \nobreak\kern-\tableindent
+    % \dimen0 = the space left between the end of the label and the
+    % start of the table text.
+    \dimen0 = \itemmax  \advance\dimen0 by \itemmargin \advance\dimen0 by -\wd0
+    \unhbox0
+    \nobreak\kern\dimen0
+    \endgroup
+    \itemxneedsnegativevskiptrue
+  \fi
+}
+
+\def\item{\errmessage{@item while not in a list environment}}
+\def\itemx{\errmessage{@itemx while not in a list environment}}
+
+% @table, @ftable, @vtable.
+% The three environments differ only in \itemindex: @table discards the
+% item text (\gobble), @ftable indexes it under `fn' and @vtable under
+% `vr', each wrapped in \code.
+\envdef\table{%
+  \let\itemindex\gobble
+  \tablecheck{table}%
+}
+\envdef\ftable{%
+  \def\itemindex ##1{\doind {fn}{\code{##1}}}%
+  \tablecheck{ftable}%
+}
+\envdef\vtable{%
+  \def\itemindex ##1{\doind {vr}{\code{##1}}}%
+  \tablecheck{vtable}%
+}
+% \tablecheck{ENVNAME}: if ^^M currently has the \active category code,
+% refuse to start the table: close a group (presumably the one opened
+% for the environment -- confirm against \envdef), report an error and
+% skip the body with \doignore.  Otherwise continue with \tablex.
+\def\tablecheck#1{%
+  \ifnum \the\catcode`\^^M=\active
+    \endgroup
+    \errmessage{This command won't work in this context; perhaps the problem is
+      that we are \inenvironment\thisenv}%
+    \def\next{\doignore{#1}}%
+  \else
+    \let\next\tablex
+  \fi
+  \next
+}
+% \tablex: #1 is the first token (or group) after the environment
+% command; it becomes \itemindicate, the command applied to each item
+% label.  The rest of the line is handed to \tabley.
+\def\tablex#1{%
+  \def\itemindicate{#1}%
+  \parsearg\tabley
+}
+% \tabley: expand the rest of the line with \value made expandable,
+% pad it with three spaces so that \tablez always finds its three
+% space-delimited arguments, and call \tablez outside the group.
+\def\tabley#1{%
+  {%
+    \makevalueexpandable
+    \edef\temp{\noexpand\tablez #1\space\space\space}%
+    \expandafter
+  }\temp \endtablez
+}
+% \tablez: #1, #2, #3 are optional numbers, multiplied by \mil (defined
+% elsewhere): #1 is added to \leftskip, #2 replaces \tableindent, #3 is
+% added to \rightskip.  The leading 0 in each \ifnum keeps the test
+% valid when the argument is empty.  Then the table layout parameters
+% are set and @item/@itemx are bound to the table versions.
+\def\tablez #1 #2 #3 #4\endtablez{%
+  \aboveenvbreak
+  \ifnum 0#1>0 \advance \leftskip by #1\mil \fi
+  \ifnum 0#2>0 \tableindent=#2\mil \fi
+  \ifnum 0#3>0 \advance \rightskip by #3\mil \fi
+  \itemmax=\tableindent
+  \advance \itemmax by -\itemmargin
+  \advance \leftskip by \tableindent
+  \exdentamount=\tableindent
+  \parindent = 0pt
+  \parskip = \smallskipamount
+  \ifdim \parskip=0pt \parskip=2pt \fi
+  \let\item = \internalBitem
+  \let\itemx = \internalBitemx
+}
+\def\Etable{\endgraf\afterenvbreak}
+\let\Eftable\Etable
+\let\Evtable\Etable
+\let\Eitemize\Etable
+\let\Eenumerate\Etable
+
+% This is the counter used by @enumerate, which is really @itemize
+
+\newcount \itemno
+
+% @itemize MARK: the rest of the line is the mark printed before each
+% item.
+\envdef\itemize{\parsearg\doitemize}
+
+% \doitemize{MARK}: set up the list layout (indentation by \itemindent,
+% no paragraph indentation, small \parskip), remember MARK in
+% \itemcontents, and make @item mean \itemizeitem.  Also used by
+% @enumerate through \startenumeration.
+\def\doitemize#1{%
+  \aboveenvbreak
+  \itemmax=\itemindent
+  \advance\itemmax by -\itemmargin
+  \advance\leftskip by \itemindent
+  \exdentamount=\itemindent
+  \parindent=0pt
+  \parskip=\smallskipamount
+  \ifdim\parskip=0pt \parskip=2pt \fi
+  \def\itemcontents{#1}%
+  % @itemize with no arg is equivalent to @itemize @bullet.
+  \ifx\itemcontents\empty\def\itemcontents{\bullet}\fi
+  \let\item=\itemizeitem
+}
+
+% Definition of @item while inside @itemize and @enumerate.
+%
+% Bumps \itemno, allows a page break, then starts an unindented
+% paragraph whose first box is the mark (\itemcontents), right-aligned
+% in a zero-width box so it sticks out into the left margin, followed
+% by \itemmargin of space.
+\def\itemizeitem{%
+  \advance\itemno by 1  % for enumerations
+  {\let\par=\endgraf \smallbreak}% reasonable place to break
+  {%
+   % If the document has an @itemize directly after a section title, a
+   % \nobreak will be last on the list, and \sectionheading will have
+   % done a \vskip-\parskip.  In that case, we don't want to zero
+   % parskip, or the item text will crash with the heading.  On the
+   % other hand, when there is normal text preceding the item (as there
+   % usually is), we do want to zero parskip, or there would be too much
+   % space.  In that case, we won't have a \nobreak before.  At least
+   % that's the theory.
+   \ifnum\lastpenalty<10000 \parskip=0in \fi
+   \noindent
+   \hbox to 0pt{\hss \itemcontents \kern\itemmargin}%
+   \vadjust{\penalty 1200}}% not good to break after first line of item.
+  \flushcr
+}
+
+% \splitoff TOKENS\endmark defines \first to be the first token in
+% TOKENS, and \rest to be the remainder.
+%
+\def\splitoff#1#2\endmark{\def\first{#1}\def\rest{#2}}%
+
+% Allow an optional argument of an uppercase letter, lowercase letter,
+% or number, to specify the first label in the enumerated list.  No
+% argument is the same as `1'.
+%
+% @enumerate [START]: the two spaces appended before \endenumeratey
+% guarantee that \enumeratey finds its space delimiter even when the
+% line is empty.
+\envparseargdef\enumerate{\enumeratey #1  \endenumeratey}
+% \enumeratey: #1 is the first word of the line (the starting label),
+% #2 is whatever follows and is ignored.  Stores the label in \thearg
+% and dispatches to \numericenumerate, \lowercaseenumerate or
+% \uppercaseenumerate.
+\def\enumeratey #1 #2\endenumeratey{%
+  % If we were given no argument, pretend we were given `1'.
+  \def\thearg{#1}%
+  \ifx\thearg\empty \def\thearg{1}\fi
+  %
+  % Detect if the argument is a single token.  If so, it might be a
+  % letter.  Otherwise, the only valid thing it can be is a number.
+  % (We will always have one token, because of the test we just made.
+  % This is a good thing, since \splitoff doesn't work given nothing at
+  % all -- the first parameter is undelimited.)
+  \expandafter\splitoff\thearg\endmark
+  \ifx\rest\empty
+    % Only one token in the argument.  It could still be anything.
+    % A ``lowercase letter'' is one whose \lccode is nonzero.
+    % An ``uppercase letter'' is one whose \lccode is both nonzero, and
+    %   not equal to itself.
+    % Otherwise, we assume it's a number.
+    %
+    % We need the \relax at the end of the \ifnum lines to stop TeX from
+    % continuing to look for a <number>.
+    %
+    \ifnum\lccode\expandafter`\thearg=0\relax
+      \numericenumerate % a number (we hope)
+    \else
+      % It's a letter.
+      \ifnum\lccode\expandafter`\thearg=\expandafter`\thearg\relax
+        \lowercaseenumerate % lowercase letter
+      \else
+        \uppercaseenumerate % uppercase letter
+      \fi
+    \fi
+  \else
+    % Multiple tokens in the argument.  We hope it's a number.
+    \numericenumerate
+  \fi
+}
+
+% An @enumerate whose labels are integers.  The starting integer is
+% given in \thearg.
+%
+\def\numericenumerate{%
+  \itemno = \thearg
+  \startenumeration{\the\itemno}%
+}
+
+% \lowercaseenumerate: start an @enumerate labelled with lowercase
+% letters.  The starting (lowercase) letter is in \thearg.
+% \itemno holds the character code of the current label; the label
+% text passed to \startenumeration is evaluated each time an item is
+% typeset, i.e. after @item has advanced \itemno.
+\def\lowercaseenumerate{%
+  \itemno = \expandafter`\thearg
+  \startenumeration{%
+    % Be sure we're not beyond the end of the alphabet.  \itemno is a
+    % character code and does not become 0 by counting upwards; what
+    % becomes 0 is the \lccode of a character that is not a letter, so
+    % that is the value to test.
+    \ifnum\lccode\itemno=0
+      \errmessage{No more lowercase letters in @enumerate; get a bigger
+                  alphabet}%
+    \fi
+    \char\lccode\itemno
+  }%
+}
+
+% \uppercaseenumerate: start an @enumerate labelled with uppercase
+% letters.  The starting (uppercase) letter is in \thearg.
+% \itemno holds the character code of the current label; the label
+% text passed to \startenumeration is evaluated each time an item is
+% typeset, i.e. after @item has advanced \itemno.
+\def\uppercaseenumerate{%
+  \itemno = \expandafter`\thearg
+  \startenumeration{%
+    % Be sure we're not beyond the end of the alphabet.  \itemno is a
+    % character code and does not become 0 by counting upwards; what
+    % becomes 0 is the \uccode of a character that is not a letter, so
+    % that is the value to test.
+    \ifnum\uccode\itemno=0
+      \errmessage{No more uppercase letters in @enumerate; get a bigger
+                  alphabet}%
+    \fi
+    \char\uccode\itemno
+  }%
+}
+
+% Call \doitemize with the label #1 followed by a period, then \flushcr.
+% Also subtract one from the initial value in
+% \itemno, since @item increments \itemno.
+%
+\def\startenumeration#1{%
+  \advance\itemno by -1
+  \doitemize{#1.}\flushcr
+}
+
+% @alphaenumerate and @capsenumerate are abbreviations for giving an arg
+% to @enumerate.
+%
+\def\alphaenumerate{\enumerate{a}}
+\def\capsenumerate{\enumerate{A}}
+\def\Ealphaenumerate{\Eenumerate}
+\def\Ecapsenumerate{\Eenumerate}
+
+
+% @multitable macros
+% Amy Hendrickson, 8/18/94, 3/6/96
+%
+% @multitable ... @end multitable will make as many columns as desired.
+% Contents of each column will wrap at width given in preamble.  Width
+% can be specified either with sample text given in a template line,
+% or in percent of \hsize, the current width of text on page.
+
+% Table can continue over pages but will only break between lines.
+
+% To make preamble:
+%
+% Either define widths of columns as fractions of \hsize:
+%   @multitable @columnfractions .25 .3 .45
+%   @item ...
+%
+%   Numbers following @columnfractions are the fractions of the total
+%   current \hsize to be used for each column (.25 is one quarter of
+%   it).  You may use as many columns as desired.
+
+
+% Or use a template:
+%   @multitable {Column 1 template} {Column 2 template} {Column 3 template}
+%   @item ...
+%   using the widest term desired in each column.
+
+% Each new table line starts with @item, each subsequent new column
+% starts with @tab. Empty columns may be produced by supplying @tab's
+% with nothing between them for as many times as empty columns are needed,
+% i.e., @tab@tab@tab will produce two empty columns.
+
+% @item, @tab do not need to be on their own lines, but it will not hurt
+% if they are.
+
+% Sample multitable:
+
+%   @multitable {Column 1 template} {Column 2 template} {Column 3 template}
+%   @item first col stuff @tab second col stuff @tab third col
+%   @item
+%   first col stuff
+%   @tab
+%   second col stuff
+%   @tab
+%   third col
+%   @item first col stuff @tab second col stuff
+%   @tab Many paragraphs of text may be used in any column.
+%
+%         They will wrap at the width determined by the template.
+%   @item@tab@tab This will be in third column.
+%   @end multitable
+
+% Default dimensions may be reset by user.
+% @multitableparskip is vertical space between paragraphs in table.
+% @multitableparindent is paragraph indent in table.
+% @multitablecolmargin is horizontal space to be left between columns.
+% @multitablelinespace is space to leave between table items, baseline
+%                                                            to baseline.
+%   0pt means it depends on current normal line spacing.
+%
+\newskip\multitableparskip
+\newskip\multitableparindent
+\newdimen\multitablecolspace
+\newskip\multitablelinespace
+\multitableparskip=0pt
+\multitableparindent=6pt
+\multitablecolspace=12pt
+\multitablelinespace=0pt
+
+% Macros used to set up halign preamble:
+%
+\let\endsetuptable\relax
+\def\xendsetuptable{\endsetuptable}
+\let\columnfractions\relax
+\def\xcolumnfractions{\columnfractions}
+\newif\ifsetpercent
+
+% #1 is the @columnfraction, usually a decimal number like .5, but might
+% be just 1.  We just use it, whatever it is.
+% The fraction is delimited by the following space.  The column counter
+% is advanced and the width is stored, expanded, as `#1\hsize' in the
+% macro \col<N>; then \setuptable goes on with the next word.
+%
+\def\pickupwholefraction#1 {%
+  \global\advance\colcount by 1
+  \expandafter\xdef\csname col\the\colcount\endcsname{#1\hsize}%
+  \setuptable
+}
+
+\newcount\colcount
+% \setuptable: walk through the @multitable argument line one token or
+% brace group (#1) at a time.
+%  - \endsetuptable ends the walk.
+%  - @columnfractions switches to fraction mode (\ifsetpercent).
+%  - In fraction mode, #1 is the start of a number; it is put back and
+%    \pickupwholefraction reads the whole number up to the next space.
+%  - Otherwise #1 is a template: its natural width (plus a word space)
+%    is stored in the macro \col<N>.
+% \go holds what to do next and is executed at the very end.
+\def\setuptable#1{%
+  \def\firstarg{#1}%
+  \ifx\firstarg\xendsetuptable
+    \let\go = \relax
+  \else
+    \ifx\firstarg\xcolumnfractions
+      \global\setpercenttrue
+    \else
+      \ifsetpercent
+         \let\go\pickupwholefraction
+      \else
+         \global\advance\colcount by 1
+         \setbox0=\hbox{#1\unskip\space}% Add a normal word space as a
+                   % separator; typically that is always in the input, anyway.
+         \expandafter\xdef\csname col\the\colcount\endcsname{\the\wd0}%
+      \fi
+    \fi
+    \ifx\go\pickupwholefraction
+      % Put the argument back for the \pickupwholefraction call, so
+      % we'll always have a period there to be parsed.
+      \def\go{\pickupwholefraction#1}%
+    \else
+      \let\go = \setuptable
+    \fi%
+  \fi
+  \go
+}
+
+% multitable-only commands.
+%
+% @headitem starts a heading row, which we typeset in bold.
+% Assignments have to be global since we are inside the implicit group
+% of an alignment entry.  Note that \everycr resets \everytab.
+\def\headitem{\checkenv\multitable \crcr \global\everytab={\bf}\the\everytab}%
+%
+% A \tab used to include \hskip1sp.  But then the space in a template
+% line is not enough.  That is bad.  So let's go back to just `&' until
+% we encounter the problem it was intended to solve again.
+%                                      --karl, nathan@acm.org, 20apr99.
+\def\tab{\checkenv\multitable &\the\everytab}%
+
+% @multitable ... @end multitable definitions:
+%
+\newtoks\everytab  % insert after every tab.
+%
+% @multitable: set up the environment (loose line breaking, table
+% spacing, column counter, per-row hook in \everycr), then read the
+% rest of the line -- the column specification -- with \domultitable.
+\envdef\multitable{%
+  \vskip\parskip
+  \startsavinginserts
+  %
+  % @item within a multitable starts a normal row.
+  % We use \def instead of \let so that if one of the multitable entries
+  % contains an @itemize, we don't choke on the \item (seen as \crcr aka
+  % \endtemplate) expanding \doitemize.
+  \def\item{\crcr}%
+  %
+  \tolerance=9500
+  \hbadness=9500
+  \setmultitablespacing
+  \parskip=\multitableparskip
+  \parindent=\multitableparindent
+  \overfullrule=0pt
+  \global\colcount=0
+  %
+  \everycr = {%
+    \noalign{%
+      \global\everytab={}%
+      \global\colcount=0 % Reset the column counter.
+      % Check for saved footnotes, etc.
+      \checkinserts
+      % Keeps underfull box messages off when table breaks over pages.
+      %\filbreak
+       % Maybe so, but it also creates really weird page breaks when the
+       % table breaks over pages. Wouldn't \vfil be better?  Wait until the
+       % problem manifests itself, so it can be fixed for real --karl.
+    }%
+  }%
+  %
+  \parsearg\domultitable
+}
+% \domultitable{SPEC}: SPEC is the rest of the @multitable line.
+% \setuptable turns it into the column widths \col1, \col2, ...; then
+% the \halign is opened with a single repeating column template that
+% sets each entry in a \vtop of the width stored for its column.  The
+% \halign stays open until \Emultitable closes it.
+\def\domultitable#1{%
+  % To parse everything between @multitable and @item:
+  \setuptable#1 \endsetuptable
+  %
+  % This preamble sets up a generic column definition, which will
+  % be used as many times as user calls for columns.
+  % \vtop will set a single line and will also let text wrap and
+  % continue for many paragraphs if desired.
+  \halign\bgroup &%
+    \global\advance\colcount by 1
+    \multistrut
+    \vtop{%
+      % Use the current \colcount to find the correct column width:
+      \hsize=\expandafter\csname col\the\colcount\endcsname
+      %
+      % In order to keep entries from bumping into each other
+      % we will add a \leftskip of \multitablecolspace to all columns after
+      % the first one.
+      %
+      % If a template has been used, we will add \multitablecolspace
+      % to the width of each template entry.
+      %
+      % If the user has set preamble in terms of percent of \hsize we will
+      % use that dimension as the width of the column, and the \leftskip
+      % will keep entries from bumping into each other.  Table will start at
+      % left margin and final column will justify at right margin.
+      %
+      % Make sure we don't inherit \rightskip from the outer environment.
+      \rightskip=0pt
+      \ifnum\colcount=1
+       % The first column will be indented with the surrounding text.
+       \advance\hsize by\leftskip
+      \else
+       \ifsetpercent \else
+         % If user has not set preamble in terms of percent of \hsize
+         % we will advance \hsize by \multitablecolspace.
+         \advance\hsize by \multitablecolspace
+       \fi
+       % In either case we will make \leftskip=\multitablecolspace:
+      \leftskip=\multitablecolspace
+      \fi
+      % Ignoring space at the beginning and end avoids an occasional spurious
+      % blank line, when TeX decides to break the line at the space before the
+      % box from the multistrut, so the strut ends up on a line by itself.
+      % For example:
+      % @multitable @columnfractions .11 .89
+      % @item @code{#}
+      % @tab Legal holiday which is valid in major parts of the whole country.
+      % Is automatically provided with highlighting sequences respectively
+      % marking characters.
+      \noindent\ignorespaces##\unskip\multistrut
+    }\cr
+}
+% @end multitable: finish the last row, close the \halign opened by
+% \domultitable, and leave fraction mode so that the next @multitable
+% starts out in template mode.
+\def\Emultitable{%
+  \crcr
+  \egroup % end the \halign
+  \global\setpercentfalse
+}
+
+% \setmultitablespacing: define \multistrut and derive the (global)
+% values of \multitablelinespace and \multitableparskip that the user
+% left at 0pt.
+\def\setmultitablespacing{%
+  \def\multistrut{\strut}% just use the standard line spacing
+  %
+  % Compute \multitablelinespace (if not defined by user) for use in
+  % \multitableparskip calculation.  We used to define \multistrut based
+  % on this, but (ironically) that caused the spacing to be off.
+  % See bug-texinfo report from Werner Lemberg, 31 Oct 2004 12:52:20 +0100.
+\ifdim\multitablelinespace=0pt
+\setbox0=\vbox{X}\global\multitablelinespace=\the\baselineskip
+\global\advance\multitablelinespace by-\ht0
+\fi
+%% Test to see if parskip is larger than space between lines of
+%% table. If not, do nothing.
+%%        If so, set to same dimension as multitablelinespace.
+\ifdim\multitableparskip>\multitablelinespace
+\global\multitableparskip=\multitablelinespace
+\global\advance\multitableparskip-7pt %% to keep parskip somewhat smaller
+                                      %% than skip between lines in the table.
+\fi%
+%% A parskip of 0pt means `not set by the user': derive it the same way.
+\ifdim\multitableparskip=0pt
+\global\multitableparskip=\multitablelinespace
+\global\advance\multitableparskip-7pt %% to keep parskip somewhat smaller
+                                      %% than skip between lines in the table.
+\fi}
+
+
+\message{conditionals,}
+
+% @iftex, @ifnotdocbook, @ifnothtml, @ifnotinfo, @ifnotplaintext,
+% @ifnotxml always succeed.  They currently do nothing; we don't
+% attempt to check whether the conditionals are properly nested.  But we
+% have to remember that they are conditionals, so that @end doesn't
+% attempt to close an environment group.
+%
+% \makecond{NAME}: make \NAME a no-op (\relax) and define the marker
+% \iscond.NAME, so that the conditional can be recognized as one later.
+\def\makecond#1{%
+  \expandafter\let\csname #1\endcsname = \relax
+  \expandafter\let\csname iscond.#1\endcsname = 1
+}
+\makecond{iftex}
+\makecond{ifnotdocbook}
+\makecond{ifnothtml}
+\makecond{ifnotinfo}
+\makecond{ifnotplaintext}
+\makecond{ifnotxml}
+
+% Ignore @ignore, @ifhtml, @ifinfo, and the like.
+%
+\def\direntry{\doignore{direntry}}
+\def\documentdescription{\doignore{documentdescription}}
+\def\docbook{\doignore{docbook}}
+\def\html{\doignore{html}}
+\def\ifdocbook{\doignore{ifdocbook}}
+\def\ifhtml{\doignore{ifhtml}}
+\def\ifinfo{\doignore{ifinfo}}
+\def\ifnottex{\doignore{ifnottex}}
+\def\ifplaintext{\doignore{ifplaintext}}
+\def\ifxml{\doignore{ifxml}}
+\def\ignore{\doignore{ignore}}
+\def\menu{\doignore{menu}}
+\def\xml{\doignore{xml}}
+
+% Ignore text until a line `@end #1', keeping track of nested conditionals.
+%
+% A count to remember the depth of nesting.
+\newcount\doignorecount
+
+% \doignore{NAME}: skip input up to the matching `@end NAME'.
+% Opens a group (closed by \enddoignore), makes @, { and } ordinary
+% characters and line ends significant, resets the nesting counter and
+% hands over to \dodoignore.
+\def\doignore#1{\begingroup
+  % Scan in ``verbatim'' mode:
+  \obeylines
+  \catcode`\@ = \other
+  \catcode`\{ = \other
+  \catcode`\} = \other
+  %
+  % Make sure that spaces turn into tokens that match what \doignoretext wants.
+  \spaceisspace
+  %
+  % Count number of #1's that we've seen.
+  \doignorecount = 0
+  %
+  % Swallow text until we reach the matching `@end #1'.
+  \dodoignore{#1}%
+}
+
+% \dodoignore{NAME}: define the two delimited scanners used while
+% skipping an ignored region, then start scanning.  The definition is
+% made inside a group with \obeylines in effect, as it is in \doignore,
+% so that each ^^M in the parameter texts below matches a line end of
+% the text being skipped.
+{ \catcode`_=11 % We want to use \_STOP_ which cannot appear in texinfo source.
+  \obeylines %
+  %
+  \gdef\dodoignore#1{%
+    % #1 contains the command name as a string, e.g., `ifinfo'.
+    %
+    % Define a command to find the next `@end #1'.
+    \long\def\doignoretext##1^^M@end #1{%
+      \doignoretextyyy##1^^M@#1\_STOP_}%
+    %
+    % And this command to find another #1 command, at the beginning of a
+    % line.  (Otherwise, we would consider a line `@c @ifset', for
+    % example, to count as an @ifset for nesting.)
+    \long\def\doignoretextyyy##1^^M@#1##2\_STOP_{\doignoreyyy{##2}\_STOP_}%
+    %
+    % And now expand that command.
+    \doignoretext ^^M%
+  }%
+}
+
+% \doignoreyyy{REST}: REST is what \doignoretextyyy found after a
+% `@NAME' at the start of a line, within the text up to the current
+% `@end NAME'.  Empty REST means only the sentinel appended by
+% \doignoretext matched, i.e. there is no further nested opener.
+\def\doignoreyyy#1{%
+  \def\temp{#1}%
+  \ifx\temp\empty                      % Nothing found.
+    \let\next\doignoretextzzz
+  \else                                        % Found a nested condition, ...
+    \advance\doignorecount by 1
+    \let\next\doignoretextyyy          % ..., look for another.
+    % If we're here, #1 ends with ^^M\ifinfo (for example).
+  \fi
+  \next #1% the token \_STOP_ is present just after this macro.
+}
+
+% We have to swallow the remaining "\_STOP_".
+%
+% #1 is the leftover \_STOP_ and is discarded.  Called once per
+% `@end NAME' found: at depth 0 the ignored region is over, otherwise
+% one nesting level is closed and scanning resumes.
+\def\doignoretextzzz#1{%
+  \ifnum\doignorecount = 0     % We have just found the outermost @end.
+    \let\next\enddoignore
+  \else                                % Still inside a nested condition.
+    \advance\doignorecount by -1
+    \let\next\doignoretext      % Look for the next @end.
+  \fi
+  \next
+}
+
+% Finish off ignored text.
+% \enddoignore: discard the rest of the `@end NAME' line (#1, up to the
+% line end), close the group opened by \doignore, and skip following
+% spaces.
+{ \obeylines%
+  % Ignore anything after the last `@end #1'; this matters in verbatim
+  % environments, where otherwise the newline after an ignored conditional
+  % would result in a blank line in the output.
+  \gdef\enddoignore#1^^M{\endgroup\ignorespaces}%
+}
+
+
+% @set VAR sets the variable VAR to an empty value.
+% @set VAR REST-OF-LINE sets VAR to the value REST-OF-LINE.
+%
+% Since we want to separate VAR from REST-OF-LINE (which might be
+% empty), we can't just use \parsearg; we have to insert a space of our
+% own to delimit the rest of the line, and then take it out again if we
+% didn't need it.
+% We rely on the fact that \parsearg sets \catcode`\ =10.
+%
+% The value is stored globally in the control sequence \SET<VAR>.
+%
+\parseargdef\set{\setyyy#1 \endsetyyy}
+% #1 is VAR; #2 is REST-OF-LINE followed by the space \set appended
+% (or empty when only VAR was given).
+\def\setyyy#1 #2\endsetyyy{%
+  {%
+    \makevalueexpandable
+    \def\temp{#2}%
+    % \next becomes `\gdef\SET<VAR>', still waiting for its body.
+    \edef\next{\gdef\makecsname{SET#1}}%
+    \ifx\temp\empty
+      \next{}%
+    \else
+      \setzzz#2\endsetzzz
+    \fi
+  }%
+}
+% Remove the trailing space \set inserted.
+\def\setzzz#1 \endsetzzz{\next{#1}}
+
+% @clear VAR clears (i.e., unsets) the variable VAR.
+%
+% This globally \let's \SET<VAR> to \relax, which is exactly the state
+% that \expandablevalue and \doifset test for with \ifx...\relax.
+%
+\parseargdef\clear{%
+  {%
+    \makevalueexpandable
+    \global\expandafter\let\csname SET#1\endcsname=\relax
+  }%
+}
+
+% @value{foo} gets the text saved in variable foo.
+% The group opened here is closed by \valuexxx, so the catcode changes
+% made by \makevalueexpandable apply only while the argument is read.
+\def\value{\begingroup\makevalueexpandable\valuexxx}
+\def\valuexxx#1{\expandablevalue{#1}\endgroup}
+% The definition below is made with - and _ active, so that the \let's
+% at its end refer to the active characters.
+{
+  \catcode`\- = \active \catcode`\_ = \active
+  %
+  \gdef\makevalueexpandable{%
+    \let\value = \expandablevalue
+    % We don't want these characters active, ...
+    \catcode`\-=\other \catcode`\_=\other
+    % ..., but we might end up with active ones in the argument if
+    % we're called from @code, as @code{@value{foo-bar_}}, though.
+    % So \let them to their normal equivalents.
+    \let-\realdash \let_\normalunderscore
+  }
+}
+
+% We have this subroutine so that we can handle at least some @value's
+% properly in indexes (we call \makevalueexpandable in \indexdummies).
+% The command has to be fully expandable (if the variable is set), since
+% the result winds up in the index file.  This means that if the
+% variable's value contains other Texinfo commands, it's almost certain
+% it will fail (although perhaps we could fix that with sufficient work
+% to do a one-level expansion on the result, instead of complete).
+%
+% #1 is the variable name.  If \SET#1 is \relax (never set, or cleared
+% with @clear), a placeholder is typeset and a warning is sent to the
+% terminal/log; otherwise the stored value is inserted.
+%
+\def\expandablevalue#1{%
+  \expandafter\ifx\csname SET#1\endcsname\relax
+    {[No value for ``#1'']}%
+    \message{Variable `#1', used in @value, is not set.}%
+  \else
+    \csname SET#1\endcsname
+  \fi
+}
+
+% @ifset VAR ... @end ifset reads the `...' iff VAR has been defined
+% with @set.
+%
+% To get special treatment of `@end ifset,' call \makecond and then
+% redefine \ifset.
+%
+\makecond{ifset}
+\def\ifset{\parsearg{\doifset{\let\next=\ifsetfail}}}
+% \doifset{ACTION}{VAR}: ACTION is inserted inside the
+% `\ifx\SET<VAR>\relax' test and may redefine \next.  The closing
+% \expandafter expands \next before the group ends, so the value chosen
+% inside the group is the one that is executed after it.
+\def\doifset#1#2{%
+  {%
+    \makevalueexpandable
+    \let\next=\empty
+    \expandafter\ifx\csname SET#2\endcsname\relax
+      #1% If not set, redefine \next.
+    \fi
+    \expandafter
+  }\next
+}
+% Skip everything up to the matching `@end ifset'.
+\def\ifsetfail{\doignore{ifset}}
+
+% @ifclear VAR ... @end ifclear reads the `...' iff VAR has never been
+% defined with @set, or has been undefined with @clear.
+%
+% The `\else' inside the `\doifset' parameter is a trick to reuse the
+% above code: if the variable is not set, do nothing, if it is set,
+% then redefine \next to \ifclearfail.
+%
+\makecond{ifclear}
+\def\ifclear{\parsearg{\doifset{\else \let\next=\ifclearfail}}}
+% Skip everything up to the matching `@end ifclear'.
+\def\ifclearfail{\doignore{ifclear}}
+
+% @dircategory CATEGORY  -- specify a category of the dir file
+% which this file should belong to.  Ignore this in TeX.
+\let\dircategory=\comment
+
+% @definfoenclose: likewise treated as a comment line in TeX.
+\let\definfoenclose=\comment
+
+
+\message{indexing,}
+% Index generation facilities
+
+% Define \newwrite to be identical to plain tex's \newwrite
+% except not \outer, so it can be used within macros and \if's.
+% NOTE(review): \makecsname is defined outside this chunk; presumably it
+% yields the (unexpanded) control sequence \ptexnewwrite here -- confirm.
+\edef\newwrite{\makecsname{ptexnewwrite}}
+
+% \newindex {foo} defines an index named foo.
+% It automatically defines \fooindex such that
+% \fooindex ...rest of line... puts an entry in the index foo.
+% It also defines \fooindfile to be the number of the output channel for
+% the file that accumulates this index.  The file's extension is foo.
+% The name of an index should be no more than 2 characters long
+% for the sake of vms.
+%
+% The output stream is allocated and opened only when \iflinks is true;
+% \fooindex is defined (globally) in either case.
+%
+\def\newindex#1{%
+  \iflinks
+    \expandafter\newwrite \csname#1indfile\endcsname
+    % The space after #1 below is significant: it ends the file name.
+    \openout \csname#1indfile\endcsname \jobname.#1 % Open the file
+  \fi
+  \expandafter\xdef\csname#1index\endcsname{%     % Define @#1index
+    % The trailing % keeps the end-of-line from adding a space token to
+    % this macro's expansion (as in \newcodeindex).
+    \noexpand\doindex{#1}}%
+}
+
+% @defindex foo  ==  \newindex{foo}
+% (\parsearg passes the rest of the input line to \newindex.)
+%
+\def\defindex{\parsearg\newindex}
+
+% Define @defcodeindex, like @defindex except put all entries in @code.
+%
+\def\defcodeindex{\parsearg\newcodeindex}
+%
+% \newcodeindex{foo} mirrors \newindex, but \fooindex is defined to call
+% \docodeindex instead of \doindex.
+\def\newcodeindex#1{%
+  \iflinks
+    \expandafter\newwrite \csname#1indfile\endcsname
+    \openout \csname#1indfile\endcsname \jobname.#1
+  \fi
+  \expandafter\xdef\csname#1index\endcsname{%
+    \noexpand\docodeindex{#1}}%
+}
+
+
+% @synindex foo bar    makes index foo feed into index bar.
+% Do this instead of @defindex foo if you don't want it as a separate index.
+%
+% @syncodeindex foo bar   similar, but put all entries made for index foo
+% inside @code.
+%
+% Both arguments are delimited by a space, so the second index name must
+% itself be followed by a space (or end of line).
+%
+\def\synindex#1 #2 {\dosynindex\doindex{#1}{#2}}
+\def\syncodeindex#1 #2 {\dosynindex\docodeindex{#1}{#2}}
+
+% #1 is \doindex or \docodeindex, #2 the index getting redefined (foo),
+% #3 the target index (bar).
+%
+% The per-index flag \donesynindex<foo> records that foo's own output
+% stream has already been closed.  \csname turns a not-yet-defined name
+% into \relax, so the flag must be tested against \relax (a comparison
+% with an undefined control sequence can never succeed), and the flag
+% name must be built from plain characters inside \csname.
+\def\dosynindex#1#2#3{%
+  % Only do \closeout if we haven't already done it, else we'll end up
+  % closing the target index.
+  \expandafter \ifx\csname donesynindex#2\endcsname \relax
+    % The \closeout helps reduce unnecessary open files; the limit on the
+    % Acorn RISC OS is a mere 16 files.
+    \expandafter\closeout\csname#2indfile\endcsname
+    \expandafter\let\csname donesynindex#2\endcsname = 1
+  \fi
+  % redefine \fooindfile:
+  \expandafter\let\expandafter\temp\expandafter=\csname#3indfile\endcsname
+  \expandafter\let\csname#2indfile\endcsname=\temp
+  % redefine \fooindex:
+  \expandafter\xdef\csname#2index\endcsname{\noexpand#1{#3}}%
+}
+
+% Define \doindex, the driver for all \fooindex macros.
+% Argument #1 is generated by the calling \fooindex macro,
+%  and it is "foo", the name of the index.
+
+% \doindex just uses \parsearg; it calls \doind for the actual work.
+% This is because \doind is more useful to call from other macros.
+
+% There is also \dosubind {index}{topic}{subtopic}
+% which makes an entry in a two-level index such as the operation index.
+
+% The index name is remembered in \indexname while \parsearg collects
+% the rest of the line as the entry text.
+\def\doindex#1{\edef\indexname{#1}\parsearg\singleindexer}
+\def\singleindexer #1{\doind{\indexname}{#1}}
+
+% like the previous two, but they put @code around the argument.
+\def\docodeindex#1{\edef\indexname{#1}\parsearg\singlecodeindexer}
+\def\singlecodeindexer #1{\doind{\indexname}{\code{#1}}}
+
+% Take care of Texinfo commands that can appear in an index entry.
+% Since there are some commands we want to expand, and others we don't,
+% we have to laboriously prevent expansion for those that we don't.
+%
+% \indexdummies makes its redefinitions with plain \def/\let, so it is
+% meant to be called inside a group (as \dosubindwrite does).
+%
+\def\indexdummies{%
+  \escapechar = `\\     % use backslash in output files.
+  \def\@{@}% change to @@ when we switch to @ as escape char in index files.
+  \def\ {\realbackslash\space }%
+  %
+  % Need these in case \tex is in effect and \{ is a \delimiter again.
+  % But can't use \lbracecmd and \rbracecmd because texindex assumes
+  % braces and backslashes are used only as delimiters.
+  \let\{ = \mylbrace
+  \let\} = \myrbrace
+  %
+  % I don't entirely understand this, but when an index entry is
+  % generated from a macro call, the \endinput which \scanmacro inserts
+  % causes processing to be prematurely terminated.  This is,
+  % apparently, because \indexsorttmp is fully expanded, and \endinput
+  % is an expandable command.  The redefinition below makes \endinput
+  % disappear altogether for that purpose -- although logging shows that
+  % processing continues to some further point.  On the other hand, it
+  % seems \endinput does not hurt in the printed index arg, since that
+  % is still getting written without apparent harm.
+  %
+  % Sample source (mac-idx3.tex, reported by Graham Percival to
+  % help-texinfo, 22may06):
+  % @macro funindex {WORD}
+  % @findex xyz
+  % @end macro
+  % ...
+  % @funindex commtest
+  %
+  % The above is not enough to reproduce the bug, but it gives the flavor.
+  %
+  % Sample whatsit resulting:
+  % .@write3{\entry{xyz}{@folio }{@code {xyz@endinput }}}
+  %
+  % So:
+  \let\endinput = \empty
+  %
+  % Do the redefinitions.
+  \commondummies
+}
+
+% For the aux and toc files, @ is the escape character.  So we want to
+% redefine everything using @ as the escape character (instead of
+% \realbackslash, still used for index files).  When everything uses @,
+% this will be simpler.
+%
+% Like \indexdummies, the redefinitions are local to the current group.
+%
+\def\atdummies{%
+  \def\@{@@}%
+  \def\ {@ }%
+  \let\{ = \lbraceatcmd
+  \let\} = \rbraceatcmd
+  %
+  % Do the redefinitions.
+  \commondummies
+  % NOTE(review): \otherbackslash is defined outside this chunk;
+  % presumably it makes backslash an ordinary character -- confirm.
+  \otherbackslash
+}
+
+% Called from \indexdummies and \atdummies.
+%
+% Redefines the commands listed below so that, when expanded (e.g. in an
+% \edef or \write), each reproduces its own name instead of its
+% normal expansion.
+%
+\def\commondummies{%
+  %
+  % \definedummyword defines \#1 as \string\#1\space, thus effectively
+  % preventing its expansion.  This is used only for control words,
+  % not control letters, because the \space would be incorrect for
+  % control characters, but is needed to separate the control word
+  % from whatever follows.
+  %
+  % For control letters, we have \definedummyletter, which omits the
+  % space.
+  %
+  % These can be used both for control words that take an argument and
+  % those that do not.  If it is followed by {arg} in the input, then
+  % that will dutifully get written to the index (or wherever).
+  %
+  \def\definedummyword  ##1{\def##1{\string##1\space}}%
+  \def\definedummyletter##1{\def##1{\string##1}}%
+  \let\definedummyaccent\definedummyletter
+  %
+  \commondummiesnofonts
+  %
+  \definedummyletter\_%
+  %
+  % Non-English letters.
+  \definedummyword\AA
+  \definedummyword\AE
+  \definedummyword\L
+  \definedummyword\OE
+  \definedummyword\O
+  \definedummyword\aa
+  \definedummyword\ae
+  \definedummyword\l
+  \definedummyword\oe
+  \definedummyword\o
+  \definedummyword\ss
+  \definedummyword\exclamdown
+  \definedummyword\questiondown
+  \definedummyword\ordf
+  \definedummyword\ordm
+  %
+  % Although these internal commands shouldn't show up, sometimes they do.
+  \definedummyword\bf
+  \definedummyword\gtr
+  \definedummyword\hat
+  \definedummyword\less
+  \definedummyword\sf
+  \definedummyword\sl
+  \definedummyword\tclose
+  \definedummyword\tt
+  %
+  \definedummyword\LaTeX
+  \definedummyword\TeX
+  %
+  % Assorted special characters.
+  \definedummyword\bullet
+  \definedummyword\comma
+  \definedummyword\copyright
+  \definedummyword\registeredsymbol
+  \definedummyword\dots
+  \definedummyword\enddots
+  \definedummyword\equiv
+  \definedummyword\error
+  \definedummyword\euro
+  \definedummyword\guillemetleft
+  \definedummyword\guillemetright
+  \definedummyword\guilsinglleft
+  \definedummyword\guilsinglright
+  \definedummyword\expansion
+  \definedummyword\minus
+  \definedummyword\pounds
+  \definedummyword\point
+  \definedummyword\print
+  \definedummyword\quotedblbase
+  \definedummyword\quotedblleft
+  \definedummyword\quotedblright
+  \definedummyword\quoteleft
+  \definedummyword\quoteright
+  \definedummyword\quotesinglbase
+  \definedummyword\result
+  \definedummyword\textdegree
+  %
+  % We want to disable all macros so that they are not expanded by \write.
+  \macrolist
+  %
+  \normalturnoffactive
+  %
+  % Handle some cases of @value -- where it does not contain any
+  % (non-fully-expandable) commands.
+  \makevalueexpandable
+}
+
+% \commondummiesnofonts: common to \commondummies and \indexnofonts.
+%
+% The caller must define \definedummyletter, \definedummyaccent and
+% \definedummyword first; this macro only applies them to the list of
+% commands below, so the same list serves both callers.
+%
+\def\commondummiesnofonts{%
+  % Control letters and accents.
+  \definedummyletter\!%
+  \definedummyaccent\"%
+  \definedummyaccent\'%
+  \definedummyletter\*%
+  \definedummyaccent\,%
+  \definedummyletter\.%
+  \definedummyletter\/%
+  \definedummyletter\:%
+  \definedummyaccent\=%
+  \definedummyletter\?%
+  \definedummyaccent\^%
+  \definedummyaccent\`%
+  \definedummyaccent\~%
+  \definedummyword\u
+  \definedummyword\v
+  \definedummyword\H
+  \definedummyword\dotaccent
+  \definedummyword\ringaccent
+  \definedummyword\tieaccent
+  \definedummyword\ubaraccent
+  \definedummyword\udotaccent
+  \definedummyword\dotless
+  %
+  % Texinfo font commands.
+  \definedummyword\b
+  \definedummyword\i
+  \definedummyword\r
+  \definedummyword\sc
+  \definedummyword\t
+  %
+  % Commands that take arguments.
+  \definedummyword\acronym
+  \definedummyword\cite
+  \definedummyword\code
+  \definedummyword\command
+  \definedummyword\dfn
+  \definedummyword\emph
+  \definedummyword\env
+  \definedummyword\file
+  \definedummyword\kbd
+  \definedummyword\key
+  \definedummyword\math
+  \definedummyword\option
+  \definedummyword\pxref
+  \definedummyword\ref
+  \definedummyword\samp
+  \definedummyword\strong
+  \definedummyword\tie
+  \definedummyword\uref
+  \definedummyword\url
+  \definedummyword\var
+  \definedummyword\verb
+  \definedummyword\w
+  \definedummyword\xref
+}
+
+% \indexnofonts is used when outputting the strings to sort the index
+% by, and when constructing control sequence names.  It eliminates all
+% control sequences and just writes whatever the best ASCII sort string
+% would be for a given command (usually its argument).
+%
+% The redefinitions are local, so callers wrap the call in a group
+% (see \dosubindwrite).
+%
+\def\indexnofonts{%
+  % Accent commands should become @asis.
+  \def\definedummyaccent##1{\let##1\asis}%
+  % We can just ignore other control letters.
+  \def\definedummyletter##1{\let##1\empty}%
+  % Hopefully, all control words can become @asis.
+  \let\definedummyword\definedummyaccent
+  %
+  \commondummiesnofonts
+  %
+  % Don't no-op \tt, since it isn't a user-level command
+  % and is used in the definitions of the active chars like <, >, |, etc.
+  % Likewise with the other plain tex font commands.
+  %\let\tt=\asis
+  %
+  \def\ { }%
+  \def\@{@}%
+  % how to handle braces?
+  \def\_{\normalunderscore}%
+  %
+  % Non-English letters.
+  \def\AA{AA}%
+  \def\AE{AE}%
+  \def\L{L}%
+  \def\OE{OE}%
+  \def\O{O}%
+  \def\aa{aa}%
+  \def\ae{ae}%
+  \def\l{l}%
+  \def\oe{oe}%
+  \def\o{o}%
+  \def\ss{ss}%
+  \def\exclamdown{!}%
+  \def\questiondown{?}%
+  \def\ordf{a}%
+  \def\ordm{o}%
+  %
+  \def\LaTeX{LaTeX}%
+  \def\TeX{TeX}%
+  %
+  % Assorted special characters.
+  % (The following {} will end up in the sort string, but that's ok.)
+  \def\bullet{bullet}%
+  \def\comma{,}%
+  \def\copyright{copyright}%
+  \def\registeredsymbol{R}%
+  \def\dots{...}%
+  \def\enddots{...}%
+  \def\equiv{==}%
+  \def\error{error}%
+  \def\euro{euro}%
+  \def\guillemetleft{<<}%
+  \def\guillemetright{>>}%
+  \def\guilsinglleft{<}%
+  \def\guilsinglright{>}%
+  \def\expansion{==>}%
+  \def\minus{-}%
+  \def\pounds{pounds}%
+  \def\point{.}%
+  \def\print{-|}%
+  \def\quotedblbase{"}%
+  \def\quotedblleft{"}%
+  \def\quotedblright{"}%
+  \def\quoteleft{`}%
+  \def\quoteright{'}%
+  \def\quotesinglbase{,}%
+  \def\result{=>}%
+  \def\textdegree{degrees}%
+  %
+  % We need to get rid of all macros, leaving only the arguments (if present).
+  % Of course this is not nearly correct, but it is the best we can do for now.
+  % makeinfo does not expand macros in the argument to @deffn, which ends up
+  % writing an index entry, and texindex isn't prepared for an index sort entry
+  % that starts with \.
+  %
+  % Since macro invocations are followed by braces, we can just redefine them
+  % to take a single TeX argument.  The case of a macro invocation that
+  % goes to end-of-line is not handled.
+  %
+  \macrolist
+}
+
+% Initial values: \indexbackslash is a placeholder until \printindex
+% redefines it; \SETmarginindex equal to \relax means the `marginindex'
+% variable is unset, which \dosubindwrite tests for.
+\let\indexbackslash=0  %overridden during \printindex.
+\let\SETmarginindex=\relax % put index entries in margin (undocumented)?
+
+% Most index entries go through here, but \dosubind is the general case.
+% #1 is the index name, #2 is the entry text.
+% The subentry argument passed on to \dosubind is empty.
+\def\doind#1#2{\dosubind{#1}{#2}{}}
+
+% Workhorse for all \fooindexes.
+% #1 is name of index, #2 is stuff to put there, #3 is subentry --
+% empty if called from \doind, as we usually are (the main exception
+% is with most defuns, which call us directly).
+%
+% Nothing at all is done unless \iflinks is true.  The work happens in a
+% group, so \toks0, \thirdarg and \writeto are restored afterwards.
+%
+\def\dosubind#1#2#3{%
+  \iflinks
+  {%
+    % Store the main index entry text (including the third arg).
+    \toks0 = {#2}%
+    % If third arg is present, precede it with a space.
+    \def\thirdarg{#3}%
+    \ifx\thirdarg\empty \else
+      \toks0 = \expandafter{\the\toks0 \space #3}%
+    \fi
+    %
+    % \writeto holds the \<index>indfile control sequence for #1.
+    \edef\writeto{\csname#1indfile\endcsname}%
+    %
+    \safewhatsit\dosubindwrite
+  }%
+  \fi
+}
+
+% Write the entry in \toks0 to the index file:
+% (\toks0 and \writeto are set up by \dosubind, which calls us via
+% \safewhatsit.)
+%
+\def\dosubindwrite{%
+  % Put the index entry in the margin if desired.
+  \ifx\SETmarginindex\relax\else
+    \insert\margin{\hbox{\vrule height8pt depth3pt width0pt \the\toks0}}%
+  \fi
+  %
+  % Remember, we are within a group.
+  \indexdummies % Must do this here, since \bf, etc expand at this stage
+  \def\backslashcurfont{\indexbackslash}% \indexbackslash isn't defined now
+      % so it will be output as is; and it will print as backslash.
+  %
+  % Process the index entry with all font commands turned off, to
+  % get the string to sort by.
+  {\indexnofonts
+   \edef\temp{\the\toks0}% need full expansion
+   \xdef\indexsorttmp{\temp}%
+  }%
+  %
+  % Set up the complete index entry, with both the sort key and
+  % the original text, including any font commands.  We write
+  % three arguments to \entry to the .?? file (four in the
+  % subentry case), texindex reduces to two when writing the .??s
+  % sorted result.
+  % NOTE(review): the \write below always emits exactly three groups;
+  % a subentry has already been appended to \toks0 by \dosubind.
+  \edef\temp{%
+    \write\writeto{%
+      \string\entry{\indexsorttmp}{\noexpand\folio}{\the\toks0}}%
+  }%
+  \temp
+}
+
+% Take care of unwanted page breaks/skips around a whatsit:
+%
+% If a skip is the last thing on the list now, preserve it
+% by backing up by \lastskip, doing the \write, then inserting
+% the skip again.  Otherwise, the whatsit generated by the
+% \write or \pdfdest will make \lastskip zero.  The result is that
+% sequences like this:
+% @end defun
+% @tindex whatever
+% @defun ...
+% will have extra space inserted, because the \medbreak in the
+% start of the @defun won't see the skip inserted by the @end of
+% the previous defun.
+%
+% But don't do any of this if we're not in vertical mode.  We
+% don't want to do a \vskip and prematurely end a paragraph.
+%
+% Avoid page breaks due to these extra skips, too.
+%
+% But wait, there is a catch there:
+% We'll have to check whether \lastskip is zero skip.  \ifdim is not
+% sufficient for this purpose, as it ignores stretch and shrink parts
+% of the skip.  The only way seems to be to check the textual
+% representation of the skip.
+%
+% The following is almost like \def\zeroskipmacro{0.0pt} except that
+% the ``p'' and ``t'' characters have catcode \other, not 11 (letter).
+%
+% \zeroskipmacro holds the textual form of a zero skip, as produced by
+% \the, for comparison with \ifx against \lastskipmacro below.
+\edef\zeroskipmacro{\expandafter\the\csname z@skip\endcsname}
+%
+% Scratch registers used to remember \lastskip and \lastpenalty.
+\newskip\whatsitskip
+\newcount\whatsitpenalty
+%
+% ..., ready, GO:
+%
+% \safewhatsit{CODE} runs CODE (expected to produce a whatsit).  In
+% horizontal mode CODE is simply executed; otherwise a preceding skip
+% or large penalty is re-created after CODE.
+%
+\def\safewhatsit#1{%
+\ifhmode
+  #1%
+\else
+  % \lastskip and \lastpenalty cannot both be nonzero simultaneously.
+  \whatsitskip = \lastskip
+  \edef\lastskipmacro{\the\lastskip}%
+  \whatsitpenalty = \lastpenalty
+  %
+  % If \lastskip is nonzero, that means the last item was a
+  % skip.  And since a skip is discardable, that means this
+  % -\whatsitskip glue we're inserting is preceded by a
+  % non-discardable item, therefore it is not a potential
+  % breakpoint, therefore no \nobreak needed.
+  \ifx\lastskipmacro\zeroskipmacro
+  \else
+    \vskip-\whatsitskip
+  \fi
+  %
+  #1%
+  %
+  \ifx\lastskipmacro\zeroskipmacro
+    % If \lastskip was zero, perhaps the last item was a penalty, and
+    % perhaps it was >=10000, e.g., a \nobreak.  In that case, we want
+    % to re-insert the same penalty (values >10000 are used for various
+    % signals); since we just inserted a non-discardable item, any
+    % following glue (such as a \parskip) would be a breakpoint.  For example:
+    %
+    %   @deffn deffn-whatever
+    %   @vindex index-whatever
+    %   Description.
+    % would allow a break between the index-whatever whatsit
+    % and the "Description." paragraph.
+    \ifnum\whatsitpenalty>9999 \penalty\whatsitpenalty \fi
+  \else
+    % On the other hand, if we had a nonzero \lastskip,
+    % this make-up glue would be preceded by a non-discardable item
+    % (the whatsit from the \write), so we must insert a \nobreak.
+    \nobreak\vskip\whatsitskip
+  \fi
+\fi
+}
+
+% The index entry written in the file actually looks like
+%  \entry {sortstring}{page}{topic}
+% or
+%  \entry {sortstring}{page}{topic}{subtopic}
+% The texindex program reads in these files and writes files
+% containing these kinds of lines:
+%  \initial {c}
+%     before the first topic whose initial is c
+%  \entry {topic}{pagelist}
+%     for a topic that is used without subtopics
+%  \primary {topic}
+%     for the beginning of a topic that is used with subtopics
+%  \secondary {subtopic}{pagelist}
+%     for each subtopic.
+
+% Define the user-accessible indexing commands
+% @findex, @vindex, @kindex, @cindex.
+%
+% Each is a macro that expands to the corresponding two-letter index
+% command at the time of use, so a later redefinition of, e.g.,
+% \fnindex (as done by \dosynindex) is picked up automatically.
+
+\def\findex {\fnindex}
+\def\kindex {\kyindex}
+\def\cindex {\cpindex}
+\def\vindex {\vrindex}
+\def\tindex {\tpindex}
+\def\pindex {\pgindex}
+
+% @cindexsub "SUBTOPIC" TOPIC  makes a two-level entry in the cp index.
+%
+% The user-level command only opens a group and makes end-of-line
+% active; the real work is done by \cindexsubxxx, whose parameter text
+% ends at that active end-of-line and which closes the group again.
+% The two macros must have different names: if the worker were also
+% called \cindexsub, its \gdef would overwrite the group-opening
+% definition and its \endgroup would have no matching \begingroup.
+\def\cindexsub {\begingroup\obeylines\cindexsubxxx}
+{\obeylines %
+\gdef\cindexsubxxx "#1" #2^^M{\endgroup %
+\dosubind{cp}{#2}{#1}}}
+
+% Define the macros used in formatting output of the sorted index material.
+
+% @printindex causes a particular index (the ??s file) to get printed.
+% It does not print any chapter heading (usually an @unnumbered).
+%
+% #1 is the index name; the sorted file read is \jobname.#1s.
+% Everything happens inside a group, so the font, catcode and
+% \indexbackslash changes do not leak out.
+%
+\parseargdef\printindex{\begingroup
+  \dobreak \chapheadingskip{10000}%
+  %
+  \smallfonts \rm
+  \tolerance = 9500
+  \plainfrenchspacing
+  \everypar = {}% don't want the \kern\-parindent from indentation suppression.
+  %
+  % See if the index file exists and is nonempty.
+  % Change catcode of @ here so that if the index file contains
+  % \initial {@}
+  % as its first line, TeX doesn't complain about mismatched braces
+  % (because it thinks @} is a control sequence).
+  \catcode`\@ = 11
+  \openin 1 \jobname.#1s
+  \ifeof 1
+    % \enddoublecolumns gets confused if there is no text in the index,
+    % and it loses the chapter title and the aux file entries for the
+    % index.  The easiest way to prevent this problem is to make sure
+    % there is some text.
+    \putwordIndexNonexistent
+  \else
+    %
+    % If the index file exists but is empty, then \openin leaves \ifeof
+    % false.  We have to make TeX try to read something from the file, so
+    % it can discover if there is anything in it.
+    \read 1 to \temp
+    \ifeof 1
+      \putwordIndexIsEmpty
+    \else
+      % Index files are almost Texinfo source, but we use \ as the escape
+      % character.  It would be better to use @, but that's too big a change
+      % to make right now.
+      \def\indexbackslash{\backslashcurfont}%
+      \catcode`\\ = 0
+      \escapechar = `\\
+      \begindoublecolumns
+      \input \jobname.#1s
+      \enddoublecolumns
+    \fi
+  \fi
+  \closein 1
+\endgroup}
+
+% These macros are used by the sorted index file itself.
+% Change them to control the appearance of the index.
+
+% \initial{X} typesets the heading X that starts a new group of index
+% entries.  The double braces keep the font changes local.
+\def\initial#1{{%
+  % Some minor font changes for the special characters.
+  \let\tentt=\sectt \let\tt=\sectt \let\sf=\sectt
+  %
+  % Remove any glue we may have, we'll be inserting our own.
+  \removelastskip
+  %
+  % We like breaks before the index initials, so insert a bonus.
+  % (The stretch added before the penalty is cancelled right after it.)
+  \nobreak
+  \vskip 0pt plus 3\baselineskip
+  \penalty 0
+  \vskip 0pt plus -3\baselineskip
+  %
+  % Typeset the initial.  Making this add up to a whole number of
+  % baselineskips increases the chance of the dots lining up from column
+  % to column.  It still won't often be perfect, because of the stretch
+  % we need before each entry, but it's better.
+  %
+  % No shrink because it confuses \balancecolumns.
+  \vskip 1.67\baselineskip plus .5\baselineskip
+  \leftline{\secbf #1}%
+  % Do our best not to break after the initial.
+  \nobreak
+  \vskip .33\baselineskip plus .1\baselineskip
+}}
+
+% \entry typesets a paragraph consisting of the text (#1), dot leaders, and
+% then page number (#2) flushed to the right margin.  It is used for index
+% and table of contents entries.  The paragraph is indented by \leftskip.
+%
+% A straightforward implementation would start like this:
+%      \def\entry#1#2{...
+% But this freezes the catcodes in the argument, and can cause problems to
+% @code, which sets - active.  This problem was fixed by a kludge---
+% ``-'' was active throughout whole index, but this isn't really right.
+%
+% The right solution is to prevent \entry from swallowing the whole text.
+%                                 --kasal, 21nov03
+% \entry itself takes no macro arguments: it sets up the paragraph
+% parameters inside a group and then hands over to \doentry, which
+% processes the entry text as an ordinary brace group.  The group
+% opened here is closed in \finishentry.
+\def\entry{%
+  \begingroup
+    %
+    % Start a new paragraph if necessary, so our assignments below can't
+    % affect previous text.
+    \par
+    %
+    % Do not fill out the last line with white space.
+    \parfillskip = 0in
+    %
+    % No extra space above this paragraph.
+    \parskip = 0in
+    %
+    % Do not prefer a separate line ending with a hyphen to fewer lines.
+    \finalhyphendemerits = 0
+    %
+    % \hangindent is only relevant when the entry text and page number
+    % don't both fit on one line.  In that case, bob suggests starting the
+    % dots pretty far over on the line.  Unfortunately, a large
+    % indentation looks wrong when the entry text itself is broken across
+    % lines.  So we use a small indentation and put up with long leaders.
+    %
+    % \hangafter is reset to 1 (which is the value we want) at the start
+    % of each paragraph, so we need not do anything with that.
+    \hangindent = 2em
+    %
+    % When the entry text needs to be broken, just fill out the first line
+    % with blank space.
+    \rightskip = 0pt plus1fil
+    %
+    % A bit of stretch before each entry for the benefit of balancing
+    % columns.
+    \vskip 0pt plus1pt
+    %
+    % Swallow the left brace of the text (first parameter):
+    % the \let below assigns the next token (that brace) to \temp, and
+    % \afterassignment then runs \doentry.
+    \afterassignment\doentry
+    \let\temp =
+}
+% \doentry reopens the group whose left brace \entry swallowed and
+% starts the paragraph.  The entry text that follows in the input is
+% typeset normally; when its closing brace ends the group, \aftergroup
+% makes \finishentry run next.
+\def\doentry{%
+    \bgroup % Instead of the swallowed brace.
+      \noindent
+      \aftergroup\finishentry
+      % And now comes the text of the entry.
+}
+% \finishentry{PAGES} completes the paragraph started by \doentry and
+% closes the group opened by \entry.
+\def\finishentry#1{%
+    % #1 is the page number.
+    %
+    % The following is kludged to not output a line of dots in the index if
+    % there are no page numbers.  The next person who breaks this will be
+    % cursed by a Unix daemon.
+    \setbox\boxA = \hbox{#1}%
+    \ifdim\wd\boxA = 0pt
+      \ %
+    \else
+      %
+      % If we must, put the page number on a line of its own, and fill out
+      % this line with blank space.  (The \hfil is overwhelmed with the
+      % fill leaders glue in \indexdotfill if the page number does fit.)
+      \hfil\penalty50
+      \null\nobreak\indexdotfill % Have leaders before the page number.
+      %
+      % The `\ ' here is removed by the implicit \unskip that TeX does as
+      % part of (the primitive) \par.  Without it, a spurious underfull
+      % \hbox ensues.
+      \ifpdf
+       \pdfgettoks#1.%
+       \ \the\toksA
+      \else
+       \ #1%
+      \fi
+    \fi
+    \par
+  \endgroup
+}
+
+% Like plain.tex's \dotfill, except uses up at least 1 em.
+% (The leaders glue is `1em plus 1fill', hence the 1em minimum.)
+\def\indexdotfill{\cleaders
+  \hbox{$\mathsurround=0pt \mkern1.5mu.\mkern1.5mu$}\hskip 1em plus 1fill}
+
+% \primary{TOPIC} sets TOPIC flush left on a line of its own
+% (a topic that has subtopics, so no page list follows).
+\def\primary #1{\line{#1\hfil}}
+
+% \secondary{SUBTOPIC}{PAGES} typesets one subentry: SUBTOPIC indented
+% by \secondaryindent, dot leaders, then the page list.  The double
+% braces keep the paragraph parameter changes local.
+\newskip\secondaryindent \secondaryindent=0.5cm
+\def\secondary#1#2{{%
+  \parfillskip=0in
+  \parskip=0in
+  \hangindent=1in
+  \hangafter=1
+  \noindent\hskip\secondaryindent\hbox{#1}\indexdotfill
+  \ifpdf
+    \pdfgettoks#2.\ \the\toksA % The page number ends the paragraph.
+  \else
+    #2
+  \fi
+  \par
+}}
+
+% Define two-column mode, which we use to typeset indexes.
+% Adapted from the TeXbook, page 416, which is to say,
+% the manmac.tex format used to print the TeXbook itself.
+% @ is made a letter so that the code below can use \dimen@.
+\catcode`\@=11
+
+% \partialpage collects single-column material that precedes the
+% double-column text on the same page; \doublecolumnhsize remembers the
+% width of one column.
+\newbox\partialpage
+\newdimen\doublecolumnhsize
+
+% \begindoublecolumns saves the material already on the current page in
+% \partialpage, installs \doublecolumnout as the output routine, and
+% changes \hsize and \vsize for two-column setting.  All of this is
+% local to the group opened here.
+\def\begindoublecolumns{\begingroup % ended by \enddoublecolumns
+  % Grab any single-column material above us.
+  \output = {%
+    %
+    % Here is a possibility not foreseen in manmac: if we accumulate a
+    % whole lot of material, we might end up calling this \output
+    % routine twice in a row (see the doublecol-lose test, which is
+    % essentially a couple of indexes with @setchapternewpage off).  In
+    % that case we just ship out what is in \partialpage with the normal
+    % output routine.  Generally, \partialpage will be empty when this
+    % runs and this will be a no-op.  See the indexspread.tex test case.
+    \ifvoid\partialpage \else
+      \onepageout{\pagecontents\partialpage}%
+    \fi
+    %
+    \global\setbox\partialpage = \vbox{%
+      % Unvbox the main output page.
+      \unvbox\PAGE
+      \kern-\topskip \kern\baselineskip
+    }%
+  }%
+  \eject % run that output routine to set \partialpage
+  %
+  % Use the double-column output routine for subsequent pages.
+  \output = {\doublecolumnout}%
+  %
+  % Change the page size parameters.  We could do this once outside this
+  % routine, in each of @smallbook, @afourpaper, and the default 8.5x11
+  % format, but then we repeat the same computation.  Repeating a couple
+  % of assignments once per index is clearly meaningless for the
+  % execution time, so we may as well do it in one place.
+  %
+  % First we halve the line length, less a little for the gutter between
+  % the columns.  We compute the gutter based on the line length, so it
+  % changes automatically with the paper format.  The magic constant
+  % below is chosen so that the gutter has the same value (well, +-<1pt)
+  % as it did when we hard-coded it.
+  %
+  % We put the result in a separate register, \doublecolumnhsize, so we
+  % can restore it in \pagesofar, after \hsize itself has (potentially)
+  % been clobbered.
+  %
+  \doublecolumnhsize = \hsize
+    \advance\doublecolumnhsize by -.04154\hsize
+    \divide\doublecolumnhsize by 2
+  \hsize = \doublecolumnhsize
+  %
+  % Double the \vsize as well.  (We don't need a separate register here,
+  % since nobody clobbers \vsize.)
+  \vsize = 2\vsize
+}
+
+% The double-column output routine for all double-column pages except
+% the last.
+%
+% It splits two columns off box255, ships them out through \pagesofar,
+% and returns whatever is left of box255 to the main vertical list.
+%
+\def\doublecolumnout{%
+  \splittopskip=\topskip \splitmaxdepth=\maxdepth
+  % Get the available space for the double columns -- the normal
+  % (undoubled) page height minus any material left over from the
+  % previous page.
+  \dimen@ = \vsize
+  \divide\dimen@ by 2
+  \advance\dimen@ by -\ht\partialpage
+  %
+  % box0 will be the left-hand column, box2 the right.
+  \setbox0=\vsplit255 to\dimen@ \setbox2=\vsplit255 to\dimen@
+  \onepageout\pagesofar
+  % Put back the remainder, followed by the penalty that was removed
+  % at the page break.
+  \unvbox255
+  \penalty\outputpenalty
+}
+%
+% Re-output the contents of the output page -- any previous material,
+% followed by the two boxes we just split, in box0 and box2.
+% Both column boxes are forced to the column width and set side by
+% side in a box of width \pagewidth.
+\def\pagesofar{%
+  \unvbox\partialpage
+  %
+  \hsize = \doublecolumnhsize
+  \wd0=\hsize \wd2=\hsize
+  \hbox to\pagewidth{\box0\hfil\box2}%
+}
+%
+% All done with double columns.
+% Balances the remaining double-column material onto the current page,
+% restores the normal output routine, and closes the group opened by
+% \begindoublecolumns.
+\def\enddoublecolumns{%
+  % The following penalty ensures that the page builder is exercised
+  % _before_ we change the output routine.  This is necessary in the
+  % following situation:
+  %
+  % The last section of the index consists only of a single entry.
+  % Before this section, \pagetotal is less than \pagegoal, so no
+  % break occurs before the last section starts.  However, the last
+  % section, consisting of \initial and the single \entry, does not
+  % fit on the page and has to be broken off.  Without the following
+  % penalty the page builder will not be exercised until \eject
+  % below, and by that time we'll already have changed the output
+  % routine to the \balancecolumns version, so the next-to-last
+  % double-column page will be processed with \balancecolumns, which
+  % is wrong:  The two columns will go to the main vertical list, with
+  % the broken-off section in the recent contributions.  As soon as
+  % the output routine finishes, TeX starts reconsidering the page
+  % break.  The two columns and the broken-off section both fit on the
+  % page, because the two columns now take up only half of the page
+  % goal.  When TeX sees \eject from below which follows the final
+  % section, it invokes the new output routine that we've set after
+  % \balancecolumns below; \onepageout will try to fit the two columns
+  % and the final section into the vbox of \pageheight (see
+  % \pagebody), causing an overfull box.
+  %
+  % Note that glue won't work here, because glue does not exercise the
+  % page builder, unlike penalties (see The TeXbook, pp. 280-281).
+  \penalty0
+  %
+  \output = {%
+    % Split the last of the double-column material.  Leave it on the
+    % current page, no automatic page break.
+    \balancecolumns
+    %
+    % If we end up splitting too much material for the current page,
+    % though, there will be another page break right after this \output
+    % invocation ends.  Having called \balancecolumns once, we do not
+    % want to call it again.  Therefore, reset \output to its normal
+    % definition right away.  (We hope \balancecolumns will never be
+    % called on to balance too much material, but if it is, this makes
+    % the output somewhat more palatable.)
+    \global\output = {\onepageout{\pagecontents\PAGE}}%
+  }%
+  \eject
+  \endgroup % started in \begindoublecolumns
+  %
+  % \pagegoal was set to the doubled \vsize above, since we restarted
+  % the current page.  We're now back to normal single-column
+  % typesetting, so reset \pagegoal to the normal \vsize (after the
+  % \endgroup where \vsize got restored).
+  \pagegoal = \vsize
+}
+%
+% Called at the end of the double column material.
+\def\balancecolumns{%
+  % Split the contents of box255 into two columns of equal height
+  % (box0 and box2) and hand them to \pagesofar.
+  \setbox0 = \vbox{\unvbox255}% like \box255 but more efficient, see p.120.
+  % Initial target: half of (total height + \topskip - \baselineskip).
+  \dimen@ = \ht0
+  \advance\dimen@ by \topskip
+  \advance\dimen@ by-\baselineskip
+  \divide\dimen@ by 2 % target to split to
+  %debug\message{final 2-column material height=\the\ht0, target=\the\dimen@.}%
+  \splittopskip = \topskip
+  % Loop until we get a decent breakpoint.
+  % Each pass works on a fresh copy of box0: box1 gets the first
+  % \dimen@ worth, box3 the remainder.  While the remainder is still
+  % taller than the target, raise the target by 1pt and try again.
+  % The assignments are \global because the loop runs inside a group
+  % (which keeps the \vbadness change local).
+  {%
+    \vbadness = 10000
+    \loop
+      \global\setbox3 = \copy0
+      \global\setbox1 = \vsplit3 to \dimen@
+    \ifdim\ht3>\dimen@
+      \global\advance\dimen@ by 1pt
+    \repeat
+  }%
+  %debug\message{split to \the\dimen@, column heights: \the\ht1, \the\ht3.}%
+  % Repack both columns to exactly the final target height.
+  \setbox0=\vbox to\dimen@{\unvbox1}%
+  \setbox2=\vbox to\dimen@{\unvbox3}%
+  %
+  \pagesofar
+}
+\catcode`\@ = \other
+
+
+\message{sectioning,}
+% Chapters, sections, etc.
+
+% \unnumberedno is an oxymoron, of course.  But we count the unnumbered
+% sections so that we can refer to them unambiguously in the pdf
+% outlines by their "section number".  We avoid collisions with chapter
+% numbers by starting them at 10000.  (If a document ever has 10000
+% chapters, we're in trouble anyway, I'm sure.)
+\newcount\unnumberedno \unnumberedno = 10000
+\newcount\chapno
+\newcount\secno        \secno=0
+\newcount\subsecno     \subsecno=0
+\newcount\subsubsecno  \subsubsecno=0
+
+% This counter is funny since it counts through charcodes of letters A, B, ...
+\newcount\appendixno  \appendixno = `\@
+%
+% \def\appendixletter{\char\the\appendixno}
+% We do the following ugly conditional instead of the above simple
+% construct for the sake of pdftex, which needs the actual
+% letter in the expansion, not just typeset.
+%
+\def\appendixletter{%
+  % Expands to the literal letter A..Z whose character code equals
+  % \appendixno.  For any other value, falls back to \char\the\appendixno.
+  % The 26 nested \ifnum tests are closed by the 26 \fi's at the end.
+  \ifnum\appendixno=`A A%
+  \else\ifnum\appendixno=`B B%
+  \else\ifnum\appendixno=`C C%
+  \else\ifnum\appendixno=`D D%
+  \else\ifnum\appendixno=`E E%
+  \else\ifnum\appendixno=`F F%
+  \else\ifnum\appendixno=`G G%
+  \else\ifnum\appendixno=`H H%
+  \else\ifnum\appendixno=`I I%
+  \else\ifnum\appendixno=`J J%
+  \else\ifnum\appendixno=`K K%
+  \else\ifnum\appendixno=`L L%
+  \else\ifnum\appendixno=`M M%
+  \else\ifnum\appendixno=`N N%
+  \else\ifnum\appendixno=`O O%
+  \else\ifnum\appendixno=`P P%
+  \else\ifnum\appendixno=`Q Q%
+  \else\ifnum\appendixno=`R R%
+  \else\ifnum\appendixno=`S S%
+  \else\ifnum\appendixno=`T T%
+  \else\ifnum\appendixno=`U U%
+  \else\ifnum\appendixno=`V V%
+  \else\ifnum\appendixno=`W W%
+  \else\ifnum\appendixno=`X X%
+  \else\ifnum\appendixno=`Y Y%
+  \else\ifnum\appendixno=`Z Z%
+  % The \the is necessary, despite appearances, because \appendixletter is
+  % expanded while writing the .toc file.  \char\appendixno is not
+  % expandable, thus it is written literally, thus all appendixes come out
+  % with the same letter (or @) in the toc without it.
+  \else\char\the\appendixno
+  \fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi
+  \fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi}
+
+% Each @chapter defines these (using marks) as the number+name, number
+% and name of the chapter.  Page headings and footings can use
+% these.  @section does likewise.
+\def\thischapter{}
+\def\thischapternum{}
+\def\thischaptername{}
+\def\thissection{}
+\def\thissectionnum{}
+\def\thissectionname{}
+
+\newcount\absseclevel % used to calculate proper heading level
+\newcount\secbase\secbase=0 % @raisesections/@lowersections modify this count
+
+% @raisesections: treat @section as chapter, @subsection as section, etc.
+\def\raisesections{\global\advance\secbase by -1}
+\let\up=\raisesections % original BFox name
+
+% @lowersections: treat @chapter as section, @section as subsection, etc.
+\def\lowersections{\global\advance\secbase by 1}
+\let\down=\lowersections % original BFox name
+
+% we only have subsub.
+\chardef\maxseclevel = 3
+%
+% A numbered section within an unnumbered changes to unnumbered too.
+% To achieve this, remember the "biggest" unnum. sec. we are currently in:
+\chardef\unmlevel = \maxseclevel
+%
+% Trace whether the current chapter is an appendix or not:
+% \chapheadtype is "N" or "A", unnumbered chapters are ignored.
+\def\chapheadtype{N}
+
+% Choose a heading macro
+% #1 is heading type
+% #2 is heading level
+% #3 is text for heading
+\def\genhead#1#2#3{%
+  % Dispatch a sectioning command to the matching ...zzz macro.
+  %   #1  heading type: N (numbered), A (appendix) or U (unnumbered)
+  %   #2  nominal level: 0 = chapter ... 3 = subsubsection
+  %   #3  heading text
+  %
+  % Compute the abs. sec. level:
+  \absseclevel=#2
+  \advance\absseclevel by \secbase
+  % Clamp \absseclevel to the supported range 0..\maxseclevel.  The
+  % named constant is used instead of a literal so that the limit is
+  % stated in exactly one place.
+  \ifnum \absseclevel < 0
+    \absseclevel = 0
+  \else
+    \ifnum \absseclevel > \maxseclevel
+      \absseclevel = \maxseclevel
+    \fi
+  \fi
+  % The heading type:
+  \def\headtype{#1}%
+  \if \headtype U%
+    % Remember the highest-ranking (numerically smallest) unnumbered
+    % level we are currently inside.
+    \ifnum \absseclevel < \unmlevel
+      \chardef\unmlevel = \absseclevel
+    \fi
+  \else
+    % Check for appendix sections:
+    \ifnum \absseclevel = 0
+      % A chapter-level heading decides whether we are in an appendix.
+      \edef\chapheadtype{\headtype}%
+    \else
+      \if \headtype A\if \chapheadtype N%
+       \errmessage{@appendix... within a non-appendix chapter}%
+      \fi\fi
+    \fi
+    % Check for numbered within unnumbered:
+    \ifnum \absseclevel > \unmlevel
+      \def\headtype{U}%
+    \else
+      % Not nested inside an unnumbered heading: reset the marker to
+      % its initial value.
+      \chardef\unmlevel = \maxseclevel
+    \fi
+  \fi
+  % Now print the heading:
+  \if \headtype U%
+    \ifcase\absseclevel
+       \unnumberedzzz{#3}%
+    \or \unnumberedseczzz{#3}%
+    \or \unnumberedsubseczzz{#3}%
+    \or \unnumberedsubsubseczzz{#3}%
+    \fi
+  \else
+    \if \headtype A%
+      \ifcase\absseclevel
+         \appendixzzz{#3}%
+      \or \appendixsectionzzz{#3}%
+      \or \appendixsubseczzz{#3}%
+      \or \appendixsubsubseczzz{#3}%
+      \fi
+    \else
+      \ifcase\absseclevel
+         \chapterzzz{#3}%
+      \or \seczzz{#3}%
+      \or \numberedsubseczzz{#3}%
+      \or \numberedsubsubseczzz{#3}%
+      \fi
+    \fi
+  \fi
+  \suppressfirstparagraphindent
+}
+
+% an interface:
+% Each shorthand supplies the heading-type argument of \genhead; the
+% caller still provides the level and the heading text.
+\def\numhead{\genhead N}
+\def\apphead{\genhead A}
+\def\unnmhead{\genhead U}
+
+% @chapter, @appendix, @unnumbered.  Increment top-level counter, reset
+% all lower-level sectioning counters to zero.
+%
+% Also set \chaplevelprefix, which we prepend to @float sequence numbers
+% (e.g., figures), q.v.  By default (before any chapter), that is empty.
+\let\chaplevelprefix = \empty
+%
+\outer\parseargdef\chapter{\numhead0{#1}} % normally numhead0 calls chapterzzz
+% \chapterzzz does the real work for a numbered chapter; #1 is the title.
+\def\chapterzzz#1{%
+  % section resetting is \global in case the chapter is in a group, such
+  % as an @include file.
+  \global\secno=0 \global\subsecno=0 \global\subsubsecno=0
+    \global\advance\chapno by 1
+  %
+  % Used for \float.
+  \gdef\chaplevelprefix{\the\chapno.}%
+  \resetallfloatnos
+  %
+  % Progress report on the terminal/log.
+  \message{\putwordChapter\space \the\chapno}%
+  %
+  % Write the actual heading.
+  \chapmacro{#1}{Ynumbered}{\the\chapno}%
+  %
+  % So @section and the like are numbered underneath this chapter.
+  \global\let\section = \numberedsec
+  \global\let\subsection = \numberedsubsec
+  \global\let\subsubsection = \numberedsubsubsec
+}
+
+\outer\parseargdef\appendix{\apphead0{#1}} % normally apphead0 calls appendixzzz
+% \appendixzzz does the real work for an appendix; #1 is the title.
+\def\appendixzzz#1{%
+  % Reset the lower-level counters and step to the next appendix letter.
+  \global\secno=0 \global\subsecno=0 \global\subsubsecno=0
+    \global\advance\appendixno by 1
+  % Float numbers inside an appendix are prefixed with its letter.
+  \gdef\chaplevelprefix{\appendixletter.}%
+  \resetallfloatnos
+  %
+  % \appendixnum is a local definition, used here only for the progress
+  % message.
+  \def\appendixnum{\putwordAppendix\space \appendixletter}%
+  \message{\appendixnum}%
+  %
+  \chapmacro{#1}{Yappendix}{\appendixletter}%
+  %
+  % So @section and the like use the appendix variants from here on.
+  \global\let\section = \appendixsec
+  \global\let\subsection = \appendixsubsec
+  \global\let\subsubsection = \appendixsubsubsec
+}
+
+\outer\parseargdef\unnumbered{\unnmhead0{#1}} % normally unnmhead0 calls unnumberedzzz
+% \unnumberedzzz does the real work for an unnumbered chapter; #1 is
+% the title.
+\def\unnumberedzzz#1{%
+  % Reset the lower-level counters.  \unnumberedno is still advanced so
+  % that the chapter has a unique internal "number".
+  \global\secno=0 \global\subsecno=0 \global\subsubsecno=0
+    \global\advance\unnumberedno by 1
+  %
+  % Since an unnumbered has no number, no prefix for figures.
+  \global\let\chaplevelprefix = \empty
+  \resetallfloatnos
+  %
+  % This used to be simply \message{#1}, but TeX fully expands the
+  % argument to \message.  Therefore, if #1 contained @-commands, TeX
+  % expanded them.  For example, in `@unnumbered The @cite{Book}', TeX
+  % expanded @cite (which turns out to cause errors because \cite is meant
+  % to be executed, not expanded).
+  %
+  % Anyway, we don't want the fully-expanded definition of @cite to appear
+  % as a result of the \message, we just want `@cite' itself.  We use
+  % \the<toks register> to achieve this: TeX expands \the<toks> only once,
+  % simply yielding the contents of <toks register>.  (We also do this for
+  % the toc entries.)
+  \toks0 = {#1}%
+  \message{(\the\toks0)}%
+  %
+  % The internal number is passed along as the third argument.
+  \chapmacro{#1}{Ynothing}{\the\unnumberedno}%
+  %
+  % So @section and the like use the unnumbered variants from here on.
+  \global\let\section = \unnumberedsec
+  \global\let\subsection = \unnumberedsubsec
+  \global\let\subsubsection = \unnumberedsubsubsec
+}
+
+% @centerchap is like @unnumbered, but the heading is centered.
+\outer\parseargdef\centerchap{%
+  % Well, we could do the following in a group, but that would break
+  % an assumption that \chapmacro is called at the outermost level.
+  % Thus we are safer this way:                --kasal, 24feb04
+  %
+  % Turn the centering hook on for this one heading (\chapmacro calls
+  % \centerparametersmaybe while setting the title), then turn it off.
+  \let\centerparametersmaybe = \centerparameters
+  \unnmhead0{#1}%
+  \let\centerparametersmaybe = \relax
+}
+
+% @top is like @unnumbered.
+\let\top\unnumbered
+
+% Sections.
+% Each ...zzz macro below resets the counters of the lower levels,
+% advances \secno, and calls \sectionheading with the title (#1), the
+% level name, the type keyword and the assembled section number.
+\outer\parseargdef\numberedsec{\numhead1{#1}} % normally calls seczzz
+\def\seczzz#1{%
+  \global\subsecno=0 \global\subsubsecno=0  \global\advance\secno by 1
+  \sectionheading{#1}{sec}{Ynumbered}{\the\chapno.\the\secno}%
+}
+
+\outer\parseargdef\appendixsection{\apphead1{#1}} % normally calls appendixsectionzzz
+\def\appendixsectionzzz#1{%
+  \global\subsecno=0 \global\subsubsecno=0  \global\advance\secno by 1
+  \sectionheading{#1}{sec}{Yappendix}{\appendixletter.\the\secno}%
+}
+\let\appendixsec\appendixsection
+
+\outer\parseargdef\unnumberedsec{\unnmhead1{#1}} % normally calls unnumberedseczzz
+% The unnumbered variant still advances \secno and passes a number
+% built from \unnumberedno.
+\def\unnumberedseczzz#1{%
+  \global\subsecno=0 \global\subsubsecno=0  \global\advance\secno by 1
+  \sectionheading{#1}{sec}{Ynothing}{\the\unnumberedno.\the\secno}%
+}
+
+% Subsections.
+% Each ...zzz macro below resets \subsubsecno, advances \subsecno, and
+% calls \sectionheading with a three-part number.
+\outer\parseargdef\numberedsubsec{\numhead2{#1}} % normally calls numberedsubseczzz
+\def\numberedsubseczzz#1{%
+  \global\subsubsecno=0  \global\advance\subsecno by 1
+  \sectionheading{#1}{subsec}{Ynumbered}{\the\chapno.\the\secno.\the\subsecno}%
+}
+
+\outer\parseargdef\appendixsubsec{\apphead2{#1}} % normally calls appendixsubseczzz
+\def\appendixsubseczzz#1{%
+  \global\subsubsecno=0  \global\advance\subsecno by 1
+  \sectionheading{#1}{subsec}{Yappendix}%
+                 {\appendixletter.\the\secno.\the\subsecno}%
+}
+
+\outer\parseargdef\unnumberedsubsec{\unnmhead2{#1}} %normally calls unnumberedsubseczzz
+\def\unnumberedsubseczzz#1{%
+  \global\subsubsecno=0  \global\advance\subsecno by 1
+  \sectionheading{#1}{subsec}{Ynothing}%
+                 {\the\unnumberedno.\the\secno.\the\subsecno}%
+}
+
+% Subsubsections.
+% Lowest level: there is nothing below to reset, so each ...zzz macro
+% only advances \subsubsecno and calls \sectionheading with a four-part
+% number.
+\outer\parseargdef\numberedsubsubsec{\numhead3{#1}} % normally numberedsubsubseczzz
+\def\numberedsubsubseczzz#1{%
+  \global\advance\subsubsecno by 1
+  \sectionheading{#1}{subsubsec}{Ynumbered}%
+                 {\the\chapno.\the\secno.\the\subsecno.\the\subsubsecno}%
+}
+
+\outer\parseargdef\appendixsubsubsec{\apphead3{#1}} % normally appendixsubsubseczzz
+\def\appendixsubsubseczzz#1{%
+  \global\advance\subsubsecno by 1
+  \sectionheading{#1}{subsubsec}{Yappendix}%
+                 {\appendixletter.\the\secno.\the\subsecno.\the\subsubsecno}%
+}
+
+\outer\parseargdef\unnumberedsubsubsec{\unnmhead3{#1}} %normally unnumberedsubsubseczzz
+\def\unnumberedsubsubseczzz#1{%
+  \global\advance\subsubsecno by 1
+  \sectionheading{#1}{subsubsec}{Ynothing}%
+                 {\the\unnumberedno.\the\secno.\the\subsecno.\the\subsubsecno}%
+}
+
+% These macros control what the section commands do, according
+% to what kind of chapter we are in (ordinary, appendix, or unnumbered).
+% Define them by default for a numbered chapter.
+\let\section = \numberedsec
+\let\subsection = \numberedsubsec
+\let\subsubsection = \numberedsubsubsec
+
+% Define @majorheading, @heading and @subheading
+
+% NOTE on use of \vbox for chapter headings, section headings, and such:
+%       1) We use \vbox rather than the earlier \line to permit
+%          overlong headings to fold.
+%       2) \hyphenpenalty is set to 10000 because hyphenation in a
+%          heading is obnoxious; this forbids it.
+%       3) Likewise, headings look best if no \parindent is used, and
+%          if justification is not attempted.  Hence \raggedright.
+
+
+% @majorheading: like \chapheading, but the break before it uses a
+% \chapheadingskip enlarged by 10pt.  The enlargement is made inside a
+% group, so it applies to this one break only.
+\def\majorheading{%
+  {\advance\chapheadingskip by 10pt \chapbreak }%
+  \parsearg\chapheadingzzz
+}
+
+% \chapheading: chapter break, then typeset the rest of the line as a
+% heading via \chapheadingzzz.
+\def\chapheading{\chapbreak \parsearg\chapheadingzzz}
+% #1 is the heading text.  It is set in \chapfonts, ragged right with
+% hyphenation forbidden, followed by \bigskip and a penalty of 200.
+% Nothing here calls \writetocentry or touches a section counter.
+\def\chapheadingzzz#1{%
+  {\chapfonts \vbox{\hyphenpenalty=10000\tolerance=5000
+                    \parindent=0pt\raggedright
+                    \rm #1\hfill}}%
+  \bigskip \par\penalty 200\relax
+  \suppressfirstparagraphindent
+}
+
+% @heading, @subheading, @subsubheading.
+% Each of these typesets its argument with \sectionheading at the given
+% level, passing the type Yomitfromtoc and an empty section number.
+% The line break after the empty number argument is protected with `%':
+% without it each macro body carries a stray space token, which is
+% harmless only as long as \sectionheading finishes in vertical mode.
+\parseargdef\heading{\sectionheading{#1}{sec}{Yomitfromtoc}{}%
+  \suppressfirstparagraphindent}
+\parseargdef\subheading{\sectionheading{#1}{subsec}{Yomitfromtoc}{}%
+  \suppressfirstparagraphindent}
+\parseargdef\subsubheading{\sectionheading{#1}{subsubsec}{Yomitfromtoc}{}%
+  \suppressfirstparagraphindent}
+
+% These macros generate a chapter, section, etc. heading only
+% (including whitespace, linebreaking, etc. around it),
+% given all the information in convenient, parsed form.
+
+%%% Args are the skip (#1) and penalty (#2, usually negative).
+% Ends the current paragraph.  Then, only if the last skip on the list
+% is smaller than #1: remove that skip, insert \penalty#2, and skip by
+% #1.  If an equal or larger skip is already there, nothing is added.
+\def\dobreak#1#2{\par\ifdim\lastskip<#1\removelastskip\penalty#2\vskip#1\fi}
+
+%%% Define plain chapter starts, and page on/off switching for it
+% Parameter controlling skip before chapter headings (if needed)
+
+\newskip\chapheadingskip
+
+% \chapbreak: vertical break of \chapheadingskip with penalty -4000.
+\def\chapbreak{\dobreak \chapheadingskip {-4000}}
+% \chappager: end the paragraph, fill the page and eject it.
+\def\chappager{\par\vfill\supereject}
+% \chapoddpage: eject the page like \chappager; if the new page number
+% is even, emit one more page whose only content is an empty box, so
+% that the following material starts on an odd page.
+%
+% Because \domark is called before \chapoddpage, the filler page will
+% get the headings for the next chapter, which is wrong.  But we don't
+% care -- we just disable all headings on the filler page.
+\def\chapoddpage{%
+  \chappager
+  \ifodd\pageno \else
+    \begingroup
+      % Blank head and foot lines, local to this group.
+      \evenheadline={\hfil}\evenfootline={\hfil}%
+      \oddheadline={\hfil}\oddfootline={\hfil}%
+      % An empty box, so that the filler page is not empty.
+      \hbox to 0pt{}%
+      \chappager
+    \endgroup
+  \fi
+}
+
+% @setchapternewpage <word>: the argument is delimited by spaces and
+% selects one of the \CHAPPAG<word> macros below (off, on, odd).
+\def\setchapternewpage #1 {\csname CHAPPAG#1\endcsname}
+
+% off: chapters are separated by \chapbreak only.  Unlike the other two
+% settings, this one leaves \HEADINGSon untouched.
+\def\CHAPPAGoff{%
+\global\let\contentsalignmacro = \chappager
+\global\let\pchapsepmacro=\chapbreak
+\global\let\pagealignmacro=\chappager}
+
+% on: each chapter starts a new page; \HEADINGSon means single-sided
+% headings.
+\def\CHAPPAGon{%
+\global\let\contentsalignmacro = \chappager
+\global\let\pchapsepmacro=\chappager
+\global\let\pagealignmacro=\chappager
+\global\def\HEADINGSon{\HEADINGSsingle}}
+
+% odd: each chapter starts on an odd page; \HEADINGSon means
+% double-sided headings.
+\def\CHAPPAGodd{%
+\global\let\contentsalignmacro = \chapoddpage
+\global\let\pchapsepmacro=\chapoddpage
+\global\let\pagealignmacro=\chapoddpage
+\global\def\HEADINGSon{\HEADINGSdouble}}
+
+\CHAPPAGon
+
+% Chapter opening.
+%
+% #1 is the text, #2 is the section type (Ynumbered, Ynothing,
+% Yappendix, Yomitfromtoc), #3 the chapter number.
+%
+% To test against our argument.
+\def\Ynothingkeyword{Ynothing}
+\def\Yomitfromtockeyword{Yomitfromtoc}
+\def\Yappendixkeyword{Yappendix}
+%
+\def\chapmacro#1#2#3{%
+  % Overall order: record the heading definitions, emit a first mark,
+  % do the chapter break, emit a second mark, write the toc entry and
+  % the node reference, and finally typeset the heading itself.
+  %
+  % Insert the first mark before the heading break (see notes for \domark).
+  \let\prevchapterdefs=\lastchapterdefs
+  \let\prevsectiondefs=\lastsectiondefs
+  % A new chapter starts with empty section information.
+  \gdef\lastsectiondefs{\gdef\thissectionname{}\gdef\thissectionnum{}%
+                        \gdef\thissection{}}%
+  %
+  % \lastchapterdefs, when executed later, defines \thischaptername,
+  % \thischapternum and \thischapter.  Its contents depend on the type:
+  \def\temptype{#2}%
+  \ifx\temptype\Ynothingkeyword
+    % Unnumbered: no number; \thischapter is just the name.
+    \gdef\lastchapterdefs{\gdef\thischaptername{#1}\gdef\thischapternum{}%
+                          \gdef\thischapter{\thischaptername}}%
+  \else\ifx\temptype\Yomitfromtockeyword
+    % Omitted from the toc: no number, and \thischapter is left empty.
+    \gdef\lastchapterdefs{\gdef\thischaptername{#1}\gdef\thischapternum{}%
+                          \gdef\thischapter{}}%
+  \else\ifx\temptype\Yappendixkeyword
+    % Appendix.  \xdef freezes the current letter; the title goes
+    % through \toks0 so that it is copied without further expansion.
+    \toks0={#1}%
+    \xdef\lastchapterdefs{%
+      \gdef\noexpand\thischaptername{\the\toks0}%
+      \gdef\noexpand\thischapternum{\appendixletter}%
+      \gdef\noexpand\thischapter{\putwordAppendix{} \noexpand\thischapternum:
+                                 \noexpand\thischaptername}%
+    }%
+  \else
+    % Numbered chapter: as for an appendix, but with \the\chapno.
+    \toks0={#1}%
+    \xdef\lastchapterdefs{%
+      \gdef\noexpand\thischaptername{\the\toks0}%
+      \gdef\noexpand\thischapternum{\the\chapno}%
+      \gdef\noexpand\thischapter{\putwordChapter{} \noexpand\thischapternum:
+                                 \noexpand\thischaptername}%
+    }%
+  \fi\fi\fi
+  %
+  % Output the mark.  Pass it through \safewhatsit, to take care of
+  % the preceding space.
+  \safewhatsit\domark
+  %
+  % Insert the chapter heading break.
+  \pchapsepmacro
+  %
+  % Now the second mark, after the heading break.  No break points
+  % between here and the heading.
+  \let\prevchapterdefs=\lastchapterdefs
+  \let\prevsectiondefs=\lastsectiondefs
+  \domark
+  %
+  {%
+    \chapfonts \rm
+    %
+    % Have to define \lastsection before calling \donoderef, because the
+    % xref code eventually uses it.  On the other hand, it has to be called
+    % after \pchapsepmacro, or the headline will change too soon.
+    \gdef\lastsection{#1}%
+    %
+    % Only insert the separating space if we have a chapter/appendix
+    % number, and don't print the unnumbered ``number''.
+    % box0 receives the printed label (possibly empty); \toctype is the
+    % entry type handed to \writetocentry.
+    \ifx\temptype\Ynothingkeyword
+      \setbox0 = \hbox{}%
+      \def\toctype{unnchap}%
+    \else\ifx\temptype\Yomitfromtockeyword
+      \setbox0 = \hbox{}% contents like unnumbered, but no toc entry
+      \def\toctype{omit}%
+    \else\ifx\temptype\Yappendixkeyword
+      \setbox0 = \hbox{\putwordAppendix{} #3\enspace}%
+      \def\toctype{app}%
+    \else
+      \setbox0 = \hbox{#3\enspace}%
+      \def\toctype{numchap}%
+    \fi\fi\fi
+    %
+    % Write the toc entry for this chapter.  Must come before the
+    % \donoderef, because we include the current node name in the toc
+    % entry, and \donoderef resets it to empty.
+    \writetocentry{\toctype}{#1}{#3}%
+    %
+    % For pdftex, we have to write out the node definition (aka, make
+    % the pdfdest) after any page break, but before the actual text has
+    % been typeset.  If the destination for the pdf outline is after the
+    % text, then jumping from the outline may wind up with the text not
+    % being visible, for instance under high magnification.
+    \donoderef{#2}%
+    %
+    % Typeset the actual heading.
+    % Continuation lines hang by the width of the label in box0.
+    % \centerparametersmaybe is \relax except while @centerchap is
+    % running.
+    \nobreak % Avoid page breaks at the interline glue.
+    \vbox{\hyphenpenalty=10000 \tolerance=5000 \parindent=0pt \raggedright
+          \hangindent=\wd0 \centerparametersmaybe
+          \unhbox0 #1\par}%
+  }%
+  \nobreak\bigskip % no page break after a chapter title
+  \nobreak
+}
+
+% @centerchap -- centered and unnumbered.
+% \centerparametersmaybe is the hook that \chapmacro calls while setting
+% a title; by default it does nothing.
+\let\centerparametersmaybe = \relax
+% \centerparameters: quadruple \rightskip (it is advanced by 3 times
+% itself), make \leftskip equal to it, and set \parfillskip to 0pt.
+\def\centerparameters{%
+  \advance\rightskip by 3\rightskip
+  \leftskip = \rightskip
+  \parfillskip = 0pt
+}
+
+
+% I don't think this chapter style is supported any more, so I'm not
+% updating it with the new noderef stuff.  We'll see.  --karl, 11aug03.
+%
+% @setchapterstyle <word>: the argument is delimited by spaces and
+% selects \CHAPF<word>.  Only \CHAPFopen is defined here.
+\def\setchapterstyle #1 {\csname CHAPF#1\endcsname}
+%
+% Unnumbered chapter opening for the `open' style: start on an odd
+% page, then set #1 ragged right in \chapfonts.
+\def\unnchfopen #1{%
+\chapoddpage {\chapfonts \vbox{\hyphenpenalty=10000\tolerance=5000
+                       \parindent=0pt\raggedright
+                       \rm #1\hfill}}\bigskip \par\nobreak
+}
+% Numbered chapter opening for the `open' style: a 3in-high box with
+% #2 and then #1 flush right on separate lines.
+% NOTE(review): this macro takes two arguments, while the callers of
+% \chapmacro in this file pass three (title, type keyword, number).
+% After \CHAPFopen the third argument would not be absorbed -- confirm
+% before relying on this style.
+\def\chfopen #1#2{\chapoddpage {\chapfonts
+\vbox to 3in{\vfil \hbox to\hsize{\hfil #2} \hbox to\hsize{\hfil #1} \vfil}}%
+\par\penalty 5000 %
+}
+% Centered chapter opening for the `open' style.
+\def\centerchfopen #1{%
+\chapoddpage {\chapfonts \vbox{\hyphenpenalty=10000\tolerance=5000
+                       \parindent=0pt
+                       \hfill {\rm #1}\hfill}}\bigskip \par\nobreak
+}
+% Switch to the `open' style by globally replacing \chapmacro and
+% \centerchapmacro.
+\def\CHAPFopen{%
+  \global\let\chapmacro=\chfopen
+  \global\let\centerchapmacro=\centerchfopen}
+
+
+% Section titles.  These macros combine the section number parts and
+% call the generic \sectionheading to do the printing.
+%
+% \sectionheading looks these up by name, as \<level>headingbreak and
+% \<level>headingskip.
+\newskip\secheadingskip
+\def\secheadingbreak{\dobreak \secheadingskip{-1000}}
+
+% Subsection titles.
+\newskip\subsecheadingskip
+\def\subsecheadingbreak{\dobreak \subsecheadingskip{-500}}
+
+% Subsubsection titles.
+% These are macros, not registers: they simply reuse the subsection
+% skip and break.
+\def\subsubsecheadingskip{\subsecheadingskip}
+\def\subsubsecheadingbreak{\subsecheadingbreak}
+
+
+% Print any size, any type, section title.
+%
+% #1 is the text, #2 is the section level (sec/subsec/subsubsec), #3 is
+% the section type for xrefs (Ynumbered, Ynothing, Yappendix, or
+% Yomitfromtoc), #4 is the section number (empty for @heading and
+% friends, which pass Yomitfromtoc).
+%
+\def\seckeyword{sec}
+%
+\def\sectionheading#1#2#3#4{%
+  % Overall order, the same as in \chapmacro: record the heading
+  % definitions, first mark, vertical break, second mark, toc entry,
+  % node reference, heading text.  Everything up to the heading text
+  % happens inside one group so that the font switch stays local.
+  {%
+    % Switch to the right set of fonts.
+    \csname #2fonts\endcsname \rm
+    %
+    \def\sectionlevel{#2}%
+    \def\temptype{#3}%
+    %
+    % Insert first mark before the heading break (see notes for \domark).
+    % \lastsectiondefs is only redefined for level `sec'; lower levels
+    % leave the recorded section information alone.
+    \let\prevsectiondefs=\lastsectiondefs
+    \ifx\temptype\Ynothingkeyword
+      \ifx\sectionlevel\seckeyword
+        \gdef\lastsectiondefs{\gdef\thissectionname{#1}\gdef\thissectionnum{}%
+                              \gdef\thissection{\thissectionname}}%
+      \fi
+    \else\ifx\temptype\Yomitfromtockeyword
+      % Don't redefine \thissection.
+    \else\ifx\temptype\Yappendixkeyword
+      % (This branch and the final \else branch have identical bodies.)
+      \ifx\sectionlevel\seckeyword
+        \toks0={#1}%
+        \xdef\lastsectiondefs{%
+          \gdef\noexpand\thissectionname{\the\toks0}%
+          \gdef\noexpand\thissectionnum{#4}%
+          \gdef\noexpand\thissection{\putwordSection{} \noexpand\thissectionnum:
+                                     \noexpand\thissectionname}%
+        }%
+      \fi
+    \else
+      \ifx\sectionlevel\seckeyword
+        \toks0={#1}%
+        \xdef\lastsectiondefs{%
+          \gdef\noexpand\thissectionname{\the\toks0}%
+          \gdef\noexpand\thissectionnum{#4}%
+          \gdef\noexpand\thissection{\putwordSection{} \noexpand\thissectionnum:
+                                     \noexpand\thissectionname}%
+        }%
+      \fi
+    \fi\fi\fi
+    %
+    % Output the mark.  Pass it through \safewhatsit, to take care of
+    % the preceding space.
+    \safewhatsit\domark
+    %
+    % Insert space above the heading.
+    \csname #2headingbreak\endcsname
+    %
+    % Now the second mark, after the heading break.  No break points
+    % between here and the heading.
+    \let\prevsectiondefs=\lastsectiondefs
+    \domark
+    %
+    % Only insert the space after the number if we have a section number.
+    % box0 receives the printed number (possibly empty); \toctype is the
+    % prefix of the toc entry type.
+    \ifx\temptype\Ynothingkeyword
+      \setbox0 = \hbox{}%
+      \def\toctype{unn}%
+      \gdef\lastsection{#1}%
+    \else\ifx\temptype\Yomitfromtockeyword
+      % for @headings -- no section number, don't include in toc,
+      % and don't redefine \lastsection.
+      % \sectionlevel is emptied so that the type passed to
+      % \writetocentry is exactly `omit'.
+      \setbox0 = \hbox{}%
+      \def\toctype{omit}%
+      \let\sectionlevel=\empty
+    \else\ifx\temptype\Yappendixkeyword
+      \setbox0 = \hbox{#4\enspace}%
+      \def\toctype{app}%
+      \gdef\lastsection{#1}%
+    \else
+      \setbox0 = \hbox{#4\enspace}%
+      \def\toctype{num}%
+      \gdef\lastsection{#1}%
+    \fi\fi\fi
+    %
+    % Write the toc entry (before \donoderef).  See comments in \chapmacro.
+    \writetocentry{\toctype\sectionlevel}{#1}{#4}%
+    %
+    % Write the node reference (= pdf destination for pdftex).
+    % Again, see comments in \chapmacro.
+    \donoderef{#3}%
+    %
+    % Interline glue will be inserted when the vbox is completed.
+    % That glue will be a valid breakpoint for the page, since it'll be
+    % preceded by a whatsit (usually from the \donoderef, or from the
+    % \writetocentry if there was no node).  We don't want to allow that
+    % break, since then the whatsits could end up on page n while the
+    % section is on page n+1, thus toc/etc. are wrong.  Debian bug 276000.
+    \nobreak
+    %
+    % Output the actual section heading.
+    \vbox{\hyphenpenalty=10000 \tolerance=5000 \parindent=0pt \raggedright
+          \hangindent=\wd0  % zero if no section number
+          \unhbox0 #1}%
+  }%
+  % Add extra space after the heading -- half of whatever came above it.
+  % Don't allow stretch, though.
+  \kern .5 \csname #2headingskip\endcsname
+  %
+  % Do not let the kern be a potential breakpoint, as it would be if it
+  % was followed by glue.
+  \nobreak
+  %
+  % We'll almost certainly start a paragraph next, so don't let that
+  % glue accumulate.  (Not a breakpoint because it's preceded by a
+  % discardable item.)
+  \vskip-\parskip
+  %
+  % This is purely so the last item on the list is a known \penalty >
+  % 10000.  This is so \startdefun can avoid allowing breakpoints after
+  % section headings.  Otherwise, it would insert a valid breakpoint between:
+  %
+  %   @section sec-whatever
+  %   @deffn def-whatever
+  \penalty 10001
+}
+
+
+\message{toc,}
+% Table of contents.
+\newwrite\tocfile
+
+% Write an entry to the toc file, opening it if necessary.
+% Called from @chapter, etc.
+%
+% Example usage: \writetocentry{numsec}{Section Name}{\the\chapno.\the\secno}
+% We append the current node name (if any) and page number as additional
+% arguments for the \{chap,sec,...}entry macros which will eventually
+% read this.  The node name is used in the pdf outlines as the
+% destination to jump to.
+%
+% We open the .toc file for writing here instead of at @setfilename (or
+% any other fixed time) so that @contents can be anywhere in the document.
+% But if #1 is `omit', then we don't do anything.  This is used for the
+% table of contents chapter openings themselves.
+%
+\newif\iftocfileopened
+\def\omitkeyword{omit}%
+%
+\def\writetocentry#1#2#3{%
+  % #1 is the entry type (numchap, unnsec, ..., or omit), #2 the title,
+  % #3 the chapter/section number.
+  \edef\writetoctype{#1}%
+  \ifx\writetoctype\omitkeyword \else
+    % Open the .toc file on first use only.
+    \iftocfileopened\else
+      \immediate\openout\tocfile = \jobname.toc
+      \global\tocfileopenedtrue
+    \fi
+    %
+    \iflinks
+      % Build the \write in an \edef, inside a group with \atdummies in
+      % effect.  \folio is protected with \noexpand, so it stays
+      % unexpanded in \temp.  The line written is
+      %   @<type>entry{<title>}{<number>}{<node>}{<page>}
+      {\atdummies
+       \edef\temp{%
+         \write\tocfile{@#1entry{#2}{#3}{\lastnode}{\noexpand\folio}}}%
+       \temp
+      }%
+    \fi
+  \fi
+  %
+  % Tell \shipout to create a pdf destination on each page, if we're
+  % writing pdf.  These are used in the table of contents.  We can't
+  % just write one on every page because the title pages are numbered
+  % 1 and 2 (the page numbers aren't printed), and so are the first
+  % two pages of the document.  Thus, we'd have two destinations named
+  % `1', and two named `2'.
+  \ifpdf \global\pdfmakepagedesttrue \fi
+}
+
+
+% These characters do not print properly in the Computer Modern roman
+% fonts, so we must take special care.  This is more or less redundant
+% with the Texinfo input format setup at the end of this file.
+% 
+\def\activecatcodes{%
+  % Make each of  " $ < > \ ^ _ | ~  an active character.  The
+  % assignments are not \global, so they obey the caller's grouping.
+  \catcode`\"=\active
+  \catcode`\$=\active
+  \catcode`\<=\active
+  \catcode`\>=\active
+  \catcode`\\=\active
+  \catcode`\^=\active
+  \catcode`\_=\active
+  \catcode`\|=\active
+  \catcode`\~=\active
+}
+
+
+% Read the toc file, which is essentially Texinfo input.
+\def\readtocfile{%
+  % Set up for reading a data file and make the special characters
+  % active, then read the file named by \tocreadfilename.
+  \setupdatafile
+  \activecatcodes
+  \input \tocreadfilename
+}
+
+\newskip\contentsrightmargin \contentsrightmargin=1in
+\newcount\savepageno
+\newcount\lastnegativepageno \lastnegativepageno = -1
+
+% Prepare to read what we've written to \tocfile.
+%
+\def\startcontents#1{%
+  % #1 is the heading text for the contents.
+  % NOTE: this macro opens a group that it does not close; \contents
+  % and \summarycontents supply the matching \endgroup.
+  %
+  % If @setchapternewpage on, and @headings double, the contents should
+  % start on an odd page, unlike chapters.  Thus, we maintain
+  % \contentsalignmacro in parallel with \pagealignmacro.
+  % From: Torbjorn Granlund <tege@matematik.su.se>
+  \contentsalignmacro
+  % Close the .toc output file before it is read back.
+  \immediate\closeout\tocfile
+  %
+  % Don't need to put `Contents' or `Short Contents' in the headline.
+  % It is abundantly clear what they are.
+  \chapmacro{#1}{Yomitfromtoc}{}%
+  %
+  % Remember the current page number; the callers restore it afterwards.
+  \savepageno = \pageno
+  \begingroup                  % Set up to handle contents files properly.
+    \raggedbottom              % Worry more about breakpoints than the bottom.
+    \advance\hsize by -\contentsrightmargin % Don't use the full line length.
+    %
+    % Roman numerals for page numbers.
+    % That is, continue the negative page numbering from
+    % \lastnegativepageno unless \pageno is already non-positive.
+    \ifnum \pageno>0 \global\pageno = \lastnegativepageno \fi
+}
+
+% redefined for the two-volume lispref.  We always output on
+% \jobname.toc even if this is redefined.
+% 
+\def\tocreadfilename{\jobname.toc}
+
+% Normal (long) toc.
+%
+\def\contents{%
+  \startcontents{\putwordTOC}%
+    % Stream 1 is opened only to find out whether the toc file exists;
+    % the actual reading is done by \readtocfile.
+    \openin 1 \tocreadfilename\space
+    \ifeof 1 \else
+      \readtocfile
+    \fi
+    \vfill \eject
+    \contentsalignmacro % in case @setchapternewpage odd is in effect
+    % Build the pdf outlines only if the toc file existed.  Stream 1
+    % must still be open for this test, so \closein comes after it.
+    \ifeof 1 \else
+      \pdfmakeoutlines
+    \fi
+    \closein 1
+  \endgroup
+  % (The \endgroup above closes the group opened in \startcontents.)
+  % Remember where the negative page numbering stopped, then return to
+  % the page number saved by \startcontents.
+  \lastnegativepageno = \pageno
+  \global\pageno = \savepageno
+}
+
+% And just the chapters.
+\def\summarycontents{%
+  \startcontents{\putwordShortTOC}%
+    %
+    % Chapter-level entries are typeset by the short variants.
+    \let\numchapentry = \shortchapentry
+    \let\appentry = \shortchapentry
+    \let\unnchapentry = \shortunnchapentry
+    % We want a true roman here for the page numbers.
+    \secfonts
+    \let\rm=\shortcontrm \let\bf=\shortcontbf
+    \let\sl=\shortcontsl \let\tt=\shortconttt
+    \rm
+    \hyphenpenalty = 10000
+    \advance\baselineskip by 1pt % Open it up a little.
+    % Every entry below chapter level absorbs its four arguments and
+    % produces nothing.
+    \def\numsecentry##1##2##3##4{}
+    \let\appsecentry = \numsecentry
+    \let\unnsecentry = \numsecentry
+    \let\numsubsecentry = \numsecentry
+    \let\appsubsecentry = \numsecentry
+    \let\unnsubsecentry = \numsecentry
+    \let\numsubsubsecentry = \numsecentry
+    \let\appsubsubsecentry = \numsecentry
+    \let\unnsubsubsecentry = \numsecentry
+    % Stream 1 is opened only to find out whether the toc file exists.
+    \openin 1 \tocreadfilename\space
+    \ifeof 1 \else
+      \readtocfile
+    \fi
+    \closein 1
+    \vfill \eject
+    \contentsalignmacro % in case @setchapternewpage odd is in effect
+  \endgroup
+  % (The \endgroup above closes the group opened in \startcontents.)
+  \lastnegativepageno = \pageno
+  \global\pageno = \savepageno
+}
+\let\shortcontents = \summarycontents
+
+% Typeset the label for a chapter or appendix for the short contents.
+% The arg is, e.g., `A' for an appendix, or `3' for a chapter.
+%
+\def\shortchaplabel#1{%
+  % This space should be enough, since a single number is .5em, and the
+  % widest letter (M) is 1em, at least in the Computer Modern fonts.
+  % But use \hss just in case.
+  % (This space doesn't include the extra space that gets added after
+  % the label; that gets put in by \shortchapentry below.)
+  %
+  % We'd like to right-justify chapter numbers, but that looks strange
+  % with appendix letters.  And right-justifying numbers and
+  % left-justifying letters looks strange when there are fewer than 10
+  % chapters.  Have to read the whole toc once to know how many chapters
+  % there are before deciding ...
+  %
+  % So: the label is set flush left in a box 1em wide.
+  \hbox to 1em{#1\hss}%
+}
+
+% These macros generate individual entries in the table of contents.
+% Each takes four arguments, matching what \writetocentry writes:
+% #1 is the chapter or section name, #2 its number (chapter number,
+% appendix letter, section number, ...), #3 the node name, and #4 the
+% page number.
+
+% Chapters, in the main contents.
+% In every entry macro below, #1 is the title, #2 the number and #4 the
+% page; #3 is accepted but not used for typesetting.
+\def\numchapentry#1#2#3#4{\dochapentry{#2\labelspace#1}{#4}}
+%
+% Chapters, in the short toc.
+% See comments in \dochapentry re vbox and related settings.
+\def\shortchapentry#1#2#3#4{%
+  \tocentry{\shortchaplabel{#2}\labelspace #1}{\doshortpageno\bgroup#4\egroup}%
+}
+
+% Appendices, in the main contents.
+% Need the word Appendix, and a fixed-size box.
+%
+% #1 is the appendix letter.  The label is set in a box as wide as the
+% word for Appendix followed by a space and `M'.
+\def\appendixbox#1{%
+  % We use M since it's probably the widest letter.
+  \setbox0 = \hbox{\putwordAppendix{} M}%
+  \hbox to \wd0{\putwordAppendix{} #1\hss}}
+%
+\def\appentry#1#2#3#4{\dochapentry{\appendixbox{#2}\labelspace#1}{#4}}
+
+% Unnumbered chapters.
+% Only the title is printed; the number in #2 is ignored.
+\def\unnchapentry#1#2#3#4{\dochapentry{#1}{#4}}
+\def\shortunnchapentry#1#2#3#4{\tocentry{#1}{\doshortpageno\bgroup#4\egroup}}
+
+% Sections.
+% At each level below, appendix entries are typeset exactly like
+% numbered ones, and unnumbered entries omit the number.
+\def\numsecentry#1#2#3#4{\dosecentry{#2\labelspace#1}{#4}}
+\let\appsecentry=\numsecentry
+\def\unnsecentry#1#2#3#4{\dosecentry{#1}{#4}}
+
+% Subsections.
+\def\numsubsecentry#1#2#3#4{\dosubsecentry{#2\labelspace#1}{#4}}
+\let\appsubsecentry=\numsubsecentry
+\def\unnsubsecentry#1#2#3#4{\dosubsecentry{#1}{#4}}
+
+% And subsubsections.
+\def\numsubsubsecentry#1#2#3#4{\dosubsubsecentry{#2\labelspace#1}{#4}}
+\let\appsubsubsecentry=\numsubsubsecentry
+\def\unnsubsubsecentry#1#2#3#4{\dosubsubsecentry{#1}{#4}}
+
+% This parameter controls the indentation of the various levels.
+% Same as \defaultparindent.
+\newdimen\tocindent \tocindent = 15pt
+
+% Now for the actual typesetting. In all these, #1 is the text and #2 is the
+% page number.
+%
+% If the toc has to be broken over pages, we want it to be at chapters
+% if at all possible; hence the \penalty.
+\def\dochapentry#1#2{%
+   % Favor a page break just before a chapter entry (negative penalty),
+   % then add about one line of stretchable/shrinkable space above it.
+   \penalty-300 \vskip1\baselineskip plus.33\baselineskip minus.25\baselineskip
+   % The font switch is confined to this group.
+   \begingroup
+     \chapentryfonts
+     \tocentry{#1}{\dopageno\bgroup#2\egroup}%
+   \endgroup
+   % No page break between the chapter entry and the space after it.
+   \nobreak\vskip .25\baselineskip plus.1\baselineskip
+}
+
+% Section entry: fonts from \secentryfonts, indented by one \tocindent.
+% #1 is the entry text, #2 the page number.
+\def\dosecentry#1#2{%
+  \begingroup
+    \secentryfonts
+    \leftskip=\tocindent
+    \tocentry{#1}{\dopageno\bgroup#2\egroup}%
+  \endgroup
+}
+
+% Subsection entry: fonts from \subsecentryfonts, indented by two
+% \tocindent steps.
+\def\dosubsecentry#1#2{%
+  \begingroup
+    \subsecentryfonts
+    \leftskip=2\tocindent
+    \tocentry{#1}{\dopageno\bgroup#2\egroup}%
+  \endgroup
+}
+
+% Subsubsection entry: fonts from \subsubsecentryfonts, indented by
+% three \tocindent steps.
+\def\dosubsubsecentry#1#2{%
+  \begingroup
+    \subsubsecentryfonts
+    \leftskip=3\tocindent
+    \tocentry{#1}{\dopageno\bgroup#2\egroup}%
+  \endgroup
+}
+
+% We use the same \entry macro as for the index entries.
+\let\tocentry = \entry
+
+% Space between chapter (or whatever) number and the title.
+\def\labelspace{\hskip1em \relax}
+
+\def\dopageno#1{{\rm #1}}
+\def\doshortpageno#1{{\rm #1}}
+
+\def\chapentryfonts{\secfonts \rm}
+\def\secentryfonts{\textfonts}
+\def\subsecentryfonts{\textfonts}
+\def\subsubsecentryfonts{\textfonts}
+
+
+\message{environments,}
+% @foo ... @end foo.
+
+% @point{}, @result{}, @expansion{}, @print{}, @equiv{}.
+%
+% Since these characters are used in examples, they should be an even number of
+% \tt widths. Each \tt character is 1en, so two makes it 1em.
+%
+% \point is a bare math-mode star.  Each of the others centers its math
+% symbol (\hfil on both sides) in an hbox 1em wide; \arrow, \result and
+% \print additionally raise or lower that box slightly.
+\def\point{$\star$}
+\def\arrow{\leavevmode\raise.05ex\hbox to 1em{\hfil$\rightarrow$\hfil}}
+\def\result{\leavevmode\raise.05ex\hbox to 1em{\hfil$\Rightarrow$\hfil}}
+\def\expansion{\leavevmode\hbox to 1em{\hfil$\mapsto$\hfil}}
+\def\print{\leavevmode\lower.1ex\hbox to 1em{\hfil$\dashv$\hfil}}
+\def\equiv{\leavevmode\hbox to 1em{\hfil$\ptexequiv$\hfil}}
+
+% The @error{} command.
+% Adapted from the TeXbook's \boxit.
+%
+% The framed word is typeset once, here, into \errorbox; \error just
+% copies that box.
+\newbox\errorbox
+%
+% The 3em is measured in \tentt; the braces confine the font switch and
+% \global lets the \dimen0 value survive the group.
+{\tentt \global\dimen0 = 3em}% Width of the box.
+\dimen2 = .55pt % Thickness of rules
+% The text. (`r' is open on the right, `e' somewhat less so on the left.)
+\setbox0 = \hbox{\kern-.75pt \reducedsf error\kern-1.5pt}
+%
+\setbox\errorbox=\hbox to \dimen0{\hfil
+   \hsize = \dimen0 \advance\hsize by -5.8pt % Space to left+right.
+   \advance\hsize by -2\dimen2 % Rules.
+   \vbox{%
+      \hrule height\dimen2
+      \hbox{\vrule width\dimen2 \kern3pt          % Space to left of text.
+         \vtop{\kern2.4pt \box0 \kern2.4pt}% Space above/below.
+         \kern3pt\vrule width\dimen2}% Space to right.
+      \hrule height\dimen2}
+    \hfil}
+%
+% \copy (rather than \box) leaves \errorbox intact for the next use.
+\def\error{\leavevmode\lower.7ex\copy\errorbox}
+
+% @tex ... @end tex    escapes into raw TeX temporarily.
+% One exception: @ is still an escape character, so that @end tex works.
+% But \@ or @@ will get a plain TeX @ character.
+
+\envdef\tex{%
+  % Reinstate plain TeX's category codes: backslash = escape, braces =
+  % grouping, $ = math shift, & = alignment tab, # = parameter,
+  % ^ = superscript, _ = subscript, ~ = active (bound to \tie), and the
+  % percent sign = comment.
+  \catcode `\\=0 \catcode `\{=1 \catcode `\}=2
+  \catcode `\$=3 \catcode `\&=4 \catcode `\#=6
+  \catcode `\^=7 \catcode `\_=8 \catcode `\~=\active \let~=\tie
+  \catcode `\%=14
+  % These characters become ordinary (\other) characters.
+  \catcode `\+=\other
+  \catcode `\"=\other
+  \catcode `\|=\other
+  \catcode `\<=\other
+  \catcode `\>=\other
+  \escapechar=`\\
+  %
+  % Rebind the following control sequences to the \ptex... (and
+  % \tabalign, \plainfrenchspacing) meanings defined elsewhere in this
+  % file -- presumably the plain TeX originals; confirm at their
+  % definitions.
+  \let\b=\ptexb
+  \let\bullet=\ptexbullet
+  \let\c=\ptexc
+  \let\,=\ptexcomma
+  \let\.=\ptexdot
+  \let\dots=\ptexdots
+  \let\equiv=\ptexequiv
+  \let\!=\ptexexclam
+  \let\i=\ptexi
+  \let\indent=\ptexindent
+  \let\noindent=\ptexnoindent
+  \let\{=\ptexlbrace
+  \let\+=\tabalign
+  \let\}=\ptexrbrace
+  \let\/=\ptexslash
+  \let\*=\ptexstar
+  \let\t=\ptext
+  \expandafter \let\csname top\endcsname=\ptextop  % outer
+  \let\frenchspacing=\plainfrenchspacing
+  %
+  % Local helper definitions; \@ yields a literal @ character.
+  \def\endldots{\mathinner{\ldots\ldots\ldots\ldots}}%
+  \def\enddots{\relax\ifmmode\endldots\else$\mathsurround=0pt \endldots\,$\fi}%
+  \def\@{@}%
+}
+% There is no need to define \Etex.
+
+% Define @lisp ... @end lisp.
+% @lisp environment forms a group so it can rebind things,
+% including the definition of @end lisp (which normally is erroneous).
+
+% Amount to narrow the margins by for @lisp.
+\newskip\lispnarrowing \lispnarrowing=0.4in
+
+% This is the definition that ^^M gets inside @lisp, @example, and other
+% such environments.  \null is better than a space, since it doesn't
+% have any width.
+\def\lisppar{\null\endgraf}
+
+% This space is always present above and below environments.
+\newskip\envskipamount \envskipamount = 0pt
+
+% Make spacing above and below environment symmetrical.  We use \parskip here
+% to help in doing that, since in @example-like environments \parskip
+% is reset to zero; thus the \afterenvbreak inserts no space -- but the
+% start of the next paragraph will insert \parskip.
+%
+% The doubled braces make the whole body a group, so the \advance of
+% \envskipamount below is undone on exit.  Nothing is inserted when the
+% preceding item is a penalty of exactly 10000; otherwise the paragraph
+% is ended and, if the preceding skip is smaller than
+% \envskipamount+\parskip, it is replaced by a skip of that size.
+\def\aboveenvbreak{{%
+  % =10000 instead of <10000 because of a special case in \itemzzz and
+  % \sectionheading, q.v.
+  \ifnum \lastpenalty=10000 \else
+    \advance\envskipamount by \parskip
+    \endgraf
+    \ifdim\lastskip<\envskipamount
+      \removelastskip
+      % it's not a good place to break if the last penalty was \nobreak
+      % or better ...
+      \ifnum\lastpenalty<10000 \penalty-50 \fi
+      \vskip\envskipamount
+    \fi
+  \fi
+}}
+
+% The same spacing logic is used after an environment.
+\let\afterenvbreak = \aboveenvbreak
+
+% \nonarrowing is a flag.  If "set", @lisp etc don't narrow margins; it will
+% also clear it, so that its embedded environments do the narrowing again.
+\let\nonarrowing=\relax
+
+% @cartouche ... @end cartouche: draw rectangle w/rounded corners around
+% environment contents.
+\font\circle=lcircle10
+\newdimen\circthick
+\newdimen\cartouter\newdimen\cartinner
+\newskip\normbskip\newskip\normpskip\newskip\normlskip
+\circthick=\fontdimen8\circle
+%
+\def\ctl{{\circle\char'013\hskip -6pt}}% 6pt from pl file: 1/2charwidth
+\def\ctr{{\hskip 6pt\circle\char'010}}
+\def\cbl{{\circle\char'012\hskip -6pt}}
+\def\cbr{{\hskip 6pt\circle\char'011}}
+\def\carttop{\hbox to \cartouter{\hskip\lskip
+        \ctl\leaders\hrule height\circthick\hfil\ctr
+        \hskip\rskip}}
+\def\cartbot{\hbox to \cartouter{\hskip\lskip
+        \cbl\leaders\hrule height\circthick\hfil\cbr
+        \hskip\rskip}}
+%
+\newskip\lskip\newskip\rskip
+
+% \cartouche opens three nested boxes with \bgroup (outer \vbox, a row
+% \hbox, and an inner \vbox for the contents) and leaves them open;
+% \Ecartouche supplies the three matching \egroup's.
+\envdef\cartouche{%
+  \ifhmode\par\fi  % can't be in the midst of a paragraph.
+  \startsavinginserts
+  \lskip=\leftskip \rskip=\rightskip
+  \leftskip=0pt\rightskip=0pt % we want these *outside*.
+  \cartinner=\hsize \advance\cartinner by-\lskip
+  \advance\cartinner by-\rskip
+  \cartouter=\hsize
+  \advance\cartouter by 18.4pt % allow for 3pt kerns on either
+                               % side, and for 6pt waste from
+                               % each corner char, and rule thickness
+  % Save the current interline settings; they are zeroed for the frame
+  % and restored for the contents below.
+  \normbskip=\baselineskip \normpskip=\parskip \normlskip=\lineskip
+  % Flag to tell @lisp, etc., not to narrow margin.
+  \let\nonarrowing = t%
+  \vbox\bgroup
+      \baselineskip=0pt\parskip=0pt\lineskip=0pt
+      \carttop
+      \hbox\bgroup
+         \hskip\lskip
+         \vrule\kern3pt
+         \vbox\bgroup
+             \kern3pt
+             \hsize=\cartinner
+             \baselineskip=\normbskip
+             \lineskip=\normlskip
+             \parskip=\normpskip
+             \vskip -\parskip
+             \comment % For explanation, see the end of \def\group.
+}
+% Close, innermost first, the boxes that \cartouche left open, adding
+% the right-hand rule and the bottom edge on the way out.
+\def\Ecartouche{%
+              \ifhmode\par\fi
+             \kern3pt
+         \egroup
+         \kern3pt\vrule
+         \hskip\rskip
+      \egroup
+      \cartbot
+  \egroup
+  \checkinserts
+}
+
+
+% This macro is called at the beginning of all the @example variants,
+% inside a group.
+\def\nonfillstart{%
+  \aboveenvbreak
+  \hfuzz = 12pt % Don't be fussy
+  \sepspaces % Make spaces be word-separators rather than space tokens.
+  \let\par = \lisppar % don't ignore blank lines
+  \obeylines % each line of input is a line of output
+  \parskip = 0pt
+  \parindent = 0pt
+  \emergencystretch = 0pt % don't try to avoid overfull boxes
+  % If \nonarrowing is still \relax, indent the left margin by
+  % \lispnarrowing.  Otherwise the caller has set the flag (as
+  % @cartouche and @format do): skip the narrowing this once and reset
+  % the flag so that nested environments narrow again.
+  \ifx\nonarrowing\relax
+    \advance \leftskip by \lispnarrowing
+    \exdentamount=\lispnarrowing
+  \else
+    \let\nonarrowing = \relax
+  \fi
+  \let\exdent=\nofillexdent
+}
+
+% If you want all examples etc. small: @set dispenvsize small.
+% If you want even small examples the full size: @set dispenvsize nosmall.
+% This affects the following displayed environments:
+%    @example, @display, @format, @lisp
+%
+% The two values of dispenvsize that are tested with \ifx below.
+\def\smallword{small}
+\def\nosmallword{nosmall}
+% Default: \SETdispenvsize is \relax, which matches neither word.
+\let\SETdispenvsize\relax
+% For the normal-size variants: switch to the small example fonts only
+% when dispenvsize is `small'.
+\def\setnormaldispenv{%
+  \ifx\SETdispenvsize\smallword
+    % end paragraph for sake of leading, in case document has no blank
+    % line.  This is redundant with what happens in \aboveenvbreak, but
+    % we need to do it before changing the fonts, and it's inconvenient
+    % to change the fonts afterward.
+    \ifnum \lastpenalty=10000 \else \endgraf \fi
+    \smallexamplefonts \rm
+  \fi
+}
+% For the @small... variants: switch to the small example fonts unless
+% dispenvsize is `nosmall'.
+\def\setsmalldispenv{%
+  \ifx\SETdispenvsize\nosmallword
+  \else
+    \ifnum \lastpenalty=10000 \else \endgraf \fi
+    \smallexamplefonts \rm
+  \fi
+}
+
+% We often define two environments, @foo and @smallfoo.
+% Let's do it by one command:
+% #1 is the environment name, #2 its body.  Defines \#1 (body preceded
+% by \setnormaldispenv) and \small#1 (body preceded by
+% \setsmalldispenv), and makes \E#1 and \Esmall#1 equal to
+% \afterenvbreak.
+\def\makedispenv #1#2{
+  \expandafter\envdef\csname#1\endcsname {\setnormaldispenv #2}
+  \expandafter\envdef\csname small#1\endcsname {\setsmalldispenv #2}
+  \expandafter\let\csname E#1\endcsname \afterenvbreak
+  \expandafter\let\csname Esmall#1\endcsname \afterenvbreak
+}
+
+% Define two synonyms:
+% #1 and #2 are the two environment names, #3 is the body they share.
+\def\maketwodispenvs #1#2#3{
+  \makedispenv{#1}{#3}
+  \makedispenv{#2}{#3}
+}
+
+% @lisp: indented, narrowed, typewriter font; @example: same as @lisp.
+%
+% @smallexample and @smalllisp: use smaller fonts.
+% Originally contributed by Pavel@xerox.
+%
+\maketwodispenvs {lisp}{example}{%
+  \nonfillstart
+  \tt\quoteexpand
+  \let\kbdfont = \kbdexamplefont % Allow @kbd to do something special.
+  \gobble       % eat return
+}
+% @display/@smalldisplay: same as @lisp except keep current font.
+%
+\makedispenv {display}{%
+  \nonfillstart
+  \gobble
+}
+
+% @format/@smallformat: same as @display except don't narrow margins.
+%
+\makedispenv{format}{%
+  \let\nonarrowing = t%
+  \nonfillstart
+  \gobble
+}
+
+% @flushleft: same as @format, but doesn't obey \SETdispenvsize.
+\envdef\flushleft{%
+  \let\nonarrowing = t%
+  \nonfillstart
+  \gobble
+}
+\let\Eflushleft = \afterenvbreak
+
+% @flushright.
+%
+\envdef\flushright{%
+  \let\nonarrowing = t%
+  \nonfillstart
+  \advance\leftskip by 0pt plus 1fill
+  \gobble
+}
+\let\Eflushright = \afterenvbreak
+
+
+% @quotation does normal linebreaking (hence we can't use \nonfillstart)
+% and narrows the margins.  We keep \parskip nonzero in general, since
+% we're doing normal filling.  So, when using \aboveenvbreak and
+% \afterenvbreak, temporarily make \parskip 0.
+%
+\envdef\quotation{%
+  {\parskip=0pt \aboveenvbreak}% because \aboveenvbreak inserts \parskip
+  \parindent=0pt
+  %
+  % @cartouche defines \nonarrowing to inhibit narrowing at next level down.
+  % Unlike \nonfillstart, both margins are narrowed here.
+  \ifx\nonarrowing\relax
+    \advance\leftskip by \lispnarrowing
+    \advance\rightskip by \lispnarrowing
+    \exdentamount = \lispnarrowing
+  \else
+    \let\nonarrowing = \relax
+  \fi
+  % \parsearg (defined elsewhere in this file) hands the argument to
+  % \quotationlabel.
+  \parsearg\quotationlabel
+}
+
+% We have retained a nonzero parskip for the environment, since we're
+% doing normal filling.
+%
+\def\Equotation{%
+  \par
+  % \quotationauthor is not defined in this part of the file; the
+  % attribution line is printed only when something has defined it.
+  \ifx\quotationauthor\undefined\else
+    % indent a bit.
+    \leftline{\kern 2\leftskip \sl ---\quotationauthor}%
+  \fi
+  {\parskip=0pt \afterenvbreak}%
+}
+
+% If we're given an argument, typeset it in bold with a colon after.
+% (\temp is used as scratch to compare the argument with \empty.)
+\def\quotationlabel#1{%
+  \def\temp{#1}%
+  \ifx\temp\empty \else
+    {\bf #1: }%
+  \fi
+}
+
+
+% LaTeX-like @verbatim...@end verbatim and @verb{<char>...<char>}
+% If we want to allow any <char> as delimiter,
+% we need the curly braces so that makeinfo sees the @verb command, eg:
+% `@verbx...x' would look like the '@verbx' command.  --janneke@gnu.org
+%
+% [Knuth]: Donald Ervin Knuth, 1996.  The TeXbook.
+%
+% [Knuth] p.344; only we need to do the other characters Texinfo sets
+% active too.  Otherwise, they get lost as the first character on a
+% verbatim line.
+\def\dospecials{%
+  \do\ \do\\\do\{\do\}\do\$\do\&%
+  \do\#\do\^\do\^^K\do\_\do\^^A\do\%\do\~%
+  \do\<\do\>\do\|\do\@\do+\do\"%
+}
+%
+% [Knuth] p. 380
+\def\uncatcodespecials{%
+  \def\do##1{\catcode`##1=\other}\dospecials}
+%
+% [Knuth] pp. 380,381,391
+% Disable Spanish ligatures ?` and !` of \tt font
+\begingroup
+  \catcode`\`=\active\gdef`{\relax\lq}
+\endgroup
+%
+% Setup for the @verb command.
+%
+% Eight spaces for a tab
+\begingroup
+  \catcode`\^^I=\active
+  \gdef\tabeightspaces{\catcode`\^^I=\active\def^^I{\ \ \ \ \ \ \ \ }}
+\endgroup
+%
+\def\setupverb{%
+  \tt  % easiest (and conventionally used) font for verbatim
+  \def\par{\leavevmode\endgraf}%
+  \catcode`\`=\active
+  \tabeightspaces
+  % Respect line breaks,
+  % print special symbols as themselves, and
+  % make each space count
+  % must do in this order:
+  \obeylines \uncatcodespecials \sepspaces
+}
+
+% Setup for the @verbatim environment
+%
+% Real tab expansion
+\newdimen\tabw \setbox0=\hbox{\tt\space} \tabw=8\wd0 % tab amount
+%
+\def\starttabbox{\setbox0=\hbox\bgroup}
+
+% Allow an option to not replace quotes with a regular directed right
+% quote/apostrophe (char 0x27), but instead use the undirected quote
+% from cmtt (char 0x0d).  The undirected quote is ugly, so don't make it
+% the default, but it works for pasting with more pdf viewers (at least
+% evince), the lilypond developers report.  xpdf does work with the
+% regular 0x27.  
+% 
+\def\codequoteright{%
+  % \csname gives \relax for an undefined name, so the plain ' is used
+  % only when neither flag is set; if either one is set, use \char'15
+  % (octal 15, i.e. the 0x0d mentioned above).
+  \expandafter\ifx\csname SETtxicodequoteundirected\endcsname\relax
+    \expandafter\ifx\csname SETcodequoteundirected\endcsname\relax
+      '%
+    \else \char'15 \fi
+  \else \char'15 \fi
+}
+%
+% and a similar option for the left quote char vs. a grave accent.
+% Modern fonts display ASCII 0x60 as a grave accent, so some people like
+% the code environments to do likewise.
+%
+\def\codequoteleft{%
+  % Same scheme: plain ` unless one of the two backtick flags is set,
+  % in which case \char'22 (octal 22) is used.
+  \expandafter\ifx\csname SETtxicodequotebacktick\endcsname\relax
+    \expandafter\ifx\csname SETcodequotebacktick\endcsname\relax
+      `%
+    \else \char'22 \fi
+  \else \char'22 \fi
+}
+%
+\begingroup
+  \catcode`\^^I=\active
+  \gdef\tabexpand{%
+    \catcode`\^^I=\active
+    \def^^I{\leavevmode\egroup
+      \dimen0=\wd0 % the width so far, or since the previous tab
+      \divide\dimen0 by\tabw
+      \multiply\dimen0 by\tabw % compute previous multiple of \tabw
+      \advance\dimen0 by\tabw  % advance to next multiple of \tabw
+      \wd0=\dimen0 \box0 \starttabbox
+    }%
+  }
+  \catcode`\'=\active
+  \gdef\rquoteexpand{\catcode\rquoteChar=\active \def'{\codequoteright}}%
+  %
+  \catcode`\`=\active
+  \gdef\lquoteexpand{\catcode\lquoteChar=\active \def`{\codequoteleft}}%
+  %
+  \gdef\quoteexpand{\rquoteexpand \lquoteexpand}%
+\endgroup
+
+% start the verbatim environment.
+\def\setupverbatim{%
+  \let\nonarrowing = t%
+  \nonfillstart
+  % Easiest (and conventionally used) font for verbatim
+  \tt
+  % Each line is collected in \box0: \everypar (set at the end of this
+  % macro) opens the box with \starttabbox, and this \par closes it,
+  % emits it and ends the paragraph.
+  \def\par{\leavevmode\egroup\box0\endgraf}%
+  \catcode`\`=\active
+  \tabexpand
+  \quoteexpand
+  % Respect line breaks,
+  % print special symbols as themselves, and
+  % make each space count
+  % must do in this order:
+  \obeylines \uncatcodespecials \sepspaces
+  \everypar{\starttabbox}%
+}
+
+% Do the @verb magic: verbatim text is quoted by unique
+% delimiter characters.  Before first delimiter expect an
+% opening brace, after last delimiter expect closing brace:
+%
+%    \def\doverb'{'<char>#1<char>'}'{#1}
+%
+% [Knuth] p. 382; only eat outer {}
+\begingroup
+  \catcode`[=1\catcode`]=2\catcode`\{=\other\catcode`\}=\other
+  \gdef\doverb{#1[\def\next##1#1}[##1\endgroup]\next]
+\endgroup
+%
+\def\verb{\begingroup\setupverb\doverb}
+%
+%
+% Do the @verbatim magic: define the macro \doverbatim so that
+% the (first) argument ends when '@end verbatim' is reached, ie:
+%
+%     \def\doverbatim#1@end verbatim{#1}
+%
+% For Texinfo it's a lot easier than for LaTeX,
+% because texinfo's \verbatim doesn't stop at '\end{verbatim}':
+% we need not redefine '\', '{' and '}'.
+%
+% Inspired by LaTeX's verbatim command set [latex.ltx]
+%
+\begingroup
+  \catcode`\ =\active
+  \obeylines %
+  % ignore everything up to the first ^^M, that's the newline at the end
+  % of the @verbatim input line itself.  Otherwise we get an extra blank
+  % line in the output.
+  \xdef\doverbatim#1^^M#2@end verbatim{#2\noexpand\end\gobble verbatim}%
+  % We really want {...\end verbatim} in the body of the macro, but
+  % without the active space; thus we have to use \xdef and \gobble.
+\endgroup
+%
+\envdef\verbatim{%
+    \setupverbatim\doverbatim
+}
+\let\Everbatim = \afterenvbreak
+
+
+% @verbatiminclude FILE - insert text of file in verbatim environment.
+%
+% The argument is read with \filenamecatcodes in effect and then passed
+% to \doverbatiminclude.
+\def\verbatiminclude{\parseargusing\filenamecatcodes\doverbatiminclude}
+%
+% #1 is the file name.  The inner braces form a group, so everything
+% \setupverbatim changes is undone once the file has been read.
+\def\doverbatiminclude#1{%
+  {%
+    \makevalueexpandable
+    \setupverbatim
+    \input #1
+    \afterenvbreak
+  }%
+}
+
+% @copying ... @end copying.
+% Save the text away for @insertcopying later.
+%
+% We save the uninterpreted tokens, rather than creating a box.
+% Saving the text in a box would be much easier, but then all the
+% typesetting commands (@smallbook, font changes, etc.) have to be done
+% beforehand -- and a) we want @copying to be done first in the source
+% file; b) letting users define the frontmatter in as flexible order as
+% possible is very desirable.
+%
+% \copying opens a group and applies \scanargctxt, so the text is read
+% under those catcodes.  \docopying takes everything up to
+% `@end copying' as #1, closes the group and saves #1 in \copyingtext.
+\def\copying{\checkenv{}\begingroup\scanargctxt\docopying}
+\def\docopying#1@end copying{\endgroup\def\copyingtext{#1}}
+%
+% \insertcopying rescans the saved text via \scanexp, inside a group.
+\def\insertcopying{%
+  \begingroup
+    \parindent = 0pt  % paragraph indentation looks wrong on title page
+    \scanexp\copyingtext
+  \endgroup
+}
+
+
+\message{defuns,}
+% @defun etc.
+
+\newskip\defbodyindent \defbodyindent=.4in
+\newskip\defargsindent \defargsindent=50pt
+\newskip\deflastargmargin \deflastargmargin=18pt
+\newcount\defunpenalty
+
+% Start the processing of @deffn:
+\def\startdefun{%
+  \ifnum\lastpenalty<10000
+    \medbreak
+    \defunpenalty=10003 % Will keep this @deffn together with the
+                        % following @def command, see below.
+  \else
+    % If there are two @def commands in a row, we'll have a \nobreak,
+    % which is there to keep the function description together with its
+    % header.  But if there's nothing but headers, we need to allow a
+    % break somewhere.  Check specifically for penalty 10002, inserted
+    % by \printdefunline, instead of 10000, since the sectioning
+    % commands also insert a nobreak penalty, and we don't want to allow
+    % a break between a section heading and a defun.
+    %
+    % As a minor refinement, we avoid "club" headers by signalling
+    % with penalty of 10003 after the very first @deffn in the
+    % sequence (see above), and penalty of 10002 after any following
+    % @def command.
+    \ifnum\lastpenalty=10002 \penalty2000 \else \defunpenalty=10002 \fi
+    %
+    % Similarly, after a section heading, do not allow a break.
+    % But do insert the glue.
+    \medskip  % preceded by discardable penalty, so not a breakpoint
+  \fi
+  %
+  \parindent=0in
+  \advance\leftskip by \defbodyindent
+  \exdentamount=\defbodyindent
+}
+
+% #1 is the non-x command (e.g. \deffn when called from \deffnx; see
+% \domakedefun).
+\def\dodefunx#1{%
+  % First, check whether we are in the right environment:
+  \checkenv#1%
+  %
+  % As above, allow line break if we have multiple x headers in a row.
+  % It's not a great place, though.
+  \ifnum\lastpenalty=10002 \penalty3000 \else \defunpenalty=10002 \fi
+  %
+  % And now, it's time to reuse the body of the original defun:
+  % \expandafter expands #1 one level, and \gobbledefun then discards
+  % everything up to and including \startdefun, so only the rest of the
+  % body (the header parsing) is executed.
+  \expandafter\gobbledefun#1%
+}
+% Delimited argument: swallow all tokens through the first \startdefun.
+\def\gobbledefun#1\startdefun{}
+
+% \printdefunline \deffnheader{text}
+%
+\def\printdefunline#1#2{%
+  \begingroup
+    % call \deffnheader:
+    #1#2 \endheader
+    % common ending:
+    \interlinepenalty = 10000
+    \advance\rightskip by 0pt plus 1fil
+    \endgraf
+    \nobreak\vskip -\parskip
+    \penalty\defunpenalty  % signal to \startdefun and \dodefunx
+    % Some of the @defun-type tags do not enable magic parentheses,
+    % rendering the following check redundant.  But we don't optimize.
+    \checkparencounts
+  \endgroup
+}
+
+\def\Edefun{\endgraf\medbreak}
+
+% \makedefun{deffn} creates \deffn, \deffnx and \Edeffn;
+% the only thing remaining is to define \deffnheader.
+%
+% #1 is the bare name, e.g. deffn.  The \edef builds the three control
+% sequences with \makecsname while \noexpand keeps \domakedefun itself
+% unexpanded; \temp then runs the resulting call.
+\def\makedefun#1{%
+  \expandafter\let\csname E#1\endcsname = \Edefun
+  \edef\temp{\noexpand\domakedefun
+    \makecsname{#1}\makecsname{#1x}\makecsname{#1header}}%
+  \temp
+}
+
+% \domakedefun \deffn \deffnx \deffnheader
+%
+% Define \deffn and \deffnx, without parameters.
+% \deffnheader has to be defined explicitly.
+%
+\def\domakedefun#1#2#3{%
+  \envdef#1{%
+    \startdefun
+    \parseargusing\activeparens{\printdefunline#3}%
+  }%
+  \def#2{\dodefunx#1}%
+  % Deliberately left open: the parameter text and body of the header
+  % macro are whatever follows the \makedefun{...} call in the source.
+  \def#3%
+}
+
+%%% Untyped functions:
+
+% @deffn category name args
+\makedefun{deffn}{\deffngeneral{}}
+
+% @defop category class name args
+\makedefun{defop}#1 {\defopon{#1\ \putwordon}}
+
+% \defopon {category on}class name args
+\def\defopon#1#2 {\deffngeneral{\putwordon\ \code{#2}}{#1\ \code{#2}} }
+
+% \deffngeneral {subind}category name args
+%
+\def\deffngeneral#1#2 #3 #4\endheader{%
+  % Remember that \dosubind{fn}{foo}{} is equivalent to \doind{fn}{foo}.
+  \dosubind{fn}{\code{#3}}{#1}%
+  \defname{#2}{}{#3}\magicamp\defunargs{#4\unskip}%
+}
+
+%%% Typed functions:
+
+% @deftypefn category type name args
+\makedefun{deftypefn}{\deftypefngeneral{}}
+
+% @deftypeop category class type name args
+\makedefun{deftypeop}#1 {\deftypeopon{#1\ \putwordon}}
+
+% \deftypeopon {category on}class type name args
+\def\deftypeopon#1#2 {\deftypefngeneral{\putwordon\ \code{#2}}{#1\ \code{#2}} }
+
+% \deftypefngeneral {subind}category type name args
+%
+\def\deftypefngeneral#1#2 #3 #4 #5\endheader{%
+  \dosubind{fn}{\code{#4}}{#1}%
+  \defname{#2}{#3}{#4}\defunargs{#5\unskip}%
+}
+
+%%% Typed variables:
+
+% @deftypevr category type var args
+\makedefun{deftypevr}{\deftypecvgeneral{}}
+
+% @deftypecv category class type var args
+\makedefun{deftypecv}#1 {\deftypecvof{#1\ \putwordof}}
+
+% \deftypecvof {category of}class type var args
+\def\deftypecvof#1#2 {\deftypecvgeneral{\putwordof\ \code{#2}}{#1\ \code{#2}} }
+
+% \deftypecvgeneral {subind}category type var args
+%
+\def\deftypecvgeneral#1#2 #3 #4 #5\endheader{%
+  \dosubind{vr}{\code{#4}}{#1}%
+  \defname{#2}{#3}{#4}\defunargs{#5\unskip}%
+}
+
+%%% Untyped variables:
+
+% @defvr category var args
+\makedefun{defvr}#1 {\deftypevrheader{#1} {} }
+
+% @defcv category class var args
+\makedefun{defcv}#1 {\defcvof{#1\ \putwordof}}
+
+% \defcvof {category of}class var args
+\def\defcvof#1#2 {\deftypecvof{#1}#2 {} }
+
+%%% Type:
+% @deftp category name args
+\makedefun{deftp}#1 #2 #3\endheader{%
+  \doind{tp}{\code{#2}}%
+  \defname{#1}{}{#2}\defunargs{#3\unskip}%
+}
+
+% Remaining @defun-like shortcuts:
+\makedefun{defun}{\deffnheader{\putwordDeffunc} }
+\makedefun{defmac}{\deffnheader{\putwordDefmac} }
+\makedefun{defspec}{\deffnheader{\putwordDefspec} }
+\makedefun{deftypefun}{\deftypefnheader{\putwordDeffunc} }
+\makedefun{defvar}{\defvrheader{\putwordDefvar} }
+\makedefun{defopt}{\defvrheader{\putwordDefopt} }
+\makedefun{deftypevar}{\deftypevrheader{\putwordDefvar} }
+\makedefun{defmethod}{\defopon\putwordMethodon}
+\makedefun{deftypemethod}{\deftypeopon\putwordMethodon}
+\makedefun{defivar}{\defcvof\putwordInstanceVariableof}
+\makedefun{deftypeivar}{\deftypecvof\putwordInstanceVariableof}
+
+% \defname, which formats the name of the @def (not the args).
+% #1 is the category, such as "Function".
+% #2 is the return type, if any.
+% #3 is the function name.
+%
+% We are followed by (but not passed) the arguments, if any.
+%
+\def\defname#1#2#3{%
+  % Get the values of \leftskip and \rightskip as they were outside the @def...
+  \advance\leftskip by -\defbodyindent
+  %
+  % How we'll format the type name.  Putting it in brackets helps
+  % distinguish it from the body text that may end up on the next line
+  % just below it.
+  \def\temp{#1}%
+  \setbox0=\hbox{\kern\deflastargmargin \ifx\temp\empty\else [\rm\temp]\fi}
+  %
+  % Figure out line sizes for the paragraph shape.
+  % The first line needs space for \box0; but if \rightskip is nonzero,
+  % we need only space for the part of \box0 which exceeds it:
+  \dimen0=\hsize  \advance\dimen0 by -\wd0  \advance\dimen0 by \rightskip
+  % The continuations:
+  \dimen2=\hsize  \advance\dimen2 by -\defargsindent
+  % (plain.tex says that \dimen1 should be used only as global.)
+  \parshape 2 0in \dimen0 \defargsindent \dimen2
+  %
+  % Put the type name to the right margin.
+  \noindent
+  \hbox to 0pt{%
+    \hfil\box0 \kern-\hsize
+    % \hsize has to be shortened this way:
+    \kern\leftskip
+    % Intentionally do not respect \rightskip, since we need the space.
+  }%
+  %
+  % Allow all lines to be underfull without complaint:
+  \tolerance=10000 \hbadness=10000
+  \exdentamount=\defbodyindent
+  {%
+    % defun fonts. We use typewriter by default (used to be bold) because:
+    % . we're printing identifiers, they should be in tt in principle.
+    % . in languages with many accents, such as Czech or French, it's
+    %   common to leave accents off identifiers.  The result looks ok in
+    %   tt, but exceedingly strange in rm.
+    % . we don't want -- and --- to be treated as ligatures.
+    % . this still does not fix the ?` and !` ligatures, but so far no
+    %   one has made identifiers using them :).
+    \df \tt
+    \def\temp{#2}% return value type
+    \ifx\temp\empty\else \tclose{\temp} \fi
+    #3% output function name
+  }%
+  {\rm\enskip}% hskip 0.5 em of \tenrm
+  %
+  \boldbrax
+  % arguments will be output next, if any.
+}
+
+% Print arguments in slanted roman (not ttsl), inconsistently with using
+% tt for the name.  This is because literal text is sometimes needed in
+% the argument list (groff manual), and ttsl and tt are not very
+% distinguishable.  Prevent hyphenation at `-' chars.
+%
+\def\defunargs#1{%
+  % use sl by default (not ttsl),
+  % tt for the names.
+  \df \sl \hyphenchar\font=0
+  %
+  % On the other hand, if an argument has two dashes (for instance), we
+  % want a way to get ttsl.  Let's try @var for that.
+  \let\var=\ttslanted
+  #1%
+  \sl\hyphenchar\font=45
+}
+
+% We want ()&[] to print specially on the defun line.
+%
+\def\activeparens{%
+  \catcode`\(=\active \catcode`\)=\active
+  \catcode`\[=\active \catcode`\]=\active
+  \catcode`\&=\active
+}
+
+% Make control sequences which act like normal parenthesis chars.
+\let\lparen = ( \let\rparen = )
+
+% Be sure that we always have a definition for `(', etc.  For example,
+% if the fn name has parens in it, \boldbrax will not be in effect yet,
+% so TeX would otherwise complain about undefined control sequence.
+{
+  \activeparens
+  \global\let(=\lparen \global\let)=\rparen
+  \global\let[=\lbrack \global\let]=\rbrack
+  \global\let& = \&
+
+  \gdef\boldbrax{\let(=\opnr\let)=\clnr\let[=\lbrb\let]=\rbrb}
+  \gdef\magicamp{\let&=\amprm}
+}
+
+\newcount\parencount
+
+% If we encounter &foo, then turn on ()-hacking afterwards
+\newif\ifampseen
+\def\amprm#1 {\ampseentrue{\bf\&#1 }}
+
+\def\parenfont{%
+  \ifampseen
+    % At the first level, print parens in roman,
+    % otherwise use the default font.
+    \ifnum \parencount=1 \rm \fi
+  \else
+    % The \sf parens (in \boldbrax) actually are a little bolder than
+    % the contained text.  This is especially needed for [ and ] .
+    \sf
+  \fi
+}
+\def\infirstlevel#1{%
+  \ifampseen
+    \ifnum\parencount=1
+      #1%
+    \fi
+  \fi
+}
+\def\bfafterword#1 {#1 \bf}
+
+% \opnr and \clnr are what ( and ) mean once \boldbrax is in effect.
+% They keep \parencount equal to the current nesting depth.  At the
+% first level after an &keyword (\ifampseen), \opnr leaves the next
+% space-delimited word alone and then switches to \bf (\bfafterword),
+% and \clnr switches back to \sl.
+\def\opnr{%
+  \global\advance\parencount by 1
+  {\parenfont(}%
+  \infirstlevel \bfafterword
+}
+\def\clnr{%
+  {\parenfont)}%
+  \infirstlevel \sl
+  \global\advance\parencount by -1
+}
+
+% Likewise \lbrb and \rbrb for [ and ]: always bold, counted in
+% \brackcount.
+\newcount\brackcount
+\def\lbrb{%
+  \global\advance\brackcount by 1
+  {\bf[}%
+}
+\def\rbrb{%
+  {\bf]}%
+  \global\advance\brackcount by -1
+}
+
+\def\checkparencounts{%
+  \ifnum\parencount=0 \else \badparencount \fi
+  \ifnum\brackcount=0 \else \badbrackcount \fi
+}
+% these should not use \errmessage; the glibc manual, at least, actually
+% has such constructs (when documenting function pointers).
+\def\badparencount{%
+  \message{Warning: unbalanced parentheses in @def...}%
+  \global\parencount=0
+}
+\def\badbrackcount{%
+  \message{Warning: unbalanced square brackets in @def...}%
+  \global\brackcount=0
+}
+
+
+\message{macros,}
+% @macro.
+
+% To do this right we need a feature of e-TeX, \scantokens,
+% which we arrange to emulate with a temporary file in ordinary TeX.
+% Only when \eTeXversion is undefined (no e-TeX): emulate \scantokens
+% by writing the tokens to \jobname.tmp and reading that file back with
+% \input, which re-tokenizes them under the catcodes then in effect.
+\ifx\eTeXversion\undefined
+  \newwrite\macscribble
+  \def\scantokens#1{%
+    % Going through \toks0 makes \write output the tokens as they are,
+    % without expanding them.
+    \toks0={#1}%
+    \immediate\openout\macscribble=\jobname.tmp
+    \immediate\write\macscribble{\the\toks0}%
+    \immediate\closeout\macscribble
+    \input \jobname.tmp
+  }
+\fi
+
+\def\scanmacro#1{%
+  \begingroup
+    \newlinechar`\^^M
+    \let\xeatspaces\eatspaces
+    % Undo catcode changes of \startcontents and \doprintindex
+    % When called from @insertcopying or (short)caption, we need active
+    % backslash to get it printed correctly.  Previously, we had
+    % \catcode`\\=\other instead.  We'll see whether a problem appears
+    % with macro expansion.                            --kasal, 19aug04
+    \catcode`\@=0 \catcode`\\=\active \escapechar=`\@
+    % ... and \example
+    \spaceisspace
+    %
+    % Append \endinput to make sure that TeX does not see the ending newline.
+    % I've verified that it is necessary both for e-TeX and for ordinary TeX
+    %                                                  --kasal, 29nov03
+    \scantokens{#1\endinput}%
+  \endgroup
+}
+
+% Like \scanmacro, but #1 is fully expanded first: the \edef expands #1
+% while \noexpand keeps \scanmacro itself from being expanded.
+\def\scanexp#1{%
+  \edef\temp{\noexpand\scanmacro{#1}}%
+  \temp
+}
+
+\newcount\paramno   % Count of parameters
+\newtoks\macname    % Macro name
+\newif\ifrecursive  % Is it recursive?
+
+% List of all defined macros in the form
+%    \definedummyword\macro1\definedummyword\macro2...
+% Currently it also contains all @aliases; the list can be split
+% if there is a need.
+% The list starts out empty.
+\def\macrolist{}
+
+% Add the macro to \macrolist
+% #1 is the name without escape character; \csname turns it into a
+% control sequence before \addtomacrolistxxx receives it.
+\def\addtomacrolist#1{\expandafter \addtomacrolistxxx \csname#1\endcsname}
+% \expandafter expands \macrolist once, so \toks0 holds the old list
+% followed by \definedummyword and the new macro.  \xdef then stores
+% the register contents globally; tokens produced by \the are not
+% expanded further.
+\def\addtomacrolistxxx#1{%
+     \toks0 = \expandafter{\macrolist\definedummyword#1}%
+     \xdef\macrolist{\the\toks0}%
+}
+
+% Utility routines.
+% This does \let #1 = #2, with \csnames; that is,
+%   \let \csname#1\endcsname = \csname#2\endcsname
+% (except of course we have to play expansion games).
+% 
+\def\cslet#1#2{%
+  % The first \expandafter starts building \csname#1\endcsname before
+  % \let runs; the second, just before that \endcsname, makes TeX build
+  % \csname#2\endcsname first.  \let therefore sees two finished control
+  % sequences.
+  \expandafter\let
+  \csname#1\expandafter\endcsname
+  \csname#2\endcsname
+}
+
+% Trim leading and trailing spaces off a string.
+% Concepts from aro-bend problem 15 (see CTAN).
+{\catcode`\@=11
+\gdef\eatspaces #1{\expandafter\trim@\expandafter{#1 }}
+\gdef\trim@ #1{\trim@@ @#1 @ #1 @ @@}
+\gdef\trim@@ #1@ #2@ #3@@{\trim@@@\empty #2 @}
+\def\unbrace#1{#1}
+\unbrace{\gdef\trim@@@ #1 } #2@{#1}
+}
+
+% Trim a single trailing ^^M off a string.
+% Within this group ^^M is catcode \other and Q is catcode 3; Q serves
+% as the delimiter in the parameter texts of \eatcra and \eatcrb.
+{\catcode`\^^M=\other \catcode`\Q=3%
+\gdef\eatcr #1{\eatcra #1Q^^MQ}%
+\gdef\eatcra#1^^MQ{\eatcrb#1Q}%
+\gdef\eatcrb#1Q#2Q{#1}%
+}
+
+% Macro bodies are absorbed as an argument in a context where
+% all characters are catcode 10, 11 or 12, except \ which is active
+% (as in normal texinfo). It is necessary to change the definition of \.
+
+% Non-ASCII encodings make 8-bit characters active, so un-activate
+% them to avoid their expansion.  Must do this non-globally, to
+% confine the change to the current group.
+
+% It's necessary to have hard CRs when the macro is executed. This is
+% done by  making ^^M (\endlinechar) catcode 12 when reading the macro
+% body, and then making it the \newlinechar in \scanmacro.
+
+% Catcode changes shared by the three contexts defined after this one:
+% make the listed punctuation characters catcode \other and, unless the
+% declared encoding is ASCII, un-activate the non-ASCII characters as
+% well (non-globally).
+\def\scanctxt{%
+  \catcode`\"=\other
+  \catcode`\+=\other
+  \catcode`\<=\other
+  \catcode`\>=\other
+  \catcode`\@=\other
+  \catcode`\^=\other
+  \catcode`\_=\other
+  \catcode`\|=\other
+  \catcode`\~=\other
+  \ifx\declaredencoding\ascii \else \setnonasciicharscatcodenonglobal\other \fi
+}
+
+% \scanctxt, plus backslash and ^^M made catcode \other.
+\def\scanargctxt{%
+  \scanctxt
+  \catcode`\\=\other
+  \catcode`\^^M=\other
+}
+
+% Context in which \macroxxx reads a macro body: \scanctxt, plus braces
+% and ^^M made catcode \other, and \ given the \mbodybackslash meaning
+% through \usembodybackslash.
+\def\macrobodyctxt{%
+  \scanctxt
+  \catcode`\{=\other
+  \catcode`\}=\other
+  \catcode`\^^M=\other
+  \usembodybackslash
+}
+
+% Context in which the macros built by \defmacro read their arguments:
+% \scanctxt, plus backslash made catcode \other.
+\def\macroargctxt{%
+  \scanctxt
+  \catcode`\\=\other
+}
+
+% \mbodybackslash is the definition of \ in @macro bodies.
+% It maps \foo\ => \csname macarg.foo\endcsname => #N
+% where N is the macro parameter number.
+% We define \csname macarg.\endcsname to be \realbackslash, so
+% \\ in macro replacement text gets you a backslash.
+%
+% Inside the group below, @ is the escape character and \ is active,
+% which is why the two definitions are written with @ instead of \.
+
+{\catcode`@=0 @catcode`@\=@active
+ @gdef@usembodybackslash{@let\=@mbodybackslash}
+ @gdef@mbodybackslash#1\{@csname macarg.#1@endcsname}
+}
+\expandafter\def\csname macarg.\endcsname{\realbackslash}
+
+% @macro and @rmacro: record whether the macro is recursive, then let
+% \parsearg hand the rest of the line to \macroxxx.
+\def\macro{\recursivefalse\parsearg\macroxxx}
+\def\rmacro{\recursivetrue\parsearg\macroxxx}
+
+% #1 is the @macro line: the macro name, optionally followed by a
+% braced parameter list.
+\def\macroxxx#1{%
+  \getargs{#1}%           now \macname is the macname and \argl the arglist
+  \ifx\argl\empty       % no arguments
+     \paramno=0%
+  \else
+     \expandafter\parsemargdef \argl;%
+  \fi
+  % A name already flagged `ismacro' is being redefined: just warn.
+  % Otherwise complain if the name is already a defined control
+  % sequence, save its current meaning as \macsave.NAME, set the
+  % ismacro.NAME flag and add the name to \macrolist.
+  \if1\csname ismacro.\the\macname\endcsname
+     \message{Warning: redefining \the\macname}%
+  \else
+     \expandafter\ifx\csname \the\macname\endcsname \relax
+     \else \errmessage{Macro name \the\macname\space already defined}\fi
+     \global\cslet{macsave.\the\macname}{\the\macname}%
+     \global\expandafter\let\csname ismacro.\the\macname\endcsname=1%
+     \addtomacrolist{\the\macname}%
+  \fi
+  % Read the body inside a group, under the macro-body catcodes.  The
+  % body parser selected here closes that group again.
+  \begingroup \macrobodyctxt
+  \ifrecursive \expandafter\parsermacbody
+  \else \expandafter\parsemacbody
+  \fi}
+
+% @unmacro NAME: restore the meaning saved in \macsave.NAME, clear the
+% ismacro.NAME flag, and rebuild \macrolist without NAME.  It is an
+% error if NAME is not currently flagged as a macro.
+\parseargdef\unmacro{%
+  \if1\csname ismacro.#1\endcsname
+    \global\cslet{#1}{macsave.#1}%
+    \global\expandafter\let \csname ismacro.#1\endcsname=0%
+    % Remove the macro name from \macrolist:
+    % inside the group NAME is \relax and \definedummyword is
+    % \unmacrodo, so re-expanding the list drops the entry for NAME.
+    \begingroup
+      \expandafter\let\csname#1\endcsname \relax
+      \let\definedummyword\unmacrodo
+      \xdef\macrolist{\macrolist}%
+    \endgroup
+  \else
+    \errmessage{Macro #1 not defined}%
+  \fi
+}
+
+% Used as the temporary meaning of \definedummyword while \unmacro
+% re-expands \macrolist inside an \xdef.  The idea is to omit any
+% macro definitions that have been changed to \relax; every other
+% entry is written back unexpanded.
+%
+\def\unmacrodo#1{%
+  \ifx #1\relax
+    % remove this
+  \else
+    \noexpand\definedummyword \noexpand#1%
+  \fi
+}
+
+% This makes use of the obscure feature that if the last token of a
+% <parameter list> is #, then the preceding argument is delimited by
+% an opening brace, and that opening brace is not consumed.
+%
+% \getargs{LINE} leaves the macro name (the text up to the first space)
+% in the token register \macname and the contents of the braced
+% parameter list, if any, in \argl.  The {} appended by \getargs
+% guarantees that \getargsxxx always finds an opening brace.
+\def\getargs#1{\getargsxxx#1{}}
+\def\getargsxxx#1#{\getmacname #1 \relax\getmacargs}
+\def\getmacname #1 #2\relax{\macname={#1}}
+\def\getmacargs#1{\def\argl{#1}}
+
+% Parse the optional {params} list.  Set up \paramno and \paramlist
+% so \defmacro knows what to do.  Define \macarg.blah for each blah
+% in the params list, to be ##N where N is the position in that list.
+% That gets used by \mbodybackslash (above).
+
+% We need to get `macro parameter char #' into several definitions.
+% The technique used is stolen from LaTeX:  let \hash be something
+% unexpandable, insert that wherever you need a #, and then redefine
+% it to # just before using the token list produced.
+%
+% The same technique is used to protect \eatspaces till just before
+% the macro is used.
+
+% \parsemargdef LIST; walks the comma-separated LIST one item at a
+% time with \parsemargdefxxx.  The `;' item appended after the list
+% ends the recursion.  \hash and \xeatspaces are \let to \relax first
+% so that they survive the \edef's unexpanded.
+\def\parsemargdef#1;{\paramno=0\def\paramlist{}%
+        \let\hash\relax\let\xeatspaces\relax\parsemargdefxxx#1,;,}
+\def\parsemargdefxxx#1,{%
+  \if#1;\let\next=\relax
+  \else \let\next=\parsemargdefxxx
+    \advance\paramno by 1%
+    \expandafter\edef\csname macarg.\eatspaces{#1}\endcsname
+        {\xeatspaces{\hash\the\paramno}}%
+    \edef\paramlist{\paramlist\hash\the\paramno,}%
+  \fi\next}
+
+% These two commands read recursive and nonrecursive macro bodies.
+% (They're different since rec and nonrec macros end differently.)
+% Each one stores the body, passed through \eatcr, globally in \temp,
+% closes the group opened by \macroxxx, and then calls \defmacro.
+
+\long\def\parsemacbody#1@end macro%
+{\xdef\temp{\eatcr{#1}}\endgroup\defmacro}%
+\long\def\parsermacbody#1@end rmacro%
+{\xdef\temp{\eatcr{#1}}\endgroup\defmacro}%
+
+% This defines the macro itself. There are six cases: recursive and
+% nonrecursive macros of zero, one, and many arguments.
+% Much magic with \expandafter here.
+% \xdef is used so that macro definitions will survive the file
+% they're defined in; @include reads the file inside a group.
+%
+% The body text comes from \temp (set by \parsemacbody/\parsermacbody).
+% With one parameter the user-level macro reaches NAMExxx through
+% \braceorline; with several it calls NAMExx, which passes its argument
+% followed by a comma on to NAMExxx, whose parameter text is
+% \paramlist.  The nonrecursive variants additionally call \norecurse
+% before the body and close the group it opens afterwards.
+% Note that the recursive many-argument case stores an unexpanded
+% \csname NAMExx\endcsname, whereas the nonrecursive one stores the
+% control sequence itself.
+\def\defmacro{%
+  \let\hash=##% convert placeholders to macro parameter chars
+  \ifrecursive
+    \ifcase\paramno
+    % 0
+      \expandafter\xdef\csname\the\macname\endcsname{%
+        \noexpand\scanmacro{\temp}}%
+    \or % 1
+      \expandafter\xdef\csname\the\macname\endcsname{%
+         \bgroup\noexpand\macroargctxt
+         \noexpand\braceorline
+         \expandafter\noexpand\csname\the\macname xxx\endcsname}%
+      \expandafter\xdef\csname\the\macname xxx\endcsname##1{%
+         \egroup\noexpand\scanmacro{\temp}}%
+    \else % many
+      \expandafter\xdef\csname\the\macname\endcsname{%
+         \bgroup\noexpand\macroargctxt
+         \noexpand\csname\the\macname xx\endcsname}%
+      \expandafter\xdef\csname\the\macname xx\endcsname##1{%
+          \expandafter\noexpand\csname\the\macname xxx\endcsname ##1,}%
+      \expandafter\expandafter
+      \expandafter\xdef
+      \expandafter\expandafter
+        \csname\the\macname xxx\endcsname
+          \paramlist{\egroup\noexpand\scanmacro{\temp}}%
+    \fi
+  \else
+    \ifcase\paramno
+    % 0
+      \expandafter\xdef\csname\the\macname\endcsname{%
+        \noexpand\norecurse{\the\macname}%
+        \noexpand\scanmacro{\temp}\egroup}%
+    \or % 1
+      \expandafter\xdef\csname\the\macname\endcsname{%
+         \bgroup\noexpand\macroargctxt
+         \noexpand\braceorline
+         \expandafter\noexpand\csname\the\macname xxx\endcsname}%
+      \expandafter\xdef\csname\the\macname xxx\endcsname##1{%
+        \egroup
+        \noexpand\norecurse{\the\macname}%
+        \noexpand\scanmacro{\temp}\egroup}%
+    \else % many
+      \expandafter\xdef\csname\the\macname\endcsname{%
+         \bgroup\noexpand\macroargctxt
+         \expandafter\noexpand\csname\the\macname xx\endcsname}%
+      \expandafter\xdef\csname\the\macname xx\endcsname##1{%
+          \expandafter\noexpand\csname\the\macname xxx\endcsname ##1,}%
+      \expandafter\expandafter
+      \expandafter\xdef
+      \expandafter\expandafter
+      \csname\the\macname xxx\endcsname
+      \paramlist{%
+          \egroup
+          \noexpand\norecurse{\the\macname}%
+          \noexpand\scanmacro{\temp}\egroup}%
+    \fi
+  \fi}
+
+% Open a group in which the name #1 has the meaning that was saved as
+% \macsave.#1 when the macro was defined.  The matching \egroup is part
+% of the replacement text that \defmacro builds for nonrecursive macros.
+\def\norecurse#1{\bgroup\cslet{#1}{macsave.#1}}
+
+% \braceorline decides whether the next nonwhitespace character is a
+% {.  If so it reads up to the closing }, if not, it reads the whole
+% line.  Whatever was read is then fed to the next control sequence
+% as an argument (by \parsebrace or \parsearg)
+%
+% #1 is the control sequence to feed; it is remembered in \macnamexxx
+% while \futurelet stores the following token in \nchar.
+\def\braceorline#1{\let\macnamexxx=#1\futurelet\nchar\braceorlinexxx}
+\def\braceorlinexxx{%
+  \ifx\nchar\bgroup\else
+    \expandafter\parsearg
+  \fi \macnamexxx}
+
+
+% @alias.
+% We need some trickery to remove the optional spaces around the equal
+% sign.  Just make them active and then expand them all to nothing.
+%
+% \aliasyyy splits its argument at the first `=' into the new name #1
+% and the existing name #2.  Inside a group where the obeyed space is
+% \empty it adds #1 to \macrolist and builds a global \let of #1 to
+% #2; that assignment is exported with \xdef as \next and executed
+% once the group has ended.
+\def\alias{\parseargusing\obeyspaces\aliasxxx}
+\def\aliasxxx #1{\aliasyyy#1\relax}
+\def\aliasyyy #1=#2\relax{%
+  {%
+    \expandafter\let\obeyedspace=\empty
+    \addtomacrolist{#1}%
+    \xdef\next{\global\let\makecsname{#1}=\makecsname{#2}}%
+  }%
+  \next
+}
+
+
+\message{cross references,}
+
+\newwrite\auxfile
+\newif\ifhavexrefs    % True if xref values are known.
+\newif\ifwarnedxrefs  % True if we warned once that they aren't known.
+
+% @inforef is relatively simple.
+% The argument is split at commas: #1 is printed as the node name and
+% #3 as the Info file name; #2 is not printed.  The `,,,,**' appended
+% by \inforef supplies empty values for omitted arguments.
+\def\inforef #1{\inforefzzz #1,,,,**}
+\def\inforefzzz #1,#2,#3,#4**{\putwordSee{} \putwordInfo{} \putwordfile{} \file{\ignorespaces #3{}},
+  node \samp{\ignorespaces#1{}}}
+
+% @node's only job in TeX is to define \lastnode, which is used in
+% cross-references.  The @node line might or might not have commas, and
+% might or might not have spaces before the first comma, like:
+% @node foo , bar , ...
+% We don't want such trailing spaces in the node name.
+%
+% \donode keeps the text before the first ` ,' (space, then comma) and
+% \dodonode the text before the first comma; the result is stored
+% globally in \lastnode.
+%
+\parseargdef\node{\checkenv{}\donode #1 ,\finishnodeparse}
+%
+% also remove a trailing comma, in case of something like this:
+% @node Help-Cross,  ,  , Cross-refs
+\def\donode#1 ,#2\finishnodeparse{\dodonode #1,\finishnodeparse}
+\def\dodonode#1,#2\finishnodeparse{\gdef\lastnode{#1}}
+
+% \nwnode is another name for \node; \lastnode starts out empty.
+\let\nwnode=\node
+\let\lastnode=\empty
+
+% Write a cross-reference definition for the current node.  #1 is the
+% type (Ynumbered, Yappendix, Ynothing).
+% Nothing happens when \lastnode is empty; otherwise the reference is
+% written with \setref and \lastnode is emptied again, globally.
+%
+\def\donoderef#1{%
+  \ifx\lastnode\empty\else
+    \setref{\lastnode}{#1}%
+    \global\let\lastnode=\empty
+  \fi
+}
+
+% @anchor{NAME} -- define xref target at arbitrary point.
+%
+% \savesf and \restoresf save and restore \spacefactor (in horizontal
+% mode only) around the \setref; \anchor ends with \ignorespaces.
+%
+\newcount\savesfregister
+%
+\def\savesf{\relax \ifhmode \savesfregister=\spacefactor \fi}
+\def\restoresf{\relax \ifhmode \spacefactor=\savesfregister \fi}
+\def\anchor#1{\savesf \setref{#1}{Ynothing}\restoresf \ignorespaces}
+
+% \setref{NAME}{SNT} defines a cross-reference point NAME (a node or an
+% anchor), which consists of three parts:
+% 1) NAME-title - the current sectioning name taken from \lastsection,
+%                 or the anchor name.
+% 2) NAME-snt   - section number and type, passed as the SNT arg, or
+%                 empty for anchors.
+% 3) NAME-pg    - the page number.
+%
+% This is called from \donoderef, \anchor, and \dofloat.  In the case of
+% floats, there is an additional part, which is not written here:
+% 4) NAME-lof   - the text as it should appear in a @listoffloats.
+%
+\def\setref#1#2{%
+  \pdfmkdest{#1}%
+  % The aux-file entries are written only when \iflinks is true.  The
+  % title and snt entries are written immediately; the pg entry goes
+  % through \safewhatsit.
+  \iflinks
+    {%
+      \atdummies  % preserve commands, but don't expand them
+      \edef\writexrdef##1##2{%
+       \write\auxfile{@xrdef{#1-% #1 of \setref, expanded by the \edef
+         ##1}{##2}}% these are parameters of \writexrdef
+      }%
+      \toks0 = \expandafter{\lastsection}%
+      \immediate \writexrdef{title}{\the\toks0 }%
+      \immediate \writexrdef{snt}{\csname #2\endcsname}% \Ynumbered etc.
+      \safewhatsit{\writexrdef{pg}{\folio}}% will be written later, during \shipout
+    }%
+  \fi
+}
+
+% @xref, @pxref, and @ref generate cross-references.  For \xrefX, #1 is
+% the node name, #2 the name of the Info cross-reference, #3 the printed
+% node name, #4 the name of the Info file, #5 the name of the printed
+% manual.  All but the node name can be omitted.
+%
+% The commas appended after #1 give \xrefX empty values for whatever
+% arguments were omitted.  \pxref and \xref differ only in the word
+% they print first (\putwordsee versus \putwordSee); \ref prints none.
+\def\pxref#1{\putwordsee{} \xrefX[#1,,,,,,,]}
+\def\xref#1{\putwordSee{} \xrefX[#1,,,,,,,]}
+\def\ref#1{\xrefX[#1,,,,,,,]}
+% \xrefX does the real work of @xref, @pxref and @ref.  #6 is not used
+% in the body; it only absorbs the surplus commas appended by the
+% callers.  Box 0 holds the printed reference name (#3) and box 1 the
+% printed manual name (#5); their widths are used to tell whether
+% those arguments were given.
+\def\xrefX[#1,#2,#3,#4,#5,#6]{\begingroup
+  \unsepspaces
+  \def\printedmanual{\ignorespaces #5}%
+  \def\printedrefname{\ignorespaces #3}%
+  \setbox1=\hbox{\printedmanual\unskip}%
+  \setbox0=\hbox{\printedrefname\unskip}%
+  \ifdim \wd0 = 0pt
+    % No printed node name was explicitly given.
+    \expandafter\ifx\csname SETxref-automatic-section-title\endcsname\relax
+      % Use the node name inside the square brackets.
+      \def\printedrefname{\ignorespaces #1}%
+    \else
+      % Make the actual chapter/section title appear inside
+      % the square brackets.  Use the real section title if we have it.
+      \ifdim \wd1 > 0pt
+        % It is in another manual, so we don't have it.
+        \def\printedrefname{\ignorespaces #1}%
+      \else
+        \ifhavexrefs
+          % We know the real title if we have the xref values.
+          \def\printedrefname{\refx{#1-title}{}}%
+        \else
+          % Otherwise just copy the Info node name.
+          \def\printedrefname{\ignorespaces #1}%
+        \fi%
+      \fi
+    \fi
+  \fi
+  %
+  % Make link in pdf output.
+  \ifpdf
+    {\indexnofonts
+     \turnoffactive
+     % This expands tokens, so do it after making catcode changes, so _
+     % etc. don't get their TeX definitions.
+     \getfilename{#4}%
+     %
+     % See comments at \activebackslashdouble.
+     {\activebackslashdouble \xdef\pdfxrefdest{#1}%
+      \backslashparens\pdfxrefdest}%
+     %
+     \leavevmode
+     \startlink attr{/Border [0 0 0]}%
+     \ifnum\filenamelength>0
+       goto file{\the\filename.pdf} name{\pdfxrefdest}%
+     \else
+       goto name{\pdfmkpgn{\pdfxrefdest}}%
+     \fi
+    }%
+    \setcolor{\linkcolor}%
+  \fi
+  %
+  % Float references are printed completely differently: "Figure 1.2"
+  % instead of "[somenode], p.3".  We distinguish them by the
+  % LABEL-title being set to a magic string.
+  {%
+    % Have to otherify everything special to allow the \csname to
+    % include an _ in the xref name, etc.
+    \indexnofonts
+    \turnoffactive
+    \expandafter\global\expandafter\let\expandafter\Xthisreftitle
+      \csname XR#1-title\endcsname
+  }%
+  \iffloat\Xthisreftitle
+    % If the user specified the print name (third arg) to the ref,
+    % print it instead of our usual "Figure 1.2".
+    \ifdim\wd0 = 0pt
+      \refx{#1-snt}{}%
+    \else
+      \printedrefname
+    \fi
+    %
+    % if the user also gave the printed manual name (fifth arg), append
+    % "in MANUALNAME".
+    \ifdim \wd1 > 0pt
+      \space \putwordin{} \cite{\printedmanual}%
+    \fi
+  \else
+    % node/anchor (non-float) references.
+    %
+    % If we use \unhbox0 and \unhbox1 to print the node names, TeX does not
+    % insert empty discretionaries after hyphens, which means that it will
+    % not find a line break at a hyphen in a node name.  Since some manuals
+    % are best written with fairly long node names, containing hyphens, this
+    % is a loss.  Therefore, we give the text of the node name again, so it
+    % is as if TeX is seeing it for the first time.
+    \ifdim \wd1 > 0pt
+      \putwordSection{} ``\printedrefname'' \putwordin{} \cite{\printedmanual}%
+    \else
+      % _ (for example) has to be the character _ for the purposes of the
+      % control sequence corresponding to the node, but it has to expand
+      % into the usual \leavevmode...\vrule stuff for purposes of
+      % printing. So we \turnoffactive for the \refx-snt, back on for the
+      % printing, back off for the \refx-pg.
+      {\turnoffactive
+       % Only output a following space if the -snt ref is nonempty; for
+       % @unnumbered and @anchor, it won't be.
+       \setbox2 = \hbox{\ignorespaces \refx{#1-snt}{}}%
+       % (The \space below is picked up by \refx as its SUFFIX argument.)
+       \ifdim \wd2 > 0pt \refx{#1-snt}\space\fi
+      }%
+      % output the `[mynode]' via a macro so it can be overridden.
+      \xrefprintnodename\printedrefname
+      %
+      % But we always want a comma and a space:
+      ,\space
+      %
+      % output the `page 3'.
+      \turnoffactive \putwordpage\tie\refx{#1-pg}{}%
+    \fi
+  \fi
+  \endlink
+\endgroup}
+
+% This macro is called from \xrefX for the `[nodename]' part of xref
+% output.  It's a separate macro only so it can be changed more easily,
+% since square brackets don't work well in some documents.  Particularly
+% one that Bob is working on :).
+%
+\def\xrefprintnodename#1{[#1]}
+
+% Things referred to by \setref.
+% \setref expands \csname SNT\endcsname to obtain the text it writes as
+% the NAME-snt value.  \Ynothing and \Yomitfromtoc give no text;
+% \Ynumbered and \Yappendix give the Chapter/Appendix/Section word and
+% the number of the current sectioning level, chosen by testing which
+% of \secno, \subsecno and \subsubsecno are zero.
+% NOTE(review): @tie and @char are presumably spelled with @ because
+% the text is read back from the aux file, where @ is the escape
+% character -- confirm.
+%
+\def\Ynothing{}
+\def\Yomitfromtoc{}
+\def\Ynumbered{%
+  \ifnum\secno=0
+    \putwordChapter@tie \the\chapno
+  \else \ifnum\subsecno=0
+    \putwordSection@tie \the\chapno.\the\secno
+  \else \ifnum\subsubsecno=0
+    \putwordSection@tie \the\chapno.\the\secno.\the\subsecno
+  \else
+    \putwordSection@tie \the\chapno.\the\secno.\the\subsecno.\the\subsubsecno
+  \fi\fi\fi
+}
+\def\Yappendix{%
+  \ifnum\secno=0
+     \putwordAppendix@tie @char\the\appendixno{}%
+  \else \ifnum\subsecno=0
+     \putwordSection@tie @char\the\appendixno.\the\secno
+  \else \ifnum\subsubsecno=0
+    \putwordSection@tie @char\the\appendixno.\the\secno.\the\subsecno
+  \else
+    \putwordSection@tie
+      @char\the\appendixno.\the\secno.\the\subsecno.\the\subsubsecno
+  \fi\fi\fi
+}
+
+% Define \refx{NAME}{SUFFIX} to reference a cross-reference string named NAME.
+% SUFFIX is output afterward in every case, whether or not NAME is
+% defined.  The value is looked up as \csname XRNAME\endcsname and
+% copied globally into \thisrefX; when it is undefined, a visible
+% <undefined> marker is typeset and, if \iflinks is true, a message is
+% issued.
+%
+\def\refx#1#2{%
+  {%
+    \indexnofonts
+    \otherbackslash
+    \expandafter\global\expandafter\let\expandafter\thisrefX
+      \csname XR#1\endcsname
+  }%
+  \ifx\thisrefX\relax
+    % If not defined, say something at least.
+    \angleleft un\-de\-fined\angleright
+    \iflinks
+      \ifhavexrefs
+        \message{\linenumber Undefined cross reference `#1'.}%
+      \else
+        \ifwarnedxrefs\else
+          \global\warnedxrefstrue
+          \message{Cross reference values unknown; you must run TeX again.}%
+        \fi
+      \fi
+    \fi
+  \else
+    % It's defined, so just use it.
+    \thisrefX
+  \fi
+  #2% Output the suffix in any case.
+}
+
+% This is the macro invoked by entries in the aux file.  Usually it's
+% just a \def (we prepend XR to the control sequence name to avoid
+% collisions).  But if this is a float type, we have more work to do.
+%
+% #1 is the name of the cross-reference string (as written by \setref,
+% e.g. NAME-title) and #2 is its value.
+\def\xrdef#1#2{%
+  {% The node name might contain 8-bit characters, which in our current
+   % implementation are changed to commands like @'e.  Don't let these
+   % mess up the control sequence name.
+    \indexnofonts
+    \turnoffactive
+    \xdef\safexrefname{#1}%
+  }%
+  %
+  \expandafter\gdef\csname XR\safexrefname\endcsname{#2}% remember this xref
+  %
+  % Was that xref control sequence that we just defined for a float?
+  \expandafter\iffloat\csname XR\safexrefname\endcsname
+    % it was a float, and we have the (safe) float type in \iffloattype.
+    \expandafter\let\expandafter\floatlist
+      \csname floatlist\iffloattype\endcsname
+    %
+    % Is this the first time we've seen this float type?
+    \expandafter\ifx\floatlist\relax
+      \toks0 = {\do}% yes, so just \do
+    \else
+      % had it before, so preserve previous elements in list.
+      \toks0 = \expandafter{\floatlist\do}%
+    \fi
+    %
+    % Remember this xref in the control sequence \floatlistFLOATTYPE,
+    % for later use in \listoffloats.
+    \expandafter\xdef\csname floatlist\iffloattype\endcsname{\the\toks0
+      {\safexrefname}}%
+  \fi
+}
+
+% Read the last existing aux file, if any.  No error if none exists.
+% When \jobname.aux can be opened it is read with \readdatafile and
+% \ifhavexrefs is set true globally.  Input stream 1 is used for the
+% existence test and is closed again afterwards.
+%
+\def\tryauxfile{%
+  \openin 1 \jobname.aux
+  \ifeof 1 \else
+    \readdatafile{aux}%
+    \global\havexrefstrue
+  \fi
+  \closein 1
+}
+
+% Set the category codes needed to read back a data file (such as the
+% .aux file) written by an earlier run: the listed control characters
+% and the usual special characters, including backslash, become
+% catcode \other, while braces keep their grouping role and @ becomes
+% the escape character.  The assignments are local, so the caller is
+% expected to wrap the call in a group, as \readdatafile does.
+\def\setupdatafile{%
+  \catcode`\^^@=\other
+  \catcode`\^^A=\other
+  \catcode`\^^B=\other
+  \catcode`\^^C=\other
+  \catcode`\^^D=\other
+  \catcode`\^^E=\other
+  \catcode`\^^F=\other
+  \catcode`\^^G=\other
+  \catcode`\^^H=\other
+  \catcode`\^^K=\other
+  \catcode`\^^L=\other
+  \catcode`\^^N=\other
+  \catcode`\^^P=\other
+  \catcode`\^^Q=\other
+  \catcode`\^^R=\other
+  \catcode`\^^S=\other
+  \catcode`\^^T=\other
+  \catcode`\^^U=\other
+  \catcode`\^^V=\other
+  \catcode`\^^W=\other
+  \catcode`\^^X=\other
+  \catcode`\^^Z=\other
+  \catcode`\^^[=\other
+  \catcode`\^^\=\other
+  \catcode`\^^]=\other
+  \catcode`\^^^=\other
+  \catcode`\^^_=\other
+  % It was suggested to set the catcode of ^ to 7, which would allow ^^e4 etc.
+  % in xref tags, i.e., node names.  But since ^^e4 notation isn't
+  % supported in the main text, it doesn't seem desirable.  Furthermore,
+  % that is not enough: for node names that actually contain a ^
+  % character, we would end up writing a line like this: 'xrdef {'hat
+  % b-title}{'hat b} and \xrdef does a \csname...\endcsname on the first
+  % argument, and \hat is not an expandable control sequence.  It could
+  % all be worked out, but why?  Either we support ^^ or we don't.
+  %
+  % The other change necessary for this was to define \auxhat:
+  % \def\auxhat{\def^{'hat }}% extra space so ok if followed by letter
+  % and then to call \auxhat in \setq.
+  %
+  \catcode`\^=\other
+  %
+  % Special characters.  Should be turned off anyway, but...
+  \catcode`\~=\other
+  \catcode`\[=\other
+  \catcode`\]=\other
+  \catcode`\"=\other
+  \catcode`\_=\other
+  \catcode`\|=\other
+  \catcode`\<=\other
+  \catcode`\>=\other
+  \catcode`\$=\other
+  \catcode`\#=\other
+  \catcode`\&=\other
+  \catcode`\%=\other
+  \catcode`+=\other % avoid \+ for paranoia even though we've turned it off
+  %
+  % This is to support \ in node names and titles, since the \
+  % characters end up in a \csname.  It's easier than
+  % leaving it active and making its active definition an actual \
+  % character.  What I don't understand is why it works in the *value*
+  % of the xrdef.  Seems like it should be a catcode12 \, and that
+  % should not typeset properly.  But it works, so I'm moving on for
+  % now.  --karl, 15jan04.
+  \catcode`\\=\other
+  %
+  % Characters 128-255 are not touched here.  This macro used to contain
+  % a \loop that was meant to make them printing characters, but the
+  % loop was only defined (inside a group of its own) and never run, so
+  % it changed nothing; the dead code has been removed.
+  %
+  % @ is our escape character in .aux files, and we need braces.
+  \catcode`\{=1
+  \catcode`\}=2
+  \catcode`\@=0
+}
+
+% \readdatafile{EXT}: \input the file \jobname.EXT inside a group, with
+% the catcodes established by \setupdatafile.
+\def\readdatafile#1{%
+\begingroup
+  \setupdatafile
+  \input\jobname.#1
+\endgroup}
+
+
+\message{insertions,}
+% including footnotes.
+
+\newcount \footnoteno
+
+% The trailing space in the following definition for supereject is
+% vital for proper filling; pages come out unaligned when you do a
+% pagealignmacro call if that space before the closing brace is
+% removed. (Generally, numeric constants should always be followed by a
+% space to prevent strange expansion errors.)
+% Besides ending the paragraph and inserting the penalty, this
+% definition also sets \footnoteno back to 0.
+\def\supereject{\par\penalty -20000\footnoteno =0 }
+
+% @footnotestyle is meaningful for info output only, so here it is
+% simply given the same meaning as \comment.
+\let\footnotestyle=\comment
+
+{\catcode `\@=11
+%
+% Auto-number footnotes.  Otherwise like plain.
+% \footnote advances \footnoteno globally, builds the superscript
+% mark in \thisfootno, typesets the mark and then lets \dofootnote
+% collect the footnote text.
+\gdef\footnote{%
+  \let\indent=\ptexindent
+  \let\noindent=\ptexnoindent
+  \global\advance\footnoteno by \@ne
+  \edef\thisfootno{$^{\the\footnoteno}$}%
+  %
+  % In case the footnote comes at the end of a sentence, preserve the
+  % extra spacing after we do the footnote number.
+  \let\@sf\empty
+  \ifhmode\edef\@sf{\spacefactor\the\spacefactor}\ptexslash\fi
+  %
+  % Remove inadvertent blank space before typesetting the footnote number.
+  \unskip
+  \thisfootno\@sf
+  \dofootnote
+}%
+
+% Don't bother with the trickery in plain.tex to not require the
+% footnote text as a parameter.  Our footnotes don't need to be so general.
+%
+% Oh yes, they do; otherwise, @ifset (and anything else that uses
+% \parseargline) fails inside footnotes because the tokens are fixed when
+% the footnote is read.  --karl, 16nov96.
+%
+% \dofootnote opens the \insert\footins group, resets the paragraph
+% parameters for footnote text, typesets the mark with \textindent and
+% then hands over to \fo@t (not defined in this part of the file;
+% presumably the plain TeX macro) to pick up the footnote text.
+\gdef\dofootnote{%
+  \insert\footins\bgroup
+  % We want to typeset this text as a normal paragraph, even if the
+  % footnote reference occurs in (for example) a display environment.
+  % So reset some parameters.
+  \hsize=\pagewidth
+  \interlinepenalty\interfootnotelinepenalty
+  \splittopskip\ht\strutbox % top baseline for broken footnotes
+  \splitmaxdepth\dp\strutbox
+  \floatingpenalty\@MM
+  \leftskip\z@skip
+  \rightskip\z@skip
+  \spaceskip\z@skip
+  \xspaceskip\z@skip
+  \parindent\defaultparindent
+  %
+  \smallfonts \rm
+  %
+  % Because we use hanging indentation in footnotes, a @noindent appears
+  % to exdent this text, so make it be a no-op.  makeinfo does not use
+  % hanging indentation so @noindent can still be needed within footnote
+  % text after an @example or the like (not that this is good style).
+  \let\noindent = \relax
+  %
+  % Hang the footnote text off the number.  Use \everypar in case the
+  % footnote extends for more than one paragraph.
+  \everypar = {\hang}%
+  \textindent{\thisfootno}%
+  %
+  % Don't crash into the line above the footnote text.  Since this
+  % expands into a box, it must come within the paragraph, lest it
+  % provide a place where TeX can split the footnote.
+  \footstrut
+  \futurelet\next\fo@t
+}
+}%end \catcode `\@=11
+
+% In case a @footnote appears in a vbox, save the footnote text and create
+% the real \insert just after the vbox finished.  Otherwise, the insertion
+% would be lost.
+% Similarly, if a @footnote appears inside an alignment, save the footnote
+% text to a box and make the \insert when a row of the table is finished.
+% And the same can be done for other insert classes.  --kasal, 16nov03.
+
+% Replace the \insert primitive by a cheating macro.
+% Deeper inside, just make sure that the saved insertions are not spilled
+% out prematurely.
+%
+% If \insert still has the meaning of \ptexinsert, replace it by
+% \saveinsert.  Otherwise \insert has already been replaced, so only
+% \checkinserts is disabled by making it \relax.
+\def\startsavinginserts{%
+  \ifx \insert\ptexinsert
+    \let\insert\saveinsert
+  \else
+    \let\checkinserts\relax
+  \fi
+}
+
+% This \insert replacement works for both \insert\footins{foo} and
+% \insert\footins\bgroup foo\egroup, but it doesn't work for \insert27{foo}.
+%
+% #1 is the insertion class, e.g. \footins.  \makeSAVEname#1 names the
+% corresponding save box (SAVE followed by the name of #1 without its
+% escape character).  \savetobox globally rebuilds that box as a \vbox
+% which starts with the box's previous contents; it runs by means of
+% \afterassignment once the \let has swallowed the opening brace.
+%
+\def\saveinsert#1{%
+  \edef\next{\noexpand\savetobox \makeSAVEname#1}%
+  \afterassignment\next
+  % swallow the left brace
+  \let\temp =
+}
+\def\makeSAVEname#1{\makecsname{SAVE\expandafter\gobble\string#1}}
+\def\savetobox#1{\global\setbox#1 = \vbox\bgroup \unvbox#1}
+
+% \checksaveins#1: if the save box #1 is not void, output it with
+% \placesaveins.  \placesaveins recovers the name of the insertion
+% class by stripping the leading @SAVE from the box's name (see
+% \gobblesave) and inserts the box with the saved \ptexinsert.
+\def\checksaveins#1{\ifvoid#1\else \placesaveins#1\fi}
+
+\def\placesaveins#1{%
+  \ptexinsert \csname\expandafter\gobblesave\string#1\endcsname
+    {\box#1}%
+}
+
+% eat @SAVE -- beware, all of them have catcode \other:
+% \dospecials is redefined locally to list just S, A, V and E, so that
+% \uncatcodespecials changes the catcode of those four letters before
+% the parameter text of \gobblesave is read.
+{
+  \def\dospecials{\do S\do A\do V\do E} \uncatcodespecials  %  ;-)
+  \gdef\gobblesave @SAVE{}
+}
+
+% initialization:
+% \newsaveins\CLASS allocates the save box for insertion class \CLASS
+% and appends a \checksaveins call for that box to \checkinserts.
+% NOTE(review): \newbox is presumably reached through \csname because
+% it is \outer in plain TeX -- confirm.
+\def\newsaveins #1{%
+  \edef\next{\noexpand\newsaveinsX \makeSAVEname#1}%
+  \next
+}
+\def\newsaveinsX #1{%
+  \csname newbox\endcsname #1%
+  \expandafter\def\expandafter\checkinserts\expandafter{\checkinserts
+    \checksaveins #1}%
+}
+
+% initialize:
+\let\checkinserts\empty
+\newsaveins\footins
+\newsaveins\margin
+
+
+% @image.  We use the macros from epsf.tex to support this.
+% If epsf.tex is not installed and @image is used, we complain.
+%
+% Check for and read epsf.tex up front.  If we read it only at @image
+% time, we might be inside a group, and then its definitions would get
+% undone and the next image would fail.
+% Input stream 1 is used only to test whether epsf.tex can be found;
+% the file is read with \input when it can.
+\openin 1 = epsf.tex
+\ifeof 1 \else
+  % Do not bother showing banner with epsf.tex v2.7k (available in
+  % doc/epsf.tex and on ctan).
+  % NOTE(review): defining \epsfannounce as `\toks0 = ' presumably makes
+  % epsf.tex store its banner in \toks0 instead of printing it --
+  % confirm against epsf.tex.
+  \def\epsfannounce{\toks0 = }%
+  \input epsf.tex
+\fi
+\closein 1
+%
+% We will only complain once about lack of epsf.tex.
+\newif\ifwarnednoepsf
+\newhelp\noepsfhelp{epsf.tex must be installed for images to
+  work.  It is also included in the Texinfo distribution, or you can get
+  it from ftp://tug.org/tex/epsf.tex.}
+%
+% @image{ARGS}: when \epsfbox is undefined, raise an error the first
+% time only (\ifwarnednoepsf remembers it) and ignore the image;
+% otherwise pass ARGS, padded with commas so that omitted arguments
+% come out empty, to \imagexxx.
+\def\image#1{%
+  \ifx\epsfbox\undefined
+    \ifwarnednoepsf \else
+      \errhelp = \noepsfhelp
+      \errmessage{epsf.tex not found, images will be ignored}%
+      \global\warnednoepsftrue
+    \fi
+  \else
+    \imagexxx #1,,,,,\finish
+  \fi
+}
+%
+% Arguments to @image:
+% #1 is (mandatory) image filename; we tack on .eps extension.
+% #2 is (optional) width, #3 is (optional) height.
+% #4 is (ignored optional) html alt text.
+% #5 is (ignored optional) extension.
+% #6 is just the usual extra ignored arg for parsing this stuff.
+%
+% \ifimagevmode records that the image started in vertical mode, so
+% that the matching space can be added after it.  In pdf mode the
+% first three arguments go to \dopdfimage; otherwise \epsfxsize and
+% \epsfysize are set from #2 and #3 when these are nonempty and
+% #1.eps is passed to \epsfbox.
+\newif\ifimagevmode
+\def\imagexxx#1,#2,#3,#4,#5,#6\finish{\begingroup
+  \catcode`\^^M = 5     % in case we're inside an example
+  \normalturnoffactive  % allow _ et al. in names
+  % If the image is by itself, center it.
+  \ifvmode
+    \imagevmodetrue
+    \nobreak\medskip
+    % Usually we'll have text after the image which will insert
+    % \parskip glue, so insert it here too to equalize the space
+    % above and below.
+    \nobreak\vskip\parskip
+    \nobreak
+  \fi
+  %
+  % Leave vertical mode so that indentation from an enclosing
+  % environment such as @quotation is respected.  On the other hand, if
+  % it's at the top level, we don't want the normal paragraph indentation.
+  \noindent
+  %
+  % Output the image.
+  \ifpdf
+    \dopdfimage{#1}{#2}{#3}%
+  \else
+    % \epsfbox itself resets \epsf?size at each figure.
+    \setbox0 = \hbox{\ignorespaces #2}\ifdim\wd0 > 0pt \epsfxsize=#2\relax \fi
+    \setbox0 = \hbox{\ignorespaces #3}\ifdim\wd0 > 0pt \epsfysize=#3\relax \fi
+    \epsfbox{#1.eps}%
+  \fi
+  %
+  \ifimagevmode \medskip \fi  % space after the standalone image
+\endgroup}
+
+
+% @float FLOATTYPE,LABEL,LOC ... @end float for displayed figures, tables,
+% etc.  We don't actually implement floating yet, we always include the
+% float "here".  But it seemed the best name for the future.
+%
+% Each of the two \eatcommaspace calls replaces the next `, ' (comma
+% followed by a space) in the text after it with a bare comma before
+% \dofloat reads its comma-separated arguments.
+%
+\envparseargdef\float{\eatcommaspace\eatcommaspace\dofloat#1, , ,\finish}
+
+% There may be a space before second and/or third parameter; delete it.
+\def\eatcommaspace#1, {#1,}
+
+% #1 is the optional FLOATTYPE, the text label for this float, typically
+% "Figure", "Table", "Example", etc.  Can't contain commas.  If omitted,
+% this float will not be numbered and cannot be referred to.
+%
+% #2 is the optional xref label.  Also must be present for the float to
+% be referable.
+%
+% #3 is the optional positioning argument; for now, it is ignored.  It
+% will somehow specify the positions allowed to float to (here, top, bottom).
+%
+% We keep a separate counter for each FLOATTYPE, which we reset at each
+% chapter-level command.
+\let\resetallfloatnos=\empty
+%
+% \dofloat FLOATTYPE,LABEL,LOC,...\finish starts the float: it clears
+% the caption macros, starts saving inserts, opens a \vtop that is
+% left open by this macro, records the three arguments and, when a
+% label was given, advances the float counter and writes the
+% cross-reference with \setref.
+\def\dofloat#1,#2,#3,#4\finish{%
+  \let\thiscaption=\empty
+  \let\thisshortcaption=\empty
+  %
+  % don't lose footnotes inside @float.
+  %
+  % BEWARE: when the floats start float, we have to issue warning whenever an
+  % insert appears inside a float which could possibly float. --kasal, 26may04
+  %
+  \startsavinginserts
+  %
+  % We can't be used inside a paragraph.
+  \par
+  %
+  \vtop\bgroup
+    \def\floattype{#1}%
+    \def\floatlabel{#2}%
+    \def\floatloc{#3}% we do nothing with this yet.
+    %
+    \ifx\floattype\empty
+      \let\safefloattype=\empty
+    \else
+      {%
+        % the floattype might have accents or other special characters,
+        % but we need to use it in a control sequence name.
+        \indexnofonts
+        \turnoffactive
+        \xdef\safefloattype{\floattype}%
+      }%
+    \fi
+    %
+    % If label is given but no type, we handle that as the empty type.
+    \ifx\floatlabel\empty \else
+      % We want each FLOATTYPE to be numbered separately (Figure 1,
+      % Table 1, Figure 2, ...).  (And if no label, no number.)
+      %
+      \expandafter\getfloatno\csname\safefloattype floatno\endcsname
+      \global\advance\floatno by 1
+      %
+      {%
+        % This magic value for \lastsection is output by \setref as the
+        % XREFLABEL-title value.  \xrefX uses it to distinguish float
+        % labels (which have a completely different output format) from
+        % node and anchor labels.  And \xrdef uses it to construct the
+        % lists of floats.
+        %
+        \edef\lastsection{\floatmagic=\safefloattype}%
+        \setref{\floatlabel}{Yfloat}%
+      }%
+    \fi
+    %
+    % start with \parskip glue, I guess.
+    \vskip\parskip
+    %
+    % Don't suppress indentation if a float happens to start a section.
+    \restorefirstparagraphindent
+}
+
+% we have these possibilities:
+% @float Foo,lbl & @caption{Cap}: Foo 1.1: Cap
+% @float Foo,lbl & no caption:    Foo 1.1
+% @float Foo & @caption{Cap}:     Foo: Cap
+% @float Foo & no caption:        Foo
+% @float ,lbl & @caption{Cap}:    1.1: Cap
+% @float ,lbl & no caption:       1.1
+% @float & @caption{Cap}:         Cap
+% @float & no caption:
+%
+% \Efloat: builds the identification text (\floattype, a tie, and the
+% float number) in \floatident, appends the caption (if any) to get
+% \captionline, typesets it, writes the list-of-floats entry to the aux
+% file when the float has a label, closes the \vtop, and finally calls
+% \checkinserts.  (Presumably run at @end float -- the E prefix suggests
+% so, but the caller is not visible here.)
+%
+\def\Efloat{%
+    \let\floatident = \empty
+    %
+    % In all cases, if we have a float type, it comes first.
+    \ifx\floattype\empty \else \def\floatident{\floattype}\fi
+    %
+    % If we have an xref label, the number comes next.
+    \ifx\floatlabel\empty \else
+      \ifx\floattype\empty \else % if also had float type, need tie first.
+        \appendtomacro\floatident{\tie}%
+      \fi
+      % the number.
+      \appendtomacro\floatident{\chaplevelprefix\the\floatno}%
+    \fi
+    %
+    % Start the printed caption with what we've constructed in
+    % \floatident, but keep it separate; we need \floatident again.
+    \let\captionline = \floatident
+    %
+    \ifx\thiscaption\empty \else
+      \ifx\floatident\empty \else
+       \appendtomacro\captionline{: }% had ident, so need a colon between
+      \fi
+      %
+      % caption text.
+      \appendtomacro\captionline{\scanexp\thiscaption}%
+    \fi
+    %
+    % If we have anything to print, print it, with space before.
+    % Eventually this needs to become an \insert.
+    \ifx\captionline\empty \else
+      \vskip.5\parskip
+      \captionline
+      %
+      % Space below caption.
+      \vskip\parskip
+    \fi
+    %
+    % If have an xref label, write the list of floats info.  Do this
+    % after the caption, to avoid chance of it being a breakpoint.
+    \ifx\floatlabel\empty \else
+      % Write the text that goes in the lof to the aux file as
+      % \floatlabel-lof.  Besides \floatident, we include the short
+      % caption if specified, else the full caption if specified, else nothing.
+      {%
+        \atdummies
+        %
+        % since we read the caption text in the macro world, where ^^M
+        % is turned into a normal character, we have to scan it back, so
+        % we don't write the literal three characters "^^M" into the aux file.
+       \scanexp{%
+         \xdef\noexpand\gtemp{%
+           \ifx\thisshortcaption\empty
+             \thiscaption
+           \else
+             \thisshortcaption
+           \fi
+         }%
+       }%
+        % \gtemp now holds the caption text chosen above (possibly empty).
+        \immediate\write\auxfile{@xrdef{\floatlabel-lof}{\floatident
+         \ifx\gtemp\empty \else : \gtemp \fi}}%
+      }%
+    \fi
+  \egroup  % end of \vtop
+  %
+  % place the captured inserts
+  %
+  % BEWARE: when the floats start floating, we have to issue a warning
+  % whenever an insert appears inside a float which could possibly
+  % float. --kasal, 26may04
+  %
+  \checkinserts
+}
+
+% Append the tokens #2 to the definition of macro #1, not expanding either.
+% #1 is a parameterless macro; #2 is an arbitrary token list.
+% The \expandafter chain expands #1 exactly one level (to its current
+% replacement text) before \def reads the braces, so the result is
+% \def#1{<old text>#2}; nothing inside the old text or #2 is expanded.
+% The redefinition is local to the current group.
+%
+\def\appendtomacro#1#2{%
+  \expandafter\def\expandafter#1\expandafter{#1#2}%
+}
+
+% @caption, @shortcaption
+%
+% Both store their braced argument in a macro (\thiscaption or
+% \thisshortcaption respectively) for later use by \Efloat.
+% \docaption first checks that we are inside a float environment, then
+% opens a group and calls \scanargctxt (defined elsewhere; presumably it
+% sets up catcodes for reading the argument) before \defcaption reads it.
+% \defcaption: #1 is the macro to define, #2 the caption text.  It
+% closes the group opened by \docaption and then defines #1 locally.
+%
+\def\caption{\docaption\thiscaption}
+\def\shortcaption{\docaption\thisshortcaption}
+\def\docaption{\checkenv\float \bgroup\scanargctxt\defcaption}
+\def\defcaption#1#2{\egroup \def#1{#2}}
+
+% The parameter is the control sequence identifying the counter we are
+% going to use.  Create it if it doesn't exist and assign it to \floatno.
+%
+% #1 is expected to have been built with \csname, so that it has the
+% meaning \relax when no counter of that name exists yet.
+\def\getfloatno#1{%
+  \ifx#1\relax
+      % Haven't seen this figure type before.
+      % (\newcount is reached through \csname; presumably because
+      % plain TeX declares it \outer, which forbids naming it directly
+      % inside a macro body.)
+      \csname newcount\endcsname #1%
+      %
+      % Remember to reset this floatno at the next chap.
+      % Globally append "#1=0 " to the existing \resetallfloatnos list.
+      \expandafter\gdef\expandafter\resetallfloatnos
+        \expandafter{\resetallfloatnos #1=0 }%
+  \fi
+  % Make \floatno an alias for the selected counter.
+  \let\floatno#1%
+}
+
+% \setref calls this to get the XREFLABEL-snt value.  We want an @xref
+% to the FLOATLABEL to expand to "Figure 3.1".  We call \setref when we
+% first read the @float command.
+%
+% NOTE(review): "@tie" below appears to be meant as the literal
+% characters written to the aux file (to be read back as the @tie
+% command); this relies on @ not being an escape character while this
+% definition is read -- confirm against the catcode setup.
+%
+\def\Yfloat{\floattype@tie \chaplevelprefix\the\floatno}%
+
+% Magic string used for the XREFLABEL-title value, so \xrefX can
+% distinguish floats from other xref types.  It is stored as
+% "\floatmagic=<safe float type>" and recognized again by \doiffloat.
+\def\floatmagic{!!float!!}
+
+% #1 is the control sequence we are passed; we expand into a conditional
+% which is true if #1 represents a float ref.  That is, the magic
+% \lastsection value which we \setref above.
+%
+% #1 is expanded once and followed by "==" so that \doiffloat always
+% finds its two = delimiters, even when #1 contains none.
+%
+\def\iffloat#1{\expandafter\doiffloat#1==\finish}
+%
+% #1 is (maybe) the \floatmagic string.  If so, #2 will be the
+% (safe) float type for this float.  We set \iffloattype to #2.
+% #3 absorbs whatever is left before \finish.
+%
+% Note that the macro ends with an open \ifx: the caller must supply
+% the matching \else/\fi.
+%
+\def\doiffloat#1=#2=#3\finish{%
+  \def\temp{#1}%
+  \def\iffloattype{#2}%
+  \ifx\temp\floatmagic
+}
+
+% @listoffloats FLOATTYPE - print a list of floats like a table of contents.
+%
+% The list itself is the macro \floatlistSAFEFLOATTYPE; if that macro
+% is undefined nothing is typeset, and a message is logged only when
+% \ifhavexrefs is true.
+%
+\parseargdef\listoffloats{%
+  \def\floattype{#1}% floattype
+  {%
+    % the floattype might have accents or other special characters,
+    % but we need to use it in a control sequence name.
+    \indexnofonts
+    \turnoffactive
+    % \xdef so that the sanitized name survives this group.
+    \xdef\safefloattype{\floattype}%
+  }%
+  %
+  % \xrdef saves the floats as a \do-list in \floatlistSAFEFLOATTYPE.
+  \expandafter\ifx\csname floatlist\safefloattype\endcsname \relax
+    \ifhavexrefs
+      % if the user said @listoffloats foo but never @float foo.
+      \message{\linenumber No `\safefloattype' floats to list.}%
+    \fi
+  \else
+    \begingroup
+      \leftskip=\tocindent  % indent these entries like a toc
+      % Each list item is "\do{LABEL-title}"; run it through our handler.
+      \let\do=\listoffloatsdo
+      \csname floatlist\safefloattype\endcsname
+    \endgroup
+  \fi
+}
+
+% This is called on each entry in a list of floats.  We're passed the
+% xref label, in the form LABEL-title, which is how we save it in the
+% aux file.  We strip off the -title and look up \XRLABEL-lof, which
+% has the text we're supposed to typeset here.
+%
+% Figures without xref labels will not be included in the list (since
+% they won't appear in the aux file).
+%
+% \listoffloatsdo appends \finish so that \listoffloatsdoentry can use
+% "-title\finish" as the delimiter of its single parameter (the LABEL).
+%
+\def\listoffloatsdo#1{\listoffloatsdoentry#1\finish}
+\def\listoffloatsdoentry#1-title\finish{{%
+  % Can't fully expand XR#1-lof because it can contain anything.  Just
+  % pass the control sequence.  On the other hand, XR#1-pg is just the
+  % page number, and we want to fully expand that so we can get a link
+  % in pdf output.
+  \toksA = \expandafter{\csname XR#1-lof\endcsname}%
+  %
+  % use the same \entry macro we use to generate the TOC and index.
+  % Inside the \edef, \the\toksA yields the -lof control sequence
+  % unexpanded, while \csname XR#1-pg\endcsname is expanded fully.
+  \edef\writeentry{\noexpand\entry{\the\toksA}{\csname XR#1-pg\endcsname}}%
+  \writeentry
+}}
+
+
+\message{localization,}
+
+% @documentlanguage is usually given very early, just after
+% @setfilename.  If done too late, it may not override everything
+% properly.  Single argument is the language (de) or locale (de_DE)
+% abbreviation.  It would be nice if we could set up a hyphenation file.
+%
+% Both macros are defined while _ is active, so that the _ delimiting
+% the first parameter of \documentlanguagetrywithoutunderscore is the
+% same token as the _ that \documentlanguage passes on.
+% \globaldefs=1 makes the definitions survive the enclosing group.
+%
+{
+  \catcode`\_ = \active
+  \globaldefs=1
+\parseargdef\documentlanguage{\begingroup
+  \let_=\normalunderscore  % normal _ character for filenames
+  \tex % read txi-??.tex file in plain TeX.
+    % Read the file by the name they passed if it exists.
+    \openin 1 txi-#1.tex
+    \ifeof 1
+      % The argument must NOT be wrapped in braces: the fallback macro
+      % takes parameters delimited by _ and \finish, and a brace group
+      % would hide those delimiters (runaway argument).  The extra _
+      % guarantees the delimiter is present even for a plain "de".
+      \documentlanguagetrywithoutunderscore #1_\finish
+    \else
+      \input txi-#1.tex
+    \fi
+    \closein 1
+  \endgroup
+\endgroup}
+%
+% If they passed de_DE, and txi-de_DE.tex doesn't exist,
+% try txi-de.tex.
+%
+% #1 is the text before the first _ (the language), #2 the remainder,
+% which is ignored.  An error is raised if txi-#1.tex cannot be read.
+%
+\gdef\documentlanguagetrywithoutunderscore#1_#2\finish{%
+  \openin 1 txi-#1.tex
+  \ifeof 1
+    \errhelp = \nolanghelp
+    \errmessage{Cannot read language file txi-#1.tex}%
+  \else
+    \input txi-#1.tex
+  \fi
+  \closein 1
+}
+}% end of group with active _
+%
+% Help text installed in \errhelp when a txi-LANG.tex file cannot be read.
+\newhelp\nolanghelp{The given language definition file cannot be found or
+is empty.  Maybe you need to install it?  In the current directory
+should work if nowhere else does.}
+
+% Set the catcode of characters 128 through 255 to the specified number.
+% The assignments are \global.  #1 is the category code (a number or a
+% \chardef'd name such as \active or \other).  Uses \count255 as the
+% loop counter without restoring it.
+%
+\def\setnonasciicharscatcode#1{%
+   \count255=128
+   \loop\ifnum\count255<256
+      \global\catcode\count255=#1\relax
+      \advance\count255 by 1
+   \repeat
+}
+
+% Same as \setnonasciicharscatcode, but the catcode assignments are
+% local to the current group.  Also uses \count255 as the loop counter.
+\def\setnonasciicharscatcodenonglobal#1{%
+   \count255=128
+   \loop\ifnum\count255<256
+      \catcode\count255=#1\relax
+      \advance\count255 by 1
+   \repeat
+}
+
+% @documentencoding sets the definition of non-ASCII characters
+% according to the specified encoding.
+%
+% Recognized names: US-ASCII, ISO-8859-1, ISO-8859-2, ISO-8859-15 and
+% UTF-8.  Anything else only produces a message and is otherwise ignored.
+% For every recognized encoding except US-ASCII, characters 128-255 are
+% made active (globally) before the character definitions are installed.
+%
+\parseargdef\documentencoding{%
+  % Encoding being declared for the document.
+  \def\declaredencoding{\csname #1.enc\endcsname}%
+  %
+  % Supported encodings: names converted to tokens in order to be able
+  % to compare them with \ifx.
+  % (\ifx compares the unexpanded replacement texts of the two macros,
+  % so the \csname...\endcsname constructions are never executed.)
+  \def\ascii{\csname US-ASCII.enc\endcsname}%
+  \def\latnine{\csname ISO-8859-15.enc\endcsname}%
+  \def\latone{\csname ISO-8859-1.enc\endcsname}%
+  \def\lattwo{\csname ISO-8859-2.enc\endcsname}%
+  \def\utfeight{\csname UTF-8.enc\endcsname}%
+  %
+  \ifx \declaredencoding \ascii
+     \asciichardefs
+  %
+  \else \ifx \declaredencoding \lattwo
+     \setnonasciicharscatcode\active
+     \lattwochardefs
+  %
+  \else \ifx \declaredencoding \latone 
+     \setnonasciicharscatcode\active
+     \latonechardefs
+  %
+  \else \ifx \declaredencoding \latnine
+     \setnonasciicharscatcode\active
+     \latninechardefs
+  %
+  \else \ifx \declaredencoding \utfeight
+     \setnonasciicharscatcode\active
+     \utfeightchardefs
+  %
+  \else 
+    \message{Unknown document encoding #1, ignoring.}%
+  %
+  \fi % utfeight
+  \fi % latnine
+  \fi % latone
+  \fi % lattwo
+  \fi % ascii
+}
+
+% A message to be logged when using a character that isn't available
+% in the default font encoding (OT1).  #1 is the name of the character.
+% Nothing is typeset.
+% 
+\def\missingcharmsg#1{\message{Character missing in OT1 encoding: #1.}}
+
+% Take account of \c (plain) vs. \, (Texinfo) difference.
+% If \c currently has the meaning saved in \ptexc, use \c to put the
+% cedilla under #1; otherwise use \, instead.
+\def\cedilla#1{\ifx\c\ptexc\c{#1}\else\,{#1}\fi}
+
+% First, make the non-ASCII characters active, so that they get the
+% right category code when TeX reads the replacement text of the
+% macros below that contain the character definitions.
+\setnonasciicharscatcode\active
+%
+% Latin1 (ISO-8859-1) character definitions.
+%
+% Globally defines each active character ^^a0..^^ff as the commands
+% producing the corresponding glyph.  Characters for which no glyph is
+% provided here only log a \missingcharmsg.
+\def\latonechardefs{%
+  % No-break space: use \tie, as the UTF-8 table does for U+00A0.  A
+  % bare ~ is not a tie in Texinfo (it stands for a literal tilde).
+  \gdef^^a0{\tie}
+  \gdef^^a1{\exclamdown}
+  \gdef^^a2{\missingcharmsg{CENT SIGN}}
+  \gdef^^a3{{\pounds}}
+  \gdef^^a4{\missingcharmsg{CURRENCY SIGN}}
+  \gdef^^a5{\missingcharmsg{YEN SIGN}}
+  \gdef^^a6{\missingcharmsg{BROKEN BAR}}
+  \gdef^^a7{\S}
+  \gdef^^a8{\"{}}
+  \gdef^^a9{\copyright}
+  \gdef^^aa{\ordf}
+  \gdef^^ab{\missingcharmsg{LEFT-POINTING DOUBLE ANGLE QUOTATION MARK}}
+  \gdef^^ac{$\lnot$}
+  \gdef^^ad{\-}
+  \gdef^^ae{\registeredsymbol}
+  \gdef^^af{\={}}
+  %
+  \gdef^^b0{\textdegree}
+  \gdef^^b1{$\pm$}
+  \gdef^^b2{$^2$}
+  \gdef^^b3{$^3$}
+  \gdef^^b4{\'{}}
+  \gdef^^b5{$\mu$}
+  \gdef^^b6{\P}
+  %
+  \gdef^^b7{$^.$}
+  \gdef^^b8{\cedilla\ }
+  \gdef^^b9{$^1$}
+  \gdef^^ba{\ordm}
+  %
+  \gdef^^bb{\missingcharmsg{RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK}}
+  \gdef^^bc{$1\over4$}
+  \gdef^^bd{$1\over2$}
+  \gdef^^be{$3\over4$}
+  \gdef^^bf{\questiondown}
+  %
+  \gdef^^c0{\`A}
+  \gdef^^c1{\'A}
+  \gdef^^c2{\^A}
+  \gdef^^c3{\~A}
+  \gdef^^c4{\"A}
+  \gdef^^c5{\ringaccent A}
+  \gdef^^c6{\AE}
+  \gdef^^c7{\cedilla C}
+  \gdef^^c8{\`E}
+  \gdef^^c9{\'E}
+  \gdef^^ca{\^E}
+  \gdef^^cb{\"E}
+  \gdef^^cc{\`I}
+  \gdef^^cd{\'I}
+  \gdef^^ce{\^I}
+  \gdef^^cf{\"I}
+  %
+  \gdef^^d0{\missingcharmsg{LATIN CAPITAL LETTER ETH}}
+  \gdef^^d1{\~N}
+  \gdef^^d2{\`O}
+  \gdef^^d3{\'O}
+  \gdef^^d4{\^O}
+  \gdef^^d5{\~O}
+  \gdef^^d6{\"O}
+  \gdef^^d7{$\times$}
+  \gdef^^d8{\O}
+  \gdef^^d9{\`U}
+  \gdef^^da{\'U}
+  \gdef^^db{\^U}
+  \gdef^^dc{\"U}
+  \gdef^^dd{\'Y}
+  \gdef^^de{\missingcharmsg{LATIN CAPITAL LETTER THORN}}
+  \gdef^^df{\ss}
+  %
+  \gdef^^e0{\`a}
+  \gdef^^e1{\'a}
+  \gdef^^e2{\^a}
+  \gdef^^e3{\~a}
+  \gdef^^e4{\"a}
+  \gdef^^e5{\ringaccent a}
+  \gdef^^e6{\ae}
+  \gdef^^e7{\cedilla c}
+  \gdef^^e8{\`e}
+  \gdef^^e9{\'e}
+  \gdef^^ea{\^e}
+  \gdef^^eb{\"e}
+  \gdef^^ec{\`{\dotless i}}
+  \gdef^^ed{\'{\dotless i}}
+  \gdef^^ee{\^{\dotless i}}
+  \gdef^^ef{\"{\dotless i}}
+  %
+  \gdef^^f0{\missingcharmsg{LATIN SMALL LETTER ETH}}
+  \gdef^^f1{\~n}
+  \gdef^^f2{\`o}
+  \gdef^^f3{\'o}
+  \gdef^^f4{\^o}
+  \gdef^^f5{\~o}
+  \gdef^^f6{\"o}
+  \gdef^^f7{$\div$}
+  \gdef^^f8{\o}
+  \gdef^^f9{\`u}
+  \gdef^^fa{\'u}
+  \gdef^^fb{\^u}
+  \gdef^^fc{\"u}
+  \gdef^^fd{\'y}
+  \gdef^^fe{\missingcharmsg{LATIN SMALL LETTER THORN}}
+  \gdef^^ff{\"y}
+}
+
+% Latin9 (ISO-8859-15) encoding character definitions.
+%
+% Installs the Latin1 definitions first and then overrides the eight
+% positions redefined below.
+\def\latninechardefs{%
+  % Encoding is almost identical to Latin1.
+  \latonechardefs
+  %
+  \gdef^^a4{\euro}
+  \gdef^^a6{\v S}
+  \gdef^^a8{\v s}
+  \gdef^^b4{\v Z}
+  \gdef^^b8{\v z}
+  \gdef^^bc{\OE}
+  \gdef^^bd{\oe}
+  \gdef^^be{\"Y}
+}
+
+% Latin2 (ISO-8859-2) character definitions.
+%
+% Globally defines each active character ^^a0..^^ff as the commands
+% producing the corresponding glyph.  Characters for which no glyph is
+% provided here only log a \missingcharmsg.
+\def\lattwochardefs{%
+  % No-break space: use \tie, as the UTF-8 table does for U+00A0.  A
+  % bare ~ is not a tie in Texinfo (it stands for a literal tilde).
+  \gdef^^a0{\tie}
+  \gdef^^a1{\missingcharmsg{LATIN CAPITAL LETTER A WITH OGONEK}}
+  \gdef^^a2{\u{}}
+  \gdef^^a3{\L}
+  \gdef^^a4{\missingcharmsg{CURRENCY SIGN}}
+  \gdef^^a5{\v L}
+  \gdef^^a6{\'S}
+  \gdef^^a7{\S}
+  \gdef^^a8{\"{}}
+  \gdef^^a9{\v S}
+  \gdef^^aa{\cedilla S}
+  \gdef^^ab{\v T}
+  \gdef^^ac{\'Z}
+  \gdef^^ad{\-}
+  \gdef^^ae{\v Z}
+  \gdef^^af{\dotaccent Z}
+  %
+  \gdef^^b0{\textdegree}
+  \gdef^^b1{\missingcharmsg{LATIN SMALL LETTER A WITH OGONEK}}
+  \gdef^^b2{\missingcharmsg{OGONEK}}
+  \gdef^^b3{\l}
+  \gdef^^b4{\'{}}
+  \gdef^^b5{\v l}
+  \gdef^^b6{\'s}
+  \gdef^^b7{\v{}}
+  \gdef^^b8{\cedilla\ }
+  \gdef^^b9{\v s}
+  \gdef^^ba{\cedilla s}
+  \gdef^^bb{\v t}
+  \gdef^^bc{\'z}
+  \gdef^^bd{\H{}}
+  \gdef^^be{\v z}
+  \gdef^^bf{\dotaccent z}
+  %
+  \gdef^^c0{\'R}
+  \gdef^^c1{\'A}
+  \gdef^^c2{\^A}
+  \gdef^^c3{\u A}
+  \gdef^^c4{\"A}
+  \gdef^^c5{\'L}
+  \gdef^^c6{\'C}
+  \gdef^^c7{\cedilla C}
+  \gdef^^c8{\v C}
+  \gdef^^c9{\'E}
+  \gdef^^ca{\missingcharmsg{LATIN CAPITAL LETTER E WITH OGONEK}}
+  \gdef^^cb{\"E}
+  \gdef^^cc{\v E}
+  \gdef^^cd{\'I}
+  \gdef^^ce{\^I}
+  \gdef^^cf{\v D}
+  %
+  \gdef^^d0{\missingcharmsg{LATIN CAPITAL LETTER D WITH STROKE}}
+  \gdef^^d1{\'N}
+  \gdef^^d2{\v N}
+  \gdef^^d3{\'O}
+  \gdef^^d4{\^O}
+  \gdef^^d5{\H O}
+  \gdef^^d6{\"O}
+  \gdef^^d7{$\times$}
+  \gdef^^d8{\v R}
+  \gdef^^d9{\ringaccent U}
+  \gdef^^da{\'U}
+  \gdef^^db{\H U}
+  \gdef^^dc{\"U}
+  \gdef^^dd{\'Y}
+  \gdef^^de{\cedilla T}
+  \gdef^^df{\ss}
+  %
+  \gdef^^e0{\'r}
+  \gdef^^e1{\'a}
+  \gdef^^e2{\^a}
+  \gdef^^e3{\u a}
+  \gdef^^e4{\"a}
+  \gdef^^e5{\'l}
+  \gdef^^e6{\'c}
+  \gdef^^e7{\cedilla c}
+  \gdef^^e8{\v c}
+  \gdef^^e9{\'e}
+  \gdef^^ea{\missingcharmsg{LATIN SMALL LETTER E WITH OGONEK}}
+  \gdef^^eb{\"e}
+  \gdef^^ec{\v e}
+  % Use \dotless i, as the Latin1 table does; \i is not the dotless i
+  % in Texinfo (it is the italic font command).
+  \gdef^^ed{\'{\dotless i}}
+  \gdef^^ee{\^{\dotless i}}
+  \gdef^^ef{\v d}
+  %
+  \gdef^^f0{\missingcharmsg{LATIN SMALL LETTER D WITH STROKE}}
+  \gdef^^f1{\'n}
+  \gdef^^f2{\v n}
+  \gdef^^f3{\'o}
+  \gdef^^f4{\^o}
+  \gdef^^f5{\H o}
+  \gdef^^f6{\"o}
+  \gdef^^f7{$\div$}
+  \gdef^^f8{\v r}
+  \gdef^^f9{\ringaccent u}
+  \gdef^^fa{\'u}
+  \gdef^^fb{\H u}
+  \gdef^^fc{\"u}
+  \gdef^^fd{\'y}
+  \gdef^^fe{\cedilla t}
+  \gdef^^ff{\dotaccent{}}
+}
+
+% UTF-8 character definitions.
+% 
+% This code to support UTF-8 is based on LaTeX's utf8.def, with some
+% changes for Texinfo conventions.  It is included here under the GPL by
+% permission from Frank Mittelbach and the LaTeX team.
+% 
+\newcount\countUTFx
+\newcount\countUTFy
+\newcount\countUTFz
+
+% Handlers for multi-byte UTF-8 sequences.  #1 is the lead byte, the
+% remaining arguments are the continuation bytes.  Each handler forms
+% the control sequence named "u8:<bytes>" and hands it to
+% \UTFviiiDefined.  \string is applied to the continuation bytes so that
+% they are safe inside \csname whatever their catcode or meaning.
+\gdef\UTFviiiTwoOctets#1#2{\expandafter
+   \UTFviiiDefined\csname u8:#1\string #2\endcsname}
+%
+\gdef\UTFviiiThreeOctets#1#2#3{\expandafter
+   \UTFviiiDefined\csname u8:#1\string #2\string #3\endcsname}
+%
+\gdef\UTFviiiFourOctets#1#2#3#4{\expandafter
+   \UTFviiiDefined\csname u8:#1\string #2\string #3\string #4\endcsname}
+
+% #1 is a "u8:<bytes>" control sequence built with \csname.  If it is
+% still \relax (no definition was declared for that character), log a
+% message; otherwise execute it.  The \expandafter removes the \fi
+% before #1 runs.
+\gdef\UTFviiiDefined#1{%
+  \ifx #1\relax
+    \message{\linenumber Unicode char \string #1 not defined for Texinfo}%
+  \else
+    \expandafter #1%
+  \fi
+}
+
+% Make the UTF-8 lead bytes active (globally) and define each of them
+% to collect its continuation bytes:
+%   "C2.."DF  -> \UTFviiiTwoOctets
+%   "E0.."EF  -> \UTFviiiThreeOctets
+%   "F0.."F3  -> \UTFviiiFourOctets
+% \UTFviiiLoop handles the bytes from \countUTFx up to, but not
+% including, \countUTFy.  The \uccode/\uppercase pair turns the active ~
+% in \UTFviiiTmp into the active character with code \countUTFx.
+\begingroup
+  \catcode`\~13
+  \catcode`\"12
+
+  \def\UTFviiiLoop{%
+    \global\catcode\countUTFx\active
+    \uccode`\~\countUTFx
+    \uppercase\expandafter{\UTFviiiTmp}%
+    \advance\countUTFx by 1
+    \ifnum\countUTFx < \countUTFy
+      \expandafter\UTFviiiLoop
+    \fi}
+
+  \countUTFx = "C2
+  \countUTFy = "E0
+  \def\UTFviiiTmp{%
+    \xdef~{\noexpand\UTFviiiTwoOctets\string~}}
+  \UTFviiiLoop
+
+  \countUTFx = "E0
+  \countUTFy = "F0
+  \def\UTFviiiTmp{%
+    \xdef~{\noexpand\UTFviiiThreeOctets\string~}}
+  \UTFviiiLoop
+
+  \countUTFx = "F0
+  \countUTFy = "F4
+  \def\UTFviiiTmp{%
+    \xdef~{\noexpand\UTFviiiFourOctets\string~}}
+  \UTFviiiLoop
+\endgroup
+
+% \DeclareUnicodeCharacter{HEX}{TEXT}: define the "u8:<bytes>" control
+% sequence for code point HEX (>= 00A0) to expand to TEXT.
+%
+% The punctuation characters made catcode 12 below serve as
+% placeholders: their \uccode is set to a computed byte value and
+% \uppercase then replaces them with the real bytes.
+\begingroup
+  \catcode`\"=12
+  \catcode`\<=12
+  \catcode`\.=12
+  \catcode`\,=12
+  \catcode`\;=12
+  \catcode`\!=12
+  \catcode`\~=13
+
+  % #1 = code point in hexadecimal, #2 = replacement text.
+  \gdef\DeclareUnicodeCharacter#1#2{%
+    \countUTFz = "#1\relax
+    \wlog{\space\space defining Unicode char U+#1 (decimal \the\countUTFz)}%
+    \begingroup
+      % Leaves "\UTFviii...Octets<bytes>" in \UTFviiiTmp.
+      \parseXMLCharref
+      % Local versions that merely name the control sequence (no
+      % \UTFviiiDefined), so that it can be the target of \gdef.
+      \def\UTFviiiTwoOctets##1##2{%
+        \csname u8:##1\string ##2\endcsname}%
+      \def\UTFviiiThreeOctets##1##2##3{%
+        \csname u8:##1\string ##2\string ##3\endcsname}%
+      \def\UTFviiiFourOctets##1##2##3##4{%
+        \csname u8:##1\string ##2\string ##3\string ##4\endcsname}%
+      % Seven \expandafter's expand \UTFviiiTmp three times (macro ->
+      % octets macro -> \csname -> control sequence) before \gdef acts.
+      \expandafter\expandafter\expandafter\expandafter
+       \expandafter\expandafter\expandafter
+       \gdef\UTFviiiTmp{#2}%
+    \endgroup}
+
+  % Choose the UTF-8 length from the value of \countUTFz: below "A0 is
+  % an error, below "800 two bytes, below "10000 three bytes, otherwise
+  % four bytes.
+  \gdef\parseXMLCharref{%
+    \ifnum\countUTFz < "A0\relax
+      \errhelp = \EMsimple
+      \errmessage{Cannot define Unicode char value < 00A0}%
+    \else\ifnum\countUTFz < "800\relax
+      \parseUTFviiiA,%
+      \parseUTFviiiB C\UTFviiiTwoOctets.,%
+    \else\ifnum\countUTFz < "10000\relax
+      \parseUTFviiiA;%
+      \parseUTFviiiA,%
+      \parseUTFviiiB E\UTFviiiThreeOctets.{,;}%
+    \else
+      \parseUTFviiiA;%
+      \parseUTFviiiA,%
+      \parseUTFviiiA!%
+      \parseUTFviiiB F\UTFviiiFourOctets.{!,;}%
+    \fi\fi\fi
+  }
+
+  % Split off one continuation byte: take \countUTFz modulo 64, add
+  % 128, and store the result as the \uccode of placeholder #1.
+  % \countUTFz is left holding the quotient.
+  \gdef\parseUTFviiiA#1{%
+    \countUTFx = \countUTFz
+    \divide\countUTFz by 64
+    \countUTFy = \countUTFz
+    \multiply\countUTFz by 64
+    \advance\countUTFx by -\countUTFz
+    \advance\countUTFx by 128
+    \uccode `#1\countUTFx
+    \countUTFz = \countUTFy}
+
+  % Build the lead byte: add "#10 (#1 is the hex digit C, E or F) to
+  % what is left in \countUTFz and store it as the \uccode of
+  % placeholder #3.  #2 is the octets macro, #4 the continuation
+  % placeholders.  \uppercase then substitutes the real bytes.
+  \gdef\parseUTFviiiB#1#2#3#4{%
+    \advance\countUTFz by "#10\relax
+    \uccode `#3\countUTFz
+    \uppercase{\gdef\UTFviiiTmp{#2#3#4}}}
+\endgroup
+
+% Declare the Unicode characters supported with @documentencoding UTF-8.
+% Each line maps a code point (in hexadecimal) to the commands that
+% produce the glyph.
+\def\utfeightchardefs{%
+  \DeclareUnicodeCharacter{00A0}{\tie}
+  \DeclareUnicodeCharacter{00A1}{\exclamdown}
+  \DeclareUnicodeCharacter{00A3}{\pounds}
+  \DeclareUnicodeCharacter{00A8}{\"{ }}
+  \DeclareUnicodeCharacter{00A9}{\copyright}
+  \DeclareUnicodeCharacter{00AA}{\ordf}
+  \DeclareUnicodeCharacter{00AB}{\guillemetleft}
+  \DeclareUnicodeCharacter{00AD}{\-}
+  \DeclareUnicodeCharacter{00AE}{\registeredsymbol}
+  \DeclareUnicodeCharacter{00AF}{\={ }}
+
+  % Degree sign: same command as the Latin1/Latin2 tables use for ^^b0.
+  \DeclareUnicodeCharacter{00B0}{\textdegree}
+  \DeclareUnicodeCharacter{00B4}{\'{ }}
+  \DeclareUnicodeCharacter{00B8}{\cedilla{ }}
+  \DeclareUnicodeCharacter{00BA}{\ordm}
+  \DeclareUnicodeCharacter{00BB}{\guillemetright}
+  \DeclareUnicodeCharacter{00BF}{\questiondown}
+
+  \DeclareUnicodeCharacter{00C0}{\`A}
+  \DeclareUnicodeCharacter{00C1}{\'A}
+  \DeclareUnicodeCharacter{00C2}{\^A}
+  \DeclareUnicodeCharacter{00C3}{\~A}
+  \DeclareUnicodeCharacter{00C4}{\"A}
+  \DeclareUnicodeCharacter{00C5}{\AA}
+  \DeclareUnicodeCharacter{00C6}{\AE}
+  \DeclareUnicodeCharacter{00C7}{\cedilla{C}}
+  \DeclareUnicodeCharacter{00C8}{\`E}
+  \DeclareUnicodeCharacter{00C9}{\'E}
+  \DeclareUnicodeCharacter{00CA}{\^E}
+  \DeclareUnicodeCharacter{00CB}{\"E}
+  \DeclareUnicodeCharacter{00CC}{\`I}
+  \DeclareUnicodeCharacter{00CD}{\'I}
+  \DeclareUnicodeCharacter{00CE}{\^I}
+  \DeclareUnicodeCharacter{00CF}{\"I}
+
+  \DeclareUnicodeCharacter{00D1}{\~N}
+  \DeclareUnicodeCharacter{00D2}{\`O}
+  \DeclareUnicodeCharacter{00D3}{\'O}
+  \DeclareUnicodeCharacter{00D4}{\^O}
+  \DeclareUnicodeCharacter{00D5}{\~O}
+  \DeclareUnicodeCharacter{00D6}{\"O}
+  \DeclareUnicodeCharacter{00D8}{\O}
+  \DeclareUnicodeCharacter{00D9}{\`U}
+  \DeclareUnicodeCharacter{00DA}{\'U}
+  \DeclareUnicodeCharacter{00DB}{\^U}
+  \DeclareUnicodeCharacter{00DC}{\"U}
+  \DeclareUnicodeCharacter{00DD}{\'Y}
+  \DeclareUnicodeCharacter{00DF}{\ss}
+
+  \DeclareUnicodeCharacter{00E0}{\`a}
+  \DeclareUnicodeCharacter{00E1}{\'a}
+  \DeclareUnicodeCharacter{00E2}{\^a}
+  \DeclareUnicodeCharacter{00E3}{\~a}
+  \DeclareUnicodeCharacter{00E4}{\"a}
+  \DeclareUnicodeCharacter{00E5}{\aa}
+  \DeclareUnicodeCharacter{00E6}{\ae}
+  \DeclareUnicodeCharacter{00E7}{\cedilla{c}}
+  \DeclareUnicodeCharacter{00E8}{\`e}
+  \DeclareUnicodeCharacter{00E9}{\'e}
+  \DeclareUnicodeCharacter{00EA}{\^e}
+  \DeclareUnicodeCharacter{00EB}{\"e}
+  \DeclareUnicodeCharacter{00EC}{\`{\dotless{i}}}
+  \DeclareUnicodeCharacter{00ED}{\'{\dotless{i}}}
+  \DeclareUnicodeCharacter{00EE}{\^{\dotless{i}}}
+  \DeclareUnicodeCharacter{00EF}{\"{\dotless{i}}}
+
+  \DeclareUnicodeCharacter{00F1}{\~n}
+  \DeclareUnicodeCharacter{00F2}{\`o}
+  \DeclareUnicodeCharacter{00F3}{\'o}
+  \DeclareUnicodeCharacter{00F4}{\^o}
+  \DeclareUnicodeCharacter{00F5}{\~o}
+  \DeclareUnicodeCharacter{00F6}{\"o}
+  \DeclareUnicodeCharacter{00F8}{\o}
+  \DeclareUnicodeCharacter{00F9}{\`u}
+  \DeclareUnicodeCharacter{00FA}{\'u}
+  \DeclareUnicodeCharacter{00FB}{\^u}
+  \DeclareUnicodeCharacter{00FC}{\"u}
+  \DeclareUnicodeCharacter{00FD}{\'y}
+  \DeclareUnicodeCharacter{00FF}{\"y}
+
+  \DeclareUnicodeCharacter{0100}{\=A}
+  \DeclareUnicodeCharacter{0101}{\=a}
+  \DeclareUnicodeCharacter{0102}{\u{A}}
+  \DeclareUnicodeCharacter{0103}{\u{a}}
+  \DeclareUnicodeCharacter{0106}{\'C}
+  \DeclareUnicodeCharacter{0107}{\'c}
+  \DeclareUnicodeCharacter{0108}{\^C}
+  \DeclareUnicodeCharacter{0109}{\^c}
+  \DeclareUnicodeCharacter{010A}{\dotaccent{C}}
+  \DeclareUnicodeCharacter{010B}{\dotaccent{c}}
+  \DeclareUnicodeCharacter{010C}{\v{C}}
+  \DeclareUnicodeCharacter{010D}{\v{c}}
+  \DeclareUnicodeCharacter{010E}{\v{D}}
+
+  \DeclareUnicodeCharacter{0112}{\=E}
+  \DeclareUnicodeCharacter{0113}{\=e}
+  \DeclareUnicodeCharacter{0114}{\u{E}}
+  \DeclareUnicodeCharacter{0115}{\u{e}}
+  \DeclareUnicodeCharacter{0116}{\dotaccent{E}}
+  \DeclareUnicodeCharacter{0117}{\dotaccent{e}}
+  \DeclareUnicodeCharacter{011A}{\v{E}}
+  \DeclareUnicodeCharacter{011B}{\v{e}}
+  \DeclareUnicodeCharacter{011C}{\^G}
+  \DeclareUnicodeCharacter{011D}{\^g}
+  \DeclareUnicodeCharacter{011E}{\u{G}}
+  \DeclareUnicodeCharacter{011F}{\u{g}}
+
+  \DeclareUnicodeCharacter{0120}{\dotaccent{G}}
+  \DeclareUnicodeCharacter{0121}{\dotaccent{g}}
+  \DeclareUnicodeCharacter{0124}{\^H}
+  \DeclareUnicodeCharacter{0125}{\^h}
+  \DeclareUnicodeCharacter{0128}{\~I}
+  \DeclareUnicodeCharacter{0129}{\~{\dotless{i}}}
+  \DeclareUnicodeCharacter{012A}{\=I}
+  \DeclareUnicodeCharacter{012B}{\={\dotless{i}}}
+  \DeclareUnicodeCharacter{012C}{\u{I}}
+  \DeclareUnicodeCharacter{012D}{\u{\dotless{i}}}
+
+  \DeclareUnicodeCharacter{0130}{\dotaccent{I}}
+  \DeclareUnicodeCharacter{0131}{\dotless{i}}
+  \DeclareUnicodeCharacter{0132}{IJ}
+  \DeclareUnicodeCharacter{0133}{ij}
+  \DeclareUnicodeCharacter{0134}{\^J}
+  \DeclareUnicodeCharacter{0135}{\^{\dotless{j}}}
+  \DeclareUnicodeCharacter{0139}{\'L}
+  \DeclareUnicodeCharacter{013A}{\'l}
+
+  \DeclareUnicodeCharacter{0141}{\L}
+  \DeclareUnicodeCharacter{0142}{\l}
+  \DeclareUnicodeCharacter{0143}{\'N}
+  \DeclareUnicodeCharacter{0144}{\'n}
+  \DeclareUnicodeCharacter{0147}{\v{N}}
+  \DeclareUnicodeCharacter{0148}{\v{n}}
+  \DeclareUnicodeCharacter{014C}{\=O}
+  \DeclareUnicodeCharacter{014D}{\=o}
+  \DeclareUnicodeCharacter{014E}{\u{O}}
+  \DeclareUnicodeCharacter{014F}{\u{o}}
+
+  \DeclareUnicodeCharacter{0150}{\H{O}}
+  \DeclareUnicodeCharacter{0151}{\H{o}}
+  \DeclareUnicodeCharacter{0152}{\OE}
+  \DeclareUnicodeCharacter{0153}{\oe}
+  \DeclareUnicodeCharacter{0154}{\'R}
+  \DeclareUnicodeCharacter{0155}{\'r}
+  \DeclareUnicodeCharacter{0158}{\v{R}}
+  \DeclareUnicodeCharacter{0159}{\v{r}}
+  \DeclareUnicodeCharacter{015A}{\'S}
+  \DeclareUnicodeCharacter{015B}{\'s}
+  \DeclareUnicodeCharacter{015C}{\^S}
+  \DeclareUnicodeCharacter{015D}{\^s}
+  \DeclareUnicodeCharacter{015E}{\cedilla{S}}
+  \DeclareUnicodeCharacter{015F}{\cedilla{s}}
+
+  \DeclareUnicodeCharacter{0160}{\v{S}}
+  \DeclareUnicodeCharacter{0161}{\v{s}}
+  % U+0162 is the capital letter, U+0163 the small one.
+  \DeclareUnicodeCharacter{0162}{\cedilla{T}}
+  \DeclareUnicodeCharacter{0163}{\cedilla{t}}
+  \DeclareUnicodeCharacter{0164}{\v{T}}
+
+  \DeclareUnicodeCharacter{0168}{\~U}
+  \DeclareUnicodeCharacter{0169}{\~u}
+  \DeclareUnicodeCharacter{016A}{\=U}
+  \DeclareUnicodeCharacter{016B}{\=u}
+  \DeclareUnicodeCharacter{016C}{\u{U}}
+  \DeclareUnicodeCharacter{016D}{\u{u}}
+  \DeclareUnicodeCharacter{016E}{\ringaccent{U}}
+  \DeclareUnicodeCharacter{016F}{\ringaccent{u}}
+
+  \DeclareUnicodeCharacter{0170}{\H{U}}
+  \DeclareUnicodeCharacter{0171}{\H{u}}
+  \DeclareUnicodeCharacter{0174}{\^W}
+  \DeclareUnicodeCharacter{0175}{\^w}
+  \DeclareUnicodeCharacter{0176}{\^Y}
+  \DeclareUnicodeCharacter{0177}{\^y}
+  \DeclareUnicodeCharacter{0178}{\"Y}
+  \DeclareUnicodeCharacter{0179}{\'Z}
+  \DeclareUnicodeCharacter{017A}{\'z}
+  \DeclareUnicodeCharacter{017B}{\dotaccent{Z}}
+  \DeclareUnicodeCharacter{017C}{\dotaccent{z}}
+  \DeclareUnicodeCharacter{017D}{\v{Z}}
+  \DeclareUnicodeCharacter{017E}{\v{z}}
+
+  \DeclareUnicodeCharacter{01C4}{D\v{Z}}
+  \DeclareUnicodeCharacter{01C5}{D\v{z}}
+  \DeclareUnicodeCharacter{01C6}{d\v{z}}
+  \DeclareUnicodeCharacter{01C7}{LJ}
+  \DeclareUnicodeCharacter{01C8}{Lj}
+  \DeclareUnicodeCharacter{01C9}{lj}
+  \DeclareUnicodeCharacter{01CA}{NJ}
+  \DeclareUnicodeCharacter{01CB}{Nj}
+  \DeclareUnicodeCharacter{01CC}{nj}
+  \DeclareUnicodeCharacter{01CD}{\v{A}}
+  \DeclareUnicodeCharacter{01CE}{\v{a}}
+  \DeclareUnicodeCharacter{01CF}{\v{I}}
+
+  \DeclareUnicodeCharacter{01D0}{\v{\dotless{i}}}
+  \DeclareUnicodeCharacter{01D1}{\v{O}}
+  \DeclareUnicodeCharacter{01D2}{\v{o}}
+  \DeclareUnicodeCharacter{01D3}{\v{U}}
+  \DeclareUnicodeCharacter{01D4}{\v{u}}
+
+  \DeclareUnicodeCharacter{01E2}{\={\AE}}
+  \DeclareUnicodeCharacter{01E3}{\={\ae}}
+  \DeclareUnicodeCharacter{01E6}{\v{G}}
+  \DeclareUnicodeCharacter{01E7}{\v{g}}
+  \DeclareUnicodeCharacter{01E8}{\v{K}}
+  \DeclareUnicodeCharacter{01E9}{\v{k}}
+
+  \DeclareUnicodeCharacter{01F0}{\v{\dotless{j}}}
+  \DeclareUnicodeCharacter{01F1}{DZ}
+  \DeclareUnicodeCharacter{01F2}{Dz}
+  \DeclareUnicodeCharacter{01F3}{dz}
+  \DeclareUnicodeCharacter{01F4}{\'G}
+  \DeclareUnicodeCharacter{01F5}{\'g}
+  \DeclareUnicodeCharacter{01F8}{\`N}
+  \DeclareUnicodeCharacter{01F9}{\`n}
+  \DeclareUnicodeCharacter{01FC}{\'{\AE}}
+  \DeclareUnicodeCharacter{01FD}{\'{\ae}}
+  \DeclareUnicodeCharacter{01FE}{\'{\O}}
+  \DeclareUnicodeCharacter{01FF}{\'{\o}}
+
+  \DeclareUnicodeCharacter{021E}{\v{H}}
+  \DeclareUnicodeCharacter{021F}{\v{h}}
+
+  \DeclareUnicodeCharacter{0226}{\dotaccent{A}}
+  \DeclareUnicodeCharacter{0227}{\dotaccent{a}}
+  \DeclareUnicodeCharacter{0228}{\cedilla{E}}
+  \DeclareUnicodeCharacter{0229}{\cedilla{e}}
+  \DeclareUnicodeCharacter{022E}{\dotaccent{O}}
+  \DeclareUnicodeCharacter{022F}{\dotaccent{o}}
+
+  \DeclareUnicodeCharacter{0232}{\=Y}
+  \DeclareUnicodeCharacter{0233}{\=y}
+  \DeclareUnicodeCharacter{0237}{\dotless{j}}
+
+  \DeclareUnicodeCharacter{1E02}{\dotaccent{B}}
+  \DeclareUnicodeCharacter{1E03}{\dotaccent{b}}
+  \DeclareUnicodeCharacter{1E04}{\udotaccent{B}}
+  \DeclareUnicodeCharacter{1E05}{\udotaccent{b}}
+  \DeclareUnicodeCharacter{1E06}{\ubaraccent{B}}
+  \DeclareUnicodeCharacter{1E07}{\ubaraccent{b}}
+  \DeclareUnicodeCharacter{1E0A}{\dotaccent{D}}
+  \DeclareUnicodeCharacter{1E0B}{\dotaccent{d}}
+  \DeclareUnicodeCharacter{1E0C}{\udotaccent{D}}
+  \DeclareUnicodeCharacter{1E0D}{\udotaccent{d}}
+  \DeclareUnicodeCharacter{1E0E}{\ubaraccent{D}}
+  \DeclareUnicodeCharacter{1E0F}{\ubaraccent{d}}
+
+  \DeclareUnicodeCharacter{1E1E}{\dotaccent{F}}
+  \DeclareUnicodeCharacter{1E1F}{\dotaccent{f}}
+
+  \DeclareUnicodeCharacter{1E20}{\=G}
+  \DeclareUnicodeCharacter{1E21}{\=g}
+  \DeclareUnicodeCharacter{1E22}{\dotaccent{H}}
+  \DeclareUnicodeCharacter{1E23}{\dotaccent{h}}
+  \DeclareUnicodeCharacter{1E24}{\udotaccent{H}}
+  \DeclareUnicodeCharacter{1E25}{\udotaccent{h}}
+  \DeclareUnicodeCharacter{1E26}{\"H}
+  \DeclareUnicodeCharacter{1E27}{\"h}
+
+  \DeclareUnicodeCharacter{1E30}{\'K}
+  \DeclareUnicodeCharacter{1E31}{\'k}
+  \DeclareUnicodeCharacter{1E32}{\udotaccent{K}}
+  \DeclareUnicodeCharacter{1E33}{\udotaccent{k}}
+  \DeclareUnicodeCharacter{1E34}{\ubaraccent{K}}
+  \DeclareUnicodeCharacter{1E35}{\ubaraccent{k}}
+  \DeclareUnicodeCharacter{1E36}{\udotaccent{L}}
+  \DeclareUnicodeCharacter{1E37}{\udotaccent{l}}
+  \DeclareUnicodeCharacter{1E3A}{\ubaraccent{L}}
+  \DeclareUnicodeCharacter{1E3B}{\ubaraccent{l}}
+  \DeclareUnicodeCharacter{1E3E}{\'M}
+  \DeclareUnicodeCharacter{1E3F}{\'m}
+
+  \DeclareUnicodeCharacter{1E40}{\dotaccent{M}}
+  \DeclareUnicodeCharacter{1E41}{\dotaccent{m}}
+  \DeclareUnicodeCharacter{1E42}{\udotaccent{M}}
+  \DeclareUnicodeCharacter{1E43}{\udotaccent{m}}
+  \DeclareUnicodeCharacter{1E44}{\dotaccent{N}}
+  \DeclareUnicodeCharacter{1E45}{\dotaccent{n}}
+  \DeclareUnicodeCharacter{1E46}{\udotaccent{N}}
+  \DeclareUnicodeCharacter{1E47}{\udotaccent{n}}
+  \DeclareUnicodeCharacter{1E48}{\ubaraccent{N}}
+  \DeclareUnicodeCharacter{1E49}{\ubaraccent{n}}
+
+  \DeclareUnicodeCharacter{1E54}{\'P}
+  \DeclareUnicodeCharacter{1E55}{\'p}
+  \DeclareUnicodeCharacter{1E56}{\dotaccent{P}}
+  \DeclareUnicodeCharacter{1E57}{\dotaccent{p}}
+  \DeclareUnicodeCharacter{1E58}{\dotaccent{R}}
+  \DeclareUnicodeCharacter{1E59}{\dotaccent{r}}
+  \DeclareUnicodeCharacter{1E5A}{\udotaccent{R}}
+  \DeclareUnicodeCharacter{1E5B}{\udotaccent{r}}
+  \DeclareUnicodeCharacter{1E5E}{\ubaraccent{R}}
+  \DeclareUnicodeCharacter{1E5F}{\ubaraccent{r}}
+
+  \DeclareUnicodeCharacter{1E60}{\dotaccent{S}}
+  \DeclareUnicodeCharacter{1E61}{\dotaccent{s}}
+  \DeclareUnicodeCharacter{1E62}{\udotaccent{S}}
+  \DeclareUnicodeCharacter{1E63}{\udotaccent{s}}
+  \DeclareUnicodeCharacter{1E6A}{\dotaccent{T}}
+  \DeclareUnicodeCharacter{1E6B}{\dotaccent{t}}
+  \DeclareUnicodeCharacter{1E6C}{\udotaccent{T}}
+  \DeclareUnicodeCharacter{1E6D}{\udotaccent{t}}
+  \DeclareUnicodeCharacter{1E6E}{\ubaraccent{T}}
+  \DeclareUnicodeCharacter{1E6F}{\ubaraccent{t}}
+
+  \DeclareUnicodeCharacter{1E7C}{\~V}
+  \DeclareUnicodeCharacter{1E7D}{\~v}
+  \DeclareUnicodeCharacter{1E7E}{\udotaccent{V}}
+  \DeclareUnicodeCharacter{1E7F}{\udotaccent{v}}
+
+  \DeclareUnicodeCharacter{1E80}{\`W}
+  \DeclareUnicodeCharacter{1E81}{\`w}
+  \DeclareUnicodeCharacter{1E82}{\'W}
+  \DeclareUnicodeCharacter{1E83}{\'w}
+  \DeclareUnicodeCharacter{1E84}{\"W}
+  \DeclareUnicodeCharacter{1E85}{\"w}
+  \DeclareUnicodeCharacter{1E86}{\dotaccent{W}}
+  \DeclareUnicodeCharacter{1E87}{\dotaccent{w}}
+  \DeclareUnicodeCharacter{1E88}{\udotaccent{W}}
+  \DeclareUnicodeCharacter{1E89}{\udotaccent{w}}
+  \DeclareUnicodeCharacter{1E8A}{\dotaccent{X}}
+  \DeclareUnicodeCharacter{1E8B}{\dotaccent{x}}
+  \DeclareUnicodeCharacter{1E8C}{\"X}
+  \DeclareUnicodeCharacter{1E8D}{\"x}
+  \DeclareUnicodeCharacter{1E8E}{\dotaccent{Y}}
+  \DeclareUnicodeCharacter{1E8F}{\dotaccent{y}}
+
+  \DeclareUnicodeCharacter{1E90}{\^Z}
+  \DeclareUnicodeCharacter{1E91}{\^z}
+  \DeclareUnicodeCharacter{1E92}{\udotaccent{Z}}
+  \DeclareUnicodeCharacter{1E93}{\udotaccent{z}}
+  \DeclareUnicodeCharacter{1E94}{\ubaraccent{Z}}
+  \DeclareUnicodeCharacter{1E95}{\ubaraccent{z}}
+  \DeclareUnicodeCharacter{1E96}{\ubaraccent{h}}
+  \DeclareUnicodeCharacter{1E97}{\"t}
+  \DeclareUnicodeCharacter{1E98}{\ringaccent{w}}
+  \DeclareUnicodeCharacter{1E99}{\ringaccent{y}}
+
+  \DeclareUnicodeCharacter{1EA0}{\udotaccent{A}}
+  \DeclareUnicodeCharacter{1EA1}{\udotaccent{a}}
+
+  \DeclareUnicodeCharacter{1EB8}{\udotaccent{E}}
+  \DeclareUnicodeCharacter{1EB9}{\udotaccent{e}}
+  \DeclareUnicodeCharacter{1EBC}{\~E}
+  \DeclareUnicodeCharacter{1EBD}{\~e}
+
+  \DeclareUnicodeCharacter{1ECA}{\udotaccent{I}}
+  \DeclareUnicodeCharacter{1ECB}{\udotaccent{i}}
+  \DeclareUnicodeCharacter{1ECC}{\udotaccent{O}}
+  \DeclareUnicodeCharacter{1ECD}{\udotaccent{o}}
+
+  \DeclareUnicodeCharacter{1EE4}{\udotaccent{U}}
+  \DeclareUnicodeCharacter{1EE5}{\udotaccent{u}}
+
+  \DeclareUnicodeCharacter{1EF2}{\`Y}
+  \DeclareUnicodeCharacter{1EF3}{\`y}
+  \DeclareUnicodeCharacter{1EF4}{\udotaccent{Y}}
+
+  \DeclareUnicodeCharacter{1EF8}{\~Y}
+  \DeclareUnicodeCharacter{1EF9}{\~y}
+
+  \DeclareUnicodeCharacter{2013}{--}
+  \DeclareUnicodeCharacter{2014}{---}
+  \DeclareUnicodeCharacter{2018}{\quoteleft}
+  \DeclareUnicodeCharacter{2019}{\quoteright}
+  \DeclareUnicodeCharacter{201A}{\quotesinglbase}
+  \DeclareUnicodeCharacter{201C}{\quotedblleft}
+  \DeclareUnicodeCharacter{201D}{\quotedblright}
+  \DeclareUnicodeCharacter{201E}{\quotedblbase}
+  \DeclareUnicodeCharacter{2022}{\bullet}
+  \DeclareUnicodeCharacter{2026}{\dots}
+  \DeclareUnicodeCharacter{2039}{\guilsinglleft}
+  \DeclareUnicodeCharacter{203A}{\guilsinglright}
+  \DeclareUnicodeCharacter{20AC}{\euro}
+
+  \DeclareUnicodeCharacter{2192}{\expansion}
+  \DeclareUnicodeCharacter{21D2}{\result}
+
+  \DeclareUnicodeCharacter{2212}{\minus}
+  \DeclareUnicodeCharacter{2217}{\point}
+  \DeclareUnicodeCharacter{2261}{\equiv}
+}% end of \utfeightchardefs
+
+
+% US-ASCII character definitions.
+\def\asciichardefs{% nothing need be done
+   \relax
+}
+
+% Make non-ASCII characters printable again for compatibility with
+% existing Texinfo documents that may use them, even without declaring a
+% document encoding.
+%
+\setnonasciicharscatcode \other
+
+
+\message{formatting,}
+
+\newdimen\defaultparindent \defaultparindent = 15pt
+
+\chapheadingskip = 15pt plus 4pt minus 2pt
+\secheadingskip = 12pt plus 3pt minus 2pt
+\subsecheadingskip = 9pt plus 2pt minus 2pt
+
+% Prevent underfull vbox error messages.
+\vbadness = 10000
+
+% Don't be so finicky about underfull hboxes, either.
+\hbadness = 2000
+
+% Following George Bush, get rid of widows and orphans.
+\widowpenalty=10000
+\clubpenalty=10000
+
+% Use TeX 3.0's \emergencystretch to help line breaking, but if we're
+% using an old version of TeX, don't do anything.  We want the amount of
+% stretch added to depend on the line length, hence the dependence on
+% \hsize.  We call this whenever the paper size is set.
+%
+\def\setemergencystretch{%
+  % \thisisundefined is never defined, so this \ifx succeeds exactly when
+  % \emergencystretch itself has no meaning (pre-3.0 TeX).
+  \ifx\emergencystretch\thisisundefined
+    % Allow us to assign to \emergencystretch anyway.
+    % (Assignments written against the name then land in scratch \dimen0.)
+    \def\emergencystretch{\dimen0}%
+  \else
+    % Extra stretch is 15% of the current line width.
+    \emergencystretch = .15\hsize
+  \fi
+}
+
+% Parameters in order: 1) textheight; 2) textwidth;
+% 3) voffset; 4) hoffset; 5) binding offset; 6) topskip;
+% 7) physical page height; 8) physical page width.
+%
+% We also call \setleading{\textleading}, so the caller should define
+% \textleading.  The caller should also set \parskip.
+%
+\def\internalpagesizes#1#2#3#4#5#6#7#8{%
+  % Vertical placement: #3 is the vertical offset, #6 the topskip, which
+  % is reused for split boxes as well.
+  \voffset = #3\relax
+  \topskip = #6\relax
+  \splittopskip = \topskip
+  %
+  % \vsize is the text height (#1) plus \topskip; the outer height adds
+  % twice \topandbottommargin on top of that.
+  \vsize = #1\relax
+  \advance\vsize by \topskip
+  \outervsize = \vsize
+  \advance\outervsize by 2\topandbottommargin
+  \pageheight = \vsize
+  %
+  % \hsize is the text width (#2); the outer width is a fixed 0.5in more.
+  \hsize = #2\relax
+  \outerhsize = \hsize
+  \advance\outerhsize by 0.5in
+  \pagewidth = \hsize
+  %
+  % Horizontal offset (#4) and binding offset (#5).
+  \normaloffset = #4\relax
+  \bindingoffset = #5\relax
+  %
+  % The physical page size (#7 high, #8 wide) is only used for pdf output.
+  \ifpdf
+    \pdfpageheight #7\relax
+    \pdfpagewidth #8\relax
+    % if we don't reset these, they will remain at "1 true in" of
+    % whatever layout pdftex was dumped with.
+    \pdfhorigin = 1 true in
+    \pdfvorigin = 1 true in
+  \fi
+  %
+  % The caller must have set \textleading before invoking this macro.
+  \setleading{\textleading}
+  %
+  \parindent = \defaultparindent
+  % Recompute the emergency stretch now that \hsize has changed.
+  \setemergencystretch
+}
+
+% @letterpaper (the default).
+\def\letterpaper{{\globaldefs = 1
+  % With \globaldefs positive, the assignments below are made globally
+  % even though they sit inside a brace group.
+  \parskip = 3pt plus 2pt minus 1pt
+  \textleading = 13.2pt
+  %
+  % If page is nothing but text, make it come out even.
+  % Text block 607.2pt high by 6in wide, hoffset .25in, topskip 36pt,
+  % physical page 11in high by 8.5in wide; \voffset and \bindingoffset
+  % are passed through at their current values.
+  \internalpagesizes{607.2pt}{6in}% that's 46 lines
+                    {\voffset}{.25in}%
+                    {\bindingoffset}{36pt}%
+                    {11in}{8.5in}%
+}}
+
+% Use @smallbook to reset parameters for 7x9.25 trim size.
+\def\smallbook{{\globaldefs = 1
+  % With \globaldefs positive, the assignments below are made globally
+  % even though they sit inside a brace group.
+  \parskip = 2pt plus 1pt
+  \textleading = 12pt
+  %
+  % Text block 7.5in high by 5in wide, voffset -.2in, hoffset 0in,
+  % topskip 16pt, physical page 9.25in high by 7in wide; the binding
+  % offset keeps its current value.
+  \internalpagesizes{7.5in}{5in}%
+                    {-.2in}{0in}%
+                    {\bindingoffset}{16pt}%
+                    {9.25in}{7in}%
+  %
+  % Remaining layout parameters set for this page size.
+  \lispnarrowing = 0.3in
+  \tolerance = 700
+  \hfuzz = 1pt
+  \contentsrightmargin = 0pt
+  \defbodyindent = .5cm
+}}
+
+% Use @smallerbook to reset parameters for 6x9 trim size.
+% (Just testing, parameters still in flux.)
+\def\smallerbook{{\globaldefs = 1
+  % With \globaldefs positive, the assignments below are made globally
+  % even though they sit inside a brace group.
+  \parskip = 1.5pt plus 1pt
+  \textleading = 12pt
+  %
+  % Text block 7.4in high by 4.8in wide, voffset -.2in, hoffset -.4in,
+  % binding offset forced to 0pt, topskip 14pt, physical page 9in high
+  % by 6in wide.
+  \internalpagesizes{7.4in}{4.8in}%
+                    {-.2in}{-.4in}%
+                    {0pt}{14pt}%
+                    {9in}{6in}%
+  %
+  % Remaining layout parameters set for this page size.
+  \lispnarrowing = 0.25in
+  \tolerance = 700
+  \hfuzz = 1pt
+  \contentsrightmargin = 0pt
+  \defbodyindent = .4cm
+}}
+
+% Use @afourpaper to print on European A4 paper.
+\def\afourpaper{{\globaldefs = 1
+  % With \globaldefs positive, the assignments below are made globally
+  % even though they sit inside a brace group.
+  \parskip = 3pt plus 2pt minus 1pt
+  \textleading = 13.2pt
+  %
+  % Double-side printing via postscript on Laserjet 4050
+  % prints double-sided nicely when \bindingoffset=10mm and \hoffset=-6mm.
+  % To change the settings for a different printer or situation, adjust
+  % \normaloffset until the front-side and back-side texts align.  Then
+  % do the same for \bindingoffset.  You can set these for testing in
+  % your texinfo source file like this:
+  % @tex
+  % \global\normaloffset = -6mm
+  % \global\bindingoffset = 10mm
+  % @end tex
+  %
+  % Text block 673.2pt high by 160mm wide, topskip 44pt, physical page
+  % 297mm high by 210mm wide; \voffset, \hoffset and \bindingoffset are
+  % passed through at their current values.
+  \internalpagesizes{673.2pt}{160mm}% that's 51 lines
+                    {\voffset}{\hoffset}%
+                    {\bindingoffset}{44pt}%
+                    {297mm}{210mm}%
+  %
+  % Remaining layout parameters set for this page size.
+  \tolerance = 700
+  \hfuzz = 1pt
+  \contentsrightmargin = 0pt
+  \defbodyindent = 5mm
+}}
+
+% Use @afivepaper to print on European A5 paper.
+% From romildo@urano.iceb.ufop.br, 2 July 2000.
+% He also recommends making @example and @lisp be small.
+\def\afivepaper{{\globaldefs = 1
+  % With \globaldefs positive, the assignments below are made globally
+  % even though they sit inside a brace group.
+  \parskip = 2pt plus 1pt minus 0.1pt
+  \textleading = 12.5pt
+  %
+  % Text block 160mm high by 120mm wide, topskip 8pt, physical page
+  % 210mm high by 148mm wide; \voffset, \hoffset and \bindingoffset are
+  % passed through at their current values.
+  \internalpagesizes{160mm}{120mm}%
+                    {\voffset}{\hoffset}%
+                    {\bindingoffset}{8pt}%
+                    {210mm}{148mm}%
+  %
+  % Remaining layout parameters set for this page size.
+  \lispnarrowing = 0.2in
+  \tolerance = 800
+  \hfuzz = 1.2pt
+  \contentsrightmargin = 0pt
+  \defbodyindent = 2mm
+  \tableindent = 12mm
+}}
+
+% A specific text layout, 24x15cm overall, intended for A4 paper.
+\def\afourlatex{{\globaldefs = 1
+  % Start from the A4 settings, then override the page geometry below.
+  \afourpaper
+  % Text block 237mm high by 150mm wide, hoffset 4.6mm, topskip 7mm,
+  % physical page 297mm high by 210mm wide.
+  \internalpagesizes{237mm}{150mm}%
+                    {\voffset}{4.6mm}%
+                    {\bindingoffset}{7mm}%
+                    {297mm}{210mm}%
+  %
+  % Must explicitly reset to 0 because we call \afourpaper.
+  % (\afourpaper's own "\globaldefs = 1" runs while \globaldefs is already
+  % positive, so it is itself a global assignment and would otherwise
+  % outlive this group.)
+  \globaldefs = 0
+}}
+
+% Use @afourwide to print on A4 paper in landscape format.
+\def\afourwide{{\globaldefs = 1
+  % Start from the A4 settings, then override the page geometry below.
+  \afourpaper
+  % Text block 241mm high by 165mm wide, hoffset -2.95mm, topskip 7mm.
+  % NOTE(review): the physical page passed here is still 297mm high by
+  % 210mm wide, the same as \afourpaper; only the text block is wider.
+  \internalpagesizes{241mm}{165mm}%
+                    {\voffset}{-2.95mm}%
+                    {\bindingoffset}{7mm}%
+                    {297mm}{210mm}%
+  % Reset explicitly: the nested \afourpaper call set \globaldefs to 1
+  % while it was already positive, i.e. globally.
+  \globaldefs = 0
+}}
+
+% @pagesizes TEXTHEIGHT[,TEXTWIDTH]
+% Perhaps we should allow setting the margins, \topskip, \parskip,
+% and/or leading, also. Or perhaps we should compute them somehow.
+%
+% The two appended commas guarantee that \pagesizesyyy always finds its
+% delimiters, even when TEXTWIDTH is omitted (#2 is then empty).
+\parseargdef\pagesizes{\pagesizesyyy #1,,\finish}
+\def\pagesizesyyy#1,#2,#3\finish{{%
+  % Typeset #2 in a box: a non-zero width means a TEXTWIDTH was given, in
+  % which case it becomes \hsize (set before \globaldefs is switched on).
+  \setbox0 = \hbox{\ignorespaces #2}\ifdim\wd0 > 0pt \hsize=#2\relax \fi
+  \globaldefs = 1
+  %
+  \parskip = 3pt plus 2pt minus 1pt
+  \setleading{\textleading}%
+  %
+  % \dimen0 = text height + \voffset, passed below as the physical page
+  % height.
+  \dimen0 = #1\relax
+  \advance\dimen0 by \voffset
+  %
+  % \dimen2 = text width + \normaloffset, passed below as the physical
+  % page width.
+  \dimen2 = \hsize
+  \advance\dimen2 by \normaloffset
+  %
+  \internalpagesizes{#1}{\hsize}%
+                    {\voffset}{\normaloffset}%
+                    {\bindingoffset}{44pt}%
+                    {\dimen0}{\dimen2}%
+}}
+
+% Set default to letter.
+%
+\letterpaper
+
+
+\message{and turning on texinfo input format.}
+
+% Define macros to output various characters with catcode for normal text.
+\catcode`\"=\other
+\catcode`\~=\other
+\catcode`\^=\other
+\catcode`\_=\other
+\catcode`\|=\other
+\catcode`\<=\other
+\catcode`\>=\other
+\catcode`\+=\other
+\catcode`\$=\other
+\def\normaldoublequote{"}
+\def\normaltilde{~}
+\def\normalcaret{^}
+\def\normalunderscore{_}
+\def\normalverticalbar{|}
+\def\normalless{<}
+\def\normalgreater{>}
+\def\normalplus{+}
+\def\normaldollar{$}%$ font-lock fix
+
+% This macro is used to make a character print one way in \tt
+% (where it can probably be output as-is), and another way in other fonts,
+% where something hairier probably needs to be done.
+%
+% #1 is what to print if we are indeed using \tt; #2 is what to print
+% otherwise.  Since all the Computer Modern typewriter fonts have zero
+% interword stretch (and shrink), and it is reasonable to expect all
+% typewriter fonts to have this, we can check that font parameter.
+%
+\def\ifusingtt#1#2{\ifdim \fontdimen3\font=0pt #1\else #2\fi}
+
+% Same as above, but check for italic font.  Actually this also catches
+% non-italic slanted fonts since it is impossible to distinguish them from
+% italic fonts.  But since this is only used by $ and it uses \sl anyway
+% this is not a problem.
+\def\ifusingit#1#2{\ifdim \fontdimen1\font>0pt #1\else #2\fi}
+
+% Turn off all special characters except @
+% (and those which the user can use as if they were ordinary).
+% Most of these we simply print from the \tt font, but for some, we can
+% use math or other variants that look better in normal text.
+
+\catcode`\"=\active
+\def\activedoublequote{{\tt\char34}}
+\let"=\activedoublequote
+\catcode`\~=\active
+\def~{{\tt\char126}}
+\chardef\hat=`\^
+\catcode`\^=\active
+\def^{{\tt \hat}}
+
+\catcode`\_=\active
+\def_{\ifusingtt\normalunderscore\_}
+\let\realunder=_
+% Subroutine for the previous macro.
+\def\_{\leavevmode \kern.07em \vbox{\hrule width.3em height.1ex}\kern .07em }
+
+\catcode`\|=\active
+\def|{{\tt\char124}}
+\chardef \less=`\<
+\catcode`\<=\active
+\def<{{\tt \less}}
+\chardef \gtr=`\>
+\catcode`\>=\active
+\def>{{\tt \gtr}}
+\catcode`\+=\active
+\def+{{\tt \char 43}}
+\catcode`\$=\active
+\def${\ifusingit{{\sl\$}}\normaldollar}%$ font-lock fix
+
+% If a .fmt file is being used, characters that might appear in a file
+% name cannot be active until we have parsed the command line.
+% So turn them off again, and have \everyjob (or @setfilename) turn them on.
+% \otherifyactive is called near the end of this file.
+\def\otherifyactive{\catcode`+=\other \catcode`\_=\other}
+
+% Used sometimes to turn off (effectively) the active characters even after
+% parsing them.
+\def\turnoffactive{%
+  \normalturnoffactive
+  \otherbackslash
+}
+
+\catcode`\@=0
+
+% \backslashcurfont outputs one backslash character in current font,
+% as in \char`\\.
+\global\chardef\backslashcurfont=`\\
+\global\let\rawbackslashxx=\backslashcurfont  % let existing .??s files work
+
+% \realbackslash is an actual character `\' with catcode other, and
+% \doublebackslash is two of them (for the pdf outlines).
+{\catcode`\\=\other @gdef@realbackslash{\} @gdef@doublebackslash{\\}}
+
+% In texinfo, backslash is an active character; it prints the backslash
+% in fixed width font.
+\catcode`\\=\active
+@def@normalbackslash{{@tt@backslashcurfont}}
+% On startup, @fixbackslash assigns:
+%  @let \ = @normalbackslash
+
+% \rawbackslash defines an active \ to do \backslashcurfont.
+% \otherbackslash defines an active \ to be a literal `\' character with
+% catcode other.
+@gdef@rawbackslash{@let\=@backslashcurfont}
+@gdef@otherbackslash{@let\=@realbackslash}
+
+% Same as @turnoffactive except outputs \ as {\tt\char`\\} instead of
+% the literal character `\'.
+% 
+@def@normalturnoffactive{%
+  % Backslash keeps printing through @normalbackslash (the @tt form).
+  @let\=@normalbackslash
+  % Each remaining active character is re-bound to the matching @normal...
+  % macro, which was defined while that character had catcode other.
+  @let"=@normaldoublequote
+  @let~=@normaltilde
+  @let^=@normalcaret
+  @let_=@normalunderscore
+  @let|=@normalverticalbar
+  @let<=@normalless
+  @let>=@normalgreater
+  @let+=@normalplus
+  @let$=@normaldollar %$ font-lock fix
+  @unsepspaces
+}
+
+% Make _ and + \other characters, temporarily.
+% This is canceled by @fixbackslash.
+@otherifyactive
+
+% If a .fmt file is being used, we don't want the `\input texinfo' to show up.
+% That is what \eatinput is for; after that, the `\' should revert to printing
+% a backslash.
+%
+@gdef@eatinput input texinfo{@fixbackslash}
+@global@let\ = @eatinput
+
+% On the other hand, perhaps the file did not have a `\input texinfo'. Then
+% the first `\' in the file would cause an error. This macro tries to fix
+% that, assuming it is called before the first `\' could plausibly occur.
+% Also turn back on active characters that might appear in the input
+% file name, in case not using a pre-dumped format.
+%
+@gdef@fixbackslash{%
+  % Only rebind the active backslash if it still means @eatinput, i.e.
+  % no `\input texinfo' line has been consumed yet.
+  @ifx\@eatinput @let\ = @normalbackslash @fi
+  % Re-activate the two characters that @otherifyactive made catcode other.
+  @catcode`+=@active
+  @catcode`@_=@active
+}
+
+% Say @foo, not \foo, in error messages.
+@escapechar = `@@
+
+% These look ok in all fonts, so just make them not special.
+@catcode`@& = @other
+@catcode`@# = @other
+@catcode`@% = @other
+
+
+@c Local variables:
+@c eval: (add-hook 'write-file-hooks 'time-stamp)
+@c page-delimiter: "^\\\\message"
+@c time-stamp-start: "def\\\\texinfoversion{"
+@c time-stamp-format: "%:y-%02m-%02d.%02H"
+@c time-stamp-end: "}"
+@c End:
+
+@c vim:sw=2:
+
+@ignore
+   arch-tag: e1b36e32-c96e-4135-a41a-0b2efa2ea115
+@end ignore
diff --git a/doc/version.texi b/doc/version.texi

new file mode 100644 (file)

index 0000000..bcfbdca
--- /dev/null
+++ b/doc/version.texi
@@ -0,0 +1,4 @@
+@set UPDATED 6 May 2012
+@set UPDATED-MONTH May 2012
+@set EDITION 5.0.5
+@set VERSION 5.0.5
diff --git a/dumbmp.c b/dumbmp.c

new file mode 100644 (file)

index 0000000..c87aae4
--- /dev/null
+++ b/dumbmp.c
@@ -0,0 +1,922 @@
+/* dumbmp mini GMP compatible library.
+
+Copyright 2001, 2002, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+/* The code here implements a subset (a very limited subset) of the main GMP
+   functions.  It's designed for use in a few build-time calculations and
+   will be slow, but highly portable.
+
+   None of the normal GMP configure things are used, nor any of the normal
+   gmp.h or gmp-impl.h.  To use this file in a program just #include
+   "dumbmp.c".
+
+   ANSI function definitions can be used here, since ansi2knr is run if
+   necessary.  But other ANSI-isms like "const" should be avoided.
+
+   mp_limb_t here is an unsigned long, since that's a sensible type
+   everywhere we know of, with 8*sizeof(unsigned long) giving the number of
+   bits in the type (that not being true for instance with int or short on
+   Cray vector systems.)
+
+   Only the low half of each mp_limb_t is used, so as to make carry handling
+   and limb multiplies easy.  GMP_LIMB_BITS is the number of bits used.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+
+typedef unsigned long mp_limb_t;
+
+typedef struct {
+  int        _mp_alloc;
+  int        _mp_size;
+  mp_limb_t *_mp_d;
+} mpz_t[1];
+
+/* Number of bits actually stored in each limb: half the bits of an
+   mp_limb_t, leaving the upper half free to catch carries and products.  */
+#define GMP_LIMB_BITS  (sizeof (mp_limb_t) * 8 / 2)
+
+/* Generic helpers.  Each argument may be evaluated more than once.  */
+#define ABS(x)   ((x) >= 0 ? (x) : -(x))
+#define MIN(l,o) ((l) < (o) ? (l) : (o))
+#define MAX(h,i) ((h) > (i) ? (h) : (i))
+
+/* Field accessors for an mpz_t.  SIZ is signed: its sign is the sign of
+   the number and ABSIZ is the count of limbs in use.  */
+#define ALLOC(x) ((x)->_mp_alloc)
+#define PTR(x)   ((x)->_mp_d)
+#define SIZ(x)   ((x)->_mp_size)
+#define ABSIZ(x) ABS (SIZ (x))
+/* LOMASK selects the low GMP_LIMB_BITS bits; LO keeps those bits and HI
+   shifts them away, yielding the carry/overflow part of an intermediate.  */
+#define LOMASK   ((1L << GMP_LIMB_BITS) - 1)
+#define LO(x)    ((x) & LOMASK)
+#define HI(x)    ((x) >> GMP_LIMB_BITS)
+
+#define ASSERT(cond)                                    \
+  do {                                                  \
+    if (! (cond))                                       \
+      {                                                 \
+        fprintf (stderr, "Assertion failure\n");        \
+        abort ();                                       \
+      }                                                 \
+  } while (0)
+
+
+/* Allocate n bytes with malloc.  If malloc returns NULL, report the
+   requested size on stderr and abort, so callers never see NULL.  */
+char *
+xmalloc (int n)
+{
+  char  *mem;
+
+  mem = malloc (n);
+  if (mem != NULL)
+    return mem;
+
+  fprintf (stderr, "Out of memory (alloc %d bytes)\n", n);
+  abort ();
+  return NULL;  /* not reached */
+}
+
+/* Allocate room for n limbs through xmalloc, which aborts on failure.  */
+mp_limb_t *
+xmalloc_limbs (int n)
+{
+  char  *raw;
+
+  raw = xmalloc (n * sizeof (mp_limb_t));
+  return (mp_limb_t *) raw;
+}
+
+/* Copy size bytes from src to dst, lowest address first.  Nothing is
+   copied when size is zero or negative.  */
+void
+mem_copyi (char *dst, char *src, int size)
+{
+  int  left;
+
+  for (left = size; left > 0; left--)
+    *dst++ = *src++;
+}
+
+/* Return 1 if t is prime, 0 otherwise.  Values below 32 are answered from
+   a bit table; larger values are trial-divided by 2, 3, 5, 7 and then by
+   11, 13, 17, 19, 23, ... (steps alternating 2 and 4) until the quotient
+   drops below the divisor.  */
+static int
+isprime (unsigned long int t)
+{
+  unsigned long int  d, q, step;
+
+  if (t < 32)
+    return (0xa08a28acUL >> t) & 1;
+
+  if ((t & 1) == 0 || t % 3 == 0 || t % 5 == 0 || t % 7 == 0)
+    return 0;
+
+  step = 2;
+  for (d = 11; ; d += step, step = 6 - step)
+    {
+      q = t / d;
+      if (q < d)
+        return 1;               /* no divisor up to sqrt(t) */
+      if (t - q * d == 0)
+        return 0;               /* d divides t */
+    }
+}
+
+/* Return the smallest e such that 2^e >= n.  n must be at least 1
+   (checked with ASSERT).
+
+   The comparison is done in unsigned long: shifting a plain int 1 would
+   overflow (undefined behaviour, and potentially an endless loop) once n
+   exceeds the largest power of two an int can hold.  */
+int
+log2_ceil (int n)
+{
+  int  e;
+  ASSERT (n >= 1);
+  for (e = 0; ((unsigned long) 1 << e) < (unsigned long) n; e++)
+    ;
+  return e;
+}
+
+/* Make sure r has room for at least n limbs.  Storage only ever grows;
+   if realloc fails the program reports it and aborts.  */
+void
+mpz_realloc (mpz_t r, int n)
+{
+  if (ALLOC(r) < n)
+    {
+      ALLOC(r) = n;
+      PTR(r) = (mp_limb_t *) realloc (PTR(r), n * sizeof (mp_limb_t));
+      if (PTR(r) == NULL)
+        {
+          fprintf (stderr, "Out of memory (realloc to %d)\n", n);
+          abort ();
+        }
+    }
+}
+
+/* Reduce *rnp so that it no longer counts zero limbs at the high end of
+   rp.  *rnp ends up 0 if every limb is zero.  */
+void
+mpn_normalize (mp_limb_t *rp, int *rnp)
+{
+  int  top;
+
+  for (top = *rnp; top > 0; top--)
+    if (rp[top-1] != 0)
+      break;
+  *rnp = top;
+}
+
+/* Copy n limbs from src to dst, lowest index first.  */
+void
+mpn_copyi (mp_limb_t *dst, mp_limb_t *src, int n)
+{
+  int  left;
+
+  for (left = n; left > 0; left--)
+    *dst++ = *src++;
+}
+
+/* Store zero into the first rn limbs of rp.  */
+void
+mpn_zero (mp_limb_t *rp, int rn)
+{
+  int  left;
+
+  for (left = rn; left > 0; left--)
+    *rp++ = 0;
+}
+
+/* Initialize r to the value 0 with one zeroed limb allocated.  */
+void
+mpz_init (mpz_t r)
+{
+  mp_limb_t  *limbs;
+
+  limbs = xmalloc_limbs (1);
+  limbs[0] = 0;
+
+  PTR(r) = limbs;
+  ALLOC(r) = 1;
+  SIZ(r) = 0;
+}
+
+/* Free r's limb storage and overwrite its fields with recognizable junk
+   values, so that use of a cleared variable stands out.  */
+void
+mpz_clear (mpz_t r)
+{
+  mp_limb_t  *old = PTR (r);
+
+  PTR (r) = (mp_limb_t *) 0xdeadbeefL;
+  SIZ (r) = 0xbadcafeL;
+  ALLOC(r) = -1;
+  free (old);
+}
+
+/* Return 1, 0 or -1 according to whether a is positive, zero or
+   negative.  */
+int
+mpz_sgn (mpz_t a)
+{
+  if (SIZ(a) > 0)
+    return 1;
+  if (SIZ(a) < 0)
+    return -1;
+  return 0;
+}
+
+/* Return non-zero if a is odd.  Zero is not odd; the limb array is only
+   inspected when a is non-zero.  */
+int
+mpz_odd_p (mpz_t a)
+{
+  return SIZ(a) != 0 && (PTR(a)[0] & 1) != 0;
+}
+
+/* Return non-zero if a is even.  Zero is even; the limb array is only
+   inspected when a is non-zero.  */
+int
+mpz_even_p (mpz_t a)
+{
+  return SIZ(a) == 0 || (PTR(a)[0] & 1) == 0;
+}
+
+/* Return the number of bits needed to represent |a|, or 1 when a is zero.
+   Only base 2 is supported; any other base aborts.  */
+size_t
+mpz_sizeinbase (mpz_t a, int base)
+{
+  int        an, cnt;
+  mp_limb_t  top;
+
+  if (base != 2)
+    abort ();
+
+  an = ABSIZ (a);
+  if (an == 0)
+    return 1;
+
+  /* Count the significant bits of the most significant limb.  */
+  top = PTR (a)[an - 1];
+  for (cnt = 0; top != 0; cnt++)
+    top >>= 1;
+
+  return (an - 1) * GMP_LIMB_BITS + cnt;
+}
+
+/* Set r = a, growing r first if it is too small.  */
+void
+mpz_set (mpz_t r, mpz_t a)
+{
+  int  n = ABSIZ (a);
+
+  mpz_realloc (r, n);
+  mpn_copyi (PTR(r), PTR(a), n);
+  SIZ(r) = SIZ(a);
+}
+
+/* Initialize r and give it the value of a.  */
+void
+mpz_init_set (mpz_t r, mpz_t a)
+{
+  mpz_init (r);                 /* r = 0, one limb allocated */
+  mpz_set (r, a);               /* then copy a's limbs and size */
+}
+
+/* Set r = ui.  The value is split into its low and high half-limbs and
+   the size is then trimmed to the limbs actually needed.  */
+void
+mpz_set_ui (mpz_t r, unsigned long ui)
+{
+  mp_limb_t  *rp;
+  int        rn = 2;
+
+  mpz_realloc (r, 2);
+  rp = PTR(r);
+  rp[0] = LO(ui);
+  rp[1] = HI(ui);
+
+  mpn_normalize (rp, &rn);
+  SIZ(r) = rn;
+}
+
+/* Initialize r and give it the value ui.  */
+void
+mpz_init_set_ui (mpz_t r, unsigned long ui)
+{
+  mpz_init (r);                 /* r = 0, one limb allocated */
+  mpz_set_ui (r, ui);           /* then store ui */
+}
+
+/* Set bit number `bit' (bit 0 is the least significant) in r.  r must be
+   non-negative, otherwise the program aborts.  r grows as needed and any
+   newly exposed limbs are zeroed first.  */
+void
+mpz_setbit (mpz_t r, unsigned long bit)
+{
+  int        idx, cur;
+  mp_limb_t  *rp;
+
+  cur = SIZ(r);
+  if (cur < 0)
+    abort ();  /* only r>=0 */
+
+  idx = bit / GMP_LIMB_BITS;
+
+  mpz_realloc (r, idx+1);
+  rp = PTR(r);
+  if (idx >= cur)
+    {
+      /* The target limb lies beyond the current size: clear the gap.  */
+      mpn_zero (rp + cur, idx + 1 - cur);
+      SIZ(r) = idx + 1;
+    }
+
+  rp[idx] |= (mp_limb_t) 1 << (bit % GMP_LIMB_BITS);
+}
+
+/* Return the value (0 or 1) of bit number `bit' in r.  Bits beyond the
+   limbs in use read as 0.  r must be non-negative, otherwise the program
+   aborts.  */
+int
+mpz_tstbit (mpz_t r, unsigned long bit)
+{
+  int  idx;
+
+  if (SIZ(r) < 0)
+    abort ();  /* only r>=0 */
+
+  idx = bit / GMP_LIMB_BITS;
+  if (idx < SIZ(r))
+    return (PTR(r)[idx] >> (bit % GMP_LIMB_BITS)) & 1;
+
+  return 0;
+}
+
+/* Return the number of 1 bits in the limb a.  */
+int
+popc_limb (mp_limb_t a)
+{
+  int  ones;
+
+  for (ones = 0; a != 0; a >>= 1)
+    if ((a & 1) != 0)
+      ones++;
+  return ones;
+}
+
+/* Return the number of 1 bits in a.  a must be non-negative, otherwise
+   the program aborts.  */
+unsigned long
+mpz_popcount (mpz_t a)
+{
+  unsigned long  total = 0;
+  int            i, n = SIZ(a);
+
+  if (n < 0)
+    abort ();
+
+  for (i = n; i-- > 0; )
+    total += popc_limb (PTR(a)[i]);
+  return total;
+}
+
+/* Set r = a + b.  Both operands must be non-negative: the program aborts
+   if their sizes have opposite signs or if a is negative.  r may be the
+   same variable as a and/or b.  */
+void
+mpz_add (mpz_t r, mpz_t a, mpz_t b)
+{
+  int        an = ABSIZ (a), bn = ABSIZ (b);
+  int        ln, sn, rn, i;
+  mp_limb_t  *rp, *lp, *sp;
+  mp_limb_t  sum, carry;
+
+  if ((SIZ (a) ^ SIZ (b)) < 0)
+    abort ();                  /* really subtraction */
+  if (SIZ (a) < 0)
+    abort ();
+
+  /* Grow r before taking any pointers, since r may alias a or b.  */
+  mpz_realloc (r, MAX (an, bn) + 1);
+  rp = PTR (r);
+
+  /* lp/ln is the longer operand, sp/sn the shorter one.  */
+  if (an >= bn)
+    {
+      lp = PTR (a);  ln = an;
+      sp = PTR (b);  sn = bn;
+    }
+  else
+    {
+      lp = PTR (b);  ln = bn;
+      sp = PTR (a);  sn = an;
+    }
+
+  carry = 0;
+  for (i = 0; i < ln; i++)
+    {
+      sum = lp[i] + carry;
+      if (i < sn)
+        sum += sp[i];
+      rp[i] = LO (sum);
+      carry = HI (sum);
+    }
+  rp[ln] = carry;
+  rn = ln + 1;
+
+  mpn_normalize (rp, &rn);
+  SIZ (r) = rn;
+}
+
+/* Set r = a + ui by converting ui to a temporary mpz_t and calling
+   mpz_add.  */
+void
+mpz_add_ui (mpz_t r, mpz_t a, unsigned long int ui)
+{
+  mpz_t  addend;
+
+  mpz_init_set_ui (addend, ui);
+  mpz_add (r, a, addend);
+  mpz_clear (addend);
+}
+
+/* Set r = a - b.  Both operands must be non-negative: the program aborts
+   if their sizes have opposite signs or if a is negative.  r may be the
+   same variable as a and/or b.
+
+   The magnitudes are compared first and the smaller is always subtracted
+   from the larger, so no borrow can leave the top limb; the result is
+   then given a negative size when a < b.  (The previous code swapped the
+   operands when a had fewer limbs but still stored a positive result,
+   and for equal limb counts with a < b its negate pass left a garbage
+   top limb.)  */
+void
+mpz_sub (mpz_t r, mpz_t a, mpz_t b)
+{
+  int an = ABSIZ (a), bn = ABSIZ (b), rn;
+  mp_limb_t *rp, *ap, *bp;
+  int i, negate;
+  mp_limb_t t, cy;
+
+  if ((SIZ (a) ^ SIZ (b)) < 0)
+    abort ();                  /* really addition */
+  if (SIZ (a) < 0)
+    abort ();
+
+  /* Grow r before taking any pointers, since r may alias a or b.  */
+  mpz_realloc (r, MAX (an, bn) + 1);
+  ap = PTR (a);  bp = PTR (b);  rp = PTR (r);
+
+  /* negate is set when a < b.  Operands are normalized, so fewer limbs
+     means a smaller value; equal limb counts are decided by the most
+     significant differing limb.  */
+  negate = an < bn;
+  if (an == bn)
+    for (i = an - 1; i >= 0; i--)
+      if (ap[i] != bp[i])
+        {
+          negate = ap[i] < bp[i];
+          break;
+        }
+
+  if (negate)
+    {
+      /* Compute b - a instead and flip the sign at the end.  */
+      mp_limb_t *tp;  int tn;
+      tn = an; an = bn; bn = tn;
+      tp = ap; ap = bp; bp = tp;
+    }
+
+  /* Now ap/an >= bp/bn in magnitude.  After each limb cy is 1 if the
+     subtraction wrapped (a borrow is owed) and 0 otherwise.  */
+  cy = 0;
+  for (i = 0; i < bn; i++)
+    {
+      t = ap[i] - bp[i] - cy;
+      rp[i] = LO (t);
+      cy = LO (-HI (t));
+    }
+  for (i = bn; i < an; i++)
+    {
+      t = ap[i] - cy;
+      rp[i] = LO (t);
+      cy = LO (-HI (t));
+    }
+  ASSERT (cy == 0);            /* larger minus smaller never borrows out */
+
+  rn = an;
+  mpn_normalize (rp, &rn);
+  SIZ (r) = negate ? -rn : rn;
+}
+
+/* Set r = a - ui by converting ui to a temporary mpz_t and calling
+   mpz_sub.  */
+void
+mpz_sub_ui (mpz_t r, mpz_t a, unsigned long int ui)
+{
+  mpz_t  subtrahend;
+
+  mpz_init_set_ui (subtrahend, ui);
+  mpz_sub (r, a, subtrahend);
+  mpz_clear (subtrahend);
+}
+
+/* Set r = a * b by schoolbook multiplication.  The product is built in a
+   freshly allocated buffer which then replaces r's storage, so r may be
+   the same variable as a and/or b.  The result is negative when the
+   sizes of a and b have opposite signs.
+
+   At least one limb is always allocated: when both operands are zero the
+   limb count an+bn is 0, and malloc(0) is allowed to return NULL, which
+   xmalloc would report as "Out of memory" and abort.  */
+void
+mpz_mul (mpz_t r, mpz_t a, mpz_t b)
+{
+  int an = ABSIZ (a), bn = ABSIZ (b), rn, alloc;
+  mp_limb_t *scratch, *tmp, *ap = PTR (a), *bp = PTR (b);
+  int ai, bi;
+  mp_limb_t t, cy;
+
+  alloc = MAX (an + bn, 1);
+  scratch = xmalloc_limbs (alloc);
+  for (bi = 0; bi < alloc; bi++)
+    scratch[bi] = 0;
+
+  for (ai = 0; ai < an; ai++)
+    {
+      /* Add ap[ai] * b into the running product at limb offset ai.  Limbs
+         hold only half an mp_limb_t, so product + addend + carry cannot
+         overflow t.  */
+      tmp = scratch + ai;
+      cy = 0;
+      for (bi = 0; bi < bn; bi++)
+        {
+          t = ap[ai] * bp[bi] + tmp[bi] + cy;
+          tmp[bi] = LO (t);
+          cy = HI (t);
+        }
+      tmp[bn] = cy;
+    }
+
+  rn = an + bn;
+  mpn_normalize (scratch, &rn);
+  /* Only now release r's old limbs: a or b may have been reading them.  */
+  free (PTR (r));
+  PTR (r) = scratch;
+  SIZ (r) = (SIZ (a) ^ SIZ (b)) >= 0 ? rn : -rn;
+  ALLOC (r) = alloc;
+}
+
+/* Set r = a * ui by converting ui to a temporary mpz_t and calling
+   mpz_mul.  */
+void
+mpz_mul_ui (mpz_t r, mpz_t a, unsigned long int ui)
+{
+  mpz_t  factor;
+
+  mpz_init_set_ui (factor, ui);
+  mpz_mul (r, a, factor);
+  mpz_clear (factor);
+}
+
+/* Set r = a * 2^bcnt, implemented as bcnt successive doublings with
+   mpz_add.  */
+void
+mpz_mul_2exp (mpz_t r, mpz_t a, unsigned long int bcnt)
+{
+  unsigned long int  left;
+
+  mpz_set (r, a);
+  for (left = bcnt; left != 0; left--)
+    mpz_add (r, r, r);
+}
+
+/* Set r = b^e by multiplying 1 by b a total of e times.  */
+void
+mpz_ui_pow_ui (mpz_t r, unsigned long b, unsigned long e)
+{
+  unsigned long  left;
+  mpz_t          base;
+
+  mpz_init_set_ui (base, b);
+
+  mpz_set_ui (r, 1L);
+  for (left = e; left != 0; left--)
+    mpz_mul (r, r, base);
+
+  mpz_clear (base);
+}
+
+/* Set r = a / 2^bcnt, rounding towards zero: the magnitude of a is
+   shifted right by bcnt bits and a's sign is put back on the result.
+   r may be the same variable as a.  */
+void
+mpz_tdiv_q_2exp (mpz_t r, mpz_t a, unsigned long int bcnt)
+{
+  int        as = SIZ (a);
+  int        skip = bcnt / GMP_LIMB_BITS;       /* whole limbs dropped */
+  int        shift = bcnt % GMP_LIMB_BITS;      /* remaining bit shift */
+  int        rn = ABS (as) - skip;
+  int        i;
+  mp_limb_t  *rp, *ap;
+
+  if (rn <= 0)
+    {
+      /* Everything is shifted out.  */
+      SIZ (r) = 0;
+      return;
+    }
+
+  mpz_realloc (r, rn);
+  rp = PTR (r);
+  ap = PTR (a) + skip;
+
+  if (shift == 0)
+    mpn_copyi (rp, ap, rn);
+  else
+    {
+      int  up = GMP_LIMB_BITS - shift;
+
+      /* Each result limb takes the high bits of ap[i] and the low bits of
+         ap[i+1].  Reads stay at or above the index being written, so this
+         is safe when r and a share storage.  */
+      for (i = 0; i < rn - 1; i++)
+        rp[i] = (ap[i] >> shift) | LO (ap[i + 1] << up);
+      rp[rn - 1] = ap[rn - 1] >> shift;
+
+      /* The shift may have emptied the top limb.  */
+      if (rp[rn - 1] == 0)
+        rn--;
+    }
+
+  SIZ (r) = as >= 0 ? rn : -rn;
+}
+
+/* Set r to a with only the low bcnt bits of its magnitude kept; the sign
+   of a is preserved.  */
+void
+mpz_tdiv_r_2exp (mpz_t r, mpz_t a, unsigned long int bcnt)
+{
+  int  whole = bcnt / GMP_LIMB_BITS;    /* limbs kept in full */
+  int  part = bcnt % GMP_LIMB_BITS;     /* bits kept in the next limb */
+  int  rn;
+
+  mpz_set (r, a);
+  if (ABSIZ(r) <= whole)
+    return;                     /* a already fits in bcnt bits */
+
+  rn = whole + 1;
+  PTR(r)[whole] &= ((mp_limb_t) 1 << part) - 1;
+  mpn_normalize (PTR(r), &rn);
+  SIZ(r) = (SIZ(r) >= 0 ? rn : -rn);
+}
+
+/* Compare a and b.  Return 1 if a > b, -1 if a < b and 0 if they are
+   equal.  Differing signed sizes decide immediately; otherwise limbs are
+   compared from the most significant end.  */
+int
+mpz_cmp (mpz_t a, mpz_t b)
+{
+  int        as = SIZ (a), bs = SIZ (b);
+  int        i, sign;
+  mp_limb_t  *ap, *bp;
+
+  if (as > bs)
+    return 1;
+  if (as < bs)
+    return -1;
+
+  /* Same sign and limb count: for negative values the larger magnitude is
+     the smaller number.  */
+  sign = as > 0 ? 1 : -1;
+  ap = PTR (a);
+  bp = PTR (b);
+
+  i = ABS (as);
+  while (i-- > 0)
+    if (ap[i] != bp[i])
+      return ap[i] > bp[i] ? sign : -sign;
+
+  return 0;
+}
+
+/* Compare a with the unsigned long b, returning what mpz_cmp returns for
+   a and b converted to a temporary mpz_t.  */
+int
+mpz_cmp_ui (mpz_t a, unsigned long b)
+{
+  mpz_t  other;
+  int    result;
+
+  mpz_init (other);
+  mpz_set_ui (other, b);
+  result = mpz_cmp (a, other);
+  mpz_clear (other);
+  return result;
+}
+
+/* Set q = a / b and r = a mod b, for a >= 0 and b > 0 (both checked with
+   ASSERT), using bit-at-a-time shift-and-subtract long division.
+
+   a and b are copied into temporaries before q and r are written, so q
+   and r may be the same variables as a or b.
+
+   The division loop is entered when a >= b.  The previous test, a > b,
+   skipped the a == b case and returned q = 0, r = a rather than q = 1,
+   r = 0.  */
+void
+mpz_tdiv_qr (mpz_t q, mpz_t r, mpz_t a, mpz_t b)
+{
+  mpz_t          tmpr, tmpb;
+  unsigned long  cnt;
+
+  ASSERT (mpz_sgn(a) >= 0);
+  ASSERT (mpz_sgn(b) > 0);
+
+  mpz_init_set (tmpr, a);
+  mpz_init_set (tmpb, b);
+  mpz_set_ui (q, 0L);
+
+  if (mpz_cmp (tmpr, tmpb) >= 0)
+    {
+      /* Shift the divisor up so that it exceeds the remainder, then walk
+         it back down one bit at a time, producing one quotient bit per
+         step.  a >= b keeps cnt from wrapping below zero.  */
+      cnt = mpz_sizeinbase (tmpr, 2) - mpz_sizeinbase (tmpb, 2) + 1;
+      mpz_mul_2exp (tmpb, tmpb, cnt);
+
+      for ( ; cnt > 0; cnt--)
+        {
+          mpz_mul_2exp (q, q, 1);
+          mpz_tdiv_q_2exp (tmpb, tmpb, 1L);
+          if (mpz_cmp (tmpr, tmpb) >= 0)
+            {
+              mpz_sub (tmpr, tmpr, tmpb);
+              mpz_add_ui (q, q, 1L);
+              ASSERT (mpz_cmp (tmpr, tmpb) < 0);
+            }
+        }
+    }
+
+  mpz_set (r, tmpr);
+  mpz_clear (tmpr);
+  mpz_clear (tmpb);
+}
+
+/* Quotient and remainder of a divided by the unsigned long b; b is
+   converted to a temporary mpz_t and mpz_tdiv_qr does the work.  */
+void
+mpz_tdiv_qr_ui (mpz_t q, mpz_t r, mpz_t a, unsigned long b)
+{
+  mpz_t  divisor;
+
+  mpz_init (divisor);
+  mpz_set_ui (divisor, b);
+  mpz_tdiv_qr (q, r, a, divisor);
+  mpz_clear (divisor);
+}
+
+/* Quotient only: run mpz_tdiv_qr and throw the remainder away.  */
+void
+mpz_tdiv_q (mpz_t q, mpz_t a, mpz_t b)
+{
+  mpz_t  unused_rem;
+
+  mpz_init (unused_rem);
+  mpz_tdiv_qr (q, unused_rem, a, b);
+  mpz_clear (unused_rem);
+}
+
+/* Remainder only: run mpz_tdiv_qr and throw the quotient away.  */
+void
+mpz_tdiv_r (mpz_t r, mpz_t a, mpz_t b)
+{
+  mpz_t  unused_quot;
+
+  mpz_init (unused_quot);
+  mpz_tdiv_qr (unused_quot, r, a, b);
+  mpz_clear (unused_quot);
+}
+
+/* Set q to the quotient of n divided by the unsigned long d; d is
+   converted to a temporary mpz_t and mpz_tdiv_q does the work.  */
+void
+mpz_tdiv_q_ui (mpz_t q, mpz_t n, unsigned long d)
+{
+  mpz_t  divisor;
+
+  mpz_init (divisor);
+  mpz_set_ui (divisor, d);
+  mpz_tdiv_q (q, n, divisor);
+  mpz_clear (divisor);
+}
+
+/* Set inv to the inverse of d in the style of invert_limb, i.e. for use
+   with udiv_qrnnd_preinv.  With shift = numb_bits - (bit length of d),
+   the value stored is
+
+       inv = 2^(2*numb_bits - shift) / d  -  2^numb_bits
+
+   d must be positive and must fit in numb_bits bits (both checked with
+   ASSERT).  */
+void
+mpz_preinv_invert (mpz_t inv, mpz_t d, int numb_bits)
+{
+  mpz_t  pow;
+  int    shift;
+
+  ASSERT (SIZ(d) > 0);
+  shift = numb_bits - mpz_sizeinbase (d, 2);
+  ASSERT (shift >= 0);
+
+  /* inv = 2^(2*numb_bits - shift) / d */
+  mpz_init_set_ui (pow, 1L);
+  mpz_mul_2exp (pow, pow, 2*numb_bits - shift);
+  mpz_tdiv_q (inv, pow, d);
+
+  /* inv -= 2^numb_bits */
+  mpz_set_ui (pow, 1L);
+  mpz_mul_2exp (pow, pow, numb_bits);
+  mpz_sub (inv, inv, pow);
+
+  mpz_clear (pow);
+}
+
+/* Strip every leading '0' character from s in place by moving the rest
+   of the string, terminator included, down to the start.  A string made
+   only of '0' characters becomes empty.  */
+void
+strstrip_leading_zeros (char *s)
+{
+  char  *src = s;
+
+  while (*src == '0')
+    src++;
+
+  while ((*s++ = *src++) != '\0')
+    ;
+}
+
+/* Convert a to a lower-case hexadecimal string without leading zeros and
+   return it.  Only base 16 and non-negative a are supported; anything
+   else aborts.  If buf is 0 a buffer is obtained from xmalloc, otherwise
+   the digits are written into buf.  Zero is rendered as "0".  */
+char *
+mpz_get_str (char *buf, int base, mpz_t a)
+{
+  static char  tohex[] = "0123456789abcdef";
+
+  mp_limb_t  limb, *ap;
+  int        an, ndigits, pos, i, j;
+
+  if (base != 16)
+    abort ();
+  if (SIZ (a) < 0)
+    abort ();
+
+  an = ABSIZ (a);
+  if (buf == 0)
+    buf = xmalloc (an * (GMP_LIMB_BITS / 4) + 3);
+
+  if (an == 0)
+    {
+      buf[0] = '0';
+      buf[1] = '\0';
+      return buf;
+    }
+
+  /* Every limb yields a fixed number of hex digits.  The buffer is filled
+     from its end, lowest limb first, so the string reads most significant
+     digit first.  */
+  ap = PTR (a);
+  ndigits = an * (GMP_LIMB_BITS / 4);
+  pos = ndigits;
+
+  for (i = 0; i < an; i++)
+    {
+      limb = ap[i];
+      for (j = 0; j < GMP_LIMB_BITS / 4; j++)
+        {
+          buf[--pos] = tohex [limb & 0xF];
+          limb >>= 4;
+        }
+      ASSERT (limb == 0);
+    }
+  ASSERT (pos == 0);
+
+  buf[ndigits] = '\0';
+
+  strstrip_leading_zeros (buf);
+  return buf;
+}
+
+/* Write a to file (stdout when file is 0) as hexadecimal digits.  The
+   base argument is not consulted: the conversion always asks
+   mpz_get_str for base 16.  */
+void
+mpz_out_str (FILE *file, int base, mpz_t a)
+{
+  char  *digits;
+
+  digits = mpz_get_str (0, 16, a);
+  fputs (digits, file == 0 ? stdout : file);
+  free (digits);
+}
+
+/* Calculate r satisfying r*a == 1 mod 2^n.  a must be odd (checked with
+   ASSERT).  The inverse is built one bit at a time: whenever bit i of
+   inv*a is set, bit i of inv is set, which clears that bit of the
+   product.  */
+void
+mpz_invert_2exp (mpz_t r, mpz_t a, unsigned long n)
+{
+  unsigned long  i;
+  mpz_t          inv, prod;
+
+  ASSERT (mpz_odd_p (a));
+
+  mpz_init (prod);
+  mpz_init_set_ui (inv, 1L);
+
+  i = 1;
+  while (i < n)
+    {
+      mpz_mul (prod, inv, a);
+      if (mpz_tstbit (prod, i) != 0)
+        mpz_setbit (inv, i);
+      i++;
+    }
+
+  /* Check the result: inv*a reduced to n bits must be exactly 1.  */
+  mpz_mul (prod, inv, a);
+  mpz_tdiv_r_2exp (prod, prod, n);
+  ASSERT (mpz_cmp_ui (prod, 1L) == 0);
+
+  mpz_set (r, inv);
+
+  mpz_clear (prod);
+  mpz_clear (inv);
+}
+
+/* Calculate r satisfying r*a == 1 mod 2^n, for an unsigned long a; a is
+   converted to a temporary mpz_t and mpz_invert_2exp does the work.  */
+void
+mpz_invert_ui_2exp (mpz_t r, unsigned long a, unsigned long n)
+{
+  mpz_t  operand;
+
+  mpz_init (operand);
+  mpz_set_ui (operand, a);
+  mpz_invert_2exp (r, operand, n);
+  mpz_clear (operand);
+}
+
+/* x = y^z, computed by z successive multiplications.  The product is
+   accumulated in a temporary, so x may be the same variable as y.  */
+void
+mpz_pow_ui (mpz_t x, mpz_t y, unsigned long z)
+{
+  mpz_t  acc;
+
+  mpz_init_set_ui (acc, 1);
+  while (z != 0)
+    {
+      mpz_mul (acc, acc, y);
+      z--;
+    }
+  mpz_set (x, acc);
+  mpz_clear (acc);
+}
+
+/* x = x + y*z, with the product formed in a temporary and then added
+   with mpz_add.  */
+void
+mpz_addmul_ui (mpz_t x, mpz_t y, unsigned long z)
+{
+  mpz_t  product;
+
+  mpz_init (product);
+  mpz_mul_ui (product, y, z);
+  mpz_add (x, x, product);
+  mpz_clear (product);
+}
+
+/* x = floor(y^(1/z)).  Negative y is rejected with a message and abort;
+   y <= 1 is returned unchanged.  Otherwise the iteration
+
+       next = (y / cur^(z-1) + (z-1)*cur) / z
+
+   is run starting from cur = y, stopping as soon as next >= cur; cur is
+   then the result.  */
+void
+mpz_root (mpz_t x, mpz_t y, unsigned long z)
+{
+  mpz_t  next, cur;
+
+  if (mpz_sgn (y) < 0)
+    {
+      fprintf (stderr, "mpz_root does not accept negative values\n");
+      abort ();
+    }
+  if (mpz_cmp_ui (y, 1) <= 0)
+    {
+      mpz_set (x, y);
+      return;
+    }
+
+  mpz_init (next);
+  mpz_init_set (cur, y);
+  for (;;)
+    {
+      mpz_pow_ui (next, cur, z - 1);
+      mpz_tdiv_q (next, y, next);
+      mpz_addmul_ui (next, cur, z - 1);
+      mpz_tdiv_q_ui (next, next, z);
+      if (mpz_cmp (next, cur) >= 0)
+        break;                  /* no further decrease */
+      mpz_set (cur, next);
+    }
+  mpz_set (x, cur);
+  mpz_clear (next);
+  mpz_clear (cur);
+}
diff --git a/errno.c b/errno.c

new file mode 100644 (file)

index 0000000..e5e160d
--- /dev/null
+++ b/errno.c
@@ -0,0 +1,59 @@
+/* gmp_errno, __gmp_exception -- exception handling and reporting.
+
+   THE FUNCTIONS IN THIS FILE, APART FROM gmp_errno, ARE FOR INTERNAL USE
+   ONLY.  THEY'RE ALMOST CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR
+   DISAPPEAR COMPLETELY IN FUTURE GNU MP RELEASES.
+
+Copyright 2000, 2001, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+int gmp_errno = 0;
+
+
+/* The deliberate divide by zero triggers an exception on most systems.  On
+   those where it doesn't, for example power and powerpc, use abort instead.
+
+   Enhancement: Perhaps raise(SIGFPE) (or the same with kill()) would be
+   better than abort.  Perhaps it'd be possible to get the BSD style
+   FPE_INTDIV_TRAP parameter in there too.  */
+
+/* Record error_bit in gmp_errno, then try to raise a hardware exception
+   with an integer division, falling back to abort.  Not expected to
+   return to the caller.  */
+void
+__gmp_exception (int error_bit)
+{
+  /* Bits are OR-ed in, so previously recorded errors are kept.  */
+  gmp_errno |= error_bit;
+  /* The deliberate divide by zero; presumably __gmp_0 holds 0 and is
+     declared outside this file, as is __gmp_junk -- TODO confirm.  */
+  __gmp_junk = 10 / __gmp_0;
+  /* Reached only where the division above did not trap.  */
+  abort ();
+}
+
+
+/* These functions minimize the amount of code required in functions raising
+   exceptions.  Since they're "noreturn" and don't take any parameters, a
+   test and call might even come out as a simple conditional jump.  */
+/* Raise the GMP_ERROR_SQRT_OF_NEGATIVE exception via __gmp_exception.  */
+void
+__gmp_sqrt_of_negative (void)
+{
+  __gmp_exception (GMP_ERROR_SQRT_OF_NEGATIVE);
+}
+/* Raise the GMP_ERROR_DIVISION_BY_ZERO exception via __gmp_exception.  */
+void
+__gmp_divide_by_zero (void)
+{
+  __gmp_exception (GMP_ERROR_DIVISION_BY_ZERO);
+}
diff --git a/extract-dbl.c b/extract-dbl.c

new file mode 100644 (file)

index 0000000..9c2ae9b
--- /dev/null
+++ b/extract-dbl.c
@@ -0,0 +1,302 @@
+/* __gmp_extract_double -- convert from double to array of mp_limb_t.
+
+Copyright 1996, 1999, 2000, 2001, 2002, 2006 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#ifdef XDEBUG
+#undef _GMP_IEEE_FLOATS
+#endif
+
+#ifndef _GMP_IEEE_FLOATS
+#define _GMP_IEEE_FLOATS 0
+#endif
+
+#define BITS_IN_MANTISSA 53
+
+/* Extract a non-negative double D into LIMBS_PER_DOUBLE limbs at RP, high
+   limb at the highest index, and return its exponent counted in limbs.  */
+int
+__gmp_extract_double (mp_ptr rp, double d)
+{
+  long exp;                    /* binary exponent, turned into a limb count below */
+  unsigned sc;                 /* exponent modulo GMP_NUMB_BITS, used as the bit shift */
+#ifdef _LONG_LONG_LIMB
+#define BITS_PER_PART 64       /* somewhat bogus */
+  unsigned long long int manl;
+#else
+#define BITS_PER_PART GMP_LIMB_BITS
+  unsigned long int manh, manl;
+#endif
+
+  /* BUGS
+
+     1. Should handle Inf and NaN in IEEE specific code.
+     2. Handle Inf and NaN also in default code, to avoid hangs.
+     3. Generalize to handle all GMP_LIMB_BITS >= 32.
+     4. This lits is incomplete and misspelled.
+   */
+
+  ASSERT (d >= 0.0);
+
+  if (d == 0.0)
+    {
+      MPN_ZERO (rp, LIMBS_PER_DOUBLE);
+      return 0;
+    }
+
+#if _GMP_IEEE_FLOATS
+  {
+#if defined (__alpha) && __GNUC__ == 2 && __GNUC_MINOR__ == 8
+    /* Work around alpha-specific bug in GCC 2.8.x.  */
+    volatile
+#endif
+    union ieee_double_extract x;
+    x.d = d;
+    exp = x.s.exp;             /* still biased, see the subtraction below */
+#if BITS_PER_PART == 64                /* generalize this to BITS_PER_PART > BITS_IN_MANTISSA */
+    manl = (((mp_limb_t) 1 << 63)
+           | ((mp_limb_t) x.s.manh << 43) | ((mp_limb_t) x.s.manl << 11));
+    if (exp == 0)
+      {
+       /* Denormalized number.  Don't try to be clever about this,
+          since it is not an important case to make fast.  */
+       exp = 1;
+       do
+         {
+           manl = manl << 1;
+           exp--;
+         }
+       while ((manl & GMP_LIMB_HIGHBIT) == 0);
+      }
+#endif
+#if BITS_PER_PART == 32
+    manh = ((mp_limb_t) 1 << 31) | (x.s.manh << 11) | (x.s.manl >> 21);
+    manl = x.s.manl << 11;
+    if (exp == 0)
+      {
+       /* Denormalized number.  Don't try to be clever about this,
+          since it is not an important case to make fast.  */
+       exp = 1;
+       do
+         {
+           manh = (manh << 1) | (manl >> 31);
+           manl = manl << 1;
+           exp--;
+         }
+       while ((manh & GMP_LIMB_HIGHBIT) == 0);
+      }
+#endif
+#if BITS_PER_PART != 32 && BITS_PER_PART != 64
+  You need to generalize the code above to handle this.
+#endif
+    exp -= 1022;               /* Remove IEEE bias.  */
+  }
+#else
+  {
+    /* Unknown (or known to be non-IEEE) double format.  */
+    exp = 0;
+    if (d >= 1.0)
+      {
+       ASSERT_ALWAYS (d * 0.5 != d);
+
+       while (d >= 32768.0)
+         {
+           d *= (1.0 / 65536.0);
+           exp += 16;
+         }
+       while (d >= 1.0)
+         {
+           d *= 0.5;
+           exp += 1;
+         }
+      }
+    else if (d < 0.5)
+      {
+       while (d < (1.0 / 65536.0))
+         {
+           d *=  65536.0;
+           exp -= 16;
+         }
+       while (d < 0.5)
+         {
+           d *= 2.0;
+           exp -= 1;
+         }
+      }
+
+    d *= (4.0 * ((unsigned long int) 1 << (BITS_PER_PART - 2)));
+#if BITS_PER_PART == 64
+    manl = d;
+#endif
+#if BITS_PER_PART == 32
+    manh = d;
+    manl = (d - manh) * (4.0 * ((unsigned long int) 1 << (BITS_PER_PART - 2)));
+#endif
+  }
+#endif /* IEEE */
+
+  sc = (unsigned) (exp + 64 * GMP_NUMB_BITS) % GMP_NUMB_BITS;
+
+  /* We add something here to get rounding right (presumably so "/" rounds down when exp < 0).  */
+  exp = (exp + 64 * GMP_NUMB_BITS) / GMP_NUMB_BITS - 64 * GMP_NUMB_BITS / GMP_NUMB_BITS + 1;
+
+#if BITS_PER_PART == 64 && LIMBS_PER_DOUBLE == 2
+#if GMP_NAIL_BITS == 0
+  if (sc != 0)
+    {
+      rp[1] = manl >> (GMP_LIMB_BITS - sc);
+      rp[0] = manl << sc;
+    }
+  else
+    {
+      rp[1] = manl;
+      rp[0] = 0;
+      exp--;
+    }
+#else
+  if (sc > GMP_NAIL_BITS)
+    {
+      rp[1] = manl >> (GMP_LIMB_BITS - sc);
+      rp[0] = (manl << (sc - GMP_NAIL_BITS)) & GMP_NUMB_MASK;
+    }
+  else
+    {
+      if (sc == 0)
+       {
+         rp[1] = manl >> GMP_NAIL_BITS;
+         rp[0] = (manl << GMP_NUMB_BITS - GMP_NAIL_BITS) & GMP_NUMB_MASK;
+         exp--;
+       }
+      else
+       {
+         rp[1] = manl >> (GMP_LIMB_BITS - sc);
+         rp[0] = (manl >> (GMP_NAIL_BITS - sc)) & GMP_NUMB_MASK;
+       }
+    }
+#endif
+#endif
+
+#if BITS_PER_PART == 64 && LIMBS_PER_DOUBLE == 3
+  if (sc > GMP_NAIL_BITS)
+    {
+      rp[2] = manl >> (GMP_LIMB_BITS - sc);
+      rp[1] = (manl << sc - GMP_NAIL_BITS) & GMP_NUMB_MASK;
+      if (sc >= 2 * GMP_NAIL_BITS)
+       rp[0] = 0;
+      else
+       rp[0] = (manl << GMP_NUMB_BITS - GMP_NAIL_BITS + sc) & GMP_NUMB_MASK;
+    }
+  else
+    {
+      if (sc == 0)
+       {
+         rp[2] = manl >> GMP_NAIL_BITS;
+         rp[1] = (manl << GMP_NUMB_BITS - GMP_NAIL_BITS) & GMP_NUMB_MASK;
+         rp[0] = 0;
+         exp--;
+       }
+      else
+       {
+         rp[2] = manl >> (GMP_LIMB_BITS - sc);
+         rp[1] = (manl >> GMP_NAIL_BITS - sc) & GMP_NUMB_MASK;
+         rp[0] = (manl << GMP_NUMB_BITS - GMP_NAIL_BITS + sc) & GMP_NUMB_MASK;
+       }
+    }
+#endif
+
+#if BITS_PER_PART == 32 && LIMBS_PER_DOUBLE == 3
+#if GMP_NAIL_BITS == 0
+  if (sc != 0)
+    {
+      rp[2] = manh >> (GMP_LIMB_BITS - sc);
+      rp[1] = (manh << sc) | (manl >> (GMP_LIMB_BITS - sc));
+      rp[0] = manl << sc;
+    }
+  else
+    {
+      rp[2] = manh;
+      rp[1] = manl;
+      rp[0] = 0;
+      exp--;
+    }
+#else
+  if (sc > GMP_NAIL_BITS)
+    {
+      rp[2] = (manh >> (GMP_LIMB_BITS - sc));
+      rp[1] = ((manh << (sc - GMP_NAIL_BITS)) |
+              (manl >> (GMP_LIMB_BITS - sc + GMP_NAIL_BITS))) & GMP_NUMB_MASK;
+      if (sc >= 2 * GMP_NAIL_BITS)
+       rp[0] = (manl << sc - 2 * GMP_NAIL_BITS) & GMP_NUMB_MASK;
+      else
+       rp[0] = manl >> (2 * GMP_NAIL_BITS - sc) & GMP_NUMB_MASK;
+    }
+  else
+    {
+      if (sc == 0)
+       {
+         rp[2] = manh >> GMP_NAIL_BITS;
+         rp[1] = ((manh << GMP_NUMB_BITS - GMP_NAIL_BITS) | (manl >> 2 * GMP_NAIL_BITS)) & GMP_NUMB_MASK;
+         rp[0] = (manl << GMP_NUMB_BITS - 2 * GMP_NAIL_BITS) & GMP_NUMB_MASK;
+         exp--;
+       }
+      else
+       {
+         rp[2] = (manh >> (GMP_LIMB_BITS - sc));
+         rp[1] = (manh >> (GMP_NAIL_BITS - sc)) & GMP_NUMB_MASK;
+         rp[0] = ((manh << (GMP_NUMB_BITS - GMP_NAIL_BITS + sc))
+                  | (manl >> (GMP_LIMB_BITS - (GMP_NUMB_BITS - GMP_NAIL_BITS + sc)))) & GMP_NUMB_MASK;
+       }
+    }
+#endif
+#endif
+
+#if BITS_PER_PART == 32 && LIMBS_PER_DOUBLE > 3
+  if (sc == 0)
+    {
+      int i;
+
+      for (i = LIMBS_PER_DOUBLE - 1; i >= 0; i--)
+       {
+         rp[i] = manh >> (BITS_PER_ULONG - GMP_NUMB_BITS);
+         manh = ((manh << GMP_NUMB_BITS)
+                 | (manl >> (BITS_PER_ULONG - GMP_NUMB_BITS)));
+         manl = manl << GMP_NUMB_BITS;
+       }
+      exp--;
+    }
+  else
+    {
+      int i;
+
+      rp[LIMBS_PER_DOUBLE - 1] = (manh >> (GMP_LIMB_BITS - sc));
+      manh = (manh << sc) | (manl >> (GMP_LIMB_BITS - sc));
+      manl = (manl << sc);
+      for (i = LIMBS_PER_DOUBLE - 2; i >= 0; i--)
+       {
+         rp[i] = manh >> (BITS_PER_ULONG - GMP_NUMB_BITS);
+         manh = ((manh << GMP_NUMB_BITS)
+                 | (manl >> (BITS_PER_ULONG - GMP_NUMB_BITS)));
+         manl = manl << GMP_NUMB_BITS;
+       }
+  }
+#endif
+
+  return exp;                  /* in limbs, after the division by GMP_NUMB_BITS above */
+}
diff --git a/gen-bases.c b/gen-bases.c

new file mode 100644 (file)

index 0000000..31895e8
--- /dev/null
+++ b/gen-bases.c
@@ -0,0 +1,180 @@
+/* Generate mp_bases data.
+
+Copyright 1991, 1993, 1994, 1996, 2000, 2002, 2004 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <math.h>
+
+#include "dumbmp.c"
+
+
+int    chars_per_limb;         /* largest n with base^n <= 2^numb_bits */
+double chars_per_bit_exactly;  /* log(2) / log(base) */
+mpz_t  big_base;               /* base^chars_per_limb */
+int    normalization_steps;    /* limb_bits minus the bit length of big_base */
+mpz_t  big_base_inverted;      /* floor(2^(2*limb_bits-normalization_steps)/big_base) - 2^limb_bits */
+
+mpz_t  t;                      /* scratch value used by generate() */
+
+#define POW2_P(n)  (((n) & ((n) - 1)) == 0)  /* true when n has at most one bit set */
+
+unsigned int
+ulog2 (unsigned int x)
+{
+  unsigned int nbits = 0;      /* bit length of x, stays 0 for x == 0 */
+  for (; x != 0; x >>= 1)      /* one pass per significant bit */
+    nbits++;
+  return nbits;
+}
+
+void
+generate (int limb_bits, int nail_bits, int base)
+{
+  int  numb_bits = limb_bits - nail_bits;
+
+  /* t = 2^numb_bits, the bound a power of base must not exceed */
+  mpz_set_ui (t, 1L);
+  mpz_mul_2exp (t, t, numb_bits);
+  /* count the factors of base which can be multiplied in before passing t */
+  mpz_set_ui (big_base, 1L);
+  for (chars_per_limb = 0; ; chars_per_limb++)
+    {
+      mpz_mul_ui (big_base, big_base, (long) base);
+      if (mpz_cmp (big_base, t) > 0)
+        break;
+    }
+
+  /* log(2)/log(base), then big_base = base^chars_per_limb */
+  chars_per_bit_exactly = 0.69314718055994530942 / log ((double) base);
+  mpz_ui_pow_ui (big_base, (long) base, (long) chars_per_limb);
+  normalization_steps = limb_bits - mpz_sizeinbase (big_base, 2);
+
+  /* big_base_inverted = 2^(2*limb_bits-normalization_steps) / big_base - 2^limb_bits */
+  mpz_set_ui (t, 1L);
+  mpz_mul_2exp (t, t, 2*limb_bits - normalization_steps);
+  mpz_tdiv_q (big_base_inverted, t, big_base);
+  mpz_set_ui (t, 1L);
+  mpz_mul_2exp (t, t, limb_bits);
+  mpz_sub (big_base_inverted, big_base_inverted, t);
+}
+
+void
+header (int limb_bits, int nail_bits)
+{
+  int  numb_bits = limb_bits - nail_bits;
+
+  generate (limb_bits, nail_bits, 10);   /* fills in the globals for base 10 */
+
+  fputs ("/* This file generated by gen-bases.c - DO NOT EDIT. */\n", stdout);
+  fputs ("\n", stdout);
+  printf ("#if GMP_NUMB_BITS != %d\n", numb_bits);
+  printf ("Error, error, this data is for %d bits\n", numb_bits);
+  fputs ("#endif\n", stdout);
+  fputs ("\n", stdout);
+  fputs ("/* mp_bases[10] data, as literal values */\n", stdout);
+  printf ("#define MP_BASES_CHARS_PER_LIMB_10      %d\n", chars_per_limb);
+  fputs ("#define MP_BASES_BIG_BASE_10            CNST_LIMB(0x", stdout);
+  mpz_out_str (stdout, 16, big_base);
+  fputs (")\n", stdout);
+  fputs ("#define MP_BASES_BIG_BASE_INVERTED_10   CNST_LIMB(0x", stdout);
+  mpz_out_str (stdout, 16, big_base_inverted);
+  fputs (")\n", stdout);
+  printf ("#define MP_BASES_NORMALIZATION_STEPS_10 %d\n", normalization_steps);
+}
+
+void
+table (int limb_bits, int nail_bits)
+{
+  int  numb_bits = limb_bits - nail_bits;
+  int  base;
+  /* Print the C source of the mp_bases[] table, one entry per base.  */
+  printf ("/* This file generated by gen-bases.c - DO NOT EDIT. */\n");
+  printf ("\n");
+  printf ("#include \"gmp.h\"\n");
+  printf ("#include \"gmp-impl.h\"\n");
+  printf ("\n");
+  printf ("#if GMP_NUMB_BITS != %d\n", numb_bits);
+  printf ("Error, error, this data is for %d bits\n", numb_bits);
+  printf ("#endif\n");
+  printf ("\n");
+  puts ("const struct bases mp_bases[257] =\n{");
+  puts ("  /*   0 */ { 0, 0.0, 0 },");
+  puts ("  /*   1 */ { 0, 1e37, 0 },");
+  for (base = 2; base <= 256; base++)
+    {
+      generate (limb_bits, nail_bits, base);
+      /* base and chars_per_limb are ints, so they're printed with %d */
+      printf ("  /* %3d */ { ", base);
+      if (POW2_P (base))
+       {
+          printf ("%d, %.16f, 0x%x },\n",    /* last field is log2(base) */
+                  chars_per_limb, chars_per_bit_exactly, ulog2 (base) - 1);
+       }
+      else
+       {
+          printf ("%d, %.16f, CNST_LIMB(0x",
+                  chars_per_limb, chars_per_bit_exactly);
+         mpz_out_str (stdout, 16, big_base);
+          printf ("), CNST_LIMB(0x");
+         mpz_out_str (stdout, 16, big_base_inverted);
+          printf (") },\n");
+       }
+    }
+
+  puts ("};");
+}
+
+int
+main (int argc, char **argv)
+{
+  char  *what;
+  int   limb_bits, nail_bits;
+
+  if (argc != 4)
+    {
+      fprintf (stderr, "Usage: gen-bases <header|table> <limbbits> <nailbits>\n");
+      exit (1);
+    }
+
+  what = argv[1];              /* "header" or "table", checked below */
+  limb_bits = atoi (argv[2]);
+  nail_bits = atoi (argv[3]);
+
+  if (! (limb_bits > 0 && nail_bits >= 0 && nail_bits < limb_bits))
+    {
+      fprintf (stderr, "Invalid limb/nail bits: %d %d\n",
+               limb_bits, nail_bits);
+      exit (1);
+    }
+
+  mpz_init (big_base);         /* the globals generate() works in */
+  mpz_init (big_base_inverted);
+  mpz_init (t);
+
+  if (strcmp (what, "table") == 0)
+    table (limb_bits, nail_bits);
+  else if (strcmp (what, "header") == 0)
+    header (limb_bits, nail_bits);
+  else
+    {
+      fprintf (stderr, "Invalid header/table choice: %s\n", what);
+      exit (1);
+    }
+
+  return 0;
+}
diff --git a/gen-fac_ui.c b/gen-fac_ui.c

new file mode 100644 (file)

index 0000000..a9521ba
--- /dev/null
+++ b/gen-fac_ui.c
@@ -0,0 +1,159 @@
+/* Generate mpz_fac_ui data.
+
+Copyright 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "dumbmp.c"
+
+
+/* Set x to the product of the z terms y, y+2, y+4, ..., y+2*(z-1).  */
+void
+odd_products (mpz_t x, mpz_t y, int z)
+{
+  mpz_t term;                  /* current factor, starts as a copy of y */
+
+  mpz_init_set (term, y);
+  mpz_set_ui (x, 1);
+  while (z != 0)
+    {
+      mpz_mul (x, x, term);
+      mpz_add_ui (term, term, 2);
+      z--;
+    }
+  mpz_clear (term);
+}
+
+/* print the mpz_fac_ui constants to stdout (nail is unused); returns 0 on success */
+int
+gen_consts (int numb, int nail, int limb)
+{
+  mpz_t x, y, z, t;
+  unsigned long a, b, first = 1;
+
+  printf ("/* This file is automatically generated by gen-fac_ui.c */\n\n");
+  printf ("#if GMP_NUMB_BITS != %d\n", numb);
+  printf ("Error , error this data is for %d GMP_NUMB_BITS only\n", numb);
+  printf ("#endif\n");
+  printf ("#if GMP_LIMB_BITS != %d\n", limb);
+  printf ("Error , error this data is for %d GMP_LIMB_BITS only\n", limb);
+  printf ("#endif\n");
+
+  printf
+    ("/* This table is 0!,1!,2!,3!,...,n! where n! has <= GMP_NUMB_BITS bits */\n");
+  printf
+    ("#define ONE_LIMB_FACTORIAL_TABLE CNST_LIMB(0x1),CNST_LIMB(0x1),CNST_LIMB(0x2),");
+  mpz_init_set_ui (x, 2);
+  for (b = 3;; b++)
+    {
+      mpz_mul_ui (x, x, b);    /* so x=b!       */
+      if (mpz_sizeinbase (x, 2) > numb)
+       break;
+      if (! first)
+       printf (",");           /* separator, the fixed part above ends in one */
+      first = 0;
+      printf ("CNST_LIMB(0x");
+      mpz_out_str (stdout, 16, x);
+      printf (")");            /* close each entry as it's printed, so none dangles */
+    }
+  printf ("\n");
+
+
+  mpz_set_ui (x, 1);
+  mpz_mul_2exp (x, x, limb + 1);       /* x=2^(limb+1)        */
+  mpz_init (y);
+  mpz_set_ui (y, 10000);
+  mpz_mul (x, x, y);           /* x=2^(limb+1)*10^4     */
+  mpz_set_ui (y, 27182);       /* exp(1)*10^4      */
+  mpz_tdiv_q (x, x, y);                /* x=2^(limb+1)/exp(1)        */
+  printf ("\n/* is 2^(GMP_LIMB_BITS+1)/exp(1) */\n");
+  printf ("#define FAC2OVERE CNST_LIMB(0x");
+  mpz_out_str (stdout, 16, x);
+  printf (")\n");
+
+
+  printf
+    ("\n/* FACMULn is largest odd x such that x*(x+2)*...*(x+2(n-1))<=2^GMP_NUMB_BITS-1 */\n\n");
+  mpz_init (z);
+  mpz_init (t);
+  for (a = 2; a <= 4; a++)
+    {
+      mpz_set_ui (x, 1);
+      mpz_mul_2exp (x, x, numb);
+      mpz_root (x, x, a);
+      /* so x is approx sol       */
+      if (mpz_even_p (x))
+       mpz_sub_ui (x, x, 1);
+      mpz_set_ui (y, 1);
+      mpz_mul_2exp (y, y, numb);
+      mpz_sub_ui (y, y, 1);
+      /* decrement x until we are <= real sol     */
+      do
+       {
+         mpz_sub_ui (x, x, 2);
+         odd_products (t, x, a);
+         if (mpz_cmp (t, y) <= 0)
+           break;
+       }
+      while (1);
+      /* increment x until > real sol     */
+      do
+       {
+         mpz_add_ui (x, x, 2);
+         odd_products (t, x, a);
+         if (mpz_cmp (t, y) > 0)
+           break;
+       }
+      while (1);
+      /* dec once to get real sol */
+      mpz_sub_ui (x, x, 2);
+      printf ("#define FACMUL%lu CNST_LIMB(0x", a);
+      mpz_out_str (stdout, 16, x);
+      printf (")\n");
+    }
+
+  mpz_clear (x);               /* release the temporaries before returning */
+  mpz_clear (y);
+  mpz_clear (z);
+  mpz_clear (t);
+  return 0;
+}
+
+int
+main (int argc, char *argv[])  /* usage: gen-fac_ui limbbits nailbits, output on stdout */
+{
+  int nail_bits, limb_bits, numb_bits;
+
+  if (argc != 3)
+    {
+      fprintf (stderr, "Usage: gen-fac_ui limbbits nailbits\n");
+      exit (1);
+    }
+  limb_bits = atoi (argv[1]);
+  nail_bits = atoi (argv[2]);
+  numb_bits = limb_bits - nail_bits;
+  if (limb_bits <= 0 || nail_bits < 0 || numb_bits <= 0)  /* need at least one numb bit */
+    {
+      fprintf (stderr, "Invalid limb/nail bits %d,%d\n", limb_bits,
+              nail_bits);
+      exit (1);
+    }
+  gen_consts (numb_bits, nail_bits, limb_bits);
+  return 0;
+}
diff --git a/gen-fib.c b/gen-fib.c

new file mode 100644 (file)

index 0000000..fd7bb96
--- /dev/null
+++ b/gen-fib.c
@@ -0,0 +1,145 @@
+/* Generate Fibonacci table data.
+
+Copyright 2001, 2002, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include "dumbmp.c"
+
+mpz_t  *f;                     /* Fibonacci numbers, f[i] holding F[i-1] */
+int    fnum, fib_limit, luc_limit;  /* set by generate(): entries of f to print, and the two limits */
+
+void
+generate (int numb_bits)       /* fills in f, fnum, fib_limit and luc_limit */
+{
+  mpz_t  limit, l;
+  int    falloc, i;
+
+  mpz_init_set_ui (limit, 1L);
+  mpz_mul_2exp (limit, limit, numb_bits);
+
+  /* fib(2n) > 2^n, so use 2n as a limit for the table size */
+  falloc = 2 * numb_bits;
+  f = (mpz_t *) xmalloc (falloc * sizeof (*f));
+
+  mpz_init_set_ui (f[0], 1L);  /* F[-1] */
+  mpz_init_set_ui (f[1], 0L);  /* F[0] */
+
+  mpz_init (l);
+
+  for (i = 2; ; i++)
+    {
+      ASSERT (i < falloc);
+      /* f[i] = f[i-1] + f[i-2], which is F[i-1] since f is offset by one */
+      mpz_init (f[i]);
+      mpz_add (f[i], f[i-1], f[i-2]);
+      if (mpz_cmp (f[i], limit) >= 0)
+        break;
+
+      fnum = i+1;
+      fib_limit = i-1;
+
+      /* l = f[i]+2*f[i-1], tested as the Lucas number for index i-1 */
+      mpz_add (l, f[i], f[i-1]);
+      mpz_add (l, l, f[i-1]);
+
+      if (mpz_cmp (l, limit) < 0)
+        luc_limit = i-1;
+    }
+
+  mpz_clear (l);               /* f itself is kept, table() prints from it */
+  mpz_clear (limit);
+}
+
+
+void
+header (int numb_bits)
+{
+  fputs ("/* This file generated by gen-fib.c - DO NOT EDIT. */\n", stdout);
+  fputs ("\n", stdout);
+  printf ("#if GMP_NUMB_BITS != %d\n", numb_bits);
+  printf ("Error, error, this data is for %d bits\n", numb_bits);
+  fputs ("#endif\n", stdout);
+  fputs ("\n", stdout);
+  printf ("#define FIB_TABLE_LIMIT         %d\n", fib_limit);
+  printf ("#define FIB_TABLE_LUCNUM_LIMIT  %d\n", luc_limit);
+}
+
+void
+table (int numb_bits)
+{
+  int  n;                      /* true Fibonacci index, one less than the index into f */
+
+  fputs ("/* This file generated by gen-fib.c - DO NOT EDIT. */\n", stdout);
+  fputs ("\n", stdout);
+  fputs ("#include \"gmp.h\"\n", stdout);
+  fputs ("#include \"gmp-impl.h\"\n", stdout);
+  fputs ("\n", stdout);
+  printf ("#if GMP_NUMB_BITS != %d\n", numb_bits);
+  printf ("Error, error, this data is for %d bits\n", numb_bits);
+  fputs ("#endif\n", stdout);
+  fputs ("\n", stdout);
+  fputs ("const mp_limb_t\n", stdout);
+  fputs ("__gmp_fib_table[FIB_TABLE_LIMIT+2] = {\n", stdout);
+
+  for (n = -1; n + 1 < fnum; n++)
+    {
+      fputs ("  CNST_LIMB (0x", stdout);
+      mpz_out_str (stdout, 16, f[n + 1]);
+      printf ("),  /* %d */\n", n);
+    }
+  fputs ("};\n", stdout);
+}
+
+int
+main (int argc, char *argv[])  /* usage: gen-fib <header|table> <limbbits> <nailbits> */
+{
+  int  limb_bits, nail_bits, numb_bits;
+
+  if (argc != 4)
+    {
+      fprintf (stderr, "Usage: gen-fib <header|table> <limbbits> <nailbits>\n");
+      exit (1);
+    }
+
+  limb_bits = atoi (argv[2]);
+  nail_bits = atoi (argv[3]);
+
+  if (limb_bits <= 0
+      || nail_bits < 0
+      || nail_bits >= limb_bits)
+    {
+      fprintf (stderr, "Invalid limb/nail bits: %d %d\n",
+               limb_bits, nail_bits);
+      exit (1);
+    }
+  numb_bits = limb_bits - nail_bits;
+
+  generate (numb_bits);        /* both outputs read the globals this sets */
+
+  if (strcmp (argv[1], "header") == 0)
+    header (numb_bits);
+  else if (strcmp (argv[1], "table") == 0)
+    table (numb_bits);
+  else
+    {
+      fprintf (stderr, "Invalid header/table choice: %s\n", argv[1]);
+      exit (1);
+    }
+
+  return 0;
+}
diff --git a/gen-psqr.c b/gen-psqr.c

new file mode 100644 (file)

index 0000000..9c33d7a
--- /dev/null
+++ b/gen-psqr.c
@@ -0,0 +1,576 @@
+/* Generate perfect square testing data.
+
+Copyright 2002, 2003, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "dumbmp.c"
+
+
+/* The aim of this program is to choose either mpn_mod_34lsub1 or mpn_mod_1
+   (plus a PERFSQR_PP modulus), and generate tables indicating quadratic
+   residues and non-residues modulo small factors of that modulus.
+
+   For the usual 32 or 64 bit cases mpn_mod_34lsub1 gets used.  That
+   function exists specifically because 2^24-1 and 2^48-1 have nice sets of
+   prime factors.  For other limb sizes it's considered, but if it doesn't
+   have good factors then mpn_mod_1 will be used instead.
+
+   When mpn_mod_1 is used, the modulus PERFSQR_PP is created from a
+   selection of small primes, chosen to fill PERFSQR_MOD_BITS of a limb,
+   with that bit count chosen so (2*GMP_LIMB_BITS)*2^PERFSQR_MOD_BITS <=
+   GMP_LIMB_MAX, allowing PERFSQR_MOD_IDX in mpn/generic/perfsqr.c to do its
+   calculation within a single limb.
+
+   In either case primes can be combined to make divisors.  The table data
+   then effectively indicates remainders which are quadratic residues mod
+   all the primes.  This sort of combining reduces the number of steps
+   needed after mpn_mod_34lsub1 or mpn_mod_1, saving code size and time.
+   Nothing is gained or lost in terms of detections, the same total fraction
+   of non-residues will be identified.
+
+   Nothing particularly sophisticated is attempted for combining factors to
+   make divisors.  This is probably a kind of knapsack problem so it'd be
+   too hard to attempt anything completely general.  For the usual 32 and 64
+   bit limbs we get a good enough result just pairing the biggest and
+   smallest which fit together, repeatedly.
+
+   Another aim is to get powerful combinations, ie. divisors which identify
+   biggest fraction of non-residues, and have those run first.  Again for
+   the usual 32 and 64 bits it seems good enough just to pair for big
+   divisors then sort according to the resulting fraction of non-residues
+   identified.
+
+   Also in this program, a table sq_res_0x100 of residues modulo 256 is
+   generated.  This simply fills bits into limbs of the appropriate
+   build-time GMP_LIMB_BITS each.
+
+*/
+
+
+/* Normally we aren't using const in gen*.c programs, so as not to have to
+   bother figuring out if it works, but using it with f_cmp_divisor and
+   f_cmp_fraction avoids warnings from the qsort calls. */
+
+/* Same tests as gmp.h, any of these is taken to mean "const" can be used. */
+#if  defined (__STDC__)                                 \
+  || defined (__cplusplus)                              \
+  || defined (_AIX)                                     \
+  || defined (__DECC)                                   \
+  || (defined (__mips) && defined (_SYSTYPE_SVR4))      \
+  || defined (_MSC_VER)                                 \
+  || defined (_WIN32)
+#define HAVE_CONST        1
+#endif
+
+#if ! HAVE_CONST
+#define const
+#endif /* ! HAVE_CONST, "const" then expands to nothing */
+
+
+mpz_t  *sq_res_0x100;          /* table of limbs */
+int    nsq_res_0x100;          /* elements in sq_res_0x100 array */
+int    sq_res_0x100_num;       /* squares in sq_res_0x100 */
+double sq_res_0x100_fraction;  /* sq_res_0x100_num / 256 */
+
+int     mod34_bits;        /* 3*GMP_NUMB_BITS/4 */
+int     mod_bits;          /* bits from PERFSQR_MOD_34 or MOD_PP */
+int     max_divisor;       /* all divisors <= max_divisor */
+int     max_divisor_bits;  /* ceil(log2(max_divisor)) */
+double  total_fraction;    /* of squares */
+mpz_t   pp;                /* product of primes, or 0 if mod_34lsub1 used */
+mpz_t   pp_norm;           /* pp shifted so NUMB high bit set */
+mpz_t   pp_inverted;       /* invert_limb style inverse */
+mpz_t   mod_mask;          /* 2^mod_bits-1 */
+char    mod34_excuse[128]; /* why mod_34lsub1 not used (if it's not) */
+
+/* raw list of divisors of 2^mod34_bits-1 or pp, just to show in a comment */
+struct rawfactor_t {
+  int     divisor;
+  int     multiplicity;
+};
+struct rawfactor_t  *rawfactor;
+int                 nrawfactor;
+
+/* factors of 2^mod34_bits-1 or pp and associated data, after combining etc */
+struct factor_t {
+  int     divisor;
+  mpz_t   inverse;   /* 1/divisor mod 2^mod_bits */
+  mpz_t   mask;      /* indicating squares mod divisor */
+  double  fraction;  /* squares/total */
+};
+struct factor_t  *factor;
+int              nfactor;       /* entries in use in factor array */
+int              factor_alloc;  /* entries allocated to factor array */
+
+
+int
+f_cmp_divisor (const void *parg, const void *qarg)
+{
+  const struct factor_t *lhs = parg;
+  const struct factor_t *rhs = qarg;
+
+  /* negative, zero or positive as lhs->divisor is below, equal to or above rhs's */
+  if (lhs->divisor < rhs->divisor)
+    return -1;
+  if (lhs->divisor > rhs->divisor)
+    return 1;
+  return 0;
+}
+
+int
+f_cmp_fraction (const void *parg, const void *qarg)
+{
+  const struct factor_t *lhs = parg;
+  const struct factor_t *rhs = qarg;
+
+  /* as f_cmp_divisor but on the fraction field; 0 when neither is smaller */
+  if (lhs->fraction < rhs->fraction)
+    return -1;
+  if (lhs->fraction > rhs->fraction)
+    return 1;
+  return 0;
+}
+
+/* Remove array[idx] by copying the remainder down, and adjust narray
+   accordingly.  Each argument is evaluated more than once.  */
+#define COLLAPSE_ELEMENT(array, idx, narray)                    \
+  do {                                                          \
+    mem_copyi ((char *) &(array)[idx],                          \
+               (char *) &(array)[(idx)+1],                      \
+               ((narray)-((idx)+1)) * sizeof ((array)[0]));     \
+    (narray)--;                                                 \
+  } while (0)
+
+
+/* return n*2^p mod m, found by doubling n modulo m a total of p times */
+int
+mul_2exp_mod (int n, int p, int m)
+{
+  int  left;
+  for (left = p; left > 0; left--)
+    n = (n + n) % m;
+  return n;
+}
+
+/* return -n mod m, for an n already in the range 0 <= n < m */
+int
+neg_mod (int n, int m)
+{
+  ASSERT (n >= 0 && n < m);
+  return (n != 0 ? m-n : 0);
+}
+
+/* Set "mask" so that "mask & (1<<idx)" is non-zero when
+   "-(idx<<mod_bits)" can be a square modulo m.  */
+void
+square_mask (mpz_t mask, int m)
+{
+  int    scale, x, sq;
+
+  /* scale = -(2^mod_bits) mod m */
+  scale = neg_mod (mul_2exp_mod (1, mod_bits, m), m);
+
+  mpz_set_ui (mask, 0L);
+  for (x = 0; x < m; x++)
+    {
+      sq = (x * x) % m;
+      /* the bit set is this square times scale, reduced mod m */
+      mpz_setbit (mask, (unsigned long) ((sq * scale) % m));
+    }
+}
+
+void
+generate_sq_res_0x100 (int limb_bits)
+{
+  int  n, sq;
+
+  nsq_res_0x100 = (0x100 + limb_bits - 1) / limb_bits;  /* limbs for 256 bits */
+  sq_res_0x100 = (mpz_t *) xmalloc (nsq_res_0x100 * sizeof (*sq_res_0x100));
+  for (n = 0; n < nsq_res_0x100; n++)
+    mpz_init_set_ui (sq_res_0x100[n], 0L);
+
+  /* flag each value which is a square mod 256, limb_bits flags per limb */
+  for (n = 0; n < 0x100; n++)
+    {
+      sq = (n * n) % 0x100;
+      mpz_setbit (sq_res_0x100[sq / limb_bits],
+                  (unsigned long) (sq % limb_bits));
+    }
+
+  sq_res_0x100_num = 0;        /* total flags set across all the limbs */
+  for (n = nsq_res_0x100 - 1; n >= 0; n--)
+    sq_res_0x100_num += mpz_popcount (sq_res_0x100[n]);
+  sq_res_0x100_fraction = (double) sq_res_0x100_num / 256.0;
+}
+
+void
+generate_mod (int limb_bits, int nail_bits)
+{
+  int    numb_bits = limb_bits - nail_bits;
+  int    i, divisor;
+
+  mpz_init_set_ui (pp, 0L);
+  mpz_init_set_ui (pp_norm, 0L);
+  mpz_init_set_ui (pp_inverted, 0L);
+
+  /* no more than limb_bits many factors in a one limb modulus (and of
+     course in reality nothing like that many) */
+  factor_alloc = limb_bits;
+  factor = (struct factor_t *) xmalloc (factor_alloc * sizeof (*factor));
+  rawfactor = (struct rawfactor_t *)
+    xmalloc (factor_alloc * sizeof (*rawfactor));
+
+  if (numb_bits % 4 != 0)
+    {
+      strcpy (mod34_excuse, "GMP_NUMB_BITS % 4 != 0");
+      goto use_pp;
+    }
+
+  max_divisor = 2*limb_bits;
+  max_divisor_bits = log2_ceil (max_divisor);
+
+  if (numb_bits / 4 < max_divisor_bits)
+    {
+      /* Wind back to one limb worth of max_divisor, if that will let us use
+         mpn_mod_34lsub1.  */
+      max_divisor = limb_bits;
+      max_divisor_bits = log2_ceil (max_divisor);
+
+      if (numb_bits / 4 < max_divisor_bits)
+        {
+          strcpy (mod34_excuse, "GMP_NUMB_BITS / 4 too small");
+          goto use_pp;
+        }
+    }
+
+  {
+    /* Can use mpn_mod_34lsub1, find small factors of 2^mod34_bits-1. */
+    mpz_t  m, q, r;
+    int    multiplicity;
+
+    mod34_bits = (numb_bits / 4) * 3;
+
+    /* mpn_mod_34lsub1 returns a full limb value, PERFSQR_MOD_34 folds it at
+       the mod34_bits mark, adding the two halves for a remainder of at most
+       mod34_bits+1 many bits */
+    mod_bits = mod34_bits + 1;
+
+    mpz_init_set_ui (m, 1L);
+    mpz_mul_2exp (m, m, mod34_bits);
+    mpz_sub_ui (m, m, 1L);
+
+    mpz_init (q);
+    mpz_init (r);
+
+    for (i = 3; i <= max_divisor; i++)
+      {
+        if (! isprime (i))
+          continue;
+
+        mpz_tdiv_qr_ui (q, r, m, (unsigned long) i);
+        if (mpz_sgn (r) != 0)
+          continue;
+
+        /* if a repeated prime is found it's used as an i^n in one factor */
+        divisor = 1;
+        multiplicity = 0;
+        do
+          {
+            if (divisor > max_divisor / i)
+              break;
+            multiplicity++;
+            mpz_set (m, q);
+            mpz_tdiv_qr_ui (q, r, m, (unsigned long) i);
+          }
+        while (mpz_sgn (r) == 0);
+
+        ASSERT (nrawfactor < factor_alloc);
+        rawfactor[nrawfactor].divisor = i;
+        rawfactor[nrawfactor].multiplicity = multiplicity;
+        nrawfactor++;
+      }
+
+    mpz_clear (m);
+    mpz_clear (q);
+    mpz_clear (r);
+  }
+
+  if (nrawfactor <= 2)
+    {
+      mpz_t  new_pp;
+
+      sprintf (mod34_excuse, "only %d small factor%s",
+               nrawfactor, nrawfactor == 1 ? "" : "s");
+
+    use_pp:
+      /* reset to two limbs of max_divisor, in case the mpn_mod_34lsub1 code
+         tried with just one */
+      max_divisor = 2*limb_bits;
+      max_divisor_bits = log2_ceil (max_divisor);
+
+      mpz_init (new_pp);
+      nrawfactor = 0;
+      mod_bits = MIN (numb_bits, limb_bits - max_divisor_bits);
+
+      /* one copy of each small prime */
+      mpz_set_ui (pp, 1L);
+      for (i = 3; i <= max_divisor; i++)
+        {
+          if (! isprime (i))
+            continue;
+
+          mpz_mul_ui (new_pp, pp, (unsigned long) i);
+          if (mpz_sizeinbase (new_pp, 2) > mod_bits)
+            break;
+          mpz_set (pp, new_pp);
+
+          ASSERT (nrawfactor < factor_alloc);
+          rawfactor[nrawfactor].divisor = i;
+          rawfactor[nrawfactor].multiplicity = 1;
+          nrawfactor++;
+        }
+
+      /* Plus an extra copy of one or more of the primes selected, if that
+         still fits in max_divisor and the total in mod_bits.  Usually only
+         3 or 5 will be candidates */
+      for (i = nrawfactor-1; i >= 0; i--)
+        {
+          if (rawfactor[i].divisor > max_divisor / rawfactor[i].divisor)
+            continue;
+          mpz_mul_ui (new_pp, pp, (unsigned long) rawfactor[i].divisor);
+          if (mpz_sizeinbase (new_pp, 2) > mod_bits)
+            continue;
+          mpz_set (pp, new_pp);
+
+          rawfactor[i].multiplicity++;
+        }
+
+      mod_bits = mpz_sizeinbase (pp, 2);
+
+      mpz_set (pp_norm, pp);
+      while (mpz_sizeinbase (pp_norm, 2) < numb_bits)
+        mpz_add (pp_norm, pp_norm, pp_norm);
+
+      mpz_preinv_invert (pp_inverted, pp_norm, numb_bits);
+
+      mpz_clear (new_pp);
+    }
+
+  /* start the factor array */
+  for (i = 0; i < nrawfactor; i++)
+    {
+      int  j;
+      ASSERT (nfactor < factor_alloc);
+      factor[nfactor].divisor = 1;
+      for (j = 0; j < rawfactor[i].multiplicity; j++)
+        factor[nfactor].divisor *= rawfactor[i].divisor;
+      nfactor++;
+    }
+
+ combine:
+  /* Combine entries in the factor array.  Combine the smallest entry with
+     the biggest one that will fit with it (ie. under max_divisor), then
+     repeat that with the new smallest entry. */
+  qsort (factor, nfactor, sizeof (factor[0]), f_cmp_divisor);
+  for (i = nfactor-1; i >= 1; i--)
+    {
+      if (factor[i].divisor <= max_divisor / factor[0].divisor)
+        {
+          factor[0].divisor *= factor[i].divisor;
+          COLLAPSE_ELEMENT (factor, i, nfactor);
+          goto combine;
+        }
+    }
+
+  total_fraction = 1.0;
+  for (i = 0; i < nfactor; i++)
+    {
+      mpz_init (factor[i].inverse);
+      mpz_invert_ui_2exp (factor[i].inverse,
+                          (unsigned long) factor[i].divisor,
+                          (unsigned long) mod_bits);
+
+      mpz_init (factor[i].mask);
+      square_mask (factor[i].mask, factor[i].divisor);
+
+      /* fraction of possible squares */
+      factor[i].fraction = (double) mpz_popcount (factor[i].mask)
+        / factor[i].divisor;
+
+      /* total fraction of possible squares */
+      total_fraction *= factor[i].fraction;
+    }
+
+  /* best tests first (ie. smallest fraction) */
+  qsort (factor, nfactor, sizeof (factor[0]), f_cmp_fraction);
+}
+
+/* Write the generated perfect-square test data as C source on stdout.
+
+   limb_bits and nail_bits are echoed into a "#if" sanity check at the top
+   of the output, and limb_bits also decides whether a factor's mask is
+   printed as one limb (PERFSQR_MOD_1) or split in two (PERFSQR_MOD_2).
+
+   Everything else comes from file-level state prepared before this is
+   called: sq_res_0x100[], pp / pp_norm / pp_inverted, mod34_bits,
+   mod34_excuse, mod_bits, rawfactor[], factor[] and total_fraction.  A
+   non-zero pp selects the PERFSQR_MOD_PP form of the output, a zero pp
+   selects the PERFSQR_MOD_34 form.  */
+void
+print (int limb_bits, int nail_bits)
+{
+  int    i;
+  mpz_t  mhi, mlo;
+
+  printf ("/* This file generated by gen-psqr.c - DO NOT EDIT. */\n");
+  printf ("\n");
+
+  /* deliberately not valid C, so a limb size mismatch fails the compile */
+  printf ("#if GMP_LIMB_BITS != %d || GMP_NAIL_BITS != %d\n",
+          limb_bits, nail_bits);
+  printf ("Error, error, this data is for %d bit limb and %d bit nail\n",
+          limb_bits, nail_bits);
+  printf ("#endif\n");
+  printf ("\n");
+
+  printf ("/* Non-zero bit indicates a quadratic residue mod 0x100.\n");
+  printf ("   This test identifies %.2f%% as non-squares (%d/256). */\n",
+          (1.0 - sq_res_0x100_fraction) * 100.0,
+          0x100 - sq_res_0x100_num);
+  printf ("static const mp_limb_t\n");
+  printf ("sq_res_0x100[%d] = {\n", nsq_res_0x100);
+  for (i = 0; i < nsq_res_0x100; i++)
+    {
+      printf ("  CNST_LIMB(0x");
+      mpz_out_str (stdout, 16, sq_res_0x100[i]);
+      printf ("),\n");
+    }
+  printf ("};\n");
+  printf ("\n");
+
+  /* comment line showing the factorization in use */
+  if (mpz_sgn (pp) != 0)
+    {
+      printf ("/* mpn_mod_34lsub1 not used due to %s */\n", mod34_excuse);
+      printf ("/* PERFSQR_PP = ");
+    }
+  else
+    printf ("/* 2^%d-1 = ", mod34_bits);
+  for (i = 0; i < nrawfactor; i++)
+    {
+      if (i != 0)
+        printf (" * ");
+      printf ("%d", rawfactor[i].divisor);
+      if (rawfactor[i].multiplicity != 1)
+        printf ("^%d", rawfactor[i].multiplicity);
+    }
+  /* in the 2^n-1 case only the small factors are listed, hence "..." */
+  printf (" %s*/\n", mpz_sgn (pp) == 0 ? "... " : "");
+
+  printf ("#define PERFSQR_MOD_BITS  %d\n", mod_bits);
+  if (mpz_sgn (pp) != 0)
+    {
+      printf ("#define PERFSQR_PP            CNST_LIMB(0x");
+      mpz_out_str (stdout, 16, pp);
+      printf (")\n");
+      printf ("#define PERFSQR_PP_NORM       CNST_LIMB(0x");
+      mpz_out_str (stdout, 16, pp_norm);
+      printf (")\n");
+      printf ("#define PERFSQR_PP_INVERTED   CNST_LIMB(0x");
+      mpz_out_str (stdout, 16, pp_inverted);
+      printf (")\n");
+    }
+  printf ("\n");
+
+  mpz_init (mhi);
+  mpz_init (mlo);
+
+  printf ("/* This test identifies %.2f%% as non-squares. */\n",
+          (1.0 - total_fraction) * 100.0);
+  printf ("#define PERFSQR_MOD_TEST(up, usize) \\\n");
+  printf ("  do {                              \\\n");
+  printf ("    mp_limb_t  r;                   \\\n");
+  if (mpz_sgn (pp) != 0)
+    printf ("    PERFSQR_MOD_PP (r, up, usize);  \\\n");
+  else
+    printf ("    PERFSQR_MOD_34 (r, up, usize);  \\\n");
+
+  /* one PERFSQR_MOD_1 or PERFSQR_MOD_2 line per entry of factor[] */
+  for (i = 0; i < nfactor; i++)
+    {
+      printf ("                                    \\\n");
+      printf ("    /* %5.2f%% */                    \\\n",
+              (1.0 - factor[i].fraction) * 100.0);
+
+      printf ("    PERFSQR_MOD_%d (r, CNST_LIMB(%2d), CNST_LIMB(0x",
+              factor[i].divisor <= limb_bits ? 1 : 2,
+              factor[i].divisor);
+      mpz_out_str (stdout, 16, factor[i].inverse);
+      printf ("), \\\n");
+      printf ("                   CNST_LIMB(0x");
+
+      if ( factor[i].divisor <= limb_bits)
+        {
+          mpz_out_str (stdout, 16, factor[i].mask);
+        }
+      else
+        {
+          /* mask is wider than a limb, print it as high and low halves */
+          mpz_tdiv_r_2exp (mlo, factor[i].mask, (unsigned long) limb_bits);
+          mpz_tdiv_q_2exp (mhi, factor[i].mask, (unsigned long) limb_bits);
+          mpz_out_str (stdout, 16, mhi);
+          printf ("), CNST_LIMB(0x");
+          mpz_out_str (stdout, 16, mlo);
+        }
+      printf (")); \\\n");
+    }
+
+  printf ("  } while (0)\n");
+  printf ("\n");
+
+  /* Use the same residue fraction as the sq_res_0x100 comment printed
+     above, rather than a hard-coded 44/256, so the two figures cannot
+     disagree.  */
+  printf ("/* Grand total sq_res_0x100 and PERFSQR_MOD_TEST, %.2f%% non-squares. */\n",
+          (1.0 - (total_fraction * sq_res_0x100_fraction)) * 100.0);
+  printf ("\n");
+
+  printf ("/* helper for tests/mpz/t-perfsqr.c */\n");
+  printf ("#define PERFSQR_DIVISORS  { 256,");
+  for (i = 0; i < nfactor; i++)
+      printf (" %d,", factor[i].divisor);
+  printf (" }\n");
+
+
+  mpz_clear (mhi);
+  mpz_clear (mlo);
+}
+
+/* Entry point: "gen-psqr <limbbits> <nailbits>".
+
+   Both arguments are read with atoi.  The limb size must be positive and
+   the nail size must lie in the range 0 <= nailbits < limbbits, otherwise
+   a message goes to stderr and the exit status is 1.  On success the
+   tables are built and then written out by print().  */
+int
+main (int argc, char *argv[])
+{
+  int  limb_bits, nail_bits;
+  int  sizes_ok;
+
+  if (argc != 3)
+    {
+      fprintf (stderr, "Usage: gen-psqr <limbbits> <nailbits>\n");
+      exit (1);
+    }
+
+  limb_bits = atoi (argv[1]);
+  nail_bits = atoi (argv[2]);
+
+  sizes_ok = (limb_bits > 0 && nail_bits >= 0 && nail_bits < limb_bits);
+  if (! sizes_ok)
+    {
+      fprintf (stderr, "Invalid limb/nail bits: %d %d\n",
+               limb_bits, nail_bits);
+      exit (1);
+    }
+
+  /* build the tables first, then emit them */
+  generate_sq_res_0x100 (limb_bits);
+  generate_mod (limb_bits, nail_bits);
+  print (limb_bits, nail_bits);
+
+  return 0;
+}
diff --git a/gen-trialdivtab.c b/gen-trialdivtab.c

new file mode 100644 (file)

index 0000000..7082539
--- /dev/null
+++ b/gen-trialdivtab.c
@@ -0,0 +1,298 @@
+/* gen-trialdivtab.c
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.  */
+
+/*
+  Generate tables for fast, division-free trial division for GMP.
+
+  There is one main table, ptab.  It contains primes, multiplied together, and
+  several types of pre-computed inverses.  It refers to tables of the type
+  dtab, via the last two indices.  That table contains the individual primes in
+  the range, except that the primes are not actually included in the table (see
+  the P macro; it sneakingly excludes the primes themselves).  Instead, the
+  dtab tables contain tuples for each prime (modular-inverse, limit) used for
+  divisibility checks.
+
+  This interface is not intended for division of very many primes, since then
+  other algorithms apply.
+*/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include "dumbmp.c"
+
+int sumspills (mpz_t, mpz_t *, int);
+void mpn_mod_1s_4p_cps (mpz_t [7], mpz_t);
+
+int limb_bits;
+
+mpz_t B;
+
+/* Entry point: "gen-trialdivtab bits [endprime]".
+
+   bits is the limb size and endprime (default 1290) the largest candidate
+   prime considered.  Two tables are written to stdout as C source:
+
+   gmp_primes_dtab - one P(p, inverse of p mod 2^bits, (2^bits-1)/p) entry
+       per prime of every completed interval.
+
+   gmp_primes_ptab - one entry per completed interval, holding the product
+       of its primes, the mpn_mod_1s_4p_cps values for that product, and
+       the start index and count of its primes.
+
+   Odd primes from 3 up are accumulated into a product.  An interval is
+   closed when including the next prime would make the product reach
+   2^(bits-1), or would make sumspills() report an overflow; that prime
+   then starts the next interval.  The interval still open when endprime
+   is reached is not emitted, and its first prime is published as
+   SMALLEST_OMITTED_PRIME.
+
+   Exits with status 1 on a missing argument, a non-positive bits value,
+   or a negative endprime.  */
+int
+main (int argc, char *argv[])
+{
+  unsigned long t, p;
+  mpz_t ppp, acc, inv, gmp_numb_max, tmp, Bhalf;
+  mpz_t pre[7];
+  int i;
+  int start_p, end_p, interval_start, interval_end, omitted_p;
+  char *endtok;
+  int stop;
+  int np, start_idx;
+
+  if (argc < 2)
+    {
+      fprintf (stderr, "usage: %s bits endprime\n", argv[0]);
+      exit (1);
+    }
+
+  limb_bits = atoi (argv[1]);
+
+  end_p = 1290;                        /* default end prime */
+  if (argc == 3)
+    end_p = atoi (argv[2]);
+
+  /* Reject values the code below cannot handle: limb_bits and
+     limb_bits-1 are used as shift counts, and end_p is compared against
+     an unsigned long loop variable, so a negative value would turn into a
+     huge bound.  Checked before anything is printed so a bad invocation
+     leaves stdout empty.  */
+  if (limb_bits <= 0 || end_p < 0)
+    {
+      fprintf (stderr, "%s: invalid bits/endprime: %d %d\n",
+               argv[0], limb_bits, end_p);
+      exit (1);
+    }
+
+  printf ("#if GMP_LIMB_BITS != %d\n", limb_bits);
+  printf ("#error This table is for GMP_LIMB_BITS = %d\n", limb_bits);
+  printf ("#endif\n\n");
+
+  printf ("#if GMP_NAIL_BITS != 0\n");
+  printf ("#error This table does not support nails\n");
+  printf ("#endif\n\n");
+
+  for (i = 0; i < 7; i++)
+    mpz_init (pre[i]);
+
+  /* gmp_numb_max = 2^limb_bits - 1 */
+  mpz_init_set_ui (gmp_numb_max, 1);
+  mpz_mul_2exp (gmp_numb_max, gmp_numb_max, limb_bits);
+  mpz_sub_ui (gmp_numb_max, gmp_numb_max, 1);
+
+  mpz_init (tmp);
+  mpz_init (inv);
+
+  /* B = 2^limb_bits, Bhalf = 2^(limb_bits-1) */
+  mpz_init_set_ui (B, 1);  mpz_mul_2exp (B, B, limb_bits);
+  mpz_init_set_ui (Bhalf, 1);  mpz_mul_2exp (Bhalf, Bhalf, limb_bits - 1);
+
+  start_p = 3;
+
+  mpz_init_set_ui (ppp, 1);
+  mpz_init (acc);
+  interval_start = start_p;
+  omitted_p = 3;
+  interval_end = 0;
+
+  printf ("static struct gmp_primes_dtab gmp_primes_dtab[] = {\n");
+
+  /* First pass: per-prime entries.  */
+  for (t = start_p; t <= (unsigned long) end_p; t += 2)
+    {
+      if (! isprime (t))
+        continue;
+
+      /* would the product still be usable with t included? */
+      mpz_mul_ui (acc, ppp, t);
+      stop = mpz_cmp (acc, Bhalf) >= 0;
+      if (!stop)
+        {
+          mpn_mod_1s_4p_cps (pre, acc);
+          stop = sumspills (acc, pre + 2, 5);
+        }
+
+      if (stop)
+        {
+          /* close the interval: emit every prime collected so far */
+          for (p = interval_start; p <= (unsigned long) interval_end; p += 2)
+            {
+              if (! isprime (p))
+                continue;
+
+              printf ("  P(%d,", (int) p);
+              mpz_invert_ui_2exp (inv, p, limb_bits);
+              printf ("CNST_LIMB(0x");  mpz_out_str (stdout, 16, inv);  printf ("),");
+
+              mpz_tdiv_q_ui (tmp, gmp_numb_max, p);
+              printf ("CNST_LIMB(0x");  mpz_out_str (stdout, 16, tmp);
+              printf (")),\n");
+            }
+          /* t opens the next interval, and is the first prime not yet
+             emitted */
+          mpz_set_ui (ppp, t);
+          interval_start = t;
+          omitted_p = t;
+        }
+      else
+        {
+          mpz_set (ppp, acc);
+        }
+      interval_end = t;
+    }
+  printf ("  P(0,0,0)\n};\n");
+
+
+  printf ("static struct gmp_primes_ptab gmp_primes_ptab[] = {\n");
+
+  endtok = "";
+
+  /* Second pass: the same interval splitting again, this time emitting
+     one entry per completed product.  np counts primes seen so far and
+     start_idx is the index of the first prime of the current interval.  */
+  mpz_set_ui (ppp, 1);
+  np = 0;
+  start_idx = 0;
+  for (t = start_p; t <= (unsigned long) end_p; t += 2)
+    {
+      if (! isprime (t))
+        continue;
+
+      mpz_mul_ui (acc, ppp, t);
+
+      stop = mpz_cmp (acc, Bhalf) >= 0;
+      if (!stop)
+        {
+          mpn_mod_1s_4p_cps (pre, acc);
+          stop = sumspills (acc, pre + 2, 5);
+        }
+
+      if (stop)
+        {
+          /* pre[] may hold values for acc, recompute for the product
+             actually being emitted */
+          mpn_mod_1s_4p_cps (pre, ppp);
+          printf ("%s", endtok);
+          printf ("  {CNST_LIMB(0x");  mpz_out_str (stdout, 16, ppp);
+          printf ("),{CNST_LIMB(0x");  mpz_out_str (stdout, 16, pre[0]);
+          printf ("),%d", (int) PTR(pre[1])[0]);
+          for (i = 0; i < 5; i++)
+            {
+              printf (",");
+              printf ("CNST_LIMB(0x");  mpz_out_str (stdout, 16, pre[2 + i]);
+              printf (")");
+            }
+          printf ("},");
+          printf ("%d,", start_idx);
+          printf ("%d}", np - start_idx);
+
+          /* separator is printed before each entry except the first */
+          endtok = ",\n";
+          mpz_set_ui (ppp, t);
+          start_idx = np;
+        }
+      else
+        {
+          mpz_set (ppp, acc);
+        }
+      np++;
+    }
+  printf ("\n};\n");
+
+  printf ("#define SMALLEST_OMITTED_PRIME %d\n", (int) omitted_p);
+
+  for (i = 0; i < 7; i++)
+    mpz_clear (pre[i]);
+  mpz_clear (gmp_numb_max);
+  mpz_clear (tmp);
+  mpz_clear (inv);
+  mpz_clear (B);
+  mpz_clear (Bhalf);
+  mpz_clear (ppp);
+  mpz_clear (acc);
+
+  return 0;
+}
+
+/* Return how many times x can be halved (truncating division by 2) before
+   it becomes zero.  A zero x gives 0.  x itself is not modified, the work
+   is done on a private copy.  */
+unsigned long
+mpz_log2 (mpz_t x)
+{
+  unsigned long nbits = 0;
+  mpz_t rest;
+
+  mpz_init_set (rest, x);
+  for (; mpz_sgn (rest) != 0; nbits++)
+    mpz_tdiv_q_2exp (rest, rest, 1);
+  mpz_clear (rest);
+
+  return nbits;
+}
+
+/* Fill cps[0..6] with precomputed values for the divisor bparm, using the
+   global limb_bits (B below means 2^limb_bits):
+
+     cps[0]     the quotient ((2^k - b) * B) / b, with k = mpz_log2 (b)
+                (described in the original as "B^2/b, except msb")
+     cps[1]     cnt = limb_bits - mpz_log2 (b)
+     cps[2..6]  B^1 mod b, B^2 mod b, ..., B^5 mod b
+
+   bparm is only read; all arithmetic is done on a local copy.  */
+void
+mpn_mod_1s_4p_cps (mpz_t cps[7], mpz_t bparm)
+{
+  mpz_t b, bi, t;
+  mpz_t Bpow_mod_b[5];          /* Bpow_mod_b[k] = B^(k+1) mod b */
+  int cnt, k;
+
+  mpz_init_set (b, bparm);
+
+  cnt = limb_bits - mpz_log2 (b);
+
+  mpz_init (bi);
+  mpz_init (t);
+  for (k = 0; k < 5; k++)
+    mpz_init (Bpow_mod_b[k]);
+
+  /* bi = B^2/b, except msb */
+  mpz_set_ui (t, 1);
+  mpz_mul_2exp (t, t, limb_bits - cnt);
+  mpz_sub (t, t, b);
+  mpz_mul_2exp (t, t, limb_bits);
+  mpz_tdiv_q (bi, t, b);
+
+  /* B mod b first, then each further power from the one before:
+     B^(k+1) mod b = ((B^k mod b) * B) mod b */
+  mpz_set_ui (t, 1);
+  mpz_mul_2exp (t, t, limb_bits);
+  mpz_tdiv_r (Bpow_mod_b[0], t, b);
+  for (k = 1; k < 5; k++)
+    {
+      mpz_mul_2exp (t, Bpow_mod_b[k - 1], limb_bits);
+      mpz_tdiv_r (Bpow_mod_b[k], t, b);
+    }
+
+  mpz_set (cps[0], bi);
+  mpz_set_ui (cps[1], cnt);
+  /* the shift count is 0, so the remainders are stored unshifted */
+  for (k = 0; k < 5; k++)
+    mpz_tdiv_q_2exp (cps[2 + k], Bpow_mod_b[k], 0);
+
+  mpz_clear (b);
+  mpz_clear (bi);
+  mpz_clear (t);
+  for (k = 0; k < 5; k++)
+    mpz_clear (Bpow_mod_b[k]);
+}
+
+/* Return non-zero if a[0] + a[1] + ... + a[n-1] is at least the global B,
+   zero otherwise.  a[0] is always read, so n is expected to be >= 1.
+   The ppp argument is accepted but not used.  */
+int
+sumspills (mpz_t ppp, mpz_t *a, int n)
+{
+  mpz_t total;
+  int k, spills;
+
+  mpz_init (total);
+  mpz_set (total, a[0]);
+
+  k = 1;
+  while (k < n)
+    {
+      mpz_add (total, total, a[k]);
+      k++;
+    }
+
+  spills = (mpz_cmp (total, B) >= 0);
+  mpz_clear (total);
+
+  return spills;
+}
diff --git a/gmp-h.in b/gmp-h.in

new file mode 100644 (file)

index 0000000..01757df
--- /dev/null
+++ b/gmp-h.in
@@ -0,0 +1,2279 @@
+/* Definitions for GNU multiple precision functions.   -*- mode: c -*-
+
+Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1999, 2000, 2001, 2002, 2003,
+2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#ifndef __GMP_H__
+
+#if defined (__cplusplus)
+#include <iosfwd>   /* for std::istream, std::ostream, std::string */
+#include <cstdio>
+#endif
+
+
+/* Instantiated by configure. */
+#if ! defined (__GMP_WITHIN_CONFIGURE)
+#define __GMP_HAVE_HOST_CPU_FAMILY_power   @HAVE_HOST_CPU_FAMILY_power@
+#define __GMP_HAVE_HOST_CPU_FAMILY_powerpc @HAVE_HOST_CPU_FAMILY_powerpc@
+#define GMP_LIMB_BITS                      @GMP_LIMB_BITS@
+#define GMP_NAIL_BITS                      @GMP_NAIL_BITS@
+#endif
+#define GMP_NUMB_BITS     (GMP_LIMB_BITS - GMP_NAIL_BITS)
+#define GMP_NUMB_MASK     ((~ __GMP_CAST (mp_limb_t, 0)) >> GMP_NAIL_BITS)
+#define GMP_NUMB_MAX      GMP_NUMB_MASK
+#define GMP_NAIL_MASK     (~ GMP_NUMB_MASK)
+
+
+/* The following (everything under ifndef __GNU_MP__) must be identical in
+   gmp.h and mp.h to allow both to be included in an application or during
+   the library build.  */
+#ifndef __GNU_MP__
+#define __GNU_MP__ 5
+
+#define __need_size_t  /* tell gcc stddef.h we only want size_t */
+#if defined (__cplusplus)
+#include <cstddef>     /* for size_t */
+#else
+#include <stddef.h>    /* for size_t */
+#endif
+#undef __need_size_t
+
+/* Instantiated by configure. */
+#if ! defined (__GMP_WITHIN_CONFIGURE)
+@DEFN_LONG_LONG_LIMB@
+#define __GMP_LIBGMP_DLL  @LIBGMP_DLL@
+#endif
+
+
+/* __STDC__ - some ANSI compilers define this only to 0, hence the use of
+       "defined" and not "__STDC__-0".  In particular Sun workshop C 5.0
+       sets __STDC__ to 0, but requires "##" for token pasting.
+
+   _AIX - gnu ansidecl.h asserts that all known AIX compilers are ANSI but
+       don't always define __STDC__.
+
+   __DECC - current versions of DEC C (5.9 for instance) for alpha are ANSI,
+       but don't define __STDC__ in their default mode.  Don't know if old
+       versions might have been K&R, but let's not worry about that unless
+       someone is still using one.
+
+   _mips - gnu ansidecl.h says the RISC/OS MIPS compiler is ANSI in SVR4
+       mode, but doesn't define __STDC__.
+
+   _MSC_VER - Microsoft C is ANSI, but __STDC__ is undefined unless the /Za
+       option is given (in which case it's 1).
+
+   _WIN32 - tested for by gnu ansidecl.h, no doubt on the assumption that
+      all w32 compilers are ansi.
+
+   Note: This same set of tests is used by gen-psqr.c and
+   demos/expr/expr-impl.h, so if anything needs adding, then be sure to
+   update those too.  */
+
+#if  defined (__STDC__)                                 \
+  || defined (__cplusplus)                              \
+  || defined (_AIX)                                     \
+  || defined (__DECC)                                   \
+  || (defined (__mips) && defined (_SYSTYPE_SVR4))      \
+  || defined (_MSC_VER)                                 \
+  || defined (_WIN32)
+#define __GMP_HAVE_CONST        1
+#define __GMP_HAVE_PROTOTYPES   1
+#define __GMP_HAVE_TOKEN_PASTE  1
+#else
+#define __GMP_HAVE_CONST        0
+#define __GMP_HAVE_PROTOTYPES   0
+#define __GMP_HAVE_TOKEN_PASTE  0
+#endif
+
+
+#if __GMP_HAVE_CONST
+#define __gmp_const   const
+#define __gmp_signed  signed
+#else
+#define __gmp_const
+#define __gmp_signed
+#endif
+
+
+/* __GMP_DECLSPEC supports Windows DLL versions of libgmp, and is empty in
+   all other circumstances.
+
+   When compiling objects for libgmp, __GMP_DECLSPEC is an export directive,
+   or when compiling for an application it's an import directive.  The two
+   cases are differentiated by __GMP_WITHIN_GMP defined by the GMP Makefiles
+   (and not defined from an application).
+
+   __GMP_DECLSPEC_XX is similarly used for libgmpxx.  __GMP_WITHIN_GMPXX
+   indicates when building libgmpxx, and in that case libgmpxx functions are
+   exports, but libgmp functions which might get called are imports.
+
+   Libtool DLL_EXPORT define is not used.
+
+   There's no attempt to support GMP built both static and DLL.  Doing so
+   would mean applications would have to tell us which of the two is going
+   to be used when linking, and that seems very tedious and error prone if
+   using GMP by hand, and equally tedious from a package since autoconf and
+   automake don't give much help.
+
+   __GMP_DECLSPEC is required on all documented global functions and
+   variables, the various internals in gmp-impl.h etc can be left unadorned.
+   But internals used by the test programs or speed measuring programs
+   should have __GMP_DECLSPEC, and certainly constants or variables must
+   have it or the wrong address will be resolved.
+
+   In gcc __declspec can go at either the start or end of a prototype.
+
+   In Microsoft C __declspec must go at the start, or after the type like
+   "void __declspec(...) *foo()".  There's no __dllexport or anything to
+   guard against someone foolish #defining dllexport.  _export used to be
+   available, but no longer.
+
+   In Borland C _export still exists, but needs to go after the type, like
+   "void _export foo();".  Would have to change the __GMP_DECLSPEC syntax to
+   make use of that.  Probably more trouble than it's worth.  */
+
+#if defined (__GNUC__)
+#define __GMP_DECLSPEC_EXPORT  __declspec(__dllexport__)
+#define __GMP_DECLSPEC_IMPORT  __declspec(__dllimport__)
+#endif
+#if defined (_MSC_VER) || defined (__BORLANDC__)
+#define __GMP_DECLSPEC_EXPORT  __declspec(dllexport)
+#define __GMP_DECLSPEC_IMPORT  __declspec(dllimport)
+#endif
+#ifdef __WATCOMC__
+#define __GMP_DECLSPEC_EXPORT  __export
+#define __GMP_DECLSPEC_IMPORT  __import
+#endif
+#ifdef __IBMC__
+#define __GMP_DECLSPEC_EXPORT  _Export
+#define __GMP_DECLSPEC_IMPORT  _Import
+#endif
+
+#if __GMP_LIBGMP_DLL
+#if __GMP_WITHIN_GMP
+/* compiling to go into a DLL libgmp */
+#define __GMP_DECLSPEC  __GMP_DECLSPEC_EXPORT
+#else
+/* compiling to go into an application which will link to a DLL libgmp */
+#define __GMP_DECLSPEC  __GMP_DECLSPEC_IMPORT
+#endif
+#else
+/* all other cases */
+#define __GMP_DECLSPEC
+#endif
+
+
+#ifdef __GMP_SHORT_LIMB
+typedef unsigned int           mp_limb_t;
+typedef int                    mp_limb_signed_t;
+#else
+#ifdef _LONG_LONG_LIMB
+typedef unsigned long long int mp_limb_t;
+typedef long long int          mp_limb_signed_t;
+#else
+typedef unsigned long int      mp_limb_t;
+typedef long int               mp_limb_signed_t;
+#endif
+#endif
+typedef unsigned long int      mp_bitcnt_t;
+
+/* For reference, note that the name __mpz_struct gets into C++ mangled
+   function names, which means although the "__" suggests an internal, we
+   must leave this name for binary compatibility.  */
+typedef struct
+{
+  int _mp_alloc;               /* Number of *limbs* allocated and pointed
+                                  to by the _mp_d field.  */
+  int _mp_size;                        /* abs(_mp_size) is the number of limbs the
+                                  last field points to.  If _mp_size is
+                                  negative this is a negative number.  */
+  mp_limb_t *_mp_d;            /* Pointer to the limbs.  */
+} __mpz_struct;
+
+#endif /* __GNU_MP__ */
+
+
+typedef __mpz_struct MP_INT;    /* gmp 1 source compatibility */
+typedef __mpz_struct mpz_t[1];
+
+typedef mp_limb_t *            mp_ptr;
+typedef __gmp_const mp_limb_t *        mp_srcptr;
+#if defined (_CRAY) && ! defined (_CRAYMPP)
+/* plain `int' is much faster (48 bits) */
+#define __GMP_MP_SIZE_T_INT     1
+typedef int                    mp_size_t;
+typedef int                    mp_exp_t;
+#else
+#define __GMP_MP_SIZE_T_INT     0
+typedef long int               mp_size_t;
+typedef long int               mp_exp_t;
+#endif
+
+typedef struct
+{
+  __mpz_struct _mp_num;
+  __mpz_struct _mp_den;
+} __mpq_struct;
+
+typedef __mpq_struct MP_RAT;    /* gmp 1 source compatibility */
+typedef __mpq_struct mpq_t[1];
+
+typedef struct
+{
+  int _mp_prec;                        /* Max precision, in number of `mp_limb_t's.
+                                  Set by mpf_init and modified by
+                                  mpf_set_prec.  The area pointed to by the
+                                  _mp_d field contains `prec' + 1 limbs.  */
+  int _mp_size;                        /* abs(_mp_size) is the number of limbs the
+                                  last field points to.  If _mp_size is
+                                  negative this is a negative number.  */
+  mp_exp_t _mp_exp;            /* Exponent, in the base of `mp_limb_t'.  */
+  mp_limb_t *_mp_d;            /* Pointer to the limbs.  */
+} __mpf_struct;
+
+/* typedef __mpf_struct MP_FLOAT; */
+typedef __mpf_struct mpf_t[1];
+
+/* Available random number generation algorithms.  */
+typedef enum
+{
+  GMP_RAND_ALG_DEFAULT = 0,
+  GMP_RAND_ALG_LC = GMP_RAND_ALG_DEFAULT /* Linear congruential.  */
+} gmp_randalg_t;
+
+/* Random state struct.  */
+typedef struct
+{
+  mpz_t _mp_seed;        /* _mp_d member points to state of the generator. */
+  gmp_randalg_t _mp_alg;  /* Currently unused. */
+  union {
+    void *_mp_lc;         /* Pointer to function pointers structure.  */
+  } _mp_algdata;
+} __gmp_randstate_struct;
+typedef __gmp_randstate_struct gmp_randstate_t[1];
+
+/* Types for function declarations in gmp files.  */
+/* ??? Should not pollute user name space with these ??? */
+typedef __gmp_const __mpz_struct *mpz_srcptr;
+typedef __mpz_struct *mpz_ptr;
+typedef __gmp_const __mpf_struct *mpf_srcptr;
+typedef __mpf_struct *mpf_ptr;
+typedef __gmp_const __mpq_struct *mpq_srcptr;
+typedef __mpq_struct *mpq_ptr;
+
+
+/* This is not wanted in mp.h, so put it outside the __GNU_MP__ common
+   section. */
+#if __GMP_LIBGMP_DLL
+#if __GMP_WITHIN_GMPXX
+/* compiling to go into a DLL libgmpxx */
+#define __GMP_DECLSPEC_XX  __GMP_DECLSPEC_EXPORT
+#else
+/* compiling to go into an application which will link to a DLL libgmpxx */
+#define __GMP_DECLSPEC_XX  __GMP_DECLSPEC_IMPORT
+#endif
+#else
+/* all other cases */
+#define __GMP_DECLSPEC_XX
+#endif
+
+
+#if __GMP_HAVE_PROTOTYPES
+#define __GMP_PROTO(x) x
+#else
+#define __GMP_PROTO(x) ()
+#endif
+
+#ifndef __MPN
+#if __GMP_HAVE_TOKEN_PASTE
+#define __MPN(x) __gmpn_##x
+#else
+#define __MPN(x) __gmpn_/**/x
+#endif
+#endif
+
+/* For reference, "defined(EOF)" cannot be used here.  In g++ 2.95.4,
+   <iostream> defines EOF but not FILE.  */
+#if defined (FILE)                                              \
+  || defined (H_STDIO)                                          \
+  || defined (_H_STDIO)               /* AIX */                 \
+  || defined (_STDIO_H)               /* glibc, Sun, SCO */     \
+  || defined (_STDIO_H_)              /* BSD, OSF */            \
+  || defined (__STDIO_H)              /* Borland */             \
+  || defined (__STDIO_H__)            /* IRIX */                \
+  || defined (_STDIO_INCLUDED)        /* HPUX */                \
+  || defined (__dj_include_stdio_h_)  /* DJGPP */               \
+  || defined (_FILE_DEFINED)          /* Microsoft */           \
+  || defined (__STDIO__)              /* Apple MPW MrC */       \
+  || defined (_MSL_STDIO_H)           /* Metrowerks */          \
+  || defined (_STDIO_H_INCLUDED)      /* QNX4 */               \
+  || defined (_ISO_STDIO_ISO_H)       /* Sun C++ */            \
+  || defined (__STDIO_LOADED)         /* VMS */
+#define _GMP_H_HAVE_FILE 1
+#endif
+
+/* In ISO C, if a prototype involving "struct obstack *" is given without
+   that structure defined, then the struct is scoped down to just the
+   prototype, causing a conflict if it's subsequently defined for real.  So
+   only give prototypes if we've got obstack.h.  */
+#if defined (_OBSTACK_H)   /* glibc <obstack.h> */
+#define _GMP_H_HAVE_OBSTACK 1
+#endif
+
+/* The prototypes for gmp_vprintf etc are provided only if va_list is
+   available, via an application having included <stdarg.h> or <varargs.h>.
+   Usually va_list is a typedef so can't be tested directly, but C99
+   specifies that va_start is a macro (and it was normally a macro on past
+   systems too), so look for that.
+
+   <stdio.h> will define some sort of va_list for vprintf and vfprintf, but
+   let's not bother trying to use that since it's not standard and since
+   application uses for gmp_vprintf etc will almost certainly require the
+   whole <stdarg.h> or <varargs.h> anyway.  */
+
+#ifdef va_start
+#define _GMP_H_HAVE_VA_LIST 1
+#endif
+
+/* Test for gcc >= maj.min, as per __GNUC_PREREQ in glibc */
+#if defined (__GNUC__) && defined (__GNUC_MINOR__)
+#define __GMP_GNUC_PREREQ(maj, min) \
+  ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
+#else
+#define __GMP_GNUC_PREREQ(maj, min)  0
+#endif
+
+/* "pure" is in gcc 2.96 and up, see "(gcc)Function Attributes".  Basically
+   it means a function does nothing but examine its arguments and memory
+   (global or via arguments) to generate a return value, but changes nothing
+   and has no side-effects.  __GMP_NO_ATTRIBUTE_CONST_PURE lets
+   tune/common.c etc turn this off when trying to write timing loops.  */
+#if __GMP_GNUC_PREREQ (2,96) && ! defined (__GMP_NO_ATTRIBUTE_CONST_PURE)
+#define __GMP_ATTRIBUTE_PURE   __attribute__ ((__pure__))
+#else
+#define __GMP_ATTRIBUTE_PURE
+#endif
+
+
+/* __GMP_CAST allows us to use static_cast in C++, so our macros are clean
+   to "g++ -Wold-style-cast".
+
+   Casts in "extern inline" code within an extern "C" block don't induce
+   these warnings, so __GMP_CAST only needs to be used on documented
+   macros.  */
+
+#ifdef __cplusplus
+#define __GMP_CAST(type, expr)  (static_cast<type> (expr))
+#else
+#define __GMP_CAST(type, expr)  ((type) (expr))
+#endif
+
+
+/* An empty "throw ()" means the function doesn't throw any C++ exceptions,
+   this can save some stack frame info in applications.
+
+   Currently it's given only on functions which never divide-by-zero etc,
+   don't allocate memory, and are expected to never need to allocate memory.
+   This leaves open the possibility of a C++ throw from a future GMP
+   exceptions scheme.
+
+   mpz_set_ui etc are omitted to leave open the lazy allocation scheme
+   described in doc/tasks.html.  mpz_get_d etc are omitted to leave open
+   exceptions for float overflows.
+
+   Note that __GMP_NOTHROW must be given on any inlines the same as on their
+   prototypes (for g++ at least, where they're used together).  Note also
+   that g++ 3.0 demands that __GMP_NOTHROW is before other attributes like
+   __GMP_ATTRIBUTE_PURE.  */
+
+/* Dynamic exception specifications, including the empty "throw ()", are
+   deprecated from C++11 and no longer valid in C++20, so use "noexcept"
+   whenever the compiler reports C++11 or later.  Older C++ keeps
+   "throw ()", and plain C gets nothing.  */
+#if defined (__cplusplus)
+#if __cplusplus >= 201103L
+#define __GMP_NOTHROW  noexcept
+#else
+#define __GMP_NOTHROW  throw ()
+#endif
+#else
+#define __GMP_NOTHROW
+#endif
+
+
+/* PORTME: What other compilers have a useful "extern inline"?  "static
+   inline" would be an acceptable substitute if the compiler (or linker)
+   discards unused statics.  */
+
+ /* gcc has __inline__ in all modes, including strict ansi.  Give a prototype
+    for an inline too, so as to correctly specify "dllimport" on windows, in
+    case the function is called rather than inlined.
+    GCC 4.3 and above with -std=c99 or -std=gnu99 implements ISO C99
+    inline semantics, unless -fgnu89-inline is used.  */
+#ifdef __GNUC__
+#if (defined __GNUC_STDC_INLINE__) || (__GNUC__ == 4 && __GNUC_MINOR__ == 2)
+#define __GMP_EXTERN_INLINE extern __inline__ __attribute__ ((__gnu_inline__))
+#else
+#define __GMP_EXTERN_INLINE      extern __inline__
+#endif
+#define __GMP_INLINE_PROTOTYPES  1
+#endif
+
+/* DEC C (eg. version 5.9) supports "static __inline foo()", even in -std1
+   strict ANSI mode.  Inlining is done even when not optimizing (ie. -O0
+   mode, which is the default), but an unnecessary local copy of foo is
+   emitted unless -O is used.  "extern __inline" is accepted, but the
+   "extern" appears to be ignored, ie. it becomes a plain global function
+   but which is inlined within its file.  Don't know if all old versions of
+   DEC C supported __inline, but as a start let's do the right thing for
+   current versions.  */
+#ifdef __DECC
+#define __GMP_EXTERN_INLINE  static __inline  /* NOTE(review): no "! defined" guard here */
+#endif /* __DECC */
+
+/* SCO OpenUNIX 8 cc supports "static inline foo()" but not in -Xc strict
+   ANSI mode (__STDC__ is 1 in that mode).  Inlining only actually takes
+   place under -O.  Without -O "foo" seems to be emitted whether it's used
+   or not, which is wasteful.  "extern inline foo()" isn't useful, the
+   "extern" is apparently ignored, so foo is inlined if possible but also
+   emitted as a global, which causes multiple definition errors when
+   building a shared libgmp.  */
+#ifdef __SCO_VERSION__
+#if __SCO_VERSION__ > 400000000 && __STDC__ != 1 \
+  && ! defined (__GMP_EXTERN_INLINE)
+#define __GMP_EXTERN_INLINE  static inline
+#endif /* __SCO_VERSION__ > 400000000, not strict ANSI, not yet defined */
+#endif /* __SCO_VERSION__ */
+
+/* Microsoft's C compiler accepts __inline */
+#ifdef _MSC_VER
+#define __GMP_EXTERN_INLINE  __inline  /* NOTE(review): no "! defined" guard here */
+#endif /* _MSC_VER */
+
+/* Recent enough Sun C compilers (__SUNPRO_C >= 0x560) want "inline" */
+#if defined (__SUNPRO_C) && __SUNPRO_C >= 0x560 \
+  && ! defined (__GMP_EXTERN_INLINE)
+#define __GMP_EXTERN_INLINE  inline
+#endif /* __SUNPRO_C >= 0x560 */
+
+/* Somewhat older Sun C compilers (0x540 <= __SUNPRO_C < 0x560) want "static inline" */
+#if defined (__SUNPRO_C) && __SUNPRO_C >= 0x540 \
+  && ! defined (__GMP_EXTERN_INLINE)
+#define __GMP_EXTERN_INLINE  static inline
+#endif /* __SUNPRO_C >= 0x540 */
+
+
+/* C++ always has "inline" and since it's a normal feature the linker should
+   discard duplicate non-inlined copies, or if it doesn't then that's a
+   problem for everyone, not just GMP.  */
+#if defined (__cplusplus) && ! defined (__GMP_EXTERN_INLINE)
+#define __GMP_EXTERN_INLINE  inline  /* fallback when no compiler-specific case matched */
+#endif /* __cplusplus && ! __GMP_EXTERN_INLINE */
+
+/* Don't do any inlining within a configure run, since if the compiler ends
+   up emitting copies of the code into the object file it can end up
+   demanding the various support routines (like mpn_popcount) for linking,
+   making the "alloca" test and perhaps others fail.  And on hppa ia64 a
+   pre-release gcc 3.2 was seen not respecting the "extern" in "extern
+   __inline__", triggering this problem too.  */
+#if defined (__GMP_WITHIN_CONFIGURE) && ! __GMP_WITHIN_CONFIGURE_INLINE
+#undef __GMP_EXTERN_INLINE  /* drop whatever inline keyword was selected above */
+#endif /* an undefined __GMP_WITHIN_CONFIGURE_INLINE counts as 0 in the #if */
+
+/* By default, don't give a prototype when there's going to be an inline
+   version.  Note in particular that Cray C++ objects to the combination of
+   prototype and inline.  */
+#ifdef __GMP_EXTERN_INLINE
+#ifndef __GMP_INLINE_PROTOTYPES
+#define __GMP_INLINE_PROTOTYPES  0
+#endif /* ! __GMP_INLINE_PROTOTYPES (the gcc case above sets it to 1) */
+#else
+#define __GMP_INLINE_PROTOTYPES  1  /* no inline versions, so always give prototypes */
+#endif /* __GMP_EXTERN_INLINE */
+
+
+#define __GMP_ABS(x)   ((x) >= 0 ? (x) : -(x))     /* NB: x is evaluated twice */
+#define __GMP_MAX(h,i) ((h) > (i) ? (h) : (i))    /* NB: the result operand is evaluated twice */
+
+/* __GMP_USHRT_MAX is not "~ (unsigned short) 0" because short is promoted
+   to int by "~"; instead ~0 is formed as an int and then cast down.  */
+#define __GMP_UINT_MAX   (~ (unsigned) 0)
+#define __GMP_ULONG_MAX  (~ (unsigned long) 0)
+#define __GMP_USHRT_MAX  ((unsigned short) ~0)
+
+
+/* __builtin_expect is in gcc 3.0, and not in 2.95.  "!= 0" reduces cond to 0 or 1. */
+#if __GMP_GNUC_PREREQ (3,0)
+#define __GMP_LIKELY(cond)    __builtin_expect ((cond) != 0, 1)
+#define __GMP_UNLIKELY(cond)  __builtin_expect ((cond) != 0, 0)
+#else /* no branch hint available: use the plain condition */
+#define __GMP_LIKELY(cond)    (cond)
+#define __GMP_UNLIKELY(cond)  (cond)
+#endif /* __GMP_GNUC_PREREQ (3,0) */
+
+#ifdef _CRAY
+#define __GMP_CRAY_Pragma(str)  _Pragma (str)
+#else
+#define __GMP_CRAY_Pragma(str)  /* expands to nothing when _CRAY is not defined */
+#endif /* _CRAY */
+
+
+/* Allow direct user access to the numerator and denominator of an mpq_t object.  */
+#define mpq_numref(Q) (&((Q)->_mp_num))
+#define mpq_denref(Q) (&((Q)->_mp_den))
+
+
+#if defined (__cplusplus)
+extern "C" {
+using std::FILE;
+#endif
+
+#define mp_set_memory_functions __gmp_set_memory_functions
+__GMP_DECLSPEC void mp_set_memory_functions __GMP_PROTO ((void *(*) (size_t),
+                                     void *(*) (void *, size_t, size_t),
+                                     void (*) (void *, size_t))) __GMP_NOTHROW;
+
+#define mp_get_memory_functions __gmp_get_memory_functions
+__GMP_DECLSPEC void mp_get_memory_functions __GMP_PROTO ((void *(**) (size_t),
+                                      void *(**) (void *, size_t, size_t),
+                                      void (**) (void *, size_t))) __GMP_NOTHROW;
+
+#define mp_bits_per_limb __gmp_bits_per_limb
+__GMP_DECLSPEC extern __gmp_const int mp_bits_per_limb;
+
+#define gmp_errno __gmp_errno
+__GMP_DECLSPEC extern int gmp_errno;
+
+#define gmp_version __gmp_version
+__GMP_DECLSPEC extern __gmp_const char * __gmp_const gmp_version;
+
+
+/**************** Random number routines.  ****************/
+
+/* obsolete */
+#define gmp_randinit __gmp_randinit
+__GMP_DECLSPEC void gmp_randinit __GMP_PROTO ((gmp_randstate_t, gmp_randalg_t, ...));
+
+#define gmp_randinit_default __gmp_randinit_default
+__GMP_DECLSPEC void gmp_randinit_default __GMP_PROTO ((gmp_randstate_t));
+
+#define gmp_randinit_lc_2exp __gmp_randinit_lc_2exp
+__GMP_DECLSPEC void gmp_randinit_lc_2exp __GMP_PROTO ((gmp_randstate_t,
+                                                      mpz_srcptr, unsigned long int,
+                                                      mp_bitcnt_t));
+
+#define gmp_randinit_lc_2exp_size __gmp_randinit_lc_2exp_size
+__GMP_DECLSPEC int gmp_randinit_lc_2exp_size __GMP_PROTO ((gmp_randstate_t, mp_bitcnt_t));
+
+#define gmp_randinit_mt __gmp_randinit_mt
+__GMP_DECLSPEC void gmp_randinit_mt __GMP_PROTO ((gmp_randstate_t));
+
+#define gmp_randinit_set __gmp_randinit_set
+__GMP_DECLSPEC void gmp_randinit_set __GMP_PROTO ((gmp_randstate_t, __gmp_const __gmp_randstate_struct *));
+
+#define gmp_randseed __gmp_randseed
+__GMP_DECLSPEC void gmp_randseed __GMP_PROTO ((gmp_randstate_t, mpz_srcptr));
+
+#define gmp_randseed_ui __gmp_randseed_ui
+__GMP_DECLSPEC void gmp_randseed_ui __GMP_PROTO ((gmp_randstate_t, unsigned long int));
+
+#define gmp_randclear __gmp_randclear
+__GMP_DECLSPEC void gmp_randclear __GMP_PROTO ((gmp_randstate_t));
+
+#define gmp_urandomb_ui __gmp_urandomb_ui
+__GMP_DECLSPEC unsigned long gmp_urandomb_ui __GMP_PROTO ((gmp_randstate_t, unsigned long));
+
+#define gmp_urandomm_ui __gmp_urandomm_ui
+__GMP_DECLSPEC unsigned long gmp_urandomm_ui __GMP_PROTO ((gmp_randstate_t, unsigned long));
+
+
+/**************** Formatted output routines.  ****************/
+
+#define gmp_asprintf __gmp_asprintf
+__GMP_DECLSPEC int gmp_asprintf __GMP_PROTO ((char **, __gmp_const char *, ...));
+
+#define gmp_fprintf __gmp_fprintf
+#ifdef _GMP_H_HAVE_FILE
+__GMP_DECLSPEC int gmp_fprintf __GMP_PROTO ((FILE *, __gmp_const char *, ...));
+#endif
+
+#define gmp_obstack_printf __gmp_obstack_printf
+#if defined (_GMP_H_HAVE_OBSTACK)
+__GMP_DECLSPEC int gmp_obstack_printf __GMP_PROTO ((struct obstack *, __gmp_const char *, ...));
+#endif
+
+#define gmp_obstack_vprintf __gmp_obstack_vprintf
+#if defined (_GMP_H_HAVE_OBSTACK) && defined (_GMP_H_HAVE_VA_LIST)
+__GMP_DECLSPEC int gmp_obstack_vprintf __GMP_PROTO ((struct obstack *, __gmp_const char *, va_list));
+#endif
+
+#define gmp_printf __gmp_printf
+__GMP_DECLSPEC int gmp_printf __GMP_PROTO ((__gmp_const char *, ...));
+
+#define gmp_snprintf __gmp_snprintf
+__GMP_DECLSPEC int gmp_snprintf __GMP_PROTO ((char *, size_t, __gmp_const char *, ...));
+
+#define gmp_sprintf __gmp_sprintf
+__GMP_DECLSPEC int gmp_sprintf __GMP_PROTO ((char *, __gmp_const char *, ...));
+
+#define gmp_vasprintf __gmp_vasprintf
+#if defined (_GMP_H_HAVE_VA_LIST)
+__GMP_DECLSPEC int gmp_vasprintf __GMP_PROTO ((char **, __gmp_const char *, va_list));
+#endif
+
+#define gmp_vfprintf __gmp_vfprintf
+#if defined (_GMP_H_HAVE_FILE) && defined (_GMP_H_HAVE_VA_LIST)
+__GMP_DECLSPEC int gmp_vfprintf __GMP_PROTO ((FILE *, __gmp_const char *, va_list));
+#endif
+
+#define gmp_vprintf __gmp_vprintf
+#if defined (_GMP_H_HAVE_VA_LIST)
+__GMP_DECLSPEC int gmp_vprintf __GMP_PROTO ((__gmp_const char *, va_list));
+#endif
+
+#define gmp_vsnprintf __gmp_vsnprintf
+#if defined (_GMP_H_HAVE_VA_LIST)
+__GMP_DECLSPEC int gmp_vsnprintf __GMP_PROTO ((char *, size_t, __gmp_const char *, va_list));
+#endif
+
+#define gmp_vsprintf __gmp_vsprintf
+#if defined (_GMP_H_HAVE_VA_LIST)
+__GMP_DECLSPEC int gmp_vsprintf __GMP_PROTO ((char *, __gmp_const char *, va_list));
+#endif
+
+
+/**************** Formatted input routines.  ****************/
+
+#define gmp_fscanf __gmp_fscanf
+#ifdef _GMP_H_HAVE_FILE
+__GMP_DECLSPEC int gmp_fscanf __GMP_PROTO ((FILE *, __gmp_const char *, ...));
+#endif
+
+#define gmp_scanf __gmp_scanf
+__GMP_DECLSPEC int gmp_scanf __GMP_PROTO ((__gmp_const char *, ...));
+
+#define gmp_sscanf __gmp_sscanf
+__GMP_DECLSPEC int gmp_sscanf __GMP_PROTO ((__gmp_const char *, __gmp_const char *, ...));
+
+#define gmp_vfscanf __gmp_vfscanf
+#if defined (_GMP_H_HAVE_FILE) && defined (_GMP_H_HAVE_VA_LIST)
+__GMP_DECLSPEC int gmp_vfscanf __GMP_PROTO ((FILE *, __gmp_const char *, va_list));
+#endif
+
+#define gmp_vscanf __gmp_vscanf
+#if defined (_GMP_H_HAVE_VA_LIST)
+__GMP_DECLSPEC int gmp_vscanf __GMP_PROTO ((__gmp_const char *, va_list));
+#endif
+
+#define gmp_vsscanf __gmp_vsscanf
+#if defined (_GMP_H_HAVE_VA_LIST)
+__GMP_DECLSPEC int gmp_vsscanf __GMP_PROTO ((__gmp_const char *, __gmp_const char *, va_list));
+#endif
+
+
+/**************** Integer (i.e. Z) routines.  ****************/
+
+#define _mpz_realloc __gmpz_realloc
+#define mpz_realloc __gmpz_realloc  /* both names map to the same symbol */
+__GMP_DECLSPEC void *_mpz_realloc __GMP_PROTO ((mpz_ptr, mp_size_t));
+
+#define mpz_abs __gmpz_abs
+#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_abs)
+__GMP_DECLSPEC void mpz_abs __GMP_PROTO ((mpz_ptr, mpz_srcptr));
+#endif
+
+#define mpz_add __gmpz_add
+__GMP_DECLSPEC void mpz_add __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+
+#define mpz_add_ui __gmpz_add_ui
+__GMP_DECLSPEC void mpz_add_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+
+#define mpz_addmul __gmpz_addmul
+__GMP_DECLSPEC void mpz_addmul __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+
+#define mpz_addmul_ui __gmpz_addmul_ui
+__GMP_DECLSPEC void mpz_addmul_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+
+#define mpz_and __gmpz_and
+__GMP_DECLSPEC void mpz_and __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+
+#define mpz_array_init __gmpz_array_init
+__GMP_DECLSPEC void mpz_array_init __GMP_PROTO ((mpz_ptr, mp_size_t, mp_size_t));
+
+#define mpz_bin_ui __gmpz_bin_ui
+__GMP_DECLSPEC void mpz_bin_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+
+#define mpz_bin_uiui __gmpz_bin_uiui
+__GMP_DECLSPEC void mpz_bin_uiui __GMP_PROTO ((mpz_ptr, unsigned long int, unsigned long int));
+
+#define mpz_cdiv_q __gmpz_cdiv_q
+__GMP_DECLSPEC void mpz_cdiv_q __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+
+#define mpz_cdiv_q_2exp __gmpz_cdiv_q_2exp
+__GMP_DECLSPEC void mpz_cdiv_q_2exp __GMP_PROTO ((mpz_ptr, mpz_srcptr, mp_bitcnt_t));
+
+#define mpz_cdiv_q_ui __gmpz_cdiv_q_ui
+__GMP_DECLSPEC unsigned long int mpz_cdiv_q_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+
+#define mpz_cdiv_qr __gmpz_cdiv_qr
+__GMP_DECLSPEC void mpz_cdiv_qr __GMP_PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr));
+
+#define mpz_cdiv_qr_ui __gmpz_cdiv_qr_ui
+__GMP_DECLSPEC unsigned long int mpz_cdiv_qr_ui __GMP_PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, unsigned long int));
+
+#define mpz_cdiv_r __gmpz_cdiv_r
+__GMP_DECLSPEC void mpz_cdiv_r __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+
+#define mpz_cdiv_r_2exp __gmpz_cdiv_r_2exp
+__GMP_DECLSPEC void mpz_cdiv_r_2exp __GMP_PROTO ((mpz_ptr, mpz_srcptr, mp_bitcnt_t));
+
+#define mpz_cdiv_r_ui __gmpz_cdiv_r_ui
+__GMP_DECLSPEC unsigned long int mpz_cdiv_r_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+
+#define mpz_cdiv_ui __gmpz_cdiv_ui
+__GMP_DECLSPEC unsigned long int mpz_cdiv_ui __GMP_PROTO ((mpz_srcptr, unsigned long int)) __GMP_ATTRIBUTE_PURE;
+
+#define mpz_clear __gmpz_clear
+__GMP_DECLSPEC void mpz_clear __GMP_PROTO ((mpz_ptr));
+
+#define mpz_clears __gmpz_clears
+__GMP_DECLSPEC void mpz_clears __GMP_PROTO ((mpz_ptr, ...));
+
+#define mpz_clrbit __gmpz_clrbit
+__GMP_DECLSPEC void mpz_clrbit __GMP_PROTO ((mpz_ptr, mp_bitcnt_t));
+
+#define mpz_cmp __gmpz_cmp
+__GMP_DECLSPEC int mpz_cmp __GMP_PROTO ((mpz_srcptr, mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpz_cmp_d __gmpz_cmp_d
+__GMP_DECLSPEC int mpz_cmp_d __GMP_PROTO ((mpz_srcptr, double)) __GMP_ATTRIBUTE_PURE;
+
+#define _mpz_cmp_si __gmpz_cmp_si
+__GMP_DECLSPEC int _mpz_cmp_si __GMP_PROTO ((mpz_srcptr, signed long int)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define _mpz_cmp_ui __gmpz_cmp_ui
+__GMP_DECLSPEC int _mpz_cmp_ui __GMP_PROTO ((mpz_srcptr, unsigned long int)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpz_cmpabs __gmpz_cmpabs
+__GMP_DECLSPEC int mpz_cmpabs __GMP_PROTO ((mpz_srcptr, mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpz_cmpabs_d __gmpz_cmpabs_d
+__GMP_DECLSPEC int mpz_cmpabs_d __GMP_PROTO ((mpz_srcptr, double)) __GMP_ATTRIBUTE_PURE;
+
+#define mpz_cmpabs_ui __gmpz_cmpabs_ui
+__GMP_DECLSPEC int mpz_cmpabs_ui __GMP_PROTO ((mpz_srcptr, unsigned long int)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpz_com __gmpz_com
+__GMP_DECLSPEC void mpz_com __GMP_PROTO ((mpz_ptr, mpz_srcptr));
+
+#define mpz_combit __gmpz_combit
+__GMP_DECLSPEC void mpz_combit __GMP_PROTO ((mpz_ptr, mp_bitcnt_t));
+
+#define mpz_congruent_p __gmpz_congruent_p
+__GMP_DECLSPEC int mpz_congruent_p __GMP_PROTO ((mpz_srcptr, mpz_srcptr, mpz_srcptr)) __GMP_ATTRIBUTE_PURE;
+
+#define mpz_congruent_2exp_p __gmpz_congruent_2exp_p
+__GMP_DECLSPEC int mpz_congruent_2exp_p __GMP_PROTO ((mpz_srcptr, mpz_srcptr, mp_bitcnt_t)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpz_congruent_ui_p __gmpz_congruent_ui_p
+__GMP_DECLSPEC int mpz_congruent_ui_p __GMP_PROTO ((mpz_srcptr, unsigned long, unsigned long)) __GMP_ATTRIBUTE_PURE;
+
+#define mpz_divexact __gmpz_divexact
+__GMP_DECLSPEC void mpz_divexact __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+
+#define mpz_divexact_ui __gmpz_divexact_ui
+__GMP_DECLSPEC void mpz_divexact_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long));
+
+#define mpz_divisible_p __gmpz_divisible_p
+__GMP_DECLSPEC int mpz_divisible_p __GMP_PROTO ((mpz_srcptr, mpz_srcptr)) __GMP_ATTRIBUTE_PURE;
+
+#define mpz_divisible_ui_p __gmpz_divisible_ui_p
+__GMP_DECLSPEC int mpz_divisible_ui_p __GMP_PROTO ((mpz_srcptr, unsigned long)) __GMP_ATTRIBUTE_PURE;
+
+#define mpz_divisible_2exp_p __gmpz_divisible_2exp_p
+__GMP_DECLSPEC int mpz_divisible_2exp_p __GMP_PROTO ((mpz_srcptr, mp_bitcnt_t)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpz_dump __gmpz_dump
+__GMP_DECLSPEC void mpz_dump __GMP_PROTO ((mpz_srcptr));
+
+#define mpz_export __gmpz_export
+__GMP_DECLSPEC void *mpz_export __GMP_PROTO ((void *, size_t *, int, size_t, int, size_t, mpz_srcptr));
+
+#define mpz_fac_ui __gmpz_fac_ui
+__GMP_DECLSPEC void mpz_fac_ui __GMP_PROTO ((mpz_ptr, unsigned long int));
+
+#define mpz_fdiv_q __gmpz_fdiv_q
+__GMP_DECLSPEC void mpz_fdiv_q __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+
+#define mpz_fdiv_q_2exp __gmpz_fdiv_q_2exp
+__GMP_DECLSPEC void mpz_fdiv_q_2exp __GMP_PROTO ((mpz_ptr, mpz_srcptr, mp_bitcnt_t));
+
+#define mpz_fdiv_q_ui __gmpz_fdiv_q_ui
+__GMP_DECLSPEC unsigned long int mpz_fdiv_q_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+
+#define mpz_fdiv_qr __gmpz_fdiv_qr
+__GMP_DECLSPEC void mpz_fdiv_qr __GMP_PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr));
+
+#define mpz_fdiv_qr_ui __gmpz_fdiv_qr_ui
+__GMP_DECLSPEC unsigned long int mpz_fdiv_qr_ui __GMP_PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, unsigned long int));
+
+#define mpz_fdiv_r __gmpz_fdiv_r
+__GMP_DECLSPEC void mpz_fdiv_r __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+
+#define mpz_fdiv_r_2exp __gmpz_fdiv_r_2exp
+__GMP_DECLSPEC void mpz_fdiv_r_2exp __GMP_PROTO ((mpz_ptr, mpz_srcptr, mp_bitcnt_t));
+
+#define mpz_fdiv_r_ui __gmpz_fdiv_r_ui
+__GMP_DECLSPEC unsigned long int mpz_fdiv_r_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+
+#define mpz_fdiv_ui __gmpz_fdiv_ui
+__GMP_DECLSPEC unsigned long int mpz_fdiv_ui __GMP_PROTO ((mpz_srcptr, unsigned long int)) __GMP_ATTRIBUTE_PURE;
+
+#define mpz_fib_ui __gmpz_fib_ui
+__GMP_DECLSPEC void mpz_fib_ui __GMP_PROTO ((mpz_ptr, unsigned long int));
+
+#define mpz_fib2_ui __gmpz_fib2_ui
+__GMP_DECLSPEC void mpz_fib2_ui __GMP_PROTO ((mpz_ptr, mpz_ptr, unsigned long int));
+
+#define mpz_fits_sint_p __gmpz_fits_sint_p
+__GMP_DECLSPEC int mpz_fits_sint_p __GMP_PROTO ((mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpz_fits_slong_p __gmpz_fits_slong_p
+__GMP_DECLSPEC int mpz_fits_slong_p __GMP_PROTO ((mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpz_fits_sshort_p __gmpz_fits_sshort_p
+__GMP_DECLSPEC int mpz_fits_sshort_p __GMP_PROTO ((mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpz_fits_uint_p __gmpz_fits_uint_p
+#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_fits_uint_p)
+__GMP_DECLSPEC int mpz_fits_uint_p __GMP_PROTO ((mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+#endif
+
+#define mpz_fits_ulong_p __gmpz_fits_ulong_p
+#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_fits_ulong_p)
+__GMP_DECLSPEC int mpz_fits_ulong_p __GMP_PROTO ((mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+#endif
+
+#define mpz_fits_ushort_p __gmpz_fits_ushort_p
+#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_fits_ushort_p)
+__GMP_DECLSPEC int mpz_fits_ushort_p __GMP_PROTO ((mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+#endif
+
+#define mpz_gcd __gmpz_gcd
+__GMP_DECLSPEC void mpz_gcd __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+
+#define mpz_gcd_ui __gmpz_gcd_ui
+__GMP_DECLSPEC unsigned long int mpz_gcd_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+
+#define mpz_gcdext __gmpz_gcdext
+__GMP_DECLSPEC void mpz_gcdext __GMP_PROTO ((mpz_ptr, mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr));
+
+#define mpz_get_d __gmpz_get_d
+__GMP_DECLSPEC double mpz_get_d __GMP_PROTO ((mpz_srcptr)) __GMP_ATTRIBUTE_PURE;
+
+#define mpz_get_d_2exp __gmpz_get_d_2exp
+__GMP_DECLSPEC double mpz_get_d_2exp __GMP_PROTO ((signed long int *, mpz_srcptr));
+
+#define mpz_get_si __gmpz_get_si
+__GMP_DECLSPEC /* signed */ long int mpz_get_si __GMP_PROTO ((mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpz_get_str __gmpz_get_str
+__GMP_DECLSPEC char *mpz_get_str __GMP_PROTO ((char *, int, mpz_srcptr));
+
+#define mpz_get_ui __gmpz_get_ui
+#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_get_ui)
+__GMP_DECLSPEC unsigned long int mpz_get_ui __GMP_PROTO ((mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+#endif
+
+#define mpz_getlimbn __gmpz_getlimbn
+#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_getlimbn)
+__GMP_DECLSPEC mp_limb_t mpz_getlimbn __GMP_PROTO ((mpz_srcptr, mp_size_t)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+#endif
+
+#define mpz_hamdist __gmpz_hamdist
+__GMP_DECLSPEC mp_bitcnt_t mpz_hamdist __GMP_PROTO ((mpz_srcptr, mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpz_import __gmpz_import
+__GMP_DECLSPEC void mpz_import __GMP_PROTO ((mpz_ptr, size_t, int, size_t, int, size_t, __gmp_const void *));
+
+#define mpz_init __gmpz_init
+__GMP_DECLSPEC void mpz_init __GMP_PROTO ((mpz_ptr));
+
+#define mpz_init2 __gmpz_init2
+__GMP_DECLSPEC void mpz_init2 __GMP_PROTO ((mpz_ptr, mp_bitcnt_t));
+
+#define mpz_inits __gmpz_inits
+__GMP_DECLSPEC void mpz_inits __GMP_PROTO ((mpz_ptr, ...));
+
+#define mpz_init_set __gmpz_init_set
+__GMP_DECLSPEC void mpz_init_set __GMP_PROTO ((mpz_ptr, mpz_srcptr));
+
+#define mpz_init_set_d __gmpz_init_set_d
+__GMP_DECLSPEC void mpz_init_set_d __GMP_PROTO ((mpz_ptr, double));
+
+#define mpz_init_set_si __gmpz_init_set_si
+__GMP_DECLSPEC void mpz_init_set_si __GMP_PROTO ((mpz_ptr, signed long int));
+
+#define mpz_init_set_str __gmpz_init_set_str
+__GMP_DECLSPEC int mpz_init_set_str __GMP_PROTO ((mpz_ptr, __gmp_const char *, int));
+
+#define mpz_init_set_ui __gmpz_init_set_ui
+__GMP_DECLSPEC void mpz_init_set_ui __GMP_PROTO ((mpz_ptr, unsigned long int));
+
+#define mpz_inp_raw __gmpz_inp_raw
+#ifdef _GMP_H_HAVE_FILE
+__GMP_DECLSPEC size_t mpz_inp_raw __GMP_PROTO ((mpz_ptr, FILE *));
+#endif
+
+#define mpz_inp_str __gmpz_inp_str
+#ifdef _GMP_H_HAVE_FILE
+__GMP_DECLSPEC size_t mpz_inp_str __GMP_PROTO ((mpz_ptr, FILE *, int));
+#endif
+
+#define mpz_invert __gmpz_invert
+__GMP_DECLSPEC int mpz_invert __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+
+#define mpz_ior __gmpz_ior
+__GMP_DECLSPEC void mpz_ior __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+
+#define mpz_jacobi __gmpz_jacobi
+__GMP_DECLSPEC int mpz_jacobi __GMP_PROTO ((mpz_srcptr, mpz_srcptr)) __GMP_ATTRIBUTE_PURE;
+
+#define mpz_kronecker mpz_jacobi  /* alias */
+
+#define mpz_kronecker_si __gmpz_kronecker_si
+__GMP_DECLSPEC int mpz_kronecker_si __GMP_PROTO ((mpz_srcptr, long)) __GMP_ATTRIBUTE_PURE;
+
+#define mpz_kronecker_ui __gmpz_kronecker_ui
+__GMP_DECLSPEC int mpz_kronecker_ui __GMP_PROTO ((mpz_srcptr, unsigned long)) __GMP_ATTRIBUTE_PURE;
+
+#define mpz_si_kronecker __gmpz_si_kronecker
+__GMP_DECLSPEC int mpz_si_kronecker __GMP_PROTO ((long, mpz_srcptr)) __GMP_ATTRIBUTE_PURE;
+
+#define mpz_ui_kronecker __gmpz_ui_kronecker
+__GMP_DECLSPEC int mpz_ui_kronecker __GMP_PROTO ((unsigned long, mpz_srcptr)) __GMP_ATTRIBUTE_PURE;
+
+#define mpz_lcm __gmpz_lcm
+__GMP_DECLSPEC void mpz_lcm __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+
+#define mpz_lcm_ui __gmpz_lcm_ui
+__GMP_DECLSPEC void mpz_lcm_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long));
+
+#define mpz_legendre mpz_jacobi  /* alias */
+
+#define mpz_lucnum_ui __gmpz_lucnum_ui
+__GMP_DECLSPEC void mpz_lucnum_ui __GMP_PROTO ((mpz_ptr, unsigned long int));
+
+#define mpz_lucnum2_ui __gmpz_lucnum2_ui
+__GMP_DECLSPEC void mpz_lucnum2_ui __GMP_PROTO ((mpz_ptr, mpz_ptr, unsigned long int));
+
+#define mpz_millerrabin __gmpz_millerrabin
+__GMP_DECLSPEC int mpz_millerrabin __GMP_PROTO ((mpz_srcptr, int)) __GMP_ATTRIBUTE_PURE;
+
+#define mpz_mod __gmpz_mod
+__GMP_DECLSPEC void mpz_mod __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+
+#define mpz_mod_ui mpz_fdiv_r_ui /* same as fdiv_r because divisor unsigned */
+
+#define mpz_mul __gmpz_mul
+__GMP_DECLSPEC void mpz_mul __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+
+#define mpz_mul_2exp __gmpz_mul_2exp
+__GMP_DECLSPEC void mpz_mul_2exp __GMP_PROTO ((mpz_ptr, mpz_srcptr, mp_bitcnt_t));
+
+#define mpz_mul_si __gmpz_mul_si
+__GMP_DECLSPEC void mpz_mul_si __GMP_PROTO ((mpz_ptr, mpz_srcptr, long int));
+
+#define mpz_mul_ui __gmpz_mul_ui
+__GMP_DECLSPEC void mpz_mul_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+
+#define mpz_neg __gmpz_neg
+#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_neg)
+__GMP_DECLSPEC void mpz_neg __GMP_PROTO ((mpz_ptr, mpz_srcptr));
+#endif
+
+#define mpz_nextprime __gmpz_nextprime
+__GMP_DECLSPEC void mpz_nextprime __GMP_PROTO ((mpz_ptr, mpz_srcptr));
+
+#define mpz_out_raw __gmpz_out_raw
+#ifdef _GMP_H_HAVE_FILE
+__GMP_DECLSPEC size_t mpz_out_raw __GMP_PROTO ((FILE *, mpz_srcptr));
+#endif
+
+#define mpz_out_str __gmpz_out_str
+#ifdef _GMP_H_HAVE_FILE
+__GMP_DECLSPEC size_t mpz_out_str __GMP_PROTO ((FILE *, int, mpz_srcptr));
+#endif
+
+#define mpz_perfect_power_p __gmpz_perfect_power_p
+__GMP_DECLSPEC int mpz_perfect_power_p __GMP_PROTO ((mpz_srcptr)) __GMP_ATTRIBUTE_PURE;
+
+#define mpz_perfect_square_p __gmpz_perfect_square_p
+#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_perfect_square_p)
+__GMP_DECLSPEC int mpz_perfect_square_p __GMP_PROTO ((mpz_srcptr)) __GMP_ATTRIBUTE_PURE;
+#endif
+
+#define mpz_popcount __gmpz_popcount
+#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_popcount)
+__GMP_DECLSPEC mp_bitcnt_t mpz_popcount __GMP_PROTO ((mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+#endif
+
+#define mpz_pow_ui __gmpz_pow_ui
+__GMP_DECLSPEC void mpz_pow_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+
+#define mpz_powm __gmpz_powm
+__GMP_DECLSPEC void mpz_powm __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr, mpz_srcptr));
+
+#define mpz_powm_sec __gmpz_powm_sec
+__GMP_DECLSPEC void mpz_powm_sec __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr, mpz_srcptr));
+
+#define mpz_powm_ui __gmpz_powm_ui
+__GMP_DECLSPEC void mpz_powm_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int, mpz_srcptr));
+
+#define mpz_probab_prime_p __gmpz_probab_prime_p
+__GMP_DECLSPEC int mpz_probab_prime_p __GMP_PROTO ((mpz_srcptr, int)) __GMP_ATTRIBUTE_PURE;
+
+#define mpz_random __gmpz_random
+__GMP_DECLSPEC void mpz_random __GMP_PROTO ((mpz_ptr, mp_size_t));
+
+#define mpz_random2 __gmpz_random2
+__GMP_DECLSPEC void mpz_random2 __GMP_PROTO ((mpz_ptr, mp_size_t));
+
+#define mpz_realloc2 __gmpz_realloc2
+__GMP_DECLSPEC void mpz_realloc2 __GMP_PROTO ((mpz_ptr, mp_bitcnt_t));
+
+#define mpz_remove __gmpz_remove
+__GMP_DECLSPEC mp_bitcnt_t mpz_remove __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+
+#define mpz_root __gmpz_root
+__GMP_DECLSPEC int mpz_root __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+
+#define mpz_rootrem __gmpz_rootrem
+__GMP_DECLSPEC void mpz_rootrem __GMP_PROTO ((mpz_ptr,mpz_ptr, mpz_srcptr, unsigned long int));
+
+#define mpz_rrandomb __gmpz_rrandomb
+__GMP_DECLSPEC void mpz_rrandomb __GMP_PROTO ((mpz_ptr, gmp_randstate_t, mp_bitcnt_t));
+
+#define mpz_scan0 __gmpz_scan0
+__GMP_DECLSPEC mp_bitcnt_t mpz_scan0 __GMP_PROTO ((mpz_srcptr, mp_bitcnt_t)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpz_scan1 __gmpz_scan1
+__GMP_DECLSPEC mp_bitcnt_t mpz_scan1 __GMP_PROTO ((mpz_srcptr, mp_bitcnt_t)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpz_set __gmpz_set
+__GMP_DECLSPEC void mpz_set __GMP_PROTO ((mpz_ptr, mpz_srcptr));
+
+#define mpz_set_d __gmpz_set_d
+__GMP_DECLSPEC void mpz_set_d __GMP_PROTO ((mpz_ptr, double));
+
+#define mpz_set_f __gmpz_set_f
+__GMP_DECLSPEC void mpz_set_f __GMP_PROTO ((mpz_ptr, mpf_srcptr));
+
+#define mpz_set_q __gmpz_set_q
+#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_set_q)
+__GMP_DECLSPEC void mpz_set_q __GMP_PROTO ((mpz_ptr, mpq_srcptr));
+#endif
+
+#define mpz_set_si __gmpz_set_si
+__GMP_DECLSPEC void mpz_set_si __GMP_PROTO ((mpz_ptr, signed long int));
+
+#define mpz_set_str __gmpz_set_str
+__GMP_DECLSPEC int mpz_set_str __GMP_PROTO ((mpz_ptr, __gmp_const char *, int));
+
+#define mpz_set_ui __gmpz_set_ui
+__GMP_DECLSPEC void mpz_set_ui __GMP_PROTO ((mpz_ptr, unsigned long int));
+
+#define mpz_setbit __gmpz_setbit
+__GMP_DECLSPEC void mpz_setbit __GMP_PROTO ((mpz_ptr, mp_bitcnt_t));
+
+#define mpz_size __gmpz_size
+#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_size)
+__GMP_DECLSPEC size_t mpz_size __GMP_PROTO ((mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+#endif
+
+#define mpz_sizeinbase __gmpz_sizeinbase
+__GMP_DECLSPEC size_t mpz_sizeinbase __GMP_PROTO ((mpz_srcptr, int)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpz_sqrt __gmpz_sqrt
+__GMP_DECLSPEC void mpz_sqrt __GMP_PROTO ((mpz_ptr, mpz_srcptr));
+
+#define mpz_sqrtrem __gmpz_sqrtrem
+__GMP_DECLSPEC void mpz_sqrtrem __GMP_PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr));
+
+#define mpz_sub __gmpz_sub
+__GMP_DECLSPEC void mpz_sub __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+
+#define mpz_sub_ui __gmpz_sub_ui
+__GMP_DECLSPEC void mpz_sub_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+
+#define mpz_ui_sub __gmpz_ui_sub
+__GMP_DECLSPEC void mpz_ui_sub __GMP_PROTO ((mpz_ptr, unsigned long int, mpz_srcptr));
+
+#define mpz_submul __gmpz_submul
+__GMP_DECLSPEC void mpz_submul __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+
+#define mpz_submul_ui __gmpz_submul_ui
+__GMP_DECLSPEC void mpz_submul_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+
+#define mpz_swap __gmpz_swap
+__GMP_DECLSPEC void mpz_swap __GMP_PROTO ((mpz_ptr, mpz_ptr)) __GMP_NOTHROW;
+
+#define mpz_tdiv_ui __gmpz_tdiv_ui
+__GMP_DECLSPEC unsigned long int mpz_tdiv_ui __GMP_PROTO ((mpz_srcptr, unsigned long int)) __GMP_ATTRIBUTE_PURE;
+
+#define mpz_tdiv_q __gmpz_tdiv_q
+__GMP_DECLSPEC void mpz_tdiv_q __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+
+#define mpz_tdiv_q_2exp __gmpz_tdiv_q_2exp
+__GMP_DECLSPEC void mpz_tdiv_q_2exp __GMP_PROTO ((mpz_ptr, mpz_srcptr, mp_bitcnt_t));
+
+#define mpz_tdiv_q_ui __gmpz_tdiv_q_ui
+__GMP_DECLSPEC unsigned long int mpz_tdiv_q_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+
+#define mpz_tdiv_qr __gmpz_tdiv_qr
+__GMP_DECLSPEC void mpz_tdiv_qr __GMP_PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr));
+
+#define mpz_tdiv_qr_ui __gmpz_tdiv_qr_ui
+__GMP_DECLSPEC unsigned long int mpz_tdiv_qr_ui __GMP_PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, unsigned long int));
+
+#define mpz_tdiv_r __gmpz_tdiv_r
+__GMP_DECLSPEC void mpz_tdiv_r __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+
+#define mpz_tdiv_r_2exp __gmpz_tdiv_r_2exp
+__GMP_DECLSPEC void mpz_tdiv_r_2exp __GMP_PROTO ((mpz_ptr, mpz_srcptr, mp_bitcnt_t));
+
+#define mpz_tdiv_r_ui __gmpz_tdiv_r_ui
+__GMP_DECLSPEC unsigned long int mpz_tdiv_r_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+
+#define mpz_tstbit __gmpz_tstbit
+__GMP_DECLSPEC int mpz_tstbit __GMP_PROTO ((mpz_srcptr, mp_bitcnt_t)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpz_ui_pow_ui __gmpz_ui_pow_ui
+__GMP_DECLSPEC void mpz_ui_pow_ui __GMP_PROTO ((mpz_ptr, unsigned long int, unsigned long int));
+
+#define mpz_urandomb __gmpz_urandomb
+__GMP_DECLSPEC void mpz_urandomb __GMP_PROTO ((mpz_ptr, gmp_randstate_t, mp_bitcnt_t));
+
+#define mpz_urandomm __gmpz_urandomm
+__GMP_DECLSPEC void mpz_urandomm __GMP_PROTO ((mpz_ptr, gmp_randstate_t, mpz_srcptr));
+
+#define mpz_xor __gmpz_xor
+#define mpz_eor __gmpz_xor  /* alternate name for mpz_xor */
+__GMP_DECLSPEC void mpz_xor __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+
+
+/**************** Rational (i.e. Q) routines.  ****************/
+
+#define mpq_abs __gmpq_abs
+#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpq_abs)
+__GMP_DECLSPEC void mpq_abs __GMP_PROTO ((mpq_ptr, mpq_srcptr));
+#endif
+
+#define mpq_add __gmpq_add
+__GMP_DECLSPEC void mpq_add __GMP_PROTO ((mpq_ptr, mpq_srcptr, mpq_srcptr));
+
+#define mpq_canonicalize __gmpq_canonicalize
+__GMP_DECLSPEC void mpq_canonicalize __GMP_PROTO ((mpq_ptr));
+
+#define mpq_clear __gmpq_clear
+__GMP_DECLSPEC void mpq_clear __GMP_PROTO ((mpq_ptr));
+
+#define mpq_clears __gmpq_clears  /* public name -> __gmpq_ symbol, as for every entry below */
+__GMP_DECLSPEC void mpq_clears __GMP_PROTO ((mpq_ptr, ...));  /* variadic; NOTE(review): presumably a NULL-terminated mpq_ptr list -- confirm */
+
+#define mpq_cmp __gmpq_cmp
+__GMP_DECLSPEC int mpq_cmp __GMP_PROTO ((mpq_srcptr, mpq_srcptr)) __GMP_ATTRIBUTE_PURE;
+
+#define _mpq_cmp_si __gmpq_cmp_si  /* NOTE(review): underscore name; presumably reached through an mpq_cmp_si wrapper macro defined elsewhere -- confirm */
+__GMP_DECLSPEC int _mpq_cmp_si __GMP_PROTO ((mpq_srcptr, long, unsigned long)) __GMP_ATTRIBUTE_PURE;
+
+#define _mpq_cmp_ui __gmpq_cmp_ui  /* NOTE(review): same arrangement as _mpq_cmp_si -- confirm */
+__GMP_DECLSPEC int _mpq_cmp_ui __GMP_PROTO ((mpq_srcptr, unsigned long int, unsigned long int)) __GMP_ATTRIBUTE_PURE;
+
+#define mpq_div __gmpq_div
+__GMP_DECLSPEC void mpq_div __GMP_PROTO ((mpq_ptr, mpq_srcptr, mpq_srcptr));
+
+#define mpq_div_2exp __gmpq_div_2exp
+__GMP_DECLSPEC void mpq_div_2exp __GMP_PROTO ((mpq_ptr, mpq_srcptr, mp_bitcnt_t));
+
+#define mpq_equal __gmpq_equal
+__GMP_DECLSPEC int mpq_equal __GMP_PROTO ((mpq_srcptr, mpq_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpq_get_num __gmpq_get_num
+__GMP_DECLSPEC void mpq_get_num __GMP_PROTO ((mpz_ptr, mpq_srcptr));
+
+#define mpq_get_den __gmpq_get_den
+__GMP_DECLSPEC void mpq_get_den __GMP_PROTO ((mpz_ptr, mpq_srcptr));
+
+#define mpq_get_d __gmpq_get_d
+__GMP_DECLSPEC double mpq_get_d __GMP_PROTO ((mpq_srcptr)) __GMP_ATTRIBUTE_PURE;
+
+#define mpq_get_str __gmpq_get_str
+__GMP_DECLSPEC char *mpq_get_str __GMP_PROTO ((char *, int, mpq_srcptr));
+
+#define mpq_init __gmpq_init
+__GMP_DECLSPEC void mpq_init __GMP_PROTO ((mpq_ptr));
+
+#define mpq_inits __gmpq_inits
+__GMP_DECLSPEC void mpq_inits __GMP_PROTO ((mpq_ptr, ...));  /* variadic, like mpq_clears */
+
+#define mpq_inp_str __gmpq_inp_str  /* the alias is unconditional; only the prototype is guarded */
+#ifdef _GMP_H_HAVE_FILE  /* the prototype names FILE */
+__GMP_DECLSPEC size_t mpq_inp_str __GMP_PROTO ((mpq_ptr, FILE *, int));
+#endif /* _GMP_H_HAVE_FILE */
+
+#define mpq_inv __gmpq_inv
+__GMP_DECLSPEC void mpq_inv __GMP_PROTO ((mpq_ptr, mpq_srcptr));
+
+#define mpq_mul __gmpq_mul
+__GMP_DECLSPEC void mpq_mul __GMP_PROTO ((mpq_ptr, mpq_srcptr, mpq_srcptr));
+
+#define mpq_mul_2exp __gmpq_mul_2exp
+__GMP_DECLSPEC void mpq_mul_2exp __GMP_PROTO ((mpq_ptr, mpq_srcptr, mp_bitcnt_t));
+
+#define mpq_neg __gmpq_neg  /* an inline definition also exists in the "mpq inlines" section */
+#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpq_neg)
+__GMP_DECLSPEC void mpq_neg __GMP_PROTO ((mpq_ptr, mpq_srcptr));
+#endif /* __GMP_INLINE_PROTOTYPES || __GMP_FORCE_mpq_neg */
+
+#define mpq_out_str __gmpq_out_str
+#ifdef _GMP_H_HAVE_FILE  /* the prototype names FILE */
+__GMP_DECLSPEC size_t mpq_out_str __GMP_PROTO ((FILE *, int, mpq_srcptr));
+#endif /* _GMP_H_HAVE_FILE */
+
+#define mpq_set __gmpq_set
+__GMP_DECLSPEC void mpq_set __GMP_PROTO ((mpq_ptr, mpq_srcptr));
+
+#define mpq_set_d __gmpq_set_d
+__GMP_DECLSPEC void mpq_set_d __GMP_PROTO ((mpq_ptr, double));
+
+#define mpq_set_den __gmpq_set_den
+__GMP_DECLSPEC void mpq_set_den __GMP_PROTO ((mpq_ptr, mpz_srcptr));
+
+#define mpq_set_f __gmpq_set_f
+__GMP_DECLSPEC void mpq_set_f __GMP_PROTO ((mpq_ptr, mpf_srcptr));
+
+#define mpq_set_num __gmpq_set_num
+__GMP_DECLSPEC void mpq_set_num __GMP_PROTO ((mpq_ptr, mpz_srcptr));
+
+#define mpq_set_si __gmpq_set_si
+__GMP_DECLSPEC void mpq_set_si __GMP_PROTO ((mpq_ptr, signed long int, unsigned long int));
+
+#define mpq_set_str __gmpq_set_str
+__GMP_DECLSPEC int mpq_set_str __GMP_PROTO ((mpq_ptr, __gmp_const char *, int));
+
+#define mpq_set_ui __gmpq_set_ui
+__GMP_DECLSPEC void mpq_set_ui __GMP_PROTO ((mpq_ptr, unsigned long int, unsigned long int));
+
+#define mpq_set_z __gmpq_set_z
+__GMP_DECLSPEC void mpq_set_z __GMP_PROTO ((mpq_ptr, mpz_srcptr));
+
+#define mpq_sub __gmpq_sub
+__GMP_DECLSPEC void mpq_sub __GMP_PROTO ((mpq_ptr, mpq_srcptr, mpq_srcptr));
+
+#define mpq_swap __gmpq_swap
+__GMP_DECLSPEC void mpq_swap __GMP_PROTO ((mpq_ptr, mpq_ptr)) __GMP_NOTHROW;
+
+
+/**************** Float (i.e. F) routines.  ****************/
+
+#define mpf_abs __gmpf_abs  /* public name -> __gmpf_ symbol, as for every entry below */
+__GMP_DECLSPEC void mpf_abs __GMP_PROTO ((mpf_ptr, mpf_srcptr));
+
+#define mpf_add __gmpf_add
+__GMP_DECLSPEC void mpf_add __GMP_PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr));
+
+#define mpf_add_ui __gmpf_add_ui
+__GMP_DECLSPEC void mpf_add_ui __GMP_PROTO ((mpf_ptr, mpf_srcptr, unsigned long int));
+#define mpf_ceil __gmpf_ceil
+__GMP_DECLSPEC void mpf_ceil __GMP_PROTO ((mpf_ptr, mpf_srcptr));
+
+#define mpf_clear __gmpf_clear
+__GMP_DECLSPEC void mpf_clear __GMP_PROTO ((mpf_ptr));
+
+#define mpf_clears __gmpf_clears
+__GMP_DECLSPEC void mpf_clears __GMP_PROTO ((mpf_ptr, ...));  /* variadic; NOTE(review): presumably a NULL-terminated mpf_ptr list -- confirm */
+
+#define mpf_cmp __gmpf_cmp
+__GMP_DECLSPEC int mpf_cmp __GMP_PROTO ((mpf_srcptr, mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpf_cmp_d __gmpf_cmp_d
+__GMP_DECLSPEC int mpf_cmp_d __GMP_PROTO ((mpf_srcptr, double)) __GMP_ATTRIBUTE_PURE;
+
+#define mpf_cmp_si __gmpf_cmp_si
+__GMP_DECLSPEC int mpf_cmp_si __GMP_PROTO ((mpf_srcptr, signed long int)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpf_cmp_ui __gmpf_cmp_ui
+__GMP_DECLSPEC int mpf_cmp_ui __GMP_PROTO ((mpf_srcptr, unsigned long int)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpf_div __gmpf_div
+__GMP_DECLSPEC void mpf_div __GMP_PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr));
+
+#define mpf_div_2exp __gmpf_div_2exp
+__GMP_DECLSPEC void mpf_div_2exp __GMP_PROTO ((mpf_ptr, mpf_srcptr, mp_bitcnt_t));
+
+#define mpf_div_ui __gmpf_div_ui
+__GMP_DECLSPEC void mpf_div_ui __GMP_PROTO ((mpf_ptr, mpf_srcptr, unsigned long int));
+
+#define mpf_dump __gmpf_dump
+__GMP_DECLSPEC void mpf_dump __GMP_PROTO ((mpf_srcptr));
+
+#define mpf_eq __gmpf_eq
+__GMP_DECLSPEC int mpf_eq __GMP_PROTO ((mpf_srcptr, mpf_srcptr, mp_bitcnt_t)) __GMP_ATTRIBUTE_PURE;
+
+#define mpf_fits_sint_p __gmpf_fits_sint_p
+__GMP_DECLSPEC int mpf_fits_sint_p __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpf_fits_slong_p __gmpf_fits_slong_p
+__GMP_DECLSPEC int mpf_fits_slong_p __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpf_fits_sshort_p __gmpf_fits_sshort_p
+__GMP_DECLSPEC int mpf_fits_sshort_p __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpf_fits_uint_p __gmpf_fits_uint_p
+__GMP_DECLSPEC int mpf_fits_uint_p __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpf_fits_ulong_p __gmpf_fits_ulong_p
+__GMP_DECLSPEC int mpf_fits_ulong_p __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpf_fits_ushort_p __gmpf_fits_ushort_p
+__GMP_DECLSPEC int mpf_fits_ushort_p __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpf_floor __gmpf_floor
+__GMP_DECLSPEC void mpf_floor __GMP_PROTO ((mpf_ptr, mpf_srcptr));
+
+#define mpf_get_d __gmpf_get_d
+__GMP_DECLSPEC double mpf_get_d __GMP_PROTO ((mpf_srcptr)) __GMP_ATTRIBUTE_PURE;
+
+#define mpf_get_d_2exp __gmpf_get_d_2exp
+__GMP_DECLSPEC double mpf_get_d_2exp __GMP_PROTO ((signed long int *, mpf_srcptr));  /* NOTE(review): the long pointer presumably receives the exponent -- confirm */
+
+#define mpf_get_default_prec __gmpf_get_default_prec
+__GMP_DECLSPEC mp_bitcnt_t mpf_get_default_prec __GMP_PROTO ((void)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpf_get_prec __gmpf_get_prec
+__GMP_DECLSPEC mp_bitcnt_t mpf_get_prec __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpf_get_si __gmpf_get_si
+__GMP_DECLSPEC long mpf_get_si __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpf_get_str __gmpf_get_str
+__GMP_DECLSPEC char *mpf_get_str __GMP_PROTO ((char *, mp_exp_t *, int, size_t, mpf_srcptr));
+
+#define mpf_get_ui __gmpf_get_ui
+__GMP_DECLSPEC unsigned long mpf_get_ui __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpf_init __gmpf_init
+__GMP_DECLSPEC void mpf_init __GMP_PROTO ((mpf_ptr));
+
+#define mpf_init2 __gmpf_init2
+__GMP_DECLSPEC void mpf_init2 __GMP_PROTO ((mpf_ptr, mp_bitcnt_t));
+
+#define mpf_inits __gmpf_inits
+__GMP_DECLSPEC void mpf_inits __GMP_PROTO ((mpf_ptr, ...));  /* variadic, like mpf_clears */
+
+#define mpf_init_set __gmpf_init_set
+__GMP_DECLSPEC void mpf_init_set __GMP_PROTO ((mpf_ptr, mpf_srcptr));
+
+#define mpf_init_set_d __gmpf_init_set_d
+__GMP_DECLSPEC void mpf_init_set_d __GMP_PROTO ((mpf_ptr, double));
+
+#define mpf_init_set_si __gmpf_init_set_si
+__GMP_DECLSPEC void mpf_init_set_si __GMP_PROTO ((mpf_ptr, signed long int));
+
+#define mpf_init_set_str __gmpf_init_set_str
+__GMP_DECLSPEC int mpf_init_set_str __GMP_PROTO ((mpf_ptr, __gmp_const char *, int));
+
+#define mpf_init_set_ui __gmpf_init_set_ui
+__GMP_DECLSPEC void mpf_init_set_ui __GMP_PROTO ((mpf_ptr, unsigned long int));
+
+#define mpf_inp_str __gmpf_inp_str  /* the alias is unconditional; only the prototype is guarded */
+#ifdef _GMP_H_HAVE_FILE  /* the prototype names FILE */
+__GMP_DECLSPEC size_t mpf_inp_str __GMP_PROTO ((mpf_ptr, FILE *, int));
+#endif /* _GMP_H_HAVE_FILE */
+
+#define mpf_integer_p __gmpf_integer_p
+__GMP_DECLSPEC int mpf_integer_p __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpf_mul __gmpf_mul
+__GMP_DECLSPEC void mpf_mul __GMP_PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr));
+
+#define mpf_mul_2exp __gmpf_mul_2exp
+__GMP_DECLSPEC void mpf_mul_2exp __GMP_PROTO ((mpf_ptr, mpf_srcptr, mp_bitcnt_t));
+
+#define mpf_mul_ui __gmpf_mul_ui
+__GMP_DECLSPEC void mpf_mul_ui __GMP_PROTO ((mpf_ptr, mpf_srcptr, unsigned long int));
+
+#define mpf_neg __gmpf_neg
+__GMP_DECLSPEC void mpf_neg __GMP_PROTO ((mpf_ptr, mpf_srcptr));
+
+#define mpf_out_str __gmpf_out_str
+#ifdef _GMP_H_HAVE_FILE  /* the prototype names FILE */
+__GMP_DECLSPEC size_t mpf_out_str __GMP_PROTO ((FILE *, int, size_t, mpf_srcptr));
+#endif /* _GMP_H_HAVE_FILE */
+
+#define mpf_pow_ui __gmpf_pow_ui
+__GMP_DECLSPEC void mpf_pow_ui __GMP_PROTO ((mpf_ptr, mpf_srcptr, unsigned long int));
+
+#define mpf_random2 __gmpf_random2
+__GMP_DECLSPEC void mpf_random2 __GMP_PROTO ((mpf_ptr, mp_size_t, mp_exp_t));
+
+#define mpf_reldiff __gmpf_reldiff
+__GMP_DECLSPEC void mpf_reldiff __GMP_PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr));
+
+#define mpf_set __gmpf_set
+__GMP_DECLSPEC void mpf_set __GMP_PROTO ((mpf_ptr, mpf_srcptr));
+
+#define mpf_set_d __gmpf_set_d
+__GMP_DECLSPEC void mpf_set_d __GMP_PROTO ((mpf_ptr, double));
+
+#define mpf_set_default_prec __gmpf_set_default_prec
+__GMP_DECLSPEC void mpf_set_default_prec __GMP_PROTO ((mp_bitcnt_t)) __GMP_NOTHROW;
+
+#define mpf_set_prec __gmpf_set_prec
+__GMP_DECLSPEC void mpf_set_prec __GMP_PROTO ((mpf_ptr, mp_bitcnt_t));
+
+#define mpf_set_prec_raw __gmpf_set_prec_raw
+__GMP_DECLSPEC void mpf_set_prec_raw __GMP_PROTO ((mpf_ptr, mp_bitcnt_t)) __GMP_NOTHROW;
+
+#define mpf_set_q __gmpf_set_q
+__GMP_DECLSPEC void mpf_set_q __GMP_PROTO ((mpf_ptr, mpq_srcptr));
+
+#define mpf_set_si __gmpf_set_si
+__GMP_DECLSPEC void mpf_set_si __GMP_PROTO ((mpf_ptr, signed long int));
+
+#define mpf_set_str __gmpf_set_str
+__GMP_DECLSPEC int mpf_set_str __GMP_PROTO ((mpf_ptr, __gmp_const char *, int));
+
+#define mpf_set_ui __gmpf_set_ui
+__GMP_DECLSPEC void mpf_set_ui __GMP_PROTO ((mpf_ptr, unsigned long int));
+
+#define mpf_set_z __gmpf_set_z
+__GMP_DECLSPEC void mpf_set_z __GMP_PROTO ((mpf_ptr, mpz_srcptr));
+
+#define mpf_size __gmpf_size
+__GMP_DECLSPEC size_t mpf_size __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpf_sqrt __gmpf_sqrt
+__GMP_DECLSPEC void mpf_sqrt __GMP_PROTO ((mpf_ptr, mpf_srcptr));
+
+#define mpf_sqrt_ui __gmpf_sqrt_ui
+__GMP_DECLSPEC void mpf_sqrt_ui __GMP_PROTO ((mpf_ptr, unsigned long int));
+
+#define mpf_sub __gmpf_sub
+__GMP_DECLSPEC void mpf_sub __GMP_PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr));
+
+#define mpf_sub_ui __gmpf_sub_ui
+__GMP_DECLSPEC void mpf_sub_ui __GMP_PROTO ((mpf_ptr, mpf_srcptr, unsigned long int));
+
+#define mpf_swap __gmpf_swap
+__GMP_DECLSPEC void mpf_swap __GMP_PROTO ((mpf_ptr, mpf_ptr)) __GMP_NOTHROW;
+
+#define mpf_trunc __gmpf_trunc
+__GMP_DECLSPEC void mpf_trunc __GMP_PROTO ((mpf_ptr, mpf_srcptr));
+
+#define mpf_ui_div __gmpf_ui_div
+__GMP_DECLSPEC void mpf_ui_div __GMP_PROTO ((mpf_ptr, unsigned long int, mpf_srcptr));
+
+#define mpf_ui_sub __gmpf_ui_sub
+__GMP_DECLSPEC void mpf_ui_sub __GMP_PROTO ((mpf_ptr, unsigned long int, mpf_srcptr));
+
+#define mpf_urandomb __gmpf_urandomb  /* declared with mpf_t / gmp_randstate_t, not the _ptr typedefs used above */
+__GMP_DECLSPEC void mpf_urandomb __GMP_PROTO ((mpf_t, gmp_randstate_t, mp_bitcnt_t));
+
+
+/************ Low level positive-integer (i.e. N) routines.  ************/
+
+/* This is ugly, but we need to make user calls reach the prefixed function. */
+
+#define mpn_add __MPN(add)  /* __MPN is defined elsewhere; NOTE(review): presumably it applies the mpn symbol prefix -- confirm */
+#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpn_add)
+__GMP_DECLSPEC mp_limb_t mpn_add __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr,mp_size_t));
+#endif /* __GMP_INLINE_PROTOTYPES || __GMP_FORCE_mpn_add */
+
+#define mpn_add_1 __MPN(add_1)
+#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpn_add_1)
+__GMP_DECLSPEC mp_limb_t mpn_add_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)) __GMP_NOTHROW;
+#endif /* __GMP_INLINE_PROTOTYPES || __GMP_FORCE_mpn_add_1 */
+
+#define mpn_add_n __MPN(add_n)
+__GMP_DECLSPEC mp_limb_t mpn_add_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+
+#define mpn_addmul_1 __MPN(addmul_1)
+__GMP_DECLSPEC mp_limb_t mpn_addmul_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
+
+#define mpn_cmp __MPN(cmp)
+#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpn_cmp)
+__GMP_DECLSPEC int mpn_cmp __GMP_PROTO ((mp_srcptr, mp_srcptr, mp_size_t)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+#endif /* __GMP_INLINE_PROTOTYPES || __GMP_FORCE_mpn_cmp */
+
+#define mpn_divexact_by3(dst,src,size)  /* forwards to mpn_divexact_by3c with a zero fourth argument */ \
+  mpn_divexact_by3c (dst, src, size, __GMP_CAST (mp_limb_t, 0))
+
+#define mpn_divexact_by3c __MPN(divexact_by3c)
+__GMP_DECLSPEC mp_limb_t mpn_divexact_by3c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
+
+#define mpn_divmod_1(qp,np,nsize,dlimb)  /* forwards to mpn_divrem_1 with a zero second argument */ \
+  mpn_divrem_1 (qp, __GMP_CAST (mp_size_t, 0), np, nsize, dlimb)
+
+#define mpn_divrem __MPN(divrem)
+__GMP_DECLSPEC mp_limb_t mpn_divrem __GMP_PROTO ((mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr, mp_size_t));
+
+#define mpn_divrem_1 __MPN(divrem_1)
+__GMP_DECLSPEC mp_limb_t mpn_divrem_1 __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
+
+#define mpn_divrem_2 __MPN(divrem_2)
+__GMP_DECLSPEC mp_limb_t mpn_divrem_2 __GMP_PROTO ((mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr));
+
+#define mpn_gcd __MPN(gcd)
+__GMP_DECLSPEC mp_size_t mpn_gcd __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t));
+
+#define mpn_gcd_1 __MPN(gcd_1)
+__GMP_DECLSPEC mp_limb_t mpn_gcd_1 __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t)) __GMP_ATTRIBUTE_PURE;
+
+#define mpn_gcdext_1 __MPN(gcdext_1)
+__GMP_DECLSPEC mp_limb_t mpn_gcdext_1 __GMP_PROTO ((mp_limb_signed_t *, mp_limb_signed_t *, mp_limb_t, mp_limb_t));
+
+#define mpn_gcdext __MPN(gcdext)
+__GMP_DECLSPEC mp_size_t mpn_gcdext __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t));
+
+#define mpn_get_str __MPN(get_str)
+__GMP_DECLSPEC size_t mpn_get_str __GMP_PROTO ((unsigned char *, int, mp_ptr, mp_size_t));  /* note: the limb argument is mp_ptr, not mp_srcptr */
+
+#define mpn_hamdist __MPN(hamdist)
+__GMP_DECLSPEC mp_bitcnt_t mpn_hamdist __GMP_PROTO ((mp_srcptr, mp_srcptr, mp_size_t)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpn_lshift __MPN(lshift)
+__GMP_DECLSPEC mp_limb_t mpn_lshift __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned int));
+
+#define mpn_mod_1 __MPN(mod_1)
+__GMP_DECLSPEC mp_limb_t mpn_mod_1 __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t)) __GMP_ATTRIBUTE_PURE;
+
+#define mpn_mul __MPN(mul)
+__GMP_DECLSPEC mp_limb_t mpn_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));
+
+#define mpn_mul_1 __MPN(mul_1)
+__GMP_DECLSPEC mp_limb_t mpn_mul_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
+
+#define mpn_mul_n __MPN(mul_n)
+__GMP_DECLSPEC void mpn_mul_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+
+#define mpn_sqr __MPN(sqr)
+__GMP_DECLSPEC void mpn_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
+
+#define mpn_neg __MPN(neg)
+#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpn_neg)
+__GMP_DECLSPEC mp_limb_t mpn_neg __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
+#endif /* __GMP_INLINE_PROTOTYPES || __GMP_FORCE_mpn_neg */
+
+#define mpn_com __MPN(com)
+#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpn_com)
+__GMP_DECLSPEC void mpn_com __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
+#endif /* __GMP_INLINE_PROTOTYPES || __GMP_FORCE_mpn_com */
+
+#define mpn_perfect_square_p __MPN(perfect_square_p)
+__GMP_DECLSPEC int mpn_perfect_square_p __GMP_PROTO ((mp_srcptr, mp_size_t)) __GMP_ATTRIBUTE_PURE;
+
+#define mpn_perfect_power_p __MPN(perfect_power_p)
+__GMP_DECLSPEC int mpn_perfect_power_p __GMP_PROTO ((mp_srcptr, mp_size_t)) __GMP_ATTRIBUTE_PURE;
+
+#define mpn_popcount __MPN(popcount)
+__GMP_DECLSPEC mp_bitcnt_t mpn_popcount __GMP_PROTO ((mp_srcptr, mp_size_t)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+
+#define mpn_pow_1 __MPN(pow_1)
+__GMP_DECLSPEC mp_size_t mpn_pow_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr));
+
+/* undocumented now, but retained here for upward compatibility */
+#define mpn_preinv_mod_1 __MPN(preinv_mod_1)
+__GMP_DECLSPEC mp_limb_t mpn_preinv_mod_1 __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)) __GMP_ATTRIBUTE_PURE;
+
+#define mpn_random __MPN(random)
+__GMP_DECLSPEC void mpn_random __GMP_PROTO ((mp_ptr, mp_size_t));
+
+#define mpn_random2 __MPN(random2)
+__GMP_DECLSPEC void mpn_random2 __GMP_PROTO ((mp_ptr, mp_size_t));
+
+#define mpn_rshift __MPN(rshift)
+__GMP_DECLSPEC mp_limb_t mpn_rshift __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned int));
+
+#define mpn_scan0 __MPN(scan0)
+__GMP_DECLSPEC mp_bitcnt_t mpn_scan0 __GMP_PROTO ((mp_srcptr, mp_bitcnt_t)) __GMP_ATTRIBUTE_PURE;
+
+#define mpn_scan1 __MPN(scan1)
+__GMP_DECLSPEC mp_bitcnt_t mpn_scan1 __GMP_PROTO ((mp_srcptr, mp_bitcnt_t)) __GMP_ATTRIBUTE_PURE;
+
+#define mpn_set_str __MPN(set_str)
+__GMP_DECLSPEC mp_size_t mpn_set_str __GMP_PROTO ((mp_ptr, __gmp_const unsigned char *, size_t, int));
+
+#define mpn_sqrtrem __MPN(sqrtrem)
+__GMP_DECLSPEC mp_size_t mpn_sqrtrem __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t));
+
+#define mpn_sub __MPN(sub)
+#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpn_sub)
+__GMP_DECLSPEC mp_limb_t mpn_sub __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr,mp_size_t));
+#endif /* __GMP_INLINE_PROTOTYPES || __GMP_FORCE_mpn_sub */
+
+#define mpn_sub_1 __MPN(sub_1)
+#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpn_sub_1)
+__GMP_DECLSPEC mp_limb_t mpn_sub_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)) __GMP_NOTHROW;
+#endif /* __GMP_INLINE_PROTOTYPES || __GMP_FORCE_mpn_sub_1 */
+
+#define mpn_sub_n __MPN(sub_n)
+__GMP_DECLSPEC mp_limb_t mpn_sub_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+
+#define mpn_submul_1 __MPN(submul_1)
+__GMP_DECLSPEC mp_limb_t mpn_submul_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
+
+#define mpn_tdiv_qr __MPN(tdiv_qr)
+__GMP_DECLSPEC void mpn_tdiv_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));
+
+#define mpn_and_n __MPN(and_n)  /* first of eight functions sharing one signature: (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t) */
+__GMP_DECLSPEC void mpn_and_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+#define mpn_andn_n __MPN(andn_n)
+__GMP_DECLSPEC void mpn_andn_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+#define mpn_nand_n __MPN(nand_n)
+__GMP_DECLSPEC void mpn_nand_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+#define mpn_ior_n __MPN(ior_n)
+__GMP_DECLSPEC void mpn_ior_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+#define mpn_iorn_n __MPN(iorn_n)
+__GMP_DECLSPEC void mpn_iorn_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+#define mpn_nior_n __MPN(nior_n)
+__GMP_DECLSPEC void mpn_nior_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+#define mpn_xor_n __MPN(xor_n)
+__GMP_DECLSPEC void mpn_xor_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+#define mpn_xnor_n __MPN(xnor_n)
+__GMP_DECLSPEC void mpn_xnor_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+
+#define mpn_copyi __MPN(copyi)  /* NOTE(review): copyi/copyd presumably differ in copy direction (incrementing/decrementing) -- confirm */
+__GMP_DECLSPEC void mpn_copyi __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
+#define mpn_copyd __MPN(copyd)
+__GMP_DECLSPEC void mpn_copyd __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
+#define mpn_zero __MPN(zero)
+__GMP_DECLSPEC void mpn_zero __GMP_PROTO ((mp_ptr, mp_size_t));
+
+/**************** mpz inlines ****************/
+
+/* The following are provided as inlines where possible, but always exist as
+   library functions too, for binary compatibility.
+
+   Within gmp itself this inlining generally isn't relied on, since it
+   doesn't get done for all compilers, whereas if something is worth
+   inlining then it's worth arranging always.
+
+   There are two styles of inlining here.  When the same bit of code is
+   wanted for the inline as for the library version, then __GMP_FORCE_foo
+   arranges for that code to be emitted and the __GMP_EXTERN_INLINE
+   directive suppressed, eg. mpz_fits_uint_p.  When a different bit of code
+   is wanted for the inline than for the library version, then
+   __GMP_FORCE_foo arranges the inline to be suppressed, eg. mpz_abs.  */
+
+#if defined (__GMP_EXTERN_INLINE) && ! defined (__GMP_FORCE_mpz_abs)
+/* Inline mpz_abs: make w hold the absolute value of u.  The value is copied
+   with mpz_set unless w and u are the same object, after which only the
+   sign carried by _mp_size is forced non-negative.  Defining
+   __GMP_FORCE_mpz_abs suppresses this inline.  */
+__GMP_EXTERN_INLINE void
+mpz_abs (mpz_ptr __gmp_w, mpz_srcptr __gmp_u)
+{
+  mp_size_t  __gmp_wsize;
+
+  if (__gmp_w != __gmp_u)
+    mpz_set (__gmp_w, __gmp_u);
+
+  __gmp_wsize = __gmp_w->_mp_size;
+  __gmp_w->_mp_size = __GMP_ABS (__gmp_wsize);
+}
+#endif
+
+#if GMP_NAIL_BITS == 0  /* no nails: fits iff z is 0 or a single limb <= maxval */
+#define __GMPZ_FITS_UTYPE_P(z,maxval)  /* whole function body: locals + return, own ';' */ \
+  mp_size_t  __gmp_n = z->_mp_size;    /* negative for z < 0, so never matches */          \
+  mp_ptr  __gmp_p = z->_mp_d;                                          \
+  return (__gmp_n == 0 || (__gmp_n == 1 && __gmp_p[0] <= maxval));
+#else  /* nails: also accept two limbs whose high limb is <= maxval >> GMP_NUMB_BITS */
+#define __GMPZ_FITS_UTYPE_P(z,maxval)  /* z is used unparenthesised: pass a plain identifier */ \
+  mp_size_t  __gmp_n = z->_mp_size;                                    \
+  mp_ptr  __gmp_p = z->_mp_d;                                          \
+  return (__gmp_n == 0 || (__gmp_n == 1 && __gmp_p[0] <= maxval)       \
+         || (__gmp_n == 2 && __gmp_p[1] <= ((mp_limb_t) maxval >> GMP_NUMB_BITS)));
+#endif /* GMP_NAIL_BITS == 0 */
+
+#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpz_fits_uint_p)
+/* Return nonzero when z lies in the range 0 .. __GMP_UINT_MAX, as decided by
+   __GMPZ_FITS_UTYPE_P.  The same code serves as the inline and, when
+   __GMP_FORCE_mpz_fits_uint_p is defined, as a plain function definition
+   without the __GMP_EXTERN_INLINE qualifier.  */
+#ifndef __GMP_FORCE_mpz_fits_uint_p
+__GMP_EXTERN_INLINE
+#endif
+int
+mpz_fits_uint_p (mpz_srcptr __gmp_z) __GMP_NOTHROW
+{
+  /* The macro supplies the local declarations and the return statement.  */
+  __GMPZ_FITS_UTYPE_P (__gmp_z, __GMP_UINT_MAX);
+}
+#endif
+
+#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpz_fits_ulong_p)
+/* Return nonzero when z lies in the range 0 .. __GMP_ULONG_MAX, as decided
+   by __GMPZ_FITS_UTYPE_P.  The same code serves as the inline and, when
+   __GMP_FORCE_mpz_fits_ulong_p is defined, as a plain function definition
+   without the __GMP_EXTERN_INLINE qualifier.  */
+#ifndef __GMP_FORCE_mpz_fits_ulong_p
+__GMP_EXTERN_INLINE
+#endif
+int
+mpz_fits_ulong_p (mpz_srcptr __gmp_z) __GMP_NOTHROW
+{
+  /* The macro supplies the local declarations and the return statement.  */
+  __GMPZ_FITS_UTYPE_P (__gmp_z, __GMP_ULONG_MAX);
+}
+#endif
+
+#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpz_fits_ushort_p)
+/* Return nonzero when z lies in the range 0 .. __GMP_USHRT_MAX, as decided
+   by __GMPZ_FITS_UTYPE_P.  The same code serves as the inline and, when
+   __GMP_FORCE_mpz_fits_ushort_p is defined, as a plain function definition
+   without the __GMP_EXTERN_INLINE qualifier.  */
+#ifndef __GMP_FORCE_mpz_fits_ushort_p
+__GMP_EXTERN_INLINE
+#endif
+int
+mpz_fits_ushort_p (mpz_srcptr __gmp_z) __GMP_NOTHROW
+{
+  /* The macro supplies the local declarations and the return statement.  */
+  __GMPZ_FITS_UTYPE_P (__gmp_z, __GMP_USHRT_MAX);
+}
+#endif
+
+#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpz_get_ui)
+/* Return the low part of the magnitude of z as an unsigned long: 0 when
+   _mp_size is zero, otherwise the least significant limb (plus, with nails
+   and a long-sized limb, the second limb shifted up by GMP_NUMB_BITS).  The
+   sign of z plays no part in the result.
+
+   The low limb is fetched before the size is looked at, so _mp_d has to
+   point at a readable limb even when z is zero.  */
+#ifndef __GMP_FORCE_mpz_get_ui
+__GMP_EXTERN_INLINE
+#endif
+unsigned long
+mpz_get_ui (mpz_srcptr __gmp_z) __GMP_NOTHROW
+{
+  mp_size_t __gmp_size = __gmp_z->_mp_size;
+  mp_ptr __gmp_limbs = __gmp_z->_mp_d;
+  mp_limb_t __gmp_low = __gmp_limbs[0];
+
+  /* The choice below is made with "#if" and not an ordinary "if": that
+     keeps gcc quiet about "<< GMP_NUMB_BITS" being as wide as the type, and
+     keeps Borland C++ 6.0 quiet about an always-true condition such as
+     "__GMP_ULONG_MAX < GMP_NUMB_MASK".  */
+#if GMP_NAIL_BITS == 0 || defined (_LONG_LONG_LIMB)
+  /* Either limb==long without nails, or limb==longlong: a single limb
+     already covers an unsigned long.  */
+  if (__gmp_size == 0)
+    return 0;
+  return __gmp_low;
+#else
+  /* limb==long with nails: a second limb contributes when there is one.  */
+  __gmp_size = __GMP_ABS (__gmp_size);
+  if (__gmp_size > 1)
+    return __gmp_low + (__gmp_limbs[1] << GMP_NUMB_BITS);
+  if (__gmp_size == 0)
+    return 0;
+  return __gmp_low;
+#endif
+}
+#endif
+
+#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpz_getlimbn)
+/* Return limb number n from z's limb array (_mp_d), or 0 when n is negative
+   or not below the absolute value of _mp_size.  The in-range case is marked
+   as the expected one.  */
+#ifndef __GMP_FORCE_mpz_getlimbn
+__GMP_EXTERN_INLINE
+#endif
+mp_limb_t
+mpz_getlimbn (mpz_srcptr __gmp_z, mp_size_t __gmp_n) __GMP_NOTHROW
+{
+  if (__GMP_LIKELY (__gmp_n >= 0 && __gmp_n < __GMP_ABS (__gmp_z->_mp_size)))
+    return __gmp_z->_mp_d[__gmp_n];
+
+  return 0;  /* index outside the limbs in use */
+}
+#endif
+
+#if defined (__GMP_EXTERN_INLINE) && ! defined (__GMP_FORCE_mpz_neg)
+/* Inline mpz_neg: make w hold the negation of u.  The value is copied with
+   mpz_set unless w and u are the same object; the sign is then flipped by
+   negating _mp_size.  Defining __GMP_FORCE_mpz_neg suppresses this
+   inline.  */
+__GMP_EXTERN_INLINE void
+mpz_neg (mpz_ptr __gmp_w, mpz_srcptr __gmp_u)
+{
+  mp_size_t  __gmp_wsize;
+
+  if (__gmp_w != __gmp_u)
+    mpz_set (__gmp_w, __gmp_u);
+
+  __gmp_wsize = __gmp_w->_mp_size;
+  __gmp_w->_mp_size = - __gmp_wsize;
+}
+#endif
+
+#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpz_perfect_square_p)
+/* Return nonzero if a is a perfect square.  Positive values are passed to
+   mpn_perfect_square_p with the limb pointer and limb count; zero counts as
+   a square and negative values do not.  */
+#ifndef __GMP_FORCE_mpz_perfect_square_p
+__GMP_EXTERN_INLINE
+#endif
+int
+mpz_perfect_square_p (mpz_srcptr __gmp_a)
+{
+  mp_size_t __gmp_n = __gmp_a->_mp_size;
+
+  if (__GMP_LIKELY (__gmp_n > 0))
+    return mpn_perfect_square_p (__gmp_a->_mp_d, __gmp_n);
+
+  /* Only zero or negative sizes get here: 1 for zero, 0 for negatives.  */
+  return (__gmp_n >= 0);
+}
+#endif
+
+#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpz_popcount)
+/* Bit population count of u.  Positive values are passed to mpn_popcount
+   with the limb pointer and limb count; zero gives 0 and a negative value
+   gives __GMP_ULONG_MAX.  */
+#ifndef __GMP_FORCE_mpz_popcount
+__GMP_EXTERN_INLINE
+#endif
+mp_bitcnt_t
+mpz_popcount (mpz_srcptr __gmp_u) __GMP_NOTHROW
+{
+  mp_size_t __gmp_n = __gmp_u->_mp_size;
+
+  if (__GMP_LIKELY (__gmp_n > 0))
+    return mpn_popcount (__gmp_u->_mp_d, __gmp_n);
+
+  /* Zero or negative size.  */
+  return (__gmp_n < 0 ? __GMP_ULONG_MAX : 0);
+}
+#endif
+
+#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpz_set_q)
+/* Set w from the rational u: the numerator and denominator of u are handed
+   to mpz_tdiv_q, which stores the integer quotient in w.
+   NOTE(review): the rounding is whatever mpz_tdiv_q does (presumably
+   truncation) -- confirm against its documentation.  */
+#ifndef __GMP_FORCE_mpz_set_q
+__GMP_EXTERN_INLINE
+#endif
+void
+mpz_set_q (mpz_ptr __gmp_w, mpq_srcptr __gmp_u)
+{
+  mpz_tdiv_q (__gmp_w, mpq_numref (__gmp_u), mpq_denref (__gmp_u));
+}
+#endif
+
+#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpz_size)
+/* Return the absolute value of z's _mp_size field, i.e. the size with the
+   sign dropped, converted to size_t.  */
+#ifndef __GMP_FORCE_mpz_size
+__GMP_EXTERN_INLINE
+#endif
+size_t
+mpz_size (mpz_srcptr __gmp_z) __GMP_NOTHROW
+{
+  mp_size_t __gmp_n = __gmp_z->_mp_size;
+
+  return __GMP_ABS (__gmp_n);
+}
+#endif
+
+
+/**************** mpq inlines ****************/
+
+#if defined (__GMP_EXTERN_INLINE) && ! defined (__GMP_FORCE_mpq_abs)
+/* Inline mpq_abs: make w hold the absolute value of u.  The value is copied
+   with mpq_set unless w and u are the same object; only the numerator's
+   _mp_size is then forced non-negative, the denominator is left alone.
+   Defining __GMP_FORCE_mpq_abs suppresses this inline.  */
+__GMP_EXTERN_INLINE void
+mpq_abs (mpq_ptr __gmp_w, mpq_srcptr __gmp_u)
+{
+  mp_size_t  __gmp_nsize;
+
+  if (__gmp_w != __gmp_u)
+    mpq_set (__gmp_w, __gmp_u);
+
+  __gmp_nsize = __gmp_w->_mp_num._mp_size;
+  __gmp_w->_mp_num._mp_size = __GMP_ABS (__gmp_nsize);
+}
+#endif
+
+#if defined (__GMP_EXTERN_INLINE) && ! defined (__GMP_FORCE_mpq_neg)
+/* Inline mpq_neg: make w hold the negation of u.  The value is copied with
+   mpq_set unless w and u are the same object; the sign is then flipped by
+   negating the numerator's _mp_size, the denominator is left alone.
+   Defining __GMP_FORCE_mpq_neg suppresses this inline.  */
+__GMP_EXTERN_INLINE void
+mpq_neg (mpq_ptr __gmp_w, mpq_srcptr __gmp_u)
+{
+  mp_size_t  __gmp_nsize;
+
+  if (__gmp_w != __gmp_u)
+    mpq_set (__gmp_w, __gmp_u);
+
+  __gmp_nsize = __gmp_w->_mp_num._mp_size;
+  __gmp_w->_mp_num._mp_size = - __gmp_nsize;
+}
+#endif
+
+
+/**************** mpn inlines ****************/
+
+/* The comments with __GMPN_ADD_1 below apply here too.
+
+   The test for FUNCTION returning 0 should predict well.  If it's assumed
+   {yp,ysize} will usually have a random number of bits then the high limb
+   won't be full and a carry out will occur a good deal less than 50% of the
+   time.
+
+   ysize==0 isn't a documented feature, but is used internally in a few
+   places.
+
+   Producing cout last stops it using up a register during the main part of
+   the calculation, though gcc (as of 3.0) on an "if (mpn_add (...))"
+   doesn't seem able to move the true and false legs of the conditional up
+   to the two places cout is generated.
+
+   Outline of the expansion: unless ysize is 0, FUNCTION (wp, xp, yp, ysize)
+   processes the low ysize limbs.  If it returns nonzero, TEST is evaluated
+   for successive limbs __gmp_x = (xp)[__gmp_i]; TEST has to store the
+   adjusted limb through (wp)[__gmp_i++] and be nonzero for as long as the
+   carry or borrow continues.  Reaching xsize while it still continues sets
+   cout to 1 and jumps to __gmp_done.  In every other case the limbs from
+   __gmp_i upwards go to __GMPN_COPY_REST when wp != xp, and cout is set
+   to 0.
+
+   Because of the __gmp_done label the macro can be expanded only once in
+   any one function.  wp, xp and xsize are evaluated more than once; yp and
+   ysize once.  */
+
+#define __GMPN_AORS(cout, wp, xp, xsize, yp, ysize, FUNCTION, TEST)     \
+  do {                                                                  \
+    mp_size_t  __gmp_i;                                                 \
+    mp_limb_t  __gmp_x;                                                 \
+                                                                        \
+    /* ASSERT ((ysize) >= 0); */                                        \
+    /* ASSERT ((xsize) >= (ysize)); */                                  \
+    /* ASSERT (MPN_SAME_OR_SEPARATE2_P (wp, xsize, xp, xsize)); */      \
+    /* ASSERT (MPN_SAME_OR_SEPARATE2_P (wp, xsize, yp, ysize)); */      \
+                                                                        \
+    __gmp_i = (ysize);                                                  \
+    if (__gmp_i != 0)                                                   \
+      {                                                                 \
+        if (FUNCTION (wp, xp, yp, __gmp_i))                             \
+          {                                                             \
+            do                                                          \
+              {                                                         \
+                if (__gmp_i >= (xsize))                                 \
+                  {                                                     \
+                    (cout) = 1;                                         \
+                    goto __gmp_done;                                    \
+                  }                                                     \
+                __gmp_x = (xp)[__gmp_i];                                \
+              }                                                         \
+            while (TEST);                                               \
+          }                                                             \
+      }                                                                 \
+    if ((wp) != (xp))                                                   \
+      __GMPN_COPY_REST (wp, xp, xsize, __gmp_i);                        \
+    (cout) = 0;                                                         \
+  __gmp_done:                                                           \
+    ;                                                                   \
+  } while (0)
+
+#define __GMPN_ADD(cout, wp, xp, xsize, yp, ysize)  /* mpn_add_n on the low limbs, */ \
+  __GMPN_AORS (cout, wp, xp, xsize, yp, ysize, mpn_add_n,  /* then +1 while the sum wraps to 0 */ \
+               (((wp)[__gmp_i++] = (__gmp_x + 1) & GMP_NUMB_MASK) == 0))
+#define __GMPN_SUB(cout, wp, xp, xsize, yp, ysize)  /* mpn_sub_n on the low limbs, */ \
+  __GMPN_AORS (cout, wp, xp, xsize, yp, ysize, mpn_sub_n,  /* then -1 while the source limb was 0 */ \
+               (((wp)[__gmp_i++] = (__gmp_x - 1) & GMP_NUMB_MASK), __gmp_x == 0))
+
+
+/* The use of __gmp_i indexing is designed to ensure a compile time src==dst
+   remains nice and clear to the compiler, so that __GMPN_COPY_REST can
+   disappear, and the load/add/store gets a chance to become a
+   read-modify-write on CISC CPUs.
+
+   Alternatives:
+
+   Using a pair of pointers instead of indexing would be possible, but gcc
+   isn't able to recognise compile-time src==dst in that case, even when the
+   pointers are incremented more or less together.  Other compilers would
+   very likely have similar difficulty.
+
+   gcc could use "if (__builtin_constant_p(src==dst) && src==dst)" or
+   similar to detect a compile-time src==dst.  This works nicely on gcc
+   2.95.x, it's not good on gcc 3.0 where __builtin_constant_p(p==p) seems
+   to be always false, for a pointer p.  But the current code form seems
+   good enough for src==dst anyway.
+
+   gcc on x86 as usual doesn't give particularly good flags handling for the
+   carry/borrow detection.  It's tempting to want some multi instruction asm
+   blocks to help it, and this was tried, but in truth there's only a few
+   instructions to save and any gain is all too easily lost by register
+   juggling setting up for the asm.
+
+   Macro arguments: cout receives the final carry or borrow (0 or 1), dst
+   and src are n-limb vectors (n >= 1), v is the single limb applied to
+   src[0], OP is + or - and CB (r, x, y) must be nonzero when r = x OP y
+   wrapped around (see __GMPN_ADDCB and __GMPN_SUBCB).  After limb 0 the
+   loop applies "OP 1" limb by limb until CB reports no wrap; the remaining
+   limbs then go to __GMPN_COPY_REST unless src == dst.  If the loop runs
+   through limb n-1 while still wrapping, cout stays 1.  dst, src, n and v
+   are each evaluated more than once.  */
+
+#if GMP_NAIL_BITS == 0
+#define __GMPN_AORS_1(cout, dst, src, n, v, OP, CB)            \
+  do {                                                         \
+    mp_size_t  __gmp_i;                                                \
+    mp_limb_t  __gmp_x, __gmp_r;                                \
+                                                               \
+    /* ASSERT ((n) >= 1); */                                   \
+    /* ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, n)); */       \
+                                                               \
+    __gmp_x = (src)[0];                                                \
+    __gmp_r = __gmp_x OP (v);                                   \
+    (dst)[0] = __gmp_r;                                                \
+    if (CB (__gmp_r, __gmp_x, (v)))                             \
+      {                                                                \
+       (cout) = 1;                                             \
+       for (__gmp_i = 1; __gmp_i < (n);)                       \
+         {                                                     \
+           __gmp_x = (src)[__gmp_i];                           \
+           __gmp_r = __gmp_x OP 1;                             \
+           (dst)[__gmp_i] = __gmp_r;                           \
+           ++__gmp_i;                                          \
+           if (!CB (__gmp_r, __gmp_x, 1))                      \
+             {                                                 \
+               if ((src) != (dst))                             \
+                 __GMPN_COPY_REST (dst, src, n, __gmp_i);      \
+               (cout) = 0;                                     \
+               break;                                          \
+             }                                                 \
+         }                                                     \
+      }                                                                \
+    else                                                       \
+      {                                                                \
+       if ((src) != (dst))                                     \
+         __GMPN_COPY_REST (dst, src, n, 1);                    \
+       (cout) = 0;                                             \
+      }                                                                \
+  } while (0)
+#endif /* GMP_NAIL_BITS == 0 */
+
+#if GMP_NAIL_BITS >= 1  /* nails variant: same structure as the GMP_NAIL_BITS == 0 definition */
+#define __GMPN_AORS_1(cout, dst, src, n, v, OP, CB)            \
+  do {                                                         \
+    mp_size_t  __gmp_i;                                                \
+    mp_limb_t  __gmp_x, __gmp_r;                               \
+    /* CB is unused here: a carry or borrow shows in the bits of __gmp_r above GMP_NUMB_BITS */ \
+    /* ASSERT ((n) >= 1); */                                   \
+    /* ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, n)); */       \
+    /* limb 0 takes v; stored limbs are always masked with GMP_NUMB_MASK */ \
+    __gmp_x = (src)[0];                                                \
+    __gmp_r = __gmp_x OP (v);                                  \
+    (dst)[0] = __gmp_r & GMP_NUMB_MASK;                                \
+    if (__gmp_r >> GMP_NUMB_BITS != 0)                         \
+      {                                                                \
+       (cout) = 1;                                             \
+       for (__gmp_i = 1; __gmp_i < (n);)                       \
+         {                                                     \
+           __gmp_x = (src)[__gmp_i];                           \
+           __gmp_r = __gmp_x OP 1;                             \
+           (dst)[__gmp_i] = __gmp_r & GMP_NUMB_MASK;           \
+           ++__gmp_i;                                          \
+           if (__gmp_r >> GMP_NUMB_BITS == 0)                  \
+             {                                                 \
+               if ((src) != (dst))                             \
+                 __GMPN_COPY_REST (dst, src, n, __gmp_i);      \
+               (cout) = 0;                                     \
+               break;                                          \
+             }                                                 \
+         }                                                     \
+      }                                                                \
+    else                                                       \
+      {                                                                \
+       if ((src) != (dst))                                     \
+         __GMPN_COPY_REST (dst, src, n, 1);                    \
+       (cout) = 0;                                             \
+      }                                                                \
+  } while (0)
+#endif /* GMP_NAIL_BITS >= 1 */
+
+/* Carry/borrow tests handed to __GMPN_AORS_1 as its CB argument, invoked as
+   CB (r, x, y) with r the result limb, x the source limb and y the amount
+   added or subtracted.  An addition carried when the result is smaller
+   than the addend; a subtraction borrowed when the source limb is smaller
+   than the subtrahend.  */
+#define __GMPN_ADDCB(r,x,y) ((r) < (y))
+#define __GMPN_SUBCB(r,x,y) ((x) < (y))
+
+/* Add or subtract the single limb v to/from {src,n}, storing the result at
+   {dst,n} and the carry or borrow in "cout".  */
+#define __GMPN_ADD_1(cout, dst, src, n, v)          \
+  __GMPN_AORS_1(cout, dst, src, n, v, +, __GMPN_ADDCB)
+#define __GMPN_SUB_1(cout, dst, src, n, v)          \
+  __GMPN_AORS_1(cout, dst, src, n, v, -, __GMPN_SUBCB)
+
+
+/* Compare {xp,size} and {yp,size}, setting "result" to positive, zero or
+   negative.  size==0 is allowed.  On random data usually only one limb will
+   need to be examined to get a result, so it's worth having it inline.  */
+#define __GMPN_CMP(result, xp, yp, size)                                \
+  do {                                                                  \
+    mp_size_t  __gmp_i;                                                 \
+    mp_limb_t  __gmp_x, __gmp_y;                                        \
+                                                                        \
+    /* ASSERT ((size) >= 0); */                                         \
+                                                                        \
+    (result) = 0;                                                       \
+    __gmp_i = (size);                                                   \
+    while (--__gmp_i >= 0)                                              \
+      {                                                                 \
+        __gmp_x = (xp)[__gmp_i];                                        \
+        __gmp_y = (yp)[__gmp_i];                                        \
+        if (__gmp_x != __gmp_y)                                         \
+          {                                                             \
+            /* Cannot use __gmp_x - __gmp_y, may overflow an "int" */   \
+            (result) = (__gmp_x > __gmp_y ? 1 : -1);                    \
+            break;                                                      \
+          }                                                             \
+      }                                                                 \
+  } while (0)
+
+
+#if defined (__GMPN_COPY) && ! defined (__GMPN_COPY_REST)
+/* A __GMPN_COPY has already been supplied but no __GMPN_COPY_REST: build
+   the latter from the former by advancing both pointers by "start" and
+   copying the (size)-(start) limbs that remain.  */
+#define __GMPN_COPY_REST(dst, src, size, start)                 \
+  do {                                                          \
+    /* ASSERT ((start) >= 0); */                                \
+    /* ASSERT ((start) <= (size)); */                           \
+    __GMPN_COPY ((dst)+(start), (src)+(start), (size)-(start)); \
+  } while (0)
+#endif
+
+/* Copy {src,size} to {dst,size}, starting at "start".  This is designed to
+   keep the indexing dst[j] and src[j] nice and simple for __GMPN_ADD_1,
+   __GMPN_ADD, etc.
+
+   This generic version is only used when no __GMPN_COPY_REST has been
+   defined by this point.  It is a plain ascending limb-by-limb loop, and
+   limbs below "start" are left untouched.  The __GMP_CRAY_Pragma line is
+   presumably a vectorization hint for Cray compilers (TODO confirm).  */
+#if ! defined (__GMPN_COPY_REST)
+#define __GMPN_COPY_REST(dst, src, size, start)                 \
+  do {                                                          \
+    mp_size_t __gmp_j;                                          \
+    /* ASSERT ((size) >= 0); */                                 \
+    /* ASSERT ((start) >= 0); */                                \
+    /* ASSERT ((start) <= (size)); */                           \
+    /* ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size)); */     \
+    __GMP_CRAY_Pragma ("_CRI ivdep");                           \
+    for (__gmp_j = (start); __gmp_j < (size); __gmp_j++)        \
+      (dst)[__gmp_j] = (src)[__gmp_j];                          \
+  } while (0)
+#endif
+
+/* Enhancement: Use some of the smarter code from gmp-impl.h.  Maybe use
+   mpn_copyi if there's a native version, and if we don't mind demanding
+   binary compatibility for it (on targets which use it).
+
+   Until then the default whole-vector copy below is simply a copy-rest
+   that starts at limb 0.  */
+
+#if ! defined (__GMPN_COPY)
+#define __GMPN_COPY(dst, src, size)   __GMPN_COPY_REST (dst, src, size, 0)
+#endif
+
+
+#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpn_add)
+/* mpn_add as a function: runs __GMPN_ADD on the arguments and returns the
+   carry-out that macro leaves in __gmp_c.  The definition is compiled when
+   __GMP_EXTERN_INLINE is available or when __GMP_FORCE_mpn_add is defined;
+   in the forced case the __GMP_EXTERN_INLINE qualifier is left off
+   (presumably to emit the ordinary out-of-line copy -- TODO confirm).  */
+#if ! defined (__GMP_FORCE_mpn_add)
+__GMP_EXTERN_INLINE
+#endif
+mp_limb_t
+mpn_add (mp_ptr __gmp_wp, mp_srcptr __gmp_xp, mp_size_t __gmp_xsize, mp_srcptr __gmp_yp, mp_size_t __gmp_ysize)
+{
+  mp_limb_t  __gmp_c;
+  __GMPN_ADD (__gmp_c, __gmp_wp, __gmp_xp, __gmp_xsize, __gmp_yp, __gmp_ysize);
+  return __gmp_c;
+}
+#endif
+
+#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpn_add_1)
+/* mpn_add_1 as a function: adds the single limb __gmp_n to
+   {__gmp_src,__gmp_size} via __GMPN_ADD_1, storing at __gmp_dst, and
+   returns the carry-out.  Note that __gmp_n is the limb to add, not a
+   length.  __GMP_EXTERN_INLINE is omitted when __GMP_FORCE_mpn_add_1 is
+   defined.  */
+#if ! defined (__GMP_FORCE_mpn_add_1)
+__GMP_EXTERN_INLINE
+#endif
+mp_limb_t
+mpn_add_1 (mp_ptr __gmp_dst, mp_srcptr __gmp_src, mp_size_t __gmp_size, mp_limb_t __gmp_n) __GMP_NOTHROW
+{
+  mp_limb_t  __gmp_c;
+  __GMPN_ADD_1 (__gmp_c, __gmp_dst, __gmp_src, __gmp_size, __gmp_n);
+  return __gmp_c;
+}
+#endif
+
+#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpn_cmp)
+/* Function form of __GMPN_CMP: compares the two __gmp_size-limb operands
+   and returns whatever that macro stores, i.e. 1, 0 or -1.  The
+   __GMP_EXTERN_INLINE qualifier is dropped when __GMP_FORCE_mpn_cmp is
+   defined.  */
+#if ! defined (__GMP_FORCE_mpn_cmp)
+__GMP_EXTERN_INLINE
+#endif
+int
+mpn_cmp (mp_srcptr __gmp_xp, mp_srcptr __gmp_yp, mp_size_t __gmp_size) __GMP_NOTHROW
+{
+  int __gmp_cmpres;
+
+  __GMPN_CMP (__gmp_cmpres, __gmp_xp, __gmp_yp, __gmp_size);
+
+  return __gmp_cmpres;
+}
+#endif
+
+#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpn_sub)
+/* mpn_sub as a function: runs __GMPN_SUB on the arguments and returns the
+   borrow-out that macro leaves in __gmp_c.  __GMP_EXTERN_INLINE is omitted
+   when __GMP_FORCE_mpn_sub is defined.  */
+#if ! defined (__GMP_FORCE_mpn_sub)
+__GMP_EXTERN_INLINE
+#endif
+mp_limb_t
+mpn_sub (mp_ptr __gmp_wp, mp_srcptr __gmp_xp, mp_size_t __gmp_xsize, mp_srcptr __gmp_yp, mp_size_t __gmp_ysize)
+{
+  mp_limb_t  __gmp_c;
+  __GMPN_SUB (__gmp_c, __gmp_wp, __gmp_xp, __gmp_xsize, __gmp_yp, __gmp_ysize);
+  return __gmp_c;
+}
+#endif
+
+#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpn_sub_1)
+/* mpn_sub_1 as a function: subtracts the single limb __gmp_n from
+   {__gmp_src,__gmp_size} via __GMPN_SUB_1, storing at __gmp_dst, and
+   returns the borrow-out.  Note that __gmp_n is the limb to subtract, not
+   a length.  __GMP_EXTERN_INLINE is omitted when __GMP_FORCE_mpn_sub_1 is
+   defined.  */
+#if ! defined (__GMP_FORCE_mpn_sub_1)
+__GMP_EXTERN_INLINE
+#endif
+mp_limb_t
+mpn_sub_1 (mp_ptr __gmp_dst, mp_srcptr __gmp_src, mp_size_t __gmp_size, mp_limb_t __gmp_n) __GMP_NOTHROW
+{
+  mp_limb_t  __gmp_c;
+  __GMPN_SUB_1 (__gmp_c, __gmp_dst, __gmp_src, __gmp_size, __gmp_n);
+  return __gmp_c;
+}
+#endif
+
+#if defined (__GMP_EXTERN_INLINE) || defined (__GMP_FORCE_mpn_neg)
+/* Negate the __gmp_n-limb number at __gmp_up, i.e. compute 0 - {up,n}, and
+   store the low __gmp_n limbs of the result at __gmp_rp.  The return value
+   is the borrow out of the top limb: 1 if any source limb was non-zero,
+   otherwise 0.
+
+   __gmp_n must be at least 1: the loop is a do/while, so its body runs
+   before the count is tested.  Each source limb is read before the
+   matching destination limb is written, so rp == up is fine.
+   __GMP_EXTERN_INLINE is omitted when __GMP_FORCE_mpn_neg is defined.  */
+#if ! defined (__GMP_FORCE_mpn_neg)
+__GMP_EXTERN_INLINE
+#endif
+mp_limb_t
+mpn_neg (mp_ptr __gmp_rp, mp_srcptr __gmp_up, mp_size_t __gmp_n)
+{
+  mp_limb_t __gmp_ul, __gmp_cy;
+  __gmp_cy = 0;
+  do {
+      __gmp_ul = *__gmp_up++;
+      /* Mask to the numb bits, as __GMPN_AORS_1 does, so that a nails
+         build never stores a limb with nail bits set.  GMP_NUMB_MASK is
+         expected to be all ones when GMP_NAIL_BITS is 0, making this a
+         no-op there.  */
+      *__gmp_rp++ = (-__gmp_ul - __gmp_cy) & GMP_NUMB_MASK;
+      /* Once any non-zero limb has been seen, every higher limb borrows.  */
+      __gmp_cy |= __gmp_ul != 0;
+  } while (--__gmp_n != 0);
+  return __gmp_cy;
+}
+#endif
+
+#if defined (__cplusplus)
+}
+#endif
+
+
+/* Allow faster testing for negative, zero, and positive.  Each macro yields
+   -1, 0 or 1 from the sign of the _mp_size field (for mpq, that of the
+   numerator).  The argument is evaluated twice, so it must be free of
+   side effects.  */
+#define mpz_sgn(Z) ((Z)->_mp_size < 0 ? -1 : (Z)->_mp_size > 0)
+#define mpf_sgn(F) ((F)->_mp_size < 0 ? -1 : (F)->_mp_size > 0)
+#define mpq_sgn(Q) ((Q)->_mp_num._mp_size < 0 ? -1 : (Q)->_mp_num._mp_size > 0)
+
+/* When using GCC, optimize certain common comparisons.  A compile-time
+   constant zero operand turns the comparison into a plain sign test, and a
+   compile-time constant positive operand to mpz_cmp_si goes to the
+   unsigned comparison.  mpq_cmp_si forwards to mpq_cmp_ui when the
+   numerator is known at compile time to be non-negative.  Everything else,
+   and every call on non-GCC compilers, goes to the underscore-prefixed
+   library functions.  */
+#if defined (__GNUC__) && __GNUC__ >= 2
+#define mpz_cmp_ui(Z,UI) \
+  (__builtin_constant_p (UI) && (UI) == 0                              \
+   ? mpz_sgn (Z) : _mpz_cmp_ui (Z,UI))
+#define mpz_cmp_si(Z,SI) \
+  (__builtin_constant_p (SI) && (SI) == 0 ? mpz_sgn (Z)                        \
+   : __builtin_constant_p (SI) && (SI) > 0                             \
+    ? _mpz_cmp_ui (Z, __GMP_CAST (unsigned long int, SI))              \
+   : _mpz_cmp_si (Z,SI))
+#define mpq_cmp_ui(Q,NUI,DUI) \
+  (__builtin_constant_p (NUI) && (NUI) == 0                            \
+   ? mpq_sgn (Q) : _mpq_cmp_ui (Q,NUI,DUI))
+#define mpq_cmp_si(q,n,d)                       \
+  (__builtin_constant_p ((n) >= 0) && (n) >= 0  \
+   ? mpq_cmp_ui (q, __GMP_CAST (unsigned long, n), d) \
+   : _mpq_cmp_si (q, n, d))
+#else
+#define mpz_cmp_ui(Z,UI) _mpz_cmp_ui (Z,UI)
+#define mpz_cmp_si(Z,UI) _mpz_cmp_si (Z,UI)
+#define mpq_cmp_ui(Q,NUI,DUI) _mpq_cmp_ui (Q,NUI,DUI)
+#define mpq_cmp_si(q,n,d)  _mpq_cmp_si(q,n,d)
+#endif
+
+
+/* Using "&" rather than "&&" means these can come out branch-free.  Every
+   mpz_t has at least one limb allocated, so fetching the low limb is always
+   allowed.  mpz_odd_p ANDs the 0-or-1 "size is non-zero" test with the low
+   limb cast to int, so its value is 0 or 1.  z is evaluated twice.  */
+#define mpz_odd_p(z)   (((z)->_mp_size != 0) & __GMP_CAST (int, (z)->_mp_d[0]))
+#define mpz_even_p(z)  (! mpz_odd_p (z))
+
+
+/**************** C++ routines ****************/
+
+/* Stream output and input operators for the mpz, mpq and mpf pointer
+   types.  Only declarations appear here, and only C++ translation units
+   see them.  */
+#ifdef __cplusplus
+__GMP_DECLSPEC_XX std::ostream& operator<< (std::ostream &, mpz_srcptr);
+__GMP_DECLSPEC_XX std::ostream& operator<< (std::ostream &, mpq_srcptr);
+__GMP_DECLSPEC_XX std::ostream& operator<< (std::ostream &, mpf_srcptr);
+__GMP_DECLSPEC_XX std::istream& operator>> (std::istream &, mpz_ptr);
+__GMP_DECLSPEC_XX std::istream& operator>> (std::istream &, mpq_ptr);
+__GMP_DECLSPEC_XX std::istream& operator>> (std::istream &, mpf_ptr);
+#endif
+
+
+/* Source-level compatibility with GMP 2 and earlier. */
+#define mpn_divmod(qp,np,nsize,dp,dsize) \
+  mpn_divrem (qp, __GMP_CAST (mp_size_t, 0), np, nsize, dp, dsize)
+
+/* Source-level compatibility with GMP 1.  In the two function-like _ui
+   macros a null remainder argument "r" selects the variant that does not
+   store a remainder; "r" is evaluated twice otherwise.  */
+#define mpz_mdiv       mpz_fdiv_q
+#define mpz_mdivmod    mpz_fdiv_qr
+#define mpz_mmod       mpz_fdiv_r
+#define mpz_mdiv_ui    mpz_fdiv_q_ui
+#define mpz_mdivmod_ui(q,r,n,d) \
+  (((r) == 0) ? mpz_fdiv_q_ui (q,n,d) : mpz_fdiv_qr_ui (q,r,n,d))
+#define mpz_mmod_ui(r,n,d) \
+  (((r) == 0) ? mpz_fdiv_ui (n,d) : mpz_fdiv_r_ui (r,n,d))
+
+/* Useful synonyms, but not quite compatible with GMP 1.  */
+#define mpz_div                mpz_fdiv_q
+#define mpz_divmod     mpz_fdiv_qr
+#define mpz_div_ui     mpz_fdiv_q_ui
+#define mpz_divmod_ui  mpz_fdiv_qr_ui
+#define mpz_div_2exp   mpz_fdiv_q_2exp
+#define mpz_mod_2exp   mpz_fdiv_r_2exp
+
+/* Error codes.  The non-zero values are distinct single bits, so several
+   can be held in one integer at once (presumably OR'ed into a global
+   error flag -- TODO confirm against the library sources).  */
+enum
+{
+  GMP_ERROR_NONE = 0,
+  GMP_ERROR_UNSUPPORTED_ARGUMENT = 1,
+  GMP_ERROR_DIVISION_BY_ZERO = 2,
+  GMP_ERROR_SQRT_OF_NEGATIVE = 4,
+  GMP_ERROR_INVALID_ARGUMENT = 8
+};
+
+/* Define CC and CFLAGS which were used to build this version of GMP.  The
+   @...@ strings are placeholders, presumably filled in when the header is
+   generated at configure time (TODO confirm).  */
+#define __GMP_CC "@CC@"
+#define __GMP_CFLAGS "@CFLAGS@"
+
+/* Major version number is the value of __GNU_MP__ too, above and in mp.h.
+   __GNU_MP_RELEASE folds the three parts into one comparable integer,
+   major*10000 + minor*100 + patchlevel (50005 here).  */
+#define __GNU_MP_VERSION 5
+#define __GNU_MP_VERSION_MINOR 0
+#define __GNU_MP_VERSION_PATCHLEVEL 5
+#define __GNU_MP_RELEASE (__GNU_MP_VERSION * 10000 + __GNU_MP_VERSION_MINOR * 100 + __GNU_MP_VERSION_PATCHLEVEL)
+
+#define __GMP_H__
+#endif /* __GMP_H__ */
diff --git a/gmp-impl.h b/gmp-impl.h

new file mode 100644 (file)

index 0000000..b424f9d
--- /dev/null
+++ b/gmp-impl.h
@@ -0,0 +1,4667 @@
+/* Include file for internal GNU MP types and definitions.
+
+   THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND ARE ALMOST CERTAIN TO
+   BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE GNU MP RELEASES.
+
+Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1999, 2000, 2001, 2002, 2003,
+2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+/* __GMP_DECLSPEC must be given on any global data that will be accessed
+   from outside libgmp, meaning from the test or development programs, or
+   from libgmpxx.  Failing to do this will result in an incorrect address
+   being used for the accesses.  On functions __GMP_DECLSPEC makes calls
+   from outside libgmp more efficient, but they'll still work fine without
+   it.  */
+
+
+#ifndef __GMP_IMPL_H__
+#define __GMP_IMPL_H__
+
+#if defined _CRAY
+#include <intrinsics.h>  /* for _popcnt */
+#endif
+
+/* limits.h is not used in general, since it's an ANSI-ism, and since on
+   solaris gcc 2.95 under -mcpu=ultrasparc in ABI=32 ends up getting wrong
+   values (the ABI=64 values).
+
+   On Cray vector systems, however, we need the system limits.h since sizes
+   of signed and unsigned types can differ there, depending on compiler
+   options (eg. -hnofastmd), making our SHRT_MAX etc expressions fail.  For
+   reference, int can be 46 or 64 bits, whereas uint is always 64 bits; and
+   short can be 24, 32, 46 or 64 bits, and different for ushort.  */
+
+#if defined _CRAY
+#include <limits.h>
+#endif
+
+/* For fat.h and other fat binary stuff.
+   No need for __GMP_ATTRIBUTE_PURE or __GMP_NOTHROW, since functions
+   declared this way are only used to set function pointers in __gmp_cpuvec,
+   they're not called directly.
+
+   Each DECL_xxx (name) expands to a prototype for "name".  Where two
+   routines share a signature the second simply reuses the first's macro:
+   copyi as copyd, mul_1 and submul_1 as addmul_1, rshift as lshift, and
+   sub_n as add_n.  */
+#define DECL_add_n(name) \
+  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t))
+#define DECL_addmul_1(name) \
+  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t))
+#define DECL_copyd(name) \
+  __GMP_DECLSPEC void name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t))
+#define DECL_copyi(name) \
+  DECL_copyd (name)
+#define DECL_divexact_1(name) \
+  __GMP_DECLSPEC void name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t))
+#define DECL_divexact_by3c(name) \
+  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t))
+#define DECL_divrem_1(name) \
+  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t))
+#define DECL_gcd_1(name) \
+  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t))
+#define DECL_lshift(name) \
+  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned))
+#define DECL_mod_1(name) \
+  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t))
+#define DECL_mod_34lsub1(name) \
+  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t))
+#define DECL_modexact_1c_odd(name) \
+  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t))
+#define DECL_mul_1(name) \
+  DECL_addmul_1 (name)
+#define DECL_mul_basecase(name) \
+  __GMP_DECLSPEC void name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t))
+#define DECL_preinv_divrem_1(name) \
+  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int))
+#define DECL_preinv_mod_1(name) \
+  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t))
+#define DECL_rshift(name) \
+  DECL_lshift (name)
+#define DECL_sqr_basecase(name) \
+  __GMP_DECLSPEC void name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t))
+#define DECL_sub_n(name) \
+  DECL_add_n (name)
+#define DECL_submul_1(name) \
+  DECL_addmul_1 (name)
+
+#if ! __GMP_WITHIN_CONFIGURE
+#include "config.h"
+#include "gmp-mparam.h"
+#include "fib_table.h"
+#include "mp_bases.h"
+#if WANT_FAT_BINARY
+#include "fat.h"
+#endif
+#endif
+
+#if HAVE_INTTYPES_H      /* for uint_least32_t */
+# include <inttypes.h>
+#else
+# if HAVE_STDINT_H
+#  include <stdint.h>
+# endif
+#endif
+
+#ifdef __cplusplus
+#include <cstring>  /* for strlen */
+#include <string>   /* for std::string */
+#endif
+
+
+/* Give WANT_TMP_DEBUG a value in every build, since TMP_ALLOC_LIMBS_2 tests
+   it in an ordinary C "if" rather than with the preprocessor.  */
+#ifndef WANT_TMP_DEBUG  /* for TMP_ALLOC_LIMBS_2 and others */
+#define WANT_TMP_DEBUG 0
+#endif
+
+/* The following tries to get a good version of alloca.  The tests are
+   adapted from autoconf AC_FUNC_ALLOCA, with a couple of additions.
+   Whether this succeeds is tested by GMP_FUNC_ALLOCA and HAVE_ALLOCA will
+   be setup appropriately.
+
+   ifndef alloca - a cpp define might already exist.
+       glibc <stdlib.h> includes <alloca.h> which uses GCC __builtin_alloca.
+       HP cc +Olibcalls adds a #define of alloca to __builtin_alloca.
+
+   GCC __builtin_alloca - preferred whenever available.
+
+   _AIX pragma - IBM compilers need a #pragma in "each module that needs to
+       use alloca".  Pragma indented to protect pre-ANSI cpp's.  _IBMR2 was
+       used in past versions of GMP, retained still in case it matters.
+
+       The autoconf manual says this pragma needs to be at the start of a C
+       file, apart from comments and preprocessor directives.  Is that true?
+       xlc on aix 4.xxx doesn't seem to mind it being after prototypes etc
+       from gmp.h.
+
+   The remaining cases, in the order tested below: __ALLOCA under __DECC,
+       _alloca from <malloc.h> under _MSC_VER, <alloca.h> when HAVE_ALLOCA_H
+       is set, and as a last resort a bare old-style declaration of alloca.
+*/
+
+#ifndef alloca
+# ifdef __GNUC__
+#  define alloca __builtin_alloca
+# else
+#  ifdef __DECC
+#   define alloca(x) __ALLOCA(x)
+#  else
+#   ifdef _MSC_VER
+#    include <malloc.h>
+#    define alloca _alloca
+#   else
+#    if HAVE_ALLOCA_H
+#     include <alloca.h>
+#    else
+#     if defined (_AIX) || defined (_IBMR2)
+ #pragma alloca
+#     else
+       char *alloca ();
+#     endif
+#    endif
+#   endif
+#  endif
+# endif
+#endif
+
+
+/* Limb size in bytes and in bits, if not provided by gmp-mparam.h.  The
+   bit counts here and for BITS_PER_ULONG are 8 times the byte size, so
+   8-bit bytes are assumed.  */
+#ifndef BYTES_PER_MP_LIMB
+#define BYTES_PER_MP_LIMB  SIZEOF_MP_LIMB_T
+#endif
+#define GMP_LIMB_BYTES  BYTES_PER_MP_LIMB
+#ifndef GMP_LIMB_BITS
+#define GMP_LIMB_BITS  (8 * SIZEOF_MP_LIMB_T)
+#endif
+
+#define BITS_PER_ULONG  (8 * SIZEOF_UNSIGNED_LONG)
+
+
+/* gmp_uint_least32_t is an unsigned integer type with at least 32 bits.
+   Without uint_least32_t, the first of unsigned short and unsigned whose
+   size is at least 4 bytes is used; unsigned long is the last resort and
+   is taken without a size check.  */
+#if HAVE_UINT_LEAST32_T
+typedef uint_least32_t      gmp_uint_least32_t;
+#else
+#if SIZEOF_UNSIGNED_SHORT >= 4
+typedef unsigned short      gmp_uint_least32_t;
+#else
+#if SIZEOF_UNSIGNED >= 4
+typedef unsigned            gmp_uint_least32_t;
+#else
+typedef unsigned long       gmp_uint_least32_t;
+#endif
+#endif
+#endif
+
+
+/* gmp_intptr_t, for pointer to integer casts.  NOTE(review): the fallback
+   is size_t, which is unsigned where intptr_t is signed, and is assumed
+   rather than checked to be wide enough to hold a pointer.  */
+#if HAVE_INTPTR_T
+typedef intptr_t            gmp_intptr_t;
+#else /* fallback */
+typedef size_t              gmp_intptr_t;
+#endif
+
+
+/* pre-inverse types for truncating division and modulo.  Each struct just
+   bundles precomputed limb values.  NOTE(review): the invNM field names
+   presumably identify the kind of N-by-M limb inverse held -- confirm at
+   the code that fills them in.  */
+typedef struct {mp_limb_t inv32;} gmp_pi1_t;
+typedef struct {mp_limb_t inv21, inv32, inv53;} gmp_pi2_t;
+
+
+/* const and signed must match __gmp_const and __gmp_signed, so follow the
+   decision made for those in gmp.h.  When __GMP_HAVE_CONST is false both
+   keywords are defined away to nothing.  */
+#if ! __GMP_HAVE_CONST
+#define const   /* empty */
+#define signed  /* empty */
+#endif
+
+/* "const" basically means a function does nothing but examine its arguments
+   and give a return value, it doesn't read or write any memory (neither
+   global nor pointed to by arguments), and has no other side-effects.  This
+   is more restrictive than "pure".  See info node "(gcc)Function
+   Attributes".  __GMP_NO_ATTRIBUTE_CONST_PURE lets tune/common.c etc turn
+   this off when trying to write timing loops.
+
+   Each ATTRIBUTE_xxx in this group expands to nothing when the matching
+   HAVE_ATTRIBUTE_xxx test is false, so it can always be written.  */
+#if HAVE_ATTRIBUTE_CONST && ! defined (__GMP_NO_ATTRIBUTE_CONST_PURE)
+#define ATTRIBUTE_CONST  __attribute__ ((const))
+#else
+#define ATTRIBUTE_CONST
+#endif
+
+#if HAVE_ATTRIBUTE_NORETURN
+#define ATTRIBUTE_NORETURN  __attribute__ ((noreturn))
+#else
+#define ATTRIBUTE_NORETURN
+#endif
+
+/* "malloc" means a function behaves like malloc in that the pointer it
+   returns doesn't alias anything.  */
+#if HAVE_ATTRIBUTE_MALLOC
+#define ATTRIBUTE_MALLOC  __attribute__ ((malloc))
+#else
+#define ATTRIBUTE_MALLOC
+#endif
+
+
+/* Without strchr, fall back on index, called with the same arguments.  */
+#if ! HAVE_STRCHR
+#define strchr(s,c)  index(s,c)
+#endif
+
+#if ! HAVE_MEMSET
+#define memset(p, c, n)                 \
+  do {                                  \
+    ASSERT ((n) >= 0);                  \
+    char *__memset__p = (p);            \
+    int  __i;                           \
+    for (__i = 0; __i < (n); __i++)     \
+      __memset__p[__i] = (c);           \
+  } while (0)
+#endif
+
+/* va_copy is standard in C99, and gcc provides __va_copy when in strict C89
+   mode.  Falling back to a memcpy will give maximum portability, since it
+   works no matter whether va_list is a pointer, struct or array.  The
+   memcpy fallback is a do/while statement, so it can only be used where a
+   statement is allowed.  */
+#if ! defined (va_copy) && defined (__va_copy)
+#define va_copy(dst,src)  __va_copy(dst,src)
+#endif
+#if ! defined (va_copy)
+#define va_copy(dst,src) \
+  do { memcpy (&(dst), &(src), sizeof (va_list)); } while (0)
+#endif
+
+
+/* HAVE_HOST_CPU_alpha_CIX is 1 on an alpha with the CIX instructions
+   (ie. ctlz, ctpop, cttz).  It is set here for the ev67, ev68 and ev7 host
+   CPUs and left undefined otherwise.  */
+#if HAVE_HOST_CPU_alphaev67 || HAVE_HOST_CPU_alphaev68  \
+  || HAVE_HOST_CPU_alphaev7
+#define HAVE_HOST_CPU_alpha_CIX 1
+#endif
+
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/* Usage: TMP_DECL;
+          TMP_MARK;
+          ptr = TMP_ALLOC (bytes);
+          TMP_FREE;
+
+   Small allocations should use TMP_SALLOC, big allocations should use
+   TMP_BALLOC.  Allocations that might be small or big should use TMP_ALLOC.
+
+   Functions that use just TMP_SALLOC should use TMP_SDECL, TMP_SMARK, and
+   TMP_SFREE.
+
+   TMP_DECL just declares a variable, but might be empty and so must be last
+   in a list of variables.  TMP_MARK must be done before any TMP_ALLOC.
+   TMP_ALLOC(0) is not allowed.  TMP_FREE doesn't need to be done if a
+   TMP_MARK was made but no TMP_ALLOCs followed it.  */
+
+/* The alignment in bytes, used for TMP_ALLOCed blocks, when alloca or
+   __gmp_allocate_func doesn't already determine it.  Currently TMP_ALLOC
+   isn't used for "double"s, so that's not in the union.  __TMP_ALIGN is
+   the size of the union, so it is at least as large as both a limb and a
+   char pointer.  */
+union tmp_align_t {
+  mp_limb_t  l;
+  char       *p;
+};
+#define __TMP_ALIGN  sizeof (union tmp_align_t)
+
+/* Return "a" rounded upwards to a multiple of "m", if it isn't already.
+   "a" must be an unsigned type.
+   This is designed for use with a compile-time constant "m".
+   The POW2 case is expected to be usual, and gcc 3.0 and up recognises
+   "(-(8*n))%8" or the like is always zero, which means the rounding up in
+   the WANT_TMP_NOTREENTRANT version of TMP_ALLOC below will be a noop.
+   Example: a=13, m=8 gives 13 + (-13)%8 = 13 + 3 = 16.
+   Both arguments are evaluated more than once.  */
+#define ROUND_UP_MULTIPLE(a,m)          \
+  (POW2_P(m) ? (a) + (-(a))%(m)         \
+   : (a)+(m)-1 - (((a)+(m)-1) % (m)))
+
+#if defined (WANT_TMP_ALLOCA) || defined (WANT_TMP_REENTRANT)
+/* Header for a block handed out by __gmp_tmp_reentrant_alloc; "next"
+   presumably chains the blocks belonging to one marker so that
+   __gmp_tmp_reentrant_free can release them all (TODO confirm in the
+   allocator source).  NOTE(review): this guard tests defined(), whereas
+   the scheme sections that follow test the macros' values with plain
+   #if.  */
+struct tmp_reentrant_t {
+  struct tmp_reentrant_t  *next;
+  size_t                 size;   /* bytes, including header */
+};
+__GMP_DECLSPEC void *__gmp_tmp_reentrant_alloc __GMP_PROTO ((struct tmp_reentrant_t **, size_t)) ATTRIBUTE_MALLOC;
+__GMP_DECLSPEC void  __gmp_tmp_reentrant_free __GMP_PROTO ((struct tmp_reentrant_t *));
+#endif
+
+#if WANT_TMP_ALLOCA
+/* alloca-based scheme: requests under 65536 bytes come from alloca, larger
+   ones from __gmp_tmp_reentrant_alloc, which is given the address of
+   __tmp_marker.  The S (small-only) macros need no marker and expand to
+   nothing, and TMP_FREE only calls the free function when the marker is
+   non-zero.  */
+#define TMP_SDECL
+#define TMP_DECL               struct tmp_reentrant_t *__tmp_marker
+#define TMP_SMARK
+#define TMP_MARK               __tmp_marker = 0
+#define TMP_SALLOC(n)          alloca(n)
+#define TMP_BALLOC(n)          __gmp_tmp_reentrant_alloc (&__tmp_marker, n)
+#define TMP_ALLOC(n)                                                   \
+  (LIKELY ((n) < 65536) ? TMP_SALLOC(n) : TMP_BALLOC(n))
+#define TMP_SFREE
+#define TMP_FREE                                                          \
+  do {                                                                    \
+    if (UNLIKELY (__tmp_marker != 0)) __gmp_tmp_reentrant_free (__tmp_marker); \
+  } while (0)
+#endif
+
+#if WANT_TMP_REENTRANT
+/* Reentrant scheme without alloca: every allocation, small or big, goes
+   through __gmp_tmp_reentrant_alloc, and TMP_FREE always calls
+   __gmp_tmp_reentrant_free.  The S macros are plain aliases.  */
+#define TMP_SDECL              TMP_DECL
+#define TMP_DECL               struct tmp_reentrant_t *__tmp_marker
+#define TMP_SMARK              TMP_MARK
+#define TMP_MARK               __tmp_marker = 0
+#define TMP_SALLOC(n)          TMP_ALLOC(n)
+#define TMP_BALLOC(n)          TMP_ALLOC(n)
+#define TMP_ALLOC(n)           __gmp_tmp_reentrant_alloc (&__tmp_marker, n)
+#define TMP_SFREE              TMP_FREE
+#define TMP_FREE               __gmp_tmp_reentrant_free (__tmp_marker)
+#endif
+
+#if WANT_TMP_NOTREENTRANT
+/* Non-reentrant scheme: TMP_MARK and TMP_FREE pass a struct tmp_marker to
+   __gmp_tmp_mark and __gmp_tmp_free, and each request is rounded up to a
+   multiple of __TMP_ALIGN before being handed to __gmp_tmp_alloc.  The S
+   and B macros are plain aliases.  */
+struct tmp_marker
+{
+  struct tmp_stack *which_chunk;
+  void *alloc_point;
+};
+__GMP_DECLSPEC void *__gmp_tmp_alloc __GMP_PROTO ((unsigned long)) ATTRIBUTE_MALLOC;
+__GMP_DECLSPEC void __gmp_tmp_mark __GMP_PROTO ((struct tmp_marker *));
+__GMP_DECLSPEC void __gmp_tmp_free __GMP_PROTO ((struct tmp_marker *));
+#define TMP_SDECL              TMP_DECL
+#define TMP_DECL               struct tmp_marker __tmp_marker
+#define TMP_SMARK              TMP_MARK
+#define TMP_MARK               __gmp_tmp_mark (&__tmp_marker)
+#define TMP_SALLOC(n)          TMP_ALLOC(n)
+#define TMP_BALLOC(n)          TMP_ALLOC(n)
+#define TMP_ALLOC(n)                                                   \
+  __gmp_tmp_alloc (ROUND_UP_MULTIPLE ((unsigned long) (n), __TMP_ALIGN))
+#define TMP_SFREE              TMP_FREE
+#define TMP_FREE               __gmp_tmp_free (&__tmp_marker)
+#endif
+
+#if WANT_TMP_DEBUG
+/* See tal-debug.c for some comments.  In this scheme every TMP_ macro
+   passes ASSERT_FILE and ASSERT_LINE to the __gmp_tmp_debug_* functions,
+   and small and big allocations are treated alike.  */
+struct tmp_debug_t {
+  struct tmp_debug_entry_t  *list;
+  const char                *file;
+  int                       line;
+};
+struct tmp_debug_entry_t {
+  struct tmp_debug_entry_t  *next;
+  char                      *block;
+  size_t                    size;
+};
+__GMP_DECLSPEC void  __gmp_tmp_debug_mark  __GMP_PROTO ((const char *, int, struct tmp_debug_t **,
+                                                        struct tmp_debug_t *,
+                                                        const char *, const char *));
+__GMP_DECLSPEC void *__gmp_tmp_debug_alloc __GMP_PROTO ((const char *, int, int,
+                                                        struct tmp_debug_t **, const char *,
+                                                        size_t)) ATTRIBUTE_MALLOC;
+__GMP_DECLSPEC void  __gmp_tmp_debug_free  __GMP_PROTO ((const char *, int, int,
+                                                        struct tmp_debug_t **,
+                                                        const char *, const char *));
+#define TMP_SDECL TMP_DECL_NAME(__tmp_xmarker, "__tmp_marker")
+#define TMP_DECL TMP_DECL_NAME(__tmp_xmarker, "__tmp_marker")
+#define TMP_SMARK TMP_MARK_NAME(__tmp_xmarker, "__tmp_marker")
+#define TMP_MARK TMP_MARK_NAME(__tmp_xmarker, "__tmp_marker")
+#define TMP_SFREE TMP_FREE_NAME(__tmp_xmarker, "__tmp_marker")
+#define TMP_FREE TMP_FREE_NAME(__tmp_xmarker, "__tmp_marker")
+/* The marker variable is designed to provoke an uninitialized variable
+   warning from the compiler if TMP_FREE is used without a TMP_MARK.
+   __tmp_marker_inscope does the same for TMP_ALLOC.  Runtime tests pick
+   these things up too.  */
+#define TMP_DECL_NAME(marker, marker_name)                      \
+  int marker;                                                   \
+  int __tmp_marker_inscope;                                     \
+  const char *__tmp_marker_name = marker_name;                  \
+  struct tmp_debug_t  __tmp_marker_struct;                      \
+  /* don't demand NULL, just cast a zero */                     \
+  struct tmp_debug_t  *__tmp_marker = (struct tmp_debug_t *) 0
+#define TMP_MARK_NAME(marker, marker_name)                      \
+  do {                                                          \
+    marker = 1;                                                 \
+    __tmp_marker_inscope = 1;                                   \
+    __gmp_tmp_debug_mark  (ASSERT_FILE, ASSERT_LINE,            \
+                           &__tmp_marker, &__tmp_marker_struct, \
+                           __tmp_marker_name, marker_name);     \
+  } while (0)
+#define TMP_SALLOC(n)          TMP_ALLOC(n)
+#define TMP_BALLOC(n)          TMP_ALLOC(n)
+#define TMP_ALLOC(size)                                                 \
+  __gmp_tmp_debug_alloc (ASSERT_FILE, ASSERT_LINE,                      \
+                         __tmp_marker_inscope,                          \
+                         &__tmp_marker, __tmp_marker_name, size)
+#define TMP_FREE_NAME(marker, marker_name)                      \
+  do {                                                          \
+    __gmp_tmp_debug_free  (ASSERT_FILE, ASSERT_LINE,            \
+                           marker, &__tmp_marker,               \
+                           __tmp_marker_name, marker_name);     \
+  } while (0)
+#endif /* WANT_TMP_DEBUG */
+
+
+/* Allocating various types.  Each macro multiplies the element count by
+   sizeof (type), allocates that many bytes and casts the result to a
+   pointer to the type.  NOTE(review): the multiplication is not checked
+   for overflow.  */
+#define TMP_ALLOC_TYPE(n,type)  ((type *) TMP_ALLOC ((n) * sizeof (type)))
+#define TMP_SALLOC_TYPE(n,type) ((type *) TMP_SALLOC ((n) * sizeof (type)))
+#define TMP_BALLOC_TYPE(n,type) ((type *) TMP_BALLOC ((n) * sizeof (type)))
+#define TMP_ALLOC_LIMBS(n)      TMP_ALLOC_TYPE(n,mp_limb_t)
+#define TMP_SALLOC_LIMBS(n)     TMP_SALLOC_TYPE(n,mp_limb_t)
+#define TMP_BALLOC_LIMBS(n)     TMP_BALLOC_TYPE(n,mp_limb_t)
+#define TMP_ALLOC_MP_PTRS(n)    TMP_ALLOC_TYPE(n,mp_ptr)
+#define TMP_SALLOC_MP_PTRS(n)   TMP_SALLOC_TYPE(n,mp_ptr)
+#define TMP_BALLOC_MP_PTRS(n)   TMP_BALLOC_TYPE(n,mp_ptr)
+
+/* It's more efficient to allocate one block than two.  This is certainly
+   true of the malloc methods, but it can even be true of alloca if that
+   involves copying a chunk of stack (various RISCs), or a call to a stack
+   bounds check (mingw).  In any case, when debugging keep separate blocks
+   so a redzoning malloc debugger can protect each individually.  */
+#define TMP_ALLOC_LIMBS_2(xp,xsize, yp,ysize)           \
+  do {                                                  \
+    if (WANT_TMP_DEBUG)                                 \
+      {                                                 \
+        (xp) = TMP_ALLOC_LIMBS (xsize);                 \
+        (yp) = TMP_ALLOC_LIMBS (ysize);                 \
+      }                                                 \
+    else                                                \
+      {                                                 \
+        (xp) = TMP_ALLOC_LIMBS ((xsize) + (ysize));     \
+        (yp) = (xp) + (xsize);                          \
+      }                                                 \
+  } while (0)
+
+
+/* From gmp.h, nicer names for internal use.
+
+   The arithmetic helpers after them (ABS, ABS_CAST, MIN, MAX) evaluate
+   their arguments more than once.  ABS_CAST gives the absolute value of x
+   in type T; for negative x it adds 1 before converting and negating, so x
+   itself is never negated (presumably to stay clear of overflow on the
+   most negative value, with T an unsigned type -- TODO confirm).  numberof
+   is the element count of an array; it is not meaningful on a pointer.  */
+#define CRAY_Pragma(str)               __GMP_CRAY_Pragma(str)
+#define MPN_CMP(result, xp, yp, size)  __GMPN_CMP(result, xp, yp, size)
+#define LIKELY(cond)                   __GMP_LIKELY(cond)
+#define UNLIKELY(cond)                 __GMP_UNLIKELY(cond)
+
+#define ABS(x) ((x) >= 0 ? (x) : -(x))
+#define ABS_CAST(T,x) ((x) >= 0 ? (T)(x) : -((T)((x) + 1) - 1))
+#undef MIN
+#define MIN(l,o) ((l) < (o) ? (l) : (o))
+#undef MAX
+#define MAX(h,i) ((h) > (i) ? (h) : (i))
+#define numberof(x)  (sizeof (x) / sizeof ((x)[0]))
+
+/* Field access macros.  Each takes a pointer to the structure.  PTR and
+   LIMBS both name the _mp_d field, and ABSIZ is ABS applied to SIZ, so it
+   evaluates x more than once.  */
+#define SIZ(x) ((x)->_mp_size)
+#define ABSIZ(x) ABS (SIZ (x))
+#define PTR(x) ((x)->_mp_d)
+#define LIMBS(x) ((x)->_mp_d)
+#define EXP(x) ((x)->_mp_exp)
+#define PREC(x) ((x)->_mp_prec)
+#define ALLOC(x) ((x)->_mp_alloc)
+
+/* n-1 inverts any low zeros and the lowest one bit.  If n&(n-1) leaves zero
+   then that lowest one bit must have been the only bit set.  n==0 will
+   return true though, so avoid that.  n is evaluated twice.  */
+#define POW2_P(n)  (((n) & ((n) - 1)) == 0)
+
+
+/* The "short" defines are a bit different because shorts are promoted to
+   ints by ~ or >> etc.
+
+   #ifndef's are used since on some systems (HP?) header files other than
+   limits.h setup these defines.  We could forcibly #undef in that case, but
+   there seems no need to worry about that.  */
+
+#ifndef ULONG_MAX
+#define ULONG_MAX   __GMP_ULONG_MAX
+#endif
+#ifndef UINT_MAX
+#define UINT_MAX    __GMP_UINT_MAX
+#endif
+#ifndef USHRT_MAX
+#define USHRT_MAX   __GMP_USHRT_MAX
+#endif
+#define MP_LIMB_T_MAX      (~ (mp_limb_t) 0)
+
+/* Each unsigned *_HIGHBIT is the type's maximum XORed with that maximum
+   shifted right by one, which leaves only the most significant bit set.
+
+   Must cast ULONG_MAX etc to unsigned long etc, since they might not be
+   unsigned on a K&R compiler.  In particular the HP-UX 10 bundled K&R cc
+   treats the plain decimal values in <limits.h> as signed.
+
+   USHRT_HIGHBIT carries an extra outer cast back to unsigned short.  */
+#define ULONG_HIGHBIT      (ULONG_MAX ^ ((unsigned long) ULONG_MAX >> 1))
+#define UINT_HIGHBIT       (UINT_MAX ^ ((unsigned) UINT_MAX >> 1))
+#define USHRT_HIGHBIT      ((unsigned short) (USHRT_MAX ^ ((unsigned short) USHRT_MAX >> 1)))
+#define GMP_LIMB_HIGHBIT  (MP_LIMB_T_MAX ^ (MP_LIMB_T_MAX >> 1))
+/* Signed limits, defined only when the name is not already defined.  Each
+   *_MIN is the unsigned high bit converted to the signed type, and each
+   *_MAX is derived as -(MIN+1).  This gives the expected values on a twos
+   complement machine.  NOTE(review): other representations are not
+   handled here.  */
+#ifndef LONG_MIN
+#define LONG_MIN           ((long) ULONG_HIGHBIT)
+#endif
+#ifndef LONG_MAX
+#define LONG_MAX           (-(LONG_MIN+1))
+#endif
+
+#ifndef INT_MIN
+#define INT_MIN            ((int) UINT_HIGHBIT)
+#endif
+#ifndef INT_MAX
+#define INT_MAX            (-(INT_MIN+1))
+#endif
+
+#ifndef SHRT_MIN
+#define SHRT_MIN           ((short) USHRT_HIGHBIT)
+#endif
+#ifndef SHRT_MAX
+#define SHRT_MAX           ((short) (-(SHRT_MIN+1)))
+#endif
+/* mp_size_t limits follow int or long according to __GMP_MP_SIZE_T_INT.  */
+#if __GMP_MP_SIZE_T_INT
+#define MP_SIZE_T_MAX      INT_MAX
+#define MP_SIZE_T_MIN      INT_MIN
+#else
+#define MP_SIZE_T_MAX      LONG_MAX
+#define MP_SIZE_T_MIN      LONG_MIN
+#endif
+
+/* mp_exp_t is the same as mp_size_t, so its limits are simply aliases.  */
+#define MP_EXP_T_MAX   MP_SIZE_T_MAX
+#define MP_EXP_T_MIN   MP_SIZE_T_MIN
+/* The signed *_HIGHBIT names are aliases for the corresponding *_MIN.  */
+#define LONG_HIGHBIT       LONG_MIN
+#define INT_HIGHBIT        INT_MIN
+#define SHRT_HIGHBIT       SHRT_MIN
+
+/* GMP_NUMB_HIGHBIT is a limb with only bit GMP_NUMB_BITS-1 set, i.e. the top
+   bit of the number part of a limb.  GMP_NAIL_LOWBIT is a limb with only
+   bit GMP_NUMB_BITS set (the lowest nail bit), or zero when there are no
+   nail bits.  */
+#define GMP_NUMB_HIGHBIT  (CNST_LIMB(1) << (GMP_NUMB_BITS-1))
+
+#if GMP_NAIL_BITS == 0
+#define GMP_NAIL_LOWBIT   CNST_LIMB(0)
+#else
+#define GMP_NAIL_LOWBIT   (CNST_LIMB(1) << GMP_NUMB_BITS)
+#endif
+
+#if GMP_NAIL_BITS != 0
+/* Set various *_THRESHOLD values to be used for nails.  Thus we avoid using
+   code that has not yet been qualified.
+
+   Any previously defined values are discarded with #undef and replaced.
+   A threshold of MP_SIZE_T_MAX means the algorithm above the threshold is
+   never chosen (see ABOVE_THRESHOLD).
+
+   NOTE(review): DIVEXACT_1_THRESHOLD is #undef'd below but is not given a
+   new value in this block -- presumably a default is supplied elsewhere;
+   confirm.  */
+
+#undef  DC_DIV_QR_THRESHOLD
+#define DC_DIV_QR_THRESHOLD              50
+
+#undef DIVREM_1_NORM_THRESHOLD
+#undef DIVREM_1_UNNORM_THRESHOLD
+#undef MOD_1_NORM_THRESHOLD
+#undef MOD_1_UNNORM_THRESHOLD
+#undef USE_PREINV_DIVREM_1
+#undef DIVREM_2_THRESHOLD
+#undef DIVEXACT_1_THRESHOLD
+#define DIVREM_1_NORM_THRESHOLD           MP_SIZE_T_MAX  /* no preinv */
+#define DIVREM_1_UNNORM_THRESHOLD         MP_SIZE_T_MAX  /* no preinv */
+#define MOD_1_NORM_THRESHOLD              MP_SIZE_T_MAX  /* no preinv */
+#define MOD_1_UNNORM_THRESHOLD            MP_SIZE_T_MAX  /* no preinv */
+#define USE_PREINV_DIVREM_1               0  /* no preinv */
+#define DIVREM_2_THRESHOLD                MP_SIZE_T_MAX  /* no preinv */
+
+/* mpn/generic/mul_fft.c is not nails-capable, so the FFT multiply and
+   square thresholds are set so that they are never reached.  */
+#undef  MUL_FFT_THRESHOLD
+#undef  SQR_FFT_THRESHOLD
+#define MUL_FFT_THRESHOLD                MP_SIZE_T_MAX
+#define SQR_FFT_THRESHOLD                MP_SIZE_T_MAX
+#endif
+
+/* Swap macros.  Each exchanges the values of its two arguments through a
+   temporary of the type named in the macro, so both arguments must be
+   modifiable lvalues of that type, and each is expanded twice.  The
+   MPN_*_SWAP forms exchange a pointer pair and a size pair together by
+   invoking the pointer swap and then MP_SIZE_T_SWAP.  Every macro body is
+   wrapped in do ... while (0) so an invocation followed by a semicolon
+   forms a single statement.  */
+
+#define MP_LIMB_T_SWAP(x, y)                    \
+  do {                                          \
+    mp_limb_t __mp_limb_t_swap__tmp = (x);      \
+    (x) = (y);                                  \
+    (y) = __mp_limb_t_swap__tmp;                \
+  } while (0)
+#define MP_SIZE_T_SWAP(x, y)                    \
+  do {                                          \
+    mp_size_t __mp_size_t_swap__tmp = (x);      \
+    (x) = (y);                                  \
+    (y) = __mp_size_t_swap__tmp;                \
+  } while (0)
+
+#define MP_PTR_SWAP(x, y)               \
+  do {                                  \
+    mp_ptr __mp_ptr_swap__tmp = (x);    \
+    (x) = (y);                          \
+    (y) = __mp_ptr_swap__tmp;           \
+  } while (0)
+#define MP_SRCPTR_SWAP(x, y)                    \
+  do {                                          \
+    mp_srcptr __mp_srcptr_swap__tmp = (x);      \
+    (x) = (y);                                  \
+    (y) = __mp_srcptr_swap__tmp;                \
+  } while (0)
+
+#define MPN_PTR_SWAP(xp,xs, yp,ys)      \
+  do {                                  \
+    MP_PTR_SWAP (xp, yp);               \
+    MP_SIZE_T_SWAP (xs, ys);            \
+  } while(0)
+#define MPN_SRCPTR_SWAP(xp,xs, yp,ys)   \
+  do {                                  \
+    MP_SRCPTR_SWAP (xp, yp);            \
+    MP_SIZE_T_SWAP (xs, ys);            \
+  } while(0)
+
+#define MPZ_PTR_SWAP(x, y)              \
+  do {                                  \
+    mpz_ptr __mpz_ptr_swap__tmp = (x);  \
+    (x) = (y);                          \
+    (y) = __mpz_ptr_swap__tmp;          \
+  } while (0)
+#define MPZ_SRCPTR_SWAP(x, y)                   \
+  do {                                          \
+    mpz_srcptr __mpz_srcptr_swap__tmp = (x);    \
+    (x) = (y);                                  \
+    (y) = __mpz_srcptr_swap__tmp;               \
+  } while (0)
+
+
+/* Enhancement: __gmp_allocate_func could have "__attribute__ ((malloc))",
+   but current gcc (3.0) doesn't seem to support that.  */
+__GMP_DECLSPEC extern void * (*__gmp_allocate_func) __GMP_PROTO ((size_t));
+__GMP_DECLSPEC extern void * (*__gmp_reallocate_func) __GMP_PROTO ((void *, size_t, size_t));
+__GMP_DECLSPEC extern void   (*__gmp_free_func) __GMP_PROTO ((void *, size_t));
+
+__GMP_DECLSPEC void *__gmp_default_allocate __GMP_PROTO ((size_t));
+__GMP_DECLSPEC void *__gmp_default_reallocate __GMP_PROTO ((void *, size_t, size_t));
+__GMP_DECLSPEC void __gmp_default_free __GMP_PROTO ((void *, size_t));
+/* Typed wrappers around the allocation function pointers.  The *_TYPE
+   forms take element counts and multiply them by sizeof (type) to get the
+   sizes passed to the underlying function, casting the result to (type *)
+   where there is one.  The *_LIMBS forms are the same with type mp_limb_t.
+   The multiplication is not checked for overflow here.  */
+#define __GMP_ALLOCATE_FUNC_TYPE(n,type) \
+  ((type *) (*__gmp_allocate_func) ((n) * sizeof (type)))
+#define __GMP_ALLOCATE_FUNC_LIMBS(n)   __GMP_ALLOCATE_FUNC_TYPE (n, mp_limb_t)
+
+#define __GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, type) \
+  ((type *) (*__gmp_reallocate_func)                            \
+   (p, (old_size) * sizeof (type), (new_size) * sizeof (type)))
+#define __GMP_REALLOCATE_FUNC_LIMBS(p, old_size, new_size) \
+  __GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, mp_limb_t)
+
+#define __GMP_FREE_FUNC_TYPE(p,n,type) (*__gmp_free_func) (p, (n) * sizeof (type))
+#define __GMP_FREE_FUNC_LIMBS(p,n)     __GMP_FREE_FUNC_TYPE (p, n, mp_limb_t)
+/* Reallocate only when the size actually changes, storing the result back
+   into ptr.  Note the units differ between the two forms:
+   __GMP_REALLOCATE_FUNC_MAYBE passes oldsize and newsize through exactly
+   as given, while the _TYPE form scales both by sizeof (type).  */
+#define __GMP_REALLOCATE_FUNC_MAYBE(ptr, oldsize, newsize)      \
+  do {                                                          \
+    if ((oldsize) != (newsize))                                 \
+      (ptr) = (*__gmp_reallocate_func) (ptr, oldsize, newsize); \
+  } while (0)
+
+#define __GMP_REALLOCATE_FUNC_MAYBE_TYPE(ptr, oldsize, newsize, type)   \
+  do {                                                                  \
+    if ((oldsize) != (newsize))                                         \
+      (ptr) = (type *) (*__gmp_reallocate_func)                         \
+        (ptr, (oldsize) * sizeof (type), (newsize) * sizeof (type));    \
+  } while (0)
+
+
+/* Dummy for non-gcc, code involving it will go dead. */
+#if ! defined (__GNUC__) || __GNUC__ < 2
+#define __builtin_constant_p(x)   0
+#endif
+
+
+/* In gcc 2.96 and up on i386, tail calls are optimized to jumps if the
+   stack usage is compatible.  __attribute__ ((regparm (N))) helps by
+   putting leading parameters in registers, avoiding extra stack.
+
+   regparm cannot be used with calls going through the PLT, because the
+   binding code there may clobber the registers (%eax, %edx, %ecx) used for
+   the regparm parameters.  Calls to local (ie. static) functions could
+   still use this, if we cared to differentiate locals and globals.
+
+   On athlon-unknown-freebsd4.9 with gcc 3.3.3, regparm cannot be used with
+   -p or -pg profiling, since that version of gcc doesn't realize the
+   .mcount calls will clobber the parameter registers.  Other systems are
+   ok, like debian with glibc 2.3.2 (mcount doesn't clobber), but we don't
+   bother to try to detect this.  regparm is only an optimization so we just
+   disable it when profiling (profiling being a slowdown anyway).  */
+
+#if HAVE_HOST_CPU_FAMILY_x86 && __GMP_GNUC_PREREQ (2,96) && ! defined (PIC) \
+  && ! WANT_PROFILING_PROF && ! WANT_PROFILING_GPROF
+#define USE_LEADING_REGPARM 1
+#else
+#define USE_LEADING_REGPARM 0
+#endif
+
+/* Macros for altering parameter order according to regparm usage.
+   REGPARM_2_1 and REGPARM_3_1 take a parameter list whose last item is x.
+   With USE_LEADING_REGPARM they move x to the front; otherwise the order
+   is left as written.  REGPARM_ATTR(n) expands to the gcc regparm
+   attribute when enabled and to nothing otherwise.  The same reordering
+   macro has to be applied to the prototype and to every call so the two
+   agree, as is done for __gmpz_aorsmul_1.  */
+#if USE_LEADING_REGPARM
+#define REGPARM_2_1(a,b,x)    x,a,b
+#define REGPARM_3_1(a,b,c,x)  x,a,b,c
+#define REGPARM_ATTR(n) __attribute__ ((regparm (n)))
+#else
+#define REGPARM_2_1(a,b,x)    a,b,x
+#define REGPARM_3_1(a,b,c,x)  a,b,c,x
+#define REGPARM_ATTR(n)
+#endif
+
+
+/* ASM_L gives a local label for a gcc asm block, for use when temporary
+   local labels like "1:" might not be available, which is the case for
+   instance on the x86s (the SCO assembler doesn't support them).
+
+   The label generated is made unique by including "%=" which is a unique
+   number for each insn.  This ensures the same name can be used in multiple
+   asm blocks, perhaps via a macro.  Since jumps between asm blocks are not
+   allowed there's no need for a label to be usable outside a single
+   block.  */
+
+#define ASM_L(name)  LSYM_PREFIX "asm_%=_" #name
+
+
+#if defined (__GNUC__) && HAVE_HOST_CPU_FAMILY_x86
+#if 0
+/* Disabled (#if 0) x86 string-instruction versions of MPN_COPY_INCR and
+   MPN_COPY_DECR for gcc.  MPN_COPY_INCR clears the direction flag and
+   copies upwards from DST/SRC; MPN_COPY_DECR sets the direction flag and
+   starts both pointers at element N-1 so the copy runs downwards.  Both
+   use "rep movsl", which moves 32 bits per count (presumably assuming
+   4-byte limbs -- confirm before enabling).
+
+   Open issues that must be resolved before this can be switched on:
+   FIXME: Check that these actually improve things.
+   FIXME: Need a cld after each std.
+   FIXME: Can't have inputs in clobbered registers, must describe them as
+   dummy outputs, and add volatile. */
+#define MPN_COPY_INCR(DST, SRC, N)                                     \
+  __asm__ ("cld\n\trep\n\tmovsl" : :                                   \
+          "D" (DST), "S" (SRC), "c" (N) :                              \
+          "cx", "di", "si", "memory")
+#define MPN_COPY_DECR(DST, SRC, N)                                     \
+  __asm__ ("std\n\trep\n\tmovsl" : :                                   \
+          "D" ((DST) + (N) - 1), "S" ((SRC) + (N) - 1), "c" (N) :      \
+          "cx", "di", "si", "memory")
+#endif
+#endif
+
+
+__GMP_DECLSPEC void __gmpz_aorsmul_1 __GMP_PROTO ((REGPARM_3_1 (mpz_ptr, mpz_srcptr, mp_limb_t, mp_size_t))) REGPARM_ATTR(1);
+#define mpz_aorsmul_1(w,u,v,sub)  __gmpz_aorsmul_1 (REGPARM_3_1 (w, u, v, sub))
+
+#define mpz_n_pow_ui __gmpz_n_pow_ui
+__GMP_DECLSPEC void    mpz_n_pow_ui __GMP_PROTO ((mpz_ptr, mp_srcptr, mp_size_t, unsigned long));
+
+
+#define mpn_addmul_1c __MPN(addmul_1c)
+__GMP_DECLSPEC mp_limb_t mpn_addmul_1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
+
+#define mpn_addmul_2 __MPN(addmul_2)
+__GMP_DECLSPEC mp_limb_t mpn_addmul_2 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
+
+#define mpn_addmul_3 __MPN(addmul_3)
+__GMP_DECLSPEC mp_limb_t mpn_addmul_3 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
+
+#define mpn_addmul_4 __MPN(addmul_4)
+__GMP_DECLSPEC mp_limb_t mpn_addmul_4 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
+
+#define mpn_addmul_5 __MPN(addmul_5)
+__GMP_DECLSPEC mp_limb_t mpn_addmul_5 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
+
+#define mpn_addmul_6 __MPN(addmul_6)
+__GMP_DECLSPEC mp_limb_t mpn_addmul_6 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
+
+#define mpn_addmul_7 __MPN(addmul_7)
+__GMP_DECLSPEC mp_limb_t mpn_addmul_7 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
+
+#define mpn_addmul_8 __MPN(addmul_8)
+__GMP_DECLSPEC mp_limb_t mpn_addmul_8 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
+
+/* mpn_addlsh1_n(c,a,b,n), when it exists, sets {c,n} to {a,n}+2*{b,n}, and
+   returns the carry out (0, 1 or 2).  */
+#define mpn_addlsh1_n __MPN(addlsh1_n)
+__GMP_DECLSPEC mp_limb_t mpn_addlsh1_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+
+/* mpn_addlsh2_n(c,a,b,n), when it exists, sets {c,n} to {a,n}+4*{b,n}, and
+   returns the carry out (0, ..., 4).  */
+#define mpn_addlsh2_n __MPN(addlsh2_n)
+__GMP_DECLSPEC mp_limb_t mpn_addlsh2_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+
+/* mpn_addlsh_n(c,a,b,n,k), when it exists, sets {c,n} to {a,n}+2^k*{b,n}, and
+   returns the carry out (0, ..., 2^k).  */
+#define mpn_addlsh_n __MPN(addlsh_n)
+  __GMP_DECLSPEC mp_limb_t mpn_addlsh_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int));
+
+/* mpn_sublsh1_n(c,a,b,n), when it exists, sets {c,n} to {a,n}-2*{b,n}, and
+   returns the borrow out (0, 1 or 2).  */
+#define mpn_sublsh1_n __MPN(sublsh1_n)
+__GMP_DECLSPEC mp_limb_t mpn_sublsh1_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+
+/* mpn_rsblsh1_n(c,a,b,n), when it exists, sets {c,n} to 2*{b,n}-{a,n}, and
+   returns the carry out (-1, 0, 1).  */
+#define mpn_rsblsh1_n __MPN(rsblsh1_n)
+__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh1_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+
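+/* mpn_sublsh2_n(c,a,b,n), when it exists, sets {c,n} to {a,n}-4*{b,n}, and
+   returns the borrow out (FIXME: stated as 0, 1, 2 or 3, but a=0 with b all
+   ones gives a borrow of 4, matching the 0, ..., 4 carry of mpn_addlsh2_n).  */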
+#define mpn_sublsh2_n __MPN(sublsh2_n)
+__GMP_DECLSPEC mp_limb_t mpn_sublsh2_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+
+/* mpn_rsblsh2_n(c,a,b,n), when it exists, sets {c,n} to 4*{b,n}-{a,n}, and
+   returns the carry out (-1, ..., 3).  */
+#define mpn_rsblsh2_n __MPN(rsblsh2_n)
+__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh2_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+
+/* mpn_rsblsh_n(c,a,b,n,k), when it exists, sets {c,n} to 2^k*{b,n}-{a,n}, and
+   returns the carry out (-1, 0, ..., 2^k-1).  */
+#define mpn_rsblsh_n __MPN(rsblsh_n)
+__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int));
+
+/* mpn_rsh1add_n(c,a,b,n), when it exists, sets {c,n} to ({a,n} + {b,n}) >> 1,
+   and returns the bit rshifted out (0 or 1).  */
+#define mpn_rsh1add_n __MPN(rsh1add_n)
+__GMP_DECLSPEC mp_limb_t mpn_rsh1add_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+#define mpn_rsh1add_nc __MPN(rsh1add_nc)
+__GMP_DECLSPEC mp_limb_t mpn_rsh1add_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
+
+/* mpn_rsh1sub_n(c,a,b,n), when it exists, sets {c,n} to ({a,n} - {b,n}) >> 1,
+   and returns the bit rshifted out (0 or 1).  If there's a borrow from the
+   subtract, it's stored as a 1 in the high bit of c[n-1], like a twos
+   complement negative.  */
+#define mpn_rsh1sub_n __MPN(rsh1sub_n)
+__GMP_DECLSPEC mp_limb_t mpn_rsh1sub_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+#define mpn_rsh1sub_nc __MPN(rsh1sub_nc)
+__GMP_DECLSPEC mp_limb_t mpn_rsh1sub_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
+
+#define mpn_lshiftc __MPN(lshiftc)
+__GMP_DECLSPEC mp_limb_t mpn_lshiftc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned int));
+
+#define mpn_add_n_sub_n __MPN(add_n_sub_n)
+__GMP_DECLSPEC mp_limb_t mpn_add_n_sub_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+
+#define mpn_add_n_sub_nc __MPN(add_n_sub_nc)
+__GMP_DECLSPEC mp_limb_t mpn_add_n_sub_nc __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
+
+#define mpn_addaddmul_1msb0 __MPN(addaddmul_1msb0)
+__GMP_DECLSPEC mp_limb_t mpn_addaddmul_1msb0 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
+
+#define mpn_divrem_1c __MPN(divrem_1c)
+__GMP_DECLSPEC mp_limb_t mpn_divrem_1c __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
+
+#define mpn_dump __MPN(dump)
+__GMP_DECLSPEC void mpn_dump __GMP_PROTO ((mp_srcptr, mp_size_t));
+
+#define mpn_fib2_ui __MPN(fib2_ui)
+__GMP_DECLSPEC mp_size_t mpn_fib2_ui __GMP_PROTO ((mp_ptr, mp_ptr, unsigned long));
+
+/* Remap names of internal mpn functions.  */
+#define __clz_tab               __MPN(clz_tab)
+#define mpn_udiv_w_sdiv                __MPN(udiv_w_sdiv)
+
+#define mpn_jacobi_base __MPN(jacobi_base)
+__GMP_DECLSPEC int mpn_jacobi_base __GMP_PROTO ((mp_limb_t, mp_limb_t, int)) ATTRIBUTE_CONST;
+
+#define mpn_mod_1c __MPN(mod_1c)
+__GMP_DECLSPEC mp_limb_t mpn_mod_1c __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)) __GMP_ATTRIBUTE_PURE;
+
+#define mpn_mul_1c __MPN(mul_1c)
+__GMP_DECLSPEC mp_limb_t mpn_mul_1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
+
+#define mpn_mul_2 __MPN(mul_2)
+__GMP_DECLSPEC mp_limb_t mpn_mul_2 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
+
+#define mpn_mul_3 __MPN(mul_3)
+__GMP_DECLSPEC mp_limb_t mpn_mul_3 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
+
+#define mpn_mul_4 __MPN(mul_4)
+__GMP_DECLSPEC mp_limb_t mpn_mul_4 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
+
+#ifndef mpn_mul_basecase  /* if not done with cpuvec in a fat binary */
+#define mpn_mul_basecase __MPN(mul_basecase)
+__GMP_DECLSPEC void mpn_mul_basecase __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));
+#endif
+
+#define mpn_mullo_n __MPN(mullo_n)
+__GMP_DECLSPEC void mpn_mullo_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+
+#define mpn_mullo_basecase __MPN(mullo_basecase)
+__GMP_DECLSPEC void mpn_mullo_basecase __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+
+#define mpn_sqr __MPN(sqr)
+__GMP_DECLSPEC void mpn_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
+
+#ifndef mpn_sqr_basecase  /* if not done with cpuvec in a fat binary */
+#define mpn_sqr_basecase __MPN(sqr_basecase)
+__GMP_DECLSPEC void mpn_sqr_basecase __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
+#endif
+
+#define mpn_submul_1c __MPN(submul_1c)
+__GMP_DECLSPEC mp_limb_t mpn_submul_1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
+
+#define mpn_redc_1 __MPN(redc_1)
+__GMP_DECLSPEC void mpn_redc_1 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
+
+#define mpn_redc_2 __MPN(redc_2)
+__GMP_DECLSPEC void mpn_redc_2 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
+#define mpn_redc_n __MPN(redc_n)
+__GMP_DECLSPEC void mpn_redc_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
+
+
+#define mpn_mod_1_1p_cps __MPN(mod_1_1p_cps)
+__GMP_DECLSPEC void mpn_mod_1_1p_cps __GMP_PROTO ((mp_limb_t [4], mp_limb_t));
+#define mpn_mod_1_1p __MPN(mod_1_1p)
+__GMP_DECLSPEC mp_limb_t mpn_mod_1_1p __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [4])) __GMP_ATTRIBUTE_PURE;
+
+#define mpn_mod_1s_2p_cps __MPN(mod_1s_2p_cps)
+__GMP_DECLSPEC void mpn_mod_1s_2p_cps __GMP_PROTO ((mp_limb_t [5], mp_limb_t));
+#define mpn_mod_1s_2p __MPN(mod_1s_2p)
+__GMP_DECLSPEC mp_limb_t mpn_mod_1s_2p __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [5])) __GMP_ATTRIBUTE_PURE;
+
+#define mpn_mod_1s_3p_cps __MPN(mod_1s_3p_cps)
+__GMP_DECLSPEC void mpn_mod_1s_3p_cps __GMP_PROTO ((mp_limb_t [6], mp_limb_t));
+#define mpn_mod_1s_3p __MPN(mod_1s_3p)
+__GMP_DECLSPEC mp_limb_t mpn_mod_1s_3p __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [6])) __GMP_ATTRIBUTE_PURE;
+
+#define mpn_mod_1s_4p_cps __MPN(mod_1s_4p_cps)
+__GMP_DECLSPEC void mpn_mod_1s_4p_cps __GMP_PROTO ((mp_limb_t [7], mp_limb_t));
+#define mpn_mod_1s_4p __MPN(mod_1s_4p)
+__GMP_DECLSPEC mp_limb_t mpn_mod_1s_4p __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [7])) __GMP_ATTRIBUTE_PURE;
+
+#define mpn_bc_mulmod_bnm1 __MPN(bc_mulmod_bnm1)
+__GMP_DECLSPEC void mpn_bc_mulmod_bnm1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr));
+#define mpn_mulmod_bnm1 __MPN(mulmod_bnm1)
+__GMP_DECLSPEC void mpn_mulmod_bnm1 __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+#define mpn_mulmod_bnm1_next_size __MPN(mulmod_bnm1_next_size)
+__GMP_DECLSPEC mp_size_t mpn_mulmod_bnm1_next_size __GMP_PROTO ((mp_size_t)) ATTRIBUTE_CONST;
+/* Returns rn + 4, plus an extra term when an exceeds half of rn (rn >> 1):
+   that term is rn if bn also exceeds the half, otherwise the half itself.
+   Presumably this is the scratch size, in limbs, wanted by mpn_mulmod_bnm1
+   for a result of rn limbs and operands of an and bn limbs -- confirm
+   against the implementation.  */
+static inline mp_size_t
+mpn_mulmod_bnm1_itch (mp_size_t rn, mp_size_t an, mp_size_t bn) {
+  const mp_size_t half = rn >> 1;
+  mp_size_t extra = 0;
+
+  /* Extra room is only added when the first operand is longer than half
+     of rn; how much depends on whether the second one is too.  */
+  if (an > half)
+    extra = (bn > half) ? rn : half;
+
+  return rn + 4 + extra;
+}
+
+#define mpn_sqrmod_bnm1 __MPN(sqrmod_bnm1)
+__GMP_DECLSPEC void mpn_sqrmod_bnm1 __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+#define mpn_sqrmod_bnm1_next_size __MPN(sqrmod_bnm1_next_size)
+__GMP_DECLSPEC mp_size_t mpn_sqrmod_bnm1_next_size __GMP_PROTO ((mp_size_t)) ATTRIBUTE_CONST;
+/* Returns rn + 3, plus an when an exceeds half of rn (rn >> 1).
+   Presumably this is the scratch size, in limbs, wanted by mpn_sqrmod_bnm1
+   for a result of rn limbs and an operand of an limbs -- confirm against
+   the implementation.  */
+static inline mp_size_t
+mpn_sqrmod_bnm1_itch (mp_size_t rn, mp_size_t an) {
+  const mp_size_t half = rn >> 1;
+
+  /* An operand longer than half of rn adds its own length.  */
+  if (an > half)
+    return rn + 3 + an;
+
+  return rn + 3;
+}
+
+typedef __gmp_randstate_struct *gmp_randstate_ptr;
+typedef const __gmp_randstate_struct *gmp_randstate_srcptr;
+
+/* Pseudo-random number generator function pointers structure.  There is
+   one entry per operation, and each takes the generator state as its
+   first argument.
+     randseed_fn   state plus an mpz_srcptr (presumably the seed value).
+     randget_fn    state, a limb pointer and an unsigned long; _gmp_rand
+                   calls it as (state, rp, bits).
+     randclear_fn  state only.
+     randiset_fn   two state pointers, the second const (presumably the
+                   destination and source of a copy -- confirm).  */
+typedef struct {
+  void (*randseed_fn) __GMP_PROTO ((gmp_randstate_t, mpz_srcptr));
+  void (*randget_fn) __GMP_PROTO ((gmp_randstate_t, mp_ptr, unsigned long int));
+  void (*randclear_fn) __GMP_PROTO ((gmp_randstate_t));
+  void (*randiset_fn) __GMP_PROTO ((gmp_randstate_ptr, gmp_randstate_srcptr));
+} gmp_randfnptr_t;
+
+/* Macro to obtain a void pointer to the function pointers structure.  It
+   expands to the _mp_algdata._mp_lc member of the state, so callers cast
+   it to gmp_randfnptr_t * before use (as _gmp_rand does).  */
+#define RNG_FNPTR(rstate) ((rstate)->_mp_algdata._mp_lc)
+
+/* Macro to obtain a pointer to the generator's state, which is kept in the
+   _mp_d member of the state's _mp_seed.
+   When used as an lvalue the rvalue needs to be cast to mp_ptr.  */
+#define RNG_STATE(rstate) ((rstate)->_mp_seed->_mp_d)
+
+/* Write a given number of random bits to rp.  The state argument is copied
+   into a local gmp_randstate_ptr first, so it is evaluated only once; the
+   generator's randget_fn is then fetched through RNG_FNPTR and called
+   with (state, rp, bits).  */
+#define _gmp_rand(rp, state, bits)                              \
+  do {                                                          \
+    gmp_randstate_ptr  __rstate = (state);                      \
+    (*((gmp_randfnptr_t *) RNG_FNPTR (__rstate))->randget_fn)   \
+       (__rstate, rp, bits);                                    \
+  } while (0)
+
+__GMP_DECLSPEC void __gmp_randinit_mt_noseed __GMP_PROTO ((gmp_randstate_t));
+
+
+/* __gmp_rands is the global state for the old-style random functions, and
+   is also used in the test programs (hence the __GMP_DECLSPEC).
+
+   There's no seeding here, so mpz_random etc will generate the same
+   sequence every time.  This is not unlike the C library random functions
+   if you don't seed them, so perhaps it's acceptable.  Digging up a seed
+   from /dev/random or the like would work on many systems, but might
+   encourage a false confidence, since it'd be pretty much impossible to do
+   something that would work reliably everywhere.  In any case the new style
+   functions are recommended to applications which care about randomness, so
+   the old functions aren't too important.
+
+   RANDS yields __gmp_rands, initializing it on first use: if
+   __gmp_rands_initialized is zero the flag is set to 1 and
+   __gmp_randinit_mt_noseed is called, in that order.  The flag is a plain
+   char and no locking is visible here.  */
+
+__GMP_DECLSPEC extern char             __gmp_rands_initialized;
+__GMP_DECLSPEC extern gmp_randstate_t  __gmp_rands;
+
+#define RANDS                                       \
+  ((__gmp_rands_initialized ? 0                     \
+    : (__gmp_rands_initialized = 1,                 \
+       __gmp_randinit_mt_noseed (__gmp_rands), 0)), \
+   __gmp_rands)
+
+/* this is used by the test programs, to free memory.  If __gmp_rands has
+   been initialized, the flag is reset and the state is released with
+   gmp_randclear, so a later use of RANDS initializes it again.  */
+#define RANDS_CLEAR()                   \
+  do {                                  \
+    if (__gmp_rands_initialized)        \
+      {                                 \
+        __gmp_rands_initialized = 0;    \
+        gmp_randclear (__gmp_rands);    \
+      }                                 \
+  } while (0)
+
+
+/* For a threshold between algorithms A and B, size>=thresh is where B
+   should be used.  Special value MP_SIZE_T_MAX means only ever use A, or
+   value 0 means only ever use B.  The tests for these special values will
+   be compile-time constants, so the compiler should be able to eliminate
+   the code for the unwanted algorithm.
+
+   ABOVE_THRESHOLD is true when B should be used and BELOW_THRESHOLD is its
+   logical negation.  thresh is expanded up to three times and size once,
+   so thresh in particular should be free of side effects.  */
+
+#define ABOVE_THRESHOLD(size,thresh)    \
+  ((thresh) == 0                        \
+   || ((thresh) != MP_SIZE_T_MAX        \
+       && (size) >= (thresh)))
+#define BELOW_THRESHOLD(size,thresh)  (! ABOVE_THRESHOLD (size, thresh))
+/* Per-routine MINSIZE constants for the Toom multiply and square functions.
+   Presumably each is the smallest operand size, in limbs, the routine of
+   the same name may be called with -- confirm against the individual
+   implementations.  Each squaring value equals that of its multiply
+   counterpart (toom2/toom22, toom3/toom33, toom4/toom44, toom6/toom6h,
+   toom8/toom8h).  The entries marked ??? were flagged as uncertain by the
+   original author.  */
+#define MPN_TOOM22_MUL_MINSIZE    4
+#define MPN_TOOM2_SQR_MINSIZE     4
+
+#define MPN_TOOM33_MUL_MINSIZE   17
+#define MPN_TOOM3_SQR_MINSIZE    17
+
+#define MPN_TOOM44_MUL_MINSIZE   30
+#define MPN_TOOM4_SQR_MINSIZE    30
+
+#define MPN_TOOM6H_MUL_MINSIZE   46
+#define MPN_TOOM6_SQR_MINSIZE    46
+
+#define MPN_TOOM8H_MUL_MINSIZE   86
+#define MPN_TOOM8_SQR_MINSIZE    86
+
+#define MPN_TOOM32_MUL_MINSIZE   10
+#define MPN_TOOM42_MUL_MINSIZE   10
+#define MPN_TOOM43_MUL_MINSIZE   49 /* ??? */
+#define MPN_TOOM53_MUL_MINSIZE   49 /* ??? */
+#define MPN_TOOM63_MUL_MINSIZE   49
+
+#define   mpn_sqr_diagonal __MPN(sqr_diagonal)
+__GMP_DECLSPEC void      mpn_sqr_diagonal __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
+
+#define   mpn_toom_interpolate_5pts __MPN(toom_interpolate_5pts)
+__GMP_DECLSPEC void      mpn_toom_interpolate_5pts __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_limb_t));
+
+enum toom6_flags {toom6_all_pos = 0, toom6_vm1_neg = 1, toom6_vm2_neg = 2};
+#define   mpn_toom_interpolate_6pts __MPN(toom_interpolate_6pts)
+__GMP_DECLSPEC void      mpn_toom_interpolate_6pts __GMP_PROTO ((mp_ptr, mp_size_t, enum toom6_flags, mp_ptr, mp_ptr, mp_ptr, mp_size_t));
+
+enum toom7_flags { toom7_w1_neg = 1, toom7_w3_neg = 2 };
+#define   mpn_toom_interpolate_7pts __MPN(toom_interpolate_7pts)
+__GMP_DECLSPEC void      mpn_toom_interpolate_7pts __GMP_PROTO ((mp_ptr, mp_size_t, enum toom7_flags, mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_ptr));
+
+#define mpn_toom_interpolate_8pts __MPN(toom_interpolate_8pts)
+__GMP_DECLSPEC void      mpn_toom_interpolate_8pts __GMP_PROTO ((mp_ptr, mp_size_t, mp_ptr, mp_ptr, mp_size_t, mp_ptr));
+
+#define mpn_toom_interpolate_12pts __MPN(toom_interpolate_12pts)
+__GMP_DECLSPEC void      mpn_toom_interpolate_12pts __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_ptr));
+
+#define mpn_toom_interpolate_16pts __MPN(toom_interpolate_16pts)
+__GMP_DECLSPEC void      mpn_toom_interpolate_16pts __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_ptr));
+
+#define   mpn_toom_couple_handling __MPN(toom_couple_handling)
+__GMP_DECLSPEC void mpn_toom_couple_handling __GMP_PROTO ((mp_ptr, mp_size_t, mp_ptr, int, mp_size_t, int, int));
+
+#define   mpn_toom_eval_dgr3_pm1 __MPN(toom_eval_dgr3_pm1)
+__GMP_DECLSPEC int mpn_toom_eval_dgr3_pm1 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr));
+
+#define   mpn_toom_eval_dgr3_pm2 __MPN(toom_eval_dgr3_pm2)
+__GMP_DECLSPEC int mpn_toom_eval_dgr3_pm2 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr));
+
+#define   mpn_toom_eval_pm1 __MPN(toom_eval_pm1)
+__GMP_DECLSPEC int mpn_toom_eval_pm1 __GMP_PROTO ((mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, mp_ptr));
+
+#define   mpn_toom_eval_pm2 __MPN(toom_eval_pm2)
+__GMP_DECLSPEC int mpn_toom_eval_pm2 __GMP_PROTO ((mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, mp_ptr));
+
+#define   mpn_toom_eval_pm2exp __MPN(toom_eval_pm2exp)
+__GMP_DECLSPEC int mpn_toom_eval_pm2exp __GMP_PROTO ((mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, unsigned, mp_ptr));
+
+#define   mpn_toom_eval_pm2rexp __MPN(toom_eval_pm2rexp)
+__GMP_DECLSPEC int mpn_toom_eval_pm2rexp __GMP_PROTO ((mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, unsigned, mp_ptr));
+
+#define   mpn_toom22_mul __MPN(toom22_mul)
+__GMP_DECLSPEC void      mpn_toom22_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+
+#define   mpn_toom32_mul __MPN(toom32_mul)
+__GMP_DECLSPEC void      mpn_toom32_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+
+#define   mpn_toom42_mul __MPN(toom42_mul)
+__GMP_DECLSPEC void      mpn_toom42_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+
+#define   mpn_toom52_mul __MPN(toom52_mul)
+__GMP_DECLSPEC void      mpn_toom52_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+
+#define   mpn_toom62_mul __MPN(toom62_mul)
+__GMP_DECLSPEC void      mpn_toom62_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+
+#define   mpn_toom2_sqr __MPN(toom2_sqr)
+__GMP_DECLSPEC void      mpn_toom2_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
+
+#define   mpn_toom33_mul __MPN(toom33_mul)
+__GMP_DECLSPEC void      mpn_toom33_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+
+#define   mpn_toom43_mul __MPN(toom43_mul)
+__GMP_DECLSPEC void      mpn_toom43_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+
+#define   mpn_toom53_mul __MPN(toom53_mul)
+__GMP_DECLSPEC void      mpn_toom53_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+
+#define   mpn_toom63_mul __MPN(toom63_mul)
+__GMP_DECLSPEC void      mpn_toom63_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+
+#define   mpn_toom3_sqr __MPN(toom3_sqr)
+__GMP_DECLSPEC void      mpn_toom3_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
+
+#define   mpn_toom44_mul __MPN(toom44_mul)
+__GMP_DECLSPEC void      mpn_toom44_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+
+#define   mpn_toom4_sqr __MPN(toom4_sqr)
+__GMP_DECLSPEC void      mpn_toom4_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
+
+#define   mpn_toom6h_mul __MPN(toom6h_mul)
+__GMP_DECLSPEC void      mpn_toom6h_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+
+#define   mpn_toom6_sqr __MPN(toom6_sqr)
+__GMP_DECLSPEC void      mpn_toom6_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
+
+#define   mpn_toom8h_mul __MPN(toom8h_mul)
+__GMP_DECLSPEC void      mpn_toom8h_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+
+#define   mpn_toom8_sqr __MPN(toom8_sqr)
+__GMP_DECLSPEC void      mpn_toom8_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
+
+#define   mpn_fft_best_k __MPN(fft_best_k)
+__GMP_DECLSPEC int       mpn_fft_best_k __GMP_PROTO ((mp_size_t, int)) ATTRIBUTE_CONST;
+
+#define   mpn_mul_fft __MPN(mul_fft)
+__GMP_DECLSPEC mp_limb_t mpn_mul_fft __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, int));
+
+#define   mpn_mul_fft_full __MPN(mul_fft_full)
+__GMP_DECLSPEC void      mpn_mul_fft_full __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));
+
+#define   mpn_nussbaumer_mul __MPN(nussbaumer_mul)
+__GMP_DECLSPEC void      mpn_nussbaumer_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));
+
+#define   mpn_fft_next_size __MPN(fft_next_size)
+__GMP_DECLSPEC mp_size_t mpn_fft_next_size __GMP_PROTO ((mp_size_t, int)) ATTRIBUTE_CONST;
+
+#define   mpn_sbpi1_div_qr __MPN(sbpi1_div_qr)
+__GMP_DECLSPEC mp_limb_t mpn_sbpi1_div_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
+
+#define   mpn_sbpi1_div_q __MPN(sbpi1_div_q)
+__GMP_DECLSPEC mp_limb_t mpn_sbpi1_div_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
+
+#define   mpn_sbpi1_divappr_q __MPN(sbpi1_divappr_q)
+__GMP_DECLSPEC mp_limb_t mpn_sbpi1_divappr_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
+
+#define   mpn_dcpi1_div_qr __MPN(dcpi1_div_qr)
+__GMP_DECLSPEC mp_limb_t mpn_dcpi1_div_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, gmp_pi1_t *));
+#define   mpn_dcpi1_div_qr_n __MPN(dcpi1_div_qr_n)
+__GMP_DECLSPEC mp_limb_t mpn_dcpi1_div_qr_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, gmp_pi1_t *, mp_ptr));
+
+#define   mpn_dcpi1_div_q __MPN(dcpi1_div_q)
+__GMP_DECLSPEC mp_limb_t mpn_dcpi1_div_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, gmp_pi1_t *));
+
+#define   mpn_dcpi1_divappr_q __MPN(dcpi1_divappr_q)
+__GMP_DECLSPEC mp_limb_t mpn_dcpi1_divappr_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, gmp_pi1_t *));
+#define   mpn_dcpi1_divappr_q_n __MPN(dcpi1_divappr_q_n)
+__GMP_DECLSPEC mp_limb_t mpn_dcpi1_divappr_q_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, gmp_pi1_t *, mp_ptr));
+
+#define   mpn_mu_div_qr __MPN(mu_div_qr)
+__GMP_DECLSPEC mp_limb_t mpn_mu_div_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+#define   mpn_mu_div_qr_itch __MPN(mu_div_qr_itch)
+__GMP_DECLSPEC mp_size_t mpn_mu_div_qr_itch __GMP_PROTO ((mp_size_t, mp_size_t, int));
+#define   mpn_mu_div_qr_choose_in __MPN(mu_div_qr_choose_in)
+__GMP_DECLSPEC mp_size_t mpn_mu_div_qr_choose_in __GMP_PROTO ((mp_size_t, mp_size_t, int));
+
+#define   mpn_preinv_mu_div_qr __MPN(preinv_mu_div_qr)
+__GMP_DECLSPEC mp_limb_t mpn_preinv_mu_div_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+#define   mpn_preinv_mu_div_qr_itch __MPN(preinv_mu_div_qr_itch)
+__GMP_DECLSPEC mp_size_t mpn_preinv_mu_div_qr_itch __GMP_PROTO ((mp_size_t, mp_size_t, mp_size_t));
+
+#define   mpn_mu_divappr_q __MPN(mu_divappr_q)
+__GMP_DECLSPEC mp_limb_t mpn_mu_divappr_q __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+#define   mpn_mu_divappr_q_itch __MPN(mu_divappr_q_itch)
+__GMP_DECLSPEC mp_size_t mpn_mu_divappr_q_itch __GMP_PROTO ((mp_size_t, mp_size_t, int));
+#define   mpn_mu_divappr_q_choose_in __MPN(mu_divappr_q_choose_in)
+__GMP_DECLSPEC mp_size_t mpn_mu_divappr_q_choose_in __GMP_PROTO ((mp_size_t, mp_size_t, int));
+
+#define   mpn_preinv_mu_divappr_q __MPN(preinv_mu_divappr_q)
+__GMP_DECLSPEC mp_limb_t mpn_preinv_mu_divappr_q __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+
+#define   mpn_mu_div_q __MPN(mu_div_q)
+__GMP_DECLSPEC mp_limb_t mpn_mu_div_q __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+#define   mpn_mu_div_q_itch __MPN(mu_div_q_itch)
+__GMP_DECLSPEC mp_size_t mpn_mu_div_q_itch __GMP_PROTO ((mp_size_t, mp_size_t, int));
+
+#define  mpn_div_q __MPN(div_q)
+__GMP_DECLSPEC void mpn_div_q __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+
+#define   mpn_invert __MPN(invert)
+__GMP_DECLSPEC void      mpn_invert __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
+#define mpn_invert_itch(n)  mpn_invertappr_itch(n)
+
+#define   mpn_ni_invertappr __MPN(ni_invertappr)
+__GMP_DECLSPEC mp_limb_t mpn_ni_invertappr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
+#define   mpn_invertappr __MPN(invertappr)
+__GMP_DECLSPEC mp_limb_t mpn_invertappr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
+#define mpn_invertappr_itch(n)  (3 * (n) + 2)
+
+#define   mpn_binvert __MPN(binvert)
+__GMP_DECLSPEC void      mpn_binvert __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
+#define   mpn_binvert_itch __MPN(binvert_itch)
+__GMP_DECLSPEC mp_size_t mpn_binvert_itch __GMP_PROTO ((mp_size_t));
+
+#define mpn_bdiv_q_1 __MPN(bdiv_q_1)
+__GMP_DECLSPEC mp_limb_t mpn_bdiv_q_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
+
+#define mpn_pi1_bdiv_q_1 __MPN(pi1_bdiv_q_1)
+__GMP_DECLSPEC mp_limb_t mpn_pi1_bdiv_q_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int));
+
+#define   mpn_sbpi1_bdiv_qr __MPN(sbpi1_bdiv_qr)
+__GMP_DECLSPEC mp_limb_t mpn_sbpi1_bdiv_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
+
+#define   mpn_sbpi1_bdiv_q __MPN(sbpi1_bdiv_q)
+__GMP_DECLSPEC void      mpn_sbpi1_bdiv_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
+
+#define   mpn_dcpi1_bdiv_qr __MPN(dcpi1_bdiv_qr)
+__GMP_DECLSPEC mp_limb_t mpn_dcpi1_bdiv_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
+#define   mpn_dcpi1_bdiv_qr_n_itch __MPN(dcpi1_bdiv_qr_n_itch)
+__GMP_DECLSPEC mp_size_t mpn_dcpi1_bdiv_qr_n_itch __GMP_PROTO ((mp_size_t));
+
+#define   mpn_dcpi1_bdiv_qr_n __MPN(dcpi1_bdiv_qr_n)
+__GMP_DECLSPEC mp_limb_t mpn_dcpi1_bdiv_qr_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr));
+#define   mpn_dcpi1_bdiv_q __MPN(dcpi1_bdiv_q)
+__GMP_DECLSPEC void      mpn_dcpi1_bdiv_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
+
+#define   mpn_dcpi1_bdiv_q_n_itch __MPN(dcpi1_bdiv_q_n_itch)
+__GMP_DECLSPEC mp_size_t mpn_dcpi1_bdiv_q_n_itch __GMP_PROTO ((mp_size_t));
+#define   mpn_dcpi1_bdiv_q_n __MPN(dcpi1_bdiv_q_n)
+__GMP_DECLSPEC void      mpn_dcpi1_bdiv_q_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr));
+
+#define   mpn_mu_bdiv_qr __MPN(mu_bdiv_qr)
+__GMP_DECLSPEC mp_limb_t mpn_mu_bdiv_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+#define   mpn_mu_bdiv_qr_itch __MPN(mu_bdiv_qr_itch)
+__GMP_DECLSPEC mp_size_t mpn_mu_bdiv_qr_itch __GMP_PROTO ((mp_size_t, mp_size_t));
+
+#define   mpn_mu_bdiv_q __MPN(mu_bdiv_q)
+__GMP_DECLSPEC void      mpn_mu_bdiv_q __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+#define   mpn_mu_bdiv_q_itch __MPN(mu_bdiv_q_itch)
+__GMP_DECLSPEC mp_size_t mpn_mu_bdiv_q_itch __GMP_PROTO ((mp_size_t, mp_size_t));
+
+#define   mpn_bdiv_qr __MPN(bdiv_qr)
+__GMP_DECLSPEC mp_limb_t mpn_bdiv_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+#define   mpn_bdiv_qr_itch __MPN(bdiv_qr_itch)
+__GMP_DECLSPEC mp_size_t mpn_bdiv_qr_itch __GMP_PROTO ((mp_size_t, mp_size_t));
+
+#define   mpn_bdiv_q __MPN(bdiv_q)
+__GMP_DECLSPEC void      mpn_bdiv_q __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+#define   mpn_bdiv_q_itch __MPN(bdiv_q_itch)
+__GMP_DECLSPEC mp_size_t mpn_bdiv_q_itch __GMP_PROTO ((mp_size_t, mp_size_t));
+
+#define   mpn_divexact __MPN(divexact)
+__GMP_DECLSPEC void      mpn_divexact __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));
+#define   mpn_divexact_itch __MPN(divexact_itch)
+__GMP_DECLSPEC mp_size_t mpn_divexact_itch __GMP_PROTO ((mp_size_t, mp_size_t));
+
+#define   mpn_bdiv_dbm1c __MPN(bdiv_dbm1c)
+__GMP_DECLSPEC mp_limb_t mpn_bdiv_dbm1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
+#define   mpn_bdiv_dbm1(dst, src, size, divisor) \
+  mpn_bdiv_dbm1c (dst, src, size, divisor, __GMP_CAST (mp_limb_t, 0))
+
+#define   mpn_powm __MPN(powm)
+__GMP_DECLSPEC void      mpn_powm __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+#define   mpn_powlo __MPN(powlo)
+__GMP_DECLSPEC void      mpn_powlo __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr));
+#define   mpn_powm_sec __MPN(powm_sec)
+__GMP_DECLSPEC void      mpn_powm_sec __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+#define   mpn_powm_sec_itch __MPN(powm_sec_itch)
+__GMP_DECLSPEC mp_size_t mpn_powm_sec_itch __GMP_PROTO ((mp_size_t, mp_size_t, mp_size_t));
+#define   mpn_subcnd_n __MPN(subcnd_n)
+__GMP_DECLSPEC mp_limb_t mpn_subcnd_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
+#define   mpn_tabselect __MPN(tabselect)
+__GMP_DECLSPEC void      mpn_tabselect __GMP_PROTO ((volatile mp_limb_t *, volatile mp_limb_t *, mp_size_t, mp_size_t, mp_size_t));
+#define mpn_redc_1_sec __MPN(redc_1_sec)
+__GMP_DECLSPEC void mpn_redc_1_sec __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
+
+#ifndef DIVEXACT_BY3_METHOD
+#if GMP_NUMB_BITS % 2 == 0 && ! defined (HAVE_NATIVE_mpn_divexact_by3c)
+#define DIVEXACT_BY3_METHOD 0  /* default to using mpn_bdiv_dbm1c */
+#else
+#define DIVEXACT_BY3_METHOD 1
+#endif
+#endif
+
+#if DIVEXACT_BY3_METHOD == 0
+#undef mpn_divexact_by3
+#define mpn_divexact_by3(dst,src,size) \
+  (3 & mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 3)))
+/* override mpn_divexact_by3c defined in gmp.h */
+/*
+#undef mpn_divexact_by3c
+#define mpn_divexact_by3c(dst,src,size,cy) \
+  (3 & mpn_bdiv_dbm1c (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 3), GMP_NUMB_MASK / 3 * cy))
+*/
+#endif
+
+#if GMP_NUMB_BITS % 4 == 0
+#define mpn_divexact_by5(dst,src,size) \
+  (7 & 3 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 5)))
+#endif
+
+/* mpn_divexact_by7 and mpn_divexact_by9 are both thin wrappers over
+   mpn_bdiv_dbm1 and are both provided only when GMP_NUMB_BITS is a
+   multiple of 6, so they share a single guard.  */
+#if GMP_NUMB_BITS % 6 == 0
+#define mpn_divexact_by7(dst,src,size) \
+  (7 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 7)))
+#define mpn_divexact_by9(dst,src,size) \
+  (15 & 7 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 9)))
+#endif
+
+#if GMP_NUMB_BITS % 10 == 0
+#define mpn_divexact_by11(dst,src,size) \
+  (15 & 5 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 11)))
+#endif
+
+#if GMP_NUMB_BITS % 12 == 0
+#define mpn_divexact_by13(dst,src,size) \
+  (15 & 3 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 13)))
+#endif
+
+#if GMP_NUMB_BITS % 4 == 0
+#define mpn_divexact_by15(dst,src,size) \
+  (15 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 15)))
+#endif
+
+#define mpz_divexact_gcd  __gmpz_divexact_gcd
+__GMP_DECLSPEC void    mpz_divexact_gcd __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+
+#define mpz_inp_str_nowhite __gmpz_inp_str_nowhite
+#ifdef _GMP_H_HAVE_FILE
+__GMP_DECLSPEC size_t  mpz_inp_str_nowhite __GMP_PROTO ((mpz_ptr, FILE *, int, int, size_t));
+#endif
+
+#define mpn_divisible_p __MPN(divisible_p)
+__GMP_DECLSPEC int     mpn_divisible_p __GMP_PROTO ((mp_srcptr, mp_size_t, mp_srcptr, mp_size_t)) __GMP_ATTRIBUTE_PURE;
+
+#define   mpn_rootrem __MPN(rootrem)
+__GMP_DECLSPEC mp_size_t mpn_rootrem __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
+
+
+#if defined (_CRAY)
+#define MPN_COPY_INCR(dst, src, n)                                     \
+  do {                                                                 \
+    int __i;           /* Faster on some Crays with plain int */       \
+    _Pragma ("_CRI ivdep");                                            \
+    for (__i = 0; __i < (n); __i++)                                    \
+      (dst)[__i] = (src)[__i];                                         \
+  } while (0)
+#endif
+
+/* used by test programs, hence __GMP_DECLSPEC */
+#ifndef mpn_copyi  /* if not done with cpuvec in a fat binary */
+#define mpn_copyi __MPN(copyi)
+__GMP_DECLSPEC void mpn_copyi __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
+#endif
+
+#if ! defined (MPN_COPY_INCR) && HAVE_NATIVE_mpn_copyi
+#define MPN_COPY_INCR(dst, src, size)                   \
+  do {                                                  \
+    ASSERT ((size) >= 0);                               \
+    ASSERT (MPN_SAME_OR_INCR_P (dst, src, size));       \
+    mpn_copyi (dst, src, size);                         \
+  } while (0)
+#endif
+
+/* Copy N limbs from SRC to DST incrementing, N==0 allowed.  */
+#if ! defined (MPN_COPY_INCR)
+#define MPN_COPY_INCR(dst, src, n)                      \
+  do {                                                  \
+    ASSERT ((n) >= 0);                                  \
+    ASSERT (MPN_SAME_OR_INCR_P (dst, src, n));          \
+    if ((n) != 0)                                       \
+      {                                                 \
+       mp_size_t __n = (n) - 1;                        \
+       mp_ptr __dst = (dst);                           \
+       mp_srcptr __src = (src);                        \
+       mp_limb_t __x;                                  \
+       __x = *__src++;                                 \
+       if (__n != 0)                                   \
+         {                                             \
+           do                                          \
+             {                                         \
+               *__dst++ = __x;                         \
+               __x = *__src++;                         \
+             }                                         \
+           while (--__n);                              \
+         }                                             \
+       *__dst++ = __x;                                 \
+      }                                                 \
+  } while (0)
+#endif
+
+
+#if defined (_CRAY)
+#define MPN_COPY_DECR(dst, src, n)                                     \
+  do {                                                                 \
+    int __i;           /* Faster on some Crays with plain int */       \
+    _Pragma ("_CRI ivdep");                                            \
+    for (__i = (n) - 1; __i >= 0; __i--)                               \
+      (dst)[__i] = (src)[__i];                                         \
+  } while (0)
+#endif
+
+/* used by test programs, hence __GMP_DECLSPEC */
+#ifndef mpn_copyd  /* if not done with cpuvec in a fat binary */
+#define mpn_copyd __MPN(copyd)
+__GMP_DECLSPEC void mpn_copyd __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
+#endif
+
+#if ! defined (MPN_COPY_DECR) && HAVE_NATIVE_mpn_copyd
+#define MPN_COPY_DECR(dst, src, size)                   \
+  do {                                                  \
+    ASSERT ((size) >= 0);                               \
+    ASSERT (MPN_SAME_OR_DECR_P (dst, src, size));       \
+    mpn_copyd (dst, src, size);                         \
+  } while (0)
+#endif
+
+/* Copy N limbs from SRC to DST decrementing, N==0 allowed.  */
+#if ! defined (MPN_COPY_DECR)
+#define MPN_COPY_DECR(dst, src, n)                      \
+  do {                                                  \
+    ASSERT ((n) >= 0);                                  \
+    ASSERT (MPN_SAME_OR_DECR_P (dst, src, n));          \
+    if ((n) != 0)                                       \
+      {                                                 \
+       mp_size_t __n = (n) - 1;                        \
+       mp_ptr __dst = (dst) + __n;                     \
+       mp_srcptr __src = (src) + __n;                  \
+       mp_limb_t __x;                                  \
+       __x = *__src--;                                 \
+       if (__n != 0)                                   \
+         {                                             \
+           do                                          \
+             {                                         \
+               *__dst-- = __x;                         \
+               __x = *__src--;                         \
+             }                                         \
+           while (--__n);                              \
+         }                                             \
+       *__dst-- = __x;                                 \
+      }                                                 \
+  } while (0)
+#endif
+
+
+#ifndef MPN_COPY
+#define MPN_COPY(d,s,n)                         \
+  do {                                          \
+    ASSERT (MPN_SAME_OR_SEPARATE_P (d, s, n));  \
+    MPN_COPY_INCR (d, s, n);                    \
+  } while (0)
+#endif
+
+
+/* Set {dst,size} to the limbs of {src,size} in reverse order. */
+#define MPN_REVERSE(dst, src, size)                     \
+  do {                                                  \
+    mp_ptr     __dst = (dst);                           \
+    mp_size_t  __size = (size);                         \
+    mp_srcptr  __src = (src) + __size - 1;              \
+    mp_size_t  __i;                                     \
+    ASSERT ((size) >= 0);                               \
+    ASSERT (! MPN_OVERLAP_P (dst, size, src, size));    \
+    CRAY_Pragma ("_CRI ivdep");                         \
+    for (__i = 0; __i < __size; __i++)                  \
+      {                                                 \
+        *__dst = *__src;                                \
+        __dst++;                                        \
+        __src--;                                        \
+      }                                                 \
+  } while (0)
+
+
+/* Zero n limbs at dst.
+
+   For power and powerpc we want an inline stu/bdnz loop for zeroing.  On
+   ppc630 for instance this is optimal since it can sustain only 1 store per
+   cycle.
+
+   gcc 2.95.x (for powerpc64 -maix64, or powerpc32) doesn't recognise that
+   the loop in the generic code below can become stu/bdnz.  The
+   pre-incrementing do/while here helps it get to that.  The same caveat
+   about plain -mpowerpc64 mode applies here as to __GMPN_COPY_INCR in gmp.h.
+
+   xlc 3.1 already generates stu/bdnz from the generic C, and does so from
+   this loop too.
+
+   Enhancement: GLIBC does some trickery with dcbz to zero whole cache lines
+   at a time.  MPN_ZERO isn't all that important in GMP, so it might be more
+   trouble than it's worth to do the same, though perhaps a call to memset
+   would be good when on a GNU system.  */
+
+#if HAVE_HOST_CPU_FAMILY_power || HAVE_HOST_CPU_FAMILY_powerpc
+#define MPN_ZERO(dst, n)                       \
+  do {                                         \
+    ASSERT ((n) >= 0);                         \
+    if ((n) != 0)                              \
+      {                                                \
+       mp_ptr __dst = (dst) - 1;               \
+       mp_size_t __n = (n);                    \
+       do                                      \
+         *++__dst = 0;                         \
+       while (--__n);                          \
+      }                                                \
+  } while (0)
+#endif
+
+#ifndef MPN_ZERO
+#define MPN_ZERO(dst, n)                       \
+  do {                                         \
+    ASSERT ((n) >= 0);                         \
+    if ((n) != 0)                              \
+      {                                                \
+       mp_ptr __dst = (dst);                   \
+       mp_size_t __n = (n);                    \
+       do                                      \
+         *__dst++ = 0;                         \
+       while (--__n);                          \
+      }                                                \
+  } while (0)
+#endif
+
+
+/* On the x86s repe/scasl doesn't seem useful, since it takes many cycles to
+   start up and would need to strip a lot of zeros before it'd be faster
+   than a simple cmpl loop.  Here are some times in cycles for
+   std/repe/scasl/cld and cld/repe/scasl (the latter would be for stripping
+   low zeros).
+
+                std   cld
+           P5    18    16
+           P6    46    38
+           K6    36    13
+           K7    21    20
+*/
+#ifndef MPN_NORMALIZE
+#define MPN_NORMALIZE(DST, NLIMBS) \
+  do {                                                                 \
+    while ((NLIMBS) > 0)                                                \
+      {                                                                        \
+       if ((DST)[(NLIMBS) - 1] != 0)                                   \
+         break;                                                        \
+       (NLIMBS)--;                                                     \
+      }                                                                        \
+  } while (0)
+#endif
+#ifndef MPN_NORMALIZE_NOT_ZERO
+#define MPN_NORMALIZE_NOT_ZERO(DST, NLIMBS)     \
+  do {                                          \
+    ASSERT ((NLIMBS) >= 1);                     \
+    while (1)                                   \
+      {                                         \
+       if ((DST)[(NLIMBS) - 1] != 0)           \
+         break;                                \
+       (NLIMBS)--;                             \
+      }                                         \
+  } while (0)
+#endif
+
+/* Strip least significant zero limbs from {ptr,size} by incrementing ptr
+   and decrementing size.  low should be ptr[0], and will be the new ptr[0]
+   on returning.  The number in {ptr,size} must be non-zero, ie. size!=0 and
+   somewhere a non-zero limb.  */
+#define MPN_STRIP_LOW_ZEROS_NOT_ZERO(ptr, size, low)    \
+  do {                                                  \
+    ASSERT ((size) >= 1);                               \
+    ASSERT ((low) == (ptr)[0]);                         \
+                                                        \
+    while ((low) == 0)                                  \
+      {                                                 \
+        (size)--;                                       \
+        ASSERT ((size) >= 1);                           \
+        (ptr)++;                                        \
+        (low) = *(ptr);                                 \
+      }                                                 \
+  } while (0)
+
+/* Initialize X of type mpz_t with space for NLIMBS limbs.  X should be a
+   temporary variable; it will be automatically cleared out at function
+   return.  We use __x here to make it possible to accept both mpz_ptr and
+   mpz_t arguments.  */
+#define MPZ_TMP_INIT(X, NLIMBS)                                         \
+  do {                                                                  \
+    mpz_ptr __x = (X);                                                  \
+    ASSERT ((NLIMBS) >= 1);                                             \
+    __x->_mp_alloc = (NLIMBS);                                          \
+    __x->_mp_d = TMP_ALLOC_LIMBS (NLIMBS);                             \
+  } while (0)
+
+/* Realloc for an mpz_t Z if it has fewer than N limbs.  Z and N may each be
+   evaluated more than once.  */
+#define MPZ_REALLOC(z,n) (UNLIKELY ((n) > ALLOC(z))     \
+                          ? (mp_ptr) _mpz_realloc(z,n)  \
+                          : PTR(z))
+
+#define MPZ_EQUAL_1_P(z)  (SIZ(z)==1 && PTR(z)[0] == 1)
+
+
+/* MPN_FIB2_SIZE(n) is the size in limbs required by mpn_fib2_ui for fp and
+   f1p.
+
+   From Knuth vol 1 section 1.2.8, F[n] = phi^n/sqrt(5) rounded to the
+   nearest integer, where phi=(1+sqrt(5))/2 is the golden ratio.  So the
+   number of bits required is n*log_2((1+sqrt(5))/2) = n*0.6942419.
+
+   The multiplier used is 23/32=0.71875 for efficient calculation on CPUs
+   without good floating point.  There's +2 for rounding up, and a further
+   +2 since at the last step x limbs are doubled into a 2x+1 limb region
+   whereas the actual F[2k] value might be only 2x-1 limbs.
+
+   Note that a division is done first, since on a 32-bit system it's at
+   least conceivable to go right up to n==ULONG_MAX.  (F[2^32-1] would be
+   about 380Mbytes, plus temporary workspace of about 1.2Gbytes here and
+   whatever a multiply of two 190Mbyte numbers takes.)
+
+   Enhancement: When GMP_NUMB_BITS is not a power of 2 the division could be
+   worked into the multiplier.  */
+
+#define MPN_FIB2_SIZE(n) \
+  ((mp_size_t) ((n) / 32 * 23 / GMP_NUMB_BITS) + 4)
+
+
+/* FIB_TABLE(n) returns the Fibonacci number F[n].  Must have n in the range
+   -1 <= n <= FIB_TABLE_LIMIT (that constant in fib_table.h).
+
+   FIB_TABLE_LUCNUM_LIMIT (in fib_table.h) is the largest n for which L[n] =
+   F[n] + 2*F[n-1] fits in a limb.  */
+
+__GMP_DECLSPEC extern const mp_limb_t __gmp_fib_table[];
+#define FIB_TABLE(n)  (__gmp_fib_table[(n)+1])
+
+#define SIEVESIZE 512          /* FIXME: Allow gmp_init_primesieve to choose */
+typedef struct
+{
+  unsigned long d;                /* current index in s[] */
+  unsigned long s0;               /* number corresponding to s[0] */
+  unsigned long sqrt_s0;          /* misnomer for sqrt(s[SIEVESIZE-1]) */
+  unsigned char s[SIEVESIZE + 1];  /* sieve table */
+} gmp_primesieve_t;
+
+/* Prime sieve entry points operating on a gmp_primesieve_t.  The parameter
+   lists go through __GMP_PROTO, matching the other prototypes in this
+   header.  */
+#define gmp_init_primesieve __gmp_init_primesieve
+__GMP_DECLSPEC void gmp_init_primesieve __GMP_PROTO ((gmp_primesieve_t *));
+
+#define gmp_nextprime __gmp_nextprime
+__GMP_DECLSPEC unsigned long int gmp_nextprime __GMP_PROTO ((gmp_primesieve_t *));
+
+
+#ifndef MUL_TOOM22_THRESHOLD
+#define MUL_TOOM22_THRESHOLD             30
+#endif
+
+#ifndef MUL_TOOM33_THRESHOLD
+#define MUL_TOOM33_THRESHOLD            100
+#endif
+
+#ifndef MUL_TOOM44_THRESHOLD
+#define MUL_TOOM44_THRESHOLD            300
+#endif
+
+#ifndef MUL_TOOM6H_THRESHOLD
+#define MUL_TOOM6H_THRESHOLD            350
+#endif
+
+#ifndef SQR_TOOM6_THRESHOLD
+#define SQR_TOOM6_THRESHOLD MUL_TOOM6H_THRESHOLD
+#endif
+
+#ifndef MUL_TOOM8H_THRESHOLD
+#define MUL_TOOM8H_THRESHOLD            450
+#endif
+
+#ifndef SQR_TOOM8_THRESHOLD
+#define SQR_TOOM8_THRESHOLD MUL_TOOM8H_THRESHOLD
+#endif
+
+#ifndef MUL_TOOM32_TO_TOOM43_THRESHOLD
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD  100
+#endif
+
+#ifndef MUL_TOOM32_TO_TOOM53_THRESHOLD
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD  110
+#endif
+
+#ifndef MUL_TOOM42_TO_TOOM53_THRESHOLD
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD  100
+#endif
+
+#ifndef MUL_TOOM42_TO_TOOM63_THRESHOLD
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD  110
+#endif
+
+/* MUL_TOOM22_THRESHOLD_LIMIT is the maximum for MUL_TOOM22_THRESHOLD.  In a
+   normal build MUL_TOOM22_THRESHOLD is a constant and we use that.  In a fat
+   binary or tune program build MUL_TOOM22_THRESHOLD is a variable and a
+   separate hard limit will have been defined.  Similarly for TOOM3.  */
+#ifndef MUL_TOOM22_THRESHOLD_LIMIT
+#define MUL_TOOM22_THRESHOLD_LIMIT  MUL_TOOM22_THRESHOLD
+#endif
+#ifndef MUL_TOOM33_THRESHOLD_LIMIT
+#define MUL_TOOM33_THRESHOLD_LIMIT  MUL_TOOM33_THRESHOLD
+#endif
+#ifndef MULLO_BASECASE_THRESHOLD_LIMIT
+#define MULLO_BASECASE_THRESHOLD_LIMIT  MULLO_BASECASE_THRESHOLD
+#endif
+
+/* SQR_BASECASE_THRESHOLD is where mpn_sqr_basecase should take over from
+   mpn_mul_basecase.  Default is to use mpn_sqr_basecase from 0.  (Note that we
+   certainly always want it if there's a native assembler mpn_sqr_basecase.)
+
+   If it turns out that mpn_toom2_sqr becomes faster than mpn_mul_basecase
+   before mpn_sqr_basecase does, then SQR_BASECASE_THRESHOLD is the toom2
+   threshold and SQR_TOOM2_THRESHOLD is 0.  This oddity arises more or less
+   because SQR_TOOM2_THRESHOLD represents the size up to which mpn_sqr_basecase
+   should be used, and that may be never.  */
+
+#ifndef SQR_BASECASE_THRESHOLD
+#define SQR_BASECASE_THRESHOLD            0
+#endif
+
+#ifndef SQR_TOOM2_THRESHOLD
+#define SQR_TOOM2_THRESHOLD              50
+#endif
+
+#ifndef SQR_TOOM3_THRESHOLD
+#define SQR_TOOM3_THRESHOLD             120
+#endif
+
+#ifndef SQR_TOOM4_THRESHOLD
+#define SQR_TOOM4_THRESHOLD             400
+#endif
+
+/* See comments above about MUL_TOOM33_THRESHOLD_LIMIT.  */
+#ifndef SQR_TOOM3_THRESHOLD_LIMIT
+#define SQR_TOOM3_THRESHOLD_LIMIT  SQR_TOOM3_THRESHOLD
+#endif
+
+#ifndef DC_DIV_QR_THRESHOLD
+#define DC_DIV_QR_THRESHOLD              50
+#endif
+
+#ifndef DC_DIVAPPR_Q_THRESHOLD
+#define DC_DIVAPPR_Q_THRESHOLD          200
+#endif
+
+#ifndef DC_BDIV_QR_THRESHOLD
+#define DC_BDIV_QR_THRESHOLD             50
+#endif
+
+#ifndef DC_BDIV_Q_THRESHOLD
+#define DC_BDIV_Q_THRESHOLD             180
+#endif
+
+#ifndef DIVEXACT_JEB_THRESHOLD
+#define DIVEXACT_JEB_THRESHOLD           25
+#endif
+
+#ifndef INV_MULMOD_BNM1_THRESHOLD
+#define INV_MULMOD_BNM1_THRESHOLD  (5*MULMOD_BNM1_THRESHOLD)
+#endif
+
+/* INV_NEWTON_THRESHOLD gets its default first so that the
+   INV_APPR_THRESHOLD fallback, which is defined in terms of it, reads
+   top-down.  The resulting values are the same in either order because
+   the macro is only expanded where INV_APPR_THRESHOLD is used.  */
+#ifndef INV_NEWTON_THRESHOLD
+#define INV_NEWTON_THRESHOLD            200
+#endif
+
+#ifndef INV_APPR_THRESHOLD
+#define INV_APPR_THRESHOLD         INV_NEWTON_THRESHOLD
+#endif
+
+#ifndef BINV_NEWTON_THRESHOLD
+#define BINV_NEWTON_THRESHOLD           300
+#endif
+
+#ifndef MU_DIVAPPR_Q_THRESHOLD
+#define MU_DIVAPPR_Q_THRESHOLD         2000
+#endif
+
+#ifndef MU_DIV_QR_THRESHOLD
+#define MU_DIV_QR_THRESHOLD            2000
+#endif
+
+#ifndef MUPI_DIV_QR_THRESHOLD
+#define MUPI_DIV_QR_THRESHOLD           200
+#endif
+
+#ifndef MU_BDIV_Q_THRESHOLD
+#define MU_BDIV_Q_THRESHOLD            2000
+#endif
+
+#ifndef MU_BDIV_QR_THRESHOLD
+#define MU_BDIV_QR_THRESHOLD           2000
+#endif
+
+#ifndef MULMOD_BNM1_THRESHOLD
+#define MULMOD_BNM1_THRESHOLD            16
+#endif
+
+#ifndef SQRMOD_BNM1_THRESHOLD
+#define SQRMOD_BNM1_THRESHOLD            16
+#endif
+
+#ifndef MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD
+#define MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD  (INV_MULMOD_BNM1_THRESHOLD/2)
+#endif
+
+#if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
+
+#ifndef REDC_1_TO_REDC_2_THRESHOLD
+#define REDC_1_TO_REDC_2_THRESHOLD       15
+#endif
+#ifndef REDC_2_TO_REDC_N_THRESHOLD
+#define REDC_2_TO_REDC_N_THRESHOLD      100
+#endif
+
+#else
+
+#ifndef REDC_1_TO_REDC_N_THRESHOLD
+#define REDC_1_TO_REDC_N_THRESHOLD      100
+#endif
+
+#endif /* HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2 */
+
+
+/* First k to use for an FFT modF multiply.  A modF FFT is an order
+   log(2^k)/log(2^(k-1)) algorithm, so k=3 is merely 1.5, little better than
+   karatsuba at 1.585, whereas k=4 is 1.33 which is faster than toom3 at
+   1.465.  */
+#define FFT_FIRST_K  4
+
+/* Threshold at which FFT should be used to do a modF NxN -> N multiply. */
+#ifndef MUL_FFT_MODF_THRESHOLD
+#define MUL_FFT_MODF_THRESHOLD   (MUL_TOOM33_THRESHOLD * 3)
+#endif
+#ifndef SQR_FFT_MODF_THRESHOLD
+#define SQR_FFT_MODF_THRESHOLD   (SQR_TOOM3_THRESHOLD * 3)
+#endif
+
+/* Threshold at which FFT should be used to do an NxN -> 2N multiply.  This
+   will be a size where FFT is using k=7 or k=8, since an FFT-k used for an
+   NxN->2N multiply and not recursing into itself is an order
+   log(2^k)/log(2^(k-2)) algorithm, so it'll be at least k=7 at 1.4 which
+   is the first better than toom3.  */
+#ifndef MUL_FFT_THRESHOLD
+#define MUL_FFT_THRESHOLD   (MUL_FFT_MODF_THRESHOLD * 10)
+#endif
+#ifndef SQR_FFT_THRESHOLD
+#define SQR_FFT_THRESHOLD   (SQR_FFT_MODF_THRESHOLD * 10)
+#endif
+
+/* Table of thresholds for successive modF FFT "k"s.  The first entry is
+   where FFT_FIRST_K+1 should be used, the second FFT_FIRST_K+2,
+   etc.  See mpn_fft_best_k(). */
+#ifndef MUL_FFT_TABLE
+#define MUL_FFT_TABLE                           \
+  { MUL_TOOM33_THRESHOLD * 4,   /* k=5 */        \
+    MUL_TOOM33_THRESHOLD * 8,   /* k=6 */        \
+    MUL_TOOM33_THRESHOLD * 16,  /* k=7 */        \
+    MUL_TOOM33_THRESHOLD * 32,  /* k=8 */        \
+    MUL_TOOM33_THRESHOLD * 96,  /* k=9 */        \
+    MUL_TOOM33_THRESHOLD * 288, /* k=10 */       \
+    0 }
+#endif
+#ifndef SQR_FFT_TABLE
+#define SQR_FFT_TABLE                           \
+  { SQR_TOOM3_THRESHOLD * 4,   /* k=5 */        \
+    SQR_TOOM3_THRESHOLD * 8,   /* k=6 */        \
+    SQR_TOOM3_THRESHOLD * 16,  /* k=7 */        \
+    SQR_TOOM3_THRESHOLD * 32,  /* k=8 */        \
+    SQR_TOOM3_THRESHOLD * 96,  /* k=9 */        \
+    SQR_TOOM3_THRESHOLD * 288, /* k=10 */       \
+    0 }
+#endif
+
+struct fft_table_nk
+{
+  unsigned int n:27;
+  unsigned int k:5;
+};
+
+#ifndef FFT_TABLE_ATTRS
+#define FFT_TABLE_ATTRS   static const
+#endif
+
+#define MPN_FFT_TABLE_SIZE  16
+
+
+/* DC_DIV_QR_THRESHOLD already receives its default (50) earlier in this
+   header, alongside the other DC_* thresholds.  A second "#ifndef"
+   fallback of 3 * MUL_TOOM22_THRESHOLD used to sit here, but it could
+   never take effect and has been dropped.  */
+
+#ifndef GET_STR_DC_THRESHOLD
+#define GET_STR_DC_THRESHOLD             18
+#endif
+
+#ifndef GET_STR_PRECOMPUTE_THRESHOLD
+#define GET_STR_PRECOMPUTE_THRESHOLD     35
+#endif
+
+#ifndef SET_STR_DC_THRESHOLD
+#define SET_STR_DC_THRESHOLD            750
+#endif
+
+#ifndef SET_STR_PRECOMPUTE_THRESHOLD
+#define SET_STR_PRECOMPUTE_THRESHOLD   2000
+#endif
+
+/* Return non-zero if xp,xsize and yp,ysize overlap.
+   If xp+xsize<=yp there's no overlap, or if yp+ysize<=xp there's no
+   overlap.  If both these are false, there's an overlap. */
+#define MPN_OVERLAP_P(xp, xsize, yp, ysize) \
+  ((xp) + (xsize) > (yp) && (yp) + (ysize) > (xp))
+#define MEM_OVERLAP_P(xp, xsize, yp, ysize)     \
+  (   (char *) (xp) + (xsize) > (char *) (yp)   \
+   && (char *) (yp) + (ysize) > (char *) (xp))
+
+/* Return non-zero if xp,xsize and yp,ysize are either identical or not
+   overlapping.  Return zero if they're partially overlapping. */
+#define MPN_SAME_OR_SEPARATE_P(xp, yp, size)    \
+  MPN_SAME_OR_SEPARATE2_P(xp, size, yp, size)
+#define MPN_SAME_OR_SEPARATE2_P(xp, xsize, yp, ysize)           \
+  ((xp) == (yp) || ! MPN_OVERLAP_P (xp, xsize, yp, ysize))
+
+/* Return non-zero if dst,dsize and src,ssize are either identical or
+   overlapping in a way suitable for an incrementing/decrementing algorithm.
+   Return zero if they're partially overlapping in an unsuitable fashion. */
+#define MPN_SAME_OR_INCR2_P(dst, dsize, src, ssize)             \
+  ((dst) <= (src) || ! MPN_OVERLAP_P (dst, dsize, src, ssize))
+#define MPN_SAME_OR_INCR_P(dst, src, size)      \
+  MPN_SAME_OR_INCR2_P(dst, size, src, size)
+#define MPN_SAME_OR_DECR2_P(dst, dsize, src, ssize)             \
+  ((dst) >= (src) || ! MPN_OVERLAP_P (dst, dsize, src, ssize))
+#define MPN_SAME_OR_DECR_P(dst, src, size)      \
+  MPN_SAME_OR_DECR2_P(dst, size, src, size)
+
+
+/* ASSERT() is a private assertion checking scheme, similar to <assert.h>.
+   ASSERT() does the check only if WANT_ASSERT is selected, ASSERT_ALWAYS()
+   does it always.  Generally assertions are meant for development, but
+   might help when looking for a problem later too.
+
+   Note that strings shouldn't be used within the ASSERT expression,
+   eg. ASSERT(strcmp(s,"notgood")!=0), since the quotes upset the "expr"
+   used in the !HAVE_STRINGIZE case (ie. K&R).  */
+
+/* Line number reported on failure; -1 when __LINE__ is not available.  */
+#ifdef __LINE__
+#define ASSERT_LINE  __LINE__
+#else
+#define ASSERT_LINE  -1
+#endif
+
+/* File name reported on failure; an empty string when __FILE__ is not
+   available.  */
+#ifdef __FILE__
+#define ASSERT_FILE  __FILE__
+#else
+#define ASSERT_FILE  ""
+#endif
+
+/* Reporting routines, declared here only.  __gmp_assert_fail is passed the
+   file name, line number and expression text, and is declared with
+   ATTRIBUTE_NORETURN.  */
+__GMP_DECLSPEC void __gmp_assert_header __GMP_PROTO ((const char *, int));
+__GMP_DECLSPEC void __gmp_assert_fail __GMP_PROTO ((const char *, int, const char *)) ATTRIBUTE_NORETURN;
+
+/* ASSERT_FAIL reports a failed expression.  With HAVE_STRINGIZE the
+   expression text is produced by the "#" operator; otherwise the string
+   "expr" is written in the macro body (the !HAVE_STRINGIZE, K&R case).  */
+#if HAVE_STRINGIZE
+#define ASSERT_FAIL(expr)  __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, #expr)
+#else
+#define ASSERT_FAIL(expr)  __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, "expr")
+#endif
+
+/* ASSERT_ALWAYS evaluates expr once and calls ASSERT_FAIL when it is zero.
+   The do/while (0) wrapper makes the expansion a single statement.  */
+#define ASSERT_ALWAYS(expr)     \
+  do {                          \
+    if (!(expr))                \
+      ASSERT_FAIL (expr);       \
+  } while (0)
+
+/* ASSERT is ASSERT_ALWAYS when WANT_ASSERT is set.  Otherwise it is an
+   empty statement and expr is not evaluated at all.  */
+#if WANT_ASSERT
+#define ASSERT(expr)   ASSERT_ALWAYS (expr)
+#else
+#define ASSERT(expr)   do {} while (0)
+#endif
+
+
+/* ASSERT_CARRY checks the expression is non-zero, and ASSERT_NOCARRY checks
+   that it's zero.  In both cases if assertion checking is disabled the
+   expression is still evaluated.  These macros are meant for use with
+   routines like mpn_add_n() where the return value represents a carry or
+   whatever that should or shouldn't occur in some context.  For example,
+   ASSERT_NOCARRY (mpn_add_n (rp, s1p, s2p, size));
+
+   The two configurations expand differently: with WANT_ASSERT the result
+   is a do/while statement (via ASSERT_ALWAYS), without it the result is the
+   bare parenthesized expression.  Use these only as complete statements,
+   never as part of a larger expression.  */
+#if WANT_ASSERT
+#define ASSERT_CARRY(expr)     ASSERT_ALWAYS ((expr) != 0)
+#define ASSERT_NOCARRY(expr)   ASSERT_ALWAYS ((expr) == 0)
+#else
+#define ASSERT_CARRY(expr)     (expr)
+#define ASSERT_NOCARRY(expr)   (expr)
+#endif
+
+
+/* ASSERT_CODE includes code when assertion checking is wanted.  This is the
+   same as writing "#if WANT_ASSERT", but more compact.  When assertion
+   checking is not wanted the argument is dropped from the expansion, so it
+   is neither compiled nor evaluated.  */
+#if WANT_ASSERT
+#define ASSERT_CODE(expr)  expr
+#else
+#define ASSERT_CODE(expr)
+#endif
+
+
+/* Test that an mpq_t is in fully canonical form.  This can be used as
+   protection on routines like mpq_equal which give wrong results on
+   non-canonical inputs.
+
+   The checks are: the denominator's size field is positive; a zero
+   numerator goes with a denominator equal to 1; otherwise the gcd of
+   numerator and denominator is 1 (computed into a temporary mpz_t that is
+   initialized and cleared inside the macro).
+
+   q is parenthesized before "->" is applied, so any expression yielding an
+   mpq pointer or mpq_t can be passed.  q is expanded several times and so
+   should have no side effects.  Without WANT_ASSERT the macro is an empty
+   statement and q is not evaluated.  */
+#if WANT_ASSERT
+#define ASSERT_MPQ_CANONICAL(q)                         \
+  do {                                                  \
+    ASSERT ((q)->_mp_den._mp_size > 0);                 \
+    if ((q)->_mp_num._mp_size == 0)                     \
+      {                                                 \
+        /* zero should be 0/1 */                        \
+        ASSERT (mpz_cmp_ui (mpq_denref(q), 1L) == 0);   \
+      }                                                 \
+    else                                                \
+      {                                                 \
+        /* no common factors */                         \
+        mpz_t  __g;                                     \
+        mpz_init (__g);                                 \
+        mpz_gcd (__g, mpq_numref(q), mpq_denref(q));    \
+        ASSERT (mpz_cmp_ui (__g, 1) == 0);              \
+        mpz_clear (__g);                                \
+      }                                                 \
+  } while (0)
+#else
+#define ASSERT_MPQ_CANONICAL(q)  do {} while (0)
+#endif
+
+/* Check that the nail parts are zero.
+
+   ASSERT_ALWAYS_LIMB checks one limb value against GMP_NAIL_MASK.
+   ASSERT_ALWAYS_MPN checks each of the size limbs at ptr; its loop sits
+   under a GMP_NAIL_BITS != 0 test.  ASSERT_LIMB and ASSERT_MPN are the same
+   checks when WANT_ASSERT is set, and empty statements (arguments not
+   evaluated) otherwise.  */
+#define ASSERT_ALWAYS_LIMB(limb)                \
+  do {                                          \
+    mp_limb_t  __nail = (limb) & GMP_NAIL_MASK; \
+    ASSERT_ALWAYS (__nail == 0);                \
+  } while (0)
+#define ASSERT_ALWAYS_MPN(ptr, size)            \
+  do {                                          \
+    /* let whole loop go dead when no nails */  \
+    if (GMP_NAIL_BITS != 0)                     \
+      {                                         \
+        mp_size_t  __i;                         \
+        for (__i = 0; __i < (size); __i++)      \
+          ASSERT_ALWAYS_LIMB ((ptr)[__i]);      \
+      }                                         \
+  } while (0)
+#if WANT_ASSERT
+#define ASSERT_LIMB(limb)       ASSERT_ALWAYS_LIMB (limb)
+#define ASSERT_MPN(ptr, size)   ASSERT_ALWAYS_MPN (ptr, size)
+#else
+#define ASSERT_LIMB(limb)       do {} while (0)
+#define ASSERT_MPN(ptr, size)   do {} while (0)
+#endif
+
+
+/* Assert that an mpn region {ptr,size} is zero, or non-zero.
+   size==0 is allowed, and in that case {ptr,size} considered to be zero.
+
+   Both macros first assert size >= 0.  ASSERT_MPN_ZERO_P asserts on every
+   limb in turn.  ASSERT_MPN_NONZERO_P scans until the first non-zero limb
+   and asserts once after the loop, so it fails for size==0.  ptr and size
+   are expanded repeatedly inside the loops.  Without WANT_ASSERT both are
+   empty statements and the arguments are not evaluated.  */
+#if WANT_ASSERT
+#define ASSERT_MPN_ZERO_P(ptr,size)     \
+  do {                                  \
+    mp_size_t  __i;                     \
+    ASSERT ((size) >= 0);               \
+    for (__i = 0; __i < (size); __i++)  \
+      ASSERT ((ptr)[__i] == 0);         \
+  } while (0)
+#define ASSERT_MPN_NONZERO_P(ptr,size)  \
+  do {                                  \
+    mp_size_t  __i;                     \
+    int        __nonzero = 0;           \
+    ASSERT ((size) >= 0);               \
+    for (__i = 0; __i < (size); __i++)  \
+      if ((ptr)[__i] != 0)              \
+        {                               \
+          __nonzero = 1;                \
+          break;                        \
+        }                               \
+    ASSERT (__nonzero);                 \
+  } while (0)
+#else
+#define ASSERT_MPN_ZERO_P(ptr,size)     do {} while (0)
+#define ASSERT_MPN_NONZERO_P(ptr,size)  do {} while (0)
+#endif
+
+
+/* Inline mpn_com, used when HAVE_NATIVE_mpn_com is not set.  Stores the
+   bitwise complement of each of the n limbs at s into d, masked with
+   GMP_NUMB_MASK.  Each argument is copied to a local once.  n must be at
+   least 1: the inner do/while always runs its body before testing the
+   count.  d and s must be the same region or not overlap (asserted).  */
+#if ! HAVE_NATIVE_mpn_com
+#undef mpn_com
+#define mpn_com(d,s,n)                                  \
+  do {                                                  \
+    mp_ptr     __d = (d);                               \
+    mp_srcptr  __s = (s);                               \
+    mp_size_t  __n = (n);                               \
+    ASSERT (__n >= 1);                                  \
+    ASSERT (MPN_SAME_OR_SEPARATE_P (__d, __s, __n));    \
+    do                                                  \
+      *__d++ = (~ *__s++) & GMP_NUMB_MASK;              \
+    while (--__n);                                      \
+  } while (0)
+#endif
+
+/* Common loop for the limb-wise logical operations.  For each of the n
+   limbs, __a is loaded from up, __b from vp, and the value of "operation"
+   is stored to rp.  "operation" is pasted into the loop body as written,
+   so it must be an expression in terms of __a and __b.
+
+   The three pointers are advanced to the end of their regions and the
+   index runs from -n up to 0, so the loop ends when the index reaches
+   zero.  n must be greater than 0 (asserted; the do/while would otherwise
+   run with a zero index and keep going).  rp must be the same as, or
+   separate from, each of up and vp (asserted).  */
+#define MPN_LOGOPS_N_INLINE(rp, up, vp, n, operation)                  \
+  do {                                                                 \
+    mp_srcptr  __up = (up);                                            \
+    mp_srcptr  __vp = (vp);                                            \
+    mp_ptr     __rp = (rp);                                            \
+    mp_size_t  __n = (n);                                              \
+    mp_limb_t __a, __b;                                                        \
+    ASSERT (__n > 0);                                                  \
+    ASSERT (MPN_SAME_OR_SEPARATE_P (__rp, __up, __n));                 \
+    ASSERT (MPN_SAME_OR_SEPARATE_P (__rp, __vp, __n));                 \
+    __up += __n;                                                       \
+    __vp += __n;                                                       \
+    __rp += __n;                                                       \
+    __n = -__n;                                                                \
+    do {                                                               \
+      __a = __up[__n];                                                 \
+      __b = __vp[__n];                                                 \
+      __rp[__n] = operation;                                           \
+    } while (++__n);                                                   \
+  } while (0)
+
+
+/* Inline fallbacks for the limb-wise logical operations, each used only
+   when the corresponding HAVE_NATIVE_mpn_* is not set.  All are built on
+   MPN_LOGOPS_N_INLINE, with __a a limb from up and __b a limb from vp:
+
+     mpn_and_n    __a & __b          mpn_nand_n   ~(__a & __b)
+     mpn_andn_n   __a & ~__b         mpn_iorn_n   __a | ~__b
+     mpn_ior_n    __a | __b          mpn_nior_n   ~(__a | __b)
+     mpn_xor_n    __a ^ __b          mpn_xnor_n   ~(__a ^ __b)
+
+   The nand, iorn, nior and xnor forms mask their result with
+   GMP_NUMB_MASK.  mpn_andn_n is not masked; presumably that relies on __a
+   having no bits outside GMP_NUMB_MASK -- TODO confirm.  */
+#if ! HAVE_NATIVE_mpn_and_n
+#undef mpn_and_n
+#define mpn_and_n(rp, up, vp, n) \
+  MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a & __b)
+#endif
+
+#if ! HAVE_NATIVE_mpn_andn_n
+#undef mpn_andn_n
+#define mpn_andn_n(rp, up, vp, n) \
+  MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a & ~__b)
+#endif
+
+#if ! HAVE_NATIVE_mpn_nand_n
+#undef mpn_nand_n
+#define mpn_nand_n(rp, up, vp, n) \
+  MPN_LOGOPS_N_INLINE (rp, up, vp, n, ~(__a & __b) & GMP_NUMB_MASK)
+#endif
+
+#if ! HAVE_NATIVE_mpn_ior_n
+#undef mpn_ior_n
+#define mpn_ior_n(rp, up, vp, n) \
+  MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a | __b)
+#endif
+
+#if ! HAVE_NATIVE_mpn_iorn_n
+#undef mpn_iorn_n
+#define mpn_iorn_n(rp, up, vp, n) \
+  MPN_LOGOPS_N_INLINE (rp, up, vp, n, (__a | ~__b) & GMP_NUMB_MASK)
+#endif
+
+#if ! HAVE_NATIVE_mpn_nior_n
+#undef mpn_nior_n
+#define mpn_nior_n(rp, up, vp, n) \
+  MPN_LOGOPS_N_INLINE (rp, up, vp, n, ~(__a | __b) & GMP_NUMB_MASK)
+#endif
+
+#if ! HAVE_NATIVE_mpn_xor_n
+#undef mpn_xor_n
+#define mpn_xor_n(rp, up, vp, n) \
+  MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a ^ __b)
+#endif
+
+#if ! HAVE_NATIVE_mpn_xnor_n
+#undef mpn_xnor_n
+#define mpn_xnor_n(rp, up, vp, n) \
+  MPN_LOGOPS_N_INLINE (rp, up, vp, n, ~(__a ^ __b) & GMP_NUMB_MASK)
+#endif
+
+/* mpn_trialdiv: prototype only, the definition is not in this section.  The
+   public-looking name is mapped through __MPN().  */
+#define mpn_trialdiv __MPN(trialdiv)
+__GMP_DECLSPEC mp_limb_t mpn_trialdiv __GMP_PROTO ((mp_srcptr, mp_size_t, mp_size_t, int *));
+
+/* mpn_remove: prototype only, likewise mapped through __MPN().  */
+#define mpn_remove __MPN(remove)
+__GMP_DECLSPEC mp_bitcnt_t mpn_remove __GMP_PROTO ((mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_bitcnt_t));
+
+
+/* ADDC_LIMB sets w=x+y and cout to 0 or 1 for a carry from that addition.
+
+   Without nails, x and y are copied to locals and the carry is detected
+   from unsigned wrap-around (the sum is smaller than x).  With nails, the
+   sum is masked with GMP_NUMB_MASK and the carry is read from the bits at
+   and above GMP_NUMB_BITS; x and y are asserted to have zero nail bits.
+   In the nails form x and y are expanded more than once when ASSERT_LIMB
+   is active, so they should have no side effects.  */
+#if GMP_NAIL_BITS == 0
+#define ADDC_LIMB(cout, w, x, y)        \
+  do {                                  \
+    mp_limb_t  __x = (x);               \
+    mp_limb_t  __y = (y);               \
+    mp_limb_t  __w = __x + __y;         \
+    (w) = __w;                          \
+    (cout) = __w < __x;                 \
+  } while (0)
+#else
+#define ADDC_LIMB(cout, w, x, y)        \
+  do {                                  \
+    mp_limb_t  __w;                     \
+    ASSERT_LIMB (x);                    \
+    ASSERT_LIMB (y);                    \
+    __w = (x) + (y);                    \
+    (w) = __w & GMP_NUMB_MASK;          \
+    (cout) = __w >> GMP_NUMB_BITS;      \
+  } while (0)
+#endif
+
+/* SUBC_LIMB sets w=x-y and cout to 0 or 1 for a borrow from that
+   subtract.
+
+   Without nails the borrow is detected from unsigned wrap-around (the
+   difference is larger than x).  With nails the difference is masked with
+   GMP_NUMB_MASK and the borrow is read from the top bit of the full limb
+   (bit GMP_LIMB_BITS-1).  */
+#if GMP_NAIL_BITS == 0
+#define SUBC_LIMB(cout, w, x, y)        \
+  do {                                  \
+    mp_limb_t  __x = (x);               \
+    mp_limb_t  __y = (y);               \
+    mp_limb_t  __w = __x - __y;         \
+    (w) = __w;                          \
+    (cout) = __w > __x;                 \
+  } while (0)
+#else
+#define SUBC_LIMB(cout, w, x, y)        \
+  do {                                  \
+    mp_limb_t  __w = (x) - (y);         \
+    (w) = __w & GMP_NUMB_MASK;          \
+    (cout) = __w >> (GMP_LIMB_BITS-1);  \
+  } while (0)
+#endif
+
+
+/* MPN_INCR_U does {ptr,size} += n, MPN_DECR_U does {ptr,size} -= n, both
+   expecting no carry (or borrow) from that.
+
+   The size parameter is only for the benefit of assertion checking.  In a
+   normal build it's unused and the carry/borrow is just propagated as far
+   as it needs to go.
+
+   On random data, usually only one or two limbs of {ptr,size} get updated,
+   so there's no need for any sophisticated looping, just something compact
+   and sensible.
+
+   FIXME: Switch all code from mpn_{incr,decr}_u to MPN_{INCR,DECR}_U,
+   declaring their operand sizes, then remove the former.  This is purely
+   for the benefit of assertion checking.  */
+
+#if defined (__GNUC__) && HAVE_HOST_CPU_FAMILY_x86 && GMP_NAIL_BITS == 0      \
+  && GMP_LIMB_BITS == 32 && ! defined (NO_ASM) && ! WANT_ASSERT
+/* Better flags handling than the generic C gives on i386, saving a few
+   bytes of code and maybe a cycle or two.
+
+   MPN_IORD_U (ptr, incr, aors) applies the instruction named by the string
+   aors ("addl" or "subl" in the users below) to the limb at ptr, and then
+   keeps applying it with $1 to the following 4-byte limbs for as long as
+   the carry flag is set.  The pointer is advanced with leal, which leaves
+   the flags from aors intact for the following jc.  When incr is the
+   compile-time constant 1 a single loop handles every limb including the
+   first; otherwise the first limb is done separately with incr as a
+   register or immediate operand.  The advanced pointer lands in a dummy
+   output tied to the ptr input, and the "memory" clobber tells the
+   compiler that limbs may have changed.
+
+   MPN_INCR_U and MPN_DECR_U ignore their size argument here, and
+   mpn_incr_u/mpn_decr_u are defined in terms of them.  */
+
+#define MPN_IORD_U(ptr, incr, aors)                                    \
+  do {                                                                 \
+    mp_ptr  __ptr_dummy;                                               \
+    if (__builtin_constant_p (incr) && (incr) == 1)                    \
+      {                                                                        \
+        __asm__ __volatile__                                           \
+          ("\n" ASM_L(top) ":\n"                                       \
+           "\t" aors " $1, (%0)\n"                                     \
+           "\tleal 4(%0),%0\n"                                         \
+           "\tjc " ASM_L(top)                                          \
+           : "=r" (__ptr_dummy)                                                \
+           : "0"  (ptr)                                                        \
+           : "memory");                                                        \
+      }                                                                        \
+    else                                                               \
+      {                                                                        \
+        __asm__ __volatile__                                           \
+          (   aors  " %2,(%0)\n"                                       \
+           "\tjnc " ASM_L(done) "\n"                                   \
+           ASM_L(top) ":\n"                                            \
+           "\t" aors " $1,4(%0)\n"                                     \
+           "\tleal 4(%0),%0\n"                                         \
+           "\tjc " ASM_L(top) "\n"                                     \
+           ASM_L(done) ":\n"                                           \
+           : "=r" (__ptr_dummy)                                                \
+           : "0"  (ptr),                                               \
+             "ri" (incr)                                               \
+           : "memory");                                                        \
+      }                                                                        \
+  } while (0)
+
+#define MPN_INCR_U(ptr, size, incr)  MPN_IORD_U (ptr, incr, "addl")
+#define MPN_DECR_U(ptr, size, incr)  MPN_IORD_U (ptr, incr, "subl")
+#define mpn_incr_u(ptr, incr)  MPN_INCR_U (ptr, 0, incr)
+#define mpn_decr_u(ptr, incr)  MPN_DECR_U (ptr, 0, incr)
+#endif
+
+/* Generic C mpn_incr_u and mpn_decr_u for the no-nails case, used when no
+   earlier definition exists.
+
+   When incr is the compile-time constant 1, successive limbs are
+   incremented (decremented) until one does not wrap.  Otherwise incr is
+   applied to the low limb and, if that wrapped (result below incr for the
+   add, original value below incr for the subtract), a carry (borrow) of 1
+   is propagated the same way.  The loops have no length bound; they stop
+   only when the carry or borrow dies out.  incr is expanded several times
+   and so should have no side effects.  */
+#if GMP_NAIL_BITS == 0
+#ifndef mpn_incr_u
+#define mpn_incr_u(p,incr)                              \
+  do {                                                  \
+    mp_limb_t __x;                                      \
+    mp_ptr __p = (p);                                   \
+    if (__builtin_constant_p (incr) && (incr) == 1)     \
+      {                                                 \
+        while (++(*(__p++)) == 0)                       \
+          ;                                             \
+      }                                                 \
+    else                                                \
+      {                                                 \
+        __x = *__p + (incr);                            \
+        *__p = __x;                                     \
+        if (__x < (incr))                               \
+          while (++(*(++__p)) == 0)                     \
+            ;                                           \
+      }                                                 \
+  } while (0)
+#endif
+#ifndef mpn_decr_u
+#define mpn_decr_u(p,incr)                              \
+  do {                                                  \
+    mp_limb_t __x;                                      \
+    mp_ptr __p = (p);                                   \
+    if (__builtin_constant_p (incr) && (incr) == 1)     \
+      {                                                 \
+        while ((*(__p++))-- == 0)                       \
+          ;                                             \
+      }                                                 \
+    else                                                \
+      {                                                 \
+        __x = *__p;                                     \
+        *__p = __x - (incr);                            \
+        if (__x < (incr))                               \
+          while ((*(++__p))-- == 0)                     \
+            ;                                           \
+      }                                                 \
+  } while (0)
+#endif
+#endif
+
+/* Generic C mpn_incr_u and mpn_decr_u for builds with nail bits, used when
+   no earlier definition exists.
+
+   Same structure as the no-nails versions, except that every stored limb
+   is masked with GMP_NUMB_MASK.  In the general (non-constant-1) branch
+   the carry or borrow out of the low limb is detected from the bits at and
+   above GMP_NUMB_BITS of the unmasked result.  Propagation then continues
+   one limb at a time while the incremented limb masks to zero (add) or the
+   limb read before decrementing was zero (subtract).  */
+#if GMP_NAIL_BITS >= 1
+#ifndef mpn_incr_u
+#define mpn_incr_u(p,incr)                              \
+  do {                                                 \
+    mp_limb_t __x;                                     \
+    mp_ptr __p = (p);                                  \
+    if (__builtin_constant_p (incr) && (incr) == 1)    \
+      {                                                        \
+       do                                              \
+         {                                             \
+           __x = (*__p + 1) & GMP_NUMB_MASK;           \
+           *__p++ = __x;                               \
+         }                                             \
+       while (__x == 0);                               \
+      }                                                        \
+    else                                               \
+      {                                                        \
+       __x = (*__p + (incr));                          \
+       *__p++ = __x & GMP_NUMB_MASK;                   \
+       if (__x >> GMP_NUMB_BITS != 0)                  \
+         {                                             \
+           do                                          \
+             {                                         \
+               __x = (*__p + 1) & GMP_NUMB_MASK;       \
+               *__p++ = __x;                           \
+             }                                         \
+           while (__x == 0);                           \
+         }                                             \
+      }                                                        \
+  } while (0)
+#endif
+#ifndef mpn_decr_u
+#define mpn_decr_u(p,incr)                             \
+  do {                                                 \
+    mp_limb_t __x;                                     \
+    mp_ptr __p = (p);                                  \
+    if (__builtin_constant_p (incr) && (incr) == 1)    \
+      {                                                        \
+       do                                              \
+         {                                             \
+           __x = *__p;                                 \
+           *__p++ = (__x - 1) & GMP_NUMB_MASK;         \
+         }                                             \
+       while (__x == 0);                               \
+      }                                                        \
+    else                                               \
+      {                                                        \
+       __x = *__p - (incr);                            \
+       *__p++ = __x & GMP_NUMB_MASK;                   \
+       if (__x >> GMP_NUMB_BITS != 0)                  \
+         {                                             \
+           do                                          \
+             {                                         \
+               __x = *__p;                             \
+               *__p++ = (__x - 1) & GMP_NUMB_MASK;     \
+             }                                         \
+           while (__x == 0);                           \
+         }                                             \
+      }                                                        \
+  } while (0)
+#endif
+#endif
+
+/* Default MPN_INCR_U, used when no earlier definition exists.  With
+   WANT_ASSERT it asserts size >= 1 and does the addition with mpn_add_1
+   over the declared size, asserting that no carry comes out.  Without
+   WANT_ASSERT the size argument is ignored and mpn_incr_u is used.  */
+#ifndef MPN_INCR_U
+#if WANT_ASSERT
+#define MPN_INCR_U(ptr, size, n)                        \
+  do {                                                  \
+    ASSERT ((size) >= 1);                               \
+    ASSERT_NOCARRY (mpn_add_1 (ptr, ptr, size, n));     \
+  } while (0)
+#else
+#define MPN_INCR_U(ptr, size, n)   mpn_incr_u (ptr, n)
+#endif
+#endif
+
+/* Default MPN_DECR_U, the subtracting counterpart: mpn_sub_1 with a
+   no-borrow assertion under WANT_ASSERT, mpn_decr_u otherwise.  */
+#ifndef MPN_DECR_U
+#if WANT_ASSERT
+#define MPN_DECR_U(ptr, size, n)                        \
+  do {                                                  \
+    ASSERT ((size) >= 1);                               \
+    ASSERT_NOCARRY (mpn_sub_1 (ptr, ptr, size, n));     \
+  } while (0)
+#else
+#define MPN_DECR_U(ptr, size, n)   mpn_decr_u (ptr, n)
+#endif
+#endif
+
+
+/* Structure for conversion between internal binary format and strings in
+   a given base.  One entry describes one conversion base.
+   NOTE(review): this comment used to say "base 2..36", but the mp_bases
+   table declared below has 257 entries and the size macros index it
+   directly by base, so larger bases appear to be covered -- confirm the
+   supported range against the table's definition.  */
+struct bases
+{
+  /* Number of digits in the conversion base that always fits in an mp_limb_t.
+     For example, for base 10 on a machine where a mp_limb_t has 32 bits this
+     is 9, since 10**9 is the largest number that fits into a mp_limb_t.  */
+  int chars_per_limb;
+
+  /* log(2)/log(conversion_base) */
+  double chars_per_bit_exactly;
+
+  /* base**chars_per_limb, i.e. the biggest number that fits a word, built by
+     factors of base.  Exception: For 2, 4, 8, etc, big_base is log2(base),
+     i.e. the number of bits used to represent each digit in the base.  */
+  mp_limb_t big_base;
+
+  /* A GMP_LIMB_BITS bit approximation to 1/big_base, represented as a
+     fixed-point number.  Instead of dividing by big_base an application can
+     choose to multiply by big_base_inverted.  */
+  mp_limb_t big_base_inverted;
+};
+
+/* The table itself is defined elsewhere; its name is mapped through
+   __MPN().  */
+#define   mp_bases __MPN(bases)
+__GMP_DECLSPEC extern const struct bases mp_bases[257];
+
+
+/* MPN_SIZEINBASE sets result to the number of digits needed to write
+   {ptr,size} in the given base.  size==0 gives 1.  Otherwise the number of
+   significant bits is found from the leading zero count of the top limb.
+   For a power of 2 base that bit count is divided, rounding up, by the
+   bits per digit held in mp_bases[base].big_base; for other bases it is
+   multiplied by chars_per_bit_exactly, truncated, and 1 is added.
+
+   For power of 2 bases this is exact.  For other bases the result is either
+   exact or one too big.
+
+   To be exact always it'd be necessary to examine all the limbs of the
+   operand, since numbers like 100..000 and 99...999 generally differ only
+   in the lowest limb.  It'd be possible to examine just a couple of high
+   limbs to increase the probability of being exact, but that doesn't seem
+   worth bothering with.
+
+   size and base are expanded several times and so should have no side
+   effects.  NOTE(review): the top limb (ptr)[(size)-1] is presumably
+   expected to be non-zero for count_leading_zeros -- confirm.  */
+
+#define MPN_SIZEINBASE(result, ptr, size, base)                         \
+  do {                                                                  \
+    int       __lb_base, __cnt;                                         \
+    size_t __totbits;                                                   \
+                                                                        \
+    ASSERT ((size) >= 0);                                               \
+    ASSERT ((base) >= 2);                                               \
+    ASSERT ((base) < numberof (mp_bases));                              \
+                                                                        \
+    /* Special case for X == 0.  */                                     \
+    if ((size) == 0)                                                    \
+      (result) = 1;                                                     \
+    else                                                                \
+      {                                                                 \
+        /* Calculate the total number of significant bits of X.  */     \
+        count_leading_zeros (__cnt, (ptr)[(size)-1]);                   \
+        __totbits = (size_t) (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS);\
+                                                                        \
+        if (POW2_P (base))                                              \
+          {                                                             \
+            __lb_base = mp_bases[base].big_base;                        \
+            (result) = (__totbits + __lb_base - 1) / __lb_base;         \
+          }                                                             \
+        else                                                            \
+          (result) = (size_t)                                           \
+            (__totbits * mp_bases[base].chars_per_bit_exactly) + 1;     \
+      }                                                                 \
+  } while (0)
+
+/* eliminate mp_bases lookups for base==16
+
+   MPN_SIZEINBASE_16 sets result to the number of hexadecimal digits needed
+   to write {ptr,size}: 1 when size==0, otherwise the number of significant
+   bits divided by 4, rounding up.
+
+   __totbits is a size_t, matching both the (size_t) cast used to compute
+   it and the declaration in MPN_SIZEINBASE, so the bit count is never
+   converted to a signed type before the division.  size is expanded
+   several times and so should have no side effects.  */
+#define MPN_SIZEINBASE_16(result, ptr, size)                            \
+  do {                                                                  \
+    int       __cnt;                                                    \
+    size_t    __totbits;                                                \
+                                                                        \
+    ASSERT ((size) >= 0);                                               \
+                                                                        \
+    /* Special case for X == 0.  */                                     \
+    if ((size) == 0)                                                    \
+      (result) = 1;                                                     \
+    else                                                                \
+      {                                                                 \
+        /* Calculate the total number of significant bits of X.  */     \
+        count_leading_zeros (__cnt, (ptr)[(size)-1]);                   \
+        __totbits = (size_t) (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS);\
+        (result) = (__totbits + 4 - 1) / 4;                             \
+      }                                                                 \
+  } while (0)
+
+/* bit count to limb count, rounding up.  The rounding term is added to n
+   before dividing by GMP_NUMB_BITS, so n must be small enough that the
+   addition cannot overflow its type.  */
+#define BITS_TO_LIMBS(n)  (((n) + (GMP_NUMB_BITS - 1)) / GMP_NUMB_BITS)
+
+/* MPN_SET_UI sets an mpn (ptr, cnt) to given ui.  MPZ_FAKE_UI creates fake
+   mpz_t from ui.  The zp argument must have room for LIMBS_PER_ULONG limbs
+   in both cases (LIMBS_PER_ULONG is also defined here.)
+
+   In the one-limb configuration the value is stored in (zp)[0] and the
+   size is 0 or 1.  In the two-limb configuration the value is split at
+   GMP_NUMB_BITS into (zp)[0] and (zp)[1] and the size is 0, 1 or 2.
+   MPZ_FAKE_UI also points PTR (z) at zp, and sets ALLOC (z) only when
+   assertion code is compiled in (ASSERT_CODE).
+
+   NOTE(review): these macros expand to several statements ending in a
+   semicolon rather than to a do/while (0) block, so they must not be used
+   as the unbraced body of an if, else or loop.  In the two-limb forms u is
+   expanded twice and so should have no side effects.  */
+#if BITS_PER_ULONG <= GMP_NUMB_BITS /* need one limb per ulong */
+
+#define LIMBS_PER_ULONG 1
+#define MPN_SET_UI(zp, zn, u)   \
+  (zp)[0] = (u);                \
+  (zn) = ((zp)[0] != 0);
+#define MPZ_FAKE_UI(z, zp, u)   \
+  (zp)[0] = (u);                \
+  PTR (z) = (zp);               \
+  SIZ (z) = ((zp)[0] != 0);     \
+  ASSERT_CODE (ALLOC (z) = 1);
+
+#else /* need two limbs per ulong */
+
+#define LIMBS_PER_ULONG 2
+#define MPN_SET_UI(zp, zn, u)                          \
+  (zp)[0] = (u) & GMP_NUMB_MASK;                       \
+  (zp)[1] = (u) >> GMP_NUMB_BITS;                      \
+  (zn) = ((zp)[1] != 0 ? 2 : (zp)[0] != 0 ? 1 : 0);
+#define MPZ_FAKE_UI(z, zp, u)                          \
+  (zp)[0] = (u) & GMP_NUMB_MASK;                       \
+  (zp)[1] = (u) >> GMP_NUMB_BITS;                      \
+  SIZ (z) = ((zp)[1] != 0 ? 2 : (zp)[0] != 0 ? 1 : 0); \
+  PTR (z) = (zp);                                      \
+  ASSERT_CODE (ALLOC (z) = 2);
+
+#endif
+
+
+/* TARGET_REGISTER_STARVED is 1 when HAVE_HOST_CPU_FAMILY_x86 is set and 0
+   otherwise, so it can be tested in ordinary C conditions.  */
+#if HAVE_HOST_CPU_FAMILY_x86
+#define TARGET_REGISTER_STARVED 1
+#else
+#define TARGET_REGISTER_STARVED 0
+#endif
+
+
+/* LIMB_HIGHBIT_TO_MASK(n) examines the high bit of a limb value and turns 1
+   or 0 there into a limb 0xFF..FF or 0 respectively.
+
+   On most CPUs this is just an arithmetic right shift by GMP_LIMB_BITS-1,
+   but C99 doesn't guarantee signed right shifts are arithmetic, so we have
+   a little compile-time test and a fallback to a "? :" form.  The latter is
+   necessary for instance on Cray vector systems.
+
+   Recent versions of gcc (eg. 3.3) will in fact optimize a "? :" like this
+   to an arithmetic right shift anyway, but it's good to get the desired
+   shift on past versions too (in particular since an important use of
+   LIMB_HIGHBIT_TO_MASK is in udiv_qrnnd_preinv).
+
+   The compile-time test is the first operand below: shifting a signed -1
+   right by one stays negative exactly when the shift is arithmetic.  n is
+   expanded in both arms, but only one arm is ever evaluated.  */
+
+#define LIMB_HIGHBIT_TO_MASK(n)                                 \
+  (((mp_limb_signed_t) -1 >> 1) < 0                             \
+   ? (mp_limb_signed_t) (n) >> (GMP_LIMB_BITS - 1)              \
+   : (n) & GMP_LIMB_HIGHBIT ? MP_LIMB_T_MAX : CNST_LIMB(0))
+
+
+/* Use a library function for invert_limb, if available.
+
+   invert_limb (invxl, xl) stores an inverse of the limb xl in invxl.  It
+   is taken from mpn_invert_limb when HAVE_NATIVE_mpn_invert_limb is set
+   and no invert_limb macro exists yet.  Otherwise the fallback below asks
+   udiv_qrnnd for the quotient of the two-limb value <~xl, ~0> divided by
+   xl and discards the remainder.  The fallback asserts xl != 0 and expands
+   xl more than once.  NOTE(review): callers presumably pass a normalized
+   xl (high bit set) -- confirm against udiv_qrnnd's requirements.  */
+#define   mpn_invert_limb __MPN(invert_limb)
+__GMP_DECLSPEC mp_limb_t mpn_invert_limb __GMP_PROTO ((mp_limb_t)) ATTRIBUTE_CONST;
+#if ! defined (invert_limb) && HAVE_NATIVE_mpn_invert_limb
+#define invert_limb(invxl,xl)           \
+  do {                                  \
+    (invxl) = mpn_invert_limb (xl);     \
+  } while (0)
+#endif
+
+#ifndef invert_limb
+#define invert_limb(invxl,xl)                   \
+  do {                                          \
+    mp_limb_t dummy;                            \
+    ASSERT ((xl) != 0);                         \
+    udiv_qrnnd (invxl, dummy, ~(xl), ~CNST_LIMB(0), xl);  \
+  } while (0)
+#endif
+
+/* invert_pi1 (dinv, d1, d0) stores in (dinv).inv32 an inverse derived from
+   the two-limb divisor <d1,d0>.  It starts from invert_limb of d1 and then
+   steps that value down, first using the low limb of d1*v plus d0 and then
+   the high limb of d0*v, so that the result also accounts for d0.
+   NOTE(review): this looks like the 3/2 reciprocal of Moller and Granlund,
+   which would need d1 to have its high bit set -- confirm with callers.
+
+   The locals carry a leading underscore and d1/d0 are parenthesized
+   wherever they appear in an expression, so callers may pass arbitrary
+   expressions (including ones naming variables v, p, t0, t1 or mask)
+   without them being captured or regrouped.  d1 and d0 are expanded more
+   than once and so should have no side effects.  */
+#define invert_pi1(dinv, d1, d0)                               \
+  do {                                                         \
+    mp_limb_t _v, _p, _t1, _t0, _mask;                         \
+    invert_limb (_v, d1);                                      \
+    _p = (d1) * _v;                                            \
+    _p += (d0);                                                \
+    if (_p < (d0))                                             \
+      {                                                        \
+        _v--;                                                  \
+        /* all ones when a second subtraction of d1 is needed */ \
+        _mask = -(mp_limb_t) (_p >= (d1));                     \
+        _p -= (d1);                                            \
+        _v += _mask;                                           \
+        _p -= _mask & (d1);                                    \
+      }                                                        \
+    umul_ppmm (_t1, _t0, d0, _v);                              \
+    _p += _t1;                                                 \
+    if (_p < _t1)                                              \
+      {                                                        \
+        _v--;                                                  \
+        if (UNLIKELY (_p >= (d1)))                             \
+          {                                                    \
+            if (_p > (d1) || _t0 >= (d0))                      \
+              _v--;                                            \
+          }                                                    \
+      }                                                        \
+    (dinv).inv32 = _v;                                         \
+  } while (0)
+
+
+/* Unless something earlier has already defined udiv_qrnnd_preinv, make it
+   an alias for udiv_qrnnd_preinv3 (not defined in this section).  */
+#ifndef udiv_qrnnd_preinv
+#define udiv_qrnnd_preinv udiv_qrnnd_preinv3
+#endif
+
+/* Divide the two-limb number in (NH,,NL) by D, with DI being the largest
+   limb not larger than (2**(2*GMP_LIMB_BITS))/D - (2**GMP_LIMB_BITS).
+   If this would yield overflow, DI should be the largest possible number
+   (i.e., only ones).  For correct operation, the most significant bit of D
+   has to be set.  Put the quotient in Q and the remainder in R.
+
+   Method: the quotient estimate is the high limb of NH*DI plus NH.  The
+   estimate times D is subtracted from (NH,,NL) as a two-limb value; while
+   the high limb of that difference is non-zero the estimate is stepped up
+   (at most twice), and one last step is taken if the low limb is still
+   >= D.  Only D != 0 is asserted; the high-bit requirement is not checked.
+   nh, nl and d are expanded more than once and so should have no side
+   effects.  */
+#define udiv_qrnnd_preinv1(q, r, nh, nl, d, di)                                \
+  do {                                                                 \
+    mp_limb_t _q, _ql, _r;                                             \
+    mp_limb_t _xh, _xl;                                                        \
+    ASSERT ((d) != 0);                                                 \
+    umul_ppmm (_q, _ql, (nh), (di));                                   \
+    _q += (nh);        /* Compensate, di is 2**GMP_LIMB_BITS too small */      \
+    umul_ppmm (_xh, _xl, _q, (d));                                     \
+    sub_ddmmss (_xh, _r, (nh), (nl), _xh, _xl);                                \
+    if (_xh != 0)                                                      \
+      {                                                                        \
+       sub_ddmmss (_xh, _r, _xh, _r, 0, (d));                          \
+       _q += 1;                                                        \
+       if (_xh != 0)                                                   \
+         {                                                             \
+           _r -= (d);                                                  \
+           _q += 1;                                                    \
+         }                                                             \
+      }                                                                        \
+    if (_r >= (d))                                                     \
+      {                                                                        \
+       _r -= (d);                                                      \
+       _q += 1;                                                        \
+      }                                                                        \
+    (r) = _r;                                                          \
+    (q) = _q;                                                          \
+  } while (0)
+
+/* Like udiv_qrnnd_preinv, but branch-free. */
+#define udiv_qrnnd_preinv2(q, r, nh, nl, d, di)                                \
+  do {                                                                 \
+    mp_limb_t _n2, _n10, _nmask, _nadj, _q1;                           \
+    mp_limb_t _xh, _xl;                                                        \
+    _n2 = (nh);                                                                \
+    _n10 = (nl);                                                       \
+    _nmask = LIMB_HIGHBIT_TO_MASK (_n10);                              \
+    _nadj = _n10 + (_nmask & (d));                                     \
+    umul_ppmm (_xh, _xl, di, _n2 - _nmask);                            \
+    add_ssaaaa (_xh, _xl, _xh, _xl, _n2, _nadj);                       \
+    _q1 = ~_xh;                                                                \
+    umul_ppmm (_xh, _xl, _q1, d);                                      \
+    add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl);                           \
+    _xh -= (d);                                        /* xh = 0 or -1 */      \
+    (r) = _xl + ((d) & _xh);                                           \
+    (q) = _xh - _q1;                                                   \
+  } while (0)
+
+/* Like udiv_qrnnd_preinv2, but for for any value D.  DNORM is D shifted left
+   so that its most significant bit is set.  LGUP is ceil(log2(D)).  */
+#define udiv_qrnnd_preinv2gen(q, r, nh, nl, d, di, dnorm, lgup) \
+  do {                                                                 \
+    mp_limb_t _n2, _n10, _nmask, _nadj, _q1;                           \
+    mp_limb_t _xh, _xl;                                                        \
+    _n2 = ((nh) << (GMP_LIMB_BITS - (lgup))) + ((nl) >> 1 >> (l - 1)); \
+    _n10 = (nl) << (GMP_LIMB_BITS - (lgup));                           \
+    _nmask = LIMB_HIGHBIT_TO_MASK (_n10);                              \
+    _nadj = _n10 + (_nmask & (dnorm));                                 \
+    umul_ppmm (_xh, _xl, di, _n2 - _nmask);                            \
+    add_ssaaaa (_xh, _xl, _xh, _xl, _n2, _nadj);                       \
+    _q1 = ~_xh;                                                                \
+    umul_ppmm (_xh, _xl, _q1, d);                                      \
+    add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl);                           \
+    _xh -= (d);                                                                \
+    (r) = _xl + ((d) & _xh);                                           \
+    (q) = _xh - _q1;                                                   \
+  } while (0)
+
+/* udiv_qrnnd_preinv3 -- Based on work by Niels Möller and Torbjörn Granlund.
+
+   We write things strangely below, to help gcc.  A more straightforward
+   version:
+
+   _r = (nl) - _qh * (d);
+   _t = _r + (d);
+   if (_r >= _ql)
+     {
+       _qh--;
+       _r = _t;
+     }
+
+   For one operation shorter critical path, one may want to use this form:
+
+   _p = _qh * (d)
+   _s = (nl) + (d);
+   _r = (nl) - _p;
+   _t = _s - _p;
+   if (_r >= _ql)
+     {
+       _qh--;
+       _r = _t;
+     }
+*/
+#define udiv_qrnnd_preinv3(q, r, nh, nl, d, di)                                \
+  do {                                                                 \
+    mp_limb_t _qh, _ql, _r;                                            \
+    umul_ppmm (_qh, _ql, (nh), (di));                                  \
+    if (__builtin_constant_p (nl) && (nl) == 0)                                \
+      _qh += (nh) + 1;                                                 \
+    else                                                               \
+      add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl));                 \
+    _r = (nl) - _qh * (d);                                             \
+    if (_r > _ql)      /* both > and >= should be OK */                \
+      {                                                                        \
+       _r += (d);                                                      \
+       _qh--;                                                          \
+      }                                                                        \
+    if (UNLIKELY (_r >= (d)))                                          \
+      {                                                                        \
+       _r -= (d);                                                      \
+       _qh++;                                                          \
+      }                                                                        \
+    (r) = _r;                                                          \
+    (q) = _qh;                                                         \
+  } while (0)
+
+/* Compute r = nh*B mod d, where di is the inverse of d.  */
+#define udiv_rnd_preinv(r, nh, d, di)                                  \
+  do {                                                                 \
+    mp_limb_t _qh, _ql, _r;                                            \
+    umul_ppmm (_qh, _ql, (nh), (di));                                  \
+    _qh += (nh) + 1;                                                   \
+    _r = - _qh * (d);                                                  \
+    if (_r > _ql)                                                      \
+      _r += (d);                                                       \
+    (r) = _r;                                                          \
+  } while (0)
+
+/* Compute the quotient and remainder for n / d. Requires d
+   >= B^2 / 2 and n < d B. di is the inverse
+
+     floor ((B^3 - 1) / (d0 + d1 B)) - B.
+
+   NOTE: Output variables are updated multiple times. Only some inputs
+   and outputs may overlap.
+*/
+#define udiv_qr_3by2(q, r1, r0, n2, n1, n0, d1, d0, dinv)              \
+  do {                                                                 \
+    mp_limb_t _q0, _t1, _t0, _mask;                                    \
+    umul_ppmm ((q), _q0, (n2), (dinv));                                        \
+    add_ssaaaa ((q), _q0, (q), _q0, (n2), (n1));                       \
+                                                                       \
+    /* Compute the two most significant limbs of n - q'd */            \
+    (r1) = (n1) - (d1) * (q);                                          \
+    (r0) = (n0);                                                       \
+    sub_ddmmss ((r1), (r0), (r1), (r0), (d1), (d0));                   \
+    umul_ppmm (_t1, _t0, (d0), (q));                                   \
+    sub_ddmmss ((r1), (r0), (r1), (r0), _t1, _t0);                     \
+    (q)++;                                                             \
+                                                                       \
+    /* Conditionally adjust q and the remainders */                    \
+    _mask = - (mp_limb_t) ((r1) >= _q0);                               \
+    (q) += _mask;                                                      \
+    add_ssaaaa ((r1), (r0), (r1), (r0), _mask & (d1), _mask & (d0));   \
+    if (UNLIKELY ((r1) >= (d1)))                                       \
+      {                                                                        \
+       if ((r1) > (d1) || (r0) >= (d0))                                \
+         {                                                             \
+           (q)++;                                                      \
+           sub_ddmmss ((r1), (r0), (r1), (r0), (d1), (d0));            \
+         }                                                             \
+      }                                                                        \
+  } while (0)
+
+#ifndef mpn_preinv_divrem_1  /* if not done with cpuvec in a fat binary */
+#define   mpn_preinv_divrem_1 __MPN(preinv_divrem_1)
+__GMP_DECLSPEC mp_limb_t mpn_preinv_divrem_1 __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int));
+#endif
+
+
+/* USE_PREINV_DIVREM_1 is whether to use mpn_preinv_divrem_1, as opposed to the
+   plain mpn_divrem_1.  The default is yes, since the few CISC chips where
+   preinv is not good have defines saying so.  */
+#ifndef USE_PREINV_DIVREM_1
+#define USE_PREINV_DIVREM_1   1
+#endif
+
+#if USE_PREINV_DIVREM_1
+#define MPN_DIVREM_OR_PREINV_DIVREM_1(qp,xsize,ap,size,d,dinv,shift)    \
+  mpn_preinv_divrem_1 (qp, xsize, ap, size, d, dinv, shift)
+#else
+#define MPN_DIVREM_OR_PREINV_DIVREM_1(qp,xsize,ap,size,d,dinv,shift)    \
+  mpn_divrem_1 (qp, xsize, ap, size, d)
+#endif
+
+#ifndef PREINV_MOD_1_TO_MOD_1_THRESHOLD
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 10
+#endif
+
+/* This selection may seem backwards.  The reason mpn_mod_1 typically takes
+   over for larger sizes is that it uses the mod_1_1 function.  */
+#define MPN_MOD_OR_PREINV_MOD_1(src,size,divisor,inverse)              \
+  (BELOW_THRESHOLD (size, PREINV_MOD_1_TO_MOD_1_THRESHOLD)             \
+   ? mpn_preinv_mod_1 (src, size, divisor, inverse)                    \
+   : mpn_mod_1 (src, size, divisor))
+
+
+#ifndef mpn_mod_34lsub1  /* if not done with cpuvec in a fat binary */
+#define   mpn_mod_34lsub1 __MPN(mod_34lsub1)
+__GMP_DECLSPEC mp_limb_t mpn_mod_34lsub1 __GMP_PROTO ((mp_srcptr, mp_size_t)) __GMP_ATTRIBUTE_PURE;
+#endif
+
+
+/* DIVEXACT_1_THRESHOLD is at what size to use mpn_divexact_1, as opposed to
+   plain mpn_divrem_1.  On most CPUs divexact is faster at all sizes, so the
+   default is 0.  BMOD_1_TO_MOD_1_THRESHOLD goes the other way: below it
+   mpn_modexact_1_odd is used, otherwise plain mpn_mod_1; the default is
+   10.  Those CPUs where this is not right have a tuned threshold.  */
+#ifndef DIVEXACT_1_THRESHOLD
+#define DIVEXACT_1_THRESHOLD  0
+#endif
+#ifndef BMOD_1_TO_MOD_1_THRESHOLD
+#define BMOD_1_TO_MOD_1_THRESHOLD  10
+#endif
+
+#ifndef mpn_divexact_1  /* if not done with cpuvec in a fat binary */
+#define mpn_divexact_1 __MPN(divexact_1)
+__GMP_DECLSPEC void    mpn_divexact_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
+#endif
+
+#define MPN_DIVREM_OR_DIVEXACT_1(dst, src, size, divisor)                     \
+  do {                                                                        \
+    if (BELOW_THRESHOLD (size, DIVEXACT_1_THRESHOLD))                         \
+      ASSERT_NOCARRY (mpn_divrem_1 (dst, (mp_size_t) 0, src, size, divisor)); \
+    else                                                                      \
+      {                                                                       \
+        ASSERT (mpn_mod_1 (src, size, divisor) == 0);                         \
+        mpn_divexact_1 (dst, src, size, divisor);                             \
+      }                                                                       \
+  } while (0)
+
+#ifndef mpn_modexact_1c_odd  /* if not done with cpuvec in a fat binary */
+#define   mpn_modexact_1c_odd __MPN(modexact_1c_odd)
+__GMP_DECLSPEC mp_limb_t mpn_modexact_1c_odd __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)) __GMP_ATTRIBUTE_PURE;
+#endif
+
+#if HAVE_NATIVE_mpn_modexact_1_odd
+#define   mpn_modexact_1_odd  __MPN(modexact_1_odd)
+__GMP_DECLSPEC mp_limb_t mpn_modexact_1_odd __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t)) __GMP_ATTRIBUTE_PURE;
+#else
+#define mpn_modexact_1_odd(src,size,divisor) \
+  mpn_modexact_1c_odd (src, size, divisor, CNST_LIMB(0))
+#endif
+
+#define MPN_MOD_OR_MODEXACT_1_ODD(src,size,divisor)                    \
+  (BELOW_THRESHOLD (size, BMOD_1_TO_MOD_1_THRESHOLD)                   \
+   ? mpn_modexact_1_odd (src, size, divisor)                           \
+   : mpn_mod_1 (src, size, divisor))
+
+/* binvert_limb() sets inv to the multiplicative inverse of n modulo
+   2^GMP_NUMB_BITS, ie. satisfying inv*n == 1 mod 2^GMP_NUMB_BITS.
+   n must be odd (otherwise such an inverse doesn't exist).
+
+   This is not to be confused with invert_limb(), which is completely
+   different.
+
+   The table lookup gives an inverse with the low 8 bits valid, and each
+   multiply step doubles the number of bits.  See Jebelean "An algorithm for
+   exact division" end of section 4 (reference in gmp.texi).
+
+   Possible enhancement: Could use UHWtype until the last step, if half-size
+   multiplies are faster (might help under _LONG_LONG_LIMB).
+
+   Alternative: As noted in Granlund and Montgomery "Division by Invariant
+   Integers using Multiplication" (reference in gmp.texi), n itself gives a
+   3-bit inverse immediately, and could be used instead of a table lookup.
+   A 4-bit inverse can be obtained effectively from xoring bits 1 and 2 into
+   bit 3, for instance with (((n + 2) & 4) << 1) ^ n.  */
+
+#define binvert_limb_table  __gmp_binvert_limb_table
+__GMP_DECLSPEC extern const unsigned char  binvert_limb_table[128];
+
+#define binvert_limb(inv,n)                                            \
+  do {                                                                 \
+    mp_limb_t  __n = (n);                                              \
+    mp_limb_t  __inv;                                                  \
+    ASSERT ((__n & 1) == 1);                                           \
+                                                                       \
+    __inv = binvert_limb_table[(__n/2) & 0x7F]; /*  8 */               \
+    if (GMP_NUMB_BITS > 8)   __inv = 2 * __inv - __inv * __inv * __n;  \
+    if (GMP_NUMB_BITS > 16)  __inv = 2 * __inv - __inv * __inv * __n;  \
+    if (GMP_NUMB_BITS > 32)  __inv = 2 * __inv - __inv * __inv * __n;  \
+                                                                       \
+    if (GMP_NUMB_BITS > 64)                                            \
+      {                                                                        \
+       int  __invbits = 64;                                            \
+       do {                                                            \
+         __inv = 2 * __inv - __inv * __inv * __n;                      \
+         __invbits *= 2;                                               \
+       } while (__invbits < GMP_NUMB_BITS);                            \
+      }                                                                        \
+                                                                       \
+    ASSERT ((__inv * __n & GMP_NUMB_MASK) == 1);                       \
+    (inv) = __inv & GMP_NUMB_MASK;                                     \
+  } while (0)
+#define modlimb_invert binvert_limb  /* backward compatibility */
+
+/* Multiplicative inverse of 3, modulo 2^GMP_NUMB_BITS.
+   Eg. 0xAAAAAAAB for 32 bits, 0xAAAAAAAAAAAAAAAB for 64 bits.
+   GMP_NUMB_MAX/3*2+1 is right when GMP_NUMB_BITS is even, but when it's odd
+   we need to start from GMP_NUMB_MAX>>1. */
+#define MODLIMB_INVERSE_3 (((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 2)) / 3) * 2 + 1)
+
+/* ceil(GMP_NUMB_MAX/3) and ceil(2*GMP_NUMB_MAX/3).
+   These expressions work because GMP_NUMB_MAX%3 != 0 for all GMP_NUMB_BITS. */
+#define GMP_NUMB_CEIL_MAX_DIV3   (GMP_NUMB_MAX / 3 + 1)
+#define GMP_NUMB_CEIL_2MAX_DIV3  ((GMP_NUMB_MAX>>1) / 3 + 1 + GMP_NUMB_HIGHBIT)
+
+
+/* Set r to -a mod d.  a>=d is allowed.  Can give r>d.  All should be limbs.
+
+   It's not clear whether this is the best way to do this calculation.
+   Anything congruent to -a would be fine for the one limb congruence
+   tests.  */
+
+#define NEG_MOD(r, a, d)                                               \
+  do {                                                                 \
+    ASSERT ((d) != 0);                                                 \
+    ASSERT_LIMB (a);                                                   \
+    ASSERT_LIMB (d);                                                   \
+                                                                       \
+    if ((a) <= (d))                                                    \
+      {                                                                        \
+        /* small a is reasonably likely */                             \
+        (r) = (d) - (a);                                               \
+      }                                                                        \
+    else                                                               \
+      {                                                                        \
+        unsigned   __twos;                                             \
+        mp_limb_t  __dnorm;                                            \
+        count_leading_zeros (__twos, d);                               \
+        __twos -= GMP_NAIL_BITS;       /* exclude the nail bits */     \
+        __dnorm = (d) << __twos;       /* d * 2^__twos, multiple of d */ \
+        (r) = ((a) <= __dnorm ? __dnorm : 2*__dnorm) - (a);            \
+      }                                                                        \
+                                                                       \
+    ASSERT_LIMB (r);                                                   \
+  } while (0)
+
+/* A bit mask of all the least significant zero bits of n, or -1 if n==0. */
+#define LOW_ZEROS_MASK(n)  (((n) & -(n)) - 1)
+
+
+/* ULONG_PARITY sets "p" to 1 if there's an odd number of 1 bits in "n", or
+   to 0 if there's an even number.  "n" should be an unsigned long and "p"
+   an int.  */
+
+#if defined (__GNUC__) && ! defined (NO_ASM) && HAVE_HOST_CPU_alpha_CIX
+#define ULONG_PARITY(p, n)                                             \
+  do {                                                                 \
+    int __p;                                                           \
+    __asm__ ("ctpop %1, %0" : "=r" (__p) : "r" (n));                   \
+    (p) = __p & 1;                                                     \
+  } while (0)
+#endif
+
+/* Cray intrinsic _popcnt. */
+#ifdef _CRAY
+#define ULONG_PARITY(p, n)      \
+  do {                          \
+    (p) = _popcnt (n) & 1;      \
+  } while (0)
+#endif
+
+#if defined (__GNUC__) && ! defined (__INTEL_COMPILER)                 \
+    && ! defined (NO_ASM) && defined (__ia64)
+/* unsigned long is either 32 or 64 bits depending on the ABI, zero extend
+   to a 64 bit unsigned long long for popcnt */
+#define ULONG_PARITY(p, n)                                             \
+  do {                                                                 \
+    unsigned long long  __n = (unsigned long) (n);                     \
+    int  __p;                                                          \
+    __asm__ ("popcnt %0 = %1" : "=r" (__p) : "r" (__n));               \
+    (p) = __p & 1;                                                     \
+  } while (0)
+#endif
+
+#if defined (__GNUC__) && ! defined (__INTEL_COMPILER)                 \
+    && ! defined (NO_ASM) && HAVE_HOST_CPU_FAMILY_x86
+#if __GMP_GNUC_PREREQ (3,1)
+#define __GMP_qm "=Qm"
+#define __GMP_q "=Q"
+#else
+#define __GMP_qm "=qm"
+#define __GMP_q "=q"
+#endif
+#define ULONG_PARITY(p, n)                                             \
+  do {                                                                 \
+    char          __p;                                                 \
+    unsigned long  __n = (n);                                          \
+    __n ^= (__n >> 16);                                                        \
+    __asm__ ("xorb %h1, %b1\n\t"                                       \
+            "setpo %0"                                                 \
+        : __GMP_qm (__p), __GMP_q (__n)                                \
+        : "1" (__n));                                                  \
+    (p) = __p;                                                         \
+  } while (0)
+#endif
+
+#if ! defined (ULONG_PARITY)
+#define ULONG_PARITY(p, n)                                             \
+  do {                                                                 \
+    unsigned long  __n = (n);                                          \
+    int  __p = 0;                                                      \
+    do                                                                 \
+      {                                                                        \
+        /* bit i of 0x96696996 is the parity of i; 5 bits per step */  \
+        __p ^= 0x96696996L >> (__n & 0x1F);                            \
+        __n >>= 5;                                                     \
+      }                                                                        \
+    while (__n != 0);                                                  \
+                                                                       \
+    (p) = __p & 1;                                                     \
+  } while (0)
+#endif
+
+
+/* 3 cycles on 604 or 750 since shifts and rlwimi's can pair.  gcc (as of
+   version 3.1 at least) doesn't seem to know how to generate rlwimi for
+   anything other than bit-fields, so use "asm".  */
+#if defined (__GNUC__) && ! defined (NO_ASM)                    \
+  && HAVE_HOST_CPU_FAMILY_powerpc && GMP_LIMB_BITS == 32
+#define BSWAP_LIMB(dst, src)                                           \
+  do {                                                                 \
+    mp_limb_t  __bswapl_src = (src);                                   \
+    mp_limb_t  __tmp1 = __bswapl_src >> 24;            /* low byte */  \
+    mp_limb_t  __tmp2 = __bswapl_src << 24;            /* high byte */ \
+    __asm__ ("rlwimi %0, %2, 24, 16, 23"               /* 2nd low */   \
+        : "=r" (__tmp1) : "0" (__tmp1), "r" (__bswapl_src));           \
+    __asm__ ("rlwimi %0, %2,  8,  8, 15"               /* 2nd high */  \
+        : "=r" (__tmp2) : "0" (__tmp2), "r" (__bswapl_src));           \
+    (dst) = __tmp1 | __tmp2;                           /* whole */     \
+  } while (0)
+#endif
+
+/* bswap is available on i486 and up and is fast.  A combination rorw $8 /
+   roll $16 / rorw $8 is used in glibc for plain i386 (and in the linux
+   kernel with xchgb instead of rorw), but this is not done here, because
+   i386 means generic x86 and mixing word and dword operations will cause
+   partial register stalls on P6 chips.  */
+#if defined (__GNUC__) && ! defined (NO_ASM)            \
+  && HAVE_HOST_CPU_FAMILY_x86 && ! HAVE_HOST_CPU_i386   \
+  && GMP_LIMB_BITS == 32
+#define BSWAP_LIMB(dst, src)                                           \
+  do {                                                                 \
+    __asm__ ("bswap %0" : "=r" (dst) : "0" (src));                     \
+  } while (0)
+#endif
+
+#if defined (__GNUC__) && ! defined (NO_ASM)            \
+  && defined (__amd64__) && GMP_LIMB_BITS == 64
+#define BSWAP_LIMB(dst, src)                                           \
+  do {                                                                 \
+    __asm__ ("bswap %q0" : "=r" (dst) : "0" (src));                    \
+  } while (0)
+#endif
+
+#if defined (__GNUC__) && ! defined (__INTEL_COMPILER)                 \
+    && ! defined (NO_ASM) && defined (__ia64) && GMP_LIMB_BITS == 64
+#define BSWAP_LIMB(dst, src)                                           \
+  do {                                                                 \
+    __asm__ ("mux1 %0 = %1, @rev" : "=r" (dst) :  "r" (src));          \
+  } while (0)
+#endif
+
+/* As per glibc. */
+#if defined (__GNUC__) && ! defined (NO_ASM)                    \
+  && HAVE_HOST_CPU_FAMILY_m68k && GMP_LIMB_BITS == 32
+#define BSWAP_LIMB(dst, src)                                           \
+  do {                                                                 \
+    mp_limb_t  __bswapl_src = (src);                                   \
+    __asm__ ("ror%.w %#8, %0\n\t"                                      \
+            "swap   %0\n\t"                                            \
+            "ror%.w %#8, %0"                                           \
+            : "=d" (dst)                                               \
+            : "0" (__bswapl_src));                                     \
+  } while (0)
+#endif
+
+#if ! defined (BSWAP_LIMB)
+#if GMP_LIMB_BITS == 8
+#define BSWAP_LIMB(dst, src)            \
+  do { (dst) = (src); } while (0)
+#endif
+#if GMP_LIMB_BITS == 16
+#define BSWAP_LIMB(dst, src)                    \
+  do {                                          \
+    (dst) = ((src) << 8) + ((src) >> 8);        \
+  } while (0)
+#endif
+#if GMP_LIMB_BITS == 32
+#define BSWAP_LIMB(dst, src)    \
+  do {                          \
+    (dst) =                     \
+      ((src) << 24)             \
+      + (((src) & 0xFF00) << 8) \
+      + (((src) >> 8) & 0xFF00) \
+      + ((src) >> 24);          \
+  } while (0)
+#endif
+#if GMP_LIMB_BITS == 64
+#define BSWAP_LIMB(dst, src)            \
+  do {                                  \
+    (dst) =                             \
+      ((src) << 56)                     \
+      + (((src) & 0xFF00) << 40)        \
+      + (((src) & 0xFF0000) << 24)      \
+      + (((src) & 0xFF000000) << 8)     \
+      + (((src) >> 8) & 0xFF000000)     \
+      + (((src) >> 24) & 0xFF0000)      \
+      + (((src) >> 40) & 0xFF00)        \
+      + ((src) >> 56);                  \
+  } while (0)
+#endif
+#endif
+
+#if ! defined (BSWAP_LIMB)
+#define BSWAP_LIMB(dst, src)                            \
+  do {                                                  \
+    mp_limb_t  __bswapl_src = (src);                    \
+    mp_limb_t  __dst = 0;                               \
+    int        __i;                                     \
+    for (__i = 0; __i < BYTES_PER_MP_LIMB; __i++)       \
+      {                                                 \
+        __dst = (__dst << 8) | (__bswapl_src & 0xFF);   \
+        __bswapl_src >>= 8;                             \
+      }                                                 \
+    (dst) = __dst;                                      \
+  } while (0)
+#endif
+
+
+/* Apparently lwbrx might be slow on some PowerPC chips, so restrict it to
+   those we know are fast.  */
+#if defined (__GNUC__) && ! defined (NO_ASM)                            \
+  && GMP_LIMB_BITS == 32 && HAVE_LIMB_BIG_ENDIAN                        \
+  && (HAVE_HOST_CPU_powerpc604                                          \
+      || HAVE_HOST_CPU_powerpc604e                                      \
+      || HAVE_HOST_CPU_powerpc750                                       \
+      || HAVE_HOST_CPU_powerpc7400)
+#define BSWAP_LIMB_FETCH(limb, src)                                    \
+  do {                                                                 \
+    mp_srcptr  __blf_src = (src);                                      \
+    mp_limb_t  __limb;                                                 \
+    __asm__ ("lwbrx %0, 0, %1"                                         \
+            : "=r" (__limb)                                            \
+            : "r" (__blf_src),                                         \
+              "m" (*__blf_src));                                       \
+    (limb) = __limb;                                                   \
+  } while (0)
+#endif
+
+#if ! defined (BSWAP_LIMB_FETCH)
+#define BSWAP_LIMB_FETCH(limb, src)  BSWAP_LIMB (limb, *(src))
+#endif
+
+
+/* On the same basis that lwbrx might be slow, restrict stwbrx to those we
+   know are fast.  FIXME: Is this necessary?  */
+#if defined (__GNUC__) && ! defined (NO_ASM)                            \
+  && GMP_LIMB_BITS == 32 && HAVE_LIMB_BIG_ENDIAN                        \
+  && (HAVE_HOST_CPU_powerpc604                                          \
+      || HAVE_HOST_CPU_powerpc604e                                      \
+      || HAVE_HOST_CPU_powerpc750                                       \
+      || HAVE_HOST_CPU_powerpc7400)
+#define BSWAP_LIMB_STORE(dst, limb)                                    \
+  do {                                                                 \
+    mp_ptr     __dst = (dst);                                          \
+    mp_limb_t  __limb = (limb);                                                \
+    __asm__ ("stwbrx %1, 0, %2"                                                \
+            : "=m" (*__dst)                                            \
+            : "r" (__limb),                                            \
+              "r" (__dst));                                            \
+  } while (0)
+#endif
+
+#if ! defined (BSWAP_LIMB_STORE)
+#define BSWAP_LIMB_STORE(dst, limb)  BSWAP_LIMB (*(dst), limb)
+#endif
+
+
+/* Byte swap limbs from {src,size} and store at {dst,size}. */
+#define MPN_BSWAP(dst, src, size)                       \
+  do {                                                  \
+    mp_ptr     __dst = (dst);                           \
+    mp_srcptr  __src = (src);                           \
+    mp_size_t  __size = (size);                         \
+    mp_size_t  __i;                                     \
+    ASSERT ((size) >= 0);                               \
+    ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size));   \
+    CRAY_Pragma ("_CRI ivdep");                         \
+    for (__i = 0; __i < __size; __i++)                  \
+      {                                                 \
+        BSWAP_LIMB_FETCH (*__dst, __src);               \
+        __dst++;                                        \
+        __src++;                                        \
+      }                                                 \
+  } while (0)
+
+/* Byte swap limbs from {src,size} and store in reverse order at {dst,size}. */
+#define MPN_BSWAP_REVERSE(dst, src, size)               \
+  do {                                                  \
+    mp_ptr     __dst = (dst);                           \
+    mp_size_t  __size = (size);                         \
+    mp_srcptr  __src = (src) + __size - 1;              \
+    mp_size_t  __i;                                     \
+    ASSERT ((size) >= 0);                               \
+    ASSERT (! MPN_OVERLAP_P (dst, size, src, size));    \
+    CRAY_Pragma ("_CRI ivdep");                         \
+    for (__i = 0; __i < __size; __i++)                  \
+      {                                                 \
+        BSWAP_LIMB_FETCH (*__dst, __src);               \
+        __dst++;                                        \
+        __src--;                                        \
+      }                                                 \
+  } while (0)
+
+
+/* No processor claiming to be SPARC v9 compliant seems to
+   implement the POPC instruction.  Disable pattern for now.  */
+#if 0
+#if defined __GNUC__ && defined __sparc_v9__ && GMP_LIMB_BITS == 64
+#define popc_limb(result, input)                                       \
+  do {                                                                 \
+    DItype __res;                                                      \
+    __asm__ ("popc %1,%0" : "=r" (result) : "rI" (input));             \
+  } while (0)
+#endif
+#endif
+
+#if defined (__GNUC__) && ! defined (NO_ASM) && HAVE_HOST_CPU_alpha_CIX
+#define popc_limb(result, input)                                       \
+  do {                                                                 \
+    __asm__ ("ctpop %1, %0" : "=r" (result) : "r" (input));            \
+  } while (0)
+#endif
+
+/* Cray intrinsic. */
+#ifdef _CRAY
+#define popc_limb(result, input)        \
+  do {                                  \
+    (result) = _popcnt (input);         \
+  } while (0)
+#endif
+
+/* IA-64: count the set bits of `input' with a single `popcnt' instruction.
+   Selected only for GCC (not the Intel compiler), with asm allowed and
+   64-bit limbs.  */
+#if defined (__GNUC__) && ! defined (__INTEL_COMPILER)                 \
+    && ! defined (NO_ASM) && defined (__ia64) && GMP_LIMB_BITS == 64
+#define popc_limb(result, input)                                       \
+  do {                                                                 \
+    __asm__ ("popcnt %0 = %1" : "=r" (result) : "r" (input));          \
+  } while (0)
+#endif
+
+/* Cool population count of an mp_limb_t.
+   You have to figure out how this works, We won't tell you!
+
+   The constants could also be expressed as:
+     0x55... = [2^N / 3]     = [(2^N-1)/3]
+     0x33... = [2^N / 5]     = [(2^N-1)/5]
+     0x0f... = [2^N / 17]    = [(2^N-1)/17]
+     (N is GMP_LIMB_BITS, [] denotes truncation.) */
+
+#if ! defined (popc_limb) && GMP_LIMB_BITS == 8
+/* Portable fallback for 8-bit limbs: form 2-bit sums, then 4-bit sums,
+   then add the two nibble counts.  */
+#define popc_limb(result, input)                                       \
+  do {                                                                 \
+    mp_limb_t  __cnt = (input);                                        \
+    __cnt = __cnt - ((__cnt >> 1) & MP_LIMB_T_MAX/3);                  \
+    __cnt = (__cnt & MP_LIMB_T_MAX/5)                                  \
+            + ((__cnt >> 2) & MP_LIMB_T_MAX/5);                        \
+    __cnt = (__cnt + (__cnt >> 4)) & MP_LIMB_T_MAX/17;                 \
+    (result) = __cnt & 0xff;                                           \
+  } while (0)
+#endif
+
+#if ! defined (popc_limb) && GMP_LIMB_BITS == 16
+/* Portable fallback for 16-bit limbs: form 2-bit sums, 4-bit sums and
+   per-byte sums, then fold the two bytes together.  */
+#define popc_limb(result, input)                                       \
+  do {                                                                 \
+    mp_limb_t  __cnt = (input);                                        \
+    __cnt = __cnt - ((__cnt >> 1) & MP_LIMB_T_MAX/3);                  \
+    __cnt = (__cnt & MP_LIMB_T_MAX/5)                                  \
+            + ((__cnt >> 2) & MP_LIMB_T_MAX/5);                        \
+    __cnt = (__cnt + (__cnt >> 4)) & MP_LIMB_T_MAX/17;                 \
+    __cnt += __cnt >> 8;                                               \
+    (result) = __cnt & 0xff;                                           \
+  } while (0)
+#endif
+
+#if ! defined (popc_limb) && GMP_LIMB_BITS == 32
+/* Portable fallback for 32-bit limbs: form 2-bit sums, 4-bit sums and
+   per-byte sums, then fold the bytes together; the low byte of the
+   accumulator is the answer.  */
+#define popc_limb(result, input)                                       \
+  do {                                                                 \
+    mp_limb_t  __cnt = (input);                                        \
+    __cnt = __cnt - ((__cnt >> 1) & MP_LIMB_T_MAX/3);                  \
+    __cnt = (__cnt & MP_LIMB_T_MAX/5)                                  \
+            + ((__cnt >> 2) & MP_LIMB_T_MAX/5);                        \
+    __cnt = (__cnt + (__cnt >> 4)) & MP_LIMB_T_MAX/17;                 \
+    __cnt += __cnt >> 8;                                               \
+    __cnt += __cnt >> 16;                                              \
+    (result) = __cnt & 0xff;                                           \
+  } while (0)
+#endif
+
+#if ! defined (popc_limb) && GMP_LIMB_BITS == 64
+/* Portable fallback for 64-bit limbs: form 2-bit sums, 4-bit sums and
+   per-byte sums, then fold the bytes together; the low byte of the
+   accumulator is the answer.  */
+#define popc_limb(result, input)                                       \
+  do {                                                                 \
+    mp_limb_t  __cnt = (input);                                        \
+    __cnt = __cnt - ((__cnt >> 1) & MP_LIMB_T_MAX/3);                  \
+    __cnt = (__cnt & MP_LIMB_T_MAX/5)                                  \
+            + ((__cnt >> 2) & MP_LIMB_T_MAX/5);                        \
+    __cnt = (__cnt + (__cnt >> 4)) & MP_LIMB_T_MAX/17;                 \
+    __cnt += __cnt >> 8;                                               \
+    __cnt += __cnt >> 16;                                              \
+    __cnt += __cnt >> 32;                                              \
+    (result) = __cnt & 0xff;                                           \
+  } while (0)
+#endif
+
+
+/* Define stuff for longlong.h.  */
+#if HAVE_ATTRIBUTE_MODE
+typedef unsigned int UQItype   __attribute__ ((mode (QI)));
+typedef                 int SItype     __attribute__ ((mode (SI)));
+typedef unsigned int USItype   __attribute__ ((mode (SI)));
+typedef                 int DItype     __attribute__ ((mode (DI)));
+typedef unsigned int UDItype   __attribute__ ((mode (DI)));
+#else
+typedef unsigned char UQItype;
+typedef                 long SItype;
+typedef unsigned long USItype;
+#if HAVE_LONG_LONG
+typedef        long long int DItype;
+typedef unsigned long long int UDItype;
+#else /* Assume `long' gives us a wide enough type.  Needed for hppa2.0w.  */
+typedef long int DItype;
+typedef unsigned long int UDItype;
+#endif
+#endif
+
+typedef mp_limb_t UWtype;
+typedef unsigned int UHWtype;
+#define W_TYPE_SIZE GMP_LIMB_BITS
+
+/* Define ieee_double_extract and _GMP_IEEE_FLOATS.
+
+   Bit field packing is "implementation defined" according to C99, which
+   leaves us at the compiler's mercy here.  For some systems packing is
+   defined in the ABI (eg. x86).  In any case so far it seems universal that
+   little endian systems pack from low to high, and big endian from high to
+   low within the given type.
+
+   Within the fields we rely on the integer endianness being the same as the
+   float endianness, this is true everywhere we know of and it'd be a fairly
+   strange system that did anything else.  */
+
+/* "Little endian, swapped" layout: the bit-fields carrying the sign,
+   exponent and high mantissa bits are declared before the low mantissa
+   word.  The field widths add up to 64 bits, overlaying the double `d'.  */
+#if HAVE_DOUBLE_IEEE_LITTLE_SWAPPED
+#define _GMP_IEEE_FLOATS 1
+union ieee_double_extract
+{
+  struct
+    {
+      gmp_uint_least32_t manh:20;   /* high 20 bits of the mantissa */
+      gmp_uint_least32_t exp:11;    /* exponent field (0x7FF for NaN/Inf) */
+      gmp_uint_least32_t sig:1;     /* sign bit */
+      gmp_uint_least32_t manl:32;   /* low 32 bits of the mantissa */
+    } s;
+  double d;
+};
+#endif
+
+/* Little endian layout: fields are declared from the least significant
+   (low mantissa word) up to the sign bit.  The field widths add up to 64
+   bits, overlaying the double `d'.  */
+#if HAVE_DOUBLE_IEEE_LITTLE_ENDIAN
+#define _GMP_IEEE_FLOATS 1
+union ieee_double_extract
+{
+  struct
+    {
+      gmp_uint_least32_t manl:32;   /* low 32 bits of the mantissa */
+      gmp_uint_least32_t manh:20;   /* high 20 bits of the mantissa */
+      gmp_uint_least32_t exp:11;    /* exponent field (0x7FF for NaN/Inf) */
+      gmp_uint_least32_t sig:1;     /* sign bit */
+    } s;
+  double d;
+};
+#endif
+
+/* Big endian layout: fields are declared from the sign bit down to the
+   low mantissa word.  The field widths add up to 64 bits, overlaying the
+   double `d'.  */
+#if HAVE_DOUBLE_IEEE_BIG_ENDIAN
+#define _GMP_IEEE_FLOATS 1
+union ieee_double_extract
+{
+  struct
+    {
+      gmp_uint_least32_t sig:1;     /* sign bit */
+      gmp_uint_least32_t exp:11;    /* exponent field (0x7FF for NaN/Inf) */
+      gmp_uint_least32_t manh:20;   /* high 20 bits of the mantissa */
+      gmp_uint_least32_t manl:32;   /* low 32 bits of the mantissa */
+    } s;
+  double d;
+};
+#endif
+
+
+/* Use (4.0 * ...) instead of (2.0 * ...) to work around buggy compilers
+   that don't convert ulong->double correctly (eg. SunOS 4 native cc).  */
+#define MP_BASE_AS_DOUBLE (4.0 * ((mp_limb_t) 1 << (GMP_NUMB_BITS - 2)))
+/* Maximum number of limbs it will take to store any `double'.
+   We assume doubles have 53 mantissa bits.  */
+#define LIMBS_PER_DOUBLE ((53 + GMP_NUMB_BITS - 2) / GMP_NUMB_BITS + 1)
+
+__GMP_DECLSPEC int __gmp_extract_double __GMP_PROTO ((mp_ptr, double));
+
+#define mpn_get_d __gmpn_get_d
+__GMP_DECLSPEC double mpn_get_d __GMP_PROTO ((mp_srcptr, mp_size_t, mp_size_t, long)) __GMP_ATTRIBUTE_PURE;
+
+
+/* DOUBLE_NAN_INF_ACTION executes code a_nan if x is a NaN, or executes
+   a_inf if x is an infinity.  Both are considered unlikely values, for
+   branch prediction.  */
+
+#if _GMP_IEEE_FLOATS
+/* IEEE layout known: an all-ones exponent field (0x7FF) marks an infinity
+   when both mantissa parts are zero, and a NaN otherwise.  x is evaluated
+   exactly once.  The temporary carries a __gmp_ prefix so it cannot shadow
+   a caller variable mentioned in x, a_nan or a_inf.  */
+#define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf)                  \
+  do {                                                          \
+    union ieee_double_extract  __gmp_u;                         \
+    __gmp_u.d = (x);                                            \
+    if (UNLIKELY (__gmp_u.s.exp == 0x7FF))                      \
+      {                                                         \
+        if (__gmp_u.s.manl == 0 && __gmp_u.s.manh == 0)         \
+          { a_inf; }                                            \
+        else                                                    \
+          { a_nan; }                                            \
+      }                                                         \
+  } while (0)
+#endif
+
+#if HAVE_DOUBLE_VAX_D || HAVE_DOUBLE_VAX_G || HAVE_DOUBLE_CRAY_CFP
+/* no nans or infs in these formats */
+#define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf)  \
+  do { } while (0)
+#endif
+
+#ifndef DOUBLE_NAN_INF_ACTION
+/* Format not recognised, so fall back on plain comparisons: a NaN is
+   "unordered" and hence compares unequal to itself, while an infinity
+   lies outside [-DBL_MAX, DBL_MAX].  Note that x is evaluated more than
+   once here.  */
+#define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf)                  \
+  do {                                                          \
+    if (UNLIKELY ((x) != (x)))                                  \
+      { a_nan; }                                                \
+    else if (UNLIKELY ((x) > DBL_MAX || (x) < -DBL_MAX))        \
+      { a_inf; }                                                \
+  } while (0)
+#endif
+
+/* On m68k, x86 and amd64, gcc (and maybe other compilers) can hold doubles
+   in the coprocessor, which means a bigger exponent range than normal, and
+   depending on the rounding mode, a bigger mantissa than normal.  (See
+   "Disappointments" in the gcc manual.)  FORCE_DOUBLE stores and fetches
+   "d" through memory to force any rounding and overflows to occur.
+
+   On amd64, and on x86s with SSE2, gcc (depending on options) uses the xmm
+   registers, where there's no such extra precision and no need for the
+   FORCE_DOUBLE.  We don't bother to detect this since the present uses for
+   FORCE_DOUBLE are only in test programs and default generic C code.
+
+   Not quite sure that an "automatic volatile" will use memory, but it does
+   in gcc.  An asm("":"=m"(d):"0"(d)) can't be used to trick gcc, since
+   apparently matching operands like "0" are only allowed on a register
+   output.  gcc 3.4 warns about this, though in fact it and past versions
+   seem to put the operand through memory as hoped.  */
+
+#if ! (HAVE_HOST_CPU_FAMILY_m68k || HAVE_HOST_CPU_FAMILY_x86    \
+       || defined (__amd64__))
+/* No extended-precision registers to worry about: nothing to do.  */
+#define FORCE_DOUBLE(d)  do { } while (0)
+#else
+/* Bounce d through a volatile temporary so that it gets rounded to a
+   real double.  */
+#define FORCE_DOUBLE(d)                         \
+  do {                                          \
+    volatile double  __gmp_spill = (d);         \
+    (d) = __gmp_spill;                          \
+  } while (0)
+#endif
+
+
+__GMP_DECLSPEC extern int __gmp_junk;
+__GMP_DECLSPEC extern const int __gmp_0;
+__GMP_DECLSPEC void __gmp_exception __GMP_PROTO ((int)) ATTRIBUTE_NORETURN;
+__GMP_DECLSPEC void __gmp_divide_by_zero __GMP_PROTO ((void)) ATTRIBUTE_NORETURN;
+__GMP_DECLSPEC void __gmp_sqrt_of_negative __GMP_PROTO ((void)) ATTRIBUTE_NORETURN;
+__GMP_DECLSPEC void __gmp_invalid_operation __GMP_PROTO ((void)) ATTRIBUTE_NORETURN;
+#define GMP_ERROR(code)   __gmp_exception (code)
+#define DIVIDE_BY_ZERO    __gmp_divide_by_zero ()
+#define SQRT_OF_NEGATIVE  __gmp_sqrt_of_negative ()
+
+/* CNST_LIMB(C) appends the integer suffix matching the limb type (LL when
+   _LONG_LONG_LIMB is defined, L otherwise) to the constant C and casts the
+   result to mp_limb_t.  The suffix is glued on with ## when token pasting
+   is available, or with the old empty-comment trick when it is not.  */
+#if __GMP_HAVE_TOKEN_PASTE
+#if defined _LONG_LONG_LIMB
+#define CNST_LIMB(C) ((mp_limb_t) C##LL)
+#else /* not _LONG_LONG_LIMB */
+#define CNST_LIMB(C) ((mp_limb_t) C##L)
+#endif /* _LONG_LONG_LIMB */
+#else /* no token pasting */
+#if defined _LONG_LONG_LIMB
+#define CNST_LIMB(C) ((mp_limb_t) C/**/LL)
+#else /* not _LONG_LONG_LIMB */
+#define CNST_LIMB(C) ((mp_limb_t) C/**/L)
+#endif /* _LONG_LONG_LIMB */
+#endif /* __GMP_HAVE_TOKEN_PASTE */
+
+/* Stuff used by mpn/generic/perfsqr.c and mpz/pprime_p.c */
+#if GMP_NUMB_BITS == 2
+#define PP 0x3                                 /* 3 */
+#define PP_FIRST_OMITTED 5
+#endif
+#if GMP_NUMB_BITS == 4
+#define PP 0xF                                 /* 3 x 5 */
+#define PP_FIRST_OMITTED 7
+#endif
+#if GMP_NUMB_BITS == 8
+#define PP 0x69                                        /* 3 x 5 x 7 */
+#define PP_FIRST_OMITTED 11
+#endif
+#if GMP_NUMB_BITS == 16
+#define PP 0x3AA7                              /* 3 x 5 x 7 x 11 x 13 */
+#define PP_FIRST_OMITTED 17
+#endif
+#if GMP_NUMB_BITS == 32
+#define PP 0xC0CFD797L                         /* 3 x 5 x 7 x 11 x ... x 29 */
+#define PP_INVERTED 0x53E5645CL
+#define PP_FIRST_OMITTED 31
+#endif
+#if GMP_NUMB_BITS == 64
+#define PP CNST_LIMB(0xE221F97C30E94E1D)       /* 3 x 5 x 7 x 11 x ... x 53 */
+#define PP_INVERTED CNST_LIMB(0x21CFE6CFC938B36B)
+#define PP_FIRST_OMITTED 59
+#endif
+#ifndef PP_FIRST_OMITTED
+#define PP_FIRST_OMITTED 3
+#endif
+
+
+
+/* BIT1 means a result value in bit 1 (second least significant bit), with a
+   zero bit representing +1 and a one bit representing -1.  Bits other than
+   bit 1 are garbage.  These are meant to be kept in "int"s, and casts are
+   used to ensure the expressions are "int"s even if a and/or b might be
+   other types.
+
+   JACOBI_TWOS_U_BIT1 and JACOBI_RECIP_UU_BIT1 are used in mpn_jacobi_base
+   and their speed is important.  Expressions are used rather than
+   conditionals to accumulate sign changes, which effectively means XORs
+   instead of conditional JUMPs. */
+
+/* (a/0), with a signed; is 1 if a=+/-1, 0 otherwise */
+#define JACOBI_S0(a)   (((a) == 1) | ((a) == -1))
+
+/* (a/0), with a unsigned; is 1 if a=+/-1, 0 otherwise */
+#define JACOBI_U0(a)   ((a) == 1)
+
+/* (a/0), with a given by low and size;
+   is 1 if a=+/-1, 0 otherwise */
+#define JACOBI_LS0(alow,asize) \
+  (((asize) == 1 || (asize) == -1) && (alow) == 1)
+
+/* (a/0), with a an mpz_t;
+   fetch of low limb always valid, even if size is zero */
+#define JACOBI_Z0(a)   JACOBI_LS0 (PTR(a)[0], SIZ(a))
+
+/* (0/b), with b unsigned; is 1 if b=1, 0 otherwise */
+#define JACOBI_0U(b)   ((b) == 1)
+
+/* (0/b), with b signed; is 1 if b=+/-1, 0 otherwise */
+#define JACOBI_0S(b)   ((b) == 1 || (b) == -1)
+
+/* (0/b), with b given by low and size; is 1 if b=+/-1, 0 otherwise */
+#define JACOBI_0LS(blow,bsize) \
+  (((bsize) == 1 || (bsize) == -1) && (blow) == 1)
+
+/* Convert a bit1 to +1 or -1. */
+#define JACOBI_BIT1_TO_PN(result_bit1) \
+  (1 - ((int) (result_bit1) & 2))
+
+/* (2/b), with b unsigned and odd;
+   is (-1)^((b^2-1)/8) which is 1 if b==1,7mod8 or -1 if b==3,5mod8 and
+   hence obtained from (b>>1)^b */
+#define JACOBI_TWO_U_BIT1(b) \
+  ((int) (((b) >> 1) ^ (b)))
+
+/* (2/b)^twos, with b unsigned and odd */
+#define JACOBI_TWOS_U_BIT1(twos, b) \
+  ((int) ((twos) << 1) & JACOBI_TWO_U_BIT1 (b))
+
+/* (2/b)^twos, with b unsigned and odd */
+#define JACOBI_TWOS_U(twos, b) \
+  (JACOBI_BIT1_TO_PN (JACOBI_TWOS_U_BIT1 (twos, b)))
+
+/* (-1/b), with b odd (signed or unsigned);
+   is (-1)^((b-1)/2) */
+#define JACOBI_N1B_BIT1(b) \
+  ((int) (b))
+
+/* (a/b) effect due to sign of a: signed/unsigned, b odd;
+   is (-1/b) if a<0, or +1 if a>=0 */
+#define JACOBI_ASGN_SU_BIT1(a, b) \
+  ((((a) < 0) << 1) & JACOBI_N1B_BIT1(b))
+
+/* (a/b) effect due to sign of b: signed/signed;
+   is -1 if a and b both negative, +1 otherwise */
+#define JACOBI_BSGN_SS_BIT1(a, b) \
+  ((((a)<0) & ((b)<0)) << 1)
+
+/* (a/b) effect due to sign of b: signed/mpz;
+   is -1 if a and b both negative, +1 otherwise */
+#define JACOBI_BSGN_SZ_BIT1(a, b) \
+  JACOBI_BSGN_SS_BIT1 (a, SIZ(b))
+
+/* (a/b) effect due to sign of b: mpz/signed;
+   is -1 if a and b both negative, +1 otherwise */
+#define JACOBI_BSGN_ZS_BIT1(a, b) \
+  JACOBI_BSGN_SZ_BIT1 (b, a)
+
+/* (a/b) reciprocity to switch to (b/a), a,b both unsigned and odd;
+   is (-1)^((a-1)*(b-1)/4), which means +1 if either a,b==1mod4, or -1 if
+   both a,b==3mod4, achieved in bit 1 by a&b.  No ASSERT()s about a,b odd
+   because this is used in a couple of places with only bit 1 of a or b
+   valid. */
+#define JACOBI_RECIP_UU_BIT1(a, b) \
+  ((int) ((a) & (b)))
+
+/* Strip low zero limbs from {b_ptr,b_size} by incrementing b_ptr and
+   decrementing b_size.  b_low should be b_ptr[0] on entry, and will be
+   updated for the new b_ptr.  result_bit1 is updated according to the
+   factors of 2 stripped, as per (a/2).  */
+#define JACOBI_STRIP_LOW_ZEROS(result_bit1, a, b_ptr, b_size, b_low)    \
+  do {                                                                  \
+    ASSERT ((b_size) >= 1);                                             \
+    ASSERT ((b_low) == (b_ptr)[0]);                                     \
+                                                                        \
+    while (UNLIKELY ((b_low) == 0))                                     \
+      {                                                                 \
+        /* drop one zero limb; a non-zero limb must remain */           \
+        (b_size) -= 1;                                                  \
+        ASSERT ((b_size) >= 1);                                         \
+        (b_ptr) += 1;                                                   \
+        (b_low) = (b_ptr)[0];                                           \
+                                                                        \
+        /* an even number of twos per limb contributes no sign */       \
+        ASSERT (((a) & 1) != 0);                                        \
+        if ((GMP_NUMB_BITS & 1) != 0)                                   \
+          (result_bit1) ^= JACOBI_TWO_U_BIT1(a);                        \
+      }                                                                 \
+  } while (0)
+
+/* Set a_rem to {a_ptr,a_size} reduced modulo b, either using mod_1 or
+   modexact_1_odd, but in either case leaving a_rem<b.  b must be odd and
+   unsigned.  modexact_1_odd effectively calculates -a mod b, and
+   result_bit1 is adjusted for the factor of -1.
+
+   The way mpn_modexact_1_odd sometimes bases its remainder on a_size and
+   sometimes on a_size-1 means if GMP_NUMB_BITS is odd we can't know what
+   factor to introduce into result_bit1, so for that case use mpn_mod_1
+   unconditionally.
+
+   FIXME: mpn_modexact_1_odd is more efficient, so some way to get it used
+   for odd GMP_NUMB_BITS would be good.  Perhaps it could mung its result,
+   or not skip a divide step, or something. */
+
+#define JACOBI_MOD_OR_MODEXACT_1_ODD(result_bit1, a_rem, a_ptr, a_size, b) \
+  do {                                                                     \
+    mp_srcptr  __jm_ap = (a_ptr);                                          \
+    mp_size_t  __jm_an = (a_size);                                         \
+    mp_limb_t  __jm_b  = (b);                                              \
+                                                                           \
+    ASSERT (__jm_an >= 1);                                                 \
+    ASSERT (__jm_b & 1);                                                   \
+                                                                           \
+    if ((GMP_NUMB_BITS % 2) == 0                                           \
+        && ! ABOVE_THRESHOLD (__jm_an, BMOD_1_TO_MOD_1_THRESHOLD))         \
+      {                                                                    \
+        /* modexact yields -a mod b, so fold (-1/b) into the sign */       \
+        (result_bit1) ^= JACOBI_N1B_BIT1 (__jm_b);                         \
+        (a_rem) = mpn_modexact_1_odd (__jm_ap, __jm_an, __jm_b);           \
+      }                                                                    \
+    else                                                                   \
+      {                                                                    \
+        /* odd GMP_NUMB_BITS or a large operand: plain remainder */        \
+        (a_rem) = mpn_mod_1 (__jm_ap, __jm_an, __jm_b);                    \
+      }                                                                    \
+  } while (0)
+
+/* Matrix multiplication */
+#define   mpn_matrix22_mul __MPN(matrix22_mul)
+__GMP_DECLSPEC void      mpn_matrix22_mul __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr));
+#define   mpn_matrix22_mul_strassen __MPN(matrix22_mul_strassen)
+__GMP_DECLSPEC void      mpn_matrix22_mul_strassen __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr));
+#define   mpn_matrix22_mul_itch __MPN(matrix22_mul_itch)
+__GMP_DECLSPEC mp_size_t mpn_matrix22_mul_itch __GMP_PROTO ((mp_size_t, mp_size_t));
+
+#ifndef MATRIX22_STRASSEN_THRESHOLD
+#define MATRIX22_STRASSEN_THRESHOLD 30
+#endif
+
+/* HGCD definitions */
+
+/* Extract one numb, shifting count bits left
+    ________  ________
+   |___xh___||___xl___|
+         |____r____|
+   >count <
+
+   The count includes any nail bits, so it should work fine if count
+   is computed using count_leading_zeros. If GMP_NAIL_BITS > 0, all of
+   xh, xl and r include nail bits. Must have 0 < count < GMP_LIMB_BITS.
+
+   FIXME: Omit masking with GMP_NUMB_MASK, and let callers do that for
+   those calls where the count high bits of xh may be non-zero.
+*/
+
+#define MPN_EXTRACT_NUMB(count, xh, xl)                                \
+  ((((xh) << ((count) - GMP_NAIL_BITS)) & GMP_NUMB_MASK) |     \
+   ((xl) >> (GMP_LIMB_BITS - (count))))
+
+
+/* The non-negative matrix M = (u, u'; v, v') keeps track of the
+   reduction (a;b) = M (alpha; beta) where alpha, beta are smaller
+   than a, b. The determinant must always be one, so that M has an
+   inverse (v', -u'; -v, u). Elements always fit in GMP_NUMB_BITS - 1
+   bits. */
+struct hgcd_matrix1
+{
+  mp_limb_t u[2][2];
+};
+
+#define mpn_hgcd2 __MPN (hgcd2)
+__GMP_DECLSPEC int mpn_hgcd2 __GMP_PROTO ((mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1 *));
+
+#define mpn_hgcd_mul_matrix1_vector __MPN (hgcd_mul_matrix1_vector)
+__GMP_DECLSPEC mp_size_t mpn_hgcd_mul_matrix1_vector __GMP_PROTO ((const struct hgcd_matrix1 *, mp_ptr, mp_srcptr, mp_ptr, mp_size_t));
+
+#define mpn_hgcd_mul_matrix1_inverse_vector __MPN (hgcd_mul_matrix1_inverse_vector)
+__GMP_DECLSPEC mp_size_t mpn_hgcd_mul_matrix1_inverse_vector __GMP_PROTO ((const struct hgcd_matrix1 *, mp_ptr, mp_srcptr, mp_ptr, mp_size_t));
+
+struct hgcd_matrix
+{
+  mp_size_t alloc;             /* for sanity checking only */
+  mp_size_t n;
+  mp_ptr p[2][2];
+};
+
+/* 4 * (ceil(n/2) + 1) limbs; presumably one area of ceil(n/2)+1 limbs for
+   each of the four matrix entries -- confirm against mpn_hgcd_matrix_init.
+   The argument is parenthesised so that an expression such as `1 << k'
+   expands correctly.  */
+#define MPN_HGCD_MATRIX_INIT_ITCH(n) (4 * (((n)+1)/2 + 1))
+
+#define mpn_hgcd_matrix_init __MPN (hgcd_matrix_init)
+__GMP_DECLSPEC void mpn_hgcd_matrix_init __GMP_PROTO ((struct hgcd_matrix *, mp_size_t, mp_ptr));
+
+#define mpn_hgcd_matrix_mul __MPN (hgcd_matrix_mul)
+__GMP_DECLSPEC void mpn_hgcd_matrix_mul __GMP_PROTO ((struct hgcd_matrix *, const struct hgcd_matrix *, mp_ptr));
+
+#define mpn_hgcd_matrix_adjust __MPN (hgcd_matrix_adjust)
+__GMP_DECLSPEC mp_size_t mpn_hgcd_matrix_adjust __GMP_PROTO ((struct hgcd_matrix *, mp_size_t, mp_ptr, mp_ptr, mp_size_t, mp_ptr));
+
+#define mpn_hgcd_itch __MPN (hgcd_itch)
+__GMP_DECLSPEC mp_size_t mpn_hgcd_itch __GMP_PROTO ((mp_size_t));
+
+#define mpn_hgcd __MPN (hgcd)
+__GMP_DECLSPEC mp_size_t mpn_hgcd __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr));
+
+#define MPN_HGCD_LEHMER_ITCH(n) (n)
+
+#define mpn_hgcd_lehmer __MPN (hgcd_lehmer)
+__GMP_DECLSPEC mp_size_t mpn_hgcd_lehmer __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr));
+
+/* Needs storage for the quotient */
+#define MPN_GCD_SUBDIV_STEP_ITCH(n) (n)
+
+#define mpn_gcd_subdiv_step __MPN(gcd_subdiv_step)
+__GMP_DECLSPEC mp_size_t mpn_gcd_subdiv_step __GMP_PROTO ((mp_ptr, mp_size_t *, mp_ptr, mp_ptr, mp_size_t, mp_ptr));
+
+#define MPN_GCD_LEHMER_N_ITCH(n) (n)
+
+#define mpn_gcd_lehmer_n __MPN(gcd_lehmer_n)
+__GMP_DECLSPEC mp_size_t mpn_gcd_lehmer_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_ptr));
+
+#define mpn_gcdext_subdiv_step __MPN(gcdext_subdiv_step)
+__GMP_DECLSPEC mp_size_t mpn_gcdext_subdiv_step __GMP_PROTO ((mp_ptr, mp_size_t *, mp_ptr, mp_size_t *, mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_ptr));
+
+#define MPN_GCDEXT_LEHMER_N_ITCH(n) (4*(n) + 3)
+
+#define mpn_gcdext_lehmer_n __MPN(gcdext_lehmer_n)
+__GMP_DECLSPEC mp_size_t mpn_gcdext_lehmer_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_ptr, mp_size_t, mp_ptr));
+
+/* 4*(an + 1) + 4*(bn + 1) + an */
+#define MPN_GCDEXT_LEHMER_ITCH(an, bn) (5*(an) + 4*(bn) + 8)
+
+#ifndef HGCD_THRESHOLD
+#define HGCD_THRESHOLD 400
+#endif
+
+#ifndef GCD_DC_THRESHOLD
+#define GCD_DC_THRESHOLD 1000
+#endif
+
+#ifndef GCDEXT_DC_THRESHOLD
+#define GCDEXT_DC_THRESHOLD 600
+#endif
+
+/* Definitions for mpn_set_str and mpn_get_str */
+struct powers
+{
+  mp_ptr p;                    /* actual power value */
+  mp_size_t n;                 /* # of limbs at p */
+  mp_size_t shift;             /* weight of lowest limb, in limb base B */
+  size_t digits_in_base;       /* number of corresponding digits */
+  int base;
+};
+typedef struct powers powers_t;
+#define mpn_dc_set_str_powtab_alloc(n) ((n) + GMP_LIMB_BITS)
+#define mpn_dc_set_str_itch(n) ((n) + GMP_LIMB_BITS)
+#define mpn_dc_get_str_powtab_alloc(n) ((n) + 2 * GMP_LIMB_BITS)
+#define mpn_dc_get_str_itch(n) ((n) + GMP_LIMB_BITS)
+
+#define   mpn_dc_set_str __MPN(dc_set_str)
+__GMP_DECLSPEC mp_size_t mpn_dc_set_str __GMP_PROTO ((mp_ptr, const unsigned char *, size_t, const powers_t *, mp_ptr));
+#define   mpn_bc_set_str __MPN(bc_set_str)
+__GMP_DECLSPEC mp_size_t mpn_bc_set_str __GMP_PROTO ((mp_ptr, const unsigned char *, size_t, int));
+#define   mpn_set_str_compute_powtab __MPN(set_str_compute_powtab)
+__GMP_DECLSPEC void      mpn_set_str_compute_powtab __GMP_PROTO ((powers_t *, mp_ptr, mp_size_t, int));
+
+
+/* __GMPF_BITS_TO_PREC applies a minimum 53 bits, rounds upwards to a whole
+   limb and adds an extra limb.  __GMPF_PREC_TO_BITS drops that extra limb,
+   hence giving back the user's size in bits rounded up.  Notice that
+   converting prec->bits->prec gives an unchanged value.  */
+#define __GMPF_BITS_TO_PREC(n)                                         \
+  ((mp_size_t) ((__GMP_MAX (53, n) + 2 * GMP_NUMB_BITS - 1) / GMP_NUMB_BITS))
+#define __GMPF_PREC_TO_BITS(n) \
+  ((mp_bitcnt_t) (n) * GMP_NUMB_BITS - GMP_NUMB_BITS)
+
+__GMP_DECLSPEC extern mp_size_t __gmp_default_fp_limb_precision;
+
+
+/* Set n to the number of significant digits of an mpf with the given
+   _mp_prec field, in the given base.  This is a rounded up value, designed
+   to ensure there are enough digits to reproduce all the guaranteed part of
+   the value.
+
+   There are prec many limbs, but the high limb might be only "1" so forget
+   it and just count prec-1 limbs into chars.  +1 rounds that upwards, and a
+   further +1 is because the limbs usually won't fall on digit boundaries.
+
+   FIXME: If base is a power of 2 and the bits per digit divides
+   GMP_LIMB_BITS then the +2 is unnecessary.  This happens always for
+   base==2, and in base==16 with the current 32 or 64 bit limb sizes. */
+
+/* n receives 2 plus the truncated product of (prec-1)*GMP_NUMB_BITS bits
+   and mp_bases[base].chars_per_bit_exactly.  base must index mp_bases and
+   be at least 2; every use of it is parenthesised so that an expression
+   argument is checked and indexed as a whole.  */
+#define MPF_SIGNIFICANT_DIGITS(n, base, prec)                           \
+  do {                                                                  \
+    ASSERT ((base) >= 2 && (base) < numberof (mp_bases));               \
+    (n) = 2 + (size_t) ((((size_t) (prec) - 1) * GMP_NUMB_BITS)         \
+                        * mp_bases[(base)].chars_per_bit_exactly);      \
+  } while (0)
+
+
+/* Decimal point string, from the current C locale.  Needs <langinfo.h> for
+   nl_langinfo and constants, preferably with _GNU_SOURCE defined to get
+   DECIMAL_POINT from glibc, and needs <locale.h> for localeconv, each under
+   their respective #if HAVE_FOO_H.
+
+   GLIBC recommends nl_langinfo because getting only one facet can be
+   faster, apparently. */
+
+/* DECIMAL_POINT seems to need _GNU_SOURCE defined to get it from glibc. */
+#if HAVE_NL_LANGINFO && defined (DECIMAL_POINT)
+#define GMP_DECIMAL_POINT  (nl_langinfo (DECIMAL_POINT))
+#endif
+/* RADIXCHAR is deprecated, still in unix98 or some such. */
+#if HAVE_NL_LANGINFO && defined (RADIXCHAR) && ! defined (GMP_DECIMAL_POINT)
+#define GMP_DECIMAL_POINT  (nl_langinfo (RADIXCHAR))
+#endif
+/* localeconv is slower since it returns all locale stuff */
+#if HAVE_LOCALECONV && ! defined (GMP_DECIMAL_POINT)
+#define GMP_DECIMAL_POINT  (localeconv()->decimal_point)
+#endif
+#if ! defined (GMP_DECIMAL_POINT)
+#define GMP_DECIMAL_POINT  (".")
+#endif
+
+
+#define DOPRNT_CONV_FIXED        1
+#define DOPRNT_CONV_SCIENTIFIC   2
+#define DOPRNT_CONV_GENERAL      3
+
+#define DOPRNT_JUSTIFY_NONE      0
+#define DOPRNT_JUSTIFY_LEFT      1
+#define DOPRNT_JUSTIFY_RIGHT     2
+#define DOPRNT_JUSTIFY_INTERNAL  3
+
+#define DOPRNT_SHOWBASE_YES      1
+#define DOPRNT_SHOWBASE_NO       2
+#define DOPRNT_SHOWBASE_NONZERO  3
+
+struct doprnt_params_t {
+  int         base;          /* negative for upper case */
+  int         conv;          /* choices above */
+  const char  *expfmt;       /* exponent format */
+  int         exptimes4;     /* exponent multiply by 4 */
+  char        fill;          /* character */
+  int         justify;       /* choices above */
+  int         prec;          /* prec field, or -1 for all digits */
+  int         showbase;      /* choices above */
+  int         showpoint;     /* if radix point always shown */
+  int         showtrailing;  /* if trailing zeros wanted */
+  char        sign;          /* '+', ' ', or '\0' */
+  int         width;         /* width field */
+};
+
+#if _GMP_H_HAVE_VA_LIST
+
+__GMP_DECLSPEC typedef int (*doprnt_format_t) __GMP_PROTO ((void *, const char *, va_list));
+__GMP_DECLSPEC typedef int (*doprnt_memory_t) __GMP_PROTO ((void *, const char *, size_t));
+__GMP_DECLSPEC typedef int (*doprnt_reps_t)   __GMP_PROTO ((void *, int, int));
+__GMP_DECLSPEC typedef int (*doprnt_final_t)  __GMP_PROTO ((void *));
+
+struct doprnt_funs_t {
+  doprnt_format_t  format;
+  doprnt_memory_t  memory;
+  doprnt_reps_t    reps;
+  doprnt_final_t   final;   /* NULL if not required */
+};
+
+extern const struct doprnt_funs_t  __gmp_fprintf_funs;
+extern const struct doprnt_funs_t  __gmp_sprintf_funs;
+extern const struct doprnt_funs_t  __gmp_snprintf_funs;
+extern const struct doprnt_funs_t  __gmp_obstack_printf_funs;
+extern const struct doprnt_funs_t  __gmp_ostream_funs;
+
+/* "buf" is a __gmp_allocate_func block of "alloc" many bytes.  The first
+   "size" of these have been written.  "alloc > size" is maintained, so
+   there's room to store a '\0' at the end.  "result" is where the
+   application wants the final block pointer.  */
+struct gmp_asprintf_t {
+  char    **result;
+  char    *buf;
+  size_t  size;
+  size_t  alloc;
+};
+
+#define GMP_ASPRINTF_T_INIT(d, output)                          \
+  do {                                                          \
+    (d).result = (output);                                      \
+    (d).alloc = 256;                                            \
+    (d).buf = (char *) (*__gmp_allocate_func) ((d).alloc);      \
+    (d).size = 0;                                               \
+  } while (0)
+
+/* If a realloc is necessary, use twice the size actually required, so as to
+   avoid repeated small reallocs.  */
+/* d is a pointer to a struct gmp_asprintf_t and n the number of further
+   bytes about to be written.  The buffer is grown unless the current
+   allocation is strictly larger than size+n, so "alloc > size" continues to
+   hold after the write.  Temporaries carry a __gmp_ prefix so they cannot
+   capture identifiers used in the d or n arguments.  */
+#define GMP_ASPRINTF_T_NEED(d, n)                                       \
+  do {                                                                  \
+    size_t  __gmp_alloc, __gmp_newsize, __gmp_newalloc;                 \
+    ASSERT ((d)->alloc >= (d)->size + 1);                               \
+                                                                        \
+    __gmp_alloc = (d)->alloc;                                           \
+    __gmp_newsize = (d)->size + (n);                                    \
+    if (__gmp_alloc <= __gmp_newsize)                                   \
+      {                                                                 \
+        /* twice what is needed, to avoid repeated small reallocs */    \
+        __gmp_newalloc = 2*__gmp_newsize;                               \
+        (d)->alloc = __gmp_newalloc;                                    \
+        (d)->buf = __GMP_REALLOCATE_FUNC_TYPE ((d)->buf,                \
+                                               __gmp_alloc,             \
+                                               __gmp_newalloc, char);   \
+      }                                                                 \
+  } while (0)
+
+__GMP_DECLSPEC int __gmp_asprintf_memory __GMP_PROTO ((struct gmp_asprintf_t *, const char *, size_t));
+__GMP_DECLSPEC int __gmp_asprintf_reps __GMP_PROTO ((struct gmp_asprintf_t *, int, int));
+__GMP_DECLSPEC int __gmp_asprintf_final __GMP_PROTO ((struct gmp_asprintf_t *));
+
+/* buf is where to write the next output, and size is how much space is left
+   there.  If the application passed size==0 then that's what we'll have
+   here, and nothing at all should be written.  */
+struct gmp_snprintf_t {
+  char    *buf;
+  size_t  size;
+};
+
+/* Add the bytes printed by the call to the total retval, or bail out on an
+   error.  */
+/* Expects an int `retval' and a label `error' in the enclosing function:
+   a result of -1 from call jumps to error, anything else is added to
+   retval.  */
+#define DOPRNT_ACCUMULATE(call)         \
+  do {                                  \
+    int  __doprnt_ret = (call);         \
+    if (__doprnt_ret == -1)             \
+      goto error;                       \
+    retval += __doprnt_ret;             \
+  } while (0)
+#define DOPRNT_ACCUMULATE_FUN(fun, params)      \
+  do {                                          \
+    ASSERT ((fun) != NULL);                     \
+    DOPRNT_ACCUMULATE ((*(fun)) params);        \
+  } while (0)
+
+#define DOPRNT_FORMAT(fmt, ap)                          \
+  DOPRNT_ACCUMULATE_FUN (funs->format, (data, fmt, ap))
+#define DOPRNT_MEMORY(ptr, len)                                 \
+  DOPRNT_ACCUMULATE_FUN (funs->memory, (data, ptr, len))
+#define DOPRNT_REPS(c, n)                               \
+  DOPRNT_ACCUMULATE_FUN (funs->reps, (data, c, n))
+
+#define DOPRNT_STRING(str)      DOPRNT_MEMORY (str, strlen (str))
+
+/* Variants of DOPRNT_REPS and DOPRNT_MEMORY which skip the call entirely
+   when the repeat count or length is zero.  Note the count/length argument
+   is evaluated twice when it is non-zero.  */
+#define DOPRNT_REPS_MAYBE(c, n) \
+  do {                          \
+    if ((n) != 0)               \
+      DOPRNT_REPS (c, n);       \
+  } while (0)
+#define DOPRNT_MEMORY_MAYBE(ptr, len)   \
+  do {                                  \
+    if ((len) != 0)                     \
+      DOPRNT_MEMORY (ptr, len);         \
+  } while (0)
+
+__GMP_DECLSPEC int __gmp_doprnt __GMP_PROTO ((const struct doprnt_funs_t *, void *, const char *, va_list));
+__GMP_DECLSPEC int __gmp_doprnt_integer __GMP_PROTO ((const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, const char *));
+
+#define __gmp_doprnt_mpf __gmp_doprnt_mpf2
+__GMP_DECLSPEC int __gmp_doprnt_mpf __GMP_PROTO ((const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, const char *, mpf_srcptr));
+
+__GMP_DECLSPEC int __gmp_replacement_vsnprintf __GMP_PROTO ((char *, size_t, const char *, va_list));
+#endif /* _GMP_H_HAVE_VA_LIST */
+
+
+typedef int (*gmp_doscan_scan_t)  __GMP_PROTO ((void *, const char *, ...));
+typedef void *(*gmp_doscan_step_t) __GMP_PROTO ((void *, int));
+typedef int (*gmp_doscan_get_t)   __GMP_PROTO ((void *));
+typedef int (*gmp_doscan_unget_t) __GMP_PROTO ((int, void *));
+
+/* Table of the four callbacks (of the gmp_doscan_*_t pointer types) that is
+   handed to __gmp_doscan.  */
+struct gmp_doscan_funs_t {
+  gmp_doscan_scan_t   scan;
+  gmp_doscan_step_t   step;
+  gmp_doscan_get_t    get;
+  gmp_doscan_unget_t  unget;
+};
+extern const struct gmp_doscan_funs_t  __gmp_fscanf_funs;
+extern const struct gmp_doscan_funs_t  __gmp_sscanf_funs;
+
+#if _GMP_H_HAVE_VA_LIST
+__GMP_DECLSPEC int __gmp_doscan __GMP_PROTO ((const struct gmp_doscan_funs_t *, void *, const char *, va_list));
+#endif
+
+
+/* For testing and debugging.  */
+/* Check the representation of an mpz: a non-zero value must have a non-zero
+   highest limb in use, the allocation must cover the limbs in use, and the
+   limb data must pass ASSERT_ALWAYS_MPN.  Every condition is enforced with
+   ASSERT_ALWAYS.  */
+#define MPZ_CHECK_FORMAT(z)                                    \
+  do {                                                          \
+    ASSERT_ALWAYS (SIZ(z) == 0 || PTR(z)[ABSIZ(z) - 1] != 0);  \
+    ASSERT_ALWAYS (ALLOC(z) >= ABSIZ(z));                      \
+    ASSERT_ALWAYS_MPN (PTR(z), ABSIZ(z));                       \
+  } while (0)
+
+/* Check the representation of an mpq: numerator and denominator must each
+   pass MPZ_CHECK_FORMAT, the denominator's size field must be at least 1,
+   zero must be stored as 0/1, and otherwise gcd(num,den) must be 1.  The gcd
+   check allocates and frees a temporary mpz named "g" inside the macro.  */
+#define MPQ_CHECK_FORMAT(q)                             \
+  do {                                                  \
+    MPZ_CHECK_FORMAT (mpq_numref (q));                  \
+    MPZ_CHECK_FORMAT (mpq_denref (q));                  \
+    ASSERT_ALWAYS (SIZ(mpq_denref(q)) >= 1);            \
+                                                        \
+    if (SIZ(mpq_numref(q)) == 0)                        \
+      {                                                 \
+        /* should have zero as 0/1 */                   \
+        ASSERT_ALWAYS (SIZ(mpq_denref(q)) == 1          \
+                       && PTR(mpq_denref(q))[0] == 1);  \
+      }                                                 \
+    else                                                \
+      {                                                 \
+        /* should have no common factors */             \
+        mpz_t  g;                                       \
+        mpz_init (g);                                   \
+        mpz_gcd (g, mpq_numref(q), mpq_denref(q));      \
+        ASSERT_ALWAYS (mpz_cmp_ui (g, 1) == 0);         \
+        mpz_clear (g);                                  \
+      }                                                 \
+  } while (0)
+
+/* Check the representation of an mpf: the precision must be at least
+   __GMPF_BITS_TO_PREC(53), at most PREC+1 limbs may be in use, a zero value
+   must have exponent 0, and a non-zero value must have a non-zero highest
+   limb in use.  */
+#define MPF_CHECK_FORMAT(f)                             \
+  do {                                                  \
+    ASSERT_ALWAYS (PREC(f) >= __GMPF_BITS_TO_PREC(53)); \
+    ASSERT_ALWAYS (ABSIZ(f) <= PREC(f)+1);              \
+    if (SIZ(f) == 0)                                    \
+      ASSERT_ALWAYS (EXP(f) == 0);                      \
+    if (SIZ(f) != 0)                                    \
+      ASSERT_ALWAYS (PTR(f)[ABSIZ(f) - 1] != 0);        \
+  } while (0)
+
+
+/* Set the recorded allocation of z to exactly the number of limbs in use
+   (ABSIZ); the limb data itself is not touched.  Presumably meant for
+   tests, so that any growth of z has to go through a realloc -- confirm
+   against the callers.  */
+#define MPZ_PROVOKE_REALLOC(z)                                 \
+  do { ALLOC(z) = ABSIZ(z); } while (0)
+
+
+/* Enhancement: The "mod" and "gcd_1" functions below could have
+   __GMP_ATTRIBUTE_PURE, but currently (gcc 3.3) that's not supported on
+   function pointers, only actual functions.  It probably doesn't make much
+   difference to the gmp code, since hopefully we arrange calls so there's
+   no great need for the compiler to move things around.  */
+
+#if WANT_FAT_BINARY && (HAVE_HOST_CPU_FAMILY_x86 || HAVE_HOST_CPU_FAMILY_x86_64)
+/* NOTE: The function pointers in this struct are also in CPUVEC_FUNCS_LIST
+   in mpn/x86/x86-defs.m4.  Be sure to update that when changing here.  */
+struct cpuvec_t {
+  DECL_add_n           ((*add_n));
+  DECL_addmul_1        ((*addmul_1));
+  DECL_copyd           ((*copyd));
+  DECL_copyi           ((*copyi));
+  DECL_divexact_1      ((*divexact_1));
+  DECL_divexact_by3c   ((*divexact_by3c));
+  DECL_divrem_1        ((*divrem_1));
+  DECL_gcd_1           ((*gcd_1));
+  DECL_lshift          ((*lshift));
+  DECL_mod_1           ((*mod_1));
+  DECL_mod_34lsub1     ((*mod_34lsub1));
+  DECL_modexact_1c_odd ((*modexact_1c_odd));
+  DECL_mul_1           ((*mul_1));
+  DECL_mul_basecase    ((*mul_basecase));
+  DECL_preinv_divrem_1 ((*preinv_divrem_1));
+  DECL_preinv_mod_1    ((*preinv_mod_1));
+  DECL_rshift          ((*rshift));
+  DECL_sqr_basecase    ((*sqr_basecase));
+  DECL_sub_n           ((*sub_n));
+  DECL_submul_1        ((*submul_1));
+  int                  initialized;
+  mp_size_t            mul_toom22_threshold;
+  mp_size_t            mul_toom33_threshold;
+  mp_size_t            sqr_toom2_threshold;
+  mp_size_t            sqr_toom3_threshold;
+};
+__GMP_DECLSPEC extern struct cpuvec_t __gmpn_cpuvec;
+#endif /* x86 fat binary */
+
+__GMP_DECLSPEC void __gmpn_cpuvec_init __GMP_PROTO ((void));
+
+/* Get a threshold "field" from __gmpn_cpuvec, running __gmpn_cpuvec_init()
+   if that hasn't yet been done (to establish the right values).  */
+#define CPUVEC_THRESHOLD(field)                                               \
+  ((LIKELY (__gmpn_cpuvec.initialized) ? 0 : (__gmpn_cpuvec_init (), 0)),     \
+   __gmpn_cpuvec.field)
+
+
+#if HAVE_NATIVE_mpn_add_nc
+#define mpn_add_nc __MPN(add_nc)
+__GMP_DECLSPEC mp_limb_t mpn_add_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
+#else
+/* Generic fallback used when there is no native mpn_add_nc: run mpn_add_n
+   on (rp, up, vp, n), then mpn_add_1 to add the incoming carry ci into rp,
+   and return the sum of the two carry results.  */
+static inline mp_limb_t
+mpn_add_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t ci)
+{
+  mp_limb_t carry_nn = mpn_add_n (rp, up, vp, n);
+  mp_limb_t carry_ci = mpn_add_1 (rp, rp, n, ci);
+  return carry_nn + carry_ci;
+}
+#endif
+
+#if HAVE_NATIVE_mpn_sub_nc
+#define mpn_sub_nc __MPN(sub_nc)
+__GMP_DECLSPEC mp_limb_t mpn_sub_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
+#else
+/* Generic fallback used when there is no native mpn_sub_nc: run mpn_sub_n
+   on (rp, up, vp, n), then mpn_sub_1 to subtract the incoming borrow ci
+   from rp, and return the sum of the two borrow results.  */
+static inline mp_limb_t
+mpn_sub_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t ci)
+{
+  mp_limb_t borrow_nn = mpn_sub_n (rp, up, vp, n);
+  mp_limb_t borrow_ci = mpn_sub_1 (rp, rp, n, ci);
+  return borrow_nn + borrow_ci;
+}
+#endif
+
+/* Return 1 if every one of the n limbs at ap is zero (trivially so when
+   n <= 0), otherwise 0.  The limbs are examined from index n-1 downwards.  */
+static inline int
+mpn_zero_p (mp_srcptr ap, mp_size_t n)
+{
+  mp_size_t remaining = n;
+
+  while (remaining > 0)
+    {
+      remaining--;
+      if (ap[remaining] != 0)
+        return 0;
+    }
+  return 1;
+}
+
+#if TUNE_PROGRAM_BUILD
+/* Some extras wanted when recompiling some .c files for use by the tune
+   program.  Not part of a normal build.
+
+   It's necessary to keep these thresholds as #defines (just to an
+   identically named variable), since various defaults are established based
+   on #ifdef in the .c files.  For some this is not so (the defaults are
+   instead established above), but all are done this way for consistency. */
+
+#undef MUL_TOOM22_THRESHOLD
+#define MUL_TOOM22_THRESHOLD           mul_toom22_threshold
+extern mp_size_t                       mul_toom22_threshold;
+
+#undef MUL_TOOM33_THRESHOLD
+#define MUL_TOOM33_THRESHOLD           mul_toom33_threshold
+extern mp_size_t                       mul_toom33_threshold;
+
+#undef MUL_TOOM44_THRESHOLD
+#define MUL_TOOM44_THRESHOLD           mul_toom44_threshold
+extern mp_size_t                       mul_toom44_threshold;
+
+#undef MUL_TOOM6H_THRESHOLD
+#define MUL_TOOM6H_THRESHOLD           mul_toom6h_threshold
+extern mp_size_t                       mul_toom6h_threshold;
+
+#undef MUL_TOOM8H_THRESHOLD
+#define MUL_TOOM8H_THRESHOLD           mul_toom8h_threshold
+extern mp_size_t                       mul_toom8h_threshold;
+
+#undef MUL_TOOM32_TO_TOOM43_THRESHOLD
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD mul_toom32_to_toom43_threshold
+extern mp_size_t                       mul_toom32_to_toom43_threshold;
+
+#undef MUL_TOOM32_TO_TOOM53_THRESHOLD
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD mul_toom32_to_toom53_threshold
+extern mp_size_t                       mul_toom32_to_toom53_threshold;
+
+#undef MUL_TOOM42_TO_TOOM53_THRESHOLD
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD mul_toom42_to_toom53_threshold
+extern mp_size_t                       mul_toom42_to_toom53_threshold;
+
+#undef MUL_TOOM42_TO_TOOM63_THRESHOLD
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD mul_toom42_to_toom63_threshold
+extern mp_size_t                       mul_toom42_to_toom63_threshold;
+
+#undef MUL_FFT_THRESHOLD
+#define MUL_FFT_THRESHOLD              mul_fft_threshold
+extern mp_size_t                       mul_fft_threshold;
+
+#undef MUL_FFT_MODF_THRESHOLD
+#define MUL_FFT_MODF_THRESHOLD         mul_fft_modf_threshold
+extern mp_size_t                       mul_fft_modf_threshold;
+
+#undef MUL_FFT_TABLE
+#define MUL_FFT_TABLE                  { 0 }
+
+#undef MUL_FFT_TABLE3
+#define MUL_FFT_TABLE3                 { {0,0} }
+
+/* A native mpn_sqr_basecase is not tuned and SQR_BASECASE_THRESHOLD should
+   remain as zero (always use it). */
+#if ! HAVE_NATIVE_mpn_sqr_basecase
+#undef SQR_BASECASE_THRESHOLD
+#define SQR_BASECASE_THRESHOLD         sqr_basecase_threshold
+extern mp_size_t                       sqr_basecase_threshold;
+#endif
+
+#if TUNE_PROGRAM_BUILD_SQR
+#undef SQR_TOOM2_THRESHOLD
+#define SQR_TOOM2_THRESHOLD            SQR_TOOM2_MAX_GENERIC
+#else
+#undef SQR_TOOM2_THRESHOLD
+#define SQR_TOOM2_THRESHOLD            sqr_toom2_threshold
+extern mp_size_t                       sqr_toom2_threshold;
+#endif
+
+#undef SQR_TOOM3_THRESHOLD
+#define SQR_TOOM3_THRESHOLD            sqr_toom3_threshold
+extern mp_size_t                       sqr_toom3_threshold;
+
+#undef SQR_TOOM4_THRESHOLD
+#define SQR_TOOM4_THRESHOLD            sqr_toom4_threshold
+extern mp_size_t                       sqr_toom4_threshold;
+
+#undef SQR_TOOM6_THRESHOLD
+#define SQR_TOOM6_THRESHOLD            sqr_toom6_threshold
+extern mp_size_t                       sqr_toom6_threshold;
+
+#undef SQR_TOOM8_THRESHOLD
+#define SQR_TOOM8_THRESHOLD            sqr_toom8_threshold
+extern mp_size_t                       sqr_toom8_threshold;
+
+#undef  SQR_FFT_THRESHOLD
+#define SQR_FFT_THRESHOLD              sqr_fft_threshold
+extern mp_size_t                       sqr_fft_threshold;
+
+#undef  SQR_FFT_MODF_THRESHOLD
+#define SQR_FFT_MODF_THRESHOLD         sqr_fft_modf_threshold
+extern mp_size_t                       sqr_fft_modf_threshold;
+
+#undef SQR_FFT_TABLE
+#define SQR_FFT_TABLE                  { 0 }
+
+#undef SQR_FFT_TABLE3
+#define SQR_FFT_TABLE3                 { {0,0} }
+
+#undef MULLO_BASECASE_THRESHOLD
+#define MULLO_BASECASE_THRESHOLD       mullo_basecase_threshold
+extern mp_size_t                       mullo_basecase_threshold;
+
+#undef MULLO_DC_THRESHOLD
+#define MULLO_DC_THRESHOLD             mullo_dc_threshold
+extern mp_size_t                       mullo_dc_threshold;
+
+#undef MULLO_MUL_N_THRESHOLD
+#define MULLO_MUL_N_THRESHOLD          mullo_mul_n_threshold
+extern mp_size_t                       mullo_mul_n_threshold;
+
+#undef DC_DIV_QR_THRESHOLD
+#define DC_DIV_QR_THRESHOLD            dc_div_qr_threshold
+extern mp_size_t                       dc_div_qr_threshold;
+
+#undef DC_DIVAPPR_Q_THRESHOLD
+#define DC_DIVAPPR_Q_THRESHOLD         dc_divappr_q_threshold
+extern mp_size_t                       dc_divappr_q_threshold;
+
+#undef DC_BDIV_Q_THRESHOLD
+#define DC_BDIV_Q_THRESHOLD            dc_bdiv_q_threshold
+extern mp_size_t                       dc_bdiv_q_threshold;
+
+#undef DC_BDIV_QR_THRESHOLD
+#define DC_BDIV_QR_THRESHOLD           dc_bdiv_qr_threshold
+extern mp_size_t                       dc_bdiv_qr_threshold;
+
+#undef MU_DIV_QR_THRESHOLD
+#define MU_DIV_QR_THRESHOLD            mu_div_qr_threshold
+extern mp_size_t                       mu_div_qr_threshold;
+
+#undef MU_DIVAPPR_Q_THRESHOLD
+#define MU_DIVAPPR_Q_THRESHOLD         mu_divappr_q_threshold
+extern mp_size_t                       mu_divappr_q_threshold;
+
+#undef MUPI_DIV_QR_THRESHOLD
+#define MUPI_DIV_QR_THRESHOLD          mupi_div_qr_threshold
+extern mp_size_t                       mupi_div_qr_threshold;
+
+#undef MU_BDIV_QR_THRESHOLD
+#define MU_BDIV_QR_THRESHOLD           mu_bdiv_qr_threshold
+extern mp_size_t                       mu_bdiv_qr_threshold;
+
+#undef MU_BDIV_Q_THRESHOLD
+#define MU_BDIV_Q_THRESHOLD            mu_bdiv_q_threshold
+extern mp_size_t                       mu_bdiv_q_threshold;
+
+#undef INV_MULMOD_BNM1_THRESHOLD
+#define INV_MULMOD_BNM1_THRESHOLD      inv_mulmod_bnm1_threshold
+extern mp_size_t                       inv_mulmod_bnm1_threshold;
+
+#undef INV_NEWTON_THRESHOLD
+#define INV_NEWTON_THRESHOLD           inv_newton_threshold
+extern mp_size_t                       inv_newton_threshold;
+
+#undef INV_APPR_THRESHOLD
+#define INV_APPR_THRESHOLD             inv_appr_threshold
+extern mp_size_t                       inv_appr_threshold;
+
+#undef BINV_NEWTON_THRESHOLD
+#define BINV_NEWTON_THRESHOLD          binv_newton_threshold
+extern mp_size_t                       binv_newton_threshold;
+
+#undef REDC_1_TO_REDC_2_THRESHOLD
+#define REDC_1_TO_REDC_2_THRESHOLD     redc_1_to_redc_2_threshold
+extern mp_size_t                       redc_1_to_redc_2_threshold;
+
+#undef REDC_2_TO_REDC_N_THRESHOLD
+#define REDC_2_TO_REDC_N_THRESHOLD     redc_2_to_redc_n_threshold
+extern mp_size_t                       redc_2_to_redc_n_threshold;
+
+#undef REDC_1_TO_REDC_N_THRESHOLD
+#define REDC_1_TO_REDC_N_THRESHOLD     redc_1_to_redc_n_threshold
+extern mp_size_t                       redc_1_to_redc_n_threshold;
+
+#undef MATRIX22_STRASSEN_THRESHOLD
+#define MATRIX22_STRASSEN_THRESHOLD    matrix22_strassen_threshold
+extern mp_size_t                       matrix22_strassen_threshold;
+
+#undef HGCD_THRESHOLD
+#define HGCD_THRESHOLD                 hgcd_threshold
+extern mp_size_t                       hgcd_threshold;
+
+#undef GCD_DC_THRESHOLD
+#define GCD_DC_THRESHOLD               gcd_dc_threshold
+extern mp_size_t                       gcd_dc_threshold;
+
+#undef  GCDEXT_DC_THRESHOLD
+#define GCDEXT_DC_THRESHOLD            gcdext_dc_threshold
+extern mp_size_t                       gcdext_dc_threshold;
+
+#undef  DIVREM_1_NORM_THRESHOLD
+#define DIVREM_1_NORM_THRESHOLD                divrem_1_norm_threshold
+extern mp_size_t                       divrem_1_norm_threshold;
+
+#undef  DIVREM_1_UNNORM_THRESHOLD
+#define DIVREM_1_UNNORM_THRESHOLD      divrem_1_unnorm_threshold
+extern mp_size_t                       divrem_1_unnorm_threshold;
+
+#undef MOD_1_NORM_THRESHOLD
+#define MOD_1_NORM_THRESHOLD           mod_1_norm_threshold
+extern mp_size_t                       mod_1_norm_threshold;
+
+#undef MOD_1_UNNORM_THRESHOLD
+#define MOD_1_UNNORM_THRESHOLD         mod_1_unnorm_threshold
+extern mp_size_t                       mod_1_unnorm_threshold;
+
+#undef MOD_1N_TO_MOD_1_1_THRESHOLD
+#define MOD_1N_TO_MOD_1_1_THRESHOLD    mod_1n_to_mod_1_1_threshold
+extern mp_size_t                       mod_1n_to_mod_1_1_threshold;
+
+#undef MOD_1U_TO_MOD_1_1_THRESHOLD
+#define MOD_1U_TO_MOD_1_1_THRESHOLD    mod_1u_to_mod_1_1_threshold
+extern mp_size_t                       mod_1u_to_mod_1_1_threshold;
+
+#undef MOD_1_1_TO_MOD_1_2_THRESHOLD
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD   mod_1_1_to_mod_1_2_threshold
+extern mp_size_t                       mod_1_1_to_mod_1_2_threshold;
+
+#undef MOD_1_2_TO_MOD_1_4_THRESHOLD
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD   mod_1_2_to_mod_1_4_threshold
+extern mp_size_t                       mod_1_2_to_mod_1_4_threshold;
+
+#undef PREINV_MOD_1_TO_MOD_1_THRESHOLD
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD        preinv_mod_1_to_mod_1_threshold
+extern mp_size_t                       preinv_mod_1_to_mod_1_threshold;
+
+#if ! UDIV_PREINV_ALWAYS
+#undef DIVREM_2_THRESHOLD
+#define DIVREM_2_THRESHOLD             divrem_2_threshold
+extern mp_size_t                       divrem_2_threshold;
+#endif
+
+#undef MULMOD_BNM1_THRESHOLD
+#define MULMOD_BNM1_THRESHOLD          mulmod_bnm1_threshold
+extern mp_size_t                       mulmod_bnm1_threshold;
+
+#undef SQRMOD_BNM1_THRESHOLD
+#define SQRMOD_BNM1_THRESHOLD          sqrmod_bnm1_threshold
+extern mp_size_t                       sqrmod_bnm1_threshold;
+
+#undef GET_STR_DC_THRESHOLD
+#define GET_STR_DC_THRESHOLD           get_str_dc_threshold
+extern mp_size_t                       get_str_dc_threshold;
+
+#undef  GET_STR_PRECOMPUTE_THRESHOLD
+#define GET_STR_PRECOMPUTE_THRESHOLD   get_str_precompute_threshold
+extern mp_size_t                       get_str_precompute_threshold;
+
+#undef SET_STR_DC_THRESHOLD
+#define SET_STR_DC_THRESHOLD           set_str_dc_threshold
+extern mp_size_t                       set_str_dc_threshold;
+
+#undef  SET_STR_PRECOMPUTE_THRESHOLD
+#define SET_STR_PRECOMPUTE_THRESHOLD   set_str_precompute_threshold
+extern mp_size_t                       set_str_precompute_threshold;
+
+#undef  FFT_TABLE_ATTRS
+#define FFT_TABLE_ATTRS
+extern mp_size_t  mpn_fft_table[2][MPN_FFT_TABLE_SIZE];
+#define FFT_TABLE3_SIZE 2000   /* generous space for tuning */
+extern struct fft_table_nk mpn_fft_table3[2][FFT_TABLE3_SIZE];
+
+/* Sizes the tune program tests up to, used in a couple of recompilations. */
+#undef MUL_TOOM22_THRESHOLD_LIMIT
+#undef MUL_TOOM33_THRESHOLD_LIMIT
+#undef MULLO_BASECASE_THRESHOLD_LIMIT
+#undef SQR_TOOM3_THRESHOLD_LIMIT
+#define SQR_TOOM2_MAX_GENERIC           200
+#define MUL_TOOM22_THRESHOLD_LIMIT      700
+#define MUL_TOOM33_THRESHOLD_LIMIT      700
+#define SQR_TOOM3_THRESHOLD_LIMIT       400
+#define MUL_TOOM44_THRESHOLD_LIMIT     1000
+#define SQR_TOOM4_THRESHOLD_LIMIT      1000
+#define MUL_TOOM6H_THRESHOLD_LIMIT     1100
+#define SQR_TOOM6_THRESHOLD_LIMIT      1100
+#define MUL_TOOM8H_THRESHOLD_LIMIT     1200
+#define SQR_TOOM8_THRESHOLD_LIMIT      1200
+#define MULLO_BASECASE_THRESHOLD_LIMIT  200
+#define GET_STR_THRESHOLD_LIMIT         150
+
+#endif /* TUNE_PROGRAM_BUILD */
+
+#if defined (__cplusplus)
+}
+#endif
+
+/* FIXME: Make these itch functions less conservative.  Also consider making
+   them dependent on just 'an', and compute the allocation directly from 'an'
+   instead of via n.  */
+
+/* toom22/toom2: Scratch need is 2*(an + k), k is the recursion depth.
+   k is the smallest k such that
+     ceil(an/2^k) < MUL_TOOM22_THRESHOLD.
+   which implies that
+     k = bitsize of floor ((an-1)/(MUL_TOOM22_THRESHOLD-1))
+       = 1 + floor (log_2 (floor ((an-1)/(MUL_TOOM22_THRESHOLD-1))))
+*/
+#define mpn_toom22_mul_itch(an, bn) \
+  (2 * ((an) + GMP_NUMB_BITS))
+#define mpn_toom2_sqr_itch(an) \
+  (2 * ((an) + GMP_NUMB_BITS))
+
+/* toom33/toom3: Scratch need is 5an/2 + 10k, k is the recursion depth.
+   We use 3an + C, so that we can use a smaller constant.
+ */
+#define mpn_toom33_mul_itch(an, bn) \
+  (3 * (an) + GMP_NUMB_BITS)
+#define mpn_toom3_sqr_itch(an) \
+  (3 * (an) + GMP_NUMB_BITS)
+
+/* toom44/toom4: Scratch need is 8an/3 + 13k, k is the recursion depth.
+   We use 3an + C, so that we can use a smaller constant.
+ */
+#define mpn_toom44_mul_itch(an, bn) \
+  (3 * (an) + GMP_NUMB_BITS)
+#define mpn_toom4_sqr_itch(an) \
+  (3 * (an) + GMP_NUMB_BITS)
+
+#define mpn_toom6_sqr_itch(n)                                          \
+( ((n) - SQR_TOOM6_THRESHOLD)*2 +                                      \
+   MAX(SQR_TOOM6_THRESHOLD*2 + GMP_NUMB_BITS*6,                        \
+       mpn_toom4_sqr_itch(SQR_TOOM6_THRESHOLD)) )
+
+#define mpn_toom6_mul_n_itch(n)                                                \
+( ((n) - MUL_TOOM6H_THRESHOLD)*2 +                                     \
+   MAX(MUL_TOOM6H_THRESHOLD*2 + GMP_NUMB_BITS*6,                       \
+       mpn_toom44_mul_itch(MUL_TOOM6H_THRESHOLD,MUL_TOOM6H_THRESHOLD)) )
+
+/* Scratch size for toom6h on operands of an and bn limbs: estimate N as
+   (an + bn) / 10 + 1 and return mpn_toom6_mul_n_itch for 6*N limbs.  */
+static inline mp_size_t
+mpn_toom6h_mul_itch (mp_size_t an, mp_size_t bn)
+{
+  mp_size_t piece = (an + bn) / (size_t) 10 + 1;
+
+  return mpn_toom6_mul_n_itch (piece * 6);
+}
+
+#define mpn_toom8_sqr_itch(n)                                          \
+( (((n)*15)>>3) - ((SQR_TOOM8_THRESHOLD*15)>>3) +                      \
+   MAX(((SQR_TOOM8_THRESHOLD*15)>>3) + GMP_NUMB_BITS*6,                \
+       mpn_toom6_sqr_itch(SQR_TOOM8_THRESHOLD)) )
+
+#define mpn_toom8_mul_n_itch(n)                                                \
+( (((n)*15)>>3) - ((MUL_TOOM8H_THRESHOLD*15)>>3) +                     \
+   MAX(((MUL_TOOM8H_THRESHOLD*15)>>3) + GMP_NUMB_BITS*6,               \
+       mpn_toom6_mul_n_itch(MUL_TOOM8H_THRESHOLD)) )
+
+/* Scratch size for toom8h on operands of an and bn limbs: estimate N as
+   (an + bn) / 14 + 1 and return mpn_toom8_mul_n_itch for 8*N limbs.  */
+static inline mp_size_t
+mpn_toom8h_mul_itch (mp_size_t an, mp_size_t bn)
+{
+  mp_size_t piece = (an + bn) / (size_t) 14 + 1;
+
+  return mpn_toom8_mul_n_itch (piece * 8);
+}
+
+/* Scratch size for toom32: 2*n + 1, where n is 1 + (an-1)/3 when
+   2*an >= 3*bn and 1 + ((bn-1)>>1) otherwise.  */
+static inline mp_size_t
+mpn_toom32_mul_itch (mp_size_t an, mp_size_t bn)
+{
+  size_t part;   /* same unsigned type the original conditional yields */
+  mp_size_t n;
+
+  if (2 * an >= 3 * bn)
+    part = (an - 1) / (size_t) 3;
+  else
+    part = (bn - 1) >> 1;
+  n = 1 + part;
+
+  return 2 * n + 1;
+}
+
+/* Scratch size for toom42: 6*n + 3, where n is (an+3)>>2 when an >= 2*bn
+   and (bn+1)>>1 otherwise.  */
+static inline mp_size_t
+mpn_toom42_mul_itch (mp_size_t an, mp_size_t bn)
+{
+  mp_size_t n;
+
+  if (an >= 2 * bn)
+    n = (an + 3) >> 2;
+  else
+    n = (bn + 1) >> 1;
+
+  return 6 * n + 3;
+}
+
+/* Scratch size for toom43: 6*n + 4, where n is 1 + ((an-1)>>2) when
+   3*an >= 4*bn and 1 + (bn-1)/3 otherwise.  */
+static inline mp_size_t
+mpn_toom43_mul_itch (mp_size_t an, mp_size_t bn)
+{
+  size_t part;   /* same unsigned type the original conditional yields */
+  mp_size_t n;
+
+  if (3 * an >= 4 * bn)
+    part = (an - 1) >> 2;
+  else
+    part = (bn - 1) / (size_t) 3;
+  n = 1 + part;
+
+  return 6 * n + 4;
+}
+
+/* Scratch size for toom52: 6*n + 4, where n is 1 + (an-1)/5 when
+   2*an >= 5*bn and 1 + ((bn-1)>>1) otherwise.  */
+static inline mp_size_t
+mpn_toom52_mul_itch (mp_size_t an, mp_size_t bn)
+{
+  size_t part;   /* same unsigned type the original conditional yields */
+  mp_size_t n;
+
+  if (2 * an >= 5 * bn)
+    part = (an - 1) / (size_t) 5;
+  else
+    part = (bn - 1) >> 1;
+  n = 1 + part;
+
+  return 6 * n + 4;
+}
+
+/* Scratch size for toom53: 10*n + 10, where n is 1 + (an-1)/5 when
+   3*an >= 5*bn and 1 + (bn-1)/3 otherwise.  */
+static inline mp_size_t
+mpn_toom53_mul_itch (mp_size_t an, mp_size_t bn)
+{
+  size_t part;   /* both alternatives are unsigned divisions */
+  mp_size_t n;
+
+  if (3 * an >= 5 * bn)
+    part = (an - 1) / (size_t) 5;
+  else
+    part = (bn - 1) / (size_t) 3;
+  n = 1 + part;
+
+  return 10 * n + 10;
+}
+
+/* Scratch size for toom62: 10*n + 10, where n is 1 + (an-1)/6 when
+   an >= 3*bn and 1 + ((bn-1)>>1) otherwise.  */
+static inline mp_size_t
+mpn_toom62_mul_itch (mp_size_t an, mp_size_t bn)
+{
+  size_t part;   /* same unsigned type the original conditional yields */
+  mp_size_t n;
+
+  if (an >= 3 * bn)
+    part = (an - 1) / (size_t) 6;
+  else
+    part = (bn - 1) >> 1;
+  n = 1 + part;
+
+  return 10 * n + 10;
+}
+
+/* Scratch size for toom63: 9*n + 3, where n is 1 + (an-1)/6 when
+   an >= 2*bn and 1 + (bn-1)/3 otherwise.  */
+static inline mp_size_t
+mpn_toom63_mul_itch (mp_size_t an, mp_size_t bn)
+{
+  size_t part;   /* both alternatives are unsigned divisions */
+  mp_size_t n;
+
+  if (an >= 2 * bn)
+    part = (an - 1) / (size_t) 6;
+  else
+    part = (bn - 1) / (size_t) 3;
+  n = 1 + part;
+
+  return 9 * n + 3;
+}
+
+#if 0
+#define mpn_fft_mul mpn_mul_fft_full
+#else
+#define mpn_fft_mul mpn_nussbaumer_mul
+#endif
+
+#ifdef __cplusplus
+
+/* A little helper for a null-terminated __gmp_allocate_func string.
+   The destructor ensures it's freed even if an exception is thrown.
+   The len field is needed by the destructor, and can be used by anyone else
+   to avoid a second strlen pass over the data.
+
+   Since our input is a C string, using strlen is correct.  Perhaps it'd be
+   more C++-ish style to use std::char_traits<char>::length, but char_traits
+   isn't available in gcc 2.95.4.  */
+
+class gmp_allocated_string {
+ public:
+  char *str;    /* the null-terminated string being held */
+  size_t len;   /* strlen of str, i.e. not counting the terminator */
+
+  /* Hold arg and record its length once, for the destructor and for any
+     other user wanting to avoid a second strlen.  */
+  gmp_allocated_string(char *arg)
+    : str (arg), len (std::strlen (arg))
+  { }
+
+  /* Hand the string back through __gmp_free_func, giving len+1 as the
+     block size so the terminator is included.  */
+  ~gmp_allocated_string()
+  { (*__gmp_free_func) (str, len+1); }
+};
+
+std::istream &__gmpz_operator_in_nowhite (std::istream &, mpz_ptr, char);
+int __gmp_istream_set_base (std::istream &, char &, bool &, bool &);
+void __gmp_istream_set_digits (std::string &, std::istream &, char &, bool &, int);
+void __gmp_doprnt_params_from_ios (struct doprnt_params_t *p, std::ios &o);
+std::ostream& __gmp_doprnt_integer_ostream (std::ostream &o, struct doprnt_params_t *p, char *s);
+extern const struct doprnt_funs_t  __gmp_asprintf_funs_noformat;
+
+#endif /* __cplusplus */
+
+#endif /* __GMP_IMPL_H__ */
diff --git a/gmpxx.h b/gmpxx.h

new file mode 100644 (file)

index 0000000..7490312
--- /dev/null
+++ b/gmpxx.h
@@ -0,0 +1,3388 @@
+/* gmpxx.h -- C++ class wrapper for GMP types.  -*- C++ -*-
+
+Copyright 2001, 2002, 2003, 2006, 2008 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+/* the C++ compiler must implement the following features:
+   - member templates
+   - partial specialization of templates
+   - namespace support
+   for g++, this means version 2.91 or higher
+   for other compilers, I don't know */
+#ifdef __GNUC__
+#if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 91)
+#error gmpxx.h requires g++ version 2.91 (egcs 1.1.2) or higher
+#endif
+#endif
+
+#ifndef __GMP_PLUSPLUS__
+#define __GMP_PLUSPLUS__
+
+#include <iosfwd>
+
+#include <cstring>  /* for strlen */
+#include <string>
+#include <stdexcept>
+#include <cfloat>
+#include <gmp.h>
+
+
+/**************** Function objects ****************/
+/* Any evaluation of a __gmp_expr ends up calling one of these functions
+   all intermediate functions being inline, the evaluation should optimize
+   to a direct call to the relevant function, thus yielding no overhead
+   over the C interface. */
+
+/* Unary plus: sets the destination to the operand, with one overload each
+   for mpz, mpq and mpf.  */
+struct __gmp_unary_plus
+{
+  static void eval(mpz_ptr z, mpz_srcptr w) { mpz_set(z, w); }
+  static void eval(mpq_ptr q, mpq_srcptr r) { mpq_set(q, r); }
+  static void eval(mpf_ptr f, mpf_srcptr g) { mpf_set(f, g); }
+};
+
+/* Unary minus: sets the destination to the negated operand, with one
+   overload each for mpz, mpq and mpf.  */
+struct __gmp_unary_minus
+{
+  static void eval(mpz_ptr z, mpz_srcptr w) { mpz_neg(z, w); }
+  static void eval(mpq_ptr q, mpq_srcptr r) { mpq_neg(q, r); }
+  static void eval(mpf_ptr f, mpf_srcptr g) { mpf_neg(f, g); }
+};
+
+/* Complement via mpz_com; only an mpz overload is provided.  */
+struct __gmp_unary_com
+{
+  static void eval(mpz_ptr z, mpz_srcptr w) { mpz_com(z, w); }
+};
+
+/* Addition.  One eval overload per operand combination: mpz, mpq or mpf on
+   one side and the same type, unsigned long, signed long or double on the
+   other (plus mpq with mpz).  The first argument receives the result.
+
+   In the signed long overloads a negative l is turned into a subtraction
+   of its magnitude.  The magnitude is formed as
+   -static_cast<unsigned long>(l): negating in unsigned arithmetic is well
+   defined for every value, whereas plain -l overflows (undefined
+   behaviour) when l is LONG_MIN.  */
+struct __gmp_binary_plus
+{
+  static void eval(mpz_ptr z, mpz_srcptr w, mpz_srcptr v)
+  { mpz_add(z, w, v); }
+
+  static void eval(mpz_ptr z, mpz_srcptr w, unsigned long int l)
+  { mpz_add_ui(z, w, l); }
+  static void eval(mpz_ptr z, unsigned long int l, mpz_srcptr w)
+  { mpz_add_ui(z, w, l); }
+  static void eval(mpz_ptr z, mpz_srcptr w, signed long int l)
+  {
+    if (l >= 0)
+      mpz_add_ui(z, w, l);
+    else
+      mpz_sub_ui(z, w, -static_cast<unsigned long>(l));
+  }
+  static void eval(mpz_ptr z, signed long int l, mpz_srcptr w)
+  {
+    if (l >= 0)
+      mpz_add_ui(z, w, l);
+    else
+      mpz_sub_ui(z, w, -static_cast<unsigned long>(l));
+  }
+  /* The double is first converted to a temporary mpz.  */
+  static void eval(mpz_ptr z, mpz_srcptr w, double d)
+  {
+    mpz_t temp;
+    mpz_init_set_d(temp, d);
+    mpz_add(z, w, temp);
+    mpz_clear(temp);
+  }
+  static void eval(mpz_ptr z, double d, mpz_srcptr w)
+  {
+    mpz_t temp;
+    mpz_init_set_d(temp, d);
+    mpz_add(z, temp, w);
+    mpz_clear(temp);
+  }
+
+  static void eval(mpq_ptr q, mpq_srcptr r, mpq_srcptr s)
+  { mpq_add(q, r, s); }
+
+  /* r + l is computed as (num + den*l) / den: copy r to q, then update
+     q's numerator in place with addmul/submul.  */
+  static void eval(mpq_ptr q, mpq_srcptr r, unsigned long int l)
+  { mpq_set(q, r); mpz_addmul_ui(mpq_numref(q), mpq_denref(q), l); }
+  static void eval(mpq_ptr q, unsigned long int l, mpq_srcptr r)
+  { mpq_set(q, r); mpz_addmul_ui(mpq_numref(q), mpq_denref(q), l); }
+  static void eval(mpq_ptr q, mpq_srcptr r, signed long int l)
+  {
+    mpq_set(q, r);
+    if (l >= 0)
+      mpz_addmul_ui(mpq_numref(q), mpq_denref(q), l);
+    else
+      mpz_submul_ui(mpq_numref(q), mpq_denref(q),
+                    -static_cast<unsigned long>(l));
+  }
+  static void eval(mpq_ptr q, signed long int l, mpq_srcptr r)
+  {
+    mpq_set(q, r);
+    if (l >= 0)
+      mpz_addmul_ui(mpq_numref(q), mpq_denref(q), l);
+    else
+      mpz_submul_ui(mpq_numref(q), mpq_denref(q),
+                    -static_cast<unsigned long>(l));
+  }
+  /* The double is first converted to a temporary mpq.  */
+  static void eval(mpq_ptr q, mpq_srcptr r, double d)
+  {
+    mpq_t temp;
+    mpq_init(temp);
+    mpq_set_d(temp, d);
+    mpq_add(q, r, temp);
+    mpq_clear(temp);
+  }
+  static void eval(mpq_ptr q, double d, mpq_srcptr r)
+  {
+    mpq_t temp;
+    mpq_init(temp);
+    mpq_set_d(temp, d);
+    mpq_add(q, temp, r);
+    mpq_clear(temp);
+  }
+
+  static void eval(mpq_ptr q, mpq_srcptr r, mpz_srcptr z)
+  { mpq_set(q, r); mpz_addmul(mpq_numref(q), mpq_denref(q), z); }
+  static void eval(mpq_ptr q, mpz_srcptr z, mpq_srcptr r)
+  { mpq_set(q, r); mpz_addmul(mpq_numref(q), mpq_denref(q), z); }
+
+  static void eval(mpf_ptr f, mpf_srcptr g, mpf_srcptr h)
+  { mpf_add(f, g, h); }
+
+  static void eval(mpf_ptr f, mpf_srcptr g, unsigned long int l)
+  { mpf_add_ui(f, g, l); }
+  static void eval(mpf_ptr f, unsigned long int l, mpf_srcptr g)
+  { mpf_add_ui(f, g, l); }
+  static void eval(mpf_ptr f, mpf_srcptr g, signed long int l)
+  {
+    if (l >= 0)
+      mpf_add_ui(f, g, l);
+    else
+      mpf_sub_ui(f, g, -static_cast<unsigned long>(l));
+  }
+  static void eval(mpf_ptr f, signed long int l, mpf_srcptr g)
+  {
+    if (l >= 0)
+      mpf_add_ui(f, g, l);
+    else
+      mpf_sub_ui(f, g, -static_cast<unsigned long>(l));
+  }
+  /* The double goes through a temporary mpf initialized with
+     8*sizeof(double) bits of precision.  */
+  static void eval(mpf_ptr f, mpf_srcptr g, double d)
+  {
+    mpf_t temp;
+    mpf_init2(temp, 8*sizeof(double));
+    mpf_set_d(temp, d);
+    mpf_add(f, g, temp);
+    mpf_clear(temp);
+  }
+  static void eval(mpf_ptr f, double d, mpf_srcptr g)
+  {
+    mpf_t temp;
+    mpf_init2(temp, 8*sizeof(double));
+    mpf_set_d(temp, d);
+    mpf_add(f, temp, g);
+    mpf_clear(temp);
+  }
+};
+
+struct __gmp_binary_minus
+{
+  static void eval(mpz_ptr z, mpz_srcptr w, mpz_srcptr v)
+  { mpz_sub(z, w, v); }
+
+  static void eval(mpz_ptr z, mpz_srcptr w, unsigned long int l)
+  { mpz_sub_ui(z, w, l); }
+  static void eval(mpz_ptr z, unsigned long int l, mpz_srcptr w)
+  { mpz_ui_sub(z, l, w); }
+  static void eval(mpz_ptr z, mpz_srcptr w, signed long int l)
+  {
+    if (l >= 0)
+      mpz_sub_ui(z, w, l);
+    else
+      mpz_add_ui(z, w, -l);
+  }
+  static void eval(mpz_ptr z, signed long int l, mpz_srcptr w)
+  {
+    if (l >= 0)
+      mpz_ui_sub(z, l, w);
+    else
+      {
+        mpz_add_ui(z, w, -l);
+        mpz_neg(z, z);
+      }
+  }
+  static void eval(mpz_ptr z, mpz_srcptr w, double d)
+  {
+    mpz_t temp;
+    mpz_init_set_d(temp, d);
+    mpz_sub(z, w, temp);
+    mpz_clear(temp);
+  }
+  static void eval(mpz_ptr z, double d, mpz_srcptr w)
+  {
+    mpz_t temp;
+    mpz_init_set_d(temp, d);
+    mpz_sub(z, temp, w);
+    mpz_clear(temp);
+  }
+
+  static void eval(mpq_ptr q, mpq_srcptr r, mpq_srcptr s)
+  { mpq_sub(q, r, s); }
+
+  static void eval(mpq_ptr q, mpq_srcptr r, unsigned long int l)
+  { mpq_set(q, r); mpz_submul_ui(mpq_numref(q), mpq_denref(q), l); }
+  static void eval(mpq_ptr q, unsigned long int l, mpq_srcptr r)
+  { mpq_neg(q, r); mpz_addmul_ui(mpq_numref(q), mpq_denref(q), l); }
+  static void eval(mpq_ptr q, mpq_srcptr r, signed long int l)
+  {
+    mpq_set(q, r);
+    if (l >= 0)
+      mpz_submul_ui(mpq_numref(q), mpq_denref(q), l);
+    else
+      mpz_addmul_ui(mpq_numref(q), mpq_denref(q), -l);
+  }
+  static void eval(mpq_ptr q, signed long int l, mpq_srcptr r)
+  {
+    mpq_neg(q, r);
+    if (l >= 0)
+      mpz_addmul_ui(mpq_numref(q), mpq_denref(q), l);
+    else
+      mpz_submul_ui(mpq_numref(q), mpq_denref(q), -l);
+  }
+  static void eval(mpq_ptr q, mpq_srcptr r, double d)
+  {
+    mpq_t temp;
+    mpq_init(temp);
+    mpq_set_d(temp, d);
+    mpq_sub(q, r, temp);
+    mpq_clear(temp);
+  }
+  static void eval(mpq_ptr q, double d, mpq_srcptr r)
+  {
+    mpq_t temp;
+    mpq_init(temp);
+    mpq_set_d(temp, d);
+    mpq_sub(q, temp, r);
+    mpq_clear(temp);
+  }
+
+  static void eval(mpq_ptr q, mpq_srcptr r, mpz_srcptr z)
+  { mpq_set(q, r); mpz_submul(mpq_numref(q), mpq_denref(q), z); }
+  static void eval(mpq_ptr q, mpz_srcptr z, mpq_srcptr r)
+  { mpq_neg(q, r); mpz_addmul(mpq_numref(q), mpq_denref(q), z); }
+
+  static void eval(mpf_ptr f, mpf_srcptr g, mpf_srcptr h)
+  { mpf_sub(f, g, h); }
+
+  // f = g - l
+  static void eval(mpf_ptr f, mpf_srcptr g, unsigned long int l)
+  {
+    mpf_sub_ui(f, g, l);
+  }
+  // f = l - g
+  static void eval(mpf_ptr f, unsigned long int l, mpf_srcptr g)
+  {
+    mpf_ui_sub(f, l, g);
+  }
+  // f = g - l for a signed l: subtract l when it is non-negative,
+  // otherwise add its magnitude.
+  static void eval(mpf_ptr f, mpf_srcptr g, signed long int l)
+  {
+    if (l >= 0)
+      mpf_sub_ui(f, g, l);
+    else
+      // Negate in unsigned arithmetic: a plain -l is signed overflow
+      // (undefined behaviour) when l == LONG_MIN.
+      mpf_add_ui(f, g, -static_cast<unsigned long int>(l));
+  }
+  // f = l - g for a signed l, computed as -(g - l).
+  static void eval(mpf_ptr f, signed long int l, mpf_srcptr g)
+  {
+    if (l >= 0)
+      mpf_sub_ui(f, g, l);
+    else
+      // Negate in unsigned arithmetic: a plain -l is signed overflow
+      // (undefined behaviour) when l == LONG_MIN.
+      mpf_add_ui(f, g, -static_cast<unsigned long int>(l));
+    mpf_neg(f, f);
+  }
+  // f = g - d; d is placed in a scratch mpf created with a precision of
+  // 8*sizeof(double) bits.
+  static void eval(mpf_ptr f, mpf_srcptr g, double d)
+  {
+    mpf_t subtrahend;
+    mpf_init2(subtrahend, 8*sizeof(double));
+    mpf_set_d(subtrahend, d);
+    mpf_sub(f, g, subtrahend);
+    mpf_clear(subtrahend);
+  }
+  // f = d - g; d is placed in a scratch mpf created with a precision of
+  // 8*sizeof(double) bits.
+  static void eval(mpf_ptr f, double d, mpf_srcptr g)
+  {
+    mpf_t minuend;
+    mpf_init2(minuend, 8*sizeof(double));
+    mpf_set_d(minuend, d);
+    mpf_sub(f, minuend, g);
+    mpf_clear(minuend);
+  }
+};
+
+struct __gmp_binary_multiplies
+{
+  static void eval(mpz_ptr z, mpz_srcptr w, mpz_srcptr v)
+  { mpz_mul(z, w, v); }
+
+  static void eval(mpz_ptr z, mpz_srcptr w, unsigned long int l)
+  { mpz_mul_ui(z, w, l); }
+  static void eval(mpz_ptr z, unsigned long int l, mpz_srcptr w)
+  { mpz_mul_ui(z, w, l); }
+  static void eval(mpz_ptr z, mpz_srcptr w, signed long int l)
+  { mpz_mul_si (z, w, l); }
+  static void eval(mpz_ptr z, signed long int l, mpz_srcptr w)
+  { mpz_mul_si (z, w, l); }
+  static void eval(mpz_ptr z, mpz_srcptr w, double d)
+  {
+    mpz_t temp;
+    mpz_init_set_d(temp, d);
+    mpz_mul(z, w, temp);
+    mpz_clear(temp);
+  }
+  static void eval(mpz_ptr z, double d, mpz_srcptr w)
+  {
+    mpz_t temp;
+    mpz_init_set_d(temp, d);
+    mpz_mul(z, temp, w);
+    mpz_clear(temp);
+  }
+
+  static void eval(mpq_ptr q, mpq_srcptr r, mpq_srcptr s)
+  { mpq_mul(q, r, s); }
+
+  static void eval(mpq_ptr q, mpq_srcptr r, unsigned long int l)
+  {
+    mpq_t temp;
+    mpq_init(temp);
+    mpq_set_ui(temp, l, 1);
+    mpq_mul(q, r, temp);
+    mpq_clear(temp);
+  }
+  static void eval(mpq_ptr q, unsigned long int l, mpq_srcptr r)
+  {
+    mpq_t temp;
+    mpq_init(temp);
+    mpq_set_ui(temp, l, 1);
+    mpq_mul(q, temp, r);
+    mpq_clear(temp);
+  }
+  static void eval(mpq_ptr q, mpq_srcptr r, signed long int l)
+  {
+    mpq_t temp;
+    mpq_init(temp);
+    mpq_set_si(temp, l, 1);
+    mpq_mul(q, r, temp);
+    mpq_clear(temp);
+  }
+  static void eval(mpq_ptr q, signed long int l, mpq_srcptr r)
+  {
+    mpq_t temp;
+    mpq_init(temp);
+    mpq_set_si(temp, l, 1);
+    mpq_mul(q, temp, r);
+    mpq_clear(temp);
+  }
+  static void eval(mpq_ptr q, mpq_srcptr r, double d)
+  {
+    mpq_t temp;
+    mpq_init(temp);
+    mpq_set_d(temp, d);
+    mpq_mul(q, r, temp);
+    mpq_clear(temp);
+  }
+  static void eval(mpq_ptr q, double d, mpq_srcptr r)
+  {
+    mpq_t temp;
+    mpq_init(temp);
+    mpq_set_d(temp, d);
+    mpq_mul(q, temp, r);
+    mpq_clear(temp);
+  }
+
+  static void eval(mpf_ptr f, mpf_srcptr g, mpf_srcptr h)
+  { mpf_mul(f, g, h); }
+
+  static void eval(mpf_ptr f, mpf_srcptr g, unsigned long int l)
+  { mpf_mul_ui(f, g, l); }
+  static void eval(mpf_ptr f, unsigned long int l, mpf_srcptr g)
+  { mpf_mul_ui(f, g, l); }
+  static void eval(mpf_ptr f, mpf_srcptr g, signed long int l)
+  {
+    if (l >= 0)
+      mpf_mul_ui(f, g, l);
+    else
+      {
+       mpf_mul_ui(f, g, -l);
+       mpf_neg(f, f);
+      }
+  }
+  static void eval(mpf_ptr f, signed long int l, mpf_srcptr g)
+  {
+    if (l >= 0)
+      mpf_mul_ui(f, g, l);
+    else
+      {
+       mpf_mul_ui(f, g, -l);
+       mpf_neg(f, f);
+      }
+  }
+  static void eval(mpf_ptr f, mpf_srcptr g, double d)
+  {
+    mpf_t temp;
+    mpf_init2(temp, 8*sizeof(double));
+    mpf_set_d(temp, d);
+    mpf_mul(f, g, temp);
+    mpf_clear(temp);
+  }
+  static void eval(mpf_ptr f, double d, mpf_srcptr g)
+  {
+    mpf_t temp;
+    mpf_init2(temp, 8*sizeof(double));
+    mpf_set_d(temp, d);
+    mpf_mul(f, temp, g);
+    mpf_clear(temp);
+  }
+};
+
+struct __gmp_binary_divides
+{
+  static void eval(mpz_ptr z, mpz_srcptr w, mpz_srcptr v)
+  { mpz_tdiv_q(z, w, v); }
+
+  static void eval(mpz_ptr z, mpz_srcptr w, unsigned long int l)
+  { mpz_tdiv_q_ui(z, w, l); }
+  static void eval(mpz_ptr z, unsigned long int l, mpz_srcptr w)
+  {
+    if (mpz_sgn(w) >= 0)
+      {
+       if (mpz_fits_ulong_p(w))
+         mpz_set_ui(z, l / mpz_get_ui(w));
+       else
+         mpz_set_ui(z, 0);
+      }
+    else
+      {
+       mpz_neg(z, w);
+       if (mpz_fits_ulong_p(z))
+         {
+           mpz_set_ui(z, l / mpz_get_ui(z));
+           mpz_neg(z, z);
+         }
+       else
+         mpz_set_ui(z, 0);
+      }
+  }
+  static void eval(mpz_ptr z, mpz_srcptr w, signed long int l)
+  {
+    if (l >= 0)
+      mpz_tdiv_q_ui(z, w, l);
+    else
+      {
+       mpz_tdiv_q_ui(z, w, -l);
+       mpz_neg(z, z);
+      }
+  }
+  static void eval(mpz_ptr z, signed long int l, mpz_srcptr w)
+  {
+    if (mpz_fits_slong_p(w))
+      mpz_set_si(z, l / mpz_get_si(w));
+    else
+      {
+        /* if w is bigger than a long then the quotient must be zero, unless
+           l==LONG_MIN and w==-LONG_MIN in which case the quotient is -1 */
+        mpz_set_si (z, (mpz_cmpabs_ui (w, (l >= 0 ? l : -l)) == 0 ? -1 : 0));
+      }
+  }
+  static void eval(mpz_ptr z, mpz_srcptr w, double d)
+  {
+    mpz_t temp;
+    mpz_init_set_d(temp, d);
+    mpz_tdiv_q(z, w, temp);
+    mpz_clear(temp);
+  }
+  static void eval(mpz_ptr z, double d, mpz_srcptr w)
+  {
+    mpz_t temp;
+    mpz_init_set_d(temp, d);
+    mpz_tdiv_q(z, temp, w);
+    mpz_clear(temp);
+  }
+
+  static void eval(mpq_ptr q, mpq_srcptr r, mpq_srcptr s)
+  { mpq_div(q, r, s); }
+
+  static void eval(mpq_ptr q, mpq_srcptr r, unsigned long int l)
+  {
+    mpq_t temp;
+    mpq_init(temp);
+    mpq_set_ui(temp, l, 1);
+    mpq_div(q, r, temp);
+    mpq_clear(temp);
+  }
+  static void eval(mpq_ptr q, unsigned long int l, mpq_srcptr r)
+  {
+    mpq_t temp;
+    mpq_init(temp);
+    mpq_set_ui(temp, l, 1);
+    mpq_div(q, temp, r);
+    mpq_clear(temp);
+  }
+  static void eval(mpq_ptr q, mpq_srcptr r, signed long int l)
+  {
+    mpq_t temp;
+    mpq_init(temp);
+    mpq_set_si(temp, l, 1);
+    mpq_div(q, r, temp);
+    mpq_clear(temp);
+  }
+  static void eval(mpq_ptr q, signed long int l, mpq_srcptr r)
+  {
+    mpq_t temp;
+    mpq_init(temp);
+    mpq_set_si(temp, l, 1);
+    mpq_div(q, temp, r);
+    mpq_clear(temp);
+  }
+  static void eval(mpq_ptr q, mpq_srcptr r, double d)
+  {
+    mpq_t temp;
+    mpq_init(temp);
+    mpq_set_d(temp, d);
+    mpq_div(q, r, temp);
+    mpq_clear(temp);
+  }
+  static void eval(mpq_ptr q, double d, mpq_srcptr r)
+  {
+    mpq_t temp;
+    mpq_init(temp);
+    mpq_set_d(temp, d);
+    mpq_div(q, temp, r);
+    mpq_clear(temp);
+  }
+
+  static void eval(mpf_ptr f, mpf_srcptr g, mpf_srcptr h)
+  { mpf_div(f, g, h); }
+
+  static void eval(mpf_ptr f, mpf_srcptr g, unsigned long int l)
+  { mpf_div_ui(f, g, l); }
+  static void eval(mpf_ptr f, unsigned long int l, mpf_srcptr g)
+  { mpf_ui_div(f, l, g); }
+  static void eval(mpf_ptr f, mpf_srcptr g, signed long int l)
+  {
+    if (l >= 0)
+      mpf_div_ui(f, g, l);
+    else
+      {
+       mpf_div_ui(f, g, -l);
+       mpf_neg(f, f);
+      }
+  }
+  static void eval(mpf_ptr f, signed long int l, mpf_srcptr g)
+  {
+    if (l >= 0)
+      mpf_ui_div(f, l, g);
+    else
+      {
+       mpf_ui_div(f, -l, g);
+       mpf_neg(f, f);
+      }
+  }
+  static void eval(mpf_ptr f, mpf_srcptr g, double d)
+  {
+    mpf_t temp;
+    mpf_init2(temp, 8*sizeof(double));
+    mpf_set_d(temp, d);
+    mpf_div(f, g, temp);
+    mpf_clear(temp);
+  }
+  static void eval(mpf_ptr f, double d, mpf_srcptr g)
+  {
+    mpf_t temp;
+    mpf_init2(temp, 8*sizeof(double));
+    mpf_set_d(temp, d);
+    mpf_div(f, temp, g);
+    mpf_clear(temp);
+  }
+};
+
+struct __gmp_binary_modulus
+{
+  static void eval(mpz_ptr z, mpz_srcptr w, mpz_srcptr v)
+  { mpz_tdiv_r(z, w, v); }
+
+  static void eval(mpz_ptr z, mpz_srcptr w, unsigned long int l)
+  { mpz_tdiv_r_ui(z, w, l); }
+  static void eval(mpz_ptr z, unsigned long int l, mpz_srcptr w)
+  {
+    if (mpz_sgn(w) >= 0)
+      {
+       if (mpz_fits_ulong_p(w))
+         mpz_set_ui(z, l % mpz_get_ui(w));
+       else
+         mpz_set_ui(z, l);
+      }
+    else
+      {
+       mpz_neg(z, w);
+       if (mpz_fits_ulong_p(z))
+         mpz_set_ui(z, l % mpz_get_ui(z));
+       else
+         mpz_set_ui(z, l);
+      }
+  }
+  static void eval(mpz_ptr z, mpz_srcptr w, signed long int l)
+  {
+    mpz_tdiv_r_ui (z, w, (l >= 0 ? l : -l));
+  }
+  static void eval(mpz_ptr z, signed long int l, mpz_srcptr w)
+  {
+    if (mpz_fits_slong_p(w))
+      mpz_set_si(z, l % mpz_get_si(w));
+    else
+      {
+        /* if w is bigger than a long then the remainder is l unchanged,
+           unless l==LONG_MIN and w==-LONG_MIN in which case it's 0 */
+        mpz_set_si (z, mpz_cmpabs_ui (w, (l >= 0 ? l : -l)) == 0 ? 0 : l);
+      }
+  }
+  static void eval(mpz_ptr z, mpz_srcptr w, double d)
+  {
+    mpz_t temp;
+    mpz_init_set_d(temp, d);
+    mpz_tdiv_r(z, w, temp);
+    mpz_clear(temp);
+  }
+  static void eval(mpz_ptr z, double d, mpz_srcptr w)
+  {
+    mpz_t temp;
+    mpz_init_set_d(temp, d);
+    mpz_tdiv_r(z, temp, w);
+    mpz_clear(temp);
+  }
+};
+
+// Max allocations for plain types when converted to mpz_t
+// (limb counts used to size the stack arrays in the macros below).
+#define __GMP_DBL_LIMBS (2 + DBL_MAX_EXP / GMP_NUMB_BITS)
+#define __GMP_ULI_LIMBS (1 + (8 * sizeof (long) - 1) / GMP_NUMB_BITS)
+
+// Each __GMPXX_TMP_* macro declares, in the enclosing scope, an mpz_t named
+// `temp` whose limb storage is the local array `limbs`, and then assigns the
+// scalar to it.  The expansion reads a variable named `l` (UI/SI variants) or
+// `d` (D variant), which must already be in scope at the point of use.
+// NOTE(review): `temp` is never passed to mpz_init here, so presumably it
+// must not be mpz_clear'ed or grown beyond the array -- confirm.
+#define __GMPXX_TMP_UI                                                 \
+  mpz_t temp;                                                          \
+  mp_limb_t limbs[__GMP_ULI_LIMBS];                                    \
+  temp->_mp_d = limbs;                                                 \
+  temp->_mp_alloc = __GMP_ULI_LIMBS;                                   \
+  mpz_set_ui (temp, l)
+#define __GMPXX_TMP_SI                                                 \
+  mpz_t temp;                                                          \
+  mp_limb_t limbs[__GMP_ULI_LIMBS];                                    \
+  temp->_mp_d = limbs;                                                 \
+  temp->_mp_alloc = __GMP_ULI_LIMBS;                                   \
+  mpz_set_si (temp, l)
+#define __GMPXX_TMP_D                                                  \
+  mpz_t temp;                                                          \
+  mp_limb_t limbs[__GMP_DBL_LIMBS];                                    \
+  temp->_mp_d = limbs;                                                 \
+  temp->_mp_alloc = __GMP_DBL_LIMBS;                                   \
+  mpz_set_d (temp, d)
+
+struct __gmp_binary_and
+{
+  static void eval(mpz_ptr z, mpz_srcptr w, mpz_srcptr v)
+  { mpz_and(z, w, v); }
+
+  static void eval(mpz_ptr z, mpz_srcptr w, unsigned long int l)
+  {  __GMPXX_TMP_UI;   mpz_and (z, w, temp);  }
+  static void eval(mpz_ptr z, unsigned long int l, mpz_srcptr w)
+  {  __GMPXX_TMP_UI;   mpz_and (z, w, temp);  }
+  static void eval(mpz_ptr z, mpz_srcptr w, signed long int l)
+  {  __GMPXX_TMP_SI;   mpz_and (z, w, temp);  }
+  static void eval(mpz_ptr z, signed long int l, mpz_srcptr w)
+  {  __GMPXX_TMP_SI;   mpz_and (z, w, temp);  }
+  static void eval(mpz_ptr z, mpz_srcptr w, double d)
+  {  __GMPXX_TMP_D;    mpz_and (z, w, temp); }
+  static void eval(mpz_ptr z, double d, mpz_srcptr w)
+  {  __GMPXX_TMP_D;    mpz_and (z, w, temp); }
+};
+
+struct __gmp_binary_ior
+{
+  static void eval(mpz_ptr z, mpz_srcptr w, mpz_srcptr v)
+  { mpz_ior(z, w, v); }
+  static void eval(mpz_ptr z, mpz_srcptr w, unsigned long int l)
+  {  __GMPXX_TMP_UI;   mpz_ior (z, w, temp);  }
+  static void eval(mpz_ptr z, unsigned long int l, mpz_srcptr w)
+  {  __GMPXX_TMP_UI;   mpz_ior (z, w, temp);  }
+  static void eval(mpz_ptr z, mpz_srcptr w, signed long int l)
+  {  __GMPXX_TMP_SI;   mpz_ior (z, w, temp);  }
+  static void eval(mpz_ptr z, signed long int l, mpz_srcptr w)
+  {  __GMPXX_TMP_SI;   mpz_ior (z, w, temp);  }
+  static void eval(mpz_ptr z, mpz_srcptr w, double d)
+  {  __GMPXX_TMP_D;    mpz_ior (z, w, temp); }
+  static void eval(mpz_ptr z, double d, mpz_srcptr w)
+  {  __GMPXX_TMP_D;    mpz_ior (z, w, temp); }
+};
+
+struct __gmp_binary_xor
+{
+  static void eval(mpz_ptr z, mpz_srcptr w, mpz_srcptr v)
+  { mpz_xor(z, w, v); }
+  static void eval(mpz_ptr z, mpz_srcptr w, unsigned long int l)
+  {  __GMPXX_TMP_UI;   mpz_xor (z, w, temp);  }
+  static void eval(mpz_ptr z, unsigned long int l, mpz_srcptr w)
+  {  __GMPXX_TMP_UI;   mpz_xor (z, w, temp);  }
+  static void eval(mpz_ptr z, mpz_srcptr w, signed long int l)
+  {  __GMPXX_TMP_SI;   mpz_xor (z, w, temp);  }
+  static void eval(mpz_ptr z, signed long int l, mpz_srcptr w)
+  {  __GMPXX_TMP_SI;   mpz_xor (z, w, temp);  }
+  static void eval(mpz_ptr z, mpz_srcptr w, double d)
+  {  __GMPXX_TMP_D;    mpz_xor (z, w, temp); }
+  static void eval(mpz_ptr z, double d, mpz_srcptr w)
+  {  __GMPXX_TMP_D;    mpz_xor (z, w, temp); }
+};
+
+// Left-shift kernels: the source operand is multiplied by 2^l and the
+// result stored in the first argument.
+struct __gmp_binary_lshift
+{
+  static void eval(mpz_ptr z, mpz_srcptr w, unsigned long int l)
+  {
+    mpz_mul_2exp(z, w, l);
+  }
+  static void eval(mpq_ptr q, mpq_srcptr r, unsigned long int l)
+  {
+    mpq_mul_2exp(q, r, l);
+  }
+  static void eval(mpf_ptr f, mpf_srcptr g, unsigned long int l)
+  {
+    mpf_mul_2exp(f, g, l);
+  }
+};
+
+// Right-shift kernels: the source operand is divided by 2^l and the result
+// stored in the first argument.  The mpz overload uses the floor-rounding
+// mpz_fdiv_q_2exp.
+struct __gmp_binary_rshift
+{
+  static void eval(mpz_ptr z, mpz_srcptr w, unsigned long int l)
+  {
+    mpz_fdiv_q_2exp(z, w, l);
+  }
+  static void eval(mpq_ptr q, mpq_srcptr r, unsigned long int l)
+  {
+    mpq_div_2exp(q, r, l);
+  }
+  static void eval(mpf_ptr f, mpf_srcptr g, unsigned long int l)
+  {
+    mpf_div_2exp(f, g, l);
+  }
+};
+
+struct __gmp_binary_equal
+{
+  static bool eval(mpz_srcptr z, mpz_srcptr w) { return mpz_cmp(z, w) == 0; }
+
+  static bool eval(mpz_srcptr z, unsigned long int l)
+  { return mpz_cmp_ui(z, l) == 0; }
+  static bool eval(unsigned long int l, mpz_srcptr z)
+  { return mpz_cmp_ui(z, l) == 0; }
+  static bool eval(mpz_srcptr z, signed long int l)
+  { return mpz_cmp_si(z, l) == 0; }
+  static bool eval(signed long int l, mpz_srcptr z)
+  { return mpz_cmp_si(z, l) == 0; }
+  static bool eval(mpz_srcptr z, double d)
+  { return mpz_cmp_d(z, d) == 0; }
+  static bool eval(double d, mpz_srcptr z)
+  { return mpz_cmp_d(z, d) == 0; }
+
+  static bool eval(mpq_srcptr q, mpq_srcptr r)
+  { return mpq_equal(q, r) != 0; }
+
+  static bool eval(mpq_srcptr q, unsigned long int l)
+  { return mpq_cmp_ui(q, l, 1) == 0; }
+  static bool eval(unsigned long int l, mpq_srcptr q)
+  { return mpq_cmp_ui(q, l, 1) == 0; }
+  static bool eval(mpq_srcptr q, signed long int l)
+  { return mpq_cmp_si(q, l, 1) == 0; }
+  static bool eval(signed long int l, mpq_srcptr q)
+  { return mpq_cmp_si(q, l, 1) == 0; }
+  static bool eval(mpq_srcptr q, double d)
+  {
+    bool b;
+    mpq_t temp;
+    mpq_init(temp);
+    mpq_set_d(temp, d);
+    b = (mpq_equal(q, temp) != 0);
+    mpq_clear(temp);
+    return b;
+  }
+  static bool eval(double d, mpq_srcptr q)
+  {
+    bool b;
+    mpq_t temp;
+    mpq_init(temp);
+    mpq_set_d(temp, d);
+    b = (mpq_equal(temp, q) != 0);
+    mpq_clear(temp);
+    return b;
+  }
+
+  static bool eval(mpf_srcptr f, mpf_srcptr g) { return mpf_cmp(f, g) == 0; }
+
+  static bool eval(mpf_srcptr f, unsigned long int l)
+  { return mpf_cmp_ui(f, l) == 0; }
+  static bool eval(unsigned long int l, mpf_srcptr f)
+  { return mpf_cmp_ui(f, l) == 0; }
+  static bool eval(mpf_srcptr f, signed long int l)
+  { return mpf_cmp_si(f, l) == 0; }
+  static bool eval(signed long int l, mpf_srcptr f)
+  { return mpf_cmp_si(f, l) == 0; }
+  static bool eval(mpf_srcptr f, double d)
+  { return mpf_cmp_d(f, d) == 0; }
+  static bool eval(double d, mpf_srcptr f)
+  { return mpf_cmp_d(f, d) == 0; }
+};
+
+struct __gmp_binary_not_equal
+{
+  static bool eval(mpz_srcptr z, mpz_srcptr w) { return mpz_cmp(z, w) != 0; }
+
+  static bool eval(mpz_srcptr z, unsigned long int l)
+  { return mpz_cmp_ui(z, l) != 0; }
+  static bool eval(unsigned long int l, mpz_srcptr z)
+  { return mpz_cmp_ui(z, l) != 0; }
+  static bool eval(mpz_srcptr z, signed long int l)
+  { return mpz_cmp_si(z, l) != 0; }
+  static bool eval(signed long int l, mpz_srcptr z)
+  { return mpz_cmp_si(z, l) != 0; }
+  static bool eval(mpz_srcptr z, double d)
+  { return mpz_cmp_d(z, d) != 0; }
+  static bool eval(double d, mpz_srcptr z)
+  { return mpz_cmp_d(z, d) != 0; }
+
+  static bool eval(mpq_srcptr q, mpq_srcptr r)
+  { return mpq_equal(q, r) == 0; }
+
+  static bool eval(mpq_srcptr q, unsigned long int l)
+  { return mpq_cmp_ui(q, l, 1) != 0; }
+  static bool eval(unsigned long int l, mpq_srcptr q)
+  { return mpq_cmp_ui(q, l, 1) != 0; }
+  static bool eval(mpq_srcptr q, signed long int l)
+  { return mpq_cmp_si(q, l, 1) != 0; }
+  static bool eval(signed long int l, mpq_srcptr q)
+  { return mpq_cmp_si(q, l, 1) != 0; }
+  static bool eval(mpq_srcptr q, double d)
+  {
+    bool b;
+    mpq_t temp;
+    mpq_init(temp);
+    mpq_set_d(temp, d);
+    b = (mpq_equal(q, temp) == 0);
+    mpq_clear(temp);
+    return b;
+  }
+  static bool eval(double d, mpq_srcptr q)
+  {
+    bool b;
+    mpq_t temp;
+    mpq_init(temp);
+    mpq_set_d(temp, d);
+    b = (mpq_equal(temp, q) == 0);
+    mpq_clear(temp);
+    return b;
+  }
+
+  static bool eval(mpf_srcptr f, mpf_srcptr g) { return mpf_cmp(f, g) != 0; }
+
+  static bool eval(mpf_srcptr f, unsigned long int l)
+  { return mpf_cmp_ui(f, l) != 0; }
+  static bool eval(unsigned long int l, mpf_srcptr f)
+  { return mpf_cmp_ui(f, l) != 0; }
+  static bool eval(mpf_srcptr f, signed long int l)
+  { return mpf_cmp_si(f, l) != 0; }
+  static bool eval(signed long int l, mpf_srcptr f)
+  { return mpf_cmp_si(f, l) != 0; }
+  static bool eval(mpf_srcptr f, double d)
+  { return mpf_cmp_d(f, d) != 0; }
+  static bool eval(double d, mpf_srcptr f)
+  { return mpf_cmp_d(f, d) != 0; }
+};
+
+struct __gmp_binary_less
+{
+  static bool eval(mpz_srcptr z, mpz_srcptr w) { return mpz_cmp(z, w) < 0; }
+
+  static bool eval(mpz_srcptr z, unsigned long int l)
+  { return mpz_cmp_ui(z, l) < 0; }
+  static bool eval(unsigned long int l, mpz_srcptr z)
+  { return mpz_cmp_ui(z, l) > 0; }
+  static bool eval(mpz_srcptr z, signed long int l)
+  { return mpz_cmp_si(z, l) < 0; }
+  static bool eval(signed long int l, mpz_srcptr z)
+  { return mpz_cmp_si(z, l) > 0; }
+  static bool eval(mpz_srcptr z, double d)
+  { return mpz_cmp_d(z, d) < 0; }
+  static bool eval(double d, mpz_srcptr z)
+  { return mpz_cmp_d(z, d) > 0; }
+
+  static bool eval(mpq_srcptr q, mpq_srcptr r) { return mpq_cmp(q, r) < 0; }
+
+  static bool eval(mpq_srcptr q, unsigned long int l)
+  { return mpq_cmp_ui(q, l, 1) < 0; }
+  static bool eval(unsigned long int l, mpq_srcptr q)
+  { return mpq_cmp_ui(q, l, 1) > 0; }
+  static bool eval(mpq_srcptr q, signed long int l)
+  { return mpq_cmp_si(q, l, 1) < 0; }
+  static bool eval(signed long int l, mpq_srcptr q)
+  { return mpq_cmp_si(q, l, 1) > 0; }
+  static bool eval(mpq_srcptr q, double d)
+  {
+    bool b;
+    mpq_t temp;
+    mpq_init(temp);
+    mpq_set_d(temp, d);
+    b = (mpq_cmp(q, temp) < 0);
+    mpq_clear(temp);
+    return b;
+  }
+  static bool eval(double d, mpq_srcptr q)
+  {
+    bool b;
+    mpq_t temp;
+    mpq_init(temp);
+    mpq_set_d(temp, d);
+    b = (mpq_cmp(temp, q) < 0);
+    mpq_clear(temp);
+    return b;
+  }
+
+  static bool eval(mpf_srcptr f, mpf_srcptr g) { return mpf_cmp(f, g) < 0; }
+
+  static bool eval(mpf_srcptr f, unsigned long int l)
+  { return mpf_cmp_ui(f, l) < 0; }
+  static bool eval(unsigned long int l, mpf_srcptr f)
+  { return mpf_cmp_ui(f, l) > 0; }
+  static bool eval(mpf_srcptr f, signed long int l)
+  { return mpf_cmp_si(f, l) < 0; }
+  static bool eval(signed long int l, mpf_srcptr f)
+  { return mpf_cmp_si(f, l) > 0; }
+  static bool eval(mpf_srcptr f, double d)
+  { return mpf_cmp_d(f, d) < 0; }
+  static bool eval(double d, mpf_srcptr f)
+  { return mpf_cmp_d(f, d) > 0; }
+};
+
+struct __gmp_binary_less_equal
+{
+  static bool eval(mpz_srcptr z, mpz_srcptr w) { return mpz_cmp(z, w) <= 0; }
+
+  static bool eval(mpz_srcptr z, unsigned long int l)
+  { return mpz_cmp_ui(z, l) <= 0; }
+  static bool eval(unsigned long int l, mpz_srcptr z)
+  { return mpz_cmp_ui(z, l) >= 0; }
+  static bool eval(mpz_srcptr z, signed long int l)
+  { return mpz_cmp_si(z, l) <= 0; }
+  static bool eval(signed long int l, mpz_srcptr z)
+  { return mpz_cmp_si(z, l) >= 0; }
+  static bool eval(mpz_srcptr z, double d)
+  { return mpz_cmp_d(z, d) <= 0; }
+  static bool eval(double d, mpz_srcptr z)
+  { return mpz_cmp_d(z, d) >= 0; }
+
+  static bool eval(mpq_srcptr q, mpq_srcptr r) { return mpq_cmp(q, r) <= 0; }
+
+  static bool eval(mpq_srcptr q, unsigned long int l)
+  { return mpq_cmp_ui(q, l, 1) <= 0; }
+  static bool eval(unsigned long int l, mpq_srcptr q)
+  { return mpq_cmp_ui(q, l, 1) >= 0; }
+  static bool eval(mpq_srcptr q, signed long int l)
+  { return mpq_cmp_si(q, l, 1) <= 0; }
+  static bool eval(signed long int l, mpq_srcptr q)
+  { return mpq_cmp_si(q, l, 1) >= 0; }
+  static bool eval(mpq_srcptr q, double d)
+  {
+    bool b;
+    mpq_t temp;
+    mpq_init(temp);
+    mpq_set_d(temp, d);
+    b = (mpq_cmp(q, temp) <= 0);
+    mpq_clear(temp);
+    return b;
+  }
+  static bool eval(double d, mpq_srcptr q)
+  {
+    bool b;
+    mpq_t temp;
+    mpq_init(temp);
+    mpq_set_d(temp, d);
+    b = (mpq_cmp(temp, q) <= 0);
+    mpq_clear(temp);
+    return b;
+  }
+
+  static bool eval(mpf_srcptr f, mpf_srcptr g) { return mpf_cmp(f, g) <= 0; }
+
+  static bool eval(mpf_srcptr f, unsigned long int l)
+  { return mpf_cmp_ui(f, l) <= 0; }
+  static bool eval(unsigned long int l, mpf_srcptr f)
+  { return mpf_cmp_ui(f, l) >= 0; }
+  static bool eval(mpf_srcptr f, signed long int l)
+  { return mpf_cmp_si(f, l) <= 0; }
+  static bool eval(signed long int l, mpf_srcptr f)
+  { return mpf_cmp_si(f, l) >= 0; }
+  static bool eval(mpf_srcptr f, double d)
+  { return mpf_cmp_d(f, d) <= 0; }
+  static bool eval(double d, mpf_srcptr f)
+  { return mpf_cmp_d(f, d) >= 0; }
+};
+
+struct __gmp_binary_greater
+{
+  static bool eval(mpz_srcptr z, mpz_srcptr w) { return mpz_cmp(z, w) > 0; }
+
+  static bool eval(mpz_srcptr z, unsigned long int l)
+  { return mpz_cmp_ui(z, l) > 0; }
+  static bool eval(unsigned long int l, mpz_srcptr z)
+  { return mpz_cmp_ui(z, l) < 0; }
+  static bool eval(mpz_srcptr z, signed long int l)
+  { return mpz_cmp_si(z, l) > 0; }
+  static bool eval(signed long int l, mpz_srcptr z)
+  { return mpz_cmp_si(z, l) < 0; }
+  static bool eval(mpz_srcptr z, double d)
+  { return mpz_cmp_d(z, d) > 0; }
+  static bool eval(double d, mpz_srcptr z)
+  { return mpz_cmp_d(z, d) < 0; }
+
+  static bool eval(mpq_srcptr q, mpq_srcptr r) { return mpq_cmp(q, r) > 0; }
+
+  static bool eval(mpq_srcptr q, unsigned long int l)
+  { return mpq_cmp_ui(q, l, 1) > 0; }
+  static bool eval(unsigned long int l, mpq_srcptr q)
+  { return mpq_cmp_ui(q, l, 1) < 0; }
+  static bool eval(mpq_srcptr q, signed long int l)
+  { return mpq_cmp_si(q, l, 1) > 0; }
+  static bool eval(signed long int l, mpq_srcptr q)
+  { return mpq_cmp_si(q, l, 1) < 0; }
+  static bool eval(mpq_srcptr q, double d)
+  {
+    bool b;
+    mpq_t temp;
+    mpq_init(temp);
+    mpq_set_d(temp, d);
+    b = (mpq_cmp(q, temp) > 0);
+    mpq_clear(temp);
+    return b;
+  }
+  static bool eval(double d, mpq_srcptr q)
+  {
+    bool b;
+    mpq_t temp;
+    mpq_init(temp);
+    mpq_set_d(temp, d);
+    b = (mpq_cmp(temp, q) > 0);
+    mpq_clear(temp);
+    return b;
+  }
+
+  static bool eval(mpf_srcptr f, mpf_srcptr g) { return mpf_cmp(f, g) > 0; }
+
+  static bool eval(mpf_srcptr f, unsigned long int l)
+  { return mpf_cmp_ui(f, l) > 0; }
+  static bool eval(unsigned long int l, mpf_srcptr f)
+  { return mpf_cmp_ui(f, l) < 0; }
+  static bool eval(mpf_srcptr f, signed long int l)
+  { return mpf_cmp_si(f, l) > 0; }
+  static bool eval(signed long int l, mpf_srcptr f)
+  { return mpf_cmp_si(f, l) < 0; }
+  static bool eval(mpf_srcptr f, double d)
+  { return mpf_cmp_d(f, d) > 0; }
+  static bool eval(double d, mpf_srcptr f)
+  { return mpf_cmp_d(f, d) < 0; }
+};
+
+struct __gmp_binary_greater_equal
+{
+  static bool eval(mpz_srcptr z, mpz_srcptr w) { return mpz_cmp(z, w) >= 0; }
+
+  static bool eval(mpz_srcptr z, unsigned long int l)
+  { return mpz_cmp_ui(z, l) >= 0; }
+  static bool eval(unsigned long int l, mpz_srcptr z)
+  { return mpz_cmp_ui(z, l) <= 0; }
+  static bool eval(mpz_srcptr z, signed long int l)
+  { return mpz_cmp_si(z, l) >= 0; }
+  static bool eval(signed long int l, mpz_srcptr z)
+  { return mpz_cmp_si(z, l) <= 0; }
+  static bool eval(mpz_srcptr z, double d)
+  { return mpz_cmp_d(z, d) >= 0; }
+  static bool eval(double d, mpz_srcptr z)
+  { return mpz_cmp_d(z, d) <= 0; }
+
+  static bool eval(mpq_srcptr q, mpq_srcptr r) { return mpq_cmp(q, r) >= 0; }
+
+  static bool eval(mpq_srcptr q, unsigned long int l)
+  { return mpq_cmp_ui(q, l, 1) >= 0; }
+  static bool eval(unsigned long int l, mpq_srcptr q)
+  { return mpq_cmp_ui(q, l, 1) <= 0; }
+  static bool eval(mpq_srcptr q, signed long int l)
+  { return mpq_cmp_si(q, l, 1) >= 0; }
+  static bool eval(signed long int l, mpq_srcptr q)
+  { return mpq_cmp_si(q, l, 1) <= 0; }
+  static bool eval(mpq_srcptr q, double d)
+  {
+    bool b;
+    mpq_t temp;
+    mpq_init(temp);
+    mpq_set_d(temp, d);
+    b = (mpq_cmp(q, temp) >= 0);
+    mpq_clear(temp);
+    return b;
+  }
+  static bool eval(double d, mpq_srcptr q)
+  {
+    bool b;
+    mpq_t temp;
+    mpq_init(temp);
+    mpq_set_d(temp, d);
+    b = (mpq_cmp(temp, q) >= 0);
+    mpq_clear(temp);
+    return b;
+  }
+
+  static bool eval(mpf_srcptr f, mpf_srcptr g) { return mpf_cmp(f, g) >= 0; }
+
+  static bool eval(mpf_srcptr f, unsigned long int l)
+  { return mpf_cmp_ui(f, l) >= 0; }
+  static bool eval(unsigned long int l, mpf_srcptr f)
+  { return mpf_cmp_ui(f, l) <= 0; }
+  static bool eval(mpf_srcptr f, signed long int l)
+  { return mpf_cmp_si(f, l) >= 0; }
+  static bool eval(signed long int l, mpf_srcptr f)
+  { return mpf_cmp_si(f, l) <= 0; }
+  static bool eval(mpf_srcptr f, double d)
+  { return mpf_cmp_d(f, d) >= 0; }
+  static bool eval(double d, mpf_srcptr f)
+  { return mpf_cmp_d(f, d) <= 0; }
+};
+
+// In-place "add one" kernels.
+struct __gmp_unary_increment
+{
+  static void eval(mpz_ptr z)
+  {
+    mpz_add_ui(z, z, 1);
+  }
+  // For a rational, adding one means num += den.
+  static void eval(mpq_ptr q)
+  {
+    mpz_add(mpq_numref(q), mpq_numref(q), mpq_denref(q));
+  }
+  static void eval(mpf_ptr f)
+  {
+    mpf_add_ui(f, f, 1);
+  }
+};
+
+// In-place "subtract one" kernels.
+struct __gmp_unary_decrement
+{
+  static void eval(mpz_ptr z)
+  {
+    mpz_sub_ui(z, z, 1);
+  }
+  // For a rational, subtracting one means num -= den.
+  static void eval(mpq_ptr q)
+  {
+    mpz_sub(mpq_numref(q), mpq_numref(q), mpq_denref(q));
+  }
+  static void eval(mpf_ptr f)
+  {
+    mpf_sub_ui(f, f, 1);
+  }
+};
+
+// Absolute value: the first argument receives |second argument|.
+struct __gmp_abs_function
+{
+  static void eval(mpz_ptr z, mpz_srcptr w)
+  {
+    mpz_abs(z, w);
+  }
+  static void eval(mpq_ptr q, mpq_srcptr r)
+  {
+    mpq_abs(q, r);
+  }
+  static void eval(mpf_ptr f, mpf_srcptr g)
+  {
+    mpf_abs(f, g);
+  }
+};
+
+// f = mpf_trunc(g)
+struct __gmp_trunc_function
+{
+  static void eval(mpf_ptr f, mpf_srcptr g)
+  {
+    mpf_trunc(f, g);
+  }
+};
+
+// f = mpf_floor(g)
+struct __gmp_floor_function
+{
+  static void eval(mpf_ptr f, mpf_srcptr g)
+  {
+    mpf_floor(f, g);
+  }
+};
+
+// f = mpf_ceil(g)
+struct __gmp_ceil_function
+{
+  static void eval(mpf_ptr f, mpf_srcptr g)
+  {
+    mpf_ceil(f, g);
+  }
+};
+
+// Square root: the first argument receives the root of the second, via
+// mpz_sqrt for integers and mpf_sqrt for floats.
+struct __gmp_sqrt_function
+{
+  static void eval(mpz_ptr z, mpz_srcptr w)
+  {
+    mpz_sqrt(z, w);
+  }
+  static void eval(mpf_ptr f, mpf_srcptr g)
+  {
+    mpf_sqrt(f, g);
+  }
+};
+
+struct __gmp_hypot_function
+{
+  static void eval(mpf_ptr f, mpf_srcptr g, mpf_srcptr h)
+  {
+    mpf_t temp;
+    mpf_init2(temp, mpf_get_prec(f));
+    mpf_mul(temp, g, g);
+    mpf_mul(f, h, h);
+    mpf_add(f, f, temp);
+    mpf_sqrt(f, f);
+    mpf_clear(temp);
+  }
+
+  static void eval(mpf_ptr f, mpf_srcptr g, unsigned long int l)
+  {
+    mpf_t temp;
+    mpf_init2(temp, mpf_get_prec(f));
+    mpf_mul(temp, g, g);
+    mpf_set_ui(f, l);
+    mpf_mul(f, f, f);
+    mpf_add(f, f, temp);
+    mpf_sqrt(f, f);
+    mpf_clear(temp);
+  }
+  static void eval(mpf_ptr f, unsigned long int l, mpf_srcptr g)
+  {
+    mpf_t temp;
+    mpf_init2(temp, mpf_get_prec(f));
+    mpf_mul(temp, g, g);
+    mpf_set_ui(f, l);
+    mpf_mul(f, f, f);
+    mpf_add(f, f, temp);
+    mpf_sqrt(f, f);
+    mpf_clear(temp);
+  }
+  static void eval(mpf_ptr f, mpf_srcptr g, signed long int l)
+  {
+    mpf_t temp;
+    mpf_init2(temp, mpf_get_prec(f));
+    mpf_mul(temp, g, g);
+    mpf_set_si(f, l);
+    mpf_mul(f, f, f);
+    mpf_add(f, f, temp);
+    mpf_sqrt(f, f);
+    mpf_clear(temp);
+  }
+  static void eval(mpf_ptr f, signed long int l, mpf_srcptr g)
+  {
+    mpf_t temp;
+    mpf_init2(temp, mpf_get_prec(f));
+    mpf_mul(temp, g, g);
+    mpf_set_si(f, l);
+    mpf_mul(f, f, f);
+    mpf_add(f, f, temp);
+    mpf_sqrt(f, f);
+    mpf_clear(temp);
+  }
+  static void eval(mpf_ptr f, mpf_srcptr g, double d)
+  {
+    mpf_t temp;
+    mpf_init2(temp, mpf_get_prec(f));
+    mpf_mul(temp, g, g);
+    mpf_set_d(f, d);
+    mpf_mul(f, f, f);
+    mpf_add(f, f, temp);
+    mpf_sqrt(f, f);
+    mpf_clear(temp);
+  }
+  static void eval(mpf_ptr f, double d, mpf_srcptr g)
+  {
+    mpf_t temp;
+    mpf_init2(temp, mpf_get_prec(f));
+    mpf_mul(temp, g, g);
+    mpf_set_d(f, d);
+    mpf_mul(f, f, f);
+    mpf_add(f, f, temp);
+    mpf_sqrt(f, f);
+    mpf_clear(temp);
+  }
+};
+
+// Sign query: forwards to mpz_sgn / mpq_sgn / mpf_sgn and returns the result.
+struct __gmp_sgn_function
+{
+  static int eval(mpz_srcptr z)
+  {
+    return mpz_sgn(z);
+  }
+  static int eval(mpq_srcptr q)
+  {
+    return mpq_sgn(q);
+  }
+  static int eval(mpf_srcptr f)
+  {
+    return mpf_sgn(f);
+  }
+};
+
+struct __gmp_cmp_function
+{
+  static int eval(mpz_srcptr z, mpz_srcptr w) { return mpz_cmp(z, w); }
+
+  static int eval(mpz_srcptr z, unsigned long int l)
+  { return mpz_cmp_ui(z, l); }
+  static int eval(unsigned long int l, mpz_srcptr z)
+  { return -mpz_cmp_ui(z, l); }
+  static int eval(mpz_srcptr z, signed long int l)
+  { return mpz_cmp_si(z, l); }
+  static int eval(signed long int l, mpz_srcptr z)
+  { return -mpz_cmp_si(z, l); }
+  static int eval(mpz_srcptr z, double d)
+  { return mpz_cmp_d(z, d); }
+  static int eval(double d, mpz_srcptr z)
+  { return -mpz_cmp_d(z, d); }
+
+  static int eval(mpq_srcptr q, mpq_srcptr r) { return mpq_cmp(q, r); }
+
+  static int eval(mpq_srcptr q, unsigned long int l)
+  { return mpq_cmp_ui(q, l, 1); }
+  static int eval(unsigned long int l, mpq_srcptr q)
+  { return -mpq_cmp_ui(q, l, 1); }
+  static int eval(mpq_srcptr q, signed long int l)
+  { return mpq_cmp_si(q, l, 1); }
+  static int eval(signed long int l, mpq_srcptr q)
+  { return -mpq_cmp_si(q, l, 1); }
+  static int eval(mpq_srcptr q, double d)
+  {
+    int i;
+    mpq_t temp;
+    mpq_init(temp);
+    mpq_set_d(temp, d);
+    i = mpq_cmp(q, temp);
+    mpq_clear(temp);
+    return i;
+  }
+  static int eval(double d, mpq_srcptr q)
+  {
+    int i;
+    mpq_t temp;
+    mpq_init(temp);
+    mpq_set_d(temp, d);
+    i = mpq_cmp(temp, q);
+    mpq_clear(temp);
+    return i;
+  }
+
+  static int eval(mpf_srcptr f, mpf_srcptr g) { return mpf_cmp(f, g); }
+
+  static int eval(mpf_srcptr f, unsigned long int l)
+  { return mpf_cmp_ui(f, l); }
+  static int eval(unsigned long int l, mpf_srcptr f)
+  { return -mpf_cmp_ui(f, l); }
+  static int eval(mpf_srcptr f, signed long int l)
+  { return mpf_cmp_si(f, l); }
+  static int eval(signed long int l, mpf_srcptr f)
+  { return -mpf_cmp_si(f, l); }
+  static int eval(mpf_srcptr f, double d)
+  { return mpf_cmp_d(f, d); }
+  static int eval(double d, mpf_srcptr f)
+  { return -mpf_cmp_d(f, d); }
+};
+
+// Random-number kernels; each forwards to the GMP urandom routine that
+// matches its argument types, using the generator state s.
+struct __gmp_rand_function
+{
+  // mpz_urandomb with bit count l
+  static void eval(mpz_ptr z, gmp_randstate_t s, unsigned long int l)
+  {
+    mpz_urandomb(z, s, l);
+  }
+  // mpz_urandomm with bound w
+  static void eval(mpz_ptr z, gmp_randstate_t s, mpz_srcptr w)
+  {
+    mpz_urandomm(z, s, w);
+  }
+  // mpf_urandomb with prec bits
+  static void eval(mpf_ptr f, gmp_randstate_t s, mp_bitcnt_t prec)
+  {
+    mpf_urandomb(f, s, prec);
+  }
+};
+
+
+/**************** Auxiliary classes ****************/
+
+/* This is much the same as gmp_allocated_string in gmp-impl.h.  Since
+   gmp-impl.h is not publicly available, it is redefined here, under a
+   different name to avoid possible clashes. */
+
+extern "C" {
+  // type of the deallocation routine obtained from mp_get_memory_functions;
+  // it is called below with the block and the block's size in bytes
+  typedef void (*__gmp_freefunc_t) (void *, size_t);
+}
+
+// Scope guard for a C string handed out by one of the GMP *_get_str
+// functions: the destructor returns the buffer through the free function
+// currently registered with GMP.
+struct __gmp_alloc_cstring
+{
+  char *str;
+  __gmp_alloc_cstring(char *s) { str = s; }
+  ~__gmp_alloc_cstring()
+  {
+    // A null pointer owns nothing.  Skipping it here keeps the destructor
+    // from calling strlen on, or freeing, a null pointer.
+    if (str == 0)
+      return;
+
+    __gmp_freefunc_t freefunc;
+    mp_get_memory_functions (NULL, NULL, &freefunc);
+    // the size passed back is the string length plus the terminator
+    (*freefunc) (str, std::strlen(str)+1);
+  }
+};
+
+
+// general expression template class
+// T is the GMP value type of the result (the specializations in this file
+// use mpz_t, mpq_t and mpf_t).  U is T itself for the concrete wrapper
+// classes, or a __gmp_unary_expr / __gmp_binary_expr instantiation that
+// describes an operation which has not been evaluated yet.
+template <class T, class U>
+class __gmp_expr;
+
+
+// templates for resolving expression types
+
+// ref_type is the type used to store an operand of type T.
+// Default: the operand is stored as a plain T.
+template <class T>
+struct __gmp_resolve_ref
+{
+  typedef T ref_type;
+};
+
+// An operand that is itself a __gmp_expr is stored as a const reference
+// to it instead.
+template <class T, class U>
+struct __gmp_resolve_ref<__gmp_expr<T, U> >
+{
+  typedef const __gmp_expr<T, U> & ref_type;
+};
+
+
+// Traits keyed on one or two GMP value types.
+// With a single type (U defaults to T) the specialization provides
+// value_type and ptr_type, the matching mutable pointer type.
+// With two different types only value_type is provided: mpq_t for the
+// mpz_t/mpq_t pairings and mpf_t for every pairing that involves mpf_t.
+template <class T, class U = T>
+struct __gmp_resolve_expr;
+
+// single-type case: integer
+template <>
+struct __gmp_resolve_expr<mpz_t>
+{
+  typedef mpz_t value_type;
+  typedef mpz_ptr ptr_type;
+};
+
+// single-type case: rational
+template <>
+struct __gmp_resolve_expr<mpq_t>
+{
+  typedef mpq_t value_type;
+  typedef mpq_ptr ptr_type;
+};
+
+// single-type case: floating point
+template <>
+struct __gmp_resolve_expr<mpf_t>
+{
+  typedef mpf_t value_type;
+  typedef mpf_ptr ptr_type;
+};
+
+// integer with rational (either order) resolves to rational
+template <>
+struct __gmp_resolve_expr<mpz_t, mpq_t>
+{
+  typedef mpq_t value_type;
+};
+
+template <>
+struct __gmp_resolve_expr<mpq_t, mpz_t>
+{
+  typedef mpq_t value_type;
+};
+
+// integer with floating point (either order) resolves to floating point
+template <>
+struct __gmp_resolve_expr<mpz_t, mpf_t>
+{
+  typedef mpf_t value_type;
+};
+
+template <>
+struct __gmp_resolve_expr<mpf_t, mpz_t>
+{
+  typedef mpf_t value_type;
+};
+
+// rational with floating point (either order) resolves to floating point
+template <>
+struct __gmp_resolve_expr<mpq_t, mpf_t>
+{
+  typedef mpf_t value_type;
+};
+
+template <>
+struct __gmp_resolve_expr<mpf_t, mpq_t>
+{
+  typedef mpf_t value_type;
+};
+
+
+
+// temp_type says how a value of type __gmp_expr<T, T> is held: as an
+// object in the general case, as a const reference when all three template
+// arguments are the same type.
+// NOTE(review): the users of this trait are not in this part of the file;
+// presumably the reference form avoids copying an operand that already has
+// the target type -- confirm against the call sites.
+template <class T, class U, class V>
+struct __gmp_resolve_temp
+{
+  typedef __gmp_expr<T, T> temp_type;
+};
+
+template <class T>
+struct __gmp_resolve_temp<T, T, T>
+{
+  typedef const __gmp_expr<T, T> & temp_type;
+};
+
+
+// classes for evaluating unary and binary expressions
+// Node for a unary operation: it only records the operand.  Op is not
+// used inside the node; it is the function object carried in the type.
+template <class T, class Op>
+struct __gmp_unary_expr
+{
+  const T &val;  // the operand, held by reference (no copy is made)
+
+  __gmp_unary_expr(const T &operand)
+    : val(operand)
+  {
+  }
+
+private:
+  // declared only: a node always needs an operand
+  __gmp_unary_expr();
+};
+
+// Node for a binary operation: it records both operands.  How each one is
+// stored (plain value or const reference) is decided per operand type by
+// __gmp_resolve_ref.
+template <class T, class U, class Op>
+struct __gmp_binary_expr
+{
+  typename __gmp_resolve_ref<T>::ref_type val1;  // left operand
+  typename __gmp_resolve_ref<U>::ref_type val2;  // right operand
+
+  __gmp_binary_expr(const T &lhs, const U &rhs)
+    : val1(lhs), val2(rhs)
+  {
+  }
+
+private:
+  // declared only: a node always needs both operands
+  __gmp_binary_expr();
+};
+
+
+// functions for evaluating expressions
+// One declaration per destination pointer type (mpz_ptr, mpq_ptr, mpf_ptr).
+// They are only declared at this point so that the templated constructors
+// and assignment operators of the wrapper classes below can call
+// __gmp_set_expr(mp, expr).
+template <class T, class U>
+void __gmp_set_expr(mpz_ptr, const __gmp_expr<T, U> &);
+template <class T, class U>
+void __gmp_set_expr(mpq_ptr, const __gmp_expr<T, U> &);
+template <class T, class U>
+void __gmp_set_expr(mpf_ptr, const __gmp_expr<T, U> &);
+
+
+/**************** Macros for in-class declarations ****************/
+/* This is just repetitive code that is easier to maintain if it's written
+   only once */
+
+// Declares member `fun` taking an arbitrary expression.  It relies on a
+// typedef named value_type being visible in the class where it is expanded.
+#define __GMPP_DECLARE_COMPOUND_OPERATOR(fun)                         \
+  template <class T, class U>                                         \
+  __gmp_expr<value_type, value_type> & fun(const __gmp_expr<T, U> &);
+
+// Declares member `fun` once for each built-in numeric argument type.
+#define __GMPN_DECLARE_COMPOUND_OPERATOR(fun) \
+  __gmp_expr & fun(signed char);              \
+  __gmp_expr & fun(unsigned char);            \
+  __gmp_expr & fun(signed int);               \
+  __gmp_expr & fun(unsigned int);             \
+  __gmp_expr & fun(signed short int);         \
+  __gmp_expr & fun(unsigned short int);       \
+  __gmp_expr & fun(signed long int);          \
+  __gmp_expr & fun(unsigned long int);        \
+  __gmp_expr & fun(float);                    \
+  __gmp_expr & fun(double);                   \
+  __gmp_expr & fun(long double);
+
+// Both of the above: the expression overload plus the built-in overloads.
+#define __GMP_DECLARE_COMPOUND_OPERATOR(fun) \
+__GMPP_DECLARE_COMPOUND_OPERATOR(fun)        \
+__GMPN_DECLARE_COMPOUND_OPERATOR(fun)
+
+// Declares member `fun` for an unsigned long argument only (used in this
+// file for operator<<= and operator>>=).
+#define __GMP_DECLARE_COMPOUND_OPERATOR_UI(fun) \
+  __gmp_expr & fun(unsigned long int);
+
+// Declares the prefix form (no argument, returns a reference) and the
+// postfix form (dummy int argument, returns by value) of `fun`.
+#define __GMP_DECLARE_INCREMENT_OPERATOR(fun) \
+  inline __gmp_expr & fun();                  \
+  inline __gmp_expr fun(int);
+
+
+/**************** mpz_class -- wrapper for mpz_t ****************/
+
+template <>
+class __gmp_expr<mpz_t, mpz_t>
+{
+private:
+  typedef mpz_t value_type;
+  value_type mp;
+public:
+  mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); }
+
+  // constructors and destructor
+  __gmp_expr() { mpz_init(mp); }
+
+  __gmp_expr(const __gmp_expr &z) { mpz_init_set(mp, z.mp); }
+  template <class T, class U>
+  __gmp_expr(const __gmp_expr<T, U> &expr)
+  { mpz_init(mp); __gmp_set_expr(mp, expr); }
+
+  __gmp_expr(signed char c) { mpz_init_set_si(mp, c); }
+  __gmp_expr(unsigned char c) { mpz_init_set_ui(mp, c); }
+
+  __gmp_expr(signed int i) { mpz_init_set_si(mp, i); }
+  __gmp_expr(unsigned int i) { mpz_init_set_ui(mp, i); }
+
+  __gmp_expr(signed short int s) { mpz_init_set_si(mp, s); }
+  __gmp_expr(unsigned short int s) { mpz_init_set_ui(mp, s); }
+
+  __gmp_expr(signed long int l) { mpz_init_set_si(mp, l); }
+  __gmp_expr(unsigned long int l) { mpz_init_set_ui(mp, l); }
+
+  __gmp_expr(float f) { mpz_init_set_d(mp, f); }
+  __gmp_expr(double d) { mpz_init_set_d(mp, d); }
+  // __gmp_expr(long double ld) { mpz_init_set_d(mp, ld); }
+
+  explicit __gmp_expr(const char *s)
+  {
+    if (mpz_init_set_str (mp, s, 0) != 0)
+      {
+        mpz_clear (mp);
+        throw std::invalid_argument ("mpz_set_str");
+      }
+  }
+  __gmp_expr(const char *s, int base)
+  {
+    if (mpz_init_set_str (mp, s, base) != 0)
+      {
+        mpz_clear (mp);
+        throw std::invalid_argument ("mpz_set_str");
+      }
+  }
+  explicit __gmp_expr(const std::string &s)
+  {
+    if (mpz_init_set_str (mp, s.c_str(), 0) != 0)
+      {
+        mpz_clear (mp);
+        throw std::invalid_argument ("mpz_set_str");
+      }
+  }
+  __gmp_expr(const std::string &s, int base)
+  {
+    if (mpz_init_set_str(mp, s.c_str(), base) != 0)
+      {
+        mpz_clear (mp);
+        throw std::invalid_argument ("mpz_set_str");
+      }
+  }
+
+  explicit __gmp_expr(mpz_srcptr z) { mpz_init_set(mp, z); }
+
+  ~__gmp_expr() { mpz_clear(mp); }
+
+  // assignment operators
+  __gmp_expr & operator=(const __gmp_expr &z)
+  { mpz_set(mp, z.mp); return *this; }
+  template <class T, class U>
+  __gmp_expr<value_type, value_type> & operator=(const __gmp_expr<T, U> &expr)
+  { __gmp_set_expr(mp, expr); return *this; }
+
+  __gmp_expr & operator=(signed char c) { mpz_set_si(mp, c); return *this; }
+  __gmp_expr & operator=(unsigned char c) { mpz_set_ui(mp, c); return *this; }
+
+  __gmp_expr & operator=(signed int i) { mpz_set_si(mp, i); return *this; }
+  __gmp_expr & operator=(unsigned int i) { mpz_set_ui(mp, i); return *this; }
+
+  __gmp_expr & operator=(signed short int s)
+  { mpz_set_si(mp, s); return *this; }
+  __gmp_expr & operator=(unsigned short int s)
+  { mpz_set_ui(mp, s); return *this; }
+
+  __gmp_expr & operator=(signed long int l)
+  { mpz_set_si(mp, l); return *this; }
+  __gmp_expr & operator=(unsigned long int l)
+  { mpz_set_ui(mp, l); return *this; }
+
+  __gmp_expr & operator=(float f) { mpz_set_d(mp, f); return *this; }
+  __gmp_expr & operator=(double d) { mpz_set_d(mp, d); return *this; }
+  // __gmp_expr & operator=(long double ld)
+  // { mpz_set_ld(mp, ld); return *this; }
+
+  __gmp_expr & operator=(const char *s)
+  {
+    if (mpz_set_str (mp, s, 0) != 0)
+      throw std::invalid_argument ("mpz_set_str");
+    return *this;
+  }
+  __gmp_expr & operator=(const std::string &s)
+  {
+    if (mpz_set_str(mp, s.c_str(), 0) != 0)
+      throw std::invalid_argument ("mpz_set_str");
+    return *this;
+  }
+
+  // string input/output functions
+  int set_str(const char *s, int base)
+  { return mpz_set_str(mp, s, base); }
+  int set_str(const std::string &s, int base)
+  { return mpz_set_str(mp, s.c_str(), base); }
+  std::string get_str(int base = 10) const
+  {
+    __gmp_alloc_cstring temp(mpz_get_str(0, base, mp));
+    return std::string(temp.str);
+  }
+
+  // conversion functions
+  mpz_srcptr __get_mp() const { return mp; }
+  mpz_ptr __get_mp() { return mp; }
+  mpz_srcptr get_mpz_t() const { return mp; }
+  mpz_ptr get_mpz_t() { return mp; }
+
+  signed long int get_si() const { return mpz_get_si(mp); }
+  unsigned long int get_ui() const { return mpz_get_ui(mp); }
+  double get_d() const { return mpz_get_d(mp); }
+
+  // bool fits_schar_p() const { return mpz_fits_schar_p(mp); }
+  // bool fits_uchar_p() const { return mpz_fits_uchar_p(mp); }
+  bool fits_sint_p() const { return mpz_fits_sint_p(mp); }
+  bool fits_uint_p() const { return mpz_fits_uint_p(mp); }
+  bool fits_sshort_p() const { return mpz_fits_sshort_p(mp); }
+  bool fits_ushort_p() const { return mpz_fits_ushort_p(mp); }
+  bool fits_slong_p() const { return mpz_fits_slong_p(mp); }
+  bool fits_ulong_p() const { return mpz_fits_ulong_p(mp); }
+  // bool fits_float_p() const { return mpz_fits_float_p(mp); }
+  // bool fits_double_p() const { return mpz_fits_double_p(mp); }
+  // bool fits_ldouble_p() const { return mpz_fits_ldouble_p(mp); }
+
+  // member operators
+  __GMP_DECLARE_COMPOUND_OPERATOR(operator+=)
+  __GMP_DECLARE_COMPOUND_OPERATOR(operator-=)
+  __GMP_DECLARE_COMPOUND_OPERATOR(operator*=)
+  __GMP_DECLARE_COMPOUND_OPERATOR(operator/=)
+  __GMP_DECLARE_COMPOUND_OPERATOR(operator%=)
+
+  __GMP_DECLARE_COMPOUND_OPERATOR(operator&=)
+  __GMP_DECLARE_COMPOUND_OPERATOR(operator|=)
+  __GMP_DECLARE_COMPOUND_OPERATOR(operator^=)
+
+  __GMP_DECLARE_COMPOUND_OPERATOR_UI(operator<<=)
+  __GMP_DECLARE_COMPOUND_OPERATOR_UI(operator>>=)
+
+  __GMP_DECLARE_INCREMENT_OPERATOR(operator++)
+  __GMP_DECLARE_INCREMENT_OPERATOR(operator--)
+};
+
+typedef __gmp_expr<mpz_t, mpz_t> mpz_class;
+
+
+/**************** mpq_class -- wrapper for mpq_t ****************/
+
+template <>
+class __gmp_expr<mpq_t, mpq_t>
+{
+private:
+  typedef mpq_t value_type;
+  value_type mp;
+public:
+  mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); }
+  void canonicalize() { mpq_canonicalize(mp); }
+
+  // constructors and destructor
+  __gmp_expr() { mpq_init(mp); }
+
+  __gmp_expr(const __gmp_expr &q) { mpq_init(mp); mpq_set(mp, q.mp); }
+  template <class T, class U>
+  __gmp_expr(const __gmp_expr<T, U> &expr)
+  { mpq_init(mp); __gmp_set_expr(mp, expr); }
+
+  __gmp_expr(signed char c) { mpq_init(mp); mpq_set_si(mp, c, 1); }
+  __gmp_expr(unsigned char c) { mpq_init(mp); mpq_set_ui(mp, c, 1); }
+
+  __gmp_expr(signed int i) { mpq_init(mp); mpq_set_si(mp, i, 1); }
+  __gmp_expr(unsigned int i) { mpq_init(mp); mpq_set_ui(mp, i, 1); }
+
+  __gmp_expr(signed short int s) { mpq_init(mp); mpq_set_si(mp, s, 1); }
+  __gmp_expr(unsigned short int s) { mpq_init(mp); mpq_set_ui(mp, s, 1); }
+
+  __gmp_expr(signed long int l) { mpq_init(mp); mpq_set_si(mp, l, 1); }
+  __gmp_expr(unsigned long int l) { mpq_init(mp); mpq_set_ui(mp, l, 1); }
+
+  __gmp_expr(float f) { mpq_init(mp); mpq_set_d(mp, f); }
+  __gmp_expr(double d) { mpq_init(mp); mpq_set_d(mp, d); }
+  // __gmp_expr(long double ld) { mpq_init(mp); mpq_set_ld(mp, ld); }
+
+  explicit __gmp_expr(const char *s)
+  {
+    mpq_init (mp);
+    if (mpq_set_str (mp, s, 0) != 0)
+      {
+        mpq_clear (mp);
+        throw std::invalid_argument ("mpq_set_str");
+      }
+  }
+  __gmp_expr(const char *s, int base)
+  {
+    mpq_init (mp);
+    if (mpq_set_str(mp, s, base) != 0)
+      {
+        mpq_clear (mp);
+        throw std::invalid_argument ("mpq_set_str");
+      }
+  }
+  explicit __gmp_expr(const std::string &s)
+  {
+    mpq_init (mp);
+    if (mpq_set_str (mp, s.c_str(), 0) != 0)
+      {
+        mpq_clear (mp);
+        throw std::invalid_argument ("mpq_set_str");
+      }
+  }
+  __gmp_expr(const std::string &s, int base)
+  {
+    mpq_init(mp);
+    if (mpq_set_str (mp, s.c_str(), base) != 0)
+      {
+        mpq_clear (mp);
+        throw std::invalid_argument ("mpq_set_str");
+      }
+  }
+  explicit __gmp_expr(mpq_srcptr q) { mpq_init(mp); mpq_set(mp, q); }
+
+  __gmp_expr(const mpz_class &num, const mpz_class &den)
+  {
+    mpq_init(mp);
+    mpz_set(mpq_numref(mp), num.get_mpz_t());
+    mpz_set(mpq_denref(mp), den.get_mpz_t());
+  }
+
+  ~__gmp_expr() { mpq_clear(mp); }
+
+  // assignment operators
+  __gmp_expr & operator=(const __gmp_expr &q)
+  { mpq_set(mp, q.mp); return *this; }
+  template <class T, class U>
+  __gmp_expr<value_type, value_type> & operator=(const __gmp_expr<T, U> &expr)
+  { __gmp_set_expr(mp, expr); return *this; }
+
+  __gmp_expr & operator=(signed char c)
+  { mpq_set_si(mp, c, 1); return *this; }
+  __gmp_expr & operator=(unsigned char c)
+  { mpq_set_ui(mp, c, 1); return *this; }
+
+  __gmp_expr & operator=(signed int i) { mpq_set_si(mp, i, 1); return *this; }
+  __gmp_expr & operator=(unsigned int i)
+  { mpq_set_ui(mp, i, 1); return *this; }
+
+  __gmp_expr & operator=(signed short int s)
+  { mpq_set_si(mp, s, 1); return *this; }
+  __gmp_expr & operator=(unsigned short int s)
+  { mpq_set_ui(mp, s, 1); return *this; }
+
+  __gmp_expr & operator=(signed long int l)
+  { mpq_set_si(mp, l, 1); return *this; }
+  __gmp_expr & operator=(unsigned long int l)
+  { mpq_set_ui(mp, l, 1); return *this; }
+
+  __gmp_expr & operator=(float f) { mpq_set_d(mp, f); return *this; }
+  __gmp_expr & operator=(double d) { mpq_set_d(mp, d); return *this; }
+  // __gmp_expr & operator=(long double ld)
+  // { mpq_set_ld(mp, ld); return *this; }
+
+  __gmp_expr & operator=(const char *s)
+  {
+    if (mpq_set_str (mp, s, 0) != 0)
+      throw std::invalid_argument ("mpq_set_str");
+    return *this;
+  }
+  __gmp_expr & operator=(const std::string &s)
+  {
+    if (mpq_set_str(mp, s.c_str(), 0) != 0)
+      throw std::invalid_argument ("mpq_set_str");
+    return *this;
+  }
+
+  // string input/output functions
+  int set_str(const char *s, int base)
+  { return mpq_set_str(mp, s, base); }
+  int set_str(const std::string &s, int base)
+  { return mpq_set_str(mp, s.c_str(), base); }
+  std::string get_str(int base = 10) const
+  {
+    __gmp_alloc_cstring temp(mpq_get_str(0, base, mp));
+    return std::string(temp.str);
+  }
+
+  // conversion functions
+
+  // casting a reference to an mpz_t to mpz_class & is a dirty hack,
+  // but works because the internal representation of mpz_class is
+  // exactly an mpz_t
+  const mpz_class & get_num() const
+  { return reinterpret_cast<const mpz_class &>(*mpq_numref(mp)); }
+  mpz_class & get_num()
+  { return reinterpret_cast<mpz_class &>(*mpq_numref(mp)); }
+  const mpz_class & get_den() const
+  { return reinterpret_cast<const mpz_class &>(*mpq_denref(mp)); }
+  mpz_class & get_den()
+  { return reinterpret_cast<mpz_class &>(*mpq_denref(mp)); }
+
+  mpq_srcptr __get_mp() const { return mp; }
+  mpq_ptr __get_mp() { return mp; }
+  mpq_srcptr get_mpq_t() const { return mp; }
+  mpq_ptr get_mpq_t() { return mp; }
+
+  mpz_srcptr get_num_mpz_t() const { return mpq_numref(mp); }
+  mpz_ptr get_num_mpz_t() { return mpq_numref(mp); }
+  mpz_srcptr get_den_mpz_t() const { return mpq_denref(mp); }
+  mpz_ptr get_den_mpz_t() { return mpq_denref(mp); }
+
+  double get_d() const { return mpq_get_d(mp); }
+
+  // compound assignments
+  __GMP_DECLARE_COMPOUND_OPERATOR(operator+=)
+  __GMP_DECLARE_COMPOUND_OPERATOR(operator-=)
+  __GMP_DECLARE_COMPOUND_OPERATOR(operator*=)
+  __GMP_DECLARE_COMPOUND_OPERATOR(operator/=)
+
+  __GMP_DECLARE_COMPOUND_OPERATOR_UI(operator<<=)
+  __GMP_DECLARE_COMPOUND_OPERATOR_UI(operator>>=)
+
+  __GMP_DECLARE_INCREMENT_OPERATOR(operator++)
+  __GMP_DECLARE_INCREMENT_OPERATOR(operator--)
+};
+
+typedef __gmp_expr<mpq_t, mpq_t> mpq_class;
+
+
+/**************** mpf_class -- wrapper for mpf_t ****************/
+
+template <>
+class __gmp_expr<mpf_t, mpf_t>
+{
+private:
+  typedef mpf_t value_type;
+  value_type mp;
+public:
+  mp_bitcnt_t get_prec() const { return mpf_get_prec(mp); }
+
+  void set_prec(mp_bitcnt_t prec) { mpf_set_prec(mp, prec); }
+  void set_prec_raw(mp_bitcnt_t prec) { mpf_set_prec_raw(mp, prec); }
+
+  // constructors and destructor
+  __gmp_expr() { mpf_init(mp); }
+
+  __gmp_expr(const __gmp_expr &f)
+  { mpf_init2(mp, f.get_prec()); mpf_set(mp, f.mp); }
+  __gmp_expr(const __gmp_expr &f, mp_bitcnt_t prec)
+  { mpf_init2(mp, prec); mpf_set(mp, f.mp); }
+  template <class T, class U>
+  __gmp_expr(const __gmp_expr<T, U> &expr)
+  { mpf_init2(mp, expr.get_prec()); __gmp_set_expr(mp, expr); }
+  template <class T, class U>
+  __gmp_expr(const __gmp_expr<T, U> &expr, mp_bitcnt_t prec)
+  { mpf_init2(mp, prec); __gmp_set_expr(mp, expr); }
+
+  __gmp_expr(signed char c) { mpf_init_set_si(mp, c); }
+  __gmp_expr(signed char c, mp_bitcnt_t prec)
+  { mpf_init2(mp, prec); mpf_set_si(mp, c); }
+  __gmp_expr(unsigned char c) { mpf_init_set_ui(mp, c); }
+  __gmp_expr(unsigned char c, mp_bitcnt_t prec)
+  { mpf_init2(mp, prec); mpf_set_ui(mp, c); }
+
+  __gmp_expr(signed int i) { mpf_init_set_si(mp, i); }
+  __gmp_expr(signed int i, mp_bitcnt_t prec)
+  { mpf_init2(mp, prec); mpf_set_si(mp, i); }
+  __gmp_expr(unsigned int i) { mpf_init_set_ui(mp, i); }
+  __gmp_expr(unsigned int i, mp_bitcnt_t prec)
+  { mpf_init2(mp, prec); mpf_set_ui(mp, i); }
+
+  __gmp_expr(signed short int s) { mpf_init_set_si(mp, s); }
+  __gmp_expr(signed short int s, mp_bitcnt_t prec)
+  { mpf_init2(mp, prec); mpf_set_si(mp, s); }
+  __gmp_expr(unsigned short int s) { mpf_init_set_ui(mp, s); }
+  __gmp_expr(unsigned short int s, mp_bitcnt_t prec)
+  { mpf_init2(mp, prec); mpf_set_ui(mp, s); }
+
+  __gmp_expr(signed long int l) { mpf_init_set_si(mp, l); }
+  __gmp_expr(signed long int l, mp_bitcnt_t prec)
+  { mpf_init2(mp, prec); mpf_set_si(mp, l); }
+  __gmp_expr(unsigned long int l) { mpf_init_set_ui(mp, l); }
+  __gmp_expr(unsigned long int l, mp_bitcnt_t prec)
+  { mpf_init2(mp, prec); mpf_set_ui(mp, l); }
+
+  __gmp_expr(float f) { mpf_init_set_d(mp, f); }
+  __gmp_expr(float f, mp_bitcnt_t prec)
+  { mpf_init2(mp, prec); mpf_set_d(mp, f); }
+  __gmp_expr(double d) { mpf_init_set_d(mp, d); }
+  __gmp_expr(double d, mp_bitcnt_t prec)
+  { mpf_init2(mp, prec); mpf_set_d(mp, d); }
+  // __gmp_expr(long double ld) { mpf_init_set_d(mp, ld); }
+  // __gmp_expr(long double ld, mp_bitcnt_t prec)
+  // { mpf_init2(mp, prec); mpf_set_d(mp, ld); }
+
+  explicit __gmp_expr(const char *s)
+  {
+    if (mpf_init_set_str (mp, s, 0) != 0)
+      {
+        mpf_clear (mp);
+        throw std::invalid_argument ("mpf_set_str");
+      }
+  }
+  __gmp_expr(const char *s, mp_bitcnt_t prec, int base = 0)
+  {
+    mpf_init2(mp, prec);
+    if (mpf_set_str(mp, s, base) != 0)
+      {
+        mpf_clear (mp);
+        throw std::invalid_argument ("mpf_set_str");
+      }
+  }
+  explicit __gmp_expr(const std::string &s)
+  {
+    if (mpf_init_set_str(mp, s.c_str(), 0) != 0)
+      {
+        mpf_clear (mp);
+        throw std::invalid_argument ("mpf_set_str");
+      }
+  }
+  __gmp_expr(const std::string &s, mp_bitcnt_t prec, int base = 0)
+  {
+    mpf_init2(mp, prec);
+    if (mpf_set_str(mp, s.c_str(), base) != 0)
+      {
+        mpf_clear (mp);
+        throw std::invalid_argument ("mpf_set_str");
+      }
+  }
+
+  explicit __gmp_expr(mpf_srcptr f)
+  { mpf_init2(mp, mpf_get_prec(f)); mpf_set(mp, f); }
+  __gmp_expr(mpf_srcptr f, mp_bitcnt_t prec)
+  { mpf_init2(mp, prec); mpf_set(mp, f); }
+
+  ~__gmp_expr() { mpf_clear(mp); }
+
+  // assignment operators
+  __gmp_expr & operator=(const __gmp_expr &f)
+  { mpf_set(mp, f.mp); return *this; }
+  template <class T, class U>
+  __gmp_expr<value_type, value_type> & operator=(const __gmp_expr<T, U> &expr)
+  { __gmp_set_expr(mp, expr); return *this; }
+
+  __gmp_expr & operator=(signed char c) { mpf_set_si(mp, c); return *this; }
+  __gmp_expr & operator=(unsigned char c) { mpf_set_ui(mp, c); return *this; }
+
+  __gmp_expr & operator=(signed int i) { mpf_set_si(mp, i); return *this; }
+  __gmp_expr & operator=(unsigned int i) { mpf_set_ui(mp, i); return *this; }
+
+  __gmp_expr & operator=(signed short int s)
+  { mpf_set_si(mp, s); return *this; }
+  __gmp_expr & operator=(unsigned short int s)
+  { mpf_set_ui(mp, s); return *this; }
+
+  __gmp_expr & operator=(signed long int l)
+  { mpf_set_si(mp, l); return *this; }
+  __gmp_expr & operator=(unsigned long int l)
+  { mpf_set_ui(mp, l); return *this; }
+
+  __gmp_expr & operator=(float f) { mpf_set_d(mp, f); return *this; }
+  __gmp_expr & operator=(double d) { mpf_set_d(mp, d); return *this; }
+  // __gmp_expr & operator=(long double ld)
+  // { mpf_set_ld(mp, ld); return *this; }
+
+  __gmp_expr & operator=(const char *s)
+  {
+    if (mpf_set_str (mp, s, 0) != 0)
+      throw std::invalid_argument ("mpf_set_str");
+    return *this;
+  }
+  __gmp_expr & operator=(const std::string &s)
+  {
+    if (mpf_set_str(mp, s.c_str(), 0) != 0)
+      throw std::invalid_argument ("mpf_set_str");
+    return *this;
+  }
+
+  // string input/output functions
+  int set_str(const char *s, int base)
+  { return mpf_set_str(mp, s, base); }
+  int set_str(const std::string &s, int base)
+  { return mpf_set_str(mp, s.c_str(), base); }
+  std::string get_str(mp_exp_t &expo, int base = 10, size_t size = 0) const
+  {
+    __gmp_alloc_cstring temp(mpf_get_str(0, &expo, base, size, mp));
+    return std::string(temp.str);
+  }
+
+  // conversion functions
+  mpf_srcptr __get_mp() const { return mp; }
+  mpf_ptr __get_mp() { return mp; }
+  mpf_srcptr get_mpf_t() const { return mp; }
+  mpf_ptr get_mpf_t() { return mp; }
+
+  signed long int get_si() const { return mpf_get_si(mp); }
+  unsigned long int get_ui() const { return mpf_get_ui(mp); }
+  double get_d() const { return mpf_get_d(mp); }
+
+  // bool fits_schar_p() const { return mpf_fits_schar_p(mp); }
+  // bool fits_uchar_p() const { return mpf_fits_uchar_p(mp); }
+  bool fits_sint_p() const { return mpf_fits_sint_p(mp); }
+  bool fits_uint_p() const { return mpf_fits_uint_p(mp); }
+  bool fits_sshort_p() const { return mpf_fits_sshort_p(mp); }
+  bool fits_ushort_p() const { return mpf_fits_ushort_p(mp); }
+  bool fits_slong_p() const { return mpf_fits_slong_p(mp); }
+  bool fits_ulong_p() const { return mpf_fits_ulong_p(mp); }
+  // bool fits_float_p() const { return mpf_fits_float_p(mp); }
+  // bool fits_double_p() const { return mpf_fits_double_p(mp); }
+  // bool fits_ldouble_p() const { return mpf_fits_ldouble_p(mp); }
+
+  // compound assignments
+  __GMP_DECLARE_COMPOUND_OPERATOR(operator+=)
+  __GMP_DECLARE_COMPOUND_OPERATOR(operator-=)
+  __GMP_DECLARE_COMPOUND_OPERATOR(operator*=)
+  __GMP_DECLARE_COMPOUND_OPERATOR(operator/=)
+
+  __GMP_DECLARE_COMPOUND_OPERATOR_UI(operator<<=)
+  __GMP_DECLARE_COMPOUND_OPERATOR_UI(operator>>=)
+
+  __GMP_DECLARE_INCREMENT_OPERATOR(operator++)
+  __GMP_DECLARE_INCREMENT_OPERATOR(operator--)
+};
+
+typedef __gmp_expr<mpf_t, mpf_t> mpf_class;
+
+
+
+/**************** I/O operators ****************/
+
+// these should (and will) be provided separately
+
+// Output of a concrete wrapper object: delegate to the stream operator
+// that accepts the underlying GMP pointer.
+template <class T>
+inline std::ostream & operator<<
+(std::ostream &out, const __gmp_expr<T, T> &value)
+{
+  return out << value.__get_mp();
+}
+
+// Output of an unevaluated expression: evaluate it into a temporary of the
+// matching wrapper type first, then print that temporary.
+template <class T, class U>
+inline std::ostream & operator<<
+(std::ostream &out, const __gmp_expr<T, U> &expression)
+{
+  __gmp_expr<T, T> evaluated(expression);
+  return out << evaluated.__get_mp();
+}
+
+
+// Input into a concrete wrapper object: delegate to the stream operator
+// that accepts the underlying GMP pointer.
+template <class T>
+inline std::istream & operator>>(std::istream &in, __gmp_expr<T, T> &value)
+{
+  return in >> value.__get_mp();
+}
+
+// Input into a rational.  The value is stored as read; it is not
+// canonicalized here.
+inline std::istream & operator>>(std::istream &in, mpq_class &value)
+{
+  in >> value.get_mpq_t();
+  // value.canonicalize(); // you might want to uncomment this
+  return in;
+}
+
+
+/**************** Functions for type conversion ****************/
+
+template <>
+inline void __gmp_set_expr(mpz_ptr z, const mpz_class &w)
+{
+  mpz_set(z, w.get_mpz_t());
+}
+
+template <class T>
+inline void __gmp_set_expr(mpz_ptr z, const __gmp_expr<mpz_t, T> &expr)
+{
+  expr.eval(z);
+}
+
+template <>
+inline void __gmp_set_expr(mpz_ptr z, const mpq_class &q)
+{
+  mpz_set_q(z, q.get_mpq_t());
+}
+
+template <class T>
+inline void __gmp_set_expr(mpz_ptr z, const __gmp_expr<mpq_t, T> &expr)
+{
+  mpq_class temp(expr);
+  mpz_set_q(z, temp.get_mpq_t());
+}
+
+template <class T>
+inline void __gmp_set_expr(mpz_ptr z, const mpf_class &f)
+{
+  mpz_set_f(z, f.get_mpf_t());
+}
+
+template <class T>
+inline void __gmp_set_expr(mpz_ptr z, const __gmp_expr<mpf_t, T> &expr)
+{
+  mpf_class temp(expr);
+  mpz_set_f(z, temp.get_mpf_t());
+}
+
+template <>
+inline void __gmp_set_expr(mpq_ptr q, const mpz_class &z)
+{
+  mpq_set_z(q, z.get_mpz_t());
+}
+
+template <class T>
+inline void __gmp_set_expr(mpq_ptr q, const __gmp_expr<mpz_t, T> &expr)
+{
+  mpz_class temp(expr);
+  mpq_set_z(q, temp.get_mpz_t());
+}
+
+template <>
+inline void __gmp_set_expr(mpq_ptr q, const mpq_class &r)
+{
+  mpq_set(q, r.get_mpq_t());
+}
+
+template <class T>
+inline void __gmp_set_expr(mpq_ptr q, const __gmp_expr<mpq_t, T> &expr)
+{
+  expr.eval(q);
+}
+
+template <class T>
+inline void __gmp_set_expr(mpq_ptr q, const mpf_class &f)
+{
+  mpq_set_f(q, f.get_mpf_t());
+}
+
+template <class T>
+inline void __gmp_set_expr(mpq_ptr q, const __gmp_expr<mpf_t, T> &expr)
+{
+  mpf_class temp(expr);
+  mpq_set_f(q, temp.get_mpf_t());
+}
+
+template <class T>
+inline void __gmp_set_expr(mpf_ptr f, const mpz_class &z)
+{
+  mpf_set_z(f, z.get_mpz_t());
+}
+
+template <class T>
+inline void __gmp_set_expr(mpf_ptr f, const __gmp_expr<mpz_t, T> &expr)
+{
+  mpz_class temp(expr);
+  mpf_set_z(f, temp.get_mpz_t());
+}
+
+template <class T>
+inline void __gmp_set_expr(mpf_ptr f, const mpq_class &q)
+{
+  mpf_set_q(f, q.get_mpq_t());
+}
+
+template <class T>
+inline void __gmp_set_expr(mpf_ptr f, const __gmp_expr<mpq_t, T> &expr)
+{
+  mpq_class temp(expr);
+  mpf_set_q(f, temp.get_mpq_t());
+}
+
+template <>
+inline void __gmp_set_expr(mpf_ptr f, const mpf_class &g)
+{
+  mpf_set(f, g.get_mpf_t());
+}
+
+template <class T>
+inline void __gmp_set_expr(mpf_ptr f, const __gmp_expr<mpf_t, T> &expr)
+{
+  expr.eval(f, mpf_get_prec(f));
+}
+
+
+/**************** Specializations of __gmp_expr ****************/
+/* The eval() method of __gmp_expr<T, U> evaluates the corresponding
+   expression and assigns the result to its argument, which is either an
+   mpz_t, mpq_t, or mpf_t as specified by the T argument.
+   Compound expressions are evaluated recursively (temporaries are created
+   to hold intermediate values), while for simple expressions the eval()
+   method of the appropriate function object (available as the Op argument
+   of either __gmp_unary_expr<T, Op> or __gmp_binary_expr<T, U, Op>) is
+   called. */
+
+
+/**************** Unary expressions ****************/
+/* cases:
+   - simple:   argument is mp*_class, that is, __gmp_expr<T, T>
+   - compound: argument is __gmp_expr<T, U> (with U not equal to T) */
+
+
+// simple expressions
+
+template <class T, class Op>
+class __gmp_expr<T, __gmp_unary_expr<__gmp_expr<T, T>, Op> >
+{
+private:
+  typedef __gmp_expr<T, T> val_type;
+
+  __gmp_unary_expr<val_type, Op> expr;
+public:
+  __gmp_expr(const val_type &val) : expr(val) { }
+  void eval(typename __gmp_resolve_expr<T>::ptr_type p,
+           unsigned long int = 0) const
+  { Op::eval(p, expr.val.__get_mp()); }
+  const val_type & get_val() const { return expr.val; }
+  unsigned long int get_prec() const { return expr.val.get_prec(); }
+};
+
+
+// compound expressions
+
+template <class T, class U, class Op>
+class __gmp_expr<T, __gmp_unary_expr<__gmp_expr<T, U>, Op> >
+{
+private:
+  typedef __gmp_expr<T, U> val_type;
+
+  __gmp_unary_expr<val_type, Op> expr;
+public:
+  __gmp_expr(const val_type &val) : expr(val) { }
+  void eval(typename __gmp_resolve_expr<T>::ptr_type p) const
+  { __gmp_expr<T, T> temp(expr.val); Op::eval(p, temp.__get_mp()); }
+  void eval(typename __gmp_resolve_expr<T>::ptr_type p,
+           mp_bitcnt_t prec) const
+  { __gmp_expr<T, T> temp(expr.val, prec); Op::eval(p, temp.__get_mp()); }
+  const val_type & get_val() const { return expr.val; }
+  unsigned long int get_prec() const { return expr.val.get_prec(); }
+};
+
+
+/**************** Binary expressions ****************/
+/* simple:
+   - arguments are both mp*_class
+   - one argument is mp*_class, one is a built-in type
+   compound:
+   - one is mp*_class, one is __gmp_expr<T, U>
+   - one is __gmp_expr<T, U>, one is built-in
+   - both arguments are __gmp_expr<...> */
+
+
+// simple expressions
+
+template <class T, class Op>
+class __gmp_expr
+<T, __gmp_binary_expr<__gmp_expr<T, T>, __gmp_expr<T, T>, Op> >
+{
+private:
+  typedef __gmp_expr<T, T> val1_type;
+  typedef __gmp_expr<T, T> val2_type;
+
+  __gmp_binary_expr<val1_type, val2_type, Op> expr;
+public:
+  __gmp_expr(const val1_type &val1, const val2_type &val2)
+    : expr(val1, val2) { }
+  void eval(typename __gmp_resolve_expr<T>::ptr_type p,
+           unsigned long int = 0) const
+  { Op::eval(p, expr.val1.__get_mp(), expr.val2.__get_mp()); }
+  const val1_type & get_val1() const { return expr.val1; }
+  const val2_type & get_val2() const { return expr.val2; }
+  unsigned long int get_prec() const
+  {
+    mp_bitcnt_t prec1 = expr.val1.get_prec(),
+      prec2 = expr.val2.get_prec();
+    return (prec1 > prec2) ? prec1 : prec2;
+  }
+};
+
+
+// simple expressions, one operand (U) is a built-in numerical type
+
+template <class T, class U, class Op>
+class __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, T>, U, Op> >
+{
+private:
+  typedef __gmp_expr<T, T> val1_type;
+  typedef U val2_type;
+
+  __gmp_binary_expr<val1_type, val2_type, Op> expr;
+public:
+  __gmp_expr(const val1_type &val1, const val2_type &val2)
+    : expr(val1, val2) { }
+  void eval(typename __gmp_resolve_expr<T>::ptr_type p,
+           unsigned long int = 0) const
+  { Op::eval(p, expr.val1.__get_mp(), expr.val2); }
+  const val1_type & get_val1() const { return expr.val1; }
+  const val2_type & get_val2() const { return expr.val2; }
+  unsigned long int get_prec() const { return expr.val1.get_prec(); }
+};
+
+template <class T, class U, class Op>
+class __gmp_expr<T, __gmp_binary_expr<U, __gmp_expr<T, T>, Op> >
+{
+private:
+  typedef U val1_type;
+  typedef __gmp_expr<T, T> val2_type;
+
+  __gmp_binary_expr<val1_type, val2_type, Op> expr;
+public:
+  __gmp_expr(const val1_type &val1, const val2_type &val2)
+    : expr(val1, val2) { }
+  void eval(typename __gmp_resolve_expr<T>::ptr_type p,
+           unsigned long int = 0) const
+  { Op::eval(p, expr.val1, expr.val2.__get_mp()); }
+  const val1_type & get_val1() const { return expr.val1; }
+  const val2_type & get_val2() const { return expr.val2; }
+  unsigned long int get_prec() const { return expr.val2.get_prec(); }
+};
+
+
+// compound expressions, one argument is a subexpression
+
+template <class T, class U, class V, class Op>
+class __gmp_expr
+<T, __gmp_binary_expr<__gmp_expr<T, T>, __gmp_expr<U, V>, Op> >
+{
+private:
+  typedef __gmp_expr<T, T> val1_type;
+  typedef __gmp_expr<U, V> val2_type;
+
+  __gmp_binary_expr<val1_type, val2_type, Op> expr;
+public:
+  __gmp_expr(const val1_type &val1, const val2_type &val2)
+    : expr(val1, val2) { }
+  void eval(typename __gmp_resolve_expr<T>::ptr_type p) const
+  {
+    __gmp_expr<T, T> temp(expr.val2);
+    Op::eval(p, expr.val1.__get_mp(), temp.__get_mp());
+  }
+  void eval(typename __gmp_resolve_expr<T>::ptr_type p,
+           mp_bitcnt_t prec) const
+  {
+    __gmp_expr<T, T> temp(expr.val2, prec);
+    Op::eval(p, expr.val1.__get_mp(), temp.__get_mp());
+  }
+  const val1_type & get_val1() const { return expr.val1; }
+  const val2_type & get_val2() const { return expr.val2; }
+  unsigned long int get_prec() const
+  {
+    mp_bitcnt_t prec1 = expr.val1.get_prec(),
+      prec2 = expr.val2.get_prec();
+    return (prec1 > prec2) ? prec1 : prec2;
+  }
+};
+
+template <class T, class U, class V, class Op>
+class __gmp_expr
+<T, __gmp_binary_expr<__gmp_expr<U, V>, __gmp_expr<T, T>, Op> >
+{
+private:
+  typedef __gmp_expr<U, V> val1_type;
+  typedef __gmp_expr<T, T> val2_type;
+
+  __gmp_binary_expr<val1_type, val2_type, Op> expr;
+public:
+  __gmp_expr(const val1_type &val1, const val2_type &val2)
+    : expr(val1, val2) { }
+  void eval(typename __gmp_resolve_expr<T>::ptr_type p) const
+  {
+    __gmp_expr<T, T> temp(expr.val1);
+    Op::eval(p, temp.__get_mp(), expr.val2.__get_mp());
+  }
+  void eval(typename __gmp_resolve_expr<T>::ptr_type p,
+           mp_bitcnt_t prec) const
+  {
+    __gmp_expr<T, T> temp(expr.val1, prec);
+    Op::eval(p, temp.__get_mp(), expr.val2.__get_mp());
+  }
+  const val1_type & get_val1() const { return expr.val1; }
+  const val2_type & get_val2() const { return expr.val2; }
+  unsigned long int get_prec() const
+  {
+    mp_bitcnt_t prec1 = expr.val1.get_prec(),
+      prec2 = expr.val2.get_prec();
+    return (prec1 > prec2) ? prec1 : prec2;
+  }
+};
+
+template <class T, class U, class Op>
+class __gmp_expr
+<T, __gmp_binary_expr<__gmp_expr<T, T>, __gmp_expr<T, U>, Op> >
+{
+private:
+  typedef __gmp_expr<T, T> val1_type;
+  typedef __gmp_expr<T, U> val2_type;
+
+  __gmp_binary_expr<val1_type, val2_type, Op> expr;
+public:
+  __gmp_expr(const val1_type &val1, const val2_type &val2)
+    : expr(val1, val2) { }
+  void eval(typename __gmp_resolve_expr<T>::ptr_type p) const
+  {
+    __gmp_expr<T, T> temp(expr.val2);
+    Op::eval(p, expr.val1.__get_mp(), temp.__get_mp());
+  }
+  void eval(typename __gmp_resolve_expr<T>::ptr_type p,
+           mp_bitcnt_t prec) const
+  {
+    __gmp_expr<T, T> temp(expr.val2, prec);
+    Op::eval(p, expr.val1.__get_mp(), temp.__get_mp());
+  }
+  const val1_type & get_val1() const { return expr.val1; }
+  const val2_type & get_val2() const { return expr.val2; }
+  unsigned long int get_prec() const
+  {
+    mp_bitcnt_t prec1 = expr.val1.get_prec(),
+      prec2 = expr.val2.get_prec();
+    return (prec1 > prec2) ? prec1 : prec2;
+  }
+};
+
+template <class T, class U, class Op>
+class __gmp_expr
+<T, __gmp_binary_expr<__gmp_expr<T, U>, __gmp_expr<T, T>, Op> >
+{
+private:
+  typedef __gmp_expr<T, U> val1_type;
+  typedef __gmp_expr<T, T> val2_type;
+
+  __gmp_binary_expr<val1_type, val2_type, Op> expr;
+public:
+  __gmp_expr(const val1_type &val1, const val2_type &val2)
+    : expr(val1, val2) { }
+  void eval(typename __gmp_resolve_expr<T>::ptr_type p) const
+  {
+    __gmp_expr<T, T> temp(expr.val1);
+    Op::eval(p, temp.__get_mp(), expr.val2.__get_mp());
+  }
+  void eval(typename __gmp_resolve_expr<T>::ptr_type p,
+           mp_bitcnt_t prec) const
+  {
+    __gmp_expr<T, T> temp(expr.val1, prec);
+    Op::eval(p, temp.__get_mp(), expr.val2.__get_mp());
+  }
+  const val1_type & get_val1() const { return expr.val1; }
+  const val2_type & get_val2() const { return expr.val2; }
+  unsigned long int get_prec() const
+  {
+    mp_bitcnt_t prec1 = expr.val1.get_prec(),
+      prec2 = expr.val2.get_prec();
+    return (prec1 > prec2) ? prec1 : prec2;
+  }
+};
+
+
+// one argument is a subexpression, one is a built-in
+
+template <class T, class U, class V, class Op>
+class __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, U>, V, Op> >
+{
+private:
+  typedef __gmp_expr<T, U> val1_type;
+  typedef V val2_type;
+
+  __gmp_binary_expr<val1_type, val2_type, Op> expr;
+public:
+  __gmp_expr(const val1_type &val1, const val2_type &val2)
+    : expr(val1, val2) { }
+  void eval(typename __gmp_resolve_expr<T>::ptr_type p) const
+  {
+    __gmp_expr<T, T> temp(expr.val1);
+    Op::eval(p, temp.__get_mp(), expr.val2);
+  }
+  void eval(typename __gmp_resolve_expr<T>::ptr_type p,
+           mp_bitcnt_t prec) const
+  {
+    __gmp_expr<T, T> temp(expr.val1, prec);
+    Op::eval(p, temp.__get_mp(), expr.val2);
+  }
+  const val1_type & get_val1() const { return expr.val1; }
+  const val2_type & get_val2() const { return expr.val2; }
+  unsigned long int get_prec() const { return expr.val1.get_prec(); }
+};
+
+template <class T, class U, class V, class Op>
+class __gmp_expr<T, __gmp_binary_expr<U, __gmp_expr<T, V>, Op> >
+{
+private:
+  typedef U val1_type;
+  typedef __gmp_expr<T, V> val2_type;
+
+  __gmp_binary_expr<val1_type, val2_type, Op> expr;
+public:
+  __gmp_expr(const val1_type &val1, const val2_type &val2)
+    : expr(val1, val2) { }
+  void eval(typename __gmp_resolve_expr<T>::ptr_type p) const
+  {
+    __gmp_expr<T, T> temp(expr.val2);
+    Op::eval(p, expr.val1, temp.__get_mp());
+  }
+  void eval(typename __gmp_resolve_expr<T>::ptr_type p,
+           mp_bitcnt_t prec) const
+  {
+    __gmp_expr<T, T> temp(expr.val2, prec);
+    Op::eval(p, expr.val1, temp.__get_mp());
+  }
+  const val1_type & get_val1() const { return expr.val1; }
+  const val2_type & get_val2() const { return expr.val2; }
+  unsigned long int get_prec() const { return expr.val2.get_prec(); }
+};
+
+
+// both arguments are subexpressions
+
+template <class T, class U, class V, class W, class Op>
+class __gmp_expr
+<T, __gmp_binary_expr<__gmp_expr<T, U>, __gmp_expr<V, W>, Op> >
+{
+private:
+  typedef __gmp_expr<T, U> val1_type;
+  typedef __gmp_expr<V, W> val2_type;
+
+  __gmp_binary_expr<val1_type, val2_type, Op> expr;
+public:
+  __gmp_expr(const val1_type &val1, const val2_type &val2)
+    : expr(val1, val2) { }
+  void eval(typename __gmp_resolve_expr<T>::ptr_type p) const
+  {
+    __gmp_expr<T, T> temp1(expr.val1), temp2(expr.val2);
+    Op::eval(p, temp1.__get_mp(), temp2.__get_mp());
+  }
+  void eval(typename __gmp_resolve_expr<T>::ptr_type p,
+           mp_bitcnt_t prec) const
+  {
+    __gmp_expr<T, T> temp1(expr.val1, prec), temp2(expr.val2, prec);
+    Op::eval(p, temp1.__get_mp(), temp2.__get_mp());
+  }
+  const val1_type & get_val1() const { return expr.val1; }
+  const val2_type & get_val2() const { return expr.val2; }
+  unsigned long int get_prec() const
+  {
+    mp_bitcnt_t prec1 = expr.val1.get_prec(),
+      prec2 = expr.val2.get_prec();
+    return (prec1 > prec2) ? prec1 : prec2;
+  }
+};
+
+template <class T, class U, class V, class W, class Op>
+class __gmp_expr
+<T, __gmp_binary_expr<__gmp_expr<U, V>, __gmp_expr<T, W>, Op> >
+{
+private:
+  typedef __gmp_expr<U, V> val1_type;
+  typedef __gmp_expr<T, W> val2_type;
+
+  __gmp_binary_expr<val1_type, val2_type, Op> expr;
+public:
+  __gmp_expr(const val1_type &val1, const val2_type &val2)
+    : expr(val1, val2) { }
+  void eval(typename __gmp_resolve_expr<T>::ptr_type p) const
+  {
+    __gmp_expr<T, T> temp1(expr.val1), temp2(expr.val2);
+    Op::eval(p, temp1.__get_mp(), temp2.__get_mp());
+  }
+  void eval(typename __gmp_resolve_expr<T>::ptr_type p,
+           mp_bitcnt_t prec) const
+  {
+    __gmp_expr<T, T> temp1(expr.val1, prec), temp2(expr.val2, prec);
+    Op::eval(p, temp1.__get_mp(), temp2.__get_mp());
+  }
+  const val1_type & get_val1() const { return expr.val1; }
+  const val2_type & get_val2() const { return expr.val2; }
+  unsigned long int get_prec() const
+  {
+    mp_bitcnt_t prec1 = expr.val1.get_prec(),
+      prec2 = expr.val2.get_prec();
+    return (prec1 > prec2) ? prec1 : prec2;
+  }
+};
+
+template <class T, class U, class V, class Op>
+class __gmp_expr
+<T, __gmp_binary_expr<__gmp_expr<T, U>, __gmp_expr<T, V>, Op> >
+{
+private:
+  typedef __gmp_expr<T, U> val1_type;
+  typedef __gmp_expr<T, V> val2_type;
+
+  __gmp_binary_expr<val1_type, val2_type, Op> expr;
+public:
+  __gmp_expr(const val1_type &val1, const val2_type &val2)
+    : expr(val1, val2) { }
+  void eval(typename __gmp_resolve_expr<T>::ptr_type p) const
+  {
+    __gmp_expr<T, T> temp1(expr.val1), temp2(expr.val2);
+    Op::eval(p, temp1.__get_mp(), temp2.__get_mp());
+  }
+  void eval(typename __gmp_resolve_expr<T>::ptr_type p,
+           mp_bitcnt_t prec) const
+  {
+    __gmp_expr<T, T> temp1(expr.val1, prec), temp2(expr.val2, prec);
+    Op::eval(p, temp1.__get_mp(), temp2.__get_mp());
+  }
+  const val1_type & get_val1() const { return expr.val1; }
+  const val2_type & get_val2() const { return expr.val2; }
+  unsigned long int get_prec() const
+  {
+    mp_bitcnt_t prec1 = expr.val1.get_prec(),
+      prec2 = expr.val2.get_prec();
+    return (prec1 > prec2) ? prec1 : prec2;
+  }
+};
+
+
+/**************** Special cases ****************/
+
+/* Some operations (i.e., add and subtract) with mixed mpz/mpq arguments
+   can be done directly without first converting the mpz to mpq.
+   Appropriate specializations of __gmp_expr are required. */
+
+
+#define __GMPZQ_DEFINE_EXPR(eval_fun)  /* mixed mpz/mpq expression nodes */ \
+/* (1) mpz_class OP mpq_class: operands go to eval_fun::eval as-is. */      \
+template <>                                                                 \
+class __gmp_expr<mpq_t, __gmp_binary_expr<mpz_class, mpq_class, eval_fun> > \
+{                                                                           \
+private:                                                                    \
+  typedef mpz_class val1_type;                                              \
+  typedef mpq_class val2_type;                                              \
+                                                                            \
+  __gmp_binary_expr<val1_type, val2_type, eval_fun> expr;                   \
+public:                                                                     \
+  __gmp_expr(const val1_type &val1, const val2_type &val2)                  \
+    : expr(val1, val2) { }                                                  \
+  void eval(mpq_ptr q) const                                                \
+  { eval_fun::eval(q, expr.val1.get_mpz_t(), expr.val2.get_mpq_t()); }      \
+  const val1_type & get_val1() const { return expr.val1; }                  \
+  const val2_type & get_val2() const { return expr.val2; }                  \
+  unsigned long int get_prec() const { return mpf_get_default_prec(); }     \
+};                                                                          \
+/* (2) mpq_class OP mpz_class: operands go to eval_fun::eval as-is. */      \
+template <>                                                                 \
+class __gmp_expr<mpq_t, __gmp_binary_expr<mpq_class, mpz_class, eval_fun> > \
+{                                                                           \
+private:                                                                    \
+  typedef mpq_class val1_type;                                              \
+  typedef mpz_class val2_type;                                              \
+                                                                            \
+  __gmp_binary_expr<val1_type, val2_type, eval_fun> expr;                   \
+public:                                                                     \
+  __gmp_expr(const val1_type &val1, const val2_type &val2)                  \
+    : expr(val1, val2) { }                                                  \
+  void eval(mpq_ptr q) const                                                \
+  { eval_fun::eval(q, expr.val1.get_mpq_t(), expr.val2.get_mpz_t()); }      \
+  const val1_type & get_val1() const { return expr.val1; }                  \
+  const val2_type & get_val2() const { return expr.val2; }                  \
+  unsigned long int get_prec() const { return mpf_get_default_prec(); }     \
+};                                                                          \
+/* (3) mpz_class OP mpq expr: expr is materialized into an mpq_class. */    \
+template <class T>                                                          \
+class __gmp_expr                                                            \
+<mpq_t, __gmp_binary_expr<mpz_class, __gmp_expr<mpq_t, T>, eval_fun> >      \
+{                                                                           \
+private:                                                                    \
+  typedef mpz_class val1_type;                                              \
+  typedef __gmp_expr<mpq_t, T> val2_type;                                   \
+                                                                            \
+  __gmp_binary_expr<val1_type, val2_type, eval_fun> expr;                   \
+public:                                                                     \
+  __gmp_expr(const val1_type &val1, const val2_type &val2)                  \
+    : expr(val1, val2) { }                                                  \
+  void eval(mpq_ptr q) const                                                \
+  {                                                                         \
+    mpq_class temp(expr.val2);                                              \
+    eval_fun::eval(q, expr.val1.get_mpz_t(), temp.get_mpq_t());             \
+  }                                                                         \
+  const val1_type & get_val1() const { return expr.val1; }                  \
+  const val2_type & get_val2() const { return expr.val2; }                  \
+  unsigned long int get_prec() const { return mpf_get_default_prec(); }     \
+};                                                                          \
+/* (4) mpq_class OP mpz expr: expr is materialized into an mpz_class. */    \
+template <class T>                                                          \
+class __gmp_expr                                                            \
+<mpq_t, __gmp_binary_expr<mpq_class, __gmp_expr<mpz_t, T>, eval_fun> >      \
+{                                                                           \
+private:                                                                    \
+  typedef mpq_class val1_type;                                              \
+  typedef __gmp_expr<mpz_t, T> val2_type;                                   \
+                                                                            \
+  __gmp_binary_expr<val1_type, val2_type, eval_fun> expr;                   \
+public:                                                                     \
+  __gmp_expr(const val1_type &val1, const val2_type &val2)                  \
+    : expr(val1, val2) { }                                                  \
+  void eval(mpq_ptr q) const                                                \
+  {                                                                         \
+    mpz_class temp(expr.val2);                                              \
+    eval_fun::eval(q, expr.val1.get_mpq_t(), temp.get_mpz_t());             \
+  }                                                                         \
+  const val1_type & get_val1() const { return expr.val1; }                  \
+  const val2_type & get_val2() const { return expr.val2; }                  \
+  unsigned long int get_prec() const { return mpf_get_default_prec(); }     \
+};                                                                          \
+/* (5) mpz expr OP mpq_class: expr is materialized into an mpz_class. */    \
+template <class T>                                                          \
+class __gmp_expr                                                            \
+<mpq_t, __gmp_binary_expr<__gmp_expr<mpz_t, T>, mpq_class, eval_fun> >      \
+{                                                                           \
+private:                                                                    \
+  typedef __gmp_expr<mpz_t, T> val1_type;                                   \
+  typedef mpq_class val2_type;                                              \
+                                                                            \
+  __gmp_binary_expr<val1_type, val2_type, eval_fun> expr;                   \
+public:                                                                     \
+  __gmp_expr(const val1_type &val1, const val2_type &val2)                  \
+    : expr(val1, val2) { }                                                  \
+  void eval(mpq_ptr q) const                                                \
+  {                                                                         \
+    mpz_class temp(expr.val1);                                              \
+    eval_fun::eval(q, temp.get_mpz_t(), expr.val2.get_mpq_t());             \
+  }                                                                         \
+  const val1_type & get_val1() const { return expr.val1; }                  \
+  const val2_type & get_val2() const { return expr.val2; }                  \
+  unsigned long int get_prec() const { return mpf_get_default_prec(); }     \
+};                                                                          \
+/* (6) mpq expr OP mpz_class: expr is materialized into an mpq_class. */    \
+template <class T>                                                          \
+class __gmp_expr                                                            \
+<mpq_t, __gmp_binary_expr<__gmp_expr<mpq_t, T>, mpz_class, eval_fun> >      \
+{                                                                           \
+private:                                                                    \
+  typedef __gmp_expr<mpq_t, T> val1_type;                                   \
+  typedef mpz_class val2_type;                                              \
+                                                                            \
+  __gmp_binary_expr<val1_type, val2_type, eval_fun> expr;                   \
+public:                                                                     \
+  __gmp_expr(const val1_type &val1, const val2_type &val2)                  \
+    : expr(val1, val2) { }                                                  \
+  void eval(mpq_ptr q) const                                                \
+  {                                                                         \
+    mpq_class temp(expr.val1);                                              \
+    eval_fun::eval(q, temp.get_mpq_t(), expr.val2.get_mpz_t());             \
+  }                                                                         \
+  const val1_type & get_val1() const { return expr.val1; }                  \
+  const val2_type & get_val2() const { return expr.val2; }                  \
+  unsigned long int get_prec() const { return mpf_get_default_prec(); }     \
+};                                                                          \
+/* (7) mpz expr OP mpq expr: both sides are materialized first. */          \
+template <class T, class U>                                                 \
+class __gmp_expr<mpq_t, __gmp_binary_expr                                   \
+<__gmp_expr<mpz_t, T>, __gmp_expr<mpq_t, U>, eval_fun> >                    \
+{                                                                           \
+private:                                                                    \
+  typedef __gmp_expr<mpz_t, T> val1_type;                                   \
+  typedef __gmp_expr<mpq_t, U> val2_type;                                   \
+                                                                            \
+  __gmp_binary_expr<val1_type, val2_type, eval_fun> expr;                   \
+public:                                                                     \
+  __gmp_expr(const val1_type &val1, const val2_type &val2)                  \
+    : expr(val1, val2) { }                                                  \
+  void eval(mpq_ptr q) const                                                \
+  {                                                                         \
+    mpz_class temp1(expr.val1);                                             \
+    mpq_class temp2(expr.val2);                                             \
+    eval_fun::eval(q, temp1.get_mpz_t(), temp2.get_mpq_t());                \
+  }                                                                         \
+  const val1_type & get_val1() const { return expr.val1; }                  \
+  const val2_type & get_val2() const { return expr.val2; }                  \
+  unsigned long int get_prec() const { return mpf_get_default_prec(); }     \
+};                                                                          \
+/* (8) mpq expr OP mpz expr: both sides are materialized first. */          \
+template <class T, class U>                                                 \
+class __gmp_expr<mpq_t, __gmp_binary_expr                                   \
+<__gmp_expr<mpq_t, T>, __gmp_expr<mpz_t, U>, eval_fun> >                    \
+{                                                                           \
+private:                                                                    \
+  typedef __gmp_expr<mpq_t, T> val1_type;                                   \
+  typedef __gmp_expr<mpz_t, U> val2_type;                                   \
+                                                                            \
+  __gmp_binary_expr<val1_type, val2_type, eval_fun> expr;                   \
+public:                                                                     \
+  __gmp_expr(const val1_type &val1, const val2_type &val2)                  \
+    : expr(val1, val2) { }                                                  \
+  void eval(mpq_ptr q) const                                                \
+  {                                                                         \
+    mpq_class temp1(expr.val1);                                             \
+    mpz_class temp2(expr.val2);                                             \
+    eval_fun::eval(q, temp1.get_mpq_t(), temp2.get_mpz_t());                \
+  }                                                                         \
+  const val1_type & get_val1() const { return expr.val1; }                  \
+  const val2_type & get_val2() const { return expr.val2; }                  \
+  unsigned long int get_prec() const { return mpf_get_default_prec(); }     \
+};
+
+
+__GMPZQ_DEFINE_EXPR(__gmp_binary_plus)   // the eight node classes for plus
+__GMPZQ_DEFINE_EXPR(__gmp_binary_minus)  // the eight node classes for minus
+
+
+
+/**************** Macros for defining functions ****************/
+/* Results of operators and functions are instances of __gmp_expr<T, U>.
+   T determines the numerical type of the expression: it can be either
+   mpz_t, mpq_t, or mpf_t.  When the arguments of a binary
+   expression have different numerical types, __gmp_resolve_expr is used
+   to determine the "larger" type.
+   U is either __gmp_unary_expr<V, Op> or __gmp_binary_expr<V, W, Op>,
+   where V and W are the arguments' types -- they can in turn be
+   expressions, which allows compound expressions of any degree of
+   complexity to be built.
+   Op is a function object that must have an eval() method accepting
+   appropriate arguments.
+   Actual evaluation of a __gmp_expr<T, U> object is done when it gets
+   assigned to an mp*_class ("lazy" evaluation): this is done by calling
+   its eval() method. */
+
+
+// non-member unary operators and functions
+
+#define __GMP_DEFINE_UNARY_FUNCTION(fun, eval_fun)                           \
+                                                                             \
+template <class T, class U>                                                  \
+inline __gmp_expr<T, __gmp_unary_expr<__gmp_expr<T, U>, eval_fun> >          \
+fun(const __gmp_expr<T, U> &expr)                                            \
+{                                                                            \
+  return __gmp_expr<T, __gmp_unary_expr<__gmp_expr<T, U>, eval_fun> >(expr); \
+}
+
+#define __GMP_DEFINE_UNARY_TYPE_FUNCTION(type, fun, eval_fun) \
+                                                              \
+template <class T, class U>                                   \
+inline type fun(const __gmp_expr<T, U> &expr)                 \
+{                                                             \
+  typename __gmp_resolve_temp<T, T, U>::temp_type temp(expr); \
+  return eval_fun::eval(temp.__get_mp());                     \
+}
+
+
+// non-member binary operators and functions
+
+#define __GMPP_DEFINE_BINARY_FUNCTION(fun, eval_fun)                   \
+                                                                       \
+template <class T, class U, class V, class W>                          \
+inline __gmp_expr<typename __gmp_resolve_expr<T, V>::value_type,       \
+__gmp_binary_expr<__gmp_expr<T, U>, __gmp_expr<V, W>, eval_fun> >      \
+fun(const __gmp_expr<T, U> &expr1, const __gmp_expr<V, W> &expr2)      \
+{                                                                      \
+  return __gmp_expr<typename __gmp_resolve_expr<T, V>::value_type,     \
+     __gmp_binary_expr<__gmp_expr<T, U>, __gmp_expr<V, W>, eval_fun> > \
+    (expr1, expr2);                                                    \
+}
+
+#define __GMPNN_DEFINE_BINARY_FUNCTION(fun, eval_fun, type, bigtype)       \
+                                                                           \
+template <class T, class U>                                                \
+inline __gmp_expr                                                          \
+<T, __gmp_binary_expr<__gmp_expr<T, U>, bigtype, eval_fun> >               \
+fun(const __gmp_expr<T, U> &expr, type t)                                  \
+{                                                                          \
+  return __gmp_expr                                                        \
+    <T, __gmp_binary_expr<__gmp_expr<T, U>, bigtype, eval_fun> >(expr, t); \
+}                                                                          \
+                                                                           \
+template <class T, class U>                                                \
+inline __gmp_expr                                                          \
+<T, __gmp_binary_expr<bigtype, __gmp_expr<T, U>, eval_fun> >               \
+fun(type t, const __gmp_expr<T, U> &expr)                                  \
+{                                                                          \
+  return __gmp_expr                                                        \
+    <T, __gmp_binary_expr<bigtype, __gmp_expr<T, U>, eval_fun> >(t, expr); \
+}
+
+#define __GMPNS_DEFINE_BINARY_FUNCTION(fun, eval_fun, type)          \
+__GMPNN_DEFINE_BINARY_FUNCTION(fun, eval_fun, type, signed long int)
+
+#define __GMPNU_DEFINE_BINARY_FUNCTION(fun, eval_fun, type)            \
+__GMPNN_DEFINE_BINARY_FUNCTION(fun, eval_fun, type, unsigned long int)
+
+#define __GMPND_DEFINE_BINARY_FUNCTION(fun, eval_fun, type) \
+__GMPNN_DEFINE_BINARY_FUNCTION(fun, eval_fun, type, double)
+
+#define __GMPNLD_DEFINE_BINARY_FUNCTION(fun, eval_fun, type)     \
+__GMPNN_DEFINE_BINARY_FUNCTION(fun, eval_fun, type, long double)
+
+#define __GMPN_DEFINE_BINARY_FUNCTION(fun, eval_fun)              \
+__GMPNS_DEFINE_BINARY_FUNCTION(fun, eval_fun, signed char)        \
+__GMPNU_DEFINE_BINARY_FUNCTION(fun, eval_fun, unsigned char)      \
+__GMPNS_DEFINE_BINARY_FUNCTION(fun, eval_fun, signed int)         \
+__GMPNU_DEFINE_BINARY_FUNCTION(fun, eval_fun, unsigned int)       \
+__GMPNS_DEFINE_BINARY_FUNCTION(fun, eval_fun, signed short int)   \
+__GMPNU_DEFINE_BINARY_FUNCTION(fun, eval_fun, unsigned short int) \
+__GMPNS_DEFINE_BINARY_FUNCTION(fun, eval_fun, signed long int)    \
+__GMPNU_DEFINE_BINARY_FUNCTION(fun, eval_fun, unsigned long int)  \
+__GMPND_DEFINE_BINARY_FUNCTION(fun, eval_fun, float)              \
+__GMPND_DEFINE_BINARY_FUNCTION(fun, eval_fun, double)             \
+__GMPNLD_DEFINE_BINARY_FUNCTION(fun, eval_fun, long double)
+
+// Full set of fun() overloads: expression/expression first, then
+// expression/built-in in both argument orders.
+#define __GMP_DEFINE_BINARY_FUNCTION(fun, eval_fun) \
+  __GMPP_DEFINE_BINARY_FUNCTION(fun, eval_fun) \
+  __GMPN_DEFINE_BINARY_FUNCTION(fun, eval_fun)
+
+
+#define __GMP_DEFINE_BINARY_FUNCTION_UI(fun, eval_fun)                 \
+                                                                       \
+template <class T, class U>                                            \
+inline __gmp_expr                                                      \
+<T, __gmp_binary_expr<__gmp_expr<T, U>, unsigned long int, eval_fun> > \
+fun(const __gmp_expr<T, U> &expr, unsigned long int l)                 \
+{                                                                      \
+  return __gmp_expr<T, __gmp_binary_expr                               \
+    <__gmp_expr<T, U>, unsigned long int, eval_fun> >(expr, l);        \
+}
+
+
+#define __GMPP_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun)         \
+                                                                        \
+template <class T, class U, class V, class W>                           \
+inline type fun(const __gmp_expr<T, U> &expr1,                          \
+               const __gmp_expr<V, W> &expr2)                          \
+{                                                                       \
+  typedef typename __gmp_resolve_expr<T, V>::value_type eval_type;      \
+  typename __gmp_resolve_temp<eval_type, T, U>::temp_type temp1(expr1); \
+  typename __gmp_resolve_temp<eval_type, V, W>::temp_type temp2(expr2); \
+  return eval_fun::eval(temp1.__get_mp(), temp2.__get_mp());            \
+}
+
+#define __GMPNN_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun,   \
+                                           type2, bigtype)        \
+                                                                   \
+template <class T, class U>                                        \
+inline type fun(const __gmp_expr<T, U> &expr, type2 t)             \
+{                                                                  \
+  typename __gmp_resolve_temp<T, T, U>::temp_type temp(expr);      \
+  return eval_fun::eval(temp.__get_mp(), static_cast<bigtype>(t)); \
+}                                                                  \
+                                                                   \
+template <class T, class U>                                        \
+inline type fun(type2 t, const __gmp_expr<T, U> &expr)             \
+{                                                                  \
+  typename __gmp_resolve_temp<T, T, U>::temp_type temp(expr);      \
+  return eval_fun::eval(static_cast<bigtype>(t), temp.__get_mp()); \
+}
+
+#define __GMPNS_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, type2) \
+__GMPNN_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun,                \
+                                   type2, signed long int)
+
+#define __GMPNU_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, type2) \
+__GMPNN_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun,                \
+                                   type2, unsigned long int)
+
+#define __GMPND_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, type2) \
+__GMPNN_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, type2, double)
+
+#define __GMPNLD_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, type2)     \
+__GMPNN_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, type2, long double)
+
+#define __GMPN_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun)              \
+__GMPNS_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, signed char)        \
+__GMPNU_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, unsigned char)      \
+__GMPNS_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, signed int)         \
+__GMPNU_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, unsigned int)       \
+__GMPNS_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, signed short int)   \
+__GMPNU_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, unsigned short int) \
+__GMPNS_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, signed long int)    \
+__GMPNU_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, unsigned long int)  \
+__GMPND_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, float)              \
+__GMPND_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, double)             \
+__GMPNLD_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, long double)
+
+// Full set of plain-type-returning fun() overloads: expression/expression
+// first, then expression/built-in in both argument orders.
+#define __GMP_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun) \
+  __GMPP_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun) \
+  __GMPN_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun)
+
+
+// member operators
+
+#define __GMPP_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun)                 \
+                                                                             \
+template <class T, class U>                                                  \
+inline type##_class & type##_class::fun(const __gmp_expr<T, U> &expr)        \
+{                                                                            \
+  __gmp_set_expr(mp, __gmp_expr<type##_t, __gmp_binary_expr                  \
+                <type##_class, __gmp_expr<T, U>, eval_fun> >(*this, expr)); \
+  return *this;                                                              \
+}
+
+#define __GMPNN_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun,    \
+                                        type2, bigtype)         \
+                                                                 \
+inline type##_class & type##_class::fun(type2 t)                 \
+{                                                                \
+  __gmp_set_expr(mp, __gmp_expr<type##_t, __gmp_binary_expr      \
+                <type##_class, bigtype, eval_fun> >(*this, t)); \
+  return *this;                                                  \
+}
+
+#define __GMPNS_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, type2) \
+__GMPNN_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun,                \
+                                type2, signed long int)
+
+#define __GMPNU_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, type2) \
+__GMPNN_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun,                \
+                                type2, unsigned long int)
+
+#define __GMPND_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, type2) \
+__GMPNN_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, type2, double)
+
+#define __GMPNLD_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, type2)     \
+__GMPNN_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, type2, long double)
+
+#define __GMPN_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun)              \
+__GMPNS_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, signed char)        \
+__GMPNU_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, unsigned char)      \
+__GMPNS_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, signed int)         \
+__GMPNU_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, unsigned int)       \
+__GMPNS_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, signed short int)   \
+__GMPNU_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, unsigned short int) \
+__GMPNS_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, signed long int)    \
+__GMPNU_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, unsigned long int)  \
+__GMPND_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, float)              \
+__GMPND_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, double)             \
+/* __GMPNLD_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun, long double) */
+
+#define __GMP_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun) \
+__GMPP_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun)        \
+__GMPN_DEFINE_COMPOUND_OPERATOR(type, fun, eval_fun)
+
+#define __GMPZ_DEFINE_COMPOUND_OPERATOR(fun, eval_fun) \
+__GMP_DEFINE_COMPOUND_OPERATOR(mpz, fun, eval_fun)
+
+#define __GMPQ_DEFINE_COMPOUND_OPERATOR(fun, eval_fun) \
+__GMP_DEFINE_COMPOUND_OPERATOR(mpq, fun, eval_fun)
+
+#define __GMPF_DEFINE_COMPOUND_OPERATOR(fun, eval_fun) \
+__GMP_DEFINE_COMPOUND_OPERATOR(mpf, fun, eval_fun)
+
+
+
+#define __GMP_DEFINE_COMPOUND_OPERATOR_UI(type, fun, eval_fun)  \
+                                                                \
+inline type##_class & type##_class::fun(unsigned long int l)    \
+{                                                               \
+  __gmp_set_expr(mp, __gmp_expr<type##_t, __gmp_binary_expr     \
+    <type##_class, unsigned long int, eval_fun> >(*this, l));   \
+  return *this;                                                 \
+}
+
+#define __GMPZ_DEFINE_COMPOUND_OPERATOR_UI(fun, eval_fun) \
+__GMP_DEFINE_COMPOUND_OPERATOR_UI(mpz, fun, eval_fun)
+
+#define __GMPQ_DEFINE_COMPOUND_OPERATOR_UI(fun, eval_fun) \
+__GMP_DEFINE_COMPOUND_OPERATOR_UI(mpq, fun, eval_fun)
+
+#define __GMPF_DEFINE_COMPOUND_OPERATOR_UI(fun, eval_fun) \
+__GMP_DEFINE_COMPOUND_OPERATOR_UI(mpf, fun, eval_fun)
+
+
+
+#define __GMP_DEFINE_INCREMENT_OPERATOR(type, fun, eval_fun) \
+                                                             \
+inline type##_class & type##_class::fun()                    \
+{                                                            \
+  eval_fun::eval(mp);                                        \
+  return *this;                                              \
+}                                                            \
+                                                             \
+inline type##_class type##_class::fun(int)                   \
+{                                                            \
+  type##_class temp(*this);                                  \
+  eval_fun::eval(mp);                                        \
+  return temp;                                               \
+}
+
+#define __GMPZ_DEFINE_INCREMENT_OPERATOR(fun, eval_fun) \
+__GMP_DEFINE_INCREMENT_OPERATOR(mpz, fun, eval_fun)
+
+#define __GMPQ_DEFINE_INCREMENT_OPERATOR(fun, eval_fun) \
+__GMP_DEFINE_INCREMENT_OPERATOR(mpq, fun, eval_fun)
+
+#define __GMPF_DEFINE_INCREMENT_OPERATOR(fun, eval_fun) \
+__GMP_DEFINE_INCREMENT_OPERATOR(mpf, fun, eval_fun)
+
+
+
+/**************** Arithmetic operators and functions ****************/
+
+// non-member operators and functions
+
+__GMP_DEFINE_UNARY_FUNCTION(operator+, __gmp_unary_plus)
+__GMP_DEFINE_UNARY_FUNCTION(operator-, __gmp_unary_minus)
+__GMP_DEFINE_UNARY_FUNCTION(operator~, __gmp_unary_com)
+
+__GMP_DEFINE_BINARY_FUNCTION(operator+, __gmp_binary_plus)
+__GMP_DEFINE_BINARY_FUNCTION(operator-, __gmp_binary_minus)
+__GMP_DEFINE_BINARY_FUNCTION(operator*, __gmp_binary_multiplies)
+__GMP_DEFINE_BINARY_FUNCTION(operator/, __gmp_binary_divides)
+__GMP_DEFINE_BINARY_FUNCTION(operator%, __gmp_binary_modulus)
+__GMP_DEFINE_BINARY_FUNCTION(operator&, __gmp_binary_and)
+__GMP_DEFINE_BINARY_FUNCTION(operator|, __gmp_binary_ior)
+__GMP_DEFINE_BINARY_FUNCTION(operator^, __gmp_binary_xor)
+
+__GMP_DEFINE_BINARY_FUNCTION_UI(operator<<, __gmp_binary_lshift)
+__GMP_DEFINE_BINARY_FUNCTION_UI(operator>>, __gmp_binary_rshift)
+
+__GMP_DEFINE_BINARY_TYPE_FUNCTION(bool, operator==, __gmp_binary_equal)
+__GMP_DEFINE_BINARY_TYPE_FUNCTION(bool, operator!=, __gmp_binary_not_equal)
+__GMP_DEFINE_BINARY_TYPE_FUNCTION(bool, operator<, __gmp_binary_less)
+__GMP_DEFINE_BINARY_TYPE_FUNCTION(bool, operator<=, __gmp_binary_less_equal)
+__GMP_DEFINE_BINARY_TYPE_FUNCTION(bool, operator>, __gmp_binary_greater)
+__GMP_DEFINE_BINARY_TYPE_FUNCTION(bool, operator>=, \
+                                  __gmp_binary_greater_equal)
+
+__GMP_DEFINE_UNARY_FUNCTION(abs, __gmp_abs_function)
+__GMP_DEFINE_UNARY_FUNCTION(trunc, __gmp_trunc_function)
+__GMP_DEFINE_UNARY_FUNCTION(floor, __gmp_floor_function)
+__GMP_DEFINE_UNARY_FUNCTION(ceil, __gmp_ceil_function)
+__GMP_DEFINE_UNARY_FUNCTION(sqrt, __gmp_sqrt_function)
+__GMP_DEFINE_BINARY_FUNCTION(hypot, __gmp_hypot_function)
+
+__GMP_DEFINE_UNARY_TYPE_FUNCTION(int, sgn, __gmp_sgn_function)
+__GMP_DEFINE_BINARY_TYPE_FUNCTION(int, cmp, __gmp_cmp_function)
+
+// member operators for mpz_class
+
+__GMPZ_DEFINE_COMPOUND_OPERATOR(operator+=, __gmp_binary_plus)
+__GMPZ_DEFINE_COMPOUND_OPERATOR(operator-=, __gmp_binary_minus)
+__GMPZ_DEFINE_COMPOUND_OPERATOR(operator*=, __gmp_binary_multiplies)
+__GMPZ_DEFINE_COMPOUND_OPERATOR(operator/=, __gmp_binary_divides)
+__GMPZ_DEFINE_COMPOUND_OPERATOR(operator%=, __gmp_binary_modulus)
+
+__GMPZ_DEFINE_COMPOUND_OPERATOR(operator&=, __gmp_binary_and)
+__GMPZ_DEFINE_COMPOUND_OPERATOR(operator|=, __gmp_binary_ior)
+__GMPZ_DEFINE_COMPOUND_OPERATOR(operator^=, __gmp_binary_xor)
+
+__GMPZ_DEFINE_COMPOUND_OPERATOR_UI(operator<<=, __gmp_binary_lshift)
+__GMPZ_DEFINE_COMPOUND_OPERATOR_UI(operator>>=, __gmp_binary_rshift)
+
+__GMPZ_DEFINE_INCREMENT_OPERATOR(operator++, __gmp_unary_increment)
+__GMPZ_DEFINE_INCREMENT_OPERATOR(operator--, __gmp_unary_decrement)
+
+// member operators for mpq_class
+
+__GMPQ_DEFINE_COMPOUND_OPERATOR(operator+=, __gmp_binary_plus)
+__GMPQ_DEFINE_COMPOUND_OPERATOR(operator-=, __gmp_binary_minus)
+__GMPQ_DEFINE_COMPOUND_OPERATOR(operator*=, __gmp_binary_multiplies)
+__GMPQ_DEFINE_COMPOUND_OPERATOR(operator/=, __gmp_binary_divides)
+
+__GMPQ_DEFINE_COMPOUND_OPERATOR_UI(operator<<=, __gmp_binary_lshift)
+__GMPQ_DEFINE_COMPOUND_OPERATOR_UI(operator>>=, __gmp_binary_rshift)
+
+__GMPQ_DEFINE_INCREMENT_OPERATOR(operator++, __gmp_unary_increment)
+__GMPQ_DEFINE_INCREMENT_OPERATOR(operator--, __gmp_unary_decrement)
+
+// member operators for mpf_class
+
+__GMPF_DEFINE_COMPOUND_OPERATOR(operator+=, __gmp_binary_plus)
+__GMPF_DEFINE_COMPOUND_OPERATOR(operator-=, __gmp_binary_minus)
+__GMPF_DEFINE_COMPOUND_OPERATOR(operator*=, __gmp_binary_multiplies)
+__GMPF_DEFINE_COMPOUND_OPERATOR(operator/=, __gmp_binary_divides)
+
+__GMPF_DEFINE_COMPOUND_OPERATOR_UI(operator<<=, __gmp_binary_lshift)
+__GMPF_DEFINE_COMPOUND_OPERATOR_UI(operator>>=, __gmp_binary_rshift)
+
+__GMPF_DEFINE_INCREMENT_OPERATOR(operator++, __gmp_unary_increment)
+__GMPF_DEFINE_INCREMENT_OPERATOR(operator--, __gmp_unary_decrement)
+
+
+
+/**************** Class wrapper for gmp_randstate_t ****************/
+
+class __gmp_urandomb_value { };
+class __gmp_urandomm_value { };
+
+template <>
+class __gmp_expr<mpz_t, __gmp_urandomb_value>
+{
+private:
+  __gmp_randstate_struct *state;  // borrowed pointer; this class never frees it
+  unsigned long int bits;         // forwarded unchanged to __gmp_rand_function
+public:
+  __gmp_expr(gmp_randstate_t s, unsigned long int l) : state(s), bits(l) { }
+  void eval(mpz_ptr z) const { __gmp_rand_function::eval(z, state, bits); }
+  unsigned long int get_prec() const { return mpf_get_default_prec(); }
+};
+
+template <>
+class __gmp_expr<mpz_t, __gmp_urandomm_value>
+{
+private:
+  __gmp_randstate_struct *state;  // borrowed pointer; this class never frees it
+  mpz_class range;                // own copy of the constructor's z argument
+public:
+  __gmp_expr(gmp_randstate_t s, const mpz_class &z) : state(s), range(z) { }
+  void eval(mpz_ptr z) const
+  { __gmp_rand_function::eval(z, state, range.get_mpz_t()); }
+  unsigned long int get_prec() const { return mpf_get_default_prec(); }
+};
+
+template <>
+class __gmp_expr<mpf_t, __gmp_urandomb_value>
+{
+private:
+  __gmp_randstate_struct *state;  // borrowed pointer; this class never frees it
+  unsigned long int bits;         // requested precision; 0 means "not specified"
+public:
+  __gmp_expr(gmp_randstate_t s, unsigned long int l) : state(s), bits(l) { }
+  void eval(mpf_ptr f, mp_bitcnt_t prec) const  // a nonzero bits overrides prec
+  { __gmp_rand_function::eval(f, state, (bits>0) ? get_prec() : prec); }
+  unsigned long int get_prec() const  // bits, or the mpf default when bits == 0
+  {
+    if (bits == 0)
+      return mpf_get_default_prec();
+    else
+      return bits;
+  }
+};
+
+extern "C" {
+  typedef void __gmp_randinit_default_t (gmp_randstate_t);
+  typedef void __gmp_randinit_lc_2exp_t (gmp_randstate_t, mpz_srcptr, unsigned long int, unsigned long int);
+  typedef int __gmp_randinit_lc_2exp_size_t (gmp_randstate_t, unsigned long int);
+}
+
+class gmp_randclass
+{
+private:
+  gmp_randstate_t state;  // handed to an init function by every constructor
+
+  // copy construction and assignment not allowed (private, declaration only)
+  gmp_randclass(const gmp_randclass &);
+  void operator=(const gmp_randclass &);
+public:
+  // constructors and destructor
+  gmp_randclass(gmp_randalg_t alg, unsigned long int size)
+  {
+    switch (alg)
+      {
+      case GMP_RAND_ALG_LC: // no other cases for now
+      default:
+       gmp_randinit(state, alg, size);
+       break;
+      }
+  }
+
+  // gmp_randinit_default
+  gmp_randclass(__gmp_randinit_default_t* f) { f(state); }
+
+  // gmp_randinit_lc_2exp
+  gmp_randclass(__gmp_randinit_lc_2exp_t* f,
+               mpz_class z, unsigned long int l1, unsigned long int l2)
+  { f(state, z.get_mpz_t(), l1, l2); }
+
+  // gmp_randinit_lc_2exp_size; throws std::length_error when f returns 0
+  gmp_randclass(__gmp_randinit_lc_2exp_size_t* f,
+               unsigned long int size)
+  {
+    if (f (state, size) == 0)
+      throw std::length_error ("gmp_randinit_lc_2exp_size");
+  }
+
+  ~gmp_randclass() { gmp_randclear(state); }
+
+  // initialize
+  void seed(); // choose a random seed some way (?); only declared in this class
+  void seed(unsigned long int s) { gmp_randseed_ui(state, s); }
+  void seed(const mpz_class &z) { gmp_randseed(state, z.get_mpz_t()); }
+
+  // get random number (each getter returns an expression object wrapping state)
+  __gmp_expr<mpz_t, __gmp_urandomb_value> get_z_bits(unsigned long int l)
+  { return __gmp_expr<mpz_t, __gmp_urandomb_value>(state, l); }
+  __gmp_expr<mpz_t, __gmp_urandomb_value> get_z_bits(const mpz_class &z)
+  { return get_z_bits(z.get_ui()); }  // z is narrowed through get_ui()
+
+  __gmp_expr<mpz_t, __gmp_urandomm_value> get_z_range(const mpz_class &z)
+  { return __gmp_expr<mpz_t, __gmp_urandomm_value>(state, z); }
+
+  __gmp_expr<mpf_t, __gmp_urandomb_value> get_f(mp_bitcnt_t prec = 0)
+  { return __gmp_expr<mpf_t, __gmp_urandomb_value>(state, prec); }
+};
+
+
+/**************** #undef all private macros ****************/
+
+#undef __GMPP_DECLARE_COMPOUND_OPERATOR
+#undef __GMPN_DECLARE_COMPOUND_OPERATOR
+#undef __GMP_DECLARE_COMPOUND_OPERATOR
+#undef __GMP_DECLARE_COMPOUND_OPERATOR_UI
+#undef __GMP_DECLARE_INCREMENT_OPERATOR
+
+#undef __GMPZQ_DEFINE_EXPR
+
+#undef __GMP_DEFINE_UNARY_FUNCTION
+#undef __GMP_DEFINE_UNARY_TYPE_FUNCTION
+
+#undef __GMPP_DEFINE_BINARY_FUNCTION
+#undef __GMPNN_DEFINE_BINARY_FUNCTION
+#undef __GMPNS_DEFINE_BINARY_FUNCTION
+#undef __GMPNU_DEFINE_BINARY_FUNCTION
+#undef __GMPND_DEFINE_BINARY_FUNCTION
+#undef __GMPNLD_DEFINE_BINARY_FUNCTION
+#undef __GMPN_DEFINE_BINARY_FUNCTION
+#undef __GMP_DEFINE_BINARY_FUNCTION
+
+#undef __GMP_DEFINE_BINARY_FUNCTION_UI
+
+#undef __GMPP_DEFINE_BINARY_TYPE_FUNCTION
+#undef __GMPNN_DEFINE_BINARY_TYPE_FUNCTION
+#undef __GMPNS_DEFINE_BINARY_TYPE_FUNCTION
+#undef __GMPNU_DEFINE_BINARY_TYPE_FUNCTION
+#undef __GMPND_DEFINE_BINARY_TYPE_FUNCTION
+#undef __GMPNLD_DEFINE_BINARY_TYPE_FUNCTION
+#undef __GMPN_DEFINE_BINARY_TYPE_FUNCTION
+#undef __GMP_DEFINE_BINARY_TYPE_FUNCTION
+
+#undef __GMPZ_DEFINE_COMPOUND_OPERATOR
+#undef __GMPZN_DEFINE_COMPOUND_OPERATOR
+#undef __GMPZNN_DEFINE_COMPOUND_OPERATOR
+#undef __GMPZNS_DEFINE_COMPOUND_OPERATOR
+#undef __GMPZNU_DEFINE_COMPOUND_OPERATOR
+#undef __GMPZND_DEFINE_COMPOUND_OPERATOR
+#undef __GMPZNLD_DEFINE_COMPOUND_OPERATOR
+
+#undef __GMPP_DEFINE_COMPOUND_OPERATOR
+#undef __GMPNN_DEFINE_COMPOUND_OPERATOR
+#undef __GMPNS_DEFINE_COMPOUND_OPERATOR
+#undef __GMPNU_DEFINE_COMPOUND_OPERATOR
+#undef __GMPND_DEFINE_COMPOUND_OPERATOR
+#undef __GMPNLD_DEFINE_COMPOUND_OPERATOR
+#undef __GMPN_DEFINE_COMPOUND_OPERATOR
+#undef __GMP_DEFINE_COMPOUND_OPERATOR
+
+#undef __GMPQ_DEFINE_COMPOUND_OPERATOR
+#undef __GMPF_DEFINE_COMPOUND_OPERATOR
+
+#undef __GMP_DEFINE_COMPOUND_OPERATOR_UI
+#undef __GMPZ_DEFINE_COMPOUND_OPERATOR_UI
+#undef __GMPQ_DEFINE_COMPOUND_OPERATOR_UI
+#undef __GMPF_DEFINE_COMPOUND_OPERATOR_UI
+
+#undef __GMP_DEFINE_INCREMENT_OPERATOR
+#undef __GMPZ_DEFINE_INCREMENT_OPERATOR
+#undef __GMPQ_DEFINE_INCREMENT_OPERATOR
+#undef __GMPF_DEFINE_INCREMENT_OPERATOR
+
+#endif /* __GMP_PLUSPLUS__ */
diff --git a/install-sh b/install-sh

new file mode 100755 (executable)

index 0000000..e4160c9
--- /dev/null
+++ b/install-sh
@@ -0,0 +1,325 @@
+#!/bin/sh
+# install - install a program, script, or datafile
+
+scriptversion=2004-04-01.17
+
+# This originates from X11R5 (mit/util/scripts/install.sh), which was
+# later released in X11R6 (xc/config/util/install.sh) with the
+# following copyright and license.
+#
+# Copyright (C) 1994 X Consortium
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to
+# deal in the Software without restriction, including without limitation the
+# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+# sell copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC-
+# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+# Except as contained in this notice, the name of the X Consortium shall not
+# be used in advertising or otherwise to promote the sale, use or other deal-
+# ings in this Software without prior written authorization from the X Consor-
+# tium.
+#
+#
+# FSF changes to this file are in the public domain.
+#
+# Calling this script install-sh is preferred over install.sh, to prevent
+# `make' implicit rules from creating a file called install from it
+# when there is no Makefile.
+#
+# This script is compatible with the BSD install script, but was written
+# from scratch.  It can only install one file at a time, a restriction
+# shared with many OS's install programs.
+
+# set DOITPROG to echo to test this script
+
+# Don't use :- since 4.3BSD and earlier shells don't like it.
+doit="${DOITPROG-}"
+
+# put in absolute paths if you don't have them in your path; or use env. vars.
+
+mvprog="${MVPROG-mv}"
+cpprog="${CPPROG-cp}"
+chmodprog="${CHMODPROG-chmod}"
+chownprog="${CHOWNPROG-chown}"
+chgrpprog="${CHGRPPROG-chgrp}"
+stripprog="${STRIPPROG-strip}"
+rmprog="${RMPROG-rm}"
+mkdirprog="${MKDIRPROG-mkdir}"
+
+transformbasename=
+transformarg=    # name must match the -t= handler and the rename step
+instcmd="$mvprog"
+chmodcmd="$chmodprog 0755"
+chowncmd=
+chgrpcmd=
+stripcmd=
+rmcmd="$rmprog -f"
+mvcmd="$mvprog"
+src=
+dst=
+dir_arg= dstarg=  # dstarg is tested by the option loop before it is assigned
+
+usage="Usage: $0 [OPTION]... SRCFILE DSTFILE
+   or: $0 [OPTION]... SRCFILES... DIRECTORY
+   or: $0 -d DIRECTORIES...
+
+In the 1st form, install SRCFILE to DSTFILE, removing SRCFILE by default.
+In the 2nd, install SRCFILES into DIRECTORY.  In the 3rd, create DIRECTORIES.
+
+Options:
+-b=TRANSFORMBASENAME
+-c         copy source (using $cpprog) instead of moving (using $mvprog).
+-d         create directories instead of installing files.
+-g GROUP   $chgrpprog installed files to GROUP.
+-m MODE    $chmodprog installed files to MODE.
+-o USER    $chownprog installed files to USER.
+-s         strip installed files (using $stripprog).
+-t=TRANSFORM
+--help     display this help and exit.
+--version  display version info and exit.
+
+Environment variables override the default commands:
+  CHGRPPROG CHMODPROG CHOWNPROG CPPROG MKDIRPROG MVPROG RMPROG STRIPPROG
+"
+
+while test -n "$1"; do   # also stops at an empty-string argument
+  case $1 in
+    -b=*) transformbasename=`echo "$1" | sed 's/-b=//'`  # quoted: no split/glob
+        shift
+        continue;;
+
+    -c) instcmd=$cpprog
+        shift
+        continue;;
+
+    -d) dir_arg=true
+        shift
+        continue;;
+
+    -g) chgrpcmd="$chgrpprog $2"
+        shift
+        shift
+        continue;;
+
+    --help) echo "$usage"; exit 0;;
+
+    -m) chmodcmd="$chmodprog $2"
+        shift
+        shift
+        continue;;
+
+    -o) chowncmd="$chownprog $2"
+        shift
+        shift
+        continue;;
+
+    -s) stripcmd=$stripprog
+        shift
+        continue;;
+
+    -t=*) transformarg=`echo "$1" | sed 's/-t=//'`  # quoted: no split/glob
+        shift
+        continue;;
+
+    --version) echo "$0 $scriptversion"; exit 0;;
+
+    *)  # When -d is used, all remaining arguments are directories to create.
+       test -n "$dir_arg" && break
+        # Otherwise, the last argument is the destination.  Remove it from $@.
+       for arg
+       do
+          if test -n "$dstarg"; then
+           # $@ is not empty: it contains at least $arg.
+           set fnord "$@" "$dstarg"
+           shift # fnord
+         fi
+         shift # arg
+         dstarg=$arg
+       done
+       break;;
+  esac
+done
+
+if test -z "$1"; then
+  if test -z "$dir_arg"; then
+    echo "$0: no input file specified." >&2
+    exit 1
+  fi
+  # It's OK to call `install-sh -d' without argument.
+  # This can happen when creating conditional directories.
+  exit 0
+fi
+
+for src
+do
+  # Protect names starting with `-'.
+  case $src in
+    -*) src=./$src ;;
+  esac
+
+  if test -n "$dir_arg"; then
+    dst=$src
+    src=
+    dirchmodcmd=$chmodcmd   # per-directory copy, so the skip below is not sticky
+    if test -d "$dst"; then
+      instcmd=:
+      dirchmodcmd=
+    else
+      instcmd=$mkdirprog
+    fi
+  else
+    # Waiting for this to be detected by the "$instcmd $src $dsttmp" command
+    # might cause directories to be created, which would be especially bad
+    # if $src (and thus $dsttmp) contains '*'.
+    if test ! -f "$src" && test ! -d "$src"; then
+      echo "$0: $src does not exist." >&2
+      exit 1
+    fi
+
+    if test -z "$dstarg"; then
+      echo "$0: no destination specified." >&2
+      exit 1
+    fi
+
+    dst=$dstarg
+    # Protect names starting with `-'.
+    case $dst in
+      -*) dst=./$dst ;;
+    esac
+
+    # If destination is a directory, append the input filename; won't work
+    # if double slashes aren't ignored.
+    if test -d "$dst"; then
+      dst=$dst/`basename "$src"`
+    fi
+  fi
+
+  # This sed command emulates the dirname command.
+  dstdir=`echo "$dst" | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'`
+
+  # Make sure that the destination directory exists.
+
+  # Skip lots of stat calls in the usual case.
+  if test ! -d "$dstdir"; then
+    defaultIFS='
+        '
+    IFS="${IFS-$defaultIFS}"
+
+    oIFS=$IFS
+    # Some sh's can't handle IFS=/ for some reason.
+    IFS='%'
+    set - `echo "$dstdir" | sed -e 's@/@%@g' -e 's@^%@/@'`
+    IFS=$oIFS
+
+    pathcomp=
+
+    while test $# -ne 0 ; do
+      pathcomp=$pathcomp$1
+      shift
+      if test ! -d "$pathcomp"; then
+        $mkdirprog "$pathcomp" || lasterr=$?
+       # mkdir can fail with a `File exist' error in case several
+       # install-sh are creating the directory concurrently.  This
+       # is OK.
+       test ! -d "$pathcomp" && { (exit ${lasterr-1}); exit; }
+      fi
+      pathcomp=$pathcomp/
+    done
+  fi
+
+  if test -n "$dir_arg"; then
+    $doit $instcmd "$dst" \
+      && { test -z "$chowncmd" || $doit $chowncmd "$dst"; } \
+      && { test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } \
+      && { test -z "$stripcmd" || $doit $stripcmd "$dst"; } \
+      && { test -z "$dirchmodcmd" || $doit $dirchmodcmd "$dst"; }
+
+  else
+    # If we're going to rename the final executable, determine the name now.
+    if test -z "$transformarg"; then
+      dstfile=`basename "$dst"`
+    else
+      dstfile=`basename "$dst" $transformbasename \
+               | sed $transformarg`$transformbasename
+    fi
+
+    # don't allow the sed command to completely eliminate the filename.
+    test -z "$dstfile" && dstfile=`basename "$dst"`
+
+    # Make a couple of temp file names in the proper directory.
+    dsttmp=$dstdir/_inst.$$_
+    rmtmp=$dstdir/_rm.$$_
+
+    # Trap to clean up those temp files at exit.
+    trap 'status=$?; rm -f "$dsttmp" "$rmtmp" && exit $status' 0
+    trap '(exit $?); exit' 1 2 13 15
+
+    # Move or copy the file name to the temp name
+    $doit $instcmd "$src" "$dsttmp" &&
+
+    # and set any options; do chmod last to preserve setuid bits.
+    #
+    # If any of these fail, we abort the whole thing.  If we want to
+    # ignore errors from any of these, just make sure not to ignore
+    # errors from the above "$doit $instcmd $src $dsttmp" command.
+    #
+    { test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } \
+      && { test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } \
+      && { test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } \
+      && { test -z "$chmodcmd" || $doit $chmodcmd "$dsttmp"; } &&
+
+    # Now rename the file to the real destination.
+    { $doit $mvcmd -f "$dsttmp" "$dstdir/$dstfile" 2>/dev/null \
+      || {
+          # The rename failed, perhaps because mv can't rename something else
+          # to itself, or perhaps because mv is so ancient that it does not
+          # support -f.
+
+          # Now remove or move aside any old file at destination location.
+          # We try this two ways since rm can't unlink itself on some
+          # systems and the destination file might be busy for other
+          # reasons.  In this case, the final cleanup might fail but the new
+          # file should still install successfully.
+          {
+            if test -f "$dstdir/$dstfile"; then
+              $doit $rmcmd -f "$dstdir/$dstfile" 2>/dev/null \
+              || $doit $mvcmd -f "$dstdir/$dstfile" "$rmtmp" 2>/dev/null \
+              || {
+                echo "$0: cannot unlink or rename $dstdir/$dstfile" >&2
+                (exit 1); exit
+              }
+            else
+              :
+            fi
+          } &&
+
+          # Now rename the file to the real destination.
+          $doit $mvcmd "$dsttmp" "$dstdir/$dstfile"
+        }
+    }
+  fi || { (exit 1); exit; }
+done
+
+# The final little trick to "correctly" pass the exit status to the exit trap.
+{
+  (exit 0); exit
+}
+
+# Local variables:
+# eval: (add-hook 'write-file-hooks 'time-stamp)
+# time-stamp-start: "scriptversion="
+# time-stamp-format: "%:y-%02m-%02d.%02H"
+# time-stamp-end: "$"
+# End:
diff --git a/invalid.c b/invalid.c

new file mode 100644 (file)

index 0000000..24c6f13
--- /dev/null
+++ b/invalid.c
@@ -0,0 +1,72 @@
+/* __gmp_invalid_operation -- invalid floating point operation.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#include <signal.h>
+#include <stdlib.h>
+
+#if HAVE_UNISTD_H
+#include <unistd.h>  /* for getpid */
+#endif
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Incidentally, kill is not available on mingw, but that's ok, it has raise
+   and we'll be using that.  */
+#if ! HAVE_RAISE
+#define raise(sig)   kill (getpid(), sig)
+#endif
+
+
+/* __gmp_invalid_operation is for an invalid floating point operation, like
+   mpz_set_d on a NaN or Inf.  It's done as a subroutine to minimize code in
+   places raising an exception.
+
+   feraiseexcept(FE_INVALID) is not used here, since unfortunately on most
+   systems it would require libm.
+
+   Alternatives:
+
+   It might be possible to check whether a hardware "invalid operation" trap
+   is enabled or not before raising a signal.  This would require all
+   callers to be prepared to continue with some bogus result.  Bogus returns
+   are bad, but presumably an application disabling the trap is prepared for
+   that.
+
+   On some systems (eg. BSD) the signal handler can find out the reason for
+   a SIGFPE (overflow, invalid, div-by-zero, etc).  Perhaps we could get
+   that into our raise too.
+
+   i386 GLIBC implements feraiseexcept(FE_INVALID) with an asm fdiv 0/0.
+   That would both respect the exceptions mask and give a reason code in a
+   BSD signal.  */
+
+void
+__gmp_invalid_operation (void)
+{
+  raise (SIGFPE);  /* may expand to kill (getpid(), sig) when HAVE_RAISE is 0 */
+  abort ();        /* reached only if raise() returns */
+}
diff --git a/libmp.sym b/libmp.sym

new file mode 100644 (file)

index 0000000..f36d5c0
--- /dev/null
+++ b/libmp.sym
@@ -0,0 +1,18 @@
+itom
+xtom
+move
+madd
+msub
+mult
+mdiv
+sdiv
+msqrt
+pow
+rpow
+gcd
+mcmp
+min
+mout
+mtox
+mfree
+__gmp_set_memory_functions
diff --git a/longlong.h b/longlong.h

new file mode 100644 (file)

index 0000000..8cac79d
--- /dev/null
+++ b/longlong.h
@@ -0,0 +1,2093 @@
+/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
+
+Copyright 1991, 1992, 1993, 1994, 1996, 1997, 1999, 2000, 2001, 2002, 2003,
+2004, 2005, 2007, 2008, 2009, 2011 Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it under the
+terms of the GNU Lesser General Public License as published by the Free
+Software Foundation; either version 3 of the License, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
+details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this file.  If not, see http://www.gnu.org/licenses/.  */
+
+/* You have to define the following before including this file:
+
+   UWtype -- An unsigned type, default type for operations (typically a "word")
+   UHWtype -- An unsigned type, at least half the size of UWtype
+   UDWtype -- An unsigned type, at least twice as large as UWtype
+   W_TYPE_SIZE -- size in bits of UWtype
+
+   SItype, USItype -- Signed and unsigned 32 bit types
+   DItype, UDItype -- Signed and unsigned 64 bit types
+
+   On a 32 bit machine UWtype should typically be USItype;
+   on a 64 bit machine, UWtype should typically be UDItype.
+
+   Optionally, define:
+
+   LONGLONG_STANDALONE -- Avoid code that needs machine-dependent support files
+   NO_ASM -- Disable inline asm
+
+
+   CAUTION!  Using this version of longlong.h outside of GMP is not safe.  You
+   need to include gmp.h and gmp-impl.h, or certain things might not work as
+   expected.
+*/
+
+#define __BITS4 (W_TYPE_SIZE / 4)
+#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
+#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
+#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
+
+/* This is used to make sure no undesirable sharing between different libraries
+   that use this file takes place.  */
+#ifndef __MPN
+#define __MPN(x) __##x
+#endif
+
+#ifndef _PROTO
+#if (__STDC__-0) || defined (__cplusplus)
+#define _PROTO(x) x
+#else
+#define _PROTO(x) ()
+#endif
+#endif
+
+/* Define auxiliary asm macros.
+
+   1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two
+   UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype
+   word product in HIGH_PROD and LOW_PROD.
+
+   2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a
+   UDWtype product.  This is just a variant of umul_ppmm.
+
+   3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
+   denominator) divides a UDWtype, composed by the UWtype integers
+   HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
+   in QUOTIENT and the remainder in REMAINDER.  HIGH_NUMERATOR must be less
+   than DENOMINATOR for correct operation.  If, in addition, the most
+   significant bit of DENOMINATOR must be 1, then the pre-processor symbol
+   UDIV_NEEDS_NORMALIZATION is defined to 1.
+
+   4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
+   denominator).  Like udiv_qrnnd but the numbers are signed.  The quotient
+   is rounded towards 0.
+
+   5) count_leading_zeros(count, x) counts the number of zero-bits from the
+   msb to the first non-zero bit in the UWtype X.  This is the number of
+   steps X needs to be shifted left to set the msb.  Undefined for X == 0,
+   unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.
+
+   6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
+   from the least significant end.
+
+   7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
+   high_addend_2, low_addend_2) adds two two-word UWtype integers, composed
+   by HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
+   respectively.  The result is placed in HIGH_SUM and LOW_SUM.  Overflow
+   (i.e. carry out) is not stored anywhere, and is lost.
+
+   8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
+   high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
+   composed by HIGH_MINUEND and LOW_MINUEND, and HIGH_SUBTRAHEND and
+   LOW_SUBTRAHEND respectively.  The result is placed in HIGH_DIFFERENCE
+   and LOW_DIFFERENCE.  Overflow (i.e. borrow out) is not stored anywhere,
+   and is lost.
+
+   If any of these macros are left undefined for a particular CPU,
+   C macros are used.
+
+
+   Notes:
+
+   For add_ssaaaa the two high and two low addends can both commute, but
+   unfortunately gcc only supports one "%" commutative in each asm block.
+   This has always been so but is only documented in recent versions
+   (eg. pre-release 3.3).  Having two or more "%"s can cause an internal
+   compiler error in certain rare circumstances.
+
+   Apparently it was only the last "%" that was ever actually respected, so
+   the code has been updated to leave just that.  Clearly there's a free
+   choice whether high or low should get it, if there's a reason to favour
+   one over the other.  Also obviously when the constraints on the two
+   operands are identical there's no benefit to the reloader in any "%" at
+   all.
+
+   */
+
+/* The CPUs come in alphabetical order below.
+
+   Please add support for more CPUs here, or improve the current support
+   for the CPUs below!  */
+
+
+/* count_leading_zeros_gcc_clz is count_leading_zeros implemented with gcc
+   3.4 __builtin_clzl or __builtin_clzll, according to our limb size.
+   Similarly count_trailing_zeros_gcc_ctz using __builtin_ctzl or
+   __builtin_ctzll.
+
+   These builtins are only used once we've checked what code comes out.  On
+   some chips they're merely libgcc calls, and in that case we instead want
+   an inline (either asm or generic C).
+
+   These builtins are better than an asm block of the same insn, since an
+   asm block doesn't give gcc any information about scheduling or resource
+   usage.  We keep an asm block for use on prior versions of gcc though.
+
+   For reference, __builtin_ffs existed in gcc prior to __builtin_clz, but
+   it's not used (for count_leading_zeros) because it generally gives extra
+   code to ensure the result is 0 when the input is 0, which we don't need
+   or want.  */
+
+#ifdef _LONG_LONG_LIMB
+#define count_leading_zeros_gcc_clz(count,x)    \
+  do {                                          \
+    ASSERT ((x) != 0);                          \
+    (count) = __builtin_clzll (x);              \
+  } while (0)
+#else
+#define count_leading_zeros_gcc_clz(count,x)    \
+  do {                                          \
+    ASSERT ((x) != 0);                          \
+    (count) = __builtin_clzl (x);               \
+  } while (0)
+#endif
+
+#ifdef _LONG_LONG_LIMB
+#define count_trailing_zeros_gcc_ctz(count,x)   \
+  do {                                          \
+    ASSERT ((x) != 0);                          \
+    (count) = __builtin_ctzll (x);              \
+  } while (0)
+#else
+#define count_trailing_zeros_gcc_ctz(count,x)   \
+  do {                                          \
+    ASSERT ((x) != 0);                          \
+    (count) = __builtin_ctzl (x);               \
+  } while (0)
+#endif
+
+
+/* FIXME: The macros using external routines like __MPN(count_leading_zeros)
+   don't need to be under !NO_ASM */
+#if ! defined (NO_ASM)
+
+#if defined (__alpha) && W_TYPE_SIZE == 64
+/* Most alpha-based machines, except Cray systems. */
+#if defined (__GNUC__)
+#if __GMP_GNUC_PREREQ (3,3)
+#define umul_ppmm(ph, pl, m0, m1) \
+  do {                                                                 \
+    UDItype __m0 = (m0), __m1 = (m1);                                  \
+    (ph) = __builtin_alpha_umulh (__m0, __m1);                         \
+    (pl) = __m0 * __m1;                                                        \
+  } while (0)
+#else
+#define umul_ppmm(ph, pl, m0, m1) \
+  do {                                                                 \
+    UDItype __m0 = (m0), __m1 = (m1);                                  \
+    __asm__ ("umulh %r1,%2,%0"                                         \
+            : "=r" (ph)                                                \
+            : "%rJ" (m0), "rI" (m1));                                  \
+    (pl) = __m0 * __m1;                                                        \
+  } while (0)
+#endif
+#define UMUL_TIME 18
+#else /* ! __GNUC__ */
+#include <machine/builtins.h>
+#define umul_ppmm(ph, pl, m0, m1) \
+  do {                                                                 \
+    UDItype __m0 = (m0), __m1 = (m1);                                  \
+    (ph) = __UMULH (m0, m1);                                           \
+    (pl) = __m0 * __m1;                                                        \
+  } while (0)
+#endif
+#ifndef LONGLONG_STANDALONE
+#define udiv_qrnnd(q, r, n1, n0, d) \
+  do { UWtype __di;                                                    \
+    __di = __MPN(invert_limb) (d);                                     \
+    udiv_qrnnd_preinv (q, r, n1, n0, d, __di);                         \
+  } while (0)
+#define UDIV_PREINV_ALWAYS  1
+#define UDIV_NEEDS_NORMALIZATION 1
+#define UDIV_TIME 220
+#endif /* LONGLONG_STANDALONE */
+
+/* clz_tab is required in all configurations, since mpn/alpha/cntlz.asm
+   always goes into libgmp.so, even when not actually used.  */
+#define COUNT_LEADING_ZEROS_NEED_CLZ_TAB
+
+#if defined (__GNUC__) && HAVE_HOST_CPU_alpha_CIX
+#define count_leading_zeros(COUNT,X) \
+  __asm__("ctlz %1,%0" : "=r"(COUNT) : "r"(X))
+#define count_trailing_zeros(COUNT,X) \
+  __asm__("cttz %1,%0" : "=r"(COUNT) : "r"(X))
+#endif /* clz/ctz using cix */
+
+#if ! defined (count_leading_zeros)                             \
+  && defined (__GNUC__) && ! defined (LONGLONG_STANDALONE)
+/* ALPHA_CMPBGE_0 gives "cmpbge $31,src,dst", ie. test src bytes == 0.
+   "$31" is written explicitly in the asm, since an "r" constraint won't
+   select reg 31.  There seems no need to worry about "r31" syntax for cray,
+   since gcc itself (pre-release 3.4) emits just $31 in various places.  */
+#define ALPHA_CMPBGE_0(dst, src)                                        \
+  do { asm ("cmpbge $31, %1, %0" : "=r" (dst) : "r" (src)); } while (0)
+/* Zero bytes are turned into bits with cmpbge, a __clz_tab lookup counts
+   them, locating the highest non-zero byte.  A second __clz_tab lookup
+   counts the leading zero bits in that byte, giving the result.  */
+#define count_leading_zeros(count, x)                                   \
+  do {                                                                  \
+    UWtype  __clz__b, __clz__c, __clz__x = (x);                         \
+    ALPHA_CMPBGE_0 (__clz__b,  __clz__x);           /* zero bytes */    \
+    __clz__b = __clz_tab [(__clz__b >> 1) ^ 0x7F];  /* 8 to 1 byte */   \
+    __clz__b = __clz__b * 8 - 7;                    /* 57 to 1 shift */ \
+    __clz__x >>= __clz__b;                                              \
+    __clz__c = __clz_tab [__clz__x];                /* 8 to 1 bit */    \
+    __clz__b = 65 - __clz__b;                                           \
+    (count) = __clz__b - __clz__c;                                      \
+  } while (0)
+#define COUNT_LEADING_ZEROS_NEED_CLZ_TAB
+#endif /* clz using cmpbge */
+
+#if ! defined (count_leading_zeros) && ! defined (LONGLONG_STANDALONE)
+#if HAVE_ATTRIBUTE_CONST
+long __MPN(count_leading_zeros) _PROTO ((UDItype)) __attribute__ ((const));
+#else
+long __MPN(count_leading_zeros) _PROTO ((UDItype));
+#endif
+#define count_leading_zeros(count, x) \
+  ((count) = __MPN(count_leading_zeros) (x))
+#endif /* clz using mpn */
+#endif /* __alpha */
+
+#if defined (_CRAY) && W_TYPE_SIZE == 64
+#include <intrinsics.h>
+#define UDIV_PREINV_ALWAYS  1
+#define UDIV_NEEDS_NORMALIZATION 1
+#define UDIV_TIME 220
+long __MPN(count_leading_zeros) _PROTO ((UDItype));
+#define count_leading_zeros(count, x) \
+  ((count) = _leadz ((UWtype) (x)))
+#if defined (_CRAYIEEE)                /* I.e., Cray T90/ieee, T3D, and T3E */
+#define umul_ppmm(ph, pl, m0, m1) \
+  do {                                                                 \
+    UDItype __m0 = (m0), __m1 = (m1);                                  \
+    (ph) = _int_mult_upper (m0, m1);                                   \
+    (pl) = __m0 * __m1;                                                        \
+  } while (0)
+#ifndef LONGLONG_STANDALONE
+#define udiv_qrnnd(q, r, n1, n0, d) \
+  do { UWtype __di;                                                    \
+    __di = __MPN(invert_limb) (d);                                     \
+    udiv_qrnnd_preinv (q, r, n1, n0, d, __di);                         \
+  } while (0)
+#endif /* LONGLONG_STANDALONE */
+#endif /* _CRAYIEEE */
+#endif /* _CRAY */
+
+#if defined (__ia64) && W_TYPE_SIZE == 64
+/* This form encourages gcc (pre-release 3.4 at least) to emit predicated
+   "sub r=r,r" and "sub r=r,r,1", giving a 2 cycle latency.  The generic
+   code using "al<bl" arithmetically comes out making an actual 0 or 1 in a
+   register, which takes an extra cycle.  */
+#define sub_ddmmss(sh, sl, ah, al, bh, bl)      \
+  do {                                          \
+    UWtype __x;                                 \
+    __x = (al) - (bl);       /* low word, held back until sh is done */ \
+    if ((al) < (bl))         /* low-word subtraction borrows */         \
+      (sh) = (ah) - (bh) - 1;                   \
+    else                                        \
+      (sh) = (ah) - (bh);                       \
+    (sl) = __x;              /* written only after all inputs are read */ \
+  } while (0)
+#if defined (__GNUC__) && ! defined (__INTEL_COMPILER)
+/* Do both product parts in assembly, since that gives better code with
+   all gcc versions.  Some callers will just use the upper part, and in
+   that situation we waste an instruction, but not any cycles.  */
+#define umul_ppmm(ph, pl, m0, m1) \
+    __asm__ ("xma.hu %0 = %2, %3, f0\n\txma.l %1 = %2, %3, f0"         \
+            : "=&f" (ph), "=f" (pl)                                    \
+            : "f" (m0), "f" (m1))
+#define UMUL_TIME 14
+#define count_leading_zeros(count, x) \
+  do {                                                                 \
+    UWtype _x = (x), _y, _a, _c;                                       \
+    __asm__ ("mux1 %0 = %1, @rev" : "=r" (_y) : "r" (_x));             \
+    __asm__ ("czx1.l %0 = %1" : "=r" (_a) : "r" (-_y | _y));           \
+    _c = (_a - 1) << 3;                                                        \
+    _x >>= _c;                                                         \
+    if (_x >= 1 << 4)                                                  \
+      _x >>= 4, _c += 4;                                               \
+    if (_x >= 1 << 2)                                                  \
+      _x >>= 2, _c += 2;                                               \
+    _c += _x >> 1;                                                     \
+    (count) =  W_TYPE_SIZE - 1 - _c;                                   \
+  } while (0)
+/* similar to what gcc does for __builtin_ffs, but 0 based rather than 1
+   based, and we don't need a special case for x==0 here */
+#define count_trailing_zeros(count, x)                                 \
+  do {                                                                 \
+    UWtype __ctz_x = (x);                                              \
+    __asm__ ("popcnt %0 = %1"                                          \
+            : "=r" (count)                                             \
+            : "r" ((__ctz_x-1) & ~__ctz_x));                           \
+  } while (0)
+#endif
+#if defined (__INTEL_COMPILER)
+#include <ia64intrin.h>
+#define umul_ppmm(ph, pl, m0, m1)                                      \
+  do {                                                                 \
+    UWtype _m0 = (m0), _m1 = (m1);                                     \
+    ph = _m64_xmahu (_m0, _m1, 0);                                     \
+    pl = _m0 * _m1;                                                    \
+  } while (0)
+#endif
+#ifndef LONGLONG_STANDALONE
+#define udiv_qrnnd(q, r, n1, n0, d) \
+  do { UWtype __di;                                                    \
+    __di = __MPN(invert_limb) (d);                                     \
+    udiv_qrnnd_preinv (q, r, n1, n0, d, __di);                         \
+  } while (0)
+#define UDIV_PREINV_ALWAYS  1
+#define UDIV_NEEDS_NORMALIZATION 1
+#endif
+#define UDIV_TIME 220
+#endif
+
+
+#if defined (__GNUC__)
+
+/* We sometimes need to clobber "cc" with gcc2, but that would not be
+   understood by gcc1.  Use cpp to avoid major code duplication.  */
+#if __GNUC__ < 2
+#define __CLOBBER_CC
+#define __AND_CLOBBER_CC
+#else /* __GNUC__ >= 2 */
+#define __CLOBBER_CC : "cc"
+#define __AND_CLOBBER_CC , "cc"
+#endif /* __GNUC__ < 2 */
+
+#if (defined (__a29k__) || defined (_AM29K)) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("add %1,%4,%5\n\taddc %0,%2,%3"                             \
+          : "=r" (sh), "=&r" (sl)                                      \
+          : "r" (ah), "rI" (bh), "%r" (al), "rI" (bl))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("sub %1,%4,%5\n\tsubc %0,%2,%3"                             \
+          : "=r" (sh), "=&r" (sl)                                      \
+          : "r" (ah), "rI" (bh), "r" (al), "rI" (bl))
+#define umul_ppmm(xh, xl, m0, m1) \
+  do {                                                                 \
+    USItype __m0 = (m0), __m1 = (m1);                                  \
+    __asm__ ("multiplu %0,%1,%2"                                       \
+            : "=r" (xl)                                                \
+            : "r" (__m0), "r" (__m1));                                 \
+    __asm__ ("multmu %0,%1,%2"                                         \
+            : "=r" (xh)                                                \
+            : "r" (__m0), "r" (__m1));                                 \
+  } while (0)
+#define udiv_qrnnd(q, r, n1, n0, d) \
+  __asm__ ("dividu %0,%3,%4"                                           \
+          : "=r" (q), "=q" (r)                                         \
+          : "1" (n1), "r" (n0), "r" (d))
+#define count_leading_zeros(count, x) \
+    __asm__ ("clz %0,%1"                                               \
+            : "=r" (count)                                             \
+            : "r" (x))
+#define COUNT_LEADING_ZEROS_0 32
+#endif /* __a29k__ */
+
+#if defined (__arc__)
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("add.f\t%1, %4, %5\n\tadc\t%0, %2, %3"                      \
+          : "=r" (sh),                                                 \
+            "=&r" (sl)                                                 \
+          : "r"  ((USItype) (ah)),                                     \
+            "rIJ" ((USItype) (bh)),                                    \
+            "%r" ((USItype) (al)),                                     \
+            "rIJ" ((USItype) (bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("sub.f\t%1, %4, %5\n\tsbc\t%0, %2, %3"                      \
+          : "=r" (sh),                                                 \
+            "=&r" (sl)                                                 \
+          : "r" ((USItype) (ah)),                                      \
+            "rIJ" ((USItype) (bh)),                                    \
+            "r" ((USItype) (al)),                                      \
+            "rIJ" ((USItype) (bl)))
+#endif
+
+#if defined (__arm__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("adds\t%1, %4, %5\n\tadc\t%0, %2, %3"                       \
+          : "=r" (sh), "=&r" (sl)                                      \
+          : "r" (ah), "rI" (bh), "%r" (al), "rI" (bl) __CLOBBER_CC)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  do {                                                                 \
+    if (__builtin_constant_p (al))                                     \
+      {                                                                        \
+       if (__builtin_constant_p (ah))                                  \
+         __asm__ ("rsbs\t%1, %5, %4\n\trsc\t%0, %3, %2"                \
+                  : "=r" (sh), "=&r" (sl)                              \
+                  : "rI" (ah), "r" (bh), "rI" (al), "r" (bl) __CLOBBER_CC); \
+       else                                                            \
+         __asm__ ("rsbs\t%1, %5, %4\n\tsbc\t%0, %2, %3"                \
+                  : "=r" (sh), "=&r" (sl)                              \
+                  : "r" (ah), "rI" (bh), "rI" (al), "r" (bl) __CLOBBER_CC); \
+      }                                                                        \
+    else if (__builtin_constant_p (ah))                                        \
+      {                                                                        \
+       if (__builtin_constant_p (bl))                                  \
+         __asm__ ("subs\t%1, %4, %5\n\trsc\t%0, %3, %2"                \
+                  : "=r" (sh), "=&r" (sl)                              \
+                  : "rI" (ah), "r" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \
+       else                                                            \
+         __asm__ ("rsbs\t%1, %5, %4\n\trsc\t%0, %3, %2"                \
+                  : "=r" (sh), "=&r" (sl)                              \
+                  : "rI" (ah), "r" (bh), "rI" (al), "r" (bl) __CLOBBER_CC); \
+      }                                                                        \
+    else if (__builtin_constant_p (bl))                                        \
+      {                                                                        \
+       if (__builtin_constant_p (bh))                                  \
+         __asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3"                \
+                  : "=r" (sh), "=&r" (sl)                              \
+                  : "r" (ah), "rI" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \
+       else                                                            \
+         __asm__ ("subs\t%1, %4, %5\n\trsc\t%0, %3, %2"                \
+                  : "=r" (sh), "=&r" (sl)                              \
+                  : "rI" (ah), "r" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \
+      }                                                                        \
+    else /* only bh might be a constant */                             \
+      __asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3"                   \
+              : "=r" (sh), "=&r" (sl)                                  \
+              : "r" (ah), "rI" (bh), "r" (al), "rI" (bl) __CLOBBER_CC);\
+    } while (0)
+#if 1 || defined (__arm_m__)   /* `M' series has widening multiply support */
+#define umul_ppmm(xh, xl, a, b) \
+  __asm__ ("umull %0,%1,%2,%3" : "=&r" (xl), "=&r" (xh) : "r" (a), "r" (b))
+#define UMUL_TIME 5
+#define smul_ppmm(xh, xl, a, b) \
+  __asm__ ("smull %0,%1,%2,%3" : "=&r" (xl), "=&r" (xh) : "r" (a), "r" (b))
+#ifndef LONGLONG_STANDALONE
+#define udiv_qrnnd(q, r, n1, n0, d) \
+  do { UWtype __di;                                                    \
+    __di = __MPN(invert_limb) (d);                                     \
+    udiv_qrnnd_preinv (q, r, n1, n0, d, __di);                         \
+  } while (0)
+#define UDIV_PREINV_ALWAYS  1
+#define UDIV_NEEDS_NORMALIZATION 1
+#define UDIV_TIME 70
+#endif /* LONGLONG_STANDALONE */
+#else
+#define umul_ppmm(xh, xl, a, b) \
+  __asm__ ("%@ Inlined umul_ppmm\n"                                    \
+"      mov     %|r0, %2, lsr #16\n"            /* r0 = a_hi */         \
+"      mov     %0, %3, lsr #16\n"              /* xh = b_hi */         \
+"      bic     %|r1, %2, %|r0, lsl #16\n"      /* r1 = a_lo */         \
+"      bic     %|r2, %3, %0, lsl #16\n"        /* r2 = b_lo */         \
+"      mul     %1, %|r1, %|r2\n"               /* xl = a_lo * b_lo */  \
+"      mul     %|r2, %|r0, %|r2\n"             /* r2 = a_hi * b_lo */  \
+"      mul     %|r1, %0, %|r1\n"               /* r1 = b_hi * a_lo */  \
+"      mul     %0, %|r0, %0\n"                 /* xh = a_hi * b_hi */  \
+"      adds    %|r1, %|r2, %|r1\n"             /* r1 = cross sum */    \
+"      addcs   %0, %0, #65536\n"               /* its carry = 2^16 in xh */ \
+"      adds    %1, %1, %|r1, lsl #16\n"        /* low half of cross sum */ \
+"      adc     %0, %0, %|r1, lsr #16"          /* high half plus carry */ \
+          : "=&r" (xh), "=r" (xl)                                      \
+          : "r" (a), "r" (b)                                           \
+          : "r0", "r1", "r2" __AND_CLOBBER_CC) /* adds/adc alter flags */
+#define UMUL_TIME 20
+#ifndef LONGLONG_STANDALONE
+#define udiv_qrnnd(q, r, n1, n0, d) \
+  do { UWtype __r;                                                     \
+    (q) = __MPN(udiv_qrnnd) (&__r, (n1), (n0), (d));                   \
+    (r) = __r;                                                         \
+  } while (0)
+extern UWtype __MPN(udiv_qrnnd) _PROTO ((UWtype *, UWtype, UWtype, UWtype));
+#define UDIV_TIME 200
+#endif /* LONGLONG_STANDALONE */
+#endif
+#if defined (__ARM_ARCH_5__)
+/* The clz insn needs arm 5; only __ARM_ARCH_5__ itself is tested for here */
+#define count_leading_zeros(count, x) \
+  __asm__ ("clz\t%0, %1" : "=r" (count) : "r" (x))
+#define COUNT_LEADING_ZEROS_0 32
+#endif
+#endif /* __arm__ */
+
+#if defined (__clipper__) && W_TYPE_SIZE == 32
+#define umul_ppmm(w1, w0, u, v) \
+  ({union {UDItype __ll;                                               \
+          struct {USItype __l, __h;} __i;                              \
+         } __x;                                                        \
+  __asm__ ("mulwux %2,%0"                                              \
+          : "=r" (__x.__ll)                                            \
+          : "%0" ((USItype)(u)), "r" ((USItype)(v)));                  \
+  (w1) = __x.__i.__h; (w0) = __x.__i.__l;})
+#define smul_ppmm(w1, w0, u, v) \
+  ({union {DItype __ll;                                                        \
+          struct {SItype __l, __h;} __i;                               \
+         } __x;                                                        \
+  __asm__ ("mulwx %2,%0"                                               \
+          : "=r" (__x.__ll)                                            \
+          : "%0" ((SItype)(u)), "r" ((SItype)(v)));                    \
+  (w1) = __x.__i.__h; (w0) = __x.__i.__l;})
+#define __umulsidi3(u, v) \
+  ({UDItype __w;                                                       \
+    __asm__ ("mulwux %2,%0"                                            \
+            : "=r" (__w) : "%0" ((USItype)(u)), "r" ((USItype)(v)));   \
+    __w; })
+#endif /* __clipper__ */
+
+/* Fujitsu vector computers.  */
+#if defined (__uxp__) && W_TYPE_SIZE == 32
+#define umul_ppmm(ph, pl, u, v) \
+  do {                                                                 \
+    union {UDItype __ll;                                               \
+          struct {USItype __h, __l;} __i;                              \
+         } __x;                                                        \
+    __asm__ ("mult.lu %1,%2,%0"        : "=r" (__x.__ll) : "%r" (u), "rK" (v));\
+    (ph) = __x.__i.__h;                                                        \
+    (pl) = __x.__i.__l;                                                        \
+  } while (0)
+#define smul_ppmm(ph, pl, u, v) \
+  do {                                                                 \
+    union {UDItype __ll;                                               \
+          struct {USItype __h, __l;} __i;                              \
+         } __x;                                                        \
+    __asm__ ("mult.l %1,%2,%0" : "=r" (__x.__ll) : "%r" (u), "rK" (v));        \
+    (ph) = __x.__i.__h;                                                        \
+    (pl) = __x.__i.__l;                                                        \
+  } while (0)
+#endif
+
+#if defined (__gmicro__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("add.w %5,%1\n\taddx %3,%0"                                 \
+          : "=g" (sh), "=&g" (sl)                                      \
+          : "0"  ((USItype)(ah)), "g" ((USItype)(bh)),                 \
+            "%1" ((USItype)(al)), "g" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("sub.w %5,%1\n\tsubx %3,%0"                                 \
+          : "=g" (sh), "=&g" (sl)                                      \
+          : "0" ((USItype)(ah)), "g" ((USItype)(bh)),                  \
+            "1" ((USItype)(al)), "g" ((USItype)(bl)))
+#define umul_ppmm(ph, pl, m0, m1) \
+  __asm__ ("mulx %3,%0,%1"                                             \
+          : "=g" (ph), "=r" (pl)                                       \
+          : "%0" ((USItype)(m0)), "g" ((USItype)(m1)))
+#define udiv_qrnnd(q, r, nh, nl, d) \
+  __asm__ ("divx %4,%0,%1"                                             \
+          : "=g" (q), "=r" (r)                                         \
+          : "1" ((USItype)(nh)), "0" ((USItype)(nl)), "g" ((USItype)(d)))
+#define count_leading_zeros(count, x) \
+  __asm__ ("bsch/1 %1,%0"                                              \
+          : "=g" (count) : "g" ((USItype)(x)), "0" ((USItype)0))
+#endif
+
+#if defined (__hppa) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("add%I5 %5,%r4,%1\n\taddc %r2,%r3,%0"                       \
+          : "=r" (sh), "=&r" (sl)                                      \
+          : "rM" (ah), "rM" (bh), "%rM" (al), "rI" (bl))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("sub%I4 %4,%r5,%1\n\tsubb %r2,%r3,%0"                       \
+          : "=r" (sh), "=&r" (sl)                                      \
+          : "rM" (ah), "rM" (bh), "rI" (al), "rM" (bl))
+#if defined (_PA_RISC1_1)
+#define umul_ppmm(wh, wl, u, v) \
+  do {                                                                 \
+    union {UDItype __ll;                                               \
+          struct {USItype __h, __l;} __i;                              \
+         } __x;                                                        \
+    __asm__ ("xmpyu %1,%2,%0" : "=*f" (__x.__ll) : "*f" (u), "*f" (v));        \
+    (wh) = __x.__i.__h;                                                        \
+    (wl) = __x.__i.__l;                                                        \
+  } while (0)
+#define UMUL_TIME 8
+#define UDIV_TIME 60
+#else
+#define UMUL_TIME 40
+#define UDIV_TIME 80
+#endif
+/* count_leading_zeros(count, x): binary search for the most significant
+   set bit.  count starts at 1; at each step the upper half of the remaining
+   field is tested: if it is zero the field width (16, 8, 4, 2) is added to
+   count, otherwise the value is shifted down and the add is skipped.  The
+   last two instructions extract bit 1 and subtract it from count.  x is
+   copied into the scratch __tmp through the "1" matching constraint, so the
+   shifting is done on the copy.  */
+#define count_leading_zeros(count, x) \
+  do {                                                                 \
+    USItype __tmp;                                                     \
+    __asm__ (                                                          \
+       "ldi            1,%0\n"                                         \
+"      extru,=         %1,15,16,%%r0   ; Bits 31..16 zero?\n"          \
+"      extru,tr        %1,15,16,%1     ; No.  Shift down, skip add.\n" \
+"      ldo             16(%0),%0       ; Yes.  Perform add.\n"         \
+"      extru,=         %1,23,8,%%r0    ; Bits 15..8 zero?\n"           \
+"      extru,tr        %1,23,8,%1      ; No.  Shift down, skip add.\n" \
+"      ldo             8(%0),%0        ; Yes.  Perform add.\n"         \
+"      extru,=         %1,27,4,%%r0    ; Bits 7..4 zero?\n"            \
+"      extru,tr        %1,27,4,%1      ; No.  Shift down, skip add.\n" \
+"      ldo             4(%0),%0        ; Yes.  Perform add.\n"         \
+"      extru,=         %1,29,2,%%r0    ; Bits 3..2 zero?\n"            \
+"      extru,tr        %1,29,2,%1      ; No.  Shift down, skip add.\n" \
+"      ldo             2(%0),%0        ; Yes.  Perform add.\n"         \
+"      extru           %1,30,1,%1      ; Extract bit 1.\n"             \
+"      sub             %0,%1,%0        ; Subtract it.\n"               \
+       : "=r" (count), "=r" (__tmp) : "1" (x));                        \
+  } while (0)
+#endif /* hppa */
+
+/* These macros are for ABI=2.0w.  In ABI=2.0n they can't be used, since GCC
+   (3.2) puts longlong into two adjacent 32-bit registers.  Presumably this
+   is just a case of no direct support for 2.0n but treating it like 1.0. */
+#if defined (__hppa) && W_TYPE_SIZE == 64 && ! defined (_LONG_LONG_LIMB)
+/* add_ssaaaa(sh, sl, ah, al, bh, bl): two-word addition for 64-bit words,
+   (sh,sl) = (ah,al) + (bh,bl).  The low words are added first; "add,dc"
+   then adds the high words (presumably folding in the doubleword carry --
+   confirm against the PA-RISC 2.0 manual).  al is marked commutative ("%")
+   with bl.  */
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("add%I5 %5,%r4,%1\n\tadd,dc %r2,%r3,%0"                     \
+          : "=r" (sh), "=&r" (sl)                                      \
+          : "rM" (ah), "rM" (bh), "%rM" (al), "rI" (bl))
+/* sub_ddmmss(sh, sl, ah, al, bh, bl): two-word subtraction for 64-bit
+   words, (sh,sl) = (ah,al) - (bh,bl).  The low words are subtracted first;
+   "sub,db" then subtracts the high words (presumably with the doubleword
+   borrow -- confirm against the PA-RISC 2.0 manual).  */
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("sub%I4 %4,%r5,%1\n\tsub,db %r2,%r3,%0"                     \
+          : "=r" (sh), "=&r" (sl)                                      \
+          : "rM" (ah), "rM" (bh), "rI" (al), "rM" (bl))
+#endif /* hppa */
+
+#if (defined (__i370__) || defined (__s390__) || defined (__mvs__)) && W_TYPE_SIZE == 32
+#if defined (__zarch__) || defined (HAVE_HOST_CPU_s390_zarch)
+/* add_ssaaaa(sh, sl, ah, al, bh, bl): two-word addition.  ah and al are
+   tied to the outputs ("0" and "1"), so alr adds bl into the low word in
+   place and alcr then adds bh into the high word.  The commented-out branch
+   inside the macro is a disabled variant that would use alfi when bl is a
+   compile-time constant.  */
+#define add_ssaaaa(sh, sl, ah, al, bh, bl)                             \
+  do {                                                                 \
+/*  if (__builtin_constant_p (bl))                                     \
+      __asm__ ("alfi\t%1,%o5\n\talcr\t%0,%3"                           \
+              : "=r" (sh), "=&r" (sl)                                  \
+              : "0"  (ah), "r" (bh), "%1" (al), "n" (bl) __CLOBBER_CC);\
+    else                                                               \
+*/    __asm__ ("alr\t%1,%5\n\talcr\t%0,%3"                             \
+              : "=r" (sh), "=&r" (sl)                                  \
+              : "0"  (ah), "r" (bh), "%1" (al), "r" (bl)__CLOBBER_CC); \
+  } while (0)
+/* sub_ddmmss(sh, sl, ah, al, bh, bl): two-word subtraction.  ah and al are
+   tied to the outputs ("0" and "1"), so slr subtracts bl from the low word
+   in place and slbr then subtracts bh from the high word.  The
+   commented-out branch inside the macro is a disabled variant that would
+   use slfi when bl is a compile-time constant.  */
+#define sub_ddmmss(sh, sl, ah, al, bh, bl)                             \
+  do {                                                                 \
+/*  if (__builtin_constant_p (bl))                                     \
+      __asm__ ("slfi\t%1,%o5\n\tslbr\t%0,%3"                           \
+              : "=r" (sh), "=&r" (sl)                                  \
+              : "0" (ah), "r" (bh), "1" (al), "n" (bl) __CLOBBER_CC);  \
+    else                                                               \
+*/    __asm__ ("slr\t%1,%5\n\tslbr\t%0,%3"                             \
+              : "=r" (sh), "=&r" (sl)                                  \
+              : "0" (ah), "r" (bh), "1" (al), "r" (bl) __CLOBBER_CC);  \
+  } while (0)
+#if __GMP_GNUC_PREREQ (4,5)
+/* umul_ppmm(xh, xl, m0, m1): plain-C variant used when the compiler is
+   gcc 4.5 or later.  Both operands are widened to UDItype, multiplied, and
+   the product is split through a union into its high (__h -> xh) and low
+   (__l -> xl) halves.  No inline asm is involved.  */
+#define umul_ppmm(xh, xl, m0, m1)                                      \
+  do {                                                                 \
+    union {UDItype __ll;                                               \
+          struct {USItype __h, __l;} __i;                              \
+         } __x;                                                        \
+    __x.__ll = (UDItype) (m0) * (UDItype) (m1);                                \
+    (xh) = __x.__i.__h; (xl) = __x.__i.__l;                            \
+  } while (0)
+#else
+#if 0
+/* FIXME: this fails if gcc knows about the 64-bit registers.  Use only
+   with a new enough processor pretending we have 32-bit registers.  */
+/* Disabled (#if 0) variant of umul_ppmm: mlr writes the product into a
+   DImode union member that is then split into high and low halves.  */
+#define umul_ppmm(xh, xl, m0, m1)                                      \
+  do {                                                                 \
+    union {UDItype __ll;                                               \
+          struct {USItype __h, __l;} __i;                              \
+         } __x;                                                        \
+    __asm__ ("mlr\t%0,%2"                                              \
+            : "=r" (__x.__ll)                                          \
+            : "%0" (m0), "r" (m1));                                    \
+    (xh) = __x.__i.__h; (xl) = __x.__i.__l;                            \
+  } while (0)
+#else
+/* umul_ppmm(xh, xl, m0, m1): mlr-based multiply that pins its operands to
+   explicit registers 0 and 1 instead of a DImode value.  m0 is loaded into
+   register 1; after mlr the high word is read from register 0 and the low
+   word from register 1.  */
+#define umul_ppmm(xh, xl, m0, m1)                                      \
+  do {                                                                 \
+  /* When we have 64-bit regs and gcc is aware of that, we cannot simply use
+     DImode for the product, since that would be allocated to a single 64-bit
+     register, whereas mlr uses the low 32-bits of an even-odd register pair.
+  */                                                                   \
+    register USItype __r0 __asm__ ("0");                               \
+    register USItype __r1 __asm__ ("1") = (m0);                                \
+    __asm__ ("mlr\t%0,%3"                                              \
+            : "=r" (__r0), "=r" (__r1)                                 \
+            : "r" (__r1), "r" (m1));                                   \
+    (xh) = __r0; (xl) = __r1;                                          \
+  } while (0)
+#endif /* if 0 */
+#endif
+#if 0
+/* FIXME: this fails if gcc knows about the 64-bit registers.  Use only
+   with a new enough processor pretending we have 32-bit registers.  */
+/* Disabled (#if 0) variant of udiv_qrnnd: the two-word numerator is
+   assembled in a DImode union, divided in place by dlr, and the result is
+   split into quotient (low half) and remainder (high half).  */
+#define udiv_qrnnd(q, r, n1, n0, d)                                    \
+  do {                                                                 \
+    union {UDItype __ll;                                               \
+          struct {USItype __h, __l;} __i;                              \
+         } __x;                                                        \
+    __x.__i.__h = n1; __x.__i.__l = n0;                                        \
+    __asm__ ("dlr\t%0,%2"                                              \
+            : "=r" (__x.__ll)                                          \
+            : "0" (__x.__ll), "r" (d));                                \
+    (q) = __x.__i.__l; (r) = __x.__i.__h;                              \
+  } while (0)
+#else
+/* udiv_qrnnd(q, r, n1, n0, d): divide the two-word value (n1,n0) by d
+   using dlr on the explicit register pair 0/1.  n1 is placed in register 0
+   and n0 in register 1; afterwards the quotient is read from register 1 and
+   the remainder from register 0.  */
+#define udiv_qrnnd(q, r, n1, n0, d)                                    \
+  do {                                                                 \
+    register USItype __r0 __asm__ ("0") = (n1);                                \
+    register USItype __r1 __asm__ ("1") = (n0);                                \
+    __asm__ ("dlr\t%0,%4"                                              \
+            : "=r" (__r0), "=r" (__r1)                                 \
+            : "r" (__r0), "r" (__r1), "r" (d));                        \
+    (q) = __r1; (r) = __r0;                                            \
+  } while (0)
+#endif /* if 0 */
+#else /* if __zarch__ */
+/* FIXME: this fails if gcc knows about the 64-bit registers.  */
+/* smul_ppmm(xh, xl, m0, m1): signed multiply using mr.  The product is
+   produced in a DImode union member (m0 is tied to it via "%0") and split
+   into high (__h -> xh) and low (__l -> xl) halves.  */
+#define smul_ppmm(xh, xl, m0, m1)                                      \
+  do {                                                                 \
+    union {DItype __ll;                                                        \
+          struct {USItype __h, __l;} __i;                              \
+         } __x;                                                        \
+    __asm__ ("mr\t%0,%2"                                               \
+            : "=r" (__x.__ll)                                          \
+            : "%0" (m0), "r" (m1));                                    \
+    (xh) = __x.__i.__h; (xl) = __x.__i.__l;                            \
+  } while (0)
+/* FIXME: this fails if gcc knows about the 64-bit registers.  */
+/* sdiv_qrnnd(q, r, n1, n0, d): signed divide using dr.  The numerator
+   (n1 high, n0 low) is assembled in a DImode union, divided in place, and
+   the result is split into quotient (low half) and remainder (high half).  */
+#define sdiv_qrnnd(q, r, n1, n0, d)                                    \
+  do {                                                                 \
+    union {DItype __ll;                                                        \
+          struct {USItype __h, __l;} __i;                              \
+         } __x;                                                        \
+    __x.__i.__h = n1; __x.__i.__l = n0;                                        \
+    __asm__ ("dr\t%0,%2"                                               \
+            : "=r" (__x.__ll)                                          \
+            : "0" (__x.__ll), "r" (d));                                \
+    (q) = __x.__i.__l; (r) = __x.__i.__h;                              \
+  } while (0)
+#endif /* if __zarch__ */
+#endif
+
+#if defined (__s390x__) && W_TYPE_SIZE == 64
+/* We need to cast operands with register constraints, otherwise their types
+   will be assumed to be SImode by gcc.  For these machines, such operations
+   will insert a value into the low 32 bits, and leave the high 32 bits with
+   garbage.  */
+/* add_ssaaaa(sh, sl, ah, al, bh, bl): two-word addition on 64-bit words.
+   algr adds bl into the low word (tied to al), alcgr then adds bh into the
+   high word (tied to ah).  Every input is cast to UDItype so that it is
+   passed as a full 64-bit value.  */
+#define add_ssaaaa(sh, sl, ah, al, bh, bl)                             \
+  do {                                                                 \
+    __asm__ ("algr\t%1,%5\n\talcgr\t%0,%3"                             \
+              : "=r" (sh), "=&r" (sl)                                  \
+              : "0"  ((UDItype)(ah)), "r" ((UDItype)(bh)),             \
+                "%1" ((UDItype)(al)), "r" ((UDItype)(bl)) __CLOBBER_CC); \
+  } while (0)
+/* sub_ddmmss(sh, sl, ah, al, bh, bl): two-word subtraction on 64-bit words.
+   slgr subtracts bl from the low word (tied to al), slbgr then subtracts bh
+   from the high word (tied to ah).  Every input is cast to UDItype so that
+   it is passed as a full 64-bit value.  */
+#define sub_ddmmss(sh, sl, ah, al, bh, bl)                             \
+  do {                                                                 \
+    __asm__ ("slgr\t%1,%5\n\tslbgr\t%0,%3"                             \
+            : "=r" (sh), "=&r" (sl)                                    \
+            : "0" ((UDItype)(ah)), "r" ((UDItype)(bh)),                \
+              "1" ((UDItype)(al)), "r" ((UDItype)(bl)) __CLOBBER_CC);  \
+  } while (0)
+/* umul_ppmm(xh, xl, m0, m1): 64x64 multiply using mlgr.  The product lands
+   in a TImode (128-bit) union member and is split into its high (__h -> xh)
+   and low (__l -> xl) 64-bit halves.  */
+#define umul_ppmm(xh, xl, m0, m1)                                      \
+  do {                                                                 \
+    union {unsigned int __attribute__ ((mode(TI))) __ll;               \
+          struct {UDItype __h, __l;} __i;                              \
+         } __x;                                                        \
+    __asm__ ("mlgr\t%0,%2"                                             \
+            : "=r" (__x.__ll)                                          \
+            : "%0" ((UDItype)(m0)), "r" ((UDItype)(m1)));              \
+    (xh) = __x.__i.__h; (xl) = __x.__i.__l;                            \
+  } while (0)
+/* udiv_qrnnd(q, r, n1, n0, d): divide the 128-bit value (n1,n0) by d using
+   dlgr.  The numerator is assembled in a TImode union and divided in place;
+   the quotient is then read from the low half and the remainder from the
+   high half.  */
+#define udiv_qrnnd(q, r, n1, n0, d)                                    \
+  do {                                                                 \
+    union {unsigned int __attribute__ ((mode(TI))) __ll;               \
+          struct {UDItype __h, __l;} __i;                              \
+         } __x;                                                        \
+    __x.__i.__h = n1; __x.__i.__l = n0;                                        \
+    __asm__ ("dlgr\t%0,%2"                                             \
+            : "=r" (__x.__ll)                                          \
+            : "0" (__x.__ll), "r" ((UDItype)(d)));                     \
+    (q) = __x.__i.__l; (r) = __x.__i.__h;                              \
+  } while (0)
+#if 0 /* FIXME: Enable for z10 (?) */
+/* Disabled (#if 0) count_leading_zeros based on flogr.  The instruction
+   writes a TImode result; only its high half is used as the count.  */
+#define count_leading_zeros(cnt, x)                                    \
+  do {                                                                 \
+    union {unsigned int __attribute__ ((mode(TI))) __ll;               \
+          struct {UDItype __h, __l;} __i;                              \
+         } __clr_cnt;                                                  \
+    __asm__ ("flogr\t%0,%1"                                            \
+            : "=r" (__clr_cnt.__ll)                                    \
+            : "r" (x) __CLOBBER_CC);                                   \
+    (cnt) = __clr_cnt.__i.__h;                                         \
+  } while (0)
+#endif
+#endif
+
+#if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32
+/* add_ssaaaa(sh, sl, ah, al, bh, bl): two-word addition.  addl adds bl into
+   the low word (tied to al), adcl then adds bh plus the carry into the high
+   word (tied to ah).  The %k modifiers force 32-bit register names.  */
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("addl %5,%k1\n\tadcl %3,%k0"                                        \
+          : "=r" (sh), "=&r" (sl)                                      \
+          : "0"  ((USItype)(ah)), "g" ((USItype)(bh)),                 \
+            "%1" ((USItype)(al)), "g" ((USItype)(bl)))
+/* sub_ddmmss(sh, sl, ah, al, bh, bl): two-word subtraction.  subl subtracts
+   bl from the low word (tied to al), sbbl then subtracts bh plus the borrow
+   from the high word (tied to ah).  */
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("subl %5,%k1\n\tsbbl %3,%k0"                                        \
+          : "=r" (sh), "=&r" (sl)                                      \
+          : "0" ((USItype)(ah)), "g" ((USItype)(bh)),                  \
+            "1" ((USItype)(al)), "g" ((USItype)(bl)))
+/* umul_ppmm(w1, w0, u, v): 32x32->64 multiply with mull.  u is placed in
+   the "a" register (tied to w0); the low word of the product comes back in
+   "a" (w0) and the high word in "d" (w1).  */
+#define umul_ppmm(w1, w0, u, v) \
+  __asm__ ("mull %3"                                                   \
+          : "=a" (w0), "=d" (w1)                                       \
+          : "%0" ((USItype)(u)), "rm" ((USItype)(v)))
+/* udiv_qrnnd(q, r, n1, n0, dx): 64/32 divide with divl.  n0 is loaded into
+   the "a" register and n1 into "d"; the quotient comes back in "a" (q) and
+   the remainder in "d" (r).  */
+#define udiv_qrnnd(q, r, n1, n0, dx) /* d renamed to dx avoiding "=d" */\
+  __asm__ ("divl %4"                /* stringification in K&R C */     \
+          : "=a" (q), "=d" (r)                                         \
+          : "0" ((USItype)(n0)), "1" ((USItype)(n1)), "rm" ((USItype)(dx)))
+
+#if HAVE_HOST_CPU_i586 || HAVE_HOST_CPU_pentium || HAVE_HOST_CPU_pentiummmx
+/* Pentium bsrl takes between 10 and 72 cycles depending where the most
+   significant 1 bit is, hence the use of the following alternatives.  bsfl
+   is slow too, between 18 and 42 depending where the least significant 1
+   bit is, so let the generic count_trailing_zeros below make use of the
+   count_leading_zeros here too.  */
+
+#if HAVE_HOST_CPU_pentiummmx && ! defined (LONGLONG_STANDALONE)
+/* The following should be a fixed 14 or 15 cycles, but possibly plus an L1
+   cache miss reading from __clz_tab.  For P55 it's favoured over the float
+   below so as to avoid mixing MMX and x87, since the penalty for switching
+   between the two is about 100 cycles.
+
+   The asm block sets __shift to -3 if the high 24 bits are clear, -2 for
+   16, -1 for 8, or 0 otherwise.  This could be written equivalently as
+   follows, but as of gcc 2.95.2 it results in conditional jumps.
+
+       __shift = -(__n < 0x1000000);
+       __shift -= (__n < 0x10000);
+       __shift -= (__n < 0x100);
+
+   The middle two sbbl and cmpl's pair, and with luck something gcc
+   generates might pair with the first cmpl and the last sbbl.  The "32+1"
+   constant could be folded into __clz_tab[], but it doesn't seem worth
+   making a different table just for that.  */
+
+/* count_leading_zeros(c,n): table-driven variant.  The cmpl/sbbl sequence
+   leaves __shift at 0, -1, -2 or -3 depending on how many of the thresholds
+   0x1000000, 0x10000 and 0x100 exceed __n.  That is converted to a bit
+   shift of 25, 17, 9 or 1 (unsigned wrap-around makes the negative cases
+   work), and __clz_tab is indexed with the shifted value to finish the
+   count.  */
+#define count_leading_zeros(c,n)                                       \
+  do {                                                                 \
+    USItype  __n = (n);                                                        \
+    USItype  __shift;                                                  \
+    __asm__ ("cmpl  $0x1000000, %1\n"                                  \
+            "sbbl  %0, %0\n"                                           \
+            "cmpl  $0x10000, %1\n"                                     \
+            "sbbl  $0, %0\n"                                           \
+            "cmpl  $0x100, %1\n"                                       \
+            "sbbl  $0, %0\n"                                           \
+            : "=&r" (__shift) : "r"  (__n));                           \
+    __shift = __shift*8 + 24 + 1;                                      \
+    (c) = 32 + 1 - __shift - __clz_tab[__n >> __shift];                        \
+  } while (0)
+#define COUNT_LEADING_ZEROS_NEED_CLZ_TAB
+#define COUNT_LEADING_ZEROS_0   31   /* n==0 indistinguishable from n==1 */
+
+#else /* ! pentiummmx || LONGLONG_STANDALONE */
+/* The following should be a fixed 14 cycles or so.  Some scheduling
+   opportunities should be available between the float load/store too.  This
+   sort of code is used in gcc 3 for __builtin_ffs (with "n&-n") and is
+   apparently suggested by the Intel optimizing manual (don't know exactly
+   where).  gcc 2.95 or up will be best for this, so the "double" is
+   correctly aligned on the stack.  */
+/* count_leading_zeros(c,n): floating-point variant.  n is converted to a
+   double and the biased exponent is pulled out of the upper 32-bit word
+   (a[1] >> 20).  Subtracting it from 0x3FF + 31 gives the number of leading
+   zero bits.  n must be non-zero (ASSERT).  Relies on a[1] holding the
+   sign/exponent word of the double, as on little-endian x86.  */
+#define count_leading_zeros(c,n)                                       \
+  do {                                                                 \
+    union {                                                            \
+      double    d;                                                     \
+      unsigned  a[2];                                                  \
+    } __u;                                                             \
+    ASSERT ((n) != 0);                                                 \
+    __u.d = (UWtype) (n);                                              \
+    (c) = 0x3FF + 31 - (__u.a[1] >> 20);                               \
+  } while (0)
+#define COUNT_LEADING_ZEROS_0   (0x3FF + 31)
+#endif /* pentiummmx */
+
+#else /* ! pentium */
+
+#if __GMP_GNUC_PREREQ (3,4)  /* using bsrl */
+/* Forward to count_leading_zeros_gcc_clz, which is defined outside this
+   section.  */
+#define count_leading_zeros(count,x)  count_leading_zeros_gcc_clz(count,x)
+#endif /* gcc clz */
+
+/* On P6, gcc prior to 3.0 generates a partial register stall for
+   __cbtmp^31, due to using "xorb $31" instead of "xorl $31", the former
+   being 1 code byte smaller.  "31-__cbtmp" is a workaround, probably at the
+   cost of one extra instruction.  Do this for "i386" too, since that means
+   generic x86.  */
+#if ! defined (count_leading_zeros) && __GNUC__ < 3                     \
+  && (HAVE_HOST_CPU_i386                                               \
+      || HAVE_HOST_CPU_i686                                            \
+      || HAVE_HOST_CPU_pentiumpro                                      \
+      || HAVE_HOST_CPU_pentium2                                                \
+      || HAVE_HOST_CPU_pentium3)
+/* count_leading_zeros(count, x): bsrl yields the index of the highest set
+   bit, which is turned into a leading-zero count with "31 - index" (rather
+   than "index ^ 31").  x must be non-zero (ASSERT).  */
+#define count_leading_zeros(count, x)                                  \
+  do {                                                                 \
+    USItype __cbtmp;                                                   \
+    ASSERT ((x) != 0);                                                 \
+    __asm__ ("bsrl %1,%0" : "=r" (__cbtmp) : "rm" ((USItype)(x)));     \
+    (count) = 31 - __cbtmp;                                            \
+  } while (0)
+#endif /* gcc<3 asm bsrl */
+
+#ifndef count_leading_zeros
+/* count_leading_zeros(count, x): fallback when no earlier definition
+   applied.  bsrl yields the index of the highest set bit; XOR with 31
+   converts that index (0..31) into the leading-zero count.  x must be
+   non-zero (ASSERT).  */
+#define count_leading_zeros(count, x)                                  \
+  do {                                                                 \
+    USItype __cbtmp;                                                   \
+    ASSERT ((x) != 0);                                                 \
+    __asm__ ("bsrl %1,%0" : "=r" (__cbtmp) : "rm" ((USItype)(x)));     \
+    (count) = __cbtmp ^ 31;                                            \
+  } while (0)
+#endif /* asm bsrl */
+
+#if __GMP_GNUC_PREREQ (3,4)  /* using bsfl */
+/* Forward to count_trailing_zeros_gcc_ctz, which is defined outside this
+   section.  */
+#define count_trailing_zeros(count,x)  count_trailing_zeros_gcc_ctz(count,x)
+#endif /* gcc ctz */
+
+#ifndef count_trailing_zeros
+/* count_trailing_zeros(count, x): fallback using bsfl, which writes the
+   index of the lowest set bit directly into count.  "%k0" forces the 32-bit
+   register name for the destination.  x must be non-zero (ASSERT).  */
+#define count_trailing_zeros(count, x)                                 \
+  do {                                                                 \
+    ASSERT ((x) != 0);                                                 \
+    __asm__ ("bsfl %1,%k0" : "=r" (count) : "rm" ((USItype)(x)));      \
+  } while (0)
+#endif /* asm bsfl */
+
+#endif /* ! pentium */
+
+#ifndef UMUL_TIME
+#define UMUL_TIME 10
+#endif
+#ifndef UDIV_TIME
+#define UDIV_TIME 40
+#endif
+#endif /* 80x86 */
+
+#if defined (__amd64__) && W_TYPE_SIZE == 64
+/* add_ssaaaa(sh, sl, ah, al, bh, bl): two-word addition on 64-bit words.
+   addq adds bl into the low word (tied to al), adcq then adds bh plus the
+   carry into the high word (tied to ah).  The %q modifiers force 64-bit
+   register names.  */
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("addq %5,%q1\n\tadcq %3,%q0"                                        \
+          : "=r" (sh), "=&r" (sl)                                      \
+          : "0"  ((UDItype)(ah)), "rme" ((UDItype)(bh)),               \
+            "%1" ((UDItype)(al)), "rme" ((UDItype)(bl)))
+/* sub_ddmmss(sh, sl, ah, al, bh, bl): two-word subtraction on 64-bit words.
+   subq subtracts bl from the low word (tied to al), sbbq then subtracts bh
+   plus the borrow from the high word (tied to ah).  */
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("subq %5,%q1\n\tsbbq %3,%q0"                                        \
+          : "=r" (sh), "=&r" (sl)                                      \
+          : "0" ((UDItype)(ah)), "rme" ((UDItype)(bh)),                \
+            "1" ((UDItype)(al)), "rme" ((UDItype)(bl)))
+/* umul_ppmm(w1, w0, u, v): 64x64->128 multiply with mulq.  u is placed in
+   the "a" register (tied to w0); the low word of the product comes back in
+   "a" (w0) and the high word in "d" (w1).  */
+#define umul_ppmm(w1, w0, u, v) \
+  __asm__ ("mulq %3"                                                   \
+          : "=a" (w0), "=d" (w1)                                       \
+          : "%0" ((UDItype)(u)), "rm" ((UDItype)(v)))
+/* udiv_qrnnd(q, r, n1, n0, dx): 128/64 divide with divq.  n0 is loaded into
+   the "a" register and n1 into "d"; the quotient comes back in "a" (q) and
+   the remainder in "d" (r).  */
+#define udiv_qrnnd(q, r, n1, n0, dx) /* d renamed to dx avoiding "=d" */\
+  __asm__ ("divq %4"                /* stringification in K&R C */     \
+          : "=a" (q), "=d" (r)                                         \
+          : "0" ((UDItype)(n0)), "1" ((UDItype)(n1)), "rm" ((UDItype)(dx)))
+/* count_leading_zeros(count, x): bsrq yields the index of the highest set
+   bit; XOR with 63 converts that index (0..63) into the leading-zero count.
+   bsrq destination must be a 64-bit register, hence UDItype for __cbtmp.
+   x must be non-zero (ASSERT).  */
+#define count_leading_zeros(count, x)                                  \
+  do {                                                                 \
+    UDItype __cbtmp;                                                   \
+    ASSERT ((x) != 0);                                                 \
+    __asm__ ("bsrq %1,%0" : "=r" (__cbtmp) : "rm" ((UDItype)(x)));     \
+    (count) = __cbtmp ^ 63;                                            \
+  } while (0)
+/* count_trailing_zeros(count, x): bsfq writes the index of the lowest set
+   bit directly into count.  bsfq destination must be a 64-bit register,
+   "%q0" forces this in case count is only an int.  x must be non-zero
+   (ASSERT).  */
+#define count_trailing_zeros(count, x)                                 \
+  do {                                                                 \
+    ASSERT ((x) != 0);                                                 \
+    __asm__ ("bsfq %1,%q0" : "=r" (count) : "rm" ((UDItype)(x)));      \
+  } while (0)
+#endif /* x86_64 */
+
+#if defined (__i860__) && W_TYPE_SIZE == 32
+/* rshift_rhlc(r,h,l,c): shift the two-word value (h,l) right by c and store
+   the resulting single word in r.  The first shr targets r0, so only its
+   side effect matters (presumably it latches the shift count that shrd then
+   uses -- confirm against the i860 manual).
+   The operand list must start with ':'; without it the "=r" string is
+   concatenated onto the asm template and the macro does not compile.  */
+#define rshift_rhlc(r,h,l,c) \
+  __asm__ ("shr %3,r0,r0\;shrd %1,%2,%0"                               \
+          : "=r" (r) : "r" (h), "r" (l), "rn" (c))
+#endif /* i860 */
+
+#if defined (__i960__) && W_TYPE_SIZE == 32
+/* add_ssaaaa(sh, sl, ah, al, bh, bl): two-word addition using addc for both
+   words.  The leading "cmpo 1,0" is there for its effect on the condition
+   code before the first addc (presumably it clears the carry-in -- confirm
+   against the i960 manual).  */
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("cmpo 1,0\;addc %5,%4,%1\;addc %3,%2,%0"                    \
+          : "=r" (sh), "=&r" (sl)                                      \
+          : "dI" (ah), "dI" (bh), "%dI" (al), "dI" (bl))
+/* sub_ddmmss(sh, sl, ah, al, bh, bl): two-word subtraction using subc for
+   both words.  The leading "cmpo 0,0" is there for its effect on the
+   condition code before the first subc (presumably it sets the "no borrow"
+   state -- confirm against the i960 manual).  */
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("cmpo 0,0\;subc %5,%4,%1\;subc %3,%2,%0"                    \
+          : "=r" (sh), "=&r" (sl)                                      \
+          : "dI" (ah), "dI" (bh), "dI" (al), "dI" (bl))
+/* umul_ppmm(w1, w0, u, v): 32x32->64 multiply with emul.  The product is
+   written to the double-word member of a union whose struct lists __l
+   before __h, then split into high (w1) and low (w0) words.  Written as a
+   GNU statement expression.  */
+#define umul_ppmm(w1, w0, u, v) \
+  ({union {UDItype __ll;                                               \
+          struct {USItype __l, __h;} __i;                              \
+         } __x;                                                        \
+  __asm__ ("emul %2,%1,%0"                                             \
+          : "=d" (__x.__ll) : "%dI" (u), "dI" (v));                    \
+  (w1) = __x.__i.__h; (w0) = __x.__i.__l;})
+/* __umulsidi3(u, v): statement expression that evaluates to the full
+   double-word (UDItype) product of u and v, computed with emul.  */
+#define __umulsidi3(u, v) \
+  ({UDItype __w;                                                       \
+    __asm__ ("emul %2,%1,%0" : "=d" (__w) : "%dI" (u), "dI" (v));      \
+    __w; })
+#define udiv_qrnnd(q, r, nh, nl, d) \
+  do {                                                                 \
+    union {UDItype __ll;                                               \
+          struct {USItype __l, __h;} __i;                              \
+         } __nn;                                                       \
+    __nn.__i.__h = (nh); __nn.__i.__l = (nl);                          \
+    __asm__ ("ediv %d,%n,%0"                                           \
+          : "=d" (__rq.__ll) : "dI" (__nn.__ll), "dI" (d));            \
+    (r) = __rq.__i.__l; (q) = __rq.__i.__h;                            \
+  } while (0)
+/* count_leading_zeros(count, x): scanbit stores a bit index in __cbtmp, and
+   XOR with 31 converts that index into the leading-zero count.  */
+#define count_leading_zeros(count, x) \
+  do {                                                                 \
+    USItype __cbtmp;                                                   \
+    __asm__ ("scanbit %1,%0" : "=r" (__cbtmp) : "r" (x));              \
+    (count) = __cbtmp ^ 31;                                            \
+  } while (0)
+#define COUNT_LEADING_ZEROS_0 (-32) /* sic */
+#if defined (__i960mx)         /* what is the proper symbol to test??? */
+#define rshift_rhlc(r,h,l,c) \
+  do {                                                                 \
+    union {UDItype __ll;                                               \
+          struct {USItype __l, __h;} __i;                              \
+         } __nn;                                                       \
+    __nn.__i.__h = (h); __nn.__i.__l = (l);                            \
+    __asm__ ("shre %2,%1,%0" : "=d" (r) : "dI" (__nn.__ll), "dI" (c)); \
+  }
+#endif /* i960mx */
+#endif /* i960 */
+
+#if (defined (__mc68000__) || defined (__mc68020__) || defined(mc68020) \
+     || defined (__m68k__) || defined (__mc5200__) || defined (__mc5206e__) \
+     || defined (__mc5307__)) && W_TYPE_SIZE == 32
+/* add_ssaaaa(sh, sl, ah, al, bh, bl): two-word addition.  add.l adds bl
+   into the low word (tied to al), addx.l then adds bh with the extend bit
+   into the high word (tied to ah).  Both results live in data registers.  */
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("add%.l %5,%1\n\taddx%.l %3,%0"                             \
+          : "=d" (sh), "=&d" (sl)                                      \
+          : "0"  ((USItype)(ah)), "d" ((USItype)(bh)),                 \
+            "%1" ((USItype)(al)), "g" ((USItype)(bl)))
+/* sub_ddmmss(sh, sl, ah, al, bh, bl): two-word subtraction.  sub.l
+   subtracts bl from the low word (tied to al), subx.l then subtracts bh
+   with the extend bit from the high word (tied to ah).  */
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("sub%.l %5,%1\n\tsubx%.l %3,%0"                             \
+          : "=d" (sh), "=&d" (sl)                                      \
+          : "0" ((USItype)(ah)), "d" ((USItype)(bh)),                  \
+            "1" ((USItype)(al)), "g" ((USItype)(bl)))
+/* The '020, '030, '040 and CPU32 have 32x32->64 and 64/32->32q-32r.  */
+#if defined (__mc68020__) || defined(mc68020) \
+     || defined (__mc68030__) || defined (mc68030) \
+     || defined (__mc68040__) || defined (mc68040) \
+     || defined (__mcpu32__) || defined (mcpu32) \
+     || defined (__NeXT__)
+/* umul_ppmm(w1, w0, u, v): 32x32->64 multiply with the long form of mulu,
+   which targets the register pair %1:%0.  u starts in the w0 register
+   (tied via "%0"); w1 receives the high word and w0 the low word.  */
+#define umul_ppmm(w1, w0, u, v) \
+  __asm__ ("mulu%.l %3,%1:%0"                                          \
+          : "=d" (w0), "=d" (w1)                                       \
+          : "%0" ((USItype)(u)), "dmi" ((USItype)(v)))
+#define UMUL_TIME 45
+/* udiv_qrnnd(q, r, n1, n0, d): unsigned 64/32 divide with the long form of
+   divu on the register pair %1:%0.  n0 starts in the q register and n1 in
+   the r register; afterwards q holds the quotient and r the remainder.  */
+#define udiv_qrnnd(q, r, n1, n0, d) \
+  __asm__ ("divu%.l %4,%1:%0"                                          \
+          : "=d" (q), "=d" (r)                                         \
+          : "0" ((USItype)(n0)), "1" ((USItype)(n1)), "dmi" ((USItype)(d)))
+#define UDIV_TIME 90
+/* sdiv_qrnnd(q, r, n1, n0, d): signed counterpart of udiv_qrnnd, using the
+   long form of divs on the register pair %1:%0 with the same operand
+   layout.  */
+#define sdiv_qrnnd(q, r, n1, n0, d) \
+  __asm__ ("divs%.l %4,%1:%0"                                          \
+          : "=d" (q), "=d" (r)                                         \
+          : "0" ((USItype)(n0)), "1" ((USItype)(n1)), "dmi" ((USItype)(d)))
+#else /* for other 68k family members use 16x16->32 multiplication */
+/* umul_ppmm(xh, xl, a, b): 32x32->64 multiply built from four 16x16->32
+   mulu.w partial products.  Operands: %0 = xh, %1 = xl, %2 = __umul_tmp1
+   (starts out holding a through the "%2" tie), %3 = __umul_tmp2 (gets a
+   copy of b), %5 = b.  The two cross products are summed in %2; if that
+   add carries, 0x10000 is added to the high word.  The sum is then split
+   into halves that are added into xl (add.l) and xh (addx.l).  */
+#define umul_ppmm(xh, xl, a, b) \
+  do { USItype __umul_tmp1, __umul_tmp2;                               \
+       __asm__ ("| Inlined umul_ppmm\n"                                \
+"      move%.l %5,%3\n"                                                \
+"      move%.l %2,%0\n"                                                \
+"      move%.w %3,%1\n"                                                \
+"      swap    %3\n"                                                   \
+"      swap    %0\n"                                                   \
+"      mulu%.w %2,%1\n"                                                \
+"      mulu%.w %3,%0\n"                                                \
+"      mulu%.w %2,%3\n"                                                \
+"      swap    %2\n"                                                   \
+"      mulu%.w %5,%2\n"                                                \
+"      add%.l  %3,%2\n"                                                \
+"      jcc     1f\n"                                                   \
+"      add%.l  %#0x10000,%0\n"                                         \
+"1:    move%.l %2,%3\n"                                                \
+"      clr%.w  %2\n"                                                   \
+"      swap    %2\n"                                                   \
+"      swap    %3\n"                                                   \
+"      clr%.w  %3\n"                                                   \
+"      add%.l  %3,%1\n"                                                \
+"      addx%.l %2,%0\n"                                                \
+"      | End inlined umul_ppmm"                                        \
+             : "=&d" (xh), "=&d" (xl),                                 \
+               "=d" (__umul_tmp1), "=&d" (__umul_tmp2)                 \
+             : "%2" ((USItype)(a)), "d" ((USItype)(b)));               \
+  } while (0)
+#define UMUL_TIME 100
+#define UDIV_TIME 400
+#endif /* not mc68020 */
+/* The '020, '030, '040 and '060 have bitfield insns.
+   GCC 3.4 defines __mc68020__ when in CPU32 mode, check for __mcpu32__ to
+   exclude bfffo on that chip (bitfield insns not available).  */
+#if (defined (__mc68020__) || defined (mc68020)    \
+     || defined (__mc68030__) || defined (mc68030) \
+     || defined (__mc68040__) || defined (mc68040) \
+     || defined (__mc68060__) || defined (mc68060) \
+     || defined (__NeXT__))                        \
+  && ! defined (__mcpu32__)
+/* count_leading_zeros(count, x): single bfffo instruction over the
+   bit-field spec {0:0} (both fields come from the constant operand 0), with
+   the result written straight into count.  */
+#define count_leading_zeros(count, x) \
+  __asm__ ("bfffo %1{%b2:%b2},%0"                                      \
+          : "=d" (count)                                               \
+          : "od" ((USItype) (x)), "n" (0))
+#define COUNT_LEADING_ZEROS_0 32
+#endif
+#endif /* mc68000 */
+
+#if defined (__m88000__) && W_TYPE_SIZE == 32
+/* add_ssaaaa(sh, sl, ah, al, bh, bl): two-word addition.  addu.co adds the
+   low words and addu.ci then adds the high words (carry-out / carry-in
+   forms, going by the .co/.ci suffixes).  */
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("addu.co %1,%r4,%r5\n\taddu.ci %0,%r2,%r3"                  \
+          : "=r" (sh), "=&r" (sl)                                      \
+          : "rJ" (ah), "rJ" (bh), "%rJ" (al), "rJ" (bl))
+/* sub_ddmmss(sh, sl, ah, al, bh, bl): two-word subtraction.  subu.co
+   subtracts the low words and subu.ci then subtracts the high words
+   (carry-out / carry-in forms, going by the .co/.ci suffixes).  */
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("subu.co %1,%r4,%r5\n\tsubu.ci %0,%r2,%r3"                  \
+          : "=r" (sh), "=&r" (sl)                                      \
+          : "rJ" (ah), "rJ" (bh), "rJ" (al), "rJ" (bl))
+/* count_leading_zeros(count, x): ff1 stores a bit index in __cbtmp, and XOR
+   with 31 converts that index into the leading-zero count.  */
+#define count_leading_zeros(count, x) \
+  do {                                                                 \
+    USItype __cbtmp;                                                   \
+    __asm__ ("ff1 %0,%1" : "=r" (__cbtmp) : "r" (x));                  \
+    (count) = __cbtmp ^ 31;                                            \
+  } while (0)
+#define COUNT_LEADING_ZEROS_0 63 /* sic */
+#if defined (__m88110__)
+/* umul_ppmm(wh, wl, u, v): 32x32->64 multiply with mulu.d.  The product is
+   written to the double-word member of a union and split into its high
+   (__h -> wh) and low (__l -> wl) words.  */
+#define umul_ppmm(wh, wl, u, v) \
+  do {                                                                 \
+    union {UDItype __ll;                                               \
+          struct {USItype __h, __l;} __i;                              \
+         } __x;                                                        \
+    __asm__ ("mulu.d %0,%1,%2" : "=r" (__x.__ll) : "r" (u), "r" (v));  \
+    (wh) = __x.__i.__h;                                                        \
+    (wl) = __x.__i.__l;                                                        \
+  } while (0)
+#define udiv_qrnnd(q, r, n1, n0, d) \
+  ({union {UDItype __ll;                                               \
+          struct {USItype __h, __l;} __i;                              \
+         } __x, __q;                                                   \
+  __x.__i.__h = (n1); __x.__i.__l = (n0);                              \
+  __asm__ ("divu.d %0,%1,%2"                                           \
+          : "=r" (__q.__ll) : "r" (__x.__ll), "r" (d));                \
+  (r) = (n0) - __q.__l * (d); (q) = __q.__l; })
+#define UMUL_TIME 5
+#define UDIV_TIME 25
+#else
+#define UMUL_TIME 17
+#define UDIV_TIME 150
+#endif /* __m88110__ */
+#endif /* __m88000__ */
+
+#if defined (__mips) && W_TYPE_SIZE == 32
+#if __GMP_GNUC_PREREQ (4,4)
+/* umul_ppmm for 32-bit MIPS, plain C variant (gcc >= 4.4): multiply U,
+   widened to UDItype, by V.  W1 is assigned the product shifted right by
+   32 bits and W0 is assigned the product itself.  */
+#define umul_ppmm(w1, w0, u, v)                                        \
+  do                                                                   \
+    {                                                                  \
+      UDItype __prod = (UDItype)(u) * (v);                             \
+      w1 = __prod >> 32;                                               \
+      w0 = __prod;                                                     \
+    }                                                                  \
+  while (0)
+#endif
+#if !defined (umul_ppmm) && __GMP_GNUC_PREREQ (2,7)
+/* umul_ppmm for 32-bit MIPS, gcc >= 2.7: a bare multu whose outputs are
+   bound with the "l" (W0) and "h" (W1) constraints, so no explicit move
+   instructions are emitted.
+   NOTE(review): presumably "l"/"h" name the LO/HI registers -- confirm in
+   the GCC MIPS constraint documentation.  */
+#define umul_ppmm(w1, w0, u, v) \
+  __asm__ ("multu %2,%3" : "=l" (w0), "=h" (w1) : "d" (u), "d" (v))
+#endif
+#if !defined (umul_ppmm)
+/* umul_ppmm for 32-bit MIPS, last-resort variant: multu followed by
+   explicit mflo into W0 and mfhi into W1.  */
+#define umul_ppmm(w1, w0, u, v) \
+  __asm__ ("multu %2,%3\n\tmflo %0\n\tmfhi %1"                         \
+          : "=d" (w0), "=d" (w1) : "d" (u), "d" (v))
+#endif
+#define UMUL_TIME 10
+#define UDIV_TIME 100
+#endif /* __mips */
+
+#if (defined (__mips) && __mips >= 3) && W_TYPE_SIZE == 64
+#if __GMP_GNUC_PREREQ (4,4)
+/* umul_ppmm for 64-bit MIPS, plain C variant (gcc >= 4.4): multiply U,
+   widened to a local mode(TI) unsigned type, by V.  W1 is assigned the
+   product shifted right by 64 bits and W0 is assigned the product itself.  */
+#define umul_ppmm(w1, w0, u, v)                                        \
+  do                                                                   \
+    {                                                                  \
+      typedef unsigned int __prod_UTItype __attribute__((mode(TI)));   \
+      __prod_UTItype __prod = (__prod_UTItype)(u) * (v);               \
+      w1 = __prod >> 64;                                               \
+      w0 = __prod;                                                     \
+    }                                                                  \
+  while (0)
+#endif
+#if !defined (umul_ppmm) && __GMP_GNUC_PREREQ (2,7)
+/* umul_ppmm for 64-bit MIPS, gcc >= 2.7: a bare dmultu whose outputs are
+   bound with the "l" (W0) and "h" (W1) constraints.
+   NOTE(review): presumably "l"/"h" name the LO/HI registers -- confirm in
+   the GCC MIPS constraint documentation.  */
+#define umul_ppmm(w1, w0, u, v) \
+  __asm__ ("dmultu %2,%3" : "=l" (w0), "=h" (w1) : "d" (u), "d" (v))
+#endif
+#if !defined (umul_ppmm)
+/* umul_ppmm for 64-bit MIPS, last-resort variant: dmultu followed by
+   explicit mflo into W0 and mfhi into W1.  */
+#define umul_ppmm(w1, w0, u, v) \
+  __asm__ ("dmultu %2,%3\n\tmflo %0\n\tmfhi %1"                                \
+          : "=d" (w0), "=d" (w1) : "d" (u), "d" (v))
+#endif
+#define UMUL_TIME 20
+#define UDIV_TIME 140
+#endif /* __mips */
+
+#if defined (__mmix__) && W_TYPE_SIZE == 64
+/* umul_ppmm for MMIX: a single MULU writes W0 as its explicit destination;
+   W1 is bound with the "z" constraint.
+   NOTE(review): presumably "z" selects the special register that receives
+   the high half of the product -- confirm in the GCC MMIX constraint
+   documentation.  */
+#define umul_ppmm(w1, w0, u, v) \
+  __asm__ ("MULU %0,%2,%3" : "=r" (w0), "=z" (w1) : "r" (u), "r" (v))
+#endif
+
+#if defined (__ns32000__) && W_TYPE_SIZE == 32
+/* umul_ppmm for the ns32000: meid operates in place on a UDItype that is
+   pre-loaded with U (matching constraint "%0") and multiplied by V.  The
+   result is split through the union, whose struct lists __l before __h;
+   W1 is read from __h and W0 from __l.  */
+#define umul_ppmm(w1, w0, u, v) \
+  ({union {UDItype __ll;                                               \
+          struct {USItype __l, __h;} __i;                              \
+         } __x;                                                        \
+  __asm__ ("meid %2,%0"                                                        \
+          : "=g" (__x.__ll)                                            \
+          : "%0" ((USItype)(u)), "g" ((USItype)(v)));                  \
+  (w1) = __x.__i.__h; (w0) = __x.__i.__l;})
+/* __umulsidi3 for the ns32000: same meid sequence as umul_ppmm, but the
+   statement expression evaluates to the whole UDItype result instead of
+   splitting it into two words.  */
+#define __umulsidi3(u, v) \
+  ({UDItype __w;                                                       \
+    __asm__ ("meid %2,%0"                                              \
+            : "=g" (__w)                                               \
+            : "%0" ((USItype)(u)), "g" ((USItype)(v)));                \
+    __w; })
+/* udiv_qrnnd for the ns32000: N1 and N0 are packed into a UDItype through
+   the union (N1 in __h, N0 in __l), deid divides it in place by D, and the
+   result is unpacked with R taken from __l and Q from __h.  */
+#define udiv_qrnnd(q, r, n1, n0, d) \
+  ({union {UDItype __ll;                                               \
+          struct {USItype __l, __h;} __i;                              \
+         } __x;                                                        \
+  __x.__i.__h = (n1); __x.__i.__l = (n0);                              \
+  __asm__ ("deid %2,%0"                                                        \
+          : "=g" (__x.__ll)                                            \
+          : "0" (__x.__ll), "g" ((USItype)(d)));                       \
+  (r) = __x.__i.__l; (q) = __x.__i.__h; })
+/* count_trailing_zeros for the ns32000: COUNT is pre-loaded with 0 through
+   the matching constraint "0" and then updated in place by ffsd applied
+   to X.  */
+#define count_trailing_zeros(count,x) \
+  do {                                                                 \
+    __asm__ ("ffsd     %2,%0"                                          \
+            : "=r" (count)                                             \
+            : "0" ((USItype) 0), "r" ((USItype) (x)));                 \
+  } while (0)
+#endif /* __ns32000__ */
+
+/* In the past we had a block of various #defines tested
+       _ARCH_PPC    - AIX
+       _ARCH_PWR    - AIX
+       __powerpc__  - gcc
+       __POWERPC__  - BEOS
+       __ppc__      - Darwin
+       PPC          - old gcc, GNU/Linux, SysV
+   The plain PPC test was not good for vxWorks, since PPC is defined on all
+   CPUs there (eg. m68k too), as a constant one is expected to compare
+   CPU_FAMILY against.
+
+   At any rate, this was pretty unattractive and a bit fragile.  The use of
+   HAVE_HOST_CPU_FAMILY is designed to cut through it all and be sure of
+   getting the desired effect.
+
+   ENHANCE-ME: We should test _IBMR2 here when we add assembly support for
+   the system vendor compilers.  (Is that vendor compilers with inline asm,
+   or what?)  */
+
+#if (HAVE_HOST_CPU_FAMILY_power || HAVE_HOST_CPU_FAMILY_powerpc)        \
+  && W_TYPE_SIZE == 32
+/* add_ssaaaa for 32-bit POWER/PowerPC: the low words are added with a
+   carry-setting add, then the high word is formed with a carry-consuming
+   instruction chosen at compile time:
+     - BH a constant 0:           addze on AH (BH is not an asm operand);
+     - BH a constant all-ones:    addme on AH (BH is not an asm operand);
+     - otherwise:                 adde of AH and BH.
+   Each mnemonic is written as {old|new} so either spelling can be emitted.
+   SL is earlyclobber ("=&r") because it is written before the high-word
+   inputs are read.  */
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  do {                                                                 \
+    if (__builtin_constant_p (bh) && (bh) == 0)                                \
+      __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2"          \
+            : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
+    else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)                \
+      __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2"          \
+            : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
+    else                                                               \
+      __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3"         \
+            : "=r" (sh), "=&r" (sl)                                    \
+            : "r" (ah), "r" (bh), "%r" (al), "rI" (bl));               \
+  } while (0)
+/* sub_ddmmss for 32-bit POWER/PowerPC: the low words are subtracted with a
+   carry-setting subtract-from (BL subtracted from AL, AL may be an
+   immediate), then the high word is formed with a carry-consuming
+   instruction chosen at compile time:
+     - AH a constant 0:           subfze on BH;
+     - AH a constant all-ones:    subfme on BH;
+     - BH a constant 0:           addme on AH;
+     - BH a constant all-ones:    addze on AH;
+     - otherwise:                 subfe of BH from AH.
+   In the four special cases the constant high word is not passed to the
+   asm at all.  */
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  do {                                                                 \
+    if (__builtin_constant_p (ah) && (ah) == 0)                                \
+      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2"      \
+              : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
+    else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0)                \
+      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2"      \
+              : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
+    else if (__builtin_constant_p (bh) && (bh) == 0)                   \
+      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2"                \
+              : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
+    else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)                \
+      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2"                \
+              : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
+    else                                                               \
+      __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2"     \
+              : "=r" (sh), "=&r" (sl)                                  \
+              : "r" (ah), "r" (bh), "rI" (al), "r" (bl));              \
+  } while (0)
+/* count_leading_zeros for 32-bit POWER/PowerPC: a single cntlz/cntlzw
+   (old/new mnemonic) of X into COUNT.  COUNT_LEADING_ZEROS_0 records 32 as
+   the value obtained for X == 0.  */
+#define count_leading_zeros(count, x) \
+  __asm__ ("{cntlz|cntlzw} %0,%1" : "=r" (count) : "r" (x))
+#define COUNT_LEADING_ZEROS_0 32
+#if HAVE_HOST_CPU_FAMILY_powerpc
+#if __GMP_GNUC_PREREQ (4,4)
+/* umul_ppmm for 32-bit PowerPC, plain C variant (gcc >= 4.4): multiply U,
+   widened to UDItype, by V.  W1 is assigned the product shifted right by
+   32 bits and W0 is assigned the product itself.  */
+#define umul_ppmm(w1, w0, u, v)                                        \
+  do                                                                   \
+    {                                                                  \
+      UDItype __prod = (UDItype)(u) * (v);                             \
+      w1 = __prod >> 32;                                               \
+      w0 = __prod;                                                     \
+    }                                                                  \
+  while (0)
+#endif
+#if !defined (umul_ppmm)
+/* umul_ppmm for 32-bit PowerPC, asm variant: mulhwu produces PH, and PL is
+   the ordinary (truncating) C product of the two operands.
+   The macro arguments are evaluated exactly once, into the USItype
+   temporaries __m0/__m1, and the asm reads those temporaries.  Passing the
+   raw arguments to the asm as well would evaluate them a second time and
+   would hand the asm whatever type the caller's expression happens to
+   have instead of a 32-bit limb.  */
+#define umul_ppmm(ph, pl, m0, m1) \
+  do {                                                                 \
+    USItype __m0 = (m0), __m1 = (m1);                                  \
+    __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1)); \
+    (pl) = __m0 * __m1;                                                        \
+  } while (0)
+#endif
+#define UMUL_TIME 15
+/* smul_ppmm for 32-bit PowerPC: mulhw produces PH, and PL is the ordinary
+   C product of the two operands.
+   The macro arguments are evaluated exactly once, into the SItype
+   temporaries __m0/__m1, and the asm reads those temporaries rather than
+   re-evaluating the raw arguments.  */
+#define smul_ppmm(ph, pl, m0, m1) \
+  do {                                                                 \
+    SItype __m0 = (m0), __m1 = (m1);                                   \
+    __asm__ ("mulhw %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));  \
+    (pl) = __m0 * __m1;                                                        \
+  } while (0)
+#define SMUL_TIME 14
+#define UDIV_TIME 120
+#else
+#define UMUL_TIME 8
+/* smul_ppmm for POWER (non-PowerPC): a single mul writes XH as its explicit
+   destination; XL is bound with the "q" constraint.
+   NOTE(review): presumably "q" is the MQ register, which receives the low
+   word -- confirm in the GCC rs6000 constraint documentation.  */
+#define smul_ppmm(xh, xl, m0, m1) \
+  __asm__ ("mul %0,%2,%3" : "=r" (xh), "=q" (xl) : "r" (m0), "r" (m1))
+#define SMUL_TIME 4
+/* sdiv_qrnnd for POWER (non-PowerPC): a single div with NH and D as
+   register operands writes Q.  NL is tied to the same location as R
+   (matching constraint "1"), which is bound with the "q" constraint.  */
+#define sdiv_qrnnd(q, r, nh, nl, d) \
+  __asm__ ("div %0,%2,%4" : "=r" (q), "=q" (r) : "r" (nh), "1" (nl), "r" (d))
+#define UDIV_TIME 100
+#endif
+#endif /* 32-bit POWER architecture variants.  */
+
+/* We should test _IBMR2 here when we add assembly support for the system
+   vendor compilers.  */
+#if HAVE_HOST_CPU_FAMILY_powerpc && W_TYPE_SIZE == 64
+#if !defined (_LONG_LONG_LIMB)
+/* _LONG_LONG_LIMB is ABI=mode32 where adde operates on 32-bit values.  So
+   use adde etc only when not _LONG_LONG_LIMB.  */
+/* add_ssaaaa for 64-bit PowerPC (only when limbs are not long long): the
+   low words are added with a carry-setting add, then the high word is
+   formed with a carry-consuming instruction chosen at compile time:
+     - BH a constant 0:           addze on AH;
+     - BH a constant all-ones:    addme on AH;
+     - otherwise:                 adde of AH and BH.  */
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  do {                                                                 \
+    if (__builtin_constant_p (bh) && (bh) == 0)                                \
+      __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2"          \
+            : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
+    else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)                \
+      __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2"          \
+            : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
+    else                                                               \
+      __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3"         \
+            : "=r" (sh), "=&r" (sl)                                    \
+            : "r" (ah), "r" (bh), "%r" (al), "rI" (bl));               \
+  } while (0)
+/* We use "*rI" for the constant operand here, since with just "I", gcc barfs.
+   This might seem strange, but gcc folds away the dead code late.  */
+/* sub_ddmmss for 64-bit PowerPC (only when limbs are not long long).
+
+   Two strategies for the low word, each followed by the same compile-time
+   choice of carry-consuming instruction for the high word (subfze/subfme
+   when AH is a constant 0/all-ones, addme/addze when BH is a constant
+   0/all-ones, subfe otherwise):
+
+     - BL is a small non-zero constant: add its negation with addic.
+     - otherwise: subtract BL from AL with a carry-setting subtract-from.
+
+   BL == 0 must NOT take the addic path: adding an immediate 0 never
+   produces a carry-out, and a clear carry means "borrow" to the following
+   subtract-extended instruction, so the high word would come out one too
+   small.  The subtract-from path sets the carry correctly for BL == 0.
+
+   In the addic path AL is constrained to a register ("r"), because the
+   source slot of addic cannot hold an immediate.  BL is parenthesised
+   wherever it is expanded so that expression arguments are negated and
+   compared as a whole.  */
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  do {                                                                       \
+    if (__builtin_constant_p (bl)                                            \
+        && (bl) > -0x8000 && (bl) <= 0x8000 && (bl) != 0) {                  \
+       if (__builtin_constant_p (ah) && (ah) == 0)                           \
+         __asm__ ("{ai|addic} %1,%3,%4\n\t{sfze|subfze} %0,%2"               \
+                  : "=r" (sh), "=&r" (sl) : "r" (bh), "r" (al), "*rI" (-(bl))); \
+       else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0)           \
+         __asm__ ("{ai|addic} %1,%3,%4\n\t{sfme|subfme} %0,%2"               \
+                  : "=r" (sh), "=&r" (sl) : "r" (bh), "r" (al), "*rI" (-(bl))); \
+       else if (__builtin_constant_p (bh) && (bh) == 0)                      \
+         __asm__ ("{ai|addic} %1,%3,%4\n\t{ame|addme} %0,%2"                 \
+                  : "=r" (sh), "=&r" (sl) : "r" (ah), "r" (al), "*rI" (-(bl))); \
+       else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)           \
+         __asm__ ("{ai|addic} %1,%3,%4\n\t{aze|addze} %0,%2"                 \
+                  : "=r" (sh), "=&r" (sl) : "r" (ah), "r" (al), "*rI" (-(bl))); \
+       else                                                                  \
+         __asm__ ("{ai|addic} %1,%4,%5\n\t{sfe|subfe} %0,%3,%2"              \
+                  : "=r" (sh), "=&r" (sl)                                    \
+                  : "r" (ah), "r" (bh), "r" (al), "*rI" (-(bl)));            \
+      } else {                                                               \
+       if (__builtin_constant_p (ah) && (ah) == 0)                           \
+         __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2"         \
+                  : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));  \
+       else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0)           \
+         __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2"         \
+                  : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));  \
+       else if (__builtin_constant_p (bh) && (bh) == 0)                      \
+         __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2"           \
+                  : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));  \
+       else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)           \
+         __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2"           \
+                  : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));  \
+       else                                                                  \
+         __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2"        \
+                  : "=r" (sh), "=&r" (sl)                                    \
+                  : "r" (ah), "r" (bh), "rI" (al), "r" (bl));                \
+      }                                                                              \
+  } while (0)
+#endif /* ! _LONG_LONG_LIMB */
+/* count_leading_zeros for 64-bit PowerPC: a single cntlzd of X into COUNT.
+   COUNT_LEADING_ZEROS_0 records 64 as the value obtained for X == 0.  */
+#define count_leading_zeros(count, x) \
+  __asm__ ("cntlzd %0,%1" : "=r" (count) : "r" (x))
+#define COUNT_LEADING_ZEROS_0 64
+#if 0 && __GMP_GNUC_PREREQ (4,4) /* Disable, this results in libcalls! */
+/* umul_ppmm for 64-bit PowerPC, plain C variant.  Compiled out by the
+   enclosing `#if 0'.  It multiplies U, widened to a local mode(TI) unsigned
+   type, by V; W1 is assigned the product shifted right by 64 bits and W0
+   is assigned the product itself.  */
+#define umul_ppmm(w1, w0, u, v) \
+  do {                                                                 \
+    typedef unsigned int __ll_UTItype __attribute__((mode(TI)));       \
+    __ll_UTItype __ll = (__ll_UTItype)(u) * (v);                       \
+    w1 = __ll >> 64;                                                   \
+    w0 = __ll;                                                         \
+  } while (0)
+#endif
+#if !defined (umul_ppmm)
+/* umul_ppmm for 64-bit PowerPC, asm variant: mulhdu produces PH, and PL is
+   the ordinary (truncating) C product of the two operands.
+   The macro arguments are evaluated exactly once, into the UDItype
+   temporaries __m0/__m1, and the asm reads those temporaries.  Passing the
+   raw arguments to the asm as well would evaluate them a second time and
+   would hand the asm whatever type the caller's expression happens to
+   have instead of a 64-bit limb.  */
+#define umul_ppmm(ph, pl, m0, m1) \
+  do {                                                                 \
+    UDItype __m0 = (m0), __m1 = (m1);                                  \
+    __asm__ ("mulhdu %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1)); \
+    (pl) = __m0 * __m1;                                                        \
+  } while (0)
+#endif
+#define UMUL_TIME 15
+/* smul_ppmm for 64-bit PowerPC: mulhd produces PH, and PL is the ordinary
+   C product of the two operands.
+   The macro arguments are evaluated exactly once, into the DItype
+   temporaries __m0/__m1, and the asm reads those temporaries rather than
+   re-evaluating the raw arguments.  */
+#define smul_ppmm(ph, pl, m0, m1) \
+  do {                                                                 \
+    DItype __m0 = (m0), __m1 = (m1);                                   \
+    __asm__ ("mulhd %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));  \
+    (pl) = __m0 * __m1;                                                        \
+  } while (0)
+#define SMUL_TIME 14  /* ??? */
+#define UDIV_TIME 120 /* ??? */
+#endif /* 64-bit PowerPC.  */
+
+#if defined (__pyr__) && W_TYPE_SIZE == 32
+/* add_ssaaaa for Pyramid: addw on the low words followed by addwc on the
+   high words.  Both instructions work in place: SH starts out holding AH
+   and SL starts out holding AL (matching constraints "0" and "%1").  */
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("addw %5,%1\n\taddwc %3,%0"                                 \
+          : "=r" (sh), "=&r" (sl)                                      \
+          : "0"  ((USItype)(ah)), "g" ((USItype)(bh)),                 \
+            "%1" ((USItype)(al)), "g" ((USItype)(bl)))
+/* sub_ddmmss for Pyramid: subw on the low words followed by subwb on the
+   high words.  Both instructions work in place: SH starts out holding AH
+   and SL starts out holding AL (matching constraints "0" and "1").  */
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("subw %5,%1\n\tsubwb %3,%0"                                 \
+          : "=r" (sh), "=&r" (sl)                                      \
+          : "0" ((USItype)(ah)), "g" ((USItype)(bh)),                  \
+            "1" ((USItype)(al)), "g" ((USItype)(bl)))
+/* This insn works on Pyramids with AP, XP, or MI CPUs, but not with SP.  */
+/* umul_ppmm for Pyramid: movw copies U into the %R0 part of the UDItype
+   result operand, then uemul multiplies by V in place.  The result is
+   split through the union, W1 from member __h and W0 from member __l.  */
+#define umul_ppmm(w1, w0, u, v) \
+  ({union {UDItype __ll;                                               \
+          struct {USItype __h, __l;} __i;                              \
+         } __x;                                                        \
+  __asm__ ("movw %1,%R0\n\tuemul %2,%0"                                        \
+          : "=&r" (__x.__ll)                                           \
+          : "g" ((USItype) (u)), "g" ((USItype)(v)));                  \
+  (w1) = __x.__i.__h; (w0) = __x.__i.__l;})
+#endif /* __pyr__ */
+
+#if defined (__ibm032__) /* RT/ROMP */  && W_TYPE_SIZE == 32
+/* add_ssaaaa for the RT/ROMP: `a' on the low words followed by `ae' on the
+   high words.  Both instructions work in place: SH starts out holding AH
+   and SL starts out holding AL (matching constraints "0" and "%1").  */
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("a %1,%5\n\tae %0,%3"                                       \
+          : "=r" (sh), "=&r" (sl)                                      \
+          : "0"  ((USItype)(ah)), "r" ((USItype)(bh)),                 \
+            "%1" ((USItype)(al)), "r" ((USItype)(bl)))
+/* sub_ddmmss for the RT/ROMP: `s' on the low words followed by `se' on the
+   high words.  Both instructions work in place: SH starts out holding AH
+   and SL starts out holding AL (matching constraints "0" and "1").  */
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("s %1,%5\n\tse %0,%3"                                       \
+          : "=r" (sh), "=&r" (sl)                                      \
+          : "0" ((USItype)(ah)), "r" ((USItype)(bh)),                  \
+            "1" ((USItype)(al)), "r" ((USItype)(bl)))
+/* smul_ppmm for the RT/ROMP: r2 is cleared, M0 is moved to special
+   register r10 with mts, and sixteen `m' steps against M1 are executed.
+   PH is then copied out of r2 with cas and PL is read back from r10 with
+   mfs.  r2 is used as a fixed scratch register and is declared clobbered.
+   NOTE(review): presumably each `m' is a multiply step retiring two bits
+   of the multiplier -- confirm against the ROMP manual.  */
+#define smul_ppmm(ph, pl, m0, m1) \
+  __asm__ (                                                            \
+       "s      r2,r2\n"                                                \
+"      mts r10,%2\n"                                                   \
+"      m       r2,%3\n"                                                \
+"      m       r2,%3\n"                                                \
+"      m       r2,%3\n"                                                \
+"      m       r2,%3\n"                                                \
+"      m       r2,%3\n"                                                \
+"      m       r2,%3\n"                                                \
+"      m       r2,%3\n"                                                \
+"      m       r2,%3\n"                                                \
+"      m       r2,%3\n"                                                \
+"      m       r2,%3\n"                                                \
+"      m       r2,%3\n"                                                \
+"      m       r2,%3\n"                                                \
+"      m       r2,%3\n"                                                \
+"      m       r2,%3\n"                                                \
+"      m       r2,%3\n"                                                \
+"      m       r2,%3\n"                                                \
+"      cas     %0,r2,r0\n"                                             \
+"      mfs     r10,%1"                                                 \
+          : "=r" (ph), "=r" (pl)                                       \
+          : "%r" ((USItype)(m0)), "r" ((USItype)(m1))                  \
+          : "r2")
+#define UMUL_TIME 20
+#define UDIV_TIME 200
+/* count_leading_zeros for the RT/ROMP: clz is applied to one 16-bit half
+   of X.  When X >= 0x10000 it is applied to X >> 16; otherwise it is
+   applied to X itself and 16 is added to the result.
+   NOTE(review): this implies clz only examines the low 16 bits of its
+   operand -- confirm against the ROMP manual.  */
+#define count_leading_zeros(count, x) \
+  do {                                                                 \
+    if ((x) >= 0x10000)                                                        \
+      __asm__ ("clz    %0,%1"                                          \
+              : "=r" (count) : "r" ((USItype)(x) >> 16));              \
+    else                                                               \
+      {                                                                        \
+       __asm__ ("clz   %0,%1"                                          \
+                : "=r" (count) : "r" ((USItype)(x)));                  \
+       (count) += 16;                                                  \
+      }                                                                        \
+  } while (0)
+#endif /* RT/ROMP */
+
+#if defined (__sh2__) && W_TYPE_SIZE == 32
+/* umul_ppmm for SH2: dmulu.l of U and V, then W0 is stored from macl and
+   W1 from mach.  Both macl and mach are declared clobbered.  */
+#define umul_ppmm(w1, w0, u, v) \
+  __asm__ ("dmulu.l %2,%3\n\tsts macl,%1\n\tsts mach,%0"               \
+          : "=r" (w1), "=r" (w0) : "r" (u), "r" (v) : "macl", "mach")
+#define UMUL_TIME 5
+#endif
+
+#if defined (__sparc__) && W_TYPE_SIZE == 32
+/* add_ssaaaa for 32-bit SPARC: addcc on the low words (AL, BL) into SL,
+   followed by addx on the high words (AH, BH) into SH.  BH and BL may be
+   immediates ("rI"); the condition codes are reported as clobbered through
+   __CLOBBER_CC.  */
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("addcc %r4,%5,%1\n\taddx %r2,%3,%0"                         \
+          : "=r" (sh), "=&r" (sl)                                      \
+          : "rJ" (ah), "rI" (bh),"%rJ" (al), "rI" (bl)                 \
+          __CLOBBER_CC)
+/* sub_ddmmss for 32-bit SPARC: subcc on the low words (AL, BL) into SL,
+   followed by subx on the high words (AH, BH) into SH.  BH and BL may be
+   immediates ("rI"); the condition codes are reported as clobbered through
+   __CLOBBER_CC.  */
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("subcc %r4,%5,%1\n\tsubx %r2,%3,%0"                         \
+          : "=r" (sh), "=&r" (sl)                                      \
+          : "rJ" (ah), "rI" (bh), "rJ" (al), "rI" (bl) \
+          __CLOBBER_CC)
+/* FIXME: When gcc -mcpu=v9 is used on solaris, gcc/config/sol2-sld-64.h
+   doesn't define anything to indicate that to us, it only sets __sparcv8. */
+#if defined (__sparc_v9__) || defined (__sparcv9)
+/* Perhaps we should use floating-point operations here?  */
+#if 0
+/* Triggers a bug making mpz/tests/t-gcd.c fail.
+   Perhaps we simply need to explicitly zero-extend the inputs?  */
+/* Compiled out by the enclosing `#if 0'.  mulx of U and V into scratch
+   register %g1; W0 is %g1 after `srl ...,0' and W1 is %g1 shifted right by
+   32 with srlx.  %g1 is declared clobbered.  */
+#define umul_ppmm(w1, w0, u, v) \
+  __asm__ ("mulx %2,%3,%%g1; srl %%g1,0,%1; srlx %%g1,32,%0" :         \
+          "=r" (w1), "=r" (w0) : "r" (u), "r" (v) : "g1")
+#else
+/* Use v8 umul until above bug is fixed.  */
+/* umul_ppmm for SPARC v9 using the v8 instruction: umul of U and V writes
+   W0, then W1 is read from the %y register with rd.  */
+#define umul_ppmm(w1, w0, u, v) \
+  __asm__ ("umul %2,%3,%1;rd %%y,%0" : "=r" (w1), "=r" (w0) : "r" (u), "r" (v))
+#endif
+/* Use a plain v8 divide for v9.  */
+/* udiv_qrnnd for SPARC v9 using the v8 udiv: N1 is moved into %y, three
+   nops follow, and udiv divides N0 by D into a temporary quotient.  The
+   remainder R is rebuilt in C as N0 - quotient * D.  */
+#define udiv_qrnnd(q, r, n1, n0, d) \
+  do {                                                                 \
+    USItype __q;                                                       \
+    __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0"                    \
+            : "=r" (__q) : "r" (n1), "r" (n0), "r" (d));               \
+    (r) = (n0) - __q * (d);                                            \
+    (q) = __q;                                                         \
+  } while (0)
+#else
+#if defined (__sparc_v8__)   /* gcc normal */                          \
+  || defined (__sparcv8)     /* gcc solaris */                         \
+  || HAVE_HOST_CPU_supersparc
+/* Don't match immediate range because, 1) it is not often useful,
+   2) the 'I' flag thinks of the range as a 13 bit signed interval,
+   while we want to match a 13 bit interval, sign extended to 32 bits,
+   but INTERPRETED AS UNSIGNED.  */
+/* umul_ppmm for SPARC v8 / SuperSPARC: umul of U and V writes W0, then W1
+   is read from the %y register with rd.  Both inputs are constrained to
+   registers only ("r").  */
+#define umul_ppmm(w1, w0, u, v) \
+  __asm__ ("umul %2,%3,%1;rd %%y,%0" : "=r" (w1), "=r" (w0) : "r" (u), "r" (v))
+#define UMUL_TIME 5
+
+#if HAVE_HOST_CPU_supersparc
+#define UDIV_TIME 60           /* SuperSPARC timing */
+#else
+/* Don't use this on SuperSPARC because its udiv only handles 53 bit
+   dividends and will trap to the kernel for the rest. */
+/* udiv_qrnnd for SPARC v8 (not SuperSPARC): N1 is moved into %y, three
+   nops follow, and udiv divides N0 by D into a temporary quotient.  The
+   remainder R is rebuilt in C as N0 - quotient * D.  */
+#define udiv_qrnnd(q, r, n1, n0, d) \
+  do {                                                                 \
+    USItype __q;                                                       \
+    __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0"                    \
+            : "=r" (__q) : "r" (n1), "r" (n0), "r" (d));               \
+    (r) = (n0) - __q * (d);                                            \
+    (q) = __q;                                                         \
+  } while (0)
+#define UDIV_TIME 25
+#endif /* HAVE_HOST_CPU_supersparc */
+
+#else /* ! __sparc_v8__ */
+#if defined (__sparclite__)
+/* This has hardware multiply but not divide.  It also has two additional
+   instructions scan (ffs from high bit) and divscc.  */
+/* umul_ppmm for SPARClite: umul of U and V writes W0, then W1 is read from
+   the %y register with rd.  */
+#define umul_ppmm(w1, w0, u, v) \
+  __asm__ ("umul %2,%3,%1;rd %%y,%0" : "=r" (w1), "=r" (w0) : "r" (u), "r" (v))
+#define UMUL_TIME 5
+/* udiv_qrnnd for SPARClite, built from divscc steps.  N1 is written to %y,
+   `tst %g0' sets the condition codes, and 32 divscc instructions are
+   chained through scratch register %g1 (the first reads N0, the last
+   writes Q).  R is then read from %y; if the final condition codes say
+   "less than", the annulled branch lets `add' correct R by adding D,
+   otherwise the add in the delay slot is skipped.  %g1 and the condition
+   codes are declared clobbered.
+   NOTE(review): Q and R are plain "=r" outputs although D (%4) is still
+   read after R has been written by rd -- confirm that no earlyclobber is
+   needed here.  */
+#define udiv_qrnnd(q, r, n1, n0, d) \
+  __asm__ ("! Inlined udiv_qrnnd\n"                                    \
+"      wr      %%g0,%2,%%y     ! Not a delayed write for sparclite\n"  \
+"      tst     %%g0\n"                                                 \
+"      divscc  %3,%4,%%g1\n"                                           \
+"      divscc  %%g1,%4,%%g1\n"                                         \
+"      divscc  %%g1,%4,%%g1\n"                                         \
+"      divscc  %%g1,%4,%%g1\n"                                         \
+"      divscc  %%g1,%4,%%g1\n"                                         \
+"      divscc  %%g1,%4,%%g1\n"                                         \
+"      divscc  %%g1,%4,%%g1\n"                                         \
+"      divscc  %%g1,%4,%%g1\n"                                         \
+"      divscc  %%g1,%4,%%g1\n"                                         \
+"      divscc  %%g1,%4,%%g1\n"                                         \
+"      divscc  %%g1,%4,%%g1\n"                                         \
+"      divscc  %%g1,%4,%%g1\n"                                         \
+"      divscc  %%g1,%4,%%g1\n"                                         \
+"      divscc  %%g1,%4,%%g1\n"                                         \
+"      divscc  %%g1,%4,%%g1\n"                                         \
+"      divscc  %%g1,%4,%%g1\n"                                         \
+"      divscc  %%g1,%4,%%g1\n"                                         \
+"      divscc  %%g1,%4,%%g1\n"                                         \
+"      divscc  %%g1,%4,%%g1\n"                                         \
+"      divscc  %%g1,%4,%%g1\n"                                         \
+"      divscc  %%g1,%4,%%g1\n"                                         \
+"      divscc  %%g1,%4,%%g1\n"                                         \
+"      divscc  %%g1,%4,%%g1\n"                                         \
+"      divscc  %%g1,%4,%%g1\n"                                         \
+"      divscc  %%g1,%4,%%g1\n"                                         \
+"      divscc  %%g1,%4,%%g1\n"                                         \
+"      divscc  %%g1,%4,%%g1\n"                                         \
+"      divscc  %%g1,%4,%%g1\n"                                         \
+"      divscc  %%g1,%4,%%g1\n"                                         \
+"      divscc  %%g1,%4,%%g1\n"                                         \
+"      divscc  %%g1,%4,%%g1\n"                                         \
+"      divscc  %%g1,%4,%0\n"                                           \
+"      rd      %%y,%1\n"                                               \
+"      bl,a 1f\n"                                                      \
+"      add     %1,%4,%1\n"                                             \
+"1:    ! End of inline udiv_qrnnd"                                     \
+          : "=r" (q), "=r" (r) : "r" (n1), "r" (n0), "rI" (d)          \
+          : "%g1" __AND_CLOBBER_CC)
+#define UDIV_TIME 37
+/* count_leading_zeros for SPARClite: a single `scan X,1' into COUNT.  */
+#define count_leading_zeros(count, x) \
+  __asm__ ("scan %1,1,%0" : "=r" (count) : "r" (x))
+/* Early sparclites return 63 for an argument of 0, but they warn that future
+   implementations might change this.  Therefore, leave COUNT_LEADING_ZEROS_0
+   undefined.  */
+#endif /* __sparclite__ */
+#endif /* __sparc_v8__ */
+#endif /* __sparc_v9__ */
+/* Default to sparc v7 versions of umul_ppmm and udiv_qrnnd.  */
+#ifndef umul_ppmm
+/* umul_ppmm fallback for SPARC v7, built from mulscc steps.  U is written
+   to %y; %g2 is set to U masked by the sign of V (V arithmetically shifted
+   right by 31); %g1 and the condition codes are cleared by andcc.  Then 32
+   mulscc steps against V and one final mulscc with 0 run through %g1.  W1
+   is %g1 + %g2 and W0 is read back from %y.  %g1, %g2 and the condition
+   codes are declared clobbered.  The three instructions after the wr are
+   marked "Don't move": per the inline comment they fill the delay that
+   follows a write to %y.
+   NOTE(review): the %g2 term looks like the correction that turns the
+   signed mulscc result into an unsigned product -- confirm.  */
+#define umul_ppmm(w1, w0, u, v) \
+  __asm__ ("! Inlined umul_ppmm\n"                                     \
+"      wr      %%g0,%2,%%y     ! SPARC has 0-3 delay insn after a wr\n" \
+"      sra     %3,31,%%g2      ! Don't move this insn\n"               \
+"      and     %2,%%g2,%%g2    ! Don't move this insn\n"               \
+"      andcc   %%g0,0,%%g1     ! Don't move this insn\n"               \
+"      mulscc  %%g1,%3,%%g1\n"                                         \
+"      mulscc  %%g1,%3,%%g1\n"                                         \
+"      mulscc  %%g1,%3,%%g1\n"                                         \
+"      mulscc  %%g1,%3,%%g1\n"                                         \
+"      mulscc  %%g1,%3,%%g1\n"                                         \
+"      mulscc  %%g1,%3,%%g1\n"                                         \
+"      mulscc  %%g1,%3,%%g1\n"                                         \
+"      mulscc  %%g1,%3,%%g1\n"                                         \
+"      mulscc  %%g1,%3,%%g1\n"                                         \
+"      mulscc  %%g1,%3,%%g1\n"                                         \
+"      mulscc  %%g1,%3,%%g1\n"                                         \
+"      mulscc  %%g1,%3,%%g1\n"                                         \
+"      mulscc  %%g1,%3,%%g1\n"                                         \
+"      mulscc  %%g1,%3,%%g1\n"                                         \
+"      mulscc  %%g1,%3,%%g1\n"                                         \
+"      mulscc  %%g1,%3,%%g1\n"                                         \
+"      mulscc  %%g1,%3,%%g1\n"                                         \
+"      mulscc  %%g1,%3,%%g1\n"                                         \
+"      mulscc  %%g1,%3,%%g1\n"                                         \
+"      mulscc  %%g1,%3,%%g1\n"                                         \
+"      mulscc  %%g1,%3,%%g1\n"                                         \
+"      mulscc  %%g1,%3,%%g1\n"                                         \
+"      mulscc  %%g1,%3,%%g1\n"                                         \
+"      mulscc  %%g1,%3,%%g1\n"                                         \
+"      mulscc  %%g1,%3,%%g1\n"                                         \
+"      mulscc  %%g1,%3,%%g1\n"                                         \
+"      mulscc  %%g1,%3,%%g1\n"                                         \
+"      mulscc  %%g1,%3,%%g1\n"                                         \
+"      mulscc  %%g1,%3,%%g1\n"                                         \
+"      mulscc  %%g1,%3,%%g1\n"                                         \
+"      mulscc  %%g1,%3,%%g1\n"                                         \
+"      mulscc  %%g1,%3,%%g1\n"                                         \
+"      mulscc  %%g1,0,%%g1\n"                                          \
+"      add     %%g1,%%g2,%0\n"                                         \
+"      rd      %%y,%1"                                                 \
+          : "=r" (w1), "=r" (w0) : "%rI" (u), "r" (v)                  \
+          : "%g1", "%g2" __AND_CLOBBER_CC)
+#define UMUL_TIME 39           /* 39 instructions */
+#endif
+#ifndef udiv_qrnnd
+#ifndef LONGLONG_STANDALONE
+/* udiv_qrnnd fallback for 32-bit SPARC: defer to the out-of-line routine
+   __MPN(udiv_qrnnd), which returns the quotient and stores the remainder
+   through its first (pointer) argument.  */
+#define udiv_qrnnd(q, r, n1, n0, d)                                    \
+  do                                                                   \
+    {                                                                  \
+      UWtype __rem;                                                    \
+      (q) = __MPN(udiv_qrnnd) (&__rem, (n1), (n0), (d));               \
+      (r) = __rem;                                                     \
+    }                                                                  \
+  while (0)
+extern UWtype __MPN(udiv_qrnnd) _PROTO ((UWtype *, UWtype, UWtype, UWtype));
+#ifndef UDIV_TIME
+#define UDIV_TIME 140
+#endif
+#endif /* LONGLONG_STANDALONE */
+#endif /* udiv_qrnnd */
+#endif /* __sparc__ */
+
+#if defined (__sparc__) && W_TYPE_SIZE == 64
+/* add_ssaaaa for 64-bit SPARC, three instructions:
+     addcc   AL + BL               -> SL
+     addccc  (AL>>32) + (BL>>32)   -> %g0 (result discarded)
+     addc    AH + BH               -> SH
+   The upper halves of AL and BL are passed as separate operands (%6, %7)
+   for the middle instruction.
+   NOTE(review): presumably the middle add exists only to regenerate, in
+   the carry flag that addc consumes, the carry out of the full 64-bit low
+   word -- confirm against the SPARC v9 manual.  */
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ (                                                            \
+       "addcc  %r4,%5,%1\n"                                            \
+      "        addccc  %r6,%7,%%g0\n"                                          \
+      "        addc    %r2,%3,%0"                                              \
+         : "=r" (sh), "=&r" (sl)                                       \
+         : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl),                \
+           "%rJ" ((al) >> 32), "rI" ((bl) >> 32)                       \
+          __CLOBBER_CC)
+/* sub_ddmmss for 64-bit SPARC, three instructions:
+     subcc   AL - BL               -> SL
+     subccc  (AL>>32) - (BL>>32)   -> %g0 (result discarded)
+     subc    AH - BH               -> SH
+   The upper halves of AL and BL are passed as separate operands (%6, %7)
+   for the middle instruction.
+   NOTE(review): presumably the middle subtract exists only to regenerate,
+   in the carry flag that subc consumes, the borrow out of the full 64-bit
+   low word -- confirm against the SPARC v9 manual.  */
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ (                                                            \
+       "subcc  %r4,%5,%1\n"                                            \
+      "        subccc  %r6,%7,%%g0\n"                                          \
+      "        subc    %r2,%3,%0"                                              \
+         : "=r" (sh), "=&r" (sl)                                       \
+         : "rJ" (ah), "rI" (bh), "rJ" (al), "rI" (bl),         \
+           "rJ" ((al) >> 32), "rI" ((bl) >> 32)                        \
+          __CLOBBER_CC)
+#endif
+
+#if defined (__vax__) && W_TYPE_SIZE == 32
+/* add_ssaaaa for the VAX: addl2 on the low words followed by adwc on the
+   high words.  Both instructions work in place: SH starts out holding AH
+   and SL starts out holding AL (matching constraints "0" and "%1").  */
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("addl2 %5,%1\n\tadwc %3,%0"                                 \
+          : "=g" (sh), "=&g" (sl)                                      \
+          : "0"  ((USItype)(ah)), "g" ((USItype)(bh)),                 \
+            "%1" ((USItype)(al)), "g" ((USItype)(bl)))
+/* sub_ddmmss for the VAX: subl2 on the low words followed by sbwc on the
+   high words.  Both instructions work in place: SH starts out holding AH
+   and SL starts out holding AL (matching constraints "0" and "1").  */
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("subl2 %5,%1\n\tsbwc %3,%0"                                 \
+          : "=g" (sh), "=&g" (sl)                                      \
+          : "0" ((USItype)(ah)), "g" ((USItype)(bh)),                  \
+            "1" ((USItype)(al)), "g" ((USItype)(bl)))
+#define smul_ppmm(xh, xl, m0, m1) \
+  do {                                                                 \
+    union {UDItype __ll;                                               \
+          struct {USItype __l, __h;} __i;                              \
+         } __x;                                                        \
+    USItype __m0 = (m0), __m1 = (m1);                                  \
+    __asm__ ("emul %1,%2,$0,%0"                                                \
+            : "=g" (__x.__ll) : "g" (__m0), "g" (__m1));               \
+    (xh) = __x.__i.__h; (xl) = __x.__i.__l;                            \
+  } while (0)
+#define sdiv_qrnnd(q, r, n1, n0, d) \
+  do {                                                                 \
+    union {DItype __ll;                                                        \
+          struct {SItype __l, __h;} __i;                               \
+         } __x;                                                        \
+    __x.__i.__h = n1; __x.__i.__l = n0;                                        \
+    __asm__ ("ediv %3,%2,%0,%1"                                                \
+            : "=g" (q), "=g" (r) : "g" (__x.__ll), "g" (d));           \
+  } while (0)
+#if 0
+/* FIXME: This instruction appears to be unimplemented on some systems (vax
+   8800 maybe). */
+#define count_trailing_zeros(count,x)                                  \
+  do {                                                                 \
+    __asm__ ("ffs 0, 31, %1, %0"                                       \
+            : "=g" (count)                                             \
+            : "g" ((USItype) (x)));                                    \
+  } while (0)
+#endif
+#endif /* __vax__ */
+
+#if defined (__z8000__) && W_TYPE_SIZE == 16
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("add        %H1,%H5\n\tadc  %H0,%H3"                                \
+          : "=r" (sh), "=&r" (sl)                                      \
+          : "0"  ((unsigned int)(ah)), "r" ((unsigned int)(bh)),       \
+            "%1" ((unsigned int)(al)), "rQR" ((unsigned int)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("sub        %H1,%H5\n\tsbc  %H0,%H3"                                \
+          : "=r" (sh), "=&r" (sl)                                      \
+          : "0" ((unsigned int)(ah)), "r" ((unsigned int)(bh)),        \
+            "1" ((unsigned int)(al)), "rQR" ((unsigned int)(bl)))
+/* 16x16 -> 32 bit unsigned multiply built on the `mult' instruction.  The
+   operands are captured once in __m0/__m1 and those copies are what is
+   handed to the asm, so argument expressions are evaluated exactly once.
+   The final adjustment adds __m1 to the high word when bit 15 of __m0 is
+   set, and __m0 when bit 15 of __m1 is set (presumably because `mult'
+   yields a signed product -- confirm against the Z8000 manual).  */
+#define umul_ppmm(xh, xl, m0, m1) \
+  do {                                                                 \
+    union {long int __ll;                                              \
+          struct {unsigned int __h, __l;} __i;                         \
+         } __x;                                                        \
+    unsigned int __m0 = (m0), __m1 = (m1);                             \
+    __asm__ ("mult     %S0,%H3"                                        \
+            : "=r" (__x.__i.__h), "=r" (__x.__i.__l)                   \
+            : "%1" (__m0), "rQR" (__m1));                              \
+    (xh) = __x.__i.__h; (xl) = __x.__i.__l;                            \
+    (xh) += ((((signed int) __m0 >> 15) & __m1)                                \
+            + (((signed int) __m1 >> 15) & __m0));                     \
+  } while (0)
+#endif /* __z8000__ */
+
+#endif /* __GNUC__ */
+
+#endif /* NO_ASM */
+
+
+#if !defined (umul_ppmm) && defined (__umulsidi3)
+/* Build umul_ppmm on top of a double-word multiply __umulsidi3: PH receives
+   the high word and PL the low word of M0 * M1.  The body is wrapped in
+   do ... while (0) so that the expansion followed by `;' forms exactly one
+   statement and can safely sit between an `if' and its `else'.  */
+#define umul_ppmm(ph, pl, m0, m1) \
+  do {                                                                 \
+    UDWtype __ll = __umulsidi3 (m0, m1);                               \
+    (ph) = (UWtype) (__ll >> W_TYPE_SIZE);                             \
+    (pl) = (UWtype) __ll;                                              \
+  } while (0)
+#endif
+
+#if !defined (__umulsidi3)
+#define __umulsidi3(u, v) \
+  ({UWtype __hi, __lo;                                                 \
+    umul_ppmm (__hi, __lo, u, v);                                      \
+    ((UDWtype) __hi << W_TYPE_SIZE) | __lo; })
+#endif
+
+
+/* Use mpn_umul_ppmm or mpn_udiv_qrnnd functions, if they exist.  The "_r"
+   forms have "reversed" arguments, meaning the pointer is last, which
+   sometimes allows better parameter passing, in particular on 64-bit
+   hppa. */
+
+#define mpn_umul_ppmm  __MPN(umul_ppmm)
+extern UWtype mpn_umul_ppmm _PROTO ((UWtype *, UWtype, UWtype));
+
+#if ! defined (umul_ppmm) && HAVE_NATIVE_mpn_umul_ppmm  \
+  && ! defined (LONGLONG_STANDALONE)
+#define umul_ppmm(wh, wl, u, v)                                                      \
+  do {                                                                       \
+    UWtype __umul_ppmm__p0;                                                  \
+    (wh) = mpn_umul_ppmm (&__umul_ppmm__p0, (UWtype) (u), (UWtype) (v));      \
+    (wl) = __umul_ppmm__p0;                                                  \
+  } while (0)
+#endif
+
+#define mpn_umul_ppmm_r  __MPN(umul_ppmm_r)
+extern UWtype mpn_umul_ppmm_r _PROTO ((UWtype, UWtype, UWtype *));
+
+#if ! defined (umul_ppmm) && HAVE_NATIVE_mpn_umul_ppmm_r       \
+  && ! defined (LONGLONG_STANDALONE)
+#define umul_ppmm(wh, wl, u, v)                                                      \
+  do {                                                                       \
+    UWtype __umul_ppmm__p0;                                                  \
+    (wh) = mpn_umul_ppmm_r ((UWtype) (u), (UWtype) (v), &__umul_ppmm__p0);    \
+    (wl) = __umul_ppmm__p0;                                                  \
+  } while (0)
+#endif
+
+#define mpn_udiv_qrnnd  __MPN(udiv_qrnnd)
+extern UWtype mpn_udiv_qrnnd _PROTO ((UWtype *, UWtype, UWtype, UWtype));
+
+#if ! defined (udiv_qrnnd) && HAVE_NATIVE_mpn_udiv_qrnnd       \
+  && ! defined (LONGLONG_STANDALONE)
+/* udiv_qrnnd through the out-of-line mpn_udiv_qrnnd: the function's return
+   value is stored in Q and the word it writes through its pointer argument
+   is stored in R.  Every argument, including D, is parenthesized before the
+   cast so the cast applies to the whole argument expression.  */
+#define udiv_qrnnd(q, r, n1, n0, d)                                    \
+  do {                                                                 \
+    UWtype __udiv_qrnnd__r;                                            \
+    (q) = mpn_udiv_qrnnd (&__udiv_qrnnd__r,                            \
+                         (UWtype) (n1), (UWtype) (n0), (UWtype) (d));  \
+    (r) = __udiv_qrnnd__r;                                             \
+  } while (0)
+#endif
+
+#define mpn_udiv_qrnnd_r  __MPN(udiv_qrnnd_r)
+extern UWtype mpn_udiv_qrnnd_r _PROTO ((UWtype, UWtype, UWtype, UWtype *));
+
+#if ! defined (udiv_qrnnd) && HAVE_NATIVE_mpn_udiv_qrnnd_r     \
+  && ! defined (LONGLONG_STANDALONE)
+/* udiv_qrnnd through mpn_udiv_qrnnd_r, the variant taking the result
+   pointer as its last argument: the return value is stored in Q and the
+   word written through the pointer is stored in R.  Every argument,
+   including D, is parenthesized before the cast so the cast applies to the
+   whole argument expression.  */
+#define udiv_qrnnd(q, r, n1, n0, d)                                    \
+  do {                                                                 \
+    UWtype __udiv_qrnnd__r;                                            \
+    (q) = mpn_udiv_qrnnd_r ((UWtype) (n1), (UWtype) (n0), (UWtype) (d), \
+                           &__udiv_qrnnd__r);                          \
+    (r) = __udiv_qrnnd__r;                                             \
+  } while (0)
+#endif
+
+
+/* If this machine has no inline assembler, use C macros.  */
+
+#if !defined (add_ssaaaa)
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  do {                                                                 \
+    UWtype __x;                                                                \
+    __x = (al) + (bl);                                                 \
+    (sh) = (ah) + (bh) + (__x < (al));                                 \
+    (sl) = __x;                                                                \
+  } while (0)
+#endif
+
+#if !defined (sub_ddmmss)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  do {                                                                 \
+    UWtype __x;                                                                \
+    __x = (al) - (bl);                                                 \
+    (sh) = (ah) - (bh) - ((al) < (bl));                                 \
+    (sl) = __x;                                                                \
+  } while (0)
+#endif
+
+/* If we lack umul_ppmm but have smul_ppmm, define umul_ppmm in terms of
+   smul_ppmm.  */
+#if !defined (umul_ppmm) && defined (smul_ppmm)
+#define umul_ppmm(w1, w0, u, v)                                                \
+  do {                                                                 \
+    UWtype __w1;                                                       \
+    UWtype __xm0 = (u), __xm1 = (v);                                   \
+    smul_ppmm (__w1, w0, __xm0, __xm1);                                        \
+    (w1) = __w1 + (-(__xm0 >> (W_TYPE_SIZE - 1)) & __xm1)              \
+               + (-(__xm1 >> (W_TYPE_SIZE - 1)) & __xm0);              \
+  } while (0)
+#endif
+
+/* If we still don't have umul_ppmm, define it using plain C.
+
+   For reference, when this code is used for squaring (ie. u and v identical
+   expressions), gcc recognises __x1 and __x2 are the same and generates 3
+   multiplies, not 4.  The subsequent additions could be optimized a bit,
+   but the only place GMP currently uses such a square is mpn_sqr_basecase,
+   and chips obliged to use this generic C umul will have plenty of worse
+   performance problems than a couple of extra instructions on the diagonal
+   of sqr_basecase.  */
+
+#if !defined (umul_ppmm)
+#define umul_ppmm(w1, w0, u, v)                                                \
+  do {                                                                 \
+    UWtype __x0, __x1, __x2, __x3;                                     \
+    UHWtype __ul, __vl, __uh, __vh;                                    \
+    UWtype __u = (u), __v = (v);                                       \
+                                                                       \
+    __ul = __ll_lowpart (__u);                                         \
+    __uh = __ll_highpart (__u);                                                \
+    __vl = __ll_lowpart (__v);                                         \
+    __vh = __ll_highpart (__v);                                                \
+                                                                       \
+    __x0 = (UWtype) __ul * __vl;                                       \
+    __x1 = (UWtype) __ul * __vh;                                       \
+    __x2 = (UWtype) __uh * __vl;                                       \
+    __x3 = (UWtype) __uh * __vh;                                       \
+                                                                       \
+    __x1 += __ll_highpart (__x0);/* this can't give carry */           \
+    __x1 += __x2;              /* but this indeed can */               \
+    if (__x1 < __x2)           /* did we get it? */                    \
+      __x3 += __ll_B;          /* yes, add it in the proper pos. */    \
+                                                                       \
+    (w1) = __x3 + __ll_highpart (__x1);                                        \
+    (w0) = (__x1 << W_TYPE_SIZE/2) + __ll_lowpart (__x0);              \
+  } while (0)
+#endif
+
+/* If we don't have smul_ppmm, define it using umul_ppmm (which surely will
+   exist in one form or another).  */
+#if !defined (smul_ppmm)
+#define smul_ppmm(w1, w0, u, v)                                                \
+  do {                                                                 \
+    UWtype __w1;                                                       \
+    UWtype __xm0 = (u), __xm1 = (v);                                   \
+    umul_ppmm (__w1, w0, __xm0, __xm1);                                        \
+    (w1) = __w1 - (-(__xm0 >> (W_TYPE_SIZE - 1)) & __xm1)              \
+               - (-(__xm1 >> (W_TYPE_SIZE - 1)) & __xm0);              \
+  } while (0)
+#endif
+
+/* Define this unconditionally, so it can be used for debugging.  */
+#define __udiv_qrnnd_c(q, r, n1, n0, d) \
+  do {                                                                 \
+    UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m;                    \
+                                                                       \
+    ASSERT ((d) != 0);                                                 \
+    ASSERT ((n1) < (d));                                               \
+                                                                       \
+    __d1 = __ll_highpart (d);                                          \
+    __d0 = __ll_lowpart (d);                                           \
+                                                                       \
+    __q1 = (n1) / __d1;                                                        \
+    __r1 = (n1) - __q1 * __d1;                                         \
+    __m = __q1 * __d0;                                                 \
+    __r1 = __r1 * __ll_B | __ll_highpart (n0);                         \
+    if (__r1 < __m)                                                    \
+      {                                                                        \
+       __q1--, __r1 += (d);                                            \
+       if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\
+         if (__r1 < __m)                                               \
+           __q1--, __r1 += (d);                                        \
+      }                                                                        \
+    __r1 -= __m;                                                       \
+                                                                       \
+    __q0 = __r1 / __d1;                                                        \
+    __r0 = __r1  - __q0 * __d1;                                                \
+    __m = __q0 * __d0;                                                 \
+    __r0 = __r0 * __ll_B | __ll_lowpart (n0);                          \
+    if (__r0 < __m)                                                    \
+      {                                                                        \
+       __q0--, __r0 += (d);                                            \
+       if (__r0 >= (d))                                                \
+         if (__r0 < __m)                                               \
+           __q0--, __r0 += (d);                                        \
+      }                                                                        \
+    __r0 -= __m;                                                       \
+                                                                       \
+    (q) = __q1 * __ll_B | __q0;                                                \
+    (r) = __r0;                                                                \
+  } while (0)
+
+/* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
+   __udiv_w_sdiv (defined in libgcc or elsewhere).  */
+#if !defined (udiv_qrnnd) && defined (sdiv_qrnnd)
+#define udiv_qrnnd(q, r, nh, nl, d) \
+  do {                                                                 \
+    UWtype __r;                                                                \
+    (q) = __MPN(udiv_w_sdiv) (&__r, nh, nl, d);                                \
+    (r) = __r;                                                         \
+  } while (0)
+__GMP_DECLSPEC UWtype __MPN(udiv_w_sdiv) (UWtype *, UWtype, UWtype, UWtype);
+#endif
+
+/* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c.  */
+#if !defined (udiv_qrnnd)
+#define UDIV_NEEDS_NORMALIZATION 1
+#define udiv_qrnnd __udiv_qrnnd_c
+#endif
+
+#if !defined (count_leading_zeros)
+#define count_leading_zeros(count, x) \
+  do {                                                                 \
+    UWtype __xr = (x);                                                 \
+    UWtype __a;                                                                \
+                                                                       \
+    if (W_TYPE_SIZE == 32)                                             \
+      {                                                                        \
+       __a = __xr < ((UWtype) 1 << 2*__BITS4)                          \
+         ? (__xr < ((UWtype) 1 << __BITS4) ? 1 : __BITS4 + 1)          \
+         : (__xr < ((UWtype) 1 << 3*__BITS4) ? 2*__BITS4 + 1           \
+         : 3*__BITS4 + 1);                                             \
+      }                                                                        \
+    else                                                               \
+      {                                                                        \
+       for (__a = W_TYPE_SIZE - 8; __a > 0; __a -= 8)                  \
+         if (((__xr >> __a) & 0xff) != 0)                              \
+           break;                                                      \
+       ++__a;                                                          \
+      }                                                                        \
+                                                                       \
+    (count) = W_TYPE_SIZE + 1 - __a - __clz_tab[__xr >> __a];          \
+  } while (0)
+/* This version gives a well-defined value for zero. */
+#define COUNT_LEADING_ZEROS_0 (W_TYPE_SIZE - 1)
+#define COUNT_LEADING_ZEROS_NEED_CLZ_TAB
+#endif
+
+/* clz_tab needed by mpn/x86/pentium/mod_1.asm in a fat binary */
+#if HAVE_HOST_CPU_FAMILY_x86 && WANT_FAT_BINARY
+#define COUNT_LEADING_ZEROS_NEED_CLZ_TAB
+#endif
+
+#ifdef COUNT_LEADING_ZEROS_NEED_CLZ_TAB
+extern const unsigned char __GMP_DECLSPEC __clz_tab[128];
+#endif
+
+#if !defined (count_trailing_zeros)
+/* Define count_trailing_zeros using count_leading_zeros.  The latter might be
+   defined in asm, but if it is not, the C version above is good enough.  */
+#define count_trailing_zeros(count, x) \
+  do {                                                                 \
+    UWtype __ctz_x = (x);                                              \
+    UWtype __ctz_c;                                                    \
+    ASSERT (__ctz_x != 0);                                             \
+    count_leading_zeros (__ctz_c, __ctz_x & -__ctz_x);                 \
+    (count) = W_TYPE_SIZE - 1 - __ctz_c;                               \
+  } while (0)
+#endif
+
+#ifndef UDIV_NEEDS_NORMALIZATION
+#define UDIV_NEEDS_NORMALIZATION 0
+#endif
+
+/* Whether udiv_qrnnd is actually implemented with udiv_qrnnd_preinv, and
+   that hence the latter should always be used.  */
+#ifndef UDIV_PREINV_ALWAYS
+#define UDIV_PREINV_ALWAYS 0
+#endif
+
+/* Give defaults for UMUL_TIME and UDIV_TIME.  */
+#ifndef UMUL_TIME
+#define UMUL_TIME 1
+#endif
+
+#ifndef UDIV_TIME
+#define UDIV_TIME UMUL_TIME
+#endif
diff --git a/ltmain.sh b/ltmain.sh

new file mode 100644 (file)

index 0000000..63ae69d
--- /dev/null
+++ b/ltmain.sh
@@ -0,0 +1,9655 @@
+
+# libtool (GNU libtool) 2.4.2
+# Written by Gordon Matzigkeit <gord@gnu.ai.mit.edu>, 1996
+
+# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005, 2006,
+# 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+# This is free software; see the source for copying conditions.  There is NO
+# warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+# GNU Libtool is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# As a special exception to the GNU General Public License,
+# if you distribute this file as part of a program or library that
+# is built using GNU Libtool, you may include this file under the
+# same distribution terms that you use for the rest of that program.
+#
+# GNU Libtool is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GNU Libtool; see the file COPYING.  If not, a copy
+# can be downloaded from http://www.gnu.org/licenses/gpl.html,
+# or obtained by writing to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# Usage: $progname [OPTION]... [MODE-ARG]...
+#
+# Provide generalized library-building support services.
+#
+#       --config             show all configuration variables
+#       --debug              enable verbose shell tracing
+#   -n, --dry-run            display commands without modifying any files
+#       --features           display basic configuration information and exit
+#       --mode=MODE          use operation mode MODE
+#       --preserve-dup-deps  don't remove duplicate dependency libraries
+#       --quiet, --silent    don't print informational messages
+#       --no-quiet, --no-silent
+#                            print informational messages (default)
+#       --no-warn            don't display warning messages
+#       --tag=TAG            use configuration variables from tag TAG
+#   -v, --verbose            print more informational messages than default
+#       --no-verbose         don't print the extra informational messages
+#       --version            print version information
+#   -h, --help, --help-all   print short, long, or detailed help message
+#
+# MODE must be one of the following:
+#
+#         clean              remove files from the build directory
+#         compile            compile a source file into a libtool object
+#         execute            automatically set library path, then run a program
+#         finish             complete the installation of libtool libraries
+#         install            install libraries or executables
+#         link               create a library or an executable
+#         uninstall          remove libraries from an installed directory
+#
+# MODE-ARGS vary depending on the MODE.  When passed as first option,
+# `--mode=MODE' may be abbreviated as `MODE' or a unique abbreviation of that.
+# Try `$progname --help --mode=MODE' for a more detailed description of MODE.
+#
+# When reporting a bug, please describe a test case to reproduce it and
+# include the following information:
+#
+#         host-triplet:        $host
+#         shell:               $SHELL
+#         compiler:            $LTCC
+#         compiler flags:              $LTCFLAGS
+#         linker:              $LD (gnu? $with_gnu_ld)
+#         $progname:   (GNU libtool) 2.4.2
+#         automake:    $automake_version
+#         autoconf:    $autoconf_version
+#
+# Report bugs to <bug-libtool@gnu.org>.
+# GNU libtool home page: <http://www.gnu.org/software/libtool/>.
+# General help using GNU software: <http://www.gnu.org/gethelp/>.
+
+PROGRAM=libtool
+PACKAGE=libtool
+VERSION=2.4.2
+TIMESTAMP=""
+package_revision=1.3337
+
+# Be Bourne compatible
+if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
+  emulate sh
+  NULLCMD=:
+  # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which
+  # is contrary to our usage.  Disable this feature.
+  alias -g '${1+"$@"}'='"$@"'
+  setopt NO_GLOB_SUBST
+else
+  case `(set -o) 2>/dev/null` in *posix*) set -o posix;; esac
+fi
+BIN_SH=xpg4; export BIN_SH # for Tru64
+DUALCASE=1; export DUALCASE # for MKS sh
+
+# A function that is used when there is no print builtin or printf.
+func_fallback_echo ()
+{
+  eval 'cat <<_LTECHO_EOF
+$1
+_LTECHO_EOF'
+}
+
+# NLS nuisances: We save the old values to restore during execute mode.
+lt_user_locale=
+lt_safe_locale=
+for lt_var in LANG LANGUAGE LC_ALL LC_CTYPE LC_COLLATE LC_MESSAGES
+do
+  eval "if test \"\${$lt_var+set}\" = set; then
+          save_$lt_var=\$$lt_var
+          $lt_var=C
+         export $lt_var
+         lt_user_locale=\"$lt_var=\\\$save_\$lt_var; \$lt_user_locale\"
+         lt_safe_locale=\"$lt_var=C; \$lt_safe_locale\"
+       fi"
+done
+LC_ALL=C
+LANGUAGE=C
+export LANGUAGE LC_ALL
+
+$lt_unset CDPATH
+
+
+# Work around backward compatibility issue on IRIX 6.5. On IRIX 6.4+, sh
+# is ksh but when the shell is invoked as "sh" and the current value of
+# the _XPG environment variable is not equal to 1 (one), the special
+# positional parameter $0, within a function call, is the name of the
+# function.
+progpath="$0"
+
+
+
+: ${CP="cp -f"}
+test "${ECHO+set}" = set || ECHO=${as_echo-'printf %s\n'}
+: ${MAKE="make"}
+: ${MKDIR="mkdir"}
+: ${MV="mv -f"}
+: ${RM="rm -f"}
+: ${SHELL="${CONFIG_SHELL-/bin/sh}"}
+: ${Xsed="$SED -e 1s/^X//"}
+
+# Global variables:
+EXIT_SUCCESS=0
+EXIT_FAILURE=1
+EXIT_MISMATCH=63  # $? = 63 is used to indicate version mismatch to missing.
+EXIT_SKIP=77     # $? = 77 is used to indicate a skipped test to automake.
+
+exit_status=$EXIT_SUCCESS
+
+# Make sure IFS has a sensible default
+lt_nl='
+'
+IFS="  $lt_nl"
+
+dirname="s,/[^/]*$,,"
+basename="s,^.*/,,"
+
+# func_dirname file append nondir_replacement
+# Compute the dirname of FILE.  If nonempty, add APPEND to the result,
+# otherwise set result to NONDIR_REPLACEMENT.
+func_dirname ()
+{
+    func_dirname_result=`$ECHO "${1}" | $SED "$dirname"`
+    if test "X$func_dirname_result" = "X${1}"; then
+      func_dirname_result="${3}"
+    else
+      func_dirname_result="$func_dirname_result${2}"
+    fi
+} # func_dirname may be replaced by extended shell implementation
+
+
+# func_basename file
+func_basename ()
+{
+    func_basename_result=`$ECHO "${1}" | $SED "$basename"`
+} # func_basename may be replaced by extended shell implementation
+
+
+# func_dirname_and_basename file append nondir_replacement
+# perform func_basename and func_dirname in a single function
+# call:
+#   dirname:  Compute the dirname of FILE.  If nonempty,
+#             add APPEND to the result, otherwise set result
+#             to NONDIR_REPLACEMENT.
+#             value returned in "$func_dirname_result"
+#   basename: Compute filename of FILE.
+#             value returned in "$func_basename_result"
+# Implementation must be kept synchronized with func_dirname
+# and func_basename. For efficiency, we do not delegate to
+# those functions but instead duplicate the functionality here.
+func_dirname_and_basename ()
+{
+    # Extract subdirectory from the argument.
+    func_dirname_result=`$ECHO "${1}" | $SED -e "$dirname"`
+    if test "X$func_dirname_result" = "X${1}"; then
+      func_dirname_result="${3}"
+    else
+      func_dirname_result="$func_dirname_result${2}"
+    fi
+    func_basename_result=`$ECHO "${1}" | $SED -e "$basename"`
+} # func_dirname_and_basename may be replaced by extended shell implementation
+
+
+# func_stripname prefix suffix name
+# strip PREFIX and SUFFIX off of NAME.
+# PREFIX and SUFFIX must not contain globbing or regex special
+# characters, hashes, percent signs, but SUFFIX may contain a leading
+# dot (in which case that matches only a dot).
+# func_strip_suffix prefix name
+func_stripname ()
+{
+    case ${2} in
+      .*) func_stripname_result=`$ECHO "${3}" | $SED "s%^${1}%%; s%\\\\${2}\$%%"`;;
+      *)  func_stripname_result=`$ECHO "${3}" | $SED "s%^${1}%%; s%${2}\$%%"`;;
+    esac
+} # func_stripname may be replaced by extended shell implementation
+
+
+# These SED scripts presuppose an absolute path with a trailing slash.
+pathcar='s,^/\([^/]*\).*$,\1,'
+pathcdr='s,^/[^/]*,,'
+removedotparts=':dotsl
+               s@/\./@/@g
+               t dotsl
+               s,/\.$,/,'
+collapseslashes='s@/\{1,\}@/@g'
+finalslash='s,/*$,/,'
+
+# func_normal_abspath PATH
+# Remove doubled-up and trailing slashes, "." path components,
+# and cancel out any ".." path components in PATH after making
+# it an absolute path.
+#             value returned in "$func_normal_abspath_result"
+func_normal_abspath ()
+{
+  # Start from root dir and reassemble the path.
+  func_normal_abspath_result=
+  func_normal_abspath_tpath=$1
+  func_normal_abspath_altnamespace=
+  case $func_normal_abspath_tpath in
+    "")
+      # Empty path, that just means $cwd.
+      func_stripname '' '/' "`pwd`"
+      func_normal_abspath_result=$func_stripname_result
+      return
+    ;;
+    # The next three entries are used to spot a run of precisely
+    # two leading slashes without using negated character classes;
+    # we take advantage of case's first-match behaviour.
+    ///*)
+      # Unusual form of absolute path, do nothing.
+    ;;
+    //*)
+      # Not necessarily an ordinary path; POSIX reserves leading '//'
+      # and for example Cygwin uses it to access remote file shares
+      # over CIFS/SMB, so we conserve a leading double slash if found.
+      func_normal_abspath_altnamespace=/
+    ;;
+    /*)
+      # Absolute path, do nothing.
+    ;;
+    *)
+      # Relative path, prepend $cwd.
+      func_normal_abspath_tpath=`pwd`/$func_normal_abspath_tpath
+    ;;
+  esac
+  # Cancel out all the simple stuff to save iterations.  We also want
+  # the path to end with a slash for ease of parsing, so make sure
+  # there is one (and only one) here.
+  func_normal_abspath_tpath=`$ECHO "$func_normal_abspath_tpath" | $SED \
+        -e "$removedotparts" -e "$collapseslashes" -e "$finalslash"`
+  while :; do
+    # Processed it all yet?
+    if test "$func_normal_abspath_tpath" = / ; then
+      # If we ascended to the root using ".." the result may be empty now.
+      if test -z "$func_normal_abspath_result" ; then
+        func_normal_abspath_result=/
+      fi
+      break
+    fi
+    func_normal_abspath_tcomponent=`$ECHO "$func_normal_abspath_tpath" | $SED \
+        -e "$pathcar"`
+    func_normal_abspath_tpath=`$ECHO "$func_normal_abspath_tpath" | $SED \
+        -e "$pathcdr"`
+    # Figure out what to do with it
+    case $func_normal_abspath_tcomponent in
+      "")
+        # Trailing empty path component, ignore it.
+      ;;
+      ..)
+        # Parent dir; strip last assembled component from result.
+        func_dirname "$func_normal_abspath_result"
+        func_normal_abspath_result=$func_dirname_result
+      ;;
+      *)
+        # Actual path component, append it.
+        func_normal_abspath_result=$func_normal_abspath_result/$func_normal_abspath_tcomponent
+      ;;
+    esac
+  done
+  # Restore leading double-slash if one was found on entry.
+  func_normal_abspath_result=$func_normal_abspath_altnamespace$func_normal_abspath_result
+}
+
+# func_relative_path SRCDIR DSTDIR
+# generates a relative path from SRCDIR to DSTDIR, with a trailing
+# slash if non-empty, suitable for immediately appending a filename
+# without needing to append a separator.
+# Both arguments are first normalised with func_normal_abspath.
+#             value returned in "$func_relative_path_result"
+func_relative_path ()
+{
+  func_relative_path_result=
+  func_normal_abspath "$1"
+  func_relative_path_tlibdir=$func_normal_abspath_result
+  func_normal_abspath "$2"
+  func_relative_path_tbindir=$func_normal_abspath_result
+
+  # Ascend the tree starting from libdir
+  while :; do
+    # check if we have found a prefix of bindir
+    # NOTE(review): the prefix test below is purely textual, so "/usr/lib"
+    # is also accepted as a prefix of "/usr/lib64/..."; confirm whether
+    # callers can hit that case.
+    case $func_relative_path_tbindir in
+      $func_relative_path_tlibdir)
+        # found an exact match
+        func_relative_path_tcancelled=
+        break
+        ;;
+      $func_relative_path_tlibdir*)
+        # found a matching prefix
+        func_stripname "$func_relative_path_tlibdir" '' "$func_relative_path_tbindir"
+        func_relative_path_tcancelled=$func_stripname_result
+        if test -z "$func_relative_path_result"; then
+          func_relative_path_result=.
+        fi
+        break
+        ;;
+      *)
+        # Quote the argument: unquoted, a directory name containing
+        # whitespace would be split and only its first word examined.
+        func_dirname "$func_relative_path_tlibdir"
+        func_relative_path_tlibdir=${func_dirname_result}
+        if test "x$func_relative_path_tlibdir" = x ; then
+          # Have to ascend all the way to the root!
+          func_relative_path_result=../$func_relative_path_result
+          func_relative_path_tcancelled=$func_relative_path_tbindir
+          break
+        fi
+        func_relative_path_result=../$func_relative_path_result
+        ;;
+    esac
+  done
+
+  # Now calculate path; take care to avoid doubling-up slashes.
+  func_stripname '' '/' "$func_relative_path_result"
+  func_relative_path_result=$func_stripname_result
+  func_stripname '/' '/' "$func_relative_path_tcancelled"
+  if test "x$func_stripname_result" != x ; then
+    func_relative_path_result=${func_relative_path_result}/${func_stripname_result}
+  fi
+
+  # Normalisation. If bindir is libdir, return empty string,
+  # else relative path ending with a slash; either way, target
+  # file name can be directly appended.
+  if test ! -z "$func_relative_path_result"; then
+    func_stripname './' '' "$func_relative_path_result/"
+    func_relative_path_result=$func_stripname_result
+  fi
+}
+
+# The name of this program:
+func_dirname_and_basename "$progpath"
+progname=$func_basename_result
+
+# Make sure we have an absolute path for reexecution:
+case $progpath in
+  # Leading slash or backslash, or a drive letter: leave $progpath as it is.
+  [\\/]*|[A-Za-z]:\\*) ;;
+  # Contains a directory separator elsewhere: resolve the directory
+  # part with cd/pwd and rebuild $progpath from it.
+  *[\\/]*)
+     progdir=$func_dirname_result
+     progdir=`cd "$progdir" && pwd`
+     progpath="$progdir/$progname"
+     ;;
+  # Bare program name: look for an executable of that name in each
+  # $PATH entry, splitting $PATH on $PATH_SEPARATOR (default `:').
+  *)
+     save_IFS="$IFS"
+     IFS=${PATH_SEPARATOR-:}
+     for progdir in $PATH; do
+       IFS="$save_IFS"
+       test -x "$progdir/$progname" && break
+     done
+     IFS="$save_IFS"
+     # An empty $progdir is replaced by the current directory.
+     test -n "$progdir" || progdir=`pwd`
+     progpath="$progdir/$progname"
+     ;;
+esac
+
+# Sed substitution that helps us do robust quoting.  It backslashifies
+# metacharacters that are still active within double-quoted strings.
+# $Xsed is the sed command plus a script that removes a leading `X'
+# from the first input line.
+Xsed="${SED}"' -e 1s/^X//'
+sed_quote_subst='s/\([`"$\\]\)/\\\1/g'
+
+# Same as above, but do not quote variable references.
+double_quote_subst='s/\(["`\\]\)/\\\1/g'
+
+# Sed substitution that turns a string into a regex matching for the
+# string literally.
+sed_make_literal_regex='s,[].[^$\\*\/],\\&,g'
+
+# Sed substitution that converts a w32 file name or path
+# which contains forward slashes, into one that contains
+# (escaped) backslashes.  A very naive implementation.
+lt_sed_naive_backslashify='s|\\\\*|\\|g;s|/|\\|g;s|\\|\\\\|g'
+
+# Re-`\' parameter expansions in output of double_quote_subst that were
+# `\'-ed in input to the same.  If an odd number of `\' preceded a '$'
+# in input to double_quote_subst, that '$' was protected from expansion.
+# Since each input `\' is now two `\'s, look for any number of runs of
+# four `\'s followed by two `\'s and then a '$'.  `\' that '$'.
+# The helper variables below hold one, two and four regex-escaped
+# backslashes and an escaped dollar sign; they are interpolated into
+# the multi-line sed script.
+bs='\\'
+bs2='\\\\'
+bs4='\\\\\\\\'
+dollar='\$'
+sed_double_backslash="\
+  s/$bs4/&\\
+/g
+  s/^$bs2$dollar/$bs&/
+  s/\\([^$bs]\\)$bs2$dollar/\\1$bs2$bs$dollar/g
+  s/\n//g"
+
+# Standard options:
+opt_dry_run=false
+opt_help=false
+opt_quiet=false
+opt_verbose=false
+opt_warning=:
+
+# func_echo arg...
+# Echo program name prefixed message, along with the current mode
+# name if it has been set yet.
+func_echo ()
+{
+    # ${opt_mode+...} adds "MODE: " only when $opt_mode is set.
+    $ECHO "$progname: ${opt_mode+$opt_mode: }$*"
+}
+
+# func_verbose arg...
+# Echo program name prefixed message in verbose mode only.
+# $opt_verbose is run as a command, so it must hold `:' or `false'.
+func_verbose ()
+{
+    $opt_verbose && func_echo ${1+"$@"}
+
+    # A bug in bash halts the script if the last line of a function
+    # fails when set -e is in force, so we need another command to
+    # work around that:
+    :
+}
+
+# func_echo_all arg...
+# Invoke $ECHO with all args, space-separated.
+# The arguments are joined by "$*" into a single word, without the
+# program name prefix that func_echo adds.
+func_echo_all ()
+{
+    $ECHO "$*"
+}
+
+# func_error arg...
+# Echo program name prefixed message to standard error.
+# The mode name is included in the prefix when $opt_mode is set.
+func_error ()
+{
+    $ECHO "$progname: ${opt_mode+$opt_mode: }"${1+"$@"} 1>&2
+}
+
+# func_warning arg...
+# Echo program name prefixed warning message to standard error.
+# Nothing is printed when $opt_warning is `false'.
+func_warning ()
+{
+    $opt_warning && $ECHO "$progname: ${opt_mode+$opt_mode: }warning: "${1+"$@"} 1>&2
+
+    # bash bug again:
+    :
+}
+
+# func_fatal_error arg...
+# Echo program name prefixed message to standard error, and exit.
+# The exit status is $EXIT_FAILURE.
+func_fatal_error ()
+{
+    func_error ${1+"$@"}
+    exit $EXIT_FAILURE
+}
+
+# func_fatal_help arg...
+# Echo program name prefixed message to standard error, followed by
+# a help hint, and exit.
+# The hint is the current value of $help, printed through
+# func_fatal_error, which also performs the exit.
+func_fatal_help ()
+{
+    func_error ${1+"$@"}
+    func_fatal_error "$help"
+}
+help="Try \`$progname --help' for more information."  ## default
+
+
+# func_grep expression filename
+# Check whether EXPRESSION matches any line of FILENAME, without output.
+# The return status is that of $GREP; both its stdout and stderr are
+# discarded.
+func_grep ()
+{
+    $GREP "$1" "$2" >/dev/null 2>&1
+}
+
+
+# func_mkdir_p directory-path
+# Make sure the entire path to DIRECTORY-PATH is available.
+# Does nothing when DIRECTORY-PATH is empty or when $opt_dry_run is `:'.
+# Calls func_fatal_error if DIRECTORY-PATH is not a directory afterwards.
+func_mkdir_p ()
+{
+    my_directory_path="$1"
+    my_dir_list=
+
+    if test -n "$my_directory_path" && test "$opt_dry_run" != ":"; then
+
+      # Protect directory names starting with `-'
+      case $my_directory_path in
+        -*) my_directory_path="./$my_directory_path" ;;
+      esac
+
+      # Remember the full requested path.  The loop below shortens
+      # $my_directory_path to the deepest ancestor that already exists,
+      # so the final check must not use that variable.
+      my_directory_target="$my_directory_path"
+
+      # While some portion of DIR does not yet exist...
+      while test ! -d "$my_directory_path"; do
+        # ...make a list in topmost first order.  Use a colon delimited
+        # list in case some portion of path contains whitespace.
+        my_dir_list="$my_directory_path:$my_dir_list"
+
+        # If the last portion added has no slash in it, the list is done
+        case $my_directory_path in */*) ;; *) break ;; esac
+
+        # ...otherwise throw away the child directory and loop
+        my_directory_path=`$ECHO "$my_directory_path" | $SED -e "$dirname"`
+      done
+      my_dir_list=`$ECHO "$my_dir_list" | $SED 's,:*$,,'`
+
+      save_mkdir_p_IFS="$IFS"; IFS=':'
+      for my_dir in $my_dir_list; do
+        IFS="$save_mkdir_p_IFS"
+        # mkdir can fail with a `File exists' error if two processes
+        # try to create one of the directories concurrently.  Don't
+        # stop in that case!
+        $MKDIR "$my_dir" 2>/dev/null || :
+      done
+      IFS="$save_mkdir_p_IFS"
+
+      # Bail out if we (or some other process) failed to create a directory.
+      # Test the full requested path, not the shortened ancestor.
+      test -d "$my_directory_target" || \
+        func_fatal_error "Failed to create \`$1'"
+    fi
+}
+
+
+# func_mktempdir [string]
+# Make a temporary directory that won't clash with other running
+# libtool processes, and avoids race conditions if possible.  If
+# given, STRING is the basename for that directory.
+# The directory name is written to standard output.  It is placed
+# under $TMPDIR, or /tmp when TMPDIR is unset.
+func_mktempdir ()
+{
+    my_template="${TMPDIR-/tmp}/${1-$progname}"
+
+    if test "$opt_dry_run" = ":"; then
+      # Return a directory name, but don't create it in dry-run mode
+      my_tmpdir="${my_template}-$$"
+    else
+
+      # If mktemp works, use that first and foremost
+      my_tmpdir=`mktemp -d "${my_template}-XXXXXXXX" 2>/dev/null`
+
+      if test ! -d "$my_tmpdir"; then
+        # Failing that, at least try and use $RANDOM to avoid a race
+        my_tmpdir="${my_template}-${RANDOM-0}$$"
+
+        # Create the directory under a restrictive umask, then put the
+        # caller's umask back.
+        save_mktempdir_umask=`umask`
+        umask 0077
+        $MKDIR "$my_tmpdir"
+        umask $save_mktempdir_umask
+      fi
+
+      # If we're not in dry-run mode, bomb out on failure
+      test -d "$my_tmpdir" || \
+        func_fatal_error "cannot create temporary directory \`$my_tmpdir'"
+    fi
+
+    $ECHO "$my_tmpdir"
+}
+
+
+# func_quote_for_eval arg
+# Aesthetically quote ARG to be evaled later.
+# This function returns two values: FUNC_QUOTE_FOR_EVAL_RESULT
+# is double-quoted, suitable for a subsequent eval, whereas
+# FUNC_QUOTE_FOR_EVAL_UNQUOTED_RESULT has merely all characters
+# which are still active within double quotes backslashified.
+func_quote_for_eval ()
+{
+    # Only run sed when ARG contains a backslash, backquote, double
+    # quote or dollar sign; otherwise ARG is used unchanged.
+    case $1 in
+      *[\\\`\"\$]*)
+       func_quote_for_eval_unquoted_result=`$ECHO "$1" | $SED "$sed_quote_subst"` ;;
+      *)
+        func_quote_for_eval_unquoted_result="$1" ;;
+    esac
+
+    case $func_quote_for_eval_unquoted_result in
+      # Double-quote args containing shell metacharacters to delay
+      # word splitting, command substitution and variable
+      # expansion for a subsequent eval.
+      # Many Bourne shells cannot handle close brackets correctly
+      # in scan sets, so we specify it separately.
+      *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \    ]*|*]*|"")
+        func_quote_for_eval_result="\"$func_quote_for_eval_unquoted_result\""
+        ;;
+      *)
+        func_quote_for_eval_result="$func_quote_for_eval_unquoted_result"
+    esac
+}
+
+
+# func_quote_for_expand arg
+# Aesthetically quote ARG to be evaled later; same as above,
+# but do not quote variable references.
+# The result is stored in $func_quote_for_expand_result.
+func_quote_for_expand ()
+{
+    # Only run sed when ARG contains a backslash, backquote or double
+    # quote; otherwise ARG is used unchanged.
+    case $1 in
+      *[\\\`\"]*)
+       my_arg=`$ECHO "$1" | $SED \
+           -e "$double_quote_subst" -e "$sed_double_backslash"` ;;
+      *)
+        my_arg="$1" ;;
+    esac
+
+    case $my_arg in
+      # Double-quote args containing shell metacharacters to delay
+      # word splitting and command substitution for a subsequent eval.
+      # Many Bourne shells cannot handle close brackets correctly
+      # in scan sets, so we specify it separately.
+      *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \    ]*|*]*|"")
+        my_arg="\"$my_arg\""
+        ;;
+    esac
+
+    func_quote_for_expand_result="$my_arg"
+}
+
+
+# func_show_eval cmd [fail_exp]
+# Unless opt_silent is true, then output CMD.  Then, if opt_dry_run is
+# not true, evaluate CMD.  If the evaluation of CMD fails, and FAIL_EXP
+# is given, then evaluate it.
+# FAIL_EXP defaults to `:' when the second argument is not supplied.
+func_show_eval ()
+{
+    my_cmd="$1"
+    my_fail_exp="${2-:}"
+
+    ${opt_silent-false} || {
+      func_quote_for_expand "$my_cmd"
+      eval "func_echo $func_quote_for_expand_result"
+    }
+
+    if ${opt_dry_run-false}; then :; else
+      eval "$my_cmd"
+      my_status=$?
+      if test "$my_status" -eq 0; then :; else
+       # `(exit N)' sets $? to CMD's status again before FAIL_EXP runs.
+       eval "(exit $my_status); $my_fail_exp"
+      fi
+    fi
+}
+
+
+# func_show_eval_locale cmd [fail_exp]
+# Unless opt_silent is true, then output CMD.  Then, if opt_dry_run is
+# not true, evaluate CMD.  If the evaluation of CMD fails, and FAIL_EXP
+# is given, then evaluate it.  Use the saved locale for evaluation.
+func_show_eval_locale ()
+{
+    my_cmd="$1"
+    my_fail_exp="${2-:}"
+
+    ${opt_silent-false} || {
+      func_quote_for_expand "$my_cmd"
+      eval "func_echo $func_quote_for_expand_result"
+    }
+
+    if ${opt_dry_run-false}; then :; else
+      # $lt_user_locale is evaluated immediately before CMD, and
+      # $lt_safe_locale once CMD's status has been saved.
+      eval "$lt_user_locale
+           $my_cmd"
+      my_status=$?
+      eval "$lt_safe_locale"
+      if test "$my_status" -eq 0; then :; else
+       # `(exit N)' sets $? to CMD's status again before FAIL_EXP runs.
+       eval "(exit $my_status); $my_fail_exp"
+      fi
+    fi
+}
+
+# func_tr_sh
+# Turn $1 into a string suitable for a shell variable name.
+# Result is stored in $func_tr_sh_result.  All characters
+# not in the set a-zA-Z0-9_ are replaced with '_'. Further,
+# if $1 begins with a digit, a '_' is prepended as well.
+func_tr_sh ()
+{
+  case $1 in
+  # Only fork sed when $1 starts with a digit or contains a character
+  # outside a-zA-Z0-9_.
+  [0-9]* | *[!a-zA-Z0-9_]*)
+    func_tr_sh_result=`$ECHO "$1" | $SED 's/^\([0-9]\)/_\1/; s/[^a-zA-Z0-9_]/_/g'`
+    ;;
+  * )
+    func_tr_sh_result=$1
+    ;;
+  esac
+}
+
+
+# func_version
+# Echo version message to standard output and exit.
+# The text is extracted with sed from the comment lines of the script
+# itself ($progpath); the exit status is that of sed.
+func_version ()
+{
+    $opt_debug
+
+    # Lines containing `(C)' are joined with their continuation lines
+    # until the joined text contains a `.'.  Then the range from the
+    # `# $PROGRAM (GNU ' line to the `# warranty; ' line is printed with
+    # the leading `# ' removed.
+    $SED -n '/(C)/!b go
+       :more
+       /\./!{
+         N
+         s/\n# / /
+         b more
+       }
+       :go
+       /^# '$PROGRAM' (GNU /,/# warranty; / {
+        s/^# //
+       s/^# *$//
+        s/\((C)\)[ 0-9,-]*\( [1-9][0-9]*\)/\1\2/
+        p
+     }' < "$progpath"
+     exit $?
+}
+
+# func_usage
+# Echo short help message to standard output and exit.
+# The message is the comment range of $progpath from the `# Usage:'
+# line to the line mentioning `--help', with `$progname' substituted.
+func_usage ()
+{
+    $opt_debug
+
+    $SED -n '/^# Usage:/,/^#  *.*--help/ {
+        s/^# //
+       s/^# *$//
+       s/\$progname/'$progname'/
+       p
+    }' < "$progpath"
+    echo
+    $ECHO "run \`$progname --help | more' for full usage"
+    # The exit status is that of the $ECHO just above.
+    exit $?
+}
+
+# func_help [NOEXIT]
+# Echo long help message to standard output and exit,
+# unless 'noexit' is passed as argument.
+# Any non-empty first argument suppresses the exit.  The text is the
+# comment range of $progpath from `# Usage:' to `# Report bugs to',
+# plus the `home page:' and `General help using' lines, with the
+# placeholders below replaced by the current values.
+func_help ()
+{
+    $opt_debug
+
+    $SED -n '/^# Usage:/,/# Report bugs to/ {
+       :print
+        s/^# //
+       s/^# *$//
+       s*\$progname*'$progname'*
+       s*\$host*'"$host"'*
+       s*\$SHELL*'"$SHELL"'*
+       s*\$LTCC*'"$LTCC"'*
+       s*\$LTCFLAGS*'"$LTCFLAGS"'*
+       s*\$LD*'"$LD"'*
+       s/\$with_gnu_ld/'"$with_gnu_ld"'/
+       s/\$automake_version/'"`(${AUTOMAKE-automake} --version) 2>/dev/null |$SED 1q`"'/
+       s/\$autoconf_version/'"`(${AUTOCONF-autoconf} --version) 2>/dev/null |$SED 1q`"'/
+       p
+       d
+     }
+     /^# .* home page:/b print
+     /^# General help using/b print
+     ' < "$progpath"
+    # Save sed's status so that it is the one passed to exit.
+    ret=$?
+    if test -z "$1"; then
+      exit $ret
+    fi
+}
+
+# func_missing_arg argname
+# Echo program name prefixed message to standard error and set global
+# exit_cmd.
+# exit_cmd is set to `exit'; the option parser later runs
+# `$exit_cmd $EXIT_FAILURE'.
+func_missing_arg ()
+{
+    $opt_debug
+
+    func_error "missing argument for $1."
+    exit_cmd=exit
+}
+
+
+# func_split_short_opt shortopt
+# Set func_split_short_opt_name and func_split_short_opt_arg shell
+# variables after splitting SHORTOPT after the 2nd character.
+func_split_short_opt ()
+{
+    # The first script keeps the first two characters of line 1, the
+    # second keeps everything after them; both quit after line 1.
+    my_sed_short_opt='1s/^\(..\).*$/\1/;q'
+    my_sed_short_rest='1s/^..\(.*\)$/\1/;q'
+
+    func_split_short_opt_name=`$ECHO "$1" | $SED "$my_sed_short_opt"`
+    func_split_short_opt_arg=`$ECHO "$1" | $SED "$my_sed_short_rest"`
+} # func_split_short_opt may be replaced by extended shell implementation
+
+
+# func_split_long_opt longopt
+# Set func_split_long_opt_name and func_split_long_opt_arg shell
+# variables after splitting LONGOPT at the `=' sign.
+func_split_long_opt ()
+{
+    # The name script keeps `--name' from line 1 and quits; the arg
+    # script strips `--name=' from line 1 and passes later lines through.
+    my_sed_long_opt='1s/^\(--[^=]*\)=.*/\1/;q'
+    my_sed_long_arg='1s/^--[^=]*=//'
+
+    func_split_long_opt_name=`$ECHO "$1" | $SED "$my_sed_long_opt"`
+    func_split_long_opt_arg=`$ECHO "$1" | $SED "$my_sed_long_arg"`
+} # func_split_long_opt may be replaced by extended shell implementation
+
+exit_cmd=:
+
+
+
+
+
+magic="%%%MAGIC variable%%%"
+magic_exe="%%%MAGIC EXE variable%%%"
+
+# Global variables.
+nonopt=
+preserve_args=
+lo2o="s/\\.lo\$/.${objext}/"
+o2lo="s/\\.${objext}\$/.lo/"
+extracted_archives=
+extracted_serial=0
+
+# If this variable is set in any of the actions, the command in it
+# will be execed at the end.  This prevents here-documents from being
+# left over by shells.
+exec_cmd=
+
+# func_append var value
+# Append VALUE to the end of shell variable VAR.
+# VAR is a variable name; the assignment is built and run with eval, so
+# VALUE itself is only expanded inside the eval and is not re-parsed.
+func_append ()
+{
+    eval "${1}=\$${1}\${2}"
+} # func_append may be replaced by extended shell implementation
+
+# func_append_quoted var value
+# Quote VALUE and append to the end of shell variable VAR, separated
+# by a space.
+# Quoting is done by func_quote_for_eval; the space is added even when
+# VAR is empty.
+func_append_quoted ()
+{
+    func_quote_for_eval "${2}"
+    eval "${1}=\$${1}\\ \$func_quote_for_eval_result"
+} # func_append_quoted may be replaced by extended shell implementation
+
+
+# func_arith arithmetic-term...
+# Evaluate the arguments with expr and store its output in
+# $func_arith_result.
+func_arith ()
+{
+    func_arith_result=`expr "${@}"`
+} # func_arith may be replaced by extended shell implementation
+
+
+# func_len string
+# STRING may not start with a hyphen.
+# Store the length of STRING, as counted by expr, in $func_len_result.
+# If expr fails, $max_cmd_len is stored instead.
+func_len ()
+{
+    func_len_result=`expr "${1}" : ".*" 2>/dev/null || echo $max_cmd_len`
+} # func_len may be replaced by extended shell implementation
+
+
+# func_lo2o object
+# Apply the $lo2o sed script to OBJECT and store the output in
+# $func_lo2o_result.
+func_lo2o ()
+{
+    func_lo2o_result=`$ECHO "${1}" | $SED "$lo2o"`
+} # func_lo2o may be replaced by extended shell implementation
+
+
+# func_xform libobj-or-source
+# Replace the last `.suffix' of the argument with `.lo' and store the
+# output in $func_xform_result.
+func_xform ()
+{
+    func_xform_result=`$ECHO "${1}" | $SED 's/\.[^.]*$/.lo/'`
+} # func_xform may be replaced by extended shell implementation
+
+
+# func_fatal_configuration arg...
+# Echo program name prefixed message to standard error, followed by
+# a configuration failure hint, and exit.
+# The exit is performed by the final func_fatal_error call.
+func_fatal_configuration ()
+{
+    func_error ${1+"$@"}
+    func_error "See the $PACKAGE documentation for more information."
+    func_fatal_error "Fatal configuration error."
+}
+
+
+# func_config
+# Display the configuration for all the tags in this script.
+# The text is cut out of $progpath between the BEGIN/END LIBTOOL marker
+# comments; the function then exits.
+func_config ()
+{
+    re_begincf='^# ### BEGIN LIBTOOL'
+    re_endcf='^# ### END LIBTOOL'
+
+    # Default configuration.
+    # Delete everything up to and including the BEGIN CONFIG line, and
+    # everything from the END CONFIG line onwards.
+    $SED "1,/$re_begincf CONFIG/d;/$re_endcf CONFIG/,\$d" < "$progpath"
+
+    # Now print the configurations for the tags.
+    for tagname in $taglist; do
+      $SED -n "/$re_begincf TAG CONFIG: $tagname\$/,/$re_endcf TAG CONFIG: $tagname\$/p" < "$progpath"
+    done
+
+    exit $?
+}
+
+# func_features
+# Print the host, then one line each stating whether shared and static
+# libraries are enabled, and exit.
+func_features ()
+{
+    echo "host: $host"
+
+    # Shared libraries are reported as enabled only for the exact value `yes'.
+    case $build_libtool_libs in
+      yes) echo "enable shared libraries" ;;
+      *)   echo "disable shared libraries" ;;
+    esac
+
+    # Likewise for static libraries.
+    case $build_old_libs in
+      yes) echo "enable static libraries" ;;
+      *)   echo "disable static libraries" ;;
+    esac
+
+    exit $?
+}
+
+# func_enable_tag tagname
+# Verify that TAGNAME is valid, and either flag an error and exit, or
+# enable the TAGNAME tag.  We also add TAGNAME to the global $taglist
+# variable here.
+# A syntactically valid tag that has no section in $progpath is only
+# reported with func_error; it does not cause an exit.
+func_enable_tag ()
+{
+  # Global variable:
+  tagname="$1"
+
+  # Marker lines that delimit this tag's section inside $progpath.
+  re_begincf="^# ### BEGIN LIBTOOL TAG CONFIG: $tagname\$"
+  re_endcf="^# ### END LIBTOOL TAG CONFIG: $tagname\$"
+  sed_extractcf="/$re_begincf/,/$re_endcf/p"
+
+  # Validate tagname.
+  case $tagname in
+    *[!-_A-Za-z0-9,/]*)
+      func_fatal_error "invalid tag name: $tagname"
+      ;;
+  esac
+
+  # Don't test for the "default" C tag, as we know it's
+  # there but not specially marked.
+  case $tagname in
+    CC) ;;
+    *)
+      if $GREP "$re_begincf" "$progpath" >/dev/null 2>&1; then
+       taglist="$taglist $tagname"
+
+       # Evaluate the configuration.  Be careful to quote the path
+       # and the sed script, to avoid splitting on whitespace, but
+       # also don't use non-portable quotes within backquotes within
+       # quotes we have to do it in 2 steps:
+       extractedcf=`$SED -n -e "$sed_extractcf" < "$progpath"`
+       eval "$extractedcf"
+      else
+       func_error "ignoring unknown tag $tagname"
+      fi
+      ;;
+  esac
+}
+
+# func_check_version_match
+# Ensure that we are using m4 macros, and libtool script from the same
+# release of libtool.
+# On a mismatch one of three messages is printed to standard error and
+# the script exits with $EXIT_MISMATCH: macros with no version recorded,
+# macros from a different version, or same version but different revision.
+func_check_version_match ()
+{
+  if test "$package_revision" != "$macro_revision"; then
+    if test "$VERSION" != "$macro_version"; then
+      if test -z "$macro_version"; then
+        cat >&2 <<_LT_EOF
+$progname: Version mismatch error.  This is $PACKAGE $VERSION, but the
+$progname: definition of this LT_INIT comes from an older release.
+$progname: You should recreate aclocal.m4 with macros from $PACKAGE $VERSION
+$progname: and run autoconf again.
+_LT_EOF
+      else
+        cat >&2 <<_LT_EOF
+$progname: Version mismatch error.  This is $PACKAGE $VERSION, but the
+$progname: definition of this LT_INIT comes from $PACKAGE $macro_version.
+$progname: You should recreate aclocal.m4 with macros from $PACKAGE $VERSION
+$progname: and run autoconf again.
+_LT_EOF
+      fi
+    else
+      cat >&2 <<_LT_EOF
+$progname: Version mismatch error.  This is $PACKAGE $VERSION, revision $package_revision,
+$progname: but the definition of this LT_INIT comes from revision $macro_revision.
+$progname: You should recreate aclocal.m4 with macros from revision $package_revision
+$progname: of $PACKAGE $VERSION and run autoconf again.
+_LT_EOF
+    fi
+
+    exit $EXIT_MISMATCH
+  fi
+}
+
+
+# Shorthand for --mode=foo, only valid as the first argument
+# Each arm accepts the mode name and its listed abbreviations, then
+# rewrites the positional parameters to `--mode NAME rest...'.  The
+# `set dummy ...; shift' pair is used instead of a bare `set' so that
+# the first real word is never parsed as an option to `set'.
+case $1 in
+clean|clea|cle|cl)
+  shift; set dummy --mode clean ${1+"$@"}; shift
+  ;;
+compile|compil|compi|comp|com|co|c)
+  shift; set dummy --mode compile ${1+"$@"}; shift
+  ;;
+execute|execut|execu|exec|exe|ex|e)
+  shift; set dummy --mode execute ${1+"$@"}; shift
+  ;;
+finish|finis|fini|fin|fi|f)
+  shift; set dummy --mode finish ${1+"$@"}; shift
+  ;;
+install|instal|insta|inst|ins|in|i)
+  shift; set dummy --mode install ${1+"$@"}; shift
+  ;;
+link|lin|li|l)
+  shift; set dummy --mode link ${1+"$@"}; shift
+  ;;
+uninstall|uninstal|uninsta|uninst|unins|unin|uni|un|u)
+  shift; set dummy --mode uninstall ${1+"$@"}; shift
+  ;;
+esac
+
+
+
+# Option defaults:
+opt_debug=:
+opt_dry_run=false
+opt_config=false
+opt_preserve_dup_deps=false
+opt_features=false
+opt_finish=false
+opt_help=false
+opt_help_all=false
+opt_silent=:
+opt_warning=:
+opt_verbose=:
+opt_silent=false
+opt_verbose=false
+
+
+# Parse options once, thoroughly.  This comes as soon as possible in the
+# script to make things like `--version' happen as quickly as we can.
+# Options are consumed from the positional parameters one at a time;
+# combined forms (`--opt=arg', `-nv') are split and pushed back onto the
+# parameter list so that the next iteration handles the pieces.
+{
+  # this just eases exit handling
+  while test $# -gt 0; do
+    opt="$1"
+    shift
+    case $opt in
+      --debug|-x)      opt_debug='set -x'
+                       func_echo "enabling shell trace mode"
+                       $opt_debug
+                       ;;
+      --dry-run|--dryrun|-n)
+                       opt_dry_run=:
+                       ;;
+      --config)
+                       opt_config=:
+func_config
+                       ;;
+      # NOTE(review): unlike --mode and --tag, there is no check here
+      # that an argument actually follows.
+      --dlopen|-dlopen)
+                       optarg="$1"
+                       opt_dlopen="${opt_dlopen+$opt_dlopen
+}$optarg"
+                       shift
+                       ;;
+      --preserve-dup-deps)
+                       opt_preserve_dup_deps=:
+                       ;;
+      --features)
+                       opt_features=:
+func_features
+                       ;;
+      --finish)
+                       opt_finish=:
+set dummy --mode finish ${1+"$@"}; shift
+                       ;;
+      --help)
+                       opt_help=:
+                       ;;
+      --help-all)
+                       opt_help_all=:
+opt_help=': help-all'
+                       ;;
+      --mode)
+                       test $# = 0 && func_missing_arg $opt && break
+                       optarg="$1"
+                       opt_mode="$optarg"
+case $optarg in
+  # Valid mode arguments:
+  clean|compile|execute|finish|install|link|relink|uninstall) ;;
+
+  # Catch anything else as an error
+  *) func_error "invalid argument for $opt"
+     exit_cmd=exit
+     break
+     ;;
+esac
+                       shift
+                       ;;
+      --no-silent|--no-quiet)
+                       opt_silent=false
+func_append preserve_args " $opt"
+                       ;;
+      --no-warning|--no-warn)
+                       opt_warning=false
+func_append preserve_args " $opt"
+                       ;;
+      --no-verbose)
+                       opt_verbose=false
+func_append preserve_args " $opt"
+                       ;;
+      --silent|--quiet)
+                       opt_silent=:
+func_append preserve_args " $opt"
+        opt_verbose=false
+                       ;;
+      --verbose|-v)
+                       opt_verbose=:
+func_append preserve_args " $opt"
+opt_silent=false
+                       ;;
+      --tag)
+                       test $# = 0 && func_missing_arg $opt && break
+                       optarg="$1"
+                       opt_tag="$optarg"
+func_append preserve_args " $opt $optarg"
+func_enable_tag "$optarg"
+                       shift
+                       ;;
+
+      # NOTE(review): the `--help)' arm below can never be selected,
+      # because the earlier `--help)' arm matches first.
+      -\?|-h)          func_usage                              ;;
+      --help)          func_help                               ;;
+      --version)       func_version                            ;;
+
+      # Separate optargs to long options:
+      --*=*)
+                       func_split_long_opt "$opt"
+                       set dummy "$func_split_long_opt_name" "$func_split_long_opt_arg" ${1+"$@"}
+                       shift
+                       ;;
+
+      # Separate non-argument short options:
+      -\?*|-h*|-n*|-v*)
+                       func_split_short_opt "$opt"
+                       set dummy "$func_split_short_opt_name" "-$func_split_short_opt_arg" ${1+"$@"}
+                       shift
+                       ;;
+
+      --)              break                                   ;;
+      -*)              func_fatal_help "unrecognized option \`$opt'" ;;
+      # First non-option word: push it back and stop parsing.
+      *)               set dummy "$opt" ${1+"$@"};     shift; break  ;;
+    esac
+  done
+
+  # Validate options:
+
+  # save first non-option argument
+  if test "$#" -gt 0; then
+    nonopt="$opt"
+    shift
+  fi
+
+  # preserve --debug
+  test "$opt_debug" = : || func_append preserve_args " --debug"
+
+  case $host in
+    *cygwin* | *mingw* | *pw32* | *cegcc*)
+      # don't eliminate duplications in $postdeps and $predeps
+      opt_duplicate_compiler_generated_deps=:
+      ;;
+    *)
+      opt_duplicate_compiler_generated_deps=$opt_preserve_dup_deps
+      ;;
+  esac
+
+  # The checks in this group are skipped when help was requested.
+  $opt_help || {
+    # Sanity checks first:
+    func_check_version_match
+
+    if test "$build_libtool_libs" != yes && test "$build_old_libs" != yes; then
+      func_fatal_configuration "not configured to build any kind of library"
+    fi
+
+    # Darwin sucks
+    eval std_shrext=\"$shrext_cmds\"
+
+    # Only execute mode is allowed to have -dlopen flags.
+    if test -n "$opt_dlopen" && test "$opt_mode" != execute; then
+      func_error "unrecognized option \`-dlopen'"
+      $ECHO "$help" 1>&2
+      exit $EXIT_FAILURE
+    fi
+
+    # Change the help message to a mode-specific one.
+    generic_help="$help"
+    help="Try \`$progname --help --mode=$opt_mode' for more information."
+  }
+
+
+  # Bail if the options were screwed
+  # $exit_cmd is `:' unless an error above set it to `exit'.
+  $exit_cmd $EXIT_FAILURE
+}
+
+
+
+
+## ----------- ##
+##    Main.    ##
+## ----------- ##
+
+# func_lalib_p file
+# True iff FILE is a libtool `.la' library or `.lo' object file.
+# This function is only a basic sanity check; it will hardly flush out
+# determined imposters.
+# The check is that FILE is a regular file and that one of its first
+# four lines matches `# Generated by ...$PACKAGE'.
+func_lalib_p ()
+{
+    test -f "$1" &&
+      $SED -e 4q "$1" 2>/dev/null \
+        | $GREP "^# Generated by .*$PACKAGE" > /dev/null 2>&1
+}
+
+# func_lalib_unsafe_p file
+# True iff FILE is a libtool `.la' library or `.lo' object file.
+# This function implements the same check as func_lalib_p without
+# resorting to external programs.  To this end, it redirects stdin and
+# closes it afterwards, without saving the original file descriptor.
+# As a safety measure, use it only where a negative result would be
+# fatal anyway.  Works if `file' does not exist.
+func_lalib_unsafe_p ()
+{
+    lalib_p=no
+    # Duplicate stdin onto descriptor 5, then read FILE through stdin.
+    if test -f "$1" && test -r "$1" && exec 5<&0 <"$1"; then
+       # Inspect at most the first four lines.
+       for lalib_p_l in 1 2 3 4
+       do
+           read lalib_p_line
+           case "$lalib_p_line" in
+               \#\ Generated\ by\ *$PACKAGE* ) lalib_p=yes; break;;
+           esac
+       done
+       # Put stdin back from descriptor 5 and close descriptor 5.
+       exec 0<&5 5<&-
+    fi
+    test "$lalib_p" = yes
+}
+
+# func_ltwrapper_script_p file
+# True iff FILE is a libtool wrapper script
+# This function is only a basic sanity check; it will hardly flush out
+# determined imposters.
+# It performs exactly the func_lalib_p check on FILE.
+func_ltwrapper_script_p ()
+{
+    func_lalib_p "$1"
+}
+
+# func_ltwrapper_executable_p file
+# True iff FILE is a libtool wrapper executable
+# This function is only a basic sanity check; it will hardly flush out
+# determined imposters.
+# The check greps for $magic_exe in FILE, with `.exe' appended to the
+# name when FILE does not already end in `.exe'.
+func_ltwrapper_executable_p ()
+{
+    func_ltwrapper_exec_suffix=
+    case $1 in
+    *.exe) ;;
+    *) func_ltwrapper_exec_suffix=.exe ;;
+    esac
+    $GREP "$magic_exe" "$1$func_ltwrapper_exec_suffix" >/dev/null 2>&1
+}
+
+# func_ltwrapper_scriptname file
+# Assumes file is an ltwrapper_executable
+# uses $file to determine the appropriate filename for a
+# temporary ltwrapper_script.
+# The result, DIR/$objdir/BASENAME_ltshwrapper with any `.exe' suffix
+# removed from BASENAME, is stored in $func_ltwrapper_scriptname_result.
+func_ltwrapper_scriptname ()
+{
+    func_dirname_and_basename "$1" "" "."
+    func_stripname '' '.exe' "$func_basename_result"
+    func_ltwrapper_scriptname_result="$func_dirname_result/$objdir/${func_stripname_result}_ltshwrapper"
+}
+
+# func_ltwrapper_p file
+# Succeed when FILE passes either the wrapper-script check or the
+# wrapper-executable check.  Like those checks, this is only a basic
+# sanity test and will not catch a determined imposter.
+func_ltwrapper_p ()
+{
+    if func_ltwrapper_script_p "$1"; then
+      # Recognised as a wrapper script; nothing more to test.
+      :
+    else
+      # Otherwise the result is that of the executable check.
+      func_ltwrapper_executable_p "$1"
+    fi
+}
+
+
+# func_execute_cmds commands fail_cmd
+# Execute tilde-delimited COMMANDS.
+# If FAIL_CMD is given, eval that upon failure.
+# FAIL_CMD may read-access the current command in variable CMD!
+func_execute_cmds ()
+{
+    $opt_debug
+    # Split COMMANDS on `~' only; IFS is put back inside the loop so
+    # that each command runs with the caller's IFS.
+    save_ifs=$IFS; IFS='~'
+    for cmd in $1; do
+      IFS=$save_ifs
+      # Expand variable references contained in the command text.
+      eval cmd=\"$cmd\"
+      func_show_eval "$cmd" "${2-:}"
+    done
+    IFS=$save_ifs
+}
+
+
+# func_source file
+# Source FILE, adding directory component if necessary.
+# Note that it is not necessary on cygwin/mingw to append a dot to
+# FILE even if both FILE and FILE.exe exist: automatic-append-.exe
+# behavior happens only for exec(3), not for open(2)!  Also, sourcing
+# `FILE.' does not work on cygwin managed mounts.
+func_source ()
+{
+    $opt_debug
+    # A name without any slash or backslash gets `./' prepended before
+    # it is passed to `.'.
+    case $1 in
+    */* | *\\*)        . "$1" ;;
+    *)         . "./$1" ;;
+    esac
+}
+
+
+# func_resolve_sysroot PATH
+# Replace a leading = in PATH with a sysroot.  Store the result into
+# func_resolve_sysroot_result
+func_resolve_sysroot ()
+{
+  func_resolve_sysroot_result=$1
+  case $func_resolve_sysroot_result in
+  =*)
+    func_stripname '=' '' "$func_resolve_sysroot_result"
+    func_resolve_sysroot_result=$lt_sysroot$func_stripname_result
+    ;;
+  esac
+}
+
+# func_replace_sysroot PATH
+# If PATH begins with the sysroot, replace it with = and
+# store the result into func_replace_sysroot_result.
+# Otherwise PATH is stored unchanged.
+func_replace_sysroot ()
+{
+  # Match on "SYSROOT:PATH" so that one pattern can require both a
+  # non-empty sysroot (`?*') and a PATH that starts with it.
+  case "$lt_sysroot:$1" in
+  ?*:"$lt_sysroot"*)
+    func_stripname "$lt_sysroot" '' "$1"
+    func_replace_sysroot_result="=$func_stripname_result"
+    ;;
+  *)
+    # Including no sysroot.
+    func_replace_sysroot_result=$1
+    ;;
+  esac
+}
+
+# func_infer_tag arg
+# Infer tagged configuration to use if any are available and
+# if one wasn't chosen via the "--tag" command line option.
+# Only attempt this if the compiler in the base compile
+# command doesn't match the default compiler.
+# arg is usually of the form 'gcc ...'
+# On success $tagname is set and that tag's configuration has been
+# evaluated; if no tag matches, the function exits via func_fatal_error.
+func_infer_tag ()
+{
+    $opt_debug
+    if test -n "$available_tags" && test -z "$tagname"; then
+      # Build the spellings of $CC that the command line is compared
+      # with: quoted word by word, and each form re-expanded by the shell.
+      CC_quoted=
+      for arg in $CC; do
+       func_append_quoted CC_quoted "$arg"
+      done
+      CC_expanded=`func_echo_all $CC`
+      CC_quoted_expanded=`func_echo_all $CC_quoted`
+      case $@ in
+      # Blanks in the command may have been stripped by the calling shell,
+      # but not from the CC environment variable when configure was run.
+      " $CC "* | "$CC "* | " $CC_expanded "* | "$CC_expanded "* | \
+      " $CC_quoted"* | "$CC_quoted "* | " $CC_quoted_expanded "* | "$CC_quoted_expanded "*) ;;
+      # Blanks at the start of $base_compile will cause this to fail
+      # if we don't check for them as well.
+      *)
+       # The default compiler did not match: try each available tag.
+       for z in $available_tags; do
+         if $GREP "^# ### BEGIN LIBTOOL TAG CONFIG: $z$" < "$progpath" > /dev/null; then
+           # Evaluate the configuration.
+           eval "`${SED} -n -e '/^# ### BEGIN LIBTOOL TAG CONFIG: '$z'$/,/^# ### END LIBTOOL TAG CONFIG: '$z'$/p' < $progpath`"
+           CC_quoted=
+           for arg in $CC; do
+             # Double-quote args containing other shell metacharacters.
+             func_append_quoted CC_quoted "$arg"
+           done
+           CC_expanded=`func_echo_all $CC`
+           CC_quoted_expanded=`func_echo_all $CC_quoted`
+           # NOTE(review): this comparison appends a blank to the command
+           # line ("$@ "), whereas the one against the default compiler
+           # above uses a bare $@.
+           case "$@ " in
+           " $CC "* | "$CC "* | " $CC_expanded "* | "$CC_expanded "* | \
+           " $CC_quoted"* | "$CC_quoted "* | " $CC_quoted_expanded "* | "$CC_quoted_expanded "*)
+             # The compiler in the base compile command matches
+             # the one in the tagged configuration.
+             # Assume this is the tagged configuration we want.
+             tagname=$z
+             break
+             ;;
+           esac
+         fi
+       done
+       # If $tagname still isn't set, then no tagged configuration
+       # was found and let the user know that the "--tag" command
+       # line option must be used.
+       if test -z "$tagname"; then
+         func_echo "unable to infer tagged configuration"
+         func_fatal_error "specify a tag with \`--tag'"
+#      else
+#        func_verbose "using $tagname tagged configuration"
+       fi
+       ;;
+      esac
+    fi
+}
+
+
+
+# func_write_libtool_object output_name pic_name nonpic_name
+# Create a libtool object file (analogous to a ".la" file),
+# but don't create it if we're doing a dry run.
+func_write_libtool_object ()
+{
+    # $1: name of the libtool object file to write
+    # $2: name recorded as the PIC object
+    # $3: name recorded as the non-PIC object
+    write_libobj=${1}
+
+    # The PIC object is recorded only when $build_libtool_libs is yes.  The
+    # name is wrapped in single quotes because it is written verbatim into
+    # the generated file as a shell assignment.
+    if test "$build_libtool_libs" = yes; then
+      write_lobj=\'${2}\'
+    else
+      write_lobj=none
+    fi
+
+    # Likewise, the non-PIC object is recorded only when $build_old_libs
+    # is yes.
+    if test "$build_old_libs" = yes; then
+      write_oldobj=\'${3}\'
+    else
+      write_oldobj=none
+    fi
+
+    # Nothing is written during a dry run.
+    $opt_dry_run || {
+      # Write to "<name>T" first and rename it into place afterwards.  The
+      # redirection target is quoted (like the $MV operands below) so that
+      # a name containing blanks does not yield an ambiguous redirect.
+      cat >"${write_libobj}T" <<EOF
+# $write_libobj - a libtool object file
+# Generated by $PROGRAM (GNU $PACKAGE$TIMESTAMP) $VERSION
+#
+# Please DO NOT delete this file!
+# It is necessary for linking the library.
+
+# Name of the PIC object.
+pic_object=$write_lobj
+
+# Name of the non-PIC object
+non_pic_object=$write_oldobj
+
+EOF
+      $MV "${write_libobj}T" "${write_libobj}"
+    }
+}
+
+
+##################################################
+# FILE NAME AND PATH CONVERSION HELPER FUNCTIONS #
+##################################################
+
+# func_convert_core_file_wine_to_w32 ARG
+# Helper function used by file name conversion functions when $build is *nix,
+# and $host is mingw, cygwin, or some other w32 environment. Relies on a
+# correctly configured wine environment available, with the winepath program
+# in $build's $PATH.
+#
+# ARG is the $build file name to be converted to w32 format.
+# Result is available in $func_convert_core_file_wine_to_w32_result, and will
+# be empty on error (or when ARG is empty)
+func_convert_core_file_wine_to_w32 ()
+{
+  $opt_debug
+  func_convert_core_file_wine_to_w32_result="$1"
+  case "$1" in
+  '')
+    # Empty ARG: nothing to convert; the (empty) result is already set.
+    ;;
+  *)
+    # A zero exit status from winepath is not proof of success, so a
+    # conversion only counts when the command ran with status zero AND
+    # produced some output.  A missing winepath yields a non-zero status.
+    func_convert_core_file_wine_to_w32_tmp=`winepath -w "$1" 2>/dev/null`
+    if test "$?" -ne 0 || test -z "${func_convert_core_file_wine_to_w32_tmp}"; then
+      # Conversion failed: report an empty result.
+      func_convert_core_file_wine_to_w32_result=
+    else
+      func_convert_core_file_wine_to_w32_result=`$ECHO "$func_convert_core_file_wine_to_w32_tmp" |
+        $SED -e "$lt_sed_naive_backslashify"`
+    fi
+    ;;
+  esac
+}
+# end: func_convert_core_file_wine_to_w32
+
+
+# func_convert_core_path_wine_to_w32 ARG
+# Helper function used by path conversion functions when $build is *nix, and
+# $host is mingw, cygwin, or some other w32 environment. Relies on a correctly
+# configured wine environment available, with the winepath program in $build's
+# $PATH. Assumes ARG has no leading or trailing path separator characters.
+#
+# ARG is path to be converted from $build format to win32.
+# Result is available in $func_convert_core_path_wine_to_w32_result.
+# Unconvertible file (directory) names in ARG are skipped; if no directory names
+# are convertible, then the result may be empty.
+func_convert_core_path_wine_to_w32 ()
+{
+  $opt_debug
+  # winepath converts single file names only, so the path is split on ':'
+  # and every element is converted on its own.
+  func_convert_core_path_wine_to_w32_result=
+  test -n "$1" || return 0
+
+  oldIFS=$IFS
+  IFS=:
+  for func_convert_core_path_wine_to_w32_f in $1; do
+    # Restore IFS right away so the helper runs with normal word splitting.
+    IFS=$oldIFS
+    func_convert_core_file_wine_to_w32 "$func_convert_core_path_wine_to_w32_f"
+    # Elements that could not be converted are dropped.
+    test -n "$func_convert_core_file_wine_to_w32_result" || continue
+    if test -n "$func_convert_core_path_wine_to_w32_result"; then
+      # Second and later elements are joined with the w32 separator.
+      func_append func_convert_core_path_wine_to_w32_result ";$func_convert_core_file_wine_to_w32_result"
+    else
+      func_convert_core_path_wine_to_w32_result=$func_convert_core_file_wine_to_w32_result
+    fi
+  done
+  IFS=$oldIFS
+}
+# end: func_convert_core_path_wine_to_w32
+
+
+# func_cygpath ARGS...
+# Wrapper around calling the cygpath program via LT_CYGPATH. This is used when
+# (1) $build is *nix and Cygwin is hosted via a wine environment; or (2)
+# $build is MSYS and $host is Cygwin, or (3) $build is Cygwin. In case (1) or
+# (2), returns the Cygwin file name or path in func_cygpath_result (input
+# file name or path is assumed to be in w32 format, as previously converted
+# from $build's *nix or MSYS format). In case (3), returns the w32 file name
+# or path in func_cygpath_result (input file name or path is assumed to be in
+# Cygwin format). Returns an empty string on error.
+#
+# ARGS are passed to cygpath, with the last one being the file name or path to
+# be converted.
+#
+# Specify the absolute *nix (or w32) name to cygpath in the LT_CYGPATH
+# environment variable; do not put it in $PATH.
+func_cygpath ()
+{
+  $opt_debug
+  if test -z "$LT_CYGPATH" || test ! -f "$LT_CYGPATH"; then
+    # No usable cygpath program was configured: empty result plus a diagnostic.
+    func_cygpath_result=
+    func_error "LT_CYGPATH is empty or specifies non-existent file: \`$LT_CYGPATH'"
+  else
+    # Run the configured cygpath with all ARGS; a failing run leaves the
+    # result empty.
+    func_cygpath_result=`$LT_CYGPATH "$@" 2>/dev/null` || func_cygpath_result=
+  fi
+}
+#end: func_cygpath
+
+
+# func_convert_core_msys_to_w32 ARG
+# Convert file name or path ARG from MSYS format to w32 format.  Return
+# result in func_convert_core_msys_to_w32_result.
+func_convert_core_msys_to_w32 ()
+{
+  $opt_debug
+  # Echo ARG through `cmd //c echo' and post-process the output with sed:
+  # the first expression strips the trailing spaces that cmd appends, the
+  # second applies $lt_sed_naive_backslashify (defined elsewhere in this
+  # file).  Errors from the cmd subshell are discarded.
+  # awkward: cmd appends spaces to result
+  func_convert_core_msys_to_w32_result=`( cmd //c echo "$1" ) 2>/dev/null |
+    $SED -e 's/[ ]*$//' -e "$lt_sed_naive_backslashify"`
+}
+#end: func_convert_core_msys_to_w32
+
+
+# func_convert_file_check ARG1 ARG2
+# Verify that ARG1 (a file name in $build format) was converted to $host
+# format in ARG2. Otherwise, emit an error message, but continue (resetting
+# func_to_host_file_result to ARG1).
+func_convert_file_check ()
+{
+  $opt_debug
+  # Only a non-empty ARG1 that produced an empty ARG2 counts as a failed
+  # conversion; everything else is left untouched.
+  test -n "$1" || return 0
+  test -z "$2" || return 0
+
+  func_error "Could not determine host file name corresponding to"
+  func_error "  \`$1'"
+  func_error "Continuing, but uninstalled executables may not work."
+  # Fall back to the unconverted name.
+  func_to_host_file_result=$1
+}
+# end func_convert_file_check
+
+
+# func_convert_path_check FROM_PATHSEP TO_PATHSEP FROM_PATH TO_PATH
+# Verify that FROM_PATH (a path in $build format) was converted to $host
+# format in TO_PATH. Otherwise, emit an error message, but continue, resetting
+# func_to_host_path_result to a simplistic fallback value (see below).
+func_convert_path_check ()
+{
+  $opt_debug
+  # Only a non-empty FROM_PATH that produced an empty TO_PATH counts as a
+  # failed conversion.
+  test -n "$3" || return 0
+  test -z "$4" || return 0
+
+  func_error "Could not determine the host path corresponding to"
+  func_error "  \`$3'"
+  func_error "Continuing, but uninstalled executables may not work."
+  # Fallback.  This is a deliberately simplistic "conversion" and
+  # should not be "improved".  See libtool.info.
+  case "x$1" in
+  "x$2")
+    # Same separator on both sides: pass FROM_PATH through unchanged.
+    func_to_host_path_result=$3
+    ;;
+  *)
+    # Different separators: just swap one separator for the other.
+    lt_replace_pathsep_chars="s|$1|$2|g"
+    func_to_host_path_result=`echo "$3" | $SED -e "$lt_replace_pathsep_chars"`
+    ;;
+  esac
+}
+# end func_convert_path_check
+
+
+# func_convert_path_front_back_pathsep FRONTPAT BACKPAT REPL ORIG
+# Modifies func_to_host_path_result by prepending REPL if ORIG matches FRONTPAT
+# and appending REPL if ORIG matches BACKPAT.
+func_convert_path_front_back_pathsep ()
+{
+  $opt_debug
+  # ORIG matches FRONTPAT: put REPL in front of the converted path.
+  case $4 in
+    $1) func_to_host_path_result=$3$func_to_host_path_result ;;
+  esac
+  # ORIG matches BACKPAT: put REPL after the converted path.
+  case $4 in
+    $2) func_append func_to_host_path_result "$3" ;;
+  esac
+}
+# end func_convert_path_front_back_pathsep
+
+
+##################################################
+# $build to $host FILE NAME CONVERSION FUNCTIONS #
+##################################################
+# invoked via `$to_host_file_cmd ARG'
+#
+# In each case, ARG is the path to be converted from $build to $host format.
+# Result will be available in $func_to_host_file_result.
+
+
+# func_to_host_file ARG
+# Converts the file name ARG from $build format to $host format. Return result
+# in func_to_host_file_result.
+func_to_host_file ()
+{
+  $opt_debug
+  # Dispatch to the conversion function whose name is stored in
+  # $to_host_file_cmd; that function is the one that sets
+  # func_to_host_file_result.
+  $to_host_file_cmd "${1}"
+}
+# end func_to_host_file
+
+
+# func_to_tool_file ARG LAZY
+# converts the file name ARG from $build format to toolchain format. Return
+# result in func_to_tool_file_result.  If the conversion in use is listed
+# in (the comma separated) LAZY, no conversion takes place.
+func_to_tool_file ()
+{
+  $opt_debug
+  # LAZY is wrapped in commas so that every entry, including the first and
+  # the last, can be matched as ",name,".
+  case ,$2, in
+    *,"$to_tool_file_cmd",*)
+      # The active conversion is listed in LAZY: hand ARG back unchanged.
+      func_to_tool_file_result=$1
+      return 0
+      ;;
+  esac
+
+  # Otherwise run the conversion; it reports through
+  # func_to_host_file_result, which is copied to our own result variable.
+  $to_tool_file_cmd "$1"
+  func_to_tool_file_result=$func_to_host_file_result
+}
+# end func_to_tool_file
+
+
+# func_convert_file_noop ARG
+# Copy ARG to func_to_host_file_result.
+func_convert_file_noop ()
+{
+  # Identity conversion: the result is ARG itself.
+  func_to_host_file_result=$1
+}
+# end func_convert_file_noop
+
+
+# func_convert_file_msys_to_w32 ARG
+# Convert file name ARG from (mingw) MSYS to (mingw) w32 format; automatic
+# conversion to w32 is not available inside the cwrapper.  Returns result in
+# func_to_host_file_result.
+func_convert_file_msys_to_w32 ()
+{
+  $opt_debug
+  # Start from ARG itself; an empty ARG is returned as is.
+  func_to_host_file_result=$1
+  test -z "$1" || {
+    func_convert_core_msys_to_w32 "$1"
+    func_to_host_file_result=$func_convert_core_msys_to_w32_result
+  }
+  # Diagnose a failed conversion and fall back to ARG.
+  func_convert_file_check "$1" "$func_to_host_file_result"
+}
+# end func_convert_file_msys_to_w32
+
+
+# func_convert_file_cygwin_to_w32 ARG
+# Convert file name ARG from Cygwin to w32 format.  Returns result in
+# func_to_host_file_result.
+func_convert_file_cygwin_to_w32 ()
+{
+  $opt_debug
+  # Start from ARG itself; an empty ARG is returned as is.
+  func_to_host_file_result=$1
+  # $build is cygwin here, so the cygpath found in $PATH is "the" cygpath
+  # and LT_CYGPATH is not needed.
+  test -z "$1" || func_to_host_file_result=`cygpath -m "$1"`
+  # Diagnose a failed conversion and fall back to ARG.
+  func_convert_file_check "$1" "$func_to_host_file_result"
+}
+# end func_convert_file_cygwin_to_w32
+
+
+# func_convert_file_nix_to_w32 ARG
+# Convert file name ARG from *nix to w32 format.  Requires a wine environment
+# and a working winepath. Returns result in func_to_host_file_result.
+func_convert_file_nix_to_w32 ()
+{
+  $opt_debug
+  # Start from ARG itself; an empty ARG is returned as is.
+  func_to_host_file_result=$1
+  case "$1" in
+  '') ;;
+  *)
+    # Let winepath do the work.
+    func_convert_core_file_wine_to_w32 "$1"
+    func_to_host_file_result=$func_convert_core_file_wine_to_w32_result
+    ;;
+  esac
+  # Diagnose a failed conversion and fall back to ARG.
+  func_convert_file_check "$1" "$func_to_host_file_result"
+}
+# end func_convert_file_nix_to_w32
+
+
+# func_convert_file_msys_to_cygwin ARG
+# Convert file name ARG from MSYS to Cygwin format.  Requires LT_CYGPATH set.
+# Returns result in func_to_host_file_result.
+func_convert_file_msys_to_cygwin ()
+{
+  $opt_debug
+  # Start from ARG itself; an empty ARG is returned as is.
+  func_to_host_file_result=$1
+  test -z "$1" || {
+    # Two steps: MSYS -> w32, then w32 -> Cygwin via cygpath -u.
+    func_convert_core_msys_to_w32 "$1"
+    func_cygpath -u "$func_convert_core_msys_to_w32_result"
+    func_to_host_file_result=$func_cygpath_result
+  }
+  # Diagnose a failed conversion and fall back to ARG.
+  func_convert_file_check "$1" "$func_to_host_file_result"
+}
+# end func_convert_file_msys_to_cygwin
+
+
+# func_convert_file_nix_to_cygwin ARG
+# Convert file name ARG from *nix to Cygwin format.  Requires Cygwin installed
+# in a wine environment, working winepath, and LT_CYGPATH set.  Returns result
+# in func_to_host_file_result.
+func_convert_file_nix_to_cygwin ()
+{
+  $opt_debug
+  # Start from ARG itself; an empty ARG is returned as is.
+  func_to_host_file_result=$1
+  case "$1" in
+  '') ;;
+  *)
+    # Two steps: *nix -> w32 via winepath, then w32 -> Cygwin via cygpath -u.
+    func_convert_core_file_wine_to_w32 "$1"
+    func_cygpath -u "$func_convert_core_file_wine_to_w32_result"
+    func_to_host_file_result=$func_cygpath_result
+    ;;
+  esac
+  # Diagnose a failed conversion and fall back to ARG.
+  func_convert_file_check "$1" "$func_to_host_file_result"
+}
+# end func_convert_file_nix_to_cygwin
+
+
+#############################################
+# $build to $host PATH CONVERSION FUNCTIONS #
+#############################################
+# invoked via `$to_host_path_cmd ARG'
+#
+# In each case, ARG is the path to be converted from $build to $host format.
+# The result will be available in $func_to_host_path_result.
+#
+# Path separators are also converted from $build format to $host format.  If
+# ARG begins or ends with a path separator character, it is preserved (but
+# converted to $host format) on output.
+#
+# All path conversion functions are named using the following convention:
+#   file name conversion function    : func_convert_file_X_to_Y ()
+#   path conversion function         : func_convert_path_X_to_Y ()
+# where, for any given $build/$host combination the 'X_to_Y' value is the
+# same.  If conversion functions are added for new $build/$host combinations,
+# the two new functions must follow this pattern, or func_init_to_host_path_cmd
+# will break.
+
+
+# func_init_to_host_path_cmd
+# Ensures that function "pointer" variable $to_host_path_cmd is set to the
+# appropriate value, based on the value of $to_host_file_cmd.
+to_host_path_cmd=
+func_init_to_host_path_cmd ()
+{
+  $opt_debug
+  # Already initialized: keep the stored function name.
+  test -z "$to_host_path_cmd" || return 0
+
+  # Derive the path converter from the file converter by swapping the
+  # "func_convert_file_" prefix for "func_convert_path_"; the X_to_Y suffix
+  # is shared between the two.
+  func_stripname 'func_convert_file_' '' "$to_host_file_cmd"
+  to_host_path_cmd=func_convert_path_$func_stripname_result
+}
+
+
+# func_to_host_path ARG
+# Converts the path ARG from $build format to $host format. Return result
+# in func_to_host_path_result.
+func_to_host_path ()
+{
+  $opt_debug
+  # Make sure $to_host_path_cmd names a conversion function, then dispatch
+  # to it; that function is the one that sets func_to_host_path_result.
+  func_init_to_host_path_cmd
+  $to_host_path_cmd "${1}"
+}
+# end func_to_host_path
+
+
+# func_convert_path_noop ARG
+# Copy ARG to func_to_host_path_result.
+func_convert_path_noop ()
+{
+  # Identity conversion: the result is ARG itself.
+  func_to_host_path_result=$1
+}
+# end func_convert_path_noop
+
+
+# func_convert_path_msys_to_w32 ARG
+# Convert path ARG from (mingw) MSYS to (mingw) w32 format; automatic
+# conversion to w32 is not available inside the cwrapper.  Returns result in
+# func_to_host_path_result.
+func_convert_path_msys_to_w32 ()
+{
+  $opt_debug
+  # Start from ARG itself; an empty ARG is returned as is.
+  func_to_host_path_result=$1
+  test -n "$1" || return 0
+
+  # Strip leading and trailing path separators before converting.  MSYS
+  # behavior is inconsistent here; cygpath turns them into '.;' and ';.';
+  # and winepath ignores them completely.
+  func_stripname : : "$1"
+  func_to_host_path_tmp1=$func_stripname_result
+  func_convert_core_msys_to_w32 "$func_to_host_path_tmp1"
+  func_to_host_path_result=$func_convert_core_msys_to_w32_result
+  # Diagnose a failed conversion (falls back to a plain ':' -> ';' swap).
+  func_convert_path_check : ";" \
+    "$func_to_host_path_tmp1" "$func_to_host_path_result"
+  # Put back, as ';', any separator ARG started or ended with.
+  func_convert_path_front_back_pathsep ":*" "*:" ";" "$1"
+}
+# end func_convert_path_msys_to_w32
+
+
+# func_convert_path_cygwin_to_w32 ARG
+# Convert path ARG from Cygwin to w32 format.  Returns result in
+# func_to_host_path_result.
+func_convert_path_cygwin_to_w32 ()
+{
+  $opt_debug
+  # Start from ARG itself; an empty ARG is returned as is.
+  func_to_host_path_result=$1
+  test -n "$1" || return 0
+
+  # Strip leading and trailing ':' before converting; they are put back
+  # (as ';') at the end.
+  func_stripname : : "$1"
+  func_to_host_path_tmp1=$func_stripname_result
+  func_to_host_path_result=`cygpath -m -p "$func_to_host_path_tmp1"`
+  # Diagnose a failed conversion (falls back to a plain ':' -> ';' swap).
+  func_convert_path_check : ";" \
+    "$func_to_host_path_tmp1" "$func_to_host_path_result"
+  func_convert_path_front_back_pathsep ":*" "*:" ";" "$1"
+}
+# end func_convert_path_cygwin_to_w32
+
+
+# func_convert_path_nix_to_w32 ARG
+# Convert path ARG from *nix to w32 format.  Requires a wine environment and
+# a working winepath.  Returns result in func_to_host_path_result.
+func_convert_path_nix_to_w32 ()
+{
+  $opt_debug
+  # Start from ARG itself; an empty ARG is returned as is.
+  func_to_host_path_result=$1
+  test -n "$1" || return 0
+
+  # Strip leading and trailing ':' before converting; they are put back
+  # (as ';') at the end.
+  func_stripname : : "$1"
+  func_to_host_path_tmp1=$func_stripname_result
+  func_convert_core_path_wine_to_w32 "$func_to_host_path_tmp1"
+  func_to_host_path_result=$func_convert_core_path_wine_to_w32_result
+  # Diagnose a failed conversion (falls back to a plain ':' -> ';' swap).
+  func_convert_path_check : ";" \
+    "$func_to_host_path_tmp1" "$func_to_host_path_result"
+  func_convert_path_front_back_pathsep ":*" "*:" ";" "$1"
+}
+# end func_convert_path_nix_to_w32
+
+
+# func_convert_path_msys_to_cygwin ARG
+# Convert path ARG from MSYS to Cygwin format.  Requires LT_CYGPATH set.
+# Returns result in func_to_host_path_result.
+func_convert_path_msys_to_cygwin ()
+{
+  $opt_debug
+  # Start from ARG itself; an empty ARG is returned as is.
+  func_to_host_path_result=$1
+  test -n "$1" || return 0
+
+  # Strip leading and trailing ':' before converting; they are put back
+  # at the end.
+  func_stripname : : "$1"
+  func_to_host_path_tmp1=$func_stripname_result
+  # Two steps: MSYS -> w32, then w32 -> Cygwin via cygpath -u -p.
+  func_convert_core_msys_to_w32 "$func_to_host_path_tmp1"
+  func_cygpath -u -p "$func_convert_core_msys_to_w32_result"
+  func_to_host_path_result=$func_cygpath_result
+  # Both sides use ':' as separator, so the fallback is the stripped ARG.
+  func_convert_path_check : : \
+    "$func_to_host_path_tmp1" "$func_to_host_path_result"
+  func_convert_path_front_back_pathsep ":*" "*:" : "$1"
+}
+# end func_convert_path_msys_to_cygwin
+
+
+# func_convert_path_nix_to_cygwin ARG
+# Convert path ARG from *nix to Cygwin format.  Requires Cygwin installed in a
+# wine environment, working winepath, and LT_CYGPATH set.  Returns result in
+# func_to_host_path_result.
+func_convert_path_nix_to_cygwin ()
+{
+  $opt_debug
+  # Start from ARG itself; an empty ARG is returned as is.
+  func_to_host_path_result=$1
+  test -n "$1" || return 0
+
+  # Strip leading and trailing path separators before converting.  msys
+  # behavior is inconsistent here, cygpath turns them into '.;' and ';.',
+  # and winepath ignores them completely.
+  func_stripname : : "$1"
+  func_to_host_path_tmp1=$func_stripname_result
+  # Two steps: *nix -> w32 via winepath, then w32 -> Cygwin via cygpath -u -p.
+  func_convert_core_path_wine_to_w32 "$func_to_host_path_tmp1"
+  func_cygpath -u -p "$func_convert_core_path_wine_to_w32_result"
+  func_to_host_path_result=$func_cygpath_result
+  # Both sides use ':' as separator, so the fallback is the stripped ARG.
+  func_convert_path_check : : \
+    "$func_to_host_path_tmp1" "$func_to_host_path_result"
+  func_convert_path_front_back_pathsep ":*" "*:" : "$1"
+}
+# end func_convert_path_nix_to_cygwin
+
+
+# func_mode_compile arg...
+func_mode_compile ()
+{
+    # Implements `--mode=compile'.  The arguments are the compile command
+    # with libtool-specific options mixed in.  Outline:
+    #   1. split the arguments into libtool options, the compiler command
+    #      (base_compile) and the source file (srcfile);
+    #   2. derive the libtool object name (*.lo) and the object names;
+    #   3. run the compiler for the PIC object and/or the non-PIC object,
+    #      depending on $build_libtool_libs and $build_old_libs;
+    #   4. write the libtool object file and exit.
+    # This function does not return: it always ends in `exit'.
+    $opt_debug
+    # Get the compilation command and the source file.
+    base_compile=
+    srcfile="$nonopt"  #  always keep a non-empty value in "srcfile"
+    suppress_opt=yes
+    suppress_output=
+    arg_mode=normal
+    libobj=
+    later=
+    pie_flag=
+
+    # arg_mode is a small state machine: `normal' inspects the argument,
+    # `target' takes it as the operand of -o, and `arg' takes it as the
+    # operand of -Xcompiler.
+    for arg
+    do
+      case $arg_mode in
+      arg  )
+       # do not "continue".  Instead, add this to base_compile
+       lastarg="$arg"
+       arg_mode=normal
+       ;;
+
+      target )
+       libobj="$arg"
+       arg_mode=normal
+       continue
+       ;;
+
+      normal )
+       # Accept any command-line options.
+       case $arg in
+       -o)
+         test -n "$libobj" && \
+           func_fatal_error "you cannot specify \`-o' more than once"
+         arg_mode=target
+         continue
+         ;;
+
+       -pie | -fpie | -fPIE)
+          func_append pie_flag " $arg"
+         continue
+         ;;
+
+       -shared | -static | -prefer-pic | -prefer-non-pic)
+         # Remembered and acted upon after the tag has been inferred.
+         func_append later " $arg"
+         continue
+         ;;
+
+       -no-suppress)
+         suppress_opt=no
+         continue
+         ;;
+
+       -Xcompiler)
+         arg_mode=arg  #  the next one goes into the "base_compile" arg list
+         continue      #  The current "srcfile" will either be retained or
+         ;;            #  replaced later.  I would guess that would be a bug.
+
+       -Wc,*)
+         # Split the comma separated flags and quote each one.
+         func_stripname '-Wc,' '' "$arg"
+         args=$func_stripname_result
+         lastarg=
+         save_ifs="$IFS"; IFS=','
+         for arg in $args; do
+           IFS="$save_ifs"
+           func_append_quoted lastarg "$arg"
+         done
+         IFS="$save_ifs"
+         func_stripname ' ' '' "$lastarg"
+         lastarg=$func_stripname_result
+
+         # Add the arguments to base_compile.
+         func_append base_compile " $lastarg"
+         continue
+         ;;
+
+       *)
+         # Accept the current argument as the source file.
+         # The previous "srcfile" becomes the current argument.
+         #
+         lastarg="$srcfile"
+         srcfile="$arg"
+         ;;
+       esac  #  case $arg
+       ;;
+      esac    #  case $arg_mode
+
+      # Aesthetically quote the previous argument.
+      func_append_quoted base_compile "$lastarg"
+    done # for arg
+
+    # An option that expects an operand must not be the last argument.
+    case $arg_mode in
+    arg)
+      func_fatal_error "you must specify an argument for -Xcompiler"
+      ;;
+    target)
+      func_fatal_error "you must specify a target with \`-o'"
+      ;;
+    *)
+      # Get the name of the library object.
+      test -z "$libobj" && {
+       func_basename "$srcfile"
+       libobj="$func_basename_result"
+      }
+      ;;
+    esac
+
+    # Recognize several different file suffixes.
+    # If the user specifies -o file.o, it is replaced with file.lo
+    case $libobj in
+    *.[cCFSifmso] | \
+    *.ada | *.adb | *.ads | *.asm | \
+    *.c++ | *.cc | *.ii | *.class | *.cpp | *.cxx | \
+    *.[fF][09]? | *.for | *.java | *.go | *.obj | *.sx | *.cu | *.cup)
+      func_xform "$libobj"
+      libobj=$func_xform_result
+      ;;
+    esac
+
+    case $libobj in
+    *.lo) func_lo2o "$libobj"; obj=$func_lo2o_result ;;
+    *)
+      func_fatal_error "cannot determine name of library object from \`$libobj'"
+      ;;
+    esac
+
+    func_infer_tag $base_compile
+
+    # Apply the options that were set aside during argument parsing.
+    for arg in $later; do
+      case $arg in
+      -shared)
+       test "$build_libtool_libs" != yes && \
+         func_fatal_configuration "can not build a shared library"
+       build_old_libs=no
+       continue
+       ;;
+
+      -static)
+       build_libtool_libs=no
+       build_old_libs=yes
+       continue
+       ;;
+
+      -prefer-pic)
+       pic_mode=yes
+       continue
+       ;;
+
+      -prefer-non-pic)
+       pic_mode=no
+       continue
+       ;;
+      esac
+    done
+
+    # Warn when the libobj name needs shell quoting.
+    func_quote_for_eval "$libobj"
+    test "X$libobj" != "X$func_quote_for_eval_result" \
+      && $ECHO "X$libobj" | $GREP '[]~#^*{};<>?"'"'"'   &()|`$[]' \
+      && func_warning "libobj name \`$libobj' may not contain shell special characters."
+    func_dirname_and_basename "$obj" "/" ""
+    objname="$func_basename_result"
+    xdir="$func_dirname_result"
+    lobj=${xdir}$objdir/$objname
+
+    test -z "$base_compile" && \
+      func_fatal_help "you must specify a compilation command"
+
+    # Delete any leftover library objects.
+    if test "$build_old_libs" = yes; then
+      removelist="$obj $lobj $libobj ${libobj}T"
+    else
+      removelist="$lobj $libobj ${libobj}T"
+    fi
+
+    # On Cygwin there's no "real" PIC flag so we must build both object types
+    case $host_os in
+    cygwin* | mingw* | pw32* | os2* | cegcc*)
+      pic_mode=default
+      ;;
+    esac
+    if test "$pic_mode" = no && test "$deplibs_check_method" != pass_all; then
+      # non-PIC code in shared libraries is not supported
+      pic_mode=default
+    fi
+
+    # Calculate the filename of the output object if compiler does
+    # not support -o with -c
+    if test "$compiler_c_o" = no; then
+      output_obj=`$ECHO "$srcfile" | $SED 's%^.*/%%; s%\.[^.]*$%%'`.${objext}
+      lockfile="$output_obj.lock"
+    else
+      output_obj=
+      need_locks=no
+      lockfile=
+    fi
+
+    # Lock this critical section if it is needed
+    # We use this script file to make the link, it avoids creating a new file
+    if test "$need_locks" = yes; then
+      until $opt_dry_run || ln "$progpath" "$lockfile" 2>/dev/null; do
+       func_echo "Waiting for $lockfile to be removed"
+       sleep 2
+      done
+    elif test "$need_locks" = warn; then
+      if test -f "$lockfile"; then
+       $ECHO "\
+*** ERROR, $lockfile exists and contains:
+`cat $lockfile 2>/dev/null`
+
+This indicates that another process is trying to use the same
+temporary object file, and libtool could not work around it because
+your compiler does not support \`-c' and \`-o' together.  If you
+repeat this compilation, it may succeed, by chance, but you had better
+avoid parallel builds (make -j) in this platform, or get a better
+compiler."
+
+       $opt_dry_run || $RM $removelist
+       exit $EXIT_FAILURE
+      fi
+      func_append removelist " $output_obj"
+      $ECHO "$srcfile" > "$lockfile"
+    fi
+
+    # Remove stale outputs now, and again (plus the lock file) if the
+    # script is interrupted by signal 1, 2 or 15.
+    $opt_dry_run || $RM $removelist
+    func_append removelist " $lockfile"
+    trap '$opt_dry_run || $RM $removelist; exit $EXIT_FAILURE' 1 2 15
+
+    func_to_tool_file "$srcfile" func_convert_file_msys_to_w32
+    srcfile=$func_to_tool_file_result
+    func_quote_for_eval "$srcfile"
+    qsrcfile=$func_quote_for_eval_result
+
+    # Only build a PIC object if we are building libtool libraries.
+    if test "$build_libtool_libs" = yes; then
+      # Without this assignment, base_compile gets emptied.
+      fbsd_hideous_sh_bug=$base_compile
+
+      if test "$pic_mode" != no; then
+       command="$base_compile $qsrcfile $pic_flag"
+      else
+       # Don't build PIC code
+       command="$base_compile $qsrcfile"
+      fi
+
+      func_mkdir_p "$xdir$objdir"
+
+      if test -z "$output_obj"; then
+       # Place PIC objects in $objdir
+       func_append command " -o $lobj"
+      fi
+
+      func_show_eval_locale "$command" \
+          'test -n "$output_obj" && $RM $removelist; exit $EXIT_FAILURE'
+
+      # In `warn' mode the lock file must still hold our source file name.
+      if test "$need_locks" = warn &&
+        test "X`cat $lockfile 2>/dev/null`" != "X$srcfile"; then
+       $ECHO "\
+*** ERROR, $lockfile contains:
+`cat $lockfile 2>/dev/null`
+
+but it should contain:
+$srcfile
+
+This indicates that another process is trying to use the same
+temporary object file, and libtool could not work around it because
+your compiler does not support \`-c' and \`-o' together.  If you
+repeat this compilation, it may succeed, by chance, but you had better
+avoid parallel builds (make -j) in this platform, or get a better
+compiler."
+
+       $opt_dry_run || $RM $removelist
+       exit $EXIT_FAILURE
+      fi
+
+      # Just move the object if needed, then go on to compile the next one
+      if test -n "$output_obj" && test "X$output_obj" != "X$lobj"; then
+       func_show_eval '$MV "$output_obj" "$lobj"' \
+         'error=$?; $opt_dry_run || $RM $removelist; exit $error'
+      fi
+
+      # Allow error messages only from the first compilation.
+      if test "$suppress_opt" = yes; then
+       suppress_output=' >/dev/null 2>&1'
+      fi
+    fi
+
+    # Only build a position-dependent object if we build old libraries.
+    if test "$build_old_libs" = yes; then
+      if test "$pic_mode" != yes; then
+       # Don't build PIC code
+       command="$base_compile $qsrcfile$pie_flag"
+      else
+       command="$base_compile $qsrcfile $pic_flag"
+      fi
+      if test "$compiler_c_o" = yes; then
+       func_append command " -o $obj"
+      fi
+
+      # Suppress compiler output if we already did a PIC compilation.
+      func_append command "$suppress_output"
+      func_show_eval_locale "$command" \
+        '$opt_dry_run || $RM $removelist; exit $EXIT_FAILURE'
+
+      # In `warn' mode the lock file must still hold our source file name.
+      if test "$need_locks" = warn &&
+        test "X`cat $lockfile 2>/dev/null`" != "X$srcfile"; then
+       $ECHO "\
+*** ERROR, $lockfile contains:
+`cat $lockfile 2>/dev/null`
+
+but it should contain:
+$srcfile
+
+This indicates that another process is trying to use the same
+temporary object file, and libtool could not work around it because
+your compiler does not support \`-c' and \`-o' together.  If you
+repeat this compilation, it may succeed, by chance, but you had better
+avoid parallel builds (make -j) in this platform, or get a better
+compiler."
+
+       $opt_dry_run || $RM $removelist
+       exit $EXIT_FAILURE
+      fi
+
+      # Just move the object if needed
+      if test -n "$output_obj" && test "X$output_obj" != "X$obj"; then
+       func_show_eval '$MV "$output_obj" "$obj"' \
+         'error=$?; $opt_dry_run || $RM $removelist; exit $error'
+      fi
+    fi
+
+    $opt_dry_run || {
+      func_write_libtool_object "$libobj" "$objdir/$objname" "$objname"
+
+      # Unlock the critical section if it was locked
+      if test "$need_locks" != no; then
+       removelist=$lockfile
+        $RM "$lockfile"
+      fi
+    }
+
+    exit $EXIT_SUCCESS
+}
+
+$opt_help || {
+  test "$opt_mode" = compile && func_mode_compile ${1+"$@"}
+}
+
+func_mode_help ()
+{
+    # We need to display help for each of the modes.
+    case $opt_mode in
+      "")
+        # Generic help is extracted from the usage comments
+        # at the start of this file.
+        func_help
+        ;;
+
+      clean)
+        $ECHO \
+"Usage: $progname [OPTION]... --mode=clean RM [RM-OPTION]... FILE...
+
+Remove files from the build directory.
+
+RM is the name of the program to use to delete files associated with each FILE
+(typically \`/bin/rm').  RM-OPTIONS are options (such as \`-f') to be passed
+to RM.
+
+If FILE is a libtool library, object or program, all the files associated
+with it are deleted. Otherwise, only FILE itself is deleted using RM."
+        ;;
+
+      compile)
+      $ECHO \
+"Usage: $progname [OPTION]... --mode=compile COMPILE-COMMAND... SOURCEFILE
+
+Compile a source file into a libtool library object.
+
+This mode accepts the following additional options:
+
+  -o OUTPUT-FILE    set the output file name to OUTPUT-FILE
+  -no-suppress      do not suppress compiler output for multiple passes
+  -prefer-pic       try to build PIC objects only
+  -prefer-non-pic   try to build non-PIC objects only
+  -shared           do not build a \`.o' file suitable for static linking
+  -static           only build a \`.o' file suitable for static linking
+  -Wc,FLAG          pass FLAG directly to the compiler
+
+COMPILE-COMMAND is a command to be used in creating a \`standard' object file
+from the given SOURCEFILE.
+
+The output file name is determined by removing the directory component from
+SOURCEFILE, then substituting the C source code suffix \`.c' with the
+library object suffix, \`.lo'."
+        ;;
+
+      execute)
+        $ECHO \
+"Usage: $progname [OPTION]... --mode=execute COMMAND [ARGS]...
+
+Automatically set library path, then run a program.
+
+This mode accepts the following additional options:
+
+  -dlopen FILE      add the directory containing FILE to the library path
+
+This mode sets the library path environment variable according to \`-dlopen'
+flags.
+
+If any of the ARGS are libtool executable wrappers, then they are translated
+into their corresponding uninstalled binary, and any of their required library
+directories are added to the library path.
+
+Then, COMMAND is executed, with ARGS as arguments."
+        ;;
+
+      finish)
+        $ECHO \
+"Usage: $progname [OPTION]... --mode=finish [LIBDIR]...
+
+Complete the installation of libtool libraries.
+
+Each LIBDIR is a directory that contains libtool libraries.
+
+The commands that this mode executes may require superuser privileges.  Use
+the \`--dry-run' option if you just want to see what would be executed."
+        ;;
+
+      install)
+        $ECHO \
+"Usage: $progname [OPTION]... --mode=install INSTALL-COMMAND...
+
+Install executables or libraries.
+
+INSTALL-COMMAND is the installation command.  The first component should be
+either the \`install' or \`cp' program.
+
+The following components of INSTALL-COMMAND are treated specially:
+
+  -inst-prefix-dir PREFIX-DIR  Use PREFIX-DIR as a staging area for installation
+
+The rest of the components are interpreted as arguments to that command (only
+BSD-compatible install options are recognized)."
+        ;;
+
+      link)
+        $ECHO \
+"Usage: $progname [OPTION]... --mode=link LINK-COMMAND...
+
+Link object files or libraries together to form another library, or to
+create an executable program.
+
+LINK-COMMAND is a command using the C compiler that you would use to create
+a program from several object files.
+
+The following components of LINK-COMMAND are treated specially:
+
+  -all-static       do not do any dynamic linking at all
+  -avoid-version    do not add a version suffix if possible
+  -bindir BINDIR    specify path to binaries directory (for systems where
+                    libraries must be found in the PATH setting at runtime)
+  -dlopen FILE      \`-dlpreopen' FILE if it cannot be dlopened at runtime
+  -dlpreopen FILE   link in FILE and add its symbols to lt_preloaded_symbols
+  -export-dynamic   allow symbols from OUTPUT-FILE to be resolved with dlsym(3)
+  -export-symbols SYMFILE
+                    try to export only the symbols listed in SYMFILE
+  -export-symbols-regex REGEX
+                    try to export only the symbols matching REGEX
+  -LLIBDIR          search LIBDIR for required installed libraries
+  -lNAME            OUTPUT-FILE requires the installed library libNAME
+  -module           build a library that can dlopened
+  -no-fast-install  disable the fast-install mode
+  -no-install       link a not-installable executable
+  -no-undefined     declare that a library does not refer to external symbols
+  -o OUTPUT-FILE    create OUTPUT-FILE from the specified objects
+  -objectlist FILE  Use a list of object files found in FILE to specify objects
+  -precious-files-regex REGEX
+                    don't remove output files matching REGEX
+  -release RELEASE  specify package release information
+  -rpath LIBDIR     the created library will eventually be installed in LIBDIR
+  -R[ ]LIBDIR       add LIBDIR to the runtime path of programs and libraries
+  -shared           only do dynamic linking of libtool libraries
+  -shrext SUFFIX    override the standard shared library file extension
+  -static           do not do any dynamic linking of uninstalled libtool libraries
+  -static-libtool-libs
+                    do not do any dynamic linking of libtool libraries
+  -version-info CURRENT[:REVISION[:AGE]]
+                    specify library version info [each variable defaults to 0]
+  -weak LIBNAME     declare that the target provides the LIBNAME interface
+  -Wc,FLAG
+  -Xcompiler FLAG   pass linker-specific FLAG directly to the compiler
+  -Wl,FLAG
+  -Xlinker FLAG     pass linker-specific FLAG directly to the linker
+  -XCClinker FLAG   pass link-specific FLAG to the compiler driver (CC)
+
+All other options (arguments beginning with \`-') are ignored.
+
+Every other argument is treated as a filename.  Files ending in \`.la' are
+treated as uninstalled libtool libraries, other files are standard or library
+object files.
+
+If the OUTPUT-FILE ends in \`.la', then a libtool library is created,
+only library objects (\`.lo' files) may be specified, and \`-rpath' is
+required, except when creating a convenience library.
+
+If OUTPUT-FILE ends in \`.a' or \`.lib', then a standard library is created
+using \`ar' and \`ranlib', or on Windows using \`lib'.
+
+If OUTPUT-FILE ends in \`.lo' or \`.${objext}', then a reloadable object file
+is created, otherwise an executable program is created."
+        ;;
+
+      uninstall)
+        $ECHO \
+"Usage: $progname [OPTION]... --mode=uninstall RM [RM-OPTION]... FILE...
+
+Remove libraries from an installation directory.
+
+RM is the name of the program to use to delete files associated with each FILE
+(typically \`/bin/rm').  RM-OPTIONS are options (such as \`-f') to be passed
+to RM.
+
+If FILE is a libtool library, all the files associated with it are deleted.
+Otherwise, only FILE itself is deleted using RM."
+        ;;
+
+      *)
+        func_fatal_help "invalid operation mode \`$opt_mode'"
+        ;;
+    esac
+
+    echo
+    $ECHO "Try \`$progname --help' for more information about other modes."
+}
+
+# Help handling, done once a possible --mode argument has been collected.
+# $opt_help is run as a command: when it succeeds, help is printed and the
+# script exits with the status of whatever produced the output.
+#   - value exactly `:'    -> help for the single mode in $opt_mode
+#   - any other true value -> general help plus the help of every mode
+if $opt_help
+then
+  if test "$opt_help" != :
+  then
+    # Pass 1 (synopsis): print the first line as is, and every later
+    # `Usage:' line with that prefix rewritten to `  or: '.
+    {
+      func_help noexit
+      for opt_mode in compile link execute install finish uninstall clean
+      do
+       func_mode_help
+      done
+    } | sed -n '1p; 2,$s/^Usage:/  or: /p'
+
+    # Pass 2 (descriptions): drop the first line, stash the bug-reporting
+    # paragraph in the hold space and emit it at the very end, delete the
+    # per-mode pointer lines, and turn each mode's `Usage:' line into a
+    # `Description of MODE mode:' heading.
+    {
+      func_help noexit
+      for opt_mode in compile link execute install finish uninstall clean
+      do
+       echo
+       func_mode_help
+      done
+    } |
+    sed '1d
+      /^When reporting/,/^Report/{
+       H
+       d
+      }
+      $x
+      /information about other modes/d
+      /more detailed .*MODE/d
+      s/^Usage:.*--mode=\([^ ]*\) .*/Description of \1 mode:/'
+  else
+    func_mode_help
+  fi
+  exit $?
+fi
+
+
+# func_mode_execute arg...
+func_mode_execute ()
+{
+    $opt_debug
+    # The first argument is the command name.
+    cmd="$nonopt"
+    test -z "$cmd" && \
+      func_fatal_help "you must specify a COMMAND"
+
+    # Handle -dlopen flags immediately.
+    for file in $opt_dlopen; do
+      test -f "$file" \
+       || func_fatal_help "\`$file' is not a file"
+
+      dir=
+      case $file in
+      *.la)
+       func_resolve_sysroot "$file"
+       file=$func_resolve_sysroot_result
+
+       # Check to see that this really is a libtool archive.
+       func_lalib_unsafe_p "$file" \
+         || func_fatal_help "\`$lib' is not a valid libtool archive"
+
+       # Read the libtool library.
+       dlname=
+       library_names=
+       func_source "$file"
+
+       # Skip this library if it cannot be dlopened.
+       if test -z "$dlname"; then
+         # Warn if it was a shared library.
+         test -n "$library_names" && \
+           func_warning "\`$file' was not linked with \`-export-dynamic'"
+         continue
+       fi
+
+       func_dirname "$file" "" "."
+       dir="$func_dirname_result"
+
+       if test -f "$dir/$objdir/$dlname"; then
+         func_append dir "/$objdir"
+       else
+         if test ! -f "$dir/$dlname"; then
+           func_fatal_error "cannot find \`$dlname' in \`$dir' or \`$dir/$objdir'"
+         fi
+       fi
+       ;;
+
+      *.lo)
+       # Just add the directory containing the .lo file.
+       func_dirname "$file" "" "."
+       dir="$func_dirname_result"
+       ;;
+
+      *)
+       func_warning "\`-dlopen' is ignored for non-libtool libraries and objects"
+       continue
+       ;;
+      esac
+
+      # Get the absolute pathname.
+      absdir=`cd "$dir" && pwd`
+      test -n "$absdir" && dir="$absdir"
+
+      # Now add the directory to shlibpath_var.
+      if eval "test -z \"\$$shlibpath_var\""; then
+       eval "$shlibpath_var=\"\$dir\""
+      else
+       eval "$shlibpath_var=\"\$dir:\$$shlibpath_var\""
+      fi
+    done
+
+    # This variable tells wrapper scripts just to set shlibpath_var
+    # rather than running their programs.
+    libtool_execute_magic="$magic"
+
+    # Check if any of the arguments is a wrapper script.
+    args=
+    for file
+    do
+      case $file in
+      -* | *.la | *.lo ) ;;
+      *)
+       # Do a test to see if this is really a libtool program.
+       if func_ltwrapper_script_p "$file"; then
+         func_source "$file"
+         # Transform arg to wrapped name.
+         file="$progdir/$program"
+       elif func_ltwrapper_executable_p "$file"; then
+         func_ltwrapper_scriptname "$file"
+         func_source "$func_ltwrapper_scriptname_result"
+         # Transform arg to wrapped name.
+         file="$progdir/$program"
+       fi
+       ;;
+      esac
+      # Quote arguments (to preserve shell metacharacters).
+      func_append_quoted args "$file"
+    done
+
+    if test "X$opt_dry_run" = Xfalse; then
+      if test -n "$shlibpath_var"; then
+       # Export the shlibpath_var.
+       eval "export $shlibpath_var"
+      fi
+
+      # Restore saved environment variables
+      for lt_var in LANG LANGUAGE LC_ALL LC_CTYPE LC_COLLATE LC_MESSAGES
+      do
+       eval "if test \"\${save_$lt_var+set}\" = set; then
+                $lt_var=\$save_$lt_var; export $lt_var
+             else
+               $lt_unset $lt_var
+             fi"
+      done
+
+      # Now prepare to actually exec the command.
+      exec_cmd="\$cmd$args"
+    else
+      # Display what would be done.
+      if test -n "$shlibpath_var"; then
+       eval "\$ECHO \"\$shlibpath_var=\$$shlibpath_var\""
+       echo "export $shlibpath_var"
+      fi
+      $ECHO "$cmd$args"
+      exit $EXIT_SUCCESS
+    fi
+}
+
+test "$opt_mode" = execute && func_mode_execute ${1+"$@"}
+
+
+# func_mode_finish arg...
+func_mode_finish ()
+{
+    $opt_debug
+    libs=
+    libdirs=
+    admincmds=
+
+    for opt in "$nonopt" ${1+"$@"}
+    do
+      if test -d "$opt"; then
+       func_append libdirs " $opt"
+
+      elif test -f "$opt"; then
+       if func_lalib_unsafe_p "$opt"; then
+         func_append libs " $opt"
+       else
+         func_warning "\`$opt' is not a valid libtool archive"
+       fi
+
+      else
+       func_fatal_error "invalid argument \`$opt'"
+      fi
+    done
+
+    if test -n "$libs"; then
+      if test -n "$lt_sysroot"; then
+        sysroot_regex=`$ECHO "$lt_sysroot" | $SED "$sed_make_literal_regex"`
+        sysroot_cmd="s/\([ ']\)$sysroot_regex/\1/g;"
+      else
+        sysroot_cmd=
+      fi
+
+      # Remove sysroot references
+      if $opt_dry_run; then
+        for lib in $libs; do
+          echo "removing references to $lt_sysroot and \`=' prefixes from $lib"
+        done
+      else
+        tmpdir=`func_mktempdir`
+        for lib in $libs; do
+         sed -e "${sysroot_cmd} s/\([ ']-[LR]\)=/\1/g; s/\([ ']\)=/\1/g" $lib \
+           > $tmpdir/tmp-la
+         mv -f $tmpdir/tmp-la $lib
+       done
+        ${RM}r "$tmpdir"
+      fi
+    fi
+
+    if test -n "$finish_cmds$finish_eval" && test -n "$libdirs"; then
+      for libdir in $libdirs; do
+       if test -n "$finish_cmds"; then
+         # Do each command in the finish commands.
+         func_execute_cmds "$finish_cmds" 'admincmds="$admincmds
+'"$cmd"'"'
+       fi
+       if test -n "$finish_eval"; then
+         # Do the single finish_eval.
+         eval cmds=\"$finish_eval\"
+         $opt_dry_run || eval "$cmds" || func_append admincmds "
+       $cmds"
+       fi
+      done
+    fi
+
+    # Exit here if they wanted silent mode.
+    $opt_silent && exit $EXIT_SUCCESS
+
+    if test -n "$finish_cmds$finish_eval" && test -n "$libdirs"; then
+      echo "----------------------------------------------------------------------"
+      echo "Libraries have been installed in:"
+      for libdir in $libdirs; do
+       $ECHO "   $libdir"
+      done
+      echo
+      echo "If you ever happen to want to link against installed libraries"
+      echo "in a given directory, LIBDIR, you must either use libtool, and"
+      echo "specify the full pathname of the library, or use the \`-LLIBDIR'"
+      echo "flag during linking and do at least one of the following:"
+      if test -n "$shlibpath_var"; then
+       echo "   - add LIBDIR to the \`$shlibpath_var' environment variable"
+       echo "     during execution"
+      fi
+      if test -n "$runpath_var"; then
+       echo "   - add LIBDIR to the \`$runpath_var' environment variable"
+       echo "     during linking"
+      fi
+      if test -n "$hardcode_libdir_flag_spec"; then
+       libdir=LIBDIR
+       eval flag=\"$hardcode_libdir_flag_spec\"
+
+       $ECHO "   - use the \`$flag' linker flag"
+      fi
+      if test -n "$admincmds"; then
+       $ECHO "   - have your system administrator run these commands:$admincmds"
+      fi
+      if test -f /etc/ld.so.conf; then
+       echo "   - have your system administrator add LIBDIR to \`/etc/ld.so.conf'"
+      fi
+      echo
+
+      echo "See any operating system documentation about shared libraries for"
+      case $host in
+       solaris2.[6789]|solaris2.1[0-9])
+         echo "more information, such as the ld(1), crle(1) and ld.so(8) manual"
+         echo "pages."
+         ;;
+       *)
+         echo "more information, such as the ld(1) and ld.so(8) manual pages."
+         ;;
+      esac
+      echo "----------------------------------------------------------------------"
+    fi
+    exit $EXIT_SUCCESS
+}
+
+test "$opt_mode" = finish && func_mode_finish ${1+"$@"}
+
+
+# func_mode_install arg...
+# Implement `--mode=install'.
+#
+# $nonopt holds the first word of INSTALL-COMMAND and the ARGs the rest.
+# The words are split into the install program with its options
+# (install_prog, and install_shared_prog for shared libraries), the source
+# files (files) and the destination (dest, the last non-option word).
+# Each source file is then handled according to its name:
+#   *.$libext  static library, installed after everything else
+#   *.la       libtool archive: optional relink, shared library, symlinks,
+#              the archive itself, and its static library if any
+#   *.lo       libtool object (the script exits after the first one)
+#   other      program; a libtool wrapper script is replaced by the real
+#              (possibly relinked) binary
+# On completion either exec_cmd is set to re-invoke libtool with --finish
+# for the directories in current_libdirs, or the script exits with
+# $EXIT_SUCCESS.
+func_mode_install ()
+{
+    $opt_debug
+    # There may be an optional sh(1) argument at the beginning of
+    # install_prog (especially on Windows NT).
+    if test "$nonopt" = "$SHELL" || test "$nonopt" = /bin/sh ||
+       # Allow the use of GNU shtool's install command.
+       case $nonopt in *shtool*) :;; *) false;; esac; then
+      # Aesthetically quote it.
+      func_quote_for_eval "$nonopt"
+      install_prog="$func_quote_for_eval_result "
+      arg=$1
+      shift
+    else
+      install_prog=
+      arg=$nonopt
+    fi
+
+    # The real first argument should be the name of the installation program.
+    # Aesthetically quote it.
+    func_quote_for_eval "$arg"
+    func_append install_prog "$func_quote_for_eval_result"
+    install_shared_prog=$install_prog
+    # Remember whether the install program is `cp'; this changes how `-f'
+    # is parsed and whether a `-m' override is appended further down.
+    case " $install_prog " in
+      *[\\\ /]cp\ *) install_cp=: ;;
+      *) install_cp=false ;;
+    esac
+
+    # We need to accept at least all the BSD install flags.
+    dest=
+    files=
+    opts=
+    prev=
+    install_type=
+    isdir=no
+    stripme=
+    no_mode=:
+    for arg
+    do
+      arg2=
+      # dest holds the most recent non-option word.  Once one is held,
+      # every further word pushes it onto files and takes its place, so
+      # after the loop dest is the last word of the command line.
+      if test -n "$dest"; then
+       func_append files " $dest"
+       dest=$arg
+       continue
+      fi
+
+      case $arg in
+      -d) isdir=yes ;;
+      -f)
+       # `-f' takes an argument unless the install program is cp.
+       if $install_cp; then :; else
+         prev=$arg
+       fi
+       ;;
+      -g | -m | -o)
+       prev=$arg
+       ;;
+      -s)
+       # Recorded only; `-s' itself is not added to the install commands.
+       stripme=" -s"
+       continue
+       ;;
+      -*)
+       ;;
+      *)
+       # If the previous option needed an argument, then skip it.
+       if test -n "$prev"; then
+         # A user-supplied mode is replaced by $install_override_mode, but
+         # only in the command used for shared libraries (see arg2 below).
+         if test "x$prev" = x-m && test -n "$install_override_mode"; then
+           arg2=$install_override_mode
+           no_mode=false
+         fi
+         prev=
+       else
+         dest=$arg
+         continue
+       fi
+       ;;
+      esac
+
+      # Aesthetically quote the argument.
+      func_quote_for_eval "$arg"
+      func_append install_prog " $func_quote_for_eval_result"
+      # func_quote_for_eval_result still holds the quoted $arg here unless
+      # arg2 replaces it for the shared-library command.
+      if test -n "$arg2"; then
+       func_quote_for_eval "$arg2"
+      fi
+      func_append install_shared_prog " $func_quote_for_eval_result"
+    done
+
+    test -z "$install_prog" && \
+      func_fatal_help "you must specify an install program"
+
+    test -n "$prev" && \
+      func_fatal_help "the \`$prev' option requires an argument"
+
+    # No `-m' was seen on the command line: append the override mode to the
+    # shared-library command, except when installing with cp.
+    if test -n "$install_override_mode" && $no_mode; then
+      if $install_cp; then :; else
+       func_quote_for_eval "$install_override_mode"
+       func_append install_shared_prog " -m $func_quote_for_eval_result"
+      fi
+    fi
+
+    if test -z "$files"; then
+      if test -z "$dest"; then
+       func_fatal_help "no file or destination specified"
+      else
+       func_fatal_help "you must specify a destination"
+      fi
+    fi
+
+    # Strip any trailing slash from the destination.
+    func_stripname '' '/' "$dest"
+    dest=$func_stripname_result
+
+    # Check to see that the destination is a directory.
+    test -d "$dest" && isdir=yes
+    if test "$isdir" = yes; then
+      destdir="$dest"
+      destname=
+    else
+      func_dirname_and_basename "$dest" "" "."
+      destdir="$func_dirname_result"
+      destname="$func_basename_result"
+
+      # Not a directory, so check to see that there is only one file specified.
+      # (Loading $files into the positional parameters lets $# count them.)
+      set dummy $files; shift
+      test "$#" -gt 1 && \
+       func_fatal_help "\`$dest' is not a directory"
+    fi
+    # A relative destination directory is accepted only when every source
+    # file is a libtool object (*.lo).
+    case $destdir in
+    [\\/]* | [A-Za-z]:[\\/]*) ;;
+    *)
+      for file in $files; do
+       case $file in
+       *.lo) ;;
+       *)
+         func_fatal_help "\`$destdir' must be an absolute directory name"
+         ;;
+       esac
+      done
+      ;;
+    esac
+
+    # This variable tells wrapper scripts just to set variables rather
+    # than running their programs.
+    libtool_install_magic="$magic"
+
+    staticlibs=
+    future_libdirs=
+    current_libdirs=
+    for file in $files; do
+
+      # Do each installation.
+      case $file in
+      *.$libext)
+       # Do the static libraries later.
+       func_append staticlibs " $file"
+       ;;
+
+      *.la)
+       func_resolve_sysroot "$file"
+       file=$func_resolve_sysroot_result
+
+       # Check to see that this really is a libtool archive.
+       func_lalib_unsafe_p "$file" \
+         || func_fatal_help "\`$file' is not a valid libtool archive"
+
+       # Reset before sourcing so values from an earlier archive cannot
+       # leak into this one.
+       library_names=
+       old_library=
+       relink_command=
+       func_source "$file"
+
+       # Add the libdir to current_libdirs if it is the destination.
+       if test "X$destdir" = "X$libdir"; then
+         case "$current_libdirs " in
+         *" $libdir "*) ;;
+         *) func_append current_libdirs " $libdir" ;;
+         esac
+       else
+         # Note the libdir as a future libdir.
+         case "$future_libdirs " in
+         *" $libdir "*) ;;
+         *) func_append future_libdirs " $libdir" ;;
+         esac
+       fi
+
+       # dir = <directory of the .la file>/$objdir
+       func_dirname "$file" "/" ""
+       dir="$func_dirname_result"
+       func_append dir "$objdir"
+
+       if test -n "$relink_command"; then
+         # Determine the prefix the user has applied to our future dir.
+         inst_prefix_dir=`$ECHO "$destdir" | $SED -e "s%$libdir\$%%"`
+
+         # Don't allow the user to place us outside of our expected
+         # location b/c this prevents finding dependent libraries that
+         # are installed to the same prefix.
+         # At present, this check doesn't affect windows .dll's that
+         # are installed into $libdir/../bin (currently, that works fine)
+         # but it's something to keep an eye on.
+         # (If the substitution changed nothing, $destdir does not end in
+         # $libdir.)
+         test "$inst_prefix_dir" = "$destdir" && \
+           func_fatal_error "error: cannot install \`$file' to a directory not ending in $libdir"
+
+         if test -n "$inst_prefix_dir"; then
+           # Stick the inst_prefix_dir data into the link command.
+           relink_command=`$ECHO "$relink_command" | $SED "s%@inst_prefix_dir@%-inst-prefix-dir $inst_prefix_dir%"`
+         else
+           relink_command=`$ECHO "$relink_command" | $SED "s%@inst_prefix_dir@%%"`
+         fi
+
+         func_warning "relinking \`$file'"
+         func_show_eval "$relink_command" \
+           'func_fatal_error "error: relink \`$file'\'' with the above command before installing it"'
+       fi
+
+       # See the names of the shared library.
+       # ($1 becomes the real file name, the remaining words the link names.)
+       set dummy $library_names; shift
+       if test -n "$1"; then
+         realname="$1"
+         shift
+
+         srcname="$realname"
+         # NOTE(review): the `T' suffix presumably names the relinked copy
+         # produced in $dir by the relink command -- confirm in link mode.
+         test -n "$relink_command" && srcname="$realname"T
+
+         # Install the shared library and build the symlinks.
+         func_show_eval "$install_shared_prog $dir/$srcname $destdir/$realname" \
+             'exit $?'
+         # Never strip *.dll.a files on the Windows-like hosts listed here.
+         tstripme="$stripme"
+         case $host_os in
+         cygwin* | mingw* | pw32* | cegcc*)
+           case $realname in
+           *.dll.a)
+             tstripme=""
+             ;;
+           esac
+           ;;
+         esac
+         if test -n "$tstripme" && test -n "$striplib"; then
+           func_show_eval "$striplib $destdir/$realname" 'exit $?'
+         fi
+
+         if test "$#" -gt 0; then
+           # Delete the old symlinks, and create new ones.
+           # Try `ln -sf' first, because the `ln' binary might depend on
+           # the symlink we replace!  Solaris /bin/ln does not understand -f,
+           # so we also need to try rm && ln -s.
+           for linkname
+           do
+             test "$linkname" != "$realname" \
+               && func_show_eval "(cd $destdir && { $LN_S -f $realname $linkname || { $RM $linkname && $LN_S $realname $linkname; }; })"
+           done
+         fi
+
+         # Do each command in the postinstall commands.
+         lib="$destdir/$realname"
+         func_execute_cmds "$postinstall_cmds" 'exit $?'
+       fi
+
+       # Install the pseudo-library for information purposes.
+       # NOTE(review): the file installed is "$name"i from $dir (i.e. a
+       # `.lai' file); presumably the installable variant of the archive
+       # written at link time -- confirm.
+       func_basename "$file"
+       name="$func_basename_result"
+       instname="$dir/$name"i
+       func_show_eval "$install_prog $instname $destdir/$name" 'exit $?'
+
+       # Maybe install the static library, too.
+       test -n "$old_library" && func_append staticlibs " $dir/$old_library"
+       ;;
+
+      *.lo)
+       # Install (i.e. copy) a libtool object.
+
+       # Figure out destination file name, if it wasn't already specified.
+       if test -n "$destname"; then
+         destfile="$destdir/$destname"
+       else
+         func_basename "$file"
+         destfile="$func_basename_result"
+         destfile="$destdir/$destfile"
+       fi
+
+       # Deduce the name of the destination old-style object file.
+       case $destfile in
+       *.lo)
+         func_lo2o "$destfile"
+         staticdest=$func_lo2o_result
+         ;;
+       *.$objext)
+         # The destination names a plain object: only the old-style object
+         # is installed, so clear destfile to skip the .lo copy below.
+         staticdest="$destfile"
+         destfile=
+         ;;
+       *)
+         func_fatal_help "cannot copy a libtool object to \`$destfile'"
+         ;;
+       esac
+
+       # Install the libtool object if requested.
+       test -n "$destfile" && \
+         func_show_eval "$install_prog $file $destfile" 'exit $?'
+
+       # Install the old object if enabled.
+       if test "$build_old_libs" = yes; then
+         # Deduce the name of the old-style object file.
+         func_lo2o "$file"
+         staticobj=$func_lo2o_result
+         func_show_eval "$install_prog \$staticobj \$staticdest" 'exit $?'
+       fi
+       # The script ends here: any files listed after this .lo, and the
+       # static-library pass below, are not processed.
+       exit $EXIT_SUCCESS
+       ;;
+
+      *)
+       # Figure out destination file name, if it wasn't already specified.
+       if test -n "$destname"; then
+         destfile="$destdir/$destname"
+       else
+         func_basename "$file"
+         destfile="$func_basename_result"
+         destfile="$destdir/$destfile"
+       fi
+
+       # If the file is missing, and there is a .exe on the end, strip it
+       # because it is most likely a libtool script we actually want to
+       # install
+       stripped_ext=""
+       case $file in
+         *.exe)
+           if test ! -f "$file"; then
+             func_stripname '' '.exe' "$file"
+             file=$func_stripname_result
+             stripped_ext=".exe"
+           fi
+           ;;
+       esac
+
+       # Do a test to see if this is really a libtool program.
+       case $host in
+       *cygwin* | *mingw*)
+           if func_ltwrapper_executable_p "$file"; then
+             func_ltwrapper_scriptname "$file"
+             wrapper=$func_ltwrapper_scriptname_result
+           else
+             func_stripname '' '.exe' "$file"
+             wrapper=$func_stripname_result
+           fi
+           ;;
+       *)
+           wrapper=$file
+           ;;
+       esac
+       if func_ltwrapper_script_p "$wrapper"; then
+         notinst_deplibs=
+         relink_command=
+
+         func_source "$wrapper"
+
+         # Check the variables that should have been set.
+         test -z "$generated_by_libtool_version" && \
+           func_fatal_error "invalid libtool wrapper script \`$wrapper'"
+
+         # finalize stays `yes' only if every not-yet-installed dependency
+         # listed by the wrapper is now present in its libdir.
+         finalize=yes
+         for lib in $notinst_deplibs; do
+           # Check to see that each library is installed.
+           libdir=
+           if test -f "$lib"; then
+             func_source "$lib"
+           fi
+           libfile="$libdir/"`$ECHO "$lib" | $SED 's%^.*/%%g'` ### testsuite: skip nested quoting test
+           if test -n "$libdir" && test ! -f "$libfile"; then
+             func_warning "\`$lib' has not been installed in \`$libdir'"
+             finalize=no
+           fi
+         done
+
+         # Sourcing the dependency archives above may have overwritten
+         # relink_command, so read the wrapper's value again.
+         relink_command=
+         func_source "$wrapper"
+
+         outputname=
+         if test "$fast_install" = no && test -n "$relink_command"; then
+           $opt_dry_run || {
+             if test "$finalize" = yes; then
+               tmpdir=`func_mktempdir`
+               func_basename "$file$stripped_ext"
+               file="$func_basename_result"
+               outputname="$tmpdir/$file"
+               # Replace the output file specification.
+               relink_command=`$ECHO "$relink_command" | $SED 's%@OUTPUT@%'"$outputname"'%g'`
+
+               $opt_silent || {
+                 func_quote_for_expand "$relink_command"
+                 eval "func_echo $func_quote_for_expand_result"
+               }
+               if eval "$relink_command"; then :
+                 else
+                 func_error "error: relink \`$file' with the above command before installing it"
+                 $opt_dry_run || ${RM}r "$tmpdir"
+                 # Give up on this program and go on with the next file.
+                 continue
+               fi
+               file="$outputname"
+             else
+               func_warning "cannot relink \`$file'"
+             fi
+           }
+         else
+           # Install the binary that we compiled earlier.
+           file=`$ECHO "$file$stripped_ext" | $SED "s%\([^/]*\)$%$objdir/\1%"`
+         fi
+       fi
+
+       # remove .exe since cygwin /usr/bin/install will append another
+       # one anyway
+       case $install_prog,$host in
+       */usr/bin/install*,*cygwin*)
+         case $file:$destfile in
+         *.exe:*.exe)
+           # this is ok
+           ;;
+         *.exe:*)
+           destfile=$destfile.exe
+           ;;
+         *:*.exe)
+           func_stripname '' '.exe' "$destfile"
+           destfile=$func_stripname_result
+           ;;
+         esac
+         ;;
+       esac
+       func_show_eval "$install_prog\$stripme \$file \$destfile" 'exit $?'
+       # A non-empty outputname means the program was relinked into a
+       # temporary directory above; remove that directory now.
+       $opt_dry_run || if test -n "$outputname"; then
+         ${RM}r "$tmpdir"
+       fi
+       ;;
+      esac
+    done
+
+    # Second pass: the static libraries collected above.
+    for file in $staticlibs; do
+      func_basename "$file"
+      name="$func_basename_result"
+
+      # Set up the ranlib parameters.
+      oldlib="$destdir/$name"
+      func_to_tool_file "$oldlib" func_convert_file_msys_to_w32
+      tool_oldlib=$func_to_tool_file_result
+
+      func_show_eval "$install_prog \$file \$oldlib" 'exit $?'
+
+      if test -n "$stripme" && test -n "$old_striplib"; then
+       func_show_eval "$old_striplib $tool_oldlib" 'exit $?'
+      fi
+
+      # Do each command in the postinstall commands.
+      func_execute_cmds "$old_postinstall_cmds" 'exit $?'
+    done
+
+    test -n "$future_libdirs" && \
+      func_warning "remember to run \`$progname --finish$future_libdirs'"
+
+    if test -n "$current_libdirs"; then
+      # Maybe just do a dry run.
+      $opt_dry_run && current_libdirs=" -n$current_libdirs"
+      # Single quotes keep the variables unexpanded inside exec_cmd.
+      exec_cmd='$SHELL $progpath $preserve_args --finish$current_libdirs'
+    else
+      exit $EXIT_SUCCESS
+    fi
+}
+
+# Run install mode when it is the selected operation mode.
+test "$opt_mode" = install && func_mode_install ${1+"$@"}
+
+
+# func_generate_dlsyms outputname originator pic_p
+# Extract symbols from dlprefiles and create ${outputname}S.o with
+# a dlpreopen symbol table.
+func_generate_dlsyms ()
+{
+    $opt_debug
+    my_outputname="$1"
+    my_originator="$2"
+    my_pic_p="${3-no}"
+    my_prefix=`$ECHO "$my_originator" | sed 's%[^a-zA-Z0-9]%_%g'`
+    my_dlsyms=
+
+    if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then
+      if test -n "$NM" && test -n "$global_symbol_pipe"; then
+       my_dlsyms="${my_outputname}S.c"
+      else
+       func_error "not configured to extract global symbols from dlpreopened files"
+      fi
+    fi
+
+    if test -n "$my_dlsyms"; then
+      case $my_dlsyms in
+      "") ;;
+      *.c)
+       # Discover the nlist of each of the dlfiles.
+       nlist="$output_objdir/${my_outputname}.nm"
+
+       func_show_eval "$RM $nlist ${nlist}S ${nlist}T"
+
+       # Parse the name list into a source file.
+       func_verbose "creating $output_objdir/$my_dlsyms"
+
+       $opt_dry_run || $ECHO > "$output_objdir/$my_dlsyms" "\
+/* $my_dlsyms - symbol resolution table for \`$my_outputname' dlsym emulation. */
+/* Generated by $PROGRAM (GNU $PACKAGE$TIMESTAMP) $VERSION */
+
+#ifdef __cplusplus
+extern \"C\" {
+#endif
+
+#if defined(__GNUC__) && (((__GNUC__ == 4) && (__GNUC_MINOR__ >= 4)) || (__GNUC__ > 4))
+#pragma GCC diagnostic ignored \"-Wstrict-prototypes\"
+#endif
+
+/* Keep this code in sync between libtool.m4, ltmain, lt_system.h, and tests.  */
+#if defined(_WIN32) || defined(__CYGWIN__) || defined(_WIN32_WCE)
+/* DATA imports from DLLs on WIN32 con't be const, because runtime
+   relocations are performed -- see ld's documentation on pseudo-relocs.  */
+# define LT_DLSYM_CONST
+#elif defined(__osf__)
+/* This system does not cope well with relocations in const data.  */
+# define LT_DLSYM_CONST
+#else
+# define LT_DLSYM_CONST const
+#endif
+
+/* External symbol declarations for the compiler. */\
+"
+
+       if test "$dlself" = yes; then
+         func_verbose "generating symbol list for \`$output'"
+
+         $opt_dry_run || echo ': @PROGRAM@ ' > "$nlist"
+
+         # Add our own program objects to the symbol list.
+         progfiles=`$ECHO "$objs$old_deplibs" | $SP2NL | $SED "$lo2o" | $NL2SP`
+         for progfile in $progfiles; do
+           func_to_tool_file "$progfile" func_convert_file_msys_to_w32
+           func_verbose "extracting global C symbols from \`$func_to_tool_file_result'"
+           $opt_dry_run || eval "$NM $func_to_tool_file_result | $global_symbol_pipe >> '$nlist'"
+         done
+
+         if test -n "$exclude_expsyms"; then
+           $opt_dry_run || {
+             eval '$EGREP -v " ($exclude_expsyms)$" "$nlist" > "$nlist"T'
+             eval '$MV "$nlist"T "$nlist"'
+           }
+         fi
+
+         if test -n "$export_symbols_regex"; then
+           $opt_dry_run || {
+             eval '$EGREP -e "$export_symbols_regex" "$nlist" > "$nlist"T'
+             eval '$MV "$nlist"T "$nlist"'
+           }
+         fi
+
+         # Prepare the list of exported symbols
+         if test -z "$export_symbols"; then
+           export_symbols="$output_objdir/$outputname.exp"
+           $opt_dry_run || {
+             $RM $export_symbols
+             eval "${SED} -n -e '/^: @PROGRAM@ $/d' -e 's/^.* \(.*\)$/\1/p' "'< "$nlist" > "$export_symbols"'
+             case $host in
+             *cygwin* | *mingw* | *cegcc* )
+                eval "echo EXPORTS "'> "$output_objdir/$outputname.def"'
+                eval 'cat "$export_symbols" >> "$output_objdir/$outputname.def"'
+               ;;
+             esac
+           }
+         else
+           $opt_dry_run || {
+             eval "${SED} -e 's/\([].[*^$]\)/\\\\\1/g' -e 's/^/ /' -e 's/$/$/'"' < "$export_symbols" > "$output_objdir/$outputname.exp"'
+             eval '$GREP -f "$output_objdir/$outputname.exp" < "$nlist" > "$nlist"T'
+             eval '$MV "$nlist"T "$nlist"'
+             case $host in
+               *cygwin* | *mingw* | *cegcc* )
+                 eval "echo EXPORTS "'> "$output_objdir/$outputname.def"'
+                 eval 'cat "$nlist" >> "$output_objdir/$outputname.def"'
+                 ;;
+             esac
+           }
+         fi
+       fi
+
+       for dlprefile in $dlprefiles; do
+         func_verbose "extracting global C symbols from \`$dlprefile'"
+         func_basename "$dlprefile"
+         name="$func_basename_result"
+          case $host in
+           *cygwin* | *mingw* | *cegcc* )
+             # if an import library, we need to obtain dlname
+             if func_win32_import_lib_p "$dlprefile"; then
+               func_tr_sh "$dlprefile"
+               eval "curr_lafile=\$libfile_$func_tr_sh_result"
+               dlprefile_dlbasename=""
+               if test -n "$curr_lafile" && func_lalib_p "$curr_lafile"; then
+                 # Use subshell, to avoid clobbering current variable values
+                 dlprefile_dlname=`source "$curr_lafile" && echo "$dlname"`
+                 if test -n "$dlprefile_dlname" ; then
+                   func_basename "$dlprefile_dlname"
+                   dlprefile_dlbasename="$func_basename_result"
+                 else
+                   # no lafile. user explicitly requested -dlpreopen <import library>.
+                   $sharedlib_from_linklib_cmd "$dlprefile"
+                   dlprefile_dlbasename=$sharedlib_from_linklib_result
+                 fi
+               fi
+               $opt_dry_run || {
+                 if test -n "$dlprefile_dlbasename" ; then
+                   eval '$ECHO ": $dlprefile_dlbasename" >> "$nlist"'
+                 else
+                   func_warning "Could not compute DLL name from $name"
+                   eval '$ECHO ": $name " >> "$nlist"'
+                 fi
+                 func_to_tool_file "$dlprefile" func_convert_file_msys_to_w32
+                 eval "$NM \"$func_to_tool_file_result\" 2>/dev/null | $global_symbol_pipe |
+                   $SED -e '/I __imp/d' -e 's/I __nm_/D /;s/_nm__//' >> '$nlist'"
+               }
+             else # not an import lib
+               $opt_dry_run || {
+                 eval '$ECHO ": $name " >> "$nlist"'
+                 func_to_tool_file "$dlprefile" func_convert_file_msys_to_w32
+                 eval "$NM \"$func_to_tool_file_result\" 2>/dev/null | $global_symbol_pipe >> '$nlist'"
+               }
+             fi
+           ;;
+           *)
+             $opt_dry_run || {
+               eval '$ECHO ": $name " >> "$nlist"'
+               func_to_tool_file "$dlprefile" func_convert_file_msys_to_w32
+               eval "$NM \"$func_to_tool_file_result\" 2>/dev/null | $global_symbol_pipe >> '$nlist'"
+             }
+           ;;
+          esac
+       done
+
+       $opt_dry_run || {
+         # Make sure we have at least an empty file.
+         test -f "$nlist" || : > "$nlist"
+
+         if test -n "$exclude_expsyms"; then
+           $EGREP -v " ($exclude_expsyms)$" "$nlist" > "$nlist"T
+           $MV "$nlist"T "$nlist"
+         fi
+
+         # Try sorting and uniquifying the output.
+         if $GREP -v "^: " < "$nlist" |
+             if sort -k 3 </dev/null >/dev/null 2>&1; then
+               sort -k 3
+             else
+               sort +2
+             fi |
+             uniq > "$nlist"S; then
+           :
+         else
+           $GREP -v "^: " < "$nlist" > "$nlist"S
+         fi
+
+         if test -f "$nlist"S; then
+           eval "$global_symbol_to_cdecl"' < "$nlist"S >> "$output_objdir/$my_dlsyms"'
+         else
+           echo '/* NONE */' >> "$output_objdir/$my_dlsyms"
+         fi
+
+         echo >> "$output_objdir/$my_dlsyms" "\
+
+/* The mapping between symbol names and symbols.  */
+typedef struct {
+  const char *name;
+  void *address;
+} lt_dlsymlist;
+extern LT_DLSYM_CONST lt_dlsymlist
+lt_${my_prefix}_LTX_preloaded_symbols[];
+LT_DLSYM_CONST lt_dlsymlist
+lt_${my_prefix}_LTX_preloaded_symbols[] =
+{\
+  { \"$my_originator\", (void *) 0 },"
+
+         case $need_lib_prefix in
+         no)
+           eval "$global_symbol_to_c_name_address" < "$nlist" >> "$output_objdir/$my_dlsyms"
+           ;;
+         *)
+           eval "$global_symbol_to_c_name_address_lib_prefix" < "$nlist" >> "$output_objdir/$my_dlsyms"
+           ;;
+         esac
+         echo >> "$output_objdir/$my_dlsyms" "\
+  {0, (void *) 0}
+};
+
+/* This works around a problem in FreeBSD linker */
+#ifdef FREEBSD_WORKAROUND
+static const void *lt_preloaded_setup() {
+  return lt_${my_prefix}_LTX_preloaded_symbols;
+}
+#endif
+
+#ifdef __cplusplus
+}
+#endif\
+"
+       } # !$opt_dry_run
+
+       pic_flag_for_symtable=
+       case "$compile_command " in
+       *" -static "*) ;;
+       *)
+         case $host in
+         # compiling the symbol table file with pic_flag works around
+         # a FreeBSD bug that causes programs to crash when -lm is
+         # linked before any other PIC object.  But we must not use
+         # pic_flag when linking with -static.  The problem exists in
+         # FreeBSD 2.2.6 and is fixed in FreeBSD 3.1.
+         *-*-freebsd2.*|*-*-freebsd3.0*|*-*-freebsdelf3.0*)
+           pic_flag_for_symtable=" $pic_flag -DFREEBSD_WORKAROUND" ;;
+         *-*-hpux*)
+           pic_flag_for_symtable=" $pic_flag"  ;;
+         *)
+           if test "X$my_pic_p" != Xno; then
+             pic_flag_for_symtable=" $pic_flag"
+           fi
+           ;;
+         esac
+         ;;
+       esac
+       symtab_cflags=
+       for arg in $LTCFLAGS; do
+         case $arg in
+         -pie | -fpie | -fPIE) ;;
+         *) func_append symtab_cflags " $arg" ;;
+         esac
+       done
+
+       # Now compile the dynamic symbol file.
+       func_show_eval '(cd $output_objdir && $LTCC$symtab_cflags -c$no_builtin_flag$pic_flag_for_symtable "$my_dlsyms")' 'exit $?'
+
+       # Clean up the generated files.
+       func_show_eval '$RM "$output_objdir/$my_dlsyms" "$nlist" "${nlist}S" "${nlist}T"'
+
+       # Transform the symbol file into the correct name.
+       symfileobj="$output_objdir/${my_outputname}S.$objext"
+       case $host in
+       *cygwin* | *mingw* | *cegcc* )
+         if test -f "$output_objdir/$my_outputname.def"; then
+           compile_command=`$ECHO "$compile_command" | $SED "s%@SYMFILE@%$output_objdir/$my_outputname.def $symfileobj%"`
+           finalize_command=`$ECHO "$finalize_command" | $SED "s%@SYMFILE@%$output_objdir/$my_outputname.def $symfileobj%"`
+         else
+           compile_command=`$ECHO "$compile_command" | $SED "s%@SYMFILE@%$symfileobj%"`
+           finalize_command=`$ECHO "$finalize_command" | $SED "s%@SYMFILE@%$symfileobj%"`
+         fi
+         ;;
+       *)
+         compile_command=`$ECHO "$compile_command" | $SED "s%@SYMFILE@%$symfileobj%"`
+         finalize_command=`$ECHO "$finalize_command" | $SED "s%@SYMFILE@%$symfileobj%"`
+         ;;
+       esac
+       ;;
+      *)
+       func_fatal_error "unknown suffix for \`$my_dlsyms'"
+       ;;
+      esac
+    else
+      # We keep going just in case the user didn't refer to
+      # lt_preloaded_symbols.  The linker will fail if global_symbol_pipe
+      # really was required.
+
+      # Nullify the symbol file.
+      compile_command=`$ECHO "$compile_command" | $SED "s% @SYMFILE@%%"`
+      finalize_command=`$ECHO "$finalize_command" | $SED "s% @SYMFILE@%%"`
+    fi
+}
+
+# func_win32_libid arg
+# return the library type of file 'arg'
+#
+# Need a lot of goo to handle *both* DLLs and import libs
+# Has to be a shell function in order to 'eat' the argument
+# that is supplied when $file_magic_command is called.
+# Despite the name, also deal with 64 bit binaries.
+func_win32_libid ()
+{
+  $opt_debug
+  # Classification reported on stdout; stays "unknown" unless one of the
+  # patterns below recognizes the `file' output.
+  win32_libid_type="unknown"
+  # Quote the argument so that paths containing whitespace or glob
+  # characters reach `file' as a single operand.
+  win32_fileres=`file -L "$1" 2>/dev/null`
+  case $win32_fileres in
+  *ar\ archive\ import\ library*) # definitely import
+    win32_libid_type="x86 archive import"
+    ;;
+  *ar\ archive*) # could be an import, or static
+    # Keep the egrep pattern in sync with the one in _LT_CHECK_MAGIC_METHOD.
+    # The file name is expanded inside the eval'd string (\$1) so that it
+    # is not word-split a second time when eval re-parses the command.
+    if eval "$OBJDUMP -f \"\$1\"" | $SED -e '10q' 2>/dev/null |
+       $EGREP 'file format (pei*-i386(.*architecture: i386)?|pe-arm-wince|pe-x86-64)' >/dev/null; then
+      func_to_tool_file "$1" func_convert_file_msys_to_w32
+      # Look at the first 100 lines of the nm listing only: a symbol of
+      # type "I" marks the archive as an import library.
+      win32_nmres=`eval $NM -f posix -A \"$func_to_tool_file_result\" |
+       $SED -n -e '
+           1,100{
+               / I /{
+                   s,.*,import,
+                   p
+                   q
+               }
+           }'`
+      case $win32_nmres in
+      import*)  win32_libid_type="x86 archive import";;
+      *)        win32_libid_type="x86 archive static";;
+      esac
+    fi
+    ;;
+  *DLL*)
+    win32_libid_type="x86 DLL"
+    ;;
+  *executable*) # but shell scripts are "executable" too...
+    case $win32_fileres in
+    *MS\ Windows\ PE\ Intel*)
+      win32_libid_type="x86 DLL"
+      ;;
+    esac
+    ;;
+  esac
+  $ECHO "$win32_libid_type"
+}
+
+# func_cygming_dll_for_implib ARG
+#
+# Platform-specific function to extract the
+# name of the DLL associated with the specified
+# import library ARG.
+# Invoked by eval'ing the libtool variable
+#    $sharedlib_from_linklib_cmd
+# Result is available in the variable
+#    $sharedlib_from_linklib_result
+func_cygming_dll_for_implib ()
+{
+  $opt_debug
+  # Let $DLLTOOL identify the DLL behind import library $1 and keep
+  # whatever it prints as the result.
+  sharedlib_from_linklib_result=`$DLLTOOL \
+    --identify-strict \
+    --identify "$1"`
+}
+
+# func_cygming_dll_for_implib_fallback_core SECTION_NAME LIBNAMEs
+#
+# This is the core of a fallback implementation of a
+# platform-specific function to extract the name of the
+# DLL associated with the specified import library LIBNAME.
+#
+# SECTION_NAME is either .idata$6 or .idata$7, depending
+# on the platform and compiler that created the implib.
+#
+# Echos the name of the DLL associated with the
+# specified import library.
+func_cygming_dll_for_implib_fallback_core ()
+{
+  $opt_debug
+  # $1 is the section name, $2 the import library.  The section name is
+  # turned into a literal regex (via $sed_make_literal_regex) so that it
+  # can be matched verbatim in the objdump output below.
+  match_literal=`$ECHO "$1" | $SED "$sed_make_literal_regex"`
+  # Stage 1: dump the requested section of every archive member and
+  # reduce each dump to a ====MARK==== line followed by the text that
+  # remains after the first 43 columns of each dump line (presumably
+  # the address and hex columns of objdump's output -- TODO confirm).
+  $OBJDUMP -s --section "$1" "$2" 2>/dev/null |
+    $SED '/^Contents of section '"$match_literal"':/{
+      # Place marker at beginning of archive member dllname section
+      s/.*/====MARK====/
+      p
+      d
+    }
+    # These lines can sometimes be longer than 43 characters, but
+    # are always uninteresting
+    /:[         ]*file format pe[i]\{,1\}-/d
+    /^In archive [^:]*:/d
+    # Ensure marker is printed
+    /^====MARK====/p
+    # Remove all lines with less than 43 characters
+    /^.\{43\}/!d
+    # From remaining lines, remove first 43 characters
+    s/^.\{43\}//' |
+    # Stage 2: glue the lines belonging to one marker into a single
+    # line, so each archive member yields at most one output line.
+    $SED -n '
+      # Join marker and all lines until next marker into a single line
+      /^====MARK====/ b para
+      H
+      $ b para
+      b
+      :para
+      x
+      s/\n//g
+      # Remove the marker
+      s/^====MARK====//
+      # Remove trailing dots and whitespace
+      s/[\. \t]*$//
+      # Print
+      /./p' |
+    # we now have a list, one entry per line, of the stringified
+    # contents of the appropriate section of all members of the
+    # archive which possess that section. Heuristic: eliminate
+    # all those which have a first or second character that is
+    # a '.' (that is, objdump's representation of an unprintable
+    # character.) This should work for all archives with less than
+    # 0x302f exports -- but will fail for DLLs whose name actually
+    # begins with a literal '.' or a single character followed by
+    # a '.'.
+    #
+    # Of those that remain, print the first one.
+    $SED -e '/^\./d;/^.\./d;q'
+}
+
+# func_cygming_gnu_implib_p ARG
+# This predicate returns with zero status (TRUE) if
+# ARG is a GNU/binutils-style import library. Returns
+# with nonzero status (FALSE) otherwise.
+func_cygming_gnu_implib_p ()
+{
+  $opt_debug
+  func_to_tool_file "$1" func_convert_file_msys_to_w32
+  # Collect every global symbol whose name looks like _head_<name>_a/_dll
+  # or <name>_a/_dll_iname; any hit means "GNU-style import library".
+  func_cygming_gnu_implib_tmp=`$NM "$func_to_tool_file_result" |
+    eval "$global_symbol_pipe" |
+    $EGREP ' (_head_[A-Za-z0-9_]+_[ad]l*|[A-Za-z0-9_]+_[ad]l*_iname)$'`
+  if test -z "$func_cygming_gnu_implib_tmp"; then
+    return 1
+  fi
+  return 0
+}
+
+# func_cygming_ms_implib_p ARG
+# This predicate returns with zero status (TRUE) if
+# ARG is an MS-style import library. Returns
+# with nonzero status (FALSE) otherwise.
+func_cygming_ms_implib_p ()
+{
+  $opt_debug
+  func_to_tool_file "$1" func_convert_file_msys_to_w32
+  # An archive that exports a _NULL_IMPORT_DESCRIPTOR symbol is taken
+  # to be an MS-style import library.
+  func_cygming_ms_implib_tmp=`$NM "$func_to_tool_file_result" |
+    eval "$global_symbol_pipe" |
+    $GREP '_NULL_IMPORT_DESCRIPTOR'`
+  case $func_cygming_ms_implib_tmp in
+  "") return 1 ;;
+  esac
+  return 0
+}
+
+# func_cygming_dll_for_implib_fallback ARG
+# Platform-specific function to extract the
+# name of the DLL associated with the specified
+# import library ARG.
+#
+# This fallback implementation is for use when $DLLTOOL
+# does not support the --identify-strict option.
+# Invoked by eval'ing the libtool variable
+#    $sharedlib_from_linklib_cmd
+# Result is available in the variable
+#    $sharedlib_from_linklib_result
+func_cygming_dll_for_implib_fallback ()
+{
+  $opt_debug
+  # Start from "unknown flavour"; the result is only filled in when one
+  # of the two import-library predicates recognizes $1.
+  sharedlib_from_linklib_result=""
+  if func_cygming_gnu_implib_p "$1" ; then
+    # binutils flavour: the DLL name is read from section .idata$7
+    sharedlib_from_linklib_result=`func_cygming_dll_for_implib_fallback_core '.idata$7' "$1"`
+  elif func_cygming_ms_implib_p "$1" ; then
+    # MS flavour: the DLL name is read from section .idata$6
+    sharedlib_from_linklib_result=`func_cygming_dll_for_implib_fallback_core '.idata$6' "$1"`
+  fi
+}
+
+
+# func_extract_an_archive dir oldlib
+func_extract_an_archive ()
+{
+    $opt_debug
+    # $1: directory to extract into; $2: archive to extract.
+    f_ex_an_ar_dir="$1"; shift
+    f_ex_an_ar_oldlib="$1"
+    if test "$lock_old_archive_extraction" = yes; then
+      # Serialize extraction of the same archive: `ln' fails while the
+      # lock file exists, so keep retrying until the link can be created.
+      lockfile=$f_ex_an_ar_oldlib.lock
+      until $opt_dry_run || ln "$progpath" "$lockfile" 2>/dev/null; do
+       func_echo "Waiting for $lockfile to be removed"
+       sleep 2
+      done
+    fi
+    # Both operands are quoted inside the eval'd command so that a target
+    # directory containing whitespace is passed to `cd' as one word, just
+    # like the archive name is passed to $AR.
+    func_show_eval "(cd \"\$f_ex_an_ar_dir\" && $AR x \"\$f_ex_an_ar_oldlib\")" \
+                  'stat=$?; rm -f "$lockfile"; exit $stat'
+    if test "$lock_old_archive_extraction" = yes; then
+      $opt_dry_run || rm -f "$lockfile"
+    fi
+    # `sort -uc' fails when the sorted member list contains duplicates,
+    # i.e. when extraction would have overwritten one object with another.
+    if ($AR t "$f_ex_an_ar_oldlib" | sort | sort -uc >/dev/null 2>&1); then
+     :
+    else
+      func_fatal_error "object name conflicts in archive: $f_ex_an_ar_dir/$f_ex_an_ar_oldlib"
+    fi
+}
+
+
+# func_extract_archives gentop oldlib ...
+func_extract_archives ()
+{
+    $opt_debug
+    # $1: directory under which one sub-directory per archive is created;
+    # remaining arguments: the archives to extract.  The list of extracted
+    # object files is returned in $func_extract_archives_result.
+    my_gentop="$1"; shift
+    my_oldlibs=${1+"$@"}
+    my_oldobjs=""
+    my_xlib=""
+    my_xabs=""
+    my_xdir=""
+
+    for my_xlib in $my_oldlibs; do
+      # Extract the objects.
+      case $my_xlib in
+       [\\/]* | [A-Za-z]:[\\/]*) my_xabs="$my_xlib" ;;
+       *) my_xabs=`pwd`"/$my_xlib" ;;
+      esac
+      func_basename "$my_xlib"
+      my_xlib="$func_basename_result"
+      my_xlib_u=$my_xlib
+      # Pick a directory name not used by a previously extracted archive
+      # with the same basename, by prefixing lt<serial>-.
+      while :; do
+        case " $extracted_archives " in
+       *" $my_xlib_u "*)
+         func_arith $extracted_serial + 1
+         extracted_serial=$func_arith_result
+         my_xlib_u=lt$extracted_serial-$my_xlib ;;
+       *) break ;;
+       esac
+      done
+      extracted_archives="$extracted_archives $my_xlib_u"
+      my_xdir="$my_gentop/$my_xlib_u"
+
+      func_mkdir_p "$my_xdir"
+
+      case $host in
+      *-darwin*)
+       func_verbose "Extracting $my_xabs"
+       # Do not bother doing anything if just a dry run
+       $opt_dry_run || {
+         darwin_orig_dir=`pwd`
+         cd "$my_xdir" || exit $?
+         darwin_archive=$my_xabs
+         darwin_curdir=`pwd`
+         darwin_base_archive=`basename "$darwin_archive"`
+         darwin_arches=`$LIPO -info "$darwin_archive" 2>/dev/null | $GREP Architectures 2>/dev/null || true`
+         if test -n "$darwin_arches"; then
+           darwin_arches=`$ECHO "$darwin_arches" | $SED -e 's/.*are://'`
+           darwin_arch=
+           func_verbose "$darwin_base_archive has multiple architectures $darwin_arches"
+           # Thin the fat archive once per architecture and extract each
+           # thin copy into its own unfat-<pid>/<archive>-<arch> directory.
+           for darwin_arch in  $darwin_arches ; do
+             func_mkdir_p "unfat-$$/${darwin_base_archive}-${darwin_arch}"
+             $LIPO -thin $darwin_arch -output "unfat-$$/${darwin_base_archive}-${darwin_arch}/${darwin_base_archive}" "${darwin_archive}"
+             cd "unfat-$$/${darwin_base_archive}-${darwin_arch}"
+             func_extract_an_archive "`pwd`" "${darwin_base_archive}"
+             cd "$darwin_curdir"
+             $RM "unfat-$$/${darwin_base_archive}-${darwin_arch}/${darwin_base_archive}"
+           done # $darwin_arches
+            ## Okay now we've a bunch of thin objects, gotta fatten them up :)
+           darwin_filelist=`find unfat-$$ -type f -name \*.o -print -o -name \*.lo -print | $SED -e "$basename" | sort -u`
+           darwin_file=
+           darwin_files=
+           for darwin_file in $darwin_filelist; do
+             # Quote the name so it is handed to find as a pattern and is
+             # not expanded by the shell against the current directory.
+             darwin_files=`find unfat-$$ -name "$darwin_file" -print | sort | $NL2SP`
+             $LIPO -create -output "$darwin_file" $darwin_files
+           done # $darwin_filelist
+           $RM -rf unfat-$$
+           cd "$darwin_orig_dir"
+         else
+           cd "$darwin_orig_dir"
+           func_extract_an_archive "$my_xdir" "$my_xabs"
+         fi # $darwin_arches
+       } # !$opt_dry_run
+       ;;
+      *)
+        func_extract_an_archive "$my_xdir" "$my_xabs"
+       ;;
+      esac
+      my_oldobjs="$my_oldobjs "`find "$my_xdir" -name \*.$objext -print -o -name \*.lo -print | sort | $NL2SP`
+    done
+
+    func_extract_archives_result="$my_oldobjs"
+}
+
+
+# func_emit_wrapper [arg=no]
+#
+# Emit a libtool wrapper script on stdout.
+# Don't directly open a file because we may want to
+# incorporate the script contents within a cygwin/mingw
+# wrapper executable.  Must ONLY be called from within
+# func_mode_link because it depends on a number of variables
+# set therein.
+#
+# ARG is the value that the WRAPPER_SCRIPT_BELONGS_IN_OBJDIR
+# variable will take.  If 'yes', then the emitted script
+# will assume that the directory in which it is stored is
+# the $objdir directory.  This is a cygwin/mingw-specific
+# behavior.
+func_emit_wrapper ()
+{
+       # ARG defaults to 'no' when the caller passes nothing.
+       func_emit_wrapper_arg1=${1-no}
+
+       $ECHO "\
+#! $SHELL
+
+# $output - temporary wrapper script for $objdir/$outputname
+# Generated by $PROGRAM (GNU $PACKAGE$TIMESTAMP) $VERSION
+#
+# The $output program cannot be directly executed until all the libtool
+# libraries that it depends on are installed.
+#
+# This wrapper script should never be moved out of the build directory.
+# If it is, it will not operate correctly.
+
+# Sed substitution that helps us do robust quoting.  It backslashifies
+# metacharacters that are still active within double-quoted strings.
+sed_quote_subst='$sed_quote_subst'
+
+# Be Bourne compatible
+if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then
+  emulate sh
+  NULLCMD=:
+  # Zsh 3.x and 4.x performs word splitting on \${1+\"\$@\"}, which
+  # is contrary to our usage.  Disable this feature.
+  alias -g '\${1+\"\$@\"}'='\"\$@\"'
+  setopt NO_GLOB_SUBST
+else
+  case \`(set -o) 2>/dev/null\` in *posix*) set -o posix;; esac
+fi
+BIN_SH=xpg4; export BIN_SH # for Tru64
+DUALCASE=1; export DUALCASE # for MKS sh
+
+# The HP-UX ksh and POSIX shell print the target directory to stdout
+# if CDPATH is set.
+(unset CDPATH) >/dev/null 2>&1 && unset CDPATH
+
+relink_command=\"$relink_command\"
+
+# This environment variable determines our operation mode.
+if test \"\$libtool_install_magic\" = \"$magic\"; then
+  # install mode needs the following variables:
+  generated_by_libtool_version='$macro_version'
+  notinst_deplibs='$notinst_deplibs'
+else
+  # When we are sourced in execute mode, \$file and \$ECHO are already set.
+  if test \"\$libtool_execute_magic\" != \"$magic\"; then
+    file=\"\$0\""
+
+    # Escape $ECHO with $sed_quote_subst so that it can be embedded in the
+    # double-quoted ECHO= assignment emitted below.
+    qECHO=`$ECHO "$ECHO" | $SED "$sed_quote_subst"`
+    $ECHO "\
+
+# A function that is used when there is no print builtin or printf.
+func_fallback_echo ()
+{
+  eval 'cat <<_LTECHO_EOF
+\$1
+_LTECHO_EOF'
+}
+    ECHO=\"$qECHO\"
+  fi
+
+# Very basic option parsing. These options are (a) specific to
+# the libtool wrapper, (b) are identical between the wrapper
+# /script/ and the wrapper /executable/ which is used only on
+# windows platforms, and (c) all begin with the string "--lt-"
+# (application programs are unlikely to have options which match
+# this pattern).
+#
+# There are only two supported options: --lt-debug and
+# --lt-dump-script. There is, deliberately, no --lt-help.
+#
+# The first argument to this parsing function should be the
+# script's $0 value, followed by "$@".
+lt_option_debug=
+func_parse_lt_options ()
+{
+  lt_script_arg0=\$0
+  shift
+  for lt_opt
+  do
+    case \"\$lt_opt\" in
+    --lt-debug) lt_option_debug=1 ;;
+    --lt-dump-script)
+        lt_dump_D=\`\$ECHO \"X\$lt_script_arg0\" | $SED -e 's/^X//' -e 's%/[^/]*$%%'\`
+        test \"X\$lt_dump_D\" = \"X\$lt_script_arg0\" && lt_dump_D=.
+        lt_dump_F=\`\$ECHO \"X\$lt_script_arg0\" | $SED -e 's/^X//' -e 's%^.*/%%'\`
+        cat \"\$lt_dump_D/\$lt_dump_F\"
+        exit 0
+      ;;
+    --lt-*)
+        \$ECHO \"Unrecognized --lt- option: '\$lt_opt'\" 1>&2
+        exit 1
+      ;;
+    esac
+  done
+
+  # Print the debug banner immediately:
+  if test -n \"\$lt_option_debug\"; then
+    echo \"${outputname}:${output}:\${LINENO}: libtool wrapper (GNU $PACKAGE$TIMESTAMP) $VERSION\" 1>&2
+  fi
+}
+
+# Used when --lt-debug. Prints its arguments to stdout
+# (redirection is the responsibility of the caller)
+func_lt_dump_args ()
+{
+  lt_dump_args_N=1;
+  for lt_arg
+  do
+    \$ECHO \"${outputname}:${output}:\${LINENO}: newargv[\$lt_dump_args_N]: \$lt_arg\"
+    lt_dump_args_N=\`expr \$lt_dump_args_N + 1\`
+  done
+}
+
+# Core function for launching the target application
+func_exec_program_core ()
+{
+"
+  case $host in
+  # Backslashes separate directories on plain windows
+  *-*-mingw | *-*-os2* | *-cegcc*)
+    $ECHO "\
+      if test -n \"\$lt_option_debug\"; then
+        \$ECHO \"${outputname}:${output}:\${LINENO}: newargv[0]: \$progdir\\\\\$program\" 1>&2
+        func_lt_dump_args \${1+\"\$@\"} 1>&2
+      fi
+      exec \"\$progdir\\\\\$program\" \${1+\"\$@\"}
+"
+    ;;
+
+  *)
+    $ECHO "\
+      if test -n \"\$lt_option_debug\"; then
+        \$ECHO \"${outputname}:${output}:\${LINENO}: newargv[0]: \$progdir/\$program\" 1>&2
+        func_lt_dump_args \${1+\"\$@\"} 1>&2
+      fi
+      exec \"\$progdir/\$program\" \${1+\"\$@\"}
+"
+    ;;
+  esac
+  $ECHO "\
+      \$ECHO \"\$0: cannot exec \$program \$*\" 1>&2
+      exit 1
+}
+
+# A function to encapsulate launching the target application
+# Strips options in the --lt-* namespace from \$@ and
+# launches target application with the remaining arguments.
+func_exec_program ()
+{
+  case \" \$* \" in
+  *\\ --lt-*)
+    for lt_wr_arg
+    do
+      case \$lt_wr_arg in
+      --lt-*) ;;
+      *) set x \"\$@\" \"\$lt_wr_arg\"; shift;;
+      esac
+      shift
+    done ;;
+  esac
+  func_exec_program_core \${1+\"\$@\"}
+}
+
+  # Parse options
+  func_parse_lt_options \"\$0\" \${1+\"\$@\"}
+
+  # Find the directory that this script lives in.
+  thisdir=\`\$ECHO \"\$file\" | $SED 's%/[^/]*$%%'\`
+  test \"x\$thisdir\" = \"x\$file\" && thisdir=.
+
+  # Follow symbolic links until we get to the real thisdir.
+  file=\`ls -ld \"\$file\" | $SED -n 's/.*-> //p'\`
+  while test -n \"\$file\"; do
+    destdir=\`\$ECHO \"\$file\" | $SED 's%/[^/]*\$%%'\`
+
+    # If there was a directory component, then change thisdir.
+    if test \"x\$destdir\" != \"x\$file\"; then
+      case \"\$destdir\" in
+      [\\\\/]* | [A-Za-z]:[\\\\/]*) thisdir=\"\$destdir\" ;;
+      *) thisdir=\"\$thisdir/\$destdir\" ;;
+      esac
+    fi
+
+    file=\`\$ECHO \"\$file\" | $SED 's%^.*/%%'\`
+    file=\`ls -ld \"\$thisdir/\$file\" | $SED -n 's/.*-> //p'\`
+  done
+
+  # Usually 'no', except on cygwin/mingw when embedded into
+  # the cwrapper.
+  WRAPPER_SCRIPT_BELONGS_IN_OBJDIR=$func_emit_wrapper_arg1
+  if test \"\$WRAPPER_SCRIPT_BELONGS_IN_OBJDIR\" = \"yes\"; then
+    # special case for '.'
+    if test \"\$thisdir\" = \".\"; then
+      thisdir=\`pwd\`
+    fi
+    # remove .libs from thisdir
+    case \"\$thisdir\" in
+    *[\\\\/]$objdir ) thisdir=\`\$ECHO \"\$thisdir\" | $SED 's%[\\\\/][^\\\\/]*$%%'\` ;;
+    $objdir )   thisdir=. ;;
+    esac
+  fi
+
+  # Try to get the absolute directory name.
+  absdir=\`cd \"\$thisdir\" && pwd\`
+  test -n \"\$absdir\" && thisdir=\"\$absdir\"
+"
+
+       if test "$fast_install" = yes; then
+         $ECHO "\
+  program=lt-'$outputname'$exeext
+  progdir=\"\$thisdir/$objdir\"
+
+  if test ! -f \"\$progdir/\$program\" ||
+     { file=\`ls -1dt \"\$progdir/\$program\" \"\$progdir/../\$program\" 2>/dev/null | ${SED} 1q\`; \\
+       test \"X\$file\" != \"X\$progdir/\$program\"; }; then
+
+    file=\"\$\$-\$program\"
+
+    if test ! -d \"\$progdir\"; then
+      $MKDIR \"\$progdir\"
+    else
+      $RM \"\$progdir/\$file\"
+    fi"
+
+         # The relink failure message must be printed with the wrapper's
+         # own run-time \$ECHO (set from the quoted $qECHO above).  Expanding
+         # $ECHO here would paste its value unquoted into the script, where
+         # a value such as 'printf %s\n' loses its backslash when re-parsed.
+         $ECHO "\
+
+    # relink executable if necessary
+    if test -n \"\$relink_command\"; then
+      if relink_command_output=\`eval \$relink_command 2>&1\`; then :
+      else
+       \$ECHO \"\$relink_command_output\" >&2
+       $RM \"\$progdir/\$file\"
+       exit 1
+      fi
+    fi
+
+    $MV \"\$progdir/\$file\" \"\$progdir/\$program\" 2>/dev/null ||
+    { $RM \"\$progdir/\$program\";
+      $MV \"\$progdir/\$file\" \"\$progdir/\$program\"; }
+    $RM \"\$progdir/\$file\"
+  fi"
+       else
+         $ECHO "\
+  program='$outputname'
+  progdir=\"\$thisdir/$objdir\"
+"
+       fi
+
+       $ECHO "\
+
+  if test -f \"\$progdir/\$program\"; then"
+
+       # Fix the DLL searchpath if we need to.  Do this before prepending
+       # to shlibpath, because on Windows, both are PATH and uninstalled
+       # libraries must come first.
+       if test -n "$dllsearchpath"; then
+         $ECHO "\
+    # Add the dll search path components to the executable PATH
+    PATH=$dllsearchpath:\$PATH
+"
+       fi
+
+       # Export our shlibpath_var if we have one.
+       if test "$shlibpath_overrides_runpath" = yes && test -n "$shlibpath_var" && test -n "$temp_rpath"; then
+         $ECHO "\
+    # Add our own library path to $shlibpath_var
+    $shlibpath_var=\"$temp_rpath\$$shlibpath_var\"
+
+    # Some systems cannot cope with colon-terminated $shlibpath_var
+    # The second colon is a workaround for a bug in BeOS R4 sed
+    $shlibpath_var=\`\$ECHO \"\$$shlibpath_var\" | $SED 's/::*\$//'\`
+
+    export $shlibpath_var
+"
+       fi
+
+       $ECHO "\
+    if test \"\$libtool_execute_magic\" != \"$magic\"; then
+      # Run the actual program with our arguments.
+      func_exec_program \${1+\"\$@\"}
+    fi
+  else
+    # The program doesn't exist.
+    \$ECHO \"\$0: error: \\\`\$progdir/\$program' does not exist\" 1>&2
+    \$ECHO \"This script is just a wrapper for \$program.\" 1>&2
+    \$ECHO \"See the $PACKAGE documentation for more information.\" 1>&2
+    exit 1
+  fi
+fi\
+"
+}
+
+
+# func_emit_cwrapperexe_src
+# emit the source code for a wrapper executable on stdout
+# Must ONLY be called from within func_mode_link because
+# it depends on a number of variables set therein.
+func_emit_cwrapperexe_src ()
+{
+       cat <<EOF
+
+/* $cwrappersource - temporary wrapper executable for $objdir/$outputname
+   Generated by $PROGRAM (GNU $PACKAGE$TIMESTAMP) $VERSION
+
+   The $output program cannot be directly executed until all the libtool
+   libraries that it depends on are installed.
+
+   This wrapper executable should never be moved out of the build directory.
+   If it is, it will not operate correctly.
+*/
+EOF
+           cat <<"EOF"
+#ifdef _MSC_VER
+# define _CRT_SECURE_NO_DEPRECATE 1
+#endif
+#include <stdio.h>
+#include <stdlib.h>
+#ifdef _MSC_VER
+# include <direct.h>
+# include <process.h>
+# include <io.h>
+#else
+# include <unistd.h>
+# include <stdint.h>
+# ifdef __CYGWIN__
+#  include <io.h>
+# endif
+#endif
+#include <malloc.h>
+#include <stdarg.h>
+#include <assert.h>
+#include <string.h>
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+
+/* declarations of non-ANSI functions */
+#if defined(__MINGW32__)
+# ifdef __STRICT_ANSI__
+int _putenv (const char *);
+# endif
+#elif defined(__CYGWIN__)
+# ifdef __STRICT_ANSI__
+char *realpath (const char *, char *);
+int putenv (char *);
+int setenv (const char *, const char *, int);
+# endif
+/* #elif defined (other platforms) ... */
+#endif
+
+/* portability defines, excluding path handling macros */
+#if defined(_MSC_VER)
+# define setmode _setmode
+# define stat    _stat
+# define chmod   _chmod
+# define getcwd  _getcwd
+# define putenv  _putenv
+# define S_IXUSR _S_IEXEC
+# ifndef _INTPTR_T_DEFINED
+#  define _INTPTR_T_DEFINED
+#  define intptr_t int
+# endif
+#elif defined(__MINGW32__)
+# define setmode _setmode
+# define stat    _stat
+# define chmod   _chmod
+# define getcwd  _getcwd
+# define putenv  _putenv
+#elif defined(__CYGWIN__)
+# define HAVE_SETENV
+# define FOPEN_WB "wb"
+/* #elif defined (other platforms) ... */
+#endif
+
+#if defined(PATH_MAX)
+# define LT_PATHMAX PATH_MAX
+#elif defined(MAXPATHLEN)
+# define LT_PATHMAX MAXPATHLEN
+#else
+# define LT_PATHMAX 1024
+#endif
+
+#ifndef S_IXOTH
+# define S_IXOTH 0
+#endif
+#ifndef S_IXGRP
+# define S_IXGRP 0
+#endif
+
+/* path handling portability macros */
+#ifndef DIR_SEPARATOR
+# define DIR_SEPARATOR '/'
+# define PATH_SEPARATOR ':'
+#endif
+
+#if defined (_WIN32) || defined (__MSDOS__) || defined (__DJGPP__) || \
+  defined (__OS2__)
+# define HAVE_DOS_BASED_FILE_SYSTEM
+# define FOPEN_WB "wb"
+# ifndef DIR_SEPARATOR_2
+#  define DIR_SEPARATOR_2 '\\'
+# endif
+# ifndef PATH_SEPARATOR_2
+#  define PATH_SEPARATOR_2 ';'
+# endif
+#endif
+
+#ifndef DIR_SEPARATOR_2
+# define IS_DIR_SEPARATOR(ch) ((ch) == DIR_SEPARATOR)
+#else /* DIR_SEPARATOR_2 */
+# define IS_DIR_SEPARATOR(ch) \
+       (((ch) == DIR_SEPARATOR) || ((ch) == DIR_SEPARATOR_2))
+#endif /* DIR_SEPARATOR_2 */
+
+#ifndef PATH_SEPARATOR_2
+# define IS_PATH_SEPARATOR(ch) ((ch) == PATH_SEPARATOR)
+#else /* PATH_SEPARATOR_2 */
+# define IS_PATH_SEPARATOR(ch) ((ch) == PATH_SEPARATOR_2)
+#endif /* PATH_SEPARATOR_2 */
+
+#ifndef FOPEN_WB
+# define FOPEN_WB "w"
+#endif
+#ifndef _O_BINARY
+# define _O_BINARY 0
+#endif
+
+#define XMALLOC(type, num)      ((type *) xmalloc ((num) * sizeof(type)))
+#define XFREE(stale) do { \
+  if (stale) { free ((void *) stale); stale = 0; } \
+} while (0)
+
+#if defined(LT_DEBUGWRAPPER)
+static int lt_debug = 1;
+#else
+static int lt_debug = 0;
+#endif
+
+const char *program_name = "libtool-wrapper"; /* in case xstrdup fails */
+
+void *xmalloc (size_t num);
+char *xstrdup (const char *string);
+const char *base_name (const char *name);
+char *find_executable (const char *wrapper);
+char *chase_symlinks (const char *pathspec);
+int make_executable (const char *path);
+int check_executable (const char *path);
+char *strendzap (char *str, const char *pat);
+void lt_debugprintf (const char *file, int line, const char *fmt, ...);
+void lt_fatal (const char *file, int line, const char *message, ...);
+static const char *nonnull (const char *s);
+static const char *nonempty (const char *s);
+void lt_setenv (const char *name, const char *value);
+char *lt_extend_str (const char *orig_value, const char *add, int to_end);
+void lt_update_exe_path (const char *name, const char *value);
+void lt_update_lib_path (const char *name, const char *value);
+char **prepare_spawn (char **argv);
+void lt_dump_script (FILE *f);
+EOF
+
+           cat <<EOF
+volatile const char * MAGIC_EXE = "$magic_exe";
+const char * LIB_PATH_VARNAME = "$shlibpath_var";
+EOF
+
+           if test "$shlibpath_overrides_runpath" = yes && test -n "$shlibpath_var" && test -n "$temp_rpath"; then
+              func_to_host_path "$temp_rpath"
+             cat <<EOF
+const char * LIB_PATH_VALUE   = "$func_to_host_path_result";
+EOF
+           else
+             cat <<"EOF"
+const char * LIB_PATH_VALUE   = "";
+EOF
+           fi
+
+           if test -n "$dllsearchpath"; then
+              func_to_host_path "$dllsearchpath:"
+             cat <<EOF
+const char * EXE_PATH_VARNAME = "PATH";
+const char * EXE_PATH_VALUE   = "$func_to_host_path_result";
+EOF
+           else
+             cat <<"EOF"
+const char * EXE_PATH_VARNAME = "";
+const char * EXE_PATH_VALUE   = "";
+EOF
+           fi
+
+           if test "$fast_install" = yes; then
+             cat <<EOF
+const char * TARGET_PROGRAM_NAME = "lt-$outputname"; /* hopefully, no .exe */
+EOF
+           else
+             cat <<EOF
+const char * TARGET_PROGRAM_NAME = "$outputname"; /* hopefully, no .exe */
+EOF
+           fi
+
+
+           cat <<"EOF"
+
+#define LTWRAPPER_OPTION_PREFIX         "--lt-"
+
+static const char *ltwrapper_option_prefix = LTWRAPPER_OPTION_PREFIX;
+static const char *dumpscript_opt       = LTWRAPPER_OPTION_PREFIX "dump-script";
+static const char *debug_opt            = LTWRAPPER_OPTION_PREFIX "debug";
+
+int
+main (int argc, char *argv[])
+{
+  char **newargz;
+  int  newargc;
+  char *tmp_pathspec;
+  char *actual_cwrapper_path;
+  char *actual_cwrapper_name;
+  char *target_name;
+  char *lt_argv_zero;
+  intptr_t rval = 127;
+
+  int i;
+
+  program_name = (char *) xstrdup (base_name (argv[0]));
+  newargz = XMALLOC (char *, argc + 1);
+
+  /* very simple arg parsing; don't want to rely on getopt
+   * also, copy all non cwrapper options to newargz, except
+   * argz[0], which is handled differently
+   */
+  newargc=0;
+  for (i = 1; i < argc; i++)
+    {
+      if (strcmp (argv[i], dumpscript_opt) == 0)
+       {
+EOF
+           case "$host" in
+             *mingw* | *cygwin* )
+               # make stdout use "unix" line endings
+               echo "          setmode(1,_O_BINARY);"
+               ;;
+             esac
+
+           cat <<"EOF"
+         lt_dump_script (stdout);
+         return 0;
+       }
+      /* --lt-debug only switches on wrapper diagnostics; it is consumed
+         here and never forwarded to the target program. */
+      if (strcmp (argv[i], debug_opt) == 0)
+       {
+          lt_debug = 1;
+          continue;
+       }
+      /* Prefix match, not whole-string match: every remaining argument
+         that starts with LTWRAPPER_OPTION_PREFIX must be rejected, not
+         only an argument that is exactly equal to the bare prefix. */
+      if (strncmp (argv[i], ltwrapper_option_prefix,
+                   strlen (ltwrapper_option_prefix)) == 0)
+        {
+          /* however, if there is an option in the LTWRAPPER_OPTION_PREFIX
+             namespace, but it is not one of the ones we know about and
+             have already dealt with, above (including dump-script), then
+             report an error. Otherwise, targets might begin to believe
+             they are allowed to use options in the LTWRAPPER_OPTION_PREFIX
+             namespace. The first time any user complains about this, we'll
+             need to make LTWRAPPER_OPTION_PREFIX a configure-time option
+             or a configure.ac-settable value.
+           */
+          lt_fatal (__FILE__, __LINE__,
+                   "unrecognized %s option: '%s'",
+                    ltwrapper_option_prefix, argv[i]);
+        }
+      /* otherwise ... */
+      /* Slot 0 is reserved for the target path filled in later, so the
+         forwarded arguments start at index 1. */
+      newargz[++newargc] = xstrdup (argv[i]);
+    }
+  /* Terminate the vector; newargc now counts slot 0 as well. */
+  newargz[++newargc] = NULL;
+
+EOF
+           cat <<EOF
+  /* The GNU banner must be the first non-error debug message */
+  lt_debugprintf (__FILE__, __LINE__, "libtool wrapper (GNU $PACKAGE$TIMESTAMP) $VERSION\n");
+EOF
+           cat <<"EOF"
+  lt_debugprintf (__FILE__, __LINE__, "(main) argv[0]: %s\n", argv[0]);
+  lt_debugprintf (__FILE__, __LINE__, "(main) program_name: %s\n", program_name);
+
+  tmp_pathspec = find_executable (argv[0]);
+  if (tmp_pathspec == NULL)
+    lt_fatal (__FILE__, __LINE__, "couldn't find %s", argv[0]);
+  lt_debugprintf (__FILE__, __LINE__,
+                  "(main) found exe (before symlink chase) at: %s\n",
+                 tmp_pathspec);
+
+  actual_cwrapper_path = chase_symlinks (tmp_pathspec);
+  lt_debugprintf (__FILE__, __LINE__,
+                  "(main) found exe (after symlink chase) at: %s\n",
+                 actual_cwrapper_path);
+  XFREE (tmp_pathspec);
+
+  actual_cwrapper_name = xstrdup (base_name (actual_cwrapper_path));
+  strendzap (actual_cwrapper_path, actual_cwrapper_name);
+
+  /* wrapper name transforms */
+  strendzap (actual_cwrapper_name, ".exe");
+  tmp_pathspec = lt_extend_str (actual_cwrapper_name, ".exe", 1);
+  XFREE (actual_cwrapper_name);
+  actual_cwrapper_name = tmp_pathspec;
+  tmp_pathspec = 0;
+
+  /* target_name transforms -- use actual target program name; might have lt- prefix */
+  target_name = xstrdup (base_name (TARGET_PROGRAM_NAME));
+  strendzap (target_name, ".exe");
+  tmp_pathspec = lt_extend_str (target_name, ".exe", 1);
+  XFREE (target_name);
+  target_name = tmp_pathspec;
+  tmp_pathspec = 0;
+
+  lt_debugprintf (__FILE__, __LINE__,
+                 "(main) libtool target name: %s\n",
+                 target_name);
+EOF
+
+           cat <<EOF
+  newargz[0] =
+    XMALLOC (char, (strlen (actual_cwrapper_path) +
+                   strlen ("$objdir") + 1 + strlen (actual_cwrapper_name) + 1));
+  strcpy (newargz[0], actual_cwrapper_path);
+  strcat (newargz[0], "$objdir");
+  strcat (newargz[0], "/");
+EOF
+
+           cat <<"EOF"
+  /* stop here, and copy so we don't have to do this twice */
+  tmp_pathspec = xstrdup (newargz[0]);
+
+  /* do NOT want the lt- prefix here, so use actual_cwrapper_name */
+  strcat (newargz[0], actual_cwrapper_name);
+
+  /* DO want the lt- prefix here if it exists, so use target_name */
+  lt_argv_zero = lt_extend_str (tmp_pathspec, target_name, 1);
+  XFREE (tmp_pathspec);
+  tmp_pathspec = NULL;
+EOF
+
+           case $host_os in
+             mingw*)
+           cat <<"EOF"
+  {
+    char* p;
+    while ((p = strchr (newargz[0], '\\')) != NULL)
+      {
+       *p = '/';
+      }
+    while ((p = strchr (lt_argv_zero, '\\')) != NULL)
+      {
+       *p = '/';
+      }
+  }
+EOF
+           ;;
+           esac
+
+           cat <<"EOF"
+  XFREE (target_name);
+  XFREE (actual_cwrapper_path);
+  XFREE (actual_cwrapper_name);
+
+  lt_setenv ("BIN_SH", "xpg4"); /* for Tru64 */
+  lt_setenv ("DUALCASE", "1");  /* for MSK sh */
+  /* Update the DLL searchpath.  EXE_PATH_VALUE ($dllsearchpath) must
+     be prepended before (that is, appear after) LIB_PATH_VALUE ($temp_rpath)
+     because on Windows, both *_VARNAMEs are PATH but uninstalled
+     libraries must come first. */
+  lt_update_exe_path (EXE_PATH_VARNAME, EXE_PATH_VALUE);
+  lt_update_lib_path (LIB_PATH_VARNAME, LIB_PATH_VALUE);
+
+  lt_debugprintf (__FILE__, __LINE__, "(main) lt_argv_zero: %s\n",
+                 nonnull (lt_argv_zero));
+  for (i = 0; i < newargc; i++)
+    {
+      lt_debugprintf (__FILE__, __LINE__, "(main) newargz[%d]: %s\n",
+                     i, nonnull (newargz[i]));
+    }
+
+EOF
+
+           case $host_os in
+             mingw*)
+               cat <<"EOF"
+  /* execv doesn't actually work on mingw as expected on unix */
+  newargz = prepare_spawn (newargz);
+  rval = _spawnv (_P_WAIT, lt_argv_zero, (const char * const *) newargz);
+  if (rval == -1)
+    {
+      /* failed to start process */
+      lt_debugprintf (__FILE__, __LINE__,
+                     "(main) failed to launch target \"%s\": %s\n",
+                     lt_argv_zero, nonnull (strerror (errno)));
+      return 127;
+    }
+  return rval;
+EOF
+               ;;
+             *)
+               cat <<"EOF"
+  execv (lt_argv_zero, newargz);
+  return rval; /* =127, but avoids unused variable warning */
+EOF
+               ;;
+           esac
+
+           cat <<"EOF"
+}
+
+/* Allocates NUM bytes with malloc.  Reports "memory exhausted" through
+   lt_fatal when the allocation fails; otherwise returns the block. */
+void *
+xmalloc (size_t num)
+{
+  void *block = (void *) malloc (num);
+
+  if (block == NULL)
+    lt_fatal (__FILE__, __LINE__, "memory exhausted");
+  return block;
+}
+
+/* Returns an xmalloc-ed copy of STRING, or NULL when STRING is NULL. */
+char *
+xstrdup (const char *string)
+{
+  char *copy;
+
+  if (string == NULL)
+    return NULL;
+
+  copy = (char *) xmalloc (strlen (string) + 1);
+  return strcpy (copy, string);
+}
+
+/* Returns a pointer into NAME just past its last directory separator,
+   or NAME itself (after any skipped drive prefix) when it has none.
+   Nothing is allocated; the result aliases the argument. */
+const char *
+base_name (const char *name)
+{
+  const char *cursor;
+  const char *last_component;
+
+#if defined (HAVE_DOS_BASED_FILE_SYSTEM)
+  /* Skip over the disk name in MSDOS pathnames. */
+  if (isalpha ((unsigned char) name[0]) && name[1] == ':')
+    name += 2;
+#endif
+
+  last_component = name;
+  for (cursor = name; *cursor != '\0'; cursor++)
+    {
+      /* Each separator moves the candidate start to the next character. */
+      if (IS_DIR_SEPARATOR (*cursor))
+        last_component = cursor + 1;
+    }
+  return last_component;
+}
+
+/* Returns 1 when PATH can be stat-ed and has at least one execute
+   permission bit set, 0 otherwise (including a NULL or empty PATH). */
+int
+check_executable (const char *path)
+{
+  struct stat info;
+
+  lt_debugprintf (__FILE__, __LINE__, "(check_executable): %s\n",
+                  nonempty (path));
+
+  /* A missing or empty path can never name an executable. */
+  if (path == NULL || *path == '\0')
+    return 0;
+
+  if (stat (path, &info) < 0)
+    return 0;
+
+  return (info.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) ? 1 : 0;
+}
+
+/* Adds the user, group and other execute bits to PATH.  Returns the
+   chmod result, or 0 when PATH is NULL, empty, or cannot be stat-ed. */
+int
+make_executable (const char *path)
+{
+  struct stat info;
+
+  lt_debugprintf (__FILE__, __LINE__, "(make_executable): %s\n",
+                  nonempty (path));
+
+  if (path == NULL || *path == '\0')
+    return 0;
+
+  /* Without the current mode there is nothing to extend. */
+  if (stat (path, &info) < 0)
+    return 0;
+
+  return chmod (path, info.st_mode | S_IXOTH | S_IXGRP | S_IXUSR);
+}
+
+/* Searches for the full path of the wrapper named by WRAPPER.
+   Candidates are tried in this order, and the first one accepted by
+   check_executable is returned:
+     1. WRAPPER itself, when it looks absolute (a leading directory
+        separator, or a drive-letter prefix on DOS-based file systems);
+     2. each PATH entry joined with WRAPPER, when WRAPPER contains no '/'
+        (an empty PATH entry stands for the current directory);
+     3. the current directory joined with WRAPPER.
+   Returns a newly allocated full path name if found, NULL otherwise
+   (including when WRAPPER is NULL or empty).  Calls lt_fatal if getcwd
+   fails.  Does not chase symlinks, even on platforms that support them.
+*/
+char *
+find_executable (const char *wrapper)
+{
+  int has_slash = 0;
+  const char *p;
+  const char *p_next;
+  /* scratch buffer (automatic storage) that receives the getcwd result */
+  char tmp[LT_PATHMAX + 1];
+  int tmp_len;
+  char *concat_name;
+
+  lt_debugprintf (__FILE__, __LINE__, "(find_executable): %s\n",
+                  nonempty (wrapper));
+
+  if ((wrapper == NULL) || (*wrapper == '\0'))
+    return NULL;
+
+  /* Absolute path? */
+#if defined (HAVE_DOS_BASED_FILE_SYSTEM)
+  if (isalpha ((unsigned char) wrapper[0]) && wrapper[1] == ':')
+    {
+      concat_name = xstrdup (wrapper);
+      if (check_executable (concat_name))
+       return concat_name;
+      XFREE (concat_name);
+    }
+  else
+    {
+#endif
+      if (IS_DIR_SEPARATOR (wrapper[0]))
+       {
+         concat_name = xstrdup (wrapper);
+         if (check_executable (concat_name))
+           return concat_name;
+         XFREE (concat_name);
+       }
+#if defined (HAVE_DOS_BASED_FILE_SYSTEM)
+    }
+#endif
+
+  /* Only a literal '/' counts here; IS_DIR_SEPARATOR is not consulted. */
+  for (p = wrapper; *p; p++)
+    if (*p == '/')
+      {
+       has_slash = 1;
+       break;
+      }
+  if (!has_slash)
+    {
+      /* no slashes; search PATH */
+      const char *path = getenv ("PATH");
+      if (path != NULL)
+       {
+         /* Walk PATH one entry at a time: [p, q) is the current entry and
+            p_next is where the following entry starts. */
+         for (p = path; *p; p = p_next)
+           {
+             const char *q;
+             size_t p_len;
+             for (q = p; *q; q++)
+               if (IS_PATH_SEPARATOR (*q))
+                 break;
+             p_len = q - p;
+             /* Step over the separator unless the end of PATH was reached. */
+             p_next = (*q == '\0' ? q : q + 1);
+             if (p_len == 0)
+               {
+                 /* empty path: current directory */
+                 if (getcwd (tmp, LT_PATHMAX) == NULL)
+                   lt_fatal (__FILE__, __LINE__, "getcwd failed: %s",
+                              nonnull (strerror (errno)));
+                 tmp_len = strlen (tmp);
+                 /* room for: cwd + '/' + wrapper + terminating NUL */
+                 concat_name =
+                   XMALLOC (char, tmp_len + 1 + strlen (wrapper) + 1);
+                 memcpy (concat_name, tmp, tmp_len);
+                 concat_name[tmp_len] = '/';
+                 strcpy (concat_name + tmp_len + 1, wrapper);
+               }
+             else
+               {
+                 /* room for: entry + '/' + wrapper + terminating NUL */
+                 concat_name =
+                   XMALLOC (char, p_len + 1 + strlen (wrapper) + 1);
+                 memcpy (concat_name, p, p_len);
+                 concat_name[p_len] = '/';
+                 strcpy (concat_name + p_len + 1, wrapper);
+               }
+             if (check_executable (concat_name))
+               return concat_name;
+             XFREE (concat_name);
+           }
+       }
+      /* not found in PATH; assume curdir */
+    }
+  /* Relative path | not found in path: prepend cwd */
+  if (getcwd (tmp, LT_PATHMAX) == NULL)
+    lt_fatal (__FILE__, __LINE__, "getcwd failed: %s",
+              nonnull (strerror (errno)));
+  tmp_len = strlen (tmp);
+  concat_name = XMALLOC (char, tmp_len + 1 + strlen (wrapper) + 1);
+  memcpy (concat_name, tmp, tmp_len);
+  concat_name[tmp_len] = '/';
+  strcpy (concat_name + tmp_len + 1, wrapper);
+
+  if (check_executable (concat_name))
+    return concat_name;
+  XFREE (concat_name);
+  return NULL;
+}
+
+/* Returns a newly allocated path for PATHSPEC with symlinks resolved.
+   When S_ISLNK is not defined the path is simply duplicated.  Otherwise
+   PATHSPEC and then each of its leading directory prefixes are examined
+   with lstat; realpath is called on the whole PATHSPEC only if one of
+   them turns out to be a symlink, else a plain copy is returned.
+   Calls lt_fatal if lstat or realpath fails. */
+char *
+chase_symlinks (const char *pathspec)
+{
+#ifndef S_ISLNK
+  return xstrdup (pathspec);
+#else
+  char buf[LT_PATHMAX];
+  struct stat s;
+  /* Working copy that is shortened by one component per iteration. */
+  char *tmp_pathspec = xstrdup (pathspec);
+  char *p;
+  int has_symlinks = 0;
+  while (strlen (tmp_pathspec) && !has_symlinks)
+    {
+      lt_debugprintf (__FILE__, __LINE__,
+                     "checking path component for symlinks: %s\n",
+                     tmp_pathspec);
+      if (lstat (tmp_pathspec, &s) == 0)
+       {
+         if (S_ISLNK (s.st_mode) != 0)
+           {
+             has_symlinks = 1;
+             break;
+           }
+
+         /* search backwards for last DIR_SEPARATOR */
+         p = tmp_pathspec + strlen (tmp_pathspec) - 1;
+         while ((p > tmp_pathspec) && (!IS_DIR_SEPARATOR (*p)))
+           p--;
+         if ((p == tmp_pathspec) && (!IS_DIR_SEPARATOR (*p)))
+           {
+             /* no more DIR_SEPARATORS left */
+             break;
+           }
+         /* Cut at the separator so the next pass checks the parent prefix. */
+         *p = '\0';
+       }
+      else
+       {
+         lt_fatal (__FILE__, __LINE__,
+                   "error accessing file \"%s\": %s",
+                   tmp_pathspec, nonnull (strerror (errno)));
+       }
+    }
+  XFREE (tmp_pathspec);
+
+  if (!has_symlinks)
+    {
+      return xstrdup (pathspec);
+    }
+
+  /* realpath writes into buf; the result is copied before returning
+     because buf is local to this function. */
+  tmp_pathspec = realpath (pathspec, buf);
+  if (tmp_pathspec == 0)
+    {
+      lt_fatal (__FILE__, __LINE__,
+               "could not follow symlinks for %s", pathspec);
+    }
+  return xstrdup (tmp_pathspec);
+#endif
+}
+
+/* Removes PAT from the end of STR in place when STR ends with PAT.
+   Returns STR unchanged when PAT is longer than STR; otherwise returns
+   a pointer to the position in STR where a PAT-sized tail begins
+   (whether or not that tail matched and was cut off). */
+char *
+strendzap (char *str, const char *pat)
+{
+  size_t str_len;
+  size_t pat_len;
+  char *tail;
+
+  assert (str != NULL);
+  assert (pat != NULL);
+
+  str_len = strlen (str);
+  pat_len = strlen (pat);
+
+  /* A pattern longer than the string cannot be a suffix of it. */
+  if (pat_len > str_len)
+    return str;
+
+  tail = str + (str_len - pat_len);
+  if (strcmp (tail, pat) == 0)
+    *tail = '\0';
+  return tail;
+}
+
+/* printf-style diagnostic on stderr, prefixed with the program name and
+   the FILE:LINE of the caller.  Prints nothing unless lt_debug is set. */
+void
+lt_debugprintf (const char *file, int line, const char *fmt, ...)
+{
+  va_list ap;
+
+  if (!lt_debug)
+    return;
+
+  (void) fprintf (stderr, "%s:%s:%d: ", program_name, file, line);
+  va_start (ap, fmt);
+  (void) vfprintf (stderr, fmt, ap);
+  va_end (ap);
+}
+
+/* Prints "program:FILE:LINE: MODE: <formatted MESSAGE>." on stderr.
+   Exits with EXIT_STATUS when it is non-negative; a negative
+   EXIT_STATUS makes the function return to its caller instead. */
+static void
+lt_error_core (int exit_status, const char *file,
+              int line, const char *mode,
+              const char *message, va_list ap)
+{
+  fprintf (stderr, "%s:%s:%d: %s: ", program_name, file, line, mode);
+  vfprintf (stderr, message, ap);
+  fputs (".\n", stderr);
+
+  if (exit_status < 0)
+    return;
+  exit (exit_status);
+}
+
+/* Reports a printf-style FATAL message for FILE:LINE through
+   lt_error_core, passing EXIT_FAILURE as the exit status. */
+void
+lt_fatal (const char *file, int line, const char *message, ...)
+{
+  va_list args;
+
+  va_start (args, message);
+  lt_error_core (EXIT_FAILURE, file, line, "FATAL", message, args);
+  va_end (args);
+}
+
+/* Returns S, or the literal "(null)" when S is NULL, so the result is
+   always safe to hand to a %s conversion. */
+static const char *
+nonnull (const char *s)
+{
+  if (s == NULL)
+    return "(null)";
+  return s;
+}
+
+/* Like nonnull, but additionally maps an empty string to "(empty)". */
+static const char *
+nonempty (const char *s)
+{
+  if (s != NULL && *s == '\0')
+    return "(empty)";
+  return nonnull (s);
+}
+
+/* Sets environment variable NAME to VALUE, replacing any existing
+   setting, after logging the change through lt_debugprintf.
+   With HAVE_SETENV the work is done by setenv; otherwise a "NAME=VALUE"
+   string is built and handed to putenv. */
+void
+lt_setenv (const char *name, const char *value)
+{
+  lt_debugprintf (__FILE__, __LINE__,
+                 "(lt_setenv) setting '%s' to '%s'\n",
+                  nonnull (name), nonnull (value));
+  {
+#ifdef HAVE_SETENV
+    /* setenv stores its own copies of NAME and VALUE, so VALUE is passed
+       directly; duplicating it first only produced a block that was
+       never freed. */
+    setenv (name, value, 1);
+#else
+    /* room for: name + '=' + value + terminating NUL */
+    size_t len = strlen (name) + 1 + strlen (value) + 1;
+    char *str = XMALLOC (char, len);
+    sprintf (str, "%s=%s", name, value);
+    /* On success putenv keeps using STR as part of the environment, so
+       it is released only when the call fails. */
+    if (putenv (str) != EXIT_SUCCESS)
+      {
+        XFREE (str);
+      }
+#endif
+  }
+}
+
+/* Returns a newly allocated concatenation of ORIG_VALUE and ADD:
+   ORIG_VALUE followed by ADD when TO_END is nonzero, ADD followed by
+   ORIG_VALUE otherwise.  When ORIG_VALUE is NULL or empty the result is
+   just a copy of ADD. */
+char *
+lt_extend_str (const char *orig_value, const char *add, int to_end)
+{
+  const char *head;
+  const char *rest;
+  int orig_value_len;
+  int add_len;
+  int head_len;
+  char *joined;
+
+  if (!(orig_value && *orig_value))
+    return xstrdup (add);
+
+  orig_value_len = strlen (orig_value);
+  add_len = strlen (add);
+  joined = XMALLOC (char, add_len + orig_value_len + 1);
+
+  /* Decide which piece leads, then copy the two pieces back to back. */
+  if (to_end)
+    {
+      head = orig_value;
+      head_len = orig_value_len;
+      rest = add;
+    }
+  else
+    {
+      head = add;
+      head_len = add_len;
+      rest = orig_value;
+    }
+  strcpy (joined, head);
+  strcpy (joined + head_len, rest);
+  return joined;
+}
+
+/* Prepends VALUE to the current setting of environment variable NAME,
+   strips every trailing path separator from the result, and stores it
+   with lt_setenv.  Does nothing when NAME or VALUE is NULL or empty. */
+void
+lt_update_exe_path (const char *name, const char *value)
+{
+  char *merged;
+  int end;
+
+  lt_debugprintf (__FILE__, __LINE__,
+                 "(lt_update_exe_path) modifying '%s' by prepending '%s'\n",
+                  nonnull (name), nonnull (value));
+
+  if (!(name && *name && value && *value))
+    return;
+
+  merged = lt_extend_str (getenv (name), value, 0);
+
+  /* some systems cannot cope with a ':'-terminated path */
+  end = strlen (merged);
+  while ((end > 0) && IS_PATH_SEPARATOR (merged[end - 1]))
+    {
+      end--;
+      merged[end] = '\0';
+    }
+
+  lt_setenv (name, merged);
+  XFREE (merged);
+}
+
+/* Prepends VALUE to the current setting of environment variable NAME
+   and stores the result with lt_setenv.  Does nothing when NAME or
+   VALUE is NULL or empty. */
+void
+lt_update_lib_path (const char *name, const char *value)
+{
+  char *merged;
+
+  lt_debugprintf (__FILE__, __LINE__,
+                 "(lt_update_lib_path) modifying '%s' by prepending '%s'\n",
+                  nonnull (name), nonnull (value));
+
+  if (!(name && *name && value && *value))
+    return;
+
+  merged = lt_extend_str (getenv (name), value, 0);
+  lt_setenv (name, merged);
+  XFREE (merged);
+}
+
+EOF
+           case $host_os in
+             mingw*)
+               cat <<"EOF"
+
+/* Prepares an argument vector before calling spawn().
+   Note that spawn() does not by itself call the command interpreter
+     (getenv ("COMSPEC") != NULL ? getenv ("COMSPEC") :
+      ({ OSVERSIONINFO v; v.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
+         GetVersionEx(&v);
+         v.dwPlatformId == VER_PLATFORM_WIN32_NT;
+      }) ? "cmd.exe" : "command.com").
+   Instead it simply concatenates the arguments, separated by ' ', and calls
+   CreateProcess().  We must quote the arguments since Win32 CreateProcess()
+   interprets characters like ' ', '\t', '\\', '"' (but not '<' and '>') in a
+   special way:
+   - Space and tab are interpreted as delimiters. They are not treated as
+     delimiters if they are surrounded by double quotes: "...".
+   - Unescaped double quotes are removed from the input. Their only effect is
+     that within double quotes, space and tab are treated like normal
+     characters.
+   - Backslashes not followed by double quotes are not special.
+   - But 2*n+1 backslashes followed by a double quote become
+     n backslashes followed by a double quote (n >= 0):
+       \" -> "
+       \\\" -> \"
+       \\\\\" -> \\"
+ */
+#define SHELL_SPECIAL_CHARS "\"\\ \001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037"
+#define SHELL_SPACE_CHARS " \001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037"
+/* Builds and returns a newly allocated, NULL-terminated argument vector
+   in which each element of ARGV has been quoted as needed:
+   - an empty argument becomes a pair of double quotes;
+   - an argument containing any SHELL_SPECIAL_CHARS character gets a
+     freshly allocated escaped copy;
+   - every other argument is reused as is (the pointer is shared with
+     ARGV, not copied).
+   ARGV itself is not modified or freed. */
+char **
+prepare_spawn (char **argv)
+{
+  size_t argc;
+  char **new_argv;
+  size_t i;
+
+  /* Count number of arguments.  */
+  for (argc = 0; argv[argc] != NULL; argc++)
+    ;
+
+  /* Allocate new argument vector.  */
+  new_argv = XMALLOC (char *, argc + 1);
+
+  /* Put quoted arguments into the new argument vector.  */
+  for (i = 0; i < argc; i++)
+    {
+      const char *string = argv[i];
+
+      if (string[0] == '\0')
+       new_argv[i] = xstrdup ("\"\"");
+      else if (strpbrk (string, SHELL_SPECIAL_CHARS) != NULL)
+       {
+         /* Surrounding quotes are needed only when the argument contains
+            a SHELL_SPACE_CHARS character. */
+         int quote_around = (strpbrk (string, SHELL_SPACE_CHARS) != NULL);
+         size_t length;
+         unsigned int backslashes;
+         const char *s;
+         char *quoted_string;
+         char *p;
+
+         /* First pass: compute the length of the quoted result.
+            BACKSLASHES tracks the run of backslashes seen immediately
+            before the current character. */
+         length = 0;
+         backslashes = 0;
+         if (quote_around)
+           length++;
+         for (s = string; *s != '\0'; s++)
+           {
+             char c = *s;
+             /* A double quote costs one extra backslash per preceding
+                backslash, plus one to escape the quote itself. */
+             if (c == '"')
+               length += backslashes + 1;
+             length++;
+             if (c == '\\')
+               backslashes++;
+             else
+               backslashes = 0;
+           }
+         /* The closing quote needs any trailing backslash run doubled. */
+         if (quote_around)
+           length += backslashes + 1;
+
+         quoted_string = XMALLOC (char, length + 1);
+
+         /* Second pass: emit the characters, mirroring the accounting
+            done in the first pass. */
+         p = quoted_string;
+         backslashes = 0;
+         if (quote_around)
+           *p++ = '"';
+         for (s = string; *s != '\0'; s++)
+           {
+             char c = *s;
+             if (c == '"')
+               {
+                 unsigned int j;
+                 for (j = backslashes + 1; j > 0; j--)
+                   *p++ = '\\';
+               }
+             *p++ = c;
+             if (c == '\\')
+               backslashes++;
+             else
+               backslashes = 0;
+           }
+         if (quote_around)
+           {
+             unsigned int j;
+             for (j = backslashes; j > 0; j--)
+               *p++ = '\\';
+             *p++ = '"';
+           }
+         *p = '\0';
+
+         new_argv[i] = quoted_string;
+       }
+      else
+       /* Nothing to escape: share the original string. */
+       new_argv[i] = (char *) string;
+    }
+  new_argv[argc] = NULL;
+
+  return new_argv;
+}
+EOF
+               ;;
+           esac
+
+            cat <<"EOF"
+void lt_dump_script (FILE* f)
+{
+EOF
+           func_emit_wrapper yes |
+             $SED -n -e '
+s/^\(.\{79\}\)\(..*\)/\1\
+\2/
+h
+s/\([\\"]\)/\\\1/g
+s/$/\\n/
+s/\([^\n]*\).*/  fputs ("\1", f);/p
+g
+D'
+            cat <<"EOF"
+}
+EOF
+}
+# end: func_emit_cwrapperexe_src
+
+# func_win32_import_lib_p ARG
+# Succeed (exit status 0) when the first ten lines that $file_magic_cmd
+# prints for ARG contain the word "import"; fail (exit status 1)
+# otherwise.  Errors from $file_magic_cmd are discarded.
+func_win32_import_lib_p ()
+{
+    $opt_debug
+    case `eval $file_magic_cmd \"\$1\" 2>/dev/null | $SED -e 10q` in
+      *import*) return 0 ;;
+    esac
+    return 1
+}
+
+# func_mode_link arg...
+func_mode_link ()
+{
+    $opt_debug
+    case $host in
+    *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-cegcc*)
+      # It is impossible to link a dll without this setting, and
+      # we shouldn't force the makefile maintainer to figure out
+      # which system we are compiling for in order to pass an extra
+      # flag for every libtool invocation.
+      # allow_undefined=no
+
+      # FIXME: Unfortunately, there are problems with the above when trying
+      # to make a dll which has undefined symbols, in which case not
+      # even a static library is built.  For now, we need to specify
+      # -no-undefined on the libtool link line when we can be certain
+      # that all symbols are satisfied, otherwise we get a static library.
+      allow_undefined=yes
+      ;;
+    *)
+      allow_undefined=yes
+      ;;
+    esac
+    libtool_args=$nonopt
+    base_compile="$nonopt $@"
+    compile_command=$nonopt
+    finalize_command=$nonopt
+
+    compile_rpath=
+    finalize_rpath=
+    compile_shlibpath=
+    finalize_shlibpath=
+    convenience=
+    old_convenience=
+    deplibs=
+    old_deplibs=
+    compiler_flags=
+    linker_flags=
+    dllsearchpath=
+    lib_search_path=`pwd`
+    inst_prefix_dir=
+    new_inherited_linker_flags=
+
+    avoid_version=no
+    bindir=
+    dlfiles=
+    dlprefiles=
+    dlself=no
+    export_dynamic=no
+    export_symbols=
+    export_symbols_regex=
+    generated=
+    libobjs=
+    ltlibs=
+    module=no
+    no_install=no
+    objs=
+    non_pic_objects=
+    precious_files_regex=
+    prefer_static_libs=no
+    preload=no
+    prev=
+    prevarg=
+    release=
+    rpath=
+    xrpath=
+    perm_rpath=
+    temp_rpath=
+    thread_safe=no
+    vinfo=
+    vinfo_number=no
+    weak_libs=
+    single_module="${wl}-single_module"
+    func_infer_tag $base_compile
+
+    # We need to know -static, to get the right output filenames.
+    for arg
+    do
+      case $arg in
+      -shared)
+       test "$build_libtool_libs" != yes && \
+         func_fatal_configuration "can not build a shared library"
+       build_old_libs=no
+       break
+       ;;
+      -all-static | -static | -static-libtool-libs)
+       case $arg in
+       -all-static)
+         if test "$build_libtool_libs" = yes && test -z "$link_static_flag"; then
+           func_warning "complete static linking is impossible in this configuration"
+         fi
+         if test -n "$link_static_flag"; then
+           dlopen_self=$dlopen_self_static
+         fi
+         prefer_static_libs=yes
+         ;;
+       -static)
+         if test -z "$pic_flag" && test -n "$link_static_flag"; then
+           dlopen_self=$dlopen_self_static
+         fi
+         prefer_static_libs=built
+         ;;
+       -static-libtool-libs)
+         if test -z "$pic_flag" && test -n "$link_static_flag"; then
+           dlopen_self=$dlopen_self_static
+         fi
+         prefer_static_libs=yes
+         ;;
+       esac
+       build_libtool_libs=no
+       build_old_libs=yes
+       break
+       ;;
+      esac
+    done
+
+    # See if our shared archives depend on static archives.
+    test -n "$old_archive_from_new_cmds" && build_old_libs=yes
+
+    # Go through the arguments, transforming them on the way.
+    while test "$#" -gt 0; do
+      arg="$1"
+      shift
+      func_quote_for_eval "$arg"
+      qarg=$func_quote_for_eval_unquoted_result
+      func_append libtool_args " $func_quote_for_eval_result"
+
+      # If the previous option needs an argument, assign it.
+      if test -n "$prev"; then
+       case $prev in
+       output)
+         func_append compile_command " @OUTPUT@"
+         func_append finalize_command " @OUTPUT@"
+         ;;
+       esac
+
+       case $prev in
+       bindir)
+         bindir="$arg"
+         prev=
+         continue
+         ;;
+       dlfiles|dlprefiles)
+         if test "$preload" = no; then
+           # Add the symbol object into the linking commands.
+           func_append compile_command " @SYMFILE@"
+           func_append finalize_command " @SYMFILE@"
+           preload=yes
+         fi
+         case $arg in
+         *.la | *.lo) ;;  # We handle these cases below.
+         force)
+           if test "$dlself" = no; then
+             dlself=needless
+             export_dynamic=yes
+           fi
+           prev=
+           continue
+           ;;
+         self)
+           if test "$prev" = dlprefiles; then
+             dlself=yes
+           elif test "$prev" = dlfiles && test "$dlopen_self" != yes; then
+             dlself=yes
+           else
+             dlself=needless
+             export_dynamic=yes
+           fi
+           prev=
+           continue
+           ;;
+         *)
+           if test "$prev" = dlfiles; then
+             func_append dlfiles " $arg"
+           else
+             func_append dlprefiles " $arg"
+           fi
+           prev=
+           continue
+           ;;
+         esac
+         ;;
+       expsyms)
+         export_symbols="$arg"
+         test -f "$arg" \
+           || func_fatal_error "symbol file \`$arg' does not exist"
+         prev=
+         continue
+         ;;
+       expsyms_regex)
+         export_symbols_regex="$arg"
+         prev=
+         continue
+         ;;
+       framework)
+         case $host in
+           *-*-darwin*)
+             case "$deplibs " in
+               *" $qarg.ltframework "*) ;;
+               *) func_append deplibs " $qarg.ltframework" # this is fixed later
+                  ;;
+             esac
+             ;;
+         esac
+         prev=
+         continue
+         ;;
+       inst_prefix)
+         inst_prefix_dir="$arg"
+         prev=
+         continue
+         ;;
+       objectlist)
+         if test -f "$arg"; then
+           save_arg=$arg
+           moreargs=
+           for fil in `cat "$save_arg"`
+           do
+#            func_append moreargs " $fil"
+             arg=$fil
+             # A libtool-controlled object.
+
+             # Check to see that this really is a libtool object.
+             if func_lalib_unsafe_p "$arg"; then
+               pic_object=
+               non_pic_object=
+
+               # Read the .lo file
+               func_source "$arg"
+
+               if test -z "$pic_object" ||
+                  test -z "$non_pic_object" ||
+                  test "$pic_object" = none &&
+                  test "$non_pic_object" = none; then
+                 func_fatal_error "cannot find name of object for \`$arg'"
+               fi
+
+               # Extract subdirectory from the argument.
+               func_dirname "$arg" "/" ""
+               xdir="$func_dirname_result"
+
+               if test "$pic_object" != none; then
+                 # Prepend the subdirectory the object is found in.
+                 pic_object="$xdir$pic_object"
+
+                 if test "$prev" = dlfiles; then
+                   if test "$build_libtool_libs" = yes && test "$dlopen_support" = yes; then
+                     func_append dlfiles " $pic_object"
+                     prev=
+                     continue
+                   else
+                     # If libtool objects are unsupported, then we need to preload.
+                     prev=dlprefiles
+                   fi
+                 fi
+
+                 # CHECK ME:  I think I busted this.  -Ossama
+                 if test "$prev" = dlprefiles; then
+                   # Preload the old-style object.
+                   func_append dlprefiles " $pic_object"
+                   prev=
+                 fi
+
+                 # A PIC object.
+                 func_append libobjs " $pic_object"
+                 arg="$pic_object"
+               fi
+
+               # Non-PIC object.
+               if test "$non_pic_object" != none; then
+                 # Prepend the subdirectory the object is found in.
+                 non_pic_object="$xdir$non_pic_object"
+
+                 # A standard non-PIC object
+                 func_append non_pic_objects " $non_pic_object"
+                 if test -z "$pic_object" || test "$pic_object" = none ; then
+                   arg="$non_pic_object"
+                 fi
+               else
+                 # If the PIC object exists, use it instead.
+                 # $xdir was prepended to $pic_object above.
+                 non_pic_object="$pic_object"
+                 func_append non_pic_objects " $non_pic_object"
+               fi
+             else
+               # Only an error if not doing a dry-run.
+               if $opt_dry_run; then
+                 # Extract subdirectory from the argument.
+                 func_dirname "$arg" "/" ""
+                 xdir="$func_dirname_result"
+
+                 func_lo2o "$arg"
+                 pic_object=$xdir$objdir/$func_lo2o_result
+                 non_pic_object=$xdir$func_lo2o_result
+                 func_append libobjs " $pic_object"
+                 func_append non_pic_objects " $non_pic_object"
+               else
+                 func_fatal_error "\`$arg' is not a valid libtool object"
+               fi
+             fi
+           done
+         else
+           func_fatal_error "link input file \`$arg' does not exist"
+         fi
+         arg=$save_arg
+         prev=
+         continue
+         ;;
+       precious_regex)
+         precious_files_regex="$arg"
+         prev=
+         continue
+         ;;
+       release)
+         release="-$arg"
+         prev=
+         continue
+         ;;
+       rpath | xrpath)
+         # We need an absolute path.
+         case $arg in
+         [\\/]* | [A-Za-z]:[\\/]*) ;;
+         *)
+           func_fatal_error "only absolute run-paths are allowed"
+           ;;
+         esac
+         if test "$prev" = rpath; then
+           case "$rpath " in
+           *" $arg "*) ;;
+           *) func_append rpath " $arg" ;;
+           esac
+         else
+           case "$xrpath " in
+           *" $arg "*) ;;
+           *) func_append xrpath " $arg" ;;
+           esac
+         fi
+         prev=
+         continue
+         ;;
+       shrext)
+         shrext_cmds="$arg"
+         prev=
+         continue
+         ;;
+       weak)
+         func_append weak_libs " $arg"
+         prev=
+         continue
+         ;;
+       xcclinker)
+         func_append linker_flags " $qarg"
+         func_append compiler_flags " $qarg"
+         prev=
+         func_append compile_command " $qarg"
+         func_append finalize_command " $qarg"
+         continue
+         ;;
+       xcompiler)
+         func_append compiler_flags " $qarg"
+         prev=
+         func_append compile_command " $qarg"
+         func_append finalize_command " $qarg"
+         continue
+         ;;
+       xlinker)
+         func_append linker_flags " $qarg"
+         func_append compiler_flags " $wl$qarg"
+         prev=
+         func_append compile_command " $wl$qarg"
+         func_append finalize_command " $wl$qarg"
+         continue
+         ;;
+       *)
+         eval "$prev=\"\$arg\""
+         prev=
+         continue
+         ;;
+       esac
+      fi # test -n "$prev"
+
+      prevarg="$arg"
+
+      case $arg in
+      -all-static)
+       if test -n "$link_static_flag"; then
+         # See comment for -static flag below, for more details.
+         func_append compile_command " $link_static_flag"
+         func_append finalize_command " $link_static_flag"
+       fi
+       continue
+       ;;
+
+      -allow-undefined)
+       # FIXME: remove this flag sometime in the future.
+       func_fatal_error "\`-allow-undefined' must not be used because it is the default"
+       ;;
+
+      -avoid-version)
+       avoid_version=yes
+       continue
+       ;;
+
+      -bindir)
+       prev=bindir
+       continue
+       ;;
+
+      -dlopen)
+       prev=dlfiles
+       continue
+       ;;
+
+      -dlpreopen)
+       prev=dlprefiles
+       continue
+       ;;
+
+      -export-dynamic)
+       export_dynamic=yes
+       continue
+       ;;
+
+      -export-symbols | -export-symbols-regex)
+       if test -n "$export_symbols" || test -n "$export_symbols_regex"; then
+         func_fatal_error "more than one -exported-symbols argument is not allowed"
+       fi
+       if test "X$arg" = "X-export-symbols"; then
+         prev=expsyms
+       else
+         prev=expsyms_regex
+       fi
+       continue
+       ;;
+
+      -framework)
+       prev=framework
+       continue
+       ;;
+
+      -inst-prefix-dir)
+       prev=inst_prefix
+       continue
+       ;;
+
+      # The native IRIX linker understands -LANG:*, -LIST:* and -LNO:*
+      # so, if we see these flags be careful not to treat them like -L
+      -L[A-Z][A-Z]*:*)
+       case $with_gcc/$host in
+       no/*-*-irix* | /*-*-irix*)
+         func_append compile_command " $arg"
+         func_append finalize_command " $arg"
+         ;;
+       esac
+       continue
+       ;;
+
+      -L*)
+       func_stripname "-L" '' "$arg"
+       if test -z "$func_stripname_result"; then
+         if test "$#" -gt 0; then
+           func_fatal_error "require no space between \`-L' and \`$1'"
+         else
+           func_fatal_error "need path for \`-L' option"
+         fi
+       fi
+       func_resolve_sysroot "$func_stripname_result"
+       dir=$func_resolve_sysroot_result
+       # We need an absolute path.
+       case $dir in
+       [\\/]* | [A-Za-z]:[\\/]*) ;;
+       *)
+         absdir=`cd "$dir" && pwd`
+         test -z "$absdir" && \
+           func_fatal_error "cannot determine absolute directory name of \`$dir'"
+         dir="$absdir"
+         ;;
+       esac
+       case "$deplibs " in
+       *" -L$dir "* | *" $arg "*)
+         # Will only happen for absolute or sysroot arguments
+         ;;
+       *)
+         # Preserve sysroot, but never include relative directories
+         case $dir in
+           [\\/]* | [A-Za-z]:[\\/]* | =*) func_append deplibs " $arg" ;;
+           *) func_append deplibs " -L$dir" ;;
+         esac
+         func_append lib_search_path " $dir"
+         ;;
+       esac
+       case $host in
+       *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-cegcc*)
+         testbindir=`$ECHO "$dir" | $SED 's*/lib$*/bin*'`
+         case :$dllsearchpath: in
+         *":$dir:"*) ;;
+         ::) dllsearchpath=$dir;;
+         *) func_append dllsearchpath ":$dir";;
+         esac
+         case :$dllsearchpath: in
+         *":$testbindir:"*) ;;
+         ::) dllsearchpath=$testbindir;;
+         *) func_append dllsearchpath ":$testbindir";;
+         esac
+         ;;
+       esac
+       continue
+       ;;
+
+      -l*)
+       if test "X$arg" = "X-lc" || test "X$arg" = "X-lm"; then
+         case $host in
+         *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-beos* | *-cegcc* | *-*-haiku*)
+           # These systems don't actually have a C or math library (as such)
+           continue
+           ;;
+         *-*-os2*)
+           # These systems don't actually have a C library (as such)
+           test "X$arg" = "X-lc" && continue
+           ;;
+         *-*-openbsd* | *-*-freebsd* | *-*-dragonfly*)
+           # Do not include libc due to us having libc/libc_r.
+           test "X$arg" = "X-lc" && continue
+           ;;
+         *-*-rhapsody* | *-*-darwin1.[012])
+           # Rhapsody C and math libraries are in the System framework
+           func_append deplibs " System.ltframework"
+           continue
+           ;;
+         *-*-sco3.2v5* | *-*-sco5v6*)
+           # Causes problems with __ctype
+           test "X$arg" = "X-lc" && continue
+           ;;
+         *-*-sysv4.2uw2* | *-*-sysv5* | *-*-unixware* | *-*-OpenUNIX*)
+           # Compiler inserts libc in the correct place for threads to work
+           test "X$arg" = "X-lc" && continue
+           ;;
+         esac
+       elif test "X$arg" = "X-lc_r"; then
+        case $host in
+        *-*-openbsd* | *-*-freebsd* | *-*-dragonfly*)
+          # Do not include libc_r directly, use -pthread flag.
+          continue
+          ;;
+        esac
+       fi
+       func_append deplibs " $arg"
+       continue
+       ;;
+
+      -module)
+       module=yes
+       continue
+       ;;
+
+      # Tru64 UNIX uses -model [arg] to determine the layout of C++
+      # classes, name mangling, and exception handling.
+      # Darwin uses the -arch flag to determine output architecture.
+      -model|-arch|-isysroot|--sysroot)
+       func_append compiler_flags " $arg"
+       func_append compile_command " $arg"
+       func_append finalize_command " $arg"
+       prev=xcompiler
+       continue
+       ;;
+
+      -mt|-mthreads|-kthread|-Kthread|-pthread|-pthreads|--thread-safe \
+      |-threads|-fopenmp|-openmp|-mp|-xopenmp|-omp|-qsmp=*)
+       func_append compiler_flags " $arg"
+       func_append compile_command " $arg"
+       func_append finalize_command " $arg"
+       case "$new_inherited_linker_flags " in
+           *" $arg "*) ;;
+           * ) func_append new_inherited_linker_flags " $arg" ;;
+       esac
+       continue
+       ;;
+
+      -multi_module)
+       single_module="${wl}-multi_module"
+       continue
+       ;;
+
+      -no-fast-install)
+       fast_install=no
+       continue
+       ;;
+
+      -no-install)
+       case $host in
+       *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-*-darwin* | *-cegcc*)
+         # The PATH hackery in wrapper scripts is required on Windows
+         # and Darwin in order for the loader to find any dlls it needs.
+         func_warning "\`-no-install' is ignored for $host"
+         func_warning "assuming \`-no-fast-install' instead"
+         fast_install=no
+         ;;
+       *) no_install=yes ;;
+       esac
+       continue
+       ;;
+
+      -no-undefined)
+       allow_undefined=no
+       continue
+       ;;
+
+      -objectlist)
+       prev=objectlist
+       continue
+       ;;
+
+      -o) prev=output ;;
+
+      -precious-files-regex)
+       prev=precious_regex
+       continue
+       ;;
+
+      -release)
+       prev=release
+       continue
+       ;;
+
+      -rpath)
+       prev=rpath
+       continue
+       ;;
+
+      -R)
+       prev=xrpath
+       continue
+       ;;
+
+      -R*)
+       func_stripname '-R' '' "$arg"
+       dir=$func_stripname_result
+       # We need an absolute path.
+       case $dir in
+       [\\/]* | [A-Za-z]:[\\/]*) ;;
+       =*)
+         func_stripname '=' '' "$dir"
+         dir=$lt_sysroot$func_stripname_result
+         ;;
+       *)
+         func_fatal_error "only absolute run-paths are allowed"
+         ;;
+       esac
+       case "$xrpath " in
+       *" $dir "*) ;;
+       *) func_append xrpath " $dir" ;;
+       esac
+       continue
+       ;;
+
+      -shared)
+       # The effects of -shared are defined in a previous loop.
+       continue
+       ;;
+
+      -shrext)
+       prev=shrext
+       continue
+       ;;
+
+      -static | -static-libtool-libs)
+       # The effects of -static are defined in a previous loop.
+       # We used to do the same as -all-static on platforms that
+       # didn't have a PIC flag, but the assumption that the effects
+       # would be equivalent was wrong.  It would break on at least
+       # Digital Unix and AIX.
+       continue
+       ;;
+
+      -thread-safe)
+       thread_safe=yes
+       continue
+       ;;
+
+      -version-info)
+       prev=vinfo
+       continue
+       ;;
+
+      -version-number)
+       prev=vinfo
+       vinfo_number=yes
+       continue
+       ;;
+
+      -weak)
+        prev=weak
+       continue
+       ;;
+
+      -Wc,*)
+       func_stripname '-Wc,' '' "$arg"
+       args=$func_stripname_result
+       arg=
+       save_ifs="$IFS"; IFS=','
+       for flag in $args; do
+         IFS="$save_ifs"
+          func_quote_for_eval "$flag"
+         func_append arg " $func_quote_for_eval_result"
+         func_append compiler_flags " $func_quote_for_eval_result"
+       done
+       IFS="$save_ifs"
+       func_stripname ' ' '' "$arg"
+       arg=$func_stripname_result
+       ;;
+
+      -Wl,*)
+       func_stripname '-Wl,' '' "$arg"
+       args=$func_stripname_result
+       arg=
+       save_ifs="$IFS"; IFS=','
+       for flag in $args; do
+         IFS="$save_ifs"
+          func_quote_for_eval "$flag"
+         func_append arg " $wl$func_quote_for_eval_result"
+         func_append compiler_flags " $wl$func_quote_for_eval_result"
+         func_append linker_flags " $func_quote_for_eval_result"
+       done
+       IFS="$save_ifs"
+       func_stripname ' ' '' "$arg"
+       arg=$func_stripname_result
+       ;;
+
+      -Xcompiler)
+       prev=xcompiler
+       continue
+       ;;
+
+      -Xlinker)
+       prev=xlinker
+       continue
+       ;;
+
+      -XCClinker)
+       prev=xcclinker
+       continue
+       ;;
+
+      # -msg_* for osf cc
+      -msg_*)
+       func_quote_for_eval "$arg"
+       arg="$func_quote_for_eval_result"
+       ;;
+
+      # Flags to be passed through unchanged, with rationale:
+      # -64, -mips[0-9]      enable 64-bit mode for the SGI compiler
+      # -r[0-9][0-9]*        specify processor for the SGI compiler
+      # -xarch=*, -xtarget=* enable 64-bit mode for the Sun compiler
+      # +DA*, +DD*           enable 64-bit mode for the HP compiler
+      # -q*                  compiler args for the IBM compiler
+      # -m*, -t[45]*, -txscale* architecture-specific flags for GCC
+      # -F/path              path to uninstalled frameworks, gcc on darwin
+      # -p, -pg, --coverage, -fprofile-*  profiling flags for GCC
+      # @file                GCC response files
+      # -tp=*                Portland pgcc target processor selection
+      # --sysroot=*          for sysroot support
+      # -O*, -flto*, -fwhopr*, -fuse-linker-plugin GCC link-time optimization
+      -64|-mips[0-9]|-r[0-9][0-9]*|-xarch=*|-xtarget=*|+DA*|+DD*|-q*|-m*| \
+      -t[45]*|-txscale*|-p|-pg|--coverage|-fprofile-*|-F*|@*|-tp=*|--sysroot=*| \
+      -O*|-flto*|-fwhopr*|-fuse-linker-plugin)
+        func_quote_for_eval "$arg"
+       arg="$func_quote_for_eval_result"
+        func_append compile_command " $arg"
+        func_append finalize_command " $arg"
+        func_append compiler_flags " $arg"
+        continue
+        ;;
+
+      # Some other compiler flag.
+      -* | +*)
+        func_quote_for_eval "$arg"
+       arg="$func_quote_for_eval_result"
+       ;;
+
+      *.$objext)
+       # A standard object.
+       func_append objs " $arg"
+       ;;
+
+      *.lo)
+       # A libtool-controlled object.
+
+       # Check to see that this really is a libtool object.
+       if func_lalib_unsafe_p "$arg"; then
+         pic_object=
+         non_pic_object=
+
+         # Read the .lo file
+         func_source "$arg"
+
+         if test -z "$pic_object" ||
+            test -z "$non_pic_object" ||
+            test "$pic_object" = none &&
+            test "$non_pic_object" = none; then
+           func_fatal_error "cannot find name of object for \`$arg'"
+         fi
+
+         # Extract subdirectory from the argument.
+         func_dirname "$arg" "/" ""
+         xdir="$func_dirname_result"
+
+         if test "$pic_object" != none; then
+           # Prepend the subdirectory the object is found in.
+           pic_object="$xdir$pic_object"
+
+           if test "$prev" = dlfiles; then
+             if test "$build_libtool_libs" = yes && test "$dlopen_support" = yes; then
+               func_append dlfiles " $pic_object"
+               prev=
+               continue
+             else
+               # If libtool objects are unsupported, then we need to preload.
+               prev=dlprefiles
+             fi
+           fi
+
+           # CHECK ME:  I think I busted this.  -Ossama
+           if test "$prev" = dlprefiles; then
+             # Preload the old-style object.
+             func_append dlprefiles " $pic_object"
+             prev=
+           fi
+
+           # A PIC object.
+           func_append libobjs " $pic_object"
+           arg="$pic_object"
+         fi
+
+         # Non-PIC object.
+         if test "$non_pic_object" != none; then
+           # Prepend the subdirectory the object is found in.
+           non_pic_object="$xdir$non_pic_object"
+
+           # A standard non-PIC object
+           func_append non_pic_objects " $non_pic_object"
+           if test -z "$pic_object" || test "$pic_object" = none ; then
+             arg="$non_pic_object"
+           fi
+         else
+           # If the PIC object exists, use it instead.
+           # $xdir was prepended to $pic_object above.
+           non_pic_object="$pic_object"
+           func_append non_pic_objects " $non_pic_object"
+         fi
+       else
+         # Only an error if not doing a dry-run.
+         if $opt_dry_run; then
+           # Extract subdirectory from the argument.
+           func_dirname "$arg" "/" ""
+           xdir="$func_dirname_result"
+
+           func_lo2o "$arg"
+           pic_object=$xdir$objdir/$func_lo2o_result
+           non_pic_object=$xdir$func_lo2o_result
+           func_append libobjs " $pic_object"
+           func_append non_pic_objects " $non_pic_object"
+         else
+           func_fatal_error "\`$arg' is not a valid libtool object"
+         fi
+       fi
+       ;;
+
+      *.$libext)
+       # An archive.
+       func_append deplibs " $arg"
+       func_append old_deplibs " $arg"
+       continue
+       ;;
+
+      *.la)
+       # A libtool-controlled library.
+
+       func_resolve_sysroot "$arg"
+       if test "$prev" = dlfiles; then
+         # This library was specified with -dlopen.
+         func_append dlfiles " $func_resolve_sysroot_result"
+         prev=
+       elif test "$prev" = dlprefiles; then
+         # The library was specified with -dlpreopen.
+         func_append dlprefiles " $func_resolve_sysroot_result"
+         prev=
+       else
+         func_append deplibs " $func_resolve_sysroot_result"
+       fi
+       continue
+       ;;
+
+      # Some other compiler argument.
+      *)
+       # Unknown arguments in both finalize_command and compile_command need
+       # to be aesthetically quoted because they are evaled later.
+       func_quote_for_eval "$arg"
+       arg="$func_quote_for_eval_result"
+       ;;
+      esac # arg
+
+      # Now actually substitute the argument into the commands.
+      if test -n "$arg"; then
+       func_append compile_command " $arg"
+       func_append finalize_command " $arg"
+      fi
+    done # argument parsing loop
+
+    test -n "$prev" && \
+      func_fatal_help "the \`$prevarg' option requires an argument"
+
+    if test "$export_dynamic" = yes && test -n "$export_dynamic_flag_spec"; then
+      eval arg=\"$export_dynamic_flag_spec\"
+      func_append compile_command " $arg"
+      func_append finalize_command " $arg"
+    fi
+
+    oldlibs=
+    # calculate the name of the file, without its directory
+    func_basename "$output"
+    outputname="$func_basename_result"
+    libobjs_save="$libobjs"
+
+    if test -n "$shlibpath_var"; then
+      # get the directories listed in $shlibpath_var
+      eval shlib_search_path=\`\$ECHO \"\${$shlibpath_var}\" \| \$SED \'s/:/ /g\'\`
+    else
+      shlib_search_path=
+    fi
+    eval sys_lib_search_path=\"$sys_lib_search_path_spec\"
+    eval sys_lib_dlsearch_path=\"$sys_lib_dlsearch_path_spec\"
+
+    func_dirname "$output" "/" ""
+    output_objdir="$func_dirname_result$objdir"
+    func_to_tool_file "$output_objdir/"
+    tool_output_objdir=$func_to_tool_file_result
+    # Create the object directory.
+    func_mkdir_p "$output_objdir"
+
+    # Determine the type of output
+    case $output in
+    "")
+      func_fatal_help "you must specify an output file"
+      ;;
+    *.$libext) linkmode=oldlib ;;
+    *.lo | *.$objext) linkmode=obj ;;
+    *.la) linkmode=lib ;;
+    *) linkmode=prog ;; # Anything else should be a program.
+    esac
+
+    specialdeplibs=
+
+    libs=
+    # Find all interdependent deplibs by searching for libraries
+    # that are linked more than once (e.g. -la -lb -la)
+    for deplib in $deplibs; do
+      if $opt_preserve_dup_deps ; then
+       case "$libs " in
+       *" $deplib "*) func_append specialdeplibs " $deplib" ;;
+       esac
+      fi
+      func_append libs " $deplib"
+    done
+
+    if test "$linkmode" = lib; then
+      libs="$predeps $libs $compiler_lib_search_path $postdeps"
+
+      # Compute libraries that are listed more than once in $predeps
+      # $postdeps and mark them as special (i.e., whose duplicates are
+      # not to be eliminated).
+      pre_post_deps=
+      if $opt_duplicate_compiler_generated_deps; then
+       for pre_post_dep in $predeps $postdeps; do
+         case "$pre_post_deps " in
+         *" $pre_post_dep "*) func_append specialdeplibs " $pre_post_deps" ;;
+         esac
+         func_append pre_post_deps " $pre_post_dep"
+       done
+      fi
+      pre_post_deps=
+    fi
+
+    deplibs=
+    newdependency_libs=
+    newlib_search_path=
+    need_relink=no # whether we're linking any uninstalled libtool libraries
+    notinst_deplibs= # not-installed libtool libraries
+    notinst_path= # paths that contain not-installed libtool libraries
+
+    case $linkmode in
+    lib)
+       passes="conv dlpreopen link"
+       for file in $dlfiles $dlprefiles; do
+         case $file in
+         *.la) ;;
+         *)
+           func_fatal_help "libraries can \`-dlopen' only libtool libraries: $file"
+           ;;
+         esac
+       done
+       ;;
+    prog)
+       compile_deplibs=
+       finalize_deplibs=
+       alldeplibs=no
+       newdlfiles=
+       newdlprefiles=
+       passes="conv scan dlopen dlpreopen link"
+       ;;
+    *)  passes="conv"
+       ;;
+    esac
+
+    for pass in $passes; do
+      # The preopen pass in lib mode reverses $deplibs; put it back here
+      # so that -L comes before libs that need it for instance...
+      if test "$linkmode,$pass" = "lib,link"; then
+       ## FIXME: Find the place where the list is rebuilt in the wrong
+       ##        order, and fix it there properly
+        tmp_deplibs=
+       for deplib in $deplibs; do
+         tmp_deplibs="$deplib $tmp_deplibs"
+       done
+       deplibs="$tmp_deplibs"
+      fi
+
+      if test "$linkmode,$pass" = "lib,link" ||
+        test "$linkmode,$pass" = "prog,scan"; then
+       libs="$deplibs"
+       deplibs=
+      fi
+      if test "$linkmode" = prog; then
+       case $pass in
+       dlopen) libs="$dlfiles" ;;
+       dlpreopen) libs="$dlprefiles" ;;
+       link) libs="$deplibs %DEPLIBS% $dependency_libs" ;;
+       esac
+      fi
+      if test "$linkmode,$pass" = "lib,dlpreopen"; then
+       # Collect and forward deplibs of preopened libtool libs
+       for lib in $dlprefiles; do
+         # Ignore non-libtool-libs
+         dependency_libs=
+         func_resolve_sysroot "$lib"
+         case $lib in
+         *.la) func_source "$func_resolve_sysroot_result" ;;
+         esac
+
+         # Collect preopened libtool deplibs, except any this library
+         # has declared as weak libs
+         for deplib in $dependency_libs; do
+           func_basename "$deplib"
+            deplib_base=$func_basename_result
+           case " $weak_libs " in
+           *" $deplib_base "*) ;;
+           *) func_append deplibs " $deplib" ;;
+           esac
+         done
+       done
+       libs="$dlprefiles"
+      fi
+      if test "$pass" = dlopen; then
+       # Collect dlpreopened libraries
+       save_deplibs="$deplibs"
+       deplibs=
+      fi
+
+      for deplib in $libs; do
+       lib=
+       found=no
+       case $deplib in
+       -mt|-mthreads|-kthread|-Kthread|-pthread|-pthreads|--thread-safe \
+        |-threads|-fopenmp|-openmp|-mp|-xopenmp|-omp|-qsmp=*)
+         if test "$linkmode,$pass" = "prog,link"; then
+           compile_deplibs="$deplib $compile_deplibs"
+           finalize_deplibs="$deplib $finalize_deplibs"
+         else
+           func_append compiler_flags " $deplib"
+           if test "$linkmode" = lib ; then
+               case "$new_inherited_linker_flags " in
+                   *" $deplib "*) ;;
+                   * ) func_append new_inherited_linker_flags " $deplib" ;;
+               esac
+           fi
+         fi
+         continue
+         ;;
+       -l*)
+         if test "$linkmode" != lib && test "$linkmode" != prog; then
+           func_warning "\`-l' is ignored for archives/objects"
+           continue
+         fi
+         func_stripname '-l' '' "$deplib"
+         name=$func_stripname_result
+         if test "$linkmode" = lib; then
+           searchdirs="$newlib_search_path $lib_search_path $compiler_lib_search_dirs $sys_lib_search_path $shlib_search_path"
+         else
+           searchdirs="$newlib_search_path $lib_search_path $sys_lib_search_path $shlib_search_path"
+         fi
+         for searchdir in $searchdirs; do
+           for search_ext in .la $std_shrext .so .a; do
+             # Search the libtool library
+             lib="$searchdir/lib${name}${search_ext}"
+             if test -f "$lib"; then
+               if test "$search_ext" = ".la"; then
+                 found=yes
+               else
+                 found=no
+               fi
+               break 2
+             fi
+           done
+         done
+         if test "$found" != yes; then
+           # deplib doesn't seem to be a libtool library
+           if test "$linkmode,$pass" = "prog,link"; then
+             compile_deplibs="$deplib $compile_deplibs"
+             finalize_deplibs="$deplib $finalize_deplibs"
+           else
+             deplibs="$deplib $deplibs"
+             test "$linkmode" = lib && newdependency_libs="$deplib $newdependency_libs"
+           fi
+           continue
+         else # deplib is a libtool library
+           # If $allow_libtool_libs_with_static_runtimes && $deplib is a stdlib,
+           # We need to do some special things here, and not later.
+           if test "X$allow_libtool_libs_with_static_runtimes" = "Xyes" ; then
+             case " $predeps $postdeps " in
+             *" $deplib "*)
+               if func_lalib_p "$lib"; then
+                 library_names=
+                 old_library=
+                 func_source "$lib"
+                 for l in $old_library $library_names; do
+                   ll="$l"
+                 done
+                 if test "X$ll" = "X$old_library" ; then # only static version available
+                   found=no
+                   func_dirname "$lib" "" "."
+                   ladir="$func_dirname_result"
+                   lib=$ladir/$old_library
+                   if test "$linkmode,$pass" = "prog,link"; then
+                     compile_deplibs="$deplib $compile_deplibs"
+                     finalize_deplibs="$deplib $finalize_deplibs"
+                   else
+                     deplibs="$deplib $deplibs"
+                     test "$linkmode" = lib && newdependency_libs="$deplib $newdependency_libs"
+                   fi
+                   continue
+                 fi
+               fi
+               ;;
+             *) ;;
+             esac
+           fi
+         fi
+         ;; # -l
+       *.ltframework)
+         if test "$linkmode,$pass" = "prog,link"; then
+           compile_deplibs="$deplib $compile_deplibs"
+           finalize_deplibs="$deplib $finalize_deplibs"
+         else
+           deplibs="$deplib $deplibs"
+           if test "$linkmode" = lib ; then
+               case "$new_inherited_linker_flags " in
+                   *" $deplib "*) ;;
+                   * ) func_append new_inherited_linker_flags " $deplib" ;;
+               esac
+           fi
+         fi
+         continue
+         ;;
+       -L*)
+         case $linkmode in
+         lib)
+           deplibs="$deplib $deplibs"
+           test "$pass" = conv && continue
+           newdependency_libs="$deplib $newdependency_libs"
+           func_stripname '-L' '' "$deplib"
+           func_resolve_sysroot "$func_stripname_result"
+           func_append newlib_search_path " $func_resolve_sysroot_result"
+           ;;
+         prog)
+           if test "$pass" = conv; then
+             deplibs="$deplib $deplibs"
+             continue
+           fi
+           if test "$pass" = scan; then
+             deplibs="$deplib $deplibs"
+           else
+             compile_deplibs="$deplib $compile_deplibs"
+             finalize_deplibs="$deplib $finalize_deplibs"
+           fi
+           func_stripname '-L' '' "$deplib"
+           func_resolve_sysroot "$func_stripname_result"
+           func_append newlib_search_path " $func_resolve_sysroot_result"
+           ;;
+         *)
+           func_warning "\`-L' is ignored for archives/objects"
+           ;;
+         esac # linkmode
+         continue
+         ;; # -L
+       -R*)
+         if test "$pass" = link; then
+           func_stripname '-R' '' "$deplib"
+           func_resolve_sysroot "$func_stripname_result"
+           dir=$func_resolve_sysroot_result
+           # Make sure the xrpath contains only unique directories.
+           case "$xrpath " in
+           *" $dir "*) ;;
+           *) func_append xrpath " $dir" ;;
+           esac
+         fi
+         deplibs="$deplib $deplibs"
+         continue
+         ;;
+       *.la)
+         func_resolve_sysroot "$deplib"
+         lib=$func_resolve_sysroot_result
+         ;;
+       *.$libext)
+         if test "$pass" = conv; then
+           deplibs="$deplib $deplibs"
+           continue
+         fi
+         case $linkmode in
+         lib)
+           # Linking convenience modules into shared libraries is allowed,
+           # but linking other static libraries is non-portable.
+           case " $dlpreconveniencelibs " in
+           *" $deplib "*) ;;
+           *)
+             valid_a_lib=no
+             case $deplibs_check_method in
+               match_pattern*)
+                 set dummy $deplibs_check_method; shift
+                 match_pattern_regex=`expr "$deplibs_check_method" : "$1 \(.*\)"`
+                 if eval "\$ECHO \"$deplib\"" 2>/dev/null | $SED 10q \
+                   | $EGREP "$match_pattern_regex" > /dev/null; then
+                   valid_a_lib=yes
+                 fi
+               ;;
+               pass_all)
+                 valid_a_lib=yes
+               ;;
+             esac
+             if test "$valid_a_lib" != yes; then
+               echo
+               $ECHO "*** Warning: Trying to link with static lib archive $deplib."
+               echo "*** I have the capability to make that library automatically link in when"
+               echo "*** you link to this library.  But I can only do this if you have a"
+               echo "*** shared version of the library, which you do not appear to have"
+               echo "*** because the file extensions .$libext of this argument makes me believe"
+               echo "*** that it is just a static archive that I should not use here."
+             else
+               echo
+               $ECHO "*** Warning: Linking the shared library $output against the"
+               $ECHO "*** static library $deplib is not portable!"
+               deplibs="$deplib $deplibs"
+             fi
+             ;;
+           esac
+           continue
+           ;;
+         prog)
+           if test "$pass" != link; then
+             deplibs="$deplib $deplibs"
+           else
+             compile_deplibs="$deplib $compile_deplibs"
+             finalize_deplibs="$deplib $finalize_deplibs"
+           fi
+           continue
+           ;;
+         esac # linkmode
+         ;; # *.$libext
+       *.lo | *.$objext)
+         if test "$pass" = conv; then
+           deplibs="$deplib $deplibs"
+         elif test "$linkmode" = prog; then
+           if test "$pass" = dlpreopen || test "$dlopen_support" != yes || test "$build_libtool_libs" = no; then
+             # If there is no dlopen support or we're linking statically,
+             # we need to preload.
+             func_append newdlprefiles " $deplib"
+             compile_deplibs="$deplib $compile_deplibs"
+             finalize_deplibs="$deplib $finalize_deplibs"
+           else
+             func_append newdlfiles " $deplib"
+           fi
+         fi
+         continue
+         ;;
+       %DEPLIBS%)
+         alldeplibs=yes
+         continue
+         ;;
+       esac # case $deplib
+
+       if test "$found" = yes || test -f "$lib"; then :
+       else
+         func_fatal_error "cannot find the library \`$lib' or unhandled argument \`$deplib'"
+       fi
+
+       # Check to see that this really is a libtool archive.
+       func_lalib_unsafe_p "$lib" \
+         || func_fatal_error "\`$lib' is not a valid libtool archive"
+
+       func_dirname "$lib" "" "."
+       ladir="$func_dirname_result"
+
+       dlname=
+       dlopen=
+       dlpreopen=
+       libdir=
+       library_names=
+       old_library=
+       inherited_linker_flags=
+       # If the library was installed with an old release of libtool,
+       # it will not redefine variables installed, or shouldnotlink
+       installed=yes
+       shouldnotlink=no
+       avoidtemprpath=
+
+
+       # Read the .la file
+       func_source "$lib"
+
+       # Convert "-framework foo" to "foo.ltframework"
+       if test -n "$inherited_linker_flags"; then
+         tmp_inherited_linker_flags=`$ECHO "$inherited_linker_flags" | $SED 's/-framework \([^ $]*\)/\1.ltframework/g'`
+         for tmp_inherited_linker_flag in $tmp_inherited_linker_flags; do
+           case " $new_inherited_linker_flags " in
+             *" $tmp_inherited_linker_flag "*) ;;
+             *) func_append new_inherited_linker_flags " $tmp_inherited_linker_flag";;
+           esac
+         done
+       fi
+       dependency_libs=`$ECHO " $dependency_libs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'`
+       if test "$linkmode,$pass" = "lib,link" ||
+          test "$linkmode,$pass" = "prog,scan" ||
+          { test "$linkmode" != prog && test "$linkmode" != lib; }; then
+         test -n "$dlopen" && func_append dlfiles " $dlopen"
+         test -n "$dlpreopen" && func_append dlprefiles " $dlpreopen"
+       fi
+
+       if test "$pass" = conv; then
+         # Only check for convenience libraries
+         deplibs="$lib $deplibs"
+         if test -z "$libdir"; then
+           if test -z "$old_library"; then
+             func_fatal_error "cannot find name of link library for \`$lib'"
+           fi
+           # It is a libtool convenience library, so add in its objects.
+           func_append convenience " $ladir/$objdir/$old_library"
+           func_append old_convenience " $ladir/$objdir/$old_library"
+         elif test "$linkmode" != prog && test "$linkmode" != lib; then
+           func_fatal_error "\`$lib' is not a convenience library"
+         fi
+         tmp_libs=
+         for deplib in $dependency_libs; do
+           deplibs="$deplib $deplibs"
+           if $opt_preserve_dup_deps ; then
+             case "$tmp_libs " in
+             *" $deplib "*) func_append specialdeplibs " $deplib" ;;
+             esac
+           fi
+           func_append tmp_libs " $deplib"
+         done
+         continue
+       fi # $pass = conv
+
+
+       # Get the name of the library we link against.
+       linklib=
+       if test -n "$old_library" &&
+          { test "$prefer_static_libs" = yes ||
+            test "$prefer_static_libs,$installed" = "built,no"; }; then
+         linklib=$old_library
+       else
+         for l in $old_library $library_names; do
+           linklib="$l"
+         done
+       fi
+       if test -z "$linklib"; then
+         func_fatal_error "cannot find name of link library for \`$lib'"
+       fi
+
+       # This library was specified with -dlopen.
+       if test "$pass" = dlopen; then
+         if test -z "$libdir"; then
+           func_fatal_error "cannot -dlopen a convenience library: \`$lib'"
+         fi
+         if test -z "$dlname" ||
+            test "$dlopen_support" != yes ||
+            test "$build_libtool_libs" = no; then
+           # If there is no dlname, no dlopen support or we're linking
+           # statically, we need to preload.  We also need to preload any
+           # dependent libraries so libltdl's deplib preloader doesn't
+           # bomb out in the load deplibs phase.
+           func_append dlprefiles " $lib $dependency_libs"
+         else
+           func_append newdlfiles " $lib"
+         fi
+         continue
+       fi # $pass = dlopen
+
+       # We need an absolute path.
+       case $ladir in
+       [\\/]* | [A-Za-z]:[\\/]*) abs_ladir="$ladir" ;;
+       *)
+         abs_ladir=`cd "$ladir" && pwd`
+         if test -z "$abs_ladir"; then
+           func_warning "cannot determine absolute directory name of \`$ladir'"
+           func_warning "passing it literally to the linker, although it might fail"
+           abs_ladir="$ladir"
+         fi
+         ;;
+       esac
+       func_basename "$lib"
+       laname="$func_basename_result"
+
+       # Find the relevant object directory and library name.
+       if test "X$installed" = Xyes; then
+         if test ! -f "$lt_sysroot$libdir/$linklib" && test -f "$abs_ladir/$linklib"; then
+           func_warning "library \`$lib' was moved."
+           dir="$ladir"
+           absdir="$abs_ladir"
+           libdir="$abs_ladir"
+         else
+           dir="$lt_sysroot$libdir"
+           absdir="$lt_sysroot$libdir"
+         fi
+         test "X$hardcode_automatic" = Xyes && avoidtemprpath=yes
+       else
+         if test ! -f "$ladir/$objdir/$linklib" && test -f "$abs_ladir/$linklib"; then
+           dir="$ladir"
+           absdir="$abs_ladir"
+           # Remove this search path later
+           func_append notinst_path " $abs_ladir"
+         else
+           dir="$ladir/$objdir"
+           absdir="$abs_ladir/$objdir"
+           # Remove this search path later
+           func_append notinst_path " $abs_ladir"
+         fi
+       fi # $installed = yes
+       func_stripname 'lib' '.la' "$laname"
+       name=$func_stripname_result
+
+       # This library was specified with -dlpreopen.
+       if test "$pass" = dlpreopen; then
+         if test -z "$libdir" && test "$linkmode" = prog; then
+           func_fatal_error "only libraries may -dlpreopen a convenience library: \`$lib'"
+         fi
+         case "$host" in
+           # special handling for platforms with PE-DLLs.
+           *cygwin* | *mingw* | *cegcc* )
+             # Linker will automatically link against shared library if both
+             # static and shared are present.  Therefore, ensure we extract
+             # symbols from the import library if a shared library is present
+             # (otherwise, the dlopen module name will be incorrect).  We do
+             # this by putting the import library name into $newdlprefiles.
+             # We recover the dlopen module name by 'saving' the la file
+             # name in a special purpose variable, and (later) extracting the
+             # dlname from the la file.
+             if test -n "$dlname"; then
+               func_tr_sh "$dir/$linklib"
+               eval "libfile_$func_tr_sh_result=\$abs_ladir/\$laname"
+               func_append newdlprefiles " $dir/$linklib"
+             else
+               func_append newdlprefiles " $dir/$old_library"
+               # Keep a list of preopened convenience libraries to check
+               # that they are being used correctly in the link pass.
+               test -z "$libdir" && \
+                 func_append dlpreconveniencelibs " $dir/$old_library"
+             fi
+           ;;
+           * )
+             # Prefer using a static library (so that no silly _DYNAMIC symbols
+             # are required to link).
+             if test -n "$old_library"; then
+               func_append newdlprefiles " $dir/$old_library"
+               # Keep a list of preopened convenience libraries to check
+               # that they are being used correctly in the link pass.
+               test -z "$libdir" && \
+                 func_append dlpreconveniencelibs " $dir/$old_library"
+             # Otherwise, use the dlname, so that lt_dlopen finds it.
+             elif test -n "$dlname"; then
+               func_append newdlprefiles " $dir/$dlname"
+             else
+               func_append newdlprefiles " $dir/$linklib"
+             fi
+           ;;
+         esac
+       fi # $pass = dlpreopen
+
+       if test -z "$libdir"; then
+         # Link the convenience library
+         if test "$linkmode" = lib; then
+           deplibs="$dir/$old_library $deplibs"
+         elif test "$linkmode,$pass" = "prog,link"; then
+           compile_deplibs="$dir/$old_library $compile_deplibs"
+           finalize_deplibs="$dir/$old_library $finalize_deplibs"
+         else
+           deplibs="$lib $deplibs" # used for prog,scan pass
+         fi
+         continue
+       fi
+
+
+       if test "$linkmode" = prog && test "$pass" != link; then
+         func_append newlib_search_path " $ladir"
+         deplibs="$lib $deplibs"
+
+         linkalldeplibs=no
+         if test "$link_all_deplibs" != no || test -z "$library_names" ||
+            test "$build_libtool_libs" = no; then
+           linkalldeplibs=yes
+         fi
+
+         tmp_libs=
+         for deplib in $dependency_libs; do
+           case $deplib in
+           -L*) func_stripname '-L' '' "$deplib"
+                func_resolve_sysroot "$func_stripname_result"
+                func_append newlib_search_path " $func_resolve_sysroot_result"
+                ;;
+           esac
+           # Need to link against all dependency_libs?
+           if test "$linkalldeplibs" = yes; then
+             deplibs="$deplib $deplibs"
+           else
+             # Need to hardcode shared library paths
+             # or/and link against static libraries
+             newdependency_libs="$deplib $newdependency_libs"
+           fi
+           if $opt_preserve_dup_deps ; then
+             case "$tmp_libs " in
+             *" $deplib "*) func_append specialdeplibs " $deplib" ;;
+             esac
+           fi
+           func_append tmp_libs " $deplib"
+         done # for deplib
+         continue
+       fi # $linkmode = prog...
+
+       if test "$linkmode,$pass" = "prog,link"; then
+         if test -n "$library_names" &&
+            { { test "$prefer_static_libs" = no ||
+                test "$prefer_static_libs,$installed" = "built,yes"; } ||
+              test -z "$old_library"; }; then
+           # We need to hardcode the library path
+           if test -n "$shlibpath_var" && test -z "$avoidtemprpath" ; then
+             # Make sure the rpath contains only unique directories.
+             case "$temp_rpath:" in
+             *"$absdir:"*) ;;
+             *) func_append temp_rpath "$absdir:" ;;
+             esac
+           fi
+
+           # Hardcode the library path.
+           # Skip directories that are in the system default run-time
+           # search path.
+           case " $sys_lib_dlsearch_path " in
+           *" $absdir "*) ;;
+           *)
+             case "$compile_rpath " in
+             *" $absdir "*) ;;
+             *) func_append compile_rpath " $absdir" ;;
+             esac
+             ;;
+           esac
+           case " $sys_lib_dlsearch_path " in
+           *" $libdir "*) ;;
+           *)
+             case "$finalize_rpath " in
+             *" $libdir "*) ;;
+             *) func_append finalize_rpath " $libdir" ;;
+             esac
+             ;;
+           esac
+         fi # $linkmode,$pass = prog,link...
+
+         if test "$alldeplibs" = yes &&
+            { test "$deplibs_check_method" = pass_all ||
+              { test "$build_libtool_libs" = yes &&
+                test -n "$library_names"; }; }; then
+           # We only need to search for static libraries
+           continue
+         fi
+       fi
+
+       link_static=no # Whether the deplib will be linked statically
+       use_static_libs=$prefer_static_libs
+       if test "$use_static_libs" = built && test "$installed" = yes; then
+         use_static_libs=no
+       fi
+       if test -n "$library_names" &&
+          { test "$use_static_libs" = no || test -z "$old_library"; }; then
+         case $host in
+         *cygwin* | *mingw* | *cegcc*)
+             # No point in relinking DLLs because paths are not encoded
+             func_append notinst_deplibs " $lib"
+             need_relink=no
+           ;;
+         *)
+           if test "$installed" = no; then
+             func_append notinst_deplibs " $lib"
+             need_relink=yes
+           fi
+           ;;
+         esac
+         # This is a shared library
+
+         # Warn about portability, can't link against -module's on some
+         # systems (darwin).  Don't bleat about dlopened modules though!
+         dlopenmodule=""
+         for dlpremoduletest in $dlprefiles; do
+           if test "X$dlpremoduletest" = "X$lib"; then
+             dlopenmodule="$dlpremoduletest"
+             break
+           fi
+         done
+         if test -z "$dlopenmodule" && test "$shouldnotlink" = yes && test "$pass" = link; then
+           echo
+           if test "$linkmode" = prog; then
+             $ECHO "*** Warning: Linking the executable $output against the loadable module"
+           else
+             $ECHO "*** Warning: Linking the shared library $output against the loadable module"
+           fi
+           $ECHO "*** $linklib is not portable!"
+         fi
+         if test "$linkmode" = lib &&
+            test "$hardcode_into_libs" = yes; then
+           # Hardcode the library path.
+           # Skip directories that are in the system default run-time
+           # search path.
+           case " $sys_lib_dlsearch_path " in
+           *" $absdir "*) ;;
+           *)
+             case "$compile_rpath " in
+             *" $absdir "*) ;;
+             *) func_append compile_rpath " $absdir" ;;
+             esac
+             ;;
+           esac
+           case " $sys_lib_dlsearch_path " in
+           *" $libdir "*) ;;
+           *)
+             case "$finalize_rpath " in
+             *" $libdir "*) ;;
+             *) func_append finalize_rpath " $libdir" ;;
+             esac
+             ;;
+           esac
+         fi
+
+         if test -n "$old_archive_from_expsyms_cmds"; then
+           # figure out the soname
+           set dummy $library_names
+           shift
+           realname="$1"
+           shift
+           libname=`eval "\\$ECHO \"$libname_spec\""`
+           # use dlname if we got it. it's perfectly good, no?
+           if test -n "$dlname"; then
+             soname="$dlname"
+           elif test -n "$soname_spec"; then
+             # PE-DLL hosts: set versuffix to "-(current - age)" before soname_spec is expanded.
+             case $host in
+             *cygwin* | *mingw* | *cegcc*)
+               func_arith $current - $age
+               major=$func_arith_result
+               versuffix="-$major"
+               ;;
+             esac
+             eval soname=\"$soname_spec\"
+           else
+             soname="$realname"
+           fi
+
+           # Make a new name for the extract_expsyms_cmds to use
+           soroot="$soname"
+           func_basename "$soroot"
+           soname="$func_basename_result"
+           func_stripname 'lib' '.dll' "$soname"
+           newlib=libimp-$func_stripname_result.a
+
+           # If the library has no export list, then create one now
+           if test -f "$output_objdir/$soname-def"; then :
+           else
+             func_verbose "extracting exported symbol list from \`$soname'"
+             func_execute_cmds "$extract_expsyms_cmds" 'exit $?'
+           fi
+
+           # Create $newlib
+           if test -f "$output_objdir/$newlib"; then :; else
+             func_verbose "generating import library for \`$soname'"
+             func_execute_cmds "$old_archive_from_expsyms_cmds" 'exit $?'
+           fi
+           # make sure the library variables are pointing to the new library
+           dir=$output_objdir
+           linklib=$newlib
+         fi # test -n "$old_archive_from_expsyms_cmds"
+
+         if test "$linkmode" = prog || test "$opt_mode" != relink; then
+           add_shlibpath=
+           add_dir=
+           add=
+           lib_linked=yes
+           case $hardcode_action in
+           immediate | unsupported)
+             if test "$hardcode_direct" = no; then
+               add="$dir/$linklib"
+               case $host in
+                 *-*-sco3.2v5.0.[024]*) add_dir="-L$dir" ;;
+                 *-*-sysv4*uw2*) add_dir="-L$dir" ;;
+                 *-*-sysv5OpenUNIX* | *-*-sysv5UnixWare7.[01].[10]* | \
+                   *-*-unixware7*) add_dir="-L$dir" ;;
+                 *-*-darwin* )
+                   # if the lib is a (non-dlopened) module then we can not
+                   # link against it, someone is ignoring the earlier warnings
+                   if /usr/bin/file -L $add 2> /dev/null |
+                        $GREP ": [^:]* bundle" >/dev/null ; then
+                     if test "X$dlopenmodule" != "X$lib"; then
+                       $ECHO "*** Warning: lib $linklib is a module, not a shared library"
+                       if test -z "$old_library" ; then
+                         echo
+                         echo "*** And there doesn't seem to be a static archive available"
+                         echo "*** The link will probably fail, sorry"
+                       else
+                         add="$dir/$old_library"
+                       fi
+                     elif test -n "$old_library"; then
+                       add="$dir/$old_library"
+                     fi
+                   fi
+               esac
+             elif test "$hardcode_minus_L" = no; then
+               case $host in
+               *-*-sunos*) add_shlibpath="$dir" ;;
+               esac
+               add_dir="-L$dir"
+               add="-l$name"
+             elif test "$hardcode_shlibpath_var" = no; then
+               add_shlibpath="$dir"
+               add="-l$name"
+             else
+               lib_linked=no
+             fi
+             ;;
+           relink)
+             if test "$hardcode_direct" = yes &&
+                test "$hardcode_direct_absolute" = no; then
+               add="$dir/$linklib"
+             elif test "$hardcode_minus_L" = yes; then
+               add_dir="-L$absdir"
+               # Try looking first in the location we're being installed to.
+               if test -n "$inst_prefix_dir"; then
+                 case $libdir in
+                   [\\/]*)
+                     func_append add_dir " -L$inst_prefix_dir$libdir"
+                     ;;
+                 esac
+               fi
+               add="-l$name"
+             elif test "$hardcode_shlibpath_var" = yes; then
+               add_shlibpath="$dir"
+               add="-l$name"
+             else
+               lib_linked=no
+             fi
+             ;;
+           *) lib_linked=no ;;
+           esac
+
+           if test "$lib_linked" != yes; then
+             func_fatal_configuration "unsupported hardcode properties"
+           fi
+
+           if test -n "$add_shlibpath"; then
+             case :$compile_shlibpath: in
+             *":$add_shlibpath:"*) ;;
+             *) func_append compile_shlibpath "$add_shlibpath:" ;;
+             esac
+           fi
+           if test "$linkmode" = prog; then
+             test -n "$add_dir" && compile_deplibs="$add_dir $compile_deplibs"
+             test -n "$add" && compile_deplibs="$add $compile_deplibs"
+           else
+             test -n "$add_dir" && deplibs="$add_dir $deplibs"
+             test -n "$add" && deplibs="$add $deplibs"
+             if test "$hardcode_direct" != yes &&
+                test "$hardcode_minus_L" != yes &&
+                test "$hardcode_shlibpath_var" = yes; then
+               case :$finalize_shlibpath: in
+               *":$libdir:"*) ;;
+               *) func_append finalize_shlibpath "$libdir:" ;;
+               esac
+             fi
+           fi
+         fi
+
+         if test "$linkmode" = prog || test "$opt_mode" = relink; then
+           add_shlibpath=
+           add_dir=
+           add=
+           # Finalize command for both is simple: just hardcode it.
+           if test "$hardcode_direct" = yes &&
+              test "$hardcode_direct_absolute" = no; then
+             add="$libdir/$linklib"
+           elif test "$hardcode_minus_L" = yes; then
+             add_dir="-L$libdir"
+             add="-l$name"
+           elif test "$hardcode_shlibpath_var" = yes; then
+             case :$finalize_shlibpath: in
+             *":$libdir:"*) ;;
+             *) func_append finalize_shlibpath "$libdir:" ;;
+             esac
+             add="-l$name"
+           elif test "$hardcode_automatic" = yes; then
+             if test -n "$inst_prefix_dir" &&
+                test -f "$inst_prefix_dir$libdir/$linklib" ; then
+               add="$inst_prefix_dir$libdir/$linklib"
+             else
+               add="$libdir/$linklib"
+             fi
+           else
+             # We cannot seem to hardcode it, guess we'll fake it.
+             add_dir="-L$libdir"
+             # Try looking first in the location we're being installed to.
+             if test -n "$inst_prefix_dir"; then
+               case $libdir in
+                 [\\/]*)
+                   func_append add_dir " -L$inst_prefix_dir$libdir"
+                   ;;
+               esac
+             fi
+             add="-l$name"
+           fi
+
+           if test "$linkmode" = prog; then
+             test -n "$add_dir" && finalize_deplibs="$add_dir $finalize_deplibs"
+             test -n "$add" && finalize_deplibs="$add $finalize_deplibs"
+           else
+             test -n "$add_dir" && deplibs="$add_dir $deplibs"
+             test -n "$add" && deplibs="$add $deplibs"
+           fi
+         fi
+       elif test "$linkmode" = prog; then
+         # Here we assume that one of hardcode_direct or hardcode_minus_L
+         # is not unsupported.  This is valid on all known static and
+         # shared platforms.
+         if test "$hardcode_direct" != unsupported; then
+           test -n "$old_library" && linklib="$old_library"
+           compile_deplibs="$dir/$linklib $compile_deplibs"
+           finalize_deplibs="$dir/$linklib $finalize_deplibs"
+         else
+           compile_deplibs="-l$name -L$dir $compile_deplibs"
+           finalize_deplibs="-l$name -L$dir $finalize_deplibs"
+         fi
+       elif test "$build_libtool_libs" = yes; then
+         # Not a shared library
+         if test "$deplibs_check_method" != pass_all; then
+           # We're trying to link a shared library against a static one
+           # but the system doesn't support it.
+
+           # Just print a warning and add the library to dependency_libs so
+           # that the program can be linked against the static library.
+           echo
+           $ECHO "*** Warning: This system can not link to static lib archive $lib."
+           echo "*** I have the capability to make that library automatically link in when"
+           echo "*** you link to this library.  But I can only do this if you have a"
+           echo "*** shared version of the library, which you do not appear to have."
+           if test "$module" = yes; then
+             echo "*** But as you try to build a module library, libtool will still create "
+             echo "*** a static module, that should work as long as the dlopening application"
+             echo "*** is linked with the -dlopen flag to resolve symbols at runtime."
+             if test -z "$global_symbol_pipe"; then
+               echo
+               echo "*** However, this would only work if libtool was able to extract symbol"
+               echo "*** lists from a program, using \`nm' or equivalent, but libtool could"
+               echo "*** not find such a program.  So, this module is probably useless."
+               echo "*** \`nm' from GNU binutils and a full rebuild may help."
+             fi
+             if test "$build_old_libs" = no; then
+               build_libtool_libs=module
+               build_old_libs=yes
+             else
+               build_libtool_libs=no
+             fi
+           fi
+         else
+           deplibs="$dir/$old_library $deplibs"
+           link_static=yes
+         fi
+       fi # link shared/static library?
+
+       if test "$linkmode" = lib; then
+         if test -n "$dependency_libs" &&
+            { test "$hardcode_into_libs" != yes ||
+              test "$build_old_libs" = yes ||
+              test "$link_static" = yes; }; then
+           # Extract -R from dependency_libs
+           temp_deplibs=
+           for libdir in $dependency_libs; do
+             case $libdir in
+             -R*) func_stripname '-R' '' "$libdir"
+                  temp_xrpath=$func_stripname_result
+                  case " $xrpath " in
+                  *" $temp_xrpath "*) ;;
+                  *) func_append xrpath " $temp_xrpath";;
+                  esac;;
+             *) func_append temp_deplibs " $libdir";;
+             esac
+           done
+           dependency_libs="$temp_deplibs"
+         fi
+
+         func_append newlib_search_path " $absdir"
+         # Link against this library
+         test "$link_static" = no && newdependency_libs="$abs_ladir/$laname $newdependency_libs"
+         # ... and its dependency_libs
+         tmp_libs=
+         for deplib in $dependency_libs; do
+           newdependency_libs="$deplib $newdependency_libs"
+           case $deplib in
+              -L*) func_stripname '-L' '' "$deplib"
+                   func_resolve_sysroot "$func_stripname_result";;
+              *) func_resolve_sysroot "$deplib" ;;
+            esac
+           if $opt_preserve_dup_deps ; then
+             case "$tmp_libs " in
+             *" $func_resolve_sysroot_result "*)
+                func_append specialdeplibs " $func_resolve_sysroot_result" ;;
+             esac
+           fi
+           func_append tmp_libs " $func_resolve_sysroot_result"
+         done
+
+         if test "$link_all_deplibs" != no; then
+           # Add the search paths of all dependency libraries
+           for deplib in $dependency_libs; do
+             path=
+             case $deplib in
+             -L*) path="$deplib" ;;
+             *.la)
+               func_resolve_sysroot "$deplib"
+               deplib=$func_resolve_sysroot_result
+               func_dirname "$deplib" "" "."
+               dir=$func_dirname_result
+               # We need an absolute path.
+               case $dir in
+               [\\/]* | [A-Za-z]:[\\/]*) absdir="$dir" ;;
+               *)
+                 absdir=`cd "$dir" && pwd`
+                 if test -z "$absdir"; then
+                   func_warning "cannot determine absolute directory name of \`$dir'"
+                   absdir="$dir"
+                 fi
+                 ;;
+               esac
+               if $GREP "^installed=no" $deplib > /dev/null; then
+               case $host in
+               *-*-darwin*)
+                 depdepl=
+                 eval deplibrary_names=`${SED} -n -e 's/^library_names=\(.*\)$/\1/p' $deplib`
+                 if test -n "$deplibrary_names" ; then
+                   for tmp in $deplibrary_names ; do
+                     depdepl=$tmp
+                   done
+                   if test -f "$absdir/$objdir/$depdepl" ; then
+                     depdepl="$absdir/$objdir/$depdepl"
+                     darwin_install_name=`${OTOOL} -L $depdepl | awk '{if (NR == 2) {print $1;exit}}'`
+                      if test -z "$darwin_install_name"; then
+                          darwin_install_name=`${OTOOL64} -L $depdepl  | awk '{if (NR == 2) {print $1;exit}}'`
+                      fi
+                     func_append compiler_flags " ${wl}-dylib_file ${wl}${darwin_install_name}:${depdepl}"
+                     func_append linker_flags " -dylib_file ${darwin_install_name}:${depdepl}"
+                     path=
+                   fi
+                 fi
+                 ;;
+               *)
+                 path="-L$absdir/$objdir"
+                 ;;
+               esac
+               else
+                 eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $deplib`
+                 test -z "$libdir" && \
+                   func_fatal_error "\`$deplib' is not a valid libtool archive"
+                 test "$absdir" != "$libdir" && \
+                   func_warning "\`$deplib' seems to be moved"
+
+                 path="-L$absdir"
+               fi
+               ;;
+             esac
+             case " $deplibs " in
+             *" $path "*) ;;
+             *) deplibs="$path $deplibs" ;;
+             esac
+           done
+         fi # link_all_deplibs != no
+       fi # linkmode = lib
+      done # for deplib in $libs
+      if test "$pass" = link; then
+       if test "$linkmode" = "prog"; then
+         compile_deplibs="$new_inherited_linker_flags $compile_deplibs"
+         finalize_deplibs="$new_inherited_linker_flags $finalize_deplibs"
+       else
+         compiler_flags="$compiler_flags "`$ECHO " $new_inherited_linker_flags" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'`
+       fi
+      fi
+      dependency_libs="$newdependency_libs"
+      if test "$pass" = dlpreopen; then
+       # Link the dlpreopened libraries before other libraries
+       for deplib in $save_deplibs; do
+         deplibs="$deplib $deplibs"
+       done
+      fi
+      if test "$pass" != dlopen; then
+       if test "$pass" != conv; then
+         # Make sure lib_search_path contains only unique directories.
+         lib_search_path=
+         for dir in $newlib_search_path; do
+           case "$lib_search_path " in
+           *" $dir "*) ;;
+           *) func_append lib_search_path " $dir" ;;
+           esac
+         done
+         newlib_search_path=
+       fi
+
+       if test "$linkmode,$pass" != "prog,link"; then
+         vars="deplibs"
+       else
+         vars="compile_deplibs finalize_deplibs"
+       fi
+       for var in $vars dependency_libs; do
+         # Add libraries to $var in reverse order
+         eval tmp_libs=\"\$$var\"
+         new_libs=
+         for deplib in $tmp_libs; do
+           # FIXME: Pedantically, this is the right thing to do, so
+           #        that some nasty dependency loop isn't accidentally
+           #        broken:
+           #new_libs="$deplib $new_libs"
+           # Pragmatically, this seems to cause very few problems in
+           # practice:
+           case $deplib in
+           -L*) new_libs="$deplib $new_libs" ;;
+           -R*) ;;
+           *)
+             # And here is the reason: when a library appears more
+             # than once as an explicit dependence of a library, or
+             # is implicitly linked in more than once by the
+             # compiler, it is considered special, and multiple
+             # occurrences thereof are not removed.  Compare this
+             # with having the same library being listed as a
+             # dependency of multiple other libraries: in this case,
+             # we know (pedantically, we assume) the library does not
+             # need to be listed more than once, so we keep only the
+             # last copy.  This is not always right, but it is rare
+             # enough that we require users that really mean to play
+             # such unportable linking tricks to link the library
+             # using -Wl,-lname, so that libtool does not consider it
+             # for duplicate removal.
+             case " $specialdeplibs " in
+             *" $deplib "*) new_libs="$deplib $new_libs" ;;
+             *)
+               case " $new_libs " in
+               *" $deplib "*) ;;
+               *) new_libs="$deplib $new_libs" ;;
+               esac
+               ;;
+             esac
+             ;;
+           esac
+         done
+         tmp_libs=
+         for deplib in $new_libs; do
+           case $deplib in
+           -L*)
+             case " $tmp_libs " in
+             *" $deplib "*) ;;
+             *) func_append tmp_libs " $deplib" ;;
+             esac
+             ;;
+           *) func_append tmp_libs " $deplib" ;;
+           esac
+         done
+         eval $var=\"$tmp_libs\"
+       done # for var
+      fi
+      # Last step: remove runtime libs from dependency_libs
+      # (they stay in deplibs)
+      tmp_libs=
+      for i in $dependency_libs ; do
+       case " $predeps $postdeps $compiler_lib_search_path " in
+       *" $i "*)
+         i=""
+         ;;
+       esac
+       if test -n "$i" ; then
+         func_append tmp_libs " $i"
+       fi
+      done
+      dependency_libs=$tmp_libs
+    done # for pass
+    if test "$linkmode" = prog; then
+      dlfiles="$newdlfiles"
+    fi
+    if test "$linkmode" = prog || test "$linkmode" = lib; then
+      dlprefiles="$newdlprefiles"
+    fi
+
+    case $linkmode in
+    oldlib)
+      if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then
+       func_warning "\`-dlopen' is ignored for archives"
+      fi
+
+      case " $deplibs" in
+      *\ -l* | *\ -L*)
+       func_warning "\`-l' and \`-L' are ignored for archives" ;;
+      esac
+
+      test -n "$rpath" && \
+       func_warning "\`-rpath' is ignored for archives"
+
+      test -n "$xrpath" && \
+       func_warning "\`-R' is ignored for archives"
+
+      test -n "$vinfo" && \
+       func_warning "\`-version-info/-version-number' is ignored for archives"
+
+      test -n "$release" && \
+       func_warning "\`-release' is ignored for archives"
+
+      test -n "$export_symbols$export_symbols_regex" && \
+       func_warning "\`-export-symbols' is ignored for archives"
+
+      # Now set the variables for building old libraries.
+      build_libtool_libs=no
+      oldlibs="$output"
+      func_append objs "$old_deplibs"
+      ;;
+
+    lib)
+      # Make sure we only generate libraries of the form `libNAME.la'.
+      case $outputname in
+      lib*)
+       func_stripname 'lib' '.la' "$outputname"
+       name=$func_stripname_result
+       eval shared_ext=\"$shrext_cmds\"
+       eval libname=\"$libname_spec\"
+       ;;
+      *)
+       test "$module" = no && \
+         func_fatal_help "libtool library \`$output' must begin with \`lib'"
+
+       if test "$need_lib_prefix" != no; then
+         # Add the "lib" prefix for modules if required
+         func_stripname '' '.la' "$outputname"
+         name=$func_stripname_result
+         eval shared_ext=\"$shrext_cmds\"
+         eval libname=\"$libname_spec\"
+       else
+         func_stripname '' '.la' "$outputname"
+         libname=$func_stripname_result
+       fi
+       ;;
+      esac
+
+      if test -n "$objs"; then
+       if test "$deplibs_check_method" != pass_all; then
+         func_fatal_error "cannot build libtool library \`$output' from non-libtool objects on this host:$objs"
+       else
+         echo
+         $ECHO "*** Warning: Linking the shared library $output against the non-libtool"
+         $ECHO "*** objects $objs is not portable!"
+         func_append libobjs " $objs"
+       fi
+      fi
+
+      test "$dlself" != no && \
+       func_warning "\`-dlopen self' is ignored for libtool libraries"
+
+      set dummy $rpath
+      shift
+      test "$#" -gt 1 && \
+       func_warning "ignoring multiple \`-rpath's for a libtool library"
+
+      install_libdir="$1"
+
+      oldlibs=
+      if test -z "$rpath"; then
+       if test "$build_libtool_libs" = yes; then
+         # Building a libtool convenience library.
+         # Some compilers have problems with a `.al' extension so
+         # convenience libraries should have the same extension an
+         # archive normally would.
+         oldlibs="$output_objdir/$libname.$libext $oldlibs"
+         build_libtool_libs=convenience
+         build_old_libs=yes
+       fi
+
+       test -n "$vinfo" && \
+         func_warning "\`-version-info/-version-number' is ignored for convenience libraries"
+
+       test -n "$release" && \
+         func_warning "\`-release' is ignored for convenience libraries"
+      else
+
+       # Parse the version information argument.
+       save_ifs="$IFS"; IFS=':'
+       set dummy $vinfo 0 0 0
+       shift
+       IFS="$save_ifs"
+
+       test -n "$7" && \
+         func_fatal_help "too many parameters to \`-version-info'"
+
+       # convert absolute version numbers to libtool ages
+       # this retains compatibility with .la files and attempts
+       # to make the code below a bit more comprehensible
+
+       case $vinfo_number in
+       yes)
+         number_major="$1"
+         number_minor="$2"
+         number_revision="$3"
+         #
+         # There are really only two kinds -- those that
+         # use the current revision as the major version
+         # and those that subtract age and use age as
+         # a minor version.  But, then there is irix
+         # which has an extra 1 added just for fun
+         #
+         case $version_type in
+         # correct linux to gnu/linux during the next big refactor
+         darwin|linux|osf|windows|none)
+           func_arith $number_major + $number_minor
+           current=$func_arith_result
+           age="$number_minor"
+           revision="$number_revision"
+           ;;
+         freebsd-aout|freebsd-elf|qnx|sunos)
+           current="$number_major"
+           revision="$number_minor"
+           age="0"
+           ;;
+         irix|nonstopux)
+           func_arith $number_major + $number_minor
+           current=$func_arith_result
+           age="$number_minor"
+           revision="$number_minor"
+           lt_irix_increment=no
+           ;;
+         esac
+         ;;
+       no)
+         current="$1"
+         revision="$2"
+         age="$3"
+         ;;
+       esac
+
+       # Check that each of the things are valid numbers.
+       case $current in
+       0|[1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-9][0-9][0-9][0-9][0-9]) ;;
+       *)
+         func_error "CURRENT \`$current' must be a nonnegative integer"
+         func_fatal_error "\`$vinfo' is not valid version information"
+         ;;
+       esac
+
+       case $revision in
+       0|[1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-9][0-9][0-9][0-9][0-9]) ;;
+       *)
+         func_error "REVISION \`$revision' must be a nonnegative integer"
+         func_fatal_error "\`$vinfo' is not valid version information"
+         ;;
+       esac
+
+       case $age in
+       0|[1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-9][0-9][0-9][0-9][0-9]) ;;
+       *)
+         func_error "AGE \`$age' must be a nonnegative integer"
+         func_fatal_error "\`$vinfo' is not valid version information"
+         ;;
+       esac
+
+       if test "$age" -gt "$current"; then
+         func_error "AGE \`$age' is greater than the current interface number \`$current'"
+         func_fatal_error "\`$vinfo' is not valid version information"
+       fi
+
+       # Calculate the version variables.
+       major=
+       versuffix=
+       verstring=
+       case $version_type in
+       none) ;;
+
+       darwin)
+         # Like Linux, but with the current version available in
+         # verstring for coding it into the library header
+         func_arith $current - $age
+         major=.$func_arith_result
+         versuffix="$major.$age.$revision"
+         # Darwin ld doesn't like 0 for these options...
+         func_arith $current + 1
+         minor_current=$func_arith_result
+         xlcverstring="${wl}-compatibility_version ${wl}$minor_current ${wl}-current_version ${wl}$minor_current.$revision"
+         verstring="-compatibility_version $minor_current -current_version $minor_current.$revision"
+         ;;
+
+       freebsd-aout)
+         major=".$current"
+         versuffix=".$current.$revision";
+         ;;
+
+       freebsd-elf)
+         major=".$current"
+         versuffix=".$current"
+         ;;
+
+       irix | nonstopux)
+         if test "X$lt_irix_increment" = "Xno"; then
+           func_arith $current - $age
+         else
+           func_arith $current - $age + 1
+         fi
+         major=$func_arith_result
+
+         case $version_type in
+           nonstopux) verstring_prefix=nonstopux ;;
+           *)         verstring_prefix=sgi ;;
+         esac
+         verstring="$verstring_prefix$major.$revision"
+
+         # Add in all the interfaces that we are compatible with.
+         loop=$revision
+         while test "$loop" -ne 0; do
+           func_arith $revision - $loop
+           iface=$func_arith_result
+           func_arith $loop - 1
+           loop=$func_arith_result
+           verstring="$verstring_prefix$major.$iface:$verstring"
+         done
+
+         # Before this point, $major must not contain `.'.
+         major=.$major
+         versuffix="$major.$revision"
+         ;;
+
+       linux) # correct to gnu/linux during the next big refactor
+         func_arith $current - $age
+         major=.$func_arith_result
+         versuffix="$major.$age.$revision"
+         ;;
+
+       osf)
+         func_arith $current - $age
+         major=.$func_arith_result
+         versuffix=".$current.$age.$revision"
+         verstring="$current.$age.$revision"
+
+         # Add in all the interfaces that we are compatible with.
+         loop=$age
+         while test "$loop" -ne 0; do
+           func_arith $current - $loop
+           iface=$func_arith_result
+           func_arith $loop - 1
+           loop=$func_arith_result
+           verstring="$verstring:${iface}.0"
+         done
+
+         # Make executables depend on our current version.
+         func_append verstring ":${current}.0"
+         ;;
+
+       qnx)
+         major=".$current"
+         versuffix=".$current"
+         ;;
+
+       sunos)
+         major=".$current"
+         versuffix=".$current.$revision"
+         ;;
+
+       windows)
+         # Use '-' rather than '.', since we only want one
+         # extension on DOS 8.3 filesystems.
+         func_arith $current - $age
+         major=$func_arith_result
+         versuffix="-$major"
+         ;;
+
+       *)
+         func_fatal_configuration "unknown library version type \`$version_type'"
+         ;;
+       esac
+
+       # Clear the version info if we defaulted, and they specified a release.
+       if test -z "$vinfo" && test -n "$release"; then
+         major=
+         case $version_type in
+         darwin)
+           # we can't check for "0.0" in archive_cmds due to quoting
+           # problems, so we reset it completely
+           verstring=
+           ;;
+         *)
+           verstring="0.0"
+           ;;
+         esac
+         if test "$need_version" = no; then
+           versuffix=
+         else
+           versuffix=".0.0"
+         fi
+       fi
+
+       # Remove version info from name if versioning should be avoided
+       if test "$avoid_version" = yes && test "$need_version" = no; then
+         major=
+         versuffix=
+         verstring=""
+       fi
+
+       # Check to see if the archive will have undefined symbols.
+       if test "$allow_undefined" = yes; then
+         if test "$allow_undefined_flag" = unsupported; then
+           func_warning "undefined symbols not allowed in $host shared libraries"
+           build_libtool_libs=no
+           build_old_libs=yes
+         fi
+       else
+         # Don't allow undefined symbols.
+         allow_undefined_flag="$no_undefined_flag"
+       fi
+
+      fi
+
+      func_generate_dlsyms "$libname" "$libname" "yes"
+      func_append libobjs " $symfileobj"
+      test "X$libobjs" = "X " && libobjs=
+
+      if test "$opt_mode" != relink; then
+       # Remove our outputs, but don't remove object files since they
+       # may have been created when compiling PIC objects.
+       removelist=
+       tempremovelist=`$ECHO "$output_objdir/*"`
+       for p in $tempremovelist; do
+         case $p in
+           *.$objext | *.gcno)
+              ;;
+           $output_objdir/$outputname | $output_objdir/$libname.* | $output_objdir/${libname}${release}.*)
+              if test "X$precious_files_regex" != "X"; then
+                if $ECHO "$p" | $EGREP -e "$precious_files_regex" >/dev/null 2>&1
+                then
+                  continue
+                fi
+              fi
+              func_append removelist " $p"
+              ;;
+           *) ;;
+         esac
+       done
+       test -n "$removelist" && \
+         func_show_eval "${RM}r \$removelist"
+      fi
+
+      # Now set the variables for building old libraries.
+      if test "$build_old_libs" = yes && test "$build_libtool_libs" != convenience ; then
+       func_append oldlibs " $output_objdir/$libname.$libext"
+
+       # Transform .lo files to .o files.
+       oldobjs="$objs "`$ECHO "$libobjs" | $SP2NL | $SED "/\.${libext}$/d; $lo2o" | $NL2SP`
+      fi
+
+      # Eliminate all temporary directories.
+      #for path in $notinst_path; do
+      #        lib_search_path=`$ECHO "$lib_search_path " | $SED "s% $path % %g"`
+      #        deplibs=`$ECHO "$deplibs " | $SED "s% -L$path % %g"`
+      #        dependency_libs=`$ECHO "$dependency_libs " | $SED "s% -L$path % %g"`
+      #done
+
+      if test -n "$xrpath"; then
+       # If the user specified any rpath flags, then add them.
+       temp_xrpath=
+       for libdir in $xrpath; do
+         func_replace_sysroot "$libdir"
+         func_append temp_xrpath " -R$func_replace_sysroot_result"
+         case "$finalize_rpath " in
+         *" $libdir "*) ;;
+         *) func_append finalize_rpath " $libdir" ;;
+         esac
+       done
+       if test "$hardcode_into_libs" != yes || test "$build_old_libs" = yes; then
+         dependency_libs="$temp_xrpath $dependency_libs"
+       fi
+      fi
+
+      # Make sure dlfiles contains only unique files that won't be dlpreopened
+      old_dlfiles="$dlfiles"
+      dlfiles=
+      for lib in $old_dlfiles; do
+       case " $dlprefiles $dlfiles " in
+       *" $lib "*) ;;
+       *) func_append dlfiles " $lib" ;;
+       esac
+      done
+
+      # Make sure dlprefiles contains only unique files
+      old_dlprefiles="$dlprefiles"
+      dlprefiles=
+      for lib in $old_dlprefiles; do
+       case "$dlprefiles " in
+       *" $lib "*) ;;
+       *) func_append dlprefiles " $lib" ;;
+       esac
+      done
+
+      if test "$build_libtool_libs" = yes; then
+       if test -n "$rpath"; then
+         case $host in
+         *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-*-beos* | *-cegcc* | *-*-haiku*)
+           # These systems don't actually have a C library (as such)!
+           ;;
+         *-*-rhapsody* | *-*-darwin1.[012])
+           # Rhapsody C library is in the System framework
+           func_append deplibs " System.ltframework"
+           ;;
+         *-*-netbsd*)
+           # Don't link with libc until the a.out ld.so is fixed.
+           ;;
+         *-*-openbsd* | *-*-freebsd* | *-*-dragonfly*)
+           # Do not include libc due to us having libc/libc_r.
+           ;;
+         *-*-sco3.2v5* | *-*-sco5v6*)
+           # Causes problems with __ctype
+           ;;
+         *-*-sysv4.2uw2* | *-*-sysv5* | *-*-unixware* | *-*-OpenUNIX*)
+           # Compiler inserts libc in the correct place for threads to work
+           ;;
+         *)
+           # Add libc to deplibs on all other systems if necessary.
+           if test "$build_libtool_need_lc" = "yes"; then
+             func_append deplibs " -lc"
+           fi
+           ;;
+         esac
+       fi
+
+       # Transform deplibs into only deplibs that can be linked in shared.
+       name_save=$name
+       libname_save=$libname
+       release_save=$release
+       versuffix_save=$versuffix
+       major_save=$major
+       # I'm not sure if I'm treating the release correctly.  I think
+       # release should show up in the -l (i.e. -lgmp5) so we don't want to
+       # add it in twice.  Is that correct?
+       release=""
+       versuffix=""
+       major=""
+       newdeplibs=
+       droppeddeps=no
+       case $deplibs_check_method in
+       pass_all)
+         # Don't check for shared/static.  Everything works.
+         # This might be a little naive.  We might want to check
+         # whether the library exists or not.  But this is on
+         # osf3 & osf4 and I'm not really sure... Just
+         # implementing what was already the behavior.
+         newdeplibs=$deplibs
+         ;;
+       test_compile)
+         # This code stresses the "libraries are programs" paradigm to its
+         # limits. Maybe even breaks it.  We compile a program, linking it
+         # against the deplibs as a proxy for the library.  Then we can check
+         # whether they linked in statically or dynamically with ldd.
+         $opt_dry_run || $RM conftest.c
+         cat > conftest.c <<EOF
+         int main() { return 0; }
+EOF
+         $opt_dry_run || $RM conftest
+         if $LTCC $LTCFLAGS -o conftest conftest.c $deplibs; then
+           ldd_output=`ldd conftest`
+           for i in $deplibs; do
+             case $i in
+             -l*)
+               func_stripname -l '' "$i"
+               name=$func_stripname_result
+               if test "X$allow_libtool_libs_with_static_runtimes" = "Xyes" ; then
+                 case " $predeps $postdeps " in
+                 *" $i "*)
+                   func_append newdeplibs " $i"
+                   i=""
+                   ;;
+                 esac
+               fi
+               if test -n "$i" ; then
+                 libname=`eval "\\$ECHO \"$libname_spec\""`
+                 deplib_matches=`eval "\\$ECHO \"$library_names_spec\""`
+                 set dummy $deplib_matches; shift
+                 deplib_match=$1
+                 if test `expr "$ldd_output" : ".*$deplib_match"` -ne 0 ; then
+                   func_append newdeplibs " $i"
+                 else
+                   droppeddeps=yes
+                   echo
+                   $ECHO "*** Warning: dynamic linker does not accept needed library $i."
+                   echo "*** I have the capability to make that library automatically link in when"
+                   echo "*** you link to this library.  But I can only do this if you have a"
+                   echo "*** shared version of the library, which I believe you do not have"
+                   echo "*** because a test_compile did reveal that the linker did not use it for"
+                   echo "*** its dynamic dependency list that programs get resolved with at runtime."
+                 fi
+               fi
+               ;;
+             *)
+               func_append newdeplibs " $i"
+               ;;
+             esac
+           done
+         else
+           # Error occurred in the first compile.  Let's try to salvage
+           # the situation: Compile a separate program for each library.
+           for i in $deplibs; do
+             case $i in
+             -l*)
+               func_stripname -l '' "$i"
+               name=$func_stripname_result
+               $opt_dry_run || $RM conftest
+               if $LTCC $LTCFLAGS -o conftest conftest.c $i; then
+                 ldd_output=`ldd conftest`
+                 if test "X$allow_libtool_libs_with_static_runtimes" = "Xyes" ; then
+                   case " $predeps $postdeps " in
+                   *" $i "*)
+                     func_append newdeplibs " $i"
+                     i=""
+                     ;;
+                   esac
+                 fi
+                 if test -n "$i" ; then
+                   libname=`eval "\\$ECHO \"$libname_spec\""`
+                   deplib_matches=`eval "\\$ECHO \"$library_names_spec\""`
+                   set dummy $deplib_matches; shift
+                   deplib_match=$1
+                   if test `expr "$ldd_output" : ".*$deplib_match"` -ne 0 ; then
+                     func_append newdeplibs " $i"
+                   else
+                     droppeddeps=yes
+                     echo
+                     $ECHO "*** Warning: dynamic linker does not accept needed library $i."
+                     echo "*** I have the capability to make that library automatically link in when"
+                     echo "*** you link to this library.  But I can only do this if you have a"
+                     echo "*** shared version of the library, which you do not appear to have"
+                     echo "*** because a test_compile did reveal that the linker did not use this one"
+                     echo "*** as a dynamic dependency that programs can get resolved with at runtime."
+                   fi
+                 fi
+               else
+                 droppeddeps=yes
+                 echo
+                 $ECHO "*** Warning!  Library $i is needed by this library but I was not able to"
+                 echo "*** make it link in!  You will probably need to install it or some"
+                 echo "*** library that it depends on before this library will be fully"
+                 echo "*** functional.  Installing it before continuing would be even better."
+               fi
+               ;;
+             *)
+               func_append newdeplibs " $i"
+               ;;
+             esac
+           done
+         fi
+         ;;
+       file_magic*)
+         set dummy $deplibs_check_method; shift
+         file_magic_regex=`expr "$deplibs_check_method" : "$1 \(.*\)"`
+         for a_deplib in $deplibs; do
+           case $a_deplib in
+           -l*)
+             func_stripname -l '' "$a_deplib"
+             name=$func_stripname_result
+             if test "X$allow_libtool_libs_with_static_runtimes" = "Xyes" ; then
+               case " $predeps $postdeps " in
+               *" $a_deplib "*)
+                 func_append newdeplibs " $a_deplib"
+                 a_deplib=""
+                 ;;
+               esac
+             fi
+             if test -n "$a_deplib" ; then
+               libname=`eval "\\$ECHO \"$libname_spec\""`
+               if test -n "$file_magic_glob"; then
+                 libnameglob=`func_echo_all "$libname" | $SED -e $file_magic_glob`
+               else
+                 libnameglob=$libname
+               fi
+               test "$want_nocaseglob" = yes && nocaseglob=`shopt -p nocaseglob`
+               for i in $lib_search_path $sys_lib_search_path $shlib_search_path; do
+                 if test "$want_nocaseglob" = yes; then
+                   shopt -s nocaseglob
+                   potential_libs=`ls $i/$libnameglob[.-]* 2>/dev/null`
+                   $nocaseglob
+                 else
+                   potential_libs=`ls $i/$libnameglob[.-]* 2>/dev/null`
+                 fi
+                 for potent_lib in $potential_libs; do
+                     # Follow soft links.
+                     if ls -lLd "$potent_lib" 2>/dev/null |
+                        $GREP " -> " >/dev/null; then
+                       continue
+                     fi
+                     # The statement above tries to avoid entering an
+                     # endless loop below, in case of cyclic links.
+                     # We might still enter an endless loop, since a link
+                     # loop can be closed while we follow links,
+                     # but so what?
+                     potlib="$potent_lib"
+                     while test -h "$potlib" 2>/dev/null; do
+                       potliblink=`ls -ld $potlib | ${SED} 's/.* -> //'`
+                       case $potliblink in
+                       [\\/]* | [A-Za-z]:[\\/]*) potlib="$potliblink";;
+                       *) potlib=`$ECHO "$potlib" | $SED 's,[^/]*$,,'`"$potliblink";;
+                       esac
+                     done
+                     if eval $file_magic_cmd \"\$potlib\" 2>/dev/null |
+                        $SED -e 10q |
+                        $EGREP "$file_magic_regex" > /dev/null; then
+                       func_append newdeplibs " $a_deplib"
+                       a_deplib=""
+                       break 2
+                     fi
+                 done
+               done
+             fi
+             if test -n "$a_deplib" ; then
+               droppeddeps=yes
+               echo
+               $ECHO "*** Warning: linker path does not have real file for library $a_deplib."
+               echo "*** I have the capability to make that library automatically link in when"
+               echo "*** you link to this library.  But I can only do this if you have a"
+               echo "*** shared version of the library, which you do not appear to have"
+               echo "*** because I did check the linker path looking for a file starting"
+               if test -z "$potlib" ; then
+                 $ECHO "*** with $libname but no candidates were found. (...for file magic test)"
+               else
+                 $ECHO "*** with $libname and none of the candidates passed a file format test"
+                 $ECHO "*** using a file magic. Last file checked: $potlib"
+               fi
+             fi
+             ;;
+           *)
+             # Add a -L argument.
+             func_append newdeplibs " $a_deplib"
+             ;;
+           esac
+         done # Gone through all deplibs.
+         ;;
+       match_pattern*)
+         set dummy $deplibs_check_method; shift
+         match_pattern_regex=`expr "$deplibs_check_method" : "$1 \(.*\)"`
+         for a_deplib in $deplibs; do
+           case $a_deplib in
+           -l*)
+             func_stripname -l '' "$a_deplib"
+             name=$func_stripname_result
+             if test "X$allow_libtool_libs_with_static_runtimes" = "Xyes" ; then
+               case " $predeps $postdeps " in
+               *" $a_deplib "*)
+                 func_append newdeplibs " $a_deplib"
+                 a_deplib=""
+                 ;;
+               esac
+             fi
+             if test -n "$a_deplib" ; then
+               libname=`eval "\\$ECHO \"$libname_spec\""`
+               for i in $lib_search_path $sys_lib_search_path $shlib_search_path; do
+                 potential_libs=`ls $i/$libname[.-]* 2>/dev/null`
+                 for potent_lib in $potential_libs; do
+                   potlib="$potent_lib" # see symlink-check above in file_magic test
+                   if eval "\$ECHO \"$potent_lib\"" 2>/dev/null | $SED 10q | \
+                      $EGREP "$match_pattern_regex" > /dev/null; then
+                     func_append newdeplibs " $a_deplib"
+                     a_deplib=""
+                     break 2
+                   fi
+                 done
+               done
+             fi
+             if test -n "$a_deplib" ; then
+               droppeddeps=yes
+               echo
+               $ECHO "*** Warning: linker path does not have real file for library $a_deplib."
+               echo "*** I have the capability to make that library automatically link in when"
+               echo "*** you link to this library.  But I can only do this if you have a"
+               echo "*** shared version of the library, which you do not appear to have"
+               echo "*** because I did check the linker path looking for a file starting"
+               if test -z "$potlib" ; then
+                 $ECHO "*** with $libname but no candidates were found. (...for regex pattern test)"
+               else
+                 $ECHO "*** with $libname and none of the candidates passed a file format test"
+                 $ECHO "*** using a regex pattern. Last file checked: $potlib"
+               fi
+             fi
+             ;;
+           *)
+             # Add a -L argument.
+             func_append newdeplibs " $a_deplib"
+             ;;
+           esac
+         done # Gone through all deplibs.
+         ;;
+       none | unknown | *)
+         newdeplibs=""
+         tmp_deplibs=`$ECHO " $deplibs" | $SED 's/ -lc$//; s/ -[LR][^ ]*//g'`
+         if test "X$allow_libtool_libs_with_static_runtimes" = "Xyes" ; then
+           for i in $predeps $postdeps ; do
+             # can't use Xsed below, because $i might contain '/'
+             tmp_deplibs=`$ECHO " $tmp_deplibs" | $SED "s,$i,,"`
+           done
+         fi
+         case $tmp_deplibs in
+         *[!\  \ ]*)
+           echo
+           if test "X$deplibs_check_method" = "Xnone"; then
+             echo "*** Warning: inter-library dependencies are not supported in this platform."
+           else
+             echo "*** Warning: inter-library dependencies are not known to be supported."
+           fi
+           echo "*** All declared inter-library dependencies are being dropped."
+           droppeddeps=yes
+           ;;
+         esac
+         ;;
+       esac
+       versuffix=$versuffix_save
+       major=$major_save
+       release=$release_save
+       libname=$libname_save
+       name=$name_save
+
+       case $host in
+       *-*-rhapsody* | *-*-darwin1.[012])
+         # On Rhapsody replace the C library with the System framework
+         newdeplibs=`$ECHO " $newdeplibs" | $SED 's/ -lc / System.ltframework /'`
+         ;;
+       esac
+
+       if test "$droppeddeps" = yes; then
+         if test "$module" = yes; then
+           echo
+           echo "*** Warning: libtool could not satisfy all declared inter-library"
+           $ECHO "*** dependencies of module $libname.  Therefore, libtool will create"
+           echo "*** a static module, that should work as long as the dlopening"
+           echo "*** application is linked with the -dlopen flag."
+           if test -z "$global_symbol_pipe"; then
+             echo
+             echo "*** However, this would only work if libtool was able to extract symbol"
+             echo "*** lists from a program, using \`nm' or equivalent, but libtool could"
+             echo "*** not find such a program.  So, this module is probably useless."
+             echo "*** \`nm' from GNU binutils and a full rebuild may help."
+           fi
+           if test "$build_old_libs" = no; then
+             oldlibs="$output_objdir/$libname.$libext"
+             build_libtool_libs=module
+             build_old_libs=yes
+           else
+             build_libtool_libs=no
+           fi
+         else
+           echo "*** The inter-library dependencies that have been dropped here will be"
+           echo "*** automatically added whenever a program is linked with this library"
+           echo "*** or is declared to -dlopen it."
+
+           if test "$allow_undefined" = no; then
+             echo
+             echo "*** Since this library must not contain undefined symbols,"
+             echo "*** because either the platform does not support them or"
+             echo "*** it was explicitly requested with -no-undefined,"
+             echo "*** libtool will only create a static version of it."
+             if test "$build_old_libs" = no; then
+               oldlibs="$output_objdir/$libname.$libext"
+               build_libtool_libs=module
+               build_old_libs=yes
+             else
+               build_libtool_libs=no
+             fi
+           fi
+         fi
+       fi
+       # Done checking deplibs!
+       deplibs=$newdeplibs
+      fi
+      # Time to change all our "foo.ltframework" stuff back to "-framework foo"
+      case $host in
+       *-*-darwin*)
+         newdeplibs=`$ECHO " $newdeplibs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'`
+         new_inherited_linker_flags=`$ECHO " $new_inherited_linker_flags" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'`
+         deplibs=`$ECHO " $deplibs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'`
+         ;;
+      esac
+
+      # move library search paths that coincide with paths to not yet
+      # installed libraries to the beginning of the library search list
+      new_libs=
+      for path in $notinst_path; do
+       case " $new_libs " in
+       *" -L$path/$objdir "*) ;;
+       *)
+         case " $deplibs " in
+         *" -L$path/$objdir "*)
+           func_append new_libs " -L$path/$objdir" ;;
+         esac
+         ;;
+       esac
+      done
+      for deplib in $deplibs; do
+       case $deplib in
+       -L*)
+         case " $new_libs " in
+         *" $deplib "*) ;;
+         *) func_append new_libs " $deplib" ;;
+         esac
+         ;;
+       *) func_append new_libs " $deplib" ;;
+       esac
+      done
+      deplibs="$new_libs"
+
+      # All the library-specific variables (install_libdir is set above).
+      library_names=
+      old_library=
+      dlname=
+
+      # Test again, we may have decided not to build it any more
+      if test "$build_libtool_libs" = yes; then
+       # Remove ${wl} instances when linking with ld.
+       # FIXME: should test the right _cmds variable.
+       case $archive_cmds in
+         *\$LD\ *) wl= ;;
+        esac
+       if test "$hardcode_into_libs" = yes; then
+         # Hardcode the library paths
+         hardcode_libdirs=
+         dep_rpath=
+         rpath="$finalize_rpath"
+         test "$opt_mode" != relink && rpath="$compile_rpath$rpath"
+         for libdir in $rpath; do
+           if test -n "$hardcode_libdir_flag_spec"; then
+             if test -n "$hardcode_libdir_separator"; then
+               func_replace_sysroot "$libdir"
+               libdir=$func_replace_sysroot_result
+               if test -z "$hardcode_libdirs"; then
+                 hardcode_libdirs="$libdir"
+               else
+                 # Just accumulate the unique libdirs.
+                 case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in
+                 *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*)
+                   ;;
+                 *)
+                   func_append hardcode_libdirs "$hardcode_libdir_separator$libdir"
+                   ;;
+                 esac
+               fi
+             else
+               eval flag=\"$hardcode_libdir_flag_spec\"
+               func_append dep_rpath " $flag"
+             fi
+           elif test -n "$runpath_var"; then
+             case "$perm_rpath " in
+             *" $libdir "*) ;;
+             *) func_append perm_rpath " $libdir" ;;
+             esac
+           fi
+         done
+         # Substitute the hardcoded libdirs into the rpath.
+         if test -n "$hardcode_libdir_separator" &&
+            test -n "$hardcode_libdirs"; then
+           libdir="$hardcode_libdirs"
+           eval "dep_rpath=\"$hardcode_libdir_flag_spec\""
+         fi
+         if test -n "$runpath_var" && test -n "$perm_rpath"; then
+           # We should set the runpath_var.
+           rpath=
+           for dir in $perm_rpath; do
+             func_append rpath "$dir:"
+           done
+           eval "$runpath_var='$rpath\$$runpath_var'; export $runpath_var"
+         fi
+         test -n "$dep_rpath" && deplibs="$dep_rpath $deplibs"
+       fi
+
+       shlibpath="$finalize_shlibpath"
+       test "$opt_mode" != relink && shlibpath="$compile_shlibpath$shlibpath"
+       if test -n "$shlibpath"; then
+         eval "$shlibpath_var='$shlibpath\$$shlibpath_var'; export $shlibpath_var"
+       fi
+
+       # Get the real and link names of the library.
+       eval shared_ext=\"$shrext_cmds\"
+       eval library_names=\"$library_names_spec\"
+       set dummy $library_names
+       shift
+       realname="$1"
+       shift
+
+       if test -n "$soname_spec"; then
+         eval soname=\"$soname_spec\"
+       else
+         soname="$realname"
+       fi
+       if test -z "$dlname"; then
+         dlname=$soname
+       fi
+
+       lib="$output_objdir/$realname"
+       linknames=
+       for link
+       do
+         func_append linknames " $link"
+       done
+
+       # Use standard objects if they are pic
+       test -z "$pic_flag" && libobjs=`$ECHO "$libobjs" | $SP2NL | $SED "$lo2o" | $NL2SP`
+       test "X$libobjs" = "X " && libobjs=
+
+       delfiles=
+       if test -n "$export_symbols" && test -n "$include_expsyms"; then
+         $opt_dry_run || cp "$export_symbols" "$output_objdir/$libname.uexp"
+         export_symbols="$output_objdir/$libname.uexp"
+         func_append delfiles " $export_symbols"
+       fi
+
+       orig_export_symbols=
+       case $host_os in
+       cygwin* | mingw* | cegcc*)
+         if test -n "$export_symbols" && test -z "$export_symbols_regex"; then
+           # exporting using user supplied symfile
+           if test "x`$SED 1q $export_symbols`" != xEXPORTS; then
+             # and it's NOT already a .def file. Must figure out
+             # which of the given symbols are data symbols and tag
+             # them as such. So, trigger use of export_symbols_cmds.
+             # export_symbols gets reassigned inside the "prepare
+             # the list of exported symbols" if statement, so the
+             # include_expsyms logic still works.
+             orig_export_symbols="$export_symbols"
+             export_symbols=
+             always_export_symbols=yes
+           fi
+         fi
+         ;;
+       esac
+
+       # Prepare the list of exported symbols
+       if test -z "$export_symbols"; then
+         if test "$always_export_symbols" = yes || test -n "$export_symbols_regex"; then
+           func_verbose "generating symbol list for \`$libname.la'"
+           export_symbols="$output_objdir/$libname.exp"
+           $opt_dry_run || $RM $export_symbols
+           cmds=$export_symbols_cmds
+           save_ifs="$IFS"; IFS='~'
+           for cmd1 in $cmds; do
+             IFS="$save_ifs"
+             # Take the normal branch if the nm_file_list_spec branch
+             # doesn't work or if tool conversion is not needed.
+             case $nm_file_list_spec~$to_tool_file_cmd in
+               *~func_convert_file_noop | *~func_convert_file_msys_to_w32 | ~*)
+                 try_normal_branch=yes
+                 eval cmd=\"$cmd1\"
+                 func_len " $cmd"
+                 len=$func_len_result
+                 ;;
+               *)
+                 try_normal_branch=no
+                 ;;
+             esac
+             if test "$try_normal_branch" = yes \
+                && { test "$len" -lt "$max_cmd_len" \
+                     || test "$max_cmd_len" -le -1; }
+             then
+               func_show_eval "$cmd" 'exit $?'
+               skipped_export=false
+             elif test -n "$nm_file_list_spec"; then
+               func_basename "$output"
+               output_la=$func_basename_result
+               save_libobjs=$libobjs
+               save_output=$output
+               output=${output_objdir}/${output_la}.nm
+               func_to_tool_file "$output"
+               libobjs=$nm_file_list_spec$func_to_tool_file_result
+               func_append delfiles " $output"
+               func_verbose "creating $NM input file list: $output"
+               for obj in $save_libobjs; do
+                 func_to_tool_file "$obj"
+                 $ECHO "$func_to_tool_file_result"
+               done > "$output"
+               eval cmd=\"$cmd1\"
+               func_show_eval "$cmd" 'exit $?'
+               output=$save_output
+               libobjs=$save_libobjs
+               skipped_export=false
+             else
+               # The command line is too long to execute in one step.
+               func_verbose "using reloadable object file for export list..."
+               skipped_export=:
+               # Break out early, otherwise skipped_export may be
+               # set to false by a later but shorter cmd.
+               break
+             fi
+           done
+           IFS="$save_ifs"
+           if test -n "$export_symbols_regex" && test "X$skipped_export" != "X:"; then
+             func_show_eval '$EGREP -e "$export_symbols_regex" "$export_symbols" > "${export_symbols}T"'
+             func_show_eval '$MV "${export_symbols}T" "$export_symbols"'
+           fi
+         fi
+       fi
+
+       if test -n "$export_symbols" && test -n "$include_expsyms"; then
+         tmp_export_symbols="$export_symbols"
+         test -n "$orig_export_symbols" && tmp_export_symbols="$orig_export_symbols"
+         $opt_dry_run || eval '$ECHO "$include_expsyms" | $SP2NL >> "$tmp_export_symbols"'
+       fi
+
+       if test "X$skipped_export" != "X:" && test -n "$orig_export_symbols"; then
+         # The given export_symbols file has to be filtered, so filter it.
+         func_verbose "filter symbol list for \`$libname.la' to tag DATA exports"
+         # FIXME: $output_objdir/$libname.filter potentially contains lots of
+         # 's' commands which not all seds can handle. GNU sed should be fine
+         # though. Also, the filter scales superlinearly with the number of
+         # global variables. join(1) would be nice here, but unfortunately
+         # isn't a blessed tool.
+         $opt_dry_run || $SED -e '/[ ,]DATA/!d;s,\(.*\)\([ \,].*\),s|^\1$|\1\2|,' < $export_symbols > $output_objdir/$libname.filter
+         func_append delfiles " $export_symbols $output_objdir/$libname.filter"
+         export_symbols=$output_objdir/$libname.def
+         $opt_dry_run || $SED -f $output_objdir/$libname.filter < $orig_export_symbols > $export_symbols
+       fi
+
+       tmp_deplibs=
+       for test_deplib in $deplibs; do
+         case " $convenience " in
+         *" $test_deplib "*) ;;
+         *)
+           func_append tmp_deplibs " $test_deplib"
+           ;;
+         esac
+       done
+       deplibs="$tmp_deplibs"
+
+       if test -n "$convenience"; then
+         if test -n "$whole_archive_flag_spec" &&
+           test "$compiler_needs_object" = yes &&
+           test -z "$libobjs"; then
+           # extract the archives, so we have objects to list.
+           # TODO: could optimize this to just extract one archive.
+           whole_archive_flag_spec=
+         fi
+         if test -n "$whole_archive_flag_spec"; then
+           save_libobjs=$libobjs
+           eval libobjs=\"\$libobjs $whole_archive_flag_spec\"
+           test "X$libobjs" = "X " && libobjs=
+         else
+           gentop="$output_objdir/${outputname}x"
+           func_append generated " $gentop"
+
+           func_extract_archives $gentop $convenience
+           func_append libobjs " $func_extract_archives_result"
+           test "X$libobjs" = "X " && libobjs=
+         fi
+       fi
+
+       if test "$thread_safe" = yes && test -n "$thread_safe_flag_spec"; then
+         eval flag=\"$thread_safe_flag_spec\"
+         func_append linker_flags " $flag"
+       fi
+
+       # Make a backup of the uninstalled library when relinking
+       if test "$opt_mode" = relink; then
+         $opt_dry_run || eval '(cd $output_objdir && $RM ${realname}U && $MV $realname ${realname}U)' || exit $?
+       fi
+
+       # Do each of the archive commands.
+       if test "$module" = yes && test -n "$module_cmds" ; then
+         if test -n "$export_symbols" && test -n "$module_expsym_cmds"; then
+           eval test_cmds=\"$module_expsym_cmds\"
+           cmds=$module_expsym_cmds
+         else
+           eval test_cmds=\"$module_cmds\"
+           cmds=$module_cmds
+         fi
+       else
+         if test -n "$export_symbols" && test -n "$archive_expsym_cmds"; then
+           eval test_cmds=\"$archive_expsym_cmds\"
+           cmds=$archive_expsym_cmds
+         else
+           eval test_cmds=\"$archive_cmds\"
+           cmds=$archive_cmds
+         fi
+       fi
+
+       if test "X$skipped_export" != "X:" &&
+          func_len " $test_cmds" &&
+          len=$func_len_result &&
+          test "$len" -lt "$max_cmd_len" || test "$max_cmd_len" -le -1; then
+         :
+       else
+         # The command line is too long to link in one step, link piecewise
+         # or, if using GNU ld and skipped_export is not :, use a linker
+         # script.
+
+         # Save the value of $output and $libobjs because we want to
+         # use them later.  If we have whole_archive_flag_spec, we
+         # want to use save_libobjs as it was before
+         # whole_archive_flag_spec was expanded, because we can't
+         # assume the linker understands whole_archive_flag_spec.
+         # This may have to be revisited, in case too many
+         # convenience libraries get linked in and end up exceeding
+         # the spec.
+         if test -z "$convenience" || test -z "$whole_archive_flag_spec"; then
+           save_libobjs=$libobjs
+         fi
+         save_output=$output
+         func_basename "$output"
+         output_la=$func_basename_result
+
+         # Clear the reloadable object creation command queue and
+         # initialize k to one.
+         test_cmds=
+         concat_cmds=
+         objlist=
+         last_robj=
+         k=1
+
+         if test -n "$save_libobjs" && test "X$skipped_export" != "X:" && test "$with_gnu_ld" = yes; then
+           output=${output_objdir}/${output_la}.lnkscript
+           func_verbose "creating GNU ld script: $output"
+           echo 'INPUT (' > $output
+           for obj in $save_libobjs
+           do
+             func_to_tool_file "$obj"
+             $ECHO "$func_to_tool_file_result" >> $output
+           done
+           echo ')' >> $output
+           func_append delfiles " $output"
+           func_to_tool_file "$output"
+           output=$func_to_tool_file_result
+         elif test -n "$save_libobjs" && test "X$skipped_export" != "X:" && test "X$file_list_spec" != X; then
+           output=${output_objdir}/${output_la}.lnk
+           func_verbose "creating linker input file list: $output"
+           : > $output
+           set x $save_libobjs
+           shift
+           firstobj=
+           if test "$compiler_needs_object" = yes; then
+             firstobj="$1 "
+             shift
+           fi
+           for obj
+           do
+             func_to_tool_file "$obj"
+             $ECHO "$func_to_tool_file_result" >> $output
+           done
+           func_append delfiles " $output"
+           func_to_tool_file "$output"
+           output=$firstobj\"$file_list_spec$func_to_tool_file_result\"
+         else
+           if test -n "$save_libobjs"; then
+             func_verbose "creating reloadable object files..."
+             output=$output_objdir/$output_la-${k}.$objext
+             eval test_cmds=\"$reload_cmds\"
+             func_len " $test_cmds"
+             len0=$func_len_result
+             len=$len0
+
+             # Loop over the list of objects to be linked.
+             for obj in $save_libobjs
+             do
+               func_len " $obj"
+               func_arith $len + $func_len_result
+               len=$func_arith_result
+               if test "X$objlist" = X ||
+                  test "$len" -lt "$max_cmd_len"; then
+                 func_append objlist " $obj"
+               else
+                 # The command $test_cmds is almost too long, add a
+                 # command to the queue.
+                 if test "$k" -eq 1 ; then
+                   # The first file doesn't have a previous command to add.
+                   reload_objs=$objlist
+                   eval concat_cmds=\"$reload_cmds\"
+                 else
+                   # All subsequent reloadable object files will link in
+                   # the last one created.
+                   reload_objs="$objlist $last_robj"
+                   eval concat_cmds=\"\$concat_cmds~$reload_cmds~\$RM $last_robj\"
+                 fi
+                 last_robj=$output_objdir/$output_la-${k}.$objext
+                 func_arith $k + 1
+                 k=$func_arith_result
+                 output=$output_objdir/$output_la-${k}.$objext
+                 objlist=" $obj"
+                 func_len " $last_robj"
+                 func_arith $len0 + $func_len_result
+                 len=$func_arith_result
+               fi
+             done
+             # Handle the remaining objects by creating one last
+             # reloadable object file.  All subsequent reloadable object
+             # files will link in the last one created.
+             test -z "$concat_cmds" || concat_cmds=$concat_cmds~
+             reload_objs="$objlist $last_robj"
+             eval concat_cmds=\"\${concat_cmds}$reload_cmds\"
+             if test -n "$last_robj"; then
+               eval concat_cmds=\"\${concat_cmds}~\$RM $last_robj\"
+             fi
+             func_append delfiles " $output"
+
+           else
+             output=
+           fi
+
+           if ${skipped_export-false}; then
+             func_verbose "generating symbol list for \`$libname.la'"
+             export_symbols="$output_objdir/$libname.exp"
+             $opt_dry_run || $RM $export_symbols
+             libobjs=$output
+             # Append the command to create the export file.
+             test -z "$concat_cmds" || concat_cmds=$concat_cmds~
+             eval concat_cmds=\"\$concat_cmds$export_symbols_cmds\"
+             if test -n "$last_robj"; then
+               eval concat_cmds=\"\$concat_cmds~\$RM $last_robj\"
+             fi
+           fi
+
+           test -n "$save_libobjs" &&
+             func_verbose "creating a temporary reloadable object file: $output"
+
+           # Loop through the commands generated above and execute them.
+           save_ifs="$IFS"; IFS='~'
+           for cmd in $concat_cmds; do
+             IFS="$save_ifs"
+             $opt_silent || {
+                 func_quote_for_expand "$cmd"
+                 eval "func_echo $func_quote_for_expand_result"
+             }
+             $opt_dry_run || eval "$cmd" || {
+               lt_exit=$?
+
+               # Restore the uninstalled library and exit
+               if test "$opt_mode" = relink; then
+                 ( cd "$output_objdir" && \
+                   $RM "${realname}T" && \
+                   $MV "${realname}U" "$realname" )
+               fi
+
+               exit $lt_exit
+             }
+           done
+           IFS="$save_ifs"
+
+           if test -n "$export_symbols_regex" && ${skipped_export-false}; then
+             func_show_eval '$EGREP -e "$export_symbols_regex" "$export_symbols" > "${export_symbols}T"'
+             func_show_eval '$MV "${export_symbols}T" "$export_symbols"'
+           fi
+         fi
+
+          if ${skipped_export-false}; then
+           if test -n "$export_symbols" && test -n "$include_expsyms"; then
+             tmp_export_symbols="$export_symbols"
+             test -n "$orig_export_symbols" && tmp_export_symbols="$orig_export_symbols"
+             $opt_dry_run || eval '$ECHO "$include_expsyms" | $SP2NL >> "$tmp_export_symbols"'
+           fi
+
+           if test -n "$orig_export_symbols"; then
+             # The given export_symbols file has to be filtered, so filter it.
+             func_verbose "filter symbol list for \`$libname.la' to tag DATA exports"
+             # FIXME: $output_objdir/$libname.filter potentially contains lots of
+             # 's' commands which not all seds can handle. GNU sed should be fine
+             # though. Also, the filter scales superlinearly with the number of
+             # global variables. join(1) would be nice here, but unfortunately
+             # isn't a blessed tool.
+             $opt_dry_run || $SED -e '/[ ,]DATA/!d;s,\(.*\)\([ \,].*\),s|^\1$|\1\2|,' < $export_symbols > $output_objdir/$libname.filter
+             func_append delfiles " $export_symbols $output_objdir/$libname.filter"
+             export_symbols=$output_objdir/$libname.def
+             $opt_dry_run || $SED -f $output_objdir/$libname.filter < $orig_export_symbols > $export_symbols
+           fi
+         fi
+
+         libobjs=$output
+         # Restore the value of output.
+         output=$save_output
+
+         if test -n "$convenience" && test -n "$whole_archive_flag_spec"; then
+           eval libobjs=\"\$libobjs $whole_archive_flag_spec\"
+           test "X$libobjs" = "X " && libobjs=
+         fi
+         # Expand the library linking commands again to reset the
+         # value of $libobjs for piecewise linking.
+
+         # Do each of the archive commands.
+         if test "$module" = yes && test -n "$module_cmds" ; then
+           if test -n "$export_symbols" && test -n "$module_expsym_cmds"; then
+             cmds=$module_expsym_cmds
+           else
+             cmds=$module_cmds
+           fi
+         else
+           if test -n "$export_symbols" && test -n "$archive_expsym_cmds"; then
+             cmds=$archive_expsym_cmds
+           else
+             cmds=$archive_cmds
+           fi
+         fi
+       fi
+
+       if test -n "$delfiles"; then
+         # Append the command to remove temporary files to $cmds.
+         eval cmds=\"\$cmds~\$RM $delfiles\"
+       fi
+
+       # Add any objects from preloaded convenience libraries
+       if test -n "$dlprefiles"; then
+         gentop="$output_objdir/${outputname}x"
+         func_append generated " $gentop"
+
+         func_extract_archives $gentop $dlprefiles
+         func_append libobjs " $func_extract_archives_result"
+         test "X$libobjs" = "X " && libobjs=
+       fi
+
+       save_ifs="$IFS"; IFS='~'
+       for cmd in $cmds; do
+         IFS="$save_ifs"
+         eval cmd=\"$cmd\"
+         $opt_silent || {
+           func_quote_for_expand "$cmd"
+           eval "func_echo $func_quote_for_expand_result"
+         }
+         $opt_dry_run || eval "$cmd" || {
+           lt_exit=$?
+
+           # Restore the uninstalled library and exit
+           if test "$opt_mode" = relink; then
+             ( cd "$output_objdir" && \
+               $RM "${realname}T" && \
+               $MV "${realname}U" "$realname" )
+           fi
+
+           exit $lt_exit
+         }
+       done
+       IFS="$save_ifs"
+
+       # Restore the uninstalled library and exit
+       if test "$opt_mode" = relink; then
+         $opt_dry_run || eval '(cd $output_objdir && $RM ${realname}T && $MV $realname ${realname}T && $MV ${realname}U $realname)' || exit $?
+
+         if test -n "$convenience"; then
+           if test -z "$whole_archive_flag_spec"; then
+             func_show_eval '${RM}r "$gentop"'
+           fi
+         fi
+
+         exit $EXIT_SUCCESS
+       fi
+
+       # Create links to the real library.
+       for linkname in $linknames; do
+         if test "$realname" != "$linkname"; then
+           func_show_eval '(cd "$output_objdir" && $RM "$linkname" && $LN_S "$realname" "$linkname")' 'exit $?'
+         fi
+       done
+
+       # If -module or -export-dynamic was specified, set the dlname.
+       if test "$module" = yes || test "$export_dynamic" = yes; then
+         # On all known operating systems, these are identical.
+         dlname="$soname"
+       fi
+      fi
+      ;;
+
+    obj)
+      if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then
+       func_warning "\`-dlopen' is ignored for objects"
+      fi
+
+      case " $deplibs" in
+      *\ -l* | *\ -L*)
+       func_warning "\`-l' and \`-L' are ignored for objects" ;;
+      esac
+
+      test -n "$rpath" && \
+       func_warning "\`-rpath' is ignored for objects"
+
+      test -n "$xrpath" && \
+       func_warning "\`-R' is ignored for objects"
+
+      test -n "$vinfo" && \
+       func_warning "\`-version-info' is ignored for objects"
+
+      test -n "$release" && \
+       func_warning "\`-release' is ignored for objects"
+
+      case $output in
+      *.lo)
+       test -n "$objs$old_deplibs" && \
+         func_fatal_error "cannot build library object \`$output' from non-libtool objects"
+
+       libobj=$output
+       func_lo2o "$libobj"
+       obj=$func_lo2o_result
+       ;;
+      *)
+       libobj=
+       obj="$output"
+       ;;
+      esac
+
+      # Delete the old objects.
+      $opt_dry_run || $RM $obj $libobj
+
+      # Objects from convenience libraries.  This assumes
+      # single-version convenience libraries.  Whenever we create
+      # different ones for PIC/non-PIC, then we'll have to duplicate
+      # the extraction.
+      reload_conv_objs=
+      gentop=
+      # reload_cmds runs $LD directly, so let us get rid of
+      # -Wl from whole_archive_flag_spec and hope we can get by with
+      # turning comma into space..
+      wl=
+
+      if test -n "$convenience"; then
+       if test -n "$whole_archive_flag_spec"; then
+         eval tmp_whole_archive_flags=\"$whole_archive_flag_spec\"
+         reload_conv_objs=$reload_objs\ `$ECHO "$tmp_whole_archive_flags" | $SED 's|,| |g'`
+       else
+         gentop="$output_objdir/${obj}x"
+         func_append generated " $gentop"
+
+         func_extract_archives $gentop $convenience
+         reload_conv_objs="$reload_objs $func_extract_archives_result"
+       fi
+      fi
+
+      # If we're not building shared, we need to use non_pic_objs
+      test "$build_libtool_libs" != yes && libobjs="$non_pic_objects"
+
+      # Create the old-style object.
+      reload_objs="$objs$old_deplibs "`$ECHO "$libobjs" | $SP2NL | $SED "/\.${libext}$/d; /\.lib$/d; $lo2o" | $NL2SP`" $reload_conv_objs" ### testsuite: skip nested quoting test
+
+      output="$obj"
+      func_execute_cmds "$reload_cmds" 'exit $?'
+
+      # Exit if we aren't doing a library object file.
+      if test -z "$libobj"; then
+       if test -n "$gentop"; then
+         func_show_eval '${RM}r "$gentop"'
+       fi
+
+       exit $EXIT_SUCCESS
+      fi
+
+      if test "$build_libtool_libs" != yes; then
+       if test -n "$gentop"; then
+         func_show_eval '${RM}r "$gentop"'
+       fi
+
+       # Create an invalid libtool object if no PIC, so that we don't
+       # accidentally link it into a program.
+       # $show "echo timestamp > $libobj"
+       # $opt_dry_run || eval "echo timestamp > $libobj" || exit $?
+       exit $EXIT_SUCCESS
+      fi
+
+      if test -n "$pic_flag" || test "$pic_mode" != default; then
+       # Only do commands if we really have different PIC objects.
+       reload_objs="$libobjs $reload_conv_objs"
+       output="$libobj"
+       func_execute_cmds "$reload_cmds" 'exit $?'
+      fi
+
+      if test -n "$gentop"; then
+       func_show_eval '${RM}r "$gentop"'
+      fi
+
+      exit $EXIT_SUCCESS
+      ;;
+
+    prog)
+      case $host in
+       *cygwin*) func_stripname '' '.exe' "$output"
+                 output=$func_stripname_result.exe;;
+      esac
+      test -n "$vinfo" && \
+       func_warning "\`-version-info' is ignored for programs"
+
+      test -n "$release" && \
+       func_warning "\`-release' is ignored for programs"
+
+      test "$preload" = yes \
+        && test "$dlopen_support" = unknown \
+       && test "$dlopen_self" = unknown \
+       && test "$dlopen_self_static" = unknown && \
+         func_warning "\`LT_INIT([dlopen])' not used. Assuming no dlopen support."
+
+      case $host in
+      *-*-rhapsody* | *-*-darwin1.[012])
+       # On Rhapsody replace the C library with the System framework
+       compile_deplibs=`$ECHO " $compile_deplibs" | $SED 's/ -lc / System.ltframework /'`
+       finalize_deplibs=`$ECHO " $finalize_deplibs" | $SED 's/ -lc / System.ltframework /'`
+       ;;
+      esac
+
+      case $host in
+      *-*-darwin*)
+       # Don't allow lazy linking, it breaks C++ global constructors
+       # But is supposedly fixed on 10.4 or later (yay!).
+       if test "$tagname" = CXX ; then
+         case ${MACOSX_DEPLOYMENT_TARGET-10.0} in
+           10.[0123])
+             func_append compile_command " ${wl}-bind_at_load"
+             func_append finalize_command " ${wl}-bind_at_load"
+           ;;
+         esac
+       fi
+       # Time to change all our "foo.ltframework" stuff back to "-framework foo"
+       compile_deplibs=`$ECHO " $compile_deplibs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'`
+       finalize_deplibs=`$ECHO " $finalize_deplibs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'`
+       ;;
+      esac
+
+
+      # move library search paths that coincide with paths to not yet
+      # installed libraries to the beginning of the library search list
+      new_libs=
+      for path in $notinst_path; do
+       case " $new_libs " in
+       *" -L$path/$objdir "*) ;;
+       *)
+         case " $compile_deplibs " in
+         *" -L$path/$objdir "*)
+           func_append new_libs " -L$path/$objdir" ;;
+         esac
+         ;;
+       esac
+      done
+      for deplib in $compile_deplibs; do
+       case $deplib in
+       -L*)
+         case " $new_libs " in
+         *" $deplib "*) ;;
+         *) func_append new_libs " $deplib" ;;
+         esac
+         ;;
+       *) func_append new_libs " $deplib" ;;
+       esac
+      done
+      compile_deplibs="$new_libs"
+
+
+      func_append compile_command " $compile_deplibs"
+      func_append finalize_command " $finalize_deplibs"
+
+      if test -n "$rpath$xrpath"; then
+       # If the user specified any rpath flags, then add them.
+       for libdir in $rpath $xrpath; do
+         # This is the magic to use -rpath.
+         case "$finalize_rpath " in
+         *" $libdir "*) ;;
+         *) func_append finalize_rpath " $libdir" ;;
+         esac
+       done
+      fi
+
+      # Now hardcode the library paths
+      rpath=
+      hardcode_libdirs=
+      for libdir in $compile_rpath $finalize_rpath; do
+       if test -n "$hardcode_libdir_flag_spec"; then
+         if test -n "$hardcode_libdir_separator"; then
+           if test -z "$hardcode_libdirs"; then
+             hardcode_libdirs="$libdir"
+           else
+             # Just accumulate the unique libdirs.
+             case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in
+             *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*)
+               ;;
+             *)
+               func_append hardcode_libdirs "$hardcode_libdir_separator$libdir"
+               ;;
+             esac
+           fi
+         else
+           eval flag=\"$hardcode_libdir_flag_spec\"
+           func_append rpath " $flag"
+         fi
+       elif test -n "$runpath_var"; then
+         case "$perm_rpath " in
+         *" $libdir "*) ;;
+         *) func_append perm_rpath " $libdir" ;;
+         esac
+       fi
+       case $host in
+       *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-cegcc*)
+         testbindir=`${ECHO} "$libdir" | ${SED} -e 's*/lib$*/bin*'`
+         case :$dllsearchpath: in
+         *":$libdir:"*) ;;
+         ::) dllsearchpath=$libdir;;
+         *) func_append dllsearchpath ":$libdir";;
+         esac
+         case :$dllsearchpath: in
+         *":$testbindir:"*) ;;
+         ::) dllsearchpath=$testbindir;;
+         *) func_append dllsearchpath ":$testbindir";;
+         esac
+         ;;
+       esac
+      done
+      # Substitute the hardcoded libdirs into the rpath.
+      if test -n "$hardcode_libdir_separator" &&
+        test -n "$hardcode_libdirs"; then
+       libdir="$hardcode_libdirs"
+       eval rpath=\" $hardcode_libdir_flag_spec\"
+      fi
+      compile_rpath="$rpath"
+
+      rpath=
+      hardcode_libdirs=
+      for libdir in $finalize_rpath; do
+       if test -n "$hardcode_libdir_flag_spec"; then
+         if test -n "$hardcode_libdir_separator"; then
+           if test -z "$hardcode_libdirs"; then
+             hardcode_libdirs="$libdir"
+           else
+             # Just accumulate the unique libdirs.
+             case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in
+             *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*)
+               ;;
+             *)
+               func_append hardcode_libdirs "$hardcode_libdir_separator$libdir"
+               ;;
+             esac
+           fi
+         else
+           eval flag=\"$hardcode_libdir_flag_spec\"
+           func_append rpath " $flag"
+         fi
+       elif test -n "$runpath_var"; then
+         case "$finalize_perm_rpath " in
+         *" $libdir "*) ;;
+         *) func_append finalize_perm_rpath " $libdir" ;;
+         esac
+       fi
+      done
+      # Substitute the hardcoded libdirs into the rpath.
+      if test -n "$hardcode_libdir_separator" &&
+        test -n "$hardcode_libdirs"; then
+       libdir="$hardcode_libdirs"
+       eval rpath=\" $hardcode_libdir_flag_spec\"
+      fi
+      finalize_rpath="$rpath"
+
+      if test -n "$libobjs" && test "$build_old_libs" = yes; then
+       # Transform all the library objects into standard objects.
+       compile_command=`$ECHO "$compile_command" | $SP2NL | $SED "$lo2o" | $NL2SP`
+       finalize_command=`$ECHO "$finalize_command" | $SP2NL | $SED "$lo2o" | $NL2SP`
+      fi
+
+      func_generate_dlsyms "$outputname" "@PROGRAM@" "no"
+
+      # template prelinking step
+      if test -n "$prelink_cmds"; then
+       func_execute_cmds "$prelink_cmds" 'exit $?'
+      fi
+
+      wrappers_required=yes
+      case $host in
+      *cegcc* | *mingw32ce*)
+        # Disable wrappers for cegcc and mingw32ce hosts, we are cross compiling anyway.
+        wrappers_required=no
+        ;;
+      *cygwin* | *mingw* )
+        if test "$build_libtool_libs" != yes; then
+          wrappers_required=no
+        fi
+        ;;
+      *)
+        if test "$need_relink" = no || test "$build_libtool_libs" != yes; then
+          wrappers_required=no
+        fi
+        ;;
+      esac
+      if test "$wrappers_required" = no; then
+       # Replace the output file specification.
+       compile_command=`$ECHO "$compile_command" | $SED 's%@OUTPUT@%'"$output"'%g'`
+       link_command="$compile_command$compile_rpath"
+
+       # We have no uninstalled library dependencies, so finalize right now.
+       exit_status=0
+       func_show_eval "$link_command" 'exit_status=$?'
+
+       if test -n "$postlink_cmds"; then
+         func_to_tool_file "$output"
+         postlink_cmds=`func_echo_all "$postlink_cmds" | $SED -e 's%@OUTPUT@%'"$output"'%g' -e 's%@TOOL_OUTPUT@%'"$func_to_tool_file_result"'%g'`
+         func_execute_cmds "$postlink_cmds" 'exit $?'
+       fi
+
+       # Delete the generated files.
+       if test -f "$output_objdir/${outputname}S.${objext}"; then
+         func_show_eval '$RM "$output_objdir/${outputname}S.${objext}"'
+       fi
+
+       exit $exit_status
+      fi
+
+      if test -n "$compile_shlibpath$finalize_shlibpath"; then
+       compile_command="$shlibpath_var=\"$compile_shlibpath$finalize_shlibpath\$$shlibpath_var\" $compile_command"
+      fi
+      if test -n "$finalize_shlibpath"; then
+       finalize_command="$shlibpath_var=\"$finalize_shlibpath\$$shlibpath_var\" $finalize_command"
+      fi
+
+      compile_var=
+      finalize_var=
+      if test -n "$runpath_var"; then
+       if test -n "$perm_rpath"; then
+         # We should set the runpath_var.
+         rpath=
+         for dir in $perm_rpath; do
+           func_append rpath "$dir:"
+         done
+         compile_var="$runpath_var=\"$rpath\$$runpath_var\" "
+       fi
+       if test -n "$finalize_perm_rpath"; then
+         # We should set the runpath_var.
+         rpath=
+         for dir in $finalize_perm_rpath; do
+           func_append rpath "$dir:"
+         done
+         finalize_var="$runpath_var=\"$rpath\$$runpath_var\" "
+       fi
+      fi
+
+      if test "$no_install" = yes; then
+       # We don't need to create a wrapper script.
+       link_command="$compile_var$compile_command$compile_rpath"
+       # Replace the output file specification.
+       link_command=`$ECHO "$link_command" | $SED 's%@OUTPUT@%'"$output"'%g'`
+       # Delete the old output file.
+       $opt_dry_run || $RM $output
+       # Link the executable and exit
+       func_show_eval "$link_command" 'exit $?'
+
+       if test -n "$postlink_cmds"; then
+         func_to_tool_file "$output"
+         postlink_cmds=`func_echo_all "$postlink_cmds" | $SED -e 's%@OUTPUT@%'"$output"'%g' -e 's%@TOOL_OUTPUT@%'"$func_to_tool_file_result"'%g'`
+         func_execute_cmds "$postlink_cmds" 'exit $?'
+       fi
+
+       exit $EXIT_SUCCESS
+      fi
+
+      if test "$hardcode_action" = relink; then
+       # Fast installation is not supported
+       link_command="$compile_var$compile_command$compile_rpath"
+       relink_command="$finalize_var$finalize_command$finalize_rpath"
+
+       func_warning "this platform does not like uninstalled shared libraries"
+       func_warning "\`$output' will be relinked during installation"
+      else
+       if test "$fast_install" != no; then
+         link_command="$finalize_var$compile_command$finalize_rpath"
+         if test "$fast_install" = yes; then
+           relink_command=`$ECHO "$compile_var$compile_command$compile_rpath" | $SED 's%@OUTPUT@%\$progdir/\$file%g'`
+         else
+           # fast_install is set to needless
+           relink_command=
+         fi
+       else
+         link_command="$compile_var$compile_command$compile_rpath"
+         relink_command="$finalize_var$finalize_command$finalize_rpath"
+       fi
+      fi
+
+      # Replace the output file specification.
+      link_command=`$ECHO "$link_command" | $SED 's%@OUTPUT@%'"$output_objdir/$outputname"'%g'`
+
+      # Delete the old output files.
+      $opt_dry_run || $RM $output $output_objdir/$outputname $output_objdir/lt-$outputname
+
+      func_show_eval "$link_command" 'exit $?'
+
+      if test -n "$postlink_cmds"; then
+       func_to_tool_file "$output_objdir/$outputname"
+       postlink_cmds=`func_echo_all "$postlink_cmds" | $SED -e 's%@OUTPUT@%'"$output_objdir/$outputname"'%g' -e 's%@TOOL_OUTPUT@%'"$func_to_tool_file_result"'%g'`
+       func_execute_cmds "$postlink_cmds" 'exit $?'
+      fi
+
+      # Now create the wrapper script.
+      func_verbose "creating $output"
+
+      # Quote the relink command for shipping.
+      if test -n "$relink_command"; then
+       # Preserve any variables that may affect compiler behavior
+       for var in $variables_saved_for_relink; do
+         if eval test -z \"\${$var+set}\"; then
+           relink_command="{ test -z \"\${$var+set}\" || $lt_unset $var || { $var=; export $var; }; }; $relink_command"
+         elif eval var_value=\$$var; test -z "$var_value"; then
+           relink_command="$var=; export $var; $relink_command"
+         else
+           func_quote_for_eval "$var_value"
+           relink_command="$var=$func_quote_for_eval_result; export $var; $relink_command"
+         fi
+       done
+       relink_command="(cd `pwd`; $relink_command)"
+       relink_command=`$ECHO "$relink_command" | $SED "$sed_quote_subst"`
+      fi
+
+      # Only actually do things if not in dry run mode.
+      $opt_dry_run || {
+       # win32 will think the script is a binary if it has
+       # a .exe suffix, so we strip it off here.
+       case $output in
+         *.exe) func_stripname '' '.exe' "$output"
+                output=$func_stripname_result ;;
+       esac
+       # test for cygwin because mv fails w/o .exe extensions
+       case $host in
+         *cygwin*)
+           exeext=.exe
+           func_stripname '' '.exe' "$outputname"
+           outputname=$func_stripname_result ;;
+         *) exeext= ;;
+       esac
+       case $host in
+         *cygwin* | *mingw* )
+           func_dirname_and_basename "$output" "" "."
+           output_name=$func_basename_result
+           output_path=$func_dirname_result
+           cwrappersource="$output_path/$objdir/lt-$output_name.c"
+           cwrapper="$output_path/$output_name.exe"
+           $RM $cwrappersource $cwrapper
+           trap "$RM $cwrappersource $cwrapper; exit $EXIT_FAILURE" 1 2 15
+
+           func_emit_cwrapperexe_src > $cwrappersource
+
+           # The wrapper executable is built using the $host compiler,
+           # because it contains $host paths and files. If cross-
+           # compiling, it, like the target executable, must be
+           # executed on the $host or under an emulation environment.
+           $opt_dry_run || {
+             $LTCC $LTCFLAGS -o $cwrapper $cwrappersource
+             $STRIP $cwrapper
+           }
+
+           # Now, create the wrapper script for func_source use:
+           func_ltwrapper_scriptname $cwrapper
+           $RM $func_ltwrapper_scriptname_result
+           trap "$RM $func_ltwrapper_scriptname_result; exit $EXIT_FAILURE" 1 2 15
+           $opt_dry_run || {
+             # note: this script will not be executed, so do not chmod.
+             if test "x$build" = "x$host" ; then
+               $cwrapper --lt-dump-script > $func_ltwrapper_scriptname_result
+             else
+               func_emit_wrapper no > $func_ltwrapper_scriptname_result
+             fi
+           }
+         ;;
+         * )
+           $RM $output
+           trap "$RM $output; exit $EXIT_FAILURE" 1 2 15
+
+           func_emit_wrapper no > $output
+           chmod +x $output
+         ;;
+       esac
+      }
+      exit $EXIT_SUCCESS
+      ;;
+    esac
+
+    # See if we need to build an old-fashioned archive.
+    for oldlib in $oldlibs; do
+
+      if test "$build_libtool_libs" = convenience; then
+       oldobjs="$libobjs_save $symfileobj"
+       addlibs="$convenience"
+       build_libtool_libs=no
+      else
+       if test "$build_libtool_libs" = module; then
+         oldobjs="$libobjs_save"
+         build_libtool_libs=no
+       else
+         oldobjs="$old_deplibs $non_pic_objects"
+         if test "$preload" = yes && test -f "$symfileobj"; then
+           func_append oldobjs " $symfileobj"
+         fi
+       fi
+       addlibs="$old_convenience"
+      fi
+
+      if test -n "$addlibs"; then
+       gentop="$output_objdir/${outputname}x"
+       func_append generated " $gentop"
+
+       func_extract_archives $gentop $addlibs
+       func_append oldobjs " $func_extract_archives_result"
+      fi
+
+      # Do each command in the archive commands.
+      if test -n "$old_archive_from_new_cmds" && test "$build_libtool_libs" = yes; then
+       cmds=$old_archive_from_new_cmds
+      else
+
+       # Add any objects from preloaded convenience libraries
+       if test -n "$dlprefiles"; then
+         gentop="$output_objdir/${outputname}x"
+         func_append generated " $gentop"
+
+         func_extract_archives $gentop $dlprefiles
+         func_append oldobjs " $func_extract_archives_result"
+       fi
+
+       # POSIX demands no paths to be encoded in archives.  We have
+       # to avoid creating archives with duplicate basenames if we
+       # might have to extract them afterwards, e.g., when creating a
+       # static archive out of a convenience library, or when linking
+       # the entirety of a libtool archive into another (currently
+       # not supported by libtool).
+       if (for obj in $oldobjs
+           do
+             func_basename "$obj"
+             $ECHO "$func_basename_result"
+           done | sort | sort -uc >/dev/null 2>&1); then
+         :
+       else
+         echo "copying selected object files to avoid basename conflicts..."
+         gentop="$output_objdir/${outputname}x"
+         func_append generated " $gentop"
+         func_mkdir_p "$gentop"
+         save_oldobjs=$oldobjs
+         oldobjs=
+         counter=1
+         for obj in $save_oldobjs
+         do
+           func_basename "$obj"
+           objbase="$func_basename_result"
+           case " $oldobjs " in
+           " ") oldobjs=$obj ;;
+           *[\ /]"$objbase "*)
+             while :; do
+               # Make sure we don't pick an alternate name that also
+               # overlaps.
+               newobj=lt$counter-$objbase
+               func_arith $counter + 1
+               counter=$func_arith_result
+               case " $oldobjs " in
+               *[\ /]"$newobj "*) ;;
+               *) if test ! -f "$gentop/$newobj"; then break; fi ;;
+               esac
+             done
+             func_show_eval "ln $obj $gentop/$newobj || cp $obj $gentop/$newobj"
+             func_append oldobjs " $gentop/$newobj"
+             ;;
+           *) func_append oldobjs " $obj" ;;
+           esac
+         done
+       fi
+       func_to_tool_file "$oldlib" func_convert_file_msys_to_w32
+       tool_oldlib=$func_to_tool_file_result
+       eval cmds=\"$old_archive_cmds\"
+
+       func_len " $cmds"
+       len=$func_len_result
+       if test "$len" -lt "$max_cmd_len" || test "$max_cmd_len" -le -1; then
+         cmds=$old_archive_cmds
+       elif test -n "$archiver_list_spec"; then
+         func_verbose "using command file archive linking..."
+         for obj in $oldobjs
+         do
+           func_to_tool_file "$obj"
+           $ECHO "$func_to_tool_file_result"
+         done > $output_objdir/$libname.libcmd
+         func_to_tool_file "$output_objdir/$libname.libcmd"
+         oldobjs=" $archiver_list_spec$func_to_tool_file_result"
+         cmds=$old_archive_cmds
+       else
+         # the command line is too long to link in one step, link in parts
+         func_verbose "using piecewise archive linking..."
+         save_RANLIB=$RANLIB
+         RANLIB=:
+         objlist=
+         concat_cmds=
+         save_oldobjs=$oldobjs
+         oldobjs=
+         # Is there a better way of finding the last object in the list?
+         for obj in $save_oldobjs
+         do
+           last_oldobj=$obj
+         done
+         eval test_cmds=\"$old_archive_cmds\"
+         func_len " $test_cmds"
+         len0=$func_len_result
+         len=$len0
+         for obj in $save_oldobjs
+         do
+           func_len " $obj"
+           func_arith $len + $func_len_result
+           len=$func_arith_result
+           func_append objlist " $obj"
+           if test "$len" -lt "$max_cmd_len"; then
+             :
+           else
+             # the above command should be used before it gets too long
+             oldobjs=$objlist
+             if test "$obj" = "$last_oldobj" ; then
+               RANLIB=$save_RANLIB
+             fi
+             test -z "$concat_cmds" || concat_cmds=$concat_cmds~
+             eval concat_cmds=\"\${concat_cmds}$old_archive_cmds\"
+             objlist=
+             len=$len0
+           fi
+         done
+         RANLIB=$save_RANLIB
+         oldobjs=$objlist
+         if test "X$oldobjs" = "X" ; then
+           eval cmds=\"\$concat_cmds\"
+         else
+           eval cmds=\"\$concat_cmds~\$old_archive_cmds\"
+         fi
+       fi
+      fi
+      func_execute_cmds "$cmds" 'exit $?'
+    done
+
+    test -n "$generated" && \
+      func_show_eval "${RM}r$generated"
+
+    # Now create the libtool archive.
+    case $output in
+    *.la)
+      old_library=
+      test "$build_old_libs" = yes && old_library="$libname.$libext"
+      func_verbose "creating $output"
+
+      # Preserve any variables that may affect compiler behavior
+      for var in $variables_saved_for_relink; do
+       if eval test -z \"\${$var+set}\"; then
+         relink_command="{ test -z \"\${$var+set}\" || $lt_unset $var || { $var=; export $var; }; }; $relink_command"
+       elif eval var_value=\$$var; test -z "$var_value"; then
+         relink_command="$var=; export $var; $relink_command"
+       else
+         func_quote_for_eval "$var_value"
+         relink_command="$var=$func_quote_for_eval_result; export $var; $relink_command"
+       fi
+      done
+      # Quote the link command for shipping.
+      relink_command="(cd `pwd`; $SHELL $progpath $preserve_args --mode=relink $libtool_args @inst_prefix_dir@)"
+      relink_command=`$ECHO "$relink_command" | $SED "$sed_quote_subst"`
+      if test "$hardcode_automatic" = yes ; then
+       relink_command=
+      fi
+
+      # Only create the output if not a dry run.
+      $opt_dry_run || {
+       for installed in no yes; do
+         if test "$installed" = yes; then
+           if test -z "$install_libdir"; then
+             break
+           fi
+           output="$output_objdir/$outputname"i
+           # Replace all uninstalled libtool libraries with the installed ones
+           newdependency_libs=
+           for deplib in $dependency_libs; do
+             case $deplib in
+             *.la)
+               func_basename "$deplib"
+               name="$func_basename_result"
+               func_resolve_sysroot "$deplib"
+               eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $func_resolve_sysroot_result`
+               test -z "$libdir" && \
+                 func_fatal_error "\`$deplib' is not a valid libtool archive"
+               func_append newdependency_libs " ${lt_sysroot:+=}$libdir/$name"
+               ;;
+             -L*)
+               func_stripname -L '' "$deplib"
+               func_replace_sysroot "$func_stripname_result"
+               func_append newdependency_libs " -L$func_replace_sysroot_result"
+               ;;
+             -R*)
+               func_stripname -R '' "$deplib"
+               func_replace_sysroot "$func_stripname_result"
+               func_append newdependency_libs " -R$func_replace_sysroot_result"
+               ;;
+             *) func_append newdependency_libs " $deplib" ;;
+             esac
+           done
+           dependency_libs="$newdependency_libs"
+           newdlfiles=
+
+           for lib in $dlfiles; do
+             case $lib in
+             *.la)
+               func_basename "$lib"
+               name="$func_basename_result"
+               eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $lib`
+               test -z "$libdir" && \
+                 func_fatal_error "\`$lib' is not a valid libtool archive"
+               func_append newdlfiles " ${lt_sysroot:+=}$libdir/$name"
+               ;;
+             *) func_append newdlfiles " $lib" ;;
+             esac
+           done
+           dlfiles="$newdlfiles"
+           newdlprefiles=
+           for lib in $dlprefiles; do
+             case $lib in
+             *.la)
+               # Only pass preopened files to the pseudo-archive (for
+               # eventual linking with the app. that links it) if we
+               # didn't already link the preopened objects directly into
+               # the library:
+               func_basename "$lib"
+               name="$func_basename_result"
+               eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $lib`
+               test -z "$libdir" && \
+                 func_fatal_error "\`$lib' is not a valid libtool archive"
+               func_append newdlprefiles " ${lt_sysroot:+=}$libdir/$name"
+               ;;
+             esac
+           done
+           dlprefiles="$newdlprefiles"
+         else
+           newdlfiles=
+           for lib in $dlfiles; do
+             case $lib in
+               [\\/]* | [A-Za-z]:[\\/]*) abs="$lib" ;;
+               *) abs=`pwd`"/$lib" ;;
+             esac
+             func_append newdlfiles " $abs"
+           done
+           dlfiles="$newdlfiles"
+           newdlprefiles=
+           for lib in $dlprefiles; do
+             case $lib in
+               [\\/]* | [A-Za-z]:[\\/]*) abs="$lib" ;;
+               *) abs=`pwd`"/$lib" ;;
+             esac
+             func_append newdlprefiles " $abs"
+           done
+           dlprefiles="$newdlprefiles"
+         fi
+         $RM $output
+         # place dlname in correct position for cygwin
+         # In fact, it would be nice if we could use this code for all target
+         # systems that can't hard-code library paths into their executables
+         # and that have no shared library path variable independent of PATH,
+         # but it turns out we can't easily determine that from inspecting
+         # libtool variables, so we have to hard-code the OSs to which it
+         # applies here; at the moment, that means platforms that use the PE
+         # object format with DLL files.  See the long comment at the top of
+         # tests/bindir.at for full details.
+         tdlname=$dlname
+         case $host,$output,$installed,$module,$dlname in
+           *cygwin*,*lai,yes,no,*.dll | *mingw*,*lai,yes,no,*.dll | *cegcc*,*lai,yes,no,*.dll)
+             # If a -bindir argument was supplied, place the dll there.
+             if test "x$bindir" != x ;
+             then
+               func_relative_path "$install_libdir" "$bindir"
+               tdlname=$func_relative_path_result$dlname
+             else
+               # Otherwise fall back on heuristic.
+               tdlname=../bin/$dlname
+             fi
+             ;;
+         esac
+         $ECHO > $output "\
+# $outputname - a libtool library file
+# Generated by $PROGRAM (GNU $PACKAGE$TIMESTAMP) $VERSION
+#
+# Please DO NOT delete this file!
+# It is necessary for linking the library.
+
+# The name that we can dlopen(3).
+dlname='$tdlname'
+
+# Names of this library.
+library_names='$library_names'
+
+# The name of the static archive.
+old_library='$old_library'
+
+# Linker flags that can not go in dependency_libs.
+inherited_linker_flags='$new_inherited_linker_flags'
+
+# Libraries that this one depends upon.
+dependency_libs='$dependency_libs'
+
+# Names of additional weak libraries provided by this library
+weak_library_names='$weak_libs'
+
+# Version information for $libname.
+current=$current
+age=$age
+revision=$revision
+
+# Is this an already installed library?
+installed=$installed
+
+# Should we warn about portability when linking against -modules?
+shouldnotlink=$module
+
+# Files to dlopen/dlpreopen
+dlopen='$dlfiles'
+dlpreopen='$dlprefiles'
+
+# Directory that this library needs to be installed in:
+libdir='$install_libdir'"
+         if test "$installed" = no && test "$need_relink" = yes; then
+           $ECHO >> $output "\
+relink_command=\"$relink_command\""
+         fi
+       done
+      }
+
+      # Do a symbolic link so that the libtool archive can be found in
+      # LD_LIBRARY_PATH before the program is installed.
+      func_show_eval '( cd "$output_objdir" && $RM "$outputname" && $LN_S "../$outputname" "$outputname" )' 'exit $?'
+      ;;
+    esac
+    exit $EXIT_SUCCESS
+}
+
+case $opt_mode in    # link and relink are both served by func_mode_link
+  link | relink) func_mode_link ${1+"$@"} ;;
+esac
+
+# func_mode_uninstall arg...
+func_mode_uninstall ()
+{   # Shared by the uninstall and clean modes; $opt_mode selects the variant below.
+    $opt_debug
+    RM="$nonopt"      # remover command; dash-options from the arguments are appended below
+    files=
+    rmforce=
+    exit_status=0     # set to 1 when a problem is seen; used as the final exit code
+
+    # This variable tells wrapper scripts just to set variables rather
+    # than running their programs.
+    libtool_install_magic="$magic"
+    # Split the arguments: dash-options extend $RM, everything else is a file to remove.
+    for arg
+    do
+      case $arg in
+      -f) func_append RM " $arg"; rmforce=yes ;;   # also remembered: see the existence check below
+      -*) func_append RM " $arg" ;;
+      *) func_append files " $arg" ;;
+      esac
+    done
+
+    test -z "$RM" && \
+      func_fatal_help "you must specify an RM program"
+
+    rmdirs=
+
+    for file in $files; do
+      func_dirname "$file" "" "."
+      dir="$func_dirname_result"
+      if test "X$dir" = X.; then
+       odir="$objdir"
+      else
+       odir="$dir/$objdir"
+      fi
+      func_basename "$file"
+      name="$func_basename_result"
+      test "$opt_mode" = uninstall && odir="$dir"   # uninstall works in $dir itself, not its $objdir
+
+      # Remember odir for removal later, being careful to avoid duplicates
+      if test "$opt_mode" = clean; then
+       case " $rmdirs " in
+         *" $odir "*) ;;
+         *) func_append rmdirs " $odir" ;;
+       esac
+      fi
+
+      # Don't error if the file doesn't exist and rm -f was used.
+      if { test -L "$file"; } >/dev/null 2>&1 ||
+        { test -h "$file"; } >/dev/null 2>&1 ||
+        test -f "$file"; then
+       :
+      elif test -d "$file"; then        # a directory is never removed: flag an error and skip it
+       exit_status=1
+       continue
+      elif test "$rmforce" = yes; then  # not a symlink, file or directory, and -f given: skip quietly
+       continue
+      fi
+
+      rmfiles="$file"   # the named file itself; related files are appended per file type below
+
+      case $name in
+      *.la)
+       # Possibly a libtool archive, so verify it.
+       if func_lalib_p "$file"; then
+         func_source $dir/$name   # expected to set library_names, old_library, dlname, libdir (used below)
+
+         # Delete the libtool libraries and symlinks.
+         for n in $library_names; do
+           func_append rmfiles " $odir/$n"
+         done
+         test -n "$old_library" && func_append rmfiles " $odir/$old_library"
+
+         case "$opt_mode" in
+         clean)
+           case " $library_names " in
+           *" $dlname "*) ;;
+           *) test -n "$dlname" && func_append rmfiles " $odir/$dlname" ;;
+           esac
+           test -n "$libdir" && func_append rmfiles " $odir/$name $odir/${name}i"
+           ;;
+         uninstall)
+           if test -n "$library_names"; then
+             # Do each command in the postuninstall commands.
+             func_execute_cmds "$postuninstall_cmds" 'test "$rmforce" = yes || exit_status=1'
+           fi
+           # NOTE(review): the second argument looks like the on-failure action; with -f it leaves exit_status alone -- confirm.
+           if test -n "$old_library"; then
+             # Do each command in the old_postuninstall commands.
+             func_execute_cmds "$old_postuninstall_cmds" 'test "$rmforce" = yes || exit_status=1'
+           fi
+           # FIXME: should reinstall the best remaining shared library.
+           ;;
+         esac
+       fi
+       ;;
+
+      *.lo)
+       # Possibly a libtool object, so verify it.
+       if func_lalib_p "$file"; then
+
+         # Read the .lo file
+         func_source $dir/$name
+
+         # Add PIC object to the list of files to remove.
+         if test -n "$pic_object" &&
+            test "$pic_object" != none; then
+           func_append rmfiles " $dir/$pic_object"
+         fi
+
+         # Add non-PIC object to the list of files to remove.
+         if test -n "$non_pic_object" &&
+            test "$non_pic_object" != none; then
+           func_append rmfiles " $dir/$non_pic_object"
+         fi
+       fi
+       ;;
+      # Any other name: only clean mode looks further, treating it as a possible libtool program.
+      *)
+       if test "$opt_mode" = clean ; then
+         noexename=$name
+         case $file in
+         *.exe)
+           func_stripname '' '.exe' "$file"
+           file=$func_stripname_result
+           func_stripname '' '.exe' "$name"
+           noexename=$func_stripname_result
+           # $file with .exe has already been added to rmfiles,
+           # add $file without .exe
+           func_append rmfiles " $file"
+           ;;
+         esac
+         # Do a test to see if this is a libtool program.
+         if func_ltwrapper_p "$file"; then
+           if func_ltwrapper_executable_p "$file"; then
+             func_ltwrapper_scriptname "$file"
+             relink_command=
+             func_source $func_ltwrapper_scriptname_result
+             func_append rmfiles " $func_ltwrapper_scriptname_result"
+           else
+             relink_command=
+             func_source $dir/$noexename
+           fi
+
+           # note $name still contains .exe if it was in $file originally
+           # as does the version of $file that was added into $rmfiles
+           func_append rmfiles " $odir/$name $odir/${name}S.${objext}"
+           if test "$fast_install" = yes && test -n "$relink_command"; then
+             func_append rmfiles " $odir/lt-$name"
+           fi
+           if test "X$noexename" != "X$name" ; then
+             func_append rmfiles " $odir/lt-${noexename}.c"
+           fi
+         fi
+       fi
+       ;;
+      esac
+      func_show_eval "$RM $rmfiles" 'exit_status=1'   # presumably records a failed removal and carries on with the next file
+    done
+
+    # Try to remove the ${objdir}s in the directories where we deleted files
+    for dir in $rmdirs; do
+      if test -d "$dir"; then
+       func_show_eval "rmdir $dir >/dev/null 2>&1"
+      fi
+    done
+
+    exit $exit_status
+}
+
+# Remaining dispatch: clean and uninstall share one implementation, and a
+# missing mode is reported together with the generic help text.
+case $opt_mode in
+  uninstall | clean) func_mode_uninstall ${1+"$@"} ;;
+  "") help="$generic_help"; func_fatal_help "you must specify a MODE" ;;
+esac
+
+# With nothing queued in exec_cmd, the requested mode is reported as invalid.
+if test -z "$exec_cmd"; then
+  func_fatal_help "invalid operation mode \`$opt_mode'"
+fi
+
+test -z "$exec_cmd" || {
+  eval exec "$exec_cmd"
+  exit $EXIT_FAILURE
+}
+exit $exit_status
+
+
+# The TAGs below are defined such that we never get into a situation
+# in which we disable both kinds of libraries.  Given conflicting
+# choices, we go for a static library, that is the most portable,
+# since we can't tell whether shared libraries were disabled because
+# the user asked for that or because the platform doesn't support
+# them.  This is particularly important on AIX, because we don't
+# support having both static and shared libraries enabled at the same
+# time on that platform, so we default to a shared-only configuration.
+# If a disable-shared tag is given, we'll fall back to a static-only
+# configuration.  But we'll never go from static-only to shared-only.
+
+# ### BEGIN LIBTOOL TAG CONFIG: disable-shared
+build_libtool_libs=no
+build_old_libs=yes
+# ### END LIBTOOL TAG CONFIG: disable-shared
+
+# ### BEGIN LIBTOOL TAG CONFIG: disable-static
+build_old_libs=`case $build_libtool_libs in yes) echo no;; *) echo yes;; esac`
+# ### END LIBTOOL TAG CONFIG: disable-static
+
+# Local Variables:
+# mode:shell-script
+# sh-indentation:2
+# End:
+# vi:sw=2
+
diff --git a/memory.c b/memory.c

new file mode 100644 (file)

index 0000000..42a9834
--- /dev/null
+++ b/memory.c
@@ -0,0 +1,136 @@
+/* Memory allocation routines.
+
+Copyright 1991, 1993, 1994, 2000, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h> /* for malloc, realloc, free */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Function pointers for the allocate, reallocate and free routines, each initialised to the default implementation defined below.  */
+void * (*__gmp_allocate_func) __GMP_PROTO ((size_t)) = __gmp_default_allocate;
+void * (*__gmp_reallocate_func) __GMP_PROTO ((void *, size_t, size_t))
+     = __gmp_default_reallocate;
+void   (*__gmp_free_func) __GMP_PROTO ((void *, size_t)) = __gmp_default_free;
+
+
+/* Default allocation functions.  In case of failure to allocate/reallocate
+   an error message is written to stderr and the program aborts.
+   __gmp_default_allocate returns SIZE usable bytes from malloc, never NULL.  */
+void *
+__gmp_default_allocate (size_t size)
+{
+  void *ret;
+#ifdef DEBUG
+  size_t req_size = size;
+  size += 2 * BYTES_PER_MP_LIMB;       /* room for a guard limb on each side */
+#endif
+  ret = malloc (size);
+  if (ret == 0)
+    {
+      /* size_t is unsigned, so the argument for %lu is cast to unsigned long.  */
+      fprintf (stderr, "GNU MP: Cannot allocate memory (size=%lu)\n", (unsigned long) size);
+      abort ();
+    }
+#ifdef DEBUG
+  {
+    mp_ptr p = ret;
+    p++;
+    p[-1] = (0xdeadbeef << 31) + 0xdeafdeed;    /* leading guard limb */
+    if (req_size % BYTES_PER_MP_LIMB == 0)      /* trailing guard only for whole-limb sizes */
+      p[req_size / BYTES_PER_MP_LIMB] = ~((0xdeadbeef << 31) + 0xdeafdeed);
+    ret = p;                                    /* caller gets the address just past the leading guard */
+  }
+#endif
+  return ret;
+}
+
+void *
+__gmp_default_reallocate (void *oldptr, size_t old_size, size_t new_size)
+{
+  void *ret;
+  /* Resize block OLDPTR (OLD_SIZE bytes) to NEW_SIZE bytes with realloc; aborts with a message instead of returning NULL.  */
+#ifdef DEBUG
+  size_t req_size = new_size;
+  /* Check the guard limbs of the old block, then step back to its real start; skipped when OLD_SIZE is 0.  */
+  if (old_size != 0)
+    {
+      mp_ptr p = oldptr;
+      if (p[-1] != (0xdeadbeef << 31) + 0xdeafdeed)
+       {
+         fprintf (stderr, "gmp: (realloc) data clobbered before allocation block\n");
+         abort ();
+       }
+      if (old_size % BYTES_PER_MP_LIMB == 0)
+       if (p[old_size / BYTES_PER_MP_LIMB] != ~((0xdeadbeef << 31) + 0xdeafdeed))
+         {
+           fprintf (stderr, "gmp: (realloc) data clobbered after allocation block\n");
+           abort ();
+         }
+      oldptr = p - 1;
+    }
+  /* NOTE(review): unlike __gmp_default_free, OLDPTR is not stepped back when OLD_SIZE is 0 -- confirm this is intended.  */
+  new_size += 2 * BYTES_PER_MP_LIMB;    /* room for a guard limb on each side */
+#endif
+
+  ret = realloc (oldptr, new_size);
+  if (ret == 0)
+    {
+      fprintf (stderr, "GNU MP: Cannot reallocate memory (old_size=%lu new_size=%lu)\n", (unsigned long) old_size, (unsigned long) new_size);
+      abort ();
+    }
+
+#ifdef DEBUG
+  {
+    mp_ptr p = ret;
+    p++;
+    p[-1] = (0xdeadbeef << 31) + 0xdeafdeed;    /* leading guard limb */
+    if (req_size % BYTES_PER_MP_LIMB == 0)      /* trailing guard only for whole-limb sizes */
+      p[req_size / BYTES_PER_MP_LIMB] = ~((0xdeadbeef << 31) + 0xdeafdeed);
+    ret = p;                                    /* caller gets the address just past the leading guard */
+  }
+#endif
+  return ret;
+}
+
+void
+__gmp_default_free (void *blk_ptr, size_t blk_size)
+{
+  /* Release BLK_PTR with free; BLK_SIZE is used only by the DEBUG guard checks.  */
+#ifdef DEBUG
+  mp_ptr base = blk_ptr;
+  const mp_limb_t head_guard = (0xdeadbeef << 31) + 0xdeafdeed;
+  const mp_limb_t tail_guard = ~((0xdeadbeef << 31) + 0xdeafdeed);
+
+  if (blk_size != 0 && base[-1] != head_guard)
+    {
+      fprintf (stderr, "gmp: (free) data clobbered before allocation block\n");
+      abort ();
+    }
+  /* The trailing guard is examined only for sizes that are a whole number of limbs.  */
+  if (blk_size != 0 && blk_size % BYTES_PER_MP_LIMB == 0
+      && base[blk_size / BYTES_PER_MP_LIMB] != tail_guard)
+    {
+      fprintf (stderr, "gmp: (free) data clobbered after allocation block\n");
+      abort ();
+    }
+  blk_ptr = base - 1;   /* back over the leading guard limb, whatever BLK_SIZE is */
+#endif
+  free (blk_ptr);
+}
diff --git a/missing b/missing

new file mode 100755 (executable)

index 0000000..c3a0147
--- /dev/null
+++ b/missing
@@ -0,0 +1,360 @@
+#! /bin/sh
+# Common stub for a few missing GNU programs while installing.
+
+scriptversion=2009-11-15.01
+
+# Copyright (C) 1996, 1997, 1999, 2000, 2002, 2003
+#   Free Software Foundation, Inc.
+# Originally by Fran,cois Pinard <pinard@iro.umontreal.ca>, 1996.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+# 02110-1301, USA.
+
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
+if test $# -eq 0; then
+  echo 1>&2 "Try \`$0 --help' for more information"
+  exit 1
+fi
+
+run=:
+
+# In the cases where this matters, `missing' is being run in the
+# srcdir already.
+if test -f configure.ac; then
+  configure_ac=configure.ac
+else
+  configure_ac=configure.in
+fi
+
+msg="missing on your system"
+
+case "$1" in
+--run)
+  # Try to run requested program, and just exit if it succeeds.
+  run=
+  shift
+  "$@" && exit 0
+  # Exit code 63 means version mismatch.  This often happens
+  # when the user tries to use an ancient version of a tool on
+  # a file that requires a minimum version.  In this case we
+  # should proceed as if the program had been absent, or
+  # if --run hadn't been passed.
+  if test $? = 63; then
+    run=:
+    msg="probably too old"
+  fi
+  ;;
+esac
+
+# If it does not exist, or fails to run (possibly an outdated version),
+# try to emulate it.
+case "$1" in
+
+  -h|--h|--he|--hel|--help)
+    echo "\
+$0 [OPTION]... PROGRAM [ARGUMENT]...
+
+Handle \`PROGRAM [ARGUMENT]...' for when PROGRAM is missing, or return an
+error status if there is no known handling for PROGRAM.
+
+Options:
+  -h, --help      display this help and exit
+  -v, --version   output version information and exit
+  --run           try to run the given command, and emulate it if it fails
+
+Supported PROGRAM values:
+  aclocal      touch file \`aclocal.m4'
+  autoconf     touch file \`configure'
+  autoheader   touch file \`config.h.in'
+  automake     touch all \`Makefile.in' files
+  bison        create \`y.tab.[ch]', if possible, from existing .[ch]
+  flex         create \`lex.yy.c', if possible, from existing .c
+  help2man     touch the output file
+  lex          create \`lex.yy.c', if possible, from existing .c
+  makeinfo     touch the output file
+  tar          try tar, gnutar, gtar, then tar without non-portable flags
+  yacc         create \`y.tab.[ch]', if possible, from existing .[ch]
+
+Send bug reports to <bug-automake@gnu.org>."
+    ;;
+
+  -v|--v|--ve|--ver|--vers|--versi|--versio|--version)
+    echo "missing $scriptversion (GNU Automake)"
+    ;;
+
+  -*)
+    echo 1>&2 "$0: Unknown \`$1' option"
+    echo 1>&2 "Try \`$0 --help' for more information"
+    exit 1
+    ;;
+
+  aclocal*)
+    if test -z "$run" && ($1 --version) > /dev/null 2>&1; then
+       # We have it, but it failed.
+       exit 1
+    fi
+
+    echo 1>&2 "\
+WARNING: \`$1' is $msg.  You should only need it if
+         you modified \`acinclude.m4' or \`${configure_ac}'.  You might want
+         to install the \`Automake' and \`Perl' packages.  Grab them from
+         any GNU archive site."
+    touch aclocal.m4
+    ;;
+
+  autoconf)
+    if test -z "$run" && ($1 --version) > /dev/null 2>&1; then
+       # We have it, but it failed.
+       exit 1
+    fi
+
+    echo 1>&2 "\
+WARNING: \`$1' is $msg.  You should only need it if
+         you modified \`${configure_ac}'.  You might want to install the
+         \`Autoconf' and \`GNU m4' packages.  Grab them from any GNU
+         archive site."
+    touch configure
+    ;;
+
+  autoheader)
+    if test -z "$run" && ($1 --version) > /dev/null 2>&1; then
+       # We have it, but it failed.
+       exit 1
+    fi
+
+    echo 1>&2 "\
+WARNING: \`$1' is $msg.  You should only need it if
+         you modified \`acconfig.h' or \`${configure_ac}'.  You might want
+         to install the \`Autoconf' and \`GNU m4' packages.  Grab them
+         from any GNU archive site."
+    files=`sed -n 's/^[ ]*A[CM]_CONFIG_HEADER(\([^)]*\)).*/\1/p' ${configure_ac}`
+    test -z "$files" && files="config.h"
+    touch_files=
+    for f in $files; do
+      case "$f" in
+      *:*) touch_files="$touch_files "`echo "$f" |
+                                      sed -e 's/^[^:]*://' -e 's/:.*//'`;;
+      *) touch_files="$touch_files $f.in";;
+      esac
+    done
+    touch $touch_files
+    ;;
+
+  automake*)
+    if test -z "$run" && ($1 --version) > /dev/null 2>&1; then
+       # We have it, but it failed.
+       exit 1
+    fi
+
+    echo 1>&2 "\
+WARNING: \`$1' is $msg.  You should only need it if
+         you modified \`Makefile.am', \`acinclude.m4' or \`${configure_ac}'.
+         You might want to install the \`Automake' and \`Perl' packages.
+         Grab them from any GNU archive site."
+    find . -type f -name Makefile.am -print |
+          sed 's/\.am$/.in/' |
+          while read f; do touch "$f"; done
+    ;;
+
+  autom4te)
+    if test -z "$run" && ($1 --version) > /dev/null 2>&1; then
+       # We have it, but it failed.
+       exit 1
+    fi
+
+    echo 1>&2 "\
+WARNING: \`$1' is needed, but is $msg.
+         You might have modified some files without having the
+         proper tools for further handling them.
+         You can get \`$1' as part of \`Autoconf' from any GNU
+         archive site."
+    # Recover the output file name from --output / -o so it can be stubbed.
+    file=`echo "$*" | sed -n 's/.*--output[ =]*\([^ ]*\).*/\1/p'`
+    test -z "$file" && file=`echo "$*" | sed -n 's/.*-o[ ]*\([^ ]*\).*/\1/p'`
+    if test -f "$file"; then
+       touch "$file"
+    else
+       # Quote $file so it is not split or glob-expanded; skip it when empty.
+       test -z "$file" || exec >"$file"
+       echo "#! /bin/sh"
+       echo "# Created by GNU Automake missing as a replacement of"
+       echo "#  $ $@"
+       echo "exit 0"
+       test -z "$file" || chmod +x "$file"
+       exit 1
+    fi
+    ;;
+
+  bison|yacc)
+    echo 1>&2 "\
+WARNING: \`$1' $msg.  You should only need it if
+         you modified a \`.y' file.  You may need the \`Bison' package
+         in order for those modifications to take effect.  You can get
+         \`Bison' from any GNU archive site."
+    rm -f y.tab.c y.tab.h
+    if [ $# -ne 1 ]; then
+        eval LASTARG="\${$#}"
+       case "$LASTARG" in
+       *.y)
+           SRCFILE=`echo "$LASTARG" | sed 's/y$/c/'`
+           if [ -f "$SRCFILE" ]; then
+                cp "$SRCFILE" y.tab.c
+           fi
+           SRCFILE=`echo "$LASTARG" | sed 's/y$/h/'`
+           if [ -f "$SRCFILE" ]; then
+                cp "$SRCFILE" y.tab.h
+           fi
+         ;;
+       esac
+    fi
+    if [ ! -f y.tab.h ]; then
+       echo >y.tab.h
+    fi
+    if [ ! -f y.tab.c ]; then
+       echo 'main() { return 0; }' >y.tab.c
+    fi
+    ;;
+
+  lex|flex)
+    echo 1>&2 "\
+WARNING: \`$1' is $msg.  You should only need it if
+         you modified a \`.l' file.  You may need the \`Flex' package
+         in order for those modifications to take effect.  You can get
+         \`Flex' from any GNU archive site."
+    rm -f lex.yy.c
+    if [ $# -ne 1 ]; then
+        eval LASTARG="\${$#}"
+       case "$LASTARG" in
+       *.l)
+           SRCFILE=`echo "$LASTARG" | sed 's/l$/c/'`
+           if [ -f "$SRCFILE" ]; then
+                cp "$SRCFILE" lex.yy.c
+           fi
+         ;;
+       esac
+    fi
+    if [ ! -f lex.yy.c ]; then
+       echo 'main() { return 0; }' >lex.yy.c
+    fi
+    ;;
+
+  help2man)
+    if test -z "$run" && ($1 --version) > /dev/null 2>&1; then
+       # We have it, but it failed.
+       exit 1
+    fi
+
+    echo 1>&2 "\
+WARNING: \`$1' is $msg.  You should only need it if
+        you modified a dependency of a manual page.  You may need the
+        \`Help2man' package in order for those modifications to take
+        effect.  You can get \`Help2man' from any GNU archive site."
+    # Find the output file given as "-o FILE" or "--output=FILE".
+    file=`echo "$*" | sed -n 's/.*-o \([^ ]*\).*/\1/p'`
+    if test -z "$file"; then
+       file=`echo "$*" | sed -n 's/.*--output=\([^ ]*\).*/\1/p'`
+    fi
+    if [ -f "$file" ]; then
+       touch "$file"  # quoted: no field splitting or globbing of the name
+    else
+       test -z "$file" || exec >"$file"
+       echo ".ab help2man is required to generate this page"
+       exit 1
+    fi
+    ;;
+
+  makeinfo)
+    if test -z "$run" && (makeinfo --version) > /dev/null 2>&1; then
+       # We have makeinfo, but it failed.
+       exit 1
+    fi
+
+    echo 1>&2 "\
+WARNING: \`$1' is $msg.  You should only need it if
+         you modified a \`.texi' or \`.texinfo' file, or any other file
+         indirectly affecting the aspect of the manual.  The spurious
+         call might also be the consequence of using a buggy \`make' (AIX,
+         DU, IRIX).  You might want to install the \`Texinfo' package or
+         the \`GNU make' package.  Grab either from any GNU archive site."
+    file=`echo "$*" | sed -n 's/.*-o \([^ ]*\).*/\1/p'`  # explicit "-o FILE", if any
+    if test -z "$file"; then
+      file=`echo "$*" | sed 's/.* \([^ ]*\) *$/\1/'`  # otherwise take the last argument ...
+      file=`sed -n '/^@setfilename/ { s/.* \([^ ]*\) *$/\1/; p; q; }' "$file"`  # ... and read its @setfilename
+    fi
+    touch "$file"  # quoted: no field splitting or globbing of the name
+    ;;
+
+  tar)
+    shift
+    if test -n "$run"; then
+      echo 1>&2 "ERROR: \`tar' requires --run"
+      exit 1
+    fi
+
+    # We have already tried tar in the generic part.
+    # Look for gnutar/gtar before invocation to avoid ugly error
+    # messages.
+    if (gnutar --version > /dev/null 2>&1); then
+       gnutar "$@" && exit 0
+    fi
+    if (gtar --version > /dev/null 2>&1); then
+       gtar "$@" && exit 0
+    fi
+    firstarg="$1"
+    if shift; then
+       case "$firstarg" in
+       *o*)
+           firstarg=`echo "$firstarg" | sed s/o//`
+           tar "$firstarg" "$@" && exit 0
+           ;;
+       esac
+       case "$firstarg" in
+       *h*)
+           firstarg=`echo "$firstarg" | sed s/h//`
+           tar "$firstarg" "$@" && exit 0
+           ;;
+       esac
+    fi
+
+    echo 1>&2 "\
+WARNING: I can't seem to be able to run \`tar' with the given arguments.
+         You may want to install GNU tar or Free paxutils, or check the
+         command line arguments."
+    exit 1
+    ;;
+
+  *)
+    echo 1>&2 "\
+WARNING: \`$1' is needed, and is $msg.
+         You might have modified some files without having the
+         proper tools for further handling them.  Check the \`README' file,
+         it often tells you about the needed prerequisites for installing
+         this package.  You may also peek at any GNU archive site, in case
+         some other package would contain this missing \`$1' program."
+    exit 1
+    ;;
+esac
+
+exit 0
+
+# Local variables:
+# eval: (add-hook 'write-file-hooks 'time-stamp)
+# time-stamp-start: "scriptversion="
+# time-stamp-format: "%:y-%02m-%02d.%02H"
+# time-stamp-end: "$"
+# End:
diff --git a/mp-h.in b/mp-h.in

new file mode 100644 (file)

index 0000000..35bc34b
--- /dev/null
+++ b/mp-h.in
@@ -0,0 +1,164 @@
+/* mp-h.in -- Definitions for the GNU multiple precision library  -*-mode:c-*-
+   BSD mp compatible functions.
+
+Copyright 1991, 1993, 1994, 1995, 1996, 2000, 2001, 2002, 2004 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#ifndef __MP_H__
+
+
+/* The following (everything under ifndef __GNU_MP__) must be identical in
+   gmp.h and mp.h to allow both to be included in an application or during
+   the library build.  Use the t-gmp-mp-h.pl script to check.  */
+#ifndef __GNU_MP__
+#define __GNU_MP__ 5
+
+#define __need_size_t  /* tell gcc stddef.h we only want size_t */
+#if defined (__cplusplus)
+#include <cstddef>     /* for size_t */
+#else
+#include <stddef.h>    /* for size_t */
+#endif
+#undef __need_size_t
+
+/* The following instantiated by configure, for internal use only */
+#if ! defined (__GMP_WITHIN_CONFIGURE)
+@DEFN_LONG_LONG_LIMB@
+#define __GMP_LIBGMP_DLL  @LIBGMP_DLL@
+#endif
+
+#if  defined (__STDC__)                                 \
+  || defined (__cplusplus)                              \
+  || defined (_AIX)                                     \
+  || defined (__DECC)                                   \
+  || (defined (__mips) && defined (_SYSTYPE_SVR4))      \
+  || defined (_MSC_VER)                                 \
+  || defined (_WIN32)
+#define __GMP_HAVE_CONST        1
+#define __GMP_HAVE_PROTOTYPES   1
+#define __GMP_HAVE_TOKEN_PASTE  1
+#else
+#define __GMP_HAVE_CONST        0
+#define __GMP_HAVE_PROTOTYPES   0
+#define __GMP_HAVE_TOKEN_PASTE  0
+#endif
+
+
+#if __GMP_HAVE_CONST
+#define __gmp_const   const
+#define __gmp_signed  signed
+#else
+#define __gmp_const
+#define __gmp_signed
+#endif
+
+#if defined (__GNUC__)
+#define __GMP_DECLSPEC_EXPORT  __declspec(__dllexport__)
+#define __GMP_DECLSPEC_IMPORT  __declspec(__dllimport__)
+#endif
+#if defined (_MSC_VER) || defined (__BORLANDC__)
+#define __GMP_DECLSPEC_EXPORT  __declspec(dllexport)
+#define __GMP_DECLSPEC_IMPORT  __declspec(dllimport)
+#endif
+#ifdef __WATCOMC__
+#define __GMP_DECLSPEC_EXPORT  __export
+#define __GMP_DECLSPEC_IMPORT  __import
+#endif
+#ifdef __IBMC__
+#define __GMP_DECLSPEC_EXPORT  _Export
+#define __GMP_DECLSPEC_IMPORT  _Import
+#endif
+
+#if __GMP_LIBGMP_DLL
+#if __GMP_WITHIN_GMP
+#define __GMP_DECLSPEC  __GMP_DECLSPEC_EXPORT
+#else
+#define __GMP_DECLSPEC  __GMP_DECLSPEC_IMPORT
+#endif
+#else
+#define __GMP_DECLSPEC
+#endif
+
+#ifdef __GMP_SHORT_LIMB
+typedef unsigned int           mp_limb_t;
+typedef int                    mp_limb_signed_t;
+#else
+#ifdef _LONG_LONG_LIMB
+typedef unsigned long long int mp_limb_t;
+typedef long long int          mp_limb_signed_t;
+#else
+typedef unsigned long int      mp_limb_t;
+typedef long int               mp_limb_signed_t;
+#endif
+#endif
+typedef unsigned long int      mp_bitcnt_t;
+
+typedef struct
+{
+  int _mp_alloc;               /* Number of *limbs* allocated and pointed
+                                  to by the _mp_d field.  */
+  int _mp_size;                        /* abs(_mp_size) is the number of limbs the
+                                  last field points to.  If _mp_size is
+                                  negative this is a negative number.  */
+  mp_limb_t *_mp_d;            /* Pointer to the limbs.  */
+} __mpz_struct;
+
+#endif /* __GNU_MP__ */
+
+/* User-visible types.  */
+typedef __mpz_struct MINT;
+
+
+#if __GMP_HAVE_PROTOTYPES
+#define __GMP_PROTO(x) x
+#else
+#define __GMP_PROTO(x) ()
+#endif
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+#define mp_set_memory_functions __gmp_set_memory_functions
+__GMP_DECLSPEC void mp_set_memory_functions __GMP_PROTO ((void *(*) (size_t),
+                                      void *(*) (void *, size_t, size_t),
+                                      void (*) (void *, size_t)));
+__GMP_DECLSPEC MINT *itom __GMP_PROTO ((signed short int));
+__GMP_DECLSPEC MINT *xtom __GMP_PROTO ((const char *));
+__GMP_DECLSPEC void move __GMP_PROTO ((const MINT *, MINT *));
+__GMP_DECLSPEC void madd __GMP_PROTO ((const MINT *, const MINT *, MINT *));
+__GMP_DECLSPEC void msub __GMP_PROTO ((const MINT *, const MINT *, MINT *));
+__GMP_DECLSPEC void mult __GMP_PROTO ((const MINT *, const MINT *, MINT *));
+__GMP_DECLSPEC void mdiv __GMP_PROTO ((const MINT *, const MINT *, MINT *, MINT *));
+__GMP_DECLSPEC void sdiv __GMP_PROTO ((const MINT *, signed short int, MINT *, signed short int *));
+__GMP_DECLSPEC void msqrt __GMP_PROTO ((const MINT *, MINT *, MINT *));
+__GMP_DECLSPEC void pow __GMP_PROTO ((const MINT *, const MINT *, const MINT *, MINT *));
+__GMP_DECLSPEC void rpow __GMP_PROTO ((const MINT *, signed short int, MINT *));
+__GMP_DECLSPEC void gcd __GMP_PROTO ((const MINT *, const MINT *, MINT *));
+__GMP_DECLSPEC int  mcmp __GMP_PROTO ((const MINT *, const MINT *));
+__GMP_DECLSPEC void min __GMP_PROTO ((MINT *));
+__GMP_DECLSPEC void mout __GMP_PROTO ((const MINT *));
+__GMP_DECLSPEC char *mtox __GMP_PROTO ((const MINT *));
+__GMP_DECLSPEC void mfree __GMP_PROTO ((MINT *));
+
+#if defined (__cplusplus)
+}
+#endif
+
+#define __MP_H__
+#endif /* __MP_H__ */
diff --git a/mp_bpl.c b/mp_bpl.c

new file mode 100644 (file)

index 0000000..1fc1ca9
--- /dev/null
+++ b/mp_bpl.c
@@ -0,0 +1,24 @@
+/*
+Copyright 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+const int mp_bits_per_limb = GMP_LIMB_BITS;  /* run-time copy of the GMP_LIMB_BITS constant */
+const int __gmp_0 = 0;  /* constant zero; NOTE(review): users are not visible in this file -- confirm purpose */
+int __gmp_junk;  /* writable int with no initializer; NOTE(review): presumably a dummy store target -- confirm */
diff --git a/mp_clz_tab.c b/mp_clz_tab.c

new file mode 100644 (file)

index 0000000..1e0cee4
--- /dev/null
+++ b/mp_clz_tab.c
@@ -0,0 +1,37 @@
+/* __clz_tab -- support for longlong.h
+
+   THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND MAY CHANGE
+   INCOMPATIBLY OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifdef COUNT_LEADING_ZEROS_NEED_CLZ_TAB
+const
+unsigned char __clz_tab[128] =
+{
+  1,2,3,3,4,4,4,4,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+  8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+  8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8
+};
+#endif
diff --git a/mp_dv_tab.c b/mp_dv_tab.c

new file mode 100644 (file)

index 0000000..c7e4c13
--- /dev/null
+++ b/mp_dv_tab.c
@@ -0,0 +1,68 @@
+/* __gmp_digit_value_tab -- support for mp*_set_str
+
+   THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND MAY CHANGE
+   INCOMPATIBLY OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Table to be indexed by character, to get its numerical value.  Assumes ASCII
+   character set.
+
+   First part of table supports common usages, where 'A' and 'a' have the same
+   value; this supports bases 2..36
+
+   At offset 224, values for bases 37..62 start.  Here, 'A' has the value 10
+   (in decimal) and 'a' has the value 36.  */
+
+#define X 0xff
+const unsigned char __gmp_digit_value_tab[] =
+{
+  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
+  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
+  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
+  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, X, X, X, X, X, X,
+  X,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,
+  25,26,27,28,29,30,31,32,33,34,35,X, X, X, X, X,
+  X,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,
+  25,26,27,28,29,30,31,32,33,34,35,X, X, X, X, X,
+  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
+  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
+  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
+  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
+  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
+  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
+  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
+  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
+  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
+  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, X, X, X, X, X, X,
+  X,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,
+  25,26,27,28,29,30,31,32,33,34,35,X, X, X, X, X,
+  X,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,
+  51,52,53,54,55,56,57,58,59,60,61,X, X, X, X, X,
+  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
+  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
+  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
+  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
+  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
+  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
+  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
+  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X
+};
diff --git a/mp_get_fns.c b/mp_get_fns.c

new file mode 100644 (file)

index 0000000..8d7c69f
--- /dev/null
+++ b/mp_get_fns.c
@@ -0,0 +1,37 @@
+/* mp_get_memory_functions -- Get the allocate, reallocate, and free functions.
+
+Copyright 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>  /* for NULL */
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mp_get_memory_functions (void *(**alloc_func) (size_t),
+                        void *(**realloc_func) (void *, size_t, size_t),
+                        void (**free_func) (void *, size_t)) __GMP_NOTHROW
+{
+  /* Store each currently installed handler through its out-pointer; a null
+     out-pointer is skipped, so callers may ask for any subset.  */
+  if (alloc_func)
+    *alloc_func = __gmp_allocate_func;
+  if (realloc_func)
+    *realloc_func = __gmp_reallocate_func;
+  if (free_func)
+    *free_func = __gmp_free_func;
+}
diff --git a/mp_minv_tab.c b/mp_minv_tab.c

new file mode 100644 (file)

index 0000000..a7f342b
--- /dev/null
+++ b/mp_minv_tab.c
@@ -0,0 +1,48 @@
+/* A table of data supporting binvert_limb().
+
+   THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND MAY CHANGE
+   INCOMPATIBLY OR DISAPPEAR IN A FUTURE GNU MP RELEASE.  */
+
+/*
+Copyright 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* binvert_limb_table[i] is the multiplicative inverse of 2*i+1 mod 256,
+   ie. (binvert_limb_table[i] * (2*i+1)) % 256 == 1 */
+
+const unsigned char  binvert_limb_table[128] = {
+  0x01, 0xAB, 0xCD, 0xB7, 0x39, 0xA3, 0xC5, 0xEF,
+  0xF1, 0x1B, 0x3D, 0xA7, 0x29, 0x13, 0x35, 0xDF,
+  0xE1, 0x8B, 0xAD, 0x97, 0x19, 0x83, 0xA5, 0xCF,
+  0xD1, 0xFB, 0x1D, 0x87, 0x09, 0xF3, 0x15, 0xBF,
+  0xC1, 0x6B, 0x8D, 0x77, 0xF9, 0x63, 0x85, 0xAF,
+  0xB1, 0xDB, 0xFD, 0x67, 0xE9, 0xD3, 0xF5, 0x9F,
+  0xA1, 0x4B, 0x6D, 0x57, 0xD9, 0x43, 0x65, 0x8F,
+  0x91, 0xBB, 0xDD, 0x47, 0xC9, 0xB3, 0xD5, 0x7F,
+  0x81, 0x2B, 0x4D, 0x37, 0xB9, 0x23, 0x45, 0x6F,
+  0x71, 0x9B, 0xBD, 0x27, 0xA9, 0x93, 0xB5, 0x5F,
+  0x61, 0x0B, 0x2D, 0x17, 0x99, 0x03, 0x25, 0x4F,
+  0x51, 0x7B, 0x9D, 0x07, 0x89, 0x73, 0x95, 0x3F,
+  0x41, 0xEB, 0x0D, 0xF7, 0x79, 0xE3, 0x05, 0x2F,
+  0x31, 0x5B, 0x7D, 0xE7, 0x69, 0x53, 0x75, 0x1F,
+  0x21, 0xCB, 0xED, 0xD7, 0x59, 0xC3, 0xE5, 0x0F,
+  0x11, 0x3B, 0x5D, 0xC7, 0x49, 0x33, 0x55, 0xFF
+};
diff --git a/mp_set_fns.c b/mp_set_fns.c

new file mode 100644 (file)

index 0000000..3d2aaf3
--- /dev/null
+++ b/mp_set_fns.c
@@ -0,0 +1,39 @@
+/* mp_set_memory_functions -- Set the allocate, reallocate, and free functions
+   for use by the mp package.
+
+Copyright 1991, 1993, 1994, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mp_set_memory_functions (void *(*alloc_func) (size_t),
+                        void *(*realloc_func) (void *, size_t, size_t),
+                        void (*free_func) (void *, size_t)) __GMP_NOTHROW
+{
+  /* Install the three handlers.  A null argument selects the matching
+     built-in handler instead: __gmp_default_allocate,
+     __gmp_default_reallocate or __gmp_default_free.  */
+
+  __gmp_allocate_func
+    = alloc_func != 0 ? alloc_func : __gmp_default_allocate;
+  __gmp_reallocate_func
+    = realloc_func != 0 ? realloc_func : __gmp_default_reallocate;
+  __gmp_free_func
+    = free_func != 0 ? free_func : __gmp_default_free;
+}
diff --git a/mpbsd/Makefile.am b/mpbsd/Makefile.am

new file mode 100644 (file)

index 0000000..4272bbf
--- /dev/null
+++ b/mpbsd/Makefile.am
@@ -0,0 +1,37 @@
+## Process this file with automake to generate Makefile.in
+
+# Copyright 1996, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+# -I$(top_srcdir)/mpz is for #includes done by mpz .c files.  Perhaps most
+# compilers are smart enough to look in the same directory as the .c file
+# already, but let's make absolutely sure.
+#
+INCLUDES = -DBERKELEY_MP -D__GMP_WITHIN_GMP -D__gmpz_realloc=_mp_realloc \
+       -I$(top_srcdir) -I$(top_srcdir)/mpz
+
+# The mpz sources listed here all check for -DBERKELEY_MP so that they
+# compile in mpbsd form.
+#
+libmpbsd_la_SOURCES = itom.c mfree.c min.c mout.c mtox.c rpow.c sdiv.c xtom.c \
+  ../mpz/add.c ../mpz/cmp.c ../mpz/gcd.c ../mpz/mul.c ../mpz/powm.c \
+  ../mpz/realloc.c ../mpz/set.c ../mpz/sqrtrem.c ../mpz/sub.c ../mpz/tdiv_qr.c
+
+if WANT_MPBSD
+noinst_LTLIBRARIES = libmpbsd.la
+endif
diff --git a/mpbsd/Makefile.in b/mpbsd/Makefile.in

new file mode 100644 (file)

index 0000000..b2f7e5c
--- /dev/null
+++ b/mpbsd/Makefile.in
@@ -0,0 +1,643 @@
+# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation,
+# Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+# Copyright 1996, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+ANSI2KNR = $(top_builddir)/ansi2knr
+subdir = mpbsd
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
+       $(top_srcdir)/configure.in
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+       $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+LTLIBRARIES = $(noinst_LTLIBRARIES)
+libmpbsd_la_LIBADD =
+am_libmpbsd_la_OBJECTS = itom$U.lo mfree$U.lo min$U.lo mout$U.lo \
+       mtox$U.lo rpow$U.lo sdiv$U.lo xtom$U.lo add$U.lo cmp$U.lo \
+       gcd$U.lo mul$U.lo powm$U.lo realloc$U.lo set$U.lo sqrtrem$U.lo \
+       sub$U.lo tdiv_qr$U.lo
+libmpbsd_la_OBJECTS = $(am_libmpbsd_la_OBJECTS)
+@WANT_MPBSD_TRUE@am_libmpbsd_la_rpath =
+DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
+depcomp =
+am__depfiles_maybe =
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+       $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+       $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
+       $(LDFLAGS) -o $@
+SOURCES = $(libmpbsd_la_SOURCES)
+DIST_SOURCES = $(libmpbsd_la_SOURCES)
+ETAGS = etags
+CTAGS = ctags
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ABI = @ABI@
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AS = @AS@
+ASMFLAGS = @ASMFLAGS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CALLING_CONVENTIONS_OBJS = @CALLING_CONVENTIONS_OBJS@
+CC = @CC@
+CCAS = @CCAS@
+CC_FOR_BUILD = @CC_FOR_BUILD@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CPP_FOR_BUILD = @CPP_FOR_BUILD@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFN_LONG_LONG_LIMB = @DEFN_LONG_LONG_LIMB@
+DEFS = @DEFS@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+EXEEXT_FOR_BUILD = @EXEEXT_FOR_BUILD@
+FGREP = @FGREP@
+GMP_LDFLAGS = @GMP_LDFLAGS@
+GMP_LIMB_BITS = @GMP_LIMB_BITS@
+GMP_NAIL_BITS = @GMP_NAIL_BITS@
+GREP = @GREP@
+HAVE_CLOCK_01 = @HAVE_CLOCK_01@
+HAVE_CPUTIME_01 = @HAVE_CPUTIME_01@
+HAVE_GETRUSAGE_01 = @HAVE_GETRUSAGE_01@
+HAVE_GETTIMEOFDAY_01 = @HAVE_GETTIMEOFDAY_01@
+HAVE_HOST_CPU_FAMILY_power = @HAVE_HOST_CPU_FAMILY_power@
+HAVE_HOST_CPU_FAMILY_powerpc = @HAVE_HOST_CPU_FAMILY_powerpc@
+HAVE_SIGACTION_01 = @HAVE_SIGACTION_01@
+HAVE_SIGALTSTACK_01 = @HAVE_SIGALTSTACK_01@
+HAVE_SIGSTACK_01 = @HAVE_SIGSTACK_01@
+HAVE_STACK_T_01 = @HAVE_STACK_T_01@
+HAVE_SYS_RESOURCE_H_01 = @HAVE_SYS_RESOURCE_H_01@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LEX = @LEX@
+LEXLIB = @LEXLIB@
+LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
+LIBCURSES = @LIBCURSES@
+LIBGMPXX_LDFLAGS = @LIBGMPXX_LDFLAGS@
+LIBGMP_DLL = @LIBGMP_DLL@
+LIBGMP_LDFLAGS = @LIBGMP_LDFLAGS@
+LIBM = @LIBM@
+LIBM_FOR_BUILD = @LIBM_FOR_BUILD@
+LIBOBJS = @LIBOBJS@
+LIBREADLINE = @LIBREADLINE@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+M4 = @M4@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
+STRIP = @STRIP@
+TAL_OBJECT = @TAL_OBJECT@
+TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
+U = @U@
+U_FOR_BUILD = @U_FOR_BUILD@
+VERSION = @VERSION@
+WITH_READLINE_01 = @WITH_READLINE_01@
+YACC = @YACC@
+YFLAGS = @YFLAGS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__leading_dot = @am__leading_dot@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+gmp_srclinks = @gmp_srclinks@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+mpn_objects = @mpn_objects@
+mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
+mpn_objs_in_libmp = @mpn_objs_in_libmp@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+
+# -I$(top_srcdir)/mpz is for #includes done by mpz .c files.  Perhaps most
+# compilers are smart enough to look in the same directory as the .c file
+# already, but let's make absolutely sure.
+#
+INCLUDES = -DBERKELEY_MP -D__GMP_WITHIN_GMP -D__gmpz_realloc=_mp_realloc \
+       -I$(top_srcdir) -I$(top_srcdir)/mpz
+
+
+# The mpz sources here all know to look for -DBERKELEY_MP to compile in
+# mpbsd form.
+#
+libmpbsd_la_SOURCES = itom.c mfree.c min.c mout.c mtox.c rpow.c sdiv.c xtom.c \
+  ../mpz/add.c ../mpz/cmp.c ../mpz/gcd.c ../mpz/mul.c ../mpz/powm.c \
+  ../mpz/realloc.c ../mpz/set.c ../mpz/sqrtrem.c ../mpz/sub.c ../mpz/tdiv_qr.c
+
+@WANT_MPBSD_TRUE@noinst_LTLIBRARIES = libmpbsd.la
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps)
+       @for dep in $?; do \
+         case '$(am__configure_deps)' in \
+           *$$dep*) \
+             ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+               && { if test -f $@; then exit 0; else break; fi; }; \
+             exit 1;; \
+         esac; \
+       done; \
+       echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps mpbsd/Makefile'; \
+       $(am__cd) $(top_srcdir) && \
+         $(AUTOMAKE) --gnu --ignore-deps mpbsd/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+       @case '$?' in \
+         *config.status*) \
+           cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+         *) \
+           echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+           cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+       esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+clean-noinstLTLIBRARIES:
+       -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES)
+       @list='$(noinst_LTLIBRARIES)'; for p in $$list; do \
+         dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \
+         test "$$dir" != "$$p" || dir=.; \
+         echo "rm -f \"$${dir}/so_locations\""; \
+         rm -f "$${dir}/so_locations"; \
+       done
+libmpbsd.la: $(libmpbsd_la_OBJECTS) $(libmpbsd_la_DEPENDENCIES) 
+       $(LINK) $(am_libmpbsd_la_rpath) $(libmpbsd_la_OBJECTS) $(libmpbsd_la_LIBADD) $(LIBS)
+
+mostlyclean-compile:
+       -rm -f *.$(OBJEXT)
+
+distclean-compile:
+       -rm -f *.tab.c
+$(top_builddir)/ansi2knr:
+       $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
+
+mostlyclean-kr:
+       -test "$U" = "" || rm -f *_.c
+
+.c.o:
+       $(COMPILE) -c $<
+
+.c.obj:
+       $(COMPILE) -c `$(CYGPATH_W) '$<'`
+
+.c.lo:
+       $(LTCOMPILE) -c -o $@ $<
+
+add_.lo: add_.c
+       $(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o add_.lo `test -f 'add_.c' || echo '$(srcdir)/'`add_.c
+
+add.lo: ../mpz/add.c
+       $(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o add.lo `test -f '../mpz/add.c' || echo '$(srcdir)/'`../mpz/add.c
+
+cmp_.lo: cmp_.c
+       $(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o cmp_.lo `test -f 'cmp_.c' || echo '$(srcdir)/'`cmp_.c
+
+cmp.lo: ../mpz/cmp.c
+       $(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o cmp.lo `test -f '../mpz/cmp.c' || echo '$(srcdir)/'`../mpz/cmp.c
+
+gcd_.lo: gcd_.c
+       $(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o gcd_.lo `test -f 'gcd_.c' || echo '$(srcdir)/'`gcd_.c
+
+gcd.lo: ../mpz/gcd.c
+       $(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o gcd.lo `test -f '../mpz/gcd.c' || echo '$(srcdir)/'`../mpz/gcd.c
+
+mul_.lo: mul_.c
+       $(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mul_.lo `test -f 'mul_.c' || echo '$(srcdir)/'`mul_.c
+
+mul.lo: ../mpz/mul.c
+       $(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mul.lo `test -f '../mpz/mul.c' || echo '$(srcdir)/'`../mpz/mul.c
+
+powm_.lo: powm_.c
+       $(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o powm_.lo `test -f 'powm_.c' || echo '$(srcdir)/'`powm_.c
+
+powm.lo: ../mpz/powm.c
+       $(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o powm.lo `test -f '../mpz/powm.c' || echo '$(srcdir)/'`../mpz/powm.c
+
+realloc_.lo: realloc_.c
+       $(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o realloc_.lo `test -f 'realloc_.c' || echo '$(srcdir)/'`realloc_.c
+
+realloc.lo: ../mpz/realloc.c
+       $(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o realloc.lo `test -f '../mpz/realloc.c' || echo '$(srcdir)/'`../mpz/realloc.c
+
+set_.lo: set_.c
+       $(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o set_.lo `test -f 'set_.c' || echo '$(srcdir)/'`set_.c
+
+set.lo: ../mpz/set.c
+       $(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o set.lo `test -f '../mpz/set.c' || echo '$(srcdir)/'`../mpz/set.c
+
+sqrtrem_.lo: sqrtrem_.c
+       $(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o sqrtrem_.lo `test -f 'sqrtrem_.c' || echo '$(srcdir)/'`sqrtrem_.c
+
+sqrtrem.lo: ../mpz/sqrtrem.c
+       $(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o sqrtrem.lo `test -f '../mpz/sqrtrem.c' || echo '$(srcdir)/'`../mpz/sqrtrem.c
+
+sub_.lo: sub_.c
+       $(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o sub_.lo `test -f 'sub_.c' || echo '$(srcdir)/'`sub_.c
+
+sub.lo: ../mpz/sub.c
+       $(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o sub.lo `test -f '../mpz/sub.c' || echo '$(srcdir)/'`../mpz/sub.c
+
+tdiv_qr_.lo: tdiv_qr_.c
+       $(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o tdiv_qr_.lo `test -f 'tdiv_qr_.c' || echo '$(srcdir)/'`tdiv_qr_.c
+
+tdiv_qr.lo: ../mpz/tdiv_qr.c
+       $(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o tdiv_qr.lo `test -f '../mpz/tdiv_qr.c' || echo '$(srcdir)/'`../mpz/tdiv_qr.c
+add_.c: ../mpz/add.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/../mpz/add.c; then echo $(srcdir)/../mpz/add.c; else echo ../mpz/add.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+cmp_.c: ../mpz/cmp.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/../mpz/cmp.c; then echo $(srcdir)/../mpz/cmp.c; else echo ../mpz/cmp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+gcd_.c: ../mpz/gcd.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/../mpz/gcd.c; then echo $(srcdir)/../mpz/gcd.c; else echo ../mpz/gcd.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+itom_.c: itom.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/itom.c; then echo $(srcdir)/itom.c; else echo itom.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mfree_.c: mfree.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mfree.c; then echo $(srcdir)/mfree.c; else echo mfree.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+min_.c: min.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/min.c; then echo $(srcdir)/min.c; else echo min.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mout_.c: mout.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mout.c; then echo $(srcdir)/mout.c; else echo mout.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mtox_.c: mtox.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mtox.c; then echo $(srcdir)/mtox.c; else echo mtox.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mul_.c: ../mpz/mul.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/../mpz/mul.c; then echo $(srcdir)/../mpz/mul.c; else echo ../mpz/mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+powm_.c: ../mpz/powm.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/../mpz/powm.c; then echo $(srcdir)/../mpz/powm.c; else echo ../mpz/powm.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+realloc_.c: ../mpz/realloc.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/../mpz/realloc.c; then echo $(srcdir)/../mpz/realloc.c; else echo ../mpz/realloc.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+rpow_.c: rpow.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/rpow.c; then echo $(srcdir)/rpow.c; else echo rpow.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+sdiv_.c: sdiv.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sdiv.c; then echo $(srcdir)/sdiv.c; else echo sdiv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+set_.c: ../mpz/set.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/../mpz/set.c; then echo $(srcdir)/../mpz/set.c; else echo ../mpz/set.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+sqrtrem_.c: ../mpz/sqrtrem.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/../mpz/sqrtrem.c; then echo $(srcdir)/../mpz/sqrtrem.c; else echo ../mpz/sqrtrem.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+sub_.c: ../mpz/sub.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/../mpz/sub.c; then echo $(srcdir)/../mpz/sub.c; else echo ../mpz/sub.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+tdiv_qr_.c: ../mpz/tdiv_qr.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/../mpz/tdiv_qr.c; then echo $(srcdir)/../mpz/tdiv_qr.c; else echo ../mpz/tdiv_qr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+xtom_.c: xtom.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/xtom.c; then echo $(srcdir)/xtom.c; else echo xtom.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+add_.$(OBJEXT) add_.lo cmp_.$(OBJEXT) cmp_.lo gcd_.$(OBJEXT) gcd_.lo \
+itom_.$(OBJEXT) itom_.lo mfree_.$(OBJEXT) mfree_.lo min_.$(OBJEXT) \
+min_.lo mout_.$(OBJEXT) mout_.lo mtox_.$(OBJEXT) mtox_.lo \
+mul_.$(OBJEXT) mul_.lo powm_.$(OBJEXT) powm_.lo realloc_.$(OBJEXT) \
+realloc_.lo rpow_.$(OBJEXT) rpow_.lo sdiv_.$(OBJEXT) sdiv_.lo \
+set_.$(OBJEXT) set_.lo sqrtrem_.$(OBJEXT) sqrtrem_.lo sub_.$(OBJEXT) \
+sub_.lo tdiv_qr_.$(OBJEXT) tdiv_qr_.lo xtom_.$(OBJEXT) xtom_.lo : \
+$(ANSI2KNR)
+
+mostlyclean-libtool:
+       -rm -f *.lo
+
+clean-libtool:
+       -rm -rf .libs _libs
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+       list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       mkid -fID $$unique
+tags: TAGS
+
+TAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+               $(TAGS_FILES) $(LISP)
+       set x; \
+       here=`pwd`; \
+       list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       shift; \
+       if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+         test -n "$$unique" || unique=$$empty_fix; \
+         if test $$# -gt 0; then \
+           $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+             "$$@" $$unique; \
+         else \
+           $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+             $$unique; \
+         fi; \
+       fi
+ctags: CTAGS
+CTAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+               $(TAGS_FILES) $(LISP)
+       list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       test -z "$(CTAGS_ARGS)$$unique" \
+         || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+            $$unique
+
+GTAGS:
+       here=`$(am__cd) $(top_builddir) && pwd` \
+         && $(am__cd) $(top_srcdir) \
+         && gtags -i $(GTAGS_ARGS) "$$here"
+
+distclean-tags:
+       -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+       @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+       topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+       list='$(DISTFILES)'; \
+         dist_files=`for file in $$list; do echo $$file; done | \
+         sed -e "s|^$$srcdirstrip/||;t" \
+             -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+       case $$dist_files in \
+         */*) $(MKDIR_P) `echo "$$dist_files" | \
+                          sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+                          sort -u` ;; \
+       esac; \
+       for file in $$dist_files; do \
+         if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+         if test -d $$d/$$file; then \
+           dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+           if test -d "$(distdir)/$$file"; then \
+             find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+           fi; \
+           if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+             cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+             find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+           fi; \
+           cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+         else \
+           test -f "$(distdir)/$$file" \
+           || cp -p $$d/$$file "$(distdir)/$$file" \
+           || exit 1; \
+         fi; \
+       done
+check-am: all-am
+check: check-am
+all-am: Makefile $(LTLIBRARIES)
+installdirs:
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+       @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+       $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+         install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+         `test -z '$(STRIP)' || \
+           echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+       -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+       -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+       @echo "This command is intended for maintainers to use"
+       @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \
+       mostlyclean-am
+
+distclean: distclean-am
+       -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+       distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+       -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+       mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: $(top_builddir)/ansi2knr install-am install-strip
+
+.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
+       clean-libtool clean-noinstLTLIBRARIES ctags distclean \
+       distclean-compile distclean-generic distclean-libtool \
+       distclean-tags distdir dvi dvi-am html html-am info info-am \
+       install install-am install-data install-data-am install-dvi \
+       install-dvi-am install-exec install-exec-am install-html \
+       install-html-am install-info install-info-am install-man \
+       install-pdf install-pdf-am install-ps install-ps-am \
+       install-strip installcheck installcheck-am installdirs \
+       maintainer-clean maintainer-clean-generic mostlyclean \
+       mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+       mostlyclean-libtool pdf pdf-am ps ps-am tags uninstall \
+       uninstall-am
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/mpbsd/itom.c b/mpbsd/itom.c

new file mode 100644 (file)

index 0000000..6f0e31c
--- /dev/null
+++ b/mpbsd/itom.c
@@ -0,0 +1,47 @@
+/* itom -- BSD compatible allocate and initialize a MINT.
+
+Copyright 1991, 1994, 1995, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "mp.h"
+#include "gmp.h"
+#include "gmp-impl.h"
+
+MINT *
+itom (signed short int n)
+{
+  /* Allocate the MINT header, then a single limb to hold N's magnitude.  */
+  MINT *res = (MINT *) (*__gmp_allocate_func) (sizeof (MINT));
+  mp_ptr limb = (mp_ptr) (*__gmp_allocate_func) (BYTES_PER_MP_LIMB);
+
+  res->_mp_alloc = 1;
+  res->_mp_d = limb;
+  if (n == 0)
+    res->_mp_size = 0;          /* limb contents are left unset */
+  else if (n > 0)
+    {
+      res->_mp_size = 1;
+      limb[0] = n;
+    }
+  else
+    {
+      res->_mp_size = -1;       /* sign lives in the size, not the limb */
+      limb[0] = (unsigned short) -n;
+    }
+
+  return res;
+}
diff --git a/mpbsd/mfree.c b/mpbsd/mfree.c

new file mode 100644 (file)

index 0000000..84e93f8
--- /dev/null
+++ b/mpbsd/mfree.c
@@ -0,0 +1,29 @@
+/* mfree -- BSD compatible mfree.
+
+Copyright 1991, 1994, 1995, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "mp.h"
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mfree (MINT *m)
+{
+  (*__gmp_free_func) (m->_mp_d, m->_mp_alloc * BYTES_PER_MP_LIMB); /* limbs */
+  (*__gmp_free_func) (m, sizeof (MINT));  /* then the MINT header itself */
+}
diff --git a/mpbsd/min.c b/mpbsd/min.c

new file mode 100644 (file)

index 0000000..113cc17
--- /dev/null
+++ b/mpbsd/min.c
@@ -0,0 +1,89 @@
+/* min(MINT) -- Do decimal input from standard input and store result in
+   MINT.
+
+Copyright 1991, 1994, 1996, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <ctype.h>
+#include "mp.h"
+#include "gmp.h"
+#include "gmp-impl.h"
+
+extern const unsigned char __gmp_digit_value_tab[];
+#define digit_value_tab __gmp_digit_value_tab
+
+/* min -- read a decimal integer from stdin and store it in DEST.
+
+   Leading whitespace is skipped and one leading '-' is accepted.  Digits
+   are then collected until a non-digit character or EOF is seen; the
+   character that ended the scan is handed to ungetc.  If no digit follows
+   the optional sign, the function returns without touching DEST.  */
+void
+min (MINT *dest)
+{
+  char *str;
+  size_t alloc_size, str_size;
+  int c;
+  int negative;
+  mp_size_t dest_size;
+  const unsigned char *digit_value;
+
+  digit_value = digit_value_tab;
+
+  /* Skip whitespace.  */
+  do
+    c = getc (stdin);
+  while (isspace (c));
+
+  negative = 0;
+  if (c == '-')
+    {
+      negative = 1;
+      c = getc (stdin);
+    }
+
+  if (c == EOF || digit_value[c] >= 10)
+    return;                    /* error if no digits */
+
+  /* Allocate the digit buffer only once a first digit is known to exist,
+     so that the error return above has nothing to release.  */
+  alloc_size = 100;
+  str = (char *) (*__gmp_allocate_func) (alloc_size);
+  str_size = 0;
+
+  /* Collect digit values (0..9, not ASCII) into STR, growing it by half
+     its size whenever it fills up.  */
+  do
+    {
+      int dig;
+      dig = digit_value[c];
+      if (dig >= 10)
+       break;
+      if (str_size >= alloc_size)
+       {
+         size_t old_alloc_size = alloc_size;
+         alloc_size = alloc_size * 3 / 2;
+         str = (char *) (*__gmp_reallocate_func) (str, old_alloc_size, alloc_size);
+       }
+      str[str_size++] = dig;
+      c = getc (stdin);
+    }
+  while (c != EOF);
+
+  ungetc (c, stdin);
+
+  /* Make sure DEST has room for the converted limbs.  */
+  dest_size = str_size / mp_bases[10].chars_per_limb + 1;
+  if (dest->_mp_alloc < dest_size)
+    _mp_realloc (dest, dest_size);
+
+  dest_size = mpn_set_str (dest->_mp_d, (unsigned char *) str, str_size, 10);
+  dest->_mp_size = negative ? -dest_size : dest_size;
+
+  (*__gmp_free_func) (str, alloc_size);
+  return;
+}
diff --git a/mpbsd/mout.c b/mpbsd/mout.c

new file mode 100644 (file)

index 0000000..545539c
--- /dev/null
+++ b/mpbsd/mout.c
@@ -0,0 +1,92 @@
+/* mout(MINT) -- Do decimal output of MINT to standard output.
+
+Copyright 1991, 1994, 1996, 2000, 2001, 2002, 2005 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <string.h>
+#include "mp.h"
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* mout -- print X in decimal on stdout, followed by a newline.
+
+   A negative value is preceded by '-'.  The digits are written in groups
+   of ten separated by single spaces; when the number of digits is not a
+   multiple of ten, the short group is written first.  Zero is printed as
+   "0".  */
+void
+mout (const MINT *x)
+{
+  mp_ptr xp;
+  mp_srcptr x_ptr;
+  mp_size_t x_size;
+  unsigned char *str;
+  size_t str_size;
+  size_t lead;
+  size_t i;                    /* size_t: compared against str_size */
+  TMP_DECL;
+
+  x_size = x->_mp_size;
+  if (x_size == 0)
+    {
+      fputc ('0', stdout);
+      fputc ('\n', stdout);
+      return;
+    }
+  if (x_size < 0)
+    {
+      fputc ('-', stdout);
+      x_size = -x_size;
+    }
+
+  TMP_MARK;
+  x_ptr = x->_mp_d;
+  MPN_SIZEINBASE (str_size, x_ptr, x_size, 10);
+  str_size += 2;
+  str = (unsigned char *) TMP_ALLOC (str_size);
+
+  /* mpn_get_str clobbers its argument */
+  xp = TMP_ALLOC_LIMBS (x_size);
+  MPN_COPY (xp, x_ptr, x_size);
+
+  str_size = mpn_get_str (str, 10, xp, x_size);
+
+  /* mpn_get_str might make a leading zero, skip it.  */
+  str_size -= (*str == 0);
+  str += (*str == 0);
+  ASSERT (*str != 0);
+
+  /* Translate to printable chars.  */
+  for (i = 0; i < str_size; i++)
+    str[i] = "0123456789"[str[i]];
+  str[str_size] = 0;
+
+  str_size = strlen ((char *) str);
+
+  /* Write the short leading group, if any, so that every remaining group
+     is exactly ten digits long.  */
+  lead = str_size % 10;
+  if (lead != 0)
+    {
+      fwrite (str, 1, lead, stdout);
+      str += lead;
+      str_size -= lead;
+      if (str_size != 0)
+       fputc (' ', stdout);
+    }
+
+  /* Full groups of ten, with a space between groups but not after the
+     last one.  */
+  for (i = 0; i < str_size; i += 10)
+    {
+      fwrite (str, 1, 10, stdout);
+      str += 10;
+      if (i + 10 < str_size)
+       fputc (' ', stdout);
+    }
+  fputc ('\n', stdout);
+  TMP_FREE;
+}
diff --git a/mpbsd/mtox.c b/mpbsd/mtox.c

new file mode 100644 (file)

index 0000000..7babfdd
--- /dev/null
+++ b/mpbsd/mtox.c
@@ -0,0 +1,61 @@
+/* mtox -- Convert OPERAND to hexadecimal and return a malloc'ed string
+   with the result of the conversion.
+
+Copyright 1991, 1994, 2000, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <string.h>
+#include "mp.h"
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* mtox -- return X as a NUL-terminated, lower-case hexadecimal string.
+
+   The buffer comes from __gmp_allocate_func and is sized for the digits,
+   the terminating NUL and, when X is negative, a leading '-'.  */
+char *
+mtox (const MINT *x)
+{
+  mp_size_t nlimbs = x->_mp_size;
+  mp_size_t sign = nlimbs;
+  mp_ptr limbs = x->_mp_d;
+  unsigned char *buf, *digits;
+  size_t ndigits, buf_size, k;
+
+  if (nlimbs < 0)
+    nlimbs = -nlimbs;
+
+  /* digits, plus '\0', plus possible '-', for an exact size */
+  MPN_SIZEINBASE_16 (buf_size, limbs, nlimbs);
+  buf_size += 1 + (sign < 0);
+
+  buf = (unsigned char *) (*__gmp_allocate_func) (buf_size);
+  digits = buf;
+  if (sign < 0)
+    {
+      *digits = '-';
+      digits++;
+    }
+
+  /* mpn_get_str produces digit values, not characters.  */
+  ndigits = mpn_get_str (digits, 16, limbs, nlimbs);
+  ASSERT (ndigits <= buf_size - (sign < 0));
+  ASSERT (ndigits == 1 || *digits != 0);
+
+  /* Map each digit value to its printable character and terminate.  */
+  for (k = 0; k < ndigits; k++)
+    digits[k] = "0123456789abcdef"[digits[k]];
+  digits[ndigits] = 0;
+
+  ASSERT (strlen (buf) + 1 == buf_size);
+  return (char *) buf;
+}
diff --git a/mpbsd/rpow.c b/mpbsd/rpow.c

new file mode 100644 (file)

index 0000000..827aacb
--- /dev/null
+++ b/mpbsd/rpow.c
@@ -0,0 +1,32 @@
+/* rpow -- MINT raised to short. */
+
+/*
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "mp.h"
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* rpow -- raise B to the power E and store the result in R.
+
+   For a non-negative E the work is delegated to mpz_n_pow_ui.  For a
+   negative E the result is set to zero.
+   NOTE(review): zero is also produced for B = 1 or -1 with negative E --
+   confirm this matches the intended BSD mp behaviour.  */
+void
+rpow (const MINT *b, short e, MINT *r)
+{
+  if (e < 0)
+    {
+      SIZ(r) = 0;
+      return;
+    }
+
+  mpz_n_pow_ui (r, PTR(b), (mp_size_t) SIZ(b), (unsigned long) e);
+}
diff --git a/mpbsd/sdiv.c b/mpbsd/sdiv.c

new file mode 100644 (file)

index 0000000..802fe52
--- /dev/null
+++ b/mpbsd/sdiv.c
@@ -0,0 +1,67 @@
+/* sdiv -- Divide a MINT by a short integer.  Produce a MINT quotient
+   and a short remainder.
+
+Copyright 1991, 1994, 1995, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "mp.h"
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* sdiv -- divide DIVIDEND by the short DIVISOR_SHORT.
+
+   The quotient is stored in QUOT and the remainder in *REM_PTR.  The
+   remainder takes the sign of the dividend; the quotient is negative when
+   dividend and divisor have opposite signs.  A zero dividend yields a zero
+   quotient and remainder without looking at the divisor.  */
+void
+sdiv (const MINT *dividend, signed short int divisor_short, MINT *quot, short *rem_ptr)
+{
+  mp_size_t num_sign = dividend->_mp_size;
+  mp_size_t nlimbs = ABS (dividend->_mp_size);
+  signed long int den_sign;
+  mp_size_t qlimbs;
+  mp_ptr np, qp;
+  mp_limb_t den, rem;
+
+  if (nlimbs == 0)
+    {
+      quot->_mp_size = 0;
+      *rem_ptr = 0;
+      return;
+    }
+
+  den_sign = divisor_short;
+  den = (unsigned short) ABS (divisor_short);
+
+  /* QUOT may be the same object as DIVIDEND: with a one-limb divisor no
+     intermediate remainders have to be kept, so the division can run in
+     place without a temporary copy.  */
+  if (quot->_mp_alloc < nlimbs)
+    _mp_realloc (quot, nlimbs);
+
+  /* Fetch the limb pointers only after the possible reallocation.  */
+  qp = quot->_mp_d;
+  np = dividend->_mp_d;
+
+  rem = mpn_divmod_1 (qp, np, nlimbs, den);
+
+  if (num_sign >= 0)
+    *rem_ptr = rem;
+  else
+    *rem_ptr = -rem;
+
+  /* The quotient occupies NLIMBS limbs, of which the most significant may
+     be zero; drop it from the size in that case.  */
+  qlimbs = nlimbs - (qp[nlimbs - 1] == 0);
+  if ((den_sign ^ num_sign) < 0)
+    qlimbs = -qlimbs;
+  quot->_mp_size = qlimbs;
+}
diff --git a/mpbsd/xtom.c b/mpbsd/xtom.c

new file mode 100644 (file)

index 0000000..75fa88a
--- /dev/null
+++ b/mpbsd/xtom.c
@@ -0,0 +1,88 @@
+/* xtom -- convert a hexadecimal string to a MINT, and return a pointer to
+   the MINT.
+
+Copyright 1991, 1994, 1995, 1996, 2000, 2001, 2002, 2005 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <string.h>
+#include <ctype.h>
+#include "mp.h"
+#include "gmp.h"
+#include "gmp-impl.h"
+
+extern const unsigned char __gmp_digit_value_tab[];
+#define digit_value __gmp_digit_value_tab
+
+/* xtom -- convert the hexadecimal string STR to a newly allocated MINT.
+
+   Whitespace is accepted before the number and between digits, and the
+   number may start with a single '-'.  Returns the new MINT, or 0 if the
+   string has no leading digit or contains a character that is neither a
+   hexadecimal digit nor whitespace.  Nothing is left allocated when 0 is
+   returned.  */
+MINT *
+xtom (const char *str)
+{
+  size_t str_size;
+  char *s, *begs;
+  size_t i;
+  mp_size_t xsize;
+  int c;
+  int negative;
+  MINT *x;
+  TMP_DECL;
+
+  /* Skip whitespace.  */
+  do
+    c = (unsigned char) *str++;
+  while (isspace (c));
+
+  negative = 0;
+  if (c == '-')
+    {
+      negative = 1;
+      c = (unsigned char) *str++;
+    }
+
+  if (digit_value[c] >= 16)
+    return 0;                  /* error if no digits */
+
+  TMP_MARK;
+  /* STR has already been advanced past C, hence the -1.  */
+  str_size = strlen (str - 1);
+  s = begs = (char *) TMP_ALLOC (str_size + 1);
+
+  /* Copy digit values (not characters) into the scratch buffer, dropping
+     any embedded whitespace.  */
+  for (i = 0; i < str_size; i++)
+    {
+      if (!isspace (c))
+       {
+         int dig = digit_value[c];
+         if (dig >= 16)
+           {
+             TMP_FREE;
+             return 0;
+           }
+         *s++ = dig;
+       }
+      c = (unsigned char) *str++;
+    }
+
+  str_size = s - begs;
+
+  /* The MINT is allocated only after the whole string has validated, so
+     the error returns above do not leak it.  */
+  x = (MINT *) (*__gmp_allocate_func) (sizeof (MINT));
+
+  xsize = str_size / mp_bases[16].chars_per_limb + 1;
+  x->_mp_alloc = xsize;
+  x->_mp_d = (mp_ptr) (*__gmp_allocate_func) (xsize * BYTES_PER_MP_LIMB);
+
+  xsize = mpn_set_str (x->_mp_d, (unsigned char *) begs, str_size, 16);
+  x->_mp_size = negative ? -xsize : xsize;
+
+  TMP_FREE;
+  return x;
+}
diff --git a/mpf/Makefile.am b/mpf/Makefile.am

new file mode 100644 (file)

index 0000000..4285e0b
--- /dev/null
+++ b/mpf/Makefile.am
@@ -0,0 +1,37 @@
+## Process this file with automake to generate Makefile.in
+
+# Copyright 1996, 1998, 1999, 2000, 2001, 2002 Free Software Foundation,
+# Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+# Preprocessor flags for every source in this directory: define
+# __GMP_WITHIN_GMP and add the top-level source directory to the include
+# search path.
+INCLUDES = -D__GMP_WITHIN_GMP -I$(top_srcdir)
+
+# libmpf.la is a libtool library declared with the noinst_ prefix, i.e. it
+# is built here but not installed by this Makefile.
+noinst_LTLIBRARIES = libmpf.la
+# Sources of libmpf.la.  The two headers at the end (fits_s.h, fits_u.h)
+# are listed alongside the .c files.
+libmpf_la_SOURCES = \
+  init.c init2.c inits.c set.c set_ui.c set_si.c set_str.c set_d.c set_z.c \
+  set_q.c iset.c iset_ui.c iset_si.c iset_str.c iset_d.c clear.c clears.c \
+  get_str.c dump.c size.c eq.c reldiff.c sqrt.c random2.c inp_str.c out_str.c \
+  add.c add_ui.c sub.c sub_ui.c ui_sub.c mul.c mul_ui.c div.c div_ui.c \
+  cmp.c cmp_d.c cmp_si.c cmp_ui.c mul_2exp.c div_2exp.c abs.c neg.c get_d.c \
+  get_d_2exp.c set_dfl_prec.c set_prc.c set_prc_raw.c get_dfl_prec.c get_prc.c \
+  ui_div.c sqrt_ui.c \
+  pow_ui.c urandomb.c swap.c get_si.c get_ui.c int_p.c \
+  ceilfloor.c trunc.c \
+  fits_sint.c fits_slong.c fits_sshort.c \
+  fits_uint.c fits_ulong.c fits_ushort.c \
+  fits_s.h fits_u.h
diff --git a/mpf/Makefile.in b/mpf/Makefile.in

new file mode 100644 (file)

index 0000000..96a2077
--- /dev/null
+++ b/mpf/Makefile.in
@@ -0,0 +1,713 @@
+# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation,
+# Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+# Copyright 1996, 1998, 1999, 2000, 2001, 2002 Free Software Foundation,
+# Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+ANSI2KNR = $(top_builddir)/ansi2knr
+subdir = mpf
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
+       $(top_srcdir)/configure.in
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+       $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+LTLIBRARIES = $(noinst_LTLIBRARIES)
+libmpf_la_LIBADD =
+am_libmpf_la_OBJECTS = init$U.lo init2$U.lo inits$U.lo set$U.lo \
+       set_ui$U.lo set_si$U.lo set_str$U.lo set_d$U.lo set_z$U.lo \
+       set_q$U.lo iset$U.lo iset_ui$U.lo iset_si$U.lo iset_str$U.lo \
+       iset_d$U.lo clear$U.lo clears$U.lo get_str$U.lo dump$U.lo \
+       size$U.lo eq$U.lo reldiff$U.lo sqrt$U.lo random2$U.lo \
+       inp_str$U.lo out_str$U.lo add$U.lo add_ui$U.lo sub$U.lo \
+       sub_ui$U.lo ui_sub$U.lo mul$U.lo mul_ui$U.lo div$U.lo \
+       div_ui$U.lo cmp$U.lo cmp_d$U.lo cmp_si$U.lo cmp_ui$U.lo \
+       mul_2exp$U.lo div_2exp$U.lo abs$U.lo neg$U.lo get_d$U.lo \
+       get_d_2exp$U.lo set_dfl_prec$U.lo set_prc$U.lo \
+       set_prc_raw$U.lo get_dfl_prec$U.lo get_prc$U.lo ui_div$U.lo \
+       sqrt_ui$U.lo pow_ui$U.lo urandomb$U.lo swap$U.lo get_si$U.lo \
+       get_ui$U.lo int_p$U.lo ceilfloor$U.lo trunc$U.lo \
+       fits_sint$U.lo fits_slong$U.lo fits_sshort$U.lo fits_uint$U.lo \
+       fits_ulong$U.lo fits_ushort$U.lo
+libmpf_la_OBJECTS = $(am_libmpf_la_OBJECTS)
+DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
+depcomp =
+am__depfiles_maybe =
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+       $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+       $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
+       $(LDFLAGS) -o $@
+SOURCES = $(libmpf_la_SOURCES)
+DIST_SOURCES = $(libmpf_la_SOURCES)
+ETAGS = etags
+CTAGS = ctags
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ABI = @ABI@
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AS = @AS@
+ASMFLAGS = @ASMFLAGS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CALLING_CONVENTIONS_OBJS = @CALLING_CONVENTIONS_OBJS@
+CC = @CC@
+CCAS = @CCAS@
+CC_FOR_BUILD = @CC_FOR_BUILD@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CPP_FOR_BUILD = @CPP_FOR_BUILD@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFN_LONG_LONG_LIMB = @DEFN_LONG_LONG_LIMB@
+DEFS = @DEFS@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+EXEEXT_FOR_BUILD = @EXEEXT_FOR_BUILD@
+FGREP = @FGREP@
+GMP_LDFLAGS = @GMP_LDFLAGS@
+GMP_LIMB_BITS = @GMP_LIMB_BITS@
+GMP_NAIL_BITS = @GMP_NAIL_BITS@
+GREP = @GREP@
+HAVE_CLOCK_01 = @HAVE_CLOCK_01@
+HAVE_CPUTIME_01 = @HAVE_CPUTIME_01@
+HAVE_GETRUSAGE_01 = @HAVE_GETRUSAGE_01@
+HAVE_GETTIMEOFDAY_01 = @HAVE_GETTIMEOFDAY_01@
+HAVE_HOST_CPU_FAMILY_power = @HAVE_HOST_CPU_FAMILY_power@
+HAVE_HOST_CPU_FAMILY_powerpc = @HAVE_HOST_CPU_FAMILY_powerpc@
+HAVE_SIGACTION_01 = @HAVE_SIGACTION_01@
+HAVE_SIGALTSTACK_01 = @HAVE_SIGALTSTACK_01@
+HAVE_SIGSTACK_01 = @HAVE_SIGSTACK_01@
+HAVE_STACK_T_01 = @HAVE_STACK_T_01@
+HAVE_SYS_RESOURCE_H_01 = @HAVE_SYS_RESOURCE_H_01@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LEX = @LEX@
+LEXLIB = @LEXLIB@
+LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
+LIBCURSES = @LIBCURSES@
+LIBGMPXX_LDFLAGS = @LIBGMPXX_LDFLAGS@
+LIBGMP_DLL = @LIBGMP_DLL@
+LIBGMP_LDFLAGS = @LIBGMP_LDFLAGS@
+LIBM = @LIBM@
+LIBM_FOR_BUILD = @LIBM_FOR_BUILD@
+LIBOBJS = @LIBOBJS@
+LIBREADLINE = @LIBREADLINE@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+M4 = @M4@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
+STRIP = @STRIP@
+TAL_OBJECT = @TAL_OBJECT@
+TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
+U = @U@
+U_FOR_BUILD = @U_FOR_BUILD@
+VERSION = @VERSION@
+WITH_READLINE_01 = @WITH_READLINE_01@
+YACC = @YACC@
+YFLAGS = @YFLAGS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__leading_dot = @am__leading_dot@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+gmp_srclinks = @gmp_srclinks@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+mpn_objects = @mpn_objects@
+mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
+mpn_objs_in_libmp = @mpn_objs_in_libmp@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+INCLUDES = -D__GMP_WITHIN_GMP -I$(top_srcdir)
+noinst_LTLIBRARIES = libmpf.la
+libmpf_la_SOURCES = \
+  init.c init2.c inits.c set.c set_ui.c set_si.c set_str.c set_d.c set_z.c \
+  set_q.c iset.c iset_ui.c iset_si.c iset_str.c iset_d.c clear.c clears.c \
+  get_str.c dump.c size.c eq.c reldiff.c sqrt.c random2.c inp_str.c out_str.c \
+  add.c add_ui.c sub.c sub_ui.c ui_sub.c mul.c mul_ui.c div.c div_ui.c \
+  cmp.c cmp_d.c cmp_si.c cmp_ui.c mul_2exp.c div_2exp.c abs.c neg.c get_d.c \
+  get_d_2exp.c set_dfl_prec.c set_prc.c set_prc_raw.c get_dfl_prec.c get_prc.c \
+  ui_div.c sqrt_ui.c \
+  pow_ui.c urandomb.c swap.c get_si.c get_ui.c int_p.c \
+  ceilfloor.c trunc.c \
+  fits_sint.c fits_slong.c fits_sshort.c \
+  fits_uint.c fits_ulong.c fits_ushort.c \
+  fits_s.h fits_u.h
+
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps)
+       @for dep in $?; do \
+         case '$(am__configure_deps)' in \
+           *$$dep*) \
+             ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+               && { if test -f $@; then exit 0; else break; fi; }; \
+             exit 1;; \
+         esac; \
+       done; \
+       echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps mpf/Makefile'; \
+       $(am__cd) $(top_srcdir) && \
+         $(AUTOMAKE) --gnu --ignore-deps mpf/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+       @case '$?' in \
+         *config.status*) \
+           cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+         *) \
+           echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+           cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+       esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+clean-noinstLTLIBRARIES:
+       -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES)
+       @list='$(noinst_LTLIBRARIES)'; for p in $$list; do \
+         dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \
+         test "$$dir" != "$$p" || dir=.; \
+         echo "rm -f \"$${dir}/so_locations\""; \
+         rm -f "$${dir}/so_locations"; \
+       done
+libmpf.la: $(libmpf_la_OBJECTS) $(libmpf_la_DEPENDENCIES) 
+       $(LINK)  $(libmpf_la_OBJECTS) $(libmpf_la_LIBADD) $(LIBS)
+
+mostlyclean-compile:
+       -rm -f *.$(OBJEXT)
+
+distclean-compile:
+       -rm -f *.tab.c
+$(top_builddir)/ansi2knr:
+       $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
+
+mostlyclean-kr:
+       -test "$U" = "" || rm -f *_.c
+
+.c.o:
+       $(COMPILE) -c $<
+
+.c.obj:
+       $(COMPILE) -c `$(CYGPATH_W) '$<'`
+
+.c.lo:
+       $(LTCOMPILE) -c -o $@ $<
+abs_.c: abs.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/abs.c; then echo $(srcdir)/abs.c; else echo abs.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+add_.c: add.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/add.c; then echo $(srcdir)/add.c; else echo add.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+add_ui_.c: add_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/add_ui.c; then echo $(srcdir)/add_ui.c; else echo add_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+ceilfloor_.c: ceilfloor.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/ceilfloor.c; then echo $(srcdir)/ceilfloor.c; else echo ceilfloor.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+clear_.c: clear.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/clear.c; then echo $(srcdir)/clear.c; else echo clear.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+clears_.c: clears.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/clears.c; then echo $(srcdir)/clears.c; else echo clears.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+cmp_.c: cmp.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cmp.c; then echo $(srcdir)/cmp.c; else echo cmp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+cmp_d_.c: cmp_d.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cmp_d.c; then echo $(srcdir)/cmp_d.c; else echo cmp_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+cmp_si_.c: cmp_si.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cmp_si.c; then echo $(srcdir)/cmp_si.c; else echo cmp_si.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+cmp_ui_.c: cmp_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cmp_ui.c; then echo $(srcdir)/cmp_ui.c; else echo cmp_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+div_.c: div.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/div.c; then echo $(srcdir)/div.c; else echo div.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+div_2exp_.c: div_2exp.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/div_2exp.c; then echo $(srcdir)/div_2exp.c; else echo div_2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+div_ui_.c: div_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/div_ui.c; then echo $(srcdir)/div_ui.c; else echo div_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+dump_.c: dump.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/dump.c; then echo $(srcdir)/dump.c; else echo dump.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+eq_.c: eq.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/eq.c; then echo $(srcdir)/eq.c; else echo eq.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+fits_sint_.c: fits_sint.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fits_sint.c; then echo $(srcdir)/fits_sint.c; else echo fits_sint.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+fits_slong_.c: fits_slong.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fits_slong.c; then echo $(srcdir)/fits_slong.c; else echo fits_slong.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+fits_sshort_.c: fits_sshort.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fits_sshort.c; then echo $(srcdir)/fits_sshort.c; else echo fits_sshort.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+fits_uint_.c: fits_uint.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fits_uint.c; then echo $(srcdir)/fits_uint.c; else echo fits_uint.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+fits_ulong_.c: fits_ulong.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fits_ulong.c; then echo $(srcdir)/fits_ulong.c; else echo fits_ulong.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+fits_ushort_.c: fits_ushort.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fits_ushort.c; then echo $(srcdir)/fits_ushort.c; else echo fits_ushort.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+get_d_.c: get_d.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_d.c; then echo $(srcdir)/get_d.c; else echo get_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+get_d_2exp_.c: get_d_2exp.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_d_2exp.c; then echo $(srcdir)/get_d_2exp.c; else echo get_d_2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+get_dfl_prec_.c: get_dfl_prec.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_dfl_prec.c; then echo $(srcdir)/get_dfl_prec.c; else echo get_dfl_prec.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+get_prc_.c: get_prc.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_prc.c; then echo $(srcdir)/get_prc.c; else echo get_prc.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+get_si_.c: get_si.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_si.c; then echo $(srcdir)/get_si.c; else echo get_si.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+get_str_.c: get_str.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_str.c; then echo $(srcdir)/get_str.c; else echo get_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+get_ui_.c: get_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_ui.c; then echo $(srcdir)/get_ui.c; else echo get_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+init_.c: init.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/init.c; then echo $(srcdir)/init.c; else echo init.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+init2_.c: init2.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/init2.c; then echo $(srcdir)/init2.c; else echo init2.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+inits_.c: inits.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/inits.c; then echo $(srcdir)/inits.c; else echo inits.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+inp_str_.c: inp_str.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/inp_str.c; then echo $(srcdir)/inp_str.c; else echo inp_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+int_p_.c: int_p.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/int_p.c; then echo $(srcdir)/int_p.c; else echo int_p.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+iset_.c: iset.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/iset.c; then echo $(srcdir)/iset.c; else echo iset.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+iset_d_.c: iset_d.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/iset_d.c; then echo $(srcdir)/iset_d.c; else echo iset_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+iset_si_.c: iset_si.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/iset_si.c; then echo $(srcdir)/iset_si.c; else echo iset_si.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+iset_str_.c: iset_str.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/iset_str.c; then echo $(srcdir)/iset_str.c; else echo iset_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+iset_ui_.c: iset_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/iset_ui.c; then echo $(srcdir)/iset_ui.c; else echo iset_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mul_.c: mul.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul.c; then echo $(srcdir)/mul.c; else echo mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mul_2exp_.c: mul_2exp.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_2exp.c; then echo $(srcdir)/mul_2exp.c; else echo mul_2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mul_ui_.c: mul_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_ui.c; then echo $(srcdir)/mul_ui.c; else echo mul_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+neg_.c: neg.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/neg.c; then echo $(srcdir)/neg.c; else echo neg.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+out_str_.c: out_str.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/out_str.c; then echo $(srcdir)/out_str.c; else echo out_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+pow_ui_.c: pow_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/pow_ui.c; then echo $(srcdir)/pow_ui.c; else echo pow_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+random2_.c: random2.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/random2.c; then echo $(srcdir)/random2.c; else echo random2.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+reldiff_.c: reldiff.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/reldiff.c; then echo $(srcdir)/reldiff.c; else echo reldiff.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+set_.c: set.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set.c; then echo $(srcdir)/set.c; else echo set.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+set_d_.c: set_d.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_d.c; then echo $(srcdir)/set_d.c; else echo set_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+set_dfl_prec_.c: set_dfl_prec.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_dfl_prec.c; then echo $(srcdir)/set_dfl_prec.c; else echo set_dfl_prec.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+set_prc_.c: set_prc.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_prc.c; then echo $(srcdir)/set_prc.c; else echo set_prc.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+set_prc_raw_.c: set_prc_raw.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_prc_raw.c; then echo $(srcdir)/set_prc_raw.c; else echo set_prc_raw.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+set_q_.c: set_q.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_q.c; then echo $(srcdir)/set_q.c; else echo set_q.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+set_si_.c: set_si.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_si.c; then echo $(srcdir)/set_si.c; else echo set_si.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+set_str_.c: set_str.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_str.c; then echo $(srcdir)/set_str.c; else echo set_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+set_ui_.c: set_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_ui.c; then echo $(srcdir)/set_ui.c; else echo set_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+set_z_.c: set_z.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_z.c; then echo $(srcdir)/set_z.c; else echo set_z.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+size_.c: size.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/size.c; then echo $(srcdir)/size.c; else echo size.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+sqrt_.c: sqrt.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sqrt.c; then echo $(srcdir)/sqrt.c; else echo sqrt.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+sqrt_ui_.c: sqrt_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sqrt_ui.c; then echo $(srcdir)/sqrt_ui.c; else echo sqrt_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+sub_.c: sub.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sub.c; then echo $(srcdir)/sub.c; else echo sub.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+sub_ui_.c: sub_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sub_ui.c; then echo $(srcdir)/sub_ui.c; else echo sub_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+swap_.c: swap.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/swap.c; then echo $(srcdir)/swap.c; else echo swap.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+trunc_.c: trunc.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/trunc.c; then echo $(srcdir)/trunc.c; else echo trunc.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+ui_div_.c: ui_div.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/ui_div.c; then echo $(srcdir)/ui_div.c; else echo ui_div.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+ui_sub_.c: ui_sub.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/ui_sub.c; then echo $(srcdir)/ui_sub.c; else echo ui_sub.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+urandomb_.c: urandomb.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/urandomb.c; then echo $(srcdir)/urandomb.c; else echo urandomb.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+abs_.$(OBJEXT) abs_.lo add_.$(OBJEXT) add_.lo add_ui_.$(OBJEXT) \
+add_ui_.lo ceilfloor_.$(OBJEXT) ceilfloor_.lo clear_.$(OBJEXT) \
+clear_.lo clears_.$(OBJEXT) clears_.lo cmp_.$(OBJEXT) cmp_.lo \
+cmp_d_.$(OBJEXT) cmp_d_.lo cmp_si_.$(OBJEXT) cmp_si_.lo \
+cmp_ui_.$(OBJEXT) cmp_ui_.lo div_.$(OBJEXT) div_.lo \
+div_2exp_.$(OBJEXT) div_2exp_.lo div_ui_.$(OBJEXT) div_ui_.lo \
+dump_.$(OBJEXT) dump_.lo eq_.$(OBJEXT) eq_.lo fits_sint_.$(OBJEXT) \
+fits_sint_.lo fits_slong_.$(OBJEXT) fits_slong_.lo \
+fits_sshort_.$(OBJEXT) fits_sshort_.lo fits_uint_.$(OBJEXT) \
+fits_uint_.lo fits_ulong_.$(OBJEXT) fits_ulong_.lo \
+fits_ushort_.$(OBJEXT) fits_ushort_.lo get_d_.$(OBJEXT) get_d_.lo \
+get_d_2exp_.$(OBJEXT) get_d_2exp_.lo get_dfl_prec_.$(OBJEXT) \
+get_dfl_prec_.lo get_prc_.$(OBJEXT) get_prc_.lo get_si_.$(OBJEXT) \
+get_si_.lo get_str_.$(OBJEXT) get_str_.lo get_ui_.$(OBJEXT) get_ui_.lo \
+init_.$(OBJEXT) init_.lo init2_.$(OBJEXT) init2_.lo inits_.$(OBJEXT) \
+inits_.lo inp_str_.$(OBJEXT) inp_str_.lo int_p_.$(OBJEXT) int_p_.lo \
+iset_.$(OBJEXT) iset_.lo iset_d_.$(OBJEXT) iset_d_.lo \
+iset_si_.$(OBJEXT) iset_si_.lo iset_str_.$(OBJEXT) iset_str_.lo \
+iset_ui_.$(OBJEXT) iset_ui_.lo mul_.$(OBJEXT) mul_.lo \
+mul_2exp_.$(OBJEXT) mul_2exp_.lo mul_ui_.$(OBJEXT) mul_ui_.lo \
+neg_.$(OBJEXT) neg_.lo out_str_.$(OBJEXT) out_str_.lo \
+pow_ui_.$(OBJEXT) pow_ui_.lo random2_.$(OBJEXT) random2_.lo \
+reldiff_.$(OBJEXT) reldiff_.lo set_.$(OBJEXT) set_.lo set_d_.$(OBJEXT) \
+set_d_.lo set_dfl_prec_.$(OBJEXT) set_dfl_prec_.lo set_prc_.$(OBJEXT) \
+set_prc_.lo set_prc_raw_.$(OBJEXT) set_prc_raw_.lo set_q_.$(OBJEXT) \
+set_q_.lo set_si_.$(OBJEXT) set_si_.lo set_str_.$(OBJEXT) set_str_.lo \
+set_ui_.$(OBJEXT) set_ui_.lo set_z_.$(OBJEXT) set_z_.lo \
+size_.$(OBJEXT) size_.lo sqrt_.$(OBJEXT) sqrt_.lo sqrt_ui_.$(OBJEXT) \
+sqrt_ui_.lo sub_.$(OBJEXT) sub_.lo sub_ui_.$(OBJEXT) sub_ui_.lo \
+swap_.$(OBJEXT) swap_.lo trunc_.$(OBJEXT) trunc_.lo ui_div_.$(OBJEXT) \
+ui_div_.lo ui_sub_.$(OBJEXT) ui_sub_.lo urandomb_.$(OBJEXT) \
+urandomb_.lo : $(ANSI2KNR)  # no recipe: this rule only adds $(ANSI2KNR) as a prerequisite of every "_" object and its .lo counterpart
+
+mostlyclean-libtool:  # remove the *.lo files; the leading "-" makes make ignore a failing rm
+       -rm -f *.lo
+
+clean-libtool:  # remove the .libs and _libs directories
+       -rm -rf .libs _libs
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)  # run mkid over the de-duplicated file list; names that are not files in the current directory are prefixed with $(srcdir)/
+       list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       mkid -fID $$unique
+tags: TAGS  # lower-case alias
+# TAGS runs etags over the de-duplicated file list.  Nothing is added to the positional parameters between "set x" and "shift" in this recipe, so only the branch without "$$@" can run; empty_fix is not assigned here.
+TAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+               $(TAGS_FILES) $(LISP)
+       set x; \
+       here=`pwd`; \
+       list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       shift; \
+       if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+         test -n "$$unique" || unique=$$empty_fix; \
+         if test $$# -gt 0; then \
+           $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+             "$$@" $$unique; \
+         else \
+           $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+             $$unique; \
+         fi; \
+       fi
+ctags: CTAGS  # lower-case alias; CTAGS runs ctags over the de-duplicated file list and does nothing when both CTAGS_ARGS and that list are empty
+CTAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+               $(TAGS_FILES) $(LISP)
+       list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       test -z "$(CTAGS_ARGS)$$unique" \
+         || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+            $$unique
+
+GTAGS:  # resolve $(top_builddir) to an absolute path, then run "gtags -i" on it from $(top_srcdir); am__cd is defined outside this excerpt
+       here=`$(am__cd) $(top_builddir) && pwd` \
+         && $(am__cd) $(top_srcdir) \
+         && gtags -i $(GTAGS_ARGS) "$$here"
+
+distclean-tags:  # remove the tag and ID index files listed below; rm failures are ignored
+       -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)  # copy each DISTFILES entry into $(distdir): a leading srcdir/ is stripped, a leading top_srcdir/ becomes top_builddir/, needed subdirectories are created first, directories go through cp -fpR, plain files are copied only if not already present
+       @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+       topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+       list='$(DISTFILES)'; \
+         dist_files=`for file in $$list; do echo $$file; done | \
+         sed -e "s|^$$srcdirstrip/||;t" \
+             -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+       case $$dist_files in \
+         */*) $(MKDIR_P) `echo "$$dist_files" | \
+                          sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+                          sort -u` ;; \
+       esac; \
+       for file in $$dist_files; do \
+         if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+         if test -d $$d/$$file; then \
+           dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+           if test -d "$(distdir)/$$file"; then \
+             find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+           fi; \
+           if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+             cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+             find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+           fi; \
+           cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+         else \
+           test -f "$(distdir)/$$file" \
+           || cp -p $$d/$$file "$(distdir)/$$file" \
+           || exit 1; \
+         fi; \
+       done
+check-am: all-am
+check: check-am
+all-am: Makefile $(LTLIBRARIES)  # the default build here is the Makefile itself plus the libtool libraries
+installdirs:  # empty recipe: no install directories are created from this Makefile
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+       @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:  # re-run "install" with the install program variables overridden by INSTALL_STRIP_PROGRAM; STRIPPROG is passed only when $(STRIP) is non-empty
+       $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+         install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+         `test -z '$(STRIP)' || \
+           echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:  # empty recipe
+
+clean-generic:  # empty recipe
+
+distclean-generic:  # remove the files named in CONFIG_CLEAN_FILES; CONFIG_CLEAN_VPATH_FILES are removed only when $(srcdir) is not "."
+       -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+       -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:  # only prints a warning; nothing is deleted by this recipe
+       @echo "This command is intended for maintainers to use"
+       @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \
+       mostlyclean-am
+
+distclean: distclean-am  # after the prerequisite has run, remove the Makefile as well
+       -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+       distclean-tags
+
+dvi: dvi-am  # the dvi/html/info/pdf/ps, install-*, installcheck-am and uninstall-am hooks in this section all have empty recipes
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am  # same final step as distclean: the Makefile is removed last
+       -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+       mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: $(top_builddir)/ansi2knr install-am install-strip  # NOTE(review): presumably marks targets whose recipes re-invoke make (a BSD make special target) -- confirm
+
+.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
+       clean-libtool clean-noinstLTLIBRARIES ctags distclean \
+       distclean-compile distclean-generic distclean-libtool \
+       distclean-tags distdir dvi dvi-am html html-am info info-am \
+       install install-am install-data install-data-am install-dvi \
+       install-dvi-am install-exec install-exec-am install-html \
+       install-html-am install-info install-info-am install-man \
+       install-pdf install-pdf-am install-ps install-ps-am \
+       install-strip installcheck installcheck-am installdirs \
+       maintainer-clean maintainer-clean-generic mostlyclean \
+       mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+       mostlyclean-libtool pdf pdf-am ps ps-am tags uninstall \
+       uninstall-am
+
+
+# Tell versions [3.59,3.63) of GNU make not to export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/mpf/abs.c b/mpf/abs.c

new file mode 100644 (file)

index 0000000..4f8a76e
--- /dev/null
+++ b/mpf/abs.c
@@ -0,0 +1,48 @@
+/* mpf_abs -- Compute the absolute value of a float.
+
+Copyright 1993, 1994, 1995, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpf_abs (mpf_ptr r, mpf_srcptr u)
+{
+  /* Set r to the absolute value of u.  r and u may be the same object;
+     otherwise at most r->_mp_prec + 1 limbs of u are copied into r.  */
+  mp_size_t abs_size = ABS (u->_mp_size);
+
+  if (r == u)
+    {
+      /* In place: limbs and exponent are already right, only the size
+         field has to lose its sign.  */
+      r->_mp_size = abs_size;
+    }
+  else
+    {
+      /* One limb beyond r's nominal precision is kept so the copy does
+         not lose precision; when u is longer than that, its lowest-index
+         limbs are the ones left out.  */
+      mp_size_t keep = MIN (abs_size, r->_mp_prec + 1);
+      mp_srcptr src = u->_mp_d + (abs_size - keep);
+
+      MPN_COPY (r->_mp_d, src, keep);
+      r->_mp_exp = u->_mp_exp;
+      r->_mp_size = keep;
+    }
+}
diff --git a/mpf/add.c b/mpf/add.c

new file mode 100644 (file)

index 0000000..48f73e9
--- /dev/null
+++ b/mpf/add.c
@@ -0,0 +1,173 @@
+/* mpf_add -- Add two floats.
+
+Copyright 1993, 1994, 1996, 2000, 2001, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+/* Set r = u + v, keeping r->_mp_prec limbs of each operand.  Operands of opposite sign are handed to mpf_sub.  */
+void
+mpf_add (mpf_ptr r, mpf_srcptr u, mpf_srcptr v)
+{
+  mp_srcptr up, vp;
+  mp_ptr rp, tp;
+  mp_size_t usize, vsize, rsize;
+  mp_size_t prec;
+  mp_exp_t uexp;
+  mp_size_t ediff;
+  mp_limb_t cy;
+  int negate;
+  TMP_DECL;
+
+  usize = u->_mp_size;
+  vsize = v->_mp_size;
+
+  /* Handle special cases that don't work in generic code below.  */
+  if (usize == 0)
+    {
+    set_r_v_maybe:  /* also entered from below with v = u, when v is zero */
+      if (r != v)
+        mpf_set (r, v);
+      return;
+    }
+  if (vsize == 0)
+    {
+      v = u;
+      goto set_r_v_maybe;
+    }
+
+  /* If signs of U and V are different, perform subtraction.  */
+  if ((usize ^ vsize) < 0)  /* the sign bits of the two size fields differ */
+    {
+      __mpf_struct v_negated;  /* -v, sharing v's limbs; _mp_prec is left unset */
+      v_negated._mp_size = -vsize;
+      v_negated._mp_exp = v->_mp_exp;
+      v_negated._mp_d = v->_mp_d;
+      mpf_sub (r, u, &v_negated);
+      return;
+    }
+
+  TMP_MARK;
+
+  /* Signs are now known to be the same.  */
+  negate = usize < 0;
+
+  /* Make U be the operand with the largest exponent.  */
+  if (u->_mp_exp < v->_mp_exp)
+    {
+      mpf_srcptr t;
+      t = u; u = v; v = t;
+      usize = u->_mp_size;
+      vsize = v->_mp_size;
+    }
+
+  usize = ABS (usize);
+  vsize = ABS (vsize);
+  up = u->_mp_d;
+  vp = v->_mp_d;
+  rp = r->_mp_d;
+  prec = r->_mp_prec;
+  uexp = u->_mp_exp;
+  ediff = u->_mp_exp - v->_mp_exp;  /* >= 0 after the swap above */
+
+  /* If U extends beyond PREC, ignore the part that does.  */
+  if (usize > prec)
+    {
+      up += usize - prec;
+      usize = prec;
+    }
+
+  /* If V extends beyond PREC, ignore the part that does.
+     Note that this may make vsize <= 0, but only when ediff >= prec (first branch below).  */
+  if (vsize + ediff > prec)
+    {
+      vp += vsize + ediff - prec;
+      vsize = prec - ediff;
+    }
+
+#if 0
+  /* Locate the least significant non-zero limb in (the needed parts
+     of) U and V, to simplify the code below.  */
+  while (up[0] == 0)
+    up++, usize--;
+  while (vp[0] == 0)
+    vp++, vsize--;
+#endif
+
+  /* Allocate temp space for the result.  Allocate
+     just vsize + ediff later???  */
+  tp = TMP_ALLOC_LIMBS (prec);
+
+  if (ediff >= prec)
+    {
+      /* V completely cancelled: no limb of it is within PREC, so R = U.  */
+      if (rp != up)
+       MPN_COPY_INCR (rp, up, usize);
+      rsize = usize;
+    }
+  else
+    {
+      /* uuuu     |  uuuu     |  uuuu     |  uuuu     |  uuuu    */
+      /* vvvvvvv  |  vv       |    vvvvv  |    v      |       vv */
+
+      if (usize > ediff)
+       {
+         /* U and V partially overlap.  */
+         if (vsize + ediff <= usize)
+           {
+             /* uuuu     */
+             /*   v      */
+             mp_size_t size;
+             size = usize - ediff - vsize;  /* low limbs of U with no V limb beneath them */
+             MPN_COPY (tp, up, size);
+             cy = mpn_add (tp + size, up + size, usize - size, vp, vsize);
+             rsize = usize;
+           }
+         else
+           {
+             /* uuuu     */
+             /*   vvvvv  */
+             mp_size_t size;
+             size = vsize + ediff - usize;  /* low limbs of V that extend below U */
+             MPN_COPY (tp, vp, size);
+             cy = mpn_add (tp + size, up, usize, vp + size, usize - ediff);
+             rsize = vsize + ediff;
+           }
+       }
+      else
+       {
+         /* uuuu     */
+         /*      vv  */
+         mp_size_t size;
+         size = vsize + ediff - usize;  /* V's limbs plus the zero gap between V and U */
+         MPN_COPY (tp, vp, vsize);
+         MPN_ZERO (tp + vsize, ediff - usize);
+         MPN_COPY (tp + size, up, usize);
+         cy = 0;
+         rsize = size + usize;
+       }
+
+      MPN_COPY (rp, tp, rsize);  /* the sum was built in tp and is only now moved into r */
+      rp[rsize] = cy;  /* carry limb is stored just above the rsize limbs copied */
+      rsize += cy;
+      uexp += cy;  /* a carry-out adds a top limb, so the exponent grows with it */
+    }
+
+  r->_mp_size = negate ? -rsize : rsize;
+  r->_mp_exp = uexp;
+  TMP_FREE;
+}
diff --git a/mpf/add_ui.c b/mpf/add_ui.c

new file mode 100644 (file)

index 0000000..549ca99
--- /dev/null
+++ b/mpf/add_ui.c
@@ -0,0 +1,142 @@
+/* mpf_add_ui -- Add a float and an unsigned integer.
+
+Copyright 1993, 1994, 1996, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+/* Set sum = u + v.  A negative u is computed as -((-u) - v) through mpf_sub_ui.  */
+void
+mpf_add_ui (mpf_ptr sum, mpf_srcptr u, unsigned long int v)
+{
+  mp_srcptr up = u->_mp_d;
+  mp_ptr sump = sum->_mp_d;
+  mp_size_t usize, sumsize;
+  mp_size_t prec = sum->_mp_prec;
+  mp_exp_t uexp = u->_mp_exp;
+
+  usize = u->_mp_size;
+  if (usize <= 0)
+    {
+      if (usize == 0)
+       {
+         mpf_set_ui (sum, v);
+         return;
+       }
+      else
+       {
+         __mpf_struct u_negated;  /* -u, sharing u's limbs; _mp_prec is left unset */
+         u_negated._mp_size = -usize;
+         u_negated._mp_exp = u->_mp_exp;
+         u_negated._mp_d = u->_mp_d;
+         mpf_sub_ui (sum, &u_negated, v);
+         sum->_mp_size = -(sum->_mp_size);  /* flip the sign of (-u) - v to get u + v */
+         return;
+       }
+    }
+
+  if (v == 0)
+    {
+    sum_is_u:  /* also entered from below when uexp > prec */
+      if (u != sum)
+       {
+         sumsize = MIN (usize, prec + 1);  /* at most prec + 1 limbs, taken from u's high-index end */
+         MPN_COPY (sum->_mp_d, up + usize - sumsize, sumsize);
+         sum->_mp_size = sumsize;
+         sum->_mp_exp = u->_mp_exp;
+       }
+      return;
+    }
+
+  if (uexp > 0)
+    {
+      /* U >= 1.  */
+      if (uexp > prec)
+       {
+         /* U >> V, V is not part of final result.  */
+         goto sum_is_u;
+       }
+      else
+       {
+         /* U's "limb point" is somewhere between the first limb
+            and the PREC:th limb.
+            Both U and V are part of the final result.  */
+         if (uexp > usize)
+           {
+             /*   uuuuuu0000. */
+             /* +          v. */
+             /* We begin with moving U to the top of SUM, to handle
+                samevar(U,SUM).  */
+             MPN_COPY_DECR (sump + uexp - usize, up, usize);
+             sump[0] = v;
+             MPN_ZERO (sump + 1, uexp - usize - 1);
+#if 0 /* What is this??? */
+             if (sum == u)
+               MPN_COPY (sum->_mp_d, sump, uexp);
+#endif
+             sum->_mp_size = uexp;  /* uexp limbs: v at index 0, zeros, then u on top */
+             sum->_mp_exp = uexp;
+           }
+         else
+           {
+             /*   uuuuuu.uuuu */
+             /* +      v.     */
+             mp_limb_t cy_limb;
+             if (usize > prec)
+               {
+                 /* Ignore excess limbs in U.  */
+                 up += usize - prec;
+                 usize -= usize - prec; /* Eq. usize = prec */
+               }
+             if (sump != up)
+               MPN_COPY_INCR (sump, up, usize - uexp);
+             cy_limb = mpn_add_1 (sump + usize - uexp, up + usize - uexp,
+                                  uexp, (mp_limb_t) v);
+             sump[usize] = cy_limb;  /* always stored; it is counted only through the size below */
+             sum->_mp_size = usize + cy_limb;
+             sum->_mp_exp = uexp + cy_limb;
+           }
+       }
+    }
+  else
+    {
+      /* U < 1, so V > U for sure.  */
+      /* v.         */
+      /*  .0000uuuu */
+      if ((-uexp) >= prec)  /* the result is set to v alone: one limb, exponent 1 */
+       {
+         sump[0] = v;
+         sum->_mp_size = 1;
+         sum->_mp_exp = 1;
+       }
+      else
+       {
+         if (usize + (-uexp) + 1 > prec)
+           {
+             /* Ignore excess limbs in U.  */
+             up += usize + (-uexp) + 1 - prec;
+             usize -= usize + (-uexp) + 1 - prec;
+           }
+         if (sump != up)
+           MPN_COPY_INCR (sump, up, usize);
+         MPN_ZERO (sump + usize, -uexp);  /* the -uexp zero limbs between u and v */
+         sump[usize + (-uexp)] = v;
+         sum->_mp_size = usize + (-uexp) + 1;
+         sum->_mp_exp = 1;
+       }
+    }
+}
diff --git a/mpf/ceilfloor.c b/mpf/ceilfloor.c

new file mode 100644 (file)

index 0000000..a0c5d77
--- /dev/null
+++ b/mpf/ceilfloor.c
@@ -0,0 +1,115 @@
+/* mpf_ceil, mpf_floor -- round an mpf to an integer.
+
+Copyright 2001, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Round u to an integer in r: dir==1 for ceil, dir==-1 for floor.  When dir
+   has the sign of u, any non-zero ignored limb increments the magnitude.
+   Notice the use of prec+1 ensures mpf_ceil and mpf_floor are equivalent to
+   mpf_set if u is already an integer.  */
+
+static void __gmpf_ceil_or_floor __GMP_PROTO ((REGPARM_2_1 (mpf_ptr, mpf_srcptr, int))) REGPARM_ATTR (1);
+#define mpf_ceil_or_floor(r,u,dir)  __gmpf_ceil_or_floor (REGPARM_2_1 (r, u, dir))
+
+REGPARM_ATTR (1) static void
+mpf_ceil_or_floor (mpf_ptr r, mpf_srcptr u, int dir)
+{
+  mp_ptr     rp, up, p;
+  mp_size_t  size, asize, prec;
+  mp_exp_t   exp;
+
+  size = SIZ(u);
+  if (size == 0)
+    {
+    zero:  /* also reached by goto from the pure-fraction case below */
+      SIZ(r) = 0;
+      EXP(r) = 0;
+      return;
+    }
+
+  rp = PTR(r);
+  exp = EXP(u);
+  if (exp <= 0)
+    {
+      /* u is only a fraction */
+      if ((size ^ dir) < 0)  /* sign of u differs from dir: result is zero */
+        goto zero;
+      rp[0] = 1;
+      EXP(r) = 1;
+      SIZ(r) = dir;  /* one limb holding 1, signed by dir: +1 or -1 */
+      return;
+    }
+  EXP(r) = exp;
+
+  up = PTR(u);
+  asize = ABS (size);
+  up += asize;  /* just above the most significant limb of u */
+
+  /* skip fraction part of u */
+  asize = MIN (asize, exp);
+
+  /* don't lose precision in the copy */
+  prec = PREC (r) + 1;
+
+  /* skip excess over target precision */
+  asize = MIN (asize, prec);
+
+  up -= asize;  /* lowest limb that is kept; limbs below it are ignored */
+
+  if ((size ^ dir) >= 0)
+    {
+      /* rounding direction matches sign, must increment if ignored part is
+         non-zero */
+      for (p = PTR(u); p != up; p++)  /* scan every ignored low limb */
+        {
+          if (*p != 0)
+            {
+              if (mpn_add_1 (rp, up, asize, CNST_LIMB(1)))  /* rp = kept limbs + 1 */
+                {
+                  /* was all 0xFF..FFs, which have become zeros, giving just
+                     a carry */
+                  rp[0] = 1;
+                  asize = 1;
+                  EXP(r)++;
+                }
+              SIZ(r) = (size >= 0 ? asize : -asize);
+              return;
+            }
+        }
+    }
+
+  SIZ(r) = (size >= 0 ? asize : -asize);  /* no increment needed: plain copy of the kept limbs */
+  if (rp != up)
+    MPN_COPY_INCR (rp, up, asize);
+}
+
+
+void
+mpf_ceil (mpf_ptr r, mpf_srcptr u)  /* r = u rounded via mpf_ceil_or_floor with dir==1 */
+{
+  mpf_ceil_or_floor (r, u, 1);
+}
+
+void
+mpf_floor (mpf_ptr r, mpf_srcptr u)  /* r = u rounded via mpf_ceil_or_floor with dir==-1 */
+{
+  mpf_ceil_or_floor (r, u, -1);
+}
diff --git a/mpf/clear.c b/mpf/clear.c

new file mode 100644 (file)

index 0000000..78fc138
--- /dev/null
+++ b/mpf/clear.c
@@ -0,0 +1,28 @@
+/* mpf_clear -- de-allocate the space occupied by the dynamic digit space of
+   a float.
+
+Copyright 1993, 1994, 1995, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpf_clear (mpf_ptr m)  /* hand the limb array of m back to the GMP free function */
+{
+  (*__gmp_free_func) (m->_mp_d, (m->_mp_prec + 1) * BYTES_PER_MP_LIMB);  /* size passed: prec+1 limbs, in bytes */
+}
diff --git a/mpf/clears.c b/mpf/clears.c

new file mode 100644 (file)

index 0000000..8d43006
--- /dev/null
+++ b/mpf/clears.c
@@ -0,0 +1,56 @@
+/* mpf_clears() -- Clear multiple mpf_t variables.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#if HAVE_STDARG
+#include <stdarg.h>
+#else
+#include <varargs.h>
+#endif
+
+#include <stdio.h>             /* for NULL */
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if HAVE_STDARG
+mpf_clears (mpf_ptr x, ...)  /* call mpf_clear on each argument, up to a terminating NULL */
+#else
+mpf_clears (va_alist)
+     va_dcl
+#endif
+{
+  va_list  ap;
+
+#if HAVE_STDARG
+  va_start (ap, x);
+#else
+  mpf_ptr x;
+  va_start (ap);
+  x = va_arg (ap, mpf_ptr);  /* <varargs.h> style: the first argument is fetched by hand */
+#endif
+
+  while (x != NULL)  /* the argument list must end with a NULL pointer */
+    {
+      mpf_clear (x);
+      x = va_arg (ap, mpf_ptr);
+    }
+  va_end (ap);
+}
diff --git a/mpf/cmp.c b/mpf/cmp.c

new file mode 100644 (file)

index 0000000..9a3c7ae
--- /dev/null
+++ b/mpf/cmp.c
@@ -0,0 +1,106 @@
+/* mpf_cmp -- Compare two floats.
+
+Copyright 1993, 1994, 1996, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+int
+mpf_cmp (mpf_srcptr u, mpf_srcptr v) __GMP_NOTHROW  /* returns 1, 0 or -1 for u > v, u == v, u < v */
+{
+  mp_srcptr up, vp;
+  mp_size_t usize, vsize;
+  mp_exp_t uexp, vexp;
+  int cmp;
+  int usign;
+
+  uexp = u->_mp_exp;
+  vexp = v->_mp_exp;
+
+  usize = u->_mp_size;
+  vsize = v->_mp_size;
+
+  /* 1. Are the signs different?  */
+  if ((usize ^ vsize) >= 0)
+    {
+      /* U and V are both non-negative or both negative.  */
+      if (usize == 0)
+       /* vsize >= 0 */
+       return -(vsize != 0);
+      if (vsize == 0)
+       /* usize >= 0 */
+       return usize != 0;
+      /* Fall out.  */
+    }
+  else
+    {
+      /* Either U or V is negative, but not both.  */
+      return usize >= 0 ? 1 : -1;
+    }
+
+  /* U and V have the same sign and are both non-zero.  */
+
+  usign = usize >= 0 ? 1 : -1;
+
+  /* 2. Are the exponents different?  */
+  if (uexp > vexp)
+    return usign;  /* |u| > |v|, so the sign of u decides */
+  if (uexp < vexp)
+    return -usign;
+
+  usize = ABS (usize);
+  vsize = ABS (vsize);
+
+  up = u->_mp_d;
+  vp = v->_mp_d;
+
+#define STRICT_MPF_NORMALIZATION 0
+#if ! STRICT_MPF_NORMALIZATION
+  /* Ignore zeroes at the low end of U and V.  */
+  while (up[0] == 0)  /* NOTE(review): relies on a non-zero limb existing within usize -- confirm invariant */
+    {
+      up++;
+      usize--;
+    }
+  while (vp[0] == 0)
+    {
+      vp++;
+      vsize--;
+    }
+#endif
+
+  if (usize > vsize)
+    {
+      cmp = mpn_cmp (up + usize - vsize, vp, vsize);  /* high vsize limbs of U against all of V */
+      if (cmp == 0)
+       return usign;  /* U has extra low limbs, and its lowest one is non-zero */
+    }
+  else if (vsize > usize)
+    {
+      cmp = mpn_cmp (up, vp + vsize - usize, usize);  /* all of U against high usize limbs of V */
+      if (cmp == 0)
+       return -usign;
+    }
+  else
+    {
+      cmp = mpn_cmp (up, vp, usize);
+      if (cmp == 0)
+       return 0;
+    }
+  return cmp > 0 ? usign : -usign;  /* magnitude order, flipped when both are negative */
+}
diff --git a/mpf/cmp_d.c b/mpf/cmp_d.c

new file mode 100644 (file)

index 0000000..09f1b65
--- /dev/null
+++ b/mpf/cmp_d.c
@@ -0,0 +1,49 @@
+/* mpf_cmp_d -- compare mpf and double.
+
+Copyright 2001, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#if HAVE_FLOAT_H
+#include <float.h>  /* for DBL_MAX */
+#endif
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+int
+mpf_cmp_d (mpf_srcptr f, double d)  /* compare f with d by building a temporary mpf from d */
+{
+  mp_limb_t  darray[LIMBS_PER_DOUBLE];
+  mpf_t      df;
+
+  /* d=NaN has no sensible return value, so raise an exception.
+     d=+Inf is bigger than any f (return -1), d=-Inf is smaller (return 1).  */
+  DOUBLE_NAN_INF_ACTION (d,
+                         __gmp_invalid_operation (),
+                         return (d < 0.0 ? 1 : -1));
+
+  if (d == 0.0)
+    return SIZ(f);  /* only the sign is meaningful here; the value need not be +-1 */
+
+  PTR(df) = darray;  /* df borrows the local array, nothing is allocated */
+  SIZ(df) = (d >= 0.0 ? LIMBS_PER_DOUBLE : -LIMBS_PER_DOUBLE);
+  EXP(df) = __gmp_extract_double (darray, ABS(d));  /* fills darray, return value is used as the exponent */
+
+  return mpf_cmp (f, df);
+}
diff --git a/mpf/cmp_si.c b/mpf/cmp_si.c

new file mode 100644 (file)

index 0000000..9b364a3
--- /dev/null
+++ b/mpf/cmp_si.c
@@ -0,0 +1,108 @@
+/* mpf_cmp_si -- Compare a float with a signed integer.
+
+Copyright 1993, 1994, 1995, 1999, 2000, 2001, 2002, 2004 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+int
+mpf_cmp_si (mpf_srcptr u, long int vval) __GMP_NOTHROW  /* returns 1, 0 or -1 for u > vval, u == vval, u < vval */
+{
+  mp_srcptr up;
+  mp_size_t usize;
+  mp_exp_t uexp;
+  mp_limb_t ulimb;
+  int usign;
+  unsigned long abs_vval;
+
+  uexp = u->_mp_exp;
+  usize = u->_mp_size;
+
+  /* 1. Are the signs different?  */
+  if ((usize < 0) == (vval < 0)) /* don't use xor, type size may differ */
+    {
+      /* U and V are both non-negative or both negative.  */
+      if (usize == 0)
+       /* vval >= 0 */
+       return -(vval != 0);
+      if (vval == 0)
+       /* usize >= 0 */
+       return usize != 0;
+      /* Fall out.  */
+    }
+  else
+    {
+      /* Either U or V is negative, but not both.  */
+      return usize >= 0 ? 1 : -1;
+    }
+
+  /* U and V have the same sign and are both non-zero.  */
+
+  usign = usize >= 0 ? 1 : -1;
+  usize = ABS (usize);
+  abs_vval = ABS_CAST (unsigned long, vval);  /* magnitude of vval held in an unsigned long */
+
+  /* 2. Are the exponents different (V's exponent == 1)?  */
+#if GMP_NAIL_BITS != 0
+  if (uexp > 1 + (abs_vval > GMP_NUMB_MAX))  /* V needs two limbs when above GMP_NUMB_MAX */
+    return usign;
+  if (uexp < 1 + (abs_vval > GMP_NUMB_MAX))
+    return -usign;
+#else
+  if (uexp > 1)
+    return usign;
+  if (uexp < 1)
+    return -usign;
+#endif
+
+  up = u->_mp_d;
+
+  ulimb = up[usize - 1];  /* most significant limb of U */
+#if GMP_NAIL_BITS != 0
+  if (usize >= 2 && uexp == 2)
+    {
+      if ((ulimb >> GMP_NAIL_BITS) != 0)
+       return usign;
+      ulimb = (ulimb << GMP_NUMB_BITS) | up[usize - 2];  /* merge the top two limbs into one value */
+      usize--;
+    }
+#endif
+  usize--;  /* usize now counts the limbs below the compared part */
+
+  /* 3. Compare the most significant mantissa limb with V.  */
+  if (ulimb > abs_vval)
+    return usign;
+  else if (ulimb < abs_vval)
+    return -usign;
+
+  /* Ignore zeroes at the low end of U.  */
+  while (*up == 0)  /* stops at the latest on the compared limb, which equals abs_vval != 0 */
+    {
+      up++;
+      usize--;
+    }
+
+  /* 4. Now, if the number of limbs are different, we have a difference
+     since we have made sure the trailing limbs are not zero.  */
+  if (usize > 0)
+    return usign;  /* a non-zero lower limb remains, so |u| > |vval| */
+
+  /* Wow, we got zero even if we tried hard to avoid it.  */
+  return 0;
+}
diff --git a/mpf/cmp_ui.c b/mpf/cmp_ui.c

new file mode 100644 (file)

index 0000000..5e5ed00
--- /dev/null
+++ b/mpf/cmp_ui.c
@@ -0,0 +1,89 @@
+/* mpf_cmp_ui -- Compare a float with an unsigned integer.
+
+Copyright 1993, 1994, 1995, 1999, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+int
+mpf_cmp_ui (mpf_srcptr u, unsigned long int vval) __GMP_NOTHROW  /* returns 1, 0 or -1 for u > vval, u == vval, u < vval */
+{
+  mp_srcptr up;
+  mp_size_t usize;
+  mp_exp_t uexp;
+  mp_limb_t ulimb;
+
+  uexp = u->_mp_exp;
+  usize = u->_mp_size;
+
+  /* 1. Is U negative?  */
+  if (usize < 0)
+    return -1;
+  /* We rely on usize being non-negative in the code that follows.  */
+
+  if (vval == 0)
+    return usize != 0;  /* u >= 0 here: 0 when u is zero, else 1 */
+
+  /* 2. Are the exponents different (V's exponent == 1)?  */
+#if GMP_NAIL_BITS != 0
+  if (uexp > 1 + (vval > GMP_NUMB_MAX))  /* V needs two limbs when above GMP_NUMB_MAX */
+    return 1;
+  if (uexp < 1 + (vval > GMP_NUMB_MAX))
+    return -1;
+#else
+  if (uexp > 1)
+    return 1;
+  if (uexp < 1)
+    return -1;
+#endif
+
+  up = u->_mp_d;
+
+  ulimb = up[usize - 1];  /* most significant limb of U */
+#if GMP_NAIL_BITS != 0
+  if (usize >= 2 && uexp == 2)
+    {
+      if ((ulimb >> GMP_NAIL_BITS) != 0)
+       return 1;
+      ulimb = (ulimb << GMP_NUMB_BITS) | up[usize - 2];  /* merge the top two limbs into one value */
+      usize--;
+    }
+#endif
+  usize--;  /* usize now counts the limbs below the compared part */
+
+  /* 3. Compare the most significant mantissa limb with V.  */
+  if (ulimb > vval)
+    return 1;
+  else if (ulimb < vval)
+    return -1;
+
+  /* Ignore zeroes at the low end of U.  */
+  while (*up == 0)  /* stops at the latest on the compared limb, which equals vval != 0 */
+    {
+      up++;
+      usize--;
+    }
+
+  /* 4. Now, if the number of limbs are different, we have a difference
+     since we have made sure the trailing limbs are not zero.  */
+  if (usize > 0)
+    return 1;  /* a non-zero lower limb remains, so u > vval */
+
+  /* Wow, we got zero even if we tried hard to avoid it.  */
+  return 0;
+}
diff --git a/mpf/div.c b/mpf/div.c

new file mode 100644 (file)

index 0000000..8f3abc6
--- /dev/null
+++ b/mpf/div.c
@@ -0,0 +1,126 @@
+/* mpf_div -- Divide two floats.
+
+Copyright 1993, 1994, 1996, 2000, 2001, 2002, 2004, 2005, 2010 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Not done:
+
+   No attempt is made to identify an overlap u==v.  The result will be
+   correct (1.0), but a full actual division is done whereas of course
+   x/x==1 needs no work.  Such a call is not a sensible thing to make, and
+   it's left to an application to notice and optimize if it might arise
+   somehow through pointer aliasing or whatever.
+
+   Enhancements:
+
+   The high quotient limb is non-zero when high{up,vsize} >= {vp,vsize}.  We
+   could make that comparison and use qsize==prec instead of qsize==prec+1,
+   to save one limb in the division.
+
+   If r==u but the size is enough bigger than prec that there won't be an
+   overlap between quotient and dividend in mpn_tdiv_qr, then we can avoid
+   copying up,usize.  This would only arise from a prec reduced with
+   mpf_set_prec_raw and will be pretty unusual, but might be worthwhile if
+   it could be worked into the copy_u decision cleanly.  */
+
+void
+mpf_div (mpf_ptr r, mpf_srcptr u, mpf_srcptr v)  /* r = u / v, using a quotient of prec+1 limbs */
+{
+  mp_srcptr up, vp;
+  mp_ptr rp, tp, new_vp;
+  mp_size_t usize, vsize, rsize, prospective_rsize, tsize, zeros;
+  mp_size_t sign_quotient, prec, high_zero, chop;
+  mp_exp_t rexp;
+  int copy_u;
+  TMP_DECL;
+
+  usize = SIZ(u);
+  vsize = SIZ(v);
+  sign_quotient = usize ^ vsize;  /* negative exactly when the operand signs differ */
+  usize = ABS (usize);
+  vsize = ABS (vsize);
+  prec = PREC(r);
+
+  if (vsize == 0)
+    DIVIDE_BY_ZERO;
+
+  if (usize == 0)
+    {
+      SIZ(r) = 0;
+      EXP(r) = 0;
+      return;
+    }
+
+  TMP_MARK;
+  rexp = EXP(u) - EXP(v) + 1;  /* reduced by one below if the high quotient limb is zero */
+
+  rp = PTR(r);
+  up = PTR(u);
+  vp = PTR(v);
+
+  prospective_rsize = usize - vsize + 1; /* quot from using given u,v sizes */
+  rsize = prec + 1;                     /* desired quot */
+
+  zeros = rsize - prospective_rsize;    /* padding u to give rsize */
+  copy_u = (zeros > 0 || rp == up);     /* copy u if overlap or padding */
+
+  chop = MAX (-zeros, 0);               /* negative zeros means shorten u */
+  up += chop;
+  usize -= chop;
+  zeros += chop;                        /* now zeros >= 0 */
+
+  tsize = usize + zeros;                /* size for possible copy of u */
+
+  /* copy and possibly extend u if necessary */
+  if (copy_u)
+    {
+      tp = TMP_ALLOC_LIMBS (tsize + 1);        /* +1 for mpn_div_q's scratch needs */
+      MPN_ZERO (tp, zeros);
+      MPN_COPY (tp+zeros, up, usize);
+      up = tp;
+      usize = tsize;
+    }
+  else
+    {
+      tp = TMP_ALLOC_LIMBS (usize + 1);  /* u is used in place; tp is only scratch here */
+    }
+
+  /* ensure divisor doesn't overlap quotient */
+  if (rp == vp)
+    {
+      new_vp = TMP_ALLOC_LIMBS (vsize);
+      MPN_COPY (new_vp, vp, vsize);
+      vp = new_vp;
+    }
+
+  ASSERT (usize-vsize+1 == rsize);
+  mpn_div_q (rp, up, usize, vp, vsize, tp);  /* quotient limbs are written to rp */
+
+  /* strip possible zero high limb */
+  high_zero = (rp[rsize-1] == 0);
+  rsize -= high_zero;
+  rexp -= high_zero;
+
+  SIZ(r) = sign_quotient >= 0 ? rsize : -rsize;
+  EXP(r) = rexp;
+  TMP_FREE;
+}
diff --git a/mpf/div_2exp.c b/mpf/div_2exp.c

new file mode 100644 (file)

index 0000000..f74cd8b
--- /dev/null
+++ b/mpf/div_2exp.c
@@ -0,0 +1,129 @@
+/* mpf_div_2exp -- Divide a float by 2^n.
+
+Copyright 1993, 1994, 1996, 2000, 2001, 2002, 2004 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Multiples of GMP_NUMB_BITS in exp simply mean an amount subtracted from
+   EXP(u) to set EXP(r).  The remainder exp%GMP_NUMB_BITS is then a right
+   shift for the limb data.
+
+   If exp%GMP_NUMB_BITS == 0 then there's no shifting, we effectively just
+   do an mpf_set with changed EXP(r).  Like mpf_set we take prec+1 limbs in
+   this case.  Although just prec would suffice, it's nice to have
+   mpf_div_2exp with exp==0 come out the same as mpf_set.
+
+   When shifting we take up to prec many limbs from the input.  Our shift is
+   cy = mpn_rshift (PTR(r)+1, PTR(u)+k, ...), where k is the number of low
+   limbs dropped from u, and the carry out is stored to PTR(r)[0].  We don't
+   try to work extra bits from PTR(u)[k-1] (when k>=1 makes it available)
+   into that low carry limb.  Just prec limbs (with the high non-zero) from
+   the input is enough bits for the application requested precision, no need
+   to do extra work.
+
+   If r==u the shift will have overlapping operands.  When k>=1 (ie. when
+   usize > prec), the overlap is in the style supported by rshift (ie. dst
+   <= src).
+
+   But when r==u and k==0 (ie. usize <= prec), we would have an invalid
+   overlap (mpn_rshift (rp+1, rp, ...)).  In this case we must instead use
+   mpn_lshift (PTR(r), PTR(u), size, NUMB-shift).  An lshift by NUMB-shift
+   bits gives identical data of course, it's just its overlap restrictions
+   which differ.
+
+   In both shift cases, the resulting data is abs_usize+1 limbs.  "adj" is
+   used to add +1 to that size if the high is non-zero (it may of course
+   have become zero by the shifting).  EXP(u) is the exponent just above
+   those abs_usize+1 limbs, so it gets -1+adj, which means -1 if the high is
+   zero, or no change if the high is non-zero.
+
+   Enhancements:
+
+   The way mpn_lshift is used means successive mpf_div_2exp calls on the
+   same operand will accumulate low zero limbs, until prec+1 limbs is
+   reached.  This is wasteful for subsequent operations.  When abs_usize <=
+   prec, we should test the low exp%GMP_NUMB_BITS many bits of PTR(u)[0],
+   ie. those which would be shifted out by an mpn_rshift.  If they're zero
+   then use that mpn_rshift.  */
+
+void
+mpf_div_2exp (mpf_ptr r, mpf_srcptr u, mp_bitcnt_t exp)  /* r = u / 2^exp */
+{
+  mp_srcptr up;
+  mp_ptr rp = r->_mp_d;
+  mp_size_t usize;
+  mp_size_t abs_usize;
+  mp_size_t prec = r->_mp_prec;
+  mp_exp_t uexp = u->_mp_exp;
+
+  usize = u->_mp_size;
+
+  if (UNLIKELY (usize == 0))
+    {
+      r->_mp_size = 0;
+      r->_mp_exp = 0;
+      return;
+    }
+
+  abs_usize = ABS (usize);
+  up = u->_mp_d;
+
+  if (exp % GMP_NUMB_BITS == 0)  /* whole-limb shift: only the exponent changes */
+    {
+      prec++;                  /* retain more precision here as we don't need
+                                  to account for carry-out here */
+      if (abs_usize > prec)
+       {
+         up += abs_usize - prec;  /* drop the excess low limbs of U */
+         abs_usize = prec;
+       }
+      if (rp != up)
+       MPN_COPY_INCR (rp, up, abs_usize);
+      r->_mp_exp = uexp - exp / GMP_NUMB_BITS;
+    }
+  else
+    {
+      mp_limb_t cy_limb;
+      mp_size_t adj;
+      if (abs_usize > prec)
+       {
+         up += abs_usize - prec;
+         abs_usize = prec;
+         /* Use mpn_rshift since mpn_lshift operates downwards, and we
+            therefore would clobber part of U before using that part, in case
+            R is the same variable as U.  */
+         cy_limb = mpn_rshift (rp + 1, up, abs_usize, exp % GMP_NUMB_BITS);
+         rp[0] = cy_limb;  /* bits shifted out at the bottom become the new low limb */
+         adj = rp[abs_usize] != 0;  /* 1 if the high limb survived the shift */
+       }
+      else
+       {
+         cy_limb = mpn_lshift (rp, up, abs_usize,
+                               GMP_NUMB_BITS - exp % GMP_NUMB_BITS);
+         rp[abs_usize] = cy_limb;  /* bits shifted out at the top become the new high limb */
+         adj = cy_limb != 0;
+       }
+
+      abs_usize += adj;
+      r->_mp_exp = uexp - exp / GMP_NUMB_BITS - 1 + adj;
+    }
+  r->_mp_size = usize >= 0 ? abs_usize : -abs_usize;  /* result carries the sign of U */
+}
diff --git a/mpf/div_ui.c b/mpf/div_ui.c

new file mode 100644 (file)

index 0000000..2f4de15
--- /dev/null
+++ b/mpf/div_ui.c
@@ -0,0 +1,99 @@
+/* mpf_div_ui -- Divide a float with an unsigned integer.
+
+Copyright 1993, 1994, 1996, 2000, 2001, 2002, 2004, 2005 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+void
+mpf_div_ui (mpf_ptr r, mpf_srcptr u, unsigned long int v)  /* r = u / v, using a quotient of prec+1 limbs */
+{
+  mp_srcptr up;
+  mp_ptr rp, tp, rtp;
+  mp_size_t usize;
+  mp_size_t rsize, tsize;
+  mp_size_t sign_quotient;
+  mp_size_t prec;
+  mp_limb_t q_limb;
+  mp_exp_t rexp;
+  TMP_DECL;
+
+#if BITS_PER_ULONG > GMP_NUMB_BITS  /* avoid warnings about shift amount */
+  if (v > GMP_NUMB_MAX)  /* v does not fit one limb: build a two-limb mpf and use mpf_div */
+    {
+      mpf_t vf;
+      mp_limb_t vl[2];
+      SIZ(vf) = 2;
+      EXP(vf) = 2;
+      PTR(vf) = vl;
+      vl[0] = v & GMP_NUMB_MASK;
+      vl[1] = v >> GMP_NUMB_BITS;
+      mpf_div (r, u, vf);
+      return;
+    }
+#endif
+
+  usize = u->_mp_size;
+  sign_quotient = usize;  /* v is unsigned, so the quotient takes the sign of u */
+  usize = ABS (usize);
+  prec = r->_mp_prec;
+
+  if (v == 0)
+    DIVIDE_BY_ZERO;
+
+  if (usize == 0)
+    {
+      r->_mp_size = 0;
+      r->_mp_exp = 0;
+      return;
+    }
+
+  TMP_MARK;
+
+  rp = r->_mp_d;
+  up = u->_mp_d;
+
+  tsize = 1 + prec;
+  tp = TMP_ALLOC_LIMBS (tsize + 1);
+
+  if (usize > tsize)
+    {
+      up += usize - tsize;  /* keep only the high tsize limbs of U */
+      usize = tsize;
+      rtp = tp;
+    }
+  else
+    {
+      MPN_ZERO (tp, tsize - usize);  /* pad the low end with zeros up to tsize limbs */
+      rtp = tp + (tsize - usize);
+    }
+
+  /* Copy the dividend into the high end of the temporary.  */
+  MPN_COPY (rtp, up, usize);
+
+  mpn_divmod_1 (rp, tp, tsize, (mp_limb_t) v);  /* quotient limbs go to rp, return value unused */
+  q_limb = rp[tsize - 1];
+
+  rsize = tsize - (q_limb == 0);  /* strip a zero high quotient limb */
+  rexp = u->_mp_exp - (q_limb == 0);
+  r->_mp_size = sign_quotient >= 0 ? rsize : -rsize;
+  r->_mp_exp = rexp;
+  TMP_FREE;
+}
diff --git a/mpf/dump.c b/mpf/dump.c

new file mode 100644 (file)

index 0000000..ded3606
--- /dev/null
+++ b/mpf/dump.c
@@ -0,0 +1,42 @@
+/* mpf_dump -- Dump a float to stdout.
+
+   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS NOT SAFE TO
+   CALL THIS FUNCTION DIRECTLY.  IN FACT, IT IS ALMOST GUARANTEED THAT THIS
+   FUNCTION WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+
+Copyright 1993, 1994, 1995, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <string.h> /* for strlen */
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpf_dump (mpf_srcptr u)  /* print u to stdout as [-]0.<digits>e<exp> plus a newline */
+{
+  mp_exp_t exp;
+  char *str;
+
+  str = mpf_get_str (0, &exp, 10, 0, u);  /* base 10; returned string is freed below */
+  if (str[0] == '-')
+    printf ("-0.%se%ld\n", str + 1, exp);  /* skip the sign so it precedes the "0." */
+  else
+    printf ("0.%se%ld\n", str, exp);
+  (*__gmp_free_func) (str, strlen (str) + 1);  /* size passed includes the terminating NUL */
+}
diff --git a/mpf/eq.c b/mpf/eq.c

new file mode 100644 (file)

index 0000000..cdbbcb9
--- /dev/null
+++ b/mpf/eq.c
@@ -0,0 +1,139 @@
+/* mpf_eq -- Compare two floats up to a specified bit #.
+
+Copyright 1993, 1995, 1996, 2001, 2002, 2008, 2009 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+int
+mpf_eq (mpf_srcptr u, mpf_srcptr v, mp_bitcnt_t n_bits)  /* non-zero if U and V agree in sign, exponent and leading n_bits mantissa bits */
+{
+  mp_srcptr up, vp, p;
+  mp_size_t usize, vsize, minsize, maxsize, n_limbs, i, size;
+  mp_exp_t uexp, vexp;
+  mp_limb_t diff;
+  int cnt;
+
+  uexp = u->_mp_exp;
+  vexp = v->_mp_exp;
+
+  usize = u->_mp_size;
+  vsize = v->_mp_size;
+
+  /* 1. Are the signs different?  */
+  if ((usize ^ vsize) >= 0)
+    {
+      /* U and V are both non-negative or both negative.  */
+      if (usize == 0)
+       return vsize == 0;
+      if (vsize == 0)
+       return 0;
+
+      /* Fall out.  */
+    }
+  else
+    {
+      /* Either U or V is negative, but not both.  */
+      return 0;
+    }
+
+  /* U and V have the same sign and are both non-zero.  */
+
+  /* 2. Are the exponents different?  */
+  if (uexp != vexp)
+    return 0;
+
+  usize = ABS (usize);
+  vsize = ABS (vsize);
+
+  up = u->_mp_d;
+  vp = v->_mp_d;
+
+  up += usize;                 /* point just above most significant limb */
+  vp += vsize;                 /* point just above most significant limb */
+
+  count_leading_zeros (cnt, up[-1]);
+  if ((vp[-1] >> (GMP_LIMB_BITS - 1 - cnt)) != 1)
+    return 0;                  /* msb positions different */
+
+  n_bits += cnt - GMP_NAIL_BITS;  /* count from the top of the limb, leading zeros included */
+  n_limbs = (n_bits + GMP_NUMB_BITS - 1) / GMP_NUMB_BITS;
+
+  usize = MIN (usize, n_limbs);
+  vsize = MIN (vsize, n_limbs);
+
+  /* No bits to compare: n_bits was 0 and the high limb of U has no leading
+     zero bits.  Sign, exponent and msb position already agree, so report
+     equality, as is already done for n_bits == 0 when cnt > 0.  Without
+     this early exit both sizes are 0 here and up[0] / vp[0] below would be
+     read one limb above the most significant limb.  */
+  if (n_limbs == 0)
+    return 1;
+
+  minsize = MIN (usize, vsize);
+  maxsize = usize + vsize - minsize;
+
+  up -= minsize;               /* point at least significant common limb */
+  vp -= minsize;               /* point at least significant common limb */
+
+  /* Compare the most significant part which has explicit limbs for U and V. */
+  for (i = minsize - 1; i > 0; i--)  /* limb 0 is handled separately below */
+    {
+      if (up[i] != vp[i])
+       return 0;
+    }
+
+  n_bits -= (maxsize - 1) * GMP_NUMB_BITS;  /* bits still to check in the lowest relevant limb */
+
+  size = maxsize - minsize;
+  if (size != 0)
+    {
+      if (up[0] != vp[0])
+       return 0;
+
+      /* Now either U or V has its limbs consumed, i.e, continues with an
+        infinite number of implicit zero limbs.  Check that the other operand
+        has just zeros in the corresponding, relevant part.  */
+
+      if (usize > vsize)
+       p = up - size;
+      else
+       p = vp - size;
+
+      for (i = size - 1; i > 0; i--)
+       {
+         if (p[i] != 0)
+           return 0;
+       }
+
+      diff = p[0];  /* must be zero in its top n_bits bits */
+    }
+  else
+    {
+      /* Both U and V have their limbs consumed.  */
+
+      diff = up[0] ^ vp[0];
+    }
+
+  if (n_bits < GMP_NUMB_BITS)
+    diff >>= GMP_NUMB_BITS - n_bits;  /* discard the low bits beyond the requested count */
+
+  return diff == 0;
+}
diff --git a/mpf/fits_s.h b/mpf/fits_s.h

new file mode 100644 (file)

index 0000000..f10f2c7
--- /dev/null
+++ b/mpf/fits_s.h
@@ -0,0 +1,64 @@
+/* mpf_fits_s*_p -- test whether an mpf fits a C signed type.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Notice this is equivalent to mpz_set_f + mpz_fits_s*_p.  */
+
+int
+FUNCTION (mpf_srcptr f) __GMP_NOTHROW
+{
+  mp_size_t  fs, fn;   /* signed size of f, and its absolute value */
+  mp_srcptr  fp;
+  mp_exp_t   exp;
+  mp_limb_t  fl;       /* magnitude of the integer part of f */
+  /* Return 1 if f truncated to an integer is within MINIMUM..MAXIMUM.  */
+  fs = SIZ(f);
+  if (fs == 0)
+    return 1;  /* zero fits */
+
+  exp = EXP(f);
+  if (exp < 1)
+    return 1;  /* -1 < f < 1 truncates to zero, so fits */
+
+  fp = PTR(f);
+  fn = ABS (fs);
+
+  if (exp == 1)
+    {
+      fl = fp[fn-1];   /* the single limb above the radix point */
+    }
+#if GMP_NAIL_BITS != 0
+  else if (exp == 2 && MAXIMUM > GMP_NUMB_MAX)
+    {
+      fl = fp[fn-1];
+      if ((fl >> GMP_NAIL_BITS) != 0)
+       return 0;
+      fl = (fl << GMP_NUMB_BITS);      /* high limb moves above the low one */
+      if (fn >= 2)
+        fl |= fp[fn-2];                /* low limb, if it is stored at all */
+    }
+#endif
+  else
+    return 0;  /* integer part is larger than the cases handled above */
+  /* Compare the magnitude with the limit that applies to the sign of f.  */
+  return fl <= (fs >= 0 ? (mp_limb_t) MAXIMUM : - (mp_limb_t) MINIMUM);
+}
diff --git a/mpf/fits_sint.c b/mpf/fits_sint.c

new file mode 100644 (file)

index 0000000..c8b245a
--- /dev/null
+++ b/mpf/fits_sint.c
@@ -0,0 +1,25 @@
+/* mpf_fits_sint_p -- test whether an mpf fits an int.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#define FUNCTION   mpf_fits_sint_p   /* name of the function fits_s.h defines */
+#define MAXIMUM    INT_MAX           /* upper limit tested for f >= 0 */
+#define MINIMUM    INT_MIN           /* lower limit tested for f < 0 */
+/* fits_s.h supplies the body, written in terms of the three macros above.  */
+#include "fits_s.h"
diff --git a/mpf/fits_slong.c b/mpf/fits_slong.c

new file mode 100644 (file)

index 0000000..42f5416
--- /dev/null
+++ b/mpf/fits_slong.c
@@ -0,0 +1,25 @@
+/* mpf_fits_slong_p -- test whether an mpf fits a long.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#define FUNCTION   mpf_fits_slong_p  /* name of the function fits_s.h defines */
+#define MAXIMUM    LONG_MAX          /* upper limit tested for f >= 0 */
+#define MINIMUM    LONG_MIN          /* lower limit tested for f < 0 */
+/* fits_s.h supplies the body, written in terms of the three macros above.  */
+#include "fits_s.h"
diff --git a/mpf/fits_sshort.c b/mpf/fits_sshort.c

new file mode 100644 (file)

index 0000000..bd2492b
--- /dev/null
+++ b/mpf/fits_sshort.c
@@ -0,0 +1,25 @@
+/* mpf_fits_sshort_p -- test whether an mpf fits a short.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#define FUNCTION   mpf_fits_sshort_p /* name of the function fits_s.h defines */
+#define MAXIMUM    SHRT_MAX          /* upper limit tested for f >= 0 */
+#define MINIMUM    SHRT_MIN          /* lower limit tested for f < 0 */
+/* fits_s.h supplies the body, written in terms of the three macros above.  */
+#include "fits_s.h"
diff --git a/mpf/fits_u.h b/mpf/fits_u.h

new file mode 100644 (file)

index 0000000..4503f03
--- /dev/null
+++ b/mpf/fits_u.h
@@ -0,0 +1,63 @@
+/* mpf_fits_u*_p -- test whether an mpf fits a C unsigned type.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Notice this is equivalent to mpz_set_f + mpz_fits_u*_p.  */
+
+/* Return non-zero if F, truncated to an integer, fits the unsigned type whose
+   largest value is MAXIMUM, and 0 otherwise.  FUNCTION and MAXIMUM are
+   defined by the file that includes this template.  */
+int
+FUNCTION (mpf_srcptr f) __GMP_NOTHROW
+{
+  mp_size_t  fn;
+  mp_srcptr  fp;
+  mp_exp_t   exp;
+  mp_limb_t  fl;
+
+  fn = SIZ(f);
+  if (fn == 0)
+    return 1;  /* zero fits */
+
+  /* Test the exponent before the sign: a negative fraction such as -0.5
+     truncates to zero and therefore fits, which is what mpz_set_f followed
+     by mpz_fits_u*_p reports and what the signed template already does.  */
+  exp = EXP(f);
+  if (exp < 1)
+    return 1;  /* -1 < f < 1 truncates to zero, so fits */
+
+  if (fn < 0)
+    return 0;  /* negative with a non-zero integer part never fits */
+
+  fp = PTR(f);
+
+  if (exp == 1)
+    {
+      fl = fp[fn-1];   /* the single limb above the radix point */
+    }
+#if GMP_NAIL_BITS != 0
+  else if (exp == 2 && MAXIMUM > GMP_NUMB_MAX)
+    {
+      fl = fp[fn-1];
+      if ((fl >> GMP_NAIL_BITS) != 0)
+       return 0;
+      fl = (fl << GMP_NUMB_BITS);
+      if (fn >= 2)
+        fl |= fp[fn-2];
+    }
+#endif
+  else
+    return 0;  /* integer part is larger than the cases handled above */
+
+  return fl <= MAXIMUM;
+}
diff --git a/mpf/fits_uint.c b/mpf/fits_uint.c

new file mode 100644 (file)

index 0000000..c2ae6fc
--- /dev/null
+++ b/mpf/fits_uint.c
@@ -0,0 +1,24 @@
+/* mpf_fits_uint_p -- test whether an mpf fits an unsigned int.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#define FUNCTION  mpf_fits_uint_p    /* name of the function fits_u.h defines */
+#define MAXIMUM   UINT_MAX           /* largest value that is accepted */
+/* fits_u.h supplies the body, written in terms of the two macros above.  */
+#include "fits_u.h"
diff --git a/mpf/fits_ulong.c b/mpf/fits_ulong.c

new file mode 100644 (file)

index 0000000..6fd4b8d
--- /dev/null
+++ b/mpf/fits_ulong.c
@@ -0,0 +1,24 @@
+/* mpf_fits_ulong_p -- test whether an mpf fits an unsigned long.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#define FUNCTION  mpf_fits_ulong_p   /* name of the function fits_u.h defines */
+#define MAXIMUM   ULONG_MAX          /* largest value that is accepted */
+/* fits_u.h supplies the body, written in terms of the two macros above.  */
+#include "fits_u.h"
diff --git a/mpf/fits_ushort.c b/mpf/fits_ushort.c

new file mode 100644 (file)

index 0000000..8228c79
--- /dev/null
+++ b/mpf/fits_ushort.c
@@ -0,0 +1,24 @@
+/* mpf_fits_ushort_p -- test whether an mpf fits an unsigned short.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#define FUNCTION  mpf_fits_ushort_p  /* name of the function fits_u.h defines */
+#define MAXIMUM   USHRT_MAX          /* largest value that is accepted */
+/* fits_u.h supplies the body, written in terms of the two macros above.  */
+#include "fits_u.h"
diff --git a/mpf/get_d.c b/mpf/get_d.c

new file mode 100644 (file)

index 0000000..7be1c6b
--- /dev/null
+++ b/mpf/get_d.c
@@ -0,0 +1,36 @@
+/* double mpf_get_d (mpf_t src) -- return SRC truncated to a double.
+
+Copyright 1996, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+double
+mpf_get_d (mpf_srcptr src)
+{
+  mp_size_t  sn = SIZ (src);   /* signed limb count of src */
+  mp_size_t  n;
+
+  /* A zero size means the value is zero, so there is nothing to convert.  */
+  if (UNLIKELY (sn == 0))
+    return 0.0;
+
+  n = ABS (sn);
+  /* Hand mpn_get_d the limbs, the sign, and the bit exponent of the low limb.  */
+  return mpn_get_d (PTR (src), n, sn, (long) ((EXP (src) - n) * GMP_NUMB_BITS));
+}
diff --git a/mpf/get_d_2exp.c b/mpf/get_d_2exp.c

new file mode 100644 (file)

index 0000000..a097ab6
--- /dev/null
+++ b/mpf/get_d_2exp.c
@@ -0,0 +1,50 @@
+/* double mpf_get_d_2exp (signed long int *exp, mpf_t src).
+
+Copyright 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+double
+mpf_get_d_2exp (signed long int *exp2, mpf_srcptr src)
+{
+  mp_size_t size, abs_size;
+  mp_srcptr ptr;
+  int cnt;                     /* unused high bits of the top limb */
+  long exp;
+  /* Split SRC into a double (returned) and a power-of-2 exponent (*EXP2).  */
+  size = SIZ(src);
+  if (UNLIKELY (size == 0))
+    {
+      *exp2 = 0;               /* zero: exponent 0 and result 0.0 */
+      return 0.0;
+    }
+
+  ptr = PTR(src);
+  abs_size = ABS (size);
+  count_leading_zeros (cnt, ptr[abs_size - 1]);
+  cnt -= GMP_NAIL_BITS;                /* do not count the nail bits */
+
+  exp = EXP(src) * GMP_NUMB_BITS - cnt;
+  *exp2 = exp;
+  /* Pass SIZE, not 0, as the sign so a negative SRC gives a negative result.  */
+  return mpn_get_d (ptr, abs_size, size,
+                    (long) - (abs_size * GMP_NUMB_BITS - cnt));
+}
diff --git a/mpf/get_dfl_prec.c b/mpf/get_dfl_prec.c

new file mode 100644 (file)

index 0000000..14606f0
--- /dev/null
+++ b/mpf/get_dfl_prec.c
@@ -0,0 +1,28 @@
+/* mpf_get_default_prec -- return default precision in bits.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+mp_bitcnt_t
+mpf_get_default_prec (void) __GMP_NOTHROW
+{
+  return __GMPF_PREC_TO_BITS (__gmp_default_fp_limb_precision);  /* limbs to bits */
+}
diff --git a/mpf/get_prc.c b/mpf/get_prc.c

new file mode 100644 (file)

index 0000000..ca7a056
--- /dev/null
+++ b/mpf/get_prc.c
@@ -0,0 +1,27 @@
+/* mpf_get_prec(x) -- Return the precision in bits of x.
+
+Copyright 1996, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+mp_bitcnt_t
+mpf_get_prec (mpf_srcptr x) __GMP_NOTHROW
+{
+  return __GMPF_PREC_TO_BITS (x->_mp_prec);  /* stored precision, limbs to bits */
+}
diff --git a/mpf/get_si.c b/mpf/get_si.c

new file mode 100644 (file)

index 0000000..e3d18e8
--- /dev/null
+++ b/mpf/get_si.c
@@ -0,0 +1,76 @@
+/* mpf_get_si -- mpf to long conversion
+
+Copyright 2001, 2002, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Any fraction bits are truncated, meaning simply discarded.
+
+   For values bigger than a long, the low bits are returned, like
+   mpz_get_si, but this isn't documented.
+
+   Notice this is equivalent to mpz_set_f + mpz_get_si.
+
+
+   Implementation:
+
+   fl is established in basically the same way as for mpf_get_ui, see that
+   code for explanations of the conditions.
+
+   However unlike mpf_get_ui we need an explicit return 0 for exp<=0.  When
+   f is a negative fraction (ie. size<0 and exp<=0) we can't let fl==0 go
+   through to the zany final "-1 - (long) ((fl - 1) & LONG_MAX)", that would
+   give LONG_MIN instead of the desired 0.  */
+
+long
+mpf_get_si (mpf_srcptr f) __GMP_NOTHROW
+{
+  mp_exp_t exp;
+  mp_size_t size, abs_size;
+  mp_srcptr fp;
+  mp_limb_t fl;                /* limb(s) just above the radix point */
+
+  exp = EXP (f);
+  size = SIZ (f);
+  fp = PTR (f);
+
+  /* A fraction alone truncates to zero.
+     This also covers zero itself, since exp==0 for zero.  */
+  if (exp <= 0)
+    return 0L;
+
+  /* there are some limbs above the radix point */
+
+  fl = 0;
+  abs_size = ABS (size);
+  if (abs_size >= exp)         /* is limb abs_size-exp actually stored? */
+    fl = fp[abs_size-exp];
+  /* When a limb holds fewer bits than a long, merge the next higher limb.  */
+#if BITS_PER_ULONG > GMP_NUMB_BITS
+  if (exp > 1 && abs_size+1 >= exp)
+    fl |= fp[abs_size - exp + 1] << GMP_NUMB_BITS;
+#endif
+
+  if (size > 0)
+    return fl & LONG_MAX;      /* low bits only, for values beyond a long */
+  else
+    /* this form necessary to correctly handle -0x80..00 */
+    return -1 - (long) ((fl - 1) & LONG_MAX);
+}
diff --git a/mpf/get_str.c b/mpf/get_str.c

new file mode 100644 (file)

index 0000000..447bfdb
--- /dev/null
+++ b/mpf/get_str.c
@@ -0,0 +1,317 @@
+/* mpf_get_str (digit_ptr, exp, base, n_digits, a) -- Convert the floating
+   point number A to a base BASE number and store N_DIGITS raw digits at
+   DIGIT_PTR, and the base BASE exponent in the word pointed to by EXP.  For
+   example, the number 3.1416 would be returned as "31416" in DIGIT_PTR and
+   1 in EXP.
+
+Copyright 1993, 1994, 1995, 1996, 1997, 2000, 2001, 2002, 2003, 2005, 2006 Free
+Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdlib.h>            /* for NULL */
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"          /* for count_leading_zeros */
+
+/* Could use some more work.
+
+   1. Allocation is excessive.  Try to combine areas.  Perhaps use result
+      string area for temp limb space?
+   2. We generate up to two limbs of extra digits.  This is because we don't
+      check the exact number of bits in the input operand, and from that
+      compute an accurate exponent (variable e in the code).  It would be
+      cleaner and probably somewhat faster to change this.
+*/
+
+/* Compute base^exp and return the most significant prec limbs in rp[].
+   Put the count of omitted low limbs in *ign.
+   Return the actual size (which might be less than prec).
+   Allocation of rp[] and the temporary tp[] should be 2*prec+2 limbs.  */
+static mp_size_t
+mpn_pow_1_highpart (mp_ptr rp, mp_size_t *ignp,
+                   mp_limb_t base, unsigned long exp,
+                   mp_size_t prec, mp_ptr tp)
+{
+  mp_size_t ign;               /* counts number of ignored low limbs in r */
+  mp_size_t off;               /* keeps track of offset where value starts */
+  mp_ptr passed_rp = rp;       /* caller's buffer; rp and tp are swapped below */
+  mp_size_t rn;                /* current size of the value in limbs */
+  int cnt;
+  int i;
+
+  if (exp == 0)
+    {
+      rp[0] = 1;               /* base^0 = 1, nothing ignored */
+      *ignp = 0;
+      return 1;
+    }
+
+  rp[0] = base;
+  rn = 1;
+  off = 0;
+  ign = 0;
+  count_leading_zeros (cnt, exp);      /* locates the top set bit of exp */
+  for (i = GMP_LIMB_BITS - cnt - 2; i >= 0; i--)  /* bits below the top one */
+    {
+      mpn_sqr (tp, rp + off, rn);      /* tp = square of the current value */
+      rn = 2 * rn;
+      rn -= tp[rn - 1] == 0;           /* drop a zero high limb */
+      ign <<= 1;                       /* squaring doubles the ignored limbs */
+
+      off = 0;
+      if (rn > prec)
+       {
+         ign += rn - prec;             /* keep only the top prec limbs */
+         off = rn - prec;
+         rn = prec;
+       }
+      MP_PTR_SWAP (rp, tp);            /* the square is now reached via rp */
+
+      if (((exp >> i) & 1) != 0)       /* bit set: multiply by base as well */
+       {
+         mp_limb_t cy;
+         cy = mpn_mul_1 (rp, rp + off, rn, base);
+         rp[rn] = cy;
+         rn += cy != 0;                /* grow by one limb on carry out */
+         off = 0;
+       }
+    }
+
+  if (rn > prec)
+    {
+      ASSERT (rn == prec + 1);
+
+      ign += rn - prec;
+      rp += rn - prec;
+      rn = prec;
+    }
+
+  /* With somewhat less than 50% probability, we can skip this copy.  */
+  if (passed_rp != rp + off)
+    MPN_COPY_INCR (passed_rp, rp + off, rn);
+  *ignp = ign;
+  return rn;
+}
+
+/* Convert U to a string of at most N_DIGITS digits in base BASE.
+
+   DBUF    destination buffer, or NULL to have one allocated with the current
+           allocation function (it is then resized to fit before returning).
+   EXP     receives the exponent, in base BASE, of the returned digits.
+   BASE    2..62, or -2..-36 for upper-case digits.  0, 1 and -1 select
+           base 10.  Any other value is invalid.
+   N_DIGITS maximum number of digits; 0 means as many as the precision of U
+           justifies.
+
+   Returns the digit string (with a leading '-' for negative U), or NULL if
+   BASE is invalid.  */
+char *
+mpf_get_str (char *dbuf, mp_exp_t *exp, int base, size_t n_digits, mpf_srcptr u)
+{
+  mp_exp_t ue;
+  mp_size_t n_limbs_needed;
+  size_t max_digits;
+  mp_ptr up, pp, tp;
+  mp_size_t un, pn, tn;
+  unsigned char *tstr;         /* raw digit values, not yet ASCII */
+  mp_exp_t exp_in_base;
+  size_t n_digits_computed;
+  mp_size_t i;
+  const char *num_to_text;
+  size_t alloc_size = 0;       /* non-zero only when dbuf is allocated here */
+  char *dp;
+  TMP_DECL;
+
+  up = PTR(u);
+  un = ABSIZ(u);
+  ue = EXP(u);
+
+  /* Validate the base and pick the digit alphabet before anything is
+     allocated, so that an invalid base can simply return NULL.  */
+  if (base >= 0)
+    {
+      num_to_text = "0123456789abcdefghijklmnopqrstuvwxyz";
+      if (base <= 1)
+       base = 10;              /* 0 is the documented default; 1 is unusable */
+      else if (base > 36)
+       {
+         num_to_text = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
+         if (base > 62)
+           return NULL;
+       }
+    }
+  else
+    {
+      /* The upper-case alphabet has only 36 symbols; reject anything beyond
+        it instead of indexing past the end of num_to_text.  Testing before
+        negating also avoids negating INT_MIN.  */
+      if (base < -36)
+       return NULL;
+      base = (base == -1) ? 10 : -base;
+      num_to_text = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+    }
+
+  MPF_SIGNIFICANT_DIGITS (max_digits, base, PREC(u));
+  if (n_digits == 0 || n_digits > max_digits)
+    n_digits = max_digits;
+
+  if (dbuf == 0)
+    {
+      /* We didn't get a string from the user.  Allocate one (and return
+        a pointer to it) with space for `-' and terminating null.  */
+      alloc_size = n_digits + 2;
+      dbuf = (char *) (*__gmp_allocate_func) (n_digits + 2);
+    }
+
+  if (un == 0)
+    {
+      /* Zero is returned as an empty string with exponent 0.  */
+      *exp = 0;
+      *dbuf = 0;
+      n_digits = 0;
+      goto done;
+    }
+
+  TMP_MARK;
+
+  /* Allocate temporary digit space.  We can't put digits directly in the user
+     area, since we generate more digits than requested.  (We allocate
+     2 * GMP_LIMB_BITS extra bytes because of the digit block nature of the
+     conversion.)  */
+  tstr = (unsigned char *) TMP_ALLOC (n_digits + 2 * GMP_LIMB_BITS + 3);
+
+  n_limbs_needed = 2 + (mp_size_t)
+    (n_digits / (GMP_NUMB_BITS * mp_bases[base].chars_per_bit_exactly));
+
+  if (ue <= n_limbs_needed)
+    {
+      /* We need to multiply number by base^n to get an n_digits integer part.  */
+      mp_size_t n_more_limbs_needed, ign, off;
+      unsigned long e;
+
+      n_more_limbs_needed = n_limbs_needed - ue;
+      e = (unsigned long) n_more_limbs_needed * (GMP_NUMB_BITS * mp_bases[base].chars_per_bit_exactly);
+
+      /* Use only the most significant n_limbs_needed limbs of U.  */
+      if (un > n_limbs_needed)
+       {
+         up += un - n_limbs_needed;
+         un = n_limbs_needed;
+       }
+      pp = TMP_ALLOC_LIMBS (2 * n_limbs_needed + 2);
+      tp = TMP_ALLOC_LIMBS (2 * n_limbs_needed + 2);
+
+      pn = mpn_pow_1_highpart (pp, &ign, (mp_limb_t) base, e, n_limbs_needed, tp);
+      /* mpn_mul wants the longer operand first.  */
+      if (un > pn)
+       mpn_mul (tp, up, un, pp, pn);   /* FIXME: mpn_mul_highpart */
+      else
+       mpn_mul (tp, pp, pn, up, un);   /* FIXME: mpn_mul_highpart */
+      tn = un + pn;
+      tn -= tp[tn - 1] == 0;
+      off = un - ue - ign;
+      if (off < 0)
+       {
+         /* Shift the product up by -off limbs, zero filling at the low end.  */
+         MPN_COPY_DECR (tp - off, tp, tn);
+         MPN_ZERO (tp, -off);
+         tn -= off;
+         off = 0;
+       }
+      n_digits_computed = mpn_get_str (tstr, base, tp + off, tn - off);
+
+      exp_in_base = n_digits_computed - e;
+    }
+  else
+    {
+      /* We need to divide number by base^n to get an n_digits integer part.  */
+      mp_size_t n_less_limbs_needed, ign, off, xn;
+      unsigned long e;
+      mp_ptr dummyp, xp;
+
+      n_less_limbs_needed = ue - n_limbs_needed;
+      e = (unsigned long) n_less_limbs_needed * (GMP_NUMB_BITS * mp_bases[base].chars_per_bit_exactly);
+
+      /* Use only the most significant n_limbs_needed limbs of U.  */
+      if (un > n_limbs_needed)
+       {
+         up += un - n_limbs_needed;
+         un = n_limbs_needed;
+       }
+      pp = TMP_ALLOC_LIMBS (2 * n_limbs_needed + 2);
+      tp = TMP_ALLOC_LIMBS (2 * n_limbs_needed + 2);
+
+      pn = mpn_pow_1_highpart (pp, &ign, (mp_limb_t) base, e, n_limbs_needed, tp);
+
+      /* Build the dividend: the limbs of U placed above OFF zero limbs.  */
+      xn = n_limbs_needed + (n_less_limbs_needed-ign);
+      xp = TMP_ALLOC_LIMBS (xn);
+      off = xn - un;
+      MPN_ZERO (xp, off);
+      MPN_COPY (xp + off, up, un);
+
+      /* Only the quotient is used; the remainder goes to a scratch area.  */
+      dummyp = TMP_ALLOC_LIMBS (pn);
+      mpn_tdiv_qr (tp, dummyp, (mp_size_t) 0, xp, xn, pp, pn);
+      tn = xn - pn + 1;
+      tn -= tp[tn - 1] == 0;
+      n_digits_computed = mpn_get_str (tstr, base, tp, tn);
+
+      exp_in_base = n_digits_computed + e;
+    }
+
+  /* We should normally have computed too many digits.  Round the result
+     at the point indicated by n_digits.  */
+  if (n_digits_computed > n_digits)
+    {
+      size_t i;
+      /* Round the result.  */
+      if (tstr[n_digits] * 2 >= base)
+       {
+         n_digits_computed = n_digits;
+         for (i = n_digits - 1;; i--)
+           {
+             unsigned int x;
+             x = ++(tstr[i]);
+             if (x != base)
+               break;
+             /* The digit overflowed: drop it and carry into the next one.  */
+             n_digits_computed--;
+             if (i == 0)
+               {
+                 /* We had something like `bbbbbbb...bd', where 2*d >= base
+                    and `b' denotes digit with significance base - 1.
+                    This rounds up to `1', increasing the exponent.  */
+                 tstr[0] = 1;
+                 n_digits_computed = 1;
+                 exp_in_base++;
+                 break;
+               }
+           }
+       }
+    }
+
+  /* We might have fewer digits than requested as a result of rounding above,
+     (i.e. 0.999999 => 1.0) or because we have a number that simply doesn't
+     need many digits in this base (e.g., 0.125 in base 10).  */
+  if (n_digits > n_digits_computed)
+    n_digits = n_digits_computed;
+
+  /* Remove trailing 0.  There can be many zeros.  */
+  while (n_digits != 0 && tstr[n_digits - 1] == 0)
+    n_digits--;
+
+  /* Leave room for the sign in front of the digits of a negative number.  */
+  dp = dbuf + (SIZ(u) < 0);
+
+  /* Translate to ASCII and copy to result string.  */
+  for (i = 0; i < n_digits; i++)
+    dp[i] = num_to_text[tstr[i]];
+  dp[n_digits] = 0;
+
+  *exp = exp_in_base;
+
+  if (SIZ(u) < 0)
+    {
+      dbuf[0] = '-';
+      n_digits++;              /* the sign counts towards the final length */
+    }
+
+  TMP_FREE;
+
+ done:
+  /* If the string was alloced then resize it down to the actual space
+     required.  */
+  if (alloc_size != 0)
+    {
+      __GMP_REALLOCATE_FUNC_MAYBE_TYPE (dbuf, alloc_size, n_digits + 1, char);
+    }
+
+  return dbuf;
+}
diff --git a/mpf/get_ui.c b/mpf/get_ui.c

new file mode 100644 (file)

index 0000000..0d909d4
--- /dev/null
+++ b/mpf/get_ui.c
@@ -0,0 +1,91 @@
+/* mpf_get_ui -- mpf to ulong conversion
+
+Copyright 2001, 2002, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Any fraction bits are truncated, meaning simply discarded.
+
+   For values bigger than a ulong, the low bits are returned (the low
+   absolute value bits actually), like mpz_get_ui, but this isn't
+   documented.
+
+   Notice this is equivalent to mpz_set_f + mpz_get_ui.
+
+
+   Implementation:
+
+   The limb just above the radix point for us to extract is ptr[size-exp].
+
+   We need to check that the size-exp index falls in our available data
+   range, 0 to size-1 inclusive.  We test this without risk of an overflow
+   involving exp by requiring size>=exp (giving size-exp >= 0) and exp>0
+   (giving size-exp <= size-1).
+
+   Notice if size==0 there's no fetch, since of course size>=exp and exp>0
+   can only be true if size>0.  So there's no special handling for size==0,
+   it comes out as 0 the same as any other time we have no data at our
+   target index.
+
+   For nails, the second limb above the radix point is also required, this
+   is ptr[size-exp+1].
+
+   Again we need to check that size-exp+1 falls in our data range, 0 to
+   size-1 inclusive.  We test without risk of overflow by requiring
+   size+1>=exp (giving size-exp+1 >= 0) and exp>1 (giving size-exp+1 <=
+   size-1).
+
+   And again if size==0 these second fetch conditions are not satisfied
+   either since size+1>=exp and exp>1 are only true if size>0.
+
+   The code is arranged with exp>0 wrapping the exp>1 test since exp>1 is
+   mis-compiled by alpha gcc prior to version 3.4.  It re-writes it as
+   exp-1>0, which is incorrect when exp==MP_EXP_T_MIN.  By having exp>0
+   tested first we ensure MP_EXP_T_MIN doesn't reach exp>1.  */
+
+unsigned long
+mpf_get_ui (mpf_srcptr f) __GMP_NOTHROW
+{
+  mp_size_t size;
+  mp_exp_t exp;
+  mp_srcptr fp;
+  mp_limb_t fl;                /* limb(s) just above the radix point */
+  /* Return the truncated integer part of |f|, reduced to an unsigned long.  */
+  exp = EXP (f);
+  size = SIZ (f);
+  fp = PTR (f);
+
+  fl = 0;                      /* result when no wanted limb is stored */
+  if (exp > 0)
+    {
+      /* there are some limbs above the radix point */
+
+      size = ABS (size);
+      if (size >= exp)         /* is limb size-exp actually stored? */
+        fl = fp[size-exp];
+
+#if BITS_PER_ULONG > GMP_NUMB_BITS
+      if (exp > 1 && size+1 >= exp)    /* second limb above the radix point */
+        fl += (fp[size-exp+1] << GMP_NUMB_BITS);
+#endif
+    }
+
+  return (unsigned long) fl;
+}
diff --git a/mpf/init.c b/mpf/init.c

new file mode 100644 (file)

index 0000000..fae6f19
--- /dev/null
+++ b/mpf/init.c
@@ -0,0 +1,31 @@
+/* mpf_init() -- Make a new multiple precision number with value 0.
+
+Copyright 1993, 1994, 1995, 2000, 2001, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpf_init (mpf_ptr r)
+{
+  mp_size_t nlimbs = __gmp_default_fp_limb_precision;
+  r->_mp_prec = nlimbs;
+  r->_mp_size = r->_mp_exp = 0;        /* the new value is zero */
+  /* The limb array holds one limb more than the precision.  */
+  r->_mp_d = (mp_ptr) (*__gmp_allocate_func) ((nlimbs + 1) * BYTES_PER_MP_LIMB);
+}
diff --git a/mpf/init2.c b/mpf/init2.c

new file mode 100644 (file)

index 0000000..8298a6b
--- /dev/null
+++ b/mpf/init2.c
@@ -0,0 +1,33 @@
+/* mpf_init2() -- Make a new multiple precision number with value 0.
+
+Copyright 1993, 1994, 1995, 2000, 2001, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Initialise R to the value 0 with a precision of at least PREC_IN_BITS
+   bits.  The bit count is converted to a limb count with
+   __GMPF_BITS_TO_PREC and one extra limb is allocated.  */
+void
+mpf_init2 (mpf_ptr r, mp_bitcnt_t prec_in_bits)
+{
+  const mp_size_t nlimbs = __GMPF_BITS_TO_PREC (prec_in_bits);
+
+  SIZ (r) = 0;
+  EXP (r) = 0;
+  PREC (r) = nlimbs;
+  PTR (r) = (mp_ptr) (*__gmp_allocate_func) ((nlimbs + 1) * BYTES_PER_MP_LIMB);
+}
diff --git a/mpf/inits.c b/mpf/inits.c

new file mode 100644 (file)

index 0000000..33471f6
--- /dev/null
+++ b/mpf/inits.c
@@ -0,0 +1,56 @@
+/* mpf_inits() -- Initialize multiple mpf_t variables and set them to 0.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#if HAVE_STDARG
+#include <stdarg.h>
+#else
+#include <varargs.h>
+#endif
+
+#include <stdio.h>             /* for NULL */
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if HAVE_STDARG
+mpf_inits (mpf_ptr x, ...)
+#else
+mpf_inits (va_alist)
+     va_dcl
+#endif
+{
+  va_list  ap;
+
+#if HAVE_STDARG
+  va_start (ap, x);
+#else
+  mpf_ptr x;
+  va_start (ap);
+  x = va_arg (ap, mpf_ptr);
+#endif
+
+  while (x != NULL)
+    {
+      mpf_init (x);
+      x = va_arg (ap, mpf_ptr);
+    }
+  va_end (ap);
+}
diff --git a/mpf/inp_str.c b/mpf/inp_str.c

new file mode 100644 (file)

index 0000000..042a20d
--- /dev/null
+++ b/mpf/inp_str.c
@@ -0,0 +1,82 @@
+/* mpf_inp_str(dest_float, stream, base) -- Input a number in base
+   BASE from stdio stream STREAM and store the result in DEST_FLOAT.
+
+Copyright 1996, 2000, 2001, 2002, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <ctype.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Read one whitespace-delimited token from STREAM (stdin when STREAM is
+   null), skipping any leading whitespace, and hand it to mpf_set_str to
+   be stored in ROP using base BASE.  The character that ended the token
+   is pushed back onto the stream.
+
+   Returns the count of leading whitespace characters plus token
+   characters, or 0 if mpf_set_str reports an error.  */
+size_t
+mpf_inp_str (mpf_ptr rop, FILE *stream, int base)
+{
+  char *buf;
+  size_t cap, len, nskipped;
+  int ch, status;
+
+  if (stream == NULL)
+    stream = stdin;
+
+  cap = 100;
+  buf = (char *) (*__gmp_allocate_func) (cap);
+  len = 0;
+
+  /* Consume leading whitespace, counting only the characters skipped.  */
+  nskipped = 0;
+  ch = getc (stream);
+  while (isspace (ch))
+    {
+      nskipped++;
+      ch = getc (stream);
+    }
+
+  /* Gather characters up to EOF or the next whitespace, growing the
+     buffer by half whenever it is full.  */
+  while (ch != EOF && ! isspace (ch))
+    {
+      if (len >= cap)
+        {
+          size_t old_cap = cap;
+          cap = cap * 3 / 2;
+          buf = (char *) (*__gmp_reallocate_func) (buf, old_cap, cap);
+        }
+      buf[len++] = ch;
+      ch = getc (stream);
+    }
+  ungetc (ch, stream);
+
+  /* Make sure there is room for the terminating NUL.  */
+  if (len >= cap)
+    {
+      size_t old_cap = cap;
+      cap = cap * 3 / 2;
+      buf = (char *) (*__gmp_reallocate_func) (buf, old_cap, cap);
+    }
+  buf[len] = 0;
+
+  status = mpf_set_str (rop, buf, base);
+  (*__gmp_free_func) (buf, cap);
+
+  if (status == -1)
+    return 0;                  /* error */
+
+  return len + nskipped;
+}
diff --git a/mpf/int_p.c b/mpf/int_p.c

new file mode 100644 (file)

index 0000000..3168314
--- /dev/null
+++ b/mpf/int_p.c
@@ -0,0 +1,48 @@
+/* mpf_integer_p -- test whether an mpf is an integer */
+
+/*
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Return 1 if F holds an integer value, 0 otherwise.  Zero counts as an
+   integer.  A non-zero F is an integer when it has limbs above the radix
+   point and every limb stored below the radix point is zero.  */
+int
+mpf_integer_p (mpf_srcptr f) __GMP_NOTHROW
+{
+  mp_size_t n = SIZ (f);
+  mp_exp_t e = EXP (f);
+  mp_size_t nfrac;
+  mp_srcptr p;
+
+  if (n == 0)
+    return 1;  /* zero is an integer */
+
+  if (e <= 0)
+    return 0;  /* nothing but fraction limbs */
+
+  /* The lowest ABS(n)-e limbs lie below the radix point; when that count
+     is zero or negative there is nothing to check.  */
+  p = PTR (f);
+  for (nfrac = ABS (n) - e; nfrac > 0; nfrac--)
+    {
+      if (*p++ != 0)
+        return 0;
+    }
+
+  return 1;
+}
diff --git a/mpf/iset.c b/mpf/iset.c

new file mode 100644 (file)

index 0000000..60ccebc
--- /dev/null
+++ b/mpf/iset.c
@@ -0,0 +1,51 @@
+/* mpf_init_set -- Initialize a float and assign it from another float.
+
+Copyright 1993, 1994, 1995, 2000, 2001, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Initialise R at the default precision and copy the value of S into
+   it.  Up to prec+1 of the most significant limbs of S are kept, so
+   that the assignment does not lose precision unnecessarily.  */
+void
+mpf_init_set (mpf_ptr r, mpf_srcptr s)
+{
+  mp_size_t prec = __gmp_default_fp_limb_precision;
+  mp_size_t room = prec + 1;   /* limbs allocated, and most limbs copied */
+  mp_size_t ssize, n;
+  mp_srcptr src;
+
+  PTR (r) = (mp_ptr) (*__gmp_allocate_func) (room * BYTES_PER_MP_LIMB);
+  PREC (r) = prec;
+
+  ssize = SIZ (s);
+  n = ABS (ssize);
+  src = PTR (s);
+
+  /* S is longer than we can hold: drop its least significant limbs.  */
+  if (n > room)
+    {
+      src += n - room;
+      n = room;
+    }
+
+  EXP (r) = EXP (s);
+  SIZ (r) = (ssize < 0) ? -n : n;
+
+  MPN_COPY (PTR (r), src, n);
+}
diff --git a/mpf/iset_d.c b/mpf/iset_d.c

new file mode 100644 (file)

index 0000000..e0ac141
--- /dev/null
+++ b/mpf/iset_d.c
@@ -0,0 +1,31 @@
+/* mpf_init_set_d -- Initialize a float and assign it from a double.
+
+Copyright 1993, 1994, 1995, 2000, 2001, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Initialise R at the default precision, then assign VAL to it with
+   mpf_set_d.  */
+void
+mpf_init_set_d (mpf_ptr r, double val)
+{
+  const mp_size_t nlimbs = __gmp_default_fp_limb_precision;
+
+  PREC (r) = nlimbs;
+  PTR (r) = (mp_ptr) (*__gmp_allocate_func) ((nlimbs + 1) * BYTES_PER_MP_LIMB);
+
+  mpf_set_d (r, val);
+}
diff --git a/mpf/iset_si.c b/mpf/iset_si.c

new file mode 100644 (file)

index 0000000..a689d0d
--- /dev/null
+++ b/mpf/iset_si.c
@@ -0,0 +1,47 @@
+/* mpf_init_set_si() -- Initialize a float and assign it from a signed long.
+
+Copyright 1993, 1994, 1995, 2000, 2001, 2003, 2004 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Initialise R at the default precision and set it to the integer VAL.
+   The magnitude is stored in one limb, or in two when an unsigned long
+   is wider than a limb, and the sign is carried by the size field.  */
+void
+mpf_init_set_si (mpf_ptr r, long int val)
+{
+  const mp_size_t nlimbs = __gmp_default_fp_limb_precision;
+  mp_limb_t mag;
+  mp_size_t n;
+
+  PREC (r) = nlimbs;
+  PTR (r) = (mp_ptr) (*__gmp_allocate_func) ((nlimbs + 1) * BYTES_PER_MP_LIMB);
+
+  mag = (mp_limb_t) ABS_CAST (unsigned long int, val);
+
+  PTR (r)[0] = mag & GMP_NUMB_MASK;
+  n = (mag != 0);
+
+#if BITS_PER_ULONG > GMP_NUMB_BITS
+  /* Whatever did not fit in the low limb goes into a second one.  */
+  mag >>= GMP_NUMB_BITS;
+  PTR (r)[1] = mag;
+  n += (mag != 0);
+#endif
+
+  /* All stored limbs are integer limbs, hence exponent == limb count.  */
+  EXP (r) = n;
+  SIZ (r) = (val < 0) ? -n : n;
+}
diff --git a/mpf/iset_str.c b/mpf/iset_str.c

new file mode 100644 (file)

index 0000000..82a6f2e
--- /dev/null
+++ b/mpf/iset_str.c
@@ -0,0 +1,33 @@
+/* mpf_init_set_str -- Initialize a float and assign it from a string.
+
+Copyright 1995, 1996, 2000, 2001, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Initialise R to 0 at the default precision, then assign it from the
+   string S in base BASE.  The return value is that of mpf_set_str; R is
+   left initialised whatever that value is.  */
+int
+mpf_init_set_str (mpf_ptr r, const char *s, int base)
+{
+  const mp_size_t nlimbs = __gmp_default_fp_limb_precision;
+
+  SIZ (r) = 0;
+  EXP (r) = 0;
+  PREC (r) = nlimbs;
+  PTR (r) = (mp_ptr) (*__gmp_allocate_func) ((nlimbs + 1) * BYTES_PER_MP_LIMB);
+
+  return mpf_set_str (r, s, base);
+}
diff --git a/mpf/iset_ui.c b/mpf/iset_ui.c

new file mode 100644 (file)

index 0000000..4ac1771
--- /dev/null
+++ b/mpf/iset_ui.c
@@ -0,0 +1,43 @@
+/* mpf_init_set_ui() -- Initialize a float and assign it from an unsigned long.
+
+Copyright 1993, 1994, 1995, 2000, 2001, 2003, 2004 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Initialise R at the default precision and set it to the integer VAL.
+   VAL is stored in one limb, or in two when an unsigned long is wider
+   than a limb.  */
+void
+mpf_init_set_ui (mpf_ptr r, unsigned long int val)
+{
+  const mp_size_t nlimbs = __gmp_default_fp_limb_precision;
+  mp_size_t n;
+
+  PREC (r) = nlimbs;
+  PTR (r) = (mp_ptr) (*__gmp_allocate_func) ((nlimbs + 1) * BYTES_PER_MP_LIMB);
+
+  PTR (r)[0] = val & GMP_NUMB_MASK;
+  n = (val != 0);
+
+#if BITS_PER_ULONG > GMP_NUMB_BITS
+  /* Whatever did not fit in the low limb goes into a second one.  */
+  val >>= GMP_NUMB_BITS;
+  PTR (r)[1] = val;
+  n += (val != 0);
+#endif
+
+  /* All stored limbs are integer limbs, hence exponent == limb count.  */
+  SIZ (r) = n;
+  EXP (r) = n;
+}
diff --git a/mpf/mul.c b/mpf/mul.c

new file mode 100644 (file)

index 0000000..0082aa4
--- /dev/null
+++ b/mpf/mul.c
@@ -0,0 +1,85 @@
+/* mpf_mul -- Multiply two floats.
+
+Copyright 1993, 1994, 1996, 2001, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Set R = U * V.  At most PREC(r) high limbs of each operand take part
+   in the product, and at most PREC(r)+1 high limbs of the product are
+   stored.  The product is formed in temporary space, so R may be the
+   same variable as U and/or V.  */
+void
+mpf_mul (mpf_ptr r, mpf_srcptr u, mpf_srcptr v)
+{
+  mp_size_t prec = PREC (r);
+  mp_size_t un = SIZ (u);
+  mp_size_t vn = SIZ (v);
+  int negative = (un ^ vn) < 0;        /* operand signs differ */
+  mp_srcptr up = PTR (u);
+  mp_srcptr vp = PTR (v);
+  mp_size_t rn, drop;
+  mp_limb_t top;
+  mp_ptr tp;
+  TMP_DECL;
+
+  un = ABS (un);
+  vn = ABS (vn);
+
+  /* Use only the prec most significant limbs of each operand.  */
+  if (un > prec)
+    {
+      up += un - prec;
+      un = prec;
+    }
+  if (vn > prec)
+    {
+      vp += vn - prec;
+      vn = prec;
+    }
+
+  if (un == 0 || vn == 0)
+    {
+      SIZ (r) = 0;
+      EXP (r) = 0;
+      return;
+    }
+
+  TMP_MARK;
+
+  rn = un + vn;
+  tp = TMP_ALLOC_LIMBS (rn);
+
+  /* mpn_mul is called with the longer operand first.  */
+  if (un >= vn)
+    top = mpn_mul (tp, up, un, vp, vn);
+  else
+    top = mpn_mul (tp, vp, vn, up, un);
+
+  /* A zero top limb is dropped, which lowers the exponent by one.  */
+  drop = (top == 0);
+  rn -= drop;
+
+  /* Keep at most prec+1 high limbs of the product.  */
+  if (rn > prec + 1)
+    {
+      tp += rn - (prec + 1);
+      rn = prec + 1;
+    }
+
+  MPN_COPY (PTR (r), tp, rn);
+  EXP (r) = EXP (u) + EXP (v) - drop;
+  SIZ (r) = negative ? -rn : rn;
+
+  TMP_FREE;
+}
diff --git a/mpf/mul_2exp.c b/mpf/mul_2exp.c

new file mode 100644 (file)

index 0000000..5ec70e4
--- /dev/null
+++ b/mpf/mul_2exp.c
@@ -0,0 +1,123 @@
+/* mpf_mul_2exp -- Multiply a float by 2^n.
+
+Copyright 1993, 1994, 1996, 2000, 2001, 2002, 2004 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Multiples of GMP_NUMB_BITS in exp simply mean an amount added to EXP(u)
+   to set EXP(r).  The remainder exp%GMP_NUMB_BITS is then a left shift for
+   the limb data.
+
+   If exp%GMP_NUMB_BITS == 0 then there's no shifting, we effectively just
+   do an mpf_set with changed EXP(r).  Like mpf_set we take prec+1 limbs in
+   this case.  Although just prec would suffice, it's nice to have
+   mpf_mul_2exp with exp==0 come out the same as mpf_set.
+
+   When shifting we take up to prec many limbs from the input.  Our shift is
+   cy = mpn_lshift (PTR(r), PTR(u)+k, size, ...), where k is the number of
+   low limbs dropped from u, and the carry out is stored to PTR(r)[size].
+
+   It may be noted that the low limb PTR(r)[0] doesn't incorporate bits from
+   PTR(u)[k-1] (when k>=1 makes that limb available).  Taking just prec
+   limbs from the input (with the high non-zero) is enough bits for the
+   application requested precision, there's no need for extra work.
+
+   If r==u the shift will have overlapping operands.  When k==0 (ie. when
+   usize <= prec), the overlap is supported by lshift (ie. dst == src).
+
+   But when r==u and k>=1 (ie. usize > prec), we would have an invalid
+   overlap (ie. mpn_lshift (rp, rp+k, ...)).  In this case we must instead
+   use mpn_rshift (PTR(r)+1, PTR(u)+k, size, NUMB-shift) with the carry out
+   stored to PTR(r)[0].  An rshift by NUMB-shift bits like this gives
+   identical data, it's just its overlap restrictions which differ.
+
+   Enhancements:
+
+   The way mpn_lshift is used means successive mpf_mul_2exp calls on the
+   same operand will accumulate low zero limbs, until prec+1 limbs is
+   reached.  This is wasteful for subsequent operations.  When abs_usize <=
+   prec, we should test the low exp%GMP_NUMB_BITS many bits of PTR(u)[0],
+   ie. those which would be shifted out by an mpn_rshift.  If they're zero
+   then use that mpn_rshift.  */
+
+/* Set R = U * 2^EXP.  Whole limbs of the shift are added to the
+   exponent; the remaining bits, if any, are applied to the limb data.
+   R may be the same variable as U.  */
+void
+mpf_mul_2exp (mpf_ptr r, mpf_srcptr u, mp_bitcnt_t exp)
+{
+  const mp_bitcnt_t limb_shift = exp / GMP_NUMB_BITS;
+  const mp_bitcnt_t bit_shift = exp % GMP_NUMB_BITS;
+  mp_size_t usize = SIZ (u);
+  mp_size_t prec = PREC (r);
+  mp_exp_t uexp = EXP (u);
+  mp_ptr rp = PTR (r);
+  mp_srcptr up;
+  mp_size_t n, grew;
+  mp_limb_t out;
+
+  if (UNLIKELY (usize == 0))
+    {
+      SIZ (r) = 0;
+      EXP (r) = 0;
+      return;
+    }
+
+  n = ABS (usize);
+  up = PTR (u);
+
+  if (bit_shift == 0)
+    {
+      /* Pure limb shift: a plain copy with an adjusted exponent.  There
+         is no carry-out to make room for, so prec+1 limbs are kept.  */
+      if (n > prec + 1)
+        {
+          up += n - (prec + 1);
+          n = prec + 1;
+        }
+      if (rp != up)
+        MPN_COPY_INCR (rp, up, n);
+      EXP (r) = uexp + limb_shift;
+    }
+  else
+    {
+      if (n > prec)
+        {
+          up += n - prec;
+          n = prec;
+          /* mpn_lshift works from the high end downwards and would
+             overwrite limbs of U before reading them if R is the same
+             variable as U, so shift right by the complementary count
+             into rp+1 instead and put the bits shifted out in rp[0].  */
+          out = mpn_rshift (rp + 1, up, n, GMP_NUMB_BITS - bit_shift);
+          rp[0] = out;
+          grew = rp[n] != 0;
+        }
+      else
+        {
+          out = mpn_lshift (rp, up, n, bit_shift);
+          rp[n] = out;
+          grew = out != 0;
+        }
+
+      /* A non-zero top limb makes the result one limb longer.  */
+      n += grew;
+      EXP (r) = uexp + limb_shift + grew;
+    }
+
+  SIZ (r) = usize >= 0 ? n : -n;
+}
diff --git a/mpf/mul_ui.c b/mpf/mul_ui.c

new file mode 100644 (file)

index 0000000..96e8012
--- /dev/null
+++ b/mpf/mul_ui.c
@@ -0,0 +1,171 @@
+/* mpf_mul_ui -- Multiply a float and an unsigned integer.
+
+Copyright 1993, 1994, 1996, 2001, 2003, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* The core operation is a multiply of PREC(r) limbs from u by v, producing
+   either PREC(r) or PREC(r)+1 result limbs.  If u is shorter than PREC(r),
+   then we take only as much as it has.  If u is longer we incorporate a
+   carry from the lower limbs.
+
+   If u has just 1 extra limb, then the carry to add is high(up[0]*v).  That
+   is of course what mpn_mul_1 would do if it was called with PREC(r)+1
+   limbs of input.
+
+   If u has more than 1 extra limb, then there can be a further carry bit
+   out of lower uncalculated limbs (the way the low of one product adds to
+   the high of the product below it).  This is of course what an mpn_mul_1
+   would do if it was called with the full u operand.  But we instead work
+   downwards explicitly, until a carry occurs or until a value other than
+   GMP_NUMB_MAX occurs (that being the only value a carry bit can propagate
+   across).
+
+   The carry determination normally requires two umul_ppmm's, only rarely
+   will GMP_NUMB_MAX occur and require further products.
+
+   The carry limb is conveniently added into the mul_1 using mpn_mul_1c when
+   that function exists, otherwise a subsequent mpn_add_1 is needed.
+
+   Clearly when mpn_mul_1c is used the carry must be calculated first.  But
+   this is also the case when add_1 is used, since if r==u and ABSIZ(r) >
+   PREC(r) then the mpn_mul_1 overwrites the low part of the input.
+
+   A reuse r==u with size > prec can occur from a size PREC(r)+1 in the
+   usual way, or it can occur from an mpf_set_prec_raw leaving a bigger
+   sized value.  In both cases we can end up calling mpn_mul_1 with
+   overlapping src and dst regions, but this will be with dst < src and such
+   an overlap is permitted.
+
+   Not done:
+
+   No attempt is made to determine in advance whether the result will be
+   PREC(r) or PREC(r)+1 limbs.  If it's going to be PREC(r)+1 then we could
+   take one less limb from u and generate just PREC(r), that of course
+   satisfying application requested precision.  But any test counting bits
+   or forming the high product would almost certainly take longer than the
+   incremental cost of an extra limb in mpn_mul_1.
+
+   Enhancements:
+
+   Repeated mpf_mul_ui's with an even v will accumulate low zero bits on the
+   result, leaving low zero limbs after a while, which it might be nice to
+   strip to save work in subsequent operations.  Calculating the low limb
+   explicitly would let us direct mpn_mul_1 to put the balance at rp when
+   the low is zero (instead of normally rp+1).  But it's not clear whether
+   this would be worthwhile.  Explicit code for the low limb will probably
+   be slower than having it done in mpn_mul_1, so we need to consider how
+   often a zero will be stripped and how much that's likely to save
+   later.  */
+
+void
+mpf_mul_ui (mpf_ptr r, mpf_srcptr u, unsigned long int v)
+{
+  mp_srcptr up;
+  mp_size_t usize;
+  mp_size_t size;
+  mp_size_t prec, excess;
+  mp_limb_t cy_limb, vl, cbit, cin;
+  mp_ptr rp;
+  /* Set r = u * v.  A zero operand on either side gives a zero result
+     with exponent 0; the limb data of r is not touched in that case.  */
+  usize = u->_mp_size;
+  if (UNLIKELY (v == 0) || UNLIKELY (usize == 0))
+    {
+      r->_mp_size = 0;
+      r->_mp_exp = 0;
+      return;
+    }
+
+#if BITS_PER_ULONG > GMP_NUMB_BITS  /* avoid warnings about shift amount */
+  if (v > GMP_NUMB_MAX)
+    {
+      mpf_t     vf;  /* stack-only wrapper around vp[], handed to mpf_mul */
+      mp_limb_t vp[2];
+      vp[0] = v & GMP_NUMB_MASK;  /* low limb of v */
+      vp[1] = v >> GMP_NUMB_BITS;  /* high limb of v */
+      PTR(vf) = vp;
+      SIZ(vf) = 2;
+      ASSERT_CODE (PREC(vf) = 2);  /* presumably only set in assertion builds -- confirm */
+      EXP(vf) = 2;  /* both limbs are integer limbs */
+      mpf_mul (r, u, vf);
+      return;
+    }
+#endif
+
+  size = ABS (usize);
+  prec = r->_mp_prec;
+  up = u->_mp_d;
+  vl = v;
+  excess = size - prec;  /* low limbs of u beyond r's precision, if > 0 */
+  cin = 0;  /* carry into the low result limb; stays 0 if u isn't cut */
+
+  if (excess > 0)
+    {
+      /* up is bigger than desired rp, shorten it to prec limbs and
+         determine a carry-in */
+
+      mp_limb_t  vl_shifted = vl << GMP_NAIL_BITS;
+      mp_limb_t  hi, lo, next_lo, sum;
+      mp_size_t  i;
+
+      /* high limb of top product */
+      i = excess - 1;  /* index of the highest limb being dropped */
+      umul_ppmm (cin, lo, up[i], vl_shifted);
+
+      /* and carry bit out of products below that, if any */
+      for (;;)
+        {
+          i--;
+          if (i < 0)
+            break;  /* no lower limbs left to examine */
+
+          umul_ppmm (hi, next_lo, up[i], vl_shifted);
+          lo >>= GMP_NAIL_BITS;
+          ADDC_LIMB (cbit, sum, hi, lo);  /* sum = hi + lo, cbit = carry out */
+          cin += cbit;
+          lo = next_lo;
+
+          /* Continue only if the sum is GMP_NUMB_MAX.  GMP_NUMB_MAX is the
+             only value a carry from below can propagate across.  If we've
+             just seen the carry out (ie. cbit!=0) then sum!=GMP_NUMB_MAX,
+             so this test stops us for that case too.  */
+          if (LIKELY (sum != GMP_NUMB_MAX))
+            break;
+        }
+
+      up += excess;  /* skip the dropped low limbs */
+      size = prec;
+    }
+
+  rp = r->_mp_d;
+#if HAVE_NATIVE_mpn_mul_1c
+  cy_limb = mpn_mul_1c (rp, up, size, vl, cin);
+#else
+  cy_limb = mpn_mul_1 (rp, up, size, vl);
+  __GMPN_ADD_1 (cbit, rp, rp, size, cin);
+  cy_limb += cbit;
+#endif
+  rp[size] = cy_limb;  /* always stored; counted in size only if non-zero */
+  cy_limb = cy_limb != 0;  /* from here on a 0/1 flag: result grew a limb */
+  r->_mp_exp = u->_mp_exp + cy_limb;
+  size += cy_limb;
+  r->_mp_size = usize >= 0 ? size : -size;  /* result takes the sign of u */
+}
diff --git a/mpf/neg.c b/mpf/neg.c

new file mode 100644 (file)

index 0000000..c7d7d47
--- /dev/null
+++ b/mpf/neg.c
@@ -0,0 +1,51 @@
+/* mpf_neg -- Negate a float.
+
+Copyright 1993, 1994, 1995, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Set R = -U.  When R and U are the same variable only the sign of the
+   size field is flipped; otherwise up to PREC(r)+1 high limbs of U are
+   copied into R.  */
+void
+mpf_neg (mpf_ptr r, mpf_srcptr u)
+{
+  mp_size_t negsize = -SIZ (u);
+  mp_size_t room, n;
+  mp_srcptr up;
+
+  if (r == u)
+    {
+      SIZ (r) = negsize;
+      return;
+    }
+
+  room = PREC (r) + 1;         /* lie not to lose precision in assignment */
+  n = ABS (negsize);
+  up = PTR (u);
+
+  /* U is longer than R can hold: drop its least significant limbs.  */
+  if (n > room)
+    {
+      up += n - room;
+      n = room;
+    }
+
+  MPN_COPY (PTR (r), up, n);
+  EXP (r) = EXP (u);
+  SIZ (r) = negsize >= 0 ? n : -n;
+}
diff --git a/mpf/out_str.c b/mpf/out_str.c

new file mode 100644 (file)

index 0000000..afccdbb
--- /dev/null
+++ b/mpf/out_str.c
@@ -0,0 +1,105 @@
+/* mpf_out_str (stream, base, n_digits, op) -- Print N_DIGITS digits from
+   the float OP to STREAM in base BASE.  Return the number of characters
+   written, or 0 if an error occurred.
+
+Copyright 1996, 1997, 2001, 2002, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define _GNU_SOURCE    /* for DECIMAL_POINT in langinfo.h */
+
+#include "config.h"
+
+#include <stdio.h>
+#include <string.h>
+
+#if HAVE_LANGINFO_H
+#include <langinfo.h>  /* for nl_langinfo */
+#endif
+
+#if HAVE_LOCALE_H
+#include <locale.h>    /* for localeconv */
+#endif
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Write OP to STREAM (stdout when STREAM is null) as an optional '-',
+   then "0" and the locale decimal point, then the mantissa digits in
+   base BASE, then an exponent marker and the exponent in decimal.
+
+   BASE 0 means 10.  N_DIGITS 0 means as many digits as the precision of
+   OP warrants.  The exponent marker is 'e' when the magnitude of BASE is
+   at most 10 and '@' otherwise, since in larger bases 'e' is itself a
+   digit.
+
+   Returns the number of bytes written, or 0 if an error occurred.  */
+size_t
+mpf_out_str (FILE *stream, int base, size_t n_digits, mpf_srcptr op)
+{
+  char *str;
+  mp_exp_t exp;
+  size_t written;
+  int abs_base;
+  TMP_DECL;
+
+  TMP_MARK;
+
+  if (base == 0)
+    base = 10;
+
+  /* A negative base asks mpf_get_str for upper-case digits (see the GMP
+     manual); the digit count and the choice of exponent marker depend
+     only on its magnitude, so never use the signed value for those.  */
+  abs_base = ABS (base);
+
+  if (n_digits == 0)
+    MPF_SIGNIFICANT_DIGITS (n_digits, abs_base, op->_mp_prec);
+
+  if (stream == 0)
+    stream = stdout;
+
+  /* Consider these changes:
+     * Don't allocate memory here for huge n_digits; pass NULL to mpf_get_str.
+     * Make mpf_get_str allocate extra space when passed NULL, to avoid
+       allocating two huge string buffers.
+     * Implement more/other allocation reductions tricks.  */
+
+  str = (char *) TMP_ALLOC (n_digits + 2); /* extra for minus sign and \0 */
+
+  /* If mpf_get_str declines to convert (it hands back a null pointer),
+     str holds nothing usable; report an error rather than reading it.  */
+  if (mpf_get_str (str, &exp, base, n_digits, op) == NULL)
+    {
+      TMP_FREE;
+      return 0;
+    }
+  n_digits = strlen (str);
+
+  written = 0;
+
+  /* Write sign */
+  if (str[0] == '-')
+    {
+      str++;
+      fputc ('-', stream);
+      written = 1;
+      n_digits--;
+    }
+
+  /* Write the leading "0" and the decimal point */
+  {
+    const char  *point = GMP_DECIMAL_POINT;
+    size_t      pointlen = strlen (point);
+    putc ('0', stream);
+    fwrite (point, 1, pointlen, stream);
+    written += pointlen + 1;
+  }
+
+  /* Write mantissa */
+  {
+    size_t fwret;
+    fwret = fwrite (str, 1, n_digits, stream);
+    written += fwret;
+  }
+
+  /* Write exponent.  The cast keeps the argument in step with "%ld"
+     wherever mp_exp_t is not long.  */
+  {
+    int fpret;
+    fpret = fprintf (stream, (abs_base <= 10 ? "e%ld" : "@%ld"), (long) exp);
+    written += fpret;
+  }
+
+  TMP_FREE;
+  return ferror (stream) ? 0 : written;
+}
diff --git a/mpf/pow_ui.c b/mpf/pow_ui.c

new file mode 100644 (file)

index 0000000..5d02914
--- /dev/null
+++ b/mpf/pow_ui.c
@@ -0,0 +1,43 @@
+/* mpf_pow_ui -- Compute b^e.
+
+Copyright 1998, 1999, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpf_pow_ui (mpf_ptr r, mpf_srcptr b, unsigned long int e)
+{
+  mpf_t sq;                    /* b^(2^k) after k squarings */
+  unsigned long int rest = e >> 1;     /* exponent bits still to process */
+
+  mpf_init2 (sq, mpf_get_prec (r));   /* work at the precision of r */
+  mpf_set (sq, b);
+  mpf_set_ui (r, 1);
+  if (e & 1)
+    mpf_set (r, sq);
+
+  for (; rest != 0; rest >>= 1)
+    {
+      mpf_mul (sq, sq, sq);
+      if (rest & 1)
+        mpf_mul (r, r, sq);
+    }
+
+  mpf_clear (sq);
+}
diff --git a/mpf/random2.c b/mpf/random2.c

new file mode 100644 (file)

index 0000000..d1bef10
--- /dev/null
+++ b/mpf/random2.c
@@ -0,0 +1,56 @@
+/* mpf_random2 -- Generate a positive random mpf_t of specified size, with
+   long runs of consecutive ones and zeros in the binary representation.
+   Intended for testing of other MP routines.
+
+Copyright 1995, 1996, 2001, 2002, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+void
+mpf_random2 (mpf_ptr x, mp_size_t xs, mp_exp_t exp)
+{
+  mp_size_t nlimbs = ABS (xs);         /* limbs to generate */
+  mp_size_t limit = PREC(x) + 1;       /* most limbs stored into x here */
+  mp_limb_t rnd;
+
+  if (nlimbs == 0)                     /* xs == 0 requests the value zero */
+    {
+      EXP(x) = 0;
+      SIZ(x) = 0;
+      return;
+    }
+
+  if (nlimbs > limit)
+    nlimbs = limit;
+
+  /* Generate random mantissa.  */
+  mpn_random2 (PTR(x), nlimbs);
+
+  /* Generate random exponent in the range -|exp| ... |exp|.  */
+  _gmp_rand (&rnd, RANDS, GMP_NUMB_BITS);
+  exp = ABS (exp);
+  exp = rnd % (2 * exp + 1) - exp;
+
+  EXP(x) = exp;
+  if (xs < 0)
+    SIZ(x) = -nlimbs;
+  else
+    SIZ(x) = nlimbs;
+}
diff --git a/mpf/reldiff.c b/mpf/reldiff.c

new file mode 100644 (file)

index 0000000..f9e40b6
--- /dev/null
+++ b/mpf/reldiff.c
@@ -0,0 +1,54 @@
+/* mpf_reldiff -- Generate the relative difference of two floats.
+
+Copyright 1996, 2001, 2004, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* The precision we use for d = x-y is based on what mpf_div will want from
+   the dividend.  It calls mpn_tdiv_qr to produce a quotient of rprec+1
+   limbs.  So rprec+1 == dsize - xsize + 1, hence dprec = rprec+xsize.  */
+
+void
+mpf_reldiff (mpf_t rdiff, mpf_srcptr x, mpf_srcptr y)
+{
+  mp_size_t xn, dprec;
+  mpf_t d;
+  TMP_DECL;
+
+  /* With x == 0 the result is simply 0 if y is zero too, else 1.  */
+  if (UNLIKELY (SIZ(x) == 0))
+    {
+      mpf_set_ui (rdiff, (unsigned long int) (mpf_sgn (y) != 0));
+      return;
+    }
+
+  TMP_MARK;
+  xn = ABSIZ(x);
+  dprec = PREC(rdiff) + xn;
+  ASSERT (PREC(rdiff)+1 == dprec - xn + 1);
+
+  PREC(d) = dprec;             /* d lives in temporary limb space */
+  PTR(d) = TMP_ALLOC_LIMBS (dprec + 1);
+
+  mpf_sub (d, x, y);
+  SIZ(d) = ABSIZ(d);           /* d = |x - y| */
+  mpf_div (rdiff, d, x);
+  TMP_FREE;
+}
diff --git a/mpf/set.c b/mpf/set.c

new file mode 100644 (file)

index 0000000..115ab71
--- /dev/null
+++ b/mpf/set.c
@@ -0,0 +1,45 @@
+/* mpf_set -- Assign a float from another float.
+
+Copyright 1993, 1994, 1995, 2001, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpf_set (mpf_ptr r, mpf_srcptr u)
+{
+  mp_size_t usize = u->_mp_size;       /* signed limb count of u */
+  mp_size_t n = ABS (usize);           /* limbs that get copied */
+  mp_size_t room = r->_mp_prec + 1;    /* one limb more than r's precision */
+  mp_ptr src = u->_mp_d;
+
+  /* Keep only the most significant limbs when u is longer than r holds.  */
+  if (n > room)
+    {
+      src += n - room;
+      n = room;
+    }
+
+  r->_mp_exp = u->_mp_exp;
+  if (usize < 0)
+    r->_mp_size = -n;
+  else
+    r->_mp_size = n;
+
+  MPN_COPY_INCR (r->_mp_d, src, n);
+}
diff --git a/mpf/set_d.c b/mpf/set_d.c

new file mode 100644 (file)

index 0000000..d72865d
--- /dev/null
+++ b/mpf/set_d.c
@@ -0,0 +1,50 @@
+/* mpf_set_d -- Assign a float from a double.
+
+Copyright 1993, 1994, 1995, 1996, 2001, 2003, 2004 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#if HAVE_FLOAT_H
+#include <float.h>  /* for DBL_MAX */
+#endif
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpf_set_d (mpf_ptr r, double d)
+{
+  int rsize = LIMBS_PER_DOUBLE;
+
+  DOUBLE_NAN_INF_ACTION (d,
+                         __gmp_invalid_operation (),
+                         __gmp_invalid_operation ());
+
+  if (UNLIKELY (d == 0))
+    {
+      SIZ(r) = 0;
+      EXP(r) = 0;
+      return;
+    }
+
+  if (d < 0)
+    rsize = -rsize;            /* the sign is carried by the size field */
+  SIZ(r) = rsize;
+  EXP(r) = __gmp_extract_double (PTR(r), ABS (d));
+}
diff --git a/mpf/set_dfl_prec.c b/mpf/set_dfl_prec.c

new file mode 100644 (file)

index 0000000..c209dd9
--- /dev/null
+++ b/mpf/set_dfl_prec.c
@@ -0,0 +1,29 @@
+/* mpf_set_default_prec --
+
+Copyright 1993, 1994, 1995, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+mp_size_t __gmp_default_fp_limb_precision = __GMPF_BITS_TO_PREC (53);
+/* Replace the default above with PREC_IN_BITS converted by __GMPF_BITS_TO_PREC.  */
+void
+mpf_set_default_prec (mp_bitcnt_t prec_in_bits) __GMP_NOTHROW
+{
+  __gmp_default_fp_limb_precision = __GMPF_BITS_TO_PREC (prec_in_bits);
+}
diff --git a/mpf/set_prc.c b/mpf/set_prc.c

new file mode 100644 (file)

index 0000000..873b12e
--- /dev/null
+++ b/mpf/set_prc.c
@@ -0,0 +1,58 @@
+/* mpf_set_prec(x) -- Change the precision of x.
+
+Copyright 1993, 1994, 1995, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* A full new_prec+1 limbs are always retained, even though just new_prec
+   would satisfy the requested precision.  If size==new_prec+1 then
+   certainly new_prec+1 should be kept since no copying is needed in that
+   case.  If just new_prec was kept for size>new_prec+1 it'd be a bit
+   inconsistent.  */
+
+void
+mpf_set_prec (mpf_ptr x, mp_bitcnt_t new_prec_in_bits)
+{
+  mp_size_t  was = PREC(x);
+  mp_size_t  want = __GMPF_BITS_TO_PREC (new_prec_in_bits);
+  mp_size_t  keep = want + 1;          /* limbs allocated and retained */
+  mp_size_t  xsize = SIZ(x);
+  mp_size_t  used = ABS (xsize);
+  mp_ptr     limbs = PTR(x);
+
+  /* Same precision already: nothing to change.  */
+  if (want == was)
+    return;
+
+  PREC(x) = want;
+
+  /* Too many limbs in use: slide the top `keep' of them down to the start
+     of the block and shorten the size, preserving its sign.  */
+  if (used > keep)
+    {
+      MPN_COPY_INCR (limbs, limbs + used - keep, keep);
+      if (xsize >= 0)
+        SIZ(x) = keep;
+      else
+        SIZ(x) = -keep;
+    }
+
+  PTR(x) = __GMP_REALLOCATE_FUNC_LIMBS (limbs, was+1, keep);
+}
diff --git a/mpf/set_prc_raw.c b/mpf/set_prc_raw.c

new file mode 100644 (file)

index 0000000..0473b5d
--- /dev/null
+++ b/mpf/set_prc_raw.c
@@ -0,0 +1,29 @@
+/* mpf_set_prec_raw(x,bits) -- Change the precision of x without changing
+   allocation.  For proper operation, the original precision needs to be reset
+   sooner or later.
+
+Copyright 1996, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpf_set_prec_raw (mpf_ptr x, mp_bitcnt_t prec_in_bits) __GMP_NOTHROW
+{
+  x->_mp_prec = __GMPF_BITS_TO_PREC (prec_in_bits);    /* only the precision field is written */
+}
diff --git a/mpf/set_q.c b/mpf/set_q.c

new file mode 100644 (file)

index 0000000..a54aa3b
--- /dev/null
+++ b/mpf/set_q.c
@@ -0,0 +1,144 @@
+/* mpf_set_q (mpf_t rop, mpq_t op) -- Convert the rational op to the float rop.
+
+Copyright 1996, 1999, 2001, 2002, 2004, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>  /* for NULL */
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* As usual the aim is to produce PREC(r) limbs, with the high non-zero.
+   The basic mpn_tdiv_qr produces a quotient of nsize-dsize+1 limbs, with
+   either the high or second highest limb non-zero.  We arrange for
+   nsize-dsize+1 to equal prec+1, hence giving either prec or prec+1 result
+   limbs at PTR(r).
+
+   nsize-dsize+1 == prec+1 is achieved by adjusting num(q), either dropping
+   low limbs if it's too big, or padding with low zeros if it's too small.
+   The full given den(q) is always used.
+
+   We cannot truncate den(q), because even when it's much bigger than prec
+   the last limbs can still influence the final quotient.  Often they don't,
+   but we leave optimization of that to a prospective quotient-only mpn
+   division.
+
+   Not done:
+
+   If den(q) is a power of 2 then we may end up with low zero limbs on the
+   result.  But nothing is done about this, since it should be unlikely on
+   random data, and can be left to an application to call mpf_div_2exp if it
+   might occur with any frequency.
+
+   Enhancements:
+
+   The high quotient limb is non-zero when high{np,dsize} >= {dp,dsize}.  We
+   could make that comparison and use qsize==prec instead of qsize==prec+1,
+   to save one limb in the division.
+
+   Future:
+
+   If/when mpn_tdiv_qr supports its qxn parameter we can use that instead of
+   padding n with zeros in temporary space.
+
+   If/when a quotient-only division exists it can be used here immediately.
+   remp is only to satisfy mpn_tdiv_qr, the remainder is not used.  */
+
+void
+mpf_set_q (mpf_t r, mpq_srcptr q)
+{
+  mp_srcptr np, dp;
+  mp_size_t prec, nsize, dsize, qsize, prospective_qsize, tsize, zeros;
+  mp_size_t sign_quotient, high_zero;
+  mp_ptr qp, tp, remp;
+  mp_exp_t exp;
+  TMP_DECL;
+
+  ASSERT (SIZ(&q->_mp_den) > 0);  /* canonical q */
+
+  nsize = SIZ (&q->_mp_num);
+  dsize = SIZ (&q->_mp_den);
+  /* A zero numerator gives a zero result without any division.  */
+  if (UNLIKELY (nsize == 0))
+    {
+      SIZ (r) = 0;
+      EXP (r) = 0;
+      return;
+    }
+
+  TMP_MARK;
+
+  prec = PREC (r);
+  qp = PTR (r);
+  /* The sign of the result is that of the numerator; sizes are magnitudes below.  */
+  sign_quotient = nsize;
+  nsize = ABS (nsize);
+  np = PTR (&q->_mp_num);
+  dp = PTR (&q->_mp_den);
+
+  prospective_qsize = nsize - dsize + 1;  /* q from using given n,d sizes */
+  exp = prospective_qsize;                /* ie. number of integer limbs */
+  qsize = prec + 1;                       /* desired q */
+
+  zeros = qsize - prospective_qsize;   /* n zeros to get desired qsize */
+  tsize = nsize + zeros;               /* possible copy of n */
+
+  if (WANT_TMP_DEBUG)
+    {
+      /* separate alloc blocks, for malloc debugging */
+      remp = TMP_ALLOC_LIMBS (dsize);
+      tp = NULL;
+      if (zeros > 0)
+        tp = TMP_ALLOC_LIMBS (tsize);
+    }
+  else
+    {
+      /* one alloc with a conditionalized size, for efficiency */
+      mp_size_t size = dsize + (zeros > 0 ? tsize : 0);
+      remp = TMP_ALLOC_LIMBS (size);
+      tp = remp + dsize;               /* only used below when zeros > 0 */
+    }
+
+  if (zeros > 0)
+    {
+      /* pad n with zeros into temporary space */
+      MPN_ZERO (tp, zeros);
+      MPN_COPY (tp+zeros, np, nsize);
+      np = tp;
+      nsize = tsize;
+    }
+  else
+    {
+      /* shorten n to get desired qsize */
+      nsize += zeros;
+      np -= zeros;                     /* zeros <= 0 here, so np moves up */
+    }
+
+  ASSERT (nsize-dsize+1 == qsize);
+  mpn_tdiv_qr (qp, remp, (mp_size_t) 0, np, nsize, dp, dsize);
+
+  /* strip possible zero high limb */
+  high_zero = (qp[qsize-1] == 0);
+  qsize -= high_zero;
+  exp -= high_zero;
+
+  EXP (r) = exp;
+  SIZ (r) = sign_quotient >= 0 ? qsize : -qsize;
+
+  TMP_FREE;
+}
diff --git a/mpf/set_si.c b/mpf/set_si.c

new file mode 100644 (file)

index 0000000..aa7b4ee
--- /dev/null
+++ b/mpf/set_si.c
@@ -0,0 +1,43 @@
+/* mpf_set_si() -- Assign a float from a signed int.
+
+Copyright 1993, 1994, 1995, 2000, 2001, 2002, 2004 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpf_set_si (mpf_ptr dest, long val)
+{
+  mp_limb_t mag = (mp_limb_t) ABS_CAST (unsigned long int, val);
+  mp_size_t nlimbs = (mag != 0);
+
+  dest->_mp_d[0] = mag & GMP_NUMB_MASK;
+
+#if BITS_PER_ULONG > GMP_NUMB_BITS
+  /* An unsigned long may not fit one limb: store the rest in a second.  */
+  mag >>= GMP_NUMB_BITS;
+  dest->_mp_d[1] = mag;
+  nlimbs += (mag != 0);
+#endif
+
+  dest->_mp_exp = nlimbs;
+  if (val < 0)
+    nlimbs = -nlimbs;
+  dest->_mp_size = nlimbs;
+}
diff --git a/mpf/set_str.c b/mpf/set_str.c

new file mode 100644 (file)

index 0000000..01a175f
--- /dev/null
+++ b/mpf/set_str.c
@@ -0,0 +1,391 @@
+/* mpf_set_str (dest, string, base) -- Convert the string STRING
+   in base BASE to a float in dest.  If BASE is zero, base 10 is used; a
+   negative BASE means base -BASE with the exponent given in decimal.
+
+Copyright 1993, 1994, 1995, 1996, 1997, 2000, 2001, 2002, 2003, 2005, 2007,
+2008 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+/*
+  This still needs work, as suggested by some FIXME comments.
+  1. Don't depend on superfluous mantissa digits.
+  2. Allocate temp space more cleverly.
+  3. Use mpn_tdiv_qr instead of mpn_lshift+mpn_divrem.
+*/
+
+#define _GNU_SOURCE    /* for DECIMAL_POINT in langinfo.h */
+
+#include "config.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#if HAVE_LANGINFO_H
+#include <langinfo.h>  /* for nl_langinfo */
+#endif
+
+#if HAVE_LOCALE_H
+#include <locale.h>    /* for localeconv */
+#endif
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+extern const unsigned char __gmp_digit_value_tab[];
+#define digit_value_tab __gmp_digit_value_tab
+
+/* Compute base^exp and return the most significant prec limbs in rp[].
+   Put the count of omitted low limbs in *ignp.
+   Return the actual size (which might be less than prec).  */
+static mp_size_t
+mpn_pow_1_highpart (mp_ptr rp, mp_size_t *ignp,
+                   mp_limb_t base, mp_exp_t exp,
+                   mp_size_t prec, mp_ptr tp)
+{
+  mp_size_t ign;               /* counts number of ignored low limbs in r */
+  mp_size_t off;               /* keeps track of offset where value starts */
+  mp_ptr passed_rp = rp;       /* rp and tp get swapped below; remember the caller's buffer */
+  mp_size_t rn;
+  int cnt;
+  int i;
+  /* Square-and-multiply, scanning exp downwards from the bit below its top set bit.  */
+  rp[0] = base;
+  rn = 1;
+  off = 0;
+  ign = 0;
+  count_leading_zeros (cnt, exp);
+  for (i = GMP_LIMB_BITS - cnt - 2; i >= 0; i--)
+    {
+      mpn_sqr (tp, rp + off, rn);
+      rn = 2 * rn;
+      rn -= tp[rn - 1] == 0;
+      ign <<= 1;               /* squaring doubles the count of dropped low limbs */
+
+      off = 0;
+      if (rn > prec)
+       {
+         ign += rn - prec;
+         off = rn - prec;
+         rn = prec;
+       }
+      MP_PTR_SWAP (rp, tp);    /* the square is now at rp + off */
+
+      if (((exp >> i) & 1) != 0)
+       {
+         mp_limb_t cy;
+         cy = mpn_mul_1 (rp, rp + off, rn, base);
+         rp[rn] = cy;
+         rn += cy != 0;
+         off = 0;
+       }
+    }
+
+  if (rn > prec)
+    {
+      ign += rn - prec;
+      rp += rn - prec;
+      rn = prec;
+    }
+
+  MPN_COPY_INCR (passed_rp, rp + off, rn);
+  *ignp = ign;
+  return rn;
+}
+
+int
+mpf_set_str (mpf_ptr x, const char *str, int base)
+{
+  size_t str_size;
+  char *s, *begs;
+  size_t i, j;
+  int c;
+  int negative;
+  char *dotpos = 0;            /* place in the digit buffer where the point was seen */
+  const char *expptr;
+  int exp_base;
+  const char  *point = GMP_DECIMAL_POINT;
+  size_t      pointlen = strlen (point);
+  const unsigned char *digit_value;
+  TMP_DECL;
+
+  c = (unsigned char) *str;
+
+  /* Skip whitespace.  */
+  while (isspace (c))
+    c = (unsigned char) *++str;
+
+  negative = 0;
+  if (c == '-')
+    {
+      negative = 1;
+      c = (unsigned char) *++str;
+    }
+
+  /* Default base to decimal.  */
+  if (base == 0)
+    base = 10;
+
+  exp_base = base;
+  /* A negative base selects base -base with a decimal exponent.  */
+  if (base < 0)
+    {
+      exp_base = 10;
+      base = -base;
+    }
+
+  digit_value = digit_value_tab;
+  if (base > 36)
+    {
+      /* For bases > 36, use the collating sequence
+        0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz.  */
+      digit_value += 224;
+      if (base > 62)
+       return -1;              /* too large base */
+    }
+  /* NOTE: base was defaulted above, so the `base == 0' tests below never fire.  */
+  /* Require at least one digit, possibly after an initial decimal point.  */
+  if (digit_value[c] >= (base == 0 ? 10 : base))
+    {
+      /* not a digit, must be a decimal point */
+      for (i = 0; i < pointlen; i++)
+        if (str[i] != point[i])
+          return -1;
+      if (digit_value[(unsigned char) str[pointlen]] >= (base == 0 ? 10 : base))
+       return -1;
+    }
+
+  /* Locate exponent part of the input.  Look from the right of the string,
+     since the exponent is usually a lot shorter than the mantissa.  */
+  expptr = NULL;
+  str_size = strlen (str);
+  for (i = str_size - 1; i > 0; i--)
+    {
+      c = (unsigned char) str[i];
+      if (c == '@' || (base <= 10 && (c == 'e' || c == 'E')))
+       {
+         expptr = str + i + 1;
+         str_size = i;         /* the mantissa ends just before the marker */
+         break;
+       }
+    }
+
+  TMP_MARK;
+  s = begs = (char *) TMP_ALLOC (str_size + 1);
+
+  /* Loop through mantissa, converting it from ASCII to raw byte values.  */
+  for (i = 0; i < str_size; i++)
+    {
+      c = (unsigned char) *str;
+      if (!isspace (c))
+       {
+         int dig;
+         /* Is the decimal point string found at str?  */
+          for (j = 0; j < pointlen; j++)
+            if (str[j] != point[j])
+              goto not_point;
+          if (1)               /* reached only when all of point matched */
+           {
+             if (dotpos != 0)
+               {
+                 /* already saw a decimal point, another is invalid */
+                 TMP_FREE;
+                 return -1;
+               }
+             dotpos = s;
+             str += pointlen - 1;
+             i += pointlen - 1;
+           }
+         else                  /* entered only through the not_point label */
+           {
+            not_point:
+             dig = digit_value[c];
+             if (dig >= base)
+               {
+                 TMP_FREE;
+                 return -1;
+               }
+             *s++ = dig;
+           }
+       }
+      c = (unsigned char) *++str;
+    }
+
+  str_size = s - begs;         /* number of digit values stored */
+
+  {
+    long exp_in_base;
+    mp_size_t ra, ma, rn, mn;
+    int cnt;
+    mp_ptr mp, tp, rp;
+    mp_exp_t exp_in_limbs;
+    mp_size_t prec = PREC(x) + 1;
+    int divflag;
+    mp_size_t madj, radj;
+
+#if 0
+    size_t n_chars_needed;
+
+    /* This breaks things like 0.000...0001.  To safely ignore superfluous
+       digits, we need to skip over leading zeros.  */
+    /* Just consider the relevant leading digits of the mantissa.  */
+    n_chars_needed = 2 + (size_t)
+      (((size_t) prec * GMP_NUMB_BITS) * mp_bases[base].chars_per_bit_exactly);
+    if (str_size > n_chars_needed)
+      str_size = n_chars_needed;
+#endif
+
+    ma = 2 + (mp_size_t)
+      (str_size / (GMP_NUMB_BITS * mp_bases[base].chars_per_bit_exactly));
+    mp = TMP_ALLOC_LIMBS (ma);
+    mn = mpn_set_str (mp, (unsigned char *) begs, str_size, base);
+
+    if (mn == 0)
+      {
+       SIZ(x) = 0;
+       EXP(x) = 0;
+       TMP_FREE;
+       return 0;
+      }
+
+    madj = 0;
+    /* Ignore excess limbs in MP,MSIZE.  */
+    if (mn > prec)
+      {
+       madj = mn - prec;
+       mp += mn - prec;
+       mn = prec;
+      }
+
+    if (expptr != 0)
+      {
+       /* Scan and convert the exponent, in base exp_base; stops at the first non-digit.  */
+       long dig, minus, plusminus;
+       c = (unsigned char) *expptr;
+       minus = -(long) (c == '-');
+       plusminus = minus | -(long) (c == '+');
+       expptr -= plusminus;                    /* conditional increment */
+       c = (unsigned char) *expptr++;
+       dig = digit_value[c];
+       if (dig >= exp_base)
+         {
+           TMP_FREE;
+           return -1;
+         }
+       exp_in_base = dig;
+       c = (unsigned char) *expptr++;
+       dig = digit_value[c];
+       while (dig < exp_base)
+         {
+           exp_in_base = exp_in_base * exp_base;
+           exp_in_base += dig;
+           c = (unsigned char) *expptr++;
+           dig = digit_value[c];
+         }
+       exp_in_base = (exp_in_base ^ minus) - minus; /* conditional negation */
+      }
+    else
+      exp_in_base = 0;
+    if (dotpos != 0)
+      exp_in_base -= s - dotpos;       /* account for the digits after the point */
+    divflag = exp_in_base < 0;
+    exp_in_base = ABS (exp_in_base);
+
+    if (exp_in_base == 0)
+      {
+       MPN_COPY (PTR(x), mp, mn);
+       SIZ(x) = negative ? -mn : mn;
+       EXP(x) = mn + madj;
+       TMP_FREE;
+       return 0;
+      }
+
+    ra = 2 * (prec + 1);
+    rp = TMP_ALLOC_LIMBS (ra);
+    tp = TMP_ALLOC_LIMBS (ra);
+    rn = mpn_pow_1_highpart (rp, &radj, (mp_limb_t) base, exp_in_base, prec, tp);
+
+    if (divflag)
+      {
+#if 0
+       /* FIXME: Should use mpn_tdiv here.  */
+       mpn_tdiv_qr (qp, mp, 0L, mp, mn, rp, rn);
+#else
+       mp_ptr qp;
+       mp_limb_t qlimb;
+       if (mn < rn)
+         {
+           /* Pad out MP,MSIZE for current divrem semantics.  */
+           mp_ptr tmp = TMP_ALLOC_LIMBS (rn + 1);
+           MPN_ZERO (tmp, rn - mn);
+           MPN_COPY (tmp + rn - mn, mp, mn);
+           mp = tmp;
+           madj -= rn - mn;
+           mn = rn;
+         }
+       if ((rp[rn - 1] & GMP_NUMB_HIGHBIT) == 0)
+         {
+           mp_limb_t cy;
+           count_leading_zeros (cnt, rp[rn - 1]);
+           cnt -= GMP_NAIL_BITS;
+           mpn_lshift (rp, rp, rn, cnt);
+           cy = mpn_lshift (mp, mp, mn, cnt);
+           if (cy)
+             mp[mn++] = cy;
+         }
+
+       qp = TMP_ALLOC_LIMBS (prec + 1);
+       qlimb = mpn_divrem (qp, prec - (mn - rn), mp, mn, rp, rn);
+       tp = qp;
+       exp_in_limbs = qlimb + (mn - rn) + (madj - radj);
+       rn = prec;
+       if (qlimb != 0)
+         {
+           tp[prec] = qlimb;
+           /* Skip the least significant limb not to overrun the destination
+              variable.  */
+           tp++;
+         }
+#endif
+      }
+    else
+      {
+       tp = TMP_ALLOC_LIMBS (rn + mn);
+       if (rn > mn)
+         mpn_mul (tp, rp, rn, mp, mn);
+       else
+         mpn_mul (tp, mp, mn, rp, rn);
+       rn += mn;
+       rn -= tp[rn - 1] == 0;
+       exp_in_limbs = rn + madj + radj;
+
+       if (rn > prec)
+         {
+           tp += rn - prec;
+           rn = prec;
+           exp_in_limbs += 0;  /* has no effect */
+         }
+      }
+
+    MPN_COPY (PTR(x), tp, rn);
+    SIZ(x) = negative ? -rn : rn;
+    EXP(x) = exp_in_limbs;
+    TMP_FREE;
+    return 0;
+  }
+}
diff --git a/mpf/set_ui.c b/mpf/set_ui.c

new file mode 100644 (file)

index 0000000..3a793c8
--- /dev/null
+++ b/mpf/set_ui.c
@@ -0,0 +1,38 @@
+/* mpf_set_ui() -- Assign a float from an unsigned int.
+
+Copyright 1993, 1994, 1995, 2001, 2002, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpf_set_ui (mpf_ptr f, unsigned long val)
+{
+  mp_size_t nlimbs = (val != 0);       /* zero is stored with no limbs */
+
+  f->_mp_d[0] = val & GMP_NUMB_MASK;
+
+#if BITS_PER_ULONG > GMP_NUMB_BITS
+  val >>= GMP_NUMB_BITS;
+  f->_mp_d[1] = val;
+  nlimbs += (val != 0);
+#endif
+
+  f->_mp_size = nlimbs;
+  f->_mp_exp = nlimbs;
+}
diff --git a/mpf/set_z.c b/mpf/set_z.c

new file mode 100644 (file)

index 0000000..4b0f01b
--- /dev/null
+++ b/mpf/set_z.c
@@ -0,0 +1,46 @@
+/* mpf_set_z -- Assign a float from an integer.
+
+Copyright 1996, 2001, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpf_set_z (mpf_ptr r, mpz_srcptr u)
+{
+  mp_size_t usize = SIZ (u);           /* signed limb count of u */
+  mp_size_t n = ABS (usize);
+  mp_size_t room = PREC (r) + 1;       /* most limbs stored into r */
+  mp_ptr src = PTR (u);
+
+  /* The exponent is the full limb count of u, taken before any low limbs
+     are dropped below.  */
+  EXP (r) = n;
+
+  if (n > room)
+    {
+      src += n - room;
+      n = room;
+    }
+
+  if (usize < 0)
+    SIZ (r) = -n;
+  else
+    SIZ (r) = n;
+  MPN_COPY (PTR (r), src, n);
+}
diff --git a/mpf/size.c b/mpf/size.c

new file mode 100644 (file)

index 0000000..34dfa5f
--- /dev/null
+++ b/mpf/size.c
@@ -0,0 +1,28 @@
+/* mpf_size(x) -- return the number of limbs currently used by the
+   value of the float X.
+
+Copyright 1993, 1994, 1995, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+size_t
+mpf_size (mpf_srcptr f) __GMP_NOTHROW
+{
+  return ABSIZ (f);   /* limbs in use: magnitude of the signed size field */
+}
diff --git a/mpf/sqrt.c b/mpf/sqrt.c

new file mode 100644 (file)

index 0000000..dce9aff
--- /dev/null
+++ b/mpf/sqrt.c
@@ -0,0 +1,102 @@
+/* mpf_sqrt -- Compute the square root of a float.
+
+Copyright 1993, 1994, 1996, 2000, 2001, 2004, 2005 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h> /* for NULL */
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* As usual, the aim is to produce PREC(r) limbs of result, with the high
+   limb non-zero.  This is accomplished by applying mpn_sqrtrem to either
+   2*prec or 2*prec-1 limbs, both such sizes resulting in prec limbs.
+
+   The choice between 2*prec or 2*prec-1 limbs is based on the input
+   exponent.  With b=2^GMP_NUMB_BITS the limb base then we can think of
+   effectively taking out a factor b^(2k), for suitable k, to get to an
+   integer input of the desired size ready for mpn_sqrtrem.  It must be an
+   even power taken out, ie. an even number of limbs, so the square root
+   gives factor b^k and the radix point is still on a limb boundary.  So if
+   EXP(u) is even we'll get an even number of input limbs 2*prec, or if
+   EXP(u) is odd we get an odd number 2*prec-1.
+
+   Further limbs below the 2*prec or 2*prec-1 used don't affect the result
+   and are simply truncated.  This can be seen by considering an integer x,
+   with s=floor(sqrt(x)).  s is the unique integer satisfying s^2 <= x <
+   (s+1)^2.  Notice that adding a fraction part to x (ie. some further bits)
+   doesn't change the inequality, s remains the unique solution.  Working
+   suitable factors of 2 into this argument lets it apply to an intended
+   precision at any position for any x, not just the integer binary point.
+
+   If the input is smaller than 2*prec or 2*prec-1, then we just pad with
+   zeros, that of course being our usual interpretation of short inputs.
+   The effect is to extend the root beyond the size of the input (for
+   instance into fractional limbs if u is an integer).  */
+
+void
+mpf_sqrt (mpf_ptr r, mpf_srcptr u)
+{
+  mp_size_t un, rn, tn, pad;
+  mp_exp_t ue, odd;
+  mp_ptr src, tmp;
+  TMP_DECL;
+
+  un = SIZ (u);
+  if (un <= 0)
+    {
+      /* A negative operand invokes SQRT_OF_NEGATIVE; the result is then
+         set to zero, exactly as it is for a zero operand.  */
+      if (un < 0)
+        SQRT_OF_NEGATIVE;
+      SIZ (r) = 0;
+      EXP (r) = 0;
+      return;
+    }
+
+  TMP_MARK;
+
+  ue = EXP (u);
+  rn = PREC (r);
+  src = PTR (u);
+
+  /* Feed mpn_sqrtrem 2*rn limbs for an even exponent and 2*rn-1 for an
+     odd one; ceil(tn/2) is rn limbs of root either way.  */
+  odd = ue & 1;
+  tn = 2 * rn - odd;
+  SIZ (r) = rn;
+  EXP (r) = (ue + odd) / 2;            /* ceil(ue/2) */
+  ASSERT ((tn + 1) / 2 == rn);
+
+  tmp = TMP_ALLOC_LIMBS (tn);
+  if (un > tn)
+    {
+      /* u is longer than needed: take its tn most significant limbs */
+      MPN_COPY (tmp, src + (un - tn), tn);
+    }
+  else
+    {
+      /* u is short: zero-fill the low limbs and put u at the top */
+      pad = tn - un;
+      MPN_ZERO (tmp, pad);
+      MPN_COPY (tmp + pad, src, un);
+    }
+
+  mpn_sqrtrem (PTR (r), NULL, tmp, tn);
+  TMP_FREE;
+}
diff --git a/mpf/sqrt_ui.c b/mpf/sqrt_ui.c

new file mode 100644 (file)

index 0000000..c322255
--- /dev/null
+++ b/mpf/sqrt_ui.c
@@ -0,0 +1,98 @@
+/* mpf_sqrt_ui -- Compute the square root of an unsigned integer.
+
+Copyright 1993, 1994, 1996, 2000, 2001, 2004, 2005 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h> /* for NULL */
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* As usual the aim is to produce PREC(r) limbs of result with the high limb
+   non-zero.  That high limb will end up floor(sqrt(u)), and limbs below are
+   produced by padding the input with zeros, two for each desired result
+   limb, being 2*(prec-1) for a total 2*prec-1 limbs passed to mpn_sqrtrem.
+   The way mpn_sqrtrem calculates floor(sqrt(x)) ensures the root is correct
+   to the intended accuracy, ie. truncated to prec limbs.
+
+   With nails, u might be two limbs, in which case a total 2*prec limbs is
+   passed to mpn_sqrtrem (still giving a prec limb result).  If uhigh is
+   zero we adjust back to 2*prec-1, since mpn_sqrtrem requires the high
+   non-zero.  2*prec limbs are always allocated, even when uhigh is zero, so
+   the store of uhigh can be done without a conditional.
+
+   u==0 is a special case so the rest of the code can assume the result is
+   non-zero (ie. will have a non-zero high limb on the result).
+
+   Not done:
+
+   No attempt is made to identify perfect squares.  It's considered this can
+   be left to an application if it might occur with any frequency.  As it
+   stands, mpn_sqrtrem does its normal amount of work on a perfect square
+   followed by zero limbs, though of course only an mpn_sqrtrem1 would be
+   actually needed.  We also end up leaving our mpf result with lots of low
+   trailing zeros, slowing down subsequent operations.
+
+   We're not aware of any optimizations that can be made using the fact the
+   input has lots of trailing zeros (apart from the perfect square
+   case).  */
+
+
+/* 1 if we (might) need two limbs for u */
+#define U2   (GMP_NUMB_BITS < BITS_PER_ULONG)
+
+void
+mpf_sqrt_ui (mpf_ptr r, unsigned long int u)
+{
+  mp_size_t rsize, zeros;
+  mp_ptr tp;
+  mp_size_t prec;
+  TMP_DECL;
+
+  if (UNLIKELY (u == 0))
+    {
+      r->_mp_size = 0;
+      r->_mp_exp = 0;
+      return;
+    }
+
+  TMP_MARK;
+
+  prec = r->_mp_prec;
+  zeros = 2 * prec - 2;
+  rsize = zeros + 1 + U2;
+
+  tp = TMP_ALLOC_LIMBS (rsize);
+
+  MPN_ZERO (tp, zeros);
+  tp[zeros] = u & GMP_NUMB_MASK;
+
+#if U2
+  {
+    mp_limb_t uhigh = u >> GMP_NUMB_BITS;
+    tp[zeros + 1] = uhigh;
+    rsize -= (uhigh == 0);
+  }
+#endif
+
+  mpn_sqrtrem (r->_mp_d, NULL, tp, rsize);
+
+  r->_mp_size = prec;
+  r->_mp_exp = 1;
+  TMP_FREE;
+}
diff --git a/mpf/sub.c b/mpf/sub.c

new file mode 100644 (file)

index 0000000..575b412
--- /dev/null
+++ b/mpf/sub.c
@@ -0,0 +1,409 @@
+/* mpf_sub -- Subtract two floats.
+
+Copyright 1993, 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004, 2005, 2011 Free
+Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpf_sub (mpf_ptr r, mpf_srcptr u, mpf_srcptr v)
+{
+  mp_srcptr up, vp;
+  mp_ptr rp, tp;
+  mp_size_t usize, vsize, rsize;
+  mp_size_t prec;
+  mp_exp_t exp;
+  mp_size_t ediff;
+  int negate;
+  TMP_DECL;
+  /* Compute r = u - v; NEGATE says whether the final magnitude is negated.  */
+  usize = u->_mp_size;
+  vsize = v->_mp_size;
+
+  /* Handle special cases that don't work in generic code below.  */
+  if (usize == 0)
+    {
+      mpf_neg (r, v);
+      return;
+    }
+  if (vsize == 0)
+    {
+      if (r != u)
+        mpf_set (r, u);
+      return;
+    }
+
+  /* If signs of U and V are different, perform addition.  */
+  if ((usize ^ vsize) < 0)
+    {
+      __mpf_struct v_negated;
+      v_negated._mp_size = -vsize;
+      v_negated._mp_exp = v->_mp_exp;
+      v_negated._mp_d = v->_mp_d;
+      mpf_add (r, u, &v_negated);
+      return;
+    }
+
+  TMP_MARK;
+
+  /* Signs are now known to be the same.  */
+  negate = usize < 0;           /* both negative: |u| - |v| must be negated */
+
+  /* Make U be the operand with the largest exponent.  */
+  if (u->_mp_exp < v->_mp_exp)
+    {
+      mpf_srcptr t;
+      t = u; u = v; v = t;
+      negate ^= 1;
+      usize = u->_mp_size;
+      vsize = v->_mp_size;
+    }
+
+  usize = ABS (usize);
+  vsize = ABS (vsize);
+  up = u->_mp_d;
+  vp = v->_mp_d;
+  rp = r->_mp_d;
+  prec = r->_mp_prec + 1;
+  exp = u->_mp_exp;
+  ediff = u->_mp_exp - v->_mp_exp;
+
+  /* If ediff is 0 or 1, we might have a situation where the operands are
+     extremely close.  We need to scan the operands from the most significant
+     end and ignore the initial parts that are equal.  */
+  if (ediff <= 1)
+    {
+      if (ediff == 0)
+       {
+         /* Skip leading limbs in U and V that are equal.  */
+         if (up[usize - 1] == vp[vsize - 1])
+           {
+             /* This loop normally exits immediately.  Optimize for that.  */
+             do
+               {
+                 usize--;
+                 vsize--;
+                 exp--;
+
+                 if (usize == 0)
+                   {
+                      /* u cancels high limbs of v, result is rest of v */
+                     negate ^= 1;
+                    cancellation:
+                      /* strip high zeros before truncating to prec */
+                      while (vsize != 0 && vp[vsize - 1] == 0)
+                        {
+                          vsize--;
+                          exp--;
+                        }
+                     if (vsize > prec)
+                       {
+                         vp += vsize - prec;
+                         vsize = prec;
+                       }
+                      MPN_COPY_INCR (rp, vp, vsize);
+                      rsize = vsize;
+                      goto done;
+                   }
+                 if (vsize == 0)
+                   {
+                      vp = up;
+                      vsize = usize;
+                      goto cancellation;
+                   }
+               }
+             while (up[usize - 1] == vp[vsize - 1]);
+           }
+
+         if (up[usize - 1] < vp[vsize - 1])
+           {
+             /* For simplicity, swap U and V.  Note that since the loop above
+                wouldn't have exited unless up[usize - 1] and vp[vsize - 1]
+                were non-equal, this if-statement catches all cases where U
+                is smaller than V.  */
+             MPN_SRCPTR_SWAP (up,usize, vp,vsize);
+             negate ^= 1;
+             /* negating ediff not necessary since it is 0.  */
+           }
+
+         /* Check for
+            x+1 00000000 ...
+             x  ffffffff ... */
+         if (up[usize - 1] != vp[vsize - 1] + 1)
+           goto general_case;
+         usize--;
+         vsize--;
+         exp--;
+       }
+      else /* ediff == 1 */
+       {
+         /* Check for
+            1 00000000 ...
+            0 ffffffff ... */
+
+         if (up[usize - 1] != 1 || vp[vsize - 1] != GMP_NUMB_MAX
+             || (usize >= 2 && up[usize - 2] != 0))
+           goto general_case;
+
+         usize--;
+         exp--;
+       }
+
+      /* Skip sequences of 00000000/ffffffff */
+      while (vsize != 0 && usize != 0 && up[usize - 1] == 0
+            && vp[vsize - 1] == GMP_NUMB_MAX)
+       {
+         usize--;
+         vsize--;
+         exp--;
+       }
+
+      if (usize == 0)
+       {
+         while (vsize != 0 && vp[vsize - 1] == GMP_NUMB_MAX)
+           {
+             vsize--;
+             exp--;
+           }
+       }
+
+      if (usize > prec - 1)
+       {
+         up += usize - (prec - 1);
+         usize = prec - 1;
+       }
+      if (vsize > prec - 1)
+       {
+         vp += vsize - (prec - 1);
+         vsize = prec - 1;
+       }
+
+      tp = TMP_ALLOC_LIMBS (prec);     /* prec-1 limbs plus a possible top limb */
+      {
+       mp_limb_t cy_limb;
+       if (vsize == 0)
+         {
+           mp_size_t size, i;
+           size = usize;
+           for (i = 0; i < size; i++)
+             tp[i] = up[i];
+           tp[size] = 1;
+           rsize = size + 1;
+           exp++;
+           goto normalize;
+         }
+       if (usize == 0)
+         {
+           mp_size_t size, i;
+           size = vsize;
+           for (i = 0; i < size; i++)
+             tp[i] = ~vp[i] & GMP_NUMB_MASK;
+           cy_limb = 1 - mpn_add_1 (tp, tp, vsize, (mp_limb_t) 1);
+           rsize = vsize;
+           if (cy_limb == 0)
+             {
+               tp[rsize] = 1;
+               rsize++;
+               exp++;
+             }
+           goto normalize;
+         }
+       if (usize >= vsize)
+         {
+           /* uuuu     */
+           /* vv       */
+           mp_size_t size;
+           size = usize - vsize;
+           MPN_COPY (tp, up, size);
+           cy_limb = mpn_sub_n (tp + size, up + size, vp, vsize);
+           rsize = usize;
+         }
+       else /* (usize < vsize) */
+         {
+           /* uuuu     */
+           /* vvvvvvv  */
+           mp_size_t size, i;
+           size = vsize - usize;
+           for (i = 0; i < size; i++)
+             tp[i] = ~vp[i] & GMP_NUMB_MASK;
+           cy_limb = mpn_sub_n (tp + size, up, vp + size, usize);
+           cy_limb+= mpn_sub_1 (tp + size, tp + size, usize, (mp_limb_t) 1);
+           cy_limb-= mpn_add_1 (tp, tp, vsize, (mp_limb_t) 1);
+           rsize = vsize;
+         }
+       if (cy_limb == 0)
+         {
+           tp[rsize] = 1;
+           rsize++;
+           exp++;
+         }
+       goto normalize;
+      }
+    }
+
+general_case:
+  /* If U extends beyond PREC, ignore the part that does.  */
+  if (usize > prec)
+    {
+      up += usize - prec;
+      usize = prec;
+    }
+
+  /* If V extends beyond PREC, ignore the part that does.
+     Note that this may make vsize negative.  */
+  if (vsize + ediff > prec)
+    {
+      vp += vsize + ediff - prec;
+      vsize = prec - ediff;
+    }
+
+  if (ediff >= prec)
+    {
+      /* V completely cancelled.  */
+      if (rp != up)
+       MPN_COPY (rp, up, usize);
+      rsize = usize;
+    }
+  else
+    {
+      /* Allocate temp space for the result.  Allocate
+        just vsize + ediff later???  */
+      tp = TMP_ALLOC_LIMBS (prec);
+
+      /* Locate the least significant non-zero limb in (the needed
+        parts of) U and V, to simplify the code below.  */
+      for (;;)
+       {
+         if (vsize == 0)
+           {
+             MPN_COPY (rp, up, usize);
+             rsize = usize;
+             goto done;
+           }
+         if (vp[0] != 0)
+           break;
+         vp++, vsize--;
+       }
+      for (;;)
+       {
+         if (usize == 0)
+           {
+             MPN_COPY (rp, vp, vsize);
+             rsize = vsize;
+             negate ^= 1;
+             goto done;
+           }
+         if (up[0] != 0)
+           break;
+         up++, usize--;
+       }
+
+      /* uuuu     |  uuuu     |  uuuu     |  uuuu     |  uuuu    */
+      /* vvvvvvv  |  vv       |    vvvvv  |    v      |       vv */
+
+      if (usize > ediff)
+       {
+         /* U and V partially overlap.  */
+         if (ediff == 0)
+           {
+             /* Have to compare the leading limbs of u and v
+                to determine whether to compute u - v or v - u.  */
+             if (usize >= vsize)
+               {
+                 /* uuuu     */
+                 /* vv       */
+                 mp_size_t size;
+                 size = usize - vsize;
+                 MPN_COPY (tp, up, size);
+                 mpn_sub_n (tp + size, up + size, vp, vsize);
+                 rsize = usize;
+               }
+             else /* (usize < vsize) */
+               {
+                 /* uuuu     */
+                 /* vvvvvvv  */
+                 mp_size_t size, i;
+                 size = vsize - usize;
+                 tp[0] = -vp[0] & GMP_NUMB_MASK;
+                 for (i = 1; i < size; i++)
+                   tp[i] = ~vp[i] & GMP_NUMB_MASK;
+                 mpn_sub_n (tp + size, up, vp + size, usize);
+                 mpn_sub_1 (tp + size, tp + size, usize, (mp_limb_t) 1);
+                 rsize = vsize;
+               }
+           }
+         else
+           {
+             if (vsize + ediff <= usize)
+               {
+                 /* uuuu     */
+                 /*   v      */
+                 mp_size_t size;
+                 size = usize - ediff - vsize;
+                 MPN_COPY (tp, up, size);
+                 mpn_sub (tp + size, up + size, usize - size, vp, vsize);
+                 rsize = usize;
+               }
+             else
+               {
+                 /* uuuu     */
+                 /*   vvvvv  */
+                 mp_size_t size, i;
+                 size = vsize + ediff - usize;
+                 tp[0] = -vp[0] & GMP_NUMB_MASK;
+                 for (i = 1; i < size; i++)
+                   tp[i] = ~vp[i] & GMP_NUMB_MASK;
+                 mpn_sub (tp + size, up, usize, vp + size, usize - ediff);
+                 mpn_sub_1 (tp + size, tp + size, usize, (mp_limb_t) 1);
+                 rsize = vsize + ediff;
+               }
+           }
+       }
+      else
+       {
+         /* uuuu     */
+         /*      vv  */
+         mp_size_t size, i;
+         size = vsize + ediff - usize;
+         tp[0] = -vp[0] & GMP_NUMB_MASK;
+         for (i = 1; i < vsize; i++)
+           tp[i] = ~vp[i] & GMP_NUMB_MASK;
+         for (i = vsize; i < size; i++)
+           tp[i] = GMP_NUMB_MAX;
+         mpn_sub_1 (tp + size, up, usize, (mp_limb_t) 1);
+         rsize = size + usize;
+       }
+
+    normalize:
+      /* Full normalize.  Optimize later.  */
+      while (rsize != 0 && tp[rsize - 1] == 0)
+       {
+         rsize--;
+         exp--;
+       }
+      MPN_COPY (rp, tp, rsize);
+    }
+  /* Store sign, size and exponent; a zero result is given exponent 0.  */
+ done:
+  r->_mp_size = negate ? -rsize : rsize;
+  if (rsize == 0)
+    exp = 0;
+  r->_mp_exp = exp;
+  TMP_FREE;
+}
diff --git a/mpf/sub_ui.c b/mpf/sub_ui.c

new file mode 100644 (file)

index 0000000..4ce7595
--- /dev/null
+++ b/mpf/sub_ui.c
@@ -0,0 +1,40 @@
+/* mpf_sub_ui -- Subtract an unsigned integer from a float.
+
+Copyright 1993, 1994, 1996, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpf_sub_ui (mpf_ptr sum, mpf_srcptr u, unsigned long int v)
+{
+  mp_limb_t limb = v;
+  __mpf_struct tmp;
+
+  if (v == 0)
+    {
+      mpf_set (sum, u);         /* u - 0 is just a copy of u */
+      return;
+    }
+
+  /* Wrap v as a one-limb, integer-valued mpf and hand off to mpf_sub.  */
+  tmp._mp_d = &limb;
+  tmp._mp_size = 1;
+  tmp._mp_exp = 1;
+  mpf_sub (sum, u, &tmp);
+}
diff --git a/mpf/swap.c b/mpf/swap.c

new file mode 100644 (file)

index 0000000..26934f6
--- /dev/null
+++ b/mpf/swap.c
@@ -0,0 +1,50 @@
+/* mpf_swap (U, V) -- Swap U and V.
+
+Copyright 1997, 1998, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpf_swap (mpf_ptr u, mpf_ptr v) __GMP_NOTHROW
+{
+  mp_size_t tprec;
+  mp_size_t tsize;
+  mp_exp_t  texp;
+  mp_ptr    tptr;
+
+  /* Exchange the two structs field by field, through one temporary per
+     field.  The limb arrays themselves are not touched, only the
+     pointers to them change hands.  */
+
+  tprec = PREC (u);
+  PREC (u) = PREC (v);
+  PREC (v) = tprec;
+
+  tsize = SIZ (u);
+  SIZ (u) = SIZ (v);
+  SIZ (v) = tsize;
+
+  texp = EXP (u);
+  EXP (u) = EXP (v);
+  EXP (v) = texp;
+
+  tptr = PTR (u);
+  PTR (u) = PTR (v);
+  PTR (v) = tptr;
+}
diff --git a/mpf/trunc.c b/mpf/trunc.c

new file mode 100644 (file)

index 0000000..d329bc1
--- /dev/null
+++ b/mpf/trunc.c
@@ -0,0 +1,64 @@
+/* mpf_trunc -- truncate an mpf to an integer.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Notice the use of prec+1 ensures mpf_trunc is equivalent to mpf_set if u
+   is already an integer.  */
+
+void
+mpf_trunc (mpf_ptr r, mpf_srcptr u)
+{
+  mp_exp_t   uexp = EXP (u);
+  mp_size_t  usize = SIZ (u);
+  mp_size_t  ulimbs, keep, maxlimbs;
+  mp_srcptr  src;
+  mp_ptr     dst;
+
+  /* Zero, or no limbs above the radix point: the integer part is 0.  */
+  if (usize == 0 || uexp <= 0)
+    {
+      SIZ (r) = 0;
+      EXP (r) = 0;
+      return;
+    }
+
+  ulimbs = ABS (usize);
+  src = PTR (u) + ulimbs;           /* one past the most significant limb */
+  EXP (r) = uexp;
+
+  /* Only the top uexp limbs of u are integer limbs.  */
+  keep = MIN (ulimbs, uexp);
+
+  /* Copy at most PREC(r)+1 limbs into r.  */
+  maxlimbs = PREC (r) + 1;
+  keep = MIN (keep, maxlimbs);
+
+  /* Copy the kept limbs, counted down from the top of u.  */
+  src -= keep;
+  dst = PTR (r);
+  if (usize < 0)
+    SIZ (r) = -keep;
+  else
+    SIZ (r) = keep;
+  if (dst != src)
+    MPN_COPY_INCR (dst, src, keep);
+}
diff --git a/mpf/ui_div.c b/mpf/ui_div.c

new file mode 100644 (file)

index 0000000..39a0bd9
--- /dev/null
+++ b/mpf/ui_div.c
@@ -0,0 +1,117 @@
+/* mpf_ui_div -- Divide an unsigned integer by a float.
+
+Copyright 1993, 1994, 1995, 1996, 2000, 2001, 2002, 2004, 2005 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>  /* for NULL */
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+void
+mpf_ui_div (mpf_ptr r, unsigned long int u, mpf_srcptr v)
+{
+  mp_srcptr vp;
+  mp_ptr rp, tp, remp, new_vp;
+  mp_size_t vsize;
+  mp_size_t rsize, prospective_rsize, zeros, tsize, high_zero;
+  mp_size_t sign_quotient;
+  mp_size_t prec;
+  mp_exp_t rexp;
+  TMP_DECL;
+
+  vsize = v->_mp_size;
+  sign_quotient = vsize;
+  vsize = ABS (vsize);
+  prec = r->_mp_prec;
+
+  if (UNLIKELY (vsize == 0))
+    DIVIDE_BY_ZERO;
+
+  if (UNLIKELY (u == 0))
+    {
+      r->_mp_size = 0;
+      r->_mp_exp = 0;
+      return;
+    }
+
+  TMP_MARK;
+  rexp = 1 - v->_mp_exp + 1;
+
+  rp = r->_mp_d;
+  vp = v->_mp_d;
+
+  prospective_rsize = 1 - vsize + 1;    /* quot from using given u,v sizes */
+  rsize = prec + 1;                     /* desired quot size */
+
+  zeros = rsize - prospective_rsize;    /* padding u to give rsize */
+  tsize = 1 + zeros;                    /* u with zeros */
+
+  if (WANT_TMP_DEBUG)
+    {
+      /* separate alloc blocks, for malloc debugging */
+      remp = TMP_ALLOC_LIMBS (vsize);
+      tp = TMP_ALLOC_LIMBS (tsize);
+      new_vp = NULL;
+      if (rp == vp)
+        new_vp = TMP_ALLOC_LIMBS (vsize);
+    }
+  else
+    {
+      /* one alloc with calculated size, for efficiency */
+      mp_size_t size = vsize + tsize + (rp == vp ? vsize : 0);
+      remp = TMP_ALLOC_LIMBS (size);
+      tp = remp + vsize;
+      new_vp = tp + tsize;
+    }
+
+  /* ensure divisor doesn't overlap quotient */
+  if (rp == vp)
+    {
+      MPN_COPY (new_vp, vp, vsize);
+      vp = new_vp;
+    }
+
+  MPN_ZERO (tp, tsize-1);
+
+  tp[tsize - 1] = u & GMP_NUMB_MASK;
+#if BITS_PER_ULONG > GMP_NUMB_BITS
+  if (u > GMP_NUMB_MAX)
+    {
+      /* tsize-vsize+1 == rsize, so tsize >= rsize.  rsize == prec+1 >= 2,
+         so tsize >= 2, hence there's room for 2-limb u with nails */
+      ASSERT (tsize >= 2);
+      tp[tsize - 1] = u >> GMP_NUMB_BITS;
+      tp[tsize - 2] = u & GMP_NUMB_MASK;
+      rexp++;
+    }
+#endif
+
+  ASSERT (tsize-vsize+1 == rsize);
+  mpn_tdiv_qr (rp, remp, (mp_size_t) 0, tp, tsize, vp, vsize);
+
+  /* strip possible zero high limb */
+  high_zero = (rp[rsize-1] == 0);
+  rsize -= high_zero;
+  rexp -= high_zero;
+
+  r->_mp_size = sign_quotient >= 0 ? rsize : -rsize;
+  r->_mp_exp = rexp;
+  TMP_FREE;
+}
diff --git a/mpf/ui_sub.c b/mpf/ui_sub.c

new file mode 100644 (file)

index 0000000..081ca34
--- /dev/null
+++ b/mpf/ui_sub.c
@@ -0,0 +1,326 @@
+/* mpf_ui_sub -- Subtract a float from an unsigned long int.
+
+Copyright 1993, 1994, 1995, 1996, 2001, 2002, 2005 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Set R to U - V, where U is an unsigned long and V is an mpf.
+
+   U is handled as a one-limb operand with exponent 1.  After the trivial
+   cases (U == 0, V == 0, V < 0) are dispatched, the operand with the larger
+   exponent is made the "U" of the generic code below; when that means the
+   roles were swapped, NEGATE is set so the sign is flipped at the end.
+
+   R may be the same object as V.  Results are either built in temporary
+   space and copied out, or copied straight from an operand into R's limbs;
+   the latter copies always have the destination at or below the source,
+   so MPN_COPY_INCR is used for them.
+
+   NOTE(review): "ulimb = u" stores U in a single limb; confirm this is
+   adequate for nails builds where an unsigned long can exceed
+   GMP_NUMB_MAX.  */
+void
+mpf_ui_sub (mpf_ptr r, unsigned long int u, mpf_srcptr v)
+{
+  mp_srcptr up, vp;
+  mp_ptr rp, tp;
+  mp_size_t usize, vsize, rsize;
+  mp_size_t prec;
+  mp_exp_t uexp;
+  mp_size_t ediff;
+  int negate;
+  mp_limb_t ulimb;
+  TMP_DECL;
+
+  vsize = v->_mp_size;
+
+  /* Handle special cases that don't work in generic code below.  */
+  if (u == 0)
+    {
+      /* 0 - V  */
+      mpf_neg (r, v);
+      return;
+    }
+  if (vsize == 0)
+    {
+      /* U - 0  */
+      mpf_set_ui (r, u);
+      return;
+    }
+
+  /* If signs of U and V are different, perform addition.  */
+  if (vsize < 0)
+    {
+      /* U - V == |V| + U.  Build a positive view of V sharing its limbs.  */
+      __mpf_struct v_negated;
+      v_negated._mp_size = -vsize;
+      v_negated._mp_exp = v->_mp_exp;
+      v_negated._mp_d = v->_mp_d;
+      mpf_add_ui (r, &v_negated, u);
+      return;
+    }
+
+  TMP_MARK;
+
+  /* Signs are now known to be the same.  */
+
+  ulimb = u;
+  /* Make U be the operand with the largest exponent.  */
+  if (1 < v->_mp_exp)
+    {
+      /* V has the larger exponent: swap roles and remember to negate.  */
+      negate = 1;
+      usize = ABS (vsize);
+      vsize = 1;
+      up = v->_mp_d;
+      vp = &ulimb;
+      rp = r->_mp_d;
+      prec = r->_mp_prec + 1;
+      uexp = v->_mp_exp;
+      ediff = uexp - 1;
+    }
+  else
+    {
+      negate = 0;
+      usize = 1;
+      vsize = ABS (vsize);
+      up = &ulimb;
+      vp = v->_mp_d;
+      rp = r->_mp_d;
+      prec = r->_mp_prec;
+      uexp = 1;
+      ediff = 1 - v->_mp_exp;
+    }
+
+  /* Ignore leading limbs in U and V that are equal.  Doing
+     this helps increase the precision of the result.  */
+  if (ediff == 0)
+    {
+      /* This loop normally exits immediately.  Optimize for that.  */
+      for (;;)
+       {
+         usize--;
+         vsize--;
+         if (up[usize] != vp[vsize])
+           break;
+         uexp--;
+         if (usize == 0)
+           goto Lu0;
+         if (vsize == 0)
+           goto Lv0;
+       }
+      usize++;
+      vsize++;
+      /* Note that either operand (but not both operands) might now have
+        leading zero limbs.  It matters only that U is unnormalized if
+        vsize is now zero, and vice versa.  And it is only in that case
+        that we have to adjust uexp.  */
+      if (vsize == 0)
+      Lv0:
+       while (usize != 0 && up[usize - 1] == 0)
+         usize--, uexp--;
+      if (usize == 0)
+      Lu0:
+       while (vsize != 0 && vp[vsize - 1] == 0)
+         vsize--, uexp--;
+    }
+
+  /* If U extends beyond PREC, ignore the part that does.  */
+  if (usize > prec)
+    {
+      up += usize - prec;
+      usize = prec;
+    }
+
+  /* If V extends beyond PREC, ignore the part that does.
+     Note that this may make vsize negative.  */
+  if (vsize + ediff > prec)
+    {
+      vp += vsize + ediff - prec;
+      vsize = prec - ediff;
+    }
+
+  /* Allocate temp space for the result.  Allocate
+     just vsize + ediff later???  */
+  tp = TMP_ALLOC_LIMBS (prec);
+
+  if (ediff >= prec)
+    {
+      /* V completely cancelled.  The result is U itself; when U already
+        sits at RP (R aliases the operand) there is nothing to move.  */
+      if (rp != up)
+       MPN_COPY_INCR (rp, up, usize);
+      rsize = usize;
+    }
+  else
+    {
+      /* Locate the least significant non-zero limb in (the needed
+        parts of) U and V, to simplify the code below.  */
+      for (;;)
+       {
+         if (vsize == 0)
+           {
+             /* Nothing left of V: result is U.  UP may point into R's
+                own limbs at or above RP, so copy upwards.  */
+             MPN_COPY_INCR (rp, up, usize);
+             rsize = usize;
+             goto done;
+           }
+         if (vp[0] != 0)
+           break;
+         vp++, vsize--;
+       }
+      for (;;)
+       {
+         if (usize == 0)
+           {
+             /* Nothing left of U: result is V with the sign flipped.  VP
+                may point into R's own limbs at or above RP.  */
+             MPN_COPY_INCR (rp, vp, vsize);
+             rsize = vsize;
+             negate ^= 1;
+             goto done;
+           }
+         if (up[0] != 0)
+           break;
+         up++, usize--;
+       }
+
+      /* uuuu     |  uuuu     |  uuuu     |  uuuu     |  uuuu    */
+      /* vvvvvvv  |  vv       |    vvvvv  |    v      |       vv */
+
+      if (usize > ediff)
+       {
+         /* U and V partially overlap.  */
+         if (ediff == 0)
+           {
+             /* Have to compare the leading limbs of u and v
+                to determine whether to compute u - v or v - u.  */
+             if (usize > vsize)
+               {
+                 /* uuuu     */
+                 /* vv       */
+                 int cmp;
+                 cmp = mpn_cmp (up + usize - vsize, vp, vsize);
+                 if (cmp >= 0)
+                   {
+                     mp_size_t size;
+                     size = usize - vsize;
+                     MPN_COPY (tp, up, size);
+                     mpn_sub_n (tp + size, up + size, vp, vsize);
+                     rsize = usize;
+                   }
+                 else
+                   {
+                     /* vv       */  /* Swap U and V. */
+                     /* uuuu     */
+                     mp_size_t size, i;
+                     size = usize - vsize;
+                     /* Low part is 0 - U: two's complement of U's low
+                        limbs, with the borrow taken from the high part.  */
+                     tp[0] = -up[0] & GMP_NUMB_MASK;
+                     for (i = 1; i < size; i++)
+                       tp[i] = ~up[i] & GMP_NUMB_MASK;
+                     mpn_sub_n (tp + size, vp, up + size, vsize);
+                     mpn_sub_1 (tp + size, tp + size, vsize, (mp_limb_t) 1);
+                     negate ^= 1;
+                     rsize = usize;
+                   }
+               }
+             else if (usize < vsize)
+               {
+                 /* uuuu     */
+                 /* vvvvvvv  */
+                 int cmp;
+                 cmp = mpn_cmp (up, vp + vsize - usize, usize);
+                 if (cmp > 0)
+                   {
+                     mp_size_t size, i;
+                     size = vsize - usize;
+                     /* Low part is 0 - V, borrow taken from the high part.  */
+                     tp[0] = -vp[0] & GMP_NUMB_MASK;
+                     for (i = 1; i < size; i++)
+                       tp[i] = ~vp[i] & GMP_NUMB_MASK;
+                     mpn_sub_n (tp + size, up, vp + size, usize);
+                     mpn_sub_1 (tp + size, tp + size, usize, (mp_limb_t) 1);
+                     rsize = vsize;
+                   }
+                 else
+                   {
+                     /* vvvvvvv  */  /* Swap U and V. */
+                     /* uuuu     */
+                     /* This is the only place we can get 0.0.  */
+                     mp_size_t size;
+                     size = vsize - usize;
+                     MPN_COPY (tp, vp, size);
+                     mpn_sub_n (tp + size, vp + size, up, usize);
+                     negate ^= 1;
+                     rsize = vsize;
+                   }
+               }
+             else
+               {
+                 /* uuuu     */
+                 /* vvvv     */
+                 int cmp;
+                 cmp = mpn_cmp (up, vp + vsize - usize, usize);
+                 if (cmp > 0)
+                   {
+                     mpn_sub_n (tp, up, vp, usize);
+                     rsize = usize;
+                   }
+                 else
+                   {
+                     mpn_sub_n (tp, vp, up, usize);
+                     negate ^= 1;
+                     rsize = usize;
+                     /* can give zero */
+                   }
+               }
+           }
+         else
+           {
+             if (vsize + ediff <= usize)
+               {
+                 /* uuuu     */
+                 /*   v      */
+                 mp_size_t size;
+                 size = usize - ediff - vsize;
+                 MPN_COPY (tp, up, size);
+                 mpn_sub (tp + size, up + size, usize - size, vp, vsize);
+                 rsize = usize;
+               }
+             else
+               {
+                 /* uuuu     */
+                 /*   vvvvv  */
+                 mp_size_t size, i;
+                 size = vsize + ediff - usize;
+                 /* Low part is 0 - V, borrow taken from the high part.  */
+                 tp[0] = -vp[0] & GMP_NUMB_MASK;
+                 for (i = 1; i < size; i++)
+                   tp[i] = ~vp[i] & GMP_NUMB_MASK;
+                 mpn_sub (tp + size, up, usize, vp + size, usize - ediff);
+                 mpn_sub_1 (tp + size, tp + size, usize, (mp_limb_t) 1);
+                 rsize = vsize + ediff;
+               }
+           }
+       }
+      else
+       {
+         /* uuuu     */
+         /*      vv  */
+         mp_size_t size, i;
+         size = vsize + ediff - usize;
+         /* 0 - V in the low limbs, all-ones across the gap between the
+            operands, and U - 1 (the propagated borrow) on top.  */
+         tp[0] = -vp[0] & GMP_NUMB_MASK;
+         for (i = 1; i < vsize; i++)
+           tp[i] = ~vp[i] & GMP_NUMB_MASK;
+         for (i = vsize; i < size; i++)
+           tp[i] = GMP_NUMB_MAX;
+         mpn_sub_1 (tp + size, up, usize, (mp_limb_t) 1);
+         rsize = size + usize;
+       }
+
+      /* Full normalize.  Optimize later.  */
+      while (rsize != 0 && tp[rsize - 1] == 0)
+       {
+         rsize--;
+         uexp--;
+       }
+      MPN_COPY (rp, tp, rsize);
+    }
+
+ done:
+  r->_mp_size = negate ? -rsize : rsize;
+  r->_mp_exp = uexp;
+  TMP_FREE;
+}
diff --git a/mpf/urandomb.c b/mpf/urandomb.c

new file mode 100644 (file)

index 0000000..02307e0
--- /dev/null
+++ b/mpf/urandomb.c
@@ -0,0 +1,58 @@
+/* mpf_urandomb (rop, state, nbits) -- Generate a uniform pseudorandom
+   real number between 0 (inclusive) and 1 (exclusive) of size NBITS,
+   using STATE as the random state previously initialized by a call to
+   gmp_randinit().
+
+Copyright 1999, 2000, 2001, 2002  Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Fill ROP with a pseudorandom value in [0, 1) built from NBITS random
+   bits drawn from RSTATE.  The bits are stored as a fraction (exponent 0)
+   and then normalized by dropping zero high limbs.  */
+void
+mpf_urandomb (mpf_t rop, gmp_randstate_t rstate, mp_bitcnt_t nbits)
+{
+  mp_ptr dst = PTR (rop);
+  mp_size_t n = BITS_TO_LIMBS (nbits);
+  mp_size_t max_limbs = (mp_size_t) PREC (rop) + 1;
+  mp_exp_t e = 0;
+  unsigned int partial;
+
+  /* A request for zero bits, or for more limbs than PREC+1, is replaced
+     by a request for exactly PREC+1 full limbs.  */
+  if (n == 0 || n > max_limbs)
+    {
+      n = max_limbs;
+      nbits = n * GMP_NUMB_BITS;
+    }
+
+  _gmp_rand (dst, rstate, nbits);
+
+  /* When NBITS ends part-way into the top limb, shift everything up so
+     the random bits are left-justified in the N limbs.  */
+  partial = nbits % GMP_NUMB_BITS;
+  if (partial != 0)
+    mpn_lshift (dst, dst, n, GMP_NUMB_BITS - partial);
+
+  /* Strip zero high limbs, lowering the exponent once per limb removed.  */
+  for (; n != 0 && dst[n - 1] == 0; n--)
+    e--;
+
+  EXP (rop) = e;
+  SIZ (rop) = n;
+}
diff --git a/mpn/Makeasm.am b/mpn/Makeasm.am

new file mode 100644 (file)

index 0000000..bb66700
--- /dev/null
+++ b/mpn/Makeasm.am
@@ -0,0 +1,108 @@
+## Automake asm file rules.
+
+# Copyright 1996, 1998, 1999, 2000, 2001, 2002 Free Software Foundation,
+# Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+# COMPILE minus CC.
+#
+COMPILE_FLAGS = $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+       $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) $(ASMFLAGS)
+
+# Flags used for preprocessing (in ansi2knr rules).
+#
+PREPROCESS_FLAGS = $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+       $(CPPFLAGS)
+
+
+# Recent versions of automake (1.5 and up for instance) append automake
+# generated suffixes to this $(SUFFIXES) list.  This is essential for us,
+# since .c must come after .s, .S and .asm.  If .c is before .s, for
+# instance, then in the mpn directory "make" will see add_n.c mentioned in
+# an explicit rule (the ansi2knr stuff) and decide it must have add_n.c,
+# even if add_n.c doesn't exist but add_n.s does.  See GNU make
+# documentation "(make)Implicit Rule Search", part 5c.
+#
+# On IRIX 6 native make this doesn't work properly though.  Somehow .c
+# remains ahead of .s, perhaps because .c.s is a builtin rule.  .asm works
+# fine though, and mpn/mips3 uses this.
+#
+SUFFIXES = .s .S .asm
+
+
+# .s assembler, no preprocessing.
+#
+.s.o:
+       $(CCAS) $(COMPILE_FLAGS) `test -f '$<' || echo '$(srcdir)/'`$<
+.s.obj:
+       $(CCAS) $(COMPILE_FLAGS) `if test -f '$<'; then $(CYGPATH_W) '$<'; else $(CYGPATH_W) '$(srcdir)/$<'; fi`
+.s.lo:
+       $(LIBTOOL) --mode=compile --tag=CC $(CCAS) $(COMPILE_FLAGS) `test -f '$<' || echo '$(srcdir)/'`$<
+
+
+# can be overridden during development, eg. "make RM_TMP=: mul_1.lo"
+RM_TMP = rm -f
+
+
+# .S assembler, preprocessed with cpp.
+#
+# It's necessary to run $(CPP) separately, since it seems not all compilers
+# recognise .S files, in particular "cc" on HP-UX 10 and 11 doesn't (and
+# will silently do nothing if given a .S).
+#
+# For .lo we need a helper script, as described below for .asm.lo.
+#
+.S.o:
+       $(CPP) $(PREPROCESS_FLAGS) `test -f '$<' || echo '$(srcdir)/'`$< | grep -v '^#' >tmp-$*.s
+       $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@
+       $(RM_TMP) tmp-$*.s
+.S.obj:
+       $(CPP) $(PREPROCESS_FLAGS) `if test -f '$<'; then $(CYGPATH_W) '$<'; else $(CYGPATH_W) '$(srcdir)/$<'; fi` | grep -v '^#' >tmp-$*.s
+       $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@
+       $(RM_TMP) tmp-$*.s
+.S.lo:
+       $(LIBTOOL) --mode=compile --tag=CC $(top_srcdir)/mpn/cpp-ccas --cpp="$(CPP) $(PREPROCESS_FLAGS)" $(CCAS) $(COMPILE_FLAGS) `test -f '$<' || echo '$(srcdir)/'`$<
+
+
+# .asm assembler, preprocessed with m4.
+#
+# .o and .obj are non-PIC and just need m4 followed by a compile.
+#
+# .lo is a bit tricky.  Libtool (as of version 1.5) has foo.lo as a little
+# text file, and .libs/foo.o and foo.o as the PIC and non-PIC objects,
+# respectively.  It'd be asking for lots of trouble to try to create foo.lo
+# ourselves, so instead arrange to invoke libtool like a --mode=compile, but
+# with a special m4-ccas script which first m4 preprocesses, then compiles.
+# --tag=CC is necessary since foo.asm is otherwise unknown to libtool.
+#
+# Libtool adds -DPIC when building a shared object and the .asm files look
+# for that.  But it should be noted that the other PIC flags are on occasion
+# important too, in particular FreeBSD 2.2.8 gas 1.92.3 requires -k before
+# it accepts PIC constructs like @GOT, and gcc adds that flag only under
+# -fPIC.  (Later versions of gas are happy to accept PIC stuff any time.)
+#
+.asm.o:
+       $(M4) -DOPERATION_$* `test -f '$<' || echo '$(srcdir)/'`$< >tmp-$*.s
+       $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@
+       $(RM_TMP) tmp-$*.s
+.asm.obj:
+       $(M4) -DOPERATION_$* `if test -f '$<'; then $(CYGPATH_W) '$<'; else $(CYGPATH_W) '$(srcdir)/$<'; fi` >tmp-$*.s
+       $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@
+       $(RM_TMP) tmp-$*.s
+.asm.lo:
+       $(LIBTOOL) --mode=compile --tag=CC $(top_srcdir)/mpn/m4-ccas --m4="$(M4)" $(CCAS) $(COMPILE_FLAGS) `test -f '$<' || echo '$(srcdir)/'`$<
diff --git a/mpn/Makefile.am b/mpn/Makefile.am

new file mode 100644 (file)

index 0000000..15705f7
--- /dev/null
+++ b/mpn/Makefile.am
@@ -0,0 +1,95 @@
+## Process this file with automake to generate Makefile.in
+
+# Copyright 1996, 1998, 1999, 2000, 2001, 2002, 2005, 2011 Free Software
+# Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+INCLUDES = -D__GMP_WITHIN_GMP -I$(top_srcdir) \
+  -DOPERATION_`echo $* | sed 's/_$$//'`
+
+OFILES = @mpn_objects@
+
+
+# All possible mpn normal and optional function files are listed here, to
+# get automake to generate ansi2knr rules for each.  Such rules will be
+# ignored for any that are instead implemented with a .asm (or whatever) for
+# a particular target.  Each file should appear exactly once.
+#
+nodist_EXTRA_libmpn_la_SOURCES =                                           \
+  add.c add_1.c add_n.c                                                            \
+  addmul_1.c addmul_2.c addmul_3.c addmul_4.c addmul_5.c addmul_6.c        \
+  addmul_7.c addmul_8.c                                                            \
+  and_n.c andn_n.c                                                         \
+  cmp.c com.c copyd.c copyi.c                                              \
+  dive_1.c diveby3.c divis.c divrem.c divrem_1.c divrem_2.c                \
+  sbpi1_bdiv_qr.c sbpi1_bdiv_q.c                                           \
+  sbpi1_div_qr.c sbpi1_div_q.c sbpi1_divappr_q.c                           \
+  dcpi1_bdiv_qr.c dcpi1_bdiv_q.c                                           \
+  dcpi1_div_qr.c dcpi1_div_q.c dcpi1_divappr_q.c                           \
+  dump.c fib2_ui.c gcd.c                                                   \
+  gcd_1.c gcdext.c get_d.c get_str.c                                       \
+  hamdist.c hgcd2.c hgcd.c invert_limb.c                                   \
+  ior_n.c iorn_n.c jacbase.c lshift.c                                      \
+  matrix22_mul.c mod_1.c mod_34lsub1.c mode1o.c                                    \
+  mod_1_1.c mod_1_2.c mod_1_3.c mod_1_4.c                                  \
+  mul.c mul_1.c mul_2.c mul_3.c mul_4.c mul_fft.c mul_n.c mul_basecase.c    \
+  nussbaumer_mul.c                                                         \
+  toom22_mul.c toom32_mul.c toom42_mul.c toom52_mul.c toom62_mul.c         \
+  toom33_mul.c toom43_mul.c toom53_mul.c toom63_mul.c                      \
+  toom44_mul.c                                                             \
+  toom6h_mul.c toom6_sqr.c toom8h_mul.c toom8_sqr.c                        \
+  toom_couple_handling.c                                                   \
+  toom2_sqr.c toom3_sqr.c toom4_sqr.c                                      \
+  toom_eval_dgr3_pm1.c toom_eval_dgr3_pm2.c                                \
+  toom_eval_pm1.c toom_eval_pm2.c toom_eval_pm2exp.c toom_eval_pm2rexp.c    \
+  toom_interpolate_5pts.c toom_interpolate_6pts.c toom_interpolate_7pts.c   \
+  toom_interpolate_8pts.c toom_interpolate_12pts.c toom_interpolate_16pts.c \
+  invertappr.c invert.c binvert.c mulmod_bnm1.c sqrmod_bnm1.c              \
+  mullo_n.c mullo_basecase.c nand_n.c neg.c nior_n.c perfsqr.c     \
+  popcount.c pre_divrem_1.c pre_mod_1.c pow_1.c random.c random2.c rshift.c \
+  rootrem.c scan0.c scan1.c set_str.c                      \
+  sqr_basecase.c sqr_diagonal.c                                                    \
+  sqrtrem.c sub.c sub_1.c sub_n.c submul_1.c                               \
+  tdiv_qr.c udiv_qrnnd.c udiv_w_sdiv.c xor_n.c xnor_n.c
+
+noinst_LTLIBRARIES = libmpn.la
+nodist_libmpn_la_SOURCES = fib_table.c mp_bases.c
+libmpn_la_LIBADD = $(OFILES)
+libmpn_la_DEPENDENCIES = $(OFILES)
+
+TARG_DIST = a29k alpha arm clipper cray generic i960 ia64 lisp m68k m88k \
+  minithres mips32 mips64 ns32k pa32 pa64 power powerpc32 powerpc64 pyr \
+  s390_32 s390_64 sh sparc32 sparc64 vax x86 x86_64 z8000 z8000x
+
+EXTRA_DIST = asm-defs.m4 cpp-ccas m4-ccas $(TARG_DIST)
+
+
+# These are BUILT_SOURCES at the top-level, so normally they're built before
+# recursing into this directory.
+#
+fib_table.c:
+       cd ..; $(MAKE) $(AM_MAKEFLAGS) mpn/fib_table.c
+mp_bases.c:
+       cd ..; $(MAKE) $(AM_MAKEFLAGS) mpn/mp_bases.c
+perfsqr.h:
+       cd ..; $(MAKE) $(AM_MAKEFLAGS) mpn/perfsqr.h
+
+tune-gcd-p: gcd.c
+       $(COMPILE) -g -O1 -I $(top_srcdir)/tune -DTUNE_GCD_P=1 gcd.c -o tune-gcd-p -L ../.libs -L../tune/.libs -lspeed -lgmp -lm
+
+include Makeasm.am
diff --git a/mpn/Makefile.in b/mpn/Makefile.in

new file mode 100644 (file)

index 0000000..184c80d
--- /dev/null
+++ b/mpn/Makefile.in
@@ -0,0 +1,1027 @@
+# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation,
+# Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+# Copyright 1996, 1998, 1999, 2000, 2001, 2002, 2005, 2011 Free Software
+# Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+# Copyright 1996, 1998, 1999, 2000, 2001, 2002 Free Software Foundation,
+# Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+ANSI2KNR = $(top_builddir)/ansi2knr
+DIST_COMMON = README $(srcdir)/Makeasm.am $(srcdir)/Makefile.am \
+       $(srcdir)/Makefile.in
+subdir = mpn
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
+       $(top_srcdir)/configure.in
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+       $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+LTLIBRARIES = $(noinst_LTLIBRARIES)
+am__DEPENDENCIES_1 =
+nodist_libmpn_la_OBJECTS = fib_table$U.lo mp_bases$U.lo
+libmpn_la_OBJECTS = $(nodist_libmpn_la_OBJECTS)
+DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
+depcomp =
+am__depfiles_maybe =
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+       $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+       $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
+       $(LDFLAGS) -o $@
+SOURCES = $(nodist_libmpn_la_SOURCES) \
+       $(nodist_EXTRA_libmpn_la_SOURCES)
+DIST_SOURCES =
+ETAGS = etags
+CTAGS = ctags
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ABI = @ABI@
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AS = @AS@
+ASMFLAGS = @ASMFLAGS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CALLING_CONVENTIONS_OBJS = @CALLING_CONVENTIONS_OBJS@
+CC = @CC@
+CCAS = @CCAS@
+CC_FOR_BUILD = @CC_FOR_BUILD@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CPP_FOR_BUILD = @CPP_FOR_BUILD@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFN_LONG_LONG_LIMB = @DEFN_LONG_LONG_LIMB@
+DEFS = @DEFS@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+EXEEXT_FOR_BUILD = @EXEEXT_FOR_BUILD@
+FGREP = @FGREP@
+GMP_LDFLAGS = @GMP_LDFLAGS@
+GMP_LIMB_BITS = @GMP_LIMB_BITS@
+GMP_NAIL_BITS = @GMP_NAIL_BITS@
+GREP = @GREP@
+HAVE_CLOCK_01 = @HAVE_CLOCK_01@
+HAVE_CPUTIME_01 = @HAVE_CPUTIME_01@
+HAVE_GETRUSAGE_01 = @HAVE_GETRUSAGE_01@
+HAVE_GETTIMEOFDAY_01 = @HAVE_GETTIMEOFDAY_01@
+HAVE_HOST_CPU_FAMILY_power = @HAVE_HOST_CPU_FAMILY_power@
+HAVE_HOST_CPU_FAMILY_powerpc = @HAVE_HOST_CPU_FAMILY_powerpc@
+HAVE_SIGACTION_01 = @HAVE_SIGACTION_01@
+HAVE_SIGALTSTACK_01 = @HAVE_SIGALTSTACK_01@
+HAVE_SIGSTACK_01 = @HAVE_SIGSTACK_01@
+HAVE_STACK_T_01 = @HAVE_STACK_T_01@
+HAVE_SYS_RESOURCE_H_01 = @HAVE_SYS_RESOURCE_H_01@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LEX = @LEX@
+LEXLIB = @LEXLIB@
+LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
+LIBCURSES = @LIBCURSES@
+LIBGMPXX_LDFLAGS = @LIBGMPXX_LDFLAGS@
+LIBGMP_DLL = @LIBGMP_DLL@
+LIBGMP_LDFLAGS = @LIBGMP_LDFLAGS@
+LIBM = @LIBM@
+LIBM_FOR_BUILD = @LIBM_FOR_BUILD@
+LIBOBJS = @LIBOBJS@
+LIBREADLINE = @LIBREADLINE@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+M4 = @M4@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
+STRIP = @STRIP@
+TAL_OBJECT = @TAL_OBJECT@
+TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
+U = @U@
+U_FOR_BUILD = @U_FOR_BUILD@
+VERSION = @VERSION@
+WITH_READLINE_01 = @WITH_READLINE_01@
+YACC = @YACC@
+YFLAGS = @YFLAGS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__leading_dot = @am__leading_dot@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+gmp_srclinks = @gmp_srclinks@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+mpn_objects = @mpn_objects@
+mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
+mpn_objs_in_libmp = @mpn_objs_in_libmp@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+INCLUDES = -D__GMP_WITHIN_GMP -I$(top_srcdir) \
+  -DOPERATION_`echo $* | sed 's/_$$//'`
+
+OFILES = @mpn_objects@
+
+# All possible mpn normal and optional function files are listed here, to
+# get automake to generate ansi2knr rules for each.  Such rules will be
+# ignored for any that are instead implemented with a .asm (or whatever) for
+# a particular target.
+#
+nodist_EXTRA_libmpn_la_SOURCES = \
+  add.c add_1.c add_n.c                                                            \
+  addmul_1.c addmul_2.c addmul_3.c addmul_4.c addmul_5.c addmul_6.c        \
+  addmul_7.c addmul_8.c                                                            \
+  and_n.c andn_n.c                                                         \
+  cmp.c com.c copyd.c copyi.c                                              \
+  dive_1.c diveby3.c divis.c divrem.c divrem_1.c divrem_2.c                \
+  sbpi1_bdiv_qr.c sbpi1_bdiv_q.c                                           \
+  sbpi1_div_qr.c sbpi1_div_q.c sbpi1_divappr_q.c                           \
+  dcpi1_bdiv_qr.c dcpi1_bdiv_q.c                                           \
+  dcpi1_div_qr.c dcpi1_div_q.c dcpi1_divappr_q.c                           \
+  dump.c fib2_ui.c gcd.c                                                   \
+  gcd_1.c gcdext.c get_d.c get_str.c                                       \
+  hamdist.c hgcd2.c hgcd.c invert_limb.c                                   \
+  ior_n.c iorn_n.c jacbase.c lshift.c                                      \
+  matrix22_mul.c mod_1.c mod_34lsub1.c mode1o.c                                    \
+  mod_1_1.c mod_1_2.c mod_1_3.c mod_1_4.c                                  \
+  mul.c mul_1.c mul_2.c mul_3.c mul_4.c mul_fft.c mul_n.c mul_basecase.c    \
+  nussbaumer_mul.c                                                         \
+  toom22_mul.c toom32_mul.c toom42_mul.c toom52_mul.c toom62_mul.c         \
+  toom33_mul.c toom43_mul.c toom53_mul.c toom63_mul.c                      \
+  toom44_mul.c                                                             \
+  toom6h_mul.c toom6_sqr.c toom8h_mul.c toom8_sqr.c                        \
+  toom_couple_handling.c                                                   \
+  toom2_sqr.c toom3_sqr.c toom4_sqr.c                                      \
+  toom_eval_dgr3_pm1.c toom_eval_dgr3_pm2.c                                \
+  toom_eval_pm1.c toom_eval_pm2.c toom_eval_pm2exp.c toom_eval_pm2rexp.c    \
+  toom_interpolate_5pts.c toom_interpolate_6pts.c toom_interpolate_7pts.c   \
+  toom_interpolate_8pts.c toom_interpolate_12pts.c toom_interpolate_16pts.c \
+  invertappr.c invert.c binvert.c mulmod_bnm1.c sqrmod_bnm1.c              \
+  mullo_n.c mullo_basecase.c nand_n.c neg.c nior_n.c perfsqr.c     \
+  popcount.c pre_divrem_1.c pre_mod_1.c pow_1.c random.c random2.c rshift.c \
+  rootrem.c scan0.c scan1.c set_str.c                      \
+  sqr_basecase.c sqr_diagonal.c                                                    \
+  sqrtrem.c sub.c sub_1.c sub_n.c submul_1.c                               \
+  tdiv_qr.c udiv_qrnnd.c udiv_w_sdiv.c xor_n.c xnor_n.c
+
+noinst_LTLIBRARIES = libmpn.la
+nodist_libmpn_la_SOURCES = fib_table.c mp_bases.c
+libmpn_la_LIBADD = $(OFILES)
+libmpn_la_DEPENDENCIES = $(OFILES)
+TARG_DIST = a29k alpha arm clipper cray generic i960 ia64 lisp m68k m88k \
+  minithres mips32 mips64 ns32k pa32 pa64 power powerpc32 powerpc64 pyr \
+  s390_32 s390_64 sh sparc32 sparc64 vax x86 x86_64 z8000 z8000x
+
+EXTRA_DIST = asm-defs.m4 cpp-ccas m4-ccas $(TARG_DIST)
+
+# COMPILE minus CC.
+#
+COMPILE_FLAGS = $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+       $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) $(ASMFLAGS)
+
+
+# Flags used for preprocessing (in ansi2knr rules).
+#
+PREPROCESS_FLAGS = $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+       $(CPPFLAGS)
+
+
+# Recent versions of automake (1.5 and up for instance) append automake
+# generated suffixes to this $(SUFFIXES) list.  This is essential for us,
+# since .c must come after .s, .S and .asm.  If .c is before .s, for
+# instance, then in the mpn directory "make" will see add_n.c mentioned in
+# an explicit rule (the ansi2knr stuff) and decide it must have add_n.c,
+# even if add_n.c doesn't exist but add_n.s does.  See GNU make
+# documentation "(make)Implicit Rule Search", part 5c.
+#
+# On IRIX 6 native make this doesn't work properly though.  Somehow .c
+# remains ahead of .s, perhaps because .c.s is a builtin rule.  .asm works
+# fine though, and mpn/mips3 uses this.
+#
+SUFFIXES = .s .S .asm
+
+# can be overridden during development, eg. "make RM_TMP=: mul_1.lo"
+RM_TMP = rm -f
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .s .S .asm .c .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(srcdir)/Makeasm.am $(am__configure_deps)
+       @for dep in $?; do \
+         case '$(am__configure_deps)' in \
+           *$$dep*) \
+             ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+               && { if test -f $@; then exit 0; else break; fi; }; \
+             exit 1;; \
+         esac; \
+       done; \
+       echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps mpn/Makefile'; \
+       $(am__cd) $(top_srcdir) && \
+         $(AUTOMAKE) --gnu --ignore-deps mpn/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+       @case '$?' in \
+         *config.status*) \
+           cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+         *) \
+           echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+           cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+       esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+clean-noinstLTLIBRARIES:
+       -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES)
+       @list='$(noinst_LTLIBRARIES)'; for p in $$list; do \
+         dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \
+         test "$$dir" != "$$p" || dir=.; \
+         echo "rm -f \"$${dir}/so_locations\""; \
+         rm -f "$${dir}/so_locations"; \
+       done
+libmpn.la: $(libmpn_la_OBJECTS) $(libmpn_la_DEPENDENCIES) 
+       $(LINK)  $(libmpn_la_OBJECTS) $(libmpn_la_LIBADD) $(LIBS)
+
+mostlyclean-compile:
+       -rm -f *.$(OBJEXT)
+
+distclean-compile:
+       -rm -f *.tab.c
+$(top_builddir)/ansi2knr:
+       $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
+
+mostlyclean-kr:
+       -test "$U" = "" || rm -f *_.c
+
+.c.o:
+       $(COMPILE) -c $<
+
+.c.obj:
+       $(COMPILE) -c `$(CYGPATH_W) '$<'`
+
+.c.lo:
+       $(LTCOMPILE) -c -o $@ $<
+add_.c: add.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/add.c; then echo $(srcdir)/add.c; else echo add.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+add_1_.c: add_1.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/add_1.c; then echo $(srcdir)/add_1.c; else echo add_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+add_n_.c: add_n.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/add_n.c; then echo $(srcdir)/add_n.c; else echo add_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+addmul_1_.c: addmul_1.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/addmul_1.c; then echo $(srcdir)/addmul_1.c; else echo addmul_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+addmul_2_.c: addmul_2.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/addmul_2.c; then echo $(srcdir)/addmul_2.c; else echo addmul_2.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+addmul_3_.c: addmul_3.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/addmul_3.c; then echo $(srcdir)/addmul_3.c; else echo addmul_3.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+addmul_4_.c: addmul_4.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/addmul_4.c; then echo $(srcdir)/addmul_4.c; else echo addmul_4.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+addmul_5_.c: addmul_5.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/addmul_5.c; then echo $(srcdir)/addmul_5.c; else echo addmul_5.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+addmul_6_.c: addmul_6.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/addmul_6.c; then echo $(srcdir)/addmul_6.c; else echo addmul_6.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+addmul_7_.c: addmul_7.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/addmul_7.c; then echo $(srcdir)/addmul_7.c; else echo addmul_7.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+addmul_8_.c: addmul_8.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/addmul_8.c; then echo $(srcdir)/addmul_8.c; else echo addmul_8.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+and_n_.c: and_n.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/and_n.c; then echo $(srcdir)/and_n.c; else echo and_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+andn_n_.c: andn_n.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/andn_n.c; then echo $(srcdir)/andn_n.c; else echo andn_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+binvert_.c: binvert.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/binvert.c; then echo $(srcdir)/binvert.c; else echo binvert.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+cmp_.c: cmp.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cmp.c; then echo $(srcdir)/cmp.c; else echo cmp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+com_.c: com.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/com.c; then echo $(srcdir)/com.c; else echo com.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+copyd_.c: copyd.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/copyd.c; then echo $(srcdir)/copyd.c; else echo copyd.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+copyi_.c: copyi.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/copyi.c; then echo $(srcdir)/copyi.c; else echo copyi.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+dcpi1_bdiv_q_.c: dcpi1_bdiv_q.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/dcpi1_bdiv_q.c; then echo $(srcdir)/dcpi1_bdiv_q.c; else echo dcpi1_bdiv_q.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+dcpi1_bdiv_qr_.c: dcpi1_bdiv_qr.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/dcpi1_bdiv_qr.c; then echo $(srcdir)/dcpi1_bdiv_qr.c; else echo dcpi1_bdiv_qr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+dcpi1_div_q_.c: dcpi1_div_q.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/dcpi1_div_q.c; then echo $(srcdir)/dcpi1_div_q.c; else echo dcpi1_div_q.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+dcpi1_div_qr_.c: dcpi1_div_qr.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/dcpi1_div_qr.c; then echo $(srcdir)/dcpi1_div_qr.c; else echo dcpi1_div_qr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+dcpi1_divappr_q_.c: dcpi1_divappr_q.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/dcpi1_divappr_q.c; then echo $(srcdir)/dcpi1_divappr_q.c; else echo dcpi1_divappr_q.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+dive_1_.c: dive_1.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/dive_1.c; then echo $(srcdir)/dive_1.c; else echo dive_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+diveby3_.c: diveby3.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/diveby3.c; then echo $(srcdir)/diveby3.c; else echo diveby3.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+divis_.c: divis.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divis.c; then echo $(srcdir)/divis.c; else echo divis.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+divrem_.c: divrem.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divrem.c; then echo $(srcdir)/divrem.c; else echo divrem.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+divrem_1_.c: divrem_1.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divrem_1.c; then echo $(srcdir)/divrem_1.c; else echo divrem_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+divrem_2_.c: divrem_2.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divrem_2.c; then echo $(srcdir)/divrem_2.c; else echo divrem_2.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+dump_.c: dump.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/dump.c; then echo $(srcdir)/dump.c; else echo dump.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+fib2_ui_.c: fib2_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fib2_ui.c; then echo $(srcdir)/fib2_ui.c; else echo fib2_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+fib_table_.c: fib_table.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fib_table.c; then echo $(srcdir)/fib_table.c; else echo fib_table.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+gcd_.c: gcd.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcd.c; then echo $(srcdir)/gcd.c; else echo gcd.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+gcd_1_.c: gcd_1.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcd_1.c; then echo $(srcdir)/gcd_1.c; else echo gcd_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+gcdext_.c: gcdext.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcdext.c; then echo $(srcdir)/gcdext.c; else echo gcdext.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+get_d_.c: get_d.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_d.c; then echo $(srcdir)/get_d.c; else echo get_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+get_str_.c: get_str.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_str.c; then echo $(srcdir)/get_str.c; else echo get_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+hamdist_.c: hamdist.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/hamdist.c; then echo $(srcdir)/hamdist.c; else echo hamdist.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+hgcd_.c: hgcd.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/hgcd.c; then echo $(srcdir)/hgcd.c; else echo hgcd.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+hgcd2_.c: hgcd2.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/hgcd2.c; then echo $(srcdir)/hgcd2.c; else echo hgcd2.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+invert_.c: invert.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/invert.c; then echo $(srcdir)/invert.c; else echo invert.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+invert_limb_.c: invert_limb.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/invert_limb.c; then echo $(srcdir)/invert_limb.c; else echo invert_limb.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+invertappr_.c: invertappr.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/invertappr.c; then echo $(srcdir)/invertappr.c; else echo invertappr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+ior_n_.c: ior_n.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/ior_n.c; then echo $(srcdir)/ior_n.c; else echo ior_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+iorn_n_.c: iorn_n.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/iorn_n.c; then echo $(srcdir)/iorn_n.c; else echo iorn_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+jacbase_.c: jacbase.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/jacbase.c; then echo $(srcdir)/jacbase.c; else echo jacbase.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+lshift_.c: lshift.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/lshift.c; then echo $(srcdir)/lshift.c; else echo lshift.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+matrix22_mul_.c: matrix22_mul.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/matrix22_mul.c; then echo $(srcdir)/matrix22_mul.c; else echo matrix22_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mod_1_.c: mod_1.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mod_1.c; then echo $(srcdir)/mod_1.c; else echo mod_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mod_1_1_.c: mod_1_1.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mod_1_1.c; then echo $(srcdir)/mod_1_1.c; else echo mod_1_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mod_1_2_.c: mod_1_2.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mod_1_2.c; then echo $(srcdir)/mod_1_2.c; else echo mod_1_2.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mod_1_3_.c: mod_1_3.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mod_1_3.c; then echo $(srcdir)/mod_1_3.c; else echo mod_1_3.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mod_1_4_.c: mod_1_4.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mod_1_4.c; then echo $(srcdir)/mod_1_4.c; else echo mod_1_4.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mod_34lsub1_.c: mod_34lsub1.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mod_34lsub1.c; then echo $(srcdir)/mod_34lsub1.c; else echo mod_34lsub1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mode1o_.c: mode1o.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mode1o.c; then echo $(srcdir)/mode1o.c; else echo mode1o.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mp_bases_.c: mp_bases.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mp_bases.c; then echo $(srcdir)/mp_bases.c; else echo mp_bases.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mul_.c: mul.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul.c; then echo $(srcdir)/mul.c; else echo mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mul_1_.c: mul_1.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_1.c; then echo $(srcdir)/mul_1.c; else echo mul_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mul_2_.c: mul_2.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_2.c; then echo $(srcdir)/mul_2.c; else echo mul_2.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mul_3_.c: mul_3.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_3.c; then echo $(srcdir)/mul_3.c; else echo mul_3.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mul_4_.c: mul_4.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_4.c; then echo $(srcdir)/mul_4.c; else echo mul_4.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mul_basecase_.c: mul_basecase.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_basecase.c; then echo $(srcdir)/mul_basecase.c; else echo mul_basecase.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mul_fft_.c: mul_fft.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_fft.c; then echo $(srcdir)/mul_fft.c; else echo mul_fft.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mul_n_.c: mul_n.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_n.c; then echo $(srcdir)/mul_n.c; else echo mul_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mullo_basecase_.c: mullo_basecase.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mullo_basecase.c; then echo $(srcdir)/mullo_basecase.c; else echo mullo_basecase.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mullo_n_.c: mullo_n.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mullo_n.c; then echo $(srcdir)/mullo_n.c; else echo mullo_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mulmod_bnm1_.c: mulmod_bnm1.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mulmod_bnm1.c; then echo $(srcdir)/mulmod_bnm1.c; else echo mulmod_bnm1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+nand_n_.c: nand_n.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/nand_n.c; then echo $(srcdir)/nand_n.c; else echo nand_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+neg_.c: neg.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/neg.c; then echo $(srcdir)/neg.c; else echo neg.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+nior_n_.c: nior_n.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/nior_n.c; then echo $(srcdir)/nior_n.c; else echo nior_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+nussbaumer_mul_.c: nussbaumer_mul.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/nussbaumer_mul.c; then echo $(srcdir)/nussbaumer_mul.c; else echo nussbaumer_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+perfsqr_.c: perfsqr.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/perfsqr.c; then echo $(srcdir)/perfsqr.c; else echo perfsqr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+popcount_.c: popcount.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/popcount.c; then echo $(srcdir)/popcount.c; else echo popcount.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+pow_1_.c: pow_1.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/pow_1.c; then echo $(srcdir)/pow_1.c; else echo pow_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+pre_divrem_1_.c: pre_divrem_1.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/pre_divrem_1.c; then echo $(srcdir)/pre_divrem_1.c; else echo pre_divrem_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+pre_mod_1_.c: pre_mod_1.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/pre_mod_1.c; then echo $(srcdir)/pre_mod_1.c; else echo pre_mod_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+random_.c: random.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/random.c; then echo $(srcdir)/random.c; else echo random.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+random2_.c: random2.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/random2.c; then echo $(srcdir)/random2.c; else echo random2.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+rootrem_.c: rootrem.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/rootrem.c; then echo $(srcdir)/rootrem.c; else echo rootrem.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+rshift_.c: rshift.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/rshift.c; then echo $(srcdir)/rshift.c; else echo rshift.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+sbpi1_bdiv_q_.c: sbpi1_bdiv_q.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sbpi1_bdiv_q.c; then echo $(srcdir)/sbpi1_bdiv_q.c; else echo sbpi1_bdiv_q.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+sbpi1_bdiv_qr_.c: sbpi1_bdiv_qr.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sbpi1_bdiv_qr.c; then echo $(srcdir)/sbpi1_bdiv_qr.c; else echo sbpi1_bdiv_qr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+sbpi1_div_q_.c: sbpi1_div_q.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sbpi1_div_q.c; then echo $(srcdir)/sbpi1_div_q.c; else echo sbpi1_div_q.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+sbpi1_div_qr_.c: sbpi1_div_qr.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sbpi1_div_qr.c; then echo $(srcdir)/sbpi1_div_qr.c; else echo sbpi1_div_qr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+sbpi1_divappr_q_.c: sbpi1_divappr_q.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sbpi1_divappr_q.c; then echo $(srcdir)/sbpi1_divappr_q.c; else echo sbpi1_divappr_q.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+scan0_.c: scan0.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/scan0.c; then echo $(srcdir)/scan0.c; else echo scan0.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+scan1_.c: scan1.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/scan1.c; then echo $(srcdir)/scan1.c; else echo scan1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+set_str_.c: set_str.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_str.c; then echo $(srcdir)/set_str.c; else echo set_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+sqr_basecase_.c: sqr_basecase.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sqr_basecase.c; then echo $(srcdir)/sqr_basecase.c; else echo sqr_basecase.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+sqr_diagonal_.c: sqr_diagonal.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sqr_diagonal.c; then echo $(srcdir)/sqr_diagonal.c; else echo sqr_diagonal.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+sqrmod_bnm1_.c: sqrmod_bnm1.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sqrmod_bnm1.c; then echo $(srcdir)/sqrmod_bnm1.c; else echo sqrmod_bnm1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+sqrtrem_.c: sqrtrem.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sqrtrem.c; then echo $(srcdir)/sqrtrem.c; else echo sqrtrem.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+sub_.c: sub.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sub.c; then echo $(srcdir)/sub.c; else echo sub.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+sub_1_.c: sub_1.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sub_1.c; then echo $(srcdir)/sub_1.c; else echo sub_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+sub_n_.c: sub_n.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sub_n.c; then echo $(srcdir)/sub_n.c; else echo sub_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+submul_1_.c: submul_1.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/submul_1.c; then echo $(srcdir)/submul_1.c; else echo submul_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+tdiv_qr_.c: tdiv_qr.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tdiv_qr.c; then echo $(srcdir)/tdiv_qr.c; else echo tdiv_qr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+toom22_mul_.c: toom22_mul.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom22_mul.c; then echo $(srcdir)/toom22_mul.c; else echo toom22_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+toom2_sqr_.c: toom2_sqr.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom2_sqr.c; then echo $(srcdir)/toom2_sqr.c; else echo toom2_sqr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+toom32_mul_.c: toom32_mul.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom32_mul.c; then echo $(srcdir)/toom32_mul.c; else echo toom32_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+toom33_mul_.c: toom33_mul.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom33_mul.c; then echo $(srcdir)/toom33_mul.c; else echo toom33_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+toom3_sqr_.c: toom3_sqr.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom3_sqr.c; then echo $(srcdir)/toom3_sqr.c; else echo toom3_sqr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+toom42_mul_.c: toom42_mul.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom42_mul.c; then echo $(srcdir)/toom42_mul.c; else echo toom42_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+toom43_mul_.c: toom43_mul.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom43_mul.c; then echo $(srcdir)/toom43_mul.c; else echo toom43_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+toom44_mul_.c: toom44_mul.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom44_mul.c; then echo $(srcdir)/toom44_mul.c; else echo toom44_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+toom4_sqr_.c: toom4_sqr.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom4_sqr.c; then echo $(srcdir)/toom4_sqr.c; else echo toom4_sqr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+toom52_mul_.c: toom52_mul.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom52_mul.c; then echo $(srcdir)/toom52_mul.c; else echo toom52_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+toom53_mul_.c: toom53_mul.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom53_mul.c; then echo $(srcdir)/toom53_mul.c; else echo toom53_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+toom62_mul_.c: toom62_mul.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom62_mul.c; then echo $(srcdir)/toom62_mul.c; else echo toom62_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+toom63_mul_.c: toom63_mul.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom63_mul.c; then echo $(srcdir)/toom63_mul.c; else echo toom63_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+toom6_sqr_.c: toom6_sqr.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom6_sqr.c; then echo $(srcdir)/toom6_sqr.c; else echo toom6_sqr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+toom6h_mul_.c: toom6h_mul.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom6h_mul.c; then echo $(srcdir)/toom6h_mul.c; else echo toom6h_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+toom8_sqr_.c: toom8_sqr.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom8_sqr.c; then echo $(srcdir)/toom8_sqr.c; else echo toom8_sqr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+toom8h_mul_.c: toom8h_mul.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom8h_mul.c; then echo $(srcdir)/toom8h_mul.c; else echo toom8h_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+toom_couple_handling_.c: toom_couple_handling.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom_couple_handling.c; then echo $(srcdir)/toom_couple_handling.c; else echo toom_couple_handling.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+toom_eval_dgr3_pm1_.c: toom_eval_dgr3_pm1.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom_eval_dgr3_pm1.c; then echo $(srcdir)/toom_eval_dgr3_pm1.c; else echo toom_eval_dgr3_pm1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+toom_eval_dgr3_pm2_.c: toom_eval_dgr3_pm2.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom_eval_dgr3_pm2.c; then echo $(srcdir)/toom_eval_dgr3_pm2.c; else echo toom_eval_dgr3_pm2.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+toom_eval_pm1_.c: toom_eval_pm1.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom_eval_pm1.c; then echo $(srcdir)/toom_eval_pm1.c; else echo toom_eval_pm1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+toom_eval_pm2exp_.c: toom_eval_pm2exp.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom_eval_pm2exp.c; then echo $(srcdir)/toom_eval_pm2exp.c; else echo toom_eval_pm2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+toom_eval_pm2rexp_.c: toom_eval_pm2rexp.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom_eval_pm2rexp.c; then echo $(srcdir)/toom_eval_pm2rexp.c; else echo toom_eval_pm2rexp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+toom_interpolate_12pts_.c: toom_interpolate_12pts.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom_interpolate_12pts.c; then echo $(srcdir)/toom_interpolate_12pts.c; else echo toom_interpolate_12pts.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+toom_interpolate_16pts_.c: toom_interpolate_16pts.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom_interpolate_16pts.c; then echo $(srcdir)/toom_interpolate_16pts.c; else echo toom_interpolate_16pts.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+toom_interpolate_5pts_.c: toom_interpolate_5pts.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom_interpolate_5pts.c; then echo $(srcdir)/toom_interpolate_5pts.c; else echo toom_interpolate_5pts.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+toom_interpolate_6pts_.c: toom_interpolate_6pts.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom_interpolate_6pts.c; then echo $(srcdir)/toom_interpolate_6pts.c; else echo toom_interpolate_6pts.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+toom_interpolate_7pts_.c: toom_interpolate_7pts.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom_interpolate_7pts.c; then echo $(srcdir)/toom_interpolate_7pts.c; else echo toom_interpolate_7pts.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+toom_interpolate_8pts_.c: toom_interpolate_8pts.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom_interpolate_8pts.c; then echo $(srcdir)/toom_interpolate_8pts.c; else echo toom_interpolate_8pts.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+udiv_qrnnd_.c: udiv_qrnnd.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/udiv_qrnnd.c; then echo $(srcdir)/udiv_qrnnd.c; else echo udiv_qrnnd.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+udiv_w_sdiv_.c: udiv_w_sdiv.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/udiv_w_sdiv.c; then echo $(srcdir)/udiv_w_sdiv.c; else echo udiv_w_sdiv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+xnor_n_.c: xnor_n.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/xnor_n.c; then echo $(srcdir)/xnor_n.c; else echo xnor_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+xor_n_.c: xor_n.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/xor_n.c; then echo $(srcdir)/xor_n.c; else echo xor_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+add_.$(OBJEXT) add_.lo add_1_.$(OBJEXT) add_1_.lo add_n_.$(OBJEXT) \
+add_n_.lo addmul_1_.$(OBJEXT) addmul_1_.lo addmul_2_.$(OBJEXT) \
+addmul_2_.lo addmul_3_.$(OBJEXT) addmul_3_.lo addmul_4_.$(OBJEXT) \
+addmul_4_.lo addmul_5_.$(OBJEXT) addmul_5_.lo addmul_6_.$(OBJEXT) \
+addmul_6_.lo addmul_7_.$(OBJEXT) addmul_7_.lo addmul_8_.$(OBJEXT) \
+addmul_8_.lo and_n_.$(OBJEXT) and_n_.lo andn_n_.$(OBJEXT) andn_n_.lo \
+binvert_.$(OBJEXT) binvert_.lo cmp_.$(OBJEXT) cmp_.lo com_.$(OBJEXT) \
+com_.lo copyd_.$(OBJEXT) copyd_.lo copyi_.$(OBJEXT) copyi_.lo \
+dcpi1_bdiv_q_.$(OBJEXT) dcpi1_bdiv_q_.lo dcpi1_bdiv_qr_.$(OBJEXT) \
+dcpi1_bdiv_qr_.lo dcpi1_div_q_.$(OBJEXT) dcpi1_div_q_.lo \
+dcpi1_div_qr_.$(OBJEXT) dcpi1_div_qr_.lo dcpi1_divappr_q_.$(OBJEXT) \
+dcpi1_divappr_q_.lo dive_1_.$(OBJEXT) dive_1_.lo diveby3_.$(OBJEXT) \
+diveby3_.lo divis_.$(OBJEXT) divis_.lo divrem_.$(OBJEXT) divrem_.lo \
+divrem_1_.$(OBJEXT) divrem_1_.lo divrem_2_.$(OBJEXT) divrem_2_.lo \
+dump_.$(OBJEXT) dump_.lo fib2_ui_.$(OBJEXT) fib2_ui_.lo \
+fib_table_.$(OBJEXT) fib_table_.lo gcd_.$(OBJEXT) gcd_.lo \
+gcd_1_.$(OBJEXT) gcd_1_.lo gcdext_.$(OBJEXT) gcdext_.lo \
+get_d_.$(OBJEXT) get_d_.lo get_str_.$(OBJEXT) get_str_.lo \
+hamdist_.$(OBJEXT) hamdist_.lo hgcd_.$(OBJEXT) hgcd_.lo \
+hgcd2_.$(OBJEXT) hgcd2_.lo invert_.$(OBJEXT) invert_.lo \
+invert_limb_.$(OBJEXT) invert_limb_.lo invertappr_.$(OBJEXT) \
+invertappr_.lo ior_n_.$(OBJEXT) ior_n_.lo iorn_n_.$(OBJEXT) iorn_n_.lo \
+jacbase_.$(OBJEXT) jacbase_.lo lshift_.$(OBJEXT) lshift_.lo \
+matrix22_mul_.$(OBJEXT) matrix22_mul_.lo mod_1_.$(OBJEXT) mod_1_.lo \
+mod_1_1_.$(OBJEXT) mod_1_1_.lo mod_1_2_.$(OBJEXT) mod_1_2_.lo \
+mod_1_3_.$(OBJEXT) mod_1_3_.lo mod_1_4_.$(OBJEXT) mod_1_4_.lo \
+mod_34lsub1_.$(OBJEXT) mod_34lsub1_.lo mode1o_.$(OBJEXT) mode1o_.lo \
+mp_bases_.$(OBJEXT) mp_bases_.lo mul_.$(OBJEXT) mul_.lo \
+mul_1_.$(OBJEXT) mul_1_.lo mul_2_.$(OBJEXT) mul_2_.lo mul_3_.$(OBJEXT) \
+mul_3_.lo mul_4_.$(OBJEXT) mul_4_.lo mul_basecase_.$(OBJEXT) \
+mul_basecase_.lo mul_fft_.$(OBJEXT) mul_fft_.lo mul_n_.$(OBJEXT) \
+mul_n_.lo mullo_basecase_.$(OBJEXT) mullo_basecase_.lo \
+mullo_n_.$(OBJEXT) mullo_n_.lo mulmod_bnm1_.$(OBJEXT) mulmod_bnm1_.lo \
+nand_n_.$(OBJEXT) nand_n_.lo neg_.$(OBJEXT) neg_.lo nior_n_.$(OBJEXT) \
+nior_n_.lo nussbaumer_mul_.$(OBJEXT) nussbaumer_mul_.lo \
+perfsqr_.$(OBJEXT) perfsqr_.lo popcount_.$(OBJEXT) popcount_.lo \
+pow_1_.$(OBJEXT) pow_1_.lo pre_divrem_1_.$(OBJEXT) pre_divrem_1_.lo \
+pre_mod_1_.$(OBJEXT) pre_mod_1_.lo random_.$(OBJEXT) random_.lo \
+random2_.$(OBJEXT) random2_.lo rootrem_.$(OBJEXT) rootrem_.lo \
+rshift_.$(OBJEXT) rshift_.lo sbpi1_bdiv_q_.$(OBJEXT) sbpi1_bdiv_q_.lo \
+sbpi1_bdiv_qr_.$(OBJEXT) sbpi1_bdiv_qr_.lo sbpi1_div_q_.$(OBJEXT) \
+sbpi1_div_q_.lo sbpi1_div_qr_.$(OBJEXT) sbpi1_div_qr_.lo \
+sbpi1_divappr_q_.$(OBJEXT) sbpi1_divappr_q_.lo scan0_.$(OBJEXT) \
+scan0_.lo scan1_.$(OBJEXT) scan1_.lo set_str_.$(OBJEXT) set_str_.lo \
+sqr_basecase_.$(OBJEXT) sqr_basecase_.lo sqr_diagonal_.$(OBJEXT) \
+sqr_diagonal_.lo sqrmod_bnm1_.$(OBJEXT) sqrmod_bnm1_.lo \
+sqrtrem_.$(OBJEXT) sqrtrem_.lo sub_.$(OBJEXT) sub_.lo sub_1_.$(OBJEXT) \
+sub_1_.lo sub_n_.$(OBJEXT) sub_n_.lo submul_1_.$(OBJEXT) submul_1_.lo \
+tdiv_qr_.$(OBJEXT) tdiv_qr_.lo toom22_mul_.$(OBJEXT) toom22_mul_.lo \
+toom2_sqr_.$(OBJEXT) toom2_sqr_.lo toom32_mul_.$(OBJEXT) \
+toom32_mul_.lo toom33_mul_.$(OBJEXT) toom33_mul_.lo \
+toom3_sqr_.$(OBJEXT) toom3_sqr_.lo toom42_mul_.$(OBJEXT) \
+toom42_mul_.lo toom43_mul_.$(OBJEXT) toom43_mul_.lo \
+toom44_mul_.$(OBJEXT) toom44_mul_.lo toom4_sqr_.$(OBJEXT) \
+toom4_sqr_.lo toom52_mul_.$(OBJEXT) toom52_mul_.lo \
+toom53_mul_.$(OBJEXT) toom53_mul_.lo toom62_mul_.$(OBJEXT) \
+toom62_mul_.lo toom63_mul_.$(OBJEXT) toom63_mul_.lo \
+toom6_sqr_.$(OBJEXT) toom6_sqr_.lo toom6h_mul_.$(OBJEXT) \
+toom6h_mul_.lo toom8_sqr_.$(OBJEXT) toom8_sqr_.lo \
+toom8h_mul_.$(OBJEXT) toom8h_mul_.lo toom_couple_handling_.$(OBJEXT) \
+toom_couple_handling_.lo toom_eval_dgr3_pm1_.$(OBJEXT) \
+toom_eval_dgr3_pm1_.lo toom_eval_dgr3_pm2_.$(OBJEXT) \
+toom_eval_dgr3_pm2_.lo toom_eval_pm1_.$(OBJEXT) toom_eval_pm1_.lo \
+toom_eval_pm2exp_.$(OBJEXT) toom_eval_pm2exp_.lo \
+toom_eval_pm2rexp_.$(OBJEXT) toom_eval_pm2rexp_.lo \
+toom_interpolate_12pts_.$(OBJEXT) toom_interpolate_12pts_.lo \
+toom_interpolate_16pts_.$(OBJEXT) toom_interpolate_16pts_.lo \
+toom_interpolate_5pts_.$(OBJEXT) toom_interpolate_5pts_.lo \
+toom_interpolate_6pts_.$(OBJEXT) toom_interpolate_6pts_.lo \
+toom_interpolate_7pts_.$(OBJEXT) toom_interpolate_7pts_.lo \
+toom_interpolate_8pts_.$(OBJEXT) toom_interpolate_8pts_.lo \
+udiv_qrnnd_.$(OBJEXT) udiv_qrnnd_.lo udiv_w_sdiv_.$(OBJEXT) \
+udiv_w_sdiv_.lo xnor_n_.$(OBJEXT) xnor_n_.lo xor_n_.$(OBJEXT) \
+xor_n_.lo : $(ANSI2KNR)
+
+mostlyclean-libtool:
+       -rm -f *.lo
+
+clean-libtool:
+       -rm -rf .libs _libs
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+       list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       mkid -fID $$unique
+tags: TAGS
+
+TAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+               $(TAGS_FILES) $(LISP)
+       set x; \
+       here=`pwd`; \
+       list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       shift; \
+       if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+         test -n "$$unique" || unique=$$empty_fix; \
+         if test $$# -gt 0; then \
+           $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+             "$$@" $$unique; \
+         else \
+           $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+             $$unique; \
+         fi; \
+       fi
+ctags: CTAGS
+CTAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+               $(TAGS_FILES) $(LISP)
+       list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       test -z "$(CTAGS_ARGS)$$unique" \
+         || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+            $$unique
+
+GTAGS:
+       here=`$(am__cd) $(top_builddir) && pwd` \
+         && $(am__cd) $(top_srcdir) \
+         && gtags -i $(GTAGS_ARGS) "$$here"
+
+distclean-tags:
+       -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+       @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+       topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+       list='$(DISTFILES)'; \
+         dist_files=`for file in $$list; do echo $$file; done | \
+         sed -e "s|^$$srcdirstrip/||;t" \
+             -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+       case $$dist_files in \
+         */*) $(MKDIR_P) `echo "$$dist_files" | \
+                          sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+                          sort -u` ;; \
+       esac; \
+       for file in $$dist_files; do \
+         if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+         if test -d $$d/$$file; then \
+           dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+           if test -d "$(distdir)/$$file"; then \
+             find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+           fi; \
+           if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+             cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+             find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+           fi; \
+           cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+         else \
+           test -f "$(distdir)/$$file" \
+           || cp -p $$d/$$file "$(distdir)/$$file" \
+           || exit 1; \
+         fi; \
+       done
+check-am: all-am
+check: check-am
+all-am: Makefile $(LTLIBRARIES)
+installdirs:
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+       @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+       $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+         install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+         `test -z '$(STRIP)' || \
+           echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+       -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+       -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+       @echo "This command is intended for maintainers to use"
+       @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \
+       mostlyclean-am
+
+distclean: distclean-am
+       -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+       distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+       -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+       mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: $(top_builddir)/ansi2knr install-am install-strip
+
+.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
+       clean-libtool clean-noinstLTLIBRARIES ctags distclean \
+       distclean-compile distclean-generic distclean-libtool \
+       distclean-tags distdir dvi dvi-am html html-am info info-am \
+       install install-am install-data install-data-am install-dvi \
+       install-dvi-am install-exec install-exec-am install-html \
+       install-html-am install-info install-info-am install-man \
+       install-pdf install-pdf-am install-ps install-ps-am \
+       install-strip installcheck installcheck-am installdirs \
+       maintainer-clean maintainer-clean-generic mostlyclean \
+       mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+       mostlyclean-libtool pdf pdf-am ps ps-am tags uninstall \
+       uninstall-am
+
+
+# These are BUILT_SOURCES at the top-level, so normally they're built before
+# recursing into this directory.  If one is missing anyway, re-run make in the
+# parent directory to create it ("&&" so make is not run if the cd fails).
+fib_table.c:
+       cd .. && $(MAKE) $(AM_MAKEFLAGS) mpn/fib_table.c
+mp_bases.c:
+       cd .. && $(MAKE) $(AM_MAKEFLAGS) mpn/mp_bases.c
+perfsqr.h:
+       cd .. && $(MAKE) $(AM_MAKEFLAGS) mpn/perfsqr.h
+
+tune-gcd-p: gcd.c
+       $(COMPILE) -g -O1 -I $(top_srcdir)/tune -DTUNE_GCD_P=1 gcd.c -o tune-gcd-p -L ../.libs -L../tune/.libs -lspeed -lgmp -lm
+
+# .s assembler, no preprocessing.
+# The backquoted test prefixes $< with $(srcdir)/ when the file is not found as given (the .obj rule also passes the path through $(CYGPATH_W)).
+.s.o:
+       $(CCAS) $(COMPILE_FLAGS) `test -f '$<' || echo '$(srcdir)/'`$<
+.s.obj:
+       $(CCAS) $(COMPILE_FLAGS) `if test -f '$<'; then $(CYGPATH_W) '$<'; else $(CYGPATH_W) '$(srcdir)/$<'; fi`
+.s.lo:
+       $(LIBTOOL) --mode=compile --tag=CC $(CCAS) $(COMPILE_FLAGS) `test -f '$<' || echo '$(srcdir)/'`$<
+
+# .S assembler, preprocessed with cpp.
+#
+# It's necessary to run $(CPP) separately, since it seems not all compilers
+# recognise .S files, in particular "cc" on HP-UX 10 and 11 doesn't (and
+# will silently do nothing if given a .S).
+#
+# For .lo we need a helper script, as described below for .asm.lo.
+#
+.S.o:
+       $(CPP) $(PREPROCESS_FLAGS) `test -f '$<' || echo '$(srcdir)/'`$< | grep -v '^#' >tmp-$*.s
+       $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@
+       $(RM_TMP) tmp-$*.s
+.S.obj:
+       $(CPP) $(PREPROCESS_FLAGS) `if test -f '$<'; then $(CYGPATH_W) '$<'; else $(CYGPATH_W) '$(srcdir)/$<'; fi` | grep -v '^#' >tmp-$*.s
+       $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@
+       $(RM_TMP) tmp-$*.s
+.S.lo:
+       $(LIBTOOL) --mode=compile --tag=CC $(top_srcdir)/mpn/cpp-ccas --cpp="$(CPP) $(PREPROCESS_FLAGS)" $(CCAS) $(COMPILE_FLAGS) `test -f '$<' || echo '$(srcdir)/'`$<
+
+# .asm assembler, preprocessed with m4.
+#
+# .o and .obj are non-PIC and just need m4 followed by a compile.
+#
+# .lo is a bit tricky.  Libtool (as of version 1.5) has foo.lo as a little
+# text file, and .libs/foo.o and foo.o as the PIC and non-PIC objects,
+# respectively.  It'd be asking for lots of trouble to try to create foo.lo
+# ourselves, so instead arrange to invoke libtool like a --mode=compile, but
+# with a special m4-ccas script which first m4 preprocesses, then compiles.
+# --tag=CC is necessary since foo.asm is otherwise unknown to libtool.
+#
+# Libtool adds -DPIC when building a shared object and the .asm files look
+# for that.  But it should be noted that the other PIC flags are on occasion
+# important too, in particular FreeBSD 2.2.8 gas 1.92.3 requires -k before
+# it accepts PIC constructs like @GOT, and gcc adds that flag only under
+# -fPIC.  (Later versions of gas are happy to accept PIC stuff any time.)
+#
+.asm.o:
+       $(M4) -DOPERATION_$* `test -f '$<' || echo '$(srcdir)/'`$< >tmp-$*.s
+       $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@
+       $(RM_TMP) tmp-$*.s
+.asm.obj:
+       $(M4) -DOPERATION_$* `if test -f '$<'; then $(CYGPATH_W) '$<'; else $(CYGPATH_W) '$(srcdir)/$<'; fi` >tmp-$*.s
+       $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@
+       $(RM_TMP) tmp-$*.s
+.asm.lo:
+       $(LIBTOOL) --mode=compile --tag=CC $(top_srcdir)/mpn/m4-ccas --m4="$(M4)" $(CCAS) $(COMPILE_FLAGS) `test -f '$<' || echo '$(srcdir)/'`$<
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/mpn/README b/mpn/README

new file mode 100644 (file)

index 0000000..32fc007
--- /dev/null
+++ b/mpn/README
@@ -0,0 +1,33 @@
+Copyright 1996, 1999 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+
+
+This directory contains all code for the mpn layer of GMP.
+
+Most subdirectories contain machine-dependent code, written in assembly or C.
+The `generic' subdirectory contains default code, used when there is no
+machine-dependent replacement for a particular machine.
+
+There is one subdirectory for each ISA family.  Note that e.g., 32-bit SPARC
+and 64-bit SPARC are very different ISA's, and thus cannot share any code.
+
+A particular compile will only use code from one subdirectory, and the
+`generic' subdirectory.  The ISA-specific subdirectories contain hierarchies of
+directories for various architecture variants and implementations; the
+top-most level contains code that runs correctly on all variants.
diff --git a/mpn/a29k/add_n.s b/mpn/a29k/add_n.s

new file mode 100644 (file)

index 0000000..2d92604
--- /dev/null
+++ b/mpn/a29k/add_n.s
@@ -0,0 +1,118 @@
+; 29000 mpn_add_n -- Add two limb vectors of the same length > 0 and store
+; sum in a third limb vector.
+
+; Copyright 1992, 1994, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 3 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+; INPUT PARAMETERS
+; res_ptr      lr2
+; s1_ptr       lr3
+; s2_ptr       lr4
+; size         lr5
+
+; We use the loadm/storem instructions and operate on chunks of 8
+; limbs per iteration, until fewer than 8 limbs remain.
+
+; The 29k has no addition or subtraction instructions that don't
+; affect carry, so we need to save and restore that as soon as we
+; adjust the pointers.  gr116 is used for this purpose.  Note that
+; gr116==0 means that carry should be set.
+
+       .sect .lit,lit
+       .text
+       .align  4
+       .global ___gmpn_add_n
+       .word   0x60000                 ; NOTE(review): presumably the 29k procedure tag word -- confirm
+___gmpn_add_n:
+       srl     gr117,lr5,3             ; gr117 = size / 8 = number of full 8-limb chunks
+       sub     gr118,gr117,1           ; gr118 < 0 iff there is no full chunk
+       jmpt    gr118,Ltail             ; no full chunk: go straight to the tail code
+        constn gr116,-1                ; (delay slot) init cy reg to "no carry"
+       sub     gr117,gr117,2           ; count for jmpfdec
+
+; Main loop working 8 limbs/iteration.
+Loop:  mtsrim  cr,(8-1)                ; CR = 7, so the next loadm transfers 8 words
+       loadm   0,0,gr96,lr3            ; gr96..gr103 = next 8 limbs of s1
+       add     lr3,lr3,32              ; s1_ptr += 8 limbs (this add clobbers carry)
+       mtsrim  cr,(8-1)
+       loadm   0,0,gr104,lr4           ; gr104..gr111 = next 8 limbs of s2
+       add     lr4,lr4,32              ; s2_ptr += 8 limbs
+
+       subr    gr116,gr116,0           ; restore carry
+       addc    gr96,gr96,gr104         ; gr96..gr103 += gr104..gr111, carry chained limb to limb
+       addc    gr97,gr97,gr105
+       addc    gr98,gr98,gr106
+       addc    gr99,gr99,gr107
+       addc    gr100,gr100,gr108
+       addc    gr101,gr101,gr109
+       addc    gr102,gr102,gr110
+       addc    gr103,gr103,gr111
+       subc    gr116,gr116,gr116       ; gr116 = not(cy)
+
+       mtsrim  cr,(8-1)
+       storem  0,0,gr96,lr2            ; write the 8 sum limbs to res_ptr
+       jmpfdec gr117,Loop              ; another full chunk pending: loop (and decrement the count)
+        add    lr2,lr2,32              ; (delay slot) res_ptr += 8 limbs
+
+; Code for the last up-to-7 limbs.
+; This code might look very strange, but it's hard to write it
+; differently without major slowdown.
+
+       and     lr5,lr5,(8-1)           ; lr5 = size mod 8 = limbs still to do
+Ltail: sub     gr118,lr5,1             ; count for CR
+       jmpt    gr118,Lend              ; nothing left: return
+        sub    gr117,lr5,2             ; count for jmpfdec
+
+       mtsr    cr,gr118
+       loadm   0,0,gr96,lr3            ; load the remaining s1 limbs into gr96...
+       mtsr    cr,gr118
+       loadm   0,0,gr104,lr4           ; load the remaining s2 limbs into gr104...
+
+       subr    gr116,gr116,0           ; restore carry
+
+       jmpfdec gr117,L1                ; more limbs after this one: continue at L1
+        addc   gr96,gr96,gr104         ; (delay slot) add limb 0, executed on both paths
+       jmp     Lstore                  ; that was the last limb
+        mtsr   cr,gr118                ; (delay slot) CR = limbs - 1 for the storem
+L1:    jmpfdec gr117,L2
+        addc   gr97,gr97,gr105
+       jmp     Lstore
+        mtsr   cr,gr118
+L2:    jmpfdec gr117,L3
+        addc   gr98,gr98,gr106
+       jmp     Lstore
+        mtsr   cr,gr118
+L3:    jmpfdec gr117,L4
+        addc   gr99,gr99,gr107
+       jmp     Lstore
+        mtsr   cr,gr118
+L4:    jmpfdec gr117,L5
+        addc   gr100,gr100,gr108
+       jmp     Lstore
+        mtsr   cr,gr118
+L5:    jmpfdec gr117,L6
+        addc   gr101,gr101,gr109
+       jmp     Lstore
+        mtsr   cr,gr118
+L6:    addc    gr102,gr102,gr110       ; NOTE(review): falls into Lstore without the mtsr cr,gr118 the other paths run -- confirm CR is still valid
+
+Lstore:        storem  0,0,gr96,lr2    ; write the tail sum limbs to res_ptr
+       subc    gr116,gr116,gr116       ; gr116 = not(cy)
+
+Lend:  jmpi    lr0                     ; return to caller
+        add    gr96,gr116,1            ; (delay slot) gr96 = gr116 + 1: 1 if carry out (gr116 == 0), else 0
diff --git a/mpn/a29k/addmul_1.s b/mpn/a29k/addmul_1.s

new file mode 100644 (file)

index 0000000..fcf7fc2
--- /dev/null
+++ b/mpn/a29k/addmul_1.s
@@ -0,0 +1,111 @@
+; 29000 __gmpn_addmul_1 -- Multiply a limb vector with a single limb and
+; add the product to a second limb vector.
+
+; Copyright 1992, 1994, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 3 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+; INPUT PARAMETERS
+; res_ptr      lr2
+; s1_ptr       lr3
+; size         lr4
+; s2_limb      lr5
+
+       .cputype 29050
+       .sect .lit,lit
+       .text
+       .align  4
+       .global ___gmpn_addmul_1
+       .word   0x60000                 ; NOTE(review): presumably the 29k procedure tag word -- confirm
+___gmpn_addmul_1:
+       sub     lr4,lr4,8               ; lr4 = size - 8
+       jmpt    lr4,Ltail               ; fewer than 8 limbs: only the one-limb loop runs
+        const  gr120,0                 ; init cylimb reg
+
+       srl     gr117,lr4,3             ; divide by 8
+       sub     gr117,gr117,1           ; count for jmpfdec
+
+Loop:  mtsrim  cr,(8-1)                ; CR = 7, so the next loadm transfers 8 words
+       loadm   0,0,gr96,lr3            ; gr96..gr103 = next 8 limbs of s1
+       add     lr3,lr3,32              ; s1_ptr += 8 limbs
+
+       multiplu gr104,gr96,lr5         ; gr104..gr111 = low words of limb * s2_limb
+       multmu   gr96,gr96,lr5          ; gr96..gr103  = high words (overwrite the source limbs)
+       multiplu gr105,gr97,lr5
+       multmu   gr97,gr97,lr5
+       multiplu gr106,gr98,lr5
+       multmu   gr98,gr98,lr5
+       multiplu gr107,gr99,lr5
+       multmu   gr99,gr99,lr5
+       multiplu gr108,gr100,lr5
+       multmu   gr100,gr100,lr5
+       multiplu gr109,gr101,lr5
+       multmu   gr101,gr101,lr5
+       multiplu gr110,gr102,lr5
+       multmu   gr102,gr102,lr5
+       multiplu gr111,gr103,lr5
+       multmu   gr103,gr103,lr5
+
+       add     gr104,gr104,gr120       ; lowest low word += carry limb from the previous chunk
+       addc    gr105,gr105,gr96        ; each low word += high word of the limb below, with carry
+       addc    gr106,gr106,gr97
+       addc    gr107,gr107,gr98
+       addc    gr108,gr108,gr99
+       addc    gr109,gr109,gr100
+       addc    gr110,gr110,gr101
+       addc    gr111,gr111,gr102
+       addc    gr120,gr103,0           ; new carry limb = top high word + carry
+
+       mtsrim  cr,(8-1)
+       loadm   0,0,gr96,lr2            ; gr96..gr103 = current 8 limbs at res_ptr
+
+       add     gr104,gr96,gr104        ; add the existing result limbs to the products
+       addc    gr105,gr97,gr105
+       addc    gr106,gr98,gr106
+       addc    gr107,gr99,gr107
+       addc    gr108,gr100,gr108
+       addc    gr109,gr101,gr109
+       addc    gr110,gr102,gr110
+       addc    gr111,gr103,gr111
+       addc    gr120,gr120,0           ; fold the final carry into the carry limb
+
+       mtsrim  cr,(8-1)
+       storem  0,0,gr104,lr2           ; write the 8 updated limbs back to res_ptr
+       jmpfdec gr117,Loop              ; another full chunk pending: loop (and decrement the count)
+        add    lr2,lr2,32              ; (delay slot) res_ptr += 8 limbs
+
+Ltail: and     lr4,lr4,(8-1)           ; lr4 = size mod 8 (size-8 has the same low 3 bits)
+       sub     gr118,lr4,1             ; negative iff no limbs remain
+       jmpt    gr118,Lend              ; nothing left: return
+        sub    lr4,lr4,2               ; (delay slot) count for jmpfdec
+       sub     lr2,lr2,4               ; offset res_ptr by one limb
+
+Loop2: load    0,0,gr116,lr3           ; gr116 = next s1 limb
+       add     lr3,lr3,4               ; s1_ptr += 1 limb
+       multiplu gr117,gr116,lr5        ; gr117 = low word of limb * s2_limb
+       multmu  gr118,gr116,lr5         ; gr118 = high word
+       add     lr2,lr2,4               ; pre-increment res_ptr (undoes the offset above)
+       load    0,0,gr119,lr2           ; gr119 = current result limb
+       add     gr117,gr117,gr120       ; low word += carry limb
+       addc    gr118,gr118,0           ; propagate carry into the high word
+       add     gr117,gr117,gr119       ; += existing result limb
+       store   0,0,gr117,lr2           ; write the updated result limb
+       jmpfdec lr4,Loop2               ; more limbs: loop (and decrement the count)
+        addc   gr120,gr118,0           ; (delay slot) carry limb = high word + carry of the last add
+
+Lend:  jmpi    lr0                     ; return to caller
+        or     gr96,gr120,0            ; (delay slot) copy carry limb to gr96, the return value
diff --git a/mpn/a29k/lshift.s b/mpn/a29k/lshift.s

new file mode 100644 (file)

index 0000000..3df6dab
--- /dev/null
+++ b/mpn/a29k/lshift.s
@@ -0,0 +1,91 @@
+; 29000 __gmpn_lshift --
+
+; Copyright 1992, 1994, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 3 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+; INPUT PARAMETERS
+; res_ptr      lr2
+; s1_ptr       lr3
+; size         lr4
+; cnt          lr5
+
+; We use the loadm/storem instructions and operate on chunks of 8
+; limbs/per iteration, until less than 8 limbs remain.
+
+       .sect .lit,lit
+       .text
+       .align  4
+       .global ___gmpn_lshift
+       .word   0x60000
+___gmpn_lshift:
+       sll     gr116,lr4,2             ; gr116 = size * 4 (operand length in bytes)
+       add     lr3,gr116,lr3           ; s1_ptr -> just past the last source limb
+       add     lr2,gr116,lr2           ; res_ptr -> just past the last result limb
+       sub     lr3,lr3,4               ; back up to the last source limb
+       load    0,0,gr119,lr3           ; gr119 = s1_ptr[size-1], carried into the first extract
+
+       subr    gr116,lr5,32            ; gr116 = 32 - lr5
+       srl     gr96,gr119,gr116        ; return value
+       sub     lr4,lr4,1               ; actual loop count is SIZE - 1
+
+       srl     gr117,lr4,3             ; chunk count = (actual count) / 8
+       cpeq    gr118,gr117,0
+       jmpt    gr118,Ltail             ; no full chunk of 8: go straight to the tail
+        mtsr   fc,lr5                  ; (delay slot) fc = lr5, the count used by extract
+
+       sub     gr117,gr117,2           ; count for jmpfdec
+
+; Main loop working 8 limbs/iteration.
+Loop:  sub     lr3,lr3,32              ; move s1_ptr down by 8 limbs
+       mtsrim  cr,(8-1)                ; transfer count for the loadm below
+       loadm   0,0,gr100,lr3           ; gr100..gr107 = next 8 source limbs
+
+       extract gr109,gr119,gr107       ; gr119 = limb carried over from the previous step
+       extract gr108,gr107,gr106       ; each result combines a limb with the one below it
+       extract gr107,gr106,gr105       ; destinations only overwrite limbs already consumed
+       extract gr106,gr105,gr104
+       extract gr105,gr104,gr103
+       extract gr104,gr103,gr102
+       extract gr103,gr102,gr101
+       extract gr102,gr101,gr100
+
+       sub     lr2,lr2,32              ; move res_ptr down by 8 limbs
+       mtsrim  cr,(8-1)                ; transfer count for the storem below
+       storem  0,0,gr102,lr2           ; store results gr102..gr109
+       jmpfdec gr117,Loop
+        or     gr119,gr100,0           ; (delay slot) lowest loaded limb carries over
+
+; Code for the last up-to-7 limbs.
+
+       and     lr4,lr4,(8-1)           ; limbs remaining after the full chunks
+Ltail: cpeq    gr118,lr4,0
+       jmpt    gr118,Lend              ; nothing left but the final limb
+        sub    lr4,lr4,2               ; count for jmpfdec
+
+Loop2: sub     lr3,lr3,4
+       load    0,0,gr116,lr3           ; gr116 = next lower source limb
+       extract gr117,gr119,gr116
+       sub     lr2,lr2,4
+       store   0,0,gr117,lr2
+       jmpfdec lr4,Loop2
+        or     gr119,gr116,0           ; (delay slot) this limb carries over
+
+Lend:  extract gr117,gr119,0           ; final limb: pair the carried limb with zero
+       sub     lr2,lr2,4
+       jmpi    lr0                     ; return to caller
+        store  0,0,gr117,lr2           ; (delay slot) store the final result limb
diff --git a/mpn/a29k/mul_1.s b/mpn/a29k/mul_1.s

new file mode 100644 (file)

index 0000000..a55fe3e
--- /dev/null
+++ b/mpn/a29k/mul_1.s
@@ -0,0 +1,95 @@
+; 29000 __gmpn_mul_1 -- Multiply a limb vector with a single limb and
+; store the product in a second limb vector.
+
+; Copyright 1992, 1994, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 3 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+; INPUT PARAMETERS
+; res_ptr      lr2
+; s1_ptr       lr3
+; size         lr4
+; s2_limb      lr5
+
+       .cputype 29050
+       .sect .lit,lit
+       .text
+       .align  4
+       .global ___gmpn_mul_1
+       .word   0x60000
+___gmpn_mul_1:
+       sub     lr4,lr4,8               ; lr4 = size - 8
+       jmpt    lr4,Ltail               ; negative: fewer than 8 limbs, tail only
+        const  gr120,0                 ; init cylimb reg
+
+       srl     gr117,lr4,3             ; divide by 8
+       sub     gr117,gr117,1           ; count for jmpfdec
+
+Loop:  mtsrim  cr,(8-1)                ; transfer count for the loadm below
+       loadm   0,0,gr96,lr3            ; gr96..gr103 = next 8 s1 limbs
+       add     lr3,lr3,32              ; advance s1_ptr by 8 limbs
+
+       multiplu gr104,gr96,lr5         ; gr104..gr111 receive the low product words
+       multmu   gr96,gr96,lr5          ; gr96..gr103 are replaced by the high product words
+       multiplu gr105,gr97,lr5
+       multmu   gr97,gr97,lr5
+       multiplu gr106,gr98,lr5
+       multmu   gr98,gr98,lr5
+       multiplu gr107,gr99,lr5
+       multmu   gr99,gr99,lr5
+       multiplu gr108,gr100,lr5
+       multmu   gr100,gr100,lr5
+       multiplu gr109,gr101,lr5
+       multmu   gr101,gr101,lr5
+       multiplu gr110,gr102,lr5
+       multmu   gr102,gr102,lr5
+       multiplu gr111,gr103,lr5
+       multmu   gr103,gr103,lr5
+
+       add     gr104,gr104,gr120       ; low word 0 + carry limb from the previous chunk
+       addc    gr105,gr105,gr96        ; low word i + high word i-1 + carry
+       addc    gr106,gr106,gr97
+       addc    gr107,gr107,gr98
+       addc    gr108,gr108,gr99
+       addc    gr109,gr109,gr100
+       addc    gr110,gr110,gr101
+       addc    gr111,gr111,gr102
+       addc    gr120,gr103,0           ; new carry limb = last high word + carry
+
+       mtsrim  cr,(8-1)                ; transfer count for the storem below
+       storem  0,0,gr104,lr2           ; store the 8 result limbs gr104..gr111
+       jmpfdec gr117,Loop
+        add    lr2,lr2,32              ; (delay slot) advance res_ptr by 8 limbs
+
+Ltail: and     lr4,lr4,(8-1)           ; limbs left over (0..7)
+       sub     gr118,lr4,1             ; count for CR
+       jmpt    gr118,Lend              ; nothing left: return the carry limb
+        sub    lr4,lr4,2               ; (delay slot) count for jmpfdec
+       sub     lr2,lr2,4               ; offset res_ptr by one limb
+
+Loop2: load    0,0,gr116,lr3           ; gr116 = next s1 limb
+       add     lr3,lr3,4
+       multiplu gr117,gr116,lr5        ; gr117 = low product word
+       multmu  gr118,gr116,lr5         ; gr118 = high product word
+       add     lr2,lr2,4               ; pre-increment (res_ptr was offset by one limb)
+       add     gr117,gr117,gr120       ; add carry limb; carry out feeds the addc below
+       store   0,0,gr117,lr2
+       jmpfdec lr4,Loop2
+        addc   gr120,gr118,0           ; (delay slot) new carry limb = high word + carry
+
+Lend:  jmpi    lr0
+        or     gr96,gr120,0            ; copy
diff --git a/mpn/a29k/rshift.s b/mpn/a29k/rshift.s

new file mode 100644 (file)

index 0000000..8a30867
--- /dev/null
+++ b/mpn/a29k/rshift.s
@@ -0,0 +1,87 @@
+; 29000 __gmpn_rshift --
+
+; Copyright 1992, 1994, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 3 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+; INPUT PARAMETERS
+; res_ptr      lr2
+; s1_ptr       lr3
+; size         lr4
+; cnt          lr5
+
+; We use the loadm/storem instructions and operate on chunks of 8
+; limbs/per iteration, until less than 8 limbs remain.
+
+       .sect .lit,lit
+       .text
+       .align  4
+       .global ___gmpn_rshift
+       .word   0x60000
+___gmpn_rshift:
+       load    0,0,gr119,lr3           ; gr119 = s1_ptr[0], carried into the first extract
+       add     lr3,lr3,4
+
+       subr    gr116,lr5,32            ; gr116 = 32 - lr5
+       sll     gr96,gr119,gr116        ; return value
+       sub     lr4,lr4,1               ; actual loop count is SIZE - 1
+
+       srl     gr117,lr4,3             ; chunk count = (actual count) / 8
+       cpeq    gr118,gr117,0
+       jmpt    gr118,Ltail             ; no full chunk of 8: go straight to the tail
+        mtsr   fc,gr116                ; (delay slot) fc = 32 - lr5, the count used by extract
+
+       sub     gr117,gr117,2           ; count for jmpfdec
+
+; Main loop working 8 limbs/iteration.
+Loop:  mtsrim  cr,(8-1)                ; transfer count for the loadm below
+       loadm   0,0,gr100,lr3           ; gr100..gr107 = next 8 source limbs
+       add     lr3,lr3,32              ; advance s1_ptr by 8 limbs
+
+       extract gr98,gr100,gr119        ; gr119 = limb carried over from the previous step
+       extract gr99,gr101,gr100        ; each result combines a limb with the one below it
+       extract gr100,gr102,gr101       ; destinations only overwrite limbs already consumed
+       extract gr101,gr103,gr102
+       extract gr102,gr104,gr103
+       extract gr103,gr105,gr104
+       extract gr104,gr106,gr105
+       extract gr105,gr107,gr106
+
+       mtsrim  cr,(8-1)                ; transfer count for the storem below
+       storem  0,0,gr98,lr2            ; store results gr98..gr105
+       add     lr2,lr2,32              ; advance res_ptr by 8 limbs
+       jmpfdec gr117,Loop
+        or     gr119,gr107,0           ; (delay slot) highest loaded limb carries over
+
+; Code for the last up-to-7 limbs.
+
+       and     lr4,lr4,(8-1)           ; limbs remaining after the full chunks
+Ltail: cpeq    gr118,lr4,0
+       jmpt    gr118,Lend              ; nothing left but the final limb
+        sub    lr4,lr4,2               ; count for jmpfdec
+
+Loop2: load    0,0,gr100,lr3           ; gr100 = next higher source limb
+       add     lr3,lr3,4
+       extract gr117,gr100,gr119
+       store   0,0,gr117,lr2
+       add     lr2,lr2,4
+       jmpfdec lr4,Loop2
+        or     gr119,gr100,0           ; (delay slot) this limb carries over
+
+Lend:  srl     gr117,gr119,lr5         ; final limb: plain shift, zeros come in from above
+       jmpi    lr0                     ; return to caller
+        store  0,0,gr117,lr2           ; (delay slot) store the final result limb
diff --git a/mpn/a29k/sub_n.s b/mpn/a29k/sub_n.s

new file mode 100644 (file)

index 0000000..42072a4
--- /dev/null
+++ b/mpn/a29k/sub_n.s
@@ -0,0 +1,118 @@
+; 29000 mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+; store difference in a third limb vector.
+
+; Copyright 1992, 1994, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 3 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+; INPUT PARAMETERS
+; res_ptr      lr2
+; s1_ptr       lr3
+; s2_ptr       lr4
+; size         lr5
+
+; We use the loadm/storem instructions and operate on chunks of 8
+; limbs/per iteration, until less than 8 limbs remain.
+
+; The 29k has no addition or subtraction instruction that doesn't
+; affect carry, so we need to save and restore that as soon as we
+; adjust the pointers.  gr116 is used for this purpose.  Note that
+; gr116==0 means that carry should be set.
+
+       .sect .lit,lit
+       .text
+       .align  4
+       .global ___gmpn_sub_n
+       .word   0x60000
+___gmpn_sub_n:
+       srl     gr117,lr5,3             ; gr117 = number of full 8-limb chunks
+       sub     gr118,gr117,1
+       jmpt    gr118,Ltail             ; no full chunk: tail only
+        const  gr116,0                 ; init cy reg: 0 = carry set = no borrow pending
+       sub     gr117,gr117,2           ; count for jmpfdec
+
+; Main loop working 8 limbs/iteration.
+Loop:  mtsrim  cr,(8-1)                ; transfer count for the loadm below
+       loadm   0,0,gr96,lr3            ; gr96..gr103 = next 8 s1 limbs
+       add     lr3,lr3,32
+       mtsrim  cr,(8-1)                ; transfer count for the loadm below
+       loadm   0,0,gr104,lr4           ; gr104..gr111 = next 8 s2 limbs
+       add     lr4,lr4,32
+
+       subr    gr116,gr116,0           ; restore carry: 0 - gr116 borrows iff gr116 = -1
+       subc    gr96,gr96,gr104
+       subc    gr97,gr97,gr105
+       subc    gr98,gr98,gr106
+       subc    gr99,gr99,gr107
+       subc    gr100,gr100,gr108
+       subc    gr101,gr101,gr109
+       subc    gr102,gr102,gr110
+       subc    gr103,gr103,gr111
+       subc    gr116,gr116,gr116       ; gr116 = cy - 1: 0 if no borrow, -1 if borrow
+
+       mtsrim  cr,(8-1)                ; transfer count for the storem below
+       storem  0,0,gr96,lr2
+       jmpfdec gr117,Loop
+        add    lr2,lr2,32              ; (delay slot) advance res_ptr by 8 limbs
+
+; Code for the last up-to-7 limbs.
+; This code might look very strange, but it's hard to write it
+; differently without major slowdown.
+
+       and     lr5,lr5,(8-1)           ; limbs left over (0..7)
+Ltail: sub     gr118,lr5,1             ; count for CR
+       jmpt    gr118,Lend              ; nothing left: return the saved borrow
+        sub    gr117,lr5,2             ; count for jmpfdec
+
+       mtsr    cr,gr118
+       loadm   0,0,gr96,lr3            ; remaining s1 limbs into gr96..
+       mtsr    cr,gr118
+       loadm   0,0,gr104,lr4           ; remaining s2 limbs into gr104..
+
+       subr    gr116,gr116,0           ; restore carry
+
+; Each jmpfdec continues the chain while limbs remain; its delay slot
+; always performs one more limb subtraction before leaving for Lstore.
+       jmpfdec gr117,L1
+        subc   gr96,gr96,gr104
+       jmp     Lstore
+        mtsr   cr,gr118
+L1:    jmpfdec gr117,L2
+        subc   gr97,gr97,gr105
+       jmp     Lstore
+        mtsr   cr,gr118
+L2:    jmpfdec gr117,L3
+        subc   gr98,gr98,gr106
+       jmp     Lstore
+        mtsr   cr,gr118
+L3:    jmpfdec gr117,L4
+        subc   gr99,gr99,gr107
+       jmp     Lstore
+        mtsr   cr,gr118
+L4:    jmpfdec gr117,L5
+        subc   gr100,gr100,gr108
+       jmp     Lstore
+        mtsr   cr,gr118
+L5:    jmpfdec gr117,L6
+        subc   gr101,gr101,gr109
+       jmp     Lstore
+        mtsr   cr,gr118
+L6:    subc    gr102,gr102,gr110
+
+Lstore:        storem  0,0,gr96,lr2
+       subc    gr116,gr116,gr116       ; gr116 = cy - 1: 0 if no borrow, -1 if borrow
+
+Lend:  jmpi    lr0
+        subr   gr96,gr116,0            ; return borrow: 0 - gr116 = 0 or 1
diff --git a/mpn/a29k/submul_1.s b/mpn/a29k/submul_1.s

new file mode 100644 (file)

index 0000000..8a02d88
--- /dev/null
+++ b/mpn/a29k/submul_1.s
@@ -0,0 +1,114 @@
+; 29000 __gmpn_submul_1 -- Multiply a limb vector with a single limb and
+; subtract the product from a second limb vector.
+
+; Copyright 1992, 1994, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 3 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+; INPUT PARAMETERS
+; res_ptr      lr2
+; s1_ptr       lr3
+; size         lr4
+; s2_limb      lr5
+
+       .cputype 29050
+       .sect .lit,lit
+       .text
+       .align  4
+       .global ___gmpn_submul_1
+       .word   0x60000
+___gmpn_submul_1:
+       sub     lr4,lr4,8               ; lr4 = size - 8
+       jmpt    lr4,Ltail               ; negative: fewer than 8 limbs, tail only
+        const  gr120,0                 ; init cylimb reg
+
+       srl     gr117,lr4,3             ; divide by 8
+       sub     gr117,gr117,1           ; count for jmpfdec
+
+Loop:  mtsrim  cr,(8-1)                ; transfer count for the loadm below
+       loadm   0,0,gr96,lr3            ; gr96..gr103 = next 8 s1 limbs
+       add     lr3,lr3,32
+
+       multiplu gr104,gr96,lr5         ; gr104..gr111 receive the low product words
+       multmu   gr96,gr96,lr5          ; gr96..gr103 are replaced by the high product words
+       multiplu gr105,gr97,lr5
+       multmu   gr97,gr97,lr5
+       multiplu gr106,gr98,lr5
+       multmu   gr98,gr98,lr5
+       multiplu gr107,gr99,lr5
+       multmu   gr99,gr99,lr5
+       multiplu gr108,gr100,lr5
+       multmu   gr100,gr100,lr5
+       multiplu gr109,gr101,lr5
+       multmu   gr101,gr101,lr5
+       multiplu gr110,gr102,lr5
+       multmu   gr102,gr102,lr5
+       multiplu gr111,gr103,lr5
+       multmu   gr103,gr103,lr5
+
+       add     gr104,gr104,gr120       ; low word 0 + carry limb from the previous chunk
+       addc    gr105,gr105,gr96        ; low word i + high word i-1 + carry
+       addc    gr106,gr106,gr97
+       addc    gr107,gr107,gr98
+       addc    gr108,gr108,gr99
+       addc    gr109,gr109,gr100
+       addc    gr110,gr110,gr101
+       addc    gr111,gr111,gr102
+       addc    gr120,gr103,0           ; carry limb = last high word + carry
+
+       mtsrim  cr,(8-1)                ; transfer count for the loadm below
+       loadm   0,0,gr96,lr2            ; gr96..gr103 = current result limbs
+
+       sub     gr96,gr96,gr104
+       subc    gr97,gr97,gr105
+       subc    gr98,gr98,gr106
+       subc    gr99,gr99,gr107
+       subc    gr100,gr100,gr108
+       subc    gr101,gr101,gr109
+       subc    gr102,gr102,gr110
+       subc    gr103,gr103,gr111
+
+       subc    gr104,gr104,gr104       ; gr104 = cy - 1 = -(borrow out of the chain)
+       sub     gr120,gr120,gr104       ; carry limb += borrow (exact even with borrow-in)
+
+       mtsrim  cr,(8-1)                ; transfer count for the storem below
+       storem  0,0,gr96,lr2
+       jmpfdec gr117,Loop
+        add    lr2,lr2,32              ; (delay slot) advance res_ptr by 8 limbs
+
+Ltail: and     lr4,lr4,(8-1)           ; limbs left over (0..7)
+       sub     gr118,lr4,1             ; count for CR
+       jmpt    gr118,Lend              ; nothing left: return the carry limb
+        sub    lr4,lr4,2               ; (delay slot) count for jmpfdec
+       sub     lr2,lr2,4               ; offset res_ptr by one limb
+
+Loop2: load    0,0,gr116,lr3           ; gr116 = next s1 limb
+       add     lr3,lr3,4
+       multiplu gr117,gr116,lr5        ; gr117 = low product word
+       multmu  gr118,gr116,lr5         ; gr118 = high product word
+       add     lr2,lr2,4
+       load    0,0,gr119,lr2           ; gr119 = current result limb
+       add     gr117,gr117,gr120       ; low word + carry limb
+       addc    gr118,gr118,0           ; propagate that carry into the high word
+       sub     gr119,gr119,gr117       ; single sub, no borrow-in, so the trick below is exact
+       add     gr104,gr119,gr117       ; invert carry from previous sub
+       store   0,0,gr119,lr2
+       jmpfdec lr4,Loop2
+        addc   gr120,gr118,0           ; (delay slot) carry limb = high word + borrow
+
+Lend:  jmpi    lr0
+        or     gr96,gr120,0            ; copy
diff --git a/mpn/a29k/udiv.s b/mpn/a29k/udiv.s

new file mode 100644 (file)

index 0000000..82c3925
--- /dev/null
+++ b/mpn/a29k/udiv.s
@@ -0,0 +1,28 @@
+; Copyright 1999, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 3 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+       .sect .lit,lit
+       .text
+       .align 4
+       .global ___udiv_qrnnd
+       .word 0x60000
+___udiv_qrnnd:
+       mtsr q,lr3              ; Q = lr3 (presumably the high dividend word -- confirm)
+       dividu gr96,lr4,lr5     ; gr96 = result of the unsigned divide by lr5 (returned)
+       mfsr gr116,q            ; gr116 = Q after the divide (presumably the remainder -- confirm)
+       jmpi lr0                ; return to caller
+       store 0,0,gr116,lr2     ; (delay slot) *lr2 = gr116
diff --git a/mpn/a29k/umul.s b/mpn/a29k/umul.s

new file mode 100644 (file)

index 0000000..02c34e9
--- /dev/null
+++ b/mpn/a29k/umul.s
@@ -0,0 +1,27 @@
+; Copyright 1999, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 3 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+       .sect .lit,lit
+       .text
+       .align 4
+       .global ___umul_ppmm
+       .word 0x50000
+___umul_ppmm:
+       multiplu gr116,lr3,lr4  ; gr116 = low word of lr3 * lr4
+       multmu gr96,lr3,lr4     ; gr96 = high word of lr3 * lr4 (returned)
+       jmpi lr0                ; return to caller
+       store 0,0,gr116,lr2     ; (delay slot) *lr2 = low word
diff --git a/mpn/alpha/README b/mpn/alpha/README

new file mode 100644 (file)

index 0000000..abefaa6
--- /dev/null
+++ b/mpn/alpha/README
@@ -0,0 +1,198 @@
+Copyright 1996, 1997, 1999, 2000, 2001, 2002, 2003, 2004, 2005 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by the
+Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License along
+with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+
+
+
+This directory contains mpn functions optimized for DEC Alpha processors.
+
+ALPHA ASSEMBLY RULES AND REGULATIONS
+
+The `.prologue N' pseudo op marks the end of the instructions needing special
+handling by unwinding.  It also says whether $27 is really needed for computing
+the gp.  The `.mask M' pseudo op says which registers are saved on the stack,
+and at what offset in the frame.
+
+Cray T3 code is very very different...
+
+"$6" / "$f6" etc is the usual syntax for registers, but on Unicos instead "r6"
+/ "f6" is required.  We use the "r6" / "f6" forms, and have m4 defines expand
+them to "$6" or "$f6" where necessary.
+
+"0x" introduces a hex constant in gas and DEC as, but on Unicos "^X" is
+required.  The X() macro accommodates this difference.
+
+"cvttqc" is required by DEC as, "cvttq/c" is required by Unicos, and gas will
+accept either.  We use cvttqc and have an m4 define expand to cvttq/c where
+necessary.
+
+"not" as an alias for "ornot r31, ..." is available in gas and DEC as, but not
+the Unicos assembler.  The full "ornot" must be used.
+
+"unop" is not available in Unicos.  We make an m4 define to the usual "ldq_u
+r31,0(r30)", and in fact use that define on all systems since it comes out the
+same.
+
+"!literal!123" etc explicit relocations as per Tru64 4.0 are apparently not
+available in older alpha assemblers (including gas prior to 2.12), according to
+the GCC manual, so the assembler macro forms must be used (eg. ldgp).
+
+
+
+RELEVANT OPTIMIZATION ISSUES
+
+EV4
+
+1. This chip has very limited store bandwidth.  The on-chip L1 cache is write-
+   through, and a cache line is transferred from the store buffer to the off-
+   chip L2 in as much as 15 cycles on most systems.  This delay hurts
+   mpn_add_n, mpn_sub_n, mpn_lshift, and mpn_rshift.
+
+2. Pairing is possible between memory instructions and integer arithmetic
+   instructions.
+
+3. mulq and umulh are documented to have a latency of 23 cycles, but 2 of these
+   cycles are pipelined.  Thus, multiply instructions can be issued at a rate
+   of one each 21st cycle.
+
+EV5
+
+1. The memory bandwidth of this chip is good, both for loads and stores.  The
+   L1 cache can handle two loads or one store per cycle, but two cycles after a
+   store, no ld can issue.
+
+2. mulq has a latency of 12 cycles and an issue rate of 1 each 8th cycle.
+   umulh has a latency of 14 cycles and an issue rate of 1 each 10th cycle.
+   (Note that published documentation gets these numbers slightly wrong.)
+
+3. mpn_add_n.  With 4-fold unrolling, we need 37 instructions, whereof 12
+   are memory operations.  This will take at least
+       ceil(37/2) [dual issue] + 1 [taken branch] = 19 cycles
+   We have 12 memory cycles, plus 4 after-store conflict cycles, or 16 data
+   cache cycles, which should be completely hidden in the 19 issue cycles.
+   The computation is inherently serial, with these dependencies:
+
+              ldq  ldq
+                \  /\
+         (or)   addq |
+          |\   /   \ |
+          | addq  cmpult
+           \  |     |
+            cmpult  |
+                \  /
+                 or
+
+   I.e., 3 operations are needed between carry-in and carry-out, making 12
+   cycles the absolute minimum for the 4 limbs.  We could replace the `or' with
+   a cmoveq/cmovne, which could issue one cycle earlier than the `or', but that
+   might waste a cycle on EV4.  The total depth remains unaffected, since cmov
+   has a latency of 2 cycles.
+
+     addq
+     /   \
+   addq  cmpult
+     |      \
+   cmpult -> cmovne
+
+  Montgomery has a slightly different way of computing carry that requires one
+  less instruction, but has depth 4 (instead of the current 3).  Since the code
+  is currently instruction issue bound, Montgomery's idea should save us 1/2
+  cycle per limb, or bring us down to a total of 17 cycles or 4.25 cycles/limb.
+  Unfortunately, this method will not be good for the EV6.
+
+4. addmul_1 and friends: We previously had a scheme for splitting the single-
+   limb operand in 21-bits chunks and the multi-limb operand in 32-bit chunks,
+   and then use FP operations for every 2nd multiply, and integer operations
+   for every 2nd multiply.
+
+   But it seems much better to split the single-limb operand in 16-bit chunks,
+   since we save many integer shifts and adds that way.  See powerpc64/README
+   for some more details.
+
+EV6
+
+Here we have a really parallel pipeline, capable of issuing up to 4 integer
+instructions per cycle.  In actual practice, it is never possible to sustain
+more than 3.5 integer insns/cycle due to rename register shortage.  One integer
+multiply instruction can issue each cycle.  To get optimal speed, we need to
+pretend we are vectorizing the code, i.e., minimize the depth of recurrences.
+
+There are two dependencies to watch out for.  1) Address arithmetic
+dependencies, and 2) carry propagation dependencies.
+
+We can avoid serializing due to address arithmetic by unrolling loops, so that
+addresses don't depend heavily on an index variable.  Avoiding serializing
+because of carry propagation is trickier; the ultimate performance of the code
+will be determined by the number of latency cycles it takes from accepting
+carry-in to a vector point until we can generate carry-out.
+
+Most integer instructions can execute in either the L0, U0, L1, or U1
+pipelines.  Shifts only execute in U0 and U1, and multiply only in U1.
+
+CMOV instructions split into two internal instructions, CMOV1 and CMOV2.  CMOV
+splits the mapping process (see pg 2-26 in cmpwrgd.pdf), suggesting the CMOV
+should always be placed as the last instruction of an aligned 4 instruction
+block, or perhaps simply avoided.
+
+Perhaps the most important issue is the latency between the L0/U0 and L1/U1
+clusters; a result obtained on either cluster has an extra cycle of latency for
+consumers in the opposite cluster.  Because of the dynamic nature of the
+implementation, it is hard to predict where an instruction will execute.
+
+
+
+REFERENCES
+
+"Alpha Architecture Handbook", version 4, Compaq, October 1998, order number
+EC-QD2KC-TE.
+
+"Alpha 21164 Microprocessor Hardware Reference Manual", Compaq, December 1998,
+order number EC-QP99C-TE.
+
+"Alpha 21264/EV67 Microprocessor Hardware Reference Manual", revision 1.4,
+Compaq, September 2000, order number DS-0028B-TE.
+
+"Compiler Writer's Guide for the Alpha 21264", Compaq, June 1999, order number
+EC-RJ66A-TE.
+
+All of the above are available online from
+
+  http://ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html
+  ftp://ftp.compaq.com/pub/products/alphaCPUdocs
+
+"Tru64 Unix Assembly Language Programmer's Guide", Compaq, March 1996, part
+number AA-PS31D-TE.
+
+"Digital UNIX Calling Standard for Alpha Systems", Digital Equipment Corp,
+March 1996, part number AA-PY8AC-TE.
+
+The above are available online,
+
+  http://h30097.www3.hp.com/docs/pub_page/V40F_DOCS.HTM
+
+(Dunno what h30097 means in this URL, but if it moves try searching for "tru64
+online documentation" from the main www.hp.com page.)
+
+
+
+----------------
+Local variables:
+mode: text
+fill-column: 79
+End:
diff --git a/mpn/alpha/add_n.asm b/mpn/alpha/add_n.asm

new file mode 100644 (file)

index 0000000..e24c3cb
--- /dev/null
+++ b/mpn/alpha/add_n.asm
@@ -0,0 +1,146 @@
+dnl  Alpha mpn_add_n -- Add two limb vectors of the same length > 0 and
+dnl  store sum in a third limb vector.
+
+dnl  Copyright 1995, 1999, 2000, 2005 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C      cycles/limb
+C EV4:     ?
+C EV5:     4.75
+C EV6:     3
+
+dnl  INPUT PARAMETERS
+dnl  res_ptr   r16
+dnl  s1_ptr    r17
+dnl  s2_ptr    r18
+dnl  size      r19
+
+ASM_START()
+PROLOGUE(mpn_add_n)
+       bis     r31,r31,r25             C clear cy
+       subq    r19,4,r19               C decr loop cnt
+       blt     r19,$Lend2              C if less than 4 limbs, goto 2nd loop
+C Start software pipeline for 1st loop
+       ldq     r0,0(r18)               C s2 limb 0
+       ldq     r4,0(r17)               C s1 limb 0
+       ldq     r1,8(r18)               C s2 limb 1
+       ldq     r5,8(r17)               C s1 limb 1
+       addq    r17,32,r17              C update s1_ptr
+       ldq     r2,16(r18)              C s2 limb 2
+       addq    r0,r4,r20               C 1st main add
+       ldq     r3,24(r18)              C s2 limb 3
+       subq    r19,4,r19               C decr loop cnt
+       ldq     r6,-16(r17)             C s1 limb 2 (s1_ptr already advanced)
+       cmpult  r20,r0,r25              C compute cy from last add
+       ldq     r7,-8(r17)              C s1 limb 3 (s1_ptr already advanced)
+       addq    r1,r5,r28               C 2nd main add
+       addq    r18,32,r18              C update s2_ptr
+       addq    r28,r25,r21             C 2nd carry add
+       cmpult  r28,r5,r8               C compute cy from last add
+       blt     r19,$Lend1              C if less than 4 limbs remain, jump
+C 1st loop handles groups of 4 limbs in a software pipeline
+       ALIGN(16)
+$Loop: cmpult  r21,r28,r25             C compute cy from last add
+       ldq     r0,0(r18)               C next group: s2 limb 0
+       bis     r8,r25,r25              C combine cy from the two adds
+       ldq     r1,8(r18)               C next group: s2 limb 1
+       addq    r2,r6,r28               C 3rd main add
+       ldq     r4,0(r17)               C next group: s1 limb 0
+       addq    r28,r25,r22             C 3rd carry add
+       ldq     r5,8(r17)               C next group: s1 limb 1
+       cmpult  r28,r6,r8               C compute cy from last add
+       cmpult  r22,r28,r25             C compute cy from last add
+       stq     r20,0(r16)              C store 1st result limb
+       bis     r8,r25,r25              C combine cy from the two adds
+       stq     r21,8(r16)              C store 2nd result limb
+       addq    r3,r7,r28               C 4th main add
+       addq    r28,r25,r23             C 4th carry add
+       cmpult  r28,r7,r8               C compute cy from last add
+       cmpult  r23,r28,r25             C compute cy from last add
+               addq    r17,32,r17              C update s1_ptr
+       bis     r8,r25,r25              C combine cy from the two adds
+               addq    r16,32,r16              C update res_ptr
+       addq    r0,r4,r28               C 1st main add
+       ldq     r2,16(r18)              C next group: s2 limb 2
+       addq    r25,r28,r20             C 1st carry add
+       ldq     r3,24(r18)              C next group: s2 limb 3
+       cmpult  r28,r4,r8               C compute cy from last add
+       ldq     r6,-16(r17)             C next group: s1 limb 2 (s1_ptr already advanced)
+       cmpult  r20,r28,r25             C compute cy from last add
+       ldq     r7,-8(r17)              C next group: s1 limb 3 (s1_ptr already advanced)
+       bis     r8,r25,r25              C combine cy from the two adds
+       subq    r19,4,r19               C decr loop cnt
+       stq     r22,-16(r16)            C store 3rd result limb (res_ptr already advanced)
+       addq    r1,r5,r28               C 2nd main add
+       stq     r23,-8(r16)             C store 4th result limb (res_ptr already advanced)
+       addq    r25,r28,r21             C 2nd carry add
+               addq    r18,32,r18              C update s2_ptr
+       cmpult  r28,r5,r8               C compute cy from last add
+       bge     r19,$Loop               C loop while another group of 4 limbs remains
+C Finish software pipeline for 1st loop
+$Lend1:        cmpult  r21,r28,r25             C compute cy from last add
+       bis     r8,r25,r25              C combine cy from the two adds
+       addq    r2,r6,r28               C 3rd main add
+       addq    r28,r25,r22             C 3rd carry add
+       cmpult  r28,r6,r8               C compute cy from last add
+       cmpult  r22,r28,r25             C compute cy from last add
+       stq     r20,0(r16)              C store 1st result limb
+       bis     r8,r25,r25              C combine cy from the two adds
+       stq     r21,8(r16)              C store 2nd result limb
+       addq    r3,r7,r28               C 4th main add
+       addq    r28,r25,r23             C 4th carry add
+       cmpult  r28,r7,r8               C compute cy from last add
+       cmpult  r23,r28,r25             C compute cy from last add
+       bis     r8,r25,r25              C combine cy from the two adds
+       addq    r16,32,r16              C update res_ptr
+       stq     r22,-16(r16)            C store 3rd result limb
+       stq     r23,-8(r16)             C store 4th result limb
+$Lend2:        addq    r19,4,r19               C restore loop cnt
+       beq     r19,$Lret               C no limbs left: return cy
+C Start software pipeline for 2nd loop
+       ldq     r0,0(r18)               C s2 limb
+       ldq     r4,0(r17)               C s1 limb
+       subq    r19,1,r19               C decr loop cnt
+       beq     r19,$Lend0              C exactly one limb left: skip the loop
+C 2nd loop handles remaining 1-3 limbs
+       ALIGN(16)
+$Loop0:        addq    r0,r4,r28               C main add
+       ldq     r0,8(r18)               C preload next s2 limb
+       cmpult  r28,r4,r8               C compute cy from last add
+       ldq     r4,8(r17)               C preload next s1 limb
+       addq    r28,r25,r20             C carry add
+       addq    r18,8,r18               C update s2_ptr
+       addq    r17,8,r17               C update s1_ptr
+       stq     r20,0(r16)              C store result limb
+       cmpult  r20,r28,r25             C compute cy from last add
+       subq    r19,1,r19               C decr loop cnt
+       bis     r8,r25,r25              C combine cy from the two adds
+       addq    r16,8,r16               C update res_ptr
+       bne     r19,$Loop0              C loop until only the preloaded limb pair remains
+$Lend0:        addq    r0,r4,r28               C main add
+       addq    r28,r25,r20             C carry add
+       cmpult  r28,r4,r8               C compute cy from last add
+       cmpult  r20,r28,r25             C compute cy from last add
+       stq     r20,0(r16)              C store final result limb
+       bis     r8,r25,r25              C combine cy from the two adds
+
+$Lret: bis     r25,r31,r0              C return cy
+       ret     r31,(r26),1
+EPILOGUE(mpn_add_n)
+ASM_END()
diff --git a/mpn/alpha/addmul_1.asm b/mpn/alpha/addmul_1.asm

new file mode 100644 (file)

index 0000000..22c41a5
--- /dev/null
+++ b/mpn/alpha/addmul_1.asm
@@ -0,0 +1,88 @@
+dnl Alpha mpn_addmul_1 -- Multiply a limb vector with a limb and add the
+dnl result to a second limb vector.
+
+dnl  Copyright 1992, 1994, 1995, 2000, 2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C      cycles/limb
+C EV4:     42
+C EV5:     18
+C EV6:      7
+
+C  INPUT PARAMETERS
+C  rp  r16    limbs that are read, added to, and stored back
+C  up  r17    limbs that are multiplied by vl
+C  n   r18    limb count
+C  vl  r19    multiplier limb
+C  Each rp limb becomes rp limb + up limb * vl + carry; the final carry limb
+C  is returned in r0.  Limb 0 is handled before n is tested: n >= 1 assumed.
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+       ldq     r2,0(r17)       C r2 = s1_limb
+       addq    r17,8,r17       C s1_ptr++
+       subq    r18,1,r18       C size--
+       mulq    r2,r19,r3       C r3 = prod_low
+       ldq     r5,0(r16)       C r5 = *res_ptr
+       umulh   r2,r19,r0       C r0 = prod_high
+       beq     r18,$Lend1      C jump if size was == 1
+       ldq     r2,0(r17)       C r2 = s1_limb
+       addq    r17,8,r17       C s1_ptr++
+       subq    r18,1,r18       C size--
+       addq    r5,r3,r3        C r3 = *res_ptr + prod_low
+       cmpult  r3,r5,r4        C r4 = carry out of that add
+       stq     r3,0(r16)       C store first result limb
+       addq    r16,8,r16       C res_ptr++
+       beq     r18,$Lend2      C jump if size was == 2
+
+       ALIGN(8)
+$Loop: mulq    r2,r19,r3       C r3 = prod_low
+       ldq     r5,0(r16)       C r5 = *res_ptr
+       addq    r4,r0,r0        C cy_limb = cy_limb + 'cy'
+       subq    r18,1,r18       C size--
+       umulh   r2,r19,r4       C r4 = prod_high, folded into cy_limb next pass
+       ldq     r2,0(r17)       C r2 = s1_limb
+       addq    r17,8,r17       C s1_ptr++
+       addq    r3,r0,r3        C r3 = cy_limb + prod_low
+       cmpult  r3,r0,r0        C r0 = carry from (cy_limb + prod_low)
+       addq    r5,r3,r3        C r3 += old *res_ptr
+       cmpult  r3,r5,r5        C r5 = carry from that add
+       stq     r3,0(r16)       C store result limb
+       addq    r16,8,r16       C res_ptr++
+       addq    r5,r0,r0        C combine carries
+       bne     r18,$Loop
+
+$Lend2:        mulq    r2,r19,r3       C r3 = prod_low
+       ldq     r5,0(r16)       C r5 = *res_ptr
+       addq    r4,r0,r0        C cy_limb = cy_limb + 'cy'
+       umulh   r2,r19,r4       C r4 = prod_high of the last limb
+       addq    r3,r0,r3        C r3 = cy_limb + prod_low
+       cmpult  r3,r0,r0        C r0 = carry from (cy_limb + prod_low)
+       addq    r5,r3,r3        C r3 += old *res_ptr
+       cmpult  r3,r5,r5        C r5 = carry from that add
+       stq     r3,0(r16)       C store last result limb
+       addq    r5,r0,r0        C combine carries
+       addq    r4,r0,r0        C cy_limb = prod_high + cy
+       ret     r31,(r26),1     C return cy_limb in r0
+$Lend1:        addq    r5,r3,r3        C single limb: r3 = *res_ptr + prod_low
+       cmpult  r3,r5,r5        C r5 = carry from that add
+       stq     r3,0(r16)       C store the only result limb
+       addq    r0,r5,r0        C r0 = prod_high + carry
+       ret     r31,(r26),1     C return cy_limb in r0
+EPILOGUE(mpn_addmul_1)
+ASM_END()
diff --git a/mpn/alpha/alpha-defs.m4 b/mpn/alpha/alpha-defs.m4

new file mode 100644 (file)

index 0000000..b2f9a24
--- /dev/null
+++ b/mpn/alpha/alpha-defs.m4
@@ -0,0 +1,96 @@
+divert(-1)
+
+dnl  m4 macros for Alpha assembler.
+
+dnl  Copyright 2003, 2004 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+dnl  Usage: ASSERT([reg] [,code])
+dnl
+dnl  Require that the given reg is non-zero after executing the test code.
+dnl  For example,
+dnl
+dnl         ASSERT(r8,
+dnl         `       cmpult r16, r17, r8')
+dnl
+dnl  If the register argument is empty then nothing is tested, the code is
+dnl  just executed.  This can be used for setups required by later ASSERTs.
+dnl  If the code argument is omitted then the register is just tested, with
+dnl  no special setup code.  Nothing is emitted unless WANT_ASSERT is 1.
+dnl  A failing test runs into a zero word; each test gets a fresh label.
+define(ASSERT,
+m4_assert_numargs_range(1,2)
+m4_assert_defined(`WANT_ASSERT')
+`ifelse(WANT_ASSERT,1,
+`ifelse(`$2',,,`$2')
+ifelse(`$1',,,
+`      bne     $1, L(ASSERTok`'ASSERT_label_counter)
+       .long   0       C halt
+L(ASSERTok`'ASSERT_label_counter):
+define(`ASSERT_label_counter',eval(ASSERT_label_counter+1))
+')
+')')
+define(`ASSERT_label_counter',1)
+
+
+dnl  Usage: bigend(`code')
+dnl
+dnl  Emit the given code only for a big-endian system, like Unicos.  This
+dnl  can be used for instance for extra stuff needed by extwl.  Expands to
+dnl  nothing when little-endian; m4_error if neither HAVE_LIMB_* is defined.
+define(bigend,
+m4_assert_numargs(1)
+`ifdef(`HAVE_LIMB_BIG_ENDIAN',`$1',
+`ifdef(`HAVE_LIMB_LITTLE_ENDIAN',`',
+`m4_error(`Cannot assemble, unknown limb endianness')')')')
+
+
+dnl  Usage: bwx_available_p
+dnl
+dnl  Evaluate to 1 if the BWX byte memory instructions are available, or to
+dnl  0 if not.
+dnl
+dnl  Listing the chips which do have BWX means anything we haven't looked at
+dnl  will use safe non-BWX code.  The only targets without BWX currently are
+dnl  plain alpha (ie. ev4) and alphaev5.  Written as a bare word (see Usage);
+dnl  the answer comes from m4_ifdef_anyof_p over the symbols listed below.
+define(bwx_available_p,
+m4_assert_numargs(-1)
+`m4_ifdef_anyof_p(
+       `HAVE_HOST_CPU_alphaev56',
+       `HAVE_HOST_CPU_alphapca56',
+       `HAVE_HOST_CPU_alphapca57',
+       `HAVE_HOST_CPU_alphaev6',
+       `HAVE_HOST_CPU_alphaev67',
+       `HAVE_HOST_CPU_alphaev68',
+       `HAVE_HOST_CPU_alphaev69',
+       `HAVE_HOST_CPU_alphaev7',
+       `HAVE_HOST_CPU_alphaev79')')
+
+
+dnl  Usage: unop
+dnl
+dnl  The Cray Unicos assembler lacks unop, so give the equivalent ldq_u
+dnl  explicitly.  The load targets r31, so its result is discarded and only
+dnl  an instruction slot is consumed.
+define(unop,
+m4_assert_numargs(-1)
+`ldq_u r31, 0(r30)')
+
+
+divert
diff --git a/mpn/alpha/aorslsh1_n.asm b/mpn/alpha/aorslsh1_n.asm

new file mode 100644 (file)

index 0000000..3694f78
--- /dev/null
+++ b/mpn/alpha/aorslsh1_n.asm
@@ -0,0 +1,234 @@
+dnl  Alpha mpn_addlsh1_n/mpn_sublsh1_n -- rp[] = up[] +- (vp[] << 1).
+
+dnl  Copyright 2003 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C      cycles/limb
+C EV4:    12.5
+C EV5:     6.25
+C EV6:     4.375 (i.e., worse than separate mpn_lshift and mpn_add_n at 3.875)
+
+C TODO
+C  * Write special version for ev6, as this is a slowdown for 100 < n < 2200
+C    compared to separate mpn_lshift and mpn_add_n.
+C  * Use addq instead of sll for left shift, and similarly cmplt instead of srl
+C    for right shift.
+
+dnl  INPUT PARAMETERS
+define(`rp',`r16')
+define(`up',`r17')
+define(`vp',`r18')
+define(`n', `r19')
+dnl  u0-u3 hold limbs loaded through up, v0-v3 hold limbs loaded through vp.
+define(`u0', `r8')
+define(`u1', `r1')
+define(`u2', `r2')
+define(`u3', `r3')
+define(`v0', `r4')
+define(`v1', `r5')
+define(`v2', `r6')
+define(`v3', `r7')
+dnl  cy0 and cy1 alternate as the carry between limbs; cy0 is r0 at return.
+define(`cy0', `r0')
+define(`cy1', `r20')
+define(`cy', `r22')
+define(`rr', `r24')
+define(`ps', `r25')
+define(`sl', `r28')
+dnl  CARRY(a,b,dst): dst = a<b (unsigned) when adding, b<a when subtracting.
+ifdef(`OPERATION_addlsh1_n',`
+  define(ADDSUB,       addq)
+  define(CARRY,       `cmpult $1,$2,$3')
+  define(func, mpn_addlsh1_n)
+')
+ifdef(`OPERATION_sublsh1_n',`
+  define(ADDSUB,       subq)
+  define(CARRY,       `cmpult $2,$1,$3')
+  define(func, mpn_sublsh1_n)
+')
+
+MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)
+
+ASM_START()
+PROLOGUE(func)
+       lda     n, -4(n)                C bias count by 4; its sign is tested below
+       bis     r31, r31, cy1           C cy1 = 0 (r31 reads as zero)
+       and     n, 3, r1                C r1 = n mod 4 (the bias does not change it)
+       beq     r1, $Lb00
+       cmpeq   r1, 1, r2
+       bne     r2, $Lb01
+       cmpeq   r1, 2, r2
+       bne     r2, $Lb10
+$Lb11: C n = 3, 7, 11, ...
+       ldq     v0, 0(vp)               C preload three limb pairs
+       ldq     u0, 0(up)
+       ldq     v1, 8(vp)
+       ldq     u1, 8(up)
+       ldq     v2, 16(vp)
+       ldq     u2, 16(up)
+       lda     vp, 24(vp)
+       lda     up, 24(up)
+       bge     n, $Loop                C 7 or more limbs: run the loop first
+       br      r31, $Lcj3              C exactly 3 limbs: wind-down only
+$Lb10: C n = 2, 6, 10, ...
+       bis     r31, r31, cy0           C cy0 = 0, the first carry-in on this path
+       ldq     v1, 0(vp)
+       ldq     u1, 0(up)
+       ldq     v2, 8(vp)
+       ldq     u2, 8(up)
+       lda     rp, -8(rp)              C bias rp: first store on this path is 8(rp)
+       blt     n, $Lcj2                C exactly 2 limbs: wind-down only
+       ldq     v3, 16(vp)
+       ldq     u3, 16(up)
+       lda     vp, 48(vp)              C loop reloads use negative offsets from vp
+       lda     up, 16(up)
+       br      r31, $LL10              C join the loop at its second limb slot
+$Lb01: C n = 1, 5, 9, ...
+       ldq     v2, 0(vp)
+       ldq     u2, 0(up)
+       lda     rp, -16(rp)             C bias rp: $Lcj1 stores to 16(rp)
+       blt     n, $Lcj1                C exactly 1 limb: wind-down only
+       ldq     v3, 8(vp)
+       ldq     u3, 8(up)
+       ldq     v0, 16(vp)
+       ldq     u0, 16(up)
+       lda     vp, 40(vp)
+       lda     up, 8(up)
+       lda     rp, 32(rp)              C net +16: $LL01 stores to -16(rp)
+       br      r31, $LL01              C join the loop at its third limb slot
+$Lb00: C n = 4, 8, 12, ...
+       bis     r31, r31, cy0           C cy0 = 0, the first carry-in on this path
+       ldq     v3, 0(vp)
+       ldq     u3, 0(up)
+       ldq     v0, 8(vp)
+       ldq     u0, 8(up)
+       ldq     v1, 16(vp)
+       ldq     u1, 16(up)
+       lda     vp, 32(vp)
+       lda     rp, 8(rp)               C bias rp: $LL00x code stores to -8(rp)
+       br      r31, $LL00x             C join the loop at its fourth limb slot
+       ALIGN(16)
+C 0
+$Loop: sll     v0, 1, sl       C left shift vlimb
+       ldq     v3, 0(vp)       C load a vlimb for a later slot
+C 1
+       ADDSUB  u0, sl, ps      C ulimb + (vlimb << 1)
+       ldq     u3, 0(up)       C load the matching ulimb
+C 2
+       ADDSUB  ps, cy1, rr     C consume carry from previous operation
+       srl     v0, 63, cy0     C carry out #1
+C 3
+       CARRY(  ps, u0, cy)     C carry out #2
+       stq     rr, 0(rp)       C store result limb
+C 4
+       addq    cy, cy0, cy0    C combine carry out #1 and #2
+       CARRY(  rr, ps, cy)     C carry out #3
+C 5
+       addq    cy, cy0, cy0    C final carry out
+       lda     vp, 32(vp)      C bookkeeping
+C 6
+$LL10: sll     v1, 1, sl       C second slot (entry from $Lb10)
+       ldq     v0, -24(vp)
+C 7
+       ADDSUB  u1, sl, ps
+       ldq     u0, 8(up)
+C 8
+       ADDSUB  ps, cy0, rr     C carry-in is cy0, carry-out goes to cy1
+       srl     v1, 63, cy1
+C 9
+       CARRY(  ps, u1, cy)
+       stq     rr, 8(rp)
+C 10
+       addq    cy, cy1, cy1
+       CARRY(  rr, ps, cy)
+C 11
+       addq    cy, cy1, cy1
+       lda     rp, 32(rp)      C bookkeeping
+C 12
+$LL01: sll     v2, 1, sl       C third slot (entry from $Lb01)
+       ldq     v1, -16(vp)
+C 13
+       ADDSUB  u2, sl, ps
+       ldq     u1, 16(up)
+C 14
+       ADDSUB  ps, cy1, rr     C carry-in is cy1, carry-out goes to cy0
+       srl     v2, 63, cy0
+C 15
+       CARRY(  ps, u2, cy)
+       stq     rr, -16(rp)
+C 16
+       addq    cy, cy0, cy0
+       CARRY(  rr, ps, cy)
+C 17
+       addq    cy, cy0, cy0
+$LL00x:        lda     up, 32(up)      C bookkeeping (entry from $Lb00)
+C 18
+       sll     v3, 1, sl
+       ldq     v2, -8(vp)
+C 19
+       ADDSUB  u3, sl, ps
+       ldq     u2, -8(up)
+C 20
+       ADDSUB  ps, cy0, rr     C carry-in is cy0, carry-out goes to cy1
+       srl     v3, 63, cy1
+C 21
+       CARRY(  ps, u3, cy)
+       stq     rr, -8(rp)
+C 22
+       addq    cy, cy1, cy1
+       CARRY(  rr, ps, cy)
+C 23
+       addq    cy, cy1, cy1
+       lda     n, -4(n)        C bookkeeping
+C 24
+       bge     n, $Loop        C loop while the biased count is not negative
+
+$Lcj3: sll     v0, 1, sl       C wind-down: third-from-last limb, no more loads
+       ADDSUB  u0, sl, ps
+       ADDSUB  ps, cy1, rr
+       srl     v0, 63, cy0
+       CARRY(  ps, u0, cy)
+       stq     rr, 0(rp)
+       addq    cy, cy0, cy0
+       CARRY(  rr, ps, cy)
+       addq    cy, cy0, cy0
+
+$Lcj2: sll     v1, 1, sl       C wind-down: second-from-last limb
+       ADDSUB  u1, sl, ps
+       ADDSUB  ps, cy0, rr
+       srl     v1, 63, cy1
+       CARRY(  ps, u1, cy)
+       stq     rr, 8(rp)
+       addq    cy, cy1, cy1
+       CARRY(  rr, ps, cy)
+       addq    cy, cy1, cy1
+
+$Lcj1: sll     v2, 1, sl       C wind-down: last limb
+       ADDSUB  u2, sl, ps
+       ADDSUB  ps, cy1, rr
+       srl     v2, 63, cy0
+       CARRY(  ps, u2, cy)
+       stq     rr, 16(rp)
+       addq    cy, cy0, cy0
+       CARRY(  rr, ps, cy)
+       addq    cy, cy0, cy0    C cy0 (r0) = carry out of the top limb
+
+       ret     r31,(r26),1     C return with the carry out in r0
+EPILOGUE()
+ASM_END()
diff --git a/mpn/alpha/bdiv_dbm1c.asm b/mpn/alpha/bdiv_dbm1c.asm

new file mode 100644 (file)

index 0000000..e5f11db
--- /dev/null
+++ b/mpn/alpha/bdiv_dbm1c.asm
@@ -0,0 +1,271 @@
+dnl  Alpha mpn_bdiv_dbm1c.
+
+dnl  Copyright 2008 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C      cycles/limb
+C EV4:     42
+C EV5:     18
+C EV6:      3
+
+C TODO
+C  * Try less unrolling, 2-way should give the same performance.
+C  * Optimize feed-in and wind-down code, for speed, and perhaps further for
+C    code size.
+C  * This runs optimally given the algorithm, r8 is on a 3 operation recurrency
+C    path.  We have not tried very hard to find a better algorithm.  Perhaps
+C    it would be a good task for the GNU superoptimizer.
+
+C INPUT PARAMETERS
+define(`rp', `r16')     C destination of the stq stores
+define(`up', `r17')     C source limbs, read by the ldq loads
+define(`n',  `r18')     C limb count
+define(`bd', `r19')     C multiplier used by every mulq/umulh
+define(`cy', `r20')     C incoming value; the code copies r20 to r8 on entry
+
+
+ASM_START()
+PROLOGUE(mpn_bdiv_dbm1c)
+       mov     r20, r8                 C r8 = running value; r20 is reused below
+C Per limb: r8 -= low product, store r8, then r8 -= high product + borrow.
+       ldq     r24, 0(r17)             C r24 = first source limb
+       and     r18, 3, r28             C r28 = n mod 4, picks the feed-in path
+       lda     r18, -4(r18)            C n -= 4
+       beq     r28, L(b0)
+       cmpeq   r28, 1, r21
+       bne     r21, L(b1)
+       cmpeq   r28, 2, r21
+       bne     r21, L(b2)
+
+
+L(b3): ldq     r2, 8(r17)
+       ldq     r3, 16(r17)
+       bgt     r18, L(gt3)             C n >= 7
+
+       mulq    r24, r19, r5    C U1
+       umulh   r24, r19, r21   C U1
+       mulq    r2, r19, r6     C U1
+       umulh   r2, r19, r22    C U1
+       mulq    r3, r19, r7     C U1
+       umulh   r3, r19, r23    C U1
+       lda     r16, -32(r16)           C bias rp so the 32(r16) store is limb 0
+       br      L(cj3)                  C n = 3: wind-down only
+
+L(gt3):        ldq     r0, 24(r17)
+       mulq    r24, r19, r5    C U1
+       umulh   r24, r19, r21   C U1
+       ldq     r1, 32(r17)
+       mulq    r2, r19, r6     C U1
+       umulh   r2, r19, r22    C U1
+       ldq     r2, 40(r17)
+       mulq    r3, r19, r7     C U1
+       umulh   r3, r19, r23    C U1
+       ldq     r3, 48(r17)
+       lda     r18, -4(r18)
+       lda     r17, 56(r17)
+       mulq    r0, r19, r4     C U1
+       bgt     r18, L(L3)              C n >= 11: join the main loop
+
+       br      L(cj7)                  C n = 7: wind-down only
+
+
+L(b2): ldq     r3, 8(r17)
+       bgt     r18, L(gt2)             C n >= 6
+
+       mulq    r24, r19, r6    C U1
+       umulh   r24, r19, r22   C U1
+       mulq    r3, r19, r7     C U1
+       umulh   r3, r19, r23    C U1
+       lda     r16, -40(r16)           C bias rp so the 40(r16) store is limb 0
+       br      L(cj2)                  C n = 2: wind-down only
+
+L(gt2):        ldq     r0, 16(r17)
+       ldq     r1, 24(r17)
+       mulq    r24, r19, r6    C U1
+       umulh   r24, r19, r22   C U1
+       ldq     r2, 32(r17)
+       mulq    r3, r19, r7     C U1
+       umulh   r3, r19, r23    C U1
+       ldq     r3, 40(r17)
+       lda     r18, -4(r18)
+       lda     r17, 48(r17)
+       mulq    r0, r19, r4     C U1
+       umulh   r0, r19, r20    C U1
+       lda     r16, -8(r16)            C bias rp so the 8(r16) store is limb 0
+       bgt     r18, L(gt6)             C n >= 10
+
+       mulq    r1, r19, r5     C U1
+       br      L(cj6)                  C n = 6: wind-down only
+
+L(gt6):        ldq     r0, 0(r17)
+       mulq    r1, r19, r5     C U1
+       br      L(L2)                   C join the main loop at its second slot
+
+
+L(b1): bgt     r18, L(gt1)             C n >= 5
+
+       mulq    r24, r19, r7    C U1
+       umulh   r24, r19, r23   C U1
+       lda     r16, -48(r16)           C bias rp so the 48(r16) store is limb 0
+       br      L(cj1)                  C n = 1: wind-down only
+
+L(gt1):        ldq     r0, 8(r17)
+       ldq     r1, 16(r17)
+       ldq     r2, 24(r17)
+       mulq    r24, r19, r7    C U1
+       umulh   r24, r19, r23   C U1
+       ldq     r3, 32(r17)
+       lda     r18, -4(r18)
+       lda     r17, 40(r17)
+       mulq    r0, r19, r4     C U1
+       umulh   r0, r19, r20    C U1
+       lda     r16, -16(r16)           C bias rp so the 16(r16) store is limb 0
+       bgt     r18, L(gt5)             C n >= 9
+
+       mulq    r1, r19, r5     C U1
+       umulh   r1, r19, r21    C U1
+       mulq    r2, r19, r6     C U1
+       br      L(cj5)                  C n = 5: wind-down only
+
+L(gt5):        ldq     r0, 0(r17)
+       mulq    r1, r19, r5     C U1
+       umulh   r1, r19, r21    C U1
+       ldq     r1, 8(r17)
+       mulq    r2, r19, r6     C U1
+       br      L(L1)                   C join the main loop at its third slot
+
+
+L(b0): ldq     r1, 8(r17)
+       ldq     r2, 16(r17)
+       ldq     r3, 24(r17)
+       lda     r17, 32(r17)
+       lda     r16, -24(r16)           C bias rp so the 24(r16) store is limb 0
+       mulq    r24, r19, r4    C U1
+       umulh   r24, r19, r20   C U1
+       bgt     r18, L(gt4)             C n >= 8
+
+       mulq    r1, r19, r5     C U1
+       umulh   r1, r19, r21    C U1
+       mulq    r2, r19, r6     C U1
+       umulh   r2, r19, r22    C U1
+       mulq    r3, r19, r7     C U1
+       br      L(cj4)                  C n = 4: wind-down only
+
+L(gt4):        ldq     r0, 0(r17)
+       mulq    r1, r19, r5     C U1
+       umulh   r1, r19, r21    C U1
+       ldq     r1, 8(r17)
+       mulq    r2, r19, r6     C U1
+       umulh   r2, r19, r22    C U1
+       ldq     r2, 16(r17)
+       mulq    r3, r19, r7     C U1
+       br      L(L0)                   C join the main loop at its fourth slot
+
+C *** MAIN LOOP START ***
+       ALIGN(16)
+L(top):        mulq    r0, r19, r4     C U1
+       subq    r8, r28, r8             C finish previous limb: r8 -= high + borrow
+L(L3): umulh   r0, r19, r20    C U1
+       cmpult  r8, r5, r28             C r28 = borrow of r8 - low product
+       ldq     r0, 0(r17)              C fetch a limb for the next pass
+       subq    r8, r5, r8              C r8 -= low product
+       addq    r21, r28, r28           C r28 = high product + borrow
+       stq     r8, 0(r16)              C store result limb
+
+       mulq    r1, r19, r5     C U1
+       subq    r8, r28, r8             C r8 -= high product + borrow
+L(L2): umulh   r1, r19, r21    C U1
+       cmpult  r8, r6, r28             C same steps, products in r6/r22
+       ldq     r1, 8(r17)
+       subq    r8, r6, r8
+       addq    r22, r28, r28
+       stq     r8, 8(r16)
+
+       mulq    r2, r19, r6     C U1
+       subq    r8, r28, r8
+L(L1): umulh   r2, r19, r22    C U1
+       cmpult  r8, r7, r28             C same steps, products in r7/r23
+       ldq     r2, 16(r17)
+       subq    r8, r7, r8
+       addq    r23, r28, r28
+       stq     r8, 16(r16)
+
+       mulq    r3, r19, r7     C U1
+       subq    r8, r28, r8
+L(L0): umulh   r3, r19, r23    C U1
+       cmpult  r8, r4, r28             C same steps, products in r4/r20
+       ldq     r3, 24(r17)
+       subq    r8, r4, r8
+       addq    r20, r28, r28
+       stq     r8, 24(r16)
+
+       lda     r18, -4(r18)            C four limbs done
+       lda     r17, 32(r17)
+       lda     r16, 32(r16)
+       bgt     r18, L(top)
+C *** MAIN LOOP END ***
+C Wind-down: the same per-limb steps for the last seven limbs, no more loads.
+       mulq    r0, r19, r4     C U1
+       subq    r8, r28, r8
+L(cj7):        umulh   r0, r19, r20    C U1
+       cmpult  r8, r5, r28
+       subq    r8, r5, r8
+       addq    r21, r28, r28
+       stq     r8, 0(r16)
+       mulq    r1, r19, r5     C U1
+       subq    r8, r28, r8
+L(cj6):        umulh   r1, r19, r21    C U1
+       cmpult  r8, r6, r28
+       subq    r8, r6, r8
+       addq    r22, r28, r28
+       stq     r8, 8(r16)
+       mulq    r2, r19, r6     C U1
+       subq    r8, r28, r8
+L(cj5):        umulh   r2, r19, r22    C U1
+       cmpult  r8, r7, r28
+       subq    r8, r7, r8
+       addq    r23, r28, r28
+       stq     r8, 16(r16)
+       mulq    r3, r19, r7     C U1
+       subq    r8, r28, r8
+L(cj4):        umulh   r3, r19, r23    C U1
+       cmpult  r8, r4, r28
+       subq    r8, r4, r8
+       addq    r20, r28, r28
+       stq     r8, 24(r16)
+       subq    r8, r28, r8
+L(cj3):        cmpult  r8, r5, r28             C all products are ready from here on
+       subq    r8, r5, r8
+       addq    r21, r28, r28
+       stq     r8, 32(r16)
+       subq    r8, r28, r8
+L(cj2):        cmpult  r8, r6, r28
+       subq    r8, r6, r8
+       addq    r22, r28, r28
+       stq     r8, 40(r16)
+       subq    r8, r28, r8
+L(cj1):        cmpult  r8, r7, r28
+       subq    r8, r7, r8
+       addq    r23, r28, r28
+       stq     r8, 48(r16)             C store the last result limb
+       subq    r8, r28, r0             C return the final running value in r0
+       ret     r31, (r26), 1
+
+EPILOGUE()
+ASM_END()
diff --git a/mpn/alpha/cntlz.asm b/mpn/alpha/cntlz.asm

new file mode 100644 (file)

index 0000000..2bfd923
--- /dev/null
+++ b/mpn/alpha/cntlz.asm
@@ -0,0 +1,44 @@
+dnl  Alpha auxiliary for longlong.h's count_leading_zeros
+
+dnl  Copyright 1997, 2000, 2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+ASM_START()
+EXTERN(__clz_tab)
+PROLOGUE(mpn_count_leading_zeros,gp)   C in: r16 = x; out: r0 (two table lookups)
+       cmpbge  r31,  r16, r1           C r1 bit i = 1 if byte i of x is zero
+       LEA(r3,__clz_tab)               C r3 = address of __clz_tab
+       sra     r1,   1,   r1           C drop the bit for the lowest byte
+       xor     r1,   127, r1           C flip: bit j set if byte j+1 is nonzero
+       srl     r16,  1,   r16          C r16 = x >> 1
+       addq    r1,   r3,  r1           C r1 = &__clz_tab[byte mask]
+       ldq_u   r0,   0(r1)             C fetch the aligned quadword holding it
+       lda     r2,   64                C r2 = 64
+       extbl   r0,   r1,   r0          C r0 = __clz_tab[byte mask]
+       s8subl  r0,   8,    r0          C r0 = 8*r0 - 8, used as a shift count
+       srl     r16,  r0,   r16         C r16 = (x >> 1) >> r0
+       addq    r16,  r3,   r16         C r16 = &__clz_tab[r16]; presumably in range -- confirm
+       ldq_u   r1,   0(r16)            C fetch the aligned quadword holding it
+       extbl   r1,   r16,  r1          C r1 = second table value
+       subq    r2,   r1,   r2          C r2 = 64 - second table value
+       subq    r2,   r0,   r0          C r0 = 64 - second table value - shift count
+       ret     r31,  (r26),1           C return r0
+EPILOGUE(mpn_count_leading_zeros)
+ASM_END()
diff --git a/mpn/alpha/com.asm b/mpn/alpha/com.asm

new file mode 100644 (file)

index 0000000..6f6c39a
--- /dev/null
+++ b/mpn/alpha/com.asm
@@ -0,0 +1,165 @@
+dnl  Alpha mpn_com -- mpn one's complement.
+
+dnl  Copyright 2003 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C      cycles/limb
+C EV4:    4.75
+C EV5:    2.0
+C EV6:    1.5
+
+
+C void mpn_com (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C
+C For ev5 the main loop is 7 cycles plus 1 taken branch bubble, for a total
+C of 2.0 c/l.  In general, a pattern like this unrolled to N limbs per loop
+C will be 1.5+2/N c/l.
+C
+C 2 cycles of loop control are unavoidable, for pointer updates and the
+C taken branch bubble, but also since ldq cannot issue two cycles after stq
+C (and with a run of stqs that means neither of two cycles at the end of the
+C loop).
+C
+C The fbeq is forced into the second cycle of the loop using unops, since
+C the first time through it must wait for the cvtqt result.  Once that
+C result is ready (a 1 cycle stall) then both the branch and following loads
+C can issue together.
+C
+C The main loop handles an odd count of limbs, being two limbs loaded before
+C each size test, plus one pipelined around from the previous iteration (or
+C setup in the entry sequence).
+C
+C An even number of limbs is handled by an explicit dst[0]=~src[0] in the
+C entry sequence, and an increment of the pointers.  For an odd size there's
+C no increment and the first store in the loop (r24) is a repeat of dst[0].
+C
+C Note that the load for r24 after the possible pointer increment is done
+C before the explicit store to dst[0], in case src==dst.
+
+
+ASM_START()
+
+FLOAT64(L(dat), 2.0)
+
+       ALIGN(16)
+
+PROLOGUE(mpn_com,gp)
+       C Stores ~src[i] to dst[i] for each of size limbs; r0 is not written.
+       C r16   dst
+       C r17   src
+       C r18   size
+
+       lda     r30, -16(r30)           C temporary stack space
+       lda     r7, -3(r18)             C size - 3
+
+       ldq     r20, 0(r17)             C src[0]
+       srl     r7, 1, r6               C (size-3)/2
+
+       stq     r6, 8(r30)              C (size-3)/2
+       and     r7, 1, r5               C 1 if size even
+
+       LEA(    r8, L(dat))             C r8 = address of the 2.0 constant
+       s8addq  r5, r17, r17            C skip src[0] if even
+
+       ornot   r31, r20, r20           C ~src[0]
+       unop
+
+       ldt     f0, 8(r30)              C (size-3)/2
+       ldq     r24, 0(r17)             C src[0 or 1]
+
+       stq     r20, 0(r16)             C dst[0]
+       s8addq  r5, r16, r19            C skip dst[0] if even
+
+       ldt     f1, 0(r8)               C data 2.0
+       lda     r30, 16(r30)            C restore stack
+       unop
+       cvtqt   f0, f0                  C (size-3)/2 as float
+
+       ornot   r31, r24, r24           C ~src[0 or 1], the first pipelined result
+       blt     r7, L(done_1)           C if size<=2
+       unop
+       unop
+
+
+       C 16-byte alignment here
+L(top):
+       C r17   src, incrementing
+       C r19   dst, incrementing
+       C r24   dst[i] result, ready to store
+       C f0    (size-3)/2, decrementing
+       C f1    2.0
+
+       ldq     r20, 8(r17)             C src[i+1]
+       ldq     r21, 16(r17)            C src[i+2]
+       unop
+       unop
+
+       fbeq    f0, L(done_2)           C count is zero: three limbs left to store
+       unop
+       ldq     r22, 24(r17)            C src[i+3]
+       ldq     r23, 32(r17)            C src[i+4]
+
+       stq     r24, 0(r19)             C dst[i]
+       ornot   r31, r20, r20           C ~src[i+1]
+       subt    f0, f1, f0              C count -= 2
+       unop
+
+       stq     r20, 8(r19)             C dst[i+1]
+       ornot   r31, r21, r21           C ~src[i+2]
+       unop
+       unop
+
+       stq     r21, 16(r19)            C dst[i+2]
+       ornot   r31, r22, r22           C ~src[i+3]
+
+       stq     r22, 24(r19)            C dst[i+3]
+       ornot   r31, r23, r24           C ~src[i+4], carried round as the next r24
+
+       lda     r17, 32(r17)            C src += 4
+       lda     r19, 32(r19)            C dst += 4
+       unop
+       fbge    f0, L(top)              C count >= 0: at least three limbs remain
+
+
+L(done_1):
+       C r19   &dst[size-1]
+       C r24   result for dst[size-1]
+
+       stq     r24, 0(r19)             C dst[size-1]
+       ret     r31, (r26), 1
+
+
+L(done_2):
+       C r19   &dst[size-3]
+       C r20   src[size-2]
+       C r21   src[size-1]
+       C r24   result for dst[size-3]
+
+       stq     r24, 0(r19)             C dst[size-3]
+       ornot   r31, r20, r20           C ~src[size-2]
+
+       stq     r20, 8(r19)             C dst[size-2]
+       ornot   r31, r21, r21           C ~src[size-1]
+
+       stq     r21, 16(r19)            C dst[size-1]
+       ret     r31, (r26), 1
+
+EPILOGUE()
+ASM_END()
diff --git a/mpn/alpha/copyd.asm b/mpn/alpha/copyd.asm

new file mode 100644 (file)

index 0000000..ba8fa1c
--- /dev/null
+++ b/mpn/alpha/copyd.asm
@@ -0,0 +1,77 @@
+dnl  Alpha mpn_copyd -- copy, decrementing.
+
+dnl  Copyright 2002, 2003 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C      cycles/limb
+C EV4:     4
+C EV5:     1.75
+C EV6:     1
+
+C INPUT PARAMETERS
+C rp   r16
+C up   r17
+C n    r18
+C Copies n limbs from up to rp from the top address downwards: 8 per pass of
+C $Loop, then one at a time.  E0/E1 presumably name issue pipes -- confirm.
+ASM_START()
+PROLOGUE(mpn_copyd)
+       s8addq  r18,r16,r16             C E0  rp += 8*n, just past the top limb
+       s8addq  r18,r17,r17             C E1  up += 8*n
+       lda     r18,-8(r18)             C E0  n -= 8
+       blt     r18,$Lend               C E1  fewer than 8 limbs: skip $Loop
+$Loop: ldq     r0,-8(r17)              C E0
+       ldq     r1,-16(r17)             C E1
+       ldq     r2,-24(r17)             C E0
+       ldq     r3,-32(r17)             C E1
+       ldq     r4,-40(r17)             C E0
+       ldq     r5,-48(r17)             C E1
+       ldq     r6,-56(r17)             C E0
+       ldq     r7,-64(r17)             C E1
+       stq     r0,-8(r16)              C E0
+       lda     r17,-64(r17)            C E1  up -= 8 limbs
+       stq     r1,-16(r16)             C E0
+       bis     r31, r31, r31           C E1  no-op filler (result goes to r31)
+       stq     r2,-24(r16)             C E0
+       lda     r18,-8(r18)             C E1  n -= 8
+       stq     r3,-32(r16)             C E0
+       bis     r31, r31, r31           C E1
+       stq     r4,-40(r16)             C E0
+       bis     r31, r31, r31           C E1
+       stq     r5,-48(r16)             C E0
+       bis     r31, r31, r31           C E1
+       stq     r6,-56(r16)             C E0
+       bis     r31, r31, r31           C E1
+       stq     r7,-64(r16)             C E0
+       lda     r16,-64(r16)            C E1  rp -= 8 limbs
+       bge     r18,$Loop               C E1  8 or more limbs still to copy
+$Lend: lda     r18,7(r18)              C E0  r18 = leftover limbs - 1
+       blt     r18,$Lret               C E1  nothing left
+       ldq     r0,-8(r17)              C E0  preload the first leftover limb
+       beq     r18,$Lend0              C E1  it is the only one
+$Loop0:        stq     r0,-8(r16)              C E0  store the preloaded limb
+       lda     r16,-8(r16)             C E1
+       ldq     r0,-16(r17)             C E0  preload the next limb down
+       lda     r18,-1(r18)             C E1
+       lda     r17,-8(r17)             C E0
+       bgt     r18,$Loop0              C E1
+$Lend0:        stq     r0,-8(r16)              C E0  store the last limb
+$Lret: ret     r31,(r26),1             C E1
+EPILOGUE(mpn_copyd)
+ASM_END()
diff --git a/mpn/alpha/copyi.asm b/mpn/alpha/copyi.asm

new file mode 100644 (file)

index 0000000..4258041
--- /dev/null
+++ b/mpn/alpha/copyi.asm
@@ -0,0 +1,75 @@
+dnl  Alpha mpn_copyi -- copy, incrementing.
+
+dnl  Copyright 2002, 2003 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C      cycles/limb
+C EV4:     4
+C EV5:     1.75
+C EV6:     1
+
+C INPUT PARAMETERS
+C rp   r16
+C up   r17
+C n    r18
+
+
+ASM_START()
+C mpn_copyi (rp, up, n): copy the n limbs at up (r17) to rp (r16), walking
+C upwards in memory.  The main loop moves 8 limbs per pass; a one-limb loop
+C then finishes the remaining 0 to 7 limbs.  r18 is the running limb counter.
+C "bis r31,r31,r31" writes the zero register and so acts as a no-op filler.
+C NOTE(review): the E0/E1 tags look like intended issue-slot annotations --
+C confirm against the EV5 pipeline documentation.
+PROLOGUE(mpn_copyi)
+       lda     r18,-8(r18)             C E0
+       blt     r18,$Lend               C E1
+C Main loop invariant: r18 = (limbs still to copy) - 8, and it is >= 0 here,
+C so a full group of 8 limbs is available.  All 8 loads precede the stores.
+$Loop: ldq     r0,0(r17)               C E0
+       ldq     r1,8(r17)               C E1
+       ldq     r2,16(r17)              C E0
+       ldq     r3,24(r17)              C E1
+       ldq     r4,32(r17)              C E0
+       ldq     r5,40(r17)              C E1
+       ldq     r6,48(r17)              C E0
+       ldq     r7,56(r17)              C E1
+       stq     r0,0(r16)               C E0
+       lda     r17,64(r17)             C E1
+       stq     r1,8(r16)               C E0
+       bis     r31, r31, r31           C E1
+       stq     r2,16(r16)              C E0
+       lda     r18,-8(r18)             C E1
+       stq     r3,24(r16)              C E0
+       bis     r31, r31, r31           C E1
+       stq     r4,32(r16)              C E0
+       bis     r31, r31, r31           C E1
+       stq     r5,40(r16)              C E0
+       bis     r31, r31, r31           C E1
+       stq     r6,48(r16)              C E0
+       bis     r31, r31, r31           C E1
+       stq     r7,56(r16)              C E0
+       lda     r16,64(r16)             C E1
+       bge     r18,$Loop               C E1
+C Tail: adding 7 turns r18 into (limbs still to copy) - 1, i.e. -1..6.
+C Negative means nothing is left; zero means exactly one limb is left.
+$Lend: lda     r18,7(r18)              C E0
+       blt     r18,$Lret               C E1
+       ldq     r0,0(r17)               C E0
+       beq     r18,$Lend0              C E1
+C One limb per pass; the load for the next limb is issued before the loop
+C test, and the last loaded limb is stored at $Lend0.
+$Loop0:        stq     r0,0(r16)               C E0
+       lda     r16,8(r16)              C E1
+       ldq     r0,8(r17)               C E0
+       lda     r18,-1(r18)             C E1
+       lda     r17,8(r17)              C E0
+       bgt     r18,$Loop0              C E1
+$Lend0:        stq     r0,0(r16)               C E0
+$Lret: ret     r31,(r26),1             C E1
+EPILOGUE(mpn_copyi)
+ASM_END()
diff --git a/mpn/alpha/default.m4 b/mpn/alpha/default.m4

new file mode 100644 (file)

index 0000000..e7aae2e
--- /dev/null
+++ b/mpn/alpha/default.m4
@@ -0,0 +1,114 @@
+divert(-1)
+
+dnl  m4 macros for alpha assembler (everywhere except unicos).
+
+
+dnl  Copyright 2000, 2002, 2003, 2004 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+dnl  Usage: ASM_START()
+dnl
+dnl  Emits the assembler directives ".set noreorder" and ".set noat".
+define(`ASM_START',
+m4_assert_numargs(0)
+`      .set noreorder
+       .set noat')
+
+dnl  Usage: X(value)
+dnl
+dnl  Expands to the argument with "0x" prepended, i.e. a hex literal.
+define(`X',
+m4_assert_numargs(1)
+`0x$1')
+
+dnl  Usage: FLOAT64(label,value)
+dnl
+dnl  Emits ".align 3" followed by "label: .t_floating value".
+define(`FLOAT64',
+m4_assert_numargs(2)
+`      .align  3
+$1:    .t_floating $2')
+
+
+dnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo[,gp|noalign])
+dnl          EPILOGUE_cpu(GSYM_PREFIX`'foo)
+dnl
+dnl  PROLOGUE_cpu first checks that the optional second argument is empty,
+dnl  "gp" or "noalign", and raises an m4 error otherwise.  It then emits
+dnl  .text, an ALIGN(16) unless "noalign" was given, the .globl/.ent
+dnl  directives and the entry label.  With "gp" it also emits
+dnl  "ldgp r29,0(r27)" and uses ".prologue 1"; otherwise ".prologue 0".
+dnl  The frame is always declared as ".frame r30,0,r26".
+dnl
+dnl  EPILOGUE_cpu emits the matching ".end" directive.
+
+define(`PROLOGUE_cpu',
+m4_assert_numargs_range(1,2)
+`ifelse(`$2',gp,,
+`ifelse(`$2',noalign,,
+`ifelse(`$2',,,`m4_error(`Unrecognised PROLOGUE parameter
+')')')')dnl
+       .text
+ifelse(`$2',noalign,,` ALIGN(16)')
+       .globl  $1
+       .ent    $1
+$1:
+ifelse(`$2',gp,`       ldgp    r29,0(r27)')
+       .frame r30,0,r26
+       .prologue ifelse(`$2',gp,1,0)')
+
+define(`EPILOGUE_cpu',
+m4_assert_numargs(1)
+`      .end    $1')
+
+
+dnl  Usage: LDGP(dst,src)
+dnl
+dnl  Emit an "ldgp dst,src", but only if the system uses a GOT.
+dnl  This (non-unicos) definition emits the instruction unconditionally.
+dnl
+dnl  The macro name is quoted like every other define in this file, so that
+dnl  it is not expanded should LDGP already be defined when this is read.
+
+define(`LDGP',
+m4_assert_numargs(2)
+`ldgp  `$1', `$2'')
+
+
+dnl  Usage: EXTERN(variable_name)
+dnl
+dnl  No body text is supplied here beyond the argument-count check, so in
+dnl  this file the macro contributes no assembler directive.
+define(`EXTERN',
+m4_assert_numargs(1)
+)
+
+dnl  Usage: r0 ... r31
+dnl         f0 ... f31
+dnl
+dnl  Map register names r0 to $0, and f0 to $f0, etc.
+dnl  This is needed on all systems but Unicos
+dnl
+dnl  defreg() is used to protect the $ in $0 (otherwise it would represent a
+dnl  macro argument).  Double quoting is used to protect the f0 in $f0
+dnl  (otherwise it would be an infinite recursion).
+
+forloop(i,0,31,`defreg(`r'i,$i)')
+forloop(i,0,31,`deflit(`f'i,``$f''i)')
+
+
+dnl  Usage: DATASTART(name)
+dnl         DATAEND()
+dnl
+dnl  DATASTART emits DATA followed by the label "name:".  DATAEND has no
+dnl  body text beyond its argument-count check.
+
+define(`DATASTART',
+m4_assert_numargs(1)
+`      DATA
+$1:')
+define(`DATAEND',
+m4_assert_numargs(0)
+)
+
+dnl  Load a symbolic address into a register
+dnl  Usage: LEA(reg,symbol), which expands to "lda reg, symbol".
+define(`LEA',
+m4_assert_numargs(2)
+`lda   $1,  $2')
+
+dnl  Usage: ASM_END()
+dnl
+dnl  No body text is supplied here beyond the argument-count check.
+define(`ASM_END',
+m4_assert_numargs(0)
+)
+
+divert
diff --git a/mpn/alpha/dive_1.c b/mpn/alpha/dive_1.c

new file mode 100644 (file)

index 0000000..a915c58
--- /dev/null
+++ b/mpn/alpha/dive_1.c
@@ -0,0 +1,104 @@
+/* Alpha mpn_divexact_1 -- mpn by limb exact division.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/*      cycles/limb
+   EV4:    47.0
+   EV5:    30.0
+   EV6:    15.0
+*/
+
+
+/* The dependent chain is as follows (the same as modexact), and this is
+   what the code runs as.
+
+       ev4    ev5   ev6
+        1      1     1    sub    y = x - h
+       23     13     7    mulq   q = y * inverse
+       23     15     7    umulh  h = high (q * d)
+       --     --    --
+       47     30    15
+
+   The time to load src[i+1] and establish x hides under the umulh latency.  */
+
+/* Store at {dst,size} the quotient of {src,size} by divisor, for the exact
+   division case described in the file header.
+
+   dst, src   destination and source limb vectors (same or separate)
+   size       number of limbs, at least 1
+   divisor    non-zero divisor limb
+
+   An even divisor is reduced to an odd one by stripping its rshift trailing
+   zero bits; the source is shifted right by the same amount on the fly, one
+   limb ahead (sr holds the already-shifted low part of the current limb).
+   Each quotient limb is then (s - c - h) * inverse mod 2^64, where c is the
+   borrow out of the subtractions and h is the high limb of the previous
+   q * divisor.  */
+void
+mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor)
+{
+  mp_limb_t  inverse, lshift_mask, s, sr, s_next, c, h, x, y, q, dummy;
+  unsigned   rshift, lshift;
+
+  ASSERT (size >= 1);
+  ASSERT (divisor != 0);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size));
+  ASSERT_MPN (src, size);
+  ASSERT_LIMB (divisor);
+
+  s_next = *src++;   /* src[0] */
+
+  rshift = 0;
+  lshift_mask = 0;
+  if ((divisor & 1) == 0)
+    {
+      count_trailing_zeros (rshift, divisor);
+      lshift_mask = MP_LIMB_T_MAX;
+      divisor >>= rshift;
+    }
+
+  binvert_limb (inverse, divisor);
+
+  /* For an odd divisor rshift is 0 and a plain 64 - rshift would give a
+     shift count of 64, which is undefined in C.  Reduce it modulo 64; the
+     shifted value is discarded in that case anyway since lshift_mask is 0,
+     so the result is the same for every input.  */
+  lshift = (64 - rshift) & 63;
+
+  c = 0;
+  h = 0;
+  sr = s_next >> rshift;
+
+  size--;
+  if (LIKELY (size != 0))
+    {
+      do
+        {
+          s_next = *src++;      /* src[i+1] */
+          s = sr | ((s_next << lshift) & lshift_mask);
+          x = s - c;
+          c = s < c;            /* borrow from s - c */
+          sr = s_next >> rshift;
+
+          y = x - h;
+          c += (x < h);         /* borrow from x - h */
+          q = y * inverse;
+          *dst++ = q;
+          umul_ppmm (h, dummy, q, divisor);   /* only the high limb is used */
+
+          size--;
+        }
+      while (size != 0);
+    }
+
+  /* Last limb: there is no next source limb to shift in, and no further
+     borrow or high product is needed.  */
+  x = sr - c;
+  y = x - h;
+  q = y * inverse;
+  *dst = q;         /* dst[size-1] */
+}
diff --git a/mpn/alpha/divrem_2.asm b/mpn/alpha/divrem_2.asm

new file mode 100644 (file)

index 0000000..b68468b
--- /dev/null
+++ b/mpn/alpha/divrem_2.asm
@@ -0,0 +1,167 @@
+dnl  Alpha mpn_divrem_2 -- Divide an mpn number by a normalized 2-limb number.
+
+dnl  Copyright 2007, 2008 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C              norm    frac
+C ev4
+C ev5          70      70
+C ev6          29      29
+
+C TODO
+C  * Perhaps inline mpn_invert_limb, that would allow us to not save/restore
+C    any registers (thus save ~10 cycles per call).
+C  * Use negated d1 and/or d0 to speed carry propagation.  Might save a cycle
+C    or two.
+C  * Check cluster delays (for ev6).  We very likely could save some cycles.
+C  * Use branch-free code for computing di.
+C  * r19 (the loop counter) is a caller-saved register, so it is spilled to
+C    the frame around the mpn_invert_limb call; inlining would remove that.
+
+C INPUT PARAMETERS
+define(`qp',           `r16')
+define(`fn',           `r17')
+define(`up_param',     `r18')
+define(`un_param',     `r19')
+define(`dp',           `r20')
+
+C Stack frame (80 bytes at r30):
+C    0  r26 (return address)      40  r13
+C    8  r9                        48  r19 spill across mpn_invert_limb
+C   16  r10                       56  r15
+C   24  r11                       64  qp (r16)
+C   32  r12                       72  most significant quotient limb
+C
+C Register use after the entry code:
+C   r15 = fn            r12 = d1 (dp[1])     r10 = d0 (dp[0])
+C   r11 = n2, r9 = n1   (the two high limbs of the running remainder)
+C   r13 = pointer into up, r19 = loop counter, r0 = di, r16 = quotient ptr
+C The value returned in r0 is the most significant quotient limb (0 or 1).
+
+ASM_START()
+PROLOGUE(mpn_divrem_2)
+       ldgp    r29, 0(r27)
+       lda     r30, -80(r30)
+       stq     r26, 0(r30)
+       stq     r9, 8(r30)
+       stq     r10, 16(r30)
+       stq     r11, 24(r30)
+       stq     r12, 32(r30)
+       stq     r13, 40(r30)
+C      stq     r14, 48(r30)
+       stq     r15, 56(r30)
+       .prologue       1
+       stq     r16, 64(r30)
+       bis     r31, r17, r15           C r15 = fn
+       s8addq  r19, r18, r13           C r13 = up + 8*un
+       lda     r13, -24(r13)
+       ldq     r12, 8(r20)             C d1
+       ldq     r10, 0(r20)             C d0
+       ldq     r11, 16(r13)            C n2 = up[un-1]
+       ldq     r9, 8(r13)              C n1 = up[un-2]
+
+       bis     r31, r31, r3            C most_significant_q_limb = 0
+       cmpult  r11, r12, r1
+       bne     r1, L(L8)
+       cmpule  r11, r12, r1
+       cmpult  r9, r10, r2
+       and     r1, r2, r1
+       bne     r1, L(L8)
+       subq    r11, r12, r11
+       subq    r11, r2, r11
+       subq    r9, r10, r9
+       lda     r3, 1(r31)              C most_significant_q_limb = 1
+L(L8): stq     r3, 72(r30)
+
+       addq    r15, r19, r19
+       lda     r19, -3(r19)            C r19 = fn + un - 3
+       blt     r19, L(L10)
+       stq     r19, 48(r30)            C r19 is caller-saved, keep it safe
+       bis     r31, r12, r16
+       jsr     r26, mpn_invert_limb
+       ldgp    r29, 0(r26)
+       ldq     r19, 48(r30)            C restore loop counter
+       mulq    r0, r12, r4             C t0 = LO(di * d1)
+       umulh   r0, r10, r2             C s1 = HI(di * d0)
+       addq    r4, r10, r4             C t0 += d0
+       cmpule  r10, r4, r7             C !(t0 < d0), i.e. 1 - carry
+       addq    r4, r2, r4              C t0 += s1
+       cmpult  r4, r2, r1
+       subq    r1, r7, r7              C t1 (-1, 0, or 1)
+       blt     r7, L(L42)
+L(L22):
+       lda     r0, -1(r0)              C di--
+       cmpult  r4, r12, r1             C cy for: t0 -= d1 (below)
+       subq    r7, r1, r7              C t1 -= cy
+       subq    r4, r12, r4             C t0 -= d1
+       bge     r7, L(L22)
+L(L42):
+       ldq     r16, 64(r30)
+       s8addq  r19, r16, r16           C r16 = qp + 8*r19
+       ALIGN(16)
+L(loop):
+       mulq    r11, r0, r5             C q0 (early)
+       umulh   r11, r0, r6             C q  (early)
+       addq    r5, r9, r8              C q0 += n1
+       addq    r6, r11, r6             C q  += n2
+       cmpult  r8, r5, r1              C cy for: q0 += n1
+       addq    r6, r1, r6              C q  += cy
+       unop
+       mulq    r12, r6, r1             C LO(d1 * q)
+       umulh   r10, r6, r7             C t1 = HI(d0 * q)
+       subq    r9, r1, r9              C n1 -= LO(d1 * q)
+       mulq    r10, r6, r4             C t0 = LO(d0 * q)
+       unop
+C r5 doubles as the condition and as n0: when the counter is below fn the
+C compare leaves r5 = 0, the load is skipped, and n0 = 0 is used.
+       cmple   r15, r19, r5            C condition and n0...
+       beq     r5, L(L31)
+       ldq     r5, 0(r13)
+       lda     r13, -8(r13)
+L(L31):        subq    r9, r12, r9             C n1 -= d1
+       cmpult  r5, r10, r1             C
+       subq    r9, r1, r9              C
+       subq    r5, r10, r5             C n0 -= d0
+       subq    r9, r7, r9              C n1 -= t1
+       cmpult  r5, r4, r1              C
+       subq    r9, r1, r2              C
+       subq    r5, r4, r5              C n0 -= t0
+       cmpult  r2, r8, r1              C (n1 < q0)
+       addq    r6, r1, r6              C q += cond
+       lda     r1, -1(r1)              C -(n1 >= q0)
+       and     r1, r10, r4             C
+       addq    r5, r4, r9              C n0 += mask & d0
+       and     r1, r12, r1             C
+       cmpult  r9, r5, r11             C cy for: n0 += mask & d0
+       addq    r2, r1, r1              C n1 += mask & d1
+       addq    r1, r11, r11            C n1 += cy
+       cmpult  r11, r12, r1            C
+       beq     r1, L(fix)              C
+L(bck):        stq     r6, 0(r16)
+       lda     r16, -8(r16)
+       lda     r19, -1(r19)
+       bge     r19, L(loop)
+
+L(L10):        stq     r9, 8(r13)              C write back the two remainder limbs
+       stq     r11, 16(r13)
+       ldq     r0, 72(r30)             C return most significant q limb
+       ldq     r26, 0(r30)
+       ldq     r9, 8(r30)
+       ldq     r10, 16(r30)
+       ldq     r11, 24(r30)
+       ldq     r12, 32(r30)
+       ldq     r13, 40(r30)
+C      ldq     r14, 48(r30)
+       ldq     r15, 56(r30)
+       lda     r30, 80(r30)
+       ret     r31, (r26), 1
+
+C Rare adjustment, taken when the new high limb is not below d1: if the
+C two-limb remainder is still >= d1:d0, subtract d once more and bump q.
+L(fix):        cmpule  r11, r12, r1
+       cmpult  r9, r10, r2
+       and     r1, r2, r1
+       bne     r1, L(bck)
+       subq    r11, r12, r11
+       subq    r11, r2, r11
+       subq    r9, r10, r9
+       lda     r6, 1(r6)
+       br      L(bck)
+EPILOGUE()
+ASM_END()
diff --git a/mpn/alpha/ev5/diveby3.asm b/mpn/alpha/ev5/diveby3.asm

new file mode 100644 (file)

index 0000000..334e1ce
--- /dev/null
+++ b/mpn/alpha/ev5/diveby3.asm
@@ -0,0 +1,321 @@
+dnl  Alpha mpn_divexact_by3c -- mpn division by 3, expecting no remainder.
+
+dnl  Copyright 2004, 2005, 2009 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C      cycles/limb
+C EV4:    22
+C EV5:    11.5
+C EV6:     6.3         Note that mpn_bdiv_dbm1c is faster
+
+C TODO
+C  * Remove the unops, they benefit just ev6, which no longer uses this file.
+C  * Try prefetch for destination, using lds.
+C  * Improve feed-in code, by moving initial mulq earlier; make initial load
+C    to u0/u0 to save some copying.
+C  * Combine u0 and u2, u1 and u3.
+
+C INPUT PARAMETERS
+define(`rp',   `r16')
+define(`up',   `r17')
+define(`n',    `r18')
+define(`cy',   `r19')
+
+ASM_START()
+
+DATASTART(L(LC))
+       .quad   0xAAAAAAAAAAAAAAAB
+       .quad   0x5555555555555555
+       .quad   0xAAAAAAAAAAAAAAAA
+DATAEND()
+
+define(`xAAAAAAAAAAAAAAAB',    `r20')
+define(`x5555555555555555',    `r21')
+define(`xAAAAAAAAAAAAAAAA',    `r22')
+define(`u0',   `r0')   define(`u1',    `r1')
+define(`u2',   `r2')   define(`u3',    `r3')
+define(`l0',   `r25')  define(`x',     `r8')
+define(`q0',   `r4')   define(`q1',    `r5')
+define(`p6',   `r6')   define(`p7',    `r7')
+define(`t0',   `r23')  define(`t1',    `r24')
+define(`cymask',`r28')
+
+
+C mpn_divexact_by3c (rp, up, n, cy): divide the n-limb number at up by 3,
+C store the n quotient limbs at rp and return the final carry in r0.
+C
+C Each quotient limb is x = u * 0xAAAAAAAAAAAAAAAB + l0.  The constant is the
+C inverse of 3 mod 2^64, and l0 is 0x5555555555555555 times the carry coming
+C from the lower limbs.  The carry passed on to the next limb is
+C (u < cy) + (x > 0x5555555555555555) + (x > 0xAAAAAAAAAAAAAAAA).
+C
+C The loop is unrolled 4 ways; n mod 4 selects the entry point ($Ltop, $L01,
+C $L00 or $L11) through the feed-in blocks $Lb10, $Lb01, $Lb00 and $Lb11.
+C r28 first holds up[0] and is reused as cymask once the feed-in has copied
+C that limb into one of u0..u3.
+PROLOGUE(mpn_divexact_by3c,gp)
+
+       ldq     r28, 0(up)                      C load first limb early
+
+C Put magic constants in registers
+       lda     r0, L(LC)
+       ldq     xAAAAAAAAAAAAAAAB, 0(r0)
+       ldq     x5555555555555555, 8(r0)
+       ldq     xAAAAAAAAAAAAAAAA, 16(r0)
+
+C Compute initial l0 value
+C (0x5555555555555555 for cy = 1, 0xAAAAAAAAAAAAAAAA for cy = 2, else 0)
+       cmpeq   cy, 1, p6
+       cmpeq   cy, 2, p7
+       negq    p6, p6
+       and     p6, x5555555555555555, l0
+       cmovne  p7, xAAAAAAAAAAAAAAAA, l0
+
+C Feed-in depending on (n mod 4)
+       and     n, 3, r8
+       lda     n, -3(n)
+       cmpeq   r8, 1, r4
+       cmpeq   r8, 2, r5
+       bne     r4, $Lb01
+       bne     r5, $Lb10
+       beq     r8, $Lb00
+
+$Lb11: ldq     u3, 8(up)
+       lda     up, -24(up)
+       lda     rp, -24(rp)
+       mulq    r28, xAAAAAAAAAAAAAAAB, q0
+       mov     r28, u2
+       br      r31, $L11
+
+$Lb00: ldq     u2, 8(up)
+       lda     up, -16(up)
+       lda     rp, -16(rp)
+       mulq    r28, xAAAAAAAAAAAAAAAB, q1
+       mov     r28, u1
+       br      r31, $L00
+
+$Lb01: lda     rp, -8(rp)
+       mulq    r28, xAAAAAAAAAAAAAAAB, q0
+       mov     r28, u0
+       blt     n, $Lcj1
+       ldq     u1, 8(up)
+       lda     up, -8(up)
+       br      r31, $L01
+
+$Lb10: ldq     u0, 8(up)
+       mulq    r28, xAAAAAAAAAAAAAAAB, q1
+       mov     r28, u3
+       blt     n, $Lend
+
+C Main loop.  Every stage has the same shape: turn the previous limb and
+C carry into a borrow, start the multiply for the next limb, load the limb
+C after that, form x, then rebuild cy and l0 from the borrow and the two
+C threshold compares before storing x.
+       ALIGN(16)
+$Ltop:
+C 0
+       cmpult  u3, cy, cy                      C L0
+       mulq    u0, xAAAAAAAAAAAAAAAB, q0       C U1
+       ldq     u1, 16(up)                      C L1
+       addq    q1, l0, x                       C U0
+C 1
+       negq    cy, cymask                      C L0
+       unop                                    C U1
+       unop                                    C L1
+       cmpult  x5555555555555555, x, p6        C U0
+C 2
+       cmpult  xAAAAAAAAAAAAAAAA, x, p7        C U1
+       unop
+       unop
+       negq    p6, t0                          C L0
+C 3
+       negq    p7, t1                          C L0
+       and     cymask, x5555555555555555, l0   C U1
+       addq    p6, cy, cy
+       and     t0, x5555555555555555, t0
+C 4
+       and     t1, x5555555555555555, t1
+       addq    p7, cy, cy
+       unop
+       addq    t0, l0, l0
+C 5
+       addq    t1, l0, l0
+       unop
+       stq     x, 0(rp)                        C L1
+       unop
+$L01:
+C 0
+       cmpult  u0, cy, cy                      C L0
+       mulq    u1, xAAAAAAAAAAAAAAAB, q1       C U1
+       ldq     u2, 24(up)                      C L1
+       addq    q0, l0, x                       C U0
+C 1
+       negq    cy, cymask                      C L0
+       unop                                    C U1
+       unop                                    C L1
+       cmpult  x5555555555555555, x, p6        C U0
+C 2
+       cmpult  xAAAAAAAAAAAAAAAA, x, p7        C U1
+       unop
+       unop
+       negq    p6, t0                          C L0
+C 3
+       negq    p7, t1                          C L0
+       and     cymask, x5555555555555555, l0   C U1
+       addq    p6, cy, cy
+       and     t0, x5555555555555555, t0
+C 4
+       and     t1, x5555555555555555, t1
+       addq    p7, cy, cy
+       unop
+       addq    t0, l0, l0
+C 5
+       addq    t1, l0, l0
+       unop
+       stq     x, 8(rp)                        C L1
+       unop
+$L00:
+C 0
+       cmpult  u1, cy, cy                      C L0
+       mulq    u2, xAAAAAAAAAAAAAAAB, q0       C U1
+       ldq     u3, 32(up)                      C L1
+       addq    q1, l0, x                       C U0
+C 1
+       negq    cy, cymask                      C L0
+       unop                                    C U1
+       unop                                    C L1
+       cmpult  x5555555555555555, x, p6        C U0
+C 2
+       cmpult  xAAAAAAAAAAAAAAAA, x, p7        C U1
+       unop
+       unop
+       negq    p6, t0                          C L0
+C 3
+       negq    p7, t1                          C L0
+       and     cymask, x5555555555555555, l0   C U1
+       addq    p6, cy, cy
+       and     t0, x5555555555555555, t0
+C 4
+       and     t1, x5555555555555555, t1
+       addq    p7, cy, cy
+       unop
+       addq    t0, l0, l0
+C 5
+       addq    t1, l0, l0
+       unop
+       stq     x, 16(rp)                       C L1
+       unop
+$L11:
+C 0
+       cmpult  u2, cy, cy                      C L0
+       mulq    u3, xAAAAAAAAAAAAAAAB, q1       C U1
+       ldq     u0, 40(up)                      C L1
+       addq    q0, l0, x                       C U0
+C 1
+       negq    cy, cymask                      C L0
+       unop                                    C U1
+       unop                                    C L1
+       cmpult  x5555555555555555, x, p6        C U0
+C 2
+       cmpult  xAAAAAAAAAAAAAAAA, x, p7        C U1
+       lda     n, -4(n)                        C L1 bookkeeping
+       unop
+       negq    p6, t0                          C L0
+C 3
+       negq    p7, t1                          C L0
+       and     cymask, x5555555555555555, l0   C U1
+       addq    p6, cy, cy
+       and     t0, x5555555555555555, t0
+C 4
+       and     t1, x5555555555555555, t1
+       addq    p7, cy, cy
+       unop
+       addq    t0, l0, l0
+C 5
+       addq    t1, l0, l0
+       unop
+       stq     x, 24(rp)                       C L1
+       lda     up, 32(up)
+C
+       ldl     r31, 256(up)                    C prefetch
+       unop
+       lda     rp, 32(rp)
+       bge     n, $Ltop                        C U1
+C *** MAIN LOOP END ***
+C Wind-down: same stage as $Ltop but without the load of a further limb.
+$Lend:
+
+       cmpult  u3, cy, cy                      C L0
+       mulq    u0, xAAAAAAAAAAAAAAAB, q0       C U1
+       unop
+       addq    q1, l0, x                       C U0
+C 1
+       negq    cy, cymask                      C L0
+       unop                                    C U1
+       unop                                    C L1
+       cmpult  x5555555555555555, x, p6        C U0
+C 2
+       cmpult  xAAAAAAAAAAAAAAAA, x, p7        C U1
+       unop
+       unop
+       negq    p6, t0                          C L0
+C 3
+       negq    p7, t1                          C L0
+       and     cymask, x5555555555555555, l0   C U1
+       addq    p6, cy, cy
+       and     t0, x5555555555555555, t0
+C 4
+       and     t1, x5555555555555555, t1
+       addq    p7, cy, cy
+       unop
+       addq    t0, l0, l0
+C 5
+       addq    t1, l0, l0
+       unop
+       stq     x, 0(rp)                        C L1
+       unop
+C Last limb: store x and return the accumulated carry in r0; l0 is not
+C rebuilt since no further limb follows.
+$Lcj1:
+       cmpult  u0, cy, cy                      C L0
+       addq    q0, l0, x                       C U0
+       cmpult  x5555555555555555, x, p6        C U0
+       cmpult  xAAAAAAAAAAAAAAAA, x, p7        C U1
+       addq    p6, cy, cy
+       addq    p7, cy, r0
+       stq     x, 8(rp)                        C L1
+
+       ret     r31,(r26),1
+EPILOGUE()
+ASM_END()
+
+C This is useful for playing with various schedules.
+C Expand as: one(0)one(1)one(2)one(3)
+C
+C The argument is the stage number 0..3.  Registers are spelled as raw
+C numbers here: 0..3 correspond to u0..u3 and 4/5 to q0/q1, matching the
+C register defines earlier in this file.  The generated stages carry only the
+C arithmetic, load and store; they do not include the n/up/rp updates or the
+C prefetch found in the hand-written $L11 stage.
+define(`one',`
+C 0
+       cmpult  `$'eval(($1+3)%4), cy, cy               C L0
+       mulq    `$'$1, xAAAAAAAAAAAAAAAB, `$'eval(4+$1%2) C U1
+       ldq     `$'eval(($1+1)%4), eval($1*8+16)(up)    C L1
+       addq    `$'eval(4+($1+1)%2), l0, x              C U0
+C 1
+       negq    cy, cymask                              C L0
+       unop                                            C U1
+       unop                                            C L1
+       cmpult  x5555555555555555, x, p6                C U0
+C 2
+       cmpult  xAAAAAAAAAAAAAAAA, x, p7                C U1
+       unop
+       unop
+       negq    p6, t0                                  C L0
+C 3
+       negq    p7, t1                                  C L0
+       and     cymask, x5555555555555555, l0           C U1
+       addq    p6, cy, cy
+       and     t0, x5555555555555555, t0
+C 4
+       and     t1, x5555555555555555, t1
+       addq    p7, cy, cy
+       unop
+       addq    t0, l0, l0
+C 5
+       addq    t1, l0, l0
+       unop
+       stq     x, eval($1*8)(rp)                       C L1
+       unop
+')
diff --git a/mpn/alpha/ev5/gmp-mparam.h b/mpn/alpha/ev5/gmp-mparam.h

new file mode 100644 (file)

index 0000000..c2e7505
--- /dev/null
+++ b/mpn/alpha/ev5/gmp-mparam.h
@@ -0,0 +1,172 @@
+/* Alpha EV5 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2004, 2005, 2008, 2009,
+2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 64
+#define BYTES_PER_MP_LIMB 8
+
+
+/* 600 MHz 21164A */
+
+#define DIVREM_1_NORM_THRESHOLD              0  /* preinv always */
+#define DIVREM_1_UNNORM_THRESHOLD            0  /* always */
+#define MOD_1_NORM_THRESHOLD                 0  /* always */
+#define MOD_1_UNNORM_THRESHOLD               0  /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD         32
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          2
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         7
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        12
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     73
+#define USE_PREINV_DIVREM_1                  1  /* preinv always */
+#define DIVEXACT_1_THRESHOLD                 0  /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD           87
+
+#define MUL_TOOM22_THRESHOLD                16
+#define MUL_TOOM33_THRESHOLD                53
+#define MUL_TOOM44_THRESHOLD               121
+#define MUL_TOOM6H_THRESHOLD               173
+#define MUL_TOOM8H_THRESHOLD               236
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      81
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD      91
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD      81
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD      56
+
+#define SQR_BASECASE_THRESHOLD               5
+#define SQR_TOOM2_THRESHOLD                 28
+#define SQR_TOOM3_THRESHOLD                 78
+#define SQR_TOOM4_THRESHOLD                136
+#define SQR_TOOM6_THRESHOLD                180
+#define SQR_TOOM8_THRESHOLD                260
+
+#define MULMOD_BNM1_THRESHOLD               11
+#define SQRMOD_BNM1_THRESHOLD               17
+
+#define MUL_FFT_MODF_THRESHOLD             244  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    244, 5}, {     11, 6}, {      6, 5}, {     13, 6}, \
+    {      7, 5}, {     15, 6}, {     13, 7}, {      7, 6}, \
+    {     15, 7}, {      8, 6}, {     17, 7}, {      9, 6}, \
+    {     19, 7}, {     13, 8}, {      7, 7}, {     17, 8}, \
+    {      9, 7}, {     20, 8}, {     11, 7}, {     23, 8}, \
+    {     13, 7}, {     27, 9}, {      7, 8}, {     21, 9}, \
+    {     11, 8}, {     25,10}, {      7, 9}, {     15, 8}, \
+    {     33, 9}, {     23,10}, {     15, 9}, {     39,10}, \
+    {     23, 9}, {     47,11}, {     15,10}, {     31, 9}, \
+    {     67,10}, {     39, 9}, {     79,10}, {     47, 9}, \
+    {     95,10}, {     55,11}, {     31,10}, {     63, 9}, \
+    {    127,10}, {     71, 9}, {    143, 8}, {    287,10}, \
+    {     79,11}, {     47,10}, {     95, 9}, {    191,12}, \
+    {     31,11}, {     63,10}, {    127, 9}, {    255,10}, \
+    {    143, 9}, {    287,11}, {     79,10}, {    159, 9}, \
+    {    319, 8}, {    639,10}, {    175,11}, {     95,10}, \
+    {    191, 9}, {    383,10}, {    207, 9}, {    415,11}, \
+    {    111,12}, {     63,11}, {    127,10}, {    255,11}, \
+    {    143,10}, {    287, 9}, {    575,11}, {    159,10}, \
+    {    319,11}, {    175,10}, {    351,12}, {     95,11}, \
+    {    191,10}, {    383,11}, {    207,10}, {    415,11}, \
+    {    223,13}, {     63,12}, {    127,11}, {    255,10}, \
+    {    511,11}, {    287,10}, {    575,12}, {    159,11}, \
+    {    319,10}, {    639,11}, {    351,10}, {    703,12}, \
+    {    191,11}, {    415,12}, {    223,11}, {    447,13}, \
+    {    127,12}, {    255,11}, {    511,12}, {    287,11}, \
+    {    575,12}, {    319,11}, {    639,12}, {    351,11}, \
+    {    703,13}, {    191,12}, {    383,11}, {    767,12}, \
+    {    415,11}, {    831,12}, {    447,14}, {    127,13}, \
+    {    255,12}, {    575,13}, {    319,12}, {    703,13}, \
+    {    383,12}, {    831,13}, {    447,12}, {    895,14}, \
+    {    255,13}, {    511,12}, {   1023,13}, {    575,12}, \
+    {   1151,13}, {    703,12}, {   1407,14}, {  16384,15}, \
+    {  32768,16}, {  65536,17}, { 131072,18}, { 262144,19}, \
+    { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
+    {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 141
+#define MUL_FFT_THRESHOLD                 3008
+
+#define SQR_FFT_MODF_THRESHOLD             220  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    220, 5}, {     13, 6}, {     15, 7}, {      8, 6}, \
+    {     17, 7}, {      9, 6}, {     19, 7}, {     13, 8}, \
+    {      7, 7}, {     17, 8}, {      9, 7}, {     20, 8}, \
+    {     11, 7}, {     23, 8}, {     13, 7}, {     30, 8}, \
+    {     19, 4}, {    319, 9}, {     11, 8}, {     25,10}, \
+    {      7, 9}, {     15, 8}, {     31, 7}, {     64, 9}, \
+    {     19, 8}, {     39, 7}, {     79, 9}, {     23, 8}, \
+    {     47, 9}, {     27,10}, {     15, 9}, {     39,10}, \
+    {     23, 9}, {     47,11}, {     15,10}, {     31, 9}, \
+    {     67,10}, {     39, 9}, {     79,10}, {     47,11}, \
+    {     31,10}, {     63, 9}, {    127,10}, {     71, 9}, \
+    {    143, 8}, {    287,10}, {     79,11}, {     47,10}, \
+    {     95, 9}, {    191,12}, {     31,11}, {     63,10}, \
+    {    127, 9}, {    255,10}, {    143, 9}, {    287,11}, \
+    {     79,10}, {    159, 9}, {    319,10}, {    175, 9}, \
+    {    351,11}, {     95,10}, {    191, 9}, {    383,10}, \
+    {    207,11}, {    111,12}, {     63,11}, {    127,10}, \
+    {    255,11}, {    143,10}, {    287,11}, {    159,10}, \
+    {    319,11}, {    175,10}, {    351,12}, {     95,11}, \
+    {    191,10}, {    383,11}, {    207,10}, {    415,11}, \
+    {    223,13}, {     63,12}, {    127,11}, {    255,10}, \
+    {    511,11}, {    287,12}, {    159,11}, {    319,10}, \
+    {    639,11}, {    351,12}, {    191,11}, {    383,10}, \
+    {    767,11}, {    415,12}, {    223,11}, {    447,13}, \
+    {    127,12}, {    255,11}, {    511,12}, {    287,11}, \
+    {    575,12}, {    319,11}, {    639,12}, {    351,13}, \
+    {    191,12}, {    383,11}, {    767,12}, {    415,11}, \
+    {    831,12}, {    447,14}, {    127,13}, {    255,12}, \
+    {    575,13}, {    319,12}, {    703,13}, {    383,12}, \
+    {    831,13}, {    447,12}, {    895,14}, {    255,13}, \
+    {    511,12}, {   1023,13}, {    575,12}, {   1151,13}, \
+    {    703,14}, {  16384,15}, {  32768,16}, {  65536,17}, \
+    { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
+    {2097152,22}, {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 135
+#define SQR_FFT_THRESHOLD                 2240
+
+#define MULLO_BASECASE_THRESHOLD             0  /* always */
+#define MULLO_DC_THRESHOLD                  55
+#define MULLO_MUL_N_THRESHOLD             5558
+
+#define DC_DIV_QR_THRESHOLD                 55
+#define DC_DIVAPPR_Q_THRESHOLD             192
+#define DC_BDIV_QR_THRESHOLD                51
+#define DC_BDIV_Q_THRESHOLD                120
+
+#define INV_MULMOD_BNM1_THRESHOLD           61
+#define INV_NEWTON_THRESHOLD               174
+#define INV_APPR_THRESHOLD                 180
+
+#define BINV_NEWTON_THRESHOLD              199
+#define REDC_1_TO_REDC_N_THRESHOLD          55
+
+#define MU_DIV_QR_THRESHOLD                979
+#define MU_DIVAPPR_Q_THRESHOLD             998
+#define MUPI_DIV_QR_THRESHOLD               90
+#define MU_BDIV_QR_THRESHOLD               792
+#define MU_BDIV_Q_THRESHOLD                942
+
+#define MATRIX22_STRASSEN_THRESHOLD         15
+#define HGCD_THRESHOLD                      94
+#define GCD_DC_THRESHOLD                   306
+#define GCDEXT_DC_THRESHOLD                210
+#define JACOBI_BASE_METHOD                   2
+
+#define GET_STR_DC_THRESHOLD                16
+#define GET_STR_PRECOMPUTE_THRESHOLD        31
+#define SET_STR_DC_THRESHOLD               422
+#define SET_STR_PRECOMPUTE_THRESHOLD      1524
diff --git a/mpn/alpha/ev6/add_n.asm b/mpn/alpha/ev6/add_n.asm

new file mode 100644 (file)

index 0000000..114af73
--- /dev/null
+++ b/mpn/alpha/ev6/add_n.asm
@@ -0,0 +1,272 @@
+dnl  Alpha ev6 mpn_add_n -- Add two limb vectors of the same length > 0 and
+dnl  store sum in a third limb vector.
+
+dnl  Copyright 2000, 2003, 2005 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C      cycles/limb
+C EV4:     ?
+C EV5:     5.4
+C EV6:     2.125
+
+C  INPUT PARAMETERS
+C  rp  r16
+C  up  r17
+C  vp  r18
+C  n   r19
+C  cy  r20   (for mpn_add_nc)
+
+C TODO
+C   Finish cleaning up cy registers r22, r23 (make them use cy0/cy1)
+C   Use multi-pronged feed-in.
+C   Perform additional micro-tuning
+
+C  This code was written in cooperation with ev6 pipeline expert Steve Root.
+
+C  Pair loads and stores where possible
+C  Store pairs oct-aligned where possible (didn't need it here)
+C  Stores are delayed every third cycle
+C  Loads and stores are delayed by fills
+C  U stays still, put code there where possible (note alternation of U1 and U0)
+C  L moves because of loads and stores
+C  Note dampers in L to limit damage
+
+C  This odd-looking optimization expects that we're having random bits in our
+C  data, so that a pure zero result is unlikely, so we penalize the unlikely
+C  case to help the common case.
+
+C  Register aliases.  u0/u1 receive limbs loaded from up (r17), v0/v1 limbs
+C  loaded from vp (r18).  cy0/cy1 hold carries; cy0 (r20) is also the register
+C  in which mpn_add_nc receives its carry-in.
+define(`u0', `r0')  define(`u1', `r3')
+define(`v0', `r1')  define(`v1', `r4')
+
+define(`cy0', `r20')  define(`cy1', `r21')
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc)
+
+ASM_START()
+C mpn_add_nc -- like mpn_add_n, but with a carry-in supplied in cy0 (r20).
+C It only branches to $entry inside mpn_add_n, which lies just past the
+C instruction that clears cy0, so the supplied carry is kept.
+PROLOGUE(mpn_add_nc)
+       br      r31,    $entry
+EPILOGUE()
+C mpn_add_n -- add the n-limb vectors at up (r17) and vp (r18), store the sum
+C at rp (r16) and return the carry out in r0.
+C
+C Carry scheme used throughout: the carry out of each data add is detected
+C with cmpult (sum < addend).  Adding an incoming carry of 0 or 1 to that sum
+C can only wrap when the result is exactly zero, so every carry add is
+C followed by a beq to an out-of-line $fixN stub that ORs the incoming carry
+C into the outgoing one and then branches back to the matching $retN.
+PROLOGUE(mpn_add_n)
+       bis     r31,    r31,    cy0     C clear carry in
+C Shared entry point (mpn_add_nc arrives here with cy0 set by its caller).
+C r22 = (n < 5) selects the simple loop at $Lsmall.
+$entry:        cmpult  r19,    5,      r22     C L1 move counter
+       ldq     u1,     0(r17)          C L0 get next ones
+       ldq     v1,     0(r18)          C L1
+       bne     r22,    $Lsmall
+
+C Feed-in for the unrolled loop.  Five limb pairs (offsets 0..32) are loaded
+C here; their sums are stored later, by the loop or by the wind-down at $Lend.
+       ldq     u0,     8(r17)          C L0 get next ones
+       ldq     v0,     8(r18)          C L1
+       addq    u1,     v1,     r5      C U0 add two data
+
+       cmpult  r5,     v1,     r23     C U0 did it carry
+       ldq     u1,     16(r17)         C L0 get next ones
+       ldq     v1,     16(r18)         C L1
+
+       addq    u0,     v0,     r8      C U1 add two data
+       addq    r5,     cy0,    r5      C U0 carry in
+
+       cmpult  r8,     v0,     r22     C U1 did it carry
+       beq     r5,     $fix5f          C U0 fix exact zero
+$ret5f:        ldq     u0,     24(r17)         C L0 get next ones
+       ldq     v0,     24(r18)         C L1
+
+       addq    r8,     r23,    r8      C U1 carry from last
+       addq    u1,     v1,     r7      C U0 add two data
+
+       beq     r8,     $fix6f          C U1 fix exact zero
+$ret6f:        cmpult  r7,     v1,     r23     C U0 did it carry
+       ldq     u1,     32(r17)         C L0 get next ones
+       ldq     v1,     32(r18)         C L1
+
+       lda     r17,    40(r17)         C L0 move pointer
+       lda     r18,    40(r18)         C L1 move pointer
+
+C rp is biased by -8 so that the first pending store goes to 8(r16).
+C r19 becomes n - 13: five limbs belong to feed-in/wind-down and each loop
+C pass consumes eight, so the loop is skipped when fewer than 13 limbs exist.
+       lda     r16,    -8(r16)
+       lda     r19,    -13(r19)        C L1 move counter
+       blt     r19,    $Lend           C U1 loop control
+
+
+C Main loop.  8-way unrolled.
+       ALIGN(16)
+$Loop: addq    u0,     v0,     r2      C U1 add two data
+       addq    r7,     r22,    r7      C U0 add in carry
+       stq     r5,     8(r16)          C L0 put an answer
+       stq     r8,     16(r16)         C L1 pair
+
+       cmpult  r2,     v0,     cy1     C U1 did it carry
+       beq     r7,     $fix7           C U0 fix exact 0
+$ret7: ldq     u0,     0(r17)          C L0 get next ones
+       ldq     v0,     0(r18)          C L1
+
+       bis     r31,    r31,    r31     C L  damp out
+       addq    r2,     r23,    r2      C U1 carry from last
+       bis     r31,    r31,    r31     C L  moves in L !
+       addq    u1,     v1,     r5      C U0 add two data
+
+       beq     r2,     $fix0           C U1 fix exact zero
+$ret0: cmpult  r5,     v1,     cy0     C U0 did it carry
+       ldq     u1,     8(r17)          C L0 get next ones
+       ldq     v1,     8(r18)          C L1
+
+       addq    u0,     v0,     r8      C U1 add two data
+       addq    r5,     cy1,    r5      C U0 carry from last
+       stq     r7,     24(r16)         C L0 store pair
+       stq     r2,     32(r16)         C L1
+
+       cmpult  r8,     v0,     r22     C U1 did it carry
+       beq     r5,     $fix1           C U0 fix exact zero
+$ret1: ldq     u0,     16(r17)         C L0 get next ones
+       ldq     v0,     16(r18)         C L1
+
+C rp advances here, in the middle of the pass; the remaining stores of this
+C pass therefore use negative offsets.
+       lda     r16,    64(r16)         C L0 move pointer
+       addq    r8,     cy0,    r8      C U1 carry from last
+       lda     r19,    -8(r19)         C L1 move counter
+       addq    u1,     v1,     r7      C U0 add two data
+
+       beq     r8,     $fix2           C U1 fix exact zero
+$ret2: cmpult  r7,     v1,     r23     C U0 did it carry
+       ldq     u1,     24(r17)         C L0 get next ones
+       ldq     v1,     24(r18)         C L1
+
+       addq    u0,     v0,     r2      C U1 add two data
+       addq    r7,     r22,    r7      C U0 add in carry
+       stq     r5,     -24(r16)        C L0 put an answer
+       stq     r8,     -16(r16)        C L1 pair
+
+       cmpult  r2,     v0,     cy1     C U1 did it carry
+       beq     r7,     $fix3           C U0 fix exact 0
+$ret3: ldq     u0,     32(r17)         C L0 get next ones
+       ldq     v0,     32(r18)         C L1
+
+       bis     r31,    r31,    r31     C L  damp out
+       addq    r2,     r23,    r2      C U1 carry from last
+       bis     r31,    r31,    r31     C L  moves in L !
+       addq    u1,     v1,     r5      C U0 add two data
+
+       beq     r2,     $fix4           C U1 fix exact zero
+$ret4: cmpult  r5,     v1,     cy0     C U0 did it carry
+       ldq     u1,     40(r17)         C L0 get next ones
+       ldq     v1,     40(r18)         C L1
+
+       addq    u0,     v0,     r8      C U1 add two data
+       addq    r5,     cy1,    r5      C U0 carry from last
+       stq     r7,     -8(r16)         C L0 store pair
+       stq     r2,     0(r16)          C L1
+
+       cmpult  r8,     v0,     r22     C U1 did it carry
+       beq     r5,     $fix5           C U0 fix exact zero
+$ret5: ldq     u0,     48(r17)         C L0 get next ones
+       ldq     v0,     48(r18)         C L1
+
+       ldl     r31, 256(r17)           C L0 prefetch
+       addq    r8,     cy0,    r8      C U1 carry from last
+       ldl     r31, 256(r18)           C L1 prefetch
+       addq    u1,     v1,     r7      C U0 add two data
+
+       beq     r8,     $fix6           C U1 fix exact zero
+$ret6: cmpult  r7,     v1,     r23     C U0 did it carry
+       ldq     u1,     56(r17)         C L0 get next ones
+       ldq     v1,     56(r18)         C L1
+
+       lda     r17,    64(r17)         C L0 move pointer
+       bis     r31,    r31,    r31     C U
+       lda     r18,    64(r18)         C L1 move pointer
+       bge     r19,    $Loop           C U1 loop control
+C ==== main loop end
+
+C Wind-down: finish and store the five limbs still in flight, without
+C loading any further operands.
+$Lend: addq    u0,     v0,     r2      C U1 add two data
+       addq    r7,     r22,    r7      C U0 add in carry
+       stq     r5,     8(r16)          C L0 put an answer
+       stq     r8,     16(r16)         C L1 pair
+       cmpult  r2,     v0,     cy1     C U1 did it carry
+       beq     r7,     $fix7c          C U0 fix exact 0
+$ret7c:        addq    r2,     r23,    r2      C U1 carry from last
+       addq    u1,     v1,     r5      C U0 add two data
+       beq     r2,     $fix0c          C U1 fix exact zero
+$ret0c:        cmpult  r5,     v1,     cy0     C U0 did it carry
+       addq    r5,     cy1,    r5      C U0 carry from last
+       stq     r7,     24(r16)         C L0 store pair
+       stq     r2,     32(r16)         C L1
+       beq     r5,     $fix1c          C U0 fix exact zero
+$ret1c:        stq     r5,     40(r16)         C L0 put an answer
+       lda     r16,    48(r16)         C L0 move pointer
+
+C r19 is in -8..-1 here; adding 8 gives the number of limbs left (0..7).
+C With none left the carry in cy0 is returned via $Lret, otherwise the
+C leftover limbs go through the simple loop with cy0 as carry-in.
+       lda     r19,    8(r19)
+       beq     r19,    $Lret
+
+       ldq     u1,     0(r17)
+       ldq     v1,     0(r18)
+C Simple loop: one limb per pass, carry kept in cy0.  u1/v1 already hold the
+C current limb pair on entry; the last limb is handled at $Lend0 so that no
+C load is issued beyond the end of the operands.
+$Lsmall:
+       lda     r19,    -1(r19)
+       beq     r19,    $Lend0
+
+       ALIGN(8)
+$Loop0:        addq    u1,     v1,     r2      C main add
+       cmpult  r2,     v1,     r8      C compute cy from last add
+       ldq     u1,     8(r17)
+       ldq     v1,     8(r18)
+       addq    r2,     cy0,    r5      C carry add
+       lda     r17,    8(r17)
+       lda     r18,    8(r18)
+       stq     r5,     0(r16)
+       cmpult  r5,     r2,     cy0     C compute cy from last add
+       lda     r19,    -1(r19)         C decr loop cnt
+       bis     r8,     cy0,    cy0     C combine cy from the two adds
+       lda     r16,    8(r16)
+       bne     r19,    $Loop0
+$Lend0:        addq    u1,     v1,     r2      C main add
+       addq    r2,     cy0,    r5      C carry add
+       cmpult  r2,     v1,     r8      C compute cy from last add
+       cmpult  r5,     r2,     cy0     C compute cy from last add
+       stq     r5,     0(r16)
+       bis     r8,     cy0,    r0      C combine cy from the two adds
+       ret     r31,(r26),1
+
+       ALIGN(8)
+$Lret: lda     r0,     0(cy0)          C copy carry into return register
+       ret     r31,(r26),1
+
+C Out-of-line fix-ups for a carry add whose result was exactly zero.  Each
+C stub ORs the carry that was just added into the carry-out register of the
+C same limb and branches back.  The f-suffixed stubs serve the feed-in code,
+C the c-suffixed ones the wind-down, the plain ones the main loop.
+$fix5f:        bis     r23,    cy0,    r23     C bring forward carry
+       br      r31,    $ret5f
+$fix6f:        bis     r22,    r23,    r22     C bring forward carry
+       br      r31,    $ret6f
+$fix0: bis     cy1,    r23,    cy1     C bring forward carry
+       br      r31,    $ret0
+$fix1: bis     cy0,    cy1,    cy0     C bring forward carry
+       br      r31,    $ret1
+$fix2: bis     r22,    cy0,    r22     C bring forward carry
+       br      r31,    $ret2
+$fix3: bis     r23,    r22,    r23     C bring forward carry
+       br      r31,    $ret3
+$fix4: bis     cy1,    r23,    cy1     C bring forward carry
+       br      r31,    $ret4
+$fix5: bis     cy1,    cy0,    cy0     C bring forward carry
+       br      r31,    $ret5
+$fix6: bis     r22,    cy0,    r22     C bring forward carry
+       br      r31,    $ret6
+$fix7: bis     r23,    r22,    r23     C bring forward carry
+       br      r31,    $ret7
+$fix0c:        bis     cy1,    r23,    cy1     C bring forward carry
+       br      r31,    $ret0c
+$fix1c:        bis     cy0,    cy1,    cy0     C bring forward carry
+       br      r31,    $ret1c
+$fix7c:        bis     r23,    r22,    r23     C bring forward carry
+       br      r31,    $ret7c
+
+EPILOGUE()
+ASM_END()
diff --git a/mpn/alpha/ev6/aorsmul_1.asm b/mpn/alpha/ev6/aorsmul_1.asm

new file mode 100644 (file)

index 0000000..eda092b
--- /dev/null
+++ b/mpn/alpha/ev6/aorsmul_1.asm
@@ -0,0 +1,387 @@
+dnl  Alpha ev6 mpn_addmul_1 and mpn_submul_1.
+
+dnl  Copyright 2000, 2003, 2004, 2005, 2008 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C      cycles/limb
+C EV4:    42
+C EV5:    18
+C EV6:     3.5
+
+C  INPUT PARAMETERS
+C  rp = result vector (read and written back), up = source vector,
+C  n = limb count, v0 = multiplier limb.  The code below refers to the
+C  count register directly as r18 rather than through the n alias.
+define(`rp',   `r16')
+define(`up',   `r17')
+define(`n',    `r18')
+define(`v0',   `r19')
+
+dnl  This code was written in cooperation with ev6 pipeline expert Steve Root.
+
+dnl  The stores can issue a cycle late so we have paired no-op's to 'catch'
+dnl  them, so that further disturbance to the schedule is damped.
+
+dnl  We couldn't pair the loads, because the entangled schedule of the carries
+dnl  has to happen on one side {0} of the machine.
+
+dnl  This is a great schedule for the d_cache, a poor schedule for the b_cache.
+dnl  The lockup on U0 means that any stall can't be recovered from.  Consider a
+dnl  ldq in L1, say that load gets stalled because it collides with a fill from
+dnl  the b_cache.  On the next cycle, this load gets priority.  It first looks
+dnl  at L0, and goes there.  The instruction we intended for L0 gets to look at
+dnl  L1, which is NOT where we want it.  It either stalls 1, because it can't
+dnl  go in L0, or goes there, and causes a further instruction to stall.
+
+dnl  So for b_cache, we're likely going to want to put one or more cycles back
+dnl  into the code! And, of course, put in lds prefetch for the rp[] operand.
+dnl  At a place where we have an mt followed by a bookkeeping, put the
+dnl  bookkeeping in upper, and the prefetch into lower.
+
+dnl  Note, the ldq's and stq's are at the end of the quadpacks.  Note, we'd
+dnl  like not to have an ldq or an stq to precede a conditional branch in a
+dnl  quadpack.  The conditional branch moves the retire pointer one cycle
+dnl  later.
+
+dnl  Select the operation being built.  ADDSUB is the instruction that
+dnl  combines an rp limb with a product limb (addq or subq).  CMPCY(a,b)
+dnl  expands to the first part of a cmpult whose destination register is
+dnl  written after the macro call; a is the operand before ADDSUB and b the
+dnl  ADDSUB result.  For addmul it tests b < a (carry out), for submul it
+dnl  tests a < b (borrow out).  func is the name of the generated routine.
+ifdef(`OPERATION_addmul_1',`
+    define(`ADDSUB',   `addq')
+    define(`CMPCY',    `cmpult $2,$1')
+    define(`func',     `mpn_addmul_1')
+')
+ifdef(`OPERATION_submul_1',`
+    define(`ADDSUB',   `subq')
+    define(`CMPCY',    `cmpult $1,$2')
+    define(`func',     `mpn_submul_1')
+')
+
+MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
+
+ASM_START()
+C func (mpn_addmul_1 or mpn_submul_1) -- multiply the n limbs at up by the
+C limb v0, add the products to (ADDSUB = addq) or subtract them from
+C (ADDSUB = subq) the limbs at rp, store the result at rp and return the
+C final carry/borrow limb in r0.
+C
+C Structure: the code dispatches on n mod 8.  Each odd residue first handles
+C one limb on its own and then falls into the code for the even residue
+C below it.  The even residues start the first multiplies and enter the
+C 8-limbs-per-pass loop at $Loop, $ent2, $ent0 or $ent6.  r18 holds n - 9
+C and is decremented by 8 once per pass (at $ent2).  The running carry limb
+C is passed in r8 or r24, depending on the entry point.
+PROLOGUE(func)
+       ldq     r3,     0(up)           C r3 = up[0]
+       and     r18,    7,      r20     C r20 = n mod 8
+       lda     r18,    -9(r18)         C r18 = n - 9
+       cmpeq   r20,    1,      r21     C
+       beq     r21,    $L1             C
+
+C n mod 8 = 1: do one limb.  With r18 >= 0 (n >= 9) continue at $ent1,
+C otherwise n is 1 and the carry limb in r0 is the result.
+$1mod8:        ldq     r5,     0(rp)           C
+       mulq    v0,     r3,     r7      C
+       umulh   v0,     r3,     r8      C
+       ADDSUB  r5,     r7,     r23     C
+       CMPCY(  r5,     r23),   r20     C
+       addq    r8,     r20,    r0      C
+       stq     r23,    0(rp)           C
+       bge     r18,    $ent1           C
+       ret     r31,    (r26),  1       C
+
+C Remaining residues: clear both carry registers, then test r20 against
+C 2..7 in turn; if none matches, n mod 8 is 0.
+$L1:   lda     r8,     0(r31)          C zero carry reg
+       lda     r24,    0(r31)          C zero carry reg
+       cmpeq   r20,    2,      r21     C
+       bne     r21,    $2mod8          C
+       cmpeq   r20,    3,      r21     C
+       bne     r21,    $3mod8          C
+       cmpeq   r20,    4,      r21     C
+       bne     r21,    $4mod8          C
+       cmpeq   r20,    5,      r21     C
+       bne     r21,    $5mod8          C
+       cmpeq   r20,    6,      r21     C
+       bne     r21,    $6mod8          C
+       cmpeq   r20,    7,      r21     C
+       beq     r21,    $0mod8          C
+
+C n mod 8 = 7: one limb, carry left in r24, then fall into the 6 mod 8 code.
+$7mod8:        ldq     r5,     0(rp)           C
+       lda     up,     8(up)           C
+       mulq    v0,     r3,     r7      C
+       umulh   v0,     r3,     r24     C
+       ADDSUB  r5,     r7,     r23     C
+       CMPCY(  r5,     r23),   r20     C
+       addq    r24,    r20,    r24     C
+       stq     r23,    0(rp)           C
+       lda     rp,     8(rp)           C
+       ldq     r3,     0(up)           C
+C n mod 8 = 6: start the first multiplies, bias up/rp to match the offsets
+C used after $ent6, and enter the loop there (carry-in in r24).
+$6mod8:        ldq     r1,     8(up)           C
+       mulq    v0,     r3,     r25     C
+       umulh   v0,     r3,     r3      C
+       mulq    v0,     r1,     r28     C
+       ldq     r0,     16(up)          C
+       ldq     r4,     0(rp)           C
+       umulh   v0,     r1,     r8      C
+       ldq     r1,     24(up)          C
+       lda     up,     48(up)          C L1 bookkeeping
+       mulq    v0,     r0,     r2      C
+       ldq     r5,     8(rp)           C
+       lda     rp,     -32(rp)         C L1 bookkeeping
+       umulh   v0,     r0,     r6      C
+       ADDSUB  r4,     r25,    r25     C lo + acc
+       mulq    v0,     r1,     r7      C
+       br      r31,    $ent6           C
+
+C Continuation of the 1 mod 8 case for n >= 9: step past the limb already
+C done, move its carry limb from r0 to r8 and fall into the 0 mod 8 code.
+$ent1: lda     up,     8(up)           C
+       lda     rp,     8(rp)           C
+       lda     r8,     0(r0)           C
+       ldq     r3,     0(up)           C
+C n mod 8 = 0: start the first multiplies and enter the loop at $ent0
+C (carry-in in r8).
+$0mod8:        ldq     r1,     8(up)           C
+       mulq    v0,     r3,     r2      C
+       umulh   v0,     r3,     r6      C
+       mulq    v0,     r1,     r7      C
+       ldq     r0,     16(up)          C
+       ldq     r4,     0(rp)           C
+       umulh   v0,     r1,     r24     C
+       ldq     r1,     24(up)          C
+       mulq    v0,     r0,     r25     C
+       ldq     r5,     8(rp)           C
+       umulh   v0,     r0,     r3      C
+       ADDSUB  r4,     r2,     r2      C lo + acc
+       mulq    v0,     r1,     r28     C
+       lda     rp,     -16(rp)         C
+       br      r31,    $ent0           C
+
+C n mod 8 = 3: one limb, carry left in r24, then fall into the 2 mod 8 code.
+$3mod8:        ldq     r5,     0(rp)           C
+       lda     up,     8(up)           C
+       mulq    v0,     r3,     r7      C
+       umulh   v0,     r3,     r8      C
+       ADDSUB  r5,     r7,     r23     C
+       CMPCY(  r5,     r23),   r20     C
+       addq    r8,     r20,    r24     C
+       stq     r23,    0(rp)           C
+       lda     rp,     8(rp)           C
+       ldq     r3,     0(up)           C
+C n mod 8 = 2: with r18 <= 0 only two limbs remain (n is 2 or 3) and they
+C are finished at $n23 without touching further operands; otherwise enter
+C the loop at $ent2 (carry-in in r24).
+$2mod8:        ldq     r1,     8(up)           C
+       mulq    v0,     r3,     r25     C
+       umulh   v0,     r3,     r3      C
+       mulq    v0,     r1,     r28     C
+       ble     r18,    $n23            C
+       ldq     r0,     16(up)          C
+       ldq     r4,     0(rp)           C
+       umulh   v0,     r1,     r8      C
+       ldq     r1,     24(up)          C
+       lda     up,     16(up)          C L1 bookkeeping
+       mulq    v0,     r0,     r2      C
+       ldq     r5,     8(rp)           C
+       lda     rp,     0(rp)           C L1 bookkeeping
+       umulh   v0,     r0,     r6      C
+       ADDSUB  r4,     r25,    r25     C lo + acc
+       mulq    v0,     r1,     r7      C
+       br      r31,    $ent2           C
+
+C n mod 8 = 5: one limb, carry left in r8, then fall into the 4 mod 8 code.
+$5mod8:        ldq     r5,     0(rp)           C
+       lda     up,     8(up)           C
+       mulq    v0,     r3,     r7      C
+       umulh   v0,     r3,     r24     C
+       ADDSUB  r5,     r7,     r23     C
+       CMPCY(  r5,     r23),   r20     C
+       addq    r24,    r20,    r8      C
+       stq     r23,    0(rp)           C
+       lda     rp,     8(rp)           C
+       ldq     r3,     0(up)           C
+C n mod 8 = 4: start the first multiplies and fall into the top of the loop
+C (carry-in in r8), or go straight to the wind-down when r18 <= 0.
+$4mod8:        ldq     r1,     8(up)           C
+       mulq    v0,     r3,     r2      C
+       umulh   v0,     r3,     r6      C
+       mulq    v0,     r1,     r7      C
+       ldq     r0,     16(up)          C
+       ldq     r4,     0(rp)           C
+       umulh   v0,     r1,     r24     C
+       ldq     r1,     24(up)          C
+       lda     up,     32(up)          C L1 bookkeeping
+       mulq    v0,     r0,     r25     C
+       ldq     r5,     8(rp)           C
+       lda     rp,     16(rp)          C L1 bookkeeping
+       umulh   v0,     r0,     r3      C
+       ADDSUB  r4,     r2,     r2      C lo + acc
+       mulq    v0,     r1,     r28     C
+       CMPCY(  r4,     r2),    r20     C L0 lo add => carry
+       ADDSUB  r2,     r8,     r22     C U0 hi add => answer
+       ble     r18,    $Lend           C
+C Main loop, eight limbs per pass in four groups of two.  Each group ends at
+C one of the entry labels ($ent2, $ent0, $ent6, loop bottom).  up and rp are
+C advanced by 64 in mid-pass, hence the mix of positive and negative offsets.
+       ALIGN(16)
+$Loop:
+       bis     r31,    r31,    r31     C U1 mt
+       CMPCY(  r2,     r22),   r21     C L0 hi add => carry
+       addq    r6,     r20,    r6      C U0 hi mul + carry
+       ldq     r0,     0(up)           C
+
+       bis     r31,    r31,    r31     C U1 mt
+       ADDSUB  r5,     r7,     r7      C L0 lo + acc
+       addq    r6,     r21,    r6      C U0 hi mul + carry
+       ldq     r4,     0(rp)           C L1
+
+       umulh   v0,     r1,     r8      C U1
+       CMPCY(  r5,     r7),    r20     C L0 lo add => carry
+       ADDSUB  r7,     r6,     r23     C U0 hi add => answer
+       ldq     r1,     8(up)           C L1
+
+       mulq    v0,     r0,     r2      C U1
+       CMPCY(  r7,     r23),   r21     C L0 hi add => carry
+       addq    r24,    r20,    r24     C U0 hi mul + carry
+       ldq     r5,     8(rp)           C L1
+
+       umulh   v0,     r0,     r6      C U1
+       ADDSUB  r4,     r25,    r25     C U0 lo + acc
+       stq     r22,    -16(rp)         C L0
+       stq     r23,    -8(rp)          C L1
+
+       bis     r31,    r31,    r31     C L0 st slosh
+       mulq    v0,     r1,     r7      C U1
+       bis     r31,    r31,    r31     C L1 st slosh
+       addq    r24,    r21,    r24     C U0 hi mul + carry
+$ent2:
+       CMPCY(  r4,     r25),   r20     C L0 lo add => carry
+       bis     r31,    r31,    r31     C U1 mt
+       lda     r18,    -8(r18)         C L1 bookkeeping
+       ADDSUB  r25,    r24,    r22     C U0 hi add => answer
+
+       bis     r31,    r31,    r31     C U1 mt
+       CMPCY(  r25,    r22),   r21     C L0 hi add => carry
+       addq    r3,     r20,    r3      C U0 hi mul + carry
+       ldq     r0,     16(up)          C L1
+
+       bis     r31,    r31,    r31     C U1 mt
+       ADDSUB  r5,     r28,    r28     C L0 lo + acc
+       addq    r3,     r21,    r3      C U0 hi mul + carry
+       ldq     r4,     16(rp)          C L1
+
+       umulh   v0,     r1,     r24     C U1
+       CMPCY(  r5,     r28),   r20     C L0 lo add => carry
+       ADDSUB  r28,    r3,     r23     C U0 hi add => answer
+       ldq     r1,     24(up)          C L1
+
+       mulq    v0,     r0,     r25     C U1
+       CMPCY(  r28,    r23),   r21     C L0 hi add => carry
+       addq    r8,     r20,    r8      C U0 hi mul + carry
+       ldq     r5,     24(rp)          C L1
+
+       umulh   v0,     r0,     r3      C U1
+       ADDSUB  r4,     r2,     r2      C U0 lo + acc
+       stq     r22,    0(rp)           C L0
+       stq     r23,    8(rp)           C L1
+
+       bis     r31,    r31,    r31     C L0 st slosh
+       mulq    v0,     r1,     r28     C U1
+       bis     r31,    r31,    r31     C L1 st slosh
+       addq    r8,     r21,    r8      C U0 hi mul + carry
+$ent0:
+       CMPCY(  r4,     r2),    r20     C L0 lo add => carry
+       bis     r31,    r31,    r31     C U1 mt
+       lda     up,     64(up)          C L1 bookkeeping
+       ADDSUB  r2,     r8,     r22     C U0 hi add => answer
+
+       bis     r31,    r31,    r31     C U1 mt
+       CMPCY(  r2,     r22),   r21     C L0 hi add => carry
+       addq    r6,     r20,    r6      C U0 hi mul + carry
+       ldq     r0,     -32(up)         C L1
+
+       bis     r31,    r31,    r31     C U1 mt
+       ADDSUB  r5,     r7,     r7      C L0 lo + acc
+       addq    r6,     r21,    r6      C U0 hi mul + carry
+       ldq     r4,     32(rp)          C L1
+
+       umulh   v0,     r1,     r8      C U1
+       CMPCY(  r5,     r7),    r20     C L0 lo add => carry
+       ADDSUB  r7,     r6,     r23     C U0 hi add => answer
+       ldq     r1,     -24(up)         C L1
+
+       mulq    v0,     r0,     r2      C U1
+       CMPCY(  r7,     r23),   r21     C L0 hi add => carry
+       addq    r24,    r20,    r24     C U0 hi mul + carry
+       ldq     r5,     40(rp)          C L1
+
+       umulh   v0,     r0,     r6      C U1
+       ADDSUB  r4,     r25,    r25     C U0 lo + acc
+       stq     r22,    16(rp)          C L0
+       stq     r23,    24(rp)          C L1
+
+       bis     r31,    r31,    r31     C L0 st slosh
+       mulq    v0,     r1,     r7      C U1
+       bis     r31,    r31,    r31     C L1 st slosh
+       addq    r24,    r21,    r24     C U0 hi mul + carry
+$ent6:
+       CMPCY(  r4,     r25),   r20     C L0 lo add => carry
+       bis     r31,    r31,    r31     C U1 mt
+       lda     rp,     64(rp)          C L1 bookkeeping
+       ADDSUB  r25,    r24,    r22     C U0 hi add => answer
+
+       bis     r31,    r31,    r31     C U1 mt
+       CMPCY(  r25,    r22),   r21     C L0 hi add => carry
+       addq    r3,     r20,    r3      C U0 hi mul + carry
+       ldq     r0,     -16(up)         C L1
+
+       bis     r31,    r31,    r31     C U1 mt
+       ADDSUB  r5,     r28,    r28     C L0 lo + acc
+       addq    r3,     r21,    r3      C U0 hi mul + carry
+       ldq     r4,     -16(rp)         C L1
+
+       umulh   v0,     r1,     r24     C U1
+       CMPCY(  r5,     r28),   r20     C L0 lo add => carry
+       ADDSUB  r28,    r3,     r23     C U0 hi add => answer
+       ldq     r1,     -8(up)          C L1
+
+       mulq    v0,     r0,     r25     C U1
+       CMPCY(  r28,    r23),   r21     C L0 hi add => carry
+       addq    r8,     r20,    r8      C U0 hi mul + carry
+       ldq     r5,     -8(rp)          C L1
+
+       umulh   v0,     r0,     r3      C U1
+       ADDSUB  r4,     r2,     r2      C U0 lo + acc
+       stq     r22,    -32(rp)         C L0
+       stq     r23,    -24(rp)         C L1
+
+       bis     r31,    r31,    r31     C L0 st slosh
+       mulq    v0,     r1,     r28     C U1
+       bis     r31,    r31,    r31     C L1 st slosh
+       addq    r8,     r21,    r8      C U0 hi mul + carry
+
+       CMPCY(  r4,     r2),    r20     C L0 lo add => carry
+       ADDSUB  r2,     r8,     r22     C U0 hi add => answer
+       ldl     r31,    256(up)         C prefetch up[]
+       bgt     r18,    $Loop           C U1 bookkeeping
+
+C Wind-down: finish the four limbs still in flight.  The first two are
+C stored here, the last two by the code at L(x) shared with $n23.  No
+C further up[] limbs are loaded.
+$Lend: CMPCY(  r2,     r22),   r21     C
+       addq    r6,     r20,    r6      C
+       ADDSUB  r5,     r7,     r7      C
+       addq    r6,     r21,    r6      C
+       ldq     r4,     0(rp)           C
+       umulh   v0,     r1,     r8      C
+       CMPCY(  r5,     r7),    r20     C
+       ADDSUB  r7,     r6,     r23     C
+       CMPCY(r7,       r23),   r21     C
+       addq    r24,    r20,    r24     C
+       ldq     r5,     8(rp)           C
+       ADDSUB  r4,     r25,    r25     C
+       stq     r22,    -16(rp)         C
+       stq     r23,    -8(rp)          C
+       addq    r24,    r21,    r24     C
+       br      L(x)
+
+C Short case reached from $2mod8 when n is 2 or 3: the two products are
+C already started in r25/r3 and r28; load the two rp limbs, take the high
+C half of the second product and finish through L(x).  The last carry limb
+C is returned in r0.
+       ALIGN(16)
+$n23:  ldq     r4,     0(rp)           C
+       ldq     r5,     8(rp)           C
+       umulh   v0,     r1,     r8      C
+       ADDSUB  r4,     r25,    r25     C
+L(x):  CMPCY(  r4,     r25),   r20     C
+       ADDSUB  r25,    r24,    r22     C
+       CMPCY(  r25,    r22),   r21     C
+       addq    r3,     r20,    r3      C
+       ADDSUB  r5,     r28,    r28     C
+       addq    r3,     r21,    r3      C
+       CMPCY(  r5,     r28),   r20     C
+       ADDSUB  r28,    r3,     r23     C
+       CMPCY(  r28,    r23),   r21     C
+       addq    r8,     r20,    r8      C
+       stq     r22,    0(rp)           C
+       stq     r23,    8(rp)           C
+       addq    r8,     r21,    r0      C
+       ret     r31,    (r26),  1       C
+EPILOGUE()
+ASM_END()
diff --git a/mpn/alpha/ev6/gmp-mparam.h b/mpn/alpha/ev6/gmp-mparam.h

new file mode 100644 (file)

index 0000000..7541a4e
--- /dev/null
+++ b/mpn/alpha/ev6/gmp-mparam.h
@@ -0,0 +1,181 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2004, 2005, 2008, 2009,
+2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 64
+#define BYTES_PER_MP_LIMB 8  /* 8 bytes * 8 bits = GMP_LIMB_BITS */
+
+#define DIVEXACT_BY3_METHOD 0  /* override ../diveby3.asm */
+
+/* 500 MHz 21164 (agnesi.math.su.se) -- NOTE(review): this header lives under mpn/alpha/ev6; confirm the CPU model of the tuning host */
+
+/* Generated by tuneup.c, 2009-11-29, gcc 3.3 */
+
+#define DIVREM_1_NORM_THRESHOLD              0  /* preinv always */
+#define DIVREM_1_UNNORM_THRESHOLD            0  /* always */
+#define MOD_1_NORM_THRESHOLD                 0  /* always */
+#define MOD_1_UNNORM_THRESHOLD               0  /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          3
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          2
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         8
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        30
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD      7
+#define USE_PREINV_DIVREM_1                  1  /* preinv always */
+#define DIVEXACT_1_THRESHOLD                 0  /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD           18
+
+#define MUL_TOOM22_THRESHOLD                35
+#define MUL_TOOM33_THRESHOLD                74
+#define MUL_TOOM44_THRESHOLD               178
+#define MUL_TOOM6H_THRESHOLD               288
+#define MUL_TOOM8H_THRESHOLD               333
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      75
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     101
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     105
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD     105
+
+#define SQR_BASECASE_THRESHOLD               5
+#define SQR_TOOM2_THRESHOLD                 61
+#define SQR_TOOM3_THRESHOLD                107
+#define SQR_TOOM4_THRESHOLD                170
+#define SQR_TOOM6_THRESHOLD                309
+#define SQR_TOOM8_THRESHOLD                360
+
+#define MULMOD_BNM1_THRESHOLD               20
+#define SQRMOD_BNM1_THRESHOLD               23
+
+#define MUL_FFT_MODF_THRESHOLD             480  /* k = 5; same pair as the first MUL_FFT_TABLE3 entry */
+#define MUL_FFT_TABLE3                                      \
+  { {    480, 5}, {     18, 6}, {     10, 5}, {     21, 6}, \
+    {     11, 5}, {     23, 6}, {     12, 5}, {     25, 6}, \
+    {     19, 7}, {     10, 6}, {     25, 7}, {     13, 6}, \
+    {     27, 7}, {     14, 6}, {     29, 7}, {     25, 8}, \
+    {     13, 7}, {     29, 8}, {     15, 7}, {     32, 8}, \
+    {     17, 7}, {     35, 8}, {     19, 7}, {     39, 8}, \
+    {     29, 9}, {     15, 8}, {     37, 9}, {     19, 8}, \
+    {     41, 9}, {     23, 8}, {     51, 9}, {     27, 8}, \
+    {     55, 9}, {     31, 8}, {     63, 9}, {     35, 8}, \
+    {     71, 9}, {     39,10}, {     23, 9}, {     55,10}, \
+    {     31, 9}, {     67,10}, {     39, 9}, {     83,10}, \
+    {     47, 9}, {     99,10}, {     55,11}, {     31,10}, \
+    {     79,11}, {     47,10}, {    103,12}, {     31,11}, \
+    {     63,10}, {    135,11}, {     79,10}, {    167,11}, \
+    {     95,10}, {    191,11}, {    111,12}, {     63,11}, \
+    {    127,10}, {    255,11}, {    143,10}, {    287, 9}, \
+    {    575,11}, {    159,10}, {    319,12}, {     95,11}, \
+    {    191,10}, {    383,11}, {    207,13}, {     63,12}, \
+    {    127,11}, {    255,10}, {    511,11}, {    271,10}, \
+    {    543,11}, {    287,10}, {    575,12}, {    159,11}, \
+    {    319,10}, {    639,11}, {    351,10}, {    703,12}, \
+    {    191,11}, {    383,10}, {    767,11}, {    415,10}, \
+    {    831,11}, {    447,13}, {    127,12}, {    255,11}, \
+    {    543,12}, {    287,11}, {    575,10}, {   1151,12}, \
+    {    319,11}, {    639,12}, {    351,11}, {    703,13}, \
+    {    191,12}, {    383,11}, {    767,12}, {    415,11}, \
+    {    831,12}, {    447,11}, {    895,14}, {    127,13}, \
+    {    255,12}, {    543,11}, {   1087,12}, {    575,11}, \
+    {   1151,12}, {    607,13}, {    319,12}, {    671,11}, \
+    {   1343,12}, {    703,13}, {    383,12}, {    831,13}, \
+    {    447,12}, {    927,14}, {    255,13}, {    511,12}, \
+    {   1087,13}, {    575,12}, {   1151,13}, {    639,12}, \
+    {   1279,13}, {    703,12}, {   1407,14}, {    383,13}, \
+    {    767,15}, {    255,14}, {    511,13}, {   1215,14}, \
+    {    639,13}, {   1407,14}, {    767,13}, {   1663,14}, \
+    {    895,13}, {   1791,15}, {  32768,16}, {  65536,17}, \
+    { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
+    {2097152,22}, {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 151  /* entry count of MUL_FFT_TABLE3: 37 rows of 4 plus a last row of 3 */
+#define MUL_FFT_THRESHOLD                 5760
+
+#define SQR_FFT_MODF_THRESHOLD             476  /* k = 5; same pair as the first SQR_FFT_TABLE3 entry */
+#define SQR_FFT_TABLE3                                      \
+  { {    476, 5}, {     19, 6}, {     10, 5}, {     23, 6}, \
+    {     12, 5}, {     25, 6}, {     27, 7}, {     14, 6}, \
+    {     29, 7}, {     28, 8}, {     15, 7}, {     31, 8}, \
+    {     29, 9}, {     15, 8}, {     35, 9}, {     19, 8}, \
+    {     41, 9}, {     23, 8}, {     49, 9}, {     27,10}, \
+    {     15, 9}, {     35, 8}, {     71, 9}, {     39,10}, \
+    {     23, 9}, {     51,11}, {     15,10}, {     31, 9}, \
+    {     67,10}, {     39, 9}, {     79,10}, {     47, 9}, \
+    {     95,10}, {     55,11}, {     31,10}, {     79,11}, \
+    {     47,10}, {    103,12}, {     31,11}, {     63,10}, \
+    {    135,11}, {     79,10}, {    159, 9}, {    319,11}, \
+    {     95,10}, {    191, 9}, {    383,11}, {    111,12}, \
+    {     63,11}, {    127,10}, {    255, 9}, {    511,10}, \
+    {    271,11}, {    143,10}, {    287, 9}, {    575,10}, \
+    {    303,11}, {    159,10}, {    319,12}, {     95,11}, \
+    {    191,10}, {    383, 9}, {    767,13}, {     63,12}, \
+    {    127,11}, {    255,10}, {    511,11}, {    271,10}, \
+    {    543,11}, {    287,10}, {    575,11}, {    303,12}, \
+    {    159,11}, {    319,10}, {    639,11}, {    335,10}, \
+    {    671,11}, {    351,10}, {    703,11}, {    367,10}, \
+    {    735,12}, {    191,11}, {    383,10}, {    767,11}, \
+    {    415,10}, {    831,11}, {    447,10}, {    895,13}, \
+    {    127,12}, {    255,11}, {    543,12}, {    287,11}, \
+    {    575,10}, {   1151,11}, {    607,12}, {    319,11}, \
+    {    671,12}, {    351,11}, {    735,13}, {    191,12}, \
+    {    383,11}, {    767,12}, {    415,11}, {    831,12}, \
+    {    447,11}, {    895,12}, {    479,14}, {    127,13}, \
+    {    255,12}, {    575,11}, {   1151,12}, {    607,13}, \
+    {    319,12}, {    735,13}, {    383,12}, {    831,13}, \
+    {    447,12}, {    959,14}, {    255,13}, {    511,12}, \
+    {   1023,13}, {    575,12}, {   1215,13}, {    639,12}, \
+    {   1279,13}, {    703,12}, {   1407,14}, {    383,13}, \
+    {    767,12}, {   1535,13}, {    831,12}, {   1663,13}, \
+    {    895,12}, {   1791,15}, {    255,14}, {    511,13}, \
+    {   1215,14}, {    639,13}, {   1407,14}, {    767,13}, \
+    {   1663,14}, {    895,13}, {   1791,15}, {  32768,16}, \
+    {  65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
+    {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 152  /* entry count of SQR_FFT_TABLE3: 38 rows of 4 */
+#define SQR_FFT_THRESHOLD                 3136
+
+#define MULLO_BASECASE_THRESHOLD             0  /* always */
+#define MULLO_DC_THRESHOLD                 101
+#define MULLO_MUL_N_THRESHOLD            15604
+
+#define DC_DIV_QR_THRESHOLD                119
+#define DC_DIVAPPR_Q_THRESHOLD             390
+#define DC_BDIV_QR_THRESHOLD               110
+#define DC_BDIV_Q_THRESHOLD                318
+
+#define INV_MULMOD_BNM1_THRESHOLD           79
+#define INV_NEWTON_THRESHOLD               387
+#define INV_APPR_THRESHOLD                 381
+
+#define BINV_NEWTON_THRESHOLD              393
+#define REDC_1_TO_REDC_N_THRESHOLD         110
+
+#define MU_DIV_QR_THRESHOLD               1718
+#define MU_DIVAPPR_Q_THRESHOLD            1895
+#define MUPI_DIV_QR_THRESHOLD              180
+#define MU_BDIV_QR_THRESHOLD              1387
+#define MU_BDIV_Q_THRESHOLD               1652
+
+#define MATRIX22_STRASSEN_THRESHOLD         17
+#define HGCD_THRESHOLD                     282
+#define GCD_DC_THRESHOLD                  1138
+#define GCDEXT_DC_THRESHOLD                773
+#define JACOBI_BASE_METHOD                   3  /* NOTE(review): by its name a method selector, not a size threshold -- confirm */
+
+#define GET_STR_DC_THRESHOLD                14
+#define GET_STR_PRECOMPUTE_THRESHOLD        19
+#define SET_STR_DC_THRESHOLD              3754
+#define SET_STR_PRECOMPUTE_THRESHOLD      8097
diff --git a/mpn/alpha/ev6/mul_1.asm b/mpn/alpha/ev6/mul_1.asm

new file mode 100644 (file)

index 0000000..d3f138d
--- /dev/null
+++ b/mpn/alpha/ev6/mul_1.asm
@@ -0,0 +1,485 @@
+dnl  Alpha ev6 mpn_mul_1 -- Multiply a limb vector with a limb and store the
+dnl  result in a second limb vector.
+
+dnl  Copyright 2000, 2001, 2005 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr      r16
+C s1_ptr       r17
+C size         r18
+C s2_limb      r19
+
+C This code runs at 2.25 cycles/limb on EV6.
+
+C This code was written in close cooperation with ev6 pipeline expert
+C Steve Root.  Any errors are tege's fault, though.
+
+C Code structure:
+
+C  code for n < 8
+C  code for n >= 8     code for (n mod 8)
+C                      code for (n div 8)      feed-in code
+C                                              8-way unrolled loop
+C                                              wind-down code
+
+C Some notes about unrolled loop:
+C
+C   r1-r8     multiplies and workup
+C   r21-r28   multiplies and workup
+C   r9-r12    loads
+C   r0       -1
+C   r20,r29,r13-r15  scramble
+C
+C   We're doing 7 of the 8 carry propagations with a br fixup code and 1 with a
+C   put-the-carry-into-hi.  The idea is that these branches are very rarely
+C   taken, and since a non-taken branch consumes no resources, that is better
+C   than an addq.
+C
+C   Software pipeline: a load in cycle #09, feeds a mul in cycle #16, feeds an
+C   add NEXT cycle #09 which feeds a store in NEXT cycle #02
+
+C The code could use some further work:
+C   1. Speed up really small multiplies.  The default alpha/mul_1.asm code is
+C      faster than this for size < 3.
+C   2. Improve feed-in code, perhaps with the equivalent of switch(n%8) unless
+C      that is too costly.
+C   3. Consider using 4-way unrolling, even if that runs slower.
+C   4. Reduce register usage.  In particular, try to avoid using r29.
+
+ASM_START()
+PROLOGUE(mpn_mul_1)
+       cmpult  r18,    8,      r1      C r1 = 1 if size < 8 (unsigned), else 0
+       beq     r1,     $Large          C size >= 8: use the unrolled code
+$Lsmall:                       C size < 8 here; presumably size >= 1 (one limb is processed unconditionally) -- confirm
+       ldq     r2,0(r17)       C r2 = s1_limb
+       lda     r18,-1(r18)     C size--
+       mulq    r2,r19,r3       C r3 = prod_low
+       bic     r31,r31,r4      C clear cy_limb
+       umulh   r2,r19,r0       C r0 = prod_high
+       beq     r18,$Le1a       C jump if size was == 1
+       ldq     r2,8(r17)       C r2 = s1_limb
+       lda     r18,-1(r18)     C size--
+       stq     r3,0(r16)       C res_ptr[0] = prod_low of the first limb
+       beq     r18,$Le2a       C jump if size was == 2
+       ALIGN(8)
+$Lopa: mulq    r2,r19,r3       C r3 = prod_low
+       addq    r4,r0,r0        C cy_limb = cy_limb + 'cy'
+       lda     r18,-1(r18)     C size--
+       umulh   r2,r19,r4       C r4 = cy_limb
+       ldq     r2,16(r17)      C r2 = s1_limb
+       lda     r17,8(r17)      C s1_ptr++
+       addq    r3,r0,r3        C r3 = cy_limb + prod_low
+       stq     r3,8(r16)       C store result limb (res_ptr is bumped just below)
+       cmpult  r3,r0,r0        C r0 = carry from (cy_limb + prod_low)
+       lda     r16,8(r16)      C res_ptr++
+       bne     r18,$Lopa       C repeat until the count reaches 0; last limb is done at $Le2a
+
+$Le2a: mulq    r2,r19,r3       C r3 = prod_low
+       addq    r4,r0,r0        C cy_limb = cy_limb + 'cy'
+       umulh   r2,r19,r4       C r4 = cy_limb
+       addq    r3,r0,r3        C r3 = cy_limb + prod_low
+       cmpult  r3,r0,r0        C r0 = carry from (cy_limb + prod_low)
+       stq     r3,8(r16)       C store the last result limb
+       addq    r4,r0,r0        C cy_limb = prod_high + cy
+       ret     r31,(r26),1     C return with the carry-out limb in r0
+$Le1a: stq     r3,0(r16)       C size == 1: store the only result limb
+       ret     r31,(r26),1     C r0 still holds prod_high from the umulh above
+
+$Large:
+       lda     r30,    -224(r30)       C allocate a 224-byte stack frame
+       stq     r26,    0(r30)          C r26 is the target of the final ret but is used as a temp below
+       stq     r9,     8(r30)          C r9-r15 and r29 are saved here and reloaded before returning
+       stq     r10,    16(r30)
+       stq     r11,    24(r30)
+       stq     r12,    32(r30)
+       stq     r13,    40(r30)
+       stq     r14,    48(r30)
+       stq     r15,    56(r30)
+       stq     r29,    64(r30)
+
+       and     r18,    7,      r20     C count for the first loop, 0-7
+       srl     r18,    3,      r18     C count for unrolled loop
+       bis     r31,    r31,    r21     C r21 = 0: carry limb fed into the unrolled code
+       beq     r20,    $L_8_or_more    C skip first loop
+
+$L_9_or_more:                  C reached only when size mod 8 != 0; handles those 1-7 leading limbs
+       ldq     r2,0(r17)       C r2 = s1_limb
+       lda     r17,8(r17)      C s1_ptr++
+       lda     r20,-1(r20)     C size--
+       mulq    r2,r19,r3       C r3 = prod_low
+       umulh   r2,r19,r21      C r21 = prod_high
+       beq     r20,$Le1b       C jump if size was == 1
+       bis     r31, r31, r0    C FIXME: shouldn't need this
+       ldq     r2,0(r17)       C r2 = s1_limb
+       lda     r17,8(r17)      C s1_ptr++
+       lda     r20,-1(r20)     C size--
+       stq     r3,0(r16)       C store prod_low of the first limb
+       lda     r16,8(r16)      C res_ptr++
+       beq     r20,$Le2b       C jump if size was == 2
+       ALIGN(8)
+$Lopb: mulq    r2,r19,r3       C r3 = prod_low
+       addq    r21,r0,r0       C cy_limb = cy_limb + 'cy'
+       lda     r20,-1(r20)     C size--
+       umulh   r2,r19,r21      C r21 = prod_high
+       ldq     r2,0(r17)       C r2 = s1_limb
+       lda     r17,8(r17)      C s1_ptr++
+       addq    r3,r0,r3        C r3 = cy_limb + prod_low
+       stq     r3,0(r16)       C store result limb
+       cmpult  r3,r0,r0        C r0 = carry from (cy_limb + prod_low)
+       lda     r16,8(r16)      C res_ptr++
+       bne     r20,$Lopb       C repeat until the (size mod 8) count reaches 0
+
+$Le2b: mulq    r2,r19,r3       C r3 = prod_low
+       addq    r21,r0,r0       C cy_limb = cy_limb + 'cy'
+       umulh   r2,r19,r21      C r21 = prod_high
+       addq    r3,r0,r3        C r3 = cy_limb + prod_low
+       cmpult  r3,r0,r0        C r0 = carry from (cy_limb + prod_low)
+       stq     r3,0(r16)       C store the last limb of this first loop
+       lda     r16,8(r16)      C res_ptr++
+       addq    r21,r0,r21      C cy_limb = prod_high + cy
+       br      r31,    $L_8_or_more    C r21 carries the high limb into the unrolled code
+$Le1b: stq     r3,0(r16)       C size mod 8 == 1: store the single limb; r21 = its prod_high
+       lda     r16,8(r16)      C res_ptr++
+
+$L_8_or_more:                  C feed-in for the unrolled code; r18 = size div 8, r21 = incoming carry limb
+       lda     r0,     -1(r31)         C put -1 in r0, for tricky loop control
+       lda     r17,    -32(r17)        C L1 bookkeeping
+       lda     r18,    -1(r18)         C decrement count
+
+       ldq     r9,     32(r17)         C L1
+       ldq     r10,    40(r17)         C L1
+       mulq    r9,     r19,    r22     C U1 #07
+       ldq     r11,    48(r17)         C L1
+       umulh   r9,     r19,    r23     C U1 #08
+       ldq     r12,    56(r17)         C L1
+       mulq    r10,    r19,    r24     C U1 #09
+       ldq     r9,     64(r17)         C L1
+
+       lda     r17,    64(r17)         C L1 bookkeeping
+
+       umulh   r10,    r19,    r25     C U1 #11
+       mulq    r11,    r19,    r26     C U1 #12
+       umulh   r11,    r19,    r27     C U1 #13
+       mulq    r12,    r19,    r28     C U1 #14
+       ldq     r10,    8(r17)          C L1
+       umulh   r12,    r19,    r1      C U1 #15
+       ldq     r11,    16(r17)         C L1
+       mulq    r9,     r19,    r2      C U1 #16
+       ldq     r12,    24(r17)         C L1
+       umulh   r9,     r19,    r3      C U1 #17
+       addq    r21,    r22,    r13     C L1 mov
+       mulq    r10,    r19,    r4      C U1 #18
+       addq    r23,    r24,    r22     C L0 sum 2 mul's
+       cmpult  r13,    r21,    r14     C L1 carry from sum
+       bgt     r18,    $L_16_or_more   C taken when (size div 8) >= 2
+
+       cmpult  r22,    r24,    r24     C U0 carry from sum
+       umulh   r10,    r19,    r5      C U1 #02
+       addq    r25,    r26,    r23     C U0 sum 2 mul's
+       mulq    r11,    r19,    r6      C U1 #03
+       cmpult  r23,    r26,    r25     C U0 carry from sum
+       umulh   r11,    r19,    r7      C U1 #04
+       addq    r27,    r28,    r28     C U0 sum 2 mul's
+       mulq    r12,    r19,    r8      C U1 #05
+       cmpult  r28,    r27,    r15     C L0 carry from sum
+       lda     r16,    32(r16)         C L1 bookkeeping
+       addq    r13,    r31,    r13     C U0 start carry cascade
+       umulh   r12,    r19,    r21     C U1 #06
+       br      r31,    $ret0c          C single group of 8: join the wind-down code after its $fix0c test
+
+$L_16_or_more:                 C first pass of the unrolled code; no stores of an earlier group yet
+C ---------------------------------------------------------------
+       subq    r18,1,r18               C decrement unrolled-loop count
+       cmpult  r22,    r24,    r24     C U0 carry from sum
+       ldq     r9,     32(r17)         C L1
+
+       umulh   r10,    r19,    r5      C U1 #02
+       addq    r25,    r26,    r23     C U0 sum 2 mul's
+       mulq    r11,    r19,    r6      C U1 #03
+       cmpult  r23,    r26,    r25     C U0 carry from sum
+       umulh   r11,    r19,    r7      C U1 #04
+       addq    r27,    r28,    r28     C U0 sum 2 mul's
+       mulq    r12,    r19,    r8      C U1 #05
+       cmpult  r28,    r27,    r15     C L0 carry from sum
+       lda     r16,    32(r16)         C L1 bookkeeping
+       addq    r13,    r31,    r13     C U0 start carry cascade (adds 0: no earlier group)
+
+       umulh   r12,    r19,    r21     C U1 #06
+C      beq     r13,    $fix0w          C U0
+$ret0w:        addq    r22,    r14,    r26     C L0
+       ldq     r10,    40(r17)         C L1
+
+       mulq    r9,     r19,    r22     C U1 #07
+       beq     r26,    $fix1w          C U0
+$ret1w:        addq    r23,    r24,    r27     C L0
+       ldq     r11,    48(r17)         C L1
+
+       umulh   r9,     r19,    r23     C U1 #08
+       beq     r27,    $fix2w          C U0
+$ret2w:        addq    r28,    r25,    r28     C L0
+       ldq     r12,    56(r17)         C L1
+
+       mulq    r10,    r19,    r24     C U1 #09
+       beq     r28,    $fix3w          C U0
+$ret3w:        addq    r1,     r2,     r20     C L0 sum 2 mul's
+       ldq     r9,     64(r17)         C L1
+
+       addq    r3,     r4,     r2      C L0 #10 2 mul's
+       lda     r17,    64(r17)         C L1 bookkeeping
+       cmpult  r20,    r1,     r29     C U0 carry from sum
+
+       umulh   r10,    r19,    r25     C U1 #11
+       cmpult  r2,     r4,     r4      C U0 carry from sum
+       stq     r13,    -32(r16)        C L0
+       stq     r26,    -24(r16)        C L1
+
+       mulq    r11,    r19,    r26     C U1 #12
+       addq    r5,     r6,     r14     C U0 sum 2 mul's
+       stq     r27,    -16(r16)        C L0
+       stq     r28,    -8(r16)         C L1
+
+       umulh   r11,    r19,    r27     C U1 #13
+       cmpult  r14,    r6,     r3      C U0 carry from sum
+C could do cross-jumping here:
+C      bra     $L_middle_of_unrolled_loop
+       mulq    r12,    r19,    r28     C U1 #14
+       addq    r7,     r3,     r5      C L0 eat carry
+       addq    r20,    r15,    r20     C U0 carry cascade
+       ldq     r10,    8(r17)          C L1
+
+       umulh   r12,    r19,    r1      C U1 #15
+       beq     r20,    $fix4           C U0 fixup resumes at $ret4 inside $Loop (same instructions from there on)
+$ret4w:        addq    r2,     r29,    r6      C L0
+       ldq     r11,    16(r17)         C L1
+
+       mulq    r9,     r19,    r2      C U1 #16
+       beq     r6,     $fix5           C U0 fixup resumes at $ret5 inside $Loop
+$ret5w:        addq    r14,    r4,     r7      C L0
+       ldq     r12,    24(r17)         C L1
+
+       umulh   r9,     r19,    r3      C U1 #17
+       beq     r7,     $fix6           C U0 fixup resumes at $ret6 inside $Loop
+$ret6w:        addq    r5,     r8,     r8      C L0 sum 2
+       addq    r21,    r22,    r13     C L1 sum 2 mul's
+
+       mulq    r10,    r19,    r4      C U1 #18
+       addq    r23,    r24,    r22     C L0 sum 2 mul's
+       cmpult  r13,    r21,    r14     C L1 carry from sum
+       ble     r18,    $Lend           C U0 count exhausted: skip the loop
+C ---------------------------------------------------------------
+       ALIGN(16)
+$Loop:                         C 8-way unrolled loop; entered only with r18 > 0
+       umulh   r0,     r18,    r18     C U1 #01 decrement r18! (r0 = -1: high half of -1 * r18 is r18 - 1)
+       cmpult  r8,     r5,     r29     C L0 carry from last bunch
+       cmpult  r22,    r24,    r24     C U0 carry from sum
+       ldq     r9,     32(r17)         C L1
+
+       umulh   r10,    r19,    r5      C U1 #02
+       addq    r25,    r26,    r23     C U0 sum 2 mul's
+       stq     r20,    0(r16)          C L0
+       stq     r6,     8(r16)          C L1
+
+       mulq    r11,    r19,    r6      C U1 #03
+       cmpult  r23,    r26,    r25     C U0 carry from sum
+       stq     r7,     16(r16)         C L0
+       stq     r8,     24(r16)         C L1
+
+       umulh   r11,    r19,    r7      C U1 #04
+       bis     r31,    r31,    r31     C L0 st slosh
+       bis     r31,    r31,    r31     C L1 st slosh
+       addq    r27,    r28,    r28     C U0 sum 2 mul's
+
+       mulq    r12,    r19,    r8      C U1 #05
+       cmpult  r28,    r27,    r15     C L0 carry from sum
+       lda     r16,    64(r16)         C L1 bookkeeping
+       addq    r13,    r29,    r13     C U0 start carry cascade
+
+       umulh   r12,    r19,    r21     C U1 #06
+       beq     r13,    $fix0           C U0
+$ret0: addq    r22,    r14,    r26     C L0
+       ldq     r10,    40(r17)         C L1
+
+       mulq    r9,     r19,    r22     C U1 #07
+       beq     r26,    $fix1           C U0
+$ret1: addq    r23,    r24,    r27     C L0
+       ldq     r11,    48(r17)         C L1
+
+       umulh   r9,     r19,    r23     C U1 #08
+       beq     r27,    $fix2           C U0
+$ret2: addq    r28,    r25,    r28     C L0
+       ldq     r12,    56(r17)         C L1
+
+       mulq    r10,    r19,    r24     C U1 #09
+       beq     r28,    $fix3           C U0
+$ret3: addq    r1,     r2,     r20     C L0 sum 2 mul's
+       ldq     r9,     64(r17)         C L1
+
+       addq    r3,     r4,     r2      C L0 #10 2 mul's
+       bis     r31,    r31,    r31     C U1 mul hole
+       lda     r17,    64(r17)         C L1 bookkeeping
+       cmpult  r20,    r1,     r29     C U0 carry from sum
+
+       umulh   r10,    r19,    r25     C U1 #11
+       cmpult  r2,     r4,     r4      C U0 carry from sum
+       stq     r13,    -32(r16)        C L0
+       stq     r26,    -24(r16)        C L1
+
+       mulq    r11,    r19,    r26     C U1 #12
+       addq    r5,     r6,     r14     C U0 sum 2 mul's
+       stq     r27,    -16(r16)        C L0
+       stq     r28,    -8(r16)         C L1
+
+       umulh   r11,    r19,    r27     C U1 #13
+       bis     r31,    r31,    r31     C L0 st slosh
+       bis     r31,    r31,    r31     C L1 st slosh
+       cmpult  r14,    r6,     r3      C U0 carry from sum
+$L_middle_of_unrolled_loop:    C named only by the commented-out cross-jump in the first-pass code
+       mulq    r12,    r19,    r28     C U1 #14
+       addq    r7,     r3,     r5      C L0 eat carry
+       addq    r20,    r15,    r20     C U0 carry cascade
+       ldq     r10,    8(r17)          C L1
+
+       umulh   r12,    r19,    r1      C U1 #15
+       beq     r20,    $fix4           C U0
+$ret4: addq    r2,     r29,    r6      C L0
+       ldq     r11,    16(r17)         C L1
+
+       mulq    r9,     r19,    r2      C U1 #16
+       beq     r6,     $fix5           C U0
+$ret5: addq    r14,    r4,     r7      C L0
+       ldq     r12,    24(r17)         C L1
+
+       umulh   r9,     r19,    r3      C U1 #17
+       beq     r7,     $fix6           C U0
+$ret6: addq    r5,     r8,     r8      C L0 sum 2
+       addq    r21,    r22,    r13     C L1 sum 2 mul's
+
+       mulq    r10,    r19,    r4      C U1 #18
+       addq    r23,    r24,    r22     C L0 sum 2 mul's
+       cmpult  r13,    r21,    r14     C L1 carry from sum
+       bgt     r18,    $Loop           C U0
+C ---------------------------------------------------------------
+$Lend:                         C wind-down: finish the limbs already in flight, no further loads
+       cmpult  r8,     r5,     r29     C L0 carry from last bunch
+       cmpult  r22,    r24,    r24     C U0 carry from sum
+
+       umulh   r10,    r19,    r5      C U1 #02
+       addq    r25,    r26,    r23     C U0 sum 2 mul's
+       stq     r20,    0(r16)          C L0
+       stq     r6,     8(r16)          C L1
+
+       mulq    r11,    r19,    r6      C U1 #03
+       cmpult  r23,    r26,    r25     C U0 carry from sum
+       stq     r7,     16(r16)         C L0
+       stq     r8,     24(r16)         C L1
+
+       umulh   r11,    r19,    r7      C U1 #04
+       addq    r27,    r28,    r28     C U0 sum 2 mul's
+
+       mulq    r12,    r19,    r8      C U1 #05
+       cmpult  r28,    r27,    r15     C L0 carry from sum
+       lda     r16,    64(r16)         C L1 bookkeeping
+       addq    r13,    r29,    r13     C U0 start carry cascade
+
+       umulh   r12,    r19,    r21     C U1 #06
+       beq     r13,    $fix0c          C U0
+$ret0c:        addq    r22,    r14,    r26     C L0
+       beq     r26,    $fix1c          C U0
+$ret1c:        addq    r23,    r24,    r27     C L0
+       beq     r27,    $fix2c          C U0
+$ret2c:        addq    r28,    r25,    r28     C L0
+       beq     r28,    $fix3c          C U0
+$ret3c:        addq    r1,     r2,     r20     C L0 sum 2 mul's
+       addq    r3,     r4,     r2      C L0 #10 2 mul's
+       lda     r17,    64(r17)         C L1 bookkeeping
+       cmpult  r20,    r1,     r29     C U0 carry from sum
+       cmpult  r2,     r4,     r4      C U0 carry from sum
+       stq     r13,    -32(r16)        C L0
+       stq     r26,    -24(r16)        C L1
+       addq    r5,     r6,     r14     C U0 sum 2 mul's
+       stq     r27,    -16(r16)        C L0
+       stq     r28,    -8(r16)         C L1
+       cmpult  r14,    r6,     r3      C U0 carry from sum
+       addq    r7,     r3,     r5      C L0 eat carry
+       addq    r20,    r15,    r20     C U0 carry cascade
+       beq     r20,    $fix4c          C U0
+$ret4c:        addq    r2,     r29,    r6      C L0
+       beq     r6,     $fix5c          C U0
+$ret5c:        addq    r14,    r4,     r7      C L0
+       beq     r7,     $fix6c          C U0
+$ret6c:        addq    r5,     r8,     r8      C L0 sum 2
+       cmpult  r8,     r5,     r29     C L0 carry from last bunch
+       stq     r20,    0(r16)          C L0
+       stq     r6,     8(r16)          C L1
+       stq     r7,     16(r16)         C L0
+       stq     r8,     24(r16)         C L1
+       addq    r29,    r21,    r0      C r0 = last carry + last prod_high: the returned carry limb
+
+       ldq     r26,    0(r30)          C reload the registers stored at $Large
+       ldq     r9,     8(r30)
+       ldq     r10,    16(r30)
+       ldq     r11,    24(r30)
+       ldq     r12,    32(r30)
+       ldq     r13,    40(r30)
+       ldq     r14,    48(r30)
+       ldq     r15,    56(r30)
+       ldq     r29,    64(r30)
+       lda     r30,    224(r30)        C release the 224-byte frame
+       ret     r31,    (r26),  1       C return through the reloaded r26
+
+C $fix0w:      bis     r14,    r29,    r14     C join carries
+C      br      r31,    $ret0w
+$fix1w:        bis     r24,    r14,    r24     C join carries (sum was 0: pass the carry-in on to the next stage)
+       br      r31,    $ret1w
+$fix2w:        bis     r25,    r24,    r25     C join carries
+       br      r31,    $ret2w
+$fix3w:        bis     r15,    r25,    r15     C join carries
+       br      r31,    $ret3w
+$fix0: bis     r14,    r29,    r14     C join carries (a 0 sum is the only result that all-ones + carry 1 can give)
+       br      r31,    $ret0
+$fix1: bis     r24,    r14,    r24     C join carries
+       br      r31,    $ret1
+$fix2: bis     r25,    r24,    r25     C join carries
+       br      r31,    $ret2
+$fix3: bis     r15,    r25,    r15     C join carries
+       br      r31,    $ret3
+$fix4: bis     r29,    r15,    r29     C join carries (also the target of the first-pass code before $Loop)
+       br      r31,    $ret4
+$fix5: bis     r4,     r29,    r4      C join carries
+       br      r31,    $ret5
+$fix6: addq    r5,     r4,     r5      C can't carry twice!
+       br      r31,    $ret6
+$fix0c:        bis     r14,    r29,    r14     C join carries (the "c" stubs serve the wind-down code)
+       br      r31,    $ret0c
+$fix1c:        bis     r24,    r14,    r24     C join carries
+       br      r31,    $ret1c
+$fix2c:        bis     r25,    r24,    r25     C join carries
+       br      r31,    $ret2c
+$fix3c:        bis     r15,    r25,    r15     C join carries
+       br      r31,    $ret3c
+$fix4c:        bis     r29,    r15,    r29     C join carries
+       br      r31,    $ret4c
+$fix5c:        bis     r4,     r29,    r4      C join carries
+       br      r31,    $ret5c
+$fix6c:        addq    r5,     r4,     r5      C can't carry twice!
+       br      r31,    $ret6c
+
+EPILOGUE(mpn_mul_1)
+ASM_END()
diff --git a/mpn/alpha/ev6/nails/README b/mpn/alpha/ev6/nails/README

new file mode 100644 (file)

index 0000000..8b3b357
--- /dev/null
+++ b/mpn/alpha/ev6/nails/README
@@ -0,0 +1,54 @@
+Copyright 2002, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by the
+Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License along
+with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+
+
+
+This directory contains assembly code for nails-enabled 21264.  The code is not
+very well optimized.
+
+For addmul_N, as N grows larger, we could make multiple loads together, then do
+about 3.3 i/c.  10 cycles after the last load, we can increase to 4 i/c.  This
+would surely allow addmul_4 to run at 2 c/l, but the same should be possible
+also for addmul_3 and perhaps even addmul_2.
+
+
+               current         fair            best
+Routine                c/l  unroll     c/l  unroll     c/l  i/c
+mul_1          3.25            2.75            2.75 3.273
+addmul_1       4.0     4       3.5     4 14    3.25 3.385
+addmul_2       4.0     1       2.5     2 10    2.25 3.333
+addmul_3       3.0     1       2.33    2 14    2    3.333
+addmul_4       2.5     1       2.125   2 17    2    3.135
+
+addmul_5                       2       1 10
+addmul_6                       2       1 12
+addmul_7                       2       1 14
+
+(The "best" column doesn't account for bookkeeping instructions and
+thereby assumes infinite unrolling.)
+
+Basecase usages:
+
+1       addmul_1
+2       addmul_2
+3       addmul_3
+4       addmul_4
+5       addmul_3 + addmul_2    2.3998
+6       addmul_4 + addmul_2
+7       addmul_4 + addmul_3
diff --git a/mpn/alpha/ev6/nails/addmul_1.asm b/mpn/alpha/ev6/nails/addmul_1.asm

new file mode 100644 (file)

index 0000000..060e78d
--- /dev/null
+++ b/mpn/alpha/ev6/nails/addmul_1.asm
@@ -0,0 +1,385 @@
+dnl  Alpha ev6 nails mpn_addmul_1.
+
+dnl  Copyright 2002, 2005, 2006 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C      cycles/limb
+C EV4:    42
+C EV5:    18
+C EV6:     4
+
+C TODO
+C  * Reroll loop for 3.75 c/l with current 4-way unrolling.
+C  * The loop is overscheduled wrt loads and wrt multiplies, in particular
+C    umulh.
+C  * Use FP loop count and multiple exit points, that would simplify feed-in lp0
+C    and would work since the loop structure is really regular.
+
+C  INPUT PARAMETERS
+define(`rp',`r16')              C limbs here are loaded, added to and stored back
+define(`up',`r17')              C source limbs, only loaded
+define(`n', `r18')              C limb count, also used as the loop counter
+define(`vl0',`r19')             C multiplier limb, shifted left by NAIL_BITS on entry
+
+define(`numb_mask',`r6')        C all ones shifted right by NAIL_BITS
+
+define(`m0a',`r0')              C mXa receive mulq results, mXb receive umulh results
+define(`m0b',`r1')
+define(`m1a',`r2')
+define(`m1b',`r3')
+define(`m2a',`r20')
+define(`m2b',`r21')             C r21 is also scratch for the cmpeq tests on entry
+define(`m3a',`r22')
+define(`m3b',`r23')
+
+define(`acc0',`r25')            C r25 also holds n mod 4 during the entry dispatch
+define(`acc1',`r27')
+
+define(`ul0',`r4')              C ul0 and ul2 share r4
+define(`ul1',`r5')              C ul1 and ul3 share r5
+define(`ul2',`r4')
+define(`ul3',`r5')
+
+define(`rl0',`r24')             C all four rl names map to the same register
+define(`rl1',`r24')
+define(`rl2',`r24')
+define(`rl3',`r24')
+
+define(`t0',`r7')               C receives a mulq result shifted right by NAIL_BITS
+define(`t1',`r8')               C receives an accumulator shifted right by NUMB_BITS
+
+define(`NAIL_BITS',`GMP_NAIL_BITS')
+define(`NUMB_BITS',`GMP_NUMB_BITS')
+
+dnl  This declaration is munged by configure
+NAILS_SUPPORT(2-63)
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+       sll     vl0, NAIL_BITS, vl0             C pre-shift the multiplier left by NAIL_BITS
+       lda     numb_mask, -1(r31)              C numb_mask = -1 (all ones)
+       srl     numb_mask, NAIL_BITS, numb_mask C numb_mask = all ones >> NAIL_BITS
+
+       and     n,      3,      r25             C r25 = n mod 4, selects the feed-in path
+       cmpeq   r25,    1,      r21
+       bne     r21,    L(1m4)                  C n mod 4 == 1
+       cmpeq   r25,    2,      r21
+       bne     r21,    L(2m4)                  C n mod 4 == 2
+       beq     r25,    L(0m4)                  C n mod 4 == 0, else fall into 3m4
+
+L(3m4):        ldq     ul3,    0(up)           C n mod 4 == 3: feed in three limbs
+       lda     n,      -4(n)
+       ldq     ul0,    8(up)
+       mulq    vl0,    ul3,    m3a
+       umulh   vl0,    ul3,    m3b
+       ldq     ul1,    16(up)
+       lda     up,     24(up)
+       lda     rp,     -8(rp)                  C bias rp: first result limb is read at 8(rp)
+       mulq    vl0,    ul0,    m0a
+       umulh   vl0,    ul0,    m0b
+       bge     n,      L(ge3)                  C taken unless n was exactly 3
+
+       mulq    vl0,    ul1,    m1a
+       umulh   vl0,    ul1,    m1b
+       ldq     rl3,    8(rp)
+       srl     m3a,NAIL_BITS,  t0
+       addq    t0,     r31,    acc1
+       addq    rl3,    acc1,   acc1
+       ldq     rl0,    16(rp)
+       srl     m0a,NAIL_BITS,  t0
+       addq    t0,     m3b,    acc0
+       srl     acc1,NUMB_BITS, t1
+       br      r31,    L(ta3)                  C n was 3: finish in the tail
+
+L(ge3):        ldq     ul2,    0(up)
+       mulq    vl0,    ul1,    m1a
+       umulh   vl0,    ul1,    m1b
+       ldq     rl3,    8(rp)
+       srl     m3a,NAIL_BITS,  t0
+       ldq     ul3,    8(up)
+       lda     n,      -4(n)
+       mulq    vl0,    ul2,    m2a
+       addq    t0,     r31,    acc1
+       umulh   vl0,    ul2,    m2b
+       addq    rl3,    acc1,   acc1
+       ldq     rl0,    16(rp)
+       srl     m0a,NAIL_BITS,  t0
+       ldq     ul0,    16(up)
+       mulq    vl0,    ul3,    m3a
+       addq    t0,     m3b,    acc0
+       srl     acc1,NUMB_BITS, t1
+       br      r31,    L(el3)                  C join the unrolled loop at el3
+
+L(0m4):        lda     n,      -8(n)           C n mod 4 == 0: feed in four limbs
+       ldq     ul2,    0(up)
+       ldq     ul3,    8(up)
+       mulq    vl0,    ul2,    m2a
+       umulh   vl0,    ul2,    m2b
+       ldq     ul0,    16(up)
+       mulq    vl0,    ul3,    m3a
+       umulh   vl0,    ul3,    m3b
+       ldq     ul1,    24(up)
+       lda     up,     32(up)
+       mulq    vl0,    ul0,    m0a
+       umulh   vl0,    ul0,    m0b
+       bge     n,      L(ge4)                  C taken when n was 8 or more
+
+       ldq     rl2,    0(rp)
+       srl     m2a,NAIL_BITS,  t0
+       mulq    vl0,    ul1,    m1a
+       addq    t0,     r31,    acc0
+       umulh   vl0,    ul1,    m1b
+       addq    rl2,    acc0,   acc0
+       ldq     rl3,    8(rp)
+       srl     m3a,NAIL_BITS,  t0
+       addq    t0,     m2b,    acc1
+       srl     acc0,NUMB_BITS, t1
+       br      r31,    L(ta4)                  C nothing beyond these 4 limbs: finish in the tail
+
+L(ge4):        ldq     rl2,    0(rp)
+       srl     m2a,NAIL_BITS,  t0
+       ldq     ul2,    0(up)
+       mulq    vl0,    ul1,    m1a
+       addq    t0,     r31,    acc0
+       umulh   vl0,    ul1,    m1b
+       addq    rl2,    acc0,   acc0
+       ldq     rl3,    8(rp)
+       srl     m3a,NAIL_BITS,  t0
+       ldq     ul3,    8(up)
+       lda     n,      -4(n)
+       mulq    vl0,    ul2,    m2a
+       addq    t0,     m2b,    acc1
+       srl     acc0,NUMB_BITS, t1
+       br      r31,    L(el0)                  C join the unrolled loop at el0
+
+L(2m4):        lda     n,      -4(n)           C n mod 4 == 2: feed in two limbs
+       ldq     ul0,    0(up)
+       ldq     ul1,    8(up)
+       lda     up,     16(up)
+       lda     rp,     -16(rp)                 C bias rp: first result limb is read at 16(rp)
+       mulq    vl0,    ul0,    m0a
+       umulh   vl0,    ul0,    m0b
+       bge     n,      L(ge2)                  C taken when n was 6 or more
+
+       mulq    vl0,    ul1,    m1a
+       umulh   vl0,    ul1,    m1b
+       ldq     rl0,    16(rp)
+       srl     m0a,NAIL_BITS,  t0
+       addq    t0,     r31,    acc0
+       addq    rl0,    acc0,   acc0
+       ldq     rl1,    24(rp)
+       srl     m1a,NAIL_BITS,  t0
+       addq    t0,     m0b,    acc1
+       srl     acc0,NUMB_BITS, t1
+       br      r31,    L(ta2)                  C n was 2: finish in the tail
+
+L(ge2):        ldq     ul2,    0(up)
+       mulq    vl0,    ul1,    m1a
+       umulh   vl0,    ul1,    m1b
+       ldq     ul3,    8(up)
+       lda     n,      -4(n)
+       mulq    vl0,    ul2,    m2a
+       umulh   vl0,    ul2,    m2b
+       ldq     rl0,    16(rp)
+       srl     m0a,NAIL_BITS,  t0
+       ldq     ul0,    16(up)
+       mulq    vl0,    ul3,    m3a
+       addq    t0,     r31,    acc0
+       umulh   vl0,    ul3,    m3b
+       addq    rl0,    acc0,   acc0
+       ldq     rl1,    24(rp)
+       srl     m1a,NAIL_BITS,  t0
+       ldq     ul1,    24(up)
+       lda     up,     32(up)
+       lda     rp,     32(rp)
+       mulq    vl0,    ul0,    m0a
+       addq    t0,     m0b,    acc1
+       srl     acc0,NUMB_BITS, t1
+       bge     n,      L(el2)                  C n was 10 or more: join the loop at el2
+
+       br      r31,    L(ta6)                  C n was 6: finish in the tail
+
+L(1m4):        lda     n,      -4(n)           C n mod 4 == 1: feed in one limb
+       ldq     ul1,    0(up)
+       lda     up,     8(up)
+       lda     rp,     -24(rp)                 C bias rp: first result limb is read at 24(rp)
+       bge     n,      L(ge1)                  C taken when n was 5 or more
+
+       mulq    vl0,    ul1,    m1a             C n was 1: handle the single limb right here
+       umulh   vl0,    ul1,    m1b
+       ldq     rl1,    24(rp)
+       srl     m1a,NAIL_BITS,  t0              C t0 = mulq part with the pre-shift undone
+       addq    rl1,    t0,     acc1            C add it to the existing result limb
+       and     acc1,numb_mask, r28             C mask with numb_mask before storing
+       srl     acc1,NUMB_BITS, t1              C t1 = bits of the sum above NUMB_BITS
+       stq     r28,    24(rp)
+       addq    t1,     m1b,    r0              C r0 = t1 + umulh part, left in r0 at return
+       ret     r31,    (r26),  1
+
+L(ge1):        ldq     ul2,    0(up)
+       mulq    vl0,    ul1,    m1a
+       umulh   vl0,    ul1,    m1b
+       ldq     ul3,    8(up)
+       lda     n,      -4(n)
+       mulq    vl0,    ul2,    m2a
+       umulh   vl0,    ul2,    m2b
+       ldq     ul0,    16(up)
+       mulq    vl0,    ul3,    m3a
+       umulh   vl0,    ul3,    m3b
+       ldq     rl1,    24(rp)
+       srl     m1a,NAIL_BITS,  t0
+       ldq     ul1,    24(up)
+       lda     up,     32(up)
+       lda     rp,     32(rp)
+       mulq    vl0,    ul0,    m0a
+       addq    t0,     r31,    acc1
+       umulh   vl0,    ul0,    m0b
+       addq    rl1,    acc1,   acc1
+       ldq     rl2,    0(rp)
+       srl     m2a,NAIL_BITS,  t0
+       mulq    vl0,    ul1,    m1a
+       addq    t0,     m1b,    acc0
+       srl     acc1,NUMB_BITS, t1
+       blt     n,      L(ta5)                  C n was 5: finish in the tail
+
+L(ge5):        ldq     ul2,    0(up)
+       br      r31,    L(el1)                  C n was 9 or more: join the loop at el1
+
+       ALIGN(16)
+L(top):        mulq    vl0,    ul0,    m0a             C U1
+       addq    t0,     m0b,    acc1            C L0
+       srl     acc0,NUMB_BITS, t1              C U0
+       stq     r28,    -24(rp)                 C L1
+C
+L(el2):        umulh   vl0,    ul0,    m0b             C U1
+       and     acc0,numb_mask, r28             C L0
+       addq    rl1,    acc1,   acc1            C U0
+       ldq     rl2,    0(rp)                   C L1
+C
+       unop                                    C U1
+       addq    t1,     acc1,   acc1            C L0
+       srl     m2a,NAIL_BITS,  t0              C U0
+       ldq     ul2,    0(up)                   C L1
+C
+       mulq    vl0,    ul1,    m1a             C U1
+       addq    t0,     m1b,    acc0            C L0
+       srl     acc1,NUMB_BITS, t1              C U0
+       stq     r28,    -16(rp)                 C L1
+C
+L(el1):        umulh   vl0,    ul1,    m1b             C U1
+       and     acc1,numb_mask, r28             C L0
+       addq    rl2,    acc0,   acc0            C U0
+       ldq     rl3,    8(rp)                   C L1
+C
+       lda     n,      -4(n)                   C L1
+       addq    t1,     acc0,   acc0            C L0
+       srl     m3a,NAIL_BITS,  t0              C U0
+       ldq     ul3,    8(up)                   C L1
+C
+       mulq    vl0,    ul2,    m2a             C U1
+       addq    t0,     m2b,    acc1            C L0
+       srl     acc0,NUMB_BITS, t1              C U0
+       stq     r28,    -8(rp)                  C L1
+C
+L(el0):        umulh   vl0,    ul2,    m2b             C U1
+       and     acc0,numb_mask, r28             C L0
+       addq    rl3,    acc1,   acc1            C U0
+       ldq     rl0,    16(rp)                  C L1
+C
+       unop                                    C U1
+       addq    t1,     acc1,   acc1            C L0
+       srl     m0a,NAIL_BITS,  t0              C U0
+       ldq     ul0,    16(up)                  C L1
+C
+       mulq    vl0,    ul3,    m3a             C U1
+       addq    t0,     m3b,    acc0            C L0
+       srl     acc1,NUMB_BITS, t1              C U0
+       stq     r28,    0(rp)                   C L1
+C
+L(el3):        umulh   vl0,    ul3,    m3b             C U1
+       and     acc1,numb_mask, r28             C L0
+       addq    rl0,    acc0,   acc0            C U0
+       ldq     rl1,    24(rp)                  C L1
+C
+       unop                                    C U1
+       addq    t1,     acc0,   acc0            C L0
+       srl     m1a,NAIL_BITS,  t0              C U0
+       ldq     ul1,    24(up)                  C L1
+C
+       lda     up,     32(up)                  C L0
+       unop                                    C U1
+       lda     rp,     32(rp)                  C L1
+       bge     n,      L(top)                  C U0
+
+L(end):        mulq    vl0,    ul0,    m0a     C loop exit: no more up loads, drain what is in flight
+       addq    t0,     m0b,    acc1
+       srl     acc0,NUMB_BITS, t1
+       stq     r28,    -24(rp)
+L(ta6):        umulh   vl0,    ul0,    m0b
+       and     acc0,numb_mask, r28
+       addq    rl1,    acc1,   acc1
+       ldq     rl2,    0(rp)
+       addq    t1,     acc1,   acc1
+       srl     m2a,NAIL_BITS,  t0
+       mulq    vl0,    ul1,    m1a
+       addq    t0,     m1b,    acc0
+       srl     acc1,NUMB_BITS, t1
+       stq     r28,    -16(rp)
+L(ta5):        umulh   vl0,    ul1,    m1b
+       and     acc1,numb_mask, r28
+       addq    rl2,    acc0,   acc0
+       ldq     rl3,    8(rp)
+       addq    t1,     acc0,   acc0
+       srl     m3a,NAIL_BITS,  t0
+       addq    t0,     m2b,    acc1
+       srl     acc0,NUMB_BITS, t1
+       stq     r28,    -8(rp)
+       unop
+       ALIGN(16)
+L(ta4):        and     acc0,numb_mask, r28
+       addq    rl3,    acc1,   acc1
+       ldq     rl0,    16(rp)
+       addq    t1,     acc1,   acc1
+       srl     m0a,NAIL_BITS,  t0
+       addq    t0,     m3b,    acc0
+       srl     acc1,NUMB_BITS, t1
+       stq     r28,    0(rp)
+       unop
+       ALIGN(16)
+L(ta3):        and     acc1,numb_mask, r28
+       addq    rl0,    acc0,   acc0
+       ldq     rl1,    24(rp)
+       addq    t1,     acc0,   acc0
+       srl     m1a,NAIL_BITS,  t0
+       addq    t0,     m0b,    acc1
+       srl     acc0,NUMB_BITS, t1
+       stq     r28,    8(rp)
+       unop
+       ALIGN(16)
+L(ta2):        and     acc0,numb_mask, r28
+       addq    rl1,    acc1,   acc1
+       addq    t1,     acc1,   acc1
+       srl     acc1,NUMB_BITS, t1              C t1 = bits of the last sum above NUMB_BITS
+       stq     r28,    16(rp)
+       and     acc1,numb_mask, r28
+       addq    t1,     m1b,    r0              C r0 = t1 + umulh part of the last product
+       stq     r28,    24(rp)                  C last result limb
+       ret     r31,    (r26),  1               C result is left in r0
+EPILOGUE()
+ASM_END()
diff --git a/mpn/alpha/ev6/nails/addmul_2.asm b/mpn/alpha/ev6/nails/addmul_2.asm

new file mode 100644 (file)

index 0000000..9edaed8
--- /dev/null
+++ b/mpn/alpha/ev6/nails/addmul_2.asm
@@ -0,0 +1,135 @@
+dnl  Alpha ev6 nails mpn_addmul_2.
+
+dnl  Copyright 2002, 2005, 2006 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C Runs at 4.0 cycles/limb.
+
+C We could either go for 2-way unrolling over 11 cycles, for 2.75 c/l,
+C or 4-way unrolling over 20 cycles, for 2.5 c/l.
+
+
+C  INPUT PARAMETERS
+define(`rp',`r16')              C limbs here are loaded, added to and stored back
+define(`up',`r17')              C source limbs, one loaded per loop iteration
+define(`n',`r18')               C number of up limbs, counted down to zero
+define(`vp',`r19')              C two multiplier limbs are loaded from here on entry
+
+C  Useful register aliases
+define(`numb_mask',`r24')       C all ones shifted right by NAIL_BITS
+define(`ulimb',`r25')           C current limb loaded from up
+define(`rlimb',`r27')           C current limb loaded from rp
+
+define(`m0a',`r0')              C mXa receive mulq results, mXb receive umulh results
+define(`m0b',`r1')
+define(`m1a',`r2')
+define(`m1b',`r3')
+
+define(`acc0',`r4')             C column sums carried from one up limb to the next
+define(`acc1',`r5')
+
+define(`v0',`r6')               C multiplier limbs, shifted left by NAIL_BITS on entry
+define(`v1',`r7')
+
+C Used for temps: r8 r19 r28 (r19 is vp on entry, reused once v0 and v1 are loaded)
+
+define(`NAIL_BITS',`GMP_NAIL_BITS')
+define(`NUMB_BITS',`GMP_NUMB_BITS')
+
+C  This declaration is munged by configure
+NAILS_SUPPORT(3-63)
+
+ASM_START()
+PROLOGUE(mpn_addmul_2)
+       lda     numb_mask,-1(r31)               C numb_mask = -1 (all ones)
+       srl     numb_mask,NAIL_BITS,numb_mask   C numb_mask = all ones >> NAIL_BITS
+
+       ldq     v0,     0(vp)                   C first multiplier limb
+       ldq     v1,     8(vp)                   C second multiplier limb
+
+       bis     r31,    r31,    acc0            C       zero acc0
+       sll     v0,NAIL_BITS,   v0              C pre-shift v0 left by NAIL_BITS
+       bis     r31,    r31,    acc1            C       zero acc1
+       sll     v1,NAIL_BITS,   v1              C pre-shift v1 left by NAIL_BITS
+       bis     r31,    r31,    r19             C zero r19: vp is dead, r19 now carries the nail part
+
+       ldq     ulimb,  0(up)                   C first up limb, its products start before the loop
+       lda     up,     8(up)
+       mulq    v0,     ulimb,  m0a             C U1
+       umulh   v0,     ulimb,  m0b             C U1
+       mulq    v1,     ulimb,  m1a             C U1
+       umulh   v1,     ulimb,  m1b             C U1
+       lda     n,      -1(n)                   C one up limb is already in flight
+       beq     n,      L(end)                  C U0
+
+       ALIGN(16)
+L(top):        bis     r31,    r31,    r31             C U1    nop
+       addq    r19,    acc0,   acc0            C U0    propagate nail
+       ldq     rlimb,  0(rp)                   C L0
+       ldq     ulimb,  0(up)                   C L1
+
+       lda     rp,     8(rp)                   C L1
+       srl     m0a,NAIL_BITS,  r8              C U0
+       lda     up,     8(up)                   C L0
+       mulq    v0,     ulimb,  m0a             C U1
+
+       addq    r8,     acc0,   r19             C U0
+       addq    m0b,    acc1,   acc0            C L1
+       umulh   v0,     ulimb,  m0b             C U1
+       bis     r31,    r31,    r31             C L0    nop
+
+       addq    rlimb,  r19,    r19             C L1    FINAL PROD-SUM
+       srl     m1a,NAIL_BITS,  r8              C U0
+       lda     n,      -1(n)                   C L0
+       mulq    v1,     ulimb,  m1a             C U1
+
+       addq    r8,     acc0,   acc0            C U0
+       bis     r31,    m1b,    acc1            C L1
+       umulh   v1,     ulimb,  m1b             C U1
+       and     r19,numb_mask,  r28             C L0    extract numb part
+
+       unop
+       srl     r19,NUMB_BITS,  r19             C U1    extract nail part
+       stq     r28,    -8(rp)                  C L1
+       bne     n,      L(top)                  C U0
+
+L(end):        ldq     rlimb,  0(rp)           C wind-down: same steps as the loop, no new products
+       addq    r19,    acc0,   acc0            C       propagate nail
+       lda     rp,     8(rp)
+       srl     m0a,NAIL_BITS,  r8              C U0
+       addq    r8,     acc0,   r19             C r19 = acc0 + shifted mulq part of the v0 product
+       addq    m0b,    acc1,   acc0            C next column: umulh part of v0 product + acc1
+       addq    rlimb,  r19,    r19             C add the existing result limb
+       srl     m1a,NAIL_BITS,  r8              C U0
+       addq    r8,     acc0,   acc0            C add shifted mulq part of the v1 product
+       bis     r31,    m1b,    acc1            C acc1 = umulh part of the v1 product
+       and     r19,numb_mask,  r28             C extract limb
+
+       srl     r19,NUMB_BITS,  r19             C extract nail
+       stq     r28,    -8(rp)                  C rp was already advanced, so this is the limb just loaded
+
+       addq    r19,    acc0,   acc0            C propagate nail
+       and     acc0,numb_mask, r28
+       stq     r28,    0(rp)                   C one further limb, stored without reading its old value
+       srl     acc0,NUMB_BITS, r19
+       addq    r19,    acc1,   r0              C r0 = remaining high part, left in r0 at return
+
+       ret     r31,    (r26),  1
+EPILOGUE()
+ASM_END()
diff --git a/mpn/alpha/ev6/nails/addmul_3.asm b/mpn/alpha/ev6/nails/addmul_3.asm

new file mode 100644 (file)

index 0000000..1d89769
--- /dev/null
+++ b/mpn/alpha/ev6/nails/addmul_3.asm
@@ -0,0 +1,158 @@
+dnl  Alpha ev6 nails mpn_addmul_3.
+
+dnl  Copyright 2002, 2006 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C Runs at 3.0 cycles/limb.
+
+C With 2-way unrolling, we could probably reach 2.25 c/l (3.33 i/c).
+
+
+C  INPUT PARAMETERS
+define(`rp',`r16')              C limbs here are loaded, added to and stored back
+define(`up',`r17')              C source limbs, one loaded per loop iteration
+define(`n',`r18')               C number of up limbs, counted down to zero
+define(`vp',`r19')              C three multiplier limbs are loaded from here on entry
+
+C  Useful register aliases
+define(`numb_mask',`r24')       C all ones shifted right by NAIL_BITS
+define(`ulimb',`r25')           C current limb loaded from up
+define(`rlimb',`r27')           C current limb loaded from rp
+
+define(`m0a',`r0')              C mXa receive mulq results; r0 also gets the final result
+define(`m0b',`r1')              C mXb receive umulh results
+define(`m1a',`r2')
+define(`m1b',`r3')
+define(`m2a',`r20')
+define(`m2b',`r21')
+
+define(`acc0',`r4')              C column sums carried from one up limb to the next
+define(`acc1',`r5')
+define(`acc2',`r22')
+
+define(`v0',`r6')               C multiplier limbs, shifted left by NAIL_BITS on entry
+define(`v1',`r7')
+define(`v2',`r23')
+
+C Used for temps: r8 r19 r28 (r19 is vp on entry, reused once v0-v2 are loaded)
+
+define(`NAIL_BITS',`GMP_NAIL_BITS')
+define(`NUMB_BITS',`GMP_NUMB_BITS')
+
+C  This declaration is munged by configure
+NAILS_SUPPORT(3-63)
+
+ASM_START()
+PROLOGUE(mpn_addmul_3)
+       lda     numb_mask,-1(r31)               C numb_mask = -1 (all ones)
+       srl     numb_mask,NAIL_BITS,numb_mask   C numb_mask = all ones >> NAIL_BITS
+
+       ldq     v0,     0(vp)                   C the three multiplier limbs
+       ldq     v1,     8(vp)
+       ldq     v2,     16(vp)
+
+       bis     r31,    r31,    acc0            C       zero acc0
+       sll     v0,NAIL_BITS,   v0              C pre-shift v0 left by NAIL_BITS
+       bis     r31,    r31,    acc1            C       zero acc1
+       sll     v1,NAIL_BITS,   v1              C pre-shift v1 left by NAIL_BITS
+       bis     r31,    r31,    acc2            C       zero acc2
+       sll     v2,NAIL_BITS,   v2              C pre-shift v2 left by NAIL_BITS
+       bis     r31,    r31,    r19             C zero r19: vp is dead, r19 now carries the nail part
+
+       ldq     ulimb,  0(up)                   C first up limb, its products start before the loop
+       lda     up,     8(up)
+       mulq    v0,     ulimb,  m0a             C U1
+       umulh   v0,     ulimb,  m0b             C U1
+       mulq    v1,     ulimb,  m1a             C U1
+       umulh   v1,     ulimb,  m1b             C U1
+       lda     n,      -1(n)                   C one up limb is already in flight
+       mulq    v2,     ulimb,  m2a             C U1
+       umulh   v2,     ulimb,  m2b             C U1
+       beq     n,      L(end)                  C U0
+
+       ALIGN(16)
+L(top):        ldq     rlimb,  0(rp)                   C L1
+       ldq     ulimb,  0(up)                   C L0
+       bis     r31,    r31,    r31             C U0    nop
+       addq    r19,    acc0,   acc0            C U1    propagate nail
+
+       lda     rp,     8(rp)                   C L1
+       srl     m0a,NAIL_BITS,  r8              C U0
+       lda     up,     8(up)                   C L0
+       mulq    v0,     ulimb,  m0a             C U1
+
+       addq    r8,     acc0,   r19             C U0
+       addq    m0b,    acc1,   acc0            C L1
+       umulh   v0,     ulimb,  m0b             C U1
+       bis     r31,    r31,    r31             C L0    nop
+
+       addq    rlimb,  r19,    r19             C L1
+       srl     m1a,NAIL_BITS,  r8              C U0
+       bis     r31,    r31,    r31             C L0    nop
+       mulq    v1,     ulimb,  m1a             C U1
+
+       addq    r8,     acc0,   acc0            C U0
+       addq    m1b,    acc2,   acc1            C L1
+       umulh   v1,     ulimb,  m1b             C U1
+       and     r19,numb_mask,  r28             C L0    extract numb part
+
+       bis     r31,    r31,    r31             C L1    nop
+       srl     m2a,NAIL_BITS,  r8              C U0
+       lda     n,      -1(n)                   C L0
+       mulq    v2,     ulimb,  m2a             C U1
+
+       addq    r8,     acc1,   acc1            C L0
+       bis     r31,    m2b,    acc2            C L1
+       umulh   v2,     ulimb,  m2b             C U1
+       srl     r19,NUMB_BITS,  r19             C U0    extract nail part
+
+       stq     r28,    -8(rp)                  C L
+       bne     n,      L(top)                  C U0
+
+L(end):        ldq     rlimb,  0(rp)           C wind-down: same steps as the loop, no new products
+       addq    r19,    acc0,   acc0            C       propagate nail
+       lda     rp,     8(rp)
+       srl     m0a,NAIL_BITS,  r8              C U0
+       addq    r8,     acc0,   r19             C r19 = acc0 + shifted mulq part of the v0 product
+       addq    m0b,    acc1,   acc0            C next column: umulh part of v0 product + acc1
+       addq    rlimb,  r19,    r19             C add the existing result limb
+       srl     m1a,NAIL_BITS,  r8              C U0
+       addq    r8,     acc0,   acc0            C add shifted mulq part of the v1 product
+       addq    m1b,    acc2,   acc1            C next column: umulh part of v1 product + acc2
+       and     r19,numb_mask,  r28             C extract limb
+       srl     m2a,NAIL_BITS,  r8              C U0
+       addq    r8,     acc1,   acc1            C add shifted mulq part of the v2 product
+       bis     r31,    m2b,    acc2            C acc2 = umulh part of the v2 product
+       srl     r19,NUMB_BITS,  r19             C extract nail
+       stq     r28,    -8(rp)                  C rp was already advanced, so this is the limb just loaded
+
+       addq    r19,    acc0,   acc0            C propagate nail
+       and     acc0,numb_mask, r28
+       stq     r28,    0(rp)                   C further limb, stored without reading its old value
+       srl     acc0,NUMB_BITS, r19
+       addq    r19,    acc1,   acc1
+
+       and     acc1,numb_mask, r28
+       stq     r28,    8(rp)                   C second further limb, also store-only
+       srl     acc1,NUMB_BITS, r19
+       addq    r19,    acc2,   m0a             C m0a is r0: remaining high part left in r0 at return
+
+       ret     r31,    (r26),  1
+EPILOGUE()
+ASM_END()
diff --git a/mpn/alpha/ev6/nails/addmul_4.asm b/mpn/alpha/ev6/nails/addmul_4.asm

new file mode 100644 (file)

index 0000000..f19b023
--- /dev/null
+++ b/mpn/alpha/ev6/nails/addmul_4.asm
@@ -0,0 +1,199 @@
+dnl  Alpha ev6 nails mpn_addmul_4.
+
+dnl  Copyright 2002, 2005, 2006 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C Runs at 2.5 cycles/limb.
+
+C We should go for 2-way unrolling over 17 cycles, for 2.125 c/l corresponding
+C to 3.24 insn/cycle.
+
+
+C  INPUT PARAMETERS
+define(`rp',`r16')              C limbs here are loaded, added to and stored back
+define(`up',`r17')              C source limbs, one loaded per loop iteration
+define(`n',`r18')               C number of up limbs, counted down to zero
+define(`vp',`r19')              C four multiplier limbs are loaded from here on entry
+
+C  Useful register aliases
+define(`numb_mask',`r24')       C all ones shifted right by NAIL_BITS
+define(`ulimb',`r25')           C current limb loaded from up
+define(`rlimb',`r27')           C current limb loaded from rp
+
+define(`m0a',`r0')              C mXa receive mulq results, mXb receive umulh results
+define(`m0b',`r1')
+define(`m1a',`r2')
+define(`m1b',`r3')
+define(`m2a',`r20')
+define(`m2b',`r21')
+define(`m3a',`r12')             C r12 is saved and restored by the function
+define(`m3b',`r13')             C r13 is saved and restored by the function
+
+define(`acc0',`r4')             C column sums carried from one up limb to the next
+define(`acc1',`r5')
+define(`acc2',`r22')
+define(`acc3',`r14')            C r14 is saved and restored by the function
+
+define(`v0',`r6')               C multiplier limbs, shifted left by NAIL_BITS on entry
+define(`v1',`r7')
+define(`v2',`r23')
+define(`v3',`r15')              C r15 is saved and restored by the function
+
+C Used for temps: r8 r19 r28 (r19 is vp on entry, reused once v0-v3 are loaded)
+
+define(`NAIL_BITS',`GMP_NAIL_BITS')
+define(`NUMB_BITS',`GMP_NUMB_BITS')
+
+C  This declaration is munged by configure
+NAILS_SUPPORT(4-63)
+
+ASM_START()
+PROLOGUE(mpn_addmul_4)
+       lda     r30,    -240(r30)               C open a 240-byte frame; only offsets 32-56 are used here
+       stq     r12,    32(r30)                 C save r12-r15, used below as m3a, m3b, acc3, v3
+       stq     r13,    40(r30)
+       stq     r14,    48(r30)
+       stq     r15,    56(r30)
+
+       lda     numb_mask,-1(r31)               C numb_mask = -1 (all ones)
+       srl     numb_mask,NAIL_BITS,numb_mask   C numb_mask = all ones >> NAIL_BITS
+
+       ldq     v0,     0(vp)                   C the four multiplier limbs
+       ldq     v1,     8(vp)
+       ldq     v2,     16(vp)
+       ldq     v3,     24(vp)
+
+       bis     r31,    r31,    acc0            C       zero acc0
+       sll     v0,NAIL_BITS,   v0              C pre-shift v0 left by NAIL_BITS
+       bis     r31,    r31,    acc1            C       zero acc1
+       sll     v1,NAIL_BITS,   v1              C pre-shift v1 left by NAIL_BITS
+       bis     r31,    r31,    acc2            C       zero acc2
+       sll     v2,NAIL_BITS,   v2              C pre-shift v2 left by NAIL_BITS
+       bis     r31,    r31,    acc3            C       zero acc3
+       sll     v3,NAIL_BITS,   v3              C pre-shift v3 left by NAIL_BITS
+       bis     r31,    r31,    r19             C zero r19: vp is dead, r19 now carries the nail part
+
+       ldq     ulimb,  0(up)                   C first up limb, its products start before the loop
+       lda     up,     8(up)
+       mulq    v0,     ulimb,  m0a             C U1
+       umulh   v0,     ulimb,  m0b             C U1
+       mulq    v1,     ulimb,  m1a             C U1
+       umulh   v1,     ulimb,  m1b             C U1
+       lda     n,      -1(n)                   C one up limb is already in flight
+       mulq    v2,     ulimb,  m2a             C U1
+       umulh   v2,     ulimb,  m2b             C U1
+       mulq    v3,     ulimb,  m3a             C U1
+       umulh   v3,     ulimb,  m3b             C U1
+       beq     n,      L(end)                  C U0
+
+       ALIGN(16)
+L(top):        bis     r31,    r31,    r31             C U1    nop
+       ldq     rlimb,  0(rp)                   C L0
+       ldq     ulimb,  0(up)                   C L1
+       addq    r19,    acc0,   acc0            C U0    propagate nail
+
+       bis     r31,    r31,    r31             C L0    nop
+       bis     r31,    r31,    r31             C U1    nop
+       bis     r31,    r31,    r31             C L1    nop
+       bis     r31,    r31,    r31             C U0    nop
+
+       lda     rp,     8(rp)                   C L0
+       srl     m0a,NAIL_BITS,  r8              C U0
+       lda     up,     8(up)                   C L1
+       mulq    v0,     ulimb,  m0a             C U1
+
+       addq    r8,     acc0,   r19             C U0
+       addq    m0b,    acc1,   acc0            C L0
+       umulh   v0,     ulimb,  m0b             C U1
+       bis     r31,    r31,    r31             C L1    nop
+
+       addq    rlimb,  r19,    r19             C L0
+       srl     m1a,NAIL_BITS,  r8              C U0
+       bis     r31,    r31,    r31             C L1    nop
+       mulq    v1,     ulimb,  m1a             C U1
+
+       addq    r8,     acc0,   acc0            C U0
+       addq    m1b,    acc2,   acc1            C L0
+       umulh   v1,     ulimb,  m1b             C U1
+       and     r19,numb_mask,  r28             C L1    extract numb part
+
+       bis     r31,    r31,    r31             C L0    nop
+       srl     m2a,NAIL_BITS,  r8              C U0
+       lda     n,      -1(n)                   C L1
+       mulq    v2,     ulimb,  m2a             C U1
+
+       addq    r8,     acc1,   acc1            C L1
+       addq    m2b,    acc3,   acc2            C L0
+       umulh   v2,     ulimb,  m2b             C U1
+       srl     r19,NUMB_BITS,  r19             C U0    extract nail part
+
+       bis     r31,    r31,    r31             C L0    nop
+       srl     m3a,NAIL_BITS,  r8              C U0
+       stq     r28,    -8(rp)                  C L1
+       mulq    v3,     ulimb,  m3a             C U1
+
+       addq    r8,     acc2,   acc2            C L0
+       bis     r31,    m3b,    acc3            C L1
+       umulh   v3,     ulimb,  m3b             C U1
+       bne     n,      L(top)                  C U0
+
+L(end):        ldq     rlimb,  0(rp)           C wind-down: same steps as the loop, no new products
+       addq    r19,    acc0,   acc0            C       propagate nail
+       lda     rp,     8(rp)                   C FIXME: DELETE
+       srl     m0a,NAIL_BITS,  r8              C U0
+       addq    r8,     acc0,   r19             C r19 = acc0 + shifted mulq part of the v0 product
+       addq    m0b,    acc1,   acc0            C next column: umulh part of v0 product + acc1
+       addq    rlimb,  r19,    r19             C add the existing result limb
+       srl     m1a,NAIL_BITS,  r8              C U0
+       addq    r8,     acc0,   acc0            C add shifted mulq part of the v1 product
+       addq    m1b,    acc2,   acc1            C next column: umulh part of v1 product + acc2
+       and     r19,numb_mask,  r28             C extract limb
+       srl     m2a,NAIL_BITS,  r8              C U0
+       addq    r8,     acc1,   acc1            C add shifted mulq part of the v2 product
+       addq    m2b,    acc3,   acc2            C next column: umulh part of v2 product + acc3
+       srl     r19,NUMB_BITS,  r19             C extract nail
+       srl     m3a,NAIL_BITS,  r8              C U0
+       stq     r28,    -8(rp)                  C rp was already advanced, so this is the limb just loaded
+       addq    r8,     acc2,   acc2            C add shifted mulq part of the v3 product
+       bis     r31,    m3b,    acc3            C acc3 = umulh part of the v3 product
+
+       addq    r19,    acc0,   acc0            C propagate nail
+       and     acc0,numb_mask, r28
+       stq     r28,    0(rp)                   C further limb, stored without reading its old value
+       srl     acc0,NUMB_BITS, r19
+       addq    r19,    acc1,   acc1
+
+       and     acc1,numb_mask, r28
+       stq     r28,    8(rp)                   C second further limb, also store-only
+       srl     acc1,NUMB_BITS, r19
+       addq    r19,    acc2,   acc2
+
+       and     acc2,numb_mask, r28
+       stq     r28,    16(rp)                  C third further limb, also store-only
+       srl     acc2,NUMB_BITS, r19
+       addq    r19,    acc3,   r0              C r0 = remaining high part, left in r0 at return
+
+       ldq     r12,    32(r30)                 C restore the registers saved on entry
+       ldq     r13,    40(r30)
+       ldq     r14,    48(r30)
+       ldq     r15,    56(r30)
+       lda     r30,    240(r30)                C release the frame
+       ret     r31,    (r26),  1
+EPILOGUE()
+ASM_END()
diff --git a/mpn/alpha/ev6/nails/aors_n.asm b/mpn/alpha/ev6/nails/aors_n.asm

new file mode 100644 (file)

index 0000000..4958e81
--- /dev/null
+++ b/mpn/alpha/ev6/nails/aors_n.asm
@@ -0,0 +1,222 @@
+dnl  Alpha ev6 nails mpn_add_n and mpn_sub_n.
+
+dnl  Copyright 2002, 2006 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+dnl  Runs at 2.5 cycles/limb.  It would be possible to reach 2.0 cycles/limb
+dnl  with 8-way unrolling.
+
+include(`../config.m4')
+
+dnl  INPUT PARAMETERS
+dnl  rp is the pointer stored through, up and vp are the pointers loaded
+dnl  from, and n is the limb count.
+define(`rp',`r16')
+define(`up',`r17')
+define(`vp',`r18')
+define(`n',`r19')
+
+dnl  Raw per-limb results of the four-way unrolled code, before masking with
+dnl  numb_mask.  rl0 is r0, which also receives the return value at L(ret).
+define(`rl0',`r0')
+define(`rl1',`r1')
+define(`rl2',`r2')
+define(`rl3',`r3')
+
+dnl  Limbs loaded from up.
+define(`ul0',`r4')
+define(`ul1',`r5')
+define(`ul2',`r6')
+define(`ul3',`r7')
+
+dnl  Limbs loaded from vp.  vl3 is r25, which the prologue also uses as the
+dnl  n mod 4 counter before any vl3 load happens.
+define(`vl0',`r22')
+define(`vl1',`r23')
+define(`vl2',`r24')
+define(`vl3',`r25')
+
+dnl  Set by the prologue to all ones shifted right by NAIL_BITS.
+define(`numb_mask',`r21')
+
+dnl  CYSH is the right shift count that brings the carry (add) or borrow (sub)
+dnl  bit of a raw result down to bit 0.  The value given here is redefined by
+dnl  the OPERATION_ blocks below.
+define(`NAIL_BITS',`GMP_NAIL_BITS')
+define(`CYSH',`GMP_NUMB_BITS')
+
+dnl  This declaration is munged by configure
+NAILS_SUPPORT(1-63)
+
+dnl  Select the operation.  OP is the instruction used both for the main
+dnl  add/subtract and for applying the incoming carry/borrow, and func is the
+dnl  entry point name.  For sub_n the borrow is taken from bit 63 of the raw
+dnl  difference, for add_n the carry is the bit at position GMP_NUMB_BITS.
+ifdef(`OPERATION_add_n', `
+       define(`OP',        addq)
+       define(`CYSH',`GMP_NUMB_BITS')
+       define(`func',  mpn_add_n)')
+ifdef(`OPERATION_sub_n', `
+       define(`OP',        subq)
+       define(`CYSH',63)
+       define(`func',  mpn_sub_n)')
+
+dnl  NOTE(review): presumably tells the build machinery which entry points
+dnl  this file can provide -- confirm against the macro definition.
+MULFUNC_PROLOGUE(mpn_add_n mpn_sub_n)
+
+C  func is mpn_add_n or mpn_sub_n, depending on which OPERATION_ symbol is
+C  defined when this file is processed.
+C
+C  For each of the n limbs this computes up[i] OP vp[i] OP carry, stores the
+C  result masked with numb_mask at rp[i], and passes the bit selected by CYSH
+C  on as the carry (add) or borrow (sub) into the next limb.
+C
+C  In:     rp (r16), up (r17), vp (r18), n (r19)
+C  Out:    r0 = bit 0 of the last carry/borrow
+C  Writes: r0-r7, r16-r25, r27, r28.  The stack pointer is not touched.
+C
+C  The n mod 4 odd limbs are handled one at a time in L(lp0).  The remaining
+C  limbs are handled four at a time by software pipelined code, in which the
+C  stores for limbs 2 and 3 of a group are issued while the next group is
+C  being added.
+C
+C  NOTE(review): n = 0 is not special-cased.  It would branch to L(ge4) and
+C  process four limbs, so callers are presumably required to pass n >= 1.
+ASM_START()
+PROLOGUE(func)
+C  numb_mask = all ones with the top NAIL_BITS bits cleared, r20 = carry 0
+       lda     numb_mask, -1(r31)
+       srl     numb_mask, NAIL_BITS, numb_mask
+       bis     r31,    r31,    r20
+
+C  r25 = n mod 4, the number of passes through L(lp0).  n is biased by -4.
+       and     n,      3,      r25
+       lda     n,      -4(n)
+       beq     r25,    L(ge4)
+
+C  One limb per pass: raw result in rl0, masked limb stored at the old rp,
+C  carry/borrow for the next limb left in r20.
+L(lp0):        ldq     ul0,    0(up)
+       lda     up,     8(up)
+       ldq     vl0,    0(vp)
+       lda     vp,     8(vp)
+       lda     rp,     8(rp)
+       lda     r25,    -1(r25)
+       OP      ul0,    vl0,    rl0
+       OP      rl0,    r20,    rl0
+       and     rl0, numb_mask, r28
+       stq     r28,    -8(rp)
+       srl     rl0,    CYSH,   r20
+       bne     r25,    L(lp0)
+
+C  The biased n is negative when the original n was below 4: all done.
+       blt     n,      L(ret)
+
+C  Load four limb pairs.  If at least four more limbs follow them (n still
+C  non-negative after the decrement) continue at L(ge8), otherwise start
+C  limbs 0 and 1 here and finish the group at L(cj0).
+L(ge4):        ldq     ul0,    0(up)
+       ldq     vl0,    0(vp)
+       ldq     ul1,    8(up)
+       ldq     vl1,    8(vp)
+       ldq     ul2,    16(up)
+       ldq     vl2,    16(vp)
+       ldq     ul3,    24(up)
+       ldq     vl3,    24(vp)
+       lda     up,     32(up)
+       lda     vp,     32(vp)
+       lda     n,      -4(n)
+       bge     n,      L(ge8)
+
+       OP      ul0,    vl0,    rl0     C               main-add 0
+       OP      rl0,    r20,    rl0     C               cy-add 0
+       OP      ul1,    vl1,    rl1     C               main-add 1
+       srl     rl0,    CYSH,   r20     C               gen cy 0
+       OP      rl1,    r20,    rl1     C               cy-add 1
+       and     rl0,numb_mask,  r27
+       br      r31,    L(cj0)
+
+C  Feed-in for the loop: add the four loaded limb pairs while loading the
+C  next four.  Limbs 0 and 1 are stored here.  Limb 2 (masked, in r27) and
+C  limb 3 (raw, in rl3) are left for L(top) or L(end) to store, which is why
+C  those use negative offsets from the advanced rp.
+L(ge8):        OP      ul0,    vl0,    rl0     C               main-add 0
+       ldq     ul0,    0(up)
+       ldq     vl0,    0(vp)
+       OP      rl0,    r20,    rl0     C               cy-add 0
+       OP      ul1,    vl1,    rl1     C               main-add 1
+       srl     rl0,    CYSH,   r20     C               gen cy 0
+       ldq     ul1,    8(up)
+       ldq     vl1,    8(vp)
+       OP      rl1,    r20,    rl1     C               cy-add 1
+       and     rl0,numb_mask,  r27
+       OP      ul2,    vl2,    rl2     C               main-add 2
+       srl     rl1,    CYSH,   r20     C               gen cy 1
+       ldq     ul2,    16(up)
+       ldq     vl2,    16(vp)
+       OP      rl2,    r20,    rl2     C               cy-add 2
+       and     rl1,numb_mask,  r28
+       stq     r27,    0(rp)
+       OP      ul3,    vl3,    rl3     C               main-add 3
+       srl     rl2,    CYSH,   r20     C               gen cy 2
+       ldq     ul3,    24(up)
+       ldq     vl3,    24(vp)
+       OP      rl3,    r20,    rl3     C               cy-add 3
+       and     rl2,numb_mask,  r27
+       stq     r28,    8(rp)
+       lda     rp,     32(rp)
+       lda     up,     32(up)
+       lda     vp,     32(vp)
+       lda     n,      -4(n)
+       blt     n,      L(end)
+
+C  Main loop, four limbs per pass.  Each pass stores limbs 2 and 3 of the
+C  previous group at -16(rp) and -8(rp), stores limbs 0 and 1 of the current
+C  group at 0(rp) and 8(rp), and loads the group after the current one.
+C  The bis r31,r31,r31 instructions only write r31 and so change no state;
+C  presumably they are padding for ev6 instruction slotting.
+       ALIGN(32)
+L(top):        OP      ul0,    vl0,    rl0     C               main-add 0
+       srl     rl3,    CYSH,   r20     C               gen cy 3
+       ldq     ul0,    0(up)
+       ldq     vl0,    0(vp)
+
+       OP      rl0,    r20,    rl0     C               cy-add 0
+       and     rl3,numb_mask,  r28
+       stq     r27,    -16(rp)
+       bis     r31,    r31,    r31
+
+       OP      ul1,    vl1,    rl1     C               main-add 1
+       srl     rl0,    CYSH,   r20     C               gen cy 0
+       ldq     ul1,    8(up)
+       ldq     vl1,    8(vp)
+
+       OP      rl1,    r20,    rl1     C               cy-add 1
+       and     rl0,numb_mask,  r27
+       stq     r28,    -8(rp)
+       bis     r31,    r31,    r31
+
+       OP      ul2,    vl2,    rl2     C               main-add 2
+       srl     rl1,    CYSH,   r20     C               gen cy 1
+       ldq     ul2,    16(up)
+       ldq     vl2,    16(vp)
+
+       OP      rl2,    r20,    rl2     C               cy-add 2
+       and     rl1,numb_mask,  r28
+       stq     r27,    0(rp)
+       bis     r31,    r31,    r31
+
+       OP      ul3,    vl3,    rl3     C               main-add 3
+       srl     rl2,    CYSH,   r20     C               gen cy 2
+       ldq     ul3,    24(up)
+       ldq     vl3,    24(vp)
+
+       OP      rl3,    r20,    rl3     C               cy-add 3
+       and     rl2,numb_mask,  r27
+       stq     r28,    8(rp)
+       bis     r31,    r31,    r31
+
+       bis     r31,    r31,    r31
+       lda     n,      -4(n)
+       lda     up,     32(up)
+       lda     vp,     32(vp)
+
+       bis     r31,    r31,    r31
+       bis     r31,    r31,    r31
+       lda     rp,     32(rp)
+       bge     n,      L(top)
+
+C  Wind-down: the loop body once more without any loads, finishing the
+C  previous group and the last loaded group.
+L(end):        OP      ul0,    vl0,    rl0     C               main-add 0
+       srl     rl3,    CYSH,   r20     C               gen cy 3
+       OP      rl0,    r20,    rl0     C               cy-add 0
+       and     rl3,numb_mask,  r28
+       stq     r27,    -16(rp)
+       OP      ul1,    vl1,    rl1     C               main-add 1
+       srl     rl0,    CYSH,   r20     C               gen cy 0
+       OP      rl1,    r20,    rl1     C               cy-add 1
+       and     rl0,numb_mask,  r27
+       stq     r28,    -8(rp)
+C  L(cj0) is also entered from L(ge4) when exactly four limbs remain.  On
+C  entry r27 holds masked limb 0 and rl1 holds raw limb 1.
+L(cj0):        OP      ul2,    vl2,    rl2     C               main-add 2
+       srl     rl1,    CYSH,   r20     C               gen cy 1
+       OP      rl2,    r20,    rl2     C               cy-add 2
+       and     rl1,numb_mask,  r28
+       stq     r27,    0(rp)
+       OP      ul3,    vl3,    rl3     C               main-add 3
+       srl     rl2,    CYSH,   r20     C               gen cy 2
+       OP      rl3,    r20,    rl3     C               cy-add 3
+       and     rl2,numb_mask,  r27
+       stq     r28,    8(rp)
+
+       srl     rl3,    CYSH,   r20     C               gen cy 3
+       and     rl3,numb_mask,  r28
+       stq     r27,    16(rp)
+       stq     r28,    24(rp)
+
+C  Return value: bit 0 of the final carry/borrow in r20.
+L(ret):        and     r20,    1,      r0
+       ret     r31,    (r26),  1
+EPILOGUE()
+ASM_END()
diff --git a/mpn/alpha/ev6/nails/gmp-mparam.h b/mpn/alpha/ev6/nails/gmp-mparam.h

new file mode 100644 (file)

index 0000000..9911ea2
--- /dev/null
+++ b/mpn/alpha/ev6/nails/gmp-mparam.h
@@ -0,0 +1,62 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2004 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+/* Limb geometry for this configuration: 64 bits per limb, held in 8 bytes.  */
+#define GMP_LIMB_BITS 64
+#define BYTES_PER_MP_LIMB 8
+
+/* Generated by tuneup.c, 2004-02-07, gcc 3.3 */
+
+/* Multiplication algorithm crossover points.
+   NOTE(review): presumably operand sizes in limbs -- confirm against the
+   code that consumes these macros.  */
+#define MUL_TOOM22_THRESHOLD             40
+#define MUL_TOOM33_THRESHOLD            236
+
+/* Squaring algorithm crossover points.  */
+#define SQR_BASECASE_THRESHOLD            7  /* karatsuba */
+#define SQR_TOOM2_THRESHOLD               0  /* never sqr_basecase */
+#define SQR_TOOM3_THRESHOLD             120
+
+/* Division and modular exponentiation.  A value of MP_SIZE_T_MAX is used
+   here and below for the entries marked "no preinv with nails".  */
+#define DIV_SB_PREINV_THRESHOLD       MP_SIZE_T_MAX  /* no preinv with nails */
+#define DIV_DC_THRESHOLD                 48
+#define POWM_THRESHOLD                  113
+
+/* GCD and Jacobi symbol.  */
+#define HGCD_THRESHOLD                   78
+#define GCD_ACCEL_THRESHOLD               3
+#define GCD_DC_THRESHOLD                392
+#define JACOBI_BASE_METHOD                1
+
+/* Division and remainder by one or two limbs.  */
+#define DIVREM_1_NORM_THRESHOLD       MP_SIZE_T_MAX  /* no preinv with nails */
+#define DIVREM_1_UNNORM_THRESHOLD     MP_SIZE_T_MAX  /* no preinv with nails */
+#define MOD_1_NORM_THRESHOLD          MP_SIZE_T_MAX  /* no preinv with nails */
+#define MOD_1_UNNORM_THRESHOLD        MP_SIZE_T_MAX  /* no preinv with nails */
+#define USE_PREINV_DIVREM_1               0  /* no preinv with nails */
+#define USE_PREINV_MOD_1                  0  /* no preinv with nails */
+#define DIVREM_2_THRESHOLD            MP_SIZE_T_MAX  /* no preinv with nails */
+#define DIVEXACT_1_THRESHOLD              0  /* always */
+#define MODEXACT_1_ODD_THRESHOLD          0  /* always */
+
+/* Radix conversion (get_str / set_str).  */
+#define GET_STR_DC_THRESHOLD             15
+#define GET_STR_PRECOMPUTE_THRESHOLD     24
+#define SET_STR_THRESHOLD              6336
+
+/* FFT multiplication.  Each table is a list terminated by 0.  */
+#define MUL_FFT_TABLE  { 688, 1440, 3648, 6400, 25600, 0 }
+#define MUL_FFT_MODF_THRESHOLD          488
+#define MUL_FFT_THRESHOLD              3712
+
+/* FFT squaring, same layout as the multiplication entries above.  */
+#define SQR_FFT_TABLE  { 432, 864, 3136, 6400, 25600, 0 }
+#define SQR_FFT_MODF_THRESHOLD          480
+#define SQR_FFT_THRESHOLD              2976
diff --git a/mpn/alpha/ev6/nails/mul_1.asm b/mpn/alpha/ev6/nails/mul_1.asm

new file mode 100644 (file)

index 0000000..8e2330a
--- /dev/null
+++ b/mpn/alpha/ev6/nails/mul_1.asm
@@ -0,0 +1,353 @@
+dnl  Alpha ev6 nails mpn_mul_1.
+
+dnl  Copyright 2002, 2005, 2006 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C      cycles/limb
+C EV4:    42
+C EV5:    18
+C EV6:     3.25
+
+C TODO
+C  * Reroll loop for 3.0 c/l with current 4-way unrolling.
+C  * The loop is overscheduled wrt loads and wrt multiplies, in particular
+C    umulh.
+C  * Use FP loop count and multiple exit points, that would simplify feed-in lp0
+C    and would work since the loop structure is really regular.
+
+C  INPUT PARAMETERS
+C  rp is the pointer stored through, up the pointer loaded from, n the limb
+C  count and vl0 the single limb multiplier.
+define(`rp',`r16')
+define(`up',`r17')
+define(`n', `r18')
+define(`vl0',`r19')
+
+C  Set by the prologue to all ones shifted right by NAIL_BITS.
+define(`numb_mask',`r6')
+
+C  Product halves for the four unrolled positions: mXa receives the mulq
+C  result and mXb the umulh result of vl0 times ulX.  m0a is r0, which is
+C  also written with the return value.
+define(`m0a',`r0')
+define(`m0b',`r1')
+define(`m1a',`r2')
+define(`m1b',`r3')
+define(`m2a',`r20')
+define(`m2b',`r21')
+define(`m3a',`r22')
+define(`m3b',`r23')
+
+C  Two accumulators used alternately for successive result limbs.
+define(`acc0',`r25')
+define(`acc1',`r27')
+
+C  Limbs loaded from up.  ul0/ul2 share r4 and ul1/ul3 share r5.
+define(`ul0',`r4')
+define(`ul1',`r5')
+define(`ul2',`r4')
+define(`ul3',`r5')
+
+C  rl0-rl3 all name r24.  They are not referenced by the code in this file.
+define(`rl0',`r24')
+define(`rl1',`r24')
+define(`rl2',`r24')
+define(`rl3',`r24')
+
+C  t0 holds a mulq result shifted right by NAIL_BITS, t1 holds an
+C  accumulator shifted right by NUMB_BITS (the carry into the next limb).
+define(`t0',`r7')
+define(`t1',`r8')
+
+define(`NAIL_BITS',`GMP_NAIL_BITS')
+define(`NUMB_BITS',`GMP_NUMB_BITS')
+
+dnl  This declaration is munged by configure
+NAILS_SUPPORT(1-63)
+
+C  mpn_mul_1 (rp, up, n, vl0)
+C
+C  Multiplies the n limbs at up by the single limb vl0, stores n result
+C  limbs (each masked with numb_mask) at rp, and returns in r0 the final
+C  umulh result plus the carry out of the last stored limb.
+C
+C  vl0 is first shifted left by NAIL_BITS.  For each source limb, mulq and
+C  umulh give the two halves of the 128-bit product with the shifted vl0.
+C  The mulq half shifted right by NAIL_BITS is used as the low part for the
+C  current result limb and the umulh half is added into the following one,
+C  so for result limb i the accumulator is
+C      acc = low(i) + high(i-1) + (previous acc >> NUMB_BITS)
+C  of which acc & numb_mask is stored and acc >> NUMB_BITS is carried on.
+C
+C  In:     rp (r16), up (r17), n (r18), vl0 (r19)
+C  Out:    r0
+C  Writes: r0-r8, r16-r23, r25, r27, r28.  The stack pointer is not touched.
+C
+C  The code dispatches on n mod 4 to one of four feed-in sequences.  Each
+C  biases rp so that the fixed store offsets of the unrolled code fit, then
+C  enters the four-way unrolled loop at L(el0)..L(el3), or for small n the
+C  wind-down code at L(ta2)..L(ta6).
+C
+C  NOTE(review): n = 0 is not special-cased.  It selects L(0m4), which
+C  always loads and processes four limbs, so callers are presumably required
+C  to pass n >= 1.
+ASM_START()
+PROLOGUE(mpn_mul_1)
+C  Pre-shift the multiplier and build numb_mask = all ones >> NAIL_BITS.
+       sll     vl0, NAIL_BITS, vl0
+       lda     numb_mask, -1(r31)
+       srl     numb_mask, NAIL_BITS, numb_mask
+
+C  r25 = n mod 4.  Values 1, 2 and 0 branch away, 3 falls through.
+       and     n,      3,      r25
+       cmpeq   r25,    1,      r21
+       bne     r21,    L(1m4)
+       cmpeq   r25,    2,      r21
+       bne     r21,    L(2m4)
+       beq     r25,    L(0m4)
+
+C  n mod 4 = 3.  rp is biased by -8 so the first result limb lands at 8(rp).
+C  With n = 3 the wind-down is entered at L(ta3), otherwise L(ge3) is used.
+L(3m4):        ldq     ul3,    0(up)
+       lda     n,      -4(n)
+       ldq     ul0,    8(up)
+       mulq    vl0,    ul3,    m3a
+       umulh   vl0,    ul3,    m3b
+       ldq     ul1,    16(up)
+       lda     up,     24(up)
+       lda     rp,     -8(rp)
+       mulq    vl0,    ul0,    m0a
+       umulh   vl0,    ul0,    m0b
+       bge     n,      L(ge3)
+
+       mulq    vl0,    ul1,    m1a
+       umulh   vl0,    ul1,    m1b
+       srl     m3a,NAIL_BITS,  t0
+       addq    t0,     r31,    acc1
+       srl     m0a,NAIL_BITS,  t0
+       addq    t0,     m3b,    acc0
+       srl     acc1,NUMB_BITS, t1
+       br      r31,    L(ta3)
+
+C  n mod 4 = 3 and n >= 7: enter the loop at L(el3).
+L(ge3):        ldq     ul2,    0(up)
+       mulq    vl0,    ul1,    m1a
+       umulh   vl0,    ul1,    m1b
+       srl     m3a,NAIL_BITS,  t0
+       ldq     ul3,    8(up)
+       lda     n,      -4(n)
+       mulq    vl0,    ul2,    m2a
+       addq    t0,     r31,    acc1
+       umulh   vl0,    ul2,    m2b
+       srl     m0a,NAIL_BITS,  t0
+       ldq     ul0,    16(up)
+       mulq    vl0,    ul3,    m3a
+       addq    t0,     m3b,    acc0
+       srl     acc1,NUMB_BITS, t1
+       br      r31,    L(el3)
+
+C  n mod 4 = 0.  rp is not biased, the first result limb lands at 0(rp).
+C  With n = 4 the wind-down is entered at L(ta4), otherwise L(ge4) is used.
+L(0m4):        lda     n,      -8(n)
+       ldq     ul2,    0(up)
+       ldq     ul3,    8(up)
+       mulq    vl0,    ul2,    m2a
+       umulh   vl0,    ul2,    m2b
+       ldq     ul0,    16(up)
+       mulq    vl0,    ul3,    m3a
+       umulh   vl0,    ul3,    m3b
+       ldq     ul1,    24(up)
+       lda     up,     32(up)
+       mulq    vl0,    ul0,    m0a
+       umulh   vl0,    ul0,    m0b
+       bge     n,      L(ge4)
+
+       srl     m2a,NAIL_BITS,  t0
+       mulq    vl0,    ul1,    m1a
+       addq    t0,     r31,    acc0
+       umulh   vl0,    ul1,    m1b
+       srl     m3a,NAIL_BITS,  t0
+       addq    t0,     m2b,    acc1
+       srl     acc0,NUMB_BITS, t1
+       br      r31,    L(ta4)
+
+C  n mod 4 = 0 and n >= 8: enter the loop at L(el0).
+L(ge4):        srl     m2a,NAIL_BITS,  t0
+       ldq     ul2,    0(up)
+       mulq    vl0,    ul1,    m1a
+       addq    t0,     r31,    acc0
+       umulh   vl0,    ul1,    m1b
+       srl     m3a,NAIL_BITS,  t0
+       ldq     ul3,    8(up)
+       lda     n,      -4(n)
+       mulq    vl0,    ul2,    m2a
+       addq    t0,     m2b,    acc1
+       srl     acc0,NUMB_BITS, t1
+       br      r31,    L(el0)
+
+C  n mod 4 = 2.  rp is biased by -16 so the first result limb lands at
+C  16(rp).  With n = 2 the wind-down is entered at L(ta2).
+L(2m4):        lda     n,      -4(n)
+       ldq     ul0,    0(up)
+       ldq     ul1,    8(up)
+       lda     up,     16(up)
+       lda     rp,     -16(rp)
+       mulq    vl0,    ul0,    m0a
+       umulh   vl0,    ul0,    m0b
+       bge     n,      L(ge2)
+
+       mulq    vl0,    ul1,    m1a
+       umulh   vl0,    ul1,    m1b
+       srl     m0a,NAIL_BITS,  t0
+       addq    t0,     r31,    acc0
+       srl     m1a,NAIL_BITS,  t0
+       addq    t0,     m0b,    acc1
+       srl     acc0,NUMB_BITS, t1
+       br      r31,    L(ta2)
+
+C  n mod 4 = 2 and n >= 6.  rp is advanced by 32 because the code entered
+C  next stores with negative offsets.  n >= 10 enters the loop at L(el2),
+C  n = 6 enters the wind-down at L(ta6).
+L(ge2):        ldq     ul2,    0(up)
+       mulq    vl0,    ul1,    m1a
+       umulh   vl0,    ul1,    m1b
+       ldq     ul3,    8(up)
+       lda     n,      -4(n)
+       mulq    vl0,    ul2,    m2a
+       umulh   vl0,    ul2,    m2b
+       srl     m0a,NAIL_BITS,  t0
+       ldq     ul0,    16(up)
+       mulq    vl0,    ul3,    m3a
+       addq    t0,     r31,    acc0
+       umulh   vl0,    ul3,    m3b
+       srl     m1a,NAIL_BITS,  t0
+       ldq     ul1,    24(up)
+       lda     up,     32(up)
+       lda     rp,     32(rp)
+       mulq    vl0,    ul0,    m0a
+       addq    t0,     m0b,    acc1
+       srl     acc0,NUMB_BITS, t1
+       bge     n,      L(el2)
+
+       br      r31,    L(ta6)
+
+C  n mod 4 = 1.  rp is biased by -24 so the first result limb lands at
+C  24(rp).  n = 1 is completed right here and returns directly.
+L(1m4):        lda     n,      -4(n)
+       ldq     ul1,    0(up)
+       lda     up,     8(up)
+       lda     rp,     -24(rp)
+       bge     n,      L(ge1)
+
+       mulq    vl0,    ul1,    m1a
+       umulh   vl0,    ul1,    m1b
+       srl     m1a,NAIL_BITS,  t0
+       addq    t0,     r31,    acc1
+       and     acc1,numb_mask, r28
+       srl     acc1,NUMB_BITS, t1
+       stq     r28,    24(rp)
+       addq    t1,     m1b,    r0
+       ret     r31,    (r26),  1
+
+C  n mod 4 = 1 and n >= 5.  rp is advanced by 32 as in L(ge2).  n = 5 enters
+C  the wind-down at L(ta5), n >= 9 continues through L(ge5) to L(el1).
+L(ge1):        ldq     ul2,    0(up)
+       mulq    vl0,    ul1,    m1a
+       umulh   vl0,    ul1,    m1b
+       ldq     ul3,    8(up)
+       lda     n,      -4(n)
+       mulq    vl0,    ul2,    m2a
+       umulh   vl0,    ul2,    m2b
+       ldq     ul0,    16(up)
+       mulq    vl0,    ul3,    m3a
+       umulh   vl0,    ul3,    m3b
+       srl     m1a,NAIL_BITS,  t0
+       ldq     ul1,    24(up)
+       lda     up,     32(up)
+       lda     rp,     32(rp)
+       mulq    vl0,    ul0,    m0a
+       addq    t0,     r31,    acc1
+       umulh   vl0,    ul0,    m0b
+       srl     m2a,NAIL_BITS,  t0
+       mulq    vl0,    ul1,    m1a
+       addq    t0,     m1b,    acc0
+       srl     acc1,NUMB_BITS, t1
+       blt     n,      L(ta5)
+
+L(ge5):        ldq     ul2,    0(up)
+       br      r31,    L(el1)
+
+C  Main loop, four limbs per pass, with entry points L(el2), L(el1), L(el0)
+C  and L(el3) for the four feed-in cases.  Each group of four instructions
+C  carries U1/L0/U0/L1 annotations, presumably the ev6 issue slot intended
+C  for each instruction.  unop is a no-op used as filler.  r28 always holds
+C  the masked limb that the next stq writes.
+       ALIGN(16)
+L(top):        mulq    vl0,    ul0,    m0a             C U1
+       addq    t0,     m0b,    acc1            C L0
+       srl     acc0,NUMB_BITS, t1              C U0
+       stq     r28,    -24(rp)                 C L1
+C
+L(el2):        umulh   vl0,    ul0,    m0b             C U1
+       and     acc0,numb_mask, r28             C L0
+       unop                                    C U0
+       unop                                    C L1
+C
+       unop                                    C U1
+       addq    t1,     acc1,   acc1            C L0
+       srl     m2a,NAIL_BITS,  t0              C U0
+       ldq     ul2,    0(up)                   C L1
+C
+       mulq    vl0,    ul1,    m1a             C U1
+       addq    t0,     m1b,    acc0            C L0
+       srl     acc1,NUMB_BITS, t1              C U0
+       stq     r28,    -16(rp)                 C L1
+C
+L(el1):        umulh   vl0,    ul1,    m1b             C U1
+       and     acc1,numb_mask, r28             C L0
+       unop                                    C U0
+       lda     n,      -4(n)                   C L1
+C
+       unop                                    C U1
+       addq    t1,     acc0,   acc0            C L0
+       srl     m3a,NAIL_BITS,  t0              C U0
+       ldq     ul3,    8(up)                   C L1
+C
+       mulq    vl0,    ul2,    m2a             C U1
+       addq    t0,     m2b,    acc1            C L0
+       srl     acc0,NUMB_BITS, t1              C U0
+       stq     r28,    -8(rp)                  C L1
+C
+L(el0):        umulh   vl0,    ul2,    m2b             C U1
+       and     acc0,numb_mask, r28             C L0
+       unop                                    C U0
+       unop                                    C L1
+C
+       unop                                    C U1
+       addq    t1,     acc1,   acc1            C L0
+       srl     m0a,NAIL_BITS,  t0              C U0
+       ldq     ul0,    16(up)                  C L1
+C
+       mulq    vl0,    ul3,    m3a             C U1
+       addq    t0,     m3b,    acc0            C L0
+       srl     acc1,NUMB_BITS, t1              C U0
+       stq     r28,    0(rp)                   C L1
+C
+L(el3):        umulh   vl0,    ul3,    m3b             C U1
+       and     acc1,numb_mask, r28             C L0
+       unop                                    C U0
+       unop                                    C L1
+C
+       unop                                    C U1
+       addq    t1,     acc0,   acc0            C L0
+       srl     m1a,NAIL_BITS,  t0              C U0
+       ldq     ul1,    24(up)                  C L1
+C
+       lda     up,     32(up)                  C L0
+       unop                                    C U1
+       lda     rp,     32(rp)                  C L1
+       bge     n,      L(top)                  C U0
+
+C  Wind-down: the same steps as the loop without further loads.  Entering at
+C  L(taN) leaves exactly N result limbs still to be stored.
+L(end):        mulq    vl0,    ul0,    m0a
+       addq    t0,     m0b,    acc1
+       srl     acc0,NUMB_BITS, t1
+       stq     r28,    -24(rp)
+L(ta6):        umulh   vl0,    ul0,    m0b
+       and     acc0,numb_mask, r28
+       addq    t1,     acc1,   acc1
+       srl     m2a,NAIL_BITS,  t0
+       mulq    vl0,    ul1,    m1a
+       addq    t0,     m1b,    acc0
+       srl     acc1,NUMB_BITS, t1
+       stq     r28,    -16(rp)
+L(ta5):        umulh   vl0,    ul1,    m1b
+       and     acc1,numb_mask, r28
+       addq    t1,     acc0,   acc0
+       srl     m3a,NAIL_BITS,  t0
+       addq    t0,     m2b,    acc1
+       srl     acc0,NUMB_BITS, t1
+       stq     r28,    -8(rp)
+       ALIGN(16)
+L(ta4):        and     acc0,numb_mask, r28
+       addq    t1,     acc1,   acc1
+       srl     m0a,NAIL_BITS,  t0
+       addq    t0,     m3b,    acc0
+       srl     acc1,NUMB_BITS, t1
+       stq     r28,    0(rp)
+       unop
+       ALIGN(16)
+L(ta3):        and     acc1,numb_mask, r28
+       addq    t1,     acc0,   acc0
+       srl     m1a,NAIL_BITS,  t0
+       addq    t0,     m0b,    acc1
+       srl     acc0,NUMB_BITS, t1
+       stq     r28,    8(rp)
+       unop
+       ALIGN(16)
+C  Last two limbs.  The return value is the last umulh result (m1b) plus
+C  the carry out of the last accumulator.
+L(ta2):        and     acc0,numb_mask, r28
+       addq    t1,     acc1,   acc1
+       srl     acc1,NUMB_BITS, t1
+       stq     r28,    16(rp)
+       and     acc1,numb_mask, r28
+       addq    t1,     m1b,    r0
+       stq     r28,    24(rp)
+       ret     r31,    (r26),  1
+EPILOGUE()
+ASM_END()
diff --git a/mpn/alpha/ev6/nails/submul_1.asm b/mpn/alpha/ev6/nails/submul_1.asm

new file mode 100644 (file)

index 0000000..7dd7b23
--- /dev/null
+++ b/mpn/alpha/ev6/nails/submul_1.asm
@@ -0,0 +1,385 @@
+dnl  Alpha ev6 nails mpn_submul_1.
+
+dnl  Copyright 2002, 2005, 2006 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C      cycles/limb
+C EV4:    42
+C EV5:    18
+C EV6:     4
+
+C TODO
+C  * Reroll loop for 3.75 c/l with current 4-way unrolling.
+C  * The loop is overscheduled wrt loads and wrt multiplies, in particular
+C    umulh.
+C  * Use FP loop count and multiple exit points, that would simplify feed-in lp0
+C    and would work since the loop structure is really regular.
+
+C  INPUT PARAMETERS
+C  rp is the pointer loaded from and stored back through, up the pointer of
+C  the limbs to multiply, n the limb count and vl0 the single limb
+C  multiplier.
+define(`rp',`r16')
+define(`up',`r17')
+define(`n', `r18')
+define(`vl0',`r19')
+
+C  Set by the prologue to all ones shifted right by NAIL_BITS.
+define(`numb_mask',`r6')
+
+C  Product halves for the four unrolled positions: mXa receives the mulq
+C  result and mXb the umulh result of vl0 times ulX.  m0a is r0, which is
+C  also written with the return value.
+define(`m0a',`r0')
+define(`m0b',`r1')
+define(`m1a',`r2')
+define(`m1b',`r3')
+define(`m2a',`r20')
+define(`m2b',`r21')
+define(`m3a',`r22')
+define(`m3b',`r23')
+
+C  Two accumulators used alternately for successive result limbs.
+define(`acc0',`r25')
+define(`acc1',`r27')
+
+C  Limbs loaded from up.  ul0/ul2 share r4 and ul1/ul3 share r5.
+define(`ul0',`r4')
+define(`ul1',`r5')
+define(`ul2',`r4')
+define(`ul3',`r5')
+
+C  Limbs loaded from rp.  All four names are the same register, r24.
+define(`rl0',`r24')
+define(`rl1',`r24')
+define(`rl2',`r24')
+define(`rl3',`r24')
+
+C  t0 holds a mulq result shifted right by NAIL_BITS, t1 holds an
+C  accumulator arithmetically shifted right by NUMB_BITS (the signed carry
+C  into the next limb).
+define(`t0',`r7')
+define(`t1',`r8')
+
+define(`NAIL_BITS',`GMP_NAIL_BITS')
+define(`NUMB_BITS',`GMP_NUMB_BITS')
+
+dnl  This declaration is munged by configure
+NAILS_SUPPORT(2-63)
+
+C  mpn_submul_1 (rp, up, n, vl0)
+C
+C  Multiplies the n limbs at up by the single limb vl0, subtracts the
+C  product from the n limbs at rp, and stores the n result limbs (each
+C  masked with numb_mask) back at rp.  r0 returns the final umulh result
+C  minus the final signed carry, presumably the limb borrowed out of the
+C  top of the subtraction.
+C
+C  vl0 is first shifted left by NAIL_BITS.  For each source limb, mulq and
+C  umulh give the two halves of the 128-bit product with the shifted vl0.
+C  The mulq half shifted right by NAIL_BITS is used as the low part for the
+C  current result limb and the umulh half goes into the following one, so
+C  for result limb i the accumulator is
+C      acc = rp[i] - (low(i) + high(i-1)) + (previous acc >> NUMB_BITS)
+C  where the shift is arithmetic (sra), so the carried value is signed.
+C  acc & numb_mask is stored.
+C
+C  In:     rp (r16), up (r17), n (r18), vl0 (r19)
+C  Out:    r0
+C  Writes: r0-r8, r16-r25, r27, r28.  The stack pointer is not touched.
+C
+C  The code dispatches on n mod 4 to one of four feed-in sequences.  Each
+C  biases rp so that the fixed load/store offsets of the unrolled code fit,
+C  then enters the four-way unrolled loop at L(el0)..L(el3), or for small n
+C  the wind-down code at L(ta2)..L(ta6).
+C
+C  NOTE(review): n = 0 is not special-cased.  It selects L(0m4), which
+C  always loads and processes four limbs, so callers are presumably required
+C  to pass n >= 1.
+ASM_START()
+PROLOGUE(mpn_submul_1)
+C  Pre-shift the multiplier and build numb_mask = all ones >> NAIL_BITS.
+       sll     vl0, NAIL_BITS, vl0
+       lda     numb_mask, -1(r31)
+       srl     numb_mask, NAIL_BITS, numb_mask
+
+C  r25 = n mod 4.  Values 1, 2 and 0 branch away, 3 falls through.
+       and     n,      3,      r25
+       cmpeq   r25,    1,      r21
+       bne     r21,    L(1m4)
+       cmpeq   r25,    2,      r21
+       bne     r21,    L(2m4)
+       beq     r25,    L(0m4)
+
+C  n mod 4 = 3.  rp is biased by -8 so the first limb is at 8(rp).
+C  With n = 3 the wind-down is entered at L(ta3), otherwise L(ge3) is used.
+L(3m4):        ldq     ul3,    0(up)
+       lda     n,      -4(n)
+       ldq     ul0,    8(up)
+       mulq    vl0,    ul3,    m3a
+       umulh   vl0,    ul3,    m3b
+       ldq     ul1,    16(up)
+       lda     up,     24(up)
+       lda     rp,     -8(rp)
+       mulq    vl0,    ul0,    m0a
+       umulh   vl0,    ul0,    m0b
+       bge     n,      L(ge3)
+
+       mulq    vl0,    ul1,    m1a
+       umulh   vl0,    ul1,    m1b
+       ldq     rl3,    8(rp)
+       srl     m3a,NAIL_BITS,  t0
+       addq    t0,     r31,    acc1
+       subq    rl3,    acc1,   acc1
+       ldq     rl0,    16(rp)
+       srl     m0a,NAIL_BITS,  t0
+       addq    t0,     m3b,    acc0
+       sra     acc1,NUMB_BITS, t1
+       br      r31,    L(ta3)
+
+C  n mod 4 = 3 and n >= 7: enter the loop at L(el3).
+L(ge3):        ldq     ul2,    0(up)
+       mulq    vl0,    ul1,    m1a
+       umulh   vl0,    ul1,    m1b
+       ldq     rl3,    8(rp)
+       srl     m3a,NAIL_BITS,  t0
+       ldq     ul3,    8(up)
+       lda     n,      -4(n)
+       mulq    vl0,    ul2,    m2a
+       addq    t0,     r31,    acc1
+       umulh   vl0,    ul2,    m2b
+       subq    rl3,    acc1,   acc1
+       ldq     rl0,    16(rp)
+       srl     m0a,NAIL_BITS,  t0
+       ldq     ul0,    16(up)
+       mulq    vl0,    ul3,    m3a
+       addq    t0,     m3b,    acc0
+       sra     acc1,NUMB_BITS, t1
+       br      r31,    L(el3)
+
+C  n mod 4 = 0.  rp is not biased, the first limb is at 0(rp).
+C  With n = 4 the wind-down is entered at L(ta4), otherwise L(ge4) is used.
+L(0m4):        lda     n,      -8(n)
+       ldq     ul2,    0(up)
+       ldq     ul3,    8(up)
+       mulq    vl0,    ul2,    m2a
+       umulh   vl0,    ul2,    m2b
+       ldq     ul0,    16(up)
+       mulq    vl0,    ul3,    m3a
+       umulh   vl0,    ul3,    m3b
+       ldq     ul1,    24(up)
+       lda     up,     32(up)
+       mulq    vl0,    ul0,    m0a
+       umulh   vl0,    ul0,    m0b
+       bge     n,      L(ge4)
+
+       ldq     rl2,    0(rp)
+       srl     m2a,NAIL_BITS,  t0
+       mulq    vl0,    ul1,    m1a
+       addq    t0,     r31,    acc0
+       umulh   vl0,    ul1,    m1b
+       subq    rl2,    acc0,   acc0
+       ldq     rl3,    8(rp)
+       srl     m3a,NAIL_BITS,  t0
+       addq    t0,     m2b,    acc1
+       sra     acc0,NUMB_BITS, t1
+       br      r31,    L(ta4)
+
+C  n mod 4 = 0 and n >= 8: enter the loop at L(el0).
+L(ge4):        ldq     rl2,    0(rp)
+       srl     m2a,NAIL_BITS,  t0
+       ldq     ul2,    0(up)
+       mulq    vl0,    ul1,    m1a
+       addq    t0,     r31,    acc0
+       umulh   vl0,    ul1,    m1b
+       subq    rl2,    acc0,   acc0
+       ldq     rl3,    8(rp)
+       srl     m3a,NAIL_BITS,  t0
+       ldq     ul3,    8(up)
+       lda     n,      -4(n)
+       mulq    vl0,    ul2,    m2a
+       addq    t0,     m2b,    acc1
+       sra     acc0,NUMB_BITS, t1
+       br      r31,    L(el0)
+
+C  n mod 4 = 2.  rp is biased by -16 so the first limb is at 16(rp).
+C  With n = 2 the wind-down is entered at L(ta2).
+L(2m4):        lda     n,      -4(n)
+       ldq     ul0,    0(up)
+       ldq     ul1,    8(up)
+       lda     up,     16(up)
+       lda     rp,     -16(rp)
+       mulq    vl0,    ul0,    m0a
+       umulh   vl0,    ul0,    m0b
+       bge     n,      L(ge2)
+
+       mulq    vl0,    ul1,    m1a
+       umulh   vl0,    ul1,    m1b
+       ldq     rl0,    16(rp)
+       srl     m0a,NAIL_BITS,  t0
+       addq    t0,     r31,    acc0
+       subq    rl0,    acc0,   acc0
+       ldq     rl1,    24(rp)
+       srl     m1a,NAIL_BITS,  t0
+       addq    t0,     m0b,    acc1
+       sra     acc0,NUMB_BITS, t1
+       br      r31,    L(ta2)
+
+C  n mod 4 = 2 and n >= 6.  The rp loads here use the old rp, which is then
+C  advanced by 32 because the code entered next uses offsets relative to
+C  the advanced pointer.  n >= 10 enters the loop at L(el2), n = 6 enters
+C  the wind-down at L(ta6).
+L(ge2):        ldq     ul2,    0(up)
+       mulq    vl0,    ul1,    m1a
+       umulh   vl0,    ul1,    m1b
+       ldq     ul3,    8(up)
+       lda     n,      -4(n)
+       mulq    vl0,    ul2,    m2a
+       umulh   vl0,    ul2,    m2b
+       ldq     rl0,    16(rp)
+       srl     m0a,NAIL_BITS,  t0
+       ldq     ul0,    16(up)
+       mulq    vl0,    ul3,    m3a
+       addq    t0,     r31,    acc0
+       umulh   vl0,    ul3,    m3b
+       subq    rl0,    acc0,   acc0
+       ldq     rl1,    24(rp)
+       srl     m1a,NAIL_BITS,  t0
+       ldq     ul1,    24(up)
+       lda     up,     32(up)
+       lda     rp,     32(rp)
+       mulq    vl0,    ul0,    m0a
+       addq    t0,     m0b,    acc1
+       sra     acc0,NUMB_BITS, t1
+       bge     n,      L(el2)
+
+       br      r31,    L(ta6)
+
+C  n mod 4 = 1.  rp is biased by -24 so the first limb is at 24(rp).
+C  n = 1 is completed right here and returns directly.
+L(1m4):        lda     n,      -4(n)
+       ldq     ul1,    0(up)
+       lda     up,     8(up)
+       lda     rp,     -24(rp)
+       bge     n,      L(ge1)
+
+       mulq    vl0,    ul1,    m1a
+       umulh   vl0,    ul1,    m1b
+       ldq     rl1,    24(rp)
+       srl     m1a,NAIL_BITS,  t0
+       subq    rl1,    t0,     acc1
+       and     acc1,numb_mask, r28
+       sra     acc1,NUMB_BITS, t1
+       stq     r28,    24(rp)
+       subq    m1b,    t1,     r0
+       ret     r31,    (r26),  1
+
+C  n mod 4 = 1 and n >= 5.  rp is advanced by 32 as in L(ge2).  n = 5 enters
+C  the wind-down at L(ta5), n >= 9 continues through L(ge5) to L(el1).
+L(ge1):        ldq     ul2,    0(up)
+       mulq    vl0,    ul1,    m1a
+       umulh   vl0,    ul1,    m1b
+       ldq     ul3,    8(up)
+       lda     n,      -4(n)
+       mulq    vl0,    ul2,    m2a
+       umulh   vl0,    ul2,    m2b
+       ldq     ul0,    16(up)
+       mulq    vl0,    ul3,    m3a
+       umulh   vl0,    ul3,    m3b
+       ldq     rl1,    24(rp)
+       srl     m1a,NAIL_BITS,  t0
+       ldq     ul1,    24(up)
+       lda     up,     32(up)
+       lda     rp,     32(rp)
+       mulq    vl0,    ul0,    m0a
+       addq    t0,     r31,    acc1
+       umulh   vl0,    ul0,    m0b
+       subq    rl1,    acc1,   acc1
+       ldq     rl2,    0(rp)
+       srl     m2a,NAIL_BITS,  t0
+       mulq    vl0,    ul1,    m1a
+       addq    t0,     m1b,    acc0
+       sra     acc1,NUMB_BITS, t1
+       blt     n,      L(ta5)
+
+L(ge5):        ldq     ul2,    0(up)
+       br      r31,    L(el1)
+
+C  Main loop, four limbs per pass, with entry points L(el2), L(el1), L(el0)
+C  and L(el3) for the four feed-in cases.  Each group of four instructions
+C  carries U1/L0/U0/L1 annotations, presumably the ev6 issue slot intended
+C  for each instruction.  unop is a no-op used as filler.  r28 always holds
+C  the masked limb that the next stq writes.  Each rp limb is loaded into
+C  r24 just after the subq that consumes the previous one.
+       ALIGN(16)
+L(top):        mulq    vl0,    ul0,    m0a             C U1
+       addq    t0,     m0b,    acc1            C L0
+       sra     acc0,NUMB_BITS, t1              C U0
+       stq     r28,    -24(rp)                 C L1
+C
+L(el2):        umulh   vl0,    ul0,    m0b             C U1
+       and     acc0,numb_mask, r28             C L0
+       subq    rl1,    acc1,   acc1            C U0
+       ldq     rl2,    0(rp)                   C L1
+C
+       unop                                    C U1
+       addq    t1,     acc1,   acc1            C L0
+       srl     m2a,NAIL_BITS,  t0              C U0
+       ldq     ul2,    0(up)                   C L1
+C
+       mulq    vl0,    ul1,    m1a             C U1
+       addq    t0,     m1b,    acc0            C L0
+       sra     acc1,NUMB_BITS, t1              C U0
+       stq     r28,    -16(rp)                 C L1
+C
+L(el1):        umulh   vl0,    ul1,    m1b             C U1
+       and     acc1,numb_mask, r28             C L0
+       subq    rl2,    acc0,   acc0            C U0
+       ldq     rl3,    8(rp)                   C L1
+C
+       lda     n,      -4(n)                   C L1
+       addq    t1,     acc0,   acc0            C L0
+       srl     m3a,NAIL_BITS,  t0              C U0
+       ldq     ul3,    8(up)                   C L1
+C
+       mulq    vl0,    ul2,    m2a             C U1
+       addq    t0,     m2b,    acc1            C L0
+       sra     acc0,NUMB_BITS, t1              C U0
+       stq     r28,    -8(rp)                  C L1
+C
+L(el0):        umulh   vl0,    ul2,    m2b             C U1
+       and     acc0,numb_mask, r28             C L0
+       subq    rl3,    acc1,   acc1            C U0
+       ldq     rl0,    16(rp)                  C L1
+C
+       unop                                    C U1
+       addq    t1,     acc1,   acc1            C L0
+       srl     m0a,NAIL_BITS,  t0              C U0
+       ldq     ul0,    16(up)                  C L1
+C
+       mulq    vl0,    ul3,    m3a             C U1
+       addq    t0,     m3b,    acc0            C L0
+       sra     acc1,NUMB_BITS, t1              C U0
+       stq     r28,    0(rp)                   C L1
+C
+L(el3):        umulh   vl0,    ul3,    m3b             C U1
+       and     acc1,numb_mask, r28             C L0
+       subq    rl0,    acc0,   acc0            C U0
+       ldq     rl1,    24(rp)                  C L1
+C
+       unop                                    C U1
+       addq    t1,     acc0,   acc0            C L0
+       srl     m1a,NAIL_BITS,  t0              C U0
+       ldq     ul1,    24(up)                  C L1
+C
+       lda     up,     32(up)                  C L0
+       unop                                    C U1
+       lda     rp,     32(rp)                  C L1
+       bge     n,      L(top)                  C U0
+
+C  Wind-down: the same steps as the loop without further loads from up.
+C  Entering at L(taN) leaves exactly N result limbs still to be stored.
+L(end):        mulq    vl0,    ul0,    m0a
+       addq    t0,     m0b,    acc1
+       sra     acc0,NUMB_BITS, t1
+       stq     r28,    -24(rp)
+L(ta6):        umulh   vl0,    ul0,    m0b
+       and     acc0,numb_mask, r28
+       subq    rl1,    acc1,   acc1
+       ldq     rl2,    0(rp)
+       addq    t1,     acc1,   acc1
+       srl     m2a,NAIL_BITS,  t0
+       mulq    vl0,    ul1,    m1a
+       addq    t0,     m1b,    acc0
+       sra     acc1,NUMB_BITS, t1
+       stq     r28,    -16(rp)
+L(ta5):        umulh   vl0,    ul1,    m1b
+       and     acc1,numb_mask, r28
+       subq    rl2,    acc0,   acc0
+       ldq     rl3,    8(rp)
+       addq    t1,     acc0,   acc0
+       srl     m3a,NAIL_BITS,  t0
+       addq    t0,     m2b,    acc1
+       sra     acc0,NUMB_BITS, t1
+       stq     r28,    -8(rp)
+       unop
+       ALIGN(16)
+L(ta4):        and     acc0,numb_mask, r28
+       subq    rl3,    acc1,   acc1
+       ldq     rl0,    16(rp)
+       addq    t1,     acc1,   acc1
+       srl     m0a,NAIL_BITS,  t0
+       addq    t0,     m3b,    acc0
+       sra     acc1,NUMB_BITS, t1
+       stq     r28,    0(rp)
+       unop
+       ALIGN(16)
+L(ta3):        and     acc1,numb_mask, r28
+       subq    rl0,    acc0,   acc0
+       ldq     rl1,    24(rp)
+       addq    t1,     acc0,   acc0
+       srl     m1a,NAIL_BITS,  t0
+       addq    t0,     m0b,    acc1
+       sra     acc0,NUMB_BITS, t1
+       stq     r28,    8(rp)
+       unop
+       ALIGN(16)
+C  Last two limbs.  The return value is the last umulh result (m1b) minus
+C  the signed carry out of the last accumulator.
+L(ta2):        and     acc0,numb_mask, r28
+       subq    rl1,    acc1,   acc1
+       addq    t1,     acc1,   acc1
+       sra     acc1,NUMB_BITS, t1
+       stq     r28,    16(rp)
+       and     acc1,numb_mask, r28
+       subq    m1b,    t1,     r0
+       stq     r28,    24(rp)
+       ret     r31,    (r26),  1
+EPILOGUE()
+ASM_END()
diff --git a/mpn/alpha/ev6/slot.pl b/mpn/alpha/ev6/slot.pl

new file mode 100644 (file)

index 0000000..17967e7
--- /dev/null
+++ b/mpn/alpha/ev6/slot.pl
@@ -0,0 +1,303 @@
+#!/usr/bin/perl -w
+
+# Copyright 2000, 2001, 2003, 2004, 2005 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation; either version 3 of the License, or (at
+# your option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+# Usage: slot.pl [filename.o]...
+#
+# Run "objdump" to produce a disassembly of the given object file(s) and
+# annotate the output with "U" or "L" slotting which Alpha EV6 will use.
+#
+# When an instruction is E (ie. either U or L), an "eU" or "eL" is shown, as
+# a reminder that it wasn't a fixed requirement that gave the U or L, but
+# the octaword slotting rules.
+#
+# If an instruction is not recognised, that octaword does not get any U/L
+# shown, only lower-case "u", "l" or "e" for the instructions which are
+# known.  Add any unknown instructions to %optable below.
+
+
+use strict;
+
+# The U or L which various instructions demand, or E if either.
+#
+# Keys are opcode mnemonics exactly as they appear in the objdump
+# disassembly (disassemble() looks up the first word after the instruction
+# bytes).  An opcode missing from this table gets a blank type, which means
+# its whole octaword is left without U/L slotting in the output.
+#
+my %optable =
+  (
+   'addq'   => 'E',
+   'and'    => 'E',
+   'beq'    => 'U',
+   'bge'    => 'U',
+   'bgt'    => 'U',
+   'blt'    => 'U',
+   'bne'    => 'U',
+   'br'     => 'L',
+   'clr'    => 'E',
+   'cmpule' => 'E',
+   'cmpult' => 'E',
+   'cmpeq'  => 'E',
+   'cmoveq' => 'E',
+   'cmovne' => 'E',
+   'ctpop'  => 'U',
+   'ctlz'   => 'U',
+   'cttz'   => 'U',
+   'extbl'  => 'U',
+   'extlh'  => 'U',
+   'extll'  => 'U',
+   'extqh'  => 'U',
+   'extql'  => 'U',
+   'extwh'  => 'U',
+   'extwl'  => 'U',
+   'jsr'    => 'L',
+   'lda'    => 'E',
+   'ldah'   => 'E',
+   'ldbu'   => 'L',
+   'ldl'    => 'L',
+   'ldq'    => 'L',
+   'ldt'    => 'L',
+   'ret'    => 'L',
+   'mov'    => 'E',
+   'mulq'   => 'U',
+   'negq'   => 'E',
+   'nop'    => 'E',
+   'not'    => 'E',
+   's8addq' => 'E',
+   's8subq' => 'E',
+   # slotting for the sign-extend opcodes has not been filled in yet
+   # 'sextb'  => ?
+   # 'sextl'  => ?
+   'sll'    => 'U',
+   'srl'    => 'U',
+   'stq'    => 'L',
+   'subq'   => 'E',
+   'umulh'  => 'U',
+   'unop'   => 'E',
+   'xor'    => 'E',
+  );
+
+# Slottings used for a given pattern of U/L/E in an octaword.  This is as
+# per the "Ebox Slotting" section of the EV6 hardware reference manual.
+#
+# Each key is the four instruction types of one octaword and each value is
+# the four slots actually assigned.  In both strings the characters are in
+# the order disassemble() builds them: the instruction at address offset c
+# first, then 8, then 4, then 0.
+#
+# The entries are not in sorted order; that doesn't matter for a hash, and
+# coverage() reports any of the 81 combinations which is missing.
+#
+my %slottable =
+  (
+   'EEEE' => 'ULUL',
+   'EEEL' => 'ULUL',
+   'EEEU' => 'ULLU',
+   'EELE' => 'ULLU',
+   'EELL' => 'UULL',
+   'EELU' => 'ULLU',
+   'EEUE' => 'ULUL',
+   'EEUL' => 'ULUL',
+   'EEUU' => 'LLUU',
+   'ELEE' => 'ULUL',
+   'ELEL' => 'ULUL',
+   'ELEU' => 'ULLU',
+   'ELLE' => 'ULLU',
+   'ELLL' => 'ULLL',
+   'ELLU' => 'ULLU',
+   'ELUE' => 'ULUL',
+   'ELUL' => 'ULUL',
+
+   'LLLL' => 'LLLL',
+   'LLLU' => 'LLLU',
+   'LLUE' => 'LLUU',
+   'LLUL' => 'LLUL',
+   'LLUU' => 'LLUU',
+   'LUEE' => 'LULU',
+   'LUEL' => 'LUUL',
+   'LUEU' => 'LULU',
+   'LULE' => 'LULU',
+   'LULL' => 'LULL',
+   'LULU' => 'LULU',
+   'LUUE' => 'LUUL',
+   'LUUL' => 'LUUL',
+   'LUUU' => 'LUUU',
+   'UEEE' => 'ULUL',
+   'UEEL' => 'ULUL',
+   'UEEU' => 'ULLU',
+
+   'ELUU' => 'LLUU',
+   'EUEE' => 'LULU',
+   'EUEL' => 'LUUL',
+   'EUEU' => 'LULU',
+   'EULE' => 'LULU',
+   'EULL' => 'UULL',
+   'EULU' => 'LULU',
+   'EUUE' => 'LUUL',
+   'EUUL' => 'LUUL',
+   'EUUU' => 'LUUU',
+   'LEEE' => 'LULU',
+   'LEEL' => 'LUUL',
+   'LEEU' => 'LULU',
+   'LELE' => 'LULU',
+   'LELL' => 'LULL',
+   'LELU' => 'LULU',
+   'LEUE' => 'LUUL',
+   'LEUL' => 'LUUL',
+   'LEUU' => 'LLUU',
+   'LLEE' => 'LLUU',
+   'LLEL' => 'LLUL',
+   'LLEU' => 'LLUU',
+   'LLLE' => 'LLLU',
+
+   'UELE' => 'ULLU',
+   'UELL' => 'UULL',
+   'UELU' => 'ULLU',
+   'UEUE' => 'ULUL',
+   'UEUL' => 'ULUL',
+   'UEUU' => 'ULUU',
+   'ULEE' => 'ULUL',
+   'ULEL' => 'ULUL',
+   'ULEU' => 'ULLU',
+   'ULLE' => 'ULLU',
+   'ULLL' => 'ULLL',
+   'ULLU' => 'ULLU',
+   'ULUE' => 'ULUL',
+   'ULUL' => 'ULUL',
+   'ULUU' => 'ULUU',
+   'UUEE' => 'UULL',
+   'UUEL' => 'UULL',
+   'UUEU' => 'UULU',
+   'UULE' => 'UULL',
+   'UULL' => 'UULL',
+   'UULU' => 'UULU',
+   'UUUE' => 'UUUL',
+   'UUUL' => 'UUUL',
+   'UUUU' => 'UUUU',
+  );
+
+# Report every four-letter U/L/E pattern which has no entry in %slottable.
+# Nothing is printed when the table is complete.
+sub coverage {
+  my @kinds = ('U', 'L', 'E');
+
+  # Grow the patterns one position at a time.  The leftmost letter varies
+  # slowest, so any reports come out in the same order as four nested loops
+  # over @kinds would give.
+  my @patterns = ('');
+  foreach my $position (1 .. 4) {
+    @patterns = map { my $head = $_; map { $head . $_ } @kinds } @patterns;
+  }
+
+  foreach my $pattern (@patterns) {
+    next if defined $slottable{$pattern};
+    print "slottable missing: $pattern\n";
+  }
+}
+
+# Cross-check %slottable against the slotting rules which can be stated in
+# general terms, printing a line for every entry which disagrees.
+sub check {
+  foreach my $pattern (keys %slottable) {
+    # how many of each letter the pattern has (tr with an empty replacement
+    # only counts, it doesn't modify)
+    my $num_e = ($pattern =~ tr/E//);
+    my $num_l = ($pattern =~ tr/L//);
+    my $num_u = ($pattern =~ tr/U//);
+
+    # no general rule is applied to patterns with three or four E's
+    next if $num_e >= 3;
+
+    # with no E at all the slotting must simply equal the pattern
+    my $want = $pattern;
+
+    if ($num_e == 1) {
+      # a lone E goes to whichever of U or L the other three use least
+      my $fill = ($num_l > $num_u) ? 'U' : 'L';
+      $want =~ s/E/$fill/;
+    } elsif ($num_e == 2) {
+      # two E's beside two L's become U's, two E's beside two U's become
+      # L's; one L plus one U is not covered by a rule here
+      if    ($num_l == 2) { $want =~ s/E/U/g; }
+      elsif ($num_u == 2) { $want =~ s/E/L/g; }
+      else                { next; }
+    }
+
+    my $got = $slottable{$pattern};
+    next if $want eq $got;
+    print "slottable $pattern want $want got $got\n";
+  }
+}
+
+# Run objdump on FILE and print each disassembled instruction line with its
+# EV6 slotting inserted between the address/bytes part and the mnemonic.
+#
+# Instructions are buffered by the last hex digit of their address (0, 4, 8
+# or c) and emitted an octaword at a time.  A complete octaword whose four
+# types form a %slottable key gets its slots shown in upper case, with a
+# lower-case "e" in front of those which were E.  Otherwise only the
+# lower-case types of the known instructions are shown.  Lines of objdump
+# output which are not instructions are not copied to the output.
+#
+# Dies if the pipe from objdump cannot be opened or objdump reports failure.
+sub disassemble {
+  my ($file) = @_;
+
+  # List-form pipe open: $file reaches objdump as a single argument without
+  # passing through the shell, so names containing spaces or shell
+  # metacharacters are neither split nor interpreted.
+  open (my $in, '-|', 'objdump', '-Srfh', $file)
+    || die "Cannot open pipe from objdump\n";
+
+  my @offsets = ('0', '4', '8', 'c');
+  my %order;
+  @order{@offsets} = (0 .. 3);
+
+  my (%pre, %post, %type);
+
+  # Print and then forget the instructions buffered for the current
+  # octaword.  Only a full set of four can be slotted; a partial set (code
+  # not starting or ending on an octaword boundary) is printed with just its
+  # lower-case types rather than being lost or printed as blank lines.
+  my $flush = sub {
+    my %slot = map { $_ => ' ' } @offsets;
+
+    if (keys (%type) == 4) {
+      my $str = $slottable{$type{'c'} . $type{'8'} . $type{'4'} . $type{'0'}};
+      if (defined $str) {
+        @slot{'c', '8', '4', '0'} = split (//, $str);
+      }
+    }
+
+    foreach my $i (@offsets) {
+      next unless defined $type{$i};
+      # no need to repeat the type when it's the same as the slot
+      if ($slot{$i} eq $type{$i}) { $type{$i} = ' '; }
+      print $pre{$i}, ' ', lc($type{$i}),$slot{$i}, '  ', $post{$i}, "\n";
+    }
+
+    %pre = ();
+    %type = ();
+    %post = ();
+  };
+
+  while (my $line = <$in>) {
+    if ($line =~ /(^[ \t]*[0-9a-f]*([0-9a-f]):[ \t]*[0-9a-f][0-9a-f] [0-9a-f][0-9a-f] [0-9a-f][0-9a-f] [0-9a-f][0-9a-f] )\t(([a-z0-9]+).*)/) {
+      my ($this_pre, $addr, $this_post, $opcode) = ($1, $2, $3, $4);
+
+      # only addresses at a multiple of 4 are instruction slots
+      next unless exists $order{$addr};
+
+      # An offset at or before one already buffered means the previous
+      # octaword stopped short of offset c; emit it before starting anew.
+      if (grep { $order{$_} >= $order{$addr} } keys %type) {
+        $flush->();
+      }
+
+      my $this_type = $optable{$opcode};
+      if (! defined ($this_type)) { $this_type = ' '; }
+
+      $pre{$addr} = $this_pre;
+      $post{$addr} = $this_post;
+      $type{$addr} = $this_type;
+
+      if ($addr eq 'c') {
+        $flush->();
+      }
+    }
+  }
+
+  # whatever is left from a final partial octaword
+  $flush->();
+
+  close ($in) || die "Error from objdump (or objdump not available)\n";
+}
+
+# Self-check the tables first; any inconsistencies are printed ahead of the
+# disassembly output.
+coverage();
+check();
+
+# At least one object file must be given.  Say so, rather than exiting with
+# the uninformative default "Died" message.
+if (! @ARGV) {
+  die "Usage: slot.pl filename.o...\n";
+}
+
+foreach my $file (@ARGV) {
+  disassemble($file);
+}
diff --git a/mpn/alpha/ev6/sqr_diagonal.asm b/mpn/alpha/ev6/sqr_diagonal.asm

new file mode 100644 (file)

index 0000000..58d086e
--- /dev/null
+++ b/mpn/alpha/ev6/sqr_diagonal.asm
@@ -0,0 +1,115 @@
+dnl  Alpha mpn_sqr_diagonal.
+
+dnl  Copyright 2001, 2002, 2006 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C      cycles/limb
+C EV4:      ?
+C EV5:      ?
+C EV6:      2.3
+
+C  INPUT PARAMETERS
+C  rp  r16
+C  up  r17
+C  n   r18
+
+
+ASM_START()
+PROLOGUE(mpn_sqr_diagonal)
+C  For each limb up[i] the low half of up[i]^2 (mulq) is stored at rp[2i]
+C  and the high half (umulh) at rp[2i+1].
+C
+C  r0, r1     limbs loaded from up
+C  r4, r20    low, high product of the limb from r0
+C  r5, r21    low, high product of the limb from r1
+C  r18        n, stepped down by 2 and tested to select the exit path
+C
+C  The first two squares are started while n is still being examined.
+       lda     r18, -2(r18)    C n -= 2
+       ldq     r0,   0(r17)
+       mulq    r0, r0, r4
+       umulh   r0, r0, r20
+       blt     r18, L(ex1)
+       ldq     r1,   8(r17)
+       mulq    r1, r1, r5
+       umulh   r1, r1, r21
+       beq     r18, L(ex2)
+       lda     r18, -2(r18)    C n -= 2
+       ldq     r0,  16(r17)
+       blt     r18, L(ex3)
+       ldq     r1,  24(r17)
+       beq     r18, L(ex4)
+
+C  Main loop, entered only for n >= 5.  Each pass stores the two products
+C  computed previously, starts two new ones and loads two further limbs.
+       ALIGN(16)
+L(top):        lda     r18, -2(r18)    C n -= 2
+       stq     r4,   0(r16)
+       mulq    r0, r0, r4
+       stq     r20,  8(r16)
+       umulh   r0, r0, r20
+       ldq     r0,  32(r17)
+       blt     r18, L(x)
+       stq     r5,  16(r16)
+       mulq    r1, r1, r5
+       stq     r21, 24(r16)
+       umulh   r1, r1, r21
+       ldq     r1,  40(r17)
+       lda     r16, 32(r16)    C rp += 4
+       lda     r17, 16(r17)    C up += 2
+       bne     r18, L(top)
+
+C  Even n >= 4: reached directly for n == 4, or by falling out of the loop.
+C  Two products are waiting to be stored and two limbs waiting to be squared.
+       ALIGN(16)
+L(ex4):        stq     r4,   0(r16)
+       mulq    r0, r0, r4
+       stq     r20,  8(r16)
+       umulh   r0, r0, r20
+       stq     r5,  16(r16)
+       mulq    r1, r1, r5
+       stq     r21, 24(r16)
+       umulh   r1, r1, r21
+       stq     r4,  32(r16)
+       stq     r20, 40(r16)
+       stq     r5,  48(r16)
+       stq     r21, 56(r16)
+       ret     r31, (r26), 1
+C  Odd n >= 5: left from the middle of the loop, after the first pair of
+C  stores but before rp and up were advanced, hence the offsets from 16 up.
+       ALIGN(16)
+L(x):  stq     r5,  16(r16)
+       mulq    r1, r1, r5
+       stq     r21, 24(r16)
+       umulh   r1, r1, r21
+       stq     r4,  32(r16)
+       mulq    r0, r0, r4
+       stq     r20, 40(r16)
+       umulh   r0, r0, r20
+       stq     r5,  48(r16)
+       stq     r21, 56(r16)
+       stq     r4,  64(r16)
+       stq     r20, 72(r16)
+       ret     r31, (r26), 1
+C  n == 1: store the single square.
+L(ex1):        stq     r4,   0(r16)
+       stq     r20,  8(r16)
+       ret     r31, (r26), 1
+C  n == 2: store the two squares already computed.
+       ALIGN(16)
+L(ex2):        stq     r4,   0(r16)
+       stq     r20,  8(r16)
+       stq     r5,  16(r16)
+       stq     r21, 24(r16)
+       ret     r31, (r26), 1
+C  n == 3: the third limb is in r0, square it while the first two squares
+C  are stored.
+       ALIGN(16)
+L(ex3):        stq     r4,   0(r16)
+       mulq    r0, r0, r4
+       stq     r20,  8(r16)
+       umulh   r0, r0, r20
+       stq     r5,  16(r16)
+       stq     r21, 24(r16)
+       stq     r4,  32(r16)
+       stq     r20, 40(r16)
+       ret     r31, (r26), 1
+EPILOGUE()
+ASM_END()
diff --git a/mpn/alpha/ev6/sub_n.asm b/mpn/alpha/ev6/sub_n.asm

new file mode 100644 (file)

index 0000000..f23ad44
--- /dev/null
+++ b/mpn/alpha/ev6/sub_n.asm
@@ -0,0 +1,272 @@
+dnl  Alpha ev6 mpn_sub_n -- Subtract two limb vectors of the same length > 0
+dnl  and store difference in a third limb vector.
+
+dnl  Copyright 2000, 2003, 2005 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C      cycles/limb
+C EV4:     ?
+C EV5:     5.4
+C EV6:     2.125
+
+C  INPUT PARAMETERS
+C  rp  r16
+C  up  r17
+C  vp  r18
+C  n   r19
+C  cy  r20   (for mpn_sub_nc)
+
+C TODO
+C   Finish cleaning up cy registers r22, r23 (make them use cy0/cy1)
+C   Use multi-pronged feed-in.
+C   Perform additional micro-tuning
+
+C  This code was written in cooperation with ev6 pipeline expert Steve Root.
+
+C  Pair loads and stores where possible
+C  Store pairs oct-aligned where possible (didn't need it here)
+C  Stores are delayed every third cycle
+C  Loads and stores are delayed by fills
+C  U stays still, put code there where possible (note alternation of U1 and U0)
+C  L moves because of loads and stores
+C  Note dampers in L to limit damage
+
+C  This odd-looking optimization expects that we're having random bits in our
+C  data, so that a pure zero result is unlikely, so we penalize the unlikely
+C  case to help the common case.
+
+define(`u0', `r0')  define(`u1', `r3')
+define(`v0', `r1')  define(`v1', `r4')
+
+define(`cy0', `r20')  define(`cy1', `r21')
+
+MULFUNC_PROLOGUE(mpn_sub_n mpn_sub_nc)
+
+ASM_START()
+C  mpn_sub_nc: the borrow in is already in cy0 (r20), so just join
+C  mpn_sub_n after the instruction which clears it.
+PROLOGUE(mpn_sub_nc)
+       br      r31,    $entry
+EPILOGUE()
+PROLOGUE(mpn_sub_n)
+       bis     r31,    r31,    cy0     C clear carry in
+$entry:        cmpult  r19,    5,      r22     C L1 move counter
+       ldq     u1,     0(r17)          C L0 get next ones
+       ldq     v1,     0(r18)          C L1
+       bne     r22,    $Lsmall
+
+C  n >= 5.  Feed-in: start the first five limbs so the unrolled code can
+C  continue from there.  Each limb has a raw difference u-v and a borrow
+C  from the cmpult; the borrow from the previous limb is then subtracted
+C  from the raw difference.  That second subtract can only wrap when the raw
+C  difference is exactly zero, which is what the "fix" stubs at the end of
+C  the function handle.
+       ldq     u0,     8(r17)          C L0 get next ones
+       ldq     v0,     8(r18)          C L1
+       subq    u1,     v1,     r5      C U0 sub two data
+
+       cmpult  u1,     v1,     r23     C U0 did it borrow
+       ldq     u1,     16(r17)         C L0 get next ones
+       ldq     v1,     16(r18)         C L1
+
+       subq    u0,     v0,     r8      C U1 sub two data
+       subq    r5,     cy0,    r24     C U0 borrow in
+
+       cmpult  u0,     v0,     r22     C U1 did it borrow
+       beq     r5,     $fix5f          C U0 fix exact zero
+$ret5f:        ldq     u0,     24(r17)         C L0 get next ones
+       ldq     v0,     24(r18)         C L1
+
+       subq    r8,     r23,    r25     C U1 borrow from last
+       subq    u1,     v1,     r7      C U0 sub two data
+
+       beq     r8,     $fix6f          C U1 fix exact zero
+$ret6f:        cmpult  u1,     v1,     r23     C U0 did it borrow
+       ldq     u1,     32(r17)         C L0 get next ones
+       ldq     v1,     32(r18)         C L1
+
+       lda     r17,    40(r17)         C L0 move pointer
+       lda     r18,    40(r18)         C L1 move pointer
+
+C  rp is biased by -8 so the stores below can use offsets 8, 16, ...
+C  The counter is n-13: five limbs are in flight and each loop pass
+C  handles eight more.
+       lda     r16,    -8(r16)
+       lda     r19,    -13(r19)        C L1 move counter
+       blt     r19,    $Lend           C U1 loop control
+
+
+C Main loop.  8-way unrolled.
+       ALIGN(16)
+$Loop: subq    u0,     v0,     r2      C U1 sub two data
+       stq     r24,    8(r16)          C L0 put an answer
+       subq    r7,     r22,    r24     C U0 borrow from last
+       stq     r25,    16(r16)         C L1 pair
+
+       cmpult  u0,     v0,     cy1     C U1 did it borrow
+       beq     r7,     $fix7           C U0 fix exact 0
+$ret7: ldq     u0,     0(r17)          C L0 get next ones
+       ldq     v0,     0(r18)          C L1
+
+       bis     r31,    r31,    r31     C L  damp out
+       subq    r2,     r23,    r25     C U1 borrow from last
+       bis     r31,    r31,    r31     C L  moves in L !
+       subq    u1,     v1,     r5      C U0 sub two data
+
+       beq     r2,     $fix0           C U1 fix exact zero
+$ret0: cmpult  u1,     v1,     cy0     C U0 did it borrow
+       ldq     u1,     8(r17)          C L0 get next ones
+       ldq     v1,     8(r18)          C L1
+
+       subq    u0,     v0,     r8      C U1 sub two data
+       stq     r24,    24(r16)         C L0 store pair
+       subq    r5,     cy1,    r24     C U0 borrow from last
+       stq     r25,    32(r16)         C L1
+
+       cmpult  u0,     v0,     r22     C U1 did it borrow
+       beq     r5,     $fix1           C U0 fix exact zero
+$ret1: ldq     u0,     16(r17)         C L0 get next ones
+       ldq     v0,     16(r18)         C L1
+
+       lda     r16,    64(r16)         C L0 move pointer
+       subq    r8,     cy0,    r25     C U1 borrow from last
+       lda     r19,    -8(r19)         C L1 move counter
+       subq    u1,     v1,     r7      C U0 sub two data
+
+       beq     r8,     $fix2           C U1 fix exact zero
+$ret2: cmpult  u1,     v1,     r23     C U0 did it borrow
+       ldq     u1,     24(r17)         C L0 get next ones
+       ldq     v1,     24(r18)         C L1
+
+       subq    u0,     v0,     r2      C U1 sub two data
+       stq     r24,    -24(r16)        C L0 put an answer
+       subq    r7,     r22,    r24     C U0 borrow from last
+       stq     r25,    -16(r16)        C L1 pair
+
+       cmpult  u0,     v0,     cy1     C U1 did it borrow
+       beq     r7,     $fix3           C U0 fix exact 0
+$ret3: ldq     u0,     32(r17)         C L0 get next ones
+       ldq     v0,     32(r18)         C L1
+
+       bis     r31,    r31,    r31     C L  damp out
+       subq    r2,     r23,    r25     C U1 borrow from last
+       bis     r31,    r31,    r31     C L  moves in L !
+       subq    u1,     v1,     r5      C U0 sub two data
+
+       beq     r2,     $fix4           C U1 fix exact zero
+$ret4: cmpult  u1,     v1,     cy0     C U0 did it borrow
+       ldq     u1,     40(r17)         C L0 get next ones
+       ldq     v1,     40(r18)         C L1
+
+       subq    u0,     v0,     r8      C U1 sub two data
+       stq     r24,    -8(r16)         C L0 store pair
+       subq    r5,     cy1,    r24     C U0 borrow from last
+       stq     r25,    0(r16)          C L1
+
+       cmpult  u0,     v0,     r22     C U1 did it borrow
+       beq     r5,     $fix5           C U0 fix exact zero
+$ret5: ldq     u0,     48(r17)         C L0 get next ones
+       ldq     v0,     48(r18)         C L1
+
+       ldl     r31, 256(r17)           C L0 prefetch
+       subq    r8,     cy0,    r25     C U1 borrow from last
+       ldl     r31, 256(r18)           C L1 prefetch
+       subq    u1,     v1,     r7      C U0 sub two data
+
+       beq     r8,     $fix6           C U1 fix exact zero
+$ret6: cmpult  u1,     v1,     r23     C U0 did it borrow
+       ldq     u1,     56(r17)         C L0 get next ones
+       ldq     v1,     56(r18)         C L1
+
+       lda     r17,    64(r17)         C L0 move pointer
+       bis     r31,    r31,    r31     C U
+       lda     r18,    64(r18)         C L1 move pointer
+       bge     r19,    $Loop           C U1 loop control
+C ==== main loop end
+
+C  Wind-down: finish the five limbs still in flight, without loading any
+C  more.  The borrow out of them ends up in cy0.
+$Lend: subq    u0,     v0,     r2      C U1 sub two data
+       stq     r24,    8(r16)          C L0 put an answer
+       subq    r7,     r22,    r24     C U0 borrow from last
+       stq     r25,    16(r16)         C L1 pair
+       cmpult  u0,     v0,     cy1     C U1 did it borrow
+       beq     r7,     $fix7c          C U0 fix exact 0
+$ret7c:        subq    r2,     r23,    r25     C U1 borrow from last
+       subq    u1,     v1,     r5      C U0 sub two data
+       beq     r2,     $fix0c          C U1 fix exact zero
+$ret0c:        cmpult  u1,     v1,     cy0     C U0 did it borrow
+       stq     r24,    24(r16)         C L0 store pair
+       subq    r5,     cy1,    r24     C U0 borrow from last
+       stq     r25,    32(r16)         C L1
+       beq     r5,     $fix1c          C U0 fix exact zero
+$ret1c:        stq     r24,    40(r16)         C L0 put an answer
+       lda     r16,    48(r16)         C L0 move pointer
+
+C  Undo the loop bias on the counter, leaving the number of limbs not yet
+C  started.  If there are none return the borrow, otherwise load the next
+C  pair and let the simple loop do the rest.
+       lda     r19,    8(r19)
+       beq     r19,    $Lret
+
+       ldq     u1,     0(r17)
+       ldq     v1,     0(r18)
+C  Simple loop, one limb per pass.  On entry u1,v1 hold the next limbs and
+C  cy0 the borrow in.  The last limb is done at $Lend0 so that nothing is
+C  loaded from beyond the end of the operands.
+$Lsmall:
+       lda     r19,    -1(r19)
+       beq     r19,    $Lend0
+
+       ALIGN(8)
+$Loop0:        subq    u1,     v1,     r2      C main sub
+       cmpult  u1,     v1,     r8      C compute bw from last sub
+       ldq     u1,     8(r17)
+       ldq     v1,     8(r18)
+       subq    r2,     cy0,    r5      C borrow sub
+       lda     r17,    8(r17)
+       lda     r18,    8(r18)
+       stq     r5,     0(r16)
+       cmpult  r2,     cy0,    cy0     C compute bw from last sub
+       lda     r19,    -1(r19)         C decr loop cnt
+       bis     r8,     cy0,    cy0     C combine bw from the two subs
+       lda     r16,    8(r16)
+       bne     r19,    $Loop0
+$Lend0:        subq    u1,     v1,     r2      C main sub
+       subq    r2,     cy0,    r5      C borrow sub
+       cmpult  u1,     v1,     r8      C compute bw from last sub
+       cmpult  r2,     cy0,    cy0     C compute bw from last sub
+       stq     r5,     0(r16)
+       bis     r8,     cy0,    r0      C combine bw from the two subs
+       ret     r31,(r26),1
+
+       ALIGN(8)
+$Lret: lda     r0,     0(cy0)          C copy borrow into return register
+       ret     r31,(r26),1
+
+C  Fix-up stubs, one for each "beq" above.  Each is reached only when a raw
+C  difference u-v was exactly zero.  In that case subtracting the incoming
+C  borrow wraps exactly when that borrow is 1, so the incoming borrow is
+C  OR-ed into this limb's borrow out, and control returns to the matching
+C  $ret label.
+$fix5f:        bis     r23,    cy0,    r23     C bring forward borrow
+       br      r31,    $ret5f
+$fix6f:        bis     r22,    r23,    r22     C bring forward borrow
+       br      r31,    $ret6f
+$fix0: bis     cy1,    r23,    cy1     C bring forward borrow
+       br      r31,    $ret0
+$fix1: bis     cy0,    cy1,    cy0     C bring forward borrow
+       br      r31,    $ret1
+$fix2: bis     r22,    cy0,    r22     C bring forward borrow
+       br      r31,    $ret2
+$fix3: bis     r23,    r22,    r23     C bring forward borrow
+       br      r31,    $ret3
+$fix4: bis     cy1,    r23,    cy1     C bring forward borrow
+       br      r31,    $ret4
+$fix5: bis     cy1,    cy0,    cy0     C bring forward borrow
+       br      r31,    $ret5
+$fix6: bis     r22,    cy0,    r22     C bring forward borrow
+       br      r31,    $ret6
+$fix7: bis     r23,    r22,    r23     C bring forward borrow
+       br      r31,    $ret7
+$fix0c:        bis     cy1,    r23,    cy1     C bring forward borrow
+       br      r31,    $ret0c
+$fix1c:        bis     cy0,    cy1,    cy0     C bring forward borrow
+       br      r31,    $ret1c
+$fix7c:        bis     r23,    r22,    r23     C bring forward borrow
+       br      r31,    $ret7c
+
+EPILOGUE()
+ASM_END()
diff --git a/mpn/alpha/ev67/gcd_1.asm b/mpn/alpha/ev67/gcd_1.asm

new file mode 100644 (file)

index 0000000..2e6f0a5
--- /dev/null
+++ b/mpn/alpha/ev67/gcd_1.asm
@@ -0,0 +1,134 @@
+dnl  Alpha ev67 mpn_gcd_1 -- Nx1 greatest common divisor.
+
+dnl  Copyright 2003, 2004 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C ev67: 3.4 cycles/bitpair for 1x1 part
+
+
+C mp_limb_t mpn_gcd_1 (mp_srcptr xp, mp_size_t xsize, mp_limb_t y);
+C
+C In the 1x1 part, the algorithm is to change x,y to abs(x-y),min(x,y) and
+C strip trailing zeros from abs(x-y) to maintain x and y both odd.
+C
+C The trailing zeros are calculated from just x-y, since in twos-complement
+C there's the same number of trailing zeros on d or -d.  This means the cttz
+C runs in parallel with abs(x-y).
+C
+C The loop takes 5 cycles, and at 0.68 iterations per bit for two N-bit
+C operands with this algorithm gives the measured 3.4 c/l.
+C
+C The slottings shown are for SVR4 style systems, Unicos differs in the
+C initial gp setup and the LEA.
+C
+C Enhancement:
+C
+C On the jsr, !lituse_jsr! (when available) would allow the linker to relax
+C it to a bsr, but probably only in a static binary.  Plain "jsr foo" gives
+C the right object code for relaxation, and ought to be available
+C everywhere, but we prefer to schedule the GOT ldq (LEA) back earlier, for
+C the usual case of running in a shared library.
+C
+C bsr could perhaps be used explicitly anyway.  We should be able to assume
+C modexact is in the same module as us (ie. shared library or mainline).
+C Would there be any worries about the size of the displacement?  Could
+C always put modexact and gcd_1 in the same .o to be certain.
+
+ASM_START()
+PROLOGUE(mpn_gcd_1, gp)
+
+       C r16   xp
+       C r17   size
+       C r18   y
+
+       C Stack frame, 32 bytes:
+       C  0(r30)   saved ra (r26)
+       C  8(r30)   saved r9, which holds the odd part of y across the call
+       C 16(r30)   saved r10, which holds the common twos across the call
+
+       C ldah                          C l
+       C lda                           C u
+
+       ldq     r0, 0(r16)              C L   x = xp[0]
+       lda     r30, -32(r30)           C u   alloc stack
+
+       LEA(  r27, mpn_modexact_1c_odd) C L   modexact addr, ldq (gp)
+       stq     r10, 16(r30)            C L   save r10
+       cttz    r18, r10                C U0  y twos
+       cmpeq   r17, 1, r5              C u   test size==1
+
+       stq     r9, 8(r30)              C L   save r9
+       clr     r19                     C u   zero c for modexact
+       unop
+       unop
+
+       cttz    r0, r6                  C U0  x twos
+       stq     r26, 0(r30)             C L   save ra
+
+       srl     r18, r10, r18           C U   y odd
+
+       mov     r18, r9                 C l   hold y across call
+
+       cmpult  r6, r10, r2             C u   test x_twos < y_twos
+
+       cmovne  r2, r6, r10             C l   common_twos = min(x_twos,y_twos)
+       bne     r5, L(one)              C U   no modexact if size==1
+       jsr     r26, (r27), mpn_modexact_1c_odd   C L0
+
+       C The value left in r0 by the call replaces x, and its trailing zeros
+       C are counted afresh.  ra is reloaded since the jsr overwrote r26.
+
+       LDGP(   r29, 0(r26))            C u,l ldah,lda
+       cttz    r0, r6                  C U0  new x twos
+       ldq     r26, 0(r30)             C L   restore ra
+
+L(one):
+       mov     r9, r1                  C u   y
+       ldq     r9, 8(r30)              C L   restore r9
+       mov     r10, r2                 C u   common twos
+       ldq     r10, 16(r30)            C L   restore r10
+
+       lda     r30, 32(r30)            C l   free stack
+       beq     r0, L(done)             C U   return y if x%y==0
+
+       srl     r0, r6, r0              C U   x odd
+       unop
+
+       ALIGN(16)
+L(top):
+       C r0    x
+       C r1    y
+       C r2    common twos, for use at end
+
+       subq    r0, r1, r7              C l0  d = x - y
+       cmpult  r0, r1, r16             C u0  r16 = (x < y)
+
+       subq    r1, r0, r4              C l0  new_x = y - x
+       cttz    r7, r8                  C U0  d twos
+
+       cmoveq  r16, r7, r4             C l0  new_x = d if x>=y
+       cmovne  r16, r0, r1             C u0  y = x if x<y
+       unop                            C l   \ force cmoveq into l0
+       unop                            C u   /
+
+       C                               C cmoveq2 L0, cmovne2 U0
+
+       srl     r4, r8, r0              C U0  x = new_x >> twos
+       bne     r7, L(top)              C U1  stop when d==0
+
+
+L(done):
+       sll     r1, r2, r0              C U0  return y << common_twos
+       ret     r31, (r26), 1           C L0
+
+EPILOGUE()
+ASM_END()
diff --git a/mpn/alpha/ev67/hamdist.asm b/mpn/alpha/ev67/hamdist.asm

new file mode 100644 (file)

index 0000000..a72d95e
--- /dev/null
+++ b/mpn/alpha/ev67/hamdist.asm
@@ -0,0 +1,100 @@
+dnl  Alpha ev67 mpn_hamdist -- mpn hamming distance.
+
+dnl  Copyright 2003, 2005 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C ev67: 2.5 cycles/limb
+
+
+C unsigned long mpn_hamdist (mp_srcptr xp, mp_srcptr yp, mp_size_t size);
+C
+C The hope was for 2.0 c/l here, but that isn't achieved.  We're limited by
+C renaming register shortage.  Since we need 5 instructions per limb, further
+C unrolling could approach 1.5 c/l.
+C
+C The main loop processes two limbs from each operand on each iteration.  An
+C odd size is handled by processing xp[0]^yp[0] at the start.  If the size
+C is even that result is discarded, and is repeated by the main loop.
+C
+
+ASM_START()
+PROLOGUE(mpn_hamdist)
+
+       C r16   xp
+       C r17   yp
+       C r18   size
+
+       C r8 is first the "size is odd" flag, and once that has been used by
+       C the cmoveq below it becomes a ctpop result register in the loop.
+
+       ldq     r1, 0(r16)              C L0  xp[0]
+       ldq     r2, 0(r17)              C L1  yp[0]
+       and     r18, 1, r8              C U1  1 if size odd
+       srl     r18, 1, r18             C U0  size, limb pairs
+
+       clr     r0                      C L0  initial total
+       s8addq  r8, r17, r17            C U1  yp++ if size odd
+       s8addq  r8, r16, r16            C L1  xp++ if size odd
+       clr     r6                      C U0  dummy initial xor 1
+
+       xor     r1, r2, r5              C L   initial xor 0
+       beq     r18, L(one)             C U   if size==1
+
+       cmoveq  r8, r31, r5             C L   discard first limb if size even
+       unop                            C U
+
+
+       ALIGN(16)
+L(top):
+       C r0    total accumulating
+       C r5    xor 0, not yet counted (from previous iteration or feed-in)
+       C r6    xor 1, not yet counted (from previous iteration or feed-in)
+       C r7    ctpop of xor 0
+       C r8    ctpop of xor 1
+       C r16   xp, incrementing
+       C r17   yp, incrementing
+       C r18   size, limb pairs, decrementing
+
+       ldq     r1, 0(r16)              C L
+       ldq     r2, 0(r17)              C L
+       ctpop   r5, r7                  C U0
+       lda     r16, 16(r16)            C U
+
+       ldq     r3, -8(r16)             C L
+       ldq     r4, 8(r17)              C L
+       ctpop   r6, r8                  C U0
+       lda     r17, 16(r17)            C U
+
+       ldl     r31, 256(r16)           C L     prefetch
+       ldl     r31, 256(r17)           C L     prefetch
+       xor     r1, r2, r5              C U
+       lda     r18, -1(r18)            C U
+
+       xor     r3, r4, r6              C U
+       addq    r0, r7, r0              C L
+       addq    r0, r8, r0              C L
+       bne     r18, L(top)             C U
+
+
+       C count the two xors left pending by the last iteration
+       ctpop   r6, r8                  C U0
+       addq    r0, r8, r0              C L
+L(one):
+       ctpop   r5, r7                  C U0
+       addq    r0, r7, r0              C L
+
+       ret     r31, (r26), 1           C L0
+
+EPILOGUE()
+ASM_END()
diff --git a/mpn/alpha/ev67/popcount.asm b/mpn/alpha/ev67/popcount.asm

new file mode 100644 (file)

index 0000000..6ed79cf
--- /dev/null
+++ b/mpn/alpha/ev67/popcount.asm
@@ -0,0 +1,90 @@
+dnl  Alpha ev67 mpn_popcount -- mpn bit population count.
+
+dnl  Copyright 2003, 2005 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C ev67: 1.5 cycles/limb
+
+
+C unsigned long mpn_popcount (mp_srcptr src, mp_size_t size);
+C
+C This schedule seems necessary for the full 1.5 c/l, the IQ can't quite hide
+C all latencies, the addq's must be deferred to the next iteration.
+C
+C Since we need just 3 instructions per limb, further unrolling could approach
+C 1.0 c/l.
+C
+C The main loop processes two limbs at a time.  An odd size is handled by
+C processing src[0] at the start.  If the size is even that result is
+C discarded, and src[0] is repeated by the main loop.
+C
+
+ASM_START()
+PROLOGUE(mpn_popcount)
+       C Return in r0 the number of 1 bits in the size limbs at src.
+       C r16   src
+       C r17   size in limbs, then limb pairs
+
+       ldq     r0, 0(r16)              C L0  src[0]
+       and     r17, 1, r8              C U1  1 if size odd
+       srl     r17, 1, r17             C U0  size, limb pairs
+
+       s8addq  r8, r16, r16            C L1  src++ if size odd
+       ctpop   r0, r0                  C U0  popcount of src[0]
+       beq     r17, L(one)             C U1  if size==1
+
+       cmoveq  r8, r31, r0             C L   discard first limb if size even
+       clr     r3                      C L   pop 0, nothing pending yet
+
+       clr     r4                      C L   pop 1, nothing pending yet
+       unop                            C U
+       unop                            C L
+       unop                            C U
+
+
+       ALIGN(16)
+L(top):
+       C r0    total accumulating
+       C r3    pop 0
+       C r4    pop 1
+       C r16   src, incrementing
+       C r17   size, limb pairs, decrementing
+
+       ldq     r1, 0(r16)              C L   limb 0 of this pair
+       ldq     r2, 8(r16)              C L   limb 1 of this pair
+       lda     r16, 16(r16)            C U   src += 2 limbs
+       lda     r17, -1(r17)            C U   one pair fewer to go
+
+       addq    r0, r3, r0              C L   add pop 0 of previous pass
+       addq    r0, r4, r0              C L   add pop 1 of previous pass
+       ctpop   r1, r3                  C U0  pop 0, added on next pass
+       ctpop   r2, r4                  C U0  pop 1, added on next pass
+
+       ldl     r31, 512(r16)           C L     prefetch
+       bne     r17, L(top)             C U   loop while pairs remain
+
+
+       addq    r0, r3, r0              C L   add final pop 0
+       addq    r0, r4, r0              C U   add final pop 1
+L(one):
+       ret     r31, (r26), 1           C L0  result in r0
+
+EPILOGUE()
+ASM_END()
diff --git a/mpn/alpha/gmp-mparam.h b/mpn/alpha/gmp-mparam.h

new file mode 100644 (file)

index 0000000..ddd9f91
--- /dev/null
+++ b/mpn/alpha/gmp-mparam.h
@@ -0,0 +1,75 @@
+/* Alpha EV4 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2004, 2005, 2009
+Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 64
+#define BYTES_PER_MP_LIMB 8  /* GMP_LIMB_BITS / 8 */
+
+
+/* Parameters below are for a 175MHz 21064 */
+
+/* Generated by tuneup.c, 2009-01-15, gcc 3.2 */
+
+#define MUL_TOOM22_THRESHOLD             12
+#define MUL_TOOM33_THRESHOLD             69
+#define MUL_TOOM44_THRESHOLD             88
+
+#define SQR_BASECASE_THRESHOLD            4
+#define SQR_TOOM2_THRESHOLD              20
+#define SQR_TOOM3_THRESHOLD              62
+#define SQR_TOOM4_THRESHOLD             155
+
+#define MULLO_BASECASE_THRESHOLD          0  /* always */
+#define MULLO_DC_THRESHOLD               40
+#define MULLO_MUL_N_THRESHOLD           202
+
+#define DIV_SB_PREINV_THRESHOLD           0  /* preinv always */
+#define DIV_DC_THRESHOLD                 38
+#define POWM_THRESHOLD                   60
+
+#define MATRIX22_STRASSEN_THRESHOLD      17
+#define HGCD_THRESHOLD                   80
+#define GCD_DC_THRESHOLD                237
+#define GCDEXT_DC_THRESHOLD             198
+#define JACOBI_BASE_METHOD                2
+
+#define DIVREM_1_NORM_THRESHOLD           0  /* preinv always */
+#define DIVREM_1_UNNORM_THRESHOLD         0  /* always */
+#define MOD_1_NORM_THRESHOLD              0  /* always */
+#define MOD_1_UNNORM_THRESHOLD            0  /* always */
+#define MOD_1_1_THRESHOLD                 2
+#define MOD_1_2_THRESHOLD                 9
+#define MOD_1_4_THRESHOLD                20
+#define USE_PREINV_DIVREM_1               1  /* preinv always */
+#define USE_PREINV_MOD_1                  1  /* preinv always */
+#define DIVEXACT_1_THRESHOLD              0  /* always */
+#define MODEXACT_1_ODD_THRESHOLD          0  /* always */
+
+#define GET_STR_DC_THRESHOLD             20
+#define GET_STR_PRECOMPUTE_THRESHOLD     37
+#define SET_STR_DC_THRESHOLD            746
+#define SET_STR_PRECOMPUTE_THRESHOLD   1332
+
+/* NOTE(review): the FFT tables look zero-terminated -- confirm against mul_fft.c */
+#define MUL_FFT_TABLE  { 240, 480, 1344, 2304, 5120, 20480, 49152, 0 }
+#define MUL_FFT_MODF_THRESHOLD          232
+#define MUL_FFT_THRESHOLD              1664
+
+#define SQR_FFT_TABLE  { 240, 480, 1216, 2304, 5120, 12288, 49152, 0 }
+#define SQR_FFT_MODF_THRESHOLD          232
+#define SQR_FFT_THRESHOLD              1408
diff --git a/mpn/alpha/invert_limb.asm b/mpn/alpha/invert_limb.asm

new file mode 100644 (file)

index 0000000..99f51a3
--- /dev/null
+++ b/mpn/alpha/invert_limb.asm
@@ -0,0 +1,342 @@
+dnl  Alpha mpn_invert_limb -- Invert a normalized limb.
+
+dnl  Copyright 1996, 2000, 2001, 2002, 2003, 2007 Free Software Foundation,
+dnl  Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C      cycles/limb
+C EV4:    ~175
+C EV5:    ~111-126
+C EV6:    ~52-76
+
+C  This is based on ideas of Peter L. Montgomery.
+
+ASM_START()
+
+FLOAT64($C36,9223372036854775808.0)            C 2^63
+C  INPUT PARAMETERS: u in r16 (a normalized limb).  Result is returned in r0.
+PROLOGUE(mpn_invert_limb,gp)
+       lda     r30,-16(r30)            C reserve 16 bytes of stack scratch
+       addq    r16,r16,r1              C r1 = 2*u mod 2^64
+       bne     r1,$73                  C zero only if low 63 bits of u are 0
+       lda     r0,-1                   C in that case return all ones
+       br      r31,$Lend
+$73:
+       srl     r16,1,r1                C r1 = u / 2
+       stq     r1,0(r30)               C move it to the FP unit via memory
+       ldt     f11,0(r30)
+       cvtqt   f11,f1                  C f1 = u / 2 as floating point
+       LEA(r1,$C36)                    C address of the 2^63 constant
+       ldt     f10,0(r1)               C f10 = 2^63
+       divt    f10,f1,f10              C f10 = 2^63 / (u / 2)
+       LEA(r2,$invtab-4096)            C biased base, top 12 bits >= 2048
+       srl     r16,52,r1               C extract high 12 bits
+       addq    r1,r1,r1                C scale by 2, table entries are 16 bits
+       addq    r1,r2,r1                C compute array offset
+       ldq_u   r2,0(r1)                C load quadword containing our 16 bits
+bigend(`addq   r1,1,r1')
+       extwl   r2,r1,r2                C extract desired 16 bits
+       sll     r2,48,r0                C r0 = table value in the top 16 bits
+       umulh   r16,r0,r1               C r1 = high limb of u * r0
+       addq    r16,r1,r3               C r3 = r1 + u
+       stq     r3,0(r30)               C move r3 to the FP unit via memory
+       ldt     f11,0(r30)
+       cvtqt   f11,f1                  C f1 = r3 converted to floating point
+       mult    f1,f10,f1               C f1 = f1 * f10
+       cvttqc  f1,f1                   C back to integer, chopped rounding
+       stt     f1,0(r30)
+       ldq     r4,0(r30)               C r4 = that integer
+       subq    r0,r4,r0                C r0 -= r4, candidate result
+       umulh   r16,r0,r1               C r1 = high limb of u * r0
+       mulq    r16,r0,r2               C r2 = low limb of u * r0
+       addq    r16,r1,r3               C r3 = high limb + u
+       bge     r3,$Loop2               C r3 >= 0 (signed): only step down
+$Loop1:        addq    r2,r16,r2       C r3:r2 += u and r0++ while r3 < 0
+       cmpult  r2,r16,r1               C r1 = carry out of the low limb
+       addq    r3,r1,r3
+       addq    r0,1,r0
+       blt     r3,$Loop1
+$Loop2:        cmpult  r2,r16,r1       C r1 = borrow of r2 - u
+       subq    r0,1,r0                 C r0--
+       subq    r3,r1,r3                C r3:r2 -= u
+       subq    r2,r16,r2
+       bge     r3,$Loop2               C repeat while r3 >= 0 (signed)
+$Lend:
+       lda     r30,16(r30)             C release the stack scratch
+       ret     r31,(r26),1             C result in r0
+EPILOGUE(mpn_invert_limb)
+DATASTART($invtab)
+       .word 0xffff,0xffc0,0xff80,0xff40,0xff00,0xfec0,0xfe81,0xfe41
+       .word 0xfe01,0xfdc2,0xfd83,0xfd43,0xfd04,0xfcc5,0xfc86,0xfc46
+       .word 0xfc07,0xfbc8,0xfb8a,0xfb4b,0xfb0c,0xfacd,0xfa8e,0xfa50
+       .word 0xfa11,0xf9d3,0xf994,0xf956,0xf918,0xf8d9,0xf89b,0xf85d
+       .word 0xf81f,0xf7e1,0xf7a3,0xf765,0xf727,0xf6ea,0xf6ac,0xf66e
+       .word 0xf631,0xf5f3,0xf5b6,0xf578,0xf53b,0xf4fd,0xf4c0,0xf483
+       .word 0xf446,0xf409,0xf3cc,0xf38f,0xf352,0xf315,0xf2d8,0xf29c
+       .word 0xf25f,0xf222,0xf1e6,0xf1a9,0xf16d,0xf130,0xf0f4,0xf0b8
+       .word 0xf07c,0xf03f,0xf003,0xefc7,0xef8b,0xef4f,0xef14,0xeed8
+       .word 0xee9c,0xee60,0xee25,0xede9,0xedae,0xed72,0xed37,0xecfb
+       .word 0xecc0,0xec85,0xec4a,0xec0e,0xebd3,0xeb98,0xeb5d,0xeb22
+       .word 0xeae8,0xeaad,0xea72,0xea37,0xe9fd,0xe9c2,0xe988,0xe94d
+       .word 0xe913,0xe8d8,0xe89e,0xe864,0xe829,0xe7ef,0xe7b5,0xe77b
+       .word 0xe741,0xe707,0xe6cd,0xe694,0xe65a,0xe620,0xe5e6,0xe5ad
+       .word 0xe573,0xe53a,0xe500,0xe4c7,0xe48d,0xe454,0xe41b,0xe3e2
+       .word 0xe3a9,0xe370,0xe336,0xe2fd,0xe2c5,0xe28c,0xe253,0xe21a
+       .word 0xe1e1,0xe1a9,0xe170,0xe138,0xe0ff,0xe0c7,0xe08e,0xe056
+       .word 0xe01e,0xdfe5,0xdfad,0xdf75,0xdf3d,0xdf05,0xdecd,0xde95
+       .word 0xde5d,0xde25,0xdded,0xddb6,0xdd7e,0xdd46,0xdd0f,0xdcd7
+       .word 0xdca0,0xdc68,0xdc31,0xdbf9,0xdbc2,0xdb8b,0xdb54,0xdb1d
+       .word 0xdae6,0xdaae,0xda78,0xda41,0xda0a,0xd9d3,0xd99c,0xd965
+       .word 0xd92f,0xd8f8,0xd8c1,0xd88b,0xd854,0xd81e,0xd7e8,0xd7b1
+       .word 0xd77b,0xd745,0xd70e,0xd6d8,0xd6a2,0xd66c,0xd636,0xd600
+       .word 0xd5ca,0xd594,0xd55f,0xd529,0xd4f3,0xd4bd,0xd488,0xd452
+       .word 0xd41d,0xd3e7,0xd3b2,0xd37c,0xd347,0xd312,0xd2dd,0xd2a7
+       .word 0xd272,0xd23d,0xd208,0xd1d3,0xd19e,0xd169,0xd134,0xd100
+       .word 0xd0cb,0xd096,0xd061,0xd02d,0xcff8,0xcfc4,0xcf8f,0xcf5b
+       .word 0xcf26,0xcef2,0xcebe,0xce89,0xce55,0xce21,0xcded,0xcdb9
+       .word 0xcd85,0xcd51,0xcd1d,0xcce9,0xccb5,0xcc81,0xcc4e,0xcc1a
+       .word 0xcbe6,0xcbb3,0xcb7f,0xcb4c,0xcb18,0xcae5,0xcab1,0xca7e
+       .word 0xca4b,0xca17,0xc9e4,0xc9b1,0xc97e,0xc94b,0xc918,0xc8e5
+       .word 0xc8b2,0xc87f,0xc84c,0xc819,0xc7e7,0xc7b4,0xc781,0xc74f
+       .word 0xc71c,0xc6e9,0xc6b7,0xc684,0xc652,0xc620,0xc5ed,0xc5bb
+       .word 0xc589,0xc557,0xc524,0xc4f2,0xc4c0,0xc48e,0xc45c,0xc42a
+       .word 0xc3f8,0xc3c7,0xc395,0xc363,0xc331,0xc300,0xc2ce,0xc29c
+       .word 0xc26b,0xc239,0xc208,0xc1d6,0xc1a5,0xc174,0xc142,0xc111
+       .word 0xc0e0,0xc0af,0xc07e,0xc04d,0xc01c,0xbfeb,0xbfba,0xbf89
+       .word 0xbf58,0xbf27,0xbef6,0xbec5,0xbe95,0xbe64,0xbe33,0xbe03
+       .word 0xbdd2,0xbda2,0xbd71,0xbd41,0xbd10,0xbce0,0xbcb0,0xbc80
+       .word 0xbc4f,0xbc1f,0xbbef,0xbbbf,0xbb8f,0xbb5f,0xbb2f,0xbaff
+       .word 0xbacf,0xba9f,0xba6f,0xba40,0xba10,0xb9e0,0xb9b1,0xb981
+       .word 0xb951,0xb922,0xb8f2,0xb8c3,0xb894,0xb864,0xb835,0xb806
+       .word 0xb7d6,0xb7a7,0xb778,0xb749,0xb71a,0xb6eb,0xb6bc,0xb68d
+       .word 0xb65e,0xb62f,0xb600,0xb5d1,0xb5a2,0xb574,0xb545,0xb516
+       .word 0xb4e8,0xb4b9,0xb48a,0xb45c,0xb42e,0xb3ff,0xb3d1,0xb3a2
+       .word 0xb374,0xb346,0xb318,0xb2e9,0xb2bb,0xb28d,0xb25f,0xb231
+       .word 0xb203,0xb1d5,0xb1a7,0xb179,0xb14b,0xb11d,0xb0f0,0xb0c2
+       .word 0xb094,0xb067,0xb039,0xb00b,0xafde,0xafb0,0xaf83,0xaf55
+       .word 0xaf28,0xaefb,0xaecd,0xaea0,0xae73,0xae45,0xae18,0xadeb
+       .word 0xadbe,0xad91,0xad64,0xad37,0xad0a,0xacdd,0xacb0,0xac83
+       .word 0xac57,0xac2a,0xabfd,0xabd0,0xaba4,0xab77,0xab4a,0xab1e
+       .word 0xaaf1,0xaac5,0xaa98,0xaa6c,0xaa40,0xaa13,0xa9e7,0xa9bb
+       .word 0xa98e,0xa962,0xa936,0xa90a,0xa8de,0xa8b2,0xa886,0xa85a
+       .word 0xa82e,0xa802,0xa7d6,0xa7aa,0xa77e,0xa753,0xa727,0xa6fb
+       .word 0xa6d0,0xa6a4,0xa678,0xa64d,0xa621,0xa5f6,0xa5ca,0xa59f
+       .word 0xa574,0xa548,0xa51d,0xa4f2,0xa4c6,0xa49b,0xa470,0xa445
+       .word 0xa41a,0xa3ef,0xa3c4,0xa399,0xa36e,0xa343,0xa318,0xa2ed
+       .word 0xa2c2,0xa297,0xa26d,0xa242,0xa217,0xa1ed,0xa1c2,0xa197
+       .word 0xa16d,0xa142,0xa118,0xa0ed,0xa0c3,0xa098,0xa06e,0xa044
+       .word 0xa01a,0x9fef,0x9fc5,0x9f9b,0x9f71,0x9f47,0x9f1c,0x9ef2
+       .word 0x9ec8,0x9e9e,0x9e74,0x9e4b,0x9e21,0x9df7,0x9dcd,0x9da3
+       .word 0x9d79,0x9d50,0x9d26,0x9cfc,0x9cd3,0x9ca9,0x9c80,0x9c56
+       .word 0x9c2d,0x9c03,0x9bda,0x9bb0,0x9b87,0x9b5e,0x9b34,0x9b0b
+       .word 0x9ae2,0x9ab9,0x9a8f,0x9a66,0x9a3d,0x9a14,0x99eb,0x99c2
+       .word 0x9999,0x9970,0x9947,0x991e,0x98f6,0x98cd,0x98a4,0x987b
+       .word 0x9852,0x982a,0x9801,0x97d8,0x97b0,0x9787,0x975f,0x9736
+       .word 0x970e,0x96e5,0x96bd,0x9695,0x966c,0x9644,0x961c,0x95f3
+       .word 0x95cb,0x95a3,0x957b,0x9553,0x952b,0x9503,0x94db,0x94b3
+       .word 0x948b,0x9463,0x943b,0x9413,0x93eb,0x93c3,0x939b,0x9374
+       .word 0x934c,0x9324,0x92fd,0x92d5,0x92ad,0x9286,0x925e,0x9237
+       .word 0x920f,0x91e8,0x91c0,0x9199,0x9172,0x914a,0x9123,0x90fc
+       .word 0x90d4,0x90ad,0x9086,0x905f,0x9038,0x9011,0x8fea,0x8fc3
+       .word 0x8f9c,0x8f75,0x8f4e,0x8f27,0x8f00,0x8ed9,0x8eb2,0x8e8b
+       .word 0x8e65,0x8e3e,0x8e17,0x8df1,0x8dca,0x8da3,0x8d7d,0x8d56
+       .word 0x8d30,0x8d09,0x8ce3,0x8cbc,0x8c96,0x8c6f,0x8c49,0x8c23
+       .word 0x8bfc,0x8bd6,0x8bb0,0x8b8a,0x8b64,0x8b3d,0x8b17,0x8af1
+       .word 0x8acb,0x8aa5,0x8a7f,0x8a59,0x8a33,0x8a0d,0x89e7,0x89c1
+       .word 0x899c,0x8976,0x8950,0x892a,0x8904,0x88df,0x88b9,0x8893
+       .word 0x886e,0x8848,0x8823,0x87fd,0x87d8,0x87b2,0x878d,0x8767
+       .word 0x8742,0x871d,0x86f7,0x86d2,0x86ad,0x8687,0x8662,0x863d
+       .word 0x8618,0x85f3,0x85ce,0x85a9,0x8583,0x855e,0x8539,0x8514
+       .word 0x84f0,0x84cb,0x84a6,0x8481,0x845c,0x8437,0x8412,0x83ee
+       .word 0x83c9,0x83a4,0x8380,0x835b,0x8336,0x8312,0x82ed,0x82c9
+       .word 0x82a4,0x8280,0x825b,0x8237,0x8212,0x81ee,0x81ca,0x81a5
+       .word 0x8181,0x815d,0x8138,0x8114,0x80f0,0x80cc,0x80a8,0x8084
+       .word 0x8060,0x803c,0x8018,0x7ff4,0x7fd0,0x7fac,0x7f88,0x7f64
+       .word 0x7f40,0x7f1c,0x7ef8,0x7ed4,0x7eb1,0x7e8d,0x7e69,0x7e45
+       .word 0x7e22,0x7dfe,0x7ddb,0x7db7,0x7d93,0x7d70,0x7d4c,0x7d29
+       .word 0x7d05,0x7ce2,0x7cbf,0x7c9b,0x7c78,0x7c55,0x7c31,0x7c0e
+       .word 0x7beb,0x7bc7,0x7ba4,0x7b81,0x7b5e,0x7b3b,0x7b18,0x7af5
+       .word 0x7ad2,0x7aaf,0x7a8c,0x7a69,0x7a46,0x7a23,0x7a00,0x79dd
+       .word 0x79ba,0x7997,0x7975,0x7952,0x792f,0x790c,0x78ea,0x78c7
+       .word 0x78a4,0x7882,0x785f,0x783c,0x781a,0x77f7,0x77d5,0x77b2
+       .word 0x7790,0x776e,0x774b,0x7729,0x7706,0x76e4,0x76c2,0x76a0
+       .word 0x767d,0x765b,0x7639,0x7617,0x75f5,0x75d2,0x75b0,0x758e
+       .word 0x756c,0x754a,0x7528,0x7506,0x74e4,0x74c2,0x74a0,0x747e
+       .word 0x745d,0x743b,0x7419,0x73f7,0x73d5,0x73b4,0x7392,0x7370
+       .word 0x734f,0x732d,0x730b,0x72ea,0x72c8,0x72a7,0x7285,0x7264
+       .word 0x7242,0x7221,0x71ff,0x71de,0x71bc,0x719b,0x717a,0x7158
+       .word 0x7137,0x7116,0x70f5,0x70d3,0x70b2,0x7091,0x7070,0x704f
+       .word 0x702e,0x700c,0x6feb,0x6fca,0x6fa9,0x6f88,0x6f67,0x6f46
+       .word 0x6f26,0x6f05,0x6ee4,0x6ec3,0x6ea2,0x6e81,0x6e60,0x6e40
+       .word 0x6e1f,0x6dfe,0x6dde,0x6dbd,0x6d9c,0x6d7c,0x6d5b,0x6d3a
+       .word 0x6d1a,0x6cf9,0x6cd9,0x6cb8,0x6c98,0x6c77,0x6c57,0x6c37
+       .word 0x6c16,0x6bf6,0x6bd6,0x6bb5,0x6b95,0x6b75,0x6b54,0x6b34
+       .word 0x6b14,0x6af4,0x6ad4,0x6ab4,0x6a94,0x6a73,0x6a53,0x6a33
+       .word 0x6a13,0x69f3,0x69d3,0x69b3,0x6993,0x6974,0x6954,0x6934
+       .word 0x6914,0x68f4,0x68d4,0x68b5,0x6895,0x6875,0x6855,0x6836
+       .word 0x6816,0x67f6,0x67d7,0x67b7,0x6798,0x6778,0x6758,0x6739
+       .word 0x6719,0x66fa,0x66db,0x66bb,0x669c,0x667c,0x665d,0x663e
+       .word 0x661e,0x65ff,0x65e0,0x65c0,0x65a1,0x6582,0x6563,0x6544
+       .word 0x6524,0x6505,0x64e6,0x64c7,0x64a8,0x6489,0x646a,0x644b
+       .word 0x642c,0x640d,0x63ee,0x63cf,0x63b0,0x6391,0x6373,0x6354
+       .word 0x6335,0x6316,0x62f7,0x62d9,0x62ba,0x629b,0x627c,0x625e
+       .word 0x623f,0x6221,0x6202,0x61e3,0x61c5,0x61a6,0x6188,0x6169
+       .word 0x614b,0x612c,0x610e,0x60ef,0x60d1,0x60b3,0x6094,0x6076
+       .word 0x6058,0x6039,0x601b,0x5ffd,0x5fdf,0x5fc0,0x5fa2,0x5f84
+       .word 0x5f66,0x5f48,0x5f2a,0x5f0b,0x5eed,0x5ecf,0x5eb1,0x5e93
+       .word 0x5e75,0x5e57,0x5e39,0x5e1b,0x5dfd,0x5de0,0x5dc2,0x5da4
+       .word 0x5d86,0x5d68,0x5d4a,0x5d2d,0x5d0f,0x5cf1,0x5cd3,0x5cb6
+       .word 0x5c98,0x5c7a,0x5c5d,0x5c3f,0x5c21,0x5c04,0x5be6,0x5bc9
+       .word 0x5bab,0x5b8e,0x5b70,0x5b53,0x5b35,0x5b18,0x5afb,0x5add
+       .word 0x5ac0,0x5aa2,0x5a85,0x5a68,0x5a4b,0x5a2d,0x5a10,0x59f3
+       .word 0x59d6,0x59b8,0x599b,0x597e,0x5961,0x5944,0x5927,0x590a
+       .word 0x58ed,0x58d0,0x58b3,0x5896,0x5879,0x585c,0x583f,0x5822
+       .word 0x5805,0x57e8,0x57cb,0x57ae,0x5791,0x5775,0x5758,0x573b
+       .word 0x571e,0x5702,0x56e5,0x56c8,0x56ac,0x568f,0x5672,0x5656
+       .word 0x5639,0x561c,0x5600,0x55e3,0x55c7,0x55aa,0x558e,0x5571
+       .word 0x5555,0x5538,0x551c,0x5500,0x54e3,0x54c7,0x54aa,0x548e
+       .word 0x5472,0x5456,0x5439,0x541d,0x5401,0x53e5,0x53c8,0x53ac
+       .word 0x5390,0x5374,0x5358,0x533c,0x5320,0x5304,0x52e8,0x52cb
+       .word 0x52af,0x5293,0x5277,0x525c,0x5240,0x5224,0x5208,0x51ec
+       .word 0x51d0,0x51b4,0x5198,0x517c,0x5161,0x5145,0x5129,0x510d
+       .word 0x50f2,0x50d6,0x50ba,0x509f,0x5083,0x5067,0x504c,0x5030
+       .word 0x5015,0x4ff9,0x4fdd,0x4fc2,0x4fa6,0x4f8b,0x4f6f,0x4f54
+       .word 0x4f38,0x4f1d,0x4f02,0x4ee6,0x4ecb,0x4eb0,0x4e94,0x4e79
+       .word 0x4e5e,0x4e42,0x4e27,0x4e0c,0x4df0,0x4dd5,0x4dba,0x4d9f
+       .word 0x4d84,0x4d69,0x4d4d,0x4d32,0x4d17,0x4cfc,0x4ce1,0x4cc6
+       .word 0x4cab,0x4c90,0x4c75,0x4c5a,0x4c3f,0x4c24,0x4c09,0x4bee
+       .word 0x4bd3,0x4bb9,0x4b9e,0x4b83,0x4b68,0x4b4d,0x4b32,0x4b18
+       .word 0x4afd,0x4ae2,0x4ac7,0x4aad,0x4a92,0x4a77,0x4a5d,0x4a42
+       .word 0x4a27,0x4a0d,0x49f2,0x49d8,0x49bd,0x49a3,0x4988,0x496e
+       .word 0x4953,0x4939,0x491e,0x4904,0x48e9,0x48cf,0x48b5,0x489a
+       .word 0x4880,0x4865,0x484b,0x4831,0x4817,0x47fc,0x47e2,0x47c8
+       .word 0x47ae,0x4793,0x4779,0x475f,0x4745,0x472b,0x4711,0x46f6
+       .word 0x46dc,0x46c2,0x46a8,0x468e,0x4674,0x465a,0x4640,0x4626
+       .word 0x460c,0x45f2,0x45d8,0x45be,0x45a5,0x458b,0x4571,0x4557
+       .word 0x453d,0x4523,0x4509,0x44f0,0x44d6,0x44bc,0x44a2,0x4489
+       .word 0x446f,0x4455,0x443c,0x4422,0x4408,0x43ef,0x43d5,0x43bc
+       .word 0x43a2,0x4388,0x436f,0x4355,0x433c,0x4322,0x4309,0x42ef
+       .word 0x42d6,0x42bc,0x42a3,0x428a,0x4270,0x4257,0x423d,0x4224
+       .word 0x420b,0x41f2,0x41d8,0x41bf,0x41a6,0x418c,0x4173,0x415a
+       .word 0x4141,0x4128,0x410e,0x40f5,0x40dc,0x40c3,0x40aa,0x4091
+       .word 0x4078,0x405f,0x4046,0x402d,0x4014,0x3ffb,0x3fe2,0x3fc9
+       .word 0x3fb0,0x3f97,0x3f7e,0x3f65,0x3f4c,0x3f33,0x3f1a,0x3f01
+       .word 0x3ee8,0x3ed0,0x3eb7,0x3e9e,0x3e85,0x3e6c,0x3e54,0x3e3b
+       .word 0x3e22,0x3e0a,0x3df1,0x3dd8,0x3dc0,0x3da7,0x3d8e,0x3d76
+       .word 0x3d5d,0x3d45,0x3d2c,0x3d13,0x3cfb,0x3ce2,0x3cca,0x3cb1
+       .word 0x3c99,0x3c80,0x3c68,0x3c50,0x3c37,0x3c1f,0x3c06,0x3bee
+       .word 0x3bd6,0x3bbd,0x3ba5,0x3b8d,0x3b74,0x3b5c,0x3b44,0x3b2b
+       .word 0x3b13,0x3afb,0x3ae3,0x3acb,0x3ab2,0x3a9a,0x3a82,0x3a6a
+       .word 0x3a52,0x3a3a,0x3a22,0x3a09,0x39f1,0x39d9,0x39c1,0x39a9
+       .word 0x3991,0x3979,0x3961,0x3949,0x3931,0x3919,0x3901,0x38ea
+       .word 0x38d2,0x38ba,0x38a2,0x388a,0x3872,0x385a,0x3843,0x382b
+       .word 0x3813,0x37fb,0x37e3,0x37cc,0x37b4,0x379c,0x3785,0x376d
+       .word 0x3755,0x373e,0x3726,0x370e,0x36f7,0x36df,0x36c8,0x36b0
+       .word 0x3698,0x3681,0x3669,0x3652,0x363a,0x3623,0x360b,0x35f4
+       .word 0x35dc,0x35c5,0x35ae,0x3596,0x357f,0x3567,0x3550,0x3539
+       .word 0x3521,0x350a,0x34f3,0x34db,0x34c4,0x34ad,0x3496,0x347e
+       .word 0x3467,0x3450,0x3439,0x3422,0x340a,0x33f3,0x33dc,0x33c5
+       .word 0x33ae,0x3397,0x3380,0x3368,0x3351,0x333a,0x3323,0x330c
+       .word 0x32f5,0x32de,0x32c7,0x32b0,0x3299,0x3282,0x326c,0x3255
+       .word 0x323e,0x3227,0x3210,0x31f9,0x31e2,0x31cb,0x31b5,0x319e
+       .word 0x3187,0x3170,0x3159,0x3143,0x312c,0x3115,0x30fe,0x30e8
+       .word 0x30d1,0x30ba,0x30a4,0x308d,0x3076,0x3060,0x3049,0x3033
+       .word 0x301c,0x3005,0x2fef,0x2fd8,0x2fc2,0x2fab,0x2f95,0x2f7e
+       .word 0x2f68,0x2f51,0x2f3b,0x2f24,0x2f0e,0x2ef8,0x2ee1,0x2ecb
+       .word 0x2eb4,0x2e9e,0x2e88,0x2e71,0x2e5b,0x2e45,0x2e2e,0x2e18
+       .word 0x2e02,0x2dec,0x2dd5,0x2dbf,0x2da9,0x2d93,0x2d7c,0x2d66
+       .word 0x2d50,0x2d3a,0x2d24,0x2d0e,0x2cf8,0x2ce1,0x2ccb,0x2cb5
+       .word 0x2c9f,0x2c89,0x2c73,0x2c5d,0x2c47,0x2c31,0x2c1b,0x2c05
+       .word 0x2bef,0x2bd9,0x2bc3,0x2bad,0x2b97,0x2b81,0x2b6c,0x2b56
+       .word 0x2b40,0x2b2a,0x2b14,0x2afe,0x2ae8,0x2ad3,0x2abd,0x2aa7
+       .word 0x2a91,0x2a7c,0x2a66,0x2a50,0x2a3a,0x2a25,0x2a0f,0x29f9
+       .word 0x29e4,0x29ce,0x29b8,0x29a3,0x298d,0x2977,0x2962,0x294c
+       .word 0x2937,0x2921,0x290c,0x28f6,0x28e0,0x28cb,0x28b5,0x28a0
+       .word 0x288b,0x2875,0x2860,0x284a,0x2835,0x281f,0x280a,0x27f5
+       .word 0x27df,0x27ca,0x27b4,0x279f,0x278a,0x2774,0x275f,0x274a
+       .word 0x2735,0x271f,0x270a,0x26f5,0x26e0,0x26ca,0x26b5,0x26a0
+       .word 0x268b,0x2676,0x2660,0x264b,0x2636,0x2621,0x260c,0x25f7
+       .word 0x25e2,0x25cd,0x25b8,0x25a2,0x258d,0x2578,0x2563,0x254e
+       .word 0x2539,0x2524,0x250f,0x24fa,0x24e5,0x24d1,0x24bc,0x24a7
+       .word 0x2492,0x247d,0x2468,0x2453,0x243e,0x2429,0x2415,0x2400
+       .word 0x23eb,0x23d6,0x23c1,0x23ad,0x2398,0x2383,0x236e,0x235a
+       .word 0x2345,0x2330,0x231c,0x2307,0x22f2,0x22dd,0x22c9,0x22b4
+       .word 0x22a0,0x228b,0x2276,0x2262,0x224d,0x2239,0x2224,0x2210
+       .word 0x21fb,0x21e6,0x21d2,0x21bd,0x21a9,0x2194,0x2180,0x216c
+       .word 0x2157,0x2143,0x212e,0x211a,0x2105,0x20f1,0x20dd,0x20c8
+       .word 0x20b4,0x20a0,0x208b,0x2077,0x2063,0x204e,0x203a,0x2026
+       .word 0x2012,0x1ffd,0x1fe9,0x1fd5,0x1fc1,0x1fac,0x1f98,0x1f84
+       .word 0x1f70,0x1f5c,0x1f47,0x1f33,0x1f1f,0x1f0b,0x1ef7,0x1ee3
+       .word 0x1ecf,0x1ebb,0x1ea7,0x1e93,0x1e7f,0x1e6a,0x1e56,0x1e42
+       .word 0x1e2e,0x1e1a,0x1e06,0x1df3,0x1ddf,0x1dcb,0x1db7,0x1da3
+       .word 0x1d8f,0x1d7b,0x1d67,0x1d53,0x1d3f,0x1d2b,0x1d18,0x1d04
+       .word 0x1cf0,0x1cdc,0x1cc8,0x1cb5,0x1ca1,0x1c8d,0x1c79,0x1c65
+       .word 0x1c52,0x1c3e,0x1c2a,0x1c17,0x1c03,0x1bef,0x1bdb,0x1bc8
+       .word 0x1bb4,0x1ba0,0x1b8d,0x1b79,0x1b66,0x1b52,0x1b3e,0x1b2b
+       .word 0x1b17,0x1b04,0x1af0,0x1add,0x1ac9,0x1ab6,0x1aa2,0x1a8f
+       .word 0x1a7b,0x1a68,0x1a54,0x1a41,0x1a2d,0x1a1a,0x1a06,0x19f3
+       .word 0x19e0,0x19cc,0x19b9,0x19a5,0x1992,0x197f,0x196b,0x1958
+       .word 0x1945,0x1931,0x191e,0x190b,0x18f8,0x18e4,0x18d1,0x18be
+       .word 0x18ab,0x1897,0x1884,0x1871,0x185e,0x184b,0x1837,0x1824
+       .word 0x1811,0x17fe,0x17eb,0x17d8,0x17c4,0x17b1,0x179e,0x178b
+       .word 0x1778,0x1765,0x1752,0x173f,0x172c,0x1719,0x1706,0x16f3
+       .word 0x16e0,0x16cd,0x16ba,0x16a7,0x1694,0x1681,0x166e,0x165b
+       .word 0x1648,0x1635,0x1623,0x1610,0x15fd,0x15ea,0x15d7,0x15c4
+       .word 0x15b1,0x159f,0x158c,0x1579,0x1566,0x1553,0x1541,0x152e
+       .word 0x151b,0x1508,0x14f6,0x14e3,0x14d0,0x14bd,0x14ab,0x1498
+       .word 0x1485,0x1473,0x1460,0x144d,0x143b,0x1428,0x1416,0x1403
+       .word 0x13f0,0x13de,0x13cb,0x13b9,0x13a6,0x1394,0x1381,0x136f
+       .word 0x135c,0x1349,0x1337,0x1325,0x1312,0x1300,0x12ed,0x12db
+       .word 0x12c8,0x12b6,0x12a3,0x1291,0x127f,0x126c,0x125a,0x1247
+       .word 0x1235,0x1223,0x1210,0x11fe,0x11ec,0x11d9,0x11c7,0x11b5
+       .word 0x11a3,0x1190,0x117e,0x116c,0x1159,0x1147,0x1135,0x1123
+       .word 0x1111,0x10fe,0x10ec,0x10da,0x10c8,0x10b6,0x10a4,0x1091
+       .word 0x107f,0x106d,0x105b,0x1049,0x1037,0x1025,0x1013,0x1001
+       .word 0x0fef,0x0fdc,0x0fca,0x0fb8,0x0fa6,0x0f94,0x0f82,0x0f70
+       .word 0x0f5e,0x0f4c,0x0f3a,0x0f28,0x0f17,0x0f05,0x0ef3,0x0ee1
+       .word 0x0ecf,0x0ebd,0x0eab,0x0e99,0x0e87,0x0e75,0x0e64,0x0e52
+       .word 0x0e40,0x0e2e,0x0e1c,0x0e0a,0x0df9,0x0de7,0x0dd5,0x0dc3
+       .word 0x0db2,0x0da0,0x0d8e,0x0d7c,0x0d6b,0x0d59,0x0d47,0x0d35
+       .word 0x0d24,0x0d12,0x0d00,0x0cef,0x0cdd,0x0ccb,0x0cba,0x0ca8
+       .word 0x0c97,0x0c85,0x0c73,0x0c62,0x0c50,0x0c3f,0x0c2d,0x0c1c
+       .word 0x0c0a,0x0bf8,0x0be7,0x0bd5,0x0bc4,0x0bb2,0x0ba1,0x0b8f
+       .word 0x0b7e,0x0b6c,0x0b5b,0x0b4a,0x0b38,0x0b27,0x0b15,0x0b04
+       .word 0x0af2,0x0ae1,0x0ad0,0x0abe,0x0aad,0x0a9c,0x0a8a,0x0a79
+       .word 0x0a68,0x0a56,0x0a45,0x0a34,0x0a22,0x0a11,0x0a00,0x09ee
+       .word 0x09dd,0x09cc,0x09bb,0x09a9,0x0998,0x0987,0x0976,0x0965
+       .word 0x0953,0x0942,0x0931,0x0920,0x090f,0x08fe,0x08ec,0x08db
+       .word 0x08ca,0x08b9,0x08a8,0x0897,0x0886,0x0875,0x0864,0x0853
+       .word 0x0842,0x0831,0x081f,0x080e,0x07fd,0x07ec,0x07db,0x07ca
+       .word 0x07b9,0x07a8,0x0798,0x0787,0x0776,0x0765,0x0754,0x0743
+       .word 0x0732,0x0721,0x0710,0x06ff,0x06ee,0x06dd,0x06cd,0x06bc
+       .word 0x06ab,0x069a,0x0689,0x0678,0x0668,0x0657,0x0646,0x0635
+       .word 0x0624,0x0614,0x0603,0x05f2,0x05e1,0x05d1,0x05c0,0x05af
+       .word 0x059e,0x058e,0x057d,0x056c,0x055c,0x054b,0x053a,0x052a
+       .word 0x0519,0x0508,0x04f8,0x04e7,0x04d6,0x04c6,0x04b5,0x04a5
+       .word 0x0494,0x0484,0x0473,0x0462,0x0452,0x0441,0x0431,0x0420
+       .word 0x0410,0x03ff,0x03ef,0x03de,0x03ce,0x03bd,0x03ad,0x039c
+       .word 0x038c,0x037b,0x036b,0x035b,0x034a,0x033a,0x0329,0x0319
+       .word 0x0309,0x02f8,0x02e8,0x02d7,0x02c7,0x02b7,0x02a6,0x0296
+       .word 0x0286,0x0275,0x0265,0x0255,0x0245,0x0234,0x0224,0x0214
+       .word 0x0204,0x01f3,0x01e3,0x01d3,0x01c3,0x01b2,0x01a2,0x0192
+       .word 0x0182,0x0172,0x0161,0x0151,0x0141,0x0131,0x0121,0x0111
+       .word 0x0101,0x00f0,0x00e0,0x00d0,0x00c0,0x00b0,0x00a0,0x0090
+       .word 0x0080,0x0070,0x0060,0x0050,0x0040,0x0030,0x0020,0x0010
+DATAEND()
+ASM_END()
diff --git a/mpn/alpha/lshift.asm b/mpn/alpha/lshift.asm

new file mode 100644 (file)

index 0000000..bc0e03d
--- /dev/null
+++ b/mpn/alpha/lshift.asm
@@ -0,0 +1,171 @@
+dnl  Alpha mpn_lshift -- Shift a number left.
+
+dnl  Copyright 1994, 1995, 2000, 2003, 2009 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C      cycles/limb
+C EV4:     ?
+C EV5:     3.25
+C EV6:     1.75
+
+C  INPUT PARAMETERS
+C  rp  r16
+C  up  r17
+C  n   r18
+C  cnt r19
+
+
+ASM_START()
+PROLOGUE(mpn_lshift)
+       s8addq  r18,r17,r17     C make r17 point at end of s1
+       ldq     r4,-8(r17)      C load first limb (the one at the highest address)
+       subq    r31,r19,r20     C r20 = -cnt, acts as 64-cnt in srl
+       s8addq  r18,r16,r16     C make r16 point at end of RES
+       subq    r18,1,r18       C r18 = n-1
+       and     r18,4-1,r28     C number of limbs in first loop
+       srl     r4,r20,r0       C compute function result (bits shifted out)
+
+       beq     r28,L(L0)       C skip first loop if (n-1) mod 4 == 0
+       subq    r18,r28,r18     C r18 = limbs left for the 4-way code
+
+       ALIGN(8)
+L(top0):
+       ldq     r3,-16(r17)     C next lower limb
+       subq    r16,8,r16
+       sll     r4,r19,r5       C upper limb shifted left
+       subq    r17,8,r17
+       subq    r28,1,r28
+       srl     r3,r20,r6       C bits carried up from the lower limb
+       bis     r3,r3,r4        C r4 = r3, lower limb becomes upper
+       bis     r5,r6,r8        C merge the two parts
+       stq     r8,0(r16)
+       bne     r28,L(top0)
+
+L(L0): sll     r4,r19,r24      C shifted part of the pending limb
+       beq     r18,L(end)      C no groups of 4 limbs remain
+C warm up phase 1: load the next 4 limbs
+       ldq     r1,-16(r17)
+       subq    r18,4,r18
+       ldq     r2,-24(r17)
+       ldq     r3,-32(r17)
+       ldq     r4,-40(r17)
+C warm up phase 2: start shifting, load 4 more limbs if any remain
+       srl     r1,r20,r7
+       sll     r1,r19,r21
+       srl     r2,r20,r8
+       beq     r18,L(end1)
+       ldq     r1,-48(r17)
+       sll     r2,r19,r22
+       ldq     r2,-56(r17)
+       srl     r3,r20,r5
+       bis     r7,r24,r7
+       sll     r3,r19,r23
+       bis     r8,r21,r8
+       srl     r4,r20,r6
+       ldq     r3,-64(r17)
+       sll     r4,r19,r24
+       ldq     r4,-72(r17)
+       subq    r18,4,r18
+       beq     r18,L(end2)
+       ALIGN(16)
+C main loop, 4 limbs per iteration
+L(top):        stq     r7,-8(r16)
+       bis     r5,r22,r5
+       stq     r8,-16(r16)
+       bis     r6,r23,r6
+
+       srl     r1,r20,r7
+       subq    r18,4,r18
+       sll     r1,r19,r21
+       unop    C ldq   r31,-96(r17)
+
+       srl     r2,r20,r8
+       ldq     r1,-80(r17)
+       sll     r2,r19,r22
+       ldq     r2,-88(r17)
+
+       stq     r5,-24(r16)
+       bis     r7,r24,r7
+       stq     r6,-32(r16)
+       bis     r8,r21,r8
+
+       srl     r3,r20,r5
+       unop    C ldq   r31,-96(r17)
+       sll     r3,r19,r23
+       subq    r16,32,r16
+
+       srl     r4,r20,r6
+       ldq     r3,-96(r17)
+       sll     r4,r19,r24
+       ldq     r4,-104(r17)
+
+       subq    r17,32,r17
+       bne     r18,L(top)
+C cool down phase 2/1
+L(end2):
+       stq     r7,-8(r16)
+       bis     r5,r22,r5
+       stq     r8,-16(r16)
+       bis     r6,r23,r6
+       srl     r1,r20,r7
+       sll     r1,r19,r21
+       srl     r2,r20,r8
+       sll     r2,r19,r22
+       stq     r5,-24(r16)
+       bis     r7,r24,r7
+       stq     r6,-32(r16)
+       bis     r8,r21,r8
+       srl     r3,r20,r5
+       sll     r3,r19,r23
+       srl     r4,r20,r6
+       sll     r4,r19,r24
+C cool down phase 2/2
+       stq     r7,-40(r16)
+       bis     r5,r22,r5
+       stq     r8,-48(r16)
+       bis     r6,r23,r6
+       stq     r5,-56(r16)
+       stq     r6,-64(r16)
+C cool down phase 2/3
+       stq     r24,-72(r16)    C last limb stored, only shifted left
+       ret     r31,(r26),1
+
+C cool down phase 1/1
+L(end1):
+       sll     r2,r19,r22
+       srl     r3,r20,r5
+       bis     r7,r24,r7
+       sll     r3,r19,r23
+       bis     r8,r21,r8
+       srl     r4,r20,r6
+       sll     r4,r19,r24
+C cool down phase 1/2
+       stq     r7,-8(r16)
+       bis     r5,r22,r5
+       stq     r8,-16(r16)
+       bis     r6,r23,r6
+       stq     r5,-24(r16)
+       stq     r6,-32(r16)
+       stq     r24,-40(r16)    C last limb stored, only shifted left
+       ret     r31,(r26),1
+
+L(end):        stq     r24,-8(r16)  C last limb stored, only shifted left
+       ret     r31,(r26),1
+EPILOGUE(mpn_lshift)
+ASM_END()
diff --git a/mpn/alpha/mod_34lsub1.asm b/mpn/alpha/mod_34lsub1.asm

new file mode 100644 (file)

index 0000000..e5c1d22
--- /dev/null
+++ b/mpn/alpha/mod_34lsub1.asm
@@ -0,0 +1,153 @@
+dnl Alpha mpn_mod_34lsub1.
+
+dnl  Copyright 2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C      cycles/limb
+C EV4:     4 (?)
+C EV5:     2.67
+C EV6:     1.67
+
+
+dnl  INPUT PARAMETERS
+dnl  up                r16
+dnl  n         r17
+
+define(`l0',`r18')
+define(`l1',`r19')
+define(`l2',`r20')
+define(`a0',`r21')
+define(`a1',`r22')
+define(`a2',`r23')
+define(`c0',`r24')
+define(`c1',`r5')
+define(`c2',`r6')
+C mpn_mod_34lsub1 -- return in r0 a value congruent to the n limbs at up modulo 2^48-1
+ASM_START()
+PROLOGUE(mpn_mod_34lsub1)
+       bis     r31, r31, c0            C zero the three carry counters
+       bis     r31, r31, c1
+       bis     r31, r31, c2
+C a0/a1/a2 sum the limbs at index 0/1/2 mod 3 and c0/c1/c2 count their carry-outs
+       lda     r17, -3(r17)            C r17 = n - 3
+       bge     r17, $L_3_or_more
+       bis     r31, r31, a0            C n < 3: start from zero accumulators
+       bis     r31, r31, a1
+       bis     r31, r31, a2
+       br      r31, $L_012
+
+$L_3_or_more:
+       ldq     a0, 0(r16)              C seed the accumulators with limbs 0..2
+       ldq     a1, 8(r16)
+       ldq     a2, 16(r16)
+       lda     r16, 24(r16)            C up += 3 limbs
+       lda     r17, -3(r17)
+       blt     r17, $L_012             C fewer than 3 limbs remain
+
+$L_6_or_more:
+       ldq     l0, 0(r16)              C fetch the next three limbs
+       ldq     l1, 8(r16)
+       ldq     l2, 16(r16)
+       addq    l0, a0, a0              C first add is issued ahead of the loop
+
+       lda     r16, 24(r16)
+       lda     r17, -3(r17)
+       blt     r17, $L_end             C no further full group: drain the pipeline
+
+       ALIGN(16)
+C Main loop: each pass finishes the adds for l0..l2 while loading the next three limbs
+$L_9_or_more:
+$Loop: cmpult  a0, l0, r0              C r0 = carry from the pending a0 += l0
+       ldq     l0, 0(r16)
+       addq    r0, c0, c0
+       addq    l1, a1, a1
+       cmpult  a1, l1, r0
+       ldq     l1, 8(r16)
+       addq    r0, c1, c1
+       addq    l2, a2, a2
+       cmpult  a2, l2, r0
+       ldq     l2, 16(r16)
+       addq    r0, c2, c2
+       addq    l0, a0, a0              C start the next group, its carry is taken next pass
+       lda     r16, 24(r16)
+       lda     r17, -3(r17)
+       bge     r17, $Loop
+
+$L_end:        cmpult  a0, l0, r0      C drain: finish the adds for the last loaded group
+       addq    r0, c0, c0
+       addq    l1, a1, a1
+       cmpult  a1, l1, r0
+       addq    r0, c1, c1
+       addq    l2, a2, a2
+       cmpult  a2, l2, r0
+       addq    r0, c2, c2
+
+C Handle the last (n mod 3) limbs
+$L_012:        lda     r17, 2(r17)     C r17 = limbs left - 1
+       blt     r17, $L_0               C none left
+       ldq     l0, 0(r16)
+       addq    l0, a0, a0
+       cmpult  a0, l0, r0
+       addq    r0, c0, c0
+       beq     r17, $L_0               C exactly one was left
+       ldq     l1, 8(r16)
+       addq    l1, a1, a1
+       cmpult  a1, l1, r0
+       addq    r0, c1, c1
+C Modulo 2^48-1 the limb weights reduce as 2^64 == 2^16, 2^128 == 2^32 and 2^192 == 1
+C Align and sum our 3 main accumulators and 3 carry accumulators
+$L_0:  srl     a0, 48, r2              C high 16 bits of a0, weight 2^48 == 1
+       srl     a1, 32, r4              C high 32 bits of a1, weight 2^96 == 1
+ifdef(`HAVE_LIMB_LITTLE_ENDIAN',
+`      insll   a1, 2, r1',             C (a1 & 0xffffffff) << 16
+`      zapnot  a1, 15, r25
+       sll     r25, 16, r1')
+       zapnot  a0, 63, r0              C a0 & 0xffffffffffff
+       srl     a2, 16, a1              C a1 = high 48 bits of a2, weight 2^144 == 1
+ifdef(`HAVE_LIMB_LITTLE_ENDIAN',
+`      inswl   a2, 4, r3',             C (a2 & 0xffff) << 32
+`      zapnot  a2, 3, r25
+       sll     r25, 32, r3')
+       addq    r1, r4, r1
+       addq    r0, r2, r0
+       srl     c0, 32, a2              C c0 has the weight of a1 and is folded the same way
+ifdef(`HAVE_LIMB_LITTLE_ENDIAN',
+`      insll   c0, 2, r4',             C (c0 & 0xffffffff) << 16
+`      zapnot  c0, 15, r25
+       sll     r25, 16, r4')
+       addq    r0, r1, r0
+       addq    r3, a1, r3
+       addq    r0, r3, r0
+       srl     c1, 16, c0              C c1 has the weight of a2 and is folded the same way
+ifdef(`HAVE_LIMB_LITTLE_ENDIAN',
+`      inswl   c1, 4, r2',             C (c1 & 0xffff) << 32
+`      zapnot  c1, 3, r25
+       sll     r25, 32, r2')
+       addq    r4, a2, r4
+C      srl     c2, 48, r3              C This will be 0 in practice
+       zapnot  c2, 63, r1              C r1 = c2 & 0xffffffffffff
+       addq    r0, r4, r0
+       addq    r2, c0, r2
+       addq    r0, r2, r0
+C      addq    r1, r3, r1
+       addq    r0, r1, r0              C r0 = sum of all folded parts, not fully reduced
+
+       ret     r31, (r26), 1
+EPILOGUE(mpn_mod_34lsub1)
+ASM_END()
diff --git a/mpn/alpha/mode1o.asm b/mpn/alpha/mode1o.asm

new file mode 100644 (file)

index 0000000..0611cd8
--- /dev/null
+++ b/mpn/alpha/mode1o.asm
@@ -0,0 +1,198 @@
+dnl  Alpha mpn_modexact_1c_odd -- mpn exact remainder
+
+dnl  Copyright 2003, 2004 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C      cycles/limb
+C EV4:    47
+C EV5:    30
+C EV6:    15
+
+
+C mp_limb_t mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size, mp_limb_t d,
+C                                mp_limb_t c)
+C
+C This code follows the "alternate" code in mpn/generic/mode1o.c,
+C eliminating cbit+climb from the dependent chain.  This leaves,
+C
+C        ev4   ev5   ev6
+C         1     3     1    subq   y = x - h
+C        23    13     7    mulq   q = y * inverse
+C        23    14     7    umulh  h = high (q * d)
+C        --    --    --
+C        47    30    15
+C
+C In each case, the load latency, loop control, and extra carry bit handling
+C hide under the multiply latencies.  Those latencies are long enough that
+C we don't need to worry about alignment or pairing to squeeze out
+C performance.
+C
+C For the first limb, some of the loop code is broken out and scheduled back
+C since it can be done earlier.
+C
+C   - The first ldq src[0] is near the start of the routine, for maximum
+C     time from memory.
+C
+C   - The subq y=x-climb can be done without waiting for the inverse.
+C
+C   - The mulq y*inverse is replicated after the final subq for the inverse,
+C     instead of branching to the mulq in the main loop.  On ev4 a branch
+C     there would cost cycles, but we can hide them under the mulq latency.
+C
+C For the last limb, high<divisor is tested and if that's true a subtract
+C and addback is done, as per the main mpn/generic/mode1o.c code.  This is a
+C data-dependent branch, but we're waiting for umulh so any penalty should
+C hide there.  The multiplies saved would be worth the cost anyway.
+C
+C Enhancements:
+C
+C For size==1, a plain division (done bitwise say) might be faster than
+C calculating an inverse, the latter taking about 130 cycles on ev4 or 70 on
+C ev5.  A call to gcc __remqu might be a possibility.
+
+ASM_START()
+PROLOGUE(mpn_modexact_1c_odd,gp)
+C Build inv with d*inv == 1 mod 2^64, then per limb do q = y*inv and climb = high(q*d)
+       C r16   src
+       C r17   size
+       C r18   d
+       C r19   c
+
+       LEA(r0, binvert_limb_table)     C r0 = table base, indexed as table + idx below
+       srl     r18, 1, r20             C d >> 1
+
+       and     r20, 127, r20           C idx = d>>1 & 0x7F
+
+       addq    r0, r20, r21            C table + idx
+
+ifelse(bwx_available_p,1,
+`      ldbu    r20, 0(r21)             C table[idx], inverse 8 bits
+',`
+       ldq_u   r20, 0(r21)             C table[idx] qword
+       extbl   r20, r21, r20           C table[idx], inverse 8 bits
+')
+
+       mull    r20, r20, r7            C i*i
+       addq    r20, r20, r20           C 2*i
+
+       ldq     r2, 0(r16)              C x = s = src[0]
+       lda     r17, -1(r17)            C size--
+       clr     r0                      C initial cbit=0
+
+       mull    r7, r18, r7             C i*i*d
+
+       subq    r20, r7, r20            C 2*i-i*i*d, inverse 16 bits
+
+       mull    r20, r20, r7            C i*i
+       addq    r20, r20, r20           C 2*i
+
+       mull    r7, r18, r7             C i*i*d
+
+       subq    r20, r7, r20            C 2*i-i*i*d, inverse 32 bits
+
+       mulq    r20, r20, r7            C i*i
+       addq    r20, r20, r20           C 2*i
+
+       mulq    r7, r18, r7             C i*i*d
+       subq    r2, r19, r3             C y = x - climb
+
+       subq    r20, r7, r20            C inv = 2*i-i*i*d, inverse 64 bits
+
+ASSERT(r7, C should have d*inv==1 mod 2^64
+`      mulq    r18, r20, r7
+       cmpeq   r7, 1, r7')
+
+       mulq    r3, r20, r4             C first q = y * inv
+
+       beq     r17, L(one)             C if size==1
+       br      L(entry)                C else join the loop just after its mulq
+
+
+L(top):
+       C r0    cbit
+       C r16   src, incrementing
+       C r17   size, decrementing
+       C r18   d
+       C r19   climb
+       C r20   inv
+
+       ldq     r1, 0(r16)              C s = src[i]
+       subq    r1, r0, r2              C x = s - cbit
+       cmpult  r1, r0, r0              C new cbit = s < cbit
+
+       subq    r2, r19, r3             C y = x - climb
+
+       mulq    r3, r20, r4             C q = y * inv
+L(entry):
+       cmpult  r2, r19, r5             C cbit2 = x < climb
+       addq    r5, r0, r0              C cbit += cbit2
+       lda     r16, 8(r16)             C src++
+       lda     r17, -1(r17)            C size--
+
+       umulh   r4, r18, r19            C climb = q * d
+       bne     r17, L(top)             C while 2 or more limbs left
+
+C Final limb: handled here, outside the loop, so the high<divisor shortcut can apply
+
+       C r0    cbit
+       C r18   d
+       C r19   climb
+       C r20   inv
+
+       ldq     r1, 0(r16)              C s = src[size-1] high limb
+
+       cmpult  r1, r18, r2             C test high<divisor
+       bne     r2, L(skip)             C skip if so
+
+       C can't skip a division, repeat loop code
+
+       subq    r1, r0, r2              C x = s - cbit
+       cmpult  r1, r0, r0              C new cbit = s < cbit
+
+       subq    r2, r19, r3             C y = x - climb
+
+       mulq    r3, r20, r4             C q = y * inv
+L(one):
+       cmpult  r2, r19, r5             C cbit2 = x < climb
+       addq    r5, r0, r0              C cbit += cbit2
+
+       umulh   r4, r18, r19            C climb = q * d
+
+       addq    r19, r0, r0             C return climb + cbit
+       ret     r31, (r26), 1
+
+
+       ALIGN(8)
+L(skip):
+       C with high<divisor, the final step can be just (cbit+climb)-s and
+       C an addback of d if that underflows
+
+       addq    r19, r0, r19            C c = climb + cbit
+
+       subq    r19, r1, r2             C c - s
+       cmpult  r19, r1, r3             C c < s
+
+       addq    r2, r18, r0             C return c-s + divisor
+
+       cmoveq  r3, r2, r0              C return c-s if no underflow
+       ret     r31, (r26), 1
+
+EPILOGUE()
+ASM_END()
diff --git a/mpn/alpha/mul_1.asm b/mpn/alpha/mul_1.asm

new file mode 100644 (file)

index 0000000..30b1702
--- /dev/null
+++ b/mpn/alpha/mul_1.asm
@@ -0,0 +1,91 @@
+dnl  Alpha mpn_mul_1 -- Multiply a limb vector with a limb and store
+dnl  the result in a second limb vector.
+
+dnl  Copyright 1992, 1994, 1995, 2000, 2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C      cycles/limb
+C EV4:     42
+C EV5:     18
+C EV6:      7
+
+C  INPUT PARAMETERS
+C  rp  r16
+C  up  r17
+C  n   r18
+C  vl  r19
+C  cl  r20
+
+C mpn_mul_1c is mpn_mul_1 with carry-in cl; it shares $Loop, $Le2 and $Le1 with mpn_mul_1
+ASM_START()
+PROLOGUE(mpn_mul_1c)
+       ldq     r2,0(r17)       C r2 = s1_limb
+       lda     r18,-1(r18)     C size--
+       mulq    r2,r19,r3       C r3 = prod_low
+       umulh   r2,r19,r4       C r4 = prod_high
+       beq     r18,$Le1c       C jump if size was == 1
+       ldq     r2,8(r17)       C r2 = s1_limb
+       lda     r18,-1(r18)     C size--
+       addq    r3,r20,r3       C r3 = prod_low + cl
+       stq     r3,0(r16)
+       cmpult  r3,r20,r0       C r0 = carry from (prod_low + cl)
+       bne     r18,$Loop       C jump into the shared loop if size was > 2
+       br      r31,$Le2        C size was == 2
+$Le1c: addq    r3,r20,r3       C r3 = prod_low + cl
+       cmpult  r3,r20,r0       C r0 = carry from (prod_low + cl)
+$Le1:  stq     r3,0(r16)       C size == 1 tail, shared by both entry points
+       addq    r4,r0,r0        C return prod_high + carry
+       ret     r31,(r26),1
+EPILOGUE(mpn_mul_1c)
+
+PROLOGUE(mpn_mul_1)
+       ldq     r2,0(r17)       C r2 = s1_limb
+       lda     r18,-1(r18)     C size--
+       mulq    r2,r19,r3       C r3 = prod_low
+       bic     r31,r31,r0      C clear cy_limb
+       umulh   r2,r19,r4       C r4 = prod_high
+       beq     r18,$Le1        C jump if size was == 1
+       ldq     r2,8(r17)       C r2 = s1_limb
+       lda     r18,-1(r18)     C size--
+       stq     r3,0(r16)
+       beq     r18,$Le2        C jump if size was == 2
+C Loop state: r2 = next limb, r4 = pending prod_high, r0 = pending carry, result limb 0 stored
+       ALIGN(8)
+$Loop: mulq    r2,r19,r3       C r3 = prod_low
+       addq    r4,r0,r0        C cy_limb = previous prod_high + carry
+       lda     r18,-1(r18)     C size--
+       umulh   r2,r19,r4       C r4 = prod_high
+       ldq     r2,16(r17)      C r2 = s1_limb for the next pass
+       lda     r17,8(r17)      C s1_ptr++
+       addq    r3,r0,r3        C r3 = cy_limb + prod_low
+       stq     r3,8(r16)       C res_ptr still addresses the previous result limb
+       cmpult  r3,r0,r0        C r0 = carry from (cy_limb + prod_low)
+       lda     r16,8(r16)      C res_ptr++
+       bne     r18,$Loop
+
+$Le2:  mulq    r2,r19,r3       C r3 = prod_low
+       addq    r4,r0,r0        C cy_limb = previous prod_high + carry
+       umulh   r2,r19,r4       C r4 = prod_high
+       addq    r3,r0,r3        C r3 = cy_limb + prod_low
+       cmpult  r3,r0,r0        C r0 = carry from (cy_limb + prod_low)
+       stq     r3,8(r16)       C last result limb
+       addq    r4,r0,r0        C cy_limb = prod_high + cy
+       ret     r31,(r26),1
+EPILOGUE(mpn_mul_1)
+ASM_END()
diff --git a/mpn/alpha/rshift.asm b/mpn/alpha/rshift.asm

new file mode 100644 (file)

index 0000000..ae0b4b5
--- /dev/null
+++ b/mpn/alpha/rshift.asm
@@ -0,0 +1,169 @@
+dnl  Alpha mpn_rshift -- Shift a number right.
+
+dnl  Copyright 1994, 1995, 2000, 2009 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C      cycles/limb
+C EV4:     ?
+C EV5:     3.25
+C EV6:     1.75
+
+C  INPUT PARAMETERS
+C  rp  r16
+C  up  r17
+C  n   r18
+C  cnt r19
+
+C mpn_rshift -- store the n limbs at up shifted right by cnt bits at rp, return the bits shifted out
+ASM_START()
+PROLOGUE(mpn_rshift)
+       ldq     r4,0(r17)       C load first limb
+       subq    r31,r19,r20     C r20 = -cnt, acts as 64-cnt since shifts use the low 6 count bits
+       subq    r18,1,r18       C r18 = n-1 result limbs that combine two source limbs
+       and     r18,4-1,r28     C number of limbs in first loop
+       sll     r4,r20,r0       C compute function result
+
+       beq     r28,L(L0)
+       subq    r18,r28,r18     C r18 = limbs left for the 4-way code, a multiple of 4
+
+       ALIGN(8)
+L(top0):
+       ldq     r3,8(r17)       C next higher limb
+       addq    r16,8,r16
+       srl     r4,r19,r5       C low part, from the current limb
+       addq    r17,8,r17
+       subq    r28,1,r28
+       sll     r3,r20,r6       C high part, from the next limb
+       bis     r3,r3,r4        C next limb becomes the current limb
+       bis     r5,r6,r8
+       stq     r8,-8(r16)
+       bne     r28,L(top0)
+
+L(L0): srl     r4,r19,r24      C r24 = low part of the next result limb
+       beq     r18,L(end)      C nothing left for the 4-way code
+C warm up phase 1
+       ldq     r1,8(r17)       C preload four more source limbs
+       subq    r18,4,r18
+       ldq     r2,16(r17)
+       ldq     r3,24(r17)
+       ldq     r4,32(r17)
+C warm up phase 2
+       sll     r1,r20,r7
+       srl     r1,r19,r21
+       sll     r2,r20,r8
+       beq     r18,L(end1)     C only this group of 4 remained
+       ldq     r1,40(r17)
+       srl     r2,r19,r22
+       ldq     r2,48(r17)
+       sll     r3,r20,r5
+       bis     r7,r24,r7
+       srl     r3,r19,r23
+       bis     r8,r21,r8
+       sll     r4,r20,r6
+       ldq     r3,56(r17)
+       srl     r4,r19,r24
+       ldq     r4,64(r17)
+       subq    r18,4,r18
+       beq     r18,L(end2)     C exactly two groups remained: no main loop pass
+       ALIGN(16)
+C main loop, 4 limbs per pass
+L(top):        stq     r7,0(r16)
+       bis     r5,r22,r5
+       stq     r8,8(r16)
+       bis     r6,r23,r6
+
+       sll     r1,r20,r7
+       subq    r18,4,r18
+       srl     r1,r19,r21
+       unop    C ldq   r31,-96(r17)
+
+       sll     r2,r20,r8
+       ldq     r1,72(r17)
+       srl     r2,r19,r22
+       ldq     r2,80(r17)
+
+       stq     r5,16(r16)
+       bis     r7,r24,r7
+       stq     r6,24(r16)
+       bis     r8,r21,r8
+
+       sll     r3,r20,r5
+       unop    C ldq   r31,-96(r17)
+       srl     r3,r19,r23
+       addq    r16,32,r16
+
+       sll     r4,r20,r6
+       ldq     r3,88(r17)
+       srl     r4,r19,r24
+       ldq     r4,96(r17)
+
+       addq    r17,32,r17
+       bne     r18,L(top)
+C cool down phase 2/1
+L(end2):
+       stq     r7,0(r16)
+       bis     r5,r22,r5
+       stq     r8,8(r16)
+       bis     r6,r23,r6
+       sll     r1,r20,r7
+       srl     r1,r19,r21
+       sll     r2,r20,r8
+       srl     r2,r19,r22
+       stq     r5,16(r16)
+       bis     r7,r24,r7
+       stq     r6,24(r16)
+       bis     r8,r21,r8
+       sll     r3,r20,r5
+       srl     r3,r19,r23
+       sll     r4,r20,r6
+       srl     r4,r19,r24
+C cool down phase 2/2
+       stq     r7,32(r16)
+       bis     r5,r22,r5
+       stq     r8,40(r16)
+       bis     r6,r23,r6
+       stq     r5,48(r16)
+       stq     r6,56(r16)
+C cool down phase 2/3
+       stq     r24,64(r16)     C top result limb has only the srl part
+       ret     r31,(r26),1
+
+C cool down phase 1/1
+L(end1):
+       srl     r2,r19,r22
+       sll     r3,r20,r5
+       bis     r7,r24,r7
+       srl     r3,r19,r23
+       bis     r8,r21,r8
+       sll     r4,r20,r6
+       srl     r4,r19,r24
+C cool down phase 1/2
+       stq     r7,0(r16)
+       bis     r5,r22,r5
+       stq     r8,8(r16)
+       bis     r6,r23,r6
+       stq     r5,16(r16)
+       stq     r6,24(r16)
+       stq     r24,32(r16)     C top result limb has only the srl part
+       ret     r31,(r26),1
+
+L(end):        stq     r24,0(r16)      C top result limb has only the srl part
+       ret     r31,(r26),1
+EPILOGUE(mpn_rshift)
+ASM_END()
diff --git a/mpn/alpha/sqr_diagonal.asm b/mpn/alpha/sqr_diagonal.asm

new file mode 100644 (file)

index 0000000..2aa7f2e
--- /dev/null
+++ b/mpn/alpha/sqr_diagonal.asm
@@ -0,0 +1,65 @@
+dnl  Alpha mpn_sqr_diagonal.
+
+dnl  Copyright 2001, 2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C      cycles/limb
+C EV4:     42
+C EV5:     18
+C EV6:      3.45
+
+C  INPUT PARAMETERS
+C  rp  r16
+C  up  r17
+C  n   r18
+
+C mpn_sqr_diagonal -- for each of the n limbs at up store its 2-limb square at rp, low limb first
+ASM_START()
+PROLOGUE(mpn_sqr_diagonal)
+       ldq     r2,0(r17)       C r2 = s1_limb
+       lda     r18,-2(r18)     C size -= 2
+       mulq    r2,r2,r3        C r3 = prod_low
+       umulh   r2,r2,r4        C r4 = prod_high
+       blt     r18,$Lend1      C jump if size was == 1
+       ldq     r2,8(r17)       C r2 = s1_limb
+       beq     r18,$Lend2      C jump if size was == 2
+C Each pass stores the previous square while squaring the limb already held in r2
+       ALIGN(8)
+$Loop: stq     r3,0(r16)       C store previous prod_low before r3 is overwritten
+       mulq    r2,r2,r3        C r3 = prod_low
+       lda     r18,-1(r18)     C size--
+       stq     r4,8(r16)       C store previous prod_high before r4 is overwritten
+       umulh   r2,r2,r4        C r4 = prod_high
+       ldq     r2,16(r17)      C r2 = s1_limb for the next pass
+       lda     r17,8(r17)      C s1_ptr++
+       lda     r16,16(r16)     C res_ptr += 2 limbs
+       bne     r18,$Loop
+
+$Lend2:        stq     r3,0(r16)       C flush the pending square
+       mulq    r2,r2,r3        C r3 = prod_low
+       stq     r4,8(r16)
+       umulh   r2,r2,r4        C r4 = prod_high
+       stq     r3,16(r16)      C square of the last limb
+       stq     r4,24(r16)
+       ret     r31,(r26),1
+$Lend1:        stq     r3,0(r16)       C size == 1: store the single square
+       stq     r4,8(r16)
+       ret     r31,(r26),1
+EPILOGUE(mpn_sqr_diagonal)
+ASM_END()
diff --git a/mpn/alpha/sub_n.asm b/mpn/alpha/sub_n.asm

new file mode 100644 (file)

index 0000000..9567e52
--- /dev/null
+++ b/mpn/alpha/sub_n.asm
@@ -0,0 +1,146 @@
+dnl  Alpha mpn_sub_n -- Subtract two limb vectors of the same length > 0
+dnl  and store difference in a third limb vector.
+
+dnl  Copyright 1995, 1999, 2000, 2005 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C      cycles/limb
+C EV4:     ?
+C EV5:     4.75
+C EV6:     3
+
+dnl  INPUT PARAMETERS
+dnl  res_ptr   r16
+dnl  s1_ptr    r17
+dnl  s2_ptr    r18
+dnl  size      r19
+C mpn_sub_n -- res = s1 - s2 over size limbs, returning the borrow out of the top limb in r0
+ASM_START()
+PROLOGUE(mpn_sub_n)
+       bis     r31,r31,r25             C clear cy
+       subq    r19,4,r19               C decr loop cnt
+       blt     r19,$Lend2              C if less than 4 limbs, goto 2nd loop
+C Start software pipeline for 1st loop
+       ldq     r0,0(r18)
+       ldq     r4,0(r17)
+       ldq     r1,8(r18)
+       ldq     r5,8(r17)
+       addq    r17,32,r17              C update s1_ptr
+       ldq     r2,16(r18)
+       subq    r4,r0,r20               C 1st main subtract
+       ldq     r3,24(r18)
+       subq    r19,4,r19               C decr loop cnt
+       ldq     r6,-16(r17)
+       cmpult  r4,r0,r25               C compute cy from last subtract
+       ldq     r7,-8(r17)
+       subq    r5,r1,r28               C 2nd main subtract
+       addq    r18,32,r18              C update s2_ptr
+       subq    r28,r25,r21             C 2nd carry subtract
+       cmpult  r5,r1,r8                C compute cy from last subtract
+       blt     r19,$Lend1              C if less than 4 limbs remain, jump
+C 1st loop handles groups of 4 limbs in a software pipeline
+       ALIGN(16)
+$Loop: cmpult  r28,r25,r25             C compute cy from last subtract
+       ldq     r0,0(r18)
+       bis     r8,r25,r25              C combine cy from the two subtracts
+       ldq     r1,8(r18)
+       subq    r6,r2,r28               C 3rd main subtract
+       ldq     r4,0(r17)
+       subq    r28,r25,r22             C 3rd carry subtract
+       ldq     r5,8(r17)
+       cmpult  r6,r2,r8                C compute cy from last subtract
+       cmpult  r28,r25,r25             C compute cy from last subtract
+       stq     r20,0(r16)
+       bis     r8,r25,r25              C combine cy from the two subtracts
+       stq     r21,8(r16)
+       subq    r7,r3,r28               C 4th main subtract
+       subq    r28,r25,r23             C 4th carry subtract
+       cmpult  r7,r3,r8                C compute cy from last subtract
+       cmpult  r28,r25,r25             C compute cy from last subtract
+               addq    r17,32,r17              C update s1_ptr
+       bis     r8,r25,r25              C combine cy from the two subtracts
+               addq    r16,32,r16              C update res_ptr
+       subq    r4,r0,r28               C 1st main subtract
+       ldq     r2,16(r18)
+       subq    r28,r25,r20             C 1st carry subtract
+       ldq     r3,24(r18)
+       cmpult  r4,r0,r8                C compute cy from last subtract
+       ldq     r6,-16(r17)
+       cmpult  r28,r25,r25             C compute cy from last subtract
+       ldq     r7,-8(r17)
+       bis     r8,r25,r25              C combine cy from the two subtracts
+       subq    r19,4,r19               C decr loop cnt
+       stq     r22,-16(r16)
+       subq    r5,r1,r28               C 2nd main subtract
+       stq     r23,-8(r16)
+       subq    r28,r25,r21             C 2nd carry subtract
+               addq    r18,32,r18              C update s2_ptr
+       cmpult  r5,r1,r8                C compute cy from last subtract
+       bge     r19,$Loop
+C Finish software pipeline for 1st loop
+$Lend1:        cmpult  r28,r25,r25             C compute cy from last subtract
+       bis     r8,r25,r25              C combine cy from the two subtracts
+       subq    r6,r2,r28               C 3rd main subtract
+       subq    r28,r25,r22             C 3rd carry subtract
+       cmpult  r6,r2,r8                C compute cy from last subtract
+       cmpult  r28,r25,r25             C compute cy from last subtract
+       stq     r20,0(r16)
+       bis     r8,r25,r25              C combine cy from the two subtracts
+       stq     r21,8(r16)
+       subq    r7,r3,r28               C 4th main subtract
+       subq    r28,r25,r23             C 4th carry subtract
+       cmpult  r7,r3,r8                C compute cy from last subtract
+       cmpult  r28,r25,r25             C compute cy from last subtract
+       bis     r8,r25,r25              C combine cy from the two subtracts
+       addq    r16,32,r16              C update res_ptr
+       stq     r22,-16(r16)
+       stq     r23,-8(r16)
+$Lend2:        addq    r19,4,r19               C restore loop cnt
+       beq     r19,$Lret               C no limbs left, cy in r25 is the result
+C Start software pipeline for 2nd loop
+       ldq     r0,0(r18)
+       ldq     r4,0(r17)
+       subq    r19,1,r19
+       beq     r19,$Lend0
+C 2nd loop handles remaining 1-3 limbs
+       ALIGN(16)
+$Loop0:        subq    r4,r0,r28               C main subtract
+       cmpult  r4,r0,r8                C compute cy from last subtract
+       ldq     r0,8(r18)
+       ldq     r4,8(r17)
+       subq    r28,r25,r20             C carry subtract
+       addq    r18,8,r18
+       addq    r17,8,r17
+       stq     r20,0(r16)
+       cmpult  r28,r25,r25             C compute cy from last subtract
+       subq    r19,1,r19               C decr loop cnt
+       bis     r8,r25,r25              C combine cy from the two subtracts
+       addq    r16,8,r16
+       bne     r19,$Loop0
+$Lend0:        subq    r4,r0,r28               C main subtract
+       subq    r28,r25,r20             C carry subtract
+       cmpult  r4,r0,r8                C compute cy from last subtract
+       cmpult  r28,r25,r25             C compute cy from last subtract
+       stq     r20,0(r16)
+       bis     r8,r25,r25              C combine cy from the two subtracts
+
+$Lret: bis     r25,r31,r0              C return cy
+       ret     r31,(r26),1
+EPILOGUE(mpn_sub_n)
+ASM_END()
diff --git a/mpn/alpha/submul_1.asm b/mpn/alpha/submul_1.asm

new file mode 100644 (file)

index 0000000..554ccf5
--- /dev/null
+++ b/mpn/alpha/submul_1.asm
@@ -0,0 +1,88 @@
+dnl  Alpha mpn_submul_1 -- Multiply a limb vector with a limb and subtract
+dnl  the result from a second limb vector.
+
+dnl  Copyright 1992, 1994, 1995, 2000, 2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C      cycles/limb
+C EV4:     42
+C EV5:     18
+C EV6:      7
+
+C  INPUT PARAMETERS
+C  rp  r16
+C  up  r17
+C  n   r18
+C  limb        r19
+
+C mpn_submul_1 -- subtract up[] * limb from rp[] in place over n limbs, return high limb plus borrows
+ASM_START()
+PROLOGUE(mpn_submul_1)
+       ldq     r2,0(r17)       C r2 = s1_limb
+       addq    r17,8,r17       C s1_ptr++
+       subq    r18,1,r18       C size--
+       mulq    r2,r19,r3       C r3 = prod_low
+       ldq     r5,0(r16)       C r5 = *res_ptr
+       umulh   r2,r19,r0       C r0 = prod_high
+       beq     r18,$Lend1      C jump if size was == 1
+       ldq     r2,0(r17)       C r2 = s1_limb
+       addq    r17,8,r17       C s1_ptr++
+       subq    r18,1,r18       C size--
+       subq    r5,r3,r3        C r3 = *res_ptr - prod_low
+       cmpult  r5,r3,r4        C r4 = borrow, set when the difference exceeds the minuend
+       stq     r3,0(r16)
+       addq    r16,8,r16       C res_ptr++
+       beq     r18,$Lend2      C jump if size was == 2
+
+       ALIGN(8)
+$Loop: mulq    r2,r19,r3       C r3 = prod_low
+       ldq     r5,0(r16)       C r5 = *res_ptr
+       addq    r4,r0,r0        C cy_limb = previous prod_high + carries
+       subq    r18,1,r18       C size--
+       umulh   r2,r19,r4       C r4 = prod_high
+       ldq     r2,0(r17)       C r2 = s1_limb
+       addq    r17,8,r17       C s1_ptr++
+       addq    r3,r0,r3        C r3 = cy_limb + prod_low
+       cmpult  r3,r0,r0        C r0 = carry from (cy_limb + prod_low)
+       subq    r5,r3,r3        C r3 = *res_ptr - (cy_limb + prod_low)
+       cmpult  r5,r3,r5        C r5 = borrow from that subtract
+       stq     r3,0(r16)
+       addq    r16,8,r16       C res_ptr++
+       addq    r5,r0,r0        C combine carries
+       bne     r18,$Loop
+
+$Lend2:        mulq    r2,r19,r3       C r3 = prod_low
+       ldq     r5,0(r16)       C r5 = *res_ptr
+       addq    r4,r0,r0        C cy_limb = previous prod_high + carries
+       umulh   r2,r19,r4       C r4 = prod_high
+       addq    r3,r0,r3        C r3 = cy_limb + prod_low
+       cmpult  r3,r0,r0        C r0 = carry from (cy_limb + prod_low)
+       subq    r5,r3,r3        C r3 = *res_ptr - (cy_limb + prod_low)
+       cmpult  r5,r3,r5        C r5 = borrow from that subtract
+       stq     r3,0(r16)
+       addq    r5,r0,r0        C combine carries
+       addq    r4,r0,r0        C cy_limb = prod_high + cy
+       ret     r31,(r26),1
+$Lend1:        subq    r5,r3,r3        C size == 1: r3 = *res_ptr - prod_low
+       cmpult  r5,r3,r5        C r5 = borrow from that subtract
+       stq     r3,0(r16)
+       addq    r0,r5,r0        C return prod_high + borrow
+       ret     r31,(r26),1
+EPILOGUE(mpn_submul_1)
+ASM_END()
diff --git a/mpn/alpha/umul.asm b/mpn/alpha/umul.asm

new file mode 100644 (file)

index 0000000..7fa3f00
--- /dev/null
+++ b/mpn/alpha/umul.asm
@@ -0,0 +1,33 @@
+dnl  mpn_umul_ppmm -- 1x1->2 limb multiplication
+
+dnl  Copyright 1999, 2000, 2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_umul_ppmm (mp_limb_t *lowptr, mp_limb_t m1, mp_limb_t m2);
+C
+
+ASM_START()
+PROLOGUE(mpn_umul_ppmm)
+       mulq    r17, r18, r1            C r1 = low limb of m1 * m2
+       umulh   r17, r18, r0            C r0 = high limb of m1 * m2 (return value)
+       stq     r1, 0(r16)              C *lowptr = low limb
+       ret     r31, (r26), 1
+EPILOGUE()
+ASM_END()
diff --git a/mpn/alpha/unicos.m4 b/mpn/alpha/unicos.m4

new file mode 100644 (file)

index 0000000..f1f41c1
--- /dev/null
+++ b/mpn/alpha/unicos.m4
@@ -0,0 +1,119 @@
+divert(-1)
+
+dnl  m4 macros for alpha assembler on unicos.
+
+
+dnl  Copyright 2000, 2002, 2003, 2004 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+dnl  Note that none of the standard GMP_ASM_ autoconf tests are done for
+dnl  unicos, so none of the config.m4 results can be used here.
+
+dnl  No underscores on unicos
+define(`GSYM_PREFIX')
+
+define(`ASM_START',
+m4_assert_numargs(0)
+`      .ident  dummy')
+
+define(`X',
+m4_assert_numargs(1)
+`^X$1')
+
+define(`FLOAT64',
+m4_assert_numargs(2)
+`      .psect  $1@crud,data
+$1:    .t_floating $2
+       .endp')
+
+dnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo[,gp|noalign])
+dnl          EPILOGUE_cpu(GSYM_PREFIX`'foo)
+
+define(`PROLOGUE_cpu',
+m4_assert_numargs_range(1,2)
+`ifelse(`$2',gp,,
+`ifelse(`$2',noalign,,
+`ifelse(`$2',,,`m4_error(`Unrecognised PROLOGUE parameter
+')')')')dnl
+       .stack  192             ; What does this mean?  Only Cray knows.
+       .psect  $1@code,code,cache
+$1::')
+
+define(`EPILOGUE_cpu',
+m4_assert_numargs(1)
+`      .endp')
+
+
+dnl  Usage: LDGP(dst,src)
+dnl
+dnl  Emit an "ldgp dst,src", but only on systems using a GOT (which unicos
+dnl  doesn't).
+
+define(LDGP,
+m4_assert_numargs(2)
+)
+
+
+dnl  Usage: EXTERN(variable_name)
+define(`EXTERN',
+m4_assert_numargs(1)
+`      .extern $1')
+
+define(`DATASTART',
+m4_assert_numargs(1)
+`      .psect  $1@crud,data
+$1:')
+
+define(`DATAEND',
+m4_assert_numargs(0)
+`      .endp')
+
+define(`ASM_END',
+m4_assert_numargs(0)
+`      .end')
+
+define(`cvttqc',
+m4_assert_numargs(-1)
+`cvttq/c')
+
+dnl  Usage: LEA(reg,symbol) -- load the address of symbol into register reg
+define(`LEA',
+m4_assert_numargs(2)
+       `laum   $1,  $2(r31)
+       sll     $1,  32,   $1
+       lalm    $1,  $2($1)
+       lal     $1,  $2($1)')
+
+
+dnl  Usage: ALIGN(bytes)
+dnl
+dnl  Unicos assembler .align emits zeros, even in code segments, so disable
+dnl  aligning.
+dnl
+dnl  GCC uses a macro emitting nops until the desired alignment is reached
+dnl  (see unicosmk_file_start in alpha.c).  Could do something like that if
+dnl  we cared.  The maximum desired alignment must be established at the
+dnl  start of the section though, since of course emitting nops only
+dnl  advances relative to the section beginning.
+
+define(`ALIGN',
+m4_assert_numargs(1)
+)
+
+
+divert
diff --git a/mpn/arm/README b/mpn/arm/README

new file mode 100644 (file)

index 0000000..e1ca925
--- /dev/null
+++ b/mpn/arm/README
@@ -0,0 +1,34 @@
+Copyright 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by the
+Free Software Foundation; either version 3 of the License, or (at your option)
+any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License along
+with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+
+
+
+This directory contains mpn functions for ARM processors.
+It has been optimized for StrongARM.
+
+TODO
+
+Write mpn_addmul_2.  The speed of mpn_addmul_1 is 9.75 c/l;
+mpn_addmul_2 could run at 8 c/l.  mpn_addmul_N could
+approach 6 c/l, but register shortage will make this hard.
+
+Perhaps nails is the way to go even for an embedded processor like
+this, since the umlal accumulation could be used very effectively in
+that case.  With just 2 nail bits, we should get close to 5 c/l for a
+mpn_addmul_N or mpn_mul_basecase.
diff --git a/mpn/arm/add_n.asm b/mpn/arm/add_n.asm

new file mode 100644 (file)

index 0000000..0f07917
--- /dev/null
+++ b/mpn/arm/add_n.asm
@@ -0,0 +1,69 @@
+dnl  ARM mpn_add_n -- Add two limb vectors of the same length > 0 and store sum
+dnl  in a third limb vector.
+dnl  Contributed by Robert Harley.
+
+dnl  Copyright 1997, 2000, 2001 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C This code runs at 5 cycles/limb.
+
+define(`rp',`r0')
+define(`up',`r1')
+define(`vp',`r2')
+define(`n',`r3')
+
+
+ASM_START()
+PROLOGUE(mpn_add_n)
+       stmfd   sp!, { r8, r9, lr }             C save r8, r9, lr (all used as scratch)
+       movs    n, n, lsr #1                    C n = n / 2, carry = low bit of old n
+       bcc     L(skip1)                        C even count: carry is clear, skip single limb
+       ldr     r12, [up], #4
+       ldr     lr, [vp], #4
+       adds    r12, r12, lr                    C first limb, starts the carry chain
+       str     r12, [rp], #4
+L(skip1):
+       tst     n, #1                           C bit 1 of the original count: two limbs?
+       beq     L(skip2)
+       ldmia   up!, { r8, r9 }
+       ldmia   vp!, { r12, lr }
+       adcs    r8, r8, r12
+       adcs    r9, r9, lr
+       stmia   rp!, { r8, r9 }
+L(skip2):
+       bics    n, n, #1                        C n = 2 * (number of 4-limb groups left)
+       beq     L(return)
+       stmfd   sp!, { r4, r5, r6, r7 }
+L(add_n_loop):
+       ldmia   up!, { r4, r5, r6, r7 }         C load 4 limbs from up
+       ldmia   vp!, { r8, r9, r12, lr }        C load 4 limbs from vp
+       adcs    r4, r4, r8
+       ldr     r8, [rp, #12]                   C cache allocate
+       adcs    r5, r5, r9
+       adcs    r6, r6, r12
+       adcs    r7, r7, lr
+       stmia   rp!, { r4, r5, r6, r7 }         C store 4 sum limbs
+       sub     n, n, #2                        C plain sub (no S): leaves the flags alone
+       teq     n, #0                           C Z set once all groups are done
+       bne     L(add_n_loop)
+       ldmfd   sp!, { r4, r5, r6, r7 }
+L(return):
+       adc     r0, n, #0                       C n is 0 here, so r0 = carry out
+       ldmfd   sp!, { r8, r9, pc }             C restore regs and return
+EPILOGUE(mpn_add_n)
diff --git a/mpn/arm/addmul_1.asm b/mpn/arm/addmul_1.asm

new file mode 100644 (file)

index 0000000..de33f2f
--- /dev/null
+++ b/mpn/arm/addmul_1.asm
@@ -0,0 +1,107 @@
+dnl  ARM mpn_addmul_1 -- Multiply a limb vector with a limb and add the result
+dnl  to a second limb vector.
+
+dnl  Copyright 1998, 2000, 2001, 2003 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C            cycles/limb
+C StrongARM:  7.75-9.75  (dependent on vl value)
+C XScale:        8-9     (dependent on vl value, estimated)
+
+define(`rp',`r0')
+define(`up',`r1')
+define(`n',`r2')
+define(`vl',`r3')
+define(`rl',`r12')
+define(`ul',`r6')
+define(`r',`lr')
+
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+       stmfd   sp!, { r4-r6, lr }      C save r4-r6 and lr (lr is reused as scratch r)
+       mov     r4, #0                  C r4 = carry limb, starts at 0
+       adds    r0, r0, #0              C clear cy
+       tst     n, #1                   C odd count: handle one limb first
+       beq     L(skip1)
+       ldr     ul, [up], #4
+       ldr     rl, [rp, #0]
+       umull   r5, r4, ul, vl          C r4:r5 = ul * vl
+       adds    r, rl, r5               C add low product limb, sets cy
+       str     r, [rp], #4
+L(skip1):
+       tst     n, #2                   C two limbs to handle before the main loop?
+       beq     L(skip2)
+       ldr     ul, [up], #4
+       ldr     rl, [rp, #0]
+       mov     r5, #0
+       umlal   r4, r5, ul, vl          C r5:r4 = ul * vl + r4
+       ldr     ul, [up], #4
+       adcs    r, rl, r4
+       ldr     rl, [rp, #4]
+       mov     r4, #0
+       umlal   r5, r4, ul, vl          C r4:r5 = ul * vl + r5
+       str     r, [rp], #4
+       adcs    r, rl, r5
+       str     r, [rp], #4
+L(skip2):
+       bics    r, n, #3                C any whole groups of 4 limbs left?
+       beq     L(return)
+
+       ldr     ul, [up], #4
+       ldr     rl, [rp, #0]
+       mov     r5, #0
+       umlal   r4, r5, ul, vl          C r5:r4 = ul * vl + r4
+       b       L(in)                   C enter the 4-limb loop after its first multiply
+
+L(loop):
+       ldr     ul, [up], #4            C r4 and r5 swap low/high roles on every limb
+       adcs    r, rl, r5
+       ldr     rl, [rp, #4]
+       mov     r5, #0
+       umlal   r4, r5, ul, vl
+       str     r, [rp], #4
+L(in): ldr     ul, [up], #4
+       adcs    r, rl, r4
+       ldr     rl, [rp, #4]
+       mov     r4, #0
+       umlal   r5, r4, ul, vl
+       str     r, [rp], #4
+       ldr     ul, [up], #4
+       adcs    r, rl, r5
+       ldr     rl, [rp, #4]
+       mov     r5, #0
+       umlal   r4, r5, ul, vl
+       str     r, [rp], #4
+       ldr     ul, [up], #4
+       adcs    r, rl, r4
+       ldr     rl, [rp, #4]
+       mov     r4, #0
+       umlal   r5, r4, ul, vl
+       str     r, [rp], #4
+       sub     n, n, #4                C plain sub (no S): leaves the flags alone
+       bics    r, n, #3                C another group of 4 limbs?
+       bne     L(loop)
+
+       adcs    r, rl, r5               C add in the last pending low product limb
+       str     r, [rp], #4
+L(return):
+       adc     r0, r4, #0              C return carry limb plus final cy
+       ldmfd   sp!, { r4-r6, pc }      C restore regs and return
+EPILOGUE(mpn_addmul_1)
diff --git a/mpn/arm/arm-defs.m4 b/mpn/arm/arm-defs.m4

new file mode 100644 (file)

index 0000000..9d169e8
--- /dev/null
+++ b/mpn/arm/arm-defs.m4
@@ -0,0 +1,50 @@
+divert(-1)
+
+dnl  m4 macros for ARM assembler.
+
+dnl  Copyright 2001 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+dnl  Standard commenting is with @, the default m4 # is for constants and we
+dnl  don't want to disable macro expansions in or after them.
+
+changecom(@)
+
+
+dnl  APCS register names.
+
+deflit(a1,r0)
+deflit(a2,r1)
+deflit(a3,r2)
+deflit(a4,r3)
+deflit(v1,r4)
+deflit(v2,r5)
+deflit(v3,r6)
+deflit(v4,r7)
+deflit(v5,r8)
+deflit(v6,r9)
+deflit(sb,r9)
+deflit(v7,r10)
+deflit(sl,r10)
+deflit(fp,r11)
+deflit(ip,r12)
+deflit(sp,r13)
+deflit(lr,r14)
+deflit(pc,r15)
+
+divert
diff --git a/mpn/arm/copyd.asm b/mpn/arm/copyd.asm

new file mode 100644 (file)

index 0000000..718b762
--- /dev/null
+++ b/mpn/arm/copyd.asm
@@ -0,0 +1,58 @@
+dnl  ARM mpn_copyd.
+
+dnl  Copyright 2003 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C This runs at 3 cycles/limb in the StrongARM.
+
+define(`rp',`r0')
+define(`up',`r1')
+define(`n',`r2')
+
+
+ASM_START()
+PROLOGUE(mpn_copyd)
+       mov     r12, n, lsl #2                  C r12 = n * 4 (size in bytes)
+       sub     r12, r12, #4                    C byte offset of the last limb
+       add     rp, rp, r12                     C make rp point at last limb
+       add     up, up, r12                     C make up point at last limb
+
+       tst     n, #1                           C odd count: copy one limb first
+       beq     L(skip1)
+       ldr     r3, [up], #-4
+       str     r3, [rp], #-4
+L(skip1):
+       tst     n, #2                           C two more limbs before the 4-limb loop?
+       beq     L(skip2)
+       ldmda   up!, { r3, r12 }                C load 2 limbs
+       stmda   rp!, { r3, r12 }                C store 2 limbs
+L(skip2):
+       bics    n, n, #3                        C n = limbs left, a multiple of 4
+       beq     L(return)
+       stmfd   sp!, { r7, r8, r9 }             C save regs on stack
+L(loop):
+       ldmda   up!, { r3, r8, r9, r12 }        C load 4 limbs
+       ldr     r7, [rp, #-12]                  C cache allocate
+       subs    n, n, #4                        C count down, Z set when done
+       stmda   rp!, { r3, r8, r9, r12 }        C store 4 limbs
+       bne     L(loop)
+       ldmfd   sp!, { r7, r8, r9 }             C restore regs from stack
+L(return):
+       mov     pc, lr                          C return
+EPILOGUE(mpn_copyd)
diff --git a/mpn/arm/copyi.asm b/mpn/arm/copyi.asm

new file mode 100644 (file)

index 0000000..5ee93ac
--- /dev/null
+++ b/mpn/arm/copyi.asm
@@ -0,0 +1,53 @@
+dnl  ARM mpn_copyi.
+
+dnl  Copyright 2003 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C This runs at 3 cycles/limb in the StrongARM.
+
+define(`rp',`r0')
+define(`up',`r1')
+define(`n',`r2')
+
+
+ASM_START()
+PROLOGUE(mpn_copyi)
+       tst     n, #1                           C odd count: copy one limb first
+       beq     L(skip1)
+       ldr     r3, [up], #4
+       str     r3, [rp], #4
+L(skip1):
+       tst     n, #2                           C two more limbs before the 4-limb loop?
+       beq     L(skip2)
+       ldmia   up!, { r3, r12 }                C load 2 limbs
+       stmia   rp!, { r3, r12 }                C store 2 limbs
+L(skip2):
+       bics    n, n, #3                        C n = limbs left, a multiple of 4
+       beq     L(return)
+       stmfd   sp!, { r7, r8, r9 }             C save regs on stack
+L(loop):
+       ldmia   up!, { r3, r8, r9, r12 }        C load 4 limbs
+       ldr     r7, [rp, #12]                   C cache allocate
+       subs    n, n, #4                        C count down, Z set when done
+       stmia   rp!, { r3, r8, r9, r12 }        C store 4 limbs
+       bne     L(loop)
+       ldmfd   sp!, { r7, r8, r9 }             C restore regs from stack
+L(return):
+       mov     pc, lr                          C return
+EPILOGUE(mpn_copyi)
diff --git a/mpn/arm/gmp-mparam.h b/mpn/arm/gmp-mparam.h

new file mode 100644 (file)

index 0000000..431aa4a
--- /dev/null
+++ b/mpn/arm/gmp-mparam.h
@@ -0,0 +1,142 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2009, 2010 Free
+Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 32
+#define BYTES_PER_MP_LIMB 4
+
+/* 593MHz ARM (gcc50.fsffrance.org) */
+
+#define DIVREM_1_NORM_THRESHOLD              0  /* preinv always */
+#define DIVREM_1_UNNORM_THRESHOLD            0  /* always */
+#define MOD_1_NORM_THRESHOLD                 0  /* always */
+#define MOD_1_UNNORM_THRESHOLD               0  /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD         17
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          9
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD     MP_SIZE_T_MAX
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD     MP_SIZE_T_MAX
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     27
+#define USE_PREINV_DIVREM_1                  1  /* preinv always */
+#define DIVREM_2_THRESHOLD                   0  /* preinv always */
+#define DIVEXACT_1_THRESHOLD                 0  /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD           44
+
+#define MUL_TOOM22_THRESHOLD                34
+#define MUL_TOOM33_THRESHOLD               121
+#define MUL_TOOM44_THRESHOLD               191
+#define MUL_TOOM6H_THRESHOLD               366
+#define MUL_TOOM8H_THRESHOLD               547
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD     129
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     191
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     117
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD     137
+
+#define SQR_BASECASE_THRESHOLD              13
+#define SQR_TOOM2_THRESHOLD                 78
+#define SQR_TOOM3_THRESHOLD                141
+#define SQR_TOOM4_THRESHOLD                212
+#define SQR_TOOM6_THRESHOLD                330
+#define SQR_TOOM8_THRESHOLD                422
+
+#define MULMOD_BNM1_THRESHOLD               21
+#define SQRMOD_BNM1_THRESHOLD               25
+
+#define MUL_FFT_MODF_THRESHOLD             404  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    404, 5}, {     21, 6}, {     11, 5}, {     25, 6}, \
+    {     13, 5}, {     27, 6}, {     28, 7}, {     15, 6}, \
+    {     32, 7}, {     17, 6}, {     35, 7}, {     19, 6}, \
+    {     39, 7}, {     21, 6}, {     43, 7}, {     29, 8}, \
+    {     15, 7}, {     35, 8}, {     19, 7}, {     41, 8}, \
+    {     23, 7}, {     49, 8}, {     27, 7}, {     55, 9}, \
+    {     15, 8}, {     31, 7}, {     63, 8}, {     43, 9}, \
+    {     23, 8}, {     55, 9}, {     31, 8}, {     71, 9}, \
+    {     39, 8}, {     83, 9}, {     47, 8}, {     99, 9}, \
+    {     55,10}, {     31, 9}, {     63, 8}, {    127, 9}, \
+    {     79,10}, {     47, 9}, {    103,11}, {     31,10}, \
+    {     63, 9}, {    135,10}, {     95, 9}, {    191,10}, \
+    {    111,11}, {     63,10}, {    127, 9}, {    255,10}, \
+    {    143, 9}, {    287,10}, {    159, 9}, {    319,11}, \
+    {     95,10}, {    191, 9}, {    383,10}, {    207,12}, \
+    {     63,11}, {    127,10}, {    287,11}, {    159,10}, \
+    {    351,11}, {    191,10}, {    415,11}, {    223,12}, \
+    {    127,11}, {    255,10}, {    511,11}, {    319,10}, \
+    {    639,11}, {    351,12}, {    191,11}, {    415,13}, \
+    {   8192,14}, {  16384,15}, {  32768,16} }
+#define MUL_FFT_TABLE3_SIZE 79
+#define MUL_FFT_THRESHOLD                 5760
+
+#define SQR_FFT_MODF_THRESHOLD             400  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    400, 5}, {     25, 6}, {     13, 5}, {     27, 6}, \
+    {     25, 7}, {     13, 6}, {     28, 7}, {     15, 6}, \
+    {     32, 7}, {     19, 6}, {     39, 7}, {     29, 8}, \
+    {     15, 7}, {     35, 8}, {     19, 7}, {     41, 8}, \
+    {     23, 7}, {     47, 8}, {     27, 7}, {     55, 9}, \
+    {     15, 8}, {     39, 9}, {     23, 8}, {     55,10}, \
+    {     15, 9}, {     31, 8}, {     67, 9}, {     39, 8}, \
+    {     79, 9}, {     47, 8}, {     95, 9}, {     55,10}, \
+    {     31, 9}, {     79,10}, {     47, 9}, {    103,11}, \
+    {     31,10}, {     63, 9}, {    135,10}, {     79, 9}, \
+    {    159, 8}, {    319,10}, {     95, 9}, {    191,10}, \
+    {    111,11}, {     63,10}, {    127, 9}, {    271,10}, \
+    {    143, 9}, {    303,10}, {    159,11}, {     95,10}, \
+    {    191, 9}, {    383,10}, {    207,12}, {     63,11}, \
+    {    127,10}, {    303,11}, {    159,10}, {    367,11}, \
+    {    191,10}, {    415,11}, {    223,10}, {    447,12}, \
+    {    127,11}, {    255,10}, {    511,11}, {    287,10}, \
+    {    607,11}, {    319,10}, {    639,11}, {    351,12}, \
+    {    191,11}, {    447,13}, {   8192,14}, {  16384,15}, \
+    {  32768,16} }
+#define SQR_FFT_TABLE3_SIZE 77
+#define SQR_FFT_THRESHOLD                 3136
+
+#define MULLO_BASECASE_THRESHOLD             0  /* always */
+#define MULLO_DC_THRESHOLD                 120
+#define MULLO_MUL_N_THRESHOLD            11317
+
+#define DC_DIV_QR_THRESHOLD                134
+#define DC_DIVAPPR_Q_THRESHOLD             442
+#define DC_BDIV_QR_THRESHOLD               127
+#define DC_BDIV_Q_THRESHOLD                296
+
+#define INV_MULMOD_BNM1_THRESHOLD           66
+#define INV_NEWTON_THRESHOLD               458
+#define INV_APPR_THRESHOLD                 454
+
+#define BINV_NEWTON_THRESHOLD              494
+#define REDC_1_TO_REDC_N_THRESHOLD         116
+
+#define MU_DIV_QR_THRESHOLD               2914
+#define MU_DIVAPPR_Q_THRESHOLD            3091
+#define MUPI_DIV_QR_THRESHOLD              221
+#define MU_BDIV_QR_THRESHOLD              2259
+#define MU_BDIV_Q_THRESHOLD               2747
+
+#define MATRIX22_STRASSEN_THRESHOLD         17
+#define HGCD_THRESHOLD                     109
+#define GCD_DC_THRESHOLD                   697
+#define GCDEXT_DC_THRESHOLD                535
+#define JACOBI_BASE_METHOD                   2
+
+#define GET_STR_DC_THRESHOLD                14
+#define GET_STR_PRECOMPUTE_THRESHOLD        29
+#define SET_STR_DC_THRESHOLD               321
+#define SET_STR_PRECOMPUTE_THRESHOLD      1037
diff --git a/mpn/arm/invert_limb.asm b/mpn/arm/invert_limb.asm

new file mode 100644 (file)

index 0000000..bbc9b9a
--- /dev/null
+++ b/mpn/arm/invert_limb.asm
@@ -0,0 +1,86 @@
+dnl  ARM mpn_invert_limb -- Invert a normalized limb.
+
+dnl  Copyright 2001, 2009, 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_invert_limb)
+       ldr     r2, L(4)                C r2 = offset constant stored at L(4)
+L(2):  add     r2, pc, r2              C r2 = approx_tab - 512 (pc reads as L(2) + 8)
+       mov     r3, r0, lsr #23         C r3 = top 9 bits of the limb
+       mov     r3, r3, asl #1          C byte offset, 512..1022 when the top bit is set
+       ldrh    r3, [r3, r2]            C fetch the 16-bit table entry for those bits
+       mov     r1, r3, asl #17
+       mul     r12, r3, r3
+       umull   r3, r2, r12, r0
+       sub     r1, r1, r2, asl #1
+       umull   r3, r2, r1, r1
+       umull   r12, r3, r0, r3
+       umull   r2, r12, r0, r2
+       adds    r2, r2, r3
+       adc     r12, r12, #0
+       rsb     r1, r12, r1
+       mvn     r2, r2, lsr #30
+       add     r2, r2, r1, asl #2
+       umull   r12, r3, r0, r2
+       adds    r1, r12, r0
+       adc     r3, r3, r0
+       rsb     r0, r3, r2              C r0 = r2 - r3, the return value
+       bx      lr
+
+       ALIGN(4)
+L(4):  .word   approx_tab-8-512-L(2)   C table offset relative to the pc value at L(2)
+EPILOGUE()
+
+       .section .rodata
+       ALIGN(2)
+approx_tab:
+       .short    0xffc0,0xfec0,0xfdc0,0xfcc0,0xfbc0,0xfac0,0xfa00,0xf900
+       .short    0xf800,0xf700,0xf640,0xf540,0xf440,0xf380,0xf280,0xf180
+       .short    0xf0c0,0xefc0,0xef00,0xee00,0xed40,0xec40,0xeb80,0xeac0
+       .short    0xe9c0,0xe900,0xe840,0xe740,0xe680,0xe5c0,0xe500,0xe400
+       .short    0xe340,0xe280,0xe1c0,0xe100,0xe040,0xdf80,0xdec0,0xde00
+       .short    0xdd40,0xdc80,0xdbc0,0xdb00,0xda40,0xd980,0xd8c0,0xd800
+       .short    0xd740,0xd680,0xd600,0xd540,0xd480,0xd3c0,0xd340,0xd280
+       .short    0xd1c0,0xd140,0xd080,0xcfc0,0xcf40,0xce80,0xcdc0,0xcd40
+       .short    0xcc80,0xcc00,0xcb40,0xcac0,0xca00,0xc980,0xc8c0,0xc840
+       .short    0xc780,0xc700,0xc640,0xc5c0,0xc540,0xc480,0xc400,0xc380
+       .short    0xc2c0,0xc240,0xc1c0,0xc100,0xc080,0xc000,0xbf80,0xbec0
+       .short    0xbe40,0xbdc0,0xbd40,0xbc80,0xbc00,0xbb80,0xbb00,0xba80
+       .short    0xba00,0xb980,0xb900,0xb840,0xb7c0,0xb740,0xb6c0,0xb640
+       .short    0xb5c0,0xb540,0xb4c0,0xb440,0xb3c0,0xb340,0xb2c0,0xb240
+       .short    0xb1c0,0xb140,0xb0c0,0xb080,0xb000,0xaf80,0xaf00,0xae80
+       .short    0xae00,0xad80,0xad40,0xacc0,0xac40,0xabc0,0xab40,0xaac0
+       .short    0xaa80,0xaa00,0xa980,0xa900,0xa8c0,0xa840,0xa7c0,0xa740
+       .short    0xa700,0xa680,0xa600,0xa5c0,0xa540,0xa4c0,0xa480,0xa400
+       .short    0xa380,0xa340,0xa2c0,0xa240,0xa200,0xa180,0xa140,0xa0c0
+       .short    0xa080,0xa000,0x9f80,0x9f40,0x9ec0,0x9e80,0x9e00,0x9dc0
+       .short    0x9d40,0x9d00,0x9c80,0x9c40,0x9bc0,0x9b80,0x9b00,0x9ac0
+       .short    0x9a40,0x9a00,0x9980,0x9940,0x98c0,0x9880,0x9840,0x97c0
+       .short    0x9780,0x9700,0x96c0,0x9680,0x9600,0x95c0,0x9580,0x9500
+       .short    0x94c0,0x9440,0x9400,0x93c0,0x9340,0x9300,0x92c0,0x9240
+       .short    0x9200,0x91c0,0x9180,0x9100,0x90c0,0x9080,0x9000,0x8fc0
+       .short    0x8f80,0x8f40,0x8ec0,0x8e80,0x8e40,0x8e00,0x8d80,0x8d40
+       .short    0x8d00,0x8cc0,0x8c80,0x8c00,0x8bc0,0x8b80,0x8b40,0x8b00
+       .short    0x8a80,0x8a40,0x8a00,0x89c0,0x8980,0x8940,0x88c0,0x8880
+       .short    0x8840,0x8800,0x87c0,0x8780,0x8740,0x8700,0x8680,0x8640
+       .short    0x8600,0x85c0,0x8580,0x8540,0x8500,0x84c0,0x8480,0x8440
+       .short    0x8400,0x8380,0x8340,0x8300,0x82c0,0x8280,0x8240,0x8200
+       .short    0x81c0,0x8180,0x8140,0x8100,0x80c0,0x8080,0x8040,0x8000
+ASM_END()
diff --git a/mpn/arm/mul_1.asm b/mpn/arm/mul_1.asm

new file mode 100644 (file)

index 0000000..e867351
--- /dev/null
+++ b/mpn/arm/mul_1.asm
@@ -0,0 +1,78 @@
+dnl  ARM mpn_mul_1 -- Multiply a limb vector with a limb and store the result
+dnl  in a second limb vector.
+dnl  Contributed by Robert Harley.
+
+dnl  Copyright 1998, 2000, 2001, 2003 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C            cycles/limb
+C StrongARM:     6-8  (dependent on vl value)
+C XScale:        ?-?
+
+C We should rewrite this along the lines of addmul_1.asm.  That should save a
+C cycle on StrongARM, and several cycles on XScale.
+
+define(`rp',`r0')
+define(`up',`r1')
+define(`n',`r2')
+define(`vl',`r3')
+
+
+ASM_START()
+PROLOGUE(mpn_mul_1)
+       stmfd   sp!, { r8, r9, lr }             C save r8, r9, lr (all used as scratch)
+       ands    r12, n, #1                      C r12 = n & 1; 0 doubles as initial carry limb
+       beq     L(skip1)
+       ldr     lr, [up], #4
+       umull   r9, r12, lr, vl                 C r12:r9 = first limb * vl
+       str     r9, [rp], #4
+L(skip1):
+       tst     n, #2                           C two more limbs before the 4-limb loop?
+       beq     L(skip2)
+       mov     r8, r12                         C carry limb seeds the low accumulator
+       ldmia   up!, { r12, lr }
+       mov     r9, #0
+       umlal   r8, r9, r12, vl                 C r9:r8 = r12 * vl + r8
+       mov     r12, #0
+       umlal   r9, r12, lr, vl                 C r12:r9 = lr * vl + r9
+       stmia   rp!, { r8, r9 }
+L(skip2):
+       bics    n, n, #3                        C n = limbs left, a multiple of 4
+       beq     L(return)
+       stmfd   sp!, { r6, r7 }
+L(loop):
+       mov     r6, r12                         C carry limb seeds the low accumulator
+       ldmia   up!, { r8, r9, r12, lr }        C load 4 limbs
+       ldr     r7, [rp, #12]                   C cache allocate
+       mov     r7, #0
+       umlal   r6, r7, r8, vl                  C r7:r6 = r8 * vl + r6
+       mov     r8, #0
+       umlal   r7, r8, r9, vl                  C r8:r7 = r9 * vl + r7
+       mov     r9, #0
+       umlal   r8, r9, r12, vl                 C r9:r8 = r12 * vl + r8
+       mov     r12, #0
+       umlal   r9, r12, lr, vl                 C r12:r9 = lr * vl + r9
+       subs    n, n, #4                        C count down, Z set when done
+       stmia   rp!, { r6, r7, r8, r9 }         C store 4 product limbs
+       bne     L(loop)
+       ldmfd   sp!, { r6, r7 }
+L(return):
+       mov     r0, r12                         C return the final carry limb
+       ldmfd   sp!, { r8, r9, pc }             C restore regs and return
+EPILOGUE(mpn_mul_1)
diff --git a/mpn/arm/sub_n.asm b/mpn/arm/sub_n.asm

new file mode 100644 (file)

index 0000000..7063be4
--- /dev/null
+++ b/mpn/arm/sub_n.asm
@@ -0,0 +1,71 @@
+dnl  ARM mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+dnl  store difference in a third limb vector.
+dnl  Contributed by Robert Harley.
+
+dnl  Copyright 1997, 2000, 2001 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C This code runs at 5 cycles/limb.
+
+define(`rp',`r0')
+define(`up',`r1')
+define(`vp',`r2')
+define(`n',`r3')
+
+
+C mpn_sub_n: store up[i] - vp[i] for the n limbs at rp, propagating borrow,
+C and return the final borrow (0 or 1) in r0.
+C
+C The carry flag holds the inverted borrow for the whole routine, so the
+C code relies on every instruction between two subtract steps leaving the
+C carry flag as it was.
+ASM_START()
+PROLOGUE(mpn_sub_n)
+       stmfd   sp!, { r8, r9, lr }             C save scratch registers and return address
+       subs    r12, r12, r12                   C set carry, meaning no borrow pending
+       tst     n, #1                           C carry must survive this test
+       beq     L(skip1)
+       ldr     r12, [up], #4                   C odd n: do a single limb first
+       ldr     lr, [vp], #4
+       subs    r12, r12, lr                    C first limb, so no incoming borrow
+       str     r12, [rp], #4
+L(skip1):
+       tst     n, #2
+       beq     L(skip2)
+       ldmia   up!, { r8, r9 }                 C two limbs
+       ldmia   vp!, { r12, lr }
+       sbcs    r8, r8, r12                     C subtract with borrow from carry flag
+       sbcs    r9, r9, lr
+       stmia   rp!, { r8, r9 }
+L(skip2):
+       bics    n, n, #3                        C n = limbs left, a multiple of 4
+       beq     L(return)
+       stmfd   sp!, { r4, r5, r6, r7 }         C r4-r7 are only used by the loop
+L(sub_n_loop):
+       ldmia   up!, { r4, r5, r6, r7 }
+       ldmia   vp!, { r8, r9, r12, lr }
+       sbcs    r4, r4, r8
+       ldr     r8, [rp, #12]                   C cache allocate
+       sbcs    r5, r5, r9
+       sbcs    r6, r6, r12
+       sbcs    r7, r7, lr
+       stmia   rp!, { r4, r5, r6, r7 }
+       sub     n, n, #4                        C sub + teq, since subs would replace the borrow
+       teq     n, #0
+       bne     L(sub_n_loop)
+       ldmfd   sp!, { r4, r5, r6, r7 }
+L(return):
+       sbc     r0, r0, r0                      C r0 = 0, or -1 if a borrow is pending
+       and     r0, r0, #1                      C return the borrow as 0 or 1
+       ldmfd   sp!, { r8, r9, pc }             C restore registers and return
+EPILOGUE(mpn_sub_n)
diff --git a/mpn/arm/submul_1.asm b/mpn/arm/submul_1.asm

new file mode 100644 (file)

index 0000000..c365437
--- /dev/null
+++ b/mpn/arm/submul_1.asm
@@ -0,0 +1,107 @@
+dnl  ARM mpn_submul_1 -- Multiply a limb vector with a limb and subtract the
+dnl  result from a second limb vector.
+
+dnl  Copyright 1998, 2000, 2001, 2003 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C            cycles/limb
+C StrongARM:  7.75-9.75  (dependent on vl value)
+C XScale:        8-9     (dependent on vl value, estimated)
+
+define(`rp',`r0')
+define(`up',`r1')
+define(`n',`r2')
+define(`vl',`r3')
+define(`rl',`r12')
+define(`ul',`r6')
+define(`r',`lr')
+
+
+C mpn_submul_1: subtract up[] * vl from the n limbs at rp in place, and
+C return the high product limb plus the final borrow in r0.
+C
+C r4 and r5 alternate as low and high word of the running product, and the
+C carry flag holds the inverted borrow of the subtraction chain, so it has
+C to survive from each subtract step to the next.
+ASM_START()
+PROLOGUE(mpn_submul_1)
+       stmfd   sp!, { r4-r6, lr }      C save work registers and return address
+       subs    r4, r0, r0              C clear r4, set cy
+       tst     n, #1
+       beq     L(skip1)
+       ldr     ul, [up], #4            C odd n: do a single limb first
+       ldr     rl, [rp, #0]
+       umull   r5, r4, ul, vl          C r4:r5 = ul * vl
+       subs    r, rl, r5               C first subtract, starts the borrow chain
+       str     r, [rp], #4
+L(skip1):
+       tst     n, #2
+       beq     L(skip2)
+       ldr     ul, [up], #4            C two limbs
+       ldr     rl, [rp, #0]
+       mov     r5, #0
+       umlal   r4, r5, ul, vl          C r5:r4 = ul * vl + r4
+       ldr     ul, [up], #4
+       sbcs    r, rl, r4               C subtract low product word with borrow
+       ldr     rl, [rp, #4]
+       mov     r4, #0
+       umlal   r5, r4, ul, vl          C r4:r5 = ul * vl + r5
+       str     r, [rp], #4
+       sbcs    r, rl, r5
+       str     r, [rp], #4
+L(skip2):
+       bics    r, n, #3                C test n with the low two bits cleared; n is kept
+       beq     L(return)
+
+       ldr     ul, [up], #4            C form the first product, then join the loop
+       ldr     rl, [rp, #0]
+       mov     r5, #0
+       umlal   r4, r5, ul, vl
+       b       L(in)
+
+L(loop):
+       ldr     ul, [up], #4
+       sbcs    r, rl, r5
+       ldr     rl, [rp, #4]
+       mov     r5, #0
+       umlal   r4, r5, ul, vl
+       str     r, [rp], #4
+L(in): ldr     ul, [up], #4
+       sbcs    r, rl, r4
+       ldr     rl, [rp, #4]
+       mov     r4, #0
+       umlal   r5, r4, ul, vl
+       str     r, [rp], #4
+       ldr     ul, [up], #4
+       sbcs    r, rl, r5
+       ldr     rl, [rp, #4]
+       mov     r5, #0
+       umlal   r4, r5, ul, vl
+       str     r, [rp], #4
+       ldr     ul, [up], #4
+       sbcs    r, rl, r4
+       ldr     rl, [rp, #4]
+       mov     r4, #0
+       umlal   r5, r4, ul, vl
+       str     r, [rp], #4
+       sub     n, n, #4                C plain sub, so the borrow in carry is kept
+       bics    r, n, #3                C Z set when no group of four is left
+       bne     L(loop)
+
+       sbcs    r, rl, r5               C subtract and store the limb still pending
+       str     r, [rp], #4
+L(return):
+       sbc     r0, r0, r0              C r0 = 0, or -1 if a borrow is pending
+       sub     r0, r4, r0              C return high product limb plus borrow
+       ldmfd   sp!, { r4-r6, pc }      C restore registers and return
+EPILOGUE(mpn_submul_1)
diff --git a/mpn/arm/udiv.asm b/mpn/arm/udiv.asm

new file mode 100644 (file)

index 0000000..9434a4f
--- /dev/null
+++ b/mpn/arm/udiv.asm
@@ -0,0 +1,93 @@
+dnl  ARM mpn_udiv_qrnnd -- divide a two limb dividend and a one limb divisor.
+dnl  Return quotient and store remainder through a supplied pointer.
+
+dnl  Copyright 2001 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+define(`rem_ptr',`r0')
+define(`n1',`r1')
+define(`n0',`r2')
+define(`d',`r3')
+
+C divstep -- develop one quotient bit.  Dividend in $1$2, divisor in $3.
+C Quotient bit is shifted into $2.
+C
+C Each step shifts the carry left over from the previous step into the
+C bottom of $2, shifts the two-register value $1$2 left one bit, and
+C subtracts $3 from $1 if $1 is not below it.  The cmp leaves carry set
+C exactly when that subtract is done, and this carry is the new quotient
+C bit, picked up by the adcs of the next step (or by the adc that the
+C callers place after the last step).
+define(`divstep',
+       `adcs   $2, $2, $2
+       adc     $1, $1, $1
+       cmp     $1, $3
+       subcs   $1, $1, $3')
+
+C mpn_udiv_qrnnd: divide the two-limb value n1:n0 by d, store the remainder
+C through rem_ptr and return the quotient in r0.
+C
+C Divisors with the msb set take the L(_large_divisor) path, which divides
+C (n1:n0 >> 1) by ceil(d/2) and then corrects quotient and remainder.
+ASM_START()
+PROLOGUE(mpn_udiv_qrnnd)
+       mov     r12, #8                 C loop counter for both loops below
+       cmp     d, #0x80000000          C check divisor msb and clear carry
+       bcs     L(_large_divisor)
+
+L(oop):        divstep(n1,n0,d)
+       divstep(n1,n0,d)
+       divstep(n1,n0,d)
+       divstep(n1,n0,d)
+       sub     r12, r12, #1            C sub + teq, so the divstep carry is kept
+       teq     r12, #0
+       bne     L(oop)                  C 8 passes of 4 divsteps each
+
+       str     n1, [ rem_ptr ]         C store remainder
+       adc     r0, n0, n0              C quotient: add last carry from divstep
+       mov     pc, lr                  C return
+
+L(_large_divisor):
+       stmfd   sp!, { r8, lr }         C r8 and lr are used as extra scratch here
+
+       and     r8, n0, #1              C save lsb of dividend
+       mov     lr, n1, lsl #31         C lsb of n1, moved up to bit 31
+       orrs    n0, lr, n0, lsr #1      C n0 = lo(n1n0 >> 1)
+       mov     n1, n1, lsr #1          C n1 = hi(n1n0 >> 1)
+
+       and     lr, d, #1               C save lsb of divisor
+       movs    d, d, lsr #1            C d = floor(orig_d / 2)
+       adc     d, d, #0                C d = ceil(orig_d / 2)
+
+L(oop2):
+       divstep(n1,n0,d)
+       divstep(n1,n0,d)
+       divstep(n1,n0,d)
+       divstep(n1,n0,d)
+       sub     r12, r12, #1            C sub + teq, so the divstep carry is kept
+       teq     r12, #0
+       bne     L(oop2)                 C 8 passes of 4 divsteps each
+
+       adc     n0, n0, n0              C shift and add last carry from divstep
+       add     n1, r8, n1, lsl #1      C shift in omitted dividend lsb
+       tst     lr, lr                  C test saved divisor lsb
+       beq     L(_even_divisor)        C even divisor: skip the odd-divisor fixup
+
+       rsb     d, lr, d, lsl #1        C restore orig d value
+       adds    n1, n1, n0              C fix remainder for omitted divisor lsb
+       addcs   n0, n0, #1              C adjust quotient if rem. fix carried
+       subcs   n1, n1, d               C adjust remainder accordingly
+       cmp     n1, d                   C remainder >= divisor?
+       subcs   n1, n1, d               C adjust remainder
+       addcs   n0, n0, #1              C adjust quotient
+
+L(_even_divisor):
+       str     n1, [ rem_ptr ]         C store remainder
+       mov     r0, n0                  C quotient
+       ldmfd   sp!, { r8, pc }         C restore r8 and return
+EPILOGUE(mpn_udiv_qrnnd)
diff --git a/mpn/asm-defs.m4 b/mpn/asm-defs.m4

new file mode 100644 (file)

index 0000000..a0382d0
--- /dev/null
+++ b/mpn/asm-defs.m4
@@ -0,0 +1,1663 @@
+divert(-1)
+dnl
+dnl  m4 macros for gmp assembly code, shared by all CPUs.
+
+dnl  Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006 Free Software
+dnl  Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+dnl  These macros are designed for use with any m4 and have been used on
+dnl  GNU, FreeBSD, NetBSD, OpenBSD and SysV.
+dnl
+dnl  GNU m4 and OpenBSD 2.7 m4 will give filenames and line numbers in error
+dnl  messages.
+dnl
+dnl
+dnl  Macros:
+dnl
+dnl  Most new m4 specific macros have an "m4_" prefix to emphasise they're
+dnl  m4 expansions.  But new defining things like deflit() and defreg() are
+dnl  named like the builtin define(), and forloop() is named following the
+dnl  GNU m4 example on which it's based.
+dnl
+dnl  GNU m4 with the -P option uses "m4_" as a prefix for builtins, but that
+dnl  option isn't going to be used, so there's no conflict or confusion.
+dnl
+dnl
+dnl  Comments in output:
+dnl
+dnl  The m4 comment delimiters are left at # and \n, the normal assembler
+dnl  commenting for most CPUs.  m4 passes comment text through without
+dnl  expanding macros in it, which is generally a good thing since it stops
+dnl  unexpected expansions and possible resultant errors.
+dnl
+dnl  But note that when a quoted string is being read, a # isn't special, so
+dnl  apostrophes in comments in quoted strings must be avoided or they'll be
+dnl  interpreted as a closing quote mark.  But when the quoted text is
+dnl  re-read # will still act like a normal comment, suppressing macro
+dnl  expansion.
+dnl
+dnl  For example,
+dnl
+dnl          # apostrophes in comments that're outside quotes are ok
+dnl          # and using macro names like PROLOGUE is ok too
+dnl          ...
+dnl          ifdef(`PIC',`
+dnl                  # but apostrophes aren't ok inside quotes
+dnl                  #                     ^--wrong
+dnl                  ...
+dnl                  # though macro names like PROLOGUE are still ok
+dnl                  ...
+dnl          ')
+dnl
+dnl  If macro expansion in a comment is wanted, use `#' in the .asm (ie. a
+dnl  quoted hash symbol), which will turn into # in the .s but get
+dnl  expansions done on that line.  This can make the .s more readable to
+dnl  humans, but it won't make a blind bit of difference to the assembler.
+dnl
+dnl  All the above applies, mutatis mutandis, when changecom() is used to
+dnl  select @ ! ; or whatever other commenting.
+dnl
+dnl
+dnl  Variations in m4 affecting gmp:
+dnl
+dnl  $# - When a macro is called as "foo" with no brackets, BSD m4 sets $#
+dnl       to 1, whereas GNU or SysV m4 set it to 0.  In all cases though
+dnl       "foo()" sets $# to 1.  This is worked around in various places.
+dnl
+dnl  len() - When "len()" is given an empty argument, BSD m4 evaluates to
+dnl       nothing, whereas GNU, SysV, and the new OpenBSD, evaluate to 0.
+dnl       See m4_length() below which works around this.
+dnl
+dnl  translit() - GNU m4 accepts character ranges like A-Z, and the new
+dnl       OpenBSD m4 does under option -g, but basic BSD and SysV don't.
+dnl
+dnl  popdef() - in BSD and SysV m4 popdef() takes multiple arguments and
+dnl       pops each, but GNU m4 only takes one argument.
+dnl
+dnl  push back - BSD m4 has some limits on the amount of text that can be
+dnl       pushed back.  The limit is reasonably big and so long as macros
+dnl       don't gratuitously duplicate big arguments it isn't a problem.
+dnl       Normally an error message is given, but sometimes it just hangs.
+dnl
+dnl  eval() &,|,^ - GNU and SysV m4 have bitwise operators &,|,^ available,
+dnl       but BSD m4 doesn't (contrary to what the man page suggests) and
+dnl       instead ^ is exponentiation.
+dnl
+dnl  eval() ?: - The C ternary operator "?:" is available in BSD m4, but not
+dnl       in SysV or GNU m4 (as of GNU m4 1.4 and betas of 1.5).
+dnl
+dnl  eval() -2^31 - BSD m4 has a bug where an eval() resulting in -2^31
+dnl       (ie. -2147483648) gives "-(".  Using -2147483648 within an
+dnl       expression is ok, it just can't be a final result.  "-(" will of
+dnl       course upset parsing, with all sorts of strange effects.
+dnl
+dnl  eval() <<,>> - SysV m4 doesn't support shift operators in eval() (on
+dnl       Solaris 7 /usr/xpg4/m4 has them but /usr/ccs/m4 doesn't).  See
+dnl       m4_lshift() and m4_rshift() below for workarounds.
+dnl
+dnl  ifdef() - OSF 4.0 m4 considers a macro defined to a zero value `0' or
+dnl       `00' etc as not defined.  See m4_ifdef below for a workaround.
+dnl
+dnl  m4wrap() sequence - in BSD m4, m4wrap() replaces any previous m4wrap()
+dnl       string, in SysV m4 it appends to it, and in GNU m4 it prepends.
+dnl       See m4wrap_prepend() below which brings uniformity to this.
+dnl
+dnl  m4wrap() 0xFF - old versions of BSD m4 store EOF in a C "char" under an
+dnl       m4wrap() and on systems where char is unsigned by default a
+dnl       spurious 0xFF is output.  This has been observed on recent Cray
+dnl       Unicos Alpha, Apple MacOS X, and HPUX 11 systems.  An autoconf
+dnl       test is used to check for this, see the m4wrap handling below.  It
+dnl       might work to end the m4wrap string with a dnl to consume the
+dnl       0xFF, but that probably induces the offending m4's to read from an
+dnl       already closed "FILE *", which could be bad on a glibc style
+dnl       stdio.
+dnl
+dnl  __file__,__line__ - GNU m4 and OpenBSD 2.7 m4 provide these, and
+dnl       they're used here to make error messages more informative.  GNU m4
+dnl       gives an unhelpful "NONE 0" in an m4wrap(), but that's worked
+dnl       around.
+dnl
+dnl  __file__ quoting - OpenBSD m4, unlike GNU m4, doesn't quote the
+dnl       filename in __file__, so care should be taken that no macro has
+dnl       the same name as a file, or an unwanted expansion will occur when
+dnl       printing an error or warning.
+dnl
+dnl  changecom() - BSD m4 changecom doesn't quite work like the man page
+dnl       suggests, in particular "changecom" or "changecom()" doesn't
+dnl       disable the comment feature, and multi-character comment sequences
+dnl       don't seem to work.  If the default `#' and newline aren't
+dnl       suitable it's necessary to change it to something else,
+dnl       eg. changecom(;).
+dnl
+dnl  OpenBSD 2.6 m4 - in this m4, eval() rejects decimal constants containing
+dnl       an 8 or 9, making it pretty much unusable.  The bug is confined to
+dnl       version 2.6 (it's not in 2.5, and was fixed in 2.7).
+dnl
+dnl  SunOS /usr/bin/m4 - this m4 lacks a number of desired features,
+dnl       including $# and $@, defn(), m4exit(), m4wrap(), pushdef(),
+dnl       popdef().  /usr/5bin/m4 is a SysV style m4 which should always be
+dnl       available, and "configure" will reject /usr/bin/m4 in favour of
+dnl       /usr/5bin/m4 (if necessary).
+dnl
+dnl       The sparc code actually has modest m4 requirements currently and
+dnl       could manage with /usr/bin/m4, but there's no reason to put our
+dnl       macros through contortions when /usr/5bin/m4 is available or GNU
+dnl       m4 can be installed.
+
+
+dnl  Include guard: if this file has already been read, print an error and
+dnl  exit.  The message sits inside a quoted string, which is why it says
+dnl  "dont" with no apostrophe (an apostrophe would close the quote).
+ifdef(`__ASM_DEFS_M4_INCLUDED__',
+`m4_error(`asm-defs.m4 already included, dont include it twice
+')m4exit(1)')
+define(`__ASM_DEFS_M4_INCLUDED__')
+
+
+dnl  Detect and give a message about the unsuitable OpenBSD 2.6 m4.
+
+ifelse(eval(89),89,,
+`errprint(
+`This m4 doesnt accept 8 and/or 9 in constants in eval(), making it unusable.
+This is probably OpenBSD 2.6 m4 (September 1999).  Upgrade to OpenBSD 2.7,
+or get a bug fix from the CVS (expr.c rev 1.9), or get GNU m4.  Dont forget
+to configure with M4=/wherever/m4 if you install one of these in a directory
+not in $PATH.
+')m4exit(1)')
+
+
+dnl  Detect and give a message about the unsuitable SunOS /usr/bin/m4.
+dnl
+dnl  Unfortunately this test doesn't work when m4 is run in the normal way
+dnl  from mpn/Makefile with "m4 -DOPERATION_foo foo.asm", since the bad m4
+dnl  takes "-" in "-D..." to mean read stdin, so it will look like it just
+dnl  hangs.  But running "m4 asm-defs.m4" to try it out will work.
+dnl
+dnl  We'd like to abort immediately on finding a problem, but unfortunately
+dnl  the bad m4 doesn't have an m4exit(), nor does an invalid eval() kill
+dnl  it.  Unexpanded $#'s in some m4_assert_numargs() later on will comment
+dnl  out some closing parentheses and kill it with "m4: arg stack overflow".
+
+define(m4_dollarhash_works_test,``$#'')
+ifelse(m4_dollarhash_works_test(x),1,,
+`errprint(
+`This m4 doesnt support $# and cant be used for GMP asm processing.
+If this is on SunOS, ./configure should choose /usr/5bin/m4 if you have that
+or can get it, otherwise install GNU m4.  Dont forget to configure with
+M4=/wherever/m4 if you install in a directory not in $PATH.
+')')
+undefine(`m4_dollarhash_works_test')
+
+
+dnl  --------------------------------------------------------------------------
+dnl  Basic error handling things.
+
+
+dnl  Usage: m4_dollarhash_1_if_noparen_p
+dnl
+dnl  Expand to 1 if a call "foo" gives $# set to 1 (as opposed to 0 like GNU
+dnl  and SysV m4 give).
+
+dnl  The eval() below is unquoted, so it is expanded once, while the define
+dnl  is being read, and the macro is left holding a plain 1 or 0.  The test
+dnl  macro is only needed for that and is removed straight afterwards.
+define(m4_dollarhash_1_if_noparen_test,`$#')
+define(m4_dollarhash_1_if_noparen_p,
+eval(m4_dollarhash_1_if_noparen_test==1))
+undefine(`m4_dollarhash_1_if_noparen_test')
+
+
+dnl  Usage: m4wrap_prepend(string)
+dnl
+dnl  Prepend the given string to what will be expanded under m4wrap at the
+dnl  end of input.
+dnl
+dnl  This macro exists to work around variations in m4wrap() behaviour in
+dnl  the various m4s (notes at the start of this file).  Don't use m4wrap()
+dnl  directly since it will interfere with this scheme.
+
+dnl  m4wrap_string is redefined as the given string followed by its previous
+dnl  value; defn() fetches that previous value so it is carried over
+dnl  without being expanded.
+define(m4wrap_prepend,
+m4_assert_numargs(1)
+`define(`m4wrap_string',`$1'defn(`m4wrap_string'))')
+
+dnl  m4wrap_string accumulates the text built up by m4wrap_prepend().
+define(m4wrap_string,`')
+
+dnl  m4wrap_works_p expands to 0 if M4WRAP_SPURIOUS is "yes", otherwise 1.
+dnl  (Presumably M4WRAP_SPURIOUS is set by the configure test for the m4wrap
+dnl  0xFF problem described at the start of this file -- to be confirmed.)
+define(m4wrap_works_p,
+`ifelse(M4WRAP_SPURIOUS,yes,0,1)')
+
+dnl  Register m4wrap_string for expansion at end of input, but only when
+dnl  m4wrap is usable.  Only the name is registered, so text prepended to
+dnl  m4wrap_string later on is still picked up.
+ifelse(m4wrap_works_p,1,
+`m4wrap(`m4wrap_string')')
+
+
+dnl  Usage: m4_file_and_line
+dnl
+dnl  Expand to the current file and line number, if the GNU m4 extensions
+dnl  __file__ and __line__ are available.
+dnl
+dnl  In GNU m4 1.4 at the end of input when m4wrap text is expanded,
+dnl  __file__ is NONE and __line__ is 0, which is not a helpful thing to
+dnl  print.  If m4_file_seen() has been called to note the last file seen,
+dnl  then that file at a big line number is used, otherwise "end of input"
+dnl  is used (although "end of input" won't parse as an error message).
+
+dnl  The outer ifdef has no "else" part, so when __file__ is not defined
+dnl  this expands to nothing at all.
+define(m4_file_and_line,
+`ifdef(`__file__',
+`ifelse(__file__`'__line__,`NONE0',
+`ifdef(`m4_file_seen_last',`m4_file_seen_last: 999999: ',`end of input: ')',
+`__file__: __line__: ')')')
+
+
+dnl  Usage: m4_errprint_commas(arg,...)
+dnl
+dnl  The same as errprint(), but commas are printed between arguments
+dnl  instead of spaces.
+
+dnl  Prints the first argument, then, while more than one argument remains,
+dnl  prints a comma and recurses on shift($@).
+define(m4_errprint_commas,
+`errprint(`$1')dnl
+ifelse(eval($#>1),1,`errprint(`,')m4_errprint_commas(shift($@))')')
+
+
+dnl  Usage: m4_error(args...)
+dnl         m4_warning(args...)
+dnl
+dnl  Print an error message, using m4_errprint_commas, prefixed with the
+dnl  current filename and line number (if available).  m4_error sets up to
+dnl  give an error exit at the end of processing, m4_warning just prints.
+dnl  These macros are the recommended way to print errors.
+dnl
+dnl  The arguments here should be quoted in the usual way to prevent them
+dnl  being expanded when the macro call is read.  (m4_error takes care not
+dnl  to do any further expansion.)
+dnl
+dnl  For example,
+dnl
+dnl         m4_error(`some error message
+dnl         ')
+dnl
+dnl  which prints
+dnl
+dnl         foo.asm:123: some error message
+dnl
+dnl  or if __file__ and __line__ aren't available
+dnl
+dnl         some error message
+dnl
+dnl  The "file:line:" format is a basic style, used by gcc and GNU m4, so
+dnl  emacs and other editors will recognise it in their normal error message
+dnl  parsing.
+
+dnl  The `' only separates m4_file_and_line from $@, so the file and line
+dnl  prefix ends up joined onto the front of the first argument.
+define(m4_warning,
+`m4_errprint_commas(m4_file_and_line`'$@)')
+
+dnl  m4_error sets m4_error_occurred to 1 and prints the message through
+dnl  m4_warning.  When m4wrap_works_p is 0 no m4wrap text was registered to
+dnl  act on the flag at end of input, so it exits straight away instead.
+define(m4_error,
+`define(`m4_error_occurred',1)m4_warning($@)dnl
+ifelse(m4wrap_works_p,0,`m4exit(1)')')
+
+dnl  Flag set to 1 by m4_error; starts out as 0.
+define(`m4_error_occurred',0)
+
+dnl  This m4wrap_prepend() is first, so it'll be executed last.
+m4wrap_prepend(
+`ifelse(m4_error_occurred,1,
+`m4_error(`Errors occurred during m4 processing
+')m4exit(1)')')
+
+
+dnl  Usage: m4_assert_numargs(num)
+dnl
+dnl  Put this unquoted on a line on its own at the start of a macro
+dnl  definition to add some code to check that num many arguments get passed
+dnl  to the macro.  For example,
+dnl
+dnl         define(foo,
+dnl         m4_assert_numargs(2)
+dnl         `something `$1' and `$2' blah blah')
+dnl
+dnl  Then a call like foo(one,two,three) will provoke an error like
+dnl
+dnl         file:10: foo expected 2 arguments, got 3 arguments
+dnl
+dnl  Here are some calls and how many arguments they're interpreted as passing.
+dnl
+dnl         foo(abc,def)  2
+dnl         foo(xyz)      1
+dnl         foo()         0
+dnl         foo          -1
+dnl
+dnl  The -1 for no parentheses at all means a macro that's meant to be used
+dnl  that way can be checked with m4_assert_numargs(-1).  For example,
+dnl
+dnl         define(SPECIAL_SUFFIX,
+dnl         m4_assert_numargs(-1)
+dnl         `ifdef(`FOO',`_foo',`_bar')')
+dnl
+dnl  But as an alternative see also deflit() below where parenthesized
+dnl  expressions following a macro are passed through to the output.
+dnl
+dnl  Note that in BSD m4 there's no way to differentiate calls "foo" and
+dnl  "foo()", so in BSD m4 the distinction between the two isn't enforced.
+dnl  (In GNU and SysV m4 it can be checked, and is.)
+
+
+dnl  m4_assert_numargs is able to check its own arguments by calling
+dnl  m4_assert_numargs_internal directly.
+dnl
+dnl  m4_doublequote($`'0) expands to ``$0'', whereas ``$`'0'' would expand
+dnl  to `$`'0' and do the wrong thing, and likewise for $1.  The same is
+dnl  done in other assert macros.
+dnl
+dnl  $`#' leaves $# in the new macro being defined, and stops # being
+dnl  interpreted as a comment character.
+dnl
+dnl  `dnl ' means an explicit dnl isn't necessary when m4_assert_numargs is
+dnl  used.  The space means that if there is a dnl it'll still work.
+
+dnl  Usage: m4_doublequote(x) expands to ``x''
+define(m4_doublequote,
+`m4_assert_numargs_internal(`$0',1,$#,len(`$1'))``$1''')
+
+define(m4_assert_numargs,
+`m4_assert_numargs_internal(`$0',1,$#,len(`$1'))dnl
+`m4_assert_numargs_internal'(m4_doublequote($`'0),$1,$`#',`len'(m4_doublequote($`'1)))`dnl '')
+
+dnl  Called: m4_assert_numargs_internal(`macroname',wantargs,$#,len(`$1'))
+define(m4_assert_numargs_internal,
+`m4_assert_numargs_internal_check(`$1',`$2',m4_numargs_count(`$3',`$4'))')
+
+dnl  Called: m4_assert_numargs_internal_check(`macroname',wantargs,gotargs)
+dnl
+dnl  If m4_dollarhash_1_if_noparen_p (BSD m4) then gotargs can be 0 when it
+dnl  should be -1.  If wantargs is -1 but gotargs is 0 and the two can't be
+dnl  distinguished then it's allowed to pass.
+dnl
+define(m4_assert_numargs_internal_check,
+`ifelse(eval($2 == $3
+             || ($2==-1 && $3==0 && m4_dollarhash_1_if_noparen_p)),0,
+`m4_error(`$1 expected 'm4_Narguments(`$2')`, got 'm4_Narguments(`$3')
+)')')
+
+dnl  Called: m4_numargs_count($#,len(`$1'))
+dnl  If $#==0 then -1 args, if $#==1 but len(`$1')==0 then 0 args, otherwise
+dnl  $# args.
+dnl  The "$2-0" keeps the eval() expression valid when len() has expanded to
+dnl  nothing, as the BSD len() does for an empty argument.
+define(m4_numargs_count,
+`ifelse($1,0, -1,
+`ifelse(eval($1==1 && $2-0==0),1, 0, $1)')')
+
+dnl  Usage: m4_Narguments(N)
+dnl  "$1 argument" or "$1 arguments" with the plural according to $1.
+define(m4_Narguments,
+`$1 argument`'ifelse(`$1',1,,s)')
+
+
+dnl  --------------------------------------------------------------------------
+dnl  Additional error checking things.
+
+
+dnl  Usage: m4_file_seen()
+dnl
+dnl  Record __file__ for the benefit of m4_file_and_line in m4wrap text.
+dnl
+dnl  The basic __file__ macro comes out quoted in GNU m4, like `foo.asm',
+dnl  and m4_file_seen_last is defined like that too.
+dnl
+dnl  This is used by PROLOGUE, since that's normally in the main .asm file,
+dnl  and in particular it sets up m4wrap error checks for missing EPILOGUE.
+
+define(m4_file_seen,
+m4_assert_numargs(0)
+`ifelse(__file__,`NONE',,
+`define(`m4_file_seen_last',m4_doublequote(__file__))')')
+
+
+dnl  Usage: m4_assert_onearg()
+dnl
+dnl  Put this, unquoted, at the start of a macro definition to add some code
+dnl  to check that one argument is passed to the macro, but with that
+dnl  argument allowed to be empty.  For example,
+dnl
+dnl          define(foo,
+dnl          m4_assert_onearg()
+dnl          `blah blah $1 blah blah')
+dnl
+dnl  Calls "foo(xyz)" or "foo()" are accepted.  A call "foo(xyz,abc)" fails.
+dnl  A call "foo" fails too, but BSD m4 can't detect this case (GNU and SysV
+dnl  m4 can).
+
+define(m4_assert_onearg,
+m4_assert_numargs(0)
+`m4_assert_onearg_internal'(m4_doublequote($`'0),$`#')`dnl ')
+
+dnl  Called: m4_assert_onearg_internal(`macroname',$#)
+dnl  Gives an error naming macroname unless the $# passed in is exactly 1.
+define(m4_assert_onearg_internal,
+`ifelse($2,1,,
+`m4_error(`$1 expected 1 argument, got 'm4_Narguments(`$2')
+)')')
+
+
+dnl  Usage: m4_assert_numargs_range(low,high)
+dnl
+dnl  Put this, unquoted, at the start of a macro definition to add some code
+dnl  to check that between low and high many arguments get passed to the
+dnl  macro.  For example,
+dnl
+dnl         define(foo,
+dnl         m4_assert_numargs_range(3,5)
+dnl         `mandatory $1 $2 $3 optional $4 $5 end')
+dnl
+dnl  See m4_assert_numargs() for more info.
+
+define(m4_assert_numargs_range,
+m4_assert_numargs(2)
+``m4_assert_numargs_range_internal'(m4_doublequote($`'0),$1,$2,$`#',`len'(m4_doublequote($`'1)))`dnl '')
+
+dnl  Called: m4_assert_numargs_range_internal(`name',low,high,$#,len(`$1'))
+define(m4_assert_numargs_range_internal,
+m4_assert_numargs(5)
+`m4_assert_numargs_range_check(`$1',`$2',`$3',m4_numargs_count(`$4',`$5'))')
+
+dnl  Called: m4_assert_numargs_range_check(`name',low,high,gotargs)
+dnl
+dnl  If m4_dollarhash_1_if_noparen_p (BSD m4) then gotargs can be 0 when it
+dnl  should be -1.  To ensure a `high' of -1 works, a fudge is applied to
+dnl  gotargs if it's 0 and the 0 and -1 cases can't be distinguished.
+dnl
+define(m4_assert_numargs_range_check,
+m4_assert_numargs(4)
+`ifelse(eval($2 <= $4 &&
+             ($4 - ($4==0 && m4_dollarhash_1_if_noparen_p) <= $3)),0,
+`m4_error(`$1 expected $2 to $3 arguments, got 'm4_Narguments(`$4')
+)')')
+
+
+dnl  Usage: m4_assert_defined(symbol)
+dnl
+dnl  Put this unquoted on a line of its own at the start of a macro
+dnl  definition to add some code to check that the given symbol is defined
+dnl  when the macro is used.  For example,
+dnl
+dnl          define(foo,
+dnl          m4_assert_defined(`FOO_PREFIX')
+dnl          `FOO_PREFIX whatever')
+dnl
+dnl  This is a convenient way to check that the user or ./configure or
+dnl  whatever has defined the things needed by a macro, as opposed to
+dnl  silently generating garbage.
+
+define(m4_assert_defined,
+m4_assert_numargs(1)
+``m4_assert_defined_internal'(m4_doublequote($`'0),``$1'')`dnl '')
+
+dnl  Called: m4_assert_defined_internal(`macroname',`define_required')
+dnl  The test goes through m4_ifdef rather than the builtin ifdef, so a
+dnl  required symbol whose value is 0 still counts as defined.
+define(m4_assert_defined_internal,
+m4_assert_numargs(2)
+`m4_ifdef(`$2',,
+`m4_error(`$1 needs $2 defined
+')')')
+
+
+dnl  Usage: m4_not_for_expansion(`SYMBOL')
+dnl         define_not_for_expansion(`SYMBOL')
+dnl
+dnl  m4_not_for_expansion turns SYMBOL, if defined, into something which
+dnl  will give an error if expanded.  For example,
+dnl
+dnl         m4_not_for_expansion(`PIC')
+dnl
+dnl  define_not_for_expansion is the same, but always makes a definition.
+dnl
+dnl  These are for symbols that should be tested with ifdef(`FOO',...)
+dnl  rather than be expanded as such.  They guard against accidentally
+dnl  omitting the quotes, as in ifdef(FOO,...).  Note though that they only
+dnl  catch this when FOO is defined, so be sure to test code both with and
+dnl  without each definition.
+
+define(m4_not_for_expansion,
+m4_assert_numargs(1)
+`ifdef(`$1',`define_not_for_expansion(`$1')')')
+
+dnl  An error is given if SYMBOL currently has a non-empty value.  The
+dnl  define that follows makes SYMBOL expand to a call of
+dnl  m4_not_for_expansion_internal with its own name.
+define(define_not_for_expansion,
+m4_assert_numargs(1)
+`ifelse(defn(`$1'),,,
+`m4_error(``$1' has a non-empty value, maybe it shouldnt be munged with m4_not_for_expansion()
+')')dnl
+define(`$1',`m4_not_for_expansion_internal(`$1')')')
+
+dnl  Called: m4_not_for_expansion_internal(`SYMBOL')
+dnl  This is what a symbol set up by define_not_for_expansion expands to; it
+dnl  reports that SYMBOL was expanded and suggests ifdef instead.
+define(m4_not_for_expansion_internal,
+`m4_error(``$1' is not meant to be expanded, perhaps you mean `ifdef(`$1',...)'
+')')
+
+
+dnl  --------------------------------------------------------------------------
+dnl  Various generic m4 things.
+
+
+dnl  Usage: m4_unquote(macro)
+dnl
+dnl  Allow the argument text to be re-evaluated.  This is useful for "token
+dnl  pasting" like m4_unquote(foo`'bar).
+
+define(m4_unquote,
+m4_assert_onearg()
+`$1')
+
+
+dnl  Usage: m4_ifdef(name,yes[,no])
+dnl
+dnl  Expand to the yes argument if name is defined, or to the no argument if
+dnl  not.
+dnl
+dnl  This is the same as the builtin "ifdef", but avoids an OSF 4.0 m4 bug
+dnl  in which a macro with a zero value `0' or `00' etc is considered not
+dnl  defined.
+dnl
+dnl  There's no particular need to use this everywhere, only if there might
+dnl  be a zero value.
+
+dnl  The test adds the 1-or-0 answer from the builtin ifdef to the length of
+dnl  the definition.  The sum is zero, selecting the "no" argument, only
+dnl  when ifdef says not defined and defn() gives an empty string; a macro
+dnl  whose value is 0 has length 1 and so counts as defined.
+define(m4_ifdef,
+m4_assert_numargs_range(2,3)
+`ifelse(eval(ifdef(`$1',1,0)+m4_length(defn(`$1'))),0,
+`$3',`$2')')
+
+
+dnl  Usage: m4_ifdef_anyof_p(`symbol',...)
+dnl
+dnl  Expand to 1 if any of the symbols in the argument list are defined, or
+dnl  to 0 if not.
+
+dnl  An exhausted list (at most one argument, and that one empty) gives 0.
+dnl  Otherwise the first symbol is tested, and if it is not defined the rest
+dnl  are handled by recursing on shift($@).
+define(m4_ifdef_anyof_p,
+`ifelse(eval($#<=1 && m4_length(`$1')==0),1, 0,
+`ifdef(`$1', 1,
+`m4_ifdef_anyof_p(shift($@))')')')
+
+
+dnl  Usage: m4_length(string)
+dnl
+dnl  Determine the length of a string.  This is the same as len(), but
+dnl  always expands to a number, working around the BSD len() which
+dnl  evaluates to nothing given an empty argument.
+
+dnl  The "-0" leaves eval() with a valid expression even when len() has
+dnl  expanded to nothing.
+define(m4_length,
+m4_assert_onearg()
+`eval(len(`$1')-0)')
+
+
+dnl  Usage: m4_stringequal_p(x,y)
+dnl
+dnl  Expand to 1 or 0 according as strings x and y are equal or not.
+
+define(m4_stringequal_p,
+`ifelse(`$1',`$2',1,0)')
+
+
+dnl  Usage: m4_incr_or_decr(n,last)
+dnl
+dnl  Do an incr(n) or decr(n), whichever is in the direction of "last".
+dnl  Both n and last must be numbers of course.
+
+dnl  When n < last the result is incr(n), otherwise decr(n).  Note that
+dnl  n == last therefore takes the decr() branch.
+define(m4_incr_or_decr,
+m4_assert_numargs(2)
+`ifelse(eval($1<$2),1,incr($1),decr($1))')
+
+
+dnl  Usage: forloop(i, first, last, statement)
+dnl
+dnl  Based on GNU m4 examples/forloop.m4, but extended.
+dnl
+dnl  statement is expanded repeatedly, with i successively defined as
+dnl
+dnl         first, first+1, ..., last-1, last
+dnl
+dnl  Or if first > last, then it's
+dnl
+dnl         first, first-1, ..., last+1, last
+dnl
+dnl  If first == last, then one expansion is done.
+dnl
+dnl  A pushdef/popdef of i is done to preserve any previous definition (or
+dnl  lack of definition).  first and last are eval()ed and so can be
+dnl  expressions.
+dnl
+dnl  forloop_first is defined to 1 on the first iteration, 0 on the rest.
+dnl  forloop_last is defined to 1 on the last iteration, 0 on the others.
+dnl  Nested forloops are allowed, in which case forloop_first and
+dnl  forloop_last apply to the innermost loop that's open.
+dnl
+dnl  A simple example,
+dnl
+dnl         forloop(i, 1, 2*2+1, `dnl
+dnl         iteration number i ... ifelse(forloop_first,1,FIRST)
+dnl         ')
+
+
+dnl  "i" and "statement" are carefully quoted, but "first" and "last" are
+dnl  just plain numbers once eval()ed.
+
+dnl  forloop itself only sets up and tears down state: it pushdefs the loop
+dnl  variable (as eval of first), forloop_first (1) and forloop_last (0),
+dnl  runs forloop_internal with eval of last, then popdefs all three.
+define(`forloop',
+m4_assert_numargs(4)
+`pushdef(`$1',eval(`$2'))dnl
+pushdef(`forloop_first',1)dnl
+pushdef(`forloop_last',0)dnl
+forloop_internal(`$1',eval(`$3'),`$4')`'dnl
+popdef(`forloop_first')dnl
+popdef(`forloop_last')dnl
+popdef(`$1')')
+
+dnl  Called: forloop_internal(`var',last,statement)
+dnl
+dnl  One step of forloop.  When var has reached last, forloop_last is set
+dnl  to 1 and statement is expanded a final time.  Otherwise statement is
+dnl  expanded, forloop_first is set to 0, var is moved one step towards
+dnl  last with m4_incr_or_decr, and the macro recurses.
+define(`forloop_internal',
+m4_assert_numargs(3)
+`ifelse($1,$2,
+`define(`forloop_last',1)$3',
+`$3`'dnl
+define(`forloop_first',0)dnl
+define(`$1',m4_incr_or_decr($1,$2))dnl
+forloop_internal(`$1',$2,`$3')')')
+
+
+dnl  Usage: foreach(var,body, item1,item2,...,itemN)
+dnl
+dnl  For each "item" argument, define "var" to that value and expand "body".
+dnl  For example,
+dnl
+dnl         foreach(i, `something i
+dnl         ', one, two)
+dnl  gives
+dnl         something one
+dnl         something two
+dnl
+dnl  Any previous definition of "var", or lack thereof, is saved and
+dnl  restored.  Empty "item"s are not allowed.
+
+dnl  Recursive: an empty third argument ends the iteration.  Otherwise var
+dnl  is pushdefed to the third argument around one expansion of body, and
+dnl  foreach is invoked again with that item shifted off the list.
+define(foreach,
+m4_assert_numargs_range(2,1000)
+`ifelse(`$3',,,
+`pushdef(`$1',`$3')$2`'popdef(`$1')dnl
+foreach(`$1',`$2',shift(shift(shift($@))))')')
+
+
+dnl  Usage: m4_toupper(x)
+dnl         m4_tolower(x)
+dnl
+dnl  Convert the argument string to upper or lower case, respectively.
+dnl  Only one argument accepted.
+dnl
+dnl  BSD m4 doesn't take ranges like a-z in translit(), so the full alphabet
+dnl  is written out.
+
+dnl  The two alphabets, in matching order, used as the translit() from/to
+dnl  character sets by m4_toupper and m4_tolower.
+define(m4_alphabet_lower, `abcdefghijklmnopqrstuvwxyz')
+define(m4_alphabet_upper, `ABCDEFGHIJKLMNOPQRSTUVWXYZ')
+
+dnl  translit() maps each character found in m4_alphabet_lower to the one
+dnl  at the same position in m4_alphabet_upper.
+define(m4_toupper,
+m4_assert_onearg()
+`translit(`$1', m4_alphabet_lower, m4_alphabet_upper)')
+
+dnl  translit() maps each character found in m4_alphabet_upper to the one
+dnl  at the same position in m4_alphabet_lower.
+define(m4_tolower,
+m4_assert_onearg()
+`translit(`$1', m4_alphabet_upper, m4_alphabet_lower)')
+
+
+dnl  Usage: m4_empty_if_zero(x)
+dnl
+dnl  Evaluate to x, or to nothing if x is 0.  x is eval()ed and so can be an
+dnl  expression.
+dnl
+dnl  This is useful for x86 addressing mode displacements since forms like
+dnl  (%ebx) are one byte shorter than 0(%ebx).  A macro `foo' for use as
+dnl  foo(%ebx) could be defined with the following so it'll be empty if the
+dnl  expression comes out zero.
+dnl
+dnl       deflit(`foo', `m4_empty_if_zero(a+b*4-c)')
+dnl
+dnl  Naturally this shouldn't be done if, say, a computed jump depends on
+dnl  the code being a particular size.
+
+dnl  x goes through eval() twice, once for the comparison against 0 and
+dnl  once for the result, so a non-zero result is always a plain number.
+define(m4_empty_if_zero,
+m4_assert_onearg()
+`ifelse(eval($1),0,,eval($1))')
+
+
+dnl  Usage: m4_log2(x)
+dnl
+dnl  Calculate a logarithm to base 2.
+dnl  x must be an integral power of 2, between 2**0 and 2**30.
+dnl  x is eval()ed, so it can be an expression.
+dnl  An error results if x is invalid.
+dnl
+dnl  2**31 isn't supported, because an unsigned 2147483648 is out of range
+dnl  of a 32-bit signed int.  Also, the bug in BSD m4 where an eval()
+dnl  resulting in 2147483648 (or -2147483648 as the case may be) gives `-('
+dnl  means tests like eval(1<<31==(x)) would be necessary, but that then
+dnl  gives an unattractive explosion of eval() error messages if x isn't
+dnl  numeric.
+
+dnl  Starts the search in m4_log2_internal at n=0, 2**n=1, with x eval()ed
+dnl  once up front.
+define(m4_log2,
+m4_assert_numargs(1)
+`m4_log2_internal(0,1,eval(`$1'))')
+
+dnl  Called: m4_log2_internal(n,2**n,target)
+dnl
+dnl  Expands to n when 2**n equals target.  Otherwise, once n has reached
+dnl  30 without a match an error is raised; below that the macro recurses
+dnl  with n+1 and twice the power.
+define(m4_log2_internal,
+m4_assert_numargs(3)
+`ifelse($2,$3,$1,
+`ifelse($1,30,
+`m4_error(`m4_log2() argument too big or not a power of two: $3
+')',
+`m4_log2_internal(incr($1),eval(2*$2),$3)')')')
+
+
+dnl  Usage: m4_div2_towards_zero(x)
+dnl
+dnl  m4 division is probably whatever a C signed division is, and C doesn't
+dnl  specify what rounding gets used on negatives, so this expression forces
+dnl  a rounding towards zero.
+
+dnl  The correction term (x<0)&x is 1 only when x is negative and odd
+dnl  (assuming two's complement for the & on a negative value).  Such an x
+dnl  is stepped up to the next even number so the division is exact; every
+dnl  other x is divided as it stands.
+define(m4_div2_towards_zero,
+m4_assert_numargs(1)
+`eval((($1) + ((($1)<0) & ($1))) / 2)')
+
+
+dnl  Usage: m4_lshift(n,count)
+dnl         m4_rshift(n,count)
+dnl
+dnl  Calculate n shifted left or right by count many bits.  Both n and count
+dnl  are eval()ed and so can be expressions.
+dnl
+dnl  Negative counts are allowed and mean a shift in the opposite direction.
+dnl  Negative n is allowed and right shifts will be arithmetic (meaning
+dnl  divide by 2**count, rounding towards zero, also meaning the sign bit is
+dnl  duplicated).
+dnl
+dnl  Use these macros instead of << and >> in eval() since the basic ccs
+dnl  SysV m4 doesn't have those operators.
+
+dnl  Implemented as m4_lshift with the count negated.
+define(m4_rshift,
+m4_assert_numargs(2)
+`m4_lshift(`$1',-(`$2'))')
+
+dnl  Both arguments are eval()ed here and the resulting numbers handed to
+dnl  m4_lshift_internal.
+define(m4_lshift,
+m4_assert_numargs(2)
+`m4_lshift_internal(eval(`$1'),eval(`$2'))')
+
+dnl  Called: m4_lshift_internal(n,count)
+dnl
+dnl  Shifts one bit per recursion: a zero count expands to n; a positive
+dnl  count doubles n and decrements count; a negative count halves n with
+dnl  m4_div2_towards_zero and increments count.
+define(m4_lshift_internal,
+m4_assert_numargs(2)
+`ifelse(eval($2-0==0),1,$1,
+`ifelse(eval($2>0),1,
+`m4_lshift_internal(eval($1*2),decr($2))',
+`m4_lshift_internal(m4_div2_towards_zero($1),incr($2))')')')
+
+
+dnl  Usage: m4_popcount(n)
+dnl
+dnl  Expand to the number of 1 bits in n.
+
+dnl  n is eval()ed and passed to m4_popcount_internal with a count of 0.
+define(m4_popcount,
+m4_assert_numargs(1)
+`m4_popcount_internal(0,eval(`$1'))')
+
+dnl  Called: m4_popcount_internal(count,rem)
+dnl
+dnl  Expands to count when rem is 0; otherwise adds rem%2 to count, halves
+dnl  rem and recurses.
+dnl  NOTE(review): with a negative rem the remainder rem%2 may come out
+dnl  negative, so presumably only non-negative n is supported -- confirm.
+define(m4_popcount_internal,
+m4_assert_numargs(2)
+`ifelse($2,0,$1,
+`m4_popcount_internal(eval($1+($2%2)),eval($2/2))')')
+
+
+dnl  Usage: m4_count_trailing_zeros(N)
+dnl
+dnl  Determine the number of trailing zero bits on N.  N is eval()ed and so
+dnl  can be an expression.  If N is zero an error is generated.
+
+dnl  N is eval()ed and passed to the internal macro with a count of 0.
+define(m4_count_trailing_zeros,
+m4_assert_numargs(1)
+`m4_count_trailing_zeros_internal(eval(`$1'),0)')
+
+dnl  Called: m4_count_trailing_zeros_internal(val,count)
+dnl
+dnl  Expands to count once val is odd, otherwise halves val, increments
+dnl  count and recurses.  A val of zero is an error.
+dnl
+dnl  Oddness is tested as val%2 != 0 rather than val%2 == 1, because the
+dnl  remainder of a negative odd val can come out as -1.  The error text
+dnl  ends in a newline like the other m4_error messages in this file.
+define(m4_count_trailing_zeros_internal,
+m4_assert_numargs(2)
+`ifelse($1,0,
+`m4_error(`m4_count_trailing_zeros() given a zero value
+')',
+`ifelse(eval(($1)%2!=0),1,`$2',
+`m4_count_trailing_zeros_internal(eval($1/2),incr($2))')')')
+
+
+dnl  Usage: deflit(name,value)
+dnl
+dnl  Like define(), but "name" expands like a literal, rather than taking
+dnl  arguments.  For example "name(%eax)" expands to "value(%eax)".
+dnl
+dnl  Limitations:
+dnl
+dnl  $ characters in the value part must have quotes to stop them looking
+dnl  like macro parameters.  For example, deflit(reg,`123+$`'4+567').  See
+dnl  defreg() below for handling simple register definitions like $7 etc.
+dnl
+dnl  "name()" is turned into "name", unfortunately.  In GNU and SysV m4 an
+dnl  error is generated when this happens, but in BSD m4 it will happen
+dnl  silently.  The problem is that in BSD m4 $# is 1 in both "name" or
+dnl  "name()", so there's no way to differentiate them.  Because we want
+dnl  plain "name" to turn into plain "value", we end up with "name()"
+dnl  turning into plain "value" too.
+dnl
+dnl  "name(foo)" will lose any whitespace after commas in "foo", for example
+dnl  "disp(%eax, %ecx)" would become "128(%eax,%ecx)".
+dnl
+dnl  These parentheses oddities shouldn't matter in assembler text, but if
+dnl  they do the suggested workaround is to write "name ()" or "name (foo)"
+dnl  to stop the parentheses looking like a macro argument list.  If a space
+dnl  isn't acceptable in the output, then write "name`'()" or "name`'(foo)".
+dnl  The `' is stripped when read, but again stops the parentheses looking
+dnl  like parameters.
+
+dnl  Quoting for deflit_emptyargcheck is similar to m4_assert_numargs.  The
+dnl  stuff in the ifelse gives a $#, $1 and $@ evaluated in the new macro
+dnl  created, not in deflit.
+dnl  The macro created expands to: a deflit_emptyargcheck call, then the
+dnl  value, then -- when it was invoked with more than one argument or a
+dnl  non-empty first argument -- its argument list again in parentheses.
+define(deflit,
+m4_assert_numargs(2)
+`define(`$1',
+`deflit_emptyargcheck'(``$1'',$`#',m4_doublequote($`'1))`dnl
+$2`'dnl
+ifelse(eval($'`#>1 || m4_length('m4_doublequote($`'1)`)!=0),1,($'`@))')')
+
+dnl  Called: deflit_emptyargcheck(macroname,$#,`$1')
+dnl
+dnl  Raises an error when the deflit macro was given exactly one argument,
+dnl  that argument is empty, and m4_dollarhash_1_if_noparen_p is zero
+dnl  (presumably meaning this m4 can tell "name" from "name()" by $# --
+dnl  confirm against that macro's definition).
+define(deflit_emptyargcheck,
+`ifelse(eval($2==1 && !m4_dollarhash_1_if_noparen_p && m4_length(`$3')==0),1,
+`m4_error(`dont use a deflit as $1() because it loses the brackets (see deflit in asm-incl.m4 for more information)
+')')')
+
+
+dnl  Usage: m4_assert(`expr')
+dnl
+dnl  Test a compile-time requirement with an m4 expression.  The expression
+dnl  should be quoted, and will be eval()ed and expected to be non-zero.
+dnl  For example,
+dnl
+dnl         m4_assert(`FOO*2+6 < 14')
+
+dnl  The test is (expr)!=0, so any non-zero value of the expression passes,
+dnl  not only a value of exactly 1.  On failure the original expression
+dnl  text is reported through m4_error.
+define(m4_assert,
+m4_assert_numargs(1)
+`ifelse(eval(($1)!=0),1,,
+`m4_error(`assertion failed: $1
+')')')
+
+
+dnl  Usage: m4_repeat(count,text)
+dnl
+dnl  Expand to the given repetitions of the given text.  A zero count is
+dnl  allowed, and expands to nothing.
+
+dnl  count is eval()ed once here; the repetition itself is done by
+dnl  m4_repeat_internal.
+define(m4_repeat,
+m4_assert_numargs(2)
+`m4_repeat_internal(eval($1),`$2')')
+
+dnl  Called: m4_repeat_internal(count,text)
+dnl
+dnl  Nothing for a count of 0, otherwise a forloop from 1 to count whose
+dnl  statement is the text; the loop variable m4_repeat_internal_counter
+dnl  exists only to drive the loop.
+dnl  NOTE(review): forloop counts downwards when last < first, so a
+dnl  negative count would presumably repeat the text rather than give
+dnl  nothing -- confirm callers never pass one.
+define(m4_repeat_internal,
+m4_assert_numargs(2)
+`ifelse(`$1',0,,
+`forloop(m4_repeat_internal_counter,1,$1,``$2'')')')
+
+
+dnl  Usage: m4_hex_lowmask(bits)
+dnl
+dnl  Generate a hex constant which is a low mask of the given number of
+dnl  bits.  For example m4_hex_lowmask(10) would give 0x3ff.
+
+dnl  bits is eval()ed, m4_hex_lowmask_internal1 produces the hex digits and
+dnl  m4_cpu_hex_constant adds the hex marker.
+define(m4_hex_lowmask,
+m4_assert_numargs(1)
+`m4_cpu_hex_constant(m4_hex_lowmask_internal1(eval(`$1')))')
+
+dnl  Called: m4_hex_lowmask_internal1(bits)
+dnl
+dnl  Zero bits gives the single digit 0.  Otherwise bits is split into
+dnl  bits%4 (a partial leading digit) and bits/4 (the number of full f
+dnl  digits) for m4_hex_lowmask_internal2.
+define(m4_hex_lowmask_internal1,
+m4_assert_numargs(1)
+`ifelse($1,0,`0',
+`m4_hex_lowmask_internal2(eval(($1)%4),eval(($1)/4))')')
+
+dnl  Called: m4_hex_lowmask_internal2(remainder,digits)
+dnl
+dnl  A remainder of 1, 2 or 3 gives a leading digit 1, 3 or 7 respectively
+dnl  (nothing for any other remainder), followed by "digits" many f digits.
+define(m4_hex_lowmask_internal2,
+m4_assert_numargs(2)
+`ifelse($1,1,`1',
+`ifelse($1,2,`3',
+`ifelse($1,3,`7')')')dnl
+m4_repeat($2,`f')')
+
+
+dnl  --------------------------------------------------------------------------
+dnl  The following m4_list functions take a list as multiple arguments.
+dnl  Arguments are evaluated multiple times, there's no attempt at strict
+dnl  quoting.  Empty list elements are not allowed, since an empty final
+dnl  argument is ignored.  These restrictions don't affect the current uses,
+dnl  and make the implementation easier.
+
+
+dnl  Usage: m4_list_quote(list,...)
+dnl
+dnl  Produce a list with quoted commas, so it can be a single argument
+dnl  string.  For instance m4_list_quote(a,b,c) gives
+dnl
+dnl         a`,'b`,'c`,'
+dnl
+dnl  This can be used to put a list in a define,
+dnl
+dnl         define(foolist, m4_list_quote(a,b,c))
+dnl
+dnl  Which can then be used for instance as
+dnl
+dnl         m4_list_find(target, foolist)
+
+dnl  Recursive: each non-empty leading element is emitted followed by a
+dnl  quoted comma; the first empty element ends the list.
+define(m4_list_quote,
+`ifelse(`$1',,,
+`$1`,'m4_list_quote(shift($@))')')
+
+
+dnl  Usage: m4_list_find(key,list,...)
+dnl
+dnl  Evaluate to 1 or 0 according to whether key is in the list elements.
+
+dnl  Recursive: an empty second argument means the list is exhausted and
+dnl  gives 0, a second argument equal to key gives 1, otherwise that
+dnl  element is dropped and the search continues.
+define(m4_list_find,
+m4_assert_numargs_range(1,1000)
+`ifelse(`$2',,0,
+`ifelse(`$1',`$2',1,
+`m4_list_find(`$1',shift(shift($@)))')')')
+
+
+dnl  Usage: m4_list_remove(key,list,...)
+dnl
+dnl  Evaluate to the given list with `key' removed (if present).
+
+dnl  Recursive: every element other than key is emitted followed by a
+dnl  comma, so all occurrences of key are dropped and a non-empty result
+dnl  ends with a comma.  The first empty element ends the list.
+define(m4_list_remove,
+m4_assert_numargs_range(1,1000)
+`ifelse(`$2',,,
+`ifelse(`$1',`$2',,`$2,')dnl
+m4_list_remove(`$1',shift(shift($@)))')')
+
+
+dnl  Usage: m4_list_first(list,...)
+dnl
+dnl  Evaluate to the first element of the list (if any).
+
+dnl  Simply the first argument, so an empty list gives an empty result.
+define(m4_list_first,`$1')
+
+
+dnl  Usage: m4_list_count(list,...)
+dnl
+dnl  Evaluate to the number of elements in the list.  This can't just use $#
+dnl  because the last element might be empty.
+
+dnl  Hands the list to m4_list_count_internal with a starting count of 0.
+define(m4_list_count,
+`m4_list_count_internal(0,$@)')
+
+dnl  Called: m4_list_count_internal(count,list,...)
+dnl
+dnl  Expands to count at the first empty list element, otherwise adds 1 to
+dnl  count and recurses with that element dropped.
+define(m4_list_count_internal,
+m4_assert_numargs_range(1,1000)
+`ifelse(`$2',,$1,
+`m4_list_count_internal(eval($1+1),shift(shift($@)))')')
+
+
+dnl  --------------------------------------------------------------------------
+dnl  Various assembler things, not specific to any particular CPU.
+dnl
+
+
+dnl  Usage: include_mpn(`filename')
+dnl
+dnl  Like include(), but adds a path to the mpn source directory.  For
+dnl  example,
+dnl
+dnl         include_mpn(`sparc64/addmul_1h.asm')
+
+dnl  The file read is CONFIG_TOP_SRCDIR followed by /mpn/ and the given
+dnl  filename; CONFIG_TOP_SRCDIR is guarded with m4_assert_defined.
+define(include_mpn,
+m4_assert_numargs(1)
+m4_assert_defined(`CONFIG_TOP_SRCDIR')
+`include(CONFIG_TOP_SRCDIR`/mpn/$1')')
+
+
+dnl  Usage: C comment ...
+dnl
+dnl  This works like a FORTRAN-style comment character.  It can be used for
+dnl  comments to the right of assembly instructions, where just dnl would
+dnl  remove the newline and concatenate adjacent lines.
+dnl
+dnl  C and/or dnl are useful when an assembler doesn't support comments, or
+dnl  where different assemblers for a particular CPU need different styles.
+dnl  The intermediate ".s" files will end up with no comments, just code.
+dnl
+dnl  Using C is not intended to cause offence to anyone who doesn't like
+dnl  FORTRAN; but if that happens it's an unexpected bonus.
+dnl
+dnl  During development, if comments are wanted in the .s files to help see
+dnl  what's expanding where, C can be redefined with something like
+dnl
+dnl         define(`C',`#')
+
+dnl  The definition is a newline followed by dnl: the newline ends the
+dnl  current output line, and the dnl then discards the remainder of the
+dnl  input line, ie. the comment text.
+define(C, `
+dnl')
+
+
+dnl  Normally PIC is defined (or not) by libtool, but it doesn't set it on
+dnl  systems which are always PIC.  PIC_ALWAYS established in config.m4
+dnl  identifies these for us.
+
+dnl  PIC_ALWAYS is deliberately left unquoted so that it is expanded to
+dnl  its config.m4 value before ifelse compares it with yes; when quoted,
+dnl  the literal name is compared and the test can never succeed.  If
+dnl  PIC_ALWAYS is not defined it expands to itself and PIC is left alone.
+ifelse(PIC_ALWAYS,`yes',`define(`PIC')')
+
+
+dnl  Various possible defines passed from the Makefile that are to be tested
+dnl  with ifdef() rather than be expanded.
+
+dnl  Each group after the first two symbols is headed by a short name,
+dnl  presumably that of the multi-function source file whose OPERATION_xxx
+dnl  selectors it lists (to be confirmed against the mpn sources).
+m4_not_for_expansion(`PIC')
+m4_not_for_expansion(`DLL_EXPORT')
+
+dnl  aors_n
+m4_not_for_expansion(`OPERATION_add_n')
+m4_not_for_expansion(`OPERATION_sub_n')
+
+dnl  aorsmul_1
+m4_not_for_expansion(`OPERATION_addmul_1')
+m4_not_for_expansion(`OPERATION_submul_1')
+
+dnl  logops_n
+m4_not_for_expansion(`OPERATION_and_n')
+m4_not_for_expansion(`OPERATION_andn_n')
+m4_not_for_expansion(`OPERATION_nand_n')
+m4_not_for_expansion(`OPERATION_ior_n')
+m4_not_for_expansion(`OPERATION_iorn_n')
+m4_not_for_expansion(`OPERATION_nior_n')
+m4_not_for_expansion(`OPERATION_xor_n')
+m4_not_for_expansion(`OPERATION_xnor_n')
+
+dnl  popham
+m4_not_for_expansion(`OPERATION_popcount')
+m4_not_for_expansion(`OPERATION_hamdist')
+
+dnl  lorrshift
+m4_not_for_expansion(`OPERATION_lshift')
+m4_not_for_expansion(`OPERATION_rshift')
+
+dnl  aorslsh1_n
+m4_not_for_expansion(`OPERATION_addlsh1_n')
+m4_not_for_expansion(`OPERATION_sublsh1_n')
+m4_not_for_expansion(`OPERATION_rsblsh1_n')
+
+dnl  aorslsh2_n
+m4_not_for_expansion(`OPERATION_addlsh2_n')
+m4_not_for_expansion(`OPERATION_sublsh2_n')
+m4_not_for_expansion(`OPERATION_rsblsh2_n')
+
+dnl  rsh1aors_n
+m4_not_for_expansion(`OPERATION_rsh1add_n')
+m4_not_for_expansion(`OPERATION_rsh1sub_n')
+
+
+dnl  Usage: m4_config_gmp_mparam(`symbol')
+dnl
+dnl  Check that `symbol' is defined.  If it isn't, issue an error and
+dnl  terminate immediately.  The error message explains that the symbol
+dnl  should be in config.m4, copied from gmp-mparam.h.
+dnl
+dnl  Termination is immediate since missing say SQR_TOOM2_THRESHOLD can
+dnl  lead to infinite loops and endless error messages.
+
+dnl  Nothing is produced when the symbol is defined.  Otherwise the message
+dnl  goes through m4_error and m4exit(1) is then called.
+define(m4_config_gmp_mparam,
+m4_assert_numargs(1)
+`ifdef(`$1',,
+`m4_error(`$1 is not defined.
+       "configure" should have extracted this from gmp-mparam.h and put it
+       in config.m4 (or in <cpu>_<file>.asm for a fat binary), but somehow
+        this has failed.
+')m4exit(1)')')
+
+
+dnl  Usage: defreg(name,reg)
+dnl
+dnl  Give a name to a $ style register.  For example,
+dnl
+dnl         defreg(foo,$12)
+dnl
+dnl  defreg() inserts an extra pair of quotes after the $ so that it's not
+dnl  interpreted as an m4 macro parameter, ie. foo is actually $`'12.  m4
+dnl  strips those quotes when foo is expanded.
+dnl
+dnl  deflit() is used to make the new definition, so it will expand
+dnl  literally even if followed by parentheses ie. foo(99) will become
+dnl  $12(99).  (But there's nowhere that would be used is there?)
+dnl
+dnl  When making further definitions from existing defreg() macros, remember
+dnl  to use defreg() again to protect the $ in the new definitions too.  For
+dnl  example,
+dnl
+dnl         defreg(a0,$4)
+dnl         defreg(a1,$5)
+dnl         ...
+dnl
+dnl         defreg(PARAM_DST,a0)
+dnl
+dnl  This is only because a0 is expanding at the time the PARAM_DST
+dnl  definition is made, leaving a literal $4 that must be re-quoted.  On
+dnl  the other hand in something like the following ra is only expanded when
+dnl  ret is used and its $`'31 protection will have its desired effect at
+dnl  that time.
+dnl
+dnl         defreg(ra,$31)
+dnl         ...
+dnl         define(ret,`j ra')
+dnl
+dnl  Note that only $n forms are meant to be used here, and something like
+dnl  128($30) doesn't get protected and will come out wrong.
+
+dnl  The register text is split with substr() after its first character
+dnl  (the $) and an empty quoted string is placed between the two parts
+dnl  before the result is given to deflit.
+define(defreg,
+m4_assert_numargs(2)
+`deflit(`$1',
+substr(`$2',0,1)``''substr(`$2',1))')
+
+
+dnl  Usage: m4_instruction_wrapper()
+dnl
+dnl  Put this, unquoted, on a line on its own, at the start of a macro
+dnl  that's a wrapper around an assembler instruction.  It adds code to give
+dnl  a descriptive error message if the macro is invoked without arguments.
+dnl
+dnl  For example, suppose jmp needs to be wrapped,
+dnl
+dnl         define(jmp,
+dnl         m4_instruction_wrapper()
+dnl         m4_assert_numargs(1)
+dnl                 `.byte 0x42
+dnl                 .long  $1
+dnl                 nop')
+dnl
+dnl  The point of m4_instruction_wrapper is to get a better error message
+dnl  than m4_assert_numargs would give if jmp is accidentally used as plain
+dnl  "jmp foo" instead of the intended "jmp( foo)".  "jmp()" with no
+dnl  argument also provokes the error message.
+dnl
+dnl  m4_instruction_wrapper should only be used with wrapped instructions
+dnl  that take arguments, since obviously something meant to be used as say
+dnl  plain "ret" doesn't want to give an error when used that way.
+
+dnl  Expands, while the wrapper macro is being defined, to a call of
+dnl  m4_instruction_wrapper_internal carrying that macro's $0, $# and $1
+dnl  plus the current __file__ (or the text "the m4 sources" when __file__
+dnl  is not defined), followed by a dnl.
+define(m4_instruction_wrapper,
+m4_assert_numargs(0)
+``m4_instruction_wrapper_internal'(m4_doublequote($`'0),dnl
+ifdef(`__file__',`m4_doublequote(__file__)',``the m4 sources''),dnl
+$`#',m4_doublequote($`'1))`dnl'')
+
+dnl  Called: m4_instruction_wrapper_internal($0,`filename',$#,$1)
+dnl
+dnl  Raises the error when there is at most one argument and it is empty,
+dnl  which covers both a bare use of the name and an empty "name()".
+define(m4_instruction_wrapper_internal,
+`ifelse(eval($3<=1 && m4_length(`$4')==0),1,
+`m4_error(`$1 is a macro replacing that instruction and needs arguments, see $2 for details
+')')')
+
+
+dnl  Usage: m4_cpu_hex_constant(string)
+dnl
+dnl  Expand to the string prefixed by a suitable `0x' hex marker.  This
+dnl  should be redefined as necessary for CPUs with different conventions.
+
+dnl  This default simply puts 0x in front of the given digits.
+define(m4_cpu_hex_constant,
+m4_assert_numargs(1)
+`0x`$1'')
+
+
+dnl  Usage: UNROLL_LOG2, UNROLL_MASK, UNROLL_BYTES
+dnl         CHUNK_LOG2, CHUNK_MASK, CHUNK_BYTES
+dnl
+dnl  When code supports a variable amount of loop unrolling, the convention
+dnl  is to define UNROLL_COUNT to the number of limbs processed per loop.
+dnl  When testing code this can be varied to see how much the loop overhead
+dnl  is costing.  For example,
+dnl
+dnl         deflit(UNROLL_COUNT, 32)
+dnl
+dnl  If the forloop() generating the unrolled loop has a pattern processing
+dnl  more than one limb, the convention is to express this with CHUNK_COUNT.
+dnl  For example,
+dnl
+dnl         deflit(CHUNK_COUNT, 2)
+dnl
+dnl  The LOG2, MASK and BYTES definitions below are derived from these COUNT
+dnl  definitions.  If COUNT is redefined, the LOG2, MASK and BYTES follow
+dnl  the new definition automatically.
+dnl
+dnl  LOG2 is the log base 2 of COUNT.  MASK is COUNT-1, which can be used as
+dnl  a bit mask.  BYTES is BYTES_PER_MP_LIMB*COUNT, the number of bytes
+dnl  processed in each unrolled loop.
+dnl
+dnl  BYTES_PER_MP_LIMB is defined in a CPU specific m4 include file.  It
+dnl  exists only so the BYTES definitions here can be common to all CPUs.
+dnl  In the actual code for a given CPU, an explicit 4 or 8 may as well be
+dnl  used because the code is only for a particular CPU, it doesn't need to
+dnl  be general.
+dnl
+dnl  Note that none of these macros do anything except give conventional
+dnl  names to commonly used things.  You still have to write your own
+dnl  expressions for a forloop() and the resulting address displacements.
+dnl  Something like the following would be typical for 4 bytes per limb.
+dnl
+dnl         forloop(`i',0,UNROLL_COUNT-1,`
+dnl                 deflit(`disp',eval(i*4))
+dnl                 ...
+dnl         ')
+dnl
+dnl  Or when using CHUNK_COUNT,
+dnl
+dnl         forloop(`i',0,UNROLL_COUNT/CHUNK_COUNT-1,`
+dnl                 deflit(`disp0',eval(i*CHUNK_COUNT*4))
+dnl                 deflit(`disp1',eval(disp0+4))
+dnl                 ...
+dnl         ')
+dnl
+dnl  Clearly `i' can be run starting from 1, or from high to low or whatever
+dnl  best suits.
+
+dnl  UNROLL_LOG2, UNROLL_MASK and UNROLL_BYTES have quoted values, so each
+dnl  is computed from UNROLL_COUNT (and BYTES_PER_MP_LIMB) when used, not
+dnl  when defined here.
+deflit(UNROLL_LOG2,
+m4_assert_defined(`UNROLL_COUNT')
+`m4_log2(UNROLL_COUNT)')
+
+deflit(UNROLL_MASK,
+m4_assert_defined(`UNROLL_COUNT')
+`eval(UNROLL_COUNT-1)')
+
+deflit(UNROLL_BYTES,
+m4_assert_defined(`UNROLL_COUNT')
+m4_assert_defined(`BYTES_PER_MP_LIMB')
+`eval(UNROLL_COUNT * BYTES_PER_MP_LIMB)')
+
+dnl  CHUNK_LOG2, CHUNK_MASK and CHUNK_BYTES have quoted values, so each is
+dnl  computed from CHUNK_COUNT (and BYTES_PER_MP_LIMB) when used, not when
+dnl  defined here.
+deflit(CHUNK_LOG2,
+m4_assert_defined(`CHUNK_COUNT')
+`m4_log2(CHUNK_COUNT)')
+
+deflit(CHUNK_MASK,
+m4_assert_defined(`CHUNK_COUNT')
+`eval(CHUNK_COUNT-1)')
+
+deflit(CHUNK_BYTES,
+m4_assert_defined(`CHUNK_COUNT')
+m4_assert_defined(`BYTES_PER_MP_LIMB')
+`eval(CHUNK_COUNT * BYTES_PER_MP_LIMB)')
+
+
+dnl  Usage: MPN(name)
+dnl
+dnl  Add MPN_PREFIX to a name.
+dnl  MPN_PREFIX defaults to "__gmpn_" if not defined.
+dnl
+dnl  m4_unquote is used in MPN so that when it expands to say __gmpn_foo,
+dnl  that identifier will be subject to further macro expansion.  This is
+dnl  used by some of the fat binary support for renaming symbols.
+
+dnl  Give MPN_PREFIX its default only when nothing has defined it already.
+ifdef(`MPN_PREFIX',,
+`define(`MPN_PREFIX',`__gmpn_')')
+
+dnl  MPN_PREFIX and the name are pasted together (the empty quotes keep
+dnl  MPN_PREFIX a separate token) and the result is given to m4_unquote.
+define(MPN,
+m4_assert_numargs(1)
+`m4_unquote(MPN_PREFIX`'$1)')
+
+
+dnl  Usage: mpn_add_n, etc
+dnl
+dnl  Convenience definitions using MPN(), like the #defines in gmp.h.  Each
+dnl  function that might be implemented in assembler is here.
+
+dnl  Usage: define_mpn(name)
+dnl
+dnl  Define mpn_name as a macro that expands to MPN of the quoted name.
+define(define_mpn,
+m4_assert_numargs(1)
+`define(`mpn_$1',`MPN(`$1')')')
+
+dnl  One define_mpn per function name; each name is listed exactly once.
+define_mpn(add)
+define_mpn(add_1)
+define_mpn(add_n)
+define_mpn(add_nc)
+define_mpn(addlsh1_n)
+define_mpn(addlsh2_n)
+define_mpn(addlsh_n)
+define_mpn(addmul_1)
+define_mpn(addmul_1c)
+define_mpn(addmul_2)
+define_mpn(addmul_3)
+define_mpn(addmul_4)
+define_mpn(add_n_sub_n)
+define_mpn(add_n_sub_nc)
+define_mpn(addaddmul_1msb0)
+define_mpn(and_n)
+define_mpn(andn_n)
+define_mpn(bdiv_q_1)
+define_mpn(pi1_bdiv_q_1)
+define_mpn(bdiv_dbm1c)
+define_mpn(cmp)
+define_mpn(com)
+define_mpn(copyd)
+define_mpn(copyi)
+define_mpn(count_leading_zeros)
+define_mpn(count_trailing_zeros)
+define_mpn(divexact_1)
+define_mpn(divexact_by3c)
+define_mpn(divrem)
+define_mpn(divrem_1)
+define_mpn(divrem_1c)
+define_mpn(divrem_2)
+define_mpn(divrem_classic)
+define_mpn(divrem_newton)
+define_mpn(dump)
+define_mpn(gcd)
+define_mpn(gcd_1)
+define_mpn(gcdext)
+define_mpn(get_str)
+define_mpn(hamdist)
+define_mpn(invert_limb)
+define_mpn(ior_n)
+define_mpn(iorn_n)
+define_mpn(lshift)
+define_mpn(lshiftc)
+define_mpn(mod_1_1p)
+define_mpn(mod_1_1p_cps)
+define_mpn(mod_1s_2p)
+define_mpn(mod_1s_2p_cps)
+define_mpn(mod_1s_3p)
+define_mpn(mod_1s_3p_cps)
+define_mpn(mod_1s_4p)
+define_mpn(mod_1s_4p_cps)
+define_mpn(mod_1)
+define_mpn(mod_1c)
+define_mpn(mod_34lsub1)
+define_mpn(modexact_1_odd)
+define_mpn(modexact_1c_odd)
+define_mpn(mul)
+define_mpn(mul_1)
+define_mpn(mul_1c)
+define_mpn(mul_2)
+define_mpn(mul_3)
+define_mpn(mul_4)
+define_mpn(mul_basecase)
+define_mpn(mul_n)
+define_mpn(mullo_basecase)
+define_mpn(perfect_square_p)
+define_mpn(popcount)
+define_mpn(preinv_divrem_1)
+define_mpn(preinv_mod_1)
+define_mpn(nand_n)
+define_mpn(neg)
+define_mpn(nior_n)
+define_mpn(powm)
+define_mpn(powlo)
+define_mpn(random)
+define_mpn(random2)
+define_mpn(redc_1)
+define_mpn(redc_2)
+define_mpn(rsblsh1_n)
+define_mpn(rsblsh2_n)
+define_mpn(rsblsh_n)
+define_mpn(rsh1add_n)
+define_mpn(rsh1add_nc)
+define_mpn(rsh1sub_n)
+define_mpn(rsh1sub_nc)
+define_mpn(rshift)
+define_mpn(rshiftc)
+define_mpn(scan0)
+define_mpn(scan1)
+define_mpn(set_str)
+define_mpn(sqr_basecase)
+define_mpn(sqr_diagonal)
+define_mpn(sublsh1_n)
+define_mpn(sublsh2_n)
+define_mpn(sqrtrem)
+define_mpn(sub)
+define_mpn(sub_1)
+define_mpn(sub_n)
+define_mpn(sub_nc)
+define_mpn(submul_1)
+define_mpn(submul_1c)
+define_mpn(umul_ppmm)
+define_mpn(umul_ppmm_r)
+define_mpn(udiv_qrnnd)
+define_mpn(udiv_qrnnd_r)
+define_mpn(xnor_n)
+define_mpn(xor_n)
+
+
+dnl  Defines for C global arrays and variables, with names matching what's
+dnl  used in the C code.
+dnl
+dnl  Notice that GSYM_PREFIX is included, unlike with the function defines
+dnl  above.  Also, "deflit" is used so that something like __clz_tab(%ebx)
+dnl  comes out as __gmpn_clz_tab(%ebx), for the benefit of CPUs with that
+dnl  style assembler syntax.
+
+dnl  __clz_tab is GSYM_PREFIX followed by MPN of clz_tab.
+deflit(__clz_tab,
+m4_assert_defined(`GSYM_PREFIX')
+`GSYM_PREFIX`'MPN(`clz_tab')')
+
+dnl  binvert_limb_table is GSYM_PREFIX followed by the fixed name
+dnl  __gmp_binvert_limb_table; MPN is not applied to it.
+deflit(binvert_limb_table,
+m4_assert_defined(`GSYM_PREFIX')
+`GSYM_PREFIX`'__gmp_binvert_limb_table')
+
+
+dnl  Usage: ASM_START()
+dnl
+dnl  Emit any directives needed once at the start of an assembler file, like
+dnl  ".set noreorder" or whatever.  The default for this is nothing, but
+dnl  it's redefined by CPU specific m4 files.
+
+dnl  Default definition: empty.
+define(ASM_START)
+
+
+dnl  Usage: ASM_END()
+dnl
+dnl  Emit any directives needed once at the end of an assembler file.  The
+dnl  default for this is nothing, but it's redefined by CPU specific m4 files.
+
+dnl  Default definition: empty.
+define(ASM_END)
+
+
+dnl  Usage: PROLOGUE(foo[,param])
+dnl         EPILOGUE(foo)
+dnl
+dnl  Emit directives to start or end a function.  GSYM_PREFIX is added by
+dnl  these macros if necessary, so the given "foo" is what the function will
+dnl  be called in C.
+dnl
+dnl  The second parameter to PROLOGUE is used only for some CPUs and should
+dnl  be omitted if not required.
+dnl
+dnl  Nested or overlapping PROLOGUE/EPILOGUE pairs are allowed, if that
+dnl  makes sense for the system.  The name given to EPILOGUE must be a
+dnl  currently open PROLOGUE.
+dnl
+dnl  If only one PROLOGUE is open then the name can be omitted from
+dnl  EPILOGUE.  This is encouraged, since it means the name only has to
+dnl  appear in one place, not two.
+dnl
+dnl  The given name "foo" is not fully quoted here, it will be macro
+dnl  expanded more than once.  This is the way the m4_list macros work, and
+dnl  it also helps the tune/many.pl program do a renaming like
+dnl  -D__gmpn_add_n=mpn_add_n_foo when GSYM_PREFIX is not empty.
+
+dnl  Steps: m4_file_seen() is called, the name is put at the front of
+dnl  PROLOGUE_list, and PROLOGUE_cpu is invoked with GSYM_PREFIX prepended
+dnl  to the name -- with one argument when param is empty, with two
+dnl  otherwise.
+define(PROLOGUE,
+m4_assert_numargs_range(1,2)
+`m4_file_seen()dnl
+define(`PROLOGUE_list',m4_list_quote($1,PROLOGUE_list))dnl
+ifelse(`$2',,
+`PROLOGUE_cpu(GSYM_PREFIX`'$1)',
+`PROLOGUE_cpu(GSYM_PREFIX`'$1,`$2')')')
+
+dnl  With a name given, that name goes straight to EPILOGUE_internal.  With
+dnl  no name, PROLOGUE_list must hold exactly one entry, which is then used
+dnl  (via PROLOGUE_current_function); zero or several open functions are
+dnl  reported as errors.
+define(EPILOGUE,
+m4_assert_numargs_range(0,1)
+`ifelse(`$1',,
+`ifelse(m4_list_count(PROLOGUE_list),0,
+`m4_error(`no open functions for EPILOGUE
+')',
+`ifelse(m4_list_count(PROLOGUE_list),1,
+`EPILOGUE_internal(PROLOGUE_current_function)',
+`m4_error(`more than one open function for EPILOGUE
+')')')',
+`EPILOGUE_internal(`$1')')')
+
+dnl  Called: EPILOGUE_internal(name)
+dnl
+dnl  Report an error if name is not in PROLOGUE_list, then rebuild the list
+dnl  without name and invoke EPILOGUE_cpu with GSYM_PREFIX prepended.
+dnl  NOTE(review): the name is passed on here as GSYM_PREFIX`$1', whereas
+dnl  PROLOGUE uses GSYM_PREFIX`'$1 -- confirm that the extra quoting,
+dnl  which keeps the name from being expanded again at this point, is
+dnl  intended.
+define(EPILOGUE_internal,
+m4_assert_numargs(1)
+m4_assert_defined(`EPILOGUE_cpu')
+`ifelse(m4_list_find($1,PROLOGUE_list),0,
+`m4_error(`EPILOGUE without PROLOGUE: $1
+')')dnl
+define(`PROLOGUE_list',m4_list_quote(m4_list_remove($1,PROLOGUE_list)))dnl
+EPILOGUE_cpu(GSYM_PREFIX`$1')')
+
+dnl  Currently open PROLOGUEs, as a comma-separated list.  It starts out
+dnl  empty; PROLOGUE adds names at the front and EPILOGUE_internal removes
+dnl  them.
+define(PROLOGUE_list)
+
+
+dnl  Called: PROLOGUE_check(list,...)
+dnl  Check there are no remaining open PROLOGUEs at the end of input.
+dnl
+dnl  Recursive: m4_error is called for each non-empty list element in
+dnl  turn, stopping at the first empty one.
+define(PROLOGUE_check,
+`ifelse($1,,,
+`m4_error(`no EPILOGUE for: $1
+')dnl
+PROLOGUE_check(shift($@))')')
+
+dnl  Register the check to run over whatever PROLOGUE_list holds at that
+dnl  time (presumably m4wrap_prepend queues the text for end of input, as
+dnl  m4wrap does -- confirm against its definition).
+m4wrap_prepend(`PROLOGUE_check(PROLOGUE_list)')
+
+
+dnl  Usage: PROLOGUE_current_function
+dnl
+dnl  This macro expands to the current PROLOGUE/EPILOGUE function, or the
+dnl  most recent PROLOGUE if such pairs are nested or overlapped.
+
+dnl  This is the front element of PROLOGUE_list; PROLOGUE adds names at
+dnl  the front, so that is the most recently opened function.
+define(PROLOGUE_current_function,
+m4_assert_numargs(-1)
+`m4_list_first(PROLOGUE_list)')
+
+
+dnl  Usage: PROLOGUE_cpu(GSYM_PREFIX`'foo[,param])
+dnl         EPILOGUE_cpu(GSYM_PREFIX`'foo)
+dnl
+dnl  These macros hold the CPU-specific parts of PROLOGUE and EPILOGUE.
+dnl  Both are called with the function name, with GSYM_PREFIX already
+dnl  prepended.
+dnl
+dnl  The definitions here are something typical and sensible, but CPU or
+dnl  system specific m4 files should redefine them as necessary.  The
+dnl  optional extra parameter to PROLOGUE_cpu is not expected and not
+dnl  accepted here.
+
+dnl  The default emits, in order: TEXT, ALIGN(8), a GLOBL line for the
+dnl  name with GLOBL_ATTR, a TYPE of function for the name, and finally the
+dnl  name followed by LABEL_SUFFIX as the label itself.
+define(PROLOGUE_cpu,
+m4_assert_numargs(1)
+`      TEXT
+       ALIGN(8)
+       GLOBL   `$1' GLOBL_ATTR
+       TYPE(`$1',`function')
+`$1'LABEL_SUFFIX')
+
+dnl  The default emits a SIZE for the name, with the size expression being
+dnl  the current location minus the name, ie. ".-name".
+define(EPILOGUE_cpu,
+`      SIZE(`$1',.-`$1')')
+
+
+dnl  Usage: L(name)
+dnl
+dnl  Generate a local label with the given name.  This is simply a
+dnl  convenient way to add LSYM_PREFIX.
+dnl
+dnl  LSYM_PREFIX might be L$, so defn() must be used to quote it or the L
+dnl  will expand again as the L macro, making an infinite recursion.
+
+dnl  The result is the defn() of LSYM_PREFIX with the name appended
+dnl  directly after it.
+define(`L',
+m4_assert_numargs(1)
+`defn(`LSYM_PREFIX')$1')
+
+
+dnl  Usage: LDEF(name)
+dnl
+dnl  Generate a directive to define a local label.
+dnl
+dnl  On systems with a fixed syntax for defining labels there's no need to
+dnl  use this macro, it's only meant for systems where the syntax varies,
+dnl  like hppa which is "L(foo):" with gas, but just "L(foo)" in column 0
+dnl  with the system `as'.
+dnl
+dnl  The extra `' after LABEL_SUFFIX avoids any chance of a following
+dnl  "(...)"  being interpreted as an argument list.  Not that it'd be
+dnl  sensible to write anything like that after an LDEF(), but just in case.
+
+dnl  The result is L of the name followed by LABEL_SUFFIX.
+define(LDEF,
+m4_assert_numargs(1)
+m4_assert_defined(`LABEL_SUFFIX')
+`L(`$1')`'LABEL_SUFFIX`'')
+
+
+dnl  Usage: INT32(label,value)
+dnl         INT64(label,first,second)
+
+dnl  INT32 emits ALIGN(4), the local label definition LDEF(label), and one
+dnl  W32 line holding value.
+define(`INT32',
+m4_assert_defined(`W32')
+`      ALIGN(4)
+LDEF(`$1')
+       W32     $2')
+
+dnl  INT64 emits ALIGN(8), the local label definition LDEF(label), and two
+dnl  W32 lines holding first and then second.
+define(`INT64',
+m4_assert_defined(`W32')
+`      ALIGN(8)
+LDEF(`$1')
+       W32     $2
+       W32     $3')
+
+
+dnl  Usage: ALIGN(bytes)
+dnl
+dnl  Emit a ".align" directive.  The alignment is specified in bytes, and
+dnl  will normally need to be a power of 2.  The actual ".align" generated
+dnl  is either bytes or logarithmic according to what ./configure finds the
+dnl  assembler needs.
+dnl
+dnl  If ALIGN_FILL_0x90 is defined and equal to "yes", then ", 0x90" is
+dnl  appended.  This is for x86, see mpn/x86/README.
+
+dnl  With ALIGN_LOGARITHMIC equal to yes the operand is m4_log2 of bytes,
+dnl  otherwise it is eval of bytes.
+define(ALIGN,
+m4_assert_numargs(1)
+m4_assert_defined(`ALIGN_LOGARITHMIC')
+`.align        ifelse(ALIGN_LOGARITHMIC,yes,`m4_log2($1)',`eval($1)')dnl
+ifelse(ALIGN_FILL_0x90,yes,`, 0x90')')
+
+
+dnl  Usage: MULFUNC_PROLOGUE(function function...)
+dnl
+dnl  A dummy macro which is grepped for by ./configure to know what
+dnl  functions a multi-function file is providing.  Use this if there aren't
+dnl  explicit PROLOGUE()s for each possible function.
+dnl
+dnl  Multiple MULFUNC_PROLOGUEs can be used, or just one with the function
+dnl  names separated by spaces.
+
+dnl  The definition has no text of its own; it consists only of whatever
+dnl  m4_assert_numargs(1) contributes (presumably an argument count check).
+define(`MULFUNC_PROLOGUE',
+m4_assert_numargs(1)
+)
+
+
+dnl  Usage: NAILS_SUPPORT(spec spec ...)
+dnl
+dnl  A dummy macro which is grepped for by ./configure to know what nails
+dnl  are supported in an asm file.
+dnl
+dnl  Ranges can be given, or just individual values.  Multiple values or
+dnl  ranges can be given, separated by spaces.  Multiple NAILS_SUPPORT
+dnl  declarations work too.  Some examples,
+dnl
+dnl         NAILS_SUPPORT(1-20)
+dnl         NAILS_SUPPORT(1 6 9-12)
+dnl         NAILS_SUPPORT(1-10 16-20)
+
+dnl  The definition has no text of its own; it consists only of whatever
+dnl  m4_assert_numargs(1) contributes (presumably an argument count check).
+define(NAILS_SUPPORT,
+m4_assert_numargs(1)
+)
+
+
+dnl  Usage: GMP_NUMB_MASK
+dnl
+dnl  A bit mask for the number part of a limb.  Eg. with 6 bit nails in a
+dnl  32 bit limb, GMP_NUMB_MASK would be 0x3ffffff.
+
+dnl  The value is m4_hex_lowmask of GMP_NUMB_BITS, computed at each use.
+define(GMP_NUMB_MASK,
+m4_assert_numargs(-1)
+m4_assert_defined(`GMP_NUMB_BITS')
+`m4_hex_lowmask(GMP_NUMB_BITS)')
+
+
+dnl  Switch back to the default output diversion now that the definitions
+dnl  are done (presumably undoing a divert(-1) near the top of the file).
+divert`'dnl
diff --git a/mpn/clipper/add_n.s b/mpn/clipper/add_n.s

new file mode 100644 (file)

index 0000000..225b950
--- /dev/null
+++ b/mpn/clipper/add_n.s
@@ -0,0 +1,46 @@
+; Clipper __gmpn_add_n -- Add two limb vectors of the same length > 0 and store
+; sum in a third limb vector.
+
+; Copyright 1995, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 3 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+.text
+       .align 16
+.globl ___gmpn_add_n
+___gmpn_add_n:
+       subq    $8,sp           ; make room on the stack to save r6
+       storw   r6,(sp)         ; save r6 (restored before returning)
+       loadw   12(sp),r2       ; r2 = second source pointer, taken from the stack
+       loadw   16(sp),r3       ; r3 = limb count, taken from the stack
+       loadq   $0,r6           ; clear carry-save register
+
+.Loop: loadw   (r1),r4         ; r4 = next limb of the first source (r1)
+       loadw   (r2),r5         ; r5 = next limb of the second source (r2)
+       addwc   r6,r6           ; restore carry from r6
+       addwc   r5,r4           ; r4 = r4 + r5 + carry
+       storw   r4,(r0)         ; store the sum limb at the destination (r0)
+       subwc   r6,r6           ; save carry in r6
+       addq    $4,r0           ; step all three pointers to the next 4-byte limb
+       addq    $4,r1
+       addq    $4,r2
+       subq    $1,r3           ; one limb fewer to do
+       brne    .Loop           ; repeat until the count reaches zero
+
+       negw    r6,r0           ; r0 = -r6: presumably the final carry as 0 or 1
+       loadw   (sp),r6         ; restore the caller's r6
+       addq    $8,sp           ; release the save slot
+       ret     sp
diff --git a/mpn/clipper/mul_1.s b/mpn/clipper/mul_1.s

new file mode 100644 (file)

index 0000000..058a317
--- /dev/null
+++ b/mpn/clipper/mul_1.s
@@ -0,0 +1,45 @@
+; Clipper __gmpn_mul_1 -- Multiply a limb vector with a limb and store
+; the result in a second limb vector.
+
+; Copyright 1995, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 3 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+.text
+       .align  16
+.globl ___gmpn_mul_1
+___gmpn_mul_1:
+       subq    $8,sp           ; make room on the stack to save r6
+       storw   r6,(sp)         ; save r6 (restored before returning)
+       loadw   12(sp),r2       ; r2 = limb count, taken from the stack
+       loadw   16(sp),r3       ; r3 = multiplier limb, taken from the stack
+       loadq   $0,r6           ; clear carry limb
+
+.Loop: loadw   (r1),r4         ; r4 = next source limb (r1)
+       mulwux  r3,r4           ; multiply by r3; code below uses r4 as low, r5 as high product limb
+       addw    r6,r4           ; add old carry limb into low product limb
+       loadq   $0,r6           ; r6 = 0; the carry from the addw above is consumed next
+       addwc   r5,r6           ; propagate cy into high product limb
+       storw   r4,(r0)         ; store the low product limb at the destination (r0)
+       addq    $4,r0           ; step both pointers to the next 4-byte limb
+       addq    $4,r1
+       subq    $1,r2           ; one limb fewer to do
+       brne    .Loop           ; repeat until the count reaches zero
+
+       movw    r6,r0           ; return the final carry limb in r0
+       loadw   0(sp),r6        ; restore the caller's r6
+       addq    $8,sp           ; release the save slot
+       ret     sp
diff --git a/mpn/clipper/sub_n.s b/mpn/clipper/sub_n.s

new file mode 100644 (file)

index 0000000..58c2cb3
--- /dev/null
+++ b/mpn/clipper/sub_n.s
@@ -0,0 +1,46 @@
+; Clipper __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+; store difference in a third limb vector.
+
+; Copyright 1995, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 3 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+.text
+       .align 16
+.globl ___gmpn_sub_n
+___gmpn_sub_n:
+       subq    $8,sp           ; make room on the stack to save r6
+       storw   r6,(sp)         ; save r6 (restored before returning)
+       loadw   12(sp),r2       ; r2 = subtrahend pointer, taken from the stack
+       loadw   16(sp),r3       ; r3 = limb count, taken from the stack
+       loadq   $0,r6           ; clear carry-save register
+
+.Loop: loadw   (r1),r4         ; r4 = next minuend limb (r1)
+       loadw   (r2),r5         ; r5 = next subtrahend limb (r2)
+       addwc   r6,r6           ; restore carry from r6
+       subwc   r5,r4           ; r4 = r4 - r5 - carry
+       storw   r4,(r0)         ; store the difference limb at the destination (r0)
+       subwc   r6,r6           ; save carry in r6
+       addq    $4,r0           ; step all three pointers to the next 4-byte limb
+       addq    $4,r1
+       addq    $4,r2
+       subq    $1,r3           ; one limb fewer to do
+       brne    .Loop           ; repeat until the count reaches zero
+
+       negw    r6,r0           ; r0 = -r6: presumably the final borrow as 0 or 1
+       loadw   (sp),r6         ; restore the caller's r6
+       addq    $8,sp           ; release the save slot
+       ret     sp
diff --git a/mpn/cpp-ccas b/mpn/cpp-ccas

new file mode 100755 (executable)

index 0000000..fd62f90
--- /dev/null
+++ b/mpn/cpp-ccas
@@ -0,0 +1,107 @@
+#!/bin/sh
+#
+# A helper script for Makeasm.am .S.lo rule.
+
+# Copyright 2001 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+# Usage: cpp-ccas --cpp=CPP CC ... file.S ...
+#
+# Process file.S with the given CPP command plus any -D options in the
+# rest of the arguments, then assemble with the given CC plus all
+# arguments.
+#
+# The CPP command must be in a single --cpp= argument, and will be
+# split on whitespace.  It should include -I options required.
+#
+# When CC is invoked, file.S is replaced with a temporary .s file
+# which is the CPP output.
+#
+# Any lines starting with "#" are removed from the CPP output, usually
+# these will be #line and #file markers from CPP, but they might also
+# be comments from the .S.
+#
+# To allow parallel builds, the temp file name is based on the .S file
+# name, which will be the output object filename for all uses we put
+# this script to.
+
+CPP=                    # preprocessor command, taken from --cpp=
+CPPDEFS=                # -D options, passed to both CPP and CC
+CC=                     # assembler command line, built up from the arguments
+S=                      # the single .S input file
+SEEN_O=no               # becomes yes when the caller supplied -o
+
+for i in "$@"; do
+  case $i in
+    --cpp=*)
+      CPP=`echo "$i" | sed 's/^--cpp=//'`
+      ;;
+    -D*)
+      CPPDEFS="$CPPDEFS $i"
+      CC="$CC $i"
+      ;;
+    *.S)
+      if test -n "$S"; then
+        echo "Only one .S file permitted"
+        exit 1
+      fi
+      BASENAME=`echo "$i" | sed -e 's/\.S$//' -e 's/^.*[\\/:]//'`
+      S=$i
+      TMP_I=tmp-$BASENAME.i     # raw CPP output
+      TMP_S=tmp-$BASENAME.s     # CPP output with the "#" lines removed
+      CC="$CC $TMP_S"           # CC assembles the temp .s in place of file.S
+      ;;
+    -o)
+      SEEN_O=yes
+      CC="$CC $i"
+      ;;
+    *)
+      CC="$CC $i"
+      ;;
+  esac
+done
+
+if test -z "$CPP"; then
+  echo "No --cpp specified"
+  exit 1
+fi
+
+if test -z "$S"; then
+  echo "No .S specified"
+  exit 1
+fi
+
+# Libtool adds its own -o when sending output to .libs/foo.o, but not
+# when just wanting foo.o in the current directory.  We need an
+# explicit -o in both cases since we're assembling tmp-foo.s.
+#
+if test $SEEN_O = no; then
+  CC="$CC -o $BASENAME.o"
+fi
+
+echo "$CPP $CPPDEFS $S >$TMP_I"
+$CPP $CPPDEFS $S >$TMP_I || exit       # $CPP is unquoted on purpose: it is split on whitespace
+
+echo "grep -v '^#' $TMP_I >$TMP_S"
+grep -v '^#' $TMP_I >$TMP_S            # drop "#" lines (CPP line markers, .S comments)
+
+echo "$CC"
+$CC || exit
+
+# Comment this out to preserve the .i and .s intermediates
+rm -f $TMP_I $TMP_S
diff --git a/mpn/cray/README b/mpn/cray/README

new file mode 100644 (file)

index 0000000..ccd7439
--- /dev/null
+++ b/mpn/cray/README
@@ -0,0 +1,110 @@
+Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+
+
+
+
+The code in this directory works for Cray vector systems such as C90,
+J90, T90 (both the CFP variant and the IEEE variant) and SV1.  (For
+the T3E and T3D systems, see the `alpha' subdirectory at the same
+level as the directory containing this file.)
+
+The cfp subdirectory is for systems utilizing the traditional Cray
+floating-point format, and the ieee subdirectory is for the newer
+systems that use the IEEE floating-point format.
+
+There are several issues that reduce speed on Cray systems.  For
+systems with cfp floating point, the main obstacle is the forming of
+128-bit products.  For IEEE systems, adding, and in particular
+computing carry, is the main issue.  There are no vectorizing
+unsigned-less-than instructions, and the sequence that implements that
+operation is very long.
+
+Shifting is the only operation that is simple to make fast.  All Cray
+systems have bitblt instructions (Vi Vj,Vj<Ak and Vi Vj,Vj>Ak) that
+should be really useful.
+
+For best speed for cfp systems, we need a mul_basecase, since that
+reduces the need for carry propagation to a minimum.  Depending on the
+size (vn) of the smaller of the two operands (V), we should split U and V
+in different chunk sizes:
+
+U split in 2 32-bit parts
+V split according to the table:
+parts                  4       5       6       7       8
+bits/part              16      13      11      10      8
+max allowed vn         1       8       32      64      256
+number of multiplies   8       10      12      14      16
+peak cycles/limb       4       5       6       7       8
+
+U split in 3 22-bit parts
+V split according to the table:
+parts                  3       4       5
+bits/part              22      16      13
+max allowed vn         16      1024    8192
+number of multiplies   9       12      15
+peak cycles/limb       4.5     6       7.5
+
+U split in 4 16-bit parts
+V split according to the table:
+parts                  4
+bits/part              16
+max allowed vn         65536
+number of multiplies   16
+peak cycles/limb       8
+
+(A T90 CPU can accumulate two products per cycle.)
+
+IDEA:
+* Rewrite mpn_add_n:
+    short cy[n + 1];
+    #pragma _CRI ivdep
+      for (i = 0; i < n; i++)
+       { s = up[i] + vp[i];
+         rp[i] = s;
+         cy[i + 1] = s < up[i]; }
+      more_carries = 0;
+    #pragma _CRI ivdep
+      for (i = 1; i < n; i++)
+       { s = rp[i] + cy[i];
+         rp[i] = s;
+         more_carries += s < cy[i]; }
+      cys = 0;
+      if (more_carries)
+       {
+         cys = rp[1] < cy[1];
+         for (i = 2; i < n; i++)
+           { rp[i] += cys;
+             cys = rp[i] < cys; }
+       }
+      return cys + cy[n];
+
+* Write mpn_add3_n for adding three operands.  First add operands 1
+  and 2, and generate cy[].  Then add operand 3 to the partial result,
+  and accumulate carry into cy[].  Finally propagate carry just like
+  in the new mpn_add_n.
+
+IDEA:
+
+Store fewer bits, perhaps 62, per limb.  That brings mpn_add_n time
+down to 2.5 cycles/limb and mpn_addmul_1 times to 4 cycles/limb.  By
+storing even fewer bits per limb, perhaps 56, it would be possible to
+write a mul_mul_basecase that would run at effectively 1 cycle/limb.
+(Use VM here to better handle the rhomb-shaped multiply area, perhaps
+rounding operand sizes up to the next power of 2.)
diff --git a/mpn/cray/add_n.c b/mpn/cray/add_n.c

new file mode 100644 (file)

index 0000000..e4f8a0d
--- /dev/null
+++ b/mpn/cray/add_n.c
@@ -0,0 +1,80 @@
+/* Cray PVP mpn_add_n -- add two limb vectors and store their sum in a third
+   limb vector.
+
+Copyright 1996, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+/* This code runs at 4 cycles/limb.  It may be possible to bring it down
+   to 3 cycles/limb.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+mp_limb_t
+mpn_add_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+  mp_limb_t cy[n];              /* cy[i] = carry out of up[i] + vp[i] */
+  mp_limb_t a, b, r, s0, c0, c1;
+  mp_size_t i;
+  int more_carries;
+
+  /* Main add loop.  Generate a raw output sum in rp[] and a carry vector
+     in cy[].  */
+#pragma _CRI ivdep
+  for (i = 0; i < n; i++)
+    {
+      a = up[i];
+      b = vp[i];
+      s0 = a + b;
+      rp[i] = s0;
+      c0 = ((a & b) | ((a | b) & ~s0)) >> 63;   /* bit 63 of majority (a, b, ~s0) */
+      cy[i] = c0;
+    }
+  /* Carry add loop.  Add the carry vector cy[], shifted up one limb, to the
+     raw sum rp[] and store the new sum back to rp[].  Count in more_carries
+     how many of these additions themselves carry out.  */
+  more_carries = 0;
+#pragma _CRI ivdep
+  for (i = 1; i < n; i++)
+    {
+      r = rp[i];
+      c0 = cy[i - 1];
+      s0 = r + c0;
+      rp[i] = s0;
+      c0 = (r & ~s0) >> 63;     /* 1 iff bit 63 went from set to clear */
+      more_carries += c0;
+    }
+  /* If that second loop generated carry, handle that in scalar loop.  */
+  if (more_carries)
+    {
+      mp_limb_t cyrec = 0;      /* carry propagating into the current limb */
+      /* Look for places where rp[k] is zero and cy[k-1] is non-zero.
+        These are where we got a recurrency carry.  */
+      for (i = 1; i < n; i++)
+       {
+         r = rp[i];
+         c0 = (r == 0 && cy[i - 1] != 0);
+         s0 = r + cyrec;
+         rp[i] = s0;
+         c1 = (r & ~s0) >> 63;
+         cyrec = c0 | c1;
+       }
+      return cyrec | cy[n - 1];
+    }
+
+  return cy[n - 1];
+}
diff --git a/mpn/cray/cfp/addmul_1.c b/mpn/cray/cfp/addmul_1.c

new file mode 100644 (file)

index 0000000..c981b3d
--- /dev/null
+++ b/mpn/cray/cfp/addmul_1.c
@@ -0,0 +1,38 @@
+/* mpn_addmul_1 for Cray PVP.
+
+Copyright 1996, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+mp_limb_t
+mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t limb)
+{
+  /* GMPN_MULWW fills hi[]/lo[], taken to be the high/low product limbs.  */
+  mp_limb_t lo[n], hi[n], sum[n];
+  mp_limb_t carry;
+
+  GMPN_MULWW (hi, lo, up, &n, &limb);
+  carry = mpn_add_n (sum, rp, lo, n) + hi[n - 1];
+  rp[0] = sum[0];
+  /* The high limbs sit one position up: add hi[0..n-2] at rp[1..n-1].  */
+  if (n != 1)
+    carry += mpn_add_n (rp + 1, sum + 1, hi, n - 1);
+  return carry;
+}
diff --git a/mpn/cray/cfp/mul_1.c b/mpn/cray/cfp/mul_1.c

new file mode 100644 (file)

index 0000000..5038e93
--- /dev/null
+++ b/mpn/cray/cfp/mul_1.c
@@ -0,0 +1,37 @@
+/* mpn_mul_1 for Cray PVP.
+
+Copyright 1996, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+mp_limb_t
+mpn_mul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t limb)
+{
+  /* GMPN_MULWW fills hi[]/lo[], taken to be the high/low product limbs.  */
+  mp_limb_t lo[n], hi[n];
+  mp_limb_t carry;
+
+  GMPN_MULWW (hi, lo, up, &n, &limb);
+  /* The high limbs sit one position up: add hi[0..n-2] to lo[1..n-1].  */
+  carry = (n != 1) ? mpn_add_n (rp + 1, lo + 1, hi, n - 1) : 0;
+  rp[0] = lo[0];
+
+  return carry + hi[n - 1];
+}
diff --git a/mpn/cray/cfp/mulwwc90.s b/mpn/cray/cfp/mulwwc90.s

new file mode 100644 (file)

index 0000000..3234913
--- /dev/null
+++ b/mpn/cray/cfp/mulwwc90.s
@@ -0,0 +1,243 @@
+*     Helper for mpn_mul_1, mpn_addmul_1, and mpn_submul_1 for Cray PVP (C90).
+
+*     Copyright 1996, 2000 Free Software Foundation, Inc.
+*     This file is generated from mulww.f in this same directory.
+
+*     This file is part of the GNU MP Library.
+
+*     The GNU MP Library is free software; you can redistribute it and/or
+*     modify it under the terms of the GNU Lesser General Public License as
+*     published by the Free Software Foundation; either version 3 of the
+*     License, or (at your option) any later version.
+
+*     The GNU MP Library is distributed in the hope that it will be useful,
+*     but WITHOUT ANY WARRANTY; without even the implied warranty of
+*     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+*     Lesser General Public License for more details.
+
+*     You should have received a copy of the GNU Lesser General Public License
+*     along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+            IDENT           GMPN_MULWW
+**********************************************
+*      Assemble with Cal Version 2.0         *
+*                                            *
+* Generated by CFT77   6.0.4.19              *
+*           on 06/27/00 at 04:34:13          *
+*                                            *
+**********************************************
+* ALLOW UNDERSCORES IN IDENTIFIERS
+            EDIT            OFF
+            FORMAT          NEW
+@DATA       SECTION         DATA,CM
+@DATA       =               W.*
+            CON             O'0000000000040000000000
+            CON             O'0435152404713723252514
+            CON             O'0535270000000000000000
+            CON             O'0000000000000001200012
+            VWD             32/0,32/P.GMPN_MULWW
+            CON             O'0014003000000000001416
+            CON             O'0000000000000000000011
+            CON             O'0000000000000000000215
+            BSSZ            1
+@CODE       SECTION         CODE
+@CODE       =               P.*
+L3          =               P.*
+            A0              A6
+            A5              6
+            B03,A5          0,A0
+            A0              A1+A2
+            A5              1
+            0,A0            T00,A5
+            B02             A2
+            B66             A3
+            B01             A6
+            A7              P.L4
+            B00             A7
+            A6              @DATA
+            J               $STKOFEN
+GMPN_MULWW  =               P.*
+            A0              @DATA+3
+            B77             A0
+            A1              13
+            A0              B66
+            A2              B66
+            A4              B67
+            0,A0            B77,A1
+            A7              782
+            A3              A2+A7
+            A0              A4-A3
+            JAM             L3
+            A0              A6
+            A5              6
+            B03,A5          0,A0
+            A0              A1+A2
+            A5              1
+            0,A0            T00,A5
+            B02             A2
+            B66             A3
+            B01             A6
+L4          =               P.*
+            A7              B07
+            S7              0,A7
+            A6              B10
+            S6              0,A6
+            S5              1
+            S4              <22
+            S7              S7-S5
+            S5              #S7
+            T00             S6
+            S6              S6>22
+            S7              T00
+            S7              S7>44
+            S3              T00
+            S3              S3&S4
+            S6              S6&S4
+            S7              S7&S4
+            S3              S3<24
+            S6              S6<24
+            S7              S7<24
+            S0              S5
+            S4              S5
+            S1              S6
+            S2              S3
+            S3              S7
+            JSP             L5
+L6          =               P.*
+            S7              -S4
+            A2              S7
+            VL              A2
+            A3              B06
+            A5              B05
+            A4              B04
+            A1              VL
+            A2              S4
+L7          =               P.*
+            A0              A3
+            VL              A1
+            V7              ,A0,1
+            B11             A5
+            A7              22
+            B12             A4
+            V6              V7>A7
+            B13             A3
+            S7              <22
+            A3              B02
+            V5              S7&V6
+            A6              24
+            V4              V5<A6
+            V3              S1*FV4
+            V2              S7&V7
+            V1              V2<A6
+            V0              S3*FV1
+            V6              V0+V3
+            A5              44
+            V5              V7>A5
+            V2              S1*FV1
+            V3              S7&V5
+            A0              14
+            B77             A0
+            A4              B77
+            A0              A4+A3
+            ,A0,1           V2
+            V0              V3<A6
+            V7              S2*FV1
+            A4              142
+            A0              A4+A3
+            ,A0,1           V7
+            V5              V7>A7
+            V2              S2*FV0
+            V3              V6+V2
+            S7              <20
+            V1              S7&V3
+            A4              270
+            A0              A4+A3
+            ,A0,1           V0
+            A4              14
+            A0              A4+A3
+            V7              ,A0,1
+            V6              V1<A7
+            V2              S2*FV4
+            V0              V7+V2
+            S7              <42
+            V1              S7&V0
+            A4              398
+            A0              A4+A3
+            ,A0,1           V0
+            V7              S3*FV4
+            V2              V5+V1
+            V0              V3<A5
+            A5              526
+            A0              A5+A3
+            ,A0,1           V0
+            A5              270
+            A0              A5+A3
+            V4              ,A0,1
+            V5              V2+V6
+            A5              20
+            V1              V3>A5
+            V0              S1*FV4
+            A5              654
+            A0              A5+A3
+            ,A0,1           V1
+            V6              V7+V0
+            A5              2
+            V2              V6<A5
+            V3              S3*FV4
+            A5              142
+            A0              A5+A3
+            V1              ,A0,1
+            A5              526
+            A0              A5+A3
+            V7              ,A0,1
+            V0              V1+V7
+            V6              V3<A6
+            V4              V6+V2
+            A6              42
+            V7              V5>A6
+            A5              654
+            CPW
+            A0              A5+A3
+            V1              ,A0,1
+            A5              398
+            A0              A5+A3
+            V3              ,A0,1
+            V6              V4+V1
+            V2              V3>A6
+            V5              V6+V2
+            A6              B12
+            V4              V3<A7
+            A7              B13
+            A3              A7+A1
+            A7              B11
+            A5              A7+A1
+            A4              A6+A1
+            A7              A2+A1
+            A0              A2+A1
+            A2              128
+            B13             A0
+            V1              V0+V4
+            A0              B11
+            ,A0,1           V1
+            V6              V5+V7
+            A0              A6
+            ,A0,1           V6
+            A0              B13
+            A1              A2
+            A2              A7
+            JAN             L7
+L8          =               P.*
+L5          =               P.*
+            S1              0
+            A0              B02
+            A2              B02
+            A1              13
+            B66             A0
+            B77,A1          0,A0
+            A0              A2+A1
+            A1              1
+            T00,A1          0,A0
+            J               B00
+            EXT             $STKOFEN:p
+            ENTRY           GMPN_MULWW
+            END
diff --git a/mpn/cray/cfp/mulwwj90.s b/mpn/cray/cfp/mulwwj90.s

new file mode 100644 (file)

index 0000000..94d391c
--- /dev/null
+++ b/mpn/cray/cfp/mulwwj90.s
@@ -0,0 +1,242 @@
+*     Helper for mpn_mul_1, mpn_addmul_1, and mpn_submul_1 for Cray PVP (J90).
+
+*     Copyright 1996, 2000 Free Software Foundation, Inc.
+*     This file is generated from mulww.f in this same directory.
+
+*     This file is part of the GNU MP Library.
+
+*     The GNU MP Library is free software; you can redistribute it and/or
+*     modify it under the terms of the GNU Lesser General Public License as
+*     published by the Free Software Foundation; either version 3 of the
+*     License, or (at your option) any later version.
+
+*     The GNU MP Library is distributed in the hope that it will be useful,
+*     but WITHOUT ANY WARRANTY; without even the implied warranty of
+*     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+*     Lesser General Public License for more details.
+
+*     You should have received a copy of the GNU Lesser General Public License
+*     along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+            IDENT           GMPN_MULWW
+**********************************************
+*      Assemble with Cal Version 2.0         *
+*                                            *
+* Generated by CFT77   6.0.4.19              *
+*           on 06/27/00 at 04:34:13          *
+*                                            *
+**********************************************
+* ALLOW UNDERSCORES IN IDENTIFIERS
+            EDIT            OFF
+            FORMAT          NEW
+@DATA       SECTION         DATA,CM
+@DATA       =               W.*
+            CON             O'0000000000040000000000
+            CON             O'0435152404713723252514
+            CON             O'0535270000000000000000
+            CON             O'0000000000000001200012
+            VWD             32/0,32/P.GMPN_MULWW
+            CON             O'0014003000000000001416
+            CON             O'0000000000000000000011
+            CON             O'0000000000000000000215
+            BSSZ            1
+@CODE       SECTION         CODE
+@CODE       =               P.*
+L3          =               P.*
+            A0              A6
+            A5              6
+            B03,A5          0,A0
+            A0              A1+A2
+            A5              1
+            0,A0            T00,A5
+            B02             A2
+            B66             A3
+            B01             A6
+            A7              P.L4
+            B00             A7
+            A6              @DATA
+            J               $STKOFEN
+GMPN_MULWW  =               P.*
+            A0              @DATA+3
+            B77             A0
+            A1              13
+            A0              B66
+            A2              B66
+            A4              B67
+            0,A0            B77,A1
+            A7              782
+            A3              A2+A7
+            A0              A4-A3
+            JAM             L3
+            A0              A6
+            A5              6
+            B03,A5          0,A0
+            A0              A1+A2
+            A5              1
+            0,A0            T00,A5
+            B02             A2
+            B66             A3
+            B01             A6
+L4          =               P.*
+            A7              B07
+            S7              0,A7
+            A6              B10
+            S6              0,A6
+            S5              1
+            S4              <22
+            S7              S7-S5
+            S5              #S7
+            T00             S6
+            S6              S6>22
+            S7              T00
+            S7              S7>44
+            S3              T00
+            S3              S3&S4
+            S6              S6&S4
+            S7              S7&S4
+            S3              S3<24
+            S6              S6<24
+            S7              S7<24
+            S0              S5
+            S4              S5
+            S1              S6
+            S2              S3
+            S3              S7
+            JSP             L5
+L6          =               P.*
+            S7              -S4
+            A2              S7
+            VL              A2
+            A3              B06
+            A5              B05
+            A4              B04
+            A1              VL
+            A2              S4
+L7          =               P.*
+            A0              A3
+            VL              A1
+            V7              ,A0,1
+            B11             A5
+            A7              22
+            B12             A4
+            V6              V7>A7
+            B13             A3
+            S7              <22
+            A3              B02
+            V5              S7&V6
+            A6              24
+            V4              V5<A6
+            V3              S1*FV4
+            V2              S7&V7
+            V1              V2<A6
+            V0              S3*FV1
+            V6              V0+V3
+            A5              44
+            V5              V7>A5
+            V2              S1*FV1
+            V3              S7&V5
+            A0              14
+            B77             A0
+            A4              B77
+            A0              A4+A3
+            ,A0,1           V2
+            V0              V3<A6
+            V7              S2*FV1
+            A4              142
+            A0              A4+A3
+            ,A0,1           V7
+            V5              V7>A7
+            V2              S2*FV0
+            V3              V6+V2
+            S7              <20
+            V1              S7&V3
+            A4              270
+            A0              A4+A3
+            ,A0,1           V0
+            A4              14
+            A0              A4+A3
+            V7              ,A0,1
+            V6              V1<A7
+            V2              S2*FV4
+            V0              V7+V2
+            S7              <42
+            V1              S7&V0
+            A4              398
+            A0              A4+A3
+            ,A0,1           V0
+            V7              S3*FV4
+            V2              V5+V1
+            V0              V3<A5
+            A5              526
+            A0              A5+A3
+            ,A0,1           V0
+            A5              270
+            A0              A5+A3
+            V4              ,A0,1
+            V5              V2+V6
+            A5              20
+            V1              V3>A5
+            V0              S1*FV4
+            A5              654
+            A0              A5+A3
+            ,A0,1           V1
+            V6              V7+V0
+            A5              2
+            V2              V6<A5
+            V3              S3*FV4
+            A5              142
+            A0              A5+A3
+            V1              ,A0,1
+            A5              526
+            A0              A5+A3
+            V7              ,A0,1
+            V0              V1+V7
+            V6              V3<A6
+            V4              V6+V2
+            A6              42
+            V7              V5>A6
+            A5              654
+            A0              A5+A3
+            V1              ,A0,1
+            A5              398
+            A0              A5+A3
+            V3              ,A0,1
+            V6              V4+V1
+            V2              V3>A6
+            V5              V6+V2
+            A6              B12
+            V4              V3<A7
+            A7              B13
+            A3              A7+A1
+            A7              B11
+            A5              A7+A1
+            A4              A6+A1
+            A7              A2+A1
+            A0              A2+A1
+            A2              64
+            B13             A0
+            V1              V0+V4
+            A0              B11
+            ,A0,1           V1
+            V6              V5+V7
+            A0              A6
+            ,A0,1           V6
+            A0              B13
+            A1              A2
+            A2              A7
+            JAN             L7
+L8          =               P.*
+L5          =               P.*
+            S1              0
+            A0              B02
+            A2              B02
+            A1              13
+            B66             A0
+            B77,A1          0,A0
+            A0              A2+A1
+            A1              1
+            T00,A1          0,A0
+            J               B00
+            EXT             $STKOFEN:p
+            ENTRY           GMPN_MULWW
+            END
diff --git a/mpn/cray/cfp/submul_1.c b/mpn/cray/cfp/submul_1.c

new file mode 100644 (file)

index 0000000..0507d0e
--- /dev/null
+++ b/mpn/cray/cfp/submul_1.c
@@ -0,0 +1,38 @@
+/* mpn_submul_1 for Cray PVP.
+
+Copyright 1996, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Subtract {up,n} * limb from {rp,n} in place and return the limb that
+   could not be taken out of rp[].  GMPN_MULWW is handed two scratch arrays
+   plus the addresses of n and limb; it leaves the high halves of the
+   products up[i] * limb in its first array and the low halves in its
+   second.  The scratch arrays are VLAs of n limbs and hi[n - 1] is read, so
+   n must be at least 1.  */
+mp_limb_t
+mpn_submul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t limb)
+{
+  mp_limb_t lo[n];
+  mp_limb_t hi[n];
+  mp_limb_t diff[n];
+  mp_limb_t borrow;
+
+  GMPN_MULWW (hi, lo, up, &n, &limb);
+
+  /* First pass: rp - lo goes to scratch, only limb 0 is final already.  */
+  borrow = mpn_sub_n (diff, rp, lo, n);
+  rp[0] = diff[0];
+
+  /* Second pass: the high halves belong one limb position further up.  */
+  if (n != 1)
+    borrow += mpn_sub_n (rp + 1, diff + 1, hi, n - 1);
+
+  /* hi[n - 1] has no limb left in rp[] to be subtracted from, so it is
+     folded into the returned value together with the two borrows.  */
+  return borrow + hi[n - 1];
+}
diff --git a/mpn/cray/gmp-mparam.h b/mpn/cray/gmp-mparam.h

new file mode 100644 (file)

index 0000000..3ff8f5e
--- /dev/null
+++ b/mpn/cray/gmp-mparam.h
@@ -0,0 +1,69 @@
+/* Cray T90 CFP gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2002, 2003, 2004 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 64               /* bits per limb */
+#define BYTES_PER_MP_LIMB 8            /* GMP_LIMB_BITS / 8 */
+
+#if 0                                  /* compiled out */
+#define UMUL_TIME       519
+#define UDIV_TIME      2360
+#endif
+
+/* T90 Unicos 10.0.X in CFP mode */
+
+/* Generated by tuneup.c, 2004-02-07, system compiler.  Presumably every
+   threshold and FFT table from here to the end of the file comes from that
+   run; re-run the tuner rather than adjusting single values by hand.  */
+
+#define MUL_TOOM22_THRESHOLD             71
+#define MUL_TOOM33_THRESHOLD            131
+
+#define SQR_BASECASE_THRESHOLD           32
+#define SQR_TOOM2_THRESHOLD             199
+#define SQR_TOOM3_THRESHOLD             363
+
+#define DIV_SB_PREINV_THRESHOLD           0  /* (preinv always) */
+#define DIV_DC_THRESHOLD                996
+#define POWM_THRESHOLD                  601
+
+#define HGCD_THRESHOLD                  964
+#define GCD_ACCEL_THRESHOLD               3
+#define GCD_DC_THRESHOLD               2874
+#define JACOBI_BASE_METHOD                2
+
+#define DIVREM_1_NORM_THRESHOLD           0  /* preinv always */
+#define DIVREM_1_UNNORM_THRESHOLD         0  /* always */
+#define MOD_1_NORM_THRESHOLD              0  /* always */
+#define MOD_1_UNNORM_THRESHOLD            0  /* always */
+#define USE_PREINV_DIVREM_1               1  /* preinv always */
+#define USE_PREINV_MOD_1                  1  /* preinv always */
+#define DIVREM_2_THRESHOLD                0  /* preinv always */
+#define DIVEXACT_1_THRESHOLD              0  /* always */
+#define MODEXACT_1_ODD_THRESHOLD          0  /* always */
+
+#define GET_STR_DC_THRESHOLD             26
+#define GET_STR_PRECOMPUTE_THRESHOLD     42
+#define SET_STR_THRESHOLD            145756
+
+#define MUL_FFT_TABLE  { 272, 544, 1088, 2304, 5120, 12288, 49152, 0 }  /* list ends with 0 */
+#define MUL_FFT_MODF_THRESHOLD          200
+#define MUL_FFT_THRESHOLD              1664
+
+#define SQR_FFT_TABLE  { 1008, 2080, 3904, 7936, 17408, 45056, 0 }  /* list ends with 0 */
+#define SQR_FFT_MODF_THRESHOLD          600
+#define SQR_FFT_THRESHOLD              2976
diff --git a/mpn/cray/hamdist.c b/mpn/cray/hamdist.c

new file mode 100644 (file)

index 0000000..d80b4d6
--- /dev/null
+++ b/mpn/cray/hamdist.c
@@ -0,0 +1,32 @@
+/* Cray mpn_hamdist -- hamming distance count.
+
+Copyright 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <intrinsics.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Hamming distance of two n-limb operands.  Each limb pair of {p1,n} and
+   {p2,n} is XORed and the result is passed to the Cray _popcnt intrinsic;
+   the per-limb counts are summed.  Returns 0 when the loop does not run
+   (n not positive).  */
+unsigned long int
+mpn_hamdist (mp_srcptr p1, mp_srcptr p2, mp_size_t n)
+{
+  unsigned long int distance = 0;
+  mp_size_t k = 0;
+
+  while (k < n)
+    {
+      distance += _popcnt (p1[k] ^ p2[k]);
+      k++;
+    }
+
+  return distance;
+}
diff --git a/mpn/cray/ieee/addmul_1.c b/mpn/cray/ieee/addmul_1.c

new file mode 100644 (file)

index 0000000..158a79c
--- /dev/null
+++ b/mpn/cray/ieee/addmul_1.c
@@ -0,0 +1,101 @@
+/* Cray PVP/IEEE mpn_addmul_1 -- multiply a limb vector with a limb and add the
+   result to a second limb vector.
+
+Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+/* This code runs at just under 9 cycles/limb on a T90.  That is not perfect,
+   mainly due to vector register shortage in the main loop.  Assembly code
+   should bring it down to perhaps 7 cycles/limb.  */
+
+#include <intrinsics.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+/* Compute {rp,n} += {up,n} * vl and return the carry-out limb, i.e. the part
+   of the result that does not fit in rp[].  Three passes are used: a
+   multiply-accumulate pass that leaves raw sums in rp[] and per-limb carry
+   counts in cy[], a pass that adds cy[i-1] into rp[i], and -- only when that
+   second pass itself overflowed somewhere -- a scalar pass that ripples the
+   leftover carries.  cy[] is a VLA of n limbs and up[n-1] is read, so n must
+   be at least 1.  The ">> 63" carry extraction assumes 64-bit limbs.
+   _int_mult_upper is a Cray intrinsic, presumably the high limb of the
+   two-limb product -- confirm against the Cray documentation.  */
+mp_limb_t
+mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
+{
+  mp_limb_t cy[n];             /* cy[i]: carries generated at limb i, 0..2 */
+  mp_limb_t a, b, r, s0, s1, c0, c1;
+  mp_size_t i;
+  int more_carries;            /* overflows seen in the carry add loop */
+
+  if (up == rp)
+    {
+      /* The algorithm used below cannot handle overlap.  Handle it here by
+        making a temporary copy of the source vector, then call ourselves.
+        (The main loop reads up[i - 1] after rp[i - 1] has been stored.)
+        Only exact aliasing up == rp is detected here.  */
+      mp_limb_t xp[n];
+      MPN_COPY (xp, up, n);
+      return mpn_addmul_1 (rp, xp, n, vl);
+    }
+
+  a = up[0] * vl;              /* low limb of up[0] * vl */
+  r = rp[0];
+  s0 = a + r;
+  rp[0] = s0;
+  c0 = ((a & r) | ((a | r) & ~s0)) >> 63;      /* carry out of a + r */
+  cy[0] = c0;
+
+  /* Main multiply loop.  Generate a raw accumulated output product in rp[]
+     and a carry vector in cy[].  Limb i receives the low limb of
+     up[i] * vl, the upper limb of up[i - 1] * vl and the old rp[i]; the two
+     additions can each carry, hence cy[i] is 0, 1 or 2.  */
+#pragma _CRI ivdep
+  for (i = 1; i < n; i++)
+    {
+      a = up[i] * vl;
+      b = _int_mult_upper (up[i - 1], vl);
+      s0 = a + b;
+      c0 = ((a & b) | ((a | b) & ~s0)) >> 63;  /* carry out of a + b */
+      r = rp[i];
+      s1 = s0 + r;
+      rp[i] = s1;
+      c1 = ((s0 & r) | ((s0 | r) & ~s1)) >> 63;        /* carry out of s0 + r */
+      cy[i] = c0 + c1;
+    }
+  /* Carry add loop.  Add the carry vector cy[] to the raw result rp[] and
+     store the new result back to rp[].  Overflows of these additions are
+     only counted here, not propagated.  */
+  more_carries = 0;
+#pragma _CRI ivdep
+  for (i = 1; i < n; i++)
+    {
+      r = rp[i];
+      c0 = cy[i - 1];
+      s0 = r + c0;
+      rp[i] = s0;
+      c0 = (r & ~s0) >> 63;    /* top bit went 1 -> 0: the addition wrapped */
+      more_carries += c0;
+    }
+  /* If that second loop generated carry, handle that in scalar loop.  */
+  if (more_carries)
+    {
+      mp_limb_t cyrec = 0;     /* carry rippling from limb i - 1 into limb i */
+      /* Look for places where rp[k] == 0 and cy[k-1] == 1 or
+        rp[k] == 1 and cy[k-1] == 2.
+        These are where we got a recurrency carry.  */
+      for (i = 1; i < n; i++)
+       {
+         r = rp[i];
+         c0 = r < cy[i - 1];   /* limb i wrapped in the carry add loop */
+         s0 = r + cyrec;
+         rp[i] = s0;
+         c1 = (r & ~s0) >> 63; /* adding the rippled carry wrapped */
+         cyrec = c0 | c1;
+       }
+      return _int_mult_upper (up[n - 1], vl) + cyrec + cy[n - 1];
+    }
+
+  return _int_mult_upper (up[n - 1], vl) + cy[n - 1];
+}
diff --git a/mpn/cray/ieee/gmp-mparam.h b/mpn/cray/ieee/gmp-mparam.h

new file mode 100644 (file)

index 0000000..3442c11
--- /dev/null
+++ b/mpn/cray/ieee/gmp-mparam.h
@@ -0,0 +1,62 @@
+/* Cray T90 IEEE gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2002, 2004 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 64               /* bits per limb */
+#define BYTES_PER_MP_LIMB 8            /* GMP_LIMB_BITS / 8 */
+
+/* Generated by tuneup.c, 2004-02-07, system compiler.  Presumably every
+   threshold and FFT table from here to the end of the file comes from that
+   run; re-run the tuner rather than adjusting single values by hand.  */
+
+#define MUL_TOOM22_THRESHOLD            130
+#define MUL_TOOM33_THRESHOLD            260
+
+#define SQR_BASECASE_THRESHOLD            9  /* karatsuba */
+#define SQR_TOOM2_THRESHOLD               0  /* never sqr_basecase */
+#define SQR_TOOM3_THRESHOLD              34
+
+#define DIV_SB_PREINV_THRESHOLD           0  /* preinv always */
+#define DIV_DC_THRESHOLD                390
+#define POWM_THRESHOLD                  656
+
+#define HGCD_THRESHOLD                  964
+#define GCD_ACCEL_THRESHOLD               3
+#define GCD_DC_THRESHOLD                964
+#define JACOBI_BASE_METHOD                2
+
+#define DIVREM_1_NORM_THRESHOLD           0  /* preinv always */
+#define DIVREM_1_UNNORM_THRESHOLD         0  /* always */
+#define MOD_1_NORM_THRESHOLD              0  /* always */
+#define MOD_1_UNNORM_THRESHOLD            0  /* always */
+#define USE_PREINV_DIVREM_1               1  /* preinv always */
+#define USE_PREINV_MOD_1                  1  /* preinv always */
+#define DIVREM_2_THRESHOLD                0  /* preinv always */
+#define DIVEXACT_1_THRESHOLD              0  /* always */
+#define MODEXACT_1_ODD_THRESHOLD          0  /* always */
+
+#define GET_STR_DC_THRESHOLD             45
+#define GET_STR_PRECOMPUTE_THRESHOLD     77
+#define SET_STR_THRESHOLD            145756
+
+#define MUL_FFT_TABLE  { 1104, 2208, 4416, 8960, 19456, 45056, 0 }  /* list ends with 0 */
+#define MUL_FFT_MODF_THRESHOLD         1168
+#define MUL_FFT_THRESHOLD              6528
+
+#define SQR_FFT_TABLE  { 368, 736, 1600, 2816, 7168, 12288, 0 }  /* list ends with 0 */
+#define SQR_FFT_MODF_THRESHOLD          296
+#define SQR_FFT_THRESHOLD              1312
diff --git a/mpn/cray/ieee/invert_limb.c b/mpn/cray/ieee/invert_limb.c

new file mode 100644 (file)

index 0000000..220cc25
--- /dev/null
+++ b/mpn/cray/ieee/invert_limb.c
@@ -0,0 +1,117 @@
+/* mpn_invert_limb -- Invert a normalized limb.
+
+Copyright 1991, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published
+by the Free Software Foundation; either version 3 of the License, or (at
+your option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/*
+  This is needed to make configure define HAVE_NATIVE_mpn_invert_limb:
+  PROLOGUE(mpn_invert_limb)
+*/
+/* Seed table for mpn_invert_limb.  It is looked up with the top 9 bits of
+   the limb minus 0x100, so a limb whose most significant bit is set selects
+   one of the 0x100 entries.  Entry k appears to be about 2^18 / (0x100 + k)
+   -- e.g. entry 1 is 0x3fc and entry 0xff is 0x201; entry 0 is 0x3ff where
+   the commented-out 0x400 would otherwise stand.  */
+static const unsigned short int approx_tab[0x100] =
+{
+  /* 0x400, */
+  0x3ff,
+         0x3fc, 0x3f8, 0x3f4, 0x3f0, 0x3ec, 0x3e8, 0x3e4,
+  0x3e0, 0x3dd, 0x3d9, 0x3d5, 0x3d2, 0x3ce, 0x3ca, 0x3c7,
+  0x3c3, 0x3c0, 0x3bc, 0x3b9, 0x3b5, 0x3b2, 0x3ae, 0x3ab,
+  0x3a8, 0x3a4, 0x3a1, 0x39e, 0x39b, 0x397, 0x394, 0x391,
+  0x38e, 0x38b, 0x387, 0x384, 0x381, 0x37e, 0x37b, 0x378,
+  0x375, 0x372, 0x36f, 0x36c, 0x369, 0x366, 0x364, 0x361,
+  0x35e, 0x35b, 0x358, 0x355, 0x353, 0x350, 0x34d, 0x34a,
+  0x348, 0x345, 0x342, 0x340, 0x33d, 0x33a, 0x338, 0x335,
+  0x333, 0x330, 0x32e, 0x32b, 0x329, 0x326, 0x324, 0x321,
+  0x31f, 0x31c, 0x31a, 0x317, 0x315, 0x313, 0x310, 0x30e,
+  0x30c, 0x309, 0x307, 0x305, 0x303, 0x300, 0x2fe, 0x2fc,
+  0x2fa, 0x2f7, 0x2f5, 0x2f3, 0x2f1, 0x2ef, 0x2ec, 0x2ea,
+  0x2e8, 0x2e6, 0x2e4, 0x2e2, 0x2e0, 0x2de, 0x2dc, 0x2da,
+  0x2d8, 0x2d6, 0x2d4, 0x2d2, 0x2d0, 0x2ce, 0x2cc, 0x2ca,
+  0x2c8, 0x2c6, 0x2c4, 0x2c2, 0x2c0, 0x2be, 0x2bc, 0x2bb,
+  0x2b9, 0x2b7, 0x2b5, 0x2b3, 0x2b1, 0x2b0, 0x2ae, 0x2ac,
+  0x2aa, 0x2a8, 0x2a7, 0x2a5, 0x2a3, 0x2a1, 0x2a0, 0x29e,
+  0x29c, 0x29b, 0x299, 0x297, 0x295, 0x294, 0x292, 0x291,
+  0x28f, 0x28d, 0x28c, 0x28a, 0x288, 0x287, 0x285, 0x284,
+  0x282, 0x280, 0x27f, 0x27d, 0x27c, 0x27a, 0x279, 0x277,
+  0x276, 0x274, 0x273, 0x271, 0x270, 0x26e, 0x26d, 0x26b,
+  0x26a, 0x268, 0x267, 0x265, 0x264, 0x263, 0x261, 0x260,
+  0x25e, 0x25d, 0x25c, 0x25a, 0x259, 0x257, 0x256, 0x255,
+  0x253, 0x252, 0x251, 0x24f, 0x24e, 0x24d, 0x24b, 0x24a,
+  0x249, 0x247, 0x246, 0x245, 0x243, 0x242, 0x241, 0x240,
+  0x23e, 0x23d, 0x23c, 0x23b, 0x239, 0x238, 0x237, 0x236,
+  0x234, 0x233, 0x232, 0x231, 0x230, 0x22e, 0x22d, 0x22c,
+  0x22b, 0x22a, 0x229, 0x227, 0x226, 0x225, 0x224, 0x223,
+  0x222, 0x220, 0x21f, 0x21e, 0x21d, 0x21c, 0x21b, 0x21a,
+  0x219, 0x218, 0x216, 0x215, 0x214, 0x213, 0x212, 0x211,
+  0x210, 0x20f, 0x20e, 0x20d, 0x20c, 0x20b, 0x20a, 0x209,
+  0x208, 0x207, 0x206, 0x205, 0x204, 0x203, 0x202, 0x201,
+};
+
+/* iteration: z = 2z-(z**2)d
+
+   mpn_invert_limb starts from a table seed selected by the top 9 bits of d
+   (the index expression subtracts 0x100, so d must have its most significant
+   bit set), refines it with the iteration above at growing precision, and
+   finishes with up to two +1 corrections of the result.  Presumably the
+   value returned follows the usual GMP convention for invert_limb,
+   floor ((B^2 - 1) / d) - B with B = 2^GMP_LIMB_BITS -- confirm against
+   gmp-impl.h.  */
+
+mp_limb_t
+mpn_invert_limb (mp_limb_t d)
+{
+  mp_limb_t z, z2l, z2h, tl, th;
+  mp_limb_t xh, xl;
+  mp_limb_t zh, zl;
+
+  /* Exactly one of the two blocks below is compiled, depending on
+     GMP_LIMB_BITS; the 64-bit variant runs one extra low-precision step.  */
+#if GMP_LIMB_BITS == 32
+  z = approx_tab[(d >> 23) - 0x100] << 6;      /* z < 2^16 */
+
+  z2l = z * z;                                 /* z2l < 2^32 */
+  umul_ppmm (th, tl, z2l, d);
+  z = (z << 17) - (th << 1);
+#endif
+#if GMP_LIMB_BITS == 64
+  z = approx_tab[(d >> 55) - 0x100] << 6;      /* z < 2^16 */
+
+  z2l = z * z;                                 /* z2l < 2^32 */
+  th = z2l * (d >> 32);                                /* th < 2^64 */
+  z = (z << 17) - (th >> 31);                  /* z < 2^32 */
+
+  z2l = z * z;
+  umul_ppmm (th, tl, z2l, d);
+  z = (z << 33) - (th << 1);
+#endif
+
+  /* Last step of the iteration at two-limb precision: z2h:z2l = z * z, then
+     th:tl accumulates the product of that square with d (its lowest limb xl
+     is dropped), is shifted left by two bits, and is subtracted from z << 2
+     taken as the high limb of a two-limb value.  */
+  umul_ppmm (z2h, z2l, z, z);
+  umul_ppmm (th, tl, z2h, d);
+  umul_ppmm (xh, xl, z2l, d);
+  tl += xh;
+  th += tl < xh;                               /* carry from tl += xh */
+  th = (th << 2) | (tl >> GMP_LIMB_BITS - 2);  /* parses as tl >> (GMP_LIMB_BITS - 2) */
+  tl = tl << 2;
+  sub_ddmmss (zh, zl, z << 2, 0, th, tl);
+
+  /* Final adjustment.  xh:xl is made to hold d * zh with d added to the high
+     limb; zh is then stepped up while adding d once more to xh:xl is judged
+     (by the tests on xh) not to overflow two limbs.  */
+  umul_ppmm (xh, xl, d, zh);
+  xh += d;             /* add_ssaaaa (xh, xl, xh, xl, d, 0); */
+  if (~xh != 0)                /* high limb is not all ones */
+    {
+      add_ssaaaa (xh, xl, xh, xl, 0, d);
+      zh++;
+    }
+
+  add_ssaaaa (xh, xl, xh, xl, 0, d);
+  if (xh != 0)
+    zh++;
+
+  return zh;
+}
diff --git a/mpn/cray/ieee/mul_1.c b/mpn/cray/ieee/mul_1.c

new file mode 100644 (file)

index 0000000..4dc2fd9
--- /dev/null
+++ b/mpn/cray/ieee/mul_1.c
@@ -0,0 +1,93 @@
+/* Cray PVP/IEEE mpn_mul_1 -- multiply a limb vector with a limb and store the
+   result in a second limb vector.
+
+Copyright 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+/* This code runs at 5 cycles/limb on a T90.  That would probably
+   be hard to improve upon, even with assembly code.  */
+
+#include <intrinsics.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+mp_limb_t
+mpn_mul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
+{
+  mp_limb_t cy[n];
+  mp_limb_t a, b, r, s0, s1, c0, c1;
+  mp_size_t i;
+  int more_carries;
+
+  if (up == rp)
+    {
+      /* The algorithm used below cannot handle overlap.  Handle it here by
+        making a temporary copy of the source vector, then call ourselves.  */
+      mp_limb_t xp[n];
+      MPN_COPY (xp, up, n);
+      return mpn_mul_1 (rp, xp, n, vl);
+    }
+
+  a = up[0] * vl;
+  rp[0] = a;
+  cy[0] = 0;
+
+  /* Main multiply loop.  Generate a raw accumulated output product in rp[]
+     and a carry vector in cy[].  */
+#pragma _CRI ivdep
+  for (i = 1; i < n; i++)
+    {
+      a = up[i] * vl;
+      b = _int_mult_upper (up[i - 1], vl);
+      s0 = a + b;
+      c0 = ((a & b) | ((a | b) & ~s0)) >> 63;
+      rp[i] = s0;
+      cy[i] = c0;
+    }
+  /* Carry add loop.  Add the carry vector cy[] to the raw sum rp[] and
+     store the new sum back to rp[0].  */
+  more_carries = 0;
+#pragma _CRI ivdep
+  for (i = 2; i < n; i++)
+    {
+      r = rp[i];
+      c0 = cy[i - 1];
+      s0 = r + c0;
+      rp[i] = s0;
+      c0 = (r & ~s0) >> 63;
+      more_carries += c0;
+    }
+  /* If that second loop generated carry, handle that in scalar loop.  */
+  if (more_carries)
+    {
+      mp_limb_t cyrec = 0;
+      /* Look for places where rp[k] is zero and cy[k-1] is non-zero.
+        These are where we got a recurrency carry.  */
+      for (i = 2; i < n; i++)
+       {
+         r = rp[i];
+         c0 = (r == 0 && cy[i - 1] != 0);
+         s0 = r + cyrec;
+         rp[i] = s0;
+         c1 = (r & ~s0) >> 63;
+         cyrec = c0 | c1;
+       }
+      return _int_mult_upper (up[n - 1], vl) + cyrec + cy[n - 1];
+    }
+
+  return _int_mult_upper (up[n - 1], vl) + cy[n - 1];
+}
diff --git a/mpn/cray/ieee/mul_basecase.c b/mpn/cray/ieee/mul_basecase.c

new file mode 100644 (file)

index 0000000..ea32db3
--- /dev/null
+++ b/mpn/cray/ieee/mul_basecase.c
@@ -0,0 +1,97 @@
+/* Cray PVP/IEEE mpn_mul_basecase.
+
+Copyright 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+/* The most critical loop of this code runs at about 5 cycles/limb on a T90.
+   That is not perfect, mainly due to vector register shortage.  */
+
+#include <intrinsics.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+/* Store the product of {up,un} and {vp,vn} in the un+vn limbs at rp.  rp[]
+   is zeroed before up[] and vp[] are read, so rp must not overlap either
+   operand.  One row up[] * vp[j] is accumulated into rp[] per outer
+   iteration while the carries of all rows are merely counted in cy[]; the
+   counts are folded into rp[] once at the end, followed by a scalar ripple
+   pass if that folding overflowed anywhere.  up[un - 1] is read for every
+   row, so un must be at least 1.  The ">> 63" carry extraction assumes
+   64-bit limbs.  */
+void
+mpn_mul_basecase (mp_ptr rp,
+                 mp_srcptr up, mp_size_t un,
+                 mp_srcptr vp, mp_size_t vn)
+{
+  mp_limb_t cy[un + vn];       /* carry counts per limb, summed over all rows */
+  mp_limb_t vl;
+  mp_limb_t a, b, r, s0, s1, c0, c1;
+  mp_size_t i, j;
+  int more_carries;            /* overflows seen while folding cy[] into rp[] */
+
+  for (i = 0; i < un + vn; i++)
+    {
+      rp[i] = 0;
+      cy[i] = 0;
+    }
+
+#pragma _CRI novector
+  for (j = 0; j < vn; j++)
+    {
+      vl = vp[j];
+
+      a = up[0] * vl;          /* low limb of up[0] * vl */
+      r = rp[j];
+      s0 = a + r;
+      rp[j] = s0;
+      c0 = ((a & r) | ((a | r) & ~s0)) >> 63;  /* carry out of a + r */
+      cy[j] += c0;
+
+#pragma _CRI ivdep
+      for (i = 1; i < un; i++)
+       {
+         a = up[i] * vl;
+         b = _int_mult_upper (up[i - 1], vl);
+         s0 = a + b;
+         c0 = ((a & b) | ((a | b) & ~s0)) >> 63;       /* carry out of a + b */
+         r = rp[j + i];
+         s1 = s0 + r;
+         rp[j + i] = s1;
+         c1 = ((s0 & r) | ((s0 | r) & ~s1)) >> 63;     /* carry out of s0 + r */
+         cy[j + i] += c0 + c1;
+       }
+      rp[j + un] = _int_mult_upper (up[un - 1], vl);   /* no earlier row has written rp[j + un] */
+    }
+
+  more_carries = 0;            /* fold cy[i - 1] into rp[i], counting overflows */
+#pragma _CRI ivdep
+  for (i = 1; i < un + vn; i++)
+    {
+      r = rp[i];
+      c0 = cy[i - 1];
+      s0 = r + c0;
+      rp[i] = s0;
+      c0 = (r & ~s0) >> 63;    /* top bit went 1 -> 0: the addition wrapped */
+      more_carries += c0;
+    }
+  /* If that second loop generated carry, handle that in scalar loop.  */
+  if (more_carries)
+    {
+      mp_limb_t cyrec = 0;     /* carry rippling from limb i - 1 into limb i */
+      for (i = 1; i < un + vn; i++)
+       {
+         r = rp[i];
+         c0 = (r < cy[i - 1]); /* limb i wrapped when cy[i - 1] was added */
+         s0 = r + cyrec;
+         rp[i] = s0;
+         c1 = (r & ~s0) >> 63; /* adding the rippled carry wrapped */
+         cyrec = c0 | c1;
+       }
+    }
+}
diff --git a/mpn/cray/ieee/sqr_basecase.c b/mpn/cray/ieee/sqr_basecase.c

new file mode 100644 (file)

index 0000000..92a9a0e
--- /dev/null
+++ b/mpn/cray/ieee/sqr_basecase.c
@@ -0,0 +1,95 @@
+/* Cray PVP/IEEE mpn_sqr_basecase.
+
+Copyright 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+/* This is just mpn_mul_basecase with trivial modifications.  */
+
+#include <intrinsics.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+/* Store the square of {up,un} in the 2*un limbs at rp.  rp[] is zeroed
+   before up[] is read, so rp must not overlap up.  Every row up[] * up[j]
+   is accumulated in full (no use is made of the symmetry of a square) while
+   the carries of all rows are merely counted in cy[]; the counts are folded
+   into rp[] once at the end, followed by a scalar ripple pass if that
+   folding overflowed anywhere.  The ">> 63" carry extraction assumes 64-bit
+   limbs.  */
+void
+mpn_sqr_basecase (mp_ptr rp,
+                 mp_srcptr up, mp_size_t un)
+{
+  mp_limb_t cy[un + un];       /* carry counts per limb, summed over all rows */
+  mp_limb_t ul;
+  mp_limb_t a, b, r, s0, s1, c0, c1;
+  mp_size_t i, j;
+  int more_carries;            /* overflows seen while folding cy[] into rp[] */
+
+  for (i = 0; i < un + un; i++)
+    {
+      rp[i] = 0;
+      cy[i] = 0;
+    }
+
+#pragma _CRI novector
+  for (j = 0; j < un; j++)
+    {
+      ul = up[j];
+
+      a = up[0] * ul;          /* low limb of up[0] * ul */
+      r = rp[j];
+      s0 = a + r;
+      rp[j] = s0;
+      c0 = ((a & r) | ((a | r) & ~s0)) >> 63;  /* carry out of a + r */
+      cy[j] += c0;
+
+#pragma _CRI ivdep
+      for (i = 1; i < un; i++)
+       {
+         a = up[i] * ul;
+         b = _int_mult_upper (up[i - 1], ul);
+         s0 = a + b;
+         c0 = ((a & b) | ((a | b) & ~s0)) >> 63;       /* carry out of a + b */
+         r = rp[j + i];
+         s1 = s0 + r;
+         rp[j + i] = s1;
+         c1 = ((s0 & r) | ((s0 | r) & ~s1)) >> 63;     /* carry out of s0 + r */
+         cy[j + i] += c0 + c1;
+       }
+      rp[j + un] = _int_mult_upper (up[un - 1], ul);   /* no earlier row has written rp[j + un] */
+    }
+
+  more_carries = 0;            /* fold cy[i - 1] into rp[i], counting overflows */
+#pragma _CRI ivdep
+  for (i = 1; i < un + un; i++)
+    {
+      r = rp[i];
+      c0 = cy[i - 1];
+      s0 = r + c0;
+      rp[i] = s0;
+      c0 = (r & ~s0) >> 63;    /* top bit went 1 -> 0: the addition wrapped */
+      more_carries += c0;
+    }
+  /* If that second loop generated carry, handle that in scalar loop.  */
+  if (more_carries)
+    {
+      mp_limb_t cyrec = 0;     /* carry rippling from limb i - 1 into limb i */
+      for (i = 1; i < un + un; i++)
+       {
+         r = rp[i];
+         c0 = (r < cy[i - 1]); /* limb i wrapped when cy[i - 1] was added */
+         s0 = r + cyrec;
+         rp[i] = s0;
+         c1 = (r & ~s0) >> 63; /* adding the rippled carry wrapped */
+         cyrec = c0 | c1;
+       }
+    }
+}
diff --git a/mpn/cray/ieee/submul_1.c b/mpn/cray/ieee/submul_1.c

new file mode 100644 (file)

index 0000000..4d7a6b4
--- /dev/null
+++ b/mpn/cray/ieee/submul_1.c
@@ -0,0 +1,101 @@
+/* Cray PVP/IEEE mpn_submul_1 -- multiply a limb vector with a limb and
+   subtract the result from a second limb vector.
+
+Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+/* This code runs at just under 9 cycles/limb on a T90.  That is not perfect,
+   mainly due to vector register shortage in the main loop.  Assembly code
+   should bring it down to perhaps 7 cycles/limb.  */
+
+#include <intrinsics.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+/* Compute {rp,n} -= {up,n} * vl and return the borrow-out limb, i.e. the
+   part of the product that could not be taken out of rp[].  Three passes
+   are used: a multiply-subtract pass that leaves raw differences in rp[]
+   and per-limb carry/borrow counts in cy[], a pass that subtracts cy[i-1]
+   from rp[i], and -- only when that second pass itself underflowed
+   somewhere -- a scalar pass that ripples the leftover borrows.  cy[] is a
+   VLA of n limbs and up[n-1] is read, so n must be at least 1.  The ">> 63"
+   extraction assumes 64-bit limbs.  */
+mp_limb_t
+mpn_submul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
+{
+  mp_limb_t cy[n];             /* cy[i]: carries/borrows generated at limb i, 0..2 */
+  mp_limb_t a, b, r, s0, s1, c0, c1;
+  mp_size_t i;
+  int more_carries;            /* underflows seen in the carry subtract loop */
+
+  if (up == rp)
+    {
+      /* The algorithm used below cannot handle overlap.  Handle it here by
+        making a temporary copy of the source vector, then call ourselves.
+        (The main loop reads up[i - 1] after rp[i - 1] has been stored.)
+        Only exact aliasing up == rp is detected here.  */
+      mp_limb_t xp[n];
+      MPN_COPY (xp, up, n);
+      return mpn_submul_1 (rp, xp, n, vl);
+    }
+
+  a = up[0] * vl;              /* low limb of up[0] * vl */
+  r = rp[0];
+  s0 = r - a;
+  rp[0] = s0;
+  c1 = ((s0 & a) | ((s0 | a) & ~r)) >> 63;     /* borrow out of r - a */
+  cy[0] = c1;
+
+  /* Main multiply loop.  Generate a raw accumulated output product in rp[]
+     and a carry vector in cy[].  Limb i has the low limb of up[i] * vl plus
+     the upper limb of up[i - 1] * vl subtracted from it; the addition can
+     carry and the subtraction can borrow, hence cy[i] is 0, 1 or 2.  */
+#pragma _CRI ivdep
+  for (i = 1; i < n; i++)
+    {
+      a = up[i] * vl;
+      b = _int_mult_upper (up[i - 1], vl);
+      s0 = a + b;
+      c0 = ((a & b) | ((a | b) & ~s0)) >> 63;  /* carry out of a + b */
+      r = rp[i];
+      s1 = r - s0;
+      rp[i] = s1;
+      c1 = ((s1 & s0) | ((s1 | s0) & ~r)) >> 63;       /* borrow out of r - s0 */
+      cy[i] = c0 + c1;
+    }
+  /* Carry subtract loop.  Subtract the carry vector cy[] from the raw result
+     rp[] and store the new result back to rp[].  Underflows of these
+     subtractions are only counted here, not propagated.  */
+  more_carries = 0;
+#pragma _CRI ivdep
+  for (i = 1; i < n; i++)
+    {
+      r = rp[i];
+      c0 = cy[i - 1];
+      s0 = r - c0;
+      rp[i] = s0;
+      c0 = (s0 & ~r) >> 63;    /* top bit went 0 -> 1: the subtraction wrapped */
+      more_carries += c0;
+    }
+  /* If that second loop generated carry, handle that in scalar loop.  */
+  if (more_carries)
+    {
+      mp_limb_t cyrec = 0;     /* borrow rippling from limb i - 1 into limb i */
+      /* Look for places where rp[k] == ~0 and cy[k-1] == 1 or
+        rp[k] == ~1 and cy[k-1] == 2.
+        These are where we got a recurrency carry.  */
+      for (i = 1; i < n; i++)
+       {
+         r = rp[i];
+         c0 = ~r < cy[i - 1];  /* limb i wrapped in the carry subtract loop */
+         s0 = r - cyrec;
+         rp[i] = s0;
+         c1 = (s0 & ~r) >> 63; /* subtracting the rippled borrow wrapped */
+         cyrec = c0 | c1;
+       }
+      return _int_mult_upper (up[n - 1], vl) + cyrec + cy[n - 1];
+    }
+
+  return _int_mult_upper (up[n - 1], vl) + cy[n - 1];
+}
diff --git a/mpn/cray/lshift.c b/mpn/cray/lshift.c

new file mode 100644 (file)

index 0000000..4827472
--- /dev/null
+++ b/mpn/cray/lshift.c
@@ -0,0 +1,48 @@
+/* mpn_lshift -- Shift left low level for Cray vector processors.
+
+Copyright (C) 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <intrinsics.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Shift the n-limb operand {up,n} left by cnt bits into {wp,n} and return
+   the bits pushed out of the most significant limb, right-aligned.  The
+   complementary shift GMP_LIMB_BITS - cnt is applied unconditionally and
+   up[n - 1] is read, so the code relies on 0 < cnt < GMP_LIMB_BITS and
+   n >= 1.  Limbs are produced from the top down; each wp[k] depends only on
+   up[k] and up[k - 1], and the return value is taken before any store.  */
+mp_limb_t
+mpn_lshift (mp_ptr wp, mp_srcptr up, mp_size_t n, unsigned int cnt)
+{
+  unsigned left = cnt;
+  unsigned right = GMP_LIMB_BITS - left;
+  mp_limb_t spilled = up[n - 1] >> right;
+  mp_size_t k;
+
+#pragma _CRI ivdep
+  for (k = n - 1; k > 0; k--)
+    {
+#if 1
+      wp[k] = (up[k] << left) | (up[k - 1] >> right);
+#else
+      /* This is the recommended way, but at least on SV1 it is slower.  */
+      wp[k] = _dshiftl (up[k], up[k - 1], left);
+#endif
+    }
+
+  /* The lowest limb has no neighbour below it; zeros are shifted in.  */
+  wp[0] = up[0] << left;
+  return spilled;
+}
diff --git a/mpn/cray/mulww.f b/mpn/cray/mulww.f

new file mode 100644 (file)

index 0000000..e0bf96e
--- /dev/null
+++ b/mpn/cray/mulww.f
@@ -0,0 +1,52 @@
+c     Helper for mpn_mul_1, mpn_addmul_1, and mpn_submul_1 for Cray PVP.
+
+c     Copyright 1996, 2000 Free Software Foundation, Inc.
+
+c     This file is part of the GNU MP Library.
+
+c     The GNU MP Library is free software; you can redistribute it and/or
+c     modify it under the terms of the GNU Lesser General Public License as
+c     published by the Free Software Foundation; either version 3 of the
+c     License, or (at your option) any later version.
+
+c     The GNU MP Library is distributed in the hope that it will be useful,
+c     but WITHOUT ANY WARRANTY; without even the implied warranty of
+c     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+c     Lesser General Public License for more details.
+
+c     You should have received a copy of the GNU Lesser General Public License
+c     along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+c     p1[] = hi(a[]*s); the upper limbs of each product
+c     p0[] = low(a[]*s); the corresponding lower limbs
+c     n is number of limbs in the vectors
+
+c     Method: s and every a(i) are cut into three pieces (bits 0-21,
+c     22-43 and 44-63) with the mask 4194303 = 2**22-1.  Each piece is
+c     shifted left by 24 before it is handed to i24mult (presumably
+c     the operand layout that intrinsic expects; confirm against the
+c     Cray Fortran documentation).  t0..t4 collect the piece products
+c     of weight 2**0, 2**22, 2**44, 2**66 and 2**88.
+      subroutine gmpn_mulww(p1,p0,a,n,s)
+      integer*8 p1(0:*),p0(0:*),a(0:*),s
+      integer n
+
+      integer*8 a0,a1,a2,s0,s1,s2,c
+      integer*8 ai,t0,t1,t2,t3,t4
+
+c     The pieces of the multiplier s are loop invariant.
+      s0 = shiftl(and(s,4194303),24)
+      s1 = shiftl(and(shiftr(s,22),4194303),24)
+      s2 = shiftl(and(shiftr(s,44),4194303),24)
+
+      do i = 0,n-1
+         ai = a(i)
+         a0 = shiftl(and(ai,4194303),24)
+         a1 = shiftl(and(shiftr(ai,22),4194303),24)
+         a2 = shiftl(and(shiftr(ai,44),4194303),24)
+
+         t0 = i24mult(a0,s0)
+         t1 = i24mult(a0,s1)+i24mult(a1,s0)
+         t2 = i24mult(a0,s2)+i24mult(a1,s1)+i24mult(a2,s0)
+         t3 = i24mult(a1,s2)+i24mult(a2,s1)
+         t4 = i24mult(a2,s2)
+
+c        Low limb: t0 + t1*2**22 + t2*2**44 (this appears to rely on
+c        integer*8 arithmetic wrapping at 64 bits).
+         p0(i)=shiftl(t2,44)+shiftl(t1,22)+t0
+c        c = carry out of the low limb.  The three terms are aligned
+c        at bit 22 (4398046511103 = 2**42-1, 1048575 = 2**20-1) and
+c        whatever exceeds 42 bits is the carry.
+         c=shiftr(shiftr(t0,22)+and(t1,4398046511103)+
+     $        shiftl(and(t2,1048575),22),42)
+c        High limb: the parts of t1..t4 at or above 2**64, plus c.
+         p1(i)=shiftl(t4,24)+shiftl(t3,2)+shiftr(t2,20)+shiftr(t1,42)+c
+      end do
+      end
diff --git a/mpn/cray/popcount.c b/mpn/cray/popcount.c

new file mode 100644 (file)

index 0000000..3abdce8
--- /dev/null
+++ b/mpn/cray/popcount.c
@@ -0,0 +1,32 @@
+/* Cray mpn_popcount -- population count.
+
+Copyright 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <intrinsics.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+unsigned long int
+mpn_popcount (mp_srcptr p, mp_size_t n)
+{
+  unsigned long int bits = 0;          /* set bits counted so far */
+  mp_size_t left;                      /* limbs still to be visited */
+  for (left = n; left > 0; left--)     /* top limb first; order is irrelevant to the sum */
+    bits += _popcnt (p[left - 1]);
+  return bits;
+}
diff --git a/mpn/cray/rshift.c b/mpn/cray/rshift.c

new file mode 100644 (file)

index 0000000..3630b62
--- /dev/null
+++ b/mpn/cray/rshift.c
@@ -0,0 +1,48 @@
+/* mpn_rshift -- Shift right low level for Cray vector processors.
+
+Copyright (C) 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <intrinsics.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+mp_limb_t
+mpn_rshift (mp_ptr wp, mp_srcptr up, mp_size_t n, unsigned int cnt)
+{
+  unsigned sh_1, sh_2;
+  mp_size_t i;
+  mp_limb_t retval;
+  /* Shift {up,n} right by cnt bits into {wp,n}; return the bits shifted out.  */
+  sh_1 = cnt;                          /* right shift applied within a limb */
+  sh_2 = GMP_LIMB_BITS - sh_1;         /* complementary left shift */
+  retval = up[0] << sh_2;              /* low cnt bits of up[0], moved to the top */
+  /* NOTE(review): cnt == 0 makes sh_2 a full-width shift; presumably callers pass 1..GMP_LIMB_BITS-1 -- confirm.  */
+#pragma _CRI ivdep
+  for (i = 0; i < n - 1; i++)
+    {
+#if 1
+      wp[i] = (up[i] >> sh_1) | (up[i + 1] << sh_2);   /* low part from up[i], high part from up[i+1] */
+#else
+      /* This is the recommended way, but at least on SV1 it is slower.  */
+      wp[i] = _dshiftr (up[i + 1], up[i], sh_1);
+#endif
+    }
+  /* The top limb has no higher neighbour to take bits from.  */
+  wp[n - 1] = up[n - 1] >> sh_1;
+  return retval;
+}
diff --git a/mpn/cray/sub_n.c b/mpn/cray/sub_n.c

new file mode 100644 (file)

index 0000000..90a5f1b
--- /dev/null
+++ b/mpn/cray/sub_n.c
@@ -0,0 +1,80 @@
+/* Cray PVP mpn_sub_n -- subtract two limb vectors and store their difference
+   in a third limb vector.
+
+Copyright 1996, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+/* This code runs at 4 cycles/limb.  It may be possible to bring it down
+   to 3 cycles/limb.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+mp_limb_t
+mpn_sub_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+  mp_limb_t cy[n];             /* cy[i] = borrow out of limb i in the first pass */
+  mp_limb_t a, b, r, s0, c0, c1;
+  mp_size_t i;
+  int more_carries;            /* number of limbs that borrowed again in the second pass */
+  /* Computes {rp,n} = {up,n} - {vp,n}; the value returned is the borrow out of limb n-1.  */
+  /* Main subtract loop.  Generate a raw output difference in rp[] and a
+     borrow vector in cy[].  */
+#pragma _CRI ivdep
+  for (i = 0; i < n; i++)
+    {
+      a = up[i];
+      b = vp[i];
+      s0 = a - b;              /* a = s0 + b */
+      rp[i] = s0;
+      c0 = ((s0 & b) | ((s0 | b) & ~a)) >> 63;  /* borrow of a - b, read from bit 63 */
+      cy[i] = c0;
+    }
+  /* Borrow subtract loop.  Subtract the borrow vector cy[] from the raw
+     difference rp[] (shifted up one limb) and store the new difference back
+     to rp[].  If this generates further borrow, set more_carries.  */
+  more_carries = 0;
+#pragma _CRI ivdep
+  for (i = 1; i < n; i++)
+    {
+      r = rp[i];
+      c0 = cy[i - 1];
+      s0 = r - c0;             /* r = s0 + c0 */
+      rp[i] = s0;
+      c0 = (s0 & ~r) >> 63;    /* set only when r - c0 wrapped around */
+      more_carries += c0;
+    }
+  /* If that second loop generated borrow, handle that in scalar loop.  */
+  if (more_carries)
+    {
+      mp_limb_t cyrec = 0;     /* borrow carried from limb to limb in this pass */
+      /* Look for places where rp[k] contains just ones and cy[k-1] is
+        non-zero.  These are where the second loop itself borrowed.  */
+      for (i = 1; i < n; i++)
+       {
+         r = rp[i];
+         c0 = (~r == 0 && cy[i - 1] != 0);     /* borrow produced at this limb by the second loop */
+         s0 = r - cyrec;
+         rp[i] = s0;
+         c1 = (s0 & ~r) >> 63;                 /* borrow produced by subtracting cyrec */
+         cyrec = c0 | c1;
+       }
+      return cyrec | cy[n - 1];
+    }
+
+  return cy[n - 1];
+}
diff --git a/mpn/generic/add.c b/mpn/generic/add.c

new file mode 100644 (file)

index 0000000..8065ccf
--- /dev/null
+++ b/mpn/generic/add.c
@@ -0,0 +1,23 @@
+/* mpn_add - add mpn to mpn.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define __GMP_FORCE_mpn_add 1
+
+#include "gmp.h"
+#include "gmp-impl.h"
diff --git a/mpn/generic/add_1.c b/mpn/generic/add_1.c

new file mode 100644 (file)

index 0000000..2d3fa76
--- /dev/null
+++ b/mpn/generic/add_1.c
@@ -0,0 +1,23 @@
+/* mpn_add_1 - add limb to mpn.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define __GMP_FORCE_mpn_add_1 1
+
+#include "gmp.h"
+#include "gmp-impl.h"
diff --git a/mpn/generic/add_n.c b/mpn/generic/add_n.c

new file mode 100644 (file)

index 0000000..47b6df6
--- /dev/null
+++ b/mpn/generic/add_n.c
@@ -0,0 +1,80 @@
+/* mpn_add_n -- Add equal length limb vectors.
+
+Copyright 1992, 1993, 1994, 1996, 2000, 2002, 2009 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+#if GMP_NAIL_BITS == 0
+
+mp_limb_t
+mpn_add_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+  mp_limb_t a, b, partial, total, carry = 0;
+  mp_size_t i = 0;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_INCR_P (rp, up, n));
+  ASSERT (MPN_SAME_OR_INCR_P (rp, vp, n));
+
+  /* Limb-wise sum with carry; both sources are read before rp[i] is stored.  */
+  do
+    {
+      a = up[i];
+      b = vp[i];
+      partial = a + b;              /* wrapped exactly when partial < a */
+      total = partial + carry;      /* wrapped exactly when total < partial */
+      rp[i] = total;
+      carry = (partial < a) | (total < partial);
+    }
+  while (++i != n);
+
+  /* Carry out of the most significant limb.  */
+  return carry;
+}
+
+#endif
+
+#if GMP_NAIL_BITS >= 1
+
+mp_limb_t
+mpn_add_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+  mp_limb_t sum, carry = 0;
+  mp_size_t i = 0;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_INCR_P (rp, up, n));
+  ASSERT (MPN_SAME_OR_INCR_P (rp, vp, n));
+
+  /* The carry is whatever the three-term sum leaves above the
+     GMP_NUMB_BITS payload bits; only the payload is stored.  */
+  do
+    {
+      sum = up[i] + vp[i] + carry;
+      carry = sum >> GMP_NUMB_BITS;
+      rp[i] = sum & GMP_NUMB_MASK;
+    }
+  while (++i != n);
+
+  return carry;
+}
+
+#endif
diff --git a/mpn/generic/addmul_1.c b/mpn/generic/addmul_1.c

new file mode 100644 (file)

index 0000000..861e1bc
--- /dev/null
+++ b/mpn/generic/addmul_1.c
@@ -0,0 +1,129 @@
+/* mpn_addmul_1 -- multiply the N long limb vector pointed to by UP by VL,
+   add the N least significant limbs of the product to the limb vector
+   pointed to by RP.  Return the most significant limb of the product,
+   adjusted for carry-out from the addition.
+
+Copyright 1992, 1993, 1994, 1996, 2000, 2002, 2004 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+#if GMP_NAIL_BITS == 0
+
+mp_limb_t
+mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
+{
+  mp_limb_t u, prod_hi, prod_lo, old, sum, carry = 0;
+  mp_size_t i = 0;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+
+  /* {rp,n} += {up,n} * vl; the limb carried past rp[n-1] is returned.  */
+  do
+    {
+      u = up[i];
+      umul_ppmm (prod_hi, prod_lo, u, vl);
+      prod_lo += carry;                         /* fold in the incoming carry limb */
+      carry = prod_hi + (prod_lo < carry);      /* that add wrapped iff result < addend */
+
+      old = rp[i];
+      sum = old + prod_lo;
+      carry += sum < old;                       /* carry out of the accumulate step */
+      rp[i] = sum;
+    }
+  while (++i != n);
+
+  return carry;
+}
+
+#endif
+
+#if GMP_NAIL_BITS == 1
+
+mp_limb_t
+mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
+{
+  mp_limb_t vl_up, u, old, prod_lo, prod_hi, acc, k1, k2, k3;
+  mp_limb_t pending_hi = 0, carry = 0;
+  mp_size_t i = 0;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+  ASSERT_MPN (rp, n);
+  ASSERT_MPN (up, n);
+  ASSERT_LIMB (vl);
+
+  vl_up = vl << GMP_NAIL_BITS;  /* pre-shifted so the product splits at the numb boundary */
+  do
+    {
+      u = up[i];
+      old = rp[i];
+      umul_ppmm (prod_hi, prod_lo, u, vl_up);
+      prod_lo >>= GMP_NAIL_BITS;
+      ADDC_LIMB (k1, acc, pending_hi, prod_lo);   /* three chained adds, each with its own carry-out */
+      ADDC_LIMB (k2, acc, acc, old);
+      ADDC_LIMB (k3, acc, acc, carry);
+      carry = k1 + k2 + k3;
+      rp[i] = acc;
+      pending_hi = prod_hi;                       /* high half is added one limb later */
+    }
+  while (++i != n);
+
+  return pending_hi + carry;
+}
+
+#endif
+
+#if GMP_NAIL_BITS >= 2
+
+mp_limb_t
+mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
+{
+  mp_limb_t vl_up, u, old, prod_lo, prod_hi, wide;
+  mp_limb_t pending_hi = 0, carry = 0;
+  mp_size_t i = 0;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+  ASSERT_MPN (rp, n);
+  ASSERT_MPN (up, n);
+  ASSERT_LIMB (vl);
+
+  /* Pre-shift vl so the double-limb product splits at the numb boundary.  */
+  vl_up = vl << GMP_NAIL_BITS;
+  do
+    {
+      u = up[i];
+      old = rp[i];
+      umul_ppmm (prod_hi, prod_lo, u, vl_up);
+      prod_lo >>= GMP_NAIL_BITS;
+      wide = pending_hi + prod_lo + old + carry;  /* the nail bits give the four-term sum headroom */
+      carry = wide >> GMP_NUMB_BITS;
+      rp[i] = wide & GMP_NUMB_MASK;
+      pending_hi = prod_hi;                       /* high half is added one limb later */
+    }
+  while (++i != n);
+
+  return pending_hi + carry;
+}
+
+#endif
diff --git a/mpn/generic/addsub_n.c b/mpn/generic/addsub_n.c

new file mode 100644 (file)

index 0000000..21437c6
--- /dev/null
+++ b/mpn/generic/addsub_n.c
@@ -0,0 +1,162 @@
+/* mpn_add_n_sub_n -- Add and Subtract two limb vectors of equal, non-zero length.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 1999, 2000, 2001, 2006 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#ifndef L1_CACHE_SIZE
+#define L1_CACHE_SIZE 8192     /* only 68040 has less than this */
+#endif
+
+#define PART_SIZE (L1_CACHE_SIZE / BYTES_PER_MP_LIMB / 6)
+
+
+/* mpn_add_n_sub_n.
+   r1[] = s1[] + s2[]
+   r2[] = s1[] - s2[]
+   All operands have n limbs.
+   In-place operations allowed.  */
+mp_limb_t
+mpn_add_n_sub_n (mp_ptr r1p, mp_ptr r2p, mp_srcptr s1p, mp_srcptr s2p, mp_size_t n)
+{
+  mp_limb_t acyn, acyo;                /* add carry: of this chunk / running */
+  mp_limb_t scyn, scyo;                /* subtract borrow: of this chunk / running */
+  mp_size_t off;               /* offset in operands */
+  mp_size_t this_n;            /* size of current chunk */
+
+  /* We alternatingly add and subtract in chunks that fit into the (L1)
+     cache.  Each chunk is up to PART_SIZE limbs, so the function call
+     overhead is small compared with the work, and locality is better.  */
+
+  /* We have three variants of the inner loop, the proper loop is chosen
+     depending on whether r1 or r2 are the same operand as s1 or s2.  */
+
+  if (r1p != s1p && r1p != s2p)
+    {
+      /* r1 is not identical to either input operand.  We can therefore write
+        to r1 directly, without using temporary storage.  */
+      acyo = 0;
+      scyo = 0;
+      for (off = 0; off < n; off += PART_SIZE)
+       {
+         this_n = MIN (n - off, PART_SIZE);
+#if HAVE_NATIVE_mpn_add_nc
+         acyo = mpn_add_nc (r1p + off, s1p + off, s2p + off, this_n, acyo);
+#else
+         acyn = mpn_add_n (r1p + off, s1p + off, s2p + off, this_n);
+         acyo = acyn + mpn_add_1 (r1p + off, r1p + off, this_n, acyo);  /* fold in the previous chunk's carry */
+#endif
+#if HAVE_NATIVE_mpn_sub_nc
+         scyo = mpn_sub_nc (r2p + off, s1p + off, s2p + off, this_n, scyo);
+#else
+         scyn = mpn_sub_n (r2p + off, s1p + off, s2p + off, this_n);
+         scyo = scyn + mpn_sub_1 (r2p + off, r2p + off, this_n, scyo);  /* fold in the previous chunk's borrow */
+#endif
+       }
+    }
+  else if (r2p != s1p && r2p != s2p)
+    {
+      /* r2 is not identical to either input operand.  We can therefore write
+        to r2 directly, without using temporary storage.  The subtraction
+        goes first here, while the inputs are still intact.  */
+      acyo = 0;
+      scyo = 0;
+      for (off = 0; off < n; off += PART_SIZE)
+       {
+         this_n = MIN (n - off, PART_SIZE);
+#if HAVE_NATIVE_mpn_sub_nc
+         scyo = mpn_sub_nc (r2p + off, s1p + off, s2p + off, this_n, scyo);
+#else
+         scyn = mpn_sub_n (r2p + off, s1p + off, s2p + off, this_n);
+         scyo = scyn + mpn_sub_1 (r2p + off, r2p + off, this_n, scyo);
+#endif
+#if HAVE_NATIVE_mpn_add_nc
+         acyo = mpn_add_nc (r1p + off, s1p + off, s2p + off, this_n, acyo);
+#else
+         acyn = mpn_add_n (r1p + off, s1p + off, s2p + off, this_n);
+         acyo = acyn + mpn_add_1 (r1p + off, r1p + off, this_n, acyo);
+#endif
+       }
+    }
+  else
+    {
+      /* Both r1 and r2 coincide with an input operand (r1==s1 and r2==s2 or
+        vice versa), so the sum is built in tp[] and copied out afterwards.  */
+      mp_limb_t tp[PART_SIZE];
+      acyo = 0;
+      scyo = 0;
+      for (off = 0; off < n; off += PART_SIZE)
+       {
+         this_n = MIN (n - off, PART_SIZE);
+#if HAVE_NATIVE_mpn_add_nc
+         acyo = mpn_add_nc (tp, s1p + off, s2p + off, this_n, acyo);
+#else
+         acyn = mpn_add_n (tp, s1p + off, s2p + off, this_n);
+         acyo = acyn + mpn_add_1 (tp, tp, this_n, acyo);
+#endif
+#if HAVE_NATIVE_mpn_sub_nc
+         scyo = mpn_sub_nc (r2p + off, s1p + off, s2p + off, this_n, scyo);
+#else
+         scyn = mpn_sub_n (r2p + off, s1p + off, s2p + off, this_n);
+         scyo = scyn + mpn_sub_1 (r2p + off, r2p + off, this_n, scyo);
+#endif
+         MPN_COPY (r1p + off, tp, this_n);     /* the inputs of this chunk are no longer needed */
+       }
+    }
+  /* Both results share one return value: add carry weighted 2, borrow weighted 1.  */
+  return 2 * acyo + scyo;
+}
+
+#ifdef MAIN
+#include <stdlib.h>
+#include <stdio.h>
+#include "timing.h"
+
+long cputime ();
+
+int
+main (int argc, char **argv)
+{
+  mp_ptr r1p, r2p, s1p, s2p;
+  double t;
+  mp_size_t n;
+  /* Usage: prog N.  N is the limb count and must be positive (mpn_add_n asserts n >= 1).  */
+  if (argc < 2 || (n = strtol (argv[1], 0, 0)) < 1)
+    return 1;
+  r1p = malloc (n * BYTES_PER_MP_LIMB);
+  r2p = malloc (n * BYTES_PER_MP_LIMB);
+  s1p = malloc (n * BYTES_PER_MP_LIMB);
+  s2p = malloc (n * BYTES_PER_MP_LIMB);
+  TIME (t,(mpn_add_n(r1p,s1p,s2p,n),mpn_sub_n(r1p,s1p,s2p,n)));
+  printf ("              separate add and sub: %.3f\n", t);
+  TIME (t,mpn_add_n_sub_n(r1p,r2p,s1p,s2p,n));
+  printf ("combined addsub separate variables: %.3f\n", t);
+  TIME (t,mpn_add_n_sub_n(r1p,r2p,r1p,s2p,n));   /* r1 doubles as source s1 */
+  printf ("        combined addsub r1 overlap: %.3f\n", t);
+  TIME (t,mpn_add_n_sub_n(r1p,r2p,s1p,r2p,n));   /* r2 doubles as source s2 */
+  printf ("        combined addsub r2 overlap: %.3f\n", t);
+  TIME (t,mpn_add_n_sub_n(r1p,r2p,r1p,r2p,n));   /* both results overwrite their sources */
+  printf ("          combined addsub in-place: %.3f\n", t);
+
+  return 0;
+}
+#endif
diff --git a/mpn/generic/bdiv_dbm1c.c b/mpn/generic/bdiv_dbm1c.c

new file mode 100644 (file)

index 0000000..23cb6f1
--- /dev/null
+++ b/mpn/generic/bdiv_dbm1c.c
@@ -0,0 +1,48 @@
+/* mpn_bdiv_dbm1c -- divide an mpn number by a divisor of B-1, where B is the
+   limb base.  The dbm1c moniker means "Divisor of B Minus 1 with Carry".
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2008, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+mp_limb_t
+mpn_bdiv_dbm1c (mp_ptr qp, mp_srcptr ap, mp_size_t n, mp_limb_t bd, mp_limb_t h)
+{
+  mp_limb_t mult, lo, hi, borrow;
+  mp_size_t k;
+
+  mult = bd << GMP_NAIL_BITS;          /* loop-invariant multiplier, shifted past the nails */
+  for (k = 0; k < n; k++)
+    {
+      umul_ppmm (hi, lo, ap[k], mult);
+      lo >>= GMP_NAIL_BITS;
+      borrow = h < lo;                 /* will h - lo wrap? */
+      h = (h - lo) & GMP_NUMB_MASK;
+      qp[k] = h;                       /* this value is quotient limb k */
+      h -= hi + borrow;                /* running value handed to the next limb */
+    }
+
+  return h;
+}
diff --git a/mpn/generic/bdiv_q.c b/mpn/generic/bdiv_q.c

new file mode 100644 (file)

index 0000000..7cb62e8
--- /dev/null
+++ b/mpn/generic/bdiv_q.c
@@ -0,0 +1,66 @@
+/* mpn_bdiv_q -- Hensel division with precomputed inverse, returning quotient.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2006, 2007, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Computes Q = N / D mod B^n. */
+
+void
+mpn_bdiv_q (mp_ptr qp,
+           mp_srcptr np, mp_size_t nn,
+           mp_srcptr dp, mp_size_t dn,
+           mp_ptr tp)
+{
+  mp_limb_t dinv;
+  int schoolbook = BELOW_THRESHOLD (dn, DC_BDIV_Q_THRESHOLD);
+
+  if (! schoolbook && ! BELOW_THRESHOLD (dn, MU_BDIV_Q_THRESHOLD))
+    {
+      /* Largest divisors: mu reads np directly and gets tp passed through.  */
+      mpn_mu_bdiv_q (qp, np, nn, dp, dn, tp);
+      return;
+    }
+
+  /* Both pi1 variants are handed a copy of N in tp together with the
+     negated binvert_limb of the lowest divisor limb.  */
+  MPN_COPY (tp, np, nn);
+  binvert_limb (dinv, dp[0]);
+  dinv = -dinv;
+  if (schoolbook)
+    mpn_sbpi1_bdiv_q (qp, tp, nn, dp, dn, dinv);
+  else
+    mpn_dcpi1_bdiv_q (qp, tp, nn, dp, dn, dinv);
+}
+
+mp_size_t
+mpn_bdiv_q_itch (mp_size_t nn, mp_size_t dn)
+{
+  /* Below the MU threshold mpn_bdiv_q only puts a copy of N (nn limbs) in tp.  */
+  return BELOW_THRESHOLD (dn, MU_BDIV_Q_THRESHOLD)
+    ? nn
+    : mpn_mu_bdiv_q_itch (nn, dn);
+}
diff --git a/mpn/generic/bdiv_q_1.c b/mpn/generic/bdiv_q_1.c

new file mode 100644 (file)

index 0000000..727f9f0
--- /dev/null
+++ b/mpn/generic/bdiv_q_1.c
@@ -0,0 +1,115 @@
+/* mpn_bdiv_q_1, mpn_pi1_bdiv_q_1 -- schoolbook Hensel division by 1-limb
+   divisor, returning quotient only.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2000, 2001, 2002, 2003, 2005, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+mp_limb_t
+mpn_pi1_bdiv_q_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t d,
+                 mp_limb_t di, int shift)
+{
+  mp_size_t  i;
+  mp_limb_t  c, h, l, u, u_next, dummy;
+  /* Writes n quotient limbs to rp; di is presumably binvert_limb of d, as mpn_bdiv_q_1 passes it -- confirm.  */
+  ASSERT (n >= 1);
+  ASSERT (d != 0);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+  ASSERT_MPN (up, n);
+  ASSERT_LIMB (d);
+
+  d <<= GMP_NAIL_BITS;         /* so that the high half of l*d below is a whole numb limb */
+
+  if (shift != 0)
+    {
+      c = 0;
+      /* The dividend is shifted right by 'shift' bits on the fly, one limb behind the read position.  */
+      u = up[0];
+      rp--;                    /* rp[i] below now addresses the caller's rp[i-1] */
+      for (i = 1; i < n; i++)
+       {
+         u_next = up[i];
+         u = ((u >> shift) | (u_next << (GMP_NUMB_BITS-shift))) & GMP_NUMB_MASK;
+         /* Remove the carry limb of the previous step; c becomes the borrow of that subtraction.  */
+         SUBC_LIMB (c, l, u, c);
+         /* Multiplying by the inverse gives the quotient limb.  */
+         l = (l * di) & GMP_NUMB_MASK;
+         rp[i] = l;
+         /* Only the high half of l*d is needed; it is carried into the next limb.  */
+         umul_ppmm (h, dummy, l, d);
+         c += h;
+         u = u_next;
+       }
+      /* Top limb: nothing above it to shift in.  i == n here, i.e. the caller's rp[n-1].  */
+      u = u >> shift;
+      l = u - c;
+      l = (l * di) & GMP_NUMB_MASK;
+      rp[i] = l;
+    }
+  else
+    {
+      u = up[0];
+      l = (u * di) & GMP_NUMB_MASK;
+      rp[0] = l;
+      c = 0;
+      /* Each round first carries the high half of the previous l*d, then forms the next quotient limb.  */
+      for (i = 1; i < n; i++)
+       {
+         umul_ppmm (h, dummy, l, d);
+         c += h;
+         /* c is both the amount subtracted and, afterwards, the borrow out.  */
+         u = up[i];
+         SUBC_LIMB (c, l, u, c);
+
+         l = (l * di) & GMP_NUMB_MASK;
+         rp[i] = l;
+       }
+    }
+  /* c as left by the last loop round (still 0 when n == 1).  */
+  return c;
+}
+
+mp_limb_t
+mpn_bdiv_q_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t d)
+{
+  mp_limb_t dinv;
+  int twos = 0;
+
+  ASSERT (n >= 1);
+  ASSERT (d != 0);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+  ASSERT_MPN (up, n);
+  ASSERT_LIMB (d);
+
+  /* Strip the trailing zero bits from an even d; they are passed on as
+     a shift count, so binvert_limb is only ever given an odd divisor.  */
+  if ((d & 1) == 0)
+    {
+      count_trailing_zeros (twos, d);
+      d >>= twos;
+    }
+
+  binvert_limb (dinv, d);
+  return mpn_pi1_bdiv_q_1 (rp, up, n, d, dinv, twos);
+}
diff --git a/mpn/generic/bdiv_qr.c b/mpn/generic/bdiv_qr.c

new file mode 100644 (file)

index 0000000..6fc61b7
--- /dev/null
+++ b/mpn/generic/bdiv_qr.c
@@ -0,0 +1,73 @@
+/* mpn_bdiv_qr -- Hensel division with precomputed inverse, returning quotient
+   and remainder.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2006, 2007, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Computes Q = N / D mod B^n,
+           R = N - QD.  */
+
+mp_limb_t
+mpn_bdiv_qr (mp_ptr qp, mp_ptr rp,
+            mp_srcptr np, mp_size_t nn,
+            mp_srcptr dp, mp_size_t dn,
+            mp_ptr tp)
+{
+  mp_limb_t dinv, rh;
+  int schoolbook;
+
+  /* Schoolbook whenever dn or nn - dn is below the DC threshold.  */
+  schoolbook = (BELOW_THRESHOLD (dn, DC_BDIV_QR_THRESHOLD)
+               || BELOW_THRESHOLD (nn - dn, DC_BDIV_QR_THRESHOLD));
+
+  if (! schoolbook && ! BELOW_THRESHOLD (dn, MU_BDIV_QR_THRESHOLD))
+    return mpn_mu_bdiv_qr (qp, rp, np, nn, dp, dn, tp);
+
+  /* The pi1 variants are handed a copy of N in tp; afterwards the top
+     dn limbs of that copy are moved to rp.  */
+  MPN_COPY (tp, np, nn);
+  binvert_limb (dinv, dp[0]);
+  dinv = -dinv;
+
+  if (schoolbook)
+    rh = mpn_sbpi1_bdiv_qr (qp, tp, nn, dp, dn, dinv);
+  else
+    rh = mpn_dcpi1_bdiv_qr (qp, tp, nn, dp, dn, dinv);
+  MPN_COPY (rp, tp + nn - dn, dn);
+
+  /* Value reported by whichever pi1 routine ran.  */
+  return rh;
+}
+
+mp_size_t
+mpn_bdiv_qr_itch (mp_size_t nn, mp_size_t dn)
+{
+  /* Below the MU threshold mpn_bdiv_qr only puts a copy of N (nn limbs) in tp.  */
+  return BELOW_THRESHOLD (dn, MU_BDIV_QR_THRESHOLD)
+    ? nn
+    : mpn_mu_bdiv_qr_itch (nn, dn);
+}
diff --git a/mpn/generic/binvert.c b/mpn/generic/binvert.c

new file mode 100644 (file)

index 0000000..f06030c
--- /dev/null
+++ b/mpn/generic/binvert.c
@@ -0,0 +1,99 @@
+/* Compute {up,n}^(-1) mod B^n.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright (C) 2004, 2005, 2006, 2007, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/*
+  r[k+1] = r[k] - r[k] * (u*r[k] - 1)
+  r[k+1] = r[k] + r[k] - r[k]*(u*r[k])
+*/
+
+/* LOG2C(n) counts the k in 0..15 with n >= 2^k, i.e. the bit length of n
+   for n < 2^16.  Meant for constant THRESHOLDs, so the compiler can fold it.  */
+#define LOG2C(n) \
+ (((n) >=    0x1) + ((n) >=    0x2) + ((n) >=    0x4) + ((n) >=    0x8) + \
+  ((n) >=   0x10) + ((n) >=   0x20) + ((n) >=   0x40) + ((n) >=   0x80) + \
+  ((n) >=  0x100) + ((n) >=  0x200) + ((n) >=  0x400) + ((n) >=  0x800) + \
+  ((n) >= 0x1000) + ((n) >= 0x2000) + ((n) >= 0x4000) + ((n) >= 0x8000))
+
+#if TUNE_PROGRAM_BUILD   /* NPOWS is the capacity of sizes[] in mpn_binvert */
+#define NPOWS \
+ ((sizeof(mp_size_t) > 6 ? 48 : 8*sizeof(mp_size_t)))
+#else
+#define NPOWS \
+ ((sizeof(mp_size_t) > 6 ? 48 : 8*sizeof(mp_size_t)) - LOG2C (BINV_NEWTON_THRESHOLD))
+#endif   /* TUNE_PROGRAM_BUILD */
+
+mp_size_t
+mpn_binvert_itch (mp_size_t n)
+{
+  mp_size_t padded = mpn_mulmod_bnm1_next_size (n);    /* mulmod size chosen for n limbs */
+  mp_size_t half = (n + 1) >> 1;                       /* n halved, rounded up */
+  return padded + mpn_mulmod_bnm1_itch (padded, n, half);
+}
+
+void
+mpn_binvert (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr scratch)
+{
+  mp_ptr xp;
+  mp_size_t rn, newrn;
+  mp_size_t sizes[NPOWS], *sizp;
+  mp_limb_t di;
+  /* Stores at {rp,n} the inverse of {up,n} mod B^n; scratch presumably needs mpn_binvert_itch (n) limbs -- confirm.  */
+  /* Compute the computation precisions from highest to lowest, leaving the
+     base case size in 'rn'.  */
+  sizp = sizes;
+  for (rn = n; ABOVE_THRESHOLD (rn, BINV_NEWTON_THRESHOLD); rn = (rn + 1) >> 1)
+    *sizp++ = rn;              /* each precision is half the previous one, rounded up */
+
+  xp = scratch;
+
+  /* Compute a base value of rn limbs: divide 1 by the low rn limbs of U.  */
+  MPN_ZERO (xp, rn);
+  xp[0] = 1;
+  binvert_limb (di, up[0]);
+  if (BELOW_THRESHOLD (rn, DC_BDIV_Q_THRESHOLD))
+    mpn_sbpi1_bdiv_q (rp, xp, rn, up, rn, -di);
+  else
+    mpn_dcpi1_bdiv_q (rp, xp, rn, up, rn, -di);
+
+  /* Use Newton iterations to get the desired precision.  */
+  for (; rn < n; rn = newrn)
+    {
+      mp_size_t m;
+      newrn = *--sizp;         /* precisions are popped in the reverse of the order stored */
+
+      /* X <- UR. */
+      m = mpn_mulmod_bnm1_next_size (newrn);
+      mpn_mulmod_bnm1 (xp, m, up, newrn, rp, rn, xp + m);
+      mpn_sub_1 (xp + m, xp, rn - (m - newrn), 1);     /* NOTE(review): presumably undoes the mod B^m-1 wraparound -- confirm */
+
+      /* R = R(X/B^rn): the rn existing limbs stay, newrn - rn new ones are appended.  */
+      mpn_mullo_n (rp + rn, rp, xp + rn, newrn - rn);
+      mpn_neg (rp + rn, rp + rn, newrn - rn);
+    }
+}
diff --git a/mpn/generic/cmp.c b/mpn/generic/cmp.c

new file mode 100644 (file)

index 0000000..d352076
--- /dev/null
+++ b/mpn/generic/cmp.c
@@ -0,0 +1,23 @@
+/* mpn_cmp -- Compare two low-level natural-number integers.
+
+Copyright 1991, 1993, 1994, 1996, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define __GMP_FORCE_mpn_cmp 1
+
+#include "gmp.h"
+#include "gmp-impl.h"
diff --git a/mpn/generic/com.c b/mpn/generic/com.c

new file mode 100644 (file)

index 0000000..ed817e6
--- /dev/null
+++ b/mpn/generic/com.c
@@ -0,0 +1,34 @@
+/* mpn_com - complement an mpn.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#undef mpn_com
+#define mpn_com __MPN(com)
+
+void
+mpn_com (mp_ptr rp, mp_srcptr up, mp_size_t n)
+{
+  /* Store the one's complement of each limb of {up,n} at {rp,n}, masked
+     to the numb bits.  The loop body runs before the count is tested, so
+     at least one limb is always processed.  */
+  mp_size_t remaining = n;
+
+  do
+    {
+      mp_limb_t src = *up++;
+
+      *rp++ = ~src & GMP_NUMB_MASK;
+      remaining--;
+    }
+  while (remaining != 0);
+}
diff --git a/mpn/generic/copyd.c b/mpn/generic/copyd.c

new file mode 100644 (file)

index 0000000..2a08ef4
--- /dev/null
+++ b/mpn/generic/copyd.c
@@ -0,0 +1,30 @@
+/* mpn_copyd
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpn_copyd (mp_ptr rp, mp_srcptr up, mp_size_t n)
+{
+  /* Copy {up,n} to {rp,n}, starting with the highest limb and working
+     down to limb 0.  Nothing is copied when n is not positive.  */
+  mp_size_t idx = n;
+
+  while (idx > 0)
+    {
+      idx--;
+      rp[idx] = up[idx];
+    }
+}
diff --git a/mpn/generic/copyi.c b/mpn/generic/copyi.c

new file mode 100644 (file)

index 0000000..c0a047b
--- /dev/null
+++ b/mpn/generic/copyi.c
@@ -0,0 +1,32 @@
+/* mpn_copyi
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpn_copyi (mp_ptr rp, mp_srcptr up, mp_size_t n)
+{
+  /* Copy {up,n} to {rp,n}, lowest limb first, advancing both pointers
+     until the source pointer reaches the end of its operand.  */
+  mp_srcptr src_end = up + n;
+
+  while (up != src_end)
+    *rp++ = *up++;
+}
diff --git a/mpn/generic/dcpi1_bdiv_q.c b/mpn/generic/dcpi1_bdiv_q.c

new file mode 100644 (file)

index 0000000..2bc85ef
--- /dev/null
+++ b/mpn/generic/dcpi1_bdiv_q.c
@@ -0,0 +1,149 @@
+/* mpn_dcpi1_bdiv_q -- divide-and-conquer Hensel division with precomputed
+   inverse, returning quotient.
+
+   Contributed to the GNU project by Niels Möller and Torbjorn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2006, 2007, 2009, 2010, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Presumably the number of scratch limbs mpn_dcpi1_bdiv_q_n needs at tp for
+   an n-limb division -- TODO confirm.  Always returns n.  */
+mp_size_t
+mpn_dcpi1_bdiv_q_n_itch (mp_size_t n)
+{
+  /* NOTE: Depends on mullo_n interface */
+  return n;
+}
+
+/* Computes Q = N / D mod B^n, destroys N.
+
+   N = {np,n}
+   D = {dp,n}
+   Q = {qp,n}
+
+   dinv is handed unchanged to the bdiv helpers called below, and tp is
+   scratch: mpn_mullo_n is asked for floor(n/2) limbs there on each round.
+*/
+
+void
+mpn_dcpi1_bdiv_q_n (mp_ptr qp,
+                   mp_ptr np, mp_srcptr dp, mp_size_t n,
+                   mp_limb_t dinv, mp_ptr tp)
+{
+  while (ABOVE_THRESHOLD (n, DC_BDIV_Q_THRESHOLD))
+    {
+      mp_size_t lo, hi;
+      mp_limb_t cy;
+
+      lo = n >> 1;                     /* floor(n/2) */
+      hi = n - lo;                     /* ceil(n/2) */
+
+      cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, lo, dinv, tp);     /* low lo quotient limbs */
+
+      mpn_mullo_n (tp, qp, dp + hi, lo);
+      mpn_sub_n (np + hi, np + hi, tp, lo);
+
+      if (lo < hi)             /* n odd: dp[lo] was not used by the two steps above */
+       {
+         cy += mpn_submul_1 (np + lo, qp, lo, dp[lo]);
+         np[n - 1] -= cy;
+       }
+      qp += lo;                        /* carry on with the remaining hi limbs */
+      np += lo;
+      n -= lo;
+    }
+  mpn_sbpi1_bdiv_q (qp, np, n, dp, n, dinv);   /* what is left is no longer above the threshold */
+}
+
+/* Computes Q = N / D mod B^nn, destroys N.
+
+   N = {np,nn}
+   D = {dp,dn}
+   Q = {qp,nn}
+
+   The quotient is developed from the low end in blocks of at most dn limbs.
+   dn limbs of temporary space are taken with TMP_SALLOC_LIMBS.
+*/
+
+void
+mpn_dcpi1_bdiv_q (mp_ptr qp,
+                 mp_ptr np, mp_size_t nn,
+                 mp_srcptr dp, mp_size_t dn,
+                 mp_limb_t dinv)
+{
+  mp_size_t qn;
+  mp_limb_t cy;
+  mp_ptr tp;
+  TMP_DECL;
+
+  TMP_MARK;
+
+  ASSERT (dn >= 2);
+  ASSERT (nn - dn >= 0);
+  ASSERT (dp[0] & 1);
+
+  tp = TMP_SALLOC_LIMBS (dn);
+
+  qn = nn;                     /* all nn quotient limbs are wanted */
+
+  if (qn > dn)
+    {
+      /* Reduce qn mod dn in a super-efficient manner.  */
+      do
+       qn -= dn;
+      while (qn > dn);
+
+      /* Perform the typically smaller block first.  */
+      if (BELOW_THRESHOLD (qn, DC_BDIV_QR_THRESHOLD))
+       cy = mpn_sbpi1_bdiv_qr (qp, np, 2 * qn, dp, qn, dinv);
+      else
+       cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, qn, dinv, tp);
+
+      if (qn != dn)            /* apply the dn - qn divisor limbs not used above */
+       {
+         if (qn > dn - qn)
+           mpn_mul (tp, qp, qn, dp + qn, dn - qn);
+         else
+           mpn_mul (tp, dp + qn, dn - qn, qp, qn);
+         mpn_incr_u (tp + qn, cy);
+
+         mpn_sub (np + qn, np + qn, nn - qn, tp, dn);
+         cy = 0;
+       }
+
+      np += qn;
+      qp += qn;
+
+      qn = nn - qn;            /* quotient limbs still to produce */
+      while (qn > dn)
+       {
+         mpn_sub_1 (np + dn, np + dn, qn - dn, cy);
+         cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, dn, dinv, tp);
+         qp += dn;
+         np += dn;
+         qn -= dn;
+       }
+      mpn_dcpi1_bdiv_q_n (qp, np, dp, dn, dinv, tp);   /* last block: quotient only */
+    }
+  else
+    {
+      if (BELOW_THRESHOLD (qn, DC_BDIV_Q_THRESHOLD))
+       mpn_sbpi1_bdiv_q (qp, np, qn, dp, qn, dinv);
+      else
+       mpn_dcpi1_bdiv_q_n (qp, np, dp, qn, dinv, tp);
+    }
+
+  TMP_FREE;
+}
diff --git a/mpn/generic/dcpi1_bdiv_qr.c b/mpn/generic/dcpi1_bdiv_qr.c

new file mode 100644 (file)

index 0000000..28cc82e
--- /dev/null
+++ b/mpn/generic/dcpi1_bdiv_qr.c
@@ -0,0 +1,166 @@
+/* mpn_dcpi1_bdiv_qr -- divide-and-conquer Hensel division with precomputed
+   inverse, returning quotient and remainder.
+
+   Contributed to the GNU project by Niels Möller and Torbjorn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2006, 2007, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Computes Hensel binary division of {np, 2*n} by {dp, n}.
+
+   Output:
+
+      q = n * d^{-1} mod 2^{qn * GMP_NUMB_BITS},
+
+      r = (n - q * d) * 2^{-qn * GMP_NUMB_BITS}
+
+   Stores q at qp. Stores the n least significant limbs of r at the high half
+   of np, and returns the borrow from the subtraction n - q*d.
+
+   d must be odd. dinv is (-d)^-1 mod 2^GMP_NUMB_BITS. */
+/* Presumably the number of scratch limbs mpn_dcpi1_bdiv_qr_n needs at tp
+   (it forms n-limb products there) -- TODO confirm.  Always returns n.  */
+mp_size_t
+mpn_dcpi1_bdiv_qr_n_itch (mp_size_t n)
+{
+  return n;
+}
+/* Hensel-divide {np,2n} by {dp,n}: the n quotient limbs go to qp, the
+   remainder is left in the high n limbs of np, and the accumulated borrow
+   from the subtractions is returned.  tp is scratch for n-limb products.  */
+mp_limb_t
+mpn_dcpi1_bdiv_qr_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
+                    mp_limb_t dinv, mp_ptr tp)
+{
+  mp_size_t lo, hi;
+  mp_limb_t cy;
+  mp_limb_t rh;
+
+  lo = n >> 1;                 /* floor(n/2) */
+  hi = n - lo;                 /* ceil(n/2) */
+
+  if (BELOW_THRESHOLD (lo, DC_BDIV_QR_THRESHOLD))      /* low lo quotient limbs */
+    cy = mpn_sbpi1_bdiv_qr (qp, np, 2 * lo, dp, lo, dinv);
+  else
+    cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, lo, dinv, tp);
+
+  mpn_mul (tp, dp + lo, hi, qp, lo);   /* low quotient times the high hi divisor limbs */
+
+  mpn_incr_u (tp + lo, cy);
+  rh = mpn_sub (np + lo, np + lo, n + hi, tp, n);
+
+  if (BELOW_THRESHOLD (hi, DC_BDIV_QR_THRESHOLD))      /* high hi quotient limbs */
+    cy = mpn_sbpi1_bdiv_qr (qp + lo, np + lo, 2 * hi, dp, hi, dinv);
+  else
+    cy = mpn_dcpi1_bdiv_qr_n (qp + lo, np + lo, dp, hi, dinv, tp);
+
+  mpn_mul (tp, qp + lo, hi, dp + hi, lo);      /* high quotient times the top lo divisor limbs */
+
+  mpn_incr_u (tp + hi, cy);
+  rh += mpn_sub_n (np + n, np + n, tp, n);
+
+  return rh;
+}
+/* Hensel-divide {np,nn} by {dp,dn}, developing the nn-dn quotient limbs at
+   qp from the low end in blocks of at most dn limbs.  np is updated in
+   place, and rr + cy, the borrows collected along the way, is returned.  */
+mp_limb_t
+mpn_dcpi1_bdiv_qr (mp_ptr qp, mp_ptr np, mp_size_t nn,
+                  mp_srcptr dp, mp_size_t dn, mp_limb_t dinv)
+{
+  mp_size_t qn;
+  mp_limb_t rr, cy;
+  mp_ptr tp;
+  TMP_DECL;
+
+  TMP_MARK;
+
+  ASSERT (dn >= 2);            /* to adhere to mpn_sbpi1_div_qr's limits */
+  ASSERT (nn - dn >= 1);       /* to adhere to mpn_sbpi1_div_qr's limits */
+  ASSERT (dp[0] & 1);
+
+  tp = TMP_SALLOC_LIMBS (dn);
+
+  qn = nn - dn;
+
+  if (qn > dn)
+    {
+      /* Reduce qn mod dn without division, optimizing small operations.  */
+      do
+       qn -= dn;
+      while (qn > dn);
+
+      /* Perform the typically smaller block first.  */
+      if (BELOW_THRESHOLD (qn, DC_BDIV_QR_THRESHOLD))
+       cy = mpn_sbpi1_bdiv_qr (qp, np, 2 * qn, dp, qn, dinv);
+      else
+       cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, qn, dinv, tp);
+
+      rr = 0;
+      if (qn != dn)            /* apply the dn - qn divisor limbs not used above */
+       {
+         if (qn > dn - qn)
+           mpn_mul (tp, qp, qn, dp + qn, dn - qn);
+         else
+           mpn_mul (tp, dp + qn, dn - qn, qp, qn);
+         mpn_incr_u (tp + qn, cy);
+
+         rr = mpn_sub (np + qn, np + qn, nn - qn, tp, dn);
+         cy = 0;
+       }
+
+      np += qn;
+      qp += qn;
+
+      qn = nn - dn - qn;       /* quotient limbs still to produce */
+      do
+       {
+         rr += mpn_sub_1 (np + dn, np + dn, qn, cy);
+         cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, dn, dinv, tp);
+         qp += dn;
+         np += dn;
+         qn -= dn;
+       }
+      while (qn > 0);
+      TMP_FREE;
+      return rr + cy;
+    }
+
+  /* Here qn <= dn, so a single block suffices.  */
+  if (BELOW_THRESHOLD (qn, DC_BDIV_QR_THRESHOLD))
+    cy = mpn_sbpi1_bdiv_qr (qp, np, 2 * qn, dp, qn, dinv);
+  else
+    cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, qn, dinv, tp);
+
+  rr = 0;
+  if (qn != dn)
+    {
+      if (qn > dn - qn)
+       mpn_mul (tp, qp, qn, dp + qn, dn - qn);
+      else
+       mpn_mul (tp, dp + qn, dn - qn, qp, qn);
+      mpn_incr_u (tp + qn, cy);
+
+      rr = mpn_sub (np + qn, np + qn, nn - qn, tp, dn);
+      cy = 0;
+    }
+
+  TMP_FREE;
+  return rr + cy;
+}
diff --git a/mpn/generic/dcpi1_div_q.c b/mpn/generic/dcpi1_div_q.c

new file mode 100644 (file)

index 0000000..9e5cea5
--- /dev/null
+++ b/mpn/generic/dcpi1_div_q.c
@@ -0,0 +1,76 @@
+/* mpn_dcpi1_div_q -- divide-and-conquer division, returning exact quotient
+   only.
+
+   Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2006, 2007, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Divide {np,nn} by {dp,dn}, storing the nn-dn low quotient limbs at qp and
+   returning the most significant quotient limb.  {np,nn} is only read.
+
+   An approximate quotient with one extra guard limb is obtained from
+   mpn_dcpi1_divappr_q, run on a copy of N extended with a zero low limb.
+   When the guard limb is nonzero the approximation is taken as it is; when
+   it is zero the quotient is multiplied back and compared with N, and
+   decremented if the product is too large.
+
+   The temporaries are nn+1 and nn-dn+1 limbs.  Those sizes are not bounded
+   by any threshold, so they are taken with TMP_ALLOC_LIMBS instead of the
+   small-allocation TMP_SALLOC_LIMBS, to avoid unbounded stack use.  */
+mp_limb_t
+mpn_dcpi1_div_q (mp_ptr qp, mp_ptr np, mp_size_t nn,
+                mp_srcptr dp, mp_size_t dn, gmp_pi1_t *dinv)
+{
+  mp_ptr tp, wp;
+  mp_limb_t qh;
+  mp_size_t qn;
+  TMP_DECL;
+
+  TMP_MARK;
+
+  ASSERT (dn >= 6);
+  ASSERT (nn - dn >= 3);
+  ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);
+
+  /* Working copy of N, shifted up one limb with a zero limb below it.  */
+  tp = TMP_ALLOC_LIMBS (nn + 1);
+  MPN_COPY (tp + 1, np, nn);
+  tp[0] = 0;
+
+  qn = nn - dn;
+  wp = TMP_ALLOC_LIMBS (qn + 1);       /* quotient plus guard limb wp[0] */
+
+  qh = mpn_dcpi1_divappr_q (wp, tp, nn + 1, dp, dn, dinv);
+
+  if (wp[0] == 0)
+    {
+      mp_limb_t cy;
+
+      /* Form Q * D in tp; mpn_mul wants the longer operand first.  */
+      if (qn > dn)
+       mpn_mul (tp, wp + 1, qn, dp, dn);
+      else
+       mpn_mul (tp, dp, dn, wp + 1, qn);
+
+      /* Account for the high quotient limb, which is not part of wp.  */
+      cy = (qh != 0) ? mpn_add_n (tp + qn, tp + qn, dp, dn) : 0;
+
+      if (cy || mpn_cmp (tp, np, nn) > 0) /* At most is wrong by one, no cycle. */
+       qh -= mpn_sub_1 (qp, wp + 1, qn, 1);
+      else /* Same as below */
+       MPN_COPY (qp, wp + 1, qn);
+    }
+  else
+    MPN_COPY (qp, wp + 1, qn);
+
+  TMP_FREE;
+  return qh;
+}
diff --git a/mpn/generic/dcpi1_div_qr.c b/mpn/generic/dcpi1_div_qr.c

new file mode 100644 (file)

index 0000000..815173e
--- /dev/null
+++ b/mpn/generic/dcpi1_div_qr.c
@@ -0,0 +1,238 @@
+/* mpn_dcpi1_div_qr_n -- recursive divide-and-conquer division for arbitrary
+   size operands.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2006, 2007, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Divide {np,2n} by {dp,n}, developing the quotient in a high part of
+   ceil(n/2) limbs and then a low part of floor(n/2) limbs, both stored in
+   {qp,n}.  np is updated in place (presumably ending with the remainder in
+   its low n limbs -- TODO confirm).  tp is scratch for n-limb products.
+   Returns qh, the value returned for the high part after adjustment.  */
+mp_limb_t
+mpn_dcpi1_div_qr_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
+                   gmp_pi1_t *dinv, mp_ptr tp)
+{
+  mp_size_t lo, hi;
+  mp_limb_t cy, qh, ql;
+
+  lo = n >> 1;                 /* floor(n/2) */
+  hi = n - lo;                 /* ceil(n/2) */
+
+  if (BELOW_THRESHOLD (hi, DC_DIV_QR_THRESHOLD))       /* high hi quotient limbs */
+    qh = mpn_sbpi1_div_qr (qp + lo, np + 2 * lo, 2 * hi, dp + lo, hi, dinv->inv32);
+  else
+    qh = mpn_dcpi1_div_qr_n (qp + lo, np + 2 * lo, dp + lo, hi, dinv, tp);
+
+  mpn_mul (tp, qp + lo, hi, dp, lo);   /* high quotient times the low lo divisor limbs */
+
+  cy = mpn_sub_n (np + lo, np + lo, tp, n);
+  if (qh != 0)
+    cy += mpn_sub_n (np + n, np + n, dp, lo);
+
+  while (cy != 0)              /* quotient too large: step it down, add D back */
+    {
+      qh -= mpn_sub_1 (qp + lo, qp + lo, hi, 1);
+      cy -= mpn_add_n (np + lo, np + lo, dp, n);
+    }
+
+  if (BELOW_THRESHOLD (lo, DC_DIV_QR_THRESHOLD))       /* low lo quotient limbs */
+    ql = mpn_sbpi1_div_qr (qp, np + hi, 2 * lo, dp + hi, lo, dinv->inv32);
+  else
+    ql = mpn_dcpi1_div_qr_n (qp, np + hi, dp + hi, lo, dinv, tp);
+
+  mpn_mul (tp, dp, hi, qp, lo);        /* low quotient times the low hi divisor limbs */
+
+  cy = mpn_sub_n (np, np, tp, n);
+  if (ql != 0)
+    cy += mpn_sub_n (np + lo, np + lo, dp, hi);
+
+  while (cy != 0)              /* same adjustment for the low part */
+    {
+      mpn_sub_1 (qp, qp, lo, 1);
+      cy -= mpn_add_n (np, np, dp, n);
+    }
+
+  return qh;
+}
+/* Divide {np,nn} by {dp,dn}, storing the nn-dn low quotient limbs at qp and
+   returning the most significant quotient limb qh.  np is updated in place.
+   The quotient is developed from the top: first a block of at most dn limbs
+   (the "typically smaller" one), then full dn-limb blocks.  All pointers
+   are advanced past their operands and indexed downwards.  */
+mp_limb_t
+mpn_dcpi1_div_qr (mp_ptr qp,
+                 mp_ptr np, mp_size_t nn,
+                 mp_srcptr dp, mp_size_t dn,
+                 gmp_pi1_t *dinv)
+{
+  mp_size_t qn;
+  mp_limb_t qh, cy;
+  mp_ptr tp;
+  TMP_DECL;
+
+  TMP_MARK;
+
+  ASSERT (dn >= 6);            /* to adhere to mpn_sbpi1_div_qr's limits */
+  ASSERT (nn - dn >= 3);       /* to adhere to mpn_sbpi1_div_qr's limits */
+  ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);
+
+  tp = TMP_SALLOC_LIMBS (dn);
+
+  qn = nn - dn;
+  qp += qn;                    /* qp, np and dp now point just past their operands */
+  np += nn;
+  dp += dn;
+
+  if (qn > dn)
+    {
+      /* Reduce qn mod dn without division, optimizing small operations.  */
+      do
+       qn -= dn;
+      while (qn > dn);
+
+      qp -= qn;                        /* point at low limb of next quotient block */
+      np -= qn;                        /* point in the middle of partial remainder */
+
+      /* Perform the typically smaller block first.  */
+      if (qn == 1)
+       {
+         mp_limb_t q, n2, n1, n0, d1, d0;
+
+         /* Handle qh up front, for simplicity. */
+         qh = mpn_cmp (np - dn + 1, dp - dn, dn) >= 0;
+         if (qh)
+           ASSERT_NOCARRY (mpn_sub_n (np - dn + 1, np - dn + 1, dp - dn, dn));
+
+         /* A single iteration of schoolbook: One 3/2 division,
+            followed by the bignum update and adjustment. */
+         n2 = np[0];
+         n1 = np[-1];
+         n0 = np[-2];
+         d1 = dp[-1];
+         d0 = dp[-2];
+
+         ASSERT (n2 < d1 || (n2 == d1 && n1 <= d0));
+
+         if (UNLIKELY (n2 == d1) && n1 == d0)
+           {
+             q = GMP_NUMB_MASK;        /* largest limb value is used as the quotient limb */
+             cy = mpn_submul_1 (np - dn, dp - dn, dn, q);
+             ASSERT (cy == n2);
+           }
+         else
+           {
+             udiv_qr_3by2 (q, n1, n0, n2, n1, n0, d1, d0, dinv->inv32);
+
+             if (dn > 2)
+               {
+                 mp_limb_t cy, cy1;    /* this cy shadows the function-level one */
+                 cy = mpn_submul_1 (np - dn, dp - dn, dn - 2, q);
+
+                 cy1 = n0 < cy;
+                 n0 = (n0 - cy) & GMP_NUMB_MASK;
+                 cy = n1 < cy1;
+                 n1 = (n1 - cy1) & GMP_NUMB_MASK;
+                 np[-2] = n0;
+
+                 if (UNLIKELY (cy != 0))       /* borrow out: q was one too large */
+                   {
+                     n1 += d1 + mpn_add_n (np - dn, np - dn, dp - dn, dn - 1);
+                     qh -= (q == 0);
+                     q = (q - 1) & GMP_NUMB_MASK;
+                   }
+               }
+             else
+               np[-2] = n0;
+
+             np[-1] = n1;
+           }
+         qp[0] = q;
+       }
+      else
+       {
+         /* Do a 2qn / qn division */
+         if (qn == 2)
+           qh = mpn_divrem_2 (qp, 0L, np - 2, 4, dp - 2); /* FIXME: obsolete function. Use 5/3 division? */
+         else if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
+           qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32);
+         else
+           qh = mpn_dcpi1_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);
+
+         if (qn != dn)         /* apply the low dn - qn divisor limbs not used above */
+           {
+             if (qn > dn - qn)
+               mpn_mul (tp, qp, qn, dp - dn, dn - qn);
+             else
+               mpn_mul (tp, dp - dn, dn - qn, qp, qn);
+
+             cy = mpn_sub_n (np - dn, np - dn, tp, dn);
+             if (qh != 0)
+               cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);
+
+             while (cy != 0)   /* quotient too large: step it down, add D back */
+               {
+                 qh -= mpn_sub_1 (qp, qp, qn, 1);
+                 cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
+               }
+           }
+       }
+
+      qn = nn - dn - qn;       /* quotient limbs still to produce, a multiple of dn */
+      do
+       {
+         qp -= dn;
+         np -= dn;
+         mpn_dcpi1_div_qr_n (qp, np - dn, dp - dn, dn, dinv, tp);
+         qn -= dn;
+       }
+      while (qn > 0);
+    }
+  else
+    {
+      qp -= qn;                        /* point at low limb of next quotient block */
+      np -= qn;                        /* point in the middle of partial remainder */
+
+      if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
+       qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32);
+      else
+       qh = mpn_dcpi1_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);
+
+      if (qn != dn)
+       {
+         if (qn > dn - qn)
+           mpn_mul (tp, qp, qn, dp - dn, dn - qn);
+         else
+           mpn_mul (tp, dp - dn, dn - qn, qp, qn);
+
+         cy = mpn_sub_n (np - dn, np - dn, tp, dn);
+         if (qh != 0)
+           cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);
+
+         while (cy != 0)
+           {
+             qh -= mpn_sub_1 (qp, qp, qn, 1);
+             cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
+           }
+       }
+    }
+
+  TMP_FREE;
+  return qh;
+}
diff --git a/mpn/generic/dcpi1_divappr_q.c b/mpn/generic/dcpi1_divappr_q.c

new file mode 100644 (file)

index 0000000..a0f79ed
--- /dev/null
+++ b/mpn/generic/dcpi1_divappr_q.c
@@ -0,0 +1,246 @@
+/* mpn_dcpi1_divappr_q -- divide-and-conquer division, returning approximate
+   quotient.  The quotient returned is either correct, or one too large.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2006, 2007, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Approximate-quotient counterpart of mpn_dcpi1_div_qr_n for {np,2n} divided
+   by {dp,n}.  The high ceil(n/2) quotient limbs are developed with the
+   div_qr routines and adjusted.  The low floor(n/2) limbs come from the
+   divappr routines, and np is not updated after them.  If that low call
+   returns nonzero, the low quotient limbs are all set to GMP_NUMB_MASK.
+   Returns qh from the high part.  tp is scratch.  */
+mp_limb_t
+mpn_dcpi1_divappr_q_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
+                      gmp_pi1_t *dinv, mp_ptr tp)
+{
+  mp_size_t lo, hi;
+  mp_limb_t cy, qh, ql;
+
+  lo = n >> 1;                 /* floor(n/2) */
+  hi = n - lo;                 /* ceil(n/2) */
+
+  if (BELOW_THRESHOLD (hi, DC_DIV_QR_THRESHOLD))       /* high hi quotient limbs */
+    qh = mpn_sbpi1_div_qr (qp + lo, np + 2 * lo, 2 * hi, dp + lo, hi, dinv->inv32);
+  else
+    qh = mpn_dcpi1_div_qr_n (qp + lo, np + 2 * lo, dp + lo, hi, dinv, tp);
+
+  mpn_mul (tp, qp + lo, hi, dp, lo);   /* high quotient times the low lo divisor limbs */
+
+  cy = mpn_sub_n (np + lo, np + lo, tp, n);
+  if (qh != 0)
+    cy += mpn_sub_n (np + n, np + n, dp, lo);
+
+  while (cy != 0)              /* quotient too large: step it down, add D back */
+    {
+      qh -= mpn_sub_1 (qp + lo, qp + lo, hi, 1);
+      cy -= mpn_add_n (np + lo, np + lo, dp, n);
+    }
+
+  if (BELOW_THRESHOLD (lo, DC_DIVAPPR_Q_THRESHOLD))    /* low lo limbs, approximate */
+    ql = mpn_sbpi1_divappr_q (qp, np + hi, 2 * lo, dp + hi, lo, dinv->inv32);
+  else
+    ql = mpn_dcpi1_divappr_q_n (qp, np + hi, dp + hi, lo, dinv, tp);
+
+  if (UNLIKELY (ql != 0))      /* low part overflowed: store the largest lo-limb value */
+    {
+      mp_size_t i;
+      for (i = 0; i < lo; i++)
+       qp[i] = GMP_NUMB_MASK;
+    }
+
+  return qh;
+}
+/* Approximate quotient of {np,nn} by {dp,dn}: stores the nn-dn low quotient
+   limbs at qp and returns the most significant quotient limb qh.  np is
+   modified.  When nn-dn >= dn the upper blocks are developed with the
+   div_qr routines and only the last block with divappr, using a guard limb.
+   Otherwise a single divappr of nn-dn+1 limbs is done into a temporary and
+   its lowest limb is dropped.  */
+mp_limb_t
+mpn_dcpi1_divappr_q (mp_ptr qp, mp_ptr np, mp_size_t nn,
+                    mp_srcptr dp, mp_size_t dn, gmp_pi1_t *dinv)
+{
+  mp_size_t qn;
+  mp_limb_t qh, cy, qsave;
+  mp_ptr tp;
+  TMP_DECL;
+
+  TMP_MARK;
+
+  ASSERT (dn >= 6);
+  ASSERT (nn > dn);
+  ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);
+
+  qn = nn - dn;
+  qp += qn;                    /* qp, np and dp now point just past their operands */
+  np += nn;
+  dp += dn;
+
+  if (qn >= dn)
+    {
+      qn++;                    /* pretend we'll need an extra limb */
+      /* Reduce qn mod dn without division, optimizing small operations.  */
+      do
+       qn -= dn;
+      while (qn > dn);
+
+      qp -= qn;                        /* point at low limb of next quotient block */
+      np -= qn;                        /* point in the middle of partial remainder */
+
+      tp = TMP_SALLOC_LIMBS (dn);
+
+      /* Perform the typically smaller block first.  */
+      if (qn == 1)
+       {
+         mp_limb_t q, n2, n1, n0, d1, d0;
+
+         /* Handle qh up front, for simplicity. */
+         qh = mpn_cmp (np - dn + 1, dp - dn, dn) >= 0;
+         if (qh)
+           ASSERT_NOCARRY (mpn_sub_n (np - dn + 1, np - dn + 1, dp - dn, dn));
+
+         /* A single iteration of schoolbook: One 3/2 division,
+            followed by the bignum update and adjustment. */
+         n2 = np[0];
+         n1 = np[-1];
+         n0 = np[-2];
+         d1 = dp[-1];
+         d0 = dp[-2];
+
+         ASSERT (n2 < d1 || (n2 == d1 && n1 <= d0));
+
+         if (UNLIKELY (n2 == d1) && n1 == d0)
+           {
+             q = GMP_NUMB_MASK;        /* largest limb value is used as the quotient limb */
+             cy = mpn_submul_1 (np - dn, dp - dn, dn, q);
+             ASSERT (cy == n2);
+           }
+         else
+           {
+             udiv_qr_3by2 (q, n1, n0, n2, n1, n0, d1, d0, dinv->inv32);
+
+             if (dn > 2)
+               {
+                 mp_limb_t cy, cy1;    /* this cy shadows the function-level one */
+                 cy = mpn_submul_1 (np - dn, dp - dn, dn - 2, q);
+
+                 cy1 = n0 < cy;
+                 n0 = (n0 - cy) & GMP_NUMB_MASK;
+                 cy = n1 < cy1;
+                 n1 = (n1 - cy1) & GMP_NUMB_MASK;
+                 np[-2] = n0;
+
+                 if (UNLIKELY (cy != 0))       /* borrow out: q was one too large */
+                   {
+                     n1 += d1 + mpn_add_n (np - dn, np - dn, dp - dn, dn - 1);
+                     qh -= (q == 0);
+                     q = (q - 1) & GMP_NUMB_MASK;
+                   }
+               }
+             else
+               np[-2] = n0;
+
+             np[-1] = n1;
+           }
+         qp[0] = q;
+       }
+      else
+       {
+         if (qn == 2)
+           qh = mpn_divrem_2 (qp, 0L, np - 2, 4, dp - 2);
+         else if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
+           qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32);
+         else
+           qh = mpn_dcpi1_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);
+
+         if (qn != dn)         /* apply the low dn - qn divisor limbs not used above */
+           {
+             if (qn > dn - qn)
+               mpn_mul (tp, qp, qn, dp - dn, dn - qn);
+             else
+               mpn_mul (tp, dp - dn, dn - qn, qp, qn);
+
+             cy = mpn_sub_n (np - dn, np - dn, tp, dn);
+             if (qh != 0)
+               cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);
+
+             while (cy != 0)   /* quotient too large: step it down, add D back */
+               {
+                 qh -= mpn_sub_1 (qp, qp, qn, 1);
+                 cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
+               }
+           }
+       }
+      qn = nn - dn - qn + 1;   /* limbs still to produce, counting the pretended one */
+      while (qn > dn)
+       {
+         qp -= dn;
+         np -= dn;
+         mpn_dcpi1_div_qr_n (qp, np - dn, dp - dn, dn, dinv, tp);
+         qn -= dn;
+       }
+
+      /* Since we pretended we'd need an extra quotient limb before, we now
+        have made sure the code above left just dn-1=qn quotient limbs to
+        develop.  Develop that plus a guard limb. */
+      qn--;
+      qp -= qn;
+      np -= dn;
+      qsave = qp[qn];          /* limb above this block, overwritten by the call below */
+      mpn_dcpi1_divappr_q_n (qp, np - dn, dp - dn, dn, dinv, tp);
+      MPN_COPY_INCR (qp, qp + 1, qn);  /* drop the guard limb */
+      qp[qn] = qsave;
+    }
+  else    /* (qn < dn) */
+    {
+      mp_ptr q2p;
+#if 0                          /* not possible since we demand nn > dn */
+      if (qn == 0)
+       {
+         qh = mpn_cmp (np - dn, dp - dn, dn) >= 0;
+         if (qh)
+           mpn_sub_n (np - dn, np - dn, dp - dn, dn);
+         TMP_FREE;
+         return qh;
+       }
+#endif
+
+      qp -= qn;                        /* point at low limb of next quotient block */
+      np -= qn;                        /* point in the middle of partial remainder */
+
+      q2p = TMP_SALLOC_LIMBS (qn + 1);
+      /* Should we at all check DC_DIVAPPR_Q_THRESHOLD here, or rely on
+        callers not to be silly?  */
+      if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD))
+       {
+         qh = mpn_sbpi1_divappr_q (q2p, np - qn - 2, 2 * (qn + 1),
+                                   dp - (qn + 1), qn + 1, dinv->inv32);
+       }
+      else
+       {
+         /* It is tempting to use qp for recursive scratch and put quotient in
+            tp, but the recursive scratch needs one limb too many.  */
+         tp = TMP_SALLOC_LIMBS (qn + 1);
+         qh = mpn_dcpi1_divappr_q_n (q2p, np - qn - 2, dp - (qn + 1), qn + 1, dinv, tp);
+       }
+      MPN_COPY (qp, q2p + 1, qn);      /* drop the guard limb q2p[0] */
+    }
+
+  TMP_FREE;
+  return qh;
+}
diff --git a/mpn/generic/div_q.c b/mpn/generic/div_q.c

new file mode 100644 (file)

index 0000000..b2a0fff
--- /dev/null
+++ b/mpn/generic/div_q.c
@@ -0,0 +1,312 @@
+/* mpn_div_q -- division for arbitrary size operands.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Compute Q = N/D with truncation.
+     N = {np,nn}
+     D = {dp,dn}
+     Q = {qp,nn-dn+1}
+     T = {scratch,nn+1} is scratch space
+   N and D are both untouched by the computation.
+   N and T may overlap; pass the same space if N is irrelevant after the call,
+   but note that T needs an extra limb (nn+1 limbs in total).
+
+   Operand requirements:
+     N >= D > 0
+     dp[dn-1] != 0
+     No overlap between the N, D, and Q areas.
+
+   This division function does not clobber its input operands, since it is
+   intended to support average-O(qn) division, and for that to be effective, it
+   cannot put requirements on callers to copy a O(nn) operand.
+
+   If a caller does not care about the value of {np,nn+1} after calling this
+   function, it should pass np also for the scratch argument.  This function
+   will then save some time and space by avoiding allocation and copying.
+   (FIXME: Is this a good design?  We only really save any copying for
+   already-normalised divisors, which should be rare.  It also prevents us from
+   reasonably asking for all scratch space we need.)
+
+   We write nn-dn+1 limbs for the quotient, but return void.  Why not return
+   the most significant quotient limb?  Look at the 4 main code blocks below
+   (consisting of an outer if-else where each arm contains an if-else). It is
+   tricky for the first code block, since the mpn_*_div_q calls will typically
+   generate all nn-dn+1 and return 0 or 1.  I don't see how to fix that unless
+   we generate the most significant quotient limb here, before calling
+   mpn_*_div_q, or put the quotient in a temporary area.  Since this is a
+   critical division case (the SB sub-case in particular) copying is not a good
+   idea.
+
+   It might make sense to split the if-else parts of the (qn + FUDGE
+   >= dn) blocks into separate functions, since we could promise quite
+   different things to callers in these two cases.  The 'then' case
+   benefits from np=scratch, and it could perhaps even tolerate qp=np,
+   saving some headache for many callers.
+
+   FIXME: Scratch allocation leaves a lot to be desired.  E.g., for the MU size
+   operands, we do not reuse the huge scratch for adjustments.  This can be a
+   serious waste of memory for the largest operands.
+*/
+
+/* FUDGE determines when to try getting an approximate quotient from the upper
+   parts of the dividend and divisor, then adjust.  N.B. FUDGE must be >= 2
+   for the code to be correct.  */
+#define FUDGE 5                        /* FIXME: tune this */
+
+#define DC_DIV_Q_THRESHOLD      DC_DIVAPPR_Q_THRESHOLD
+#define MU_DIV_Q_THRESHOLD      MU_DIVAPPR_Q_THRESHOLD
+#define MUPI_DIV_Q_THRESHOLD  MUPI_DIVAPPR_Q_THRESHOLD
+#ifndef MUPI_DIVAPPR_Q_THRESHOLD
+#define MUPI_DIVAPPR_Q_THRESHOLD  MUPI_DIV_QR_THRESHOLD
+#endif
+
+void
+mpn_div_q (mp_ptr qp,
+          mp_srcptr np, mp_size_t nn,
+          mp_srcptr dp, mp_size_t dn, mp_ptr scratch)
+{
+  mp_ptr new_dp, new_np, tp, rp;
+  mp_limb_t cy, dh, qh;
+  mp_size_t new_nn, qn;
+  gmp_pi1_t dinv;
+  int cnt;
+  TMP_DECL;
+  TMP_MARK;
+
+  ASSERT (nn >= dn);
+  ASSERT (dn > 0);
+  ASSERT (dp[dn - 1] != 0);
+  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, np, nn));
+  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, dp, dn));
+  ASSERT (MPN_SAME_OR_SEPARATE_P (np, scratch, nn));
+
+  ASSERT_ALWAYS (FUDGE >= 2);
+
+  if (dn == 1)
+    {
+      mpn_divrem_1 (qp, 0L, np, nn, dp[dn - 1]);  /* single-limb divisor; return value is ignored */
+      return;
+    }
+
+  qn = nn - dn + 1;            /* Quotient size, high limb might be zero */
+
+  if (qn + FUDGE >= dn)        /* quotient not much shorter than divisor: divide using all of N and D */
+    {
+      /* |________________________|
+                          |_______|  */
+      new_np = scratch;        /* the (possibly shifted) copy of N is built in the caller's scratch */
+
+      dh = dp[dn - 1];
+      if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))  /* high bit clear: divisor must be normalised first */
+       {
+         count_leading_zeros (cnt, dh);
+
+         cy = mpn_lshift (new_np, np, nn, cnt);
+         new_np[nn] = cy;
+         new_nn = nn + (cy != 0);  /* shifted dividend is one limb longer when bits were shifted out */
+
+         new_dp = TMP_ALLOC_LIMBS (dn);
+         mpn_lshift (new_dp, dp, dn, cnt);
+
+         if (dn == 2)
+           {
+             qh = mpn_divrem_2 (qp, 0L, new_np, new_nn, new_dp);
+           }
+         else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||
+                  BELOW_THRESHOLD (new_nn - dn, DC_DIV_Q_THRESHOLD))
+           {
+             invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]);
+             qh = mpn_sbpi1_div_q (qp, new_np, new_nn, new_dp, dn, dinv.inv32);
+           }
+         else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) ||   /* fast condition */
+                  BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */
+                  (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... */
+                  + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn)   /* ...condition */
+           {
+             invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]);
+             qh = mpn_dcpi1_div_q (qp, new_np, new_nn, new_dp, dn, &dinv);
+           }
+         else
+           {
+             mp_size_t itch = mpn_mu_div_q_itch (new_nn, dn, 0);
+             mp_ptr scratch = TMP_ALLOC_LIMBS (itch);  /* shadows the scratch parameter inside this block */
+             qh = mpn_mu_div_q (qp, new_np, new_nn, new_dp, dn, scratch);
+           }
+         if (cy == 0)
+           qp[qn - 1] = qh;    /* no extra dividend limb: qh is the top quotient limb */
+         else if (UNLIKELY (qh != 0))
+           {
+             /* This happens only when the quotient is close to B^n and
+                mpn_*_divappr_q returned B^n.  */
+             mp_size_t i, n;
+             n = new_nn - dn;
+             for (i = 0; i < n; i++)
+               qp[i] = GMP_NUMB_MAX;
+             qh = 0;           /* currently ignored */
+           }
+       }
+      else  /* divisor is already normalised */
+       {
+         if (new_np != np)
+           MPN_COPY (new_np, np, nn);
+
+         if (dn == 2)
+           {
+             qh = mpn_divrem_2 (qp, 0L, new_np, nn, dp);
+           }
+         else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||
+                  BELOW_THRESHOLD (nn - dn, DC_DIV_Q_THRESHOLD))
+           {
+             invert_pi1 (dinv, dh, dp[dn - 2]);
+             qh = mpn_sbpi1_div_q (qp, new_np, nn, dp, dn, dinv.inv32);
+           }
+         else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) ||   /* fast condition */
+                  BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */
+                  (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... */
+                  + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn)   /* ...condition */
+           {
+             invert_pi1 (dinv, dh, dp[dn - 2]);
+             qh = mpn_dcpi1_div_q (qp, new_np, nn, dp, dn, &dinv);
+           }
+         else
+           {
+             mp_size_t itch = mpn_mu_div_q_itch (nn, dn, 0);
+             mp_ptr scratch = TMP_ALLOC_LIMBS (itch);  /* shadows the scratch parameter inside this block */
+             qh = mpn_mu_div_q (qp, np, nn, dp, dn, scratch);  /* reads np directly rather than the copy in new_np */
+           }
+         qp[nn - dn] = qh;
+       }
+    }
+  else  /* qn + FUDGE < dn: approximate from the high limbs of N and D, then adjust */
+    {
+      /* |________________________|
+                |_________________|  */
+      tp = TMP_ALLOC_LIMBS (qn + 1);  /* approximate quotient: qn limbs above one low guard limb tp[0] */
+
+      new_np = scratch;
+      new_nn = 2 * qn + 1;     /* only the high 2*qn+1 limbs of N take part in the approximation */
+      if (new_np == np)
+       /* We need {np,nn} to remain untouched until the final adjustment, so
+          we need to allocate separate space for new_np.  */
+       new_np = TMP_ALLOC_LIMBS (new_nn + 1);
+
+
+      dh = dp[dn - 1];
+      if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))  /* high bit clear: divisor must be normalised first */
+       {
+         count_leading_zeros (cnt, dh);
+
+         cy = mpn_lshift (new_np, np + nn - new_nn, new_nn, cnt);
+         new_np[new_nn] = cy;
+
+         new_nn += (cy != 0);
+
+         new_dp = TMP_ALLOC_LIMBS (qn + 1);
+         mpn_lshift (new_dp, dp + dn - (qn + 1), qn + 1, cnt);
+         new_dp[0] |= dp[dn - (qn + 1) - 1] >> (GMP_NUMB_BITS - cnt);  /* bring in the high bits of the next lower divisor limb */
+
+         if (qn + 1 == 2)
+           {
+             qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp);
+           }
+         else if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD - 1))
+           {
+             invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]);
+             qh = mpn_sbpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv.inv32);
+           }
+         else if (BELOW_THRESHOLD (qn, MU_DIVAPPR_Q_THRESHOLD - 1))
+           {
+             invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]);
+             qh = mpn_dcpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, &dinv);
+           }
+         else
+           {
+             mp_size_t itch = mpn_mu_divappr_q_itch (new_nn, qn + 1, 0);
+             mp_ptr scratch = TMP_ALLOC_LIMBS (itch);  /* shadows the scratch parameter inside this block */
+             qh = mpn_mu_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, scratch);
+           }
+         if (cy == 0)
+           tp[qn] = qh;
+         else if (UNLIKELY (qh != 0))
+           {
+             /* This happens only when the quotient is close to B^n and
+                mpn_*_divappr_q returned B^n.  */
+             mp_size_t i, n;
+             n = new_nn - (qn + 1);
+             for (i = 0; i < n; i++)
+               tp[i] = GMP_NUMB_MAX;
+             qh = 0;           /* currently ignored */
+           }
+       }
+      else  /* divisor is already normalised */
+       {
+         MPN_COPY (new_np, np + nn - new_nn, new_nn); /* pointless if MU will be used */
+
+         new_dp = (mp_ptr) dp + dn - (qn + 1);  /* high qn+1 limbs of D, used in place */
+
+         if (qn == 2 - 1)
+           {
+             qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp);
+           }
+         else if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD - 1))
+           {
+             invert_pi1 (dinv, dh, new_dp[qn - 1]);
+             qh = mpn_sbpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv.inv32);
+           }
+         else if (BELOW_THRESHOLD (qn, MU_DIVAPPR_Q_THRESHOLD - 1))
+           {
+             invert_pi1 (dinv, dh, new_dp[qn - 1]);
+             qh = mpn_dcpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, &dinv);
+           }
+         else
+           {
+             mp_size_t itch = mpn_mu_divappr_q_itch (new_nn, qn + 1, 0);
+             mp_ptr scratch = TMP_ALLOC_LIMBS (itch);  /* shadows the scratch parameter inside this block */
+             qh = mpn_mu_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, scratch);
+           }
+         tp[qn] = qh;
+       }
+
+      MPN_COPY (qp, tp + 1, qn);  /* the quotient is tp without its guard limb tp[0] */
+      if (tp[0] <= 4)  /* small guard limb: the estimate may be one too large (bound 4 presumably reflects the divappr error), so verify */
+        {
+         mp_size_t rn;
+
+          rp = TMP_ALLOC_LIMBS (dn + qn);
+          mpn_mul (rp, dp, dn, tp + 1, qn);  /* rp = D * Q; dn > qn holds in this branch */
+         rn = dn + qn;
+         rn -= rp[rn - 1] == 0;  /* drop a zero high limb of the product */
+
+          if (rn > nn || mpn_cmp (np, rp, nn) < 0)  /* D * Q exceeds N */
+            mpn_decr_u (qp, 1);
+        }
+    }
+
+  TMP_FREE;
+}
diff --git a/mpn/generic/dive_1.c b/mpn/generic/dive_1.c

new file mode 100644 (file)

index 0000000..f246b09
--- /dev/null
+++ b/mpn/generic/dive_1.c
@@ -0,0 +1,148 @@
+/* mpn_divexact_1 -- mpn by limb exact division.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2000, 2001, 2002, 2003, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+
+/* Divide a={src,size} by d=divisor and store the quotient in q={dst,size}.
+   q will only be correct if d divides a exactly.
+
+   A separate loop is used for shift==0 because n<<GMP_LIMB_BITS doesn't
+   give zero on all CPUs (for instance it doesn't on the x86s).  This
+   separate loop might run faster too, helping odd divisors.
+
+   Possibilities:
+
+   mpn_divexact_1c could be created, accepting and returning c.  This would
+   let a long calculation be done piece by piece.  Currently there's no
+   particular need for that, and not returning c means that a final umul can
+   be skipped.
+
+   Another use for returning c would be letting the caller know whether the
+   division was in fact exact.  It would work just to return the carry bit
+   "c=(l>s)" and let the caller do a final umul if interested.
+
+   When the divisor is even, the factors of two could be handled with a
+   separate mpn_rshift, instead of shifting on the fly.  That might be
+   faster on some CPUs and would mean just the shift==0 style loop would be
+   needed.
+
+   If n<<GMP_LIMB_BITS gives zero on a particular CPU then the separate
+   shift==0 loop is unnecessary, and could be eliminated if there's no great
+   speed difference.
+
+   It's not clear whether "/" is the best way to handle size==1.  Alpha gcc
+   2.95 for instance has a poor "/" and might prefer the modular method.
+   Perhaps a tuned parameter should control this.
+
+   If src[size-1] < divisor then dst[size-1] will be zero, and one divide
+   step could be skipped.  A test at last step for s<divisor (or ls in the
+   even case) might be a good way to do that.  But if this code is often
+   used with small divisors then it might not be worth bothering  */
+
+void
+mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor)
+{
+  mp_size_t  i;
+  mp_limb_t  c, h, l, ls, s, s_next, inverse, dummy;
+  unsigned   shift;
+
+  ASSERT (size >= 1);
+  ASSERT (divisor != 0);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size));
+  ASSERT_MPN (src, size);
+  ASSERT_LIMB (divisor);
+
+  s = src[0];
+
+  if (size == 1)
+    {
+      dst[0] = s / divisor;  /* single limb: plain "/" instead of the modular method */
+      return;
+    }
+
+  if ((divisor & 1) == 0)
+    {
+      count_trailing_zeros (shift, divisor);
+      divisor >>= shift;  /* strip the factors of two so the remaining divisor is odd */
+    }
+  else
+    shift = 0;
+
+  binvert_limb (inverse, divisor);  /* modular inverse of the (now odd) divisor */
+  divisor <<= GMP_NAIL_BITS;  /* presumably aligns the umul_ppmm high word when nails are in use */
+
+  if (shift != 0)
+    {
+      c = 0;
+      i = 0;
+      size--;  /* the loop handles all but the top limb, which has nothing above it to shift in */
+
+      do
+       {
+         s_next = src[i+1];
+         ls = ((s >> shift) | (s_next << (GMP_NUMB_BITS-shift))) & GMP_NUMB_MASK;  /* limb i of src >> shift */
+         s = s_next;
+
+         SUBC_LIMB (c, l, ls, c);  /* l = ls - c, c = borrow */
+
+         l = (l * inverse) & GMP_NUMB_MASK;  /* quotient limb */
+         dst[i] = l;
+
+         umul_ppmm (h, dummy, l, divisor);
+         c += h;  /* carry = borrow + high limb of q*divisor */
+
+         i++;
+       }
+      while (i < size);
+
+      ls = s >> shift;  /* top limb: no higher limb to take bits from */
+      l = ls - c;
+      l = (l * inverse) & GMP_NUMB_MASK;
+      dst[i] = l;
+    }
+  else
+    {
+      l = (s * inverse) & GMP_NUMB_MASK;  /* lowest quotient limb, no carry-in */
+      dst[0] = l;
+      i = 1;
+      c = 0;
+
+      do
+       {
+         umul_ppmm (h, dummy, l, divisor);
+         c += h;  /* add the high limb of (previous q)*divisor to the carry */
+
+         s = src[i];
+         SUBC_LIMB (c, l, s, c);  /* l = s - c, c = borrow */
+
+         l = (l * inverse) & GMP_NUMB_MASK;
+         dst[i] = l;
+         i++;
+       }
+      while (i < size);
+    }
+}
diff --git a/mpn/generic/diveby3.c b/mpn/generic/diveby3.c

new file mode 100644 (file)

index 0000000..6293f65
--- /dev/null
+++ b/mpn/generic/diveby3.c
@@ -0,0 +1,163 @@
+/* mpn_divexact_by3c -- mpn exact division by 3.
+
+Copyright 2000, 2001, 2002, 2003, 2008 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#if DIVEXACT_BY3_METHOD == 0
+
+mp_limb_t
+mpn_divexact_by3c (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_limb_t c)
+{
+  mp_limb_t r;
+  r = mpn_bdiv_dbm1c (rp, up, un, GMP_NUMB_MASK / 3, GMP_NUMB_MASK / 3 * c);  /* incoming carry c is scaled by the same GMP_NUMB_MASK / 3 factor */
+
+  /* Possible bdiv_dbm1 return values are C * (GMP_NUMB_MASK / 3), 0 <= C < 3.
+     We want to return C.  We compute the remainder mod 4 and notice that the
+     inverse of (2^(2k)-1)/3 mod 4 is 1.  */
+  return r & 3;  /* r mod 4 equals C, per the note above */
+}
+
+#endif
+
+#if DIVEXACT_BY3_METHOD == 1
+
+/* The algorithm here is basically the same as mpn_divexact_1, as described
+   in the manual.  Namely at each step q = (src[i]-c)*inverse, and new c =
+   borrow(src[i]-c) + high(divisor*q).  But because the divisor is just 3,
+   high(divisor*q) can be determined with two comparisons instead of a
+   multiply.
+
+   The "c += ..."s add the high limb of 3*l to c.  That high limb will be 0,
+   1 or 2.  Doing two separate "+="s seems to give better code on gcc (as of
+   2.95.2 at least).
+
+   It will be noted that the new c is formed by adding three values each 0
+   or 1.  But the total is only 0, 1 or 2.  When the subtraction src[i]-c
+   causes a borrow, that leaves a limb value of either 0xFF...FF or
+   0xFF...FE.  The multiply by MODLIMB_INVERSE_3 gives 0x55...55 or
+   0xAA...AA respectively, and in those cases high(3*q) is only 0 or 1
+   respectively, hence a total of no more than 2.
+
+   Alternatives:
+
+   This implementation has each multiply on the dependent chain, due to
+   "l=s-c".  See below for alternative code which avoids that.  */
+
+mp_limb_t
+mpn_divexact_by3c (mp_ptr restrict rp, mp_srcptr restrict up, mp_size_t un, mp_limb_t c)
+{
+  mp_limb_t  l, q, s;
+  mp_size_t  i;
+
+  ASSERT (un >= 1);
+  ASSERT (c == 0 || c == 1 || c == 2);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, un));
+
+  i = 0;
+  do
+    {
+      s = up[i];
+      SUBC_LIMB (c, l, s, c);  /* l = s - c, c = borrow */
+
+      q = (l * MODLIMB_INVERSE_3) & GMP_NUMB_MASK;  /* quotient limb */
+      rp[i] = q;
+
+      c += (q >= GMP_NUMB_CEIL_MAX_DIV3);   /* high limb of 3*q is at least 1 */
+      c += (q >= GMP_NUMB_CEIL_2MAX_DIV3);  /* high limb of 3*q is 2 */
+    }
+  while (++i < un);
+
+  ASSERT (c == 0 || c == 1 || c == 2);
+  return c;  /* carry out, for chaining into a further call */
+}
+
+
+#endif
+
+#if DIVEXACT_BY3_METHOD == 2
+
+/* The following alternative code re-arranges the quotient calculation from
+   (src[i]-c)*inverse to instead
+
+       q = src[i]*inverse - c*inverse
+
+   thereby allowing src[i]*inverse to be scheduled back as far as desired,
+   making full use of multiplier throughput and leaving just some carry
+   handling on the dependent chain.
+
+   The carry handling consists of determining the c for the next iteration.
+   This is the same as described above, namely look for any borrow from
+   src[i]-c, and at the high of 3*q.
+
+   high(3*q) is done with two comparisons as above (in c2 and c3).  The
+   borrow from src[i]-c is incorporated into those by noting that if there's
+   a borrow then we have src[i]-c == 0xFF..FF or 0xFF..FE, in turn
+   giving q = 0x55..55 or 0xAA..AA.  Adding 1 to either of those q values is
+   enough to make high(3*q) come out 1 bigger, as required.
+
+   l = -c*inverse is calculated at the same time as c, since for most chips
+   it can be more conveniently derived from separate c1/c2/c3 values than
+   from a combined c equal to 0, 1 or 2.
+
+   The net effect is that with good pipelining this loop should be able to
+   run at perhaps 4 cycles/limb, depending on available execute resources
+   etc.
+
+   Usage:
+
+   This code is not used by default, since we really can't rely on the
+   compiler generating a good software pipeline, nor on such an approach
+   even being worthwhile on all CPUs.
+
+   Itanium is one chip where this algorithm helps though, see
+   mpn/ia64/diveby3.asm.  */
+
+mp_limb_t
+mpn_divexact_by3c (mp_ptr restrict rp, mp_srcptr restrict up, mp_size_t un, mp_limb_t cy)
+{
+  mp_limb_t  s, sm, cl, q, qx, c2, c3;
+  mp_size_t  i;
+
+  ASSERT (un >= 1);
+  ASSERT (cy == 0 || cy == 1 || cy == 2);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, un));
+
+  cl = cy == 0 ? 0 : cy == 1 ? -MODLIMB_INVERSE_3 : -2*MODLIMB_INVERSE_3;  /* cl = -cy * inverse */
+
+  for (i = 0; i < un; i++)
+    {
+      s = up[i];
+      sm = (s * MODLIMB_INVERSE_3) & GMP_NUMB_MASK;  /* independent of the carry, so off the dependent chain */
+
+      q = (cl + sm) & GMP_NUMB_MASK;  /* q = s*inverse - cy*inverse */
+      rp[i] = q;
+      qx = q + (s < cy);  /* bump q by one when s - cy borrows, so the compares below count it */
+
+      c2 = qx >= GMP_NUMB_CEIL_MAX_DIV3;
+      c3 = qx >= GMP_NUMB_CEIL_2MAX_DIV3 ;
+
+      cy = c2 + c3;  /* carry for the next limb: 0, 1 or 2 */
+      cl = (-c2 & -MODLIMB_INVERSE_3) + (-c3 & -MODLIMB_INVERSE_3);  /* -cy * inverse for the next limb */
+    }
+
+  return cy;
+}
+
+#endif
diff --git a/mpn/generic/divexact.c b/mpn/generic/divexact.c

new file mode 100644 (file)

index 0000000..c8409b2
--- /dev/null
+++ b/mpn/generic/divexact.c
@@ -0,0 +1,285 @@
+/* mpn_divexact(qp,np,nn,dp,dn,tp) -- Divide N = {np,nn} by D = {dp,dn} storing
+   the result in Q = {qp,nn-dn+1} expecting no remainder.  Overlap allowed
+   between Q and N; all other overlap disallowed.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2006, 2007, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#if 1
+void
+mpn_divexact (mp_ptr qp,
+             mp_srcptr np, mp_size_t nn,
+             mp_srcptr dp, mp_size_t dn)
+{
+  unsigned shift;
+  mp_size_t qn;
+  mp_ptr tp, wp;
+  TMP_DECL;
+
+  ASSERT (dn > 0);
+  ASSERT (nn >= dn);
+  ASSERT (dp[dn-1] > 0);
+
+  while (dp[0] == 0)  /* strip low zero limbs of D together with the matching limbs of N */
+    {
+      ASSERT (np[0] == 0);  /* exactness requires N to have the same low zero limbs */
+      dp++;
+      np++;
+      dn--;
+      nn--;
+    }
+
+  if (dn == 1)
+    {
+      MPN_DIVREM_OR_DIVEXACT_1 (qp, np, nn, dp[0]);  /* single-limb divisor */
+      return;
+    }
+
+  TMP_MARK;
+
+  qn = nn + 1 - dn;  /* quotient size in limbs */
+  count_trailing_zeros (shift, dp[0]);
+
+  if (shift > 0)  /* D is even: shift the low zero bits out of both D and N */
+    {
+      mp_size_t ss = (dn > qn) ? qn + 1 : dn;  /* divisor limbs to shift: one beyond qn when D is longer */
+
+      tp = TMP_ALLOC_LIMBS (ss);
+      mpn_rshift (tp, dp, ss, shift);
+      dp = tp;
+
+      /* Since we have excluded dn == 1, we have nn > qn, and we need
+        to shift one limb beyond qn. */
+      wp = TMP_ALLOC_LIMBS (qn + 1);
+      mpn_rshift (wp, np, qn + 1, shift);
+    }
+  else
+    {
+      wp = TMP_ALLOC_LIMBS (qn);
+      MPN_COPY (wp, np, qn);  /* working copy of the low qn limbs of N */
+    }
+
+  if (dn > qn)
+    dn = qn;  /* pass at most qn divisor limbs to mpn_bdiv_q */
+
+  tp = TMP_ALLOC_LIMBS (mpn_bdiv_q_itch (qn, dn));
+  mpn_bdiv_q (qp, wp, qn, dp, dn, tp);
+  TMP_FREE;
+}
+
+#else
+
+/* We use Jebelean's bidirectional exact division algorithm.  This is
+   somewhat naively implemented, with equal quotient parts done by 2-adic
+   division and truncating division.  Since 2-adic division is faster, it
+   should be used for a larger chunk.
+
+   This code is horrendously ugly, in all sorts of ways.
+
+   * It was hacked without much care or thought, but with a testing program.
+   * It handles scratch space frivolously, and furthermore the itch function
+     is broken.
+   * Doesn't provide any measures to deal with mu_divappr_q's +3 error.  We
+     have yet to provoke an error due to this, though.
+   * Algorithm selection leaves a lot to be desired.  In particular, the choice
+     between DC and MU isn't a point, but we treat it like one.
+   * It makes the msb part 1 or 2 limbs larger than the lsb part, even though
+     the latter is faster.  We should at least reverse this, but perhaps
+     we should make the lsb part considerably larger.  (How do we tune this?)
+*/
+
+mp_size_t
+mpn_divexact_itch (mp_size_t nn, mp_size_t dn)
+{
+  return nn + dn;              /* FIXME: this scratch size estimate is not right */
+}
+
+void
+mpn_divexact (mp_ptr qp,
+             mp_srcptr np, mp_size_t nn,
+             mp_srcptr dp, mp_size_t dn,
+             mp_ptr scratch)
+{
+  mp_size_t qn;
+  mp_size_t nn0, qn0;
+  mp_size_t nn1, qn1;
+  mp_ptr tp;
+  mp_limb_t qml;
+  mp_limb_t qh;
+  int cnt;
+  mp_ptr xdp;
+  mp_limb_t di;
+  mp_limb_t cy;
+  gmp_pi1_t dinv;
+  TMP_DECL;
+
+  TMP_MARK;
+
+  qn = nn - dn + 1;  /* quotient size in limbs */
+
+  /* For small divisors, and small quotients, don't use Jebelean's algorithm. */
+  if (dn < DIVEXACT_JEB_THRESHOLD || qn < DIVEXACT_JEB_THRESHOLD)
+    {
+      tp = scratch;
+      MPN_COPY (tp, np, qn);
+      binvert_limb (di, dp[0]);  di = -di;  /* negated modular inverse of dp[0] */
+      dn = MIN (dn, qn);
+      mpn_sbpi1_bdiv_q (qp, tp, qn, dp, dn, di);
+      TMP_FREE;
+      return;
+    }
+
+  qn0 = ((nn - dn) >> 1) + 1;  /* low quotient size */
+
+  /* If quotient is much larger than the divisor, the bidirectional algorithm
+     does not work as currently implemented.  Fall back to plain bdiv.  */
+  if (qn0 > dn)
+    {
+      if (BELOW_THRESHOLD (dn, DC_BDIV_Q_THRESHOLD))
+       {
+         tp = scratch;
+         MPN_COPY (tp, np, qn);
+         binvert_limb (di, dp[0]);  di = -di;
+         dn = MIN (dn, qn);
+         mpn_sbpi1_bdiv_q (qp, tp, qn, dp, dn, di);
+       }
+      else if (BELOW_THRESHOLD (dn, MU_BDIV_Q_THRESHOLD))
+       {
+         tp = scratch;
+         MPN_COPY (tp, np, qn);
+         binvert_limb (di, dp[0]);  di = -di;
+         mpn_dcpi1_bdiv_q (qp, tp, qn, dp, dn, di);
+       }
+      else
+       {
+         mpn_mu_bdiv_q (qp, np, qn, dp, dn, scratch);
+       }
+      TMP_FREE;
+      return;
+    }
+
+  nn0 = qn0 + qn0;
+
+  nn1 = nn0 - 1 + ((nn-dn) & 1);  /* dividend limbs used for the high part */
+  qn1 = qn0;                      /* high quotient size, enlarged below to overlap the low part */
+  if (LIKELY (qn0 != dn))
+    {
+      nn1 = nn1 + 1;
+      qn1 = qn1 + 1;
+      if (UNLIKELY (dp[dn - 1] == 1 && qn1 != dn))
+       {
+         /* If the leading divisor limb == 1, i.e. has just one bit, we have
+            to include an extra limb in order to get the needed overlap.  */
+         /* FIXME: Now with the mu_divappr_q function, we should really need
+            more overlap. That indicates one of two things: (1) The test code
+            is not good. (2) We actually overlap too much by default.  */
+         nn1 = nn1 + 1;
+         qn1 = qn1 + 1;
+       }
+    }
+
+  tp = TMP_ALLOC_LIMBS (nn1 + 1);
+
+  count_leading_zeros (cnt, dp[dn - 1]);
+
+  /* Normalize divisor, store into tmp area.  */
+  if (cnt != 0)
+    {
+      xdp = TMP_ALLOC_LIMBS (qn1);
+      mpn_lshift (xdp, dp + dn - qn1, qn1, cnt);
+    }
+  else
+    {
+      xdp = (mp_ptr) dp + dn - qn1;  /* already normalised: use the high qn1 limbs of D in place */
+    }
+
+  /* Shift dividend according to the divisor normalization.  */
+  /* FIXME: We compute too much here for XX_divappr_q, but these functions'
+     interfaces want a pointer to the imaginary least significant limb, not
+     to the least significant *used* limb.  Of course, we could leave nn1-qn1
+     rubbish limbs in the low part, to save some time.  */
+  if (cnt != 0)
+    {
+      cy = mpn_lshift (tp, np + nn - nn1, nn1, cnt);
+      if (cy != 0)
+       {
+         tp[nn1] = cy;
+         nn1++;
+       }
+    }
+  else
+    {
+      /* FIXME: This copy is not needed for mpn_mu_divappr_q, except when the
+        mpn_sub_n right before is executed.  */
+      MPN_COPY (tp, np + nn - nn1, nn1);
+    }
+
+  invert_pi1 (dinv, xdp[qn1 - 1], xdp[qn1 - 2]);
+  if (BELOW_THRESHOLD (qn1, DC_DIVAPPR_Q_THRESHOLD))
+    {
+      qp[qn0 - 1 + nn1 - qn1] = mpn_sbpi1_divappr_q (qp + qn0 - 1, tp, nn1, xdp, qn1, dinv.inv32);
+    }
+  else if (BELOW_THRESHOLD (qn1, MU_DIVAPPR_Q_THRESHOLD))
+    {
+      qp[qn0 - 1 + nn1 - qn1] = mpn_dcpi1_divappr_q (qp + qn0 - 1, tp, nn1, xdp, qn1, &dinv);
+    }
+  else
+    {
+      /* FIXME: mpn_mu_divappr_q doesn't handle qh != 0.  Work around it with a
+        conditional subtraction here.  */
+      qh = mpn_cmp (tp + nn1 - qn1, xdp, qn1) >= 0;
+      if (qh)
+       mpn_sub_n (tp + nn1 - qn1, tp + nn1 - qn1, xdp, qn1);
+      mpn_mu_divappr_q (qp + qn0 - 1, tp, nn1, xdp, qn1, scratch);
+      qp[qn0 - 1 + nn1 - qn1] = qh;
+    }
+  qml = qp[qn0 - 1];  /* overlap limb from the high-end estimate, saved before the low part overwrites it */
+
+  binvert_limb (di, dp[0]);  di = -di;  /* negated modular inverse of dp[0] */
+
+  if (BELOW_THRESHOLD (qn0, DC_BDIV_Q_THRESHOLD))
+    {
+      MPN_COPY (tp, np, qn0);
+      mpn_sbpi1_bdiv_q (qp, tp, qn0, dp, qn0, di);
+    }
+  else if (BELOW_THRESHOLD (qn0, MU_BDIV_Q_THRESHOLD))
+    {
+      MPN_COPY (tp, np, qn0);
+      mpn_dcpi1_bdiv_q (qp, tp, qn0, dp, qn0, di);
+    }
+  else
+    {
+      mpn_mu_bdiv_q (qp, np, qn0, dp, qn0, scratch);
+    }
+
+  if (qml < qp[qn0 - 1])  /* overlap limbs disagree: presumably the high estimate was one too large */
+    mpn_decr_u (qp + qn0, 1);
+
+  TMP_FREE;
+}
+#endif
diff --git a/mpn/generic/divis.c b/mpn/generic/divis.c

new file mode 100644 (file)

index 0000000..a67abdb
--- /dev/null
+++ b/mpn/generic/divis.c
@@ -0,0 +1,192 @@
+/* mpn_divisible_p -- mpn by mpn divisibility test
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2001, 2002, 2005, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Determine whether {ap,an} is divisible by {dp,dn}.  Must have both
+   operands normalized, meaning high limbs non-zero, except that an==0 is
+   allowed.
+
+   There usually won't be many low zero bits on d, but the checks for this
+   are fast and might pick up a few operand combinations, in particular they
+   might reduce d to fit the single-limb mod_1/modexact_1 code.
+
+   Future:
+
+   Getting the remainder limb by limb would make an early exit possible on
+   finding a non-zero.  This would probably have to be bdivmod style so
+   there's no addback, but it would need a multi-precision inverse and so
+   might be slower than the plain method (on small sizes at least).
+
+   When d must be normalized (shifted to high bit set), it's possible to
+   just append a low zero limb to "a" rather than bit-shifting as
+   mpn_tdiv_qr does internally, so long as it's already been checked that a
+   has at least as many trailing zero bits as d.  Or equivalently, pass
+   qxn==1 to mpn_tdiv_qr, if/when it accepts that.  */
+
+/* Return 1 if {ap,an} is divisible by {dp,dn}, 0 if not.  */
+int
+mpn_divisible_p (mp_srcptr ap, mp_size_t an,
+                mp_srcptr dp, mp_size_t dn)
+{
+  mp_limb_t  alow, dlow, dmask;
+  mp_ptr     qp, rp, tp;
+  mp_size_t  i;
+  mp_limb_t di;
+  unsigned  twos;
+  TMP_DECL;
+
+  ASSERT (an >= 0);
+  ASSERT (an == 0 || ap[an-1] != 0);
+  ASSERT (dn >= 1);
+  ASSERT (dp[dn-1] != 0);
+  ASSERT_MPN (ap, an);
+  ASSERT_MPN (dp, dn);
+
+  /* When a<d only a==0 is divisible.
+     Notice this test covers all cases of an==0. */
+  if (an < dn)
+    return (an == 0);
+
+  /* Strip low zero limbs from d, requiring a==0 on those. */
+  for (;;)
+    {
+      alow = *ap;
+      dlow = *dp;
+
+      if (dlow != 0)
+       break;
+
+      if (alow != 0)
+       return 0;  /* a has fewer low zero limbs than d, so not divisible */
+
+      /* a!=0 and d!=0 so won't get to n==0 */
+      an--; ASSERT (an >= 1);
+      dn--; ASSERT (dn >= 1);
+      ap++;
+      dp++;
+    }
+
+  /* Here alow and dlow are the lowest remaining limbs of a and d, and dlow
+     is non-zero.  */
+
+  /* a must have at least as many low zero bits as d */
+  dmask = LOW_ZEROS_MASK (dlow);
+  if ((alow & dmask) != 0)
+    return 0;
+
+  /* Single-limb divisor: either take a plain remainder, or shift d down to
+     an odd value and use the exact-division style remainder.  */
+  if (dn == 1)
+    {
+      if (ABOVE_THRESHOLD (an, BMOD_1_TO_MOD_1_THRESHOLD))
+       return mpn_mod_1 (ap, an, dlow) == 0;
+
+      count_trailing_zeros (twos, dlow);
+      dlow >>= twos;
+      return mpn_modexact_1_odd (ap, an, dlow) == 0;
+    }
+
+  /* Two-limb divisor whose high limb is no bigger than dmask: once the low
+     zero bits of dlow are shifted out, the two limbs are combined into the
+     single limb dlow and the one-limb remainder code is used.  */
+  if (dn == 2)
+    {
+      mp_limb_t  dsecond = dp[1];
+      if (dsecond <= dmask)
+       {
+         count_trailing_zeros (twos, dlow);
+         dlow = (dlow >> twos) | (dsecond << (GMP_NUMB_BITS-twos));
+         ASSERT_LIMB (dlow);
+         return MPN_MOD_OR_MODEXACT_1_ODD (ap, an, dlow) == 0;
+       }
+    }
+
+  /* Should we compute Q = A * D^(-1) mod B^k,
+                       R = A - Q * D  mod B^k
+     here, for some small values of k?  Then check if R = 0 (mod B^k).  */
+
+  /* We could also compute A' = A mod T and D' = D mod P, for some
+     P = 3 * 5 * 7 * 11 ..., and then check if any prime factor from P
+     dividing D' also divides A'.  */
+
+  TMP_MARK;
+
+  /* rp holds a working copy of a; one extra limb is reserved because a high
+     zero limb may be appended below.  */
+  rp = TMP_ALLOC_LIMBS (an + 1);
+  qp = TMP_ALLOC_LIMBS (an - dn + 1); /* FIXME: Could we avoid this */
+
+  count_trailing_zeros (twos, dp[0]);
+
+  /* Shift the low zero bits out of d (into a temporary) and shift the copy
+     of a by the same amount; otherwise just copy a.
+     NOTE(review): the shifts are written inside ASSERT_NOCARRY, so this
+     relies on that macro still evaluating its argument when assertions are
+     disabled -- confirm against gmp-impl.h.  */
+  if (twos != 0)
+    {
+      tp = TMP_ALLOC_LIMBS (dn);
+      ASSERT_NOCARRY (mpn_rshift (tp, dp, dn, twos));
+      dp = tp;
+
+      ASSERT_NOCARRY (mpn_rshift (rp, ap, an, twos));
+    }
+  else
+    {
+      MPN_COPY (rp, ap, an);
+    }
+  /* If the high limb of a is not below the high limb of d, extend a with a
+     zero limb.  Otherwise, with equal sizes, the high limbs show a < d, and
+     since a is non-zero it cannot be divisible.  */
+  if (rp[an - 1] >= dp[dn - 1])
+    {
+      rp[an] = 0;
+      an++;
+    }
+  else if (an == dn)
+    {
+      TMP_FREE;
+      return 0;
+    }
+
+  ASSERT (an > dn);            /* requirement of functions below */
+
+  /* Pick a bdiv_qr variant by operand size.  After the first two variants
+     rp is advanced by an-dn, so the dn limbs tested below are the high dn
+     limbs of the working area; after the third rp is left unchanged.  */
+  if (BELOW_THRESHOLD (dn, DC_BDIV_QR_THRESHOLD) ||
+      BELOW_THRESHOLD (an - dn, DC_BDIV_QR_THRESHOLD))
+    {
+      binvert_limb (di, dp[0]);
+      mpn_sbpi1_bdiv_qr (qp, rp, an, dp, dn, -di);
+      rp += an - dn;
+    }
+  else if (BELOW_THRESHOLD (dn, MU_BDIV_QR_THRESHOLD))
+    {
+      binvert_limb (di, dp[0]);
+      mpn_dcpi1_bdiv_qr (qp, rp, an, dp, dn, -di);
+      rp += an - dn;
+    }
+  else
+    {
+      tp = TMP_ALLOC_LIMBS (mpn_mu_bdiv_qr_itch (an, dn));
+      mpn_mu_bdiv_qr (qp, rp, rp, an, dp, dn, tp);
+    }
+
+  /* test for {rp,dn} zero or non-zero */
+  i = 0;
+  do
+    {
+      if (rp[i] != 0)
+       {
+         TMP_FREE;
+         return 0;
+       }
+    }
+  while (++i < dn);
+
+  TMP_FREE;
+  return 1;
+}
diff --git a/mpn/generic/divrem.c b/mpn/generic/divrem.c

new file mode 100644 (file)

index 0000000..1fb4541
--- /dev/null
+++ b/mpn/generic/divrem.c
@@ -0,0 +1,99 @@
+/* mpn_divrem -- Divide natural numbers, producing both remainder and
+   quotient.  This is now just a middle layer for calling the new
+   internal mpn_tdiv_qr.
+
+Copyright 1993, 1994, 1995, 1996, 1997, 1999, 2000, 2001, 2002, 2005 Free
+Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Divide {np,nn}, extended by qxn low zero limbs, by {dp,dn}.  The
+   quotient limbs below the most significant one go to qp, the most
+   significant quotient limb is the return value, and the remainder is
+   written back over the low limbs of np.
+
+   One- and two-limb divisors are handed to mpn_divrem_1 and mpn_divrem_2;
+   everything else goes through mpn_tdiv_qr using temporary space.  */
+mp_limb_t
+mpn_divrem (mp_ptr qp, mp_size_t qxn,
+           mp_ptr np, mp_size_t nn,
+           mp_srcptr dp, mp_size_t dn)
+{
+  ASSERT (qxn >= 0);
+  ASSERT (nn >= dn);
+  ASSERT (dn >= 1);
+  ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);
+  ASSERT (! MPN_OVERLAP_P (np, nn, dp, dn));
+  ASSERT (! MPN_OVERLAP_P (qp, nn-dn+qxn, np, nn) || qp==np+dn+qxn);
+  ASSERT (! MPN_OVERLAP_P (qp, nn-dn+qxn, dp, dn));
+  ASSERT_MPN (np, nn);
+  ASSERT_MPN (dp, dn);
+
+  /* Two-limb divisor: mpn_divrem_2 has the same calling convention.  */
+  if (dn == 2)
+    return mpn_divrem_2 (qp, qxn, np, nn, dp);
+
+  /* One-limb divisor: mpn_divrem_1 produces all nn+qxn quotient limbs, so
+     collect them in a temporary and split off the top one.  */
+  if (dn == 1)
+    {
+      mp_ptr     quot;
+      mp_size_t  low_limbs = nn + qxn - 1;
+      mp_limb_t  top;
+      TMP_DECL;
+
+      TMP_MARK;
+      quot = TMP_ALLOC_LIMBS (nn + qxn);
+
+      np[0] = mpn_divrem_1 (quot, qxn, np, nn, dp[0]);
+      MPN_COPY (qp, quot, low_limbs);
+      top = quot[low_limbs];
+
+      TMP_FREE;
+      return top;
+    }
+
+  /* General case.  */
+  {
+    mp_srcptr  num = np;       /* dividend handed to mpn_tdiv_qr */
+    mp_size_t  num_limbs = nn;
+    mp_ptr     quot, rem;
+    mp_size_t  low_limbs;
+    mp_limb_t  top;
+    TMP_DECL;
+
+    TMP_MARK;
+    if (UNLIKELY (qxn != 0))
+      {
+       /* Build the dividend with qxn zero limbs below the nn limbs of np.  */
+       mp_ptr  ext;
+       ext = TMP_ALLOC_LIMBS (nn + qxn);
+       MPN_ZERO (ext, qxn);
+       MPN_COPY (ext + qxn, np, nn);
+       num = ext;
+       num_limbs = nn + qxn;
+      }
+
+    low_limbs = num_limbs - dn;
+    quot = TMP_ALLOC_LIMBS (low_limbs + 1);
+    rem = TMP_ALLOC_LIMBS (dn);
+    mpn_tdiv_qr (quot, rem, 0L, num, num_limbs, dp, dn);
+
+    MPN_COPY (np, rem, dn);    /* remainder replaces the low limbs of np */
+    MPN_COPY (qp, quot, low_limbs);
+    top = quot[low_limbs];
+
+    TMP_FREE;
+    return top;
+  }
+}
diff --git a/mpn/generic/divrem_1.c b/mpn/generic/divrem_1.c

new file mode 100644 (file)

index 0000000..c416946
--- /dev/null
+++ b/mpn/generic/divrem_1.c
@@ -0,0 +1,245 @@
+/* mpn_divrem_1 -- mpn by limb division.
+
+Copyright 1991, 1993, 1994, 1996, 1998, 1999, 2000, 2002, 2003 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* The size where udiv_qrnnd_preinv should be used rather than udiv_qrnnd,
+   meaning the quotient size where that should happen, the quotient size
+   being how many udiv divisions will be done.
+
+   The default is to use preinv always, CPUs where this doesn't suit have
+   tuned thresholds.  Note in particular that preinv should certainly be
+   used if that's the only division available (USE_PREINV_ALWAYS).  */
+
+#ifndef DIVREM_1_NORM_THRESHOLD
+#define DIVREM_1_NORM_THRESHOLD  0
+#endif
+#ifndef DIVREM_1_UNNORM_THRESHOLD
+#define DIVREM_1_UNNORM_THRESHOLD  0
+#endif
+
+
+
+/* If the cpu only has multiply-by-inverse division (eg. alpha), then NORM
+   and UNNORM thresholds are 0 and only the inversion code is included.
+
+   If multiply-by-inverse is never viable, then NORM and UNNORM thresholds
+   will be MP_SIZE_T_MAX and only the plain division code is included.
+
+   Otherwise mul-by-inverse is better than plain division above some
+   threshold, and best results are obtained by having code for both present.
+
+   The main reason for separating the norm and unnorm cases is that not all
+   CPUs give zero for "n0 >> GMP_LIMB_BITS" which would arise in the unnorm
+   code used on an already normalized divisor.
+
+   If UDIV_NEEDS_NORMALIZATION is false then plain division uses the same
+   non-shifting code for both the norm and unnorm cases, though with
+   different criteria for skipping a division, and with different thresholds
+   of course.  And in fact if inversion is never viable, then that simple
+   non-shifting division would be all that's left.
+
+   The NORM and UNNORM thresholds might not differ much, but if there's
+   going to be separate code for norm and unnorm then it makes sense to have
+   separate thresholds.  One thing that's possible is that the
+   mul-by-inverse might be better only for normalized divisors, due to that
+   case not needing variable bit shifts.
+
+   Notice that the thresholds are tested after the decision to possibly skip
+   one divide step, so they're based on the actual number of divisions done.
+
+   For the unnorm case, it would be possible to call mpn_lshift to adjust
+   the dividend all in one go (into the quotient space say), rather than
+   limb-by-limb in the loop.  This might help if mpn_lshift is a lot faster
+   than what the compiler can generate for EXTRACT.  But this is left to CPU
+   specific implementations to consider, especially since EXTRACT isn't on
+   the dependent chain.  */
+
+/* Divide {up,un}, extended by qxn low zero limbs, by the single limb d.
+   The un+qxn quotient limbs are stored at qp, most significant limb first
+   at qp[un+qxn-1] and working downwards.  The return value is the
+   remainder.  Returns 0 without storing anything when un+qxn is 0.  */
+mp_limb_t
+mpn_divrem_1 (mp_ptr qp, mp_size_t qxn,
+             mp_srcptr up, mp_size_t un, mp_limb_t d)
+{
+  mp_size_t  n;
+  mp_size_t  i;
+  mp_limb_t  n1, n0;
+  mp_limb_t  r = 0;
+
+  ASSERT (qxn >= 0);
+  ASSERT (un >= 0);
+  ASSERT (d != 0);
+  /* FIXME: What's the correct overlap rule when qxn!=0? */
+  ASSERT (MPN_SAME_OR_SEPARATE_P (qp+qxn, up, un));
+
+  /* n counts the quotient limbs still to be produced */
+  n = un + qxn;
+  if (n == 0)
+    return 0;
+
+  /* Work with d and the dividend limbs shifted up by the nail bits; the
+     remainder r is shifted back down after each step.  */
+  d <<= GMP_NAIL_BITS;
+
+  qp += (n - 1);   /* Make qp point at most significant quotient limb */
+
+  if ((d & GMP_LIMB_HIGHBIT) != 0)
+    {
+      if (un != 0)
+       {
+         /* High quotient limb is 0 or 1, skip a divide step. */
+         mp_limb_t q;
+         r = up[un - 1] << GMP_NAIL_BITS;
+         q = (r >= d);
+         *qp-- = q;
+         r -= (d & -q);        /* subtracts d when q==1, nothing when q==0 */
+         r >>= GMP_NAIL_BITS;
+         n--;
+         un--;
+       }
+
+      if (BELOW_THRESHOLD (n, DIVREM_1_NORM_THRESHOLD))
+       {
+         /* Plain udiv_qrnnd division without shifting.  This label is also
+            the target of the "goto plain" in the unnormalized case below.  */
+       plain:
+         for (i = un - 1; i >= 0; i--)
+           {
+             n0 = up[i] << GMP_NAIL_BITS;
+             udiv_qrnnd (*qp, r, r, n0, d);
+             r >>= GMP_NAIL_BITS;
+             qp--;
+           }
+         /* fraction limbs: keep dividing with zero low limbs */
+         for (i = qxn - 1; i >= 0; i--)
+           {
+             udiv_qrnnd (*qp, r, r, CNST_LIMB(0), d);
+             r >>= GMP_NAIL_BITS;
+             qp--;
+           }
+         return r;
+       }
+      else
+       {
+         /* Multiply-by-inverse, divisor already normalized. */
+         mp_limb_t dinv;
+         invert_limb (dinv, d);
+
+         for (i = un - 1; i >= 0; i--)
+           {
+             n0 = up[i] << GMP_NAIL_BITS;
+             udiv_qrnnd_preinv (*qp, r, r, n0, d, dinv);
+             r >>= GMP_NAIL_BITS;
+             qp--;
+           }
+         for (i = qxn - 1; i >= 0; i--)
+           {
+             udiv_qrnnd_preinv (*qp, r, r, CNST_LIMB(0), d, dinv);
+             r >>= GMP_NAIL_BITS;
+             qp--;
+           }
+         return r;
+       }
+    }
+  else
+    {
+      /* Most significant bit of divisor == 0.  */
+      int norm;
+
+      /* Skip a division if high < divisor (high quotient 0).  Testing here
+        before normalizing will still skip as often as possible.  */
+      if (un != 0)
+       {
+         n1 = up[un - 1] << GMP_NAIL_BITS;
+         if (n1 < d)
+           {
+             r = n1 >> GMP_NAIL_BITS;
+             *qp-- = 0;
+             n--;
+             if (n == 0)
+               return r;
+             un--;
+           }
+       }
+
+      /* When udiv_qrnnd accepts an unnormalized divisor, small sizes reuse
+        the non-shifting loop above.  */
+      if (! UDIV_NEEDS_NORMALIZATION
+         && BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
+       goto plain;
+
+      /* Normalize: shift d, and the remainder carried so far, left by the
+        number of leading zero bits in d.  The dividend limbs are shifted
+        on the fly in the loops below, and the final remainder is shifted
+        back down on return.  */
+      count_leading_zeros (norm, d);
+      d <<= norm;
+      r <<= norm;
+
+      if (UDIV_NEEDS_NORMALIZATION
+         && BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
+       {
+         if (un != 0)
+           {
+             n1 = up[un - 1] << GMP_NAIL_BITS;
+             r |= (n1 >> (GMP_LIMB_BITS - norm));
+             for (i = un - 2; i >= 0; i--)
+               {
+                 n0 = up[i] << GMP_NAIL_BITS;
+                 udiv_qrnnd (*qp, r, r,
+                             (n1 << norm) | (n0 >> (GMP_NUMB_BITS - norm)),
+                             d);
+                 r >>= GMP_NAIL_BITS;
+                 qp--;
+                 n1 = n0;
+               }
+             /* lowest dividend limb: nothing below it to shift in */
+             udiv_qrnnd (*qp, r, r, n1 << norm, d);
+             r >>= GMP_NAIL_BITS;
+             qp--;
+           }
+         for (i = qxn - 1; i >= 0; i--)
+           {
+             udiv_qrnnd (*qp, r, r, CNST_LIMB(0), d);
+             r >>= GMP_NAIL_BITS;
+             qp--;
+           }
+         return r >> norm;
+       }
+      else
+       {
+         /* Multiply-by-inverse with the normalized divisor; same shape as
+            the udiv_qrnnd branch just above.  */
+         mp_limb_t  dinv;
+         invert_limb (dinv, d);
+         if (un != 0)
+           {
+             n1 = up[un - 1] << GMP_NAIL_BITS;
+             r |= (n1 >> (GMP_LIMB_BITS - norm));
+             for (i = un - 2; i >= 0; i--)
+               {
+                 n0 = up[i] << GMP_NAIL_BITS;
+                 udiv_qrnnd_preinv (*qp, r, r,
+                                    ((n1 << norm) | (n0 >> (GMP_NUMB_BITS - norm))),
+                                    d, dinv);
+                 r >>= GMP_NAIL_BITS;
+                 qp--;
+                 n1 = n0;
+               }
+             udiv_qrnnd_preinv (*qp, r, r, n1 << norm, d, dinv);
+             r >>= GMP_NAIL_BITS;
+             qp--;
+           }
+         for (i = qxn - 1; i >= 0; i--)
+           {
+             udiv_qrnnd_preinv (*qp, r, r, CNST_LIMB(0), d, dinv);
+             r >>= GMP_NAIL_BITS;
+             qp--;
+           }
+         return r >> norm;
+       }
+    }
+}
diff --git a/mpn/generic/divrem_2.c b/mpn/generic/divrem_2.c

new file mode 100644 (file)

index 0000000..ba761dc
--- /dev/null
+++ b/mpn/generic/divrem_2.c
@@ -0,0 +1,179 @@
+/* mpn_divrem_2 -- Divide natural numbers, producing both remainder and
+   quotient.  The divisor is two limbs.
+
+   THIS FILE CONTAINS INTERNAL FUNCTIONS WITH MUTABLE INTERFACES.  IT IS
+   ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS
+   ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP
+   RELEASE.
+
+
+Copyright 1993, 1994, 1995, 1996, 1999, 2000, 2001, 2002 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* The size where udiv_qrnnd_preinv should be used rather than udiv_qrnnd,
+   meaning the quotient size where that should happen, the quotient size
+   being how many udiv divisions will be done.
+
+   The default is to use preinv always, CPUs where this doesn't suit have
+   tuned thresholds.  Note in particular that preinv should certainly be
+   used if that's the only division available (USE_PREINV_ALWAYS).  */
+
+#ifndef DIVREM_2_THRESHOLD
+#define DIVREM_2_THRESHOLD  0
+#endif
+
+
+/* Divide num (NP/NSIZE) by den (DP/2) and write
+   the NSIZE-2 least significant quotient limbs at QP
+   and the 2 long remainder at NP.  If QEXTRA_LIMBS is
+   non-zero, generate that many fraction limbs and append them after the
+   other quotient limbs.
+   Return the most significant limb of the quotient, this is always 0 or 1.
+
+   Preconditions:
+   0. NSIZE >= 2.
+   1. The most significant bit of the divisor must be set.
+   2. QP must either not overlap with the input operands at all, or
+      QP + 2 >= NP must hold true.  (This means that it's
+      possible to put the quotient in the high part of NUM, right after the
+      remainder in NUM.)
+   3. NSIZE >= 2, even if QEXTRA_LIMBS is non-zero.  */
+
+/* Divide {np,nn}, extended by qxn low zero limbs, by the two-limb divisor
+   {dp,2}.  qxn+nn-2 quotient limbs are stored at qp, the two-limb
+   remainder is stored at np[0] and np[1], and the return value is the most
+   significant quotient limb (0 or 1).  */
+mp_limb_t
+mpn_divrem_2 (mp_ptr qp, mp_size_t qxn,
+             mp_ptr np, mp_size_t nn,
+             mp_srcptr dp)
+{
+  mp_limb_t most_significant_q_limb = 0;
+  mp_size_t i;
+  mp_limb_t n1, n0, n2;
+  mp_limb_t d1, d0;
+  mp_limb_t d1inv;
+  int use_preinv;
+
+  ASSERT (nn >= 2);
+  ASSERT (qxn >= 0);
+  ASSERT (dp[1] & GMP_NUMB_HIGHBIT);
+  ASSERT (! MPN_OVERLAP_P (qp, nn-2+qxn, np, nn) || qp+2 >= np);
+  ASSERT_MPN (np, nn);
+  ASSERT_MPN (dp, 2);
+
+  /* Point np at the two most significant dividend limbs and load them,
+     together with the divisor, into locals.  */
+  np += nn - 2;
+  d1 = dp[1];
+  d0 = dp[0];
+  n1 = np[1];
+  n0 = np[0];
+
+  /* If the top two dividend limbs are >= the divisor, subtract the divisor
+     once and record a top quotient limb of 1.  */
+  if (n1 >= d1 && (n1 > d1 || n0 >= d0))
+    {
+#if GMP_NAIL_BITS == 0
+      sub_ddmmss (n1, n0, n1, n0, d1, d0);
+#else
+      n0 = n0 - d0;
+      n1 = n1 - d1 - (n0 >> GMP_LIMB_BITS - 1);
+      n0 &= GMP_NUMB_MASK;
+#endif
+      most_significant_q_limb = 1;
+    }
+
+  /* d1inv is only initialized, and only used, when use_preinv is set */
+  use_preinv = ABOVE_THRESHOLD (qxn + nn - 2, DIVREM_2_THRESHOLD);
+  if (use_preinv)
+    invert_limb (d1inv, d1);
+
+  /* One quotient limb per iteration, from the most significant down.
+     n1,n0 carry the current two-limb remainder between iterations.  */
+  for (i = qxn + nn - 2 - 1; i >= 0; i--)
+    {
+      mp_limb_t q;
+      mp_limb_t r;
+
+      /* For the integer part step down to the next dividend limb; for the
+        qxn fraction limbs np stays put and a zero limb is stored at np[0]
+        to be read as the next dividend limb.  */
+      if (i >= qxn)
+       np--;
+      else
+       np[0] = 0;
+
+      if (n1 == d1)
+       {
+         /* Q should be either 111..111 or 111..110.  Need special handling
+            of this rare case as normal division would give overflow.  */
+         q = GMP_NUMB_MASK;
+
+         r = (n0 + d1) & GMP_NUMB_MASK;
+         if (r < d1)   /* Carry in the addition? */
+           {
+#if GMP_NAIL_BITS == 0
+             add_ssaaaa (n1, n0, r - d0, np[0], 0, d0);
+#else
+             n0 = np[0] + d0;
+             n1 = (r - d0 + (n0 >> GMP_NUMB_BITS)) & GMP_NUMB_MASK;
+             n0 &= GMP_NUMB_MASK;
+#endif
+             qp[i] = q;
+             continue;
+           }
+         n1 = d0 - (d0 != 0);
+         n0 = -d0 & GMP_NUMB_MASK;
+       }
+      else
+       {
+         /* Estimate q from the high divisor limb only, then form q*d0 in
+            n1,n0 for the comparison at q_test.  */
+         if (use_preinv)
+           udiv_qrnnd_preinv (q, r, n1, n0, d1, d1inv);
+         else
+           udiv_qrnnd (q, r, n1, n0 << GMP_NAIL_BITS, d1 << GMP_NAIL_BITS);
+         r >>= GMP_NAIL_BITS;
+         umul_ppmm (n1, n0, d0, q << GMP_NAIL_BITS);
+         n0 >>= GMP_NAIL_BITS;
+       }
+
+      n2 = np[0];
+
+      /* Compare n1,n0 against r,n2 and decrement q while it is too big */
+    q_test:
+      if (n1 > r || (n1 == r && n0 > n2))
+       {
+         /* The estimated Q was too large.  */
+         q--;
+
+#if GMP_NAIL_BITS == 0
+         sub_ddmmss (n1, n0, n1, n0, 0, d0);
+#else
+         n0 = n0 - d0;
+         n1 = n1 - (n0 >> GMP_LIMB_BITS - 1);
+         n0 &= GMP_NUMB_MASK;
+#endif
+         r += d1;
+         if (r >= d1)  /* If not carry, test Q again.  */
+           goto q_test;
+       }
+
+      /* Store the quotient limb; the new remainder is r,n2 minus n1,n0 */
+      qp[i] = q;
+#if GMP_NAIL_BITS == 0
+      sub_ddmmss (n1, n0, r, n2, n1, n0);
+#else
+      n0 = n2 - n0;
+      n1 = r - n1 - (n0 >> GMP_LIMB_BITS - 1);
+      n0 &= GMP_NUMB_MASK;
+#endif
+    }
+  /* np has been stepped back nn-2 times, so it again points at the caller's
+     np; the final remainder goes in its two low limbs.  */
+  np[1] = n1;
+  np[0] = n0;
+
+  return most_significant_q_limb;
+}
diff --git a/mpn/generic/dump.c b/mpn/generic/dump.c

new file mode 100644 (file)

index 0000000..3830999
--- /dev/null
+++ b/mpn/generic/dump.c
@@ -0,0 +1,89 @@
+/* THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS NOT SAFE TO
+   CALL THIS FUNCTION DIRECTLY.  IN FACT, IT IS ALMOST GUARANTEED THAT THIS
+   FUNCTION WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+
+Copyright 1996, 2000, 2001, 2002, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#if GMP_NUMB_BITS % 4 == 0
+/* Print {ptr,n} to stdout as a hexadecimal number followed by a newline.
+   High zero limbs are ignored and a zero value is printed as "0".
+
+   With _LONG_LONG_LIMB each limb is printed in two halves using "%lX".
+   The low half is masked explicitly instead of relying on the cast to
+   unsigned long to discard the high half: the cast only does that when
+   unsigned long is exactly half as wide as a limb, and otherwise too many
+   digits would be printed.  */
+void
+mpn_dump (mp_srcptr ptr, mp_size_t n)
+{
+#if _LONG_LONG_LIMB
+  /* selects the low GMP_LIMB_BITS/2 bits of a limb */
+  const mp_limb_t  low_half = (((mp_limb_t) 1) << (GMP_LIMB_BITS / 2)) - 1;
+#endif
+
+  MPN_NORMALIZE (ptr, n);
+
+  if (n == 0)
+    printf ("0\n");
+  else
+    {
+      /* Most significant limb: no leading zeros.  */
+      n--;
+#if _LONG_LONG_LIMB
+      if ((ptr[n] >> GMP_LIMB_BITS / 2) != 0)
+       {
+         printf ("%lX", (unsigned long) (ptr[n] >> GMP_LIMB_BITS / 2));
+         printf ("%0*lX", (GMP_LIMB_BITS / 2 / 4),
+                 (unsigned long) (ptr[n] & low_half));
+       }
+      else
+#endif
+       printf ("%lX", (unsigned long) ptr[n]);
+
+      /* Remaining limbs: zero padded to their full width.  */
+      while (n)
+       {
+         n--;
+#if _LONG_LONG_LIMB
+         printf ("%0*lX", (GMP_NUMB_BITS - GMP_LIMB_BITS / 2) / 4,
+                 (unsigned long) (ptr[n] >> GMP_LIMB_BITS / 2));
+         printf ("%0*lX", GMP_LIMB_BITS / 2 / 4,
+                 (unsigned long) (ptr[n] & low_half));
+#else
+         printf ("%0*lX", GMP_NUMB_BITS / 4, (unsigned long) ptr[n]);
+#endif
+       }
+      printf ("\n");
+    }
+}
+
+#else
+
+/* Print the hexadecimal digits of {p,n}, most significant first, with no
+   leading zeros; nothing is printed for a zero value.  {p,n} is destroyed.
+
+   Each level peels off the low hex digit, shifts the operand right by four
+   bits and recurses on what is left, printing its own digit on the way
+   back.  The operand is re-normalized on entry so that n shrinks as the
+   value does and the recursion stops once the value reaches zero.  */
+static void
+mpn_recdump (mp_ptr p, mp_size_t n)
+{
+  mp_limb_t lo;
+
+  MPN_NORMALIZE (p, n);
+  if (n != 0)
+    {
+      lo = p[0] & 0xf;
+      mpn_rshift (p, p, n, 4);
+      mpn_recdump (p, n);
+      /* lo is a single hex digit; cast to match the "%lX" conversion */
+      printf ("%lX", (unsigned long) lo);
+    }
+}
+
+/* Print {p,n} to stdout as a hexadecimal number followed by a newline.
+   High zero limbs are ignored and a zero value is printed as "0", the same
+   as the GMP_NUMB_BITS % 4 == 0 variant of this function.  The digits are
+   generated from a temporary copy, so {p,n} itself is not modified.  */
+void
+mpn_dump (mp_srcptr p, mp_size_t n)
+{
+  mp_ptr tp;
+  TMP_DECL;
+
+  MPN_NORMALIZE (p, n);
+  if (n == 0)
+    {
+      printf ("0\n");
+      return;
+    }
+
+  TMP_MARK;
+  tp = TMP_ALLOC_LIMBS (n);
+  MPN_COPY (tp, p, n);
+  mpn_recdump (tp, n);
+  printf ("\n");
+  TMP_FREE;
+}
+
+#endif
diff --git a/mpn/generic/fib2_ui.c b/mpn/generic/fib2_ui.c

new file mode 100644 (file)

index 0000000..ddf93fa
--- /dev/null
+++ b/mpn/generic/fib2_ui.c
@@ -0,0 +1,178 @@
+/* mpn_fib2_ui -- calculate Fibonacci numbers.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2001, 2002, 2005, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* change this to "#define TRACE(x) x" for diagnostics */
+#define TRACE(x)
+
+
+/* Store F[n] at fp and F[n-1] at f1p.  fp and f1p should have room for
+   MPN_FIB2_SIZE(n) limbs.
+
+   The return value is the actual number of limbs stored, this will be at
+   least 1.  fp[size-1] will be non-zero, except when n==0, in which case
+   fp[0] is 0 and f1p[0] is 1.  f1p[size-1] can be zero, since F[n-1]<F[n]
+   (for n>0).
+
+   Notes:
+
+   In F[2k+1] with k even, +2 is applied to 4*F[k]^2 just by ORing into the
+   low limb.
+
+   In F[2k+1] with k odd, -2 is applied to the low limb of 4*F[k]^2 -
+   F[k-1]^2.  This F[2k+1] is an F[4m+3] and such numbers are congruent to
+   1, 2 or 5 mod 8, which means no underflow reaching it with a -2 (since
+   that would leave 6 or 7 mod 8).
+
+   This property of F[4m+3] can be verified by induction on F[4m+3] =
+   7*F[4m-1] - F[4m-5], that formula being a standard lucas sequence
+   identity U[i+j] = U[i]*V[j] - U[i-j]*Q^j.
+*/
+
+/* Store F[n] at fp and F[n-1] at f1p and return the number of limbs used.
+   A starting pair is taken from FIB_TABLE and then doubled once for each
+   remaining lower bit of n.  */
+mp_size_t
+mpn_fib2_ui (mp_ptr fp, mp_ptr f1p, unsigned long int n)
+{
+  mp_size_t      size;
+  unsigned long  nfirst, mask;
+
+  TRACE (printf ("mpn_fib2_ui n=%lu\n", n));
+
+  ASSERT (! MPN_OVERLAP_P (fp, MPN_FIB2_SIZE(n), f1p, MPN_FIB2_SIZE(n)));
+
+  /* Take a starting pair from the table. */
+  /* nfirst is n with low bits dropped until it is within the table; mask
+     is shifted left once per bit dropped.  */
+  mask = 1;
+  for (nfirst = n; nfirst > FIB_TABLE_LIMIT; nfirst /= 2)
+    mask <<= 1;
+  TRACE (printf ("nfirst=%lu mask=0x%lX\n", nfirst, mask));
+
+  f1p[0] = FIB_TABLE ((int) nfirst - 1);
+  fp[0]  = FIB_TABLE (nfirst);
+  size = 1;
+
+  /* Skip to the end if the table lookup gives the final answer. */
+  if (mask != 1)
+    {
+      mp_size_t  alloc;
+      mp_ptr        xp;
+      TMP_DECL;
+
+      /* xp is scratch space that receives F[k]^2 in each iteration */
+      TMP_MARK;
+      alloc = MPN_FIB2_SIZE (n);
+      xp = TMP_ALLOC_LIMBS (alloc);
+
+      do
+       {
+         /* Here fp==F[k] and f1p==F[k-1], with k being the bits of n from
+            n&mask upwards.
+
+            The next bit of n is n&(mask>>1) and we'll double to the pair
+            fp==F[2k],f1p==F[2k-1] or fp==F[2k+1],f1p==F[2k], according as
+            that bit is 0 or 1 respectively.  */
+
+         TRACE (printf ("k=%lu mask=0x%lX size=%ld alloc=%ld\n",
+                        n >> refmpn_count_trailing_zeros(mask),
+                        mask, size, alloc);
+                mpn_trace ("fp ", fp, size);
+                mpn_trace ("f1p", f1p, size));
+
+         /* fp normalized, f1p at most one high zero */
+         ASSERT (fp[size-1] != 0);
+         ASSERT (f1p[size-1] != 0 || f1p[size-2] != 0);
+
+         /* f1p[size-1] might be zero, but this occurs rarely, so it's not
+            worth bothering checking for it */
+         /* xp = F[k]^2 and fp = F[k-1]^2.  fp is overwritten here, so from
+            this point F[k] is only available through its square in xp.  */
+         ASSERT (alloc >= 2*size);
+         mpn_sqr (xp, fp,  size);
+         mpn_sqr (fp, f1p, size);
+         size *= 2;
+
+         /* Shrink if possible.  Since fp was normalized there'll be at
+            most one high zero on xp (and if there is then there's one on
+            fp too).  */
+         ASSERT (xp[size-1] != 0 || fp[size-1] == 0);
+         size -= (xp[size-1] == 0);
+         ASSERT (xp[size-1] != 0);  /* only one xp high zero */
+
+         /* Calculate F[2k-1] = F[k]^2 + F[k-1]^2. */
+         f1p[size] = mpn_add_n (f1p, xp, fp, size);
+
+         /* Calculate F[2k+1] = 4*F[k]^2 - F[k-1]^2 + 2*(-1)^k.
+            n&mask is the low bit of our implied k.  */
+#if HAVE_NATIVE_mpn_rsblsh2_n || HAVE_NATIVE_mpn_rsblsh_n
+         /* Native combined shift-and-reverse-subtract, followed by the +2
+            or -2 as a separate step on the result.  */
+#if HAVE_NATIVE_mpn_rsblsh2_n
+         fp[size] = mpn_rsblsh2_n (fp, fp, xp, size);
+#else /* HAVE_NATIVE_mpn_rsblsh_n */
+         fp[size] = mpn_rsblsh_n (fp, fp, xp, size, 2);
+#endif
+         if ((n & mask) == 0)
+           MPN_INCR_U(fp, size + 1, 2);        /* possible +2 */
+         else
+         {
+           ASSERT (fp[0] >= 2);
+           fp[0] -= 2;                         /* possible -2 */
+         }
+#else
+         /* Generic version: shift xp left by 2 in place, then subtract.
+            The bits shifted out of xp, less the borrow from the
+            subtraction, become the new high limb of fp.  */
+         {
+           mp_limb_t  c;
+
+           c = mpn_lshift (xp, xp, size, 2);
+           xp[0] |= (n & mask ? 0 : 2);        /* possible +2 */
+           c -= mpn_sub_n (fp, xp, fp, size);
+           ASSERT (n & mask ? fp[0] != 0 && fp[0] != 1 : 1);
+           fp[0] -= (n & mask ? 2 : 0);        /* possible -2 */
+           fp[size] = c;
+         }
+#endif
+         /* count the extra high limb of fp if it is non-zero */
+         ASSERT (alloc >= size+1);
+         size += (fp[size] != 0);
+
+         /* now n&mask is the new bit of n being considered */
+         mask >>= 1;
+
+         /* Calculate F[2k] = F[2k+1] - F[2k-1], replacing the unwanted one of
+            F[2k+1] and F[2k-1].  */
+         if (n & mask)
+           ASSERT_NOCARRY (mpn_sub_n (f1p, fp, f1p, size));
+         else {
+           ASSERT_NOCARRY (mpn_sub_n ( fp, fp, f1p, size));
+
+           /* Can have a high zero after replacing F[2k+1] with F[2k].
+              f1p will have a high zero if fp does. */
+           ASSERT (fp[size-1] != 0 || f1p[size-1] == 0);
+           size -= (fp[size-1] == 0);
+         }
+       }
+      while (mask != 1);
+
+      TMP_FREE;
+    }
+
+  TRACE (printf ("done size=%ld\n", size);
+        mpn_trace ("fp ", fp, size);
+        mpn_trace ("f1p", f1p, size));
+
+  return size;
+}
diff --git a/mpn/generic/gcd.c b/mpn/generic/gcd.c

new file mode 100644 (file)

index 0000000..542e0fe
--- /dev/null
+++ b/mpn/generic/gcd.c
@@ -0,0 +1,286 @@
+/* mpn/gcd.c: mpn_gcd for gcd of two odd integers.
+
+Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1998, 2000, 2001, 2002, 2003,
+2004, 2005, 2008 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Uses the HGCD operation described in
+
+     N. Möller, On Schönhage's algorithm and subquadratic integer gcd
+     computation, Math. Comp. 77 (2008), 589-607.
+
+  to reduce inputs until they are of size below GCD_DC_THRESHOLD, and
+  then uses Lehmer's algorithm.
+*/
+
+/* Some reasonable choices are n / 2 (same as in hgcd), and p = (n +
+ * 2)/3, which gives a balanced multiplication in
+ * mpn_hgcd_matrix_adjust. However, p = 2 n/3 gives slightly better
+ * performance. The matrix-vector multiplication is then
+ * 4:1-unbalanced, with matrix elements of size n/6, and vector
+ * elements of size p = 2n/3. */
+
+/* From analysis of the theoretical running time, it appears that when
+ * multiplication takes time O(n^alpha), p should be chosen so that
+ * the ratio of the time for the mpn_hgcd call, and the time for the
+ * multiplication in mpn_hgcd_matrix_adjust, is roughly 1/(alpha -
+ * 1). */
+#ifdef TUNE_GCD_P
+#define P_TABLE_SIZE 10000
+mp_size_t p_table[P_TABLE_SIZE];
+#define CHOOSE_P(n) ( (n) < P_TABLE_SIZE ? p_table[n] : 2*(n)/3)
+#else
+#define CHOOSE_P(n) (2*(n) / 3)
+#endif
+
+/* Compute the gcd of {up,usize} and {vp,n}, store it at gp and return its
+   size in limbs.  up and vp are used as working storage and are
+   overwritten.
+
+   Outline: reduce U modulo V when usize > n, then repeatedly shrink the
+   operands with mpn_hgcd (falling back to mpn_gcd_subdiv_step when hgcd
+   makes no progress) while n is above the divide-and-conquer threshold,
+   and finish with mpn_gcd_lehmer_n.  A single scratch block, sized as the
+   maximum need over these phases, is allocated up front.  */
+mp_size_t
+mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t n)
+{
+  mp_size_t talloc;
+  mp_size_t scratch;
+  mp_size_t matrix_scratch;
+
+  mp_size_t gn;
+  mp_ptr tp;
+  TMP_DECL;
+
+  /* FIXME: Check for small sizes first, before setting up temporary
+     storage etc. */
+  talloc = MPN_GCD_LEHMER_N_ITCH(n);
+
+  /* For initial division */
+  scratch = usize - n + 1;
+  if (scratch > talloc)
+    talloc = scratch;
+
+  /* Same condition as the main loop below: scratch for the hgcd phase is
+     only accounted for when that loop will run at least once.  */
+#if TUNE_GCD_P
+  if (CHOOSE_P (n) > 0)
+#else
+  if (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))
+#endif
+    {
+      mp_size_t hgcd_scratch;
+      mp_size_t update_scratch;
+      mp_size_t p = CHOOSE_P (n);
+      /* Note: shadows the function-level scratch declared above.  */
+      mp_size_t scratch;
+#if TUNE_GCD_P
+      /* Worst case, since we don't guarantee that n - CHOOSE_P(n)
+        is increasing */
+      matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n);
+      hgcd_scratch = mpn_hgcd_itch (n);
+      update_scratch = 2*(n - 1);
+#else
+      matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
+      hgcd_scratch = mpn_hgcd_itch (n - p);
+      update_scratch = p + n - 1;
+#endif
+      /* The matrix lives at the start of tp; the hgcd call and the
+        subsequent adjust step both use the area after it, so only the
+        larger of the two needs is added.  */
+      scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch);
+      if (scratch > talloc)
+       talloc = scratch;
+    }
+
+  TMP_MARK;
+  tp = TMP_ALLOC_LIMBS(talloc);
+
+  if (usize > n)
+    {
+      /* U <-- U mod V.  The quotient is discarded into tp, the n-limb
+        remainder overwrites the low limbs of up.  */
+      mpn_tdiv_qr (tp, up, 0, up, usize, vp, n);
+
+      /* Zero remainder: V divides U, so V is the gcd.  */
+      if (mpn_zero_p (up, n))
+       {
+         MPN_COPY (gp, vp, n);
+         TMP_FREE;
+         return n;
+       }
+    }
+
+#if TUNE_GCD_P
+  while (CHOOSE_P (n) > 0)
+#else
+  while (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))
+#endif
+    {
+      struct hgcd_matrix M;
+      mp_size_t p = CHOOSE_P (n);
+      /* Note: shadows the function-level matrix_scratch; recomputed for the
+        current n and p.  */
+      mp_size_t matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
+      mp_size_t nn;
+      mpn_hgcd_matrix_init (&M, n - p, tp);
+      /* Run hgcd on the high n - p limbs only; the low p limbs are brought
+        up to date afterwards by mpn_hgcd_matrix_adjust.  */
+      nn = mpn_hgcd (up + p, vp + p, n - p, &M, tp + matrix_scratch);
+      if (nn > 0)
+       {
+         ASSERT (M.n <= (n - p - 1)/2);
+         ASSERT (M.n + p <= (p + n - 1) / 2);
+         /* Temporary storage 2 (p + M->n) <= p + n - 1. */
+         n = mpn_hgcd_matrix_adjust (&M, p + nn, up, vp, p, tp + matrix_scratch);
+       }
+      else
+       {
+         /* Temporary storage n */
+         n = mpn_gcd_subdiv_step (gp, &gn, up, vp, n, tp);
+         /* A zero return means the gcd was found and already stored in
+            gp, with its size in gn.  */
+         if (n == 0)
+           {
+             TMP_FREE;
+             return gn;
+           }
+       }
+    }
+
+  gn = mpn_gcd_lehmer_n (gp, up, vp, n, tp);
+  TMP_FREE;
+  return gn;
+}
+
+#ifdef TUNE_GCD_P
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+#include "speed.h"
+
+/* qsort comparison callback ordering doubles ascending.  Returns -1 when
+   *ap is less than *bp, 1 when it is greater, and 0 otherwise.  */
+static int
+compare_double(const void *ap, const void *bp)
+{
+  const double *lhs = (const double *) ap;
+  const double *rhs = (const double *) bp;
+
+  /* Each comparison yields 0 or 1, so the difference is 1, -1 or 0; values
+     that compare neither greater nor less give 0.  */
+  return (*lhs > *rhs) - (*lhs < *rhs);
+}
+
+/* Return the median of the n doubles at v.  The array is sorted in place
+   (ascending, with compare_double); for even n the upper of the two middle
+   elements is returned.  v[n/2] is read, so n must be non-zero.  */
+static double
+median (double *v, size_t n)
+{
+  const size_t middle = n / 2;
+
+  qsort (v, n, sizeof (double), compare_double);
+  return v[middle];
+}
+
+/* TIME(res, code): execute CODE five times, timing each run between
+   speed_starttime() and speed_endtime(), and assign the median of the five
+   measurements to RES.  CODE is pasted into the loop body as is.  */
+#define TIME(res, code) do {                           \
+  double time_measurement[5];                          \
+  unsigned time_i;                                     \
+                                                       \
+  for (time_i = 0; time_i < 5; time_i++)               \
+    {                                                  \
+      speed_starttime();                               \
+      code;                                            \
+      time_measurement[time_i] = speed_endtime();      \
+    }                                                  \
+  res = median(time_measurement, 5);                   \
+} while (0)
+
+/* Tuning driver, built only when TUNE_GCD_P is defined.
+
+   For each operand size n from 100 up to P_TABLE_SIZE - 1 it times
+   mpn_gcd_lehmer_n, then mpn_gcd with every split point p in
+   [0.48 n, 0.77 n) installed in p_table[n].  The fastest p is kept in
+   p_table[n]; 0 (meaning "use Lehmer") is kept when no p wins or when the
+   speedup is below 1%.  One line per size is printed: n, best p, p/n and,
+   when some p won, the speedup in percent.  */
+int
+main(int argc, char **argv)
+{
+  gmp_randstate_t rands;
+  mp_size_t n;
+  mp_ptr ap;
+  mp_ptr bp;
+  mp_ptr up;
+  mp_ptr vp;
+  mp_ptr gp;
+  mp_ptr tp;
+  TMP_DECL;
+
+  /* Unbuffered so if output is redirected to a file it isn't lost if the
+     program is killed part way through.  */
+  setbuf (stdout, NULL);
+  setbuf (stderr, NULL);
+
+  gmp_randinit_default (rands);
+
+  TMP_MARK;
+
+  /* ap/bp hold the fixed random operands; up/vp receive fresh copies before
+     every timed call since the gcd routines overwrite their inputs.  */
+  ap = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
+  bp = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
+  up = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
+  vp = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
+  gp = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
+  tp = TMP_ALLOC_LIMBS (MPN_GCD_LEHMER_N_ITCH (P_TABLE_SIZE));
+
+  mpn_random (ap, P_TABLE_SIZE);
+  mpn_random (bp, P_TABLE_SIZE);
+
+  memset (p_table, 0, sizeof(p_table));
+
+  /* The bound must be the loop condition: p_table, ap and bp all have
+     exactly P_TABLE_SIZE entries, so n has to stay below that.  */
+  for (n = 100; n < P_TABLE_SIZE; n++)
+    {
+      mp_size_t p;
+      mp_size_t best_p;
+      double best_time;
+      double lehmer_time;
+
+      /* Make sure both n-limb operands have a non-zero top limb.  */
+      if (ap[n-1] == 0)
+       ap[n-1] = 1;
+
+      if (bp[n-1] == 0)
+       bp[n-1] = 1;
+
+      /* Baseline: plain Lehmer at this size.  */
+      p_table[n] = 0;
+      TIME(lehmer_time, {
+         MPN_COPY (up, ap, n);
+         MPN_COPY (vp, bp, n);
+         mpn_gcd_lehmer_n (gp, up, vp, n, tp);
+       });
+
+      best_time = lehmer_time;
+      best_p = 0;
+
+      for (p = n * 0.48; p < n * 0.77; p++)
+       {
+         double t;
+
+         /* mpn_gcd picks this value up through CHOOSE_P (n).  */
+         p_table[n] = p;
+
+         TIME(t, {
+             MPN_COPY (up, ap, n);
+             MPN_COPY (vp, bp, n);
+             mpn_gcd (gp, up, n, vp, n);
+           });
+
+         if (t < best_time)
+           {
+             best_time = t;
+             best_p = p;
+           }
+       }
+      /* mp_size_t is not necessarily int; print through long.  */
+      printf("%6ld %6ld %5.3g", (long) n, (long) best_p, (double) best_p / n);
+      if (best_p > 0)
+       {
+         double speedup = 100 * (lehmer_time - best_time) / lehmer_time;
+         printf(" %5.3g%%", speedup);
+         if (speedup < 1.0)
+           {
+             printf(" (ignored)");
+             best_p = 0;
+           }
+       }
+      printf("\n");
+
+      p_table[n] = best_p;
+    }
+  TMP_FREE;
+  gmp_randclear(rands);
+  return 0;
+}
+#endif /* TUNE_GCD_P */
diff --git a/mpn/generic/gcd_1.c b/mpn/generic/gcd_1.c

new file mode 100644 (file)

index 0000000..ab16f4b
--- /dev/null
+++ b/mpn/generic/gcd_1.c
@@ -0,0 +1,179 @@
+/* mpn_gcd_1 -- mpn and limb greatest common divisor.
+
+Copyright 1994, 1996, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef GCD_1_METHOD
+#define GCD_1_METHOD 2
+#endif
+
+#define USE_ZEROTAB 0
+
+#if USE_ZEROTAB
+/* zerotab[x] is the number of trailing zero bits of the 4-bit value x.  The
+   entry for x == 0 is 4, meaning "all four bits are zero".  */
+static const unsigned char zerotab[16] = {
+  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
+};
+#endif
+
+/* Return the gcd of U = {up,size} and the single limb V = vlimb.
+
+   Does not work for U == 0 or V == 0.  It would be tough to make it work for
+   V == 0 since gcd(x,0) = x, and U does not generally fit in an mp_limb_t.
+
+   The threshold for doing u%v when size==1 will vary by CPU according to
+   the speed of a division and the code generated for the main loop.  Any
+   tuning for this is left to a CPU specific implementation.
+
+   Structure: the power of two common to U and V is counted in zero_bits
+   and re-applied at "done".  U is reduced to a single limb, and the odd
+   parts are then run through a binary gcd loop selected by GCD_1_METHOD.
+   The reduction paths enter that loop at the label strip_u_maybe, where
+   ulimb is non-zero but may still be even.  */
+
+mp_limb_t
+mpn_gcd_1 (mp_srcptr up, mp_size_t size, mp_limb_t vlimb)
+{
+  mp_limb_t      ulimb;
+  unsigned long  zero_bits, u_low_zero_bits;
+
+  ASSERT (size >= 1);
+  ASSERT (vlimb != 0);
+  ASSERT_MPN_NONZERO_P (up, size);
+
+  ulimb = up[0];
+
+  /* Need vlimb odd for modexact, want it odd to get common zeros. */
+  count_trailing_zeros (zero_bits, vlimb);
+  vlimb >>= zero_bits;
+
+  if (size > 1)
+    {
+      /* Must get common zeros before the mod reduction.  If ulimb==0 then
+        vlimb already gives the common zeros.  */
+      if (ulimb != 0)
+       {
+         count_trailing_zeros (u_low_zero_bits, ulimb);
+         zero_bits = MIN (zero_bits, u_low_zero_bits);
+       }
+
+      ulimb = MPN_MOD_OR_MODEXACT_1_ODD (up, size, vlimb);
+      /* Zero result: the (odd part of) V is the gcd.  */
+      if (ulimb == 0)
+       goto done;
+
+      goto strip_u_maybe;
+    }
+
+  /* size==1, so up[0]!=0 */
+  count_trailing_zeros (u_low_zero_bits, ulimb);
+  ulimb >>= u_low_zero_bits;
+  zero_bits = MIN (zero_bits, u_low_zero_bits);
+
+  /* make u bigger */
+  if (vlimb > ulimb)
+    MP_LIMB_T_SWAP (ulimb, vlimb);
+
+  /* if u is much bigger than v, reduce using a division rather than
+     chipping away at it bit-by-bit */
+  if ((ulimb >> 16) > vlimb)
+    {
+      ulimb %= vlimb;
+      if (ulimb == 0)
+       goto done;
+      goto strip_u_maybe;
+    }
+
+  ASSERT (ulimb & 1);
+  ASSERT (vlimb & 1);
+
+#if GCD_1_METHOD == 1
+  /* Plain binary gcd: subtract the smaller from the larger, then shift out
+     the trailing zeros of the difference.  */
+  while (ulimb != vlimb)
+    {
+      ASSERT (ulimb & 1);
+      ASSERT (vlimb & 1);
+
+      if (ulimb > vlimb)
+       {
+         ulimb -= vlimb;
+         do
+           {
+             ulimb >>= 1;
+             ASSERT (ulimb != 0);
+             /* Entry point for the reduction paths above: joins the
+                loop at its test, so an already odd ulimb is left
+                unshifted.  */
+           strip_u_maybe:
+             ;
+           }
+         while ((ulimb & 1) == 0);
+       }
+      else /*  vlimb > ulimb.  */
+       {
+         vlimb -= ulimb;
+         do
+           {
+             vlimb >>= 1;
+             ASSERT (vlimb != 0);
+           }
+         while ((vlimb & 1) == 0);
+       }
+    }
+#else
+# if GCD_1_METHOD  == 2
+
+  /* Both values are odd here; from now on they are kept with that low 1
+     bit shifted out.  It is put back on vlimb after the loop.  */
+  ulimb >>= 1;
+  vlimb >>= 1;
+
+  while (ulimb != vlimb)
+    {
+      int c;
+      mp_limb_t t = ulimb - vlimb;
+      /* All ones when t has its high bit set (taken as v > u), else zero.  */
+      mp_limb_t vgtu = LIMB_HIGHBIT_TO_MASK (t);
+
+      /* v <-- min (u, v) */
+      vlimb += (vgtu & t);
+
+      /* u <-- |u - v| */
+      ulimb = (t ^ vgtu) - vgtu;
+
+#if USE_ZEROTAB
+      /* Number of trailing zeros is the same no matter if we look at
+       * t or ulimb, but using t gives more parallelism. */
+      c = zerotab[t & 15];
+
+      while (UNLIKELY (c == 4))
+       {
+         ulimb >>= 4;
+         /* The "if (0)" only exists to host the label: the shift of vlimb
+            is executed solely when entered through the goto.  */
+         if (0)
+         strip_u_maybe:
+           vlimb >>= 1;
+
+         c = zerotab[ulimb & 15];
+       }
+#else
+      /* Skipped in normal loop flow; reached only through the gotos above.
+        There vlimb is odd and still in full form, so it is converted to
+        the shifted form, and t is set to ulimb so that the code below
+        strips ulimb's trailing zeros together with its lowest 1 bit.  */
+      if (0)
+       {
+       strip_u_maybe:
+         vlimb >>= 1;
+         t = ulimb;
+       }
+      count_trailing_zeros (c, t);
+#endif
+      /* c zeros plus the implicit low 1 bit.  */
+      ulimb >>= (c + 1);
+    }
+
+  /* Restore the implicit low bit.  */
+  vlimb = (vlimb << 1) | 1;
+# else
+#  error Unknown GCD_1_METHOD
+# endif
+#endif
+
+ done:
+  /* Re-apply the power of two shared by U and V.  */
+  return vlimb << zero_bits;
+}
diff --git a/mpn/generic/gcd_lehmer.c b/mpn/generic/gcd_lehmer.c

new file mode 100644 (file)

index 0000000..37fd3c5
--- /dev/null
+++ b/mpn/generic/gcd_lehmer.c
@@ -0,0 +1,160 @@
+/* gcd_lehmer.c.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2003, 2004, 2005, 2008 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Binary gcd of the two-limb numbers {up,2} and {vp,2}.  The result is
+   written to gp and its size in limbs (1 or 2) is returned.  Both U and V
+   must be odd.  */
+static inline mp_size_t
+gcd_2 (mp_ptr gp, mp_srcptr up, mp_srcptr vp)
+{
+  mp_limb_t u0 = up[0], u1 = up[1];
+  mp_limb_t v0 = vp[0], v1 = vp[1];
+  mp_limb_t diff;
+  mp_size_t gn;
+
+  ASSERT (u0 & 1);
+  ASSERT (v0 & 1);
+
+  /* Two-limb phase: subtract the smaller number from the larger one and
+     shift out the trailing zeros of the difference.  Stops as soon as the
+     high limbs agree or the low limbs agree; the latter test also
+     guarantees a non-zero argument for count_trailing_zeros.  */
+  for (;;)
+    {
+      unsigned long int cnt;
+
+      if (u1 == v1 || u0 == v0)
+       break;
+
+      if (u1 > v1)
+       {
+         /* U <-- (U - V) >> cnt */
+         u1 -= v1 + (u0 < v0);
+         u0 = (u0 - v0) & GMP_NUMB_MASK;
+         count_trailing_zeros (cnt, u0);
+         u0 = ((u1 << (GMP_NUMB_BITS - cnt)) & GMP_NUMB_MASK) | (u0 >> cnt);
+         u1 >>= cnt;
+       }
+      else
+       {
+         /* Here u1 < v1.  V <-- (V - U) >> cnt */
+         v1 -= u1 + (v0 < u0);
+         v0 = (v0 - u0) & GMP_NUMB_MASK;
+         count_trailing_zeros (cnt, v0);
+         v0 = ((v1 << (GMP_NUMB_BITS - cnt)) & GMP_NUMB_MASK) | (v0 >> cnt);
+         v1 >>= cnt;
+       }
+    }
+
+  /* Publish U; it is the answer when U == V, and otherwise the multi-limb
+     operand of the final single-limb gcd.  */
+  gp[0] = u0;
+  gp[1] = u1;
+  gn = (u1 != 0) ? 2 : 1;
+
+  if (u1 == v1 && u0 == v0)
+    return gn;
+
+  /* U and V now differ in exactly one limb position, so |U - V| is carried
+     by a single limb value.  Finish with gcd (U, that value).  */
+  if (u0 == v0)
+    diff = (u1 > v1) ? u1 - v1 : v1 - u1;
+  else
+    diff = (u0 > v0) ? u0 - v0 : v0 - u0;
+
+  gp[0] = mpn_gcd_1 (gp, gn, diff);
+  return 1;
+}
+
+/* Compute the gcd of {ap,n} and {bp,n} with Lehmer's algorithm, store it
+   at gp and return its size in limbs.  ap and bp are overwritten.
+
+   While n > 2, each iteration takes the leading bits of a and b, asks
+   mpn_hgcd2 for a reduction matrix and applies it, or falls back to
+   mpn_gcd_subdiv_step when mpn_hgcd2 fails.  The remaining one or two limb
+   operands are handled by mpn_gcd_1 and gcd_2.
+
+   Temporary storage: n */
+mp_size_t
+mpn_gcd_lehmer_n (mp_ptr gp, mp_ptr ap, mp_ptr bp, mp_size_t n, mp_ptr tp)
+{
+  /* Relax this requirement, and normalize at the start? Must disallow
+     A = B = 0, though. */
+  ASSERT(ap[n-1] > 0 || bp[n-1] > 0);
+
+  while (n > 2)
+    {
+      struct hgcd_matrix1 M;
+      mp_limb_t ah, al, bh, bl;
+      mp_limb_t mask;
+
+      /* The highest set bit of mask is the highest set bit of a or b.  */
+      mask = ap[n-1] | bp[n-1];
+      ASSERT (mask > 0);
+
+      if (mask & GMP_NUMB_HIGHBIT)
+       {
+         /* Already normalized: use the top two limbs directly.  */
+         ah = ap[n-1]; al = ap[n-2];
+         bh = bp[n-1]; bl = bp[n-2];
+       }
+      else
+       {
+         int shift;
+
+         /* Build the two leading limbs of a and b from the top three
+            limbs, using the same shift count for both numbers.  Reading
+            index n-3 is fine since n > 2 here.  */
+         count_leading_zeros (shift, mask);
+         ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
+         al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
+         bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
+         bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
+       }
+
+      /* Try an mpn_hgcd2 step */
+      if (mpn_hgcd2 (ah, al, bh, bl, &M))
+       {
+         n = mpn_hgcd_mul_matrix1_inverse_vector (&M, tp, ap, bp, n);
+         /* NOTE(review): presumably the new a is produced in tp (bp being
+            updated in place), hence the swap -- confirm against
+            mpn_hgcd_mul_matrix1_inverse_vector.  */
+         MP_PTR_SWAP (ap, tp);
+       }
+      else
+       {
+         /* mpn_hgcd2 has failed. Then either one of a or b is very
+            small, or the difference is very small. Perform one
+            subtraction followed by one division. */
+         mp_size_t gn;
+
+         /* Temporary storage n */
+         n = mpn_gcd_subdiv_step (gp, &gn, ap, bp, n, tp);
+         /* Zero means the gcd was found and stored in gp, size gn.  */
+         if (n == 0)
+           return gn;
+       }
+    }
+
+  if (n == 1)
+    {
+      *gp = mpn_gcd_1(ap, 1, bp[0]);
+      return 1;
+    }
+
+  /* n == 2 from here on.  */
+
+  /* Due to the calling convention for mpn_gcd, at most one can be
+     even. */
+
+  /* Arrange for a to be the odd one.  */
+  if (! (ap[0] & 1))
+    MP_PTR_SWAP (ap, bp);
+
+  ASSERT (ap[0] & 1);
+
+  if (bp[0] == 0)
+    {
+      /* b = bp[1] * 2^GMP_NUMB_BITS and a is odd, so the power of two can
+        be dropped: gcd (a, b) = gcd (a, bp[1]).  */
+      *gp = mpn_gcd_1 (ap, 2, bp[1]);
+      return 1;
+    }
+  else if (! (bp[0] & 1))
+    {
+      /* Shift the trailing zeros out of the two-limb b so that gcd_2 gets
+        two odd operands.  */
+      int r;
+      count_trailing_zeros (r, bp[0]);
+      bp[0] = ((bp[1] << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (bp[0] >> r);
+      bp[1] >>= r;
+    }
+
+  return gcd_2(gp, ap, bp);
+}
diff --git a/mpn/generic/gcd_subdiv_step.c b/mpn/generic/gcd_subdiv_step.c

new file mode 100644 (file)

index 0000000..47c0c26
--- /dev/null
+++ b/mpn/generic/gcd_subdiv_step.c
@@ -0,0 +1,104 @@
+/* gcd_subdiv_step.c.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2003, 2004, 2005, 2008 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Used when mpn_hgcd or mpn_hgcd2 has failed. Then either one of a or
+   b is small, or the difference is small. Perform one subtraction
+   followed by one division. If the gcd is found, stores it in gp and
+   *gn, and returns zero. Otherwise, compute the reduced a and b, and
+   return the new size.
+
+   ap and bp are n-limb areas, not both with a zero top limb, and are
+   updated in place.  tp receives the quotient of the division, which is
+   not used afterwards.  */
+
+/* FIXME: Check when the smaller number is a single limb, and invoke
+ * mpn_gcd_1. */
+mp_size_t
+mpn_gcd_subdiv_step (mp_ptr gp, mp_size_t *gn,
+                    mp_ptr ap, mp_ptr bp, mp_size_t n, mp_ptr tp)
+{
+  mp_size_t an, bn;
+
+  ASSERT (n > 0);
+  ASSERT (ap[n-1] > 0 || bp[n-1] > 0);
+
+  /* Strip high zero limbs to get the true sizes.  */
+  an = bn = n;
+  MPN_NORMALIZE (ap, an);
+  MPN_NORMALIZE (bp, bn);
+
+  /* The two labels below are also jumped to from later in the function,
+     with the local ap/an and bp/bn as they are at that point.  */
+  if (UNLIKELY (an == 0))
+    {
+      /* a == 0, so the gcd is b.  */
+    return_b:
+      MPN_COPY (gp, bp, bn);
+      *gn = bn;
+      return 0;
+    }
+  else if (UNLIKELY (bn == 0))
+    {
+      /* b == 0, so the gcd is a.  */
+    return_a:
+      MPN_COPY (gp, ap, an);
+      *gn = an;
+      return 0;
+    }
+
+  /* Arrange so that a > b, subtract a -= b, and maintain
+     normalization. */
+  if (an < bn)
+    MPN_PTR_SWAP (ap, an, bp, bn);
+  else if (an == bn)
+    {
+      int c;
+      MPN_CMP (c, ap, bp, an);
+      /* a == b: the common value is the gcd.  */
+      if (UNLIKELY (c == 0))
+       goto return_a;
+      else if (c < 0)
+       MP_PTR_SWAP (ap, bp);
+    }
+
+  ASSERT_NOCARRY (mpn_sub (ap, ap, an, bp, bn));
+  MPN_NORMALIZE (ap, an);
+  ASSERT (an > 0);
+
+  /* Arrange so that a > b, and divide a = q b + r */
+  /* FIXME: an < bn happens when we have cancellation. If that is the
+     common case, then we could reverse the roles of a and b to avoid
+     the swap. */
+  if (an < bn)
+    MPN_PTR_SWAP (ap, an, bp, bn);
+  else if (an == bn)
+    {
+      int c;
+      MPN_CMP (c, ap, bp, an);
+      if (UNLIKELY (c == 0))
+       goto return_a;
+      else if (c < 0)
+       MP_PTR_SWAP (ap, bp);
+    }
+
+  /* a <-- a mod b; the quotient goes to tp and the bn-limb remainder
+     overwrites the low limbs of a.  */
+  mpn_tdiv_qr (tp, ap, 0, ap, an, bp, bn);
+
+  /* Zero remainder: b divides a, so b is the gcd.  */
+  if (mpn_zero_p (ap, bn))
+    goto return_b;
+
+  return bn;
+}
diff --git a/mpn/generic/gcdext.c b/mpn/generic/gcdext.c

new file mode 100644 (file)

index 0000000..06e6c13
--- /dev/null
+++ b/mpn/generic/gcdext.c
@@ -0,0 +1,547 @@
+/* mpn_gcdext -- Extended Greatest Common Divisor.
+
+Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Computes (r;b) = (a; b) M. Result is of size n + M->n +/- 1, and
+   the size is returned (if inputs are non-normalized, result may be
+   non-normalized too). Temporary space needed is M->n + n.
+
+   a = {ap,n} is only read; b = {bp,n} is read and then overwritten with
+   the second component; the first component is written to rp.  The
+   returned size applies to both rp and bp.  Both must have room for
+   n + M->n + 1 limbs.  */
+static mp_size_t
+hgcd_mul_matrix_vector (struct hgcd_matrix *M,
+                       mp_ptr rp, mp_srcptr ap, mp_ptr bp, mp_size_t n, mp_ptr tp)
+{
+  mp_limb_t ah, bh;
+
+  /* Compute (r,b) <-- (u00 a + u10 b, u01 a + u11 b) as
+
+     t  = u00 * a
+     r  = u10 * b
+     r += t;
+
+     t  = u11 * b
+     b  = u01 * a
+     b += t;
+  */
+
+  /* mpn_mul wants its longer operand first, hence the two orderings.  */
+  if (M->n >= n)
+    {
+      mpn_mul (tp, M->p[0][0], M->n, ap, n);
+      mpn_mul (rp, M->p[1][0], M->n, bp, n);
+    }
+  else
+    {
+      mpn_mul (tp, ap, n, M->p[0][0], M->n);
+      mpn_mul (rp, bp, n, M->p[1][0], M->n);
+    }
+
+  ah = mpn_add_n (rp, rp, tp, n + M->n);
+
+  /* u11 * b must be formed before b is overwritten with u01 * a.  */
+  if (M->n >= n)
+    {
+      mpn_mul (tp, M->p[1][1], M->n, bp, n);
+      mpn_mul (bp, M->p[0][1], M->n, ap, n);
+    }
+  else
+    {
+      mpn_mul (tp, bp, n, M->p[1][1], M->n);
+      mpn_mul (bp, ap, n, M->p[0][1], M->n);
+    }
+  bh = mpn_add_n (bp, bp, tp, n + M->n);
+
+  n += M->n;
+  if ( (ah | bh) > 0)
+    {
+      /* At least one sum carried out: both results grow by one limb.  */
+      rp[n] = ah;
+      bp[n] = bh;
+      n++;
+    }
+  else
+    {
+      /* Normalize */
+      while ( (rp[n-1] | bp[n-1]) == 0)
+       n--;
+    }
+
+  return n;
+}
+
+#define COMPUTE_V_ITCH(n) (2*(n) + 1)
+
+/* Computes |v| = |(g - u a)| / b, where u may be positive or
+   negative, and v is of the opposite sign. a, b are of size n, u and
+   v at most size n, and v must have space for n+1 limbs.
+
+   u is given as {up,|usize|} with the sign of usize being the sign of u;
+   usize must be non-zero.  g is {gp,gn}.  The size of |v| is returned
+   (zero when v == 0).  tp is scratch for the product u a and the
+   numerator; see COMPUTE_V_ITCH.  */
+static mp_size_t
+compute_v (mp_ptr vp,
+          mp_srcptr ap, mp_srcptr bp, mp_size_t n,
+          mp_srcptr gp, mp_size_t gn,
+          mp_srcptr up, mp_size_t usize,
+          mp_ptr tp)
+{
+  mp_size_t size;
+  mp_size_t an;
+  mp_size_t bn;
+  mp_size_t vn;
+
+  ASSERT (n > 0);
+  ASSERT (gn > 0);
+  ASSERT (usize != 0);
+
+  size = ABS (usize);
+  ASSERT (size <= n);
+
+  an = n;
+  MPN_NORMALIZE (ap, an);
+
+  /* t = |u| a, with the longer operand passed first to mpn_mul.  */
+  if (an >= size)
+    mpn_mul (tp, ap, an, up, size);
+  else
+    mpn_mul (tp, up, size, ap, an);
+
+  /* The product has an + size limbs, the top one possibly zero.  */
+  size += an;
+  size -= tp[size - 1] == 0;
+
+  ASSERT (gn <= size);
+
+  if (usize > 0)
+    {
+      /* |v| = -v = (u a - g) / b */
+
+      ASSERT_NOCARRY (mpn_sub (tp, tp, size, gp, gn));
+      MPN_NORMALIZE (tp, size);
+      /* u a == g, hence v == 0.  */
+      if (size == 0)
+       return 0;
+    }
+  else
+    { /* usize < 0 */
+      /* |v| = v = (g - u a) / b = (g + |u| a) / b */
+      mp_limb_t cy = mpn_add (tp, tp, size, gp, gn);
+      if (cy)
+       tp[size++] = cy;
+    }
+
+  /* Now divide t / b. There must be no remainder */
+  bn = n;
+  MPN_NORMALIZE (bp, bn);
+  ASSERT (size >= bn);
+
+  vn = size + 1 - bn;
+  ASSERT (vn <= n + 1);
+
+  mpn_divexact (vp, tp, size, bp, bn);
+  /* The quotient has size - bn + 1 limbs, the top one possibly zero.  */
+  vn -= (vp[vn-1] == 0);
+
+  return vn;
+}
+
+/* Temporary storage:
+
+   Initial division: Quotient of at most an - n + 1 <= an limbs.
+
+   Storage for u0 and u1: 2(n+1).
+
+   Storage for hgcd matrix M, with input ceil(n/2): 5 * ceil(n/4)
+
+   Storage for hgcd, input (n + 1)/2: 9 n/4 plus some.
+
+   When hgcd succeeds: 1 + floor(3n/2) for adjusting a and b, and 2(n+1) for the cofactors.
+
+   When hgcd fails: 2n + 1 for mpn_gcdext_subdiv_step, which is less.
+
+   For the lehmer call after the loop, Let T denote
+   GCDEXT_DC_THRESHOLD. For the gcdext_lehmer call, we need T each for
+   u, a and b, and 4T+3 scratch space. Next, for compute_v, we need T
+   for u, T+1 for v and 2T + 1 scratch space. In all, 7T + 3 is
+   sufficient for both operations.
+
+*/
+
+/* Optimal choice of p seems difficult. In each iteration the division
+ * of work between hgcd and the updates of u0 and u1 depends on the
+ * current size of the u. It may be desirable to use a different
+ * choice of p in each iteration. Also the input size seems to matter;
+ * choosing p = n / 3 in the first iteration seems to improve
+ * performance slightly for input size just above the threshold, but
+ * degrade performance for larger inputs. */
+#define CHOOSE_P_1(n) ((n) / 2)
+#define CHOOSE_P_2(n) ((n) / 3)
+
+/* Extended gcd of A = {ap,an} and B = {bp,n}, an >= n > 0.  The gcd G is
+   stored at gp and its size in limbs is returned.  A cofactor U of A (so
+   that G = U A + V B for some V that is not returned) is stored at up, with
+   *usizep holding its size, negated when U is negative; *usizep is zero
+   when U == 0.  ap and bp are overwritten.
+
+   Outline: reduce A mod B when an > n; for small n call
+   mpn_gcdext_lehmer_n directly.  Otherwise shrink a and b with mpn_hgcd
+   (or mpn_gcdext_subdiv_step when hgcd fails) while accumulating the
+   cofactors u0, u1 of the reduction, run mpn_gcdext_lehmer_n on the
+   reduced pair, and finally combine its cofactors with u0 and u1.  */
+mp_size_t
+mpn_gcdext (mp_ptr gp, mp_ptr up, mp_size_t *usizep,
+           mp_ptr ap, mp_size_t an, mp_ptr bp, mp_size_t n)
+{
+  mp_size_t talloc;
+  mp_size_t scratch;
+  mp_size_t matrix_scratch;
+  mp_size_t ualloc = n + 1;
+
+  mp_size_t un;
+  mp_ptr u0;
+  mp_ptr u1;
+
+  mp_ptr tp;
+
+  TMP_DECL;
+
+  ASSERT (an >= n);
+  ASSERT (n > 0);
+
+  TMP_MARK;
+
+  /* FIXME: Check for small sizes first, before setting up temporary
+     storage etc. */
+  talloc = MPN_GCDEXT_LEHMER_N_ITCH(n);
+
+  /* For initial division */
+  scratch = an - n + 1;
+  if (scratch > talloc)
+    talloc = scratch;
+
+  /* matrix_scratch is only set in this branch.  It is only read on the
+     paths below that are reached when n is not below the threshold.  */
+  if (ABOVE_THRESHOLD (n, GCDEXT_DC_THRESHOLD))
+    {
+      /* For hgcd loop. */
+      mp_size_t hgcd_scratch;
+      mp_size_t update_scratch;
+      mp_size_t p1 = CHOOSE_P_1 (n);
+      mp_size_t p2 = CHOOSE_P_2 (n);
+      mp_size_t min_p = MIN(p1, p2);
+      mp_size_t max_p = MAX(p1, p2);
+      matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - min_p);
+      hgcd_scratch = mpn_hgcd_itch (n - min_p);
+      update_scratch = max_p + n - 1;
+
+      scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch);
+      if (scratch > talloc)
+       talloc = scratch;
+
+      /* Final mpn_gcdext_lehmer_n call. Need space for u and for
+        copies of a and b. */
+      scratch = MPN_GCDEXT_LEHMER_N_ITCH (GCDEXT_DC_THRESHOLD)
+       + 3*GCDEXT_DC_THRESHOLD;
+
+      if (scratch > talloc)
+       talloc = scratch;
+
+      /* Cofactors u0 and u1 */
+      talloc += 2*(n+1);
+    }
+
+  tp = TMP_ALLOC_LIMBS(talloc);
+
+  if (an > n)
+    {
+      /* A <-- A mod B; quotient discarded into tp.  */
+      mpn_tdiv_qr (tp, ap, 0, ap, an, bp, n);
+
+      /* B divides A: G = B = 0 A + 1 B, so the cofactor of A is zero.  */
+      if (mpn_zero_p (ap, n))
+       {
+         MPN_COPY (gp, bp, n);
+         *usizep = 0;
+         TMP_FREE;
+         return n;
+       }
+    }
+
+  if (BELOW_THRESHOLD (n, GCDEXT_DC_THRESHOLD))
+    {
+      mp_size_t gn = mpn_gcdext_lehmer_n(gp, up, usizep, ap, bp, n, tp);
+
+      TMP_FREE;
+      return gn;
+    }
+
+  /* Carve the two cofactor areas off the front of the scratch block.  */
+  MPN_ZERO (tp, 2*ualloc);
+  u0 = tp; tp += ualloc;
+  u1 = tp; tp += ualloc;
+
+  {
+    /* For the first hgcd call, there are no u updates, and it makes
+       some sense to use a different choice for p. */
+
+    /* FIXME: We could trim use of temporary storage, since u0 and u1
+       are not used yet. For the hgcd call, we could swap in the u0
+       and u1 pointers for the relevant matrix elements. */
+
+    struct hgcd_matrix M;
+    mp_size_t p = CHOOSE_P_1 (n);
+    mp_size_t nn;
+
+    mpn_hgcd_matrix_init (&M, n - p, tp);
+    nn = mpn_hgcd (ap + p, bp + p, n - p, &M, tp + matrix_scratch);
+    if (nn > 0)
+      {
+       ASSERT (M.n <= (n - p - 1)/2);
+       ASSERT (M.n + p <= (p + n - 1) / 2);
+
+       /* Temporary storage 2 (p + M->n) <= p + n - 1 */
+       n = mpn_hgcd_matrix_adjust (&M, p + nn, ap, bp, p, tp + matrix_scratch);
+
+       /* The cofactors start out as the second row of M.  */
+       MPN_COPY (u0, M.p[1][0], M.n);
+       MPN_COPY (u1, M.p[1][1], M.n);
+       un = M.n;
+       while ( (u0[un-1] | u1[un-1] ) == 0)
+         un--;
+      }
+    else
+      {
+       /* mpn_hgcd has failed. Then either one of a or b is very
+          small, or the difference is very small. Perform one
+          subtraction followed by one division. */
+       mp_size_t gn;
+       mp_size_t updated_un = 1;
+
+       /* Initial cofactors: u0 = 0 (from MPN_ZERO above), u1 = 1.  */
+       u1[0] = 1;
+
+       /* Temporary storage 2n + 1 */
+       n = mpn_gcdext_subdiv_step (gp, &gn, up, usizep, ap, bp, n,
+                                   u0, u1, &updated_un, tp, tp + n);
+       if (n == 0)
+         {
+           TMP_FREE;
+           return gn;
+         }
+
+       un = updated_un;
+       ASSERT (un < ualloc);
+      }
+  }
+
+  while (ABOVE_THRESHOLD (n, GCDEXT_DC_THRESHOLD))
+    {
+      struct hgcd_matrix M;
+      mp_size_t p = CHOOSE_P_2 (n);
+      mp_size_t nn;
+
+      mpn_hgcd_matrix_init (&M, n - p, tp);
+      nn = mpn_hgcd (ap + p, bp + p, n - p, &M, tp + matrix_scratch);
+      if (nn > 0)
+       {
+         mp_ptr t0;
+
+         t0 = tp + matrix_scratch;
+         ASSERT (M.n <= (n - p - 1)/2);
+         ASSERT (M.n + p <= (p + n - 1) / 2);
+
+         /* Temporary storage 2 (p + M->n) <= p + n - 1 */
+         n = mpn_hgcd_matrix_adjust (&M, p + nn, ap, bp, p, t0);
+
+         /* By the same analysis as for mpn_hgcd_matrix_mul */
+         ASSERT (M.n + un <= ualloc);
+
+         /* FIXME: This copying could be avoided by some swapping of
+          * pointers. May need more temporary storage, though. */
+         MPN_COPY (t0, u0, un);
+
+         /* (u0; u1) <-- (u0; u1) M, with the old u0 read from its copy
+            at t0.  */
+         /* Temporary storage ualloc */
+         un = hgcd_mul_matrix_vector (&M, u0, t0, u1, un, t0 + un);
+
+         ASSERT (un < ualloc);
+         ASSERT ( (u0[un-1] | u1[un-1]) > 0);
+       }
+      else
+       {
+         /* mpn_hgcd has failed. Then either one of a or b is very
+            small, or the difference is very small. Perform one
+            subtraction followed by one division. */
+         mp_size_t gn;
+         mp_size_t updated_un = un;
+
+         /* Temporary storage 2n + 1 */
+         n = mpn_gcdext_subdiv_step (gp, &gn, up, usizep, ap, bp, n,
+                                     u0, u1, &updated_un, tp, tp + n);
+         if (n == 0)
+           {
+             TMP_FREE;
+             return gn;
+           }
+
+         un = updated_un;
+         ASSERT (un < ualloc);
+       }
+    }
+
+  /* Three cases for the reduced pair: a == b, no reduction recorded in
+     the cofactors (u0 == 0), and the general case.  */
+  if (UNLIKELY (mpn_cmp (ap, bp, n) == 0))
+    {
+      /* Must return the smallest cofactor, +u1 or -u0 */
+      int c;
+
+      MPN_COPY (gp, ap, n);
+
+      MPN_CMP (c, u0, u1, un);
+      /* c == 0 can happen only when A = (2k+1) G, B = 2 G. And in
+        this case we choose the cofactor + 1, corresponding to G = A
+        - k B, rather than -1, corresponding to G = - A + (k+1) B. */
+      ASSERT (c != 0 || (un == 1 && u0[0] == 1 && u1[0] == 1));
+      if (c < 0)
+       {
+         MPN_NORMALIZE (u0, un);
+         MPN_COPY (up, u0, un);
+         *usizep = -un;
+       }
+      else
+       {
+         MPN_NORMALIZE_NOT_ZERO (u1, un);
+         MPN_COPY (up, u1, un);
+         *usizep = un;
+       }
+
+      TMP_FREE;
+      return n;
+    }
+  else if (mpn_zero_p (u0, un))
+    {
+      mp_size_t gn;
+      ASSERT (un == 1);
+      ASSERT (u1[0] == 1);
+
+      /* g = u a + v b = (u u1 - v u0) A + (...) B = u A + (...) B */
+      gn = mpn_gcdext_lehmer_n (gp, up, usizep, ap, bp, n, tp);
+
+      TMP_FREE;
+      return gn;
+    }
+  else
+    {
+      /* We have A = ... a + ... b
+                B =  u0 a +  u1 b
+
+                a = u1  A + ... B
+                b = -u0 A + ... B
+
+        with bounds
+
+          |u0|, |u1| <= B / min(a, b)
+
+        Compute g = u a + v b = (u u1 - v u0) A + (...) B
+        Here, u, v are bounded by
+
+        |u| <= b,
+        |v| <= a
+      */
+
+      mp_size_t u0n;
+      mp_size_t u1n;
+      mp_size_t lehmer_un;
+      mp_size_t lehmer_vn;
+      mp_size_t gn;
+
+      mp_ptr lehmer_up;
+      mp_ptr lehmer_vp;
+      int negate;
+
+      lehmer_up = tp; tp += n;
+
+      /* Call mpn_gcdext_lehmer_n with copies of a and b. */
+      /* The originals in ap and bp are still needed by compute_v below.  */
+      MPN_COPY (tp, ap, n);
+      MPN_COPY (tp + n, bp, n);
+      gn = mpn_gcdext_lehmer_n (gp, lehmer_up, &lehmer_un, tp, tp + n, n, tp + 2*n);
+
+      u0n = un;
+      MPN_NORMALIZE (u0, u0n);
+      if (lehmer_un == 0)
+       {
+         /* u == 0  ==>  v = g / b == 1  ==> g = - u0 A + (...) B */
+         MPN_COPY (up, u0, u0n);
+         *usizep = -u0n;
+
+         TMP_FREE;
+         return gn;
+       }
+
+      /* The copies of a and b at tp are no longer needed; v and the
+        compute_v scratch reuse that area.  */
+      lehmer_vp = tp;
+      /* Compute v = (g - u a) / b */
+      lehmer_vn = compute_v (lehmer_vp,
+                            ap, bp, n, gp, gn, lehmer_up, lehmer_un, tp + n + 1);
+
+      /* From here on lehmer_un is |u|'s size; negate records u < 0.  v has
+        the opposite sign of u, so -v u0 has the sign of u as well.  */
+      if (lehmer_un > 0)
+       negate = 0;
+      else
+       {
+         lehmer_un = -lehmer_un;
+         negate = 1;
+       }
+
+      u1n = un;
+      MPN_NORMALIZE (u1, u1n);
+
+      /* It's possible that u0 = 1, u1 = 0 */
+      if (u1n == 0)
+       {
+         ASSERT (un == 1);
+         ASSERT (u0[0] == 1);
+
+         /* u1 == 0 ==> u u1 + v u0 = v */
+         MPN_COPY (up, lehmer_vp, lehmer_vn);
+         *usizep = negate ? lehmer_vn : - lehmer_vn;
+
+         TMP_FREE;
+         return gn;
+       }
+
+      ASSERT (lehmer_un + u1n <= ualloc);
+      ASSERT (lehmer_vn + u0n <= ualloc);
+
+      /* Now u0, u1, u are non-zero. We may still have v == 0 */
+
+      /* Compute u u1 */
+      if (lehmer_un <= u1n)
+       /* Should be the common case */
+       mpn_mul (up, u1, u1n, lehmer_up, lehmer_un);
+      else
+       mpn_mul (up, lehmer_up, lehmer_un, u1, u1n);
+
+      un = u1n + lehmer_un;
+      un -= (up[un - 1] == 0);
+
+      if (lehmer_vn > 0)
+       {
+         mp_limb_t cy;
+
+         /* Compute |v| u0 and add it to |u| u1.  */
+         /* Overwrites old u1 value */
+         if (lehmer_vn <= u0n)
+           /* Should be the common case */
+           mpn_mul (u1, u0, u0n, lehmer_vp, lehmer_vn);
+         else
+           mpn_mul (u1, lehmer_vp, lehmer_vn, u0, u0n);
+
+         u1n = u0n + lehmer_vn;
+         u1n -= (u1[u1n - 1] == 0);
+
+         /* mpn_add wants the longer operand first.  */
+         if (u1n <= un)
+           {
+             cy = mpn_add (up, up, un, u1, u1n);
+           }
+         else
+           {
+             cy = mpn_add (up, u1, u1n, up, un);
+             un = u1n;
+           }
+         up[un] = cy;
+         un += (cy != 0);
+
+         ASSERT (un < ualloc);
+       }
+      *usizep = negate ? -un : un;
+
+      TMP_FREE;
+      return gn;
+    }
+}
diff --git a/mpn/generic/gcdext_1.c b/mpn/generic/gcdext_1.c

new file mode 100644 (file)

index 0000000..3bb4d21
--- /dev/null
+++ b/mpn/generic/gcdext_1.c
@@ -0,0 +1,318 @@
+/* mpn_gcdext -- Extended Greatest Common Divisor.
+
+Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef GCDEXT_1_USE_BINARY
+#define GCDEXT_1_USE_BINARY 0
+#endif
+
+#ifndef GCDEXT_1_BINARY_METHOD
+#define GCDEXT_1_BINARY_METHOD 2
+#endif
+
+#ifndef USE_ZEROTAB
+#define USE_ZEROTAB 1
+#endif
+
+#if GCDEXT_1_USE_BINARY
+
+#if USE_ZEROTAB
+static const unsigned char zerotab[0x40] = {  /* zerotab[x] = trailing zeros of 6-bit x; 6 for x == 0 */
+  6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+  5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
+};
+#endif
+
+mp_limb_t
+mpn_gcdext_1 (mp_limb_signed_t *sp, mp_limb_signed_t *tp,
+             mp_limb_t u, mp_limb_t v)
+{
+  /* Binary extended gcd of the single limbs u and v.  Maintain
+
+     U = t1 u + t0 v
+     V = s1 u + s0 v
+
+     where U, V are the inputs (without any shared power of two),
+     and the matrix has determinant ± 2^{shift}.
+  */
+  mp_limb_t s0 = 1;
+  mp_limb_t t0 = 0;
+  mp_limb_t s1 = 0;
+  mp_limb_t t1 = 1;
+  mp_limb_t ug;                        /* U/g, formed after the main loop */
+  mp_limb_t vg;                        /* V/g, formed after the main loop */
+  mp_limb_t ugh;                       /* ceil (ug / 2) */
+  mp_limb_t vgh;                       /* ceil (vg / 2) */
+  unsigned zero_bits;                  /* trailing zero bits common to u and v */
+  unsigned shift;                      /* sum of all shift counts applied so far */
+  unsigned i;
+#if GCDEXT_1_BINARY_METHOD == 2
+  mp_limb_t det_sign;                  /* mask, toggled on each step that had v > u */
+#endif
+
+  ASSERT (u > 0);
+  ASSERT (v > 0);
+
+  count_trailing_zeros (zero_bits, u | v);
+  u >>= zero_bits;
+  v >>= zero_bits;                     /* now at least one of u, v is odd */
+
+  if ((u & 1) == 0)
+    {
+      count_trailing_zeros (shift, u);
+      u >>= shift;
+      t1 <<= shift;
+    }
+  else if ((v & 1) == 0)
+    {
+      count_trailing_zeros (shift, v);
+      v >>= shift;
+      s0 <<= shift;
+    }
+  else
+    shift = 0;
+
+#if GCDEXT_1_BINARY_METHOD == 1
+  while (u != v)
+    {
+      unsigned count;
+      if (u > v)
+       {
+         u -= v;
+#if USE_ZEROTAB
+         count = zerotab [u & 0x3f];
+         u >>= count;
+         if (UNLIKELY (count == 6))    /* low six bits were all zero: keep scanning */
+           {
+             unsigned c;
+             do
+               {
+                 c = zerotab[u & 0x3f];
+                 u >>= c;
+                 count += c;
+               }
+             while (c == 6);
+           }
+#else
+         count_trailing_zeros (count, u);
+         u >>= count;
+#endif
+         t0 += t1; t1 <<= count;
+         s0 += s1; s1 <<= count;
+       }
+      else
+       {
+         v -= u;
+#if USE_ZEROTAB
+         count = zerotab [v & 0x3f];
+         v >>= count;
+         if (UNLIKELY (count == 6))    /* low six bits were all zero: keep scanning */
+           {
+             unsigned c;
+             do
+               {
+                 c = zerotab[v & 0x3f];
+                 v >>= c;
+                 count += c;
+               }
+             while (c == 6);
+           }
+#else
+         count_trailing_zeros (count, v);
+         v >>= count;
+#endif
+         t1 += t0; t0 <<= count;
+         s1 += s0; s0 <<= count;
+       }
+      shift += count;
+    }
+#else
+# if GCDEXT_1_BINARY_METHOD == 2
+  u >>= 1;                             /* u and v are both odd here; the low */
+  v >>= 1;                             /* one bit is kept implicit in the loop */
+
+  det_sign = 0;
+
+  while (u != v)
+    {
+      unsigned count;
+      mp_limb_t d =  u - v;
+      mp_limb_t vgtu = LIMB_HIGHBIT_TO_MASK (d);
+      mp_limb_t sx;
+      mp_limb_t tx;
+
+      /* When v <= u (vgtu == 0), the updates are:
+
+          (u; v)   <-- ( (u - v) >> count; v)    (det = +(1<<count) for corr. M factor)
+          (t1, t0) <-- (t1 << count, t0 + t1)
+
+        and when v > u, the updates are
+
+          (u; v)   <-- ( (v - u) >> count; u)    (det = -(1<<count))
+          (t1, t0) <-- (t0 << count, t0 + t1)
+
+        and similarly for s1, s0
+      */
+
+      /* v <-- min (u, v) */
+      v += (vgtu & d);
+
+      /* u <-- |u - v| */
+      u = (d ^ vgtu) - vgtu;
+
+      /* Number of trailing zeros is the same no matter if we look at
+       * d or u, but using d gives more parallelism. */
+#if USE_ZEROTAB
+      count = zerotab[d & 0x3f];
+      if (UNLIKELY (count == 6))       /* low six bits were all zero: keep scanning */
+       {
+         unsigned c = 6;
+         do
+           {
+             d >>= c;
+             c = zerotab[d & 0x3f];
+             count += c;
+           }
+         while (c == 6);
+       }
+#else
+      count_trailing_zeros (count, d);
+#endif
+      det_sign ^= vgtu;
+
+      tx = vgtu & (t0 - t1);           /* t0 - t1 when v > u, else 0 */
+      sx = vgtu & (s0 - s1);           /* s0 - s1 when v > u, else 0 */
+      t0 += t1;
+      s0 += s1;
+      t1 += tx;                        /* t1 becomes the old t0 when v > u */
+      s1 += sx;                        /* s1 becomes the old s0 when v > u */
+
+      count++;                         /* one more, for the implicit low bit */
+      u >>= count;
+      t1 <<= count;
+      s1 <<= count;
+      shift += count;
+    }
+  u = (u << 1) + 1;                    /* put the implicit low one bit back */
+# else /* GCDEXT_1_BINARY_METHOD != 2 */
+#  error Unknown GCDEXT_1_BINARY_METHOD
+# endif
+#endif
+
+  /* Now u = v = g = gcd (u,v). Compute U/g and V/g */
+  ug = t0 + t1;
+  vg = s0 + s1;
+
+  ugh = ug/2 + (ug & 1);
+  vgh = vg/2 + (vg & 1);
+
+  /* Now ±2^{shift} g = s0 U - t0 V. Get rid of the power of two, using
+     s0 U - t0 V = (s0 + V/g) U - (t0 + U/g) V. */
+  for (i = 0; i < shift; i++)
+    {
+      mp_limb_t mask = - ( (s0 | t0) & 1);     /* all ones if s0 or t0 is odd */
+
+      s0 /= 2;
+      t0 /= 2;
+      s0 += mask & vgh;
+      t0 += mask & ugh;
+    }
+  /* FIXME: Try simplifying this condition. */
+  if ( (s0 > 1 && 2*s0 >= vg) || (t0 > 1 && 2*t0 >= ug) )
+    {
+      s0 -= vg;
+      t0 -= ug;
+    }
+#if GCDEXT_1_BINARY_METHOD == 2
+  /* Conditional negation. */
+  s0 = (s0 ^ det_sign) - det_sign;
+  t0 = (t0 ^ det_sign) - det_sign;
+#endif
+  *sp = s0;
+  *tp = -t0;
+
+  return u << zero_bits;               /* restore the shared power of two */
+}
+
+#else /* !GCDEXT_1_USE_BINARY */
+
+
+/* FIXME: Takes two single-word limbs. It could be extended to a
+ * function that accepts a bignum for the first input, and only
+ * returns the first co-factor. */
+
+mp_limb_t
+mpn_gcdext_1 (mp_limb_signed_t *up, mp_limb_signed_t *vp,
+             mp_limb_t a, mp_limb_t b)
+{
+  /* Euclid's algorithm on single limbs.  Invariant, for the original
+     inputs A and B:  a = au A + av B  and  b = bu A + bv B.  */
+  mp_limb_signed_t au = 1, av = 0;
+  mp_limb_signed_t bu = 0, bv = 1;
+  mp_limb_t quot;
+
+  ASSERT (a > 0);
+  ASSERT (b > 0);
+
+  if (a >= b)
+    {
+      quot = a / b;
+      a -= quot * b;
+      if (a == 0)
+        {
+          *up = bu;
+          *vp = bv;
+          return b;
+        }
+      au -= quot * bu;
+      av -= quot * bv;
+    }
+
+  for (;;)
+    {
+      quot = b / a;
+      b -= quot * a;
+      if (b == 0)
+        {
+          *up = au;
+          *vp = av;
+          return a;
+        }
+      bu -= quot * au;
+      bv -= quot * av;
+
+      quot = a / b;
+      a -= quot * b;
+      if (a == 0)
+        {
+          *up = bu;
+          *vp = bv;
+          return b;
+        }
+      au -= quot * bu;
+      av -= quot * bv;
+    }
+}
+#endif /* !GCDEXT_1_USE_BINARY */
diff --git a/mpn/generic/gcdext_lehmer.c b/mpn/generic/gcdext_lehmer.c

new file mode 100644 (file)

index 0000000..8599a4f
--- /dev/null
+++ b/mpn/generic/gcdext_lehmer.c
@@ -0,0 +1,209 @@
+/* mpn_gcdext -- Extended Greatest Common Divisor.
+
+Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Extended gcd of {ap,n} and {bp,n}: the gcd goes to gp (its size is the
+   return value) and one cofactor to up, with signed size in *usize.
+   Temporary storage: 3*(n+1) for u, n+1 for the matrix-vector products
+   (if hgcd2 succeeds).  If hgcd2 fails, n+1 limbs for the division, at
+   most n for the quotient, and n+1 for the product q u0.  In all, 4n + 3. */
+mp_size_t
+mpn_gcdext_lehmer_n (mp_ptr gp, mp_ptr up, mp_size_t *usize,
+                    mp_ptr ap, mp_ptr bp, mp_size_t n,
+                    mp_ptr tp)
+{
+  mp_size_t ualloc = n + 1;    /* limbs set aside for each of u0, u1, u2 */
+
+  /* Keeps track of the second row of the reduction matrix
+   *
+   *   M = (v0, v1 ; u0, u1)
+   *
+   * which correspond to the first column of the inverse
+   *
+   *   M^{-1} = (u1, -v1; -u0, v0)
+   */
+
+  mp_size_t un;
+  mp_ptr u0;
+  mp_ptr u1;
+  mp_ptr u2;
+
+  MPN_ZERO (tp, 3*ualloc);
+  u0 = tp; tp += ualloc;
+  u1 = tp; tp += ualloc;
+  u2 = tp; tp += ualloc;       /* tp now points past the three u areas */
+
+  u1[0] = 1; un = 1;           /* start from u0 = 0, u1 = 1 */
+
+  /* FIXME: Handle n == 2 differently, after the loop? */
+  while (n >= 2)
+    {
+      struct hgcd_matrix1 M;
+      mp_limb_t ah, al, bh, bl;
+      mp_limb_t mask;
+
+      mask = ap[n-1] | bp[n-1];
+      ASSERT (mask > 0);
+
+      if (mask & GMP_NUMB_HIGHBIT)
+       {
+         ah = ap[n-1]; al = ap[n-2];
+         bh = bp[n-1]; bl = bp[n-2];
+       }
+      else if (n == 2)
+       {
+         /* We use the full inputs without truncation, so we can
+            safely shift left. */
+         int shift;
+
+         count_leading_zeros (shift, mask);
+         ah = MPN_EXTRACT_NUMB (shift, ap[1], ap[0]);
+         al = ap[0] << shift;
+         bh = MPN_EXTRACT_NUMB (shift, bp[1], bp[0]);
+         bl = bp[0] << shift;
+       }
+      else
+       {
+         int shift;
+
+         count_leading_zeros (shift, mask);
+         ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
+         al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
+         bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
+         bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
+       }
+
+      /* Try an mpn_hgcd2 step */
+      if (mpn_hgcd2 (ah, al, bh, bl, &M))
+       {
+         n = mpn_hgcd_mul_matrix1_inverse_vector (&M, tp, ap, bp, n);
+         MP_PTR_SWAP (ap, tp);
+         un = mpn_hgcd_mul_matrix1_vector(&M, u2, u0, u1, un);
+         MP_PTR_SWAP (u0, u2);
+       }
+      else
+       {
+         /* mpn_hgcd2 has failed. Then either one of a or b is very
+            small, or the difference is very small. Perform one
+            subtraction followed by one division. */
+         mp_size_t gn;
+         mp_size_t updated_un = un;
+
+         /* Temporary storage n for the quotient and ualloc for the
+            new cofactor. */
+         n = mpn_gcdext_subdiv_step (gp, &gn, up, usize, ap, bp, n,
+                                     u0, u1, &updated_un, tp, u2);
+         if (n == 0)
+           return gn;          /* the step already stored gcd and cofactor */
+
+         un = updated_un;
+       }
+    }
+  ASSERT_ALWAYS (ap[0] > 0);   /* loop ended with n < 2: finish on ap[0], bp[0] */
+  ASSERT_ALWAYS (bp[0] > 0);
+
+  if (ap[0] == bp[0])
+    {
+      int c;
+
+      /* Which cofactor to return now? Candidates are +u1 and -u0,
+        depending on which of a and b was most recently reduced,
+        which we don't keep track of. So compare and get the smallest
+        one. */
+
+      gp[0] = ap[0];
+
+      MPN_CMP (c, u0, u1, un);
+      ASSERT (c != 0 || (un == 1 && u0[0] == 1 && u1[0] == 1));
+      if (c < 0)
+       {
+         MPN_NORMALIZE (u0, un);
+         MPN_COPY (up, u0, un);
+         *usize = -un;         /* negative size: the cofactor is -u0 */
+       }
+      else
+       {
+         MPN_NORMALIZE_NOT_ZERO (u1, un);
+         MPN_COPY (up, u1, un);
+         *usize = un;
+       }
+      return 1;
+    }
+  else
+    {
+      mp_limb_t uh, vh;
+      mp_limb_signed_t u;
+      mp_limb_signed_t v;
+      int negate;
+
+      gp[0] = mpn_gcdext_1 (&u, &v, ap[0], bp[0]);
+
+      /* Set up = u u1 - v u0. Keep track of size, un grows by one or
+        two limbs. */
+
+      if (u == 0)
+       {
+         ASSERT (v == 1);
+         MPN_NORMALIZE (u0, un);
+         MPN_COPY (up, u0, un);
+         *usize = -un;
+         return 1;
+       }
+      else if (v == 0)
+       {
+         ASSERT (u == 1);
+         MPN_NORMALIZE (u1, un);
+         MPN_COPY (up, u1, un);
+         *usize = un;
+         return 1;
+       }
+      else if (u > 0)
+       {
+         negate = 0;
+         ASSERT (v < 0);
+         v = -v;               /* work with magnitudes; sign kept in negate */
+       }
+      else
+       {
+         negate = 1;
+         ASSERT (v > 0);
+         u = -u;               /* work with magnitudes; sign kept in negate */
+       }
+
+      uh = mpn_mul_1 (up, u1, un, u);
+      vh = mpn_addmul_1 (up, u0, un, v);
+
+      if ( (uh | vh) > 0)
+       {
+         uh += vh;
+         up[un++] = uh;
+         if (uh < vh)          /* uh + vh wrapped around: one more limb */
+           up[un++] = 1;
+       }
+
+      MPN_NORMALIZE_NOT_ZERO (up, un);
+
+      *usize = negate ? -un : un;
+      return 1;
+    }
+}
diff --git a/mpn/generic/gcdext_subdiv_step.c b/mpn/generic/gcdext_subdiv_step.c

new file mode 100644 (file)

index 0000000..21a3a3b
--- /dev/null
+++ b/mpn/generic/gcdext_subdiv_step.c
@@ -0,0 +1,199 @@
+/* gcdext_subdiv_step.c.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2003, 2004, 2005, 2008, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Used when mpn_hgcd or mpn_hgcd2 has failed. Then either one of a or
+   b is small, or the difference is small. Perform one subtraction
+   followed by one division. If the gcd is found, stores it in gp and
+   *gn, and returns zero. Otherwise, compute the reduced a and b,
+   return the new size, and cofactors. */
+
+/* Temporary storage: Needs n limbs for the quotient, at qp. tp must
+   point to an area large enough for the resulting cofactor, plus one
+   limb extra. All in all, 2N + 1 if N is a bound for both inputs and
+   outputs. */
+mp_size_t
+mpn_gcdext_subdiv_step (mp_ptr gp, mp_size_t *gn, mp_ptr up, mp_size_t *usizep,
+                       mp_ptr ap, mp_ptr bp, mp_size_t n,
+                       mp_ptr u0, mp_ptr u1, mp_size_t *unp,
+                       mp_ptr qp, mp_ptr tp)
+{
+  mp_size_t an, bn, un;
+  mp_size_t qn;
+  mp_size_t u0n;
+
+  int swapped;                 /* set while (a, u1) and (b, u0) have traded roles */
+
+  an = bn = n;
+
+  ASSERT (an > 0);
+  ASSERT (ap[an-1] > 0 || bp[an-1] > 0);
+
+  MPN_NORMALIZE (ap, an);
+  MPN_NORMALIZE (bp, bn);
+
+  un = *unp;
+
+  swapped = 0;
+
+  if (UNLIKELY (an == 0))
+    {
+    return_b:                  /* also reached by goto once b is known to be the gcd */
+      MPN_COPY (gp, bp, bn);
+      *gn = bn;
+
+      MPN_NORMALIZE (u0, un);
+      MPN_COPY (up, u0, un);
+
+      *usizep = swapped ? un : -un;
+
+      return 0;
+    }
+  else if (UNLIKELY (bn == 0))
+    {
+      MPN_COPY (gp, ap, an);
+      *gn = an;
+
+      MPN_NORMALIZE (u1, un);
+      MPN_COPY (up, u1, un);
+
+      *usizep = swapped ? -un : un;
+
+      return 0;
+    }
+
+  /* Arrange so that a > b, subtract a -= b, and maintain
+     normalization. */
+  if (an < bn)
+    {
+      MPN_PTR_SWAP (ap, an, bp, bn);
+      MP_PTR_SWAP (u0, u1);
+      swapped ^= 1;
+    }
+  else if (an == bn)
+    {
+      int c;
+      MPN_CMP (c, ap, bp, an);
+      if (UNLIKELY (c == 0))
+       {
+         MPN_COPY (gp, ap, an);        /* a == b, so either one is the gcd */
+         *gn = an;
+
+         /* Must return the smallest cofactor, +u1 or -u0 */
+         MPN_CMP (c, u0, u1, un);
+         ASSERT (c != 0 || (un == 1 && u0[0] == 1 && u1[0] == 1));
+
+         if (c < 0)
+           {
+             MPN_NORMALIZE (u0, un);
+             MPN_COPY (up, u0, un);
+             swapped ^= 1;             /* flips the sign chosen for *usizep below */
+           }
+         else
+           {
+             MPN_NORMALIZE_NOT_ZERO (u1, un);
+             MPN_COPY (up, u1, un);
+           }
+
+         *usizep = swapped ? -un : un;
+         return 0;
+       }
+      else if (c < 0)
+       {
+         MP_PTR_SWAP (ap, bp);
+         MP_PTR_SWAP (u0, u1);
+         swapped ^= 1;
+       }
+    }
+  /* Reduce a -= b, u1 += u0 */
+  ASSERT_NOCARRY (mpn_sub (ap, ap, an, bp, bn));
+  MPN_NORMALIZE (ap, an);
+  ASSERT (an > 0);
+
+  u1[un] = mpn_add_n (u1, u1, u0, un);
+  un += (u1[un] > 0);          /* grow by one limb when the add carried out */
+
+  /* Arrange so that a > b, and divide a = q b + r */
+  if (an < bn)
+    {
+      MPN_PTR_SWAP (ap, an, bp, bn);
+      MP_PTR_SWAP (u0, u1);
+      swapped ^= 1;
+    }
+  else if (an == bn)
+    {
+      int c;
+      MPN_CMP (c, ap, bp, an);
+      if (UNLIKELY (c == 0))
+       goto return_b;
+      else if (c < 0)
+       {
+         MP_PTR_SWAP (ap, bp);
+         MP_PTR_SWAP (u0, u1);
+         swapped ^= 1;
+       }
+    }
+
+  /* Reduce a -= q b, u1 += q u0 */
+  qn = an - bn + 1;
+  mpn_tdiv_qr (qp, ap, 0, ap, an, bp, bn);
+
+  if (mpn_zero_p (ap, bn))     /* zero remainder: b is the gcd */
+    goto return_b;
+
+  n = bn;                      /* the remainder in ap has at most bn limbs */
+
+  /* Update u1 += q u0 */
+  u0n = un;
+  MPN_NORMALIZE (u0, u0n);
+
+  if (u0n > 0)
+    {
+      qn -= (qp[qn - 1] == 0); /* drop a zero high quotient limb */
+
+      if (qn > u0n)            /* mpn_mul wants the longer operand first */
+       mpn_mul (tp, qp, qn, u0, u0n);
+      else
+       mpn_mul (tp, u0, u0n, qp, qn);
+
+      if (qn + u0n > un)
+       {
+         mp_size_t u1n = un;
+         un = qn + u0n;
+         un -= (tp[un-1] == 0);        /* the product may be one limb shorter */
+         u1[un] = mpn_add (u1, tp, un, u1, u1n);
+       }
+      else
+       {
+         u1[un] = mpn_add (u1, u1, un, tp, qn + u0n);
+       }
+
+      un += (u1[un] > 0);      /* grow by one limb when the add carried out */
+    }
+
+  *unp = un;
+  return n;
+}
diff --git a/mpn/generic/get_d.c b/mpn/generic/get_d.c

new file mode 100644 (file)

index 0000000..fdb6e70
--- /dev/null
+++ b/mpn/generic/get_d.c
@@ -0,0 +1,490 @@
+/* mpn_get_d -- limbs to double conversion.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2003, 2004, 2007, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef _GMP_IEEE_FLOATS
+#define _GMP_IEEE_FLOATS 0
+#endif
+
+#if ! _GMP_IEEE_FLOATS
+/* Dummy definition providing the member names that mpn_get_d refers to, just so that code compiles here; the int fields do not describe any real double layout. */
+union ieee_double_extract {
+  struct {
+    int manh, manl, sig, exp;
+  } s;
+  double d;
+};
+#endif
+
+/* To force use of the generic C code for testing, put
+   "#define _GMP_IEEE_FLOATS 0" at this point.  */
+
+
+
+/* In alpha gcc prior to 3.4, signed DI comparisons involving constants are
+   rearranged from "x < n" to "x+(-n) < 0", which is of course hopelessly
+   wrong if that addition overflows.
+
+   The workaround here avoids this bug by ensuring n is not a literal
+   constant.  Note that this is alpha specific.  The offending transformation
+   is/was in alpha.c alpha_emit_conditional_branch() under "We want to use
+   cmpcc/bcc".
+
+   Bizarrely, it turns out this happens also with Cray cc on
+   alphaev5-cray-unicosmk2.0.6.X, and has the same solution.  Don't know why
+   or how.  */
+
+#if HAVE_HOST_CPU_FAMILY_alpha                         \
+  && ((defined (__GNUC__) && ! __GMP_GNUC_PREREQ(3,4)) \
+      || defined (_CRAY))
+static volatile const long CONST_1024 = 1024;  /* exp >= this: overflow to infinity */
+static volatile const long CONST_NEG_1023 = -1023;  /* exp <= this: denorm range */
+static volatile const long CONST_NEG_1022_SUB_53 = -1022 - 53;  /* exp <= this: underflow to zero */
+#else  /* no workaround needed: plain literal constants */
+#define CONST_1024           (1024)
+#define CONST_NEG_1023       (-1023)
+#define CONST_NEG_1022_SUB_53 (-1022 - 53)
+#endif
+
+
+
+/* Return the value {ptr,size}*2^exp, and negative if sign<0.
+   Must have size>=1, and a non-zero high limb ptr[size-1].
+
+   {ptr,size} is truncated towards zero.  This is consistent with other gmp
+   conversions, like mpz_set_f or mpz_set_q, and is easy to implement and
+   test.
+
+   In the past conversions had attempted (imperfectly) to let the hardware
+   float rounding mode take effect, but that gets tricky since multiple
+   roundings need to be avoided, or taken into account, and denorms mean the
+   effective precision of the mantissa is not constant.  (For reference,
+   mpz_get_d on IEEE systems was ok, except it operated on the absolute
+   value.  mpf_get_d and mpq_get_d suffered from multiple roundings and from
+   not always using enough bits to get the rounding right.)
+
+   It's felt that GMP is not primarily concerned with hardware floats, and
+   really isn't enhanced by getting involved with hardware rounding modes
+   (which could even be some weird unknown style), so something unambiguous
+   and straightforward is best.
+
+
+   The IEEE code below is the usual case, it knows either a 32-bit or 64-bit
+   limb and is done with shifts and masks.  The 64-bit case in particular
+   should come out nice and compact.
+
+   The generic code works one bit at a time, which will be quite slow, but
+   should support any binary-based "double" and be safe against any rounding
+   mode.  Note in particular it works on IEEE systems too.
+
+
+   Traps:
+
+   Hardware traps for overflow to infinity, underflow to zero, or
+   unsupported denorms may or may not be taken.  The IEEE code works bitwise
+   and so probably won't trigger them, the generic code works by float
+   operations and so probably will.  This difference might be thought less
+   than ideal, but again it's felt straightforward code is better than trying
+   to get intimate with hardware exceptions (of perhaps unknown nature).
+
+
+   Not done:
+
+   mpz_get_d in the past handled size==1 with a cast limb->double.  This
+   might still be worthwhile there (for up to the mantissa many bits), but
+   for mpn_get_d here, the cost of applying "exp" to the resulting exponent
+   would probably use up any benefit a cast may have over bit twiddling.
+   Also, if the exponent is pushed into denorm range then bit twiddling is
+   the only option, to ensure the desired truncation is obtained.
+
+
+   Other:
+
+   For reference, note that HPPA 8000, 8200, 8500 and 8600 trap FCNV,UDW,DBL
+   to the kernel for values >= 2^63.  This makes it slow, and worse the Linux
+   kernel (what versions?) apparently uses untested code in its trap handling
+   routines, and gets the sign wrong.  We don't use such a limb-to-double
+   cast, neither in the IEEE or generic code.  */
+
+
+double
+mpn_get_d (mp_srcptr up, mp_size_t size, mp_size_t sign, long exp)
+{
+  ASSERT (size >= 0);
+  ASSERT_MPN (up, size);
+  ASSERT (size == 0 || up[size-1] != 0);
+
+  if (size == 0)
+    return 0.0;
+
+  /* Adjust exp to a radix point just above {up,size}, guarding against
+     overflow.  After this exp can of course be reduced to anywhere within
+     the {up,size} region without underflow.  */
+  if (UNLIKELY ((unsigned long) (GMP_NUMB_BITS * size)
+               > (unsigned long) (LONG_MAX - exp)))
+    {
+      if (_GMP_IEEE_FLOATS)
+       goto ieee_infinity;
+
+      /* generic */
+      exp = LONG_MAX;
+    }
+  else
+    {
+      exp += GMP_NUMB_BITS * size;
+    }
+
+
+#if 1
+{
+  int lshift, nbits;
+  union ieee_double_extract u;
+  mp_limb_t x, mhi, mlo;
+#if GMP_LIMB_BITS == 64
+  mp_limb_t m;
+  up += size;
+  m = *--up;
+  count_leading_zeros (lshift, m);
+
+  exp -= (lshift - GMP_NAIL_BITS) + 1;
+  m <<= lshift;
+
+  nbits = GMP_LIMB_BITS - lshift;
+
+  if (nbits < 53 && size > 1)
+    {
+      x = *--up;
+      x <<= GMP_NAIL_BITS;
+      x >>= nbits;
+      m |= x;
+      nbits += GMP_NUMB_BITS;
+
+      if (LIMBS_PER_DOUBLE >= 3 && nbits < 53 && size > 2)
+       {
+         x = *--up;
+         x <<= GMP_NAIL_BITS;
+         x >>= nbits;
+         m |= x;
+         nbits += GMP_NUMB_BITS;
+       }
+    }
+  mhi = m >> (32 + 11);
+  mlo = m >> 11;
+#endif
+#if GMP_LIMB_BITS == 32
+  up += size;
+  x = *--up, size--;
+  count_leading_zeros (lshift, x);
+
+  exp -= (lshift - GMP_NAIL_BITS) + 1;
+  x <<= lshift;
+  mhi = x >> 11;
+
+  if (lshift < 11)             /* FIXME: never true if NUMB < 20 bits */
+    {
+      /* All 20 bits in mhi */
+      mlo = x << 21;
+      /* >= 1 bit in mlo */
+      nbits = GMP_LIMB_BITS - lshift - 21;
+    }
+  else
+    {
+      if (size != 0)
+       {
+         nbits = GMP_LIMB_BITS - lshift;
+
+         x = *--up, size--;
+         x <<= GMP_NAIL_BITS;
+         mhi |= x >> nbits >> 11;
+
+         mlo = x << GMP_LIMB_BITS - nbits - 11;
+         nbits = nbits + 11 - GMP_NAIL_BITS;
+       }
+      else
+       {
+         mlo = 0;
+         goto done;
+       }
+    }
+
+  if (LIMBS_PER_DOUBLE >= 2 && nbits < 32 && size != 0)
+    {
+      x = *--up, size--;
+      x <<= GMP_NAIL_BITS;
+      x >>= nbits;
+      mlo |= x;
+      nbits += GMP_NUMB_BITS;
+
+      if (LIMBS_PER_DOUBLE >= 3 && nbits < 32 && size != 0)
+       {
+         x = *--up, size--;
+         x <<= GMP_NAIL_BITS;
+         x >>= nbits;
+         mlo |= x;
+         nbits += GMP_NUMB_BITS;
+
+         if (LIMBS_PER_DOUBLE >= 4 && nbits < 32 && size != 0)
+           {
+             x = *--up;
+             x <<= GMP_NAIL_BITS;
+             x >>= nbits;
+             mlo |= x;
+             nbits += GMP_NUMB_BITS;
+           }
+       }
+    }
+
+ done:;
+
+#endif
+  {
+    if (UNLIKELY (exp >= CONST_1024))
+      {
+       /* overflow, return infinity */
+      ieee_infinity:
+       mhi = 0;
+       mlo = 0;
+       exp = 1024;
+      }
+    else if (UNLIKELY (exp <= CONST_NEG_1023))
+      {
+       int rshift;
+
+       if (LIKELY (exp <= CONST_NEG_1022_SUB_53))
+         return 0.0;    /* denorm underflows to zero */
+
+       rshift = -1022 - exp;
+       ASSERT (rshift > 0 && rshift < 53);
+#if GMP_LIMB_BITS > 53
+       mlo >>= rshift;
+       mhi = mlo >> 32;
+#else
+       if (rshift >= 32)
+         {
+           mlo = mhi;
+           mhi = 0;
+           rshift -= 32;
+         }
+       lshift = GMP_LIMB_BITS - rshift;
+       mlo = (mlo >> rshift) | (rshift == 0 ? 0 : mhi << lshift);
+       mhi >>= rshift;
+#endif
+       exp = -1023;
+      }
+  }
+  u.s.manh = mhi;
+  u.s.manl = mlo;
+  u.s.exp = exp + 1023;
+  u.s.sig = (sign < 0);
+  return u.d;
+}
+#else
+
+
+#define ONE_LIMB    (GMP_LIMB_BITS == 64 && 2*GMP_NUMB_BITS >= 53)
+#define TWO_LIMBS   (GMP_LIMB_BITS == 32 && 3*GMP_NUMB_BITS >= 53)
+
+  if (_GMP_IEEE_FLOATS && (ONE_LIMB || TWO_LIMBS))
+    {
+      union ieee_double_extract         u;
+      mp_limb_t         m0, m1, m2, rmask;
+      int       lshift, rshift;
+
+      m0 = up[size-1];                     /* high limb */
+      m1 = (size >= 2 ? up[size-2] : 0);   /* second highest limb */
+      count_leading_zeros (lshift, m0);
+
+      /* relative to just under high non-zero bit */
+      exp -= (lshift - GMP_NAIL_BITS) + 1;
+
+      if (ONE_LIMB)
+       {
+         /* lshift to have high of m0 non-zero, and collapse nails */
+         rshift = GMP_LIMB_BITS - lshift;
+         m1 <<= GMP_NAIL_BITS;
+         rmask = GMP_NAIL_BITS == 0 && lshift == 0 ? 0 : MP_LIMB_T_MAX;
+         m0 = (m0 << lshift) | ((m1 >> rshift) & rmask);
+
+         /* rshift back to have bit 53 of m0 the high non-zero */
+         m0 >>= 11;
+       }
+      else /* TWO_LIMBS */
+       {
+         m2 = (size >= 3 ? up[size-3] : 0);  /* third highest limb */
+
+         /* collapse nails from m1 and m2 */
+#if GMP_NAIL_BITS != 0
+         m1 = (m1 << GMP_NAIL_BITS) | (m2 >> (GMP_NUMB_BITS-GMP_NAIL_BITS));
+         m2 <<= 2*GMP_NAIL_BITS;
+#endif
+
+         /* lshift to have high of m0:m1 non-zero, collapse nails from m0 */
+         rshift = GMP_LIMB_BITS - lshift;
+         rmask = (GMP_NAIL_BITS == 0 && lshift == 0 ? 0 : MP_LIMB_T_MAX);
+         m0 = (m0 << lshift) | ((m1 >> rshift) & rmask);
+         m1 = (m1 << lshift) | ((m2 >> rshift) & rmask);
+
+         /* rshift back to have bit 53 of m0:m1 the high non-zero */
+         m1 = (m1 >> 11) | (m0 << (GMP_LIMB_BITS-11));
+         m0 >>= 11;
+       }
+
+      if (UNLIKELY (exp >= CONST_1024))
+       {
+         /* overflow, return infinity */
+       ieee_infinity:
+         m0 = 0;
+         m1 = 0;
+         exp = 1024;
+       }
+      else if (UNLIKELY (exp <= CONST_NEG_1023))
+       {
+         if (LIKELY (exp <= CONST_NEG_1022_SUB_53))
+           return 0.0;  /* denorm underflows to zero */
+
+         rshift = -1022 - exp;
+         ASSERT (rshift > 0 && rshift < 53);
+         if (ONE_LIMB)
+           {
+             m0 >>= rshift;
+           }
+         else /* TWO_LIMBS */
+           {
+             if (rshift >= 32)
+               {
+                 m1 = m0;
+                 m0 = 0;
+                 rshift -= 32;
+               }
+             lshift = GMP_LIMB_BITS - rshift;
+             m1 = (m1 >> rshift) | (rshift == 0 ? 0 : m0 << lshift);
+             m0 >>= rshift;
+           }
+         exp = -1023;
+       }
+
+      if (ONE_LIMB)
+       {
+#if GMP_LIMB_BITS > 32 /* avoid compiler warning about big shift */
+         u.s.manh = m0 >> 32;
+#endif
+         u.s.manl = m0;
+       }
+      else /* TWO_LIMBS */
+       {
+         u.s.manh = m0;
+         u.s.manl = m1;
+       }
+
+      u.s.exp = exp + 1023;
+      u.s.sig = (sign < 0);
+      return u.d;
+    }
+  else
+    {
+      /* Non-IEEE or strange limb size, do something generic. */
+
+      mp_size_t             i;
+      mp_limb_t             limb, bit;
+      int           shift;
+      double        base, factor, prev_factor, d, new_d, diff;
+
+      /* "limb" is "up[i]" the limb being examined, "bit" is a mask for the
+        bit being examined, initially the highest non-zero bit.  */
+      i = size-1;
+      limb = up[i];
+      count_leading_zeros (shift, limb);
+      bit = GMP_LIMB_HIGHBIT >> shift;
+
+      /* relative to just under high non-zero bit */
+      exp -= (shift - GMP_NAIL_BITS) + 1;
+
+      /* Power up "factor" to 2^exp, being the value of the "bit" in "limb"
+        being examined.  */
+      base = (exp >= 0 ? 2.0 : 0.5);
+      exp = ABS (exp);
+      factor = 1.0;
+      for (;;)
+       {
+         if (exp & 1)
+           {
+             prev_factor = factor;
+             factor *= base;
+             FORCE_DOUBLE (factor);
+             if (factor == 0.0)
+               return 0.0;     /* underflow */
+             if (factor == prev_factor)
+               {
+                 d = factor;     /* overflow, apparent infinity */
+                 goto generic_done;
+               }
+           }
+         exp >>= 1;
+         if (exp == 0)
+           break;
+         base *= base;
+       }
+
+      /* Add a "factor" for each non-zero bit, working from high to low.
+        Stop if any rounding occurs, hence implementing a truncation.
+
+        Note no attention is paid to DBL_MANT_DIG, since the effective
+        number of bits in the mantissa isn't constant when in denorm range.
+        We also encountered an ARM system with apparently somewhat doubtful
+        software floats where DBL_MANT_DIG claimed 53 bits but only 32
+        actually worked.  */
+
+      d = factor;  /* high bit */
+      for (;;)
+       {
+         factor *= 0.5;  /* next bit */
+         bit >>= 1;
+         if (bit == 0)
+           {
+             /* next limb, if any */
+             i--;
+             if (i < 0)
+               break;
+             limb = up[i];
+             bit = GMP_NUMB_HIGHBIT;
+           }
+
+         if (bit & limb)
+           {
+             new_d = d + factor;
+             FORCE_DOUBLE (new_d);
+             diff = new_d - d;
+             if (diff != factor)
+               break;   /* rounding occured, stop now */
+             d = new_d;
+           }
+       }
+
+    generic_done:
+      return (sign >= 0 ? d : -d);
+    }
+#endif
+}
diff --git a/mpn/generic/get_str.c b/mpn/generic/get_str.c

new file mode 100644 (file)

index 0000000..ac4fb52
--- /dev/null
+++ b/mpn/generic/get_str.c
@@ -0,0 +1,538 @@
+/* mpn_get_str -- Convert {UP,USIZE} to a base BASE string in STR.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTIONS IN THIS FILE, EXCEPT mpn_get_str, ARE INTERNAL WITH A MUTABLE
+   INTERFACE.  IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN
+   FACT, IT IS ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE
+   GNU MP RELEASE.
+
+Copyright 1991, 1992, 1993, 1994, 1996, 2000, 2001, 2002, 2004, 2006, 2007,
+2008 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Conversion of U {up,un} to a string in base b.  Internally, we convert to
+   base B = b^m, the largest power of b that fits a limb.  Basic algorithms:
+
+  A) Divide U repeatedly by B, generating a quotient and remainder, until the
+     quotient becomes zero.  The remainders hold the converted digits.  Digits
+     come out from right to left.  (Used in mpn_sb_get_str.)
+
+  B) Divide U by b^g, for g such that 1/b <= U/b^g < 1, generating a fraction.
+     Then develop digits by multiplying the fraction repeatedly by b.  Digits
+     come out from left to right.  (Currently not used herein, except for in
+     code for converting single limbs to individual digits.)
+
+  C) Compute B^1, B^2, B^4, ..., B^s, for s such that B^s is just above
+     sqrt(U).  Then divide U by B^s, generating quotient and remainder.
+     Recursively convert the quotient, then the remainder, using the
+     precomputed powers.  Digits come out from left to right.  (Used in
+     mpn_dc_get_str.)
+
+  When using algorithm C, algorithm B might be suitable for basecase code,
+  since the required b^g power will be readily accessible.
+
+  Optimization ideas:
+  1. The recursive function of (C) could use less temporary memory.  The powtab
+     allocation could be trimmed with some computation, and the tmp area could
+     be reduced, or perhaps eliminated if up is reused for both quotient and
+     remainder (it is currently used just for remainder).
+  2. Store the powers of (C) in normalized form, with the normalization count.
+     Quotients will usually need to be left-shifted before each divide, and
+     remainders will either need to be left-shifted or right-shifted.
+  3. In the code for developing digits from a single limb, we could avoid using
+     a full umul_ppmm except for the first (or first few) digits, provided base
+     is even.  Subsequent digits can be developed using plain multiplication.
+     (This saves on register-starved machines (read x86) and on all machines
+     that generate the upper product half using a separate instruction (alpha,
+     powerpc, IA-64) or lack such support altogether (sparc64, hppa64).)
+  4. Separate mpn_dc_get_str basecase code from code for small conversions. The
+     former code will have the exact right power readily available in the
+     powtab parameter for dividing the current number into a fraction.  Convert
+     that using algorithm B.
+  5. Completely avoid division.  Compute the inverses of the powers now in
+     powtab instead of the actual powers.
+  6. Decrease powtab allocation for even bases.  E.g. for base 10 we could save
+     about 30% (1-log(5)/log(10)).
+
+  Basic structure of (C):
+    mpn_get_str:
+      if POW2_P (n)
+       ...
+      else
+       if (un < GET_STR_PRECOMPUTE_THRESHOLD)
+         mpn_sb_get_str (str, base, up, un);
+       else
+         precompute_power_tables
+         mpn_dc_get_str
+
+    mpn_dc_get_str:
+       mpn_tdiv_qr
+       if (qn < GET_STR_DC_THRESHOLD)
+         mpn_sb_get_str
+       else
+         mpn_dc_get_str
+       if (rn < GET_STR_DC_THRESHOLD)
+         mpn_sb_get_str
+       else
+         mpn_dc_get_str
+
+
+  The reason for the two threshold values is the cost of
+  precompute_power_tables.  GET_STR_PRECOMPUTE_THRESHOLD will be considerably
+  larger than GET_STR_DC_THRESHOLD.  */
+
+
+/* udiv_qrnd_unnorm(q,r,n,d) sets q = n / d and r = n - q*d for single limbs.
+   The x86s and m68020 have a quotient and remainder "div" instruction and gcc
+   combines an adjacent "/" and "%" into it.  Elsewhere "/" and "%" are either
+   separate instructions or separate libgcc calls (which gcc as of version 3.0
+   doesn't combine); there a multiply and subtract should beat a "%".  */
+#if HAVE_HOST_CPU_FAMILY_x86            \
+  || HAVE_HOST_CPU_m68020               \
+  || HAVE_HOST_CPU_m68030               \
+  || HAVE_HOST_CPU_m68040               \
+  || HAVE_HOST_CPU_m68060               \
+  || HAVE_HOST_CPU_m68360 /* CPU32 */
+#define udiv_qrnd_unnorm(q,r,n,d)       \
+  do {                                  \
+    mp_limb_t  __q = (n) / (d);         \
+    mp_limb_t  __r = (n) % (d);         \
+    (q) = __q;                          \
+    (r) = __r;                          \
+  } while (0)
+#else /* other hosts: derive the remainder as n - q*d instead of using "%" */
+#define udiv_qrnd_unnorm(q,r,n,d)       \
+  do {                                  \
+    mp_limb_t  __q = (n) / (d);         \
+    mp_limb_t  __r = (n) - __q*(d);     \
+    (q) = __q;                          \
+    (r) = __r;                          \
+  } while (0)
+#endif /* udiv_qrnd_unnorm variants */
+
+\f
+/* Convert {up,un} to a string in base base, and put the result in str.
+   Generate len characters, possibly padding with zeros to the left.  If len is
+   zero, generate as many characters as required.  Return a pointer immediately
+   after the last digit of the result string.  Complexity is O(un^2); intended
+   for small conversions.  */
+static unsigned char *
+mpn_sb_get_str (unsigned char *str, size_t len,
+               mp_ptr up, mp_size_t un, int base)
+{
+  mp_limb_t rl, ul;     /* rl: digit just split off; ul: top limb still to convert */
+  unsigned char *s;     /* cursor into buf */
+  size_t l;             /* number of digits generated into buf */
+  /* Allocate memory for largest possible string, given that we only get here
+     for operands with un < GET_STR_PRECOMPUTE_THRESHOLD and that the smallest
+     base is 3.  7/11 is an approximation to 1/log2(3).  */
+#if TUNE_PROGRAM_BUILD
+#define BUF_ALLOC (GET_STR_THRESHOLD_LIMIT * GMP_LIMB_BITS * 7 / 11)
+#else
+#define BUF_ALLOC (GET_STR_PRECOMPUTE_THRESHOLD * GMP_LIMB_BITS * 7 / 11)
+#endif
+  unsigned char buf[BUF_ALLOC];
+#if TUNE_PROGRAM_BUILD
+  mp_limb_t rp[GET_STR_THRESHOLD_LIMIT];
+#else
+  mp_limb_t rp[GET_STR_PRECOMPUTE_THRESHOLD];
+#endif
+
+  if (base == 10)
+    {
+      /* Special case code for base==10 so that the compiler has a chance to
+        optimize things.  */
+
+      MPN_COPY (rp + 1, up, un);  /* rp[0] stays free; it is read back as the fraction limb below */
+
+      s = buf + BUF_ALLOC;        /* buf is filled from its end towards its start */
+      while (un > 1)
+       {
+         int i;
+         mp_limb_t frac, digit;
+         MPN_DIVREM_OR_PREINV_DIVREM_1 (rp, (mp_size_t) 1, rp + 1, un,
+                                        MP_BASES_BIG_BASE_10,
+                                        MP_BASES_BIG_BASE_INVERTED_10,
+                                        MP_BASES_NORMALIZATION_STEPS_10);
+         un -= rp[un] == 0;      /* drop the top limb once it has become zero */
+         frac = (rp[0] + 1) << GMP_NAIL_BITS;
+         s -= MP_BASES_CHARS_PER_LIMB_10;  /* reserve one chunk, then write it forwards */
+#if HAVE_HOST_CPU_FAMILY_x86
+         /* The code below turns out to be a bit slower for x86 using gcc.
+            Use plain code.  */
+         i = MP_BASES_CHARS_PER_LIMB_10;
+         do
+           {
+             umul_ppmm (digit, frac, frac, 10);
+             *s++ = digit;
+           }
+         while (--i);
+#else
+         /* Use the fact that 10 in binary is 1010, with the lowest bit 0.
+            After a few umul_ppmm, we will have accumulated enough low zeros
+            to use a plain multiply.  */
+         if (MP_BASES_NORMALIZATION_STEPS_10 == 0)
+           {
+             umul_ppmm (digit, frac, frac, 10);
+             *s++ = digit;
+           }
+         if (MP_BASES_NORMALIZATION_STEPS_10 <= 1)
+           {
+             umul_ppmm (digit, frac, frac, 10);
+             *s++ = digit;
+           }
+         if (MP_BASES_NORMALIZATION_STEPS_10 <= 2)
+           {
+             umul_ppmm (digit, frac, frac, 10);
+             *s++ = digit;
+           }
+         if (MP_BASES_NORMALIZATION_STEPS_10 <= 3)
+           {
+             umul_ppmm (digit, frac, frac, 10);
+             *s++ = digit;
+           }
+         i = (MP_BASES_CHARS_PER_LIMB_10 - ((MP_BASES_NORMALIZATION_STEPS_10 < 4)
+                                            ? (4-MP_BASES_NORMALIZATION_STEPS_10)
+                                            : 0));
+         frac = (frac + 0xf) >> 4;
+         do
+           {
+             frac *= 10;
+             digit = frac >> (GMP_LIMB_BITS - 4);
+             *s++ = digit;
+             frac &= (~(mp_limb_t) 0) >> 4;
+           }
+         while (--i);
+#endif
+         s -= MP_BASES_CHARS_PER_LIMB_10;  /* back to the start of the chunk just written */
+       }
+
+      ul = rp[1];
+      while (ul != 0)           /* last limb: peel off digits until nothing is left */
+       {
+         udiv_qrnd_unnorm (ul, rl, ul, 10);
+         *--s = rl;
+       }
+    }
+  else /* not base 10 */
+    {
+      unsigned chars_per_limb;
+      mp_limb_t big_base, big_base_inverted;
+      unsigned normalization_steps;
+
+      chars_per_limb = mp_bases[base].chars_per_limb;
+      big_base = mp_bases[base].big_base;
+      big_base_inverted = mp_bases[base].big_base_inverted;
+      count_leading_zeros (normalization_steps, big_base);
+
+      MPN_COPY (rp + 1, up, un);  /* rp[0] stays free; it is read back as the fraction limb below */
+
+      s = buf + BUF_ALLOC;        /* buf is filled from its end towards its start */
+      while (un > 1)
+       {
+         int i;
+         mp_limb_t frac;
+         MPN_DIVREM_OR_PREINV_DIVREM_1 (rp, (mp_size_t) 1, rp + 1, un,
+                                        big_base, big_base_inverted,
+                                        normalization_steps);
+         un -= rp[un] == 0;      /* drop the top limb once it has become zero */
+         frac = (rp[0] + 1) << GMP_NAIL_BITS;
+         s -= chars_per_limb;    /* reserve one chunk, then write it forwards */
+         i = chars_per_limb;
+         do
+           {
+             mp_limb_t digit;
+             umul_ppmm (digit, frac, frac, base);
+             *s++ = digit;
+           }
+         while (--i);
+         s -= chars_per_limb;    /* back to the start of the chunk just written */
+       }
+
+      ul = rp[1];
+      while (ul != 0)           /* last limb: peel off digits until nothing is left */
+       {
+         udiv_qrnd_unnorm (ul, rl, ul, base);
+         *--s = rl;
+       }
+    }
+
+  l = buf + BUF_ALLOC - s;      /* digits now held in buf, starting at s */
+  while (l < len)               /* left-pad with zero digits up to len */
+    {
+      *str++ = 0;
+      len--;
+    }
+  while (l != 0)                /* copy the generated digits out to str */
+    {
+      *str++ = *s++;
+      l--;
+    }
+  return str;                   /* one past the last digit stored */
+}
+
+\f
+/* Convert {UP,UN} to a string with a base as represented in POWTAB, and put
+   the string in STR.  Generate LEN characters, possibly padding with zeros to
+   the left.  If LEN is zero, generate as many characters as required.
+   Return a pointer immediately after the last digit of the result string.
+   This uses divide-and-conquer and is intended for large conversions.  */
+static unsigned char *
+mpn_dc_get_str (unsigned char *str, size_t len,
+               mp_ptr up, mp_size_t un,
+               const powers_t *powtab, mp_ptr tmp)
+{
+  if (BELOW_THRESHOLD (un, GET_STR_DC_THRESHOLD))
+    {
+      if (un != 0)
+       str = mpn_sb_get_str (str, len, up, un, powtab->base);
+      else
+       {
+         while (len != 0)      /* zero operand: the output is just len zero digits */
+           {
+             *str++ = 0;
+             len--;
+           }
+       }
+    }
+  else
+    {
+      mp_ptr pwp, qp, rp;
+      mp_size_t pwn, qn;
+      mp_size_t sn;             /* count of low limbs skipped via powtab->shift */
+
+      pwp = powtab->p;
+      pwn = powtab->n;
+      sn = powtab->shift;
+
+      if (un < pwn + sn || (un == pwn + sn && mpn_cmp (up + sn, pwp, un - sn) < 0))
+       {
+         str = mpn_dc_get_str (str, len, up, un, powtab - 1, tmp);  /* retry with the previous table entry */
+       }
+      else
+       {
+         qp = tmp;             /* (un - pwn + 1) limbs for qp */
+         rp = up;              /* pwn limbs for rp; overwrite up area */
+
+         mpn_tdiv_qr (qp, rp + sn, 0L, up + sn, un - sn, pwp, pwn);
+         qn = un - sn - pwn; qn += qp[qn] != 0;                /* quotient size */
+
+         ASSERT (qn < pwn + sn || (qn == pwn + sn && mpn_cmp (qp + sn, pwp, pwn) < 0));
+
+         if (len != 0)
+           len = len - powtab->digits_in_base;  /* the remainder call below is asked for exactly this many digits */
+
+         str = mpn_dc_get_str (str, len, qp, qn, powtab - 1, tmp + qn);  /* high part first */
+         str = mpn_dc_get_str (str, powtab->digits_in_base, rp, pwn + sn, powtab - 1, tmp);
+       }
+    }
+  return str;
+}
+
+\f
+/* There are no leading zeros on the digits generated at str, but that's not
+   currently a documented feature.  */
+
+size_t
+mpn_get_str (unsigned char *str, int base, mp_ptr up, mp_size_t un)
+{
+  mp_ptr powtab_mem, powtab_mem_ptr;
+  mp_limb_t big_base;
+  size_t digits_in_base;
+  powers_t powtab[GMP_LIMB_BITS];
+  int pi;
+  mp_size_t n;
+  mp_ptr p, t;
+  size_t out_len;
+  mp_ptr tmp;
+  TMP_DECL;
+
+  /* Special case zero, as the code below doesn't handle it.  */
+  if (un == 0)
+    {
+      str[0] = 0;
+      return 1;                 /* a single zero digit was stored */
+    }
+
+  if (POW2_P (base))
+    {
+      /* The base is a power of 2.  Convert from most significant end.  */
+      mp_limb_t n1, n0;
+      int bits_per_digit = mp_bases[base].big_base;
+      int cnt;
+      int bit_pos;
+      mp_size_t i;
+      unsigned char *s = str;
+      mp_bitcnt_t bits;
+
+      n1 = up[un - 1];
+      count_leading_zeros (cnt, n1);
+
+      /* BIT_POS should be R when input ends in least significant nibble,
+        R + bits_per_digit * n when input ends in nth least significant
+        nibble. */
+
+      bits = (mp_bitcnt_t) GMP_NUMB_BITS * un - cnt + GMP_NAIL_BITS;
+      cnt = bits % bits_per_digit;
+      if (cnt != 0)
+       bits += bits_per_digit - cnt;   /* round up to a whole number of digits */
+      bit_pos = bits - (mp_bitcnt_t) (un - 1) * GMP_NUMB_BITS;
+
+      /* Fast loop for bit output.  */
+      i = un - 1;
+      for (;;)
+       {
+         bit_pos -= bits_per_digit;
+         while (bit_pos >= 0)  /* digits lying entirely within n1 */
+           {
+             *s++ = (n1 >> bit_pos) & ((1 << bits_per_digit) - 1);
+             bit_pos -= bits_per_digit;
+           }
+         i--;
+         if (i < 0)
+           break;
+         n0 = (n1 << -bit_pos) & ((1 << bits_per_digit) - 1);
+         n1 = up[i];
+         bit_pos += GMP_NUMB_BITS;
+         *s++ = n0 | (n1 >> bit_pos);  /* digit straddling two limbs */
+       }
+
+      return s - str;           /* number of digits stored */
+    }
+
+  /* General case.  The base is not a power of 2.  */
+
+  if (BELOW_THRESHOLD (un, GET_STR_PRECOMPUTE_THRESHOLD))
+    return mpn_sb_get_str (str, (size_t) 0, up, un, base) - str;
+
+  TMP_MARK;
+
+  /* Allocate one large block for the powers of big_base.  */
+  powtab_mem = TMP_BALLOC_LIMBS (mpn_dc_get_str_powtab_alloc (un));
+  powtab_mem_ptr = powtab_mem;
+
+  /* Compute a table of powers, where the largest power is >= sqrt(U).  */
+
+  big_base = mp_bases[base].big_base;
+  digits_in_base = mp_bases[base].chars_per_limb;
+
+  {
+    mp_size_t n_pows, xn, pn, exptab[GMP_LIMB_BITS], bexp;
+    mp_limb_t cy;
+    mp_size_t shift;
+
+    n_pows = 0;
+    xn = 1 + un*(mp_bases[base].chars_per_bit_exactly*GMP_NUMB_BITS)/mp_bases[base].chars_per_limb;
+    for (pn = xn; pn != 1; pn = (pn + 1) >> 1)  /* halve, rounding up, until 1 */
+      {
+       exptab[n_pows] = pn;
+       n_pows++;
+      }
+    exptab[n_pows] = 1;
+
+    powtab[0].p = &big_base;
+    powtab[0].n = 1;
+    powtab[0].digits_in_base = digits_in_base;
+    powtab[0].base = base;
+    powtab[0].shift = 0;
+
+    powtab[1].p = powtab_mem_ptr;  powtab_mem_ptr += 2;
+    powtab[1].p[0] = big_base;
+    powtab[1].n = 1;
+    powtab[1].digits_in_base = digits_in_base;
+    powtab[1].base = base;
+    powtab[1].shift = 0;
+
+    n = 1;
+    p = &big_base;
+    bexp = 1;
+    shift = 0;
+    for (pi = 2; pi < n_pows; pi++)
+      {
+       t = powtab_mem_ptr;
+       powtab_mem_ptr += 2 * n + 2;
+
+       ASSERT_ALWAYS (powtab_mem_ptr < powtab_mem + mpn_dc_get_str_powtab_alloc (un));
+
+       mpn_sqr (t, p, n);
+
+       digits_in_base *= 2;
+       n *= 2;  n -= t[n - 1] == 0;
+       bexp *= 2;
+
+       if (bexp + 1 < exptab[n_pows - pi])
+         {
+           digits_in_base += mp_bases[base].chars_per_limb;
+           cy = mpn_mul_1 (t, t, n, big_base);
+           t[n] = cy;
+           n += cy != 0;
+           bexp += 1;
+         }
+       shift *= 2;
+       /* Strip low zero limbs.  */
+       while (t[0] == 0)
+         {
+           t++;
+           n--;
+           shift++;
+         }
+       p = t;
+       powtab[pi].p = p;
+       powtab[pi].n = n;
+       powtab[pi].digits_in_base = digits_in_base;
+       powtab[pi].base = base;
+       powtab[pi].shift = shift;
+      }
+
+    for (pi = 1; pi < n_pows; pi++)     /* multiply each stored entry by big_base once more */
+      {
+       t = powtab[pi].p;
+       n = powtab[pi].n;
+       cy = mpn_mul_1 (t, t, n, big_base);
+       t[n] = cy;
+       n += cy != 0;
+       if (t[0] == 0)
+         {
+           powtab[pi].p = t + 1;
+           n--;
+           powtab[pi].shift++;
+         }
+       powtab[pi].n = n;
+       powtab[pi].digits_in_base += mp_bases[base].chars_per_limb;
+      }
+
+#if 0
+    { int i;
+      printf ("Computed table values for base=%d, un=%d, xn=%d:\n", base, un, xn);
+      for (i = 0; i < n_pows; i++)
+       printf ("%2d: %10ld %10ld %11ld %ld\n", i, exptab[n_pows-i], powtab[i].n, powtab[i].digits_in_base, powtab[i].shift);
+    }
+#endif
+  }
+
+  /* Using our precomputed powers, now in powtab[], convert our number.  */
+  tmp = TMP_BALLOC_LIMBS (mpn_dc_get_str_itch (un));
+  out_len = mpn_dc_get_str (str, 0, up, un, powtab - 1 + pi, tmp) - str;
+  TMP_FREE;
+
+  return out_len;               /* number of digits stored at str */
+}
diff --git a/mpn/generic/gmp-mparam.h b/mpn/generic/gmp-mparam.h

new file mode 100644 (file)

index 0000000..aab5fa5
--- /dev/null
+++ b/mpn/generic/gmp-mparam.h
@@ -0,0 +1,22 @@
+/* Generic C gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+/* Values for GMP_LIMB_BITS etc will be determined by ./configure and put
+   in config.h. */
diff --git a/mpn/generic/hgcd.c b/mpn/generic/hgcd.c

new file mode 100644 (file)

index 0000000..709f880
--- /dev/null
+++ b/mpn/generic/hgcd.c
@@ -0,0 +1,643 @@
+/* hgcd.c.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2003, 2004, 2005, 2008 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* For input of size n, matrix elements are of size at most ceil(n/2)
+   - 1, but we need two limbs extra. */
+void
+mpn_hgcd_matrix_init (struct hgcd_matrix *M, mp_size_t n, mp_ptr p)
+{
+  mp_size_t s = (n+1)/2 + 1;    /* limbs reserved per matrix element */
+  M->alloc = s;
+  M->n = 1;
+  MPN_ZERO (p, 4 * s);          /* p must provide 4*s limbs: four elements back to back */
+  M->p[0][0] = p;
+  M->p[0][1] = p + s;
+  M->p[1][0] = p + 2 * s;
+  M->p[1][1] = p + 3 * s;
+
+  M->p[0][0][0] = M->p[1][1][0] = 1;    /* ones on the diagonal: M starts as the identity */
+}
+
+/* Updated column COL, adding in column (1-COL). */
+static void
+hgcd_matrix_update_1 (struct hgcd_matrix *M, unsigned col)
+{
+  mp_limb_t c0, c1;             /* carry out of row 0 and of row 1 */
+  ASSERT (col < 2);
+
+  c0 = mpn_add_n (M->p[0][col], M->p[0][0], M->p[0][1], M->n);
+  c1 = mpn_add_n (M->p[1][col], M->p[1][0], M->p[1][1], M->n);
+
+  M->p[0][col][M->n] = c0;      /* store the carries as the next limb of column col */
+  M->p[1][col][M->n] = c1;
+
+  M->n += (c0 | c1) != 0;       /* grow by one limb only if some row carried */
+  ASSERT (M->n < M->alloc);
+}
+
+/* Updated column COL, adding in column Q * (1-COL). Temporary
+ * storage: qn + n <= M->alloc, where n is the size of the largest
+ * element in column 1 - COL. */
+static void
+hgcd_matrix_update_q (struct hgcd_matrix *M, mp_srcptr qp, mp_size_t qn,
+                     unsigned col, mp_ptr tp)
+{
+  ASSERT (col < 2);
+
+  if (qn == 1)                  /* single-limb quotient: add in place with mpn_addmul_1 */
+    {
+      mp_limb_t q = qp[0];
+      mp_limb_t c0, c1;
+
+      c0 = mpn_addmul_1 (M->p[0][col], M->p[0][1-col], M->n, q);
+      c1 = mpn_addmul_1 (M->p[1][col], M->p[1][1-col], M->n, q);
+
+      M->p[0][col][M->n] = c0;
+      M->p[1][col][M->n] = c1;
+
+      M->n += (c0 | c1) != 0;   /* grow by one limb only if some row carried */
+    }
+  else
+    {
+      unsigned row;
+
+      /* Carries for the unlikely case that we get both high words
+        from the multiplication and carries from the addition. */
+      mp_limb_t c[2];
+      mp_size_t n;
+
+      /* The matrix will not necessarily grow in size by qn, so we
+        need normalization in order not to overflow M. */
+
+      for (n = M->n; n + qn > M->n; n--)
+       {
+         ASSERT (n > 0);
+         if (M->p[0][1-col][n-1] > 0 || M->p[1][1-col][n-1] > 0)
+           break;              /* found the top non-zero limb of column 1-col */
+       }
+
+      ASSERT (qn + n <= M->alloc);
+
+      for (row = 0; row < 2; row++)
+       {
+         if (qn <= n)          /* pass the longer operand first to mpn_mul */
+           mpn_mul (tp, M->p[row][1-col], n, qp, qn);
+         else
+           mpn_mul (tp, qp, qn, M->p[row][1-col], n);
+
+         ASSERT (n + qn >= M->n);
+         c[row] = mpn_add (M->p[row][col], tp, n + qn, M->p[row][col], M->n);
+       }
+      if (c[0] | c[1])
+       {
+         M->n = n + qn + 1;
+         M->p[0][col][M->n - 1] = c[0];
+         M->p[1][col][M->n - 1] = c[1];
+       }
+      else
+       {
+         n += qn;
+         n -= (M->p[0][col][n-1] | M->p[1][col][n-1]) == 0;
+         if (n > M->n)         /* M->n is never reduced here */
+           M->n = n;
+       }
+    }
+
+  ASSERT (M->n < M->alloc);
+}
+
+/* Multiply M by M1 from the right. Since the M1 elements fit in
+   GMP_NUMB_BITS - 1 bits, M grows by at most one limb. Needs
+   temporary space M->n */
+static void
+hgcd_matrix_mul_1 (struct hgcd_matrix *M, const struct hgcd_matrix1 *M1,
+                  mp_ptr tp)
+{
+  mp_size_t n0, n1;             /* sizes returned for row 0 and for row 1 */
+
+  /* Could avoid copy by some swapping of pointers. */
+  MPN_COPY (tp, M->p[0][0], M->n);      /* tp holds the old M[0][0] while it is overwritten */
+  n0 = mpn_hgcd_mul_matrix1_vector (M1, M->p[0][0], tp, M->p[0][1], M->n);
+  MPN_COPY (tp, M->p[1][0], M->n);      /* likewise for the old M[1][0] */
+  n1 = mpn_hgcd_mul_matrix1_vector (M1, M->p[1][0], tp, M->p[1][1], M->n);
+
+  /* Depends on zero initialization */
+  M->n = MAX(n0, n1);
+  ASSERT (M->n < M->alloc);
+}
+
+/* Perform a few steps, using some of mpn_hgcd2, subtraction and
+   division. Reduces the size by almost one limb or more, but never
+   below the given size s. Return new size for a and b, or 0 if no
+   more steps are possible.
+
+   If hgcd2 succeeds, needs temporary space for hgcd_matrix_mul_1, M->n
+   limbs, and hgcd_mul_matrix1_inverse_vector, n limbs. If hgcd2
+   fails, needs space for the quotient, qn <= n - s + 1 limbs, and for
+   hgcd_matrix_update_q, qn + (size of the appropriate column of M) <=
+   resulting size of M.
+
+   If N is the input size to the calling hgcd, then s = floor(N/2) +
+   1, M->n < N, qn + matrix size <= n - s + 1 + n - s = 2 (n - s) + 1
+   < N, so N is sufficient.
+*/
+
+static mp_size_t
+hgcd_step (mp_size_t n, mp_ptr ap, mp_ptr bp, mp_size_t s,
+          struct hgcd_matrix *M, mp_ptr tp)
+{
+  struct hgcd_matrix1 M1;
+  mp_limb_t mask;
+  mp_limb_t ah, al, bh, bl;     /* two-limb leading parts of a and b handed to mpn_hgcd2 */
+  mp_size_t an, bn, qn;
+  int col;                      /* column of M to update */
+
+  ASSERT (n > s);
+
+  mask = ap[n-1] | bp[n-1];
+  ASSERT (mask > 0);
+
+  if (n == s + 1)
+    {
+      if (mask < 4)
+       goto subtract;
+
+      ah = ap[n-1]; al = ap[n-2];
+      bh = bp[n-1]; bl = bp[n-2];
+    }
+  else if (mask & GMP_NUMB_HIGHBIT)
+    {
+      ah = ap[n-1]; al = ap[n-2];
+      bh = bp[n-1]; bl = bp[n-2];
+    }
+  else
+    {
+      int shift;
+
+      count_leading_zeros (shift, mask);
+      ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
+      al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
+      bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
+      bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
+    }
+
+  /* Try an mpn_hgcd2 step */
+  if (mpn_hgcd2 (ah, al, bh, bl, &M1))
+    {
+      /* Multiply M <- M * M1 */
+      hgcd_matrix_mul_1 (M, &M1, tp);
+
+      /* Can't swap inputs, so we need to copy. */
+      MPN_COPY (tp, ap, n);
+      /* Multiply M1^{-1} (a;b) */
+      return mpn_hgcd_mul_matrix1_inverse_vector (&M1, ap, tp, bp, n);
+    }
+
+ subtract:
+  /* There are two ways in which mpn_hgcd2 can fail. Either one of ah and
+     bh was too small, or ah, bh were (almost) equal. Perform one
+     subtraction step (for possible cancellation of high limbs),
+     followed by one division. */
+
+  /* Since we must ensure that #(a-b) > s, we handle cancellation of
+     high limbs explicitly up front. (FIXME: Or is it better to just
+     subtract, normalize, and use an addition to undo if it turns out
+     that the difference is too small?) */
+  for (an = n; an > s; an--)
+    if (ap[an-1] != bp[an-1])
+      break;
+
+  if (an == s)
+    return 0;                   /* a and b agree in every limb above s */
+
+  /* Maintain a > b. When needed, swap a and b, and let col keep track
+     of how to update M. */
+  if (ap[an-1] > bp[an-1])
+    {
+      /* a is largest. In the subtraction step, we need to update
+        column 1 of M */
+      col = 1;
+    }
+  else
+    {
+      MP_PTR_SWAP (ap, bp);
+      col = 0;
+    }
+
+  bn = n;
+  MPN_NORMALIZE (bp, bn);
+  if (bn <= s)
+    return 0;
+
+  /* We have #a, #b > s. When is it possible that #(a-b) < s? For
+     cancellation to happen, the numbers must be of the form
+
+       a = x + 1, 0,            ..., 0,            al
+       b = x    , GMP_NUMB_MAX, ..., GMP_NUMB_MAX, bl
+
+     where al, bl denotes the least significant k limbs. If al < bl,
+     then #(a-b) < k, and if also high(al) != 0, high(bl) != GMP_NUMB_MAX,
+     then #(a-b) = k. If al >= bl, then #(a-b) = k + 1. */
+
+  if (ap[an-1] == bp[an-1] + 1)
+    {
+      mp_size_t k;
+      int c;
+      for (k = an-1; k > s; k--)
+       if (ap[k-1] != 0 || bp[k-1] != GMP_NUMB_MAX)
+         break;
+
+      MPN_CMP (c, ap, bp, k);
+      if (c < 0)
+       {
+         mp_limb_t cy;
+
+         /* The limbs from k and up are cancelled. */
+         if (k == s)
+           return 0;
+         cy = mpn_sub_n (ap, ap, bp, k);
+         ASSERT (cy == 1);
+         an = k;
+       }
+      else
+       {
+         ASSERT_NOCARRY (mpn_sub_n (ap, ap, bp, k));
+         ap[k] = 1;
+         an = k + 1;
+       }
+    }
+  else
+    ASSERT_NOCARRY (mpn_sub_n (ap, ap, bp, an));
+
+  ASSERT (an > s);
+  ASSERT (ap[an-1] > 0);
+  ASSERT (bn > s);
+  ASSERT (bp[bn-1] > 0);
+
+  hgcd_matrix_update_1 (M, col);        /* record the subtraction step in M */
+
+  if (an < bn)
+    {
+      MPN_PTR_SWAP (ap, an, bp, bn);
+      col ^= 1;
+    }
+  else if (an == bn)
+    {
+      int c;
+      MPN_CMP (c, ap, bp, an);
+      if (c < 0)
+       {
+         MP_PTR_SWAP (ap, bp);
+         col ^= 1;
+       }
+    }
+
+  /* Divide a / b. */
+  qn = an + 1 - bn;
+
+  /* FIXME: We could use an approximate division, that may return a
+     too small quotient, and only guarantee that the size of r is
+     almost the size of b. FIXME: Let ap and remainder overlap. */
+  mpn_tdiv_qr (tp, ap, 0, ap, an, bp, bn);
+  qn -= (tp[qn -1] == 0);
+
+  /* Normalize remainder */
+  an = bn;
+  for ( ; an > s; an--)
+    if (ap[an-1] > 0)
+      break;
+
+  if (an <= s)
+    {
+      /* Quotient is too large */
+      mp_limb_t cy;
+
+      cy = mpn_add (ap, bp, bn, ap, an);        /* add b back into the remainder */
+
+      if (cy > 0)
+       {
+         ASSERT (bn < n);
+         ap[bn] = cy;
+         bp[bn] = 0;
+         bn++;
+       }
+
+      MPN_DECR_U (tp, qn, 1);   /* and take one off the quotient to match */
+      qn -= (tp[qn-1] == 0);
+    }
+
+  if (qn > 0)
+    hgcd_matrix_update_q (M, tp, qn, col, tp + qn);     /* scratch space follows the quotient */
+
+  return bn;
+}
+
+/* Reduces a,b until |a-b| fits in n/2 + 1 limbs. Constructs matrix M
+   with elements of size at most (n+1)/2 - 1. Returns new size of a,
+   b, or zero if no reduction is possible. */
+mp_size_t
+mpn_hgcd_lehmer (mp_ptr ap, mp_ptr bp, mp_size_t n,
+                struct hgcd_matrix *M, mp_ptr tp)
+{
+  mp_size_t s = n/2 + 1;        /* lower bound on the size, passed to every hgcd_step call */
+  mp_size_t nn;                 /* size returned by the latest hgcd_step, 0 when it made no step */
+
+  ASSERT (n > s);
+  ASSERT (ap[n-1] > 0 || bp[n-1] > 0);
+
+  nn = hgcd_step (n, ap, bp, s, M, tp);
+  if (!nn)
+    return 0;                   /* not even one step succeeded */
+
+  for (;;)
+    {
+      n = nn;
+      ASSERT (n > s);
+      nn = hgcd_step (n, ap, bp, s, M, tp);
+      if (!nn )
+       return n;               /* size after the last successful step */
+    }
+}
+
+/* Multiply M by M1 from the right. Needs 3*(M->n + M1->n) + 5 limbs
+   of temporary storage (see mpn_matrix22_mul_itch). */
+void
+mpn_hgcd_matrix_mul (struct hgcd_matrix *M, const struct hgcd_matrix *M1,
+                    mp_ptr tp)
+{
+  mp_size_t n;
+
+  /* About the new size of M's elements. Since M1's diagonal elements
+     are > 0, no element can decrease. The new elements are of size
+     M->n + M1->n, one limb more or less. The computation of the
+     matrix product produces elements of size M->n + M1->n + 1. But
+     the true size, after normalization, may be three limbs smaller.
+
+     The reason that the product has normalized size >= M->n + M1->n -
+     2 is subtle. It depends on the fact that M and M1 can be factored
+     as products of (1,1; 0,1) and (1,0; 1,1), and that we can't have
+     M ending with a large power and M1 starting with a large power of
+     the same matrix. */
+
+  /* FIXME: Strassen multiplication gives only a small speedup. In FFT
+     multiplication range, this function could be sped up quite a lot
+     using invariance. */
+  ASSERT (M->n + M1->n < M->alloc);
+
+  ASSERT ((M->p[0][0][M->n-1] | M->p[0][1][M->n-1]
+          | M->p[1][0][M->n-1] | M->p[1][1][M->n-1]) > 0);
+
+  ASSERT ((M1->p[0][0][M1->n-1] | M1->p[0][1][M1->n-1]
+          | M1->p[1][0][M1->n-1] | M1->p[1][1][M1->n-1]) > 0);
+
+  mpn_matrix22_mul (M->p[0][0], M->p[0][1],
+                   M->p[1][0], M->p[1][1], M->n,
+                   M1->p[0][0], M1->p[0][1],
+                   M1->p[1][0], M1->p[1][1], M1->n, tp);
+
+  /* Index of last potentially non-zero limb, size is one greater. */
+  n = M->n + M1->n;
+
+  n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0);  /* up to three */
+  n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0);  /* top limbs may */
+  n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0);  /* be zero */
+
+  ASSERT ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) > 0);
+
+  M->n = n + 1;                 /* n is an index here, so the size is one more */
+}
+
+/* Multiplies the least significant p limbs of (a;b) by M^-1.
+   Temporary space needed: 2 * (p + M->n)*/
+mp_size_t
+mpn_hgcd_matrix_adjust (struct hgcd_matrix *M,
+                       mp_size_t n, mp_ptr ap, mp_ptr bp,
+                       mp_size_t p, mp_ptr tp)
+{
+  /* M^-1 (a;b) = (r11, -r01; -r10, r00) (a ; b)
+     = (r11 a - r01 b; - r10 a + r00 b) */
+
+  mp_ptr t0 = tp;               /* first product area, p + M->n limbs */
+  mp_ptr t1 = tp + p + M->n;    /* second product area, directly after t0 */
+  mp_limb_t ah, bh;             /* limb n of the new a and of the new b */
+  mp_limb_t cy;
+
+  ASSERT (p + M->n  < n);
+
+  /* First compute the two values depending on a, before overwriting a */
+
+  if (M->n >= p)                /* pass the longer operand first to mpn_mul */
+    {
+      mpn_mul (t0, M->p[1][1], M->n, ap, p);
+      mpn_mul (t1, M->p[1][0], M->n, ap, p);
+    }
+  else
+    {
+      mpn_mul (t0, ap, p, M->p[1][1], M->n);
+      mpn_mul (t1, ap, p, M->p[1][0], M->n);
+    }
+
+  /* Update a */
+  MPN_COPY (ap, t0, p);
+  ah = mpn_add (ap + p, ap + p, n - p, t0 + p, M->n);
+
+  if (M->n >= p)
+    mpn_mul (t0, M->p[0][1], M->n, bp, p);
+  else
+    mpn_mul (t0, bp, p, M->p[0][1], M->n);
+
+  cy = mpn_sub (ap, ap, n, t0, p + M->n);
+  ASSERT (cy <= ah);
+  ah -= cy;
+
+  /* Update b */
+  if (M->n >= p)
+    mpn_mul (t0, M->p[0][0], M->n, bp, p);
+  else
+    mpn_mul (t0, bp, p, M->p[0][0], M->n);
+
+  MPN_COPY (bp, t0, p);
+  bh = mpn_add (bp + p, bp + p, n - p, t0 + p, M->n);
+  cy = mpn_sub (bp, bp, n, t1, p + M->n);
+  ASSERT (cy <= bh);
+  bh -= cy;
+
+  if (ah > 0 || bh > 0)
+    {
+      ap[n] = ah;               /* a and b must have room for limb n */
+      bp[n] = bh;
+      n++;
+    }
+  else
+    {
+      /* The subtraction can reduce the size by at most one limb. */
+      if (ap[n-1] == 0 && bp[n-1] == 0)
+       n--;
+    }
+  ASSERT (ap[n-1] > 0 || bp[n-1] > 0);
+  return n;                     /* new common size of a and b */
+}
+
+/* Size analysis for hgcd:
+
+   For the recursive calls, we have n1 <= ceil(n / 2). Then the
+   storage need is determined by the storage for the recursive call
+   computing M1, and hgcd_matrix_adjust and hgcd_matrix_mul calls that use M1
+   (after this, the storage needed for M1 can be recycled).
+
+   Let S(n) denote the required storage. For M1 we need 4 * (ceil(n1/2) + 1)
+   = 4 * (ceil(n/4) + 1), for the hgcd_matrix_adjust call, we need n + 2,
+   and for the hgcd_matrix_mul, we may need 3 ceil(n/2) + 8. In total,
+   4 * ceil(n/4) + 3 ceil(n/2) + 12 <= 10 ceil(n/4) + 12.
+
+   For the recursive call, we need S(n1) = S(ceil(n/2)).
+
+   S(n) <= 10*ceil(n/4) + 12 + S(ceil(n/2))
+       <= 10*(ceil(n/4) + ... + ceil(n/2^(1+k))) + 12k + S(ceil(n/2^k))
+       <= 10*(2 ceil(n/4) + k) + 12k + S(ceil(n/2^k))
+       <= 20 ceil(n/4) + 22k + S(ceil(n/2^k))
+*/
+
+mp_size_t
+mpn_hgcd_itch (mp_size_t n)
+{
+  unsigned k;
+  int count;
+  mp_size_t nscaled;
+
+  if (BELOW_THRESHOLD (n, HGCD_THRESHOLD))
+    return MPN_HGCD_LEHMER_ITCH (n);
+
+  /* Get the recursion depth k: the bit length of (n-1)/(HGCD_THRESHOLD-1). */
+  nscaled = (n - 1) / (HGCD_THRESHOLD - 1);
+  count_leading_zeros (count, nscaled);
+  k = GMP_LIMB_BITS - count;
+
+  return 20 * ((n+3) / 4) + 22 * k
+    + MPN_HGCD_LEHMER_ITCH (HGCD_THRESHOLD);
+}
+
+/* Reduces a,b until |a-b| fits in n/2 + 1 limbs. Constructs matrix M
+   with elements of size at most (n+1)/2 - 1. Returns new size of a,
+   b, or zero if no reduction is possible. */
+
+mp_size_t
+mpn_hgcd (mp_ptr ap, mp_ptr bp, mp_size_t n,
+         struct hgcd_matrix *M, mp_ptr tp)
+{
+  mp_size_t s = n/2 + 1;
+  mp_size_t n2 = (3*n)/4 + 1;
+
+  mp_size_t p, nn;
+  int success = 0;
+
+  if (n <= s)
+    /* Happens when n <= 2, a fairly uninteresting case but exercised
+       by the random inputs of the testsuite. */
+    return 0;
+
+  ASSERT ((ap[n-1] | bp[n-1]) > 0);
+
+  ASSERT ((n+1)/2 - 1 < M->alloc);
+
+  if (BELOW_THRESHOLD (n, HGCD_THRESHOLD))
+    return mpn_hgcd_lehmer (ap, bp, n, M, tp);
+
+  p = n/2;
+  nn = mpn_hgcd (ap + p, bp + p, n - p, M, tp);
+  if (nn > 0)
+    {
+      /* Needs 2*(p + M->n) <= 2*(floor(n/2) + ceil(n/2) - 1)
+        = 2 (n - 1) */
+      n = mpn_hgcd_matrix_adjust (M, p + nn, ap, bp, p, tp);
+      success = 1;
+    }
+  while (n > n2)
+    {
+      /* Needs n + 1 storage */
+      nn = hgcd_step (n, ap, bp, s, M, tp);
+      if (!nn)
+       return success ? n : 0;
+      n = nn;
+      success = 1;
+    }
+
+  if (n > s + 2)
+    {
+      struct hgcd_matrix M1;
+      mp_size_t scratch;
+
+      p = 2*s - n + 1;
+      scratch = MPN_HGCD_MATRIX_INIT_ITCH (n-p);
+
+      mpn_hgcd_matrix_init(&M1, n - p, tp);
+      nn = mpn_hgcd (ap + p, bp + p, n - p, &M1, tp + scratch);
+      if (nn > 0)
+       {
+         /* We always have max(M) > 2^{-(GMP_NUMB_BITS + 1)} max(M1) */
+         ASSERT (M->n + 2 >= M1.n);
+
+         /* Furthermore, assume M ends with a quotient (1, q; 0, 1),
+            then either q or q + 1 is a correct quotient, and M1 will
+            start with either (1, 0; 1, 1) or (2, 1; 1, 1). This
+            rules out the case that the size of M * M1 is much
+            smaller than the expected M->n + M1->n. */
+
+         ASSERT (M->n + M1.n < M->alloc);
+
+         /* Needs 2 (p + M->n) <= 2 (2*s - n2 + 1 + n2 - s - 1)
+            = 2*s <= 2*(floor(n/2) + 1) <= n + 2. */
+         n = mpn_hgcd_matrix_adjust (&M1, p + nn, ap, bp, p, tp + scratch);
+
+         /* We need a bound for M->n + M1.n. Let n be the original
+            input size. Then
+
+              ceil(n/2) - 1 >= size of product >= M.n + M1.n - 2
+
+            and it follows that
+
+              M.n + M1.n <= ceil(n/2) + 1
+
+            Then 3*(M.n + M1.n) + 5 <= 3 * ceil(n/2) + 8 is the
+            amount of needed scratch space. */
+         mpn_hgcd_matrix_mul (M, &M1, tp + scratch);
+         success = 1;
+       }
+    }
+
+  /* This really is the base case */
+  for (;;)
+    {
+      /* Needs s+3 < n */
+      nn = hgcd_step (n, ap, bp, s, M, tp);
+      if (!nn)
+       return success ? n : 0;
+
+      n = nn;
+      success = 1;
+    }
+}
diff --git a/mpn/generic/hgcd2.c b/mpn/generic/hgcd2.c

new file mode 100644 (file)

index 0000000..ffc8c44
--- /dev/null
+++ b/mpn/generic/hgcd2.c
@@ -0,0 +1,469 @@
+/* hgcd2.c
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2008 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#if GMP_NAIL_BITS == 0
+
+/* Copied from the old mpn/generic/gcdext.c, and modified slightly to return
+   the remainder. */
+
+/* Single-limb division optimized for small quotients.  Returns the quotient of n0 by d0 and stores the remainder in *rp.  */
+static inline mp_limb_t
+div1 (mp_ptr rp,
+      mp_limb_t n0,
+      mp_limb_t d0)
+{
+  mp_limb_t q = 0;
+
+  if ((mp_limb_signed_t) n0 < 0)
+    {
+      int cnt;
+      for (cnt = 1; (mp_limb_signed_t) d0 >= 0; cnt++)
+       {
+         d0 = d0 << 1;
+       }
+
+      q = 0;
+      while (cnt)
+       {
+         q <<= 1;
+         if (n0 >= d0)
+           {
+             n0 = n0 - d0;
+             q |= 1;
+           }
+         d0 = d0 >> 1;
+         cnt--;
+       }
+    }
+  else
+    {
+      int cnt;
+      for (cnt = 0; n0 >= d0; cnt++)
+       {
+         d0 = d0 << 1;
+       }
+
+      q = 0;
+      while (cnt)
+       {
+         d0 = d0 >> 1;
+         q <<= 1;
+         if (n0 >= d0)
+           {
+             n0 = n0 - d0;
+             q |= 1;
+           }
+         cnt--;
+       }
+    }
+  *rp = n0;
+  return q;
+}
+
+/* Two-limb division optimized for small quotients.  Returns the quotient of (nh,nl) by (dh,dl); the remainder goes to rp[0] (low) and rp[1] (high).  */
+static inline mp_limb_t
+div2 (mp_ptr rp,
+      mp_limb_t nh, mp_limb_t nl,
+      mp_limb_t dh, mp_limb_t dl)
+{
+  mp_limb_t q = 0;
+
+  if ((mp_limb_signed_t) nh < 0)
+    {
+      int cnt;
+      for (cnt = 1; (mp_limb_signed_t) dh >= 0; cnt++)
+       {
+         dh = (dh << 1) | (dl >> (GMP_LIMB_BITS - 1));
+         dl = dl << 1;
+       }
+
+      while (cnt)
+       {
+         q <<= 1;
+         if (nh > dh || (nh == dh && nl >= dl))
+           {
+             sub_ddmmss (nh, nl, nh, nl, dh, dl);
+             q |= 1;
+           }
+         dl = (dh << (GMP_LIMB_BITS - 1)) | (dl >> 1);
+         dh = dh >> 1;
+         cnt--;
+       }
+    }
+  else
+    {
+      int cnt;
+      for (cnt = 0; nh > dh || (nh == dh && nl >= dl); cnt++)
+       {
+         dh = (dh << 1) | (dl >> (GMP_LIMB_BITS - 1));
+         dl = dl << 1;
+       }
+
+      while (cnt)
+       {
+         dl = (dh << (GMP_LIMB_BITS - 1)) | (dl >> 1);
+         dh = dh >> 1;
+         q <<= 1;
+         if (nh > dh || (nh == dh && nl >= dl))
+           {
+             sub_ddmmss (nh, nl, nh, nl, dh, dl);
+             q |= 1;
+           }
+         cnt--;
+       }
+    }
+
+  rp[0] = nl;
+  rp[1] = nh;
+
+  return q;
+}
+
+#if 0
+/* This div2 uses fewer branches, but it nevertheless seems to be
+   slightly slower than the above code. */
+static inline mp_limb_t
+div2 (mp_ptr rp,
+      mp_limb_t nh, mp_limb_t nl,
+      mp_limb_t dh, mp_limb_t dl)
+{
+  mp_limb_t q = 0;
+  int ncnt;
+  int dcnt;
+
+  count_leading_zeros (ncnt, nh);
+  count_leading_zeros (dcnt, dh);
+  dcnt -= ncnt;
+
+  dh = (dh << dcnt) + (-(dcnt > 0) & (dl >> (GMP_LIMB_BITS - dcnt)));
+  dl <<= dcnt;
+
+  do
+    {
+      mp_limb_t bit;
+      q <<= 1;
+      if (UNLIKELY (nh == dh))
+       bit = (nl >= dl);
+      else
+       bit = (nh > dh);
+
+      q |= bit;
+
+      sub_ddmmss (nh, nl, nh, nl, (-bit) & dh, (-bit) & dl);
+
+      dl = (dh << (GMP_LIMB_BITS - 1)) | (dl >> 1);
+      dh = dh >> 1;
+    }
+  while (dcnt--);
+
+  rp[0] = nl;
+  rp[1] = nh;
+
+  return q;
+}
+#endif
+
+#else /* GMP_NAIL_BITS != 0 */
+/* Check all functions for nail support. */
+/* hgcd2 should be defined to take inputs including nail bits, and
+   produce a matrix with elements also including nail bits. This is
+   necessary, for the matrix elements to be useful with mpn_mul_1,
+   mpn_addmul_1 and friends. */
+#error Not implemented
+#endif /* GMP_NAIL_BITS != 0 */
+
+/* Reduces a,b until |a-b| (almost) fits in one limb + 1 bit. Constructs
+   matrix M. Returns 1 if we make progress, i.e. can perform at least
+   one subtraction. Otherwise returns zero. */
+
+/* FIXME: Possible optimizations:
+
+   The div2 function starts with checking the most significant bit of
+   the numerator. We can maintain normalized operands here, call
+   hgcd with normalized operands only, which should make the code
+   simpler and possibly faster.
+
+   Experiment with table lookups on the most significant bits.
+
+   This function is also a candidate for assembler implementation.
+*/
+int
+mpn_hgcd2 (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl,
+          struct hgcd_matrix1 *M)
+{
+  mp_limb_t u00, u01, u10, u11;
+
+  if (ah < 2 || bh < 2)
+    return 0;
+
+  if (ah > bh || (ah == bh && al > bl))
+    {
+      sub_ddmmss (ah, al, ah, al, bh, bl);
+      if (ah < 2)
+       return 0;
+
+      u00 = u01 = u11 = 1;
+      u10 = 0;
+    }
+  else
+    {
+      sub_ddmmss (bh, bl, bh, bl, ah, al);
+      if (bh < 2)
+       return 0;
+
+      u00 = u10 = u11 = 1;
+      u01 = 0;
+    }
+
+  if (ah < bh)
+    goto subtract_a;
+
+  for (;;)
+    {
+      ASSERT (ah >= bh);
+      if (ah == bh)
+       goto done;
+
+      if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
+       {
+         ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));
+         bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));
+
+         break;
+       }
+
+      /* Subtract a -= q b, and multiply M from the right by (1 q ; 0
+        1), affecting the second column of M. */
+      ASSERT (ah > bh);
+      sub_ddmmss (ah, al, ah, al, bh, bl);
+
+      if (ah < 2)
+       goto done;
+
+      if (ah <= bh)
+       {
+         /* Use q = 1 */
+         u01 += u00;
+         u11 += u10;
+       }
+      else
+       {
+         mp_limb_t r[2];
+         mp_limb_t q = div2 (r, ah, al, bh, bl);
+         al = r[0]; ah = r[1];
+         if (ah < 2)
+           {
+             /* A is too small, but q is correct. */
+             u01 += q * u00;
+             u11 += q * u10;
+             goto done;
+           }
+         q++;
+         u01 += q * u00;
+         u11 += q * u10;
+       }
+    subtract_a:
+      ASSERT (bh >= ah);
+      if (ah == bh)
+       goto done;
+
+      if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
+       {
+         ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));
+         bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));
+
+         goto subtract_a1;
+       }
+
+      /* Subtract b -= q a, and multiply M from the right by (1 0 ; q
+        1), affecting the first column of M. */
+      sub_ddmmss (bh, bl, bh, bl, ah, al);
+
+      if (bh < 2)
+       goto done;
+
+      if (bh <= ah)
+       {
+         /* Use q = 1 */
+         u00 += u01;
+         u10 += u11;
+       }
+      else
+       {
+         mp_limb_t r[2];
+         mp_limb_t q = div2 (r, bh, bl, ah, al);
+         bl = r[0]; bh = r[1];
+         if (bh < 2)
+           {
+             /* B is too small, but q is correct. */
+             u00 += q * u01;
+             u10 += q * u11;
+             goto done;
+           }
+         q++;
+         u00 += q * u01;
+         u10 += q * u11;
+       }
+    }
+
+  /* NOTE: Since we discard the least significant half limb, we don't
+     get a truly maximal M (corresponding to |a - b| <
+     2^{GMP_LIMB_BITS +1}). */
+  /* Single precision loop */
+  for (;;)
+    {
+      ASSERT (ah >= bh);
+      if (ah == bh)
+       break;
+
+      ah -= bh;
+      if (ah < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
+       break;
+
+      if (ah <= bh)
+       {
+         /* Use q = 1 */
+         u01 += u00;
+         u11 += u10;
+       }
+      else
+       {
+         mp_limb_t r;
+         mp_limb_t q = div1 (&r, ah, bh);
+         ah = r;
+         if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
+           {
+             /* A is too small, but q is correct. */
+             u01 += q * u00;
+             u11 += q * u10;
+             break;
+           }
+         q++;
+         u01 += q * u00;
+         u11 += q * u10;
+       }
+    subtract_a1:
+      ASSERT (bh >= ah);
+      if (ah == bh)
+       break;
+
+      bh -= ah;
+      if (bh < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
+       break;
+
+      if (bh <= ah)
+       {
+         /* Use q = 1 */
+         u00 += u01;
+         u10 += u11;
+       }
+      else
+       {
+         mp_limb_t r;
+         mp_limb_t q = div1 (&r, bh, ah);
+         bh = r;
+         if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
+           {
+             /* B is too small, but q is correct. */
+             u00 += q * u01;
+             u10 += q * u11;
+             break;
+           }
+         q++;
+         u00 += q * u01;
+         u10 += q * u11;
+       }
+    }
+
+ done:
+  M->u[0][0] = u00; M->u[0][1] = u01;
+  M->u[1][0] = u10; M->u[1][1] = u11;
+
+  return 1;
+}
+
+/* Sets (r;b) = (a;b) M, with M = (u00, u01; u10, u11). Vectors must have
+ * space for n + 1 limbs. Uses three buffers to avoid a copy. Returns new size. */
+mp_size_t
+mpn_hgcd_mul_matrix1_vector (const struct hgcd_matrix1 *M,
+                            mp_ptr rp, mp_srcptr ap, mp_ptr bp, mp_size_t n)
+{
+  mp_limb_t ah, bh;
+
+  /* Compute (r,b) <-- (u00 a + u10 b, u01 a + u11 b) as
+
+     r  = u00 * a
+     r += u10 * b
+     b *= u11
+     b += u01 * a
+  */
+
+#if HAVE_NATIVE_mpn_addaddmul_1msb0
+  ah = mpn_addaddmul_1msb0 (rp, ap, bp, n, M->u[0][0], M->u[1][0]);
+  bh = mpn_addaddmul_1msb0 (bp, bp, ap, n, M->u[1][1], M->u[0][1]);
+#else
+  ah =     mpn_mul_1 (rp, ap, n, M->u[0][0]);
+  ah += mpn_addmul_1 (rp, bp, n, M->u[1][0]);
+
+  bh =     mpn_mul_1 (bp, bp, n, M->u[1][1]);
+  bh += mpn_addmul_1 (bp, ap, n, M->u[0][1]);
+#endif
+  rp[n] = ah;
+  bp[n] = bh;
+
+  n += (ah | bh) > 0;
+  return n;
+}
+
+/* Sets (r;b) = M^{-1}(a;b), with M^{-1} = (u11, -u01; -u10, u00) from
+   the left. Uses three buffers, to avoid a copy. Returns the new size. */
+mp_size_t
+mpn_hgcd_mul_matrix1_inverse_vector (const struct hgcd_matrix1 *M,
+                                    mp_ptr rp, mp_srcptr ap, mp_ptr bp, mp_size_t n)
+{
+  mp_limb_t h0, h1;
+
+  /* Compute (r;b) <-- (u11 a - u01 b; -u10 a + u00 b) as
+
+     r  = u11 * a
+     r -= u01 * b
+     b *= u00
+     b -= u10 * a
+  */
+
+  h0 =    mpn_mul_1 (rp, ap, n, M->u[1][1]);
+  h1 = mpn_submul_1 (rp, bp, n, M->u[0][1]);
+  ASSERT (h0 == h1);
+
+  h0 =    mpn_mul_1 (bp, bp, n, M->u[0][0]);
+  h1 = mpn_submul_1 (bp, ap, n, M->u[1][0]);
+  ASSERT (h0 == h1);
+
+  n -= (rp[n-1] | bp[n-1]) == 0;
+  return n;
+}
diff --git a/mpn/generic/invert.c b/mpn/generic/invert.c

new file mode 100644 (file)

index 0000000..dda2500
--- /dev/null
+++ b/mpn/generic/invert.c
@@ -0,0 +1,87 @@
+/* invert.c -- Compute floor((B^{2n}-1)/U) - B^n.
+
+   Contributed to the GNU project by Marco Bodrato.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright (C) 2007, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+/* FIXME: Remove NULL and TMP_*, as soon as all the callers properly
+   allocate and pass the scratch to the function. */
+#include <stdlib.h>            /* for NULL */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+void
+mpn_invert (mp_ptr ip, mp_srcptr dp, mp_size_t n, mp_ptr scratch)
+{
+  ASSERT (n > 0);
+  ASSERT (dp[n-1] & GMP_NUMB_HIGHBIT);
+  ASSERT (! MPN_OVERLAP_P (ip, n, dp, n));
+  ASSERT (! MPN_OVERLAP_P (ip, n, scratch, mpn_invertappr_itch(n)));
+  ASSERT (! MPN_OVERLAP_P (dp, n, scratch, mpn_invertappr_itch(n)));
+
+  if (n == 1)
+    invert_limb (*ip, *dp);
+  else {
+    TMP_DECL;
+
+    TMP_MARK;
+    if (scratch == NULL)       /* no caller-supplied scratch: allocate it here */
+      scratch = TMP_ALLOC_LIMBS (mpn_invert_itch (n));
+
+    if (BELOW_THRESHOLD (n, INV_APPR_THRESHOLD))
+      {
+       /* Maximum scratch needed by this branch: 2*n limbs */
+       mp_size_t i;
+       mp_ptr xp;
+
+       xp = scratch;                           /* 2 * n limbs */
+       for (i = n - 1; i >= 0; i--)
+         xp[i] = GMP_NUMB_MAX;
+       mpn_com (xp + n, dp, n);
+       if (n == 2) {
+         mpn_divrem_2 (ip, 0, xp, 4, dp);
+       } else {
+         gmp_pi1_t inv;
+         invert_pi1 (inv, dp[n-1], dp[n-2]);
+         /* FIXME: should we use dcpi1_div_q, for big sizes? */
+         mpn_sbpi1_div_q (ip, xp, 2 * n, dp, n, inv.inv32);
+       }
+      }
+    else { /* Use approximated inverse; correct the result if needed. */
+      mp_limb_t e; /* Possible error (0 or 1) in the approximate inverse */
+
+      ASSERT ( mpn_invert_itch (n) >= mpn_invertappr_itch (n) );
+      e = mpn_ni_invertappr (ip, dp, n, scratch);
+
+      if (e) { /* Assume the error can only be "0" (no error) or "1". */
+       /* Code to detect and correct the "off by one" approximation. */
+       mpn_mul_n (scratch, ip, dp, n);
+       ASSERT_NOCARRY (mpn_add_n (scratch + n, scratch + n, dp, n));
+       if (! mpn_add (scratch, scratch, 2*n, dp, n))
+         MPN_INCR_U (ip, n, 1); /* The value was wrong, correct it.  */
+      }
+    }
+    TMP_FREE;
+  }
+}
diff --git a/mpn/generic/invertappr.c b/mpn/generic/invertappr.c

new file mode 100644 (file)

index 0000000..8064956
--- /dev/null
+++ b/mpn/generic/invertappr.c
@@ -0,0 +1,311 @@
+/* mpn_invertappr and helper functions.  Compute I such that
+   floor((B^{2n}-1)/U) - 1 <= I + B^n <= floor((B^{2n}-1)/U).
+
+   Contributed to the GNU project by Marco Bodrato.
+
+   The algorithm used here was inspired by ApproximateReciprocal from "Modern
+   Computer Arithmetic", by Richard P. Brent and Paul Zimmermann.  Special
+   thanks to Paul Zimmermann for his very valuable suggestions on all the
+   theoretical aspects during the work on this code.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright (C) 2007, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+/* FIXME: Remove NULL and TMP_*, as soon as all the callers properly
+   allocate and pass the scratch to the function. */
+#include <stdlib.h>            /* for NULL */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* FIXME: The iterative version splits the operand in two slightly unbalanced
+   parts, the use of log_2 (or counting the bits) underestimates the maximum
+   number of iterations.  */
+
+/* Bit length of n, saturating at 16.  This is intended for constant
+   THRESHOLDs only, where the compiler can completely fold the result.  */
+#define LOG2C(n) \
+ (((n) >=    0x1) + ((n) >=    0x2) + ((n) >=    0x4) + ((n) >=    0x8) + \
+  ((n) >=   0x10) + ((n) >=   0x20) + ((n) >=   0x40) + ((n) >=   0x80) + \
+  ((n) >=  0x100) + ((n) >=  0x200) + ((n) >=  0x400) + ((n) >=  0x800) + \
+  ((n) >= 0x1000) + ((n) >= 0x2000) + ((n) >= 0x4000) + ((n) >= 0x8000))
+
+#if TUNE_PROGRAM_BUILD
+#define NPOWS \
+ ((sizeof(mp_size_t) > 6 ? 48 : 8*sizeof(mp_size_t)))
+#define MAYBE_dcpi1_divappr   1
+#else
+#define NPOWS \
+ ((sizeof(mp_size_t) > 6 ? 48 : 8*sizeof(mp_size_t)) - LOG2C (INV_NEWTON_THRESHOLD))
+#define MAYBE_dcpi1_divappr \
+  (INV_NEWTON_THRESHOLD < DC_DIVAPPR_Q_THRESHOLD)
+#if (INV_NEWTON_THRESHOLD > INV_MULMOD_BNM1_THRESHOLD) && \
+    (INV_APPR_THRESHOLD > INV_MULMOD_BNM1_THRESHOLD)
+#undef  INV_MULMOD_BNM1_THRESHOLD
+#define INV_MULMOD_BNM1_THRESHOLD 0 /* always when Newton */
+#endif
+#endif
+
+/* All the three functions mpn{,_bc,_ni}_invertappr (ip, dp, n, scratch), take
+   the strictly normalised value {dp,n} (i.e., most significant bit must be set)
+   as an input, and compute {ip,n}: the approximate reciprocal of {dp,n}.
+
+   Let e = mpn*_invertappr (ip, dp, n, scratch) be the returned value; the
+   following conditions are satisfied by the output:
+     0 <= e <= 1;
+     {dp,n}*(B^n+{ip,n}) < B^{2n} <= {dp,n}*(B^n+{ip,n}+1+e) .
+   I.e. e=0 means that the result {ip,n} equals the one given by mpn_invert.
+       e=1 means that the result _may_ be one less than expected.
+
+   The _bc version returns e=1 most of the time.
+   The _ni version should return e=0 most of the time; only about 1% of
+   possible random input should give e=1.
+
+   When the strict result is needed, i.e., e=0 in the relation above:
+     {dp,n}*(B^n+{ip,n}) < B^{2n} <= {dp,n}*(B^n+{ip,n}+1) ;
+   the function mpn_invert (ip, dp, n, scratch) should be used instead.  */
+
+/* Maximum scratch needed by this branch (at tp): 3*n + 2 */
+static mp_limb_t
+mpn_bc_invertappr (mp_ptr ip, mp_srcptr dp, mp_size_t n, mp_ptr tp)
+{
+  mp_ptr xp;
+
+  ASSERT (n > 0);
+  ASSERT (dp[n-1] & GMP_NUMB_HIGHBIT);
+  ASSERT (! MPN_OVERLAP_P (ip, n, dp, n));
+  ASSERT (! MPN_OVERLAP_P (ip, n, tp, mpn_invertappr_itch(n)));
+  ASSERT (! MPN_OVERLAP_P (dp, n, tp, mpn_invertappr_itch(n)));
+
+  /* Compute a base value of n limbs. */
+  if (n == 1)
+    invert_limb (*ip, *dp);
+  else {
+    mp_size_t i;
+    xp = tp + n + 2;                           /* 2 * n limbs */
+
+    for (i = n - 1; i >= 0; i--)
+      xp[i] = GMP_NUMB_MAX;
+    mpn_com (xp + n, dp, n);
+
+    /* Now xp contains B^2n - {dp,n}*B^n - 1 */
+
+    /* FIXME: if mpn_*pi1_divappr_q handles n==2, use it! */
+    if (n == 2) {
+      mpn_divrem_2 (ip, 0, xp, 4, dp);
+    } else {
+      gmp_pi1_t inv;
+      invert_pi1 (inv, dp[n-1], dp[n-2]);
+      if (! MAYBE_dcpi1_divappr
+         || BELOW_THRESHOLD (n, DC_DIVAPPR_Q_THRESHOLD))
+       mpn_sbpi1_divappr_q (ip, xp, 2 * n, dp, n, inv.inv32);
+      else
+       mpn_dcpi1_divappr_q (ip, xp, 2 * n, dp, n, &inv);
+      MPN_DECR_U(ip, n, 1);
+      return 1;                /* approximate quotient: report e = 1 */
+    }
+  }
+  return 0;                    /* n <= 2: exact path, e = 0 */
+}
+
+/* mpn_ni_invertappr: computes the approximate reciprocal using Newton's
+   iterations (at least one).
+
+   Inspired by Algorithm "ApproximateReciprocal", published in "Modern Computer
+   Arithmetic" by Richard P. Brent and Paul Zimmermann, algorithm 3.5, page 121
+   in version 0.4 of the book.
+
+   Some adaptations were introduced, to allow product mod B^m-1 and return the
+   value e.
+
+   USE_MUL_N = 1 (default) introduces a correction in such a way that "the
+   value of B^{n+h}-T computed at step 8 cannot exceed B^n-1" (the book reads
+   "2B^n-1").  This correction should not require modifying the proof.
+
+   We use a wrapped product modulo B^m-1.  NOTE: is there any normalisation
+   problem for the [0] class?  It shouldn't: we compute 2*|A*X_h - B^{n+h}| <
+   B^m-1.  We may get [0] if and only if we get AX_h = B^{n+h}.  This can
+   happen only if A=B^{n}/2, but this implies X_h = B^{h}*2-1 i.e., AX_h =
+   B^{n+h} - A, then we get into the "negative" branch, where X_h is not
+   incremented (because A < B^n).
+
+   FIXME: the scratch for mulmod_bnm1 does not currently fit in the scratch, it
+   is allocated apart.  */
+
+#define USE_MUL_N 1
+
+mp_limb_t
+mpn_ni_invertappr (mp_ptr ip, mp_srcptr dp, mp_size_t n, mp_ptr scratch)
+{
+  mp_limb_t cy;
+  mp_ptr xp;
+  mp_size_t rn, mn;
+  mp_size_t sizes[NPOWS], *sizp;
+  mp_ptr tp;
+  TMP_DECL;
+#define rp scratch
+
+  ASSERT (n > 2);
+  ASSERT (dp[n-1] & GMP_NUMB_HIGHBIT);
+  ASSERT (! MPN_OVERLAP_P (ip, n, dp, n));
+  ASSERT (! MPN_OVERLAP_P (ip, n, scratch, mpn_invertappr_itch(n)));
+  ASSERT (! MPN_OVERLAP_P (dp, n, scratch, mpn_invertappr_itch(n)));
+
+  /* Compute the computation precisions from highest to lowest, leaving the
+     base case size in 'rn'.  */
+  sizp = sizes;
+  rn = n;
+  do {
+    *sizp = rn;
+    rn = ((rn) >> 1) + 1;
+    sizp ++;
+  } while (ABOVE_THRESHOLD (rn, INV_NEWTON_THRESHOLD));
+
+  /* We search the inverse of 0.{dp,n}, we compute it as 1.{ip,n} */
+  dp += n;
+  ip += n;
+
+  /* Compute a base value of rn limbs. */
+  mpn_bc_invertappr (ip - rn, dp - rn, rn, scratch);
+
+  TMP_MARK;
+
+  if (ABOVE_THRESHOLD (n, INV_MULMOD_BNM1_THRESHOLD))
+    {
+      mn = mpn_mulmod_bnm1_next_size (n + 1);
+      tp = TMP_ALLOC_LIMBS (mpn_mulmod_bnm1_itch (mn, n, (n >> 1) + 1));
+    }
+  /* Use Newton's iterations to get the desired precision. */
+
+  /* define rp scratch; 2rn + 1 limbs <= 2(n>>1 + 1) + 1 <= n + 3  limbs */
+  /* Maximum scratch needed by this branch <= 3*n + 2 */
+  xp = scratch + n + 3;                                /*  n + rn limbs */
+  while (1) {
+    mp_limb_t method;
+
+    n = *--sizp;
+    /*
+      v    n  v
+      +----+--+
+      ^ rn ^
+    */
+
+    /* Compute i_jd . */
+    if (BELOW_THRESHOLD (n, INV_MULMOD_BNM1_THRESHOLD)
+       || ((mn = mpn_mulmod_bnm1_next_size (n + 1)) > (n + rn))) {
+      /* FIXME: We do only need {xp,n+1}*/
+      mpn_mul (xp, dp - n, n, ip - rn, rn);
+      mpn_add_n (xp + rn, xp + rn, dp - n, n - rn + 1);
+      method = 1; /* Remember we used (truncated) product */
+      /* We computed cy.{xp,rn+n} <- 1.{ip,rn} * 0.{dp,n} */
+    } else { /* Use B^n-1 wraparound */
+      mpn_mulmod_bnm1 (xp, mn, dp - n, n, ip - rn, rn, tp);
+      /* We computed {xp,mn} <- {ip,rn} * {dp,n} mod (B^mn-1) */
+      /* We know that 2*|ip*dp + dp*B^rn - B^{rn+n}| < B^mn-1 */
+      /* Add dp*B^rn mod (B^mn-1) */
+      ASSERT (n >= mn - rn);
+      xp[mn] = 1 + mpn_add_n (xp + rn, xp + rn, dp - n, mn - rn);
+      cy = mpn_add_n (xp, xp, dp - (n - (mn - rn)), n - (mn - rn));
+      MPN_INCR_U (xp + n - (mn - rn), mn + 1 - n + (mn - rn), cy);
+      ASSERT (n + rn >=  mn);
+      /* Subtract B^{rn+n} */
+      MPN_DECR_U (xp + rn + n - mn, 2*mn + 1 - rn - n, 1);
+      if (xp[mn])
+       MPN_INCR_U (xp, mn, xp[mn] - 1);
+      else
+       MPN_DECR_U (xp, mn, 1);
+      method = 0; /* Remember we are working Mod B^m-1 */
+    }
+
+    if (xp[n] < 2) { /* "positive" residue class */
+      cy = 1;
+      while (xp[n] || mpn_cmp (xp, dp - n, n)>0) {
+       xp[n] -= mpn_sub_n (xp, xp, dp - n, n);
+       cy ++;
+      }
+      MPN_DECR_U(ip - rn, rn, cy);
+      ASSERT (cy <= 4); /* at most 3 cycles for the while above */
+      ASSERT_NOCARRY (mpn_sub_n (xp, dp - n, xp, n));
+      ASSERT (xp[n] == 0);
+    } else { /* "negative" residue class */
+      mpn_com (xp, xp, n + 1);
+      MPN_INCR_U(xp, n + 1, method);
+      ASSERT (xp[n] <= 1);
+#if USE_MUL_N
+      if (xp[n]) {
+       MPN_INCR_U(ip - rn, rn, 1);
+       ASSERT_CARRY (mpn_sub_n (xp, xp, dp - n, n));
+      }
+#endif
+    }
+
+    /* Compute x_ju_j. FIXME: We need {rp+rn,rn}, mulhi? */
+#if USE_MUL_N
+    mpn_mul_n (rp, xp + n - rn, ip - rn, rn);
+#else
+    rp[2*rn] = 0;
+    mpn_mul (rp, xp + n - rn, rn + xp[n], ip - rn, rn);
+#endif
+    /* We need _only_ the carry from the next addition  */
+    /* Anyway 2rn-n <= 2... we don't need to optimise.  */
+    cy = mpn_add_n (rp + rn, rp + rn, xp + n - rn, 2*rn - n);
+    cy = mpn_add_nc (ip - n, rp + 3*rn - n, xp + rn, n - rn, cy);
+    MPN_INCR_U (ip - rn, rn, cy + (1-USE_MUL_N)*(rp[2*rn] + xp[n]));
+    if (sizp == sizes) { /* Get out of the cycle */
+      /* Check for possible carry propagation from below. */
+      cy = rp[3*rn - n - 1] > GMP_NUMB_MAX - 7; /* Be conservative. */
+/*    cy = mpn_add_1 (rp + rn, rp + rn, 2*rn - n, 4); */
+      break;
+    }
+    rn = n;
+  }
+  TMP_FREE;
+
+  return cy;
+#undef rp
+}
+
+mp_limb_t
+mpn_invertappr (mp_ptr ip, mp_srcptr dp, mp_size_t n, mp_ptr scratch)
+{
+  mp_limb_t res;
+  TMP_DECL;
+
+  TMP_MARK;
+
+  if (scratch == NULL)         /* no caller-supplied scratch: allocate it here */
+    scratch = TMP_ALLOC_LIMBS (mpn_invertappr_itch (n));
+
+  ASSERT (n > 0);
+  ASSERT (dp[n-1] & GMP_NUMB_HIGHBIT);
+  ASSERT (! MPN_OVERLAP_P (ip, n, dp, n));
+  ASSERT (! MPN_OVERLAP_P (ip, n, scratch, mpn_invertappr_itch(n)));
+  ASSERT (! MPN_OVERLAP_P (dp, n, scratch, mpn_invertappr_itch(n)));
+
+  if (BELOW_THRESHOLD (n, INV_NEWTON_THRESHOLD))       /* small n: basecase */
+    res = mpn_bc_invertappr (ip, dp, n, scratch);
+  else                                                 /* otherwise: Newton */
+    res = mpn_ni_invertappr (ip, dp, n, scratch);
+
+  TMP_FREE;
+  return res;
+}
diff --git a/mpn/generic/jacbase.c b/mpn/generic/jacbase.c

new file mode 100644 (file)

index 0000000..6972a13
--- /dev/null
+++ b/mpn/generic/jacbase.c
@@ -0,0 +1,168 @@
+/* mpn_jacobi_base -- limb/limb Jacobi symbol with restricted arguments.
+
+   THIS INTERFACE IS PRELIMINARY AND MIGHT DISAPPEAR OR BE SUBJECT TO
+   INCOMPATIBLE CHANGES IN A FUTURE RELEASE OF GMP.
+
+Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Use the simple loop by default.  The generic count_trailing_zeros is not
+   very fast, and the extra trickery of method 3 has proven to be of less
+   use than might have been thought.  */
+/* Selects which of the PROCESS_TWOS_* implementations below is compiled
+   (1, 2 or 3).  A value defined before this point is respected; otherwise
+   method 2 is used.  */
+#ifndef JACOBI_BASE_METHOD
+#define JACOBI_BASE_METHOD  2
+#endif
+
+
+/* Use count_trailing_zeros.  */
+#if JACOBI_BASE_METHOD == 1
+/* Remove all factors of two from a in one step: count the trailing zero
+   bits, fold the matching JACOBI_TWOS_U_BIT1 adjustment into result_bit1,
+   then shift a down by that count.  Operates on the variables a, b and
+   result_bit1 of the expansion site.
+
+   The body is wrapped in do ... while (0) so that "PROCESS_TWOS_ANY;"
+   is exactly one statement and stays correct as the body of an
+   unbraced if/else.  */
+#define PROCESS_TWOS_ANY                                \
+  do {                                                  \
+    mp_limb_t  twos;                                    \
+    count_trailing_zeros (twos, a);                     \
+    result_bit1 ^= JACOBI_TWOS_U_BIT1 (twos, b);        \
+    a >>= twos;                                         \
+  } while (0)
+/* With this method the same code serves a known-even a.  */
+#define PROCESS_TWOS_EVEN  PROCESS_TWOS_ANY
+#endif
+
+/* Use a simple loop.  A disadvantage of this is that there's a branch on a
+   50/50 chance of a 0 or 1 low bit.  */
+#if JACOBI_BASE_METHOD == 2
+/* For a known to be even and non-zero: shift a right one bit at a time
+   until it is odd, toggling result_bit1 by JACOBI_TWO_U_BIT1 (b) for each
+   bit removed.  Operates on the variables a, b and result_bit1 of the
+   expansion site.
+
+   Both macros are wrapped in do ... while (0) so that each expands to
+   exactly one statement; in particular the "if" inside PROCESS_TWOS_ANY
+   can no longer capture an "else" written after the macro call.  */
+#define PROCESS_TWOS_EVEN               \
+  do {                                  \
+    int  two;                           \
+    two = JACOBI_TWO_U_BIT1 (b);        \
+    do                                  \
+      {                                 \
+       a >>= 1;                        \
+       result_bit1 ^= two;             \
+       ASSERT (a != 0);                \
+      }                                 \
+    while ((a & 1) == 0);               \
+  } while (0)
+/* For any non-zero a: nothing to do when a is already odd.  */
+#define PROCESS_TWOS_ANY        \
+  do {                          \
+    if ((a & 1) == 0)           \
+      PROCESS_TWOS_EVEN;        \
+  } while (0)
+#endif
+
+/* Process one bit arithmetically, then a simple loop.  This cuts the loop
+   condition down to a 25/75 chance, which should branch predict better.
+   The CPU will need a reasonable variable left shift.  */
+#if JACOBI_BASE_METHOD == 3
+/* For a known to be even and non-zero.  The first bit is always shifted
+   out and a second one is shifted out arithmetically when present
+   (shift is 0 or 1); mask is 2 exactly when bit 1 of the original a was
+   clear, in which case the two toggles of result_bit1 cancel.  Any
+   remaining zero bits are handled by the loop.
+
+   Both macros are wrapped in do ... while (0) so that each expands to
+   exactly one statement and is safe as the body of an unbraced
+   if/else.  */
+#define PROCESS_TWOS_EVEN               \
+  do {                                  \
+    int  two, mask, shift;              \
+                                        \
+    two = JACOBI_TWO_U_BIT1 (b);        \
+    mask = (~a & 2);                    \
+    a >>= 1;                            \
+                                        \
+    shift = (~a & 1);                   \
+    a >>= shift;                        \
+    result_bit1 ^= two ^ (two & mask);  \
+                                        \
+    while ((a & 1) == 0)                \
+      {                                 \
+       a >>= 1;                        \
+       result_bit1 ^= two;             \
+       ASSERT (a != 0);                \
+      }                                 \
+  } while (0)
+/* For any non-zero a.  One zero bit, if present, is removed
+   arithmetically (shift is 0 or 1 and mask is 2*shift, so result_bit1 is
+   toggled by two only when a bit was actually removed); further zero bits
+   are handled by the loop.  */
+#define PROCESS_TWOS_ANY                \
+  do {                                  \
+    int  two, mask, shift;              \
+                                        \
+    two = JACOBI_TWO_U_BIT1 (b);        \
+    shift = (~a & 1);                   \
+    a >>= shift;                        \
+                                        \
+    mask = shift << 1;                  \
+    result_bit1 ^= (two & mask);        \
+                                        \
+    while ((a & 1) == 0)                \
+      {                                 \
+       a >>= 1;                        \
+       result_bit1 ^= two;             \
+       ASSERT (a != 0);                \
+      }                                 \
+  } while (0)
+#endif
+
+
+/* Calculate the value of the Jacobi symbol (a/b) of two mp_limb_t's, but
+   with a restricted range of inputs accepted, namely b>1, b odd, and a<=b.
+
+   The initial result_bit1 is taken as a parameter for the convenience of
+   mpz_kronecker_ui() et al.  The sign changes both here and in those
+   routines accumulate nicely in bit 1, see the JACOBI macros.
+
+   The return value here is the normal +1, 0, or -1.  Note that +1 and -1
+   have bit 1 in the "BIT1" sense, which could be useful if the caller is
+   accumulating it into some extended calculation.
+
+   Duplicating the loop body to avoid the MP_LIMB_T_SWAP(a,b) would be
+   possible, but a couple of tests suggest it's not a significant speedup,
+   and may even be a slowdown, so what's here is good enough for now.
+
+   Future: The code doesn't demand a<=b actually, so maybe this could be
+   relaxed.  All the places this is used currently call with a<=b though.  */
+
+int
+mpn_jacobi_base (mp_limb_t a, mp_limb_t b, int result_bit1)
+{
+  ASSERT (b & 1);  /* b odd */
+  ASSERT (b != 1);
+  ASSERT (a <= b);
+
+  /* b > 1 is asserted above, so (0/b) is 0.  */
+  if (a == 0)
+    return 0;
+
+  /* Make a odd, accumulating the sign contribution of the removed
+     factors of two in result_bit1.  */
+  PROCESS_TWOS_ANY;
+  if (a == 1)
+    goto done;
+
+  for (;;)
+    {
+      /* Both operands are odd here: fold in the sign change given by
+        JACOBI_RECIP_UU_BIT1 and exchange the roles of a and b.  */
+      result_bit1 ^= JACOBI_RECIP_UU_BIT1 (a, b);
+      MP_LIMB_T_SWAP (a, b);
+
+      do
+       {
+         /* working on (a/b), a,b odd, a>=b */
+         ASSERT (a & 1);
+         ASSERT (b & 1);
+         ASSERT (a >= b);
+
+         /* Reduce a by b.  A zero difference means a was equal to b, and
+            the result is 0.  */
+         if ((a -= b) == 0)
+           return 0;
+
+         /* odd - odd is even and, having just been tested, non-zero.  */
+         PROCESS_TWOS_EVEN;
+         if (a == 1)
+           goto done;
+       }
+      while (a >= b);
+    }
+
+ done:
+  /* Convert the accumulated bit 1 into the return value.  */
+  return JACOBI_BIT1_TO_PN (result_bit1);
+}
diff --git a/mpn/generic/logops_n.c b/mpn/generic/logops_n.c

new file mode 100644 (file)

index 0000000..618efe5
--- /dev/null
+++ b/mpn/generic/logops_n.c
@@ -0,0 +1,67 @@
+/* mpn_and_n, mpn_ior_n, etc -- mpn logical operations.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* This one source file provides all eight logical operations.  It is meant
+   to be compiled once per operation with the matching OPERATION_<name>
+   macro defined; that selects
+     func - the exported symbol being defined, and
+     call - the name, as made available by gmp-impl.h, that performs the
+            operation.  */
+
+#ifdef OPERATION_and_n
+#define func __MPN(and_n)
+#define call mpn_and_n
+#endif
+
+#ifdef OPERATION_andn_n
+#define func __MPN(andn_n)
+#define call mpn_andn_n
+#endif
+
+#ifdef OPERATION_nand_n
+#define func __MPN(nand_n)
+#define call mpn_nand_n
+#endif
+
+#ifdef OPERATION_ior_n
+#define func __MPN(ior_n)
+#define call mpn_ior_n
+#endif
+
+#ifdef OPERATION_iorn_n
+#define func __MPN(iorn_n)
+#define call mpn_iorn_n
+#endif
+
+#ifdef OPERATION_nior_n
+#define func __MPN(nior_n)
+#define call mpn_nior_n
+#endif
+
+#ifdef OPERATION_xor_n
+#define func __MPN(xor_n)
+#define call mpn_xor_n
+#endif
+
+#ifdef OPERATION_xnor_n
+#define func __MPN(xnor_n)
+#define call mpn_xnor_n
+#endif
+
+/* Without a selection the definition below would be an undeclared
+   function named "func" calling an undeclared "call"; stop with a clear
+   message instead.  */
+#if ! defined (func) || ! defined (call)
+#error "logops_n.c needs one of OPERATION_and_n, andn_n, nand_n, ior_n, iorn_n, nior_n, xor_n or xnor_n defined"
+#endif
+
+/* Apply the selected operation to the n-limb operands {up,n} and {vp,n},
+   leaving the n-limb result at rp.  */
+void
+func (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+  call (rp, up, vp, n);
+}
diff --git a/mpn/generic/lshift.c b/mpn/generic/lshift.c

new file mode 100644 (file)

index 0000000..fdc7e44
--- /dev/null
+++ b/mpn/generic/lshift.c
@@ -0,0 +1,63 @@
+/* mpn_lshift -- Shift left low level.
+
+Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2002 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Shift U (pointed to by up and n limbs long) cnt bits to the left
+   and store the n least significant limbs of the result at rp.
+   Return the bits shifted out from the most significant limb.
+
+   Argument constraints:
+   1. 0 < cnt < GMP_NUMB_BITS.
+   2. If the result is to be written over the input, rp must be >= up.
+*/
+
+mp_limb_t
+mpn_lshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt)
+{
+  mp_limb_t cur, pending, out;
+  unsigned int rshift;
+  mp_size_t k;
+
+  ASSERT (n >= 1);
+  ASSERT (cnt >= 1);
+  ASSERT (cnt < GMP_NUMB_BITS);
+  ASSERT (MPN_SAME_OR_DECR_P (rp, up, n));
+
+  /* Bits of the next lower limb that move up into the current one are
+     brought into place by a right shift of rshift.  */
+  rshift = GMP_NUMB_BITS - cnt;
+
+  /* The most significant limb supplies the return value; what is left of
+     it waits in "pending" for the bits coming up from below.  */
+  cur = up[n - 1];
+  out = cur >> rshift;
+  pending = (cur << cnt) & GMP_NUMB_MASK;
+
+  /* Walk from the high end to the low end.  Each source limb is read
+     before the destination limb one position above it is written, which
+     is what makes the rp >= up overlap permitted by the assertion work.  */
+  for (k = n - 1; k != 0; k--)
+    {
+      cur = up[k - 1];
+      rp[k] = pending | (cur >> rshift);
+      pending = (cur << cnt) & GMP_NUMB_MASK;
+    }
+
+  /* The lowest limb gets zeros shifted in.  */
+  rp[0] = pending;
+
+  return out;
+}
diff --git a/mpn/generic/lshiftc.c b/mpn/generic/lshiftc.c

new file mode 100644 (file)

index 0000000..80c1efe
--- /dev/null
+++ b/mpn/generic/lshiftc.c
@@ -0,0 +1,63 @@
+/* mpn_lshiftc -- Shift left low level with complement.
+
+Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2002, 2009 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Shift U (pointed to by up and n limbs long) cnt bits to the left,
+   complement the shifted value, and store its n least significant limbs
+   at rp.  Return the bits shifted out from the most significant limb
+   (these are returned as they are, not complemented).
+
+   Argument constraints:
+   1. 0 < cnt < GMP_NUMB_BITS.
+   2. If the result is to be written over the input, rp must be >= up.
+*/
+
+mp_limb_t
+mpn_lshiftc (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt)
+{
+  mp_limb_t cur, pending, out;
+  unsigned int rshift;
+  mp_size_t k;
+
+  ASSERT (n >= 1);
+  ASSERT (cnt >= 1);
+  ASSERT (cnt < GMP_NUMB_BITS);
+  ASSERT (MPN_SAME_OR_DECR_P (rp, up, n));
+
+  /* Bits of the next lower limb that move up into the current one are
+     brought into place by a right shift of rshift.  */
+  rshift = GMP_NUMB_BITS - cnt;
+
+  /* The most significant limb supplies the (uncomplemented) return
+     value.  "pending" is kept unmasked; masking happens together with the
+     complement when a limb is stored.  */
+  cur = up[n - 1];
+  out = cur >> rshift;
+  pending = cur << cnt;
+
+  /* Walk from the high end to the low end, reading each source limb
+     before the destination limb one position above it is written.  */
+  for (k = n - 1; k != 0; k--)
+    {
+      cur = up[k - 1];
+      rp[k] = (~(pending | (cur >> rshift))) & GMP_NUMB_MASK;
+      pending = cur << cnt;
+    }
+
+  /* Lowest limb: the shifted-in zeros are complemented as well.  */
+  rp[0] = (~pending) & GMP_NUMB_MASK;
+
+  return out;
+}
diff --git a/mpn/generic/matrix22_mul.c b/mpn/generic/matrix22_mul.c

new file mode 100644 (file)

index 0000000..7e710d2
--- /dev/null
+++ b/mpn/generic/matrix22_mul.c
@@ -0,0 +1,311 @@
+/* matrix22_mul.c.
+
+   Contributed by Niels Möller and Marco Bodrato.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2003, 2004, 2005, 2008, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Multiply {ap,an} by {bp,bn} into rp with mpn_mul, handing the operands
+   over so that the one passed first is never the shorter.
+
+   Every parameter is parenthesized in the expansion because the macro is
+   invoked with expressions such as "mn + 1" as size arguments.  The
+   arguments are evaluated more than once, so they must be free of side
+   effects.  */
+#define MUL(rp, ap, an, bp, bn) do {           \
+  if ((an) >= (bn))                            \
+    mpn_mul ((rp), (ap), (an), (bp), (bn));    \
+  else                                         \
+    mpn_mul ((rp), (bp), (bn), (ap), (an));    \
+} while (0)
+
+/* Inputs are unsigned. */
+/* Store |{ap,n} - {bp,n}| at rp.  Returns 1 when {ap,n} < {bp,n}, so that
+   the operands had to be subtracted the other way round, and 0
+   otherwise.  */
+static int
+abs_sub_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
+{
+  int order;
+
+  MPN_CMP (order, ap, bp, n);
+  if (order < 0)
+    {
+      /* a < b: compute b - a and report the sign flip.  */
+      mpn_sub_n (rp, bp, ap, n);
+      return 1;
+    }
+
+  /* a >= b: the plain difference is already non-negative.  */
+  mpn_sub_n (rp, ap, bp, n);
+  return 0;
+}
+
+/* Add two n-limb sign-magnitude numbers, {ap,n} with sign flag as and
+   {bp,n} with sign flag bs, storing the magnitude of the sum at rp and
+   returning its sign flag.  With equal signs the magnitudes are added and
+   the addition must not carry out of n limbs.  */
+static int
+add_signed_n (mp_ptr rp,
+             mp_srcptr ap, int as, mp_srcptr bp, int bs, mp_size_t n)
+{
+  if (as == bs)
+    {
+      /* Same sign: magnitudes add up, sign is unchanged.  */
+      ASSERT_NOCARRY (mpn_add_n (rp, ap, bp, n));
+      return as;
+    }
+
+  /* Opposite signs: take |a - b|; the sign of a is kept unless abs_sub_n
+     had to swap the operands.  */
+  return as ^ abs_sub_n (rp, ap, bp, n);
+}
+
+/* Number of scratch limbs mpn_matrix22_mul needs for elements of rn and
+   mn limbs: 3*rn + 2*mn when either size is below
+   MATRIX22_STRASSEN_THRESHOLD, and 3*(rn + mn) + 5 otherwise.  */
+mp_size_t
+mpn_matrix22_mul_itch (mp_size_t rn, mp_size_t mn)
+{
+  int below_strassen;
+
+  below_strassen = BELOW_THRESHOLD (rn, MATRIX22_STRASSEN_THRESHOLD)
+    || BELOW_THRESHOLD (mn, MATRIX22_STRASSEN_THRESHOLD);
+
+  return below_strassen ? 3*rn + 2*mn : 3*(rn + mn) + 5;
+}
+
+/* Algorithm:
+
+    / s0 \   /  1  0  0  0 \ / r0 \
+    | s1 |   |  0  1  0  1 | | r1 |
+    | s2 |   |  0  0 -1  1 | | r2 |
+    | s3 | = |  0  1 -1  1 | \ r3 /
+    | s4 |   | -1  1 -1  1 |
+    | s5 |   |  0  1  0  0 |
+    \ s6 /   \  0  0  1  0 /
+
+    / t0 \   /  1  0  0  0 \ / m0 \
+    | t1 |   |  0  1  0  1 | | m1 |
+    | t2 |   |  0  0 -1  1 | | m2 |
+    | t3 | = |  0  1 -1  1 | \ m3 /
+    | t4 |   | -1  1 -1  1 |
+    | t5 |   |  0  1  0  0 |
+    \ t6 /   \  0  0  1  0 /
+
+  Note: the two matrices above are the same, but s_i and t_i are used
+  in the same product, only for i<4, see "A Strassen-like Matrix
+  Multiplication suited for squaring and higher power computation" by
+  M. Bodrato, in Proceedings of ISSAC 2010.
+
+    / r0 \   / 1 0  0  0  0  1  0 \ / s0*t0 \
+    | r1 | = | 0 0 -1  1 -1  1  0 | | s1*t1 |
+    | r2 |   | 0 1  0 -1  0 -1 -1 | | s2*t2 |
+    \ r3 /   \ 0 1  1 -1  0 -1  0 / | s3*t3 |
+                                   | s4*t5 |
+                                   | s5*t6 |
+                                   \ s6*t4 /
+
+  The scheduling uses two temporaries U0 and U1 to store products, and
+  two, S0 and T0, to store combinations of entries of the two
+  operands.
+*/
+
+/* Computes R = R * M. Elements are numbers R = (r0, r1; r2, r3).
+ *
+ * Resulting elements are of size up to rn + mn + 1.
+ *
+ * Temporary storage: 3 rn + 3 mn + 5. */
+void
+mpn_matrix22_mul_strassen (mp_ptr r0, mp_ptr r1, mp_ptr r2, mp_ptr r3, mp_size_t rn,
+                          mp_srcptr m0, mp_srcptr m1, mp_srcptr m2, mp_srcptr m3, mp_size_t mn,
+                          mp_ptr tp)
+{
+  /* s0 and t0 hold combinations of the entries of R and M respectively,
+     u0 and u1 hold products.  The ...s variables are sign flags in the
+     convention of abs_sub_n and add_signed_n: 1 means the stored
+     magnitude stands for a negative value.  */
+  mp_ptr s0, t0, u0, u1;
+  int r1s, r3s, s0s, t0s, u1s;
+  /* Partition the scratch area.  Together with the rn + mn + 2 limbs
+     noted for u1 the pieces add up to 3*rn + 3*mn + 5 limbs.  */
+  s0 = tp; tp += rn + 1;
+  t0 = tp; tp += mn + 1;
+  u0 = tp; tp += rn + mn + 1;
+  u1 = tp; /* rn + mn + 2 */
+
+  MUL (u0, r1, rn, m2, mn);            /* u5 = s5 * t6 */
+  r3s = abs_sub_n (r3, r3, r2, rn);    /* r3 - r2 */
+  if (r3s)
+    {
+      r1s = abs_sub_n (r1, r1, r3, rn);
+      r1[rn] = 0;
+    }
+  else
+    {
+      r1[rn] = mpn_add_n (r1, r1, r3, rn);
+      r1s = 0;                         /* r1 - r2 + r3  */
+    }
+  if (r1s)
+    {
+      s0[rn] = mpn_add_n (s0, r1, r0, rn);
+      s0s = 0;
+    }
+  else if (r1[rn] != 0)
+    {
+      s0[rn] = r1[rn] - mpn_sub_n (s0, r1, r0, rn);
+      s0s = 1;                         /* s4 = -r0 + r1 - r2 + r3 */
+                                       /* Reverse sign! */
+    }
+  else
+    {
+      s0s = abs_sub_n (s0, r0, r1, rn);
+      s0[rn] = 0;
+    }
+  MUL (u1, r0, rn, m0, mn);            /* u0 = s0 * t0 */
+  r0[rn+mn] = mpn_add_n (r0, u0, u1, rn + mn);
+  ASSERT (r0[rn+mn] < 2);              /* u0 + u5 */
+
+  t0s = abs_sub_n (t0, m3, m2, mn);
+  u1s = r3s^t0s^1;                     /* Reverse sign! */
+  MUL (u1, r3, rn, t0, mn);            /* u2 = s2 * t2 */
+  u1[rn+mn] = 0;
+  if (t0s)
+    {
+      t0s = abs_sub_n (t0, m1, t0, mn);
+      t0[mn] = 0;
+    }
+  else
+    {
+      t0[mn] = mpn_add_n (t0, t0, m1, mn);
+    }
+
+  /* FIXME: Could be simplified if we had space for rn + mn + 2 limbs
+     at r3. I'd expect that for matrices of random size, the high
+     words t0[mn] and r1[rn] are non-zero with a pretty small
+     probability. If that can be confirmed this should be done as an
+     unconditional rn x (mn+1) followed by an if (UNLIKELY (r1[rn]))
+     add_n. */
+  if (t0[mn] != 0)
+    {
+      MUL (r3, r1, rn, t0, mn + 1);    /* u3 = s3 * t3 */
+      ASSERT (r1[rn] < 2);
+      /* The product above ignored the high limb r1[rn]; when that limb
+        is 1, add t0 in at limb offset rn to account for it.  */
+      if (r1[rn] != 0)
+       mpn_add_n (r3 + rn, r3 + rn, t0, mn + 1);
+    }
+  else
+    {
+      MUL (r3, r1, rn + 1, t0, mn);
+    }
+
+  ASSERT (r3[rn+mn] < 4);
+
+  u0[rn+mn] = 0;
+  if (r1s^t0s)
+    {
+      r3s = abs_sub_n (r3, u0, r3, rn + mn + 1);
+    }
+  else
+    {
+      ASSERT_NOCARRY (mpn_add_n (r3, r3, u0, rn + mn + 1));
+      r3s = 0;                         /* u3 + u5 */
+    }
+
+  if (t0s)
+    {
+      t0[mn] = mpn_add_n (t0, t0, m0, mn);
+    }
+  else if (t0[mn] != 0)
+    {
+      t0[mn] -= mpn_sub_n (t0, t0, m0, mn);
+    }
+  else
+    {
+      t0s = abs_sub_n (t0, t0, m0, mn);
+    }
+  MUL (u0, r2, rn, t0, mn + 1);                /* u6 = s6 * t4 */
+  ASSERT (u0[rn+mn] < 2);
+  if (r1s)
+    {
+      ASSERT_NOCARRY (mpn_sub_n (r1, r2, r1, rn));
+    }
+  else
+    {
+      r1[rn] += mpn_add_n (r1, r1, r2, rn);
+    }
+  /* From here on rn includes the extra high limb stored at index rn of
+     s0 and r1 above, and the sums of products span rn + mn limbs.  */
+  rn++;
+  t0s = add_signed_n (r2, r3, r3s, u0, t0s, rn + mn);
+                                       /* u3 + u5 + u6 */
+  ASSERT (r2[rn+mn-1] < 4);
+  r3s = add_signed_n (r3, r3, r3s, u1, u1s, rn + mn);
+                                       /* -u2 + u3 + u5  */
+  ASSERT (r3[rn+mn-1] < 3);
+  MUL (u0, s0, rn, m1, mn);            /* u4 = s4 * t5 */
+  ASSERT (u0[rn+mn-1] < 2);
+  t0[mn] = mpn_add_n (t0, m3, m1, mn);
+  MUL (u1, r1, rn, t0, mn + 1);                /* u1 = s1 * t1 */
+  /* mn is reused as the limb count of the product-sized operands.  */
+  mn += rn;
+  ASSERT (u1[mn-1] < 4);
+  ASSERT (u1[mn] == 0);
+  /* add_signed_n returns the sign flag of the sum; ASSERT_NOCARRY here
+     checks that flag is 0.  */
+  ASSERT_NOCARRY (add_signed_n (r1, r3, r3s, u0, s0s, mn));
+                                       /* -u2 + u3 - u4 + u5  */
+  ASSERT (r1[mn-1] < 2);
+  if (r3s)
+    {
+      ASSERT_NOCARRY (mpn_add_n (r3, u1, r3, mn));
+    }
+  else
+    {
+      ASSERT_NOCARRY (mpn_sub_n (r3, u1, r3, mn));
+                                       /* u1 + u2 - u3 - u5  */
+    }
+  ASSERT (r3[mn-1] < 2);
+  if (t0s)
+    {
+      ASSERT_NOCARRY (mpn_add_n (r2, u1, r2, mn));
+    }
+  else
+    {
+      ASSERT_NOCARRY (mpn_sub_n (r2, u1, r2, mn));
+                                       /* u1 - u3 - u5 - u6  */
+    }
+  ASSERT (r2[mn-1] < 2);
+}
+
+/* Computes R = R * M in place, for R = (r0, r1; r2, r3) with rn-limb
+   elements and M = (m0, m1; m2, m3) with mn-limb elements; each result
+   element is rn + mn + 1 limbs.  When both sizes have reached
+   MATRIX22_STRASSEN_THRESHOLD the work is handed to
+   mpn_matrix22_mul_strassen; otherwise the rows are multiplied out
+   directly, using 3*rn + 2*mn limbs at tp.  */
+void
+mpn_matrix22_mul (mp_ptr r0, mp_ptr r1, mp_ptr r2, mp_ptr r3, mp_size_t rn,
+                 mp_srcptr m0, mp_srcptr m1, mp_srcptr m2, mp_srcptr m3, mp_size_t mn,
+                 mp_ptr tp)
+{
+  mp_ptr prod0, prod1;
+  int row;
+
+  if (! BELOW_THRESHOLD (rn, MATRIX22_STRASSEN_THRESHOLD)
+      && ! BELOW_THRESHOLD (mn, MATRIX22_STRASSEN_THRESHOLD))
+    {
+      mpn_matrix22_mul_strassen (r0, r1, r2, r3, rn,
+                                m0, m1, m2, m3, mn, tp);
+      return;
+    }
+
+  /* Scratch layout: rn limbs for a copy of the row's first element,
+     followed by two product areas of rn + mn limbs each.  */
+  prod0 = tp + rn;
+  prod1 = prod0 + rn + mn;
+
+  /* First pass handles the row (r0, r1), second pass the row (r2, r3).  */
+  for (row = 0; row < 2; row++)
+    {
+      /* The first element is overwritten below while its old value is
+        still needed, so keep a copy.  */
+      MPN_COPY (tp, r0, rn);
+
+      MUL (prod0, r0, rn, m0, mn);
+      MUL (prod1, r1, rn, m3, mn);
+      MUL (r0, r1, rn, m2, mn);
+      MUL (r1, tp, rn, m1, mn);
+
+      /* new r0 = old r0 * m0 + old r1 * m2,
+        new r1 = old r0 * m1 + old r1 * m3.  */
+      r0[rn+mn] = mpn_add_n (r0, r0, prod0, rn + mn);
+      r1[rn+mn] = mpn_add_n (r1, r1, prod1, rn + mn);
+
+      r0 = r2;
+      r1 = r3;
+    }
+}
diff --git a/mpn/generic/mod_1.c b/mpn/generic/mod_1.c

new file mode 100644 (file)

index 0000000..c5bbaad
--- /dev/null
+++ b/mpn/generic/mod_1.c
@@ -0,0 +1,250 @@
+/* mpn_mod_1(dividend_ptr, dividend_size, divisor_limb) --
+   Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB.
+   Return the single-limb remainder.
+   There are no constraints on the value of the divisor.
+
+Copyright 1991, 1993, 1994, 1999, 2000, 2002, 2007, 2008, 2009 Free
+Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* The size where udiv_qrnnd_preinv should be used rather than udiv_qrnnd,
+   meaning the quotient size where that should happen, the quotient size
+   being how many udiv divisions will be done.
+
+   The default is to use preinv always, CPUs where this doesn't suit have
+   tuned thresholds.  Note in particular that preinv should certainly be
+   used if that's the only division available (USE_PREINV_ALWAYS).  */
+
+/* Each threshold below is only a fallback, used when no value has been
+   defined before this point.  */
+
+/* Sizes from which the normalized and unnormalized routines in this file
+   switch from udiv_qrnnd to udiv_qrnnd_preinv.  */
+#ifndef MOD_1_NORM_THRESHOLD
+#define MOD_1_NORM_THRESHOLD  0
+#endif
+
+#ifndef MOD_1_UNNORM_THRESHOLD
+#define MOD_1_UNNORM_THRESHOLD  0
+#endif
+
+/* Sizes from which mpn_mod_1 leaves the routines in this file for
+   mpn_mod_1_1p, for unnormalized (1U) and normalized (1N) divisors.  */
+#ifndef MOD_1U_TO_MOD_1_1_THRESHOLD
+#define MOD_1U_TO_MOD_1_1_THRESHOLD  MP_SIZE_T_MAX /* default is not to use mpn_mod_1s */
+#endif
+
+#ifndef MOD_1N_TO_MOD_1_1_THRESHOLD
+#define MOD_1N_TO_MOD_1_1_THRESHOLD  MP_SIZE_T_MAX /* default is not to use mpn_mod_1s */
+#endif
+
+/* Sizes from which mpn_mod_1, for unnormalized divisors, moves on from
+   mpn_mod_1_1p to mpn_mod_1s_2p and from mpn_mod_1s_2p to
+   mpn_mod_1s_4p.  */
+#ifndef MOD_1_1_TO_MOD_1_2_THRESHOLD
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD  10
+#endif
+
+#ifndef MOD_1_2_TO_MOD_1_4_THRESHOLD
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD  20
+#endif
+
+
+/* The comments in mpn/generic/divrem_1.c apply here too.
+
+   As noted in the algorithms section of the manual, the shifts in the loop
+   for the unnorm case can be avoided by calculating r = a%(d*2^n), followed
+   by a final (r*2^n)%(d*2^n).  In fact if it happens that a%(d*2^n) can
+   skip a division where (a*2^n)%(d*2^n) can't then there's the same number
+   of divide steps, though how often that happens depends on the assumed
+   distributions of dividend and divisor.  In any case this idea is left to
+   CPU specific implementations to consider.  */
+
+/* Remainder of {up,un} divided by d, for a divisor that is not
+   normalized.  mpn_mod_1 in this file calls this only when
+   GMP_NUMB_HIGHBIT of the divisor is clear.  Requires un > 0 and
+   d != 0.  */
+static mp_limb_t
+mpn_mod_1_unnorm (mp_srcptr up, mp_size_t un, mp_limb_t d)
+{
+  mp_size_t  i;
+  mp_limb_t  n1, n0, r;
+  mp_limb_t  dummy;            /* receives the unwanted quotient limbs */
+  int cnt;
+
+  ASSERT (un > 0);
+  ASSERT (d != 0);
+
+  d <<= GMP_NAIL_BITS;
+
+  /* Skip a division if high < divisor.  Having the test here before
+     normalizing will still skip as often as possible.  */
+  r = up[un - 1] << GMP_NAIL_BITS;
+  if (r < d)
+    {
+      r >>= GMP_NAIL_BITS;
+      un--;
+      if (un == 0)
+       return r;
+    }
+  else
+    r = 0;
+
+  /* If udiv_qrnnd doesn't need a normalized divisor, a plain division
+     loop with no shifting can be used.  */
+  if (! UDIV_NEEDS_NORMALIZATION
+      && BELOW_THRESHOLD (un, MOD_1_UNNORM_THRESHOLD))
+    {
+      for (i = un - 1; i >= 0; i--)
+       {
+         n0 = up[i] << GMP_NAIL_BITS;
+         udiv_qrnnd (dummy, r, r, n0, d);
+         r >>= GMP_NAIL_BITS;
+       }
+      return r;
+    }
+
+  /* Normalize the divisor.  The shifts by GMP_LIMB_BITS - cnt and
+     GMP_NUMB_BITS - cnt below presume cnt != 0, which holds for a divisor
+     whose high bit is clear.  */
+  count_leading_zeros (cnt, d);
+  d <<= cnt;
+
+  /* Shift the running remainder up by cnt as well, bringing in the top
+     cnt bits of the highest remaining limb.  */
+  n1 = up[un - 1] << GMP_NAIL_BITS;
+  r = (r << cnt) | (n1 >> (GMP_LIMB_BITS - cnt));
+
+  if (UDIV_NEEDS_NORMALIZATION
+      && BELOW_THRESHOLD (un, MOD_1_UNNORM_THRESHOLD))
+    {
+      /* Each step divides by the shifted d, forming the shifted dividend
+        limb from two adjacent source limbs.  */
+      for (i = un - 2; i >= 0; i--)
+       {
+         n0 = up[i] << GMP_NAIL_BITS;
+         udiv_qrnnd (dummy, r, r,
+                     (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt)),
+                     d);
+         r >>= GMP_NAIL_BITS;
+         n1 = n0;
+       }
+      /* Lowest limb: only zeros are shifted in from below.  */
+      udiv_qrnnd (dummy, r, r, n1 << cnt, d);
+      r >>= GMP_NAIL_BITS;
+      /* Undo the normalization shift on the remainder.  */
+      return r >> cnt;
+    }
+  else
+    {
+      /* Same loop as above, but dividing with a precomputed inverse of
+        the shifted d.  */
+      mp_limb_t inv;
+      invert_limb (inv, d);
+
+      for (i = un - 2; i >= 0; i--)
+       {
+         n0 = up[i] << GMP_NAIL_BITS;
+         udiv_qrnnd_preinv (dummy, r, r,
+                            (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt)),
+                            d, inv);
+         r >>= GMP_NAIL_BITS;
+         n1 = n0;
+       }
+      udiv_qrnnd_preinv (dummy, r, r, n1 << cnt, d, inv);
+      r >>= GMP_NAIL_BITS;
+      return r >> cnt;
+    }
+}
+
+/* Remainder of {up,un} divided by d, for a divisor that has its most
+   significant bit set once the nail shift is applied.  Requires
+   un > 0.  */
+static mp_limb_t
+mpn_mod_1_norm (mp_srcptr up, mp_size_t un, mp_limb_t d)
+{
+  mp_size_t  k;
+  mp_limb_t  limb, rem;
+  mp_limb_t  qdiscard;         /* quotient limbs are not wanted */
+
+  ASSERT (un > 0);
+
+  d <<= GMP_NAIL_BITS;
+
+  ASSERT (d & GMP_LIMB_HIGHBIT);
+
+  /* With the top bit of d set, the high limb is below 2*d, so at most one
+     subtraction turns it into the starting remainder.  */
+  rem = up[un - 1] << GMP_NAIL_BITS;
+  if (rem >= d)
+    rem -= d;
+  rem >>= GMP_NAIL_BITS;
+
+  un--;
+  if (un != 0)
+    {
+      if (! BELOW_THRESHOLD (un, MOD_1_NORM_THRESHOLD))
+       {
+         /* Enough limbs left: divide using a precomputed inverse.  */
+         mp_limb_t  dinv;
+         invert_limb (dinv, d);
+         k = un;
+         while (--k >= 0)
+           {
+             limb = up[k] << GMP_NAIL_BITS;
+             udiv_qrnnd_preinv (qdiscard, rem, rem, limb, d, dinv);
+             rem >>= GMP_NAIL_BITS;
+           }
+       }
+      else
+       {
+         /* Few limbs left: plain two-limb by one-limb divisions.  */
+         k = un;
+         while (--k >= 0)
+           {
+             limb = up[k] << GMP_NAIL_BITS;
+             udiv_qrnnd (qdiscard, rem, rem, limb, d);
+             rem >>= GMP_NAIL_BITS;
+           }
+       }
+    }
+
+  return rem;
+}
+
+/* Return the remainder of {ap,n} divided by the single limb b.  b must be
+   non-zero; n may be zero, in which case the result is 0.  The work is
+   dispatched on whether b has GMP_NUMB_HIGHBIT set and on n relative to
+   the MOD_1*_THRESHOLD values.  */
+mp_limb_t
+mpn_mod_1 (mp_srcptr ap, mp_size_t n, mp_limb_t b)
+{
+  ASSERT (n >= 0);
+  ASSERT (b != 0);
+
+  /* An empty dividend is accepted here rather than left to the callers;
+     n==0 is currently required by mpz/fdiv_r_ui.c and possibly other
+     places.  */
+  if (n == 0)
+    return 0;
+
+  if (UNLIKELY ((b & GMP_NUMB_HIGHBIT) != 0))
+    {
+      /* Divisor with its high bit already set.  */
+      mp_limb_t pre[4];
+
+      if (BELOW_THRESHOLD (n, MOD_1N_TO_MOD_1_1_THRESHOLD))
+       return mpn_mod_1_norm (ap, n, b);
+
+      mpn_mod_1_1p_cps (pre, b);
+      return mpn_mod_1_1p (ap, n, b, pre);
+    }
+
+  /* From here on the high bit of b is clear.  The precomputation routines
+     leave the normalizing shift count in pre[1], and the divisor is
+     passed on shifted by that count.  */
+  if (BELOW_THRESHOLD (n, MOD_1U_TO_MOD_1_1_THRESHOLD))
+    return mpn_mod_1_unnorm (ap, n, b);
+
+  if (BELOW_THRESHOLD (n, MOD_1_1_TO_MOD_1_2_THRESHOLD))
+    {
+      mp_limb_t pre[4];
+      mpn_mod_1_1p_cps (pre, b);
+      return mpn_mod_1_1p (ap, n, b << pre[1], pre);
+    }
+
+  /* The four-way variant is skipped for b above GMP_NUMB_MASK / 4,
+     whatever the size.  */
+  if (BELOW_THRESHOLD (n, MOD_1_2_TO_MOD_1_4_THRESHOLD)
+      || UNLIKELY (b > GMP_NUMB_MASK / 4))
+    {
+      mp_limb_t pre[5];
+      mpn_mod_1s_2p_cps (pre, b);
+      return mpn_mod_1s_2p (ap, n, b << pre[1], pre);
+    }
+
+  {
+    mp_limb_t pre[7];
+    mpn_mod_1s_4p_cps (pre, b);
+    return mpn_mod_1s_4p (ap, n, b << pre[1], pre);
+  }
+}
diff --git a/mpn/generic/mod_1_1.c b/mpn/generic/mod_1_1.c

new file mode 100644 (file)

index 0000000..c6a61eb
--- /dev/null
+++ b/mpn/generic/mod_1_1.c
@@ -0,0 +1,99 @@
+/* mpn_mod_1_1p (ap, n, b, cps)
+   Divide (ap,,n) by b.  Return the single-limb remainder.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+   Based on a suggestion by Peter L. Montgomery.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2008, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Precompute the constants mpn_mod_1_1p needs for the divisor b:
+     cps[0]  inverse of the normalized divisor, from invert_limb
+     cps[1]  number of leading zero bits of b (the normalizing shift)
+     cps[2]  first multiplier, shifted back down by cps[1]
+     cps[3]  second multiplier, shifted back down by cps[1]  */
+void
+mpn_mod_1_1p_cps (mp_limb_t cps[4], mp_limb_t b)
+{
+  mp_limb_t binv;
+  mp_limb_t b1, b2;
+  int shift;
+
+  count_leading_zeros (shift, b);
+
+  /* Everything below works on the normalized divisor.  */
+  b <<= shift;
+  invert_limb (binv, b);
+
+  /* The general formula shifts by GMP_LIMB_BITS - shift, which is only
+     valid for a non-zero shift; an already normalized divisor gets the
+     simple form.  */
+  if (LIKELY (shift != 0))
+    b1 = -b * ((binv >> (GMP_LIMB_BITS-shift)) | (CNST_LIMB(1) << shift));
+  else
+    b1 = -b;
+  ASSERT (b1 <= b);            /* NB: not fully reduced mod b */
+  udiv_rnd_preinv (b2, b1, b, binv);
+
+  cps[0] = binv;
+  cps[1] = shift;
+  cps[2] = b1 >> shift;
+  cps[3] = b2 >> shift;
+}
+
+/* Remainder of {ap,n} divided by a single limb, using the table filled in
+   by mpn_mod_1_1p_cps: bmodb[0] is the inverse used for the final
+   division, bmodb[1] the normalizing shift count, and bmodb[2], bmodb[3]
+   the two multipliers used to fold limbs together.  Requires n >= 2.  The
+   b used for the final division is the one passed in; the callers in
+   mod_1.c pass the divisor already shifted left by bmodb[1].  */
+mp_limb_t
+mpn_mod_1_1p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t bmodb[4])
+{
+  mp_limb_t rh, rl, bi, q, ph, pl, r;
+  mp_limb_t B1modb, B2modb;
+  mp_size_t i;
+  int cnt;
+  mp_limb_t mask;
+
+  ASSERT (n >= 2);             /* fix tuneup.c if this is changed */
+
+  B1modb = bmodb[2];
+  B2modb = bmodb[3];
+
+  /* Start the two-limb accumulator rh:rl from the two highest limbs:
+     ap[n-1] * B1modb + ap[n-2].  */
+  umul_ppmm (ph, pl, ap[n - 1], B1modb);
+  add_ssaaaa (rh, rl, ph, pl, 0, ap[n - 2]);
+
+  /* Fold in one more limb per iteration, keeping the accumulator at two
+     limbs.  */
+  for (i = n - 3; i >= 0; i -= 1)
+    {
+      /* rr = ap[i]                            < B
+           + LO(rr)  * (B mod b)               <= (B-1)(b-1)
+           + HI(rr)  * (B^2 mod b)             <= (B-1)(b-1)
+      */
+      umul_ppmm (ph, pl, rl, B1modb);
+      add_ssaaaa (ph, pl, ph, pl, 0, ap[i]);
+
+      umul_ppmm (rh, rl, rh, B2modb);
+      add_ssaaaa (rh, rl, rh, rl, ph, pl);
+    }
+
+  bi = bmodb[0];
+  cnt = bmodb[1];
+
+  /* Shift the accumulator up by cnt.  The guard avoids a shift by
+     GMP_LIMB_BITS when cnt is 0; the low limb is shifted in the division
+     call below.  */
+  if (LIKELY (cnt != 0))
+    rh = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
+
+  /* Branch-free conditional subtraction: mask is all ones when rh >= b,
+     else zero, so that rh < b going into the division.  */
+  mask = -(mp_limb_t) (rh >= b);
+  rh -= mask & b;
+
+  /* Final two-limb by one-limb division; only the remainder is used.  */
+  udiv_qrnnd_preinv (q, r, rh, rl << cnt, b, bi);
+
+  /* Undo the normalizing shift on the remainder.  */
+  return r >> cnt;
+}
diff --git a/mpn/generic/mod_1_2.c b/mpn/generic/mod_1_2.c

new file mode 100644 (file)

index 0000000..c81b202
--- /dev/null
+++ b/mpn/generic/mod_1_2.c
@@ -0,0 +1,141 @@
+/* mpn_mod_1s_2p (ap, n, b, cps)
+   Divide (ap,,n) by b.  Return the single-limb remainder.
+   Requires that b < B / 2.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2008, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+void
+mpn_mod_1s_2p_cps (mp_limb_t cps[5], mp_limb_t b)
+{ /* Fill cps[0..4] with the precomputed values that mpn_mod_1s_2p reads.  */
+  mp_limb_t bi;
+  mp_limb_t B1modb, B2modb, B3modb;
+  int cnt;
+
+  ASSERT (b <= (~(mp_limb_t) 0) / 2);  /* top bit clear, so cnt >= 1 below */
+
+  count_leading_zeros (cnt, b);
+
+  b <<= cnt;                   /* from here on b is the shifted divisor */
+  invert_limb (bi, b);
+
+  B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
+  ASSERT (B1modb <= b);                /* NB: not fully reduced mod b */
+  udiv_rnd_preinv (B2modb, B1modb, b, bi);     /* each step derives the next */
+  udiv_rnd_preinv (B3modb, B2modb, b, bi);     /* entry from the previous one */
+
+  cps[0] = bi;                 /* invert_limb result for the shifted b */
+  cps[1] = cnt;                        /* shift count applied to b above */
+  cps[2] = B1modb >> cnt;      /* read by mpn_mod_1s_2p as B1modb */
+  cps[3] = B2modb >> cnt;      /* read by mpn_mod_1s_2p as B2modb */
+  cps[4] = B3modb >> cnt;      /* read by mpn_mod_1s_2p as B3modb */
+
+#if WANT_ASSERT
+  { /* Check that cps[2] + cps[3] + cps[4] does not wrap around a limb.  */
+    int i;
+    b = cps[2];                        /* b is reused as the running sum */
+    for (i = 3; i <= 4; i++)
+      {
+       b += cps[i];
+       ASSERT (b >= cps[i]);   /* fails if the addition wrapped */
+      }
+  }
+#endif
+}
+
+mp_limb_t
+mpn_mod_1s_2p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t cps[5])
+{ /* Remainder of {ap,n} by b; the main loop consumes two limbs per pass.  */
+  mp_limb_t rh, rl, bi, q, ph, pl, ch, cl, r;
+  mp_limb_t B1modb, B2modb, B3modb;
+  mp_size_t i;
+  int cnt;
+
+  ASSERT (n >= 1);
+
+  B1modb = cps[2];
+  B2modb = cps[3];
+  B3modb = cps[4];
+  /* Seed rh:rl from the top limbs so that an even number of limbs remains.  */
+  if ((n & 1) != 0)
+    {
+      if (n == 1)              /* single limb: divide it directly */
+       {
+         rl = ap[n - 1];
+         bi = cps[0];
+         cnt = cps[1];
+         udiv_qrnnd_preinv (q, r, rl >> (GMP_LIMB_BITS - cnt),
+                            rl << cnt, b, bi);
+         return r >> cnt;
+       }
+
+      umul_ppmm (ph, pl, ap[n - 2], B1modb);   /* odd n >= 3: take three limbs */
+      add_ssaaaa (ph, pl, ph, pl, 0, ap[n - 3]);
+      umul_ppmm (rh, rl, ap[n - 1], B2modb);
+      add_ssaaaa (rh, rl, rh, rl, ph, pl);
+      n--;                     /* with the loop's n - 4 start, skips all three */
+    }
+  else
+    {
+      umul_ppmm (rh, rl, ap[n - 1], B1modb);   /* even n: take two limbs */
+      add_ssaaaa (rh, rl, rh, rl, 0, ap[n - 2]);
+    }
+
+  for (i = n - 4; i >= 0; i -= 2)
+    {
+      /* rr = ap[i]                            < B
+           + ap[i+1] * (B mod b)               <= (B-1)(b-1)
+           + LO(rr)  * (B^2 mod b)             <= (B-1)(b-1)
+           + HI(rr)  * (B^3 mod b)             <= (B-1)(b-1)
+      */
+      umul_ppmm (ph, pl, ap[i + 1], B1modb);
+      add_ssaaaa (ph, pl, ph, pl, 0, ap[i + 0]);
+
+      umul_ppmm (ch, cl, rl, B2modb);
+      add_ssaaaa (ph, pl, ph, pl, ch, cl);
+
+      umul_ppmm (rh, rl, rh, B3modb);
+      add_ssaaaa (rh, rl, rh, rl, ph, pl);
+    }
+
+  bi = cps[0];
+  cnt = cps[1];
+
+#if 1  /* active variant: fold rh into rh:rl with one multiply by B1modb */
+  umul_ppmm (rh, cl, rh, B1modb);
+  add_ssaaaa (rh, rl, rh, rl, 0, cl);
+  r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));     /* high limb of rh:rl << cnt */
+#else  /* disabled variant: reduce the high part with an extra division */
+  udiv_qrnnd_preinv (q, r, rh >> (GMP_LIMB_BITS - cnt),
+                    (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt)), b, bi);
+  ASSERT (q <= 2);     /* optimize for small quotient? */
+#endif
+
+  udiv_qrnnd_preinv (q, r, r, rl << cnt, b, bi);       /* q is not used */
+
+  return r >> cnt;             /* undo the shift by cnt */
+}
diff --git a/mpn/generic/mod_1_3.c b/mpn/generic/mod_1_3.c

new file mode 100644 (file)

index 0000000..c7c6299
--- /dev/null
+++ b/mpn/generic/mod_1_3.c
@@ -0,0 +1,148 @@
+/* mpn_mod_1s_3p (ap, n, b, cps)
+   Divide (ap,,n) by b.  Return the single-limb remainder.
+   Requires that b < B / 3.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2008, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+void
+mpn_mod_1s_3p_cps (mp_limb_t cps[6], mp_limb_t b)
+{ /* Fill cps[0..5] with the precomputed values that mpn_mod_1s_3p reads.  */
+  mp_limb_t bi;
+  mp_limb_t B1modb, B2modb, B3modb, B4modb;
+  int cnt;
+
+  ASSERT (b <= (~(mp_limb_t) 0) / 3);  /* top bit clear, so cnt >= 1 below */
+
+  count_leading_zeros (cnt, b);
+
+  b <<= cnt;                   /* from here on b is the shifted divisor */
+  invert_limb (bi, b);
+
+  B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
+  ASSERT (B1modb <= b);                /* NB: not fully reduced mod b */
+  udiv_rnd_preinv (B2modb, B1modb, b, bi);     /* each step derives the next */
+  udiv_rnd_preinv (B3modb, B2modb, b, bi);     /* entry from the previous one */
+  udiv_rnd_preinv (B4modb, B3modb, b, bi);
+
+  cps[0] = bi;                 /* invert_limb result for the shifted b */
+  cps[1] = cnt;                        /* shift count applied to b above */
+  cps[2] = B1modb >> cnt;      /* read by mpn_mod_1s_3p as B1modb */
+  cps[3] = B2modb >> cnt;      /* read by mpn_mod_1s_3p as B2modb */
+  cps[4] = B3modb >> cnt;      /* read by mpn_mod_1s_3p as B3modb */
+  cps[5] = B4modb >> cnt;      /* read by mpn_mod_1s_3p as B4modb */
+
+#if WANT_ASSERT
+  { /* Check that cps[2] + ... + cps[5] does not wrap around a limb.  */
+    int i;
+    b = cps[2];                        /* b is reused as the running sum */
+    for (i = 3; i <= 5; i++)
+      {
+       b += cps[i];
+       ASSERT (b >= cps[i]);   /* fails if the addition wrapped */
+      }
+  }
+#endif
+}
+
+mp_limb_t
+mpn_mod_1s_3p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t cps[6])
+{ /* Remainder of {ap,n} by b; the main loop consumes three limbs per pass.  */
+  mp_limb_t rh, rl, bi, q, ph, pl, ch, cl, r;
+  mp_limb_t B1modb, B2modb, B3modb, B4modb;
+  mp_size_t i;
+  int cnt;
+
+  ASSERT (n >= 1);
+
+  B1modb = cps[2];
+  B2modb = cps[3];
+  B3modb = cps[4];
+  B4modb = cps[5];
+
+  /* We compute n mod 3 in a tricky way, which works except for when n is so
+     close to the maximum size that we don't need to support it.  The final
+     cast to int is a workaround for HP cc.  */
+  switch ((int) ((mp_limb_t) n * MODLIMB_INVERSE_3 >> (GMP_NUMB_BITS - 2)))
+    {
+    case 0:    /* seed rh:rl from the top three limbs */
+      umul_ppmm (ph, pl, ap[n - 2], B1modb);
+      add_ssaaaa (ph, pl, ph, pl, 0, ap[n - 3]);
+      umul_ppmm (rh, rl, ap[n - 1], B2modb);
+      add_ssaaaa (rh, rl, rh, rl, ph, pl);
+      n -= 3;
+      break;
+    case 2:    /* n mod 3 = 1 */
+      rh = 0;
+      rl = ap[n - 1];
+      n -= 1;
+      break;
+    case 1:    /* n mod 3 = 2 */
+      umul_ppmm (ph, pl, ap[n - 1], B1modb);
+      add_ssaaaa (rh, rl, ph, pl, 0, ap[n - 2]);
+      n -= 2;
+      break;
+    }          /* no default: rh and rl are set only by the three cases above */
+
+  for (i = n - 3; i >= 0; i -= 3)
+    {
+      /* rr = ap[i]                            < B
+           + ap[i+1] * (B mod b)               <= (B-1)(b-1)
+           + ap[i+2] * (B^2 mod b)             <= (B-1)(b-1)
+           + LO(rr)  * (B^3 mod b)             <= (B-1)(b-1)
+           + HI(rr)  * (B^4 mod b)             <= (B-1)(b-1)
+      */
+      umul_ppmm (ph, pl, ap[i + 1], B1modb);
+      add_ssaaaa (ph, pl, ph, pl, 0, ap[i + 0]);
+
+      umul_ppmm (ch, cl, ap[i + 2], B2modb);
+      add_ssaaaa (ph, pl, ph, pl, ch, cl);
+
+      umul_ppmm (ch, cl, rl, B3modb);
+      add_ssaaaa (ph, pl, ph, pl, ch, cl);
+
+      umul_ppmm (rh, rl, rh, B4modb);
+      add_ssaaaa (rh, rl, rh, rl, ph, pl);
+    }
+
+  bi = cps[0];
+  cnt = cps[1];
+
+#if 1  /* active variant: fold rh into rh:rl with one multiply by B1modb */
+  umul_ppmm (rh, cl, rh, B1modb);
+  add_ssaaaa (rh, rl, rh, rl, 0, cl);
+  r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));     /* high limb of rh:rl << cnt */
+#else  /* disabled variant: reduce the high part with an extra division */
+  udiv_qrnnd_preinv (q, r, rh >> (GMP_LIMB_BITS - cnt),
+                    (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt)), b, bi);
+  ASSERT (q <= 3);     /* optimize for small quotient? */
+#endif
+
+  udiv_qrnnd_preinv (q, r, r, rl << cnt, b, bi);       /* q is not used */
+
+  return r >> cnt;             /* undo the shift by cnt */
+}
diff --git a/mpn/generic/mod_1_4.c b/mpn/generic/mod_1_4.c

new file mode 100644 (file)

index 0000000..bde191b
--- /dev/null
+++ b/mpn/generic/mod_1_4.c
@@ -0,0 +1,161 @@
+/* mpn_mod_1s_4p (ap, n, b, cps)
+   Divide (ap,,n) by b.  Return the single-limb remainder.
+   Requires that b < B / 4.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2008, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+void
+mpn_mod_1s_4p_cps (mp_limb_t cps[7], mp_limb_t b)
+{ /* Fill cps[0..6] with the precomputed values that mpn_mod_1s_4p reads.  */
+  mp_limb_t bi;
+  mp_limb_t B1modb, B2modb, B3modb, B4modb, B5modb;
+  int cnt;
+
+  ASSERT (b <= (~(mp_limb_t) 0) / 4);  /* top bit clear, so cnt >= 1 below */
+
+  count_leading_zeros (cnt, b);
+
+  b <<= cnt;                   /* from here on b is the shifted divisor */
+  invert_limb (bi, b);
+
+  B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
+  ASSERT (B1modb <= b);                /* NB: not fully reduced mod b */
+  udiv_rnd_preinv (B2modb, B1modb, b, bi);     /* each step derives the next */
+  udiv_rnd_preinv (B3modb, B2modb, b, bi);     /* entry from the previous one */
+  udiv_rnd_preinv (B4modb, B3modb, b, bi);
+  udiv_rnd_preinv (B5modb, B4modb, b, bi);
+
+  cps[0] = bi;                 /* invert_limb result for the shifted b */
+  cps[1] = cnt;                        /* shift count applied to b above */
+  cps[2] = B1modb >> cnt;      /* read by mpn_mod_1s_4p as B1modb */
+  cps[3] = B2modb >> cnt;      /* read by mpn_mod_1s_4p as B2modb */
+  cps[4] = B3modb >> cnt;      /* read by mpn_mod_1s_4p as B3modb */
+  cps[5] = B4modb >> cnt;      /* read by mpn_mod_1s_4p as B4modb */
+  cps[6] = B5modb >> cnt;      /* read by mpn_mod_1s_4p as B5modb */
+
+#if WANT_ASSERT
+  { /* Check that cps[2] + ... + cps[6] does not wrap around a limb.  */
+    int i;
+    b = cps[2];                        /* b is reused as the running sum */
+    for (i = 3; i <= 6; i++)
+      {
+       b += cps[i];
+       ASSERT (b >= cps[i]);   /* fails if the addition wrapped */
+      }
+  }
+#endif
+}
+
+mp_limb_t
+mpn_mod_1s_4p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t cps[7])
+{ /* Remainder of {ap,n} by b; the main loop consumes four limbs per pass.  */
+  mp_limb_t rh, rl, bi, q, ph, pl, ch, cl, r;
+  mp_limb_t B1modb, B2modb, B3modb, B4modb, B5modb;
+  mp_size_t i;
+  int cnt;
+
+  ASSERT (n >= 1);
+
+  B1modb = cps[2];
+  B2modb = cps[3];
+  B3modb = cps[4];
+  B4modb = cps[5];
+  B5modb = cps[6];
+  /* Seed rh:rl from the top limbs so that a multiple of four limbs remains.  */
+  switch (n & 3)
+    {
+    case 0:    /* take the top four limbs */
+      umul_ppmm (ph, pl, ap[n - 3], B1modb);
+      add_ssaaaa (ph, pl, ph, pl, 0, ap[n - 4]);
+      umul_ppmm (ch, cl, ap[n - 2], B2modb);
+      add_ssaaaa (ph, pl, ph, pl, ch, cl);
+      umul_ppmm (rh, rl, ap[n - 1], B3modb);
+      add_ssaaaa (rh, rl, rh, rl, ph, pl);
+      n -= 4;
+      break;
+    case 1:    /* take the top limb */
+      rh = 0;
+      rl = ap[n - 1];
+      n -= 1;
+      break;
+    case 2:    /* take the top two limbs */
+      umul_ppmm (ph, pl, ap[n - 1], B1modb);
+      add_ssaaaa (rh, rl, ph, pl, 0, ap[n - 2]);
+      n -= 2;
+      break;
+    case 3:    /* take the top three limbs */
+      umul_ppmm (ph, pl, ap[n - 2], B1modb);
+      add_ssaaaa (ph, pl, ph, pl, 0, ap[n - 3]);
+      umul_ppmm (rh, rl, ap[n - 1], B2modb);
+      add_ssaaaa (rh, rl, rh, rl, ph, pl);
+      n -= 3;
+      break;
+    }
+
+  for (i = n - 4; i >= 0; i -= 4)
+    {
+      /* rr = ap[i]                            < B
+           + ap[i+1] * (B mod b)               <= (B-1)(b-1)
+           + ap[i+2] * (B^2 mod b)             <= (B-1)(b-1)
+           + ap[i+3] * (B^3 mod b)             <= (B-1)(b-1)
+           + LO(rr)  * (B^4 mod b)             <= (B-1)(b-1)
+           + HI(rr)  * (B^5 mod b)             <= (B-1)(b-1)
+      */
+      umul_ppmm (ph, pl, ap[i + 1], B1modb);
+      add_ssaaaa (ph, pl, ph, pl, 0, ap[i + 0]);
+
+      umul_ppmm (ch, cl, ap[i + 2], B2modb);
+      add_ssaaaa (ph, pl, ph, pl, ch, cl);
+
+      umul_ppmm (ch, cl, ap[i + 3], B3modb);
+      add_ssaaaa (ph, pl, ph, pl, ch, cl);
+
+      umul_ppmm (ch, cl, rl, B4modb);
+      add_ssaaaa (ph, pl, ph, pl, ch, cl);
+
+      umul_ppmm (rh, rl, rh, B5modb);
+      add_ssaaaa (rh, rl, rh, rl, ph, pl);
+    }
+
+  bi = cps[0];
+  cnt = cps[1];
+
+#if 1  /* active variant: fold rh into rh:rl with one multiply by B1modb */
+  umul_ppmm (rh, cl, rh, B1modb);
+  add_ssaaaa (rh, rl, rh, rl, 0, cl);
+  r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));     /* high limb of rh:rl << cnt */
+#else  /* disabled variant: reduce the high part with an extra division */
+  udiv_qrnnd_preinv (q, r, rh >> (GMP_LIMB_BITS - cnt),
+                    (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt)), b, bi);
+  ASSERT (q <= 4);     /* optimize for small quotient? */
+#endif
+
+  udiv_qrnnd_preinv (q, r, r, rl << cnt, b, bi);       /* q is not used */
+
+  return r >> cnt;             /* undo the shift by cnt */
+}
diff --git a/mpn/generic/mod_34lsub1.c b/mpn/generic/mod_34lsub1.c

new file mode 100644 (file)

index 0000000..6bd1498
--- /dev/null
+++ b/mpn/generic/mod_34lsub1.c
@@ -0,0 +1,120 @@
+/* mpn_mod_34lsub1 -- remainder modulo 2^(GMP_NUMB_BITS*3/4)-1.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Calculate a remainder from {p,n} divided by 2^(GMP_NUMB_BITS*3/4)-1.
+   The remainder is not fully reduced, it's any limb value congruent to
+   {p,n} modulo that divisor.
+
+   This implementation is only correct when GMP_NUMB_BITS is a multiple of
+   4.
+
+   FIXME: If GMP_NAIL_BITS is some silly big value during development then
+   it's possible the carry accumulators c0,c1,c2 could overflow.
+
+   General notes:
+
+   The basic idea is to use a set of N accumulators (N=3 in this case) to
+   effectively get a remainder mod 2^(GMP_NUMB_BITS*N)-1 followed at the end
+   by a reduction to GMP_NUMB_BITS*N/M bits (M=4 in this case) for a
+   remainder mod 2^(GMP_NUMB_BITS*N/M)-1.  N and M are chosen to give a good
+   set of small prime factors in 2^(GMP_NUMB_BITS*N/M)-1.
+
+   N=3 M=4 suits GMP_NUMB_BITS==32 and GMP_NUMB_BITS==64 quite well, giving
+   a few more primes than a single accumulator N=1 does, and for no extra
+   cost (assuming the processor has a decent number of registers).
+
+   For strange nailified values of GMP_NUMB_BITS the idea would be to look
+   for what N and M give good primes.  With GMP_NUMB_BITS not a power of 2
+   the choices for M may be opened up a bit.  But such things are probably
+   best done in separate code, not grafted on here.  */
+
+#if GMP_NUMB_BITS % 4 == 0
+
+#define B1  (GMP_NUMB_BITS / 4)        /* one quarter of the numb bits */
+#define B2  (B1 * 2)           /* one half */
+#define B3  (B1 * 3)           /* three quarters: the width of the modulus */
+
+#define M1  ((CNST_LIMB(1) << B1) - 1) /* mask of the low B1 bits */
+#define M2  ((CNST_LIMB(1) << B2) - 1) /* mask of the low B2 bits */
+#define M3  ((CNST_LIMB(1) << B3) - 1) /* mask of the low B3 bits */
+
+#define LOW0(n)      ((n) & M3)                /* low B3 bits, left in place */
+#define HIGH0(n)     ((n) >> B3)       /* remaining high B1 bits, moved to bit 0 */
+
+#define LOW1(n)      (((n) & M2) << B1)        /* low B2 bits, moved up by B1 */
+#define HIGH1(n)     ((n) >> B2)       /* remaining high B2 bits, moved to bit 0 */
+
+#define LOW2(n)      (((n) & M1) << B2)        /* low B1 bits, moved up by B2 */
+#define HIGH2(n)     ((n) >> B1)       /* remaining high B3 bits, moved to bit 0 */
+
+#define PARTS0(n)    (LOW0(n) + HIGH0(n))      /* the two pieces summed */
+#define PARTS1(n)    (LOW1(n) + HIGH1(n))
+#define PARTS2(n)    (LOW2(n) + HIGH2(n))
+/* ADD: add val into a through ADDC_LIMB and accumulate its carry output in c.  */
+#define ADD(c,a,val)                    \
+  do {                                  \
+    mp_limb_t  new_c;                   \
+    ADDC_LIMB (new_c, a, a, val);       \
+    (c) += new_c;                       \
+  } while (0)
+
+mp_limb_t
+mpn_mod_34lsub1 (mp_srcptr p, mp_size_t n)
+{
+  /* Three running limb sums, one per limb position within a group of three,
+     each paired with a count of the carries that sum has produced.  */
+  mp_limb_t  lo0 = 0, lo1 = 0, lo2 = 0;
+  mp_limb_t  cy0 = 0, cy1 = 0, cy2 = 0;
+
+  ASSERT (n >= 1);
+  ASSERT (n/3 < GMP_NUMB_MAX);
+
+  /* Whole groups of three limbs.  */
+  for (; n >= 3; n -= 3, p += 3)
+    {
+      ADD (cy0, lo0, p[0]);
+      ADD (cy1, lo1, p[1]);
+      ADD (cy2, lo2, p[2]);
+    }
+
+  /* Zero, one or two limbs are left over at this point.  */
+  if (n != 0)
+    {
+      ADD (cy0, lo0, p[0]);
+      if (n != 1)
+       ADD (cy1, lo1, p[1]);
+    }
+
+  /* Combine the sums and the carry counts through the PARTS0/1/2 splits;
+     the result is congruent to {p,n} but not fully reduced.  */
+  return
+    PARTS0 (lo0) + PARTS1 (lo1) + PARTS2 (lo2)
+    + PARTS1 (cy0) + PARTS2 (cy1) + PARTS0 (cy2);
+}
+
+#endif
diff --git a/mpn/generic/mode1o.c b/mpn/generic/mode1o.c

new file mode 100644 (file)

index 0000000..e8978a4
--- /dev/null
+++ b/mpn/generic/mode1o.c
@@ -0,0 +1,225 @@
+/* mpn_modexact_1c_odd -- mpn by limb exact division style remainder.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Calculate an r satisfying
+
+           r*B^k + a - c == q*d
+
+   where B=2^GMP_LIMB_BITS, a is {src,size}, k is either size or size-1
+   (the caller won't know which), and q is the quotient (discarded).  d must
+   be odd, c can be any limb value.
+
+   If c<d then r will be in the range 0<=r<d, or if c>=d then 0<=r<=d.
+
+   This slightly strange function suits the initial Nx1 reduction for GCDs
+   or Jacobi symbols since the factors of 2 in B^k can be ignored, leaving
+   -r == a mod d (by passing c=0).  For a GCD the factor of -1 on r can be
+   ignored, or for the Jacobi symbol it can be accounted for.  The function
+   also suits divisibility and congruence testing since if r=0 (or r=d) is
+   obtained then a==c mod d.
+
+
+   r is a bit like the remainder returned by mpn_divexact_by3c, and is the
+   sort of remainder mpn_divexact_1 might return.  Like mpn_divexact_by3c, r
+   represents a borrow, since effectively quotient limbs are chosen so that
+   subtracting that multiple of d from src at each step will produce a zero
+   limb.
+
+   A long calculation can be done piece by piece from low to high by passing
+   the return value from one part as the carry parameter to the next part.
+   The effective final k becomes anything between size and size-n, if n
+   pieces are used.
+
+
+   A similar sort of routine could be constructed based on adding multiples
+   of d at each limb, much like redc in mpz_powm does.  Subtracting however
+   has a small advantage that when subtracting to cancel out l there's never
+   a borrow into h, whereas using an addition would put a carry into h
+   depending whether l==0 or l!=0.
+
+
+   In terms of efficiency, this function is similar to a mul-by-inverse
+   mpn_mod_1.  Both are essentially two multiplies and are best suited to
+   CPUs with low latency multipliers (in comparison to a divide instruction
+   at least.)  But modexact has a few less supplementary operations, only
+   needs low part and high part multiplies, and has fewer working quantities
+   (helping CPUs with few registers).
+
+
+   In the main loop it will be noted that the new carry (call it r) is the
+   sum of the high product h and any borrow from l=s-c.  If c<d then we will
+   have r<d too, for the following reasons.  Let q=l*inverse be the quotient
+   limb, so that q*d = B*h + l, where B=2^GMP_NUMB_BITS.  Now if h=d-1 then
+
+       l = q*d - B*(d-1) <= (B-1)*d - B*(d-1) = B-d
+
+   But if l=s-c produces a borrow when c<d, then l>=B-d+1 and hence will
+   never have h=d-1 and so r=h+borrow <= d-1.
+
+   When c>=d, on the other hand, h=d-1 can certainly occur together with a
+   borrow, thereby giving only r<=d, as per the function definition above.
+
+   As a design decision it's left to the caller to check for r=d if it might
+   be passing c>=d.  Several applications have c<d initially so the extra
+   test is often unnecessary, for example the GCDs or a plain divisibility
+   d|a test will pass c=0.
+
+
+   The special case for size==1 is so that it can be assumed c<=d in the
+   high<=divisor test at the end.  c<=d is only guaranteed after at least
+   one iteration of the main loop.  There's also a decent chance one % is
+   faster than a binvert_limb, though that will depend on the processor.
+
+   A CPU specific implementation might want to omit the size==1 code or the
+   high<=divisor test.  mpn/x86/k6/mode1o.asm for instance finds neither
+   useful.  */
+
+
+mp_limb_t
+mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size, mp_limb_t d,
+                     mp_limb_t orig_c)
+{ /* Returns r such that d divides r*B^k + {src,size} - orig_c; see above.  */
+  mp_limb_t  s, h, l, inverse, dummy, dmul, ret;
+  mp_limb_t  c = orig_c;
+  mp_size_t  i;
+
+  ASSERT (size >= 1);
+  ASSERT (d & 1);
+  ASSERT_MPN (src, size);
+  ASSERT_LIMB (d);
+  ASSERT_LIMB (c);
+
+  if (size == 1)       /* single limb: a plain % replaces the inverse setup */
+    {
+      s = src[0];
+      if (s > c)
+       {
+         l = s-c;
+         h = l % d;
+         if (h != 0)   /* negate mod d, keeping 0 as 0 */
+           h = d - h;
+       }
+      else
+       {
+         l = c-s;
+         h = l % d;
+       }
+      return h;
+    }
+
+
+  binvert_limb (inverse, d);
+  dmul = d << GMP_NAIL_BITS;
+
+  i = 0;
+  do                   /* handles limbs 0 .. size-2; the top limb is done below */
+    {
+      s = src[i];
+      SUBC_LIMB (c, l, s, c);          /* l = s - c, c = borrow */
+      l = (l * inverse) & GMP_NUMB_MASK;       /* quotient limb */
+      umul_ppmm (h, dummy, l, dmul);   /* only the high product h is used */
+      c += h;                          /* new carry = borrow + high product */
+    }
+  while (++i < size-1);
+
+
+  s = src[i];          /* most significant limb */
+  if (s <= d)
+    {
+      /* With high<=d the final step can be a subtract and addback.  If c==0
+        then the addback will restore to l>=0.  If c==d then will get l==d
+        if s==0, but that's ok per the function definition.  */
+
+      l = c - s;
+      if (c < s)
+       l += d;
+
+      ret = l;
+    }
+  else
+    {
+      /* Can't skip a divide, just do the loop code once more. */
+
+      SUBC_LIMB (c, l, s, c);
+      l = (l * inverse) & GMP_NUMB_MASK;
+      umul_ppmm (h, dummy, l, dmul);
+      c += h;
+      ret = c;
+    }
+
+  ASSERT (orig_c < d ? ret < d : ret <= d);    /* range promised in the header comment */
+  return ret;
+}
+
+
+
+#if 0  /* compiled out: alternative form kept for reference */
+
+/* The following is an alternate form that might shave one cycle on a
+   superscalar processor since it takes c+=h off the dependent chain,
+   leaving just a low product, high product, and a subtract.
+
+   This is for CPU specific implementations to consider.  A special case for
+   high<divisor and/or size==1 can be added if desired.
+
+   Notice that c is only ever 0 or 1, since if s-c produces a borrow then
+   x=0xFF..FF and x-h cannot produce a borrow.  The c=(x>s) could become
+   c=(x==0xFF..FF) too, if that helped.  */
+
+mp_limb_t
+mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size, mp_limb_t d, mp_limb_t h)
+{ /* Here the incoming carry parameter is named h and is updated in place.  */
+  mp_limb_t  s, x, y, inverse, dummy, dmul, c1, c2;
+  mp_limb_t  c = 0;
+  mp_size_t  i;
+
+  ASSERT (size >= 1);
+  ASSERT (d & 1);
+
+  binvert_limb (inverse, d);
+  dmul = d << GMP_NAIL_BITS;
+
+  for (i = 0; i < size; i++)   /* every limb goes through the loop here */
+    {
+      ASSERT (c==0 || c==1);
+
+      s = src[i];
+      SUBC_LIMB (c1, x, s, c);         /* x = s - c, c1 = borrow */
+
+      SUBC_LIMB (c2, y, x, h);         /* y = x - h, c2 = borrow */
+      c = c1 + c2;                     /* the ASSERT above expects 0 or 1 */
+
+      y = (y * inverse) & GMP_NUMB_MASK;
+      umul_ppmm (h, dummy, y, dmul);   /* only the high product h is used */
+    }
+
+  h += c;              /* fold the last borrow into the returned value */
+  return h;
+}
+
+#endif
diff --git a/mpn/generic/mu_bdiv_q.c b/mpn/generic/mu_bdiv_q.c

new file mode 100644 (file)

index 0000000..01df1e3
--- /dev/null
+++ b/mpn/generic/mu_bdiv_q.c
@@ -0,0 +1,260 @@
+/* mpn_mu_bdiv_q(qp,np,nn,dp,dn,tp) -- Compute {np,nn} / {dp,dn} mod B^nn,
+   storing the result in {qp,nn}.  Overlap allowed between Q and N; all other
+   overlap disallowed.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2005, 2006, 2007, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+/*
+   The idea of the algorithm used herein is to compute a smaller inverted value
+   than used in the standard Barrett algorithm, and thus save time in the
+   Newton iterations, and pay just a small price when using the inverted value
+   for developing quotient bits.  This algorithm was presented at ICMS 2006.
+*/
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* N = {np,nn}
+   D = {dp,dn}
+
+   Requirements: N >= D
+                D >= 1
+                D odd
+                dn >= 2
+                nn >= 2
+                scratch space as determined by mpn_mu_bdiv_q_itch(nn,dn).
+
+   Write quotient to Q = {qp,nn}.
+
+   FIXME: When iterating, perhaps do the small step before loop, not after.
+   FIXME: Try to avoid the scalar divisions when computing inverse size.
+   FIXME: Trim allocation for (qn > dn) case, 3*dn might be possible.  In
+         particular, when dn==in, tp and rp could use the same space.
+   FIXME: Trim final quotient calculation to qn limbs of precision.
+*/
+void
+mpn_mu_bdiv_q (mp_ptr qp,
+              mp_srcptr np, mp_size_t nn,
+              mp_srcptr dp, mp_size_t dn,
+              mp_ptr scratch)
+{
+  mp_size_t qn;                /* quotient limbs still to be produced; starts at nn */
+  mp_size_t in;                /* number of limbs in the inverse stored at ip */
+  int cy, c0;
+  mp_size_t tn, wn;    /* assigned only on the mpn_mulmod_bnm1 paths */
+
+  qn = nn;
+
+  ASSERT (dn >= 2);
+  ASSERT (qn >= 2);
+
+  if (qn > dn)         /* quotient longer than divisor: work block by block */
+    {
+      mp_size_t b;
+
+      /* |_______________________|   dividend
+                       |________|   divisor  */
+
+#define ip           scratch                   /* in */
+#define rp           (scratch + in)            /* dn or rest >= binvert_itch(in) */
+#define tp           (scratch + in + dn)       /* dn+in or next_size(dn) */
+#define scratch_out  (scratch + in + dn + tn)  /* mulmod_bnm1_itch(next_size(dn)) */
+
+      /* Compute an inverse size that is a nice partition of the quotient.  */
+      b = (qn - 1) / dn + 1;   /* ceil(qn/dn), number of blocks */
+      in = (qn - 1) / b + 1;   /* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */
+
+      /* Some notes on allocation:
+
+        When in = dn, R dies when mpn_mullo returns; if in < dn, the low `in'
+        limbs of R die at that point.  We could save memory by letting T live
+        just under R, and let the upper part of T expand into R. These changes
+        should reduce itch to perhaps 3dn.
+       */
+
+      mpn_binvert (ip, dp, in, rp);    /* rp area serves as scratch here */
+
+      cy = 0;                  /* borrow carried from one block to the next */
+
+      MPN_COPY (rp, np, dn);   /* R starts as the low dn limbs of N */
+      np += dn;
+      mpn_mullo_n (qp, rp, ip, in);    /* first `in' quotient limbs */
+      qn -= in;
+
+      while (qn > in)          /* each pass yields another `in' quotient limbs */
+       {
+         if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
+           mpn_mul (tp, dp, dn, qp, in);       /* mulhi, need tp[dn+in-1...in] */
+         else
+           {
+             tn = mpn_mulmod_bnm1_next_size (dn);
+             mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, in, scratch_out);
+             wn = dn + in - tn;                /* number of wrapped limbs */
+             if (wn > 0)
+               {
+                 c0 = mpn_sub_n (tp + tn, tp, rp, wn);
+                 mpn_decr_u (tp + wn, c0);
+               }
+           }
+
+         qp += in;
+         if (dn != in)
+           {
+             /* Subtract tp[dn-1...in] from partial remainder.  */
+             cy += mpn_sub_n (rp, rp + in, tp + in, dn - in);
+             if (cy == 2)      /* fold one borrow into tp to keep cy at 0 or 1 */
+               {
+                 mpn_incr_u (tp + dn, 1);
+                 cy = 1;
+               }
+           }
+         /* Subtract tp[dn+in-1...dn] from dividend.  */
+         cy = mpn_sub_nc (rp + dn - in, np, tp + dn, in, cy);
+         np += in;
+         mpn_mullo_n (qp, rp, ip, in);
+         qn -= in;
+       }
+
+      /* Generate last qn limbs.
+        FIXME: It should be possible to limit precision here, since qn is
+        typically somewhat smaller than dn.  No big gains expected.  */
+
+      if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
+       mpn_mul (tp, dp, dn, qp, in);           /* mulhi, need tp[qn+in-1...in] */
+      else
+       {
+         tn = mpn_mulmod_bnm1_next_size (dn);
+         mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, in, scratch_out);
+         wn = dn + in - tn;                    /* number of wrapped limbs */
+         if (wn > 0)
+           {
+             c0 = mpn_sub_n (tp + tn, tp, rp, wn);
+             mpn_decr_u (tp + wn, c0);
+           }
+       }
+
+      qp += in;
+      if (dn != in)
+       {
+         cy += mpn_sub_n (rp, rp + in, tp + in, dn - in);
+         if (cy == 2)          /* same borrow folding as in the loop */
+           {
+             mpn_incr_u (tp + dn, 1);
+             cy = 1;
+           }
+       }
+
+      mpn_sub_nc (rp + dn - in, np, tp + dn, qn - (dn - in), cy);      /* final borrow is dropped */
+      mpn_mullo_n (qp, rp, ip, qn);    /* last qn quotient limbs */
+
+#undef ip
+#undef rp
+#undef tp
+#undef scratch_out
+   }
+  else                 /* qn <= dn: two steps with a half-sized inverse */
+    {
+      /* |_______________________|   dividend
+               |________________|   divisor  */
+
+#define ip           scratch           /* in */
+#define tp           (scratch + in)    /* qn+in or next_size(qn) or rest >= binvert_itch(in) */
+#define scratch_out  (scratch + in + tn)/* mulmod_bnm1_itch(next_size(qn)) */
+
+      /* Compute half-sized inverse.  */
+      in = qn - (qn >> 1);
+
+      mpn_binvert (ip, dp, in, tp);    /* tp area serves as scratch here */
+
+      mpn_mullo_n (qp, np, ip, in);            /* low `in' quotient limbs */
+
+      if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
+       mpn_mul (tp, dp, qn, qp, in);           /* mulhigh */
+      else
+       {
+         tn = mpn_mulmod_bnm1_next_size (qn);
+         mpn_mulmod_bnm1 (tp, tn, dp, qn, qp, in, scratch_out);
+         wn = qn + in - tn;                    /* number of wrapped limbs */
+         if (wn > 0)
+           {
+             c0 = mpn_cmp (tp, np, wn) < 0;    /* 1 when {tp,wn} < {np,wn} */
+             mpn_decr_u (tp + wn, c0);
+           }
+       }
+
+      mpn_sub_n (tp, np + in, tp + in, qn - in);       /* borrow out is dropped */
+      mpn_mullo_n (qp + in, tp, ip, qn - in);  /* high qn-in quotient limbs */
+
+#undef ip
+#undef tp
+#undef scratch_out
+    }
+}
+
+mp_size_t
+mpn_mu_bdiv_q_itch (mp_size_t nn, mp_size_t dn)
+{
+  /* Scratch limbs for mpn_mu_bdiv_q with an nn-limb dividend and a dn-limb
+     divisor: the inverse (in limbs) followed by whichever is larger, the
+     mpn_binvert workspace or the multiplication-phase areas.  */
+  mp_size_t in, mul_n, rem_n;  /* inverse size, long operand size, R area */
+  mp_size_t prod_n, mul_itch;  /* product area, mpn_mulmod_bnm1 workspace */
+  mp_size_t binv_itch, work;
+
+  if (nn > dn)
+    {
+      /* ceil(nn/dn) blocks, each of ceil(nn/blocks) limbs.  */
+      mp_size_t blocks = (nn - 1) / dn + 1;
+      in = (nn - 1) / blocks + 1;
+      mul_n = dn;
+      rem_n = dn;
+    }
+  else
+    {
+      /* Half-sized inverse, rounded up; no separate remainder area.  */
+      in = nn - (nn >> 1);
+      mul_n = nn;
+      rem_n = 0;
+    }
+
+  if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
+    {
+      prod_n = mul_n + in;
+      mul_itch = 0;
+    }
+  else
+    {
+      prod_n = mpn_mulmod_bnm1_next_size (mul_n);
+      mul_itch = mpn_mulmod_bnm1_itch (prod_n, mul_n, in);
+    }
+
+  /* mpn_mu_bdiv_q hands mpn_binvert the same region it later uses for the
+     remainder and product, so only the larger of the two needs counts.  */
+  binv_itch = mpn_binvert_itch (in);
+  work = rem_n + prod_n + mul_itch;
+  return in + MAX (work, binv_itch);
+}
diff --git a/mpn/generic/mu_bdiv_qr.c b/mpn/generic/mu_bdiv_qr.c

new file mode 100644 (file)

index 0000000..312d011
--- /dev/null
+++ b/mpn/generic/mu_bdiv_qr.c
@@ -0,0 +1,280 @@
+/* mpn_mu_bdiv_qr(qp,rp,np,nn,dp,dn,tp) -- Compute {np,nn} / {dp,dn} mod B^qn,
+   where qn = nn-dn, storing the result in {qp,qn}.  Overlap allowed between Q
+   and N; all other overlap disallowed.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2005, 2006, 2007, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+/*
+   The idea of the algorithm used herein is to compute a smaller inverted value
+   than used in the standard Barrett algorithm, and thus save time in the
+   Newton iterations, and pay just a small price when using the inverted value
+   for developing quotient bits.  This algorithm was presented at ICMS 2006.
+*/
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* N = {np,nn}
+   D = {dp,dn}
+
+   Requirements: N >= D
+                D >= 1
+                D odd
+                dn >= 2
+                nn >= 2
+                scratch space as determined by mpn_mu_bdiv_qr_itch(nn,dn).
+
+   Write quotient to Q = {qp,nn-dn}.
+
+   FIXME: When iterating, perhaps do the small step before loop, not after.
+   FIXME: Try to avoid the scalar divisions when computing inverse size.
+   FIXME: Trim allocation for (qn > dn) case, 3*dn might be possible.  In
+         particular, when dn==in, tp and rp could use the same space.
+*/
+mp_limb_t
+mpn_mu_bdiv_qr (mp_ptr qp,
+               mp_ptr rp,
+               mp_srcptr np, mp_size_t nn,
+               mp_srcptr dp, mp_size_t dn,
+               mp_ptr scratch)
+{
+  mp_size_t qn;
+  mp_size_t in;
+  mp_limb_t cy, c0;
+  int k;
+  mp_size_t tn, wn;
+  mp_size_t i;
+
+  qn = nn - dn;
+
+  ASSERT (dn >= 2);
+  ASSERT (qn >= 2);
+
+  if (qn > dn)
+    {
+      mp_size_t b;
+
+      /* |_______________________|   dividend
+                       |________|   divisor  */
+
+#define ip           scratch           /* in */
+#define tp           (scratch + in)    /* dn+in or next_size(dn) or rest >= binvert_itch(in) */
+#define scratch_out  (scratch + in + tn)/* mulmod_bnm1_itch(next_size(dn)) */
+
+      /* Compute an inverse size that is a nice partition of the quotient.  */
+      b = (qn - 1) / dn + 1;   /* ceil(qn/dn), number of blocks */
+      in = (qn - 1) / b + 1;   /* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */
+
+      /* Some notes on allocation:
+
+        When in = dn, R dies when mpn_mullo returns, if in < dn the low in
+        limbs of R dies at that point.  We could save memory by letting T live
+        just under R, and let the upper part of T expand into R. These changes
+        should reduce itch to perhaps 3dn.
+       */
+
+      mpn_binvert (ip, dp, in, tp);
+
+      MPN_COPY (rp, np, dn);
+      np += dn;
+      cy = 0;
+
+      while (qn > in)
+       {
+         mpn_mullo_n (qp, rp, ip, in);
+
+         if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
+           mpn_mul (tp, dp, dn, qp, in);       /* mulhi, need tp[dn+in-1...in] */
+         else
+           {
+             tn = mpn_mulmod_bnm1_next_size (dn);
+             mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, in, scratch_out);
+             wn = dn + in - tn;                /* number of wrapped limbs */
+             if (wn > 0)
+               {
+                 c0 = mpn_sub_n (tp + tn, tp, rp, wn);
+                 mpn_decr_u (tp + wn, c0);
+               }
+           }
+
+         qp += in;
+         qn -= in;
+
+         if (dn != in)
+           {
+             /* Subtract tp[dn-1...in] from partial remainder.  */
+             cy += mpn_sub_n (rp, rp + in, tp + in, dn - in);
+             if (cy == 2)
+               {
+                 mpn_incr_u (tp + dn, 1);
+                 cy = 1;
+               }
+           }
+         /* Subtract tp[dn+in-1...dn] from dividend.  */
+         cy = mpn_sub_nc (rp + dn - in, np, tp + dn, in, cy);
+         np += in;
+       }
+
+      /* Generate last qn limbs.  */
+      mpn_mullo_n (qp, rp, ip, qn);
+
+      if (BELOW_THRESHOLD (qn, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
+       mpn_mul (tp, dp, dn, qp, qn);           /* mulhi, need tp[qn+in-1...in] */
+      else
+       {
+         tn = mpn_mulmod_bnm1_next_size (dn);
+         mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, qn, scratch_out);
+         wn = dn + qn - tn;                    /* number of wrapped limbs */
+         if (wn > 0)
+           {
+             c0 = mpn_sub_n (tp + tn, tp, rp, wn);
+             mpn_decr_u (tp + wn, c0);
+           }
+       }
+
+      if (dn != qn)
+       {
+         cy += mpn_sub_n (rp, rp + qn, tp + qn, dn - qn);
+         if (cy == 2)
+           {
+             mpn_incr_u (tp + dn, 1);
+             cy = 1;
+           }
+       }
+      return mpn_sub_nc (rp + dn - qn, np, tp + dn, qn, cy);
+
+#undef ip
+#undef tp
+#undef scratch_out
+    }
+  else
+    {
+      /* |_______________________|   dividend
+               |________________|   divisor  */
+
+#define ip           scratch           /* in */
+#define tp           (scratch + in)    /* dn+in or next_size(dn) or rest >= binvert_itch(in) */
+#define scratch_out  (scratch + in + tn)/* mulmod_bnm1_itch(next_size(dn)) */
+
+      /* Compute half-sized inverse.  */
+      in = qn - (qn >> 1);
+
+      mpn_binvert (ip, dp, in, tp);
+
+      mpn_mullo_n (qp, np, ip, in);            /* low `in' quotient limbs */
+
+      if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
+       mpn_mul (tp, dp, dn, qp, in);           /* mulhigh */
+      else
+       {
+         tn = mpn_mulmod_bnm1_next_size (dn);
+         mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, in, scratch_out);
+         wn = dn + in - tn;                    /* number of wrapped limbs */
+         if (wn > 0)
+           {
+             c0 = mpn_sub_n (tp + tn, tp, np, wn);
+             mpn_decr_u (tp + wn, c0);
+           }
+       }
+
+      qp += in;
+      qn -= in;
+
+      cy = mpn_sub_n (rp, np + in, tp + in, dn);
+      mpn_mullo_n (qp, rp, ip, qn);            /* high qn quotient limbs */
+
+      if (BELOW_THRESHOLD (qn, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
+       mpn_mul (tp, dp, dn, qp, qn);           /* mulhigh */
+      else
+       {
+         tn = mpn_mulmod_bnm1_next_size (dn);
+         mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, qn, scratch_out);
+         wn = dn + qn - tn;                    /* number of wrapped limbs */
+         if (wn > 0)
+           {
+             c0 = mpn_sub_n (tp + tn, tp, rp, wn);
+             mpn_decr_u (tp + wn, c0);
+           }
+       }
+
+      cy += mpn_sub_n (rp, rp + qn, tp + qn, dn - qn);
+      if (cy == 2)
+       {
+         mpn_incr_u (tp + dn, 1);
+         cy = 1;
+       }
+      return mpn_sub_nc (rp + dn - qn, np + dn + in, tp + dn, qn, cy);
+
+#undef ip
+#undef tp
+#undef scratch_out
+    }
+}
+
+mp_size_t
+mpn_mu_bdiv_qr_itch (mp_size_t nn, mp_size_t dn)
+{
+  /* Scratch size, in limbs: an `in'-limb inverse followed by the larger of
+     the mpn_binvert scratch and the product area (plus mpn_mulmod_bnm1
+     scratch when that multiplication is selected).  */
+  mp_size_t qn = nn - dn;
+  mp_size_t in, tn, itch_out, itch_mul, itch_binvert;
+
+  /* Inverse size, chosen exactly as mpn_mu_bdiv_qr does.  */
+  if (qn > dn)
+    {
+      mp_size_t nblocks = (qn - 1) / dn + 1;   /* ceil(qn/dn) */
+
+      in = (qn - 1) / nblocks + 1;             /* ceil(qn/nblocks) */
+    }
+  else
+    in = qn - (qn >> 1);                       /* half-sized inverse */
+
+  /* The product area is sized the same way for both quotient shapes.  */
+  if (! BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
+    {
+      tn = mpn_mulmod_bnm1_next_size (dn);
+      itch_out = mpn_mulmod_bnm1_itch (tn, dn, in);
+    }
+  else
+    {
+      tn = dn + in;
+      itch_out = 0;
+    }
+
+  itch_binvert = mpn_binvert_itch (in);
+  itch_mul = tn + itch_out;
+  return in + MAX (itch_mul, itch_binvert);
+}
diff --git a/mpn/generic/mu_div_q.c b/mpn/generic/mu_div_q.c

new file mode 100644 (file)

index 0000000..86182ac
--- /dev/null
+++ b/mpn/generic/mu_div_q.c
@@ -0,0 +1,222 @@
+/* mpn_mu_div_q.
+
+   Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2005, 2006, 2007, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+/*
+   The idea of the algorithm used herein is to compute a smaller inverted value
+   than used in the standard Barrett algorithm, and thus save time in the
+   Newton iterations, and pay just a small price when using the inverted value
+   for developing quotient bits.  This algorithm was presented at ICMS 2006.
+*/
+
+/*
+  Things to work on:
+
+  1. This is a rudimentary implementation of mpn_mu_div_q.  The algorithm is
+     probably close to optimal, except when mpn_mu_divappr_q fails.
+
+     An alternative which could be considered for much simpler code for the
+     complex qn>=dn arm would be to allocate a temporary nn+1 limb buffer, then
+     simply call mpn_mu_divappr_q.  Such a temporary allocation is
+     unfortunately very large.
+
+  2. We used to fall back to mpn_mu_div_qr when we detect a possible
+     mpn_mu_divappr_q rounding problem, now we multiply and compare.
+     Unfortunately, since mpn_mu_divappr_q does not return the partial
+     remainder, this also doesn't become optimal.  A mpn_mu_divappr_qr could
+     solve that.
+
+  3. The allocations done here should be made from the scratch area, which
+     then would need to be amended.
+*/
+
+#include <stdlib.h>            /* for NULL */
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Quotient-only division of {np,nn} by {dp,dn}.  The nn-dn low quotient limbs
+   are written to {qp,nn-dn} and the high quotient limb is returned.
+
+   An approximate quotient with one extra low "guard" limb is developed in a
+   temporary area {tp,qn+1}; the quotient proper is {tp+1,qn}.  When the guard
+   limb is too small to rule out that the approximation error reached the
+   quotient proper, the candidate is multiplied back and compared against N,
+   and decremented by one if it turns out to be too large.
+
+   Temporary space beyond `scratch' is taken with TMP_BALLOC_LIMBS.  */
+mp_limb_t
+mpn_mu_div_q (mp_ptr qp,
+             mp_srcptr np, mp_size_t nn,
+             mp_srcptr dp, mp_size_t dn,
+             mp_ptr scratch)
+{
+  mp_ptr tp, rp, ip, this_ip;
+  mp_size_t qn, in, this_in;
+  mp_limb_t cy, qh;
+  TMP_DECL;
+
+  TMP_MARK;
+
+  qn = nn - dn;
+
+  tp = TMP_BALLOC_LIMBS (qn + 1);      /* guard limb + qn quotient limbs */
+
+  if (qn >= dn)                        /* nn >= 2*dn */
+    {
+      /* Find max inverse size needed by the two preinv calls.  FIXME: This is
+        not optimal, it underestimates the invariance.  */
+      if (dn != qn)
+       {
+         mp_size_t in1, in2;
+
+         in1 = mpn_mu_div_qr_choose_in (qn - dn, dn, 0);
+         in2 = mpn_mu_divappr_q_choose_in (dn + 1, dn, 0);
+         in = MAX (in1, in2);
+       }
+      else
+       {
+         in = mpn_mu_divappr_q_choose_in (dn + 1, dn, 0);
+       }
+
+      ip = TMP_BALLOC_LIMBS (in + 1);
+
+      /* Compute an approximate inverse on in+1 limbs and keep its `in' most
+        significant limbs in {ip,in}.  */
+      if (dn == in)
+       {
+         MPN_COPY (scratch + 1, dp, in);
+         scratch[0] = 1;
+         mpn_invertappr (ip, scratch, in + 1, NULL);
+         MPN_COPY_INCR (ip, ip + 1, in);
+       }
+      else
+       {
+         cy = mpn_add_1 (scratch, dp + dn - (in + 1), in + 1, 1);
+         if (UNLIKELY (cy != 0))
+           MPN_ZERO (ip, in);
+         else
+           {
+             mpn_invertappr (ip, scratch, in + 1, NULL);
+             MPN_COPY_INCR (ip, ip + 1, in);
+           }
+       }
+
+       /* |_______________________|   dividend
+                        |________|   divisor  */
+      rp = TMP_BALLOC_LIMBS (2 * dn + 1);
+
+      /* Exact division of the high part of N: yields the high qn-dn quotient
+        limbs and a dn-limb partial remainder in {rp+dn+1,dn}.  Each call
+        uses the most significant this_in limbs of the shared inverse.  */
+      this_in = mpn_mu_div_qr_choose_in (qn - dn, dn, 0);
+      this_ip = ip + in - this_in;
+      qh = mpn_preinv_mu_div_qr (tp + dn + 1, rp + dn + 1, np + dn, qn, dp, dn,
+                                this_ip, this_in, scratch);
+
+      /* Append the low dn limbs of N and a zero guard limb below the partial
+        remainder, then develop the remaining dn+1 limbs approximately.  */
+      MPN_COPY (rp + 1, np, dn);
+      rp[0] = 0;
+      this_in = mpn_mu_divappr_q_choose_in (dn + 1, dn, 0);
+      this_ip = ip + in - this_in;
+      cy = mpn_preinv_mu_divappr_q (tp, rp, 2 * dn + 1, dp, dn,
+                                   this_ip, this_in, scratch);
+
+      if (UNLIKELY (cy != 0))
+       {
+         /* Since the partial remainder fed to mpn_preinv_mu_divappr_q was
+            canonically reduced, replace the returned value of B^(qn-dn)+eps
+            by the largest possible value.  */
+         mp_size_t i;
+         for (i = 0; i < dn + 1; i++)
+           tp[i] = GMP_NUMB_MAX;
+       }
+
+      /* The max error of mpn_mu_divappr_q is +4.  If the low (guard) quotient
+        limb is greater than the max error, removing the error cannot borrow
+        from the limbs above it, so {tp+1,qn} is used as it is.  Otherwise we
+        cannot trust the quotient and verify it by multiplying back.  */
+      if (tp[0] > 4)
+       {
+         MPN_COPY (qp, tp + 1, qn);
+       }
+      else
+       {
+         mp_ptr pp;
+
+         /* FIXME: can we use already allocated space? */
+         pp = TMP_BALLOC_LIMBS (nn);
+         mpn_mul (pp, tp + 1, qn, dp, dn);
+
+         /* Account for the high quotient limb in the product.  */
+         cy = (qh != 0) ? mpn_add_n (pp + qn, pp + qn, dp, dn) : 0;
+
+         if (cy || mpn_cmp (pp, np, nn) > 0) /* At most is wrong by one, no cycle. */
+           qh -= mpn_sub_1 (qp, tp + 1, qn, 1);
+         else /* Same as above */
+           MPN_COPY (qp, tp + 1, qn);
+       }
+    }
+  else
+    {
+       /* |_______________________|   dividend
+                |________________|   divisor  */
+
+      /* FIXME: When nn = 2dn-1, qn becomes dn-1, and the numerator size passed
+        here becomes 2dn, i.e., more than nn.  This shouldn't hurt, since only
+        the most significant dn-1 limbs will actually be read, but it is not
+        pretty.  */
+
+      qh = mpn_mu_divappr_q (tp, np + nn - (2 * qn + 2), 2 * qn + 2,
+                            dp + dn - (qn + 1), qn + 1, scratch);
+
+      /* The max error of mpn_mu_divappr_q is +4, but we get an additional
+         error from the divisor truncation.  A guard limb above that bound
+         means {tp+1,qn} can be used as it is.  */
+      if (tp[0] > 6)
+       {
+         MPN_COPY (qp, tp + 1, qn);
+       }
+      else
+       {
+         /* FIXME: a shorter product should be enough; we may use already
+            allocated space... */
+         rp = TMP_BALLOC_LIMBS (nn);
+         mpn_mul (rp, dp, dn, tp + 1, qn);
+
+         cy = (qh != 0) ? mpn_add_n (rp + qn, rp + qn, dp, dn) : 0;
+
+         if (cy || mpn_cmp (rp, np, nn) > 0) /* At most is wrong by one, no cycle. */
+           qh -= mpn_sub_1 (qp, tp + 1, qn, 1);
+         else /* Same as above */
+           MPN_COPY (qp, tp + 1, qn);
+       }
+    }
+
+  TMP_FREE;
+  return qh;
+}
+
+mp_size_t
+mpn_mu_div_q_itch (mp_size_t nn, mp_size_t dn, int mua_k)
+{
+  /* Scratch size for mpn_mu_div_q, derived from the itch functions of the
+     division routines it is built on, with the operand sizes it hands to
+     them.  */
+  mp_size_t qn = nn - dn;
+  mp_size_t itch_div_qr, itch_divappr;
+
+  /* Short quotient: a single approximate division on truncated operands.  */
+  if (qn < dn)
+    return mpn_mu_divappr_q_itch (2 * qn + 2, qn + 1, mua_k);
+
+  /* Long quotient: one exact and one approximate division share the scratch
+     area, so the larger need decides.  */
+  itch_div_qr = mpn_mu_div_qr_itch (qn, dn, mua_k);
+  itch_divappr = mpn_mu_divappr_q_itch (2 * dn + 1, dn, mua_k);
+  return MAX (itch_div_qr, itch_divappr);
+}
diff --git a/mpn/generic/mu_div_qr.c b/mpn/generic/mu_div_qr.c

new file mode 100644 (file)

index 0000000..34a2abb
--- /dev/null
+++ b/mpn/generic/mu_div_qr.c
@@ -0,0 +1,405 @@
+/* mpn_mu_div_qr, mpn_preinv_mu_div_qr.
+
+   Compute Q = floor(N / D) and R = N-QD.  N is nn limbs and D is dn limbs and
+   must be normalized, and Q must be nn-dn limbs.  The requirement that Q is
+   nn-dn limbs (and not nn-dn+1 limbs) was put in place in order to allow us to
+   let N be unmodified during the operation.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2005, 2006, 2007, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+/*
+   The idea of the algorithm used herein is to compute a smaller inverted value
+   than used in the standard Barrett algorithm, and thus save time in the
+   Newton iterations, and pay just a small price when using the inverted value
+   for developing quotient bits.  This algorithm was presented at ICMS 2006.
+*/
+
+/* CAUTION: This code and the code in mu_divappr_q.c should be edited in sync.
+
+ Things to work on:
+
+  * This isn't optimal when the quotient isn't needed, as it might take a lot
+    of space.  The computation is always needed, though, so there is no time to
+    save with special code.
+
+  * The itch/scratch scheme isn't perhaps such a good idea as it once seemed,
+    demonstrated by the fact that the mpn_invertappr function's scratch needs
+    mean that we need to keep a large allocation long after it is needed.
+    Things are worse as mpn_mul_fft does not accept any scratch parameter,
+    which means we'll have a large memory hole while in mpn_mul_fft.  In
+    general, a peak scratch need in the beginning of a function isn't
+    well-handled by the itch/scratch scheme.
+*/
+
+#ifdef STAT
+#undef STAT
+#define STAT(x) x
+#else
+#define STAT(x)
+#endif
+
+#include <stdlib.h>            /* for NULL */
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* FIXME: The MU_DIV_QR_SKEW_THRESHOLD was not analysed properly.  It gives a
+   speedup according to old measurements, but does the decision mechanism
+   really make sense?  It seems like the quotient between dn and qn might be
+   what we really should be checking.  */
+#ifndef MU_DIV_QR_SKEW_THRESHOLD
+#define MU_DIV_QR_SKEW_THRESHOLD 100
+#endif
+
+#ifdef CHECK                           /* FIXME: Enable in minithres */
+#undef  MU_DIV_QR_SKEW_THRESHOLD
+#define MU_DIV_QR_SKEW_THRESHOLD 1
+#endif
+
+
+static mp_limb_t mpn_mu_div_qr2 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
+
+
+/* Divide {np,nn} by {dp,dn}: quotient limbs go to {qp,nn-dn}, the remainder
+   to {rp,dn}, and the high quotient limb is returned.  When the quotient is
+   much shorter than the divisor (by more than MU_DIV_QR_SKEW_THRESHOLD limbs)
+   only the most significant limbs of the operands are divided, and the result
+   is then corrected for the ignored low divisor limbs.  */
+mp_limb_t
+mpn_mu_div_qr (mp_ptr qp,
+              mp_ptr rp,
+              mp_srcptr np,
+              mp_size_t nn,
+              mp_srcptr dp,
+              mp_size_t dn,
+              mp_ptr scratch)
+{
+  mp_size_t qn = nn - dn;
+  mp_size_t ign;               /* low limbs ignored in the first division */
+  mp_limb_t cy, qh;
+
+  /* Balanced enough: divide the full operands directly.  */
+  if (qn + MU_DIV_QR_SKEW_THRESHOLD >= dn)
+    return mpn_mu_div_qr2 (qp, rp, np, nn, dp, dn, scratch);
+
+  /* |______________|_ign_first__|   dividend                        nn
+           |_______|_ign_first__|   divisor                          dn
+
+           |______|                 quotient (prel)                  qn
+
+            |___________________|   quotient * ignored-divisor-part  dn-1
+  */
+
+  /* The same number of low limbs is skipped in both operands, since
+     nn - (2*qn + 1) equals dn - (qn + 1).  */
+  ign = dn - (qn + 1);
+
+  /* Preliminary quotient and partial remainder from the most significant
+     2*qn+1 dividend limbs and qn+1 divisor limbs.  */
+  qh = mpn_mu_div_qr2 (qp, rp + ign, np + ign, 2 * qn + 1,
+                      dp + ign, qn + 1, scratch);
+
+  /* Multiply the quotient by the ignored divisor limbs; mpn_mul wants the
+     longer operand first.  The product is dn-1 limbs.  */
+  if (ign > qn)
+    mpn_mul (scratch, dp, ign, qp, qn);
+  else
+    mpn_mul (scratch, qp, qn, dp, ign);
+
+  /* Include the high quotient limb, and extend the product to dn limbs.  */
+  cy = qh ? mpn_add_n (scratch + qn, scratch + qn, dp, ign) : 0;
+  scratch[dn - 1] = cy;
+
+  /* Subtract the product from the low dividend limbs and from the partial
+     remainder sitting above them.  */
+  cy = mpn_sub_n (rp, np, scratch, ign);
+  cy = mpn_sub_nc (rp + ign, rp + ign, scratch + ign, qn + 1, cy);
+  if (cy)
+    {
+      /* The preliminary quotient was one too large; step it down and add D
+        back to the remainder.  */
+      qh -= mpn_sub_1 (qp, qp, qn, 1);
+      mpn_add_n (rp, rp, dp, dn);
+    }
+
+  return qh;
+}
+
+static mp_limb_t
+mpn_mu_div_qr2 (mp_ptr qp,
+               mp_ptr rp,
+               mp_srcptr np,
+               mp_size_t nn,
+               mp_srcptr dp,
+               mp_size_t dn,
+               mp_ptr scratch)
+{
+  mp_size_t qn, in;
+  mp_limb_t cy, qh;
+  mp_ptr ip, tp;
+
+  ASSERT (dn > 1);
+
+  qn = nn - dn;
+
+  /* Compute the inverse size.  */
+  in = mpn_mu_div_qr_choose_in (qn, dn, 0);
+  ASSERT (in <= dn);
+
+#if 1
+  /* This alternative inverse computation method gets slightly more accurate
+     results.  FIXMEs: (1) Temp allocation needs not analysed (2) itch function
+     not adapted (3) mpn_invertappr scratch needs not met.  */
+  ip = scratch;
+  tp = scratch + in + 1;
+
+  /* compute an approximate inverse on (in+1) limbs */
+  if (dn == in)
+    {
+      MPN_COPY (tp + 1, dp, in);
+      tp[0] = 1;
+      mpn_invertappr (ip, tp, in + 1, NULL);
+      MPN_COPY_INCR (ip, ip + 1, in);
+    }
+  else
+    {
+      cy = mpn_add_1 (tp, dp + dn - (in + 1), in + 1, 1);
+      if (UNLIKELY (cy != 0))
+       MPN_ZERO (ip, in);
+      else
+       {
+         mpn_invertappr (ip, tp, in + 1, NULL);
+         MPN_COPY_INCR (ip, ip + 1, in);
+       }
+    }
+#else
+  /* This older inverse computation method gets slightly worse results than the
+     one above.  */
+  ip = scratch;
+  tp = scratch + in;
+
+  /* Compute inverse of D to in+1 limbs, then round to 'in' limbs.  Ideally the
+     inversion function should do this automatically.  */
+  if (dn == in)
+    {
+      tp[in + 1] = 0;
+      MPN_COPY (tp + in + 2, dp, in);
+      mpn_invertappr (tp, tp + in + 1, in + 1, NULL);
+    }
+  else
+    {
+      mpn_invertappr (tp, dp + dn - (in + 1), in + 1, NULL);
+    }
+  cy = mpn_sub_1 (tp, tp, in + 1, GMP_NUMB_HIGHBIT);
+  if (UNLIKELY (cy != 0))
+    MPN_ZERO (tp + 1, in);
+  MPN_COPY (ip, tp + 1, in);
+#endif
+
+  qh = mpn_preinv_mu_div_qr (qp, rp, np, nn, dp, dn, ip, in, scratch + in);
+
+  return qh;
+}
+
+mp_limb_t
+mpn_preinv_mu_div_qr (mp_ptr qp,
+                     mp_ptr rp,
+                     mp_srcptr np,
+                     mp_size_t nn,
+                     mp_srcptr dp,
+                     mp_size_t dn,
+                     mp_srcptr ip,
+                     mp_size_t in,
+                     mp_ptr scratch)
+{
+  mp_size_t qn;
+  mp_limb_t cy, cx, qh;
+  mp_limb_t r;
+  mp_size_t tn, wn;
+
+#define tp           scratch
+#define scratch_out  (scratch + tn)
+
+  qn = nn - dn;
+
+  np += qn;
+  qp += qn;
+
+  qh = mpn_cmp (np, dp, dn) >= 0;
+  if (qh != 0)
+    mpn_sub_n (rp, np, dp, dn);
+  else
+    MPN_COPY (rp, np, dn);
+
+  if (qn == 0)
+    return qh;                 /* Degenerate use.  Should we allow this? */
+
+  while (qn > 0)
+    {
+      if (qn < in)
+       {
+         ip += in - qn;
+         in = qn;
+       }
+      np -= in;
+      qp -= in;
+
+      /* Compute the next block of quotient limbs by multiplying the inverse I
+        by the upper part of the partial remainder R.  */
+      mpn_mul_n (tp, rp + dn - in, ip, in);            /* mulhi  */
+      cy = mpn_add_n (qp, tp + in, rp + dn - in, in);  /* I's msb implicit */
+      ASSERT_ALWAYS (cy == 0);
+
+      qn -= in;
+
+      /* Compute the product of the quotient block and the divisor D, to be
+        subtracted from the partial remainder combined with new limbs from the
+        dividend N.  We only really need the low dn+1 limbs.  */
+
+      if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
+       mpn_mul (tp, dp, dn, qp, in);           /* dn+in limbs, high 'in' cancels */
+      else
+       {
+         tn = mpn_mulmod_bnm1_next_size (dn + 1);
+         mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, in, scratch_out);
+         wn = dn + in - tn;                    /* number of wrapped limbs */
+         if (wn > 0)
+           {
+             cy = mpn_sub_n (tp, tp, rp + dn - wn, wn);
+             cy = mpn_sub_1 (tp + wn, tp + wn, tn - wn, cy);
+             cx = mpn_cmp (rp + dn - in, tp + dn, tn - dn) < 0;
+             ASSERT_ALWAYS (cx >= cy);
+             mpn_incr_u (tp, cx - cy);
+           }
+       }
+
+      r = rp[dn - in] - tp[dn];
+
+      /* Subtract the product from the partial remainder combined with new
+        limbs from the dividend N, generating a new partial remainder R.  */
+      if (dn != in)
+       {
+         cy = mpn_sub_n (tp, np, tp, in);      /* get next 'in' limbs from N */
+         cy = mpn_sub_nc (tp + in, rp, tp + in, dn - in, cy);
+         MPN_COPY (rp, tp, dn);                /* FIXME: try to avoid this */
+       }
+      else
+       {
+         cy = mpn_sub_n (rp, np, tp, in);      /* get next 'in' limbs from N */
+       }
+
+      STAT (int i; int err = 0;
+           static int errarr[5]; static int err_rec; static int tot);
+
+      /* Check the remainder R and adjust the quotient as needed.  */
+      r -= cy;
+      while (r != 0)
+       {
+         /* We loop 0 times with about 69% probability, 1 time with about 31%
+            probability, 2 times with about 0.6% probability, if inverse is
+            computed as recommended.  */
+         mpn_incr_u (qp, 1);
+         cy = mpn_sub_n (rp, rp, dp, dn);
+         r -= cy;
+         STAT (err++);
+       }
+      if (mpn_cmp (rp, dp, dn) >= 0)
+       {
+         /* This is executed with about 76% probability.  */
+         mpn_incr_u (qp, 1);
+         cy = mpn_sub_n (rp, rp, dp, dn);
+         STAT (err++);
+       }
+
+      STAT (
+           tot++;
+           errarr[err]++;
+           if (err > err_rec)
+             err_rec = err;
+           if (tot % 0x10000 == 0)
+             {
+               for (i = 0; i <= err_rec; i++)
+                 printf ("  %d(%.1f%%)", errarr[i], 100.0*errarr[i]/tot);
+               printf ("\n");
+             }
+           );
+    }
+
+  return qh;
+}
+
+/* Choose the size `in' of the inverse for a qn-limb quotient and a dn-limb
+   divisor.
+
+   With k != 0 the caller fixes the number of blocks:
+     in = ceil(min(dn,qn) / k).
+
+   With k = 0 (automatic choice) the number of blocks b depends on the shape:
+   (a) dn < qn:           b = ceil(qn/dn)
+   (b) dn/3 < qn <= dn:   b = 2
+   (c) 3*qn <= dn:        b = 1
+   and in = ceil(qn/b).  In all cases we have in <= dn.
+ */
+mp_size_t
+mpn_mu_div_qr_choose_in (mp_size_t qn, mp_size_t dn, int k)
+{
+  mp_size_t nblocks;
+
+  if (k != 0)
+    {
+      mp_size_t xn = MIN (dn, qn);
+
+      return (xn - 1) / k + 1;
+    }
+
+  if (qn > dn)
+    nblocks = (qn - 1) / dn + 1;       /* a nice partition of the quotient */
+  else if (3 * qn > dn)
+    nblocks = 2;
+  else
+    nblocks = 1;
+
+  return (qn - 1) / nblocks + 1;       /* ceil(qn/nblocks) */
+}
+
+/* Scratch size, in limbs, for an nn-by-dn mpn_mu_div_qr: the inverse, the
+   product area, and the scratch of mpn_mulmod_bnm1.  mua_k is passed on to
+   mpn_mu_div_qr_choose_in as its k argument.  */
+mp_size_t
+mpn_mu_div_qr_itch (mp_size_t nn, mp_size_t dn, int mua_k)
+{
+  mp_size_t in, prod_area, mulmod_itch;
+
+  prod_area = mpn_mulmod_bnm1_next_size (dn + 1);
+  in = mpn_mu_div_qr_choose_in (nn - dn, dn, mua_k);
+  mulmod_itch = mpn_mulmod_bnm1_itch (prod_area, dn, in);
+
+  return in + prod_area + mulmod_itch;
+}
+
+/* Scratch size, in limbs, for mpn_preinv_mu_div_qr with an `in'-limb
+   inverse: the product area plus the scratch of mpn_mulmod_bnm1.  nn does not
+   enter the computation.  */
+mp_size_t
+mpn_preinv_mu_div_qr_itch (mp_size_t nn, mp_size_t dn, mp_size_t in)
+{
+  mp_size_t prod_area, mulmod_itch;
+
+  prod_area = mpn_mulmod_bnm1_next_size (dn + 1);
+  mulmod_itch = mpn_mulmod_bnm1_itch (prod_area, dn, in);
+
+  return prod_area + mulmod_itch;
+}
diff --git a/mpn/generic/mu_divappr_q.c b/mpn/generic/mu_divappr_q.c

new file mode 100644 (file)

index 0000000..0e9afa3
--- /dev/null
+++ b/mpn/generic/mu_divappr_q.c
@@ -0,0 +1,352 @@
+/* mpn_mu_divappr_q, mpn_preinv_mu_divappr_q.
+
+   Compute Q = floor(N / D) + e.  N is nn limbs, D is dn limbs and must be
+   normalized, and Q must be nn-dn limbs, 0 <= e <= 4.  The requirement that Q
+   is nn-dn limbs (and not nn-dn+1 limbs) was put in place in order to allow us
+   to let N be unmodified during the operation.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2005, 2006, 2007, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+/*
+   The idea of the algorithm used herein is to compute a smaller inverted value
+   than used in the standard Barrett algorithm, and thus save time in the
+   Newton iterations, and pay just a small price when using the inverted value
+   for developing quotient bits.  This algorithm was presented at ICMS 2006.
+*/
+
+/* CAUTION: This code and the code in mu_div_qr.c should be edited in sync.
+
+ Things to work on:
+
+  * The itch/scratch scheme isn't perhaps such a good idea as it once seemed,
+    demonstrated by the fact that the mpn_invertappr function's scratch needs
+    mean that we need to keep a large allocation long after it is needed.
+    Things are worse as mpn_mul_fft does not accept any scratch parameter,
+    which means we'll have a large memory hole while in mpn_mul_fft.  In
+    general, a peak scratch need in the beginning of a function isn't
+    well-handled by the itch/scratch scheme.
+*/
+
+#ifdef STAT
+#undef STAT
+#define STAT(x) x
+#else
+#define STAT(x)
+#endif
+
+#include <stdlib.h>            /* for NULL */
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+mp_limb_t                     /* qh, exactly as returned by mpn_preinv_mu_divappr_q */
+mpn_mu_divappr_q (mp_ptr qp,          /* out: quotient Q, nn-dn limbs */
+                 mp_srcptr np,         /* in: dividend N */
+                 mp_size_t nn,         /* limbs in N */
+                 mp_srcptr dp,         /* in: divisor D (must be normalized, per the file header) */
+                 mp_size_t dn,         /* limbs in D, asserted > 1 */
+                 mp_ptr scratch)       /* workspace: the inverse first, temporaries after it */
+{
+  mp_size_t qn, in;            /* quotient size; inverse size */
+  mp_limb_t cy, qh;
+  mp_ptr ip, tp;               /* inverse; temporary used while inverting */
+
+  ASSERT (dn > 1);
+
+  qn = nn - dn;
+
+  /* If Q is smaller than D, drop the low dn-(qn+1) limbs of both N and D.  */
+  if (qn + 1 < dn)
+    {
+      np += dn - (qn + 1);
+      nn -= dn - (qn + 1);
+      dp += dn - (qn + 1);
+      dn = qn + 1;
+    }
+
+  /* Compute the inverse size (k = 0 selects the automatic choice).  */
+  in = mpn_mu_divappr_q_choose_in (qn, dn, 0);
+  ASSERT (in <= dn);
+
+#if 1
+  /* This alternative inverse computation method gets slightly more accurate
+     results.  FIXMEs: (1) Temp allocation needs not analysed (2) itch function
+     not adapted (3) mpn_invertappr scratch needs not met.  */
+  ip = scratch;
+  tp = scratch + in + 1;
+
+  /* compute an approximate inverse on (in+1) limbs */
+  if (dn == in)
+    {
+      MPN_COPY (tp + 1, dp, in);       /* D has only 'in' limbs: put it in the high limbs... */
+      tp[0] = 1;                       /* ...and use 1 as the extra low limb */
+      mpn_invertappr (ip, tp, in + 1, NULL);
+      MPN_COPY_INCR (ip, ip + 1, in);  /* discard the low limb, keep the top 'in' */
+    }
+  else
+    {
+      cy = mpn_add_1 (tp, dp + dn - (in + 1), in + 1, 1);      /* tp = top in+1 limbs of D, plus 1 */
+      if (UNLIKELY (cy != 0))
+       MPN_ZERO (ip, in);              /* the increment carried out: use an all-zero inverse */
+      else
+       {
+         mpn_invertappr (ip, tp, in + 1, NULL);
+         MPN_COPY_INCR (ip, ip + 1, in);       /* discard the low limb, keep the top 'in' */
+       }
+    }
+#else
+  /* This older inverse computation method gets slightly worse results than the
+     one above.  */
+  ip = scratch;
+  tp = scratch + in;
+
+  /* Compute inverse of D to in+1 limbs, then round to 'in' limbs.  Ideally the
+     inversion function should do this automatically.  */
+  if (dn == in)
+    {
+      tp[in + 1] = 0;
+      MPN_COPY (tp + in + 2, dp, in);
+      mpn_invertappr (tp, tp + in + 1, in + 1, NULL);
+    }
+  else
+    {
+      mpn_invertappr (tp, dp + dn - (in + 1), in + 1, NULL);
+    }
+  cy = mpn_sub_1 (tp, tp, in + 1, GMP_NUMB_HIGHBIT);
+  if (UNLIKELY (cy != 0))
+    MPN_ZERO (tp + 1, in);
+  MPN_COPY (ip, tp + 1, in);
+#endif
+
+  /* The inverse occupies the first 'in' limbs of scratch; the rest is handed on.  */
+  qh = mpn_preinv_mu_divappr_q (qp, np, nn, dp, dn, ip, in, scratch + in);
+
+  return qh;
+}
+
+mp_limb_t                             /* qh: 0 or 1 (set at the top and at the very end) */
+mpn_preinv_mu_divappr_q (mp_ptr qp,           /* out: quotient, nn-dn limbs */
+                        mp_srcptr np,          /* in: dividend N, nn limbs */
+                        mp_size_t nn,
+                        mp_srcptr dp,          /* in: divisor D, dn limbs */
+                        mp_size_t dn,
+                        mp_srcptr ip,          /* in: precomputed inverse I, 'in' limbs, msb implicit */
+                        mp_size_t in,
+                        mp_ptr scratch)        /* workspace, carved up by the macros below */
+{
+  mp_size_t qn;
+  mp_limb_t cy, cx, qh;
+  mp_limb_t r;
+  mp_size_t tn, wn;
+
+#define rp           scratch                  /* partial remainder R, dn limbs */
+#define tp           (scratch + dn)           /* product area, directly after R */
+#define scratch_out  (scratch + dn + tn)      /* handed to mpn_mulmod_bnm1; tn must be set first */
+
+  qn = nn - dn;
+
+  np += qn;                    /* np -> top dn limbs of N */
+  qp += qn;                    /* qp -> one past the top of Q; blocks are written downwards */
+
+  qh = mpn_cmp (np, dp, dn) >= 0;      /* 1 iff the top dn limbs of N are >= D */
+  if (qh != 0)
+    mpn_sub_n (rp, np, dp, dn);
+  else
+    MPN_COPY (rp, np, dn);
+
+  if (qn == 0)
+    return qh;                 /* Degenerate use.  Should we allow this? */
+
+  while (qn > 0)
+    {
+      if (qn < in)
+       {
+         ip += in - qn;        /* final, shorter block: keep only the top qn limbs of I */
+         in = qn;
+       }
+      np -= in;
+      qp -= in;
+
+      /* Compute the next block of quotient limbs by multiplying the inverse I
+        by the upper part of the partial remainder R.  */
+      mpn_mul_n (tp, rp + dn - in, ip, in);            /* mulhi  */
+      cy = mpn_add_n (qp, tp + in, rp + dn - in, in);  /* I's msb implicit */
+      ASSERT_ALWAYS (cy == 0);
+
+      qn -= in;
+      if (qn == 0)
+       break;                  /* quotient complete; R is not updated for the last block */
+
+      /* Compute the product of the quotient block and the divisor D, to be
+        subtracted from the partial remainder combined with new limbs from the
+        dividend N.  We only really need the low dn limbs.  */
+
+      if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
+       mpn_mul (tp, dp, dn, qp, in);           /* dn+in limbs, high 'in' cancels */
+      else
+       {
+         tn = mpn_mulmod_bnm1_next_size (dn + 1);
+         mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, in, scratch_out);
+         wn = dn + in - tn;                    /* number of wrapped limbs */
+         if (wn > 0)           /* presumably undoes the wrap-around of the tn-limb product -- TODO confirm */
+           {
+             cy = mpn_sub_n (tp, tp, rp + dn - wn, wn);
+             cy = mpn_sub_1 (tp + wn, tp + wn, tn - wn, cy);
+             cx = mpn_cmp (rp + dn - in, tp + dn, tn - dn) < 0;
+             ASSERT_ALWAYS (cx >= cy);
+             mpn_incr_u (tp, cx - cy);
+           }
+       }
+
+      r = rp[dn - in] - tp[dn];        /* limb just above the dn limbs kept below; nonzero means R is too large */
+
+      /* Subtract the product from the partial remainder combined with new
+        limbs from the dividend N, generating a new partial remainder R.  */
+      if (dn != in)
+       {
+         cy = mpn_sub_n (tp, np, tp, in);      /* get next 'in' limbs from N */
+         cy = mpn_sub_nc (tp + in, rp, tp + in, dn - in, cy);
+         MPN_COPY (rp, tp, dn);                /* FIXME: try to avoid this */
+       }
+      else
+       {
+         cy = mpn_sub_n (rp, np, tp, in);      /* get next 'in' limbs from N */
+       }
+
+      STAT (int i; int err = 0;
+           static int errarr[5]; static int err_rec; static int tot);
+
+      /* Check the remainder R and adjust the quotient as needed.  */
+      r -= cy;
+      while (r != 0)
+       {
+         /* We loop 0 times with about 69% probability, 1 time with about 31%
+            probability, 2 times with about 0.6% probability, if inverse is
+            computed as recommended.  */
+         mpn_incr_u (qp, 1);
+         cy = mpn_sub_n (rp, rp, dp, dn);
+         r -= cy;
+         STAT (err++);
+       }
+      if (mpn_cmp (rp, dp, dn) >= 0)
+       {
+         /* This is executed with about 76% probability.  */
+         mpn_incr_u (qp, 1);
+         cy = mpn_sub_n (rp, rp, dp, dn);
+         STAT (err++);
+       }
+
+      STAT (                   /* NOTE(review): uses printf; confirm <stdio.h> is in scope for -DSTAT builds */
+           tot++;
+           errarr[err]++;
+           if (err > err_rec)
+             err_rec = err;
+           if (tot % 0x10000 == 0)
+             {
+               for (i = 0; i <= err_rec; i++)
+                 printf ("  %d(%.1f%%)", errarr[i], 100.0*errarr[i]/tot);
+               printf ("\n");
+             }
+           );
+    }
+
+  /* FIXME: We should perhaps be somewhat more elegant in our rounding of the
+     quotient.  For now, just make sure the returned quotient is >= the real
+     quotient; add 3 with saturating arithmetic.  */
+  qn = nn - dn;
+  cy += mpn_add_1 (qp, qp, qn, 3);     /* cy is 0 on entry: the loop is only left via the break after ASSERT_ALWAYS (cy == 0) */
+  if (cy != 0)
+    {
+      if (qh != 0)
+       {
+         /* Return a quotient of just 1-bits, with qh set.  */
+         mp_size_t i;
+         for (i = 0; i < qn; i++)
+           qp[i] = GMP_NUMB_MAX;
+       }
+      else
+       {
+         /* Propagate carry into qh.  */
+         qh = 1;
+       }
+    }
+
+  return qh;
+}
+
+/* In case k=0 (automatic choice), we distinguish 3 cases:
+   (a) dn < qn:         in = ceil(qn / ceil(qn/dn))
+   (b) dn/3 < qn <= dn: in = ceil(qn / 2)
+   (c) qn <= dn/3:      in = qn
+   In all cases we have in <= dn.
+ */
+mp_size_t
+mpn_mu_divappr_q_choose_in (mp_size_t qn, mp_size_t dn, int k)
+{
+  /* Return the inverse size, in limbs, to use for a qn-limb quotient and a
+     dn-limb divisor.  The result is (x - 1) / nparts + 1, i.e. ceil(x/nparts)
+     for x >= 1, with x and nparts selected below.  */
+  mp_size_t nparts;
+
+  if (k != 0)
+    {
+      /* Explicit k: split the shorter of the two operands into k parts.  */
+      mp_size_t shorter = MIN (dn, qn);
+      return (shorter - 1) / k + 1;
+    }
+
+  /* k == 0: automatic choice, based on how qn compares with dn.  */
+  if (qn > dn)
+    {
+      /* Partition the quotient evenly into ceil(qn/dn) blocks.  */
+      nparts = (qn - 1) / dn + 1;
+    }
+  else if (3 * qn > dn)
+    {
+      nparts = 2;              /* dn/3 < qn <= dn */
+    }
+  else
+    {
+      nparts = 1;              /* qn <= dn/3: the result is simply qn */
+    }
+
+  return (qn - 1) / nparts + 1;
+}
+
+mp_size_t
+mpn_mu_divappr_q_itch (mp_size_t nn, mp_size_t dn, int mua_k)
+{
+  /* Total = inverse size + dn + mulmod_bnm1 size for dn+1 + mulmod_bnm1 itch,
+     all computed for the (possibly truncated) divisor size.  */
+  mp_size_t qn = nn - dn;
+  mp_size_t inv_n, prod_n;
+
+  /* Same divisor truncation as mpn_mu_divappr_q applies when Q is short.  */
+  if (dn > qn + 1)
+    dn = qn + 1;
+
+  inv_n = mpn_mu_divappr_q_choose_in (qn, dn, mua_k);
+  prod_n = mpn_mulmod_bnm1_next_size (dn + 1);
+  return inv_n + dn + prod_n + mpn_mulmod_bnm1_itch (prod_n, dn, inv_n);
+}
diff --git a/mpn/generic/mul.c b/mpn/generic/mul.c

new file mode 100644 (file)

index 0000000..c176b45
--- /dev/null
+++ b/mpn/generic/mul.c
@@ -0,0 +1,384 @@
+/* mpn_mul -- Multiply two natural numbers.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+Copyright 1991, 1993, 1994, 1996, 1997, 1999, 2000, 2001, 2002, 2003, 2005,
+2006, 2007, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+#ifndef MUL_BASECASE_MAX_UN
+#define MUL_BASECASE_MAX_UN 500
+#endif
+
+#define TOOM33_OK(an,bn) (6 + 2 * (an) < 3 * (bn))    /* args parenthesized so expressions are safe */
+#define TOOM44_OK(an,bn) (12 + 3 * (an) < 4 * (bn))   /* args parenthesized so expressions are safe */
+
+/* Multiply the natural numbers u (pointed to by UP, with UN limbs) and v
+   (pointed to by VP, with VN limbs), and store the result at PRODP.  The
+   result is UN + VN limbs.  Return the most significant limb of the result.
+
+   NOTE: The space pointed to by PRODP is overwritten before finished with U
+   and V, so overlap is an error.
+
+   Argument constraints:
+   1. UN >= VN.
+   2. PRODP != UP and PRODP != VP, i.e. the destination must be distinct from
+      the multiplier and the multiplicand.  */
+
+/*
+  * The cutoff lines in the toomX2 and toomX3 code are now exactly between the
+    ideal lines of the surrounding algorithms.  Is that optimal?
+
+  * The toomX3 code now uses a structure similar to the one of toomX2, except
+    that it loops longer in the unbalanced case.  The result is that the
+    remaining area might have un < vn.  Should we fix the toomX2 code in a
+    similar way?
+
+  * The toomX3 code is used for the largest non-FFT unbalanced operands.  It
+    therefore calls mpn_mul recursively for certain cases.
+
+  * Allocate static temp space using THRESHOLD variables (except for toom44
+    when !WANT_FFT).  That way, we can typically have no TMP_ALLOC at all.
+
+  * We sort ToomX2 algorithms together, assuming the toom22, toom32, toom42
+    have the same vn threshold.  This is not true, we should actually use
+    mul_basecase for slightly larger operands for toom32 than for toom22, and
+    even larger for toom42.
+
+  * That problem is even more prevalent for toomX3.  We therefore use special
+    THRESHOLD variables there.
+
+  * Is our ITCH allocation correct?
+*/
+
+#define ITCH (16*vn + 100)
+
+mp_limb_t                     /* most significant limb of the product */
+mpn_mul (mp_ptr prodp,                /* out: product, un+vn limbs; must not overlap up or vp */
+        mp_srcptr up, mp_size_t un,    /* in: first operand, un >= vn limbs */
+        mp_srcptr vp, mp_size_t vn)    /* in: second operand, vn >= 1 limbs */
+{
+  ASSERT (un >= vn);
+  ASSERT (vn >= 1);
+  ASSERT (! MPN_OVERLAP_P (prodp, un+vn, up, un));
+  ASSERT (! MPN_OVERLAP_P (prodp, un+vn, vp, vn));
+
+  if (un == vn)                        /* balanced operands: use the n-by-n entry points */
+    {
+      if (up == vp)
+       mpn_sqr (prodp, up, un);        /* same operand twice: square */
+      else
+       mpn_mul_n (prodp, up, vp, un);
+    }
+  else if (vn < MUL_TOOM22_THRESHOLD)
+    { /* plain schoolbook multiplication */
+
+      /* Unless un is very large, or else if we have an applicable mpn_mul_N,
+        perform basecase multiply directly.  */
+      if (un <= MUL_BASECASE_MAX_UN
+#if HAVE_NATIVE_mpn_mul_2
+         || vn <= 2
+#else
+         || vn == 1
+#endif
+         )
+       mpn_mul_basecase (prodp, up, un, vp, vn);
+      else
+       {
+         /* We have un >> MUL_BASECASE_MAX_UN > vn.  For better memory
+            locality, split up[] into MUL_BASECASE_MAX_UN pieces and multiply
+            these pieces with the vp[] operand.  After each such partial
+            multiplication (but the last) we copy the most significant vn
+            limbs into a temporary buffer since that part would otherwise be
+            overwritten by the next multiplication.  After the next
+            multiplication, we add it back.  This illustrates the situation:
+
+                                                    -->vn<--
+                                                      |  |<------- un ------->|
+                                                         _____________________|
+                                                        X                    /|
+                                                      /XX__________________/  |
+                                    _____________________                     |
+                                   X                    /                     |
+                                 /XX__________________/                       |
+               _____________________                                          |
+              /                    /                                          |
+            /____________________/                                            |
+           ==================================================================
+
+           The parts marked with X are the parts whose sums are copied into
+           the temporary buffer.  */
+
+         mp_limb_t tp[MUL_TOOM22_THRESHOLD_LIMIT];     /* holds vn limbs; vn < MUL_TOOM22_THRESHOLD in this arm */
+         mp_limb_t cy;
+         ASSERT (MUL_TOOM22_THRESHOLD <= MUL_TOOM22_THRESHOLD_LIMIT);
+
+         mpn_mul_basecase (prodp, up, MUL_BASECASE_MAX_UN, vp, vn);
+         prodp += MUL_BASECASE_MAX_UN;
+         MPN_COPY (tp, prodp, vn);             /* preserve high triangle */
+         up += MUL_BASECASE_MAX_UN;
+         un -= MUL_BASECASE_MAX_UN;
+         while (un > MUL_BASECASE_MAX_UN)
+           {
+             mpn_mul_basecase (prodp, up, MUL_BASECASE_MAX_UN, vp, vn);
+             cy = mpn_add_n (prodp, prodp, tp, vn); /* add back preserved triangle */
+             mpn_incr_u (prodp + vn, cy);
+             prodp += MUL_BASECASE_MAX_UN;
+             MPN_COPY (tp, prodp, vn);         /* preserve high triangle */
+             up += MUL_BASECASE_MAX_UN;
+             un -= MUL_BASECASE_MAX_UN;
+           }
+         if (un > vn)
+           {
+             mpn_mul_basecase (prodp, up, un, vp, vn);
+           }
+         else
+           {
+             ASSERT (un > 0);
+             mpn_mul_basecase (prodp, vp, vn, up, un); /* operands swapped: longer one goes first */
+           }
+         cy = mpn_add_n (prodp, prodp, tp, vn); /* add back preserved triangle */
+         mpn_incr_u (prodp + vn, cy);
+       }
+    }
+  else if (BELOW_THRESHOLD (vn, MUL_TOOM33_THRESHOLD))
+    {
+      /* Use ToomX2 variants */
+      mp_ptr scratch;
+      TMP_SDECL; TMP_SMARK;
+
+      scratch = TMP_SALLOC_LIMBS (ITCH);       /* ITCH expands to 16*vn + 100 */
+
+      /* FIXME: This condition (repeated in the loop below) leaves from a vn*vn
+        square to a (3vn-1)*vn rectangle.  Leaving such a rectangle is hardly
+        wise; we would get better balance by slightly moving the bound.  We
+        will sometimes end up with un < vn, like the X3 arm below.  */
+      if (un >= 3 * vn)
+       {
+         mp_limb_t cy;
+         mp_ptr ws;
+
+         /* The maximum ws usage is for the mpn_mul result.  */
+         ws = TMP_SALLOC_LIMBS (4 * vn);
+
+         mpn_toom42_mul (prodp, up, 2 * vn, vp, vn, scratch);
+         un -= 2 * vn;
+         up += 2 * vn;
+         prodp += 2 * vn;
+
+         while (un >= 3 * vn)
+           {
+             mpn_toom42_mul (ws, up, 2 * vn, vp, vn, scratch);
+             un -= 2 * vn;
+             up += 2 * vn;
+             cy = mpn_add_n (prodp, prodp, ws, vn);    /* low vn limbs overlap the previous piece */
+             MPN_COPY (prodp + vn, ws + vn, 2 * vn);
+             mpn_incr_u (prodp + vn, cy);
+             prodp += 2 * vn;
+           }
+
+         /* vn <= un < 3vn */
+
+         if (4 * un < 5 * vn)
+           mpn_toom22_mul (ws, up, un, vp, vn, scratch);
+         else if (4 * un < 7 * vn)
+           mpn_toom32_mul (ws, up, un, vp, vn, scratch);
+         else
+           mpn_toom42_mul (ws, up, un, vp, vn, scratch);
+
+         cy = mpn_add_n (prodp, prodp, ws, vn);
+         MPN_COPY (prodp + vn, ws + vn, un);
+         mpn_incr_u (prodp + vn, cy);
+       }
+      else
+       {
+         if (4 * un < 5 * vn)
+           mpn_toom22_mul (prodp, up, un, vp, vn, scratch);
+         else if (4 * un < 7 * vn)
+           mpn_toom32_mul (prodp, up, un, vp, vn, scratch);
+         else
+           mpn_toom42_mul (prodp, up, un, vp, vn, scratch);
+       }
+      TMP_SFREE;
+    }
+  else if (BELOW_THRESHOLD ((un + vn) >> 1, MUL_FFT_THRESHOLD) ||
+          BELOW_THRESHOLD (3 * vn, MUL_FFT_THRESHOLD))
+    {
+      /* Handle the largest operands that are not in the FFT range.  The 2nd
+        condition makes very unbalanced operands avoid the FFT code (except
+        perhaps as coefficient products of the Toom code).  */
+
+      if (BELOW_THRESHOLD (vn, MUL_TOOM44_THRESHOLD) || !TOOM44_OK (un, vn))
+       {
+         /* Use ToomX3 variants */
+         mp_ptr scratch;
+         TMP_SDECL; TMP_SMARK;
+
+         scratch = TMP_SALLOC_LIMBS (ITCH);    /* ITCH expands to 16*vn + 100 */
+
+         if (2 * un >= 5 * vn)
+           {
+             mp_limb_t cy;
+             mp_ptr ws;
+
+             /* The maximum ws usage is for the mpn_mul result.  */
+             ws = TMP_SALLOC_LIMBS (7 * vn >> 1);
+
+             if (BELOW_THRESHOLD (vn, MUL_TOOM42_TO_TOOM63_THRESHOLD))
+               mpn_toom42_mul (prodp, up, 2 * vn, vp, vn, scratch);
+             else
+               mpn_toom63_mul (prodp, up, 2 * vn, vp, vn, scratch);
+             un -= 2 * vn;
+             up += 2 * vn;
+             prodp += 2 * vn;
+
+             while (2 * un >= 5 * vn)  /* un >= 2.5vn */
+               {
+                 if (BELOW_THRESHOLD (vn, MUL_TOOM42_TO_TOOM63_THRESHOLD))
+                   mpn_toom42_mul (ws, up, 2 * vn, vp, vn, scratch);
+                 else
+                   mpn_toom63_mul (ws, up, 2 * vn, vp, vn, scratch);
+                 un -= 2 * vn;
+                 up += 2 * vn;
+                 cy = mpn_add_n (prodp, prodp, ws, vn);
+                 MPN_COPY (prodp + vn, ws + vn, 2 * vn);
+                 mpn_incr_u (prodp + vn, cy);
+                 prodp += 2 * vn;
+               }
+
+             /* vn / 2 <= un < 2.5vn */
+
+             if (un < vn)
+               mpn_mul (ws, vp, vn, up, un);   /* recursive call, operands swapped to keep the first one longer */
+             else
+               mpn_mul (ws, up, un, vp, vn);
+
+             cy = mpn_add_n (prodp, prodp, ws, vn);
+             MPN_COPY (prodp + vn, ws + vn, un);
+             mpn_incr_u (prodp + vn, cy);
+           }
+         else
+           {
+             if (6 * un < 7 * vn)
+               mpn_toom33_mul (prodp, up, un, vp, vn, scratch);
+             else if (2 * un < 3 * vn)
+               {
+                 if (BELOW_THRESHOLD (vn, MUL_TOOM32_TO_TOOM43_THRESHOLD))
+                   mpn_toom32_mul (prodp, up, un, vp, vn, scratch);
+                 else
+                   mpn_toom43_mul (prodp, up, un, vp, vn, scratch);
+               }
+             else if (6 * un < 11 * vn)
+               {
+                 if (4 * un < 7 * vn)
+                   {
+                     if (BELOW_THRESHOLD (vn, MUL_TOOM32_TO_TOOM53_THRESHOLD))
+                       mpn_toom32_mul (prodp, up, un, vp, vn, scratch);
+                     else
+                       mpn_toom53_mul (prodp, up, un, vp, vn, scratch);
+                   }
+                 else
+                   {
+                     if (BELOW_THRESHOLD (vn, MUL_TOOM42_TO_TOOM53_THRESHOLD))
+                       mpn_toom42_mul (prodp, up, un, vp, vn, scratch);
+                     else
+                       mpn_toom53_mul (prodp, up, un, vp, vn, scratch);
+                   }
+               }
+             else
+               {
+                 if (BELOW_THRESHOLD (vn, MUL_TOOM42_TO_TOOM63_THRESHOLD))
+                   mpn_toom42_mul (prodp, up, un, vp, vn, scratch);
+                 else
+                   mpn_toom63_mul (prodp, up, un, vp, vn, scratch);
+               }
+           }
+         TMP_SFREE;
+       }
+      else
+       {
+         mp_ptr scratch;       /* sized by the matching *_mul_itch function in each arm below */
+         TMP_DECL; TMP_MARK;
+
+         if (BELOW_THRESHOLD (vn, MUL_TOOM6H_THRESHOLD))
+           {
+             scratch = TMP_ALLOC_LIMBS (mpn_toom44_mul_itch (un, vn));
+             mpn_toom44_mul (prodp, up, un, vp, vn, scratch);
+           }
+         else if (BELOW_THRESHOLD (vn, MUL_TOOM8H_THRESHOLD))
+           {
+             scratch = TMP_ALLOC_LIMBS (mpn_toom6h_mul_itch (un, vn));
+             mpn_toom6h_mul (prodp, up, un, vp, vn, scratch);
+           }
+         else
+           {
+             scratch = TMP_ALLOC_LIMBS (mpn_toom8h_mul_itch (un, vn));
+             mpn_toom8h_mul (prodp, up, un, vp, vn, scratch);
+           }
+         TMP_FREE;
+       }
+    }
+  else
+    {
+      if (un >= 8 * vn)                /* very unbalanced FFT-range operands: multiply in 3vn-limb pieces */
+       {
+         mp_limb_t cy;
+         mp_ptr ws;
+         TMP_DECL; TMP_MARK;
+
+         /* The maximum ws usage is for the mpn_mul result.  */
+         ws = TMP_BALLOC_LIMBS (9 * vn >> 1);
+
+         mpn_fft_mul (prodp, up, 3 * vn, vp, vn);
+         un -= 3 * vn;
+         up += 3 * vn;
+         prodp += 3 * vn;
+
+         while (2 * un >= 7 * vn)      /* un >= 3.5vn  */
+           {
+             mpn_fft_mul (ws, up, 3 * vn, vp, vn);
+             un -= 3 * vn;
+             up += 3 * vn;
+             cy = mpn_add_n (prodp, prodp, ws, vn);
+             MPN_COPY (prodp + vn, ws + vn, 3 * vn);
+             mpn_incr_u (prodp + vn, cy);
+             prodp += 3 * vn;
+           }
+
+         /* vn / 2 <= un < 3.5vn */
+
+         if (un < vn)
+           mpn_mul (ws, vp, vn, up, un);       /* recursive call, operands swapped to keep the first one longer */
+         else
+           mpn_mul (ws, up, un, vp, vn);
+
+         cy = mpn_add_n (prodp, prodp, ws, vn);
+         MPN_COPY (prodp + vn, ws + vn, un);
+         mpn_incr_u (prodp + vn, cy);
+
+         TMP_FREE;
+       }
+      else
+       mpn_fft_mul (prodp, up, un, vp, vn);
+    }
+
+  return prodp[un + vn - 1];   /* historic; prodp and un were advanced in lock-step above, so this is still the top limb of the full product */
+}
diff --git a/mpn/generic/mul_1.c b/mpn/generic/mul_1.c

new file mode 100644 (file)

index 0000000..b8290cc
--- /dev/null
+++ b/mpn/generic/mul_1.c
@@ -0,0 +1,87 @@
+/* mpn_mul_1 -- Multiply a limb vector with a single limb and store the
+   product in a second limb vector.
+
+Copyright 1991, 1992, 1993, 1994, 1996, 2000, 2001, 2002 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+#if GMP_NAIL_BITS == 0
+
+mp_limb_t
+mpn_mul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
+{
+  mp_limb_t carry = 0;         /* high part carried into the next position */
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_INCR_P (rp, up, n));
+
+  do
+    {
+      mp_limb_t u, hi, lo;
+
+      u = *up++;               /* read before the store: rp may equal up */
+      umul_ppmm (hi, lo, u, vl);
+
+      lo += carry;
+      carry = hi + (lo < carry);       /* wrap-around of lo bumps the high part */
+      *rp++ = lo;
+    }
+  while (--n != 0);
+
+  return carry;                /* limb that did not fit in {rp,n} */
+}
+
+#endif
+
+#if GMP_NAIL_BITS >= 1
+
+mp_limb_t
+mpn_mul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
+{
+  mp_limb_t vl_shifted, pending_hi, carry;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_INCR_P (rp, up, n));
+  ASSERT_MPN (up, n);
+  ASSERT_LIMB (vl);
+
+  /* The multiplier is pre-shifted by the nail width; each low product word is
+     shifted back down before being accumulated.  */
+  vl_shifted = vl << GMP_NAIL_BITS;
+  pending_hi = 0;
+  carry = 0;
+  do
+    {
+      mp_limb_t u, hi, lo, sum;
+
+      u = *up++;               /* read before the store: rp may equal up */
+      umul_ppmm (hi, lo, u, vl_shifted);
+      sum = pending_hi + (lo >> GMP_NAIL_BITS) + carry;
+      carry = sum >> GMP_NUMB_BITS;
+      *rp++ = sum & GMP_NUMB_MASK;
+      pending_hi = hi;         /* high word is added in at the next position */
+    }
+  while (--n != 0);
+  return pending_hi + carry;
+}
+
+#endif
diff --git a/mpn/generic/mul_basecase.c b/mpn/generic/mul_basecase.c

new file mode 100644 (file)

index 0000000..726bd67
--- /dev/null
+++ b/mpn/generic/mul_basecase.c
@@ -0,0 +1,157 @@
+/* mpn_mul_basecase -- Internal routine to multiply two natural numbers
+   of length m and n.
+
+   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
+
+
+Copyright 1991, 1992, 1993, 1994, 1996, 1997, 2000, 2001, 2002 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Multiply {up,usize} by {vp,vsize} and write the result to
+   {prodp,usize+vsize}.  Must have usize>=vsize.
+
+   Note that prodp gets usize+vsize limbs stored, even if the actual result
+   only needs usize+vsize-1.
+
+   There's no good reason to call here with vsize>=MUL_TOOM22_THRESHOLD.
+   Currently this is allowed, but it might not be in the future.
+
+   This is the most critical code for multiplication.  All multiplies rely
+   on this, both small and huge.  Small ones arrive here immediately, huge
+   ones arrive here as this is the base case for Karatsuba's recursive
+   algorithm.  */
+
+void
+mpn_mul_basecase (mp_ptr rp,          /* out: product, un+vn limbs; must not overlap up or vp */
+                 mp_srcptr up, mp_size_t un,   /* in: first operand, un >= vn limbs */
+                 mp_srcptr vp, mp_size_t vn)   /* in: second operand, vn >= 1 limbs */
+{
+  ASSERT (un >= vn);
+  ASSERT (vn >= 1);
+  ASSERT (! MPN_OVERLAP_P (rp, un+vn, up, un));
+  ASSERT (! MPN_OVERLAP_P (rp, un+vn, vp, vn));
+
+  /* We first multiply by the low order limb (or depending on optional function
+     availability, limbs).  This result can be stored, not added, to rp.  We
+     also avoid a loop for zeroing this way.  */
+
+#if HAVE_NATIVE_mpn_mul_2
+  if (vn >= 2)
+    {
+      rp[un + 1] = mpn_mul_2 (rp, up, un, vp); /* return value is stored as the top limb */
+      rp += 2, vp += 2, vn -= 2;
+    }
+  else
+    {
+      rp[un] = mpn_mul_1 (rp, up, un, vp[0]);
+      return;                  /* vn was 1: nothing left to accumulate */
+    }
+#else
+  rp[un] = mpn_mul_1 (rp, up, un, vp[0]);
+  rp += 1, vp += 1, vn -= 1;
+#endif
+
+  /* Now accumulate the product of up[] and the next higher limb (or depending
+     on optional function availability, limbs) from vp[].  Each step below
+     stores the addmul return value as the new top limb, then advances rp/vp.  */
+
+#define MAX_LEFT MP_SIZE_T_MAX /* Used to simplify loops into if statements */
+
+
+#if HAVE_NATIVE_mpn_addmul_6
+  while (vn >= 6)
+    {
+      rp[un + 6 - 1] = mpn_addmul_6 (rp, up, un, vp);
+      if (MAX_LEFT == 6)
+       return;
+      rp += 6, vp += 6, vn -= 6;
+      if (MAX_LEFT < 2 * 6)
+       break;
+    }
+#undef MAX_LEFT
+#define MAX_LEFT (6 - 1)       /* the loop above exits with vn < 6 */
+#endif
+
+#if HAVE_NATIVE_mpn_addmul_5
+  while (vn >= 5)
+    {
+      rp[un + 5 - 1] = mpn_addmul_5 (rp, up, un, vp);
+      if (MAX_LEFT == 5)
+       return;
+      rp += 5, vp += 5, vn -= 5;
+      if (MAX_LEFT < 2 * 5)
+       break;
+    }
+#undef MAX_LEFT
+#define MAX_LEFT (5 - 1)       /* the loop above exits with vn < 5 */
+#endif
+
+#if HAVE_NATIVE_mpn_addmul_4
+  while (vn >= 4)
+    {
+      rp[un + 4 - 1] = mpn_addmul_4 (rp, up, un, vp);
+      if (MAX_LEFT == 4)
+       return;
+      rp += 4, vp += 4, vn -= 4;
+      if (MAX_LEFT < 2 * 4)
+       break;
+    }
+#undef MAX_LEFT
+#define MAX_LEFT (4 - 1)       /* the loop above exits with vn < 4 */
+#endif
+
+#if HAVE_NATIVE_mpn_addmul_3
+  while (vn >= 3)
+    {
+      rp[un + 3 - 1] = mpn_addmul_3 (rp, up, un, vp);
+      if (MAX_LEFT == 3)
+       return;
+      rp += 3, vp += 3, vn -= 3;
+      if (MAX_LEFT < 2 * 3)
+       break;
+    }
+#undef MAX_LEFT
+#define MAX_LEFT (3 - 1)       /* the loop above exits with vn < 3 */
+#endif
+
+#if HAVE_NATIVE_mpn_addmul_2
+  while (vn >= 2)
+    {
+      rp[un + 2 - 1] = mpn_addmul_2 (rp, up, un, vp);
+      if (MAX_LEFT == 2)
+       return;
+      rp += 2, vp += 2, vn -= 2;
+      if (MAX_LEFT < 2 * 2)
+       break;
+    }
+#undef MAX_LEFT
+#define MAX_LEFT (2 - 1)       /* the loop above exits with vn < 2 */
+#endif
+
+  while (vn >= 1)              /* remaining limbs of vp, one at a time */
+    {
+      rp[un] = mpn_addmul_1 (rp, up, un, vp[0]);
+      if (MAX_LEFT == 1)
+       return;
+      rp += 1, vp += 1, vn -= 1;
+    }
+}
diff --git a/mpn/generic/mul_fft.c b/mpn/generic/mul_fft.c

new file mode 100644 (file)

index 0000000..798f83d
--- /dev/null
+++ b/mpn/generic/mul_fft.c
@@ -0,0 +1,992 @@
+/* Schoenhage's fast multiplication modulo 2^N+1.
+
+   Contributed by Paul Zimmermann.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
+2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+/* References:
+
+   Schnelle Multiplikation grosser Zahlen, by Arnold Schoenhage and Volker
+   Strassen, Computing 7, p. 281-292, 1971.
+
+   Asymptotically fast algorithms for the numerical multiplication and division
+   of polynomials with complex coefficients, by Arnold Schoenhage, Computer
+   Algebra, EUROCAM'82, LNCS 144, p. 3-15, 1982.
+
+   Tapes versus Pointers, a study in implementing fast algorithms, by Arnold
+   Schoenhage, Bulletin of the EATCS, 30, p. 23-32, 1986.
+
+   TODO:
+
+   Implement some of the tricks published at ISSAC'2007 by Gaudry, Kruppa, and
+   Zimmermann.
+
+   It might be possible to avoid a small number of MPN_COPYs by using a
+   rotating temporary or two.
+
+   Cleanup and simplify the code!
+*/
+
+#ifdef TRACE
+#undef TRACE
+#define TRACE(x) x
+#include <stdio.h>
+#else
+#define TRACE(x)
+#endif
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#ifdef WANT_ADDSUB
+#include "generic/add_n_sub_n.c"
+#define HAVE_NATIVE_mpn_add_n_sub_n 1
+#endif
+
+static mp_limb_t mpn_mul_fft_internal
+__GMP_PROTO ((mp_ptr, mp_size_t, int, mp_ptr *, mp_ptr *,
+             mp_ptr, mp_ptr, mp_size_t, mp_size_t, mp_size_t, int **, mp_ptr, int));
+static void mpn_mul_fft_decompose
+__GMP_PROTO ((mp_ptr, mp_ptr *, int, int, mp_srcptr, mp_size_t, int, int, mp_ptr));
+
+
+/* Find the best k to use for a mod 2^(m*GMP_NUMB_BITS)+1 FFT for m >= n.
+   We have sqr=0 for a multiply, sqr=1 for a square.
+   There are three generations of this code; we keep the old ones as long as
+   some gmp-mparam.h is not updated.  */
+
+
+/*****************************************************************************/
+
+#if TUNE_PROGRAM_BUILD || (defined (MUL_FFT_TABLE3) && defined (SQR_FFT_TABLE3))
+
+#ifndef FFT_TABLE3_SIZE                /* When tuning, this is defined in gmp-impl.h */
+#if defined (MUL_FFT_TABLE3_SIZE) && defined (SQR_FFT_TABLE3_SIZE)
+#if MUL_FFT_TABLE3_SIZE > SQR_FFT_TABLE3_SIZE
+#define FFT_TABLE3_SIZE MUL_FFT_TABLE3_SIZE
+#else
+#define FFT_TABLE3_SIZE SQR_FFT_TABLE3_SIZE
+#endif
+#endif
+#endif
+
+#ifndef FFT_TABLE3_SIZE
+#define FFT_TABLE3_SIZE 200
+#endif
+
+FFT_TABLE_ATTRS struct fft_table_nk mpn_fft_table3[2][FFT_TABLE3_SIZE] =
+{
+  MUL_FFT_TABLE3,
+  SQR_FFT_TABLE3
+};
+
+int
+mpn_fft_best_k (mp_size_t n, int sqr)
+{
+  /* Walk row sqr of mpn_fft_table3 and return the k in force for size n.  */
+  const struct fft_table_nk *fft_tab, *tab;  /* automatic, never static */
+  mp_size_t tab_n, thres;
+  int last_k;
+
+  fft_tab = mpn_fft_table3[sqr];
+  last_k = fft_tab->k;
+  for (tab = fft_tab + 1; ; tab++)
+    {
+      tab_n = tab->n;          /* so that the shift is done in mp_size_t */
+      thres = tab_n << last_k;
+      if (n <= thres)
+       return last_k;         /* n lies in the range still owned by last_k */
+      last_k = tab->k;
+    }
+}
+
+#define MPN_FFT_BEST_READY 1
+#endif
+
+/*****************************************************************************/
+
+#if ! defined (MPN_FFT_BEST_READY)
+FFT_TABLE_ATTRS mp_size_t mpn_fft_table[2][MPN_FFT_TABLE_SIZE] =
+{
+  MUL_FFT_TABLE,
+  SQR_FFT_TABLE
+};
+
+int
+mpn_fft_best_k (mp_size_t n, int sqr)
+{
+  const mp_size_t *limit = mpn_fft_table[sqr];
+  int idx = 0;
+  /* First entry of the zero-terminated row that exceeds n picks the k.  */
+  while (limit[idx] != 0)
+    {
+      if (n < limit[idx])
+       return idx + FFT_FIRST_K;
+      idx++;
+    }
+  /* Off the end: 4 * (last entry) acts as one further, implicit entry.  */
+  return idx + FFT_FIRST_K + (idx != 0 && n >= 4 * limit[idx - 1]);
+}
+#endif
+
+/*****************************************************************************/
+
+
+/* Returns smallest possible number of limbs >= pl for a fft of size 2^k,
+   i.e. smallest multiple of 2^k >= pl.
+
+   Don't declare static: needed by tuneup.
+*/
+
+mp_size_t
+mpn_fft_next_size (mp_size_t pl, int k)
+{
+  mp_size_t blocks = ((pl - 1) >> k) + 1;      /* ceil (pl / 2^k) */
+  return blocks << k;                          /* back to a limb count */
+}
+
+
+/* Initialize l[i][j] with bitrev(j) */
+static void
+mpn_fft_initl (int **l, int k)
+{
+  int level, pos, half = 1;    /* half = number of entries in row level-1 */
+
+  l[0][0] = 0;
+  for (level = 1; level <= k; level++, half += half)
+    {
+      const int *prev = l[level - 1];
+      int *cur = l[level];
+      for (pos = 0; pos < half; pos++)
+       {
+         cur[pos] = 2 * prev[pos];             /* first half: doubled row */
+         cur[half + pos] = cur[pos] + 1;       /* second half: same, plus 1 */
+       }
+    }
+}
+
+
+/* r <- a*2^d mod 2^(n*GMP_NUMB_BITS)+1 with a = {a, n+1}
+   Assumes a is semi-normalized, i.e. a[n] <= 1.
+   r and a must have n+1 limbs, and not overlap.
+   d is split below into whole limbs (d / GMP_NUMB_BITS) and sh spare bits.  */
+static void
+mpn_fft_mul_2exp_modF (mp_ptr r, mp_srcptr a, unsigned int d, mp_size_t n)
+{
+  int sh;
+  mp_limb_t cc, rd;
+
+  sh = d % GMP_NUMB_BITS;      /* bit part of the shift */
+  d /= GMP_NUMB_BITS;          /* from here on d counts whole limbs */
+
+  if (d >= n)                  /* negate */
+    {
+      /* r[0..d-1]  <-- lshift(a[n-d]..a[n-1], sh)
+        r[d..n-1]  <-- -lshift(a[0]..a[n-d-1],  sh) */
+
+      d -= n;                  /* n limbs = factor 2^(n*GMP_NUMB_BITS) = -1 */
+      if (sh != 0)
+       {
+         /* no out shift below since a[n] <= 1 */
+         mpn_lshift (r, a + n - d, d + 1, sh);
+         rd = r[d];            /* saved: r[d] is overwritten just below */
+         cc = mpn_lshiftc (r + d, a, n - d, sh);
+       }
+      else
+       {
+         MPN_COPY (r, a + n - d, d);
+         rd = a[n];
+         mpn_com (r + d, a, n - d);
+         cc = 0;
+       }
+
+      /* add cc to r[0], and add rd to r[d] */
+
+      /* now add 1 in r[d], subtract 1 in r[n], i.e. add 1 in r[0] */
+
+      r[n] = 0;
+      /* cc < 2^sh <= 2^(GMP_NUMB_BITS-1) thus no overflow here */
+      cc++;
+      mpn_incr_u (r, cc);
+
+      rd++;
+      /* rd might overflow when sh=GMP_NUMB_BITS-1 */
+      cc = (rd == 0) ? 1 : rd; /* on wrap-around, carry 1 one limb higher */
+      r = r + d + (rd == 0);
+      mpn_incr_u (r, cc);
+    }
+  else
+    {
+      /* r[0..d-1]  <-- -lshift(a[n-d]..a[n-1], sh)
+        r[d..n-1]  <-- lshift(a[0]..a[n-d-1],  sh)  */
+      if (sh != 0)
+       {
+         /* no out bits below since a[n] <= 1 */
+         mpn_lshiftc (r, a + n - d, d + 1, sh);
+         rd = ~r[d];           /* undo the complement: plain shifted top limb */
+         /* {r, d+1} = {a+n-d, d+1} << sh */
+         cc = mpn_lshift (r + d, a, n - d, sh); /* {r+d, n-d} = {a, n-d}<<sh */
+       }
+      else
+       {
+         /* r[d] is not used below, but we save a test for d=0 */
+         mpn_com (r, a + n - d, d + 1);
+         rd = a[n];
+         MPN_COPY (r + d, a, n - d);
+         cc = 0;
+       }
+
+      /* now complement {r, d}, subtract cc from r[0], subtract rd from r[d] */
+
+      /* if d=0 we just have r[0]=a[n] << sh */
+      if (d != 0)
+       {
+         /* now add 1 in r[0], subtract 1 in r[d] */
+         if (cc-- == 0) /* then add 1 to r[0] */
+           cc = mpn_add_1 (r, r, n, CNST_LIMB(1));
+         cc = mpn_sub_1 (r, r, d, cc) + 1;
+         /* add 1 to cc instead of rd since rd might overflow */
+       }
+
+      /* now subtract cc and rd from r[d..n] */
+
+      r[n] = -mpn_sub_1 (r + d, r + d, n - d, cc);
+      r[n] -= mpn_sub_1 (r + d, r + d, n - d, rd);
+      if (r[n] & GMP_LIMB_HIGHBIT)
+       r[n] = mpn_add_1 (r, r, n, CNST_LIMB(1));
+    }
+}
+
+
+/* r <- a+b mod 2^(n*GMP_NUMB_BITS)+1; r, a and b each have n+1 limbs.
+   Assumes a and b are semi-normalized.  n is a limb count, typed mp_size_t
+   like the other sizes in this file.  */
+static inline void
+mpn_fft_add_modF (mp_ptr r, mp_srcptr a, mp_srcptr b, mp_size_t n)
+{
+  mp_limb_t c, x;
+
+  c = a[n] + b[n] + mpn_add_n (r, a, b, n);
+  /* 0 <= c <= 3 */
+
+#if 1
+  /* GCC 4.1 outsmarts most expressions here, and generates a 50% branch.  The
+     result is slower code, of course.  But the following outsmarts GCC.  */
+  x = (c - 1) & -(c != 0);     /* x = c - 1 when c != 0, else 0 */
+  r[n] = c - x;                        /* top limb ends up 0 or 1 */
+  MPN_DECR_U (r, n + 1, x);    /* the excess x at limb n counts as -x */
+#endif
+#if 0
+  if (c > 1)
+    {
+      r[n] = 1;                       /* r[n] - c = 1 */
+      MPN_DECR_U (r, n + 1, c - 1);
+    }
+  else
+    {
+      r[n] = c;
+    }
+#endif
+}
+
+/* r <- a-b mod 2^(n*GMP_NUMB_BITS)+1; r, a and b each have n+1 limbs.
+   Assumes a and b are semi-normalized.  n is a limb count, typed mp_size_t
+   like the other sizes in this file.  */
+static inline void
+mpn_fft_sub_modF (mp_ptr r, mp_srcptr a, mp_srcptr b, mp_size_t n)
+{
+  mp_limb_t c, x;
+
+  c = a[n] - b[n] - mpn_sub_n (r, a, b, n);
+  /* -2 <= c <= 1 */
+
+#if 1
+  /* GCC 4.1 outsmarts most expressions here, and generates a 50% branch.  The
+     result is slower code, of course.  But the following outsmarts GCC.  */
+  x = (-c) & -((c & GMP_LIMB_HIGHBIT) != 0);   /* x = -c if c < 0, else 0 */
+  r[n] = x + c;                        /* top limb ends up 0 or 1 */
+  MPN_INCR_U (r, n + 1, x);    /* a deficit x at limb n counts as +x */
+#endif
+#if 0
+  if ((c & GMP_LIMB_HIGHBIT) != 0)
+    {
+      r[n] = 0;
+      MPN_INCR_U (r, n + 1, -c);
+    }
+  else
+    {
+      r[n] = c;
+    }
+#endif
+}
+
+/* input: A[0] ... A[inc*(K-1)] are residues mod 2^N+1 where
+         N=n*GMP_NUMB_BITS, and 2^omega is a primitive root mod 2^N+1
+   output: A[inc*l[k][i]] <- \sum (2^omega)^(ij) A[inc*j] mod 2^N+1 */
+
+static void
+mpn_fft_fft (mp_ptr *Ap, mp_size_t K, int **ll,
+            mp_size_t omega, mp_size_t n, mp_size_t inc, mp_ptr tp)
+{
+  if (K == 2)                  /* base case: one add/subtract butterfly */
+    {
+      mp_limb_t cy;
+#if HAVE_NATIVE_mpn_add_n_sub_n
+      cy = mpn_add_n_sub_n (Ap[0], Ap[inc], Ap[0], Ap[inc], n + 1) & 1;
+#else
+      MPN_COPY (tp, Ap[0], n + 1);     /* keep old Ap[0]; uses n+1 limbs of tp */
+      mpn_add_n (Ap[0], Ap[0], Ap[inc], n + 1);
+      cy = mpn_sub_n (Ap[inc], tp, Ap[inc], n + 1);
+#endif
+      if (Ap[0][n] > 1) /* can be 2 or 3 */
+       Ap[0][n] = 1 - mpn_sub_1 (Ap[0], Ap[0], n, Ap[0][n] - 1);
+      if (cy) /* Ap[inc][n] can be -1 or -2 */
+       Ap[inc][n] = mpn_add_1 (Ap[inc], Ap[inc], n, ~Ap[inc][n] + 1);
+    }
+  else
+    {
+      int j;
+      int *lk = *ll;           /* index row for this level; ll-1 is passed down */
+
+      mpn_fft_fft (Ap,     K >> 1, ll-1, 2 * omega, n, inc * 2, tp); /* even */
+      mpn_fft_fft (Ap+inc, K >> 1, ll-1, 2 * omega, n, inc * 2, tp); /* odd */
+      /* A[2*j*inc]   <- A[2*j*inc] + omega^l[k][2*j*inc] A[(2j+1)inc]
+        A[(2j+1)inc] <- A[2*j*inc] + omega^l[k][(2j+1)inc] A[(2j+1)inc] */
+      for (j = 0; j < (K >> 1); j++, lk += 2, Ap += 2 * inc)
+       {
+         /* Ap[inc] <- Ap[0] + Ap[inc] * 2^(lk[1] * omega)
+            Ap[0]   <- Ap[0] + Ap[inc] * 2^(lk[0] * omega) */
+         mpn_fft_mul_2exp_modF (tp, Ap[inc], lk[0] * omega, n); /* tp = twiddled */
+         mpn_fft_sub_modF (Ap[inc], Ap[0], tp, n);     /* Ap[inc] = Ap[0] - tp */
+         mpn_fft_add_modF (Ap[0],   Ap[0], tp, n);     /* Ap[0]   = Ap[0] + tp */
+       }
+    }
+}
+
+/* input: A[0] ... A[inc*(K-1)] are residues mod 2^N+1 where
+         N=n*GMP_NUMB_BITS, and 2^omega is a primitive root mod 2^N+1
+   output: A[inc*l[k][i]] <- \sum (2^omega)^(ij) A[inc*j] mod 2^N+1
+   tp must have space for 2*(n+1) limbs.
+*/
+
+
+/* Given ap[0..n] with ap[n]<=1, reduce it modulo 2^(n*GMP_NUMB_BITS)+1,
+   by subtracting that modulus if necessary.
+
+   If ap[0..n] is exactly 2^(n*GMP_NUMB_BITS) then the decrement borrows out
+   of ap[n] and the low limbs must be zeroed out again.  This will occur very
+   infrequently.  */
+
+static inline void
+mpn_fft_normalize (mp_ptr ap, mp_size_t n)
+{
+  int wrapped;
+
+  if (ap[n] == 0)
+    return;                    /* top limb clear: nothing to subtract */
+
+  MPN_DECR_U (ap, n + 1, CNST_LIMB(1));
+  wrapped = (ap[n] == 0);
+  if (wrapped)
+    /* Input was exactly 2^(n*GMP_NUMB_BITS): the decrement borrowed out of
+       ap[n] and filled the low limbs with ones.  Put the value back (rare).  */
+    MPN_ZERO (ap, n);
+
+  ap[n] = wrapped;
+}
+
+/* a[i] <- a[i]*b[i] mod 2^(n*GMP_NUMB_BITS)+1 for 0 <= i < K */
+static void
+mpn_fft_mul_modF_K (mp_ptr *ap, mp_ptr *bp, mp_size_t n, int K)
+{
+  int i;
+  int sqr = (ap == bp);                /* same pointer array means squaring */
+  TMP_DECL;
+
+  TMP_MARK;
+
+  if (n >= (sqr ? SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD))
+    {                          /* large n: each product uses a nested FFT */
+      int k, K2, nprime2, Nprime2, M2, maxLK, l, Mp2;
+      int **fft_l;
+      mp_ptr *Ap, *Bp, A, B, T;
+
+      k = mpn_fft_best_k (n, sqr);
+      K2 = 1 << k;
+      ASSERT_ALWAYS((n & (K2 - 1)) == 0);
+      maxLK = (K2 > GMP_NUMB_BITS) ? K2 : GMP_NUMB_BITS;
+      M2 = n * GMP_NUMB_BITS >> k;
+      l = n >> k;
+      Nprime2 = ((2 * M2 + k + 2 + maxLK) / maxLK) * maxLK;
+      /* Nprime2 = ceil((2*M2+k+3)/maxLK)*maxLK*/
+      nprime2 = Nprime2 / GMP_NUMB_BITS;
+
+      /* we should ensure that nprime2 is a multiple of the next K */
+      if (nprime2 >= (sqr ? SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD))
+       {
+         unsigned long K3;
+         for (;;)
+           {
+             K3 = 1L << mpn_fft_best_k (nprime2, sqr);
+             if ((nprime2 & (K3 - 1)) == 0)
+               break;
+             nprime2 = (nprime2 + K3 - 1) & -K3;
+             Nprime2 = nprime2 * GMP_NUMB_BITS; /* same unit as the division above */
+             /* warning: since nprime2 changed, K3 may change too! */
+           }
+       }
+      ASSERT_ALWAYS(nprime2 < n); /* otherwise we'll loop */
+
+      Mp2 = Nprime2 >> k;
+
+      Ap = TMP_ALLOC_MP_PTRS (K2);
+      Bp = TMP_ALLOC_MP_PTRS (K2);
+      A = TMP_ALLOC_LIMBS (2 * (nprime2 + 1) << k);
+      T = TMP_ALLOC_LIMBS (2 * (nprime2 + 1));
+      B = A + ((nprime2 + 1) << k);    /* B is the second half of A's block */
+      fft_l = TMP_ALLOC_TYPE (k + 1, int *);
+      for (i = 0; i <= k; i++)
+       fft_l[i] = TMP_ALLOC_TYPE (1<<i, int);
+      mpn_fft_initl (fft_l, k);
+
+      TRACE (printf ("recurse: %ldx%ld limbs -> %d times %dx%d (%1.2f)\n", n,
+                   n, K2, nprime2, nprime2, 2.0*(double)n/nprime2/K2));
+      for (i = 0; i < K; i++, ap++, bp++)
+       {
+         mp_limb_t cy;
+         mpn_fft_normalize (*ap, n);
+         if (!sqr)
+           mpn_fft_normalize (*bp, n);
+
+         mpn_mul_fft_decompose (A, Ap, K2, nprime2, *ap, (l << k) + 1, l, Mp2, T);
+         if (!sqr)
+           mpn_mul_fft_decompose (B, Bp, K2, nprime2, *bp, (l << k) + 1, l, Mp2, T);
+
+         cy = mpn_mul_fft_internal (*ap, n, k, Ap, Bp, A, B, nprime2,
+                                    l, Mp2, fft_l, T, sqr);
+         (*ap)[n] = cy;        /* returned high limb completes the n+1 limbs */
+       }
+    }
+  else
+    {                          /* small n: plain product, then fold the halves */
+      mp_ptr a, b, tp, tpn;
+      mp_limb_t cc;
+      int n2 = 2 * n;
+      tp = TMP_ALLOC_LIMBS (n2);
+      tpn = tp + n;            /* high half of the 2n-limb product */
+      TRACE (printf ("  mpn_mul_n %d of %ld limbs\n", K, n));
+      for (i = 0; i < K; i++)
+       {
+         a = *ap++;
+         b = *bp++;
+         if (sqr)
+           mpn_sqr (tp, a, n);
+         else
+           mpn_mul_n (tp, b, a, n);
+         if (a[n] != 0)        /* top limb of a set: add b at the high half */
+           cc = mpn_add_n (tpn, tpn, b, n);
+         else
+           cc = 0;
+         if (b[n] != 0)        /* top limb of b set: add a at the high half */
+           cc += mpn_add_n (tpn, tpn, a, n) + a[n];
+         if (cc != 0)
+           {
+             /* FIXME: use MPN_INCR_U here, since carry is not expected.  */
+             cc = mpn_add_1 (tp, tp, n2, cc);
+             ASSERT (cc == 0);
+           }
+         a[n] = mpn_sub_n (a, tp, tpn, n) && mpn_add_1 (a, a, n, CNST_LIMB(1));
+       }
+    }
+  TMP_FREE;
+}
+
+
+/* input: A^[l[k][0]] A^[l[k][1]] ... A^[l[k][K-1]]
+   output: K*A[0] K*A[K-1] ... K*A[1].
+   Assumes the Ap[] are pseudo-normalized, i.e. 0 <= Ap[][n] <= 1.
+   This condition is also fulfilled at exit.
+*/
+static void
+mpn_fft_fftinv (mp_ptr *Ap, int K, mp_size_t omega, mp_size_t n, mp_ptr tp)
+{
+  if (K == 2)                  /* base case: one add/subtract butterfly */
+    {
+      mp_limb_t cy;
+#if HAVE_NATIVE_mpn_add_n_sub_n
+      cy = mpn_add_n_sub_n (Ap[0], Ap[1], Ap[0], Ap[1], n + 1) & 1;
+#else
+      MPN_COPY (tp, Ap[0], n + 1);     /* keep old Ap[0]; uses n+1 limbs of tp */
+      mpn_add_n (Ap[0], Ap[0], Ap[1], n + 1);
+      cy = mpn_sub_n (Ap[1], tp, Ap[1], n + 1);
+#endif
+      if (Ap[0][n] > 1) /* can be 2 or 3 */
+       Ap[0][n] = 1 - mpn_sub_1 (Ap[0], Ap[0], n, Ap[0][n] - 1);
+      if (cy) /* Ap[1][n] can be -1 or -2 */
+       Ap[1][n] = mpn_add_1 (Ap[1], Ap[1], n, ~Ap[1][n] + 1);
+    }
+  else
+    {
+      int j, K2 = K >> 1;
+
+      mpn_fft_fftinv (Ap,      K2, 2 * omega, n, tp);  /* lower half */
+      mpn_fft_fftinv (Ap + K2, K2, 2 * omega, n, tp);  /* upper half */
+      /* A[j]     <- A[j] + omega^j A[j+K/2]
+        A[j+K/2] <- A[j] + omega^(j+K/2) A[j+K/2] */
+      for (j = 0; j < K2; j++, Ap++)
+       {
+         /* Ap[K2] <- Ap[0] + Ap[K2] * 2^((j + K2) * omega)
+            Ap[0]  <- Ap[0] + Ap[K2] * 2^(j * omega) */
+         mpn_fft_mul_2exp_modF (tp, Ap[K2], j * omega, n);     /* tp = twiddled */
+         mpn_fft_sub_modF (Ap[K2], Ap[0], tp, n);      /* Ap[K2] = Ap[0] - tp */
+         mpn_fft_add_modF (Ap[0],  Ap[0], tp, n);      /* Ap[0]  = Ap[0] + tp */
+       }
+    }
+}
+
+
+/* R <- A/2^k mod 2^(n*GMP_NUMB_BITS)+1 */
+static void
+mpn_fft_div_2exp_modF (mp_ptr r, mp_srcptr a, int k, mp_size_t n)
+{
+  /* 2^(2*n*GMP_NUMB_BITS) = 1 mod 2^(n*GMP_NUMB_BITS)+1, hence dividing by
+     2^k is the same as multiplying by 2^(2*n*GMP_NUMB_BITS - k).  */
+  int inv_shift = 2 * n * GMP_NUMB_BITS - k;
+
+  ASSERT (r != a);
+  mpn_fft_mul_2exp_modF (r, a, inv_shift, n);
+  /* Normalize so that the result is < 2^(n*GMP_NUMB_BITS)+1.  */
+  mpn_fft_normalize (r, n);
+}
+
+
+/* {rp,n} <- {ap,an} mod 2^(n*GMP_NUMB_BITS)+1, n <= an <= 3*n.
+   Returns carry out, i.e. 1 iff {ap,an} = -1 mod 2^(n*GMP_NUMB_BITS)+1,
+   then {rp,n}=0.
+*/
+static int
+mpn_fft_norm_modF (mp_ptr rp, mp_size_t n, mp_ptr ap, mp_size_t an)
+{
+  mp_size_t l;
+  long int m;
+  mp_limb_t cc;
+  int rpn;                     /* signed value of the virtual limb rp[n] */
+
+  ASSERT ((n <= an) && (an <= 3 * n));
+  m = an - 2 * n;              /* limbs at or above position 2n, if any */
+  if (m > 0)
+    {
+      l = n;
+      /* add {ap, m} and {ap+2n, m} in {rp, m} */
+      cc = mpn_add_n (rp, ap, ap + 2 * n, m);
+      /* copy {ap+m, n-m} to {rp+m, n-m}, adding in the carry cc */
+      rpn = mpn_add_1 (rp + m, ap + m, n - m, cc);
+    }
+  else
+    {
+      l = an - n; /* l <= n */
+      MPN_COPY (rp, ap, n);
+      rpn = 0;
+    }
+
+  /* remains to subtract {ap+n, l} from {rp, n+1} */
+  cc = mpn_sub_n (rp, rp, ap + n, l);
+  rpn -= mpn_sub_1 (rp + l, rp + l, n - l, cc);
+  if (rpn < 0) /* necessarily rpn = -1 */
+    rpn = mpn_add_1 (rp, rp, n, CNST_LIMB(1));
+  return rpn;
+}
+
+/* store in A[0..nprime] the first M bits from {n, nl},
+   in A[nprime+1..] the following M bits, ...
+   Assumes M is a multiple of GMP_NUMB_BITS (M = l * GMP_NUMB_BITS).
+   T must have space for at least (nprime + 1) limbs.
+   We must have nl <= 2*K*l.
+*/
+static void
+mpn_mul_fft_decompose (mp_ptr A, mp_ptr *Ap, int K, int nprime, mp_srcptr n,
+                      mp_size_t nl, int l, int Mp, mp_ptr T)
+{
+  int i, j;
+  mp_ptr tmp;
+  mp_size_t Kl = K * l;                /* limbs covered by K pieces of l limbs */
+  TMP_DECL;
+  TMP_MARK;
+
+  if (nl > Kl) /* normalize {n, nl} mod 2^(Kl*GMP_NUMB_BITS)+1 */
+    {
+      mp_size_t dif = nl - Kl;
+      mp_limb_signed_t cy;     /* signed: +1 per borrow, -1 per carry */
+
+      tmp = TMP_ALLOC_LIMBS(Kl + 1);
+
+      if (dif > Kl)
+       {
+         int subp = 0;         /* nonzero when the next chunk is subtracted */
+
+         cy = mpn_sub_n (tmp, n, n + Kl, Kl);
+         n += 2 * Kl;
+         dif -= Kl;
+
+         /* now dif > 0 */
+         while (dif > Kl)
+           {
+             if (subp)
+               cy += mpn_sub_n (tmp, tmp, n, Kl);
+             else
+               cy -= mpn_add_n (tmp, tmp, n, Kl);
+             subp ^= 1;
+             n += Kl;
+             dif -= Kl;
+           }
+         /* now dif <= Kl */
+         if (subp)
+           cy += mpn_sub (tmp, tmp, Kl, n, dif);
+         else
+           cy -= mpn_add (tmp, tmp, Kl, n, dif);
+         if (cy >= 0)
+           cy = mpn_add_1 (tmp, tmp, Kl, cy);
+         else
+           cy = mpn_sub_1 (tmp, tmp, Kl, -cy);
+       }
+      else /* dif <= Kl, i.e. nl <= 2 * Kl */
+       {
+         cy = mpn_sub (tmp, n, Kl, n + Kl, dif);
+         cy = mpn_add_1 (tmp, tmp, Kl, cy);
+       }
+      tmp[Kl] = cy;
+      nl = Kl + 1;
+      n = tmp;                 /* from here on read the reduced copy */
+    }
+  for (i = 0; i < K; i++)
+    {
+      Ap[i] = A;
+      /* store the next M bits of n into A[0..nprime] */
+      if (nl > 0) /* nl is the number of remaining limbs */
+       {
+         j = (l <= nl && i < K - 1) ? l : nl; /* store j next limbs */
+         nl -= j;
+         MPN_COPY (T, n, j);
+         MPN_ZERO (T + j, nprime + 1 - j);
+         n += l;
+         mpn_fft_mul_2exp_modF (A, T, i * Mp, nprime); /* piece i times 2^(i*Mp) */
+       }
+      else
+       MPN_ZERO (A, nprime + 1);
+      A += nprime + 1;         /* each piece occupies nprime+1 limbs of A */
+    }
+  ASSERT_ALWAYS (nl == 0);
+  TMP_FREE;
+}
+
+/* op <- n*m mod 2^N+1 with fft of size 2^k where N=pl*GMP_NUMB_BITS
+   op is pl limbs, its high bit is returned.
+   One must have pl = mpn_fft_next_size (pl, k).
+   T must have space for 2 * (nprime + 1) limbs.
+*/
+
+static mp_limb_t
+mpn_mul_fft_internal (mp_ptr op, mp_size_t pl, int k,
+                     mp_ptr *Ap, mp_ptr *Bp, mp_ptr A, mp_ptr B,
+                     mp_size_t nprime, mp_size_t l, mp_size_t Mp,
+                     int **fft_l, mp_ptr T, int sqr)
+{
+  int K, i, pla, lo, sh, j;
+  mp_ptr p;
+  mp_limb_t cc;
+
+  K = 1 << k;                  /* number of pieces / transform length */
+
+  /* direct fft's */
+  mpn_fft_fft (Ap, K, fft_l + k, 2 * Mp, nprime, 1, T);
+  if (!sqr)
+    mpn_fft_fft (Bp, K, fft_l + k, 2 * Mp, nprime, 1, T);
+
+  /* term to term multiplications */
+  mpn_fft_mul_modF_K (Ap, sqr ? Ap : Bp, nprime, K);
+
+  /* inverse fft's */
+  mpn_fft_fftinv (Ap, K, 2 * Mp, nprime, T);
+
+  /* division of terms after inverse fft */
+  Bp[0] = T + nprime + 1;      /* second half of T receives the first term */
+  mpn_fft_div_2exp_modF (Bp[0], Ap[0], k, nprime);
+  for (i = 1; i < K; i++)
+    {
+      Bp[i] = Ap[i - 1];       /* Ap[i-1] was consumed by the previous step */
+      mpn_fft_div_2exp_modF (Bp[i], Ap[i], k + (K - i) * Mp, nprime);
+    }
+
+  /* addition of terms in result p */
+  MPN_ZERO (T, nprime + 1);
+  pla = l * (K - 1) + nprime + 1; /* number of required limbs for p */
+  p = B; /* callers give B at least pla limbs: K*(n' + 1), or pla when squaring */
+  MPN_ZERO (p, pla);
+  cc = 0; /* will accumulate the (signed) carry at p[pla] */
+  for (i = K - 1, lo = l * i + nprime,sh = l * i; i >= 0; i--,lo -= l,sh -= l)
+    {
+      mp_ptr n = p + sh;
+
+      j = (K - i) & (K - 1);   /* (K - i) mod K: where term i was stored */
+
+      if (mpn_add_n (n, n, Bp[j], nprime + 1))
+       cc += mpn_add_1 (n + nprime + 1, n + nprime + 1,
+                         pla - sh - nprime - 1, CNST_LIMB(1));
+      T[2 * l] = i + 1; /* T = (i + 1)*2^(2*M) */
+      if (mpn_cmp (Bp[j], T, nprime + 1) > 0)
+       { /* subtract 2^N'+1 */
+         cc -= mpn_sub_1 (n, n, pla - sh, CNST_LIMB(1));
+         cc -= mpn_sub_1 (p + lo, p + lo, pla - lo, CNST_LIMB(1));
+       }
+    }
+  if (cc == -CNST_LIMB(1))
+    {
+      if ((cc = mpn_add_1 (p + pla - pl, p + pla - pl, pl, CNST_LIMB(1))))
+       {
+         /* p[pla-pl]...p[pla-1] are all zero */
+         mpn_sub_1 (p + pla - pl - 1, p + pla - pl - 1, pl + 1, CNST_LIMB(1));
+         mpn_sub_1 (p + pla - 1, p + pla - 1, 1, CNST_LIMB(1));
+       }
+    }
+  else if (cc == 1)
+    {
+      if (pla >= 2 * pl)
+       {
+         while ((cc = mpn_add_1 (p + pla - 2 * pl, p + pla - 2 * pl, 2 * pl, cc)))
+           ;
+       }
+      else
+       {
+         cc = mpn_sub_1 (p + pla - pl, p + pla - pl, pl, cc);
+         ASSERT (cc == 0);
+       }
+    }
+  else
+    ASSERT (cc == 0);
+
+  /* here p < 2^(2M) [K 2^(M(K-1)) + (K-1) 2^(M(K-2)) + ... ]
+     < K 2^(2M) [2^(M(K-1)) + 2^(M(K-2)) + ... ]
+     < K 2^(2M) 2^(M(K-1))*2 = 2^(M*K+M+k+1) */
+  return mpn_fft_norm_modF (op, pl, p, pla);
+}
+
+/* return the lcm of a and 2^k */
+static unsigned long int
+mpn_mul_fft_lcm (unsigned long int a, unsigned int k)
+{
+  unsigned int shared;
+
+  /* lcm (a, 2^k) = a * 2^k / gcd (a, 2^k), and the gcd is 2^shared where
+     shared = min (k, number of trailing zero bits of a).  Remove those
+     bits from a first, then apply the whole factor 2^k.  */
+  for (shared = 0; shared < k && a % 2 == 0; shared++)
+    a >>= 1;
+  return a << k;
+}
+
+/* {op,pl} <- {n,nl}*{m,ml} mod 2^(pl*GMP_NUMB_BITS)+1, FFT size 2^k.  */
+mp_limb_t
+mpn_mul_fft (mp_ptr op, mp_size_t pl,
+            mp_srcptr n, mp_size_t nl,
+            mp_srcptr m, mp_size_t ml,
+            int k)
+{
+  int K, maxLK, i;
+  mp_size_t N, Nprime, nprime, M, Mp, l;
+  mp_ptr *Ap, *Bp, A, T, B;
+  int **fft_l;
+  int sqr = (n == m && nl == ml);      /* identical operand means squaring */
+  mp_limb_t h;
+  TMP_DECL;
+
+  TRACE (printf ("\nmpn_mul_fft pl=%ld nl=%ld ml=%ld k=%d\n", pl, nl, ml, k));
+  ASSERT_ALWAYS (mpn_fft_next_size (pl, k) == pl);
+
+  TMP_MARK;
+  N = pl * GMP_NUMB_BITS;
+  fft_l = TMP_ALLOC_TYPE (k + 1, int *);
+  for (i = 0; i <= k; i++)
+    fft_l[i] = TMP_ALLOC_TYPE (1 << i, int);
+  mpn_fft_initl (fft_l, k);
+  K = 1 << k;
+  M = N >> k;  /* N = 2^k M */
+  l = 1 + (M - 1) / GMP_NUMB_BITS;
+  maxLK = mpn_mul_fft_lcm ((unsigned long) GMP_NUMB_BITS, k); /* lcm (GMP_NUMB_BITS, 2^k) */
+
+  Nprime = (1 + (2 * M + k + 2) / maxLK) * maxLK;
+  /* Nprime = ceil((2*M+k+3)/maxLK)*maxLK; */
+  nprime = Nprime / GMP_NUMB_BITS;
+  TRACE (printf ("N=%ld K=%d, M=%ld, l=%ld, maxLK=%d, Np=%ld, np=%ld\n",
+                N, K, M, l, maxLK, Nprime, nprime));
+  /* we should ensure that recursively, nprime is a multiple of the next K */
+  if (nprime >= (sqr ? SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD))
+    {
+      unsigned long K2;
+      for (;;)
+       {
+         K2 = 1L << mpn_fft_best_k (nprime, sqr);
+         if ((nprime & (K2 - 1)) == 0)
+           break;
+         nprime = (nprime + K2 - 1) & -K2;
+         Nprime = nprime * GMP_NUMB_BITS;      /* same unit as the division above */
+         /* warning: since nprime changed, K2 may change too! */
+       }
+      TRACE (printf ("new maxLK=%d, Np=%ld, np=%ld\n", maxLK, Nprime, nprime));
+    }
+  ASSERT_ALWAYS (nprime < pl); /* otherwise we'll loop */
+
+  T = TMP_ALLOC_LIMBS (2 * (nprime + 1));
+  Mp = Nprime >> k;
+
+  TRACE (printf ("%ldx%ld limbs -> %d times %ldx%ld limbs (%1.2f)\n",
+               pl, pl, K, nprime, nprime, 2.0 * (double) N / Nprime / K);
+        printf ("   temp space %ld\n", 2 * K * (nprime + 1)));
+
+  A = TMP_ALLOC_LIMBS (K * (nprime + 1));
+  Ap = TMP_ALLOC_MP_PTRS (K);
+  mpn_mul_fft_decompose (A, Ap, K, nprime, n, nl, l, Mp, T);
+  if (sqr)
+    {
+      /* B is not decomposed here; it only serves as result area later.  */
+      mp_size_t pla;
+      pla = l * (K - 1) + nprime + 1; /* number of required limbs for p */
+      B = TMP_ALLOC_LIMBS (pla);
+      Bp = TMP_ALLOC_MP_PTRS (K);
+    }
+  else
+    {
+      B = TMP_ALLOC_LIMBS (K * (nprime + 1));
+      Bp = TMP_ALLOC_MP_PTRS (K);
+      mpn_mul_fft_decompose (B, Bp, K, nprime, m, ml, l, Mp, T);
+    }
+  h = mpn_mul_fft_internal (op, pl, k, Ap, Bp, A, B, nprime, l, Mp, fft_l, T, sqr);
+
+  TMP_FREE;
+  return h;                    /* high bit of the pl-limb result */
+}
+
+#if WANT_OLD_FFT_FULL
+/* multiply {n, nl} by {m, ml}, and put the result in {op, nl+ml} */
+void
+mpn_mul_fft_full (mp_ptr op,
+                 mp_srcptr n, mp_size_t nl,
+                 mp_srcptr m, mp_size_t ml)
+{
+  mp_ptr pad_op;
+  mp_size_t pl, pl2, pl3, l;
+  int k2, k3;
+  int sqr = (n == m && nl == ml);
+  int cc, c2, oldcc;
+
+  pl = nl + ml; /* total number of limbs of the result */
+
+  /* perform a fft mod 2^(2N)+1 and one mod 2^(3N)+1.
+     We must have pl3 = 3/2 * pl2, with pl2 a multiple of 2^k2, and
+     pl3 a multiple of 2^k3. Since k3 >= k2, both are multiples of 2^k2,
+     and pl2 must be an even multiple of 2^k2. Thus (pl2,pl3) =
+     (2*j*2^k2,3*j*2^k2), which works for 3*j <= pl/2^k2 <= 5*j.
+     We need that consecutive intervals overlap, i.e. 5*j >= 3*(j+1),
+     which requires j>=2. Thus this scheme requires pl >= 6 * 2^FFT_FIRST_K. */
+
+  /*  ASSERT_ALWAYS(pl >= 6 * (1 << FFT_FIRST_K)); */
+
+  pl2 = (2 * pl - 1) / 5; /* ceil (2pl/5) - 1 */
+  do
+    {
+      pl2++;
+      k2 = mpn_fft_best_k (pl2, sqr); /* best fft size for pl2 limbs */
+      pl2 = mpn_fft_next_size (pl2, k2);
+      pl3 = 3 * pl2 / 2; /* since k>=FFT_FIRST_K=4, pl2 is a multiple of 2^4,
+                           thus pl2 / 2 is exact */
+      k3 = mpn_fft_best_k (pl3, sqr);
+    }
+  while (mpn_fft_next_size (pl3, k3) != pl3);  /* retry until pl3 fits k3 too */
+
+  TRACE (printf ("mpn_mul_fft_full nl=%ld ml=%ld -> pl2=%ld pl3=%ld k=%d\n",
+                nl, ml, pl2, pl3, k2));
+
+  ASSERT_ALWAYS(pl3 <= pl);
+  cc = mpn_mul_fft (op, pl3, n, nl, m, ml, k3);     /* mu */
+  ASSERT(cc == 0);
+  pad_op = __GMP_ALLOCATE_FUNC_LIMBS (pl2);    /* released near the end */
+  cc = mpn_mul_fft (pad_op, pl2, n, nl, m, ml, k2); /* lambda */
+  cc = -cc + mpn_sub_n (pad_op, pad_op, op, pl2);    /* lambda - low(mu) */
+  /* 0 <= cc <= 1 */
+  ASSERT(0 <= cc && cc <= 1);
+  l = pl3 - pl2; /* l = pl2 / 2 since pl3 = 3/2 * pl2 */
+  c2 = mpn_add_n (pad_op, pad_op, op + pl2, l);
+  cc = mpn_add_1 (pad_op + l, pad_op + l, l, (mp_limb_t) c2) - cc;
+  ASSERT(-1 <= cc && cc <= 1);
+  if (cc < 0)
+    cc = mpn_add_1 (pad_op, pad_op, pl2, (mp_limb_t) -cc);
+  ASSERT(0 <= cc && cc <= 1);
+  /* now lambda-mu = {pad_op, pl2} - cc mod 2^(pl2*GMP_NUMB_BITS)+1 */
+  oldcc = cc;
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  c2 = mpn_add_n_sub_n (pad_op + l, pad_op, pad_op, pad_op + l, l);
+  /* c2 & 1 is the borrow, c2 & 2 is the carry */
+  cc += c2 >> 1; /* carry out from high <- low + high */
+  c2 = c2 & 1; /* borrow out from low <- low - high */
+#else
+  {
+    mp_ptr tmp;
+    TMP_DECL;
+
+    TMP_MARK;
+    tmp = TMP_ALLOC_LIMBS (l);
+    MPN_COPY (tmp, pad_op, l); /* old low half, needed after it is overwritten */
+    c2 = mpn_sub_n (pad_op,      pad_op, pad_op + l, l);
+    cc += mpn_add_n (pad_op + l, tmp,    pad_op + l, l);
+    TMP_FREE;
+  }
+#endif
+  c2 += oldcc;
+  /* first normalize {pad_op, pl2} before dividing by 2: c2 is the borrow
+     at pad_op + l, cc is the carry at pad_op + pl2 */
+  /* 0 <= cc <= 2 */
+  cc -= mpn_sub_1 (pad_op + l, pad_op + l, l, (mp_limb_t) c2);
+  /* -1 <= cc <= 2 */
+  if (cc > 0)
+    cc = -mpn_sub_1 (pad_op, pad_op, pl2, (mp_limb_t) cc);
+  /* now -1 <= cc <= 0 */
+  if (cc < 0)
+    cc = mpn_add_1 (pad_op, pad_op, pl2, (mp_limb_t) -cc);
+  /* now {pad_op, pl2} is normalized, with 0 <= cc <= 1 */
+  if (pad_op[0] & 1) /* if odd, add 2^(pl2*GMP_NUMB_BITS)+1 */
+    cc += 1 + mpn_add_1 (pad_op, pad_op, pl2, CNST_LIMB(1));
+  /* now 0 <= cc <= 2, but cc=2 cannot occur since it would give a carry
+     out below */
+  mpn_rshift (pad_op, pad_op, pl2, 1); /* divide by two */
+  if (cc) /* then cc=1 */
+    pad_op [pl2 - 1] |= (mp_limb_t) 1 << (GMP_NUMB_BITS - 1);
+  /* now {pad_op,pl2}-cc = (lambda-mu)/(1-2^(l*GMP_NUMB_BITS))
+     mod 2^(pl2*GMP_NUMB_BITS) + 1 */
+  c2 = mpn_add_n (op, op, pad_op, pl2); /* no need to add cc (is 0) */
+  /* since pl2+pl3 >= pl, necessarily the extra limbs (including cc) are zero */
+  MPN_COPY (op + pl3, pad_op, pl - pl3);
+  ASSERT_MPN_ZERO_P (pad_op + pl - pl3, pl2 + pl3 - pl);
+  __GMP_FREE_FUNC_LIMBS (pad_op, pl2);
+  /* since the final result has at most pl limbs, no carry out below */
+  mpn_add_1 (op + pl2, op + pl2, pl - pl2, (mp_limb_t) c2);
+}
+#endif
diff --git a/mpn/generic/mul_n.c b/mpn/generic/mul_n.c

new file mode 100644 (file)

index 0000000..80cfb08
--- /dev/null
+++ b/mpn/generic/mul_n.c
@@ -0,0 +1,86 @@
+/* mpn_mul_n -- multiply natural numbers.
+
+Copyright 1991, 1993, 1994, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
+2005, 2008, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+void
+mpn_mul_n (mp_ptr p, mp_srcptr a, mp_srcptr b, mp_size_t n)
+{
+  ASSERT (n >= 1);
+  ASSERT (! MPN_OVERLAP_P (p, 2 * n, a, n));
+  ASSERT (! MPN_OVERLAP_P (p, 2 * n, b, n));
+  /* {p, 2n} <- {a, n} * {b, n}: pick the algorithm from the size n.  */
+  if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
+    {
+      mpn_mul_basecase (p, a, n, b, n);
+    }
+  else if (BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD))
+    {
+      /* Allocate workspace of fixed size on stack: fast!  The array is sized
+         for the largest n this branch accepts (see the ASSERT below).  */
+      mp_limb_t ws[mpn_toom22_mul_itch (MUL_TOOM33_THRESHOLD_LIMIT-1,
+                                       MUL_TOOM33_THRESHOLD_LIMIT-1)];
+      ASSERT (MUL_TOOM33_THRESHOLD <= MUL_TOOM33_THRESHOLD_LIMIT);
+      mpn_toom22_mul (p, a, n, b, n, ws);
+    }
+  else if (BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD))
+    {
+      mp_ptr ws;               /* scratch sized by the matching itch function */
+      TMP_SDECL;
+      TMP_SMARK;
+      ws = TMP_SALLOC_LIMBS (mpn_toom33_mul_itch (n, n));
+      mpn_toom33_mul (p, a, n, b, n, ws);
+      TMP_SFREE;
+    }
+  else if (BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD))
+    {
+      mp_ptr ws;
+      TMP_SDECL;
+      TMP_SMARK;
+      ws = TMP_SALLOC_LIMBS (mpn_toom44_mul_itch (n, n));
+      mpn_toom44_mul (p, a, n, b, n, ws);
+      TMP_SFREE;
+    }
+  else if (BELOW_THRESHOLD (n, MUL_TOOM8H_THRESHOLD))
+    {
+      mp_ptr ws;
+      TMP_SDECL;
+      TMP_SMARK;
+      ws = TMP_SALLOC_LIMBS (mpn_toom6_mul_n_itch (n));
+      mpn_toom6h_mul (p, a, n, b, n, ws);
+      TMP_SFREE;
+    }
+  else if (BELOW_THRESHOLD (n, MUL_FFT_THRESHOLD))
+    {
+      /* NOTE(review): TMP_ rather than TMP_S macros here -- presumably since
+         this workspace can be too large for the stack; confirm.  */
+      mp_ptr ws;
+      TMP_DECL;
+      TMP_MARK;
+      ws = TMP_ALLOC_LIMBS (mpn_toom8_mul_n_itch (n));
+      mpn_toom8h_mul (p, a, n, b, n, ws);
+      TMP_FREE;
+    }
+  else
+    {
+      /* The current FFT code allocates its own space.  That should probably
+        change.  */
+      mpn_fft_mul (p, a, n, b, n);
+    }
+}
diff --git a/mpn/generic/mullo_basecase.c b/mpn/generic/mullo_basecase.c

new file mode 100644 (file)

index 0000000..ceea829
--- /dev/null
+++ b/mpn/generic/mullo_basecase.c
@@ -0,0 +1,41 @@
+/* mpn_mullo_basecase -- Internal routine to multiply two natural
+   numbers of length n and return the low n limbs of the product.
+
+   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
+
+
+Copyright (C) 2000, 2002, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/*
+  FIXME: Should use mpn_addmul_2 (and higher).
+*/
+
+void
+mpn_mullo_basecase (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+  mp_size_t j = 0;     /* index of the multiplier limb vp[j] */
+
+  mpn_mul_1 (rp, up, n, vp[j]);        /* row 0 initialises all n limbs of rp */
+
+  while (++j < n)      /* row j only touches limbs j .. n-1 of rp */
+    mpn_addmul_1 (rp + j, up, n - j, vp[j]);
+}
diff --git a/mpn/generic/mullo_n.c b/mpn/generic/mullo_n.c

new file mode 100644 (file)

index 0000000..24c2c3c
--- /dev/null
+++ b/mpn/generic/mullo_n.c
@@ -0,0 +1,245 @@
+/* mpn_mullo_n -- multiply two n-limb numbers and return the low n limbs
+   of their products.
+
+   Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+
+   THIS IS (FOR NOW) AN INTERNAL FUNCTION.  IT IS ONLY SAFE TO REACH THIS
+   FUNCTION THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST GUARANTEED
+   THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2004, 2005, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+#ifndef MULLO_BASECASE_THRESHOLD
+#define MULLO_BASECASE_THRESHOLD 0     /* never use mpn_mul_basecase */
+#endif
+
+#ifndef MULLO_DC_THRESHOLD
+#define MULLO_DC_THRESHOLD 3*MUL_TOOM22_THRESHOLD
+#endif
+
+#ifndef MULLO_MUL_N_THRESHOLD
+#define MULLO_MUL_N_THRESHOLD MUL_FFT_THRESHOLD
+#endif
+
+#if TUNE_PROGRAM_BUILD
+#define MAYBE_range_basecase 1
+#define MAYBE_range_toom22   1
+#else
+#define MAYBE_range_basecase                                           \
+  ((MULLO_DC_THRESHOLD == 0 ? MULLO_BASECASE_THRESHOLD : MULLO_DC_THRESHOLD) < MUL_TOOM22_THRESHOLD*36/(36-11))
+#define MAYBE_range_toom22                                             \
+  ((MULLO_DC_THRESHOLD == 0 ? MULLO_BASECASE_THRESHOLD : MULLO_DC_THRESHOLD) < MUL_TOOM33_THRESHOLD*36/(36-11) )
+#endif
+
+/*  THINK: The DC strategy uses different constants in different Toom's
+        ranges. Something smoother?
+*/
+
+/*
+  Compute the least significant half of the product {xp,n}*{yp,n}, or
+  formally {rp,n} = {xp,n}*{yp,n} Mod (B^n).
+
+  Above the given threshold, the Divide and Conquer strategy is used.
+  The operands are split in two, and a full product plus two mullo
+  are used to obtain the final result. The more natural strategy is to
+  split in two halves, but this is far from optimal when a
+  sub-quadratic multiplication is used.
+
+  Mulders suggests an unbalanced split in favour of the full product,
+  split n = n1 + n2, where an = n1 <= n2 = (1-a)n; i.e. 0 < a <= 1/2.
+
+  To compute the value of a, we assume that the cost of mullo for a
+  given size ML(n) is a fraction of the cost of a full product with
+  same size M(n), and the cost M(n)=n^e for some exponent 1 < e <= 2;
+  then we can write:
+
+  ML(n) = 2*ML(an) + M((1-a)n) => k*M(n) = 2*k*M(n)*a^e + M(n)*(1-a)^e
+
+  Given a value for e, want to minimise the value of k, i.e. the
+  function k=(1-a)^e/(1-2*a^e).
+
+  With e=2, the exponent for schoolbook multiplication, the minimum is
+  given by the values a=1-a=1/2.
+
+  With e=log(3)/log(2), the exponent for Karatsuba (aka toom22),
+  Mulders computes (1-a) = 0.694... and we approximate a with 11/36.
+
+  Other possible approximations follow:
+  e=log(5)/log(3) [Toom-3] -> a ~= 9/40
+  e=log(7)/log(4) [Toom-4] -> a ~= 7/39
+  e=log(11)/log(6) [Toom-6] -> a ~= 1/8
+  e=log(15)/log(8) [Toom-8] -> a ~= 1/10
+
+  The values above were obtained with the following trivial commands
+  in the gp-pari shell:
+
+fun(e,a)=(1-a)^e/(1-2*a^e)
+mul(a,b,c)={local(m,x,p);if(b-c<1/10000,(b+c)/2,m=1;x=b;forstep(p=c,b,(b-c)/8,if(fun(a,p)<m,m=fun(a,p);x=p));mul(a,(b+x)/2,(c+x)/2))}
+contfracpnqn(contfrac(mul(log(2*2-1)/log(2),1/2,0),5))
+contfracpnqn(contfrac(mul(log(3*2-1)/log(3),1/2,0),5))
+contfracpnqn(contfrac(mul(log(4*2-1)/log(4),1/2,0),5))
+contfracpnqn(contfrac(mul(log(6*2-1)/log(6),1/2,0),3))
+contfracpnqn(contfrac(mul(log(8*2-1)/log(8),1/2,0),3))
+
+  ,
+  |\
+  | \
+  +----,
+  |    |
+  |    |
+  |    |\
+  |    | \
+  +----+--`
+  ^ n2 ^n1^
+
+  For an actual implementation, the assumption that M(n)=n^e is
+  incorrect, as a consequence also the assumption that ML(n)=k*M(n)
+  with a constant k is wrong.
+
+  But theory suggests two things:
+  - the better the multiplication algorithm is (lower e), the more k
+    approaches 1, and a approaches 0.
+
+  - A value for a smaller than optimal is probably less bad than a
+    bigger one: e.g. let e=log(3)/log(2), a=0.3058_ the optimal
+    value, and k(a)=0.808_ the mul/mullo speed ratio. We get
+    k(a+1/6)=0.929_ but k(a-1/6)=0.865_.
+*/
+
+static mp_size_t
+mpn_mullo_n_itch (mp_size_t n)
+{
+  return n + n;        /* scratch limbs mpn_mullo_n allocates: twice the operand size */
+}
+
+/*
+    mpn_dc_mullo_n requires a scratch space of 2*n limbs at tp.
+    It accepts tp == rp.
+*/
+static void
+mpn_dc_mullo_n (mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n, mp_ptr tp)
+{
+  mp_size_t n2, n1;    /* n = n1 + n2; n2 is the size of the full product */
+  ASSERT (n >= 2);
+  ASSERT (! MPN_OVERLAP_P (rp, n, xp, n));
+  ASSERT (! MPN_OVERLAP_P (rp, n, yp, n));
+  ASSERT (MPN_SAME_OR_SEPARATE2_P(rp, n, tp, 2*n));
+  /* Computes {rp,n} = low n limbs of {xp,n} * {yp,n}, using 2*n limbs at tp.  */
+  /* Divide-and-conquer */
+
+  /* We need fractional approximation of the value 0 < a <= 1/2
+     giving the minimum in the function k=(1-a)^e/(1-2*a^e).
+  */
+  if (MAYBE_range_basecase && BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD*36/(36-11)))
+    n1 = n >> 1;
+  else if (MAYBE_range_toom22 && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD*36/(36-11)))
+    n1 = n * 11 / (size_t) 36; /* n1 ~= n*(1-.694...) */
+  else if (BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD*40/(40-9)))
+    n1 = n * 9 / (size_t) 40;  /* n1 ~= n*(1-.775...) */
+  else if (BELOW_THRESHOLD (n, MUL_TOOM8H_THRESHOLD*10/9))
+    n1 = n * 7 / (size_t) 39;  /* n1 ~= n*(1-.821...) */
+  /* n1 = n * 4 / (size_t) 31; // n1 ~= n*(1-.871...) [TOOM66] */
+  else
+    n1 = n / (size_t) 10;              /* n1 ~= n*(1-.899...) [TOOM88] */
+
+  n2 = n - n1; /* every branch above gives n1 <= n/2, hence n1 <= n2 */
+
+  /* Split as x = x1 2^(n2 GMP_NUMB_BITS) + x0,
+             y = y1 2^(n2 GMP_NUMB_BITS) + y0 */
+
+  /* x0 * y0 */
+  mpn_mul_n (tp, xp, yp, n2);  /* full product, 2*n2 limbs at tp */
+  MPN_COPY (rp, tp, n2);       /* its low n2 limbs are already final */
+
+  /* x1 * y0 * 2^(n2 GMP_NUMB_BITS) */
+  if (BELOW_THRESHOLD (n1, MULLO_BASECASE_THRESHOLD))
+    mpn_mul_basecase (tp + n, xp + n2, n1, yp, n1);    /* 2*n1 limbs; only the low n1 are used */
+  else if (BELOW_THRESHOLD (n1, MULLO_DC_THRESHOLD))
+    mpn_mullo_basecase (tp + n, xp + n2, yp, n1);
+  else
+    mpn_dc_mullo_n (tp + n, xp + n2, yp, n1, tp + n);  /* recursion with tp == rp */
+  mpn_add_n (rp + n2, tp + n2, tp + n, n1);    /* high part of x0*y0 plus this term; carry dropped */
+
+  /* x0 * y1 * 2^(n2 GMP_NUMB_BITS) */
+  if (BELOW_THRESHOLD (n1, MULLO_BASECASE_THRESHOLD))
+    mpn_mul_basecase (tp + n, xp, n1, yp + n2, n1);
+  else if (BELOW_THRESHOLD (n1, MULLO_DC_THRESHOLD))
+    mpn_mullo_basecase (tp + n, xp, yp + n2, n1);
+  else
+    mpn_dc_mullo_n (tp + n, xp, yp + n2, n1, tp + n);
+  mpn_add_n (rp + n2, rp + n2, tp + n, n1);    /* accumulate the second cross term; carry dropped */
+}
+
+/* Avoid zero allocations when MULLO_BASECASE_THRESHOLD is 0.  */
+#define MUL_BASECASE_ALLOC \
+ (MULLO_BASECASE_THRESHOLD_LIMIT == 0 ? 1 : 2*MULLO_BASECASE_THRESHOLD_LIMIT)
+
+/* FIXME: This function should accept a temporary area; mpn_dc_mullo_n
+   accepts a pointer tp and handles the case tp == rp; do the same here.
+   Maybe recombine the two functions.
+   THINK: If mpn_mul_basecase is always faster than mpn_mullo_basecase
+         (typically thanks to mpn_addmul_2) should we unconditionally use
+         mpn_mul_n?
+*/
+
+void
+mpn_mullo_n (mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n)
+{
+  ASSERT (n >= 1);
+  ASSERT (! MPN_OVERLAP_P (rp, n, xp, n));
+  ASSERT (! MPN_OVERLAP_P (rp, n, yp, n));
+  /* {rp,n} = low n limbs of {xp,n} * {yp,n}; the method depends on n only.  */
+  if (BELOW_THRESHOLD (n, MULLO_BASECASE_THRESHOLD))
+    {
+      /* Allocate workspace of fixed size on stack: fast! */
+      mp_limb_t tp[MUL_BASECASE_ALLOC];
+      mpn_mul_basecase (tp, xp, n, yp, n);     /* full 2n-limb product */
+      MPN_COPY (rp, tp, n);                    /* keep only the low half */
+    }
+  else if (BELOW_THRESHOLD (n, MULLO_DC_THRESHOLD))
+    {
+      mpn_mullo_basecase (rp, xp, yp, n);      /* writes rp directly, no scratch */
+    }
+  else
+    {
+      mp_ptr tp;
+      TMP_DECL;
+      TMP_MARK;
+      tp = TMP_ALLOC_LIMBS (mpn_mullo_n_itch (n));     /* scratch shared by both sub-branches */
+      if (BELOW_THRESHOLD (n, MULLO_MUL_N_THRESHOLD))
+       {
+         mpn_dc_mullo_n (rp, xp, yp, n, tp);
+       }
+      else
+       {
+         /* For really large operands, use plain mpn_mul_n but throw away upper n
+            limbs of result.  */
+#if !TUNE_PROGRAM_BUILD && (MULLO_MUL_N_THRESHOLD > MUL_FFT_THRESHOLD)
+         mpn_fft_mul (tp, xp, n, yp, n);       /* call the FFT product directly in this configuration */
+#else
+         mpn_mul_n (tp, xp, yp, n);
+#endif
+         MPN_COPY (rp, tp, n);
+       }
+      TMP_FREE;
+    }
+}
diff --git a/mpn/generic/mulmod_bnm1.c b/mpn/generic/mulmod_bnm1.c

new file mode 100644 (file)

index 0000000..df0eca8
--- /dev/null
+++ b/mpn/generic/mulmod_bnm1.c
@@ -0,0 +1,351 @@
+/* mulmod_bnm1.c -- multiplication mod B^n-1.
+
+   Contributed to the GNU project by Niels Möller, Torbjorn Granlund and
+   Marco Bodrato.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Inputs are {ap,rn} and {bp,rn}; output is {rp,rn}, computation is
+   mod B^rn - 1, and values are semi-normalised; zero is represented
+   as either 0 or B^n - 1.  Needs a scratch of 2rn limbs at tp.
+   tp==rp is allowed. */
+void
+mpn_bc_mulmod_bnm1 (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t rn,
+                   mp_ptr tp)
+{
+  mp_limb_t wrap;      /* carry out of the fold, added back in below */
+
+  ASSERT (0 < rn);
+  /* Full 2*rn-limb product at tp, then fold halves: B^rn == 1 mod (B^rn - 1).  */
+  mpn_mul_n (tp, ap, bp, rn);
+  wrap = mpn_add_n (rp, tp, tp + rn, rn);
+  /* A carry out means the rn-limb sum left in rp is at most B^rn - 2,
+   * hence the increment below cannot overflow. */
+  MPN_INCR_U (rp, rn, wrap);
+}
+
+
+/* Inputs are {ap,rn+1} and {bp,rn+1}; output is {rp,rn+1}, in
+   semi-normalised representation, computation is mod B^rn + 1. Needs
+   a scratch area of 2rn + 2 limbs at tp; tp == rp is allowed.
+   Output is normalised. */
+static void
+mpn_bc_mulmod_bnp1 (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t rn,
+                   mp_ptr tp)
+{
+  mp_limb_t cy;
+
+  ASSERT (0 < rn);
+  /* Full (2rn+2)-limb product at tp; since B^rn == -1, reduce as low - middle + top.  */
+  mpn_mul_n (tp, ap, bp, rn + 1);
+  ASSERT (tp[2*rn+1] == 0);
+  ASSERT (tp[2*rn] < GMP_NUMB_MAX);
+  cy = tp[2*rn] + mpn_sub_n (rp, tp, tp+rn, rn);       /* top limb (B^2rn == 1) plus borrow (-B^rn == 1) */
+  rp[rn] = 0;
+  MPN_INCR_U (rp, rn+1, cy );  /* add both corrections into the rn+1 limb result */
+}
+
+
+/* Computes {rp,MIN(rn,an+bn)} <- {ap,an}*{bp,bn} Mod(B^rn-1)
+ *
+ * The result is expected to be ZERO if and only if one of the operands
+ * already is. Otherwise the class [0] Mod(B^rn-1) is represented by
+ * B^rn-1. This should not be a problem if mulmod_bnm1 is used to
+ * combine results and obtain a natural number when one knows in
+ * advance that the final value is less than (B^rn-1).
+ * Moreover it should not be a problem if mulmod_bnm1 is used to
+ * compute the full product with an+bn <= rn, because this condition
+ * implies (B^an-1)(B^bn-1) < (B^rn-1) .
+ *
+ * Requires 0 < bn <= an <= rn and an + bn > rn/2
+ * Scratch need: rn + (need for recursive call OR rn + 4). This gives
+ *
+ * S(n) <= rn + MAX (rn + 4, S(n/2)) <= 2rn + 4
+ */
+void
+mpn_mulmod_bnm1 (mp_ptr rp, mp_size_t rn, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn, mp_ptr tp)
+{
+  ASSERT (0 < bn);
+  ASSERT (bn <= an);
+  ASSERT (an <= rn);
+  /* Odd or small rn: a single plain product, folded at B^rn when it is longer.  */
+  if ((rn & 1) != 0 || BELOW_THRESHOLD (rn, MULMOD_BNM1_THRESHOLD))
+    {
+      if (UNLIKELY (bn < rn))
+       {
+         if (UNLIKELY (an + bn <= rn))
+           {
+             mpn_mul (rp, ap, an, bp, bn);     /* product fits in rn limbs: no reduction */
+           }
+         else
+           {
+             mp_limb_t cy;
+             mpn_mul (tp, ap, an, bp, bn);
+             cy = mpn_add (rp, tp, rn, tp + rn, an + bn - rn); /* fold the limbs above B^rn */
+             MPN_INCR_U (rp, rn, cy);
+           }
+       }
+      else
+       mpn_bc_mulmod_bnm1 (rp, ap, bp, rn, tp);        /* here an == bn == rn */
+    }
+  else
+    {
+      mp_size_t n;
+      mp_limb_t cy;
+      mp_limb_t hi;
+
+      n = rn >> 1;     /* rn is even in this branch, so rn == 2*n */
+
+      /* We need at least an + bn >= n, to be able to fit one of the
+        recursive products at rp. Requiring strict inequality makes
+        the code slightly simpler. If desired, we could avoid this
+        restriction by initially halving rn as long as rn is even and
+        an + bn <= rn/2. */
+
+      ASSERT (an + bn > n);
+
+      /* Compute xm = a*b mod (B^n - 1), xp = a*b mod (B^n + 1)
+        and crt together as
+
+        x = -xp * B^n + (B^n + 1) * [ (xp + xm)/2 mod (B^n-1)]
+      */
+
+#define a0 ap
+#define a1 (ap + n)
+#define b0 bp
+#define b1 (bp + n)
+
+#define xp  tp /* 2n + 2 */
+      /* am1  maybe in {xp, n} */
+      /* bm1  maybe in {xp + n, n} */
+#define sp1 (tp + 2*n + 2)
+      /* ap1  maybe in {sp1, n + 1} */
+      /* bp1  maybe in {sp1 + n + 1, n + 1} */
+
+      {
+       mp_srcptr am1, bm1;
+       mp_size_t anm, bnm;
+       mp_ptr so;      /* scratch handed to the recursive call */
+
+       if (LIKELY (an > n))
+         {
+           am1 = xp;
+           cy = mpn_add (xp, a0, n, a1, an - n);       /* a mod (B^n - 1) = a0 + a1 */
+           MPN_INCR_U (xp, n, cy);
+           anm = n;
+           if (LIKELY (bn > n))
+             {
+               bm1 = xp + n;
+               cy = mpn_add (xp + n, b0, n, b1, bn - n);
+               MPN_INCR_U (xp + n, n, cy);
+               bnm = n;
+               so = xp + 2*n;
+             }
+           else
+             {
+               so = xp + n;
+               bm1 = b0;
+               bnm = bn;
+             }
+         }
+       else
+         {
+           so = xp;    /* neither operand needed reducing: all of tp is free */
+           am1 = a0;
+           anm = an;
+           bm1 = b0;
+           bnm = bn;
+         }
+
+       mpn_mulmod_bnm1 (rp, n, am1, anm, bm1, bnm, so);        /* xm, left at rp */
+      }
+
+      {
+       int       k;
+       mp_srcptr ap1, bp1;
+       mp_size_t anp, bnp;
+
+       if (LIKELY (an > n)) {
+         ap1 = sp1;
+         cy = mpn_sub (sp1, a0, n, a1, an - n);        /* a mod (B^n + 1) = a0 - a1 */
+         sp1[n] = 0;
+         MPN_INCR_U (sp1, n + 1, cy);
+         anp = n + ap1[n];     /* count the top limb only when it is set */
+       } else {
+         ap1 = a0;
+         anp = an;
+       }
+
+       if (LIKELY (bn > n)) {
+         bp1 = sp1 + n + 1;
+         cy = mpn_sub (sp1 + n + 1, b0, n, b1, bn - n);
+         sp1[2*n+1] = 0;
+         MPN_INCR_U (sp1 + n + 1, n + 1, cy);
+         bnp = n + bp1[n];
+       } else {
+         bp1 = b0;
+         bnp = bn;
+       }
+
+       if (BELOW_THRESHOLD (n, MUL_FFT_MODF_THRESHOLD))
+         k=0;
+       else
+         {
+           int mask;
+           k = mpn_fft_best_k (n, 0);
+           mask = (1<<k) -1;
+           while (n & mask) {k--; mask >>=1;}; /* lower k until 2^k divides n */
+         }
+       if (k >= FFT_FIRST_K)
+         xp[n] = mpn_mul_fft (xp, n, ap1, anp, bp1, bnp, k);
+       else if (UNLIKELY (bp1 == b0))
+         {
+           ASSERT (anp + bnp <= 2*n+1);
+           ASSERT (anp + bnp > n);
+           ASSERT (anp >= bnp);
+           mpn_mul (xp, ap1, anp, bp1, bnp);
+           anp = anp + bnp - n;        /* anp is reused: limbs of the product above B^n */
+           ASSERT (anp <= n || xp[2*n]==0);
+           anp-= anp > n;
+           cy = mpn_sub (xp, xp, n, xp + n, anp);
+           xp[n] = 0;
+           MPN_INCR_U (xp, n+1, cy);
+         }
+       else
+         mpn_bc_mulmod_bnp1 (xp, ap1, bp1, n, xp);
+      }
+
+      /* Here the CRT recomposition begins.
+
+        xm <- (xp + xm)/2 = (xp + xm)B^n/2 mod (B^n-1)
+        Division by 2 is a bitwise rotation.
+
+        Assumes xp normalised mod (B^n+1).
+
+        The residue class [0] is represented by [B^n-1]; except when
+        both inputs are ZERO.
+      */
+
+#if HAVE_NATIVE_mpn_rsh1add_n || HAVE_NATIVE_mpn_rsh1add_nc
+#if HAVE_NATIVE_mpn_rsh1add_nc
+      cy = mpn_rsh1add_nc(rp, rp, xp, n, xp[n]); /* B^n = 1 */
+      hi = cy << (GMP_NUMB_BITS - 1);
+      cy = 0;
+      /* next update of rp[n-1] will set cy = 1 only if rp[n-1]+=hi
+        overflows, i.e. a further increment will not overflow again. */
+#else /* ! _nc */
+      cy = xp[n] + mpn_rsh1add_n(rp, rp, xp, n); /* B^n = 1 */
+      hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
+      cy >>= 1;
+      /* cy = 1 only if xp[n] = 1 i.e. {xp,n} = ZERO, this implies that
+        the rsh1add was a simple rshift: the top bit is 0. cy=1 => hi=0. */
+#endif
+#if GMP_NAIL_BITS == 0
+      add_ssaaaa(cy, rp[n-1], cy, rp[n-1], 0, hi);
+#else
+      cy += (hi & rp[n-1]) >> (GMP_NUMB_BITS-1);
+      rp[n-1] ^= hi;
+#endif
+#else /* ! HAVE_NATIVE_mpn_rsh1add_n */
+#if HAVE_NATIVE_mpn_add_nc
+      cy = mpn_add_nc(rp, rp, xp, n, xp[n]);
+#else /* ! _nc */
+      cy = xp[n] + mpn_add_n(rp, rp, xp, n); /* xp[n] == 1 implies {xp,n} == ZERO */
+#endif
+      cy += (rp[0]&1);
+      mpn_rshift(rp, rp, n, 1);
+      ASSERT (cy <= 2);
+      hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
+      cy >>= 1;
+      /* We can have cy != 0 only if hi = 0... */
+      ASSERT ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0);
+      rp[n-1] |= hi;
+      /* ... rp[n-1] + cy can not overflow, the following INCR is correct. */
+#endif
+      ASSERT (cy <= 1);
+      /* Next increment can not overflow, read the previous comments about cy. */
+      ASSERT ((cy == 0) || ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0));
+      MPN_INCR_U(rp, n, cy);
+
+      /* Compute the highest half:
+        ([(xp + xm)/2 mod (B^n-1)] - xp ) * B^n
+       */
+      if (UNLIKELY (an + bn < rn))
+       {
+         /* Note that in this case, the only way the result can equal
+            zero mod B^{rn} - 1 is if one of the inputs is zero, and
+            then the output of both the recursive calls and this CRT
+            reconstruction is zero, not B^{rn} - 1. Which is good,
+            since the latter representation doesn't fit in the output
+            area.*/
+         cy = mpn_sub_n (rp + n, rp, xp, an + bn - n); /* only an + bn - n high limbs are stored */
+
+         /* FIXME: This subtraction of the high parts is not really
+            necessary, we do it to get the carry out, and for sanity
+            checking. */
+         cy = xp[n] + mpn_sub_nc (xp + an + bn - n, rp + an + bn - n,
+                                  xp + an + bn - n, rn - (an + bn), cy);
+         ASSERT (an + bn == rn - 1 ||
+                 mpn_zero_p (xp + an + bn - n + 1, rn - 1 - (an + bn)));
+         cy = mpn_sub_1 (rp, rp, an + bn, cy);
+         ASSERT (cy == (xp + an + bn - n)[0]);
+       }
+      else
+       {
+         cy = xp[n] + mpn_sub_n (rp + n, rp, xp, n);
+         /* cy = 1 only if {xp,n+1} is not ZERO, i.e. {rp,n} is not ZERO.
+            DECR will affect _at most_ the lowest n limbs. */
+         MPN_DECR_U (rp, 2*n, cy);
+       }
+#undef a0
+#undef a1
+#undef b0
+#undef b1
+#undef xp
+#undef sp1
+    }
+}
+
+mp_size_t
+mpn_mulmod_bnm1_next_size (mp_size_t n)
+{
+  mp_size_t half;      /* ceil (n / 2), only needed for the largest sizes */
+  /* Below each bound n is kept as is, or rounded up to a multiple of 2 or 4.  */
+  if (BELOW_THRESHOLD (n, MULMOD_BNM1_THRESHOLD))
+    return n;
+  else if (BELOW_THRESHOLD (n, 4 * (MULMOD_BNM1_THRESHOLD - 1) + 1))
+    return (n + 1) & -2;
+  else if (BELOW_THRESHOLD (n, 8 * (MULMOD_BNM1_THRESHOLD - 1) + 1))
+    return (n + 3) & -4;
+  /* Larger n: the answer depends on how the half size compares.  */
+  half = (n + 1) >> 1;
+
+  if (! BELOW_THRESHOLD (half, MUL_FFT_MODF_THRESHOLD))
+    return 2 * mpn_fft_next_size (half, mpn_fft_best_k (half, 0));
+  /* half is below MUL_FFT_MODF_THRESHOLD: round n up to a multiple of 8.  */
+  return (n + 7) & -8;
+}
diff --git a/mpn/generic/neg.c b/mpn/generic/neg.c

new file mode 100644 (file)

index 0000000..980e59e
--- /dev/null
+++ b/mpn/generic/neg.c
@@ -0,0 +1,23 @@
+/* mpn_neg - negate an mpn.
+
+Copyright 2001, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define __GMP_FORCE_mpn_neg 1
+
+#include "gmp.h"
+#include "gmp-impl.h"
diff --git a/mpn/generic/nussbaumer_mul.c b/mpn/generic/nussbaumer_mul.c

new file mode 100644 (file)

index 0000000..131faf8
--- /dev/null
+++ b/mpn/generic/nussbaumer_mul.c
@@ -0,0 +1,60 @@
+/* mpn_nussbaumer_mul -- Multiply {ap,an} and {bp,bn} using
+   Nussbaumer's negacyclic convolution.
+
+   Contributed to the GNU project by Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Multiply {ap,an} by {bp,bn}, and put the result in {pp, an+bn} */
+void
+mpn_nussbaumer_mul (mp_ptr pp,
+                   mp_srcptr ap, mp_size_t an,
+                   mp_srcptr bp, mp_size_t bn)
+{
+  mp_size_t modn;      /* size passed as rn to the mod (B^rn - 1) routine */
+  mp_ptr scratch;
+  TMP_DECL;
+
+  ASSERT (an >= bn);
+  ASSERT (bn > 0);
+  /* modn is derived from the product length: an + bn limbs (2*an when squaring).  */
+  TMP_MARK;
+
+  if ((ap != bp) || (an != bn))
+    {
+      modn = mpn_mulmod_bnm1_next_size (an + bn);
+      scratch = TMP_ALLOC_LIMBS (mpn_mulmod_bnm1_itch (modn, an, bn));
+      mpn_mulmod_bnm1 (pp, modn, ap, an, bp, bn, scratch);
+    }
+  else
+    {
+      /* Same operand on both sides: use the squaring variant.  */
+      modn = mpn_sqrmod_bnm1_next_size (2*an);
+      scratch = TMP_ALLOC_LIMBS (mpn_sqrmod_bnm1_itch (modn, an));
+      mpn_sqrmod_bnm1 (pp, modn, ap, an, scratch);
+    }
+  TMP_FREE;
+}
diff --git a/mpn/generic/perfpow.c b/mpn/generic/perfpow.c

new file mode 100644 (file)

index 0000000..709e2bb
--- /dev/null
+++ b/mpn/generic/perfpow.c
@@ -0,0 +1,493 @@
+/* mpn_perfect_power_p -- mpn perfect power detection.
+
+   Contributed to the GNU project by Martin Boij.
+
+Copyright 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#define SMALL 20
+#define MEDIUM 100
+
+/*
+   Returns non-zero if {np,nn} == {xp,xn} ^ k.
+   Algorithm:
+       For s = 1, 2, 4, ..., s_max, compute the s least significant
+       limbs of {xp,xn}^k. Stop if they don't match the s least
+       significant limbs of {np,nn}.
+*/
+static int
+pow_equals (mp_srcptr np, mp_size_t nn,
+           mp_srcptr xp,mp_size_t xn,
+           mp_limb_t k, mp_bitcnt_t f,
+           mp_ptr tp)
+{
+  mp_limb_t *tp2;
+  mp_bitcnt_t y, z, count;
+  mp_size_t i, bn;
+  int ans;
+  mp_limb_t h, l;      /* two-limb value h:l, see umul_ppmm below */
+  TMP_DECL;
+
+  ASSERT (nn > 1 || (nn == 1 && np[0] > 1));
+  ASSERT (np[nn - 1] > 0);
+  ASSERT (xn > 0);
+  /* x == 1 can never match: 1^k == 1, and the asserts above require n > 1.  */
+  if (xn == 1 && xp[0] == 1)
+    return 0;
+  /* Cheap filter: compare the low bn limbs for bn = 1, 2, 4, ... while bn <= nn/2.  */
+  z = 1 + (nn >> 1);
+  for (bn = 1; bn < z; bn <<= 1)
+    {
+      mpn_powlo (tp, xp, &k, 1, bn, tp + bn);  /* low bn limbs of x^k; scratch at tp + bn */
+      if (mpn_cmp (tp, np, bn) != 0)
+       return 0;
+    }
+
+  TMP_MARK;
+
+  /* Final check. Estimate the size of {xp,xn}^k before computing
+     the power with full precision.
+     Optimization: It might pay off to make a more accurate estimation of
+     the logarithm of {xp,xn}, rather than using the index of the MSB.
+  */
+
+  count_leading_zeros (count, xp[xn - 1]);
+  y = xn * GMP_LIMB_BITS - count - 1;  /* msb_index (xp, xn) */
+
+  umul_ppmm (h, l, k, y);      /* h:l = k * y; x >= 2^y, so x^k >= 2^(k*y) */
+  h -= l == 0;  l--;   /* two-limb decrement */
+
+  z = f - 1; /* msb_index (np, nn) */
+  if (h == 0 && l <= z)        /* otherwise x^k is certainly larger than n */
+    {
+      mp_limb_t size;
+      size = l + k;    /* k*y + k - 1: x < 2^(y+1), so x^k has at most k*y + k bits */
+      ASSERT_ALWAYS (size >= k);       /* i.e. l + k did not wrap around */
+
+      y = 2 + size / GMP_LIMB_BITS;    /* y is reused as a limb count for the scratch */
+      tp2 = TMP_ALLOC_LIMBS (y);
+
+      i = mpn_pow_1 (tp, xp, xn, k, tp2);      /* full power at tp; i is the value compared with nn */
+      if (i == nn && mpn_cmp (tp, np, nn) == 0)
+       ans = 1;
+      else
+       ans = 0;
+    }
+  else
+    {
+      ans = 0;
+    }
+
+  TMP_FREE;
+  return ans;
+}
+
+/*
+   Computes rp such that rp^k * yp = 1 (mod 2^b).
+   Algorithm:
+       Apply Hensel lifting repeatedly, each time
+       doubling (approx.) the number of known bits in rp.
+*/
+static void
+binv_root (mp_ptr rp, mp_srcptr yp,
+          mp_limb_t k, mp_size_t bn,
+          mp_bitcnt_t b, mp_ptr tp)
+{
+  mp_limb_t *tp2 = tp + bn, *tp3 = tp + 2 * bn, di, k2 = k + 1;        /* tp is split into three areas */
+  mp_bitcnt_t order[GMP_LIMB_BITS * 2];        /* precisions to run through, largest first */
+  int i, d = 0;
+
+  ASSERT (bn > 0);
+  ASSERT (b > 0);
+  ASSERT ((k & 1) != 0);       /* k must be odd */
+
+  binvert_limb (di, k);        /* di: presumably k^-1 mod the limb base, used by the exact division below */
+
+  rp[0] = 1;   /* starting value for the lifting */
+  for (; b != 1; b = (b + 1) >> 1)     /* record b, ceil(b/2), ... down to 2 */
+    order[d++] = b;
+
+  for (i = d - 1; i >= 0; i--) /* replay the precisions from smallest to largest */
+    {
+      b = order[i];
+      bn = 1 + (b - 1) / GMP_LIMB_BITS;        /* limbs needed to hold b bits */
+
+      mpn_mul_1 (tp, rp, bn, k2);      /* tp = (k+1) * r */
+
+      mpn_powlo (tp2, rp, &k2, 1, bn, tp3);    /* tp2 = r^(k+1), low bn limbs */
+      mpn_mullo_n (rp, yp, tp2, bn);   /* rp = y * r^(k+1), low bn limbs */
+
+      mpn_sub_n (tp2, tp, rp, bn);     /* tp2 = (k+1)*r - y*r^(k+1) */
+      mpn_pi1_bdiv_q_1 (rp, tp2, bn, k, di, 0);        /* rp = tp2 / k, using the inverse di */
+      if ((b % GMP_LIMB_BITS) != 0)    /* clear the bits at and above position b */
+       rp[(b - 1) / GMP_LIMB_BITS] &= (((mp_limb_t) 1) << (b % GMP_LIMB_BITS)) - 1;
+    }
+  return;
+}
+
+/*
+   Computes rp such that rp^2 * yp = 1 (mod 2^{b+1}).
+   Returns non-zero if such an integer rp exists.
+*/
+static int
+binv_sqroot (mp_ptr rp, mp_srcptr yp,
+            mp_size_t bn, mp_bitcnt_t b,
+            mp_ptr tp)
+{
+  mp_limb_t k = 3, *tp2 = tp + bn, *tp3 = tp + (bn << 1);      /* k = 3 is both multiplier and exponent below */
+  mp_bitcnt_t order[GMP_LIMB_BITS * 2];        /* precisions to run through, largest first */
+  int i, d = 0;
+
+  ASSERT (bn > 0);
+  ASSERT (b > 0);
+
+  rp[0] = 1;   /* starting value; also the result when b == 1 */
+  if (b == 1)
+    {
+      if ((yp[0] & 3) != 1)    /* y must be 1 mod 4, else no such rp */
+       return 0;
+    }
+  else
+    {
+      if ((yp[0] & 7) != 1)    /* y must be 1 mod 8, else no such rp */
+       return 0;
+
+      for (; b != 2; b = (b + 2) >> 1) /* record b, then (b+2)/2, ... stopping at 2 */
+       order[d++] = b;
+
+      for (i = d - 1; i >= 0; i--)     /* replay the precisions from smallest to largest */
+       {
+         b = order[i];
+         bn = 1 + b / GMP_LIMB_BITS;   /* limbs needed to hold b + 1 bits */
+
+         mpn_mul_1 (tp, rp, bn, k);    /* tp = 3 * r */
+
+         mpn_powlo (tp2, rp, &k, 1, bn, tp3);  /* tp2 = r^3, low bn limbs */
+         mpn_mullo_n (rp, yp, tp2, bn);        /* rp = y * r^3, low bn limbs */
+
+#if HAVE_NATIVE_mpn_rsh1sub_n
+         mpn_rsh1sub_n (rp, tp, rp, bn);       /* rp = (3*r - y*r^3) / 2 in one call */
+#else
+         mpn_sub_n (tp2, tp, rp, bn);  /* tp2 = 3*r - y*r^3 */
+         mpn_rshift (rp, tp2, bn, 1);  /* rp = tp2 / 2 */
+#endif
+         rp[b / GMP_LIMB_BITS] &= (((mp_limb_t) 1) << (b % GMP_LIMB_BITS)) - 1;        /* keep the low b bits */
+       }
+    }
+  return 1;
+}
+
+/*
+   Returns non-zero if {np,nn} is a kth power.
+*/
+static int
+is_kth_power (mp_ptr rp, mp_srcptr np,
+             mp_limb_t k, mp_srcptr yp,
+             mp_size_t nn, mp_bitcnt_t f,
+             mp_ptr tp)
+{
+  mp_limb_t x, c;      /* x: current limb, c: borrow flag of the negation loop */
+  mp_bitcnt_t b;       /* number of low bits of the candidate root to compute */
+  mp_size_t i, rn, xn;
+  /* rp receives the candidate root; perfpow passes the mpn_binvert result as yp.  */
+  ASSERT (nn > 0);
+  ASSERT (((k & 1) != 0) || (k == 2)); /* exponent is 2 or odd */
+  ASSERT ((np[0] & 1) != 0);           /* {np,nn} must be odd */
+
+  if (k == 2)
+    {
+      b = (f + 1) >> 1;
+      rn = 1 + b / GMP_LIMB_BITS;
+      if (binv_sqroot (rp, yp, rn, b, tp) != 0)
+       {
+         xn = rn;
+         MPN_NORMALIZE (rp, xn);       /* xn gets the normalised size; rn is kept for the negation */
+         if (pow_equals (np, nn, rp, xn, k, f, tp) != 0)
+           return 1;
+
+         /* Check if (2^b - rp)^2 == np */
+         c = 0;
+         for (i = 0; i < rn; i++)      /* negate {rp,rn} in place, limb by limb */
+           {
+             x = rp[i];
+             rp[i] = -x - c;
+             c |= (x != 0);    /* borrow is set from the first nonzero limb onwards */
+           }
+         rp[rn - 1] &= (((mp_limb_t) 1) << (b % GMP_LIMB_BITS)) - 1;   /* reduce mod 2^b */
+         MPN_NORMALIZE (rp, rn);
+         if (pow_equals (np, nn, rp, rn, k, f, tp) != 0)
+           return 1;
+       }
+    }
+  else
+    {
+      b = 1 + (f - 1) / k;     /* ceil (f / k) */
+      rn = 1 + (b - 1) / GMP_LIMB_BITS;        /* limbs needed to hold b bits */
+      binv_root (rp, yp, k, rn, b, tp);
+      MPN_NORMALIZE (rp, rn);
+      if (pow_equals (np, nn, rp, rn, k, f, tp) != 0)
+       return 1;
+    }
+  MPN_ZERO (rp, rn); /* Untrash rp */
+  return 0;
+}
+
+static int
+perfpow (mp_srcptr np, mp_size_t nn,
+        mp_limb_t ub, mp_limb_t g,
+        mp_bitcnt_t f, int neg)
+{
+  mp_limb_t *yp, *tp, k = 0, *rp1;     /* yp: inverse of n, rp1: root candidate, tp: scratch */
+  int ans = 0;
+  mp_bitcnt_t b;
+  gmp_primesieve_t ps;
+  TMP_DECL;
+  /* Returns 1 if is_kth_power succeeds for some tested prime k < ub, else 0.  */
+  ASSERT (nn > 0);
+  ASSERT ((np[0] & 1) != 0);   /* {np,nn} must be odd */
+  ASSERT (ub > 0);
+
+  TMP_MARK;
+  gmp_init_primesieve (&ps);
+  b = (f + 3) >> 1;    /* number of bits of the inverse to keep */
+
+  yp = TMP_ALLOC_LIMBS (nn);
+  rp1 = TMP_ALLOC_LIMBS (nn);
+  tp = TMP_ALLOC_LIMBS (5 * nn);       /* FIXME */
+  MPN_ZERO (rp1, nn);  /* is_kth_power zeroes it again after each failed attempt */
+
+  mpn_binvert (yp, np, 1 + (b - 1) / GMP_LIMB_BITS, tp);
+  if (b % GMP_LIMB_BITS)       /* clear the bits at and above position b */
+    yp[(b - 1) / GMP_LIMB_BITS] &= (((mp_limb_t) 1) << (b % GMP_LIMB_BITS)) - 1;
+
+  if (neg)
+    gmp_nextprime (&ps);       /* drop the sieve's first prime (presumably 2) for negative n */
+
+  if (g > 0)
+    {
+      ub = MIN (ub, g + 1);    /* with g known, only primes k <= g that divide g are tried */
+      while ((k = gmp_nextprime (&ps)) < ub)
+       {
+         if ((g % k) == 0)
+           {
+             if (is_kth_power (rp1, np, k, yp, nn, f, tp) != 0)
+               {
+                 ans = 1;
+                 goto ret;
+               }
+           }
+       }
+    }
+  else
+    {
+      while ((k = gmp_nextprime (&ps)) < ub)   /* g == 0: try every prime below ub */
+       {
+         if (is_kth_power (rp1, np, k, yp, nn, f, tp) != 0)
+           {
+             ans = 1;
+             goto ret;
+           }
+       }
+    }
+ ret:
+  TMP_FREE;
+  return ans;
+}
+
+static const unsigned short nrtrial[] = { 100, 500, 1000 };    /* limit handed to mpn_trialdiv, indexed by size class */
+
+/* Table of (log_{p_i} 2) values, where p_i is the (nrtrial[i] + 1)'th prime
+   number.  mpn_perfect_power_p multiplies the bit size of the cofactor by
+   logs[trial] to bound the exponents that perfpow has to try.  */
+static const double logs[] = { 0.1099457228193620, 0.0847016403115322, 0.0772048195144415 };
+
+int    /* 1 if the operand is judged a perfect power, 0 otherwise */
+mpn_perfect_power_p (mp_srcptr np, mp_size_t nn)       /* nn < 0 marks a negative operand of |nn| limbs */
+{
+  mp_size_t ncn, s, pn, xn;
+  mp_limb_t *nc, factor, g = 0;        /* g: gcd of the multiplicities stripped so far, 0 = none yet */
+  mp_limb_t exp, *prev, *next, d, l, r, c, *tp, cry;
+  mp_bitcnt_t twos = 0, count;
+  int ans, where = 0, neg = 0, trial;
+  TMP_DECL;
+
+  nc = (mp_ptr) np;    /* nc aliases the input until a writable copy is needed */
+
+  if (nn < 0)
+    {
+      neg = 1;
+      nn = -nn;
+    }
+
+  if (nn == 0 || (nn == 1 && np[0] == 1))      /* magnitude 0 or 1: answer is 1 regardless of sign */
+    return 1;
+
+  TMP_MARK;
+
+  ncn = nn;
+  twos = mpn_scan1 (np, 0);    /* used below as the number of low zero bits to shift out */
+  if (twos > 0)
+    {
+      if (twos == 1)   /* exactly one factor of 2: cannot be a perfect power */
+       {
+         ans = 0;
+         goto ret;
+       }
+      s = twos / GMP_LIMB_BITS;        /* whole zero limbs at the low end */
+      if (s + 1 == nn && POW2_P (np[s]))       /* magnitude is exactly 2^twos */
+       {
+         ans = ! (neg && POW2_P (twos));       /* -2^twos needs an odd exponent > 1 dividing twos */
+         goto ret;
+       }
+      count = twos % GMP_LIMB_BITS;
+      ncn = nn - s;
+      nc = TMP_ALLOC_LIMBS (ncn);
+      if (count > 0)
+       {
+         mpn_rshift (nc, np + s, ncn, count);
+         ncn -= (nc[ncn - 1] == 0);    /* the shift may have emptied the top limb */
+       }
+      else
+       {
+         MPN_COPY (nc, np + s, ncn);
+       }
+      g = twos;        /* any exponent must divide the multiplicity of 2 */
+    }
+
+  if (ncn <= SMALL)    /* size class selects the nrtrial[] and logs[] entry */
+    trial = 0;
+  else if (ncn <= MEDIUM)
+    trial = 1;
+  else
+    trial = 2;
+
+  factor = mpn_trialdiv (nc, ncn, nrtrial[trial], &where);
+
+  if (factor != 0)
+    {
+      if (twos == 0)   /* nc still aliases the caller's np: take a private copy before modifying */
+       {
+         nc = TMP_ALLOC_LIMBS (ncn);
+         MPN_COPY (nc, np, ncn);
+       }
+
+      /* Remove factors found by trialdiv.
+        Optimization: Perhaps better to use
+        the strategy in mpz_remove ().
+      */
+      prev = TMP_ALLOC_LIMBS (ncn + 2);
+      next = TMP_ALLOC_LIMBS (ncn + 2);
+      tp = TMP_ALLOC_LIMBS (4 * ncn);
+
+      do
+       {
+         binvert_limb (d, factor);     /* NOTE(review): presumably mpn_trialdiv returns the inverse of the prime found, making d the prime -- confirm in trialdiv.c */
+         prev[0] = d;
+         pn = 1;
+         exp = 1;      /* invariant: prev = d^exp in pn limbs, and it divides nc */
+         while (2 * pn - 1 <= ncn)     /* stop once the square could not fit in ncn limbs */
+           {
+             mpn_sqr (next, prev, pn);
+             xn = 2 * pn;
+             xn -= (next[xn - 1] == 0);
+
+             if (mpn_divisible_p (nc, ncn, next, xn) == 0)
+               break;
+
+             exp <<= 1;
+             pn = xn;
+             MP_PTR_SWAP (next, prev); /* prev now holds the doubled power */
+           }
+
+         /* Binary search for the exponent */
+         l = exp + 1;
+         r = 2 * exp - 1;
+         while (l <= r)
+           {
+             c = (l + r) >> 1;
+             if (c - exp > 1)  /* candidate d^c = prev * d^(c-exp), built via mpn_pow_1 */
+               {
+                 xn = mpn_pow_1 (tp, &d, 1, c - exp, next);
+                 if (pn + xn - 1 > ncn)        /* product would be longer than nc: cannot divide it */
+                   {
+                     r = c - 1;
+                     continue;
+                   }
+                 mpn_mul (next, prev, pn, tp, xn);
+                 xn += pn;
+                 xn -= (next[xn - 1] == 0);
+               }
+             else      /* c == exp + 1: a single multiplication by d is enough */
+               {
+                 cry = mpn_mul_1 (next, prev, pn, d);
+                 next[pn] = cry;
+                 xn = pn + (cry != 0);
+               }
+
+             if (mpn_divisible_p (nc, ncn, next, xn) == 0)
+               {
+                 r = c - 1;
+               }
+             else
+               {
+                 exp = c;
+                 l = c + 1;
+                 MP_PTR_SWAP (next, prev);     /* keep the larger dividing power in prev */
+                 pn = xn;
+               }
+           }
+
+         if (g == 0)   /* fold this multiplicity into the running gcd */
+           g = exp;
+         else
+           g = mpn_gcd_1 (&g, 1, exp);
+
+         if (g == 1)   /* multiplicities share no common exponent > 1 */
+           {
+             ans = 0;
+             goto ret;
+           }
+
+         mpn_divexact (next, nc, ncn, prev, pn);       /* strip d^exp from nc */
+         ncn = ncn - pn;
+         ncn += next[ncn] != 0;
+         MPN_COPY (nc, next, ncn);
+
+         if (ncn == 1 && nc[0] == 1)   /* completely factored by trial division */
+           {
+             ans = ! (neg && POW2_P (g));      /* a negative operand needs an odd exponent dividing g */
+             goto ret;
+           }
+
+         factor = mpn_trialdiv (nc, ncn, nrtrial[trial], &where);
+       }
+      while (factor != 0);
+    }
+
+  count_leading_zeros (count, nc[ncn-1]);
+  count = GMP_LIMB_BITS * ncn - count;   /* log (nc) + 1 */
+  d = (mp_limb_t) (count * logs[trial] + 1e-9) + 1;    /* exclusive bound on the exponents perfpow tries */
+  ans = perfpow (nc, ncn, d, g, count, neg);
+
+ ret:
+  TMP_FREE;
+  return ans;
+}
diff --git a/mpn/generic/perfsqr.c b/mpn/generic/perfsqr.c

new file mode 100644 (file)

index 0000000..1c65dfb
--- /dev/null
+++ b/mpn/generic/perfsqr.c
@@ -0,0 +1,229 @@
+/* mpn_perfect_square_p(u,usize) -- Return non-zero if U is a perfect square,
+   zero otherwise.
+
+Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2005 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h> /* for NULL */
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#include "perfsqr.h"
+
+
+/* change this to "#define TRACE(x) x" for diagnostics */
+#define TRACE(x)
+
+
+
+/* PERFSQR_MOD_* detects non-squares using residue tests.
+
+   A macro PERFSQR_MOD_TEST is set up by gen-psqr.c in perfsqr.h.  It takes
+   {up,usize} modulo a selected modulus to get a remainder r.  For 32-bit or
+   64-bit limbs this modulus will be 2^24-1 or 2^48-1 using PERFSQR_MOD_34,
+   or for other limb or nail sizes a PERFSQR_PP is chosen and PERFSQR_MOD_PP
+   used.  PERFSQR_PP_NORM and PERFSQR_PP_INVERTED are pre-calculated in this
+   case too.
+
+   PERFSQR_MOD_TEST then makes various calls to PERFSQR_MOD_1 or
+   PERFSQR_MOD_2 with divisors d which are factors of the modulus, and table
+   data indicating residues and non-residues modulo those divisors.  The
+   table data is in 1 or 2 limbs worth of bits respectively, per the size of
+   each d.
+
+   A "modexact" style remainder is taken to reduce r modulo d.
+   PERFSQR_MOD_IDX implements this, producing an index "idx" for use with
+   the table data.  Notice there's just one multiplication by a constant
+   "inv", for each d.
+
+   The modexact doesn't produce a true r%d remainder, instead idx satisfies
+   "-(idx<<PERFSQR_MOD_BITS) == r mod d".  Because d is odd, this factor
+   -2^PERFSQR_MOD_BITS is a one-to-one mapping between r and idx, and is
+   accounted for by having the table data suitably permuted.
+
+   The remainder r fits within PERFSQR_MOD_BITS which is less than a limb.
+   In fact the GMP_LIMB_BITS - PERFSQR_MOD_BITS spare bits are enough to fit
+   each divisor d meaning the modexact multiply can take place entirely
+   within one limb, giving the compiler the chance to optimize it, in a way
+   that say umul_ppmm would not give.
+
+   There's no need for the divisors d to be prime, in fact gen-psqr.c makes
+   a deliberate effort to combine factors so as to reduce the number of
+   separate tests done on r.  But such combining is limited to d <=
+   2*GMP_LIMB_BITS so that the table data fits in at most 2 limbs.
+
+   Alternatives:
+
+   It'd be possible to use bigger divisors d, and more than 2 limbs of table
+   data, but this doesn't look like it would be of much help to the prime
+   factors in the usual moduli 2^24-1 or 2^48-1.
+
+   The moduli 2^24-1 or 2^48-1 are nothing particularly special, they're
+   just easy to calculate (see mpn_mod_34lsub1) and have a nice set of prime
+   factors.  2^32-1 and 2^64-1 would be equally easy to calculate, but have
+   fewer prime factors.
+
+   The nails case usually ends up using mpn_mod_1, which is a lot slower
+   than mpn_mod_34lsub1.  Perhaps other such special moduli could be found
+   for the nails case.  Two-term things like 2^30-2^15-1 might be
+   candidates.  Or at worst some on-the-fly de-nailing would allow the plain
+   2^24-1 to be used.  Currently nails are too preliminary to be worried
+   about.
+
+*/
+
+#define PERFSQR_MOD_MASK       ((CNST_LIMB(1) << PERFSQR_MOD_BITS) - 1)        /* low PERFSQR_MOD_BITS bits set */
+
+#define MOD34_BITS  (GMP_NUMB_BITS / 4 * 3)    /* three quarters of the numb bits */
+#define MOD34_MASK  ((CNST_LIMB(1) << MOD34_BITS) - 1) /* low MOD34_BITS bits set */
+
+#define PERFSQR_MOD_34(r, up, usize)                           \
+  do {                                                         \
+    (r) = mpn_mod_34lsub1 (up, usize);                         \
+    (r) = ((r) & MOD34_MASK) + ((r) >> MOD34_BITS); /* fold the bits above MOD34_BITS back onto the low part */ \
+  } while (0)
+
+/* FIXME: The %= here isn't good, and might destroy any savings from keeping
+   the PERFSQR_MOD_IDX stuff within a limb (rather than needing umul_ppmm).
+   Maybe a new sort of mpn_preinv_mod_1 could accept an unnormalized divisor
+   and a shift count, like mpn_preinv_divrem_1.  But mod_34lsub1 is our
+   normal case, so let's not worry too much about mod_1.  */
+#define PERFSQR_MOD_PP(r, up, usize)                                   \
+  do {                                                                 \
+    if (BELOW_THRESHOLD (usize, PREINV_MOD_1_TO_MOD_1_THRESHOLD))      \
+      {                                                                        \
+       (r) = mpn_preinv_mod_1 (up, usize, PERFSQR_PP_NORM,             \
+                               PERFSQR_PP_INVERTED);                   \
+       (r) %= PERFSQR_PP; /* remainder was taken by PERFSQR_PP_NORM; bring it below PERFSQR_PP */ \
+      }                                                                        \
+    else                                                               \
+      {                                                                        \
+       (r) = mpn_mod_1 (up, usize, PERFSQR_PP);                        \
+      }                                                                        \
+  } while (0)
+
+#define PERFSQR_MOD_IDX(idx, r, d, inv)                                \
+  do {                                                         \
+    mp_limb_t  q;                                              \
+    ASSERT ((r) <= PERFSQR_MOD_MASK);                          \
+    ASSERT ((((inv) * (d)) & PERFSQR_MOD_MASK) == 1);          \
+    ASSERT (MP_LIMB_T_MAX / (d) >= PERFSQR_MOD_MASK);          \
+                                                               \
+    q = ((r) * (inv)) & PERFSQR_MOD_MASK;                      \
+    ASSERT (r == ((q * (d)) & PERFSQR_MOD_MASK));              \
+    (idx) = (q * (d)) >> PERFSQR_MOD_BITS;                     \
+  } while (0)
+
+#define PERFSQR_MOD_1(r, d, inv, mask)                         \
+  do {                                                         \
+    unsigned   idx;                                            \
+    ASSERT ((d) <= GMP_LIMB_BITS);                             \
+    PERFSQR_MOD_IDX(idx, r, d, inv);                           \
+    TRACE (printf ("  PERFSQR_MOD_1 d=%u r=%lu idx=%u\n",      \
+                  d, r%d, idx));                               \
+    if ((((mask) >> idx) & 1) == 0)                            \
+      {                                                                \
+       TRACE (printf ("  non-square\n"));                      \
+       return 0;                                               \
+      }                                                                \
+  } while (0)
+
+/* The expression "(int) idx - GMP_LIMB_BITS < 0" lets the compiler use the
+   sign bit from "idx-GMP_LIMB_BITS", which might help avoid a branch. */
+#define PERFSQR_MOD_2(r, d, inv, mhi, mlo)                     \
+  do {                                                         \
+    mp_limb_t  m;                                              \
+    unsigned   idx;                                            \
+    ASSERT ((d) <= 2*GMP_LIMB_BITS);                           \
+                                                               \
+    PERFSQR_MOD_IDX (idx, r, d, inv);                          \
+    TRACE (printf ("  PERFSQR_MOD_2 d=%u r=%lu idx=%u\n",      \
+                  d, r%d, idx));                               \
+    m = ((int) idx - GMP_LIMB_BITS < 0 ? (mlo) : (mhi));       \
+    idx %= GMP_LIMB_BITS;                                      \
+    if (((m >> idx) & 1) == 0)                                 \
+      {                                                                \
+       TRACE (printf ("  non-square\n"));                      \
+       return 0;                                               \
+      }                                                                \
+  } while (0)
+
+
+int    /* non-zero if {up,usize} is a perfect square, zero otherwise */
+mpn_perfect_square_p (mp_srcptr up, mp_size_t usize)
+{
+  ASSERT (usize >= 1);
+
+  TRACE (gmp_printf ("mpn_perfect_square_p %Nd\n", up, usize));
+
+  /* The first test excludes 212/256 (82.8%) of the perfect square candidates
+     in O(1) time.  */
+  {
+    unsigned  idx = up[0] % 0x100;     /* low 8 bits select one bit of sq_res_0x100 */
+    if (((sq_res_0x100[idx / GMP_LIMB_BITS]
+         >> (idx % GMP_LIMB_BITS)) & 1) == 0)
+      return 0;
+  }
+
+#if 0
+  /* Check that we have even multiplicity of 2, and then check that the rest is
+     a possible perfect square.  Leave disabled until we can determine this
+     really is an improvement.  If it is, it could completely replace the
+     simple probe above, since this should throw out more non-squares, but at
+     the expense of somewhat more cycles.  */
+  {
+    mp_limb_t lo;
+    int cnt;
+    lo = up[0];
+    while (lo == 0)
+      up++, lo = up[0], usize--;
+    count_trailing_zeros (cnt, lo);
+    if ((cnt & 1) != 0)
+      return 0;                        /* return if not even multiplicity of 2 */
+    lo >>= cnt;                        /* shift down to align lowest non-zero bit */
+    lo >>= 1;                  /* shift away lowest non-zero bit */
+    if ((lo & 3) != 0)
+      return 0;
+  }
+#endif
+
+
+  /* The second test uses mpn_mod_34lsub1 or mpn_mod_1 to detect non-squares
+     according to their residues modulo small primes (or powers of
+     primes).  See perfsqr.h.  */
+  PERFSQR_MOD_TEST (up, usize);        /* the PERFSQR_MOD_1/2 macros it uses "return 0" from here on a non-residue */
+
+
+  /* For the third and last test, we finally compute the square root,
+     to make sure we've really got a perfect square.  */
+  {
+    mp_ptr root_ptr;
+    int res;
+    TMP_DECL;
+
+    TMP_MARK;
+    root_ptr = TMP_ALLOC_LIMBS ((usize + 1) / 2);      /* room for ceil(usize/2) limbs of root */
+
+    /* Iff mpn_sqrtrem returns zero, the square is perfect.  */
+    res = ! mpn_sqrtrem (root_ptr, NULL, up, usize);
+    TMP_FREE;
+
+    return res;
+  }
+}
diff --git a/mpn/generic/popham.c b/mpn/generic/popham.c

new file mode 100644 (file)

index 0000000..d81ad9d
--- /dev/null
+++ b/mpn/generic/popham.c
@@ -0,0 +1,114 @@
+/* mpn_popcount, mpn_hamdist -- mpn bit population count/hamming distance.
+
+Copyright 1994, 1996, 2000, 2001, 2002, 2005, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#if OPERATION_popcount
+#define FNAME mpn_popcount
+#define POPHAM(u,v) (u)                /* count the bits of u; v is never expanded */
+#endif
+
+#if OPERATION_hamdist
+#define FNAME mpn_hamdist
+#define POPHAM(u,v) ((u) ^ (v))        /* count the bits in which u and v differ */
+#endif
+
+mp_bitcnt_t    /* total number of set bits in POPHAM (up[i], vp[i]) over the n limbs */
+FNAME (mp_srcptr up,
+#if OPERATION_hamdist
+       mp_srcptr vp,
+#endif
+       mp_size_t n) __GMP_NOTHROW
+{
+  mp_bitcnt_t result = 0;
+  mp_limb_t p0, p1, p2, p3, x, p01, p23;       /* comments below give: field width in bits, value range per field */
+  mp_size_t i;
+
+  ASSERT (n >= 1);             /* Actually, this code handles any n, but some
+                                  assembly implementations do not.  */
+
+  for (i = n >> 2; i != 0; i--)        /* main loop: four limbs per iteration */
+    {
+      p0 = POPHAM (up[0], vp[0]);
+      p0 -= (p0 >> 1) & MP_LIMB_T_MAX/3;                               /* 2 0-2 */
+      p0 = ((p0 >> 2) & MP_LIMB_T_MAX/5) + (p0 & MP_LIMB_T_MAX/5);     /* 4 0-4 */
+
+      p1 = POPHAM (up[1], vp[1]);
+      p1 -= (p1 >> 1) & MP_LIMB_T_MAX/3;                               /* 2 0-2 */
+      p1 = ((p1 >> 2) & MP_LIMB_T_MAX/5) + (p1 & MP_LIMB_T_MAX/5);     /* 4 0-4 */
+
+      p01 = p0 + p1;                                                   /* 4 0-8 */
+      p01 = ((p01 >> 4) & MP_LIMB_T_MAX/17) + (p01 & MP_LIMB_T_MAX/17);        /* 8 0-16 */
+
+      p2 = POPHAM (up[2], vp[2]);
+      p2 -= (p2 >> 1) & MP_LIMB_T_MAX/3;                               /* 2 0-2 */
+      p2 = ((p2 >> 2) & MP_LIMB_T_MAX/5) + (p2 & MP_LIMB_T_MAX/5);     /* 4 0-4 */
+
+      p3 = POPHAM (up[3], vp[3]);
+      p3 -= (p3 >> 1) & MP_LIMB_T_MAX/3;                               /* 2 0-2 */
+      p3 = ((p3 >> 2) & MP_LIMB_T_MAX/5) + (p3 & MP_LIMB_T_MAX/5);     /* 4 0-4 */
+
+      p23 = p2 + p3;                                                   /* 4 0-8 */
+      p23 = ((p23 >> 4) & MP_LIMB_T_MAX/17) + (p23 & MP_LIMB_T_MAX/17);        /* 8 0-16 */
+
+      x = p01 + p23;                                                   /* 8 0-32 */
+      x = (x >> 8) + x;                                                        /* 8 0-64 */
+      x = (x >> 16) + x;                                               /* 8 0-128 */
+#if GMP_LIMB_BITS > 32
+      x = ((x >> 32) & 0xff) + (x & 0xff);                             /* 8 0-256 */
+      result += x;     /* no mask here: the sum can reach 256 and no longer fits in 8 bits */
+#else
+      result += x & 0xff;
+#endif
+      up += 4;
+#if OPERATION_hamdist
+      vp += 4;
+#endif
+    }
+
+  n &= 3;      /* 0 to 3 limbs left over */
+  if (n != 0)
+    {
+      x = 0;
+      do
+       {
+         p0 = POPHAM (up[0], vp[0]);
+         p0 -= (p0 >> 1) & MP_LIMB_T_MAX/3;                            /* 2 0-2 */
+         p0 = ((p0 >> 2) & MP_LIMB_T_MAX/5) + (p0 & MP_LIMB_T_MAX/5);  /* 4 0-4 */
+         p0 = ((p0 >> 4) + p0) & MP_LIMB_T_MAX/17;                     /* 8 0-8 */
+
+         x += p0;      /* at most three limbs accumulate, so each 8-bit field stays <= 24 */
+         up += 1;
+#if OPERATION_hamdist
+         vp += 1;
+#endif
+       }
+      while (--n);
+
+      x = (x >> 8) + x;
+      x = (x >> 16) + x;
+#if GMP_LIMB_BITS > 32
+      x = (x >> 32) + x;
+#endif
+      result += x & 0xff;      /* the low byte now holds the sum of all byte fields */
+    }
+
+  return result;
+}
diff --git a/mpn/generic/pow_1.c b/mpn/generic/pow_1.c

new file mode 100644 (file)

index 0000000..d379836
--- /dev/null
+++ b/mpn/generic/pow_1.c
@@ -0,0 +1,121 @@
+/* mpn_pow_1 -- Compute powers R = U^exp.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by the
+Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License along
+with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+mp_size_t      /* limb count of the power written to rp */
+mpn_pow_1 (mp_ptr rp, mp_srcptr bp, mp_size_t bn, mp_limb_t exp, mp_ptr tp)    /* {rp} = {bp,bn}^exp, tp is scratch */
+{
+  mp_limb_t x;
+  int cnt, i;
+  mp_size_t rn;
+  int par;
+
+  ASSERT (bn >= 1);
+  /* FIXME: Add operand overlap criteria */
+
+  if (exp <= 1)        /* trivial exponents need no squaring */
+    {
+      if (exp == 0)
+       {
+         rp[0] = 1;
+         return 1;
+       }
+      else
+       {
+         MPN_COPY (rp, bp, bn);
+         return bn;
+       }
+    }
+
+  /* Count number of bits in exp, and compute where to put initial square in
+     order to magically get results in the entry rp.  Use simple code,
+     optimized for small exp.  For large exp, the bignum operations will take
+     so much time that the slowness of this code will be negligible.  */
+  par = 0;
+  cnt = GMP_LIMB_BITS;
+  for (x = exp; x != 0; x >>= 1)       /* afterwards: cnt = leading zero bits of exp, par = parity of its set bits */
+    {
+      par ^= x & 1;
+      cnt--;
+    }
+  exp <<= cnt; /* left-justify: the top set bit now sits at GMP_LIMB_HIGHBIT */
+
+  if (bn == 1)
+    {
+      mp_limb_t bl = bp[0];
+
+      if ((cnt & 1) != 0)      /* only squarings swap buffers here (mpn_mul_1 works in place), so cnt alone decides */
+       MP_PTR_SWAP (rp, tp);
+
+      mpn_sqr (rp, bp, bn);    /* the top exponent bit is consumed by starting from b^2 */
+      rn = 2 * bn; rn -= rp[rn - 1] == 0;
+
+      for (i = GMP_LIMB_BITS - cnt - 1;;)      /* i = exponent bits below the top one */
+       {
+         exp <<= 1;    /* bring the next exponent bit to the top */
+         if ((exp & GMP_LIMB_HIGHBIT) != 0)
+           {
+             rp[rn] = mpn_mul_1 (rp, rp, rn, bl);
+             rn += rp[rn] != 0;
+           }
+
+         if (--i == 0)
+           break;
+
+         mpn_sqr (tp, rp, rn);
+         rn = 2 * rn; rn -= tp[rn - 1] == 0;
+         MP_PTR_SWAP (rp, tp);
+       }
+    }
+  else
+    {
+      if (((par ^ cnt) & 1) == 0)      /* multiplies swap buffers too in this branch, hence the parity term */
+       MP_PTR_SWAP (rp, tp);
+
+      mpn_sqr (rp, bp, bn);
+      rn = 2 * bn; rn -= rp[rn - 1] == 0;
+
+      for (i = GMP_LIMB_BITS - cnt - 1;;)
+       {
+         exp <<= 1;
+         if ((exp & GMP_LIMB_HIGHBIT) != 0)
+           {
+             rn = rn + bn - (mpn_mul (tp, rp, rn, bp, bn) == 0);       /* drop the top limb when mpn_mul reports it as zero */
+             MP_PTR_SWAP (rp, tp);
+           }
+
+         if (--i == 0)
+           break;
+
+         mpn_sqr (tp, rp, rn);
+         rn = 2 * rn; rn -= tp[rn - 1] == 0;
+         MP_PTR_SWAP (rp, tp);
+       }
+    }
+
+  return rn;
+}
diff --git a/mpn/generic/powlo.c b/mpn/generic/powlo.c

new file mode 100644 (file)

index 0000000..7eb6454
--- /dev/null
+++ b/mpn/generic/powlo.c
@@ -0,0 +1,164 @@
+/* mpn_powlo -- Compute R = U^E mod B^n, where B is the limb base.
+
+Copyright 2007, 2008, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+#define getbit(p,bi) /* bit number bi of p, counted from 1 at the least significant bit */ \
+  (((p)[((bi) - 1) / GMP_LIMB_BITS] >> (((bi) - 1) % GMP_LIMB_BITS)) & 1)
+
+static inline mp_limb_t        /* the nbits-wide field of p that ends just below bit position bi */
+getbits (const mp_limb_t *p, mp_bitcnt_t bi, int nbits)
+{
+  int nbits_in_r;
+  mp_limb_t r;
+  mp_size_t i;
+
+  if (bi < nbits)      /* fewer than nbits bits are left: return the low bi bits of p[0] */
+    {
+      return p[0] & (((mp_limb_t) 1 << bi) - 1);
+    }
+  else /* NOTE(review): GMP_NUMB_BITS here vs GMP_LIMB_BITS in getbit; presumably equal without nails -- confirm */
+    {
+      bi -= nbits;                     /* bit index of low bit to extract */
+      i = bi / GMP_NUMB_BITS;          /* word index of low bit to extract */
+      bi %= GMP_NUMB_BITS;             /* bit index in low word */
+      r = p[i] >> bi;                  /* extract (low) bits */
+      nbits_in_r = GMP_NUMB_BITS - bi; /* number of bits now in r */
+      if (nbits_in_r < nbits)          /* did we get enough bits? */
+       r += p[i + 1] << nbits_in_r;    /* prepend bits from higher word */
+      return r & (((mp_limb_t ) 1 << nbits) - 1);
+    }
+}
+
+static inline int      /* window width (in bits) to use for an exponent of eb bits */
+win_size (mp_bitcnt_t eb)
+{
+  int k;       /* ends as the index of the first threshold that is >= eb */
+  static const mp_bitcnt_t x[] = {1,7,25,81,241,673,1793,4609,11521,28161,~(mp_bitcnt_t)0};    /* read-only; last entry is a sentinel that stops the scan */
+  for (k = 0; eb > x[k]; k++)
+    ;
+  return k;
+}
+
+/* rp[n-1..0] = bp[n-1..0] ^ ep[en-1..0] mod B^n, B is the limb base.
+   Requires that ep[en-1] is non-zero and the exponent is > 1 (asserted).
+   Uses scratch space tp[3n-1..0], i.e., 3n words.  */
+void
+mpn_powlo (mp_ptr rp, mp_srcptr bp,
+          mp_srcptr ep, mp_size_t en,
+          mp_size_t n, mp_ptr tp)
+{
+  int cnt;
+  mp_bitcnt_t ebi;     /* number of exponent bits not yet consumed */
+  int windowsize, this_windowsize;
+  mp_limb_t expbits;
+  mp_limb_t *pp, *this_pp, *last_pp;
+  mp_limb_t *b2p;
+  long i;
+  TMP_DECL;
+
+  ASSERT (en > 1 || (en == 1 && ep[0] > 1));
+
+  TMP_MARK;
+
+  count_leading_zeros (cnt, ep[en - 1]);
+  ebi = (mp_bitcnt_t) en * GMP_LIMB_BITS - cnt;        /* bit length of the exponent */
+
+  windowsize = win_size (ebi);
+
+  pp = TMP_ALLOC_LIMBS ((n << (windowsize - 1)) + n); /* + n is for mullo ign part */
+
+  this_pp = pp;
+
+  MPN_COPY (this_pp, bp, n);   /* first table entry is b itself */
+
+  b2p = tp + 2*n;      /* third n-limb slice of tp; the first two receive mpn_sqr output */
+
+  /* Store b^2 in b2.  */
+  mpn_sqr (tp, bp, n); /* FIXME: Use "mpn_sqrlo" */
+  MPN_COPY (b2p, tp, n);
+
+  /* Precompute odd powers of b and put them in the temporary area at pp.  */
+  for (i = (1 << (windowsize - 1)) - 1; i > 0; i--)
+    {
+      last_pp = this_pp;
+      this_pp += n;
+      mpn_mullo_n (this_pp, last_pp, b2p, n);  /* next entry = previous entry * b^2 */
+    }
+
+  expbits = getbits (ep, ebi, windowsize);     /* leading window of the exponent */
+  if (ebi < windowsize)
+    ebi = 0;
+  else
+    ebi -= windowsize;
+
+  count_trailing_zeros (cnt, expbits); /* make the window odd; its zero bits go back to ebi */
+  ebi += cnt;
+  expbits >>= cnt;
+
+  MPN_COPY (rp, pp + n * (expbits >> 1), n);   /* table slot of odd power e is e >> 1 */
+
+  while (ebi != 0)
+    {
+      while (getbit (ep, ebi) == 0)    /* zero bit: just square */
+       {
+         mpn_sqr (tp, rp, n);  /* FIXME: Use "mpn_sqrlo" */
+         MPN_COPY (rp, tp, n);
+         ebi--;
+         if (ebi == 0)
+           goto done;
+       }
+
+      /* The next bit of the exponent is 1.  Now extract the largest block of
+        bits <= windowsize, and such that the least significant bit is 1.  */
+
+      expbits = getbits (ep, ebi, windowsize);
+      this_windowsize = windowsize;
+      if (ebi < windowsize)    /* fewer bits left than a full window */
+       {
+         this_windowsize -= windowsize - ebi;
+         ebi = 0;
+       }
+      else
+       ebi -= windowsize;
+
+      count_trailing_zeros (cnt, expbits);
+      this_windowsize -= cnt;
+      ebi += cnt;
+      expbits >>= cnt;
+
+      do       /* one squaring per bit of the (shortened) window */
+       {
+         mpn_sqr (tp, rp, n);
+         MPN_COPY (rp, tp, n);
+         this_windowsize--;
+       }
+      while (this_windowsize != 0);
+
+      mpn_mullo_n (tp, rp, pp + n * (expbits >> 1), n);        /* multiply by the tabulated odd power */
+      MPN_COPY (rp, tp, n);
+    }
+
+ done:
+  TMP_FREE;
+}
diff --git a/mpn/generic/powm.c b/mpn/generic/powm.c

new file mode 100644 (file)

index 0000000..8c58795
--- /dev/null
+++ b/mpn/generic/powm.c
@@ -0,0 +1,478 @@
+/* mpn_powm -- Compute R = U^E mod M.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2007, 2008, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+/*
+  BASIC ALGORITHM, Compute U^E mod M, where M < B^n is odd.
+
+  1. W <- U
+
+  2. T <- (B^n * U) mod M                Convert to REDC form
+
+  3. Compute table U^1, U^3, U^5... of E-dependent size
+
+  4. While there are more bits in E
+       W <- power left-to-right base-k
+
+
+  TODO:
+
+   * Make getbits a macro, thereby allowing it to update the index operand.
+     That will simplify the code using getbits.  (Perhaps make getbits' sibling
+     getbit then have similar form, for symmetry.)
+
+   * Write an itch function.  Or perhaps get rid of tp parameter since the huge
+     pp area is allocated locally anyway?
+
+   * Choose window size without looping.  (Superoptimize or think(tm).)
+
+   * Handle small bases with initial, reduction-free exponentiation.
+
+   * Call new division functions, not mpn_tdiv_qr.
+
+   * Consider special code for one-limb M.
+
+   * How should we handle the redc1/redc2/redc_n choice?
+     - redc1:  T(binvert_1limb)  + e * (n)   * (T(mullo-1x1) + n*T(addmul_1))
+     - redc2:  T(binvert_2limbs) + e * (n/2) * (T(mullo-2x2) + n*T(addmul_2))
+     - redc_n: T(binvert_nlimbs) + e * (T(mullo-nxn) + T(M(n)))
+     This disregards the addmul_N constant term, but we could think of
+     that as part of the respective mullo.
+
+   * When U (the base) is small, we should start the exponentiation with plain
+     operations, then convert that partial result to REDC form.
+
+   * When U is just one limb, should it be handled without the k-ary tricks?
+     We could keep a factor of B^n in W, but use U' = BU as base.  After
+     multiplying by this (pseudo two-limb) number, we need to multiply by 1/B
+     mod M.
+*/
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
+#define WANT_REDC_2 1
+#endif
+
+#define getbit(p,bi) /* bit number bi of p, counting from 1 at the LSB */ \
+  (((p)[((bi) - 1) / GMP_LIMB_BITS] >> ((bi) - 1) % GMP_LIMB_BITS) & 1)
+
+/* Return the nbits bits of p at 0-based positions bi-nbits .. bi-1 (or just the
+   low bi bits of p[0] when bi < nbits).  Indexed by GMP_LIMB_BITS like getbit.  */
+static inline mp_limb_t
+getbits (const mp_limb_t *p, mp_bitcnt_t bi, int nbits)
+{
+  int nbits_in_r;
+  mp_limb_t r;
+  mp_size_t i;
+
+  if (bi < nbits)
+    return p[0] & (((mp_limb_t) 1 << bi) - 1);
+  else
+    {
+      bi -= nbits;                     /* bit index of low bit to extract */
+      i = bi / GMP_LIMB_BITS;          /* word index of low bit to extract */
+      bi %= GMP_LIMB_BITS;             /* bit index in low word */
+      r = p[i] >> bi;                  /* extract (low) bits */
+      nbits_in_r = GMP_LIMB_BITS - bi; /* number of bits now in r */
+      if (nbits_in_r < nbits)          /* did we get enough bits? */
+       r += p[i + 1] << nbits_in_r;    /* prepend bits from higher word */
+      return r & (((mp_limb_t ) 1 << nbits) - 1);
+    }
+}
+
+static inline int
+win_size (mp_bitcnt_t eb)        /* window width to use for an eb-bit exponent */
+{
+  static mp_bitcnt_t limit[] = {0,7,25,81,241,673,1793,4609,11521,28161,~(mp_bitcnt_t)0};
+  int k = 1;                     /* limit[k]: largest eb served by width k */
+  while (eb > limit[k])          /* the all-ones last entry stops the scan */
+    k++;
+  return k;
+}
+
+/* Put up[un-1..0] into REDC form: rp[n-1..0] = (B^n * U) mod M.  */
+static void
+redcify (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr mp, mp_size_t n)
+{
+  mp_ptr num, quo;
+  TMP_DECL;
+  TMP_MARK;
+
+  num = TMP_ALLOC_LIMBS (un + n);      /* dividend U * B^n */
+  quo = TMP_ALLOC_LIMBS (un + 1);      /* quotient, unused; FIXME: Put at num+? */
+
+  MPN_ZERO (num, n);                   /* low n limbs zero, i.e. times B^n */
+  MPN_COPY (num + n, up, un);          /* high un limbs hold U */
+  mpn_tdiv_qr (quo, rp, 0L, num, un + n, mp, n);
+  TMP_FREE;
+}
+
+/* rp[n-1..0] = bp[bn-1..0] ^ ep[en-1..0] mod mp[n-1..0]
+   Requires that mp[n-1..0] is odd.
+   Requires that ep[en-1..0] is > 1.
+   Uses scratch space at tp of MAX(mpn_binvert_itch(n),2n) limbs.  */
+void
+mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
+         mp_srcptr ep, mp_size_t en,
+         mp_srcptr mp, mp_size_t n, mp_ptr tp)
+{
+  mp_limb_t ip[2], *mip;
+  int cnt;
+  mp_bitcnt_t ebi;
+  int windowsize, this_windowsize;
+  mp_limb_t expbits;
+  mp_ptr pp, this_pp;
+  long i;
+  TMP_DECL;
+
+  ASSERT (en > 1 || (en == 1 && ep[0] > 1));
+  ASSERT (n >= 1 && ((mp[0] & 1) != 0));
+
+  TMP_MARK;
+
+  count_leading_zeros (cnt, ep[en - 1]);
+  ebi = (mp_bitcnt_t) en * GMP_LIMB_BITS - cnt;   /* exponent bits still to process */
+
+#if 0
+  if (bn < n)
+    {
+      /* Do the first few exponent bits without mod reductions,
+        until the result is greater than the mod argument.  */
+      for (;;)
+       {
+         mpn_sqr (tp, this_pp, tn);
+         tn = tn * 2 - 1,  tn += tp[tn] != 0;
+         if (getbit (ep, ebi) != 0)
+           mpn_mul (..., tp, tn, bp, bn);
+         ebi--;
+       }
+    }
+#endif
+
+  windowsize = win_size (ebi);
+
+  /* Set up mip, the inverse of the modulus handed to the redc variant that the
+     thresholds select for this n: one limb, two limbs, or a full n limbs.  */
+#if WANT_REDC_2
+  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+    {
+      mip = ip;
+      binvert_limb (mip[0], mp[0]);
+      mip[0] = -mip[0];                /* negate the inverse for mpn_redc_1 */
+    }
+  else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
+    {
+      mip = ip;
+      mpn_binvert (mip, mp, 2, tp);
+      mip[0] = -mip[0]; mip[1] = ~mip[1];      /* two-limb negation */
+    }
+#else
+  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
+    {
+      mip = ip;
+      binvert_limb (mip[0], mp[0]);
+      mip[0] = -mip[0];                /* negate the inverse for mpn_redc_1 */
+    }
+#endif
+  else
+    {
+      mip = TMP_ALLOC_LIMBS (n);
+      mpn_binvert (mip, mp, n, tp);
+    }
+
+  pp = TMP_ALLOC_LIMBS (n << (windowsize - 1));   /* 2^(windowsize-1) entries of n limbs */
+
+  this_pp = pp;
+  redcify (this_pp, bp, bn, mp, n);               /* entry 0: b in REDC form */
+
+  /* Store b^2 at rp.  */
+  mpn_sqr (tp, this_pp, n);
+#if WANT_REDC_2
+  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+    mpn_redc_1 (rp, tp, mp, n, mip[0]);
+  else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
+    mpn_redc_2 (rp, tp, mp, n, mip);
+#else
+  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
+    mpn_redc_1 (rp, tp, mp, n, mip[0]);
+#endif
+  else
+    mpn_redc_n (rp, tp, mp, n, mip);
+
+  /* Precompute odd powers of b and put them in the temporary area at pp.  */
+  for (i = (1 << (windowsize - 1)) - 1; i > 0; i--)
+    {
+      mpn_mul_n (tp, this_pp, rp, n);             /* previous entry times b^2 */
+      this_pp += n;
+#if WANT_REDC_2
+      if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+       mpn_redc_1 (this_pp, tp, mp, n, mip[0]);
+      else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
+       mpn_redc_2 (this_pp, tp, mp, n, mip);
+#else
+      if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
+       mpn_redc_1 (this_pp, tp, mp, n, mip[0]);
+#endif
+      else
+       mpn_redc_n (this_pp, tp, mp, n, mip);
+    }
+
+  expbits = getbits (ep, ebi, windowsize);        /* leading window of the exponent */
+  if (ebi < windowsize)
+    ebi = 0;
+  else
+    ebi -= windowsize;
+
+  count_trailing_zeros (cnt, expbits);            /* hand trailing zero bits back ... */
+  ebi += cnt;
+  expbits >>= cnt;                                /* ... so that expbits is odd */
+
+  MPN_COPY (rp, pp + n * (expbits >> 1), n);      /* entry k holds b^(2k+1) */
+
+#define INNERLOOP                                                      \
+  while (ebi != 0)                                                     \
+    {                                                                  \
+      while (getbit (ep, ebi) == 0)                                    \
+       {                                                               \
+         MPN_SQR (tp, rp, n);                                          \
+         MPN_REDUCE (rp, tp, mp, n, mip);                              \
+         ebi--;                                                        \
+         if (ebi == 0)                                                 \
+           goto done;                                                  \
+       }                                                               \
+                                                                       \
+      /* The next bit of the exponent is 1.  Now extract the largest   \
+        block of bits <= windowsize, and such that the least           \
+        significant bit is 1.  */                                      \
+                                                                       \
+      expbits = getbits (ep, ebi, windowsize);                         \
+      this_windowsize = windowsize;                                    \
+      if (ebi < windowsize)                                            \
+       {                                                               \
+         this_windowsize -= windowsize - ebi;                          \
+         ebi = 0;                                                      \
+       }                                                               \
+      else                                                             \
+        ebi -= windowsize;                                             \
+                                                                       \
+      count_trailing_zeros (cnt, expbits);                             \
+      this_windowsize -= cnt;                                          \
+      ebi += cnt;                                                      \
+      expbits >>= cnt;                                                 \
+                                                                       \
+      do                                                               \
+       {                                                               \
+         MPN_SQR (tp, rp, n);                                          \
+         MPN_REDUCE (rp, tp, mp, n, mip);                              \
+         this_windowsize--;                                            \
+       }                                                               \
+      while (this_windowsize != 0);                                    \
+                                                                       \
+      MPN_MUL_N (tp, rp, pp + n * (expbits >> 1), n);                  \
+      MPN_REDUCE (rp, tp, mp, n, mip);                                 \
+    }
+
+
+#if WANT_REDC_2
+  if (REDC_1_TO_REDC_2_THRESHOLD < MUL_TOOM22_THRESHOLD)
+    {
+      if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+       {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)             mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n)                 mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)     mpn_redc_1 (rp, tp, mp, n, mip[0])
+         INNERLOOP;
+       }
+      else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
+       {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)             mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n)                 mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)     mpn_redc_2 (rp, tp, mp, n, mip)
+         INNERLOOP;
+       }
+      else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
+       {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)             mpn_mul_n (r,a,b,n)
+#define MPN_SQR(r,a,n)                 mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)     mpn_redc_2 (rp, tp, mp, n, mip)
+         INNERLOOP;
+       }
+      else
+       {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)             mpn_mul_n (r,a,b,n)
+#define MPN_SQR(r,a,n)                 mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)     mpn_redc_n (rp, tp, mp, n, mip)
+         INNERLOOP;
+       }
+    }
+  else
+    {
+      if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
+       {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)             mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n)                 mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)     mpn_redc_1 (rp, tp, mp, n, mip[0])
+         INNERLOOP;
+       }
+      else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+       {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)             mpn_mul_n (r,a,b,n)
+#define MPN_SQR(r,a,n)                 mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)     mpn_redc_1 (rp, tp, mp, n, mip[0])
+         INNERLOOP;
+       }
+      else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
+       {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)             mpn_mul_n (r,a,b,n)
+#define MPN_SQR(r,a,n)                 mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)     mpn_redc_2 (rp, tp, mp, n, mip)
+         INNERLOOP;
+       }
+      else
+       {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)             mpn_mul_n (r,a,b,n)
+#define MPN_SQR(r,a,n)                 mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)     mpn_redc_n (rp, tp, mp, n, mip)
+         INNERLOOP;
+       }
+    }
+
+#else  /* WANT_REDC_2 */
+
+  if (REDC_1_TO_REDC_N_THRESHOLD < MUL_TOOM22_THRESHOLD)
+    {
+      if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
+       {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)             mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n)                 mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)     mpn_redc_1 (rp, tp, mp, n, mip[0])
+         INNERLOOP;
+       }
+      else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
+       {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)             mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n)                 mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)     mpn_redc_n (rp, tp, mp, n, mip)
+         INNERLOOP;
+       }
+      else
+       {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)             mpn_mul_n (r,a,b,n)
+#define MPN_SQR(r,a,n)                 mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)     mpn_redc_n (rp, tp, mp, n, mip)
+         INNERLOOP;
+       }
+    }
+  else
+    {
+      if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
+       {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)             mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n)                 mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)     mpn_redc_1 (rp, tp, mp, n, mip[0])
+         INNERLOOP;
+       }
+      else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
+       {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)             mpn_mul_n (r,a,b,n)
+#define MPN_SQR(r,a,n)                 mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)     mpn_redc_1 (rp, tp, mp, n, mip[0])
+         INNERLOOP;
+       }
+      else
+       {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)             mpn_mul_n (r,a,b,n)
+#define MPN_SQR(r,a,n)                 mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)     mpn_redc_n (rp, tp, mp, n, mip)
+         INNERLOOP;
+       }
+    }
+#endif  /* WANT_REDC_2 */
+
+ done:
+
+  MPN_COPY (tp, rp, n);        /* zero-extend the result to 2n limbs ... */
+  MPN_ZERO (tp + n, n);        /* ... and run one more redc on it below */
+
+#if WANT_REDC_2
+  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+    mpn_redc_1 (rp, tp, mp, n, mip[0]);
+  else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
+    mpn_redc_2 (rp, tp, mp, n, mip);
+#else
+  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
+    mpn_redc_1 (rp, tp, mp, n, mip[0]);
+#endif
+  else
+    mpn_redc_n (rp, tp, mp, n, mip);
+
+  if (mpn_cmp (rp, mp, n) >= 0)        /* bring the result below the modulus */
+    mpn_sub_n (rp, rp, mp, n);
+
+  TMP_FREE;
+}
diff --git a/mpn/generic/powm_sec.c b/mpn/generic/powm_sec.c

new file mode 100644 (file)

index 0000000..1960308
--- /dev/null
+++ b/mpn/generic/powm_sec.c
@@ -0,0 +1,340 @@
+/* mpn_powm_sec -- Compute R = U^E mod M.  Secure variant, side-channel silent
+   under the assumption that the multiply instruction is side channel silent.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2007, 2008, 2009, 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+/*
+  BASIC ALGORITHM, Compute U^E mod M, where M < B^n is odd.
+
+  1. T <- (B^n * U) mod M                Convert to REDC form
+
+  2. Compute table U^0, U^1, U^2... of E-dependent size
+
+  3. While there are more bits in E
+       W <- power left-to-right base-k
+
+
+  TODO:
+
+   * Make getbits a macro, thereby allowing it to update the index operand.
+     That will simplify the code using getbits.  (Perhaps make getbits' sibling
+     getbit then have similar form, for symmetry.)
+
+   * Write an itch function.  Or perhaps get rid of tp parameter since the huge
+     pp area is allocated locally anyway?
+
+   * Choose window size without looping.  (Superoptimize or think(tm).)
+
+   * Call new division functions, not mpn_tdiv_qr.
+*/
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#define WANT_CACHE_SECURITY 1
+
+
+/* Define our own mpn squaring function.  We do this since we cannot use a
+   native mpn_sqr_basecase over TUNE_SQR_TOOM2_MAX, or a non-native one over
+   SQR_TOOM2_THRESHOLD.  This is so because of fixed size stack allocations
+   made inside mpn_sqr_basecase.  */
+
+#if HAVE_NATIVE_mpn_sqr_diagonal
+#define MPN_SQR_DIAGONAL(rp, up, n)                                    \
+  mpn_sqr_diagonal (rp, up, n)
+#else  /* generic C: rp[2i+1],rp[2i] = up[i]^2 for 0 <= i < n */
+#define MPN_SQR_DIAGONAL(rp, up, n)                                    \
+  do {                                                                 \
+    mp_size_t _i;                                                      \
+    for (_i = 0; _i < (n); _i++)                                       \
+      {                                                                        \
+       mp_limb_t ul, lpl;                                              \
+       ul = (up)[_i];                  /* limb to square */            \
+       umul_ppmm ((rp)[2 * _i + 1], lpl, ul, ul << GMP_NAIL_BITS);     \
+       (rp)[2 * _i] = lpl >> GMP_NAIL_BITS;                            \
+      }                                                                        \
+  } while (0)
+#endif /* HAVE_NATIVE_mpn_sqr_diagonal */
+
+
+#if ! HAVE_NATIVE_mpn_sqr_basecase
+/* The limit of the generic code is SQR_TOOM2_THRESHOLD.  */
+#define SQR_BASECASE_LIM  SQR_TOOM2_THRESHOLD
+#endif
+
+#if HAVE_NATIVE_mpn_sqr_basecase
+#ifdef TUNE_SQR_TOOM2_MAX
+/* We slightly abuse TUNE_SQR_TOOM2_MAX here.  If it is set for an assembly
+   mpn_sqr_basecase, it comes from SQR_TOOM2_THRESHOLD_MAX in the assembly
+   file.  An assembly mpn_sqr_basecase that does not define it, should allow
+   any size.  */
+#define SQR_BASECASE_LIM  SQR_TOOM2_THRESHOLD
+#endif
+#endif
+
+#ifdef WANT_FAT_BINARY
+/* For fat builds, we use SQR_TOOM2_THRESHOLD, which will expand to a read from
+   __gmpn_cpuvec.  Perhaps every possible sqr_basecase.asm allows any size, and
+   we limit the use unnecessarily.  We cannot tell, so play it safe.  FIXME.  */
+#define SQR_BASECASE_LIM  SQR_TOOM2_THRESHOLD
+#endif
+
+#ifndef SQR_BASECASE_LIM
+/* If SQR_BASECASE_LIM is now not defined, use mpn_sqr_basecase for any operand
+   size.  */
+#define mpn_local_sqr(rp,up,n,tp) mpn_sqr_basecase(rp,up,n)
+#else
+/* Local squaring routine: rp[2n-1..0] = up[n-1..0]^2.  Operands below
+   SQR_BASECASE_LIM are passed to mpn_sqr_basecase; larger ones are squared
+   here, using scratch space at tp for the off-diagonal products.  */
+static void
+mpn_local_sqr (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr tp)
+{
+  mp_size_t i;
+  mp_limb_t u0, lo, cy;
+
+  ASSERT (n >= 1);
+  ASSERT (! MPN_OVERLAP_P (rp, 2*n, up, n));
+
+  if (BELOW_THRESHOLD (n, SQR_BASECASE_LIM))
+    {
+      mpn_sqr_basecase (rp, up, n);
+      return;
+    }
+
+  /* Diagonal term of the lowest limb: rp[1],rp[0] = up[0]^2.  */
+  u0 = up[0];
+  umul_ppmm (rp[1], lo, u0, u0 << GMP_NAIL_BITS);
+  rp[0] = lo >> GMP_NAIL_BITS;
+
+  if (n == 1)
+    return;
+
+  /* Off-diagonal products up[i]*up[j], i < j, accumulated in tp[2n-3..0].  */
+  cy = mpn_mul_1 (tp, up + 1, n - 1, up[0]);
+  tp[n - 1] = cy;
+  for (i = 2; i < n; i++)
+    {
+      cy = mpn_addmul_1 (tp + 2 * i - 2, up + i, n - i, up[i - 1]);
+      tp[n + i - 2] = cy;
+    }
+
+  /* Remaining diagonal terms up[i]^2 for i >= 1.  */
+  MPN_SQR_DIAGONAL (rp + 2, up + 1, n - 1);
+
+  /* Add twice the off-diagonal sum, one limb up from the bottom of rp; the
+     carry out is folded into the top limb.  */
+#if HAVE_NATIVE_mpn_addlsh1_n
+  cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);
+#else
+  cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
+  cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);
+#endif
+  rp[2 * n - 1] += cy;
+}
+#endif
+
+#define getbit(p,bi) /* bit number bi of p, counting from 1 at the LSB */ \
+  (((p)[((bi) - 1) / GMP_LIMB_BITS] >> ((bi) - 1) % GMP_LIMB_BITS) & 1)
+
+/* Extract the nbits bits of p at 0-based positions bi-nbits .. bi-1; when
+   fewer than nbits bits remain (bi < nbits), return the low bi bits of p[0].  */
+static inline mp_limb_t
+getbits (const mp_limb_t *p, mp_bitcnt_t bi, int nbits)
+{
+  mp_bitcnt_t lowbit;
+  mp_size_t limb;
+  int shift, have;
+  mp_limb_t w;
+
+  if (bi < nbits)
+    return p[0] & (((mp_limb_t) 1 << bi) - 1);
+
+  lowbit = bi - nbits;                 /* bit index of low bit to extract */
+  limb = lowbit / GMP_LIMB_BITS;       /* limb holding that bit */
+  shift = lowbit % GMP_LIMB_BITS;      /* its position inside the limb */
+  w = p[limb] >> shift;                /* low part of the field */
+  have = GMP_LIMB_BITS - shift;        /* bits obtained so far */
+  if (have < nbits)                    /* field straddles two limbs? */
+    w += p[limb + 1] << have;          /* bring in bits from the next limb */
+
+  return w & (((mp_limb_t) 1 << nbits) - 1);
+}
+
+static inline int
+win_size (mp_bitcnt_t eb)        /* window width to use for an eb-bit exponent */
+{
+  static mp_bitcnt_t limit[] = {0,4,27,100,325,1026,2905,7848,20457,51670,~(mp_bitcnt_t)0};
+  int k = 1;                     /* limit[k]: largest eb served by width k */
+  while (eb > limit[k])          /* the all-ones last entry stops the scan */
+    k++;
+  return k;
+}
+
+/* Put up[un-1..0] into REDC form: rp[n-1..0] = (B^n * U) mod M.  Scratch at tp
+   holds the un+n limb dividend, followed directly by the quotient.  */
+static void
+redcify (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr mp, mp_size_t n, mp_ptr tp)
+{
+  mp_size_t nn = un + n;               /* dividend size in limbs */
+  mp_ptr quo = tp + nn;                /* quotient area, result unused */
+
+  MPN_ZERO (tp, n);                    /* low n limbs zero, i.e. times B^n */
+  MPN_COPY (tp + n, up, un);           /* high un limbs hold U */
+  mpn_tdiv_qr (quo, rp, 0L, tp, nn, mp, n);
+}
+
+/* rp[n-1..0] = bp[bn-1..0] ^ ep[en-1..0] mod mp[n-1..0]
+   Requires that mp[n-1..0] is odd (asserted below).  FIXME: is this true?
+   Requires that ep[en-1..0] is > 1 (NOTE: the ASSERT below only checks > 0).
+   Scratch at tp: 4n work limbs, then the power table; see mpn_powm_sec_itch.  */
+void
+mpn_powm_sec (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
+             mp_srcptr ep, mp_size_t en,
+             mp_srcptr mp, mp_size_t n, mp_ptr tp)
+{
+  mp_limb_t minv;
+  int cnt;
+  mp_bitcnt_t ebi;
+  int windowsize, this_windowsize;
+  mp_limb_t expbits;
+  mp_ptr pp, this_pp;
+  long i;
+  int cnd;
+
+  ASSERT (en > 1 || (en == 1 && ep[0] > 0));
+  ASSERT (n >= 1 && ((mp[0] & 1) != 0));
+
+  count_leading_zeros (cnt, ep[en - 1]);
+  ebi = (mp_bitcnt_t) en * GMP_LIMB_BITS - cnt;   /* exponent bits still to process */
+
+  windowsize = win_size (ebi);
+
+  binvert_limb (minv, mp[0]);
+  minv = -minv;                        /* negated inverse, passed to mpn_redc_1_sec */
+
+  pp = tp + 4 * n;                     /* power table follows 4n limbs of work space */
+
+  this_pp = pp;
+  this_pp[n] = 1;                      /* temporary one-limb operand 1 */
+  redcify (this_pp, this_pp + n, 1, mp, n, tp + 6 * n);   /* entry 0: 1 in REDC form */
+  this_pp += n;
+  redcify (this_pp, bp, bn, mp, n, tp + 6 * n);           /* entry 1: b in REDC form */
+
+  /* Precompute powers of b and put them in the temporary area at pp.  */
+  for (i = (1 << windowsize) - 2; i > 0; i--)
+    {
+      mpn_mul_basecase (tp, this_pp, n, pp + n, n);       /* previous entry times b */
+      this_pp += n;
+      mpn_redc_1_sec (this_pp, tp, mp, n, minv);
+    }
+
+  expbits = getbits (ep, ebi, windowsize);        /* leading window of the exponent */
+  if (ebi < windowsize)
+    ebi = 0;
+  else
+    ebi -= windowsize;
+
+#if WANT_CACHE_SECURITY
+  mpn_tabselect (rp, pp, n, 1 << windowsize, expbits);
+#else
+  MPN_COPY (rp, pp + n * expbits, n);
+#endif
+
+  while (ebi != 0)
+    {
+      expbits = getbits (ep, ebi, windowsize);
+      this_windowsize = windowsize;
+      if (ebi < windowsize)
+       {
+         this_windowsize -= windowsize - ebi;    /* last window may be short */
+         ebi = 0;
+       }
+      else
+       ebi -= windowsize;
+
+      do                                         /* one squaring per window bit */
+       {
+         mpn_local_sqr (tp, rp, n, tp + 2 * n);
+         mpn_redc_1_sec (rp, tp, mp, n, minv);
+         this_windowsize--;
+       }
+      while (this_windowsize != 0);
+
+#if WANT_CACHE_SECURITY
+      mpn_tabselect (tp + 2*n, pp, n, 1 << windowsize, expbits);
+      mpn_mul_basecase (tp, rp, n, tp + 2*n, n);
+#else
+      mpn_mul_basecase (tp, rp, n, pp + n * expbits, n);
+#endif
+      mpn_redc_1_sec (rp, tp, mp, n, minv);
+    }
+
+  MPN_COPY (tp, rp, n);                /* zero-extend the result to 2n limbs ... */
+  MPN_ZERO (tp + n, n);                /* ... and run one more redc on it */
+  mpn_redc_1_sec (rp, tp, mp, n, minv);
+  cnd = mpn_sub_n (tp, rp, mp, n);     /* we need just retval */
+  mpn_subcnd_n (rp, rp, mp, n, !cnd);  /* presumably subtracts mp iff no borrow above */
+}
+
+#if ! HAVE_NATIVE_mpn_tabselect
+/* Select entry `which' from table `tab', which has nents entries, each `n'
+   limbs.  Store the selected entry at rp.  Reads entire table to avoid
+   side-channel information leaks.  O(n*nents).
+   FIXME: Move to its own file.  */
+void
+mpn_tabselect (volatile mp_limb_t *rp, volatile mp_limb_t *tab, mp_size_t n,
+              mp_size_t nents, mp_size_t which)
+{
+  mp_size_t k, i;
+  mp_limb_t mask;
+  volatile mp_limb_t *tp;
+
+  for (k = 0; k < nents; k++)          /* visit every entry, wanted or not */
+    {
+      mask = -(mp_limb_t) (which == k);        /* all ones for the wanted entry, else 0 */
+      tp = tab + n * k;
+      for (i = 0; i < n; i++)          /* note: rp[i] is read before it is first set */
+       {
+         rp[i] = (rp[i] & ~mask) | (tp[i] & mask);     /* keep rp[i] unless mask is set */
+       }
+    }
+}
+#endif
+
+mp_size_t       /* limbs of scratch mpn_powm_sec needs at tp */
+mpn_powm_sec_itch (mp_size_t bn, mp_size_t en, mp_size_t n)
+{
+  int windowsize;
+  mp_size_t redcify_itch, itch;
+
+  /* Upper bound on mpn_powm_sec's ebi; widen to mp_bitcnt_t before multiplying.  */
+  windowsize = win_size ((mp_bitcnt_t) en * GMP_LIMB_BITS);
+  itch = 4 * n + (n << windowsize);    /* 4n work limbs + full power table */
+  redcify_itch = 2 * bn + n + 1;       /* dividend bn+n limbs, quotient bn+1 */
+  /* redcify's scratch is placed 6n limbs into the scratch area.  */
+  return MAX (itch, redcify_itch + 6 * n);
+}
diff --git a/mpn/generic/pre_divrem_1.c b/mpn/generic/pre_divrem_1.c

new file mode 100644 (file)

index 0000000..134b4cb
--- /dev/null
+++ b/mpn/generic/pre_divrem_1.c
@@ -0,0 +1,135 @@
+/* mpn_preinv_divrem_1 -- mpn by limb division with pre-inverted divisor.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Don't bloat a shared library with unused code. */
+#if USE_PREINV_DIVREM_1
+
+/* Same test here for skipping one divide step as in mpn_divrem_1.
+
+   The main reason for a separate shift==0 case is that not all CPUs give
+   zero for "n0 >> GMP_LIMB_BITS" which would arise in the general case
+   code used on shift==0.  shift==0 is also reasonably common in mp_bases
+   big_base, for instance base==10 on a 64-bit limb.
+
+   Under shift!=0 it would be possible to call mpn_lshift to adjust the
+   dividend all in one go (into the quotient space say), rather than
+   limb-by-limb in the loop.  This might help if mpn_lshift is a lot faster
+   than what the compiler can generate for EXTRACT.  But this is left to CPU
+   specific implementations to consider, especially since EXTRACT isn't on
+   the dependent chain.
+
+   If size==0 then the result is simply xsize limbs of zeros, but nothing
+   special is done for that, since it wouldn't be a usual call, and
+   certainly never arises from mpn_get_str which is our main caller.  */
+
+mp_limb_t
+mpn_preinv_divrem_1 (mp_ptr qp, mp_size_t xsize,
+                    mp_srcptr ap, mp_size_t size, mp_limb_t d_unnorm,
+                    mp_limb_t dinv, int shift)
+{
+  mp_limb_t  ahigh, qhigh, r;
+  mp_size_t  i;
+  mp_limb_t  n1, n0;
+  mp_limb_t  d;
+
+  ASSERT (xsize >= 0);
+  ASSERT (size >= 1);
+  ASSERT (d_unnorm != 0);
+#if WANT_ASSERT
+  {
+    int        want_shift;
+    mp_limb_t  want_dinv;
+    count_leading_zeros (want_shift, d_unnorm);
+    ASSERT (shift == want_shift);      /* shift must be d_unnorm's leading zero count */
+    invert_limb (want_dinv, d_unnorm << shift);
+    ASSERT (dinv == want_dinv);        /* dinv must be the inverse of the shifted divisor */
+  }
+#endif
+  /* FIXME: What's the correct overlap rule when xsize!=0? */
+  ASSERT (MPN_SAME_OR_SEPARATE_P (qp+xsize, ap, size));
+
+  ahigh = ap[size-1];
+  d = d_unnorm << shift;      /* divisor with its high bit set */
+  qp += (size + xsize - 1);   /* dest high limb */
+
+  if (shift == 0)
+    {
+      /* High quotient limb is 0 or 1, and skip a divide step. */
+      r = ahigh;
+      qhigh = (r >= d);
+      r = (qhigh ? r-d : r);
+      *qp-- = qhigh;
+      size--;
+
+      for (i = size-1; i >= 0; i--)    /* remaining limbs, high to low */
+       {
+         n0 = ap[i];
+         udiv_qrnnd_preinv (*qp, r, r, n0, d, dinv);
+         qp--;
+       }
+    }
+  else
+    {
+      r = 0;
+      if (ahigh < d_unnorm)            /* high quotient limb is zero: skip a step */
+       {
+         r = ahigh << shift;
+         *qp-- = 0;
+         size--;
+         if (size == 0)
+           goto done_integer;
+       }
+
+      n1 = ap[size-1];
+      r |= n1 >> (GMP_LIMB_BITS - shift);      /* top bits of n1 join the remainder */
+
+      for (i = size-2; i >= 0; i--)
+       {
+         ASSERT (r < d);
+         n0 = ap[i];
+         udiv_qrnnd_preinv (*qp, r, r,
+                            ((n1 << shift) | (n0 >> (GMP_LIMB_BITS - shift))),
+                            d, dinv);
+         qp--;
+         n1 = n0;
+       }
+      udiv_qrnnd_preinv (*qp, r, r, n1 << shift, d, dinv);     /* lowest dividend limb */
+      qp--;
+    }
+
+ done_integer:
+  for (i = 0; i < xsize; i++)          /* xsize further quotient limbs from zero limbs */
+    {
+      udiv_qrnnd_preinv (*qp, r, r, CNST_LIMB(0), d, dinv);
+      qp--;
+    }
+
+  return r >> shift;                   /* remainder scaled back down by shift */
+}
+
+#endif /* USE_PREINV_DIVREM_1 */
diff --git a/mpn/generic/pre_mod_1.c b/mpn/generic/pre_mod_1.c

new file mode 100644 (file)

index 0000000..961733b
--- /dev/null
+++ b/mpn/generic/pre_mod_1.c
@@ -0,0 +1,52 @@
+/* mpn_preinv_mod_1 (up, un, d, dinv) -- Divide (UP,,UN) by the normalized D.
+   DINV should be 2^(2*GMP_LIMB_BITS) / D - 2^GMP_LIMB_BITS.
+   Return the single-limb remainder.
+
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2004, 2005 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by the
+Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License along
+with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* This function used to be documented, but is now considered obsolete.  It
+   continues to exist for binary compatibility, even when not required
+   internally.  */
+
+mp_limb_t
+mpn_preinv_mod_1 (mp_srcptr up, mp_size_t un, mp_limb_t d, mp_limb_t dinv)
+{
+  mp_limb_t rem, limb, q_unused;
+  mp_size_t idx;
+
+  ASSERT (un >= 1);
+  ASSERT (d & GMP_LIMB_HIGHBIT);
+
+  /* Reduce the most significant limb.  Since d has its high bit set, a
+     single conditional subtraction brings it below d.  */
+  rem = up[un - 1];
+  rem = (rem >= d) ? rem - d : rem;
+
+  /* Fold in the remaining limbs from high to low; only the remainder of
+     each two-limb by one-limb division step is kept.  */
+  idx = un - 1;
+  while (idx > 0)
+    {
+      idx--;
+      limb = up[idx];
+      udiv_qrnnd_preinv (q_unused, rem, rem, limb, d, dinv);
+    }
+
+  return rem;
+}
diff --git a/mpn/generic/random.c b/mpn/generic/random.c

new file mode 100644 (file)

index 0000000..c0b85ea
--- /dev/null
+++ b/mpn/generic/random.c
@@ -0,0 +1,40 @@
+/* mpn_random -- Generate random numbers.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Fill {ptr,size} with random limbs from the global RANDS state, retrying
+   the top limb until it is non-zero.  A size of zero is a no-op.  */
+void
+mpn_random (mp_ptr ptr, mp_size_t size)
+{
+  gmp_randstate_ptr  state;
+  mp_ptr             top;
+
+  /* FIXME: Is size==0 supposed to be allowed? */
+  ASSERT (size >= 0);
+
+  if (size != 0)
+    {
+      state = RANDS;
+      top = ptr + (size - 1);
+
+      _gmp_rand (ptr, state, size * GMP_NUMB_BITS);
+
+      /* Redraw the most significant limb for as long as it comes out zero.  */
+      for (; *top == 0; )
+        _gmp_rand (top, state, GMP_NUMB_BITS);
+    }
+}
diff --git a/mpn/generic/random2.c b/mpn/generic/random2.c

new file mode 100644 (file)

index 0000000..7d3da9f
--- /dev/null
+++ b/mpn/generic/random2.c
@@ -0,0 +1,96 @@
+/* mpn_random2 -- Generate random numbers with relatively long strings
+   of ones and zeroes.  Suitable for border testing.
+
+Copyright 1992, 1993, 1994, 1996, 2000, 2001, 2002, 2004 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+static void gmp_rrandomb __GMP_PROTO ((mp_ptr, gmp_randstate_t, mp_bitcnt_t));
+
+/* Ask _gmp_rand for 32 bits per call unless that's more than a limb can hold.
+   Thus, we get the same random number sequence in the common cases.
+   FIXME: We should always generate the same random number sequence!  */
+#if GMP_NUMB_BITS < 32
+#define BITS_PER_RANDCALL GMP_NUMB_BITS
+#else
+#define BITS_PER_RANDCALL 32
+#endif
+
+/* Fill {rp,n} with a random number containing long runs of ones and zeros.
+   The value starts at a random bit position inside the most significant
+   limb, so between 0 and GMP_NUMB_BITS-1 of the top bits are left clear.
+
+   n == 0 is accepted and does nothing, matching mpn_random.  Without this
+   guard the bit count passed to gmp_rrandomb would be zero or wrap around
+   to a huge unsigned value, and gmp_rrandomb would store outside rp.  */
+void
+mpn_random2 (mp_ptr rp, mp_size_t n)
+{
+  gmp_randstate_ptr rstate;
+  int bit_pos;                 /* bit number of least significant bit where
+                                  next bit field to be inserted */
+  mp_limb_t ranm;              /* buffer for random bits */
+
+  /* FIXME: Is n==0 supposed to be allowed? */
+  ASSERT (n >= 0);
+
+  if (n == 0)
+    return;
+
+  rstate = RANDS;
+  _gmp_rand (&ranm, rstate, BITS_PER_RANDCALL);
+
+  /* Start off at a random bit position in the most significant limb.  */
+  bit_pos = ranm % GMP_NUMB_BITS;
+
+  gmp_rrandomb (rp, rstate, n * GMP_NUMB_BITS - bit_pos);
+}
+
+/* Store at rp an nbits-bit pattern made of alternating runs of ones and
+   zeros with randomly chosen run lengths.  The number of limbs written is
+   ceil (nbits / GMP_NUMB_BITS).  The loop below computes rp[i] from nbits
+   without checking for zero, so callers presumably must pass nbits >= 1
+   (NOTE(review): confirm against callers).  */
+static void
+gmp_rrandomb (mp_ptr rp, gmp_randstate_t rstate, mp_bitcnt_t nbits)
+{
+  mp_bitcnt_t bi;
+  mp_limb_t ranm;              /* buffer for random bits */
+  unsigned cap_chunksize, chunksize;
+  mp_size_t i;
+
+  /* Set entire result to 111..1  */
+  /* The top limb keeps only its low nbits % GMP_NUMB_BITS bits set (all of
+     them when nbits is a multiple of GMP_NUMB_BITS); note that % binds
+     tighter than >>, so the trailing "% GMP_NUMB_BITS" applies to the shift
+     count.  */
+  i = (nbits + GMP_NUMB_BITS - 1) / GMP_NUMB_BITS - 1;
+  rp[i] = GMP_NUMB_MAX >> (GMP_NUMB_BITS - (nbits % GMP_NUMB_BITS)) % GMP_NUMB_BITS;
+  for (i = i - 1; i >= 0; i--)
+    rp[i] = GMP_NUMB_MAX;
+
+  /* Pick an upper bound for run lengths: nbits divided by a random value
+     in 1..4, but never less than 1.  */
+  _gmp_rand (&ranm, rstate, BITS_PER_RANDCALL);
+  cap_chunksize = nbits / (ranm % 4 + 1);
+  cap_chunksize += cap_chunksize == 0; /* make it at least 1 */
+
+  /* bi walks from the top bit position down to 0, one run at a time.  */
+  bi = nbits;
+
+  for (;;)
+    {
+      /* Run of ones: step bi down by a random length in 1..cap_chunksize,
+         clamping at 0.  */
+      _gmp_rand (&ranm, rstate, BITS_PER_RANDCALL);
+      chunksize = 1 + ranm % cap_chunksize;
+      bi = (bi < chunksize) ? 0 : bi - chunksize;
+
+      if (bi == 0)
+       break;                  /* low chunk is ...1 */
+
+      /* Flip bit bi; it is still set from the initial fill, so this
+         clears it.  */
+      rp[bi / GMP_NUMB_BITS] ^= CNST_LIMB (1) << bi % GMP_NUMB_BITS;
+
+      /* Run of zeros: step bi down again, then add 2^bi.  The carry ripples
+         through the ones between the new bi and the bit just cleared,
+         turning them into zeros and setting the cleared bit again.  */
+      _gmp_rand (&ranm, rstate, BITS_PER_RANDCALL);
+      chunksize = 1 + ranm % cap_chunksize;
+      bi = (bi < chunksize) ? 0 : bi - chunksize;
+
+      mpn_incr_u (rp + bi / GMP_NUMB_BITS, CNST_LIMB (1) << bi % GMP_NUMB_BITS);
+
+      if (bi == 0)
+       break;                  /* low chunk is ...0 */
+    }
+}
diff --git a/mpn/generic/redc_1.c b/mpn/generic/redc_1.c

new file mode 100644 (file)

index 0000000..177f393
--- /dev/null
+++ b/mpn/generic/redc_1.c
@@ -0,0 +1,46 @@
+/* mpn_redc_1.  Set cp[] <- up[]/R^n mod mp[].  Clobber up[].
+   mp[] is n limbs; up[] is 2n limbs.
+
+   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
+
+Copyright (C) 2000, 2001, 2002, 2004, 2008, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Montgomery reduction, one limb at a time.  Each pass adds a multiple of
+   {mp,n} chosen so that the current low limb of up[] becomes zero, then
+   parks the carry-out limb in that slot.  Finally the n carry limbs are
+   added to the high half of up[], and mp is subtracted once if that
+   addition carries out.  */
+void
+mpn_redc_1 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_limb_t invm)
+{
+  mp_size_t k;
+  mp_limb_t q, carry;
+
+  ASSERT (n > 0);
+  ASSERT_MPN (up, 2*n);
+
+  for (k = 0; k < n; k++)
+    {
+      q = (up[k] * invm) & GMP_NUMB_MASK;
+      carry = mpn_addmul_1 (up + k, mp, n, q);
+      ASSERT (up[k] == 0);
+      up[k] = carry;           /* zeroed slot now holds this pass's carry */
+    }
+
+  /* high half + saved carries */
+  carry = mpn_add_n (rp, up + n, up, n);
+  if (carry != 0)
+    mpn_sub_n (rp, rp, mp, n);
+}
diff --git a/mpn/generic/redc_1_sec.c b/mpn/generic/redc_1_sec.c

new file mode 100644 (file)

index 0000000..3d91438
--- /dev/null
+++ b/mpn/generic/redc_1_sec.c
@@ -0,0 +1,45 @@
+/* mpn_redc_1_sec.  Set cp[] <- up[]/R^n mod mp[].  Clobber up[].
+   mp[] is n limbs; up[] is 2n limbs.
+
+   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
+
+Copyright (C) 2000, 2001, 2002, 2004, 2008, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Same reduction as mpn_redc_1, except that the final correction is always
+   routed through mpn_subcnd_n with the carry as its condition argument,
+   instead of branching on the carry.  */
+void
+mpn_redc_1_sec (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_limb_t invm)
+{
+  mp_size_t k;
+  mp_limb_t q, carry;
+
+  ASSERT (n > 0);
+  ASSERT_MPN (up, 2*n);
+
+  for (k = 0; k < n; k++)
+    {
+      q = (up[k] * invm) & GMP_NUMB_MASK;
+      carry = mpn_addmul_1 (up + k, mp, n, q);
+      ASSERT (up[k] == 0);
+      up[k] = carry;           /* zeroed slot now holds this pass's carry */
+    }
+
+  /* high half + saved carries, then the conditional subtraction of mp */
+  carry = mpn_add_n (rp, up + n, up, n);
+  mpn_subcnd_n (rp, rp, mp, n, carry);
+}
diff --git a/mpn/generic/redc_2.c b/mpn/generic/redc_2.c

new file mode 100644 (file)

index 0000000..2b27586
--- /dev/null
+++ b/mpn/generic/redc_2.c
@@ -0,0 +1,98 @@
+/* mpn_redc_2.  Set cp[] <- up[]/R^n mod mp[].  Clobber up[].
+   mp[] is n limbs; up[] is 2n limbs.
+
+   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
+
+Copyright (C) 2000, 2001, 2002, 2004, 2008 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+#if GMP_NAIL_BITS != 0
+you lose
+#endif
+
+/* For testing purposes, define our own mpn_addmul_2 if there is none already
+   available.  */
+#ifndef HAVE_NATIVE_mpn_addmul_2
+/* Fallback built from two mpn_addmul_1 passes: {rp,n} += {up,n} * vp[0]
+   with the carry stored at rp[n], then {rp+1,n} += {up,n} * vp[1].  The
+   carry of the second pass is returned.  */
+mp_limb_t
+mpn_addmul_2 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_srcptr vp)
+{
+  mp_limb_t top_carry;
+
+  rp[n] = mpn_addmul_1 (rp, up, n, vp[0]);
+  top_carry = mpn_addmul_1 (rp + 1, up, n, vp[1]);
+
+  return top_carry;
+}
+#endif
+
+/* umul2low (ph, pl, uh, ul, vh, vl): set (ph,pl) to the low two limbs of
+   the product (uh,ul) * (vh,vl); anything above two limbs is discarded.
+
+   ia64 variant using xma instructions on floating-point registers.
+   NOTE(review): the locals _ph and _pl are declared but not referenced by
+   the asm, which writes ph and pl directly.  */
+#if defined (__GNUC__) && defined (__ia64) && W_TYPE_SIZE == 64
+#define umul2low(ph, pl, uh, ul, vh, vl) \
+  do {                                                                 \
+    mp_limb_t _ph, _pl;                                                        \
+    __asm__ ("xma.hu %0 = %3, %5, f0\n\t"                              \
+            "xma.l %1 = %3, %5, f0\n\t"                                \
+            ";;\n\t"                                                   \
+            "xma.l %0 = %3, %4, %0\n\t"                                \
+            ";;\n\t"                                                   \
+            "xma.l %0 = %2, %5, %0"                                    \
+            : "=&f" (ph), "=&f" (pl)                                   \
+            : "f" (uh), "f" (ul), "f" (vh), "f" (vl));                 \
+  } while (0)
+#endif
+
+/* Generic variant: pl is the low limb of ul*vl; ph is the high limb of
+   ul*vl plus the two cross products ul*vh and uh*vl, with limb arithmetic
+   wrapping around.  */
+#ifndef umul2low
+#define umul2low(ph, pl, uh, ul, vh, vl) \
+  do {                                                                 \
+    mp_limb_t _ph, _pl;                                                        \
+    umul_ppmm (_ph, _pl, ul, vl);                                      \
+    (ph) = _ph + (ul) * (vh) + (uh) * (vl);                            \
+    (pl) = _pl;                                                                \
+  } while (0)
+#endif
+
+/* Montgomery reduction retiring two limbs of up[] per iteration.
+
+   rp   destination, n limbs
+   up   2n-limb input, clobbered
+   mp   n-limb modulus
+   mip  two-limb inverse; only mip[0] and mip[1] are read here
+
+   As in mpn_redc_1, the carries produced while clearing the low limbs are
+   stored back into the cleared slots and added to the high half at the
+   end, followed by one subtraction of mp if that addition carries out.  */
+void
+mpn_redc_2 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_srcptr mip)
+{
+  mp_limb_t q[2];
+  mp_size_t j;
+  mp_limb_t upn;
+  mp_limb_t cy;
+
+  ASSERT (n > 0);
+  ASSERT_MPN (up, 2*n);
+
+  /* For odd n, retire one limb with a single-limb step so that an even
+     number of limbs remains for the two-limb loop.  */
+  if ((n & 1) != 0)
+    {
+      up[0] = mpn_addmul_1 (up, mp, n, (up[0] * mip[0]) & GMP_NUMB_MASK);
+      up++;
+    }
+
+  for (j = n - 2; j >= 0; j -= 2)
+    {
+      /* q = low two limbs of (mip[1],mip[0]) * (up[1],up[0]) */
+      umul2low (q[1], q[0], mip[1], mip[0], up[1], up[0]);
+      upn = up[n];             /* mpn_addmul_2 overwrites this */
+      /* The two carry limbs of this step are the return value and the limb
+         left at up[n]; park them in up[1] and up[0], then restore the
+         input limb that was saved from up[n].  */
+      up[1] = mpn_addmul_2 (up, mp, n, q);
+      up[0] = up[n];
+      up[n] = upn;
+      up += 2;
+    }
+  /* up now points at the high half; up - n is where the carries were
+     parked.  */
+  cy = mpn_add_n (rp, up, up - n, n);
+  if (cy != 0)
+    mpn_sub_n (rp, rp, mp, n);
+}
diff --git a/mpn/generic/redc_n.c b/mpn/generic/redc_n.c

new file mode 100644 (file)

index 0000000..99f618f
--- /dev/null
+++ b/mpn/generic/redc_n.c
@@ -0,0 +1,66 @@
+/* mpn_redc_n.  Set cp[] <- up[]/R^n mod mp[].  Clobber up[].
+   mp[] is n limbs; up[] is 2n limbs, the inverse ip[] is n limbs.
+
+   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
+
+Copyright (C) 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/*
+  TODO
+
+  * We assume mpn_mulmod_bnm1 is always faster than plain mpn_mul_n (or a
+    future mpn_mulhi) for the range we will be called.  Follow up that
+    assumption.
+
+  * Decrease scratch usage.
+*/
+
+/* Montgomery reduction using an n-limb inverse {ip,n} and a wraparound
+   product (mpn_mulmod_bnm1) in place of a full multiplication.
+
+   rp  destination, n limbs
+   up  2n-limb input
+   mp  n-limb modulus
+   ip  n-limb inverse
+
+   Scratch layout: xp = scratch[0..n), yp = scratch[n..n+rn), followed by
+   the work area handed to mpn_mulmod_bnm1.  */
+void
+mpn_redc_n (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_srcptr ip)
+{
+  mp_ptr xp, yp, scratch;
+  mp_limb_t cy;
+  mp_size_t rn;
+  TMP_DECL;
+  TMP_MARK;
+
+  rn = mpn_mulmod_bnm1_next_size (n);
+
+  scratch = TMP_ALLOC_LIMBS (n + rn + mpn_mulmod_bnm1_itch (rn, n, n));
+
+  /* xp = low n limbs of {up,n} * {ip,n} */
+  xp = scratch;
+  mpn_mullo_n (xp, up, ip, n);
+
+  /* yp = xp * mp, computed by mpn_mulmod_bnm1 with size parameter rn */
+  yp = scratch + n;
+  mpn_mulmod_bnm1 (yp, rn, xp, n, mp, n, scratch + n + rn);
+
+  /* NOTE(review): this check runs after mpn_mulmod_bnm1 has already been
+     called with rn; it guards the 2*n - rn limb count used just below.  */
+  ASSERT_ALWAYS (2 * n > rn);                          /* could handle this */
+
+  /* Rebuild the limbs of the product above position rn.  yp + rn is the
+     same address as the work area passed to mpn_mulmod_bnm1 above, which
+     is reused here as extra result space.  */
+  cy = mpn_sub_n (yp + rn, yp, up, 2*n - rn);          /* undo wrap around */
+  MPN_DECR_U (yp + 2*n - rn, rn, cy);
+
+  /* rp = high half of up minus high half of the product; add mp back once
+     if the subtraction borrows.  */
+  cy = mpn_sub_n (rp, up + n, yp + n, n);
+  if (cy != 0)
+    mpn_add_n (rp, rp, mp, n);
+
+  TMP_FREE;
+}
diff --git a/mpn/generic/remove.c b/mpn/generic/remove.c

new file mode 100644 (file)

index 0000000..427a46f
--- /dev/null
+++ b/mpn/generic/remove.c
@@ -0,0 +1,143 @@
+/* mpn_remove -- divide out all multiples of odd mpn number from another mpn
+   number.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#if GMP_LIMB_BITS > 50
+#define LOG 50
+#else
+#define LOG GMP_LIMB_BITS
+#endif
+
+
+/* Input: U = {up,un}, V = {vp,vn} must be odd, cap
+   Output W = {wp,*wn} allocation need is exactly *wn
+
+   Set W = U / V^k, where k is the largest integer <= cap such that the
+   division yields an integer.
+
+   FIXME: We currently allow any operand overlap.  This is quite non mpn-ish
+   and might be changed, since it cost significant temporary space.
+   * If we require W to have space for un limbs, we could save qp or qp2 (but
+     we will still need to copy things into wp 50% of the time).
+   * If we allow ourselves to clobber U, we could save the other of qp and qp2.
+*/
+
+/* Divide U = {up,un} by the highest power V^k, k <= cap, that divides it
+   exactly; store the quotient at {wp,*wn} and return k.
+
+   Phase 1 divides by V, V^2, V^4, ... (each power is the square of the
+   previous one), recording every power that divided exactly.  Phase 2
+   walks the recorded powers from largest to smallest and tries each one
+   once more, so that the total exponent is built up from its binary
+   digits.  Exactness is tested by checking that the remainder returned by
+   mpn_bdiv_qr is zero.  */
+mp_bitcnt_t
+mpn_remove (mp_ptr wp, mp_size_t *wn,
+           mp_ptr up, mp_size_t un, mp_ptr vp, mp_size_t vn,
+           mp_bitcnt_t cap)
+{
+  mp_ptr    pwpsp[LOG];                /* pwpsp[i] points at V^(2^i) */
+  mp_size_t pwpsn[LOG];                /* pwpsn[i] is its size in limbs */
+  mp_size_t npowers;
+  mp_ptr tp, qp, np, pp, qp2, scratch_out;
+  mp_size_t pn, nn, qn, i;
+  mp_bitcnt_t pwr;
+  TMP_DECL;
+
+  ASSERT (un > 0);
+  ASSERT (vn > 0);
+  ASSERT (vp[0] % 2 != 0);     /* 2-adic division wants odd numbers */
+  ASSERT (vn > 1 || vp[0] > 1);        /* else we would loop indefinitely */
+
+  TMP_MARK;
+
+  tp = TMP_ALLOC_LIMBS ((un + vn) / 2); /* remainder */
+  qp = TMP_ALLOC_LIMBS (un);           /* quotient, alternating */
+  qp2 = TMP_ALLOC_LIMBS (un);          /* quotient, alternating */
+  np = TMP_ALLOC_LIMBS (un + LOG);     /* powers of V */
+  pp = vp;
+  pn = vn;
+
+  /* FIXME: This allocation need indicate a flaw in the current itch mechanism:
+     Which operands not greater than un,un will incur the worst itch?  We need
+     a parallel foo_maxitch set of functions.  */
+  scratch_out = TMP_ALLOC_LIMBS (mpn_bdiv_qr_itch (un, un >> 1));
+
+  /* Work on a private copy of U; {qp,qn} is the current quotient.  */
+  MPN_COPY (qp, up, un);
+  qn = un;
+
+  /* Phase 1: divide by successive squarings of V.  */
+  npowers = 0;
+  while (qn >= pn)
+    {
+      mpn_bdiv_qr (qp2, tp, qp, qn, pp, pn, scratch_out);
+      if (!mpn_zero_p (tp, pn))
+       break;                  /* could not divide by V^(2^npowers) */
+
+      /* Division was exact: the new quotient becomes current.  */
+      MP_PTR_SWAP (qp, qp2);
+      qn = qn - pn;
+      qn += qp[qn] != 0;
+
+      pwpsp[npowers] = pp;
+      pwpsn[npowers] = pn;
+      npowers++;
+
+      /* Stop if also dividing by the next power would exceed cap: the
+         exponent would then total 2^(npowers+1) - 1.  */
+      if (((mp_bitcnt_t) 2 << npowers) - 1 > cap)
+       break;
+
+      nn = 2 * pn - 1;         /* next power will be at least this many limbs */
+      if (nn > qn)
+       break;                  /* next power would be overlarge */
+
+      /* Square the current power into the np area and advance np past it.  */
+      mpn_sqr (np, pp, pn);
+      nn += np[nn] != 0;
+      pp = np;
+      pn = nn;
+      np += nn;
+    }
+
+  /* Exponent removed so far: 1 + 2 + ... + 2^(npowers-1).  */
+  pwr = ((mp_bitcnt_t) 1 << npowers) - 1;
+
+  /* Phase 2: retry each recorded power once, largest first.  */
+  for (i = npowers - 1; i >= 0; i--)
+    {
+      pp = pwpsp[i];
+      pn = pwpsn[i];
+      if (qn < pn)
+       continue;
+
+      if (pwr + ((mp_bitcnt_t) 1 << i) > cap)
+       continue;               /* V^(2^i) would bring us past cap */
+
+      mpn_bdiv_qr (qp2, tp, qp, qn, pp, pn, scratch_out);
+      if (!mpn_zero_p (tp, pn))
+       continue;               /* could not divide by V^(2^i) */
+
+      MP_PTR_SWAP (qp, qp2);
+      qn = qn - pn;
+      qn += qp[qn] != 0;
+
+      pwr += (mp_bitcnt_t) 1 << i;
+    }
+
+  MPN_COPY (wp, qp, qn);
+  *wn = qn;
+
+  TMP_FREE;
+
+  return pwr;
+}
diff --git a/mpn/generic/rootrem.c b/mpn/generic/rootrem.c

new file mode 100644 (file)

index 0000000..272b95c
--- /dev/null
+++ b/mpn/generic/rootrem.c
@@ -0,0 +1,408 @@
+/* mpn_rootrem(rootp,remp,ap,an,nth) -- Compute the nth root of {ap,an}, and
+   store the truncated integer part at rootp and the remainder at remp.
+
+   Contributed by Paul Zimmermann (algorithm) and
+   Paul Zimmermann and Torbjorn Granlund (implementation).
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL, AND HAVE MUTABLE INTERFACES.  IT'S
+   ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT'S ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2002, 2005, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+/* FIXME:
+     This implementation is not optimal when remp == NULL, since the complexity
+     is M(n), whereas it should be M(n/k) on average.
+*/
+
+#include <stdio.h>             /* for NULL */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+static mp_size_t mpn_rootrem_internal (mp_ptr, mp_ptr, mp_srcptr, mp_size_t,
+                                      mp_limb_t, int);
+
+/* Right shift {up,un} by cnt bits into rp, also accepting cnt == 0.  A zero
+   count is handled as a plain incrementing copy with cy set to 0; any other
+   count is passed to mpn_rshift and cy receives its return value.  */
+#define MPN_RSHIFT(cy,rp,up,un,cnt) \
+  do {                                                                 \
+    if ((cnt) != 0)                                                    \
+      cy = mpn_rshift (rp, up, un, cnt);                               \
+    else                                                               \
+      {                                                                        \
+       MPN_COPY_INCR (rp, up, un);                                     \
+       cy = 0;                                                         \
+      }                                                                        \
+  } while (0)
+
+/* Left shift {up,un} by cnt bits into rp, also accepting cnt == 0.  A zero
+   count is handled as a plain decrementing copy with cy set to 0; any other
+   count is passed to mpn_lshift and cy receives its return value.  */
+#define MPN_LSHIFT(cy,rp,up,un,cnt) \
+  do {                                                                 \
+    if ((cnt) != 0)                                                    \
+      cy = mpn_lshift (rp, up, un, cnt);                               \
+    else                                                               \
+      {                                                                        \
+       MPN_COPY_DECR (rp, up, un);                                     \
+       cy = 0;                                                         \
+      }                                                                        \
+  } while (0)
+
+
+/* Put in {rootp, ceil(un/k)} the kth root of {up, un}, rounded toward zero.
+   If remp <> NULL, put in {remp, un} the remainder.
+   Return the size (in limbs) of the remainder if remp <> NULL,
+         or a non-zero value iff the remainder is non-zero when remp = NULL.
+   Assumes:
+   (a) up[un-1] is not zero
+   (b) rootp has at least space for ceil(un/k) limbs
+   (c) remp has at least space for un limbs (in case remp <> NULL)
+   (d) the operands do not overlap.
+
+   The auxiliary memory usage is 3*un+2 if remp = NULL,
+   and 2*un+2 if remp <> NULL.  FIXME: This is an incorrect comment.
+*/
+/* Public entry point.  When no remainder is requested and the operand is
+   large relative to k (un / k > 2), the root is obtained from an
+   approximate root of the input padded with k low zero limbs; otherwise
+   mpn_rootrem_internal is called directly on the caller's buffers.  */
+mp_size_t
+mpn_rootrem (mp_ptr rootp, mp_ptr remp,
+            mp_srcptr up, mp_size_t un, mp_limb_t k)
+{
+  ASSERT (un > 0);
+  ASSERT (up[un - 1] != 0);
+  ASSERT (k > 1);
+
+  if ((remp == NULL) && (un / k > 2))
+    /* call mpn_rootrem recursively, padding {up,un} with k zero limbs,
+       which will produce an approximate root with one more limb,
+       so that in most cases we can conclude. */
+    {
+      mp_ptr sp, wp;
+      mp_size_t rn, sn, wn;
+      TMP_DECL;
+      TMP_MARK;
+      wn = un + k;
+      wp = TMP_ALLOC_LIMBS (wn); /* will contain the padded input */
+      sn = (un - 1) / k + 2; /* ceil(un/k) + 1 */
+      sp = TMP_ALLOC_LIMBS (sn); /* approximate root of padded input */
+      MPN_COPY (wp + k, up, un);
+      MPN_ZERO (wp, k);
+      rn = mpn_rootrem_internal (sp, NULL, wp, wn, k, 1);
+      /* the approximate root S = {sp,sn} is either the correct root of
+        the padded input {wp,wn}, or one too large. Thus unless the least
+        significant limb of S is 0 or 1, we can deduce the root of {up,un}
+        is S truncated by one limb. (In case sp[0]=1, we can deduce the
+        root, but not decide whether it is exact or not.)
+        NOTE(review): sp[0] is not inspected here; the truncated root is
+        copied out unconditionally.  */
+      MPN_COPY (rootp, sp + 1, sn - 1);
+      TMP_FREE;
+      return rn;
+    }
+  else /* remp <> NULL, or remp = NULL with un / k <= 2 */
+    {
+      return mpn_rootrem_internal (rootp, remp, up, un, k, 0);
+    }
+}
+
+/* if approx is non-zero, does not compute the final remainder */
+static mp_size_t
+mpn_rootrem_internal (mp_ptr rootp, mp_ptr remp, mp_srcptr up, mp_size_t un,
+                     mp_limb_t k, int approx)
+{
+  mp_ptr qp, rp, sp, wp, scratch;
+  mp_size_t qn, rn, sn, wn, nl, bn;
+  mp_limb_t save, save2, cy;
+  unsigned long int unb; /* number of significant bits of {up,un} */
+  unsigned long int xnb; /* number of significant bits of the result */
+  unsigned int cnt;
+  unsigned long b, kk;
+  unsigned long sizes[GMP_NUMB_BITS + 1];
+  int ni, i;
+  int c;
+  int logk;
+  TMP_DECL;
+
+  TMP_MARK;
+
+  /* qp and wp need enough space to store S'^k where S' is an approximate
+     root. Since S' can be as large as S+2, the worst case is when S=2 and
+     S'=4. But then since we know the number of bits of S in advance, S'
+     can only be 3 at most. Similarly for S=4, then S' can be 6 at most.
+     So the worst case is S'/S=3/2, thus S'^k <= (3/2)^k * S^k. Since S^k
+     fits in un limbs, the number of extra limbs needed is bounded by
+     ceil(k*log2(3/2)/GMP_NUMB_BITS). */
+#define EXTRA 2 + (mp_size_t) (0.585 * (double) k / (double) GMP_NUMB_BITS)
+  qp = TMP_ALLOC_LIMBS (un + EXTRA); /* will contain quotient and remainder
+                                       of R/(k*S^(k-1)), and S^k */
+  if (remp == NULL)
+    {
+      rp = TMP_ALLOC_LIMBS (un + 1);     /* will contain the remainder */
+      scratch = rp;                     /* used by mpn_div_q */
+    }
+  else
+    {
+      scratch = TMP_ALLOC_LIMBS (un + 1); /* used by mpn_div_q */
+      rp = remp;
+    }
+  sp = rootp;
+  wp = TMP_ALLOC_LIMBS (un + EXTRA); /* will contain S^(k-1), k*S^(k-1),
+                                       and temporary for mpn_pow_1 */
+  count_leading_zeros (cnt, up[un - 1]);
+  unb = un * GMP_NUMB_BITS - cnt + GMP_NAIL_BITS;
+  /* unb is the number of bits of the input U */
+
+  xnb = (unb - 1) / k + 1;     /* ceil (unb / k) */
+  /* xnb is the number of bits of the root R */
+
+  if (xnb == 1) /* root is 1 */
+    {
+      if (remp == NULL)
+       remp = rp;
+      mpn_sub_1 (remp, up, un, (mp_limb_t) 1);
+      MPN_NORMALIZE (remp, un);        /* There should be at most one zero limb,
+                                  if we demand u to be normalized  */
+      rootp[0] = 1;
+      TMP_FREE;
+      return un;
+    }
+
+  /* We initialize the algorithm with a 1-bit approximation to zero: since we
+     know the root has exactly xnb bits, we write r0 = 2^(xnb-1), so that
+     r0^k = 2^(k*(xnb-1)), that we subtract to the input. */
+  kk = k * (xnb - 1);          /* number of truncated bits in the input */
+  rn = un - kk / GMP_NUMB_BITS; /* number of limbs of the non-truncated part */
+  MPN_RSHIFT (cy, rp, up + kk / GMP_NUMB_BITS, rn, kk % GMP_NUMB_BITS);
+  mpn_sub_1 (rp, rp, rn, 1);   /* subtract the initial approximation: since
+                                  the non-truncated part is less than 2^k, it
+                                  is <= k bits: rn <= ceil(k/GMP_NUMB_BITS) */
+  sp[0] = 1;                   /* initial approximation */
+  sn = 1;                      /* it has one limb */
+
+  for (logk = 1; ((k - 1) >> logk) != 0; logk++)
+    ;
+  /* logk = ceil(log(k)/log(2)) */
+
+  b = xnb - 1; /* number of remaining bits to determine in the kth root */
+  ni = 0;
+  while (b != 0)
+    {
+      /* invariant: here we want b+1 total bits for the kth root */
+      sizes[ni] = b;
+      /* if c is the new value of b, this means that we'll go from a root
+        of c+1 bits (say s') to a root of b+1 bits.
+        It is proved in the book "Modern Computer Arithmetic" from Brent
+        and Zimmermann, Chapter 1, that
+        if s' >= k*beta, then at most one correction is necessary.
+        Here beta = 2^(b-c), and s' >= 2^c, thus it suffices that
+        c >= ceil((b + log2(k))/2). */
+      b = (b + logk + 1) / 2;
+      if (b >= sizes[ni])
+       b = sizes[ni] - 1;      /* add just one bit at a time */
+      ni++;
+    }
+  sizes[ni] = 0;
+  ASSERT_ALWAYS (ni < GMP_NUMB_BITS + 1);
+  /* We have sizes[0] = b > sizes[1] > ... > sizes[ni] = 0 with
+     sizes[i] <= 2 * sizes[i+1].
+     Newton iteration will first compute sizes[ni-1] extra bits,
+     then sizes[ni-2], ..., then sizes[0] = b. */
+
+  wp[0] = 1; /* {sp,sn}^(k-1) = 1 */
+  wn = 1;
+  for (i = ni; i != 0; i--)
+    {
+      /* 1: loop invariant:
+        {sp, sn} is the current approximation of the root, which has
+                 exactly 1 + sizes[ni] bits.
+        {rp, rn} is the current remainder
+        {wp, wn} = {sp, sn}^(k-1)
+        kk = number of truncated bits of the input
+      */
+      b = sizes[i - 1] - sizes[i]; /* number of bits to compute in that
+                                     iteration */
+
+      /* Reinsert a low zero limb if we normalized away the entire remainder */
+      if (rn == 0)
+       {
+         rp[0] = 0;
+         rn = 1;
+       }
+
+      /* first multiply the remainder by 2^b */
+      MPN_LSHIFT (cy, rp + b / GMP_NUMB_BITS, rp, rn, b % GMP_NUMB_BITS);
+      rn = rn + b / GMP_NUMB_BITS;
+      if (cy != 0)
+       {
+         rp[rn] = cy;
+         rn++;
+       }
+
+      kk = kk - b;
+
+      /* 2: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */
+
+      /* Now insert bits [kk,kk+b-1] from the input U */
+      bn = b / GMP_NUMB_BITS; /* lowest limb from high part of rp[] */
+      save = rp[bn];
+      /* nl is the number of limbs in U which contain bits [kk,kk+b-1] */
+      nl = 1 + (kk + b - 1) / GMP_NUMB_BITS - (kk / GMP_NUMB_BITS);
+      /* nl  = 1 + floor((kk + b - 1) / GMP_NUMB_BITS)
+                - floor(kk / GMP_NUMB_BITS)
+            <= 1 + (kk + b - 1) / GMP_NUMB_BITS
+                 - (kk - GMP_NUMB_BITS + 1) / GMP_NUMB_BITS
+            = 2 + (b - 2) / GMP_NUMB_BITS
+        thus since nl is an integer:
+        nl <= 2 + floor(b/GMP_NUMB_BITS) <= 2 + bn. */
+      /* we have to save rp[bn] up to rp[nl-1], i.e. 1 or 2 limbs */
+      if (nl - 1 > bn)
+       save2 = rp[bn + 1];
+      MPN_RSHIFT (cy, rp, up + kk / GMP_NUMB_BITS, nl, kk % GMP_NUMB_BITS);
+      /* set to zero high bits of rp[bn] */
+      rp[bn] &= ((mp_limb_t) 1 << (b % GMP_NUMB_BITS)) - 1;
+      /* restore corresponding bits */
+      rp[bn] |= save;
+      if (nl - 1 > bn)
+       rp[bn + 1] = save2; /* the low b bits go in rp[0..bn] only, since
+                              they start by bit 0 in rp[0], so they use
+                              at most ceil(b/GMP_NUMB_BITS) limbs */
+
+      /* 3: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */
+
+      /* compute {wp, wn} = k * {sp, sn}^(k-1) */
+      cy = mpn_mul_1 (wp, wp, wn, k);
+      wp[wn] = cy;
+      wn += cy != 0;
+
+      /* 4: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */
+
+      /* now divide {rp, rn} by {wp, wn} to get the low part of the root */
+      if (rn < wn)
+       {
+         qn = 0;
+       }
+      else
+       {
+         mp_ptr tp;
+         qn = rn - wn; /* expected quotient size */
+         /* tp must have space for wn limbs.
+            The quotient needs rn-wn+1 limbs, thus quotient+remainder
+            need altogether rn+1 limbs. */
+         tp = qp + qn + 1;     /* put remainder in Q buffer */
+         mpn_div_q (qp, rp, rn, wp, wn, scratch);
+         qn += qp[qn] != 0;
+       }
+
+      /* 5: current buffers: {sp,sn}, {qp,qn}.
+        Note: {rp,rn} is not needed any more since we'll compute it from
+        scratch at the end of the loop.
+       */
+
+      /* Number of limbs used by b bits, when least significant bit is
+        aligned to least limb */
+      bn = (b - 1) / GMP_NUMB_BITS + 1;
+
+      /* the quotient should be smaller than 2^b, since the previous
+        approximation was correctly rounded toward zero */
+      if (qn > bn || (qn == bn && (b % GMP_NUMB_BITS != 0) &&
+                     qp[qn - 1] >= ((mp_limb_t) 1 << (b % GMP_NUMB_BITS))))
+       {
+         qn = b / GMP_NUMB_BITS + 1; /* b+1 bits */
+         MPN_ZERO (qp, qn);
+         qp[qn - 1] = (mp_limb_t) 1 << (b % GMP_NUMB_BITS);
+         MPN_DECR_U (qp, qn, 1);
+         qn -= qp[qn - 1] == 0;
+       }
+
+      /* 6: current buffers: {sp,sn}, {qp,qn} */
+
+      /* multiply the root approximation by 2^b */
+      MPN_LSHIFT (cy, sp + b / GMP_NUMB_BITS, sp, sn, b % GMP_NUMB_BITS);
+      sn = sn + b / GMP_NUMB_BITS;
+      if (cy != 0)
+       {
+         sp[sn] = cy;
+         sn++;
+       }
+
+      /* 7: current buffers: {sp,sn}, {qp,qn} */
+
+      ASSERT_ALWAYS (bn >= qn); /* this is ok since in the case qn > bn
+                                  above, q is set to 2^b-1, which has
+                                  exactly bn limbs */
+
+      /* Combine sB and q to form sB + q.  */
+      save = sp[b / GMP_NUMB_BITS];
+      MPN_COPY (sp, qp, qn);
+      MPN_ZERO (sp + qn, bn - qn);
+      sp[b / GMP_NUMB_BITS] |= save;
+
+      /* 8: current buffer: {sp,sn} */
+
+      /* Since each iteration treats b bits from the root and thus k*b bits
+        from the input, and we already considered b bits from the input,
+        we now have to take another (k-1)*b bits from the input. */
+      kk -= (k - 1) * b; /* remaining input bits */
+      /* {rp, rn} = floor({up, un} / 2^kk) */
+      MPN_RSHIFT (cy, rp, up + kk / GMP_NUMB_BITS, un - kk / GMP_NUMB_BITS, kk % GMP_NUMB_BITS);
+      rn = un - kk / GMP_NUMB_BITS;
+      rn -= rp[rn - 1] == 0;
+
+      /* 9: current buffers: {sp,sn}, {rp,rn} */
+
+     for (c = 0;; c++)
+       {
+         /* Compute S^k in {qp,qn}. */
+         if (i == 1)
+           {
+             /* Last iteration: we don't need W anymore. */
+             /* mpn_pow_1 requires that both qp and wp have enough space to
+                store the result {sp,sn}^k + 1 limb */
+             approx = approx && (sp[0] > 1);
+             qn = (approx == 0) ? mpn_pow_1 (qp, sp, sn, k, wp) : 0;
+           }
+         else
+           {
+             /* W <- S^(k-1) for the next iteration,
+                and S^k = W * S. */
+             wn = mpn_pow_1 (wp, sp, sn, k - 1, qp);
+             mpn_mul (qp, wp, wn, sp, sn);
+             qn = wn + sn;
+             qn -= qp[qn - 1] == 0;
+           }
+
+         /* if S^k > floor(U/2^kk), the root approximation was too large */
+         if (qn > rn || (qn == rn && mpn_cmp (qp, rp, rn) > 0))
+           MPN_DECR_U (sp, sn, 1);
+         else
+           break;
+       }
+
+      /* 10: current buffers: {sp,sn}, {rp,rn}, {qp,qn}, {wp,wn} */
+
+      ASSERT_ALWAYS (c <= 1);
+      ASSERT_ALWAYS (rn >= qn);
+
+      /* R = R - Q = floor(U/2^kk) - S^k */
+      if ((i > 1) || (approx == 0))
+       {
+         mpn_sub (rp, rp, rn, qp, qn);
+         MPN_NORMALIZE (rp, rn);
+       }
+      /* otherwise we have rn > 0, thus the return value is ok */
+
+      /* 11: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */
+    }
+
+  TMP_FREE;
+  return rn;
+}
diff --git a/mpn/generic/rshift.c b/mpn/generic/rshift.c

new file mode 100644 (file)

index 0000000..6225665
--- /dev/null
+++ b/mpn/generic/rshift.c
@@ -0,0 +1,60 @@
+/* mpn_rshift -- Shift right low level.
+
+Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2002 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Right-shift the n-limb operand {up, n} by cnt bits and write the n low
+   limbs of the shifted value to {rp, n}.  The cnt bits that fall off the
+   low end are returned, left-justified in the numb bits of the result.
+
+   Argument constraints:
+   1. n >= 1 and 0 < cnt < GMP_NUMB_BITS.
+   2. Source and destination may overlap only when rp <= up.
+*/
+
+mp_limb_t
+mpn_rshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt)
+{
+  mp_limb_t cur, pending;
+  unsigned int lcnt;
+  mp_size_t k;
+  mp_limb_t shifted_out;
+
+  ASSERT (n >= 1);
+  ASSERT (cnt >= 1);
+  ASSERT (cnt < GMP_NUMB_BITS);
+  ASSERT (MPN_SAME_OR_INCR_P (rp, up, n));
+
+  lcnt = GMP_NUMB_BITS - cnt;  /* left shift for the bits that change limb */
+  cur = up[0];
+  shifted_out = (cur << lcnt) & GMP_NUMB_MASK;
+  pending = cur >> cnt;        /* high part of the next limb to be stored */
+
+  for (k = 1; k < n; k++)
+    {
+      cur = up[k];             /* read before storing rp[k - 1] (overlap) */
+      rp[k - 1] = pending | ((cur << lcnt) & GMP_NUMB_MASK);
+      pending = cur >> cnt;
+    }
+  rp[n - 1] = pending;
+
+  return shifted_out;
+}
diff --git a/mpn/generic/sbpi1_bdiv_q.c b/mpn/generic/sbpi1_bdiv_q.c

new file mode 100644 (file)

index 0000000..3d2f743
--- /dev/null
+++ b/mpn/generic/sbpi1_bdiv_q.c
@@ -0,0 +1,87 @@
+/* mpn_sbpi1_bdiv_q -- schoolbook Hensel division with precomputed inverse,
+   returning quotient only.
+
+   Contributed to the GNU project by Niels Möller.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL FUNCTIONS WITH MUTABLE INTERFACES.
+   IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS
+   ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2005, 2006, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Computes Q = N / D mod B^nn, destroys N.
+
+   D must be odd. dinv is (-D)^-1 mod B.
+   N is {np, nn} and D is {dp, dn}, with nn >= dn > 0; Q goes to {qp, nn}.
+
+   The straightforward way to compute Q is to cancel one limb at a time, using
+
+     qp[i] = D^{-1} * np[i] (mod B)
+     N -= B^i * qp[i] * D
+
+   But we prefer addition to subtraction, since mpn_addmul_1 is often faster
+   than mpn_submul_1.  Q = - N / D can be computed by iterating
+
+     qp[i] = (-D)^{-1} * np[i] (mod B)
+     N += B^i * qp[i] * D
+
+   And then we flip the sign, -Q = (not Q) + 1. */
+
+void
+mpn_sbpi1_bdiv_q (mp_ptr qp,
+                 mp_ptr np, mp_size_t nn,
+                 mp_srcptr dp, mp_size_t dn,
+                 mp_limb_t dinv)
+{
+  mp_size_t i;
+  mp_limb_t cy, q;
+
+  ASSERT (dn > 0);
+  ASSERT (nn >= dn);
+  ASSERT ((dp[0] & 1) != 0);
+
+  for (i = nn - dn; i > 0; i--)        /* dn + i limbs of N remain here */
+    {
+      q = dinv * np[0];
+      qp[0] = ~q;                      /* store (not q); the +1 is added last */
+      qp++;
+      cy = mpn_addmul_1 (np, dp, dn, q);
+      mpn_add_1 (np + dn, np + dn, i, cy);     /* carry into the i limbs above */
+      ASSERT (np[0] == 0);
+      np++;
+    }
+
+  for (i = dn; i > 1; i--)             /* only i limbs of N remain here */
+    {
+      q = dinv * np[0];
+      qp[0] = ~q;
+      qp++;
+      mpn_addmul_1 (np, dp, i, q);     /* so only the low i limbs of D are used */
+      ASSERT (np[0] == 0);
+      np++;
+    }
+
+  /* Final limb */
+  q = dinv * np[0];
+  qp[0] = ~q;
+  mpn_add_1 (qp - nn + 1, qp - nn + 1, nn, 1); /* qp moved nn-1 limbs; +1 on all of Q */
+}
diff --git a/mpn/generic/sbpi1_bdiv_qr.c b/mpn/generic/sbpi1_bdiv_qr.c

new file mode 100644 (file)

index 0000000..c20477a
--- /dev/null
+++ b/mpn/generic/sbpi1_bdiv_qr.c
@@ -0,0 +1,108 @@
+/* mpn_sbpi1_bdiv_qr -- schoolbook Hensel division with precomputed inverse,
+   returning quotient and remainder.
+
+   Contributed to the GNU project by Niels Möller.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL FUNCTIONS WITH MUTABLE INTERFACES.
+   IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS
+   ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2006, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Computes a binary (Hensel) quotient of qn = nn - dn limbs at {qp, qn}.
+   Output:
+
+      Q = N * D^{-1} mod B^qn,
+
+      R = (N - Q * D) * B^(-qn)
+
+   Stores the dn least significant limbs of R at {np + nn - dn, dn},
+   and returns the borrow from the subtraction N - Q*D.
+
+   D must be odd. dinv is (-D)^-1 mod B. */
+
+mp_limb_t
+mpn_sbpi1_bdiv_qr (mp_ptr qp,
+                  mp_ptr np, mp_size_t nn,
+                  mp_srcptr dp, mp_size_t dn, mp_limb_t dinv)
+{
+  mp_size_t qn;
+  mp_size_t i;
+  mp_limb_t rh;                        /* carry-outs collected from the additions into N */
+  mp_limb_t ql;                        /* carry of the "+1" moving up through the quotient */
+
+  ASSERT (dn > 0);
+  ASSERT (nn > dn);
+  ASSERT ((dp[0] & 1) != 0);
+
+  qn = nn - dn;
+
+  rh = 0;
+
+  /* To complete the negation, this value is added to q. */
+  ql = 1;
+  while (qn > dn)                      /* produce dn quotient limbs per pass */
+    {
+      for (i = 0; i < dn; i++)
+       {
+         mp_limb_t q;
+
+         q = dinv * np[i];
+         qp[i] = ~q;
+
+         np[i] = mpn_addmul_1 (np + i, dp, dn, q);     /* slot np[i] now holds the carry-out */
+       }
+      rh += mpn_add (np + dn, np + dn, qn, np, dn);    /* add each carry dn limbs above its slot */
+      ql = mpn_add_1 (qp, qp, dn, ql);                 /* (not q) + carry for this chunk */
+
+      qp += dn; qn -= dn;
+      np += dn; nn -= dn;
+    }
+
+  for (i = 0; i < qn; i++)             /* last chunk: qn <= dn limbs */
+    {
+      mp_limb_t q;
+
+      q = dinv * np[i];
+      qp[i] = ~q;
+
+      np[i] = mpn_addmul_1 (np + i, dp, dn, q);
+    }
+
+  rh += mpn_add_n (np + dn, np + dn, np, qn);
+  ql = mpn_add_1 (qp, qp, qn, ql);
+
+  if (UNLIKELY (ql > 0))               /* the +1 carried out of every quotient limb */
+    {
+      /* q == 0 */
+      ASSERT (rh == 0);
+      return 0;
+    }
+  else
+    {
+      mp_limb_t cy;
+
+      cy = mpn_sub_n (np + qn, np + qn, dp, dn);       /* the additions left R + D; take D off */
+      ASSERT (cy >= rh);
+      return cy - rh;
+    }
+}
diff --git a/mpn/generic/sbpi1_div_q.c b/mpn/generic/sbpi1_div_q.c

new file mode 100644 (file)

index 0000000..595a03f
--- /dev/null
+++ b/mpn/generic/sbpi1_div_q.c
@@ -0,0 +1,292 @@
+/* mpn_sbpi1_div_q -- Schoolbook division using the Möller-Granlund 3/2
+   division algorithm.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2007, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+mp_limb_t
+mpn_sbpi1_div_q (mp_ptr qp,
+                mp_ptr np, mp_size_t nn,
+                mp_srcptr dp, mp_size_t dn,
+                mp_limb_t dinv)
+{
+  mp_limb_t qh;                        /* high quotient limb (0 or 1), the return value */
+  mp_size_t qn, i;
+  mp_limb_t n1, n0;
+  mp_limb_t d1, d0;
+  mp_limb_t cy, cy1;
+  mp_limb_t q;
+  mp_limb_t flag;
+
+  mp_size_t dn_orig = dn;      /* saved: dn, dp and np are all modified below */
+  mp_srcptr dp_orig = dp;
+  mp_ptr np_orig = np;
+
+  ASSERT (dn > 2);
+  ASSERT (nn >= dn);
+  ASSERT ((dp[dn-1] & GMP_NUMB_HIGHBIT) != 0);
+
+  np += nn;                    /* np is now one past the top limb of N */
+
+  qn = nn - dn;
+  if (qn + 1 < dn)             /* short quotient: use only the top qn+1 limbs of D */
+    {
+      dp += dn - (qn + 1);
+      dn = qn + 1;
+    }
+
+  qh = mpn_cmp (np - dn, dp, dn) >= 0; /* top dn limbs of N >= (truncated) D? */
+  if (qh != 0)
+    mpn_sub_n (np - dn, np - dn, dp, dn);
+
+  qp += qn;                    /* quotient limbs are stored downwards via *--qp */
+
+  dn -= 2;                     /* offset dn by 2 for main division loops,
+                                  saving two iterations in mpn_submul_1.  */
+  d1 = dp[dn + 1];             /* most significant divisor limb */
+  d0 = dp[dn + 0];             /* second most significant divisor limb */
+
+  np -= 2;
+
+  n1 = np[1];
+
+  for (i = qn - (dn + 2); i >= 0; i--) /* limbs computed with the whole (truncated) D */
+    {
+      np--;
+      if (UNLIKELY (n1 == d1) && np[1] == d0)
+       {
+         q = GMP_NUMB_MASK;
+         mpn_submul_1 (np - dn, dp, dn + 2, q);
+         n1 = np[1];           /* update n1, last loop's value will now be invalid */
+       }
+      else
+       {
+         udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
+
+         cy = mpn_submul_1 (np - dn, dp, dn, q);
+
+         cy1 = n0 < cy;                        /* borrow out of n0 - cy */
+         n0 = (n0 - cy) & GMP_NUMB_MASK;
+         cy = n1 < cy1;                        /* borrow out of n1 - cy1 */
+         n1 -= cy1;
+         np[0] = n0;
+
+         if (UNLIKELY (cy != 0))               /* borrowed past n1: add D back, q-- */
+           {
+             n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
+             q--;
+           }
+       }
+
+      *--qp = q;
+    }
+
+  flag = ~CNST_LIMB(0);        /* once 0, every later limb takes the q = GMP_NUMB_MASK path */
+
+  if (dn >= 0)
+    {
+      for (i = dn; i > 0; i--) /* D loses one low limb per pass, see "Truncate" below */
+       {
+         np--;
+         if (UNLIKELY (n1 >= (d1 & flag)))
+           {
+             q = GMP_NUMB_MASK;
+             cy = mpn_submul_1 (np - dn, dp, dn + 2, q);
+
+             if (UNLIKELY (n1 != cy))
+               {
+                 if (n1 < (cy & flag))
+                   {
+                     q--;
+                     mpn_add_n (np - dn, np - dn, dp, dn + 2);
+                   }
+                 else
+                   flag = 0;
+               }
+             n1 = np[1];
+           }
+         else
+           {
+             udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
+
+             cy = mpn_submul_1 (np - dn, dp, dn, q);
+
+             cy1 = n0 < cy;
+             n0 = (n0 - cy) & GMP_NUMB_MASK;
+             cy = n1 < cy1;
+             n1 -= cy1;
+             np[0] = n0;
+
+             if (UNLIKELY (cy != 0))
+               {
+                 n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
+                 q--;
+               }
+           }
+
+         *--qp = q;
+
+         /* Truncate operands.  */
+         dn--;
+         dp++;
+       }
+
+      np--;                    /* last limb: only d1, d0 of the divisor are left */
+      if (UNLIKELY (n1 >= (d1 & flag)))
+       {
+         q = GMP_NUMB_MASK;
+         cy = mpn_submul_1 (np, dp, 2, q);
+
+         if (UNLIKELY (n1 != cy))
+           {
+             if (n1 < (cy & flag))
+               {
+                 q--;
+                 add_ssaaaa (np[1], np[0], np[1], np[0], dp[1], dp[0]);
+               }
+             else
+               flag = 0;
+           }
+         n1 = np[1];
+       }
+      else
+       {
+         udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
+
+         np[0] = n0;
+         np[1] = n1;
+       }
+
+      *--qp = q;
+    }
+  ASSERT_ALWAYS (np[1] == n1);
+  np += 2;
+
+
+  dn = dn_orig;
+  if (UNLIKELY (n1 < (dn & flag)))     /* never true once flag has been cleared */
+    {
+      mp_limb_t q, x;
+
+      /* The quotient may be too large if the remainder is small.  Recompute
+        for above ignored operand parts, until the remainder spills.
+
+        FIXME: The quality of this code isn't the same as the code above.
+        1. We don't compute things in an optimal order, high-to-low, in order
+           to terminate as quickly as possible.
+        2. We mess with pointers and sizes, adding and subtracting and
+           adjusting to get things right.  It surely could be streamlined.
+        3. The only termination criteria are that we determine that the
+           quotient needs to be adjusted, or that we have recomputed
+           everything.  We should stop when the remainder is so large
+           that no additional subtracting could make it spill.
+        4. If nothing else, we should not do two loops of submul_1 over the
+           data, instead handle both the triangularization and chopping at
+           once.  */
+
+      x = n1;                  /* each borrow below costs one; a borrow at 0 means Q is 1 too big */
+
+      if (dn > 2)
+       {
+         /* Compensate for triangularization.  */
+         mp_limb_t y;
+
+         dp = dp_orig;
+         if (qn + 1 < dn)
+           {
+             dp += dn - (qn + 1);
+             dn = qn + 1;
+           }
+
+         y = np[-2];           /* limb the borrows of this loop are taken from */
+
+         for (i = dn - 3; i >= 0; i--)
+           {
+             q = qp[i];
+             cy = mpn_submul_1 (np - (dn - i), dp, dn - i - 2, q);
+
+             if (y < cy)
+               {
+                 if (x == 0)
+                   {
+                     cy = mpn_sub_1 (qp, qp, qn, 1);
+                     ASSERT_ALWAYS (cy == 0);
+                     return qh - cy;
+                   }
+                 x--;
+               }
+             y -= cy;
+           }
+         np[-2] = y;
+       }
+
+      dn = dn_orig;
+      if (qn + 1 < dn)
+       {
+         /* Compensate for ignored dividend and divisor tails.  */
+
+         dp = dp_orig;
+         np = np_orig;
+
+         if (qh != 0)
+           {
+             cy = mpn_sub_n (np + qn, np + qn, dp, dn - (qn + 1));
+             if (cy != 0)
+               {
+                 if (x == 0)
+                   {
+                     if (qn != 0)
+                       cy = mpn_sub_1 (qp, qp, qn, 1);
+                     return qh - cy;
+                   }
+                 x--;
+               }
+           }
+
+         if (qn == 0)
+           return qh;
+
+         for (i = dn - qn - 2; i >= 0; i--)
+           {
+             cy = mpn_submul_1 (np + i, qp, qn, dp[i]);
+             cy = mpn_sub_1 (np + qn + i, np + qn + i, dn - qn - i - 1, cy);
+             if (cy != 0)
+               {
+                 if (x == 0)
+                   {
+                     cy = mpn_sub_1 (qp, qp, qn, 1);
+                     return qh - cy;   /* a borrow out of the low qn limbs comes off qh */
+                   }
+                 x--;
+               }
+           }
+       }
+    }
+
+  return qh;
+}
diff --git a/mpn/generic/sbpi1_div_qr.c b/mpn/generic/sbpi1_div_qr.c

new file mode 100644 (file)

index 0000000..7e53aae
--- /dev/null
+++ b/mpn/generic/sbpi1_div_qr.c
@@ -0,0 +1,99 @@
+/* mpn_sbpi1_div_qr -- Schoolbook division using the Möller-Granlund 3/2
+   division algorithm.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2007, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+mp_limb_t
+mpn_sbpi1_div_qr (mp_ptr qp,
+                 mp_ptr np, mp_size_t nn,
+                 mp_srcptr dp, mp_size_t dn,
+                 mp_limb_t dinv)
+{
+  mp_limb_t qh;                        /* high quotient limb (0 or 1), the return value */
+  mp_size_t i;
+  mp_limb_t n1, n0;
+  mp_limb_t d1, d0;
+  mp_limb_t cy, cy1;
+  mp_limb_t q;
+
+  ASSERT (dn > 2);
+  ASSERT (nn >= dn);
+  ASSERT ((dp[dn-1] & GMP_NUMB_HIGHBIT) != 0);
+
+  np += nn;                    /* np is now one past the top limb of N */
+
+  qh = mpn_cmp (np - dn, dp, dn) >= 0; /* top dn limbs of N >= D? */
+  if (qh != 0)
+    mpn_sub_n (np - dn, np - dn, dp, dn);      /* if so, subtract D once */
+
+  qp += nn - dn;               /* quotient limbs are stored downwards via *--qp */
+
+  dn -= 2;                     /* offset dn by 2 for main division loops,
+                                  saving two iterations in mpn_submul_1.  */
+  d1 = dp[dn + 1];             /* most significant divisor limb */
+  d0 = dp[dn + 0];             /* second most significant divisor limb */
+
+  np -= 2;
+
+  n1 = np[1];                  /* top limb of N, kept in n1 across iterations */
+
+  for (i = nn - (dn + 2); i > 0; i--)  /* one quotient limb per pass */
+    {
+      np--;
+      if (UNLIKELY (n1 == d1) && np[1] == d0)
+       {
+         q = GMP_NUMB_MASK;
+         mpn_submul_1 (np - dn, dp, dn + 2, q);
+         n1 = np[1];           /* update n1, last loop's value will now be invalid */
+       }
+      else
+       {
+         udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
+
+         cy = mpn_submul_1 (np - dn, dp, dn, q);
+
+         cy1 = n0 < cy;                        /* borrow out of n0 - cy */
+         n0 = (n0 - cy) & GMP_NUMB_MASK;
+         cy = n1 < cy1;                        /* borrow out of n1 - cy1 */
+         n1 = (n1 - cy1) & GMP_NUMB_MASK;
+         np[0] = n0;
+
+         if (UNLIKELY (cy != 0))               /* borrowed past n1: add D back, q-- */
+           {
+             n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
+             q--;
+           }
+       }
+
+      *--qp = q;
+    }
+  np[1] = n1;                  /* store the limb that was being held in n1 */
+
+  return qh;
+}
diff --git a/mpn/generic/sbpi1_divappr_q.c b/mpn/generic/sbpi1_divappr_q.c

new file mode 100644 (file)

index 0000000..53d4a25
--- /dev/null
+++ b/mpn/generic/sbpi1_divappr_q.c
@@ -0,0 +1,188 @@
+/* mpn_sbpi1_divappr_q -- Schoolbook division using the Möller-Granlund 3/2
+   division algorithm, returning approximate quotient.  The quotient returned
+   is either correct, or one too large.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2007, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+mp_limb_t
+mpn_sbpi1_divappr_q (mp_ptr qp,
+                    mp_ptr np, mp_size_t nn,
+                    mp_srcptr dp, mp_size_t dn,
+                    mp_limb_t dinv)
+{
+  mp_limb_t qh;                        /* high quotient limb (0 or 1), the return value */
+  mp_size_t qn, i;
+  mp_limb_t n1, n0;
+  mp_limb_t d1, d0;
+  mp_limb_t cy, cy1;
+  mp_limb_t q;
+  mp_limb_t flag;
+
+  ASSERT (dn > 2);
+  ASSERT (nn >= dn);
+  ASSERT ((dp[dn-1] & GMP_NUMB_HIGHBIT) != 0);
+
+  np += nn;                    /* np is now one past the top limb of N */
+
+  qn = nn - dn;
+  if (qn + 1 < dn)             /* short quotient: use only the top qn+1 limbs of D */
+    {
+      dp += dn - (qn + 1);
+      dn = qn + 1;
+    }
+
+  qh = mpn_cmp (np - dn, dp, dn) >= 0; /* top dn limbs of N >= (truncated) D? */
+  if (qh != 0)
+    mpn_sub_n (np - dn, np - dn, dp, dn);
+
+  qp += qn;                    /* quotient limbs are stored downwards via *--qp */
+
+  dn -= 2;                     /* offset dn by 2 for main division loops,
+                                  saving two iterations in mpn_submul_1.  */
+  d1 = dp[dn + 1];             /* most significant divisor limb */
+  d0 = dp[dn + 0];             /* second most significant divisor limb */
+
+  np -= 2;
+
+  n1 = np[1];
+
+  for (i = qn - (dn + 2); i >= 0; i--) /* limbs computed with the whole (truncated) D */
+    {
+      np--;
+      if (UNLIKELY (n1 == d1) && np[1] == d0)
+       {
+         q = GMP_NUMB_MASK;
+         mpn_submul_1 (np - dn, dp, dn + 2, q);
+         n1 = np[1];           /* update n1, last loop's value will now be invalid */
+       }
+      else
+       {
+         udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
+
+         cy = mpn_submul_1 (np - dn, dp, dn, q);
+
+         cy1 = n0 < cy;                        /* borrow out of n0 - cy */
+         n0 = (n0 - cy) & GMP_NUMB_MASK;
+         cy = n1 < cy1;                        /* borrow out of n1 - cy1 */
+         n1 -= cy1;
+         np[0] = n0;
+
+         if (UNLIKELY (cy != 0))               /* borrowed past n1: add D back, q-- */
+           {
+             n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
+             q--;
+           }
+       }
+
+      *--qp = q;
+    }
+
+  flag = ~CNST_LIMB(0);        /* once 0, every later limb takes the q = GMP_NUMB_MASK path */
+
+  if (dn >= 0)
+    {
+      for (i = dn; i > 0; i--) /* D loses one low limb per pass, see "Truncate" below */
+       {
+         np--;
+         if (UNLIKELY (n1 >= (d1 & flag)))
+           {
+             q = GMP_NUMB_MASK;
+             cy = mpn_submul_1 (np - dn, dp, dn + 2, q);
+
+             if (UNLIKELY (n1 != cy))
+               {
+                 if (n1 < (cy & flag))
+                   {
+                     q--;
+                     mpn_add_n (np - dn, np - dn, dp, dn + 2);
+                   }
+                 else
+                   flag = 0;
+               }
+             n1 = np[1];
+           }
+         else
+           {
+             udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
+
+             cy = mpn_submul_1 (np - dn, dp, dn, q);
+
+             cy1 = n0 < cy;
+             n0 = (n0 - cy) & GMP_NUMB_MASK;
+             cy = n1 < cy1;
+             n1 -= cy1;
+             np[0] = n0;
+
+             if (UNLIKELY (cy != 0))
+               {
+                 n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
+                 q--;
+               }
+           }
+
+         *--qp = q;
+
+         /* Truncate operands.  */
+         dn--;
+         dp++;
+       }
+
+      np--;                    /* last limb: only d1, d0 of the divisor are left */
+      if (UNLIKELY (n1 >= (d1 & flag)))
+       {
+         q = GMP_NUMB_MASK;
+         cy = mpn_submul_1 (np, dp, 2, q);
+
+         if (UNLIKELY (n1 != cy))
+           {
+             if (n1 < (cy & flag))
+               {
+                 q--;
+                 add_ssaaaa (np[1], np[0], np[1], np[0], dp[1], dp[0]);
+               }
+             else
+               flag = 0;
+           }
+         n1 = np[1];
+       }
+      else
+       {
+         udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
+
+         np[1] = n1;
+         np[0] = n0;
+       }
+
+      *--qp = q;
+    }
+
+  ASSERT_ALWAYS (np[1] == n1);
+
+  return qh;
+}
diff --git a/mpn/generic/scan0.c b/mpn/generic/scan0.c

new file mode 100644 (file)

index 0000000..234ba9b
--- /dev/null
+++ b/mpn/generic/scan0.c
@@ -0,0 +1,49 @@
+/* mpn_scan0 -- Scan from a given bit position for the next clear bit.
+
+Copyright 1994, 1996, 2001, 2002, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Return the index of the first clear bit of U at or after STARTING_BIT.
+   No length is passed, so U must contain a clear bit at or above that
+   position; the scan keeps reading limbs until it finds one.  */
+
+mp_bitcnt_t
+mpn_scan0 (mp_srcptr up, mp_bitcnt_t starting_bit)
+{
+  mp_size_t first_limb;
+  mp_limb_t inverted;
+  int bit_pos;
+  mp_srcptr next;
+
+  /* Complement the limb holding STARTING_BIT: clear bits become set.  */
+  first_limb = starting_bit / GMP_NUMB_BITS;
+  next = up + first_limb;
+  inverted = *next ^ GMP_NUMB_MASK;
+
+  /* Discard the positions below STARTING_BIT in that first limb.  */
+  inverted &= - (mp_limb_t) 1 << (starting_bit % GMP_NUMB_BITS);
+
+  for (next++; inverted == 0; next++)
+    inverted = *next ^ GMP_NUMB_MASK;
+
+  count_trailing_zeros (bit_pos, inverted);
+  return (next - up - 1) * GMP_NUMB_BITS + bit_pos;
+}
diff --git a/mpn/generic/scan1.c b/mpn/generic/scan1.c

new file mode 100644 (file)

index 0000000..9fb2446
--- /dev/null
+++ b/mpn/generic/scan1.c
@@ -0,0 +1,49 @@
+/* mpn_scan1 -- Scan from a given bit position for the next set bit.
+
+Copyright 1994, 1996, 2001, 2002, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Return the index of the first set bit of U at or after STARTING_BIT.
+   No length is passed, so U must contain a set bit at or above that
+   position; the scan keeps reading limbs until it finds one.  */
+
+mp_bitcnt_t
+mpn_scan1 (mp_srcptr up, mp_bitcnt_t starting_bit)
+{
+  mp_size_t first_limb;
+  mp_limb_t limb;
+  int bit_pos;
+  mp_srcptr next;
+
+  /* Begin with the limb that holds STARTING_BIT.  */
+  first_limb = starting_bit / GMP_NUMB_BITS;
+  next = up + first_limb;
+  limb = *next;
+
+  /* Discard the positions below STARTING_BIT in that first limb.  */
+  limb &= - (mp_limb_t) 1 << (starting_bit % GMP_NUMB_BITS);
+
+  for (next++; limb == 0; next++)
+    limb = *next;
+
+  count_trailing_zeros (bit_pos, limb);
+  return (next - up - 1) * GMP_NUMB_BITS + bit_pos;
+}
diff --git a/mpn/generic/set_str.c b/mpn/generic/set_str.c

new file mode 100644 (file)

index 0000000..83f5ac5
--- /dev/null
+++ b/mpn/generic/set_str.c
@@ -0,0 +1,364 @@
+/* mpn_set_str (mp_ptr res_ptr, const char *str, size_t str_len, int base) --
+   Convert a STR_LEN long base BASE byte string pointed to by STR to a limb
+   vector pointed to by RES_PTR.  Return the number of limbs in RES_PTR.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTIONS IN THIS FILE, EXCEPT mpn_set_str, ARE INTERNAL WITH A MUTABLE
+   INTERFACE.  IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN
+   FACT, IT IS ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE
+   GNU MP RELEASE.
+
+Copyright 1991, 1992, 1993, 1994, 1996, 2000, 2001, 2002, 2004, 2006, 2007,
+2008 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+/* TODO:
+
+      Perhaps do not compute the highest power?
+      Instead, multiply twice by the 2nd highest power:
+
+              _______
+             |_______|  hp
+             |_______|  pow
+       _______________
+      |_______________|  final result
+
+
+              _______
+             |_______|  hp
+                 |___|  pow[-1]
+          ___________
+         |___________|  intermediate result
+                 |___|  pow[-1]
+       _______________
+      |_______________|  final result
+
+      Generalizing that idea, perhaps we should make powtab contain successive
+      cubes, not squares.
+*/
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Convert the STR_LEN digit values at STR, most significant first, from base
+   BASE into limbs at RP.  Return the number of limbs stored.  */
+mp_size_t
+mpn_set_str (mp_ptr rp, const unsigned char *str, size_t str_len, int base)
+{
+  if (POW2_P (base))
+    {
+      /* The base is a power of 2.  Read the input string from least to most
+        significant character/digit, packing the digit bits straight into
+        limbs.  Each byte of STR is used directly as a digit value.  */
+      const unsigned char *s;
+      int next_bitpos;         /* bit offset in res_digit of the next digit */
+      mp_limb_t res_digit;     /* limb being assembled */
+      mp_size_t size;          /* limbs stored at RP so far */
+      int bits_per_indigit = mp_bases[base].big_base;
+
+      size = 0;
+      res_digit = 0;
+      next_bitpos = 0;
+
+      /* S starts one past the last digit and is decremented before each
+        read, so no pointer below STR is ever formed.  */
+      for (s = str + str_len; s > str; )
+       {
+         int inp_digit = *--s;
+         res_digit |= ((mp_limb_t) inp_digit << next_bitpos) & GMP_NUMB_MASK;
+         next_bitpos += bits_per_indigit;
+         if (next_bitpos >= GMP_NUMB_BITS)
+           {
+             /* Limb complete; the digit bits that did not fit carry over.  */
+             rp[size++] = res_digit;
+             next_bitpos -= GMP_NUMB_BITS;
+             res_digit = inp_digit >> (bits_per_indigit - next_bitpos);
+           }
+       }
+      /* A trailing partial limb is stored only when non-zero.  */
+      if (res_digit != 0)
+       rp[size++] = res_digit;
+      return size;
+    }
+
+  /* Other bases: basecase for short strings, otherwise divide and conquer
+     with a table of powers of big_base.  */
+  if (BELOW_THRESHOLD (str_len, SET_STR_PRECOMPUTE_THRESHOLD))
+    return mpn_bc_set_str (rp, str, str_len, base);
+  else
+    {
+      mp_ptr powtab_mem, tp;
+      powers_t powtab[GMP_LIMB_BITS];
+      int chars_per_limb;
+      mp_size_t size;
+      mp_size_t un;
+      TMP_DECL;
+
+      TMP_MARK;
+      chars_per_limb = mp_bases[base].chars_per_limb;
+      un = str_len / chars_per_limb + 1;
+      /* Allocate one large block for the powers of big_base.  */
+      powtab_mem = TMP_BALLOC_LIMBS (mpn_dc_set_str_powtab_alloc (un));
+      mpn_set_str_compute_powtab (powtab, powtab_mem, un, base);
+      tp = TMP_BALLOC_LIMBS (mpn_dc_set_str_itch (un));
+      size = mpn_dc_set_str (rp, str, str_len, powtab, tp);
+      TMP_FREE;
+      return size;
+    }
+}
+
+void
+mpn_set_str_compute_powtab (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un, int base)
+{
+  mp_ptr powtab_mem_ptr;       /* next unused limb of powtab_mem */
+  long i, pi;
+  mp_size_t n;                 /* limbs in the current power */
+  mp_ptr p, t;                 /* previous power, and the one being built */
+  unsigned normalization_steps;
+  mp_limb_t big_base, big_base_inverted;
+  int chars_per_limb;
+  size_t digits_in_base;       /* the current power represents base^digits_in_base */
+  mp_size_t shift;             /* low zero limbs stripped from the current power */
+  /* Fill powtab[i]..powtab[0] with growing powers of big_base, stored in powtab_mem.  */
+  powtab_mem_ptr = powtab_mem;
+
+  chars_per_limb = mp_bases[base].chars_per_limb;
+  big_base = mp_bases[base].big_base;
+  big_base_inverted = mp_bases[base].big_base_inverted;        /* not read again in this function */
+  count_leading_zeros (normalization_steps, big_base); /* result not read again in this function */
+  /* The first entry is big_base itself: one limb, chars_per_limb digits.  */
+  p = powtab_mem_ptr;
+  powtab_mem_ptr += 1;
+
+  digits_in_base = chars_per_limb;
+
+  p[0] = big_base;
+  n = 1;
+  /* i = position of the highest set bit of un - 1 (presumably un >= 2 -- confirm with callers).  */
+  count_leading_zeros (i, un - 1);
+  i = GMP_LIMB_BITS - 1 - i;
+
+  powtab[i].p = p;
+  powtab[i].n = n;
+  powtab[i].digits_in_base = digits_in_base;
+  powtab[i].base = base;
+  powtab[i].shift = 0;
+
+  shift = 0;
+  for (pi = i - 1; pi >= 0; pi--)
+    {
+      t = powtab_mem_ptr;
+      powtab_mem_ptr += 2 * n; /* room for the 2n-limb square */
+
+      ASSERT_ALWAYS (powtab_mem_ptr < powtab_mem + mpn_dc_set_str_powtab_alloc (un));
+
+      mpn_sqr (t, p, n);       /* t = p^2 */
+      n = 2 * n - 1; n += t[n] != 0;   /* size of the square without a zero top limb */
+      digits_in_base *= 2;
+#if 1
+      if ((((un - 1) >> pi) & 2) == 0) /* bit pi+1 of un-1 is clear: remove one factor big_base */
+       {
+         mpn_divexact_1 (t, t, n, big_base);
+         n -= t[n - 1] == 0;
+         digits_in_base -= chars_per_limb;
+       }
+#else /* alternative sketch; never compiled while the #if above is 1 */
+      if (CLEVER_CONDITION_1 ())
+       {
+         /* perform adjustment operation of previous */
+         cy = mpn_mul_1 (p, p, n, big_base);
+       }
+      if (CLEVER_CONDITION_2 ())
+       {
+         /* perform adjustment operation of new */
+         cy = mpn_mul_1 (t, t, n, big_base);
+       }
+#endif
+      shift *= 2;      /* squaring doubles the count of stripped low zero limbs */
+      /* Strip low zero limbs, but be careful to keep the result divisible by
+        big_base.  */
+      while (t[0] == 0 && (t[1] & ((big_base & -big_base) - 1)) == 0)
+       {
+         t++;
+         n--;
+         shift++;
+       }
+      p = t;   /* this power is the input to the next squaring */
+      powtab[pi].p = p;
+      powtab[pi].n = n;
+      powtab[pi].digits_in_base = digits_in_base;
+      powtab[pi].base = base;
+      powtab[pi].shift = shift;
+    }
+}
+
+mp_size_t
+mpn_dc_set_str (mp_ptr rp, const unsigned char *str, size_t str_len,
+               const powers_t *powtab, mp_ptr tp)
+{
+  size_t len_lo, len_hi;       /* digit counts of the low and high parts of STR */
+  mp_limb_t cy;
+  mp_size_t ln, hn, n, sn;     /* limb counts: low part, high part, result, powtab->shift */
+  /* Split STR at the power in *powtab, convert both parts, then combine them in RP.  */
+  len_lo = powtab->digits_in_base;
+  /* No more digits than this entry covers: use the basecase or the next table entry.  */
+  if (str_len <= len_lo)
+    {
+      if (BELOW_THRESHOLD (str_len, SET_STR_DC_THRESHOLD))
+       return mpn_bc_set_str (rp, str, str_len, powtab->base);
+      else
+       return mpn_dc_set_str (rp, str, str_len, powtab + 1, tp);
+    }
+
+  len_hi = str_len - len_lo;   /* leading digits beyond the low len_lo */
+  ASSERT (len_lo >= len_hi);
+  /* Convert the high digits into tp; the recursion is handed rp as its scratch.  */
+  if (BELOW_THRESHOLD (len_hi, SET_STR_DC_THRESHOLD))
+    hn = mpn_bc_set_str (tp, str, len_hi, powtab->base);
+  else
+    hn = mpn_dc_set_str (tp, str, len_hi, powtab + 1, rp);
+
+  sn = powtab->shift;
+  /* rp = high part times the power; the sn low limbs of the product are zero.  */
+  if (hn == 0)
+    {
+      MPN_ZERO (rp, powtab->n + sn);
+    }
+  else
+    {
+      if (powtab->n > hn)      /* mpn_mul is given the longer operand first */
+       mpn_mul (rp + sn, powtab->p, powtab->n, tp, hn);
+      else
+       mpn_mul (rp + sn, tp, hn, powtab->p, powtab->n);
+      MPN_ZERO (rp, sn);
+    }
+
+  str = str + str_len - len_lo;        /* start of the low len_lo digits */
+  if (BELOW_THRESHOLD (len_lo, SET_STR_DC_THRESHOLD))
+    ln = mpn_bc_set_str (tp, str, len_lo, powtab->base);
+  else
+    ln = mpn_dc_set_str (tp, str, len_lo, powtab + 1, tp + powtab->n + sn + 1);
+  /* Add the low part into the product, propagating the carry upwards.  */
+  if (ln != 0)
+    {
+      cy = mpn_add_n (rp, rp, tp, ln);
+      mpn_incr_u (rp + ln, cy);
+    }
+  n = hn + powtab->n + sn;
+  return n - (rp[n - 1] == 0); /* do not count a zero top limb */
+}
+
+mp_size_t
+mpn_bc_set_str (mp_ptr rp, const unsigned char *str, size_t str_len, int base)
+{
+  mp_size_t size;      /* limbs of the result accumulated at RP so far */
+  size_t i;
+  long j;
+  mp_limb_t cy_limb;
+  /* Basecase conversion: RP = RP * base^k + (value of the next k digits), group by group.  */
+  mp_limb_t big_base;
+  int chars_per_limb;
+  mp_limb_t res_digit; /* value of the current group of digits */
+
+  ASSERT (base >= 2);
+  ASSERT (base < numberof (mp_bases));
+  ASSERT (str_len >= 1);
+
+  big_base = mp_bases[base].big_base;
+  chars_per_limb = mp_bases[base].chars_per_limb;
+
+  size = 0;
+  for (i = chars_per_limb; i < str_len; i += chars_per_limb)
+    {  /* one full group of chars_per_limb digits; the final group is handled after the loop */
+      res_digit = *str++;
+      if (base == 10)
+       { /* This is a common case.
+            Help the compiler to avoid multiplication.  */
+         for (j = MP_BASES_CHARS_PER_LIMB_10 - 1; j != 0; j--) /* presumably chars_per_limb for base 10 -- confirm */
+           res_digit = res_digit * 10 + *str++;
+       }
+      else
+       {
+         for (j = chars_per_limb - 1; j != 0; j--)
+           res_digit = res_digit * base + *str++;
+       }
+
+      if (size == 0)
+       {
+         if (res_digit != 0)   /* leading all-zero groups leave size at 0 */
+           {
+             rp[0] = res_digit;
+             size = 1;
+           }
+       }
+      else
+       {
+#if HAVE_NATIVE_mpn_mul_1c
+         cy_limb = mpn_mul_1c (rp, rp, size, big_base, res_digit);
+#else
+         cy_limb = mpn_mul_1 (rp, rp, size, big_base);
+         cy_limb += mpn_add_1 (rp, rp, size, res_digit);
+#endif
+         if (cy_limb != 0)
+           rp[size++] = cy_limb;
+       }
+    }
+  /* Final group of 1 to chars_per_limb digits; big_base becomes base^(group length).  */
+  big_base = base;
+  res_digit = *str++;
+  if (base == 10)
+    { /* This is a common case.
+        Help the compiler to avoid multiplication.  */
+      for (j = str_len - (i - MP_BASES_CHARS_PER_LIMB_10) - 1; j > 0; j--)
+       {
+         res_digit = res_digit * 10 + *str++;
+         big_base *= 10;
+       }
+    }
+  else
+    {
+      for (j = str_len - (i - chars_per_limb) - 1; j > 0; j--)
+       {
+         res_digit = res_digit * base + *str++;
+         big_base *= base;
+       }
+    }
+  /* Fold the final group in exactly as the full groups were.  */
+  if (size == 0)
+    {
+      if (res_digit != 0)
+       {
+         rp[0] = res_digit;
+         size = 1;
+       }
+    }
+  else
+    {
+#if HAVE_NATIVE_mpn_mul_1c
+      cy_limb = mpn_mul_1c (rp, rp, size, big_base, res_digit);
+#else
+      cy_limb = mpn_mul_1 (rp, rp, size, big_base);
+      cy_limb += mpn_add_1 (rp, rp, size, res_digit);
+#endif
+      if (cy_limb != 0)
+       rp[size++] = cy_limb;
+    }
+  return size; /* 0 when every digit was zero */
+}
diff --git a/mpn/generic/sizeinbase.c b/mpn/generic/sizeinbase.c

new file mode 100644 (file)

index 0000000..303359a
--- /dev/null
+++ b/mpn/generic/sizeinbase.c
@@ -0,0 +1,58 @@
+/* mpn_sizeinbase -- approximation to chars required for an mpn.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 1991, 1993, 1994, 1995, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Return the number of base-BASE digits needed for {xp,xsize}; 1 if xsize is
+   0.  Same as mpz_sizeinbase, meaning exact for power-of-2 bases, and either
+   exact or 1 too big for other bases.  */
+size_t
+mpn_sizeinbase (mp_srcptr xp, mp_size_t xsize, int base)
+{
+  int lb_base, cnt;
+  size_t totbits;
+
+  ASSERT (xsize >= 0);
+  ASSERT (base >= 2);
+  ASSERT (base < numberof (mp_bases));
+  /* Special case for X == 0.  */
+  if (xsize == 0)
+    return 1;
+
+  /* Total significant bits of X: GMP_NUMB_BITS per limb, less the leading
+     zeros of the top limb net of its nail bits.  size_t guards overflow.  */
+  count_leading_zeros (cnt, xp[xsize-1]);
+  totbits = (size_t) xsize * GMP_NUMB_BITS - (cnt - GMP_NAIL_BITS);
+
+  if (POW2_P (base))
+    {
+      /* Special case for powers of 2, giving exact result.  */
+      lb_base = mp_bases[base].big_base;
+      return (totbits + lb_base - 1) / lb_base;
+    }
+  else
+    return (size_t) (totbits * mp_bases[base].chars_per_bit_exactly) + 1;
+}
diff --git a/mpn/generic/sqr.c b/mpn/generic/sqr.c

new file mode 100644 (file)

index 0000000..504dbfb
--- /dev/null
+++ b/mpn/generic/sqr.c
@@ -0,0 +1,88 @@
+/* mpn_sqr -- square natural numbers.
+
+Copyright 1991, 1993, 1994, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
+2005, 2008, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+void
+mpn_sqr (mp_ptr p, mp_srcptr a, mp_size_t n)
+{
+  ASSERT (n >= 1);
+  ASSERT (! MPN_OVERLAP_P (p, 2 * n, a, n));
+  /* Square {a,n} into {p,2n}; the first threshold N is below selects the routine.  */
+  if (BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+    { /* mul_basecase is faster than sqr_basecase on small sizes sometimes */
+      mpn_mul_basecase (p, a, n, a, n);
+    }
+  else if (BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD))
+    {
+      mpn_sqr_basecase (p, a, n);
+    }
+  else if (BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD))
+    {
+      /* Allocate workspace of fixed size on stack: fast! */
+      mp_limb_t ws[mpn_toom2_sqr_itch (SQR_TOOM3_THRESHOLD_LIMIT-1)];
+      ASSERT (SQR_TOOM3_THRESHOLD <= SQR_TOOM3_THRESHOLD_LIMIT);       /* so the fixed-size ws covers every n reaching here */
+      mpn_toom2_sqr (p, a, n, ws);
+    }
+  else if (BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD))
+    {
+      mp_ptr ws;
+      TMP_SDECL;
+      TMP_SMARK;
+      ws = TMP_SALLOC_LIMBS (mpn_toom3_sqr_itch (n));  /* workspace sized for this n */
+      mpn_toom3_sqr (p, a, n, ws);
+      TMP_SFREE;
+    }
+  else if (BELOW_THRESHOLD (n, SQR_TOOM6_THRESHOLD))
+    {
+      mp_ptr ws;
+      TMP_SDECL;
+      TMP_SMARK;
+      ws = TMP_SALLOC_LIMBS (mpn_toom4_sqr_itch (n));
+      mpn_toom4_sqr (p, a, n, ws);
+      TMP_SFREE;
+    }
+  else if (BELOW_THRESHOLD (n, SQR_TOOM8_THRESHOLD))
+    {
+      mp_ptr ws;
+      TMP_SDECL;
+      TMP_SMARK;
+      ws = TMP_SALLOC_LIMBS (mpn_toom6_sqr_itch (n));
+      mpn_toom6_sqr (p, a, n, ws);
+      TMP_SFREE;
+    }
+  else if (BELOW_THRESHOLD (n, SQR_FFT_THRESHOLD))
+    {
+      mp_ptr ws;
+      TMP_DECL;        /* this branch uses the TMP_ macros, not the TMP_S ones used above */
+      TMP_MARK;
+      ws = TMP_ALLOC_LIMBS (mpn_toom8_sqr_itch (n));
+      mpn_toom8_sqr (p, a, n, ws);
+      TMP_FREE;
+    }
+  else
+    {
+      /* The current FFT code allocates its own space.  That should probably
+        change.  */
+      mpn_fft_mul (p, a, n, a, n);
+    }
+}
diff --git a/mpn/generic/sqr_basecase.c b/mpn/generic/sqr_basecase.c

new file mode 100644 (file)

index 0000000..548033d
--- /dev/null
+++ b/mpn/generic/sqr_basecase.c
@@ -0,0 +1,300 @@
+/* mpn_sqr_basecase -- Internal routine to square a natural number
+   of length n.
+
+   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
+
+
+Copyright 1991, 1992, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2003, 2004,
+2005, 2008 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+#if HAVE_NATIVE_mpn_sqr_diagonal /* use the native routine when one is available */
+#define MPN_SQR_DIAGONAL(rp, up, n)                                    \
+  mpn_sqr_diagonal (rp, up, n)
+#else /* C fallback: for each i < n store up[i]^2 at rp[2i] (low limb) and rp[2i+1] (high limb) */
+#define MPN_SQR_DIAGONAL(rp, up, n)                                    \
+  do {                                                                 \
+    mp_size_t _i;                                                      \
+    for (_i = 0; _i < (n); _i++)                                       \
+      {                                                                        \
+       mp_limb_t ul, lpl;                                              \
+       ul = (up)[_i];                                                  \
+       umul_ppmm ((rp)[2 * _i + 1], lpl, ul, ul << GMP_NAIL_BITS);     \
+       (rp)[2 * _i] = lpl >> GMP_NAIL_BITS;                            \
+      }                                                                        \
+  } while (0)
+#endif /* HAVE_NATIVE_mpn_sqr_diagonal */
+
+
+#undef READY_WITH_mpn_sqr_basecase
+
+
+#if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2s
+void
+mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
+{
+  mp_size_t i;
+  mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
+  mp_ptr tp = tarr;    /* accumulates the off-diagonal products */
+  mp_limb_t cy;
+
+  /* must fit 2*n limbs in tarr */
+  ASSERT (n <= SQR_TOOM2_THRESHOLD);
+  /* Odd and even n differ only in how the last rows of products are formed.  */
+  if ((n & 1) != 0)
+    {
+      if (n == 1)
+       {
+         mp_limb_t ul, lpl;
+         ul = up[0];
+         umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
+         rp[0] = lpl >> GMP_NAIL_BITS;
+         return;
+       }
+
+      MPN_ZERO (tp, n);        /* the addmul calls below accumulate into tp */
+
+      for (i = 0; i <= n - 2; i += 2)
+       {
+         /* NOTE(review): mpn_addmul_2s is a native routine not defined here; presumably an addmul_2 variant for squaring -- confirm */
+         cy = mpn_addmul_2s (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
+         tp[n + i] = cy;
+       }
+    }
+  else
+    {
+      if (n == 2)
+       {
+         rp[0] = 0;
+         rp[1] = 0;
+         rp[3] = mpn_addmul_2 (rp, up, 2, up);
+         return;
+       }
+
+      MPN_ZERO (tp, n);
+
+      for (i = 0; i <= n - 4; i += 2)
+       {
+         cy = mpn_addmul_2s (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
+         tp[n + i] = cy;
+       }
+      cy = mpn_addmul_1 (tp + 2 * n - 4, up + n - 1, 1, up[n - 2]);    /* last product: up[n-1] * up[n-2] */
+      tp[2 * n - 3] = cy;
+    }
+
+  MPN_SQR_DIAGONAL (rp, up, n);        /* rp = the squares up[i]^2, each at limb 2i */
+  /* Add twice tp, the off-diagonal part, one limb up from the bottom of rp.  */
+#if HAVE_NATIVE_mpn_addlsh1_n
+  cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);
+#else
+  cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
+  cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);
+#endif
+  rp[2 * n - 1] += cy;
+}
+#define READY_WITH_mpn_sqr_basecase
+#endif
+
+
+#if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2
+
+/* mpn_sqr_basecase using plain mpn_addmul_2.
+
+   This is tricky, since we have to let mpn_addmul_2 make some undesirable
+   multiplies, u[k]*u[k], that we would like to let mpn_sqr_diagonal handle.
+   This forces us to conditionally add or subtract the mpn_sqr_diagonal
+   results.  Examples of the product we form:
+
+   n = 4              n = 5            n = 6
+   u1u0 * u3u2u1      u1u0 * u4u3u2u1  u1u0 * u5u4u3u2u1
+   u2 * u3           u3u2 * u4u3       u3u2 * u5u4u3
+                                       u4 * u5
+   add: u0 u2 u3      add: u0 u2 u4    add: u0 u2 u4 u5
+   sub: u1           sub: u1 u3        sub: u1 u3
+*/
+
+void
+mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
+{
+  mp_size_t i;
+  mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
+  mp_ptr tp = tarr;    /* accumulates the products formed by mpn_addmul_2 */
+  mp_limb_t cy;
+
+  /* must fit 2*n limbs in tarr */
+  ASSERT (n <= SQR_TOOM2_THRESHOLD);
+  /* Odd and even n are handled separately; both end in the shared doubling step below.  */
+  if ((n & 1) != 0)
+    {
+      mp_limb_t x0, x1;
+
+      if (n == 1)
+       {
+         mp_limb_t ul, lpl;
+         ul = up[0];
+         umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
+         rp[0] = lpl >> GMP_NAIL_BITS;
+         return;
+       }
+
+      /* The code below doesn't like unnormalized operands.  Since such
+        operands are unusual, handle them with a dumb recursion.  */
+      if (up[n - 1] == 0)
+       {
+         rp[2 * n - 2] = 0;
+         rp[2 * n - 1] = 0;
+         mpn_sqr_basecase (rp, up, n - 1);
+         return;
+       }
+
+      MPN_ZERO (tp, n);        /* the addmul calls below accumulate into tp */
+
+      for (i = 0; i <= n - 2; i += 2)
+       {
+         cy = mpn_addmul_2 (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
+         tp[n + i] = cy;
+       }
+
+      MPN_SQR_DIAGONAL (rp, up, n);
+      /* Negate the squares of the odd-indexed limbs (the "sub" entries above), in place in rp.  */
+      for (i = 2;; i += 4)
+       {
+         x0 = rp[i + 0];
+         rp[i + 0] = (-x0) & GMP_NUMB_MASK;
+         x1 = rp[i + 1];
+         rp[i + 1] = (-x1 - (x0 != 0)) & GMP_NUMB_MASK;        /* two-limb negation of x1:x0 */
+         __GMPN_SUB_1 (cy, rp + i + 2, rp + i + 2, 2, (x1 | x0) != 0); /* borrow of a non-zero negation taken from the next pair */
+         if (i + 4 >= 2 * n)
+           break;
+         mpn_incr_u (rp + i + 4, cy);
+       }
+    }
+  else
+    {
+      mp_limb_t x0, x1;
+
+      if (n == 2)
+       {
+         rp[0] = 0;
+         rp[1] = 0;
+         rp[3] = mpn_addmul_2 (rp, up, 2, up);
+         return;
+       }
+
+      /* The code below doesn't like unnormalized operands.  Since such
+        operands are unusual, handle them with a dumb recursion.  */
+      if (up[n - 1] == 0)
+       {
+         rp[2 * n - 2] = 0;
+         rp[2 * n - 1] = 0;
+         mpn_sqr_basecase (rp, up, n - 1);
+         return;
+       }
+
+      MPN_ZERO (tp, n);
+
+      for (i = 0; i <= n - 4; i += 2)
+       {
+         cy = mpn_addmul_2 (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
+         tp[n + i] = cy;
+       }
+      cy = mpn_addmul_1 (tp + 2 * n - 4, up + n - 1, 1, up[n - 2]);    /* last product: up[n-1] * up[n-2] */
+      tp[2 * n - 3] = cy;
+
+      MPN_SQR_DIAGONAL (rp, up, n);
+      /* As in the odd case, but the borrow of the last negated pair is applied after the loop.  */
+      for (i = 2;; i += 4)
+       {
+         x0 = rp[i + 0];
+         rp[i + 0] = (-x0) & GMP_NUMB_MASK;
+         x1 = rp[i + 1];
+         rp[i + 1] = (-x1 - (x0 != 0)) & GMP_NUMB_MASK;
+         if (i + 6 >= 2 * n)
+           break;
+         __GMPN_SUB_1 (cy, rp + i + 2, rp + i + 2, 2, (x1 | x0) != 0);
+         mpn_incr_u (rp + i + 4, cy);
+       }
+      mpn_decr_u (rp + i + 2, (x1 | x0) != 0);
+    }
+  /* Add twice tp one limb up from the bottom of rp.  */
+#if HAVE_NATIVE_mpn_addlsh1_n
+  cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);
+#else
+  cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
+  cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);
+#endif
+  rp[2 * n - 1] += cy;
+}
+#define READY_WITH_mpn_sqr_basecase
+#endif
+
+
+#if ! defined (READY_WITH_mpn_sqr_basecase)
+
+/* Default mpn_sqr_basecase using mpn_addmul_1.  */
+
+void
+mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
+{
+  mp_size_t i;
+
+  ASSERT (n >= 1);
+  ASSERT (! MPN_OVERLAP_P (rp, 2*n, up, n));
+  /* rp[1],rp[0] = up[0]^2; for n == 1 this is the complete result.  */
+  {
+    mp_limb_t ul, lpl;
+    ul = up[0];
+    umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
+    rp[0] = lpl >> GMP_NAIL_BITS;
+  }
+  if (n > 1)
+    {
+      mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
+      mp_ptr tp = tarr;        /* sum of the products up[i]*up[j] with i < j, starting at limb 1 of the result */
+      mp_limb_t cy;
+
+      /* must fit 2*n limbs in tarr */
+      ASSERT (n <= SQR_TOOM2_THRESHOLD);
+
+      cy = mpn_mul_1 (tp, up + 1, n - 1, up[0]);       /* first row: up[0] times up[1..n-1] */
+      tp[n - 1] = cy;
+      for (i = 2; i < n; i++)
+       {
+         mp_limb_t cy;
+         cy = mpn_addmul_1 (tp + 2 * i - 2, up + i, n - i, up[i - 1]); /* row for up[i-1]: times up[i..n-1] */
+         tp[n + i - 2] = cy;
+       }
+      MPN_SQR_DIAGONAL (rp + 2, up + 1, n - 1);        /* the remaining squares up[1..n-1]^2 */
+      /* Add twice the off-diagonal sum, one limb up from the bottom of rp.  */
+      {
+       mp_limb_t cy;
+#if HAVE_NATIVE_mpn_addlsh1_n
+       cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);
+#else
+       cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
+       cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);
+#endif
+       rp[2 * n - 1] += cy;
+      }
+    }
+}
+#endif
diff --git a/mpn/generic/sqrmod_bnm1.c b/mpn/generic/sqrmod_bnm1.c

new file mode 100644 (file)

index 0000000..698bd68
--- /dev/null
+++ b/mpn/generic/sqrmod_bnm1.c
@@ -0,0 +1,302 @@
+/* sqrmod_bnm1.c -- squaring mod B^n-1.
+
+   Contributed to the GNU project by Niels Möller, Torbjorn Granlund and
+   Marco Bodrato.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Input is {ap,rn}; output is {rp,rn}, computation is
+   mod B^rn - 1, and values are semi-normalised; zero is represented
+   as either 0 or B^n - 1.  Needs a scratch of 2rn limbs at tp.
+   tp==rp is allowed. */
+static void
+mpn_bc_sqrmod_bnm1 (mp_ptr rp, mp_srcptr ap, mp_size_t rn, mp_ptr tp)
+{
+  mp_limb_t cy;
+
+  ASSERT (0 < rn);
+  /* B^rn = 1 mod B^rn - 1, so the high half of the square is added onto the low half.  */
+  mpn_sqr (tp, ap, rn);        /* {tp,2rn} = {ap,rn}^2 */
+  cy = mpn_add_n (rp, tp, tp + rn, rn);
+  /* If cy == 1, then the value of rp is at most B^rn - 2, so there can
+   * be no overflow when adding in the carry. */
+  MPN_INCR_U (rp, rn, cy);
+}
+
+
+/* Input is {ap,rn+1}; output is {rp,rn+1}, in
+   semi-normalised representation, computation is mod B^rn + 1. Needs
+   a scratch area of 2rn + 2 limbs at tp; tp == rp is allowed.
+   Output is normalised. */
+static void
+mpn_bc_sqrmod_bnp1 (mp_ptr rp, mp_srcptr ap, mp_size_t rn, mp_ptr tp)
+{
+  mp_limb_t cy;
+
+  ASSERT (0 < rn);
+  /* B^rn = -1 mod B^rn + 1: subtract the high half of the square from the low half.  */
+  mpn_sqr (tp, ap, rn + 1);    /* {tp,2rn+2} = {ap,rn+1}^2 */
+  ASSERT (tp[2*rn+1] == 0);
+  ASSERT (tp[2*rn] < GMP_NUMB_MAX);
+  cy = tp[2*rn] + mpn_sub_n (rp, tp, tp+rn, rn);       /* limb 2rn counts +1 (B^2rn = 1); a borrow of B^rn is worth +1 too */
+  rp[rn] = 0;
+  MPN_INCR_U (rp, rn+1, cy );
+}
+
+
+/* Computes {rp,MIN(rn,2an)} <- {ap,an}^2 Mod(B^rn-1)
+ *
+ * The result is expected to be ZERO if and only if the operand
+ * already is. Otherwise the class [0] Mod(B^rn-1) is represented by
+ * B^rn-1.
+ * It should not be a problem if sqrmod_bnm1 is used to
+ * compute the full square with 2*an <= rn, because this condition
+ * implies (B^an-1)^2 < (B^rn-1) .
+ *
+ * Requires rn/4 < an <= rn
+ * Scratch need: rn/2 + (need for recursive call OR rn + 3). This gives
+ *
+ * S(n) <= rn/2 + MAX (rn + 4, S(n/2)) <= 3/2 rn + 4
+ */
+void
+mpn_sqrmod_bnm1 (mp_ptr rp, mp_size_t rn, mp_srcptr ap, mp_size_t an, mp_ptr tp)
+{
+  ASSERT (0 < an);
+  ASSERT (an <= rn);
+  /* Odd or small rn: square directly and wrap the limbs above rn around (B^rn = 1).  */
+  if ((rn & 1) != 0 || BELOW_THRESHOLD (rn, SQRMOD_BNM1_THRESHOLD))
+    {
+      if (UNLIKELY (an < rn))
+       {
+         if (UNLIKELY (2*an <= rn))    /* the square has at most rn limbs: no reduction */
+           {
+             mpn_sqr (rp, ap, an);
+           }
+         else
+           {
+             mp_limb_t cy;
+             mpn_sqr (tp, ap, an);
+             cy = mpn_add (rp, tp, rn, tp + rn, 2*an - rn);    /* add the 2an - rn limbs above rn to the low rn */
+             MPN_INCR_U (rp, rn, cy);
+           }
+       }
+      else
+       mpn_bc_sqrmod_bnm1 (rp, ap, rn, tp);
+    }
+  else
+    {
+      mp_size_t n;
+      mp_limb_t cy;
+      mp_limb_t hi;
+
+      n = rn >> 1;     /* rn is even in this branch */
+
+      ASSERT (2*an > n);
+
+      /* Compute xm = a^2 mod (B^n - 1), xp = a^2 mod (B^n + 1)
+        and crt together as
+
+        x = -xp * B^n + (B^n + 1) * [ (xp + xm)/2 mod (B^n-1)]
+      */
+
+#define a0 ap
+#define a1 (ap + n)
+
+#define xp  tp /* 2n + 2 */
+      /* am1  maybe in {xp, n} */
+#define sp1 (tp + 2*n + 2)
+      /* ap1  maybe in {sp1, n + 1} */
+
+      {
+       mp_srcptr am1;  /* a reduced mod B^n - 1 */
+       mp_size_t anm;  /* limbs in am1 */
+       mp_ptr so;      /* scratch handed to the recursive call */
+
+       if (LIKELY (an > n))
+         {
+           so = xp + n;
+           am1 = xp;
+           cy = mpn_add (xp, a0, n, a1, an - n);       /* a0 + a1, as B^n = 1 */
+           MPN_INCR_U (xp, n, cy);
+           anm = n;
+         }
+       else
+         {
+           so = xp;
+           am1 = a0;
+           anm = an;
+         }
+
+       mpn_sqrmod_bnm1 (rp, n, am1, anm, so);  /* xm, left at rp */
+      }
+
+      {
+       int       k;
+       mp_srcptr ap1;  /* a reduced mod B^n + 1 */
+       mp_size_t anp;  /* limbs in ap1 */
+
+       if (LIKELY (an > n)) {
+         ap1 = sp1;
+         cy = mpn_sub (sp1, a0, n, a1, an - n);        /* a0 - a1, as B^n = -1 */
+         sp1[n] = 0;
+         MPN_INCR_U (sp1, n + 1, cy);
+         anp = n + ap1[n];
+       } else {
+         ap1 = a0;
+         anp = an;
+       }
+
+       if (BELOW_THRESHOLD (n, MUL_FFT_MODF_THRESHOLD))
+         k=0;
+       else
+         {
+           int mask;
+           k = mpn_fft_best_k (n, 1);
+           mask = (1<<k) -1;
+           while (n & mask) {k--; mask >>=1;}; /* lower k until 2^k divides n */
+         }
+       if (k >= FFT_FIRST_K)
+         xp[n] = mpn_mul_fft (xp, n, ap1, anp, ap1, anp, k);
+       else if (UNLIKELY (ap1 == a0))
+         {
+           ASSERT (anp <= n);
+           ASSERT (2*anp > n);
+           mpn_sqr (xp, a0, an);
+           anp = 2*an - n;
+           cy = mpn_sub (xp, xp, n, xp + n, anp);      /* low n limbs minus the limbs above, as B^n = -1 */
+           xp[n] = 0;
+           MPN_INCR_U (xp, n+1, cy);
+         }
+       else
+         mpn_bc_sqrmod_bnp1 (xp, ap1, n, xp);
+      }
+
+      /* Here the CRT recomposition begins.
+
+        xm <- (xp + xm)/2 = (xp + xm)B^n/2 mod (B^n-1)
+        Division by 2 is a bitwise rotation.
+
+        Assumes xp normalised mod (B^n+1).
+
+        The residue class [0] is represented by [B^n-1]; except when
+        both inputs are ZERO.
+      */
+
+#if HAVE_NATIVE_mpn_rsh1add_n || HAVE_NATIVE_mpn_rsh1add_nc
+#if HAVE_NATIVE_mpn_rsh1add_nc
+      cy = mpn_rsh1add_nc(rp, rp, xp, n, xp[n]); /* B^n = 1 */
+      hi = cy << (GMP_NUMB_BITS - 1);
+      cy = 0;
+      /* next update of rp[n-1] will set cy = 1 only if rp[n-1]+=hi
+        overflows, i.e. a further increment will not overflow again. */
+#else /* ! _nc */
+      cy = xp[n] + mpn_rsh1add_n(rp, rp, xp, n); /* B^n = 1 */
+      hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
+      cy >>= 1;
+      /* cy = 1 only if xp[n] = 1 i.e. {xp,n} = ZERO, this implies that
+        the rsh1add was a simple rshift: the top bit is 0. cy=1 => hi=0. */
+#endif
+#if GMP_NAIL_BITS == 0
+      add_ssaaaa(cy, rp[n-1], cy, rp[n-1], 0, hi);
+#else
+      cy += (hi & rp[n-1]) >> (GMP_NUMB_BITS-1);
+      rp[n-1] ^= hi;
+#endif
+#else /* ! HAVE_NATIVE_mpn_rsh1add_n */
+#if HAVE_NATIVE_mpn_add_nc
+      cy = mpn_add_nc(rp, rp, xp, n, xp[n]);
+#else /* ! _nc */
+      cy = xp[n] + mpn_add_n(rp, rp, xp, n); /* xp[n] == 1 implies {xp,n} == ZERO */
+#endif
+      cy += (rp[0]&1);
+      mpn_rshift(rp, rp, n, 1);
+      ASSERT (cy <= 2);
+      hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
+      cy >>= 1;
+      /* We can have cy != 0 only if hi = 0... */
+      ASSERT ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0);
+      rp[n-1] |= hi;
+      /* ... rp[n-1] + cy can not overflow, the following INCR is correct. */
+#endif
+      ASSERT (cy <= 1);
+      /* Next increment can not overflow, read the previous comments about cy. */
+      ASSERT ((cy == 0) || ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0));
+      MPN_INCR_U(rp, n, cy);
+
+      /* Compute the highest half:
+        ([(xp + xm)/2 mod (B^n-1)] - xp ) * B^n
+       */
+      if (UNLIKELY (2*an < rn))
+       {
+         /* Note that in this case, the only way the result can equal
+            zero mod B^{rn} - 1 is if the input is zero, and
+            then the output of both the recursive calls and this CRT
+            reconstruction is zero, not B^{rn} - 1. */
+         cy = mpn_sub_n (rp + n, rp, xp, 2*an - n);
+
+         /* FIXME: This subtraction of the high parts is not really
+            necessary, we do it to get the carry out, and for sanity
+            checking. */
+         cy = xp[n] + mpn_sub_nc (xp + 2*an - n, rp + 2*an - n,
+                                  xp + 2*an - n, rn - 2*an, cy);
+         ASSERT (mpn_zero_p (xp + 2*an - n+1, rn - 1 - 2*an));
+         cy = mpn_sub_1 (rp, rp, 2*an, cy);
+         ASSERT (cy == (xp + 2*an - n)[0]);
+       }
+      else
+       {
+         cy = xp[n] + mpn_sub_n (rp + n, rp, xp, n);
+         /* cy = 1 only if {xp,n+1} is not ZERO, i.e. {rp,n} is not ZERO.
+            DECR will affect _at most_ the lowest n limbs. */
+         MPN_DECR_U (rp, 2*n, cy);
+       }
+#undef a0
+#undef a1
+#undef xp
+#undef sp1
+    }
+}
+
+mp_size_t
+mpn_sqrmod_bnm1_next_size (mp_size_t n)
+{
+  if (BELOW_THRESHOLD (n, SQRMOD_BNM1_THRESHOLD))
+    return n;                  /* small sizes are usable as they are */
+  /* The next two ranges round N up to a multiple of 2, then of 4.  */
+  else if (BELOW_THRESHOLD (n, 4 * (SQRMOD_BNM1_THRESHOLD - 1) + 1))
+    return (n + 1) & ~(mp_size_t) 1;
+  else if (BELOW_THRESHOLD (n, 8 * (SQRMOD_BNM1_THRESHOLD - 1) + 1))
+    return (n + 3) & ~(mp_size_t) 3;
+  else
+    {
+      mp_size_t half = (n + 1) >> 1;   /* N / 2, rounded up */
+      /* A multiple of 8 below the FFT range, else twice an FFT size.  */
+      if (BELOW_THRESHOLD (half, SQR_FFT_MODF_THRESHOLD))
+       return (n + 7) & ~(mp_size_t) 7;
+      return 2 * mpn_fft_next_size (half, mpn_fft_best_k (half, 1));
+    }
+}
diff --git a/mpn/generic/sqrtrem.c b/mpn/generic/sqrtrem.c

new file mode 100644 (file)

index 0000000..a609a4b
--- /dev/null
+++ b/mpn/generic/sqrtrem.c
@@ -0,0 +1,342 @@
+/* mpn_sqrtrem -- square root and remainder
+
+   Contributed to the GNU project by Paul Zimmermann (most code) and
+   Torbjorn Granlund (mpn_sqrtrem1).
+
+   THE FUNCTIONS IN THIS FILE EXCEPT mpn_sqrtrem ARE INTERNAL WITH A
+   MUTABLE INTERFACE.  IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED
+   INTERFACES.  IN FACT, IT IS ALMOST GUARANTEED THAT THEY WILL CHANGE OR
+   DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 1999, 2000, 2001, 2002, 2004, 2005, 2008, 2010 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+/* See "Karatsuba Square Root", reference in gmp.texi.  */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+static const unsigned short invsqrttab[384] =
+{ /* entries lie in 0x100..0x1ff; mpn_sqrtrem1 indexes with (top 9 bits of a0) - 0x80 */
+  0x1ff,0x1fd,0x1fb,0x1f9,0x1f7,0x1f5,0x1f3,0x1f2, /* sqrt(1/80)..sqrt(1/87) */
+  0x1f0,0x1ee,0x1ec,0x1ea,0x1e9,0x1e7,0x1e5,0x1e4, /* sqrt(1/88)..sqrt(1/8f) */
+  0x1e2,0x1e0,0x1df,0x1dd,0x1db,0x1da,0x1d8,0x1d7, /* sqrt(1/90)..sqrt(1/97) */
+  0x1d5,0x1d4,0x1d2,0x1d1,0x1cf,0x1ce,0x1cc,0x1cb, /* sqrt(1/98)..sqrt(1/9f) */
+  0x1c9,0x1c8,0x1c6,0x1c5,0x1c4,0x1c2,0x1c1,0x1c0, /* sqrt(1/a0)..sqrt(1/a7) */
+  0x1be,0x1bd,0x1bc,0x1ba,0x1b9,0x1b8,0x1b7,0x1b5, /* sqrt(1/a8)..sqrt(1/af) */
+  0x1b4,0x1b3,0x1b2,0x1b0,0x1af,0x1ae,0x1ad,0x1ac, /* sqrt(1/b0)..sqrt(1/b7) */
+  0x1aa,0x1a9,0x1a8,0x1a7,0x1a6,0x1a5,0x1a4,0x1a3, /* sqrt(1/b8)..sqrt(1/bf) */
+  0x1a2,0x1a0,0x19f,0x19e,0x19d,0x19c,0x19b,0x19a, /* sqrt(1/c0)..sqrt(1/c7) */
+  0x199,0x198,0x197,0x196,0x195,0x194,0x193,0x192, /* sqrt(1/c8)..sqrt(1/cf) */
+  0x191,0x190,0x18f,0x18e,0x18d,0x18c,0x18c,0x18b, /* sqrt(1/d0)..sqrt(1/d7) */
+  0x18a,0x189,0x188,0x187,0x186,0x185,0x184,0x183, /* sqrt(1/d8)..sqrt(1/df) */
+  0x183,0x182,0x181,0x180,0x17f,0x17e,0x17e,0x17d, /* sqrt(1/e0)..sqrt(1/e7) */
+  0x17c,0x17b,0x17a,0x179,0x179,0x178,0x177,0x176, /* sqrt(1/e8)..sqrt(1/ef) */
+  0x176,0x175,0x174,0x173,0x172,0x172,0x171,0x170, /* sqrt(1/f0)..sqrt(1/f7) */
+  0x16f,0x16f,0x16e,0x16d,0x16d,0x16c,0x16b,0x16a, /* sqrt(1/f8)..sqrt(1/ff) */
+  0x16a,0x169,0x168,0x168,0x167,0x166,0x166,0x165, /* sqrt(1/100)..sqrt(1/107) */
+  0x164,0x164,0x163,0x162,0x162,0x161,0x160,0x160, /* sqrt(1/108)..sqrt(1/10f) */
+  0x15f,0x15e,0x15e,0x15d,0x15c,0x15c,0x15b,0x15a, /* sqrt(1/110)..sqrt(1/117) */
+  0x15a,0x159,0x159,0x158,0x157,0x157,0x156,0x156, /* sqrt(1/118)..sqrt(1/11f) */
+  0x155,0x154,0x154,0x153,0x153,0x152,0x152,0x151, /* sqrt(1/120)..sqrt(1/127) */
+  0x150,0x150,0x14f,0x14f,0x14e,0x14e,0x14d,0x14d, /* sqrt(1/128)..sqrt(1/12f) */
+  0x14c,0x14b,0x14b,0x14a,0x14a,0x149,0x149,0x148, /* sqrt(1/130)..sqrt(1/137) */
+  0x148,0x147,0x147,0x146,0x146,0x145,0x145,0x144, /* sqrt(1/138)..sqrt(1/13f) */
+  0x144,0x143,0x143,0x142,0x142,0x141,0x141,0x140, /* sqrt(1/140)..sqrt(1/147) */
+  0x140,0x13f,0x13f,0x13e,0x13e,0x13d,0x13d,0x13c, /* sqrt(1/148)..sqrt(1/14f) */
+  0x13c,0x13b,0x13b,0x13a,0x13a,0x139,0x139,0x139, /* sqrt(1/150)..sqrt(1/157) */
+  0x138,0x138,0x137,0x137,0x136,0x136,0x135,0x135, /* sqrt(1/158)..sqrt(1/15f) */
+  0x135,0x134,0x134,0x133,0x133,0x132,0x132,0x132, /* sqrt(1/160)..sqrt(1/167) */
+  0x131,0x131,0x130,0x130,0x12f,0x12f,0x12f,0x12e, /* sqrt(1/168)..sqrt(1/16f) */
+  0x12e,0x12d,0x12d,0x12d,0x12c,0x12c,0x12b,0x12b, /* sqrt(1/170)..sqrt(1/177) */
+  0x12b,0x12a,0x12a,0x129,0x129,0x129,0x128,0x128, /* sqrt(1/178)..sqrt(1/17f) */
+  0x127,0x127,0x127,0x126,0x126,0x126,0x125,0x125, /* sqrt(1/180)..sqrt(1/187) */
+  0x124,0x124,0x124,0x123,0x123,0x123,0x122,0x122, /* sqrt(1/188)..sqrt(1/18f) */
+  0x121,0x121,0x121,0x120,0x120,0x120,0x11f,0x11f, /* sqrt(1/190)..sqrt(1/197) */
+  0x11f,0x11e,0x11e,0x11e,0x11d,0x11d,0x11d,0x11c, /* sqrt(1/198)..sqrt(1/19f) */
+  0x11c,0x11b,0x11b,0x11b,0x11a,0x11a,0x11a,0x119, /* sqrt(1/1a0)..sqrt(1/1a7) */
+  0x119,0x119,0x118,0x118,0x118,0x118,0x117,0x117, /* sqrt(1/1a8)..sqrt(1/1af) */
+  0x117,0x116,0x116,0x116,0x115,0x115,0x115,0x114, /* sqrt(1/1b0)..sqrt(1/1b7) */
+  0x114,0x114,0x113,0x113,0x113,0x112,0x112,0x112, /* sqrt(1/1b8)..sqrt(1/1bf) */
+  0x112,0x111,0x111,0x111,0x110,0x110,0x110,0x10f, /* sqrt(1/1c0)..sqrt(1/1c7) */
+  0x10f,0x10f,0x10f,0x10e,0x10e,0x10e,0x10d,0x10d, /* sqrt(1/1c8)..sqrt(1/1cf) */
+  0x10d,0x10c,0x10c,0x10c,0x10c,0x10b,0x10b,0x10b, /* sqrt(1/1d0)..sqrt(1/1d7) */
+  0x10a,0x10a,0x10a,0x10a,0x109,0x109,0x109,0x109, /* sqrt(1/1d8)..sqrt(1/1df) */
+  0x108,0x108,0x108,0x107,0x107,0x107,0x107,0x106, /* sqrt(1/1e0)..sqrt(1/1e7) */
+  0x106,0x106,0x106,0x105,0x105,0x105,0x104,0x104, /* sqrt(1/1e8)..sqrt(1/1ef) */
+  0x104,0x104,0x103,0x103,0x103,0x103,0x102,0x102, /* sqrt(1/1f0)..sqrt(1/1f7) */
+  0x102,0x102,0x101,0x101,0x101,0x101,0x100,0x100  /* sqrt(1/1f8)..sqrt(1/1ff) */
+};
+
+/* Return s = floor(sqrt(a0)) and store a0 - s^2 in *rp; needs a0 >= GMP_NUMB_HIGHBIT/2.  */
+
+#if GMP_NUMB_BITS > 32
+#define MAGIC CNST_LIMB(0x10000000000) /* 0xffe7debbfc < MAGIC < 0x232b1850f410 */
+#else
+#define MAGIC CNST_LIMB(0x100000)              /* 0xfee6f < MAGIC < 0x29cbc8 */
+#endif
+
+static mp_limb_t
+mpn_sqrtrem1 (mp_ptr rp, mp_limb_t a0)
+{
+#if GMP_NUMB_BITS > 32
+  mp_limb_t a1;
+#endif
+  mp_limb_t x0, t2, t, x2;
+  unsigned abits;
+
+  ASSERT_ALWAYS (GMP_NAIL_BITS == 0);  /* the shifts below are written for nail-free limbs */
+  ASSERT_ALWAYS (GMP_LIMB_BITS == 32 || GMP_LIMB_BITS == 64);
+  ASSERT (a0 >= GMP_NUMB_HIGHBIT / 2); /* keeps abits >= 0x80, i.e. a valid table index */
+
+  /* Use Newton iterations for approximating 1/sqrt(a) instead of sqrt(a),
+     since we can do the former without division.  As part of the last
+     iteration convert from 1/sqrt(a) to sqrt(a).  */
+
+  abits = a0 >> (GMP_LIMB_BITS - 1 - 8);       /* extract bits for table lookup */
+  x0 = invsqrttab[abits - 0x80];               /* initial 1/sqrt(a) */
+
+  /* x0 is now an 8-bit approximation of 1/sqrt(a0) */
+
+#if GMP_NUMB_BITS > 32
+  a1 = a0 >> (GMP_LIMB_BITS - 1 - 32);
+  t = (mp_limb_signed_t) (CNST_LIMB(0x2000000000000) - 0x30000  - a1 * x0 * x0) >> 16;
+  x0 = (x0 << 16) + ((mp_limb_signed_t) (x0 * t) >> (16+2));
+
+  /* x0 is now a 16-bit approximation of 1/sqrt(a0) */
+
+  t2 = x0 * (a0 >> (32-8));
+  t = t2 >> 25;
+  t = ((mp_limb_signed_t) ((a0 << 14) - t * t - MAGIC) >> (32-8));
+  x0 = t2 + ((mp_limb_signed_t) (x0 * t) >> 15);
+  x0 >>= 32;
+#else
+  t2 = x0 * (a0 >> (16-8));
+  t = t2 >> 13;
+  t = ((mp_limb_signed_t) ((a0 << 6) - t * t - MAGIC) >> (16-8));
+  x0 = t2 + ((mp_limb_signed_t) (x0 * t) >> 7);
+  x0 >>= 16;
+#endif
+
+  /* x0 is now a full limb approximation of sqrt(a0) */
+
+  x2 = x0 * x0;
+  if (x2 + 2*x0 <= a0 - 1)             /* i.e. (x0+1)^2 <= a0: x0 is one too small */
+    {
+      x2 += 2*x0 + 1;                  /* x2 becomes (x0+1)^2 */
+      x0++;
+    }
+
+  *rp = a0 - x2;                       /* remainder a0 - s^2 */
+  return x0;
+}
+
+
+#define Prec (GMP_NUMB_BITS >> 1)      /* number of bits in half a limb */
+
+/* Same as mpn_sqrtrem, but for size=2 and {np, 2} normalized.
+   Returns cc such that {np, 2} = sp[0]^2 + cc*2^GMP_NUMB_BITS + rp[0].  */
+static mp_limb_t
+mpn_sqrtrem2 (mp_ptr sp, mp_ptr rp, mp_srcptr np)
+{
+  mp_limb_t qhl, q, u, np0, sp0, rp0, q2;
+  int cc;
+
+  ASSERT (np[1] >= GMP_NUMB_HIGHBIT / 2);
+
+  np0 = np[0];                         /* read before rp[0] is written (callers pass rp == np) */
+  sp0 = mpn_sqrtrem1 (rp, np[1]);      /* root of the high limb, its remainder in rp[0] */
+  qhl = 0;
+  rp0 = rp[0];
+  while (rp0 >= sp0)                   /* qhl = rp0 / sp0, rp0 = rp0 % sp0, by subtraction */
+    {
+      qhl++;
+      rp0 -= sp0;
+    }
+  /* now rp0 < sp0 < 2^Prec */
+  rp0 = (rp0 << Prec) + (np0 >> Prec);
+  u = 2 * sp0;
+  q = rp0 / u;
+  u = rp0 - q * u;                     /* u is now the remainder of rp0 / (2*sp0) */
+  q += (qhl & 1) << (Prec - 1);
+  qhl >>= 1; /* if qhl=1, necessarily q=0 as qhl*2^Prec + q <= 2^Prec */
+  /* now we have (initial rp0)<<Prec + np0>>Prec = (qhl<<Prec + q) * (2sp0) + u */
+  sp0 = ((sp0 + qhl) << Prec) + q;
+  cc = u >> Prec;
+  rp0 = ((u << Prec) & GMP_NUMB_MASK) + (np0 & (((mp_limb_t) 1 << Prec) - 1));
+  /* subtract q * q or qhl*2^(2*Prec) from rp */
+  q2 = q * q;
+  cc -= (rp0 < q2) + qhl;
+  rp0 -= q2;
+  /* now subtract 2*q*2^Prec + 2^(2*Prec) if qhl is set */
+  if (cc < 0)                          /* negative remainder: root is one too large */
+    {
+      if (sp0 != 0)
+       {
+         rp0 += sp0;                   /* add the old root ... */
+         cc += rp0 < sp0;
+       }
+      else
+       cc++;   /* presumably sp0 wrapped to 2^GMP_NUMB_BITS, so adding it is a carry -- confirm */
+      --sp0;
+      rp0 += sp0;                      /* ... and the decremented root: 2*old - 1 in total */
+      cc += rp0 < sp0;
+    }
+
+  rp[0] = rp0;
+  sp[0] = sp0;
+  return cc;
+}
+
+/* writes in {sp, n} the square root (rounded towards zero) of {np, 2n},
+   and in {np, n} the low n limbs of the remainder, returns the high
+   limb of the remainder (which is 0 or 1).
+   Assumes {np, 2n} is normalized, i.e. np[2n-1] >= B/4
+   where B=2^GMP_NUMB_BITS.  */
+static mp_limb_t
+mpn_dc_sqrtrem (mp_ptr sp, mp_ptr np, mp_size_t n)
+{
+  mp_limb_t q;                 /* carry out of {sp, n} */
+  int c, b;                    /* carry out of remainder */
+  mp_size_t l, h;
+
+  ASSERT (np[2 * n - 1] >= GMP_NUMB_HIGHBIT / 2);
+
+  if (n == 1)
+    c = mpn_sqrtrem2 (sp, np, np);     /* base case: two-limb root, remainder in place */
+  else
+    {
+      l = n / 2;                       /* low part size */
+      h = n - l;                       /* high part size, h == l or h == l + 1 */
+      q = mpn_dc_sqrtrem (sp + l, np + 2 * l, h);      /* recurse on the high 2h limbs */
+      if (q != 0)
+       mpn_sub_n (np + 2 * l, np + 2 * l, sp + l, h);
+      q += mpn_divrem (sp, 0, np + l, n, sp + l, h);   /* divide by the high root {sp+l, h} */
+      c = sp[0] & 1;                   /* low quotient bit, dropped by the shift below */
+      mpn_rshift (sp, sp, l, 1);       /* halve the quotient in {sp, l} */
+      sp[l - 1] |= (q << (GMP_NUMB_BITS - 1)) & GMP_NUMB_MASK; /* shift low bit of q in on top */
+      q >>= 1;
+      if (c != 0)
+       c = mpn_add_n (np + l, np + l, sp + l, h);
+      mpn_sqr (np + n, sp, l);         /* {np+n, 2l} = {sp, l}^2 */
+      b = q + mpn_sub_n (np, np, np + n, 2 * l);       /* subtract that square from the low part */
+      c -= (l == h) ? b : mpn_sub_1 (np + 2 * l, np + 2 * l, 1, (mp_limb_t) b);
+      q = mpn_add_1 (sp + l, sp + l, h, q);
+
+      if (c < 0)                       /* negative remainder: add 2*S - 1, decrement S */
+       {
+         c += mpn_addmul_1 (np, sp, n, CNST_LIMB(2)) + 2 * q;
+         c -= mpn_sub_1 (np, np, n, CNST_LIMB(1));
+         q -= mpn_sub_1 (sp, sp, n, CNST_LIMB(1));     /* q is not read again after this */
+       }
+    }
+
+  return c;
+}
+
+
+mp_size_t
+mpn_sqrtrem (mp_ptr sp, mp_ptr rp, mp_srcptr np, mp_size_t nn)
+{ /* root of {np, nn} goes to sp, remainder to rp unless rp is NULL; returns remainder size */
+  mp_limb_t *tp, s0[1], cc, high, rl;
+  int c;
+  mp_size_t rn, tn;
+  TMP_DECL;
+
+  ASSERT (nn >= 0);
+  ASSERT_MPN (np, nn);
+
+  /* If OP is zero, both results are zero.  */
+  if (nn == 0)
+    return 0;
+
+  ASSERT (np[nn - 1] != 0);
+  ASSERT (rp == NULL || MPN_SAME_OR_SEPARATE_P (np, rp, nn));
+  ASSERT (rp == NULL || ! MPN_OVERLAP_P (sp, (nn + 1) / 2, rp, nn));
+  ASSERT (! MPN_OVERLAP_P (sp, (nn + 1) / 2, np, nn));
+
+  high = np[nn - 1];
+  if (nn == 1 && (high & GMP_NUMB_HIGHBIT))    /* one limb with its top bit set: direct */
+    {
+      mp_limb_t r;
+      sp[0] = mpn_sqrtrem1 (&r, high);
+      if (rp != NULL)
+       rp[0] = r;
+      return r != 0;                   /* remainder size is 0 or 1 limb */
+    }
+  count_leading_zeros (c, high);
+  c -= GMP_NAIL_BITS;
+
+  c = c / 2; /* we have to shift left by 2c bits to normalize {np, nn} */
+  tn = (nn + 1) / 2; /* 2*tn is the smallest even integer >= nn */
+
+  TMP_MARK;
+  if (nn % 2 != 0 || c > 0)            /* odd size or unnormalized: work on a shifted copy */
+    {
+      tp = TMP_ALLOC_LIMBS (2 * tn);
+      tp[0] = 0;            /* needed only when 2*tn > nn, but saves a test */
+      if (c != 0)
+       mpn_lshift (tp + 2 * tn - nn, np, nn, 2 * c);
+      else
+       MPN_COPY (tp + 2 * tn - nn, np, nn);
+      rl = mpn_dc_sqrtrem (sp, tp, tn);        /* rl = high limb of the remainder */
+      /* We have 2^(2k)*N = S^2 + R where k = c + (2tn-nn)*GMP_NUMB_BITS/2,
+        thus 2^(2k)*N = (S-s0)^2 + 2*S*s0 - s0^2 + R where s0=S mod 2^k */
+      c += (nn % 2) * GMP_NUMB_BITS / 2;               /* c now represents k */
+      s0[0] = sp[0] & (((mp_limb_t) 1 << c) - 1);      /* S mod 2^k */
+      rl += mpn_addmul_1 (tp, sp, tn, 2 * s0[0]);      /* R = R + 2*s0*S */
+      cc = mpn_submul_1 (tp, s0, 1, s0[0]);            /* R = R - s0^2, low limb */
+      rl -= (tn > 1) ? mpn_sub_1 (tp + 1, tp + 1, tn - 1, cc) : cc;
+      mpn_rshift (sp, sp, tn, c);      /* undo the normalization on the root: S >> k */
+      tp[tn] = rl;
+      if (rp == NULL)
+       rp = tp;                        /* no remainder wanted: finish it in the scratch area */
+      c = c << 1;                      /* the remainder is shifted back by 2k bits */
+      if (c < GMP_NUMB_BITS)
+       tn++;                           /* include the high limb tp[tn] in the shift */
+      else
+       {
+         tp++;                         /* a whole limb of shift: skip it instead */
+         c -= GMP_NUMB_BITS;
+       }
+      if (c != 0)
+       mpn_rshift (rp, tp, tn, c);
+      else
+       MPN_COPY_INCR (rp, tp, tn);
+      rn = tn;
+    }
+  else                                 /* even size, already normalized: compute in rp */
+    {
+      if (rp == NULL)
+       rp = TMP_ALLOC_LIMBS (nn);
+      if (rp != np)
+       MPN_COPY (rp, np, nn);
+      rn = tn + (rp[tn] = mpn_dc_sqrtrem (sp, rp, tn)); /* store and count the high limb */
+    }
+
+  MPN_NORMALIZE (rp, rn);              /* presumably strips high zero limbs from rn */
+
+  TMP_FREE;
+  return rn;
+}
diff --git a/mpn/generic/sub.c b/mpn/generic/sub.c

new file mode 100644 (file)

index 0000000..ada3e91
--- /dev/null
+++ b/mpn/generic/sub.c
@@ -0,0 +1,23 @@
+/* mpn_sub - subtract mpn from mpn.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define __GMP_FORCE_mpn_sub 1 /* NOTE(review): no body here; presumably gmp.h emits mpn_sub when this is set -- confirm */
+
+#include "gmp.h"
+#include "gmp-impl.h"
diff --git a/mpn/generic/sub_1.c b/mpn/generic/sub_1.c

new file mode 100644 (file)

index 0000000..4ed2eab
--- /dev/null
+++ b/mpn/generic/sub_1.c
@@ -0,0 +1,23 @@
+/* mpn_sub_1 - subtract limb from mpn.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define __GMP_FORCE_mpn_sub_1 1 /* NOTE(review): no body here; presumably gmp.h emits mpn_sub_1 when this is set -- confirm */
+
+#include "gmp.h"
+#include "gmp-impl.h"
diff --git a/mpn/generic/sub_n.c b/mpn/generic/sub_n.c

new file mode 100644 (file)

index 0000000..3c2ed57
--- /dev/null
+++ b/mpn/generic/sub_n.c
@@ -0,0 +1,80 @@
+/* mpn_sub_n -- Subtract equal length limb vectors.
+
+Copyright 1992, 1993, 1994, 1996, 2000, 2002, 2009 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+#if GMP_NAIL_BITS == 0
+
+mp_limb_t
+mpn_sub_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) /* {rp,n} = {up,n} - {vp,n} */
+{
+  mp_limb_t ul, vl, sl, rl, cy, cy1, cy2;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_INCR_P (rp, up, n));
+  ASSERT (MPN_SAME_OR_INCR_P (rp, vp, n));
+
+  cy = 0;
+  do
+    {
+      ul = *up++;
+      vl = *vp++;
+      sl = ul - vl;
+      cy1 = sl > ul;                   /* borrow out of ul - vl */
+      rl = sl - cy;                    /* apply the borrow from the previous limb */
+      cy2 = rl > sl;                   /* borrow out of that second subtraction */
+      cy = cy1 | cy2;                  /* borrow passed on to the next limb */
+      *rp++ = rl;
+    }
+  while (--n != 0);                    /* do-while: relies on n >= 1 */
+
+  return cy;                           /* final borrow, 0 or 1 */
+}
+
+#endif
+
+#if GMP_NAIL_BITS >= 1
+
+mp_limb_t
+mpn_sub_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) /* nail-bits variant */
+{
+  mp_limb_t ul, vl, rl, cy;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_INCR_P (rp, up, n));
+  ASSERT (MPN_SAME_OR_INCR_P (rp, vp, n));
+
+  cy = 0;
+  do
+    {
+      ul = *up++;
+      vl = *vp++;
+      rl = ul - vl - cy;               /* one subtraction, including the previous borrow */
+      cy = rl >> (GMP_LIMB_BITS - 1);  /* top bit of the limb serves as the borrow */
+      *rp++ = rl & GMP_NUMB_MASK;      /* store only the bits kept by GMP_NUMB_MASK */
+    }
+  while (--n != 0);                    /* do-while: relies on n >= 1 */
+
+  return cy;                           /* final borrow, 0 or 1 */
+}
+
+#endif
diff --git a/mpn/generic/subcnd_n.c b/mpn/generic/subcnd_n.c

new file mode 100644 (file)

index 0000000..0dcc456
--- /dev/null
+++ b/mpn/generic/subcnd_n.c
@@ -0,0 +1,85 @@
+/* mpn_subcnd_n -- Compute R = U - V if CND != 0 or R = U if CND == 0.
+
+   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
+
+Copyright 1992, 1993, 1994, 1996, 2000, 2002, 2008, 2009 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+#if GMP_NAIL_BITS == 0
+
+mp_limb_t
+mpn_subcnd_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t cnd)
+{
+  mp_limb_t ul, vl, sl, rl, cy, cy1, cy2, mask;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
+
+  mask = -(mp_limb_t) (cnd != 0);      /* all ones if cnd != 0, otherwise 0 */
+  cy = 0;
+  do
+    {
+      ul = *up++;
+      vl = *vp++ & mask;               /* vl is forced to 0 when cnd == 0; vp is read either way */
+      sl = ul - vl;
+      cy1 = sl > ul;                   /* borrow out of ul - vl */
+      rl = sl - cy;                    /* apply the borrow from the previous limb */
+      cy2 = rl > sl;                   /* borrow out of that second subtraction */
+      cy = cy1 | cy2;                  /* borrow passed on to the next limb */
+      *rp++ = rl;
+    }
+  while (--n != 0);                    /* do-while: relies on n >= 1 */
+
+  return cy;                           /* final borrow; stays 0 when cnd == 0 */
+}
+
+#endif
+
+#if GMP_NAIL_BITS >= 1
+
+mp_limb_t
+mpn_subcnd_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t cnd)
+{
+  mp_limb_t ul, vl, rl, cy, mask;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
+
+  mask = -(mp_limb_t) (cnd != 0);      /* all ones if cnd != 0, otherwise 0 */
+  cy = 0;
+  do
+    {
+      ul = *up++;
+      vl = *vp++ & mask;               /* vl is forced to 0 when cnd == 0; vp is read either way */
+      rl = ul - vl - cy;               /* one subtraction, including the previous borrow */
+      cy = rl >> (GMP_LIMB_BITS - 1);  /* top bit of the limb serves as the borrow */
+      *rp++ = rl & GMP_NUMB_MASK;      /* store only the bits kept by GMP_NUMB_MASK */
+    }
+  while (--n != 0);                    /* do-while: relies on n >= 1 */
+
+  return cy;
+}
+
+#endif
diff --git a/mpn/generic/submul_1.c b/mpn/generic/submul_1.c

new file mode 100644 (file)

index 0000000..3e8e743
--- /dev/null
+++ b/mpn/generic/submul_1.c
@@ -0,0 +1,129 @@
+/* mpn_submul_1 -- multiply the N long limb vector pointed to by UP by VL,
+   subtract the N least significant limbs of the product from the limb
+   vector pointed to by RP.  Return the most significant limb of the
+   product, adjusted for carry-out from the subtraction.
+
+Copyright 1992, 1993, 1994, 1996, 2000, 2002, 2004 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+#if GMP_NAIL_BITS == 0
+
+mp_limb_t
+mpn_submul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl) /* {rp,n} -= {up,n} * vl */
+{
+  mp_limb_t ul, cl, hpl, lpl, rl;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+
+  cl = 0;
+  do
+    {
+      ul = *up++;
+      umul_ppmm (hpl, lpl, ul, vl);    /* hpl:lpl = ul * vl (two-limb product) */
+
+      lpl += cl;                       /* add the limb carried from the previous step */
+      cl = (lpl < cl) + hpl;           /* carry of that add plus the high product limb */
+
+      rl = *rp;
+      lpl = rl - lpl;
+      cl += lpl > rl;                  /* plus the borrow of the subtraction */
+      *rp++ = lpl;
+    }
+  while (--n != 0);                    /* do-while: relies on n >= 1 */
+
+  return cl;                           /* limb still to be subtracted above rp[n-1] */
+}
+
+#endif
+
+#if GMP_NAIL_BITS == 1
+
+mp_limb_t
+mpn_submul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl) /* one-nail-bit variant */
+{
+  mp_limb_t shifted_vl, ul, rl, lpl, hpl, prev_hpl, cl, xl, c1, c2, c3;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+  ASSERT_MPN (rp, n);
+  ASSERT_MPN (up, n);
+  ASSERT_LIMB (vl);
+
+  shifted_vl = vl << GMP_NAIL_BITS;    /* makes umul_ppmm split the product at GMP_NUMB_BITS */
+  cl = 0;
+  prev_hpl = 0;
+  do
+    {
+      ul = *up++;
+      rl = *rp;
+      umul_ppmm (hpl, lpl, ul, shifted_vl);
+      lpl >>= GMP_NAIL_BITS;           /* low product part, back in the numb bits */
+      SUBC_LIMB (c1, xl, rl, prev_hpl); /* presumably xl = rl - prev_hpl, borrow in c1 -- confirm */
+      SUBC_LIMB (c2, xl, xl, lpl);
+      SUBC_LIMB (c3, xl, xl, cl);
+      cl = c1 + c2 + c3;               /* borrows collected for the next limb */
+      *rp++ = xl;
+      prev_hpl = hpl;                  /* high part is subtracted one limb later */
+    }
+  while (--n != 0);                    /* do-while: relies on n >= 1 */
+
+  return prev_hpl + cl;                /* last high part plus pending borrows */
+}
+
+#endif
+
+#if GMP_NAIL_BITS >= 2
+
+mp_limb_t
+mpn_submul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl) /* variant for >= 2 nail bits */
+{
+  mp_limb_t shifted_vl, ul, rl, lpl, hpl, prev_hpl, xw, cl, xl;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+  ASSERT_MPN (rp, n);
+  ASSERT_MPN (up, n);
+  ASSERT_LIMB (vl);
+
+  shifted_vl = vl << GMP_NAIL_BITS;    /* makes umul_ppmm split the product at GMP_NUMB_BITS */
+  cl = 0;
+  prev_hpl = 0;
+  do
+    {
+      ul = *up++;
+      rl = *rp;
+      umul_ppmm (hpl, lpl, ul, shifted_vl);
+      lpl >>= GMP_NAIL_BITS;           /* low product part, back in the numb bits */
+      xw = rl - (prev_hpl + lpl) + cl; /* cl is 0 or (presumably) negative, see next line */
+      cl = (mp_limb_signed_t) xw >> GMP_NUMB_BITS; /* FIXME: non-portable */
+      xl = xw & GMP_NUMB_MASK;
+      *rp++ = xl;
+      prev_hpl = hpl;                  /* high part is subtracted one limb later */
+    }
+  while (--n != 0);                    /* do-while: relies on n >= 1 */
+
+  return prev_hpl - cl;                /* subtracting the non-positive cl adds the borrow */
+}
+
+#endif
diff --git a/mpn/generic/tdiv_qr.c b/mpn/generic/tdiv_qr.c

new file mode 100644 (file)

index 0000000..62d28a0
--- /dev/null
+++ b/mpn/generic/tdiv_qr.c
@@ -0,0 +1,378 @@
+/* mpn_tdiv_qr -- Divide the numerator (np,nn) by the denominator (dp,dn) and
+   write the nn-dn+1 quotient limbs at qp and the dn remainder limbs at rp.  If
+   qxn is non-zero, generate that many fraction limbs and append them after the
+   other quotient limbs, and update the remainder accordingly.  The input
+   operands are unaffected.
+
+   Preconditions:
+   1. The most significant limb of the divisor must be non-zero.
+   2. nn >= dn, even if qxn is non-zero.  (??? relax this ???)
+
+   The time complexity of this is O(qn*qn+M(dn,qn)), where M(m,n) is the time
+   complexity of multiplication.
+
+Copyright 1997, 2000, 2001, 2002, 2005, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+void
+mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
+            mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn)
+{
+  ASSERT_ALWAYS (qxn == 0);
+
+  ASSERT (nn >= 0);
+  ASSERT (dn >= 0);
+  ASSERT (dn == 0 || dp[dn - 1] != 0);
+  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1 + qxn, np, nn));
+  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1 + qxn, dp, dn));
+
+  switch (dn)
+    {
+    case 0:
+      DIVIDE_BY_ZERO;
+
+    case 1:
+      {
+       rp[0] = mpn_divrem_1 (qp, (mp_size_t) 0, np, nn, dp[0]);
+       return;
+      }
+
+    case 2:
+      {
+       mp_ptr n2p, d2p;
+       mp_limb_t qhl, cy;
+       TMP_DECL;
+       TMP_MARK;
+       if ((dp[1] & GMP_NUMB_HIGHBIT) == 0)
+         {
+           int cnt;
+           mp_limb_t dtmp[2];
+           count_leading_zeros (cnt, dp[1]);
+           cnt -= GMP_NAIL_BITS;
+           d2p = dtmp;
+           d2p[1] = (dp[1] << cnt) | (dp[0] >> (GMP_NUMB_BITS - cnt));
+           d2p[0] = (dp[0] << cnt) & GMP_NUMB_MASK;
+           n2p = TMP_ALLOC_LIMBS (nn + 1);
+           cy = mpn_lshift (n2p, np, nn, cnt);
+           n2p[nn] = cy;
+           qhl = mpn_divrem_2 (qp, 0L, n2p, nn + (cy != 0), d2p);
+           if (cy == 0)
+             qp[nn - 2] = qhl; /* always store nn-2+1 quotient limbs */
+           rp[0] = (n2p[0] >> cnt)
+             | ((n2p[1] << (GMP_NUMB_BITS - cnt)) & GMP_NUMB_MASK);
+           rp[1] = (n2p[1] >> cnt);
+         }
+       else
+         {
+           d2p = (mp_ptr) dp;
+           n2p = TMP_ALLOC_LIMBS (nn);
+           MPN_COPY (n2p, np, nn);
+           qhl = mpn_divrem_2 (qp, 0L, n2p, nn, d2p);
+           qp[nn - 2] = qhl;   /* always store nn-2+1 quotient limbs */
+           rp[0] = n2p[0];
+           rp[1] = n2p[1];
+         }
+       TMP_FREE;
+       return;
+      }
+
+    default:
+      {
+       int adjust;
+       gmp_pi1_t dinv;
+       TMP_DECL;
+       TMP_MARK;
+       adjust = np[nn - 1] >= dp[dn - 1];      /* conservative tests for quotient size */
+       if (nn + adjust >= 2 * dn)
+         {
+           mp_ptr n2p, d2p;
+           mp_limb_t cy;
+           int cnt;
+
+           qp[nn - dn] = 0;                      /* zero high quotient limb */
+           if ((dp[dn - 1] & GMP_NUMB_HIGHBIT) == 0) /* normalize divisor */
+             {
+               count_leading_zeros (cnt, dp[dn - 1]);
+               cnt -= GMP_NAIL_BITS;
+               d2p = TMP_ALLOC_LIMBS (dn);
+               mpn_lshift (d2p, dp, dn, cnt);
+               n2p = TMP_ALLOC_LIMBS (nn + 1);
+               cy = mpn_lshift (n2p, np, nn, cnt);
+               n2p[nn] = cy;
+               nn += adjust;
+             }
+           else
+             {
+               cnt = 0;
+               d2p = (mp_ptr) dp;
+               n2p = TMP_ALLOC_LIMBS (nn + 1);
+               MPN_COPY (n2p, np, nn);
+               n2p[nn] = 0;
+               nn += adjust;
+             }
+
+           invert_pi1 (dinv, d2p[dn - 1], d2p[dn - 2]);
+           if (BELOW_THRESHOLD (dn, DC_DIV_QR_THRESHOLD))
+             mpn_sbpi1_div_qr (qp, n2p, nn, d2p, dn, dinv.inv32);
+           else if (BELOW_THRESHOLD (dn, MUPI_DIV_QR_THRESHOLD) ||   /* fast condition */
+                    BELOW_THRESHOLD (nn, 2 * MU_DIV_QR_THRESHOLD) || /* fast condition */
+                    (double) (2 * (MU_DIV_QR_THRESHOLD - MUPI_DIV_QR_THRESHOLD)) * dn /* slow... */
+                    + (double) MUPI_DIV_QR_THRESHOLD * nn > (double) dn * nn)    /* ...condition */
+             mpn_dcpi1_div_qr (qp, n2p, nn, d2p, dn, &dinv);
+           else
+             {
+               mp_size_t itch = mpn_mu_div_qr_itch (nn, dn, 0);
+               mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
+               mpn_mu_div_qr (qp, rp, n2p, nn, d2p, dn, scratch);
+               n2p = rp;
+             }
+
+           if (cnt != 0)
+             mpn_rshift (rp, n2p, dn, cnt);
+           else
+             MPN_COPY (rp, n2p, dn);
+           TMP_FREE;
+           return;
+         }
+
+       /* When we come here, the numerator/partial remainder is less
+          than twice the size of the denominator.  */
+
+         {
+           /* Problem:
+
+              Divide a numerator N with nn limbs by a denominator D with dn
+              limbs forming a quotient of qn=nn-dn+1 limbs.  When qn is small
+              compared to dn, conventional division algorithms perform poorly.
+              We want an algorithm that has an expected running time that is
+              dependent only on qn.
+
+              Algorithm (very informally stated):
+
+              1) Divide the 2 x qn most significant limbs from the numerator
+                 by the qn most significant limbs from the denominator.  Call
+                 the result qest.  This is either the correct quotient, or
+                 1 or 2 too large.  Compute the remainder from the
+                 division.  (This step is implemented by a mpn_divrem call.)
+
+              2) Is the most significant limb from the remainder < p, where p
+                 is the product of the most significant limb from the quotient
+                 and the next(d)?  (Next(d) denotes the next ignored limb from
+                 the denominator.)  If it is, decrement qest, and adjust the
+                 remainder accordingly.
+
+              3) Is the remainder >= qest?  If it is, qest is the desired
+                 quotient.  The algorithm terminates.
+
+              4) Subtract qest x next(d) from the remainder.  If there is
+                 borrow out, decrement qest, and adjust the remainder
+                 accordingly.
+
+              5) Skip one word from the denominator (i.e., let next(d) denote
+                 the next less significant limb).  */
+
+           mp_size_t qn;
+           mp_ptr n2p, d2p;
+           mp_ptr tp;
+           mp_limb_t cy;
+           mp_size_t in, rn;
+           mp_limb_t quotient_too_large;
+           unsigned int cnt;
+
+           qn = nn - dn;
+           qp[qn] = 0;                         /* zero high quotient limb */
+           qn += adjust;                       /* qn cannot become bigger */
+
+           if (qn == 0)
+             {
+               MPN_COPY (rp, np, dn);
+               TMP_FREE;
+               return;
+             }
+
+           in = dn - qn;               /* (at least partially) ignored # of limbs in ops */
+           /* Normalize denominator by shifting it to the left such that its
+              most significant bit is set.  Then shift the numerator the same
+              amount, to mathematically preserve quotient.  */
+           if ((dp[dn - 1] & GMP_NUMB_HIGHBIT) == 0)
+             {
+               count_leading_zeros (cnt, dp[dn - 1]);
+               cnt -= GMP_NAIL_BITS;
+
+               d2p = TMP_ALLOC_LIMBS (qn);
+               mpn_lshift (d2p, dp + in, qn, cnt);
+               d2p[0] |= dp[in - 1] >> (GMP_NUMB_BITS - cnt);
+
+               n2p = TMP_ALLOC_LIMBS (2 * qn + 1);
+               cy = mpn_lshift (n2p, np + nn - 2 * qn, 2 * qn, cnt);
+               if (adjust)
+                 {
+                   n2p[2 * qn] = cy;
+                   n2p++;
+                 }
+               else
+                 {
+                   n2p[0] |= np[nn - 2 * qn - 1] >> (GMP_NUMB_BITS - cnt);
+                 }
+             }
+           else
+             {
+               cnt = 0;
+               d2p = (mp_ptr) dp + in;
+
+               n2p = TMP_ALLOC_LIMBS (2 * qn + 1);
+               MPN_COPY (n2p, np + nn - 2 * qn, 2 * qn);
+               if (adjust)
+                 {
+                   n2p[2 * qn] = 0;
+                   n2p++;
+                 }
+             }
+
+           /* Get an approximate quotient using the extracted operands.  */
+           if (qn == 1)
+             {
+               mp_limb_t q0, r0;
+               udiv_qrnnd (q0, r0, n2p[1], n2p[0] << GMP_NAIL_BITS, d2p[0] << GMP_NAIL_BITS);
+               n2p[0] = r0 >> GMP_NAIL_BITS;
+               qp[0] = q0;
+             }
+           else if (qn == 2)
+             mpn_divrem_2 (qp, 0L, n2p, 4L, d2p); /* FIXME: obsolete function */
+           else
+             {
+               invert_pi1 (dinv, d2p[qn - 1], d2p[qn - 2]);
+               if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
+                 mpn_sbpi1_div_qr (qp, n2p, 2 * qn, d2p, qn, dinv.inv32);
+               else if (BELOW_THRESHOLD (qn, MU_DIV_QR_THRESHOLD))
+                 mpn_dcpi1_div_qr (qp, n2p, 2 * qn, d2p, qn, &dinv);
+               else
+                 {
+                   mp_size_t itch = mpn_mu_div_qr_itch (2 * qn, qn, 0);
+                   mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
+                   mp_ptr r2p = rp;
+                   if (np == r2p)      /* If N and R share space, put ... */
+                     r2p += nn - qn;   /* intermediate remainder at N's upper end. */
+                   mpn_mu_div_qr (qp, r2p, n2p, 2 * qn, d2p, qn, scratch);
+                   MPN_COPY (n2p, r2p, qn);
+                 }
+             }
+
+           rn = qn;
+           /* Multiply the first ignored divisor limb by the most significant
+              quotient limb.  If that product is > the partial remainder's
+              most significant limb, we know the quotient is too large.  This
+              test quickly catches most cases where the quotient is too large;
+              it catches all cases where the quotient is 2 too large.  */
+           {
+             mp_limb_t dl, x;
+             mp_limb_t h, dummy;
+
+             if (in - 2 < 0)
+               dl = 0;
+             else
+               dl = dp[in - 2];
+
+#if GMP_NAIL_BITS == 0
+             x = (dp[in - 1] << cnt) | ((dl >> 1) >> ((~cnt) % GMP_LIMB_BITS));
+#else
+             x = (dp[in - 1] << cnt) & GMP_NUMB_MASK;
+             if (cnt != 0)
+               x |= dl >> (GMP_NUMB_BITS - cnt);
+#endif
+             umul_ppmm (h, dummy, x, qp[qn - 1] << GMP_NAIL_BITS);
+
+             if (n2p[qn - 1] < h)
+               {
+                 mp_limb_t cy;
+
+                 mpn_decr_u (qp, (mp_limb_t) 1);
+                 cy = mpn_add_n (n2p, n2p, d2p, qn);
+                 if (cy)
+                   {
+                     /* The partial remainder is safely large.  */
+                     n2p[qn] = cy;
+                     ++rn;
+                   }
+               }
+           }
+
+           quotient_too_large = 0;
+           if (cnt != 0)
+             {
+               mp_limb_t cy1, cy2;
+
+               /* Append partially used numerator limb to partial remainder.  */
+               cy1 = mpn_lshift (n2p, n2p, rn, GMP_NUMB_BITS - cnt);
+               n2p[0] |= np[in - 1] & (GMP_NUMB_MASK >> cnt);
+
+               /* Update partial remainder with partially used divisor limb.  */
+               cy2 = mpn_submul_1 (n2p, qp, qn, dp[in - 1] & (GMP_NUMB_MASK >> cnt));
+               if (qn != rn)
+                 {
+                   ASSERT_ALWAYS (n2p[qn] >= cy2);
+                   n2p[qn] -= cy2;
+                 }
+               else
+                 {
+                   n2p[qn] = cy1 - cy2; /* & GMP_NUMB_MASK; */
+
+                   quotient_too_large = (cy1 < cy2);
+                   ++rn;
+                 }
+               --in;
+             }
+           /* True: partial remainder now is neutral, i.e., it is not shifted up.  */
+
+           tp = TMP_ALLOC_LIMBS (dn);
+
+           if (in < qn)
+             {
+               if (in == 0)
+                 {
+                   MPN_COPY (rp, n2p, rn);
+                   ASSERT_ALWAYS (rn == dn);
+                   goto foo;
+                 }
+               mpn_mul (tp, qp, qn, dp, in);
+             }
+           else
+             mpn_mul (tp, dp, in, qp, qn);
+
+           cy = mpn_sub (n2p, n2p, rn, tp + in, qn);
+           MPN_COPY (rp + in, n2p, dn - in);
+           quotient_too_large |= cy;
+           cy = mpn_sub_n (rp, np, tp, in);
+           cy = mpn_sub_1 (rp + in, rp + in, rn, cy);
+           quotient_too_large |= cy;
+         foo:
+           if (quotient_too_large)
+             {
+               mpn_decr_u (qp, (mp_limb_t) 1);
+               mpn_add_n (rp, rp, dp, dn);
+             }
+         }
+       TMP_FREE;
+       return;
+      }
+    }
+}
diff --git a/mpn/generic/toom22_mul.c b/mpn/generic/toom22_mul.c

new file mode 100644 (file)

index 0000000..fc296df
--- /dev/null
+++ b/mpn/generic/toom22_mul.c
@@ -0,0 +1,198 @@
+/* mpn_toom22_mul -- Multiply {ap,an} and {bp,bn} where an >= bn.  Or more
+   accurately, bn <= an < 2bn.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Evaluate in: -1, 0, +inf
+
+  <-s--><--n-->
+   ____ ______
+  |_a1_|___a0_|
+   |b1_|___b0_|
+   <-t-><--n-->
+
+  v0  =  a0     * b0       #   A(0)*B(0)
+  vm1 = (a0- a1)*(b0- b1)  #  A(-1)*B(-1)
+  vinf=      a1 *     b1   # A(inf)*B(inf)
+*/
+
+#if TUNE_PROGRAM_BUILD
+#define MAYBE_mul_toom22   1
+#else
+#define MAYBE_mul_toom22                                               \
+  (MUL_TOOM33_THRESHOLD >= 2 * MUL_TOOM22_THRESHOLD)
+#endif
+
+#define TOOM22_MUL_N_REC(p, a, b, n, ws)                               \
+  do {                                                                 \
+    if (! MAYBE_mul_toom22                                             \
+       || BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))                   \
+      mpn_mul_basecase (p, a, n, b, n);                                        \
+    else                                                               \
+      mpn_toom22_mul (p, a, n, b, n, ws);                              \
+  } while (0)
+
+/* Normally, this calls mul_basecase or toom22_mul.  But when the fraction
+   MUL_TOOM33_THRESHOLD / MUL_TOOM22_THRESHOLD is large, an initially small
+   relative unbalance will become a larger and larger relative unbalance with
+   each recursion (the difference s-t will be invariant over recursive calls).
+   Therefore, we need to call toom32_mul.  FIXME: Suppress depending on
+   MUL_TOOM33_THRESHOLD / MUL_TOOM22_THRESHOLD and on MUL_TOOM22_THRESHOLD.  */
+#define TOOM22_MUL_REC(p, a, an, b, bn, ws)                            \
+  do {                                                                 \
+    if (! MAYBE_mul_toom22                                             \
+       || BELOW_THRESHOLD (bn, MUL_TOOM22_THRESHOLD))                  \
+      mpn_mul_basecase (p, a, an, b, bn);                              \
+    else if (4 * an < 5 * bn)                                          \
+      mpn_toom22_mul (p, a, an, b, bn, ws);                            \
+    else                                                               \
+      mpn_toom32_mul (p, a, an, b, bn, ws);                            \
+  } while (0)
+
+void
+mpn_toom22_mul (mp_ptr pp,
+               mp_srcptr ap, mp_size_t an,
+               mp_srcptr bp, mp_size_t bn,
+               mp_ptr scratch)
+{
+  mp_size_t n, s, t;
+  int vm1_neg;
+  mp_limb_t cy, cy2;
+  mp_ptr asm1;
+  mp_ptr bsm1;
+
+#define a0  ap
+#define a1  (ap + n)
+#define b0  bp
+#define b1  (bp + n)
+
+  s = an >> 1;
+  n = an - s;
+  t = bn - n;
+
+  ASSERT (an >= bn);
+
+  ASSERT (0 < s && s <= n);
+  ASSERT (0 < t && t <= s);
+
+  asm1 = pp;
+  bsm1 = pp + n;
+
+  vm1_neg = 0;
+
+  /* Compute asm1 = |a0 - a1| (n limbs); vm1_neg is set when a0 < a1.  */
+  if (s == n)
+    {
+      if (mpn_cmp (a0, a1, n) < 0)
+       {
+         mpn_sub_n (asm1, a1, a0, n);
+         vm1_neg = 1;
+       }
+      else
+       {
+         mpn_sub_n (asm1, a0, a1, n);
+       }
+    }
+  else
+    {
+      if (mpn_zero_p (a0 + s, n - s) && mpn_cmp (a0, a1, s) < 0)
+       {
+         mpn_sub_n (asm1, a1, a0, s);
+         MPN_ZERO (asm1 + s, n - s);
+         vm1_neg = 1;
+       }
+      else
+       {
+         mpn_sub (asm1, a0, n, a1, s);
+       }
+    }
+
+  /* Compute bsm1 = |b0 - b1| (n limbs); vm1_neg is toggled when b0 < b1.  */
+  if (t == n)
+    {
+      if (mpn_cmp (b0, b1, n) < 0)
+       {
+         mpn_sub_n (bsm1, b1, b0, n);
+         vm1_neg ^= 1;
+       }
+      else
+       {
+         mpn_sub_n (bsm1, b0, b1, n);
+       }
+    }
+  else
+    {
+      if (mpn_zero_p (b0 + t, n - t) && mpn_cmp (b0, b1, t) < 0)
+       {
+         mpn_sub_n (bsm1, b1, b0, t);
+         MPN_ZERO (bsm1 + t, n - t);
+         vm1_neg ^= 1;
+       }
+      else
+       {
+         mpn_sub (bsm1, b0, n, b1, t);
+       }
+    }
+
+#define v0     pp                              /* 2n */
+#define vinf   (pp + 2 * n)                    /* s+t */
+#define vm1    scratch                         /* 2n */
+#define scratch_out    scratch + 2 * n
+
+  /* vm1 = asm1 * bsm1 = |a0 - a1| * |b0 - b1|, 2n limbs */
+  TOOM22_MUL_N_REC (vm1, asm1, bsm1, n, scratch_out);
+
+  if (s > t)  TOOM22_MUL_REC (vinf, a1, s, b1, t, scratch_out);
+  else        TOOM22_MUL_N_REC (vinf, a1, b1, s, scratch_out);
+
+  /* v0 = a0 * b0, 2n limbs */
+  TOOM22_MUL_N_REC (v0, ap, bp, n, scratch_out);
+
+  /* H(v0) + L(vinf), stored at pp + 2n */
+  cy = mpn_add_n (pp + 2 * n, v0 + n, vinf, n);
+
+  /* L(v0) + (H(v0) + L(vinf)), stored at pp + n */
+  cy2 = cy + mpn_add_n (pp + n, pp + 2 * n, v0, n);
+
+  /* (H(v0) + L(vinf)) + H(vinf), updated in place at pp + 2n */
+  cy += mpn_add (pp + 2 * n, pp + 2 * n, n, vinf + n, s + t - n);
+
+  if (vm1_neg)
+    cy += mpn_add_n (pp + n, pp + n, vm1, 2 * n);
+  else
+    cy -= mpn_sub_n (pp + n, pp + n, vm1, 2 * n);
+
+  ASSERT (cy + 1  <= 3);
+  ASSERT (cy2 <= 2);
+
+  mpn_incr_u (pp + 2 * n, cy2);
+  if (LIKELY (cy <= 2))
+    mpn_incr_u (pp + 3 * n, cy);
+  else
+    mpn_decr_u (pp + 3 * n, 1);
+}
diff --git a/mpn/generic/toom2_sqr.c b/mpn/generic/toom2_sqr.c

new file mode 100644 (file)

index 0000000..912feda
--- /dev/null
+++ b/mpn/generic/toom2_sqr.c
@@ -0,0 +1,134 @@
+/* mpn_toom2_sqr -- Square {ap,an}.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Evaluate in: -1, 0, +inf
+
+  <-s--><--n-->
+   ____ ______
+  |_a1_|___a0_|
+
+  v0  =  a0     ^2  #   A(0)^2
+  vm1 = (a0- a1)^2  #  A(-1)^2
+  vinf=      a1 ^2  # A(inf)^2
+*/
+
+#if TUNE_PROGRAM_BUILD
+#define MAYBE_sqr_toom2   1
+#else
+#define MAYBE_sqr_toom2                                                        \
+  (SQR_TOOM3_THRESHOLD >= 2 * SQR_TOOM2_THRESHOLD)
+#endif
+
+#define TOOM2_SQR_REC(p, a, n, ws)                                     \
+  do {                                                                 \
+    if (! MAYBE_sqr_toom2                                              \
+       || BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD))                    \
+      mpn_sqr_basecase (p, a, n);                                      \
+    else                                                               \
+      mpn_toom2_sqr (p, a, n, ws);                                     \
+  } while (0)
+
+void
+mpn_toom2_sqr (mp_ptr pp,
+              mp_srcptr ap, mp_size_t an,
+              mp_ptr scratch)
+{
+  mp_size_t n, s;
+  mp_limb_t cy, cy2;
+  mp_ptr asm1;
+
+#define a0  ap
+#define a1  (ap + n)
+
+  s = an >> 1;
+  n = an - s;
+
+  ASSERT (0 < s && s <= n);
+
+  asm1 = pp;
+
+  /* Compute asm1 = |a0 - a1|; no sign is kept since the value is squared.  */
+  if (s == n)
+    {
+      if (mpn_cmp (a0, a1, n) < 0)
+       {
+         mpn_sub_n (asm1, a1, a0, n);
+       }
+      else
+       {
+         mpn_sub_n (asm1, a0, a1, n);
+       }
+    }
+  else
+    {
+      if (mpn_zero_p (a0 + s, n - s) && mpn_cmp (a0, a1, s) < 0)
+       {
+         mpn_sub_n (asm1, a1, a0, s);
+         MPN_ZERO (asm1 + s, n - s);
+       }
+      else
+       {
+         mpn_sub (asm1, a0, n, a1, s);
+       }
+    }
+
+#define v0     pp                              /* 2n */
+#define vinf   (pp + 2 * n)                    /* s+s */
+#define vm1    scratch                         /* 2n */
+#define scratch_out    scratch + 2 * n
+
+  /* vm1 = asm1^2 = (a0 - a1)^2, 2n limbs */
+  TOOM2_SQR_REC (vm1, asm1, n, scratch_out);
+
+  /* vinf = a1^2, s+s limbs */
+  TOOM2_SQR_REC (vinf, a1, s, scratch_out);
+
+  /* v0 = a0^2, 2n limbs */
+  TOOM2_SQR_REC (v0, ap, n, scratch_out);
+
+  /* H(v0) + L(vinf), stored at pp + 2n */
+  cy = mpn_add_n (pp + 2 * n, v0 + n, vinf, n);
+
+  /* L(v0) + (H(v0) + L(vinf)), stored at pp + n */
+  cy2 = cy + mpn_add_n (pp + n, pp + 2 * n, v0, n);
+
+  /* (H(v0) + L(vinf)) + H(vinf), updated in place at pp + 2n */
+  cy += mpn_add (pp + 2 * n, pp + 2 * n, n, vinf + n, s + s - n);
+
+  cy -= mpn_sub_n (pp + n, pp + n, vm1, 2 * n);
+
+  ASSERT (cy + 1  <= 3);
+  ASSERT (cy2 <= 2);
+
+  mpn_incr_u (pp + 2 * n, cy2);
+  if (LIKELY (cy <= 2))
+    mpn_incr_u (pp + 3 * n, cy);
+  else
+    mpn_decr_u (pp + 3 * n, 1);
+}
diff --git a/mpn/generic/toom32_mul.c b/mpn/generic/toom32_mul.c

new file mode 100644 (file)

index 0000000..2f61fad
--- /dev/null
+++ b/mpn/generic/toom32_mul.c
@@ -0,0 +1,312 @@
+/* mpn_toom32_mul -- Multiply {ap,an} and {bp,bn} where an is nominally 1.5
+   times as large as bn.  Or more accurately, bn < an < 3bn.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+   Improvements by Marco Bodrato and Niels Möller.
+
+   The idea of applying toom to unbalanced multiplication is due to Marco
+   Bodrato and Alberto Zanoni.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Evaluate in: -1, 0, +1, +inf
+
+  <-s-><--n--><--n-->
+   ___ ______ ______
+  |a2_|___a1_|___a0_|
+       |_b1_|___b0_|
+       <-t--><--n-->
+
+  v0  =  a0         * b0      #   A(0)*B(0)
+  v1  = (a0+ a1+ a2)*(b0+ b1) #   A(1)*B(1)      ah  <= 2  bh <= 1
+  vm1 = (a0- a1+ a2)*(b0- b1) #  A(-1)*B(-1)    |ah| <= 1  bh = 0
+  vinf=          a2 *     b1  # A(inf)*B(inf)
+*/
+
+#define TOOM32_MUL_N_REC(p, a, b, n, ws)                               \
+  do {                                                                 \
+    mpn_mul_n (p, a, b, n);                                            \
+  } while (0)
+
+void
+mpn_toom32_mul (mp_ptr pp,
+               mp_srcptr ap, mp_size_t an,
+               mp_srcptr bp, mp_size_t bn,
+               mp_ptr scratch)
+{
+  mp_size_t n, s, t;
+  int vm1_neg;
+  mp_limb_t cy;
+  int hi;
+  mp_limb_t ap1_hi, bp1_hi;
+
+#define a0  ap
+#define a1  (ap + n)
+#define a2  (ap + 2 * n)
+#define b0  bp
+#define b1  (bp + n)
+
+  /* Required, to ensure that s + t >= n. */
+  ASSERT (bn + 2 <= an && an + 6 <= 3*bn);
+
+  n = 1 + (2 * an >= 3 * bn ? (an - 1) / (size_t) 3 : (bn - 1) >> 1);
+
+  s = an - 2 * n;
+  t = bn - n;
+
+  ASSERT (0 < s && s <= n);
+  ASSERT (0 < t && t <= n);
+  ASSERT (s + t >= n);
+
+  /* Product area of size an + bn = 3*n + s + t >= 4*n + 2. */
+#define ap1 (pp)               /* n, most significant limb in ap1_hi */
+#define bp1 (pp + n)           /* n, most significant bit in bp1_hi */
+#define am1 (pp + 2*n)         /* n, most significant bit in hi */
+#define bm1 (pp + 3*n)         /* n */
+#define v1 (scratch)           /* 2n + 1 */
+#define vm1 (pp)               /* 2n + 1 */
+#define scratch_out (scratch + 2*n + 1) /* Currently unused. */
+
+  /* Scratch need: 2*n + 1 + scratch for the recursive multiplications. */
+
+  /* FIXME: Keep v1[2*n] and vm1[2*n] in scalar variables? */
+
+  /* Compute ap1 = a0 + a1 + a2, am1 = |a0 - a1 + a2| (sign in vm1_neg) */
+  ap1_hi = mpn_add (ap1, a0, n, a2, s);
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  if (ap1_hi == 0 && mpn_cmp (ap1, a1, n) < 0)
+    {
+      ap1_hi = mpn_add_n_sub_n (ap1, am1, a1, ap1, n) >> 1;
+      hi = 0;
+      vm1_neg = 1;
+    }
+  else
+    {
+      cy = mpn_add_n_sub_n (ap1, am1, ap1, a1, n);
+      hi = ap1_hi - (cy & 1);
+      ap1_hi += (cy >> 1);
+      vm1_neg = 0;
+    }
+#else
+  if (ap1_hi == 0 && mpn_cmp (ap1, a1, n) < 0)
+    {
+      ASSERT_NOCARRY (mpn_sub_n (am1, a1, ap1, n));
+      hi = 0;
+      vm1_neg = 1;
+    }
+  else
+    {
+      hi = ap1_hi - mpn_sub_n (am1, ap1, a1, n);
+      vm1_neg = 0;
+    }
+  ap1_hi += mpn_add_n (ap1, ap1, a1, n);
+#endif
+
+  /* Compute bp1 = b0 + b1 and bm1 = |b0 - b1| (vm1_neg toggled if b0 < b1). */
+  if (t == n)
+    {
+#if HAVE_NATIVE_mpn_add_n_sub_n
+      if (mpn_cmp (b0, b1, n) < 0)
+       {
+         cy = mpn_add_n_sub_n (bp1, bm1, b1, b0, n);
+         vm1_neg ^= 1;
+       }
+      else
+       {
+         cy = mpn_add_n_sub_n (bp1, bm1, b0, b1, n);
+       }
+      bp1_hi = cy >> 1;
+#else
+      bp1_hi = mpn_add_n (bp1, b0, b1, n);
+
+      if (mpn_cmp (b0, b1, n) < 0)
+       {
+         ASSERT_NOCARRY (mpn_sub_n (bm1, b1, b0, n));
+         vm1_neg ^= 1;
+       }
+      else
+       {
+         ASSERT_NOCARRY (mpn_sub_n (bm1, b0, b1, n));
+       }
+#endif
+    }
+  else
+    {
+      /* FIXME: Should still use mpn_add_n_sub_n for the main part. */
+      bp1_hi = mpn_add (bp1, b0, n, b1, t);
+
+      if (mpn_zero_p (b0 + t, n - t) && mpn_cmp (b0, b1, t) < 0)
+       {
+         ASSERT_NOCARRY (mpn_sub_n (bm1, b1, b0, t));
+         MPN_ZERO (bm1 + t, n - t);
+         vm1_neg ^= 1;
+       }
+      else
+       {
+         ASSERT_NOCARRY (mpn_sub (bm1, b0, n, b1, t));
+       }
+    }
+
+  TOOM32_MUL_N_REC (v1, ap1, bp1, n, scratch_out);
+  if (ap1_hi == 1)
+    {
+      cy = bp1_hi + mpn_add_n (v1 + n, v1 + n, bp1, n);
+    }
+  else if (ap1_hi == 2)
+    {
+#if HAVE_NATIVE_mpn_addlsh1_n
+      cy = 2 * bp1_hi + mpn_addlsh1_n (v1 + n, v1 + n, bp1, n);
+#else
+      cy = 2 * bp1_hi + mpn_addmul_1 (v1 + n, bp1, n, CNST_LIMB(2));
+#endif
+    }
+  else
+    cy = 0;
+  if (bp1_hi != 0)
+    cy += mpn_add_n (v1 + n, v1 + n, ap1, n);
+  v1[2 * n] = cy;
+
+  TOOM32_MUL_N_REC (vm1, am1, bm1, n, scratch_out);
+  if (hi)
+    hi = mpn_add_n (vm1+n, vm1+n, bm1, n);
+
+  vm1[2*n] = hi;
+
+  /* v1 <-- (v1 + vm1) / 2 = x0 + x2 */
+  if (vm1_neg)
+    {
+#if HAVE_NATIVE_mpn_rsh1sub_n
+      mpn_rsh1sub_n (v1, v1, vm1, 2*n+1);
+#else
+      mpn_sub_n (v1, v1, vm1, 2*n+1);
+      ASSERT_NOCARRY (mpn_rshift (v1, v1, 2*n+1, 1));
+#endif
+    }
+  else
+    {
+#if HAVE_NATIVE_mpn_rsh1add_n
+      mpn_rsh1add_n (v1, v1, vm1, 2*n+1);
+#else
+      mpn_add_n (v1, v1, vm1, 2*n+1);
+      ASSERT_NOCARRY (mpn_rshift (v1, v1, 2*n+1, 1));
+#endif
+    }
+
+  /* We get x1 + x3 = (x0 + x2) - (x0 - x1 + x2 - x3), and hence
+
+     y = x1 + x3 + (x0 + x2) * B
+       = (x0 + x2) * B + (x0 + x2) - vm1.
+
+     y is 3*n + 1 limbs, y = y0 + y1 B + y2 B^2. We store them as
+     follows: y0 at scratch, y1 at pp + 2*n, and y2 at scratch + n
+     (already in place, except for carry propagation).
+
+     We thus add
+
+   B^3  B^2   B    1
+    |    |    |    |
+   +-----+----+
+ + |  x0 + x2 |
+   +----+-----+----+
+ +      |  x0 + x2 |
+       +----------+
+ -      |  vm1     |
+ --+----++----+----+-
+   | y2  | y1 | y0 |
+   +-----+----+----+
+
+  Since we store y0 at the same location as the low half of x0 + x2, we
+  need to do the middle sum first. */
+
+  hi = vm1[2*n];
+  cy = mpn_add_n (pp + 2*n, v1, v1 + n, n);
+  MPN_INCR_U (v1 + n, n + 1, cy + v1[2*n]);
+
+  /* FIXME: Can we get rid of this second vm1_neg conditional by
+     swapping the location of +1 and -1 values? */
+  if (vm1_neg)
+    {
+      cy = mpn_add_n (v1, v1, vm1, n);
+      hi += mpn_add_nc (pp + 2*n, pp + 2*n, vm1 + n, n, cy);
+      MPN_INCR_U (v1 + n, n+1, hi);
+    }
+  else
+    {
+      cy = mpn_sub_n (v1, v1, vm1, n);
+      hi += mpn_sub_nc (pp + 2*n, pp + 2*n, vm1 + n, n, cy);
+      MPN_DECR_U (v1 + n, n+1, hi);
+    }
+
+  TOOM32_MUL_N_REC (pp, a0, b0, n, scratch_out);
+  /* vinf, s+t limbs.  Use mpn_mul for now, to handle unbalanced operands */
+  if (s > t)  mpn_mul (pp+3*n, a2, s, b1, t);
+  else        mpn_mul (pp+3*n, b1, t, a2, s);
+
+  /* Remaining interpolation.
+
+     y * B + x0 + x3 B^3 - x0 B^2 - x3 B
+     = (x1 + x3) B + (x0 + x2) B^2 + x0 + x3 B^3 - x0 B^2 - x3 B
+     = y0 B + y1 B^2 + y2 B^3 + Lx0 + H x0 B
+       + L x3 B^3 + H x3 B^4 - Lx0 B^2 - H x0 B^3 - L x3 B - H x3 B^2
+     = L x0 + (y0 + H x0 - L x3) B + (y1 - L x0 - H x3) B^2
+       + (y2 - (H x0 - L x3)) B^3 + H x3 B^4
+
+         B^4       B^3       B^2        B         1
+ |         |         |         |         |         |
+   +-------+                   +---------+---------+
+   |  Hx3  |                   | Hx0-Lx3 |    Lx0  |
+   +------+----------+---------+---------+---------+
+         |    y2    |  y1     |   y0    |
+         ++---------+---------+---------+
+         -| Hx0-Lx3 | - Lx0   |
+          +---------+---------+
+                     | - Hx3  |
+                     +--------+
+
+    We must take into account the carry from Hx0 - Lx3.
+  */
+
+  cy = mpn_sub_n (pp + n, pp + n, pp+3*n, n);
+  hi = scratch[2*n] + cy;
+
+  cy = mpn_sub_nc (pp + 2*n, pp + 2*n, pp, n, cy);
+  hi -= mpn_sub_nc (pp + 3*n, scratch + n, pp + n, n, cy);
+
+  hi += mpn_add (pp + n, pp + n, 3*n, scratch, n);
+
+  /* FIXME: Is support for s + t == n needed? */
+  if (LIKELY (s + t > n))
+    {
+      hi -= mpn_sub (pp + 2*n, pp + 2*n, 2*n, pp + 4*n, s+t-n);
+
+      if (hi < 0)
+       MPN_DECR_U (pp + 4*n, s+t-n, -hi);
+      else
+       MPN_INCR_U (pp + 4*n, s+t-n, hi);
+    }
+  else
+    ASSERT (hi == 0);
+}
diff --git a/mpn/generic/toom33_mul.c b/mpn/generic/toom33_mul.c

new file mode 100644 (file)

index 0000000..cb30df6
--- /dev/null
+++ b/mpn/generic/toom33_mul.c
@@ -0,0 +1,304 @@
+/* mpn_toom33_mul -- Multiply {ap,an} and {bp,bn} where an and bn are close in
+   size.  Or more accurately, bn <= an < (3/2)bn.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+   Additional improvements by Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2006, 2007, 2008, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Evaluate in: -1, 0, +1, +2, +inf
+
+  <-s--><--n--><--n-->
+   ____ ______ ______
+  |_a2_|___a1_|___a0_|
+   |b2_|___b1_|___b0_|
+   <-t-><--n--><--n-->
+
+  v0  =  a0         * b0          #   A(0)*B(0)
+  v1  = (a0+ a1+ a2)*(b0+ b1+ b2) #   A(1)*B(1)      ah  <= 2  bh <= 2
+  vm1 = (a0- a1+ a2)*(b0- b1+ b2) #  A(-1)*B(-1)    |ah| <= 1  bh <= 1
+  v2  = (a0+2a1+4a2)*(b0+2b1+4b2) #   A(2)*B(2)      ah  <= 6  bh <= 6
+  vinf=          a2 *         b2  # A(inf)*B(inf)
+*/
+
+#if TUNE_PROGRAM_BUILD
+#define MAYBE_mul_basecase 1
+#define MAYBE_mul_toom33   1
+#else
+#define MAYBE_mul_basecase                                             \
+  (MUL_TOOM33_THRESHOLD < 3 * MUL_TOOM22_THRESHOLD)
+#define MAYBE_mul_toom33                                               \
+  (MUL_TOOM44_THRESHOLD >= 3 * MUL_TOOM33_THRESHOLD)
+#endif
+
+/* FIXME: TOOM33_MUL_N_REC is not quite right for a balanced
+   multiplication at the infinity point. We may have
+   MAYBE_mul_basecase == 0, and still get s just below
+   MUL_TOOM22_THRESHOLD. If MUL_TOOM33_THRESHOLD == 7, we can even get
+   s == 1 and mpn_toom22_mul will crash.
+*/
+
+#define TOOM33_MUL_N_REC(p, a, b, n, ws)                               \
+  do {                                                                 \
+    if (MAYBE_mul_basecase                                             \
+       && BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))                   \
+      mpn_mul_basecase (p, a, n, b, n);                                        \
+    else if (! MAYBE_mul_toom33                                                \
+            || BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD))              \
+      mpn_toom22_mul (p, a, n, b, n, ws);                              \
+    else                                                               \
+      mpn_toom33_mul (p, a, n, b, n, ws);                              \
+  } while (0)
+
+void
+mpn_toom33_mul (mp_ptr pp,
+               mp_srcptr ap, mp_size_t an,
+               mp_srcptr bp, mp_size_t bn,
+               mp_ptr scratch)
+{
+  mp_size_t n, s, t;
+  int vm1_neg;
+  mp_limb_t cy, vinf0;
+  mp_ptr gp;
+  mp_ptr as1, asm1, as2;
+  mp_ptr bs1, bsm1, bs2;
+
+#define a0  ap
+#define a1  (ap + n)
+#define a2  (ap + 2*n)
+#define b0  bp
+#define b1  (bp + n)
+#define b2  (bp + 2*n)
+
+  n = (an + 2) / (size_t) 3;
+
+  s = an - 2 * n;
+  t = bn - 2 * n;
+
+  ASSERT (an >= bn);
+
+  ASSERT (0 < s && s <= n);
+  ASSERT (0 < t && t <= n);
+
+  as1  = scratch + 4 * n + 4;
+  asm1 = scratch + 2 * n + 2;
+  as2 = pp + n + 1;
+
+  bs1 = pp;
+  bsm1 = scratch + 3 * n + 3; /* we need 4n+4 <= 4n+s+t */
+  bs2 = pp + 2 * n + 2;
+
+  gp = scratch;
+
+  vm1_neg = 0;
+
+  /* Compute as1 = a0 + a1 + a2 and asm1 = |a0 - a1 + a2| (sign in vm1_neg).  */
+  cy = mpn_add (gp, a0, n, a2, s);
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  if (cy == 0 && mpn_cmp (gp, a1, n) < 0)
+    {
+      cy = mpn_add_n_sub_n (as1, asm1, a1, gp, n);
+      as1[n] = cy >> 1;
+      asm1[n] = 0;
+      vm1_neg = 1;
+    }
+  else
+    {
+      mp_limb_t cy2;
+      cy2 = mpn_add_n_sub_n (as1, asm1, gp, a1, n);
+      as1[n] = cy + (cy2 >> 1);
+      asm1[n] = cy - (cy2 & 1);
+    }
+#else
+  as1[n] = cy + mpn_add_n (as1, gp, a1, n);
+  if (cy == 0 && mpn_cmp (gp, a1, n) < 0)
+    {
+      mpn_sub_n (asm1, a1, gp, n);
+      asm1[n] = 0;
+      vm1_neg = 1;
+    }
+  else
+    {
+      cy -= mpn_sub_n (asm1, gp, a1, n);
+      asm1[n] = cy;
+    }
+#endif
+
+  /* Compute as2 = a0 + 2*a1 + 4*a2 (n+1 limbs, top limb in as2[n]).  */
+#if HAVE_NATIVE_mpn_rsblsh1_n
+  cy = mpn_add_n (as2, a2, as1, s);
+  if (s != n)
+    cy = mpn_add_1 (as2 + s, as1 + s, n - s, cy);
+  cy += as1[n];
+  cy = 2 * cy + mpn_rsblsh1_n (as2, a0, as2, n);
+#else
+#if HAVE_NATIVE_mpn_addlsh1_n
+  cy  = mpn_addlsh1_n (as2, a1, a2, s);
+  if (s != n)
+    cy = mpn_add_1 (as2 + s, a1 + s, n - s, cy);
+  cy = 2 * cy + mpn_addlsh1_n (as2, a0, as2, n);
+#else
+  cy = mpn_add_n (as2, a2, as1, s);
+  if (s != n)
+    cy = mpn_add_1 (as2 + s, as1 + s, n - s, cy);
+  cy += as1[n];
+  cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
+  cy -= mpn_sub_n (as2, as2, a0, n);
+#endif
+#endif
+  as2[n] = cy;
+
+  /* Compute bs1 = b0 + b1 + b2 and bsm1 = |b0 - b1 + b2| (toggles vm1_neg).  */
+  cy = mpn_add (gp, b0, n, b2, t);
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  if (cy == 0 && mpn_cmp (gp, b1, n) < 0)
+    {
+      cy = mpn_add_n_sub_n (bs1, bsm1, b1, gp, n);
+      bs1[n] = cy >> 1;
+      bsm1[n] = 0;
+      vm1_neg ^= 1;
+    }
+  else
+    {
+      mp_limb_t cy2;
+      cy2 = mpn_add_n_sub_n (bs1, bsm1, gp, b1, n);
+      bs1[n] = cy + (cy2 >> 1);
+      bsm1[n] = cy - (cy2 & 1);
+    }
+#else
+  bs1[n] = cy + mpn_add_n (bs1, gp, b1, n);
+  if (cy == 0 && mpn_cmp (gp, b1, n) < 0)
+    {
+      mpn_sub_n (bsm1, b1, gp, n);
+      bsm1[n] = 0;
+      vm1_neg ^= 1;
+    }
+  else
+    {
+      cy -= mpn_sub_n (bsm1, gp, b1, n);
+      bsm1[n] = cy;
+    }
+#endif
+
+  /* Compute bs2 = b0 + 2*b1 + 4*b2 (n+1 limbs, top limb in bs2[n]).  */
+#if HAVE_NATIVE_mpn_rsblsh1_n
+  cy = mpn_add_n (bs2, b2, bs1, t);
+  if (t != n)
+    cy = mpn_add_1 (bs2 + t, bs1 + t, n - t, cy);
+  cy += bs1[n];
+  cy = 2 * cy + mpn_rsblsh1_n (bs2, b0, bs2, n);
+#else
+#if HAVE_NATIVE_mpn_addlsh1_n
+  cy  = mpn_addlsh1_n (bs2, b1, b2, t);
+  if (t != n)
+    cy = mpn_add_1 (bs2 + t, b1 + t, n - t, cy);
+  cy = 2 * cy + mpn_addlsh1_n (bs2, b0, bs2, n);
+#else
+  cy  = mpn_add_n (bs2, bs1, b2, t);
+  if (t != n)
+    cy = mpn_add_1 (bs2 + t, bs1 + t, n - t, cy);
+  cy += bs1[n];
+  cy = 2 * cy + mpn_lshift (bs2, bs2, n, 1);
+  cy -= mpn_sub_n (bs2, bs2, b0, n);
+#endif
+#endif
+  bs2[n] = cy;
+
+  ASSERT (as1[n] <= 2);
+  ASSERT (bs1[n] <= 2);
+  ASSERT (asm1[n] <= 1);
+  ASSERT (bsm1[n] <= 1);
+  ASSERT (as2[n] <= 6);
+  ASSERT (bs2[n] <= 6);
+
+#define v0    pp                               /* 2n */
+#define v1    (pp + 2 * n)                     /* 2n+1 */
+#define vinf  (pp + 4 * n)                     /* s+t */
+#define vm1   scratch                          /* 2n+1 */
+#define v2    (scratch + 2 * n + 1)            /* 2n+2 */
+#define scratch_out  (scratch + 5 * n + 5)
+
+  /* vm1 = asm1 * bsm1, 2n+1 limbs */
+#ifdef SMALLER_RECURSION
+  TOOM33_MUL_N_REC (vm1, asm1, bsm1, n, scratch_out);
+  cy = 0;
+  if (asm1[n] != 0)
+    cy = bsm1[n] + mpn_add_n (vm1 + n, vm1 + n, bsm1, n);
+  if (bsm1[n] != 0)
+    cy += mpn_add_n (vm1 + n, vm1 + n, asm1, n);
+  vm1[2 * n] = cy;
+#else
+  TOOM33_MUL_N_REC (vm1, asm1, bsm1, n + 1, scratch_out);
+#endif
+
+  TOOM33_MUL_N_REC (v2, as2, bs2, n + 1, scratch_out); /* v2, 2n+1 limbs */
+
+  /* vinf = a2 * b2, s+t limbs */
+  if (s > t)  mpn_mul (vinf, a2, s, b2, t);
+  else        TOOM33_MUL_N_REC (vinf, a2, b2, s, scratch_out);
+
+  vinf0 = vinf[0];                             /* v1 overlaps with this */
+
+#ifdef SMALLER_RECURSION
+  /* v1 = as1 * bs1, 2n+1 limbs */
+  TOOM33_MUL_N_REC (v1, as1, bs1, n, scratch_out);
+  if (as1[n] == 1)
+    {
+      cy = bs1[n] + mpn_add_n (v1 + n, v1 + n, bs1, n);
+    }
+  else if (as1[n] != 0)
+    {
+#if HAVE_NATIVE_mpn_addlsh1_n
+      cy = 2 * bs1[n] + mpn_addlsh1_n (v1 + n, v1 + n, bs1, n);
+#else
+      cy = 2 * bs1[n] + mpn_addmul_1 (v1 + n, bs1, n, CNST_LIMB(2));
+#endif
+    }
+  else
+    cy = 0;
+  if (bs1[n] == 1)
+    {
+      cy += mpn_add_n (v1 + n, v1 + n, as1, n);
+    }
+  else if (bs1[n] != 0)
+    {
+#if HAVE_NATIVE_mpn_addlsh1_n
+      cy += mpn_addlsh1_n (v1 + n, v1 + n, as1, n);
+#else
+      cy += mpn_addmul_1 (v1 + n, as1, n, CNST_LIMB(2));
+#endif
+    }
+  v1[2 * n] = cy;
+#else
+  cy = vinf[1];
+  TOOM33_MUL_N_REC (v1, as1, bs1, n + 1, scratch_out);
+  vinf[1] = cy;
+#endif
+
+  TOOM33_MUL_N_REC (v0, ap, bp, n, scratch_out);       /* v0 = a0 * b0, 2n limbs */
+
+  mpn_toom_interpolate_5pts (pp, v2, vm1, n, s + t, vm1_neg, vinf0);
+}
diff --git a/mpn/generic/toom3_sqr.c b/mpn/generic/toom3_sqr.c

new file mode 100644 (file)

index 0000000..5824b05
--- /dev/null
+++ b/mpn/generic/toom3_sqr.c
@@ -0,0 +1,214 @@
+/* mpn_toom3_sqr -- Square {ap,an}.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+   Additional improvements by Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Evaluate in: -1, 0, +1, +2, +inf
+
+  <-s--><--n--><--n-->
+   ____ ______ ______
+  |_a2_|___a1_|___a0_|
+
+  v0  =  a0         ^2 #   A(0)^2
+  v1  = (a0+ a1+ a2)^2 #   A(1)^2    ah  <= 2
+  vm1 = (a0- a1+ a2)^2 #  A(-1)^2   |ah| <= 1
+  v2  = (a0+2a1+4a2)^2 #   A(2)^2    ah  <= 6
+  vinf=          a2 ^2 # A(inf)^2
+*/
+
+#if TUNE_PROGRAM_BUILD                 /* tuning build: every recursion path stays enabled */
+#define MAYBE_sqr_basecase 1
+#define MAYBE_sqr_toom3   1
+#else                                  /* otherwise the threshold comparisons below enable or disable each path */
+#define MAYBE_sqr_basecase                                             \
+  (SQR_TOOM3_THRESHOLD < 3 * SQR_TOOM2_THRESHOLD)
+#define MAYBE_sqr_toom3                                                        \
+  (SQR_TOOM4_THRESHOLD >= 3 * SQR_TOOM3_THRESHOLD)
+#endif                                 /* TUNE_PROGRAM_BUILD */
+
+#define TOOM3_SQR_REC(p, a, n, ws)                                     \
+  do {                                                                 \
+    if (MAYBE_sqr_basecase                                             \
+       && BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD))                    \
+      mpn_sqr_basecase (p, a, n);                                      \
+    else if (! MAYBE_sqr_toom3                                         \
+            || BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD))               \
+      mpn_toom2_sqr (p, a, n, ws);                                     \
+    else                                                               \
+      mpn_toom3_sqr (p, a, n, ws);                                     \
+  } while (0)  /* squares {a,n} into p via basecase, toom2 or toom3; ws is handed to the toom calls only */
+
+void
+mpn_toom3_sqr (mp_ptr pp,                      /* result area; written up to pp + 4n + 2s */
+              mp_srcptr ap, mp_size_t an,      /* operand {ap,an} to be squared */
+              mp_ptr scratch)                  /* workspace: evaluation points, vm1, v2, recursion scratch */
+{
+  mp_size_t n, s;
+  mp_limb_t cy, vinf0;
+  mp_ptr gp;
+  mp_ptr as1, asm1, as2;
+
+#define a0  ap
+#define a1  (ap + n)
+#define a2  (ap + 2*n)
+
+  n = (an + 2) / (size_t) 3;                   /* n = ceil(an/3), size of a0 and a1 */
+
+  s = an - 2 * n;                              /* s = size of the top piece a2 */
+
+  ASSERT (0 < s && s <= n);
+
+  as1 = scratch + 4 * n + 4;                   /* A(1), n+1 limbs */
+  asm1 = scratch + 2 * n + 2;                  /* |A(-1)|, n+1 limbs */
+  as2 = pp + n + 1;                            /* A(2), n+1 limbs, kept in the product area */
+
+  gp = scratch;                                        /* temporary holding a0 + a2 */
+
+  /* Compute as1 and asm1.  */
+  cy = mpn_add (gp, a0, n, a2, s);             /* gp = a0 + a2, cy = carry out */
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  if (cy == 0 && mpn_cmp (gp, a1, n) < 0)      /* a0 + a2 < a1: subtract the other way round */
+    {
+      cy = mpn_add_n_sub_n (as1, asm1, a1, gp, n);
+      as1[n] = cy >> 1;
+      asm1[n] = 0;
+    }
+  else
+    {
+      mp_limb_t cy2;
+      cy2 = mpn_add_n_sub_n (as1, asm1, gp, a1, n);
+      as1[n] = cy + (cy2 >> 1);
+      asm1[n] = cy - (cy2 & 1);
+    }
+#else
+  as1[n] = cy + mpn_add_n (as1, gp, a1, n);    /* as1 = a0 + a1 + a2 */
+  if (cy == 0 && mpn_cmp (gp, a1, n) < 0)      /* a0 + a2 < a1: store a1 - (a0 + a2) instead */
+    {
+      mpn_sub_n (asm1, a1, gp, n);
+      asm1[n] = 0;
+    }
+  else
+    {
+      cy -= mpn_sub_n (asm1, gp, a1, n);
+      asm1[n] = cy;
+    }
+#endif
+
+  /* Compute as2.  */
+#if HAVE_NATIVE_mpn_rsblsh1_n
+  cy = mpn_add_n (as2, a2, as1, s);            /* same steps as the generic branch: as2 = as1 + a2 ... */
+  if (s != n)
+    cy = mpn_add_1 (as2 + s, as1 + s, n - s, cy);
+  cy += as1[n];
+  cy = 2 * cy + mpn_rsblsh1_n (as2, a0, as2, n);       /* ... then the doubling and the a0 subtraction in one call */
+#else
+#if HAVE_NATIVE_mpn_addlsh1_n
+  cy  = mpn_addlsh1_n (as2, a1, a2, s);                /* Horner form: as2 = a1 + 2 a2 ... */
+  if (s != n)
+    cy = mpn_add_1 (as2 + s, a1 + s, n - s, cy);
+  cy = 2 * cy + mpn_addlsh1_n (as2, a0, as2, n);       /* ... then as2 = a0 + 2 as2 */
+#else
+  cy = mpn_add_n (as2, a2, as1, s);            /* as2 = as1 + a2 (low s limbs) */
+  if (s != n)
+    cy = mpn_add_1 (as2 + s, as1 + s, n - s, cy);      /* propagate the carry through the rest of as1 */
+  cy += as1[n];
+  cy = 2 * cy + mpn_lshift (as2, as2, n, 1);   /* as2 = 2 (as1 + a2) */
+  cy -= mpn_sub_n (as2, as2, a0, n);           /* as2 = 2 (as1 + a2) - a0 = a0 + 2 a1 + 4 a2 */
+#endif
+#endif
+  as2[n] = cy;
+
+  ASSERT (as1[n] <= 2);
+  ASSERT (asm1[n] <= 1);
+
+#define v0    pp                               /* 2n */
+#define v1    (pp + 2 * n)                     /* 2n+1 */
+#define vinf  (pp + 4 * n)                     /* s+s */
+#define vm1   scratch                          /* 2n+1 */
+#define v2    (scratch + 2 * n + 1)            /* 2n+2 */
+#define scratch_out  (scratch + 5 * n + 5)
+
+  /* vm1, 2n+1 limbs */
+#ifdef SMALLER_RECURSION
+  TOOM3_SQR_REC (vm1, asm1, n, scratch_out);   /* square the low n limbs only */
+  cy = 0;
+  if (asm1[n] != 0)
+    cy = asm1[n] + mpn_add_n (vm1 + n, vm1 + n, asm1, n);
+  if (asm1[n] != 0)                            /* same test again: the cross term is added twice */
+    cy += mpn_add_n (vm1 + n, vm1 + n, asm1, n);
+  vm1[2 * n] = cy;
+#else
+  TOOM3_SQR_REC (vm1, asm1, n + 1, scratch_out);
+#endif
+
+  TOOM3_SQR_REC (v2, as2, n + 1, scratch_out); /* v2, 2n+1 limbs */
+
+  TOOM3_SQR_REC (vinf, a2, s, scratch_out);    /* vinf, s+s limbs */
+
+  vinf0 = vinf[0];                             /* v1 overlaps with this */
+
+#ifdef SMALLER_RECURSION
+  /* v1, 2n+1 limbs */
+  TOOM3_SQR_REC (v1, as1, n, scratch_out);     /* square the low n limbs; high limb handled below */
+  if (as1[n] == 1)
+    {
+      cy = as1[n] + mpn_add_n (v1 + n, v1 + n, as1, n);
+    }
+  else if (as1[n] != 0)                                /* as1[n] == 2, given the ASSERT above */
+    {
+#if HAVE_NATIVE_mpn_addlsh1_n
+      cy = 2 * as1[n] + mpn_addlsh1_n (v1 + n, v1 + n, as1, n);
+#else
+      cy = 2 * as1[n] + mpn_addmul_1 (v1 + n, as1, n, CNST_LIMB(2));
+#endif
+    }
+  else
+    cy = 0;
+  if (as1[n] == 1)                             /* second copy of the cross term */
+    {
+      cy += mpn_add_n (v1 + n, v1 + n, as1, n);
+    }
+  else if (as1[n] != 0)
+    {
+#if HAVE_NATIVE_mpn_addlsh1_n
+      cy += mpn_addlsh1_n (v1 + n, v1 + n, as1, n);
+#else
+      cy += mpn_addmul_1 (v1 + n, as1, n, CNST_LIMB(2));
+#endif
+    }
+  v1[2 * n] = cy;
+#else
+  cy = vinf[1];                                        /* saved: v1 + 2n + 1 is the same limb as vinf[1] */
+  TOOM3_SQR_REC (v1, as1, n + 1, scratch_out);
+  vinf[1] = cy;                                        /* restored after the (n+1)-limb square */
+#endif
+
+  TOOM3_SQR_REC (v0, ap, n, scratch_out);      /* v0, 2n limbs */
+
+  mpn_toom_interpolate_5pts (pp, v2, vm1, n, s + s, 0, vinf0); /* sign argument is 0: vm1 is a square */
+}
diff --git a/mpn/generic/toom42_mul.c b/mpn/generic/toom42_mul.c

new file mode 100644 (file)

index 0000000..99ac175
--- /dev/null
+++ b/mpn/generic/toom42_mul.c
@@ -0,0 +1,224 @@
+/* mpn_toom42_mul -- Multiply {ap,an} and {bp,bn} where an is nominally twice
+   as large as bn.  Or more accurately, (3/2)bn < an < 4bn.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+   Additional improvements by Marco Bodrato.
+
+   The idea of applying toom to unbalanced multiplication is due to Marco
+   Bodrato and Alberto Zanoni.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2006, 2007, 2008 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Evaluate in: -1, 0, +1, +2, +inf
+
+  <-s-><--n--><--n--><--n-->
+   ___ ______ ______ ______
+  |a3_|___a2_|___a1_|___a0_|
+              |_b1_|___b0_|
+              <-t--><--n-->
+
+  v0  =  a0             * b0      #   A(0)*B(0)
+  v1  = (a0+ a1+ a2+ a3)*(b0+ b1) #   A(1)*B(1)      ah  <= 3  bh <= 1
+  vm1 = (a0- a1+ a2- a3)*(b0- b1) #  A(-1)*B(-1)    |ah| <= 1  bh  = 0
+  v2  = (a0+2a1+4a2+8a3)*(b0+2b1) #   A(2)*B(2)      ah  <= 14 bh <= 2
+  vinf=              a3 *     b1  # A(inf)*B(inf)
+*/
+
+#define TOOM42_MUL_N_REC(p, a, b, n, ws)                               \
+  do {                                                                 \
+    mpn_mul_n (p, a, b, n);                                            \
+  } while (0)  /* p = {a,n} * {b,n} through mpn_mul_n; the ws argument is accepted but not used */
+
+void
+mpn_toom42_mul (mp_ptr pp,
+               mp_srcptr ap, mp_size_t an,
+               mp_srcptr bp, mp_size_t bn,
+               mp_ptr scratch)
+{
+  mp_size_t n, s, t;
+  int vm1_neg;
+  mp_limb_t cy, vinf0;
+  mp_ptr a0_a2, a1_a3;
+  mp_ptr as1, asm1, as2;
+  mp_ptr bs1, bsm1, bs2;
+  TMP_DECL;
+
+#define a0  ap
+#define a1  (ap + n)
+#define a2  (ap + 2*n)
+#define a3  (ap + 3*n)
+#define b0  bp
+#define b1  (bp + n)
+
+  n = an >= 2 * bn ? (an + 3) >> 2 : (bn + 1) >> 1;
+
+  s = an - 3 * n;
+  t = bn - n;
+
+  ASSERT (0 < s && s <= n);
+  ASSERT (0 < t && t <= n);
+
+  TMP_MARK;
+
+  as1 = TMP_SALLOC_LIMBS (n + 1);
+  asm1 = TMP_SALLOC_LIMBS (n + 1);
+  as2 = TMP_SALLOC_LIMBS (n + 1);
+
+  bs1 = TMP_SALLOC_LIMBS (n + 1);
+  bsm1 = TMP_SALLOC_LIMBS (n);
+  bs2 = TMP_SALLOC_LIMBS (n + 1);
+
+  a0_a2 = pp;
+  a1_a3 = pp + n + 1;
+
+  /* Compute as1 and asm1.  */
+  vm1_neg = mpn_toom_eval_dgr3_pm1 (as1, asm1, ap, n, s, a0_a2) & 1;
+
+  /* Compute as2.  */
+#if HAVE_NATIVE_mpn_addlsh1_n
+  cy  = mpn_addlsh1_n (as2, a2, a3, s);
+  if (s != n)
+    cy = mpn_add_1 (as2 + s, a2 + s, n - s, cy);
+  cy = 2 * cy + mpn_addlsh1_n (as2, a1, as2, n);
+  cy = 2 * cy + mpn_addlsh1_n (as2, a0, as2, n);
+#else
+  cy  = mpn_lshift (as2, a3, s, 1);
+  cy += mpn_add_n (as2, a2, as2, s);
+  if (s != n)
+    cy = mpn_add_1 (as2 + s, a2 + s, n - s, cy);
+  cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
+  cy += mpn_add_n (as2, a1, as2, n);
+  cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
+  cy += mpn_add_n (as2, a0, as2, n);
+#endif
+  as2[n] = cy;
+
+  /* Compute bs1 and bsm1.  */
+  if (t == n)
+    {
+#if HAVE_NATIVE_mpn_add_n_sub_n
+      if (mpn_cmp (b0, b1, n) < 0)
+       {
+         cy = mpn_add_n_sub_n (bs1, bsm1, b1, b0, n);
+         vm1_neg ^= 1;
+       }
+      else
+       {
+         cy = mpn_add_n_sub_n (bs1, bsm1, b0, b1, n);
+       }
+      bs1[n] = cy >> 1;
+#else
+      bs1[n] = mpn_add_n (bs1, b0, b1, n);
+
+      if (mpn_cmp (b0, b1, n) < 0)
+       {
+         mpn_sub_n (bsm1, b1, b0, n);
+         vm1_neg ^= 1;
+       }
+      else
+       {
+         mpn_sub_n (bsm1, b0, b1, n);
+       }
+#endif
+    }
+  else
+    {
+      bs1[n] = mpn_add (bs1, b0, n, b1, t);
+
+      if (mpn_zero_p (b0 + t, n - t) && mpn_cmp (b0, b1, t) < 0)
+       {
+         mpn_sub_n (bsm1, b1, b0, t);
+         MPN_ZERO (bsm1 + t, n - t);
+         vm1_neg ^= 1;
+       }
+      else
+       {
+         mpn_sub (bsm1, b0, n, b1, t);
+       }
+    }
+
+  /* Compute bs2, recycling bs1. bs2=bs1+b1  */
+  mpn_add (bs2, bs1, n + 1, b1, t);
+
+  ASSERT (as1[n] <= 3);
+  ASSERT (bs1[n] <= 1);
+  ASSERT (asm1[n] <= 1);
+/*ASSERT (bsm1[n] == 0);*/
+  ASSERT (as2[n] <= 14);
+  ASSERT (bs2[n] <= 2);
+
+#define v0    pp                               /* 2n */
+#define v1    (pp + 2 * n)                     /* 2n+1 */
+#define vinf  (pp + 4 * n)                     /* s+t */
+#define vm1   scratch                          /* 2n+1 */
+#define v2    (scratch + 2 * n + 1)            /* 2n+2 */
+#define scratch_out    scratch + 4 * n + 4     /* Currently unused. */
+
+  /* vm1, 2n+1 limbs */
+  TOOM42_MUL_N_REC (vm1, asm1, bsm1, n, scratch_out);
+  cy = 0;
+  if (asm1[n] != 0)
+    cy = mpn_add_n (vm1 + n, vm1 + n, bsm1, n);
+  vm1[2 * n] = cy;
+
+  TOOM42_MUL_N_REC (v2, as2, bs2, n + 1, scratch_out); /* v2, 2n+1 limbs */
+
+  /* vinf, s+t limbs */
+  if (s > t)  mpn_mul (vinf, a3, s, b1, t);
+  else        mpn_mul (vinf, b1, t, a3, s);
+
+  vinf0 = vinf[0];                             /* v1 overlaps with this */
+
+  /* v1, 2n+1 limbs */
+  TOOM42_MUL_N_REC (v1, as1, bs1, n, scratch_out);
+  if (as1[n] == 1)
+    {
+      cy = bs1[n] + mpn_add_n (v1 + n, v1 + n, bs1, n);
+    }
+  else if (as1[n] == 2)
+    {
+#if HAVE_NATIVE_mpn_addlsh1_n
+      cy = 2 * bs1[n] + mpn_addlsh1_n (v1 + n, v1 + n, bs1, n);
+#else
+      cy = 2 * bs1[n] + mpn_addmul_1 (v1 + n, bs1, n, CNST_LIMB(2));
+#endif
+    }
+  else if (as1[n] == 3)
+    {
+      cy = 3 * bs1[n] + mpn_addmul_1 (v1 + n, bs1, n, CNST_LIMB(3));
+    }
+  else
+    cy = 0;
+  if (bs1[n] != 0)
+    cy += mpn_add_n (v1 + n, v1 + n, as1, n);
+  v1[2 * n] = cy;
+
+  TOOM42_MUL_N_REC (v0, ap, bp, n, scratch_out);       /* v0, 2n limbs */
+
+  mpn_toom_interpolate_5pts (pp, v2, vm1, n, s + t, vm1_neg, vinf0);
+
+  TMP_FREE;
+}
diff --git a/mpn/generic/toom43_mul.c b/mpn/generic/toom43_mul.c

new file mode 100644 (file)

index 0000000..670049c
--- /dev/null
+++ b/mpn/generic/toom43_mul.c
@@ -0,0 +1,223 @@
+/* mpn_toom43_mul -- Multiply {ap,an} and {bp,bn} where an is nominally 4/3
+   times as large as bn.  Or more accurately, bn < an < 2 bn.
+
+   Contributed to the GNU project by Marco Bodrato.
+
+   The idea of applying toom to unbalanced multiplication is due to Marco
+   Bodrato and Alberto Zanoni.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Evaluate in: -2, -1, 0, +1, +2, +inf
+
+  <-s-><--n--><--n--><--n-->
+   ___ ______ ______ ______
+  |a3_|___a2_|___a1_|___a0_|
+       |_b2_|___b1_|___b0_|
+       <-t--><--n--><--n-->
+
+  v0  =  a0             * b0          #   A(0)*B(0)
+  v1  = (a0+ a1+ a2+ a3)*(b0+ b1+ b2) #   A(1)*B(1)      ah  <= 3  bh <= 2
+  vm1 = (a0- a1+ a2- a3)*(b0- b1+ b2) #  A(-1)*B(-1)    |ah| <= 1 |bh|<= 1
+  v2  = (a0+2a1+4a2+8a3)*(b0+2b1+4b2) #   A(2)*B(2)      ah  <= 14 bh <= 6
+  vm2 = (a0-2a1+4a2-8a3)*(b0-2b1+4b2) #  A(-2)*B(-2)    |ah| <= 9 |bh|<= 4
+  vinf=              a3 *         b2  # A(inf)*B(inf)
+*/
+
+void
+mpn_toom43_mul (mp_ptr pp,                     /* result area; also hosts several evaluation values */
+               mp_srcptr ap, mp_size_t an,     /* operand split into a0..a3 */
+               mp_srcptr bp, mp_size_t bn, mp_ptr scratch)     /* operand split into b0..b2; workspace */
+{
+  mp_size_t n, s, t;
+  enum toom6_flags flags;
+  mp_limb_t cy;
+
+#define a0  ap
+#define a1  (ap + n)
+#define a2  (ap + 2 * n)
+#define a3  (ap + 3 * n)
+#define b0  bp
+#define b1  (bp + n)
+#define b2  (bp + 2 * n)
+
+  n = 1 + (3 * an >= 4 * bn ? (an - 1) >> 2 : (bn - 1) / (size_t) 3);  /* ceil(an/4) or ceil(bn/3) */
+
+  s = an - 3 * n;                              /* size of a3 */
+  t = bn - 2 * n;                              /* size of b2 */
+
+  ASSERT (0 < s && s <= n);
+  ASSERT (0 < t && t <= n);
+
+  /* This is true whenever an >= 25 or bn >= 19, I think. It
+     guarantees that we can fit 5 values of size n+1 in the product
+     area. */
+  ASSERT (s+t >= 5);
+
+#define v0    pp                               /* 2n */
+#define vm1   (scratch)                                /* 2n+1 */
+#define v1    (pp + 2*n)                       /* 2n+1 */
+#define vm2   (scratch + 2 * n + 1)            /* 2n+1 */
+#define v2    (scratch + 4 * n + 2)            /* 2n+1 */
+#define vinf  (pp + 5 * n)                     /* s+t */
+#define bs1    pp                              /* n+1 */
+#define bsm1  (scratch + 2 * n + 2)            /* n+1 */
+#define asm1  (scratch + 3 * n + 3)            /* n+1 */
+#define asm2  (scratch + 4 * n + 4)            /* n+1 */
+#define bsm2  (pp + n + 1)                     /* n+1 */
+#define bs2   (pp + 2 * n + 2)                 /* n+1 */
+#define as2   (pp + 3 * n + 3)                 /* n+1 */
+#define as1   (pp + 4 * n + 4)                 /* n+1 */
+
+  /* Total scratch need is 6 * n + 3 + 1; we allocate one extra
+     limb, because products will overwrite 2n+2 limbs. */
+
+#define a0a2  scratch
+#define b0b2  scratch
+#define a1a3  asm1
+#define b1d   bsm1
+
+  /* Compute as2 and asm2.  */
+  flags = toom6_vm2_neg & mpn_toom_eval_dgr3_pm2 (as2, asm2, ap, n, s, a1a3);  /* keep only the vm2 bit */
+
+  /* Compute bs2 and bsm2.  */
+  b1d[n] = mpn_lshift (b1d, b1, n, 1);                 /*       2b1      */
+  cy  = mpn_lshift (b0b2, b2, t, 2);                   /*  4b2           */
+  cy += mpn_add_n (b0b2, b0b2, b0, t);                 /*  4b2      + b0 */
+  if (t != n)
+    cy = mpn_add_1 (b0b2 + t, b0 + t, n - t, cy);
+  b0b2[n] = cy;
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  if (mpn_cmp (b0b2, b1d, n+1) < 0)
+    {
+      mpn_add_n_sub_n (bs2, bsm2, b1d, b0b2, n+1);
+      flags ^= toom6_vm2_neg;
+    }
+  else
+    {
+      mpn_add_n_sub_n (bs2, bsm2, b0b2, b1d, n+1);
+    }
+#else
+  mpn_add_n (bs2, b0b2, b1d, n+1);             /* bs2 = b0 + 2 b1 + 4 b2 */
+  if (mpn_cmp (b0b2, b1d, n+1) < 0)            /* b0 + 4 b2 < 2 b1: store the reversed difference, flip the flag */
+    {
+      mpn_sub_n (bsm2, b1d, b0b2, n+1);
+      flags ^= toom6_vm2_neg;
+    }
+  else
+    {
+      mpn_sub_n (bsm2, b0b2, b1d, n+1);
+    }
+#endif
+
+  /* Compute as1 and asm1.  */
+  flags ^= toom6_vm1_neg & mpn_toom_eval_dgr3_pm1 (as1, asm1, ap, n, s, a0a2); /* keep only the vm1 bit */
+
+  /* Compute bs1 and bsm1.  */
+  bsm1[n] = mpn_add (bsm1, b0, n, b2, t);      /* bsm1 temporarily holds b0 + b2 */
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  if (bsm1[n] == 0 && mpn_cmp (bsm1, b1, n) < 0)
+    {
+      cy = mpn_add_n_sub_n (bs1, bsm1, b1, bsm1, n);
+      bs1[n] = cy >> 1;
+      flags ^= toom6_vm1_neg;
+    }
+  else
+    {
+      cy = mpn_add_n_sub_n (bs1, bsm1, bsm1, b1, n);
+      bs1[n] = bsm1[n] + (cy >> 1);
+      bsm1[n]-= cy & 1;
+    }
+#else
+  bs1[n] = bsm1[n] + mpn_add_n (bs1, bsm1, b1, n);     /* bs1 = b0 + b1 + b2 */
+  if (bsm1[n] == 0 && mpn_cmp (bsm1, b1, n) < 0)       /* b0 + b2 < b1: store b1 - (b0 + b2), flip the flag */
+    {
+      mpn_sub_n (bsm1, b1, bsm1, n);
+      flags ^= toom6_vm1_neg;
+    }
+  else
+    {
+      bsm1[n] -= mpn_sub_n (bsm1, bsm1, b1, n);
+    }
+#endif
+
+  ASSERT (as1[n] <= 3);
+  ASSERT (bs1[n] <= 2);
+  ASSERT (asm1[n] <= 1);
+  ASSERT (bsm1[n] <= 1);
+  ASSERT (as2[n] <=14);
+  ASSERT (bs2[n] <= 6);
+  ASSERT (asm2[n] <= 9);
+  ASSERT (bsm2[n] <= 4);
+
+  /* vm1, 2n+1 limbs */
+  mpn_mul_n (vm1, asm1, bsm1, n+1);  /* W4 */
+
+  /* vm2, 2n+1 limbs */
+  mpn_mul_n (vm2, asm2, bsm2, n+1);  /* W2 */
+
+  /* v2, 2n+1 limbs */
+  mpn_mul_n (v2, as2, bs2, n+1);  /* W1 */
+
+  /* v1, 2n+1 limbs */
+  mpn_mul_n (v1, as1, bs1, n+1);  /* W3 */
+
+  /* vinf, s+t limbs */   /* W0 */
+  if (s > t)  mpn_mul (vinf, a3, s, b2, t);    /* longer operand is passed first in both calls */
+  else        mpn_mul (vinf, b2, t, a3, s);
+
+  /* v0, 2n limbs */
+  mpn_mul_n (v0, ap, bp, n);  /* W5 */
+
+  mpn_toom_interpolate_6pts (pp, n, flags, vm1, vm2, v2, t + s);
+
+#undef v0
+#undef vm1
+#undef v1
+#undef vm2
+#undef v2
+#undef vinf
+#undef bs1
+#undef bs2
+#undef bsm1
+#undef bsm2
+#undef asm1
+#undef asm2
+/* #undef as1 */
+/* #undef as2 */
+#undef a0a2
+#undef b0b2
+#undef a1a3
+#undef b1d
+#undef a0
+#undef a1
+#undef a2
+#undef a3
+#undef b0
+#undef b1
+#undef b2
+}
diff --git a/mpn/generic/toom44_mul.c b/mpn/generic/toom44_mul.c

new file mode 100644 (file)

index 0000000..01a6053
--- /dev/null
+++ b/mpn/generic/toom44_mul.c
@@ -0,0 +1,225 @@
+/* mpn_toom44_mul -- Multiply {ap,an} and {bp,bn} where an and bn are close in
+   size.  Or more accurately, bn <= an < (4/3)bn.
+
+   Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2006, 2007, 2008 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Evaluate in: 0, +1, -1, +2, -2, 1/2, +inf
+
+  <-s--><--n--><--n--><--n-->
+   ____ ______ ______ ______
+  |_a3_|___a2_|___a1_|___a0_|
+   |b3_|___b2_|___b1_|___b0_|
+   <-t-><--n--><--n--><--n-->
+
+  v0  =   a0             *  b0              #    A(0)*B(0)
+  v1  = ( a0+ a1+ a2+ a3)*( b0+ b1+ b2+ b3) #    A(1)*B(1)      ah  <= 3   bh  <= 3
+  vm1 = ( a0- a1+ a2- a3)*( b0- b1+ b2- b3) #   A(-1)*B(-1)    |ah| <= 1  |bh| <= 1
+  v2  = ( a0+2a1+4a2+8a3)*( b0+2b1+4b2+8b3) #    A(2)*B(2)      ah  <= 14  bh  <= 14
+  vm2 = ( a0-2a1+4a2-8a3)*( b0-2b1+4b2-8b3) #   A(-2)*B(-2)    |ah| <= 9  |bh| <= 9
+  vh  = (8a0+4a1+2a2+ a3)*(8b0+4b1+2b2+ b3) #  A(1/2)*B(1/2)    ah  <= 14  bh  <= 14
+  vinf=               a3 *          b3      #  A(inf)*B(inf)
+*/
+
+#if TUNE_PROGRAM_BUILD                 /* tuning build: every recursion path stays enabled */
+#define MAYBE_mul_basecase 1
+#define MAYBE_mul_toom22   1
+#define MAYBE_mul_toom44   1
+#else                                  /* otherwise the threshold comparisons below enable or disable each path */
+#define MAYBE_mul_basecase                                             \
+  (MUL_TOOM44_THRESHOLD < 4 * MUL_TOOM22_THRESHOLD)
+#define MAYBE_mul_toom22                                               \
+  (MUL_TOOM44_THRESHOLD < 4 * MUL_TOOM33_THRESHOLD)
+#define MAYBE_mul_toom44                                               \
+  (MUL_FFT_THRESHOLD >= 4 * MUL_TOOM44_THRESHOLD)
+#endif                                 /* TUNE_PROGRAM_BUILD */
+
+#define TOOM44_MUL_N_REC(p, a, b, n, ws)                               \
+  do {                                                                 \
+    if (MAYBE_mul_basecase                                             \
+       && BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))                   \
+      mpn_mul_basecase (p, a, n, b, n);                                        \
+    else if (MAYBE_mul_toom22                                          \
+            && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD))              \
+      mpn_toom22_mul (p, a, n, b, n, ws);                              \
+    else if (! MAYBE_mul_toom44                                                \
+            || BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD))              \
+      mpn_toom33_mul (p, a, n, b, n, ws);                              \
+    else                                                               \
+      mpn_toom44_mul (p, a, n, b, n, ws);                              \
+  } while (0)  /* p = {a,n} * {b,n} via basecase, toom22, toom33 or toom44; ws is handed to the toom calls only */
+
+/* Use of scratch space. In the product area, we store
+
+      ___________________
+     |vinf|____|_v1_|_v0_|
+      s+t  2n-1 2n+1  2n
+
+   The other recursive products, vm1, v2, vm2, vh are stored in the
+   scratch area. When computing them, we use the product area for
+   intermediate values.
+
+   Next, we compute v1. We can store the intermediate factors at v0
+   and at vh + 2n + 2.
+
+   Finally, for v0 and vinf, factors are parts of the input operands,
+   and we need scratch space only for the recursive multiplication.
+
+   In all, if S(an) is the scratch need, the needed space is bounded by
+
+     S(an) <= 4 (2*ceil(an/4) + 1) + 1 + S(ceil(an/4) + 1)
+
+   which should give S(n) = 8 n/3 + c log(n) for some constant c.
+*/
+
+void
+mpn_toom44_mul (mp_ptr pp,                     /* result area; vinf ends at pp + 6n + s + t */
+               mp_srcptr ap, mp_size_t an,     /* operand split into a0..a3 */
+               mp_srcptr bp, mp_size_t bn,     /* operand split into b0..b3; bn <= an is asserted */
+               mp_ptr scratch)                 /* holds v2, vm2, vh, vm1, then tp */
+{
+  mp_size_t n, s, t;
+  mp_limb_t cy;
+  enum toom7_flags flags;
+
+#define a0  ap
+#define a1  (ap + n)
+#define a2  (ap + 2*n)
+#define a3  (ap + 3*n)
+#define b0  bp
+#define b1  (bp + n)
+#define b2  (bp + 2*n)
+#define b3  (bp + 3*n)
+
+  ASSERT (an >= bn);
+
+  n = (an + 3) >> 2;                           /* n = ceil(an/4) */
+
+  s = an - 3 * n;                              /* size of a3 */
+  t = bn - 3 * n;                              /* size of b3 */
+
+  ASSERT (0 < s && s <= n);
+  ASSERT (0 < t && t <= n);
+  ASSERT (s >= t);
+
+  /* NOTE: The multiplications to v2, vm2, vh and vm1 overwrites the
+   * following limb, so these must be computed in order, and we need a
+   * one limb gap to tp. */
+#define v0    pp                               /* 2n */
+#define v1    (pp + 2 * n)                     /* 2n+1 */
+#define vinf  (pp + 6 * n)                     /* s+t */
+#define v2    scratch                          /* 2n+1 */
+#define vm2   (scratch + 2 * n + 1)            /* 2n+1 */
+#define vh    (scratch + 4 * n + 2)            /* 2n+1 */
+#define vm1   (scratch + 6 * n + 3)            /* 2n+1 */
+#define tp (scratch + 8*n + 5)
+
+  /* apx and bpx must not overlap with v1 */
+#define apx   pp                               /* n+1 */
+#define amx   (pp + n + 1)                     /* n+1 */
+#define bmx   (pp + 2*n + 2)                   /* n+1 */
+#define bpx   (pp + 4*n + 2)                   /* n+1 */
+
+  /* Total scratch need: 8*n + 5 + scratch for recursive calls. This
+     gives roughly 32 n/3 + log term. */
+
+  /* Compute apx = a0 + 2 a1 + 4 a2 + 8 a3 and amx = a0 - 2 a1 + 4 a2 - 8 a3.  */
+  flags = toom7_w1_neg & mpn_toom_eval_dgr3_pm2 (apx, amx, ap, n, s, tp);
+
+  /* Compute bpx = b0 + 2 b1 + 4 b2 + 8 b3 and bmx = b0 - 2 b1 + 4 b2 - 8 b3.  */
+  flags ^= toom7_w1_neg & mpn_toom_eval_dgr3_pm2 (bpx, bmx, bp, n, t, tp);
+
+  TOOM44_MUL_N_REC (v2, apx, bpx, n + 1, tp);  /* v2,  2n+1 limbs */
+  TOOM44_MUL_N_REC (vm2, amx, bmx, n + 1, tp); /* vm2,  2n+1 limbs */
+
+  /* Compute apx = 8 a0 + 4 a1 + 2 a2 + a3 = ((2*a0 + a1) * 2 + a2) * 2 + a3 */
+#if HAVE_NATIVE_mpn_addlsh1_n
+  cy = mpn_addlsh1_n (apx, a1, a0, n);
+  cy = 2*cy + mpn_addlsh1_n (apx, a2, apx, n);
+  if (s < n)
+    {
+      mp_limb_t cy2;
+      cy2 = mpn_addlsh1_n (apx, a3, apx, s);   /* a3 covers only the low s limbs */
+      apx[n] = 2*cy + mpn_lshift (apx + s, apx + s, n - s, 1);        /* double the remaining n-s limbs */
+      MPN_INCR_U (apx + s, n+1-s, cy2);                /* then add in the carry from the low part */
+    }
+  else
+    apx[n] = 2*cy + mpn_addlsh1_n (apx, a3, apx, n);
+#else
+  cy = mpn_lshift (apx, a0, n, 1);
+  cy += mpn_add_n (apx, apx, a1, n);
+  cy = 2*cy + mpn_lshift (apx, apx, n, 1);
+  cy += mpn_add_n (apx, apx, a2, n);
+  cy = 2*cy + mpn_lshift (apx, apx, n, 1);
+  apx[n] = cy + mpn_add (apx, apx, n, a3, s);
+#endif
+
+  /* Compute bpx = 8 b0 + 4 b1 + 2 b2 + b3 = ((2*b0 + b1) * 2 + b2) * 2 + b3 */
+#if HAVE_NATIVE_mpn_addlsh1_n
+  cy = mpn_addlsh1_n (bpx, b1, b0, n);
+  cy = 2*cy + mpn_addlsh1_n (bpx, b2, bpx, n);
+  if (t < n)
+    {
+      mp_limb_t cy2;
+      cy2 = mpn_addlsh1_n (bpx, b3, bpx, t);   /* b3 covers only the low t limbs */
+      bpx[n] = 2*cy + mpn_lshift (bpx + t, bpx + t, n - t, 1);
+      MPN_INCR_U (bpx + t, n+1-t, cy2);
+    }
+  else
+    bpx[n] = 2*cy + mpn_addlsh1_n (bpx, b3, bpx, n);
+#else
+  cy = mpn_lshift (bpx, b0, n, 1);
+  cy += mpn_add_n (bpx, bpx, b1, n);
+  cy = 2*cy + mpn_lshift (bpx, bpx, n, 1);
+  cy += mpn_add_n (bpx, bpx, b2, n);
+  cy = 2*cy + mpn_lshift (bpx, bpx, n, 1);
+  bpx[n] = cy + mpn_add (bpx, bpx, n, b3, t);
+#endif
+
+  ASSERT (apx[n] < 15);
+  ASSERT (bpx[n] < 15);
+
+  TOOM44_MUL_N_REC (vh, apx, bpx, n + 1, tp);  /* vh,  2n+1 limbs */
+
+  /* Compute apx = a0 + a1 + a2 + a3 and amx = a0 - a1 + a2 - a3.  */
+  flags |= toom7_w3_neg & mpn_toom_eval_dgr3_pm1 (apx, amx, ap, n, s, tp);
+
+  /* Compute bpx = b0 + b1 + b2 + b3 and bmx = b0 - b1 + b2 - b3.  */
+  flags ^= toom7_w3_neg & mpn_toom_eval_dgr3_pm1 (bpx, bmx, bp, n, t, tp);
+
+  TOOM44_MUL_N_REC (vm1, amx, bmx, n + 1, tp); /* vm1,  2n+1 limbs */
+  /* Clobbers amx, bmx. */
+  TOOM44_MUL_N_REC (v1, apx, bpx, n + 1, tp);  /* v1,  2n+1 limbs */
+
+  TOOM44_MUL_N_REC (v0, a0, b0, n, tp);                /* v0, overwrites apx at pp */
+  if (s > t)
+    mpn_mul (vinf, a3, s, b3, t);
+  else                                         /* s == t here, since s >= t was asserted */
+    TOOM44_MUL_N_REC (vinf, a3, b3, s, tp);    /* vinf, s+t limbs */
+
+  mpn_toom_interpolate_7pts (pp, n, flags, vm2, vm1, v2, vh, s + t, tp);
+}
diff --git a/mpn/generic/toom4_sqr.c b/mpn/generic/toom4_sqr.c

new file mode 100644 (file)

index 0000000..4050c45
--- /dev/null
+++ b/mpn/generic/toom4_sqr.c
@@ -0,0 +1,153 @@
+/* mpn_toom4_sqr -- Square {ap,an}.
+
+   Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Evaluate in: -2, -1, 0, +1/2, +1, +2, +inf
+
+  <-s--><--n--><--n--><--n-->
+   ____ ______ ______ ______
+  |_a3_|___a2_|___a1_|___a0_|
+
+  v0  =   a0             ^2 #    A(0)^2
+  v1  = ( a0+ a1+ a2+ a3)^2 #    A(1)^2   ah  <= 3
+  vm1 = ( a0- a1+ a2- a3)^2 #   A(-1)^2  |ah| <= 1
+  v2  = ( a0+2a1+4a2+8a3)^2 #    A(2)^2   ah  <= 14
+  vh  = (8a0+4a1+2a2+ a3)^2 #  A(1/2)^2   ah  <= 14
+  vm2 = ( a0-2a1+4a2-8a3)^2 #   A(-2)^2  -9<=ah<=4
+  vinf=               a3 ^2 #  A(inf)^2
+*/
+
+#if TUNE_PROGRAM_BUILD
+#define MAYBE_sqr_basecase 1
+#define MAYBE_sqr_toom2   1
+#define MAYBE_sqr_toom4   1
+#else
+#define MAYBE_sqr_basecase                                             \
+  (SQR_TOOM4_THRESHOLD < 4 * SQR_TOOM2_THRESHOLD)
+#define MAYBE_sqr_toom2                                                        \
+  (SQR_TOOM4_THRESHOLD < 4 * SQR_TOOM3_THRESHOLD)
+#define MAYBE_sqr_toom4                                                        \
+  (SQR_FFT_THRESHOLD >= 4 * SQR_TOOM4_THRESHOLD)
+#endif
+
+#define TOOM4_SQR_REC(p, a, n, ws)                                     \
+  do {                                                                 \
+    if (MAYBE_sqr_basecase                                             \
+       && BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD))                    \
+      mpn_sqr_basecase (p, a, n);                                      \
+    else if (MAYBE_sqr_toom2                                           \
+            && BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD))               \
+      mpn_toom2_sqr (p, a, n, ws);                                     \
+    else if (! MAYBE_sqr_toom4                                         \
+            || BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD))               \
+      mpn_toom3_sqr (p, a, n, ws);                                     \
+    else                                                               \
+      mpn_toom4_sqr (p, a, n, ws);                                     \
+  } while (0)
+
+void
+mpn_toom4_sqr (mp_ptr pp,
+              mp_srcptr ap, mp_size_t an,
+              mp_ptr scratch)
+{
+  mp_size_t n, s;
+  mp_limb_t cy;
+
+#define a0  ap
+#define a1  (ap + n)
+#define a2  (ap + 2*n)
+#define a3  (ap + 3*n)
+
+  n = (an + 3) >> 2;
+
+  s = an - 3 * n;
+
+  ASSERT (0 < s && s <= n);
+
+  /* NOTE: The multiplications to v2, vm2, vh and vm1 overwrite the
+   * following limb, so these must be computed in order, and we need a
+   * one limb gap to tp. */
+#define v0    pp                               /* 2n */
+#define v1    (pp + 2 * n)                     /* 2n+1 */
+#define vinf  (pp + 6 * n)                     /* 2s */
+#define v2    scratch                          /* 2n+1 */
+#define vm2   (scratch + 2 * n + 1)            /* 2n+1 */
+#define vh    (scratch + 4 * n + 2)            /* 2n+1 */
+#define vm1   (scratch + 6 * n + 3)            /* 2n+1 */
+#define tp (scratch + 8*n + 5)
+
+  /* No overlap with v1 */
+#define apx   pp                               /* n+1 */
+#define amx   (pp + 4*n + 2)                   /* n+1 */
+
+  /* Total scratch need: 8*n + 5 + scratch for recursive calls. This
+     gives roughly 32 n/3 + log term. */
+
+  /* Compute apx = a0 + 2 a1 + 4 a2 + 8 a3 and amx = a0 - 2 a1 + 4 a2 - 8 a3.  */
+  mpn_toom_eval_dgr3_pm2 (apx, amx, ap, n, s, tp);
+
+  TOOM4_SQR_REC (v2, apx, n + 1, tp);  /* v2,  2n+1 limbs */
+  TOOM4_SQR_REC (vm2, amx, n + 1, tp); /* vm2,  2n+1 limbs */
+
+  /* Compute apx = 8 a0 + 4 a1 + 2 a2 + a3 = ((2*a0 + a1) * 2 + a2) * 2 + a3 */
+#if HAVE_NATIVE_mpn_addlsh1_n
+  cy = mpn_addlsh1_n (apx, a1, a0, n);
+  cy = 2*cy + mpn_addlsh1_n (apx, a2, apx, n);
+  if (s < n)
+    {
+      mp_limb_t cy2;
+      cy2 = mpn_addlsh1_n (apx, a3, apx, s);
+      apx[n] = 2*cy + mpn_lshift (apx + s, apx + s, n - s, 1);
+      MPN_INCR_U (apx + s, n+1-s, cy2);
+    }
+  else
+    apx[n] = 2*cy + mpn_addlsh1_n (apx, a3, apx, n);
+#else
+  cy = mpn_lshift (apx, a0, n, 1);
+  cy += mpn_add_n (apx, apx, a1, n);
+  cy = 2*cy + mpn_lshift (apx, apx, n, 1);
+  cy += mpn_add_n (apx, apx, a2, n);
+  cy = 2*cy + mpn_lshift (apx, apx, n, 1);
+  apx[n] = cy + mpn_add (apx, apx, n, a3, s);
+#endif
+
+  ASSERT (apx[n] < 15);
+
+  TOOM4_SQR_REC (vh, apx, n + 1, tp);  /* vh,  2n+1 limbs */
+
+  /* Compute apx = a0 + a1 + a2 + a3 and amx = a0 - a1 + a2 - a3.  */
+  mpn_toom_eval_dgr3_pm1 (apx, amx, ap, n, s, tp);
+
+  TOOM4_SQR_REC (v1, apx, n + 1, tp);  /* v1,  2n+1 limbs */
+  TOOM4_SQR_REC (vm1, amx, n + 1, tp); /* vm1,  2n+1 limbs */
+
+  TOOM4_SQR_REC (v0, a0, n, tp);
+  TOOM4_SQR_REC (vinf, a3, s, tp);     /* vinf, 2s limbs */
+
+  mpn_toom_interpolate_7pts (pp, n, 0, vm2, vm1, v2, vh, 2*s, tp);
+}
diff --git a/mpn/generic/toom52_mul.c b/mpn/generic/toom52_mul.c

new file mode 100644 (file)

index 0000000..21040fd
--- /dev/null
+++ b/mpn/generic/toom52_mul.c
@@ -0,0 +1,246 @@
+/* mpn_toom52_mul -- Multiply {ap,an} and {bp,bn} where an is nominally 5/2
+   times as large as bn.  Or more accurately, 2 bn < an < 5 bn.
+
+   Contributed to the GNU project by Marco Bodrato.
+
+   The idea of applying toom to unbalanced multiplication is due to Marco
+   Bodrato and Alberto Zanoni.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Evaluate in: -2, -1, 0, +1, +2, +inf
+
+  <-s-><--n--><--n--><--n--><--n-->
+   ___ ______ ______ ______ ______
+  |a4_|___a3_|___a2_|___a1_|___a0_|
+                       |b1|___b0_|
+                       <t-><--n-->
+
+  v0  =  a0                  * b0      #   A(0)*B(0)
+  v1  = (a0+ a1+ a2+ a3+  a4)*(b0+ b1) #   A(1)*B(1)      ah  <= 4   bh <= 1
+  vm1 = (a0- a1+ a2- a3+  a4)*(b0- b1) #  A(-1)*B(-1)    |ah| <= 2   bh  = 0
+  v2  = (a0+2a1+4a2+8a3+16a4)*(b0+2b1) #   A(2)*B(2)      ah  <= 30  bh <= 2
+  vm2 = (a0-2a1+4a2-8a3+16a4)*(b0-2b1) #  A(-2)*B(-2)    |ah| <= 20 |bh|<= 1
+  vinf=                   a4 *     b1  # A(inf)*B(inf)
+
+  Some slight optimization in evaluation are taken from the paper:
+  "Towards Optimal Toom-Cook Multiplication for Univariate and
+  Multivariate Polynomials in Characteristic 2 and 0."
+*/
+
+void
+mpn_toom52_mul (mp_ptr pp,
+               mp_srcptr ap, mp_size_t an,
+               mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
+{
+  mp_size_t n, s, t;
+  enum toom6_flags flags;
+
+#define a0  ap
+#define a1  (ap + n)
+#define a2  (ap + 2 * n)
+#define a3  (ap + 3 * n)
+#define a4  (ap + 4 * n)
+#define b0  bp
+#define b1  (bp + n)
+
+  n = 1 + (2 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) >> 1);
+
+  s = an - 4 * n;
+  t = bn - n;
+
+  ASSERT (0 < s && s <= n);
+  ASSERT (0 < t && t <= n);
+
+  /* Ensures that 5 values of n+1 limbs each fits in the product area.
+     Borderline cases are an = 32, bn = 8, n = 7, and an = 36, bn = 9,
+     n = 8. */
+  ASSERT (s+t >= 5);
+
+#define v0    pp                               /* 2n */
+#define vm1   (scratch)                                /* 2n+1 */
+#define v1    (pp + 2 * n)                     /* 2n+1 */
+#define vm2   (scratch + 2 * n + 1)            /* 2n+1 */
+#define v2    (scratch + 4 * n + 2)            /* 2n+1 */
+#define vinf  (pp + 5 * n)                     /* s+t */
+#define bs1    pp                              /* n+1 */
+#define bsm1  (scratch + 2 * n + 2)            /* n   */
+#define asm1  (scratch + 3 * n + 3)            /* n+1 */
+#define asm2  (scratch + 4 * n + 4)            /* n+1 */
+#define bsm2  (pp + n + 1)                     /* n+1 */
+#define bs2   (pp + 2 * n + 2)                 /* n+1 */
+#define as2   (pp + 3 * n + 3)                 /* n+1 */
+#define as1   (pp + 4 * n + 4)                 /* n+1 */
+
+  /* Scratch need is 6 * n + 3 + 1. We need one extra limb, because
+     products will overwrite 2n+2 limbs. */
+
+#define a0a2  scratch
+#define a1a3  asm1
+
+  /* Compute as2 and asm2.  */
+  flags = toom6_vm2_neg & mpn_toom_eval_pm2 (as2, asm2, 4, ap, n, s, a1a3);
+
+  /* Compute bs1 and bsm1.  */
+  if (t == n)
+    {
+#if HAVE_NATIVE_mpn_add_n_sub_n
+      mp_limb_t cy;
+
+      if (mpn_cmp (b0, b1, n) < 0)
+       {
+         cy = mpn_add_n_sub_n (bs1, bsm1, b1, b0, n);
+         flags ^= toom6_vm1_neg;
+       }
+      else
+       {
+         cy = mpn_add_n_sub_n (bs1, bsm1, b0, b1, n);
+       }
+      bs1[n] = cy >> 1;
+#else
+      bs1[n] = mpn_add_n (bs1, b0, b1, n);
+      if (mpn_cmp (b0, b1, n) < 0)
+       {
+         mpn_sub_n (bsm1, b1, b0, n);
+         flags ^= toom6_vm1_neg;
+       }
+      else
+       {
+         mpn_sub_n (bsm1, b0, b1, n);
+       }
+#endif
+    }
+  else
+    {
+      bs1[n] = mpn_add (bs1, b0, n, b1, t);
+      if (mpn_zero_p (b0 + t, n - t) && mpn_cmp (b0, b1, t) < 0)
+       {
+         mpn_sub_n (bsm1, b1, b0, t);
+         MPN_ZERO (bsm1 + t, n - t);
+         flags ^= toom6_vm1_neg;
+       }
+      else
+       {
+         mpn_sub (bsm1, b0, n, b1, t);
+       }
+    }
+
+  /* Compute bs2 and bsm2, recycling bs1 and bsm1. bs2=bs1+b1; bsm2=bsm1-b1  */
+  mpn_add (bs2, bs1, n+1, b1, t);
+  if (flags & toom6_vm1_neg )
+    {
+      bsm2[n] = mpn_add (bsm2, bsm1, n, b1, t);
+      flags ^= toom6_vm2_neg;
+    }
+  else
+    {
+      bsm2[n] = 0;
+      if (t == n)
+       {
+         if (mpn_cmp (bsm1, b1, n) < 0)
+           {
+             mpn_sub_n (bsm2, b1, bsm1, n);
+             flags ^= toom6_vm2_neg;
+           }
+         else
+           {
+             mpn_sub_n (bsm2, bsm1, b1, n);
+           }
+       }
+      else
+       {
+         if (mpn_zero_p (bsm1 + t, n - t) && mpn_cmp (bsm1, b1, t) < 0)
+           {
+             mpn_sub_n (bsm2, b1, bsm1, t);
+             MPN_ZERO (bsm2 + t, n - t);
+             flags ^= toom6_vm2_neg;
+           }
+         else
+           {
+             mpn_sub (bsm2, bsm1, n, b1, t);
+           }
+       }
+    }
+
+  /* Compute as1 and asm1.  */
+  flags ^= toom6_vm1_neg & mpn_toom_eval_pm1 (as1, asm1, 4, ap, n, s, a0a2);
+
+  ASSERT (as1[n] <= 4);
+  ASSERT (bs1[n] <= 1);
+  ASSERT (asm1[n] <= 2);
+/*   ASSERT (bsm1[n] <= 1); */
+  ASSERT (as2[n] <=30);
+  ASSERT (bs2[n] <= 2);
+  ASSERT (asm2[n] <= 20);
+  ASSERT (bsm2[n] <= 1);
+
+  /* vm1, 2n+1 limbs */
+  mpn_mul (vm1, asm1, n+1, bsm1, n);  /* W4 */
+
+  /* vm2, 2n+1 limbs */
+  mpn_mul_n (vm2, asm2, bsm2, n+1);  /* W2 */
+
+  /* v2, 2n+1 limbs */
+  mpn_mul_n (v2, as2, bs2, n+1);  /* W1 */
+
+  /* v1, 2n+1 limbs */
+  mpn_mul_n (v1, as1, bs1, n+1);  /* W3 */
+
+  /* vinf, s+t limbs */   /* W0 */
+  if (s > t)  mpn_mul (vinf, a4, s, b1, t);
+  else        mpn_mul (vinf, b1, t, a4, s);
+
+  /* v0, 2n limbs */
+  mpn_mul_n (v0, ap, bp, n);  /* W5 */
+
+  mpn_toom_interpolate_6pts (pp, n, flags, vm1, vm2, v2, t + s);
+
+#undef v0
+#undef vm1
+#undef v1
+#undef vm2
+#undef v2
+#undef vinf
+#undef bs1
+#undef bs2
+#undef bsm1
+#undef bsm2
+#undef asm1
+#undef asm2
+#undef as1
+#undef as2
+#undef a0a2
+#undef b0b2
+#undef a1a3
+#undef a0
+#undef a1
+#undef a2
+#undef a3
+#undef b0
+#undef b1
+#undef b2
+
+}
diff --git a/mpn/generic/toom53_mul.c b/mpn/generic/toom53_mul.c

new file mode 100644 (file)

index 0000000..8a0807a
--- /dev/null
+++ b/mpn/generic/toom53_mul.c
@@ -0,0 +1,320 @@
+/* mpn_toom53_mul -- Multiply {ap,an} and {bp,bn} where an is nominally 5/3
+   times as large as bn.  Or more accurately, (4/3)bn < an < (5/2)bn.
+
+   Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+
+   The idea of applying toom to unbalanced multiplication is due to Marco
+   Bodrato and Alberto Zanoni.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2006, 2007, 2008 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Evaluate in: 0, +1, -1, +2, -2, 1/2, +inf
+
+  <-s-><--n--><--n--><--n--><--n-->
+   ___ ______ ______ ______ ______
+  |a4_|___a3_|___a2_|___a1_|___a0_|
+              |__b2|___b1_|___b0_|
+              <-t--><--n--><--n-->
+
+  v0  =    a0                  *  b0          #    A(0)*B(0)
+  v1  = (  a0+ a1+ a2+ a3+  a4)*( b0+ b1+ b2) #    A(1)*B(1)      ah  <= 4   bh <= 2
+  vm1 = (  a0- a1+ a2- a3+  a4)*( b0- b1+ b2) #   A(-1)*B(-1)    |ah| <= 2   bh <= 1
+  v2  = (  a0+2a1+4a2+8a3+16a4)*( b0+2b1+4b2) #    A(2)*B(2)      ah  <= 30  bh <= 6
+  vm2 = (  a0-2a1+4a2-8a3+16a4)*( b0-2b1+4b2) #   A(-2)*B(-2)    -9<=ah<=20 -1<=bh<=4
+  vh  = (16a0+8a1+4a2+2a3+  a4)*(4b0+2b1+ b2) #  A(1/2)*B(1/2)    ah  <= 30  bh <= 6
+  vinf=                     a4 *          b2  #  A(inf)*B(inf)
+*/
+
+void
+mpn_toom53_mul (mp_ptr pp,
+               mp_srcptr ap, mp_size_t an,
+               mp_srcptr bp, mp_size_t bn,
+               mp_ptr scratch)
+{
+  mp_size_t n, s, t;
+  mp_limb_t cy;
+  mp_ptr gp;
+  mp_ptr as1, asm1, as2, asm2, ash;
+  mp_ptr bs1, bsm1, bs2, bsm2, bsh;
+  enum toom7_flags flags;
+  TMP_DECL;
+
+#define a0  ap
+#define a1  (ap + n)
+#define a2  (ap + 2*n)
+#define a3  (ap + 3*n)
+#define a4  (ap + 4*n)
+#define b0  bp
+#define b1  (bp + n)
+#define b2  (bp + 2*n)
+
+  n = 1 + (3 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) / (size_t) 3);
+
+  s = an - 4 * n;
+  t = bn - 2 * n;
+
+  ASSERT (0 < s && s <= n);
+  ASSERT (0 < t && t <= n);
+
+  TMP_MARK;
+
+  as1  = TMP_SALLOC_LIMBS (n + 1);
+  asm1 = TMP_SALLOC_LIMBS (n + 1);
+  as2  = TMP_SALLOC_LIMBS (n + 1);
+  asm2 = TMP_SALLOC_LIMBS (n + 1);
+  ash  = TMP_SALLOC_LIMBS (n + 1);
+
+  bs1  = TMP_SALLOC_LIMBS (n + 1);
+  bsm1 = TMP_SALLOC_LIMBS (n + 1);
+  bs2  = TMP_SALLOC_LIMBS (n + 1);
+  bsm2 = TMP_SALLOC_LIMBS (n + 1);
+  bsh  = TMP_SALLOC_LIMBS (n + 1);
+
+  gp = pp;
+
+  /* Compute as1 and asm1.  */
+  flags = toom7_w3_neg & mpn_toom_eval_pm1 (as1, asm1, 4, ap, n, s, gp);
+
+  /* Compute as2 and asm2. */
+  flags |= toom7_w1_neg & mpn_toom_eval_pm2 (as2, asm2, 4, ap, n, s, gp);
+
+  /* Compute ash = 16 a0 + 8 a1 + 4 a2 + 2 a3 + a4
+     = 2*(2*(2*(2*a0 + a1) + a2) + a3) + a4  */
+#if HAVE_NATIVE_mpn_addlsh1_n
+  cy = mpn_addlsh1_n (ash, a1, a0, n);
+  cy = 2*cy + mpn_addlsh1_n (ash, a2, ash, n);
+  cy = 2*cy + mpn_addlsh1_n (ash, a3, ash, n);
+  if (s < n)
+    {
+      mp_limb_t cy2;
+      cy2 = mpn_addlsh1_n (ash, a4, ash, s);
+      ash[n] = 2*cy + mpn_lshift (ash + s, ash + s, n - s, 1);
+      MPN_INCR_U (ash + s, n+1-s, cy2);
+    }
+  else
+    ash[n] = 2*cy + mpn_addlsh1_n (ash, a4, ash, n);
+#else
+  cy = mpn_lshift (ash, a0, n, 1);
+  cy += mpn_add_n (ash, ash, a1, n);
+  cy = 2*cy + mpn_lshift (ash, ash, n, 1);
+  cy += mpn_add_n (ash, ash, a2, n);
+  cy = 2*cy + mpn_lshift (ash, ash, n, 1);
+  cy += mpn_add_n (ash, ash, a3, n);
+  cy = 2*cy + mpn_lshift (ash, ash, n, 1);
+  ash[n] = cy + mpn_add (ash, ash, n, a4, s);
+#endif
+
+  /* Compute bs1 and bsm1.  */
+  bs1[n] = mpn_add (bs1, b0, n, b2, t);                /* b0 + b2 */
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  if (bs1[n] == 0 && mpn_cmp (bs1, b1, n) < 0)
+    {
+      bs1[n] = mpn_add_n_sub_n (bs1, bsm1, b1, bs1, n) >> 1;
+      bsm1[n] = 0;
+      flags ^= toom7_w3_neg;
+    }
+  else
+    {
+      cy = mpn_add_n_sub_n (bs1, bsm1, bs1, b1, n);
+      bsm1[n] = bs1[n] - (cy & 1);
+      bs1[n] += (cy >> 1);
+    }
+#else
+  if (bs1[n] == 0 && mpn_cmp (bs1, b1, n) < 0)
+    {
+      mpn_sub_n (bsm1, b1, bs1, n);
+      bsm1[n] = 0;
+      flags ^= toom7_w3_neg;
+    }
+  else
+    {
+      bsm1[n] = bs1[n] - mpn_sub_n (bsm1, bs1, b1, n);
+    }
+  bs1[n] += mpn_add_n (bs1, bs1, b1, n);  /* b0+b1+b2 */
+#endif
+
+  /* Compute bs2 and bsm2. */
+#if HAVE_NATIVE_mpn_addlsh_n || HAVE_NATIVE_mpn_addlsh2_n
+#if HAVE_NATIVE_mpn_addlsh2_n
+  cy = mpn_addlsh2_n (bs2, b0, b2, t);
+#else /* HAVE_NATIVE_mpn_addlsh_n */
+  cy = mpn_addlsh_n (bs2, b0, b2, t, 2);
+#endif
+  if (t < n)
+    cy = mpn_add_1 (bs2 + t, b0 + t, n - t, cy);
+  bs2[n] = cy;
+#else
+  cy = mpn_lshift (gp, b2, t, 2);
+  bs2[n] = mpn_add (bs2, b0, n, gp, t);
+  MPN_INCR_U (bs2 + t, n+1-t, cy);
+#endif
+
+  gp[n] = mpn_lshift (gp, b1, n, 1);
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  if (mpn_cmp (bs2, gp, n+1) < 0)
+    {
+      ASSERT_NOCARRY (mpn_add_n_sub_n (bs2, bsm2, gp, bs2, n+1));
+      flags ^= toom7_w1_neg;
+    }
+  else
+    {
+      ASSERT_NOCARRY (mpn_add_n_sub_n (bs2, bsm2, bs2, gp, n+1));
+    }
+#else
+  if (mpn_cmp (bs2, gp, n+1) < 0)
+    {
+      ASSERT_NOCARRY (mpn_sub_n (bsm2, gp, bs2, n+1));
+      flags ^= toom7_w1_neg;
+    }
+  else
+    {
+      ASSERT_NOCARRY (mpn_sub_n (bsm2, bs2, gp, n+1));
+    }
+  mpn_add_n (bs2, bs2, gp, n+1);
+#endif
+
+  /* Compute bsh = 4 b0 + 2 b1 + b2 = 2*(2*b0 + b1)+b2.  */
+#if HAVE_NATIVE_mpn_addlsh1_n
+  cy = mpn_addlsh1_n (bsh, b1, b0, n);
+  if (t < n)
+    {
+      mp_limb_t cy2;
+      cy2 = mpn_addlsh1_n (bsh, b2, bsh, t);
+      bsh[n] = 2*cy + mpn_lshift (bsh + t, bsh + t, n - t, 1);
+      MPN_INCR_U (bsh + t, n+1-t, cy2);
+    }
+  else
+    bsh[n] = 2*cy + mpn_addlsh1_n (bsh, b2, bsh, n);
+#else
+  cy = mpn_lshift (bsh, b0, n, 1);
+  cy += mpn_add_n (bsh, bsh, b1, n);
+  cy = 2*cy + mpn_lshift (bsh, bsh, n, 1);
+  bsh[n] = cy + mpn_add (bsh, bsh, n, b2, t);
+#endif
+
+  ASSERT (as1[n] <= 4);
+  ASSERT (bs1[n] <= 2);
+  ASSERT (asm1[n] <= 2);
+  ASSERT (bsm1[n] <= 1);
+  ASSERT (as2[n] <= 30);
+  ASSERT (bs2[n] <= 6);
+  ASSERT (asm2[n] <= 20);
+  ASSERT (bsm2[n] <= 4);
+  ASSERT (ash[n] <= 30);
+  ASSERT (bsh[n] <= 6);
+
+#define v0    pp                               /* 2n */
+#define v1    (pp + 2 * n)                     /* 2n+1 */
+#define vinf  (pp + 6 * n)                     /* s+t */
+#define v2    scratch                          /* 2n+1 */
+#define vm2   (scratch + 2 * n + 1)            /* 2n+1 */
+#define vh    (scratch + 4 * n + 2)            /* 2n+1 */
+#define vm1   (scratch + 6 * n + 3)            /* 2n+1 */
+#define scratch_out (scratch + 8 * n + 4)              /* 2n+1 */
+  /* Total scratch need: 10*n+5 */
+
+  /* Must be in allocation order, as they overwrite one limb beyond
+   * 2n+1. */
+  mpn_mul_n (v2, as2, bs2, n + 1);             /* v2, 2n+1 limbs */
+  mpn_mul_n (vm2, asm2, bsm2, n + 1);          /* vm2, 2n+1 limbs */
+  mpn_mul_n (vh, ash, bsh, n + 1);             /* vh, 2n+1 limbs */
+
+  /* vm1, 2n+1 limbs */
+#ifdef SMALLER_RECURSION
+  mpn_mul_n (vm1, asm1, bsm1, n);
+  if (asm1[n] == 1)
+    {
+      cy = bsm1[n] + mpn_add_n (vm1 + n, vm1 + n, bsm1, n);
+    }
+  else if (asm1[n] == 2)
+    {
+#if HAVE_NATIVE_mpn_addlsh1_n
+      cy = 2 * bsm1[n] + mpn_addlsh1_n (vm1 + n, vm1 + n, bsm1, n);
+#else
+      cy = 2 * bsm1[n] + mpn_addmul_1 (vm1 + n, bsm1, n, CNST_LIMB(2));
+#endif
+    }
+  else
+    cy = 0;
+  if (bsm1[n] != 0)
+    cy += mpn_add_n (vm1 + n, vm1 + n, asm1, n);
+  vm1[2 * n] = cy;
+#else /* SMALLER_RECURSION */
+  vm1[2 * n] = 0;
+  mpn_mul_n (vm1, asm1, bsm1, n + ((asm1[n] | bsm1[n]) != 0));
+#endif /* SMALLER_RECURSION */
+
+  /* v1, 2n+1 limbs */
+#ifdef SMALLER_RECURSION
+  mpn_mul_n (v1, as1, bs1, n);
+  if (as1[n] == 1)
+    {
+      cy = bs1[n] + mpn_add_n (v1 + n, v1 + n, bs1, n);
+    }
+  else if (as1[n] == 2)
+    {
+#if HAVE_NATIVE_mpn_addlsh1_n
+      cy = 2 * bs1[n] + mpn_addlsh1_n (v1 + n, v1 + n, bs1, n);
+#else
+      cy = 2 * bs1[n] + mpn_addmul_1 (v1 + n, bs1, n, CNST_LIMB(2));
+#endif
+    }
+  else if (as1[n] != 0)
+    {
+      cy = as1[n] * bs1[n] + mpn_addmul_1 (v1 + n, bs1, n, as1[n]);
+    }
+  else
+    cy = 0;
+  if (bs1[n] == 1)
+    {
+      cy += mpn_add_n (v1 + n, v1 + n, as1, n);
+    }
+  else if (bs1[n] == 2)
+    {
+#if HAVE_NATIVE_mpn_addlsh1_n
+      cy += mpn_addlsh1_n (v1 + n, v1 + n, as1, n);
+#else
+      cy += mpn_addmul_1 (v1 + n, as1, n, CNST_LIMB(2));
+#endif
+    }
+  v1[2 * n] = cy;
+#else /* SMALLER_RECURSION */
+  v1[2 * n] = 0;
+  mpn_mul_n (v1, as1, bs1, n + ((as1[n] | bs1[n]) != 0));
+#endif /* SMALLER_RECURSION */
+
+  mpn_mul_n (v0, a0, b0, n);                   /* v0, 2n limbs */
+
+  /* vinf, s+t limbs */
+  if (s > t)  mpn_mul (vinf, a4, s, b2, t);
+  else        mpn_mul (vinf, b2, t, a4, s);
+
+  mpn_toom_interpolate_7pts (pp, n, flags, vm2, vm1, v2, vh, s + t,
+                            scratch_out);
+
+  TMP_FREE;
+}
diff --git a/mpn/generic/toom62_mul.c b/mpn/generic/toom62_mul.c

new file mode 100644 (file)

index 0000000..c01cfba
--- /dev/null
+++ b/mpn/generic/toom62_mul.c
@@ -0,0 +1,300 @@
+/* mpn_toom62_mul -- Multiply {ap,an} and {bp,bn} where an is nominally 3 times
+   as large as bn.  Or more accurately, (5/2)bn < an < 6bn.
+
+   Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+
+   The idea of applying toom to unbalanced multiplication is due to Marco
+   Bodrato and Alberto Zanoni.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2006, 2007, 2008 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Evaluate in:
+   0, +1, -1, +2, -2, 1/2, +inf
+
+  <-s-><--n--><--n--><--n--><--n--><--n-->
+   ___ ______ ______ ______ ______ ______
+  |a5_|___a4_|___a3_|___a2_|___a1_|___a0_|
+                            |_b1_|___b0_|
+                            <-t--><--n-->
+
+  v0  =    a0                       *   b0      #    A(0)*B(0)
+  v1  = (  a0+  a1+ a2+ a3+  a4+  a5)*( b0+ b1) #    A(1)*B(1)      ah  <= 5   bh <= 1
+  vm1 = (  a0-  a1+ a2- a3+  a4-  a5)*( b0- b1) #   A(-1)*B(-1)    |ah| <= 2   bh  = 0
+  v2  = (  a0+ 2a1+4a2+8a3+16a4+32a5)*( b0+2b1) #    A(2)*B(2)      ah  <= 62  bh <= 2
+  vm2 = (  a0- 2a1+4a2-8a3+16a4-32a5)*( b0-2b1) #   A(-2)*B(-2)    -41<=ah<=20 -1<=bh<=0
+  vh  = (32a0+16a1+8a2+4a3+ 2a4+  a5)*(2b0+ b1) #  A(1/2)*B(1/2)    ah  <= 62  bh <= 2
+  vinf=                           a5 *      b1  #  A(inf)*B(inf)
+*/
+
+void
+mpn_toom62_mul (mp_ptr pp,
+               mp_srcptr ap, mp_size_t an,
+               mp_srcptr bp, mp_size_t bn,
+               mp_ptr scratch)
+{
+  mp_size_t n, s, t;
+  mp_limb_t cy;
+  mp_ptr as1, asm1, as2, asm2, ash;
+  mp_ptr bs1, bsm1, bs2, bsm2, bsh;
+  mp_ptr gp;
+  enum toom7_flags aflags, bflags;
+  TMP_DECL;
+
+#define a0  ap
+#define a1  (ap + n)
+#define a2  (ap + 2*n)
+#define a3  (ap + 3*n)
+#define a4  (ap + 4*n)
+#define a5  (ap + 5*n)
+#define b0  bp
+#define b1  (bp + n)
+
+  n = 1 + (an >= 3 * bn ? (an - 1) / (size_t) 6 : (bn - 1) >> 1);
+
+  s = an - 5 * n;
+  t = bn - n;
+
+  ASSERT (0 < s && s <= n);
+  ASSERT (0 < t && t <= n);
+
+  TMP_MARK;
+
+  as1 = TMP_SALLOC_LIMBS (n + 1);
+  asm1 = TMP_SALLOC_LIMBS (n + 1);
+  as2 = TMP_SALLOC_LIMBS (n + 1);
+  asm2 = TMP_SALLOC_LIMBS (n + 1);
+  ash = TMP_SALLOC_LIMBS (n + 1);
+
+  bs1 = TMP_SALLOC_LIMBS (n + 1);
+  bsm1 = TMP_SALLOC_LIMBS (n);
+  bs2 = TMP_SALLOC_LIMBS (n + 1);
+  bsm2 = TMP_SALLOC_LIMBS (n + 1);
+  bsh = TMP_SALLOC_LIMBS (n + 1);
+
+  gp = pp;
+
+  /* Compute as1 and asm1.  */
+  aflags = toom7_w3_neg & mpn_toom_eval_pm1 (as1, asm1, 5, ap, n, s, gp);
+
+  /* Compute as2 and asm2. */
+  aflags |= toom7_w1_neg & mpn_toom_eval_pm2 (as2, asm2, 5, ap, n, s, gp);
+
+  /* Compute ash = 32 a0 + 16 a1 + 8 a2 + 4 a3 + 2 a4 + a5
+     = 2*(2*(2*(2*(2*a0 + a1) + a2) + a3) + a4) + a5  */
+
+#if HAVE_NATIVE_mpn_addlsh1_n
+  cy = mpn_addlsh1_n (ash, a1, a0, n);
+  cy = 2*cy + mpn_addlsh1_n (ash, a2, ash, n);
+  cy = 2*cy + mpn_addlsh1_n (ash, a3, ash, n);
+  cy = 2*cy + mpn_addlsh1_n (ash, a4, ash, n);
+  if (s < n)
+    {
+      mp_limb_t cy2;
+      cy2 = mpn_addlsh1_n (ash, a5, ash, s);
+      ash[n] = 2*cy + mpn_lshift (ash + s, ash + s, n - s, 1);
+      MPN_INCR_U (ash + s, n+1-s, cy2);
+    }
+  else
+    ash[n] = 2*cy + mpn_addlsh1_n (ash, a5, ash, n);
+#else
+  cy = mpn_lshift (ash, a0, n, 1);
+  cy += mpn_add_n (ash, ash, a1, n);
+  cy = 2*cy + mpn_lshift (ash, ash, n, 1);
+  cy += mpn_add_n (ash, ash, a2, n);
+  cy = 2*cy + mpn_lshift (ash, ash, n, 1);
+  cy += mpn_add_n (ash, ash, a3, n);
+  cy = 2*cy + mpn_lshift (ash, ash, n, 1);
+  cy += mpn_add_n (ash, ash, a4, n);
+  cy = 2*cy + mpn_lshift (ash, ash, n, 1);
+  ash[n] = cy + mpn_add (ash, ash, n, a5, s);
+#endif
+
+  /* Compute bs1 and bsm1.  */
+  if (t == n)
+    {
+#if HAVE_NATIVE_mpn_add_n_sub_n
+      if (mpn_cmp (b0, b1, n) < 0)
+       {
+         cy = mpn_add_n_sub_n (bs1, bsm1, b1, b0, n);
+         bflags = toom7_w3_neg;
+       }
+      else
+       {
+         cy = mpn_add_n_sub_n (bs1, bsm1, b0, b1, n);
+         bflags = 0;
+       }
+      bs1[n] = cy >> 1;
+#else
+      bs1[n] = mpn_add_n (bs1, b0, b1, n);
+      if (mpn_cmp (b0, b1, n) < 0)
+       {
+         mpn_sub_n (bsm1, b1, b0, n);
+         bflags = toom7_w3_neg;
+       }
+      else
+       {
+         mpn_sub_n (bsm1, b0, b1, n);
+         bflags = 0;
+       }
+#endif
+    }
+  else
+    {
+      bs1[n] = mpn_add (bs1, b0, n, b1, t);
+      if (mpn_zero_p (b0 + t, n - t) && mpn_cmp (b0, b1, t) < 0)
+       {
+         mpn_sub_n (bsm1, b1, b0, t);
+         MPN_ZERO (bsm1 + t, n - t);
+         bflags = toom7_w3_neg;
+       }
+      else
+       {
+         mpn_sub (bsm1, b0, n, b1, t);
+         bflags = 0;
+       }
+    }
+
+  /* Compute bs2 and bsm2. Recycling bs1 and bsm1; bs2=bs1+b1, bsm2 =
+     bsm1 - b1 */
+  mpn_add (bs2, bs1, n + 1, b1, t);
+  if (bflags & toom7_w3_neg)
+    {
+      bsm2[n] = mpn_add (bsm2, bsm1, n, b1, t);
+      bflags |= toom7_w1_neg;
+    }
+  else
+    {
+      /* FIXME: Simplify this logic? */
+      if (t < n)
+       {
+         if (mpn_zero_p (bsm1 + t, n - t) && mpn_cmp (bsm1, b1, t) < 0)
+           {
+             ASSERT_NOCARRY (mpn_sub_n (bsm2, b1, bsm1, t));
+             MPN_ZERO (bsm2 + t, n + 1 - t);
+             bflags |= toom7_w1_neg;
+           }
+         else
+           {
+             ASSERT_NOCARRY (mpn_sub (bsm2, bsm1, n, b1, t));
+             bsm2[n] = 0;
+           }
+       }
+      else
+       {
+         if (mpn_cmp (bsm1, b1, n) < 0)
+           {
+             ASSERT_NOCARRY (mpn_sub_n (bsm2, b1, bsm1, n));
+             bflags |= toom7_w1_neg;
+           }
+         else
+           {
+             ASSERT_NOCARRY (mpn_sub (bsm2, bsm1, n, b1, n));
+           }
+         bsm2[n] = 0;
+       }
+    }
+
+  /* Compute bsh = 2 b0 + b1, recycling bs1: bsh = bs1 + b0.  */
+  mpn_add (bsh, bs1, n + 1, b0, n);
+
+  ASSERT (as1[n] <= 5);
+  ASSERT (bs1[n] <= 1);
+  ASSERT (asm1[n] <= 2);
+  ASSERT (as2[n] <= 62);
+  ASSERT (bs2[n] <= 2);
+  ASSERT (asm2[n] <= 41);
+  ASSERT (bsm2[n] <= 1);
+  ASSERT (ash[n] <= 62);
+  ASSERT (bsh[n] <= 2);
+
+#define v0    pp                               /* 2n */
+#define v1    (pp + 2 * n)                     /* 2n+1 */
+#define vinf  (pp + 6 * n)                     /* s+t */
+#define v2    scratch                          /* 2n+1 */
+#define vm2   (scratch + 2 * n + 1)            /* 2n+1 */
+#define vh    (scratch + 4 * n + 2)            /* 2n+1 */
+#define vm1   (scratch + 6 * n + 3)            /* 2n+1 */
+#define scratch_out (scratch + 8 * n + 4)              /* 2n+1 */
+  /* Total scratch need: 10*n+5 */
+
+  /* Must be in allocation order, as they overwrite one limb beyond
+   * 2n+1. */
+  mpn_mul_n (v2, as2, bs2, n + 1);             /* v2, 2n+1 limbs */
+  mpn_mul_n (vm2, asm2, bsm2, n + 1);          /* vm2, 2n+1 limbs */
+  mpn_mul_n (vh, ash, bsh, n + 1);             /* vh, 2n+1 limbs */
+
+  /* vm1, 2n+1 limbs */
+  mpn_mul_n (vm1, asm1, bsm1, n);
+  cy = 0;
+  if (asm1[n] == 1)
+    {
+      cy = mpn_add_n (vm1 + n, vm1 + n, bsm1, n);
+    }
+  else if (asm1[n] == 2)
+    {
+#if HAVE_NATIVE_mpn_addlsh1_n
+      cy = mpn_addlsh1_n (vm1 + n, vm1 + n, bsm1, n);
+#else
+      cy = mpn_addmul_1 (vm1 + n, bsm1, n, CNST_LIMB(2));
+#endif
+    }
+  vm1[2 * n] = cy;
+
+  /* v1, 2n+1 limbs */
+  mpn_mul_n (v1, as1, bs1, n);
+  if (as1[n] == 1)
+    {
+      cy = bs1[n] + mpn_add_n (v1 + n, v1 + n, bs1, n);
+    }
+  else if (as1[n] == 2)
+    {
+#if HAVE_NATIVE_mpn_addlsh1_n
+      cy = 2 * bs1[n] + mpn_addlsh1_n (v1 + n, v1 + n, bs1, n);
+#else
+      cy = 2 * bs1[n] + mpn_addmul_1 (v1 + n, bs1, n, CNST_LIMB(2));
+#endif
+    }
+  else if (as1[n] != 0)
+    {
+      cy = as1[n] * bs1[n] + mpn_addmul_1 (v1 + n, bs1, n, as1[n]);
+    }
+  else
+    cy = 0;
+  if (bs1[n] != 0)
+    cy += mpn_add_n (v1 + n, v1 + n, as1, n);
+  v1[2 * n] = cy;
+
+  mpn_mul_n (v0, a0, b0, n);                   /* v0, 2n limbs */
+
+  /* vinf, s+t limbs */
+  if (s > t)  mpn_mul (vinf, a5, s, b1, t);
+  else        mpn_mul (vinf, b1, t, a5, s);
+
+  mpn_toom_interpolate_7pts (pp, n, aflags ^ bflags,
+                            vm2, vm1, v2, vh, s + t, scratch_out);
+
+  TMP_FREE;
+}
diff --git a/mpn/generic/toom63_mul.c b/mpn/generic/toom63_mul.c

new file mode 100644 (file)

index 0000000..9c21457
--- /dev/null
+++ b/mpn/generic/toom63_mul.c
@@ -0,0 +1,221 @@
+/* Implementation of the algorithm for Toom-Cook 4.5-way.
+
+   Contributed to the GNU project by Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Stores |{ap,n}-{bp,n}| in {rp,n}; returns 0 if {ap,n} >= {bp,n}, else ~0. */
+static int
+abs_sub_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
+{
+  mp_size_t i;
+
+  /* Walk down from the most significant limb; limbs that compare equal
+     give zero limbs in the result. */
+  for (i = n - 1; i >= 0; i--)
+    {
+      mp_limb_t a_limb = ap[i];
+      mp_limb_t b_limb = bp[i];
+
+      if (a_limb == b_limb)
+       {
+         rp[i] = 0;
+         continue;
+       }
+      /* The first differing limb fixes the sign; subtract the low i+1 limbs. */
+      if (a_limb > b_limb)
+       mpn_sub_n (rp, ap, bp, i + 1);
+      else
+       mpn_sub_n (rp, bp, ap, i + 1);
+      return a_limb > b_limb ? 0 : ~0;
+    }
+  return 0;
+}
+
+/* {rm,n} = |{rp,n}-{rs,n}|, then {rp,n} += {rs,n}; returns abs_sub_n's sign. */
+static int
+abs_sub_add_n (mp_ptr rm, mp_ptr rp, mp_srcptr rs, mp_size_t n) {
+  const int sign = abs_sub_n (rm, rp, rs, n);
+  ASSERT_NOCARRY (mpn_add_n (rp, rp, rs, n));
+  return sign;
+}
+
+
+/* Toom-4.5, the splitting 6x3 unbalanced version.
+   Evaluate in: infinity, +4, -4, +2, -2, +1, -1, 0.
+
+  <--s-><--n--><--n--><--n--><--n--><--n-->
+   ____ ______ ______ ______ ______ ______
+  |_a5_|__a4__|__a3__|__a2__|__a1__|__a0__|
+                       |b2_|__b1__|__b0__|
+                       <-t-><--n--><--n-->
+
+*/
+#define TOOM_63_MUL_N_REC(p, a, b, n, ws)              \
+  do { mpn_mul_n (p, a, b, n);  /* ws is not used */   \
+  } while (0)
+
+#define TOOM_63_MUL_REC(p, a, na, b, nb, ws)           \
+  do { mpn_mul (p, a, na, b, nb);  /* ws is not used */        \
+  } while (0)
+
+void
+mpn_toom63_mul (mp_ptr pp,
+               mp_srcptr ap, mp_size_t an,
+               mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
+{
+  mp_size_t n, s, t;
+  mp_limb_t cy;
+  int sign;
+  /* Multiply {ap,an} by {bp,bn} into pp, splitting a in 6 and b in 3 pieces. */
+  /***************************** decomposition *******************************/
+#define a5  (ap + 5 * n)
+#define b0  (bp + 0 * n)
+#define b1  (bp + 1 * n)
+#define b2  (bp + 2 * n)
+
+  ASSERT (an >= bn);
+  n = 1 + (an >= 2 * bn ? (an - 1) / (size_t) 6 : (bn - 1) / (size_t) 3);
+  /* s and t: limbs left for the top pieces a5 and b2 */
+  s = an - 5 * n;
+  t = bn - 2 * n;
+
+  ASSERT (0 < s && s <= n);
+  ASSERT (0 < t && t <= n);
+  /* WARNING! it assumes s+t>=n */
+  ASSERT ( s + t >= n );
+  ASSERT ( s + t > 4);
+  /* WARNING! it assumes n>1; the assertion below is stricter (n>2) */
+  ASSERT ( n > 2);
+
+#define   r8    pp                             /* 2n   */
+#define   r7    scratch                                /* 3n+1 */
+#define   r5    (pp + 3*n)                     /* 3n+1 */
+#define   v0    (pp + 3*n)                     /* n+1; same start as r5 */
+#define   v1    (pp + 4*n+1)                   /* n+1 */
+#define   v2    (pp + 5*n+2)                   /* n+1 */
+#define   v3    (pp + 6*n+3)                   /* n+1 */
+#define   r3    (scratch + 3 * n + 1)          /* 3n+1 */
+#define   r1    (pp + 7*n)                     /* s+t <= 2*n */
+#define   ws    (scratch + 6 * n + 2)          /* 3n+1, see below */
+
+  /* Alloc also 3n+1 limbs for ws... mpn_toom_interpolate_8pts may
+     need all of them, when DO_mpn_sublsh_n uses a scratch  */
+/*   if (scratch == NULL) scratch = TMP_SALLOC_LIMBS (9 * n + 3); */
+
+  /********************** evaluation and recursive calls *********************/
+  /* $\pm4$ */
+  sign = mpn_toom_eval_pm2exp (v2, v0, 5, ap, n, s, 2, pp);
+  pp[n] = mpn_lshift (pp, b1, n, 2); /* 4b1 */
+  /* FIXME: use addlsh */
+  v3[t] = mpn_lshift (v3, b2, t, 4);/* 16b2 */
+  if ( n == t )
+    v3[n]+= mpn_add_n (v3, v3, b0, n); /* 16b2+b0 */
+  else
+    v3[n] = mpn_add (v3, b0, n, v3, t+1); /* 16b2+b0 */
+  sign ^= abs_sub_add_n (v1, v3, pp, n + 1); /* v1 = |v3-pp|, v3 += pp */
+  TOOM_63_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-4)*B(-4) */
+  TOOM_63_MUL_N_REC(r3, v2, v3, n + 1, ws); /* A(+4)*B(+4) */
+  mpn_toom_couple_handling (r3, 2*n+1, pp, sign, n, 2, 4);
+
+  /* $\pm1$ */
+  sign = mpn_toom_eval_pm1 (v2, v0, 5, ap, n, s,    pp);
+  /* Compute bs1 and bsm1. Code taken from toom33 */
+  cy = mpn_add (ws, b0, n, b2, t); /* ws = b0 + b2, carry in cy */
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  if (cy == 0 && mpn_cmp (ws, b1, n) < 0)
+    {
+      cy = mpn_add_n_sub_n (v3, v1, b1, ws, n);
+      v3[n] = cy >> 1;
+      v1[n] = 0;
+      sign = ~sign;
+    }
+  else
+    {
+      mp_limb_t cy2;
+      cy2 = mpn_add_n_sub_n (v3, v1, ws, b1, n);
+      v3[n] = cy + (cy2 >> 1);
+      v1[n] = cy - (cy2 & 1);
+    }
+#else
+  v3[n] = cy + mpn_add_n (v3, ws, b1, n);
+  if (cy == 0 && mpn_cmp (ws, b1, n) < 0)
+    {
+      mpn_sub_n (v1, b1, ws, n);
+      v1[n] = 0;
+      sign = ~sign;
+    }
+  else
+    {
+      cy -= mpn_sub_n (v1, ws, b1, n);
+      v1[n] = cy;
+    }
+#endif
+  TOOM_63_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-1)*B(-1) */
+  TOOM_63_MUL_N_REC(r7, v2, v3, n + 1, ws); /* A(1)*B(1) */
+  mpn_toom_couple_handling (r7, 2*n+1, pp, sign, n, 0, 0);
+
+  /* $\pm2$ */
+  sign = mpn_toom_eval_pm2 (v2, v0, 5, ap, n, s, pp);
+  pp[n] = mpn_lshift (pp, b1, n, 1); /* 2b1 */
+  /* FIXME: use addlsh or addlsh2 */
+  v3[t] = mpn_lshift (v3, b2, t, 2);/* 4b2 */
+  if ( n == t )
+    v3[n]+= mpn_add_n (v3, v3, b0, n); /* 4b2+b0 */
+  else
+    v3[n] = mpn_add (v3, b0, n, v3, t+1); /* 4b2+b0 */
+  sign ^= abs_sub_add_n (v1, v3, pp, n + 1); /* v1 = |v3-pp|, v3 += pp */
+  TOOM_63_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-2)*B(-2) */
+  TOOM_63_MUL_N_REC(r5, v2, v3, n + 1, ws); /* A(+2)*B(+2) */
+  mpn_toom_couple_handling (r5, 2*n+1, pp, sign, n, 1, 2);
+
+  /* A(0)*B(0) */
+  TOOM_63_MUL_N_REC(pp, ap, bp, n, ws);
+
+  /* Infinity: product of the top pieces, larger operand first */
+  if (s > t) {
+    TOOM_63_MUL_REC(r1, a5, s, b2, t, ws);
+  } else {
+    TOOM_63_MUL_REC(r1, b2, t, a5, s, ws);
+  };
+
+  mpn_toom_interpolate_8pts (pp, n, r3, r7, s + t, ws);
+
+#undef a5
+#undef b0
+#undef b1
+#undef b2
+#undef r1
+#undef r3
+#undef r5
+#undef v0
+#undef v1
+#undef v2
+#undef v3
+#undef r7
+#undef r8
+#undef ws
+}
diff --git a/mpn/generic/toom6_sqr.c b/mpn/generic/toom6_sqr.c

new file mode 100644 (file)

index 0000000..9fd3a56
--- /dev/null
+++ b/mpn/generic/toom6_sqr.c
@@ -0,0 +1,171 @@
+/* Implementation of the squaring algorithm with Toom-Cook 6.5-way.
+
+   Contributed to the GNU project by Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+#if GMP_NUMB_BITS < 21
+#error Not implemented.
+#endif
+
+
+#if TUNE_PROGRAM_BUILD
+#define MAYBE_sqr_basecase 1
+#define MAYBE_sqr_above_basecase   1
+#define MAYBE_sqr_toom2   1
+#define MAYBE_sqr_above_toom2   1
+#define MAYBE_sqr_toom3   1
+#define MAYBE_sqr_above_toom3   1
+#define MAYBE_sqr_above_toom4   1
+#else /* ! TUNE_PROGRAM_BUILD */
+#ifdef  SQR_TOOM8_THRESHOLD
+#define SQR_TOOM6_MAX ((SQR_TOOM8_THRESHOLD+6*2-1+5)/6)
+#else /* no SQR_TOOM8_THRESHOLD: derive the bound from SQR_FFT_THRESHOLD */
+#define SQR_TOOM6_MAX                                  \
+  ((SQR_FFT_THRESHOLD <= MP_SIZE_T_MAX - (6*2-1+5)) ?  \
+   ((SQR_FFT_THRESHOLD+6*2-1+5)/6)                     \
+   : MP_SIZE_T_MAX )
+#endif /* SQR_TOOM8_THRESHOLD */
+#define MAYBE_sqr_basecase                                     \
+  (SQR_TOOM6_THRESHOLD < 6 * SQR_TOOM2_THRESHOLD)
+#define MAYBE_sqr_above_basecase                               \
+  (SQR_TOOM6_MAX >=  SQR_TOOM2_THRESHOLD)
+#define MAYBE_sqr_toom2                                                \
+  (SQR_TOOM6_THRESHOLD < 6 * SQR_TOOM3_THRESHOLD)
+#define MAYBE_sqr_above_toom2                                  \
+  (SQR_TOOM6_MAX >= SQR_TOOM3_THRESHOLD)
+#define MAYBE_sqr_toom3                                                \
+  (SQR_TOOM6_THRESHOLD < 6 * SQR_TOOM4_THRESHOLD)
+#define MAYBE_sqr_above_toom3                                  \
+  (SQR_TOOM6_MAX >= SQR_TOOM4_THRESHOLD)
+#define MAYBE_sqr_above_toom4                                  \
+  (SQR_TOOM6_MAX >= SQR_TOOM6_THRESHOLD)
+#endif /* TUNE_PROGRAM_BUILD */
+
+#define TOOM6_SQR_REC(p, a, n, ws)                                     \
+  do {  /* pick the squaring routine for {a,n} from the size n */      \
+    if (MAYBE_sqr_basecase && ( !MAYBE_sqr_above_basecase              \
+       || BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD)))                   \
+      mpn_sqr_basecase (p, a, n);                                      \
+    else if (MAYBE_sqr_toom2 && ( !MAYBE_sqr_above_toom2               \
+            || BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD)))              \
+      mpn_toom2_sqr (p, a, n, ws);                                     \
+    else if (MAYBE_sqr_toom3 && ( !MAYBE_sqr_above_toom3               \
+            || BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD)))              \
+      mpn_toom3_sqr (p, a, n, ws);                                     \
+    else if (! MAYBE_sqr_above_toom4                                   \
+            || BELOW_THRESHOLD (n, SQR_TOOM6_THRESHOLD))               \
+      mpn_toom4_sqr (p, a, n, ws);                                     \
+    else                                                               \
+      mpn_toom6_sqr (p, a, n, ws);                                     \
+  } while (0)
+
+void
+mpn_toom6_sqr  (mp_ptr pp, mp_srcptr ap, mp_size_t an, mp_ptr scratch)
+{
+  mp_size_t n, s;
+  /* Square {ap,an} into pp; a is split in 6 pieces, the top one of s limbs. */
+  /***************************** decomposition *******************************/
+
+  ASSERT( an >= 18 );
+
+  n = 1 + (an - 1) / (size_t) 6;
+
+  s = an - 5 * n;
+
+  ASSERT (0 < s && s <= n);
+
+#define   r4    (pp + 3 * n)                   /* 3n+1 */
+#define   r2    (pp + 7 * n)                   /* 3n+1 */
+#define   r0    (pp +11 * n)                   /* 2s <= 2*n */
+#define   r5    (scratch)                      /* 3n+1 */
+#define   r3    (scratch + 3 * n + 1)          /* 3n+1 */
+#define   r1    (scratch + 6 * n + 2)          /* 3n+1 */
+#define   v0    (pp + 7 * n)                   /* n+1; same start as r2 */
+#define   v2    (pp + 9 * n+2)                 /* n+1 */
+#define   wse   (scratch + 9 * n + 3)          /* 3n+1 */
+
+  /* Alloc also 3n+1 limbs for wse... toom_interpolate_12pts may
+     need all of them, when DO_mpn_sublsh_n uses a scratch  */
+/*   if (scratch== NULL) */
+/*     scratch = TMP_SALLOC_LIMBS (12 * n + 6); */
+
+  /********************** evaluation and recursive calls *********************/
+  /* $\pm1/2$ (squares: the sign passed to couple_handling is always 0) */
+  mpn_toom_eval_pm2rexp (v2, v0, 5, ap, n, s, 1, pp);
+  TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-1/2)^2*2^. */
+  TOOM6_SQR_REC(r5, v2, n + 1, wse); /* A(+1/2)^2*2^. */
+  mpn_toom_couple_handling (r5, 2 * n + 1, pp, 0, n, 1, 0);
+
+  /* $\pm1$ */
+  mpn_toom_eval_pm1 (v2, v0, 5, ap, n, s,    pp);
+  TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-1)^2 */
+  TOOM6_SQR_REC(r3, v2, n + 1, wse); /* A(1)^2 */
+  mpn_toom_couple_handling (r3, 2 * n + 1, pp, 0, n, 0, 0);
+
+  /* $\pm4$ */
+  mpn_toom_eval_pm2exp (v2, v0, 5, ap, n, s, 2, pp);
+  TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-4)^2 */
+  TOOM6_SQR_REC(r1, v2, n + 1, wse); /* A(+4)^2 */
+  mpn_toom_couple_handling (r1, 2 * n + 1, pp, 0, n, 2, 4);
+
+  /* $\pm1/4$ */
+  mpn_toom_eval_pm2rexp (v2, v0, 5, ap, n, s, 2, pp);
+  TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-1/4)^2*4^. */
+  TOOM6_SQR_REC(r4, v2, n + 1, wse); /* A(+1/4)^2*4^. */
+  mpn_toom_couple_handling (r4, 2 * n + 1, pp, 0, n, 2, 0);
+
+  /* $\pm2$ */
+  mpn_toom_eval_pm2 (v2, v0, 5, ap, n, s, pp);
+  TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-2)^2 */
+  TOOM6_SQR_REC(r2, v2, n + 1, wse); /* A(+2)^2 */
+  mpn_toom_couple_handling (r2, 2 * n + 1, pp, 0, n, 1, 2);
+
+#undef v0
+#undef v2
+
+  /* A(0)^2 */
+  TOOM6_SQR_REC(pp, ap, n, wse);
+
+  mpn_toom_interpolate_12pts (pp, r1, r3, r5, n, 2 * s, 0, wse);
+
+#undef r0
+#undef r1
+#undef r2
+#undef r3
+#undef r4
+#undef r5
+#undef wse
+}
+#undef TOOM6_SQR_REC
+#undef MAYBE_sqr_basecase
+#undef MAYBE_sqr_above_basecase
+#undef MAYBE_sqr_toom2
+#undef MAYBE_sqr_above_toom2
+#undef MAYBE_sqr_toom3
+#undef MAYBE_sqr_above_toom3
+#undef MAYBE_sqr_above_toom4
diff --git a/mpn/generic/toom6h_mul.c b/mpn/generic/toom6h_mul.c

new file mode 100644 (file)

index 0000000..91ff833
--- /dev/null
+++ b/mpn/generic/toom6h_mul.c
@@ -0,0 +1,232 @@
+/* Implementation of the multiplication algorithm for Toom-Cook 6.5-way.
+
+   Contributed to the GNU project by Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+#if GMP_NUMB_BITS < 21
+#error Not implemented.
+#endif
+
+#if TUNE_PROGRAM_BUILD
+#define MAYBE_mul_basecase 1
+#define MAYBE_mul_toom22   1
+#define MAYBE_mul_toom33   1
+#define MAYBE_mul_toom6h   1
+#else /* ! TUNE_PROGRAM_BUILD */
+#define MAYBE_mul_basecase                                             \
+  (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM22_THRESHOLD)
+#define MAYBE_mul_toom22                                               \
+  (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM33_THRESHOLD)
+#define MAYBE_mul_toom33                                               \
+  (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM44_THRESHOLD)
+#define MAYBE_mul_toom6h                                               \
+  (MUL_FFT_THRESHOLD >= 6 * MUL_TOOM6H_THRESHOLD)
+#endif /* TUNE_PROGRAM_BUILD */
+
+#define TOOM6H_MUL_N_REC(p, a, b, n, ws)                               \
+  do {  /* pick the n x n multiplication routine from the size n */   \
+    if (MAYBE_mul_basecase                                             \
+       && BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))                   \
+      mpn_mul_basecase (p, a, n, b, n);                                        \
+    else if (MAYBE_mul_toom22                                          \
+            && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD))              \
+      mpn_toom22_mul (p, a, n, b, n, ws);                              \
+    else if (MAYBE_mul_toom33                                          \
+            && BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD))              \
+      mpn_toom33_mul (p, a, n, b, n, ws);                              \
+    else if (! MAYBE_mul_toom6h                                                \
+            || BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD))              \
+      mpn_toom44_mul (p, a, n, b, n, ws);                              \
+    else                                                               \
+      mpn_toom6h_mul (p, a, n, b, n, ws);                              \
+  } while (0)
+
+#define TOOM6H_MUL_REC(p, a, na, b, nb, ws)            \
+  do { mpn_mul (p, a, na, b, nb);  /* ws is not used */        \
+  } while (0)
+
+/* Toom-6.5 , compute the product {pp,an+bn} <- {ap,an} * {bp,bn}
+   With: an >= bn >= 46, an*6 <  bn * 17.
+   It _may_ work with bn<=46 and bn*17 < an*6 < bn*18
+
+   Evaluate in: infinity, +4, -4, +2, -2, +1, -1, +1/2, -1/2, +1/4, -1/4, 0.
+*/
+/* Estimate on needed scratch:
+   S(n) <= (n+5)\6*10+4+MAX(S((n+5)\6),1+2*(n+5)\6),
+   since n>42; S(n) <= ceil(log(n)/log(6))*(10+4)+n*12\6 < n*2 + lg2(n)*6
+ */
+
+void
+mpn_toom6h_mul   (mp_ptr pp,
+                 mp_srcptr ap, mp_size_t an,
+                 mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
+{
+  mp_size_t n, s, t;
+  int p, q, half;
+  int sign;
+  /* Multiply {ap,an} by {bp,bn} into pp; the p x q splitting is chosen below. */
+  /***************************** decomposition *******************************/
+
+  ASSERT( an >= bn);
+  /* Can not handle too small operands */
+  ASSERT( bn >= 42 );
+  /* Can not handle too much imbalance */
+  ASSERT((an*3 <  bn * 8) || ( bn >= 46 && an*6 <  bn * 17 ));
+
+  /* Limit num/den is a rational number between
+     (12/11)^(log(4)/log(2*4-1)) and (12/11)^(log(6)/log(2*6-1))             */
+#define LIMIT_numerator (18)
+#define LIMIT_denominat (17)
+
+  if( an * LIMIT_denominat < LIMIT_numerator * bn ) /* is 6*... < 6*... */
+    { p = q = 6; }
+  else if( an * 5 * LIMIT_numerator < LIMIT_denominat * 7 * bn )
+    { p = 7; q = 6; }
+  else if( an * 5 * LIMIT_denominat < LIMIT_numerator * 7 * bn )
+    { p = 7; q = 5; }
+  else if( an * LIMIT_numerator < LIMIT_denominat * 2 * bn )  /* is 4*... < 8*... */
+    { p = 8; q = 5; }
+  else if( an * LIMIT_denominat < LIMIT_numerator * 2 * bn )  /* is 4*... < 8*... */
+    { p = 8; q = 4; }
+  else
+    { p = 9; q = 4; }
+
+  half = (p ^ q) & 1; /* 1 iff p+q is odd */
+  n = 1 + (q * an >= p * bn ? (an - 1) / (size_t) p : (bn - 1) / (size_t) q);
+  p--; q--; /* p, q now index the top pieces of a and b */
+
+  s = an - p * n;
+  t = bn - q * n;
+
+  /* With LIMIT = 16/15, the following recover is needed only if bn<=73*/
+  if (half) { /* Recover from badly chosen splitting */
+    if (s<1) {p--; s+=n; half=0;}
+    else if (t<1) {q--; t+=n; half=0;}
+  }
+#undef LIMIT_numerator
+#undef LIMIT_denominat
+
+  ASSERT (0 < s && s <= n);
+  ASSERT (0 < t && t <= n);
+  ASSERT (half || s + t > 3);
+  ASSERT (n > 2);
+
+#define   r4    (pp + 3 * n)                   /* 3n+1 */
+#define   r2    (pp + 7 * n)                   /* 3n+1 */
+#define   r0    (pp +11 * n)                   /* s+t <= 2*n */
+#define   r5    (scratch)                      /* 3n+1 */
+#define   r3    (scratch + 3 * n + 1)          /* 3n+1 */
+#define   r1    (scratch + 6 * n + 2)          /* 3n+1 */
+#define   v0    (pp + 7 * n)                   /* n+1; same start as r2 */
+#define   v1    (pp + 8 * n+1)                 /* n+1 */
+#define   v2    (pp + 9 * n+2)                 /* n+1 */
+#define   v3    (scratch + 9 * n + 3)          /* n+1; same start as wsi */
+#define   wsi   (scratch + 9 * n + 3)          /* 3n+1 */
+#define   wse   (scratch +10 * n + 4)          /* 2n+1 */
+
+  /* Alloc also 3n+1 limbs for wsi... toom_interpolate_12pts may
+     need all of them  */
+/*   if (scratch == NULL) */
+/*     scratch = TMP_SALLOC_LIMBS(mpn_toom6_sqr_itch(n * 6)); */
+  ASSERT (12 * n + 6 <= mpn_toom6h_mul_itch(an,bn));
+  ASSERT (12 * n + 6 <= mpn_toom6_sqr_itch(n * 6));
+
+  /********************** evaluation and recursive calls *********************/
+  /* $\pm1/2$ */
+  sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 1, pp) ^
+        mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 1, pp);
+  TOOM6H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-1/2)*B(-1/2)*2^. */
+  TOOM6H_MUL_N_REC(r5, v2, v3, n + 1, wse); /* A(+1/2)*B(+1/2)*2^. */
+  mpn_toom_couple_handling (r5, 2 * n + 1, pp, sign, n, 1+half , half);
+
+  /* $\pm1$ */
+  sign = mpn_toom_eval_pm1 (v2, v0, p, ap, n, s,    pp);
+  if (q == 3)
+    sign ^= mpn_toom_eval_dgr3_pm1 (v3, v1, bp, n, t,    pp);
+  else
+    sign ^= mpn_toom_eval_pm1 (v3, v1, q, bp, n, t,    pp);
+  TOOM6H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-1)*B(-1) */
+  TOOM6H_MUL_N_REC(r3, v2, v3, n + 1, wse); /* A(1)*B(1) */
+  mpn_toom_couple_handling (r3, 2 * n + 1, pp, sign, n, 0, 0);
+
+  /* $\pm4$ */
+  sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 2, pp) ^
+        mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 2, pp);
+  TOOM6H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-4)*B(-4) */
+  TOOM6H_MUL_N_REC(r1, v2, v3, n + 1, wse); /* A(+4)*B(+4) */
+  mpn_toom_couple_handling (r1, 2 * n + 1, pp, sign, n, 2, 4);
+
+  /* $\pm1/4$ */
+  sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 2, pp) ^
+        mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 2, pp);
+  TOOM6H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-1/4)*B(-1/4)*4^. */
+  TOOM6H_MUL_N_REC(r4, v2, v3, n + 1, wse); /* A(+1/4)*B(+1/4)*4^. */
+  mpn_toom_couple_handling (r4, 2 * n + 1, pp, sign, n, 2*(1+half), 2*(half));
+
+  /* $\pm2$ */
+  sign = mpn_toom_eval_pm2 (v2, v0, p, ap, n, s, pp) ^
+        mpn_toom_eval_pm2 (v3, v1, q, bp, n, t, pp);
+  TOOM6H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-2)*B(-2) */
+  TOOM6H_MUL_N_REC(r2, v2, v3, n + 1, wse); /* A(+2)*B(+2) */
+  mpn_toom_couple_handling (r2, 2 * n + 1, pp, sign, n, 1, 2);
+
+#undef v0
+#undef v1
+#undef v2
+#undef v3
+#undef wse
+
+  /* A(0)*B(0) */
+  TOOM6H_MUL_N_REC(pp, ap, bp, n, wsi);
+
+  /* Infinity: only computed when half != 0; larger top piece goes first */
+  if( half != 0) {
+    if(s>t) {
+      TOOM6H_MUL_REC(r0, ap + p * n, s, bp + q * n, t, wsi);
+    } else {
+      TOOM6H_MUL_REC(r0, bp + q * n, t, ap + p * n, s, wsi);
+    };
+  };
+
+  mpn_toom_interpolate_12pts (pp, r1, r3, r5, n, s+t, half, wsi);
+
+#undef r0
+#undef r1
+#undef r2
+#undef r3
+#undef r4
+#undef r5
+#undef wsi
+}
+
+#undef TOOM6H_MUL_N_REC
+#undef TOOM6H_MUL_REC
+#undef MAYBE_mul_basecase
+#undef MAYBE_mul_toom22
+#undef MAYBE_mul_toom33
+#undef MAYBE_mul_toom6h
diff --git a/mpn/generic/toom8_sqr.c b/mpn/generic/toom8_sqr.c

new file mode 100644 (file)

index 0000000..e098d2e
--- /dev/null
+++ b/mpn/generic/toom8_sqr.c
@@ -0,0 +1,208 @@
+/* Implementation of the squaring algorithm with Toom-Cook 8.5-way.
+
+   Contributed to the GNU project by Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#if GMP_NUMB_BITS < 29
+#error Not implemented.
+#endif
+
+#if GMP_NUMB_BITS < 43
+#define BIT_CORRECTION 1
+#define CORRECTION_BITS GMP_NUMB_BITS
+#else /* GMP_NUMB_BITS >= 43 */
+#define BIT_CORRECTION 0
+#define CORRECTION_BITS 0
+#endif /* GMP_NUMB_BITS < 43 */
+
+#ifndef SQR_TOOM8_THRESHOLD /* default: reuse the multiplication threshold */
+#define SQR_TOOM8_THRESHOLD MUL_TOOM8H_THRESHOLD
+#endif
+
+#ifndef SQR_TOOM6_THRESHOLD /* default: reuse the multiplication threshold */
+#define SQR_TOOM6_THRESHOLD MUL_TOOM6H_THRESHOLD
+#endif
+
+#if TUNE_PROGRAM_BUILD
+#define MAYBE_sqr_basecase 1
+#define MAYBE_sqr_above_basecase   1
+#define MAYBE_sqr_toom2   1
+#define MAYBE_sqr_above_toom2   1
+#define MAYBE_sqr_toom3   1
+#define MAYBE_sqr_above_toom3   1
+#define MAYBE_sqr_toom4   1
+#define MAYBE_sqr_above_toom4   1
+#define MAYBE_sqr_above_toom6   1
+#else /* ! TUNE_PROGRAM_BUILD */
+#define SQR_TOOM8_MAX                                  \
+  ((SQR_FFT_THRESHOLD <= MP_SIZE_T_MAX - (8*2-1+7)) ?  \
+   ((SQR_FFT_THRESHOLD+8*2-1+7)/8)                     \
+   : MP_SIZE_T_MAX )
+#define MAYBE_sqr_basecase                                     \
+  (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM2_THRESHOLD)
+#define MAYBE_sqr_above_basecase                               \
+  (SQR_TOOM8_MAX >= SQR_TOOM2_THRESHOLD)
+#define MAYBE_sqr_toom2                                                \
+  (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM3_THRESHOLD)
+#define MAYBE_sqr_above_toom2                                  \
+  (SQR_TOOM8_MAX >= SQR_TOOM3_THRESHOLD)
+#define MAYBE_sqr_toom3                                                \
+  (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM4_THRESHOLD)
+#define MAYBE_sqr_above_toom3                                  \
+  (SQR_TOOM8_MAX >= SQR_TOOM4_THRESHOLD)
+#define MAYBE_sqr_toom4                                                \
+  (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM6_THRESHOLD)
+#define MAYBE_sqr_above_toom4                                  \
+  (SQR_TOOM8_MAX >= SQR_TOOM6_THRESHOLD)
+#define MAYBE_sqr_above_toom6                                  \
+  (SQR_TOOM8_MAX >= SQR_TOOM8_THRESHOLD)
+#endif /* TUNE_PROGRAM_BUILD */
+
+#define TOOM8_SQR_REC(p, a, n, ws)                                     \
+  do {  /* pick the squaring routine for {a,n} from the size n */      \
+    if (MAYBE_sqr_basecase && ( !MAYBE_sqr_above_basecase              \
+       || BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD)))                   \
+      mpn_sqr_basecase (p, a, n);                                      \
+    else if (MAYBE_sqr_toom2 && ( !MAYBE_sqr_above_toom2               \
+            || BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD)))              \
+      mpn_toom2_sqr (p, a, n, ws);                                     \
+    else if (MAYBE_sqr_toom3 && ( !MAYBE_sqr_above_toom3               \
+            || BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD)))              \
+      mpn_toom3_sqr (p, a, n, ws);                                     \
+    else if (MAYBE_sqr_toom4 && ( !MAYBE_sqr_above_toom4               \
+            || BELOW_THRESHOLD (n, SQR_TOOM6_THRESHOLD)))              \
+      mpn_toom4_sqr (p, a, n, ws);                                     \
+    else if (! MAYBE_sqr_above_toom6                                   \
+            || BELOW_THRESHOLD (n, SQR_TOOM8_THRESHOLD))               \
+      mpn_toom6_sqr (p, a, n, ws);                                     \
+    else                                                               \
+      mpn_toom8_sqr (p, a, n, ws);                                     \
+  } while (0)
+
+void
+mpn_toom8_sqr  (mp_ptr pp, mp_srcptr ap, mp_size_t an, mp_ptr scratch)
+{
+  mp_size_t n, s;
+  /* Square {ap,an} into pp; a is split in 8 pieces, the top one of s limbs. */
+  /***************************** decomposition *******************************/
+
+  ASSERT ( an >= 40 );
+
+  n = 1 + ((an - 1)>>3);
+
+  s = an - 7 * n;
+
+  ASSERT (0 < s && s <= n);
+  ASSERT ( s + s > 3 );
+
+#define   r6    (pp + 3 * n)                   /* 3n+1 */
+#define   r4    (pp + 7 * n)                   /* 3n+1 */
+#define   r2    (pp +11 * n)                   /* 3n+1 */
+#define   r0    (pp +15 * n)                   /* 2s <= 2*n */
+#define   r7    (scratch)                      /* 3n+1 */
+#define   r5    (scratch + 3 * n + 1)          /* 3n+1 */
+#define   r3    (scratch + 6 * n + 2)          /* 3n+1 */
+#define   r1    (scratch + 9 * n + 3)          /* 3n+1 */
+#define   v0    (pp +11 * n)                   /* n+1; same start as r2 */
+#define   v2    (pp +13 * n+2)                 /* n+1 */
+#define   wse   (scratch +12 * n + 4)          /* 3n+1 */
+
+  /* Alloc also 3n+1 limbs for wse... toom_interpolate_16pts may
+     need all of them, when DO_mpn_sublsh_n uses a scratch  */
+/*   if (scratch == NULL) */
+/*     scratch = TMP_SALLOC_LIMBS (30 * n + 6); */
+
+  /********************** evaluation and recursive calls *********************/
+  /* $\pm1/8$ (squares: the sign passed to couple_handling is always 0) */
+  mpn_toom_eval_pm2rexp (v2, v0, 7, ap, n, s, 3, pp);
+  TOOM8_SQR_REC(pp, v0, n + 1, wse); /* A(-1/8)^2*8^. */
+  TOOM8_SQR_REC(r7, v2, n + 1, wse); /* A(+1/8)^2*8^. */
+  mpn_toom_couple_handling (r7, 2 * n + 1 + BIT_CORRECTION, pp, 0, n, 3, 0);
+
+  /* $\pm1/4$ */
+  mpn_toom_eval_pm2rexp (v2, v0, 7, ap, n, s, 2, pp);
+  TOOM8_SQR_REC(pp, v0, n + 1, wse); /* A(-1/4)^2*4^. */
+  TOOM8_SQR_REC(r5, v2, n + 1, wse); /* A(+1/4)^2*4^. */
+  mpn_toom_couple_handling (r5, 2 * n + 1, pp, 0, n, 2, 0);
+
+  /* $\pm2$ */
+  mpn_toom_eval_pm2 (v2, v0, 7, ap, n, s, pp);
+  TOOM8_SQR_REC(pp, v0, n + 1, wse); /* A(-2)^2 */
+  TOOM8_SQR_REC(r3, v2, n + 1, wse); /* A(+2)^2 */
+  mpn_toom_couple_handling (r3, 2 * n + 1, pp, 0, n, 1, 2);
+
+  /* $\pm8$ */
+  mpn_toom_eval_pm2exp (v2, v0, 7, ap, n, s, 3, pp);
+  TOOM8_SQR_REC(pp, v0, n + 1, wse); /* A(-8)^2 */
+  TOOM8_SQR_REC(r1, v2, n + 1, wse); /* A(+8)^2 */
+  mpn_toom_couple_handling (r1, 2 * n + 1 + BIT_CORRECTION, pp, 0, n, 3, 6);
+
+  /* $\pm1/2$ */
+  mpn_toom_eval_pm2rexp (v2, v0, 7, ap, n, s, 1, pp);
+  TOOM8_SQR_REC(pp, v0, n + 1, wse); /* A(-1/2)^2*2^. */
+  TOOM8_SQR_REC(r6, v2, n + 1, wse); /* A(+1/2)^2*2^. */
+  mpn_toom_couple_handling (r6, 2 * n + 1, pp, 0, n, 1, 0);
+
+  /* $\pm1$ */
+  mpn_toom_eval_pm1 (v2, v0, 7, ap, n, s,    pp);
+  TOOM8_SQR_REC(pp, v0, n + 1, wse); /* A(-1)^2 */
+  TOOM8_SQR_REC(r4, v2, n + 1, wse); /* A(1)^2 */
+  mpn_toom_couple_handling (r4, 2 * n + 1, pp, 0, n, 0, 0);
+
+  /* $\pm4$ */
+  mpn_toom_eval_pm2exp (v2, v0, 7, ap, n, s, 2, pp);
+  TOOM8_SQR_REC(pp, v0, n + 1, wse); /* A(-4)^2 */
+  TOOM8_SQR_REC(r2, v2, n + 1, wse); /* A(+4)^2 */
+  mpn_toom_couple_handling (r2, 2 * n + 1, pp, 0, n, 2, 4);
+
+#undef v0
+#undef v2
+
+  /* A(0)^2 */
+  TOOM8_SQR_REC(pp, ap, n, wse);
+
+  mpn_toom_interpolate_16pts (pp, r1, r3, r5, r7, n, 2 * s, 0, wse);
+
+#undef r0
+#undef r1
+#undef r2
+#undef r3
+#undef r4
+#undef r5
+#undef r6
+#undef r7
+#undef wse
+}
+
+#undef TOOM8_SQR_REC
+#undef MAYBE_sqr_basecase
+#undef MAYBE_sqr_above_basecase
+#undef MAYBE_sqr_toom2
+#undef MAYBE_sqr_above_toom2
+#undef MAYBE_sqr_toom3
+#undef MAYBE_sqr_above_toom3
+#undef MAYBE_sqr_above_toom4
diff --git a/mpn/generic/toom8h_mul.c b/mpn/generic/toom8h_mul.c

new file mode 100644 (file)

index 0000000..c73cf6f
--- /dev/null
+++ b/mpn/generic/toom8h_mul.c
@@ -0,0 +1,290 @@
+/* Implementation of the multiplication algorithm for Toom-Cook 8.5-way.
+
+   Contributed to the GNU project by Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+#if GMP_NUMB_BITS < 29 /* limbs narrower than 29 bits are rejected at compile time */
+#error Not implemented.
+#endif
+
+#if GMP_NUMB_BITS < 43 /* narrow limbs: the +-8 and +-1/8 points get one more limb */
+#define BIT_CORRECTION 1 /* added to the size given to mpn_toom_couple_handling */
+#define CORRECTION_BITS GMP_NUMB_BITS /* not referenced elsewhere in this file */
+#else
+#define BIT_CORRECTION 0
+#define CORRECTION_BITS 0
+#endif
+
+/* MAYBE_mul_* gate the branches of TOOM8H_MUL_N_REC.  A tuning build
+   keeps every branch.  Otherwise each one compares MUL_TOOM8H_THRESHOLD
+   (MUL_FFT_THRESHOLD for MAYBE_mul_toom8h) with 8 times the threshold that
+   guards the matching branch of TOOM8H_MUL_N_REC; presumably this lets
+   unreachable branches be optimised away.  */
+#if TUNE_PROGRAM_BUILD
+#define MAYBE_mul_basecase 1
+#define MAYBE_mul_toom22   1
+#define MAYBE_mul_toom33   1
+#define MAYBE_mul_toom44   1
+#define MAYBE_mul_toom8h   1
+#else
+#define MAYBE_mul_basecase                                             \
+  (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM22_THRESHOLD)
+#define MAYBE_mul_toom22                                               \
+  (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM33_THRESHOLD)
+#define MAYBE_mul_toom33                                               \
+  (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM44_THRESHOLD)
+#define MAYBE_mul_toom44                                               \
+  (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM6H_THRESHOLD)
+#define MAYBE_mul_toom8h                                               \
+  (MUL_FFT_THRESHOLD >= 8 * MUL_TOOM8H_THRESHOLD)
+#endif
+/* TOOM8H_MUL_N_REC (p, a, b, n, ws) multiplies the equal-size operands
+   {a,n} and {b,n} into p.  It walks the MUL_*_THRESHOLD ladder and calls
+   mpn_mul_basecase, mpn_toom22_mul, mpn_toom33_mul, mpn_toom44_mul,
+   mpn_toom6h_mul or mpn_toom8h_mul; ws is handed to the Toom routines as
+   scratch space.
+   TOOM8H_MUL_REC (p, a, na, b, nb, ws) is the variant for operands of
+   different sizes: it simply calls mpn_mul and does not use ws.  */
+#define TOOM8H_MUL_N_REC(p, a, b, n, ws)                               \
+  do {                                                                 \
+    if (MAYBE_mul_basecase                                             \
+       && BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))                   \
+      mpn_mul_basecase (p, a, n, b, n);                                        \
+    else if (MAYBE_mul_toom22                                          \
+            && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD))              \
+      mpn_toom22_mul (p, a, n, b, n, ws);                              \
+    else if (MAYBE_mul_toom33                                          \
+            && BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD))              \
+      mpn_toom33_mul (p, a, n, b, n, ws);                              \
+    else if (MAYBE_mul_toom44                                          \
+            && BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD))              \
+      mpn_toom44_mul (p, a, n, b, n, ws);                              \
+    else if (! MAYBE_mul_toom8h                                                \
+            || BELOW_THRESHOLD (n, MUL_TOOM8H_THRESHOLD))              \
+      mpn_toom6h_mul (p, a, n, b, n, ws);                              \
+    else                                                               \
+      mpn_toom8h_mul (p, a, n, b, n, ws);                              \
+  } while (0)
+
+#define TOOM8H_MUL_REC(p, a, na, b, nb, ws)            \
+  do { mpn_mul (p, a, na, b, nb);                      \
+  } while (0)
+
+/* Toom-8.5 , compute the product {pp,an+bn} <- {ap,an} * {bp,bn}
+   With: an >= bn >= 86, an*4 <= bn*13; the ASSERTs at the top of the
+   function tighten the balance limit when GMP_NUMB_BITS <= 36.
+   The scratch layout defined in the function reaches up to
+   scratch + 15*n + 5, n being the piece size chosen by the decomposition;
+   the function ASSERTs 15*n+6 <= mpn_toom8h_mul_itch (an, bn).
+   It _may_ work with bn<=?? and bn*?? < an*? < bn*??
+
+   Evaluate in: infinity, +8,-8,+4,-4,+2,-2,+1,-1,+1/2,-1/2,+1/4,-1/4,+1/8,-1/8,0.
+*/
+/* Estimate on needed scratch:
+   S(n) <= (n+7)\8*13+5+MAX(S((n+7)\8),1+2*(n+7)\8),
+   since n>80; S(n) <= ceil(log(n/10)/log(8))*(13+5)+n*15\8 < n*15\8 + lg2(n)*6
+ */
+
+void
+mpn_toom8h_mul   (mp_ptr pp,
+                 mp_srcptr ap, mp_size_t an,
+                 mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
+{
+  mp_size_t n, s, t; /* piece size; sizes of the top pieces of a and of b */
+  int p, q, half; /* degrees used for a and b; half != 0 when infinity is evaluated */
+  int sign;
+
+  /***************************** decomposition *******************************/
+
+  ASSERT (an >= bn);
+  /* Cannot handle operands that are too small */
+  ASSERT (bn >= 86);
+  /* Cannot handle operands that are too unbalanced */
+  ASSERT (an*4 <= bn*13);
+  ASSERT (GMP_NUMB_BITS > 12*3 || an*4 <= bn*12);
+  ASSERT (GMP_NUMB_BITS > 11*3 || an*5 <= bn*11);
+  ASSERT (GMP_NUMB_BITS > 10*3 || an*6 <= bn*10);
+  ASSERT (GMP_NUMB_BITS >  9*3 || an*7 <= bn* 9);
+
+  /* Limit num/den is a rational number between
+     (16/15)^(log(6)/log(2*6-1)) and (16/15)^(log(8)/log(2*8-1))             */
+#define LIMIT_numerator (21)
+#define LIMIT_denominat (20)
+
+  if (LIKELY (an == bn) || an * (LIMIT_denominat>>1) < LIMIT_numerator * (bn>>1) ) /* is 8*... < 8*... */
+    {
+      half = 0; /* balanced case: the point at infinity is not evaluated */
+      n = 1 + ((an - 1)>>3); /* ceil (an / 8) */
+      p = q = 7;
+      s = an - p * n;
+      t = bn - q * n;
+    }
+  else
+    {
+      if (an * 13 < 16 * bn) /* (an*7*LIMIT_numerator<LIMIT_denominat*9*bn) */
+       { p = 9; q = 8; }
+      else if (GMP_NUMB_BITS <= 9*3 ||
+              an *(LIMIT_denominat>>1) < (LIMIT_numerator/7*9) * (bn>>1))
+       { p = 9; q = 7; }
+      else if (an * 10 < 33 * (bn>>1)) /* (an*3*LIMIT_numerator<LIMIT_denominat*5*bn) */
+       { p =10; q = 7; }
+      else if (GMP_NUMB_BITS <= 10*3 ||
+              an * (LIMIT_denominat/5) < (LIMIT_numerator/3) * bn)
+       { p =10; q = 6; }
+      else if (an * 6 < 13 * bn) /*(an * 5 * LIMIT_numerator < LIMIT_denominat *11 * bn)*/
+       { p =11; q = 6; }
+      else if (GMP_NUMB_BITS <= 11*3 ||
+              an * 4 < 9 * bn)
+       { p =11; q = 5; }
+      else if (an *(LIMIT_numerator/3) < LIMIT_denominat * bn )  /* is 4*... <12*... */
+       { p =12; q = 5; }
+      else if (GMP_NUMB_BITS <= 12*3 ||
+              an * 9 < 28 * bn )  /* is 4*... <12*... */
+       { p =12; q = 4; }
+      else
+       { p =13; q = 4; }
+
+      half = (p+q)&1; /* p+q odd: the extra point at infinity is computed below */
+      n = 1 + (q * an >= p * bn ? (an - 1) / (size_t) p : (bn - 1) / (size_t) q); /* larger of ceil (an/p), ceil (bn/q) */
+      p--; q--; /* from number of pieces to polynomial degree */
+
+      s = an - p * n;
+      t = bn - q * n;
+
+      if(half) { /* Recover from badly chosen splitting */
+       if (s<1) {p--; s+=n; half=0;}
+       else if (t<1) {q--; t+=n; half=0;}
+      }
+    }
+#undef LIMIT_numerator
+#undef LIMIT_denominat
+
+  ASSERT (0 < s && s <= n);
+  ASSERT (0 < t && t <= n);
+  ASSERT (half || s + t > 3);
+  ASSERT (n > 2);
+
+#define   r6    (pp + 3 * n)                   /* 3n+1 */
+#define   r4    (pp + 7 * n)                   /* 3n+1 */
+#define   r2    (pp +11 * n)                   /* 3n+1 */
+#define   r0    (pp +15 * n)                   /* s+t <= 2*n */
+#define   r7    (scratch)                      /* 3n+1 */
+#define   r5    (scratch + 3 * n + 1)          /* 3n+1 */
+#define   r3    (scratch + 6 * n + 2)          /* 3n+1 */
+#define   r1    (scratch + 9 * n + 3)          /* 3n+1 */
+#define   v0    (pp +11 * n)                   /* n+1 */
+#define   v1    (pp +12 * n+1)                 /* n+1 */
+#define   v2    (pp +13 * n+2)                 /* n+1 */
+#define   v3    (scratch +12 * n + 4)          /* n+1, same address as wsi */
+#define   wsi   (scratch +12 * n + 4)          /* 3n+1 */
+#define   wse   (scratch +13 * n + 5)          /* 2n+1 */
+
+  /* Alloc also 3n+1 limbs for wsi... toom_interpolate_16pts may
+     need all of them  */
+/*   if (scratch == NULL) */
+/*     scratch = TMP_SALLOC_LIMBS(mpn_toom8_sqr_itch(n * 8)); */
+  ASSERT (15 * n + 6 <= mpn_toom8h_mul_itch (an, bn));
+  ASSERT (15 * n + 6 <= mpn_toom8_sqr_itch (n * 8));
+
+  /********************** evaluation and recursive calls *********************/
+
+  /* Each pair of points below follows the same pattern: evaluate A in
+     v2 (+) and v0 (-), B in v3 (+) and v1 (-), with {pp,n+1} as scratch;
+     multiply the (-) values into pp and the (+) values into rK; then let
+     mpn_toom_couple_handling combine the two products in place.  */
+
+  /* $\pm1/8$ */
+  sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 3, pp) ^
+        mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 3, pp);
+  TOOM8H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-1/8)*B(-1/8)*8^. */
+  TOOM8H_MUL_N_REC(r7, v2, v3, n + 1, wse); /* A(+1/8)*B(+1/8)*8^. */
+  mpn_toom_couple_handling (r7, 2 * n + 1 + BIT_CORRECTION, pp, sign, n, 3*(1+half), 3*(half));
+
+  /* $\pm1/4$ */
+  sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 2, pp) ^
+        mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 2, pp);
+  TOOM8H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-1/4)*B(-1/4)*4^. */
+  TOOM8H_MUL_N_REC(r5, v2, v3, n + 1, wse); /* A(+1/4)*B(+1/4)*4^. */
+  mpn_toom_couple_handling (r5, 2 * n + 1, pp, sign, n, 2*(1+half), 2*(half));
+
+  /* $\pm2$ */
+  sign = mpn_toom_eval_pm2 (v2, v0, p, ap, n, s, pp) ^
+        mpn_toom_eval_pm2 (v3, v1, q, bp, n, t, pp);
+  TOOM8H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-2)*B(-2) */
+  TOOM8H_MUL_N_REC(r3, v2, v3, n + 1, wse); /* A(+2)*B(+2) */
+  mpn_toom_couple_handling (r3, 2 * n + 1, pp, sign, n, 1, 2);
+
+  /* $\pm8$ */
+  sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 3, pp) ^
+        mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 3, pp);
+  TOOM8H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-8)*B(-8) */
+  TOOM8H_MUL_N_REC(r1, v2, v3, n + 1, wse); /* A(+8)*B(+8) */
+  mpn_toom_couple_handling (r1, 2 * n + 1 + BIT_CORRECTION, pp, sign, n, 3, 6);
+
+  /* $\pm1/2$ */
+  sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 1, pp) ^
+        mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 1, pp);
+  TOOM8H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-1/2)*B(-1/2)*2^. */
+  TOOM8H_MUL_N_REC(r6, v2, v3, n + 1, wse); /* A(+1/2)*B(+1/2)*2^. */
+  mpn_toom_couple_handling (r6, 2 * n + 1, pp, sign, n, 1+half, half);
+
+  /* $\pm1$ */
+  sign = mpn_toom_eval_pm1 (v2, v0, p, ap, n, s,    pp);
+  if (q == 3) /* mpn_toom_eval_pm1 ASSERTs k >= 4, so degree 3 has its own routine */
+    sign ^= mpn_toom_eval_dgr3_pm1 (v3, v1, bp, n, t,    pp);
+  else
+    sign ^= mpn_toom_eval_pm1 (v3, v1, q, bp, n, t,    pp);
+  TOOM8H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-1)*B(-1) */
+  TOOM8H_MUL_N_REC(r4, v2, v3, n + 1, wse); /* A(1)*B(1) */
+  mpn_toom_couple_handling (r4, 2 * n + 1, pp, sign, n, 0, 0);
+
+  /* $\pm4$ */
+  sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 2, pp) ^
+        mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 2, pp);
+  TOOM8H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-4)*B(-4) */
+  TOOM8H_MUL_N_REC(r2, v2, v3, n + 1, wse); /* A(+4)*B(+4) */
+  mpn_toom_couple_handling (r2, 2 * n + 1, pp, sign, n, 2, 4);
+
+#undef v0
+#undef v1
+#undef v2
+#undef v3
+#undef wse
+
+  /* A(0)*B(0) */
+  TOOM8H_MUL_N_REC(pp, ap, bp, n, wsi);
+
+  /* Infinity */
+  if( half != 0) {
+    if(s>t) { /* product of the two top pieces, the longer one passed first */
+      TOOM8H_MUL_REC(r0, ap + p * n, s, bp + q * n, t, wsi);
+    } else {
+      TOOM8H_MUL_REC(r0, bp + q * n, t, ap + p * n, s, wsi);
+    };
+  };
+
+  mpn_toom_interpolate_16pts (pp, r1, r3, r5, r7, n, s+t, half, wsi);
+
+#undef r0
+#undef r1
+#undef r2
+#undef r3
+#undef r4
+#undef r5
+#undef r6
+#undef wsi
+}
+
+#undef TOOM8H_MUL_N_REC
+#undef TOOM8H_MUL_REC
+#undef MAYBE_mul_basecase
+#undef MAYBE_mul_toom22
+#undef MAYBE_mul_toom33
+#undef MAYBE_mul_toom44
+#undef MAYBE_mul_toom8h
diff --git a/mpn/generic/toom_couple_handling.c b/mpn/generic/toom_couple_handling.c

new file mode 100644 (file)

index 0000000..c347297
--- /dev/null
+++ b/mpn/generic/toom_couple_handling.c
@@ -0,0 +1,70 @@
+/* Helper function for high degree Toom-Cook algorithms.
+
+   Contributed to the GNU project by Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Post-process the pair of products obtained at two opposite evaluation
+   points.  {pp,n} is the product at the positive point; {np,n} is the
+   magnitude of the product at the negative point, and nsign is non-zero
+   when that product is negative.  In order:
+     {np,n} <- ({pp,n} + (nsign?-1:1)*{np,n}) / 2
+     {pp,n} <- ({pp,n} - {np,n}) / 2^ps
+     {np,n} <- {np,n} / 2^ns
+   Finally the two values are recomposed:
+     {pp,n+off} <- {pp,n}+{np,n}*2^{off*GMP_NUMB_BITS}
+*/
+void
+mpn_toom_couple_handling (mp_ptr pp, mp_size_t n, mp_ptr np,
+                         int nsign, mp_size_t off, int ps, int ns)
+{
+  mp_limb_t cy;
+
+  /* np <- (pp +/- np) / 2, the sign being selected by nsign.  */
+  if (! nsign)
+    {
+#ifdef HAVE_NATIVE_mpn_rsh1add_n
+      mpn_rsh1add_n (np, pp, np, n);
+#else
+      mpn_add_n (np, pp, np, n);
+      mpn_rshift (np, np, n, 1);
+#endif
+    }
+  else
+    {
+#ifdef HAVE_NATIVE_mpn_rsh1sub_n
+      mpn_rsh1sub_n (np, pp, np, n);
+#else
+      mpn_sub_n (np, pp, np, n);
+      mpn_rshift (np, np, n, 1);
+#endif
+    }
+
+  /* pp <- (pp - np) >> ps; a native fused routine serves ps == 1.  */
+#ifdef HAVE_NATIVE_mpn_rsh1sub_n
+  if (ps != 1)
+#endif
+    {
+      mpn_sub_n (pp, pp, np, n);
+      if (ps > 0)
+       mpn_rshift (pp, pp, n, ps);
+    }
+#ifdef HAVE_NATIVE_mpn_rsh1sub_n
+  else
+    mpn_rsh1sub_n (pp, pp, np, n);
+#endif
+
+  /* np <- np >> ns.  */
+  if (ns > 0)
+    mpn_rshift (np, np, n, ns);
+
+  /* Add np into pp at a distance of off limbs: the low n-off limbs of np
+     overlap the high limbs of pp, the remaining off limbs are stored
+     above them together with the carry.  */
+  cy = mpn_add_n (pp+off, pp+off, np, n-off);
+  pp[n] = cy;
+  ASSERT_NOCARRY (mpn_add_1(pp+n, np+n-off, off, cy) );
+}
diff --git a/mpn/generic/toom_eval_dgr3_pm1.c b/mpn/generic/toom_eval_dgr3_pm1.c

new file mode 100644 (file)

index 0000000..6739506
--- /dev/null
+++ b/mpn/generic/toom_eval_dgr3_pm1.c
@@ -0,0 +1,62 @@
+/* mpn_toom_eval_dgr3_pm1 -- Evaluate a degree 3 polynomial in +1 and -1
+
+   Contributed to the GNU project by Niels Möller
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+int
+mpn_toom_eval_dgr3_pm1 (mp_ptr xp1, mp_ptr xm1,
+                       mp_srcptr xp, mp_size_t n, mp_size_t x3n, mp_ptr tp)
+{
+  int neg;
+
+  ASSERT (x3n > 0);
+  ASSERT (x3n <= n);
+
+  xp1[n] = mpn_add_n (xp1, xp, xp + 2*n, n);
+  tp[n] = mpn_add (tp, xp + n, n, xp + 3*n, x3n);
+
+  neg = (mpn_cmp (xp1, tp, n + 1) < 0) ? ~0 : 0;
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  if (neg)
+    mpn_add_n_sub_n (xp1, xm1, tp, xp1, n + 1);
+  else
+    mpn_add_n_sub_n (xp1, xm1, xp1, tp, n + 1);
+#else
+  if (neg)
+    mpn_sub_n (xm1, tp, xp1, n + 1);
+  else
+    mpn_sub_n (xm1, xp1, tp, n + 1);
+
+  mpn_add_n (xp1, xp1, tp, n + 1);
+#endif
+
+  ASSERT (xp1[n] <= 3);
+  ASSERT (xm1[n] <= 1);
+
+  return neg;
+}
diff --git a/mpn/generic/toom_eval_dgr3_pm2.c b/mpn/generic/toom_eval_dgr3_pm2.c

new file mode 100644 (file)

index 0000000..0148600
--- /dev/null
+++ b/mpn/generic/toom_eval_dgr3_pm2.c
@@ -0,0 +1,87 @@
+/* mpn_toom_eval_dgr3_pm2 -- Evaluate a degree 3 polynomial in +2 and -2
+
+   Contributed to the GNU project by Niels Möller
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Needs n+1 limbs of temporary storage.
+   Evaluates x0 + x1*X + x2*X^2 + x3*X^3 in the points +2 and -2, where
+   x0, x1 and x2 are the n-limb pieces at xp, xp + n and xp + 2*n, and x3 is
+   the x3n-limb piece at xp + 3*n (0 < x3n <= n).  {xp2,n+1} receives the
+   value at +2 and {xm2,n+1} the absolute value at -2.  Returns ~0 when the
+   value at -2 is negative, 0 otherwise.  */
+int
+mpn_toom_eval_dgr3_pm2 (mp_ptr xp2, mp_ptr xm2,
+                       mp_srcptr xp, mp_size_t n, mp_size_t x3n, mp_ptr tp)
+{
+  mp_limb_t cy;
+  int neg;
+
+  ASSERT (x3n > 0);
+  ASSERT (x3n <= n);
+
+  /* (x0 + 4 * x2) +/- (2 x1 + 8 x_3) */
+#if HAVE_NATIVE_mpn_addlsh_n || HAVE_NATIVE_mpn_addlsh2_n
+#if HAVE_NATIVE_mpn_addlsh2_n
+  xp2[n] = mpn_addlsh2_n (xp2, xp, xp + 2*n, n); /* even part, x0 + 4*x2 */
+
+  cy = mpn_addlsh2_n (tp, xp + n, xp + 3*n, x3n); /* low x3n limbs of x1 + 4*x3 */
+#else /* HAVE_NATIVE_mpn_addlsh_n */
+  xp2[n] = mpn_addlsh_n (xp2, xp, xp + 2*n, n, 2); /* even part, x0 + 4*x2 */
+
+  cy = mpn_addlsh_n (tp, xp + n, xp + 3*n, x3n, 2); /* low x3n limbs of x1 + 4*x3 */
+#endif
+  if (x3n < n) /* x1 is longer than x3: carry on through its remaining limbs */
+    cy = mpn_add_1 (tp + x3n, xp + n + x3n, n - x3n, cy);
+  tp[n] = cy;
+#else
+  cy = mpn_lshift (tp, xp + 2*n, n, 2); /* tp is scratch for 4*x2 here */
+  xp2[n] = cy + mpn_add_n (xp2, tp, xp, n);
+
+  tp[x3n] = mpn_lshift (tp, xp + 3*n, x3n, 2); /* {tp,x3n+1} <- 4*x3 */
+  if (x3n < n)
+    tp[n] = mpn_add (tp, xp + n, n, tp, x3n + 1);
+  else
+    tp[n] += mpn_add_n (tp, xp + n, tp, n);
+#endif
+  mpn_lshift (tp, tp, n+1, 1); /* odd part, 2*x1 + 8*x3 */
+
+  neg = (mpn_cmp (xp2, tp, n + 1) < 0) ? ~0 : 0; /* odd part larger: value at -2 is negative */
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  if (neg)
+    mpn_add_n_sub_n (xp2, xm2, tp, xp2, n + 1);
+  else
+    mpn_add_n_sub_n (xp2, xm2, xp2, tp, n + 1);
+#else
+  if (neg) /* always subtract the smaller part from the larger one */
+    mpn_sub_n (xm2, tp, xp2, n + 1);
+  else
+    mpn_sub_n (xm2, xp2, tp, n + 1);
+
+  mpn_add_n (xp2, xp2, tp, n + 1);
+#endif
+
+  ASSERT (xp2[n] < 15);
+  ASSERT (xm2[n] < 10);
+
+  return neg;
+}
diff --git a/mpn/generic/toom_eval_pm1.c b/mpn/generic/toom_eval_pm1.c

new file mode 100644 (file)

index 0000000..1c63efd
--- /dev/null
+++ b/mpn/generic/toom_eval_pm1.c
@@ -0,0 +1,79 @@
+/* mpn_toom_eval_pm1 -- Evaluate a polynomial in +1 and -1
+
+   Contributed to the GNU project by Niels Möller
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Evaluates a polynomial of degree k > 3, in the points +1 and -1. */
+int
+mpn_toom_eval_pm1 (mp_ptr xp1, mp_ptr xm1, unsigned k,
+                  mp_srcptr xp, mp_size_t n, mp_size_t hn, mp_ptr tp)
+{
+  unsigned i;
+  int neg;
+
+  ASSERT (k >= 4);
+
+  ASSERT (hn > 0);
+  ASSERT (hn <= n);
+
+  /* The degree k is also the number of full-size coefficients, so
+   * that last coefficient, of size hn, starts at xp + k*n. */
+
+  xp1[n] = mpn_add_n (xp1, xp, xp + 2*n, n);
+  for (i = 4; i < k; i += 2)
+    ASSERT_NOCARRY (mpn_add (xp1, xp1, n+1, xp+i*n, n));
+
+  tp[n] = mpn_add_n (tp, xp + n, xp + 3*n, n);
+  for (i = 5; i < k; i += 2)
+    ASSERT_NOCARRY (mpn_add (tp, tp, n+1, xp+i*n, n));
+
+  if (k & 1)
+    ASSERT_NOCARRY (mpn_add (tp, tp, n+1, xp+k*n, hn));
+  else
+    ASSERT_NOCARRY (mpn_add (xp1, xp1, n+1, xp+k*n, hn));
+
+  neg = (mpn_cmp (xp1, tp, n + 1) < 0) ? ~0 : 0;
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  if (neg)
+    mpn_add_n_sub_n (xp1, xm1, tp, xp1, n + 1);
+  else
+    mpn_add_n_sub_n (xp1, xm1, xp1, tp, n + 1);
+#else
+  if (neg)
+    mpn_sub_n (xm1, tp, xp1, n + 1);
+  else
+    mpn_sub_n (xm1, xp1, tp, n + 1);
+
+  mpn_add_n (xp1, xp1, tp, n + 1);
+#endif
+
+  ASSERT (xp1[n] <= k);
+  ASSERT (xm1[n] <= k/2 + 1);
+
+  return neg;
+}
diff --git a/mpn/generic/toom_eval_pm2.c b/mpn/generic/toom_eval_pm2.c

new file mode 100644 (file)

index 0000000..7795b0b
--- /dev/null
+++ b/mpn/generic/toom_eval_pm2.c
@@ -0,0 +1,120 @@
+/* mpn_toom_eval_pm2 -- Evaluate a polynomial in +2 and -2
+
+   Contributed to the GNU project by Niels Möller and Marco Bodrato
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* DO_addlsh2(d,a,b,n,cy) computes cy,{d,n} <- {a,n} + 4*(cy,{b,n}), it
+   can be used as DO_addlsh2(d,a,d,n,d[n]), for accumulation on {d,n+1}.
+   cy is both read and written, so it must be an lvalue.  Three variants
+   follow: one built on mpn_addlsh2_n, one on mpn_addlsh_n with a shift
+   of 2, and a fallback built on mpn_lshift and mpn_add_n.  */
+#if HAVE_NATIVE_mpn_addlsh2_n
+#define DO_addlsh2(d, a, b, n, cy)     \
+do {                                   \
+  (cy) <<= 2;                          \
+  (cy) += mpn_addlsh2_n(d, a, b, n);   \
+} while (0)
+#else
+#if HAVE_NATIVE_mpn_addlsh_n
+#define DO_addlsh2(d, a, b, n, cy)     \
+do {                                   \
+  (cy) <<= 2;                          \
+  (cy) += mpn_addlsh_n(d, a, b, n, 2); \
+} while (0)
+#else
+/* The following is not a general substitute for addlsh2.
+   It is correct if d == b, but it is not if d == a: the shift writes
+   {d,n} before {a,n} is read by the addition.  */
+#define DO_addlsh2(d, a, b, n, cy)     \
+do {                                   \
+  (cy) <<= 2;                          \
+  (cy) += mpn_lshift(d, b, n, 2);      \
+  (cy) += mpn_add_n(d, d, a, n);       \
+} while (0)
+#endif
+#endif
+
+/* Evaluates a polynomial of degree 2 < k < GMP_NUMB_BITS, in the
+   points +2 and -2.
+   xp holds k coefficients of n limbs each, followed by the top coefficient
+   of hn limbs (0 < hn <= n).  The coefficients whose index has the parity
+   of k are accumulated in {xp2,n+1}, the others in {tp,n+1}, each by a
+   Horner scheme in 4; whichever of the two holds the odd-index terms is
+   then doubled.  On return {xp2,n+1} is the sum of the two parts and
+   {xm2,n+1} the absolute value of their difference.  The returned mask
+   (~0 or 0) is the comparison result, inverted when xp2 held the
+   odd-index terms.  */
+int
+mpn_toom_eval_pm2 (mp_ptr xp2, mp_ptr xm2, unsigned k,
+                  mp_srcptr xp, mp_size_t n, mp_size_t hn, mp_ptr tp)
+{
+  int i;
+  int neg;
+  mp_limb_t cy;
+
+  ASSERT (k >= 3);
+  ASSERT (k < GMP_NUMB_BITS);
+
+  ASSERT (hn > 0);
+  ASSERT (hn <= n);
+
+  /* The degree k is also the number of full-size coefficients, so
+   * that last coefficient, of size hn, starts at xp + k*n. */
+
+  cy = 0;
+  DO_addlsh2 (xp2, xp + (k-2) * n, xp + k * n, hn, cy); /* low hn limbs of x_{k-2} + 4*x_k */
+  if (hn != n) /* x_k is short: carry on through the rest of x_{k-2} */
+    cy = mpn_add_1 (xp2 + hn, xp + (k-2) * n + hn, n - hn, cy);
+  for (i = k - 4; i >= 0; i -= 2) /* Horner step: xp2 <- x_i + 4*xp2 */
+    DO_addlsh2 (xp2, xp + i * n, xp2, n, cy);
+  xp2[n] = cy;
+
+  k--; /* from here on k is the index of the top full-size coefficient */
+
+  cy = 0;
+  DO_addlsh2 (tp, xp + (k-2) * n, xp + k * n, n, cy);
+  for (i = k - 4; i >= 0; i -= 2) /* Horner step: tp <- x_i + 4*tp */
+    DO_addlsh2 (tp, xp + i * n, tp, n, cy);
+  tp[n] = cy;
+
+  if (k & 1) /* double the accumulator that holds the odd-index terms */
+    ASSERT_NOCARRY(mpn_lshift (tp , tp , n + 1, 1));
+  else
+    ASSERT_NOCARRY(mpn_lshift (xp2, xp2, n + 1, 1));
+
+  neg = (mpn_cmp (xp2, tp, n + 1) < 0) ? ~0 : 0;
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  if (neg)
+    mpn_add_n_sub_n (xp2, xm2, tp, xp2, n + 1);
+  else
+    mpn_add_n_sub_n (xp2, xm2, xp2, tp, n + 1);
+#else /* !HAVE_NATIVE_mpn_add_n_sub_n */
+  if (neg) /* always subtract the smaller part from the larger one */
+    mpn_sub_n (xm2, tp, xp2, n + 1);
+  else
+    mpn_sub_n (xm2, xp2, tp, n + 1);
+
+  mpn_add_n (xp2, xp2, tp, n + 1);
+#endif /* !HAVE_NATIVE_mpn_add_n_sub_n */
+
+  ASSERT (xp2[n] < (1<<(k+2))-1);
+  ASSERT (xm2[n] < ((1<<(k+3))-1 - (1^k&1))/3);
+
+  neg ^= ((k & 1) - 1); /* (k & 1) - 1 is ~0 when k is even: invert the mask in that case */
+
+  return neg;
+}
+
+#undef DO_addlsh2
diff --git a/mpn/generic/toom_eval_pm2exp.c b/mpn/generic/toom_eval_pm2exp.c

new file mode 100644 (file)

index 0000000..26c1edb
--- /dev/null
+++ b/mpn/generic/toom_eval_pm2exp.c
@@ -0,0 +1,117 @@
+/* mpn_toom_eval_pm2exp -- Evaluate a polynomial in +2^k and -2^k
+
+   Contributed to the GNU project by Niels Möller
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Evaluates a polynomial of degree k > 2, in the points +2^shift and -2^shift.
+   xp holds k coefficients of n limbs each, followed by the top coefficient
+   of hn limbs (0 < hn <= n).  The even-index terms are accumulated in
+   {xp2,n+1} and the odd-index terms in {tp,n+1}.  On return {xp2,n+1} is
+   their sum (the value at +2^shift) and {xm2,n+1} the absolute value of
+   their difference (the value at -2^shift).  Returns ~0 when the odd part
+   is the larger one, i.e. the value at -2^shift is negative, 0 otherwise.  */
+int
+mpn_toom_eval_pm2exp (mp_ptr xp2, mp_ptr xm2, unsigned k,
+                     mp_srcptr xp, mp_size_t n, mp_size_t hn, unsigned shift,
+                     mp_ptr tp)
+{
+  unsigned i;
+  int neg;
+#if HAVE_NATIVE_mpn_addlsh_n
+  mp_limb_t cy;
+#endif
+
+  ASSERT (k >= 3);
+  ASSERT (shift*k < GMP_NUMB_BITS);
+
+  ASSERT (hn > 0);
+  ASSERT (hn <= n);
+
+  /* The degree k is also the number of full-size coefficients, so
+   * that last coefficient, of size hn, starts at xp + k*n. */
+
+#if HAVE_NATIVE_mpn_addlsh_n
+  xp2[n] = mpn_addlsh_n (xp2, xp, xp + 2*n, n, 2*shift);
+  for (i = 4; i < k; i += 2) /* remaining full-size even-index terms */
+    xp2[n] += mpn_addlsh_n (xp2, xp2, xp + i*n, n, i*shift);
+
+  tp[n] = mpn_lshift (tp, xp+n, n, shift);
+  for (i = 3; i < k; i+= 2) /* remaining full-size odd-index terms */
+    tp[n] += mpn_addlsh_n (tp, tp, xp+i*n, n, i*shift);
+
+  if (k & 1) /* the short top term joins the part matching the parity of k */
+    {
+      cy = mpn_addlsh_n (tp, tp, xp+k*n, hn, k*shift);
+      MPN_INCR_U (tp + hn, n+1 - hn, cy);
+    }
+  else
+    {
+      cy = mpn_addlsh_n (xp2, xp2, xp+k*n, hn, k*shift);
+      MPN_INCR_U (xp2 + hn, n+1 - hn, cy);
+    }
+
+#else /* !HAVE_NATIVE_mpn_addlsh_n */
+  xp2[n] = mpn_lshift (tp, xp+2*n, n, 2*shift); /* tp is scratch for the shifted even terms */
+  xp2[n] += mpn_add_n (xp2, xp, tp, n);
+  for (i = 4; i < k; i += 2)
+    {
+      xp2[n] += mpn_lshift (tp, xp + i*n, n, i*shift);
+      xp2[n] += mpn_add_n (xp2, xp2, tp, n);
+    }
+
+  tp[n] = mpn_lshift (tp, xp+n, n, shift);
+  for (i = 3; i < k; i+= 2)
+    {
+      tp[n] += mpn_lshift (xm2, xp + i*n, n, i*shift); /* xm2 is scratch until the final subtraction */
+      tp[n] += mpn_add_n (tp, tp, xm2, n);
+    }
+
+  xm2[hn] = mpn_lshift (xm2, xp + k*n, hn, k*shift); /* {xm2,hn+1} <- shifted top term */
+  if (k & 1) /* the short top term joins the part matching the parity of k */
+    mpn_add (tp, tp, n+1, xm2, hn+1);
+  else
+    mpn_add (xp2, xp2, n+1, xm2, hn+1);
+#endif /* !HAVE_NATIVE_mpn_addlsh_n */
+
+  neg = (mpn_cmp (xp2, tp, n + 1) < 0) ? ~0 : 0;
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  if (neg)
+    mpn_add_n_sub_n (xp2, xm2, tp, xp2, n + 1);
+  else
+    mpn_add_n_sub_n (xp2, xm2, xp2, tp, n + 1);
+#else /* !HAVE_NATIVE_mpn_add_n_sub_n */
+  if (neg) /* always subtract the smaller part from the larger one */
+    mpn_sub_n (xm2, tp, xp2, n + 1);
+  else
+    mpn_sub_n (xm2, xp2, tp, n + 1);
+
+  mpn_add_n (xp2, xp2, tp, n + 1);
+#endif /* !HAVE_NATIVE_mpn_add_n_sub_n */
+
+  /* FIXME: the following asserts are useless if (k+1)*shift >= GMP_LIMB_BITS */
+  ASSERT ((k+1)*shift >= GMP_LIMB_BITS ||
+         xp2[n] < ((CNST_LIMB(1)<<((k+1)*shift))-1)/((CNST_LIMB(1)<<shift)-1));
+  ASSERT ((k+2)*shift >= GMP_LIMB_BITS ||
+         xm2[n] < ((CNST_LIMB(1)<<((k+2)*shift))-((k&1)?(CNST_LIMB(1)<<shift):1))/((CNST_LIMB(1)<<(2*shift))-1));
+
+  return neg;
+}
diff --git a/mpn/generic/toom_eval_pm2rexp.c b/mpn/generic/toom_eval_pm2rexp.c

new file mode 100644 (file)

index 0000000..ecbe9a7
--- /dev/null
+++ b/mpn/generic/toom_eval_pm2rexp.c
@@ -0,0 +1,91 @@
+/* mpn_toom_eval_pm2rexp -- Evaluate a polynomial in +2^-k and -2^-k
+
+   Contributed to the GNU project by Marco Bodrato
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#if HAVE_NATIVE_mpn_addlsh_n
+#define DO_mpn_addlsh_n(dst,src,n,s,ws) mpn_addlsh_n(dst,dst,src,n,s)
+#else
+/* Fallback used when no native mpn_addlsh_n is available:
+   {dst,n} <- {dst,n} + ({src,n} << s), with {ws,n} as scratch for the
+   shifted operand.  Returns the value returned by mpn_lshift plus the
+   carry returned by mpn_add_n.  */
+static mp_limb_t
+DO_mpn_addlsh_n(mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
+{
+#if USE_MUL_1 && 0
+  /* Disabled alternative: the "&& 0" keeps it from being compiled.  */
+  return mpn_addmul_1(dst,src,n,CNST_LIMB(1) <<(s));
+#else
+  mp_limb_t cy;
+  cy = mpn_lshift(ws,src,n,s);
+  return    cy + mpn_add_n(dst,dst,ws,n);
+#endif
+}
+#endif
+
+/* Evaluates a polynomial of degree q (ASSERTed q > 1) in the points
+   +2^-s and -2^-s, scaled by 2^(s*q) so that every term stays an integer:
+   the coefficient of index i is shifted left by s*(q-i) bits.
+   ap holds q coefficients of n limbs each, followed by the top coefficient
+   of t limbs (t <= n).  The even-index terms are accumulated in {rp,n+1}
+   and the odd-index terms in {ws,n+1}.  On return {rp,n+1} is their sum
+   and {rm,n+1} the absolute value of their difference.  Returns ~0 when
+   the odd part is the larger one, 0 otherwise.  */
+int
+mpn_toom_eval_pm2rexp (mp_ptr rp, mp_ptr rm,
+                     unsigned int q, mp_srcptr ap, mp_size_t n, mp_size_t t,
+                     unsigned int s, mp_ptr ws)
+{
+  unsigned int i;
+  int neg;
+  /* {ap,q*n+t} -> {rp,n+1} {rm,n+1} , with {ws, n+1}*/
+  ASSERT (n >= t);
+  ASSERT (s != 0); /* or _eval_pm1 should be used */
+  ASSERT (q > 1);
+  ASSERT (s*q < GMP_NUMB_BITS);
+  rp[n] = mpn_lshift(rp, ap, n, s*q); /* term of index 0 */
+  ws[n] = mpn_lshift(ws, ap+n, n, s*(q-1)); /* term of index 1 */
+  if( (q & 1) != 0) { /* q odd: the unshifted top term belongs to the odd part */
+    ASSERT_NOCARRY(mpn_add(ws,ws,n+1,ap+n*q,t));
+    rp[n] += DO_mpn_addlsh_n(rp, ap+n*(q-1), n, s, rm); /* rm is passed as the scratch argument */
+  } else { /* q even: the unshifted top term belongs to the even part */
+    ASSERT_NOCARRY(mpn_add(rp,rp,n+1,ap+n*q,t));
+  }
+  for(i=2; i<q-1; i++) /* two terms per pass: even index into rp, then odd index into ws */
+  {
+    rp[n] += DO_mpn_addlsh_n(rp, ap+n*i, n, s*(q-i), rm);
+    i++;
+    ws[n] += DO_mpn_addlsh_n(ws, ap+n*i, n, s*(q-i), rm);
+  };
+
+  neg = (mpn_cmp (rp, ws, n + 1) < 0) ? ~0 : 0;
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  if (neg)
+    mpn_add_n_sub_n (rp, rm, ws, rp, n + 1);
+  else
+    mpn_add_n_sub_n (rp, rm, rp, ws, n + 1);
+#else /* !HAVE_NATIVE_mpn_add_n_sub_n */
+  if (neg) /* always subtract the smaller part from the larger one */
+    mpn_sub_n (rm, ws, rp, n + 1);
+  else
+    mpn_sub_n (rm, rp, ws, n + 1);
+
+  ASSERT_NOCARRY (mpn_add_n (rp, rp, ws, n + 1));
+#endif /* !HAVE_NATIVE_mpn_add_n_sub_n */
+
+  return neg;
+}
diff --git a/mpn/generic/toom_interpolate_12pts.c b/mpn/generic/toom_interpolate_12pts.c

new file mode 100644 (file)

index 0000000..57becc3
--- /dev/null
+++ b/mpn/generic/toom_interpolate_12pts.c
@@ -0,0 +1,350 @@
+/* Interpolation for the algorithm Toom-Cook 6.5-way.
+
+   Contributed to the GNU project by Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+#if HAVE_NATIVE_mpn_sublsh_n
+#define DO_mpn_sublsh_n(dst,src,n,s,ws) mpn_sublsh_n(dst,dst,src,n,s)
+#else
+static mp_limb_t
+DO_mpn_sublsh_n(mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
+{
+#if USE_MUL_1 && 0
+  return mpn_submul_1(dst,src,n,CNST_LIMB(1) <<(s));
+#else
+  /* {dst,n} -= {src,n} << s; {ws,n} receives the shifted copy of src. */
+  mp_limb_t shifted_out = mpn_lshift(ws,src,n,s);
+  return shifted_out + mpn_sub_n(dst,dst,ws,n);
+#endif
+}
+#endif
+
+#if HAVE_NATIVE_mpn_addlsh_n
+#define DO_mpn_addlsh_n(dst,src,n,s,ws) mpn_addlsh_n(dst,dst,src,n,s)
+#else
+static mp_limb_t
+DO_mpn_addlsh_n(mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
+{
+#if USE_MUL_1 && 0
+  return mpn_addmul_1(dst,src,n,CNST_LIMB(1) <<(s));
+#else
+  /* {dst,n} += {src,n} << s; {ws,n} receives the shifted copy of src. */
+  mp_limb_t shifted_out = mpn_lshift(ws,src,n,s);
+  return shifted_out + mpn_add_n(dst,dst,ws,n);
+#endif
+}
+#endif
+
+#if HAVE_NATIVE_mpn_subrsh
+#define DO_mpn_subrsh(dst,nd,src,ns,s,ws) mpn_subrsh(dst,nd,src,ns,s)
+#else
+/* {dst,nd} -= {src,ns} >> s, done as src[0] >> s plus the higher limbs shifted left by GMP_NUMB_BITS - s.  FIXME: This is not a correct definition, it assumes no carry */
+#define DO_mpn_subrsh(dst,nd,src,ns,s,ws)                              \
+do {                                                                   \
+  mp_limb_t __cy;                                                      \
+  MPN_DECR_U (dst, nd, src[0] >> s);                                   \
+  __cy = DO_mpn_sublsh_n (dst, src + 1, ns - 1, GMP_NUMB_BITS - s, ws);        \
+  MPN_DECR_U (dst + ns - 1, nd - ns + 1, __cy);                                \
+} while (0)
+#endif
+
+
+#if GMP_NUMB_BITS < 21
+#error Not implemented: Both sublsh_n(,,,20) should be corrected.
+#endif
+
+#if GMP_NUMB_BITS < 16
+#error Not implemented: divexact_by42525 needs splitting.
+#endif
+
+#if GMP_NUMB_BITS < 12
+#error Not implemented: Hard to adapt...
+#endif
+
+/* Non-zero: use one mpn_addmul_1/mpn_submul_1 call; zero: use the add/sub and shifted add/sub sequence instead.  FIXME: tuneup should decide the best variant */
+#ifndef AORSMUL_FASTER_AORS_AORSLSH
+#define AORSMUL_FASTER_AORS_AORSLSH 1
+#endif
+#ifndef AORSMUL_FASTER_AORS_2AORSLSH
+#define AORSMUL_FASTER_AORS_2AORSLSH 1
+#endif
+#ifndef AORSMUL_FASTER_2AORSLSH
+#define AORSMUL_FASTER_2AORSLSH 1
+#endif
+#ifndef AORSMUL_FASTER_3AORSLSH
+#define AORSMUL_FASTER_3AORSLSH 1
+#endif
+
+#define BINVERT_9 \
+  ((((GMP_NUMB_MAX / 9) << (6 - GMP_NUMB_BITS % 6)) * 8 & GMP_NUMB_MAX) | 0x39)
+
+#define BINVERT_255 \
+  (GMP_NUMB_MAX - ((GMP_NUMB_MAX / 255) << (8 - GMP_NUMB_BITS % 8)))
+
+  /* 2835^-1 and 42525^-1 modulo 2^GMP_LIMB_BITS, masked to the numb bits.  FIXME: find some more general expressions for 2835^-1, 42525^-1 */
+#if GMP_LIMB_BITS == 32
+#define BINVERT_2835  (GMP_NUMB_MASK &         CNST_LIMB(0x53E3771B))
+#define BINVERT_42525 (GMP_NUMB_MASK &         CNST_LIMB(0x9F314C35))
+#else
+#if GMP_LIMB_BITS == 64
+#define BINVERT_2835  (GMP_NUMB_MASK & CNST_LIMB(0x938CC70553E3771B))
+#define BINVERT_42525 (GMP_NUMB_MASK & CNST_LIMB(0xE7B40D449F314C35))
+#endif /* GMP_LIMB_BITS == 64; other limb sizes leave both constants undefined */
+#endif
+
+#ifndef mpn_divexact_by255 /* {dst,size} = {src,size} / 255, exact division */
+#if GMP_NUMB_BITS % 8 == 0
+#define mpn_divexact_by255(dst,src,size) \
+  (255 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 255)))
+#else
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define mpn_divexact_by255(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(255),BINVERT_255,0)
+#else
+#define mpn_divexact_by255(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(255))
+#endif
+#endif
+#endif /* mpn_divexact_by255 */
+
+#ifndef mpn_divexact_by9x4 /* exact division by 9*4; the last mpn_pi1_bdiv_q_1 argument is the shift for the power of 2 */
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define mpn_divexact_by9x4(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(9),BINVERT_9,2)
+#else
+#define mpn_divexact_by9x4(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(9)<<2)
+#endif
+#endif /* mpn_divexact_by9x4 */
+
+#ifndef mpn_divexact_by42525 /* exact division by 42525 */
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_42525)
+#define mpn_divexact_by42525(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(42525),BINVERT_42525,0)
+#else
+#define mpn_divexact_by42525(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(42525))
+#endif
+#endif /* mpn_divexact_by42525 */
+
+#ifndef mpn_divexact_by2835x4 /* exact division by 2835*4 */
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_2835)
+#define mpn_divexact_by2835x4(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(2835),BINVERT_2835,2)
+#else
+#define mpn_divexact_by2835x4(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(2835)<<2)
+#endif
+#endif /* mpn_divexact_by2835x4 */
+
+/* Interpolation for Toom-6.5 (or Toom-6), using the evaluation
+   points: infinity(6.5 only), +-4, +-2, +-1, +-1/4, +-1/2, 0. More precisely,
+   we want to compute f(2^(GMP_NUMB_BITS * n)) for a polynomial f of
+   degree 11 (or 10), given the 12 (rsp. 11) values:
+
+     r0 = limit at infinity of f(x) / x^11,
+     r1 = f(4),f(-4),
+     r2 = f(2),f(-2),
+     r3 = f(1),f(-1),
+     r4 = f(1/4),f(-1/4),
+     r5 = f(1/2),f(-1/2),
+     r6 = f(0).
+
+   All couples of the form f(n),f(-n) must be already mixed with
+   toom_couple_handling(f(n),...,f(-n),...)
+
+   The result is stored in {pp, spt + 11*n (or 10*n)}.
+   At entry, r6 is stored at {pp, 2n},
+   r4 is stored at {pp + 3n, 3n + 1}.
+   r2 is stored at {pp + 7n, 3n + 1}.
+   r0 is stored at {pp +11n, spt}.
+
+   The other values are 3n+1 limbs each (with most significant limbs small).
+
+   Negative intermediate results are stored two-complemented.
+   Inputs are destroyed.
+*/
+
+void
+mpn_toom_interpolate_12pts (mp_ptr pp, mp_ptr r1, mp_ptr r3, mp_ptr r5,
+                       mp_size_t n, mp_size_t spt, int half, mp_ptr wsi)
+{
+  mp_limb_t cy;
+  mp_size_t n3;
+  mp_size_t n3p1;
+  n3 = 3 * n;
+  n3p1 = n3 + 1;
+
+#define   r4    (pp + n3)                      /* 3n+1 */
+#define   r2    (pp + 7 * n)                   /* 3n+1 */
+#define   r0    (pp +11 * n)                   /* s+t <= 2*n */
+
+  /******************************* interpolation *****************************/
+  if (half != 0) { /* Toom-6.5 only: take the r0 (point at infinity) term out of r1..r5 */
+    cy = mpn_sub_n (r3, r3, r0, spt);
+    MPN_DECR_U (r3 + spt, n3p1 - spt, cy);
+
+    cy = DO_mpn_sublsh_n (r2, r0, spt, 10, wsi);
+    MPN_DECR_U (r2 + spt, n3p1 - spt, cy);
+    DO_mpn_subrsh(r5, n3p1, r0, spt, 2, wsi);
+
+    cy = DO_mpn_sublsh_n (r1, r0, spt, 20, wsi);
+    MPN_DECR_U (r1 + spt, n3p1 - spt, cy);
+    DO_mpn_subrsh(r4, n3p1, r0, spt, 4, wsi);
+  };
+  /* Take the r6 = {pp,2n} term out of r4 and r1 (r5, r2 and r3 follow below). */
+  r4[n3] -= DO_mpn_sublsh_n (r4 + n, pp, 2 * n, 20, wsi);
+  DO_mpn_subrsh(r1 + n, 2 * n + 1, pp, 2 * n, 4, wsi);
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  mpn_add_n_sub_n (r1, r4, r4, r1, n3p1);
+#else
+  ASSERT_NOCARRY(mpn_add_n (wsi, r1, r4, n3p1));
+  mpn_sub_n (r4, r4, r1, n3p1); /* can be negative */
+  MP_PTR_SWAP(r1, wsi); /* r1 now names the sum; the old r1 area is the new scratch */
+#endif
+
+  r5[n3] -= DO_mpn_sublsh_n (r5 + n, pp, 2 * n, 10, wsi);
+  DO_mpn_subrsh(r2 + n, 2 * n + 1, pp, 2 * n, 2, wsi);
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  mpn_add_n_sub_n (r2, r5, r5, r2, n3p1);
+#else
+  mpn_sub_n (wsi, r5, r2, n3p1); /* can be negative */
+  ASSERT_NOCARRY(mpn_add_n (r2, r2, r5, n3p1));
+  MP_PTR_SWAP(r5, wsi); /* r5 now names the difference; the old r5 area is the new scratch */
+#endif
+
+  r3[n3] -= mpn_sub_n (r3+n, r3+n, pp, 2 * n);
+
+#if AORSMUL_FASTER_AORS_AORSLSH
+  mpn_submul_1 (r4, r5, n3p1, 257); /* can be negative */
+#else
+  mpn_sub_n (r4, r4, r5, n3p1); /* can be negative */
+  DO_mpn_sublsh_n (r4, r5, n3p1, 8, wsi); /* can be negative */
+#endif
+  /* A division by 2835x4 follows. Warning: the operand can be negative! */
+  mpn_divexact_by2835x4(r4, r4, n3p1);
+  if ((r4[n3] & (GMP_NUMB_MAX << (GMP_NUMB_BITS-3))) != 0)
+    r4[n3] |= (GMP_NUMB_MAX << (GMP_NUMB_BITS-2));
+
+#if AORSMUL_FASTER_2AORSLSH
+  mpn_addmul_1 (r5, r4, n3p1, 60); /* can be negative */
+#else
+  DO_mpn_sublsh_n (r5, r4, n3p1, 2, wsi); /* can be negative */
+  DO_mpn_addlsh_n (r5, r4, n3p1, 6, wsi); /* can give a carry */
+#endif
+  mpn_divexact_by255(r5, r5, n3p1);
+
+  ASSERT_NOCARRY(DO_mpn_sublsh_n (r2, r3, n3p1, 5, wsi));
+
+#if AORSMUL_FASTER_3AORSLSH
+  ASSERT_NOCARRY(mpn_submul_1 (r1, r2, n3p1, 100));
+#else
+  ASSERT_NOCARRY(DO_mpn_sublsh_n (r1, r2, n3p1, 6, wsi));
+  ASSERT_NOCARRY(DO_mpn_sublsh_n (r1, r2, n3p1, 5, wsi));
+  ASSERT_NOCARRY(DO_mpn_sublsh_n (r1, r2, n3p1, 2, wsi));
+#endif
+  ASSERT_NOCARRY(DO_mpn_sublsh_n (r1, r3, n3p1, 9, wsi));
+  mpn_divexact_by42525(r1, r1, n3p1);
+
+#if AORSMUL_FASTER_AORS_2AORSLSH
+  ASSERT_NOCARRY(mpn_submul_1 (r2, r1, n3p1, 225));
+#else
+  ASSERT_NOCARRY(mpn_sub_n (r2, r2, r1, n3p1));
+  ASSERT_NOCARRY(DO_mpn_addlsh_n (r2, r1, n3p1, 5, wsi));
+  ASSERT_NOCARRY(DO_mpn_sublsh_n (r2, r1, n3p1, 8, wsi));
+#endif
+  mpn_divexact_by9x4(r2, r2, n3p1);
+
+  ASSERT_NOCARRY(mpn_sub_n (r3, r3, r2, n3p1));
+
+  mpn_sub_n (r4, r2, r4, n3p1);
+  ASSERT_NOCARRY(mpn_rshift(r4, r4, n3p1, 1));
+  ASSERT_NOCARRY(mpn_sub_n (r2, r2, r4, n3p1));
+
+  mpn_add_n (r5, r5, r1, n3p1);
+  ASSERT_NOCARRY(mpn_rshift(r5, r5, n3p1, 1));
+
+  /* last interpolation steps... */
+  ASSERT_NOCARRY(mpn_sub_n (r3, r3, r1, n3p1));
+  ASSERT_NOCARRY(mpn_sub_n (r1, r1, r5, n3p1));
+  /* ... could be mixed with recomposition
+       ||H-r5|M-r5|L-r5|   ||H-r1|M-r1|L-r1|
+  */
+
+  /***************************** recomposition *******************************/
+  /*
+    pp[] prior to operations:
+    |M r0|L r0|___||H r2|M r2|L r2|___||H r4|M r4|L r4|____|H_r6|L r6|pp
+
+    summation scheme for remaining operations:
+    |__12|n_11|n_10|n__9|n__8|n__7|n__6|n__5|n__4|n__3|n__2|n___|n___|pp
+    |M r0|L r0|___||H r2|M r2|L r2|___||H r4|M r4|L r4|____|H_r6|L r6|pp
+       ||H r1|M r1|L r1|   ||H r3|M r3|L r3|   ||H_r5|M_r5|L_r5|
+  */
+
+  cy = mpn_add_n (pp + n, pp + n, r5, n);
+  cy = mpn_add_1 (pp + 2 * n, r5 + n, n, cy);
+#if HAVE_NATIVE_mpn_add_nc
+  cy = r5[n3] + mpn_add_nc(pp + n3, pp + n3, r5 + 2 * n, n, cy);
+#else
+  MPN_INCR_U (r5 + 2 * n, n + 1, cy);
+  cy = r5[n3] + mpn_add_n (pp + n3, pp + n3, r5 + 2 * n, n);
+#endif
+  MPN_INCR_U (pp + n3 + n, 2 * n + 1, cy);
+
+  pp[2 * n3]+= mpn_add_n (pp + 5 * n, pp + 5 * n, r3, n);
+  cy = mpn_add_1 (pp + 2 * n3, r3 + n, n, pp[2 * n3]);
+#if HAVE_NATIVE_mpn_add_nc
+  cy = r3[n3] + mpn_add_nc(pp + 7 * n, pp + 7 * n, r3 + 2 * n, n, cy);
+#else
+  MPN_INCR_U (r3 + 2 * n, n + 1, cy);
+  cy = r3[n3] + mpn_add_n (pp + 7 * n, pp + 7 * n, r3 + 2 * n, n);
+#endif
+  MPN_INCR_U (pp + 8 * n, 2 * n + 1, cy);
+
+  pp[10*n]+=mpn_add_n (pp + 9 * n, pp + 9 * n, r1, n);
+  if (half) {
+    cy = mpn_add_1 (pp + 10 * n, r1 + n, n, pp[10 * n]);
+#if HAVE_NATIVE_mpn_add_nc
+    if (LIKELY (spt > n)) {
+      cy = r1[n3] + mpn_add_nc(pp + 11 * n, pp + 11 * n, r1 + 2 * n, n, cy);
+      MPN_INCR_U (pp + 4 * n3, spt - n, cy);
+    } else {
+      ASSERT_NOCARRY(mpn_add_nc(pp + 11 * n, pp + 11 * n, r1 + 2 * n, spt, cy));
+    }
+#else
+    MPN_INCR_U (r1 + 2 * n, n + 1, cy);
+    if (LIKELY (spt > n)) {
+      cy = r1[n3] + mpn_add_n (pp + 11 * n, pp + 11 * n, r1 + 2 * n, n);
+      MPN_INCR_U (pp + 4 * n3, spt - n, cy);
+    } else {
+      ASSERT_NOCARRY(mpn_add_n (pp + 11 * n, pp + 11 * n, r1 + 2 * n, spt));
+    }
+#endif
+  } else {
+    ASSERT_NOCARRY(mpn_add_1 (pp + 10 * n, r1 + n, spt, pp[10 * n]));
+  }
+
+#undef   r0
+#undef   r2
+#undef   r4
+}
diff --git a/mpn/generic/toom_interpolate_16pts.c b/mpn/generic/toom_interpolate_16pts.c

new file mode 100644 (file)

index 0000000..36ed15d
--- /dev/null
+++ b/mpn/generic/toom_interpolate_16pts.c
@@ -0,0 +1,516 @@
+/* Interpolation for the algorithm Toom-Cook 8.5-way.
+
+   Contributed to the GNU project by Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#if GMP_NUMB_BITS < 29
+#error Not implemented: Both sublsh_n(,,,28) should be corrected; r2 and r5 need one more LIMB.
+#endif
+
+#if GMP_NUMB_BITS < 28
+#error Not implemented: divexact_by188513325 and _by182712915 will not work.
+#endif
+
+
+#if HAVE_NATIVE_mpn_sublsh_n
+#define DO_mpn_sublsh_n(dst,src,n,s,ws) mpn_sublsh_n(dst,dst,src,n,s)
+#else
+static mp_limb_t
+DO_mpn_sublsh_n(mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
+{
+#if USE_MUL_1 && 0
+  return mpn_submul_1(dst,src,n,CNST_LIMB(1) <<(s));
+#else
+  /* {dst,n} -= {src,n} << s; {ws,n} receives the shifted copy of src. */
+  mp_limb_t shifted_out = mpn_lshift(ws,src,n,s);
+  return shifted_out + mpn_sub_n(dst,dst,ws,n);
+#endif
+}
+#endif
+
+#if HAVE_NATIVE_mpn_addlsh_n
+#define DO_mpn_addlsh_n(dst,src,n,s,ws) mpn_addlsh_n(dst,dst,src,n,s)
+#else
+static mp_limb_t
+DO_mpn_addlsh_n(mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
+{
+#if USE_MUL_1 && 0
+  return mpn_addmul_1(dst,src,n,CNST_LIMB(1) <<(s));
+#else
+  /* {dst,n} += {src,n} << s; {ws,n} receives the shifted copy of src. */
+  mp_limb_t shifted_out = mpn_lshift(ws,src,n,s);
+  return shifted_out + mpn_add_n(dst,dst,ws,n);
+#endif
+}
+#endif
+
+#if HAVE_NATIVE_mpn_subrsh
+#define DO_mpn_subrsh(dst,nd,src,ns,s,ws) mpn_subrsh(dst,nd,src,ns,s)
+#else
+/* {dst,nd} -= {src,ns} >> s, done as src[0] >> s plus the higher limbs shifted left by GMP_NUMB_BITS - s.  FIXME: This is not a correct definition, it assumes no carry */
+#define DO_mpn_subrsh(dst,nd,src,ns,s,ws)                              \
+do {                                                                   \
+  mp_limb_t __cy;                                                      \
+  MPN_DECR_U (dst, nd, src[0] >> s);                                   \
+  __cy = DO_mpn_sublsh_n (dst, src + 1, ns - 1, GMP_NUMB_BITS - s, ws);        \
+  MPN_DECR_U (dst + ns - 1, nd - ns + 1, __cy);                                \
+} while (0)
+#endif
+
+
+/* Non-zero: use one mpn_addmul_1/mpn_submul_1 call; zero: use the add/sub and shifted add/sub sequence instead.  FIXME: tuneup should decide the best variant */
+#ifndef AORSMUL_FASTER_AORS_AORSLSH
+#define AORSMUL_FASTER_AORS_AORSLSH 1
+#endif
+#ifndef AORSMUL_FASTER_AORS_2AORSLSH
+#define AORSMUL_FASTER_AORS_2AORSLSH 1
+#endif
+#ifndef AORSMUL_FASTER_2AORSLSH
+#define AORSMUL_FASTER_2AORSLSH 1
+#endif
+#ifndef AORSMUL_FASTER_3AORSLSH
+#define AORSMUL_FASTER_3AORSLSH 1
+#endif
+/* With limbs narrower than 43 bits, a shift by 42 is done as a one-limb offset (BIT_CORRECTION) plus a shift by 42 - CORRECTION_BITS. */
+#if GMP_NUMB_BITS < 43
+#define BIT_CORRECTION 1
+#define CORRECTION_BITS GMP_NUMB_BITS
+#else
+#define BIT_CORRECTION 0
+#define CORRECTION_BITS 0
+#endif
+
+#define BINVERT_9 \
+  ((((GMP_NUMB_MAX / 9) << (6 - GMP_NUMB_BITS % 6)) * 8 & GMP_NUMB_MAX) | 0x39)
+
+#define BINVERT_255 \
+  (GMP_NUMB_MAX - ((GMP_NUMB_MAX / 255) << (8 - GMP_NUMB_BITS % 8)))
+
+  /* Inverses of the named odd divisors, masked to the numb bits; the 32-bit ...L/...H pairs are the low/high halves of the 64-bit values below.  FIXME: find some more general expressions for inverses */
+#if GMP_LIMB_BITS == 32
+#define BINVERT_2835  (GMP_NUMB_MASK &         CNST_LIMB(0x53E3771B))
+#define BINVERT_42525 (GMP_NUMB_MASK &         CNST_LIMB(0x9F314C35))
+#define BINVERT_182712915 (GMP_NUMB_MASK &     CNST_LIMB(0x550659DB))
+#define BINVERT_188513325 (GMP_NUMB_MASK &     CNST_LIMB(0xFBC333A5))
+#define BINVERT_255x182712915L (GMP_NUMB_MASK &        CNST_LIMB(0x6FC4CB25))
+#define BINVERT_255x188513325L (GMP_NUMB_MASK &        CNST_LIMB(0x6864275B))
+#if GMP_NAIL_BITS == 0
+#define BINVERT_255x182712915H CNST_LIMB(0x1B649A07)
+#define BINVERT_255x188513325H CNST_LIMB(0x06DB993A)
+#else /* GMP_NAIL_BITS != 0 */
+#define BINVERT_255x182712915H \
+  (GMP_NUMB_MASK & CNST_LIMB((0x1B649A07<<GMP_NAIL_BITS) | (0x6FC4CB25>>GMP_NUMB_BITS)))
+#define BINVERT_255x188513325H \
+  (GMP_NUMB_MASK & CNST_LIMB((0x06DB993A<<GMP_NAIL_BITS) | (0x6864275B>>GMP_NUMB_BITS)))
+#endif
+#else
+#if GMP_LIMB_BITS == 64
+#define BINVERT_2835  (GMP_NUMB_MASK & CNST_LIMB(0x938CC70553E3771B))
+#define BINVERT_42525 (GMP_NUMB_MASK & CNST_LIMB(0xE7B40D449F314C35))
+#define BINVERT_255x182712915  (GMP_NUMB_MASK &        CNST_LIMB(0x1B649A076FC4CB25))
+#define BINVERT_255x188513325  (GMP_NUMB_MASK &        CNST_LIMB(0x06DB993A6864275B))
+#endif /* GMP_LIMB_BITS == 64; other limb sizes leave these constants undefined */
+#endif
+
+#ifndef mpn_divexact_by255 /* {dst,size} = {src,size} / 255, exact division */
+#if GMP_NUMB_BITS % 8 == 0
+#define mpn_divexact_by255(dst,src,size) \
+  (255 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 255)))
+#else
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define mpn_divexact_by255(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(255),BINVERT_255,0)
+#else
+#define mpn_divexact_by255(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(255))
+#endif
+#endif
+#endif /* mpn_divexact_by255 */
+
+#ifndef mpn_divexact_by255x4 /* exact division by 255*4; the last mpn_pi1_bdiv_q_1 argument is the shift for the power of 2 */
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define mpn_divexact_by255x4(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(255),BINVERT_255,2)
+#else
+#define mpn_divexact_by255x4(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(255)<<2)
+#endif
+#endif /* mpn_divexact_by255x4 */
+
+#ifndef mpn_divexact_by9x16 /* exact division by 9*16 */
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define mpn_divexact_by9x16(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(9),BINVERT_9,4)
+#else
+#define mpn_divexact_by9x16(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(9)<<4)
+#endif
+#endif /* mpn_divexact_by9x16 */
+
+#ifndef mpn_divexact_by42525x16 /* exact division by 42525*16 */
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_42525)
+#define mpn_divexact_by42525x16(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(42525),BINVERT_42525,4)
+#else
+#define mpn_divexact_by42525x16(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(42525)<<4)
+#endif
+#endif /* mpn_divexact_by42525x16 */
+
+#ifndef mpn_divexact_by2835x64 /* exact division by 2835*64 */
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_2835)
+#define mpn_divexact_by2835x64(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(2835),BINVERT_2835,6)
+#else
+#define mpn_divexact_by2835x64(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(2835)<<6)
+#endif
+#endif /* mpn_divexact_by2835x64 */
+
+#ifndef  mpn_divexact_by255x182712915
+#if GMP_NUMB_BITS < 36 /* 255*182712915 needs 36 bits: narrower limbs divide by 182712915, then by 255 */
+#if HAVE_NATIVE_mpn_bdiv_q_2_pi2 && defined(BINVERT_255x182712915H)
+/* FIXME: use mpn_bdiv_q_2_pi2 */
+#endif
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_182712915)
+#define mpn_divexact_by255x182712915(dst,src,size)                             \
+  do {                                                                         \
+    mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(182712915),BINVERT_182712915,0);   \
+    mpn_divexact_by255(dst,dst,size);                                          \
+  } while(0)
+#else
+#define mpn_divexact_by255x182712915(dst,src,size)     \
+  do {                                                 \
+    mpn_divexact_1(dst,src,size,CNST_LIMB(182712915)); \
+    mpn_divexact_by255(dst,dst,size);                  \
+  } while(0)
+#endif
+#else /* GMP_NUMB_BITS > 35 */
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_255x182712915)
+#define mpn_divexact_by255x182712915(dst,src,size) \
+  mpn_pi1_bdiv_q_1(dst,src,size,255*CNST_LIMB(182712915),BINVERT_255x182712915,0)
+#else
+#define mpn_divexact_by255x182712915(dst,src,size) mpn_divexact_1(dst,src,size,255*CNST_LIMB(182712915))
+#endif
+#endif /* GMP_NUMB_BITS >?< 36 */
+#endif /* mpn_divexact_by255x182712915 */
+
+#ifndef  mpn_divexact_by255x188513325
+#if GMP_NUMB_BITS < 36 /* 255*188513325 needs 36 bits: narrower limbs divide by 188513325, then by 255 */
+#if HAVE_NATIVE_mpn_bdiv_q_1_pi2 && defined(BINVERT_255x188513325H)
+/* FIXME: use mpn_bdiv_q_1_pi2 */
+#endif
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_188513325)
+#define mpn_divexact_by255x188513325(dst,src,size)                     \
+  do {                                                                 \
+    mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(188513325),BINVERT_188513325,0);   \
+    mpn_divexact_by255(dst,dst,size);                                  \
+  } while(0)
+#else
+#define mpn_divexact_by255x188513325(dst,src,size)     \
+  do {                                                 \
+    mpn_divexact_1(dst,src,size,CNST_LIMB(188513325)); \
+    mpn_divexact_by255(dst,dst,size);                  \
+  } while(0)
+#endif
+#else /* GMP_NUMB_BITS > 35 */
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_255x188513325)
+#define mpn_divexact_by255x188513325(dst,src,size) \
+  mpn_pi1_bdiv_q_1(dst,src,size,255*CNST_LIMB(188513325),BINVERT_255x188513325,0)
+#else
+#define mpn_divexact_by255x188513325(dst,src,size) mpn_divexact_1(dst,src,size,255*CNST_LIMB(188513325))
+#endif
+#endif /* GMP_NUMB_BITS >?< 36 */
+#endif /* mpn_divexact_by255x188513325 */
+
+/* Interpolation for Toom-8.5 (or Toom-8), using the evaluation
+   points: infinity(8.5 only), +-8, +-4, +-2, +-1, +-1/4, +-1/2,
+   +-1/8, 0. More precisely, we want to compute
+   f(2^(GMP_NUMB_BITS * n)) for a polynomial f of degree 15 (or
+   14), given the 16 (rsp. 15) values:
+
+     r0 = limit at infinity of f(x) / x^15,
+     r1 = f(8),f(-8),
+     r2 = f(4),f(-4),
+     r3 = f(2),f(-2),
+     r4 = f(1),f(-1),
+     r5 = f(1/4),f(-1/4),
+     r6 = f(1/2),f(-1/2),
+     r7 = f(1/8),f(-1/8),
+     r8 = f(0).
+
+   All couples of the form f(n),f(-n) must be already mixed with
+   toom_couple_handling(f(n),...,f(-n),...)
+
+   The result is stored in {pp, spt + 15*n (or 14*n)}.
+   At entry, r8 is stored at {pp, 2n},
+   r6 is stored at {pp + 3n, 3n + 1}.
+   r4 is stored at {pp + 7n, 3n + 1}.
+   r2 is stored at {pp +11n, 3n + 1}.
+   r0 is stored at {pp +15n, spt}.
+
+   The other values are 3n+1 limbs each (with most significant limbs small).
+
+   Negative intermediate results are stored two-complemented.
+   Inputs are destroyed.
+*/
+
+void
+mpn_toom_interpolate_16pts (mp_ptr pp, mp_ptr r1, mp_ptr r3, mp_ptr r5, mp_ptr r7,
+                       mp_size_t n, mp_size_t spt, int half, mp_ptr wsi)
+{
+  mp_limb_t cy;
+  mp_size_t n3;
+  mp_size_t n3p1;
+  n3 = 3 * n;
+  n3p1 = n3 + 1;
+
+#define   r6    (pp + n3)                      /* 3n+1 */
+#define   r4    (pp + 7 * n)                   /* 3n+1 */
+#define   r2    (pp +11 * n)                   /* 3n+1 */
+#define   r0    (pp +15 * n)                   /* s+t <= 2*n */
+
+  ASSERT( spt <= 2 * n );
+  /******************************* interpolation *****************************/
+  if( half != 0) { /* Toom-8.5 only: take the r0 (point at infinity) term out of r1..r7 */
+    cy = mpn_sub_n (r4, r4, r0, spt);
+    MPN_DECR_U (r4 + spt, n3p1 - spt, cy);
+
+    cy = DO_mpn_sublsh_n (r3, r0, spt, 14, wsi);
+    MPN_DECR_U (r3 + spt, n3p1 - spt, cy);
+    DO_mpn_subrsh(r6, n3p1, r0, spt, 2, wsi);
+
+    cy = DO_mpn_sublsh_n (r2, r0, spt, 28, wsi);
+    MPN_DECR_U (r2 + spt, n3p1 - spt, cy);
+    DO_mpn_subrsh(r5, n3p1, r0, spt, 4, wsi);
+
+    cy = DO_mpn_sublsh_n (r1 + BIT_CORRECTION, r0, spt, 42 - CORRECTION_BITS, wsi);
+#if BIT_CORRECTION
+    cy = mpn_sub_1 (r1 + spt + BIT_CORRECTION, r1 + spt + BIT_CORRECTION,
+                   n3p1 - spt - BIT_CORRECTION, cy);
+    ASSERT (BIT_CORRECTION > 0 || cy == 0);
+    /* FIXME: assumes r7[n3p1] is writable (it is if r5 follows). */
+    cy = r7[n3p1];
+    r7[n3p1] = 0x80;
+#else
+    MPN_DECR_U (r1 + spt + BIT_CORRECTION, n3p1 - spt - BIT_CORRECTION, cy);
+#endif
+    DO_mpn_subrsh(r7, n3p1 + BIT_CORRECTION, r0, spt, 6, wsi);
+#if BIT_CORRECTION
+    /* FIXME: assumes r7[n3p1] is writable. */
+    ASSERT ( BIT_CORRECTION > 0 || r7[n3p1] == 0x80 );
+    r7[n3p1] = cy;
+#endif
+  };
+  /* Take the r8 = {pp,2n} term out of r5 and r2 (the other values follow below). */
+  r5[n3] -= DO_mpn_sublsh_n (r5 + n, pp, 2 * n, 28, wsi);
+  DO_mpn_subrsh(r2 + n, 2 * n + 1, pp, 2 * n, 4, wsi);
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  mpn_add_n_sub_n (r2, r5, r5, r2, n3p1);
+#else
+  mpn_sub_n (wsi, r5, r2, n3p1); /* can be negative */
+  ASSERT_NOCARRY(mpn_add_n (r2, r2, r5, n3p1));
+  MP_PTR_SWAP(r5, wsi); /* r5 now names the difference; the old r5 area is the new scratch */
+#endif
+
+  r6[n3] -= DO_mpn_sublsh_n (r6 + n, pp, 2 * n, 14, wsi);
+  DO_mpn_subrsh(r3 + n, 2 * n + 1, pp, 2 * n, 2, wsi);
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  mpn_add_n_sub_n (r3, r6, r6, r3, n3p1);
+#else
+  ASSERT_NOCARRY(mpn_add_n (wsi, r3, r6, n3p1));
+  mpn_sub_n (r6, r6, r3, n3p1); /* can be negative */
+  MP_PTR_SWAP(r3, wsi); /* r3 now names the sum; the old r3 area is the new scratch */
+#endif
+
+  cy = DO_mpn_sublsh_n (r7 + n + BIT_CORRECTION, pp, 2 * n, 42 - CORRECTION_BITS, wsi);
+#if BIT_CORRECTION
+  MPN_DECR_U (r1 + n, 2 * n + 1, pp[0] >> 6);
+  cy = DO_mpn_sublsh_n (r1 + n, pp + 1, 2 * n - 1, GMP_NUMB_BITS - 6, wsi);
+  cy = mpn_sub_1(r1 + 3 * n - 1, r1 + 3 * n - 1, 2, cy);
+  ASSERT ( BIT_CORRECTION > 0 || cy != 0 );
+#else
+  r7[n3] -= cy;
+  DO_mpn_subrsh(r1 + n, 2 * n + 1, pp, 2 * n, 6, wsi);
+#endif
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  mpn_add_n_sub_n (r1, r7, r7, r1, n3p1);
+#else
+  mpn_sub_n (wsi, r7, r1, n3p1); /* can be negative */
+  mpn_add_n (r1, r1, r7, n3p1);  /* if BIT_CORRECTION != 0, can give a carry. */
+  MP_PTR_SWAP(r7, wsi); /* r7 now names the difference; the old r7 area is the new scratch */
+#endif
+
+  r4[n3] -= mpn_sub_n (r4+n, r4+n, pp, 2 * n);
+
+#if AORSMUL_FASTER_2AORSLSH
+  mpn_submul_1 (r5, r6, n3p1, 1028); /* can be negative */
+#else
+  DO_mpn_sublsh_n (r5, r6, n3p1, 2, wsi); /* can be negative */
+  DO_mpn_sublsh_n (r5, r6, n3p1,10, wsi); /* can be negative */
+#endif
+
+  mpn_submul_1 (r7, r5, n3p1, 1300); /* can be negative */
+#if AORSMUL_FASTER_3AORSLSH
+  mpn_submul_1 (r7, r6, n3p1, 1052688); /* can be negative */
+#else
+  DO_mpn_sublsh_n (r7, r6, n3p1, 4, wsi); /* can be negative */
+  DO_mpn_sublsh_n (r7, r6, n3p1,12, wsi); /* can be negative */
+  DO_mpn_sublsh_n (r7, r6, n3p1,20, wsi); /* can be negative */
+#endif
+  mpn_divexact_by255x188513325(r7, r7, n3p1);
+
+  mpn_submul_1 (r5, r7, n3p1, 12567555); /* can be negative */
+  /* A division by 2835x64 follows. Warning: the operand can be negative! */
+  mpn_divexact_by2835x64(r5, r5, n3p1);
+  if ((r5[n3] & (GMP_NUMB_MAX << (GMP_NUMB_BITS-7))) != 0)
+    r5[n3] |= (GMP_NUMB_MAX << (GMP_NUMB_BITS-6));
+
+#if AORSMUL_FASTER_AORS_AORSLSH
+  mpn_submul_1 (r6, r7, n3p1, 4095); /* can be negative */
+#else
+  mpn_add_n (r6, r6, r7, n3p1); /* can give a carry */
+  DO_mpn_sublsh_n (r6, r7, n3p1, 12, wsi); /* can be negative */
+#endif
+#if AORSMUL_FASTER_2AORSLSH
+  mpn_addmul_1 (r6, r5, n3p1, 240); /* can be negative */
+#else
+  DO_mpn_addlsh_n (r6, r5, n3p1, 8, wsi); /* can give a carry */
+  DO_mpn_sublsh_n (r6, r5, n3p1, 4, wsi); /* can be negative */
+#endif
+  /* A division by 255x4 follows. Warning: the operand can be negative! */
+  mpn_divexact_by255x4(r6, r6, n3p1);
+  if ((r6[n3] & (GMP_NUMB_MAX << (GMP_NUMB_BITS-3))) != 0)
+    r6[n3] |= (GMP_NUMB_MAX << (GMP_NUMB_BITS-2));
+
+  ASSERT_NOCARRY(DO_mpn_sublsh_n (r3, r4, n3p1, 7, wsi));
+
+  ASSERT_NOCARRY(DO_mpn_sublsh_n (r2, r4, n3p1, 13, wsi));
+  ASSERT_NOCARRY(mpn_submul_1 (r2, r3, n3p1, 400));
+
+  /* If GMP_NUMB_BITS < 42 next operations on r1 can give a carry!*/
+  DO_mpn_sublsh_n (r1, r4, n3p1, 19, wsi);
+  mpn_submul_1 (r1, r2, n3p1, 1428);
+  mpn_submul_1 (r1, r3, n3p1, 112896);
+  mpn_divexact_by255x182712915(r1, r1, n3p1);
+
+  ASSERT_NOCARRY(mpn_submul_1 (r2, r1, n3p1, 15181425));
+  mpn_divexact_by42525x16(r2, r2, n3p1);
+
+#if AORSMUL_FASTER_AORS_2AORSLSH
+  ASSERT_NOCARRY(mpn_submul_1 (r3, r1, n3p1, 3969));
+#else
+  ASSERT_NOCARRY(mpn_sub_n (r3, r3, r1, n3p1));
+  ASSERT_NOCARRY(DO_mpn_addlsh_n (r3, r1, n3p1, 7, wsi));
+  ASSERT_NOCARRY(DO_mpn_sublsh_n (r3, r1, n3p1, 12, wsi));
+#endif
+  ASSERT_NOCARRY(mpn_submul_1 (r3, r2, n3p1, 900));
+  mpn_divexact_by9x16(r3, r3, n3p1);
+
+  ASSERT_NOCARRY(mpn_sub_n (r4, r4, r1, n3p1));
+  ASSERT_NOCARRY(mpn_sub_n (r4, r4, r3, n3p1));
+  ASSERT_NOCARRY(mpn_sub_n (r4, r4, r2, n3p1));
+
+  mpn_add_n (r6, r2, r6, n3p1);
+  ASSERT_NOCARRY(mpn_rshift(r6, r6, n3p1, 1));
+  ASSERT_NOCARRY(mpn_sub_n (r2, r2, r6, n3p1));
+
+  mpn_sub_n (r5, r3, r5, n3p1);
+  ASSERT_NOCARRY(mpn_rshift(r5, r5, n3p1, 1));
+  ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, n3p1));
+
+  mpn_add_n (r7, r1, r7, n3p1);
+  ASSERT_NOCARRY(mpn_rshift(r7, r7, n3p1, 1));
+  ASSERT_NOCARRY(mpn_sub_n (r1, r1, r7, n3p1));
+
+  /* last interpolation steps... */
+  /* ... could be mixed with recomposition
+       ||H-r7|M-r7|L-r7|   ||H-r5|M-r5|L-r5|
+  */
+
+  /***************************** recomposition *******************************/
+  /*
+    pp[] prior to operations:
+    |M r0|L r0|___||H r2|M r2|L r2|___||H r4|M r4|L r4|___||H r6|M r6|L r6|____|H_r8|L r8|pp
+
+    summation scheme for remaining operations:
+    |__16|n_15|n_14|n_13|n_12|n_11|n_10|n__9|n__8|n__7|n__6|n__5|n__4|n__3|n__2|n___|n___|pp
+    |M r0|L r0|___||H r2|M r2|L r2|___||H r4|M r4|L r4|___||H r6|M r6|L r6|____|H_r8|L r8|pp
+       ||H r1|M r1|L r1|   ||H r3|M r3|L r3|   ||H_r5|M_r5|L_r5|   ||H r7|M r7|L r7|
+  */
+
+  cy = mpn_add_n (pp + n, pp + n, r7, n);
+  cy = mpn_add_1 (pp + 2 * n, r7 + n, n, cy);
+#if HAVE_NATIVE_mpn_add_nc
+  cy = r7[n3] + mpn_add_nc(pp + n3, pp + n3, r7 + 2 * n, n, cy);
+#else
+  MPN_INCR_U (r7 + 2 * n, n + 1, cy);
+  cy = r7[n3] + mpn_add_n (pp + n3, pp + n3, r7 + 2 * n, n);
+#endif
+  MPN_INCR_U (pp + 4 * n, 2 * n + 1, cy);
+
+  pp[2 * n3]+= mpn_add_n (pp + 5 * n, pp + 5 * n, r5, n);
+  cy = mpn_add_1 (pp + 2 * n3, r5 + n, n, pp[2 * n3]);
+#if HAVE_NATIVE_mpn_add_nc
+  cy = r5[n3] + mpn_add_nc(pp + 7 * n, pp + 7 * n, r5 + 2 * n, n, cy);
+#else
+  MPN_INCR_U (r5 + 2 * n, n + 1, cy);
+  cy = r5[n3] + mpn_add_n (pp + 7 * n, pp + 7 * n, r5 + 2 * n, n);
+#endif
+  MPN_INCR_U (pp + 8 * n, 2 * n + 1, cy);
+
+  pp[10 * n]+= mpn_add_n (pp + 9 * n, pp + 9 * n, r3, n);
+  cy = mpn_add_1 (pp + 10 * n, r3 + n, n, pp[10 * n]);
+#if HAVE_NATIVE_mpn_add_nc
+  cy = r3[n3] + mpn_add_nc(pp +11 * n, pp +11 * n, r3 + 2 * n, n, cy);
+#else
+  MPN_INCR_U (r3 + 2 * n, n + 1, cy);
+  cy = r3[n3] + mpn_add_n (pp +11 * n, pp +11 * n, r3 + 2 * n, n);
+#endif
+  MPN_INCR_U (pp +12 * n, 2 * n + 1, cy);
+
+  pp[14 * n]+=mpn_add_n (pp +13 * n, pp +13 * n, r1, n);
+  if ( half ) {
+    cy = mpn_add_1 (pp + 14 * n, r1 + n, n, pp[14 * n]);
+#if HAVE_NATIVE_mpn_add_nc
+    if(LIKELY(spt > n)) {
+      cy = r1[n3] + mpn_add_nc(pp + 15 * n, pp + 15 * n, r1 + 2 * n, n, cy);
+      MPN_INCR_U (pp + 16 * n, spt - n, cy);
+    } else {
+      ASSERT_NOCARRY(mpn_add_nc(pp + 15 * n, pp + 15 * n, r1 + 2 * n, spt, cy));
+    }
+#else
+    MPN_INCR_U (r1 + 2 * n, n + 1, cy);
+    if(LIKELY(spt > n)) {
+      cy = r1[n3] + mpn_add_n (pp + 15 * n, pp + 15 * n, r1 + 2 * n, n);
+      MPN_INCR_U (pp + 16 * n, spt - n, cy);
+    } else {
+      ASSERT_NOCARRY(mpn_add_n (pp + 15 * n, pp + 15 * n, r1 + 2 * n, spt));
+    }
+#endif
+  } else {
+    ASSERT_NOCARRY(mpn_add_1 (pp + 14 * n, r1 + n, spt, pp[14 * n]));
+  }
+
+#undef   r0
+#undef   r2
+#undef   r4
+#undef   r6
+}
diff --git a/mpn/generic/toom_interpolate_5pts.c b/mpn/generic/toom_interpolate_5pts.c

new file mode 100644 (file)

index 0000000..1806127
--- /dev/null
+++ b/mpn/generic/toom_interpolate_5pts.c
@@ -0,0 +1,189 @@
+/* mpn_toom_interpolate_5pts -- Interpolate for toom3, 33, 42.
+
+   Contributed to the GNU project by Robert Harley.
+   Improvements by Paul Zimmermann and Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2000, 2001, 2002, 2003, 2005, 2006, 2007, 2009 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by the
+Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpn_toom_interpolate_5pts (mp_ptr c, mp_ptr v2, mp_ptr vm1,
+                          mp_size_t k, mp_size_t twor, int sa,
+                          mp_limb_t vinf0)
+{
+  mp_limb_t cy, saved;   /* saved: the limb shared by the top of v1 and the bottom of vinf */
+  mp_size_t twok;
+  mp_size_t kk1;
+  mp_ptr c1, v1, c3, vinf;
+
+  twok = k + k;
+  kk1 = twok + 1;        /* 2k+1: limb count used for v1, v2 and vm1 below */
+
+  c1 = c  + k;           /* c +  k */
+  v1 = c1 + k;           /* c + 2k */
+  c3 = v1 + k;           /* c + 3k */
+  vinf = c3 + k;         /* c + 4k, i.e. v1 + 2k: vinf[0] is also v1[2k] */
+
+#define v0 (c)
+  /* (1) v2 <- v2-vm1 < v2+|vm1|,       (16 8 4 2 1) - (1 -1 1 -1  1) =
+     thus 0 <= v2 < 50*B^(2k) < 2^6*B^(2k)             (15 9 3  3  0)
+  */
+  if (sa)                /* vm1 is negative and stored as |vm1|: subtracting it means adding */
+    ASSERT_NOCARRY (mpn_add_n (v2, v2, vm1, kk1));
+  else
+    ASSERT_NOCARRY (mpn_sub_n (v2, v2, vm1, kk1));
+
+  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
+       v0       v1       hi(vinf)       |vm1|     v2-vm1      EMPTY */
+
+  ASSERT_NOCARRY (mpn_divexact_by3 (v2, v2, kk1));    /* v2 <- v2 / 3 */
+                                                     /* (5 3 1 1 0)*/
+
+  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
+       v0       v1      hi(vinf)       |vm1|     (v2-vm1)/3    EMPTY */
+
+  /* (2) vm1 <- tm1 := (v1 - vm1) / 2  [(1 1 1 1 1) - (1 -1 1 -1 1)] / 2 =
+     tm1 >= 0                                         (0  1 0  1 0)
+     No carry comes out from {v1, kk1} +/- {vm1, kk1},
+     and the division by two is exact.
+     If (sa!=0) the sign of vm1 is negative */
+  if (sa)
+    {
+#ifdef HAVE_NATIVE_mpn_rsh1add_n
+      mpn_rsh1add_n (vm1, v1, vm1, kk1);
+#else
+      ASSERT_NOCARRY (mpn_add_n (vm1, v1, vm1, kk1));
+      ASSERT_NOCARRY (mpn_rshift (vm1, vm1, kk1, 1));
+#endif
+    }
+  else
+    {
+#ifdef HAVE_NATIVE_mpn_rsh1sub_n
+      mpn_rsh1sub_n (vm1, v1, vm1, kk1);
+#else
+      ASSERT_NOCARRY (mpn_sub_n (vm1, v1, vm1, kk1));
+      ASSERT_NOCARRY (mpn_rshift (vm1, vm1, kk1, 1));
+#endif
+    }
+
+  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
+       v0       v1        hi(vinf)       tm1     (v2-vm1)/3    EMPTY */
+
+  /* (3) v1 <- t1 := v1 - v0    (1 1 1 1 1) - (0 0 0 0 1) = (1 1 1 1 0)
+     t1 >= 0
+     Only 2k limbs are subtracted; the borrow is applied to vinf[0],
+     which is the same limb as v1[2k].
+  */
+  vinf[0] -= mpn_sub_n (v1, v1, c, twok);
+
+  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
+       v0     v1-v0        hi(vinf)       tm1     (v2-vm1)/3    EMPTY */
+
+  /* (4) v2 <- t2 := ((v2-vm1)/3-t1)/2 = (v2-vm1-3*t1)/6
+     t2 >= 0                  [(5 3 1 1 0) - (1 1 1 1 0)]/2 = (2 1 0 0 0)
+  */
+#ifdef HAVE_NATIVE_mpn_rsh1sub_n
+  mpn_rsh1sub_n (v2, v2, v1, kk1);
+#else
+  ASSERT_NOCARRY (mpn_sub_n (v2, v2, v1, kk1));
+  ASSERT_NOCARRY (mpn_rshift (v2, v2, kk1, 1));
+#endif
+
+  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
+       v0     v1-v0        hi(vinf)     tm1    (v2-vm1-3t1)/6    EMPTY */
+
+  /* (5) v1 <- t1-tm1           (1 1 1 1 0) - (0 1 0 1 0) = (1 0 1 0 0)
+     result is v1 >= 0
+  */
+  ASSERT_NOCARRY (mpn_sub_n (v1, v1, vm1, kk1));
+
+  /* We do not need to read the value in vm1, so we add it in {c+k, ...} */
+  cy = mpn_add_n (c1, c1, vm1, kk1);
+  MPN_INCR_U (c3 + 1, twor + k - 1, cy); /* 2n-(3k+1) = 2r+k-1 */
+  /* Memory allocated for vm1 is now free, it can be recycled ...*/
+
+  /* (6) v2 <- v2 - 2*vinf,     (2 1 0 0 0) - 2*(1 0 0 0 0) = (0 1 0 0 0)
+     result is v2 >= 0 */
+  saved = vinf[0];       /* Remember v1's highest limb (will be overwritten). */
+  vinf[0] = vinf0;       /* Set the right value for vinf0                     */
+#ifdef HAVE_NATIVE_mpn_sublsh1_n
+  cy = mpn_sublsh1_n (v2, v2, vinf, twor);
+#else
+  /* Overwrite unused vm1 */
+  cy = mpn_lshift (vm1, vinf, twor, 1);
+  cy += mpn_sub_n (v2, v2, vm1, twor);
+#endif
+  MPN_DECR_U (v2 + twor, kk1 - twor, cy);
+
+  /* Current matrix is
+     [1 0 0 0 0; vinf
+      0 1 0 0 0; v2
+      1 0 1 0 0; v1
+      0 1 0 1 0; vm1
+      0 0 0 0 1] v0
+     Some values already are in-place (we added vm1 in the correct position)
+     | vinf|  v1 |  v0 |
+             | vm1 |
+     One still is in a separated area
+       | +v2 |
+     We have to compute v1-=vinf; vm1 -= v2,
+          |-vinf|
+             | -v2 |
+     Carefully reordering operations we can avoid to compute twice the sum
+     of the high half of v2 plus the low half of vinf.
+  */
+
+  /* Add the high half of t2 in {vinf} */
+  if ( LIKELY(twor > k + 1) ) { /* This is the expected flow  */
+    cy = mpn_add_n (vinf, vinf, v2 + k, k + 1);
+    MPN_INCR_U (c3 + kk1, twor - k - 1, cy); /* 2n-(5k+1) = 2r-k-1 */
+  } else { /* triggered only by very unbalanced cases like
+             (k+k+(k-2))x(k+k+1) , should be handled by toom32 */
+    ASSERT_NOCARRY (mpn_add_n (vinf, vinf, v2 + k, twor));
+  }
+  /* (7) v1 <- v1 - vinf,       (1 0 1 0 0) - (1 0 0 0 0) = (0 0 1 0 0)
+     result is >= 0 */
+  /* Side effect: we also subtracted (high half) vm1 -= v2 */
+  cy = mpn_sub_n (v1, v1, vinf, twor);          /* vinf is at most twor long.  */
+  vinf0 = vinf[0];                     /* Save again the right value for vinf0 */
+  vinf[0] = saved;
+  MPN_DECR_U (v1 + twor, kk1 - twor, cy);       /* Treat the last limbs.       */
+
+  /* (8) vm1 <- vm1-v2          (0 1 0 1 0) - (0 1 0 0 0) = (0 0 0 1 0)
+     Operate only on the low half.
+  */
+  cy = mpn_sub_n (c1, c1, v2, k);
+  MPN_DECR_U (v1, kk1, cy);
+
+  /********************* Beginning the final phase **********************/
+
+  /* Most of the recomposition was done */
+
+  /* add t2 in {c+3k, ...}, but only the low half */
+  cy = mpn_add_n (c3, c3, v2, k);
+  vinf[0] += cy;
+  ASSERT(vinf[0] >= cy); /* No carry */
+  MPN_INCR_U (vinf, twor, vinf0); /* Add vinf0, propagate carry. */
+
+#undef v0
+}
diff --git a/mpn/generic/toom_interpolate_6pts.c b/mpn/generic/toom_interpolate_6pts.c

new file mode 100644 (file)

index 0000000..fc9ee23
--- /dev/null
+++ b/mpn/generic/toom_interpolate_6pts.c
@@ -0,0 +1,229 @@
+/* mpn_toom_interpolate_6pts -- Interpolate for toom43, 52
+
+   Contributed to the GNU project by Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* For odd divisors, mpn_divexact_1 works fine with two's complement. */
+#ifndef mpn_divexact_by3
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && MODLIMB_INVERSE_3
+#define mpn_divexact_by3(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,3,MODLIMB_INVERSE_3,0)
+#else
+#define mpn_divexact_by3(dst,src,size) mpn_divexact_1(dst,src,size,3)
+#endif
+#endif
+
+/* Interpolation for Toom-3.5, using the evaluation points: infinity,
+   1, -1, 2, -2, 0. More precisely, we want to compute
+   f(2^(GMP_NUMB_BITS * n)) for a polynomial f of degree 5, given the
+   six values
+
+     w5 = f(0),
+     w4 = f(-1),
+     w3 = f(1)
+     w2 = f(-2),
+     w1 = f(2),
+     w0 = limit at infinity of f(x) / x^5,
+
+   The result is stored in {pp, 5*n + w0n}. At entry, w5 is stored at
+   {pp, 2n}, w3 is stored at {pp + 2n, 2n+1}, and w0 is stored at
+   {pp + 5n, w0n}. The other values are 2n + 1 limbs each (with most
+   significant limbs small). f(-1) and f(-2) may be negative, signs
+   determined by the flag bits. All intermediate results are positive.
+   Inputs are destroyed.
+
+   Interpolation sequence was taken from the paper: "Integer and
+   Polynomial Multiplication: Towards Optimal Toom-Cook Matrices".
+   Some slight variations were introduced: adaptation to "gmp
+   instruction set", and a final saving of an operation by interlacing
+   interpolation and recomposition phases.
+*/
+
+void
+mpn_toom_interpolate_6pts (mp_ptr pp, mp_size_t n, enum toom6_flags flags,
+                          mp_ptr w4, mp_ptr w2, mp_ptr w1,
+                          mp_size_t w0n)
+{
+  mp_limb_t cy;
+  /* cy6 can be stored in w1[2*n], cy4 in w4[0], embankment in w2[0] */
+  mp_limb_t cy4, cy6, embankment;
+
+  ASSERT( n > 0 );
+  ASSERT( 2*n >= w0n && w0n > 0 );
+
+#define w5  pp                                 /* 2n   */
+#define w3  (pp + 2 * n)                       /* 2n+1 */
+#define w0  (pp + 5 * n)                       /* w0n  */
+
+  /* Interpolate with sequence:
+     W2 =(W1 - W2)>>2
+     W1 =(W1 - W5)>>1
+     W1 =(W1 - W2)>>1
+     W4 =(W3 - W4)>>1
+     W2 =(W2 - W4)/3
+     W3 = W3 - W4 - W5
+     W1 =(W1 - W3)/3
+     // Last steps are mixed with recomposition...
+     W2 = W2 - W0<<2
+     W4 = W4 - W2
+     W3 = W3 - W1
+     W2 = W2 - W0
+  */
+
+  /* W2 =(W1 - W2)>>2 */
+  if (flags & toom6_vm2_neg)   /* flag set: the subtraction of W2 is carried out as an addition */
+    mpn_add_n (w2, w1, w2, 2 * n + 1);
+  else
+    mpn_sub_n (w2, w1, w2, 2 * n + 1);
+  mpn_rshift (w2, w2, 2 * n + 1, 2);
+
+  /* W1 =(W1 - W5)>>1 */
+  w1[2*n] -= mpn_sub_n (w1, w1, w5, 2*n);   /* w5 has only 2n limbs; apply the borrow to the top limb */
+  mpn_rshift (w1, w1, 2 * n + 1, 1);
+
+  /* W1 =(W1 - W2)>>1 */
+#if HAVE_NATIVE_mpn_rsh1sub_n
+  mpn_rsh1sub_n (w1, w1, w2, 2 * n + 1);
+#else
+  mpn_sub_n (w1, w1, w2, 2 * n + 1);
+  mpn_rshift (w1, w1, 2 * n + 1, 1);
+#endif
+
+  /* W4 =(W3 - W4)>>1 */
+  if (flags & toom6_vm1_neg)   /* flag set: the subtraction of W4 is carried out as an addition */
+    {
+#if HAVE_NATIVE_mpn_rsh1add_n
+      mpn_rsh1add_n (w4, w3, w4, 2 * n + 1);
+#else
+      mpn_add_n (w4, w3, w4, 2 * n + 1);
+      mpn_rshift (w4, w4, 2 * n + 1, 1);
+#endif
+    }
+  else
+    {
+#if HAVE_NATIVE_mpn_rsh1sub_n
+      mpn_rsh1sub_n (w4, w3, w4, 2 * n + 1);
+#else
+      mpn_sub_n (w4, w3, w4, 2 * n + 1);
+      mpn_rshift (w4, w4, 2 * n + 1, 1);
+#endif
+    }
+
+  /* W2 =(W2 - W4)/3 */
+  mpn_sub_n (w2, w2, w4, 2 * n + 1);
+  mpn_divexact_by3 (w2, w2, 2 * n + 1);
+
+  /* W3 = W3 - W4 - W5 */
+  mpn_sub_n (w3, w3, w4, 2 * n + 1);
+  w3[2 * n] -= mpn_sub_n (w3, w3, w5, 2 * n);
+
+  /* W1 =(W1 - W3)/3 */
+  mpn_sub_n (w1, w1, w3, 2 * n + 1);
+  mpn_divexact_by3 (w1, w1, 2 * n + 1);
+
+  /*
+    [1 0 0 0 0 0;
+     0 1 0 0 0 0;
+     1 0 1 0 0 0;
+     0 1 0 1 0 0;
+     1 0 1 0 1 0;
+     0 0 0 0 0 1]
+
+    pp[] prior to operations:
+     |_H w0__|_L w0__|______||_H w3__|_L w3__|_H w5__|_L w5__|
+
+    summation scheme for remaining operations:
+     |______________5|n_____4|n_____3|n_____2|n______|n______|pp
+     |_H w0__|_L w0__|______||_H w3__|_L w3__|_H w5__|_L w5__|
+                                   || H w4  | L w4  |
+                   || H w2  | L w2  |
+           || H w1  | L w1  |
+                           ||-H w1  |-L w1  |
+                    |-H w0  |-L w0 ||-H w2  |-L w2  |
+  */
+  cy = mpn_add_n (pp + n, pp + n, w4, 2 * n + 1);
+  MPN_INCR_U (pp + 3 * n + 1, n, cy);
+
+  /* W2 -= W0<<2 */
+#if HAVE_NATIVE_mpn_sublsh_n || HAVE_NATIVE_mpn_sublsh2_n
+#if HAVE_NATIVE_mpn_sublsh2_n
+  cy = mpn_sublsh2_n(w2, w2, w0, w0n);
+#else
+  cy = mpn_sublsh_n(w2, w2, w0, w0n, 2);
+#endif
+#else
+  /* {W4,2*n+1} is now free and can be overwritten. */
+  cy = mpn_lshift(w4, w0, w0n, 2);
+  cy+= mpn_sub_n(w2, w2, w4, w0n);
+#endif
+  MPN_DECR_U (w2 + w0n, 2 * n + 1 - w0n, cy);
+
+  /* W4L = W4L - W2L */
+  cy = mpn_sub_n (pp + n, pp + n, w2, n);
+  MPN_DECR_U (w3, 2 * n + 1, cy);
+
+  /* W3H = W3H + W2L */
+  cy4 = w3[2 * n] + mpn_add_n (pp + 3 * n, pp + 3 * n, w2, n);
+  /* W1L + W2H */
+  cy = w2[2 * n] + mpn_add_n (pp + 4 * n, w1, w2 + n, n);
+  MPN_INCR_U (w1 + n, n + 1, cy);
+
+  /* W0 = W0 + W1H */
+  if (LIKELY (w0n > n))
+    cy6 = w1[2 * n] + mpn_add_n (w0, w0, w1 + n, n);
+  else
+    cy6 = mpn_add_n (w0, w0, w1 + n, w0n);
+
+  /*
+    summation scheme for the next operation:
+     |...____5|n_____4|n_____3|n_____2|n______|n______|pp
+     |...w0___|_w1_w2_|_H w3__|_L w3__|_H w5__|_L w5__|
+                    ...-w0___|-w1_w2 |
+  */
+  /* if(LIKELY(w0n>n)) the two operands below DO overlap! */
+  cy = mpn_sub_n (pp + 2 * n, pp + 2 * n, pp + 4 * n, n + w0n);
+
+  /* embankment is a "dirty trick" to avoid carry/borrow propagation
+     beyond allocated memory: the top limb of w0 is temporarily replaced
+     by 1 (its value minus 1 is kept in embankment) while the pending
+     carries and borrows are applied, and the kept value is added back
+     by the last statement. */
+  embankment = w0[w0n - 1] - 1;
+  w0[w0n - 1] = 1;
+  if (LIKELY (w0n > n)) {
+    if ( LIKELY(cy4 > cy6) )
+      MPN_INCR_U (pp + 4 * n, w0n + n, cy4 - cy6);
+    else
+      MPN_DECR_U (pp + 4 * n, w0n + n, cy6 - cy4);
+    MPN_DECR_U (pp + 3 * n + w0n, 2 * n, cy);
+    MPN_INCR_U (w0 + n, w0n - n, cy6);
+  } else {
+    MPN_INCR_U (pp + 4 * n, w0n + n, cy4);
+    MPN_DECR_U (pp + 3 * n + w0n, 2 * n, cy + cy6);
+  }
+  w0[w0n - 1] += embankment;
+
+#undef w5
+#undef w3
+#undef w0
+
+}
diff --git a/mpn/generic/toom_interpolate_7pts.c b/mpn/generic/toom_interpolate_7pts.c

new file mode 100644 (file)

index 0000000..95a2194
--- /dev/null
+++ b/mpn/generic/toom_interpolate_7pts.c
@@ -0,0 +1,255 @@
+/* mpn_toom_interpolate_7pts -- Interpolate for toom44, 53, 62.
+
+   Contributed to the GNU project by Niels Möller.
+   Improvements by Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2006, 2007, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#define BINVERT_3 MODLIMB_INVERSE_3
+
+#define BINVERT_9 \
+  ((((GMP_NUMB_MAX / 9) << (6 - GMP_NUMB_BITS % 6)) * 8 & GMP_NUMB_MAX) | 0x39)
+
+/* Inverse of 15 modulo 2^GMP_NUMB_BITS (0xEEEEEEEF for 32-bit numbs).  The
+   parentheses must balance: the macro is pasted as a call argument.  */
+#define BINVERT_15 \
+  ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 4)) / 15) * 14 * 16 & GMP_NUMB_MAX) + 15)
+
+/* For the various mpn_divexact_byN here, fall back to using either
+   mpn_pi1_bdiv_q_1 or mpn_divexact_1.  The former has less overhead and is
+   much faster if it is native.  For now, since mpn_divexact_1 is native on
+   several platforms where mpn_pi1_bdiv_q_1 does not yet exist, do not use
+   mpn_pi1_bdiv_q_1 unconditionally.  FIXME.  */
+
+/* For odd divisors, mpn_divexact_1 works fine with two's complement. */
+#ifndef mpn_divexact_by3
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define mpn_divexact_by3(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,3,BINVERT_3,0)
+#else
+#define mpn_divexact_by3(dst,src,size) mpn_divexact_1(dst,src,size,3)
+#endif
+#endif
+
+#ifndef mpn_divexact_by9
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define mpn_divexact_by9(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,9,BINVERT_9,0)
+#else
+#define mpn_divexact_by9(dst,src,size) mpn_divexact_1(dst,src,size,9)
+#endif
+#endif
+
+#ifndef mpn_divexact_by15
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define mpn_divexact_by15(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,15,BINVERT_15,0)
+#else
+#define mpn_divexact_by15(dst,src,size) mpn_divexact_1(dst,src,size,15)
+#endif
+#endif
+
+/* Interpolation for toom4, using the evaluation points 0, infinity,
+   1, -1, 2, -2, 1/2. More precisely, we want to compute
+   f(2^(GMP_NUMB_BITS * n)) for a polynomial f of degree 6, given the
+   seven values
+
+     w0 = f(0),
+     w1 = f(-2),
+     w2 = f(1),
+     w3 = f(-1),
+     w4 = f(2)
+     w5 = 64 * f(1/2)
+     w6 = limit at infinity of f(x) / x^6,
+
+   The result is 6*n + w6n limbs. At entry, w0 is stored at {rp, 2n },
+   w2 is stored at { rp + 2n, 2n+1 }, and w6 is stored at { rp + 6n,
+   w6n }. The other values are 2n + 1 limbs each (with most
+   significant limbs small). f(-1) and f(-2) may be negative, signs
+   determined by the flag bits. Inputs are destroyed.
+
+   Needs (2*n + 1) limbs of temporary storage.
+*/
+
+void
+mpn_toom_interpolate_7pts (mp_ptr rp, mp_size_t n, enum toom7_flags flags,
+                          mp_ptr w1, mp_ptr w3, mp_ptr w4, mp_ptr w5,
+                          mp_size_t w6n, mp_ptr tp)
+{
+  mp_size_t m;
+  mp_limb_t cy;
+
+  m = 2*n + 1;   /* limb count of w1..w5 */
+#define w0 rp
+#define w2 (rp + 2*n)
+#define w6 (rp + 6*n)
+
+  ASSERT (w6n > 0);
+  ASSERT (w6n <= 2*n);
+
+  /* Using formulas similar to Marco Bodrato's
+
+     W5 = W5 + W4
+     W1 =(W4 - W1)/2
+     W4 = W4 - W0
+     W4 =(W4 - W1)/4 - W6*16
+     W3 =(W2 - W3)/2
+     W2 = W2 - W3
+
+     W5 = W5 - W2*65      May be negative.
+     W2 = W2 - W6 - W0
+     W5 =(W5 + W2*45)/2   Now >= 0 again.
+     W4 =(W4 - W2)/3
+     W2 = W2 - W4
+
+     W1 = W5 - W1         May be negative.
+     W5 =(W5 - W3*8)/9
+     W3 = W3 - W5
+     W1 =(W1/15 + W5)/2   Now >= 0 again.
+     W5 = W5 - W1
+
+     where W0 = f(0), W1 = f(-2), W2 = f(1), W3 = f(-1),
+          W4 = f(2), W5 = f(1/2), W6 = f(oo),
+
+     Note that most intermediate results are positive; the ones that
+     may be negative are represented in two's complement. We must
+     never shift right a value that may be negative, since that would
+     invalidate the sign bit. On the other hand, divexact by odd
+     numbers work fine with two's complement.
+  */
+
+  mpn_add_n (w5, w5, w4, m);   /* W5 = W5 + W4 */
+  if (flags & toom7_w1_neg)    /* W1 =(W4 - W1)/2; flag set: done as an addition */
+    {
+#ifdef HAVE_NATIVE_mpn_rsh1add_n
+      mpn_rsh1add_n (w1, w1, w4, m);
+#else
+      mpn_add_n (w1, w1, w4, m);  ASSERT (!(w1[0] & 1));
+      mpn_rshift (w1, w1, m, 1);
+#endif
+    }
+  else
+    {
+#ifdef HAVE_NATIVE_mpn_rsh1sub_n
+      mpn_rsh1sub_n (w1, w4, w1, m);
+#else
+      mpn_sub_n (w1, w4, w1, m);  ASSERT (!(w1[0] & 1));
+      mpn_rshift (w1, w1, m, 1);
+#endif
+    }
+  mpn_sub (w4, w4, m, w0, 2*n);   /* W4 = W4 - W0 */
+  mpn_sub_n (w4, w4, w1, m);  ASSERT (!(w4[0] & 3));   /* W4 =(W4 - W1)/4 ... */
+  mpn_rshift (w4, w4, m, 2); /* w4>=0 */
+
+  tp[w6n] = mpn_lshift (tp, w6, w6n, 4);   /* tp = W6*16, w6n+1 limbs */
+  mpn_sub (w4, w4, m, tp, w6n+1);          /* ... - W6*16 */
+
+  if (flags & toom7_w3_neg)    /* W3 =(W2 - W3)/2; flag set: done as an addition */
+    {
+#ifdef HAVE_NATIVE_mpn_rsh1add_n
+      mpn_rsh1add_n (w3, w3, w2, m);
+#else
+      mpn_add_n (w3, w3, w2, m);  ASSERT (!(w3[0] & 1));
+      mpn_rshift (w3, w3, m, 1);
+#endif
+    }
+  else
+    {
+#ifdef HAVE_NATIVE_mpn_rsh1sub_n
+      mpn_rsh1sub_n (w3, w2, w3, m);
+#else
+      mpn_sub_n (w3, w2, w3, m);  ASSERT (!(w3[0] & 1));
+      mpn_rshift (w3, w3, m, 1);
+#endif
+    }
+
+  mpn_sub_n (w2, w2, w3, m);   /* W2 = W2 - W3 */
+
+  mpn_submul_1 (w5, w2, m, 65);   /* W5 = W5 - W2*65 */
+  mpn_sub (w2, w2, m, w6, w6n);   /* W2 = W2 - W6 ... */
+  mpn_sub (w2, w2, m, w0, 2*n);   /* ... - W0 */
+
+  mpn_addmul_1 (w5, w2, m, 45);  ASSERT (!(w5[0] & 1));   /* W5 =(W5 + W2*45)/2 */
+  mpn_rshift (w5, w5, m, 1);
+  mpn_sub_n (w4, w4, w2, m);   /* W4 =(W4 - W2)/3 */
+
+  mpn_divexact_by3 (w4, w4, m);
+  mpn_sub_n (w2, w2, w4, m);   /* W2 = W2 - W4 */
+
+  mpn_sub_n (w1, w5, w1, m);   /* W1 = W5 - W1 */
+  mpn_lshift (tp, w3, m, 3);   /* tp = W3*8 */
+  mpn_sub_n (w5, w5, tp, m);   /* W5 =(W5 - W3*8)/9 */
+  mpn_divexact_by9 (w5, w5, m);
+  mpn_sub_n (w3, w3, w5, m);   /* W3 = W3 - W5 */
+
+  mpn_divexact_by15 (w1, w1, m);   /* W1 =(W1/15 + W5)/2 */
+  mpn_add_n (w1, w1, w5, m);  ASSERT (!(w1[0] & 1));
+  mpn_rshift (w1, w1, m, 1); /* w1>=0 now */
+  mpn_sub_n (w5, w5, w1, m);   /* W5 = W5 - W1 */
+
+  /* These bounds are valid for the 4x4 polynomial product of toom44,
+   * and they are conservative for toom53 and toom62. */
+  ASSERT (w1[2*n] < 2);
+  ASSERT (w2[2*n] < 3);
+  ASSERT (w3[2*n] < 4);
+  ASSERT (w4[2*n] < 3);
+  ASSERT (w5[2*n] < 2);
+
+  /* Addition chain. Note carries and the 2n'th limbs that need to be
+   * added in.
+   *
+   * Special care is needed for w2[2n] and the corresponding carry,
+   * since the "simple" way of adding it all together would overwrite
+   * the limb at w2[2*n] and rp[4*n] (same location) with the sum of
+   * the high half of w3 and the low half of w4.
+   *
+   *         7    6    5    4    3    2    1    0
+   *    |    |    |    |    |    |    |    |    |
+   *                  ||w3 (2n+1)|
+   *             ||w4 (2n+1)|
+   *        ||w5 (2n+1)|        ||w1 (2n+1)|
+   *  + | w6 (w6n)|        ||w2 (2n+1)| w0 (2n) |  (share storage with r)
+   *  -----------------------------------------------
+   *  r |    |    |    |    |    |    |    |    |
+   *        c7   c6   c5   c4   c3                 Carries to propagate
+   */
+
+  cy = mpn_add_n (rp + n, rp + n, w1, m);
+  MPN_INCR_U (w2 + n + 1, n , cy);
+  cy = mpn_add_n (rp + 3*n, rp + 3*n, w3, n);
+  MPN_INCR_U (w3 + n, n + 1, w2[2*n] + cy);   /* fold w2's top limb into w3's high part before rp[4n] is overwritten */
+  cy = mpn_add_n (rp + 4*n, w3 + n, w4, n);
+  MPN_INCR_U (w4 + n, n + 1, w3[2*n] + cy);
+  cy = mpn_add_n (rp + 5*n, w4 + n, w5, n);
+  MPN_INCR_U (w5 + n, n + 1, w4[2*n] + cy);
+  if (w6n > n + 1)
+    ASSERT_NOCARRY (mpn_add (rp + 6*n, rp + 6*n, w6n, w5 + n, n + 1));
+  else
+    {
+      ASSERT_NOCARRY (mpn_add_n (rp + 6*n, rp + 6*n, w5 + n, w6n));
+#if WANT_ASSERT
+      {
+       mp_size_t i;
+       for (i = w6n; i <= n; i++)   /* limbs of w5's high part that were not added must be zero */
+         ASSERT (w5[n + i] == 0);
+      }
+#endif
+    }
+}
diff --git a/mpn/generic/toom_interpolate_8pts.c b/mpn/generic/toom_interpolate_8pts.c

new file mode 100644 (file)

index 0000000..b11af25
--- /dev/null
+++ b/mpn/generic/toom_interpolate_8pts.c
@@ -0,0 +1,195 @@
+/* mpn_toom_interpolate_8pts -- Interpolate for toom54, 63, 72.
+
+   Contributed to the GNU project by Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#define BINVERT_3 MODLIMB_INVERSE_3
+
+#define BINVERT_15 \
+  ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 4)) / 15) * 14 * 16 & GMP_NUMB_MAX) + 15)
+
+#define BINVERT_45 ((BINVERT_15 * BINVERT_3) & GMP_NUMB_MASK)
+
+#ifndef mpn_divexact_by3
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define mpn_divexact_by3(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,3,BINVERT_3,0)
+#else
+#define mpn_divexact_by3(dst,src,size) mpn_divexact_1(dst,src,size,3)
+#endif
+#endif
+
+#ifndef mpn_divexact_by45
+#if GMP_NUMB_BITS % 12 == 0
+#define mpn_divexact_by45(dst,src,size) \
+  (63 & 19 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 45)))
+#else
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#define mpn_divexact_by45(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,45,BINVERT_45,0)
+#else
+#define mpn_divexact_by45(dst,src,size) mpn_divexact_1(dst,src,size,45)
+#endif
+#endif
+#endif
+
+#if HAVE_NATIVE_mpn_sublsh_n
+#define DO_mpn_sublsh_n(dst,src,n,s,ws) mpn_sublsh_n (dst,src,n,s)
+#else
+static mp_limb_t
+DO_mpn_sublsh_n (mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
+{
+  /* Fallback for {dst,n} -= {src,n} << s.  The returned limb is what the
+     callers go on to subtract from the limbs above dst + n.  */
+#if USE_MUL_1
+  /* A left shift by s bits is a multiplication by 2^s. */
+  return mpn_submul_1 (dst, src, n, CNST_LIMB(1) << s);
+#else
+  /* Shift into the scratch area ws first, then subtract that from dst. */
+  mp_limb_t shifted_out = mpn_lshift (ws, src, n, s);
+  mp_limb_t borrow = mpn_sub_n (dst, dst, ws, n);
+
+  return shifted_out + borrow;
+#endif
+}
+#endif
+
+
+#if HAVE_NATIVE_mpn_subrsh
+#define DO_mpn_subrsh(dst,nd,src,ns,s,ws) mpn_subrsh (dst,nd,src,ns,s)
+#else
+/* This is not a correct definition, it assumes no carry */
+#define DO_mpn_subrsh(dst,nd,src,ns,s,ws)                              \
+do {                                                                   \
+  mp_limb_t __cy;                                                      \
+  MPN_DECR_U (dst, nd, src[0] >> s);                                   \
+  __cy = DO_mpn_sublsh_n (dst, src + 1, ns - 1, GMP_NUMB_BITS - s, ws);        \
+  MPN_DECR_U (dst + ns - 1, nd - ns + 1, __cy);                                \
+} while (0)
+#endif
+
+/* Interpolation for Toom-4.5 (or Toom-4), using the evaluation
+   points: infinity(4.5 only), 4, -4, 2, -2, 1, -1, 0. More precisely,
+   we want to compute f(2^(GMP_NUMB_BITS * n)) for a polynomial f of
+   degree 7 (or 6), given the 8 (rsp. 7) values:
+
+     r1 = limit at infinity of f(x) / x^7,
+     r2 = f(4),
+     r3 = f(-4),
+     r4 = f(2),
+     r5 = f(-2),
+     r6 = f(1),
+     r7 = f(-1),
+     r8 = f(0).
+
+   All couples of the form f(n),f(-n) must be already mixed with
+   toom_couple_handling(f(n),...,f(-n),...)
+
+   The result is stored in {pp, spt + 7*n (or 6*n)}.
+   At entry, r8 is stored at {pp, 2n},
+   r5 is stored at {pp + 3n, 3n + 1}.
+
+   The other values are 2n+... limbs each (with most significant limbs small).
+
+   All intermediate results are positive.
+   Inputs are destroyed.
+*/
+
+void
+mpn_toom_interpolate_8pts (mp_ptr pp, mp_size_t n,
+                          mp_ptr r3, mp_ptr r7,
+                          mp_size_t spt, mp_ptr ws)
+{
+  mp_limb_signed_t cy;                 /* signed: the recomposition computes carry minus borrow, which may be -1 */
+  mp_ptr r5, r1;
+  r5 = (pp + 3 * n);                   /* 3n+1 */
+  r1 = (pp + 7 * n);                   /* spt */
+
+  /******************************* interpolation *****************************/
+
+  DO_mpn_subrsh(r3+n, 2 * n + 1, pp, 2 * n, 4, ws);   /* {r3+n} -= {pp,2n} >> 4 */
+  cy = DO_mpn_sublsh_n (r3, r1, spt, 12, ws);         /* r3 -= r1 << 12 */
+  MPN_DECR_U (r3 + spt, 3 * n + 1 - spt, cy);
+
+  DO_mpn_subrsh(r5+n, 2 * n + 1, pp, 2 * n, 2, ws);   /* {r5+n} -= {pp,2n} >> 2 */
+  cy = DO_mpn_sublsh_n (r5, r1, spt, 6, ws);          /* r5 -= r1 << 6 */
+  MPN_DECR_U (r5 + spt, 3 * n + 1 - spt, cy);
+
+  r7[3*n] -= mpn_sub_n (r7+n, r7+n, pp, 2 * n);       /* {r7+n} -= {pp,2n}, borrow into the top limb */
+  cy = mpn_sub_n (r7, r7, r1, spt);                   /* r7 -= r1 */
+  MPN_DECR_U (r7 + spt, 3 * n + 1 - spt, cy);
+
+  ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1));
+  ASSERT_NOCARRY(mpn_rshift(r3, r3, 3 * n + 1, 2));
+
+  ASSERT_NOCARRY(mpn_sub_n (r5, r5, r7, 3 * n + 1));
+
+  ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1));
+
+  mpn_divexact_by45 (r3, r3, 3 * n + 1);
+
+  ASSERT_NOCARRY(mpn_divexact_by3 (r5, r5, 3 * n + 1));
+
+  ASSERT_NOCARRY(DO_mpn_sublsh_n (r5, r3, 3 * n + 1, 2, ws));   /* r5 -= r3 << 2 */
+
+  /* last interpolation steps... */
+  /* ... are mixed with recomposition */
+
+  /***************************** recomposition *******************************/
+  /*
+    pp[] prior to operations:
+     |_H r1|_L r1|____||_H r5|_M_r5|_L r5|_____|_H r8|_L r8|pp
+
+    summation scheme for remaining operations:
+     |____8|n___7|n___6|n___5|n___4|n___3|n___2|n____|n____|pp
+     |_H r1|_L r1|____||_H*r5|_M r5|_L r5|_____|_H_r8|_L r8|pp
+         ||_H r3|_M r3|_L*r3|
+                                 ||_H_r7|_M_r7|_L_r7|
+                     ||-H r3|-M r3|-L*r3|
+                                 ||-H*r5|-M_r5|-L_r5|
+  */
+
+  cy = mpn_add_n (pp + n, pp + n, r7, n); /* Hr8+Lr7-Lr5 */
+  cy-= mpn_sub_n (pp + n, pp + n, r5, n);
+  if (0 > cy)                             /* borrow without carry: pass -1 upwards */
+    MPN_DECR_U (r7 + n, 2*n + 1, 1);
+  else
+    MPN_INCR_U (r7 + n, 2*n + 1, cy);
+
+  cy = mpn_sub_n (pp + 2*n, r7 + n, r5 + n, n); /* Mr7-Mr5 */
+  MPN_DECR_U (r7 + 2*n, n + 1, cy);
+
+  cy = mpn_add_n (pp + 3*n, r5, r7+ 2*n, n+1); /* Hr7+Lr5 */
+  r5[3*n]+= mpn_add_n (r5 + 2*n, r5 + 2*n, r3, n); /* Hr5+Lr3 */
+  cy-= mpn_sub_n (pp + 3*n, pp + 3*n, r5 + 2*n, n+1); /* Hr7-Hr5+Lr5-Lr3 */
+  if (UNLIKELY(0 > cy))                   /* borrow without carry: pass -1 upwards */
+    MPN_DECR_U (r5 + n + 1, 2*n, 1);
+  else
+    MPN_INCR_U (r5 + n + 1, 2*n, cy);
+
+  ASSERT_NOCARRY(mpn_sub_n(pp + 4*n, r5 + n, r3 + n, 2*n +1)); /* Mr5-Mr3,Hr5-Hr3 */
+
+  cy = mpn_add_1 (pp + 6*n, r3 + n, n, pp[6*n]);
+  MPN_INCR_U (r3 + 2*n, n + 1, cy);
+  cy = r3[3*n] + mpn_add_n (pp + 7*n, pp + 7*n, r3 + 2*n, n);
+  if (LIKELY(spt != n))                   /* when spt == n no limbs exist above pp + 8n to absorb cy */
+    MPN_INCR_U (pp + 8*n, spt - n, cy);
+  else
+    ASSERT (cy == 0);
+}
diff --git a/mpn/generic/trialdiv.c b/mpn/generic/trialdiv.c

new file mode 100644 (file)

index 0000000..c8f3c5d
--- /dev/null
+++ b/mpn/generic/trialdiv.c
@@ -0,0 +1,110 @@
+/* mpn_trialdiv -- find small factors of an mpn number using trial division.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.  */
+
+/*
+   Fast, division-free trial division for GMP.
+
+   This function will find the first (smallest) factor represented in
+   trialdivtab.h.  It does not stop the factoring effort just because it has
+   reached some sensible limit, such as the square root of the input number.
+
+   The caller can limit the factoring effort by passing NPRIMES.  The function
+   will then divide to *at least* that limit.  A position which only
+   mpn_trialdiv can make sense of is returned in the WHERE parameter.  It can
+   be used for restarting the factoring effort; the first call should pass 0
+   here.
+*/
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+struct gmp_primes_dtab {
+  mp_limb_t binv;      /* inverse mod B of the prime; returned by mpn_trialdiv */
+  mp_limb_t lim;       /* a residue r is a multiple when r * binv <= lim */
+};
+
+struct gmp_primes_ptab {
+  mp_limb_t ppp;       /* product of the primes covered by this entry */
+  mp_limb_t cps[7];    /* values pre-computed from ppp for mpn_mod_1s_4p */
+  unsigned int idx:24; /* index in gmp_primes_dtab of this entry's first prime */
+  unsigned int np :8;  /* number of consecutive dtab primes in this entry */
+};
+
+#define P(p,inv,lim) {inv,lim}
+
+#include "trialdivtab.h"
+
+#define PTAB_LINES (sizeof (gmp_primes_ptab) / sizeof (gmp_primes_ptab[0]))
+
+/* Attempt to find a factor of T using trial division.
+   Input: A non-negative number T.
+   Output: non-zero if we found a factor, zero otherwise.  To get the actual
+   prime factor, compute the mod B inverse of the return value.  */
+/* FIXME: We could optimize out one of the outer loop conditions if we
+   had a final ptab entry with a huge np field.  */
+mp_limb_t
+mpn_trialdiv (mp_srcptr tp, mp_size_t tn, mp_size_t nprimes, int *where)
+{
+  long line;                   /* current row of gmp_primes_ptab */
+
+  ASSERT (tn >= 1);
+
+  /* Resume at the row given in *WHERE (0 on the first call).  */
+  for (line = *where; line < PTAB_LINES; line++)
+    {
+      mp_limb_t prod = gmp_primes_ptab[line].ppp;
+      mp_limb_t *consts = gmp_primes_ptab[line].cps;
+      long first = gmp_primes_ptab[line].idx;
+      long count = gmp_primes_ptab[line].np;
+      struct gmp_primes_dtab *cand = &gmp_primes_dtab[first];
+      mp_limb_t rem, t;
+      long k;
+
+      /* Reduce {tp,tn} to a single limb using this row's prime product.  */
+#if __GNU_MP_VERSION == 4 && __GNU_MP_VERSION_MINOR < 4
+      if (tn < 4)
+       rem = mpn_mod_1 (tp, tn, prod); /* FIXME */
+      else
+#endif
+       rem = mpn_mod_1s_4p (tp, tn, prod << consts[1], consts);
+
+      /* Test the row's primes, in table order, against that one limb.  */
+      for (k = 0; k < count; k++)
+       {
+         t = rem * cand[k].binv;
+         if (t <= cand[k].lim)
+           {
+             *where = line;
+             return cand[k].binv;
+           }
+       }
+
+      /* No hit in this row; stop once the NPRIMES budget is used up.  */
+      nprimes -= count;
+      if (nprimes <= 0)
+       return 0;
+    }
+  return 0;
+}
diff --git a/mpn/generic/udiv_w_sdiv.c b/mpn/generic/udiv_w_sdiv.c

new file mode 100644 (file)

index 0000000..f1353c6
--- /dev/null
+++ b/mpn/generic/udiv_w_sdiv.c
@@ -0,0 +1,131 @@
+/* mpn_udiv_w_sdiv -- implement udiv_qrnnd on machines with only signed
+   division.
+
+   Contributed by Peter L. Montgomery.
+
+   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY SAFE
+   TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS
+   ALMOST GUARANTEED THAT THIS FUNCTION WILL CHANGE OR DISAPPEAR IN A FUTURE
+   GNU MP RELEASE.
+
+
+Copyright 1992, 1994, 1996, 2000, 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+mp_limb_t
+mpn_udiv_w_sdiv (mp_limb_t *rp, mp_limb_t a1, mp_limb_t a0, mp_limb_t d)
+{
+  mp_limb_t q, r;              /* quotient (returned) and remainder (stored in *rp) */
+  mp_limb_t c0, c1, b1;        /* below: B = 2^GMP_LIMB_BITS, A = a1*B + a0 */
+
+  ASSERT (d != 0);
+  ASSERT (a1 < d);
+
+  if ((mp_limb_signed_t) d >= 0)       /* top bit of d is clear */
+    {
+      if (a1 < d - a1 - (a0 >> (GMP_LIMB_BITS - 1)))   /* 2*a1 + (top bit of a0) < d */
+       {
+         /* dividend, divisor, and quotient are nonnegative */
+         sdiv_qrnnd (q, r, a1, a0, d);
+       }
+      else
+       {
+         /* Compute c1*B + c0 = a1*B + a0 - (B/2)*d */
+         sub_ddmmss (c1, c0, a1, a0, d >> 1, d << (GMP_LIMB_BITS - 1));
+         /* Divide (c1*B + c0) by d */
+         sdiv_qrnnd (q, r, c1, c0, d);
+         /* Add B/2 to quotient, undoing the subtraction of (B/2)*d above */
+         q += (mp_limb_t) 1 << (GMP_LIMB_BITS - 1);
+       }
+    }
+  else
+    {
+      b1 = d >> 1;                     /* d/2, between B/4 and B/2 - 1 */
+      c1 = a1 >> 1;                    /* A/2 */
+      c0 = (a1 << (GMP_LIMB_BITS - 1)) + (a0 >> 1);    /* low limb of A/2 */
+
+      if (a1 < b1)                     /* A < B*b1, so A/2 < (B/2)*b1 */
+       {
+         sdiv_qrnnd (q, r, c1, c0, b1); /* (A/2) / (d/2) */
+
+         r = 2*r + (a0 & 1);           /* Remainder from A/(2*b1) */
+         if ((d & 1) != 0)             /* d = 2*b1 + 1, so A = q*d + (r - q) */
+           {
+             if (r >= q)
+               r = r - q;
+             else if (q - r <= d)
+               {
+                 r = r - q + d;
+                 q--;
+               }
+             else
+               {
+                 r = r - q + 2*d;
+                 q -= 2;
+               }
+           }
+       }
+      else if (c1 < b1)                        /* So B/2 <= (A/2)/b1 < B */
+       {
+         c1 = (b1 - 1) - c1;
+         c0 = ~c0;                     /* logical NOT */
+
+         sdiv_qrnnd (q, r, c1, c0, b1); /* (A/2) / (d/2) */
+
+         q = ~q;                       /* (A/2)/b1 */
+         r = (b1 - 1) - r;
+
+         r = 2*r + (a0 & 1);           /* A/(2*b1) */
+
+         if ((d & 1) != 0)             /* d = 2*b1 + 1, so A = q*d + (r - q) */
+           {
+             if (r >= q)
+               r = r - q;
+             else if (q - r <= d)
+               {
+                 r = r - q + d;
+                 q--;
+               }
+             else
+               {
+                 r = r - q + 2*d;
+                 q -= 2;
+               }
+           }
+       }
+      else                             /* Implies c1 = b1 */
+       {                               /* Hence a1 = d - 1 = 2*b1 - 1 */
+         if (a0 >= -d)                 /* -d is B - d in limb arithmetic */
+           {
+             q = -CNST_LIMB(1);
+             r = a0 + d;
+           }
+         else
+           {
+             q = -CNST_LIMB(2);
+             r = a0 + 2*d;
+           }
+       }
+    }
+
+  *rp = r;
+  return q;
+}
diff --git a/mpn/generic/zero.c b/mpn/generic/zero.c

new file mode 100644 (file)

index 0000000..4f12b00
--- /dev/null
+++ b/mpn/generic/zero.c
@@ -0,0 +1,31 @@
+/* mpn_zero
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpn_zero (mp_ptr rp, mp_size_t n)
+{
+  mp_ptr end = rp + n;
+
+  /* Store zero limbs from the low end of {rp,n} up to, not including, END.  */
+  while (rp != end)
+    *rp++ = 0;
+}
diff --git a/mpn/i960/README b/mpn/i960/README

new file mode 100644 (file)

index 0000000..d68a0a8
--- /dev/null
+++ b/mpn/i960/README
@@ -0,0 +1,9 @@
+This directory contains mpn functions for Intel i960 processors.
+
+RELEVANT OPTIMIZATION ISSUES
+
+The code in this directory is not well optimized.
+
+STATUS
+
+The code in this directory has not been tested.
diff --git a/mpn/i960/add_n.s b/mpn/i960/add_n.s

new file mode 100644 (file)

index 0000000..24abc6b
--- /dev/null
+++ b/mpn/i960/add_n.s
@@ -0,0 +1,41 @@
+# I960 __gmpn_add_n -- Add two limb vectors of the same length > 0 and store
+# sum in a third limb vector.
+
+# Copyright 1995, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+.text
+       .align 4
+       .globl ___gmpn_add_n
+___gmpn_add_n:                 # g0=res_ptr, g1=s1_ptr, g2=s2_ptr, g3=limb count
+       mov     0,g6            # clear carry-save register
+       cmpo    1,0             # clear cy
+
+Loop:  subo    1,g3,g3         # update loop counter
+       ld      (g1),g5         # load from s1_ptr
+       addo    4,g1,g1         # s1_ptr++
+       ld      (g2),g4         # load from s2_ptr
+       addo    4,g2,g2         # s2_ptr++
+       cmpo    g6,1            # restore cy from g6, relies on cy being 0
+       addc    g4,g5,g4        # main add
+       subc    0,0,g6          # save cy in g6
+       st      g4,(g0)         # store result to res_ptr
+       addo    4,g0,g0         # res_ptr++
+       cmpobne 0,g3,Loop       # when branch is taken, clears C bit
+
+       mov     g6,g0           # return carry-save register (NOTE(review): confirm it holds 0/1)
+       ret
diff --git a/mpn/i960/addmul_1.s b/mpn/i960/addmul_1.s

new file mode 100644 (file)

index 0000000..984f540
--- /dev/null
+++ b/mpn/i960/addmul_1.s
@@ -0,0 +1,46 @@
+# I960 __gmpn_addmul_1 -- Multiply a limb vector with a limb and add
+# the result to a second limb vector.
+
+# Copyright 1995, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+.text
+       .align  4
+       .globl  ___gmpn_addmul_1
+___gmpn_addmul_1:              # g0=dst vector, g1=src vector, g2=limb count, g3=multiplier limb
+       subo    g2,0,g2         # g2 = -count; index runs from -count up to 0
+       shlo    2,g2,g4         # g4 = -count scaled to bytes (4-byte limbs)
+       subo    g4,g1,g1        # g1 = src + count limbs (just past the end)
+       subo    g4,g0,g13       # g13 = dst + count limbs (just past the end)
+       mov     0,g0            # g0 = carry limb, initially 0
+
+       cmpo    1,0             # clear C bit on AC.cc
+
+Loop:  ld      (g1)[g2*4],g5   # g5 = next src limb
+       emul    g3,g5,g6        # g7:g6 = multiplier * src limb
+       ld      (g13)[g2*4],g5  # g5 = matching dst limb
+
+       addc    g0,g6,g6        # add carry limb; relies on the C bit being clear
+       addc    0,g7,g7         # propagate C into the high product word
+       addc    g5,g6,g6        # add dst limb; relies on the C bit being clear
+       st      g6,(g13)[g2*4]  # store updated dst limb
+       addc    0,g7,g0         # next carry limb = high word + C
+
+       addo    g2,1,g2         # step to the next limb
+       cmpobne 0,g2,Loop       # when branch is taken, clears C bit
+
+       ret                     # final carry limb is left in g0
diff --git a/mpn/i960/mul_1.s b/mpn/i960/mul_1.s

new file mode 100644 (file)

index 0000000..7912aa1
--- /dev/null
+++ b/mpn/i960/mul_1.s
@@ -0,0 +1,43 @@
+# I960 __gmpn_mul_1 -- Multiply a limb vector with a limb and store
+# the result in a second limb vector.
+
+# Copyright 1995, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+.text
+       .align  4
+       .globl  ___gmpn_mul_1
+___gmpn_mul_1:                 # g0=dst vector, g1=src vector, g2=limb count, g3=multiplier limb
+       subo    g2,0,g2         # g2 = -count; index runs from -count up to 0
+       shlo    2,g2,g4         # g4 = -count scaled to bytes (4-byte limbs)
+       subo    g4,g1,g1        # g1 = src + count limbs (just past the end)
+       subo    g4,g0,g13       # g13 = dst + count limbs (just past the end)
+       mov     0,g0            # g0 = carry limb, initially 0
+
+       cmpo    1,0             # clear C bit on AC.cc
+
+Loop:  ld      (g1)[g2*4],g5   # g5 = next src limb
+       emul    g3,g5,g6        # g7:g6 = multiplier * src limb
+
+       addc    g0,g6,g6        # add carry limb; relies on the C bit being clear
+       st      g6,(g13)[g2*4]  # store low word to dst
+       addc    0,g7,g0         # next carry limb = high word + C
+
+       addo    g2,1,g2         # step to the next limb
+       cmpobne 0,g2,Loop       # when branch is taken, clears C bit
+
+       ret                     # final carry limb is left in g0
diff --git a/mpn/i960/sub_n.s b/mpn/i960/sub_n.s

new file mode 100644 (file)

index 0000000..87adcbf
--- /dev/null
+++ b/mpn/i960/sub_n.s
@@ -0,0 +1,41 @@
+# I960 __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+# store difference in a third limb vector.
+
+# Copyright 1995, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+.text
+       .align 4
+       .globl ___gmpn_sub_n
+___gmpn_sub_n:                 # g0=res_ptr, g1=s1_ptr, g2=s2_ptr, g3=limb count
+       mov     1,g6            # set carry-save register
+       cmpo    1,0             # clear cy
+
+Loop:  subo    1,g3,g3         # update loop counter
+       ld      (g1),g5         # load from s1_ptr
+       addo    4,g1,g1         # s1_ptr++
+       ld      (g2),g4         # load from s2_ptr
+       addo    4,g2,g2         # s2_ptr++
+       cmpo    g6,1            # restore cy from g6, relies on cy being 0
+       subc    g4,g5,g4        # main subtract
+       subc    0,0,g6          # save cy in g6
+       st      g4,(g0)         # store result to res_ptr
+       addo    4,g0,g0         # res_ptr++
+       cmpobne 0,g3,Loop       # when branch is taken, cy will be 0
+
+       mov     g6,g0           # return carry-save register (NOTE(review): confirm borrow polarity)
+       ret
diff --git a/mpn/ia64/README b/mpn/ia64/README

new file mode 100644 (file)

index 0000000..631a58d
--- /dev/null
+++ b/mpn/ia64/README
@@ -0,0 +1,270 @@
+Copyright 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+
+                      IA-64 MPN SUBROUTINES
+
+
+This directory contains mpn functions for the IA-64 architecture.
+
+
+CODE ORGANIZATION
+
+       mpn/ia64          itanium-2, and generic ia64
+
+The code here has been optimized primarily for Itanium 2.  Very few Itanium 1
+chips were ever sold, and Itanium 2 is more powerful, so the latter is what
+we concentrate on.
+
+
+
+CHIP NOTES
+
+The IA-64 ISA packs instructions three at a time into 128-bit bundles.
+Programmers/compilers need to put explicit breaks `;;' when there are WAW or
+RAW dependencies, with some notable exceptions.  Such "breaks" are typically
+at the end of a bundle, but can be put between operations within some bundle
+types too.
+
+The Itanium 1 and Itanium 2 implementations can under ideal conditions
+execute two bundles per cycle.  The Itanium 1 allows 4 of these instructions
+to do integer operations, while the Itanium 2 allows all 6 to be integer
+operations.
+
+Taken cloop branches seem to insert a bubble into the pipeline most of the
+time on Itanium 1.
+
+Loads to the fp registers bypass the L1 cache and thus get extremely long
+latencies, 9 cycles on the Itanium 1 and 6 cycles on the Itanium 2.
+
+The software pipeline stuff using br.ctop instruction causes delays, since
+many issue slots are taken up by instructions with zero predicates, and
+since many extra instructions are needed to set things up.  These features
+are clearly designed for code density, not speed.
+
+Misc pipeline limitations (Itanium 1):
+* The getf.sig instruction can only execute in M0.
+* At most four integer instructions/cycle.
+* Nops take up resources like any plain instructions.
+
+Misc pipeline limitations (Itanium 2):
+* The getf.sig instruction can only execute in M0.
+* Nops take up resources like any plain instructions.
+
+
+ASSEMBLY SYNTAX
+
+.align pads with nops in a text segment, but gas 2.14 and earlier
+incorrectly byte-swaps its nop bundle in big endian mode (eg. hpux), making
+it come out as break instructions.  We use the ALIGN() macro in
+mpn/ia64/ia64-defs.m4 when it might be executed across.  That macro
+suppresses any .align if the problem is detected by configure.  Lack of
+alignment might hurt performance but will at least be correct.
+
+foo:: to create a global symbol is not accepted by gas.  Use separate
+".global foo" and "foo:" instead.
+
+.global is the standard global directive.  gas accepts .globl, but hpux "as"
+doesn't.
+
+.proc / .endp generates the appropriate .type and .size information for ELF,
+so the latter directives don't need to be given explicitly.
+
+.pred.rel "mutex"... is standard for annotating predicate register
+relationships.  gas also accepts .pred.rel.mutex, but hpux "as" doesn't.
+
+.pred directives can't be put on a line with a label, like
+".Lfoo: .pred ...", the HP assembler on HP-UX 11.23 rejects that.
+gas is happy with it, and past versions of HP had seemed ok.
+
+// is the standard comment sequence, but we prefer "C" since it inhibits m4
+macro expansion.  See comments in ia64-defs.m4.
+
+
+REGISTER USAGE
+
+Special:
+   r0: constant 0
+   r1: global pointer (gp)
+   r8: return value
+   r12: stack pointer (sp)
+   r13: thread pointer (tp)
+Caller-saves: r8-r11 r14-r31 f6-f15 f32-f127
+Caller-saves but rotating: r32-
+
+
+================================================================
+mpn_add_n, mpn_sub_n:
+
+The current code runs at 1.25 c/l on Itanium 2.
+
+================================================================
+mpn_mul_1:
+
+The current code runs at 2 c/l on Itanium 2.
+
+Using a blocked approach, working off of 4 separate places in the operands,
+one could make use of the xma accumulation, and approach 1 c/l.
+
+       ldf8 [up]
+       xma.l
+       xma.hu
+       stf8  [wrp]
+
+================================================================
+mpn_addmul_1:
+
+The current code runs at 2 c/l on Itanium 2.
+
+It seems possible to use a blocked approach, as with mpn_mul_1.  We should
+read rp[] to integer registers, allowing for just one getf.sig per cycle.
+
+       ld8  [rp]
+       ldf8 [up]
+       xma.l
+       xma.hu
+       getf.sig
+       add+add+cmp+cmp
+       st8  [wrp]
+
+These 10 instructions can be scheduled to approach 1.667 cycles, and with
+the 4 cycle latency of xma, this means we need at least 3 blocks.  Using
+ldfp8 we could approach 1.583 c/l.
+
+================================================================
+mpn_submul_1:
+
+The current code runs at 2.25 c/l on Itanium 2.  Getting to 2 c/l requires
+ldfp8 with all the alignment headaches that implies.
+
+================================================================
+mpn_addmul_N
+
+For best speed, we need to give up using mpn_addmul_1 as the main multiply
+building block, and instead take multiple v limbs per loop.  For the Itanium
+1, we need to take about 8 limbs at a time for full speed.  For the Itanium
+2, something like mpn_addmul_4 should be enough.
+
+The add+cmp+cmp+add we use in the other routines is optimal for shortening
+recurrences (1 cycle) but the sequence takes up 4 execution slots.  When
+recurrence depth is not critical, a more standard 3-cycle add+cmp+add is
+better.
+
+/* First load the 8 values from v */
+       ldfp8           v0, v1 = [r35], 16;;
+       ldfp8           v2, v3 = [r35], 16;;
+       ldfp8           v4, v5 = [r35], 16;;
+       ldfp8           v6, v7 = [r35], 16;;
+
+/* In the inner loop, get a new U limb and store a result limb. */
+       mov             lc = un
+Loop:  ldf8            u0 = [r33], 8
+       ld8             r0 = [r32]
+       xma.l           lp0 = v0, u0, hp0
+       xma.hu          hp0 = v0, u0, hp0
+       xma.l           lp1 = v1, u0, hp1
+       xma.hu          hp1 = v1, u0, hp1
+       xma.l           lp2 = v2, u0, hp2
+       xma.hu          hp2 = v2, u0, hp2
+       xma.l           lp3 = v3, u0, hp3
+       xma.hu          hp3 = v3, u0, hp3
+       xma.l           lp4 = v4, u0, hp4
+       xma.hu          hp4 = v4, u0, hp4
+       xma.l           lp5 = v5, u0, hp5
+       xma.hu          hp5 = v5, u0, hp5
+       xma.l           lp6 = v6, u0, hp6
+       xma.hu          hp6 = v6, u0, hp6
+       xma.l           lp7 = v7, u0, hp7
+       xma.hu          hp7 = v7, u0, hp7
+       getf.sig        l0 = lp0
+       getf.sig        l1 = lp1
+       getf.sig        l2 = lp2
+       getf.sig        l3 = lp3
+       getf.sig        l4 = lp4
+       getf.sig        l5 = lp5
+       getf.sig        l6 = lp6
+       add+cmp+add     xx, l0, r0
+       add+cmp+add     acc0, acc1, l1
+       add+cmp+add     acc1, acc2, l2
+       add+cmp+add     acc2, acc3, l3
+       add+cmp+add     acc3, acc4, l4
+       add+cmp+add     acc4, acc5, l5
+       add+cmp+add     acc5, acc6, l6
+       getf.sig        acc6 = lp7
+       st8             [r32] = xx, 8
+       br.cloop Loop
+
+       49 insn at max 6 insn/cycle:            8.167 cycles/limb8
+       11 memops at max 2 memops/cycle:        5.5 cycles/limb8
+       16 fpops at max 2 fpops/cycle:          8 cycles/limb8
+       21 intops at max 4 intops/cycle:        5.25 cycles/limb8
+       11+21 memops+intops at max 4/cycle      8 cycles/limb8
+
+================================================================
+mpn_lshift, mpn_rshift
+
+The current code runs at 1 cycle/limb on Itanium 2.
+
+Using 63 separate loops, we could use the double-word shrp instruction.
+That instruction has a plain single-cycle latency.  We need 63 loops since
+this instruction only accepts an immediate count.  That would lead to a somewhat
+silly code size, but the speed would be 0.75 c/l on Itanium 2 (by using shrp
+each cycle plus shl/shr going down I1 for a further limb every second
+cycle).
+
+================================================================
+mpn_copyi, mpn_copyd
+
+The current code runs at 0.5 c/l on Itanium 2.  But that is just for L1
+cache hit.  The 4-way unrolled loop takes just 2 cycles, and thus load-use
+scheduling isn't great.  It might be best to actually use modulo scheduled
+loops, since that will allow us to do better load-use scheduling without too
+much unrolling.
+
+Depending on size or operand alignment, we get 1 c/l or 0.5 c/l on Itanium
+2, according to tune/speed.  Cache bank conflicts?
+
+
+
+REFERENCES
+
+Intel Itanium Architecture Software Developer's Manual, volumes 1 to 3,
+Intel document 245317-004, 245318-004, 245319-004 October 2002.  Volume 1
+includes an Itanium optimization guide.
+
+Intel Itanium Processor-specific Application Binary Interface (ABI), Intel
+document 245370-003, May 2001.  Describes C type sizes, dynamic linking,
+etc.
+
+Intel Itanium Architecture Assembly Language Reference Guide, Intel document
+248801-004, 2000-2002.  Describes assembly instruction syntax and other
+directives.
+
+Itanium Software Conventions and Runtime Architecture Guide, Intel document
+245358-003, May 2001.  Describes calling conventions, including stack
+unwinding requirements.
+
+Intel Itanium Processor Reference Manual for Software Optimization, Intel
+document 245473-003, November 2001.
+
+Intel Itanium-2 Processor Reference Manual for Software Development and
+Optimization, Intel document 251110-003, May 2004.
+
+All the above documents can be found online at
+
+    http://developer.intel.com/design/itanium/manuals.htm
diff --git a/mpn/ia64/addmul_1.asm b/mpn/ia64/addmul_1.asm

new file mode 100644 (file)

index 0000000..6cd9d2b
--- /dev/null
+++ b/mpn/ia64/addmul_1.asm
@@ -0,0 +1,590 @@
+dnl  IA-64 mpn_addmul_1 -- Multiply a limb vector with a limb and add the
+dnl  result to a second limb vector.
+
+dnl  Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2007 Free Software
+dnl  Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C         cycles/limb
+C Itanium:    3.0
+C Itanium 2:  2.0
+
+C TODO
+C  * Further optimize feed-in and wind-down code, both for speed and code size.
+C  * Handle low limb input and results specially, using a common stf8 in the
+C    epilogue.
+C  * Use 1 c/l carry propagation scheme in wind-down code.
+C  * Use extra pointer registers for `up' and rp to speed up feed-in loads.
+C  * Work out final differences with mul_1.asm.  That function is 300 bytes
+C    smaller than this due to better loop scheduling and thus simpler feed-in
+C    code.
+
+C INPUT PARAMETERS
+define(`rp', `r32')   C vector that is loaded, added to and stored back
+define(`up', `r33')   C vector that is multiplied by vl
+define(`n', `r34')    C number of limbs in each vector
+define(`vl', `r35')   C multiplier limb, copied to f6 by setf.sig
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+       .prologue
+       .save   ar.lc, r2
+       .body                                   C set up registers, then dispatch on n mod 4
+
+ifdef(`HAVE_ABI_32',
+`      addp4           rp = 0, rp              C M I   32-bit ABI: make a 64-bit address out of the 32-bit pointer
+       addp4           up = 0, up              C M I   same for the source pointer
+       zxt4            n = n                   C I     zero-extend the 32-bit limb count
+       ;;
+')
+{.mmi
+       adds            r15 = -1, n             C M I   r15 = n - 1
+       mov             r20 = rp                C M I   r20 is the store pointer; rp itself is only used for loads
+       mov.i           r2 = ar.lc              C I0    keep the incoming ar.lc in r2; it is put back before returning
+}
+{.mmi
+       ldf8            f7 = [up], 8            C M     first limb from up
+       ldf8            f8 = [rp], 8            C M     first limb from rp
+       and             r14 = 3, n              C M I   r14 = n mod 4
+       ;;
+}
+{.mmi
+       setf.sig        f6 = vl                 C M2 M3 multiplier limb into an FP register for xma
+       cmp.eq          p10, p0 = 0, r14        C M I   p10: n mod 4 == 0
+       shr.u           r31 = r15, 2            C I0    r31 = (n - 1) / 4
+}
+{.mmi
+       cmp.eq          p11, p0 = 2, r14        C M I   p11: n mod 4 == 2
+       cmp.eq          p12, p0 = 3, r14        C M I   p12: n mod 4 == 3
+       nop.i           0                       C I
+       ;;
+}
+{.mii
+       cmp.ne          p6, p7 = r0, r0         C M I   p6 = 0, p7 = 1: start with no carry
+       mov.i           ar.lc = r31             C I0    loop counter = (n - 1) / 4
+       cmp.ne          p8, p9 = r0, r0         C M I   p8 = 0, p9 = 1: start with no carry
+}
+{.bbb
+  (p10)        br.dptk         .Lb00                   C B     n mod 4 == 0
+  (p11)        br.dptk         .Lb10                   C B     n mod 4 == 2
+  (p12)        br.dptk         .Lb11                   C B     n mod 4 == 3; otherwise fall through with n mod 4 == 1
+       ;;
+}
+
+.Lb01: br.cloop.dptk   .grt1                   C B     n mod 4 == 1; taken when ar.lc != 0, i.e. n >= 5
+
+       xma.l           f39 = f7, f6, f8        C F     n == 1: low half of up[0] * vl + rp[0]
+       xma.hu          f43 = f7, f6, f8        C F     high half of the same sum
+       ;;
+       getf.sig        r8 = f43                C M2    high half is left in r8 at return
+       stf8            [r20] = f39             C M2 M3 low half is the only limb stored
+       mov.i           ar.lc = r2              C I0    put back the saved ar.lc
+       br.ret.sptk.many b0                     C B
+
+.grt1:
+       ldf8            f32 = [up], 8           C fetch the next four limbs of each vector
+       ldf8            f44 = [rp], 8
+       ;;
+       ldf8            f33 = [up], 8
+       ldf8            f45 = [rp], 8
+       ;;
+       ldf8            f34 = [up], 8
+       xma.l           f39 = f7, f6, f8
+       ldf8            f46 = [rp], 8
+       xma.hu          f43 = f7, f6, f8
+       ;;
+       ldf8            f35 = [up], 8
+       ldf8            f47 = [rp], 8
+       br.cloop.dptk   .grt5                   C taken when ar.lc is still nonzero, i.e. n >= 9
+
+       xma.l           f36 = f32, f6, f44      C n == 5: form the remaining four products
+       xma.hu          f40 = f32, f6, f44
+       ;;
+       stf8            [r20] = f39, 8          C lowest result limb has no carry-in, so it is stored as is
+       xma.l           f37 = f33, f6, f45
+       xma.hu          f41 = f33, f6, f45
+       ;;
+       getf.sig        r31 = f43
+       getf.sig        r24 = f36
+       xma.l           f38 = f34, f6, f46
+       xma.hu          f42 = f34, f6, f46
+       ;;
+       getf.sig        r28 = f40
+       getf.sig        r25 = f37
+       xma.l           f39 = f35, f6, f47
+       xma.hu          f43 = f35, f6, f47
+       ;;
+       getf.sig        r29 = f41
+       getf.sig        r26 = f38
+       br              .Lcj5                   C finish in the wind-down code
+
+.grt5:
+       mov             r30 = 0                 C nothing to add to the first low half in the loop
+       xma.l           f36 = f32, f6, f44
+       xma.hu          f40 = f32, f6, f44
+       ;;
+       ldf8            f32 = [up], 8
+       xma.l           f37 = f33, f6, f45
+       ldf8            f44 = [rp], 8
+       xma.hu          f41 = f33, f6, f45
+       ;;
+       ldf8            f33 = [up], 8
+       getf.sig        r27 = f39
+       ;;
+       getf.sig        r31 = f43
+       xma.l           f38 = f34, f6, f46
+       ldf8            f45 = [rp], 8
+       xma.hu          f42 = f34, f6, f46
+       ;;
+       ldf8            f34 = [up], 8
+       getf.sig        r24 = f36
+       ;;
+       getf.sig        r28 = f40
+       xma.l           f39 = f35, f6, f47
+       ldf8            f46 = [rp], 8
+       xma.hu          f43 = f35, f6, f47
+       ;;
+       ldf8            f35 = [up], 8
+       getf.sig        r25 = f37
+       br.cloop.dptk   .Loop                   C more iterations counted in ar.lc: enter the main loop
+       br              .Le0                    C otherwise go straight to the wind-down code
+
+
+.Lb10: ldf8            f35 = [up], 8           C n mod 4 == 2: fetch the second limb of each vector
+       ldf8            f47 = [rp], 8
+       br.cloop.dptk   .grt2                   C taken when ar.lc != 0, i.e. n >= 6
+
+       xma.l           f38 = f7, f6, f8        C n == 2: form both products
+       xma.hu          f42 = f7, f6, f8
+       ;;
+       xma.l           f39 = f35, f6, f47
+       xma.hu          f43 = f35, f6, f47
+       ;;
+       getf.sig        r30 = f42
+       stf8            [r20] = f38, 8          C lowest result limb has no carry-in, so it is stored as is
+       getf.sig        r27 = f39
+       getf.sig        r8 = f43                C high half of the last product, left in r8 for the return
+       br              .Lcj2                   C finish in the wind-down code
+
+.grt2:
+       ldf8            f32 = [up], 8           C fetch the next four limbs of each vector
+       ldf8            f44 = [rp], 8
+       ;;
+       ldf8            f33 = [up], 8
+       xma.l           f38 = f7, f6, f8
+       ldf8            f45 = [rp], 8
+       xma.hu          f42 = f7, f6, f8
+       ;;
+       ldf8            f34 = [up], 8
+       xma.l           f39 = f35, f6, f47
+       ldf8            f46 = [rp], 8
+       xma.hu          f43 = f35, f6, f47
+       ;;
+       ldf8            f35 = [up], 8
+       ldf8            f47 = [rp], 8
+       br.cloop.dptk   .grt6                   C taken when ar.lc is still nonzero, i.e. n >= 10
+
+       stf8            [r20] = f38, 8          C n == 6: lowest result limb is stored as is
+       xma.l           f36 = f32, f6, f44
+       xma.hu          f40 = f32, f6, f44
+       ;;
+       getf.sig        r30 = f42
+       getf.sig        r27 = f39
+       xma.l           f37 = f33, f6, f45
+       xma.hu          f41 = f33, f6, f45
+       ;;
+       getf.sig        r31 = f43
+       getf.sig        r24 = f36
+       xma.l           f38 = f34, f6, f46
+       xma.hu          f42 = f34, f6, f46
+       ;;
+       getf.sig        r28 = f40
+       getf.sig        r25 = f37
+       xma.l           f39 = f35, f6, f47
+       xma.hu          f43 = f35, f6, f47
+       br              .Lcj6                   C finish in the wind-down code
+
+.grt6:
+       mov             r29 = 0                 C nothing to add to the first low half in the loop
+       xma.l           f36 = f32, f6, f44
+       xma.hu          f40 = f32, f6, f44
+       ;;
+       ldf8            f32 = [up], 8
+       getf.sig        r26 = f38
+       ;;
+       getf.sig        r30 = f42
+       xma.l           f37 = f33, f6, f45
+       ldf8            f44 = [rp], 8
+       xma.hu          f41 = f33, f6, f45
+       ;;
+       ldf8            f33 = [up], 8
+       getf.sig        r27 = f39
+       ;;
+       getf.sig        r31 = f43
+       xma.l           f38 = f34, f6, f46
+       ldf8            f45 = [rp], 8
+       xma.hu          f42 = f34, f6, f46
+       ;;
+       ldf8            f34 = [up], 8
+       getf.sig        r24 = f36
+       br              .LL10                   C enter the main loop at its last stanza
+
+
+.Lb11: ldf8            f34 = [up], 8           C n mod 4 == 3: fetch the second and third limb of each vector
+       ldf8            f46 = [rp], 8
+       ;;
+       ldf8            f35 = [up], 8
+       ldf8            f47 = [rp], 8
+       br.cloop.dptk   .grt3                   C taken when ar.lc != 0, i.e. n >= 7
+       ;;
+
+       xma.l           f37 = f7, f6, f8        C n == 3: form all three products
+       xma.hu          f41 = f7, f6, f8
+       xma.l           f38 = f34, f6, f46
+       xma.hu          f42 = f34, f6, f46
+       xma.l           f39 = f35, f6, f47
+       xma.hu          f43 = f35, f6, f47
+       ;;
+       getf.sig        r29 = f41
+       stf8            [r20] = f37, 8          C lowest result limb has no carry-in, so it is stored as is
+       getf.sig        r26 = f38
+       getf.sig        r30 = f42
+       getf.sig        r27 = f39
+       getf.sig        r8 = f43                C high half of the last product, left in r8 for the return
+       br              .Lcj3                   C finish in the wind-down code
+
+.grt3:
+       ldf8            f32 = [up], 8           C fetch the next four limbs of each vector
+       xma.l           f37 = f7, f6, f8
+       ldf8            f44 = [rp], 8
+       xma.hu          f41 = f7, f6, f8
+       ;;
+       ldf8            f33 = [up], 8
+       xma.l           f38 = f34, f6, f46
+       ldf8            f45 = [rp], 8
+       xma.hu          f42 = f34, f6, f46
+       ;;
+       ldf8            f34 = [up], 8
+       xma.l           f39 = f35, f6, f47
+       ldf8            f46 = [rp], 8
+       xma.hu          f43 = f35, f6, f47
+       ;;
+       ldf8            f35 = [up], 8
+       getf.sig        r25 = f37               C FIXME  (this r25 is only read on the .grt7 path)
+       ldf8            f47 = [rp], 8
+       br.cloop.dptk   .grt7                   C taken when ar.lc is still nonzero, i.e. n >= 11
+
+       getf.sig        r29 = f41               C n == 7
+       stf8            [r20] = f37, 8          C FIXME
+       xma.l           f36 = f32, f6, f44
+       getf.sig        r26 = f38
+       xma.hu          f40 = f32, f6, f44
+       ;;
+       getf.sig        r30 = f42
+       xma.l           f37 = f33, f6, f45
+       getf.sig        r27 = f39
+       xma.hu          f41 = f33, f6, f45
+       ;;
+       getf.sig        r31 = f43
+       xma.l           f38 = f34, f6, f46
+       getf.sig        r24 = f36
+       xma.hu          f42 = f34, f6, f46
+       br              .Lcj7                   C finish in the wind-down code
+
+.grt7:
+       getf.sig        r29 = f41
+       xma.l           f36 = f32, f6, f44
+       mov             r28 = 0                 C nothing to add to the first low half in the loop
+       xma.hu          f40 = f32, f6, f44
+       ;;
+       ldf8            f32 = [up], 8
+       getf.sig        r26 = f38
+       ;;
+       getf.sig        r30 = f42
+       xma.l           f37 = f33, f6, f45
+       ldf8            f44 = [rp], 8
+       xma.hu          f41 = f33, f6, f45
+       ;;
+       ldf8            f33 = [up], 8
+       getf.sig        r27 = f39
+       br              .LL11                   C enter the main loop at its third stanza
+
+
+.Lb00: ldf8            f33 = [up], 8           C n mod 4 == 0: fetch limbs two to four of each vector
+       ldf8            f45 = [rp], 8
+       ;;
+       ldf8            f34 = [up], 8
+       ldf8            f46 = [rp], 8
+       ;;
+       ldf8            f35 = [up], 8
+       xma.l           f36 = f7, f6, f8
+       ldf8            f47 = [rp], 8
+       xma.hu          f40 = f7, f6, f8
+       br.cloop.dptk   .grt4                   C taken when ar.lc != 0, i.e. n >= 8
+
+       xma.l           f37 = f33, f6, f45      C n == 4: form the remaining three products
+       xma.hu          f41 = f33, f6, f45
+       xma.l           f38 = f34, f6, f46
+       xma.hu          f42 = f34, f6, f46
+       ;;
+       getf.sig        r28 = f40
+       stf8            [r20] = f36, 8          C lowest result limb has no carry-in, so it is stored as is
+       xma.l           f39 = f35, f6, f47
+       getf.sig        r25 = f37
+       xma.hu          f43 = f35, f6, f47
+       ;;
+       getf.sig        r29 = f41
+       getf.sig        r26 = f38
+       getf.sig        r30 = f42
+       getf.sig        r27 = f39
+       br              .Lcj4                   C finish in the wind-down code
+
+.grt4:
+       ldf8            f32 = [up], 8           C fetch the next four limbs of each vector
+       xma.l           f37 = f33, f6, f45
+       ldf8            f44 = [rp], 8
+       xma.hu          f41 = f33, f6, f45
+       ;;
+       ldf8            f33 = [up], 8
+       xma.l           f38 = f34, f6, f46
+       ldf8            f45 = [rp], 8
+       xma.hu          f42 = f34, f6, f46
+       ;;
+       ldf8            f34 = [up], 8
+       getf.sig        r24 = f36               C FIXME  (this r24 is only read on the .grt8 path)
+       xma.l           f39 = f35, f6, f47
+       ldf8            f46 = [rp], 8
+       getf.sig        r28 = f40
+       xma.hu          f43 = f35, f6, f47
+       ;;
+       ldf8            f35 = [up], 8
+       getf.sig        r25 = f37
+       ldf8            f47 = [rp], 8
+       br.cloop.dptk   .grt8                   C taken when ar.lc is still nonzero, i.e. n >= 12
+
+       getf.sig        r29 = f41               C n == 8
+       stf8            [r20] = f36, 8          C FIXME
+       xma.l           f36 = f32, f6, f44
+       getf.sig        r26 = f38
+       getf.sig        r30 = f42
+       xma.hu          f40 = f32, f6, f44
+       ;;
+       xma.l           f37 = f33, f6, f45
+       getf.sig        r27 = f39
+       xma.hu          f41 = f33, f6, f45
+       br              .Lcj8                   C finish in the wind-down code
+
+.grt8:
+       getf.sig        r29 = f41
+       xma.l           f36 = f32, f6, f44
+       mov             r31 = 0                 C nothing to add to the first low half in the loop
+       xma.hu          f40 = f32, f6, f44
+       ;;
+       ldf8            f32 = [up], 8
+       getf.sig        r26 = f38
+       br              .LL00                   C enter the main loop at its second stanza
+
+
+C *** MAIN LOOP START ***
+       ALIGN(32)                               C insn  fed     cycle #
+.Loop:                                         C four stanzas per iteration, each storing one result limb
+       .pred.rel "mutex", p6, p7               C num   by      i1 i2
+       getf.sig        r29 = f41               C 00    16      0   0
+       xma.l           f36 = f32, f6, f44      C 01    06,15   0   0
+   (p6)        add             r14 = r30, r27, 1       C 02            0   0
+       ldf8            f47 = [rp], 8           C 03            0   0
+       xma.hu          f40 = f32, f6, f44      C 04    06,15   0   0
+   (p7)        add             r14 = r30, r27          C 05            0   0
+       ;;
+       .pred.rel "mutex", p6, p7               C p6: carry-in was added, p7: it was not; the compares set p8/p9 to the carry-out
+       ldf8            f32 = [up], 8           C 06            1   1
+   (p6)        cmp.leu         p8, p9 = r14, r27       C 07            1   1
+   (p7)        cmp.ltu         p8, p9 = r14, r27       C 08            1   1
+       getf.sig        r26 = f38               C 09    25      2   1
+       st8             [r20] = r14, 8          C 10            2   1
+       nop.b           0                       C 11            2   1
+       ;;
+.LL00:                                         C second stanza; also the loop entry used by .grt8
+       .pred.rel "mutex", p8, p9
+       getf.sig        r30 = f42               C 12    28      3   2
+       xma.l           f37 = f33, f6, f45      C 13    18,27   3   2
+   (p8)        add             r16 = r31, r24, 1       C 14            3   2
+       ldf8            f44 = [rp], 8           C 15            3   2
+       xma.hu          f41 = f33, f6, f45      C 16    18,27   3   2
+   (p9)        add             r16 = r31, r24          C 17            3   2
+       ;;
+       .pred.rel "mutex", p8, p9               C here p8/p9 are the carry-in and p6/p7 receive the carry-out
+       ldf8            f33 = [up], 8           C 18            4   3
+   (p8)        cmp.leu         p6, p7 = r16, r24       C 19            4   3
+   (p9)        cmp.ltu         p6, p7 = r16, r24       C 20            4   3
+       getf.sig        r27 = f39               C 21    37      5   3
+       st8             [r20] = r16, 8          C 22            5   3
+       nop.b           0                       C 23            5   3
+       ;;
+.LL11:                                         C third stanza; also the loop entry used by .grt7
+       .pred.rel "mutex", p6, p7
+       getf.sig        r31 = f43               C 24    40      6   4
+       xma.l           f38 = f34, f6, f46      C 25    30,39   6   4
+   (p6)        add             r14 = r28, r25, 1       C 26            6   4
+       ldf8            f45 = [rp], 8           C 27            6   4
+       xma.hu          f42 = f34, f6, f46      C 28    30,39   6   4
+   (p7)        add             r14 = r28, r25          C 29            6   4
+       ;;
+       .pred.rel "mutex", p6, p7
+       ldf8            f34 = [up], 8           C 30            7   5
+   (p6)        cmp.leu         p8, p9 = r14, r25       C 31            7   5
+   (p7)        cmp.ltu         p8, p9 = r14, r25       C 32            7   5
+       getf.sig        r24 = f36               C 33    01      8   5
+       st8             [r20] = r14, 8          C 34            8   5
+       nop.b           0                       C 35            8   5
+       ;;
+.LL10:                                         C fourth stanza; also the loop entry used by .grt6
+       .pred.rel "mutex", p8, p9
+       getf.sig        r28 = f40               C 36    04      9   6
+       xma.l           f39 = f35, f6, f47      C 37    42,03   9   6
+   (p8)        add             r16 = r29, r26, 1       C 38            9   6
+       ldf8            f46 = [rp], 8           C 39            9   6
+       xma.hu          f43 = f35, f6, f47      C 40    42,03   9   6
+   (p9)        add             r16 = r29, r26          C 41            9   6
+       ;;
+       .pred.rel "mutex", p8, p9
+       ldf8            f35 = [up], 8           C 42           10   7
+   (p8)        cmp.leu         p6, p7 = r16, r26       C 43           10   7
+   (p9)        cmp.ltu         p6, p7 = r16, r26       C 44           10   7
+       getf.sig        r25 = f37               C 45    13     11   7
+       st8             [r20] = r16, 8          C 46           11   7
+       br.cloop.dptk   .Loop                   C 47           11   7
+C *** MAIN LOOP END ***
+       ;;
+.Le0:                                          C wind-down: nothing more is loaded from up
+       .pred.rel "mutex", p6, p7
+       getf.sig        r29 = f41               C
+       xma.l           f36 = f32, f6, f44      C
+   (p6)        add             r14 = r30, r27, 1       C previous high half + current low half + carry
+       ldf8            f47 = [rp], 8           C final load from rp on this path
+       xma.hu          f40 = f32, f6, f44      C
+   (p7)        add             r14 = r30, r27          C same sum when no carry is pending
+       ;;
+       .pred.rel "mutex", p6, p7
+   (p6)        cmp.leu         p8, p9 = r14, r27       C p8 = carry out of the sum that included a carry-in
+   (p7)        cmp.ltu         p8, p9 = r14, r27       C p8 = carry out of the sum without carry-in
+       getf.sig        r26 = f38               C
+       st8             [r20] = r14, 8          C
+       ;;
+       .pred.rel "mutex", p8, p9
+       getf.sig        r30 = f42               C
+       xma.l           f37 = f33, f6, f45      C
+   (p8)        add             r16 = r31, r24, 1       C
+       xma.hu          f41 = f33, f6, f45      C
+   (p9)        add             r16 = r31, r24          C
+       ;;
+       .pred.rel "mutex", p8, p9
+   (p8)        cmp.leu         p6, p7 = r16, r24       C
+   (p9)        cmp.ltu         p6, p7 = r16, r24       C
+       getf.sig        r27 = f39               C
+       st8             [r20] = r16, 8          C
+       ;;
+.Lcj8:                                         C also entered directly when n == 8
+       .pred.rel "mutex", p6, p7
+       getf.sig        r31 = f43               C
+       xma.l           f38 = f34, f6, f46      C
+   (p6)        add             r14 = r28, r25, 1       C
+       xma.hu          f42 = f34, f6, f46      C
+   (p7)        add             r14 = r28, r25          C
+       ;;
+       .pred.rel "mutex", p6, p7
+   (p6)        cmp.leu         p8, p9 = r14, r25       C
+   (p7)        cmp.ltu         p8, p9 = r14, r25       C
+       getf.sig        r24 = f36               C
+       st8             [r20] = r14, 8          C
+       ;;
+.Lcj7:                                         C also entered directly when n == 7
+       .pred.rel "mutex", p8, p9
+       getf.sig        r28 = f40               C
+       xma.l           f39 = f35, f6, f47      C last product, low half
+   (p8)        add             r16 = r29, r26, 1       C
+       xma.hu          f43 = f35, f6, f47      C last product, high half
+   (p9)        add             r16 = r29, r26          C
+       ;;
+       .pred.rel "mutex", p8, p9
+   (p8)        cmp.leu         p6, p7 = r16, r26       C
+   (p9)        cmp.ltu         p6, p7 = r16, r26       C
+       getf.sig        r25 = f37               C
+       st8             [r20] = r16, 8          C
+       ;;
+.Lcj6:                                         C also entered directly when n == 6; only sums and stores remain
+       .pred.rel "mutex", p6, p7
+       getf.sig        r29 = f41               C
+   (p6)        add             r14 = r30, r27, 1       C
+   (p7)        add             r14 = r30, r27          C
+       ;;
+       .pred.rel "mutex", p6, p7
+   (p6)        cmp.leu         p8, p9 = r14, r27       C
+   (p7)        cmp.ltu         p8, p9 = r14, r27       C
+       getf.sig        r26 = f38               C
+       st8             [r20] = r14, 8          C
+       ;;
+.Lcj5:                                         C also entered directly when n == 5
+       .pred.rel "mutex", p8, p9
+       getf.sig        r30 = f42               C
+   (p8)        add             r16 = r31, r24, 1       C
+   (p9)        add             r16 = r31, r24          C
+       ;;
+       .pred.rel "mutex", p8, p9
+   (p8)        cmp.leu         p6, p7 = r16, r24       C
+   (p9)        cmp.ltu         p6, p7 = r16, r24       C
+       getf.sig        r27 = f39               C
+       st8             [r20] = r16, 8          C
+       ;;
+.Lcj4:                                         C also entered directly when n == 4
+       .pred.rel "mutex", p6, p7
+       getf.sig        r8 = f43                C high half of the last product, left in r8 for the return
+   (p6)        add             r14 = r28, r25, 1       C
+   (p7)        add             r14 = r28, r25          C
+       ;;
+       .pred.rel "mutex", p6, p7
+       st8             [r20] = r14, 8          C
+   (p6)        cmp.leu         p8, p9 = r14, r25       C
+   (p7)        cmp.ltu         p8, p9 = r14, r25       C
+       ;;
+.Lcj3:                                         C also entered directly when n == 3
+       .pred.rel "mutex", p8, p9
+   (p8)        add             r16 = r29, r26, 1       C
+   (p9)        add             r16 = r29, r26          C
+       ;;
+       .pred.rel "mutex", p8, p9
+       st8             [r20] = r16, 8          C
+   (p8)        cmp.leu         p6, p7 = r16, r26       C
+   (p9)        cmp.ltu         p6, p7 = r16, r26       C
+       ;;
+.Lcj2:                                         C also entered directly when n == 2
+       .pred.rel "mutex", p6, p7
+   (p6)        add             r14 = r30, r27, 1       C
+   (p7)        add             r14 = r30, r27          C
+       ;;
+       .pred.rel "mutex", p6, p7
+       st8             [r20] = r14             C final store; the pointer is not advanced
+   (p6)        cmp.leu         p8, p9 = r14, r27       C
+   (p7)        cmp.ltu         p8, p9 = r14, r27       C
+       ;;
+   (p8)        add             r8 = 1, r8              C M I   add the carry out of the final sum to r8
+       mov.i           ar.lc = r2              C I0    put back the saved ar.lc
+       br.ret.sptk.many b0                     C B
+EPILOGUE()
+ASM_END()
diff --git a/mpn/ia64/addmul_2.asm b/mpn/ia64/addmul_2.asm

new file mode 100644 (file)

index 0000000..2c25802
--- /dev/null
+++ b/mpn/ia64/addmul_2.asm
@@ -0,0 +1,657 @@
+dnl  IA-64 mpn_addmul_2 -- Multiply an n-limb number with a 2-limb number and
+dnl  add the result to a (n+1)-limb number.
+
+dnl  Copyright 2004, 2005 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C         cycles/limb
+C Itanium:    3.65
+C Itanium 2:  1.625
+
+C Note that this is very similar to mul_2.asm.  If you change this file,
+C please change that file too.
+
+C TODO
+C  * Clean up variable names, and try to decrease the number of distinct
+C    registers used.
+C  * Cleanup feed-in code to not require zeroing several registers.
+C  * Make sure we don't depend on uninitialized predicate registers.
+C  * We currently cross-jump very aggressively, at the expense of a few cycles
+C    per operation.  Consider changing that.
+C  * Could perhaps save a few cycles by using 1 c/l carry propagation in
+C    wind-down code.
+C  * Ultimately rewrite.  The problem with this code is that it first uses a
+C    loaded u value in one xma pair, then leaves it live over several unrelated
+C    xma pairs, before it uses it again.  It should actually be quite possible
+C    to just swap some aligned xma pairs around.  But we should then schedule
+C    u loads further from the first use.
+
+C INPUT PARAMETERS
+define(`rp',`r32')              C vector that is loaded from and stored to
+define(`up',`r33')              C vector whose limbs are multiplied
+define(`n',`r34')               C limb count
+define(`vp',`r35')              C points to the two multiplier limbs
+
+define(`srp',`r3')              C read pointer into the rp vector, started at rp + 16
+
+define(`v0',`f6')               C first limb loaded from vp
+define(`v1',`f7')               C second limb loaded from vp
+
+define(`s0',`r14')              C sum that is written with st8
+define(`acc0',`r15')            C integer copy of an fp0b value; also reused for a sum
+
+define(`pr0_0',`r16') define(`pr0_1',`r17')     C pr0_N: integer copy of fp0b_N
+define(`pr0_2',`r18') define(`pr0_3',`r19')
+
+define(`pr1_0',`r20') define(`pr1_1',`r21')     C pr1_N: integer copy of fp1b_N
+define(`pr1_2',`r22') define(`pr1_3',`r23')
+
+define(`acc1_0',`r24') define(`acc1_1',`r25')   C acc1_N: integer copy of fp2a_N
+define(`acc1_2',`r26') define(`acc1_3',`r27')
+
+dnl define(`',`r28')
+dnl define(`',`r29')
+dnl define(`',`r30')
+dnl define(`',`r31')
+
+define(`fp0b_0',`f8') define(`fp0b_1',`f9')     C fp0b_N: low half of u * v0 + r
+define(`fp0b_2',`f10') define(`fp0b_3',`f11')
+
+define(`fp1a_0',`f12') define(`fp1a_1',`f13')   C fp1a_N: high half of u * v0 + r
+define(`fp1a_2',`f14') define(`fp1a_3',`f15')
+
+define(`fp1b_0',`f32') define(`fp1b_1',`f33')   C fp1b_N: low half of u * v1 + fp1a_N
+define(`fp1b_2',`f34') define(`fp1b_3',`f35')
+
+define(`fp2a_0',`f36') define(`fp2a_1',`f37')   C fp2a_N: high half of u * v1 + fp1a_N
+define(`fp2a_2',`f38') define(`fp2a_3',`f39')
+
+define(`r_0',`f40') define(`r_1',`f41')         C r_N: limbs loaded through srp
+define(`r_2',`f42') define(`r_3',`f43')
+
+define(`u_0',`f44') define(`u_1',`f45')         C u_N: limbs loaded through up
+define(`u_2',`f46') define(`u_3',`f47')
+
+define(`rx',`f48')              C first limb loaded from rp
+define(`ux',`f49')              C first limb loaded from up
+define(`ry',`f50')              C second limb loaded from rp
+define(`uy',`f51')              C second limb loaded from up
+
+ASM_START()
+PROLOGUE(mpn_addmul_2)
+       .prologue
+       .save   ar.lc, r2
+       .body
+
+ifdef(`HAVE_ABI_32',
+`      addp4           rp = 0, rp              C                       M I
+       addp4           up = 0, up              C                       M I
+       addp4           vp = 0, vp              C                       M I
+       zxt4            n = n                   C                       I
+       ;;')
+
+{.mmi          C 00
+       ldf8            ux = [up], 8            C                       M
+       ldf8            v0 = [vp], 8            C                       M
+       mov.i           r2 = ar.lc              C                       I0
+}{.mmi
+       ldf8            rx = [rp], 8            C                       M
+       and             r14 = 3, n              C                       M I
+       add             n = -2, n               C                       M I
+       ;;
+}{.mmi         C 01
+       ldf8            uy = [up], 8            C                       M
+       ldf8            v1 = [vp]               C                       M
+       shr.u           n = n, 2                C                       I0
+}{.mmi
+       ldf8            ry = [rp], -8           C                       M
+       cmp.eq          p10, p0 = 1, r14        C                       M I
+       cmp.eq          p11, p0 = 2, r14        C                       M I
+       ;;
+}{.mmi         C 02
+       add             srp = 16, rp            C                       M I
+       cmp.eq          p12, p0 = 3, r14        C                       M I
+       mov.i           ar.lc = n               C                       I0
+}{.bbb
+  (p10) br.dptk                .Lb01                   C                       B
+  (p11) br.dptk                .Lb10                   C                       B
+  (p12) br.dptk                .Lb11                   C                       B
+       ;;
+}
+
+       ALIGN(32)
+.Lb00: ldf8            r_1 = [srp], 8
+       ldf8            u_1 = [up], 8
+       mov             acc1_2 = 0
+       mov             pr1_2 = 0
+       mov             pr0_3 = 0
+       cmp.ne          p8, p9 = r0, r0
+       ;;
+       ldf8            r_2 = [srp], 8
+       xma.l           fp0b_3 = ux, v0, rx
+       cmp.ne          p12, p13 = r0, r0
+       ldf8            u_2 = [up], 8
+       xma.hu          fp1a_3 = ux, v0, rx
+       br.cloop.dptk   .grt4
+
+       xma.l           fp0b_0 = uy, v0, ry
+       xma.hu          fp1a_0 = uy, v0, ry
+       ;;
+       getf.sig        acc0 = fp0b_3
+       xma.l           fp1b_3 = ux, v1, fp1a_3
+       xma.hu          fp2a_3 = ux, v1, fp1a_3
+       ;;
+       xma.l           fp0b_1 = u_1, v0, r_1
+       xma.hu          fp1a_1 = u_1, v0, r_1
+       ;;
+       getf.sig        pr0_0 = fp0b_0
+       xma.l           fp1b_0 = uy, v1, fp1a_0
+       xma.hu          fp2a_0 = uy, v1, fp1a_0
+       ;;
+       getf.sig        pr1_3 = fp1b_3
+       getf.sig        acc1_3 = fp2a_3
+       xma.l           fp0b_2 = u_2, v0, r_2
+       xma.hu          fp1a_2 = u_2, v0, r_2
+       br              .Lcj4
+
+.grt4: xma.l           fp0b_0 = uy, v0, ry
+       xma.hu          fp1a_0 = uy, v0, ry
+       ;;
+       ldf8            r_3 = [srp], 8
+       getf.sig        acc0 = fp0b_3
+       xma.l           fp1b_3 = ux, v1, fp1a_3
+       ldf8            u_3 = [up], 8
+       xma.hu          fp2a_3 = ux, v1, fp1a_3
+       ;;
+       xma.l           fp0b_1 = u_1, v0, r_1
+       xma.hu          fp1a_1 = u_1, v0, r_1
+       ;;
+       ldf8            r_0 = [srp], 8
+       getf.sig        pr0_0 = fp0b_0
+       xma.l           fp1b_0 = uy, v1, fp1a_0
+       xma.hu          fp2a_0 = uy, v1, fp1a_0
+       ;;
+       ldf8            u_0 = [up], 8
+       getf.sig        pr1_3 = fp1b_3
+       ;;
+       getf.sig        acc1_3 = fp2a_3
+       xma.l           fp0b_2 = u_2, v0, r_2
+       xma.hu          fp1a_2 = u_2, v0, r_2
+       br              .LL00
+
+
+       ALIGN(32)
+.Lb01: ldf8            r_0 = [srp], 8          C M
+       ldf8            u_0 = [up], 8           C M
+       mov             acc1_1 = 0              C M I
+       mov             pr1_1 = 0               C M I
+       mov             pr0_2 = 0               C M I
+       cmp.ne          p6, p7 = r0, r0         C M I
+       ;;
+       ldf8            r_1 = [srp], 8          C M
+       xma.l           fp0b_2 = ux, v0, rx     C F
+       cmp.ne          p10, p11 = r0, r0       C M I
+       ldf8            u_1 = [up], 8           C M
+       xma.hu          fp1a_2 = ux, v0, rx     C F
+       ;;
+       xma.l           fp0b_3 = uy, v0, ry     C F
+       xma.hu          fp1a_3 = uy, v0, ry     C F
+       ;;
+       getf.sig        acc0 = fp0b_2           C M
+       ldf8            r_2 = [srp], 8          C M
+       xma.l           fp1b_2 = ux, v1,fp1a_2  C F
+       xma.hu          fp2a_2 = ux, v1,fp1a_2  C F
+       ldf8            u_2 = [up], 8           C M
+       br.cloop.dptk   .grt5
+
+       xma.l           fp0b_0 = u_0, v0, r_0   C F
+       xma.hu          fp1a_0 = u_0, v0, r_0   C F
+       ;;
+       getf.sig        pr0_3 = fp0b_3          C M
+       xma.l           fp1b_3 = uy, v1,fp1a_3  C F
+       xma.hu          fp2a_3 = uy, v1,fp1a_3  C F
+       ;;
+       getf.sig        pr1_2 = fp1b_2          C M
+       getf.sig        acc1_2 = fp2a_2         C M
+       xma.l           fp0b_1 = u_1, v0, r_1   C F
+       xma.hu          fp1a_1 = u_1, v0, r_1   C F
+       br              .Lcj5
+
+.grt5: xma.l           fp0b_0 = u_0, v0, r_0
+       xma.hu          fp1a_0 = u_0, v0, r_0
+       ;;
+       getf.sig        pr0_3 = fp0b_3
+       ldf8            r_3 = [srp], 8
+       xma.l           fp1b_3 = uy, v1, fp1a_3
+       xma.hu          fp2a_3 = uy, v1, fp1a_3
+       ;;
+       ldf8            u_3 = [up], 8
+       getf.sig        pr1_2 = fp1b_2
+       ;;
+       getf.sig        acc1_2 = fp2a_2
+       xma.l           fp0b_1 = u_1, v0, r_1
+       xma.hu          fp1a_1 = u_1, v0, r_1
+       br              .LL01
+
+
+       ALIGN(32)
+.Lb10:         C 03
+       br.cloop.dptk   .grt2
+               C 04
+               C 05
+               C 06
+       xma.l           fp0b_1 = ux, v0, rx
+       xma.hu          fp1a_1 = ux, v0, rx
+       ;;      C 07
+       xma.l           fp0b_2 = uy, v0, ry
+       xma.hu          fp1a_2 = uy, v0, ry
+       ;;      C 08
+               C 09
+               C 10
+       stf8            [rp] = fp0b_1, 8
+       xma.l           fp1b_1 = ux, v1, fp1a_1
+       xma.hu          fp2a_1 = ux, v1, fp1a_1
+       ;;      C 11
+       getf.sig        acc0 = fp0b_2
+       xma.l           fp1b_2 = uy, v1, fp1a_2
+       xma.hu          fp2a_2 = uy, v1, fp1a_2
+       ;;      C 12
+               C 13
+               C 14
+       getf.sig        pr1_1 = fp1b_1
+               C 15
+       getf.sig        acc1_1 = fp2a_1
+               C 16
+       getf.sig        pr1_2 = fp1b_2
+               C 17
+       getf.sig        r8 = fp2a_2
+       ;;      C 18
+               C 19
+       add             s0 = pr1_1, acc0
+       ;;      C 20
+       st8             [rp] = s0, 8
+       cmp.ltu         p8, p9 = s0, pr1_1
+       sub             r31 = -1, acc1_1
+       ;;      C 21
+       .pred.rel "mutex", p8, p9
+  (p8) add             acc0 = pr1_2, acc1_1, 1
+  (p9) add             acc0 = pr1_2, acc1_1
+  (p8) cmp.leu         p10, p0 = r31, pr1_2
+  (p9) cmp.ltu         p10, p0 = r31, pr1_2
+       ;;      C 22
+       st8             [rp] = acc0, 8
+       mov.i           ar.lc = r2
+  (p10)        add             r8 = 1, r8
+       br.ret.sptk.many b0
+
+
+.grt2: ldf8            r_3 = [srp], 8
+       ldf8            u_3 = [up], 8
+       mov             acc1_0 = 0
+       ;;
+       ldf8            r_0 = [srp], 8
+       xma.l           fp0b_1 = ux, v0, rx
+       mov             pr1_0 = 0
+       ldf8            u_0 = [up], 8
+       xma.hu          fp1a_1 = ux, v0, rx
+       mov             pr0_1 = 0
+       ;;
+       xma.l           fp0b_2 = uy, v0, ry
+       xma.hu          fp1a_2 = uy, v0, ry
+       ;;
+       getf.sig        acc0 = fp0b_1
+       ldf8            r_1 = [srp], 8
+       xma.l           fp1b_1 = ux, v1, fp1a_1
+       xma.hu          fp2a_1 = ux, v1, fp1a_1
+       ;;
+       ldf8            u_1 = [up], 8
+       xma.l           fp0b_3 = u_3, v0, r_3
+       xma.hu          fp1a_3 = u_3, v0, r_3
+       ;;
+       getf.sig        pr0_2 = fp0b_2
+       ldf8            r_2 = [srp], 8
+       xma.l           fp1b_2 = uy, v1, fp1a_2
+       xma.hu          fp2a_2 = uy, v1, fp1a_2
+       ;;
+       ldf8            u_2 = [up], 8
+       getf.sig        pr1_1 = fp1b_1
+       ;;
+       getf.sig        acc1_1 = fp2a_1
+       xma.l           fp0b_0 = u_0, v0, r_0
+       cmp.ne          p8, p9 = r0, r0
+       cmp.ne          p12, p13 = r0, r0
+       xma.hu          fp1a_0 = u_0, v0, r_0
+       br              .LL10
+
+
+       ALIGN(32)
+.Lb11: mov             acc1_3 = 0
+       mov             pr1_3 = 0
+       mov             pr0_0 = 0
+       cmp.ne          p6, p7 = r0, r0
+       ;;
+       ldf8            r_2 = [srp], 8
+       ldf8            u_2 = [up], 8
+       br.cloop.dptk   .grt3
+       ;;
+       xma.l           fp0b_0 = ux, v0, rx
+       xma.hu          fp1a_0 = ux, v0, rx
+       ;;
+       cmp.ne          p10, p11 = r0, r0
+       xma.l           fp0b_1 = uy, v0, ry
+       xma.hu          fp1a_1 = uy, v0, ry
+       ;;
+       getf.sig        acc0 = fp0b_0
+       xma.l           fp1b_0 = ux, v1, fp1a_0
+       xma.hu          fp2a_0 = ux, v1, fp1a_0
+       ;;
+       xma.l           fp0b_2 = u_2, v0, r_2
+       xma.hu          fp1a_2 = u_2, v0, r_2
+       ;;
+       getf.sig        pr0_1 = fp0b_1
+       xma.l           fp1b_1 = uy, v1, fp1a_1
+       xma.hu          fp2a_1 = uy, v1, fp1a_1
+       ;;
+       getf.sig        pr1_0 = fp1b_0
+       getf.sig        acc1_0 = fp2a_0
+       br              .Lcj3
+
+.grt3: ldf8            r_3 = [srp], 8
+       xma.l           fp0b_0 = ux, v0, rx
+       cmp.ne          p10, p11 = r0, r0
+       ldf8            u_3 = [up], 8
+       xma.hu          fp1a_0 = ux, v0, rx
+       ;;
+       xma.l           fp0b_1 = uy, v0, ry
+       xma.hu          fp1a_1 = uy, v0, ry
+       ;;
+       getf.sig        acc0 = fp0b_0
+       ldf8            r_0 = [srp], 8
+       xma.l           fp1b_0 = ux, v1, fp1a_0
+       ldf8            u_0 = [up], 8
+       xma.hu          fp2a_0 = ux, v1, fp1a_0
+       ;;
+       xma.l           fp0b_2 = u_2, v0, r_2
+       xma.hu          fp1a_2 = u_2, v0, r_2
+       ;;
+       getf.sig        pr0_1 = fp0b_1
+       ldf8            r_1 = [srp], 8
+       xma.l           fp1b_1 = uy, v1, fp1a_1
+       xma.hu          fp2a_1 = uy, v1, fp1a_1
+       ;;
+       ldf8            u_1 = [up], 8
+       getf.sig        pr1_0 = fp1b_0
+       ;;
+       getf.sig        acc1_0 = fp2a_0
+       xma.l           fp0b_3 = u_3, v0, r_3
+       xma.hu          fp1a_3 = u_3, v0, r_3
+       br              .LL11
+
+
+C *** MAIN LOOP START ***
+       ALIGN(32)
+.Loop:                                         C 00
+       .pred.rel "mutex", p12, p13
+       getf.sig        pr0_3 = fp0b_3
+       ldf8            r_3 = [srp], 8
+       xma.l           fp1b_3 = u_3, v1, fp1a_3
+  (p12)        add             s0 = pr1_0, acc0, 1
+  (p13)        add             s0 = pr1_0, acc0
+       xma.hu          fp2a_3 = u_3, v1, fp1a_3
+       ;;                                      C 01
+       .pred.rel "mutex", p8, p9
+       .pred.rel "mutex", p12, p13
+       ldf8            u_3 = [up], 8
+       getf.sig        pr1_2 = fp1b_2
+  (p8) cmp.leu         p6, p7 = acc0, pr0_1
+  (p9) cmp.ltu         p6, p7 = acc0, pr0_1
+  (p12)        cmp.leu         p10, p11 = s0, pr1_0
+  (p13)        cmp.ltu         p10, p11 = s0, pr1_0
+       ;;                                      C 02
+       .pred.rel "mutex", p6, p7
+       getf.sig        acc1_2 = fp2a_2
+       st8             [rp] = s0, 8
+       xma.l           fp0b_1 = u_1, v0, r_1
+  (p6) add             acc0 = pr0_2, acc1_0, 1
+  (p7) add             acc0 = pr0_2, acc1_0
+       xma.hu          fp1a_1 = u_1, v0, r_1
+       ;;                                      C 03
+.LL01:
+       .pred.rel "mutex", p10, p11
+       getf.sig        pr0_0 = fp0b_0
+       ldf8            r_0 = [srp], 8
+       xma.l           fp1b_0 = u_0, v1, fp1a_0
+  (p10)        add             s0 = pr1_1, acc0, 1
+  (p11)        add             s0 = pr1_1, acc0
+       xma.hu          fp2a_0 = u_0, v1, fp1a_0
+       ;;                                      C 04
+       .pred.rel "mutex", p6, p7
+       .pred.rel "mutex", p10, p11
+       ldf8            u_0 = [up], 8
+       getf.sig        pr1_3 = fp1b_3
+  (p6) cmp.leu         p8, p9 = acc0, pr0_2
+  (p7) cmp.ltu         p8, p9 = acc0, pr0_2
+  (p10)        cmp.leu         p12, p13 = s0, pr1_1
+  (p11)        cmp.ltu         p12, p13 = s0, pr1_1
+       ;;                                      C 05
+       .pred.rel "mutex", p8, p9
+       getf.sig        acc1_3 = fp2a_3
+       st8             [rp] = s0, 8
+       xma.l           fp0b_2 = u_2, v0, r_2
+  (p8) add             acc0 = pr0_3, acc1_1, 1
+  (p9) add             acc0 = pr0_3, acc1_1
+       xma.hu          fp1a_2 = u_2, v0, r_2
+       ;;                                      C 06
+.LL00:
+       .pred.rel "mutex", p12, p13
+       getf.sig        pr0_1 = fp0b_1
+       ldf8            r_1 = [srp], 8
+       xma.l           fp1b_1 = u_1, v1, fp1a_1
+  (p12)        add             s0 = pr1_2, acc0, 1
+  (p13)        add             s0 = pr1_2, acc0
+       xma.hu          fp2a_1 = u_1, v1, fp1a_1
+       ;;                                      C 07
+       .pred.rel "mutex", p8, p9
+       .pred.rel "mutex", p12, p13
+       ldf8            u_1 = [up], 8
+       getf.sig        pr1_0 = fp1b_0
+  (p8) cmp.leu         p6, p7 = acc0, pr0_3
+  (p9) cmp.ltu         p6, p7 = acc0, pr0_3
+  (p12)        cmp.leu         p10, p11 = s0, pr1_2
+  (p13)        cmp.ltu         p10, p11 = s0, pr1_2
+       ;;                                      C 08
+       .pred.rel "mutex", p6, p7
+       getf.sig        acc1_0 = fp2a_0
+       st8             [rp] = s0, 8
+       xma.l           fp0b_3 = u_3, v0, r_3
+  (p6) add             acc0 = pr0_0, acc1_2, 1
+  (p7) add             acc0 = pr0_0, acc1_2
+       xma.hu          fp1a_3 = u_3, v0, r_3
+       ;;                                      C 09
+.LL11:
+       .pred.rel "mutex", p10, p11
+       getf.sig        pr0_2 = fp0b_2
+       ldf8            r_2 = [srp], 8
+       xma.l           fp1b_2 = u_2, v1, fp1a_2
+  (p10)        add             s0 = pr1_3, acc0, 1
+  (p11)        add             s0 = pr1_3, acc0
+       xma.hu          fp2a_2 = u_2, v1, fp1a_2
+       ;;                                      C 10
+       .pred.rel "mutex", p6, p7
+       .pred.rel "mutex", p10, p11
+       ldf8            u_2 = [up], 8
+       getf.sig        pr1_1 = fp1b_1
+  (p6) cmp.leu         p8, p9 = acc0, pr0_0
+  (p7) cmp.ltu         p8, p9 = acc0, pr0_0
+  (p10)        cmp.leu         p12, p13 = s0, pr1_3
+  (p11)        cmp.ltu         p12, p13 = s0, pr1_3
+       ;;                                      C 11
+       .pred.rel "mutex", p8, p9
+       getf.sig        acc1_1 = fp2a_1
+       st8             [rp] = s0, 8
+       xma.l           fp0b_0 = u_0, v0, r_0
+  (p8) add             acc0 = pr0_1, acc1_3, 1
+  (p9) add             acc0 = pr0_1, acc1_3
+       xma.hu          fp1a_0 = u_0, v0, r_0
+.LL10: br.cloop.dptk   .Loop                   C 12
+       ;;
+C *** MAIN LOOP END ***
+
+.Lcj6:
+       .pred.rel "mutex", p12, p13
+       getf.sig        pr0_3 = fp0b_3
+       xma.l           fp1b_3 = u_3, v1, fp1a_3
+  (p12)        add             s0 = pr1_0, acc0, 1
+  (p13)        add             s0 = pr1_0, acc0
+       xma.hu          fp2a_3 = u_3, v1, fp1a_3
+       ;;
+       .pred.rel "mutex", p8, p9
+       .pred.rel "mutex", p12, p13
+       getf.sig        pr1_2 = fp1b_2
+  (p8) cmp.leu         p6, p7 = acc0, pr0_1
+  (p9) cmp.ltu         p6, p7 = acc0, pr0_1
+  (p12)        cmp.leu         p10, p11 = s0, pr1_0
+  (p13)        cmp.ltu         p10, p11 = s0, pr1_0
+       ;;
+       .pred.rel "mutex", p6, p7
+       getf.sig        acc1_2 = fp2a_2
+       st8             [rp] = s0, 8
+       xma.l           fp0b_1 = u_1, v0, r_1
+  (p6) add             acc0 = pr0_2, acc1_0, 1
+  (p7) add             acc0 = pr0_2, acc1_0
+       xma.hu          fp1a_1 = u_1, v0, r_1
+       ;;
+.Lcj5:
+       .pred.rel "mutex", p10, p11
+       getf.sig        pr0_0 = fp0b_0
+       xma.l           fp1b_0 = u_0, v1, fp1a_0
+  (p10)        add             s0 = pr1_1, acc0, 1
+  (p11)        add             s0 = pr1_1, acc0
+       xma.hu          fp2a_0 = u_0, v1, fp1a_0
+       ;;
+       .pred.rel "mutex", p6, p7
+       .pred.rel "mutex", p10, p11
+       getf.sig        pr1_3 = fp1b_3
+  (p6) cmp.leu         p8, p9 = acc0, pr0_2
+  (p7) cmp.ltu         p8, p9 = acc0, pr0_2
+  (p10)        cmp.leu         p12, p13 = s0, pr1_1
+  (p11)        cmp.ltu         p12, p13 = s0, pr1_1
+       ;;
+       .pred.rel "mutex", p8, p9
+       getf.sig        acc1_3 = fp2a_3
+       st8             [rp] = s0, 8
+       xma.l           fp0b_2 = u_2, v0, r_2
+  (p8) add             acc0 = pr0_3, acc1_1, 1
+  (p9) add             acc0 = pr0_3, acc1_1
+       xma.hu          fp1a_2 = u_2, v0, r_2
+       ;;
+.Lcj4:
+       .pred.rel "mutex", p12, p13
+       getf.sig        pr0_1 = fp0b_1
+       xma.l           fp1b_1 = u_1, v1, fp1a_1
+  (p12)        add             s0 = pr1_2, acc0, 1
+  (p13)        add             s0 = pr1_2, acc0
+       xma.hu          fp2a_1 = u_1, v1, fp1a_1
+       ;;
+       .pred.rel "mutex", p8, p9
+       .pred.rel "mutex", p12, p13
+       getf.sig        pr1_0 = fp1b_0
+  (p8) cmp.leu         p6, p7 = acc0, pr0_3
+  (p9) cmp.ltu         p6, p7 = acc0, pr0_3
+  (p12)        cmp.leu         p10, p11 = s0, pr1_2
+  (p13)        cmp.ltu         p10, p11 = s0, pr1_2
+       ;;
+       .pred.rel "mutex", p6, p7
+       getf.sig        acc1_0 = fp2a_0
+       st8             [rp] = s0, 8
+  (p6) add             acc0 = pr0_0, acc1_2, 1
+  (p7) add             acc0 = pr0_0, acc1_2
+       ;;
+.Lcj3:
+       .pred.rel "mutex", p10, p11
+       getf.sig        pr0_2 = fp0b_2
+       xma.l           fp1b_2 = u_2, v1, fp1a_2
+  (p10)        add             s0 = pr1_3, acc0, 1
+  (p11)        add             s0 = pr1_3, acc0
+       xma.hu          fp2a_2 = u_2, v1, fp1a_2
+       ;;
+       .pred.rel "mutex", p6, p7
+       .pred.rel "mutex", p10, p11
+       getf.sig        pr1_1 = fp1b_1
+  (p6) cmp.leu         p8, p9 = acc0, pr0_0
+  (p7) cmp.ltu         p8, p9 = acc0, pr0_0
+  (p10)        cmp.leu         p12, p13 = s0, pr1_3
+  (p11)        cmp.ltu         p12, p13 = s0, pr1_3
+       ;;
+       .pred.rel "mutex", p8, p9
+       getf.sig        acc1_1 = fp2a_1
+       st8             [rp] = s0, 8
+  (p8) add             acc0 = pr0_1, acc1_3, 1
+  (p9) add             acc0 = pr0_1, acc1_3
+       ;;
+.Lcj2:
+       .pred.rel "mutex", p12, p13
+  (p12)        add             s0 = pr1_0, acc0, 1
+  (p13)        add             s0 = pr1_0, acc0
+       ;;
+       .pred.rel "mutex", p8, p9
+       .pred.rel "mutex", p12, p13
+       getf.sig        pr1_2 = fp1b_2
+  (p8) cmp.leu         p6, p7 = acc0, pr0_1
+  (p9) cmp.ltu         p6, p7 = acc0, pr0_1
+  (p12)        cmp.leu         p10, p11 = s0, pr1_0
+  (p13)        cmp.ltu         p10, p11 = s0, pr1_0
+       ;;
+       .pred.rel "mutex", p6, p7
+       getf.sig        acc1_2 = fp2a_2
+       st8             [rp] = s0, 8
+  (p6) add             acc0 = pr0_2, acc1_0, 1
+  (p7) add             acc0 = pr0_2, acc1_0
+       ;;
+       .pred.rel "mutex", p10, p11
+  (p10)        add             s0 = pr1_1, acc0, 1
+  (p11)        add             s0 = pr1_1, acc0
+       ;;
+       .pred.rel "mutex", p6, p7
+       .pred.rel "mutex", p10, p11
+  (p6) cmp.leu         p8, p9 = acc0, pr0_2
+  (p7) cmp.ltu         p8, p9 = acc0, pr0_2
+  (p10)        cmp.leu         p12, p13 = s0, pr1_1
+  (p11)        cmp.ltu         p12, p13 = s0, pr1_1
+       ;;
+       .pred.rel "mutex", p8, p9
+       st8             [rp] = s0, 8
+  (p8) add             acc0 = pr1_2, acc1_1, 1
+  (p9) add             acc0 = pr1_2, acc1_1
+       ;;
+       .pred.rel "mutex", p8, p9
+  (p8) cmp.leu         p10, p11 = acc0, pr1_2
+  (p9) cmp.ltu         p10, p11 = acc0, pr1_2
+  (p12)        add             acc0 = 1, acc0
+       ;;
+       st8             [rp] = acc0, 8
+  (p12)        cmp.eq.or       p10, p0 = 0, acc0
+       mov             r8 = acc1_2
+       ;;
+       .pred.rel "mutex", p10, p11
+  (p10)        add             r8 = 1, r8
+       mov.i           ar.lc = r2
+       br.ret.sptk.many b0
+EPILOGUE()
+ASM_END()
diff --git a/mpn/ia64/aors_n.asm b/mpn/ia64/aors_n.asm

new file mode 100644 (file)

index 0000000..fd3aaac
--- /dev/null
+++ b/mpn/ia64/aors_n.asm
@@ -0,0 +1,611 @@
+dnl  IA-64 mpn_add_n/mpn_sub_n -- mpn addition and subtraction.
+
+dnl  Copyright 2003, 2004, 2005 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C Itanium:      2.67
+C Itanium 2:    1.25
+
+C TODO
+C  * Consider using special code for small n, using something like
+C    "switch (8 * (n >= 8) + (n mod 8))" to enter it and feed-in code.
+
+C INPUT PARAMETERS
+define(`rp',`r32')     C destination pointer: results are written through it with st8
+define(`up',`r33')     C first source pointer: read with post-incrementing ld8
+define(`vp',`r34')     C second source pointer: read with post-incrementing ld8
+define(`n',`r35')      C limb count: n mod 8 picks the feed-in path and n > 8 enables the loop
+
+ifdef(`OPERATION_add_n',`
+  define(ADDSUB,       add)   C mnemonic applied to each pair of source limbs
+  define(PRED,         ltu)   C result < first operand means the add wrapped and a carry goes out
+  define(INCR,         1)     C adjustment applied to a result when a carry comes in
+  define(LIM,          -1)    C result value that wraps when INCR is applied so the carry moves on
+  define(func, mpn_add_n)     C name handed to PROLOGUE
+')
+ifdef(`OPERATION_sub_n',`
+  define(ADDSUB,       sub)   C mnemonic applied to each pair of source limbs
+  define(PRED,         gtu)   C result > first operand means the sub wrapped and a borrow goes out
+  define(INCR,         -1)    C adjustment applied to a result when a borrow comes in
+  define(LIM,          0)     C result value that wraps when INCR is applied so the borrow moves on
+  define(func, mpn_sub_n)     C name handed to PROLOGUE
+')
+
+C Register aliases: u* hold limbs loaded from up, v* limbs loaded from vp, w* hold ADDSUB results
+define(`u0',`r14') define(`u1',`r15') define(`u2',`r16') define(`u3',`r17')  C r14 is also the n mod 8 scratch at entry
+define(`u4',`r18') define(`u5',`r19') define(`u6',`r20') define(`u7',`r21')
+define(`v0',`r24') define(`v1',`r25') define(`v2',`r26') define(`v3',`r27')
+define(`v4',`r28') define(`v5',`r29') define(`v6',`r30') define(`v7',`r31')
+define(`w0',`r22') define(`w1',`r9') define(`w2',`r8') define(`w3',`r23')  C only four result registers exist
+define(`w4',`r22') define(`w5',`r9') define(`w6',`r8') define(`w7',`r23')  C w4-w7 name the same registers as w0-w3
+define(`rpx',`r3')  C set to rp plus a small offset on each feed-in path -- NOTE(review): presumably a second store pointer, confirm against the main loop
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_sub_n)
+
+ASM_START()
+PROLOGUE(func)
+       .prologue
+       .save   ar.lc, r2
+       .body
+ifdef(`HAVE_ABI_32',`
+       addp4           rp = 0, rp              C                       M I
+       addp4           up = 0, up              C                       M I
+       addp4           vp = 0, vp              C                       M I
+       zxt4            n = n                   C                       I
+       ;;
+')
+{.mmi          C 00
+       ld8             r11 = [vp], 8           C                       M01
+       ld8             r10 = [up], 8           C                       M01
+       mov.i           r2 = ar.lc              C                       I0
+}
+{.mmi
+       and             r14 = 7, n              C                       M I
+       cmp.lt          p15, p14 = 8, n         C                       M I
+       add             n = -8, n               C                       M I
+       ;;
+}
+{.mmi          C 01
+       cmp.eq          p6, p0 = 1, r14         C                       M I
+       cmp.eq          p7, p0 = 2, r14         C                       M I
+       cmp.eq          p8, p0 = 3, r14         C                       M I
+}
+{.bbb
+   (p6)        br.dptk         .Lb001                  C                       B
+   (p7)        br.dptk         .Lb010                  C                       B
+   (p8)        br.dptk         .Lb011                  C                       B
+       ;;
+}
+{.mmi          C 02
+       cmp.eq          p9, p0 = 4, r14         C                       M I
+       cmp.eq          p10, p0 = 5, r14        C                       M I
+       cmp.eq          p11, p0 = 6, r14        C                       M I
+}
+{.bbb
+   (p9)        br.dptk         .Lb100                  C                       B
+  (p10)        br.dptk         .Lb101                  C                       B
+  (p11)        br.dptk         .Lb110                  C                       B
+       ;;
+}              C 03
+{.mmb
+       cmp.eq          p12, p0 = 7, r14        C                       M I
+       add             n = -1, n               C loop count            M I
+  (p12)        br.dptk         .Lb111                  C                       B
+}
+
+
+.Lb000:        ld8             v2 = [vp], 8            C                       M01
+       ld8             u2 = [up], 8            C                       M01
+       add             rpx = 8, rp             C                       M I
+       ;;
+       ld8             v3 = [vp], 8            C                       M01
+       ld8             u3 = [up], 8            C                       M01
+       ADDSUB          w1 = r10, r11           C                       M I
+       ;;
+       ld8             v4 = [vp], 8            C                       M01
+       ld8             u4 = [up], 8            C                       M01
+       cmp.PRED        p7, p0 = w1, r10        C                       M I
+       ;;
+       ld8             v5 = [vp], 8            C                       M01
+       ld8             u5 = [up], 8            C                       M01
+       ADDSUB          w2 = u2, v2             C                       M I
+       ;;
+       ld8             v6 = [vp], 8            C                       M01
+       ld8             u6 = [up], 8            C                       M01
+       cmp.PRED        p8, p0 = w2, u2         C                       M I
+       ;;
+       ld8             v7 = [vp], 8            C                       M01
+       ld8             u7 = [up], 8            C                       M01
+       ADDSUB          w3 = u3, v3             C                       M I
+       ;;
+       ld8             v0 = [vp], 8            C                       M01
+       ld8             u0 = [up], 8            C                       M01
+       cmp.PRED        p9, p0 = w3, u3         C                       M I
+   (p7)        cmp.eq.or       p8, p0 = LIM, w2        C                       M I
+   (p7)        add             w2 = INCR, w2           C                       M I
+  (p14)        br.cond.dptk    .Lcj8                   C                       B
+       ;;
+
+.grt8: ld8             v1 = [vp], 8            C                       M01
+       ld8             u1 = [up], 8            C                       M01
+       shr.u           n = n, 3                C                       I0
+       ;;
+       add             r11 = 512, vp
+       ld8             v2 = [vp], 8            C                       M01
+       add             r10 = 512, up
+       ld8             u2 = [up], 8            C                       M01
+       nop.i           0
+       nop.b           0
+       ;;
+       ld8             v3 = [vp], 8            C                       M01
+       ld8             u3 = [up], 8            C                       M01
+       mov.i           ar.lc = n               C                       I0
+       br              .LL000                  C                       B
+
+.Lb001:        add             rpx = 16, rp            C                       M I
+       ADDSUB          w0 = r10, r11           C                       M I
+  (p15)        br.cond.dpnt    .grt1                   C                       B
+       ;;
+       cmp.PRED        p6, p0 = w0, r10        C                       M I
+       mov             r8 = 0                  C                       M I
+       br              .Lcj1                   C                       B
+
+.grt1: ld8             v1 = [vp], 8            C                       M01
+       ld8             u1 = [up], 8            C                       M01
+       shr.u           n = n, 3                C                       I0
+       ;;
+       ld8             v2 = [vp], 8            C                       M01
+       ld8             u2 = [up], 8            C                       M01
+       cmp.ne          p9, p0 = r0, r0         C read near Loop
+       ;;
+       ld8             v3 = [vp], 8            C                       M01
+       ld8             u3 = [up], 8            C                       M01
+       mov.i           ar.lc = n               C                       I0
+       ;;
+       ld8             v4 = [vp], 8            C                       M01
+       ld8             u4 = [up], 8            C                       M01
+       cmp.PRED        p6, p0 = w0, r10        C                       M I
+       ;;
+       ld8             v5 = [vp], 8            C                       M01
+       ld8             u5 = [up], 8            C                       M01
+       ADDSUB          w1 = u1, v1             C                       M I
+       ;;
+       ld8             v6 = [vp], 8            C                       M01
+       ld8             u6 = [up], 8            C                       M01
+       cmp.PRED        p7, p0 = w1, u1         C                       M I
+       ;;
+       ld8             v7 = [vp], 8            C                       M01
+       ld8             u7 = [up], 8            C                       M01
+       ADDSUB          w2 = u2, v2             C                       M I
+       ;;
+       add             r11 = 512, vp
+       ld8             v0 = [vp], 8            C                       M01
+       add             r10 = 512, up
+       ld8             u0 = [up], 8            C                       M01
+       br.cloop.dptk   .Loop                   C                       B
+       br              .Lcj9                   C                       B
+
+.Lb010:        ld8             v0 = [vp], 8            C                       M01
+       ld8             u0 = [up], 8            C                       M01
+       add             rpx = 24, rp            C                       M I
+       ADDSUB          w7 = r10, r11           C                       M I
+  (p15)        br.cond.dpnt    .grt2                   C                       B
+       ;;
+       cmp.PRED        p9, p0 = w7, r10        C                       M I
+       ADDSUB          w0 = u0, v0             C                       M I
+       br              .Lcj2                   C                       B
+
+.grt2: ld8             v1 = [vp], 8            C                       M01
+       ld8             u1 = [up], 8            C                       M01
+       shr.u           n = n, 3                C                       I0
+       ;;
+       ld8             v2 = [vp], 8            C                       M01
+       ld8             u2 = [up], 8            C                       M01
+       ;;
+       ld8             v3 = [vp], 8            C                       M01
+       ld8             u3 = [up], 8            C                       M01
+       mov.i           ar.lc = n               C                       I0
+       ;;
+       ld8             v4 = [vp], 8            C                       M01
+       ld8             u4 = [up], 8            C                       M01
+       ;;
+       ld8             v5 = [vp], 8            C                       M01
+       ld8             u5 = [up], 8            C                       M01
+       cmp.PRED        p9, p0 = w7, r10        C                       M I
+       ;;
+       ld8             v6 = [vp], 8            C                       M01
+       ld8             u6 = [up], 8            C                       M01
+       ADDSUB          w0 = u0, v0             C                       M I
+       ;;
+       add             r11 = 512, vp
+       ld8             v7 = [vp], 8            C                       M01
+       add             r10 = 512, up
+       ld8             u7 = [up], 8            C                       M01
+       br              .LL01x                  C                       B
+
+.Lb011:        ld8             v7 = [vp], 8            C                       M01
+       ld8             u7 = [up], 8            C                       M01
+       ADDSUB          w6 = r10, r11           C                       M I
+       ;;
+       ld8             v0 = [vp], 8            C                       M01
+       ld8             u0 = [up], 8            C                       M01
+  (p15)        br.cond.dpnt    .grt3                   C                       B
+       ;;
+       cmp.PRED        p8, p0 = w6, r10        C                       M I
+       ADDSUB          w7 = u7, v7             C                       M I
+       ;;
+       st8             [rp] = w6, 8            C                       M23
+       cmp.PRED        p9, p0 = w7, u7         C                       M I
+       br              .Lcj3                   C                       B
+
+.grt3: ld8             v1 = [vp], 8            C                       M01
+       ld8             u1 = [up], 8            C                       M01
+       add             rpx = 32, rp            C                       M I
+       ;;
+       ld8             v2 = [vp], 8            C                       M01
+       ld8             u2 = [up], 8            C                       M01
+       shr.u           n = n, 3                C                       I0
+       ;;
+       ld8             v3 = [vp], 8            C                       M01
+       ld8             u3 = [up], 8            C                       M01
+       cmp.PRED        p8, p0 = w6, r10        C                       M I
+       ;;
+       ld8             v4 = [vp], 8            C                       M01
+       ld8             u4 = [up], 8            C                       M01
+       mov.i           ar.lc = n               C                       I0
+       ADDSUB          w7 = u7, v7             C                       M I
+       nop.i           0
+       nop.b           0
+       ;;
+       ld8             v5 = [vp], 8            C                       M01
+       ld8             u5 = [up], 8            C                       M01
+       cmp.PRED        p9, p0 = w7, u7         C                       M I
+       ;;
+       add             r11 = 512, vp
+       ld8             v6 = [vp], 8            C                       M01
+       add             r10 = 512, up
+       ld8             u6 = [up], 8            C                       M01
+   (p8)        cmp.eq.or       p9, p0 = LIM, w7        C                       M I
+       ;;
+       ld8             v7 = [vp], 8            C                       M01
+       ld8             u7 = [up], 8            C                       M01
+   (p8)        add             w7 = INCR, w7           C                       M I
+       st8             [rp] = w6, 8            C                       M23
+       ADDSUB          w0 = u0, v0             C                       M I
+       br              .LL01x                  C                       B
+
+.Lb100:        ld8             v6 = [vp], 8            C                       M01
+       ld8             u6 = [up], 8            C                       M01
+       add             rpx = 8, rp             C                       M I
+       ;;
+       ld8             v7 = [vp], 8            C                       M01
+       ld8             u7 = [up], 8            C                       M01
+       ADDSUB          w5 = r10, r11           C                       M I
+       ;;
+       ld8             v0 = [vp], 8            C                       M01
+       ld8             u0 = [up], 8            C                       M01
+  (p15)        br.cond.dpnt    .grt4                   C                       B
+       ;;
+       cmp.PRED        p7, p0 = w5, r10        C                       M I
+       ADDSUB          w6 = u6, v6             C                       M I
+       ;;
+       cmp.PRED        p8, p0 = w6, u6         C                       M I
+       ADDSUB          w7 = u7, v7             C                       M I
+       br              .Lcj4                   C                       B
+
+.grt4: ld8             v1 = [vp], 8            C                       M01
+       ld8             u1 = [up], 8            C                       M01
+       shr.u           n = n, 3                C                       I0
+       cmp.PRED        p7, p0 = w5, r10        C                       M I
+       ;;
+       ld8             v2 = [vp], 8            C                       M01
+       ld8             u2 = [up], 8            C                       M01
+       ADDSUB          w6 = u6, v6             C                       M I
+       ;;
+       ld8             v3 = [vp], 8            C                       M01
+       ld8             u3 = [up], 8            C                       M01
+       cmp.PRED        p8, p0 = w6, u6         C                       M I
+       ;;
+       ld8             v4 = [vp], 8            C                       M01
+       ld8             u4 = [up], 8            C                       M01
+       mov.i           ar.lc = n               C                       I0
+       ;;
+       ld8             v5 = [vp], 8            C                       M01
+       ld8             u5 = [up], 8            C                       M01
+       ADDSUB          w7 = u7, v7             C                       M I
+       ;;
+       add             r11 = 512, vp
+       ld8             v6 = [vp], 8            C                       M01
+       add             r10 = 512, up
+       ld8             u6 = [up], 8            C                       M01
+       cmp.PRED        p9, p0 = w7, u7         C                       M I
+       ;;
+       ld8             v7 = [vp], 8            C                       M01
+       ld8             u7 = [up], 8            C                       M01
+   (p7)        cmp.eq.or       p8, p0 = LIM, w6        C                       M I
+   (p7)        add             w6 = INCR, w6           C                       M I
+       br              .LL100                  C                       B
+
+.Lb101:        ld8             v5 = [vp], 8            C                       M01
+       ld8             u5 = [up], 8            C                       M01
+       add             rpx = 16, rp            C                       M I
+       ;;
+       ld8             v6 = [vp], 8            C                       M01
+       ld8             u6 = [up], 8            C                       M01
+       ADDSUB          w4 = r10, r11           C                       M I
+       ;;
+       ld8             v7 = [vp], 8            C                       M01
+       ld8             u7 = [up], 8            C                       M01
+       cmp.PRED        p6, p0 = w4, r10        C                       M I
+       ;;
+       ld8             v0 = [vp], 8            C                       M01
+       ld8             u0 = [up], 8            C                       M01
+       ADDSUB          w5 = u5, v5             C                       M I
+       shr.u           n = n, 3                C                       I0
+  (p15)        br.cond.dpnt    .grt5                   C                       B
+       ;;
+       cmp.PRED        p7, p0 = w5, u5         C                       M I
+       ADDSUB          w6 = u6, v6             C                       M I
+       br              .Lcj5                   C                       B
+
+.grt5: ld8             v1 = [vp], 8            C                       M01
+       ld8             u1 = [up], 8            C                       M01
+       ;;
+       ld8             v2 = [vp], 8            C                       M01
+       ld8             u2 = [up], 8            C                       M01
+       mov.i           ar.lc = n               C                       I0
+       ;;
+       ld8             v3 = [vp], 8            C                       M01
+       ld8             u3 = [up], 8            C                       M01
+       cmp.PRED        p7, p0 = w5, u5         C                       M I
+       ;;
+       ld8             v4 = [vp], 8            C                       M01
+       ld8             u4 = [up], 8            C                       M01
+       ADDSUB          w6 = u6, v6             C                       M I
+       ;;
+       add             r11 = 512, vp
+       ld8             v5 = [vp], 8            C                       M01
+       add             r10 = 512, up
+       ld8             u5 = [up], 8            C                       M01
+       br              .LL101                  C                       B
+
+.Lb110:        ld8             v4 = [vp], 8            C                       M01
+       ld8             u4 = [up], 8            C                       M01
+       add             rpx = 24, rp            C                       M I
+       ;;
+       ld8             v5 = [vp], 8            C                       M01
+       ld8             u5 = [up], 8            C                       M01
+       ADDSUB          w3 = r10, r11           C                       M I
+       ;;
+       ld8             v6 = [vp], 8            C                       M01
+       ld8             u6 = [up], 8            C                       M01
+       shr.u           n = n, 3                C                       I0
+       ;;
+       ld8             v7 = [vp], 8            C                       M01
+       ld8             u7 = [up], 8            C                       M01
+       cmp.PRED        p9, p0 = w3, r10        C                       M I
+       ;;
+       ld8             v0 = [vp], 8            C                       M01
+       ld8             u0 = [up], 8            C                       M01
+       ADDSUB          w4 = u4, v4             C                       M I
+  (p14)        br.cond.dptk    .Lcj67                  C                       B
+       ;;
+
+.grt6: ld8             v1 = [vp], 8            C                       M01
+       ld8             u1 = [up], 8            C                       M01
+       mov.i           ar.lc = n               C                       I0
+       cmp.PRED        p9, p0 = w3, r10        C                       M I
+       nop.i           0
+       nop.b           0
+       ;;
+       ld8             v2 = [vp], 8            C                       M01
+       ld8             u2 = [up], 8            C                       M01
+       ADDSUB          w4 = u4, v4             C                       M I
+       ;;
+       add             r11 = 512, vp
+       ld8             v3 = [vp], 8            C                       M01
+       add             r10 = 512, up
+       ld8             u3 = [up], 8            C                       M01
+       br              .LL11x                  C                       B
+
+.Lb111:        ld8             v3 = [vp], 8            C                       M01
+       ld8             u3 = [up], 8            C                       M01
+       add             rpx = 32, rp            C                       M I
+       ;;
+       ld8             v4 = [vp], 8            C                       M01
+       ld8             u4 = [up], 8            C                       M01
+       ADDSUB          w2 = r10, r11           C                       M I
+       ;;
+       ld8             v5 = [vp], 8            C                       M01
+       ld8             u5 = [up], 8            C                       M01
+       cmp.PRED        p8, p0 = w2, r10        C                       M I
+       ;;
+       ld8             v6 = [vp], 8            C                       M01
+       ld8             u6 = [up], 8            C                       M01
+       ADDSUB          w3 = u3, v3             C                       M I
+       ;;
+       ld8             v7 = [vp], 8            C                       M01
+       ld8             u7 = [up], 8            C                       M01
+       cmp.PRED        p9, p0 = w3, u3         C                       M I
+       ;;
+       ld8             v0 = [vp], 8            C                       M01
+       ld8             u0 = [up], 8            C                       M01
+  (p15)        br.cond.dpnt    .grt7                   C                       B
+       ;;
+       st8             [rp] = w2, 8            C                       M23
+   (p8)        cmp.eq.or       p9, p0 = LIM, w3        C                       M I
+   (p8)        add             w3 = INCR, w3           C                       M I
+       ADDSUB          w4 = u4, v4             C                       M I
+       br              .Lcj67                  C                       B
+
+.grt7: ld8             v1 = [vp], 8            C                       M01
+       ld8             u1 = [up], 8            C                       M01
+       shr.u           n = n, 3                C                       I0
+   (p8)        cmp.eq.or       p9, p0 = LIM, w3        C                       M I
+       nop.i           0
+       nop.b           0
+       ;;
+       add             r11 = 512, vp
+       ld8             v2 = [vp], 8            C                       M01
+       add             r10 = 512, up
+       ld8             u2 = [up], 8            C                       M01
+   (p8)        add             w3 = INCR, w3           C                       M I
+       nop.b           0
+       ;;
+       ld8             v3 = [vp], 8            C                       M01
+       ld8             u3 = [up], 8            C                       M01
+       mov.i           ar.lc = n               C                       I0
+       st8             [rp] = w2, 8            C                       M23
+       ADDSUB          w4 = u4, v4             C                       M I
+       br              .LL11x                  C                       B
+
+C *** MAIN LOOP START ***
+       ALIGN(32)
+.Loop: ld8             v1 = [vp], 8            C                       M01
+       cmp.PRED        p7, p0 = w1, u1         C                       M I
+   (p9)        cmp.eq.or       p6, p0 = LIM, w0        C                       M I
+       ld8             u1 = [up], 8            C                       M01
+   (p9)        add             w0 = INCR, w0           C                       M I
+       ADDSUB          w2 = u2, v2             C                       M I
+       ;;
+       ld8             v2 = [vp], 8            C                       M01
+       cmp.PRED        p8, p0 = w2, u2         C                       M I
+   (p6)        cmp.eq.or       p7, p0 = LIM, w1        C                       M I
+       ld8             u2 = [up], 8            C                       M01
+   (p6)        add             w1 = INCR, w1           C                       M I
+       ADDSUB          w3 = u3, v3             C                       M I
+       ;;
+       st8             [rp] = w0, 8            C                       M23
+       ld8             v3 = [vp], 8            C                       M01
+       cmp.PRED        p9, p0 = w3, u3         C                       M I
+   (p7)        cmp.eq.or       p8, p0 = LIM, w2        C                       M I
+       ld8             u3 = [up], 8            C                       M01
+   (p7)        add             w2 = INCR, w2           C                       M I
+       ;;
+.LL000:        st8             [rp] = w1, 16           C                       M23
+       st8             [rpx] = w2, 32          C                       M23
+   (p8)        cmp.eq.or       p9, p0 = LIM, w3        C                       M I
+       lfetch          [r10], 64
+   (p8)        add             w3 = INCR, w3           C                       M I
+       ADDSUB          w4 = u4, v4             C                       M I
+       ;;
+.LL11x:        st8             [rp] = w3, 8            C                       M23
+       ld8             v4 = [vp], 8            C                       M01
+       cmp.PRED        p6, p0 = w4, u4         C                       M I
+       ld8             u4 = [up], 8            C                       M01
+       ADDSUB          w5 = u5, v5             C                       M I
+       ;;
+       ld8             v5 = [vp], 8            C                       M01
+       cmp.PRED        p7, p0 = w5, u5         C                       M I
+   (p9)        cmp.eq.or       p6, p0 = LIM, w4        C                       M I
+       ld8             u5 = [up], 8            C                       M01
+   (p9)        add             w4 = INCR, w4           C                       M I
+       ADDSUB          w6 = u6, v6             C                       M I
+       ;;
+.LL101:        ld8             v6 = [vp], 8            C                       M01
+       cmp.PRED        p8, p0 = w6, u6         C                       M I
+   (p6)        cmp.eq.or       p7, p0 = LIM, w5        C                       M I
+       ld8             u6 = [up], 8            C                       M01
+   (p6)        add             w5 = INCR, w5           C                       M I
+       ADDSUB          w7 = u7, v7             C                       M I
+       ;;
+       st8             [rp] = w4, 8            C                       M23
+       ld8             v7 = [vp], 8            C                       M01
+       cmp.PRED        p9, p0 = w7, u7         C                       M I
+   (p7)        cmp.eq.or       p8, p0 = LIM, w6        C                       M I
+       ld8             u7 = [up], 8            C                       M01
+   (p7)        add             w6 = INCR, w6           C                       M I
+       ;;
+.LL100:        st8             [rp] = w5, 16           C                       M23
+       st8             [rpx] = w6, 32          C                       M23
+   (p8)        cmp.eq.or       p9, p0 = LIM, w7        C                       M I
+       lfetch          [r11], 64
+   (p8)        add             w7 = INCR, w7           C                       M I
+       ADDSUB          w0 = u0, v0             C                       M I
+       ;;
+.LL01x:        st8             [rp] = w7, 8            C                       M23
+       ld8             v0 = [vp], 8            C                       M01
+       cmp.PRED        p6, p0 = w0, u0         C                       M I
+       ld8             u0 = [up], 8            C                       M01
+       ADDSUB          w1 = u1, v1             C                       M I
+       br.cloop.dptk   .Loop                   C                       B
+       ;;
+C *** MAIN LOOP END ***
+
+       cmp.PRED        p7, p0 = w1, u1         C                       M I
+   (p9)        cmp.eq.or       p6, p0 = LIM, w0        C                       M I
+   (p9)        add             w0 = INCR, w0           C                       M I
+       ADDSUB          w2 = u2, v2             C                       M I
+       ;;
+.Lcj9: cmp.PRED        p8, p0 = w2, u2         C                       M I
+   (p6)        cmp.eq.or       p7, p0 = LIM, w1        C                       M I
+       st8             [rp] = w0, 8            C                       M23
+   (p6)        add             w1 = INCR, w1           C                       M I
+       ADDSUB          w3 = u3, v3             C                       M I
+       ;;
+       cmp.PRED        p9, p0 = w3, u3         C                       M I
+   (p7)        cmp.eq.or       p8, p0 = LIM, w2        C                       M I
+   (p7)        add             w2 = INCR, w2           C                       M I
+       ;;
+.Lcj8: st8             [rp] = w1, 16           C                       M23
+       st8             [rpx] = w2, 32          C                       M23
+   (p8)        cmp.eq.or       p9, p0 = LIM, w3        C                       M I
+   (p8)        add             w3 = INCR, w3           C                       M I
+       ADDSUB          w4 = u4, v4             C                       M I
+       ;;
+.Lcj67:        st8             [rp] = w3, 8            C                       M23
+       cmp.PRED        p6, p0 = w4, u4         C                       M I
+       ADDSUB          w5 = u5, v5             C                       M I
+       ;;
+       cmp.PRED        p7, p0 = w5, u5         C                       M I
+   (p9)        cmp.eq.or       p6, p0 = LIM, w4        C                       M I
+   (p9)        add             w4 = INCR, w4           C                       M I
+       ADDSUB          w6 = u6, v6             C                       M I
+       ;;
+.Lcj5: cmp.PRED        p8, p0 = w6, u6         C                       M I
+   (p6)        cmp.eq.or       p7, p0 = LIM, w5        C                       M I
+       st8             [rp] = w4, 8            C                       M23
+   (p6)        add             w5 = INCR, w5           C                       M I
+       ADDSUB          w7 = u7, v7             C                       M I
+       ;;
+.Lcj4: cmp.PRED        p9, p0 = w7, u7         C                       M I
+   (p7)        cmp.eq.or       p8, p0 = LIM, w6        C                       M I
+   (p7)        add             w6 = INCR, w6           C                       M I
+       ;;
+       st8             [rp] = w5, 16           C                       M23
+       st8             [rpx] = w6, 32          C                       M23
+.Lcj3:
+   (p8)        cmp.eq.or       p9, p0 = LIM, w7        C                       M I
+   (p8)        add             w7 = INCR, w7           C                       M I
+       ADDSUB          w0 = u0, v0             C                       M I
+       ;;
+.Lcj2: st8             [rp] = w7, 8            C                       M23
+       cmp.PRED        p6, p0 = w0, u0         C                       M I
+       ;;
+   (p9)        cmp.eq.or       p6, p0 = LIM, w0        C                       M I
+   (p9)        add             w0 = INCR, w0           C                       M I
+       mov             r8 = 0                  C                       M I
+       ;;
+.Lcj1: st8             [rp] = w0, 8            C                       M23
+       mov.i           ar.lc = r2              C                       I0
+   (p6)        mov             r8 = 1                  C                       M I
+       br.ret.sptk.many b0                     C                       B
+EPILOGUE()
+ASM_END()
diff --git a/mpn/ia64/aorslsh1_n.asm b/mpn/ia64/aorslsh1_n.asm

new file mode 100644 (file)

index 0000000..5348149
--- /dev/null
+++ b/mpn/ia64/aorslsh1_n.asm
@@ -0,0 +1,323 @@
+dnl  IA-64 mpn_addlsh1_n/mpn_sublsh1_n -- rp[] = up[] +- (vp[] << 1).
+
+dnl  Copyright 2003, 2004, 2005 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C Itanium:      3.0
+C Itanium 2:    1.5
+
+C TODO
+C  * Use shladd in feed-in code (for mpn_addlsh1_n).
+
+C INPUT PARAMETERS
+C rp is the destination pointer, up and vp are the two source pointers and n
+C is the limb count, matching the file header: rp[] = up[] +- (vp[] << 1).
+define(`rp',`r32')
+define(`up',`r33')
+define(`vp',`r34')
+define(`n',`r35')
+
+C Operation selection.  The macros below are only defined when one of the
+C OPERATION_* symbols is defined.
+C NOTE(review): presumably the build defines exactly one of them -- confirm.
+C   ADDSUB  the limb operation, add or sub
+C   PRED    unsigned compare relation which, applied to the result and the
+C           first operand, detects a carry or borrow out of ADDSUB
+C   INCR    adjustment applied to a result limb for an incoming carry or borrow
+C   LIM     result value at which applying INCR wraps around, so that the
+C           incoming carry or borrow propagates to the next limb
+C   func    name of the function being assembled
+ifdef(`OPERATION_addlsh1_n',`
+  define(ADDSUB,       add)
+  define(PRED,        ltu)
+  define(INCR,        1)
+  define(LIM,         -1)
+  define(func, mpn_addlsh1_n)
+')
+ifdef(`OPERATION_sublsh1_n',`
+  define(ADDSUB,       sub)
+  define(PRED,        gtu)
+  define(INCR,        -1)
+  define(LIM,         0)
+  define(func, mpn_sublsh1_n)
+')
+
+C Some useful aliases for registers we use
+C u0-u3 hold up[] limbs, v0-v3 hold vp[] limbs, w0-w3 hold result limbs and
+C x0-x3 hold limbs of vp[] shifted left by one bit.  x2 and x3 share r30 and
+C r31 with x0 and x1.  s0-s3 are not referenced by the code in this file.
+define(`u0',`r14') define(`u1',`r15') define(`u2',`r16') define(`u3',`r17')
+define(`v0',`r18') define(`v1',`r19') define(`v2',`r20') define(`v3',`r21')
+define(`w0',`r22') define(`w1',`r23') define(`w2',`r24') define(`w3',`r25')
+define(`s0',`r26') define(`s1',`r27') define(`s2',`r28') define(`s3',`r29')
+define(`x0',`r30') define(`x1',`r31') define(`x2',`r30') define(`x3',`r31')
+
+MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)
+
+ASM_START()
+C func is mpn_addlsh1_n or mpn_sublsh1_n, depending on the OPERATION_* symbol.
+C For i = 0 .. n-1 it stores to rp[i] the limb up[i] ADDSUB (limb i of vp[]
+C shifted left by one bit), with carry or borrow propagated between limbs.
+C
+C Arguments: rp = r32, up = r33, vp = r34, n = r35.
+C Result:    r8 = bit 63 of the most significant vp limb, plus 1 if the most
+C            significant ADDSUB produced a carry or borrow.
+C
+C ar.lc is saved in r2 on entry and restored at .Lcj1.  The only return that
+C bypasses .Lcj1 is the n == 1 path, which never writes ar.lc.
+C NOTE(review): there is no n == 0 path; presumably callers guarantee n >= 1.
+PROLOGUE(func)
+       .prologue
+       .save   ar.lc, r2
+       .body
+C Under the 32-bit ABI the pointer arguments are widened with addp4 and n is
+C zero extended from 32 bits.
+ifdef(`HAVE_ABI_32',`
+       addp4           rp = 0, rp              C                       M I
+       addp4           up = 0, up              C                       M I
+       addp4           vp = 0, vp              C                       M I
+       zxt4            n = n                   C                       I
+       ;;
+')
+C Load the least significant limbs into r11 (vp) and r10 (up), save ar.lc in
+C r2, and dispatch on n mod 4 held in r14.  p15 is set when n > 4 and selects
+C the .grtN variant of each feed-in block.  n is replaced by n - 4 here; the
+C feed-in blocks shift it right by 2 to form the loop count.
+ {.mmi;        ld8             r11 = [vp], 8           C                       M01
+       ld8             r10 = [up], 8           C                       M01
+       mov.i           r2 = ar.lc              C                       I0
+}{.mmi;        and             r14 = 3, n              C                       M I
+       cmp.lt          p15, p0 = 4, n          C                       M I
+       add             n = -4, n               C                       M I
+       ;;
+}{.mmi;        cmp.eq          p6, p0 = 1, r14         C                       M I
+       cmp.eq          p7, p0 = 2, r14         C                       M I
+       cmp.eq          p8, p0 = 3, r14         C                       M I
+}{.bbb
+  (p6) br.dptk         .Lb01                   C                       B
+  (p7) br.dptk         .Lb10                   C                       B
+  (p8) br.dptk         .Lb11                   C                       B
+}
+
+C n mod 4 == 0.  The first vp limb is doubled with a plain add; later limbs of
+C the shifted operand come from shrp.  Without p15 this is n == 4 and the code
+C joins the wind-down at .Lcj4.
+.Lb00: ld8             v0 = [vp], 8            C                       M01
+       ld8             u0 = [up], 8            C                       M01
+       shr.u           n = n, 2                C                       I0
+       ;;
+       ld8             v1 = [vp], 8            C                       M01
+       ld8             u1 = [up], 8            C                       M01
+       add             x3 = r11, r11           C                       M I
+       ;;
+       ld8             v2 = [vp], 8            C                       M01
+       ld8             u2 = [up], 8            C                       M01
+       ADDSUB          w3 = r10, x3            C                       M I
+  (p15)        br.dpnt         .grt4                   C                       B
+       ;;
+       shrp            x0 = v0, r11, 63        C                       I0
+       cmp.PRED        p8, p0 = w3, r10        C                       M I
+       ;;
+       shrp            x1 = v1, v0, 63         C                       I0
+       ADDSUB          w0 = u0, x0             C                       M I
+       ;;
+       cmp.PRED        p6, p0 = w0, u0         C                       M I
+       ADDSUB          w1 = u1, x1             C                       M I
+       br              .Lcj4                   C                       B
+
+C n mod 4 == 0 with n > 4.  The loop count is ((n - 4) >> 2) - 1 and the main
+C loop is entered at .LL00.
+.grt4: ld8             v3 = [vp], 8            C                       M01
+       shrp            x0 = v0, r11, 63        C                       I0
+       cmp.PRED        p8, p0 = w3, r10        C                       M I
+       add             n = -1, n
+       ;;
+       ld8             u3 = [up], 8            C                       M01
+       mov.i           ar.lc = n               C                       I0
+       shrp            x1 = v1, v0, 63         C                       I0
+       ld8             v0 = [vp], 8            C                       M01
+       ADDSUB          w0 = u0, x0             C                       M I
+       ;;
+       cmp.PRED        p6, p0 = w0, u0         C                       M I
+       ld8             u0 = [up], 8            C                       M01
+       ADDSUB          w1 = u1, x1             C                       M I
+       br              .LL00                   C                       B
+
+C n mod 4 == 1.  Without p15 this is n == 1: the single limb is computed and
+C stored and the function returns directly, without having written ar.lc.
+.Lb01: add             x2 = r11, r11           C                       M I
+       shr.u           n = n, 2                C                       I0
+  (p15)        br.dpnt         .grt1                   C                       B
+       ;;
+       ADDSUB          w2 = r10, x2            C                       M I
+       shr.u           r8 = r11, 63            C retval                I0
+       ;;
+       cmp.PRED        p6, p0 = w2, r10        C                       M I
+       ;;
+       st8             [rp] = w2, 8            C                       M23
+   (p6)        add             r8 = 1, r8              C                       M I
+       br.ret.sptk.many b0                     C                       B
+
+C n mod 4 == 1 with n > 4.  ar.lc is set to (n - 4) >> 2.  The br.cloop below
+C falls through towards .Lcj5 when that count is zero, and otherwise decrements
+C ar.lc and enters the main loop at .LL01 by way of .grt5.
+.grt1: ld8             v3 = [vp], 8            C                       M01
+       ld8             u3 = [up], 8            C                       M01
+       mov.i           ar.lc = n               C FIXME swap with next  I0
+       ;;
+       ld8             v0 = [vp], 8            C                       M01
+       ld8             u0 = [up], 8            C                       M01
+       ADDSUB          w2 = r10, x2
+       ;;
+       ld8             v1 = [vp], 8            C                       M01
+       ld8             u1 = [up], 8            C                       M01
+       shrp            x3 = v3, r11, 63        C                       I0
+       ;;
+       ld8             v2 = [vp], 8            C                       M01
+       ld8             u2 = [up], 8            C                       M01
+       cmp.PRED        p6, p0 = w2, r10        C                       M I
+       ADDSUB          w3 = u3, x3             C                       M I
+       br.cloop.dptk   .grt5                   C                       B
+       ;;
+       shrp            x0 = v0, v3, 63         C                       I0
+       cmp.PRED        p8, p0 = w3, u3         C                       M I
+       br              .Lcj5                   C                       B
+
+.grt5: shrp            x0 = v0, v3, 63         C                       I0
+       ld8             v3 = [vp], 8            C                       M01
+       cmp.PRED        p8, p0 = w3, u3         C                       M I
+       br              .LL01                   C                       B
+
+C n mod 4 == 2.  Without p15 this is n == 2 and the code joins the wind-down
+C at .Lcj2 with r8 already holding bit 63 of the last vp limb.
+.Lb10: ld8             v2 = [vp], 8            C                       M01
+       ld8             u2 = [up], 8            C                       M01
+       shr.u           n = n, 2                C                       I0
+       add             x1 = r11, r11           C                       M I
+  (p15)        br.dpnt         .grt2                   C                       B
+       ;;
+       ADDSUB          w1 = r10, x1            C                       M I
+       shrp            x2 = v2, r11, 63        C                       I0
+       ;;
+       cmp.PRED        p8, p0 = w1, r10        C                       M I
+       ADDSUB          w2 = u2, x2             C                       M I
+       shr.u           r8 = v2, 63             C retval                I0
+       ;;
+       cmp.PRED        p6, p0 = w2, u2         C                       M I
+       br              .Lcj2                   C                       B
+
+C n mod 4 == 2 with n > 4.  ar.lc is set to (n - 4) >> 2.  The br.cloop enters
+C the main loop at its top when that count is nonzero; otherwise control goes
+C to .Lskip.
+.grt2: ld8             v3 = [vp], 8            C                       M01
+       ld8             u3 = [up], 8            C                       M01
+       mov.i           ar.lc = n               C                       I0
+       ;;
+       ld8             v0 = [vp], 8            C                       M01
+       ld8             u0 = [up], 8            C                       M01
+       ADDSUB          w1 = r10, x1            C                       M I
+       ;;
+       ld8             v1 = [vp], 8            C                       M01
+       shrp            x2 = v2, r11, 63        C                       I0
+       cmp.PRED        p8, p0 = w1, r10        C                       M I
+       ;;
+       ld8             u1 = [up], 8            C                       M01
+       shrp            x3 = v3, v2, 63         C                       I0
+       ld8             v2 = [vp], 8            C                       M01
+       ADDSUB          w2 = u2, x2             C                       M I
+       ;;
+       cmp.PRED        p6, p0 = w2, u2         C                       M I
+       ld8             u2 = [up], 8            C                       M01
+       ADDSUB          w3 = u3, x3             C                       M I
+       br.cloop.dpnt   .Loop                   C                       B
+       br              .Lskip                  C                       B
+
+C n mod 4 == 3.  Without p15 this is n == 3 and the code joins the wind-down
+C at .Lcj3.
+.Lb11: ld8             v1 = [vp], 8            C                       M01
+       ld8             u1 = [up], 8            C                       M01
+       shr.u           n = n, 2                C                       I0
+       add             x0 = r11, r11           C                       M I
+       ;;
+       ld8             v2 = [vp], 8            C                       M01
+       ld8             u2 = [up], 8            C                       M01
+  (p15)        br.dpnt         .grt3                   C                       B
+       ;;
+
+       shrp            x1 = v1, r11, 63        C                       I0
+       ADDSUB          w0 = r10, x0            C                       M I
+       ;;
+       cmp.PRED        p6, p0 = w0, r10        C                       M I
+       ADDSUB          w1 = u1, x1             C                       M I
+       ;;
+       shrp            x2 = v2, v1, 63         C                       I0
+       cmp.PRED        p8, p0 = w1, u1         C                       M I
+       br              .Lcj3                   C                       B
+
+C n mod 4 == 3 with n > 4.  ar.lc is set to (n - 4) >> 2 and the main loop is
+C entered at .LL11.
+.grt3: ld8             v3 = [vp], 8            C                       M01
+       ld8             u3 = [up], 8            C                       M01
+       mov.i           ar.lc = n               C                       I0
+       shrp            x1 = v1, r11, 63        C                       I0
+       ADDSUB          w0 = r10, x0            C                       M I
+       ;;
+       ld8             v0 = [vp], 8            C                       M01
+       cmp.PRED        p6, p0 = w0, r10        C                       M I
+       ld8             u0 = [up], 8            C                       M01
+       ADDSUB          w1 = u1, x1             C                       M I
+       ;;
+       shrp            x2 = v2, v1, 63         C                       I0
+       ld8             v1 = [vp], 8            C                       M01
+       cmp.PRED        p8, p0 = w1, u1         C                       M I
+       br              .LL11                   C                       B
+
+
+C The main loop handles four limbs per iteration.  Each limb goes through these
+C steps, spread over successive instruction groups:
+C   shrp x = v[i], v[i-1], 63       limb i of vp[] shifted left by one bit
+C   ADDSUB w = u, x                 raw result limb
+C   cmp.PRED p_out = w, u           set if ADDSUB itself carried or borrowed
+C   (p_in) cmp.eq.or p_out = LIM, w also set if applying the carry in wraps w
+C   (p_in) add w = INCR, w          apply the incoming carry or borrow
+C p6 and p8 alternate in the roles of p_in and p_out from one limb to the next.
+C The feed-in blocks enter at .Loop, .LL01, .LL00 or .LL11.
+C *** MAIN LOOP START ***
+       ALIGN(32)
+.Loop: st8             [rp] = w1, 8            C                       M23
+       shrp            x0 = v0, v3, 63         C                       I0
+   (p8)        cmp.eq.or       p6, p0 = LIM, w2        C                       M I
+   (p8)        add             w2 = INCR, w2           C                       M I
+       ld8             v3 = [vp], 8            C                       M01
+       cmp.PRED        p8, p0 = w3, u3         C                       M I
+       ;;
+.LL01: ld8             u3 = [up], 8            C                       M01
+       shrp            x1 = v1, v0, 63         C                       I0
+   (p6)        cmp.eq.or       p8, p0 = LIM, w3        C                       M I
+   (p6)        add             w3 = INCR, w3           C                       M I
+       ld8             v0 = [vp], 8            C                       M01
+       ADDSUB          w0 = u0, x0             C                       M I
+       ;;
+       st8             [rp] = w2, 8            C                       M23
+       cmp.PRED        p6, p0 = w0, u0         C                       M I
+       ld8             u0 = [up], 8            C                       M01
+       ADDSUB          w1 = u1, x1             C                       M I
+       ;;
+.LL00: st8             [rp] = w3, 8            C                       M23
+       shrp            x2 = v2, v1, 63         C                       I0
+   (p8)        cmp.eq.or       p6, p0 = LIM, w0        C                       M I
+   (p8)        add             w0 = INCR, w0           C                       M I
+       ld8             v1 = [vp], 8            C                       M01
+       cmp.PRED        p8, p0 = w1, u1         C                       M I
+       ;;
+.LL11: ld8             u1 = [up], 8            C                       M01
+       shrp            x3 = v3, v2, 63         C                       I0
+   (p6)        cmp.eq.or       p8, p0 = LIM, w1        C                       M I
+   (p6)        add             w1 = INCR, w1           C                       M I
+       ld8             v2 = [vp], 8            C                       M01
+       ADDSUB          w2 = u2, x2             C                       M I
+       ;;
+       st8             [rp] = w0, 8            C                       M23
+       cmp.PRED        p6, p0 = w2, u2         C                       M I
+       ld8             u2 = [up], 8            C                       M01
+       ADDSUB          w3 = u3, x3             C                       M I
+       br.cloop.dptk   .Loop                   C                       B
+       ;;
+C *** MAIN LOOP END ***
+
+C Wind-down.  No further loads are issued; the limbs already in registers are
+C completed and stored.  .Lskip has six stores left, and each .LcjN label is
+C an entry point with N stores left.
+.Lskip:        st8             [rp] = w1, 8            C                       M23
+       shrp            x0 = v0, v3, 63         C                       I0
+   (p8)        cmp.eq.or       p6, p0 = LIM, w2        C                       M I
+   (p8)        add             w2 = INCR, w2           C                       M I
+       cmp.PRED        p8, p0 = w3, u3         C                       M I
+       ;;
+.Lcj5: shrp            x1 = v1, v0, 63         C                       I0
+   (p6)        cmp.eq.or       p8, p0 = LIM, w3        C                       M I
+   (p6)        add             w3 = INCR, w3           C                       M I
+       ADDSUB          w0 = u0, x0             C                       M I
+       ;;
+       st8             [rp] = w2, 8            C                       M23
+       cmp.PRED        p6, p0 = w0, u0         C                       M I
+       ADDSUB          w1 = u1, x1             C                       M I
+       ;;
+.Lcj4: st8             [rp] = w3, 8            C                       M23
+       shrp            x2 = v2, v1, 63         C                       I0
+   (p8)        cmp.eq.or       p6, p0 = LIM, w0        C                       M I
+   (p8)        add             w0 = INCR, w0           C                       M I
+       cmp.PRED        p8, p0 = w1, u1         C                       M I
+       ;;
+C v2 holds the most significant vp limb on every path that reaches .Lcj3, so
+C r8 starts out as the bit shifted out of the top of vp[] << 1.
+.Lcj3: shr.u           r8 = v2, 63             C                       I0
+   (p6)        cmp.eq.or       p8, p0 = LIM, w1        C                       M I
+   (p6)        add             w1 = INCR, w1           C                       M I
+       ADDSUB          w2 = u2, x2             C                       M I
+       ;;
+       st8             [rp] = w0, 8            C                       M23
+       cmp.PRED        p6, p0 = w2, u2         C                       M I
+       ;;
+.Lcj2: st8             [rp] = w1, 8            C                       M23
+   (p8)        cmp.eq.or       p6, p0 = LIM, w2        C                       M I
+   (p8)        add             w2 = INCR, w2           C                       M I
+       ;;
+C Store the most significant result limb, restore ar.lc from r2, and add the
+C final carry or borrow held in p6 to the return value.
+.Lcj1: st8             [rp] = w2, 8            C                       M23
+       mov.i           ar.lc = r2              C                       I0
+   (p6)        add             r8 = 1, r8              C                       M I
+       br.ret.sptk.many b0                     C                       B
+EPILOGUE()
+ASM_END()
diff --git a/mpn/ia64/bdiv_dbm1c.asm b/mpn/ia64/bdiv_dbm1c.asm

new file mode 100644 (file)

index 0000000..6ff4fda
--- /dev/null
+++ b/mpn/ia64/bdiv_dbm1c.asm
@@ -0,0 +1,503 @@
+dnl  IA-64 mpn_bdiv_dbm1c.
+
+dnl  Copyright 2008, 2009 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C         cycles/limb
+C Itanium:    4
+C Itanium 2:  2
+
+C TODO
+C  * Optimize feed-in and wind-down code, both for speed and code size.
+
+C INPUT PARAMETERS  (rp: result limbs, up: source limbs, n: limb count, bd: multiplier limb)
+define(`rp', `r32')
+define(`up', `r33')
+define(`n', `r34')
+define(`bd', `r35')
+C Per limb: h -= low(up[i]*bd); rp[i] = h; h -= high(up[i]*bd) + borrow.  h starts as r36 and is returned in r8.
+ASM_START()
+PROLOGUE(mpn_bdiv_dbm1c)
+       .prologue
+       .save           ar.lc, r2
+       .body
+
+ifdef(`HAVE_ABI_32',
+`      addp4           rp = 0, rp              C M I
+       addp4           up = 0, up              C M I
+       zxt4            n = n                   C I
+       ;;
+')
+{.mmb
+       mov             r15 = r36               C M I  r15 = h, initial value taken from r36
+       ldf8            f9 = [up], 8            C M    f9 = up[0]
+       nop.b           0                       C B
+}
+.Lcommon:
+{.mii
+       adds            r16 = -1, n             C M I  n - 1
+       mov             r2 = ar.lc              C I0   save ar.lc, restored before return
+       and             r14 = 3, n              C M I  n mod 4
+       ;;
+}
+{.mii
+       setf.sig        f6 = bd                 C M2 M3  f6 = bd, the multiplier for every xma
+       shr.u           r31 = r16, 2            C I0  (n-1)/4, the loop count
+       cmp.eq          p10, p0 = 0, r14        C M I  n mod 4 = 0
+}
+{.mii
+       nop.m           0                       C M
+       cmp.eq          p11, p0 = 2, r14        C M I  n mod 4 = 2
+       cmp.eq          p12, p0 = 3, r14        C M I  n mod 4 = 3
+       ;;
+}
+{.mii
+       cmp.ne          p6, p7 = r0, r0         C M I  p6 = 0, p7 = 1
+       mov.i           ar.lc = r31             C I0
+       cmp.ne          p8, p9 = r0, r0         C M I
+}
+{.bbb
+  (p10)        br.dptk         .Lb00                   C B
+  (p11)        br.dptk         .Lb10                   C B
+  (p12)        br.dptk         .Lb11                   C B
+       ;;
+}
+C Feed-in code, one entry per n mod 4: start the loads and multiplies, then join the loop at .LLxx or the wind-down at .Lcjx.
+.Lb01: br.cloop.dptk   .grt1                   C n mod 4 = 1; taken when n > 1
+       ;;
+       xma.l           f38 = f9, f6, f0        C low word of up[0] * bd
+       xma.hu          f39 = f9, f6, f0        C high word of up[0] * bd
+       ;;
+       getf.sig        r26 = f38
+       getf.sig        r27 = f39
+       br              .Lcj1
+
+.grt1: ldf8            f10 = [r33], 8          C n = 5, 9, 13, ...
+       ;;
+       ldf8            f11 = [r33], 8
+       ;;
+       ldf8            f12 = [r33], 8
+       ;;
+       xma.l           f38 = f9, f6, f0
+       xma.hu          f39 = f9, f6, f0
+       ;;
+       ldf8            f13 = [r33], 8
+       ;;
+       xma.l           f32 = f10, f6, f0
+       xma.hu          f33 = f10, f6, f0
+       br.cloop.dptk   .grt5                   C taken when n > 5
+
+       ;;
+       getf.sig        r26 = f38
+       xma.l           f34 = f11, f6, f0
+       xma.hu          f35 = f11, f6, f0
+       ;;
+       getf.sig        r27 = f39
+       ;;
+       getf.sig        r20 = f32
+       xma.l           f36 = f12, f6, f0
+       xma.hu          f37 = f12, f6, f0
+       ;;
+       getf.sig        r21 = f33
+       ;;
+       getf.sig        r22 = f34
+       xma.l           f38 = f13, f6, f0
+       xma.hu          f39 = f13, f6, f0
+       br              .Lcj5
+
+.grt5: ldf8            f10 = [r33], 8
+       ;;
+       getf.sig        r26 = f38
+       xma.l           f34 = f11, f6, f0
+       xma.hu          f35 = f11, f6, f0
+       ;;
+       getf.sig        r27 = f39
+       ldf8            f11 = [r33], 8
+       ;;
+       getf.sig        r20 = f32
+       xma.l           f36 = f12, f6, f0
+       xma.hu          f37 = f12, f6, f0
+       ;;
+       getf.sig        r21 = f33
+       ldf8            f12 = [r33], 8
+       ;;
+       getf.sig        r22 = f34
+       xma.l           f38 = f13, f6, f0
+       xma.hu          f39 = f13, f6, f0
+       br              .LL01
+
+.Lb10: ldf8            f13 = [r33], 8          C n mod 4 = 2; f13 = up[1]
+       br.cloop.dptk   .grt2                   C taken when n > 2
+       ;;
+
+       xma.l           f36 = f9, f6, f0
+       xma.hu          f37 = f9, f6, f0
+       ;;
+       xma.l           f38 = f13, f6, f0
+       xma.hu          f39 = f13, f6, f0
+       ;;
+       getf.sig        r24 = f36
+       ;;
+       getf.sig        r25 = f37
+       ;;
+       getf.sig        r26 = f38
+       ;;
+       getf.sig        r27 = f39
+       br              .Lcj2
+
+.grt2: ldf8            f10 = [r33], 8
+       ;;
+       ldf8            f11 = [r33], 8
+       ;;
+       xma.l           f36 = f9, f6, f0
+       xma.hu          f37 = f9, f6, f0
+       ;;
+       ldf8            f12 = [r33], 8
+       ;;
+       xma.l           f38 = f13, f6, f0
+       xma.hu          f39 = f13, f6, f0
+       ;;
+       ldf8            f13 = [r33], 8
+       ;;
+       getf.sig        r24 = f36
+       xma.l           f32 = f10, f6, f0
+       xma.hu          f33 = f10, f6, f0
+       br.cloop.dptk   .grt6                   C taken when n > 6
+
+       getf.sig        r25 = f37
+       ;;
+       getf.sig        r26 = f38
+       xma.l           f34 = f11, f6, f0
+       xma.hu          f35 = f11, f6, f0
+       ;;
+       getf.sig        r27 = f39
+       ;;
+       getf.sig        r20 = f32
+       xma.l           f36 = f12, f6, f0
+       xma.hu          f37 = f12, f6, f0
+       br              .Lcj6
+
+.grt6: getf.sig        r25 = f37
+       ldf8            f10 = [r33], 8
+       ;;
+       getf.sig        r26 = f38
+       xma.l           f34 = f11, f6, f0
+       xma.hu          f35 = f11, f6, f0
+       ;;
+       getf.sig        r27 = f39
+       ldf8            f11 = [r33], 8
+       ;;
+       getf.sig        r20 = f32
+       xma.l           f36 = f12, f6, f0
+       xma.hu          f37 = f12, f6, f0
+       br              .LL10
+
+
+.Lb11: ldf8            f12 = [r33], 8          C n mod 4 = 3; f12 = up[1]
+       ;;
+       ldf8            f13 = [r33], 8
+       br.cloop.dptk   .grt3                   C taken when n > 3
+       ;;
+
+       xma.l           f34 = f9, f6, f0
+       xma.hu          f35 = f9, f6, f0
+       ;;
+       xma.l           f36 = f12, f6, f0
+       xma.hu          f37 = f12, f6, f0
+       ;;
+       getf.sig        r22 = f34
+       xma.l           f38 = f13, f6, f0
+       xma.hu          f39 = f13, f6, f0
+       ;;
+       getf.sig        r23 = f35
+       ;;
+       getf.sig        r24 = f36
+       ;;
+       getf.sig        r25 = f37
+       ;;
+       getf.sig        r26 = f38
+       br              .Lcj3
+
+.grt3: ldf8            f10 = [r33], 8
+       ;;
+       xma.l           f34 = f9, f6, f0
+       xma.hu          f35 = f9, f6, f0
+       ;;
+       ldf8            f11 = [r33], 8
+       ;;
+       xma.l           f36 = f12, f6, f0
+       xma.hu          f37 = f12, f6, f0
+       ;;
+       ldf8            f12 = [r33], 8
+       ;;
+       getf.sig        r22 = f34
+       xma.l           f38 = f13, f6, f0
+       xma.hu          f39 = f13, f6, f0
+       ;;
+       getf.sig        r23 = f35
+       ldf8            f13 = [r33], 8
+       ;;
+       getf.sig        r24 = f36
+       xma.l           f32 = f10, f6, f0
+       xma.hu          f33 = f10, f6, f0
+       br.cloop.dptk   .grt7                   C taken when n > 7
+
+       getf.sig        r25 = f37
+       ;;
+       getf.sig        r26 = f38
+       xma.l           f34 = f11, f6, f0
+       xma.hu          f35 = f11, f6, f0
+       br              .Lcj7
+
+.grt7: getf.sig        r25 = f37
+       ldf8            f10 = [r33], 8
+       ;;
+       getf.sig        r26 = f38
+       xma.l           f34 = f11, f6, f0
+       xma.hu          f35 = f11, f6, f0
+       br              .LL11
+
+
+.Lb00: ldf8            f11 = [r33], 8          C n mod 4 = 0; f11 = up[1]
+       ;;
+       ldf8            f12 = [r33], 8
+       ;;
+       ldf8            f13 = [r33], 8
+       br.cloop.dptk   .grt4                   C taken when n > 4
+       ;;
+
+       xma.l           f32 = f9, f6, f0
+       xma.hu          f33 = f9, f6, f0
+       ;;
+       xma.l           f34 = f11, f6, f0
+       xma.hu          f35 = f11, f6, f0
+       ;;
+       getf.sig        r20 = f32
+       xma.l           f36 = f12, f6, f0
+       xma.hu          f37 = f12, f6, f0
+       ;;
+       getf.sig        r21 = f33
+       ;;
+       getf.sig        r22 = f34
+       xma.l           f38 = f13, f6, f0
+       xma.hu          f39 = f13, f6, f0
+       ;;
+       getf.sig        r23 = f35
+       ;;
+       getf.sig        r24 = f36
+       br              .Lcj4
+
+.grt4: xma.l           f32 = f9, f6, f0
+       xma.hu          f33 = f9, f6, f0
+       ;;
+       ldf8            f10 = [r33], 8
+       ;;
+       xma.l           f34 = f11, f6, f0
+       xma.hu          f35 = f11, f6, f0
+       ;;
+       ldf8            f11 = [r33], 8
+       ;;
+       getf.sig        r20 = f32
+       xma.l           f36 = f12, f6, f0
+       xma.hu          f37 = f12, f6, f0
+       ;;
+       getf.sig        r21 = f33
+       ldf8            f12 = [r33], 8
+       ;;
+       getf.sig        r22 = f34
+       xma.l           f38 = f13, f6, f0
+       xma.hu          f39 = f13, f6, f0
+       ;;
+       getf.sig        r23 = f35
+       ldf8            f13 = [r33], 8
+       ;;
+       getf.sig        r24 = f36
+       xma.l           f32 = f10, f6, f0
+       xma.hu          f33 = f10, f6, f0
+       br.cloop.dptk   .LL00                   C taken when n > 8
+       br              .Lcj8                   C n = 8
+
+C *** MAIN LOOP START ***
+       ALIGN(32)
+.Ltop:                                         C four limbs per iteration; .LL00 .LL11 .LL10 .LL01 are feed-in entries
+       .pred.rel "mutex",p6,p7
+C      .mfi
+       getf.sig        r24 = f36
+       xma.l           f32 = f10, f6, f0
+  (p6) sub             r15 = r19, r27, 1
+C      .mfi
+       st8             [r32] = r19, 8
+       xma.hu          f33 = f10, f6, f0
+  (p7) sub             r15 = r19, r27
+       ;;
+.LL00:
+C      .mfi
+       getf.sig        r25 = f37
+       nop.f 0
+       cmp.ltu         p6, p7 = r15, r20       C p6 = borrow out of r15 - r20, p7 = no borrow
+C      .mib
+       ldf8            f10 = [r33], 8
+       sub             r16 = r15, r20          C h minus the low product word
+       nop.b 0
+       ;;
+
+C      .mfi
+       getf.sig        r26 = f38
+       xma.l           f34 = f11, f6, f0
+  (p6) sub             r15 = r16, r21, 1       C minus the high product word, minus the borrow
+C      .mfi
+       st8             [r32] = r16, 8          C store the result limb
+       xma.hu          f35 = f11, f6, f0
+  (p7) sub             r15 = r16, r21          C minus the high product word, no borrow
+       ;;
+.LL11:
+C      .mfi
+       getf.sig        r27 = f39
+       nop.f 0
+       cmp.ltu         p6, p7 = r15, r22
+C      .mib
+       ldf8            f11 = [r33], 8
+       sub             r17 = r15, r22
+       nop.b 0
+       ;;
+
+C      .mfi
+       getf.sig        r20 = f32
+       xma.l           f36 = f12, f6, f0
+  (p6) sub             r15 = r17, r23, 1
+C      .mfi
+       st8             [r32] = r17, 8
+       xma.hu          f37 = f12, f6, f0
+  (p7) sub             r15 = r17, r23
+       ;;
+.LL10:
+C      .mfi
+       getf.sig        r21 = f33
+       nop.f 0
+       cmp.ltu         p6, p7 = r15, r24
+C      .mib
+       ldf8            f12 = [r33], 8
+       sub             r18 = r15, r24
+       nop.b 0
+       ;;
+
+C      .mfi
+       getf.sig        r22 = f34
+       xma.l           f38 = f13, f6, f0
+  (p6) sub             r15 = r18, r25, 1
+C      .mfi
+       st8             [r32] = r18, 8
+       xma.hu          f39 = f13, f6, f0
+  (p7) sub             r15 = r18, r25
+       ;;
+.LL01:
+C      .mfi
+       getf.sig        r23 = f35
+       nop.f 0
+       cmp.ltu         p6, p7 = r15, r26
+C      .mib
+       ldf8            f13 = [r33], 8
+       sub             r19 = r15, r26
+       br.cloop.sptk.few .Ltop
+C *** MAIN LOOP END ***
+       ;;
+C Wind-down: no more loads; .Lcj8 down to .Lcj1 are entered with that many result limbs still to be stored.
+       getf.sig        r24 = f36
+       xma.l           f32 = f10, f6, f0
+  (p6) sub             r15 = r19, r27, 1
+       st8             [r32] = r19, 8
+       xma.hu          f33 = f10, f6, f0
+  (p7) sub             r15 = r19, r27
+       ;;
+.Lcj8: getf.sig        r25 = f37
+       cmp.ltu         p6, p7 = r15, r20
+       sub             r16 = r15, r20
+       ;;
+       getf.sig        r26 = f38
+       xma.l           f34 = f11, f6, f0
+  (p6) sub             r15 = r16, r21, 1
+       st8             [r32] = r16, 8
+       xma.hu          f35 = f11, f6, f0
+  (p7) sub             r15 = r16, r21
+       ;;
+.Lcj7: getf.sig        r27 = f39
+       cmp.ltu         p6, p7 = r15, r22
+       sub             r17 = r15, r22
+       ;;
+       getf.sig        r20 = f32
+       xma.l           f36 = f12, f6, f0
+  (p6) sub             r15 = r17, r23, 1
+       st8             [r32] = r17, 8
+       xma.hu          f37 = f12, f6, f0
+  (p7) sub             r15 = r17, r23
+       ;;
+.Lcj6: getf.sig        r21 = f33
+       cmp.ltu         p6, p7 = r15, r24
+       sub             r18 = r15, r24
+       ;;
+       getf.sig        r22 = f34
+       xma.l           f38 = f13, f6, f0
+  (p6) sub             r15 = r18, r25, 1
+       st8             [r32] = r18, 8
+       xma.hu          f39 = f13, f6, f0
+  (p7) sub             r15 = r18, r25
+       ;;
+.Lcj5: getf.sig        r23 = f35
+       cmp.ltu         p6, p7 = r15, r26
+       sub             r19 = r15, r26
+       ;;
+       getf.sig        r24 = f36
+  (p6) sub             r15 = r19, r27, 1
+       st8             [r32] = r19, 8
+  (p7) sub             r15 = r19, r27
+       ;;
+.Lcj4: getf.sig        r25 = f37
+       cmp.ltu         p6, p7 = r15, r20
+       sub             r16 = r15, r20
+       ;;
+       getf.sig        r26 = f38
+  (p6) sub             r15 = r16, r21, 1
+       st8             [r32] = r16, 8
+  (p7) sub             r15 = r16, r21
+       ;;
+.Lcj3: getf.sig        r27 = f39
+       cmp.ltu         p6, p7 = r15, r22
+       sub             r17 = r15, r22
+       ;;
+  (p6) sub             r15 = r17, r23, 1
+       st8             [r32] = r17, 8
+  (p7) sub             r15 = r17, r23
+       ;;
+.Lcj2: cmp.ltu         p6, p7 = r15, r24
+       sub             r18 = r15, r24
+       ;;
+  (p6) sub             r15 = r18, r25, 1
+       st8             [r32] = r18, 8
+  (p7) sub             r15 = r18, r25
+       ;;
+.Lcj1: cmp.ltu         p6, p7 = r15, r26
+       sub             r19 = r15, r26
+       ;;
+  (p6) sub             r8 = r19, r27, 1        C return value, borrow case
+       st8             [r32] = r19             C last result limb
+  (p7) sub             r8 = r19, r27           C return value, no-borrow case
+       mov ar.lc = r2                          C restore ar.lc
+       br.ret.sptk.many b0
+EPILOGUE()
+ASM_END()
diff --git a/mpn/ia64/copyd.asm b/mpn/ia64/copyd.asm

new file mode 100644 (file)

index 0000000..759629e
--- /dev/null
+++ b/mpn/ia64/copyd.asm
@@ -0,0 +1,173 @@
+dnl  IA-64 mpn_copyd -- copy limb vector, decrementing.
+
+dnl  Copyright 2001, 2002, 2004 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C         cycles/limb
+C Itanium:    1
+C Itanium 2:  0.5
+
+C INPUT PARAMETERS
+C rp = r32   destination limb pointer
+C sp = r33   source limb pointer
+C n = r34    number of limbs
+C Copies n limbs from sp to rp, starting at the highest address and working down, four limbs per loop iteration.
+ASM_START()
+PROLOGUE(mpn_copyd)
+       .prologue
+       .save ar.lc, r2
+       .body
+ifdef(`HAVE_ABI_32',
+`      addp4           r32 = 0, r32
+       addp4           r33 = 0, r33
+       sxt4            r34 = r34
+       ;;
+')
+{.mmi
+       shladd          r32 = r34, 3, r32       C rp + 8*n, just past the last destination limb
+       shladd          r33 = r34, 3, r33       C sp + 8*n, just past the last source limb
+       mov.i           r2 = ar.lc              C save ar.lc, restored before return
+}
+{.mmi
+       and             r14 = 3, r34            C n mod 4
+       cmp.ge          p14, p15 = 3, r34       C p14 = (n <= 3): the loop is skipped
+       add             r34 = -4, r34           C n - 4
+       ;;
+}
+{.mmi
+       cmp.eq          p8, p0 = 1, r14         C n mod 4 = 1
+       cmp.eq          p10, p0 = 2, r14        C n mod 4 = 2
+       cmp.eq          p12, p0 = 3, r14        C n mod 4 = 3
+}
+{.bbb
+  (p8) br.dptk         .Lb01
+  (p10)        br.dptk         .Lb10
+  (p12)        br.dptk         .Lb11
+}
+
+.Lb00: C  n = 0, 4, 8, 12, ...
+       add             r32 = -8, r32
+       add             r33 = -8, r33
+  (p14)        br.dptk         .Ls00                   C n <= 3 on this path: nothing to copy
+       ;;
+       add             r21 = -8, r33
+       ld8             r16 = [r33], -16
+       shr             r15 = r34, 2            C (n-4)/4, the loop count
+       ;;
+       ld8             r17 = [r21], -16
+       mov.i           ar.lc = r15
+       ld8             r18 = [r33], -16
+       add             r20 = -8, r32
+       ;;
+       ld8             r19 = [r21], -16
+       br.cloop.dptk   .Loop
+       ;;
+       br.sptk         .Lend
+       ;;
+
+.Lb01: C  n = 1, 5, 9, 13, ...
+       add             r21 = -8, r33
+       add             r20 = -8, r32
+       add             r33 = -16, r33
+       add             r32 = -16, r32
+       ;;
+       ld8             r19 = [r21], -16
+       shr             r15 = r34, 2
+  (p14)        br.dptk         .Ls01                   C n = 1: store the single limb
+       ;;
+       ld8             r16 = [r33], -16
+       mov.i           ar.lc = r15
+       ;;
+       ld8             r17 = [r21], -16
+       ld8             r18 = [r33], -16
+       br.sptk         .Li01
+       ;;
+
+.Lb10: C  n = 2,6, 10, 14, ...
+       add             r21 = -16, r33
+       shr             r15 = r34, 2
+       add             r20 = -16, r32
+       add             r32 = -8, r32
+       add             r33 = -8, r33
+       ;;
+       ld8             r18 = [r33], -16
+       ld8             r19 = [r21], -16
+       mov.i           ar.lc = r15
+  (p14)        br.dptk         .Ls10                   C n = 2: store the two limbs
+       ;;
+       ld8             r16 = [r33], -16
+       ld8             r17 = [r21], -16
+       br.sptk         .Li10
+       ;;
+
+.Lb11: C  n = 3, 7, 11, 15, ...
+       add             r21 = -8, r33
+       add             r20 = -8, r32
+       add             r33 = -16, r33
+       add             r32 = -16, r32
+       ;;
+       ld8             r17 = [r21], -16
+       shr             r15 = r34, 2
+       ;;
+       ld8             r18 = [r33], -16
+       mov.i           ar.lc = r15
+       ld8             r19 = [r21], -16
+  (p14)        br.dptk         .Ls11                   C n = 3: store the three limbs
+       ;;
+       ld8             r16 = [r33], -16
+       br.sptk         .Li11
+       ;;
+
+       ALIGN(32)
+.Loop:                                         C r32/r33 and r20/r21 are two interleaved streams, each stepping down 16 bytes
+.Li00:
+{.mmb
+       st8             [r32] = r16, -16
+       ld8             r16 = [r33], -16
+       nop.b           0
+}
+.Li11:
+{.mmb
+       st8             [r20] = r17, -16
+       ld8             r17 = [r21], -16
+       nop.b           0
+       ;;
+}
+.Li10:
+{.mmb
+       st8             [r32] = r18, -16
+       ld8             r18 = [r33], -16
+       nop.b           0
+}
+.Li01:
+{.mmb
+       st8             [r20] = r19, -16
+       ld8             r19 = [r21], -16
+       br.cloop.dptk   .Loop
+       ;;
+}
+.Lend: st8             [r32] = r16, -16        C drain the limbs already loaded
+.Ls11: st8             [r20] = r17, -16
+       ;;
+.Ls10: st8             [r32] = r18, -16
+.Ls01: st8             [r20] = r19, -16
+.Ls00: mov.i           ar.lc = r2              C restore ar.lc
+       br.ret.sptk.many b0
+EPILOGUE()
+ASM_END()
diff --git a/mpn/ia64/copyi.asm b/mpn/ia64/copyi.asm

new file mode 100644 (file)

index 0000000..11451dc
--- /dev/null
+++ b/mpn/ia64/copyi.asm
@@ -0,0 +1,169 @@
+dnl  IA-64 mpn_copyi -- copy limb vector, incrementing.
+
+dnl  Copyright 2001, 2002, 2004 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C         cycles/limb
+C Itanium:    1
+C Itanium 2:  0.5
+
+C INPUT PARAMETERS
+C rp = r32   destination limb pointer
+C sp = r33   source limb pointer
+C n = r34    number of limbs
+C Copies n limbs from sp to rp in ascending address order, four limbs per loop iteration.
+ASM_START()
+PROLOGUE(mpn_copyi)
+       .prologue
+       .save ar.lc, r2
+       .body
+ifdef(`HAVE_ABI_32',
+`      addp4           r32 = 0, r32
+       addp4           r33 = 0, r33
+       sxt4            r34 = r34
+       ;;
+')
+{.mmi
+       nop             0
+       nop             0
+       mov.i           r2 = ar.lc              C save ar.lc, restored before return
+}
+{.mmi
+       and             r14 = 3, r34            C n mod 4
+       cmp.ge          p14, p15 = 3, r34       C p14 = (n <= 3): the loop is skipped
+       add             r34 = -4, r34           C n - 4
+       ;;
+}
+{.mmi
+       cmp.eq          p8, p0 = 1, r14         C n mod 4 = 1
+       cmp.eq          p10, p0 = 2, r14        C n mod 4 = 2
+       cmp.eq          p12, p0 = 3, r14        C n mod 4 = 3
+}
+{.bbb
+  (p8) br.dptk         .Lb01
+  (p10)        br.dptk         .Lb10
+  (p12)        br.dptk         .Lb11
+}
+
+.Lb00: C  n = 0, 4, 8, 12, ...
+  (p14)        br.dptk         .Ls00                   C n <= 3 on this path: nothing to copy
+       ;;
+       add             r21 = 8, r33
+       ld8             r16 = [r33], 16
+       shr             r15 = r34, 2            C (n-4)/4, the loop count
+       ;;
+       ld8             r17 = [r21], 16
+       mov.i           ar.lc = r15
+       ld8             r18 = [r33], 16
+       add             r20 = 8, r32
+       ;;
+       ld8             r19 = [r21], 16
+       br.cloop.dptk   .Loop
+       ;;
+       br.sptk         .Lend
+       ;;
+
+.Lb01: C  n = 1, 5, 9, 13, ...
+       add             r21 = 0, r33
+       add             r20 = 0, r32
+       add             r33 = 8, r33
+       add             r32 = 8, r32
+       ;;
+       ld8             r19 = [r21], 16
+       shr             r15 = r34, 2
+  (p14)        br.dptk         .Ls01                   C n = 1: store the single limb
+       ;;
+       ld8             r16 = [r33], 16
+       mov.i           ar.lc = r15
+       ;;
+       ld8             r17 = [r21], 16
+       ld8             r18 = [r33], 16
+       br.sptk         .Li01
+       ;;
+
+.Lb10: C  n = 2,6, 10, 14, ...
+       add             r21 = 8, r33
+       add             r20 = 8, r32
+       ld8             r18 = [r33], 16
+       shr             r15 = r34, 2
+       ;;
+       ld8             r19 = [r21], 16
+       mov.i           ar.lc = r15
+  (p14)        br.dptk         .Ls10                   C n = 2: store the two limbs
+       ;;
+       ld8             r16 = [r33], 16
+       ld8             r17 = [r21], 16
+       br.sptk         .Li10
+       ;;
+
+.Lb11: C  n = 3, 7, 11, 15, ...
+       add             r21 = 0, r33
+       add             r20 = 0, r32
+       add             r33 = 8, r33
+       add             r32 = 8, r32
+       ;;
+       ld8             r17 = [r21], 16
+       shr             r15 = r34, 2
+       ;;
+       ld8             r18 = [r33], 16
+       mov.i           ar.lc = r15
+       ld8             r19 = [r21], 16
+  (p14)        br.dptk         .Ls11                   C n = 3: store the three limbs
+       ;;
+       ld8             r16 = [r33], 16
+       br.sptk         .Li11
+       ;;
+
+       ALIGN(32)
+.Loop:                                         C r32/r33 and r20/r21 are two interleaved streams, each stepping up 16 bytes
+.Li00:
+{.mmb
+       st8             [r32] = r16, 16
+       ld8             r16 = [r33], 16
+       nop.b           0
+}
+.Li11:
+{.mmb
+       st8             [r20] = r17, 16
+       ld8             r17 = [r21], 16
+       nop.b           0
+       ;;
+}
+.Li10:
+{.mmb
+       st8             [r32] = r18, 16
+       ld8             r18 = [r33], 16
+       nop.b           0
+}
+.Li01:
+{.mmb
+       st8             [r20] = r19, 16
+       ld8             r19 = [r21], 16
+       br.cloop.dptk   .Loop
+       ;;
+}
+.Lend: st8             [r32] = r16, 16         C drain the limbs already loaded
+.Ls11: st8             [r20] = r17, 16
+       ;;
+.Ls10: st8             [r32] = r18, 16
+.Ls01: st8             [r20] = r19, 16
+.Ls00: mov.i           ar.lc = r2              C restore ar.lc
+       br.ret.sptk.many b0
+EPILOGUE()
+ASM_END()
diff --git a/mpn/ia64/dive_1.asm b/mpn/ia64/dive_1.asm

new file mode 100644 (file)

index 0000000..9b9d085
--- /dev/null
+++ b/mpn/ia64/dive_1.asm
@@ -0,0 +1,217 @@
+dnl  IA-64 mpn_divexact_1 -- mpn by limb exact division.
+
+dnl  Copyright 2003, 2004, 2005 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C            cycles/limb
+C Itanium:      16
+C Itanium 2:     8
+
+C INPUT PARAMETERS
+define(`rp', `r32')
+define(`up', `r33')
+define(`n',  `r34')
+define(`divisor', `r35')
+
+define(`lshift', `r24')
+define(`rshift', `r25')
+C rshift = number of low zero bits stripped from the divisor (0 if it is odd); lshift = 64 - rshift.
+C This code is a bit messy, and not as similar to mode1o.asm as desired.
+
+C The critical path during initialization is for computing the inverse of the
+C divisor.  Since odd divisors are probably common, we conditionally execute
+C the initial count_trailing_zeros code and the downshift.
+
+C Possible improvement: Merge more of the feed-in code into the inverse
+C computation.
+
+ASM_START()
+       .text
+       .align  32
+.Ltab:
+data1  0,0x01, 0,0xAB, 0,0xCD, 0,0xB7, 0,0x39, 0,0xA3, 0,0xC5, 0,0xEF
+data1  0,0xF1, 0,0x1B, 0,0x3D, 0,0xA7, 0,0x29, 0,0x13, 0,0x35, 0,0xDF
+data1  0,0xE1, 0,0x8B, 0,0xAD, 0,0x97, 0,0x19, 0,0x83, 0,0xA5, 0,0xCF
+data1  0,0xD1, 0,0xFB, 0,0x1D, 0,0x87, 0,0x09, 0,0xF3, 0,0x15, 0,0xBF
+data1  0,0xC1, 0,0x6B, 0,0x8D, 0,0x77, 0,0xF9, 0,0x63, 0,0x85, 0,0xAF
+data1  0,0xB1, 0,0xDB, 0,0xFD, 0,0x67, 0,0xE9, 0,0xD3, 0,0xF5, 0,0x9F
+data1  0,0xA1, 0,0x4B, 0,0x6D, 0,0x57, 0,0xD9, 0,0x43, 0,0x65, 0,0x8F
+data1  0,0x91, 0,0xBB, 0,0xDD, 0,0x47, 0,0xC9, 0,0xB3, 0,0xD5, 0,0x7F
+data1  0,0x81, 0,0x2B, 0,0x4D, 0,0x37, 0,0xB9, 0,0x23, 0,0x45, 0,0x6F
+data1  0,0x71, 0,0x9B, 0,0xBD, 0,0x27, 0,0xA9, 0,0x93, 0,0xB5, 0,0x5F
+data1  0,0x61, 0,0x0B, 0,0x2D, 0,0x17, 0,0x99, 0,0x03, 0,0x25, 0,0x4F
+data1  0,0x51, 0,0x7B, 0,0x9D, 0,0x07, 0,0x89, 0,0x73, 0,0x95, 0,0x3F
+data1  0,0x41, 0,0xEB, 0,0x0D, 0,0xF7, 0,0x79, 0,0xE3, 0,0x05, 0,0x2F
+data1  0,0x31, 0,0x5B, 0,0x7D, 0,0xE7, 0,0x69, 0,0x53, 0,0x75, 0,0x1F
+data1  0,0x21, 0,0xCB, 0,0xED, 0,0xD7, 0,0x59, 0,0xC3, 0,0xE5, 0,0x0F
+data1  0,0x11, 0,0x3B, 0,0x5D, 0,0xC7, 0,0x49, 0,0x33, 0,0x55, 0,0xFF
+C .Ltab: 256 bytes indexed by the low byte of the divisor; odd index b holds the inverse of b mod 2^8, even indices hold 0.
+
+PROLOGUE(mpn_divexact_1)
+       .prologue
+       .save           ar.lc, r2
+       .body
+
+ {.mmi;        add             r8 = -1, divisor        C M0  divisor - 1
+       nop             0                       C M1
+       tbit.z          p8, p9 = divisor, 0     C I0  p8 = divisor even, p9 = divisor odd
+}
+ifdef(`HAVE_ABI_32',
+`      addp4           rp = 0, rp              C M2  rp extend
+       addp4           up = 0, up              C M3  up extend
+       sxt4            n = n')                 C I1  size extend
+       ;;
+.Lhere:
+ {.mmi;        ld8             r20 = [up], 8           C M0  up[0]
+  (p8) andcm           r8 = r8, divisor        C M1  (divisor-1) & ~divisor: ones below the lowest set bit
+       mov             r15 = ip                C I0  .Lhere
+       ;;
+}{.mii
+       .pred.rel "mutex", p8, p9
+  (p9) mov             rshift = 0              C M0
+  (p8) popcnt          rshift = r8             C I0  rshift = cnt_lo_zeros(divisor)
+       cmp.eq          p6, p10 = 1, n          C I1  p10 = (size != 1)
+       ;;
+}{.mii;        add             r9 = .Ltab-.Lhere, r15  C M0  address of .Ltab
+  (p8) shr.u           divisor = divisor, rshift C I0  strip the low zero bits
+       nop             0                       C I1
+       ;;
+}{.mmi;        add             n = -4, n               C M0  size-4
+  (p10)        ld8             r21 = [up], 8           C M1  up[1]
+       mov             r14 = 2                 C M1  2
+}{.mfi;        setf.sig        f6 = divisor            C M2  divisor
+       mov             f9 = f0                 C M3  carry             FIXME
+       zxt1            r3 = divisor            C I1  divisor low byte
+       ;;
+}{.mmi;        add             r3 = r9, r3             C M0  table offset ip and index
+       sub             r16 = 0, divisor        C M1  -divisor
+       mov             r2 = ar.lc              C I0
+}{.mmi;        sub             lshift = 64, rshift     C M2
+       setf.sig        f13 = r14               C M3  2 in significand
+       mov             r17 = -1                C I1  -1
+       ;;
+}{.mmi;        ld1             r3 = [r3]               C M0  inverse, 8 bits
+       nop             0                       C M1
+       mov             ar.lc = n               C I0  size-4 loop count
+}{.mmi;        setf.sig        f12 = r16               C M2  -divisor
+       setf.sig        f8 = r17                C M3  -1
+       cmp.eq          p7, p0 = -2, n          C I1  p7 = (size = 2)
+       ;;
+}{.mmi;        setf.sig        f7 = r3                 C M2  inverse, 8 bits
+       cmp.eq          p8, p0 = -1, n          C M0  p8 = (size = 3)
+       shr.u           r23 = r20, rshift       C I0  up[0] >> rshift
+       ;;
+}
+
+       C f6    divisor
+       C f7    inverse, being calculated
+       C f8    -1, will be -inverse
+       C f9    carry
+       C f12   -divisor
+       C f13   2
+       C f14   scratch
+
+       xmpy.l          f14 = f13, f7           C Newton 2*i
+       xmpy.l          f7 = f7, f7             C Newton i*i
+       ;;
+       xma.l           f7 = f7, f12, f14       C Newton i*i*-d + 2*i, 16 bits
+       ;;
+       setf.sig        f10 = r23               C speculative, used iff n = 1
+       xmpy.l          f14 = f13, f7           C Newton 2*i
+       shl             r22 = r21, lshift       C speculative, used iff n > 1
+       xmpy.l          f7 = f7, f7             C Newton i*i
+       ;;
+       or              r31 = r22, r23          C speculative, used iff n > 1
+       xma.l           f7 = f7, f12, f14       C Newton i*i*-d + 2*i, 32 bits
+       shr.u           r23 = r21, rshift       C speculative, used iff n > 1
+       ;;
+       setf.sig        f11 = r31               C speculative, used iff n > 1
+       xmpy.l          f14 = f13, f7           C Newton 2*i
+       xmpy.l          f7 = f7, f7             C Newton i*i
+       ;;
+       xma.l           f7 = f7, f12, f14       C Newton i*i*-d + 2*i, 64 bits
+
+  (p7) br.cond.dptk    .Ln2
+  (p10)        br.cond.dptk    .grt3
+       ;;
+
+.Ln1:  xmpy.l          f12 = f10, f7           C q = ulimb * inverse
+       br              .Lx1
+
+.Ln2:
+       xmpy.l          f8 = f7, f8             C -inverse = inverse * -1
+       xmpy.l          f12 = f11, f7           C q = ulimb * inverse
+       setf.sig        f11 = r23
+       br              .Lx2
+
+.grt3:
+       ld8             r21 = [up], 8           C up[2]
+       xmpy.l          f8 = f7, f8             C -inverse = inverse * -1
+       ;;
+       shl             r22 = r21, lshift
+       ;;
+       xmpy.l          f12 = f11, f7           C q = ulimb * inverse
+       ;;
+       or              r31 = r22, r23
+       shr.u           r23 = r21, rshift
+       ;;
+       setf.sig        f11 = r31
+  (p8) br.cond.dptk    .Lx3                    C branch for n = 3
+       ;;
+       ld8             r21 = [up], 8
+       br              .Lent
+
+.Loop: ld8             r21 = [up], 8
+       xma.l           f12 = f9, f8, f10       C q = c * -inverse + si
+       ;;
+.Lent: add             r16 = 160, up
+       shl             r22 = r21, lshift
+       ;;
+       stf8            [rp] = f12, 8
+       xma.hu          f9 = f12, f6, f9        C c = high(q * divisor + c)
+       xmpy.l          f10 = f11, f7           C si = ulimb * inverse
+       ;;
+       or              r31 = r22, r23
+       shr.u           r23 = r21, rshift
+       ;;
+       lfetch          [r16]
+       setf.sig        f11 = r31
+       br.cloop.sptk.few.clr .Loop
+
+
+       xma.l           f12 = f9, f8, f10       C q = c * -inverse + si
+       ;;
+.Lx3:  stf8            [rp] = f12, 8
+       xma.hu          f9 = f12, f6, f9        C c = high(q * divisor + c)
+       xmpy.l          f10 = f11, f7           C si = ulimb * inverse
+       ;;
+       setf.sig        f11 = r23
+       ;;
+       xma.l           f12 = f9, f8, f10       C q = c * -inverse + si
+       ;;
+.Lx2:  stf8            [rp] = f12, 8
+       xma.hu          f9 = f12, f6, f9        C c = high(q * divisor + c)
+       xmpy.l          f10 = f11, f7           C si = ulimb * inverse
+       ;;
+       xma.l           f12 = f9, f8, f10       C q = c * -inverse + si
+       ;;
+.Lx1:  stf8            [rp] = f12, 8
+       mov             ar.lc = r2              C I0
+       br.ret.sptk.many b0
+EPILOGUE()
diff --git a/mpn/ia64/divrem_1.asm b/mpn/ia64/divrem_1.asm

new file mode 100644 (file)

index 0000000..aa50ac9
--- /dev/null
+++ b/mpn/ia64/divrem_1.asm
@@ -0,0 +1,464 @@
+dnl  IA-64 mpn_divrem_1 and mpn_preinv_divrem_1 -- Divide an mpn number by an
+dnl  unnormalized limb.
+
+dnl  Copyright 2002, 2004, 2005 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C         cycles/limb
+C Itanium:    40-42
+C Itanium 2:  29-30
+
+C This was generated by gcc, then the loops were optimized.  The preinv entry
+C point was shoehorned into the file.  Lots of things outside the loops could
+C be streamlined.  It would probably be a good idea to merge the loops for
+C normalized and unnormalized divisor, since the shifting stuff is done for
+C free in parallel with other operations.  It would even be possible to merge
+C all loops, if the ld8 were made conditional.
+
+C TODO
+C  * Consider delaying inversion for normalized mpn_divrem_1 entry till after
+C    computing leading limb.
+C  * Inline and interleave limb inversion code with loop setup code.
+
+ASM_START()
+
+C HP's assembler requires these declarations for importing mpn_invert_limb
+       .global mpn_invert_limb
+       .type   mpn_invert_limb,@function
+
+C INPUT PARAMETERS
+C rp    = r32
+C qxn   = r33
+C up    = r34
+C n     = r35
+C vl    = r36
+C vlinv = r37  (preinv only)
+C cnt = r38    (preinv only)
+
+PROLOGUE(mpn_preinv_divrem_1)
+       .prologue                               C preinv entry: caller supplies vlinv and cnt, then joins the mpn_divrem_1 code
+       .save   ar.pfs, r42
+       alloc           r42 = ar.pfs, 7, 8, 1, 0  C 7 inputs, 8 locals, 1 output; old ar.pfs kept in r42
+       .save   ar.lc, r44
+       mov             r44 = ar.lc             C save loop counter, restored at .Lret
+       .save   rp, r41
+       mov             r41 = b0                C save return address, restored at .Lret
+       .body
+ifdef(`HAVE_ABI_32',
+`      addp4           r32 = 0, r32
+       sxt4            r33 = r33
+       addp4           r34 = 0, r34
+       sxt4            r35 = r35
+       ;;
+')
+       mov             r40 = r38               C r40 = cnt; r38 is reused below for the remainder
+       shladd          r34 = r35, 3, r34       C r34 = up + n
+       ;;
+       adds            r34 = -8, r34           C r34 = up + n - 1
+       ;;
+       ld8             r39 = [r34], -8         C r39 = high limb; r34 steps down one limb
+       ;;
+
+       add             r15 = r35, r33          C r15 = n + qxn
+       ;;
+       mov             r8 = r37                C r8 = vlinv, where the shared code expects the inverse
+       shladd          r32 = r15, 3, r32       C r32 = rp + n + qxn
+       cmp.le          p8, p0 = 0, r36         C p8 = vl has its high bit clear (unnormalized)
+       ;;
+       adds            r32 = -8, r32           C r32 = rp + n + qxn - 1
+       cmp.leu         p6, p7 = r36, r39       C p6 = (vl <= high limb)
+   (p8)        br.cond.dpnt    .Lpunnorm
+       ;;
+
+   (p6)        addl            r15 = 1, r0             C top quotient limb is 1 if high limb >= vl
+   (p7)        mov             r15 = r0                C otherwise it is 0
+       ;;
+   (p6)        sub             r38 = r39, r36          C r = high limb - vl
+   (p7)        mov             r38 = r39               C r = high limb
+       st8             [r32] = r15, -8         C store top quotient limb
+       adds            r35 = -2, r35           C un -= 2
+       br      .Lpn                            C continue in the normalized-divisor code of mpn_divrem_1
+
+.Lpunnorm:
+   (p6)        add             r34 = 8, r34            C undo the post-decrement: .Lpu reloads the high limb
+       mov             r38 = 0                 C r = 0
+       shl             r36 = r36, r40          C vl <<= cnt
+   (p6)        br.cond.dptk    .Lpu
+       ;;
+       shl             r38 = r39, r40          C r = ahigh << cnt
+       cmp.ne          p8, p0 = 1, r35         C p8 = (n != 1), tested before the decrement below
+       st8             [r32] = r0, -8          C high limb < vl, so the top quotient limb is 0
+       adds            r35 = -1, r35           C un--
+   (p8)        br.cond.dpnt    .Lpu
+
+       mov             r23 = 1                 C n was 1: only fraction limbs remain
+       ;;
+       setf.sig        f6 = r8                 C f6 = inverse, as used by .Loop4
+       setf.sig        f12 = r23               C f12 = 1, as used by .Loop4
+       br              .L435
+EPILOGUE()
+
+
+PROLOGUE(mpn_divrem_1)
+       .prologue                               C divide {up,n} by vl, qxn fraction limbs; quotient stored at rp, remainder returned in r8
+       .save   ar.pfs, r42
+       alloc           r42 = ar.pfs, 5, 8, 1, 0  C 5 inputs, 8 locals, 1 output (r45)
+       .save   ar.lc, r44
+       mov             r44 = ar.lc             C save loop counter, restored at .Lret
+       .save   rp, r41
+       mov             r41 = b0                C save return address, restored at .Lret
+       .body
+ifdef(`HAVE_ABI_32',
+`      addp4           r32 = 0, r32
+       sxt4            r33 = r33
+       addp4           r34 = 0, r34
+       sxt4            r35 = r35
+       ;;
+')
+       mov             r38 = r0                C r = 0
+       add             r15 = r35, r33          C r15 = n + qxn
+       ;;
+       cmp.ne          p6, p7 = 0, r15         C p7 = no quotient limbs at all
+       ;;
+   (p7)        mov             r8 = r0                 C nothing to do: return 0
+   (p7)        br.cond.dpnt    .Lret
+       shladd          r14 = r15, 3, r32       C r14 = rp + n + qxn
+       cmp.le          p6, p7 = 0, r36         C p6 = vl has its high bit clear (unnormalized)
+       ;;
+       adds            r32 = -8, r14           C r32 = rp + n + qxn - 1
+   (p6)        br.cond.dpnt    .Lunnorm
+       cmp.eq          p6, p7 = 0, r35         C n == 0: no integer limbs
+   (p6)        br.cond.dpnt    .L179
+       shladd          r14 = r35, 3, r34       C r14 = up + n
+       ;;
+       adds            r14 = -8, r14           C r14 = up + n - 1
+       adds            r35 = -1, r35           C un--
+       ;;
+       ld8             r38 = [r14]             C r = high limb
+       ;;
+       cmp.leu         p6, p7 = r36, r38       C p6 = (vl <= high limb)
+       ;;
+   (p6)        addl            r15 = 1, r0             C top quotient limb is 1 if high limb >= vl
+   (p7)        mov             r15 = r0                C otherwise it is 0
+       ;;
+       st8             [r32] = r15, -8         C store top quotient limb
+  (p6) sub             r38 = r38, r36          C r -= vl
+
+.L179:
+       mov             r45 = r36               C outgoing argument for mpn_invert_limb = vl
+       adds            r35 = -1, r35           C loop count = remaining limbs - 1
+       br.call.sptk.many b0 = mpn_invert_limb  C inverse comes back in r8
+       ;;
+       shladd          r34 = r35, 3, r34       C r34 = up + r35 limbs: next limb to read
+.Lpn:
+       mov             r23 = 1
+       ;;
+       setf.sig        f6 = r8                 C f6 = inverse
+       setf.sig        f12 = r23               C f12 = 1
+       cmp.le          p6, p7 = 0, r35         C p7 = no integer limbs left to divide
+       mov             r40 = 0                 C cnt = 0: no shift of the final remainder
+   (p7)        br.cond.dpnt    .L435
+       setf.sig        f10 = r36               C f10 = vl
+       mov             ar.lc = r35
+       setf.sig        f7 = r38                C f7 = nh (current remainder)
+       ;;
+       sub             r28 = -1, r36           C NOTE(review): r28 looks overwritten in .Loop1 before it is read - confirm
+C Develop quotient limbs for normalized divisor
+.Loop1:                C 00                            C q=r18 nh=r38/f7
+       ld8             r20 = [r34], -8         C nl = next lower dividend limb
+       xma.hu          f11 = f7, f6, f0        C high(nh * inverse)
+       ;;      C 04
+       xma.l           f8 = f11, f12, f7       C q = q + nh
+       ;;      C 08
+       getf.sig        r18 = f8
+       xma.hu          f9 = f8, f10, f0        C high(q * vl)
+       xma.l           f8 = f8, f10, f0        C low(q * vl)
+       ;;      C 12
+       getf.sig        r16 = f9
+               C 13
+       getf.sig        r15 = f8
+       ;;      C 18
+       cmp.ltu         p6, p7 = r20, r15       C p6 = borrow out of the low subtraction
+       sub             r15 = r20, r15          C rL = nl - low(q * vl)
+       sub             r16 = r38, r16          C rH = nh - high(q * vl)
+       ;;      C 19
+   (p6)        cmp.ne          p8, p9 = 1, r16         C is rH != 0?
+   (p7)        cmp.ne          p8, p9 = 0, r16         C is rH != 0?
+   (p6)        add             r16 = -1, r16
+   (p0)        cmp.ne.unc      p6, p7 = r0, r0         C p6 = 0, p7 = 1 unless the next compare runs
+       ;;      C 20
+   (p8)        cmp.ltu         p6, p7 = r15, r36
+   (p8)        sub             r15 = r15, r36
+   (p8)        add             r18 = 1, r18            C q = q + 1;    done if: rH > 0
+       ;;      C 21
+       .pred.rel "mutex",p6,p7
+   (p6)        cmp.ne          p8, p9 = 1, r16         C is rH != 0 still?
+   (p7)        cmp.ne          p8, p9 = 0, r16         C is rH != 0 still?
+       cmp.ltu         p6, p7 = r15, r36       C speculative
+       sub             r28 = r15, r36          C speculative, just for cmp
+       ;;      C 22
+   (p8)        cmp.ltu         p6, p7 = r28, r36       C redo last cmp if needed
+   (p8)        mov             r15 = r28
+   (p8)        add             r18 = 1, r18            C q = q + 1;    done if: rH > 0
+       ;;      C 23
+   (p6)        setf.sig        f7 = r15
+   (p7)        sub             r15 = r15, r36
+   (p7)        add             r18 = 1, r18            C q = q + 1;    done if: rH > 0
+       ;;      C 24
+   (p7)        setf.sig        f7 = r15
+       st8             [r32] = r18, -8         C store quotient limb
+       mov             r38 = r15               C r = new remainder
+       br.cloop.dptk   .Loop1
+               C 29/30
+       br.sptk         .L435
+       ;;
+.Lunnorm:
+       mux1            r16 = r36, @rev         C byte-reversed vl, used at .L322 to find the shift count
+       cmp.eq          p6, p7 = 0, r35         C n == 0: no integer limbs
+   (p6)        br.cond.dpnt    .L322
+       shladd          r34 = r35, 3, r34       C r34 = up + n
+       ;;
+       adds            r34 = -8, r34           C r34 = up + n - 1
+       ;;
+       ld8             r39 = [r34]             C r39 = high limb
+       ;;
+       cmp.leu         p6, p7 = r36, r39       C p6 = (vl <= high limb)
+   (p6)        br.cond.dptk    .L322
+       adds            r34 = -8, r34           C high limb < vl: consume it as the initial remainder
+       ;;
+       mov             r38 = r39               C r = high limb
+       ;;
+       cmp.ne          p6, p7 = 1, r15         C p7 = this was the only quotient limb (n + qxn == 1)
+       st8             [r32] = r0, -8          C top quotient limb is 0
+       ;;
+   (p7)        mov             r8 = r38                C then the remainder is the high limb itself
+   (p7)        br.cond.dpnt    .Lret
+       adds            r35 = -1, r35           C un--
+.L322:
+       sub             r14 = r0, r16           C from here to the two sub r40 lines: r40 = leading zero bits of vl
+       ;;
+       or              r14 = r16, r14          C r16 | -r16 keeps the lowest set bit and sets every bit above it
+       ;;
+       mov             r16 = -8
+       czx1.l          r14 = r14
+       ;;
+       shladd          r16 = r14, 3, r16       C r16 = 8 * r14 - 8
+       ;;
+       shr.u           r14 = r36, r16          C r14 = vl shifted down by r16 bits
+       ;;
+       cmp.geu         p6, p7 = 15, r14        C p7 = a bit is set in the upper nibble
+       ;;
+   (p7)        shr.u           r14 = r14, 4
+   (p7)        adds            r16 = 4, r16
+       ;;
+       cmp.geu         p6, p7 = 3, r14         C p7 = a bit is set above bit 1
+       ;;
+   (p7)        shr.u           r14 = r14, 2
+   (p7)        adds            r16 = 2, r16
+       ;;
+       tbit.nz         p6, p7 = r14, 1
+       ;;
+       .pred.rel "mutex",p6,p7
+  (p6) sub             r40 = 62, r16
+  (p7) sub             r40 = 63, r16
+       ;;
+       shl             r45 = r36, r40          C outgoing argument for mpn_invert_limb = vl << cnt
+       shl             r36 = r36, r40          C vl <<= cnt
+       shl             r38 = r38, r40          C r <<= cnt
+       br.call.sptk.many b0 = mpn_invert_limb  C inverse comes back in r8
+       ;;
+.Lpu:
+       mov             r23 = 1
+       ;;
+       setf.sig        f6 = r8                 C f6 = inverse
+       setf.sig        f12 = r23               C f12 = 1
+       cmp.eq          p6, p7 = 0, r35         C p6 = no integer limbs left
+   (p6)        br.cond.dpnt    .L435
+       sub             r16 = 64, r40           C r16 = 64 - cnt
+       adds            r35 = -2, r35           C loop count for .Loop3; the last limb is done at .Lend3
+       ;;
+       ld8             r39 = [r34], -8         C n1 = next dividend limb
+       cmp.le          p6, p7 = 0, r35         C p7 = only one limb left, skip .Loop3
+       ;;
+       shr.u           r14 = r39, r16          C top cnt bits of n1
+       ;;
+       or              r38 = r14, r38          C r |= n1 >> (64 - cnt)
+   (p7)        br.cond.dpnt    .Lend3
+       ;;
+       mov             r22 = r16               C r22 = 64 - cnt; r16 is reused in the loop
+       setf.sig        f10 = r36               C f10 = shifted vl
+       setf.sig        f7 = r38                C f7 = nh (current remainder)
+       mov             ar.lc = r35
+       ;;
+C Develop quotient limbs for unnormalized divisor
+.Loop3:
+       ld8             r14 = [r34], -8         C n0 = next lower dividend limb
+       xma.hu          f11 = f7, f6, f0        C high(nh * inverse)
+       ;;
+       xma.l           f8 = f11, f12, f7       C q = q + nh
+       ;;
+       getf.sig        r18 = f8
+       xma.hu          f9 = f8, f10, f0        C high(q * vl)
+       shl             r20 = r39, r40          C n1 << cnt
+       xma.l           f8 = f8, f10, f0        C low(q * vl)
+       shr.u           r24 = r14, r22          C n0 >> (64 - cnt)
+       ;;
+       getf.sig        r16 = f9
+       getf.sig        r15 = f8
+       or              r20 = r24, r20          C nl = (n1 << cnt) | (n0 >> (64 - cnt))
+       ;;
+       cmp.ltu         p6, p7 = r20, r15       C p6 = borrow out of the low subtraction
+       sub             r15 = r20, r15          C rL = nl - low(q * vl)
+       sub             r16 = r38, r16          C rH = nh - high(q * vl)
+       ;;
+   (p6)        cmp.ne          p8, p9 = 1, r16         C is rH != 0?
+   (p7)        cmp.ne          p8, p9 = 0, r16         C is rH != 0?
+   (p6)        add             r16 = -1, r16
+   (p0)        cmp.ne.unc      p6, p7 = r0, r0
+       ;;
+   (p8)        cmp.ltu         p6, p7 = r15, r36
+   (p8)        sub             r15 = r15, r36
+   (p8)        add             r18 = 1, r18            C q = q + 1;    done if: rH > 0
+       ;;
+       .pred.rel "mutex",p6,p7
+   (p6)        cmp.ne          p8, p9 = 1, r16         C is rH != 0 still?
+   (p7)        cmp.ne          p8, p9 = 0, r16         C is rH != 0 still?
+       cmp.ltu         p6, p7 = r15, r36       C speculative
+       sub             r28 = r15, r36          C speculative, just for cmp
+       ;;
+   (p8)        cmp.ltu         p6, p7 = r28, r36       C redo last cmp if needed
+   (p8)        mov             r15 = r28
+   (p8)        add             r18 = 1, r18            C q = q + 1;    done if: rH > 0
+       ;;
+   (p6)        setf.sig        f7 = r15
+   (p7)        sub             r15 = r15, r36
+   (p7)        add             r18 = 1, r18            C q = q + 1;    done if: rH > 0
+       ;;
+   (p7)        setf.sig        f7 = r15
+       st8             [r32] = r18, -8         C store quotient limb
+       mov             r39 = r14               C n1 = n0 for the next iteration
+       mov             r38 = r15               C r = new remainder
+       br.cloop.dptk   .Loop3
+       ;;
+.Lend3:
+       setf.sig        f10 = r36               C last integer limb: no lower limb to shift in
+       setf.sig        f7 = r38
+       ;;
+       xma.hu          f11 = f7, f6, f0        C high(nh * inverse)
+       ;;
+       xma.l           f8 = f11, f12, f7       C q = q + nh
+       ;;
+       getf.sig        r18 = f8
+       xma.hu          f9 = f8, f10, f0        C high(q * vl)
+       shl             r20 = r39, r40          C nl = n1 << cnt
+       xma.l           f8 = f8, f10, f0        C low(q * vl)
+       ;;
+       getf.sig        r16 = f9
+       getf.sig        r15 = f8
+       ;;
+       cmp.ltu         p6, p7 = r20, r15       C p6 = borrow out of the low subtraction
+       sub             r15 = r20, r15          C rL = nl - low(q * vl)
+       sub             r16 = r38, r16          C rH = nh - high(q * vl)
+       ;;
+   (p6)        cmp.ne          p8, p9 = 1, r16         C is rH != 0?
+   (p7)        cmp.ne          p8, p9 = 0, r16         C is rH != 0?
+   (p6)        add             r16 = -1, r16
+   (p0)        cmp.ne.unc      p6, p7 = r0, r0
+       ;;
+   (p8)        cmp.ltu         p6, p7 = r15, r36
+   (p8)        sub             r15 = r15, r36
+   (p8)        add             r18 = 1, r18            C q = q + 1;    done if: rH > 0
+       ;;
+       .pred.rel "mutex",p6,p7
+   (p6)        cmp.ne          p8, p9 = 1, r16         C is rH != 0 still?
+   (p7)        cmp.ne          p8, p9 = 0, r16         C is rH != 0 still?
+       ;;
+   (p8)        sub             r15 = r15, r36
+   (p8)        add             r18 = 1, r18            C q = q + 1;    done if: rH > 0
+       ;;
+       cmp.ltu         p6, p7 = r15, r36       C p7 = remainder still >= vl
+       ;;
+   (p7)        sub             r15 = r15, r36
+   (p7)        add             r18 = 1, r18            C q = q + 1;    done if: rH > 0
+       ;;
+       st8             [r32] = r18, -8         C store quotient limb
+       mov             r38 = r15               C r = new remainder
+.L435:
+       adds            r35 = -1, r33           C loop count = qxn - 1
+       cmp.le          p6, p7 = 1, r33         C p7 = no fraction limbs requested
+   (p7)        br.cond.dpnt    .Lend4
+       ;;
+       setf.sig        f7 = r38                C f7 = nh (current remainder)
+       setf.sig        f10 = r36               C f10 = vl
+       mov             ar.lc = r35
+       ;;
+.Loop4:
+       xma.hu          f11 = f7, f6, f0        C high(nh * inverse)
+       ;;
+       xma.l           f8 = f11, f12, f7       C q = q + nh
+       ;;
+       getf.sig        r18 = f8
+       xma.hu          f9 = f8, f10, f0        C high(q * vl)
+       xma.l           f8 = f8, f10, f0        C low(q * vl)
+       ;;
+       getf.sig        r16 = f9
+       getf.sig        r15 = f8
+       ;;
+       cmp.ltu         p6, p7 = 0, r15         C fraction limbs: the low dividend limb is 0
+       sub             r15 = 0, r15            C rL = 0 - low(q * vl)
+       sub             r16 = r38, r16          C rH = nh - high(q * vl)
+       ;;
+   (p6)        cmp.ne          p8, p9 = 1, r16         C is rH != 0?
+   (p7)        cmp.ne          p8, p9 = 0, r16         C is rH != 0?
+   (p6)        add             r16 = -1, r16
+   (p0)        cmp.ne.unc      p6, p7 = r0, r0
+       ;;
+   (p8)        cmp.ltu         p6, p7 = r15, r36
+   (p8)        sub             r15 = r15, r36
+   (p8)        add             r18 = 1, r18            C q = q + 1;    done if: rH > 0
+       ;;
+       .pred.rel "mutex",p6,p7
+   (p6)        cmp.ne          p8, p9 = 1, r16         C is rH != 0 still?
+   (p7)        cmp.ne          p8, p9 = 0, r16         C is rH != 0 still?
+       cmp.ltu         p6, p7 = r15, r36       C speculative
+       sub             r28 = r15, r36          C speculative, just for cmp
+       ;;
+   (p8)        cmp.ltu         p6, p7 = r28, r36       C redo last cmp if needed
+   (p8)        mov             r15 = r28
+   (p8)        add             r18 = 1, r18            C q = q + 1;    done if: rH > 0
+       ;;
+   (p6)        setf.sig        f7 = r15
+   (p7)        sub             r15 = r15, r36
+   (p7)        add             r18 = 1, r18            C q = q + 1;    done if: rH > 0
+       ;;
+   (p7)        setf.sig        f7 = r15
+       st8             [r32] = r18, -8         C store quotient limb
+       mov             r38 = r15               C r = new remainder
+       br.cloop.dptk   .Loop4
+       ;;
+.Lend4:
+       shr.u           r8 = r38, r40           C return value = r >> cnt
+.Lret:
+       mov             ar.pfs = r42            C restore the state saved in the prologue
+       mov             ar.lc = r44
+       mov             b0 = r41
+       br.ret.sptk.many b0
+EPILOGUE()
+ASM_END()
diff --git a/mpn/ia64/divrem_2.asm b/mpn/ia64/divrem_2.asm

new file mode 100644 (file)

index 0000000..da3e9d6
--- /dev/null
+++ b/mpn/ia64/divrem_2.asm
@@ -0,0 +1,264 @@
+dnl  IA-64 mpn_divrem_2 -- Divide an n-limb number by a 2-limb number.
+
+dnl  Copyright 2004, 2005 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C         cycles/limb
+C Itanium:    63
+C Itanium 2:  46
+
+
+C TODO
+C  * Further optimize the loop.  We could probably do some more trickery with
+C    arithmetic in the FPU, or perhaps use a non-zero addend of xma in more
+C    places.
+C  * Software pipeline for perhaps 5 saved cycles, around the end and start of
+C    the loop.
+C  * Schedule code outside of loop better.
+C  * Update the comments.  They are not using the same name for the same
+C    logical quantity.
+C  * Handle conditional zeroing of r31 in loop more cleanly.
+C  * Inline mpn_invert_limb and schedule its insns across the entire init code.
+C  * Ultimately, use 2-limb, or perhaps 3-limb or 4-limb inverse.
+
+define(`qp',`r32')
+define(`qxn',`r33')
+define(`np',`r34')
+define(`nn',`r35')
+define(`dp',`r36')
+
+define(`fnh',`f11')
+define(`fminus1',`f10')
+define(`fd0',`f13')
+define(`fd1',`f14')
+define(`d0',`r39')
+define(`d1',`r36')
+define(`fnl',`f32')
+define(`fdinv',`f12')
+
+define(`R1',`r38') define(`R0',`r37')
+define(`P1',`r28') define(`P0',`r27')
+
+ASM_START()
+
+C HP's assembler requires these declarations for importing mpn_invert_limb
+       .global mpn_invert_limb
+       .type   mpn_invert_limb,@function
+
+PROLOGUE(mpn_divrem_2)
+       .prologue                               C divide {np,nn} by the 2-limb divisor at dp, qxn fraction limbs; quotient at qp, remainder left at np
+       .save ar.pfs, r42
+       .save ar.lc, r44
+       .save rp, r41
+ifdef(`HAVE_ABI_32',
+`      addp4           qp = 0, qp              C M I
+       addp4           np = 0, np              C M I
+       addp4           dp = 0, dp              C M I
+       zxt4            nn = nn                 C I
+       zxt4            qxn = qxn               C I
+       ;;
+')
+
+       alloc           r42 = ar.pfs, 5,8,1,0   C M2
+       ld8             d0 = [dp], 8            C M0M1  d0
+       mov             r44 = ar.lc             C I0
+       shladd          np = nn, 3, np          C M I
+       ;;
+       ld8             d1 = [dp]               C M0M1  d1
+       mov             r41 = b0                C I0
+       add             r15 = -8, np            C M I
+       add             np = -16, np            C M I
+       mov             r40 = r0                C M I
+       ;;
+       ld8             R1 = [r15]              C M0M1  n1
+       ld8             R0 = [r34], -8          C M0M1  n0
+       ;;
+       cmp.ltu         p6, p0 = d1, R1         C M I
+       cmp.eq          p8, p0 = d1, R1         C M I
+       ;;
+  (p8) cmp.leu         p6, p0 = d0, R0         C high limbs equal: decide on the low limbs
+       cmp.ltu         p8, p9 = R0, d0         C p8 = borrow from n0 - d0, used at .L_high_limb_1
+  (p6) br.cond.dpnt    .L_high_limb_1          C FIXME: inline!
+.L8:
+
+       mov             r45 = d1                C outgoing argument for mpn_invert_limb = d1
+       br.call.sptk.many b0 = mpn_invert_limb  C FIXME: inline+schedule
+       ;;
+       setf.sig        fd1 = d1                C d1
+       setf.sig        fd0 = d0                C d0
+       add             r14 = r33, r35          C nn + qxn
+       ;;
+       setf.sig        fdinv = r8              C dinv
+       mov             r9 = -1
+       add             r35 = -3, r14           C r35 = nn + qxn - 3: loop count and quotient index
+       ;;
+       setf.sig        fminus1 = r9
+       cmp.gt          p6, p0 = r0, r35        C p6 = no quotient limbs to develop
+       shladd          qp = r35, 3, qp         C qp = qp + r35 limbs; stores walk downwards
+       mov             ar.lc = r35
+       mov             r31 = 0                 C n0
+  (p6) br.cond.dpnt    .Ldone
+       ;;
+       ALIGN(16)
+C *** MAIN LOOP START ***
+.Loop:         C 00
+       mov             r15 = R0                C nadj = n10
+       cmp.le          p14, p15 = 0, R0        C check high bit of R0
+       cmp.le          p8, p0 = r33, r35       C dividend limbs remaining?
+       ;;      C 01
+       .pred.rel "mutex", p14, p15
+  (p8) ld8             r31 = [r34], -8         C n0
+  (p15)        add             r15 = d1, R0            C nadj = n10 + d1
+  (p15)        add             r14 = 1, R1             C nh + (nl:63)
+  (p14)        mov             r14 = R1                C nh
+       cmp.eq          p6, p0 = d1, R1         C nh == d1
+  (p6) br.cond.spnt    .L_R1_eq_d1
+       ;;      C 02
+       setf.sig        f8 = r14                C n2 + (nl:63)
+       setf.sig        f15 = r15               C nadj
+       sub             r23 = -1, R1            C r23 = ~nh
+       ;;      C 03
+       setf.sig        fnh = r23
+       setf.sig        fnl = R0
+       ;;      C 08
+       xma.hu          f7 = fdinv, f8, f15     C xh = HI(dinv*(nh-nmask)+nadj)
+       ;;      C 12
+       xma.l           f7 = f7, fminus1, fnh   C nh + xh
+       ;;      C 16
+       getf.sig        r14 = f7
+       xma.hu          f9 = f7, fd1, fnl       C xh = HI(q1*d1+nl)
+       xma.l           f33 = f7, fd1, fnl      C xl = LO(q1*d1+nl)
+       ;;      C 20
+       getf.sig        r16 = f9
+       sub             r24 = d1, R1
+               C 21
+       getf.sig        r17 = f33
+       ;;      C 25
+       cmp.eq          p6, p7 = r16, r24
+       ;;      C 26
+       .pred.rel "mutex", p6, p7
+  (p6) xma.l           f8 = f7, fminus1, f0    C f8 = -f7
+  (p7) xma.l           f8 = f7,fminus1,fminus1 C f8 = -f7-1
+       ;;      C 27
+       .pred.rel "mutex", p6, p7
+  (p6) sub             r18 = 0, r14            C q = -q1
+  (p7) sub             r18 = -1, r14           C q = -q1-1
+  (p6) add             r14 = 0, r17            C n1 = xl
+  (p7) add             r14 = d1, r17           C n1 = xl + d1
+       ;;      C 30
+       xma.hu          f9 = fd0, f8, f0        C d0*(-f7-1) = -d0*f7-d0
+       xma.l           f35 = fd0, f8, f0
+       ;;      C 34
+       getf.sig        P1 = f9         C P1
+               C 35
+       getf.sig        P0 = f35                C P0
+       ;;
+.L_adj:                C 40
+       cmp.ltu         p8, p0 = r31, P0        C p8 = cy from low limb
+       cmp.ltu         p6, p0 = r14, P1        C p6 = prel cy from high limb
+       sub             R0 = r31, P0
+       sub             R1 = r14, P1
+       ;;      C 41
+  (p8) cmp.eq.or       p6, p0 = 0, R1          C p6 = final cy from high limb
+  (p8) add             R1 = -1, R1
+       cmp.ne          p10, p0 = r0, r0        C clear p10 FIXME: use unc below!
+       cmp.ne          p13, p0 = r0, r0        C clear p13 FIXME: use unc below!
+       ;;      C 42
+  (p6) add             R0 = R0, d0
+  (p6) add             R1 = R1, d1
+  (p6) add             r18 = -1, r18           C q--
+       ;;      C 43
+  (p6) cmp.ltu         p10, p0 = R0, d0
+  (p6) cmp.ltu         p0, p13 = R1, d1
+       ;;      C 44
+  (p10)        cmp.ne.and      p0, p13 = -1, R1        C p13 = !cy
+  (p10)        add             R1 = 1, R1
+  (p13)        br.cond.spnt    .L_two_too_big          C jump if not cy
+       ;;      C 45
+       st8             [qp] = r18, -8
+       add             r35 = -1, r35
+       mov             r31 = 0                 C n0, next iteration
+       br.cloop.sptk   .Loop
+C *** MAIN LOOP END ***
+       ;;
+.Ldone:
+       mov             r8 = r40                C return value: 1 if .L_high_limb_1 was taken, else 0
+       mov             b0 = r41
+       add             r21 = 8, r34            C address for the low remainder limb
+       add             r22 = 16, r34           C address for the high remainder limb
+       ;;
+       st8             [r21] = R0              C store low remainder limb
+       st8             [r22] = R1              C store high remainder limb
+       mov             ar.pfs = r42
+       mov             ar.lc = r44
+       br.ret.sptk.many b0
+
+.L_high_limb_1:
+       .pred.rel "mutex", p8, p9
+       sub             R0 = R0, d0             C initial {n1,n0} >= {d1,d0}: subtract the divisor once
+  (p8) sub             R1 = R1, d1, 1          C high limb, with borrow from the low limb
+  (p9) sub             R1 = R1, d1             C high limb, no borrow
+       mov             r40 = 1                 C returned in r8 at .Ldone
+       br.sptk         .L8
+       ;;
+
+.L_two_too_big:
+       add             R0 = R0, d0             C add the divisor back a second time
+       add             R1 = R1, d1
+       ;;
+       add             r18 = -1, r18           C q--
+       cmp.ltu         p10, p0 = R0, d0        C p10 = carry out of the low limb
+       ;;
+  (p10)        add             R1 = 1, R1
+       st8             [qp] = r18, -8
+       add             r35 = -1, r35
+       mov             r31 = 0                 C n0, next iteration
+       br.cloop.sptk   .Loop
+       br.sptk         .Ldone
+
+.L_R1_eq_d1:
+       add             r14 = R0, d1            C r = R0 + d1
+       mov             r18 = -1                C q = -1
+       ;;
+       cmp.leu         p6, p0 = R0, r14
+ (p6)  br.cond.spnt    .L20                    C jump unless cy
+       ;;
+       sub             P1 = r14, d0
+       add             R0 = r31, d0
+       ;;
+       cmp.ltu         p8, p9 = R0, r31        C p8 = carry out of r31 + d0
+       ;;
+       .pred.rel "mutex", p8, p9
+       st8             [qp] = r18, -8
+  (p8) add             R1 = r0, P1, 1          C R1 = n1 - P1 - cy
+  (p9) add             R1 = r0, P1             C R1 = n1 - P1
+       add             r35 = -1, r35
+       mov             r31 = 0                 C n0, next iteration
+       br.cloop.sptk   .Loop
+       br.sptk         .Ldone
+       ;;
+.L20:  cmp.ne          p6, p7 = 0, d0
+       ;;
+       .pred.rel "mutex", p6, p7
+  (p6) add             P1 = -1, d0             C d0 != 0: P1 = d0 - 1
+  (p7) mov             P1 = d0                 C d0 == 0: P1 = 0
+       sub             P0 = r0, d0             C P0 = -d0
+       br.sptk         .L_adj
+EPILOGUE()
+ASM_END()
diff --git a/mpn/ia64/gcd_1.asm b/mpn/ia64/gcd_1.asm

new file mode 100644 (file)

index 0000000..c6efa5d
--- /dev/null
+++ b/mpn/ia64/gcd_1.asm
@@ -0,0 +1,231 @@
+dnl  Itanium-2 mpn_gcd_1 -- mpn by 1 gcd.
+
+dnl  Copyright 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C           cycles/bitpair (1x1 gcd)
+C Itanium:      14 (approx)
+C Itanium 2:     6.3
+
+
+C mpn_gcd_1 (mp_srcptr xp, mp_size_t xsize, mp_limb_t y);
+C
+C The entry sequence is designed to expect xsize>1 and hence a modexact
+C call.  This ought to be more common than a 1x1 operation.  Our critical
+C path is thus stripping factors of 2 from y, calling modexact, then
+C stripping factors of 2 from the x remainder returned.
+C
+C The common factors of 2 between x and y must be determined using the
+C original x, not the remainder from the modexact.  This is done with
+C x_orig which is xp[0].  There's plenty of time to do this while the rest
+C of the modexact etc is happening.
+C
+C It's possible xp[0] is zero.  In this case the trailing zeros calculation
+C popc((x-1)&~x) gives 63, and that's clearly no less than what y will
+C have, making min(x_twos,y_twos) == y_twos.
+C
+C The main loop consists of transforming x,y to abs(x-y),min(x,y), and then
+C stripping factors of 2 from abs(x-y).  Those factors of two are
+C determined from just y-x, without the abs(), since there's the same
+C number of trailing zeros on n or -n in twos complement.  That makes the
+C dependent chain
+C
+C      cycles
+C        1    sub     x-y and x-y-1
+C        3    andcm   (x-y-1)&~(x-y)
+C        2    popcnt  trailing zeros
+C        3    shr.u   strip abs(x-y)
+C       ---
+C        9
+C
+C The selection of x-y versus y-x for abs(x-y), and the selection of the
+C minimum of x and y, is done in parallel with the above.
+C
+C The algorithm takes about 0.68 iterations per bit (two N bit operands) on
+C average, hence the final 6.3 cycles/bitpair.
+C
+C The loop is not as fast as one might hope, since there's extra latency
+C from andcm going across to the `multimedia' popcnt, and vice versa from
+C multimedia shr.u back to the integer sub.
+C
+C The loop branch is .sptk.clr since we usually expect a good number of
+C iterations, and the iterations are data dependent so it's unlikely past
+C results will predict anything much about the future.
+C
+C Not done:
+C
+C An alternate algorithm which didn't strip all twos, but instead applied
+C tbit and predicated extr on x, and then y, was attempted.  The loop was 6
+C cycles, but the algorithm is an average 1.25 iterations per bitpair for a
+C total 7.25 c/bp, which is slower than the current approach.
+C
+C Alternatives:
+C
+C Perhaps we could do something tricky by extracting a few high bits and a
+C few low bits from the operands, and looking up a table which would give a
+C set of predicates to control some shifts or subtracts or whatever.  That
+C could knock off multiple bits per iteration.
+C
+C The right shifts are a bit of a bottleneck (shr at 2 or 3 cycles, or extr
+C only going down I0), perhaps it'd be possible to shift left instead,
+C using add.  That would mean keeping track of the lowest not-yet-zeroed
+C bit, using some sort of mask.
+C
+C Itanium-1:
+C
+C This code is not designed for itanium-1 and in fact doesn't run well on
+C that chip.  The loop seems to be about 21 cycles, probably because we end
+C up with a 10 cycle replay for not forcibly scheduling the shr.u latency.
+C Lack of branch hints might introduce a couple of bubbles too.
+C
+
+ASM_START()
+       .explicit                               C What does this mean?
+
+C HP's assembler requires these declarations for importing mpn_modexact_1c_odd
+       .global mpn_modexact_1c_odd
+       .type   mpn_modexact_1c_odd,@function
+
+PROLOGUE(mpn_gcd_1)
+
+               C r32   xp
+               C r33   xsize
+               C r34   y
+
+define(x,           r8)
+define(xp_orig,     r32)
+define(xsize,       r33)
+define(y,           r34)  define(inputs, 3)
+define(save_rp,     r35)
+define(save_pfs,    r36)
+define(x_orig,      r37)
+define(x_orig_one,  r38)
+define(y_twos,      r39)  define(locals, 5)
+define(out_xp,      r40)
+define(out_xsize,   r41)
+define(out_divisor, r42)
+define(out_carry,   r43)  define(outputs, 4)
+
+       .prologue
+{ .mmi;
+ifdef(`HAVE_ABI_32',
+`              addp4   r9 = 0, xp_orig   define(xp,r9)',       C M0
+`                                        define(xp,xp_orig)')
+       .save ar.pfs, save_pfs
+               alloc   save_pfs = ar.pfs, inputs, locals, outputs, 0 C M2
+       .save rp, save_rp
+               mov     save_rp = b0            C I0
+}{     .body
+               add     r10 = -1, y             C M3  y-1
+}              ;;
+
+{ .mmi;                ld8     x = [xp]                C M0  x = xp[0] if no modexact
+               ld8     x_orig = [xp]           C M1  orig x for common twos
+               cmp.ne  p6,p0 = 1, xsize        C I0
+}{ .mmi;       andcm   y_twos = r10, y         C M2  (y-1)&~y
+               mov     out_xp = xp_orig        C M3
+               mov     out_xsize = xsize       C I1
+}              ;;
+
+               mov     out_carry = 0           C 4th outgoing argument (carry) = 0
+
+               C
+
+               popcnt  y_twos = y_twos         C I0  y twos
+               ;;
+
+               C
+
+{ .mmi;                add     x_orig_one = -1, x_orig C M0  orig x-1
+               shr.u   out_divisor = y, y_twos C I0  y without twos
+}{             shr.u   y = y, y_twos           C I1  y without twos
+       (p6)    br.call.sptk.many b0 = mpn_modexact_1c_odd  C if xsize>1
+}              ;;
+
+               C modexact can leave x==0
+{ .mmi;                cmp.eq  p6,p0 = 0, x            C M0  if {xp,xsize} % y == 0
+               andcm   x_orig = x_orig_one, x_orig     C M1  orig (x-1)&~x
+               add     r9 = -1, x              C I0  x-1
+}              ;;
+
+{ .mmi;                andcm   r9 = r9, x              C M0  (x-1)&~x
+               mov     b0 = save_rp            C I0
+}              ;;
+
+               C
+
+               popcnt  x_orig = x_orig         C I0  orig x twos
+
+               popcnt  r9 = r9                 C I0  x twos
+               ;;
+
+               C
+
+{              cmp.lt  p7,p0 = x_orig, y_twos  C M0  orig x_twos < y_twos
+               shr.u   x = x, r9               C I0  x odd
+}              ;;
+
+{      (p7)    mov     y_twos = x_orig         C M0  common twos
+               add     r10 = -1, y             C I0  y-1
+       (p6)    br.dpnt.few .Ldone_y            C B0  x%y==0 then result y
+}              ;;
+
+               C
+
+
+               C No noticeable difference in speed for the loop aligned to
+               C 32 or just 16.
+.Ltop:
+               C r8    x
+               C r10  y-1
+               C r34   y
+               C r39   common twos (y_twos), for use at end
+
+{  .mmi;       cmp.gtu p8,p9 = x, y    C M0  x>y
+               cmp.ne  p10,p0 = x, y   C M1  x!=y, loop again
+               sub     r9 = y, x       C I0  d = y - x
+}{ .mmi;       sub     r10 = r10, x    C M2  d-1 = y - x - 1
+}              ;;
+
+{ .mmi;        .pred.rel "mutex", p8, p9
+       (p8)    sub     x = x, y        C M0  x>y  use x=x-y, y unchanged
+       (p9)    mov     y = x           C M1  y>=x use y=x
+       (p9)    mov     x = r9          C I0  y>=x use x=y-x
+}{ .mmi;       andcm   r9 = r10, r9    C M2  (d-1)&~d
+               ;;
+
+               add     r10 = -1, y     C M0  new y-1
+               popcnt  r9 = r9         C I0  twos on x-y
+}              ;;
+
+{              shr.u   x = x, r9       C I0   new x without twos
+       (p10)   br.sptk.few.clr .Ltop
+}              ;;
+
+
+
+               C result is y
+.Ldone_y:
+               shl     r8 = y, y_twos          C I   common factors of 2
+               ;;
+               mov     ar.pfs = save_pfs       C I0
+               br.ret.sptk.many b0             C return value is in r8
+
+EPILOGUE()
diff --git a/mpn/ia64/gmp-mparam.h b/mpn/ia64/gmp-mparam.h

new file mode 100644 (file)

index 0000000..323c167
--- /dev/null
+++ b/mpn/ia64/gmp-mparam.h
@@ -0,0 +1,207 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2009, 2010 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 64
+#define BYTES_PER_MP_LIMB 8
+
+/* 1300MHz Itanium2 (babe.fsffrance.org) */
+
+
+#define MOD_1_NORM_THRESHOLD                 0  /* always */
+#define MOD_1_UNNORM_THRESHOLD               0  /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          4
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          8
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        22
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     22
+#define USE_PREINV_DIVREM_1                  1  /* native */
+#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD        MP_SIZE_T_MAX  /* never */
+
+#define MUL_TOOM22_THRESHOLD                44
+#define MUL_TOOM33_THRESHOLD                89
+#define MUL_TOOM44_THRESHOLD               232
+#define MUL_TOOM6H_THRESHOLD               351
+#define MUL_TOOM8H_THRESHOLD               454
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD     101
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     160
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     138
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD     159
+
+#define SQR_BASECASE_THRESHOLD              26
+#define SQR_TOOM2_THRESHOLD                119
+#define SQR_TOOM3_THRESHOLD                141
+#define SQR_TOOM4_THRESHOLD                282
+#define SQR_TOOM6_THRESHOLD                375
+#define SQR_TOOM8_THRESHOLD                527
+
+#define MULMOD_BNM1_THRESHOLD               24
+#define SQRMOD_BNM1_THRESHOLD               19
+
+#define MUL_FFT_MODF_THRESHOLD             888  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    888, 5}, {     31, 6}, {     16, 5}, {     33, 6}, \
+    {     17, 5}, {     35, 6}, {     28, 7}, {     15, 6}, \
+    {     33, 7}, {     17, 6}, {     35, 7}, {     19, 6}, \
+    {     39, 7}, {     29, 8}, {     15, 7}, {     33, 8}, \
+    {     17, 7}, {     37, 8}, {     19, 7}, {     41, 8}, \
+    {     21, 7}, {     43, 8}, {     23, 7}, {     47, 8}, \
+    {     27, 7}, {     55, 8}, {     31, 7}, {     63, 8}, \
+    {     37, 9}, {     19, 8}, {     43, 9}, {     23, 8}, \
+    {     51, 9}, {     27, 8}, {     55, 9}, {     31, 8}, \
+    {     63, 9}, {     35, 8}, {     71, 9}, {     39, 8}, \
+    {     79, 9}, {     43,10}, {     23, 9}, {     47, 8}, \
+    {     95, 9}, {     55,10}, {     31, 9}, {     71,10}, \
+    {     39, 9}, {     83,10}, {     47, 9}, {     99,10}, \
+    {     55,11}, {     31,10}, {     63, 9}, {    127,10}, \
+    {     71, 9}, {    143,10}, {     87,11}, {     47,10}, \
+    {    111,12}, {     31,11}, {     63,10}, {    143,11}, \
+    {     79,10}, {    167,11}, {     95,10}, {    199,11}, \
+    {    111,12}, {     63,11}, {    127,10}, {    255,11}, \
+    {    143,10}, {    287,11}, {    159,10}, {    319,12}, \
+    {     95,11}, {    223,13}, {     63,12}, {    127,11}, \
+    {    287,12}, {    159,11}, {    335,12}, {    191,11}, \
+    {    383,10}, {    767,11}, {    399,12}, {    223,13}, \
+    {    127,12}, {    255,11}, {    511,10}, {   1023,12}, \
+    {    287,11}, {    575,10}, {   1151,12}, {    319,11}, \
+    {    639,10}, {   1279,11}, {    671,13}, {    191,12}, \
+    {    383,11}, {    767,10}, {   1535,12}, {    415,11}, \
+    {    831,14}, {    127,13}, {    255,12}, {    511,11}, \
+    {   1023,12}, {    543,11}, {   1087,12}, {    575,13}, \
+    {    319,12}, {    639,11}, {   1279,12}, {    671,11}, \
+    {   1343,12}, {    703,11}, {   1471,13}, {    383,12}, \
+    {    767,11}, {   1535,12}, {    799,11}, {   1599,12}, \
+    {    831,13}, {    447,12}, {    959,14}, {    255,13}, \
+    {    511,12}, {   1055,11}, {   2111,12}, {   1087,13}, \
+    {    575,12}, {   1215,11}, {   2431,12}, {   1247,13}, \
+    {    639,12}, {   1279,11}, {   2559,12}, {   1343,13}, \
+    {    703,12}, {   1471,14}, {    383,13}, {    767,12}, \
+    {   1599,13}, {    831,12}, {   1663,11}, {   3327,12}, \
+    {   1727,13}, {    895,12}, {   1791,13}, {    959,15}, \
+    {    255,14}, {    511,13}, {   1023,12}, {   2047,13}, \
+    {   1087,12}, {   2175,13}, {   1151,12}, {   2303,13}, \
+    {   1215,11}, {   4863,12}, {   2495,14}, {    639,13}, \
+    {   1343,12}, {   2687,13}, {   1471,12}, {   2943,14}, \
+    {    767,13}, {   1599,12}, {   3199,13}, {   1727,12}, \
+    {   3455,14}, {    895,13}, {   1983,12}, {   3967,15}, \
+    {    511,14}, {   1023,13}, {   2111,12}, {   4223,13}, \
+    {   2239,12}, {   4479,13}, {   2495,14}, {   1279,13}, \
+    {   2751,14}, {   1407,13}, {   2943,15}, {    767,14}, \
+    {   1535,13}, {   3199,14}, {   1663,13}, {   3455,14}, \
+    {   1791,12}, {   7167,14}, {   1919,13}, {   3967,16}, \
+    {    511,15}, {   1023,14}, {   2175,13}, {   4351,14}, \
+    {   2431,15}, {   1279,14}, {   2943,13}, {   5887,15}, \
+    {   1535,14}, {   3199,13}, {   6399,14}, {  16384,15}, \
+    {  32768,16}, {  65536,17}, { 131072,18}, { 262144,19}, \
+    { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
+    {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 217
+#define MUL_FFT_THRESHOLD                 9856
+
+#define SQR_FFT_MODF_THRESHOLD             751  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    751, 5}, {     35, 6}, {     18, 5}, {     37, 6}, \
+    {     29, 7}, {     15, 6}, {     33, 7}, {     17, 6}, \
+    {     35, 7}, {     29, 8}, {     15, 7}, {     37, 8}, \
+    {     19, 7}, {     41, 8}, {     21, 7}, {     43, 8}, \
+    {     23, 7}, {     47, 8}, {     43, 9}, {     23, 8}, \
+    {     51, 9}, {     27, 8}, {     55, 9}, {     31, 8}, \
+    {     63, 9}, {     39, 8}, {     79, 9}, {     43,10}, \
+    {     23, 9}, {     47, 8}, {     95, 9}, {     55,10}, \
+    {     31, 9}, {     67,10}, {     39, 9}, {     83,10}, \
+    {     47, 9}, {     99,10}, {     55,11}, {     31,10}, \
+    {     63, 9}, {    127,10}, {     79,11}, {     47,10}, \
+    {    103,12}, {     31,11}, {     63,10}, {    143,11}, \
+    {     79,10}, {    159,11}, {     95,10}, {    199,11}, \
+    {    111,12}, {     63,11}, {    127,10}, {    255,11}, \
+    {    143,10}, {    287,11}, {    159,12}, {     95,11}, \
+    {    191,10}, {    383,11}, {    207,13}, {     63,12}, \
+    {    127,11}, {    255,10}, {    511,11}, {    271,12}, \
+    {    159,11}, {    319,10}, {    639,11}, {    335,12}, \
+    {    191,11}, {    383,10}, {    767,12}, {    223,13}, \
+    {    127,11}, {    511,10}, {   1023,11}, {    527,12}, \
+    {    287,11}, {    575,10}, {   1151,11}, {    591,12}, \
+    {    319,11}, {    639,13}, {    191,12}, {    383,11}, \
+    {    767,10}, {   1535,11}, {    799,10}, {   1599, 9}, \
+    {   3199,14}, {    127,13}, {    255,12}, {    511, 9}, \
+    {   4095,10}, {   2111,12}, {    543,11}, {   1087,10}, \
+    {   2239,12}, {    575,10}, {   2303,13}, {    319,12}, \
+    {    671,11}, {   1471,13}, {    383,11}, {   1599,12}, \
+    {    831,11}, {   1663,12}, {    863,10}, {   3455,13}, \
+    {    447,12}, {    895,11}, {   1791,14}, {    255,13}, \
+    {    511,12}, {   1023,11}, {   2111,12}, {   1087,11}, \
+    {   2239,13}, {    575,12}, {   1215,11}, {   2495,13}, \
+    {    639,12}, {   1343,13}, {    703,12}, {   1407,14}, \
+    {    383,13}, {    767,12}, {   1599,13}, {    831,12}, \
+    {   1727,11}, {   3455,12}, {   1791,15}, {    255,14}, \
+    {    511,13}, {   1023,12}, {   2111,11}, {   4223,12}, \
+    {   2239,11}, {   4479,10}, {   8959,11}, {   4607,13}, \
+    {   1215,14}, {    639,13}, {   1343,12}, {   2815,13}, \
+    {   1471,12}, {   2943,14}, {    767,13}, {   1599,12}, \
+    {   3199,13}, {   1727,12}, {   3455,14}, {    895,13}, \
+    {   1855,12}, {   3711,13}, {   1983,12}, {   3967,15}, \
+    {    511,14}, {   1023,13}, {   2111,12}, {   4223,13}, \
+    {   2239,12}, {   4479,14}, {   1151,13}, {   2495,14}, \
+    {   1279,13}, {   2687,14}, {   1407,13}, {   2943,15}, \
+    {    767,14}, {   1535,13}, {   3071,14}, {   1663,13}, \
+    {   3327,14}, {   1791,16}, {    511,15}, {   1023,14}, \
+    {   2047,13}, {   4223,14}, {   2175,13}, {   4479,12}, \
+    {   8959,14}, {   2303,13}, {   4735,14}, {   2431,15}, \
+    {   1279,14}, {   2943,15}, {   1535,14}, {   3071,13}, \
+    {   6143,14}, {  16384,15}, {  32768,16}, {  65536,17}, \
+    { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
+    {2097152,22}, {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 203
+#define SQR_FFT_THRESHOLD                 7552
+
+#define MULLO_BASECASE_THRESHOLD            17
+#define MULLO_DC_THRESHOLD                  91
+#define MULLO_MUL_N_THRESHOLD            19187
+
+#define DC_DIV_QR_THRESHOLD                 72
+#define DC_DIVAPPR_Q_THRESHOLD             254
+#define DC_BDIV_QR_THRESHOLD               117
+#define DC_BDIV_Q_THRESHOLD                292
+
+#define INV_MULMOD_BNM1_THRESHOLD           86
+#define INV_NEWTON_THRESHOLD               178
+#define INV_APPR_THRESHOLD                 179
+
+#define BINV_NEWTON_THRESHOLD              300
+#define REDC_1_TO_REDC_2_THRESHOLD           2
+#define REDC_2_TO_REDC_N_THRESHOLD         167
+
+#define MU_DIV_QR_THRESHOLD               1787
+#define MU_DIVAPPR_Q_THRESHOLD            1470
+#define MUPI_DIV_QR_THRESHOLD                0  /* always */
+#define MU_BDIV_QR_THRESHOLD              1787
+#define MU_BDIV_Q_THRESHOLD               2089
+
+#define MATRIX22_STRASSEN_THRESHOLD         27
+#define HGCD_THRESHOLD                     139
+#define GCD_DC_THRESHOLD                   469
+#define GCDEXT_DC_THRESHOLD                496
+#define JACOBI_BASE_METHOD                   1
+
+#define GET_STR_DC_THRESHOLD                14
+#define GET_STR_PRECOMPUTE_THRESHOLD        22
+#define SET_STR_DC_THRESHOLD              1474
+#define SET_STR_PRECOMPUTE_THRESHOLD      3495
diff --git a/mpn/ia64/hamdist.asm b/mpn/ia64/hamdist.asm

new file mode 100644 (file)

index 0000000..92dffce
--- /dev/null
+++ b/mpn/ia64/hamdist.asm
@@ -0,0 +1,352 @@
+dnl  IA-64 mpn_hamdist -- mpn hamming distance.
+
+dnl  Copyright 2003, 2004, 2005 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C Itanium:       2
+C Itanium 2:     1
+
+C INPUT PARAMETERS
+define(`up', `r32')            C first source pointer, post-incremented by 8 per load
+define(`vp', `r33')            C second source pointer, post-incremented by 8 per load
+define(`n', `r34')             C limb count; reused below to derive the ar.lc loop count
+C Work registers: uN,vN = limbs loaded from up,vp; xN = uN ^ vN; cN = popcnt (xN).
+define(`u0',`r16') define(`u1',`r17') define(`u2',`r18') define(`u3',`r19')
+define(`v0',`r20') define(`v1',`r21') define(`v2',`r22') define(`v3',`r23')
+define(`x0',`r24') define(`x1',`r25') define(`x2',`r26') define(`x3',`r27')
+define(`c0',`r28') define(`c1',`r29') define(`c2',`r30') define(`c3',`r31')
+define(`s',`r8')               C running sum of the cN counts; r8 also carries the result
+C mpn_hamdist (up, vp, n) -- return in r8 the sum of popcnt (up[i] ^ vp[i]), 0 <= i < n.
+C Assumes n >= 1: the first limb pair is loaded unconditionally.  ar.lc is restored on every path that writes it.
+ASM_START()
+PROLOGUE(mpn_hamdist)
+       .prologue
+ifdef(`HAVE_ABI_32',
+`      addp4           up = 0, up              C                       M I
+       addp4           vp = 0, vp              C                       M I
+       zxt4            n = n                   C                       I
+       ;;
+')
+C Load the first limb pair, save ar.lc in r2, then dispatch on n mod 4.
+ {.mmi;        ld8             r10 = [up], 8           C load first ulimb      M01
+       ld8             r11 = [vp], 8           C load first vlimb      M01
+       mov.i           r2 = ar.lc              C save ar.lc            I0
+}{.mmi;        and             r14 = 3, n              C                       M I
+       cmp.lt          p15, p0 = 4, n          C p15 = (4 < n)         M I
+       add             n = -5, n               C n -= 5, for ar.lc     M I
+       ;;
+}{.mmi;        cmp.eq          p6, p0 = 1, r14         C                       M I
+       cmp.eq          p7, p0 = 2, r14         C                       M I
+       cmp.eq          p8, p0 = 3, r14         C                       M I
+}{.bbb
+  (p6) br.dptk         .Lb01                   C                       B
+  (p7) br.dptk         .Lb10                   C                       B
+  (p8) br.dptk         .Lb11                   C                       B
+}
+
+C n mod 4 == 0.  With p15 clear (n == 4) this falls through to the .Lcj4 tail.
+.Lb00: ld8             u1 = [up], 8            C                       M01
+       ld8             v1 = [vp], 8            C                       M01
+       shr.u           n = n, 2                C                       I0
+       xor             x0 = r10, r11           C                       M I
+       ;;
+       ld8             u2 = [up], 8            C                       M01
+       ld8             v2 = [vp], 8            C                       M01
+       mov.i           ar.lc = n               C                       I0
+       xor             x1 = u1, v1             C                       M I
+       ;;
+       ld8             u3 = [up], 8            C                       M01
+       ld8             v3 = [vp], 8            C                       M01
+       xor             x2 = u2, v2             C                       M I
+       mov             s = 0                   C                       M I
+  (p15)        br.cond.dptk    .grt4                   C                       B
+       ;;
+       popcnt          c0 = x0                 C                       I0
+       xor             x3 = u3, v3             C                       M I
+       ;;
+       popcnt          c1 = x1                 C                       I0
+       ;;
+       popcnt          c2 = x2                 C                       I0
+       br              .Lcj4                   C                       B
+C n mod 4 == 0 and n > 4: keep loading, then enter the loop or its wind-down.
+.grt4: ld8             u0 = [up], 8            C                       M01
+       ld8             v0 = [vp], 8            C                       M01
+       xor             x1 = u1, v1             C                       M I
+       ;;
+       ld8             u1 = [up], 8            C                       M01
+       ld8             v1 = [vp], 8            C                       M01
+       xor             x2 = u2, v2             C                       M I
+       ;;
+       ld8             u2 = [up], 8            C                       M01
+       ld8             v2 = [vp], 8            C                       M01
+       popcnt          c0 = x0                 C                       I0
+       xor             x3 = u3, v3             C                       M I
+       ;;
+       ld8             u3 = [up], 8            C                       M01
+       ld8             v3 = [vp], 8            C                       M01
+       popcnt          c1 = x1                 C                       I0
+       xor             x0 = u0, v0             C                       M I
+       br.cloop.dpnt   .grt8                   C                       B
+
+       popcnt          c2 = x2                 C                       I0
+       xor             x1 = u1, v1             C                       M I
+       br              .Lcj8                   C                       B
+
+.grt8: ld8             u0 = [up], 8            C                       M01
+       ld8             v0 = [vp], 8            C                       M01
+       popcnt          c2 = x2                 C                       I0
+       xor             x1 = u1, v1             C                       M I
+       br              .LL00                   C                       B
+
+C n mod 4 == 1.  With p15 clear (n == 1) return at once; ar.lc is still unmodified.
+.Lb01: xor             x3 = r10, r11           C                       M I
+       shr.u           n = n, 2                C                       I0
+  (p15)        br.cond.dptk    .grt1                   C                       B
+       ;;
+       popcnt          r8 = x3                 C                       I0
+       br.ret.sptk.many b0                     C                       B
+C n mod 4 == 1 and n > 4.
+.grt1: ld8             u0 = [up], 8            C                       M01
+       ld8             v0 = [vp], 8            C                       M01
+       mov.i           ar.lc = n               C                       I0
+       ;;
+       ld8             u1 = [up], 8            C                       M01
+       ld8             v1 = [vp], 8            C                       M01
+       mov             s = 0                   C                       M I
+       ;;
+       ld8             u2 = [up], 8            C                       M01
+       ld8             v2 = [vp], 8            C                       M01
+       ;;
+       ld8             u3 = [up], 8            C                       M01
+       ld8             v3 = [vp], 8            C                       M01
+       xor             x0 = u0, v0             C                       M I
+       br.cloop.dpnt   .grt5                   C                       B
+
+       xor             x1 = u1, v1             C                       M I
+       ;;
+       popcnt          c3 = x3                 C                       I0
+       xor             x2 = u2, v2             C                       M I
+       ;;
+       popcnt          c0 = x0                 C                       I0
+       xor             x3 = u3, v3             C                       M I
+       ;;
+       popcnt          c1 = x1                 C                       I0
+       br              .Lcj5                   C                       B
+
+.grt5: ld8             u0 = [up], 8            C                       M01
+       ld8             v0 = [vp], 8            C                       M01
+       xor             x1 = u1, v1             C                       M I
+       ;;
+       ld8             u1 = [up], 8            C                       M01
+       ld8             v1 = [vp], 8            C                       M01
+       popcnt          c3 = x3                 C                       I0
+       xor             x2 = u2, v2             C                       M I
+       ;;
+       ld8             u2 = [up], 8            C                       M01
+       ld8             v2 = [vp], 8            C                       M01
+       popcnt          c0 = x0                 C                       I0
+       xor             x3 = u3, v3             C                       M I
+       ;;
+       ld8             u3 = [up], 8            C                       M01
+       ld8             v3 = [vp], 8            C                       M01
+       popcnt          c1 = x1                 C                       I0
+       xor             x0 = u0, v0             C                       M I
+       br.cloop.dpnt   .Loop                   C                       B
+       br              .Lend                   C                       B
+
+C n mod 4 == 2.  With p15 clear (n == 2) return at once; ar.lc is still unmodified.
+.Lb10: ld8             u3 = [up], 8            C                       M01
+       ld8             v3 = [vp], 8            C                       M01
+       xor             x2 = r10, r11           C                       M I
+  (p15)        br.cond.dptk    .grt2                   C                       B
+       ;;
+       xor             x3 = u3, v3             C                       M I
+       ;;
+       popcnt          c2 = x2                 C                       I0
+       ;;
+       popcnt          c3 = x3                 C                       I0
+       ;;
+       add             s = c2, c3              C                       M I
+       br.ret.sptk.many b0                     C                       B
+C n mod 4 == 2 and n > 4.
+.grt2: ld8             u0 = [up], 8            C                       M01
+       ld8             v0 = [vp], 8            C                       M01
+       shr.u           n = n, 2                C                       I0
+       ;;
+       ld8             u1 = [up], 8            C                       M01
+       ld8             v1 = [vp], 8            C                       M01
+       mov.i           ar.lc = n               C                       I0
+       mov             s = 0                   C                       M I
+       ;;
+       ld8             u2 = [up], 8            C                       M01
+       ld8             v2 = [vp], 8            C                       M01
+       xor             x3 = u3, v3             C                       M I
+       ;;
+       ld8             u3 = [up], 8            C                       M01
+       ld8             v3 = [vp], 8            C                       M01
+       xor             x0 = u0, v0             C                       M I
+       br.cloop.dptk   .grt6                   C                       B
+
+       popcnt          c2 = x2                 C                       I0
+       xor             x1 = u1, v1             C                       M I
+       ;;
+       popcnt          c3 = x3                 C                       I0
+       xor             x2 = u2, v2             C                       M I
+       ;;
+       popcnt          c0 = x0                 C                       I0
+       xor             x3 = u3, v3             C                       M I
+       br              .Lcj6                   C                       B
+
+.grt6: ld8             u0 = [up], 8            C                       M01
+       ld8             v0 = [vp], 8            C                       M01
+       popcnt          c2 = x2                 C                       I0
+       xor             x1 = u1, v1             C                       M I
+       ;;
+       ld8             u1 = [up], 8            C                       M01
+       ld8             v1 = [vp], 8            C                       M01
+       popcnt          c3 = x3                 C                       I0
+       xor             x2 = u2, v2             C                       M I
+       ;;
+       ld8             u2 = [up], 8            C                       M01
+       ld8             v2 = [vp], 8            C                       M01
+       popcnt          c0 = x0                 C                       I0
+       xor             x3 = u3, v3             C                       M I
+       br              .LL10                   C                       B
+
+C n mod 4 == 3.  With p15 clear (n == 3) return at once; ar.lc is still unmodified.
+.Lb11: ld8             u2 = [up], 8            C                       M01
+       ld8             v2 = [vp], 8            C                       M01
+       shr.u           n = n, 2                C                       I0
+       xor             x1 = r10, r11           C                       M I
+       ;;
+       ld8             u3 = [up], 8            C                       M01
+       ld8             v3 = [vp], 8            C                       M01
+       xor             x2 = u2, v2             C                       M I
+  (p15)        br.cond.dptk    .grt3                   C                       B
+       ;;
+       xor             x3 = u3, v3             C                       M I
+       ;;
+       popcnt          c1 = x1                 C                       I0
+       ;;
+       popcnt          c2 = x2                 C                       I0
+       ;;
+       popcnt          c3 = x3                 C                       I0
+       ;;
+       add             s = c1, c2              C                       M I
+       ;;
+       add             s = s, c3               C                       M I
+       br.ret.sptk.many b0                     C                       B
+C n mod 4 == 3 and n > 4.
+.grt3: ld8             u0 = [up], 8            C                       M01
+       ld8             v0 = [vp], 8            C                       M01
+       mov.i           ar.lc = n               C                       I0
+       ;;
+       ld8             u1 = [up], 8            C                       M01
+       ld8             v1 = [vp], 8            C                       M01
+       mov             s = 0                   C                       M I
+       ;;
+       ld8             u2 = [up], 8            C                       M01
+       ld8             v2 = [vp], 8            C                       M01
+       xor             x3 = u3, v3             C                       M I
+       ;;
+       ld8             u3 = [up], 8            C                       M01
+       ld8             v3 = [vp], 8            C                       M01
+       popcnt          c1 = x1                 C                       I0
+       xor             x0 = u0, v0             C                       M I
+       br.cloop.dptk   .grt7                   C                       B
+       popcnt          c2 = x2                 C                       I0
+       xor             x1 = u1, v1             C                       M I
+       ;;
+       popcnt          c3 = x3                 C                       I0
+       xor             x2 = u2, v2             C                       M I
+       br              .Lcj7                   C                       B
+
+.grt7: ld8             u0 = [up], 8            C                       M01
+       ld8             v0 = [vp], 8            C                       M01
+       popcnt          c2 = x2                 C                       I0
+       xor             x1 = u1, v1             C                       M I
+       ;;
+       ld8             u1 = [up], 8            C                       M01
+       ld8             v1 = [vp], 8            C                       M01
+       popcnt          c3 = x3                 C                       I0
+       xor             x2 = u2, v2             C                       M I
+       br              .LL11                   C                       B
+
+C Main loop, four limb pairs per pass.  Each group loads a new pair, xors an earlier pair,
+C popcnts an earlier xor and adds an earlier count to s.  .LL00/.LL11/.LL10 are mid-loop entries.
+       ALIGN(32)
+.Loop: ld8             u0 = [up], 8            C                       M01
+       ld8             v0 = [vp], 8            C                       M01
+       popcnt          c2 = x2                 C                       I0
+       add             s = s, c3               C                       M I
+       xor             x1 = u1, v1             C                       M I
+       nop.b           1                       C                       -
+       ;;
+.LL00: ld8             u1 = [up], 8            C                       M01
+       ld8             v1 = [vp], 8            C                       M01
+       popcnt          c3 = x3                 C                       I0
+       add             s = s, c0               C                       M I
+       xor             x2 = u2, v2             C                       M I
+       nop.b           1                       C                       -
+       ;;
+.LL11: ld8             u2 = [up], 8            C                       M01
+       ld8             v2 = [vp], 8            C                       M01
+       popcnt          c0 = x0                 C                       I0
+       add             s = s, c1               C                       M I
+       xor             x3 = u3, v3             C                       M I
+       nop.b           1                       C                       -
+       ;;
+.LL10: ld8             u3 = [up], 8            C                       M01
+       ld8             v3 = [vp], 8            C                       M01
+       popcnt          c1 = x1                 C                       I0
+       add             s = s, c2               C                       M I
+       xor             x0 = u0, v0             C                       M I
+       br.cloop.dptk   .Loop                   C                       B
+       ;;
+C Wind down: no more loads; finish the xor, popcnt and add steps still pending.
+.Lend: popcnt          c2 = x2                 C                       I0
+       add             s = s, c3               C                       M I
+       xor             x1 = u1, v1             C                       M I
+       ;;
+.Lcj8: popcnt          c3 = x3                 C                       I0
+       add             s = s, c0               C                       M I
+       xor             x2 = u2, v2             C                       M I
+       ;;
+.Lcj7: popcnt          c0 = x0                 C                       I0
+       add             s = s, c1               C                       M I
+       xor             x3 = u3, v3             C                       M I
+       ;;
+.Lcj6: popcnt          c1 = x1                 C                       I0
+       add             s = s, c2               C                       M I
+       ;;
+.Lcj5: popcnt          c2 = x2                 C                       I0
+       add             s = s, c3               C                       M I
+       ;;
+.Lcj4: popcnt          c3 = x3                 C                       I0
+       add             s = s, c0               C                       M I
+       ;;
+       add             s = s, c1               C                       M I
+       ;;
+       add             s = s, c2               C                       M I
+       ;;
+       add             s = s, c3               C                       M I
+       mov.i           ar.lc = r2              C restore ar.lc         I0
+       br.ret.sptk.many b0                     C                       B
+EPILOGUE()
+ASM_END()
diff --git a/mpn/ia64/ia64-defs.m4 b/mpn/ia64/ia64-defs.m4

new file mode 100644 (file)

index 0000000..2a8b5cf
--- /dev/null
+++ b/mpn/ia64/ia64-defs.m4
@@ -0,0 +1,124 @@
+divert(-1)
+
+
+dnl  Copyright 2000, 2002, 2003 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+dnl  ia64 assembler comments are C++ style "//" to the end of line.  gas
+dnl  also accepts "#" as a comment, if it's the first non-blank on a line.
+dnl
+dnl  BSD m4 can't handle a multi-character comment like "//" (see notes in
+dnl  mpn/asm-defs.m4).  For now the default "#" is left, but with care taken
+dnl  not to put any macros after "foo#" (since of course they won't expand).
+
+
+define(`ASM_START',
+m4_assert_numargs(0)
+`')dnl  ASM_START() takes no arguments and expands to the empty string here.
+
+
+dnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)
+dnl          EPILOGUE_cpu(GSYM_PREFIX`'foo)
+dnl
+dnl  32-byte alignment is used for the benefit of itanium-2, where the code
+dnl  fetcher will only take 2 bundles from a 32-byte aligned target.  At
+dnl  16mod32 it only reads 1 in the first cycle.  This might not make any
+dnl  difference if the rotate buffers are full or there's other work holding
+dnl  up execution, but we use 32-bytes to give the best chance of peak
+dnl  throughput.
+dnl
+dnl  We can use .align here despite the gas bug noted in mpn/ia64/README,
+dnl  since we're not expecting to execute across a PROLOGUE(), at least not
+dnl  currently.
+
+define(`PROLOGUE_cpu',
+m4_assert_numargs(1)
+       `
+       .text
+       .align  32
+       .global $1#
+       .proc   $1#
+$1:')dnl  Emits .text, .align 32, .global and .proc for the symbol, then its label.
+
+define(`EPILOGUE_cpu',
+m4_assert_numargs(1)
+       `
+       .endp   $1#
+')dnl  Emits .endp for the symbol, pairing with the .proc from PROLOGUE_cpu.
+
+define(`DATASTART',
+       `dnl
+       DATA
+$1:')dnl  DATASTART(label): expands DATA (not defined in this file), then emits "label:".
+define(`DATAEND',`dnl')dnl  Expands to dnl, so the rest of the invoking line is discarded.
+
+define(`ASM_END',`dnl')dnl  Expands to dnl, so the rest of the invoking line is discarded.
+
+
+dnl  Usage: ALIGN(bytes)
+dnl
+dnl  Emit a ".align" directive.  "bytes" is eval()ed, so can be an
+dnl  expression.
+dnl
+dnl  This version overrides the definition in mpn/asm-defs.m4.  We suppress
+dnl  any .align if the gas byte-swapped-nops bug was detected by the
+dnl  configure test GMP_ASM_IA64_ALIGN_OK.
+
+define(`ALIGN',
+m4_assert_numargs(1)
+m4_assert_defined(`IA64_ALIGN_OK')
+`ifelse(IA64_ALIGN_OK,no,,
+`.align        eval($1)')')dnl  Expands to nothing when IA64_ALIGN_OK is "no".
+
+
+dnl  Usage: ASSERT([pr] [,code])
+dnl
+dnl  Require that the given predicate register is true after executing the
+dnl  test code.  For example,
+dnl
+dnl         ASSERT(p6,
+dnl         `       cmp.eq  p6,p0 = r3, r4')
+dnl
+dnl  If the predicate register argument is empty then nothing is tested, the
+dnl  code is just executed.  This can be used for setups required by later
+dnl  ASSERTs.  The code argument can be omitted to just test a predicate
+dnl  with no special setup code.
+dnl
+dnl  For convenience, stops are inserted before and after the code emitted.
+
+define(ASSERT,
+m4_assert_numargs_range(1,2)
+m4_assert_defined(`WANT_ASSERT')
+`ifelse(WANT_ASSERT,1,
+`      ;;
+ifelse(`$2',,,
+`$2
+       ;;
+')
+ifelse(`$1',,,
+`($1)  br      .LASSERTok`'ASSERT_label_counter ;;
+       cmp.ne  p6,p6 = r0, r0  C illegal instruction
+       ;;
+.LASSERTok`'ASSERT_label_counter:
+define(`ASSERT_label_counter',eval(ASSERT_label_counter+1))
+')
+')')dnl  Expands to nothing unless WANT_ASSERT is 1.
+define(`ASSERT_label_counter',1)dnl  Suffix for .LASSERTok labels; bumped only when a predicate is tested.
+
+
+divert
diff --git a/mpn/ia64/invert_limb.asm b/mpn/ia64/invert_limb.asm

new file mode 100644 (file)

index 0000000..ca987ba
--- /dev/null
+++ b/mpn/ia64/invert_limb.asm
@@ -0,0 +1,92 @@
+dnl  IA-64 mpn_invert_limb -- Invert a normalized limb.
+
+dnl  Copyright 2000, 2002, 2004 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C d = r32
+
+C           cycles
+C Itanium:    74
+C Itanium 2:  50+6
+
+C It should be possible to avoid the xmpy.hu and the following tests by
+C explicitly chopping in the last fma.  That would save about 10 cycles.
+
+ASM_START()
+       .sdata
+       .align 16
+ifdef(`HAVE_DOUBLE_IEEE_LITTLE_ENDIAN',`
+.LC0:  data4 0x00000000, 0x80000000, 0x0000403f, 0x00000000    C 2^64 (exponent 0x403f = 0x3fff + 64)
+.LC1:  data4 0x00000000, 0x80000000, 0x0000407f, 0x00000000    C 2^128 (exponent 0x407f = 0x3fff + 128)
+
+',`ifdef(`HAVE_DOUBLE_IEEE_BIG_ENDIAN',`
+.LC0:  data4 0x403f8000, 0x00000000, 0x00000000, 0x00000000    C 2^64 (exponent 0x403f = 0x3fff + 64)
+.LC1:  data4 0x407f8000, 0x00000000, 0x00000000, 0x00000000    C 2^128 (exponent 0x407f = 0x3fff + 128)
+
+',`m4_error(`Oops, need to know float endianness
+')')')
+
+
+PROLOGUE(mpn_invert_limb)
+               C 00
+       addl            r14 = @gprel(.LC0), gp  C r14 = address of .LC0
+       addl            r15 = @gprel(.LC1), gp  C r15 = address of .LC1
+       setf.sig        f7 = r32                C f7 = d as an integer significand
+       add             r9 = r32, r32           C r9 = 2*d, which wraps to 0 for d = 2^63
+       ;;      C 01
+       ldfe            f10 = [r14]             C 2^64
+       ldfe            f8 = [r15]              C 2^128
+       cmp.eq          p6, p0 = 0, r9          C check for d = 2^63
+       mov             r8 = -1                 C retval for 2^63
+   (p6)        br.ret.spnt.many b0             C d = 2^63: return with r8 = -1
+       ;;      C 07
+       fmpy.s1         f11 = f7, f10           C f11 = d * 2^64; NOTE(review): never read, f11 is rewritten in group C 19
+       fnma.s1         f6 = f7, f10, f8        C f6 = 2^128 - d * 2^64
+       ;;      C 11
+       frcpa.s1        f8, p6 = f6, f7         C f8 = reciprocal approximation of d; p6 gates the refinement below
+       ;;      C 15
+   (p6)        fnma.s1         f9 = f7, f8, f1         C f9 = 1 - d*f8 (f1 is 1.0)
+   (p6)        fmpy.s1         f10 = f6, f8            C f10 = f6*f8, first quotient estimate
+       ;;      C 19
+   (p6)        fmpy.s1         f11 = f9, f9            C f11 = f9^2
+   (p6)        fma.s1          f10 = f9, f10, f10      C f10 = f10 + f9*f10
+       ;;      C 23
+   (p6)        fma.s1          f8 = f9, f8, f8         C f8 = f8 + f9*f8
+   (p6)        fma.s1          f9 = f11, f10, f10      C f9 = f10 + f11*f10
+       ;;      C 27
+   (p6)        fma.s1          f8 = f11, f8, f8        C f8 = f8 + f11*f8
+   (p6)        fnma.s1         f10 = f7, f9, f6        C f10 = f6 - d*f9 (remainder)
+       ;;      C 31
+   (p6)        fma.s1          f8 = f10, f8, f9        C f8 = f9 + f10*f8 (corrected quotient)
+       ;;      C 35
+       fcvt.fxu.trunc.s1 f8 = f8               C truncate to an unsigned integer
+       ;;      C 39
+       getf.sig        r8 = f8                 C r8 = candidate di
+       xmpy.hu         f10 = f8, f7            C high half of di * d
+       ;;      C 43
+       getf.sig        r14 = f10               C r14 = high half of di * d
+       andcm           r9 = -1, r32            C one's complement
+       ;;      C 48
+       cmp.ltu         p6, p0 = r9, r14        C got overflow?
+       ;;      C 49
+   (p6)        add             r8 = -1, r8             C adjust di down
+       br.ret.sptk.many b0                     C return di in r8
+EPILOGUE()
+ASM_END()
diff --git a/mpn/ia64/logops_n.asm b/mpn/ia64/logops_n.asm

new file mode 100644 (file)

index 0000000..3ab9d25
--- /dev/null
+++ b/mpn/ia64/logops_n.asm
@@ -0,0 +1,277 @@
+dnl  IA-64 mpn_and_n, mpn_andn_n, mpn_nand_n, mpn_ior_n, mpn_iorn_n,
+dnl  mpn_nior_n, mpn_xor_n, mpn_xnor_n -- mpn bitwise logical operations.
+
+dnl  Copyright 2003, 2004, 2005 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C Itanium:      2
+C Itanium 2:    1
+
+C TODO
+C  * Use rp,rpx scheme of aors_n.asm to allow parallel stores (useful in
+C    wind-down code).
+
+C INPUT PARAMETERS
+define(`rp', `r32')
+define(`up', `r33')
+define(`vp', `r34')
+define(`n', `r35')
+
+ifdef(`OPERATION_and_n',
+`      define(`func',`mpn_and_n')
+       define(`logop',         `and    $1 = $2, $3')
+       define(`notormov',      `mov    $1 = $2')')
+ifdef(`OPERATION_andn_n',
+`      define(`func',`mpn_andn_n')
+       define(`logop',         `andcm  $1 = $2, $3')
+       define(`notormov',      `mov    $1 = $2')')
+ifdef(`OPERATION_nand_n',
+`      define(`func',`mpn_nand_n')
+       define(`logop',         `and    $1 = $2, $3')
+       define(`notormov',      `sub    $1 = -1, $2')')
+ifdef(`OPERATION_ior_n',
+`      define(`func',`mpn_ior_n')
+       define(`logop',         `or     $1 = $2, $3')
+       define(`notormov',      `mov    $1 = $2')')
+ifdef(`OPERATION_iorn_n',
+`      define(`func',`mpn_iorn_n')
+       define(`logop',         `andcm  $1 = $3, $2')
+       define(`notormov',      `sub    $1 = -1, $2')')
+ifdef(`OPERATION_nior_n',
+`      define(`func',`mpn_nior_n')
+       define(`logop',         `or     $1 = $2, $3')
+       define(`notormov',      `sub    $1 = -1, $2')')
+ifdef(`OPERATION_xor_n',
+`      define(`func',`mpn_xor_n')
+       define(`logop',         `xor    $1 = $2, $3')
+       define(`notormov',      `mov    $1 = $2')')
+ifdef(`OPERATION_xnor_n',
+`      define(`func',`mpn_xnor_n')
+       define(`logop',         `xor    $1 = $2, $3')
+       define(`notormov',      `sub    $1 = -1, $2')')
+
+MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
+
+ASM_START()
+PROLOGUE(func)
+       .prologue
+       .save   ar.lc, r2
+       .body
+ifdef(`HAVE_ABI_32',
+`      addp4   rp = 0, rp                      C 32-bit pointer to 64-bit address      M I
+       addp4   up = 0, up                      C                       M I
+       addp4   vp = 0, vp                      C                       M I
+       zxt4    n = n                           C zero-extend the 32-bit count  I
+       ;;
+')
+{.mmi
+       ld8             r10 = [up], 8           C r10 = first u limb    M
+       ld8             r11 = [vp], 8           C r11 = first v limb    M
+       mov.i           r2 = ar.lc              C save ar.lc in r2      I0
+}
+{.mmi
+       and             r14 = 3, n              C r14 = n mod 4         M I
+       cmp.lt          p15, p14 = 4, n         C p15 = (n > 4)         M I
+       shr.u           n = n, 2                C n = n / 4             I0
+       ;;
+}
+{.mmi
+       cmp.eq          p6, p0 = 1, r14         C p6 = (n mod 4 = 1)    M I
+       cmp.eq          p7, p0 = 2, r14         C p7 = (n mod 4 = 2)    M I
+       cmp.eq          p8, p0 = 3, r14         C p8 = (n mod 4 = 3)    M I
+}
+{.bbb
+   (p6)        br.dptk         .Lb01                   C                       B
+   (p7)        br.dptk         .Lb10                   C                       B
+   (p8)        br.dptk         .Lb11                   C                       B
+}
+
+.Lb00: ld8             r17 = [up], 8           C feed-in for n mod 4 = 0       M
+       ld8             r21 = [vp], 8           C                       M
+       add             n = -2, n               C loop count for the .grt4 path M I
+       ;;
+       ld8             r18 = [up], 8           C                       M
+       ld8             r22 = [vp], 8           C                       M
+       ;;
+       ld8             r19 = [up], 8           C                       M
+       ld8             r23 = [vp], 8           C                       M
+  (p15)        br.cond.dpnt    .grt4                   C more than 4 limbs     B
+
+       logop(          r14, r10, r11)          C                       M I
+       ;;
+       logop(          r15, r17, r21)          C                       M I
+       notormov(       r8, r14)                C                       M I
+       br              .Lcj4                   C four stores remain    B
+
+.grt4: logop(          r14, r10, r11)          C                       M I
+       ld8             r16 = [up], 8           C                       M
+       ld8             r20 = [vp], 8           C                       M
+       ;;
+       logop(          r15, r17, r21)          C                       M I
+       ld8             r17 = [up], 8           C                       M
+       mov.i           ar.lc = n               C set loop counter      I0
+       notormov(       r8, r14)                C                       M I
+       ld8             r21 = [vp], 8           C                       M
+       br              .LL00                   C enter loop mid-way    B
+
+.Lb01: add             n = -1, n               C feed-in for n mod 4 = 1       M I
+       logop(          r15, r10, r11)          C                       M I
+  (p15)        br.cond.dpnt    .grt1                   C more than 4 limbs     B
+       ;;
+
+       notormov(       r9, r15)                C                       M I
+       br              .Lcj1                   C one store remains     B
+
+.grt1: ld8             r16 = [up], 8           C                       M
+       ld8             r20 = [vp], 8           C                       M
+       ;;
+       ld8             r17 = [up], 8           C                       M
+       ld8             r21 = [vp], 8           C                       M
+       mov.i           ar.lc = n               C set loop counter      I0
+       ;;
+       ld8             r18 = [up], 8           C                       M
+       ld8             r22 = [vp], 8           C                       M
+       ;;
+       ld8             r19 = [up], 8           C                       M
+       ld8             r23 = [vp], 8           C                       M
+       br.cloop.dptk   .grt5                   C taken while ar.lc is nonzero  B
+       ;;
+
+       logop(          r14, r16, r20)          C                       M I
+       notormov(       r9, r15)                C                       M I
+       br              .Lcj5                   C five stores remain    B
+
+.grt5: logop(          r14, r16, r20)          C                       M I
+       ld8             r16 = [up], 8           C                       M
+       notormov(       r9, r15)                C                       M I
+       ld8             r20 = [vp], 8           C                       M
+       br              .LL01                   C enter loop mid-way    B
+
+.Lb10: ld8             r19 = [up], 8           C feed-in for n mod 4 = 2       M
+       ld8             r23 = [vp], 8           C                       M
+  (p15)        br.cond.dpnt    .grt2                   C more than 4 limbs     B
+
+       logop(          r14, r10, r11)          C                       M I
+       ;;
+       logop(          r15, r19, r23)          C                       M I
+       notormov(       r8, r14)                C                       M I
+       br              .Lcj2                   C two stores remain     B
+
+.grt2: ld8             r16 = [up], 8           C                       M
+       ld8             r20 = [vp], 8           C                       M
+       add             n = -1, n               C adjust loop count     M I
+       ;;
+       ld8             r17 = [up], 8           C                       M
+       ld8             r21 = [vp], 8           C                       M
+       logop(          r14, r10, r11)          C                       M I
+       ;;
+       ld8             r18 = [up], 8           C                       M
+       ld8             r22 = [vp], 8           C                       M
+       mov.i           ar.lc = n               C set loop counter      I0
+       ;;
+       logop(          r15, r19, r23)          C                       M I
+       ld8             r19 = [up], 8           C                       M
+       notormov(       r8, r14)                C                       M I
+       ld8             r23 = [vp], 8           C                       M
+       br.cloop.dptk   .Loop                   C taken while ar.lc is nonzero  B
+       br              .Lcj6                   C six stores remain     B
+
+.Lb11: ld8             r18 = [up], 8           C feed-in for n mod 4 = 3       M
+       ld8             r22 = [vp], 8           C                       M
+       add             n = -1, n               C adjust loop count     M I
+       ;;
+       ld8             r19 = [up], 8           C                       M
+       ld8             r23 = [vp], 8           C                       M
+       logop(          r15, r10, r11)          C                       M I
+  (p15)        br.cond.dpnt    .grt3                   C more than 4 limbs     B
+       ;;
+
+       logop(          r14, r18, r22)          C                       M I
+       notormov(       r9, r15)                C                       M I
+       br              .Lcj3                   C three stores remain   B
+
+.grt3: ld8             r16 = [up], 8           C                       M
+       ld8             r20 = [vp], 8           C                       M
+       ;;
+       ld8             r17 = [up], 8           C                       M
+       ld8             r21 = [vp], 8           C                       M
+       mov.i           ar.lc = n               C set loop counter      I0
+       ;;
+       logop(          r14, r18, r22)          C                       M I
+       ld8             r18 = [up], 8           C                       M
+       notormov(       r9, r15)                C                       M I
+       ld8             r22 = [vp], 8           C                       M
+       br              .LL11                   C enter loop mid-way    B
+
+C *** MAIN LOOP START ***
+       ALIGN(32)
+.Loop: st8             [rp] = r8, 8            C store a finished limb M
+       logop(          r14, r16, r20)          C r14 = logop result    M I
+       notormov(       r9, r15)                C r9 = result, complemented or copied   M I
+       ld8             r16 = [up], 8           C reload for a later pass       M
+       ld8             r20 = [vp], 8           C                       M
+       nop.b           0
+       ;;
+.LL01: st8             [rp] = r9, 8            C                       M
+       logop(          r15, r17, r21)          C                       M I
+       notormov(       r8, r14)                C                       M I
+       ld8             r17 = [up], 8           C                       M
+       ld8             r21 = [vp], 8           C                       M
+       nop.b           0
+       ;;
+.LL00: st8             [rp] = r8, 8            C                       M
+       logop(          r14, r18, r22)          C                       M I
+       notormov(       r9, r15)                C                       M I
+       ld8             r18 = [up], 8           C                       M
+       ld8             r22 = [vp], 8           C                       M
+       nop.b           0
+       ;;
+.LL11: st8             [rp] = r9, 8            C                       M
+       logop(          r15, r19, r23)          C                       M I
+       notormov(       r8, r14)                C                       M I
+       ld8             r19 = [up], 8           C                       M
+       ld8             r23 = [vp], 8           C                       M
+       br.cloop.dptk   .Loop   ;;              C four limbs per pass   B
+C *** MAIN LOOP END ***
+
+.Lcj6: st8             [rp] = r8, 8            C wind-down: entering at .LcjK performs K stores        M
+       logop(          r14, r16, r20)          C                       M I
+       notormov(       r9, r15)                C                       M I
+       ;;
+.Lcj5: st8             [rp] = r9, 8            C                       M
+       logop(          r15, r17, r21)          C                       M I
+       notormov(       r8, r14)                C                       M I
+       ;;
+.Lcj4: st8             [rp] = r8, 8            C                       M
+       logop(          r14, r18, r22)          C                       M I
+       notormov(       r9, r15)                C                       M I
+       ;;
+.Lcj3: st8             [rp] = r9, 8            C                       M
+       logop(          r15, r19, r23)          C                       M I
+       notormov(       r8, r14)                C                       M I
+       ;;
+.Lcj2: st8             [rp] = r8, 8            C                       M
+       notormov(       r9, r15)                C                       M I
+       ;;
+.Lcj1: st8             [rp] = r9, 8            C last store            M
+       mov.i           ar.lc = r2              C restore ar.lc         I0
+       br.ret.sptk.many b0                     C                       B
+EPILOGUE()
+ASM_END()
diff --git a/mpn/ia64/lorrshift.asm b/mpn/ia64/lorrshift.asm

new file mode 100644 (file)

index 0000000..59badeb
--- /dev/null
+++ b/mpn/ia64/lorrshift.asm
@@ -0,0 +1,344 @@
+dnl  IA-64 mpn_lshift/mpn_rshift.
+
+dnl  Copyright 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation,
+dnl  Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C Itanium:      2.0
+C Itanium 2:    1.0
+
+C This code is scheduled deeply since the plain shift instructions shr and shl
+C have a latency of 4 (on Itanium) or 3 (on Itanium 2).  Poor scheduling of
+C these instructions causes a 10 cycle replay trap on Itanium.
+
+C TODO
+C  * Optimize function entry and feed-in code.
+
+C INPUT PARAMETERS
+define(`rp',`r32')
+define(`up',`r33')
+define(`n',`r34')
+define(`cnt',`r35')
+
+define(`tnc',`r9')
+
+ifdef(`OPERATION_lshift',`
+       define(`FSH',`shl')
+       define(`BSH',`shr.u')
+       define(`UPD',`-8')
+       define(`POFF',`-512')
+       define(`PUPD',`-32')
+       define(`func',`mpn_lshift')
+')
+ifdef(`OPERATION_rshift',`
+       define(`FSH',`shr.u')
+       define(`BSH',`shl')
+       define(`UPD',`8')
+       define(`POFF',`512')
+       define(`PUPD',`32')
+       define(`func',`mpn_rshift')
+')
+
+MULFUNC_PROLOGUE(mpn_lshift mpn_rshift)
+
+ASM_START()
+PROLOGUE(func)
+       .prologue
+       .save           ar.lc, r2
+       .body
+ifdef(`HAVE_ABI_32',
+`      addp4           rp = 0, rp              C 32-bit pointer to 64-bit address      M I
+       addp4           up = 0, up              C                       M I
+       sxt4            n = n                   C sign-extend the 32-bit count  M I
+       zxt4            cnt = cnt               C zero-extend the shift count   I
+       ;;
+')
+
+ {.mmi;        cmp.lt          p14, p15 = 4, n         C p14 = (n > 4), p15 = (n <= 4) M I
+       and             r14 = 3, n              C r14 = n mod 4         M I
+       mov.i           r2 = ar.lc              C save ar.lc in r2      I0
+}{.mmi;        add             r15 = -1, n             C r15 = n - 1           M I
+       sub             tnc = 64, cnt           C tnc = 64 - cnt        M I
+       add             r16 = -5, n             C r16 = n - 5
+       ;;
+}{.mmi;        cmp.eq          p6, p0 = 1, r14         C p6 = (n mod 4 = 1)    M I
+       cmp.eq          p7, p0 = 2, r14         C p7 = (n mod 4 = 2)    M I
+       shr.u           n = r16, 2              C n = (n - 5) / 4, the loop count       I0
+}{.mmi;        cmp.eq          p8, p0 = 3, r14         C p8 = (n mod 4 = 3)    M I
+ifdef(`OPERATION_lshift',
+`      shladd          up = r15, 3, up         C lshift only: up = up + 8*(n-1)        M I
+       shladd          rp = r15, 3, rp')       C lshift only: rp = rp + 8*(n-1)        M I
+       ;;
+}{.mmi;        add             r11 = POFF, up          C r11 = prefetch pointer for lfetch     M I
+       ld8             r10 = [up], UPD         C r10 = first limb processed    M01
+       mov.i           ar.lc = n               C set loop counter      I0
+}{.bbb;
+   (p6)        br.dptk         .Lb01
+   (p7)        br.dptk         .Lb10
+   (p8)        br.dptk         .Lb11
+       ;;
+}
+
+.Lb00: ld8             r19 = [up], UPD         C feed-in for n mod 4 = 0
+       ;;
+       ld8             r16 = [up], UPD
+       ;;
+       ld8             r17 = [up], UPD
+       BSH             r8 = r10, tnc           C function return value
+  (p14)        br.cond.dptk    .grt4                   C more than 4 limbs
+
+       FSH             r24 = r10, cnt
+       BSH             r25 = r19, tnc
+       ;;
+       FSH             r26 = r19, cnt
+       BSH             r27 = r16, tnc
+       ;;
+       FSH             r20 = r16, cnt
+       BSH             r21 = r17, tnc
+       ;;
+       or              r14 = r25, r24
+       FSH             r22 = r17, cnt
+       BSH             r23 = r10, tnc          C NOTE(review): r23 is not read on the .Lr4 path, looks dead
+       br              .Lr4                    C four stores remain
+
+.grt4: FSH             r24 = r10, cnt
+       BSH             r25 = r19, tnc
+       ;;
+       ld8             r18 = [up], UPD
+       FSH             r26 = r19, cnt
+       BSH             r27 = r16, tnc
+       ;;
+       ld8             r19 = [up], UPD
+       FSH             r20 = r16, cnt
+       BSH             r21 = r17, tnc
+       ;;
+       ld8             r16 = [up], UPD
+       FSH             r22 = r17, cnt
+       BSH             r23 = r18, tnc
+       ;;
+       or              r14 = r25, r24
+       ld8             r17 = [up], UPD
+       br.cloop.dpnt   .Ltop                   C taken while ar.lc is nonzero
+       br              .Lbot                   C eight stores remain
+
+.Lb01:
+  (p15)        BSH             r8 = r10, tnc           C function return value I
+  (p15)        FSH             r22 = r10, cnt          C                       I
+  (p15)        br.cond.dptk    .Lr1                    C return                B
+
+.grt1: ld8             r18 = [up], UPD         C n mod 4 = 1 with more than 4 limbs
+       ;;
+       ld8             r19 = [up], UPD
+       BSH             r8 = r10, tnc           C function return value
+       ;;
+       ld8             r16 = [up], UPD
+       FSH             r22 = r10, cnt
+       BSH             r23 = r18, tnc
+       ;;
+       ld8             r17 = [up], UPD
+       br.cloop.dpnt   .grt5                   C taken while ar.lc is nonzero
+       ;;
+
+       FSH             r24 = r18, cnt
+       BSH             r25 = r19, tnc
+       ;;
+       or              r15 = r23, r22
+       FSH             r26 = r19, cnt
+       BSH             r27 = r16, tnc
+       ;;
+       FSH             r20 = r16, cnt
+       BSH             r21 = r17, tnc
+       br              .Lr5                    C five stores remain
+
+.grt5: FSH             r24 = r18, cnt
+       BSH             r25 = r19, tnc
+       ;;
+       ld8             r18 = [up], UPD
+       FSH             r26 = r19, cnt
+       BSH             r27 = r16, tnc
+       ;;
+       ld8             r19 = [up], UPD
+       FSH             r20 = r16, cnt
+       BSH             r21 = r17, tnc
+       ;;
+       or              r15 = r23, r22
+       ld8             r16 = [up], UPD
+       br              .LL01                   C enter loop mid-way
+
+
+.Lb10: ld8             r17 = [up], UPD         C feed-in for n mod 4 = 2
+  (p14)        br.cond.dptk    .grt2                   C more than 4 limbs
+
+       BSH             r8 = r10, tnc           C function return value
+       ;;
+       FSH             r20 = r10, cnt
+       BSH             r21 = r17, tnc
+       ;;
+       or              r14 = r21, r20
+       FSH             r22 = r17, cnt
+       br              .Lr2                    C return
+
+.grt2: ld8             r18 = [up], UPD
+       BSH             r8 = r10, tnc           C function return value
+       ;;
+       ld8             r19 = [up], UPD
+       FSH             r20 = r10, cnt
+       BSH             r21 = r17, tnc
+       ;;
+       ld8             r16 = [up], UPD
+       FSH             r22 = r17, cnt
+       BSH             r23 = r18, tnc
+       ;;
+       ld8             r17 = [up], UPD
+       br.cloop.dpnt   .grt6                   C taken while ar.lc is nonzero
+       ;;
+
+       or              r14 = r21, r20
+       FSH             r24 = r18, cnt
+       BSH             r25 = r19, tnc
+       ;;
+       FSH             r26 = r19, cnt
+       BSH             r27 = r16, tnc
+       br              .Lr6                    C six stores remain
+
+.grt6: or              r14 = r21, r20
+       FSH             r24 = r18, cnt
+       BSH             r25 = r19, tnc
+       ;;
+       ld8             r18 = [up], UPD
+       FSH             r26 = r19, cnt
+       BSH             r27 = r16, tnc
+       ;;
+       ld8             r19 = [up], UPD
+       br              .LL10                   C enter loop mid-way
+
+
+.Lb11: ld8             r16 = [up], UPD         C feed-in for n mod 4 = 3
+       ;;
+       ld8             r17 = [up], UPD
+       BSH             r8 = r10, tnc           C function return value
+  (p14)        br.cond.dptk    .grt3                   C more than 4 limbs
+       ;;
+
+       FSH             r26 = r10, cnt
+       BSH             r27 = r16, tnc
+       ;;
+       FSH             r20 = r16, cnt
+       BSH             r21 = r17, tnc
+       ;;
+       or              r15 = r27, r26
+       FSH             r22 = r17, cnt
+       br              .Lr3                    C return
+
+.grt3: ld8             r18 = [up], UPD
+       FSH             r26 = r10, cnt
+       BSH             r27 = r16, tnc
+       ;;
+       ld8             r19 = [up], UPD
+       FSH             r20 = r16, cnt
+       BSH             r21 = r17, tnc
+       ;;
+       ld8             r16 = [up], UPD
+       FSH             r22 = r17, cnt
+       BSH             r23 = r18, tnc
+       ;;
+       ld8             r17 = [up], UPD
+       br.cloop.dpnt   .grt7                   C taken while ar.lc is nonzero
+
+       or              r15 = r27, r26
+       FSH             r24 = r18, cnt
+       BSH             r25 = r19, tnc
+       br              .Lr7                    C seven stores remain
+
+.grt7: or              r15 = r27, r26
+       FSH             r24 = r18, cnt
+       BSH             r25 = r19, tnc
+       ld8             r18 = [up], UPD
+       br              .LL11                   C enter loop mid-way
+
+C *** MAIN LOOP START ***
+       ALIGN(32)
+.Ltop:
+ {.mmi;        st8             [rp] = r14, UPD         C M2  store a finished limb
+       or              r15 = r27, r26          C M3  merge the FSH and BSH halves
+       FSH             r24 = r18, cnt          C I0
+}{.mmi;        ld8             r18 = [up], UPD         C M1
+       lfetch          [r11], PUPD             C prefetch through r11 and step it by PUPD
+       BSH             r25 = r19, tnc          C I1
+       ;; }
+.LL11:
+ {.mmi;        st8             [rp] = r15, UPD
+       or              r14 = r21, r20
+       FSH             r26 = r19, cnt
+}{.mmi;        ld8             r19 = [up], UPD
+       nop.m           0
+       BSH             r27 = r16, tnc
+       ;; }
+.LL10:
+ {.mmi;        st8             [rp] = r14, UPD
+       or              r15 = r23, r22
+       FSH             r20 = r16, cnt
+}{.mmi;        ld8             r16 = [up], UPD
+       nop.m           0
+       BSH             r21 = r17, tnc
+       ;; }
+.LL01:
+ {.mmi;        st8             [rp] = r15, UPD
+       or              r14 = r25, r24
+       FSH             r22 = r17, cnt
+}{.mib;        ld8             r17 = [up], UPD
+       BSH             r23 = r18, tnc
+       br.cloop.dptk   .Ltop                   C four limbs per pass
+       ;; }
+
+C *** MAIN LOOP END ***
+
+.Lbot: or              r15 = r27, r26          C wind-down: .Lbot performs 8 stores, .LrK performs K
+       FSH             r24 = r18, cnt
+       BSH             r25 = r19, tnc
+       st8             [rp] = r14, UPD
+       ;;
+.Lr7:  or              r14 = r21, r20
+       FSH             r26 = r19, cnt
+       BSH             r27 = r16, tnc
+       st8             [rp] = r15, UPD
+       ;;
+.Lr6:  or              r15 = r23, r22
+       FSH             r20 = r16, cnt
+       BSH             r21 = r17, tnc
+       st8             [rp] = r14, UPD
+       ;;
+.Lr5:  st8             [rp] = r15, UPD
+       or              r14 = r25, r24
+       FSH             r22 = r17, cnt
+       ;;
+.Lr4:  or              r15 = r27, r26
+       st8             [rp] = r14, UPD
+       ;;
+.Lr3:  or              r14 = r21, r20
+       st8             [rp] = r15, UPD
+       ;;
+.Lr2:  st8             [rp] = r14, UPD
+       ;;
+.Lr1:  st8             [rp] = r22, UPD         C last limb has no BSH part     M23
+       mov             ar.lc = r2              C restore ar.lc         I0
+       br.ret.sptk.many b0                     C                       B
+EPILOGUE(func)
+ASM_END()
diff --git a/mpn/ia64/mode1o.asm b/mpn/ia64/mode1o.asm

new file mode 100644 (file)

index 0000000..6b3626e
--- /dev/null
+++ b/mpn/ia64/mode1o.asm
@@ -0,0 +1,329 @@
+dnl  Itanium-2 mpn_modexact_1c_odd -- mpn by 1 exact remainder.
+
+dnl  Copyright 2003, 2004, 2005 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C            cycles/limb
+C Itanium:      15
+C Itanium 2:     8
+
+
+dnl  Usage: ABI32(`code')
+dnl
+dnl  Emit the given code only under HAVE_ABI_32.
+dnl
+define(ABI32,
+m4_assert_onearg()
+`ifdef(`HAVE_ABI_32',`$1')')
+
+
+C mp_limb_t mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size,
+C                                mp_limb_t divisor, mp_limb_t carry);
+C
+C The modexact algorithm is usually conceived as a dependent chain
+C
+C      l = src[i] - c
+C      q = low(l * inverse)
+C      c = high(q*divisor) + (src[i]<c)
+C
+C but we can work the src[i]-c into an xma by calculating si=src[i]*inverse
+C separately (off the dependent chain) and using
+C
+C      q = low(c * inverse + si)
+C      c = high(q*divisor + c)
+C
+C This means the dependent chain is simply xma.l followed by xma.hu, for a
+C total 8 cycles/limb on itanium-2.
+C
+C The reason xma.hu works for the new c is that the low of q*divisor is
+C src[i]-c (being the whole purpose of the q generated, and it can be
+C verified algebraically).  If there was an underflow from src[i]-c, then
+C there will be an overflow from (src-c)+c, thereby adding 1 to the new c
+C the same as the borrow bit (src[i]<c) gives in the first style shown.
+C
+C Incidentally, fcmp is not an option for treating src[i]-c, since it
+C apparently traps to the kernel for unnormalized operands like those used
+C and generated by ldf8 and xma.  On one GNU/Linux system it took about 1200
+C cycles.
+C
+C
+C First Limb:
+C
+C The first limb uses q = (src[0]-c) * inverse shown in the first style.
+C This lets us get the first q as soon as the inverse is ready, without
+C going through si=s*inverse.  Basically at the start we have c and can use
+C it while waiting for the inverse, whereas for the second and subsequent
+C limbs it's the other way around, ie. we have the inverse and are waiting
+C for c.
+C
+C At .Lentry the first two instructions in the loop have been done already.
+C The load of f11=src[1] at the start (predicated on size>=2), and the
+C calculation of q by the initial different scheme.
+C
+C
+C Entry Sequence:
+C
+C In the entry sequence, the critical path is the calculation of the
+C inverse, so this is begun first and optimized.  Apart from that, ar.lc is
+C established nice and early so the br.cloop's should predict perfectly.
+C And the load for the low limbs src[0] and src[1] can be initiated long
+C ahead of where they're needed.
+C
+C
+C Inverse Calculation:
+C
+C The initial 8-bit inverse is calculated using a table lookup.  If it hits
+C L1 (which is likely if we're called several times) then it should take a
+C total 4 cycles, otherwise hopefully L2 for 9 cycles.  This is considered
+C the best approach, on balance.  It could be done bitwise, but that would
+C probably be about 14 cycles (2 per bit beyond the first couple).  Or it
+C could be taken from 4 bits to 8 with xmpy doubling as used beyond 8 bits,
+C but that would be about 11 cycles.
+C
+C The table is not the same as binvert_limb_table, instead it's 256 bytes,
+C designed to be indexed by the low byte of the divisor.  The divisor is
+C always odd, so the relevant data is every second byte in the table.  The
+C padding lets us use zxt1 instead of extr.u, the latter would cost an extra
+C cycle because it must go down I0, and we're using the first I0 slot to get
+C ip.  The extra 128 bytes of padding should be insignificant compared to
+C typical ia64 code bloat.
+C
+C Having the table in .text allows us to use IP-relative addressing,
+C avoiding a fetch from ltoff.  .rodata is apparently not suitable for use
+C IP-relative, it gets a linker relocation overflow on GNU/Linux.
+C
+C
+C Load Scheduling:
+C
+C In the main loop, the data loads are scheduled for an L2 hit, which means
+C 6 cycles for the data ready to use.  In fact we end up 7 cycles ahead.  In
+C any case that scheduling is achieved simply by doing the load (and xmpy.l
+C for "si") in the immediately preceding iteration.
+C
+C The main loop requires size >= 2, and we handle size==1 by an initial
+C br.cloop to enter the loop only if size>1.  Since ar.lc is established
+C early, this should predict perfectly.
+C
+C
+C Not done:
+C
+C Consideration was given to using a plain "(src[0]-c) % divisor" for
+C size==1, but cycle counting suggests about 50 for the sort of approach
+C taken by gcc __umodsi3, versus about 47 for the modexact.  (Both assuming
+C L1 hits for their respective fetching.)
+C
+C Consideration was given to a test for high<divisor and replacing the last
+C loop iteration with instead c-=src[size-1] followed by c+=d if underflow.
+C Branching on high<divisor wouldn't be good since a mispredict would cost
+C more than the loop iteration saved, and the condition is of course data
+C dependent.  So the theory would be to shorten the loop count if
+C high<divisor, and predicate extra operations at the end.  That would mean
+C a gain of 6 when high<divisor, or a cost of 2 if not.
+C
+C Whether such a tradeoff is a win on average depends on assumptions about
+C how many bits in the high and the divisor.  If both are uniformly
+C distributed then high<divisor about 50% of the time.  But smallish
+C divisors (less chance of high<divisor) might be more likely from
+C applications (mpz_divisible_ui, mpz_gcd_ui, etc).  Though biggish divisors
+C would be normal internally from say mpn/generic/perfsqr.c.  On balance,
+C for the moment, it's felt the gain is not really enough to be worth the
+C trouble.
+C
+C
+C Enhancement:
+C
+C Process two source limbs per iteration using a two-limb inverse and a
+C sequence like
+C
+C      ql  = low (c * il + sil)        quotient low limb
+C      qlc = high(c * il + sil)
+C      qh1 = low (c * ih + sih)        quotient high, partial
+C
+C      cl = high (ql * d + c)          carry out of low
+C      qh = low (qlc * 1 + qh1)        quotient high limb
+C
+C      new c = high (qh * d + cl)      carry out of high
+C
+C This would be 13 cycles/iteration, giving 6.5 cycles/limb.  The two limb
+C s*inverse as sih:sil = sh:sl * ih:il would be calculated off the dependent
+C chain with 4 multiplies.  The bigger inverse would take extra time to
+C calculate, but a one limb iteration to handle an odd size could be done as
+C soon as 64-bits of inverse were ready.
+C
+C Perhaps this could even extend to a 3 limb inverse, which might promise 17
+C or 18 cycles for 3 limbs, giving 5.66 or 6.0 cycles/limb.
+C
+
+ASM_START()
+       .explicit
+
+       .text
+       .align  32
+.Ltable:                       C 256 bytes: byte at odd index d is the inverse of d mod 2^8, even bytes are padding
+data1  0,0x01, 0,0xAB, 0,0xCD, 0,0xB7, 0,0x39, 0,0xA3, 0,0xC5, 0,0xEF
+data1  0,0xF1, 0,0x1B, 0,0x3D, 0,0xA7, 0,0x29, 0,0x13, 0,0x35, 0,0xDF
+data1  0,0xE1, 0,0x8B, 0,0xAD, 0,0x97, 0,0x19, 0,0x83, 0,0xA5, 0,0xCF
+data1  0,0xD1, 0,0xFB, 0,0x1D, 0,0x87, 0,0x09, 0,0xF3, 0,0x15, 0,0xBF
+data1  0,0xC1, 0,0x6B, 0,0x8D, 0,0x77, 0,0xF9, 0,0x63, 0,0x85, 0,0xAF
+data1  0,0xB1, 0,0xDB, 0,0xFD, 0,0x67, 0,0xE9, 0,0xD3, 0,0xF5, 0,0x9F
+data1  0,0xA1, 0,0x4B, 0,0x6D, 0,0x57, 0,0xD9, 0,0x43, 0,0x65, 0,0x8F
+data1  0,0x91, 0,0xBB, 0,0xDD, 0,0x47, 0,0xC9, 0,0xB3, 0,0xD5, 0,0x7F
+data1  0,0x81, 0,0x2B, 0,0x4D, 0,0x37, 0,0xB9, 0,0x23, 0,0x45, 0,0x6F
+data1  0,0x71, 0,0x9B, 0,0xBD, 0,0x27, 0,0xA9, 0,0x93, 0,0xB5, 0,0x5F
+data1  0,0x61, 0,0x0B, 0,0x2D, 0,0x17, 0,0x99, 0,0x03, 0,0x25, 0,0x4F
+data1  0,0x51, 0,0x7B, 0,0x9D, 0,0x07, 0,0x89, 0,0x73, 0,0x95, 0,0x3F
+data1  0,0x41, 0,0xEB, 0,0x0D, 0,0xF7, 0,0x79, 0,0xE3, 0,0x05, 0,0x2F
+data1  0,0x31, 0,0x5B, 0,0x7D, 0,0xE7, 0,0x69, 0,0x53, 0,0x75, 0,0x1F
+data1  0,0x21, 0,0xCB, 0,0xED, 0,0xD7, 0,0x59, 0,0xC3, 0,0xE5, 0,0x0F
+data1  0,0x11, 0,0x3B, 0,0x5D, 0,0xC7, 0,0x49, 0,0x33, 0,0x55, 0,0xFF
+
+
+PROLOGUE(mpn_modexact_1c_odd)
+
+       C r32   src
+       C r33   size
+       C r34   divisor
+       C r35   carry
+       C r8    return value, the final carry c
+       .prologue
+.Lhere:                                C ip read in the first bundle is the base for addressing .Ltable
+{ .mmi;        add     r33 = -1, r33           C M0  size-1
+       mov     r14 = 2                 C M1  2
+       mov     r15 = ip                C I0  .Lhere
+}{.mmi;        setf.sig f6 = r34               C M2  divisor
+       setf.sig f9 = r35               C M3  carry
+       zxt1    r3 = r34                C I1  divisor low byte
+}      ;;
+
+{ .mmi;        add     r3 = .Ltable-.Lhere, r3 C M0  table offset ip and index
+       sub     r16 = 0, r34            C M1  -divisor
+       .save   ar.lc, r2
+       mov     r2 = ar.lc              C I0
+}{.mmi;        .body
+       setf.sig f13 = r14              C M2  2 in significand
+       mov     r17 = -1                C M3  -1
+ABI32(`        zxt4    r33 = r33')             C I1  size extend
+}      ;;
+
+{ .mmi;        add     r3 = r3, r15            C M0  table entry address
+ABI32(` addp4  r32 = 0, r32')          C M1  src extend
+       mov     ar.lc = r33             C I0  size-1 loop count
+}{.mmi;        setf.sig f12 = r16              C M2  -divisor
+       setf.sig f8 = r17               C M3  -1
+}      ;;
+
+{ .mmi;        ld1     r3 = [r3]               C M0  inverse, 8 bits
+       ldf8    f10 = [r32], 8          C M1  src[0]
+       cmp.ne  p6,p0 = 0, r33          C I0  test size!=1
+}      ;;
+
+       C Wait for table load.
+       C Hope for an L1 hit of 1 cycle to ALU, but could be more.
+       setf.sig f7 = r3                C M2  inverse, 8 bits
+(p6)   ldf8    f11 = [r32], 8          C M1  src[1], if size!=1
+       ;;
+
+       C 5 cycles
+
+       C f6    divisor
+       C f7    inverse, being calculated
+       C f8    -1, will be -inverse
+       C f9    carry
+       C f10   src[0]
+       C f11   src[1]
+       C f12   -divisor
+       C f13   2
+       C f14   scratch
+
+       xmpy.l  f14 = f13, f7           C 2*i
+       xmpy.l  f7 = f7, f7             C i*i
+       ;;
+       xma.l   f7 = f7, f12, f14       C i*i*-d + 2*i, inverse 16 bits
+       ;;
+
+       xmpy.l  f14 = f13, f7           C 2*i
+       xmpy.l  f7 = f7, f7             C i*i
+       ;;
+       xma.l   f7 = f7, f12, f14       C i*i*-d + 2*i, inverse 32 bits
+       ;;
+
+       xmpy.l  f14 = f13, f7           C 2*i
+       xmpy.l  f7 = f7, f7             C i*i
+       ;;
+
+       xma.l   f7 = f7, f12, f14       C i*i*-d + 2*i, inverse 64 bits
+       xma.l   f10 = f9, f8, f10       C sc = c * -1 + src[0]
+       ;;
+ASSERT(p6, `
+       xmpy.l  f15 = f6, f7 ;; C divisor*inverse
+       getf.sig r31 = f15 ;;
+       cmp.eq  p6,p0 = 1, r31  C should == 1
+')
+
+       xmpy.l  f10 = f10, f7           C q = sc * inverse
+       xmpy.l  f8 = f7, f8             C -inverse = inverse * -1
+       br.cloop.sptk.few.clr .Lentry   C main loop, if size > 1
+       ;;
+
+       C size==1, finish up now
+       xma.hu  f9 = f10, f6, f9        C c = high(q * divisor + c)
+       mov     ar.lc = r2              C I0
+       ;;
+       getf.sig r8 = f9                C M2  return c
+       br.ret.sptk.many b0
+
+
+
+.Ltop:
+       C r2    saved ar.lc
+       C f6    divisor
+       C f7    inverse
+       C f8    -inverse
+       C f9    carry
+       C f10   src[i] * inverse
+       C f11   scratch src[i+1]
+
+       add     r16 = 160, r32          C prefetch address, 160 bytes past the src pointer
+       ldf8    f11 = [r32], 8          C src[i+1]
+       ;;
+       C 2 cycles
+
+       lfetch  [r16]                   C prefetch upcoming src data
+       xma.l   f10 = f9, f8, f10       C q = c * -inverse + si
+       ;;
+       C 3 cycles
+
+.Lentry:
+       xma.hu  f9 = f10, f6, f9        C c = high(q * divisor + c)
+       xmpy.l  f10 = f11, f7           C si = src[i] * inverse
+       br.cloop.sptk.few.clr .Ltop     C loop while ar.lc is nonzero
+       ;;
+
+       C Loop finished: handle the last limb, whose si is already in f10.
+
+       xma.l   f10 = f9, f8, f10       C q = c * -inverse + si
+       mov     ar.lc = r2              C I0
+       ;;
+       xma.hu  f9 = f10, f6, f9        C c = high(q * divisor + c)
+       ;;
+       getf.sig r8 = f9                C M2  return c
+       br.ret.sptk.many b0
+
+EPILOGUE()
diff --git a/mpn/ia64/mul_1.asm b/mpn/ia64/mul_1.asm

new file mode 100644 (file)

index 0000000..8df8d93
--- /dev/null
+++ b/mpn/ia64/mul_1.asm
@@ -0,0 +1,573 @@
+dnl  IA-64 mpn_mul_1, mpn_mul_1c -- Multiply a limb vector with a limb and
+dnl  store the result in a second limb vector.
+
+dnl  Copyright 2000, 2001, 2002, 2003, 2004, 2006, 2007 Free Software
+dnl  Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C         cycles/limb
+C Itanium:    4.0
+C Itanium 2:  2.0
+
+C TODO
+C  * Further optimize feed-in and wind-down code, both for speed and code size.
+C  * Handle low limb input and results specially, using a common stf8 in the
+C    epilogue.
+C  * Use 1 c/l carry propagation scheme in wind-down code.
+C  * Use extra pointer register for `up' to speed up feed-in loads.
+C  * Work out final differences with addmul_1.asm.
+
+C INPUT PARAMETERS
+define(`rp', `r32')
+define(`up', `r33')
+define(`n', `r34')
+define(`vl', `r35')
+define(`cy', `r36')    C for mpn_mul_1c
+
+ASM_START()
+PROLOGUE(mpn_mul_1)
+       .prologue
+       .save   ar.lc, r2
+       .body
+
+ifdef(`HAVE_ABI_32',
+`      addp4           rp = 0, rp              C M I
+       addp4           up = 0, up              C M I
+       zxt4            n = n                   C I
+       ;;
+')
+{.mfi
+       adds            r15 = -1, n             C M I   r15 = n-1
+       mov             f9 = f0                 C F     f9 = 0, no carry-in limb (mpn_mul_1c sets it to cy)
+       mov.i           r2 = ar.lc              C I0    save ar.lc, restored before each return
+}
+{.mmi
+       ldf8            f7 = [up], 8            C M     f7 = up[0]
+       nop.m           0                       C M
+       and             r14 = 3, n              C M I   r14 = n mod 4
+       ;;
+}
+.Lcommon:                                      C mpn_mul_1c branches here after setting up f9
+{.mii
+       setf.sig        f6 = vl                 C M2 M3 f6 = multiplier limb
+       shr.u           r31 = r15, 2            C I0    r31 = (n-1)/4
+       cmp.eq          p10, p0 = 0, r14        C M I   p10: n mod 4 == 0
+}
+{.mii
+       cmp.eq          p11, p0 = 2, r14        C M I   p11: n mod 4 == 2
+       cmp.eq          p12, p0 = 3, r14        C M I   p12: n mod 4 == 3
+       nop.i           0                       C I
+       ;;
+}
+{.mii
+       cmp.ne          p6, p7 = r0, r0         C M I   p6 = 0, p7 = 1: no carry pending
+       mov.i           ar.lc = r31             C I0    loop count (n-1)/4
+       cmp.ne          p8, p9 = r0, r0         C M I   p8 = 0, p9 = 1: no carry pending
+}
+{.bbb
+  (p10)        br.dptk         .Lb00                   C B
+  (p11)        br.dptk         .Lb10                   C B
+  (p12)        br.dptk         .Lb11                   C B
+       ;;
+}
+
+.Lb01: mov             r20 = 0                 C n mod 4 == 1; r20 = 0 is added to the first low limb at .LL01
+       br.cloop.dptk   .grt1                   C B     taken when n > 1
+
+       xma.l           f39 = f7, f6, f9        C F     n == 1: low(up[0]*vl + f9)
+       xma.hu          f43 = f7, f6, f9        C F     n == 1: high(up[0]*vl + f9)
+       ;;
+       getf.sig        r8 = f43                C M2    return the high limb
+       stf8            [rp] = f39              C M2 M3
+       mov.i           ar.lc = r2              C I0
+       br.ret.sptk.many b0                     C B
+
+.grt1:
+       ldf8            f32 = [up], 8
+       ;;
+       ldf8            f33 = [up], 8
+       ;;
+       ldf8            f34 = [up], 8
+       xma.l           f39 = f7, f6, f9
+       xma.hu          f43 = f7, f6, f9
+       ;;
+       ldf8            f35 = [up], 8
+       br.cloop.dptk   .grt5                   C taken when n > 5
+
+       xma.l           f36 = f32, f6, f0
+       xma.hu          f40 = f32, f6, f0
+       ;;
+       stf8            [rp] = f39, 8           C first result limb stored directly
+       xma.l           f37 = f33, f6, f0
+       xma.hu          f41 = f33, f6, f0
+       ;;
+       getf.sig        r21 = f43
+       getf.sig        r18 = f36
+       xma.l           f38 = f34, f6, f0
+       xma.hu          f42 = f34, f6, f0
+       ;;
+       getf.sig        r22 = f40
+       getf.sig        r19 = f37
+       xma.l           f39 = f35, f6, f0
+       xma.hu          f43 = f35, f6, f0
+       ;;
+       getf.sig        r23 = f41
+       getf.sig        r16 = f38
+       br              .Lcj5                   C n == 5: go straight to the wind-down code
+
+.grt5:
+       xma.l           f36 = f32, f6, f0
+       xma.hu          f40 = f32, f6, f0
+       ;;
+       getf.sig        r17 = f39
+       ldf8            f32 = [up], 8
+       xma.l           f37 = f33, f6, f0
+       xma.hu          f41 = f33, f6, f0
+       ;;
+       getf.sig        r21 = f43
+       ldf8            f33 = [up], 8
+       xma.l           f38 = f34, f6, f0
+       ;;
+       getf.sig        r18 = f36
+       xma.hu          f42 = f34, f6, f0
+       ;;
+       getf.sig        r22 = f40
+       ldf8            f34 = [up], 8
+       xma.l           f39 = f35, f6, f0
+       ;;
+       getf.sig        r19 = f37
+       xma.hu          f43 = f35, f6, f0
+       br              .LL01                   C enter the main loop at .LL01
+
+
+.Lb10: ldf8            f35 = [up], 8           C n mod 4 == 2
+       mov             r23 = 0                 C r23 = 0 is added to the first low limb at .LL10
+       br.cloop.dptk   .grt2                   C taken when n > 2
+
+       xma.l           f38 = f7, f6, f9        C n == 2: low(up[0]*vl + f9)
+       xma.hu          f42 = f7, f6, f9        C n == 2: high(up[0]*vl + f9)
+       ;;
+       stf8            [rp] = f38, 8
+       xma.l           f39 = f35, f6, f42      C high limb f42 is folded in as the xma addend
+       xma.hu          f43 = f35, f6, f42
+       ;;
+       getf.sig        r8 = f43                C return the high limb
+       stf8            [rp] = f39
+       mov.i           ar.lc = r2
+       br.ret.sptk.many b0
+
+
+.grt2:
+       ldf8            f32 = [up], 8
+       ;;
+       ldf8            f33 = [up], 8
+       xma.l           f38 = f7, f6, f9
+       xma.hu          f42 = f7, f6, f9
+       ;;
+       ldf8            f34 = [up], 8
+       xma.l           f39 = f35, f6, f0
+       xma.hu          f43 = f35, f6, f0
+       ;;
+       ldf8            f35 = [up], 8
+       br.cloop.dptk   .grt6                   C taken when n > 6
+
+       stf8            [rp] = f38, 8           C first result limb stored directly
+       xma.l           f36 = f32, f6, f0
+       xma.hu          f40 = f32, f6, f0
+       ;;
+       getf.sig        r20 = f42
+       getf.sig        r17 = f39
+       xma.l           f37 = f33, f6, f0
+       xma.hu          f41 = f33, f6, f0
+       ;;
+       getf.sig        r21 = f43
+       getf.sig        r18 = f36
+       xma.l           f38 = f34, f6, f0
+       xma.hu          f42 = f34, f6, f0
+       ;;
+       getf.sig        r22 = f40
+       getf.sig        r19 = f37
+       xma.l           f39 = f35, f6, f0
+       xma.hu          f43 = f35, f6, f0
+       br              .Lcj6                   C n == 6: go straight to the wind-down code
+
+.grt6:
+       getf.sig        r16 = f38
+       xma.l           f36 = f32, f6, f0
+       xma.hu          f40 = f32, f6, f0
+       ;;
+       getf.sig        r20 = f42
+       ldf8            f32 = [up], 8
+       xma.l           f37 = f33, f6, f0
+       ;;
+       getf.sig        r17 = f39
+       xma.hu          f41 = f33, f6, f0
+       ;;
+       getf.sig        r21 = f43
+       ldf8            f33 = [up], 8
+       xma.l           f38 = f34, f6, f0
+       ;;
+       getf.sig        r18 = f36
+       xma.hu          f42 = f34, f6, f0
+       br              .LL10                   C enter the main loop at .LL10
+
+
+.Lb11: ldf8            f34 = [up], 8           C n mod 4 == 3
+       mov             r22 = 0                 C r22 = 0 is added to the first low limb at .LL11
+       ;;
+       ldf8            f35 = [up], 8
+       br.cloop.dptk   .grt3                   C taken when n > 3
+       ;;
+
+       xma.l           f37 = f7, f6, f9        C n == 3: low(up[0]*vl + f9)
+       xma.hu          f41 = f7, f6, f9        C n == 3: high(up[0]*vl + f9)
+       xma.l           f38 = f34, f6, f0
+       xma.hu          f42 = f34, f6, f0
+       xma.l           f39 = f35, f6, f0
+       xma.hu          f43 = f35, f6, f0
+       ;;
+       getf.sig        r23 = f41
+       stf8            [rp] = f37, 8           C first result limb stored directly
+       getf.sig        r16 = f38
+       getf.sig        r20 = f42
+       getf.sig        r17 = f39
+       getf.sig        r8 = f43                C high limb of the last product, returned after carry fixup
+       br              .Lcj3                   C n == 3: go straight to the wind-down code
+
+.grt3:
+       ldf8            f32 = [up], 8
+       xma.l           f37 = f7, f6, f9
+       xma.hu          f41 = f7, f6, f9
+       ;;
+       ldf8            f33 = [up], 8
+       xma.l           f38 = f34, f6, f0
+       xma.hu          f42 = f34, f6, f0
+       ;;
+       getf.sig        r19 = f37
+       ldf8            f34 = [up], 8
+       xma.l           f39 = f35, f6, f0
+       xma.hu          f43 = f35, f6, f0
+       ;;
+       getf.sig        r23 = f41
+       ldf8            f35 = [up], 8
+       br.cloop.dptk   .grt7                   C taken when n > 7
+
+       getf.sig        r16 = f38
+       xma.l           f36 = f32, f6, f0
+       getf.sig        r20 = f42
+       xma.hu          f40 = f32, f6, f0
+       ;;
+       getf.sig        r17 = f39
+       xma.l           f37 = f33, f6, f0
+       getf.sig        r21 = f43
+       xma.hu          f41 = f33, f6, f0
+       ;;
+       getf.sig        r18 = f36
+       st8             [rp] = r19, 8           C first result limb stored directly
+       xma.l           f38 = f34, f6, f0
+       xma.hu          f42 = f34, f6, f0
+       br              .Lcj7                   C n == 7: go straight to the wind-down code
+
+.grt7:
+       getf.sig        r16 = f38
+       xma.l           f36 = f32, f6, f0
+       xma.hu          f40 = f32, f6, f0
+       ;;
+       getf.sig        r20 = f42
+       ldf8            f32 = [up], 8
+       xma.l           f37 = f33, f6, f0
+       ;;
+       getf.sig        r17 = f39
+       xma.hu          f41 = f33, f6, f0
+       br              .LL11                   C enter the main loop at .LL11
+
+
+.Lb00: ldf8            f33 = [up], 8           C n mod 4 == 0
+       mov             r21 = 0                 C r21 = 0 is added to the first low limb at .LL00
+       ;;
+       ldf8            f34 = [up], 8
+       ;;
+       ldf8            f35 = [up], 8
+       xma.l           f36 = f7, f6, f9        C low(up[0]*vl + f9)
+       xma.hu          f40 = f7, f6, f9        C high(up[0]*vl + f9)
+       br.cloop.dptk   .grt4                   C taken when n > 4
+
+       xma.l           f37 = f33, f6, f0
+       xma.hu          f41 = f33, f6, f0
+       xma.l           f38 = f34, f6, f0
+       xma.hu          f42 = f34, f6, f0
+       ;;
+       getf.sig        r22 = f40
+       stf8            [rp] = f36, 8           C first result limb stored directly
+       xma.l           f39 = f35, f6, f0
+       getf.sig        r19 = f37
+       xma.hu          f43 = f35, f6, f0
+       ;;
+       getf.sig        r23 = f41
+       getf.sig        r16 = f38
+       getf.sig        r20 = f42
+       getf.sig        r17 = f39
+       br              .Lcj4                   C n == 4: go straight to the wind-down code
+
+.grt4:
+       ldf8            f32 = [up], 8
+       xma.l           f37 = f33, f6, f0
+       xma.hu          f41 = f33, f6, f0
+       ;;
+       getf.sig        r18 = f36
+       ldf8            f33 = [up], 8
+       xma.l           f38 = f34, f6, f0
+       xma.hu          f42 = f34, f6, f0
+       ;;
+       getf.sig        r22 = f40
+       ldf8            f34 = [up], 8
+       xma.l           f39 = f35, f6, f0
+       ;;
+       getf.sig        r19 = f37
+       getf.sig        r23 = f41
+       xma.hu          f43 = f35, f6, f0
+       ldf8            f35 = [up], 8
+       br.cloop.dptk   .grt8                   C taken when n > 8
+
+       getf.sig        r16 = f38
+       xma.l           f36 = f32, f6, f0
+       getf.sig        r20 = f42
+       xma.hu          f40 = f32, f6, f0
+       ;;
+       getf.sig        r17 = f39
+       st8             [rp] = r18, 8           C first result limb stored directly
+       xma.l           f37 = f33, f6, f0
+       xma.hu          f41 = f33, f6, f0
+       br              .Lcj8                   C n == 8: go straight to the wind-down code
+
+.grt8:
+       getf.sig        r16 = f38
+       xma.l           f36 = f32, f6, f0
+       xma.hu          f40 = f32, f6, f0
+       br              .LL00                   C enter the main loop at .LL00
+
+
+C *** MAIN LOOP START ***
+       ALIGN(32)
+.Loop:                                         C four limbs are loaded, multiplied and stored per iteration
+       .pred.rel "mutex",p6,p7
+       getf.sig        r16 = f38
+       xma.l           f36 = f32, f6, f0
+   (p6)        cmp.leu         p8, p9 = r24, r17       C sum included +1: carry out iff sum <= addend
+       st8             [rp] = r24, 8
+       xma.hu          f40 = f32, f6, f0
+   (p7)        cmp.ltu         p8, p9 = r24, r17       C sum without +1: carry out iff sum < addend
+       ;;
+.LL00:
+       .pred.rel "mutex",p8,p9
+       getf.sig        r20 = f42
+   (p8)        add             r24 = r18, r21, 1       C low limb + previous high limb + carry
+       nop.b           0
+       ldf8            f32 = [up], 8
+   (p9)        add             r24 = r18, r21          C low limb + previous high limb, no carry
+       nop.b           0
+       ;;
+       .pred.rel "mutex",p8,p9
+       getf.sig        r17 = f39
+       xma.l           f37 = f33, f6, f0
+   (p8)        cmp.leu         p6, p7 = r24, r18
+       st8             [rp] = r24, 8
+       xma.hu          f41 = f33, f6, f0
+   (p9)        cmp.ltu         p6, p7 = r24, r18
+       ;;
+.LL11:
+       .pred.rel "mutex",p6,p7
+       getf.sig        r21 = f43
+   (p6)        add             r24 = r19, r22, 1
+       nop.b           0
+       ldf8            f33 = [up], 8
+   (p7)        add             r24 = r19, r22
+       nop.b           0
+       ;;
+       .pred.rel "mutex",p6,p7
+       getf.sig        r18 = f36
+       xma.l           f38 = f34, f6, f0
+   (p6)        cmp.leu         p8, p9 = r24, r19
+       st8             [rp] = r24, 8
+       xma.hu          f42 = f34, f6, f0
+   (p7)        cmp.ltu         p8, p9 = r24, r19
+       ;;
+.LL10:
+       .pred.rel "mutex",p8,p9
+       getf.sig        r22 = f40
+   (p8)        add             r24 = r16, r23, 1
+       nop.b           0
+       ldf8            f34 = [up], 8
+   (p9)        add             r24 = r16, r23
+       nop.b           0
+       ;;
+       .pred.rel "mutex",p8,p9
+       getf.sig        r19 = f37
+       xma.l           f39 = f35, f6, f0
+   (p8)        cmp.leu         p6, p7 = r24, r16
+       st8             [rp] = r24, 8
+       xma.hu          f43 = f35, f6, f0
+   (p9)        cmp.ltu         p6, p7 = r24, r16
+       ;;
+.LL01:
+       .pred.rel "mutex",p6,p7
+       getf.sig        r23 = f41
+   (p6)        add             r24 = r17, r20, 1
+       nop.b           0
+       ldf8            f35 = [up], 8
+   (p7)        add             r24 = r17, r20
+       br.cloop.dptk   .Loop                   C repeat while ar.lc is nonzero
+C *** MAIN LOOP END ***
+       ;;
+
+.Lcj9:                                         C wind-down: reached by falling out of the main loop
+       .pred.rel "mutex",p6,p7
+       getf.sig        r16 = f38
+       xma.l           f36 = f32, f6, f0
+   (p6)        cmp.leu         p8, p9 = r24, r17
+       st8             [rp] = r24, 8
+       xma.hu          f40 = f32, f6, f0
+   (p7)        cmp.ltu         p8, p9 = r24, r17
+       ;;
+       .pred.rel "mutex",p8,p9
+       getf.sig        r20 = f42
+   (p8)        add             r24 = r18, r21, 1
+   (p9)        add             r24 = r18, r21
+       ;;
+       .pred.rel "mutex",p8,p9
+       getf.sig        r17 = f39
+       xma.l           f37 = f33, f6, f0
+   (p8)        cmp.leu         p6, p7 = r24, r18
+       st8             [rp] = r24, 8
+       xma.hu          f41 = f33, f6, f0
+   (p9)        cmp.ltu         p6, p7 = r24, r18
+       ;;
+.Lcj8:                                         C feed-in code branches here when n == 8
+       .pred.rel "mutex",p6,p7
+       getf.sig        r21 = f43
+   (p6)        add             r24 = r19, r22, 1
+   (p7)        add             r24 = r19, r22
+       ;;
+       .pred.rel "mutex",p6,p7
+       getf.sig        r18 = f36
+       xma.l           f38 = f34, f6, f0
+   (p6)        cmp.leu         p8, p9 = r24, r19
+       st8             [rp] = r24, 8
+       xma.hu          f42 = f34, f6, f0
+   (p7)        cmp.ltu         p8, p9 = r24, r19
+       ;;
+.Lcj7:                                         C feed-in code branches here when n == 7
+       .pred.rel "mutex",p8,p9
+       getf.sig        r22 = f40
+   (p8)        add             r24 = r16, r23, 1
+   (p9)        add             r24 = r16, r23
+       ;;
+       .pred.rel "mutex",p8,p9
+       getf.sig        r19 = f37
+       xma.l           f39 = f35, f6, f0
+   (p8)        cmp.leu         p6, p7 = r24, r16
+       st8             [rp] = r24, 8
+       xma.hu          f43 = f35, f6, f0
+   (p9)        cmp.ltu         p6, p7 = r24, r16
+       ;;
+.Lcj6:                                         C feed-in code branches here when n == 6
+       .pred.rel "mutex",p6,p7
+       getf.sig        r23 = f41
+   (p6)        add             r24 = r17, r20, 1
+   (p7)        add             r24 = r17, r20
+       ;;
+       .pred.rel "mutex",p6,p7
+   (p6)        cmp.leu         p8, p9 = r24, r17
+   (p7)        cmp.ltu         p8, p9 = r24, r17
+       getf.sig        r16 = f38
+       st8             [rp] = r24, 8
+       ;;
+.Lcj5:                                         C feed-in code branches here when n == 5
+       .pred.rel "mutex",p8,p9
+       getf.sig        r20 = f42
+   (p8)        add             r24 = r18, r21, 1
+   (p9)        add             r24 = r18, r21
+       ;;
+       .pred.rel "mutex",p8,p9
+   (p8)        cmp.leu         p6, p7 = r24, r18
+   (p9)        cmp.ltu         p6, p7 = r24, r18
+       getf.sig        r17 = f39
+       st8             [rp] = r24, 8
+       ;;
+.Lcj4:                                         C feed-in code branches here when n == 4
+       .pred.rel "mutex",p6,p7
+       getf.sig        r8 = f43                C high limb of the last product, returned after carry fixup
+   (p6)        add             r24 = r19, r22, 1
+   (p7)        add             r24 = r19, r22
+       ;;
+       .pred.rel "mutex",p6,p7
+       st8             [rp] = r24, 8
+   (p6)        cmp.leu         p8, p9 = r24, r19
+   (p7)        cmp.ltu         p8, p9 = r24, r19
+       ;;
+.Lcj3:                                         C feed-in code branches here when n == 3 (r8 already set there)
+       .pred.rel "mutex",p8,p9
+   (p8)        add             r24 = r16, r23, 1
+   (p9)        add             r24 = r16, r23
+       ;;
+       .pred.rel "mutex",p8,p9
+       st8             [rp] = r24, 8
+   (p8)        cmp.leu         p6, p7 = r24, r16
+   (p9)        cmp.ltu         p6, p7 = r24, r16
+       ;;
+.Lcj2:
+       .pred.rel "mutex",p6,p7
+   (p6)        add             r24 = r17, r20, 1
+   (p7)        add             r24 = r17, r20
+       ;;
+       .pred.rel "mutex",p6,p7
+       st8             [rp] = r24, 8           C last result limb
+   (p6)        cmp.leu         p8, p9 = r24, r17
+   (p7)        cmp.ltu         p8, p9 = r24, r17
+       ;;
+       .pred.rel "mutex",p8,p9
+   (p8)        add             r8 = 1, r8              C fold the final carry into the returned high limb
+       mov.i           ar.lc = r2              C restore the saved ar.lc
+       br.ret.sptk.many b0
+EPILOGUE()
+
+PROLOGUE(mpn_mul_1c)
+       .prologue
+       .save   ar.lc, r2
+       .body
+
+ifdef(`HAVE_ABI_32',
+`      addp4           rp = 0, rp              C M I
+       addp4           up = 0, up              C M I
+       zxt4            n = n                   C I
+       ;;
+')
+{.mmi
+       adds            r15 = -1, n             C M I   r15 = n-1
+       setf.sig        f9 = cy                 C M2 M3 f9 = carry-in limb (mpn_mul_1 uses 0 here)
+       mov.i           r2 = ar.lc              C I0    save ar.lc
+}
+{.mmb
+       ldf8            f7 = [up], 8            C M     f7 = up[0]
+       and             r14 = 3, n              C M I   r14 = n mod 4
+       br.sptk         .Lcommon                C continue in the shared mpn_mul_1 code
+       ;;
+}
+EPILOGUE()
+ASM_END()
diff --git a/mpn/ia64/mul_2.asm b/mpn/ia64/mul_2.asm

new file mode 100644 (file)

index 0000000..b0d4ef7
--- /dev/null
+++ b/mpn/ia64/mul_2.asm
@@ -0,0 +1,691 @@
+dnl  IA-64 mpn_mul_2 -- Multiply an n-limb number with a 2-limb number and
+dnl  store the result to a (n+1)-limb number.
+
+dnl  Copyright 2004 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C         cycles/limb
+C Itanium:    3.15
+C Itanium 2:  1.625
+
+C Note that this is very similar to addmul_2.asm.  If you change this file,
+C please change that file too.
+
+C TODO
+C  * Clean up variable names, and try to decrease the number of distinct
+C    registers used.
+C  * Cleanup feed-in code to not require zeroing several registers.
+C  * Make sure we don't depend on uninitialized predicate registers.
+C  * We currently cross-jump very aggressively, at the expense of a few cycles
+C    per operation.  Consider changing that.
+C  * Could perhaps save a few cycles by using 1 c/l carry propagation in
+C    wind-down code.
+C  * Ultimately rewrite.  The problem with this code is that it first uses a
+C    loaded u value in one xma pair, then leaves it live over several unrelated
+C    xma pairs, before it uses it again.  It should actually be quite possible
+C    to just swap some aligned xma pairs around.  But we should then schedule
+C    u loads further from the first use.
+
+C INPUT PARAMETERS
+define(`rp',`r32')            C result pointer, advanced by each store
+define(`up',`r33')            C pointer to the n-limb operand
+define(`n',`r34')             C limb count, later overwritten with the loop count
+define(`vp',`r35')            C pointer to the 2-limb operand
+C srp is defined here but not referenced anywhere else in this file.
+define(`srp',`r3')
+C v0 and v1 hold the two limbs loaded from vp.
+define(`v0',`f6')
+define(`v1',`f7')
+C s0 is the limb about to be stored, acc0 the partial sum feeding it.
+define(`s0',`r14')
+define(`acc0',`r15')
+C pr0_k: integer copy of fp0b_k.  The suffix k rotates over four limbs.
+define(`pr0_0',`r16') define(`pr0_1',`r17')
+define(`pr0_2',`r18') define(`pr0_3',`r19')
+C pr1_k: integer copy of fp1b_k.
+define(`pr1_0',`r20') define(`pr1_1',`r21')
+define(`pr1_2',`r22') define(`pr1_3',`r23')
+C acc1_k: integer copy of fp2a_k.
+define(`acc1_0',`r24') define(`acc1_1',`r25')
+define(`acc1_2',`r26') define(`acc1_3',`r27')
+C r28-r30 carry no alias; r31 is used by its raw name in the n = 2 code.
+dnl define(`',`r28')
+dnl define(`',`r29')
+dnl define(`',`r30')
+dnl define(`',`r31')
+C fp0b_k: xma.l  of u limb k and v0, added to f0.
+define(`fp0b_0',`f8') define(`fp0b_1',`f9')
+define(`fp0b_2',`f10') define(`fp0b_3',`f11')
+C fp1a_k: xma.hu of u limb k and v0, added to f0.
+define(`fp1a_0',`f12') define(`fp1a_1',`f13')
+define(`fp1a_2',`f14') define(`fp1a_3',`f15')
+C fp1b_k: xma.l  of u limb k and v1, added to fp1a_k.
+define(`fp1b_0',`f32') define(`fp1b_1',`f33')
+define(`fp1b_2',`f34') define(`fp1b_3',`f35')
+C fp2a_k: xma.hu of u limb k and v1, added to fp1a_k.
+define(`fp2a_0',`f36') define(`fp2a_1',`f37')
+define(`fp2a_2',`f38') define(`fp2a_3',`f39')
+C u_k: limbs loaded from up by the feed-in code and the main loop.
+define(`u_0',`f44') define(`u_1',`f45')
+define(`u_2',`f46') define(`u_3',`f47')
+C ux and uy: the first two limbs loaded from up at function entry.
+define(`ux',`f49')
+define(`uy',`f51')
+
+ASM_START()
+PROLOGUE(mpn_mul_2)
+       .prologue
+       .save   ar.lc, r2
+       .body
+C  Entry: load the first two u limbs and both v limbs, then dispatch on n mod 4.
+ifdef(`HAVE_ABI_32',
+`      addp4           rp = 0, rp              C                       M I
+       addp4           up = 0, up              C                       M I
+       addp4           vp = 0, vp              C                       M I
+       zxt4            n = n                   C                       I
+       ;;')
+
+{.mmi          C 00
+       ldf8            ux = [up], 8            C first u limb          M
+       ldf8            v0 = [vp], 8            C first v limb          M
+       mov.i           r2 = ar.lc              C save ar.lc            I0
+}{.mmi
+       nop             0                       C                       M
+       and             r14 = 3, n              C r14 = n mod 4         M I
+       add             n = -2, n               C                       M I
+       ;;
+}{.mmi         C 01
+       ldf8            uy = [up], 8            C second u limb         M
+       ldf8            v1 = [vp]               C second v limb         M
+       shr.u           n = n, 2                C n = (n-2) >> 2        I
+}{.mmi
+       nop             0                       C                       M
+       cmp.eq          p10, p0 = 1, r14        C p10: n mod 4 = 1      M I
+       cmp.eq          p11, p0 = 2, r14        C p11: n mod 4 = 2      M I
+       ;;
+}{.mmi         C 02
+       nop             0                       C                       M
+       cmp.eq          p12, p0 = 3, r14        C p12: n mod 4 = 3      M I
+       mov.i           ar.lc = n               C loop count            I0
+}{.bbb
+  (p10) br.dptk                .Lb01                   C                       B
+  (p11) br.dptk                .Lb10                   C                       B
+  (p12) br.dptk                .Lb11                   C                       B
+       ;;
+}
+
+       ALIGN(32)
+.Lb00: ldf8            u_1 = [up], 8           C reached when n mod 4 = 0
+       mov             acc1_2 = 0              C zeroed: read by the code at .LL00 / .Lcj4
+       mov             pr1_2 = 0               C zeroed: read by the code at .LL00 / .Lcj4
+       mov             pr0_3 = 0               C zeroed: read by the code at .LL00 / .Lcj4
+       cmp.ne          p8, p9 = r0, r0         C p8 = 0, p9 = 1: no carry pending
+       ;;
+       xma.l           fp0b_3 = ux, v0, f0     C low  64 bits of ux * v0
+       cmp.ne          p12, p13 = r0, r0       C p12 = 0, p13 = 1: no carry pending
+       ldf8            u_2 = [up], 8
+       xma.hu          fp1a_3 = ux, v0, f0     C high 64 bits of ux * v0
+       br.cloop.dptk   .grt4                   C taken while ar.lc is nonzero
+C Fall-through: ar.lc was zero, so the main loop is skipped entirely.
+       xma.l           fp0b_0 = uy, v0, f0
+       xma.hu          fp1a_0 = uy, v0, f0
+       ;;
+       getf.sig        acc0 = fp0b_3           C lowest product limb starts the sum chain
+       xma.l           fp1b_3 = ux, v1, fp1a_3
+       xma.hu          fp2a_3 = ux, v1, fp1a_3
+       ;;
+       xma.l           fp0b_1 = u_1, v0, f0
+       xma.hu          fp1a_1 = u_1, v0, f0
+       ;;
+       getf.sig        pr0_0 = fp0b_0
+       xma.l           fp1b_0 = uy, v1, fp1a_0
+       xma.hu          fp2a_0 = uy, v1, fp1a_0
+       ;;
+       getf.sig        pr1_3 = fp1b_3
+       getf.sig        acc1_3 = fp2a_3
+       xma.l           fp0b_2 = u_2, v0, f0
+       xma.hu          fp1a_2 = u_2, v0, f0
+       br              .Lcj4                   C continue in the wind-down code
+
+.grt4: xma.l           fp0b_0 = uy, v0, f0     C ar.lc was nonzero: prime the main loop
+       xma.hu          fp1a_0 = uy, v0, f0
+       ;;
+       getf.sig        acc0 = fp0b_3
+       xma.l           fp1b_3 = ux, v1, fp1a_3
+       ldf8            u_3 = [up], 8
+       xma.hu          fp2a_3 = ux, v1, fp1a_3
+       ;;
+       xma.l           fp0b_1 = u_1, v0, f0
+       xma.hu          fp1a_1 = u_1, v0, f0
+       ;;
+       getf.sig        pr0_0 = fp0b_0
+       xma.l           fp1b_0 = uy, v1, fp1a_0
+       xma.hu          fp2a_0 = uy, v1, fp1a_0
+       ;;
+       ldf8            u_0 = [up], 8
+       getf.sig        pr1_3 = fp1b_3
+       ;;
+       getf.sig        acc1_3 = fp2a_3
+       xma.l           fp0b_2 = u_2, v0, f0
+       xma.hu          fp1a_2 = u_2, v0, f0
+       br              .LL00                   C enter the main loop at its .LL00 entry
+
+
+       ALIGN(32)
+.Lb01: ldf8            u_0 = [up], 8           C M      reached when n mod 4 = 1
+       mov             acc1_1 = 0              C M I    zeroed: read by the code at .LL01 / .Lcj5
+       mov             pr1_1 = 0               C M I    zeroed: read by the code at .LL01 / .Lcj5
+       mov             pr0_2 = 0               C M I    zeroed: read by the code at .LL01 / .Lcj5
+       cmp.ne          p6, p7 = r0, r0         C M I    p6 = 0, p7 = 1: no carry pending
+       ;;
+       xma.l           fp0b_2 = ux, v0, f0     C F      low  64 bits of ux * v0
+       cmp.ne          p10, p11 = r0, r0       C M I    p10 = 0, p11 = 1: no carry pending
+       ldf8            u_1 = [up], 8           C M
+       xma.hu          fp1a_2 = ux, v0, f0     C F      high 64 bits of ux * v0
+       ;;
+       xma.l           fp0b_3 = uy, v0, f0     C F
+       xma.hu          fp1a_3 = uy, v0, f0     C F
+       ;;
+       getf.sig        acc0 = fp0b_2           C M      lowest product limb starts the sum chain
+       xma.l           fp1b_2 = ux, v1,fp1a_2  C F
+       xma.hu          fp2a_2 = ux, v1,fp1a_2  C F
+       ldf8            u_2 = [up], 8           C M
+       br.cloop.dptk   .grt5                   C B      taken while ar.lc is nonzero
+C Fall-through: ar.lc was zero, so the main loop is skipped entirely.
+       xma.l           fp0b_0 = u_0, v0, f0    C F
+       xma.hu          fp1a_0 = u_0, v0, f0    C F
+       ;;
+       getf.sig        pr0_3 = fp0b_3          C M
+       xma.l           fp1b_3 = uy, v1,fp1a_3  C F
+       xma.hu          fp2a_3 = uy, v1,fp1a_3  C F
+       ;;
+       getf.sig        pr1_2 = fp1b_2          C M
+       getf.sig        acc1_2 = fp2a_2         C M
+       xma.l           fp0b_1 = u_1, v0, f0    C F
+       xma.hu          fp1a_1 = u_1, v0, f0    C F
+       br              .Lcj5                   C B      continue in the wind-down code
+
+.grt5: xma.l           fp0b_0 = u_0, v0, f0    C ar.lc was nonzero: prime the main loop
+       xma.hu          fp1a_0 = u_0, v0, f0
+       ;;
+       getf.sig        pr0_3 = fp0b_3
+       xma.l           fp1b_3 = uy, v1, fp1a_3
+       xma.hu          fp2a_3 = uy, v1, fp1a_3
+       ;;
+       ldf8            u_3 = [up], 8
+       getf.sig        pr1_2 = fp1b_2
+       ;;
+       getf.sig        acc1_2 = fp2a_2
+       xma.l           fp0b_1 = u_1, v0, f0
+       xma.hu          fp1a_1 = u_1, v0, f0
+       br              .LL01                   C enter the main loop at its .LL01 entry
+
+
+C We have two variants for n = 2.  They turn out to run at exactly the same
+C speed.  But the first, odd variant might allow one cycle to be trimmed.
+       ALIGN(32)
+ifdef(`',`
+.Lb10:         C 03
+       br.cloop.dptk   .grt2
+               C 04
+               C 05
+               C 06
+       xma.l           fp0b_1 = ux, v0, f0     C 0
+       xma.hu          fp1a_1 = ux, v0, f0     C 1
+       ;;      C 07
+       xma.l           fp0b_2 = uy, v0, f0     C 1
+       xma.l           fp1b_1 = ux, v1, f0     C 1
+       ;;      C 08
+       xma.hu          fp1a_2 = uy, v0, f0     C 2
+       xma.hu          fp2a_1 = ux, v1, f0     C 2
+       ;;      C 09
+       xma.l           fp1b_2 = uy, v1, f0     C 2
+       xma.hu          fp2a_2 = uy, v1, f0     C 3
+       ;;      C 10
+       getf.sig        r16 = fp1a_1
+       stf8            [rp] = fp0b_1, 8
+       ;;      C 11
+       getf.sig        r17 = fp0b_2
+               C 12
+       getf.sig        r18 = fp1b_1
+               C 13
+       getf.sig        r19 = fp1a_2
+               C 14
+       getf.sig        r20 = fp2a_1
+               C 15
+       getf.sig        r21 = fp1b_2
+       ;;      C 16
+       getf.sig        r8 = fp2a_2
+       add             r24 = r16, r17
+       ;;      C 17
+       cmp.ltu         p6, p7 = r24, r16
+       add             r26 = r24, r18
+       ;;      C 18
+       cmp.ltu         p8, p9 = r26, r24
+       ;;      C 19
+       st8             [rp] = r26, 8
+  (p6) add             r25 = r19, r20, 1
+  (p7) add             r25 = r19, r20
+       ;;      C 20
+  (p8) add             r27 = r25, r21, 1
+  (p9) add             r27 = r25, r21
+  (p6) cmp.leu         p10, p0 = r25, r19
+  (p7) cmp.ltu         p10, p0 = r25, r19
+       ;;      C 21
+  (p10)        add             r8 = 1, r8
+  (p8) cmp.leu         p12, p0 = r27, r25
+  (p9) cmp.ltu         p12, p0 = r27, r25
+       ;;      C 22
+       st8             [rp] = r27, 8
+       mov.i           ar.lc = r2
+  (p12)        add             r8 = 1, r8
+       br.ret.sptk.many b0
+')
+C The variant above sits in an ifdef on an empty name and is never expanded.
+.Lb10:         C 03
+       br.cloop.dptk   .grt2                   C taken while ar.lc is nonzero
+               C 04
+               C 05
+               C 06
+       xma.l           fp0b_1 = ux, v0, f0     C low  64 bits of ux * v0
+       xma.hu          fp1a_1 = ux, v0, f0     C high 64 bits of ux * v0
+       ;;      C 07
+       xma.l           fp0b_2 = uy, v0, f0
+       xma.hu          fp1a_2 = uy, v0, f0
+       ;;      C 08
+               C 09
+               C 10
+       stf8            [rp] = fp0b_1, 8        C first result limb stored straight from the FP reg
+       xma.l           fp1b_1 = ux, v1, fp1a_1
+       xma.hu          fp2a_1 = ux, v1, fp1a_1
+       ;;      C 11
+       getf.sig        acc0 = fp0b_2
+       xma.l           fp1b_2 = uy, v1, fp1a_2
+       xma.hu          fp2a_2 = uy, v1, fp1a_2
+       ;;      C 12
+               C 13
+               C 14
+       getf.sig        pr1_1 = fp1b_1
+               C 15
+       getf.sig        acc1_1 = fp2a_1
+               C 16
+       getf.sig        pr1_2 = fp1b_2
+               C 17
+       getf.sig        r8 = fp2a_2             C r8 starts as the topmost high half
+       ;;      C 18
+               C 19
+       add             s0 = pr1_1, acc0
+       ;;      C 20
+       st8             [rp] = s0, 8            C second result limb
+       cmp.ltu         p8, p9 = s0, pr1_1      C p8 = carry out of the add above
+       sub             r31 = -1, acc1_1        C r31 = -1 - acc1_1, used to test for carry without the sum
+       ;;      C 21
+       .pred.rel "mutex", p8, p9
+  (p8) add             acc0 = pr1_2, acc1_1, 1
+  (p9) add             acc0 = pr1_2, acc1_1
+  (p8) cmp.leu         p10, p0 = r31, pr1_2    C carry out when a carry came in
+  (p9) cmp.ltu         p10, p0 = r31, pr1_2    C carry out when no carry came in
+       ;;      C 22
+       st8             [rp] = acc0, 8          C third result limb
+       mov.i           ar.lc = r2              C restore ar.lc
+  (p10)        add             r8 = 1, r8              C add the final carry to r8
+       br.ret.sptk.many b0
+
+
+.grt2: ldf8            u_3 = [up], 8           C reached from .Lb10 when ar.lc was nonzero
+       mov             acc1_0 = 0              C zeroed: read by the loop code after .LL10
+       mov             pr1_0 = 0               C zeroed: read by the loop code after .LL10
+       ;;
+       mov             pr0_1 = 0               C zeroed: read by the loop code after .LL10
+       xma.l           fp0b_1 = ux, v0, f0     C low  64 bits of ux * v0
+       ldf8            u_0 = [up], 8
+       xma.hu          fp1a_1 = ux, v0, f0     C high 64 bits of ux * v0
+       ;;
+       xma.l           fp0b_2 = uy, v0, f0
+       xma.hu          fp1a_2 = uy, v0, f0
+       ;;
+       getf.sig        acc0 = fp0b_1           C lowest product limb starts the sum chain
+       xma.l           fp1b_1 = ux, v1, fp1a_1
+       xma.hu          fp2a_1 = ux, v1, fp1a_1
+       ;;
+       ldf8            u_1 = [up], 8
+       xma.l           fp0b_3 = u_3, v0, f0
+       xma.hu          fp1a_3 = u_3, v0, f0
+       ;;
+       getf.sig        pr0_2 = fp0b_2
+       xma.l           fp1b_2 = uy, v1, fp1a_2
+       xma.hu          fp2a_2 = uy, v1, fp1a_2
+       ;;
+       ldf8            u_2 = [up], 8
+       getf.sig        pr1_1 = fp1b_1
+       ;;
+       getf.sig        acc1_1 = fp2a_1
+       xma.l           fp0b_0 = u_0, v0, f0
+       cmp.ne          p8, p9 = r0, r0         C p8 = 0, p9 = 1: no carry pending
+       cmp.ne          p12, p13 = r0, r0       C p12 = 0, p13 = 1: no carry pending
+       xma.hu          fp1a_0 = u_0, v0, f0
+       br              .LL10                   C join the main loop at its closing branch
+
+
+       ALIGN(32)
+.Lb11: mov             acc1_3 = 0              C reached when n mod 4 = 3
+       mov             pr1_3 = 0               C zeroed: read by the code at .LL11 / .Lcj3
+       mov             pr0_0 = 0               C zeroed: read by the code at .LL11 / .Lcj3
+       cmp.ne          p6, p7 = r0, r0         C p6 = 0, p7 = 1: no carry pending
+       ;;
+       ldf8            u_2 = [up], 8
+       br.cloop.dptk   .grt3                   C taken while ar.lc is nonzero
+       ;;
+       xma.l           fp0b_0 = ux, v0, f0     C low  64 bits of ux * v0
+       xma.hu          fp1a_0 = ux, v0, f0     C high 64 bits of ux * v0
+       ;;
+       cmp.ne          p10, p11 = r0, r0       C p10 = 0, p11 = 1: no carry pending
+       xma.l           fp0b_1 = uy, v0, f0
+       xma.hu          fp1a_1 = uy, v0, f0
+       ;;
+       getf.sig        acc0 = fp0b_0           C lowest product limb starts the sum chain
+       xma.l           fp1b_0 = ux, v1, fp1a_0
+       xma.hu          fp2a_0 = ux, v1, fp1a_0
+       ;;
+       xma.l           fp0b_2 = u_2, v0, f0
+       xma.hu          fp1a_2 = u_2, v0, f0
+       ;;
+       getf.sig        pr0_1 = fp0b_1
+       xma.l           fp1b_1 = uy, v1, fp1a_1
+       xma.hu          fp2a_1 = uy, v1, fp1a_1
+       ;;
+       getf.sig        pr1_0 = fp1b_0
+       getf.sig        acc1_0 = fp2a_0
+       br              .Lcj3                   C continue in the wind-down code
+
+.grt3: xma.l           fp0b_0 = ux, v0, f0     C ar.lc was nonzero: prime the main loop
+       cmp.ne          p10, p11 = r0, r0       C p10 = 0, p11 = 1: no carry pending
+       ldf8            u_3 = [up], 8
+       xma.hu          fp1a_0 = ux, v0, f0
+       ;;
+       xma.l           fp0b_1 = uy, v0, f0
+       xma.hu          fp1a_1 = uy, v0, f0
+       ;;
+       getf.sig        acc0 = fp0b_0
+       xma.l           fp1b_0 = ux, v1, fp1a_0
+       ldf8            u_0 = [up], 8
+       xma.hu          fp2a_0 = ux, v1, fp1a_0
+       ;;
+       xma.l           fp0b_2 = u_2, v0, f0
+       xma.hu          fp1a_2 = u_2, v0, f0
+       ;;
+       getf.sig        pr0_1 = fp0b_1
+       xma.l           fp1b_1 = uy, v1, fp1a_1
+       xma.hu          fp2a_1 = uy, v1, fp1a_1
+       ;;
+       ldf8            u_1 = [up], 8
+       getf.sig        pr1_0 = fp1b_0
+       ;;
+       getf.sig        acc1_0 = fp2a_0
+       xma.l           fp0b_3 = u_3, v0, f0
+       xma.hu          fp1a_3 = u_3, v0, f0
+       br              .LL11                   C enter the main loop at its .LL11 entry
+
+
+C *** MAIN LOOP START ***
+       ALIGN(32)
+.Loop:                                         C 00
+       .pred.rel "mutex", p12, p13
+       getf.sig        pr0_3 = fp0b_3                  C move FP product to an integer register
+       xma.l           fp1b_3 = u_3, v1, fp1a_3        C low  64 bits of u_3 * v1 + fp1a_3
+  (p12)        add             s0 = pr1_0, acc0, 1             C result limb, carry pending
+  (p13)        add             s0 = pr1_0, acc0                C result limb, no carry pending
+       xma.hu          fp2a_3 = u_3, v1, fp1a_3        C high 64 bits of u_3 * v1 + fp1a_3
+       ;;                                      C 01
+       .pred.rel "mutex", p8, p9
+       .pred.rel "mutex", p12, p13
+       ldf8            u_3 = [up], 8                   C load the u limb used four limbs later
+       getf.sig        pr1_2 = fp1b_2
+  (p8) cmp.leu         p6, p7 = acc0, pr0_1            C carry out of the earlier acc0 add, carry-in case
+  (p9) cmp.ltu         p6, p7 = acc0, pr0_1            C same test, no-carry-in case
+  (p12)        cmp.leu         p10, p11 = s0, pr1_0            C carry out of the s0 add, carry-in case
+  (p13)        cmp.ltu         p10, p11 = s0, pr1_0            C same test, no-carry-in case
+       ;;                                      C 02
+       .pred.rel "mutex", p6, p7
+       getf.sig        acc1_2 = fp2a_2
+       st8             [rp] = s0, 8                    C store one finished result limb
+       xma.l           fp0b_1 = u_1, v0, f0            C low  64 bits of u_1 * v0
+  (p6) add             acc0 = pr0_2, acc1_0, 1
+  (p7) add             acc0 = pr0_2, acc1_0
+       xma.hu          fp1a_1 = u_1, v0, f0            C high 64 bits of u_1 * v0
+       ;;                                      C 03
+.LL01:                                         C entered from .grt5; same steps with rotated registers
+       .pred.rel "mutex", p10, p11
+       getf.sig        pr0_0 = fp0b_0
+       xma.l           fp1b_0 = u_0, v1, fp1a_0
+  (p10)        add             s0 = pr1_1, acc0, 1
+  (p11)        add             s0 = pr1_1, acc0
+       xma.hu          fp2a_0 = u_0, v1, fp1a_0
+       ;;                                      C 04
+       .pred.rel "mutex", p6, p7
+       .pred.rel "mutex", p10, p11
+       ldf8            u_0 = [up], 8
+       getf.sig        pr1_3 = fp1b_3
+  (p6) cmp.leu         p8, p9 = acc0, pr0_2
+  (p7) cmp.ltu         p8, p9 = acc0, pr0_2
+  (p10)        cmp.leu         p12, p13 = s0, pr1_1
+  (p11)        cmp.ltu         p12, p13 = s0, pr1_1
+       ;;                                      C 05
+       .pred.rel "mutex", p8, p9
+       getf.sig        acc1_3 = fp2a_3
+       st8             [rp] = s0, 8
+       xma.l           fp0b_2 = u_2, v0, f0
+  (p8) add             acc0 = pr0_3, acc1_1, 1
+  (p9) add             acc0 = pr0_3, acc1_1
+       xma.hu          fp1a_2 = u_2, v0, f0
+       ;;                                      C 06
+.LL00:                                         C entered from .grt4
+       .pred.rel "mutex", p12, p13
+       getf.sig        pr0_1 = fp0b_1
+       xma.l           fp1b_1 = u_1, v1, fp1a_1
+  (p12)        add             s0 = pr1_2, acc0, 1
+  (p13)        add             s0 = pr1_2, acc0
+       xma.hu          fp2a_1 = u_1, v1, fp1a_1
+       ;;                                      C 07
+       .pred.rel "mutex", p8, p9
+       .pred.rel "mutex", p12, p13
+       ldf8            u_1 = [up], 8
+       getf.sig        pr1_0 = fp1b_0
+  (p8) cmp.leu         p6, p7 = acc0, pr0_3
+  (p9) cmp.ltu         p6, p7 = acc0, pr0_3
+  (p12)        cmp.leu         p10, p11 = s0, pr1_2
+  (p13)        cmp.ltu         p10, p11 = s0, pr1_2
+       ;;                                      C 08
+       .pred.rel "mutex", p6, p7
+       getf.sig        acc1_0 = fp2a_0
+       st8             [rp] = s0, 8
+       xma.l           fp0b_3 = u_3, v0, f0
+  (p6) add             acc0 = pr0_0, acc1_2, 1
+  (p7) add             acc0 = pr0_0, acc1_2
+       xma.hu          fp1a_3 = u_3, v0, f0
+       ;;                                      C 09
+.LL11:                                         C entered from .grt3
+       .pred.rel "mutex", p10, p11
+       getf.sig        pr0_2 = fp0b_2
+       xma.l           fp1b_2 = u_2, v1, fp1a_2
+  (p10)        add             s0 = pr1_3, acc0, 1
+  (p11)        add             s0 = pr1_3, acc0
+       xma.hu          fp2a_2 = u_2, v1, fp1a_2
+       ;;                                      C 10
+       .pred.rel "mutex", p6, p7
+       .pred.rel "mutex", p10, p11
+       ldf8            u_2 = [up], 8
+       getf.sig        pr1_1 = fp1b_1
+  (p6) cmp.leu         p8, p9 = acc0, pr0_0
+  (p7) cmp.ltu         p8, p9 = acc0, pr0_0
+  (p10)        cmp.leu         p12, p13 = s0, pr1_3
+  (p11)        cmp.ltu         p12, p13 = s0, pr1_3
+       ;;                                      C 11
+       .pred.rel "mutex", p8, p9
+       getf.sig        acc1_1 = fp2a_1
+       st8             [rp] = s0, 8
+       xma.l           fp0b_0 = u_0, v0, f0
+  (p8) add             acc0 = pr0_1, acc1_3, 1
+  (p9) add             acc0 = pr0_1, acc1_3
+       xma.hu          fp1a_0 = u_0, v0, f0
+.LL10: br.cloop.dptk   .Loop                   C 12  entered from .grt2; loops while ar.lc is nonzero
+       ;;
+C *** MAIN LOOP END ***
+
+.Lcj6:                                         C wind-down: same steps as the loop but without loads
+       .pred.rel "mutex", p12, p13
+       getf.sig        pr0_3 = fp0b_3
+       xma.l           fp1b_3 = u_3, v1, fp1a_3
+  (p12)        add             s0 = pr1_0, acc0, 1
+  (p13)        add             s0 = pr1_0, acc0
+       xma.hu          fp2a_3 = u_3, v1, fp1a_3
+       ;;
+       .pred.rel "mutex", p8, p9
+       .pred.rel "mutex", p12, p13
+       getf.sig        pr1_2 = fp1b_2
+  (p8) cmp.leu         p6, p7 = acc0, pr0_1
+  (p9) cmp.ltu         p6, p7 = acc0, pr0_1
+  (p12)        cmp.leu         p10, p11 = s0, pr1_0
+  (p13)        cmp.ltu         p10, p11 = s0, pr1_0
+       ;;
+       .pred.rel "mutex", p6, p7
+       getf.sig        acc1_2 = fp2a_2
+       st8             [rp] = s0, 8
+       xma.l           fp0b_1 = u_1, v0, f0
+  (p6) add             acc0 = pr0_2, acc1_0, 1
+  (p7) add             acc0 = pr0_2, acc1_0
+       xma.hu          fp1a_1 = u_1, v0, f0
+       ;;
+.Lcj5:                                         C entered from the .Lb01 feed-in when the loop is skipped
+       .pred.rel "mutex", p10, p11
+       getf.sig        pr0_0 = fp0b_0
+       xma.l           fp1b_0 = u_0, v1, fp1a_0
+  (p10)        add             s0 = pr1_1, acc0, 1
+  (p11)        add             s0 = pr1_1, acc0
+       xma.hu          fp2a_0 = u_0, v1, fp1a_0
+       ;;
+       .pred.rel "mutex", p6, p7
+       .pred.rel "mutex", p10, p11
+       getf.sig        pr1_3 = fp1b_3
+  (p6) cmp.leu         p8, p9 = acc0, pr0_2
+  (p7) cmp.ltu         p8, p9 = acc0, pr0_2
+  (p10)        cmp.leu         p12, p13 = s0, pr1_1
+  (p11)        cmp.ltu         p12, p13 = s0, pr1_1
+       ;;
+       .pred.rel "mutex", p8, p9
+       getf.sig        acc1_3 = fp2a_3
+       st8             [rp] = s0, 8
+       xma.l           fp0b_2 = u_2, v0, f0
+  (p8) add             acc0 = pr0_3, acc1_1, 1
+  (p9) add             acc0 = pr0_3, acc1_1
+       xma.hu          fp1a_2 = u_2, v0, f0
+       ;;
+.Lcj4:                                         C entered from the .Lb00 feed-in when the loop is skipped
+       .pred.rel "mutex", p12, p13
+       getf.sig        pr0_1 = fp0b_1
+       xma.l           fp1b_1 = u_1, v1, fp1a_1
+  (p12)        add             s0 = pr1_2, acc0, 1
+  (p13)        add             s0 = pr1_2, acc0
+       xma.hu          fp2a_1 = u_1, v1, fp1a_1
+       ;;
+       .pred.rel "mutex", p8, p9
+       .pred.rel "mutex", p12, p13
+       getf.sig        pr1_0 = fp1b_0
+  (p8) cmp.leu         p6, p7 = acc0, pr0_3
+  (p9) cmp.ltu         p6, p7 = acc0, pr0_3
+  (p12)        cmp.leu         p10, p11 = s0, pr1_2
+  (p13)        cmp.ltu         p10, p11 = s0, pr1_2
+       ;;
+       .pred.rel "mutex", p6, p7
+       getf.sig        acc1_0 = fp2a_0
+       st8             [rp] = s0, 8
+  (p6) add             acc0 = pr0_0, acc1_2, 1
+  (p7) add             acc0 = pr0_0, acc1_2
+       ;;
+.Lcj3:                                         C entered from the .Lb11 feed-in when the loop is skipped
+       .pred.rel "mutex", p10, p11
+       getf.sig        pr0_2 = fp0b_2
+       xma.l           fp1b_2 = u_2, v1, fp1a_2        C last multiply pair of the routine
+  (p10)        add             s0 = pr1_3, acc0, 1
+  (p11)        add             s0 = pr1_3, acc0
+       xma.hu          fp2a_2 = u_2, v1, fp1a_2
+       ;;
+       .pred.rel "mutex", p6, p7
+       .pred.rel "mutex", p10, p11
+       getf.sig        pr1_1 = fp1b_1
+  (p6) cmp.leu         p8, p9 = acc0, pr0_0
+  (p7) cmp.ltu         p8, p9 = acc0, pr0_0
+  (p10)        cmp.leu         p12, p13 = s0, pr1_3
+  (p11)        cmp.ltu         p12, p13 = s0, pr1_3
+       ;;
+       .pred.rel "mutex", p8, p9
+       getf.sig        acc1_1 = fp2a_1
+       st8             [rp] = s0, 8
+  (p8) add             acc0 = pr0_1, acc1_3, 1
+  (p9) add             acc0 = pr0_1, acc1_3
+       ;;
+       .pred.rel "mutex", p12, p13
+  (p12)        add             s0 = pr1_0, acc0, 1
+  (p13)        add             s0 = pr1_0, acc0
+       ;;
+       .pred.rel "mutex", p8, p9
+       .pred.rel "mutex", p12, p13
+       getf.sig        pr1_2 = fp1b_2
+  (p8) cmp.leu         p6, p7 = acc0, pr0_1
+  (p9) cmp.ltu         p6, p7 = acc0, pr0_1
+  (p12)        cmp.leu         p10, p11 = s0, pr1_0
+  (p13)        cmp.ltu         p10, p11 = s0, pr1_0
+       ;;
+       .pred.rel "mutex", p6, p7
+       getf.sig        acc1_2 = fp2a_2
+       st8             [rp] = s0, 8
+  (p6) add             acc0 = pr0_2, acc1_0, 1
+  (p7) add             acc0 = pr0_2, acc1_0
+       ;;
+       .pred.rel "mutex", p10, p11
+  (p10)        add             s0 = pr1_1, acc0, 1
+  (p11)        add             s0 = pr1_1, acc0
+       ;;
+       .pred.rel "mutex", p6, p7
+       .pred.rel "mutex", p10, p11
+  (p6) cmp.leu         p8, p9 = acc0, pr0_2
+  (p7) cmp.ltu         p8, p9 = acc0, pr0_2
+  (p10)        cmp.leu         p12, p13 = s0, pr1_1
+  (p11)        cmp.ltu         p12, p13 = s0, pr1_1
+       ;;
+       .pred.rel "mutex", p8, p9
+       st8             [rp] = s0, 8
+  (p8) add             acc0 = pr1_2, acc1_1, 1
+  (p9) add             acc0 = pr1_2, acc1_1
+       ;;
+       .pred.rel "mutex", p8, p9
+  (p8) cmp.leu         p10, p11 = acc0, pr1_2          C p10 = carry out of the add above
+  (p9) cmp.ltu         p10, p11 = acc0, pr1_2
+  (p12)        add             acc0 = 1, acc0                  C fold in the second pending carry
+       ;;
+       st8             [rp] = acc0, 8                  C last limb written to memory
+  (p12)        cmp.eq.or       p10, p0 = 0, acc0               C also set p10 if that increment wrapped to zero
+       mov             r8 = acc1_2                     C r8 = final high half
+       ;;
+       .pred.rel "mutex", p10, p11
+  (p10)        add             r8 = 1, r8                      C add the final carry to r8
+       mov.i           ar.lc = r2                      C restore ar.lc
+       br.ret.sptk.many b0
+EPILOGUE()
+ASM_END()
diff --git a/mpn/ia64/popcount.asm b/mpn/ia64/popcount.asm

new file mode 100644 (file)

index 0000000..a02bf43
--- /dev/null
+++ b/mpn/ia64/popcount.asm
@@ -0,0 +1,187 @@
+dnl  IA-64 mpn_popcount -- mpn population count.
+
+dnl  Copyright 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation,
+dnl  Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C Itanium:       1.5
+C Itanium 2:     1
+
+C INPUT PARAMETERS
+define(`up', `r32')    C pointer to the limbs being counted
+define(`n', `r33')     C limb count, later overwritten with the loop count
+C u0-u3: loaded limbs.  c0-c3: popcnt results.  s: running total, kept in r8.
+define(`u0',`r16') define(`u1',`r17') define(`u2',`r18') define(`u3',`r19')
+define(`c0',`r28') define(`c1',`r29') define(`c2',`r30') define(`c3',`r31')
+define(`s',`r8')
+
+
+ASM_START()
+PROLOGUE(mpn_popcount)
+       .prologue
+ifdef(`HAVE_ABI_32',
+`      addp4           up = 0, up              C                       M I
+       zxt4            n = n                   C                       I
+       ;;
+')
+C  Entry: load the first limb into r10 and dispatch on n mod 4.
+ {.mmi;        add             r9 = 512, up            C prefetch pointer      M I
+       ld8             r10 = [up], 8           C load first limb       M01
+       mov.i           r2 = ar.lc              C save ar.lc            I0
+}{.mmi;        and             r14 = 3, n              C r14 = n mod 4         M I
+       cmp.lt          p15, p14 = 4, n         C small count?          M I
+       add             n = -5, n               C n = n - 5             M I
+       ;;
+}{.mmi;        cmp.eq          p6, p0 = 1, r14         C p6: n mod 4 = 1       M I
+       cmp.eq          p7, p0 = 2, r14         C p7: n mod 4 = 2       M I
+       cmp.eq          p8, p0 = 3, r14         C p8: n mod 4 = 3       M I
+}{.bbb
+  (p6) br.dptk         .Lb01                   C                       B
+  (p7) br.dptk         .Lb10                   C                       B
+  (p8) br.dptk         .Lb11                   C                       B
+}
+
+
+.Lb00: ld8             u1 = [up], 8            C n mod 4 = 0 case      M01
+       shr.u           n = n, 2                C n = (n-5) >> 2        I0
+       mov             s = 0                   C total starts at 0     M I
+       ;;
+       ld8             u2 = [up], 8            C                       M01
+       popcnt          c0 = r10                C count first limb      I0
+       mov.i           ar.lc = n               C loop count            I0
+       ;;
+       ld8             u3 = [up], 8            C                       M01
+       popcnt          c1 = u1                 C                       I0
+  (p15)        br.cond.dptk    .grt4                   C taken when n > 4      B
+       ;;
+       nop.m   0                               C                       -
+       nop.m   0                               C                       -
+       popcnt          c2 = u2                 C                       I0
+       ;;
+       mov             s = c0                  C total = first count   M I
+       popcnt          c3 = u3                 C                       I0
+       br              .Lcj4                   C add c1, c2 and c3     B
+
+.grt4: ld8             u0 = [up], 8            C                       M01
+       popcnt          c2 = u2                 C                       I0
+       br              .LL00                   C join the main loop    B
+
+
+.Lb01:
+       popcnt          s = r10                 C total = first count   I0
+  (p14)        br.ret.sptk.many b0                     C return if n <= 4      B
+
+.grt1: ld8             u0 = [up], 8            C                       M01
+       shr.u           n = n, 2                C n = (n-5) >> 2        I0
+       ;;
+       ld8             u1 = [up], 8            C                       M01
+       mov.i           ar.lc = n               C loop count            I0
+       ;;
+       ld8             u2 = [up], 8            C                       M01
+       popcnt          c0 = u0                 C                       I0
+       mov             c3 = 0                  C .Loop/.Lend add c3    I0
+
+       ;;
+       ld8             u3 = [up], 8            C                       M01
+       popcnt          c1 = u1                 C                       I0
+       br.cloop.dptk   .Loop                   C while ar.lc nonzero   B
+       br              .Lend                   C else wind down        B
+
+
+.Lb10: ld8             u3 = [up], 8            C n mod 4 = 2 case      M01
+       shr.u           n = n, 2                C n = (n-5) >> 2        I0
+  (p15)        br.cond.dptk    .grt2                   C taken when n > 4      B
+
+       popcnt          s = r10                 C total = first count   I0
+       ;;
+       popcnt          c3 = u3                 C                       I0
+       br              .Lcj2                   C add c3 and return     B
+
+.grt2: ld8             u0 = [up], 8            C                       M01
+       mov.i           ar.lc = n               C loop count            I0
+       popcnt          c2 = r10                C count first limb      I0
+       ;;
+       ld8             u1 = [up], 8            C                       M01
+       popcnt          c3 = u3                 C                       I0
+       mov             s = 0                   C total starts at 0     M I
+       ;;
+       ld8             u2 = [up], 8            C                       M01
+       popcnt          c0 = u0                 C                       I0
+       br              .LL10                   C join the main loop    B
+
+
+.Lb11: ld8             u2 = [up], 8            C n mod 4 = 3 case      M01
+       shr.u           n = n, 2                C n = (n-5) >> 2        I0
+       mov             s = 0                   C overwritten below     M I
+       ;;
+       ld8             u3 = [up], 8            C                       M01
+       popcnt          s = r10                 C total = first count   I0
+  (p15)        br.cond.dptk    .grt3                   C taken when n > 4      B
+
+       popcnt          c2 = u2                 C                       I0
+       ;;
+       popcnt          c3 = u3                 C                       I0
+       br              .Lcj3                   C add c2 and c3         B
+
+.grt3: ld8             u0 = [up], 8            C                       M01
+       popcnt          c2 = u2                 C                       I0
+       mov.i           ar.lc = n               C loop count            I0
+       mov             c1 = 0                  C .LL11 adds c1 first
+       ;;
+       ld8             u1 = [up], 8            C                       M01
+       popcnt          c3 = u3                 C                       I0
+       br              .LL11                   C join the main loop    B
+
+
+.Loop: ld8             u0 = [up], 8            C load next limb        M01
+       popcnt          c2 = u2                 C count a loaded limb   I0
+       add             s = s, c3               C add an earlier count  M I
+       ;;
+.LL00: ld8             u1 = [up], 8            C entry from .grt4      M01
+       popcnt          c3 = u3                 C                       I0
+       add             s = s, c0               C                       M I
+       ;;
+.LL11: ld8             u2 = [up], 8            C entry from .grt3      M01
+       popcnt          c0 = u0                 C                       I0
+       add             s = s, c1               C                       M I
+       ;;
+.LL10: ld8             u3 = [up], 8            C entry from .grt2      M01
+       popcnt          c1 = u1                 C                       I0
+       add             s = s, c2               C                       M I
+       lfetch          [r9], 32                C prefetch, r9 += 32    M01
+       nop.m           0                       C                       -
+       br.cloop.dptk   .Loop                   C while ar.lc nonzero   B
+       ;;
+
+.Lend: popcnt          c2 = u2                 C count last two limbs  I0
+       add             s = s, c3               C                       M I
+       ;;
+       popcnt          c3 = u3                 C                       I0
+       add             s = s, c0               C                       M I
+       ;;
+.Lcj4: add             s = s, c1               C entry from .Lb00      M I
+       ;;
+.Lcj3: add             s = s, c2               C entry from .Lb11      M I
+       ;;
+.Lcj2: add             s = s, c3               C entry from .Lb10      M I
+       mov.i           ar.lc = r2              C restore ar.lc         I0
+       br.ret.sptk.many b0                     C total is in r8        B
+EPILOGUE()
+ASM_END()
diff --git a/mpn/ia64/rsh1aors_n.asm b/mpn/ia64/rsh1aors_n.asm

new file mode 100644 (file)

index 0000000..366b5c5
--- /dev/null
+++ b/mpn/ia64/rsh1aors_n.asm
@@ -0,0 +1,432 @@
+dnl  IA-64 mpn_rsh1add_n/mpn_rsh1sub_n -- rp[] = (up[] +- vp[]) >> 1.
+
+dnl  Copyright 2003, 2004, 2005 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C         cycles/limb
+C Itanium:    2.5
+C Itanium 2:  1.5
+
+C TODO
+C  * Rewrite function entry code using aorslsh1_n.asm style.
+C  * Micro-optimize feed-in and wind-down code.
+
+C INPUT PARAMETERS
+define(`rp',`r32') C destination limb vector, written with st8
+define(`up',`r33') C first source limb vector
+define(`vp',`r34') C second source limb vector, added to or subtracted from up[]
+define(`n',`r35') C number of limbs; later reduced in place to form the loop count
+
+ifdef(`OPERATION_rsh1add_n',`
+  define(ADDSUB,       add) C limb operation
+  define(PRED,        ltu) C carry out when the sum is ltu the first operand
+  define(INCR,        1) C adjustment made to a limb for an incoming carry
+  define(LIM,         -1) C limb value that wraps when INCR is applied
+  define(func, mpn_rsh1add_n)
+')
+ifdef(`OPERATION_rsh1sub_n',`
+  define(ADDSUB,       sub) C limb operation
+  define(PRED,        gtu) C borrow out when the difference is gtu the minuend
+  define(INCR,        -1) C adjustment made to a limb for an incoming borrow
+  define(LIM,         0) C limb value that wraps when INCR is applied
+  define(func, mpn_rsh1sub_n)
+')
+
+C Register aliases: u*/v* = limbs loaded from up/vp, w* = their sum or difference, x* = shifted result limbs to store
+define(`u0',`r14') define(`u1',`r15') define(`u2',`r16') define(`u3',`r17')
+define(`v0',`r18') define(`v1',`r19') define(`v2',`r20') define(`v3',`r21')
+define(`w0',`r22') define(`w1',`r23') define(`w2',`r24') define(`w3',`r25')
+define(`x0',`r26') define(`x1',`r9') define(`x2',`r30') define(`x3',`r31')
+
+MULFUNC_PROLOGUE(mpn_rsh1add_n mpn_rsh1sub_n)
+
+ASM_START()
+PROLOGUE(func)
+       .prologue
+       .save   ar.lc, r2
+       .body
+ifdef(`HAVE_ABI_32',`
+       addp4           rp = 0, rp              C 32-bit ptr to 64-bit  M I
+       addp4           up = 0, up              C 32-bit ptr to 64-bit  M I
+       addp4           vp = 0, vp              C 32-bit ptr to 64-bit  M I
+       zxt4            n = n                   C zero-extend 32-bit n  I
+       ;;
+')
+ {.mmi;        ld8             r11 = [vp], 8           C r11 = vp[0]           M01
+       ld8             r10 = [up], 8           C r10 = up[0]           M01
+       mov.i           r2 = ar.lc              C r2 = saved ar.lc      I0
+}{.mmi;        and             r14 = 3, n              C r14 = n mod 4         M I
+       cmp.lt          p15, p0 = 4, n          C p15 = (n > 4)         M I
+       add             n = -4, n               C n -= 4                M I
+       ;;
+}{.mmi;        cmp.eq          p6, p0 = 1, r14         C p6 = (n mod 4 == 1)   M I
+       cmp.eq          p7, p0 = 2, r14         C p7 = (n mod 4 == 2)   M I
+       cmp.eq          p8, p0 = 3, r14         C p8 = (n mod 4 == 3)   M I
+}{.bbb
+  (p6) br.dptk         .Lb01                   C n mod 4 == 1          B
+  (p7) br.dptk         .Lb10                   C n mod 4 == 2          B
+  (p8) br.dptk         .Lb11                   C n mod 4 == 3          B
+}                                              C n mod 4 == 0: fall through to .Lb00
+
+.Lb00: ld8             v0 = [vp], 8            C n mod 4 == 0 entry    M01
+       ld8             u0 = [up], 8            C                       M01
+       shr.u           n = n, 2                C n >>= 2 (unsigned)    I0
+       ;;
+       ld8             v1 = [vp], 8            C                       M01
+       ld8             u1 = [up], 8            C                       M01
+       ADDSUB          w3 = r10, r11           C w3 = up[0] +- vp[0]   M I
+       ;;
+       ld8             v2 = [vp], 8            C                       M01
+       ld8             u2 = [up], 8            C                       M01
+  (p15)        br.dpnt         .grt4                   C taken if n > 4        B
+       ;;
+C p15 clear (n == 4, assuming n > 0): no loop pass, finish via .Lcj4
+       cmp.PRED        p7, p0 = w3, r10        C p7 = w3 carry/borrow  M I
+       and             r8 = 1, w3              C r8 = bit shifted out  M I
+       ADDSUB          w0 = u0, v0             C                       M I
+       ;;
+       cmp.PRED        p8, p0 = w0, u0         C p8 = w0 carry/borrow  M I
+       ADDSUB          w1 = u1, v1             C                       M I
+       ;;
+       cmp.PRED        p9, p0 = w1, u1         C p9 = w1 carry/borrow  M I
+   (p7)        cmp.eq.or       p8, p0 = LIM, w0        C p8 |= (w0 == LIM)     M I
+   (p7)        add             w0 = INCR, w0           C fold p7 into w0       M I
+       ;;
+       shrp            x3 = w0, w3, 1          C x3 = (w0:w3) >> 1     I0
+       ADDSUB          w2 = u2, v2             C                       M I
+   (p8)        cmp.eq.or       p9, p0 = LIM, w1        C p9 |= (w1 == LIM)     M I
+   (p8)        add             w1 = INCR, w1           C fold p8 into w1       M I
+       br              .Lcj4                   C 4 stores remaining    B
+
+.grt4: ld8             v3 = [vp], 8            C n > 4: feed in loop   M01
+       cmp.PRED        p7, p0 = w3, r10        C p7 = w3 carry/borrow  M I
+       ld8             u3 = [up], 8            C                       M01
+       and             r8 = 1, w3              C r8 = bit shifted out  M I
+       ;;
+       ADDSUB          w0 = u0, v0             C                       M I
+       ld8             v0 = [vp], 8            C                       M01
+       add             n = -1, n               C n -= 1                M I
+       ;;
+       cmp.PRED        p8, p0 = w0, u0         C p8 = w0 carry/borrow  M I
+       ld8             u0 = [up], 8            C                       M01
+       ADDSUB          w1 = u1, v1             C                       M I
+       ;;
+       ld8             v1 = [vp], 8            C                       M01
+       mov.i           ar.lc = n               C loop count = n        I0
+       cmp.PRED        p9, p0 = w1, u1         C p9 = w1 carry/borrow  M I
+       ld8             u1 = [up], 8            C                       M01
+   (p7)        cmp.eq.or       p8, p0 = LIM, w0        C p8 |= (w0 == LIM)     M I
+   (p7)        add             w0 = INCR, w0           C fold p7 into w0       M I
+       ;;
+       ADDSUB          w2 = u2, v2             C                       M I
+       ld8             v2 = [vp], 8            C                       M01
+       shrp            x3 = w0, w3, 1          C x3 = (w0:w3) >> 1     I0
+   (p8)        cmp.eq.or       p9, p0 = LIM, w1        C p9 |= (w1 == LIM)     M I
+   (p8)        add             w1 = INCR, w1           C fold p8 into w1       M I
+       br              .LL00                   C enter loop at .LL00   B
+
+
+.Lb01: ADDSUB          w2 = r10, r11           C w2 = up[0] +- vp[0]   M I
+       shr.u           n = n, 2                C n >>= 2 (unsigned)    I0
+  (p15)        br.dpnt         .grt1                   C taken if n > 4        B
+       ;;
+C p15 clear (n == 1): the only result limb is w2 >> 1, with the carry/borrow placed in bit 63
+       cmp.PRED        p6, p7 = w2, r10        C p6 = w2 carry/borrow  M I
+       shr.u           x2 = w2, 1              C x2 = w2 >> 1          I0
+       and             r8 = 1, w2              C r8 = bit shifted out  M I
+       ;;
+   (p6)        dep             x2 = -1, x2, 63, 1      C set bit 63 if p6      I0
+       br              .Lcj1                   C store x2 and return   B
+
+.grt1: ld8             v3 = [vp], 8            C n > 4: feed in loop   M01
+       ld8             u3 = [up], 8            C                       M01
+       ;;
+       ld8             v0 = [vp], 8            C                       M01
+       ld8             u0 = [up], 8            C                       M01
+       mov.i           ar.lc = n               C FIXME swap with next  I0
+       ;;
+       ld8             v1 = [vp], 8            C                       M01
+       ld8             u1 = [up], 8            C                       M01
+       ;;
+       ld8             v2 = [vp], 8            C                       M01
+       ld8             u2 = [up], 8            C                       M01
+       cmp.PRED        p6, p0 = w2, r10        C p6 = w2 carry/borrow  M I
+       and             r8 = 1, w2              C r8 = bit shifted out  M I
+       ADDSUB          w3 = u3, v3             C                       M I
+       br.cloop.dptk   .grt5                   C taken if ar.lc != 0   B
+       ;;
+C ar.lc was 0 (no loop pass needed): finish via .Lcj5
+       cmp.PRED        p7, p0 = w3, u3         C p7 = w3 carry/borrow  M I
+       ;;
+       ADDSUB          w0 = u0, v0             C                       M I
+   (p6)        cmp.eq.or       p7, p0 = LIM, w3        C p7 |= (w3 == LIM)     M I
+   (p6)        add             w3 = INCR, w3           C fold p6 into w3       M I
+       ;;
+       cmp.PRED        p8, p0 = w0, u0         C p8 = w0 carry/borrow  M I
+       shrp            x2 = w3, w2, 1          C x2 = (w3:w2) >> 1     I0
+       ADDSUB          w1 = u1, v1             C                       M I
+       ;;
+       cmp.PRED        p9, p0 = w1, u1         C p9 = w1 carry/borrow  M I
+   (p7)        cmp.eq.or       p8, p0 = LIM, w0        C p8 |= (w0 == LIM)     M I
+   (p7)        add             w0 = INCR, w0           C fold p7 into w0       M I
+       br              .Lcj5                   C 5 stores remaining    B
+
+.grt5: ld8             v3 = [vp], 8            C ar.lc was nonzero     M01
+       cmp.PRED        p7, p0 = w3, u3         C p7 = w3 carry/borrow  M I
+       ld8             u3 = [up], 8            C                       M01
+       ;;
+       ADDSUB          w0 = u0, v0             C                       M I
+       ld8             v0 = [vp], 8            C                       M01
+   (p6)        cmp.eq.or       p7, p0 = LIM, w3        C p7 |= (w3 == LIM)     M I
+   (p6)        add             w3 = INCR, w3           C fold p6 into w3       M I
+       ;;
+       cmp.PRED        p8, p0 = w0, u0         C p8 = w0 carry/borrow  M I
+       shrp            x2 = w3, w2, 1          C x2 = (w3:w2) >> 1     I0
+       ld8             u0 = [up], 8            C                       M01
+       ADDSUB          w1 = u1, v1             C                       M I
+       ;;
+       ld8             v1 = [vp], 8            C                       M01
+       cmp.PRED        p9, p0 = w1, u1         C p9 = w1 carry/borrow  M I
+       ld8             u1 = [up], 8            C                       M01
+   (p7)        cmp.eq.or       p8, p0 = LIM, w0        C p8 |= (w0 == LIM)     M I
+   (p7)        add             w0 = INCR, w0           C fold p7 into w0       M I
+       br              .LL01                   C enter loop at .LL01   B
+
+
+.Lb10: ld8             v2 = [vp], 8            C n mod 4 == 2 entry    M01
+       ld8             u2 = [up], 8            C                       M01
+       shr.u           n = n, 2                C n >>= 2 (unsigned)    I0
+       ADDSUB          w1 = r10, r11           C w1 = up[0] +- vp[0]   M I
+  (p15)        br.dpnt         .grt2                   C taken if n > 4        B
+       ;;
+C p15 clear (n == 2): no loop pass, finish via .Lcj2
+       cmp.PRED        p9, p0 = w1, r10        C p9 = w1 carry/borrow  M I
+       and             r8 = 1, w1              C r8 = bit shifted out  M I
+       ADDSUB          w2 = u2, v2             C                       M I
+       ;;
+       cmp.PRED        p6, p0 = w2, u2         C p6 = w2 carry/borrow  M I
+       ;;
+   (p9)        cmp.eq.or       p6, p0 = LIM, w2        C p6 |= (w2 == LIM)     M I
+   (p9)        add             w2 = INCR, w2           C fold p9 into w2       M I
+       ;;
+       shrp            x1 = w2, w1, 1          C x1 = (w2:w1) >> 1     I0
+       shr.u           x2 = w2, 1              C x2 = w2 >> 1          I0
+       br              .Lcj2                   C 2 stores remaining    B
+
+.grt2: ld8             v3 = [vp], 8            C n > 4: feed in loop   M01
+       ld8             u3 = [up], 8            C                       M01
+       ;;
+       ld8             v0 = [vp], 8            C                       M01
+       ld8             u0 = [up], 8            C                       M01
+       mov.i           ar.lc = n               C loop count = n        I0
+       ;;
+       ld8             v1 = [vp], 8            C                       M01
+       cmp.PRED        p9, p0 = w1, r10        C p9 = w1 carry/borrow  M I
+       ld8             u1 = [up], 8            C                       M01
+       and             r8 = 1, w1              C r8 = bit shifted out  M I
+       ;;
+       ADDSUB          w2 = u2, v2             C                       M I
+       ld8             v2 = [vp], 8            C                       M01
+       ;;
+       cmp.PRED        p6, p0 = w2, u2         C p6 = w2 carry/borrow  M I
+       ld8             u2 = [up], 8            C                       M01
+       ADDSUB          w3 = u3, v3             C                       M I
+       br.cloop.dptk   .grt6                   C taken if ar.lc != 0   B
+       ;;
+C ar.lc was 0 (no loop pass needed): finish via .Lcj6
+       cmp.PRED        p7, p0 = w3, u3         C p7 = w3 carry/borrow  M I
+   (p9)        cmp.eq.or       p6, p0 = LIM, w2        C p6 |= (w2 == LIM)     M I
+   (p9)        add             w2 = INCR, w2           C fold p9 into w2       M I
+       ;;
+       shrp            x1 = w2, w1, 1          C x1 = (w2:w1) >> 1     I0
+       ADDSUB          w0 = u0, v0             C                       M I
+   (p6)        cmp.eq.or       p7, p0 = LIM, w3        C p7 |= (w3 == LIM)     M I
+   (p6)        add             w3 = INCR, w3           C fold p6 into w3       M I
+       br              .Lcj6                   C 6 stores remaining    B
+
+.grt6: ld8             v3 = [vp], 8            C ar.lc was nonzero     M01
+       cmp.PRED        p7, p0 = w3, u3         C p7 = w3 carry/borrow  M I
+       ld8             u3 = [up], 8            C                       M01
+   (p9)        cmp.eq.or       p6, p0 = LIM, w2        C p6 |= (w2 == LIM)     M I
+   (p9)        add             w2 = INCR, w2           C fold p9 into w2       M I
+       ;;
+       shrp            x1 = w2, w1, 1          C x1 = (w2:w1) >> 1     I0
+       ADDSUB          w0 = u0, v0             C                       M I
+       ld8             v0 = [vp], 8            C                       M01
+   (p6)        cmp.eq.or       p7, p0 = LIM, w3        C p7 |= (w3 == LIM)     M I
+   (p6)        add             w3 = INCR, w3           C fold p6 into w3       M I
+       br              .LL10                   C enter loop at .LL10   B
+
+
+.Lb11: ld8             v1 = [vp], 8            C n mod 4 == 3 entry    M01
+       ld8             u1 = [up], 8            C                       M01
+       shr.u           n = n, 2                C n >>= 2 (unsigned)    I0
+       ;;
+       ld8             v2 = [vp], 8            C                       M01
+       ld8             u2 = [up], 8            C                       M01
+       ADDSUB          w0 = r10, r11           C w0 = up[0] +- vp[0]   M I
+  (p15)        br.dpnt         .grt3                   C taken if n > 4        B
+       ;;
+C p15 clear (n == 3): no loop pass, finish via .Lcj3
+       cmp.PRED        p8, p0 = w0, r10        C p8 = w0 carry/borrow  M I
+       ADDSUB          w1 = u1, v1             C                       M I
+       and             r8 = 1, w0              C r8 = bit shifted out  M I
+       ;;
+       cmp.PRED        p9, p0 = w1, u1         C p9 = w1 carry/borrow  M I
+       ;;
+       ADDSUB          w2 = u2, v2             C                       M I
+   (p8)        cmp.eq.or       p9, p0 = LIM, w1        C p9 |= (w1 == LIM)     M I
+   (p8)        add             w1 = INCR, w1           C fold p8 into w1       M I
+       ;;
+       cmp.PRED        p6, p0 = w2, u2         C p6 = w2 carry/borrow  M I
+       shrp            x0 = w1, w0, 1          C x0 = (w1:w0) >> 1     I0
+       ;;
+   (p9)        cmp.eq.or       p6, p0 = LIM, w2        C p6 |= (w2 == LIM)     M I
+   (p9)        add             w2 = INCR, w2           C fold p9 into w2       M I
+       br              .Lcj3                   C 3 stores remaining    B
+
+.grt3: ld8             v3 = [vp], 8            C n > 4: feed in loop   M01
+       ld8             u3 = [up], 8            C                       M01
+       ;;
+       ld8             v0 = [vp], 8            C                       M01
+       mov.i           ar.lc = n               C loop count = n        I0
+       cmp.PRED        p8, p0 = w0, r10        C p8 = w0 carry/borrow  M I
+       ld8             u0 = [up], 8            C                       M01
+       ADDSUB          w1 = u1, v1             C                       M I
+       and             r8 = 1, w0              C r8 = bit shifted out  M I
+       ;;
+       ld8             v1 = [vp], 8            C                       M01
+       cmp.PRED        p9, p0 = w1, u1         C p9 = w1 carry/borrow  M I
+       ld8             u1 = [up], 8            C                       M01
+       ;;
+       ADDSUB          w2 = u2, v2             C                       M I
+       ld8             v2 = [vp], 8            C                       M01
+   (p8)        cmp.eq.or       p9, p0 = LIM, w1        C p9 |= (w1 == LIM)     M I
+   (p8)        add             w1 = INCR, w1           C fold p8 into w1       M I
+       ;;
+       cmp.PRED        p6, p0 = w2, u2         C p6 = w2 carry/borrow  M I
+       shrp            x0 = w1, w0, 1          C x0 = (w1:w0) >> 1     I0
+       ld8             u2 = [up], 8            C                       M01
+       ADDSUB          w3 = u3, v3             C                       M I
+       br.cloop.dptk   .grt7                   C taken if ar.lc != 0   B
+       ;;
+C ar.lc was 0 (no loop pass needed): finish via .Lcj7
+       cmp.PRED        p7, p0 = w3, u3         C p7 = w3 carry/borrow  M I
+   (p9)        cmp.eq.or       p6, p0 = LIM, w2        C p6 |= (w2 == LIM)     M I
+   (p9)        add             w2 = INCR, w2           C fold p9 into w2       M I
+       br              .Lcj7                   C 7 stores remaining    B
+
+.grt7: ld8             v3 = [vp], 8            C ar.lc was nonzero     M01
+       cmp.PRED        p7, p0 = w3, u3         C p7 = w3 carry/borrow  M I
+       ld8             u3 = [up], 8            C                       M01
+   (p9)        cmp.eq.or       p6, p0 = LIM, w2        C p6 |= (w2 == LIM)     M I
+   (p9)        add             w2 = INCR, w2           C fold p9 into w2       M I
+       br              .LL11                   C enter loop at .LL11   B
+
+
+C *** MAIN LOOP START *** (4 limbs stored per pass; side entries .LL11, .LL10, .LL01, .LL00)
+       ALIGN(32)
+.Loop: st8             [rp] = x3, 8            C store result limb     M23
+       ld8             v3 = [vp], 8            C                       M01
+       cmp.PRED        p7, p0 = w3, u3         C p7 = w3 carry/borrow  M I
+       ld8             u3 = [up], 8            C                       M01
+   (p9)        cmp.eq.or       p6, p0 = LIM, w2        C p6 |= (w2 == LIM)     M I
+   (p9)        add             w2 = INCR, w2           C fold p9 into w2       M I
+       ;;
+.LL11: st8             [rp] = x0, 8            C entered from .grt7    M23
+       shrp            x1 = w2, w1, 1          C x1 = (w2:w1) >> 1     I0
+       ADDSUB          w0 = u0, v0             C                       M I
+       ld8             v0 = [vp], 8            C                       M01
+   (p6)        cmp.eq.or       p7, p0 = LIM, w3        C p7 |= (w3 == LIM)     M I
+   (p6)        add             w3 = INCR, w3           C fold p6 into w3       M I
+       ;;
+.LL10: cmp.PRED        p8, p0 = w0, u0         C entered from .grt6    M I
+       shrp            x2 = w3, w2, 1          C x2 = (w3:w2) >> 1     I0
+       nop.b           0                       C no-op in B slot       B
+       ld8             u0 = [up], 8            C                       M01
+       ADDSUB          w1 = u1, v1             C                       M I
+       nop.b           0                       C no-op in B slot       B
+       ;;
+       st8             [rp] = x1, 8            C store result limb     M23
+       ld8             v1 = [vp], 8            C                       M01
+       cmp.PRED        p9, p0 = w1, u1         C p9 = w1 carry/borrow  M I
+       ld8             u1 = [up], 8            C                       M01
+   (p7)        cmp.eq.or       p8, p0 = LIM, w0        C p8 |= (w0 == LIM)     M I
+   (p7)        add             w0 = INCR, w0           C fold p7 into w0       M I
+       ;;
+.LL01: st8             [rp] = x2, 8            C entered from .grt5    M23
+       shrp            x3 = w0, w3, 1          C x3 = (w0:w3) >> 1     I0
+       ADDSUB          w2 = u2, v2             C                       M I
+       ld8             v2 = [vp], 8            C                       M01
+   (p8)        cmp.eq.or       p9, p0 = LIM, w1        C p9 |= (w1 == LIM)     M I
+   (p8)        add             w1 = INCR, w1           C fold p8 into w1       M I
+       ;;
+.LL00: cmp.PRED        p6, p0 = w2, u2         C entered from .grt4    M I
+       shrp            x0 = w1, w0, 1          C x0 = (w1:w0) >> 1     I0
+       nop.b           0                       C no-op in B slot       B
+       ld8             u2 = [up], 8            C                       M01
+       ADDSUB          w3 = u3, v3             C                       M I
+       br.cloop.dptk   .Loop                   C loop while ar.lc != 0 B
+       ;;
+C *** MAIN LOOP END ***
+
+.Lskip:        st8             [rp] = x3, 8            C loop fall-through     M23
+       cmp.PRED        p7, p0 = w3, u3         C p7 = w3 carry/borrow  M I
+   (p9)        cmp.eq.or       p6, p0 = LIM, w2        C p6 |= (w2 == LIM)     M I
+   (p9)        add             w2 = INCR, w2           C fold p9 into w2       M I
+       ;;
+.Lcj7: st8             [rp] = x0, 8            C 7 stores remaining    M23
+       shrp            x1 = w2, w1, 1          C x1 = (w2:w1) >> 1     I0
+       ADDSUB          w0 = u0, v0             C                       M I
+   (p6)        cmp.eq.or       p7, p0 = LIM, w3        C p7 |= (w3 == LIM)     M I
+   (p6)        add             w3 = INCR, w3           C fold p6 into w3       M I
+       ;;
+.Lcj6: cmp.PRED        p8, p0 = w0, u0         C 6 stores remaining    M I
+       shrp            x2 = w3, w2, 1          C x2 = (w3:w2) >> 1     I0
+       ADDSUB          w1 = u1, v1             C                       M I
+       ;;
+       st8             [rp] = x1, 8            C                       M23
+       cmp.PRED        p9, p0 = w1, u1         C p9 = w1 carry/borrow  M I
+   (p7)        cmp.eq.or       p8, p0 = LIM, w0        C p8 |= (w0 == LIM)     M I
+   (p7)        add             w0 = INCR, w0           C fold p7 into w0       M I
+       ;;
+.Lcj5: st8             [rp] = x2, 8            C 5 stores remaining    M23
+       shrp            x3 = w0, w3, 1          C x3 = (w0:w3) >> 1     I0
+       ADDSUB          w2 = u2, v2             C                       M I
+   (p8)        cmp.eq.or       p9, p0 = LIM, w1        C p9 |= (w1 == LIM)     M I
+   (p8)        add             w1 = INCR, w1           C fold p8 into w1       M I
+       ;;
+.Lcj4: cmp.PRED        p6, p0 = w2, u2         C 4 stores remaining    M I
+       shrp            x0 = w1, w0, 1          C x0 = (w1:w0) >> 1     I0
+       ;;
+       st8             [rp] = x3, 8            C                       M23
+   (p9)        cmp.eq.or       p6, p0 = LIM, w2        C p6 |= (w2 == LIM)     M I
+   (p9)        add             w2 = INCR, w2           C fold p9 into w2       M I
+       ;;
+.Lcj3: st8             [rp] = x0, 8            C 3 stores remaining    M23
+       shrp            x1 = w2, w1, 1          C x1 = (w2:w1) >> 1     I0
+       shr.u           x2 = w2, 1              C x2 = w2 >> 1          I0
+       ;;
+.Lcj2: st8             [rp] = x1, 8            C 2 stores remaining    M23
+   (p6)        dep             x2 = -1, x2, 63, 1      C set bit 63 if p6      I0
+       ;;
+.Lcj1: st8             [rp] = x2               C last store            M23
+       mov.i           ar.lc = r2              C restore ar.lc         I0
+       br.ret.sptk.many b0                     C                       B
+EPILOGUE()
diff --git a/mpn/ia64/sqr_diagonal.asm b/mpn/ia64/sqr_diagonal.asm

new file mode 100644 (file)

index 0000000..50307d4
--- /dev/null
+++ b/mpn/ia64/sqr_diagonal.asm
@@ -0,0 +1,79 @@
+dnl  IA-64 mpn_sqr_diagonal.  Helper for sqr_basecase.
+
+dnl  Copyright 2001, 2002, 2004 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C         cycles/limb
+C Itanium:    4
+C Itanium 2:  2
+
+C TODO
+C  * Perhaps avoid ctop loop.  Unfortunately, a cloop loop running at 1 c/l
+C    would need prohibitive 8-way unrolling.
+C  * Instead of messing too much with this, write a nifty mpn_sqr_basecase.
+
+C INPUT PARAMETERS
+C rp = r32
+C sp = r33
+C n = r34
+
+ASM_START()
+PROLOGUE(mpn_sqr_diagonal)
+       .prologue
+       .save   ar.lc, r2
+       .save   pr, r15
+       .body
+ifdef(`HAVE_ABI_32',
+`      addp4   r32 = 0, r32
+       addp4   r33 = 0, r33
+       zxt4    r34 = r34
+       ;;
+')
+       ldf8            f32 = [r33], 8          C M     load sp[0] early
+       mov             r2 = ar.lc              C I0    save ar.lc
+       mov             r14 = ar.ec             C I0    save ar.ec
+       mov             r15 = pr                C I0    save predicates
+       add             r19 = -1, r34           C M I   decr n
+       add             r18 = 8, r32            C M I   rp for high limb
+       ;;
+       mov             ar.lc = r19             C I0    loop count = n - 1
+       mov             ar.ec = 5               C I0    epilogue count
+       mov             pr.rot = 1<<16          C I0    p16 = 1, other rotating predicates = 0
+       ;;
+       br.cexit.spnt   .Ldone                  C B     NOTE(review): not taken while ar.ec > 1; seems to serve as the first rotation - confirm
+       ;;
+       ALIGN(32)
+.Loop:
+  (p16)        ldf8            f32 = [r33], 8          C M     load next source limb
+  (p19)        xma.l           f36 = f35, f35, f0      C F     low limb of the square
+  (p21)        stf8            [r32] = f38, 16         C M2 M3 store low limb, step 2 limbs
+  (p19)        xma.hu          f40 = f35, f35, f0      C F     high limb of the square
+  (p21)        stf8            [r18] = f42, 16         C M2 M3 store high limb, step 2 limbs
+       br.ctop.dptk    .Loop                   C B
+       ;;
+.Ldone:
+       stf8            [r32] = f38             C M2 M3 last low limb, stored outside the loop
+       stf8            [r18] = f42             C M2 M3 last high limb, stored outside the loop
+       mov             ar.ec = r14             C I0    restore ar.ec
+       ;;
+       mov             pr = r15, 0x1ffff       C I0    restore predicates
+       mov             ar.lc = r2              C I0    restore ar.lc
+       br.ret.sptk.many b0                     C B
+EPILOGUE(mpn_sqr_diagonal)
+ASM_END()
diff --git a/mpn/ia64/submul_1.asm b/mpn/ia64/submul_1.asm

new file mode 100644 (file)

index 0000000..ae46e55
--- /dev/null
+++ b/mpn/ia64/submul_1.asm
@@ -0,0 +1,634 @@
+dnl  IA-64 mpn_submul_1 -- Multiply a limb vector with a limb and subtract the
+dnl  result from a second limb vector.
+
+dnl  Copyright 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C         cycles/limb
+C Itanium:    4.0
+C Itanium 2:  2.25 (alignment dependent, sometimes it seems to need 3 c/l)
+
+C TODO
+C  * Optimize feed-in and wind-down code, both for speed and code size.
+C  * Handle low limb input and results specially, using a common stf8 in the
+C    epilogue.
+C  * Delay r8, r10 initialization, put cmp-p6 in 1st bundle and br .Ldone in
+C    2nd bundle.  This will allow the bbb bundle to be one cycle earlier and
+C    save a cycle.
+
+C INPUT PARAMETERS
+define(`rp', `r32')
+define(`up', `r33')
+define(`n',  `r34')
+define(`vl', `r35')
+
+ASM_START()
+PROLOGUE(mpn_submul_1)
+       .prologue
+       .save   ar.lc, r2
+       .body
+
+ifdef(`HAVE_ABI_32',
+`      addp4           rp = 0, rp              C M I
+       addp4           up = 0, up              C M I
+       zxt4            n = n                   C I
+       ;;
+')
+{.mmi
+       mov             r10 = rp                C M I   r10 = rp copy used for st8 stores
+       mov             r9 = up                 C M I   r9 = up copy used for ld8 loads
+       sub             vl = r0, vl             C M I   negate vl
+}
+{.mmi
+       ldf8            f8 = [rp], 8            C M     rp[0]
+       ldf8            f7 = [up], 8            C M     up[0]
+       add             r19 = -1, n             C M I   n - 1
+       ;;
+}
+{.mmi
+       cmp.eq          p6, p0 = 0, vl          C M I   p6 = vl is zero
+       mov             r8 = 0                  C M I   zero cylimb
+       mov             r2 = ar.lc              C I0    save ar.lc
+}
+{.mmi
+       setf.sig        f6 = vl                 C M2 M3 f6 = negated vl, the multiplier
+       and             r14 = 3, n              C M I   n mod 4
+       shr.u           r19 = r19, 2            C I0    (n - 1) / 4
+       ;;
+}
+{.mmb
+       nop             0
+       cmp.eq          p10, p0 = 0, r14        C M I   p10 = n mod 4 is 0
+   (p6)        br.spnt         .Ldone                  C B     vl == 0
+}
+{.mmi
+       cmp.eq          p11, p0 = 2, r14        C M I   p11 = n mod 4 is 2
+       cmp.eq          p12, p0 = 3, r14        C M I   p12 = n mod 4 is 3
+       mov             ar.lc = r19             C I0    loop count = (n - 1) / 4
+}
+{.bbb
+  (p10)        br.dptk         .Lb00                   C B     n mod 4 is 0
+  (p11)        br.dptk         .Lb10                   C B     n mod 4 is 2
+  (p12)        br.dptk         .Lb11                   C B     n mod 4 is 3
+       ;;
+}
+
+.Lb01: br.cloop.dptk   .grt1                   C n mod 4 is 1; taken if ar.lc != 0
+
+       xma.l           f39 = f7, f6, f8        C lo of up[0] * -vl + rp[0]
+       xma.hu          f43 = f7, f6, f8        C hi of up[0] * -vl + rp[0]
+       ;;
+       getf.sig        r27 = f39                       C lo
+       getf.sig        r31 = f43                       C hi
+       ld8             r20 = [r9], 8           C up[0] as integer, for the hi sub
+       br              .Lcj1                   C wind down, 1 limb left to store
+
+.grt1: ldf8            f44 = [rp], 8
+       ldf8            f32 = [up], 8
+       ;;
+       ldf8            f45 = [rp], 8
+       ldf8            f33 = [up], 8
+       ;;
+       ldf8            f46 = [rp], 8
+       xma.l           f39 = f7, f6, f8
+       ldf8            f34 = [up], 8
+       xma.hu          f43 = f7, f6, f8
+       ;;
+       ldf8            f47 = [rp], 8
+       xma.l           f36 = f32, f6, f44
+       ldf8            f35 = [up], 8
+       xma.hu          f40 = f32, f6, f44
+       br.cloop.dptk   .grt5                   C taken if ar.lc != 0
+       ;;
+
+       getf.sig        r27 = f39                       C lo
+       xma.l           f37 = f33, f6, f45
+       ld8             r20 = [r9], 8
+       xma.hu          f41 = f33, f6, f45
+       ;;
+       getf.sig        r31 = f43                       C hi
+       getf.sig        r24 = f36                       C lo
+       xma.l           f38 = f34, f6, f46
+       ld8             r21 = [r9], 8
+       xma.hu          f42 = f34, f6, f46
+       ;;
+       getf.sig        r28 = f40                       C hi
+       getf.sig        r25 = f37                       C lo
+       xma.l           f39 = f35, f6, f47
+       ld8             r22 = [r9], 8
+       xma.hu          f43 = f35, f6, f47
+       ;;
+       getf.sig        r29 = f41                       C hi
+       getf.sig        r26 = f38                       C lo
+       ld8             r23 = [r9], 8
+       br              .Lcj5                   C wind down, 5 limbs left to store
+
+.grt5: ldf8            f44 = [rp], 8
+       ldf8            f32 = [up], 8
+       ;;
+       getf.sig        r27 = f39                       C lo
+       xma.l           f37 = f33, f6, f45
+       ld8             r20 = [r9], 8
+       xma.hu          f41 = f33, f6, f45
+       ;;
+       ldf8            f45 = [rp], 8
+       getf.sig        r31 = f43                       C hi
+       ldf8            f33 = [up], 8
+       ;;
+       getf.sig        r24 = f36                       C lo
+       xma.l           f38 = f34, f6, f46
+       ld8             r21 = [r9], 8
+       xma.hu          f42 = f34, f6, f46
+       ;;
+       ldf8            f46 = [rp], 8
+       getf.sig        r28 = f40                       C hi
+       ldf8            f34 = [up], 8
+       ;;
+       getf.sig        r25 = f37                       C lo
+       xma.l           f39 = f35, f6, f47
+       ld8             r22 = [r9], 8
+       xma.hu          f43 = f35, f6, f47
+       ;;
+       ldf8            f47 = [rp], 8
+       getf.sig        r29 = f41                       C hi
+       ldf8            f35 = [up], 8
+       ;;
+       getf.sig        r26 = f38                       C lo
+       xma.l           f36 = f32, f6, f44
+       ld8             r23 = [r9], 8
+       xma.hu          f40 = f32, f6, f44
+       br.cloop.dptk   .Loop                   C enter main loop if ar.lc != 0
+       br              .Lend                   C otherwise go straight to wind-down
+
+
+.Lb10: ldf8            f47 = [rp], 8           C n mod 4 is 2
+       ldf8            f35 = [up], 8
+       br.cloop.dptk   .grt2                   C taken if ar.lc != 0
+
+       xma.l           f38 = f7, f6, f8        C lo of up[0] * -vl + rp[0]
+       xma.hu          f42 = f7, f6, f8        C hi of up[0] * -vl + rp[0]
+       ;;
+       xma.l           f39 = f35, f6, f47
+       xma.hu          f43 = f35, f6, f47
+       ;;
+       getf.sig        r26 = f38                       C lo
+       getf.sig        r30 = f42                       C hi
+       ld8             r23 = [r9], 8
+       ;;
+       getf.sig        r27 = f39                       C lo
+       getf.sig        r31 = f43                       C hi
+       ld8             r20 = [r9], 8
+       br              .Lcj2                   C wind down, 2 limbs left to store
+
+.grt2: ldf8            f44 = [rp], 8
+       ldf8            f32 = [up], 8
+       ;;
+       ldf8            f45 = [rp], 8
+       ldf8            f33 = [up], 8
+       xma.l           f38 = f7, f6, f8
+       xma.hu          f42 = f7, f6, f8
+       ;;
+       ldf8            f46 = [rp], 8
+       ldf8            f34 = [up], 8
+       xma.l           f39 = f35, f6, f47
+       xma.hu          f43 = f35, f6, f47
+       ;;
+       ldf8            f47 = [rp], 8
+       ldf8            f35 = [up], 8
+       ;;
+       getf.sig        r26 = f38                       C lo
+       xma.l           f36 = f32, f6, f44
+       ld8             r23 = [r9], 8
+       xma.hu          f40 = f32, f6, f44
+       br.cloop.dptk   .grt6                   C taken if ar.lc != 0
+
+       getf.sig        r30 = f42                       C hi
+       ;;
+       getf.sig        r27 = f39                       C lo
+       xma.l           f37 = f33, f6, f45
+       ld8             r20 = [r9], 8
+       xma.hu          f41 = f33, f6, f45
+       ;;
+       getf.sig        r31 = f43                       C hi
+       getf.sig        r24 = f36                       C lo
+       xma.l           f38 = f34, f6, f46
+       ld8             r21 = [r9], 8
+       xma.hu          f42 = f34, f6, f46
+       ;;
+       getf.sig        r28 = f40                       C hi
+       getf.sig        r25 = f37                       C lo
+       xma.l           f39 = f35, f6, f47
+       ld8             r22 = [r9], 8
+       xma.hu          f43 = f35, f6, f47
+       br              .Lcj6                   C wind down, 6 limbs left to store
+
+.grt6: ldf8            f44 = [rp], 8
+       getf.sig        r30 = f42                       C hi
+       ldf8            f32 = [up], 8
+       ;;
+       getf.sig        r27 = f39                       C lo
+       xma.l           f37 = f33, f6, f45
+       ld8             r20 = [r9], 8
+       xma.hu          f41 = f33, f6, f45
+       ;;
+       ldf8            f45 = [rp], 8
+       getf.sig        r31 = f43                       C hi
+       ldf8            f33 = [up], 8
+       ;;
+       getf.sig        r24 = f36                       C lo
+       xma.l           f38 = f34, f6, f46
+       ld8             r21 = [r9], 8
+       xma.hu          f42 = f34, f6, f46
+       ;;
+       ldf8            f46 = [rp], 8
+       getf.sig        r28 = f40                       C hi
+       ldf8            f34 = [up], 8
+       ;;
+       getf.sig        r25 = f37                       C lo
+       xma.l           f39 = f35, f6, f47
+       ld8             r22 = [r9], 8
+       xma.hu          f43 = f35, f6, f47
+       br              .LL10                   C join main loop at .LL10
+
+
+.Lb11: ldf8            f46 = [rp], 8           C n mod 4 is 3
+       ldf8            f34 = [up], 8
+       ;;
+       ldf8            f47 = [rp], 8
+       ldf8            f35 = [up], 8
+       br.cloop.dptk   .grt3                   C taken if ar.lc != 0
+
+       xma.l           f37 = f7, f6, f8        C lo of up[0] * -vl + rp[0]
+       xma.hu          f41 = f7, f6, f8        C hi of up[0] * -vl + rp[0]
+       ;;
+       xma.l           f38 = f34, f6, f46
+       xma.hu          f42 = f34, f6, f46
+       ;;
+       getf.sig        r25 = f37                       C lo
+       xma.l           f39 = f35, f6, f47
+       xma.hu          f43 = f35, f6, f47
+       ;;
+       getf.sig        r29 = f41                       C hi
+       ld8             r22 = [r9], 8
+       ;;
+       getf.sig        r26 = f38                       C lo
+       getf.sig        r30 = f42                       C hi
+       ld8             r23 = [r9], 8
+       ;;
+       getf.sig        r27 = f39                       C lo
+       getf.sig        r31 = f43                       C hi
+       ld8             r20 = [r9], 8
+       br              .Lcj3                   C wind down, 3 limbs left to store
+
+.grt3: ldf8            f44 = [rp], 8
+       xma.l           f37 = f7, f6, f8
+       ldf8            f32 = [up], 8
+       xma.hu          f41 = f7, f6, f8
+       ;;
+       ldf8            f45 = [rp], 8
+       xma.l           f38 = f34, f6, f46
+       ldf8            f33 = [up], 8
+       xma.hu          f42 = f34, f6, f46
+       ;;
+       ldf8            f46 = [rp], 8
+       ldf8            f34 = [up], 8
+       ;;
+       getf.sig        r25 = f37                       C lo
+       xma.l           f39 = f35, f6, f47
+       ld8             r22 = [r9], 8
+       xma.hu          f43 = f35, f6, f47
+       ;;
+       ldf8            f47 = [rp], 8
+       getf.sig        r29 = f41                       C hi
+       ldf8            f35 = [up], 8
+       ;;
+       getf.sig        r26 = f38                       C lo
+       xma.l           f36 = f32, f6, f44
+       ld8             r23 = [r9], 8
+       xma.hu          f40 = f32, f6, f44
+       br.cloop.dptk   .grt7                   C taken if ar.lc != 0
+       ;;
+
+       getf.sig        r30 = f42                       C hi
+       getf.sig        r27 = f39                       C lo
+       xma.l           f37 = f33, f6, f45
+       ld8             r20 = [r9], 8
+       xma.hu          f41 = f33, f6, f45
+       ;;
+       getf.sig        r31 = f43                       C hi
+       getf.sig        r24 = f36                       C lo
+       xma.l           f38 = f34, f6, f46
+       ld8             r21 = [r9], 8
+       xma.hu          f42 = f34, f6, f46
+       br              .Lcj7                   C wind down, 7 limbs left to store
+
+.grt7: ldf8            f44 = [rp], 8
+       getf.sig        r30 = f42                       C hi
+       ldf8            f32 = [up], 8
+       ;;
+       getf.sig        r27 = f39                       C lo
+       xma.l           f37 = f33, f6, f45
+       ld8             r20 = [r9], 8
+       xma.hu          f41 = f33, f6, f45
+       ;;
+       ldf8            f45 = [rp], 8
+       getf.sig        r31 = f43                       C hi
+       ldf8            f33 = [up], 8
+       ;;
+       getf.sig        r24 = f36                       C lo
+       xma.l           f38 = f34, f6, f46
+       ld8             r21 = [r9], 8
+       xma.hu          f42 = f34, f6, f46
+       br              .LL11                   C join main loop at .LL11
+
+
+.Lb00: ldf8            f45 = [rp], 8           C n mod 4 is 0
+       ldf8            f33 = [up], 8
+       ;;
+       ldf8            f46 = [rp], 8
+       ldf8            f34 = [up], 8
+       ;;
+       ldf8            f47 = [rp], 8
+       xma.l           f36 = f7, f6, f8        C lo of up[0] * -vl + rp[0]
+       ldf8            f35 = [up], 8
+       xma.hu          f40 = f7, f6, f8        C hi of up[0] * -vl + rp[0]
+       br.cloop.dptk   .grt4                   C taken if ar.lc != 0
+
+       xma.l           f37 = f33, f6, f45
+       xma.hu          f41 = f33, f6, f45
+       ;;
+       getf.sig        r24 = f36                       C lo
+       xma.l           f38 = f34, f6, f46
+       ld8             r21 = [r9], 8
+       xma.hu          f42 = f34, f6, f46
+       ;;
+       getf.sig        r28 = f40                       C hi
+       xma.l           f39 = f35, f6, f47
+       getf.sig        r25 = f37                       C lo
+       ld8             r22 = [r9], 8
+       xma.hu          f43 = f35, f6, f47
+       ;;
+       getf.sig        r29 = f41                       C hi
+       getf.sig        r26 = f38                       C lo
+       ld8             r23 = [r9], 8
+       ;;
+       getf.sig        r30 = f42                       C hi
+       getf.sig        r27 = f39                       C lo
+       ld8             r20 = [r9], 8
+       br              .Lcj4                   C wind down, 4 limbs left to store
+
+.grt4: ldf8            f44 = [rp], 8
+       xma.l           f37 = f33, f6, f45
+       ldf8            f32 = [up], 8
+       xma.hu          f41 = f33, f6, f45
+       ;;
+       ldf8            f45 = [rp], 8
+       ldf8            f33 = [up], 8
+       xma.l           f38 = f34, f6, f46
+       getf.sig        r24 = f36                       C lo
+       ld8             r21 = [r9], 8
+       xma.hu          f42 = f34, f6, f46
+       ;;
+       ldf8            f46 = [rp], 8
+       getf.sig        r28 = f40                       C hi
+       ldf8            f34 = [up], 8
+       xma.l           f39 = f35, f6, f47
+       getf.sig        r25 = f37                       C lo
+       ld8             r22 = [r9], 8
+       xma.hu          f43 = f35, f6, f47
+       ;;
+       ldf8            f47 = [rp], 8
+       getf.sig        r29 = f41                       C hi
+       ldf8            f35 = [up], 8
+       ;;
+       getf.sig        r26 = f38                       C lo
+       xma.l           f36 = f32, f6, f44
+       ld8             r23 = [r9], 8
+       xma.hu          f40 = f32, f6, f44
+       br.cloop.dptk   .grt8                   C taken if ar.lc != 0
+       ;;
+
+       getf.sig        r30 = f42                       C hi
+       getf.sig        r27 = f39                       C lo
+       xma.l           f37 = f33, f6, f45
+       ld8             r20 = [r9], 8
+       xma.hu          f41 = f33, f6, f45
+       br              .Lcj8                   C wind down, 8 limbs left to store
+
+.grt8: ldf8            f44 = [rp], 8
+       getf.sig        r30 = f42                       C hi
+       ldf8            f32 = [up], 8
+       ;;
+       getf.sig        r27 = f39                       C lo
+       xma.l           f37 = f33, f6, f45
+       ld8             r20 = [r9], 8
+       xma.hu          f41 = f33, f6, f45
+       br              .LL00                   C join main loop at .LL00
+
+       ALIGN(32)
+.Loop:
+{.mmi
+       ldf8            f44 = [rp], 8
+       cmp.ltu         p6, p0 = r27, r8        C lo cmp
+       sub             r14 = r27, r8           C lo sub
+}
+{.mmi
+       getf.sig        r30 = f42                       C hi
+       ldf8            f32 = [up], 8
+       sub             r8 = r20, r31           C hi sub
+       ;;                              C 01
+}
+{.mmf
+       getf.sig        r27 = f39                       C lo
+       st8             [r10] = r14, 8          C store result limb
+       xma.l           f37 = f33, f6, f45
+}
+{.mfi
+       ld8             r20 = [r9], 8
+       xma.hu          f41 = f33, f6, f45
+   (p6)        add             r8 = 1, r8              C lo sub borrowed
+       ;;                              C 02
+}
+{.mmi
+.LL00: ldf8            f45 = [rp], 8           C entry point used by .grt8
+       cmp.ltu         p6, p0 = r24, r8        C lo cmp
+       sub             r14 = r24, r8           C lo sub
+}
+{.mmi
+       getf.sig        r31 = f43                       C hi
+       ldf8            f33 = [up], 8
+       sub             r8 = r21, r28           C hi sub
+       ;;                              C 03
+}
+{.mmf
+       getf.sig        r24 = f36                       C lo
+       st8             [r10] = r14, 8          C store result limb
+       xma.l           f38 = f34, f6, f46
+}
+{.mfi
+       ld8             r21 = [r9], 8
+       xma.hu          f42 = f34, f6, f46
+   (p6)        add             r8 = 1, r8              C lo sub borrowed
+       ;;                              C 04
+}
+{.mmi
+.LL11: ldf8            f46 = [rp], 8           C entry point used by .grt7
+       cmp.ltu         p6, p0 = r25, r8        C lo cmp
+       sub             r14 = r25, r8           C lo sub
+}
+{.mmi
+       getf.sig        r28 = f40                       C hi
+       ldf8            f34 = [up], 8
+       sub             r8 = r22, r29           C hi sub
+       ;;                              C 05
+}
+{.mmf
+       getf.sig        r25 = f37                       C lo
+       st8             [r10] = r14, 8          C store result limb
+       xma.l           f39 = f35, f6, f47
+}
+{.mfi
+       ld8             r22 = [r9], 8
+       xma.hu          f43 = f35, f6, f47
+   (p6)        add             r8 = 1, r8              C lo sub borrowed
+       ;;                              C 06
+}
+{.mmi
+.LL10: ldf8            f47 = [rp], 8           C entry point used by .grt6
+       cmp.ltu         p6, p0 = r26, r8        C lo cmp
+       sub             r14 = r26, r8           C lo sub
+}
+{.mmi
+       getf.sig        r29 = f41                       C hi
+       ldf8            f35 = [up], 8
+       sub             r8 = r23, r30           C hi sub
+       ;;                              C 07
+}
+{.mmf
+       getf.sig        r26 = f38                       C lo
+       st8             [r10] = r14, 8          C store result limb
+       xma.l           f36 = f32, f6, f44
+}
+{.mfi
+       ld8             r23 = [r9], 8
+       xma.hu          f40 = f32, f6, f44
+   (p6)        add             r8 = 1, r8              C lo sub borrowed
+}
+       br.cloop.dptk   .Loop                   C four limbs stored per iteration
+       ;;
+
+.Lend:                                         C wind-down, 9 limbs left to store
+       cmp.ltu         p6, p0 = r27, r8        C lo cmp
+       sub             r14 = r27, r8           C lo sub
+       getf.sig        r30 = f42               C hi
+       sub             r8 = r20, r31           C hi sub
+       ;;
+       getf.sig        r27 = f39               C lo
+       st8             [r10] = r14, 8
+       xma.l           f37 = f33, f6, f45
+       ld8             r20 = [r9], 8
+       xma.hu          f41 = f33, f6, f45
+   (p6)        add             r8 = 1, r8              C lo sub borrowed
+       ;;
+.Lcj8:                                         C 8 limbs left to store
+       cmp.ltu         p6, p0 = r24, r8
+       sub             r14 = r24, r8
+       getf.sig        r31 = f43
+       sub             r8 = r21, r28
+       ;;
+       getf.sig        r24 = f36
+       st8             [r10] = r14, 8
+       xma.l           f38 = f34, f6, f46
+       ld8             r21 = [r9], 8
+       xma.hu          f42 = f34, f6, f46
+   (p6)        add             r8 = 1, r8
+       ;;
+.Lcj7:                                         C 7 limbs left to store
+       cmp.ltu         p6, p0 = r25, r8
+       sub             r14 = r25, r8
+       getf.sig        r28 = f40
+       sub             r8 = r22, r29
+       ;;
+       getf.sig        r25 = f37
+       st8             [r10] = r14, 8
+       xma.l           f39 = f35, f6, f47
+       ld8             r22 = [r9], 8
+       xma.hu          f43 = f35, f6, f47
+   (p6)        add             r8 = 1, r8
+       ;;
+.Lcj6:                                         C 6 limbs left to store
+       cmp.ltu         p6, p0 = r26, r8
+       sub             r14 = r26, r8
+       getf.sig        r29 = f41
+       sub             r8 = r23, r30
+       ;;
+       getf.sig        r26 = f38
+       st8             [r10] = r14, 8
+       ld8             r23 = [r9], 8
+   (p6)        add             r8 = 1, r8
+       ;;
+.Lcj5:                                         C 5 limbs left to store
+       cmp.ltu         p6, p0 = r27, r8
+       sub             r14 = r27, r8
+       getf.sig        r30 = f42
+       sub             r8 = r20, r31
+       ;;
+       getf.sig        r27 = f39
+       st8             [r10] = r14, 8
+       ld8             r20 = [r9], 8
+   (p6)        add             r8 = 1, r8
+       ;;
+.Lcj4:                                         C 4 limbs left to store
+       cmp.ltu         p6, p0 = r24, r8
+       sub             r14 = r24, r8
+       getf.sig        r31 = f43
+       sub             r8 = r21, r28
+       ;;
+       st8             [r10] = r14, 8
+   (p6)        add             r8 = 1, r8
+       ;;
+.Lcj3:                                         C 3 limbs left to store
+       cmp.ltu         p6, p0 = r25, r8
+       sub             r14 = r25, r8
+       sub             r8 = r22, r29
+       ;;
+       st8             [r10] = r14, 8
+   (p6)        add             r8 = 1, r8
+       ;;
+.Lcj2:                                         C 2 limbs left to store
+       cmp.ltu         p6, p0 = r26, r8
+       sub             r14 = r26, r8
+       sub             r8 = r23, r30
+       ;;
+       st8             [r10] = r14, 8
+   (p6)        add             r8 = 1, r8
+       ;;
+.Lcj1:                                         C last limb
+       cmp.ltu         p6, p0 = r27, r8
+       sub             r14 = r27, r8
+       sub             r8 = r20, r31
+       ;;
+       st8             [r10] = r14, 8
+       mov             ar.lc = r2              C restore ar.lc
+   (p6)        add             r8 = 1, r8              C final borrow goes into r8
+       br.ret.sptk.many b0
+.Ldone:        mov             ar.lc = r2              C vl == 0 path, r8 is still 0
+       br.ret.sptk.many b0
+EPILOGUE()
+ASM_END()
diff --git a/mpn/lisp/gmpasm-mode.el b/mpn/lisp/gmpasm-mode.el

new file mode 100644 (file)

index 0000000..31a9b48
--- /dev/null
+++ b/mpn/lisp/gmpasm-mode.el
@@ -0,0 +1,374 @@
+;;; gmpasm-mode.el -- GNU MP asm and m4 editing mode.
+
+
+;; Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+;;
+;; This file is part of the GNU MP Library.
+;;
+;; The GNU MP Library is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU Lesser General Public License as published by
+;; the Free Software Foundation; either version 3 of the License, or (at your
+;; option) any later version.
+;;
+;; The GNU MP Library is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU Lesser General Public License
+;; along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+;;; Commentary:
+;;
+;; gmpasm-mode is a major mode for editing m4 processed assembler code and
+;; m4 macro files in GMP.  It's similar to m4-mode, but has a number of
+;; settings better suited to GMP.
+;;
+;;
+;; Install
+;; -------
+;;
+;; To make M-x gmpasm-mode available, put gmpasm-mode.el somewhere in your
+;; load-path and the following in your .emacs
+;;
+;;     (autoload 'gmpasm-mode "gmpasm-mode" nil t)
+;;
+;; To use gmpasm-mode automatically on all .asm and .m4 files, put the
+;; following in your .emacs
+;;
+;;     (add-to-list 'auto-mode-alist '("\\.asm\\'" . gmpasm-mode))
+;;     (add-to-list 'auto-mode-alist '("\\.m4\\'" . gmpasm-mode))
+;;
+;; To have gmpasm-mode only on gmp files, try instead something like the
+;; following, which uses it only in a directory starting with "gmp", or a
+;; sub-directory of such.
+;;
+;;     (add-to-list 'auto-mode-alist
+;;                  '("/gmp.*/.*\\.\\(asm\\|m4\\)\\'" . gmpasm-mode))
+;;
+;; Byte compiling will slightly speed up loading.  If you want a docstring
+;; in the autoload you can use M-x update-file-autoloads if you set it up
+;; right.
+;;
+;;
+;; Emacsen
+;; -------
+;;
+;; GNU Emacs 20.x, 21.x and XEmacs 20.x all work well.  GNU Emacs 19.x
+;; should work if replacements for the various 20.x-isms are available,
+;; though comment-region with "C" doesn't do the right thing.
+
+
+;;; Code:
+
+(defgroup gmpasm nil
+  "GNU MP m4 and asm editing."
+  :prefix "gmpasm-"
+  :group 'languages)                   ; parent customization group
+
+(defcustom gmpasm-mode-hook nil
+  "*Hook called by `gmpasm-mode'."
+  :type 'hook
+  :group 'gmpasm)
+
+(defcustom gmpasm-comment-start-regexp "\\([#;!@*|C]\\|//\\)"
+  "*Regexp matching possible comment styles.
+See `gmpasm-mode' docstring for how this is used.
+
+Commenting styles within GMP include
+  #   - alpha, i386, i960, vax, traditional unix
+  ;   - a29k, clipper, hppa, m88k, ppc
+  !   - sh, sparc, z8000
+  |   - m68k
+  @   - arm
+  *   - cray
+  C   - GMP m4, see mpn/asm-defs.m4
+  //  - ia64"
+  :type 'regexp                        ; default: one char of #;!@*|C, or "//"
+  :group 'gmpasm)
+
+
+(defun gmpasm-add-to-list-second (list-var element)
+  "Insert ELEMENT as the second member of the list held in LIST-VAR.
+
+LIST-VAR is a symbol whose value is a list.  When ELEMENT is already a
+member (compared with `member') the value is left alone and returned.
+When the value is nil, it becomes a one-element list holding ELEMENT.
+
+This mirrors `add-to-list', except the new value goes in second place.
+
+A fresh leading cons cell is built instead of modifying the old one, so
+other references to the old list are not disturbed."
+
+  (let ((current (symbol-value list-var)))
+    (cond ((member element current)
+           current)
+          ((null current)
+           (set list-var (list element)))
+          (t
+           (set list-var
+                (cons (car current) (cons element (cdr current))))))))
+
+
+(defun gmpasm-remove-from-list (list-var element)
+  "Drop every occurrence of ELEMENT from the list held in LIST-VAR.
+
+LIST-VAR is a symbol; its value is duplicated with `copy-sequence', the
+duplicate is filtered with `delete', and the result is stored back.  The
+old list object is never modified, so other references to it stay intact.
+The new list is returned."
+
+  ;; Copying the whole list is more than strictly needed, since only the
+  ;; part ahead of a removed element has to be fresh, but this helper runs
+  ;; just for a couple of initializations so the extra consing is harmless.
+  (let ((fresh-copy (copy-sequence (symbol-value list-var))))
+    (set list-var (delete element fresh-copy))))
+
+
+(defvar gmpasm-mode-map
+  (let ((keymap (make-sparse-keymap)))
+
+    ;; C-c C-c comments a region assembler style, C-c C-d does it with dnl
+    (define-key keymap "\C-c\C-c" 'comment-region)
+    (define-key keymap "\C-c\C-d" 'gmpasm-comment-region-dnl)
+
+    ;; C-c C-k stops a running M-x compile; m4 is easily sent into an
+    ;; endless loop
+    (define-key keymap "\C-c\C-k" 'kill-compilation)
+
+    keymap)
+  "Keymap for `gmpasm-mode'.")
+
+
+(defvar gmpasm-mode-syntax-table
+  (let ((table (make-syntax-table)))
+    ;; underscore left as a symbol char, like C mode
+
+    ;; m4 quotes: ` opens and ' closes, each naming the other as its match
+    (modify-syntax-entry ?`  "('"  table)
+    (modify-syntax-entry ?'  ")`"  table)
+
+    table)
+  "Syntax table used in `gmpasm-mode'.
+
+'#' and '\\n' aren't set as comment syntax.  In m4 these are a comment
+outside quotes, but not inside.  Omitting a syntax entry ensures that when
+inside quotes emacs treats parentheses and apostrophes the same way that m4
+does.  When outside quotes this is not quite right, but having it right when
+nesting expressions is more important.
+
+'*', '!' or '|' aren't setup as comment syntax either, on CPUs which use
+these for comments.  The GMP macro setups don't set them in m4 changecom(),
+since that prevents them being used in eval() expressions, and on that basis
+they don't change the way quotes and parentheses are treated by m4 and
+should be treated by emacs.")
+
+
+(defvar gmpasm-font-lock-keywords
+  (eval-when-compile
+    ;; The word list is collapsed into a single optimized regexp, wrapped
+    ;; in \b on both sides so only whole words match, and paired with the
+    ;; keyword face.
+    (let ((words
+           '("deflit" "defreg" "defframe" "defframe_pushl"
+             "define_not_for_expansion"
+             "m4_error" "m4_warning"
+             "ASM_START" "ASM_END"
+             "PROLOGUE" "PROLOGUE_GP" "MULFUNC_PROLOGUE" "EPILOGUE"
+             "DATASTART" "DATAEND"
+             "forloop"
+             "TEXT" "DATA" "ALIGN" "W32" "FLOAT64"
+             "builtin" "changecom" "changequote" "changeword" "debugfile"
+             "debugmode" "decr" "define" "defn" "divert" "divnum" "dumpdef"
+             "errprint" "esyscmd" "eval" "__file__" "format" "gnu" "ifdef"
+             "ifelse" "include" "incr" "index" "indir" "len" "__line__"
+             "m4exit" "m4wrap" "maketemp" "patsubst" "popdef" "pushdef"
+             "regexp" "shift" "sinclude" "substr" "syscmd" "sysval"
+             "traceoff" "traceon" "translit" "undefine" "undivert" "unix")))
+      (list
+       (cons (concat "\\b" (regexp-opt words t) "\\b")
+             'font-lock-keyword-face))))
+
+  "`font-lock-keywords' for `gmpasm-mode'.
+
+The keywords are m4 builtins and some of the GMP macros used in asm files.
+L doesn't look good fontified, so it's omitted.
+
+The right assembler comment regexp is added dynamically buffer-local (with
+dnl too).")
+
+
+;; Left nil here; gmpasm-mode fills them in once if `filladapt-mode' is fbound.
+(defvar gmpasm-filladapt-token-table nil
+  "Filladapt token table used in `gmpasm-mode'.")
+(defvar gmpasm-filladapt-token-match-table nil
+  "Filladapt token match table used in `gmpasm-mode'.")
+(defvar gmpasm-filladapt-token-conversion-table nil
+  "Filladapt token conversion table used in `gmpasm-mode'.")
+
+
+;;;###autoload
+(defun gmpasm-mode ()
+  "A major mode for editing GNU MP asm and m4 files.
+
+\\{gmpasm-mode-map}
+`comment-start' and `comment-end' are set buffer-local to assembler
+commenting appropriate for the CPU by looking for something matching
+`gmpasm-comment-start-regexp' at the start of a line, or \"#\" is used if
+there's no match (if \"#\" isn't what you want, type in a desired comment
+and do \\[gmpasm-mode] to reinitialize).
+
+`adaptive-fill-regexp' is set buffer-local to the standard regexp with
+`comment-start' and dnl added.  If filladapt.el has been loaded it similarly
+gets `comment-start' and dnl added as buffer-local fill prefixes.
+
+Font locking has the m4 builtins, some of the GMP macros, m4 dnl commenting,
+and assembler commenting (based on the `comment-start' determined).
+
+Note that `gmpasm-comment-start-regexp' is only matched as a whole word, so
+the `C' in it is only matched as a whole word, not on something that happens
+to start with `C'.  Also it's only the particular `comment-start' determined
+that's added for filling etc, not the whole `gmpasm-comment-start-regexp'.
+
+`gmpasm-mode-hook' is run after initializations are complete."
+
+  (interactive)
+  (kill-all-local-variables)
+  (setq major-mode 'gmpasm-mode
+        mode-name  "gmpasm")
+  (use-local-map gmpasm-mode-map)
+  (set-syntax-table gmpasm-mode-syntax-table)
+  (setq fill-column 76)
+
+  ;; Short instructions might fit with 32, but anything with labels or
+  ;; expressions soon needs the comments pushed out to column 40.
+  (setq comment-column 40)
+
+  ;; Don't want to find out the hard way which dumb assemblers don't like a
+  ;; missing final newline.
+  (set (make-local-variable 'require-final-newline) t)
+
+  ;; The first match of gmpasm-comment-start-regexp at the start of a line
+  ;; determines comment-start, or "#" if no match.
+  (set (make-local-variable 'comment-start)
+       (save-excursion
+        (goto-char (point-min))
+        (if (re-search-forward
+             (concat "^\\(" gmpasm-comment-start-regexp "\\)\\(\\s-\\|$\\)")
+             nil t)
+            (match-string 1)
+          "#")))
+  (set (make-local-variable 'comment-end) "")
+
+  ;; If comment-start ends in an alphanumeric then \b is used to match it
+  ;; only as a separate word.  The test is for an alphanumeric rather than
+  ;; \w since we might try # or ! as \w characters but without wanting \b on
+  ;; them.
+  (let ((comment-regexp
+        (concat (regexp-quote comment-start)
+                (if (string-match "[a-zA-Z0-9]\\'" comment-start) "\\b"))))
+
+    ;; Whitespace is required before a comment-start so m4 $# doesn't match
+    ;; when comment-start is "#".
+    (set (make-local-variable 'comment-start-skip)
+        (concat "\\(^\\|\\s-\\)\\(\\<dnl\\>\\|" comment-regexp "\\)[ \t]*"))
+
+    ;; Comment fontification based on comment-start, and always with dnl.
+    ;; Same treatment of a space before "#" as in comment-start-skip, but
+    ;; don't fontify that space.
+    (add-to-list (make-local-variable 'gmpasm-font-lock-keywords)
+                (list (concat "\\(^\\|\\s-\\)\\(\\(\\<dnl\\>\\|"
+                              comment-regexp
+                              "\\).*$\\)")
+                      2 'font-lock-comment-face))
+
+    (set (make-local-variable 'font-lock-defaults)
+        '(gmpasm-font-lock-keywords
+          t             ; no syntactic fontification (of strings etc)
+          nil           ; no case-fold
+          ((?_ . "w"))  ; _ part of a word while fontifying
+          ))
+
+    ;; Paragraphs are separated by blank lines, or lines with only dnl or
+    ;; comment-start.
+    (set (make-local-variable 'paragraph-separate)
+        (concat "[ \t\f]*\\(\\(" comment-regexp "\\|dnl\\)[ \t]*\\)*$"))
+    (set (make-local-variable 'paragraph-start)
+        (concat "\f\\|" paragraph-separate))
+
+    ;; Some sort of "def...(" m4 define, possibly with ` for quoting.
+    ;; Could do something with PROLOGUE here, but in GMP the filename is
+    ;; enough, it's not normally necessary to say the function name.
+    (set (make-local-variable 'add-log-current-defun-header-regexp)
+        "^def[a-z0-9_]+(`?\\([a-zA-Z0-9_]+\\)")
+
+    ;; Adaptive fill gets dnl and comment-start as comment style prefixes on
+    ;; top of the standard regexp (which has # and ; already actually).
+    (set (make-local-variable 'adaptive-fill-regexp)
+        (concat "[ \t]*\\(\\("
+                comment-regexp
+                "\\|dnl\\|[-|#;>*]+\\|(?[0-9]+[.)]\\)[ \t]*\\)*"))
+    (set (make-local-variable 'adaptive-fill-first-line-regexp)
+        "\\`\\([ \t]*dnl\\)?[ \t]*\\'")
+
+    (when (fboundp 'filladapt-mode)
+      (unless gmpasm-filladapt-token-table
+       (setq gmpasm-filladapt-token-table
+             filladapt-token-table)
+       (setq gmpasm-filladapt-token-match-table
+             filladapt-token-match-table)
+       (setq gmpasm-filladapt-token-conversion-table
+             filladapt-token-conversion-table)
+
+       ;; Numbered bullet points like "2.1" get matched at the start of a
+       ;; line when it's really something like "2.1 cycles/limb", so remove
+       ;; this from the list.  The regexp for "1.", "2." etc is left
+       ;; though.
+       (gmpasm-remove-from-list 'gmpasm-filladapt-token-table
+                                '("[0-9]+\\(\\.[0-9]+\\)+[ \t]"
+                                  bullet))
+
+       ;; "%" as a comment prefix interferes with register names on some
+       ;; CPUs, like %eax on x86, so remove this.
+       (gmpasm-remove-from-list 'gmpasm-filladapt-token-table
+                                '("%+" postscript-comment))
+
+       (add-to-list 'gmpasm-filladapt-token-match-table
+                    '(gmpasm-comment gmpasm-comment))
+       (add-to-list 'gmpasm-filladapt-token-conversion-table
+                    '(gmpasm-comment . exact)))
+
+      (set (make-local-variable 'filladapt-token-table)
+          gmpasm-filladapt-token-table)
+      (set (make-local-variable 'filladapt-token-match-table)
+          gmpasm-filladapt-token-match-table)
+      (set (make-local-variable 'filladapt-token-conversion-table)
+          gmpasm-filladapt-token-conversion-table)
+
+      ;; Add dnl and comment-start as fill prefixes.
+      ;; Comments in filladapt.el say filladapt-token-table must begin
+      ;; with ("^" beginning-of-line), so put our addition second.
+      (gmpasm-add-to-list-second 'filladapt-token-table
+                                (list (concat "dnl[ \t]\\|" comment-regexp)
+                                      'gmpasm-comment))))
+
+  (run-hooks 'gmpasm-mode-hook))
+
+
+(defun gmpasm-comment-region-dnl (beg end &optional arg)
+  "(gmpasm-comment-region-dnl BEG END &optional ARG)
+
+Comment or uncomment each line in the region using `dnl'.
+With \\[universal-argument] prefix arg, uncomment each line in region.
+This is `comment-region', but using \"dnl\"."
+
+  (interactive "r\nP")
+  (let ((comment-start "dnl")
+       (comment-end ""))
+    (comment-region beg end arg)))
+
+
+(provide 'gmpasm-mode)
+
+;;; gmpasm-mode.el ends here
diff --git a/mpn/m4-ccas b/mpn/m4-ccas

new file mode 100755 (executable)

index 0000000..984e8e9
--- /dev/null
+++ b/mpn/m4-ccas
@@ -0,0 +1,96 @@
+#!/bin/sh
+#
+# A helper script for Makeasm.am .asm.lo rule.
+
+# Copyright 2001 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+# Usage: m4-ccas --m4=M4 CC ... file.asm ...
+#
+# Process file.asm with the given M4 plus any -D arguments, then
+# assemble with the given CC plus all arguments.
+#
+# The M4 command must be in a single --m4= argument, and will be split
+# on whitespace.  When CC is invoked file.asm is replaced with a
+# temporary .s file which is the M4 output.
+#
+# To allow parallel builds, the temp file name is based on the .asm
+# file name, which will be the output object filename for all uses we
+# put this script to.
+
+M4=
+CC=
+DEFS=
+ASM=
+SEEN_O=no
+
+for i in "$@"; do
+  case $i in
+    --m4=*)
+      M4=`echo "$i" | sed 's/^--m4=//'`
+      ;;
+    -D*)
+      DEFS="$DEFS $i"
+      CC="$CC $i"
+      ;;
+    *.asm)
+      if test -n "$ASM"; then
+        echo "Only one .asm file permitted"
+        exit 1
+      fi
+      BASENAME=`echo "$i" | sed -e 's/\.asm$//' -e 's/^.*[\\/:]//'`
+      TMP=tmp-$BASENAME.s
+      ASM=$i
+      CC="$CC $TMP"
+      ;;
+    -o)
+      SEEN_O=yes
+      CC="$CC $i"
+      ;;
+    *)
+      CC="$CC $i"
+      ;;
+  esac
+done
+
+if test -z "$M4"; then
+  echo "No --m4 specified"
+  exit 1
+fi
+
+if test -z "$ASM"; then
+  echo "No .asm specified"
+  exit 1
+fi
+
+# Libtool adds its own -o when sending output to .libs/foo.o, but not
+# when just wanting foo.o in the current directory.  We need an
+# explicit -o in both cases since we're assembling tmp-foo.s.
+#
+if test $SEEN_O = no; then
+  CC="$CC -o $BASENAME.o"
+fi
+
+echo "$M4 $DEFS $ASM >$TMP"
+$M4 $DEFS $ASM >$TMP || exit
+
+echo "$CC"
+$CC || exit
+
+# Comment this out to preserve .s intermediates
+rm -f $TMP
diff --git a/mpn/m68k/README b/mpn/m68k/README

new file mode 100644 (file)

index 0000000..8838f8d
--- /dev/null
+++ b/mpn/m68k/README
@@ -0,0 +1,127 @@
+Copyright 2001, 2003, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+
+
+
+                      M68K MPN SUBROUTINES
+
+
+This directory contains mpn functions for various m68k family chips.
+
+
+CODE ORGANIZATION
+
+       m68k             m68000, m68010, m68060
+       m68k/mc68020     m68020, m68030, m68040, and CPU32
+
+
+The m5200 "coldfire", which is m68000 less a few instructions, currently has
+no assembler code support.
+
+
+STATUS
+
+The code herein is old and poorly maintained.  If somebody really cared, it
+could be optimized substantially.  For example,
+
+* mpn_add_n and mpn_sub_n could, with more unrolling be improved from 6 to
+  close to 4 c/l (on m68040).
+
+* The multiplication loops could be sped up by using the FPU.
+
+* mpn_lshift by 31 should use the special-case mpn_rshift by 1 code, and
+  vice versa mpn_rshift by 31 use the special lshift by 1, when operand
+  overlap permits.
+
+* On 68000, mpn_mul_1, mpn_addmul_1 and mpn_submul_1 could check for a
+  16-bit multiplier and use two multiplies per limb, not four.
+
+  Similarly various other _1 operations like mpn_mod_1, mpn_divrem_1,
+  mpn_divexact_1, mpn_modexact_1c_odd.
+
+* On 68000, mpn_lshift and mpn_rshift could use a roll and mask instead of
+  lsrl and lsll.  This promises to be a speedup, effectively trading a 6+2*n
+  shift for one or two 4 cycle masks.  Suggested by Jean-Charles Meyrignac.
+
+* config.guess detects 68000, 68010, CPU32 and 68020 by running some code,
+  but relies on system information for 030, 040 and 060.  Can they be
+  identified by running some code?  Currently this only makes a difference
+  to the compiler options selected, since we have no specific asm code for
+  those chips.
+
+One novel idea for 68000 would be to use a 16-bit limb instead of 32-bits.
+This would suit the native 16x16 multiply, but might make it difficult to
+get full value from the native 32x32 add/sub/etc.  This would be an ABI
+option, and would select "__GMP_SHORT_LIMB" in gmp.h.
+
+Naturally an entirely new set of asm subroutines would be needed for a
+16-bit limb.  Also there's various places in the C code assuming limb>=long,
+which would need to be updated, eg. mpz_set_ui.  Some of the nails changes
+may have helped cover some of this.
+
+
+ASM FILES
+
+The .asm files are put through m4 for macro processing, and with the help of
+configure give either MIT or Motorola syntax.  The generic mpn/asm-defs.m4
+is used, together with mpn/m68k/m68k-defs.m4.  See comments in those files.
+
+Not all possible syntax variations are covered.  GCC config/m68k for
+instance has things like $ for immediates on CRDS or reversed cmp order for
+AT&T SGS.  These could probably be handled if anyone really needs it.
+
+
+CALLING CONVENTIONS
+
+The SVR4 standard has an int of 32 bits, and all parameters 32-bit aligned
+on the stack.
+
+PalmOS and perhaps various embedded systems intended for 68000 however use
+an int of 16 bits and parameters only 16-bit aligned on the stack.  This is
+generated by "gcc -mshort" (and is the default for the PalmOS gcc port, we
+believe).
+
+The asm files adapt to these two ABIs by checking sizeof(unsigned), coming
+through config.m4 as SIZEOF_UNSIGNED.  Only mpn_lshift and mpn_rshift are
+affected, all other routines take longs and pointers, which are 32-bits in
+both cases.
+
+Strictly speaking the size of an int doesn't determine the stack padding
+convention.  But if int is 16 bits then we can definitely say the host
+system is not SVR4, and therefore may as well assume we're in 16-bit stack
+alignment.
+
+
+REFERENCES
+
+"Motorola M68000 Family Programmer's Reference Manual", available online,
+
+       http://e-www.motorola.com/brdata/PDFDB/docs/M68000PM.pdf
+
+"System V Application Binary Interface: Motorola 68000 Processor Family
+Supplement", AT&T, 1990, ISBN 0-13-877553-6.  Has details of calling
+conventions and ELF style PIC coding.
+
+
+
+----------------
+Local variables:
+mode: text
+fill-column: 76
+End:
diff --git a/mpn/m68k/aors_n.asm b/mpn/m68k/aors_n.asm

new file mode 100644 (file)

index 0000000..da9bb41
--- /dev/null
+++ b/mpn/m68k/aors_n.asm
@@ -0,0 +1,89 @@
+dnl  mc68020 mpn_add_n, mpn_sub_n -- add or subtract limb vectors
+
+dnl  Copyright 1992, 1994, 1996, 1999, 2000, 2001, 2002, 2003, 2005 Free
+dnl  Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C         cycles/limb
+C 68040:      6
+
+ifdef(`OPERATION_add_n',`
+  define(M4_inst,       addxl)
+  define(M4_function_n, mpn_add_n)
+',`ifdef(`OPERATION_sub_n',`
+  define(M4_inst,       subxl)
+  define(M4_function_n, mpn_sub_n)
+',
+`m4_error(`Need OPERATION_add_n or OPERATION_sub_n
+')')')
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_sub_n)
+
+
+C INPUT PARAMETERS
+C res_ptr      (sp + 4)
+C s1_ptr       (sp + 8)
+C s2_ptr       (sp + 12)
+C size         (sp + 16)
+
+
+PROLOGUE(M4_function_n)
+
+C Save used registers on the stack.
+       movel   d2, M(-,sp)
+       movel   a2, M(-,sp)
+
+C Copy the arguments to registers.  Better use movem?
+C Two registers (8 bytes) were pushed above, so the arguments listed under
+C INPUT PARAMETERS are now 8 bytes further from sp:
+C a2 = res_ptr, a0 = s1_ptr, a1 = s2_ptr, d2 = size.
+       movel   M(sp,12), a2
+       movel   M(sp,16), a0
+       movel   M(sp,20), a1
+       movel   M(sp,24), d2
+
+C The loop body handles two limbs per pass.  Flip the low bit of size and
+C halve it: carry clear means size was odd, so enter at L1 to do a single
+C limb first.  Either way the extend/carry flag is clear on entry.
+       eorw    #1, d2
+       lsrl    #1, d2
+       bcc     L(L1)
+       subql   #1, d2  C clears cy as side effect
+
+L(Loop):
+       movel   M(a0,+), d0
+       movel   M(a1,+), d1
+       M4_inst d1, d0                  C d0 = s1 limb +/- s2 limb +/- cy
+       movel   d0, M(a2,+)
+L(L1): movel   M(a0,+), d0
+       movel   M(a1,+), d1
+       M4_inst d1, d0                  C d0 = s1 limb +/- s2 limb +/- cy
+       movel   d0, M(a2,+)
+
+       dbf     d2, L(Loop)             C loop until low 16 bits of d2 == -1
+       subxl   d0, d0                  C d0 <= -cy; save cy as 0 or -1 in d0
+       subl    #0x10000, d2            C step the high 16 bits of the count
+       bcs     L(L2)                   C borrow: high half was 0, all done
+       addl    d0, d0                  C restore cy
+       bra     L(Loop)
+
+L(L2):
+       negl    d0                      C d0 = final cy as 0 or 1
+
+C Restore used registers from stack frame.
+       movel   M(sp,+), a2
+       movel   M(sp,+), d2
+
+       rts
+
+EPILOGUE(M4_function_n)
diff --git a/mpn/m68k/gmp-mparam.h b/mpn/m68k/gmp-mparam.h

new file mode 100644 (file)

index 0000000..21b817e
--- /dev/null
+++ b/mpn/m68k/gmp-mparam.h
@@ -0,0 +1,65 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#define GMP_LIMB_BITS 32
+#define BYTES_PER_MP_LIMB 4
+
+
+/* 25MHz 68040 */
+
+/* Generated by tuneup.c, 2004-02-05, gcc 3.2 */
+
+#define MUL_TOOM22_THRESHOLD             14
+#define MUL_TOOM33_THRESHOLD             90
+
+#define SQR_BASECASE_THRESHOLD            5
+#define SQR_TOOM2_THRESHOLD              28
+#define SQR_TOOM3_THRESHOLD              98
+
+#define DIV_SB_PREINV_THRESHOLD       MP_SIZE_T_MAX  /* never */
+#define DIV_DC_THRESHOLD                 55
+#define POWM_THRESHOLD                   65
+
+#define HGCD_THRESHOLD                  116
+#define GCD_ACCEL_THRESHOLD               3
+#define GCD_DC_THRESHOLD                590
+#define JACOBI_BASE_METHOD                2
+
+#define DIVREM_1_NORM_THRESHOLD       MP_SIZE_T_MAX  /* never */
+#define DIVREM_1_UNNORM_THRESHOLD     MP_SIZE_T_MAX  /* never */
+#define MOD_1_NORM_THRESHOLD          MP_SIZE_T_MAX  /* never */
+#define MOD_1_UNNORM_THRESHOLD        MP_SIZE_T_MAX  /* never */
+#define USE_PREINV_DIVREM_1               0
+#define USE_PREINV_MOD_1                  0
+#define DIVREM_2_THRESHOLD            MP_SIZE_T_MAX  /* never */
+#define DIVEXACT_1_THRESHOLD          MP_SIZE_T_MAX  /* never */
+#define MODEXACT_1_ODD_THRESHOLD      MP_SIZE_T_MAX  /* never */
+
+#define GET_STR_DC_THRESHOLD             18
+#define GET_STR_PRECOMPUTE_THRESHOLD     43
+#define SET_STR_THRESHOLD               937
+
+#define MUL_FFT_TABLE  { 336, 672, 1408, 3584, 10240, 24576, 0 }
+#define MUL_FFT_MODF_THRESHOLD          296
+#define MUL_FFT_THRESHOLD              1728
+
+#define SQR_FFT_TABLE  { 336, 736, 1408, 3584, 10240, 24576, 0 }
+#define SQR_FFT_MODF_THRESHOLD          296
+#define SQR_FFT_THRESHOLD              2304
diff --git a/mpn/m68k/lshift.asm b/mpn/m68k/lshift.asm

new file mode 100644 (file)

index 0000000..9d7a5ed
--- /dev/null
+++ b/mpn/m68k/lshift.asm
@@ -0,0 +1,165 @@
+dnl  mc68020 mpn_lshift -- mpn left shift.
+
+dnl  Copyright 1996, 1999, 2000, 2001, 2002, 2003 Free Software Foundation,
+dnl  Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C           cycles/limb
+C        shift==1  shift>1
+C 68040:    5         12
+
+
+C mp_limb_t mpn_lshift (mp_ptr res_ptr, mp_srcptr s_ptr, mp_size_t s_size,
+C                       unsigned cnt);
+C
+C The "cnt" parameter is either 16 bits or 32 bits depending on
+C SIZEOF_UNSIGNED (see ABI notes in mpn/m68k/README).  The value is of
+C course only 1 to 31.  When loaded as 16 bits there's garbage in the upper
+C half, hence the use of cmpw.  The shift instructions take their count
+C modulo 64, so the upper part doesn't matter to them either.
+C
+
+C INPUT PARAMETERS
+C res_ptr      (sp + 4)
+C s_ptr                (sp + 8)
+C s_size       (sp + 12)
+C cnt          (sp + 16)
+
+define(res_ptr, `a1')
+define(s_ptr,   `a0')
+define(s_size,  `d6')
+define(cnt,     `d4')
+
+ifdef(`SIZEOF_UNSIGNED',,
+`m4_error(`SIZEOF_UNSIGNED not defined, should be in config.m4
+')')
+
+PROLOGUE(mpn_lshift)
+C Save used registers on the stack.
+       moveml  d2-d6/a2, M(-,sp)
+
+C Copy the arguments to registers.
+C Six registers (24 bytes) were pushed above, so the first argument is now
+C at sp+28.
+       movel   M(sp,28), res_ptr
+       movel   M(sp,32), s_ptr
+       movel   M(sp,36), s_size
+ifelse(SIZEOF_UNSIGNED,2,
+`      movew   M(sp,40), cnt',
+`      movel   M(sp,40), cnt')
+
+C Use the shift-by-one code only when cnt==1 and walking upwards from the
+C least significant limb cannot overwrite source limbs not yet read.
+       moveql  #1, d5
+       cmpw    d5, cnt                 C word compare, upper half may be garbage
+       bne     L(Lnormal)
+       cmpl    s_ptr, res_ptr
+       bls     L(Lspecial)             C jump if s_ptr >= res_ptr
+
+ifelse(scale_available_p,1,`
+       lea     M(s_ptr,s_size,l,4), a2
+',`
+       movel   s_size, d0
+       asll    #2, d0
+       lea     M(s_ptr,d0,l), a2
+')
+       cmpl    res_ptr, a2
+       bls     L(Lspecial)             C jump if res_ptr >= s_ptr + s_size
+
+C General case: start past the most significant limb and work downwards
+C with pre-decrement addressing.  d5 = 32-cnt is the right-shift count
+C giving the bits each limb passes up to the limb above it.
+L(Lnormal):
+       moveql  #32, d5
+       subl    cnt, d5
+
+ifelse(scale_available_p,1,`
+       lea     M(s_ptr,s_size,l,4), s_ptr
+       lea     M(res_ptr,s_size,l,4), res_ptr
+',`
+       movel   s_size, d0
+       asll    #2, d0
+       addl    d0, s_ptr
+       addl    d0, res_ptr
+')
+       movel   M(-,s_ptr), d2
+       movel   d2, d0
+       lsrl    d5, d0          C compute carry limb
+
+       lsll    cnt, d2
+       movel   d2, d1
+       subql   #1, s_size
+       beq     L(Lend)
+       lsrl    #1, s_size
+       bcs     L(L1)
+       subql   #1, s_size
+
+L(Loop):
+       movel   M(-,s_ptr), d2
+       movel   d2, d3
+       lsrl    d5, d3
+       orl     d3, d1
+       movel   d1, M(-,res_ptr)
+       lsll    cnt, d2
+L(L1):
+       movel   M(-,s_ptr), d1
+       movel   d1, d3
+       lsrl    d5, d3
+       orl     d3, d2
+       movel   d2, M(-,res_ptr)
+       lsll    cnt, d1
+
+       dbf     s_size, L(Loop)         C dbf counts only the low 16 bits
+       subl    #0x10000, s_size        C so step the high 16 bits by hand
+       bcc     L(Loop)
+
+L(Lend):
+       movel   d1, M(-,res_ptr)        C store least significant limb
+
+C Restore used registers from stack frame.
+       moveml  M(sp,+), d2-d6/a2
+       rts
+
+C We loop from least significant end of the arrays, which is only
+C permissible if the source and destination don't overlap, since the
+C function is documented to work for overlapping source and destination.
+
+L(Lspecial):
+       clrl    d0                      C initialize carry
+       eorw    #1, s_size
+       lsrl    #1, s_size
+       bcc     L(LL1)
+       subql   #1, s_size
+
+L(LLoop):
+       movel   M(s_ptr,+), d2
+       addxl   d2, d2                  C shift left one bit, cy in and out
+       movel   d2, M(res_ptr,+)
+L(LL1):
+       movel   M(s_ptr,+), d2
+       addxl   d2, d2                  C shift left one bit, cy in and out
+       movel   d2, M(res_ptr,+)
+
+       dbf     s_size, L(LLoop)
+       addxl   d0, d0          C save cy in lsb
+       subl    #0x10000, s_size
+       bcs     L(LLend)
+       lsrl    #1, d0          C restore cy
+       bra     L(LLoop)
+
+L(LLend):
+C Restore used registers from stack frame.
+       moveml  M(sp,+), d2-d6/a2
+       rts
+
+EPILOGUE(mpn_lshift)
diff --git a/mpn/m68k/m68k-defs.m4 b/mpn/m68k/m68k-defs.m4

new file mode 100644 (file)

index 0000000..17a3459
--- /dev/null
+++ b/mpn/m68k/m68k-defs.m4
@@ -0,0 +1,219 @@
+divert(-1)
+
+dnl  m4 macros for 68k assembler.
+
+dnl  Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+dnl  The default m4 `#' commenting interferes with the assembler syntax for
+dnl  immediates.  `|' would be correct, but it interferes with "||" in
+dnl  eval().  Would like to disable commenting, but that's not possible (see
+dnl  mpn/asm-defs.m4), so use `;' which should be harmless.
+
+changecom(;)
+
+
+dnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)
+dnl
+dnl  Same as the standard PROLOGUE, but align to 2 bytes not 4.
+
+define(`PROLOGUE_cpu',
+m4_assert_numargs(1)
+`      TEXT
+       ALIGN(2)
+       GLOBL   `$1' GLOBL_ATTR
+       TYPE(`$1',`function')
+`$1'LABEL_SUFFIX')
+
+
+dnl  Usage: d0, etc
+dnl
+dnl  Expand to d0 or %d0 according to the assembler's requirements.
+dnl
+dnl  Actually d0 expands to `d0' or %`d0', the quotes protecting against
+dnl  further expansion.  Definitions are made even if d0 is to be just `d0',
+dnl  so that any m4 quoting problems will show up everywhere, not just on a
+dnl  %d0 system.
+dnl
+dnl  Care must be taken with quoting when using these in a definition.  For
+dnl  instance the quotes in the following are essential or two %'s will be
+dnl  produced when `counter' is used.
+dnl
+dnl         define(counter, `d7')
+dnl
+
+dnl  Called: m68k_reg(r)
+define(m68k_reg,
+m4_assert_numargs(1)
+m4_assert_defined(`WANT_REGISTER_PERCENT')
+`ifelse(WANT_REGISTER_PERCENT,yes,%)`$1'')
+
+dnl  Usage: m68k_defreg(r)
+define(m68k_defreg,
+m4_assert_numargs(1)
+`deflit($1,`m68k_reg(`$1')')')
+
+m68k_defreg(d0)
+m68k_defreg(d1)
+m68k_defreg(d2)
+m68k_defreg(d3)
+m68k_defreg(d4)
+m68k_defreg(d5)
+m68k_defreg(d6)
+m68k_defreg(d7)
+
+m68k_defreg(a0)
+m68k_defreg(a1)
+m68k_defreg(a2)
+m68k_defreg(a3)
+m68k_defreg(a4)
+m68k_defreg(a5)
+m68k_defreg(a6)
+m68k_defreg(a7)
+
+m68k_defreg(sp)
+m68k_defreg(pc)
+
+
+dnl  Usage: M(base)
+dnl         M(base,displacement)
+dnl         M(base,index,size)
+dnl         M(base,index,size,scale)
+dnl         M(base,+)
+dnl         M(-,base)
+dnl
+dnl  `base' is an address register, `index' is a data register, `size' is w
+dnl  or l, and scale is 1, 2, 4 or 8.
+dnl
+dnl  M(-,base) has its arguments that way around to emphasise it's a
+dnl  pre-decrement, as opposed to M(base,+) a post-increment.
+dnl
+dnl  Enhancement: Add the memory indirect modes, if/when they're needed.
+
+define(M,
+m4_assert_numargs_range(1,4)
+m4_assert_defined(`WANT_ADDRESSING')
+`ifelse(WANT_ADDRESSING,mit,
+`ifelse($#,1, ``$1'@')dnl
+ifelse($#,2,
+`ifelse($2,+, ``$1'@+',
+`ifelse($1,-, ``$2'@-',
+              ``$1'@($2)')')')dnl
+ifelse($#,3,  ``$1'@(`$2':`$3')')dnl
+ifelse($#,4,  ``$1'@(`$2':`$3':$4)')',
+
+dnl  WANT_ADDRESSING `motorola'
+`ifelse($#,1, `(`$1')')dnl
+ifelse($#,2,
+`ifelse($2,+, `(`$1')+',
+`ifelse($1,-, `-(`$2')',
+              `$2(`$1')')')')dnl
+ifelse($#,3,  `(`$1',`$2'.$3)')dnl
+ifelse($#,4,  `(`$1',`$2'.$3*$4)')')')
+
+
+dnl  Usage: addl etc
+dnl
+dnl  m68k instructions with special handling for the suffix, with for
+dnl  instance addl expanding to addl or add.l as necessary.
+dnl
+dnl  See also t-m68k-defs.pl which verifies all mnemonics used in the asm
+dnl  files have entries here.
+
+dnl  Called: m68k_insn(mnemonic,suffix)
+define(m68k_insn,
+m4_assert_numargs(2)
+m4_assert_defined(`WANT_DOT_SIZE')
+`ifelse(WANT_DOT_SIZE,yes, ``$1'.``$2''',
+                           ``$1$2'')')
+
+dnl  Usage: m68k_definsn(mnemonic,suffix)
+define(m68k_definsn,
+m4_assert_numargs(2)
+`deflit($1`'$2,`m68k_insn(`$1',`$2')')')
+
+m68k_definsn(add,  l)
+m68k_definsn(addx, l)
+m68k_definsn(addq, l)
+m68k_definsn(asl,  l)
+m68k_definsn(cmp,  l)
+m68k_definsn(cmp,  w)
+m68k_definsn(clr,  l)
+m68k_definsn(divu, l)
+m68k_definsn(eor,  w)
+m68k_definsn(lsl,  l)
+m68k_definsn(lsr,  l)
+m68k_definsn(move, l)
+m68k_definsn(move, w)
+m68k_definsn(movem,l)
+m68k_definsn(moveq,l)
+m68k_definsn(mulu, l)
+m68k_definsn(neg,  l)
+m68k_definsn(or,   l)
+m68k_definsn(roxl, l)
+m68k_definsn(roxr, l)
+m68k_definsn(sub,  l)
+m68k_definsn(subx, l)
+m68k_definsn(subq, l)
+
+
+dnl  Usage: bra etc
+dnl
+dnl  Expand to `bra', `jra' or `jbra' according to what the assembler will
+dnl  accept.  The latter two give variable-sized branches in gas.
+dnl
+dnl  See also t-m68k-defs.pl which verifies all the bXX branches used in the
+dnl  asm files have entries here.
+
+dnl  Called: m68k_branch(cond)
+define(m68k_branch,
+m4_assert_numargs(1)
+m4_assert_defined(`WANT_BRANCHES')
+`ifelse(WANT_BRANCHES,jra, `j$1',
+`ifelse(WANT_BRANCHES,jbra,`jb$1',
+                           ``b$1'')')')
+
+dnl  Called: m68k_defbranch(cond)
+define(m68k_defbranch,
+m4_assert_numargs(1)
+`deflit(b$1,`m68k_branch(`$1')')')
+
+m68k_defbranch(ra)
+m68k_defbranch(cc)
+m68k_defbranch(cs)
+m68k_defbranch(ls)
+m68k_defbranch(eq)
+m68k_defbranch(ne)
+
+
+dnl  Usage: scale_available_p
+dnl
+dnl  Expand to 1 if a scale factor can be used in addressing modes, or 0 if
+dnl  not.  M(a0,d0,l,4), meaning a0+d0*4, is not available in 68000 or
+dnl  68010, but is in CPU32 and in 68020 and up.
+
+dnl  Each CPU symbol must be its own comma-separated argument.  Adjacent
+dnl  quoted strings with no comma between them are joined by m4 into one
+dnl  argument, which names no defined symbol, so the result would always
+dnl  be 0 and the scaled addressing mode would never be selected.
+define(scale_available_p,
+`m4_ifdef_anyof_p(
+`HAVE_HOST_CPU_m68360',
+`HAVE_HOST_CPU_m68020',
+`HAVE_HOST_CPU_m68030',
+`HAVE_HOST_CPU_m68040',
+`HAVE_HOST_CPU_m68060')')
+
+
+divert
diff --git a/mpn/m68k/mc68020/aorsmul_1.asm b/mpn/m68k/mc68020/aorsmul_1.asm

new file mode 100644 (file)

index 0000000..1786660
--- /dev/null
+++ b/mpn/m68k/mc68020/aorsmul_1.asm
@@ -0,0 +1,90 @@
+dnl  mc68020 mpn_addmul_1, mpn_submul_1 -- add or subtract mpn multiple.
+
+dnl  Copyright 1992, 1994, 1996, 1999, 2000, 2001, 2002 Free Software
+dnl  Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C         cycles/limb
+C 68040:     25
+
+ifdef(`OPERATION_addmul_1',`
+  define(M4_inst,       addl)
+  define(M4_function_1, mpn_addmul_1)
+',`ifdef(`OPERATION_submul_1',`
+  define(M4_inst,       subl)
+  define(M4_function_1, mpn_submul_1)
+',
+`m4_error(`Need OPERATION_addmul_1 or OPERATION_submul_1
+')')')
+
+
+C INPUT PARAMETERS
+C res_ptr      (sp + 4)
+C s1_ptr       (sp + 8)
+C s1_size      (sp + 12)
+C s2_limb      (sp + 16)
+
+define(res_ptr, `a0')
+define(s1_ptr,  `a1')
+define(s1_size, `d2')
+define(s2_limb, `d4')
+
+
+PROLOGUE(M4_function_1)
+
+C Save used registers on the stack.
+       moveml  d2-d5, M(-,sp)
+
+C Copy the arguments to registers.  Better use movem?
+       movel   M(sp,20), res_ptr
+       movel   M(sp,24), s1_ptr
+       movel   M(sp,28), s1_size
+       movel   M(sp,32), s2_limb
+
+       eorw    #1, s1_size
+       clrl    d1
+       clrl    d5
+       lsrl    #1, s1_size
+       bcc     L(L1)
+       subql   #1, s1_size
+       subl    d0, d0          C (d0,cy) <= (0,0)
+
+L(Loop):
+       movel   M(s1_ptr,+), d3
+       mulul   s2_limb, d1:d3
+       addxl   d0, d3
+       addxl   d5, d1
+       M4_inst d3, M(res_ptr,+)
+L(L1): movel   M(s1_ptr,+), d3
+       mulul   s2_limb, d0:d3
+       addxl   d1, d3
+       addxl   d5, d0
+       M4_inst d3, M(res_ptr,+)
+
+       dbf     s1_size, L(Loop)
+       addxl   d5, d0
+       subl    #0x10000, s1_size
+       bcc     L(Loop)
+
+C Restore used registers from stack frame.
+       moveml  M(sp,+), d2-d5
+
+       rts
+
+EPILOGUE(M4_function_1)
diff --git a/mpn/m68k/mc68020/mul_1.asm b/mpn/m68k/mc68020/mul_1.asm

new file mode 100644 (file)

index 0000000..d24f6d1
--- /dev/null
+++ b/mpn/m68k/mc68020/mul_1.asm
@@ -0,0 +1,86 @@
+dnl  mc68020 mpn_mul_1 -- mpn by limb multiply
+
+dnl  Copyright 1992, 1994, 1996, 1999, 2000, 2001, 2002 Free Software
+dnl  Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C         cycles/limb
+C 68040:     24
+
+C INPUT PARAMETERS
+C res_ptr      (sp + 4)
+C s1_ptr       (sp + 8)
+C s1_size      (sp + 12)
+C s2_limb      (sp + 16)
+
+
+define(res_ptr, `a0')
+define(s1_ptr,  `a1')
+define(s1_size, `d2')
+define(s2_limb, `d4')
+
+
+PROLOGUE(mpn_mul_1)
+
+C Multiply the s1_size limbs at s1_ptr by s2_limb and store the low product
+C words, with carries propagated, at res_ptr.  d0 holds the final carry limb
+C at the rts; presumably that is the function result -- TODO confirm.
+C
+C Register roles: d0 and d1 alternate as the high product word and d3 is the
+C low product word.  The carry between limbs travels in the X flag.
+
+C Save used registers on the stack.
+       moveml  d2-d4, M(-,sp)
+
+C      movel   d2, M(-,sp)
+C      movel   d3, M(-,sp)
+C      movel   d4, M(-,sp)
+
+C Copy the arguments to registers.  Better use movem?
+C The offsets are the entry offsets (4..16) plus the 12 bytes pushed above.
+       movel   M(sp,16), res_ptr
+       movel   M(sp,20), s1_ptr
+       movel   M(sp,24), s1_size
+       movel   M(sp,28), s2_limb
+
+C The loop handles two limbs per pass, so the count is halved.  The eorw
+C flips bit 0 first, which makes the carry out of lsrl clear exactly when
+C the original size was odd; in that case enter at L(L1) so that the first
+C pass processes a single limb.
+       eorw    #1, s1_size
+       clrl    d1
+       lsrl    #1, s1_size
+       bcc     L(L1)
+       subql   #1, s1_size
+       subl    d0, d0          C (d0,cy) <= (0,0)
+
+L(Loop):
+       movel   M(s1_ptr,+), d3
+       mulul   s2_limb, d1:d3
+       addxl   d0, d3
+       movel   d3, M(res_ptr,+)
+L(L1): movel   M(s1_ptr,+), d3
+       mulul   s2_limb, d0:d3
+       addxl   d1, d3
+       movel   d3, M(res_ptr,+)
+
+C dbf only counts in the low 16 bits of s1_size.  Once it falls through, add
+C the pending X flag into d0 by way of a zeroed d3 (the subl below overwrites
+C the flags), then borrow from the high 16 bits of the count and continue
+C while no borrow out occurs.
+       dbf     s1_size, L(Loop)
+       clrl    d3
+       addxl   d3, d0
+       subl    #0x10000, s1_size
+       bcc     L(Loop)
+
+C Restore used registers from stack frame.
+       moveml  M(sp,+), d2-d4
+
+C      movel   M(sp,+),d4
+C      movel   M(sp,+),d3
+C      movel   M(sp,+),d2
+
+       rts
+
+EPILOGUE(mpn_mul_1)
diff --git a/mpn/m68k/mc68020/udiv.asm b/mpn/m68k/mc68020/udiv.asm

new file mode 100644 (file)

index 0000000..ebc1ef2
--- /dev/null
+++ b/mpn/m68k/mc68020/udiv.asm
@@ -0,0 +1,34 @@
+dnl  mc68020 mpn_udiv_qrnnd -- 2x1 limb division
+
+dnl  Copyright 1999, 2000, 2001 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_udiv_qrnnd (mp_limb_t *rp,
+C                           mp_limb_t nh, mp_limb_t nl, mp_limb_t d);
+C
+
+PROLOGUE(mpn_udiv_qrnnd)
+C Divide the two-limb value nh:nl by d.  The remainder is stored through rp
+C and the quotient is left in d0.
+       movel   M(sp,12), d0    C nl, low half of the dividend
+       movel   M(sp,8), d1     C nh, high half of the dividend
+       divul   M(sp,16), d1:d0 C d0 = quotient, d1 = remainder
+       movel   M(sp,4), a0     C rp
+       movel   d1, M(a0)       C *rp = remainder
+       rts
+EPILOGUE(mpn_udiv_qrnnd)
diff --git a/mpn/m68k/mc68020/umul.asm b/mpn/m68k/mc68020/umul.asm

new file mode 100644 (file)

index 0000000..4d6e8a8
--- /dev/null
+++ b/mpn/m68k/mc68020/umul.asm
@@ -0,0 +1,33 @@
+dnl  mc68020 mpn_umul_ppmm -- limb by limb multiplication
+
+dnl  Copyright 1999, 2000, 2001 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_umul_ppmm (mp_limb_t *lp, mp_limb_t x, mp_limb_t y);
+C
+
+PROLOGUE(mpn_umul_ppmm)
+C Form the two-limb product x * y.  The low limb is stored through lp and
+C the high limb is left in d0.
+       movel   M(sp,12), d0    C y
+       movel   M(sp,8), d1     C x
+       mulul   d0, d0:d1       C d0 = high limb, d1 = low limb
+       movel   M(sp,4), a0     C lp
+       movel   d1, M(a0)       C *lp = low limb
+       rts
+EPILOGUE(mpn_umul_ppmm)
diff --git a/mpn/m68k/rshift.asm b/mpn/m68k/rshift.asm

new file mode 100644 (file)

index 0000000..1bf58ac
--- /dev/null
+++ b/mpn/m68k/rshift.asm
@@ -0,0 +1,165 @@
+dnl  mc68020 mpn_rshift -- mpn right shift.
+
+dnl  Copyright 1996, 1999, 2000, 2001, 2002, 2003 Free Software Foundation,
+dnl  Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C           cycles/limb
+C        shift==1  shift>1
+C 68040:    9         12
+
+
+C mp_limb_t mpn_rshift (mp_ptr res_ptr, mp_srcptr s_ptr, mp_size_t s_size,
+C                       unsigned cnt);
+C
+C The "cnt" parameter is either 16 bits or 32 bits depending on
+C SIZEOF_UNSIGNED (see ABI notes in mpn/m68k/README).  The value is of
+C course only 1 to 31.  When loaded as 16 bits there's garbage in the upper
+C half, hence the use of cmpw.  The shift instructions take their count
+C modulo 64, so the upper part doesn't matter to them either.
+C
+
+C INPUT PARAMETERS
+C res_ptr      (sp + 4)
+C s_ptr                (sp + 8)
+C s_size       (sp + 12)
+C cnt          (sp + 16)
+
+define(res_ptr, `a1')
+define(s_ptr,   `a0')
+define(s_size,  `d6')
+define(cnt,     `d4')
+
+ifdef(`SIZEOF_UNSIGNED',,
+`m4_error(`SIZEOF_UNSIGNED not defined, should be in config.m4
+')')
+
+PROLOGUE(mpn_rshift)
+C Shift the s_size limbs at s_ptr right by cnt bits and store the result at
+C res_ptr.  On both paths d0 ends up holding the bits shifted out of the
+C least significant limb, positioned at the top of the word; presumably that
+C is the function result -- TODO confirm.
+C
+C Two code paths: L(Lnormal) works from the least significant limb upwards
+C for any cnt.  L(Lspecial) handles cnt == 1 with roxrl, working from the
+C most significant limb downwards, and is entered only when res_ptr >= s_ptr
+C or s_ptr >= res_ptr + s_size.
+
+C Save used registers on the stack.
+       moveml  d2-d6/a2, M(-,sp)
+
+C Copy the arguments to registers.
+C The offsets are the entry offsets (4..16) plus the 24 bytes pushed above.
+       movel   M(sp,28), res_ptr
+       movel   M(sp,32), s_ptr
+       movel   M(sp,36), s_size
+ifelse(SIZEOF_UNSIGNED,2,
+`      movew   M(sp,40), cnt',
+`      movel   M(sp,40), cnt')
+
+C Only cnt == 1 is a candidate for the special path.
+       moveql  #1, d5
+       cmpw    d5, cnt
+       bne     L(Lnormal)
+       cmpl    res_ptr, s_ptr
+       bls     L(Lspecial)             C jump if res_ptr >= s_ptr
+
+ifelse(scale_available_p,1,`
+       lea     M(res_ptr,s_size,l,4), a2
+',`
+       movel   s_size, d0
+       asll    #2, d0
+       lea     M(res_ptr,d0,l), a2
+')
+       cmpl    s_ptr, a2
+       bls     L(Lspecial)             C jump if s_ptr >= res_ptr + s_size
+
+L(Lnormal:)
+C d5 = 32 - cnt, the left shift that isolates the bits leaving each limb.
+       moveql  #32, d5
+       subl    cnt, d5
+       movel   M(s_ptr,+), d2
+       movel   d2, d0
+       lsll    d5, d0          C compute carry limb
+
+       lsrl    cnt, d2
+       movel   d2, d1
+C One limb has been consumed.  The loop handles two limbs per pass, so halve
+C the remaining count and enter at L(L1) when it is odd.
+       subql   #1, s_size
+       beq     L(Lend)
+       lsrl    #1, s_size
+       bcs     L(L1)
+       subql   #1, s_size
+
+C d1 and d2 alternate between the partially built result limb and the newly
+C loaded source limb; d3 is scratch for the bits moving down a limb.
+L(Loop:)
+       movel   M(s_ptr,+), d2
+       movel   d2, d3
+       lsll    d5, d3
+       orl     d3, d1
+       movel   d1, M(res_ptr,+)
+       lsrl    cnt, d2
+L(L1:)
+       movel   M(s_ptr,+), d1
+       movel   d1, d3
+       lsll    d5, d3
+       orl     d3, d2
+       movel   d2, M(res_ptr,+)
+       lsrl    cnt, d1
+
+C dbf only counts in the low 16 bits of s_size; the subl/bcc pair borrows
+C from the high 16 bits to keep going for larger counts.
+       dbf     s_size, L(Loop)
+       subl    #0x10000, s_size
+       bcc     L(Loop)
+
+L(Lend:)
+       movel   d1, M(res_ptr)  C store most significant limb
+
+C Restore used registers from stack frame.
+       moveml  M(sp,+), d2-d6/a2
+       rts
+
+C We loop from the most significant end of the arrays here.  That is only
+C permissible when doing so cannot overwrite source limbs that are still to
+C be read, since the function is documented to work for overlapping source
+C and destination.  The pointer comparisons ahead of L(Lnormal) are what
+C guard the entry to this path.
+
+L(Lspecial:)
+ifelse(scale_available_p,1,`
+       lea     M(s_ptr,s_size,l,4), s_ptr
+       lea     M(res_ptr,s_size,l,4), res_ptr
+',`
+       movel   s_size, d0
+       asll    #2, d0
+       addl    d0, s_ptr
+       addl    d0, res_ptr
+')
+
+C Two limbs per pass again: halve the count and enter at L(LL1) when the
+C original size was odd.  The bit travelling between limbs lives in the X
+C flag, which roxrl both consumes and produces.
+       clrl    d0                      C initialize carry
+       eorw    #1, s_size
+       lsrl    #1, s_size
+       bcc     L(LL1)
+       subql   #1, s_size
+
+L(LLoop:)
+       movel   M(-,s_ptr), d2
+       roxrl   #1, d2
+       movel   d2, M(-,res_ptr)
+L(LL1:)
+       movel   M(-,s_ptr), d2
+       roxrl   #1, d2
+       movel   d2, M(-,res_ptr)
+
+C The subl below overwrites the flags, so park X in the top bit of d0 across
+C it.  If the loop continues, addl d0, d0 moves that bit back into X and
+C leaves d0 zero again; if it ends, d0 already holds the shifted-out bit.
+       dbf     s_size, L(LLoop)
+       roxrl   #1, d0          C save cy in msb
+       subl    #0x10000, s_size
+       bcs     L(LLend)
+       addl    d0, d0          C restore cy
+       bra     L(LLoop)
+
+L(LLend:)
+C Restore used registers from stack frame.
+       moveml  M(sp,+), d2-d6/a2
+       rts
+
+EPILOGUE(mpn_rshift)
diff --git a/mpn/m68k/t-m68k-defs.pl b/mpn/m68k/t-m68k-defs.pl

new file mode 100644 (file)

index 0000000..226afc5
--- /dev/null
+++ b/mpn/m68k/t-m68k-defs.pl
@@ -0,0 +1,80 @@
+#! /usr/bin/perl -w
+
+# Copyright 2001, 2003 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+# Usage:  perl t-m68k-defs.pl [-t]
+#
+# Run this in the mpn/m68k source directory to check that m68k-defs.m4 has
+# m68k_defbranch()s or m68k_definsn()s for each instruction used in *.asm
+# and */*.asm.  Print nothing if everything is ok.  The -t option prints
+# some diagnostic traces.
+
+use strict;
+use Getopt::Std;
+
+my %opt;
+getopts('t', \%opt);
+
+# Names declared in m68k-defs.m4.  Branches are stored with their "b" prefix
+# and instructions with their size suffix appended, so both tables can be
+# probed directly with an opcode exactly as it is spelled in an .asm file.
+my %branch;
+my %insn;
+
+# Three-argument open with lexical handles: the file name is never parsed
+# for mode characters or trimmed of whitespace, and no global handle is
+# shared between the two read loops.
+open(my $defs, '<', "m68k-defs.m4")
+    or die "Cannot open m68k-defs.m4: $!\nIs this the mpn/m68k source directory?\n";
+while (<$defs>) {
+    if (/^m68k_defbranch\(\s*(.*)\)/) { $branch{"b".$1} = 1; }
+    if (/^m68k_definsn\(\s*(.*),\s*(.*)\)/) { $insn{$1.$2} = 1; }
+}
+close($defs);
+
+print "branches: ", join(" ",keys(%branch)), "\n" if $opt{'t'};
+print "insns: ", join(" ",keys(%insn)), "\n" if $opt{'t'};
+
+
+foreach my $file (glob("*.asm"), glob("*/*.asm")) {
+    print "file $file\n" if $opt{'t'};
+
+    open(my $src, '<', $file) or die "Cannot open $file: $!";
+    while (<$src>) {
+        # skip lines that are m4 "C" comments
+        next if /^[ \t]*C/;
+
+        # an instruction line is a tab followed by the opcode
+        next unless /^\t([a-z0-9]+)/;
+        my $opcode = $1;
+        print "opcode $opcode\n" if $opt{'t'};
+
+        my $known = defined $insn{$opcode} || defined $branch{$opcode};
+
+        # instructions with an l, w or b suffix should have a definsn
+        # (unless they're already a defbranch)
+        if ($opcode =~ /[lwb]$/ && ! $known) {
+            print "$file: $.: missing m68k_definsn: $opcode\n";
+        }
+
+        # instructions bXX should have a defbranch (unless they're
+        # already a definsn)
+        if ($opcode =~ /^b/ && ! $known) {
+            print "$file: $.: missing m68k_defbranch: $opcode\n";
+        }
+    }
+    close($src);
+}
diff --git a/mpn/m88k/README b/mpn/m88k/README

new file mode 100644 (file)

index 0000000..046e3bf
--- /dev/null
+++ b/mpn/m88k/README
@@ -0,0 +1,50 @@
+Copyright 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+
+
+
+                      M88K MPN SUBROUTINES
+
+This directory contains mpn functions for various m88k family chips.
+
+CODE ORGANIZATION
+
+       m88k             m88000, m88100
+       m88k/mc88110     m88110
+
+STATUS
+
+The code herein is old and poorly maintained.
+
+* The .s files assume the system uses a "_" underscore prefix, which
+  should be controlled by configure.
+
+* The mc88110/*.S files are using the defunct "sysdep.h" configuration
+  scheme and won't compile.
+
+Conversion to the current m4 .asm style wouldn't be difficult.
+
+
+
+
+----------------
+Local variables:
+mode: text
+fill-column: 76
+End:
diff --git a/mpn/m88k/add_n.s b/mpn/m88k/add_n.s

new file mode 100644 (file)

index 0000000..db2ffff
--- /dev/null
+++ b/mpn/m88k/add_n.s
@@ -0,0 +1,102 @@
+; mc88100 mpn_add_n -- Add two limb vectors of the same length > 0 and store
+; sum in a third limb vector.
+
+; Copyright 1992, 1994, 1995, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 3 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+; INPUT PARAMETERS
+; res_ptr      r2
+; s1_ptr       r3
+; s2_ptr       r4
+; size         r5
+
+; This code has been optimized to run one instruction per clock, avoiding
+; load stalls and writeback contention.  As a result, the instruction
+; order is not always natural.
+
+; The speed is about 4.6 clocks/limb + 18 clocks/limb-vector on an 88100,
+; but on the 88110, it seems to run much slower, 6.6 clocks/limb.
+
+       text
+       align    16
+       global   ___gmpn_add_n
+___gmpn_add_n:
+; The main loop is unrolled eight limbs deep, four instructions per limb.
+; When size is not a multiple of 8 the code jumps into the middle of the
+; unrolled body (Lbase plus an offset) after moving the three pointers back,
+; so that the fixed offsets used by the entered steps address the first
+; limbs.  r10 is the loop counter; presumably it starts as size / 8
+; -- TODO confirm against the extu operand encoding.
+       ld      r6,r3,0                 ; read first limb from s1_ptr
+       extu    r10,r5,3
+       ld      r7,r4,0                 ; read first limb from s2_ptr
+
+       subu.co r5,r0,r5                ; (clear carry as side effect)
+       mak     r5,r5,3<4>              ; presumably ((-size) & 7) * 16, the byte offset into the loop body
+       bcnd    eq0,r5,Lzero            ; offset 0: take the normal entry
+
+       or      r12,r0,lo16(Lbase)
+       or.u    r12,r12,hi16(Lbase)
+       addu    r12,r12,r5              ; r12 is address for entering in loop
+
+       extu    r5,r5,2                 ; divide by 4
+       subu    r2,r2,r5                ; adjust res_ptr
+       subu    r3,r3,r5                ; adjust s1_ptr
+       subu    r4,r4,r5                ; adjust s2_ptr
+
+; The steps alternate between the register pairs r6/r7 and r8/r9, and the
+; entry step may use either pair, so mirror the first limbs into r8/r9.
+       or      r8,r6,r0
+
+       jmp.n   r12
+        or     r9,r7,r0
+
+Loop:  addu    r3,r3,32
+       st      r8,r2,28
+       addu    r4,r4,32
+       ld      r6,r3,0
+       addu    r2,r2,32
+       ld      r7,r4,0
+Lzero: subu    r10,r10,1               ; add 0 + 8r limbs (adj loop cnt)
+Lbase: ld      r8,r3,4
+       addu.cio r6,r6,r7
+       ld      r9,r4,4
+       st      r6,r2,0
+       ld      r6,r3,8                 ; add 7 + 8r limbs
+       addu.cio r8,r8,r9
+       ld      r7,r4,8
+       st      r8,r2,4
+       ld      r8,r3,12                ; add 6 + 8r limbs
+       addu.cio r6,r6,r7
+       ld      r9,r4,12
+       st      r6,r2,8
+       ld      r6,r3,16                ; add 5 + 8r limbs
+       addu.cio r8,r8,r9
+       ld      r7,r4,16
+       st      r8,r2,12
+       ld      r8,r3,20                ; add 4 + 8r limbs
+       addu.cio r6,r6,r7
+       ld      r9,r4,20
+       st      r6,r2,16
+       ld      r6,r3,24                ; add 3 + 8r limbs
+       addu.cio r8,r8,r9
+       ld      r7,r4,24
+       st      r8,r2,20
+       ld      r8,r3,28                ; add 2 + 8r limbs
+       addu.cio r6,r6,r7
+       ld      r9,r4,28
+       st      r6,r2,24
+       bcnd.n  ne0,r10,Loop            ; add 1 + 8r limbs
+        addu.cio r8,r8,r9
+
+       st      r8,r2,28                ; store most significant limb
+
+       jmp.n    r1
+        addu.ci r2,r0,r0               ; return carry-out from most sign. limb
diff --git a/mpn/m88k/mc88110/add_n.S b/mpn/m88k/mc88110/add_n.S

new file mode 100644 (file)

index 0000000..3b627c0
--- /dev/null
+++ b/mpn/m88k/mc88110/add_n.S
@@ -0,0 +1,198 @@
+; mc88110 __gmpn_add_n -- Add two limb vectors of the same length > 0 and store
+; sum in a third limb vector.
+
+; Copyright 1995, 1996, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 3 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+; INPUT PARAMETERS
+#define res_ptr        r2
+#define s1_ptr r3
+#define s2_ptr r4
+#define size   r5
+
+#include "sysdep.h"
+
+       text
+       align   16
+       global  C_SYMBOL_NAME(__gmpn_add_n)
+C_SYMBOL_NAME(__gmpn_add_n):
+; Strategy, as read from the code below: bit 2 of the pointers selects one
+; of three variants, so that the double-word ld.d/st.d accesses are applied
+; only to pointers whose bit 2 is clear (presumably an 8-byte alignment
+; requirement of ld.d/st.d -- TODO confirm).
+;   V1a: s2_ptr and res_ptr agree in bit 2: ld.d on s2_ptr, st.d on res_ptr.
+;   V1b: s1_ptr and res_ptr agree in bit 2: swap s1_ptr and s2_ptr, then V1a.
+;   V2:  neither source agrees with res_ptr, so the two sources agree with
+;        each other: ld.d on both sources, single-word stores to res_ptr.
+; The carry between limbs is kept in the carry bit throughout.
+       addu.co  r0,r0,r0               ; clear cy flag
+       xor      r12,s2_ptr,res_ptr
+       bb1      2,r12,L1
+; **  V1a  **
+L0:    bb0      2,res_ptr,L_v1         ; branch if res_ptr is aligned?
+/* Add least significant limb separately to align res_ptr and s2_ptr */
+       ld       r10,s1_ptr,0
+       addu     s1_ptr,s1_ptr,4
+       ld       r8,s2_ptr,0
+       addu     s2_ptr,s2_ptr,4
+       subu     size,size,1
+       addu.co  r6,r10,r8
+       st       r6,res_ptr,0
+       addu     res_ptr,res_ptr,4
+L_v1:  cmp      r12,size,2
+       bb1      lt,r12,Lend2
+
+; Preload the first two limbs of each source; the loops below add the
+; preloaded pair while loading the next one.
+       ld       r10,s1_ptr,0
+       ld       r12,s1_ptr,4
+       ld.d     r8,s2_ptr,0
+       subu     size,size,10
+       bcnd     lt0,size,Lfin1
+/* Add blocks of 8 limbs until less than 8 limbs remain */
+       align    8
+Loop1: subu     size,size,8
+       addu.cio r6,r10,r8
+       ld       r10,s1_ptr,8
+       addu.cio r7,r12,r9
+       ld       r12,s1_ptr,12
+       ld.d     r8,s2_ptr,8
+       st.d     r6,res_ptr,0
+       addu.cio r6,r10,r8
+       ld       r10,s1_ptr,16
+       addu.cio r7,r12,r9
+       ld       r12,s1_ptr,20
+       ld.d     r8,s2_ptr,16
+       st.d     r6,res_ptr,8
+       addu.cio r6,r10,r8
+       ld       r10,s1_ptr,24
+       addu.cio r7,r12,r9
+       ld       r12,s1_ptr,28
+       ld.d     r8,s2_ptr,24
+       st.d     r6,res_ptr,16
+       addu.cio r6,r10,r8
+       ld       r10,s1_ptr,32
+       addu.cio r7,r12,r9
+       ld       r12,s1_ptr,36
+       addu     s1_ptr,s1_ptr,32
+       ld.d     r8,s2_ptr,32
+       addu     s2_ptr,s2_ptr,32
+       st.d     r6,res_ptr,24
+       addu     res_ptr,res_ptr,32
+       bcnd     ge0,size,Loop1
+
+Lfin1: addu     size,size,8-2
+       bcnd     lt0,size,Lend1
+/* Add blocks of 2 limbs until less than 2 limbs remain */
+Loope1:        addu.cio r6,r10,r8
+       ld       r10,s1_ptr,8
+       addu.cio r7,r12,r9
+       ld       r12,s1_ptr,12
+       ld.d     r8,s2_ptr,8
+       st.d     r6,res_ptr,0
+       subu     size,size,2
+       addu     s1_ptr,s1_ptr,8
+       addu     s2_ptr,s2_ptr,8
+       addu     res_ptr,res_ptr,8
+       bcnd     ge0,size,Loope1
+; Add and store the pair that is still held in registers.
+Lend1: addu.cio r6,r10,r8
+       addu.cio r7,r12,r9
+       st.d     r6,res_ptr,0
+
+       bb0      0,size,Lret1
+/* Add last limb */
+       ld       r10,s1_ptr,8
+       ld       r8,s2_ptr,8
+       addu.cio r6,r10,r8
+       st       r6,res_ptr,8
+
+Lret1: jmp.n    r1
+       addu.ci  r2,r0,r0               ; return carry-out from most sign. limb
+
+L1:    xor      r12,s1_ptr,res_ptr
+       bb1      2,r12,L2
+; **  V1b  **
+; Exchange the two source pointers and reuse the V1a code.
+       or       r12,r0,s2_ptr
+       or       s2_ptr,r0,s1_ptr
+       or       s1_ptr,r0,r12
+       br       L0
+
+; **  V2  **
+/* If we come here, the alignment of s1_ptr and res_ptr as well as the
+   alignment of s2_ptr and res_ptr differ.  Since there are only two ways
+   things can be aligned (that we care about) we now know that the alignment
+   of s1_ptr and s2_ptr are the same.  */
+
+L2:    cmp      r12,size,1
+       bb1      eq,r12,Ljone
+       bb0      2,s1_ptr,L_v2          ; branch if s1_ptr is aligned
+/* Add least significant limb separately to align s1_ptr and s2_ptr */
+       ld       r10,s1_ptr,0
+       addu     s1_ptr,s1_ptr,4
+       ld       r8,s2_ptr,0
+       addu     s2_ptr,s2_ptr,4
+       subu     size,size,1
+       addu.co  r6,r10,r8
+       st       r6,res_ptr,0
+       addu     res_ptr,res_ptr,4
+
+L_v2:  subu     size,size,8
+       bcnd     lt0,size,Lfin2
+/* Add blocks of 8 limbs until less than 8 limbs remain */
+       align    8
+Loop2: subu     size,size,8
+       ld.d     r8,s1_ptr,0
+       ld.d     r6,s2_ptr,0
+       addu.cio r8,r8,r6
+       st       r8,res_ptr,0
+       addu.cio r9,r9,r7
+       st       r9,res_ptr,4
+       ld.d     r8,s1_ptr,8
+       ld.d     r6,s2_ptr,8
+       addu.cio r8,r8,r6
+       st       r8,res_ptr,8
+       addu.cio r9,r9,r7
+       st       r9,res_ptr,12
+       ld.d     r8,s1_ptr,16
+       ld.d     r6,s2_ptr,16
+       addu.cio r8,r8,r6
+       st       r8,res_ptr,16
+       addu.cio r9,r9,r7
+       st       r9,res_ptr,20
+       ld.d     r8,s1_ptr,24
+       ld.d     r6,s2_ptr,24
+       addu.cio r8,r8,r6
+       st       r8,res_ptr,24
+       addu.cio r9,r9,r7
+       st       r9,res_ptr,28
+       addu     s1_ptr,s1_ptr,32
+       addu     s2_ptr,s2_ptr,32
+       addu     res_ptr,res_ptr,32
+       bcnd     ge0,size,Loop2
+
+Lfin2: addu     size,size,8-2
+       bcnd     lt0,size,Lend2
+/* Add blocks of 2 limbs until less than 2 limbs remain */
+Loope2:        ld.d     r8,s1_ptr,0
+       ld.d     r6,s2_ptr,0
+       addu.cio r8,r8,r6
+       st       r8,res_ptr,0
+       addu.cio r9,r9,r7
+       st       r9,res_ptr,4
+       subu     size,size,2
+       addu     s1_ptr,s1_ptr,8
+       addu     s2_ptr,s2_ptr,8
+       addu     res_ptr,res_ptr,8
+       bcnd     ge0,size,Loope2
+; Lend2 is also the exit used by the V1 path when fewer than 2 limbs remain.
+Lend2: bb0      0,size,Lret2
+/* Add last limb */
+Ljone: ld       r10,s1_ptr,0
+       ld       r8,s2_ptr,0
+       addu.cio r6,r10,r8
+       st       r6,res_ptr,0
+
+Lret2: jmp.n    r1
+       addu.ci  r2,r0,r0               ; return carry-out from most sign. limb
diff --git a/mpn/m88k/mc88110/addmul_1.s b/mpn/m88k/mc88110/addmul_1.s

new file mode 100644 (file)

index 0000000..f412833
--- /dev/null
+++ b/mpn/m88k/mc88110/addmul_1.s
@@ -0,0 +1,59 @@
+; mc88110 __gmpn_addmul_1 -- Multiply a limb vector with a single limb and
+; store the product in a second limb vector.
+
+; Copyright 1996, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 3 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+; INPUT PARAMETERS
+; res_ptr      r2
+; s1_ptr       r3
+; size         r4
+; s2_limb      r5
+
+       text
+       align   16
+       global  ___gmpn_addmul_1
+___gmpn_addmul_1:
+; Both pointers are moved to the end of their blocks and size is negated,
+; so that r4 serves as a negative limb index counting up towards zero.
+; r2 carries the high product word from one limb to the next and is the
+; value left in r2 at return.  The multiply for the next limb is issued
+; before the previous product has been added into the result.
+       lda      r3,r3[r4]
+       lda      r8,r2[r4]              ; RES_PTR in r8 since r2 is retval
+       subu     r4,r0,r4
+       addu.co  r2,r0,r0               ; r2 = cy = 0
+
+       ld       r6,r3[r4]
+       addu     r4,r4,1
+       subu     r8,r8,4                ; r4 now runs one limb ahead of the result index
+       bcnd.n   eq0,r4,Lend
+        mulu.d  r10,r6,r5              ; presumably r10 = high word, r11 = low word
+
+Loop:  ld       r7,r8[r4]              ; current result limb
+       ld       r6,r3[r4]              ; next source limb
+       addu.cio r9,r11,r2              ; low product word + carry limb
+       addu.ci  r2,r10,r0              ; new carry limb = high product word + cy
+       addu.co  r9,r9,r7               ; add in the result limb
+       st       r9,r8[r4]
+       addu     r4,r4,1
+       mulu.d   r10,r6,r5
+       bcnd     ne0,r4,Loop
+
+; Finish the last limb, whose product is still pending in r10/r11.
+Lend:  ld       r7,r8,0
+       addu.cio r9,r11,r2
+       addu.ci  r2,r10,r0
+       addu.co  r9,r9,r7
+       st       r9,r8,0
+       jmp.n    r1
+        addu.ci r2,r2,r0               ; fold the final carry into the returned limb
diff --git a/mpn/m88k/mc88110/mul_1.s b/mpn/m88k/mc88110/mul_1.s

new file mode 100644 (file)

index 0000000..e8e8879
--- /dev/null
+++ b/mpn/m88k/mc88110/mul_1.s
@@ -0,0 +1,57 @@
+; mc88110 __gmpn_mul_1 -- Multiply a limb vector with a single limb and
+; store the product in a second limb vector.
+
+; Copyright 1992, 1994, 1995, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 3 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+; INPUT PARAMETERS
+; res_ptr      r2
+; s1_ptr       r3
+; size         r4
+; s2_limb      r5
+
+       text
+       align   16
+       global  ___gmpn_mul_1
+___gmpn_mul_1:
+; r4 serves as a negative limb index counting up towards zero.  r2 carries
+; the high product word from one limb to the next and is the value left in
+; r2 at return.  The multiply for the next limb is issued before the
+; previous product has been stored.
+       ; Make S1_PTR and RES_PTR point at the end of their blocks
+       ; and negate SIZE.
+       lda      r3,r3[r4]
+       lda      r8,r2[r4]              ; RES_PTR in r8 since r2 is retval
+       subu     r4,r0,r4
+
+       addu.co  r2,r0,r0               ; r2 = cy = 0
+
+       ld       r6,r3[r4]
+       addu     r4,r4,1
+       mulu.d   r10,r6,r5              ; presumably r10 = high word, r11 = low word
+       bcnd.n   eq0,r4,Lend
+        subu    r8,r8,8                ; the loop store runs with r4 two limbs ahead
+
+Loop:  ld       r6,r3[r4]              ; next source limb
+       addu.cio r9,r11,r2              ; low product word + previous high word
+       or       r2,r10,r0              ; could be avoided if unrolled
+       addu     r4,r4,1
+       mulu.d   r10,r6,r5
+       bcnd.n   ne0,r4,Loop
+        st      r9,r8[r4]
+
+; Finish the last limb, whose product is still pending in r10/r11.
+Lend:  addu.cio r9,r11,r2
+       st       r9,r8,4
+       jmp.n    r1
+        addu.ci r2,r10,r0              ; returned limb = high product word + cy
diff --git a/mpn/m88k/mc88110/sub_n.S b/mpn/m88k/mc88110/sub_n.S

new file mode 100644 (file)

index 0000000..a21a2cc
--- /dev/null
+++ b/mpn/m88k/mc88110/sub_n.S
@@ -0,0 +1,274 @@
+; mc88110 __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+; store difference in a third limb vector.
+
+; Copyright 1995, 1996, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 3 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+; INPUT PARAMETERS
+#define res_ptr        r2
+#define s1_ptr r3
+#define s2_ptr r4
+#define size   r5
+
+#include "sysdep.h"
+
+       text
+       align   16
+       global  C_SYMBOL_NAME(__gmpn_sub_n)
+C_SYMBOL_NAME(__gmpn_sub_n):
+; Entry point.  A code variant is chosen from bit 2 of the pointers so that
+; double-word accesses (ld.d/st.d) can be used on two operands that share
+; the same alignment.  The code below is variant V1a, used when s2_ptr and
+; res_ptr agree in bit 2: s2 is read with ld.d, the result is written with
+; st.d and s1 is read one word at a time.
+       subu.co  r0,r0,r0               ; set cy flag (cy = 1 is the no-borrow state, inverted on return)
+       xor      r12,s2_ptr,res_ptr
+       bb1      2,r12,L1               ; s2_ptr and res_ptr differ in bit 2: try V1b
+; **  V1a  **
+L0:    bb0      2,res_ptr,L_v1         ; branch if res_ptr is aligned
+/* Subtract least significant limb separately to align res_ptr and s2_ptr */
+       ld       r10,s1_ptr,0
+       addu     s1_ptr,s1_ptr,4
+       ld       r8,s2_ptr,0
+       addu     s2_ptr,s2_ptr,4
+       subu     size,size,1
+       subu.co  r6,r10,r8
+       st       r6,res_ptr,0
+       addu     res_ptr,res_ptr,4
+L_v1:  cmp      r12,size,2
+       bb1      lt,r12,Lend2           ; fewer than 2 limbs left: use the shared tail in V2
+
+/* Preload two limbs; the loops below always keep two limbs in flight */
+       ld       r10,s1_ptr,0
+       ld       r12,s1_ptr,4
+       ld.d     r8,s2_ptr,0
+       subu     size,size,10           ; bias: 8 limbs per Loop1 pass + 2 preloaded limbs
+       bcnd     lt0,size,Lfin1
+/* Subtract blocks of 8 limbs until fewer than 8 limbs remain */
+       align    8
+Loop1: subu     size,size,8
+       subu.cio r6,r10,r8
+       ld       r10,s1_ptr,8
+       subu.cio r7,r12,r9
+       ld       r12,s1_ptr,12
+       ld.d     r8,s2_ptr,8
+       st.d     r6,res_ptr,0
+       subu.cio r6,r10,r8
+       ld       r10,s1_ptr,16
+       subu.cio r7,r12,r9
+       ld       r12,s1_ptr,20
+       ld.d     r8,s2_ptr,16
+       st.d     r6,res_ptr,8
+       subu.cio r6,r10,r8
+       ld       r10,s1_ptr,24
+       subu.cio r7,r12,r9
+       ld       r12,s1_ptr,28
+       ld.d     r8,s2_ptr,24
+       st.d     r6,res_ptr,16
+       subu.cio r6,r10,r8
+       ld       r10,s1_ptr,32
+       subu.cio r7,r12,r9
+       ld       r12,s1_ptr,36
+       addu     s1_ptr,s1_ptr,32
+       ld.d     r8,s2_ptr,32
+       addu     s2_ptr,s2_ptr,32
+       st.d     r6,res_ptr,24
+       addu     res_ptr,res_ptr,32
+       bcnd     ge0,size,Loop1
+
+Lfin1: addu     size,size,8-2          ; change the bias from 8-limb blocks to 2-limb blocks
+       bcnd     lt0,size,Lend1
+/* Subtract blocks of 2 limbs until fewer than 2 limbs remain */
+Loope1:        subu.cio r6,r10,r8
+       ld       r10,s1_ptr,8
+       subu.cio r7,r12,r9
+       ld       r12,s1_ptr,12
+       ld.d     r8,s2_ptr,8
+       st.d     r6,res_ptr,0
+       subu     size,size,2
+       addu     s1_ptr,s1_ptr,8
+       addu     s2_ptr,s2_ptr,8
+       addu     res_ptr,res_ptr,8
+       bcnd     ge0,size,Loope1
+/* Finish the two limbs that are still loaded */
+Lend1: subu.cio r6,r10,r8
+       subu.cio r7,r12,r9
+       st.d     r6,res_ptr,0
+
+       bb0      0,size,Lret1           ; bit 0 of size clear: no odd limb left
+/* Subtract last limb */
+       ld       r10,s1_ptr,8
+       ld       r8,s2_ptr,8
+       subu.cio r6,r10,r8
+       st       r6,res_ptr,8
+
+Lret1: addu.ci r2,r0,r0                ; return carry-out from most sign. limb
+       jmp.n    r1
+        xor    r2,r2,1                 ; (delay slot) invert cy so that r2 = 1 means borrow
+
+; Reached when s2_ptr and res_ptr differ in bit 2.  If s1_ptr and res_ptr
+; agree in bit 2, use variant V1b: the same structure as V1a with the roles
+; of the two sources swapped (s1 is read with ld.d, s2 one word at a time).
+; Every subtraction still computes s1 limb minus s2 limb.
+L1:    xor      r12,s1_ptr,res_ptr
+       bb1      2,r12,L2               ; s1_ptr and res_ptr differ in bit 2 as well: use V2
+; **  V1b  **
+       bb0      2,res_ptr,L_v1b        ; branch if res_ptr is aligned
+/* Subtract least significant limb separately to align res_ptr and s1_ptr */
+       ld       r10,s2_ptr,0
+       addu     s2_ptr,s2_ptr,4
+       ld       r8,s1_ptr,0
+       addu     s1_ptr,s1_ptr,4
+       subu     size,size,1
+       subu.co  r6,r8,r10
+       st       r6,res_ptr,0
+       addu     res_ptr,res_ptr,4
+L_v1b: cmp      r12,size,2
+       bb1      lt,r12,Lend2           ; fewer than 2 limbs left: use the shared tail in V2
+
+/* Preload two limbs; the loops below always keep two limbs in flight */
+       ld       r10,s2_ptr,0
+       ld       r12,s2_ptr,4
+       ld.d     r8,s1_ptr,0
+       subu     size,size,10           ; bias: 8 limbs per Loop1b pass + 2 preloaded limbs
+       bcnd     lt0,size,Lfin1b
+/* Subtract blocks of 8 limbs until fewer than 8 limbs remain */
+       align    8
+Loop1b:        subu     size,size,8
+       subu.cio r6,r8,r10
+       ld       r10,s2_ptr,8
+       subu.cio r7,r9,r12
+       ld       r12,s2_ptr,12
+       ld.d     r8,s1_ptr,8
+       st.d     r6,res_ptr,0
+       subu.cio r6,r8,r10
+       ld       r10,s2_ptr,16
+       subu.cio r7,r9,r12
+       ld       r12,s2_ptr,20
+       ld.d     r8,s1_ptr,16
+       st.d     r6,res_ptr,8
+       subu.cio r6,r8,r10
+       ld       r10,s2_ptr,24
+       subu.cio r7,r9,r12
+       ld       r12,s2_ptr,28
+       ld.d     r8,s1_ptr,24
+       st.d     r6,res_ptr,16
+       subu.cio r6,r8,r10
+       ld       r10,s2_ptr,32
+       subu.cio r7,r9,r12
+       ld       r12,s2_ptr,36
+       addu     s2_ptr,s2_ptr,32
+       ld.d     r8,s1_ptr,32
+       addu     s1_ptr,s1_ptr,32
+       st.d     r6,res_ptr,24
+       addu     res_ptr,res_ptr,32
+       bcnd     ge0,size,Loop1b
+
+Lfin1b:        addu     size,size,8-2          ; change the bias from 8-limb blocks to 2-limb blocks
+       bcnd     lt0,size,Lend1b
+/* Subtract blocks of 2 limbs until fewer than 2 limbs remain */
+Loope1b:subu.cio r6,r8,r10
+       ld       r10,s2_ptr,8
+       subu.cio r7,r9,r12
+       ld       r12,s2_ptr,12
+       ld.d     r8,s1_ptr,8
+       st.d     r6,res_ptr,0
+       subu     size,size,2
+       addu     s1_ptr,s1_ptr,8
+       addu     s2_ptr,s2_ptr,8
+       addu     res_ptr,res_ptr,8
+       bcnd     ge0,size,Loope1b
+/* Finish the two limbs that are still loaded */
+Lend1b:        subu.cio r6,r8,r10
+       subu.cio r7,r9,r12
+       st.d     r6,res_ptr,0
+
+       bb0      0,size,Lret1b          ; bit 0 of size clear: no odd limb left
+/* Subtract last limb */
+       ld       r10,s2_ptr,8
+       ld       r8,s1_ptr,8
+       subu.cio r6,r8,r10
+       st       r6,res_ptr,8
+
+Lret1b:        addu.ci r2,r0,r0                ; return carry-out from most sign. limb
+       jmp.n    r1
+        xor    r2,r2,1                 ; (delay slot) invert cy so that r2 = 1 means borrow
+
+; **  V2  **
+/* If we come here, the alignment of s1_ptr and res_ptr as well as the
+   alignment of s2_ptr and res_ptr differ.  Since there are only two ways
+   things can be aligned (that we care about) we now know that the alignment
+   of s1_ptr and s2_ptr are the same.  Both sources are therefore read with
+   ld.d, while the result is written one word at a time.  */
+
+L2:    cmp      r12,size,1
+       bb1      eq,r12,Ljone           ; exactly one limb: go straight to the single-limb tail
+       bb0      2,s1_ptr,L_v2          ; branch if s1_ptr is aligned
+/* Subtract least significant limb separately to align s1_ptr and s2_ptr */
+       ld       r10,s1_ptr,0
+       addu     s1_ptr,s1_ptr,4
+       ld       r8,s2_ptr,0
+       addu     s2_ptr,s2_ptr,4
+       subu     size,size,1
+       subu.co  r6,r10,r8
+       st       r6,res_ptr,0
+       addu     res_ptr,res_ptr,4
+
+L_v2:  subu     size,size,8            ; bias: 8 limbs per Loop2 pass (nothing is preloaded here)
+       bcnd     lt0,size,Lfin2
+/* Subtract blocks of 8 limbs until fewer than 8 limbs remain */
+       align    8
+Loop2: subu     size,size,8
+       ld.d     r8,s1_ptr,0
+       ld.d     r6,s2_ptr,0
+       subu.cio r8,r8,r6
+       st       r8,res_ptr,0
+       subu.cio r9,r9,r7
+       st       r9,res_ptr,4
+       ld.d     r8,s1_ptr,8
+       ld.d     r6,s2_ptr,8
+       subu.cio r8,r8,r6
+       st       r8,res_ptr,8
+       subu.cio r9,r9,r7
+       st       r9,res_ptr,12
+       ld.d     r8,s1_ptr,16
+       ld.d     r6,s2_ptr,16
+       subu.cio r8,r8,r6
+       st       r8,res_ptr,16
+       subu.cio r9,r9,r7
+       st       r9,res_ptr,20
+       ld.d     r8,s1_ptr,24
+       ld.d     r6,s2_ptr,24
+       subu.cio r8,r8,r6
+       st       r8,res_ptr,24
+       subu.cio r9,r9,r7
+       st       r9,res_ptr,28
+       addu     s1_ptr,s1_ptr,32
+       addu     s2_ptr,s2_ptr,32
+       addu     res_ptr,res_ptr,32
+       bcnd     ge0,size,Loop2
+
+Lfin2: addu     size,size,8-2          ; change the bias from 8-limb blocks to 2-limb blocks
+       bcnd     lt0,size,Lend2
+/* Subtract blocks of 2 limbs until fewer than 2 limbs remain */
+Loope2:        ld.d     r8,s1_ptr,0
+       ld.d     r6,s2_ptr,0
+       subu.cio r8,r8,r6
+       st       r8,res_ptr,0
+       subu.cio r9,r9,r7
+       st       r9,res_ptr,4
+       subu     size,size,2
+       addu     s1_ptr,s1_ptr,8
+       addu     s2_ptr,s2_ptr,8
+       addu     res_ptr,res_ptr,8
+       bcnd     ge0,size,Loope2
+/* Shared tail: V1a and V1b also branch here when fewer than 2 limbs remain */
+Lend2: bb0      0,size,Lret2           ; bit 0 of size clear: no odd limb left
+/* Subtract last limb */
+Ljone: ld       r10,s1_ptr,0
+       ld       r8,s2_ptr,0
+       subu.cio r6,r10,r8
+       st       r6,res_ptr,0
+
+Lret2: addu.ci r2,r0,r0                ; return carry-out from most sign. limb
+       jmp.n    r1
+        xor    r2,r2,1                 ; (delay slot) invert cy so that r2 = 1 means borrow
diff --git a/mpn/m88k/mul_1.s b/mpn/m88k/mul_1.s

new file mode 100644 (file)

index 0000000..5c385bd
--- /dev/null
+++ b/mpn/m88k/mul_1.s
@@ -0,0 +1,125 @@
+; mc88100 __gmpn_mul_1 -- Multiply a limb vector with a single limb and
+; store the product in a second limb vector.
+
+; Copyright 1992, 1994, 1995, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 3 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+; INPUT PARAMETERS
+; res_ptr      r2
+; s1_ptr       r3
+; size         r4
+; s2_limb      r5
+
+; Common overhead is about 11 cycles/invocation.
+
+; The speed for S2_LIMB >= 0x10000 is approximately 21 cycles/limb.  (The
+; pipeline stalls 2 cycles due to WB contention.)
+
+; The speed for S2_LIMB < 0x10000 is approximately 16 cycles/limb.  (The
+; pipeline stalls 2 cycles due to WB contention and 1 cycle due to latency.)
+
+; To enhance speed:
+; 1. Unroll main loop 4-8 times.
+; 2. Schedule code to avoid WB contention.  It might be tempting to move the
+;    ld instruction in the loops down to save 2 cycles (less WB contention),
+;    but that loses because the ultimate value will be read from outside
+;    the allocated space.  But if we handle the ultimate multiplication in
+;    the tail, we can do this.
+; 3. Make the multiplication with fewer instructions.  I think the code for
+;    (S2_LIMB >= 0x10000) is not minimal.
+; With these techniques the (S2_LIMB >= 0x10000) case would run in 17 or
+; less cycles/limb; the (S2_LIMB < 0x10000) case would run in 11
+; cycles/limb.  (Assuming infinite unrolling.)
+
+       text
+       align    16
+       global   ___gmpn_mul_1
+___gmpn_mul_1:
+; Multiply the SIZE limbs at S1_PTR by S2_LIMB, store the result limbs at
+; RES_PTR and return the most significant (carry) limb in r2.
+;
+; Two loops are provided: a general one that builds the high product word
+; from four 16x16 partial products, and a shorter one used when the upper
+; 16 bits of S2_LIMB are zero.  Both are entered in the middle because the
+; first s1 limb is loaded ahead of the loop.
+
+       ; Make S1_PTR and RES_PTR point at the end of their blocks
+       ; and negate SIZE.
+       lda      r3,r3[r4]
+       lda      r6,r2[r4]      ; RES_PTR in r6 since r2 is retval
+       subu     r4,r0,r4       ; r4 = -SIZE, counts up towards zero
+
+       addu.co  r2,r0,r0       ; r2 = cy = 0
+       ld       r9,r3[r4]      ; first s1 limb
+       mask     r7,r5,0xffff   ; r7 = lo(S2_LIMB)
+       extu     r8,r5,16       ; r8 = hi(S2_LIMB)
+       bcnd.n   eq0,r8,Lsmall  ; jump if (hi(S2_LIMB) == 0)
+        subu    r6,r6,4        ; (delay slot) bias RES_PTR: r4 is bumped once before each store
+
+; General code for any value of S2_LIMB.
+
+       ; Make a stack frame and save r25 and r26
+       subu     r31,r31,16
+       st.d     r25,r31,8
+
+       ; Enter the loop in the middle
+       br.n     L1
+        addu    r4,r4,1        ; (delay slot)
+
+Loop:  ld       r9,r3[r4]      ; next s1 limb
+       st       r26,r6[r4]     ; store the limb finished in the previous pass
+; bcnd ne0,r0,0                ; bubble
+       addu     r4,r4,1
+L1:    mul      r26,r9,r5      ; low word of product   mul_1   WB ld
+       mask     r12,r9,0xffff  ; r12 = lo(s1_limb)     mask_1
+       mul      r11,r12,r7     ; r11 =  prod_0         mul_2   WB mask_1
+       mul      r10,r12,r8     ; r10 = prod_1a         mul_3
+       extu     r13,r9,16      ; r13 = hi(s1_limb)     extu_1  WB mul_1
+       mul      r12,r13,r7     ; r12 = prod_1b         mul_4   WB extu_1
+       mul      r25,r13,r8     ; r25  = prod_2         mul_5   WB mul_2
+       extu     r11,r11,16     ; r11 = hi(prod_0)      extu_2  WB mul_3
+       addu     r10,r10,r11    ;                       addu_1  WB extu_2
+; bcnd ne0,r0,0                ; bubble                        WB addu_1
+       addu.co  r10,r10,r12    ;                               WB mul_4
+       mask.u   r10,r10,0xffff ; move the 16 most significant bits...
+       addu.ci  r10,r10,r0     ; ...to the low half of the word...
+       rot      r10,r10,16     ; ...and put carry in pos 16.
+       addu.co  r26,r26,r2     ; add old carry limb
+       bcnd.n   ne0,r4,Loop
+        addu.ci r2,r25,r10     ; compute new carry limb (includes cy from the add above)
+
+       st       r26,r6[r4]     ; store the most significant result limb
+       ld.d     r25,r31,8      ; restore r25 and r26
+       jmp.n    r1
+        addu    r31,r31,16     ; (delay slot) release the stack frame
+
+; Fast code for S2_LIMB < 0x10000
+;
+; Here the carry out of "low word + old carry limb" is not added to the
+; new carry limb inside the loop.  It stays in the cy flag and is absorbed
+; by the addu.cio of the following limb.
+Lsmall:
+       ; Enter the loop in the middle
+       br.n     SL1
+        addu    r4,r4,1        ; (delay slot)
+
+SLoop: ld       r9,r3[r4]      ; next s1 limb
+       st       r8,r6[r4]      ; store the limb finished in the previous pass
+       addu     r4,r4,1        ;
+SL1:   mul      r8,r9,r5       ; low word of product
+       mask     r12,r9,0xffff  ; r12 = lo(s1_limb)
+       extu     r13,r9,16      ; r13 = hi(s1_limb)
+       mul      r11,r12,r7     ; r11 =  prod_0
+       mul      r12,r13,r7     ; r12 = prod_1b
+       addu.cio r8,r8,r2       ; add old carry limb and the cy deferred from the previous limb
+       extu     r10,r11,16     ; r10 = hi(prod_0)
+       addu     r10,r10,r12    ;
+       bcnd.n   ne0,r4,SLoop
+        extu    r2,r10,16      ; r2 = new carry limb (cy of this limb not yet included)
+
+       ; No further iteration follows to absorb the cy produced by the last
+       ; addu.cio, so fold it into the returned carry limb.  The st does not
+       ; touch the cy flag.
+       st       r8,r6[r4]      ; store the most significant result limb
+       jmp.n    r1
+        addu.ci r2,r2,r0       ; (delay slot) return carry limb + cy
diff --git a/mpn/m88k/sub_n.s b/mpn/m88k/sub_n.s

new file mode 100644 (file)

index 0000000..9ea78ff
--- /dev/null
+++ b/mpn/m88k/sub_n.s
@@ -0,0 +1,104 @@
+; mc88100 mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+; store difference in a third limb vector.
+
+; Copyright 1992, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 3 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+; INPUT PARAMETERS
+; res_ptr      r2
+; s1_ptr       r3
+; s2_ptr       r4
+; size         r5
+
+; This code has been optimized to run one instruction per clock, avoiding
+; load stalls and writeback contention.  As a result, the instruction
+; order is not always natural.
+
+; The speed is about 4.6 clocks/limb + 18 clocks/limb-vector on an 88100,
+; but on the 88110, it seems to run much slower, 6.6 clocks/limb.
+
+       text
+       align    16
+       global   ___gmpn_sub_n
+___gmpn_sub_n:
+; Subtract the limbs at s2_ptr (r4) from the limbs at s1_ptr (r3), store
+; the difference at res_ptr (r2) and return the borrow in r2.
+;
+; The body is unrolled 8 times, 4 instructions per limb.  For sizes that
+; are not a multiple of 8 the code jumps into the middle of the unrolled
+; body and moves the three pointers back by the number of skipped limbs.
+       ld      r6,r3,0                 ; read first limb from s1_ptr
+       extu    r10,r5,3                ; r10 = loop counter, taken from size shifted right by 3
+       ld      r7,r4,0                 ; read first limb from s2_ptr
+
+       subu    r5,r0,r5                ; r5 = -size
+       mak     r5,r5,3<4>              ; r5 = (low 3 bits of -size) * 16: code bytes to skip in the body
+       bcnd.n  eq0,r5,Lzero            ; size is a multiple of 8: start at the top of the body
+       subu.co r0,r0,r0                ; initialize carry (executes in the delay slot)
+
+       or      r12,r0,lo16(Lbase)
+       or.u    r12,r12,hi16(Lbase)
+       addu    r12,r12,r5              ; r12 is address for entering in loop
+
+       extu    r5,r5,2                 ; divide by 4
+       subu    r2,r2,r5                ; adjust res_ptr
+       subu    r3,r3,r5                ; adjust s1_ptr
+       subu    r4,r4,r5                ; adjust s2_ptr
+
+       or      r8,r6,r0                ; copy the first s1 limb: odd entry points subtract r8,r9
+
+       jmp.n   r12
+        or     r9,r7,r0                ; (delay slot) copy the first s2 limb likewise
+
+Loop:  addu    r3,r3,32
+       st      r8,r2,28                ; store the limb finished in the previous pass delay slot
+       addu    r4,r4,32
+       ld      r6,r3,0
+       addu    r2,r2,32
+       ld      r7,r4,0
+Lzero: subu    r10,r10,1               ; subtract 0 + 8r limbs (adj loop cnt)
+Lbase: ld      r8,r3,4
+       subu.cio r6,r6,r7
+       ld      r9,r4,4
+       st      r6,r2,0
+       ld      r6,r3,8                 ; subtract 7 + 8r limbs
+       subu.cio r8,r8,r9
+       ld      r7,r4,8
+       st      r8,r2,4
+       ld      r8,r3,12                ; subtract 6 + 8r limbs
+       subu.cio r6,r6,r7
+       ld      r9,r4,12
+       st      r6,r2,8
+       ld      r6,r3,16                ; subtract 5 + 8r limbs
+       subu.cio r8,r8,r9
+       ld      r7,r4,16
+       st      r8,r2,12
+       ld      r8,r3,20                ; subtract 4 + 8r limbs
+       subu.cio r6,r6,r7
+       ld      r9,r4,20
+       st      r6,r2,16
+       ld      r6,r3,24                ; subtract 3 + 8r limbs
+       subu.cio r8,r8,r9
+       ld      r7,r4,24
+       st      r8,r2,20
+       ld      r8,r3,28                ; subtract 2 + 8r limbs
+       subu.cio r6,r6,r7
+       ld      r9,r4,28
+       st      r6,r2,24
+       bcnd.n  ne0,r10,Loop            ; subtract 1 + 8r limbs
+        subu.cio r8,r8,r9
+
+       st      r8,r2,28                ; store most significant limb
+
+       addu.ci r2,r0,r0                ; return carry-out from most sign. limb
+       jmp.n    r1
+        xor    r2,r2,1                 ; (delay slot) invert: cy was initialized to 1, so r2 = 1 means borrow
diff --git a/mpn/minithres/gmp-mparam.h b/mpn/minithres/gmp-mparam.h

new file mode 100644 (file)

index 0000000..47f0f6c
--- /dev/null
+++ b/mpn/minithres/gmp-mparam.h
@@ -0,0 +1,95 @@
+/* Minimal values gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000, 2006, 2008, 2009, 2010 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+/* The values in this file are not currently minimal.
+   Trimming them further would be good.  */
+
+#define MOD_1_NORM_THRESHOLD                 0  /* always */
+#define MOD_1_UNNORM_THRESHOLD               0  /* always */
+#define MOD_1_1_THRESHOLD                    2
+#define MOD_1_2_THRESHOLD                    3
+#define MOD_1_4_THRESHOLD                    4
+#define USE_PREINV_DIVREM_1                  1  /* native */
+#define USE_PREINV_MOD_1                     1
+#define DIVREM_2_THRESHOLD                   0  /* always */
+#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
+#define MODEXACT_1_ODD_THRESHOLD             0  /* always (native) */
+
+#define MUL_TOOM22_THRESHOLD                 8
+#define MUL_TOOM33_THRESHOLD                20
+#define MUL_TOOM44_THRESHOLD                24
+#define MUL_TOOM6H_THRESHOLD               200 /* FIXME */
+#define MUL_TOOM8H_THRESHOLD                86
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      50 /* FIXME */
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD      50 /* FIXME */
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD      50 /* FIXME */
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD      50 /* FIXME */
+
+#define SQR_BASECASE_THRESHOLD               0
+#define SQR_TOOM2_THRESHOLD                  8
+#define SQR_TOOM3_THRESHOLD                 20
+#define SQR_TOOM4_THRESHOLD                 24
+#define SQR_TOOM6H_THRESHOLD               200 /* FIXME */
+#define SQR_TOOM8H_THRESHOLD                86
+
+#define MULMOD_BNM1_THRESHOLD            10
+#define SQRMOD_BNM1_THRESHOLD            10
+
+#define MUL_FFT_TABLE  {64, 256, 1024, 4096, 8192, 65536, 0}
+#define MUL_FFT_MODF_THRESHOLD  65
+#define MUL_FFT_THRESHOLD      200
+
+#define SQR_FFT_TABLE  {64, 256, 1024, 4096, 8192, 65536, 0}
+#define SQR_FFT_MODF_THRESHOLD  65
+#define SQR_FFT_THRESHOLD      200
+
+#define MULLO_BASECASE_THRESHOLD             0
+#define MULLO_DC_THRESHOLD                   2
+#define MULLO_MUL_N_THRESHOLD                4
+
+#define DC_DIV_QR_THRESHOLD                  6
+#define DC_DIVAPPR_Q_THRESHOLD               6
+#define DC_BDIV_QR_THRESHOLD                 4
+#define DC_BDIV_Q_THRESHOLD                  4
+
+#define INV_MULMOD_BNM1_THRESHOLD            2
+#define INV_NEWTON_THRESHOLD                 6
+#define INV_APPR_THRESHOLD                   4
+
+#define BINV_NEWTON_THRESHOLD                6
+#define REDC_1_TO_REDC_N_THRESHOLD           4
+
+#define MU_DIV_QR_THRESHOLD                  8
+#define MU_DIVAPPR_Q_THRESHOLD               8
+#define MUPI_DIV_QR_THRESHOLD                8
+#define MU_BDIV_QR_THRESHOLD                 8
+#define MU_BDIV_Q_THRESHOLD                  8
+
+#define MATRIX22_STRASSEN_THRESHOLD          2
+#define HGCD_THRESHOLD                      10
+#define GCD_DC_THRESHOLD                    20
+#define GCDEXT_SCHOENHAGE_THRESHOLD         20
+#define JACOBI_BASE_METHOD                   1
+
+#define GET_STR_DC_THRESHOLD                 4
+#define GET_STR_PRECOMPUTE_THRESHOLD        10
+#define SET_STR_THRESHOLD                   64
+#define SET_STR_PRECOMPUTE_THRESHOLD       100
diff --git a/mpn/mips32/add_n.asm b/mpn/mips32/add_n.asm

new file mode 100644 (file)

index 0000000..f7dc7ef
--- /dev/null
+++ b/mpn/mips32/add_n.asm
@@ -0,0 +1,113 @@
+dnl  MIPS32 mpn_add_n -- Add two limb vectors of the same length > 0 and store
+dnl  sum in a third limb vector.
+
+dnl  Copyright 1995, 2000, 2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr      $4
+C s1_ptr       $5
+C s2_ptr       $6
+C size         $7
+
+ASM_START()
+PROLOGUE(mpn_add_n)
+C Add the limbs at s1_ptr and s2_ptr, store the sums at res_ptr and return
+C the carry in $2.  One limb pair is always kept loaded ahead of the
+C addition, and the final limb is completed at .Lend.
+C Carry handling per limb: add the incoming carry to the s2 limb, record
+C the overflow of that addition in $8, add the s1 limb, record the overflow
+C of that addition in $2, then combine both into $2.
+
+       lw      $10,0($5)       C preload first s1 limb
+       lw      $11,0($6)       C preload first s2 limb
+
+       addiu   $7,$7,-1        C the last limb is handled separately at .Lend
+       and     $9,$7,4-1       C number of limbs in first loop
+       beq     $9,$0,.L0       C if multiple of 4 limbs, skip first loop
+        move   $2,$0           C delay slot: clear the carry
+
+       subu    $7,$7,$9        C limbs left for the 4-way unrolled loop
+
+.Loop0:        addiu   $9,$9,-1
+       lw      $12,4($5)       C load the next limb pair early
+       addu    $11,$11,$2      C s2 limb + carry in
+       lw      $13,4($6)
+       sltu    $8,$11,$2       C $8 = overflow of the carry addition
+       addu    $11,$10,$11     C + s1 limb
+       sltu    $2,$11,$10      C $2 = overflow of the limb addition
+       sw      $11,0($4)
+       or      $2,$2,$8        C combined carry out
+
+       addiu   $5,$5,4
+       addiu   $6,$6,4
+       move    $10,$12         C the preloaded pair becomes the current pair
+       move    $11,$13
+       bne     $9,$0,.Loop0
+        addiu  $4,$4,4
+
+.L0:   beq     $7,$0,.Lend     C nothing left for the unrolled loop
+        nop
+
+C Main loop, 4 limbs per pass.  The current pair alternates between
+C $10/$11 and $12/$13 so that no register moves are needed.
+.Loop: addiu   $7,$7,-4
+
+       lw      $12,4($5)
+       addu    $11,$11,$2
+       lw      $13,4($6)
+       sltu    $8,$11,$2
+       addu    $11,$10,$11
+       sltu    $2,$11,$10
+       sw      $11,0($4)
+       or      $2,$2,$8
+
+       lw      $10,8($5)
+       addu    $13,$13,$2
+       lw      $11,8($6)
+       sltu    $8,$13,$2
+       addu    $13,$12,$13
+       sltu    $2,$13,$12
+       sw      $13,4($4)
+       or      $2,$2,$8
+
+       lw      $12,12($5)
+       addu    $11,$11,$2
+       lw      $13,12($6)
+       sltu    $8,$11,$2
+       addu    $11,$10,$11
+       sltu    $2,$11,$10
+       sw      $11,8($4)
+       or      $2,$2,$8
+
+       lw      $10,16($5)
+       addu    $13,$13,$2
+       lw      $11,16($6)
+       sltu    $8,$13,$2
+       addu    $13,$12,$13
+       sltu    $2,$13,$12
+       sw      $13,12($4)
+       or      $2,$2,$8
+
+       addiu   $5,$5,16
+       addiu   $6,$6,16
+
+       bne     $7,$0,.Loop
+        addiu  $4,$4,16
+
+C Finish the limb pair that is still loaded in $10/$11.
+.Lend: addu    $11,$11,$2
+       sltu    $8,$11,$2
+       addu    $11,$10,$11
+       sltu    $2,$11,$10
+       sw      $11,0($4)
+       j       $31
+       or      $2,$2,$8        C return the combined carry (presumably the jump delay slot, TODO confirm noreorder)
+EPILOGUE(mpn_add_n)
diff --git a/mpn/mips32/addmul_1.asm b/mpn/mips32/addmul_1.asm

new file mode 100644 (file)

index 0000000..f43e3c6
--- /dev/null
+++ b/mpn/mips32/addmul_1.asm
@@ -0,0 +1,90 @@
+dnl  MIPS32 mpn_addmul_1 -- Multiply a limb vector with a single limb and add
+dnl  the product to a second limb vector.
+
+dnl  Copyright 1992, 1994, 1996, 2000, 2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr      $4
+C s1_ptr       $5
+C size         $6
+C s2_limb      $7
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+C Multiply the limbs at s1_ptr by s2_limb, add the products to the limbs
+C at res_ptr in place and return the carry limb in $2.
+C The loop is software pipelined two deep: while one product is read with
+C mflo and mfhi and accumulated, the next multu is already started and the
+C s1 limb after that is already loaded.  The two wind-down blocks drain
+C the pipeline.
+
+C feed-in phase 0
+       lw      $8,0($5)
+
+C feed-in phase 1
+       addiu   $5,$5,4
+       multu   $8,$7           C start the first product
+
+       addiu   $6,$6,-1
+       beq     $6,$0,$LC0      C size was 1: only the pending product remains
+        move   $2,$0           C zero cy2
+
+       addiu   $6,$6,-1
+       beq     $6,$0,$LC1      C size was 2: one more multiply, then drain
+       lw      $8,0($5)        C load new s1 limb as early as possible
+
+Loop:  lw      $10,0($4)       C current res limb
+       mflo    $3              C low word of the pending product
+       mfhi    $9              C high word of the pending product
+       addiu   $5,$5,4
+       addu    $3,$3,$2        C add old carry limb to low product limb
+       multu   $8,$7           C start the next product
+       lw      $8,0($5)        C load new s1 limb as early as possible
+       addiu   $6,$6,-1        C decrement loop counter
+       sltu    $2,$3,$2        C carry from previous addition -> $2
+       addu    $3,$10,$3       C add the res limb
+       sltu    $10,$3,$10      C carry from that addition
+       addu    $2,$2,$10       C sum of both carries
+       sw      $3,0($4)
+       addiu   $4,$4,4
+       bne     $6,$0,Loop
+        addu   $2,$9,$2        C add high product limb and carry from addition
+
+C wind-down phase 1
+C Same as one loop pass, except that no further s1 limb is loaded.
+$LC1:  lw      $10,0($4)
+       mflo    $3
+       mfhi    $9
+       addu    $3,$3,$2
+       sltu    $2,$3,$2
+       multu   $8,$7           C start the last product
+       addu    $3,$10,$3
+       sltu    $10,$3,$10
+       addu    $2,$2,$10
+       sw      $3,0($4)
+       addiu   $4,$4,4
+       addu    $2,$9,$2        C add high product limb and carry from addition
+
+C wind-down phase 0
+C Accumulate the last pending product; no multiply is started here.
+$LC0:  lw      $10,0($4)
+       mflo    $3
+       mfhi    $9
+       addu    $3,$3,$2
+       sltu    $2,$3,$2
+       addu    $3,$10,$3
+       sltu    $10,$3,$10
+       addu    $2,$2,$10
+       sw      $3,0($4)
+       j       $31
+       addu    $2,$9,$2        C add high product limb and carry from addition
+EPILOGUE(mpn_addmul_1)
diff --git a/mpn/mips32/gmp-mparam.h b/mpn/mips32/gmp-mparam.h

new file mode 100644 (file)

index 0000000..a8f2732
--- /dev/null
+++ b/mpn/mips32/gmp-mparam.h
@@ -0,0 +1,62 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by the
+Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License along
+with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#define GMP_LIMB_BITS 32
+#define BYTES_PER_MP_LIMB 4
+
+
+/* Generated by tuneup.c, 2002-02-20, gcc 2.95 (R3000) */
+
+#define MUL_TOOM22_THRESHOLD             20
+#define MUL_TOOM33_THRESHOLD             50
+
+#define SQR_BASECASE_THRESHOLD            7
+#define SQR_TOOM2_THRESHOLD              57
+#define SQR_TOOM3_THRESHOLD              78
+
+#define DIV_SB_PREINV_THRESHOLD           0  /* always */
+#define DIV_DC_THRESHOLD                 57
+#define POWM_THRESHOLD                   78
+
+#define GCD_ACCEL_THRESHOLD               3
+#define JACOBI_BASE_METHOD                2
+
+#define DIVREM_1_NORM_THRESHOLD           0  /* always */
+#define DIVREM_1_UNNORM_THRESHOLD         0  /* always */
+#define MOD_1_NORM_THRESHOLD              0  /* always */
+#define MOD_1_UNNORM_THRESHOLD            0  /* always */
+#define USE_PREINV_DIVREM_1               1
+#define USE_PREINV_MOD_1                  1
+#define DIVREM_2_THRESHOLD                0  /* always */
+#define DIVEXACT_1_THRESHOLD              0  /* always */
+#define MODEXACT_1_ODD_THRESHOLD          0  /* always */
+
+#define GET_STR_DC_THRESHOLD             19
+#define GET_STR_PRECOMPUTE_THRESHOLD     25
+#define SET_STR_THRESHOLD               309
+
+#define MUL_FFT_TABLE  { 496, 1056, 2176, 5632, 14336, 40960, 0 }
+#define MUL_FFT_MODF_THRESHOLD          624
+#define MUL_FFT_THRESHOLD              5888
+
+#define SQR_FFT_TABLE  { 496, 1184, 2176, 5632, 14336, 40960, 0 }
+#define SQR_FFT_MODF_THRESHOLD          560
+#define SQR_FFT_THRESHOLD              5376
diff --git a/mpn/mips32/lshift.asm b/mpn/mips32/lshift.asm

new file mode 100644 (file)

index 0000000..8a27951
--- /dev/null
+++ b/mpn/mips32/lshift.asm
@@ -0,0 +1,88 @@
+dnl  MIPS32 mpn_lshift -- Left shift.
+
+dnl  Copyright 1995, 2000, 2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS  (code is only correct for size >= 1 and 1 <= cnt <= 31; result in $2)
+C res_ptr      $4      destination limbs, written from the top limb downwards
+C src_ptr      $5      source limbs, read from the top limb downwards
+C size         $6      number of limbs
+C cnt          $7      number of bit positions to shift left
+
+ASM_START()
+PROLOGUE(mpn_lshift)
+       sll     $2,$6,2         C r2 = size*4 = vector length in bytes
+       addu    $5,$5,$2        C make r5 point at end of src
+       lw      $10,-4($5)      C load first limb (the most significant one)
+       subu    $13,$0,$7       C r13 = -cnt; shifts use only the low 5 bits, so it acts as 32-cnt
+       addu    $4,$4,$2        C make r4 point at end of res
+       addiu   $6,$6,-1        C r6 = size-1 = limbs still to be loaded
+       and     $9,$6,4-1       C number of limbs in first loop
+       beq     $9,$0,.L0       C if size-1 is a multiple of 4, skip first loop
+        srl    $2,$10,$13      C (delay slot) function result: top limb >> (32-cnt)
+
+       subu    $6,$6,$9        C r6 = limbs left for the unrolled loop, a multiple of 4
+
+.Loop0:        lw      $3,-8($5)       C next lower limb
+       addiu   $4,$4,-4
+       addiu   $5,$5,-4
+       addiu   $9,$9,-1
+       sll     $11,$10,$7      C current limb << cnt
+       srl     $12,$3,$13      C next lower limb >> (32-cnt)
+       move    $10,$3          C the lower limb becomes the current limb
+       or      $8,$11,$12
+       bne     $9,$0,.Loop0
+        sw     $8,0($4)        C (delay slot) store result limb
+
+.L0:   beq     $6,$0,.Lend     C nothing left for the unrolled loop
+        nop
+
+.Loop: lw      $3,-8($5)       C unrolled: 4 result limbs per pass
+       addiu   $4,$4,-16
+       addiu   $6,$6,-4
+       sll     $11,$10,$7
+       srl     $12,$3,$13
+
+       lw      $10,-12($5)     C r10 and r3 alternate as the newly loaded limb
+       sll     $14,$3,$7
+       or      $8,$11,$12
+       sw      $8,12($4)
+       srl     $9,$10,$13
+
+       lw      $3,-16($5)
+       sll     $11,$10,$7
+       or      $8,$14,$9
+       sw      $8,8($4)
+       srl     $12,$3,$13
+
+       lw      $10,-20($5)
+       sll     $14,$3,$7
+       or      $8,$11,$12
+       sw      $8,4($4)
+       srl     $9,$10,$13
+
+       addiu   $5,$5,-16
+       or      $8,$14,$9
+       bgtz    $6,.Loop
+        sw     $8,0($4)        C (delay slot) fourth store of this pass
+
+.Lend: sll     $8,$10,$7       C lowest limb: zero bits come in at the bottom
+       j       $31
+       sw      $8,-4($4)       C (delay slot) store lowest result limb
+EPILOGUE(mpn_lshift)
diff --git a/mpn/mips32/mips-defs.m4 b/mpn/mips32/mips-defs.m4

new file mode 100644 (file)

index 0000000..a30e8df
--- /dev/null
+++ b/mpn/mips32/mips-defs.m4
@@ -0,0 +1,69 @@
+divert(-1)
+
+dnl  m4 macros for MIPS assembly code (both 32-bit and 64-bit).
+
+
+dnl  Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+dnl  Usage: ASM_START()  (emits .set noreorder and .set nomacro: the assembler then leaves delay slots exactly as written)
+define(`ASM_START',
+m4_assert_numargs(0)
+`      .set noreorder
+       .set nomacro')
+
+dnl  Usage: X(value)  (expands to the argument with 0x prepended, e.g. X(ff) gives 0xff)
+define(`X',
+m4_assert_numargs(1)
+`0x$1')
+
+dnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)
+dnl          EPILOGUE_cpu(GSYM_PREFIX`'foo)
+dnl  PROLOGUE_cpu emits .text, .align 4, .globl, .ent and the label; EPILOGUE_cpu emits the matching .end
+define(`PROLOGUE_cpu',
+m4_assert_numargs(1)
+`      .text
+       .align  4
+       .globl  $1
+       .ent    $1
+$1:')
+
+define(`EPILOGUE_cpu',
+m4_assert_numargs(1)
+`      .end    $1')
+
+
+dnl  Usage: r0 ... r31
+dnl         f0 ... f31
+dnl
+dnl  Map register names r0 to $0, and f0 to $f0, etc.
+dnl
+dnl  defreg() is used to protect the $ in $0 (otherwise it would represent a
+dnl  macro argument).  Double quoting is used to protect the f0 in $f0
+dnl  (otherwise it would be an infinite recursion).
+
+forloop(i,0,31,`defreg(`r'i,$i)')
+forloop(i,0,31,`deflit(`f'i,``$f''i)')
+
+
+dnl  Usage: ASM_END()  (the body is empty apart from the argument-count check)
+define(`ASM_END',
+m4_assert_numargs(0)
+)
+
+divert
diff --git a/mpn/mips32/mips.m4 b/mpn/mips32/mips.m4

new file mode 100644 (file)

index 0000000..37c6ca8
--- /dev/null
+++ b/mpn/mips32/mips.m4
@@ -0,0 +1,69 @@
+divert(-1)
+
+dnl  m4 macros for MIPS assembly code.
+
+
+dnl  Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+dnl  Usage: ASM_START()  (emits .set noreorder and .set nomacro: the assembler then leaves delay slots exactly as written)
+define(`ASM_START',
+m4_assert_numargs(0)
+`      .set noreorder
+       .set nomacro')
+
+dnl  Usage: X(value)  (expands to the argument with 0x prepended, e.g. X(ff) gives 0xff)
+define(`X',
+m4_assert_numargs(1)
+`0x$1')
+
+dnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)
+dnl          EPILOGUE_cpu(GSYM_PREFIX`'foo)
+dnl  PROLOGUE_cpu emits .text, .align 4, .globl, .ent and the label; EPILOGUE_cpu emits the matching .end
+define(`PROLOGUE_cpu',
+m4_assert_numargs(1)
+`      .text
+       .align  4
+       .globl  $1
+       .ent    $1
+$1:')
+
+define(`EPILOGUE_cpu',
+m4_assert_numargs(1)
+`      .end    $1')
+
+
+dnl  Usage: r0 ... r31
+dnl         f0 ... f31
+dnl
+dnl  Map register names r0 to $0, and f0 to $f0, etc.
+dnl
+dnl  defreg() is used to protect the $ in $0 (otherwise it would represent a
+dnl  macro argument).  Double quoting is used to protect the f0 in $f0
+dnl  (otherwise it would be an infinite recursion).
+
+forloop(i,0,31,`defreg(`r'i,$i)')
+forloop(i,0,31,`deflit(`f'i,``$f''i)')
+
+
+dnl  Usage: ASM_END()  (the body is empty apart from the argument-count check)
+define(`ASM_END',
+m4_assert_numargs(0)
+)
+
+divert
diff --git a/mpn/mips32/mul_1.asm b/mpn/mips32/mul_1.asm

new file mode 100644 (file)

index 0000000..1e1a275
--- /dev/null
+++ b/mpn/mips32/mul_1.asm
@@ -0,0 +1,78 @@
+dnl  MIPS32 mpn_mul_1 -- Multiply a limb vector with a single limb and store
+dnl  the product in a second limb vector.
+
+dnl  Copyright 1992, 1994, 1996, 2000, 2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS  (code assumes size >= 1; the final carry limb is returned in $2)
+C res_ptr      $4      destination limbs, written from the lowest limb upwards
+C s1_ptr       $5      source limbs, read from the lowest limb upwards
+C size         $6      number of limbs
+C s2_limb      $7      the single limb every s1 limb is multiplied by
+
+ASM_START()
+PROLOGUE(mpn_mul_1)
+
+C feed-in phase 0: fetch the first s1 limb
+       lw      $8,0($5)
+
+C feed-in phase 1: start the first multiply before any result is consumed
+       addiu   $5,$5,4
+       multu   $8,$7
+
+       addiu   $6,$6,-1
+       beq     $6,$0,$LC0      C size == 1: only the final wind-down is needed
+        move   $2,$0           C zero cy2
+
+       addiu   $6,$6,-1
+       beq     $6,$0,$LC1      C size == 2: skip the loop, run both wind-down phases
+       lw      $8,0($5)        C (delay slot) load new s1 limb as early as possible
+
+Loop:  mflo    $10             C low half of the product started one step earlier
+       mfhi    $9              C high half of that product
+       addiu   $5,$5,4
+       addu    $10,$10,$2      C add old carry limb to low product limb
+       multu   $8,$7           C start the next product
+       lw      $8,0($5)        C load new s1 limb as early as possible
+       addiu   $6,$6,-1        C decrement loop counter
+       sltu    $2,$10,$2       C carry from previous addition -> $2
+       sw      $10,0($4)
+       addiu   $4,$4,4
+       bne     $6,$0,Loop
+        addu   $2,$9,$2        C add high product limb and carry from addition
+
+C wind-down phase 1: one product pending, one more multiply to start
+$LC1:  mflo    $10
+       mfhi    $9
+       addu    $10,$10,$2
+       sltu    $2,$10,$2
+       multu   $8,$7           C multiply the last s1 limb
+       sw      $10,0($4)
+       addiu   $4,$4,4
+       addu    $2,$9,$2        C add high product limb and carry from addition
+
+C wind-down phase 0: consume the last product
+$LC0:  mflo    $10
+       mfhi    $9
+       addu    $10,$10,$2
+       sltu    $2,$10,$2
+       sw      $10,0($4)
+       j       $31
+       addu    $2,$9,$2        C (delay slot) add high product limb and carry from addition
+EPILOGUE(mpn_mul_1)
diff --git a/mpn/mips32/rshift.asm b/mpn/mips32/rshift.asm

new file mode 100644 (file)

index 0000000..23d1e78
--- /dev/null
+++ b/mpn/mips32/rshift.asm
@@ -0,0 +1,85 @@
+dnl  MIPS32 mpn_rshift -- Right shift.
+
+dnl  Copyright 1995, 2000, 2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS  (code is only correct for size >= 1 and 1 <= cnt <= 31; result in $2)
+C res_ptr      $4      destination limbs, written from the lowest limb upwards
+C src_ptr      $5      source limbs, read from the lowest limb upwards
+C size         $6      number of limbs
+C cnt          $7      number of bit positions to shift right
+
+ASM_START()
+PROLOGUE(mpn_rshift)
+       lw      $10,0($5)       C load first limb (the least significant one)
+       subu    $13,$0,$7       C r13 = -cnt; shifts use only the low 5 bits, so it acts as 32-cnt
+       addiu   $6,$6,-1        C r6 = size-1 = limbs still to be loaded
+       and     $9,$6,4-1       C number of limbs in first loop
+       beq     $9,$0,.L0       C if size-1 is a multiple of 4, skip first loop
+        sll    $2,$10,$13      C (delay slot) function result: lowest limb << (32-cnt)
+
+       subu    $6,$6,$9        C r6 = limbs left for the unrolled loop, a multiple of 4
+
+.Loop0:        lw      $3,4($5)        C next higher limb
+       addiu   $4,$4,4
+       addiu   $5,$5,4
+       addiu   $9,$9,-1
+       srl     $11,$10,$7      C current limb >> cnt
+       sll     $12,$3,$13      C next higher limb << (32-cnt)
+       move    $10,$3          C the higher limb becomes the current limb
+       or      $8,$11,$12
+       bne     $9,$0,.Loop0
+        sw     $8,-4($4)       C (delay slot) store result limb
+
+.L0:   beq     $6,$0,.Lend     C nothing left for the unrolled loop
+        nop
+
+.Loop: lw      $3,4($5)        C unrolled: 4 result limbs per pass
+       addiu   $4,$4,16
+       addiu   $6,$6,-4
+       srl     $11,$10,$7
+       sll     $12,$3,$13
+
+       lw      $10,8($5)       C r10 and r3 alternate as the newly loaded limb
+       srl     $14,$3,$7
+       or      $8,$11,$12
+       sw      $8,-16($4)
+       sll     $9,$10,$13
+
+       lw      $3,12($5)
+       srl     $11,$10,$7
+       or      $8,$14,$9
+       sw      $8,-12($4)
+       sll     $12,$3,$13
+
+       lw      $10,16($5)
+       srl     $14,$3,$7
+       or      $8,$11,$12
+       sw      $8,-8($4)
+       sll     $9,$10,$13
+
+       addiu   $5,$5,16
+       or      $8,$14,$9
+       bgtz    $6,.Loop
+        sw     $8,-4($4)       C (delay slot) fourth store of this pass
+
+.Lend: srl     $8,$10,$7       C highest limb: zero bits come in at the top
+       j       $31
+       sw      $8,0($4)        C (delay slot) store highest result limb
+EPILOGUE(mpn_rshift)
diff --git a/mpn/mips32/sub_n.asm b/mpn/mips32/sub_n.asm

new file mode 100644 (file)

index 0000000..ed41271
--- /dev/null
+++ b/mpn/mips32/sub_n.asm
@@ -0,0 +1,112 @@
+dnl  MIPS32 mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+dnl  store difference in a third limb vector.
+
+dnl  Copyright 1995, 2000, 2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS  (size > 0; the borrow out of the top limb, 0 or 1, is returned in $2)
+C res_ptr      $4      destination limbs, receives s1 - s2
+C s1_ptr       $5      minuend limbs
+C s2_ptr       $6      subtrahend limbs
+C size         $7      number of limbs in each vector
+
+ASM_START()
+PROLOGUE(mpn_sub_n)
+       lw      $10,0($5)       C r10 = first s1 limb
+       lw      $11,0($6)       C r11 = first s2 limb
+
+       addiu   $7,$7,-1        C r7 = size-1 = limb pairs still to be loaded
+       and     $9,$7,4-1       C number of limbs in first loop
+       beq     $9,$0,.L0       C if size-1 is a multiple of 4, skip first loop
+        move   $2,$0           C (delay slot) no borrow into the lowest limb
+
+       subu    $7,$7,$9        C r7 = limbs left for the unrolled loop, a multiple of 4
+
+.Loop0:        addiu   $9,$9,-1
+       lw      $12,4($5)       C preload next s1 limb
+       addu    $11,$11,$2      C s2 limb + incoming borrow
+       lw      $13,4($6)       C preload next s2 limb
+       sltu    $8,$11,$2       C r8 = 1 if that addition wrapped around
+       subu    $11,$10,$11     C difference limb
+       sltu    $2,$10,$11      C r2 = 1 if the subtraction borrowed
+       sw      $11,0($4)
+       or      $2,$2,$8        C outgoing borrow = either event
+
+       addiu   $5,$5,4
+       addiu   $6,$6,4
+       move    $10,$12
+       move    $11,$13
+       bne     $9,$0,.Loop0
+        addiu  $4,$4,4         C (delay slot) advance res pointer
+
+.L0:   beq     $7,$0,.Lend     C nothing left for the unrolled loop
+        nop
+
+.Loop: addiu   $7,$7,-4        C unrolled: 4 limbs per pass, same borrow scheme as above
+
+       lw      $12,4($5)
+       addu    $11,$11,$2
+       lw      $13,4($6)
+       sltu    $8,$11,$2
+       subu    $11,$10,$11
+       sltu    $2,$10,$11
+       sw      $11,0($4)
+       or      $2,$2,$8
+
+       lw      $10,8($5)       C register pairs r10/r11 and r12/r13 alternate
+       addu    $13,$13,$2
+       lw      $11,8($6)
+       sltu    $8,$13,$2
+       subu    $13,$12,$13
+       sltu    $2,$12,$13
+       sw      $13,4($4)
+       or      $2,$2,$8
+
+       lw      $12,12($5)
+       addu    $11,$11,$2
+       lw      $13,12($6)
+       sltu    $8,$11,$2
+       subu    $11,$10,$11
+       sltu    $2,$10,$11
+       sw      $11,8($4)
+       or      $2,$2,$8
+
+       lw      $10,16($5)
+       addu    $13,$13,$2
+       lw      $11,16($6)
+       sltu    $8,$13,$2
+       subu    $13,$12,$13
+       sltu    $2,$12,$13
+       sw      $13,12($4)
+       or      $2,$2,$8
+
+       addiu   $5,$5,16
+       addiu   $6,$6,16
+
+       bne     $7,$0,.Loop
+        addiu  $4,$4,16        C (delay slot) advance res pointer
+
+.Lend: addu    $11,$11,$2      C last limb pair is already in r10/r11
+       sltu    $8,$11,$2
+       subu    $11,$10,$11
+       sltu    $2,$10,$11
+       sw      $11,0($4)
+       j       $31
+       or      $2,$2,$8        C (delay slot) final borrow -> return value
+EPILOGUE(mpn_sub_n)
diff --git a/mpn/mips32/submul_1.asm b/mpn/mips32/submul_1.asm

new file mode 100644 (file)

index 0000000..4e43654
--- /dev/null
+++ b/mpn/mips32/submul_1.asm
@@ -0,0 +1,90 @@
+dnl  MIPS32 mpn_submul_1 -- Multiply a limb vector with a single limb and
+dnl  subtract the product from a second limb vector.
+
+dnl  Copyright 1992, 1994, 1996, 2000, 2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS  (code assumes size >= 1; the final borrow limb is returned in $2)
+C res_ptr      $4      limbs that are read, reduced by the product, and written back
+C s1_ptr       $5      source limbs, read from the lowest limb upwards
+C size         $6      number of limbs
+C s2_limb      $7      the single limb every s1 limb is multiplied by
+
+ASM_START()
+PROLOGUE(mpn_submul_1)
+
+C feed-in phase 0: fetch the first s1 limb
+       lw      $8,0($5)
+
+C feed-in phase 1: start the first multiply before any result is consumed
+       addiu   $5,$5,4
+       multu   $8,$7
+
+       addiu   $6,$6,-1
+       beq     $6,$0,$LC0      C size == 1: only the final wind-down is needed
+        move   $2,$0           C zero cy2
+
+       addiu   $6,$6,-1
+       beq     $6,$0,$LC1      C size == 2: skip the loop, run both wind-down phases
+       lw      $8,0($5)        C (delay slot) load new s1 limb as early as possible
+
+Loop:  lw      $10,0($4)       C current res limb
+       mflo    $3              C low half of the product started one step earlier
+       mfhi    $9              C high half of that product
+       addiu   $5,$5,4
+       addu    $3,$3,$2        C add old carry limb to low product limb
+       multu   $8,$7           C start the next product
+       lw      $8,0($5)        C load new s1 limb as early as possible
+       addiu   $6,$6,-1        C decrement loop counter
+       sltu    $2,$3,$2        C carry from previous addition -> $2
+       subu    $3,$10,$3       C res limb minus (low product + carry)
+       sgtu    $10,$3,$10      C r10 = 1 if the difference exceeds the old res limb, i.e. a borrow
+       addu    $2,$2,$10       C fold the borrow into the carry
+       sw      $3,0($4)
+       addiu   $4,$4,4
+       bne     $6,$0,Loop
+        addu   $2,$9,$2        C add high product limb and carry from addition
+
+C wind-down phase 1: one product pending, one more multiply to start
+$LC1:  lw      $10,0($4)
+       mflo    $3
+       mfhi    $9
+       addu    $3,$3,$2
+       sltu    $2,$3,$2
+       multu   $8,$7           C multiply the last s1 limb
+       subu    $3,$10,$3
+       sgtu    $10,$3,$10
+       addu    $2,$2,$10
+       sw      $3,0($4)
+       addiu   $4,$4,4
+       addu    $2,$9,$2        C add high product limb and carry from addition
+
+C wind-down phase 0: consume the last product
+$LC0:  lw      $10,0($4)
+       mflo    $3
+       mfhi    $9
+       addu    $3,$3,$2
+       sltu    $2,$3,$2
+       subu    $3,$10,$3
+       sgtu    $10,$3,$10
+       addu    $2,$2,$10
+       sw      $3,0($4)
+       j       $31
+       addu    $2,$9,$2        C (delay slot) add high product limb and carry from addition
+EPILOGUE(mpn_submul_1)
diff --git a/mpn/mips32/umul.asm b/mpn/mips32/umul.asm

new file mode 100644 (file)

index 0000000..04ecbe5
--- /dev/null
+++ b/mpn/mips32/umul.asm
@@ -0,0 +1,34 @@
+dnl  MIPS32 umul_ppmm -- longlong.h support.
+
+dnl  Copyright 1999, 2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS  (the high word of u*v is returned in $2)
+C plp   $4      address that receives the low word of the product
+C u     $5      first 32-bit factor
+C v     $6      second 32-bit factor
+
+ASM_START()
+PROLOGUE(mpn_umul_ppmm)
+       multu   $5,$6           C unsigned 32x32 -> 64 bit product in HI:LO
+       mflo    $3              C low word
+       mfhi    $2              C high word is the return value
+       j       $31
+       sw      $3,0($4)        C (delay slot) store the low word through plp
+EPILOGUE(mpn_umul_ppmm)
diff --git a/mpn/mips64/README b/mpn/mips64/README

new file mode 100644 (file)

index 0000000..65a1af1
--- /dev/null
+++ b/mpn/mips64/README
@@ -0,0 +1,49 @@
+Copyright 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+
+
+
+This directory contains mpn functions optimized for MIPS3.  Examples of
+processors that implement MIPS3 are R4000, R4400, R4600, R4700, and R8000.
+
+RELEVANT OPTIMIZATION ISSUES
+
+1. On the R4000 and R4400, branches, both the plain and the "likely" ones,
+   take 3 cycles to execute.  (The fastest possible loop will take 4 cycles,
+   because of the delay insn.)
+
+   On the R4600, branches take a single cycle.
+
+   On the R8000, branches often take no noticeable cycles, as they are
+   executed in a separate function unit.
+
+2. The R4000 and R4400 have a load latency of 4 cycles.
+
+3. On the R4000 and R4400, multiplies take a data-dependent number of
+   cycles, contrary to the SGI documentation.  There seem to be 3 or 4
+   possible latencies.
+
+4. The R1x000 processors can issue one floating-point operation, two integer
+   operations, and one memory operation per cycle.  The FPU has very short
+   latencies, while the integer multiply unit is non-pipelined.  We should
+   therefore write fp based mpn_Xmul_1.
+
+STATUS
+
+Good...
diff --git a/mpn/mips64/add_n.asm b/mpn/mips64/add_n.asm

new file mode 100644 (file)

index 0000000..1a3978c
--- /dev/null
+++ b/mpn/mips64/add_n.asm
@@ -0,0 +1,112 @@
+dnl  MIPS64 mpn_add_n -- Add two limb vectors of the same length > 0 and store
+dnl  sum in a third limb vector.
+
+dnl  Copyright 1995, 2000, 2001, 2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS  (size > 0; the carry out of the top limb, 0 or 1, is returned in $2)
+C res_ptr      $4      destination limbs, receives s1 + s2
+C s1_ptr       $5      first addend limbs
+C s2_ptr       $6      second addend limbs
+C size         $7      number of limbs in each vector
+
+ASM_START()
+PROLOGUE(mpn_add_n)
+       ld      $10,0($5)       C r10 = first s1 limb
+       ld      $11,0($6)       C r11 = first s2 limb
+
+       daddiu  $7,$7,-1        C r7 = size-1 = limb pairs still to be loaded
+       and     $9,$7,4-1       C number of limbs in first loop
+       beq     $9,$0,.L0       C if size-1 is a multiple of 4, skip first loop
+        move   $2,$0           C (delay slot) no carry into the lowest limb
+
+       dsubu   $7,$7,$9        C r7 = limbs left for the unrolled loop, a multiple of 4
+
+.Loop0:        daddiu  $9,$9,-1
+       ld      $12,8($5)       C preload next s1 limb
+       daddu   $11,$11,$2      C s2 limb + incoming carry
+       ld      $13,8($6)       C preload next s2 limb
+       sltu    $8,$11,$2       C r8 = 1 if that addition wrapped around
+       daddu   $11,$10,$11     C sum limb
+       sltu    $2,$11,$10      C r2 = 1 if adding the s1 limb wrapped around
+       sd      $11,0($4)
+       or      $2,$2,$8        C outgoing carry = either event
+
+       daddiu  $5,$5,8
+       daddiu  $6,$6,8
+       move    $10,$12
+       move    $11,$13
+       bne     $9,$0,.Loop0
+        daddiu $4,$4,8         C (delay slot) advance res pointer
+
+.L0:   beq     $7,$0,.Lend     C nothing left for the unrolled loop
+        nop
+
+.Loop: daddiu  $7,$7,-4        C unrolled: 4 limbs per pass
+
+       ld      $12,8($5)
+       daddu   $11,$11,$10     C here s1 + s2 comes first, the carry is added second
+       ld      $13,8($6)
+       sltu    $8,$11,$10      C r8 = 1 if s1 + s2 wrapped around
+       daddu   $11,$11,$2      C add incoming carry
+       sltu    $2,$11,$2       C r2 = 1 if adding the carry wrapped around
+       sd      $11,0($4)
+       or      $2,$2,$8        C outgoing carry = either event
+
+       ld      $10,16($5)      C register pairs r10/r11 and r12/r13 alternate
+       daddu   $13,$13,$12
+       ld      $11,16($6)
+       sltu    $8,$13,$12
+       daddu   $13,$13,$2
+       sltu    $2,$13,$2
+       sd      $13,8($4)
+       or      $2,$2,$8
+
+       ld      $12,24($5)
+       daddu   $11,$11,$10
+       ld      $13,24($6)
+       sltu    $8,$11,$10
+       daddu   $11,$11,$2
+       sltu    $2,$11,$2
+       sd      $11,16($4)
+       or      $2,$2,$8
+
+       ld      $10,32($5)
+       daddu   $13,$13,$12
+       ld      $11,32($6)
+       sltu    $8,$13,$12
+       daddu   $13,$13,$2
+       sltu    $2,$13,$2
+       sd      $13,24($4)
+       or      $2,$2,$8
+
+       daddiu  $5,$5,32
+       daddiu  $6,$6,32
+
+       bne     $7,$0,.Loop
+        daddiu $4,$4,32        C (delay slot) advance res pointer
+
+.Lend: daddu   $11,$11,$2      C last limb pair is already in r10/r11
+       sltu    $8,$11,$2
+       daddu   $11,$10,$11
+       sltu    $2,$11,$10
+       sd      $11,0($4)
+       j       $31
+       or      $2,$2,$8        C (delay slot) final carry -> return value
+EPILOGUE(mpn_add_n)
diff --git a/mpn/mips64/addmul_1.asm b/mpn/mips64/addmul_1.asm

new file mode 100644 (file)

index 0000000..a116298
--- /dev/null
+++ b/mpn/mips64/addmul_1.asm
@@ -0,0 +1,91 @@
+dnl  MIPS64 mpn_addmul_1 -- Multiply a limb vector with a single limb and add
+dnl  the product to a second limb vector.
+
+dnl  Copyright 1992, 1994, 1995, 2000, 2001, 2002 Free Software Foundation,
+dnl  Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS  (code assumes size >= 1; the final carry limb is returned in $2)
+C res_ptr      $4      limbs that are read, increased by the product, and written back
+C s1_ptr       $5      source limbs, read from the lowest limb upwards
+C size         $6      number of limbs
+C s2_limb      $7      the single limb every s1 limb is multiplied by
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+
+C feed-in phase 0: fetch the first s1 limb
+       ld      $8,0($5)
+
+C feed-in phase 1: start the first multiply before any result is consumed
+       daddiu  $5,$5,8
+       dmultu  $8,$7
+
+       daddiu  $6,$6,-1
+       beq     $6,$0,$LC0      C size == 1: only the final wind-down is needed
+        move   $2,$0           C zero cy2
+
+       daddiu  $6,$6,-1
+       beq     $6,$0,$LC1      C size == 2: skip the loop, run both wind-down phases
+       ld      $8,0($5)        C (delay slot) load new s1 limb as early as possible
+
+Loop:  ld      $10,0($4)       C current res limb
+       mflo    $3              C low half of the product started one step earlier
+       mfhi    $9              C high half of that product
+       daddiu  $5,$5,8
+       daddu   $3,$3,$2        C add old carry limb to low product limb
+       dmultu  $8,$7           C start the next product
+       ld      $8,0($5)        C load new s1 limb as early as possible
+       daddiu  $6,$6,-1        C decrement loop counter
+       sltu    $2,$3,$2        C carry from previous addition -> $2
+       daddu   $3,$10,$3       C res limb plus (low product + carry)
+       sltu    $10,$3,$10      C r10 = 1 if that addition wrapped around
+       daddu   $2,$2,$10       C fold both carries together
+       sd      $3,0($4)
+       daddiu  $4,$4,8
+       bne     $6,$0,Loop
+        daddu  $2,$9,$2        C add high product limb and carry from addition
+
+C wind-down phase 1: one product pending, one more multiply to start
+$LC1:  ld      $10,0($4)
+       mflo    $3
+       mfhi    $9
+       daddu   $3,$3,$2
+       sltu    $2,$3,$2
+       dmultu  $8,$7           C multiply the last s1 limb
+       daddu   $3,$10,$3
+       sltu    $10,$3,$10
+       daddu   $2,$2,$10
+       sd      $3,0($4)
+       daddiu  $4,$4,8
+       daddu   $2,$9,$2        C add high product limb and carry from addition
+
+C wind-down phase 0: consume the last product
+$LC0:  ld      $10,0($4)
+       mflo    $3
+       mfhi    $9
+       daddu   $3,$3,$2
+       sltu    $2,$3,$2
+       daddu   $3,$10,$3
+       sltu    $10,$3,$10
+       daddu   $2,$2,$10
+       sd      $3,0($4)
+       j       $31
+       daddu   $2,$9,$2        C (delay slot) add high product limb and carry from addition
+EPILOGUE(mpn_addmul_1)
diff --git a/mpn/mips64/gmp-mparam.h b/mpn/mips64/gmp-mparam.h

new file mode 100644 (file)

index 0000000..2bf9424
--- /dev/null
+++ b/mpn/mips64/gmp-mparam.h
@@ -0,0 +1,62 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2004 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by the
+Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License along
+with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#define GMP_LIMB_BITS 64                     /* limb width in bits */
+#define BYTES_PER_MP_LIMB 8                  /* 64 bits / 8 */
+
+
+/* Generated by tuneup.c, 2004-02-10, gcc 3.2 & MIPSpro C 7.2.1 (R1x000).  NOTE(review): these look like measured tuning values for that CPU and those compilers; presumably they are meant to be regenerated rather than edited by hand -- confirm. */
+
+#define MUL_TOOM22_THRESHOLD             16
+#define MUL_TOOM33_THRESHOLD             89
+
+#define SQR_BASECASE_THRESHOLD            6
+#define SQR_TOOM2_THRESHOLD              32
+#define SQR_TOOM3_THRESHOLD              98
+
+#define DIV_SB_PREINV_THRESHOLD           0  /* always */
+#define DIV_DC_THRESHOLD                 53
+#define POWM_THRESHOLD                   61
+
+#define HGCD_THRESHOLD                  116
+#define GCD_ACCEL_THRESHOLD               3
+#define GCD_DC_THRESHOLD                492
+#define JACOBI_BASE_METHOD                2
+
+#define MOD_1_NORM_THRESHOLD              0  /* always */
+#define MOD_1_UNNORM_THRESHOLD            0  /* always */
+#define USE_PREINV_DIVREM_1               1  /* NOTE(review): presumably a 0/1 switch -- confirm */
+#define USE_PREINV_MOD_1                  1  /* NOTE(review): presumably a 0/1 switch -- confirm */
+#define DIVREM_2_THRESHOLD                0  /* always */
+#define DIVEXACT_1_THRESHOLD              0  /* always */
+#define MODEXACT_1_ODD_THRESHOLD          0  /* always */
+
+#define GET_STR_DC_THRESHOLD             21
+#define GET_STR_PRECOMPUTE_THRESHOLD     26
+#define SET_STR_THRESHOLD              3962
+
+#define MUL_FFT_TABLE  { 368, 736, 1600, 3328, 7168, 20480, 49152, 0 }  /* trailing 0 presumably ends the table -- confirm */
+#define MUL_FFT_MODF_THRESHOLD          264
+#define MUL_FFT_THRESHOLD              1920
+
+#define SQR_FFT_TABLE  { 368, 736, 1856, 3328, 7168, 20480, 49152, 0 }  /* trailing 0 presumably ends the table -- confirm */
+#define SQR_FFT_MODF_THRESHOLD          280
+#define SQR_FFT_THRESHOLD              1920
diff --git a/mpn/mips64/lshift.asm b/mpn/mips64/lshift.asm

new file mode 100644 (file)

index 0000000..16da93c
--- /dev/null
+++ b/mpn/mips64/lshift.asm
@@ -0,0 +1,88 @@
+dnl  MIPS64 mpn_lshift -- Left shift.
+
+dnl  Copyright 1995, 2000, 2001, 2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS  (code is only correct for size >= 1 and 1 <= cnt <= 63; result in $2)
+C res_ptr      $4      destination limbs, written from the top limb downwards
+C src_ptr      $5      source limbs, read from the top limb downwards
+C size         $6      number of limbs
+C cnt          $7      number of bit positions to shift left
+
+ASM_START()
+PROLOGUE(mpn_lshift)
+       dsll    $2,$6,3         C r2 = size*8 = vector length in bytes
+       daddu   $5,$5,$2        C make r5 point at end of src
+       ld      $10,-8($5)      C load first limb (the most significant one)
+       dsubu   $13,$0,$7       C r13 = -cnt; shifts use only the low 6 bits, so it acts as 64-cnt
+       daddu   $4,$4,$2        C make r4 point at end of res
+       daddiu  $6,$6,-1        C r6 = size-1 = limbs still to be loaded
+       and     $9,$6,4-1       C number of limbs in first loop
+       beq     $9,$0,.L0       C if size-1 is a multiple of 4, skip first loop
+        dsrl   $2,$10,$13      C (delay slot) function result: top limb >> (64-cnt)
+
+       dsubu   $6,$6,$9        C r6 = limbs left for the unrolled loop, a multiple of 4
+
+.Loop0:        ld      $3,-16($5)      C next lower limb
+       daddiu  $4,$4,-8
+       daddiu  $5,$5,-8
+       daddiu  $9,$9,-1
+       dsll    $11,$10,$7      C current limb << cnt
+       dsrl    $12,$3,$13      C next lower limb >> (64-cnt)
+       move    $10,$3          C the lower limb becomes the current limb
+       or      $8,$11,$12
+       bne     $9,$0,.Loop0
+        sd     $8,0($4)        C (delay slot) store result limb
+
+.L0:   beq     $6,$0,.Lend     C nothing left for the unrolled loop
+        nop
+
+.Loop: ld      $3,-16($5)      C unrolled: 4 result limbs per pass
+       daddiu  $4,$4,-32
+       daddiu  $6,$6,-4
+       dsll    $11,$10,$7
+       dsrl    $12,$3,$13
+
+       ld      $10,-24($5)     C r10 and r3 alternate as the newly loaded limb
+       dsll    $14,$3,$7
+       or      $8,$11,$12
+       sd      $8,24($4)
+       dsrl    $9,$10,$13
+
+       ld      $3,-32($5)
+       dsll    $11,$10,$7
+       or      $8,$14,$9
+       sd      $8,16($4)
+       dsrl    $12,$3,$13
+
+       ld      $10,-40($5)
+       dsll    $14,$3,$7
+       or      $8,$11,$12
+       sd      $8,8($4)
+       dsrl    $9,$10,$13
+
+       daddiu  $5,$5,-32
+       or      $8,$14,$9
+       bgtz    $6,.Loop
+        sd     $8,0($4)        C (delay slot) fourth store of this pass
+
+.Lend: dsll    $8,$10,$7       C lowest limb: zero bits come in at the bottom
+       j       $31
+       sd      $8,-8($4)       C (delay slot) store lowest result limb
+EPILOGUE(mpn_lshift)
diff --git a/mpn/mips64/mul_1.asm b/mpn/mips64/mul_1.asm

new file mode 100644 (file)

index 0000000..d16e08d
--- /dev/null
+++ b/mpn/mips64/mul_1.asm
@@ -0,0 +1,82 @@
+dnl  MIPS64 mpn_mul_1 -- Multiply a limb vector with a single limb and store
+dnl  the product in a second limb vector.
+
+dnl  Copyright 1992, 1994, 1995, 2000, 2001, 2002 Free Software Foundation,
+dnl  Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr      $4
+C s1_ptr       $5
+C size         $6
+C s2_limb      $7
+
+ASM_START()
+PROLOGUE(mpn_mul_1)
+
+C feed-in phase 0
+       ld      $8,0($5)        C load first s1 limb
+
+C feed-in phase 1
+       daddiu  $5,$5,8
+       dmultu  $8,$7           C start first limb * s2_limb, product lands in HI/LO
+
+       daddiu  $6,$6,-1
+       beq     $6,$0,$LC0      C size == 1: only wind-down phase 0 is needed
+        move   $2,$0           C zero cy2
+
+       daddiu  $6,$6,-1
+       beq     $6,$0,$LC1      C size == 2: skip the loop
+       ld      $8,0($5)        C load new s1 limb as early as possible
+
+Loop:  nop                     C NOTE(review): presumably scheduling padding -- confirm
+       mflo    $10             C low limb of the pending product
+       mfhi    $9              C high limb of the pending product
+       daddiu  $5,$5,8
+       daddu   $10,$10,$2      C add old carry limb to low product limb
+       dmultu  $8,$7           C start the next product
+       ld      $8,0($5)        C load new s1 limb as early as possible
+       daddiu  $6,$6,-1        C decrement loop counter
+       sltu    $2,$10,$2       C carry from previous addition -> $2
+       nop
+       nop
+       sd      $10,0($4)       C store result limb
+       daddiu  $4,$4,8
+       bne     $6,$0,Loop
+        daddu  $2,$9,$2        C add high product limb and carry from addition
+
+C wind-down phase 1
+$LC1:  mflo    $10
+       mfhi    $9
+       daddu   $10,$10,$2      C add old carry limb to low product limb
+       sltu    $2,$10,$2       C carry from that addition -> $2
+       dmultu  $8,$7           C start the final product
+       sd      $10,0($4)
+       daddiu  $4,$4,8
+       daddu   $2,$9,$2        C add high product limb and carry from addition
+
+C wind-down phase 0
+$LC0:  mflo    $10
+       mfhi    $9
+       daddu   $10,$10,$2      C add old carry limb to low product limb
+       sltu    $2,$10,$2       C carry from that addition -> $2
+       sd      $10,0($4)       C store the last result limb
+       j       $31
+       daddu   $2,$9,$2        C add high product limb and carry from addition
+EPILOGUE(mpn_mul_1)
diff --git a/mpn/mips64/rshift.asm b/mpn/mips64/rshift.asm

new file mode 100644 (file)

index 0000000..5294875
--- /dev/null
+++ b/mpn/mips64/rshift.asm
@@ -0,0 +1,85 @@
+dnl  MIPS64 mpn_rshift -- Right shift.
+
+dnl  Copyright 1995, 2000, 2001, 2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr      $4
+C src_ptr      $5
+C size         $6
+C cnt          $7
+
+ASM_START()
+PROLOGUE(mpn_rshift)
+       ld      $10,0($5)       C load first limb
+       dsubu   $13,$0,$7       C r13 = -cnt; variable shifts use the low 6 bits, so it acts as 64-cnt
+       daddiu  $6,$6,-1        C r6 = size - 1, limbs still to be loaded
+       and     $9,$6,4-1       C number of limbs in first loop
+       beq     $9,$0,.L0       C if multiple of 4 limbs, skip first loop
+        dsll   $2,$10,$13      C compute function result (branch delay slot, always executed)
+
+       dsubu   $6,$6,$9        C r6 = limbs left for the 4-way unrolled loop
+
+.Loop0:        ld      $3,8($5)        C fetch the next higher limb
+       daddiu  $4,$4,8
+       daddiu  $5,$5,8
+       daddiu  $9,$9,-1
+       dsrl    $11,$10,$7      C lower bits: current limb >> cnt
+       dsll    $12,$3,$13      C upper bits: next limb << (64-cnt)
+       move    $10,$3          C next limb becomes the current limb
+       or      $8,$11,$12
+       bne     $9,$0,.Loop0
+        sd     $8,-8($4)       C store result limb (branch delay slot)
+
+.L0:   beq     $6,$0,.Lend     C no limbs left for the unrolled loop
+        nop
+
+.Loop: ld      $3,8($5)        C the current limb alternates between r10 and r3 below
+       daddiu  $4,$4,32
+       daddiu  $6,$6,-4
+       dsrl    $11,$10,$7
+       dsll    $12,$3,$13
+
+       ld      $10,16($5)
+       dsrl    $14,$3,$7
+       or      $8,$11,$12
+       sd      $8,-32($4)      C first result limb of this round
+       dsll    $9,$10,$13
+
+       ld      $3,24($5)
+       dsrl    $11,$10,$7
+       or      $8,$14,$9
+       sd      $8,-24($4)      C second result limb
+       dsll    $12,$3,$13
+
+       ld      $10,32($5)
+       dsrl    $14,$3,$7
+       or      $8,$11,$12
+       sd      $8,-16($4)      C third result limb
+       dsll    $9,$10,$13
+
+       daddiu  $5,$5,32
+       or      $8,$14,$9
+       bgtz    $6,.Loop
+        sd     $8,-8($4)       C fourth result limb (branch delay slot)
+
+.Lend: dsrl    $8,$10,$7       C last limb only gets shifted, nothing to merge above it
+       j       $31
+       sd      $8,0($4)        C store it in the jump delay slot
+EPILOGUE(mpn_rshift)
diff --git a/mpn/mips64/sqr_diagonal.asm b/mpn/mips64/sqr_diagonal.asm

new file mode 100644 (file)

index 0000000..511a755
--- /dev/null
+++ b/mpn/mips64/sqr_diagonal.asm
@@ -0,0 +1,66 @@
+dnl  MIPS64 mpn_sqr_diagonal.
+
+dnl  Copyright 2001, 2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+dnl  INPUT PARAMETERS
+dnl  rp                $4
+dnl  up                $5
+dnl  n         $6
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_sqr_diagonal)
+       ld      r8,0(r5)        C first limb of up
+       daddiu  r6,r6,-2        C r6 = n - 2
+       dmultu  r8,r8           C start squaring the first limb, result lands in HI/LO
+       bltz    r6,$Lend1       C n < 2: only one square to store
+       nop
+       ld      r8,8(r5)        C second limb of up
+       beq     r6,r0,$Lend2    C n == 2: no loop iterations needed
+       nop
+
+$Loop: mflo    r10             C low limb of the pending square
+       mfhi    r9              C high limb of the pending square
+       daddiu  r6,r6,-1
+       sd      r10,0(r4)       C each input limb produces two result limbs
+       sd      r9,8(r4)
+       dmultu  r8,r8           C start squaring the limb loaded earlier
+       ld      r8,16(r5)       C load the limb for the following square
+       daddiu  r5,r5,8
+       bne     r6,r0,$Loop
+       daddiu  r4,r4,16        C advance rp by two limbs (branch delay slot)
+
+$Lend2: mflo   r10
+       mfhi    r9
+       sd      r10,0(r4)       C store the second-to-last square
+       sd      r9,8(r4)
+       dmultu  r8,r8           C square the last limb
+       mflo    r10
+       mfhi    r9
+       sd      r10,16(r4)
+       j       r31
+       sd      r9,24(r4)       C final store happens in the jump delay slot
+
+$Lend1: mflo   r10
+       mfhi    r9
+       sd      r10,0(r4)
+       j       r31
+       sd      r9,8(r4)        C final store happens in the jump delay slot
+EPILOGUE(mpn_sqr_diagonal)
diff --git a/mpn/mips64/sub_n.asm b/mpn/mips64/sub_n.asm

new file mode 100644 (file)

index 0000000..b28c1ce
--- /dev/null
+++ b/mpn/mips64/sub_n.asm
@@ -0,0 +1,112 @@
+dnl  MIPS64 mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+dnl  store difference in a third limb vector.
+
+dnl  Copyright 1995, 2000, 2001, 2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr      $4
+C s1_ptr       $5
+C s2_ptr       $6
+C size         $7
+
+ASM_START()
+PROLOGUE(mpn_sub_n)
+       ld      $10,0($5)       C first s1 limb
+       ld      $11,0($6)       C first s2 limb
+
+       daddiu  $7,$7,-1        C r7 = size - 1, limbs still to be loaded
+       and     $9,$7,4-1       C number of limbs in first loop
+       beq     $9,$0,.L0       C if multiple of 4 limbs, skip first loop
+        move   $2,$0           C borrow = 0 (branch delay slot, always executed)
+
+       dsubu   $7,$7,$9        C r7 = limbs left for the 4-way unrolled loop
+
+.Loop0:        daddiu  $9,$9,-1
+       ld      $12,8($5)       C load the next s1 limb early
+       daddu   $11,$11,$2      C s2 limb + incoming borrow
+       ld      $13,8($6)       C load the next s2 limb early
+       sltu    $8,$11,$2       C r8 = 1 if that addition wrapped around
+       dsubu   $11,$10,$11     C difference limb
+       sltu    $2,$10,$11      C r2 = 1 if s1 limb < difference, i.e. the subtraction borrowed
+       sd      $11,0($4)
+       or      $2,$2,$8        C outgoing borrow
+
+       daddiu  $5,$5,8
+       daddiu  $6,$6,8
+       move    $10,$12         C the early-loaded limbs become the current pair
+       move    $11,$13
+       bne     $9,$0,.Loop0
+        daddiu $4,$4,8         C advance res_ptr (branch delay slot)
+
+.L0:   beq     $7,$0,.Lend     C no limbs left for the unrolled loop
+        nop
+
+.Loop: daddiu  $7,$7,-4
+
+       ld      $12,8($5)
+       dsubu   $11,$10,$11     C s1 limb - s2 limb
+       ld      $13,8($6)
+       sltu    $8,$10,$11      C r8 = borrow from that subtraction
+       dsubu   $14,$11,$2      C subtract the incoming borrow
+       sltu    $2,$11,$14      C r2 = borrow from subtracting the borrow
+       sd      $14,0($4)
+       or      $2,$2,$8        C outgoing borrow
+
+       ld      $10,16($5)
+       dsubu   $13,$12,$13     C same steps with the limb pair in r12/r13
+       ld      $11,16($6)
+       sltu    $8,$12,$13
+       dsubu   $14,$13,$2
+       sltu    $2,$13,$14
+       sd      $14,8($4)
+       or      $2,$2,$8
+
+       ld      $12,24($5)
+       dsubu   $11,$10,$11     C same steps with the limb pair in r10/r11
+       ld      $13,24($6)
+       sltu    $8,$10,$11
+       dsubu   $14,$11,$2
+       sltu    $2,$11,$14
+       sd      $14,16($4)
+       or      $2,$2,$8
+
+       ld      $10,32($5)
+       dsubu   $13,$12,$13     C same steps with the limb pair in r12/r13
+       ld      $11,32($6)
+       sltu    $8,$12,$13
+       dsubu   $14,$13,$2
+       sltu    $2,$13,$14
+       sd      $14,24($4)
+       or      $2,$2,$8
+
+       daddiu  $5,$5,32
+       daddiu  $6,$6,32
+
+       bne     $7,$0,.Loop
+        daddiu $4,$4,32        C advance res_ptr (branch delay slot)
+
+.Lend: daddu   $11,$11,$2      C last limb pair: s2 limb + incoming borrow
+       sltu    $8,$11,$2       C r8 = 1 if that addition wrapped around
+       dsubu   $11,$10,$11     C last difference limb
+       sltu    $2,$10,$11      C r2 = 1 if the subtraction borrowed
+       sd      $11,0($4)
+       j       $31
+       or      $2,$2,$8        C final borrow left in r2 (jump delay slot)
+EPILOGUE(mpn_sub_n)
diff --git a/mpn/mips64/submul_1.asm b/mpn/mips64/submul_1.asm

new file mode 100644 (file)

index 0000000..11e1737
--- /dev/null
+++ b/mpn/mips64/submul_1.asm
@@ -0,0 +1,91 @@
+dnl  MIPS64 mpn_submul_1 -- Multiply a limb vector with a single limb and
+dnl  subtract the product from a second limb vector.
+
+dnl  Copyright 1992, 1994, 1995, 2000, 2001, 2002 Free Software Foundation,
+dnl  Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr      $4
+C s1_ptr       $5
+C size         $6
+C s2_limb      $7
+
+ASM_START()
+PROLOGUE(mpn_submul_1)
+
+C feed-in phase 0
+       ld      $8,0($5)        C load first s1 limb
+
+C feed-in phase 1
+       daddiu  $5,$5,8
+       dmultu  $8,$7           C start first limb * s2_limb, product lands in HI/LO
+
+       daddiu  $6,$6,-1
+       beq     $6,$0,$LC0      C size == 1: only wind-down phase 0 is needed
+        move   $2,$0           C zero cy2
+
+       daddiu  $6,$6,-1
+       beq     $6,$0,$LC1      C size == 2: skip the loop
+       ld      $8,0($5)        C load new s1 limb as early as possible
+
+Loop:  ld      $10,0($4)       C current res limb
+       mflo    $3              C low limb of the pending product
+       mfhi    $9              C high limb of the pending product
+       daddiu  $5,$5,8
+       daddu   $3,$3,$2        C add old carry limb to low product limb
+       dmultu  $8,$7           C start the next product
+       ld      $8,0($5)        C load new s1 limb as early as possible
+       daddiu  $6,$6,-1        C decrement loop counter
+       sltu    $2,$3,$2        C carry from previous addition -> $2
+       dsubu   $3,$10,$3       C res limb - (low product + carry)
+       sgtu    $10,$3,$10      C r10 = 1 if the result exceeds the old res limb, i.e. borrow
+       daddu   $2,$2,$10       C fold the borrow into the carry
+       sd      $3,0($4)
+       daddiu  $4,$4,8
+       bne     $6,$0,Loop
+        daddu  $2,$9,$2        C add high product limb and carry from addition
+
+C wind-down phase 1
+$LC1:  ld      $10,0($4)
+       mflo    $3
+       mfhi    $9
+       daddu   $3,$3,$2        C add old carry limb to low product limb
+       sltu    $2,$3,$2        C carry from that addition -> $2
+       dmultu  $8,$7           C start the final product
+       dsubu   $3,$10,$3       C res limb - (low product + carry)
+       sgtu    $10,$3,$10      C borrow from the subtraction
+       daddu   $2,$2,$10       C fold the borrow into the carry
+       sd      $3,0($4)
+       daddiu  $4,$4,8
+       daddu   $2,$9,$2        C add high product limb and carry from addition
+
+C wind-down phase 0
+$LC0:  ld      $10,0($4)
+       mflo    $3
+       mfhi    $9
+       daddu   $3,$3,$2        C add old carry limb to low product limb
+       sltu    $2,$3,$2        C carry from that addition -> $2
+       dsubu   $3,$10,$3       C res limb - (low product + carry)
+       sgtu    $10,$3,$10      C borrow from the subtraction
+       daddu   $2,$2,$10       C fold the borrow into the carry
+       sd      $3,0($4)        C store the last result limb
+       j       $31
+       daddu   $2,$9,$2        C add high product limb and carry from addition
+EPILOGUE(mpn_submul_1)
diff --git a/mpn/mips64/umul.asm b/mpn/mips64/umul.asm

new file mode 100644 (file)

index 0000000..1792d97
--- /dev/null
+++ b/mpn/mips64/umul.asm
@@ -0,0 +1,34 @@
+dnl  MIPS64 umul_ppmm -- longlong.h support.
+
+dnl  Copyright 2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C plp   $4
+C u     $5
+C v     $6
+
+ASM_START()
+PROLOGUE(mpn_umul_ppmm)
+       dmultu  $5,$6           C u * v, 128-bit product lands in HI/LO
+       mflo    $3              C low limb of the product
+       mfhi    $2              C high limb of the product, left in r2
+       j       $31
+       sd      $3,0($4)        C store the low limb through plp (jump delay slot)
+EPILOGUE(mpn_umul_ppmm)
diff --git a/mpn/ns32k/add_n.s b/mpn/ns32k/add_n.s

new file mode 100644 (file)

index 0000000..962cc16
--- /dev/null
+++ b/mpn/ns32k/add_n.s
@@ -0,0 +1,44 @@
+# ns32000 __gmpn_add_n -- Add two limb vectors of the same length > 0 and store
+# sum in a third limb vector.
+
+# Copyright 1992, 1994, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+       .align 1
+.globl ___gmpn_add_n
+___gmpn_add_n:
+       save    [r3,r4,r5]              # three registers saved before the sp-relative loads below
+       negd    28(sp),r3               # r3 = -(value at 28(sp)), presumably -size
+       movd    r3,r0
+       lshd    2,r0                    # r0 = r3 * 4, a negative byte offset
+       movd    24(sp),r4
+       subd    r0,r4                   # r4 -> to end of S2
+       movd    20(sp),r5
+       subd    r0,r5                   # r5 -> to end of S1
+       movd    16(sp),r2
+       subd    r0,r2                   # r2 -> to end of RES
+       subd    r0,r0                   # cy = 0
+
+Loop:  movd    r5[r3:d],r0             # r0 = S1 limb at index r3 (negative, counts up towards 0)
+       addcd   r4[r3:d],r0             # r0 += S2 limb + cy
+       movd    r0,r2[r3:d]             # store the sum limb into RES
+       acbd    1,r3,Loop               # r3 += 1 and loop again while r3 != 0
+
+       scsd    r0                      # r0 = cy.
+       restore [r5,r4,r3]
+       ret     0
diff --git a/mpn/ns32k/addmul_1.s b/mpn/ns32k/addmul_1.s

new file mode 100644 (file)

index 0000000..1dd8791
--- /dev/null
+++ b/mpn/ns32k/addmul_1.s
@@ -0,0 +1,46 @@
+# ns32000 __gmpn_addmul_1 -- Multiply a limb vector with a limb and add
+# the result to a second limb vector.
+
+# Copyright 1992, 1994, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+       .align 1
+.globl ___gmpn_addmul_1
+___gmpn_addmul_1:
+       save    [r3,r4,r5,r6,r7]        # 5 regs = 20 bytes, + 4 byte return address: args start at 24(sp)
+       negd    32(sp),r4               # r4 = -size (third argument)
+       movd    r4,r0
+       lshd    2,r0                    # r0 = -size * 4, a negative byte offset
+       movd    28(sp),r5
+       subd    r0,r5                   # r5 -> to end of S1
+       movd    24(sp),r6
+       subd    r0,r6                   # r6 -> to end of RES
+       subd    r0,r0                   # r0 = 0, cy = 0
+       movd    36(sp),r7               # r7 = s2_limb
+
+Loop:  movd    r5[r4:d],r2             # r2 = S1 limb at index r4 (negative, counts up towards 0)
+       meid    r7,r2                   # r2 = low_prod, r3 = high_prod
+       addcd   r0,r2                   # r2 = low_prod + cy_limb
+       movd    r3,r0                   # r0 = new cy_limb
+       addcd   0,r0                    # fold the carry of the addition above into cy_limb
+       addd    r2,r6[r4:d]             # RES limb += r2; its carry is consumed by the next addcd
+       acbd    1,r4,Loop               # r4 += 1 and loop again while r4 != 0
+
+       addcd   0,r0                    # add the carry left by the last addd; r0 is the result
+       restore [r7,r6,r5,r4,r3]
+       ret     0
diff --git a/mpn/ns32k/mul_1.s b/mpn/ns32k/mul_1.s

new file mode 100644 (file)

index 0000000..abc911e
--- /dev/null
+++ b/mpn/ns32k/mul_1.s
@@ -0,0 +1,45 @@
+# ns32000 __gmpn_mul_1 -- Multiply a limb vector with a limb and store
+# the result in a second limb vector.
+
+# Copyright 1992, 1994, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+       .align 1
+.globl ___gmpn_mul_1
+___gmpn_mul_1:
+       save    [r3,r4,r5,r6,r7]        # 5 regs = 20 bytes, + 4 byte return address: args start at 24(sp)
+       negd    32(sp),r4               # r4 = -size (third argument)
+       movd    r4,r0
+       lshd    2,r0                    # r0 = -size * 4, a negative byte offset
+       movd    28(sp),r5
+       subd    r0,r5                   # r5 -> to end of S1
+       movd    24(sp),r6
+       subd    r0,r6                   # r6 -> to end of RES
+       subd    r0,r0                   # r0 = 0, cy = 0
+       movd    36(sp),r7               # r7 = s2_limb
+
+Loop:  movd    r5[r4:d],r2             # r2 = S1 limb at index r4 (negative, counts up towards 0)
+       meid    r7,r2                   # r2 = low_prod, r3 = high_prod
+       addcd   r0,r2                   # r2 = low_prod + cy_limb
+       movd    r3,r0                   # r0 = new cy_limb
+       movd    r2,r6[r4:d]             # store the result limb into RES
+       acbd    1,r4,Loop               # r4 += 1 and loop again while r4 != 0
+
+       addcd   0,r0                    # add the carry left by the last addcd; r0 is the result
+       restore [r7,r6,r5,r4,r3]
+       ret     0
diff --git a/mpn/ns32k/sub_n.s b/mpn/ns32k/sub_n.s

new file mode 100644 (file)

index 0000000..5252ddf
--- /dev/null
+++ b/mpn/ns32k/sub_n.s
@@ -0,0 +1,44 @@
+# ns32000 __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+# store difference in a third limb vector.
+
+# Copyright 1992, 1994, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+       .align 1
+.globl ___gmpn_sub_n
+___gmpn_sub_n:
+       save    [r3,r4,r5]              # three registers saved before the sp-relative loads below
+       negd    28(sp),r3               # r3 = -(value at 28(sp)), presumably -size
+       movd    r3,r0
+       lshd    2,r0                    # r0 = r3 * 4, a negative byte offset
+       movd    24(sp),r4
+       subd    r0,r4                   # r4 -> to end of S2
+       movd    20(sp),r5
+       subd    r0,r5                   # r5 -> to end of S1
+       movd    16(sp),r2
+       subd    r0,r2                   # r2 -> to end of RES
+       subd    r0,r0                   # cy = 0
+
+Loop:  movd    r5[r3:d],r0             # r0 = S1 limb at index r3 (negative, counts up towards 0)
+       subcd   r4[r3:d],r0             # r0 -= S2 limb + cy
+       movd    r0,r2[r3:d]             # store the difference limb into RES
+       acbd    1,r3,Loop               # r3 += 1 and loop again while r3 != 0
+
+       scsd    r0                      # r0 = cy.
+       restore [r5,r4,r3]
+       ret     0
diff --git a/mpn/ns32k/submul_1.s b/mpn/ns32k/submul_1.s

new file mode 100644 (file)

index 0000000..7a0ba9a
--- /dev/null
+++ b/mpn/ns32k/submul_1.s
@@ -0,0 +1,46 @@
+# ns32000 __gmpn_submul_1 -- Multiply a limb vector with a limb and subtract
+# the result from a second limb vector.
+
+# Copyright 1992, 1994, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+       .align 1
+.globl ___gmpn_submul_1
+___gmpn_submul_1:
+       save    [r3,r4,r5,r6,r7]        # 5 regs = 20 bytes, + 4 byte return address: args start at 24(sp)
+       negd    32(sp),r4               # r4 = -size (third argument)
+       movd    r4,r0
+       lshd    2,r0                    # r0 = -size * 4, a negative byte offset
+       movd    28(sp),r5
+       subd    r0,r5                   # r5 -> to end of S1
+       movd    24(sp),r6
+       subd    r0,r6                   # r6 -> to end of RES
+       subd    r0,r0                   # r0 = 0, cy = 0
+       movd    36(sp),r7               # r7 = s2_limb
+
+Loop:  movd    r5[r4:d],r2             # r2 = S1 limb at index r4 (negative, counts up towards 0)
+       meid    r7,r2                   # r2 = low_prod, r3 = high_prod
+       addcd   r0,r2                   # r2 = low_prod + cy_limb
+       movd    r3,r0                   # r0 = new cy_limb
+       addcd   0,r0                    # fold the carry of the addition above into cy_limb
+       subd    r2,r6[r4:d]             # RES limb -= r2; its borrow is consumed by the next addcd
+       acbd    1,r4,Loop               # r4 += 1 and loop again while r4 != 0
+
+       addcd   0,r0                    # add the borrow left by the last subd; r0 is the result
+       restore [r7,r6,r5,r4,r3]
+       ret     0
diff --git a/mpn/pa32/README b/mpn/pa32/README

new file mode 100644 (file)

index 0000000..72158d3
--- /dev/null
+++ b/mpn/pa32/README
@@ -0,0 +1,151 @@
+Copyright 1996, 1999, 2001, 2002, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+
+
+
+
+This directory contains mpn functions for various HP PA-RISC chips.  Code
+that runs faster on the PA7100 and later implementations is in the pa7100
+directory.
+
+RELEVANT OPTIMIZATION ISSUES
+
+  Load and Store timing
+
+On the PA7000, no memory instruction can issue in the two cycles after a
+store.  For the PA7100, this is reduced to one cycle.
+
+The PA7100 has a lockup-free cache, so it helps to schedule loads and their
+dependent instructions really far from each other.
+
+STATUS
+
+1. mpn_mul_1 could be improved to 6.5 cycles/limb on the PA7100, using the
+   instructions below (but some sw pipelining is needed to avoid the
+   xmpyu-fstds delay):
+
+       fldds   s1_ptr
+
+       xmpyu
+       fstds   N(%r30)
+       xmpyu
+       fstds   N(%r30)
+
+       ldws    N(%r30)
+       ldws    N(%r30)
+       ldws    N(%r30)
+       ldws    N(%r30)
+
+       addc
+       stws    res_ptr
+       addc
+       stws    res_ptr
+
+       addib   Loop
+
+2. mpn_addmul_1 could be improved from the current 10 to 7.5 cycles/limb
+   (asymptotically) on the PA7100, using the instructions below.  With proper
+   sw pipelining and the unrolling level below, the speed becomes 8
+   cycles/limb.
+
+       fldds   s1_ptr
+       fldds   s1_ptr
+
+       xmpyu
+       fstds   N(%r30)
+       xmpyu
+       fstds   N(%r30)
+       xmpyu
+       fstds   N(%r30)
+       xmpyu
+       fstds   N(%r30)
+
+       ldws    N(%r30)
+       ldws    N(%r30)
+       ldws    N(%r30)
+       ldws    N(%r30)
+       ldws    N(%r30)
+       ldws    N(%r30)
+       ldws    N(%r30)
+       ldws    N(%r30)
+       addc
+       addc
+       addc
+       addc
+       addc    %r0,%r0,cy-limb
+
+       ldws    res_ptr
+       ldws    res_ptr
+       ldws    res_ptr
+       ldws    res_ptr
+       add
+       stws    res_ptr
+       addc
+       stws    res_ptr
+       addc
+       stws    res_ptr
+       addc
+       stws    res_ptr
+
+       addib
+
+3. For the PA8000 we have to stick to using 32-bit limbs before compiler
+   support emerges.  But we want to use 64-bit operations whenever possible,
+   in particular for loads and stores.  It is possible to handle mpn_add_n
+   efficiently by rotating (when s1/s2 are aligned), or masking+bit field
+   inserting (when they are not).  The speed should double compared to the
+   code used today.
+
+
+
+
+LABEL SYNTAX
+
+The HP-UX assembler takes labels starting in column 0 with no colon,
+
+       L$loop  ldws,mb -4(0,%r25),%r22
+
+Gas on hppa GNU/Linux however requires a colon,
+
+       L$loop: ldws,mb -4(0,%r25),%r22
+
+This is covered by using LDEF() from asm-defs.m4.  An alternative would be
+to use ".label" which is accepted by both,
+
+               .label  L$loop
+               ldws,mb -4(0,%r25),%r22
+
+but that's not as nice to look at, not if you're used to assembler code
+having labels in column 0.
+
+
+
+
+REFERENCES
+
+Hewlett Packard, "HP Assembler Reference Manual", 9th edition, June 1998,
+part number 92432-90012.
+
+
+
+----------------
+Local variables:
+mode: text
+fill-column: 76
+End:
diff --git a/mpn/pa32/add_n.asm b/mpn/pa32/add_n.asm

new file mode 100644 (file)

index 0000000..1bb27ae
--- /dev/null
+++ b/mpn/pa32/add_n.asm
@@ -0,0 +1,52 @@
+dnl  HP-PA mpn_add_n -- Add two limb vectors of the same length > 0 and store
+dnl  sum in a third limb vector.
+
+dnl  Copyright 1992, 1994, 2000, 2001, 2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr      gr26
+C s1_ptr       gr25
+C s2_ptr       gr24
+C size         gr23
+
+C One might want to unroll this as for other processors, but it turns out that
+C the data cache contention after a store makes such unrolling useless.  We
+C can't come under 5 cycles/limb anyway.
+
+ASM_START()
+PROLOGUE(mpn_add_n)
+       ldws,ma         4(0,%r25),%r20  C load first s1 limb, then advance s1_ptr by 4
+       ldws,ma         4(0,%r24),%r19  C load first s2 limb, then advance s2_ptr by 4
+
+       addib,=         -1,%r23,L(end)  C check for (SIZE == 1)
+        add            %r20,%r19,%r28  C add first limbs ignoring cy
+
+LDEF(loop)
+       ldws,ma         4(0,%r25),%r20  C next s1 limb
+       ldws,ma         4(0,%r24),%r19  C next s2 limb
+       stws,ma         %r28,4(0,%r26)  C store the previous sum limb, then advance res_ptr by 4
+       addib,<>        -1,%r23,L(loop) C count down; NOTE(review): relies on addib leaving the carry bit alone
+        addc           %r20,%r19,%r28  C add limbs plus carry (branch delay slot)
+
+LDEF(end)
+       stws            %r28,0(0,%r26)  C store the last sum limb
+       bv              0(%r2)          C return through r2
+        addc           %r0,%r0,%r28    C r28 = 0 + 0 + carry, the carry-out (branch delay slot)
+EPILOGUE()
diff --git a/mpn/pa32/gmp-mparam.h b/mpn/pa32/gmp-mparam.h

new file mode 100644 (file)

index 0000000..fd1eb97
--- /dev/null
+++ b/mpn/pa32/gmp-mparam.h
@@ -0,0 +1,51 @@
+/* HP-PA 1.0 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2012 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 32        /* bits per limb for this 32-bit PA-RISC configuration */
+#define BYTES_PER_MP_LIMB 4     /* limb size in bytes, i.e. GMP_LIMB_BITS / 8 */
+
+/* These values are for the PA7100 using GCC.  */
+/* Generated by tuneup.c, 2000-10-27. */
+
+#ifndef MUL_TOOM22_THRESHOLD
+#define MUL_TOOM22_THRESHOLD      30
+#endif
+#ifndef MUL_TOOM33_THRESHOLD
+#define MUL_TOOM33_THRESHOLD     141
+#endif
+
+#ifndef SQR_TOOM2_THRESHOLD
+#define SQR_TOOM2_THRESHOLD       59
+#endif
+#ifndef SQR_TOOM3_THRESHOLD
+#define SQR_TOOM3_THRESHOLD      177
+#endif
+
+#ifndef DIV_DC_THRESHOLD
+#define DIV_DC_THRESHOLD         108
+#endif
+
+#ifndef POWM_THRESHOLD
+#define POWM_THRESHOLD            18
+#endif
+
+#ifndef GCDEXT_THRESHOLD
+#define GCDEXT_THRESHOLD          33
+#endif
diff --git a/mpn/pa32/hppa1_1/addmul_1.asm b/mpn/pa32/hppa1_1/addmul_1.asm

new file mode 100644 (file)

index 0000000..c50e4e1
--- /dev/null
+++ b/mpn/pa32/hppa1_1/addmul_1.asm
@@ -0,0 +1,96 @@
+dnl  HP-PA 1.1 mpn_addmul_1 -- Multiply a limb vector with a limb and add the
+dnl  result to a second limb vector.
+
+dnl  Copyright 1992, 1993, 1994, 2000, 2001, 2002 Free Software Foundation,
+dnl  Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr      r26
+C s1_ptr       r25
+C size         r24
+C s2_limb      r23
+
+C This runs at 11 cycles/limb on a PA7000.  With the used instructions, it can
+C not become faster due to data cache contention after a store.  On the PA7100
+C it runs at 10 cycles/limb.
+
+C There are some ideas described in mul_1.asm that apply to this code too.
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+C      .callinfo       frame=64,no_calls
+C Adds s1[0..size-1] * s2_limb into res[0..size-1]; the carry-out limb ends in r28.
+       ldo             64(%r30),%r30           C open a 64-byte stack frame
+       fldws,ma        4(%r25),%fr5            C fr5 = s1 limb 0, s1_ptr += 4
+       stw             %r23,-16(%r30)          C move s2_limb ...
+       addib,=         -1,%r24,L(just_one_limb)  C size -= 1, branch if now 0
+        fldws          -16(%r30),%fr4          C ... into fr4
+       add             %r0,%r0,%r0             C clear carry
+       xmpyu           %fr4,%fr5,%fr6          C fr6 = s2_limb * limb 0
+       fldws,ma        4(%r25),%fr7            C fr7 = s1 limb 1, s1_ptr += 4
+       fstds           %fr6,-16(%r30)          C product 0 to stack for integer loads
+       xmpyu           %fr4,%fr7,%fr8          C fr8 = s2_limb * limb 1
+       ldw             -12(%r30),%r19          C least significant limb in product
+       ldw             -16(%r30),%r28          C most significant limb in product
+
+       fstds           %fr8,-16(%r30)          C product 1 to the same stack slot
+       addib,=         -1,%r24,L(end)          C size -= 1, skip loop if now 0
+        ldw            -12(%r30),%r1           C low limb of product 1 (delay slot)
+
+C Main loop
+LDEF(loop)
+       ldws            0(%r26),%r29            C r29 = res limb
+       fldws,ma        4(%r25),%fr5            C fetch next s1 limb, s1_ptr += 4
+       add             %r29,%r19,%r19          C res limb + pending low limb, sets cy
+       stws,ma         %r19,4(%r26)            C store sum, res_ptr += 4
+       addc            %r28,%r1,%r19           C high limb + next low limb + cy
+       xmpyu           %fr4,%fr5,%fr6          C multiply the limb just fetched
+       ldw             -16(%r30),%r28          C high limb, read before fstds overwrites it
+       fstds           %fr6,-16(%r30)          C newest product to stack
+       addc            %r0,%r28,%r28           C fold cy into high limb
+       addib,<>        -1,%r24,L(loop)         C size -= 1, loop until 0
+        ldw            -12(%r30),%r1           C low limb of newest product (delay slot)
+
+LDEF(end)
+       ldw             0(%r26),%r29            C r29 = res limb
+       add             %r29,%r19,%r19          C res limb + pending low limb, sets cy
+       stws,ma         %r19,4(%r26)            C store sum, res_ptr += 4
+       addc            %r28,%r1,%r19           C high limb + last low limb + cy
+       ldw             -16(%r30),%r28          C high limb of the last product
+       ldws            0(%r26),%r29            C r29 = final res limb
+       addc            %r0,%r28,%r28           C fold cy into high limb
+       add             %r29,%r19,%r19          C final res limb + pending limb, sets cy
+       stws,ma         %r19,4(%r26)            C store final sum
+       addc            %r0,%r28,%r28           C r28 = carry-out limb
+       bv              0(%r2)                  C return (branch to address in r2)
+        ldo            -64(%r30),%r30          C release frame (delay slot)
+
+LDEF(just_one_limb)
+       xmpyu           %fr4,%fr5,%fr6          C fr6 = s2_limb * the only limb
+       ldw             0(%r26),%r29            C r29 = res limb
+       fstds           %fr6,-16(%r30)          C product to stack for integer loads
+       ldw             -12(%r30),%r1           C low limb of product
+       ldw             -16(%r30),%r28          C high limb of product
+       add             %r29,%r1,%r19           C res limb + low limb, sets cy
+       stw             %r19,0(%r26)            C store sum
+       addc            %r0,%r28,%r28           C r28 = high limb + cy
+       bv              0(%r2)                  C return (branch to address in r2)
+        ldo            -64(%r30),%r30          C release frame (delay slot)
+EPILOGUE()
diff --git a/mpn/pa32/hppa1_1/gmp-mparam.h b/mpn/pa32/hppa1_1/gmp-mparam.h

new file mode 100644 (file)

index 0000000..32b3ae9
--- /dev/null
+++ b/mpn/pa32/hppa1_1/gmp-mparam.h
@@ -0,0 +1,62 @@
+/* HP-PA 1.1 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2004 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 32
+#define BYTES_PER_MP_LIMB 4  /* GMP_LIMB_BITS / 8 */
+
+/* Generated by tuneup.c, 2004-02-07, gcc 2.8 (pa7100/100MHz).  These are plain #defines with no #ifndef guards.  */
+
+#define MUL_TOOM22_THRESHOLD             30
+#define MUL_TOOM33_THRESHOLD             89
+
+#define SQR_BASECASE_THRESHOLD            4
+#define SQR_TOOM2_THRESHOLD              55
+#define SQR_TOOM3_THRESHOLD             101
+
+#define DIV_SB_PREINV_THRESHOLD           0  /* always */
+#define DIV_DC_THRESHOLD                 84
+#define POWM_THRESHOLD                  166
+
+#define HGCD_THRESHOLD                  231
+#define GCD_ACCEL_THRESHOLD               3
+#define GCD_DC_THRESHOLD                823
+#define JACOBI_BASE_METHOD                2
+
+#define DIVREM_1_NORM_THRESHOLD           5
+#define DIVREM_1_UNNORM_THRESHOLD        11
+#define MOD_1_NORM_THRESHOLD              5
+#define MOD_1_UNNORM_THRESHOLD           10
+#define USE_PREINV_DIVREM_1               1
+#define USE_PREINV_MOD_1                  1
+#define DIVREM_2_THRESHOLD                0  /* always */
+#define DIVEXACT_1_THRESHOLD              0  /* always */
+#define MODEXACT_1_ODD_THRESHOLD          0  /* always */
+
+#define GET_STR_DC_THRESHOLD             13
+#define GET_STR_PRECOMPUTE_THRESHOLD     23
+#define SET_STR_THRESHOLD              6589
+
+#define MUL_FFT_TABLE  { 464, 928, 1920, 4608, 14336, 40960, 0 }  /* list ends with 0 */
+#define MUL_FFT_MODF_THRESHOLD          480
+#define MUL_FFT_THRESHOLD              3328
+
+#define SQR_FFT_TABLE  { 528, 1184, 2176, 5632, 14336, 40960, 0 }  /* list ends with 0 */
+#define SQR_FFT_MODF_THRESHOLD          520
+#define SQR_FFT_THRESHOLD              3328
diff --git a/mpn/pa32/hppa1_1/mul_1.asm b/mpn/pa32/hppa1_1/mul_1.asm

new file mode 100644 (file)

index 0000000..9e17c2d
--- /dev/null
+++ b/mpn/pa32/hppa1_1/mul_1.asm
@@ -0,0 +1,92 @@
+dnl  HP-PA 1.1 mpn_mul_1 -- Multiply a limb vector with a limb and store the
+dnl  result in a second limb vector.
+
+dnl  Copyright 1992, 1993, 1994, 2000, 2001, 2002 Free Software Foundation,
+dnl  Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr      r26
+C s1_ptr       r25
+C size         r24
+C s2_limb      r23
+
+C This runs at 9 cycles/limb on a PA7000.  With the used instructions, it can
+C not become faster due to data cache contention after a store.  On the PA7100
+C it runs at 7 cycles/limb.
+
+C We could use fldds to read two limbs at a time from the S1 array, and that
+C could bring down the times to 8.5 and 6.5 cycles/limb for the PA7000 and
+C PA7100, respectively.  We don't do that since it does not seem worth the
+C (alignment) troubles...
+
+C At least the PA7100 is rumored to be able to deal with cache-misses without
+C stalling instruction issue.  If this is true, and the cache is actually also
+C lockup-free, we should use a deeper software pipeline, and load from S1 very
+C early!  (The loads and stores to -12(sp) will surely be in the cache.)
+
+ASM_START()
+PROLOGUE(mpn_mul_1)
+C      .callinfo       frame=64,no_calls
+C Stores s1[0..size-1] * s2_limb into res[0..size-1]; the carry-out limb ends in r28.
+       ldo             64(%r30),%r30           C open a 64-byte stack frame
+       fldws,ma        4(%r25),%fr5            C fr5 = s1 limb 0, s1_ptr += 4
+       stw             %r23,-16(%r30)          C move s2_limb ...
+       addib,=         -1,%r24,L(just_one_limb)  C size -= 1, branch if now 0
+        fldws          -16(%r30),%fr4          C ... into fr4
+       add             %r0,%r0,%r0             C clear carry
+       xmpyu           %fr4,%fr5,%fr6          C fr6 = s2_limb * limb 0
+       fldws,ma        4(%r25),%fr7            C fr7 = s1 limb 1, s1_ptr += 4
+       fstds           %fr6,-16(%r30)          C product 0 to stack for integer loads
+       xmpyu           %fr4,%fr7,%fr8          C fr8 = s2_limb * limb 1
+       ldw             -12(%r30),%r19          C least significant limb in product
+       ldw             -16(%r30),%r28          C most significant limb in product
+
+       fstds           %fr8,-16(%r30)          C product 1 to the same stack slot
+       addib,=         -1,%r24,L(end)          C size -= 1, skip loop if now 0
+        ldw            -12(%r30),%r1           C low limb of product 1 (delay slot)
+
+C Main loop
+LDEF(loop)
+       fldws,ma        4(%r25),%fr5            C fetch next s1 limb, s1_ptr += 4
+       stws,ma         %r19,4(%r26)            C store finished limb, res_ptr += 4
+       addc            %r28,%r1,%r19           C high limb + next low limb + cy
+       xmpyu           %fr4,%fr5,%fr6          C multiply the limb just fetched
+       ldw             -16(%r30),%r28          C high limb, read before fstds overwrites it
+       fstds           %fr6,-16(%r30)          C newest product to stack
+       addib,<>        -1,%r24,L(loop)         C size -= 1, loop until 0
+        ldw            -12(%r30),%r1           C low limb of newest product (delay slot)
+
+LDEF(end)
+       stws,ma         %r19,4(%r26)            C store finished limb, res_ptr += 4
+       addc            %r28,%r1,%r19           C high limb + last low limb + cy
+       ldw             -16(%r30),%r28          C high limb of the last product
+       stws,ma         %r19,4(%r26)            C store final result limb
+       addc            %r0,%r28,%r28           C r28 = carry-out limb
+       bv              0(%r2)                  C return (branch to address in r2)
+        ldo            -64(%r30),%r30          C release frame (delay slot)
+
+LDEF(just_one_limb)
+       xmpyu           %fr4,%fr5,%fr6          C fr6 = s2_limb * the only limb
+       fstds           %fr6,-16(%r30)          C product to stack for the integer load
+       ldw             -16(%r30),%r28          C r28 = high limb of product
+       ldo             -64(%r30),%r30          C release frame
+       bv              0(%r2)                  C return (branch to address in r2)
+        fstws          %fr6R,0(%r26)           C store low half of product (delay slot)
+EPILOGUE()
diff --git a/mpn/pa32/hppa1_1/pa7100/add_n.asm b/mpn/pa32/hppa1_1/pa7100/add_n.asm

new file mode 100644 (file)

index 0000000..326a133
--- /dev/null
+++ b/mpn/pa32/hppa1_1/pa7100/add_n.asm
@@ -0,0 +1,73 @@
+dnl  HP-PA mpn_add_n -- Add two limb vectors of the same length > 0 and store
+dnl  sum in a third limb vector.  Optimized for the PA7100, where it runs at
+dnl  4.25 cycles/limb.
+
+dnl  Copyright 1992, 1994, 2000, 2001, 2002, 2003 Free Software Foundation,
+dnl  Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr      r26
+C s1_ptr       r25
+C s2_ptr       r24
+C size         r23
+
+ASM_START()
+PROLOGUE(mpn_add_n)
+       ldws,ma         4(0,%r25),%r20          C r20 = s1 limb 0, s1_ptr += 4
+       ldws,ma         4(0,%r24),%r19          C r19 = s2 limb 0, s2_ptr += 4
+
+       addib,<=        -5,%r23,L(rest)         C size -= 5, unrolled loop only if size > 5
+        add            %r20,%r19,%r28  C add first limbs ignoring cy
+
+LDEF(loop)
+       ldws,ma         4(0,%r25),%r20          C step 1 of 4: load next s1 limb
+       ldws,ma         4(0,%r24),%r19          C load next s2 limb
+       stws,ma         %r28,4(0,%r26)          C store previous sum, res_ptr += 4
+       addc            %r20,%r19,%r28          C add with carry from previous limb
+       ldws,ma         4(0,%r25),%r20          C step 2 of 4
+       ldws,ma         4(0,%r24),%r19
+       stws,ma         %r28,4(0,%r26)
+       addc            %r20,%r19,%r28
+       ldws,ma         4(0,%r25),%r20          C step 3 of 4
+       ldws,ma         4(0,%r24),%r19
+       stws,ma         %r28,4(0,%r26)
+       addc            %r20,%r19,%r28
+       ldws,ma         4(0,%r25),%r20          C step 4 of 4
+       ldws,ma         4(0,%r24),%r19
+       stws,ma         %r28,4(0,%r26)
+       addib,>         -4,%r23,L(loop)         C size -= 4, loop while > 0
+       addc            %r20,%r19,%r28          C fourth add with carry (delay slot)
+
+LDEF(rest)
+       addib,=         4,%r23,L(end)           C size += 4 = limbs still to load, 0: finish
+       nop
+
+LDEF(eloop)
+       ldws,ma         4(0,%r25),%r20          C one limb per iteration
+       ldws,ma         4(0,%r24),%r19
+       stws,ma         %r28,4(0,%r26)          C store previous sum, res_ptr += 4
+       addib,>         -1,%r23,L(eloop)        C size -= 1, loop while > 0
+       addc            %r20,%r19,%r28          C add with carry (delay slot)
+
+LDEF(end)
+       stws            %r28,0(0,%r26)          C store the last sum limb
+       bv              0(%r2)                  C return (branch to address in r2)
+        addc           %r0,%r0,%r28            C r28 = 0 + 0 + cy, the carry out
+EPILOGUE()
diff --git a/mpn/pa32/hppa1_1/pa7100/addmul_1.asm b/mpn/pa32/hppa1_1/pa7100/addmul_1.asm

new file mode 100644 (file)

index 0000000..57f4d76
--- /dev/null
+++ b/mpn/pa32/hppa1_1/pa7100/addmul_1.asm
@@ -0,0 +1,190 @@
+dnl  HP-PA 7100/7200 mpn_addmul_1 -- Multiply a limb vector with a limb and
+dnl  add the result to a second limb vector.
+
+dnl  Copyright 1995, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+define(`res_ptr',`%r26')
+define(`s1_ptr',`%r25')
+define(`size_param',`%r24')
+define(`s2_limb',`%r23')
+
+define(`cylimb',`%r28')
+define(`s0',`%r19')
+define(`s1',`%r20')
+define(`s2',`%r3')                             C r3-r7 are saved and restored below
+define(`s3',`%r4')
+define(`lo0',`%r21')
+define(`lo1',`%r5')
+define(`lo2',`%r6')
+define(`lo3',`%r7')
+define(`hi0',`%r22')
+define(`hi1',`%r23')                           C safe to reuse
+define(`hi2',`%r29')
+define(`hi3',`%r1')
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+C      .callinfo       frame=128,no_calls
+C Adds s1[] * s2_limb into res[] over size_param limbs; the carry-out limb ends in cylimb.
+       ldo     128(%r30),%r30                  C open a 128-byte stack frame
+       stws    s2_limb,-16(%r30)               C move s2_limb ...
+       add      %r0,%r0,cylimb                 C clear cy and cylimb
+       addib,< -4,size_param,L(few_limbs)      C size -= 4, under 4 limbs: simple loop
+       fldws   -16(%r30),%fr31R                C ... into fr31R (delay slot)
+
+       ldo     -112(%r30),%r31                 C r31 addresses the product scratch area
+       stw     %r3,-96(%r30)                   C save r3-r7 in the frame
+       stw     %r4,-92(%r30)
+       stw     %r5,-88(%r30)
+       stw     %r6,-84(%r30)
+       stw     %r7,-80(%r30)
+
+       bb,>=,n  s1_ptr,29,L(0)                 C bit 29 of s1_ptr clear: no single-limb fix-up
+
+       fldws,ma 4(s1_ptr),%fr4                 C fix-up: handle one limb on its own
+       ldws     0(res_ptr),s0
+       xmpyu    %fr4,%fr31R,%fr5
+       fstds    %fr5,-16(%r31)
+       ldws    -16(%r31),cylimb                C high limb becomes the carry limb
+       ldws    -12(%r31),lo0
+       add      s0,lo0,s0                      C cy from here feeds the addc chain below
+       addib,< -1,size_param,L(few_limbs)      C size -= 1, under 4 limbs left: simple loop
+       stws,ma  s0,4(res_ptr)                  C store limb, res_ptr += 4 (delay slot)
+
+C start software pipeline ----------------------------------------------------
+LDEF(0)
+       fldds,ma 8(s1_ptr),%fr4                 C load two s1 limbs, s1_ptr += 8
+       fldds,ma 8(s1_ptr),%fr8                 C load two more
+
+       xmpyu    %fr4L,%fr31R,%fr5              C four products with s2_limb
+       xmpyu    %fr4R,%fr31R,%fr6
+       xmpyu    %fr8L,%fr31R,%fr9
+       xmpyu    %fr8R,%fr31R,%fr10
+
+       fstds    %fr5,-16(%r31)                 C spill the products to the scratch area
+       fstds    %fr6,-8(%r31)
+       fstds    %fr9,0(%r31)
+       fstds    %fr10,8(%r31)
+
+       ldws   -16(%r31),hi0                    C reload them as hi/lo word pairs
+       ldws   -12(%r31),lo0
+       ldws    -8(%r31),hi1
+       ldws    -4(%r31),lo1
+       ldws     0(%r31),hi2
+       ldws     4(%r31),lo2
+       ldws     8(%r31),hi3
+       ldws    12(%r31),lo3
+
+       addc     lo0,cylimb,lo0                 C lo0 += incoming carry limb + cy
+       addc     lo1,hi0,lo1                    C each low limb absorbs the previous high limb
+       addc     lo2,hi1,lo2
+       addc     lo3,hi2,lo3
+
+       addib,<  -4,size_param,L(end)           C size -= 4, under 4 more limbs: drain
+       addc     %r0,hi3,cylimb                 C propagate carry into cylimb
+C main loop ------------------------------------------------------------------
+LDEF(loop)
+       fldds,ma 8(s1_ptr),%fr4                 C load the next four s1 limbs
+       fldds,ma 8(s1_ptr),%fr8
+
+       ldws     0(res_ptr),s0                  C res limbs for the previous group ...
+       xmpyu    %fr4L,%fr31R,%fr5              C ... interleaved with the new products
+       ldws     4(res_ptr),s1
+       xmpyu    %fr4R,%fr31R,%fr6
+       ldws     8(res_ptr),s2
+       xmpyu    %fr8L,%fr31R,%fr9
+       ldws    12(res_ptr),s3
+       xmpyu    %fr8R,%fr31R,%fr10
+
+       fstds    %fr5,-16(%r31)                 C spill new products ...
+       add      s0,lo0,s0                      C ... while adding the previous group into res
+       fstds    %fr6,-8(%r31)
+       addc     s1,lo1,s1
+       fstds    %fr9,0(%r31)
+       addc     s2,lo2,s2
+       fstds    %fr10,8(%r31)
+       addc     s3,lo3,s3                      C cy out feeds the addc on lo0 below
+
+       ldws   -16(%r31),hi0                    C reload new products as hi/lo word pairs
+       ldws   -12(%r31),lo0
+       ldws    -8(%r31),hi1
+       ldws    -4(%r31),lo1
+       ldws     0(%r31),hi2
+       ldws     4(%r31),lo2
+       ldws     8(%r31),hi3
+       ldws    12(%r31),lo3
+
+       addc     lo0,cylimb,lo0                 C chain new low limbs with earlier high limbs
+       stws,ma  s0,4(res_ptr)                  C store previous group, res_ptr += 4 each
+       addc     lo1,hi0,lo1
+       stws,ma  s1,4(res_ptr)
+       addc     lo2,hi1,lo2
+       stws,ma  s2,4(res_ptr)
+       addc     lo3,hi2,lo3
+       stws,ma  s3,4(res_ptr)
+
+       addib,>= -4,size_param,L(loop)          C size -= 4, loop while >= 0
+       addc     %r0,hi3,cylimb                 C propagate carry into cylimb
+C finish software pipeline ---------------------------------------------------
+LDEF(end)
+       ldws     0(res_ptr),s0                  C res limbs for the last pending group
+       ldws     4(res_ptr),s1
+       ldws     8(res_ptr),s2
+       ldws    12(res_ptr),s3
+
+       add      s0,lo0,s0                      C add pending limbs into res
+       stws,ma  s0,4(res_ptr)
+       addc     s1,lo1,s1
+       stws,ma  s1,4(res_ptr)
+       addc     s2,lo2,s2
+       stws,ma  s2,4(res_ptr)
+       addc     s3,lo3,s3                      C cy out is consumed at loop2 or ret
+       stws,ma  s3,4(res_ptr)
+
+C restore callee-saves registers ---------------------------------------------
+       ldw     -96(%r30),%r3
+       ldw     -92(%r30),%r4
+       ldw     -88(%r30),%r5
+       ldw     -84(%r30),%r6
+       ldw     -80(%r30),%r7
+
+LDEF(few_limbs)
+       addib,=,n 4,size_param,L(ret)           C size += 4 = limbs left, 0: return
+
+LDEF(loop2)
+       fldws,ma 4(s1_ptr),%fr4                 C one limb per iteration
+       ldws     0(res_ptr),s0
+       xmpyu    %fr4,%fr31R,%fr5
+       fstds    %fr5,-16(%r30)                 C product to stack for integer loads
+       ldws    -16(%r30),hi0
+       ldws    -12(%r30),lo0
+       addc     lo0,cylimb,lo0                 C low limb + carry limb + cy
+       addc     %r0,hi0,cylimb                 C new carry limb = high limb + cy
+       add      s0,lo0,s0                      C add into res limb, cy kept for next round
+       stws,ma  s0,4(res_ptr)
+       addib,<> -1,size_param,L(loop2)         C size -= 1, loop until 0
+       nop
+
+LDEF(ret)
+       addc     %r0,cylimb,cylimb              C fold the last cy into the carry limb
+       bv       0(%r2)                         C return (branch to address in r2)
+       ldo      -128(%r30),%r30                C release frame (delay slot)
+EPILOGUE(mpn_addmul_1)
diff --git a/mpn/pa32/hppa1_1/pa7100/lshift.asm b/mpn/pa32/hppa1_1/pa7100/lshift.asm

new file mode 100644 (file)

index 0000000..f6b4068
--- /dev/null
+++ b/mpn/pa32/hppa1_1/pa7100/lshift.asm
@@ -0,0 +1,85 @@
+dnl  HP-PA  mpn_lshift -- Shift a number left.
+dnl  Optimized for the PA7100, where it runs at 3.25 cycles/limb.
+
+dnl  Copyright 1992, 1994, 2000, 2001, 2002, 2003 Free Software Foundation,
+dnl  Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr      r26
+C s_ptr                r25
+C size         r24
+C cnt          r23
+
+ASM_START()
+PROLOGUE(mpn_lshift)
+       sh2add          %r24,%r25,%r25          C s_ptr += 4 * size
+       sh2add          %r24,%r26,%r26          C res_ptr += 4 * size
+       ldws,mb         -4(0,%r25),%r22         C s_ptr -= 4, r22 = top source limb
+       subi            32,%r23,%r1             C r1 = 32 - cnt
+       mtsar           %r1                     C shift amount register = 32 - cnt
+       addib,=         -1,%r24,L(0004)         C size -= 1, single limb: go finish
+       vshd            %r0,%r22,%r28           C compute carry out limb
+       ldws,mb         -4(0,%r25),%r29         C r29 = next lower limb
+       addib,<=        -5,%r24,L(rest)         C size -= 5, unrolled loop only if > 0
+       vshd            %r22,%r29,%r20          C r20 = r22:r29 shifted right by 32 - cnt
+
+LDEF(loop)
+       ldws,mb         -4(0,%r25),%r22         C step 1 of 4: load next lower limb
+       stws,mb         %r20,-4(0,%r26)         C res_ptr -= 4, store previous result limb
+       vshd            %r29,%r22,%r20          C combine the two newest limbs
+       ldws,mb         -4(0,%r25),%r29         C step 2 of 4, r22 and r29 swap roles
+       stws,mb         %r20,-4(0,%r26)
+       vshd            %r22,%r29,%r20
+       ldws,mb         -4(0,%r25),%r22         C step 3 of 4
+       stws,mb         %r20,-4(0,%r26)
+       vshd            %r29,%r22,%r20
+       ldws,mb         -4(0,%r25),%r29         C step 4 of 4
+       stws,mb         %r20,-4(0,%r26)
+       addib,>         -4,%r24,L(loop)         C size -= 4, loop while > 0
+       vshd            %r22,%r29,%r20          C fourth combine (delay slot)
+
+LDEF(rest)
+       addib,=         4,%r24,L(end1)          C size += 4 = limbs still to load, 0: finish
+       nop
+
+LDEF(eloop)
+       ldws,mb         -4(0,%r25),%r22         C two limbs per iteration, r22 first
+       stws,mb         %r20,-4(0,%r26)
+       addib,<=        -1,%r24,L(end2)         C size -= 1, done: last limb is in r22
+       vshd            %r29,%r22,%r20
+       ldws,mb         -4(0,%r25),%r29         C then r29
+       stws,mb         %r20,-4(0,%r26)
+       addib,>         -1,%r24,L(eloop)        C size -= 1, loop while > 0
+       vshd            %r22,%r29,%r20
+
+LDEF(end1)
+       stws,mb         %r20,-4(0,%r26)         C last limb loaded is in r29
+       vshd            %r29,%r0,%r20           C lowest result limb, zeros shifted in
+       bv              0(%r2)                  C return (branch to address in r2)
+       stw             %r20,-4(0,%r26)         C store lowest result limb (delay slot)
+
+LDEF(end2)
+       stws,mb         %r20,-4(0,%r26)         C last limb loaded is in r22
+
+LDEF(0004)
+       vshd            %r22,%r0,%r20           C lowest result limb, zeros shifted in
+       bv              0(%r2)                  C return (branch to address in r2)
+       stw             %r20,-4(0,%r26)         C store lowest result limb (delay slot)
+EPILOGUE()
diff --git a/mpn/pa32/hppa1_1/pa7100/rshift.asm b/mpn/pa32/hppa1_1/pa7100/rshift.asm

new file mode 100644 (file)

index 0000000..ed7313b
--- /dev/null
+++ b/mpn/pa32/hppa1_1/pa7100/rshift.asm
@@ -0,0 +1,82 @@
+dnl  HP-PA  mpn_rshift -- Shift a number right.
+dnl  Optimized for the PA7100, where it runs at 3.25 cycles/limb.
+
+dnl  Copyright 1992, 1994, 2000, 2001, 2002, 2003 Free Software Foundation,
+dnl  Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr      r26
+C s_ptr                r25
+C size         r24
+C cnt          r23
+
+ASM_START()
+PROLOGUE(mpn_rshift)
+       ldws,ma         4(0,%r25),%r22          C r22 = lowest source limb, s_ptr += 4
+       mtsar           %r23                    C shift amount register = cnt
+       addib,=         -1,%r24,L(0004)         C size -= 1, single limb: go finish
+       vshd            %r22,%r0,%r28           C compute carry out limb
+       ldws,ma         4(0,%r25),%r29          C r29 = next higher limb
+       addib,<=        -5,%r24,L(rest)         C size -= 5, unrolled loop only if > 0
+       vshd            %r29,%r22,%r20          C r20 = r29:r22 shifted right by cnt
+
+LDEF(loop)
+       ldws,ma         4(0,%r25),%r22          C step 1 of 4: load next higher limb
+       stws,ma         %r20,4(0,%r26)          C store previous result limb, res_ptr += 4
+       vshd            %r22,%r29,%r20          C combine the two newest limbs
+       ldws,ma         4(0,%r25),%r29          C step 2 of 4, r22 and r29 swap roles
+       stws,ma         %r20,4(0,%r26)
+       vshd            %r29,%r22,%r20
+       ldws,ma         4(0,%r25),%r22          C step 3 of 4
+       stws,ma         %r20,4(0,%r26)
+       vshd            %r22,%r29,%r20
+       ldws,ma         4(0,%r25),%r29          C step 4 of 4
+       stws,ma         %r20,4(0,%r26)
+       addib,>         -4,%r24,L(loop)         C size -= 4, loop while > 0
+       vshd            %r29,%r22,%r20          C fourth combine (delay slot)
+
+LDEF(rest)
+       addib,=         4,%r24,L(end1)          C size += 4 = limbs still to load, 0: finish
+       nop
+
+LDEF(eloop)
+       ldws,ma         4(0,%r25),%r22          C two limbs per iteration, r22 first
+       stws,ma         %r20,4(0,%r26)
+       addib,<=        -1,%r24,L(end2)         C size -= 1, done: last limb is in r22
+       vshd            %r22,%r29,%r20
+       ldws,ma         4(0,%r25),%r29          C then r29
+       stws,ma         %r20,4(0,%r26)
+       addib,>         -1,%r24,L(eloop)        C size -= 1, loop while > 0
+       vshd            %r29,%r22,%r20
+
+LDEF(end1)
+       stws,ma         %r20,4(0,%r26)          C last limb loaded is in r29
+       vshd            %r0,%r29,%r20           C highest result limb, zeros shifted in
+       bv              0(%r2)                  C return (branch to address in r2)
+       stw             %r20,0(0,%r26)          C store highest result limb (delay slot)
+
+LDEF(end2)
+       stws,ma         %r20,4(0,%r26)          C last limb loaded is in r22
+
+LDEF(0004)
+       vshd            %r0,%r22,%r20           C highest result limb, zeros shifted in
+       bv              0(%r2)                  C return (branch to address in r2)
+       stw             %r20,0(0,%r26)          C store highest result limb (delay slot)
+EPILOGUE()
diff --git a/mpn/pa32/hppa1_1/pa7100/sub_n.asm b/mpn/pa32/hppa1_1/pa7100/sub_n.asm

new file mode 100644 (file)

index 0000000..38ea0e1
--- /dev/null
+++ b/mpn/pa32/hppa1_1/pa7100/sub_n.asm
@@ -0,0 +1,74 @@
+dnl  HP-PA mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+dnl  store difference in a third limb vector.  Optimized for the PA7100, where
+dnl  it runs at 4.25 cycles/limb.
+
+dnl  Copyright 1992, 1994, 2000, 2001, 2002, 2003 Free Software Foundation,
+dnl  Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr      r26
+C s1_ptr       r25
+C s2_ptr       r24
+C size         r23
+
+ASM_START()
+PROLOGUE(mpn_sub_n)
+       ldws,ma         4(0,%r25),%r20          C r20 = s1 limb 0, s1_ptr += 4
+       ldws,ma         4(0,%r24),%r19          C r19 = s2 limb 0, s2_ptr += 4
+
+       addib,<=        -5,%r23,L(rest)         C size -= 5, unrolled loop only if size > 5
+        sub            %r20,%r19,%r28  C subtract first limbs ignoring cy
+
+LDEF(loop)
+       ldws,ma         4(0,%r25),%r20          C step 1 of 4: load next s1 limb
+       ldws,ma         4(0,%r24),%r19          C load next s2 limb
+       stws,ma         %r28,4(0,%r26)          C store previous difference, res_ptr += 4
+       subb            %r20,%r19,%r28          C subtract with borrow from previous limb
+       ldws,ma         4(0,%r25),%r20          C step 2 of 4
+       ldws,ma         4(0,%r24),%r19
+       stws,ma         %r28,4(0,%r26)
+       subb            %r20,%r19,%r28
+       ldws,ma         4(0,%r25),%r20          C step 3 of 4
+       ldws,ma         4(0,%r24),%r19
+       stws,ma         %r28,4(0,%r26)
+       subb            %r20,%r19,%r28
+       ldws,ma         4(0,%r25),%r20          C step 4 of 4
+       ldws,ma         4(0,%r24),%r19
+       stws,ma         %r28,4(0,%r26)
+       addib,>         -4,%r23,L(loop)         C size -= 4, loop while > 0
+       subb            %r20,%r19,%r28          C fourth subtract with borrow (delay slot)
+
+LDEF(rest)
+       addib,=         4,%r23,L(end)           C size += 4 = limbs still to load, 0: finish
+       nop
+
+LDEF(eloop)
+       ldws,ma         4(0,%r25),%r20          C one limb per iteration
+       ldws,ma         4(0,%r24),%r19
+       stws,ma         %r28,4(0,%r26)          C store previous difference, res_ptr += 4
+       addib,>         -1,%r23,L(eloop)        C size -= 1, loop while > 0
+       subb            %r20,%r19,%r28          C subtract with borrow (delay slot)
+
+LDEF(end)
+       stws            %r28,0(0,%r26)          C store the last difference limb
+       addc            %r0,%r0,%r28            C r28 = 0 + 0 + cy
+       bv              0(%r2)                  C return (branch to address in r2)
+        subi           1,%r28,%r28             C r28 = 1 - r28, the borrow out
+EPILOGUE()
diff --git a/mpn/pa32/hppa1_1/pa7100/submul_1.asm b/mpn/pa32/hppa1_1/pa7100/submul_1.asm

new file mode 100644 (file)

index 0000000..aee9d90
--- /dev/null
+++ b/mpn/pa32/hppa1_1/pa7100/submul_1.asm
@@ -0,0 +1,196 @@
+dnl  HP-PA 7100/7200 mpn_submul_1 -- Multiply a limb vector with a limb and
+dnl  subtract the result from a second limb vector.
+
+dnl  Copyright 1995, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+define(`res_ptr',`%r26')
+define(`s1_ptr',`%r25')
+define(`size_param',`%r24')
+define(`s2_limb',`%r23')
+
+define(`cylimb',`%r28')
+define(`s0',`%r19')
+define(`s1',`%r20')
+define(`s2',`%r3')                             C r3-r7 are saved and restored below
+define(`s3',`%r4')
+define(`lo0',`%r21')
+define(`lo1',`%r5')
+define(`lo2',`%r6')
+define(`lo3',`%r7')
+define(`hi0',`%r22')
+define(`hi1',`%r23')                           C safe to reuse
+define(`hi2',`%r29')
+define(`hi3',`%r1')
+
+ASM_START()
+PROLOGUE(mpn_submul_1)
+C      .callinfo       frame=128,no_calls
+C Subtracts s1[] * s2_limb from res[] over size_param limbs; the borrow-out limb ends in cylimb.
+       ldo     128(%r30),%r30                  C open a 128-byte stack frame
+       stws    s2_limb,-16(%r30)               C move s2_limb ...
+       add      %r0,%r0,cylimb                 C clear cy and cylimb
+       addib,< -4,size_param,L(few_limbs)      C size -= 4, under 4 limbs: simple loop
+       fldws   -16(%r30),%fr31R                C ... into fr31R (delay slot)
+
+       ldo     -112(%r30),%r31                 C r31 addresses the product scratch area
+       stw     %r3,-96(%r30)                   C save r3-r7 in the frame
+       stw     %r4,-92(%r30)
+       stw     %r5,-88(%r30)
+       stw     %r6,-84(%r30)
+       stw     %r7,-80(%r30)
+
+       bb,>=,n  s1_ptr,29,L(0)                 C bit 29 of s1_ptr clear: no single-limb fix-up
+
+       fldws,ma 4(s1_ptr),%fr4                 C fix-up: handle one limb on its own
+       ldws     0(res_ptr),s0
+       xmpyu    %fr4,%fr31R,%fr5
+       fstds    %fr5,-16(%r31)
+       ldws    -16(%r31),cylimb                C high limb becomes the carry limb
+       ldws    -12(%r31),lo0
+       sub      s0,lo0,s0                      C res limb - low limb of product
+       add      s0,lo0,%r0                     C invert cy
+       addib,< -1,size_param,L(few_limbs)      C size -= 1, under 4 limbs left: simple loop
+       stws,ma  s0,4(res_ptr)                  C store limb, res_ptr += 4 (delay slot)
+
+C start software pipeline ----------------------------------------------------
+LDEF(0)
+       fldds,ma 8(s1_ptr),%fr4                 C load two s1 limbs, s1_ptr += 8
+       fldds,ma 8(s1_ptr),%fr8                 C load two more
+
+       xmpyu    %fr4L,%fr31R,%fr5              C four products with s2_limb
+       xmpyu    %fr4R,%fr31R,%fr6
+       xmpyu    %fr8L,%fr31R,%fr9
+       xmpyu    %fr8R,%fr31R,%fr10
+
+       fstds    %fr5,-16(%r31)                 C spill the products to the scratch area
+       fstds    %fr6,-8(%r31)
+       fstds    %fr9,0(%r31)
+       fstds    %fr10,8(%r31)
+
+       ldws   -16(%r31),hi0                    C reload them as hi/lo word pairs
+       ldws   -12(%r31),lo0
+       ldws    -8(%r31),hi1
+       ldws    -4(%r31),lo1
+       ldws     0(%r31),hi2
+       ldws     4(%r31),lo2
+       ldws     8(%r31),hi3
+       ldws    12(%r31),lo3
+
+       addc     lo0,cylimb,lo0                 C lo0 += incoming carry limb + cy
+       addc     lo1,hi0,lo1                    C each low limb absorbs the previous high limb
+       addc     lo2,hi1,lo2
+       addc     lo3,hi2,lo3
+
+       addib,<  -4,size_param,L(end)           C size -= 4, under 4 more limbs: drain
+       addc     %r0,hi3,cylimb                 C propagate carry into cylimb
+C main loop ------------------------------------------------------------------
+LDEF(loop)
+       fldds,ma 8(s1_ptr),%fr4                 C load the next four s1 limbs
+       fldds,ma 8(s1_ptr),%fr8
+
+       ldws     0(res_ptr),s0                  C res limbs for the previous group ...
+       xmpyu    %fr4L,%fr31R,%fr5              C ... interleaved with the new products
+       ldws     4(res_ptr),s1
+       xmpyu    %fr4R,%fr31R,%fr6
+       ldws     8(res_ptr),s2
+       xmpyu    %fr8L,%fr31R,%fr9
+       ldws    12(res_ptr),s3
+       xmpyu    %fr8R,%fr31R,%fr10
+
+       fstds    %fr5,-16(%r31)                 C spill new products ...
+       sub      s0,lo0,s0                      C ... while subtracting the previous group from res
+       fstds    %fr6,-8(%r31)
+       subb     s1,lo1,s1
+       fstds    %fr9,0(%r31)
+       subb     s2,lo2,s2
+       fstds    %fr10,8(%r31)
+       subb     s3,lo3,s3
+       subb     %r0,%r0,lo0                    C these two insns ...
+       add      lo0,lo0,%r0                    C ... just invert cy
+
+       ldws   -16(%r31),hi0                    C reload new products as hi/lo word pairs
+       ldws   -12(%r31),lo0
+       ldws    -8(%r31),hi1
+       ldws    -4(%r31),lo1
+       ldws     0(%r31),hi2
+       ldws     4(%r31),lo2
+       ldws     8(%r31),hi3
+       ldws    12(%r31),lo3
+
+       addc     lo0,cylimb,lo0                 C chain new low limbs with earlier high limbs
+       stws,ma  s0,4(res_ptr)                  C store previous group, res_ptr += 4 each
+       addc     lo1,hi0,lo1
+       stws,ma  s1,4(res_ptr)
+       addc     lo2,hi1,lo2
+       stws,ma  s2,4(res_ptr)
+       addc     lo3,hi2,lo3
+       stws,ma  s3,4(res_ptr)
+
+       addib,>= -4,size_param,L(loop)          C size -= 4, loop while >= 0
+       addc     %r0,hi3,cylimb                 C propagate carry into cylimb
+C finish software pipeline ---------------------------------------------------
+LDEF(end)
+       ldws     0(res_ptr),s0                  C res limbs for the last pending group
+       ldws     4(res_ptr),s1
+       ldws     8(res_ptr),s2
+       ldws    12(res_ptr),s3
+
+       sub      s0,lo0,s0                      C subtract pending limbs from res
+       stws,ma  s0,4(res_ptr)
+       subb     s1,lo1,s1
+       stws,ma  s1,4(res_ptr)
+       subb     s2,lo2,s2
+       stws,ma  s2,4(res_ptr)
+       subb     s3,lo3,s3
+       stws,ma  s3,4(res_ptr)
+       subb     %r0,%r0,lo0                    C these two insns ...
+       add      lo0,lo0,%r0                    C ... invert cy
+
+C restore callee-saves registers ---------------------------------------------
+       ldw     -96(%r30),%r3
+       ldw     -92(%r30),%r4
+       ldw     -88(%r30),%r5
+       ldw     -84(%r30),%r6
+       ldw     -80(%r30),%r7
+
+LDEF(few_limbs)
+       addib,=,n 4,size_param,L(ret)           C size += 4 = limbs left, 0: return
+
+LDEF(loop2)
+       fldws,ma 4(s1_ptr),%fr4                 C one limb per iteration
+       ldws     0(res_ptr),s0
+       xmpyu    %fr4,%fr31R,%fr5
+       fstds    %fr5,-16(%r30)                 C product to stack for integer loads
+       ldws    -16(%r30),hi0
+       ldws    -12(%r30),lo0
+       addc     lo0,cylimb,lo0                 C low limb + carry limb + cy
+       addc     %r0,hi0,cylimb                 C new carry limb = high limb + cy
+       sub      s0,lo0,s0                      C subtract from res limb
+       add      s0,lo0,%r0                     C invert cy
+       stws,ma  s0,4(res_ptr)
+       addib,<> -1,size_param,L(loop2)         C size -= 1, loop until 0
+       nop
+
+LDEF(ret)
+       addc     %r0,cylimb,cylimb              C fold the last cy into the carry limb
+       bv       0(%r2)                         C return (branch to address in r2)
+       ldo      -128(%r30),%r30                C release frame (delay slot)
+EPILOGUE(mpn_submul_1)
diff --git a/mpn/pa32/hppa1_1/sqr_diagonal.asm b/mpn/pa32/hppa1_1/sqr_diagonal.asm

new file mode 100644 (file)

index 0000000..4eba989
--- /dev/null
+++ b/mpn/pa32/hppa1_1/sqr_diagonal.asm
@@ -0,0 +1,49 @@
+dnl  HP-PA 1.1 32-bit mpn_sqr_diagonal.
+
+dnl  Copyright 2001, 2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C This code runs at 6 cycles/limb on the PA7100 and 2.5 cycles/limb on PA8x00.
+C 2-way unrolling wouldn't help the PA7100; it could however bring times down
+C to 2.0 cycles/limb for the PA8x00.
+
+C INPUT PARAMETERS
+define(`rp',`%r26')
+define(`up',`%r25')
+define(`n',`%r24')
+
+ASM_START()
+C mpn_sqr_diagonal: square each of the n limbs read from up and store every
+C 64-bit product as two consecutive 32-bit words at rp, low word first.
+C rp, up and n are the m4 register names defined above.  rp is advanced by 4
+C on entry so that the low word of a product always goes to -4(rp).
+PROLOGUE(mpn_sqr_diagonal)
+       ldo             4(rp),rp
+       fldws,ma        4(up),%fr4r
+C Decrement n; if that was the only limb go straight to the final stores.
+C The xmpyu below sits in the branch delay slot and runs on both paths.
+       addib,=         -1,n,L(exit)
+       xmpyu           %fr4r,%fr4r,%fr5
+
+C Each iteration loads the next limb, stores the product computed by the
+C previous iteration (advancing rp by 8), and starts the next squaring in the
+C delay slot of the loop branch.
+LDEF(loop)
+       fldws,ma        4(up),%fr4r
+       fstws           %fr5r,-4(rp)
+       fstws,ma        %fr5l,8(rp)
+       addib,<>        -1,n,L(loop)
+       xmpyu           %fr4r,%fr4r,%fr5
+
+C Store the last product; the high word is written from the delay slot of
+C the return branch.
+LDEF(exit)
+       fstws           %fr5r,-4(rp)
+       bv              0(%r2)
+       fstws           %fr5l,0(rp)
+EPILOGUE(mpn_sqr_diagonal)
diff --git a/mpn/pa32/hppa1_1/submul_1.asm b/mpn/pa32/hppa1_1/submul_1.asm

new file mode 100644 (file)

index 0000000..c6bc383
--- /dev/null
+++ b/mpn/pa32/hppa1_1/submul_1.asm
@@ -0,0 +1,105 @@
+dnl  HP-PA 1.1 mpn_submul_1 -- Multiply a limb vector with a limb and subtract
+dnl  the result from a second limb vector.
+
+dnl  Copyright 1992, 1993, 1994, 2000, 2001, 2002 Free Software Foundation,
+dnl  Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr      r26
+C s1_ptr       r25
+C size         r24
+C s2_limb      r23
+
+C This runs at 12 cycles/limb on a PA7000.  With the instructions used, it
+C cannot become faster due to data cache contention after a store.  On the
+C PA7100 it runs at 11 cycles/limb.
+
+C There are some ideas described in mul_1.asm that apply to this code too.
+
+C It seems possible to make this run as fast as mpn_addmul_1, if we use
+C      sub,>>= %r29,%r19,%r22
+C      addi    1,%r28,%r28
+C but that requires reworking the hairy software pipeline...
+
+ASM_START()
+C mpn_submul_1: multiply the limbs at s1_ptr (r25) by s2_limb (r23), subtract
+C the products from the limbs at res_ptr (r26) in place, and leave the final
+C carry-out limb in r28.  A 64-byte frame is allocated; the doubleword at
+C -16(%r30) is scratch used to move values between the floating-point and
+C integer register files (high product word at -16, low word at -12).
+C
+C Borrow handling: after "sub a,b,t" the following "add t,b,%r0" produces a
+C carry exactly when the subtraction borrowed, so the borrow can then be
+C folded into the next limb with addc.
+PROLOGUE(mpn_submul_1)
+C      .callinfo       frame=64,no_calls
+
+       ldo             64(%r30),%r30
+       fldws,ma        4(%r25),%fr5
+       stw             %r23,-16(%r30)          C move s2_limb ...
+       addib,=         -1,%r24,L(just_one_limb)
+        fldws          -16(%r30),%fr4          C ... into fr4
+       add             %r0,%r0,%r0             C clear carry
+C Prime the pipeline: form the first two products, keep the first one in
+C r28:r19 and park the second one in the scratch doubleword.
+       xmpyu           %fr4,%fr5,%fr6
+       fldws,ma        4(%r25),%fr7
+       fstds           %fr6,-16(%r30)
+       xmpyu           %fr4,%fr7,%fr8
+       ldw             -12(%r30),%r19          C least significant limb in product
+       ldw             -16(%r30),%r28
+
+       fstds           %fr8,-16(%r30)
+C Exactly two limbs: skip the loop.  The delay-slot load fetches the low word
+C of the second product into r1 on both paths.
+       addib,=         -1,%r24,L(end)
+        ldw            -12(%r30),%r1
+
+C Main loop
+C On entry to each iteration r19 holds the word to subtract from the current
+C result limb, r28 the high word of the previous product, r1 the low word of
+C the product currently held in the scratch doubleword.
+LDEF(loop)
+       ldws            0(%r26),%r29
+       fldws,ma        4(%r25),%fr5
+       sub             %r29,%r19,%r22
+       add             %r22,%r19,%r0           C carry = borrow of the sub above
+       stws,ma         %r22,4(%r26)
+       addc            %r28,%r1,%r19           C next subtrahend = high + low + borrow
+       xmpyu           %fr4,%fr5,%fr6
+       ldw             -16(%r30),%r28          C high word of the parked product
+       fstds           %fr6,-16(%r30)          C park the newly formed product
+       addc            %r0,%r28,%r28           C absorb carry from the addc above
+       addib,<>        -1,%r24,L(loop)
+        ldw            -12(%r30),%r1
+
+C Drain the pipeline: two result limbs remain, no further products to form.
+LDEF(end)
+       ldw             0(%r26),%r29
+       sub             %r29,%r19,%r22
+       add             %r22,%r19,%r0
+       stws,ma         %r22,4(%r26)
+       addc            %r28,%r1,%r19
+       ldw             -16(%r30),%r28
+       ldws            0(%r26),%r29
+       addc            %r0,%r28,%r28
+       sub             %r29,%r19,%r22
+       add             %r22,%r19,%r0
+       stws,ma         %r22,4(%r26)
+       addc            %r0,%r28,%r28           C r28 = high word + final borrow
+       bv              0(%r2)
+        ldo            -64(%r30),%r30          C (delay slot) release the frame
+
+C Single-limb case: one product, one subtraction, r28 = high word + borrow.
+LDEF(just_one_limb)
+       xmpyu           %fr4,%fr5,%fr6
+       ldw             0(%r26),%r29
+       fstds           %fr6,-16(%r30)
+       ldw             -12(%r30),%r1
+       ldw             -16(%r30),%r28
+       sub             %r29,%r1,%r22
+       add             %r22,%r1,%r0
+       stw             %r22,0(%r26)
+       addc            %r0,%r28,%r28
+       bv              0(%r2)
+        ldo            -64(%r30),%r30
+EPILOGUE()
diff --git a/mpn/pa32/hppa1_1/udiv.asm b/mpn/pa32/hppa1_1/udiv.asm

new file mode 100644 (file)

index 0000000..e6a9927
--- /dev/null
+++ b/mpn/pa32/hppa1_1/udiv.asm
@@ -0,0 +1,91 @@
+dnl  HP-PA  __udiv_qrnnd division support, used from longlong.h.
+dnl  This version runs fast on PA 7000 and later.
+
+dnl  Copyright 1993, 1994, 2000, 2001, 2003 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C rem_ptr      gr26
+C n1           gr25
+C n0           gr24
+C d            gr23
+
+C This file has caused a lot of trouble, since it demands PIC reference to
+C static data, which triggers bugs in gas (at least version 2.7 through
+C 2.11.2).  When the bug is triggered, many bogus relocs are generated.  The
+C current solution is to stuff data right into the code, and refer to it
+C using absolute offsets.  Fragile to be sure, but nothing else seems to work.
+
+ASM_START()
+ifdef(`PIC',`',
+`      RODATA
+       INT64(0000, 0x43f00000, 0x0)    C 2^64
+')
+
+C mpn_udiv_qrnnd: divide the two-word value n1:n0 (r25:r24) by d (r23) using
+C double-precision floating point.  The quotient is left in r28 and the
+C remainder is stored through rem_ptr (r26).  A 64-byte frame supplies the
+C scratch doubleword at -16(%r30) used to pass values between the integer
+C and floating-point registers.
+PROLOGUE(mpn_udiv_qrnnd)
+C      .callinfo       frame=64,no_calls
+
+       ldo             64(%r30),%r30
+
+       stws            %r25,-16(0,%r30)        C n_hi
+       stws            %r24,-12(0,%r30)        C n_lo
+
+C Point r31 at the double constant 2^64.  In the PIC case the constant is
+C embedded in the instruction stream and bl is used to obtain its address;
+C otherwise the address of the RODATA copy is formed with ldil/ldo.
+ifdef(`PIC',
+`      bl              .+20,%r31
+       dep             %r0,31,2,%r31
+       .word   0x0                             C padding for alignment
+       .word   0x43f00000, 0x0                 C 2^64
+       ldo             4(%r31),%r31',
+`      ldil            `L'%L(0000),%r31
+       ldo             R%L(0000)(%r31),%r31')
+
+C Convert n1:n0 to double as a 64-bit integer.  When n1 is non-negative the
+C branch skips the correction (the conversion in the delay slot still runs);
+C otherwise 2^64 is added to the converted value.
+       fldds           -16(0,%r30),%fr5
+       stws            %r23,-12(0,%r30)
+       comib,<=        0,%r25,L(1)
+       fcnvxf,dbl,dbl  %fr5,%fr5
+       fldds           0(0,%r31),%fr4
+       fadd,dbl        %fr4,%fr5,%fr5
+
+C Build d as a 64-bit integer in fr6 (upper word cleared) and convert it.
+LDEF(1)
+       fcpy,sgl        %fr0,%fr6L
+       fldws           -12(0,%r30),%fr6R
+       fcnvxf,dbl,dbl  %fr6,%fr4
+
+       fdiv,dbl        %fr5,%fr4,%fr5
+
+C Convert the quotient back to an integer (q in fr4R, copied to r28), form
+C q*d with xmpyu and subtract it from n1:n0 to get the remainder candidate.
+       fcnvfx,dbl,dbl  %fr5,%fr4
+       fstws           %fr4R,-16(%r30)
+       xmpyu           %fr4R,%fr6R,%fr6
+       ldws            -16(%r30),%r28
+       fstds           %fr6,-16(0,%r30)
+       ldws            -12(0,%r30),%r21
+       ldws            -16(0,%r30),%r20
+       sub             %r24,%r21,%r22
+       subb            %r25,%r20,%r20
+C If the high word of the difference is zero the quotient is accepted as is.
+C The frame is released in the delay slot on both paths.
+       comib,=         0,%r20,L(2)
+       ldo             -64(%r30),%r30
+
+C Otherwise adjust: add d back to the remainder and decrement the quotient.
+       add             %r22,%r23,%r22
+       ldo             -1(%r28),%r28
+
+LDEF(2)
+       bv              0(%r2)
+       stws            %r22,0(0,%r26)
+
+EPILOGUE(mpn_udiv_qrnnd)
diff --git a/mpn/pa32/hppa1_1/umul.asm b/mpn/pa32/hppa1_1/umul.asm

new file mode 100644 (file)

index 0000000..7f1cb93
--- /dev/null
+++ b/mpn/pa32/hppa1_1/umul.asm
@@ -0,0 +1,36 @@
+dnl  Copyright 1999, 2001 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ASM_START()
+C mpn_umul_ppmm: form the 64-bit product of the words in r25 and r24.  The
+C high word of the product is left in r28 and the low word is stored through
+C the pointer in r26.  The operands are moved into the two halves of fr22 by
+C bouncing them through the scratch word at -16(%r30) of a 64-byte frame.
+PROLOGUE(mpn_umul_ppmm)
+C      .callinfo frame=64,no_calls
+
+       ldo     64(%r30),%r30
+       stw     %r25,-16(0,%r30)
+       fldws   -16(0,%r30),%fr22R
+       stw     %r24,-16(0,%r30)
+       fldws   -16(0,%r30),%fr22L
+       xmpyu   %fr22R,%fr22L,%fr22
+       fstds   %fr22,-16(0,%r30)
+       ldw     -16(0,%r30),%r28
+       ldw     -12(0,%r30),%r29
+       stw     %r29,0(0,%r26)
+       bv      0(%r2)
+       ldo     -64(%r30),%r30
+EPILOGUE()
diff --git a/mpn/pa32/hppa2_0/add_n.asm b/mpn/pa32/hppa2_0/add_n.asm

new file mode 100644 (file)

index 0000000..685c4c9
--- /dev/null
+++ b/mpn/pa32/hppa2_0/add_n.asm
@@ -0,0 +1,96 @@
+dnl  HP-PA 2.0 32-bit mpn_add_n -- Add two limb vectors of the same length > 0
+dnl  and store sum in a third limb vector.
+
+dnl  Copyright 1997, 1998, 2000, 2001, 2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr      gr26
+C s1_ptr       gr25
+C s2_ptr       gr24
+C size         gr23
+
+C This runs at 2 cycles/limb on PA8000.
+
+ASM_START()
+C mpn_add_n: add the size-limb vectors at s1_ptr (r25) and s2_ptr (r24),
+C store the sums at res_ptr (r26) and leave the carry-out in r28.
+C
+C The loop is unrolled eight ways, one four-instruction group per limb.  The
+C first pass enters part-way into the loop: (-n & 7) groups are skipped via
+C blr, and the three pointers are moved back by 4 * (-n & 7) bytes so that
+C the fixed displacements in the remaining groups address limb 0 onwards.
+PROLOGUE(mpn_add_n)
+       sub             %r0,%r23,%r22
+       zdep            %r22,30,3,%r28          C r28 = 2 * (-n & 7)
+       zdep            %r22,29,3,%r22          C r22 = 4 * (-n & 7)
+       sub             %r25,%r22,%r25          C offset s1_ptr
+       sub             %r24,%r22,%r24          C offset s2_ptr
+       sub             %r26,%r22,%r26          C offset res_ptr
+       blr             %r28,%r0                C branch into loop
+       add             %r0,%r0,%r0             C reset carry
+
+C Limb groups.  Registers alternate between r20/r31 and r21/r19 in
+C successive groups; the carry is chained from group to group by addc.
+LDEF(loop)
+       ldw             0(%r25),%r20
+       ldw             0(%r24),%r31
+       addc            %r20,%r31,%r20
+       stw             %r20,0(%r26)
+
+LDEF(7)
+       ldw             4(%r25),%r21
+       ldw             4(%r24),%r19
+       addc            %r21,%r19,%r21
+       stw             %r21,4(%r26)
+
+LDEF(6)
+       ldw             8(%r25),%r20
+       ldw             8(%r24),%r31
+       addc            %r20,%r31,%r20
+       stw             %r20,8(%r26)
+
+LDEF(5)
+       ldw             12(%r25),%r21
+       ldw             12(%r24),%r19
+       addc            %r21,%r19,%r21
+       stw             %r21,12(%r26)
+
+LDEF(4)
+       ldw             16(%r25),%r20
+       ldw             16(%r24),%r31
+       addc            %r20,%r31,%r20
+       stw             %r20,16(%r26)
+
+LDEF(3)
+       ldw             20(%r25),%r21
+       ldw             20(%r24),%r19
+       addc            %r21,%r19,%r21
+       stw             %r21,20(%r26)
+
+LDEF(2)
+       ldw             24(%r25),%r20
+       ldw             24(%r24),%r31
+       addc            %r20,%r31,%r20
+       stw             %r20,24(%r26)
+
+C Last group of a pass: also advances all three pointers by 32 bytes and
+C counts n down by 8, looping while n is still positive.
+LDEF(1)
+       ldw             28(%r25),%r21
+       ldo             32(%r25),%r25
+       ldw             28(%r24),%r19
+       addc            %r21,%r19,%r21
+       stw             %r21,28(%r26)
+       ldo             32(%r24),%r24
+       addib,>         -8,%r23,L(loop)
+       ldo             32(%r26),%r26
+
+       bv              (%r2)
+       addc            %r0,%r0,%r28            C (delay slot) r28 = carry out
+EPILOGUE()
diff --git a/mpn/pa32/hppa2_0/gmp-mparam.h b/mpn/pa32/hppa2_0/gmp-mparam.h

new file mode 100644 (file)

index 0000000..44543c7
--- /dev/null
+++ b/mpn/pa32/hppa2_0/gmp-mparam.h
@@ -0,0 +1,157 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2009, 2010 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+/* Limb geometry for this target: 32-bit limbs, i.e. 4 bytes per limb.  */
+#define GMP_LIMB_BITS 32
+#define BYTES_PER_MP_LIMB 4
+
+/* 552 MHz PA8600 (gcc61.fsffrance.org) */
+
+#define DIVREM_1_NORM_THRESHOLD              3
+#define DIVREM_1_UNNORM_THRESHOLD            4
+#define MOD_1_NORM_THRESHOLD                 3
+#define MOD_1_UNNORM_THRESHOLD               4
+#define MOD_1N_TO_MOD_1_1_THRESHOLD         14
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          8
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        18
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     22
+#define USE_PREINV_DIVREM_1                  1
+#define DIVREM_2_THRESHOLD                   0  /* always */
+#define DIVEXACT_1_THRESHOLD                 0  /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD           31
+
+#define MUL_TOOM22_THRESHOLD                15
+#define MUL_TOOM33_THRESHOLD                91
+#define MUL_TOOM44_THRESHOLD               154
+#define MUL_TOOM6H_THRESHOLD               204
+#define MUL_TOOM8H_THRESHOLD               482
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      97
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     103
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     109
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD     103
+
+#define SQR_BASECASE_THRESHOLD               6
+#define SQR_TOOM2_THRESHOLD                 47
+#define SQR_TOOM3_THRESHOLD                 93
+#define SQR_TOOM4_THRESHOLD                250
+#define SQR_TOOM6_THRESHOLD                278
+#define SQR_TOOM8_THRESHOLD                502
+
+#define MULMOD_BNM1_THRESHOLD               13
+#define SQRMOD_BNM1_THRESHOLD               15
+
+#define MUL_FFT_MODF_THRESHOLD             244  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    244, 5}, {      8, 4}, {     17, 5}, {     13, 6}, \
+    {      7, 5}, {     17, 6}, {      9, 5}, {     19, 6}, \
+    {     13, 7}, {      7, 6}, {     17, 7}, {      9, 6}, \
+    {     19, 7}, {     11, 6}, {     24, 7}, {     13, 8}, \
+    {      7, 7}, {     19, 8}, {     11, 7}, {     25, 8}, \
+    {     15, 7}, {     33, 8}, {     23, 9}, {     15, 8}, \
+    {     39, 9}, {     23,10}, {     15, 9}, {     31, 8}, \
+    {     67, 9}, {     39, 8}, {     79, 9}, {     47,10}, \
+    {     31, 9}, {     71, 8}, {    143, 9}, {     79,10}, \
+    {     47,11}, {     31,10}, {     63, 9}, {    127, 8}, \
+    {    255, 9}, {    135, 8}, {    271, 9}, {    143,10}, \
+    {     79, 9}, {    159, 8}, {    319, 9}, {    175, 8}, \
+    {    351,10}, {     95, 9}, {    191, 8}, {    383, 9}, \
+    {    207,11}, {     63,10}, {    127, 9}, {    255, 8}, \
+    {    511, 9}, {    271,10}, {    143, 9}, {    287, 8}, \
+    {    575,10}, {    159, 9}, {    319,10}, {    175, 9}, \
+    {    351,11}, {     95,10}, {    191, 9}, {    383,10}, \
+    {    207, 9}, {    415,12}, {     63,11}, {    127,10}, \
+    {    255, 9}, {    511,10}, {    271, 9}, {    543, 8}, \
+    {   1087,10}, {    287, 9}, {    575,10}, {    303,11}, \
+    {    159,10}, {    351, 9}, {    703, 8}, {   1407,11}, \
+    {    191,10}, {    415, 9}, {    831,11}, {    223, 9}, \
+    {    895,10}, {    479,12}, {    127,11}, {    255,10}, \
+    {    543, 9}, {   1087,11}, {    287,10}, {    607, 9}, \
+    {   1215,11}, {    351,10}, {    703, 9}, {   1407,12}, \
+    {    191,11}, {    415,10}, {    831,11}, {    479,13}, \
+    {   8192,14}, {  16384,15}, {  32768,16} }
+#define MUL_FFT_TABLE3_SIZE 107
+#define MUL_FFT_THRESHOLD                 2112
+
+#define SQR_FFT_MODF_THRESHOLD             240  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    240, 5}, {      8, 4}, {     17, 5}, {     19, 6}, \
+    {     17, 7}, {      9, 6}, {     20, 7}, {     11, 6}, \
+    {     23, 7}, {     13, 8}, {      7, 7}, {     19, 8}, \
+    {     11, 7}, {     25, 8}, {     15, 7}, {     33, 8}, \
+    {     19, 7}, {     39, 8}, {     23, 9}, {     15, 8}, \
+    {     39, 9}, {     23,10}, {     15, 9}, {     31, 8}, \
+    {     63, 9}, {     47,10}, {     31, 9}, {     63, 8}, \
+    {    127, 9}, {     71, 8}, {    143, 9}, {     79,10}, \
+    {     47,11}, {     31,10}, {     63, 9}, {    127, 8}, \
+    {    255, 7}, {    511, 9}, {    135, 8}, {    271, 9}, \
+    {    143,10}, {     79, 9}, {    159, 8}, {    319, 9}, \
+    {    175, 8}, {    351, 7}, {    703,10}, {     95, 9}, \
+    {    191, 8}, {    383, 9}, {    207,11}, {     63,10}, \
+    {    127, 9}, {    255, 8}, {    511, 9}, {    271,10}, \
+    {    143, 9}, {    287, 8}, {    575,10}, {    159, 9}, \
+    {    319,10}, {    175, 9}, {    351, 8}, {    703,11}, \
+    {     95,10}, {    191, 9}, {    383,10}, {    207, 9}, \
+    {    415,12}, {     63,11}, {    127,10}, {    255, 9}, \
+    {    511,10}, {    271, 9}, {    543, 8}, {   1087,10}, \
+    {    287, 9}, {    575,11}, {    159,10}, {    319, 9}, \
+    {    639,10}, {    351, 9}, {    703, 8}, {   1407,11}, \
+    {    191,10}, {    415, 9}, {    831,11}, {    223, 8}, \
+    {   1791,10}, {    479, 9}, {    959,12}, {    127,11}, \
+    {    255,10}, {    543,11}, {    287,10}, {    607,11}, \
+    {    319,10}, {    639,11}, {    351,10}, {    703, 9}, \
+    {   1407,12}, {    191,11}, {    415,10}, {    831,11}, \
+    {    479,10}, {    959,13}, {   8192,14}, {  16384,15}, \
+    {  32768,16} }
+#define SQR_FFT_TABLE3_SIZE 109
+#define SQR_FFT_THRESHOLD                 1600
+
+#define MULLO_BASECASE_THRESHOLD             0  /* always */
+#define MULLO_DC_THRESHOLD                  90
+#define MULLO_MUL_N_THRESHOLD             4167
+
+#define DC_DIV_QR_THRESHOLD                100
+#define DC_DIVAPPR_Q_THRESHOLD             342
+#define DC_BDIV_QR_THRESHOLD               119
+#define DC_BDIV_Q_THRESHOLD                246
+
+#define INV_MULMOD_BNM1_THRESHOLD           12
+#define INV_NEWTON_THRESHOLD               274
+#define INV_APPR_THRESHOLD                 268
+
+#define BINV_NEWTON_THRESHOLD              327
+#define REDC_1_TO_REDC_N_THRESHOLD          70
+
+#define MU_DIV_QR_THRESHOLD                979
+#define MU_DIVAPPR_Q_THRESHOLD            1142
+#define MUPI_DIV_QR_THRESHOLD              100
+#define MU_BDIV_QR_THRESHOLD               667
+#define MU_BDIV_Q_THRESHOLD               1187
+
+#define MATRIX22_STRASSEN_THRESHOLD         15
+#define HGCD_THRESHOLD                      99
+#define GCD_DC_THRESHOLD                   372
+#define GCDEXT_DC_THRESHOLD                241
+#define JACOBI_BASE_METHOD                   2
+
+#define GET_STR_DC_THRESHOLD                 7
+#define GET_STR_PRECOMPUTE_THRESHOLD        14
+#define SET_STR_DC_THRESHOLD               224
+#define SET_STR_PRECOMPUTE_THRESHOLD       788
diff --git a/mpn/pa32/hppa2_0/sqr_diagonal.asm b/mpn/pa32/hppa2_0/sqr_diagonal.asm

new file mode 100644 (file)

index 0000000..3493c87
--- /dev/null
+++ b/mpn/pa32/hppa2_0/sqr_diagonal.asm
@@ -0,0 +1,101 @@
+dnl  HP-PA 32-bit mpn_sqr_diagonal optimized for the PA8x00.
+
+dnl  Copyright 2001, 2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C This code runs at 6 cycles/limb on the PA7100 and 2 cycles/limb on PA8x00.
+C The 2-way unrolling is actually not helping the PA7100.
+
+C INPUT PARAMETERS
+define(`rp',`%r26')
+define(`up',`%r25')
+define(`n',`%r24')
+
+ASM_START()
+C mpn_sqr_diagonal: square each of the n limbs read from up and store every
+C 64-bit product as two consecutive 32-bit words at rp, low word first.
+C rp, up and n are the m4 register names defined above.
+C
+C The loop is unrolled two ways with two register sets: limbs alternate
+C between fr4r and fr6r, products between fr5 and fr7.  rp is advanced by 4
+C on entry so that the low word of the oldest pending product goes to -4(rp).
+C The end1/end2/end3 paths handle n = 1, 2, 3; exite/exito drain the
+C pipeline when the loop stops after its first or second half.
+PROLOGUE(mpn_sqr_diagonal)
+
+       fldws,ma        4(up),%fr4r
+       addib,=         -1,n,L(end1)
+       ldo             4(rp),rp
+
+       fldws,ma        4(up),%fr6r
+       addib,=         -1,n,L(end2)
+       xmpyu           %fr4r,%fr4r,%fr5
+
+       fldws,ma        4(up),%fr4r
+       addib,=         -1,n,L(end3)
+       xmpyu           %fr6r,%fr6r,%fr7
+
+
+C At the loop top fr5 and fr7 hold two finished products (fr5 the older) and
+C fr4r holds a limb that has been loaded but not yet squared.  Each half
+C loads one limb, stores one product (rp += 8) and starts one squaring in the
+C delay slot of its branch.
+LDEF(loop)
+       fldws,ma        4(up),%fr6r
+       fstws           %fr5r,-4(rp)
+       fstws,ma        %fr5l,8(rp)
+       addib,=         -1,n,L(exite)
+       xmpyu           %fr4r,%fr4r,%fr5
+       fldws,ma        4(up),%fr4r
+       fstws           %fr7r,-4(rp)
+       fstws,ma        %fr7l,8(rp)
+       addib,<>        -1,n,L(loop)
+       xmpyu           %fr6r,%fr6r,%fr7
+
+C Fall-through exit: store fr5 then fr7, square the pending limb in fr4r and
+C store that product last.
+LDEF(exito)
+       fstws           %fr5r,-4(rp)
+       fstws           %fr5l,0(rp)
+       xmpyu           %fr4r,%fr4r,%fr5
+       fstws           %fr7r,4(rp)
+       fstws           %fr7l,8(rp)
+       fstws,mb        %fr5r,12(rp)
+       bv              0(%r2)
+       fstws           %fr5l,4(rp)
+
+C Exit from the first half: same as exito with the two register sets
+C swapped (fr7 is the older product, fr6r the pending limb).
+LDEF(exite)
+       fstws           %fr7r,-4(rp)
+       fstws           %fr7l,0(rp)
+       xmpyu           %fr6r,%fr6r,%fr7
+       fstws           %fr5r,4(rp)
+       fstws           %fr5l,8(rp)
+       fstws,mb        %fr7r,12(rp)
+       bv              0(%r2)
+       fstws           %fr7l,4(rp)
+
+C n = 1: square the single limb and store it.
+LDEF(end1)
+       xmpyu           %fr4r,%fr4r,%fr5
+       fstws           %fr5r,-4(rp)
+       bv              0(%r2)
+       fstws,ma        %fr5l,8(rp)
+
+C n = 2: the first product is already in fr5; square the second limb and
+C store both products.
+LDEF(end2)
+       xmpyu           %fr6r,%fr6r,%fr7
+       fstws           %fr5r,-4(rp)
+       fstws           %fr5l,0(rp)
+       fstws           %fr7r,4(rp)
+       bv              0(%r2)
+       fstws           %fr7l,8(rp)
+
+C n = 3: two products are ready in fr5 and fr7; square the third limb and
+C store all three products.
+LDEF(end3)
+       fstws           %fr5r,-4(rp)
+       fstws           %fr5l,0(rp)
+       xmpyu           %fr4r,%fr4r,%fr5
+       fstws           %fr7r,4(rp)
+       fstws           %fr7l,8(rp)
+       fstws,mb        %fr5r,12(rp)
+       bv              0(%r2)
+       fstws           %fr5l,4(rp)
+EPILOGUE(mpn_sqr_diagonal)
diff --git a/mpn/pa32/hppa2_0/sub_n.asm b/mpn/pa32/hppa2_0/sub_n.asm

new file mode 100644 (file)

index 0000000..b0aefb4
--- /dev/null
+++ b/mpn/pa32/hppa2_0/sub_n.asm
@@ -0,0 +1,96 @@
+dnl  HP-PA 2.0 32-bit mpn_sub_n -- Subtract two limb vectors of the same
+dnl  length > 0 and store difference in a third limb vector.
+
+dnl  Copyright 1997, 1998, 2000, 2001, 2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr      gr26
+C s1_ptr       gr25
+C s2_ptr       gr24
+C size         gr23
+
+C This runs at 2 cycles/limb on PA8000.
+
+ASM_START()
+PROLOGUE(mpn_sub_n)
+       sub             %r0,%r23,%r22
+       zdep            %r22,30,3,%r28          C r28 = 2 * (-n & 7)
+       zdep            %r22,29,3,%r22          C r22 = 4 * (-n & 7)
+       sub             %r25,%r22,%r25          C offset s1_ptr
+       sub             %r24,%r22,%r24          C offset s2_ptr
+       blr             %r28,%r0                C branch into loop
+       sub             %r26,%r22,%r26          C offset res_ptr and set carry
+
+LDEF(loop)
+       ldw             0(%r25),%r20
+       ldw             0(%r24),%r31
+       subb            %r20,%r31,%r20
+       stw             %r20,0(%r26)
+
+LDEF(7)
+       ldw             4(%r25),%r21
+       ldw             4(%r24),%r19
+       subb            %r21,%r19,%r21
+       stw             %r21,4(%r26)
+
+LDEF(6)
+       ldw             8(%r25),%r20
+       ldw             8(%r24),%r31
+       subb            %r20,%r31,%r20
+       stw             %r20,8(%r26)
+
+LDEF(5)
+       ldw             12(%r25),%r21
+       ldw             12(%r24),%r19
+       subb            %r21,%r19,%r21
+       stw             %r21,12(%r26)
+
+LDEF(4)
+       ldw             16(%r25),%r20
+       ldw             16(%r24),%r31
+       subb            %r20,%r31,%r20
+       stw             %r20,16(%r26)
+
+LDEF(3)
+       ldw             20(%r25),%r21
+       ldw             20(%r24),%r19
+       subb            %r21,%r19,%r21
+       stw             %r21,20(%r26)
+
+LDEF(2)
+       ldw             24(%r25),%r20
+       ldw             24(%r24),%r31
+       subb            %r20,%r31,%r20
+       stw             %r20,24(%r26)
+
+LDEF(1)
+       ldw             28(%r25),%r21
+       ldo             32(%r25),%r25
+       ldw             28(%r24),%r19
+       subb            %r21,%r19,%r21
+       stw             %r21,28(%r26)
+       ldo             32(%r24),%r24
+       addib,>         -8,%r23,L(loop)
+       ldo             32(%r26),%r26
+
+       addc            %r0,%r0,%r28
+       bv              (%r2)
+       subi            1,%r28,%r28
+EPILOGUE()
diff --git a/mpn/pa32/lshift.asm b/mpn/pa32/lshift.asm

new file mode 100644 (file)

index 0000000..2128fbe
--- /dev/null
+++ b/mpn/pa32/lshift.asm
@@ -0,0 +1,64 @@
+dnl  HP-PA  mpn_lshift -- Shift a number left.
+
+dnl  Copyright 1992, 1994, 2000, 2001, 2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr      gr26
+C s_ptr                gr25
+C size         gr24
+C cnt          gr23
+
+ASM_START()
+C mpn_lshift: shift the size-limb vector at s_ptr (r25) left by cnt (r23)
+C bits and store the result at res_ptr (r26).  The bits shifted out of the
+C most significant limb are left in r28.
+C
+C Both pointers are first moved past the end of their vectors and the limbs
+C are then processed from the most significant one downwards.  The shift
+C amount register is loaded with 32 - cnt so that each vshd extracts one
+C result limb from a pair of adjacent source limbs.  The loop is unrolled two
+C ways, with r22 and r29 alternating as the most recently loaded limb.
+PROLOGUE(mpn_lshift)
+       sh2add          %r24,%r25,%r25
+       sh2add          %r24,%r26,%r26
+       ldws,mb         -4(0,%r25),%r22
+       subi            32,%r23,%r1
+       mtsar           %r1
+       addib,=         -1,%r24,L(0004)
+       vshd            %r0,%r22,%r28           C compute carry out limb
+       ldws,mb         -4(0,%r25),%r29
+       addib,=         -1,%r24,L(0002)
+       vshd            %r22,%r29,%r20
+
+C Each half stores the limb computed previously, loads one more source limb
+C and, in the branch delay slot, combines it with its predecessor.
+LDEF(loop)
+       ldws,mb         -4(0,%r25),%r22
+       stws,mb         %r20,-4(0,%r26)
+       addib,=         -1,%r24,L(0003)
+       vshd            %r29,%r22,%r20
+       ldws,mb         -4(0,%r25),%r29
+       stws,mb         %r20,-4(0,%r26)
+       addib,<>        -1,%r24,L(loop)
+       vshd            %r22,%r29,%r20
+
+C Tail when the last limb loaded is in r29: store the pending limb, then the
+C least significant result limb (r29 shifted with zeros coming in below).
+LDEF(0002)
+       stws,mb         %r20,-4(0,%r26)
+       vshd            %r29,%r0,%r20
+       bv              0(%r2)
+       stw             %r20,-4(0,%r26)
+
+C Tail when the last limb loaded is in r22: store the pending limb and fall
+C through to the code that writes the least significant result limb.
+LDEF(0003)
+       stws,mb         %r20,-4(0,%r26)
+
+LDEF(0004)
+       vshd            %r22,%r0,%r20
+       bv              0(%r2)
+       stw             %r20,-4(0,%r26)
+EPILOGUE()
diff --git a/mpn/pa32/pa-defs.m4 b/mpn/pa32/pa-defs.m4

new file mode 100644 (file)

index 0000000..837ee47
--- /dev/null
+++ b/mpn/pa32/pa-defs.m4
@@ -0,0 +1,53 @@
+divert(-1)
+
+dnl  m4 macros for HPPA assembler.
+
+dnl  Copyright 2002 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+dnl  hppa assembler comments are introduced with ";".
+dnl
+dnl  For cooperation with cpp, apparently lines "# 123" set the line number,
+dnl  and other lines starting with a "#" are ignored.
+
+changecom(;)
+
+
+dnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)
+dnl          EPILOGUE_cpu(GSYM_PREFIX`'foo)
+dnl
+dnl  These are the same as the basic PROLOGUE_cpu and EPILOGUE_cpu in
+dnl  mpn/asm-defs.m4, but using .proc / .procend.  These are standard and on
+dnl  an ELF system they do what .type and .size normally do.
+
+dnl  PROLOGUE_cpu takes exactly one argument, the symbol name; the argument
+dnl  count is checked by m4_assert_numargs.  The expansion switches to .code,
+dnl  aligns with ALIGN(8), exports the symbol as an entry, emits the symbol's
+dnl  label, and then opens the procedure with .proc and .callinfo.
+define(`PROLOGUE_cpu',
+m4_assert_numargs(1)
+       `.code
+       ALIGN(8)
+       .export `$1',entry
+`$1'LABEL_SUFFIX'
+       .proc
+       .callinfo)      dnl  This is really bogus, but allows us to compile
+                       dnl  again on hppa machines.
+
+
+dnl  EPILOGUE_cpu takes exactly one argument (checked by m4_assert_numargs)
+dnl  and expands to .procend.  The argument itself does not appear in the
+dnl  expansion.
+define(`EPILOGUE_cpu',
+m4_assert_numargs(1)
+`      .procend')
+
+divert
diff --git a/mpn/pa32/rshift.asm b/mpn/pa32/rshift.asm

new file mode 100644 (file)

index 0000000..238b0be
--- /dev/null
+++ b/mpn/pa32/rshift.asm
@@ -0,0 +1,61 @@
+dnl  HP-PA  mpn_rshift -- Shift a number right.
+
+dnl  Copyright 1992, 1994, 2000, 2001, 2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr      gr26
+C s_ptr                gr25
+C size         gr24
+C cnt          gr23
+
+ASM_START()
+PROLOGUE(mpn_rshift)
+C Shift the size-limb vector at s_ptr right by cnt bits and store size limbs
+C at res_ptr.  Limbs are processed from the lowest address upwards.  The bits
+C shifted out of the lowest limb are computed into r28 (placed at the top of
+C the word), and r28 is not written again before the return.
+C NOTE(review): the code assumes size >= 1 and presumably 1 <= cnt <= 31 --
+C confirm against the callers.
+C
+C Loads and stores post-increment their pointer by 4.  With the shift amount
+C register set to cnt, "vshd hi,lo,t" yields the low word of (hi:lo) >> cnt.
+       ldws,ma         4(0,%r25),%r22
+       mtsar           %r23
+C Decrement size and branch when it reaches zero (single-limb input).  The
+C vshd after each addib sits in the branch delay slot.
+       addib,=         -1,%r24,L(0004)
+       vshd            %r22,%r0,%r28           C compute carry out limb
+       ldws,ma         4(0,%r25),%r29
+       addib,=         -1,%r24,L(0002)
+       vshd            %r29,%r22,%r20
+
+C Main loop, two limbs per iteration.  r22 and r29 alternate as the lower and
+C higher input limb of vshd, so no register copy is needed.  r20 always holds
+C the result limb that is stored on the next step.
+LDEF(loop)
+       ldws,ma         4(0,%r25),%r22
+       stws,ma         %r20,4(0,%r26)
+       addib,=         -1,%r24,L(0003)
+       vshd            %r22,%r29,%r20
+       ldws,ma         4(0,%r25),%r29
+       stws,ma         %r20,4(0,%r26)
+       addib,<>        -1,%r24,L(loop)
+       vshd            %r29,%r22,%r20
+
+C Exit with the highest source limb in r29: store the pending limb, then
+C store r29 >> cnt as the highest result limb from the delay slot of the
+C return.
+LDEF(0002)
+       stws,ma         %r20,4(0,%r26)
+       vshd            %r0,%r29,%r20
+       bv              0(%r2)
+       stw             %r20,0(0,%r26)
+
+C Exit with the highest source limb in r22: store the pending limb and fall
+C through to the common tail.
+LDEF(0003)
+       stws,ma         %r20,4(0,%r26)
+
+C Tail, also the size == 1 case: the highest result limb is r22 >> cnt.
+LDEF(0004)
+       vshd            %r0,%r22,%r20
+       bv              0(%r2)
+       stw             %r20,0(0,%r26)
+EPILOGUE()
diff --git a/mpn/pa32/sub_n.asm b/mpn/pa32/sub_n.asm

new file mode 100644 (file)

index 0000000..d07ebb5
--- /dev/null
+++ b/mpn/pa32/sub_n.asm
@@ -0,0 +1,53 @@
+dnl  HP-PA mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+dnl  store difference in a third limb vector.
+
+dnl  Copyright 1992, 1994, 2000, 2001, 2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr      gr26
+C s1_ptr       gr25
+C s2_ptr       gr24
+C size         gr23
+
+C One might want to unroll this as for other processors, but it turns out that
+C the data cache contention after a store makes such unrolling useless.  We
+C can't come under 5 cycles/limb anyway.
+
+ASM_START()
+PROLOGUE(mpn_sub_n)
+C Compute res[i] = s1[i] - s2[i] for i = 0 .. size-1, propagating the borrow
+C from limb to limb, and leave the final borrow (0 or 1) in r28.
+C The code assumes size >= 1: the first pair of limbs is loaded before size
+C is tested.
+C
+C Load the first limb of each operand; all loads post-increment by 4.
+       ldws,ma         4(0,%r25),%r20
+       ldws,ma         4(0,%r24),%r19
+
+C The instruction after each addib sits in the branch delay slot.
+       addib,=         -1,%r23,L(end)  C check for (SIZE == 1)
+        sub            %r20,%r19,%r28  C subtract first limbs ignoring cy
+
+C Per iteration: load the next limb pair, store the previous difference, and
+C subtract with borrow.  The code relies on the carry/borrow state produced by
+C sub/subb surviving the loads, the store and the addib in between.
+LDEF(loop)
+       ldws,ma         4(0,%r25),%r20
+       ldws,ma         4(0,%r24),%r19
+       stws,ma         %r28,4(0,%r26)
+       addib,<>        -1,%r23,L(loop)
+        subb           %r20,%r19,%r28
+
+C Store the last difference, then turn the carry bit into the result:
+C addc gives r28 = carry bit, and the delay-slot subi gives r28 = 1 - carry,
+C which is 1 exactly when the last subtraction borrowed.
+LDEF(end)
+       stws            %r28,0(0,%r26)
+       addc            %r0,%r0,%r28
+       bv              0(%r2)
+        subi           1,%r28,%r28
+EPILOGUE()
diff --git a/mpn/pa32/udiv.asm b/mpn/pa32/udiv.asm

new file mode 100644 (file)

index 0000000..86886e4
--- /dev/null
+++ b/mpn/pa32/udiv.asm
@@ -0,0 +1,280 @@
+dnl  HP-PA  __udiv_qrnnd division support, used from longlong.h.
+dnl  This version runs fast on pre-PA7000 CPUs.
+
+dnl  Copyright 1993, 1994, 2000, 2001, 2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C rem_ptr      gr26
+C n1           gr25
+C n0           gr24
+C d            gr23
+
+C The code size is a bit excessive.  We could merge the last two ds;addc
+C sequences by simply moving the "bb,< Odd" instruction down.  The only
+C trouble is the FFFFFFFF code that would need some hacking.
+
+ASM_START()
+PROLOGUE(mpn_udiv_qrnnd)
+C Divide the two-word value n1:n0 (r25:r24) by d (r23).  The remainder is
+C stored through rem_ptr (r26) and the quotient is left in r28.
+C NOTE(review): presumably requires n1 < d so that the quotient fits in one
+C word -- confirm against the longlong.h caller.
+C
+C There are three paths:
+C   - d with its top bit clear: a straight 32-step divide by d;
+C   - d with its top bit set and even: divide (n1:n0) >> 1 by d >> 1;
+C   - d with its top bit set and odd: divide (n1:n0) >> 1 by (d >> 1) + 1 and
+C     correct the result, with d == 0xFFFFFFFF handled separately.
+C
+C Branch if d is negative as a signed word, i.e. its top bit is set.  The sub
+C in the delay slot always executes and leaves r1 = -d; the odd path uses r1
+C much later.
+       comb,<          %r23,0,L(largedivisor)
+        sub            %r0,%r23,%r1            C clear cy as side-effect
+C The first ds, with r0 as target, only sets up the state consumed by the
+C following divide steps.  After it, each "ds ; addc" pair performs one divide
+C step on the partial remainder in r25, while addc shifts r24 left by one bit:
+C the bit shifted out is the next dividend bit and the carry shifted in is the
+C quotient bit from the previous step.  There are 32 divide steps on r25.
+       ds              %r0,%r1,%r0
+       addc            %r24,%r24,%r24
+       ds              %r25,%r23,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r23,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r23,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r23,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r23,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r23,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r23,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r23,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r23,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r23,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r23,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r23,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r23,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r23,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r23,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r23,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r23,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r23,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r23,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r23,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r23,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r23,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r23,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r23,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r23,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r23,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r23,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r23,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r23,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r23,%r25
+       addc            %r24,%r24,%r28
+       ds              %r25,%r23,%r25
+C Final correction: if the partial remainder is negative, add d back.  comclr
+C nullifies the addl when r25 >= 0.  The delay-slot addc after the return
+C shifts the quotient bit of the last divide step into r28.
+       comclr,>=       %r25,%r0,%r0
+       addl            %r25,%r23,%r25
+       stws            %r25,0(0,%r26)
+       bv              0(%r2)
+        addc           %r28,%r28,%r28
+
+C d has its top bit set.  Save the low bit of n0, then branch on the low bit
+C of d; the delay-slot extru leaves r22 = d >> 1 on both paths.
+LDEF(largedivisor)
+       extru           %r24,31,1,%r19          C r19 = n0 & 1
+       bb,<            %r23,31,L(odd)
+        extru          %r23,30,31,%r22         C r22 = d >> 1
+C Even d: divide (n1:n0) >> 1 by d >> 1 with the same 32-step scheme as above,
+C using r22 as the divisor and r21 = -r22 to set up the divide-step state.
+       shd             %r25,%r24,1,%r24        C r24 = new n0
+       extru           %r25,30,31,%r25         C r25 = new n1
+       sub             %r0,%r22,%r21
+       ds              %r0,%r21,%r0
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+C Correct a negative partial remainder by adding d >> 1 back, then rebuild the
+C true remainder as 2*r25 + (n0 & 1).  The delay-slot addc shifts in the last
+C quotient bit and places the quotient in r28.
+       comclr,>=       %r25,%r0,%r0
+       addl            %r25,%r22,%r25
+       sh1addl         %r25,%r19,%r25
+       stws            %r25,0(0,%r26)
+       bv              0(%r2)
+        addc           %r24,%r24,%r28
+
+C Odd d with the top bit set.  r22 becomes (d >> 1) + 1; if that increment
+C overflows as a signed value, d is 0xFFFFFFFF and the special case is taken
+C with the following shd nullified.
+LDEF(odd)
+       addib,sv,n      1,%r22,L(FFFFFFFF)      C r22 = (d / 2 + 1)
+       shd             %r25,%r24,1,%r24        C r24 = new n0
+       extru           %r25,30,31,%r25         C r25 = new n1
+       sub             %r0,%r22,%r21
+       ds              %r0,%r21,%r0
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r24
+       ds              %r25,%r22,%r25
+       addc            %r24,%r24,%r28
+       comclr,>=       %r25,%r0,%r0
+       addl            %r25,%r22,%r25
+       sh1addl         %r25,%r19,%r25
+C We have computed (n1,,n0) / (d + 1), q' = r28, r' = r25
+C Convert to a division by d: since n = q'*(d+1) + r' = q'*d + (q' + r'), the
+C candidate remainder is q' + r'.  If that add carries out, the addl (not
+C nullified in that case) adds r1 = -d and the addc bumps the quotient by the
+C carry.  Then, if the remainder is still >= d, subtract d once more; the
+C carry from that comparison is added to the quotient by the delay-slot addc.
+C The addl instructions leave the carry bit untouched for the addc that
+C follows them.
+       add,nuv         %r28,%r25,%r25
+       addl            %r25,%r1,%r25
+       addc            %r0,%r28,%r28
+       sub,<<          %r25,%r23,%r0
+       addl            %r25,%r1,%r25
+       stws            %r25,0(0,%r26)
+       bv              0(%r2)
+        addc           %r0,%r28,%r28
+
+C This is just a special case of the code above.
+C We come here when d == 0xFFFFFFFF
+C Here n = n1*d + (n1 + n0), so the candidate remainder is n1 + n0 and the
+C candidate quotient is n1.  If the add overflows, the comparison is skipped
+C and 1 is added, which equals subtracting d modulo 2^32.  Otherwise 1 is
+C added only when the sum is >= d.  In both cases the carry left behind is
+C added to n1 by the delay-slot addc to form the quotient in r28.
+LDEF(FFFFFFFF)
+       add,uv          %r25,%r24,%r24
+       sub,<<          %r24,%r23,%r0
+       ldo             1(%r24),%r24
+       stws            %r24,0(0,%r26)
+       bv              0(%r2)
+        addc           %r0,%r25,%r28
+EPILOGUE()
diff --git a/mpn/pa64/README b/mpn/pa64/README

new file mode 100644 (file)

index 0000000..6234a40
--- /dev/null
+++ b/mpn/pa64/README
@@ -0,0 +1,67 @@
+Copyright 1999, 2001, 2002, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+
+
+This directory contains mpn functions for 64-bit PA-RISC 2.0.
+
+PIPELINE SUMMARY
+
+The PA8x00 processors have an orthogonal 4-way out-of-order pipeline.  Each
+cycle two ALU operations and two MEM operations can issue, but just one of the
+MEM operations may be a store.  The two ALU operations can be almost any
+combination of non-memory operations.  Unlike every other processor, integer
+and fp operations are completely equal here; they both count as just ALU
+operations.
+
+Unfortunately, some operations cause hiccups in the pipeline.  Combining
+carry-consuming operations like ADD,DC with operations that do not set carry
+like ADD,L causes long delays.  Skip operations also seem to cause hiccups.  If
+several ADD,DC are issued consecutively, or if plain carry-generating ADDs feed
+ADD,DC, stalling does not occur.  We can effectively issue two ADD,DC
+operations/cycle.
+
+Latency scheduling is not as important as making sure to have a mix of ALU and
+MEM operations, but for full pipeline utilization, it is still a good idea to
+do some amount of latency scheduling.
+
+Like for all other processors, RAW memory scheduling is critically important.
+Since integer multiplication takes place in the floating-point unit, the GMP
+code needs to handle this problem frequently.
+
+STATUS
+
+* mpn_lshift and mpn_rshift run at 1.5 cycles/limb on PA8000 and at 1.0
+  cycles/limb on PA8500.  With latency scheduling, the numbers could
+  probably be improved to 1.0 cycles/limb for all PA8x00 chips.
+
+* mpn_add_n and mpn_sub_n run at 2.0 cycles/limb on PA8000 and at about
+  1.6875 cycles/limb on PA8500.  With latency scheduling, this could
+  probably be improved to get close to 1.5 cycles/limb.  A problem is the
+  stalling of carry-inputting instructions after instructions that do not
+  write to carry.
+
+* mpn_mul_1, mpn_addmul_1, and mpn_submul_1 run at between 5.625 and 6.375
+  cycles/limb on PA8500 and later, and about a cycle/limb slower on older
+  chips.  The code uses ADD,DC for adjacent limbs, and relies heavily on
+  reordering.
+
+
+REFERENCES
+
+Hewlett Packard, "64-Bit Runtime Architecture for PA-RISC 2.0", version 3.3,
+October 1997.
diff --git a/mpn/pa64/addmul_1.asm b/mpn/pa64/addmul_1.asm

new file mode 100644 (file)

index 0000000..4e76546
--- /dev/null
+++ b/mpn/pa64/addmul_1.asm
@@ -0,0 +1,682 @@
+dnl  HP-PA 2.0 64-bit mpn_addmul_1 -- Multiply a limb vector with a limb and
+dnl  add the result to a second limb vector.
+
+dnl  Copyright 1998, 1999, 2000, 2002, 2003 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                  cycles/limb
+C 8000,8200:           7
+C 8500,8600,8700:      6.375
+
+C  The feed-in and wind-down code has not yet been scheduled.  Many cycles
+C  could be saved there per call.
+
+C  DESCRIPTION:
+C  The main loop "BIG" is 4-way unrolled, mainly to allow
+C  effective use of ADD,DC.  Delays in moving data via the cache from the FP
+C  registers to the IU registers, have demanded a deep software pipeline, and
+C  a lot of stack slots for partial products in flight.
+C
+C  CODE STRUCTURE:
+C  save-some-registers
+C  do 0, 1, 2, or 3 limbs
+C  if done, restore-some-regs and return
+C  save-many-regs
+C  do 4, 8, ... limbs
+C  restore-all-regs
+
+C  STACK LAYOUT:
+C  HP-PA stack grows upwards.  We could allocate 8 fewer slots by using the
+C  slots marked FREE, as well as some slots in the caller's "frame marker".
+C
+C -00 <- r30
+C -08  FREE
+C -10  tmp
+C -18  tmp
+C -20  tmp
+C -28  tmp
+C -30  tmp
+C -38  tmp
+C -40  tmp
+C -48  tmp
+C -50  tmp
+C -58  tmp
+C -60  tmp
+C -68  tmp
+C -70  tmp
+C -78  tmp
+C -80  tmp
+C -88  tmp
+C -90  FREE
+C -98  FREE
+C -a0  FREE
+C -a8  FREE
+C -b0  r13
+C -b8  r12
+C -c0  r11
+C -c8  r10
+C -d0  r9
+C -d8  r8
+C -e0  r7
+C -e8  r6
+C -f0  r5
+C -f8  r4
+C -100 r3
+C  Previous frame:
+C  [unused area]
+C -38/-138 vlimb home slot.  For 2.0N, the vlimb arg will arrive here.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS:
+define(`rp',`%r26')    C
+define(`up',`%r25')    C
+define(`n',`%r24')     C
+define(`vlimb',`%r23') C
+
+define(`climb',`%r23') C
+
+ifdef(`HAVE_ABI_2_0w',
+`      .level  2.0w
+',`    .level  2.0
+')
+PROLOGUE(mpn_addmul_1)
+
+ifdef(`HAVE_ABI_2_0w',
+`      std             vlimb, -0x38(%r30)      C store vlimb into "home" slot
+')
+       std,ma          %r3, 0x100(%r30)
+       std             %r4, -0xf8(%r30)
+       std             %r5, -0xf0(%r30)
+       ldo             0(%r0), climb           C clear climb
+       fldd            -0x138(%r30), %fr8      C put vlimb in fp register
+
+define(`p032a1',`%r1') C
+define(`p032a2',`%r19')        C
+
+define(`m032',`%r20')  C
+define(`m096',`%r21')  C
+
+define(`p000a',`%r22') C
+define(`p064a',`%r29') C
+
+define(`s000',`%r31')  C
+
+define(`ma000',`%r4')  C
+define(`ma064',`%r20') C
+
+define(`r000',`%r3')   C
+
+       extrd,u         n, 63, 2, %r5
+       cmpb,=          %r5, %r0, L(BIG)
+       nop
+
+       fldd            0(up), %fr4
+       ldo             8(up), up
+       xmpyu           %fr8R, %fr4L, %fr22
+       xmpyu           %fr8L, %fr4R, %fr23
+       fstd            %fr22, -0x78(%r30)      C mid product to  -0x78..-0x71
+       xmpyu           %fr8R, %fr4R, %fr24
+       xmpyu           %fr8L, %fr4L, %fr25
+       fstd            %fr23, -0x70(%r30)      C mid product to  -0x70..-0x69
+       fstd            %fr24, -0x80(%r30)      C low product to  -0x80..-0x79
+       addib,<>        -1, %r5, L(two_or_more)
+       fstd            %fr25, -0x68(%r30)      C high product to -0x68..-0x61
+LDEF(one)
+       ldd             -0x78(%r30), p032a1
+       ldd             -0x70(%r30), p032a2
+       ldd             -0x80(%r30), p000a
+       b               L(0_one_out)
+       ldd             -0x68(%r30), p064a
+
+LDEF(two_or_more)
+       fldd            0(up), %fr4
+       ldo             8(up), up
+       xmpyu           %fr8R, %fr4L, %fr22
+       xmpyu           %fr8L, %fr4R, %fr23
+       ldd             -0x78(%r30), p032a1
+       fstd            %fr22, -0x78(%r30)      C mid product to  -0x78..-0x71
+       xmpyu           %fr8R, %fr4R, %fr24
+       xmpyu           %fr8L, %fr4L, %fr25
+       ldd             -0x70(%r30), p032a2
+       fstd            %fr23, -0x70(%r30)      C mid product to  -0x70..-0x69
+       ldd             -0x80(%r30), p000a
+       fstd            %fr24, -0x80(%r30)      C low product to  -0x80..-0x79
+       ldd             -0x68(%r30), p064a
+       addib,<>        -1, %r5, L(three_or_more)
+       fstd            %fr25, -0x68(%r30)      C high product to -0x68..-0x61
+LDEF(two)
+       add             p032a1, p032a2, m032
+       add,dc          %r0, %r0, m096
+       depd,z          m032, 31, 32, ma000
+       extrd,u         m032, 31, 32, ma064
+       ldd             0(rp), r000
+       b               L(0_two_out)
+       depd            m096, 31, 32, ma064
+
+LDEF(three_or_more)
+       fldd            0(up), %fr4
+       add             p032a1, p032a2, m032
+       add,dc          %r0, %r0, m096
+       depd,z          m032, 31, 32, ma000
+       extrd,u         m032, 31, 32, ma064
+       ldd             0(rp), r000
+C      addib,=         -1, %r5, L(0_out)
+       depd            m096, 31, 32, ma064
+LDEF(loop0)
+C      xmpyu           %fr8R, %fr4L, %fr22
+C      xmpyu           %fr8L, %fr4R, %fr23
+C      ldd             -0x78(%r30), p032a1
+C      fstd            %fr22, -0x78(%r30)      C mid product to  -0x78..-0x71
+C
+C      xmpyu           %fr8R, %fr4R, %fr24
+C      xmpyu           %fr8L, %fr4L, %fr25
+C      ldd             -0x70(%r30), p032a2
+C      fstd            %fr23, -0x70(%r30)      C mid product to  -0x70..-0x69
+C
+C      ldo             8(rp), rp
+C      add             climb, p000a, s000
+C      ldd             -0x80(%r30), p000a
+C      fstd            %fr24, -0x80(%r30)      C low product to  -0x80..-0x79
+C
+C      add,dc          p064a, %r0, climb
+C      ldo             8(up), up
+C      ldd             -0x68(%r30), p064a
+C      fstd            %fr25, -0x68(%r30)      C high product to -0x68..-0x61
+C
+C      add             ma000, s000, s000
+C      add,dc          ma064, climb, climb
+C      fldd            0(up), %fr4
+C
+C      add             r000, s000, s000
+C      add,dc          %r0, climb, climb
+C      std             s000, -8(rp)
+C
+C      add             p032a1, p032a2, m032
+C      add,dc          %r0, %r0, m096
+C
+C      depd,z          m032, 31, 32, ma000
+C      extrd,u         m032, 31, 32, ma064
+C      ldd             0(rp), r000
+C      addib,<>        -1, %r5, L(loop0)
+C      depd            m096, 31, 32, ma064
+LDEF(0_out)
+       ldo             8(up), up
+       xmpyu           %fr8R, %fr4L, %fr22
+       xmpyu           %fr8L, %fr4R, %fr23
+       ldd             -0x78(%r30), p032a1
+       fstd            %fr22, -0x78(%r30)      C mid product to  -0x78..-0x71
+       xmpyu           %fr8R, %fr4R, %fr24
+       xmpyu           %fr8L, %fr4L, %fr25
+       ldd             -0x70(%r30), p032a2
+       fstd            %fr23, -0x70(%r30)      C mid product to  -0x70..-0x69
+       ldo             8(rp), rp
+       add             climb, p000a, s000
+       ldd             -0x80(%r30), p000a
+       fstd            %fr24, -0x80(%r30)      C low product to  -0x80..-0x79
+       add,dc          p064a, %r0, climb
+       ldd             -0x68(%r30), p064a
+       fstd            %fr25, -0x68(%r30)      C high product to -0x68..-0x61
+       add             ma000, s000, s000
+       add,dc          ma064, climb, climb
+       add             r000, s000, s000
+       add,dc          %r0, climb, climb
+       std             s000, -8(rp)
+       add             p032a1, p032a2, m032
+       add,dc          %r0, %r0, m096
+       depd,z          m032, 31, 32, ma000
+       extrd,u         m032, 31, 32, ma064
+       ldd             0(rp), r000
+       depd            m096, 31, 32, ma064
+LDEF(0_two_out)
+       ldd             -0x78(%r30), p032a1
+       ldd             -0x70(%r30), p032a2
+       ldo             8(rp), rp
+       add             climb, p000a, s000
+       ldd             -0x80(%r30), p000a
+       add,dc          p064a, %r0, climb
+       ldd             -0x68(%r30), p064a
+       add             ma000, s000, s000
+       add,dc          ma064, climb, climb
+       add             r000, s000, s000
+       add,dc          %r0, climb, climb
+       std             s000, -8(rp)
+LDEF(0_one_out)
+       add             p032a1, p032a2, m032
+       add,dc          %r0, %r0, m096
+       depd,z          m032, 31, 32, ma000
+       extrd,u         m032, 31, 32, ma064
+       ldd             0(rp), r000
+       depd            m096, 31, 32, ma064
+
+       add             climb, p000a, s000
+       add,dc          p064a, %r0, climb
+       add             ma000, s000, s000
+       add,dc          ma064, climb, climb
+       add             r000, s000, s000
+       add,dc          %r0, climb, climb
+       std             s000, 0(rp)
+
+       cmpib,>=        4, n, L(done)
+       ldo             8(rp), rp
+
+C 4-way unrolled code.
+
+LDEF(BIG)
+
+define(`p032a1',`%r1') C
+define(`p032a2',`%r19')        C
+define(`p096b1',`%r20')        C
+define(`p096b2',`%r21')        C
+define(`p160c1',`%r22')        C
+define(`p160c2',`%r29')        C
+define(`p224d1',`%r31')        C
+define(`p224d2',`%r3') C
+                       C
+define(`m032',`%r4')   C
+define(`m096',`%r5')   C
+define(`m160',`%r6')   C
+define(`m224',`%r7')   C
+define(`m288',`%r8')   C
+                       C
+define(`p000a',`%r1')  C
+define(`p064a',`%r19') C
+define(`p064b',`%r20') C
+define(`p128b',`%r21') C
+define(`p128c',`%r22') C
+define(`p192c',`%r29') C
+define(`p192d',`%r31') C
+define(`p256d',`%r3')  C
+                       C
+define(`s000',`%r10')  C
+define(`s064',`%r11')  C
+define(`s128',`%r12')  C
+define(`s192',`%r13')  C
+                       C
+define(`ma000',`%r9')  C
+define(`ma064',`%r4')  C
+define(`ma128',`%r5')  C
+define(`ma192',`%r6')  C
+define(`ma256',`%r7')  C
+                       C
+define(`r000',`%r1')   C
+define(`r064',`%r19')  C
+define(`r128',`%r20')  C
+define(`r192',`%r21')  C
+
+       std             %r6, -0xe8(%r30)
+       std             %r7, -0xe0(%r30)
+       std             %r8, -0xd8(%r30)
+       std             %r9, -0xd0(%r30)
+       std             %r10, -0xc8(%r30)
+       std             %r11, -0xc0(%r30)
+       std             %r12, -0xb8(%r30)
+       std             %r13, -0xb0(%r30)
+
+ifdef(`HAVE_ABI_2_0w',
+`      extrd,u         n, 61, 62, n            C right shift 2
+',`    extrd,u         n, 61, 30, n            C right shift 2, zero extend
+')
+
+LDEF(4_or_more)
+       fldd            0(up), %fr4
+       fldd            8(up), %fr5
+       fldd            16(up), %fr6
+       fldd            24(up), %fr7
+       xmpyu           %fr8R, %fr4L, %fr22
+       xmpyu           %fr8L, %fr4R, %fr23
+       xmpyu           %fr8R, %fr5L, %fr24
+       xmpyu           %fr8L, %fr5R, %fr25
+       xmpyu           %fr8R, %fr6L, %fr26
+       xmpyu           %fr8L, %fr6R, %fr27
+       fstd            %fr22, -0x78(%r30)      C mid product to  -0x78..-0x71
+       xmpyu           %fr8R, %fr7L, %fr28
+       xmpyu           %fr8L, %fr7R, %fr29
+       fstd            %fr23, -0x70(%r30)      C mid product to  -0x70..-0x69
+       xmpyu           %fr8R, %fr4R, %fr30
+       xmpyu           %fr8L, %fr4L, %fr31
+       fstd            %fr24, -0x38(%r30)      C mid product to  -0x38..-0x31
+       xmpyu           %fr8R, %fr5R, %fr22
+       xmpyu           %fr8L, %fr5L, %fr23
+       fstd            %fr25, -0x30(%r30)      C mid product to  -0x30..-0x29
+       xmpyu           %fr8R, %fr6R, %fr24
+       xmpyu           %fr8L, %fr6L, %fr25
+       fstd            %fr26, -0x58(%r30)      C mid product to  -0x58..-0x51
+       xmpyu           %fr8R, %fr7R, %fr26
+       fstd            %fr27, -0x50(%r30)      C mid product to  -0x50..-0x49
+       addib,<>        -1, n, L(8_or_more)
+       xmpyu           %fr8L, %fr7L, %fr27
+       fstd            %fr28, -0x18(%r30)      C mid product to  -0x18..-0x11
+       fstd            %fr29, -0x10(%r30)      C mid product to  -0x10..-0x09
+       fstd            %fr30, -0x80(%r30)      C low product to  -0x80..-0x79
+       fstd            %fr31, -0x68(%r30)      C high product to -0x68..-0x61
+       fstd            %fr22, -0x40(%r30)      C low product to  -0x40..-0x39
+       fstd            %fr23, -0x28(%r30)      C high product to -0x28..-0x21
+       fstd            %fr24, -0x60(%r30)      C low product to  -0x60..-0x59
+       fstd            %fr25, -0x48(%r30)      C high product to -0x48..-0x41
+       fstd            %fr26, -0x20(%r30)      C low product to  -0x20..-0x19
+       fstd            %fr27, -0x88(%r30)      C high product to -0x88..-0x81
+       ldd             -0x78(%r30), p032a1
+       ldd             -0x70(%r30), p032a2
+       ldd             -0x38(%r30), p096b1
+       ldd             -0x30(%r30), p096b2
+       ldd             -0x58(%r30), p160c1
+       ldd             -0x50(%r30), p160c2
+       ldd             -0x18(%r30), p224d1
+       ldd             -0x10(%r30), p224d2
+       b               L(end1)
+       nop
+
+LDEF(8_or_more)
+       fstd            %fr28, -0x18(%r30)      C mid product to  -0x18..-0x11
+       fstd            %fr29, -0x10(%r30)      C mid product to  -0x10..-0x09
+       ldo             32(up), up
+       fstd            %fr30, -0x80(%r30)      C low product to  -0x80..-0x79
+       fstd            %fr31, -0x68(%r30)      C high product to -0x68..-0x61
+       fstd            %fr22, -0x40(%r30)      C low product to  -0x40..-0x39
+       fstd            %fr23, -0x28(%r30)      C high product to -0x28..-0x21
+       fstd            %fr24, -0x60(%r30)      C low product to  -0x60..-0x59
+       fstd            %fr25, -0x48(%r30)      C high product to -0x48..-0x41
+       fstd            %fr26, -0x20(%r30)      C low product to  -0x20..-0x19
+       fstd            %fr27, -0x88(%r30)      C high product to -0x88..-0x81
+       fldd            0(up), %fr4
+       fldd            8(up), %fr5
+       fldd            16(up), %fr6
+       fldd            24(up), %fr7
+       xmpyu           %fr8R, %fr4L, %fr22
+       ldd             -0x78(%r30), p032a1
+       xmpyu           %fr8L, %fr4R, %fr23
+       xmpyu           %fr8R, %fr5L, %fr24
+       ldd             -0x70(%r30), p032a2
+       xmpyu           %fr8L, %fr5R, %fr25
+       xmpyu           %fr8R, %fr6L, %fr26
+       ldd             -0x38(%r30), p096b1
+       xmpyu           %fr8L, %fr6R, %fr27
+       fstd            %fr22, -0x78(%r30)      C mid product to  -0x78..-0x71
+       xmpyu           %fr8R, %fr7L, %fr28
+       ldd             -0x30(%r30), p096b2
+       xmpyu           %fr8L, %fr7R, %fr29
+       fstd            %fr23, -0x70(%r30)      C mid product to  -0x70..-0x69
+       xmpyu           %fr8R, %fr4R, %fr30
+       ldd             -0x58(%r30), p160c1
+       xmpyu           %fr8L, %fr4L, %fr31
+       fstd            %fr24, -0x38(%r30)      C mid product to  -0x38..-0x31
+       xmpyu           %fr8R, %fr5R, %fr22
+       ldd             -0x50(%r30), p160c2
+       xmpyu           %fr8L, %fr5L, %fr23
+       fstd            %fr25, -0x30(%r30)      C mid product to  -0x30..-0x29
+       xmpyu           %fr8R, %fr6R, %fr24
+       ldd             -0x18(%r30), p224d1
+       xmpyu           %fr8L, %fr6L, %fr25
+       fstd            %fr26, -0x58(%r30)      C mid product to  -0x58..-0x51
+       xmpyu           %fr8R, %fr7R, %fr26
+       ldd             -0x10(%r30), p224d2
+       fstd            %fr27, -0x50(%r30)      C mid product to  -0x50..-0x49
+       addib,=         -1, n, L(end2)
+       xmpyu           %fr8L, %fr7L, %fr27
+LDEF(loop)
+       add             p032a1, p032a2, m032
+       ldd             -0x80(%r30), p000a
+       add,dc          p096b1, p096b2, m096
+       fstd            %fr28, -0x18(%r30)      C mid product to  -0x18..-0x11
+
+       add,dc          p160c1, p160c2, m160
+       ldd             -0x68(%r30), p064a
+       add,dc          p224d1, p224d2, m224
+       fstd            %fr29, -0x10(%r30)      C mid product to  -0x10..-0x09
+
+       add,dc          %r0, %r0, m288
+       ldd             -0x40(%r30), p064b
+       ldo             32(up), up
+       fstd            %fr30, -0x80(%r30)      C low product to  -0x80..-0x79
+
+       depd,z          m032, 31, 32, ma000
+       ldd             -0x28(%r30), p128b
+       extrd,u         m032, 31, 32, ma064
+       fstd            %fr31, -0x68(%r30)      C high product to -0x68..-0x61
+
+       depd            m096, 31, 32, ma064
+       ldd             -0x60(%r30), p128c
+       extrd,u         m096, 31, 32, ma128
+       fstd            %fr22, -0x40(%r30)      C low product to  -0x40..-0x39
+
+       depd            m160, 31, 32, ma128
+       ldd             -0x48(%r30), p192c
+       extrd,u         m160, 31, 32, ma192
+       fstd            %fr23, -0x28(%r30)      C high product to -0x28..-0x21
+
+       depd            m224, 31, 32, ma192
+       ldd             -0x20(%r30), p192d
+       extrd,u         m224, 31, 32, ma256
+       fstd            %fr24, -0x60(%r30)      C low product to  -0x60..-0x59
+
+       depd            m288, 31, 32, ma256
+       ldd             -0x88(%r30), p256d
+       add             climb, p000a, s000
+       fstd            %fr25, -0x48(%r30)      C high product to -0x48..-0x41
+
+       add,dc          p064a, p064b, s064
+       ldd             0(rp), r000
+       add,dc          p128b, p128c, s128
+       fstd            %fr26, -0x20(%r30)      C low product to  -0x20..-0x19
+
+       add,dc          p192c, p192d, s192
+       ldd             8(rp), r064
+       add,dc          p256d, %r0, climb
+       fstd            %fr27, -0x88(%r30)      C high product to -0x88..-0x81
+
+       ldd             16(rp), r128
+       add             ma000, s000, s000       C accum mid 0
+       ldd             24(rp), r192
+       add,dc          ma064, s064, s064       C accum mid 1
+
+       add,dc          ma128, s128, s128       C accum mid 2
+       fldd            0(up), %fr4
+       add,dc          ma192, s192, s192       C accum mid 3
+       fldd            8(up), %fr5
+
+       add,dc          ma256, climb, climb
+       fldd            16(up), %fr6
+       add             r000, s000, s000        C accum rlimb 0
+       fldd            24(up), %fr7
+
+       add,dc          r064, s064, s064        C accum rlimb 1
+       add,dc          r128, s128, s128        C accum rlimb 2
+       std             s000, 0(rp)
+
+       add,dc          r192, s192, s192        C accum rlimb 3
+       add,dc          %r0, climb, climb
+       std             s064, 8(rp)
+
+       xmpyu           %fr8R, %fr4L, %fr22
+       ldd             -0x78(%r30), p032a1
+       xmpyu           %fr8L, %fr4R, %fr23
+       std             s128, 16(rp)
+
+       xmpyu           %fr8R, %fr5L, %fr24
+       ldd             -0x70(%r30), p032a2
+       xmpyu           %fr8L, %fr5R, %fr25
+       std             s192, 24(rp)
+
+       xmpyu           %fr8R, %fr6L, %fr26
+       ldd             -0x38(%r30), p096b1
+       xmpyu           %fr8L, %fr6R, %fr27
+       fstd            %fr22, -0x78(%r30)      C mid product to  -0x78..-0x71
+
+       xmpyu           %fr8R, %fr7L, %fr28
+       ldd             -0x30(%r30), p096b2
+       xmpyu           %fr8L, %fr7R, %fr29
+       fstd            %fr23, -0x70(%r30)      C mid product to  -0x70..-0x69
+
+       xmpyu           %fr8R, %fr4R, %fr30
+       ldd             -0x58(%r30), p160c1
+       xmpyu           %fr8L, %fr4L, %fr31
+       fstd            %fr24, -0x38(%r30)      C mid product to  -0x38..-0x31
+
+       xmpyu           %fr8R, %fr5R, %fr22
+       ldd             -0x50(%r30), p160c2
+       xmpyu           %fr8L, %fr5L, %fr23
+       fstd            %fr25, -0x30(%r30)      C mid product to  -0x30..-0x29
+
+       xmpyu           %fr8R, %fr6R, %fr24
+       ldd             -0x18(%r30), p224d1
+       xmpyu           %fr8L, %fr6L, %fr25
+       fstd            %fr26, -0x58(%r30)      C mid product to  -0x58..-0x51
+
+       xmpyu           %fr8R, %fr7R, %fr26
+       ldd             -0x10(%r30), p224d2
+       fstd            %fr27, -0x50(%r30)      C mid product to  -0x50..-0x49
+       xmpyu           %fr8L, %fr7L, %fr27
+
+       addib,<>        -1, n, L(loop)
+       ldo             32(rp), rp
+
+LDEF(end2)
+       add             p032a1, p032a2, m032
+       ldd             -0x80(%r30), p000a
+       add,dc          p096b1, p096b2, m096
+       fstd            %fr28, -0x18(%r30)      C mid product to  -0x18..-0x11
+       add,dc          p160c1, p160c2, m160
+       ldd             -0x68(%r30), p064a
+       add,dc          p224d1, p224d2, m224
+       fstd            %fr29, -0x10(%r30)      C mid product to  -0x10..-0x09
+       add,dc          %r0, %r0, m288
+       ldd             -0x40(%r30), p064b
+       fstd            %fr30, -0x80(%r30)      C low product to  -0x80..-0x79
+       depd,z          m032, 31, 32, ma000
+       ldd             -0x28(%r30), p128b
+       extrd,u         m032, 31, 32, ma064
+       fstd            %fr31, -0x68(%r30)      C high product to -0x68..-0x61
+       depd            m096, 31, 32, ma064
+       ldd             -0x60(%r30), p128c
+       extrd,u         m096, 31, 32, ma128
+       fstd            %fr22, -0x40(%r30)      C low product to  -0x40..-0x39
+       depd            m160, 31, 32, ma128
+       ldd             -0x48(%r30), p192c
+       extrd,u         m160, 31, 32, ma192
+       fstd            %fr23, -0x28(%r30)      C high product to -0x28..-0x21
+       depd            m224, 31, 32, ma192
+       ldd             -0x20(%r30), p192d
+       extrd,u         m224, 31, 32, ma256
+       fstd            %fr24, -0x60(%r30)      C low product to  -0x60..-0x59
+       depd            m288, 31, 32, ma256
+       ldd             -0x88(%r30), p256d
+       add             climb, p000a, s000
+       fstd            %fr25, -0x48(%r30)      C high product to -0x48..-0x41
+       add,dc          p064a, p064b, s064
+       ldd             0(rp), r000
+       add,dc          p128b, p128c, s128
+       fstd            %fr26, -0x20(%r30)      C low product to  -0x20..-0x19
+       add,dc          p192c, p192d, s192
+       ldd             8(rp), r064
+       add,dc          p256d, %r0, climb
+       fstd            %fr27, -0x88(%r30)      C high product to -0x88..-0x81
+       ldd             16(rp), r128
+       add             ma000, s000, s000       C accum mid 0
+       ldd             24(rp), r192
+       add,dc          ma064, s064, s064       C accum mid 1
+       add,dc          ma128, s128, s128       C accum mid 2
+       add,dc          ma192, s192, s192       C accum mid 3
+       add,dc          ma256, climb, climb
+       add             r000, s000, s000        C accum rlimb 0
+       add,dc          r064, s064, s064        C accum rlimb 1
+       add,dc          r128, s128, s128        C accum rlimb 2
+       std             s000, 0(rp)
+       add,dc          r192, s192, s192        C accum rlimb 3
+       add,dc          %r0, climb, climb
+       std             s064, 8(rp)
+       ldd             -0x78(%r30), p032a1
+       std             s128, 16(rp)
+       ldd             -0x70(%r30), p032a2
+       std             s192, 24(rp)
+       ldd             -0x38(%r30), p096b1
+       ldd             -0x30(%r30), p096b2
+       ldd             -0x58(%r30), p160c1
+       ldd             -0x50(%r30), p160c2
+       ldd             -0x18(%r30), p224d1
+       ldd             -0x10(%r30), p224d2
+       ldo             32(rp), rp
+
+LDEF(end1)
+       add             p032a1, p032a2, m032
+       ldd             -0x80(%r30), p000a
+       add,dc          p096b1, p096b2, m096
+       add,dc          p160c1, p160c2, m160
+       ldd             -0x68(%r30), p064a
+       add,dc          p224d1, p224d2, m224
+       add,dc          %r0, %r0, m288
+       ldd             -0x40(%r30), p064b
+       depd,z          m032, 31, 32, ma000
+       ldd             -0x28(%r30), p128b
+       extrd,u         m032, 31, 32, ma064
+       depd            m096, 31, 32, ma064
+       ldd             -0x60(%r30), p128c
+       extrd,u         m096, 31, 32, ma128
+       depd            m160, 31, 32, ma128
+       ldd             -0x48(%r30), p192c
+       extrd,u         m160, 31, 32, ma192
+       depd            m224, 31, 32, ma192
+       ldd             -0x20(%r30), p192d
+       extrd,u         m224, 31, 32, ma256
+       depd            m288, 31, 32, ma256
+       ldd             -0x88(%r30), p256d
+       add             climb, p000a, s000
+       add,dc          p064a, p064b, s064
+       ldd             0(rp), r000
+       add,dc          p128b, p128c, s128
+       add,dc          p192c, p192d, s192
+       ldd             8(rp), r064
+       add,dc          p256d, %r0, climb
+       ldd             16(rp), r128
+       add             ma000, s000, s000       C accum mid 0
+       ldd             24(rp), r192
+       add,dc          ma064, s064, s064       C accum mid 1
+       add,dc          ma128, s128, s128       C accum mid 2
+       add,dc          ma192, s192, s192       C accum mid 3
+       add,dc          ma256, climb, climb
+       add             r000, s000, s000        C accum rlimb 0
+       add,dc          r064, s064, s064        C accum rlimb 1
+       add,dc          r128, s128, s128        C accum rlimb 2
+       std             s000, 0(rp)
+       add,dc          r192, s192, s192        C accum rlimb 3
+       add,dc          %r0, climb, climb
+       std             s064, 8(rp)
+       std             s128, 16(rp)
+       std             s192, 24(rp)
+
+       ldd             -0xb0(%r30), %r13
+       ldd             -0xb8(%r30), %r12
+       ldd             -0xc0(%r30), %r11
+       ldd             -0xc8(%r30), %r10
+       ldd             -0xd0(%r30), %r9
+       ldd             -0xd8(%r30), %r8
+       ldd             -0xe0(%r30), %r7
+       ldd             -0xe8(%r30), %r6
+LDEF(done)
+ifdef(`HAVE_ABI_2_0w',
+`      copy            climb, %r28
+',`    extrd,u         climb, 63, 32, %r29
+       extrd,u         climb, 31, 32, %r28
+')
+       ldd             -0xf0(%r30), %r5
+       ldd             -0xf8(%r30), %r4
+       bve             (%r2)
+       ldd,mb          -0x100(%r30), %r3
+EPILOGUE(mpn_addmul_1)
diff --git a/mpn/pa64/aors_n.asm b/mpn/pa64/aors_n.asm

new file mode 100644 (file)

index 0000000..4048e8c
--- /dev/null
+++ b/mpn/pa64/aors_n.asm
@@ -0,0 +1,119 @@
+dnl  HP-PA 2.0 mpn_add_n, mpn_sub_n
+
+dnl  Copyright 1997, 2000, 2002, 2003, 2009, 2010 Free Software Foundation,
+dnl  Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+dnl  This runs at 2 cycles/limb on PA8000 and 1.6875 cycles/limb on PA8500.  It
+dnl  should be possible to reach the cache bandwidth 1.5 cycles/limb at least
+dnl  with PA8500.  The problem now is stalling of the first ADD,DC after LDO,
+dnl  where the processor gets confused about where carry comes from.
+
+include(`../config.m4')
+
+dnl INPUT PARAMETERS
+define(`rp',`%r26')    C pointer the result limbs are stored through
+define(`up',`%r25')    C pointer to the first source operand limbs
+define(`vp',`%r24')    C pointer to the second source operand limbs
+define(`n',`%r23')     C limb count, decremented by 8 per loop pass
+
+ifdef(`OPERATION_add_n', `
+       define(ADCSBC,        `add,dc')                 C per-limb op: add with carry
+       define(INITCY,        `addi -1,%r22,%r0')       C result discarded; intended to set carry iff %r22 != 0
+       define(func,          mpn_add_n)                C entry point without carry-in
+       define(func_nc,       mpn_add_nc)')
+ifdef(`OPERATION_sub_n', `
+       define(ADCSBC,        `sub,db')                 C per-limb op: subtract with borrow
+       define(INITCY,        `subi 0,%r22,%r0')        C result discarded; intended to set borrow iff %r22 != 0
+       define(func,          mpn_sub_n)                C entry point without borrow-in
+       define(func_nc,       mpn_sub_nc)')
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
+
+ifdef(`HAVE_ABI_2_0w',
+`       .level  2.0w
+',`     .level  2.0
+')
+PROLOGUE(func_nc)
+ifdef(`HAVE_ABI_2_0w',
+`      b               L(com)                  C carry-in entry: join the shared code with %r22 as received
+       nop                                     C branch delay slot, nothing to do
+',`    b               L(com)                  C carry-in entry: join the shared code at com
+       ldw             -52(%r30), %r22         C delay slot: fetch the carry-in word from the stack into %r22
+')
+EPILOGUE()
+PROLOGUE(func)
+       ldi             0, %r22                 C plain entry: carry-in word is zero
+LDEF(com)
+       sub             %r0, n, %r21            C r21 = -n
+       depw,z          %r21, 30, 3, %r28       C r28 = 2 * (-n & 7), index used by blr below
+       depw,z          %r21, 28, 3, %r21       C r21 = 8 * (-n & 7), byte size of the skipped limbs
+       sub             up, %r21, up            C offset up so the entry slot addresses the first real limb
+       sub             vp, %r21, vp            C offset vp likewise
+       sub             rp, %r21, rp            C offset rp likewise
+       blr             %r28, %r0               C branch into loop, each slot being 4 insns
+       INITCY                                  C delay slot: seed carry/borrow from %r22
+
+LDEF(loop)
+       ldd             0(up), %r20             C slot 0: load u limb
+       ldd             0(vp), %r31             C          load v limb
+       ADCSBC          %r20, %r31, %r20        C          combine, chaining carry/borrow
+       std             %r20, 0(rp)             C          store result limb
+LDEF(7)        ldd             8(up), %r21     C slot 1: entry when n mod 8 = 7
+       ldd             8(vp), %r19
+       ADCSBC          %r21, %r19, %r21
+       std             %r21, 8(rp)
+LDEF(6)        ldd             16(up), %r20    C slot 2: entry when n mod 8 = 6
+       ldd             16(vp), %r31
+       ADCSBC          %r20, %r31, %r20
+       std             %r20, 16(rp)
+LDEF(5)        ldd             24(up), %r21    C slot 3: entry when n mod 8 = 5
+       ldd             24(vp), %r19
+       ADCSBC          %r21, %r19, %r21
+       std             %r21, 24(rp)
+LDEF(4)        ldd             32(up), %r20    C slot 4: entry when n mod 8 = 4
+       ldd             32(vp), %r31
+       ADCSBC          %r20, %r31, %r20
+       std             %r20, 32(rp)
+LDEF(3)        ldd             40(up), %r21    C slot 5: entry when n mod 8 = 3
+       ldd             40(vp), %r19
+       ADCSBC          %r21, %r19, %r21
+       std             %r21, 40(rp)
+LDEF(2)        ldd             48(up), %r20    C slot 6: entry when n mod 8 = 2
+       ldd             48(vp), %r31
+       ADCSBC          %r20, %r31, %r20
+       std             %r20, 48(rp)
+LDEF(1)        ldd             56(up), %r21    C slot 7: entry when n mod 8 = 1
+       ldd             56(vp), %r19
+       ADCSBC          %r21, %r19, %r21
+       ldo             64(up), up              C advance up by 8 limbs
+       std             %r21, 56(rp)
+       ldo             64(vp), vp              C advance vp by 8 limbs
+       addib,>         -8, n, L(loop)          C n -= 8, loop while n > 0
+       ldo             64(rp), rp              C delay slot: advance rp by 8 limbs
+
+       add,dc          %r0, %r0, %r29          C r29 = final carry bit
+ifdef(`OPERATION_sub_n',`
+       subi            1, %r29, %r29           C r29 = 1 - r29, inverting the bit to give the borrow out
+')
+       bve             (%r2)                   C return to caller
+ifdef(`HAVE_ABI_2_0w',
+`      copy            %r29, %r28              C delay slot: return value goes in %r28
+',`    ldi             0, %r28                 C delay slot: high return word is 0, low word is already in %r29
+')
+EPILOGUE()
diff --git a/mpn/pa64/aorslsh1_n.asm b/mpn/pa64/aorslsh1_n.asm

new file mode 100644 (file)

index 0000000..b2cca7a
--- /dev/null
+++ b/mpn/pa64/aorslsh1_n.asm
@@ -0,0 +1,217 @@
+dnl  PA64 mpn_addlsh1_n/mpn_sublsh1_n -- rp[] = up[] +- (vp[] << 1).
+
+dnl  Copyright 2003 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                  cycles/limb
+C 8000,8200:           2
+C 8500,8600,8700:      1.75
+
+C TODO
+C  * Write special feed-in code for each (n mod 8). (See the ia64 code.)
+C  * Try to make this run at closer to 1.5 c/l.
+C  * Set up register aliases (define(`u0',`%r19')).
+C  * Explicitly align loop.
+
+dnl INPUT PARAMETERS
+define(`rp',`%r26')    C pointer the result limbs are stored through
+define(`up',`%r25')    C pointer to the unshifted source limbs
+define(`vp',`%r24')    C pointer to the source limbs that get shifted left by 1
+define(`n',`%r23')     C limb count
+
+ifdef(`OPERATION_addlsh1_n',`
+  define(ADCSBC,       `add,dc')       C per-limb op: add with carry
+  define(INITC,                `ldi    0,')    C initial saved cy word is 0
+  define(func, mpn_addlsh1_n)
+')
+ifdef(`OPERATION_sublsh1_n',`
+  define(ADCSBC,       `sub,db')       C per-limb op: subtract with borrow
+  define(INITC,                `ldi    1,')    C initial saved cy word is 1
+  define(func, mpn_sublsh1_n)
+')
+
+MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)
+
+ifdef(`HAVE_ABI_2_0w',`
+  define(LEVEL,                `.level 2.0w')
+  define(RETREG,       `%r28') C register receiving the return value
+  define(CLRRET1,      `dnl')  C nothing extra to clear in this ABI
+')
+ifdef(`HAVE_ABI_2_0n',`
+  define(LEVEL,                `.level 2.0')
+  define(RETREG,       `%r29') C register receiving the return value
+  define(CLRRET1,      `ldi    0, %r28')       C zero %r28 next to the value in %r29
+')
+
+       LEVEL
+PROLOGUE(func)
+       std,ma          %r3, 0x100(%r30)        C save %r3, then bump %r30 by 0x100 for the frame
+
+       INITC           %r1                     C init saved cy: 0 for add, 1 for sub
+
+C Primitive code for the first (n mod 8) limbs:
+       extrd,u         n, 63, 3, %r22          C r22 = low 3 bits of n, count for loop0
+       comib,=         0, %r22, L(unrolled)    C skip loop0 when n is a multiple of 8
+       copy            %r0, %r28               C delay slot: r28 = previous v limb, none yet
+LDEF(loop0)
+       ldd     0(vp), %r21                     C current v limb
+       ldo             8(vp), vp
+       ldd     0(up), %r19                     C current u limb
+       ldo             8(up), up
+       shrpd   %r21, %r28, 63, %r31            C r31 = v << 1 with top bit of previous v limb shifted in
+       addi            -1, %r1, %r0            C restore cy
+       ADCSBC  %r19, %r31, %r29                C u +- shifted v, with carry/borrow
+       std     %r29, 0(rp)                     C store result limb
+       add,dc          %r0, %r0, %r1           C save cy
+       copy    %r21, %r28                      C keep unshifted v limb for the next shift
+       addib,>         -1, %r22, L(loop0)      C repeat while the loop0 count is positive
+       ldo             8(rp), rp               C delay slot: advance rp
+
+       addib,>=        -8, n, L(unrolled)      C n -= 8, continue with unrolled code if n >= 0
+       addi            -1, %r1, %r0            C restore cy
+
+       shrpd   %r0, %r28, 63, %r28             C r28 = bit shifted out of the last v limb
+       ADCSBC  %r0, %r28, RETREG               C RETREG = 0 +- that bit with carry/borrow folded in
+ifdef(`OPERATION_sublsh1_n',
+`      sub     %r0, RETREG, RETREG')
+       CLRRET1
+
+       bve             (%r2)                   C return to caller
+       ldd,mb          -0x100(%r30), %r3       C delay slot: drop the frame and restore %r3
+
+
+LDEF(unrolled)
+       std             %r4, -0xf8(%r30)        C save reg
+       ldd     0(vp), %r4                      C v[0]
+       std             %r5, -0xf0(%r30)        C save reg
+       ldd     8(vp), %r5                      C v[1]
+       std             %r6, -0xe8(%r30)        C save reg
+       ldd     16(vp), %r6                     C v[2]
+       std             %r7, -0xe0(%r30)        C save reg
+
+       ldd     24(vp), %r7                     C v[3]
+       shrpd   %r4, %r28, 63, %r31             C r31 = v[0] << 1 | top bit of previous v limb
+       std             %r8, -0xd8(%r30)        C save reg
+       ldd     32(vp), %r8                     C v[4]
+       shrpd   %r5, %r4, 63, %r4               C r4 = v[1] << 1 | v[0] >> 63
+       std             %r9, -0xd0(%r30)        C save reg
+       ldd     40(vp), %r9                     C v[5]
+       shrpd   %r6, %r5, 63, %r5               C r5 = v[2] << 1 | v[1] >> 63
+       ldd     48(vp), %r3                     C v[6]
+       shrpd   %r7, %r6, 63, %r6               C r6 = v[3] << 1 | v[2] >> 63
+       ldd     56(vp), %r28                    C v[7], kept unshifted for the next block
+       shrpd   %r8, %r7, 63, %r7               C r7 = v[4] << 1 | v[3] >> 63
+       ldd     0(up), %r19                     C u[0]
+       shrpd   %r9, %r8, 63, %r8               C r8 = v[5] << 1 | v[4] >> 63
+       ldd     8(up), %r20                     C u[1]
+       shrpd   %r3, %r9, 63, %r9               C r9 = v[6] << 1 | v[5] >> 63
+       ldd     16(up), %r21                    C u[2]
+       shrpd   %r28, %r3, 63, %r3              C r3 = v[7] << 1 | v[6] >> 63
+       ldd     24(up), %r22                    C u[3]
+
+       nop                                     C alignment FIXME
+       addib,<=        -8, n, L(end)           C n -= 8, go to the final block if n <= 0
+       addi            -1, %r1, %r0            C restore cy
+LDEF(loop)
+       ADCSBC  %r19, %r31, %r29                C limb 0
+       ldd     32(up), %r19                    C u[4]
+       std     %r29, 0(rp)
+       ADCSBC  %r20, %r4, %r29                 C limb 1
+       ldd     40(up), %r20                    C u[5]
+       std     %r29, 8(rp)
+       ADCSBC  %r21, %r5, %r29                 C limb 2
+       ldd     48(up), %r21                    C u[6]
+       std     %r29, 16(rp)
+       ADCSBC  %r22, %r6, %r29                 C limb 3
+       ldd     56(up), %r22                    C u[7]
+       std     %r29, 24(rp)
+       ADCSBC  %r19, %r7, %r29                 C limb 4
+       ldd     64(vp), %r4                     C v[8], start loading the next block
+       std     %r29, 32(rp)
+       ADCSBC  %r20, %r8, %r29                 C limb 5
+       ldd     72(vp), %r5                     C v[9]
+       std     %r29, 40(rp)
+       ADCSBC  %r21, %r9, %r29                 C limb 6
+       ldd     80(vp), %r6                     C v[10]
+       std     %r29, 48(rp)
+       ADCSBC  %r22, %r3, %r29                 C limb 7
+       std     %r29, 56(rp)
+
+       add,dc          %r0, %r0, %r1           C save cy
+
+       ldd     88(vp), %r7                     C v[11]
+       shrpd   %r4, %r28, 63, %r31             C r31 = v[8] << 1 | v[7] >> 63
+       ldd     96(vp), %r8                     C v[12]
+       shrpd   %r5, %r4, 63, %r4               C r4 = v[9] << 1 | v[8] >> 63
+       ldd     104(vp), %r9                    C v[13]
+       shrpd   %r6, %r5, 63, %r5               C r5 = v[10] << 1 | v[9] >> 63
+       ldd     112(vp), %r3                    C v[14]
+       shrpd   %r7, %r6, 63, %r6               C r6 = v[11] << 1 | v[10] >> 63
+       ldd     120(vp), %r28                   C v[15], kept unshifted for the next block
+       shrpd   %r8, %r7, 63, %r7               C r7 = v[12] << 1 | v[11] >> 63
+       ldd     64(up), %r19                    C u[8]
+       shrpd   %r9, %r8, 63, %r8               C r8 = v[13] << 1 | v[12] >> 63
+       ldd     72(up), %r20                    C u[9]
+       shrpd   %r3, %r9, 63, %r9               C r9 = v[14] << 1 | v[13] >> 63
+       ldd     80(up), %r21                    C u[10]
+       shrpd   %r28, %r3, 63, %r3              C r3 = v[15] << 1 | v[14] >> 63
+       ldd     88(up), %r22                    C u[11]
+
+       ldo             64(vp), vp              C advance all three pointers by 8 limbs
+       ldo             64(rp), rp
+       ldo             64(up), up
+       addib,>         -8, n, L(loop)          C n -= 8, loop while n > 0
+       addi            -1, %r1, %r0            C restore cy
+LDEF(end)
+       ADCSBC  %r19, %r31, %r29                C limb 0 of the final block
+       ldd     32(up), %r19                    C u[4]
+       std     %r29, 0(rp)
+       ADCSBC  %r20, %r4, %r29                 C limb 1
+       ldd     40(up), %r20                    C u[5]
+       std     %r29, 8(rp)
+       ADCSBC  %r21, %r5, %r29                 C limb 2
+       ldd     48(up), %r21                    C u[6]
+       std     %r29, 16(rp)
+       ADCSBC  %r22, %r6, %r29                 C limb 3
+       ldd     56(up), %r22                    C u[7]
+       std     %r29, 24(rp)
+       ADCSBC  %r19, %r7, %r29                 C limb 4
+       ldd             -0xf8(%r30), %r4        C restore reg
+       std     %r29, 32(rp)
+       ADCSBC  %r20, %r8, %r29                 C limb 5
+       ldd             -0xf0(%r30), %r5        C restore reg
+       std     %r29, 40(rp)
+       ADCSBC  %r21, %r9, %r29                 C limb 6
+       ldd             -0xe8(%r30), %r6        C restore reg
+       std     %r29, 48(rp)
+       ADCSBC  %r22, %r3, %r29                 C limb 7
+       ldd             -0xe0(%r30), %r7        C restore reg
+       std     %r29, 56(rp)
+
+       shrpd   %r0, %r28, 63, %r28             C r28 = bit shifted out of the last v limb
+       ldd             -0xd8(%r30), %r8        C restore reg
+       ADCSBC  %r0, %r28, RETREG               C RETREG = 0 +- that bit with carry/borrow folded in
+ifdef(`OPERATION_sublsh1_n',
+`      sub     %r0, RETREG, RETREG')
+       CLRRET1
+
+       ldd             -0xd0(%r30), %r9        C restore reg
+       bve             (%r2)                   C return to caller
+       ldd,mb          -0x100(%r30), %r3       C restore reg and drop the frame, in the delay slot
+EPILOGUE()
diff --git a/mpn/pa64/gmp-mparam.h b/mpn/pa64/gmp-mparam.h

new file mode 100644 (file)

index 0000000..428615f
--- /dev/null
+++ b/mpn/pa64/gmp-mparam.h
@@ -0,0 +1,230 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2004, 2008, 2009,
+2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by the
+Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License along
+with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 64
+#define BYTES_PER_MP_LIMB 8
+
+/* 440MHz PA8200 */
+
+#define DIVREM_1_NORM_THRESHOLD              0  /* always */
+#define DIVREM_1_UNNORM_THRESHOLD            0  /* always */
+#define MOD_1_NORM_THRESHOLD                 0  /* always */
+#define MOD_1_UNNORM_THRESHOLD               0  /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD      MP_SIZE_T_MAX  /* never */
+#define MOD_1U_TO_MOD_1_1_THRESHOLD      MP_SIZE_T_MAX  /* never */
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD     MP_SIZE_T_MAX  /* never */
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD     MP_SIZE_T_MAX  /* never */
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD  MP_SIZE_T_MAX  /* never */
+#define USE_PREINV_DIVREM_1                  1
+#define DIVREM_2_THRESHOLD                   0  /* always */
+#define DIVEXACT_1_THRESHOLD                 0  /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD        MP_SIZE_T_MAX  /* never */
+
+#define MUL_TOOM22_THRESHOLD                30
+#define MUL_TOOM33_THRESHOLD               113
+#define MUL_TOOM44_THRESHOLD               195
+#define MUL_TOOM6H_THRESHOLD               222
+#define MUL_TOOM8H_THRESHOLD               236
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD     130
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     229
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     132
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD      54
+
+#define SQR_BASECASE_THRESHOLD               4
+#define SQR_TOOM2_THRESHOLD                 54
+#define SQR_TOOM3_THRESHOLD                169
+#define SQR_TOOM4_THRESHOLD                280
+#define SQR_TOOM6_THRESHOLD                280
+#define SQR_TOOM8_THRESHOLD                296
+
+#define MULMOD_BNM1_THRESHOLD               15
+#define SQRMOD_BNM1_THRESHOLD               17
+
+#define MUL_FFT_MODF_THRESHOLD             336  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    336, 5}, {     11, 4}, {     23, 5}, {     21, 6}, \
+    {     11, 5}, {     23, 6}, {     21, 7}, {     11, 6}, \
+    {     23, 7}, {     15, 6}, {     31, 7}, {     21, 8}, \
+    {     11, 7}, {     24, 8}, {     13, 7}, {     27, 8}, \
+    {     15, 7}, {     31, 8}, {     19, 7}, {     39, 8}, \
+    {     27, 9}, {     15, 8}, {     33, 9}, {     19, 8}, \
+    {     39, 9}, {     23, 8}, {     47, 9}, {     27,10}, \
+    {     15, 9}, {     39,10}, {     23, 9}, {     51,10}, \
+    {     31, 9}, {     67,10}, {     39, 9}, {     79,10}, \
+    {     47, 9}, {     95,10}, {     55,11}, {     31,10}, \
+    {     63, 9}, {    127,10}, {     71, 8}, {    287,10}, \
+    {     79,11}, {     47,10}, {     95, 9}, {    191, 8}, \
+    {    383, 7}, {    767,10}, {    103, 9}, {    207, 8}, \
+    {    415, 7}, {    831,12}, {     31,11}, {     63,10}, \
+    {    127, 9}, {    255, 8}, {    543, 7}, {   1087, 6}, \
+    {   2175,10}, {    143, 9}, {    287, 8}, {    575,11}, \
+    {     79, 9}, {    319, 8}, {    639, 7}, {   1279, 9}, \
+    {    335, 8}, {    671,10}, {    175, 9}, {    351, 8}, \
+    {    703,11}, {     95,10}, {    191, 9}, {    383, 8}, \
+    {    767,10}, {    207, 9}, {    415, 8}, {    831, 7}, \
+    {   1663,11}, {    111,10}, {    223, 9}, {    447, 8}, \
+    {    895,12}, {     63,11}, {    127,10}, {    255, 9}, \
+    {    543, 8}, {   1087, 7}, {   2175,10}, {    287, 9}, \
+    {    575, 8}, {   1215, 7}, {   2431,10}, {    319, 9}, \
+    {    639, 8}, {   1279,10}, {    335, 9}, {    671, 8}, \
+    {   1343, 9}, {    703, 8}, {   1407,12}, {     95,11}, \
+    {    191,10}, {    383,11}, {    207, 9}, {    831, 8}, \
+    {   1663,11}, {    223,10}, {    447, 9}, {    959,13}, \
+    {     63,12}, {    127,11}, {    255, 8}, {   2047,11}, \
+    {    271,10}, {    543, 9}, {   1087, 8}, {   2175,11}, \
+    {    287,10}, {    575, 9}, {   1215, 8}, {   2431,11}, \
+    {    319,10}, {    671, 9}, {   1343, 8}, {   2687,11}, \
+    {    351,10}, {    703, 9}, {   1471, 8}, {   2943,12}, \
+    {    191,11}, {    383, 8}, {   3071,11}, {    415,10}, \
+    {    831, 9}, {   1663,11}, {    479,10}, {    959, 9}, \
+    {   1919, 8}, {   3839,13}, {    127,12}, {    255,11}, \
+    {    543,10}, {   1087, 9}, {   2175,12}, {    287,11}, \
+    {    607,10}, {   1215, 9}, {   2431, 8}, {   4863,12}, \
+    {    319,11}, {    671,10}, {   1343,13}, {    191, 9}, \
+    {   3071,12}, {    415,11}, {    831,10}, {   1663, 8}, \
+    {   6655, 9}, {   3455,12}, {    447, 9}, {   3583,13}, \
+    {    255,12}, {    511,11}, {   1023,10}, {   2175,13}, \
+    {    319,11}, {   1279,12}, {    671,10}, {   2815,12}, \
+    {    735,10}, {   2943, 9}, {   5887,13}, {    383,12}, \
+    {    767,11}, {   1535,10}, {   3071,13}, {    447,10}, \
+    {   3583,12}, {    959,13}, {    511,12}, {   1087,13}, \
+    {    639,12}, {   1343,13}, {    767,11}, {   3071,13}, \
+    {    831,12}, {   1663,11}, {   3455,10}, {   6911,13}, \
+    {    895,14}, {    511,13}, {   1023,12}, {   2047,13}, \
+    {   1087,12}, {   2303,13}, {   1215,12}, {   2431,14}, \
+    {    639,13}, {   1279,12}, {   2559,13}, {   1343,12}, \
+    {   2687,11}, {   5375,13}, {   1407,12}, {   2815,11}, \
+    {   5631,12}, {   2943,13}, {   1535,12}, {   3199,13}, \
+    {   1663,12}, {   3327,13}, {   1727,14}, {    895,13}, \
+    {   1791,12}, {   3583,13}, {   1919,15}, {    511,14}, \
+    {   1023,13}, {   2047,12}, {   4095,14}, {   1151,13}, \
+    {   2431,14}, {   1279,13}, {   2687,14}, {   1407,13}, \
+    {   2815,12}, {   5631,15}, {    767,14}, {   1535,13}, \
+    {   3071,14}, {   1663,13}, {   3327,14}, {   1791,13}, \
+    {   3583,14}, {   1919,15}, {   1023,14}, {   2303,13}, \
+    {   4607,14}, {   2431,13}, {   4863,15}, {  32768,16}, \
+    {  65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
+    {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 252
+#define MUL_FFT_THRESHOLD                 2368
+
+#define SQR_FFT_MODF_THRESHOLD             284  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    284, 5}, {      9, 4}, {     21, 5}, {     21, 6}, \
+    {     11, 5}, {     23, 6}, {     25, 7}, {     25, 8}, \
+    {     13, 7}, {     27, 8}, {     15, 7}, {     31, 8}, \
+    {     27, 9}, {     15, 8}, {     33, 9}, {     19, 8}, \
+    {     39, 9}, {     23, 8}, {     47, 9}, {     27,10}, \
+    {     15, 9}, {     39,10}, {     23, 9}, {     51,11}, \
+    {     15,10}, {     31, 9}, {     67,10}, {     39, 9}, \
+    {     79,10}, {     47, 9}, {     95,10}, {     55,11}, \
+    {     31,10}, {     63, 8}, {    255, 7}, {    511,10}, \
+    {     71, 8}, {    287, 7}, {    575,10}, {     79,11}, \
+    {     47,10}, {     95, 9}, {    191, 8}, {    383, 7}, \
+    {    767,10}, {    103, 9}, {    207, 8}, {    415,12}, \
+    {     31,11}, {     63,10}, {    127, 9}, {    255, 8}, \
+    {    543, 7}, {   1087, 8}, {    575, 7}, {   1151,11}, \
+    {     79, 8}, {    639, 7}, {   1279, 9}, {    335, 8}, \
+    {    671, 7}, {   1343,10}, {    175, 8}, {    703, 7}, \
+    {   1407,11}, {     95,10}, {    191, 9}, {    383, 8}, \
+    {    767,10}, {    207, 9}, {    415, 8}, {    831, 7}, \
+    {   1663, 9}, {    447, 8}, {    895,12}, {     63,11}, \
+    {    127,10}, {    255, 9}, {    543, 8}, {   1087, 7}, \
+    {   2175, 9}, {    575, 8}, {   1151,10}, {    303, 9}, \
+    {    607, 8}, {   1215, 7}, {   2431,10}, {    319, 9}, \
+    {    639, 8}, {   1279, 9}, {    671, 8}, {   1343, 7}, \
+    {   2687,10}, {    351, 9}, {    703, 8}, {   1407,12}, \
+    {     95,11}, {    191,10}, {    383, 9}, {    767,11}, \
+    {    207,10}, {    415, 9}, {    831, 8}, {   1663,11}, \
+    {    223,10}, {    447, 9}, {    895,13}, {     63,11}, \
+    {    255,10}, {    543, 8}, {   2175,11}, {    287,10}, \
+    {    575, 9}, {   1151,10}, {    607, 9}, {   1215, 8}, \
+    {   2431,11}, {    319, 9}, {   1279,10}, {    671, 9}, \
+    {   1343, 8}, {   2687,11}, {    351,10}, {    703, 9}, \
+    {   1407,10}, {    735,12}, {    191,11}, {    383,10}, \
+    {    831, 9}, {   1663,12}, {    223,11}, {    447,10}, \
+    {    895,11}, {    479, 9}, {   1919, 8}, {   3839,12}, \
+    {    255,11}, {    511,10}, {   1023,11}, {    543,10}, \
+    {   1087, 9}, {   2175,12}, {    287,11}, {    575,10}, \
+    {   1151,11}, {    607,10}, {   1215, 9}, {   2431, 8}, \
+    {   4863,10}, {   1279,11}, {    671,10}, {   1343, 9}, \
+    {   2687,12}, {    351,11}, {    703,10}, {   1407,11}, \
+    {    735,13}, {    191, 9}, {   3071, 7}, {  12287,11}, \
+    {    799,12}, {    415,11}, {    831,10}, {   1663,12}, \
+    {    447, 8}, {   7167,12}, {    479, 9}, {   3839,14}, \
+    {    127,13}, {    255,12}, {    511,11}, {   1023,12}, \
+    {    543,10}, {   2175, 9}, {   4607,11}, {   1215,10}, \
+    {   2431,11}, {   1279,10}, {   2559,13}, {    383,12}, \
+    {    767,11}, {   1535,12}, {    799,10}, {   3199, 9}, \
+    {   6399,12}, {    895,13}, {    511,12}, {   1023,11}, \
+    {   2047,12}, {   1087,13}, {    575,12}, {   1151,10}, \
+    {   4607,13}, {    639,12}, {   1279,11}, {   2687,14}, \
+    {    383,13}, {    767,11}, {   3071,12}, {   1599,13}, \
+    {    895,12}, {   1791,11}, {   3583,13}, {    959,15}, \
+    {    255,12}, {   2175,13}, {   1215,14}, {    639,13}, \
+    {   1279,12}, {   2559,13}, {   1343,12}, {   2687,13}, \
+    {   1471,11}, {   5887,14}, {    767,13}, {   1535,12}, \
+    {   3071,13}, {   1599,12}, {   3199,13}, {   1663,12}, \
+    {   3327,13}, {   1727,14}, {    895,13}, {   1791,12}, \
+    {   3583,15}, {    511,14}, {   1023,13}, {   2175,14}, \
+    {   1151,12}, {   4607,13}, {   2431,14}, {   1279,13}, \
+    {   2687,14}, {   1407,13}, {   2815,15}, {    767,13}, \
+    {   3199,14}, {   1663,13}, {   3327,14}, {   1791,13}, \
+    {   3583,14}, {   1919,15}, {   1023,14}, {   2047,13}, \
+    {   4095,14}, {   2303,13}, {   4607,14}, {   2431,15}, \
+    {  32768,16}, {  65536,17}, { 131072,18}, { 262144,19}, \
+    { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
+    {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 257
+#define SQR_FFT_THRESHOLD                 1856
+
+#define MULLO_BASECASE_THRESHOLD             0  /* always */
+#define MULLO_DC_THRESHOLD                 125
+#define MULLO_MUL_N_THRESHOLD             4658
+
+#define DC_DIV_QR_THRESHOLD                123
+#define DC_DIVAPPR_Q_THRESHOLD             372
+#define DC_BDIV_QR_THRESHOLD               142
+#define DC_BDIV_Q_THRESHOLD                309
+
+#define INV_MULMOD_BNM1_THRESHOLD           56
+#define INV_NEWTON_THRESHOLD               315
+#define INV_APPR_THRESHOLD                 318
+
+#define BINV_NEWTON_THRESHOLD              363
+#define REDC_1_TO_REDC_N_THRESHOLD         102
+
+#define MU_DIV_QR_THRESHOLD                979
+#define MU_DIVAPPR_Q_THRESHOLD             998
+#define MUPI_DIV_QR_THRESHOLD                0  /* always */
+#define MU_BDIV_QR_THRESHOLD               942
+#define MU_BDIV_Q_THRESHOLD               1334
+
+#define MATRIX22_STRASSEN_THRESHOLD          9
+#define HGCD_THRESHOLD                     240
+#define GCD_DC_THRESHOLD                   689
+#define GCDEXT_DC_THRESHOLD                538
+#define JACOBI_BASE_METHOD                   2
+
+#define GET_STR_DC_THRESHOLD                21
+#define GET_STR_PRECOMPUTE_THRESHOLD        24
+#define SET_STR_DC_THRESHOLD              1951
+#define SET_STR_PRECOMPUTE_THRESHOLD      4034
diff --git a/mpn/pa64/lshift.asm b/mpn/pa64/lshift.asm

new file mode 100644 (file)

index 0000000..0dceba2
--- /dev/null
+++ b/mpn/pa64/lshift.asm
@@ -0,0 +1,103 @@
+dnl  HP-PA 2.0 mpn_lshift -- Left shift.
+
+dnl  Copyright 1997, 2000, 2002, 2003 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+dnl  This runs at 1.5 cycles/limb on PA8000 and 1.0 cycles/limb on PA8500.
+
+include(`../config.m4')
+
+dnl  INPUT PARAMETERS
+define(`rp',`%r26')
+define(`up',`%r25')
+define(`n',`%r24')
+define(`cnt',`%r23')
+
+ifdef(`HAVE_ABI_2_0w',
+`       .level  2.0w
+',`     .level  2.0
+')
+PROLOGUE(mpn_lshift)                           C left-shift the n limbs at up by cnt bits into rp, return the bits shifted out
+       shladd          n, 3, up, up            C up += 8*n, just past the top source limb
+       shladd          n, 3, rp, rp            C rp += 8*n, just past the top result limb
+       subi            64, cnt, cnt            C cnt = 64 - cnt, the complementary right-shift count
+       mtsar           cnt                     C sar = 64 - original cnt, used by every shrpd below
+       ldd             -8(up), %r21            C r21 = most significant source limb
+       addib,=         -1, n, L(end)           C n -= 1, go straight to the tail when that leaves zero
+       shrpd           %r0, %r21, %sar, %r29   C compute carry out limb (runs in the delay slot)
+       depw,z          n, 31, 3, %r28          C r28 = (size & 7), where size is the already decremented n
+       sub             %r0, n, %r22            C r22 = -n
+       depw,z          %r22, 28, 3, %r22       C r22 = 8 * (-size & 7)
+       add             up, %r22, up            C offset up
+       blr             %r28, %r0               C branch into jump table
+       add             rp, %r22, rp            C offset rp
+       b               L(0)                    C table entry 0, each entry is a branch plus its delay slot
+       nop
+       b               L(1)
+       copy            %r21, %r20              C odd entry points expect the previous limb in r20
+       b               L(2)
+       nop
+       b               L(3)
+       copy            %r21, %r20
+       b               L(4)
+       nop
+       b               L(5)
+       copy            %r21, %r20
+       b               L(6)
+       nop
+       b               L(7)
+       copy            %r21, %r20
+
+LDEF(loop)
+LDEF(0)        ldd             -16(up), %r20   C r20 = next lower source limb
+       shrpd           %r21, %r20, %sar, %r21  C r21 = (r21 << (64-sar)) | (r20 >> sar)
+       std             %r21, -8(rp)            C store the finished result limb
+LDEF(7)        ldd             -24(up), %r21   C same step with the roles of r20 and r21 swapped
+       shrpd           %r20, %r21, %sar, %r20
+       std             %r20, -16(rp)
+LDEF(6)        ldd             -32(up), %r20
+       shrpd           %r21, %r20, %sar, %r21
+       std             %r21, -24(rp)
+LDEF(5)        ldd             -40(up), %r21
+       shrpd           %r20, %r21, %sar, %r20
+       std             %r20, -32(rp)
+LDEF(4)        ldd             -48(up), %r20
+       shrpd           %r21, %r20, %sar, %r21
+       std             %r21, -40(rp)
+LDEF(3)        ldd             -56(up), %r21
+       shrpd           %r20, %r21, %sar, %r20
+       std             %r20, -48(rp)
+LDEF(2)        ldd             -64(up), %r20
+       shrpd           %r21, %r20, %sar, %r21
+       std             %r21, -56(rp)
+LDEF(1)        ldd             -72(up), %r21
+       ldo             -64(up), up             C step the source pointer down by 8 limbs
+       shrpd           %r20, %r21, %sar, %r20
+       std             %r20, -64(rp)
+       addib,>         -8, n, L(loop)          C n -= 8, repeat while limbs remain
+       ldo             -64(rp), rp             C step the result pointer down by 8 limbs (delay slot)
+
+LDEF(end)
+       shrpd           %r21, %r0, %sar, %r21   C last limb: r21 << (64-sar) with zeros shifted in
+       std             %r21, -8(rp)            C store the least significant result limb
+       bve             (%r2)                   C return, the instruction selected below fills the delay slot
+ifdef(`HAVE_ABI_2_0w',
+`      copy            %r29,%r28               C r28 = carry-out limb computed at entry
+',`    extrd,u         %r29, 31, 32, %r28      C r28 = upper 32 bits of r29 (presumably r29 carries the low half here, confirm against the ABI)
+')
+EPILOGUE(mpn_lshift)
diff --git a/mpn/pa64/mul_1.asm b/mpn/pa64/mul_1.asm

new file mode 100644 (file)

index 0000000..fbb5f17
--- /dev/null
+++ b/mpn/pa64/mul_1.asm
@@ -0,0 +1,635 @@
+dnl  HP-PA 2.0 64-bit mpn_mul_1 -- Multiply a limb vector with a limb and store
+dnl  the result in a second limb vector.
+
+dnl  Copyright 1998, 1999, 2000, 2002, 2003 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                  cycles/limb
+C 8000,8200:           6.5
+C 8500,8600,8700:      5.625
+
+C  The feed-in and wind-down code has not yet been scheduled.  Many cycles
+C  could be saved there per call.
+
+C  DESCRIPTION:
+C  The main loop "BIG" is 4-way unrolled, mainly to allow
+C  effective use of ADD,DC.  Delays in moving data via the cache from the FP
+C  registers to the IU registers, have demanded a deep software pipeline, and
+C  a lot of stack slots for partial products in flight.
+C
+C  CODE STRUCTURE:
+C  save-some-registers
+C  do 0, 1, 2, or 3 limbs
+C  if done, restore-some-regs and return
+C  save-many-regs
+C  do 4, 8, ... limbs
+C  restore-all-regs
+
+C  STACK LAYOUT:
+C  HP-PA stack grows upwards.  We could allocate 8 fewer slots by using the
+C  slots marked FREE, as well as some slots in the caller's "frame marker".
+C
+C -00 <- r30
+C -08  FREE
+C -10  tmp
+C -18  tmp
+C -20  tmp
+C -28  tmp
+C -30  tmp
+C -38  tmp
+C -40  tmp
+C -48  tmp
+C -50  tmp
+C -58  tmp
+C -60  tmp
+C -68  tmp
+C -70  tmp
+C -78  tmp
+C -80  tmp
+C -88  tmp
+C -90  FREE
+C -98  FREE
+C -a0  FREE
+C -a8  FREE
+C -b0  r13
+C -b8  r12
+C -c0  r11
+C -c8  r10
+C -d0  r9
+C -d8  r8
+C -e0  r7
+C -e8  r6
+C -f0  r5
+C -f8  r4
+C -100 r3
+C  Previous frame:
+C  [unused area]
+C -38/-138 vlimb home slot.  For 2.0N, the vlimb arg will arrive here.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS:
+define(`rp',`%r26')    C
+define(`up',`%r25')    C
+define(`n',`%r24')     C
+define(`vlimb',`%r23') C
+
+define(`climb',`%r23') C
+
+ifdef(`HAVE_ABI_2_0w',
+`      .level  2.0w
+',`    .level  2.0
+')
+PROLOGUE(mpn_mul_1)
+
+ifdef(`HAVE_ABI_2_0w',
+`      std             vlimb, -0x38(%r30)      C store vlimb into "home" slot
+')
+       std,ma          %r3, 0x100(%r30)
+       std             %r4, -0xf8(%r30)
+       std             %r5, -0xf0(%r30)
+       ldo             0(%r0), climb           C clear climb
+       fldd            -0x138(%r30), %fr8      C put vlimb in fp register
+
+define(`p032a1',`%r1') C
+define(`p032a2',`%r19')        C
+
+define(`m032',`%r20')  C
+define(`m096',`%r21')  C
+
+define(`p000a',`%r22') C
+define(`p064a',`%r29') C
+
+define(`s000',`%r31')  C
+
+define(`ma000',`%r4')  C
+define(`ma064',`%r20') C
+
+C define(`r000',`%r3') C       FIXME don't save r3 for n < 4.
+
+       extrd,u         n, 63, 2, %r5
+       cmpb,=          %r5, %r0, L(BIG)
+       nop
+
+       fldd            0(up), %fr4
+       ldo             8(up), up
+       xmpyu           %fr8R, %fr4L, %fr22
+       xmpyu           %fr8L, %fr4R, %fr23
+       fstd            %fr22, -0x78(%r30)      C mid product to  -0x78..-0x71
+       xmpyu           %fr8R, %fr4R, %fr24
+       xmpyu           %fr8L, %fr4L, %fr25
+       fstd            %fr23, -0x70(%r30)      C mid product to  -0x70..-0x69
+       fstd            %fr24, -0x80(%r30)      C low product to  -0x80..-0x79
+       addib,<>        -1, %r5, L(two_or_more)
+       fstd            %fr25, -0x68(%r30)      C high product to -0x68..-0x61
+LDEF(one)
+       ldd             -0x78(%r30), p032a1
+       ldd             -0x70(%r30), p032a2
+       ldd             -0x80(%r30), p000a
+       b               L(0_one_out)
+       ldd             -0x68(%r30), p064a
+
+LDEF(two_or_more)
+       fldd            0(up), %fr4
+       ldo             8(up), up
+       xmpyu           %fr8R, %fr4L, %fr22
+       xmpyu           %fr8L, %fr4R, %fr23
+       ldd             -0x78(%r30), p032a1
+       fstd            %fr22, -0x78(%r30)      C mid product to  -0x78..-0x71
+       xmpyu           %fr8R, %fr4R, %fr24
+       xmpyu           %fr8L, %fr4L, %fr25
+       ldd             -0x70(%r30), p032a2
+       fstd            %fr23, -0x70(%r30)      C mid product to  -0x70..-0x69
+       ldd             -0x80(%r30), p000a
+       fstd            %fr24, -0x80(%r30)      C low product to  -0x80..-0x79
+       ldd             -0x68(%r30), p064a
+       addib,<>        -1, %r5, L(three_or_more)
+       fstd            %fr25, -0x68(%r30)      C high product to -0x68..-0x61
+LDEF(two)
+       add             p032a1, p032a2, m032
+       add,dc          %r0, %r0, m096
+       depd,z          m032, 31, 32, ma000
+       extrd,u         m032, 31, 32, ma064
+       b               L(0_two_out)
+       depd            m096, 31, 32, ma064
+
+LDEF(three_or_more)
+       fldd            0(up), %fr4
+       add             p032a1, p032a2, m032
+       add,dc          %r0, %r0, m096
+       depd,z          m032, 31, 32, ma000
+       extrd,u         m032, 31, 32, ma064
+C      addib,=         -1, %r5, L(0_out)
+       depd            m096, 31, 32, ma064
+LDEF(loop0)
+C      xmpyu           %fr8R, %fr4L, %fr22
+C      xmpyu           %fr8L, %fr4R, %fr23
+C      ldd             -0x78(%r30), p032a1
+C      fstd            %fr22, -0x78(%r30)      C mid product to  -0x78..-0x71
+C
+C      xmpyu           %fr8R, %fr4R, %fr24
+C      xmpyu           %fr8L, %fr4L, %fr25
+C      ldd             -0x70(%r30), p032a2
+C      fstd            %fr23, -0x70(%r30)      C mid product to  -0x70..-0x69
+C
+C      ldo             8(rp), rp
+C      add             climb, p000a, s000
+C      ldd             -0x80(%r30), p000a
+C      fstd            %fr24, -0x80(%r30)      C low product to  -0x80..-0x79
+C
+C      add,dc          p064a, %r0, climb
+C      ldo             8(up), up
+C      ldd             -0x68(%r30), p064a
+C      fstd            %fr25, -0x68(%r30)      C high product to -0x68..-0x61
+C
+C      add             ma000, s000, s000
+C      add,dc          ma064, climb, climb
+C      fldd            0(up), %fr4
+C
+C      std             s000, -8(rp)
+C
+C      add             p032a1, p032a2, m032
+C      add,dc          %r0, %r0, m096
+C
+C      depd,z          m032, 31, 32, ma000
+C      extrd,u         m032, 31, 32, ma064
+C      addib,<>        -1, %r5, L(loop0)
+C      depd            m096, 31, 32, ma064
+LDEF(0_out)
+       ldo             8(up), up
+       xmpyu           %fr8R, %fr4L, %fr22
+       xmpyu           %fr8L, %fr4R, %fr23
+       ldd             -0x78(%r30), p032a1
+       fstd            %fr22, -0x78(%r30)      C mid product to  -0x78..-0x71
+       xmpyu           %fr8R, %fr4R, %fr24
+       xmpyu           %fr8L, %fr4L, %fr25
+       ldd             -0x70(%r30), p032a2
+       fstd            %fr23, -0x70(%r30)      C mid product to  -0x70..-0x69
+       ldo             8(rp), rp
+       add             climb, p000a, s000
+       ldd             -0x80(%r30), p000a
+       fstd            %fr24, -0x80(%r30)      C low product to  -0x80..-0x79
+       add,dc          p064a, %r0, climb
+       ldd             -0x68(%r30), p064a
+       fstd            %fr25, -0x68(%r30)      C high product to -0x68..-0x61
+       add             ma000, s000, s000
+       add,dc          ma064, climb, climb
+       std             s000, -8(rp)
+       add             p032a1, p032a2, m032
+       add,dc          %r0, %r0, m096
+       depd,z          m032, 31, 32, ma000
+       extrd,u         m032, 31, 32, ma064
+       depd            m096, 31, 32, ma064
+LDEF(0_two_out)
+       ldd             -0x78(%r30), p032a1
+       ldd             -0x70(%r30), p032a2
+       ldo             8(rp), rp
+       add             climb, p000a, s000
+       ldd             -0x80(%r30), p000a
+       add,dc          p064a, %r0, climb
+       ldd             -0x68(%r30), p064a
+       add             ma000, s000, s000
+       add,dc          ma064, climb, climb
+       std             s000, -8(rp)
+LDEF(0_one_out)
+       add             p032a1, p032a2, m032
+       add,dc          %r0, %r0, m096
+       depd,z          m032, 31, 32, ma000
+       extrd,u         m032, 31, 32, ma064
+       depd            m096, 31, 32, ma064
+
+       add             climb, p000a, s000
+       add,dc          p064a, %r0, climb
+       add             ma000, s000, s000
+       add,dc          ma064, climb, climb
+       std             s000, 0(rp)
+
+       cmpib,>=        4, n, L(done)
+       ldo             8(rp), rp
+
+C 4-way unrolled code.
+
+LDEF(BIG)
+
+define(`p032a1',`%r1') C
+define(`p032a2',`%r19')        C
+define(`p096b1',`%r20')        C
+define(`p096b2',`%r21')        C
+define(`p160c1',`%r22')        C
+define(`p160c2',`%r29')        C
+define(`p224d1',`%r31')        C
+define(`p224d2',`%r3') C
+                       C
+define(`m032',`%r4')   C
+define(`m096',`%r5')   C
+define(`m160',`%r6')   C
+define(`m224',`%r7')   C
+define(`m288',`%r8')   C
+                       C
+define(`p000a',`%r1')  C
+define(`p064a',`%r19') C
+define(`p064b',`%r20') C
+define(`p128b',`%r21') C
+define(`p128c',`%r22') C
+define(`p192c',`%r29') C
+define(`p192d',`%r31') C
+define(`p256d',`%r3')  C
+                       C
+define(`s000',`%r10')  C
+define(`s064',`%r11')  C
+define(`s128',`%r12')  C
+define(`s192',`%r13')  C
+                       C
+define(`ma000',`%r9')  C
+define(`ma064',`%r4')  C
+define(`ma128',`%r5')  C
+define(`ma192',`%r6')  C
+define(`ma256',`%r7')  C
+
+       std             %r6, -0xe8(%r30)
+       std             %r7, -0xe0(%r30)
+       std             %r8, -0xd8(%r30)
+       std             %r9, -0xd0(%r30)
+       std             %r10, -0xc8(%r30)
+       std             %r11, -0xc0(%r30)
+       std             %r12, -0xb8(%r30)
+       std             %r13, -0xb0(%r30)
+
+ifdef(`HAVE_ABI_2_0w',
+`      extrd,u         n, 61, 62, n            C right shift 2
+',`    extrd,u         n, 61, 30, n            C right shift 2, zero extend
+')
+
+LDEF(4_or_more)
+       fldd            0(up), %fr4
+       fldd            8(up), %fr5
+       fldd            16(up), %fr6
+       fldd            24(up), %fr7
+       xmpyu           %fr8R, %fr4L, %fr22
+       xmpyu           %fr8L, %fr4R, %fr23
+       xmpyu           %fr8R, %fr5L, %fr24
+       xmpyu           %fr8L, %fr5R, %fr25
+       xmpyu           %fr8R, %fr6L, %fr26
+       xmpyu           %fr8L, %fr6R, %fr27
+       fstd            %fr22, -0x78(%r30)      C mid product to  -0x78..-0x71
+       xmpyu           %fr8R, %fr7L, %fr28
+       xmpyu           %fr8L, %fr7R, %fr29
+       fstd            %fr23, -0x70(%r30)      C mid product to  -0x70..-0x69
+       xmpyu           %fr8R, %fr4R, %fr30
+       xmpyu           %fr8L, %fr4L, %fr31
+       fstd            %fr24, -0x38(%r30)      C mid product to  -0x38..-0x31
+       xmpyu           %fr8R, %fr5R, %fr22
+       xmpyu           %fr8L, %fr5L, %fr23
+       fstd            %fr25, -0x30(%r30)      C mid product to  -0x30..-0x29
+       xmpyu           %fr8R, %fr6R, %fr24
+       xmpyu           %fr8L, %fr6L, %fr25
+       fstd            %fr26, -0x58(%r30)      C mid product to  -0x58..-0x51
+       xmpyu           %fr8R, %fr7R, %fr26
+       fstd            %fr27, -0x50(%r30)      C mid product to  -0x50..-0x49
+       addib,<>        -1, n, L(8_or_more)
+       xmpyu           %fr8L, %fr7L, %fr27
+       fstd            %fr28, -0x18(%r30)      C mid product to  -0x18..-0x11
+       fstd            %fr29, -0x10(%r30)      C mid product to  -0x10..-0x09
+       fstd            %fr30, -0x80(%r30)      C low product to  -0x80..-0x79
+       fstd            %fr31, -0x68(%r30)      C high product to -0x68..-0x61
+       fstd            %fr22, -0x40(%r30)      C low product to  -0x40..-0x39
+       fstd            %fr23, -0x28(%r30)      C high product to -0x28..-0x21
+       fstd            %fr24, -0x60(%r30)      C low product to  -0x60..-0x59
+       fstd            %fr25, -0x48(%r30)      C high product to -0x48..-0x41
+       fstd            %fr26, -0x20(%r30)      C low product to  -0x20..-0x19
+       fstd            %fr27, -0x88(%r30)      C high product to -0x88..-0x81
+       ldd             -0x78(%r30), p032a1
+       ldd             -0x70(%r30), p032a2
+       ldd             -0x38(%r30), p096b1
+       ldd             -0x30(%r30), p096b2
+       ldd             -0x58(%r30), p160c1
+       ldd             -0x50(%r30), p160c2
+       ldd             -0x18(%r30), p224d1
+       ldd             -0x10(%r30), p224d2
+       b               L(end1)
+       nop
+
+LDEF(8_or_more)
+       fstd            %fr28, -0x18(%r30)      C mid product to  -0x18..-0x11
+       fstd            %fr29, -0x10(%r30)      C mid product to  -0x10..-0x09
+       ldo             32(up), up
+       fstd            %fr30, -0x80(%r30)      C low product to  -0x80..-0x79
+       fstd            %fr31, -0x68(%r30)      C high product to -0x68..-0x61
+       fstd            %fr22, -0x40(%r30)      C low product to  -0x40..-0x39
+       fstd            %fr23, -0x28(%r30)      C high product to -0x28..-0x21
+       fstd            %fr24, -0x60(%r30)      C low product to  -0x60..-0x59
+       fstd            %fr25, -0x48(%r30)      C high product to -0x48..-0x41
+       fstd            %fr26, -0x20(%r30)      C low product to  -0x20..-0x19
+       fstd            %fr27, -0x88(%r30)      C high product to -0x88..-0x81
+       fldd            0(up), %fr4
+       fldd            8(up), %fr5
+       fldd            16(up), %fr6
+       fldd            24(up), %fr7
+       xmpyu           %fr8R, %fr4L, %fr22
+       ldd             -0x78(%r30), p032a1
+       xmpyu           %fr8L, %fr4R, %fr23
+       xmpyu           %fr8R, %fr5L, %fr24
+       ldd             -0x70(%r30), p032a2
+       xmpyu           %fr8L, %fr5R, %fr25
+       xmpyu           %fr8R, %fr6L, %fr26
+       ldd             -0x38(%r30), p096b1
+       xmpyu           %fr8L, %fr6R, %fr27
+       fstd            %fr22, -0x78(%r30)      C mid product to  -0x78..-0x71
+       xmpyu           %fr8R, %fr7L, %fr28
+       ldd             -0x30(%r30), p096b2
+       xmpyu           %fr8L, %fr7R, %fr29
+       fstd            %fr23, -0x70(%r30)      C mid product to  -0x70..-0x69
+       xmpyu           %fr8R, %fr4R, %fr30
+       ldd             -0x58(%r30), p160c1
+       xmpyu           %fr8L, %fr4L, %fr31
+       fstd            %fr24, -0x38(%r30)      C mid product to  -0x38..-0x31
+       xmpyu           %fr8R, %fr5R, %fr22
+       ldd             -0x50(%r30), p160c2
+       xmpyu           %fr8L, %fr5L, %fr23
+       fstd            %fr25, -0x30(%r30)      C mid product to  -0x30..-0x29
+       xmpyu           %fr8R, %fr6R, %fr24
+       ldd             -0x18(%r30), p224d1
+       xmpyu           %fr8L, %fr6L, %fr25
+       fstd            %fr26, -0x58(%r30)      C mid product to  -0x58..-0x51
+       xmpyu           %fr8R, %fr7R, %fr26
+       ldd             -0x10(%r30), p224d2
+       fstd            %fr27, -0x50(%r30)      C mid product to  -0x50..-0x49
+       addib,=         -1, n, L(end2)
+       xmpyu           %fr8L, %fr7L, %fr27
+LDEF(loop)
+       add             p032a1, p032a2, m032
+       ldd             -0x80(%r30), p000a
+       add,dc          p096b1, p096b2, m096
+       fstd            %fr28, -0x18(%r30)      C mid product to  -0x18..-0x11
+
+       add,dc          p160c1, p160c2, m160
+       ldd             -0x68(%r30), p064a
+       add,dc          p224d1, p224d2, m224
+       fstd            %fr29, -0x10(%r30)      C mid product to  -0x10..-0x09
+
+       add,dc          %r0, %r0, m288
+       ldd             -0x40(%r30), p064b
+       ldo             32(up), up
+       fstd            %fr30, -0x80(%r30)      C low product to  -0x80..-0x79
+
+       depd,z          m032, 31, 32, ma000
+       ldd             -0x28(%r30), p128b
+       extrd,u         m032, 31, 32, ma064
+       fstd            %fr31, -0x68(%r30)      C high product to -0x68..-0x61
+
+       depd            m096, 31, 32, ma064
+       ldd             -0x60(%r30), p128c
+       extrd,u         m096, 31, 32, ma128
+       fstd            %fr22, -0x40(%r30)      C low product to  -0x40..-0x39
+
+       depd            m160, 31, 32, ma128
+       ldd             -0x48(%r30), p192c
+       extrd,u         m160, 31, 32, ma192
+       fstd            %fr23, -0x28(%r30)      C high product to -0x28..-0x21
+
+       depd            m224, 31, 32, ma192
+       ldd             -0x20(%r30), p192d
+       extrd,u         m224, 31, 32, ma256
+       fstd            %fr24, -0x60(%r30)      C low product to  -0x60..-0x59
+
+       depd            m288, 31, 32, ma256
+       ldd             -0x88(%r30), p256d
+       add             climb, p000a, s000
+       fstd            %fr25, -0x48(%r30)      C high product to -0x48..-0x41
+
+       add,dc          p064a, p064b, s064
+       add,dc          p128b, p128c, s128
+       fstd            %fr26, -0x20(%r30)      C low product to  -0x20..-0x19
+
+       add,dc          p192c, p192d, s192
+       add,dc          p256d, %r0, climb
+       fstd            %fr27, -0x88(%r30)      C high product to -0x88..-0x81
+
+       add             ma000, s000, s000       C accum mid 0
+       fldd            0(up), %fr4
+       add,dc          ma064, s064, s064       C accum mid 1
+       std             s000, 0(rp)
+
+       add,dc          ma128, s128, s128       C accum mid 2
+       fldd            8(up), %fr5
+       add,dc          ma192, s192, s192       C accum mid 3
+       std             s064, 8(rp)
+
+       add,dc          ma256, climb, climb
+       fldd            16(up), %fr6
+       std             s128, 16(rp)
+
+       xmpyu           %fr8R, %fr4L, %fr22
+       ldd             -0x78(%r30), p032a1
+       xmpyu           %fr8L, %fr4R, %fr23
+       fldd            24(up), %fr7
+
+       xmpyu           %fr8R, %fr5L, %fr24
+       ldd             -0x70(%r30), p032a2
+       xmpyu           %fr8L, %fr5R, %fr25
+       std             s192, 24(rp)
+
+       xmpyu           %fr8R, %fr6L, %fr26
+       ldd             -0x38(%r30), p096b1
+       xmpyu           %fr8L, %fr6R, %fr27
+       fstd            %fr22, -0x78(%r30)      C mid product to  -0x78..-0x71
+
+       xmpyu           %fr8R, %fr7L, %fr28
+       ldd             -0x30(%r30), p096b2
+       xmpyu           %fr8L, %fr7R, %fr29
+       fstd            %fr23, -0x70(%r30)      C mid product to  -0x70..-0x69
+
+       xmpyu           %fr8R, %fr4R, %fr30
+       ldd             -0x58(%r30), p160c1
+       xmpyu           %fr8L, %fr4L, %fr31
+       fstd            %fr24, -0x38(%r30)      C mid product to  -0x38..-0x31
+
+       xmpyu           %fr8R, %fr5R, %fr22
+       ldd             -0x50(%r30), p160c2
+       xmpyu           %fr8L, %fr5L, %fr23
+       fstd            %fr25, -0x30(%r30)      C mid product to  -0x30..-0x29
+
+       xmpyu           %fr8R, %fr6R, %fr24
+       ldd             -0x18(%r30), p224d1
+       xmpyu           %fr8L, %fr6L, %fr25
+       fstd            %fr26, -0x58(%r30)      C mid product to  -0x58..-0x51
+
+       xmpyu           %fr8R, %fr7R, %fr26
+       ldd             -0x10(%r30), p224d2
+       fstd            %fr27, -0x50(%r30)      C mid product to  -0x50..-0x49
+       xmpyu           %fr8L, %fr7L, %fr27
+
+       addib,<>        -1, n, L(loop)
+       ldo             32(rp), rp
+
+LDEF(end2)
+       add             p032a1, p032a2, m032
+       ldd             -0x80(%r30), p000a
+       add,dc          p096b1, p096b2, m096
+       fstd            %fr28, -0x18(%r30)      C mid product to  -0x18..-0x11
+       add,dc          p160c1, p160c2, m160
+       ldd             -0x68(%r30), p064a
+       add,dc          p224d1, p224d2, m224
+       fstd            %fr29, -0x10(%r30)      C mid product to  -0x10..-0x09
+       add,dc          %r0, %r0, m288
+       ldd             -0x40(%r30), p064b
+       fstd            %fr30, -0x80(%r30)      C low product to  -0x80..-0x79
+       depd,z          m032, 31, 32, ma000
+       ldd             -0x28(%r30), p128b
+       extrd,u         m032, 31, 32, ma064
+       fstd            %fr31, -0x68(%r30)      C high product to -0x68..-0x61
+       depd            m096, 31, 32, ma064
+       ldd             -0x60(%r30), p128c
+       extrd,u         m096, 31, 32, ma128
+       fstd            %fr22, -0x40(%r30)      C low product to  -0x40..-0x39
+       depd            m160, 31, 32, ma128
+       ldd             -0x48(%r30), p192c
+       extrd,u         m160, 31, 32, ma192
+       fstd            %fr23, -0x28(%r30)      C high product to -0x28..-0x21
+       depd            m224, 31, 32, ma192
+       ldd             -0x20(%r30), p192d
+       extrd,u         m224, 31, 32, ma256
+       fstd            %fr24, -0x60(%r30)      C low product to  -0x60..-0x59
+       depd            m288, 31, 32, ma256
+       ldd             -0x88(%r30), p256d
+       add             climb, p000a, s000
+       fstd            %fr25, -0x48(%r30)      C high product to -0x48..-0x41
+       add,dc          p064a, p064b, s064
+       add,dc          p128b, p128c, s128
+       fstd            %fr26, -0x20(%r30)      C low product to  -0x20..-0x19
+       add,dc          p192c, p192d, s192
+       add,dc          p256d, %r0, climb
+       fstd            %fr27, -0x88(%r30)      C high product to -0x88..-0x81
+       add             ma000, s000, s000       C accum mid 0
+       add,dc          ma064, s064, s064       C accum mid 1
+       add,dc          ma128, s128, s128       C accum mid 2
+       add,dc          ma192, s192, s192       C accum mid 3
+       add,dc          ma256, climb, climb
+       std             s000, 0(rp)
+       std             s064, 8(rp)
+       ldd             -0x78(%r30), p032a1
+       std             s128, 16(rp)
+       ldd             -0x70(%r30), p032a2
+       std             s192, 24(rp)
+       ldd             -0x38(%r30), p096b1
+       ldd             -0x30(%r30), p096b2
+       ldd             -0x58(%r30), p160c1
+       ldd             -0x50(%r30), p160c2
+       ldd             -0x18(%r30), p224d1
+       ldd             -0x10(%r30), p224d2
+       ldo             32(rp), rp
+
+LDEF(end1)
+       add             p032a1, p032a2, m032
+       ldd             -0x80(%r30), p000a
+       add,dc          p096b1, p096b2, m096
+       add,dc          p160c1, p160c2, m160
+       ldd             -0x68(%r30), p064a
+       add,dc          p224d1, p224d2, m224
+       add,dc          %r0, %r0, m288
+       ldd             -0x40(%r30), p064b
+       depd,z          m032, 31, 32, ma000
+       ldd             -0x28(%r30), p128b
+       extrd,u         m032, 31, 32, ma064
+       depd            m096, 31, 32, ma064
+       ldd             -0x60(%r30), p128c
+       extrd,u         m096, 31, 32, ma128
+       depd            m160, 31, 32, ma128
+       ldd             -0x48(%r30), p192c
+       extrd,u         m160, 31, 32, ma192
+       depd            m224, 31, 32, ma192
+       ldd             -0x20(%r30), p192d
+       extrd,u         m224, 31, 32, ma256
+       depd            m288, 31, 32, ma256
+       ldd             -0x88(%r30), p256d
+       add             climb, p000a, s000
+       add,dc          p064a, p064b, s064
+       add,dc          p128b, p128c, s128
+       add,dc          p192c, p192d, s192
+       add,dc          p256d, %r0, climb
+       add             ma000, s000, s000       C accum mid 0
+       add,dc          ma064, s064, s064       C accum mid 1
+       add,dc          ma128, s128, s128       C accum mid 2
+       add,dc          ma192, s192, s192       C accum mid 3
+       add,dc          ma256, climb, climb
+       std             s000, 0(rp)
+       std             s064, 8(rp)
+       std             s128, 16(rp)
+       std             s192, 24(rp)
+
+       ldd             -0xb0(%r30), %r13
+       ldd             -0xb8(%r30), %r12
+       ldd             -0xc0(%r30), %r11
+       ldd             -0xc8(%r30), %r10
+       ldd             -0xd0(%r30), %r9
+       ldd             -0xd8(%r30), %r8
+       ldd             -0xe0(%r30), %r7
+       ldd             -0xe8(%r30), %r6
+LDEF(done)
+ifdef(`HAVE_ABI_2_0w',
+`      copy            climb, %r28
+',`    extrd,u         climb, 63, 32, %r29
+       extrd,u         climb, 31, 32, %r28
+')
+       ldd             -0xf0(%r30), %r5
+       ldd             -0xf8(%r30), %r4
+       bve             (%r2)
+       ldd,mb          -0x100(%r30), %r3
+EPILOGUE(mpn_mul_1)
diff --git a/mpn/pa64/rshift.asm b/mpn/pa64/rshift.asm

new file mode 100644 (file)

index 0000000..80470c9
--- /dev/null
+++ b/mpn/pa64/rshift.asm
@@ -0,0 +1,100 @@
+dnl  HP-PA 2.0 mpn_rshift -- Right shift.
+
+dnl  Copyright 1997, 2000, 2002, 2003 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+dnl  This runs at 1.5 cycles/limb on PA8000 and 1.0 cycles/limb on PA8500.
+
+include(`../config.m4')
+
+dnl  INPUT PARAMETERS
+define(`rp',`%r26')
+define(`up',`%r25')
+define(`n',`%r24')
+define(`cnt',`%r23')
+
+ifdef(`HAVE_ABI_2_0w',
+`       .level  2.0w
+',`     .level  2.0
+')
+PROLOGUE(mpn_rshift)                           C shift the n limbs at up right by cnt bits into rp
+       mtsar           cnt                     C SAR = cnt, the shift amount used by every shrpd
+       ldd             0(up), %r21             C r21 = up[0]
+       addib,=         -1, n, L(end)           C n -= 1; if now zero there is one limb: go to end
+       shrpd           %r21, %r0, %sar, %r29   C compute carry out limb
+       depw,z          n, 31, 3, %r28          C r28 = (size & 7)
+       sub             %r0, n, %r22            C r22 = -n
+       depw,z          %r22, 28, 3, %r22       C r22 = 8 * (-size & 7)
+       sub             up, %r22, up            C offset up
+       blr             %r28, %r0               C branch into jump table
+       sub             rp, %r22, rp            C offset rp
+       b               L(0)                    C table entry 0; each entry is a branch plus delay slot
+       nop                                     C L(0) expects the previous limb in r21: nothing to do
+       b               L(1)                    C table entry 1
+       copy            %r21, %r20              C L(1) expects the previous limb in r20
+       b               L(2)                    C table entry 2
+       nop
+       b               L(3)                    C table entry 3
+       copy            %r21, %r20
+       b               L(4)                    C table entry 4
+       nop
+       b               L(5)                    C table entry 5
+       copy            %r21, %r20
+       b               L(6)                    C table entry 6
+       nop
+       b               L(7)                    C table entry 7
+       copy            %r21, %r20
+
+LDEF(loop)                                     C 8 limbs per pass; r20 and r21 alternate as previous/next limb
+LDEF(0)        ldd             8(up), %r20             C r20 = next source limb
+       shrpd           %r20, %r21, %sar, %r21  C r21 = low 64 bits of r20:r21 >> cnt
+       std             %r21, 0(rp)             C store result limb
+LDEF(7)        ldd             16(up), %r21            C r21 = next source limb
+       shrpd           %r21, %r20, %sar, %r20  C r20 = low 64 bits of r21:r20 >> cnt
+       std             %r20, 8(rp)             C store result limb
+LDEF(6)        ldd             24(up), %r20
+       shrpd           %r20, %r21, %sar, %r21
+       std             %r21, 16(rp)
+LDEF(5)        ldd             32(up), %r21
+       shrpd           %r21, %r20, %sar, %r20
+       std             %r20, 24(rp)
+LDEF(4)        ldd             40(up), %r20
+       shrpd           %r20, %r21, %sar, %r21
+       std             %r21, 32(rp)
+LDEF(3)        ldd             48(up), %r21
+       shrpd           %r21, %r20, %sar, %r20
+       std             %r20, 40(rp)
+LDEF(2)        ldd             56(up), %r20
+       shrpd           %r20, %r21, %sar, %r21
+       std             %r21, 48(rp)
+LDEF(1)        ldd             64(up), %r21            C r21 = last limb loaded in this pass
+       ldo             64(up), up              C up += 64 bytes
+       shrpd           %r21, %r20, %sar, %r20
+       std             %r20, 56(rp)
+       addib,>         -8, n, L(loop)          C n -= 8; loop while n > 0
+       ldo             64(rp), rp              C rp += 64 bytes (delay slot)
+
+LDEF(end)                                      C r21 = last source limb loaded
+       shrpd           %r0, %r21, %sar, %r21   C shift it with zeros coming in from above
+       std             %r21, 0(rp)             C store final result limb
+       bve             (%r2)                   C return via r2; delay slot below puts the carry limb in r28
+ifdef(`HAVE_ABI_2_0w',
+`      copy            %r29,%r28
+',`    extrd,u         %r29, 31, 32, %r28
+')
+EPILOGUE(mpn_rshift)
diff --git a/mpn/pa64/sqr_diagonal.asm b/mpn/pa64/sqr_diagonal.asm

new file mode 100644 (file)

index 0000000..73c64b0
--- /dev/null
+++ b/mpn/pa64/sqr_diagonal.asm
@@ -0,0 +1,180 @@
+dnl  HP-PA 2.0 64-bit mpn_sqr_diagonal.
+
+dnl  Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+dnl  This code runs at 7.25 cycles/limb on PA8000 and 7.75 cycles/limb on
+dnl  PA8500.  The cache would saturate at 5 cycles/limb, so there is some room
+dnl  for optimization.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+define(`rp',`%r26')
+define(`up',`%r25')
+define(`n',`%r24')
+
+define(`p00',`%r28')
+define(`p32',`%r29')
+define(`p64',`%r31')
+define(`t0',`%r19')
+define(`t1',`%r20')
+
+ifdef(`HAVE_ABI_2_0w',
+`      .level  2.0w
+',`    .level  2.0
+')
+PROLOGUE(mpn_sqr_diagonal)                     C square each of the n limbs at up; write two result limbs per input limb at rp
+       ldo             128(%r30),%r30          C reserve 128 bytes of stack; slots -128 and -120 are used below
+
+       fldds,ma        8(up),%fr8              C fr8 = first limb; up += 8
+       addib,=         -1,n,L(end1)            C n -= 1; if now zero there is a single limb: go to end1
+       nop                                     C delay slot
+       fldds,ma        8(up),%fr4              C fr4 = second limb; up += 8
+       xmpyu           %fr8l,%fr8r,%fr10       C fr10 = cross product of the two 32-bit halves of fr8
+       fstd            %fr10,-120(%r30)        C park cross product in stack slot -120
+       xmpyu           %fr8r,%fr8r,%fr9        C fr9 = right half squared
+       fstd            %fr9,0(rp)              C store as provisional low result limb
+       xmpyu           %fr8l,%fr8l,%fr11       C fr11 = left half squared
+       fstd            %fr11,8(rp)             C store as provisional high result limb
+       addib,=         -1,n,L(end2)            C n -= 1; if now zero there are two limbs: go to end2
+       ldo             16(rp),rp               C rp += 16 (delay slot)
+
+LDEF(loop)                                     C two limbs per pass; fr4/fr8 and slots -128/-120 alternate
+       fldds,ma        8(up),%fr8              C load next up limb
+       xmpyu           %fr4l,%fr4r,%fr6        C fr6 = cross product of the halves of fr4
+       fstd            %fr6,-128(%r30)         C park it in stack slot -128
+       xmpyu           %fr4r,%fr4r,%fr5        C multiply in fp regs
+       fstd            %fr5,0(rp)
+       xmpyu           %fr4l,%fr4l,%fr7
+       fstd            %fr7,8(rp)
+       ldd             -120(%r30),p32          C p32 = cross product parked for the limb before
+       ldd             -16(rp),p00             C accumulate in int regs
+       ldd             -8(rp),p64              C p64 = provisional high limb of the limb before
+       depd,z          p32,30,31,t0            C t0 = p32 << 33
+       add             t0,p00,p00              C low limb += t0
+       std             p00,-16(rp)
+       extrd,u         p32,32,33,t1            C t1 = p32 >> 31
+       add,dc          t1,p64,p64              C high limb += t1 + carry
+       std             p64,-8(rp)
+       addib,=         -1,n,L(exit)            C n -= 1; leave through exit when it reaches zero
+       ldo             16(rp),rp               C rp += 16 (delay slot)
+
+       fldds,ma        8(up),%fr4              C fr4 = next limb; up += 8
+       xmpyu           %fr8l,%fr8r,%fr10       C fr10 = cross product of the halves of fr8
+       fstd            %fr10,-120(%r30)        C park it in stack slot -120
+       xmpyu           %fr8r,%fr8r,%fr9
+       fstd            %fr9,0(rp)
+       xmpyu           %fr8l,%fr8l,%fr11
+       fstd            %fr11,8(rp)
+       ldd             -128(%r30),p32          C p32 = cross product parked for the limb before
+       ldd             -16(rp),p00
+       ldd             -8(rp),p64
+       depd,z          p32,30,31,t0            C t0 = p32 << 33
+       add             t0,p00,p00
+       std             p00,-16(rp)
+       extrd,u         p32,32,33,t1            C t1 = p32 >> 31
+       add,dc          t1,p64,p64
+       std             p64,-8(rp)
+       addib,<>        -1,n,L(loop)            C n -= 1; loop while n is nonzero
+       ldo             16(rp),rp               C rp += 16 (delay slot)
+
+LDEF(end2)                                     C wind-down: last limb is in fr4, slot -120 holds a pending cross product
+       xmpyu           %fr4l,%fr4r,%fr6
+       fstd            %fr6,-128(%r30)
+       xmpyu           %fr4r,%fr4r,%fr5
+       fstd            %fr5,0(rp)
+       xmpyu           %fr4l,%fr4l,%fr7
+       fstd            %fr7,8(rp)
+       ldd             -120(%r30),p32          C finish the limb before the last one
+       ldd             -16(rp),p00
+       ldd             -8(rp),p64
+       depd,z          p32,30,31,t0
+       add             t0,p00,p00
+       std             p00,-16(rp)
+       extrd,u         p32,32,33,t1
+       add,dc          t1,p64,p64
+       std             p64,-8(rp)
+       ldo             16(rp),rp
+       ldd             -128(%r30),p32          C finish the last limb
+       ldd             -16(rp),p00
+       ldd             -8(rp),p64
+       depd,z          p32,30,31,t0
+       add             t0,p00,p00
+       std             p00,-16(rp)
+       extrd,u         p32,32,33,t1
+       add,dc          t1,p64,p64
+       std             p64,-8(rp)
+       bve             (%r2)                   C return via r2
+       ldo             -128(%r30),%r30         C release the stack area (delay slot)
+
+LDEF(exit)                                     C wind-down: last limb is in fr8, slot -128 holds a pending cross product
+       xmpyu           %fr8l,%fr8r,%fr10
+       fstd            %fr10,-120(%r30)
+       xmpyu           %fr8r,%fr8r,%fr9
+       fstd            %fr9,0(rp)
+       xmpyu           %fr8l,%fr8l,%fr11
+       fstd            %fr11,8(rp)
+       ldd             -128(%r30),p32          C finish the limb before the last one
+       ldd             -16(rp),p00
+       ldd             -8(rp),p64
+       depd,z          p32,31,32,t0            C t0 = p32 << 32
+       add             t0,p00,p00
+       extrd,u         p32,31,32,t1            C t1 = p32 >> 32
+       add,dc          t1,p64,p64
+       add             t0,p00,p00              C add t0 and t1 a second time: cross product counts twice
+       add,dc          t1,p64,p64
+       std             p00,-16(rp)
+       std             p64,-8(rp)
+       ldo             16(rp),rp
+       ldd             -120(%r30),p32          C finish the last limb
+       ldd             -16(rp),p00
+       ldd             -8(rp),p64
+       depd,z          p32,31,32,t0
+       add             t0,p00,p00
+       extrd,u         p32,31,32,t1
+       add,dc          t1,p64,p64
+       add             t0,p00,p00
+       add,dc          t1,p64,p64
+       std             p00,-16(rp)
+       std             p64,-8(rp)
+       bve             (%r2)                   C return via r2
+       ldo             -128(%r30),%r30         C release the stack area (delay slot)
+
+LDEF(end1)                                     C single limb case: the limb is in fr8
+       xmpyu           %fr8l,%fr8r,%fr10       C cross product of the two halves
+       fstd            %fr10,-128(%r30)        C through stack slot -128 to reach the integer registers
+       xmpyu           %fr8r,%fr8r,%fr9
+       fstd            %fr9,0(rp)
+       xmpyu           %fr8l,%fr8l,%fr11
+       fstd            %fr11,8(rp)
+       ldo             16(rp),rp
+       ldd             -128(%r30),p32
+       ldd             -16(rp),p00
+       ldd             -8(rp),p64
+       depd,z          p32,31,32,t0            C t0 = p32 << 32
+       add             t0,p00,p00
+       extrd,u         p32,31,32,t1            C t1 = p32 >> 32
+       add,dc          t1,p64,p64
+       add             t0,p00,p00              C add t0 and t1 a second time: cross product counts twice
+       add,dc          t1,p64,p64
+       std             p00,-16(rp)
+       std             p64,-8(rp)
+       bve             (%r2)                   C return via r2
+       ldo             -128(%r30),%r30         C release the stack area (delay slot)
+EPILOGUE(mpn_sqr_diagonal)
diff --git a/mpn/pa64/submul_1.asm b/mpn/pa64/submul_1.asm

new file mode 100644 (file)

index 0000000..4067823
--- /dev/null
+++ b/mpn/pa64/submul_1.asm
@@ -0,0 +1,689 @@
+dnl  HP-PA 2.0 64-bit mpn_submul_1 -- Multiply a limb vector with a limb and
+dnl  subtract the result from a second limb vector.
+
+dnl  Copyright 1998, 1999, 2000, 2002, 2003 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                  cycles/limb
+C 8000,8200:           7
+C 8500,8600,8700:      6.5
+
+C  The feed-in and wind-down code has not yet been scheduled.  Many cycles
+C  could be saved there per call.
+
+C  DESCRIPTION:
+C  The main loop "BIG" is 4-way unrolled, mainly to allow
+C  effective use of ADD,DC.  Delays in moving data via the cache from the FP
+C  registers to the IU registers have demanded a deep software pipeline, and
+C  a lot of stack slots for partial products in flight.
+C
+C  CODE STRUCTURE:
+C  save-some-registers
+C  do 0, 1, 2, or 3 limbs
+C  if done, restore-some-regs and return
+C  save-many-regs
+C  do 4, 8, ... limbs
+C  restore-all-regs
+
+C  STACK LAYOUT:
+C  HP-PA stack grows upwards.  We could allocate 8 fewer slots by using the
+C  slots marked FREE, as well as some slots in the caller's "frame marker".
+C
+C -00 <- r30
+C -08  FREE
+C -10  tmp
+C -18  tmp
+C -20  tmp
+C -28  tmp
+C -30  tmp
+C -38  tmp
+C -40  tmp
+C -48  tmp
+C -50  tmp
+C -58  tmp
+C -60  tmp
+C -68  tmp
+C -70  tmp
+C -78  tmp
+C -80  tmp
+C -88  tmp
+C -90  FREE
+C -98  FREE
+C -a0  FREE
+C -a8  FREE
+C -b0  r13
+C -b8  r12
+C -c0  r11
+C -c8  r10
+C -d0  r9
+C -d8  r8
+C -e0  r7
+C -e8  r6
+C -f0  r5
+C -f8  r4
+C -100 r3
+C  Previous frame:
+C  [unused area]
+C -38/-138 vlimb home slot.  For 2.0N, the vlimb arg will arrive here.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS:
+define(`rp',`%r26')    C
+define(`up',`%r25')    C
+define(`n',`%r24')     C
+define(`vlimb',`%r23') C
+
+define(`climb',`%r23') C
+
+ifdef(`HAVE_ABI_2_0w',
+`      .level  2.0w
+',`    .level  2.0
+')
+PROLOGUE(mpn_submul_1)
+
+ifdef(`HAVE_ABI_2_0w',
+`      std             vlimb, -0x38(%r30)      C store vlimb into "home" slot
+')
+       std,ma          %r3, 0x100(%r30)
+       std             %r4, -0xf8(%r30)
+       std             %r5, -0xf0(%r30)
+       ldo             0(%r0), climb           C clear climb
+       fldd            -0x138(%r30), %fr8      C put vlimb in fp register
+
+define(`p032a1',`%r1') C
+define(`p032a2',`%r19')        C
+
+define(`m032',`%r20')  C
+define(`m096',`%r21')  C
+
+define(`p000a',`%r22') C
+define(`p064a',`%r29') C
+
+define(`s000',`%r31')  C
+
+define(`ma000',`%r4')  C
+define(`ma064',`%r20') C
+
+define(`r000',`%r3')   C
+
+       extrd,u         n, 63, 2, %r5
+       cmpb,=          %r5, %r0, L(BIG)
+       nop
+
+       fldd            0(up), %fr4
+       ldo             8(up), up
+       xmpyu           %fr8R, %fr4L, %fr22
+       xmpyu           %fr8L, %fr4R, %fr23
+       fstd            %fr22, -0x78(%r30)      C mid product to  -0x78..-0x71
+       xmpyu           %fr8R, %fr4R, %fr24
+       xmpyu           %fr8L, %fr4L, %fr25
+       fstd            %fr23, -0x70(%r30)      C mid product to  -0x70..-0x69
+       fstd            %fr24, -0x80(%r30)      C low product to  -0x80..-0x79
+       addib,<>        -1, %r5, L(two_or_more)
+       fstd            %fr25, -0x68(%r30)      C high product to -0x68..-0x61
+LDEF(one)
+       ldd             -0x78(%r30), p032a1
+       ldd             -0x70(%r30), p032a2
+       ldd             -0x80(%r30), p000a
+       b               L(0_one_out)
+       ldd             -0x68(%r30), p064a
+
+LDEF(two_or_more)
+       fldd            0(up), %fr4
+       ldo             8(up), up
+       xmpyu           %fr8R, %fr4L, %fr22
+       xmpyu           %fr8L, %fr4R, %fr23
+       ldd             -0x78(%r30), p032a1
+       fstd            %fr22, -0x78(%r30)      C mid product to  -0x78..-0x71
+       xmpyu           %fr8R, %fr4R, %fr24
+       xmpyu           %fr8L, %fr4L, %fr25
+       ldd             -0x70(%r30), p032a2
+       fstd            %fr23, -0x70(%r30)      C mid product to  -0x70..-0x69
+       ldd             -0x80(%r30), p000a
+       fstd            %fr24, -0x80(%r30)      C low product to  -0x80..-0x79
+       ldd             -0x68(%r30), p064a
+       addib,<>        -1, %r5, L(three_or_more)
+       fstd            %fr25, -0x68(%r30)      C high product to -0x68..-0x61
+LDEF(two)
+       add             p032a1, p032a2, m032
+       add,dc          %r0, %r0, m096
+       depd,z          m032, 31, 32, ma000
+       extrd,u         m032, 31, 32, ma064
+       ldd             0(rp), r000
+       b               L(0_two_out)
+       depd            m096, 31, 32, ma064
+
+LDEF(three_or_more)
+       fldd            0(up), %fr4
+       add             p032a1, p032a2, m032
+       add,dc          %r0, %r0, m096
+       depd,z          m032, 31, 32, ma000
+       extrd,u         m032, 31, 32, ma064
+       ldd             0(rp), r000
+C      addib,=         -1, %r5, L(0_out)
+       depd            m096, 31, 32, ma064
+LDEF(loop0)
+C      xmpyu           %fr8R, %fr4L, %fr22
+C      xmpyu           %fr8L, %fr4R, %fr23
+C      ldd             -0x78(%r30), p032a1
+C      fstd            %fr22, -0x78(%r30)      C mid product to  -0x78..-0x71
+C
+C      xmpyu           %fr8R, %fr4R, %fr24
+C      xmpyu           %fr8L, %fr4L, %fr25
+C      ldd             -0x70(%r30), p032a2
+C      fstd            %fr23, -0x70(%r30)      C mid product to  -0x70..-0x69
+C
+C      ldo             8(rp), rp
+C      add             climb, p000a, s000
+C      ldd             -0x80(%r30), p000a
+C      fstd            %fr24, -0x80(%r30)      C low product to  -0x80..-0x79
+C
+C      add,dc          p064a, %r0, climb
+C      ldo             8(up), up
+C      ldd             -0x68(%r30), p064a
+C      fstd            %fr25, -0x68(%r30)      C high product to -0x68..-0x61
+C
+C      add             ma000, s000, s000
+C      add,dc          ma064, climb, climb
+C      fldd            0(up), %fr4
+C
+C      sub             r000, s000, s000
+C      sub,db          %r0, climb, climb
+C      sub             %r0, climb, climb
+C      std             s000, -8(rp)
+C
+C      add             p032a1, p032a2, m032
+C      add,dc          %r0, %r0, m096
+C
+C      depd,z          m032, 31, 32, ma000
+C      extrd,u         m032, 31, 32, ma064
+C      ldd             0(rp), r000
+C      addib,<>        -1, %r5, L(loop0)
+C      depd            m096, 31, 32, ma064
+LDEF(0_out)
+       ldo             8(up), up
+       xmpyu           %fr8R, %fr4L, %fr22
+       xmpyu           %fr8L, %fr4R, %fr23
+       ldd             -0x78(%r30), p032a1
+       fstd            %fr22, -0x78(%r30)      C mid product to  -0x78..-0x71
+       xmpyu           %fr8R, %fr4R, %fr24
+       xmpyu           %fr8L, %fr4L, %fr25
+       ldd             -0x70(%r30), p032a2
+       fstd            %fr23, -0x70(%r30)      C mid product to  -0x70..-0x69
+       ldo             8(rp), rp
+       add             climb, p000a, s000
+       ldd             -0x80(%r30), p000a
+       fstd            %fr24, -0x80(%r30)      C low product to  -0x80..-0x79
+       add,dc          p064a, %r0, climb
+       ldd             -0x68(%r30), p064a
+       fstd            %fr25, -0x68(%r30)      C high product to -0x68..-0x61
+       add             ma000, s000, s000
+       add,dc          ma064, climb, climb
+       sub             r000, s000, s000
+       sub,db          %r0, climb, climb
+       sub             %r0, climb, climb
+       std             s000, -8(rp)
+       add             p032a1, p032a2, m032
+       add,dc          %r0, %r0, m096
+       depd,z          m032, 31, 32, ma000
+       extrd,u         m032, 31, 32, ma064
+       ldd             0(rp), r000
+       depd            m096, 31, 32, ma064
+LDEF(0_two_out)
+       ldd             -0x78(%r30), p032a1
+       ldd             -0x70(%r30), p032a2
+       ldo             8(rp), rp
+       add             climb, p000a, s000
+       ldd             -0x80(%r30), p000a
+       add,dc          p064a, %r0, climb
+       ldd             -0x68(%r30), p064a
+       add             ma000, s000, s000
+       add,dc          ma064, climb, climb
+       sub             r000, s000, s000
+       sub,db          %r0, climb, climb
+       sub             %r0, climb, climb
+       std             s000, -8(rp)
+LDEF(0_one_out)
+       add             p032a1, p032a2, m032
+       add,dc          %r0, %r0, m096
+       depd,z          m032, 31, 32, ma000
+       extrd,u         m032, 31, 32, ma064
+       ldd             0(rp), r000
+       depd            m096, 31, 32, ma064
+
+       add             climb, p000a, s000
+       add,dc          p064a, %r0, climb
+       add             ma000, s000, s000
+       add,dc          ma064, climb, climb
+       sub             r000, s000, s000
+       sub,db          %r0, climb, climb
+       sub             %r0, climb, climb
+       std             s000, 0(rp)
+
+       cmpib,>=        4, n, L(done)
+       ldo             8(rp), rp
+
+C 4-way unrolled code.
+
+LDEF(BIG)
+
+define(`p032a1',`%r1') C
+define(`p032a2',`%r19')        C
+define(`p096b1',`%r20')        C
+define(`p096b2',`%r21')        C
+define(`p160c1',`%r22')        C
+define(`p160c2',`%r29')        C
+define(`p224d1',`%r31')        C
+define(`p224d2',`%r3') C
+                       C
+define(`m032',`%r4')   C
+define(`m096',`%r5')   C
+define(`m160',`%r6')   C
+define(`m224',`%r7')   C
+define(`m288',`%r8')   C
+                       C
+define(`p000a',`%r1')  C
+define(`p064a',`%r19') C
+define(`p064b',`%r20') C
+define(`p128b',`%r21') C
+define(`p128c',`%r22') C
+define(`p192c',`%r29') C
+define(`p192d',`%r31') C
+define(`p256d',`%r3')  C
+                       C
+define(`s000',`%r10')  C
+define(`s064',`%r11')  C
+define(`s128',`%r12')  C
+define(`s192',`%r13')  C
+                       C
+define(`ma000',`%r9')  C
+define(`ma064',`%r4')  C
+define(`ma128',`%r5')  C
+define(`ma192',`%r6')  C
+define(`ma256',`%r7')  C
+                       C
+define(`r000',`%r1')   C
+define(`r064',`%r19')  C
+define(`r128',`%r20')  C
+define(`r192',`%r21')  C
+
+       std             %r6, -0xe8(%r30)
+       std             %r7, -0xe0(%r30)
+       std             %r8, -0xd8(%r30)
+       std             %r9, -0xd0(%r30)
+       std             %r10, -0xc8(%r30)
+       std             %r11, -0xc0(%r30)
+       std             %r12, -0xb8(%r30)
+       std             %r13, -0xb0(%r30)
+
+ifdef(`HAVE_ABI_2_0w',
+`      extrd,u         n, 61, 62, n            C right shift 2
+',`    extrd,u         n, 61, 30, n            C right shift 2, zero extend
+')
+
+LDEF(4_or_more)
+       fldd            0(up), %fr4
+       fldd            8(up), %fr5
+       fldd            16(up), %fr6
+       fldd            24(up), %fr7
+       xmpyu           %fr8R, %fr4L, %fr22
+       xmpyu           %fr8L, %fr4R, %fr23
+       xmpyu           %fr8R, %fr5L, %fr24
+       xmpyu           %fr8L, %fr5R, %fr25
+       xmpyu           %fr8R, %fr6L, %fr26
+       xmpyu           %fr8L, %fr6R, %fr27
+       fstd            %fr22, -0x78(%r30)      C mid product to  -0x78..-0x71
+       xmpyu           %fr8R, %fr7L, %fr28
+       xmpyu           %fr8L, %fr7R, %fr29
+       fstd            %fr23, -0x70(%r30)      C mid product to  -0x70..-0x69
+       xmpyu           %fr8R, %fr4R, %fr30
+       xmpyu           %fr8L, %fr4L, %fr31
+       fstd            %fr24, -0x38(%r30)      C mid product to  -0x38..-0x31
+       xmpyu           %fr8R, %fr5R, %fr22
+       xmpyu           %fr8L, %fr5L, %fr23
+       fstd            %fr25, -0x30(%r30)      C mid product to  -0x30..-0x29
+       xmpyu           %fr8R, %fr6R, %fr24
+       xmpyu           %fr8L, %fr6L, %fr25
+       fstd            %fr26, -0x58(%r30)      C mid product to  -0x58..-0x51
+       xmpyu           %fr8R, %fr7R, %fr26
+       fstd            %fr27, -0x50(%r30)      C mid product to  -0x50..-0x49
+       addib,<>        -1, n, L(8_or_more)
+       xmpyu           %fr8L, %fr7L, %fr27
+       fstd            %fr28, -0x18(%r30)      C mid product to  -0x18..-0x11
+       fstd            %fr29, -0x10(%r30)      C mid product to  -0x10..-0x09
+       fstd            %fr30, -0x80(%r30)      C low product to  -0x80..-0x79
+       fstd            %fr31, -0x68(%r30)      C high product to -0x68..-0x61
+       fstd            %fr22, -0x40(%r30)      C low product to  -0x40..-0x39
+       fstd            %fr23, -0x28(%r30)      C high product to -0x28..-0x21
+       fstd            %fr24, -0x60(%r30)      C low product to  -0x60..-0x59
+       fstd            %fr25, -0x48(%r30)      C high product to -0x48..-0x41
+       fstd            %fr26, -0x20(%r30)      C low product to  -0x20..-0x19
+       fstd            %fr27, -0x88(%r30)      C high product to -0x88..-0x81
+       ldd             -0x78(%r30), p032a1
+       ldd             -0x70(%r30), p032a2
+       ldd             -0x38(%r30), p096b1
+       ldd             -0x30(%r30), p096b2
+       ldd             -0x58(%r30), p160c1
+       ldd             -0x50(%r30), p160c2
+       ldd             -0x18(%r30), p224d1
+       ldd             -0x10(%r30), p224d2
+       b               L(end1)
+       nop
+
+LDEF(8_or_more)
+       fstd            %fr28, -0x18(%r30)      C mid product to  -0x18..-0x11
+       fstd            %fr29, -0x10(%r30)      C mid product to  -0x10..-0x09
+       ldo             32(up), up
+       fstd            %fr30, -0x80(%r30)      C low product to  -0x80..-0x79
+       fstd            %fr31, -0x68(%r30)      C high product to -0x68..-0x61
+       fstd            %fr22, -0x40(%r30)      C low product to  -0x40..-0x39
+       fstd            %fr23, -0x28(%r30)      C high product to -0x28..-0x21
+       fstd            %fr24, -0x60(%r30)      C low product to  -0x60..-0x59
+       fstd            %fr25, -0x48(%r30)      C high product to -0x48..-0x41
+       fstd            %fr26, -0x20(%r30)      C low product to  -0x20..-0x19
+       fstd            %fr27, -0x88(%r30)      C high product to -0x88..-0x81
+       fldd            0(up), %fr4
+       fldd            8(up), %fr5
+       fldd            16(up), %fr6
+       fldd            24(up), %fr7
+       xmpyu           %fr8R, %fr4L, %fr22
+       ldd             -0x78(%r30), p032a1
+       xmpyu           %fr8L, %fr4R, %fr23
+       xmpyu           %fr8R, %fr5L, %fr24
+       ldd             -0x70(%r30), p032a2
+       xmpyu           %fr8L, %fr5R, %fr25
+       xmpyu           %fr8R, %fr6L, %fr26
+       ldd             -0x38(%r30), p096b1
+       xmpyu           %fr8L, %fr6R, %fr27
+       fstd            %fr22, -0x78(%r30)      C mid product to  -0x78..-0x71
+       xmpyu           %fr8R, %fr7L, %fr28
+       ldd             -0x30(%r30), p096b2
+       xmpyu           %fr8L, %fr7R, %fr29
+       fstd            %fr23, -0x70(%r30)      C mid product to  -0x70..-0x69
+       xmpyu           %fr8R, %fr4R, %fr30
+       ldd             -0x58(%r30), p160c1
+       xmpyu           %fr8L, %fr4L, %fr31
+       fstd            %fr24, -0x38(%r30)      C mid product to  -0x38..-0x31
+       xmpyu           %fr8R, %fr5R, %fr22
+       ldd             -0x50(%r30), p160c2
+       xmpyu           %fr8L, %fr5L, %fr23
+       fstd            %fr25, -0x30(%r30)      C mid product to  -0x30..-0x29
+       xmpyu           %fr8R, %fr6R, %fr24
+       ldd             -0x18(%r30), p224d1
+       xmpyu           %fr8L, %fr6L, %fr25
+       fstd            %fr26, -0x58(%r30)      C mid product to  -0x58..-0x51
+       xmpyu           %fr8R, %fr7R, %fr26
+       ldd             -0x10(%r30), p224d2
+       fstd            %fr27, -0x50(%r30)      C mid product to  -0x50..-0x49
+       addib,=         -1, n, L(end2)
+       xmpyu           %fr8L, %fr7L, %fr27
+LDEF(loop)
+       add             p032a1, p032a2, m032
+       ldd             -0x80(%r30), p000a
+       add,dc          p096b1, p096b2, m096
+       fstd            %fr28, -0x18(%r30)      C mid product to  -0x18..-0x11
+
+       add,dc          p160c1, p160c2, m160
+       ldd             -0x68(%r30), p064a
+       add,dc          p224d1, p224d2, m224
+       fstd            %fr29, -0x10(%r30)      C mid product to  -0x10..-0x09
+
+       add,dc          %r0, %r0, m288
+       ldd             -0x40(%r30), p064b
+       ldo             32(up), up
+       fstd            %fr30, -0x80(%r30)      C low product to  -0x80..-0x79
+
+       depd,z          m032, 31, 32, ma000
+       ldd             -0x28(%r30), p128b
+       extrd,u         m032, 31, 32, ma064
+       fstd            %fr31, -0x68(%r30)      C high product to -0x68..-0x61
+
+       depd            m096, 31, 32, ma064
+       ldd             -0x60(%r30), p128c
+       extrd,u         m096, 31, 32, ma128
+       fstd            %fr22, -0x40(%r30)      C low product to  -0x40..-0x39
+
+       depd            m160, 31, 32, ma128
+       ldd             -0x48(%r30), p192c
+       extrd,u         m160, 31, 32, ma192
+       fstd            %fr23, -0x28(%r30)      C high product to -0x28..-0x21
+
+       depd            m224, 31, 32, ma192
+       ldd             -0x20(%r30), p192d
+       extrd,u         m224, 31, 32, ma256
+       fstd            %fr24, -0x60(%r30)      C low product to  -0x60..-0x59
+
+       depd            m288, 31, 32, ma256
+       ldd             -0x88(%r30), p256d
+       add             climb, p000a, s000
+       fstd            %fr25, -0x48(%r30)      C high product to -0x48..-0x41
+
+       add,dc          p064a, p064b, s064
+       ldd             0(rp), r000
+       add,dc          p128b, p128c, s128
+       fstd            %fr26, -0x20(%r30)      C low product to  -0x20..-0x19
+
+       add,dc          p192c, p192d, s192
+       ldd             8(rp), r064
+       add,dc          p256d, %r0, climb
+       fstd            %fr27, -0x88(%r30)      C high product to -0x88..-0x81
+
+       ldd             16(rp), r128
+       add             ma000, s000, s000       C accum mid 0
+       ldd             24(rp), r192
+       add,dc          ma064, s064, s064       C accum mid 1
+
+       add,dc          ma128, s128, s128       C accum mid 2
+       fldd            0(up), %fr4
+       add,dc          ma192, s192, s192       C accum mid 3
+       fldd            8(up), %fr5
+
+       add,dc          ma256, climb, climb
+       fldd            16(up), %fr6
+       sub             r000, s000, s000        C accum rlimb 0
+       fldd            24(up), %fr7
+
+       sub,db          r064, s064, s064        C accum rlimb 1
+       sub,db          r128, s128, s128        C accum rlimb 2
+       std             s000, 0(rp)
+
+       sub,db          r192, s192, s192        C accum rlimb 3
+       sub,db          %r0, climb, climb
+       sub             %r0, climb, climb
+       std             s064, 8(rp)
+
+       xmpyu           %fr8R, %fr4L, %fr22
+       ldd             -0x78(%r30), p032a1
+       xmpyu           %fr8L, %fr4R, %fr23
+       std             s128, 16(rp)
+
+       xmpyu           %fr8R, %fr5L, %fr24
+       ldd             -0x70(%r30), p032a2
+       xmpyu           %fr8L, %fr5R, %fr25
+       std             s192, 24(rp)
+
+       xmpyu           %fr8R, %fr6L, %fr26
+       ldd             -0x38(%r30), p096b1
+       xmpyu           %fr8L, %fr6R, %fr27
+       fstd            %fr22, -0x78(%r30)      C mid product to  -0x78..-0x71
+
+       xmpyu           %fr8R, %fr7L, %fr28
+       ldd             -0x30(%r30), p096b2
+       xmpyu           %fr8L, %fr7R, %fr29
+       fstd            %fr23, -0x70(%r30)      C mid product to  -0x70..-0x69
+
+       xmpyu           %fr8R, %fr4R, %fr30
+       ldd             -0x58(%r30), p160c1
+       xmpyu           %fr8L, %fr4L, %fr31
+       fstd            %fr24, -0x38(%r30)      C mid product to  -0x38..-0x31
+
+       xmpyu           %fr8R, %fr5R, %fr22
+       ldd             -0x50(%r30), p160c2
+       xmpyu           %fr8L, %fr5L, %fr23
+       fstd            %fr25, -0x30(%r30)      C mid product to  -0x30..-0x29
+
+       xmpyu           %fr8R, %fr6R, %fr24
+       ldd             -0x18(%r30), p224d1
+       xmpyu           %fr8L, %fr6L, %fr25
+       fstd            %fr26, -0x58(%r30)      C mid product to  -0x58..-0x51
+
+       xmpyu           %fr8R, %fr7R, %fr26
+       ldd             -0x10(%r30), p224d2
+       fstd            %fr27, -0x50(%r30)      C mid product to  -0x50..-0x49
+       xmpyu           %fr8L, %fr7L, %fr27
+
+       addib,<>        -1, n, L(loop)
+       ldo             32(rp), rp
+
+LDEF(end2)
+       add             p032a1, p032a2, m032
+       ldd             -0x80(%r30), p000a
+       add,dc          p096b1, p096b2, m096
+       fstd            %fr28, -0x18(%r30)      C mid product to  -0x18..-0x11
+       add,dc          p160c1, p160c2, m160
+       ldd             -0x68(%r30), p064a
+       add,dc          p224d1, p224d2, m224
+       fstd            %fr29, -0x10(%r30)      C mid product to  -0x10..-0x09
+       add,dc          %r0, %r0, m288
+       ldd             -0x40(%r30), p064b
+       fstd            %fr30, -0x80(%r30)      C low product to  -0x80..-0x79
+       depd,z          m032, 31, 32, ma000
+       ldd             -0x28(%r30), p128b
+       extrd,u         m032, 31, 32, ma064
+       fstd            %fr31, -0x68(%r30)      C high product to -0x68..-0x61
+       depd            m096, 31, 32, ma064
+       ldd             -0x60(%r30), p128c
+       extrd,u         m096, 31, 32, ma128
+       fstd            %fr22, -0x40(%r30)      C low product to  -0x40..-0x39
+       depd            m160, 31, 32, ma128
+       ldd             -0x48(%r30), p192c
+       extrd,u         m160, 31, 32, ma192
+       fstd            %fr23, -0x28(%r30)      C high product to -0x28..-0x21
+       depd            m224, 31, 32, ma192
+       ldd             -0x20(%r30), p192d
+       extrd,u         m224, 31, 32, ma256
+       fstd            %fr24, -0x60(%r30)      C low product to  -0x60..-0x59
+       depd            m288, 31, 32, ma256
+       ldd             -0x88(%r30), p256d
+       add             climb, p000a, s000
+       fstd            %fr25, -0x48(%r30)      C high product to -0x48..-0x41
+       add,dc          p064a, p064b, s064
+       ldd             0(rp), r000
+       add,dc          p128b, p128c, s128
+       fstd            %fr26, -0x20(%r30)      C low product to  -0x20..-0x19
+       add,dc          p192c, p192d, s192
+       ldd             8(rp), r064
+       add,dc          p256d, %r0, climb
+       fstd            %fr27, -0x88(%r30)      C high product to -0x88..-0x81
+       ldd             16(rp), r128
+       add             ma000, s000, s000       C accum mid 0
+       ldd             24(rp), r192
+       add,dc          ma064, s064, s064       C accum mid 1
+       add,dc          ma128, s128, s128       C accum mid 2
+       add,dc          ma192, s192, s192       C accum mid 3
+       add,dc          ma256, climb, climb
+       sub             r000, s000, s000        C accum rlimb 0
+       sub,db          r064, s064, s064        C accum rlimb 1
+       sub,db          r128, s128, s128        C accum rlimb 2
+       std             s000, 0(rp)
+       sub,db          r192, s192, s192        C accum rlimb 3
+       sub,db          %r0, climb, climb
+       sub             %r0, climb, climb
+       std             s064, 8(rp)
+       ldd             -0x78(%r30), p032a1
+       std             s128, 16(rp)
+       ldd             -0x70(%r30), p032a2
+       std             s192, 24(rp)
+       ldd             -0x38(%r30), p096b1
+       ldd             -0x30(%r30), p096b2
+       ldd             -0x58(%r30), p160c1
+       ldd             -0x50(%r30), p160c2
+       ldd             -0x18(%r30), p224d1
+       ldd             -0x10(%r30), p224d2
+       ldo             32(rp), rp
+
+LDEF(end1)
+       add             p032a1, p032a2, m032
+       ldd             -0x80(%r30), p000a
+       add,dc          p096b1, p096b2, m096
+       add,dc          p160c1, p160c2, m160
+       ldd             -0x68(%r30), p064a
+       add,dc          p224d1, p224d2, m224
+       add,dc          %r0, %r0, m288
+       ldd             -0x40(%r30), p064b
+       depd,z          m032, 31, 32, ma000
+       ldd             -0x28(%r30), p128b
+       extrd,u         m032, 31, 32, ma064
+       depd            m096, 31, 32, ma064
+       ldd             -0x60(%r30), p128c
+       extrd,u         m096, 31, 32, ma128
+       depd            m160, 31, 32, ma128
+       ldd             -0x48(%r30), p192c
+       extrd,u         m160, 31, 32, ma192
+       depd            m224, 31, 32, ma192
+       ldd             -0x20(%r30), p192d
+       extrd,u         m224, 31, 32, ma256
+       depd            m288, 31, 32, ma256
+       ldd             -0x88(%r30), p256d
+       add             climb, p000a, s000
+       add,dc          p064a, p064b, s064
+       ldd             0(rp), r000
+       add,dc          p128b, p128c, s128
+       add,dc          p192c, p192d, s192
+       ldd             8(rp), r064
+       add,dc          p256d, %r0, climb
+       ldd             16(rp), r128
+       add             ma000, s000, s000       C accum mid 0
+       ldd             24(rp), r192
+       add,dc          ma064, s064, s064       C accum mid 1
+       add,dc          ma128, s128, s128       C accum mid 2
+       add,dc          ma192, s192, s192       C accum mid 3
+       add,dc          ma256, climb, climb
+       sub             r000, s000, s000        C accum rlimb 0
+       sub,db          r064, s064, s064        C accum rlimb 1
+       sub,db          r128, s128, s128        C accum rlimb 2
+       std             s000, 0(rp)
+       sub,db          r192, s192, s192        C accum rlimb 3
+       sub,db          %r0, climb, climb
+       sub             %r0, climb, climb
+       std             s064, 8(rp)
+       std             s128, 16(rp)
+       std             s192, 24(rp)
+
+       ldd             -0xb0(%r30), %r13
+       ldd             -0xb8(%r30), %r12
+       ldd             -0xc0(%r30), %r11
+       ldd             -0xc8(%r30), %r10
+       ldd             -0xd0(%r30), %r9
+       ldd             -0xd8(%r30), %r8
+       ldd             -0xe0(%r30), %r7
+       ldd             -0xe8(%r30), %r6
+LDEF(done)
+ifdef(`HAVE_ABI_2_0w',
+`      copy            climb, %r28
+',`    extrd,u         climb, 63, 32, %r29
+       extrd,u         climb, 31, 32, %r28
+')
+       ldd             -0xf0(%r30), %r5
+       ldd             -0xf8(%r30), %r4
+       bve             (%r2)
+       ldd,mb          -0x100(%r30), %r3
+EPILOGUE(mpn_submul_1)
diff --git a/mpn/pa64/udiv.asm b/mpn/pa64/udiv.asm

new file mode 100644 (file)

index 0000000..3775783
--- /dev/null
+++ b/mpn/pa64/udiv.asm
@@ -0,0 +1,114 @@
+dnl  HP-PA 2.0 64-bit mpn_udiv_qrnnd_r.
+
+dnl  Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C This runs at about 280 cycles on both PA8000 and PA8500, corresponding to a
+C bit more than 4 cycles/bit.
+
+C INPUT PARAMETERS
+define(`n1',`%r26')
+define(`n0',`%r25')
+define(`d',`%r24')
+define(`remptr',`%r23')
+
+define(`q',`%r28')
+define(`dn',`%r29')
+C old_divstep: alternative divide step; the routine below expands only divstep, never this one.
+define(`old_divstep',
+       `add,dc         n0,n0,n0
+       add,dc          n1,n1,n1
+       sub,*<<         n1,d,%r22
+       copy            %r22,n1')
+C divstep: n1:n0 <<= 1; q = 2*q + (n1 >= d); then n1 -= d if n1 >= d (unsigned).  Clobbers %r1.
+define(`divstep',
+       `add            n0,n0,n0
+       add,dc          n1,n1,n1
+       sub             n1,d,%r1
+       add,dc          q,q,q
+       cmpclr,*<<      n1,d,%r0
+       copy            %r1,n1
+')
+
+ifdef(`HAVE_ABI_2_0w',
+`      .level  2.0w
+',`    .level  2.0
+')
+PROLOGUE(mpn_udiv_qrnnd_r)
+ifdef(`HAVE_ABI_2_0n',
+`      depd            %r25,31,32,%r26         C n1 = %r25:%r26 (two 32-bit halves)
+       depd            %r23,31,32,%r24         C %r24 = %r23:%r24, the low dividend limb
+       copy            %r24,%r25               C move it to the n0 register
+       ldd             -56(%r30),%r24          C d is read from the stack
+       ldw             -60(%r30),%r23          C remptr is read from the stack
+')
+       ldi             0,q                     C quotient bits are shifted in from zero
+       cmpib,*>=       0,d,L(large_divisor)    C taken when d <= 0 as signed (msb set, or d = 0)
+       ldi             8,%r31          C setup loop counter (delay slot): 8 passes of 8 divsteps
+C Small divisor: 64 divsteps leave the quotient in q and the remainder in n1.
+       sub             %r0,d,dn                C dn = -d
+LDEF(Loop)
+       divstep divstep divstep divstep divstep divstep divstep divstep
+       addib,<>        -1,%r31,L(Loop)         C count down, loop while nonzero
+       nop                                     C delay slot
+
+ifdef(`HAVE_ABI_2_0n',
+`      copy            %r28,%r29               C return pair: low word of q in %r29
+       extrd,u         %r28,31,32,%r28         C return pair: high word of q in %r28
+')
+       bve             (%r2)                   C return
+       std             n1,0(remptr)    C store remainder (delay slot)
+
+LDEF(large_divisor)
+       extrd,u         n0,63,1,%r19    C save lsb of dividend
+       shrpd           n1,n0,1,n0      C n0 = lo(n1n0 >> 1)
+       shrpd           %r0,n1,1,n1     C n1 = hi(n1n0 >> 1)
+       extrd,u         d,63,1,%r20     C save lsb of divisor
+       shrpd           %r0,d,1,d       C d = floor(orig_d / 2)
+       add,l           %r20,d,d        C d = ceil(orig_d / 2)
+
+       sub             %r0,d,dn
+LDEF(Loop2)
+       divstep divstep divstep divstep divstep divstep divstep divstep
+       addib,<>        -1,%r31,L(Loop2)        C count down, loop while nonzero
+       nop                                     C delay slot
+
+       cmpib,*=        0,%r20,L(even_divisor)  C original d was even: no fixup needed
+       shladd          n1,1,%r19,n1    C shift in omitted dividend lsb (delay slot)
+
+       add             d,d,d           C restore orig...
+       sub             d,%r20,d        C ...d value
+       sub             %r0,d,dn        C dn = -d
+
+       add,*nuv        n1,q,n1         C fix remainder for omitted divisor lsb
+       add,l           n1,dn,n1        C adjust remainder if rem. fix carried
+       add,dc          %r0,q,q         C adjust quotient accordingly
+
+       sub,*<<         n1,d,%r0        C remainder >= divisor?
+       add,l           n1,dn,n1        C adjust remainder
+       add,dc          %r0,q,q         C adjust quotient
+
+LDEF(even_divisor)
+ifdef(`HAVE_ABI_2_0n',
+`      copy            %r28,%r29               C return pair: low word of q in %r29
+       extrd,u         %r28,31,32,%r28         C return pair: high word of q in %r28
+')
+       bve             (%r2)                   C return
+       std             n1,0(remptr)    C store remainder (delay slot)
+EPILOGUE(mpn_udiv_qrnnd_r)
diff --git a/mpn/pa64/umul.asm b/mpn/pa64/umul.asm

new file mode 100644 (file)

index 0000000..635e44f
--- /dev/null
+++ b/mpn/pa64/umul.asm
@@ -0,0 +1,88 @@
+dnl  Copyright 1999, 2000, 2002, 2003 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+dnl  Optimizations:
+dnl  * Avoid skip instructions
+dnl  * Put carry-generating and carry-consuming insns consecutively
+dnl  * Don't allocate any stack, "home" positions for parameters could be
+dnl    used.
+
+include(`../config.m4')
+
+define(`p0',`%r28')
+define(`p1',`%r29')
+define(`t32',`%r19')
+define(`t0',`%r20')
+define(`t1',`%r21')
+define(`x',`%r22')
+define(`m0',`%r23')
+define(`m1',`%r24')
+
+ifdef(`HAVE_ABI_2_0w',
+`      .level  2.0w
+',`    .level  2.0
+')
+PROLOGUE(mpn_umul_ppmm_r)
+       ldo             128(%r30),%r30          C advance %r30 by 128 bytes for scratch space
+ifdef(`HAVE_ABI_2_0w',
+`      std             %r26,-64(%r30)          C first operand to memory
+       std             %r25,-56(%r30)          C second operand to memory
+       copy            %r24,%r31               C %r31 = address for the low half of the product
+',`
+       depd            %r25,31,32,%r26         C first operand = %r25:%r26
+       std             %r26,-64(%r30)
+       depd            %r23,31,32,%r24         C second operand = %r23:%r24
+       std             %r24,-56(%r30)
+       ldw             -180(%r30),%r31         C address for the low half, read from the stack
+')
+C Form the four 32x32 partial products in the FPU and move them to integer registers via the stack.
+       fldd            -64(%r30),%fr4
+       fldd            -56(%r30),%fr5
+
+       xmpyu           %fr5R,%fr4R,%fr6        C low x low words
+       fstd            %fr6,-128(%r30)
+       xmpyu           %fr5R,%fr4L,%fr7        C cross product (loaded as m0 below)
+       fstd            %fr7,-120(%r30)
+       xmpyu           %fr5L,%fr4R,%fr8        C cross product (loaded as m1 below)
+       fstd            %fr8,-112(%r30)
+       xmpyu           %fr5L,%fr4L,%fr9        C high x high words
+       fstd            %fr9,-104(%r30)
+
+       depdi,z         1,31,1,t32              C t32 = 2^32
+
+       ldd             -128(%r30),p0           C lo = low 64 bit of product
+       ldd             -120(%r30),m0           C m0 = mid0 64 bit of product
+       ldd             -112(%r30),m1           C m1 = mid1 64 bit of product
+       ldd             -104(%r30),p1           C hi = high 64 bit of product
+
+       add,l,*nuv      m0,m1,x                 C x = m1+m0; next insn runs only on carry out
+        add,l          t32,p1,p1               C propagate carry to mid of p1
+       depd,z          x,31,32,t0              C lo32(m1+m0)
+       add             t0,p0,p0                C add into low half, setting carry
+       extrd,u         x,31,32,t1              C hi32(m1+m0)
+       add,dc          t1,p1,p1                C add into high half with that carry
+
+       std             p0,0(%r31)              C store low half of product
+ifdef(`HAVE_ABI_2_0w',
+`      copy            p1,%r28                 C return val in %r28
+',`    extrd,u         p1,31,32,%r28           C return val in %r28,%r29
+')
+       bve             (%r2)                   C return
+       ldo             -128(%r30),%r30         C undo the stack adjustment (delay slot)
+EPILOGUE(mpn_umul_ppmm_r)
+
diff --git a/mpn/power/add_n.asm b/mpn/power/add_n.asm

new file mode 100644 (file)

index 0000000..4fcafab
--- /dev/null
+++ b/mpn/power/add_n.asm
@@ -0,0 +1,73 @@
+dnl  IBM POWER mpn_add_n -- Add two limb vectors of equal, non-zero length.
+
+dnl  Copyright 1992, 1994, 1995, 1996, 1999, 2000, 2001, 2005 Free Software
+dnl  Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+dnl  INPUT PARAMETERS
+dnl  res_ptr   r3
+dnl  s1_ptr    r4
+dnl  s2_ptr    r5
+dnl  size      r6
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_add_n)
+       andil.  10,6,1          C odd or even number of limbs?
+       l       8,0(4)          C load least significant s1 limb
+       l       0,0(5)          C load least significant s2 limb
+       cal     3,-4(3)         C offset res_ptr, it's updated before it's used
+       sri     10,6,1          C count for unrolled loop
+       a       7,0,8           C add least significant limbs, set cy
+       mtctr   10              C copy count into CTR
+       beq     0,Leven         C branch if even # of limbs (# of limbs >= 2)
+
+C We have an odd # of limbs.  Add the first limbs separately.
+       cmpi    1,10,0          C is count for unrolled loop zero?
+       bc      4,6,L1          C bne cr1,L1 (misassembled by gas)
+       st      7,4(3)          C size is 1: store the only sum limb
+       aze     3,10            C use the fact that r10 is zero...
+       br                      C return
+
+C We added least significant limbs.  Now reload the next limbs to enter loop.
+L1:    lu      8,4(4)          C load s1 limb and update s1_ptr
+       lu      0,4(5)          C load s2 limb and update s2_ptr
+       stu     7,4(3)          C store first sum limb, update res_ptr
+       ae      7,0,8           C add limbs, set cy
+Leven: lu      9,4(4)          C load s1 limb and update s1_ptr
+       lu      10,4(5)         C load s2 limb and update s2_ptr
+       bdz     Lend            C If done, skip loop
+
+Loop:  lu      8,4(4)          C load s1 limb and update s1_ptr
+       lu      0,4(5)          C load s2 limb and update s2_ptr
+       ae      11,10,9         C add previous limbs with cy, set cy
+       stu     7,4(3)          C store older sum limb, update res_ptr
+       lu      9,4(4)          C load s1 limb and update s1_ptr
+       lu      10,4(5)         C load s2 limb and update s2_ptr
+       ae      7,0,8           C add previous limbs with cy, set cy
+       stu     11,4(3)         C store newer sum limb, update res_ptr
+       bdn     Loop            C decrement CTR and loop back
+
+Lend:  ae      11,10,9         C add limbs with cy, set cy
+       st      7,4(3)          C store second-to-last sum limb
+       st      11,8(3)         C store last sum limb
+       lil     3,0             C load cy into ...
+       aze     3,3             C ... return value register
+       br                      C return
+EPILOGUE(mpn_add_n)
diff --git a/mpn/power/addmul_1.asm b/mpn/power/addmul_1.asm

new file mode 100644 (file)

index 0000000..fcda2c1
--- /dev/null
+++ b/mpn/power/addmul_1.asm
@@ -0,0 +1,115 @@
+dnl  IBM POWER mpn_addmul_1 -- Multiply a limb vector with a limb and add the
+dnl  result to a second limb vector.
+
+dnl  Copyright 1992, 1994, 1999, 2000, 2001 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+dnl  INPUT PARAMETERS
+dnl  res_ptr   r3
+dnl  s1_ptr    r4
+dnl  size      r5
+dnl  s2_limb   r6
+
+dnl  The POWER architecture has no unsigned 32x32->64 bit multiplication
+dnl  instruction.  To obtain that operation, we have to use the 32x32->64
+dnl  signed multiplication instruction, and add the appropriate compensation to
+dnl  the high limb of the result.  We add the multiplicand if the multiplier
+dnl  has its most significant bit set, and we add the multiplier if the
+dnl  multiplicand has its most significant bit set.  We need to preserve the
+dnl  carry flag between each iteration, so we have to compute the compensation
+dnl  carefully (the natural, srai+and doesn't work).  Since all POWER can
+dnl  branch in zero cycles, we use conditional branches for the compensation.
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+       cal     3,-4(3)         C bias res_ptr by -4; accesses below use offset 4
+       l       0,0(4)          C load first s1 limb
+       cmpi    0,6,0           C cr0 = sign of s2_limb, tested by blt below
+       mtctr   5               C copy size into CTR
+       mul     9,0,6           C signed multiply: high word in r9, low word in MQ
+       srai    7,0,31          C r7 = all ones if s1 limb is negative, else 0
+       and     7,7,6           C r7 = s2_limb if s1 limb is negative, else 0
+       mfmq    8               C r8 = low word of product
+       cax     9,9,7           C compensate high word for negative s1 limb
+       l       7,4(3)          C load res_limb
+       a       8,8,7           C add res_limb
+       blt     Lneg            C s2_limb negative: use the loop that adds s1 limbs
+Lpos:  bdz     Lend            C only one limb: finish
+
+Lploop:        lu      0,4(4)          C load next s1 limb and update s1_ptr
+       stu     8,4(3)          C store result limb and update res_ptr
+       cmpi    0,0,0           C cr0 = sign of s1 limb, tested by bge below
+       mul     10,0,6          C high word in r10, low word in MQ
+       mfmq    0               C r0 = low word of product
+       ae      8,0,9           C low limb + old_cy_limb + old cy
+       l       7,4(3)          C load res_limb
+       aze     10,10           C propagate cy to new cy_limb
+       a       8,8,7           C add res_limb
+       bge     Lp0             C s1 limb non-negative: no compensation
+       cax     10,10,6         C adjust high limb for negative limb from s1
+Lp0:   bdz     Lend0           C done; cy_limb is in r10
+       lu      0,4(4)          C load next s1 limb and update s1_ptr
+       stu     8,4(3)          C store result limb and update res_ptr
+       cmpi    0,0,0           C cr0 = sign of s1 limb, tested by bge below
+       mul     9,0,6           C high word in r9, low word in MQ
+       mfmq    0               C r0 = low word of product
+       ae      8,0,10          C low limb + old_cy_limb + old cy
+       l       7,4(3)          C load res_limb
+       aze     9,9             C propagate cy to new cy_limb
+       a       8,8,7           C add res_limb
+       bge     Lp1             C s1 limb non-negative: no compensation
+       cax     9,9,6           C adjust high limb for negative limb from s1
+Lp1:   bdn     Lploop          C decrement CTR and loop back
+
+       b       Lend            C done; cy_limb is in r9
+
+Lneg:  cax     9,9,0           C s2_limb negative: add s1 limb to high word
+       bdz     Lend            C only one limb: finish
+Lnloop:        lu      0,4(4)          C load next s1 limb and update s1_ptr
+       stu     8,4(3)          C store result limb and update res_ptr
+       cmpi    0,0,0           C cr0 = sign of s1 limb, tested by bge below
+       mul     10,0,6          C high word in r10, low word in MQ
+       mfmq    7               C r7 = low word of product; r0 keeps the s1 limb
+       ae      8,7,9           C low limb + old_cy_limb + old cy
+       l       7,4(3)          C load res_limb
+       ae      10,10,0         C propagate cy to new cy_limb, adding s1 limb as well
+       a       8,8,7           C add res_limb
+       bge     Ln0             C s1 limb non-negative: no compensation
+       cax     10,10,6         C adjust high limb for negative limb from s1
+Ln0:   bdz     Lend0           C done; cy_limb is in r10
+       lu      0,4(4)          C load next s1 limb and update s1_ptr
+       stu     8,4(3)          C store result limb and update res_ptr
+       cmpi    0,0,0           C cr0 = sign of s1 limb, tested by bge below
+       mul     9,0,6           C high word in r9, low word in MQ
+       mfmq    7               C r7 = low word of product; r0 keeps the s1 limb
+       ae      8,7,10          C low limb + old_cy_limb + old cy
+       l       7,4(3)          C load res_limb
+       ae      9,9,0           C propagate cy to new cy_limb, adding s1 limb as well
+       a       8,8,7           C add res_limb
+       bge     Ln1             C s1 limb non-negative: no compensation
+       cax     9,9,6           C adjust high limb for negative limb from s1
+Ln1:   bdn     Lnloop          C decrement CTR and loop back
+       b       Lend            C done; cy_limb is in r9
+
+Lend0: cal     9,0(10)         C r9 = r10, so Lend finds cy_limb in r9
+Lend:  st      8,4(3)          C store last result limb
+       aze     3,9             C return value = cy_limb + cy
+       br                      C return
+EPILOGUE(mpn_addmul_1)
diff --git a/mpn/power/gmp-mparam.h b/mpn/power/gmp-mparam.h

new file mode 100644 (file)

index 0000000..34f74aa
--- /dev/null
+++ b/mpn/power/gmp-mparam.h
@@ -0,0 +1,58 @@
+/* POWER gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 2002, 2003, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+/* Generated by tuneup.c, 2003-02-10, gcc 3.2, POWER2 66.7MHz */
+
+#define MUL_TOOM22_THRESHOLD             12
+#define MUL_TOOM33_THRESHOLD             75
+
+#define SQR_BASECASE_THRESHOLD            7
+#define SQR_TOOM2_THRESHOLD              28
+#define SQR_TOOM3_THRESHOLD              86
+
+#define DIV_SB_PREINV_THRESHOLD       MP_SIZE_T_MAX  /* never */
+#define DIV_DC_THRESHOLD                 36
+#define POWM_THRESHOLD                   69
+
+#define HGCD_THRESHOLD                   97
+#define GCD_ACCEL_THRESHOLD               3
+#define GCD_DC_THRESHOLD                590
+#define JACOBI_BASE_METHOD                2  /* selector, not a threshold; variants are defined elsewhere */
+
+#define DIVREM_1_NORM_THRESHOLD          12
+#define DIVREM_1_UNNORM_THRESHOLD     MP_SIZE_T_MAX  /* never */
+#define MOD_1_NORM_THRESHOLD             10
+#define MOD_1_UNNORM_THRESHOLD        MP_SIZE_T_MAX  /* never */
+#define USE_PREINV_DIVREM_1               0  /* presumably a 0/1 switch: off */
+#define USE_PREINV_MOD_1                  1  /* presumably a 0/1 switch: on */
+#define DIVREM_2_THRESHOLD               11
+#define DIVEXACT_1_THRESHOLD              0  /* always */
+#define MODEXACT_1_ODD_THRESHOLD          0  /* always */
+
+#define GET_STR_DC_THRESHOLD             10
+#define GET_STR_PRECOMPUTE_THRESHOLD     20
+#define SET_STR_THRESHOLD              2899
+
+#define MUL_FFT_TABLE  { 336, 800, 1408, 3584, 10240, 24576, 0 }  /* trailing 0 presumably ends the table */
+#define MUL_FFT_MODF_THRESHOLD          296
+#define MUL_FFT_THRESHOLD              2304
+
+#define SQR_FFT_TABLE  { 336, 800, 1408, 3584, 10240, 24576, 0 }  /* same entries as MUL_FFT_TABLE */
+#define SQR_FFT_MODF_THRESHOLD          296  /* same value as MUL_FFT_MODF_THRESHOLD */
+#define SQR_FFT_THRESHOLD              2304  /* same value as MUL_FFT_THRESHOLD */
diff --git a/mpn/power/lshift.asm b/mpn/power/lshift.asm

new file mode 100644 (file)

index 0000000..a4adb7a
--- /dev/null
+++ b/mpn/power/lshift.asm
@@ -0,0 +1,50 @@
+dnl  IBM POWER mpn_lshift -- Shift a number left.
+
+dnl  Copyright 1992, 1994, 1999, 2000, 2001 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+dnl  INPUT PARAMETERS
+dnl  res_ptr   r3
+dnl  s_ptr     r4
+dnl  size      r5
+dnl  cnt       r6
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_lshift)
+       sli     0,5,2           C r0 = size * 4, byte length of the operands
+       cax     9,3,0           C r9 = res_ptr + r0, just past the top result limb
+       cax     4,4,0           C r4 = s_ptr + r0, just past the top source limb
+       sfi     8,6,32          C r8 = 32 - cnt, the complementary shift count
+       mtctr   5               C put limb count in CTR loop register
+       lu      0,-4(4)         C read most significant limb
+       sre     3,0,8           C compute carry out limb, and init MQ register
+       bdz     Lend2           C if just one limb, skip loop
+       lu      0,-4(4)         C read 2:nd most significant limb
+       sreq    7,0,8           C compute most significant limb of result
+       bdz     Lend            C if just two limbs, skip loop
+Loop:  lu      0,-4(4)         C load next lower limb
+       stu     7,-4(9)         C store previous result during read latency
+       sreq    7,0,8           C compute result limb
+       bdn     Loop            C loop back until CTR is zero
+Lend:  stu     7,-4(9)         C store 2:nd least significant limb
+Lend2: sle     7,0,6           C compute least significant limb
+       st      7,-4(9)         C store it
+       br                      C return, carry out limb is in r3
+EPILOGUE(mpn_lshift)
diff --git a/mpn/power/mul_1.asm b/mpn/power/mul_1.asm

new file mode 100644 (file)

index 0000000..bd33942
--- /dev/null
+++ b/mpn/power/mul_1.asm
@@ -0,0 +1,102 @@
+dnl  IBM POWER mpn_mul_1 -- Multiply a limb vector with a limb and store the
+dnl  result in a second limb vector.
+
+dnl  Copyright 1992, 1994, 1999, 2000, 2001 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+dnl  INPUT PARAMETERS
+dnl  res_ptr   r3
+dnl  s1_ptr    r4
+dnl  size      r5
+dnl  s2_limb   r6
+
+dnl  The POWER architecture has no unsigned 32x32->64 bit multiplication
+dnl  instruction.  To obtain that operation, we have to use the 32x32->64
+dnl  signed multiplication instruction, and add the appropriate compensation to
+dnl  the high limb of the result.  We add the multiplicand if the multiplier
+dnl  has its most significant bit set, and we add the multiplier if the
+dnl  multiplicand has its most significant bit set.  We need to preserve the
+dnl  carry flag between each iteration, so we have to compute the compensation
+dnl  carefully (the natural, srai+and doesn't work).  Since all POWER can
+dnl  branch in zero cycles, we use conditional branches for the compensation.
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_mul_1)
+       cal     3,-4(3)         C offset res_ptr, the stu:s update it before use
+       l       0,0(4)          C load least significant s1 limb
+       cmpi    0,6,0           C set cr0 from the sign of s2_limb
+       mtctr   5               C put limb count in CTR loop register
+       mul     9,0,6           C signed multiply, high limb to r9, low to MQ
+       srai    7,0,31          C r7 = all ones if the s1 limb has its msb set
+       and     7,7,6           C r7 = s2_limb in that case, else 0
+       mfmq    8               C r8 = low limb of product
+       ai      0,0,0           C reset carry
+       cax     9,9,7           C adjust high limb for negative limb from s1
+       blt     Lneg            C use the other loop if s2_limb has its msb set
+Lpos:  bdz     Lend            C if just one limb, skip loop
+Lploop:        lu      0,4(4)          C load next s1 limb
+       stu     8,4(3)          C store previous result limb
+       cmpi    0,0,0           C set cr0 from the sign of the s1 limb
+       mul     10,0,6          C signed multiply, high limb to r10, low to MQ
+       mfmq    0               C r0 = low limb of product
+       ae      8,0,9           C low limb + old high limb + cy, set cy
+       bge     Lp0             C no adjustment if s1 limb was non-negative
+       cax     10,10,6         C adjust high limb for negative limb from s1
+Lp0:   bdz     Lend0           C done, pending high limb is in r10
+       lu      0,4(4)          C load next s1 limb
+       stu     8,4(3)          C store previous result limb
+       cmpi    0,0,0           C set cr0 from the sign of the s1 limb
+       mul     9,0,6           C signed multiply, high limb to r9, low to MQ
+       mfmq    0               C r0 = low limb of product
+       ae      8,0,10          C low limb + old high limb + cy, set cy
+       bge     Lp1             C no adjustment if s1 limb was non-negative
+       cax     9,9,6           C adjust high limb for negative limb from s1
+Lp1:   bdn     Lploop          C loop back until CTR is zero
+       b       Lend            C done, pending high limb is in r9
+
+Lneg:  cax     9,9,0           C adjust high limb for negative s2_limb
+       bdz     Lend            C if just one limb, skip loop
+Lnloop:        lu      0,4(4)          C load next s1 limb
+       stu     8,4(3)          C store previous result limb
+       cmpi    0,0,0           C set cr0 from the sign of the s1 limb
+       mul     10,0,6          C signed multiply, high limb to r10, low to MQ
+       cax     10,10,0         C adjust high limb for negative s2_limb
+       mfmq    0               C r0 = low limb of product
+       ae      8,0,9           C low limb + old high limb + cy, set cy
+       bge     Ln0             C no adjustment if s1 limb was non-negative
+       cax     10,10,6         C adjust high limb for negative limb from s1
+Ln0:   bdz     Lend0           C done, pending high limb is in r10
+       lu      0,4(4)          C load next s1 limb
+       stu     8,4(3)          C store previous result limb
+       cmpi    0,0,0           C set cr0 from the sign of the s1 limb
+       mul     9,0,6           C signed multiply, high limb to r9, low to MQ
+       cax     9,9,0           C adjust high limb for negative s2_limb
+       mfmq    0               C r0 = low limb of product
+       ae      8,0,10          C low limb + old high limb + cy, set cy
+       bge     Ln1             C no adjustment if s1 limb was non-negative
+       cax     9,9,6           C adjust high limb for negative limb from s1
+Ln1:   bdn     Lnloop          C loop back until CTR is zero
+       b       Lend            C done, pending high limb is in r9
+
+Lend0: cal     9,0(10)         C move pending high limb from r10 to r9
+Lend:  st      8,4(3)          C store most significant result limb
+       aze     3,9             C return value = pending high limb + cy
+       br                      C return
+EPILOGUE(mpn_mul_1)
diff --git a/mpn/power/rshift.asm b/mpn/power/rshift.asm

new file mode 100644 (file)

index 0000000..4645015
--- /dev/null
+++ b/mpn/power/rshift.asm
@@ -0,0 +1,48 @@
+dnl  IBM POWER mpn_rshift -- Shift a number right.
+
+dnl  Copyright 1992, 1994, 1999, 2000, 2001 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+dnl  INPUT PARAMETERS
+dnl  res_ptr   r3
+dnl  s_ptr     r4
+dnl  size      r5
+dnl  cnt       r6
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_rshift)
+       sfi     8,6,32          C r8 = 32 - cnt, the complementary shift count
+       mtctr   5               C put limb count in CTR loop register
+       l       0,0(4)          C read least significant limb
+       ai      9,3,-4          C adjust res_ptr since it's offset in the stu:s
+       sle     3,0,8           C compute carry limb, and init MQ register
+       bdz     Lend2           C if just one limb, skip loop
+       lu      0,4(4)          C read 2:nd least significant limb
+       sleq    7,0,8           C compute least significant limb of result
+       bdz     Lend            C if just two limbs, skip loop
+Loop:  lu      0,4(4)          C load next higher limb
+       stu     7,4(9)          C store previous result during read latency
+       sleq    7,0,8           C compute result limb
+       bdn     Loop            C loop back until CTR is zero
+Lend:  stu     7,4(9)          C store 2:nd most significant limb
+Lend2: sre     7,0,6           C compute most significant limb
+       st      7,4(9)          C store it
+       br                      C return, carry limb is in r3
+EPILOGUE(mpn_rshift)
diff --git a/mpn/power/sdiv.asm b/mpn/power/sdiv.asm

new file mode 100644 (file)

index 0000000..7a79802
--- /dev/null
+++ b/mpn/power/sdiv.asm
@@ -0,0 +1,28 @@
+dnl  Copyright 1999, 2001 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_sdiv_qrnnd)
+       mtmq    5               C MQ = r5, low word of the dividend
+       div     0,4,6           C divide r4:MQ by r6, quotient to r0
+       mfmq    9               C r9 = MQ, where div left the remainder
+       st      9,0(3)          C store the remainder through the pointer in r3
+       mr      3,0             C return the quotient
+       br                      C return
+EPILOGUE(mpn_sdiv_qrnnd)
diff --git a/mpn/power/sub_n.asm b/mpn/power/sub_n.asm

new file mode 100644 (file)

index 0000000..d34415d
--- /dev/null
+++ b/mpn/power/sub_n.asm
@@ -0,0 +1,75 @@
+dnl  IBM POWER mpn_sub_n -- Subtract two limb vectors of equal, non-zero
+dnl  length.
+
+dnl  Copyright 1992, 1994, 1995, 1996, 1999, 2000, 2001, 2005 Free Software
+dnl  Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+dnl  INPUT PARAMETERS
+dnl  res_ptr   r3
+dnl  s1_ptr    r4
+dnl  s2_ptr    r5
+dnl  size      r6
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_sub_n)
+       andil.  10,6,1          C odd or even number of limbs?
+       l       8,0(4)          C load least significant s1 limb
+       l       0,0(5)          C load least significant s2 limb
+       cal     3,-4(3)         C offset res_ptr, it's updated before it's used
+       sri     10,6,1          C count for unrolled loop
+       sf      7,0,8           C subtract least significant limbs, set cy
+       mtctr   10              C copy count into CTR
+       beq     0,Leven         C branch if even # of limbs (# of limbs >= 2)
+
+C We have an odd # of limbs.  Subtract the first limbs separately.
+       cmpi    1,10,0          C is count for unrolled loop zero?
+       bc      4,6,L1          C bne cr1,L1 (misassembled by gas)
+       st      7,4(3)          C just one limb, store the only result limb
+       sfe     3,0,0           C load !cy into ...
+       sfi     3,3,0           C ... return value register
+       br                      C return
+
+C We subtracted least significant limbs.  Now reload the next limbs to enter loop.
+L1:    lu      8,4(4)          C load s1 limb and update s1_ptr
+       lu      0,4(5)          C load s2 limb and update s2_ptr
+       stu     7,4(3)          C store least significant result limb
+       sfe     7,0,8           C subtract limbs, set cy
+Leven: lu      9,4(4)          C load s1 limb and update s1_ptr
+       lu      10,4(5)         C load s2 limb and update s2_ptr
+       bdz     Lend            C If done, skip loop
+
+Loop:  lu      8,4(4)          C load s1 limb and update s1_ptr
+       lu      0,4(5)          C load s2 limb and update s2_ptr
+       sfe     11,10,9         C subtract previous limbs with cy, set cy
+       stu     7,4(3)          C store older result limb and update res_ptr
+       lu      9,4(4)          C load s1 limb and update s1_ptr
+       lu      10,4(5)         C load s2 limb and update s2_ptr
+       sfe     7,0,8           C subtract previous limbs with cy, set cy
+       stu     11,4(3)         C store older result limb and update res_ptr
+       bdn     Loop            C decrement CTR and loop back
+
+Lend:  sfe     11,10,9         C subtract limbs with cy, set cy
+       st      7,4(3)          C store 2:nd most significant result limb
+       st      11,8(3)         C store most significant result limb
+       sfe     3,0,0           C load !cy into ...
+       sfi     3,3,0           C ... return value register
+       br                      C return
+EPILOGUE(mpn_sub_n)
diff --git a/mpn/power/submul_1.asm b/mpn/power/submul_1.asm

new file mode 100644 (file)

index 0000000..3c3492d
--- /dev/null
+++ b/mpn/power/submul_1.asm
@@ -0,0 +1,120 @@
+dnl  IBM POWER mpn_submul_1 -- Multiply a limb vector with a limb and subtract
+dnl  the result from a second limb vector.
+
+dnl  Copyright 1992, 1994, 1999, 2000, 2001 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+dnl  INPUT PARAMETERS
+dnl  res_ptr   r3
+dnl  s1_ptr    r4
+dnl  size      r5
+dnl  s2_limb   r6
+
+dnl  The POWER architecture has no unsigned 32x32->64 bit multiplication
+dnl  instruction.  To obtain that operation, we have to use the 32x32->64
+dnl  signed multiplication instruction, and add the appropriate compensation to
+dnl  the high limb of the result.  We add the multiplicand if the multiplier
+dnl  has its most significant bit set, and we add the multiplier if the
+dnl  multiplicand has its most significant bit set.  We need to preserve the
+dnl  carry flag between each iteration, so we have to compute the compensation
+dnl  carefully (the natural, srai+and doesn't work).  Since all POWER can
+dnl  branch in zero cycles, we use conditional branches for the compensation.
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_submul_1)
+       cal     3,-4(3)         C offset res_ptr, the stu:s update it before use
+       l       0,0(4)          C load least significant s1 limb
+       cmpi    0,6,0           C set cr0 from the sign of s2_limb
+       mtctr   5               C put limb count in CTR loop register
+       mul     9,0,6           C signed multiply, high limb to r9, low to MQ
+       srai    7,0,31          C r7 = all ones if the s1 limb has its msb set
+       and     7,7,6           C r7 = s2_limb in that case, else 0
+       mfmq    11              C r11 = low limb of product
+       cax     9,9,7           C adjust high limb for negative limb from s1
+       l       7,4(3)          C load res_limb
+       sf      8,11,7          C r8 = res_limb - low limb, set cy
+       a       11,8,11         C invert cy (r11 is junk)
+       blt     Lneg            C use the other loop if s2_limb has its msb set
+Lpos:  bdz     Lend            C if just one limb, skip loop
+
+Lploop:        lu      0,4(4)          C load next s1 limb
+       stu     8,4(3)          C store previous result limb
+       cmpi    0,0,0           C set cr0 from the sign of the s1 limb
+       mul     10,0,6          C signed multiply, high limb to r10, low to MQ
+       mfmq    0               C r0 = low limb of product
+       ae      11,0,9          C low limb + old_cy_limb + old cy
+       l       7,4(3)          C load res_limb
+       aze     10,10           C propagate cy to new cy_limb
+       sf      8,11,7          C r8 = res_limb - r11, set cy
+       a       11,8,11         C invert cy (r11 is junk)
+       bge     Lp0             C no adjustment if s1 limb was non-negative
+       cax     10,10,6         C adjust high limb for negative limb from s1
+Lp0:   bdz     Lend0           C done, pending cy_limb is in r10
+       lu      0,4(4)          C load next s1 limb
+       stu     8,4(3)          C store previous result limb
+       cmpi    0,0,0           C set cr0 from the sign of the s1 limb
+       mul     9,0,6           C signed multiply, high limb to r9, low to MQ
+       mfmq    0               C r0 = low limb of product
+       ae      11,0,10         C low limb + old_cy_limb + old cy
+       l       7,4(3)          C load res_limb
+       aze     9,9             C propagate cy to new cy_limb
+       sf      8,11,7          C r8 = res_limb - r11, set cy
+       a       11,8,11         C invert cy (r11 is junk)
+       bge     Lp1             C no adjustment if s1 limb was non-negative
+       cax     9,9,6           C adjust high limb for negative limb from s1
+Lp1:   bdn     Lploop          C loop back until CTR is zero
+
+       b       Lend            C done, pending cy_limb is in r9
+
+Lneg:  cax     9,9,0           C adjust high limb for negative s2_limb
+       bdz     Lend            C if just one limb, skip loop
+Lnloop:        lu      0,4(4)          C load next s1 limb
+       stu     8,4(3)          C store previous result limb
+       cmpi    0,0,0           C set cr0 from the sign of the s1 limb
+       mul     10,0,6          C signed multiply, high limb to r10, low to MQ
+       mfmq    7               C r7 = low limb of product, r0 keeps s1 limb
+       ae      11,7,9          C low limb + old_cy_limb + old cy
+       l       7,4(3)          C load res_limb
+       ae      10,10,0         C add cy and s1 limb (negative s2_limb adjust)
+       sf      8,11,7          C r8 = res_limb - r11, set cy
+       a       11,8,11         C invert cy (r11 is junk)
+       bge     Ln0             C no adjustment if s1 limb was non-negative
+       cax     10,10,6         C adjust high limb for negative limb from s1
+Ln0:   bdz     Lend0           C done, pending cy_limb is in r10
+       lu      0,4(4)          C load next s1 limb
+       stu     8,4(3)          C store previous result limb
+       cmpi    0,0,0           C set cr0 from the sign of the s1 limb
+       mul     9,0,6           C signed multiply, high limb to r9, low to MQ
+       mfmq    7               C r7 = low limb of product, r0 keeps s1 limb
+       ae      11,7,10         C low limb + old_cy_limb + old cy
+       l       7,4(3)          C load res_limb
+       ae      9,9,0           C add cy and s1 limb (negative s2_limb adjust)
+       sf      8,11,7          C r8 = res_limb - r11, set cy
+       a       11,8,11         C invert cy (r11 is junk)
+       bge     Ln1             C no adjustment if s1 limb was non-negative
+       cax     9,9,6           C adjust high limb for negative limb from s1
+Ln1:   bdn     Lnloop          C loop back until CTR is zero
+       b       Lend            C done, pending cy_limb is in r9
+
+Lend0: cal     9,0(10)         C move pending cy_limb from r10 to r9
+Lend:  st      8,4(3)          C store most significant result limb
+       aze     3,9             C return value = pending cy_limb + cy
+       br                      C return
+EPILOGUE(mpn_submul_1)
diff --git a/mpn/power/umul.asm b/mpn/power/umul.asm

new file mode 100644 (file)

index 0000000..996f2e6
--- /dev/null
+++ b/mpn/power/umul.asm
@@ -0,0 +1,32 @@
+dnl  Copyright 1999, 2001 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_umul_ppmm)
+       mul     9,4,5           C signed multiply r4*r5, high to r9, low to MQ
+       srai    0,4,31          C r0 = all ones if r4 has its msb set
+       and     0,0,5           C r0 = r5 in that case, else 0
+       srai    5,5,31          C r5 = all ones if r5 has its msb set
+       and     5,5,4           C r5 = r4 in that case, else 0
+       cax     0,0,5           C r0 = sum of the two compensation terms
+       mfmq    11              C r11 = low word of the product
+       st      11,0(3)         C store the low word through the pointer in r3
+       cax     3,9,0           C return the high word plus compensation
+       br                      C return
+EPILOGUE(mpn_umul_ppmm)
diff --git a/mpn/powerpc32/750/com.asm b/mpn/powerpc32/750/com.asm

new file mode 100644 (file)

index 0000000..c6b4b10
--- /dev/null
+++ b/mpn/powerpc32/750/com.asm
@@ -0,0 +1,68 @@
+dnl  PowerPC 750 mpn_com -- mpn bitwise one's complement
+
+dnl  Copyright 2002, 2003 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C                cycles/limb
+C 603e:            ?
+C 604e:            3.0
+C 75x (G3):        2.0
+C 7400,7410 (G4):  2.0
+C 744x,745x (G4+): 3.0
+
+C void mpn_com (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C
+C This loop form is necessary for the claimed speed.
+
+ASM_START()
+PROLOGUE(mpn_com)
+
+       C r3    dst
+       C r4    src
+       C r5    size
+
+       mtctr   r5              C size
+       lwz     r5, 0(r4)       C src low limb
+
+       sub     r4, r4, r3      C src-dst
+       subi    r3, r3, 4       C dst-4
+
+       addi    r4, r4, 8       C src-dst+8
+       bdz     L(one)          C size==1, no further limb to load
+
+L(top):
+       C r3    &dst[i-1]
+       C r4    src-dst+8
+       C r5    src[i]
+       C r6    scratch
+
+       not     r6, r5          C ~src[i]
+       lwzx    r5, r4,r3       C src[i+1]
+
+       stwu    r6, 4(r3)       C dst[i]
+       bdnz    L(top)          C loop until CTR is zero
+
+L(one):
+       not     r6, r5          C ~src[size-1]
+
+       stw     r6, 4(r3)       C dst[size-1]
+       blr
+
+EPILOGUE()
diff --git a/mpn/powerpc32/750/gmp-mparam.h b/mpn/powerpc32/750/gmp-mparam.h

new file mode 100644 (file)

index 0000000..8e46042
--- /dev/null
+++ b/mpn/powerpc32/750/gmp-mparam.h
@@ -0,0 +1,181 @@
+/* PowerPC-32 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 2002, 2004, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 32
+#define BYTES_PER_MP_LIMB 4
+
+
+/* This file is used for 75x (G3) and for 7400/7410 (G4), both of which have
+   slow multiply instructions.  */
+
+/* 450 MHz PPC 7400 */
+
+#define DIVREM_1_NORM_THRESHOLD              0  /* always */
+#define DIVREM_1_UNNORM_THRESHOLD            0  /* always */
+#define MOD_1_NORM_THRESHOLD                 3
+#define MOD_1_UNNORM_THRESHOLD               0  /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD         11
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          7
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD        11
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        18
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     38
+#define USE_PREINV_DIVREM_1                  1
+#define DIVEXACT_1_THRESHOLD                 0  /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD        MP_SIZE_T_MAX  /* never */
+
+#define MUL_TOOM22_THRESHOLD                10
+#define MUL_TOOM33_THRESHOLD                38
+#define MUL_TOOM44_THRESHOLD                99
+#define MUL_TOOM6H_THRESHOLD               141
+#define MUL_TOOM8H_THRESHOLD               212
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      65
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD      69
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD      65
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD      66
+
+#define SQR_BASECASE_THRESHOLD               4
+#define SQR_TOOM2_THRESHOLD                 18
+#define SQR_TOOM3_THRESHOLD                 57
+#define SQR_TOOM4_THRESHOLD                142
+#define SQR_TOOM6_THRESHOLD                173
+#define SQR_TOOM8_THRESHOLD                309
+
+#define MULMOD_BNM1_THRESHOLD                9
+#define SQRMOD_BNM1_THRESHOLD               11
+
+#define MUL_FFT_MODF_THRESHOLD             220  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    220, 5}, {     13, 6}, {      7, 5}, {     15, 6}, \
+    {      8, 5}, {     17, 6}, {      9, 5}, {     19, 6}, \
+    {     13, 7}, {      7, 6}, {     17, 7}, {      9, 6}, \
+    {     19, 7}, {     11, 6}, {     23, 7}, {     13, 8}, \
+    {      7, 7}, {     19, 8}, {     11, 7}, {     23, 9}, \
+    {      7, 8}, {     15, 7}, {     33, 8}, {     19, 7}, \
+    {     39, 8}, {     23, 9}, {     15, 8}, {     39, 9}, \
+    {     23, 8}, {     47,10}, {     15, 9}, {     31, 8}, \
+    {     67, 9}, {     55,10}, {     31, 9}, {     63, 8}, \
+    {    127, 7}, {    255, 9}, {     71, 8}, {    143, 7}, \
+    {    287, 9}, {     79,10}, {     47, 9}, {     95,11}, \
+    {     31,10}, {     63, 9}, {    127, 8}, {    255, 9}, \
+    {    143, 8}, {    287,10}, {     79, 9}, {    159, 8}, \
+    {    319, 9}, {    175, 8}, {    351, 7}, {    703,10}, \
+    {     95, 9}, {    191, 8}, {    383, 9}, {    207,10}, \
+    {    111,11}, {     63,10}, {    127, 9}, {    255,10}, \
+    {    143, 9}, {    287, 8}, {    575,10}, {    159, 9}, \
+    {    319,10}, {    175, 9}, {    351, 8}, {    703,11}, \
+    {     95,10}, {    191, 9}, {    383,10}, {    207, 9}, \
+    {    415, 8}, {    831,12}, {     63,11}, {    127,10}, \
+    {    255, 9}, {    511,10}, {    271, 9}, {    543,10}, \
+    {    287, 9}, {    575,11}, {    159,10}, {    351, 9}, \
+    {    703, 8}, {   1407,11}, {    191,10}, {    415, 9}, \
+    {    831,11}, {    223,10}, {    447, 9}, {    895,12}, \
+    {    127,11}, {    255,10}, {    543,11}, {    287,10}, \
+    {    575,11}, {    351,10}, {    703, 9}, {   1407,12}, \
+    {    191,11}, {    415,10}, {    831,11}, {    447,10}, \
+    {    895,13}, {    127,12}, {    255,11}, {    543,10}, \
+    {   1087,11}, {    575,12}, {    319,11}, {    703,10}, \
+    {   1407,12}, {    383,11}, {    831,12}, {    447,11}, \
+    {    895,10}, {   1791,11}, {    959,13}, {    255,12}, \
+    {    511,11}, {   1087,12}, {    575,11}, {   1215,12}, \
+    {    703,11}, {   1407,13}, {    383,12}, {    895,11}, \
+    {   1791,12}, {    959,14}, {    255,13}, {    511,12}, \
+    {   1215,13}, {    639,12}, {   1407,13}, {    895,12}, \
+    {   1919,14}, {    511,13}, {   1023,12}, {   2047,13}, \
+    {   1151,12}, {   2303,13}, {   1407,14}, {    767,13}, \
+    {   1919,10}, {  15359,12}, {   4096,13}, {   8192,14}, \
+    {  16384,15}, {  32768,16} }
+#define MUL_FFT_TABLE3_SIZE 154
+#define MUL_FFT_THRESHOLD                 2688
+
+#define SQR_FFT_MODF_THRESHOLD             184  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    184, 5}, {      6, 4}, {     13, 5}, {     13, 6}, \
+    {      7, 5}, {     15, 6}, {     13, 7}, {      7, 6}, \
+    {     16, 7}, {      9, 6}, {     19, 7}, {     11, 6}, \
+    {     23, 7}, {     13, 8}, {      7, 7}, {     19, 8}, \
+    {     11, 7}, {     25, 9}, {      7, 8}, {     15, 7}, \
+    {     31, 8}, {     19, 7}, {     39, 8}, {     27, 9}, \
+    {     15, 8}, {     39, 9}, {     23,10}, {     15, 9}, \
+    {     31, 8}, {     63, 9}, {     39, 8}, {     79, 9}, \
+    {     47, 8}, {     95,10}, {     31, 9}, {     63, 8}, \
+    {    127, 7}, {    255, 9}, {     71, 8}, {    143, 7}, \
+    {    287, 9}, {     79, 8}, {    159,10}, {     47, 9}, \
+    {     95,11}, {     31,10}, {     63, 9}, {    127, 8}, \
+    {    255, 9}, {    143, 8}, {    287, 7}, {    575,10}, \
+    {     79, 9}, {    159, 8}, {    319, 9}, {    175, 8}, \
+    {    351,10}, {     95, 9}, {    191, 8}, {    383, 9}, \
+    {    207,10}, {    111,11}, {     63,10}, {    127, 9}, \
+    {    255,10}, {    143, 9}, {    287, 8}, {    575,10}, \
+    {    159, 9}, {    319,10}, {    175, 9}, {    351,11}, \
+    {     95,10}, {    191, 9}, {    383,10}, {    207, 9}, \
+    {    415, 8}, {    831,10}, {    223,12}, {     63,11}, \
+    {    127,10}, {    255, 9}, {    511,10}, {    287, 9}, \
+    {    575,11}, {    159,10}, {    351, 9}, {    703,11}, \
+    {    191,10}, {    415, 9}, {    831,11}, {    223,10}, \
+    {    447, 9}, {    895,12}, {    127,11}, {    255,10}, \
+    {    511,11}, {    287,10}, {    575,11}, {    319,10}, \
+    {    639,11}, {    351,10}, {    703, 9}, {   1407,12}, \
+    {    191,11}, {    383,10}, {    767,11}, {    415,10}, \
+    {    831,11}, {    447,10}, {    895,13}, {    127,12}, \
+    {    255,11}, {    511,10}, {   1023,11}, {    575,12}, \
+    {    319,11}, {    703,10}, {   1407,12}, {    383,11}, \
+    {    831,12}, {    447,11}, {    895,10}, {   1791,11}, \
+    {    959,13}, {    255,12}, {    511,11}, {   1023,12}, \
+    {    575,11}, {   1215,12}, {    703,11}, {   1407,13}, \
+    {    383,12}, {    895,11}, {   1791,12}, {    959,14}, \
+    {    255,13}, {    511,12}, {   1215,13}, {    639,12}, \
+    {   1471,13}, {    767,12}, {   1535,13}, {    895,12}, \
+    {   1919,14}, {    511,13}, {   1151,12}, {   2431,13}, \
+    {   1407,14}, {    767,13}, {   1919,15}, {  32768,16} }
+#define SQR_FFT_TABLE3_SIZE 152
+#define SQR_FFT_THRESHOLD                 1728
+
+#define MULLO_BASECASE_THRESHOLD             0  /* always */
+#define MULLO_DC_THRESHOLD                  33
+#define MULLO_MUL_N_THRESHOLD             5240
+
+#define DC_DIV_QR_THRESHOLD                 31
+#define DC_DIVAPPR_Q_THRESHOLD             108
+#define DC_BDIV_QR_THRESHOLD                35
+#define DC_BDIV_Q_THRESHOLD                 88
+
+#define INV_MULMOD_BNM1_THRESHOLD           76
+#define INV_NEWTON_THRESHOLD               149
+#define INV_APPR_THRESHOLD                 125
+
+#define BINV_NEWTON_THRESHOLD              156
+#define REDC_1_TO_REDC_N_THRESHOLD          39
+
+#define MU_DIV_QR_THRESHOLD                807
+#define MU_DIVAPPR_Q_THRESHOLD             807
+#define MUPI_DIV_QR_THRESHOLD               66
+#define MU_BDIV_QR_THRESHOLD               667
+#define MU_BDIV_Q_THRESHOLD                807
+
+#define MATRIX22_STRASSEN_THRESHOLD         11
+#define HGCD_THRESHOLD                      87
+#define GCD_DC_THRESHOLD                   233
+#define GCDEXT_DC_THRESHOLD                198
+#define JACOBI_BASE_METHOD                   1
+
+#define GET_STR_DC_THRESHOLD                12
+#define GET_STR_PRECOMPUTE_THRESHOLD        28
+#define SET_STR_DC_THRESHOLD               390
+#define SET_STR_PRECOMPUTE_THRESHOLD       814
diff --git a/mpn/powerpc32/750/lshift.asm b/mpn/powerpc32/750/lshift.asm

new file mode 100644 (file)

index 0000000..9298793
--- /dev/null
+++ b/mpn/powerpc32/750/lshift.asm
@@ -0,0 +1,144 @@
+dnl  PowerPC 750 mpn_lshift -- mpn left shift.
+
+dnl  Copyright 2002, 2003 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C       cycles/limb
+C 750:     3.0
+C 7400:    3.0
+
+
+C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                       unsigned shift);
+C
+C This code is the same per-limb speed as mpn/powerpc32/lshift.asm, but
+C smaller and saving about 30 or so cycles of overhead.
+
+ASM_START()
+PROLOGUE(mpn_lshift)
+
+       C r3    dst
+       C r4    src
+       C r5    size
+       C r6    shift
+
+       mtctr   r5              C size
+       slwi    r5, r5, 2       C 4*size
+
+       subfic  r7, r6, 32      C 32-shift
+       add     r4, r4, r5      C &src[size]
+
+       add     r5, r3, r5      C &dst[size]
+       lwz     r8, -4(r4)      C src[size-1]
+       bdz     L(one)          C size==1
+
+       lwzu    r9, -8(r4)      C src[size-2]
+
+       srw     r3, r8, r7      C return value
+       slw     r8, r8, r6      C src[size-1] << shift
+       bdz     L(two)          C size==2
+
+
+L(top):
+       C r3    return value
+       C r4    src, decrementing
+       C r5    dst, decrementing
+       C r6    shift
+       C r7    32-shift
+       C r8    src[i+1] << shift
+       C r9    src[i]
+       C r10   scratch, receives src[i-1]
+
+       lwzu    r10, -4(r4)     C src[i-1]
+       srw     r11, r9, r7     C src[i] >> (32-shift)
+
+       or      r8, r8, r11     C combined result limb
+       stwu    r8, -4(r5)      C dst[i+1]
+
+       slw     r8, r9, r6      C src[i] << shift
+       bdz     L(odd)          C last limb is in r10
+
+       C r8    src[i+1] << shift
+       C r9    scratch, receives src[i-1]
+       C r10   src[i]
+
+       lwzu    r9, -4(r4)      C src[i-1]
+       srw     r11, r10, r7    C src[i] >> (32-shift)
+
+       or      r8, r8, r11     C combined result limb
+       stwu    r8, -4(r5)      C dst[i+1]
+
+       slw     r8, r10, r6     C src[i] << shift
+       bdnz    L(top)          C else fall through, last limb is in r9
+
+
+L(two):
+       C r3    return value
+       C r4
+       C r5    &dst[2]
+       C r6    shift
+       C r7    32-shift
+       C r8    src[1] << shift
+       C r9    src[0]
+       C r10
+
+       srw     r11, r9, r7     C src[0] >> (32-shift)
+       slw     r12, r9, r6     C src[0] << shift
+
+       or      r8, r8, r11     C combined limb for dst[1]
+       stw     r12, -8(r5)     C dst[0]
+
+       stw     r8, -4(r5)      C dst[1]
+       blr
+
+
+L(odd):
+       C r3    return value
+       C r4
+       C r5    &dst[2]
+       C r6    shift
+       C r7    32-shift
+       C r8    src[1] << shift
+       C r9
+       C r10   src[0]
+
+       srw     r11, r10, r7    C src[0] >> (32-shift)
+       slw     r12, r10, r6    C src[0] << shift
+
+       or      r8, r8, r11     C combined limb for dst[1]
+       stw     r12, -8(r5)     C dst[0]
+
+       stw     r8, -4(r5)      C dst[1]
+       blr
+
+
+L(one):
+       C r5    &dst[1]
+       C r6    shift
+       C r7    32-shift
+       C r8    src[0]
+
+       srw     r3, r8, r7      C return value
+       slw     r8, r8, r6      C src[0] << shift
+
+       stw     r8, -4(r5)      C dst[0]
+       blr
+
+EPILOGUE(mpn_lshift)
diff --git a/mpn/powerpc32/750/rshift.asm b/mpn/powerpc32/750/rshift.asm

new file mode 100644 (file)

index 0000000..944e869
--- /dev/null
+++ b/mpn/powerpc32/750/rshift.asm
@@ -0,0 +1,142 @@
+dnl  PowerPC 750 mpn_rshift -- mpn right shift.
+
+dnl  Copyright 2002, 2003 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C       cycles/limb
+C 750:     3.0
+C 7400:    3.0
+
+
+C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                       unsigned shift);
+C
+C This code is the same per-limb speed as mpn/powerpc32/rshift.asm, but
+C smaller and saving about 30 or so cycles of overhead.
+
+ASM_START()
+PROLOGUE(mpn_rshift)
+
+       C r3    dst
+       C r4    src
+       C r5    size
+       C r6    shift
+
+       mtctr   r5              C ctr = size, counts limbs still to process
+       lwz     r8, 0(r4)       C src[0]
+
+       subfic  r7, r6, 32      C 32-shift
+       addi    r5, r3, -4      C dst-4, so that 4(r5) is dst[0]
+
+       slw     r3, r8, r7      C return value, src[0] << (32-shift)
+       bdz     L(one)          C size == 1
+
+       lwzu    r9, 4(r4)       C src[1]
+       srw     r8, r8, r6      C src[0] >> shift
+       bdz     L(two)          C size == 2
+
+
+L(top):
+       C r3    return value
+       C r4    src, incrementing
+       C r5    dst, incrementing
+       C r6    shift
+       C r7    32-shift
+       C r8    src[i-1] >> shift
+       C r9    src[i]
+       C r10   receives src[i+1]
+
+       lwzu    r10, 4(r4)      C src[i+1]
+       slw     r11, r9, r7     C src[i] << (32-shift)
+
+       or      r8, r8, r11
+       stwu    r8, 4(r5)       C dst[i-1]
+
+       srw     r8, r9, r6      C src[i] >> shift
+       bdz     L(odd)          C r10 holds the last limb
+
+       C r8    src[i-1] >> shift
+       C r9    receives src[i+1]
+       C r10   src[i]
+
+       lwzu    r9, 4(r4)       C src[i+1]
+       slw     r11, r10, r7    C src[i] << (32-shift)
+
+       or      r8, r8, r11
+       stwu    r8, 4(r5)       C dst[i-1]
+
+       srw     r8, r10, r6     C src[i] >> shift
+       bdnz    L(top)          C fall through with the last limb in r9
+
+
+L(two):
+       C r3    return value
+       C r4
+       C r5    &dst[size-3], so 4(r5) is dst[size-2]
+       C r6    shift
+       C r7    32-shift
+       C r8    src[size-2] >> shift
+       C r9    src[size-1]
+       C r10
+
+       slw     r11, r9, r7
+       srw     r12, r9, r6     C src[size-1] >> shift
+
+       or      r8, r8, r11
+       stw     r12, 8(r5)      C dst[size-1]
+
+       stw     r8, 4(r5)       C dst[size-2]
+       blr
+
+
+L(odd):
+       C r3    return value
+       C r4
+       C r5    &dst[size-3], so 4(r5) is dst[size-2]
+       C r6    shift
+       C r7    32-shift
+       C r8    src[size-2] >> shift
+       C r9
+       C r10   src[size-1]
+
+       slw     r11, r10, r7
+       srw     r12, r10, r6    C src[size-1] >> shift
+
+       or      r8, r8, r11
+       stw     r12, 8(r5)      C dst[size-1]
+
+       stw     r8, 4(r5)       C dst[size-2]
+       blr
+
+
+L(one):
+       C r3    return value
+       C r4
+       C r5    dst-4
+       C r6    shift
+       C r7    32-shift, no longer needed
+       C r8    src[0]
+
+       srw     r8, r8, r6      C src[0] >> shift
+
+       stw     r8, 4(r5)       C dst[0]
+       blr
+
+EPILOGUE(mpn_rshift)
diff --git a/mpn/powerpc32/README b/mpn/powerpc32/README

new file mode 100644 (file)

index 0000000..43aca46
--- /dev/null
+++ b/mpn/powerpc32/README
@@ -0,0 +1,169 @@
+Copyright 2002, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+
+
+
+                    POWERPC 32-BIT MPN SUBROUTINES
+
+
+This directory contains mpn functions for various 32-bit PowerPC chips.
+
+
+CODE ORGANIZATION
+
+       directory         used for
+       ================================================
+       powerpc32         generic, 604, 604e, 744x, 745x
+       powerpc32/750     740, 750, 7400, 7410
+
+
+The top-level powerpc32 directory is currently mostly aimed at 604/604e but
+should be reasonable on all powerpcs.
+
+
+
+STATUS
+
+The code is quite well optimized for the 604e, other chips have had less
+attention.
+
+Altivec SIMD available in 74xx might hold some promise, but unfortunately
+GMP only guarantees 32-bit data alignment, so there's lots of fiddling
+around with partial operations at the start and end of limb vectors.  A
+128-bit limb would be a novel idea, but is unlikely to be practical, since
+it would have to work with ordinary +, -, * etc in the C code.
+
+Also, Altivec isn't very well suited for the GMP multiplication needs.
+Using floating-point based multiplication has much better performance
+potential for all current powerpcs, both the ones with slow integer multiply
+units (603, 740, 750, 7400, 7410) and those with fast (604, 604e, 744x,
+745x).  This is because all powerpcs do some level of pipelining in the FPU:
+
+603 and 750 can sustain one fmadd every 2nd cycle.
+604 and 604e can sustain one fmadd per cycle.
+7400 and 7410 can sustain 3 fmadd in 4 cycles.
+744x and 745x can sustain 4 fmadd in 5 cycles.
+
+
+
+REGISTER NAMES
+
+The normal powerpc convention is to give registers as plain numbers, like
+"mtctr 6", but on Apple MacOS X (powerpc*-*-rhapsody* and
+powerpc*-*-darwin*) the assembler demands an "r" like "mtctr r6".  Note
+however when register 0 in an instruction means a literal zero the "r" is
+omitted, for instance "lwzx r6,0,r7".
+
+The GMP code uses the "r" forms, powerpc-defs.m4 transforms them to plain
+numbers according to what GMP_ASM_POWERPC_R_REGISTERS finds is needed.
+(Note that this style isn't fully general, as the identifier r4 and the
+register r4 will not be distinguishable on some systems.  However, this is
+not a problem for the limited GMP assembly usage.)
+
+
+
+GLOBAL REFERENCES
+
+Linux non-PIC
+       lis     9, __gmp_binvert_limb_table@ha
+       rlwinm  11, 5, 31, 25, 31
+       la      9, __gmp_binvert_limb_table@l(9)
+       lbzx    11, 9, 11
+
+Linux PIC (FIXME)
+.LCL0:
+       .long .LCTOC1-.LCF0
+       bcl     20, 31, .LCF0
+.LCF0:
+       mflr    30
+       lwz     7, .LCL0-.LCF0(30)
+       add     30, 7, 30
+       lwz     11, .LC0-.LCTOC1(30)
+       rlwinm  3, 5, 31, 25, 31
+       lbzx    7, 11, 3
+
+AIX (always PIC)
+LC..0:
+       .tc __gmp_binvert_limb_table[TC],__gmp_binvert_limb_table[RW]
+       lwz     9, LC..0(2)
+       rlwinm  0, 5, 31, 25, 31
+       lbzx    0, 9, 0
+
+Darwin (non-PIC)
+       lis     r2, ha16(___gmp_binvert_limb_table)
+       rlwinm  r9, r5, 31, 25, 31
+       la      r2, lo16(___gmp_binvert_limb_table)(r2)
+       lbzx    r0, r2, r9
+Darwin (PIC)
+       mflr    r0
+       bcl     20, 31, L0001$pb
+L0001$pb:
+       mflr    r7
+       mtlr    r0
+       addis   r2, r7, ha16(L___gmp_binvert_limb_table$non_lazy_ptr-L0001$pb)
+       rlwinm  r9, r5, 31, 25, 31
+       lwz     r2, lo16(L___gmp_binvert_limb_table$non_lazy_ptr-L0001$pb)(r2)
+       lbzx    r0, r2, r9
+------
+       .non_lazy_symbol_pointer
+L___gmp_binvert_limb_table$non_lazy_ptr:
+       .indirect_symbol ___gmp_binvert_limb_table
+       .long   0
+       .subsections_via_symbols
+
+
+For GNU/Linux and Darwin, we might want to duplicate __gmp_binvert_limb_table
+into the text section in this file.  We should thus be able to reach it like
+this:
+
+       bl      L0
+L0:    mflr    r2
+       rlwinm  r9, r5, 31, 25, 31
+       addi    r9, r9, lo16(local_binvert_table-L0)
+       lbzx    r0, r2, r9
+
+
+
+REFERENCES
+
+PowerPC Microprocessor Family: The Programming Environments for 32-bit
+Microprocessors, IBM document G522-0290-01, 2000.
+
+PowerPC 604e RISC Microprocessor User's Manual with Supplement for PowerPC
+604 Microprocessor, IBM document G552-0330-00, Freescale document
+MPC604EUM/AD, 3/1998.
+
+MPC7410/MPC7400 RISC Microprocessor User's Manual, Freescale document
+MPC7400UM/D, rev 1, 11/2002.
+
+MPC7450 RISC Microprocessor Family Reference Manual, Freescale document
+MPC7450UM, rev 5, 1/2005.
+
+The above are available online from
+
+       http://www.ibm.com/chips/techlib/techlib.nsf/productfamilies/PowerPC
+       http://www.freescale.com/PowerPC
+
+
+
+----------------
+Local variables:
+mode: text
+fill-column: 76
+End:
diff --git a/mpn/powerpc32/addlsh1_n.asm b/mpn/powerpc32/addlsh1_n.asm

new file mode 100644 (file)

index 0000000..db627a0
--- /dev/null
+++ b/mpn/powerpc32/addlsh1_n.asm
@@ -0,0 +1,89 @@
+dnl  PowerPC-32 mpn_addlsh1_n -- rp[] = up[] + (vp[] << 1)
+
+dnl  Copyright 2003, 2005, 2007 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                cycles/limb
+C 603e:            ?
+C 604e:            4.0
+C 75x (G3):        5.0
+C 7400,7410 (G4):  5.0
+C 744x,745x (G4+): 5.0
+C power4/ppc970:   4.25
+C power5:          5.0
+
+C INPUT PARAMETERS
+C rp   r3
+C up   r4
+C vp   r5
+C n    r6
+
+define(`rp',`r3')
+define(`up',`r4')
+define(`vp',`r5')
+
+define(`s0',`r6')
+define(`s1',`r7')
+define(`u0',`r8')
+define(`v0',`r10')
+define(`v1',`r11')
+
+ASM_START()
+PROLOGUE(mpn_addlsh1_n)
+       mtctr   r6              C copy n in ctr
+       addic   r31, r31, 0     C clear cy, adding 0 leaves r31 unchanged
+
+       lwz     v0, 0(vp)       C load v limb
+       lwz     u0, 0(up)       C load u limb
+       addi    up, up, -4      C bias up for the 8(up) and 4(up) loads below
+       addi    rp, rp, -4      C bias rp so 4(rp) is the first result limb
+       slwi    s1, v0, 1       C first v limb shifted left one bit
+       bdz     L(end)          C If done, skip loop
+
+L(loop):
+       lwz     v1, 4(vp)       C load v limb
+       adde    s1, s1, u0      C add limbs with cy, set cy
+       srwi    s0, v0, 31      C shift down previous v limb
+       stw     s1, 4(rp)       C store result limb
+       lwzu    u0, 8(up)       C load u limb and update up
+       rlwimi  s0, v1, 1, 0,30 C left shift v limb and merge with prev v limb
+
+       bdz     L(exit)         C decrement ctr and exit if done
+
+       lwzu    v0, 8(vp)       C load v limb and update vp
+       adde    s0, s0, u0      C add limbs with cy, set cy
+       srwi    s1, v1, 31      C shift down previous v limb
+       stwu    s0, 8(rp)       C store result limb and update rp
+       lwz     u0, 4(up)       C load u limb
+       rlwimi  s1, v0, 1, 0,30 C left shift v limb and merge with prev v limb
+
+       bdnz    L(loop)         C decrement ctr and loop back
+
+L(end):        adde    r7, s1, u0      C last sum limb, pending value in s1
+       srwi    r4, v0, 31      C top bit of the last v limb
+       stw     r7, 4(rp)       C store last result limb
+       addze   r3, r4          C return top bit of v plus final carry
+       blr
+L(exit):
+       adde    r7, s0, u0      C last sum limb, pending value in s0
+       srwi    r4, v1, 31      C top bit of the last v limb
+       stw     r7, 8(rp)       C store last result limb
+       addze   r3, r4          C return top bit of v plus final carry
+       blr
+EPILOGUE()
diff --git a/mpn/powerpc32/addmul_1.asm b/mpn/powerpc32/addmul_1.asm

new file mode 100644 (file)

index 0000000..6260691
--- /dev/null
+++ b/mpn/powerpc32/addmul_1.asm
@@ -0,0 +1,145 @@
+dnl  PowerPC-32 mpn_addmul_1 -- Multiply a limb vector with a limb and add the
+dnl  result to a second limb vector.
+
+dnl  Copyright 1995, 1997, 1998, 2000, 2001, 2002, 2003, 2005 Free Software
+dnl  Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                cycles/limb
+C 603e:            ?
+C 604e:            6.75
+C 75x (G3):        8.7-14.3
+C 7400,7410 (G4):  8.7-14.3
+C 744x,745x (G4+): 9.5
+C power4/ppc970:   6.25
+C power5:          6.25
+
+C INPUT PARAMETERS
+C rp   r3
+C up   r4
+C n    r5
+C vl   r6
+
+C This is optimized for the PPC604.  It has not been tuned for other
+C PowerPC processors.
+C
+C Loop Analysis for the 604:
+C 12 mem insn
+C 8 serializing insn
+C 8 int multiply
+C 25 int reg write
+C 9 int ops (8 of which serialize)
+C
+C The multiply insns need 16 cycles/4limb.
+C The integer register writes will need 13 cycles/4limb.
+C All-in-all, it should be possible to get to 4 or 5 cycles/limb on PPC604,
+C but that will require some clever FPNOPS and BNOPS for exact
+C issue control.
+
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+       cmpwi   cr0,r5,9        C more than 9 limbs?
+       bgt     cr0,L(big)      C branch if more than 9 limbs
+
+       mtctr   r5              C ctr = n
+       lwz     r0,0(r4)        C up[0]
+       mullw   r7,r0,r6        C low half of up[0] * vl
+       mulhwu  r10,r0,r6       C high half of up[0] * vl
+       lwz     r9,0(r3)        C rp[0]
+       addc    r8,r7,r9        C low half + rp[0], set cy
+       addi    r3,r3,-4        C bias rp so 4(r3) is the pending limb
+       bdz     L(end)          C n == 1
+L(loop):
+       lwzu    r0,4(r4)        C next up limb
+       stwu    r8,4(r3)        C store previous result limb
+       mullw   r8,r0,r6        C low half of product
+       adde    r7,r8,r10       C add previous high half and cy
+       mulhwu  r10,r0,r6       C high half of product
+       lwz     r9,4(r3)        C next rp limb
+       addze   r10,r10         C fold cy into high half
+       addc    r8,r7,r9        C add rp limb, set cy
+       bdnz    L(loop)
+L(end):        stw     r8,4(r3)        C store last result limb
+       addze   r3,r10          C return last high half plus cy
+       blr
+
+L(big):        stmw    r30,-32(r1)     C save r30 and r31 at -32(r1) and -28(r1)
+       addi    r5,r5,-1        C n-1, the first limb is handled before the loop
+       srwi    r0,r5,2         C (n-1)/4 passes of the 4-limb loop
+       mtctr   r0
+
+       lwz     r7,0(r4)        C up[0]
+       mullw   r8,r7,r6        C low half of up[0] * vl
+       mulhwu  r0,r7,r6        C high half, becomes cy_limb
+       lwz     r7,0(r3)        C rp[0]
+       addc    r8,r8,r7        C add rp[0], set cy
+       stw     r8,0(r3)
+
+L(loopU):
+       lwz     r7,4(r4)        C four up limbs
+       lwz     r12,8(r4)
+       lwz     r30,12(r4)
+       lwzu    r31,16(r4)      C ... and advance up by 4 limbs
+       mullw   r8,r7,r6        C four low halves
+       mullw   r9,r12,r6
+       mullw   r10,r30,r6
+       mullw   r11,r31,r6
+       adde    r8,r8,r0        C add cy_limb
+       mulhwu  r0,r7,r6
+       lwz     r7,4(r3)        C rp limbs reuse the up limb registers
+       adde    r9,r9,r0
+       mulhwu  r0,r12,r6
+       lwz     r12,8(r3)
+       adde    r10,r10,r0
+       mulhwu  r0,r30,r6
+       lwz     r30,12(r3)
+       adde    r11,r11,r0
+       mulhwu  r0,r31,r6
+       lwz     r31,16(r3)
+       addze   r0,r0           C new cy_limb
+       addc    r8,r8,r7        C second carry chain, adding the rp limbs
+       stw     r8,4(r3)
+       adde    r9,r9,r12
+       stw     r9,8(r3)
+       adde    r10,r10,r30
+       stw     r10,12(r3)
+       adde    r11,r11,r31
+       stwu    r11,16(r3)      C store and advance rp by 4 limbs
+       bdnz    L(loopU)
+
+       andi.   r31,r5,3        C (n-1) mod 4 limbs left over
+       mtctr   r31
+       beq     cr0,L(endx)     C none left
+
+L(loopE):
+       lwzu    r7,4(r4)        C next up limb
+       mullw   r8,r7,r6
+       adde    r8,r8,r0        C add cy_limb
+       mulhwu  r0,r7,r6
+       lwz     r7,4(r3)        C next rp limb
+       addze   r0,r0           C new cy_limb
+       addc    r8,r8,r7
+       stwu    r8,4(r3)
+       bdnz    L(loopE)
+L(endx):
+       addze   r3,r0           C return cy_limb plus cy
+       lmw     r30,-32(r1)     C restore r30 and r31
+       blr
+EPILOGUE(mpn_addmul_1)
diff --git a/mpn/powerpc32/aix.m4 b/mpn/powerpc32/aix.m4

new file mode 100644 (file)

index 0000000..81199c7
--- /dev/null
+++ b/mpn/powerpc32/aix.m4
@@ -0,0 +1,71 @@
+divert(-1)
+dnl  m4 macros for AIX 32-bit assembly.
+
+dnl  Copyright 2000, 2001, 2002, 2005, 2006 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+define(`ASM_START',
+`      .toc')
+
+dnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)
+dnl          EPILOGUE_cpu(GSYM_PREFIX`'foo)
+dnl  PROLOGUE_cpu emits label foo in a [DS] csect holding .foo, TOC[tc0], 0
+dnl  and then the code label .foo in [PR].  Don't want ELF style .size in the epilogue.
+
+define(`PROLOGUE_cpu',
+m4_assert_numargs(1)
+       `
+       .globl  $1
+       .globl  .$1
+       .csect  [DS], 2
+$1:
+       .long   .$1, TOC[tc0], 0
+       .csect  [PR]
+       .align  2
+.$1:')
+
+define(`EPILOGUE_cpu',
+m4_assert_numargs(1)
+`')
+
+define(`TOC_ENTRY', `')
+
+define(`LEA',
+m4_assert_numargs(2)
+`define(`TOC_ENTRY',
+`      .toc
+tc$2:
+       .tc     $2[TC], $2')'
+`      lwz     $1, tc$2(2)')
+
+define(`EXTERN',
+m4_assert_numargs(1)
+`      .globl  $1')
+
+define(`DEF_OBJECT',
+m4_assert_numargs_range(1,2)
+`      .csect  [RO], 3
+       ALIGN(ifelse($#,1,2,$2))
+$1:
+')
+
+define(`END_OBJECT',
+m4_assert_numargs(1))
+
+define(`ASM_END', `TOC_ENTRY')
+
+divert
diff --git a/mpn/powerpc32/aors_n.asm b/mpn/powerpc32/aors_n.asm

new file mode 100644 (file)

index 0000000..f9e9b50
--- /dev/null
+++ b/mpn/powerpc32/aors_n.asm
@@ -0,0 +1,143 @@
+dnl  PowerPC-32 mpn_add_n and mpn_sub_n.
+
+dnl  Copyright 2002, 2005, 2007 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                cycles/limb
+C 603e:              ?
+C 604e:              ?         old: 3.25
+C 75x (G3):          ?         old: 3.5
+C 7400,7410 (G4):    3.25
+C 744x,745x (G4+):   4
+C power4/ppc970:     ?         old: 2.0
+C power5:            ?         old: 2.5
+
+C INPUT PARAMETERS
+define(`rp',   `r3')
+define(`up',   `r4')
+define(`vp',   `r5')
+define(`n',    `r6')
+define(`cy',   `r7')
+
+ifdef(`OPERATION_add_n', `
+       define(ADCSBC,  adde)
+       define(func,    mpn_add_n)
+       define(func_nc, mpn_add_nc)
+       define(IFADD,   `$1')
+       define(IFSUB,   `')')
+ifdef(`OPERATION_sub_n', `
+       define(ADCSBC,  subfe)
+       define(func,    mpn_sub_n)
+       define(func_nc, mpn_sub_nc)
+       define(IFADD,   `')
+       define(IFSUB,   `$1')')
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
+
+ASM_START()
+
+PROLOGUE(func_nc)
+IFADD(`        addic   r0, cy, -1')            C set carry from argument
+IFSUB(`        subfic  r0, cy, 0')             C set carry from argument
+       b       L(ent)                  C join the common code in func
+EPILOGUE()
+
+PROLOGUE(func)
+IFADD(`        addic   r0, n, 0')              C clear carry
+IFSUB(`        addic   r0, n, -1')             C set carry
+L(ent):        andi.   r0, n, 3                C r0 = n mod 4, cr0 tells if zero
+       addi    r3, r3, -12             C bias rp for the stwu 4(r3) stores
+       addi    n, n, 1
+       cmpwi   cr7, r0, 2              C cr7 = (n mod 4) compared with 2
+       srwi    r0, n, 2                C r0 = (n+1)/4, loop count
+       sub     r4, r4, r3              C up as an offset from r3
+       sub     r5, r5, r3              C vp as an offset from r3
+       mtctr   r0
+       bne     cr0, L(n00)             C n mod 4 != 0
+
+       lwzx    r7, r4, r3              C n = 4, 8, 12, ...
+       lwzx    r8, r5, r3
+       addi    r3, r3, 4
+       lwzx    r9, r4, r3
+       ADCSBC  r7, r8, r7
+       lwzx    r10, r5, r3
+       addi    r3, r3, 4
+       b       L(00)
+
+L(n00):        bge     cr7, L(n01)             C n mod 4 is 2 or 3
+       cmpwi   cr0, r0, 0              C n = 1, 5, 9, 13, ...
+       lwzx    r0, r4, r3
+       lwzx    r6, r5, r3
+       addi    r3, r3, 4
+       ADCSBC  r0, r6, r0
+       ble     L(ret)                  C loop count 0, single limb only
+L(gt1):        lwzx    r7, r4, r3
+       lwzx    r8, r5, r3
+       addi    r3, r3, 4
+       b       L(01)
+
+L(n10):
+       lwzx    r9, r4, r3              C n = 3, 7, 11, 15, ...
+       lwzx    r10, r5, r3
+       addi    r3, r3, 4
+       lwzx    r11, r4, r3
+       ADCSBC  r9, r10, r9
+       lwzx    r12, r5, r3
+       addi    r3, r3, 4
+       b       L(11)
+
+L(n01):        bne     cr7, L(n10)             C n mod 4 == 3
+       cmpwi   cr0, r0, 0              C n = 2, 6, 10, 14, ...
+       lwzx    r11, r4, r3
+       lwzx    r12, r5, r3
+       addi    r3, r3, 4
+       lwzx    r0, r4, r3
+       ADCSBC  r11, r12, r11
+       lwzx    r6, r5, r3
+       addi    r3, r3, 4
+       ble     cr0, L(end)             C loop count 0, two limbs only
+
+
+L(lp): lwzx    r7, r4, r3              C 4-way unrolled, entered at lp/01/00/11
+       ADCSBC  r0, r6, r0
+       lwzx    r8, r5, r3
+       stwu    r11, 4(r3)
+L(01): lwzx    r9, r4, r3
+       ADCSBC  r7, r8, r7
+       lwzx    r10, r5, r3
+       stwu    r0, 4(r3)
+L(00): lwzx    r11, r4, r3
+       ADCSBC  r9, r10, r9
+       lwzx    r12, r5, r3
+       stwu    r7, 4(r3)
+L(11): lwzx    r0, r4, r3
+       ADCSBC  r11, r12, r11
+       lwzx    r6, r5, r3
+       stwu    r9, 4(r3)
+       bdnz    L(lp)
+
+L(end):        ADCSBC  r0, r6, r0              C last limb
+       stw     r11, 4(r3)              C store second to last result limb
+L(ret):        stw     r0, 8(r3)               C store last result limb
+IFADD(`        li      r3, 0   ')
+IFADD(`        addze   r3, r3  ')
+IFSUB(`        subfe   r3, r0, r0')
+IFSUB(`        neg     r3, r3')
+       blr                             C r3 = carry-out (add) or borrow (sub)
+EPILOGUE()
diff --git a/mpn/powerpc32/bdiv_dbm1c.asm b/mpn/powerpc32/bdiv_dbm1c.asm

new file mode 100644 (file)

index 0000000..41870fb
--- /dev/null
+++ b/mpn/powerpc32/bdiv_dbm1c.asm
@@ -0,0 +1,120 @@
+dnl  PPC32 mpn_bdiv_dbm1c.
+
+dnl  Copyright 2008 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                cycles/limb
+C 603e:            ?
+C 604e:            ?
+C 75x (G3):        ?
+C 7400,7410 (G4):  9.43
+C 744x,745x (G4+): 6.28
+C power4/ppc970:   ?
+C power5:          ?
+
+C TODO
+C  * Nothing to do...
+
+C INPUT PARAMETERS
+define(`rp', `r3')
+define(`up', `r4')
+define(`n',  `r5')
+define(`bd', `r6')
+define(`cy', `r7')
+
+ASM_START()
+PROLOGUE(mpn_bdiv_dbm1c)
+       lwz     r0, 0(r4)               C up[0]
+
+       rlwinm. r12, r5, 0,30,31        C r12 = n mod 4, cr0 tells if zero
+       cmplwi  cr6, r12, 2             C cr6 = (n mod 4) compared with 2
+       cmplwi  cr7, r5, 4              C cr7 = n compared with 4
+       addi    r5, r5, 1
+       srwi    r5, r5, 2               C (n+1)/4, loop count
+       mtctr   r5
+       beq     cr0, L(b00)             C n mod 4 == 0
+       blt     cr6, L(b01)             C n mod 4 == 1
+       beq     cr6, L(b10)             C n mod 4 == 2
+
+L(b11):        mullw   r5, r0, r6              C n mod 4 == 3
+       mulhwu  r12, r0, r6
+       lwz     r0, 4(r4)
+       addi    r4, r4, -12
+       addi    r3, r3, -12
+       b       L(3)
+
+L(b00):        mullw   r9, r0, r6
+       mulhwu  r8, r0, r6
+       lwz     r0, 4(r4)
+       addi    r4, r4, -8
+       addi    r3, r3, -8
+       b       L(0)
+
+L(b01):        mullw   r5, r0, r6
+       mulhwu  r12, r0, r6
+       addi    r3, r3, -4
+       ble     cr7, L(e1)              C n == 1
+       lwz     r0, 4(r4)
+       addi    r4, r4, -4
+       b       L(1)
+
+L(b10):        mullw   r9, r0, r6
+       mulhwu  r8, r0, r6
+       lwz     r0, 4(r4)
+       ble     cr7, L(e2)              C n == 2
+
+       ALIGN(16)
+L(top):        mullw   r5, r0, r6              C low half of next product
+       mulhwu  r12, r0, r6             C high half of next product
+       subfc   r11, r9, r7             C r11 = cy - low half, set carry
+       lwz     r0, 8(r4)
+       subfe   r7, r8, r11             C cy = r11 - high half - borrow
+       stw     r11, 0(r3)
+L(1):  mullw   r9, r0, r6
+       mulhwu  r8, r0, r6
+       subfc   r11, r5, r7
+       lwz     r0, 12(r4)
+       subfe   r7, r12, r11
+       stw     r11, 4(r3)
+L(0):  mullw   r5, r0, r6
+       mulhwu  r12, r0, r6
+       subfc   r11, r9, r7
+       lwz     r0, 16(r4)
+       subfe   r7, r8, r11
+       stw     r11, 8(r3)
+L(3):  mullw   r9, r0, r6
+       mulhwu  r8, r0, r6
+       subfc   r11, r5, r7
+       lwz     r0, 20(r4)
+       subfe   r7, r12, r11
+       stw     r11, 12(r3)
+       addi    r4, r4, 16              C advance up by 4 limbs
+       addi    r3, r3, 16              C advance rp by 4 limbs
+       bdnz    L(top)
+
+L(e2): mullw   r5, r0, r6              C last two limbs
+       mulhwu  r12, r0, r6
+       subfc   r11, r9, r7
+       subfe   r7, r8, r11
+       stw     r11, 0(r3)
+L(e1): subfc   r11, r5, r7             C last limb
+       stw     r11, 4(r3)
+       subfe   r3, r12, r11            C return the final cy
+       blr
+EPILOGUE()
diff --git a/mpn/powerpc32/darwin.m4 b/mpn/powerpc32/darwin.m4

new file mode 100644 (file)

index 0000000..b76103a
--- /dev/null
+++ b/mpn/powerpc32/darwin.m4
@@ -0,0 +1,78 @@
+divert(-1)
+dnl  m4 macros for Mac OS 32-bit assembly.
+
+dnl  Copyright 2005, 2006 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+define(`ASM_START',`')
+
+dnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)
+dnl          EPILOGUE_cpu(GSYM_PREFIX`'foo)
+dnl  PROLOGUE_cpu emits .text, .globl, .align 3 and the label; EPILOGUE_cpu emits nothing.
+
+define(`PROLOGUE_cpu',
+m4_assert_numargs(1)
+`      .text
+       .globl  $1
+       .align  3
+$1:')
+
+define(`EPILOGUE_cpu',
+m4_assert_numargs(1))
+
+
+dnl  LEA(reg,sym) -- Load Effective Address of sym into reg.  The PIC form keeps LR in r0.
+
+define(`LEA',
+m4_assert_numargs(2)
+`ifdef(`PIC',
+`      mflr    r0                      C save return address
+       bcl     20, 31, 1f
+1:     mflr    $1
+       addis   $1, $1, ha16($2-1b)
+       la      $1, lo16($2-1b)($1)
+       mtlr    r0                      C restore return address
+',`
+       lis     $1, ha16($2)
+       la      $1, lo16($2)($1)
+')')
+
+define(`LEAL',
+m4_assert_numargs(2)
+`LEA($1,$2)')
+
+
+define(`EXTERN',
+m4_assert_numargs(1)
+`dnl')
+
+define(`DEF_OBJECT',
+m4_assert_numargs_range(1,2)
+`      .const
+       ALIGN(ifelse($#,1,2,$2))
+$1:
+')
+
+define(`END_OBJECT',
+m4_assert_numargs(1))
+
+define(`ASM_END', `dnl')
+
+ifdef(`PIC',`
+define(`PIC_SLOW')')
+
+divert
diff --git a/mpn/powerpc32/diveby3.asm b/mpn/powerpc32/diveby3.asm

new file mode 100644 (file)

index 0000000..cf11a19
--- /dev/null
+++ b/mpn/powerpc32/diveby3.asm
@@ -0,0 +1,82 @@
+dnl  PowerPC-32 mpn_divexact_by3 -- mpn by 3 exact division
+
+dnl  Copyright 2002, 2003, 2005, 2006 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                cycles/limb
+C 603e:              ?
+C 604e:              5
+C 75x (G3):          ?
+C 7400,7410 (G4):    8
+C 744x,745x (G4+):   6
+C power4/ppc970:    12
+C power5:            ?
+
+C mp_limb_t mpn_divexact_by3c (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t cy);
+C
+C We avoid the slow subfe instruction and instead rely on an extremely unlikely
+C branch.
+C
+C The mullw has the inverse in the first operand, since 0xAA..AB won't allow
+C any early-out.  The src[] data normally won't either, but there's at least
+C a chance, whereas 0xAA..AB never will.  If, for instance, src[] is all
+C zeros (not a sensible input of course) we run at 7.0 c/l on ppc750.
+C
+C The mulhwu has the "3" multiplier in the second operand, which lets 750 and
+C 7400 use an early-out.
+
+C INPUT PARAMETERS
+define(`rp', `r3')
+define(`up', `r4')
+define(`n',  `r5')
+define(`cy', `r6')
+
+ASM_START()
+PROLOGUE(mpn_divexact_by3c)
+       lwz     r11, 0(up)              C src[0]
+       mtctr   n
+       lis     r12, 0xAAAA
+       ori     r12, r12, 0xAAAB        C r12 = 0xAAAAAAAB, the inverse
+       li      r10, 3
+
+       cmplw   cr7, cy, r11            C cr7 gt means cy > src[0], a borrow
+       subf    r11, cy, r11            C src[0] - cy
+
+       mullw   r0, r11, r12            C first quotient limb
+       stw     r0, 0(rp)
+       bdz     L(one)                  C size == 1
+
+L(top):        lwzu    r9, 4(up)               C next src limb
+       mulhwu  r7, r0, r10             C high half of 3 * previous quotient limb
+       bgt-    cr7, L(adj)             C very unlikely branch
+L(bko):        cmplw   cr7, r7, r9             C cr7 gt means a borrow from this limb
+       subf    r0, r7, r9              C src limb - r7
+       mullw   r0, r12, r0             C quotient limb, inverse as first operand
+       stwu    r0, 4(rp)
+       bdnz    L(top)
+
+L(one):        mulhwu  r3, r0, r10             C high half of 3 * last quotient limb
+       blelr+  cr7                     C return it if the last subtract had no borrow
+       addi    r3, r3, 1               C else return it plus 1
+       blr
+
+L(adj):        addi    r7, r7, 1               C account for the borrow from the previous limb
+       b       L(bko)
+EPILOGUE()
+ASM_END()
diff --git a/mpn/powerpc32/divrem_2.asm b/mpn/powerpc32/divrem_2.asm

new file mode 100644 (file)

index 0000000..916219e
--- /dev/null
+++ b/mpn/powerpc32/divrem_2.asm
@@ -0,0 +1,171 @@
+dnl  PPC-32 mpn_divrem_2 -- Divide an mpn number by a normalized 2-limb number.
+
+dnl  Copyright 2007, 2008, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C              cycles/limb
+C              norm    frac
+C 7410         ~36.5   ~36.5
+C 744x, 745x    29      29
+
+C INPUT PARAMETERS
+C qp  = r3
+C fn  = r4
+C up  = r5
+C un  = r6
+C d   = r7
+
+C TODO
+C  * Decrease register usage.
+C  * Make sure mul operands are optimal for early-out.
+C  * Check that things work well for a shared library build.
+C  * Write an invert_limb, perhaps inline, perhaps as a private call.  Or at
+C    least vastly improve the current __udiv_qrnnd_c based code.
+
+
+ASM_START()
+PROLOGUE(mpn_divrem_2)
+       stwu    r1, -32(r1)             C allocate a 32-byte stack frame
+       slwi    r0, r6, 2               C un * 4
+       add     r5, r5, r0              C up_param + un
+       stmw    r28, 8(r1)              C save r28-r31 in the frame
+       addi    r29, r5, -8             C up = up_param + un - 2
+       lwz     r10, 4(r7)              C d1 = d[1]
+       lwz     r12, 4(r29)             C n2 = most significant dividend limb
+       addi    r8, r3, -12             C qp - 12 bytes, (fn + un) limbs added below
+       lwz     r7, 0(r7)               C d0 = d[0]
+       cmplw   cr7, r12, r10           C n2 vs d1
+       lwz     r28, 0(r29)             C n1 = next dividend limb
+       blt-    cr7, L(2)               C n2 < d1: msl = 0
+       bgt+    cr7, L(4)               C n2 > d1: msl = 1
+       cmplw   cr7, r28, r7            C high limbs equal, so compare n1 vs d0
+       blt-    cr7, L(2)
+L(4):  subfc   r28, r7, r28            C {n2,n1} -= {d1,d0}
+       subfe   r12, r10, r12
+       li      r3, 1                   C return value (msl) = 1
+       b       L(6)
+L(2):  li      r3, 0                   C return value (msl) = 0
+
+L(6):  add     r0, r4, r6              C fn + un
+       addic.  r30, r0, -2             C loop count = fn + un - 2
+       ble-    cr0, L(ret)             C no quotient limbs to develop
+
+       slwi    r9, r0, 2
+       add     r8, r8, r9              C rp += un + fn
+       mtctr   r30
+
+C Compute di from d1
+       srwi    r11, r10, 16            C high halfword of d1
+       nor     r0, r10, r10            C ~d1
+       divwu   r31, r0, r11            C q1 estimate
+       rlwinm  r5, r10, 0, 16, 31      C low halfword of d1
+       mullw   r9, r11, r31
+       mullw   r6, r5, r31
+       subf    r0, r9, r0
+       slwi    r0, r0, 16
+       ori     r0, r0, 65535
+       cmplw   cr7, r0, r6
+       bge-    cr7, L(9)
+       add     r0, r0, r10
+       cmplw   cr7, r0, r10            C cr7 lt means the add wrapped
+       cmplw   cr6, r6, r0
+       addi    r31, r31, -1            C q1--
+       crorc   28, 28, 25              C cr7 lt = cr7 lt OR NOT cr6 gt
+       bc+     12, 28, L(9)            C taken when one adjustment was enough
+       addi    r31, r31, -1            C q1--
+       add     r0, r0, r10
+L(9):  subf    r0, r6, r0
+       divwu   r6, r0, r11             C q0 estimate
+       mullw   r9, r11, r6
+       mullw   r11, r5, r6
+       subf    r0, r9, r0
+       slwi    r0, r0, 16
+       ori     r0, r0, 65535
+       cmplw   cr7, r0, r11
+       bge-    cr7, L(13)
+       add     r0, r0, r10
+       cmplw   cr7, r0, r10            C cr7 lt means the add wrapped
+       cmplw   cr6, r11, r0
+       addi    r6, r6, -1              C q0--
+       crorc   28, 28, 25              C cr7 lt = cr7 lt OR NOT cr6 gt
+       bc+     12, 28, L(13)           C taken when one adjustment was enough
+C      add     r0, r0, r10             C final remainder
+       addi    r6, r6, -1              C q0--
+L(13): rlwimi  r6, r31, 16, 0, 15      C assemble final quotient
+
+C Adjust di by including d0
+       mullw   r9, r10, r6             C t0 = LO(di * d1)
+       addc    r11, r9, r7
+       subfe   r0, r1, r1              C r0 = CA - 1, r1 only cancels itself
+       mulhwu  r9, r6, r7              C s1 = HI(di * d0)
+       addc    r9, r11, r9
+       addze.  r0, r0
+       blt     cr0, L(17)
+L(18): subfc   r9, r10, r9
+       addi    r6, r6, -1              C di--
+       addme.  r0, r0
+       bge+    cr0, L(18)
+L(17):
+
+C r0  r3  r4  r5  r6  r7  r8  r9 r10 r11 r12 r28 r29 r30 r31
+C     msl fn      di  d0  qp     d1      n2  n1  up  un
+L(loop):
+       mullw   r0, r12, r6             C q0 = LO(n2 * di)
+       cmpw    cr7, r30, r4            C remaining count vs fn
+       addc    r31, r0, r28            C q0 += n1
+       mulhwu  r9, r12, r6             C q  = HI(n2 * di)
+       adde    r12, r9, r12            C q  += n2
+       addi    r30, r30, -1
+       mullw   r0, r10, r12            C d1 * q
+       li      r9, 0                   C n0 = 0 unless a limb is loaded below
+       subf    r0, r0, r28             C n1 -= d1 * q
+       addi    r5, r12, 1              C q + 1, candidate quotient limb
+       ble-    cr7, L(23)              C count <= fn: nothing to load, n0 stays 0
+       lwzu    r9, -4(r29)             C n0 = next lower dividend limb
+L(23): mullw   r11, r12, r7            C t0 = LO(d0 * q)
+       subfc   r28, r7, r9             C n0 -= d0
+       subfe   r0, r10, r0             C n1 -= d1
+       mulhwu  r12, r12, r7            C t1 = HI(d0 * q)
+       subfc   r28, r11, r28           C n0 -= t0
+       subfe   r12, r12, r0            C n1 -= t1
+       cmplw   cr7, r12, r31
+       blt+    cr7, L(24)
+       addc    r28, r28, r7            C add {d1,d0} back
+       adde    r12, r12, r10
+       addi    r5, r5, -1              C and take one off the quotient limb
+L(24): cmplw   cr7, r12, r10
+       bge-    cr7, L(fix)             C high remainder limb >= d1: fix up
+L(bck):        stw     r5, 0(r8)       C store quotient limb
+       addi    r8, r8, -4
+       bdnz    L(loop)
+
+L(ret):        stw     r28, 0(r29)     C store low remainder limb
+       stw     r12, 4(r29)             C store high remainder limb
+       lmw     r28, 8(r1)              C restore r28-r31
+       addi    r1, r1, 32              C release the stack frame
+       blr
+
+L(fix):        cmplw   cr6, r28, r7
+       bgt+    cr7, L(28)
+       blt-    cr6, L(bck)
+L(28): subfc   r28, r7, r28            C subtract {d1,d0} once more
+       subfe   r12, r10, r12
+       addi    r5, r5, 1               C and add one to the quotient limb
+       b       L(bck)
+EPILOGUE()
diff --git a/mpn/powerpc32/eabi.m4 b/mpn/powerpc32/eabi.m4

new file mode 100644 (file)

index 0000000..20f9a2f
--- /dev/null
+++ b/mpn/powerpc32/eabi.m4
@@ -0,0 +1,75 @@
+divert(-1)
+dnl  m4 macros for powerpc32 eABI assembly.
+
+dnl  Copyright 2003, 2005, 2006 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+define(`ASM_START',`')
+
+dnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)
+dnl          EPILOGUE_cpu(GSYM_PREFIX`'foo)
+dnl  They bracket a function with its section, alignment and symbol directives.
+
+define(`PROLOGUE_cpu',
+m4_assert_numargs(1)
+       `
+       .section        ".text"
+       .align  3
+       .globl  $1
+       .type   $1, @function
+$1:')
+
+define(`EPILOGUE_cpu',
+m4_assert_numargs(1)
+`      .size   $1, .-$1')
+
+dnl  LEA(reg,sym) is absolute only; it is unclear how PIC is done for eABI.
+define(`LEA',
+m4_assert_numargs(2)
+`
+       lis     $1, $2@ha
+       la      $1, $2@l($1)
+')
+
+define(`EXTERN',
+m4_assert_numargs(1)
+`dnl')
+
+define(`DEF_OBJECT',
+m4_assert_numargs_range(1,2)
+`
+       .section        .rodata
+       ALIGN(ifelse($#,1,2,$2))
+       .type   $1, @object
+$1:
+')
+
+define(`END_OBJECT',
+m4_assert_numargs(1)
+`      .size   $1, .-$1')
+
+define(`ASM_END', `dnl')
+
+ifdef(`PIC',`
+define(`PIC_SLOW')')
+
+dnl  64-bit "long long" parameters are put in an even-odd pair, skipping an
+dnl  even register if that was in turn.  I wish somebody could explain why that
+dnl  is a good idea.
+define(`BROKEN_LONGLONG_PARAM')
+
+divert
diff --git a/mpn/powerpc32/elf.m4 b/mpn/powerpc32/elf.m4

new file mode 100644 (file)

index 0000000..ab1559e
--- /dev/null
+++ b/mpn/powerpc32/elf.m4
@@ -0,0 +1,85 @@
+divert(-1)
+dnl  m4 macros for powerpc32 GNU/Linux assembly.
+
+dnl  Copyright 2003, 2005, 2006 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+define(`ASM_START',`')
+
+dnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)
+dnl          EPILOGUE_cpu(GSYM_PREFIX`'foo)
+dnl  They bracket a function with its section, alignment and symbol directives.
+
+define(`PROLOGUE_cpu',
+m4_assert_numargs(1)
+       `
+       .section        ".text"
+       .align  3
+       .globl  $1
+       .type   $1, @function
+$1:')
+
+define(`EPILOGUE_cpu',
+m4_assert_numargs(1)
+`      .size   $1, .-$1')
+dnl  LEA(reg,sym): the PIC form loads sym from the GOT and overwrites r0.
+define(`LEA',
+m4_assert_numargs(2)
+`ifdef(`PIC',`
+       mflr    r0
+       bl      _GLOBAL_OFFSET_TABLE_@local-4
+       mflr    $1
+       mtlr    r0
+       lwz     $1, $2@got($1)
+',`
+       lis     $1, $2@ha
+       la      $1, $2@l($1)
+')')
+
+define(`LEAL',
+m4_assert_numargs(2)
+`LEA($1,$2)')
+
+
+define(`EXTERN',
+m4_assert_numargs(1)
+`dnl')
+
+define(`DEF_OBJECT',
+m4_assert_numargs_range(1,2)
+`
+       .section        .rodata
+       ALIGN(ifelse($#,1,2,$2))
+       .type   $1, @object
+$1:
+')
+
+define(`END_OBJECT',
+m4_assert_numargs(1)
+`      .size   $1, .-$1')
+
+define(`ASM_END', `dnl')
+
+ifdef(`PIC',`
+define(`PIC_SLOW')')
+
+dnl  64-bit "long long" parameters are put in an even-odd pair, skipping an
+dnl  even register if that was in turn.  I wish somebody could explain why that
+dnl  is a good idea.
+define(`BROKEN_LONGLONG_PARAM')
+
+divert
diff --git a/mpn/powerpc32/gmp-mparam.h b/mpn/powerpc32/gmp-mparam.h

new file mode 100644 (file)

index 0000000..7502c51
--- /dev/null
+++ b/mpn/powerpc32/gmp-mparam.h
@@ -0,0 +1,191 @@
+/* PowerPC-32 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2004, 2008, 2009,
+2010, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 32
+#define BYTES_PER_MP_LIMB 4
+
+
+/* This file is supposed to be used for 604, 604e, 744x/745x/747x (G4+), i.e.,
+   32-bit PowerPC processors with reasonably fast integer multiply insns.  The
+   values below are chosen to be best for the latter processors, since 604 is
+   largely irrelevant today.
+
+   In mpn/powerpc32/750/gmp-mparam.h there are values for 75x (G3) and for
+   7400/7410 (G4), both of which have much slower multiply instructions.  */
+
+/* 1417 MHz PPC 7447A */
+
+#define DIVREM_1_NORM_THRESHOLD              0  /* always */
+#define DIVREM_1_UNNORM_THRESHOLD            0  /* always */
+#define MOD_1_NORM_THRESHOLD                 0  /* always */
+#define MOD_1_UNNORM_THRESHOLD               0  /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          7
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          8
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        36
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     37
+#define USE_PREINV_DIVREM_1                  1
+#define DIVEXACT_1_THRESHOLD                 0  /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD           69
+
+#define MUL_TOOM22_THRESHOLD                14
+#define MUL_TOOM33_THRESHOLD                73
+#define MUL_TOOM44_THRESHOLD               106
+#define MUL_TOOM6H_THRESHOLD               157
+#define MUL_TOOM8H_THRESHOLD               236
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      73
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD      71
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD      73
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD      72
+
+#define SQR_BASECASE_THRESHOLD               0  /* always */
+#define SQR_TOOM2_THRESHOLD                 24
+#define SQR_TOOM3_THRESHOLD                 77
+#define SQR_TOOM4_THRESHOLD                130
+#define SQR_TOOM6_THRESHOLD                189
+#define SQR_TOOM8_THRESHOLD                284
+
+#define MULMOD_BNM1_THRESHOLD               10
+#define SQRMOD_BNM1_THRESHOLD               13
+
+#define MUL_FFT_MODF_THRESHOLD             284  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    284, 5}, {     17, 6}, {      9, 5}, {     19, 6}, \
+    {     13, 7}, {      7, 6}, {     17, 7}, {      9, 6}, \
+    {     20, 7}, {     11, 6}, {     23, 7}, {     13, 8}, \
+    {      7, 7}, {     19, 8}, {     11, 7}, {     25, 9}, \
+    {      7, 8}, {     15, 7}, {     33, 8}, {     19, 7}, \
+    {     39, 8}, {     23, 7}, {     47, 9}, {     15, 8}, \
+    {     39, 9}, {     23, 8}, {     47,10}, {     15, 9}, \
+    {     31, 8}, {     67, 9}, {     39, 8}, {     79, 9}, \
+    {     47, 8}, {     95,10}, {     31, 9}, {     71, 8}, \
+    {    143, 9}, {     79,10}, {     47, 9}, {     95,11}, \
+    {     31,10}, {     63, 9}, {    127, 8}, {    255, 9}, \
+    {    135, 8}, {    271, 9}, {    143,10}, {     79, 9}, \
+    {    159, 8}, {    319, 9}, {    175,10}, {     95, 9}, \
+    {    191, 8}, {    383, 9}, {    207, 8}, {    415,11}, \
+    {     63,10}, {    127, 9}, {    255, 8}, {    511, 9}, \
+    {    271,10}, {    143, 9}, {    287, 8}, {    575,10}, \
+    {    159, 9}, {    319,10}, {    175,11}, {     95,10}, \
+    {    191, 9}, {    383,10}, {    207, 9}, {    415, 8}, \
+    {    831,12}, {     63,11}, {    127,10}, {    255, 9}, \
+    {    511,10}, {    271, 9}, {    543, 8}, {   1087,10}, \
+    {    287, 9}, {    575,11}, {    159,10}, {    319, 9}, \
+    {    639,10}, {    351, 9}, {    703,11}, {    191,10}, \
+    {    415, 9}, {    831,11}, {    223,10}, {    447, 9}, \
+    {    895,10}, {    479, 9}, {    959,12}, {    127,11}, \
+    {    255,10}, {    543, 9}, {   1087,11}, {    287,10}, \
+    {    607,11}, {    319,10}, {    639,11}, {    351,10}, \
+    {    703, 9}, {   1407,12}, {    191,11}, {    383,10}, \
+    {    767,11}, {    415,10}, {    831,11}, {    447,10}, \
+    {    895,11}, {    479,10}, {    959,13}, {    127,12}, \
+    {    255,11}, {    543,10}, {   1087,11}, {    607,12}, \
+    {    319,11}, {    639,10}, {   1279,11}, {    703,10}, \
+    {   1407,12}, {    383,11}, {    831,12}, {    447,11}, \
+    {    959,10}, {   1919,13}, {    255,12}, {    511,11}, \
+    {   1087,12}, {    575,11}, {   1215,10}, {   2431,12}, \
+    {    639,11}, {   1279,12}, {    703,11}, {   1407,13}, \
+    {    383,12}, {    959,11}, {   1919,14}, {    255,13}, \
+    {    511,12}, {   1215,11}, {   2431,13}, {    639,12}, \
+    {   1471,13}, {    767,12}, {   1599,13}, {    895,12}, \
+    {   1919,14}, {    511,13}, {   1023,12}, {   2111,13}, \
+    {   1151,12}, {   2431,13}, {   1407,14}, {    767,13}, \
+    {   1535,12}, {   3071,13}, {   1919,12}, {   3839,15}, \
+    {  32768,16} }
+#define MUL_FFT_TABLE3_SIZE 165
+#define MUL_FFT_THRESHOLD                 3712
+
+#define SQR_FFT_MODF_THRESHOLD             248  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    248, 5}, {     17, 6}, {      9, 5}, {     19, 6}, \
+    {     17, 7}, {      9, 6}, {     20, 7}, {     11, 6}, \
+    {     23, 7}, {     13, 8}, {      7, 7}, {     19, 8}, \
+    {     11, 7}, {     25, 9}, {      7, 8}, {     15, 7}, \
+    {     33, 8}, {     19, 7}, {     39, 8}, {     23, 7}, \
+    {     47, 8}, {     27, 9}, {     15, 8}, {     39, 9}, \
+    {     23, 8}, {     47,10}, {     15, 9}, {     31, 8}, \
+    {     63, 9}, {     39, 8}, {     79, 9}, {     47,10}, \
+    {     31, 9}, {     63, 8}, {    127, 9}, {     71, 8}, \
+    {    143, 9}, {     79,10}, {     47, 9}, {     95,11}, \
+    {     31,10}, {     63, 9}, {    127, 8}, {    255, 7}, \
+    {    511, 9}, {    143,10}, {     79, 9}, {    159, 8}, \
+    {    319, 9}, {    175, 8}, {    351,10}, {     95, 9}, \
+    {    191, 8}, {    383, 9}, {    207, 8}, {    415,11}, \
+    {     63,10}, {    127, 9}, {    255, 8}, {    511,10}, \
+    {    143, 9}, {    287, 8}, {    575,10}, {    159, 9}, \
+    {    319,10}, {    175, 9}, {    351,11}, {     95,10}, \
+    {    191, 9}, {    383,10}, {    207, 9}, {    415, 8}, \
+    {    831,12}, {     63,11}, {    127,10}, {    255, 9}, \
+    {    511,10}, {    271, 9}, {    543,10}, {    287, 9}, \
+    {    575,11}, {    159,10}, {    319, 9}, {    639,10}, \
+    {    351, 9}, {    703, 8}, {   1407, 9}, {    735,11}, \
+    {    191,10}, {    415, 9}, {    831,11}, {    223,10}, \
+    {    447, 9}, {    895,10}, {    479,12}, {    127,11}, \
+    {    255,10}, {    543,11}, {    287,10}, {    607,11}, \
+    {    319,10}, {    639,11}, {    351,10}, {    703, 9}, \
+    {   1407,12}, {    191,11}, {    383,13}, {    127,12}, \
+    {    255,11}, {    543,10}, {   1087,11}, {    575,10}, \
+    {   1151,12}, {    319,11}, {    703,10}, {   1407,12}, \
+    {    383,11}, {    831,12}, {    447,11}, {    959,13}, \
+    {    255,12}, {    511,11}, {   1087,12}, {    575,11}, \
+    {   1215,12}, {    639,11}, {   1279,12}, {    703,11}, \
+    {   1407,13}, {    383,12}, {    959,14}, {    255,13}, \
+    {    511,12}, {   1215,11}, {   2431,13}, {    639,12}, \
+    {   1471,13}, {    767,12}, {   1599,13}, {    895,12}, \
+    {   1919,14}, {    511,13}, {   1023,12}, {   2111,13}, \
+    {   1151,12}, {   2431,13}, {   1407,12}, {   2815,14}, \
+    {    767,13}, {   1535,12}, {   3199,13}, {   1919,15}, \
+    {  32768,16} }
+#define SQR_FFT_TABLE3_SIZE 153
+#define SQR_FFT_THRESHOLD                 2688
+
+#define MULLO_BASECASE_THRESHOLD             0  /* always */
+#define MULLO_DC_THRESHOLD                  45
+#define MULLO_MUL_N_THRESHOLD             6633
+
+#define DC_DIV_QR_THRESHOLD                 43
+#define DC_DIVAPPR_Q_THRESHOLD             154
+#define DC_BDIV_QR_THRESHOLD                55
+#define DC_BDIV_Q_THRESHOLD                124
+
+#define INV_MULMOD_BNM1_THRESHOLD           42
+#define INV_NEWTON_THRESHOLD               179
+#define INV_APPR_THRESHOLD                 157
+
+#define BINV_NEWTON_THRESHOLD              232
+#define REDC_1_TO_REDC_N_THRESHOLD          54
+
+#define MU_DIV_QR_THRESHOLD               1057
+#define MU_DIVAPPR_Q_THRESHOLD            1142
+#define MUPI_DIV_QR_THRESHOLD               83
+#define MU_BDIV_QR_THRESHOLD               872
+#define MU_BDIV_Q_THRESHOLD               1142
+
+#define MATRIX22_STRASSEN_THRESHOLD         15
+#define HGCD_THRESHOLD                     122
+#define GCD_DC_THRESHOLD                   339
+#define GCDEXT_DC_THRESHOLD                278
+#define JACOBI_BASE_METHOD                   1
+
+#define GET_STR_DC_THRESHOLD                17
+#define GET_STR_PRECOMPUTE_THRESHOLD        38
+#define SET_STR_DC_THRESHOLD               781
+#define SET_STR_PRECOMPUTE_THRESHOLD      1505
diff --git a/mpn/powerpc32/lshift.asm b/mpn/powerpc32/lshift.asm

new file mode 100644 (file)

index 0000000..e306173
--- /dev/null
+++ b/mpn/powerpc32/lshift.asm
@@ -0,0 +1,156 @@
+dnl  PowerPC-32 mpn_lshift -- Shift a number left.
+
+dnl  Copyright 1995, 1998, 2000, 2002, 2003, 2004, 2005 Free Software
+dnl  Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                cycles/limb
+C 603e:            ?
+C 604e:            3.0
+C 75x (G3):        3.0
+C 7400,7410 (G4):  3.0
+C 7445,7455 (G4+): 2.5
+C 7447,7457 (G4+): 2.25
+C power4/ppc970:   2.5
+C power5:          2.5
+
+C INPUT PARAMETERS
+C rp   r3
+C up   r4
+C n    r5
+C cnt  r6
+
+ASM_START()
+PROLOGUE(mpn_lshift)
+       cmpwi   cr0, r5, 12     C more than 12 limbs?
+       slwi    r0, r5, 2       C r0 = n * 4, operand size in bytes
+       add     r4, r4, r0      C make r4 point at end of s1
+       add     r7, r3, r0      C make r7 point at end of res
+       bgt     L(BIG)          C branch if more than 12 limbs
+
+       mtctr   r5              C copy size into CTR
+       subfic  r8, r6, 32      C r8 = 32 - cnt
+       lwzu    r11, -4(r4)     C load first s1 limb
+       srw     r3, r11, r8     C compute function return value
+       bdz     L(end1)
+
+L(oop):        lwzu    r10, -4(r4)
+       slw     r9, r11, r6
+       srw     r12, r10, r8
+       or      r9, r9, r12
+       stwu    r9, -4(r7)
+       bdz     L(end2)
+       lwzu    r11, -4(r4)
+       slw     r9, r10, r6
+       srw     r12, r11, r8
+       or      r9, r9, r12
+       stwu    r9, -4(r7)
+       bdnz    L(oop)
+
+L(end1):
+       slw     r0, r11, r6     C least significant limb, zeros shifted in
+       stw     r0, -4(r7)
+       blr
+L(end2):
+       slw     r0, r10, r6     C least significant limb, zeros shifted in
+       stw     r0, -4(r7)
+       blr
+
+L(BIG):
+       stwu    r1, -48(r1)     C allocate a frame, never store below r1
+       stmw    r24, 8(r1)      C save registers we are supposed to preserve
+       lwzu    r9, -4(r4)
+       subfic  r8, r6, 32      C r8 = 32 - cnt
+       srw     r3, r9, r8      C compute function return value
+       slw     r0, r9, r6
+       addi    r5, r5, -1
+       andi.   r10, r5, 3      C count for spill loop
+       beq     L(e)
+       mtctr   r10
+       lwzu    r28, -4(r4)
+       bdz     L(xe0)
+
+L(loop0):
+       slw     r12, r28, r6
+       srw     r24, r28, r8
+       lwzu    r28, -4(r4)
+       or      r24, r0, r24
+       stwu    r24, -4(r7)
+       mr      r0, r12
+       bdnz    L(loop0)        C taken at most once!
+
+L(xe0):        slw     r12, r28, r6
+       srw     r24, r28, r8
+       or      r24, r0, r24
+       stwu    r24, -4(r7)
+       mr      r0, r12
+
+L(e):  srwi    r5, r5, 2       C count for unrolled loop
+       addi    r5, r5, -1      C one round is done after the loop
+       mtctr   r5
+       lwz     r28, -4(r4)
+       lwz     r29, -8(r4)
+       lwz     r30, -12(r4)
+       lwzu    r31, -16(r4)
+
+L(loopU):
+       slw     r9, r28, r6
+       srw     r24, r28, r8
+       lwz     r28, -4(r4)
+       slw     r10, r29, r6
+       srw     r25, r29, r8
+       lwz     r29, -8(r4)
+       slw     r11, r30, r6
+       srw     r26, r30, r8
+       lwz     r30, -12(r4)
+       slw     r12, r31, r6
+       srw     r27, r31, r8
+       lwzu    r31, -16(r4)
+       or      r24, r0, r24
+       stw     r24, -4(r7)
+       or      r25, r9, r25
+       stw     r25, -8(r7)
+       or      r26, r10, r26
+       stw     r26, -12(r7)
+       or      r27, r11, r27
+       stwu    r27, -16(r7)
+       mr      r0, r12
+       bdnz    L(loopU)
+
+       slw     r9, r28, r6
+       srw     r24, r28, r8
+       slw     r10, r29, r6
+       srw     r25, r29, r8
+       slw     r11, r30, r6
+       srw     r26, r30, r8
+       slw     r12, r31, r6
+       srw     r27, r31, r8
+       or      r24, r0, r24
+       stw     r24, -4(r7)
+       or      r25, r9, r25
+       stw     r25, -8(r7)
+       or      r26, r10, r26
+       stw     r26, -12(r7)
+       or      r27, r11, r27
+       stw     r27, -16(r7)
+       stw     r12, -20(r7)    C least significant limb, zeros shifted in
+       lmw     r24, 8(r1)      C restore registers
+       addi    r1, r1, 48      C release the frame
+       blr
+EPILOGUE(mpn_lshift)
diff --git a/mpn/powerpc32/mod_34lsub1.asm b/mpn/powerpc32/mod_34lsub1.asm

new file mode 100644 (file)

index 0000000..fa0f013
--- /dev/null
+++ b/mpn/powerpc32/mod_34lsub1.asm
@@ -0,0 +1,134 @@
+dnl  PowerPC-32 mpn_mod_34lsub1 -- mpn remainder mod 2^24-1.
+
+dnl  Copyright 2002, 2003, 2005 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C                cycles/limb
+C 603e:            ?
+C 604e:            3
+C 75x (G3):        3
+C 7400,7410 (G4):  3
+C 744x,745x (G4+): 3
+C power4/ppc970:   2.5
+C power5:          2.5
+
+C mp_limb_t mpn_mod_34lsub1 (mp_srcptr src, mp_size_t size)
+C
+C There seems no need to schedule the loads back, the code is still 3.0 c/l
+C on 750/7400 no matter where they're placed.
+C
+C Alternatives:
+C
+C Fetching halfwords would allow a plain add for accumulating, instead of
+C adde and its serialization.  An outer loop would be required though, since
+C 2^16 halfwords can overflow.  lhz+add would be 2.0 c/l, but if there's
+C also a bdz or bdnz for each and a pointer update say every three limbs
+C then the total would be 2.67 c/l which isn't much faster than the current
+C simpler code.
+
+ASM_START()
+PROLOGUE(mpn_mod_34lsub1)
+
+       C r3    src
+       C r4    size
+
+       mtctr   r4                      C CTR = size
+       addic   r6, r3, 8               C &src[2], and clear CA
+
+       lwz     r3, 0(r3)               C acc0 = src[0]
+       bdz     L(done)                 C size 1: return src[0] as loaded
+
+       lwz     r4, -4(r6)              C acc1 = src[1]
+       bdz     L(two)                  C size 2: fold acc0 and acc1 only
+
+       lwz     r5, 0(r6)               C acc2 = src[2]
+       lis     r7, 0                   C no carry if just three limbs
+
+       bdz     L(three)                C size 3: fold the three accumulators
+       lis     r7, 1                   C 0x10000 carry pos
+
+L(top):
+       C r3    acc0
+       C r4    acc1
+       C r5    acc2
+       C r6    src, incrementing
+       C r7    carry pos
+
+       lwz     r0, 4(r6)
+       adde    r3, r3, r0              C acc0 += limb + CA
+       bdz     L(end0)
+
+       lwz     r0, 8(r6)
+       adde    r4, r4, r0              C acc1 += limb + CA
+       bdz     L(end1)
+
+       lwzu    r0, 12(r6)
+       adde    r5, r5, r0              C acc2 += limb + CA
+       bdnz    L(top)
+
+
+       srwi    r7, r7, 8               C ended on acc2: carry pos ends up as 1
+L(end0):
+       srwi    r7, r7, 8               C ended on acc0: carry pos 0x100
+L(end1):
+       subfe   r0, r0, r0              C -1 if not CA
+
+       andc    r7, r7, r0              C final carry, 0x10000, 0x100, 1 or 0
+L(three):
+       rlwinm  r6, r3, 0,8,31          C acc0 low
+
+       add     r7, r7, r6
+       rlwinm  r6, r3, 8,24,31         C acc0 high
+
+       add     r7, r7, r6
+       rlwinm  r6, r4, 8,8,23          C acc1 low
+
+       add     r7, r7, r6
+       rlwinm  r6, r4, 16,16,31        C acc1 high
+
+       add     r7, r7, r6
+       rlwinm  r6, r5, 16,8,15         C acc2 low
+
+       add     r7, r7, r6
+       rlwinm  r6, r5, 24,8,31         C acc2 high
+
+       add     r3, r7, r6              C return the sum of all the pieces
+
+L(done):
+       blr
+
+L(two):
+       C r3    acc0
+       C r4    acc1
+
+       rlwinm  r5, r3, 8,24,31         C acc0 high
+       rlwinm  r3, r3, 0,8,31          C acc0 low
+
+       add     r3, r3, r5              C acc0 high + low
+       rlwinm  r5, r4, 16,16,31        C acc1 high
+
+       add     r3, r3, r5              C add acc1 high
+       rlwinm  r5, r4, 8,8,23          C acc1 low
+
+       add     r3, r3, r5              C add acc1 low
+
+       blr
+
+EPILOGUE()
diff --git a/mpn/powerpc32/mode1o.asm b/mpn/powerpc32/mode1o.asm

new file mode 100644 (file)

index 0000000..ba9a393
--- /dev/null
+++ b/mpn/powerpc32/mode1o.asm
@@ -0,0 +1,116 @@
+dnl  PowerPC-32 mpn_modexact_1_odd -- mpn by limb exact remainder.
+
+dnl  Copyright 2002, 2003, 2005, 2006 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C                cycles/limb
+C 603e:             ?
+C 604e:             6.0
+C 75x (G3):         6.0-13.0, depending on divisor
+C 7400,7410 (G4):   6.0-13.0, depending on divisor
+C 744x,745x (G4+):  8.0-10.0, depending on divisor
+C power4/ppc970:   12.0
+C power5:          12.0
+
+
+C mp_limb_t mpn_modexact_1_odd (mp_srcptr src, mp_size_t size,
+C                               mp_limb_t divisor);
+C mp_limb_t mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size,
+C                                mp_limb_t divisor, mp_limb_t carry);
+C
+C For PIC, the inverse is established arithmetically since it measures about
+C 5 cycles faster than the nonsense needed to access binvert_limb_table in
+C SVR4 or Darwin style PIC.  AIX might be better, since it avoids bl/mflr to
+C get at the GOT/TOC/whatever.
+C
+C Using divwu for size==1 measured about 10 cycles slower on 604e, or about
+C 3-5 cycles faster on 750.  For now it doesn't seem worth bothering with.
+C
+C The loop allows an early-out on mullw for the inverse, and on mulhwu for
+C the divisor.  So the fastest is for instance divisor==1 (inverse==-1), and
+C the slowest is anything giving a full 32-bits in both, such as
+C divisor==0xDEADBEEF (inverse==0x904B300F).  These establish the stated
+C range above for 750 and 7400.
+
+
+ASM_START()
+
+EXTERN(binvert_limb_table)
+
+PROLOGUE(mpn_modexact_1_odd)
+       li      r6, 0                   C carry = 0, then fall into the 1c entry
+
+PROLOGUE(mpn_modexact_1c_odd)
+
+       mtctr   r4                      C size
+
+ifdef(`PIC_SLOW',`
+C Load from our table with PIC is so slow on Linux and Darwin that we avoid it
+       rlwinm  r7, r5, 1,28,28         C (divisor << 1) & 8
+       rlwinm  r8, r5, 2,28,28         C (divisor << 2) & 8
+       xor     r7, r7, r8              C ((divisor << 1) ^ (divisor << 2)) & 8
+       rlwinm  r4, r5, 0,28,31         C divisor low 4 bits, speedup mullw
+       xor     r4, r4, r7              C inverse, 4 bits
+       mullw   r7, r4, r4              C i*i
+       slwi    r4, r4, 1               C 2*i
+       rlwinm  r8, r5, 0,24,31         C divisor low 8 bits, speedup mullw
+       mullw   r7, r7, r8              C i*i*d
+       sub     r4, r4, r7              C inverse, 8 bits
+',`
+       LEA(    r7, binvert_limb_table)
+       rlwinm  r4, r5, 31,25,31        C (divisor/2) & 0x7F
+       lbzx    r4, r4,r7               C inverse, 8 bits
+')
+
+       mullw   r7, r4, r4              C i*i
+       slwi    r4, r4, 1               C 2*i
+       mullw   r7, r5, r7              C i*i*d   [i*i is 16 bits, so second operand]
+       sub     r4, r4, r7              C inverse, 16 bits
+       mullw   r7, r4, r4              C i*i
+       slwi    r4, r4, 1               C 2*i
+       mullw   r7, r7, r5              C i*i*d
+       lwz     r0, 0(r3)               C src[0]
+       sub     r4, r4, r7              C inverse, 32 bits
+       subfc   r7, r6, r0              C l = src[0] - carry
+
+       mullw   r7, r7, r4              C q = l * inverse
+       bdz     L(one)                  C size 1: only the final carry is left
+
+       lwzu    r0, 4(r3)               C src[1]
+       mulhwu  r6, r7, r5              C carry = high(q*divisor)
+       subfe   r7, r6, r0              C l = src[1] - carry
+       bdz     L(two)                  C size 2: skip the loop
+
+L(top):
+       mullw   r7, r7, r4              C q = l * inverse
+       lwzu    r0, 4(r3)               C src[i]
+       mulhwu  r6, r7, r5              C carry = high(q*divisor)
+       subfe   r7, r6, r0              C l = src[i] - carry
+       bdnz    L(top)
+
+L(two):        mullw   r7, r7, r4              C q = l * inverse
+L(one):        subfe   r3, r3, r3              C ca 0 or -1
+       mulhwu  r6, r7, r5              C carry = high(q*divisor)
+       subf    r3, r3, r6              C carry + ca
+       blr
+
+EPILOGUE(mpn_modexact_1c_odd)
+EPILOGUE(mpn_modexact_1_odd)
+ASM_END()
diff --git a/mpn/powerpc32/mul_1.asm b/mpn/powerpc32/mul_1.asm

new file mode 100644 (file)

index 0000000..e6f44e2
--- /dev/null
+++ b/mpn/powerpc32/mul_1.asm
@@ -0,0 +1,90 @@
+dnl  PowerPC-32 mpn_mul_1 -- Multiply a limb vector with a limb and store the
+dnl  result in a second limb vector.
+
+dnl  Copyright 1995, 1997, 2000, 2002, 2003, 2005 Free Software Foundation,
+dnl  Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                cycles/limb
+C 603e:            ?
+C 604e:            4.0
+C 75x (G3):        4.5-11
+C 7400,7410 (G4):  4.5-11
+C 744x,745x (G4+): 6.0
+C power4/ppc970:   6.0
+C power5:          5.63
+
+C INPUT PARAMETERS
+C rp   r3
+C up   r4
+C n    r5
+C vl   r6
+
+ASM_START()
+PROLOGUE(mpn_mul_1)
+       mtctr   r5              C ctr = n, r5 is reused as a temporary below
+       addi    r3,r3,-4        C adjust res_ptr, it's offset before it's used
+       li      r12,0           C clear upper product reg
+       addic   r0,r0,0         C clear cy
+C Start software pipeline
+       lwz     r8,0(r4)        C r8 = up[0]
+       bdz     L(end3)         C n = 1, single product and no carry chain
+       lwzu    r9,4(r4)        C r9 = up[1]
+       mullw   r11,r8,r6       C low word of up[0] * vl
+       mulhwu  r0,r8,r6        C high word of up[0] * vl
+       bdz     L(end1)         C n = 2, just drain the pipeline
+C Software pipelined main loop
+L(loop):
+       lwz     r8,4(r4)        C next limb, up is advanced by the lwzu below
+       mullw   r10,r9,r6       C low word of r9 * vl
+       adde    r5,r11,r12      C pending low word + previous high word + cy
+       mulhwu  r12,r9,r6       C high word of r9 * vl
+       stw     r5,4(r3)        C store result limb
+       bdz     L(end2)         C odd n leaves the loop here
+       lwzu    r9,8(r4)        C next limb, up advances by two limbs
+       mullw   r11,r8,r6       C low word of r8 * vl
+       adde    r7,r10,r0       C pending low word + previous high word + cy
+       mulhwu  r0,r8,r6        C high word of r8 * vl
+       stwu    r7,8(r3)        C store result limb, rp advances by two limbs
+       bdnz    L(loop)
+C Finish software pipeline
+L(end1):
+       mullw   r10,r9,r6       C low word of the last product
+       adde    r5,r11,r12
+       mulhwu  r12,r9,r6       C high word of the last product
+       stw     r5,4(r3)
+       adde    r7,r10,r0
+       stwu    r7,8(r3)
+       addze   r3,r12          C return last high word + cy
+       blr
+L(end2):
+       mullw   r11,r8,r6       C low word of the last product
+       adde    r7,r10,r0
+       mulhwu  r0,r8,r6        C high word of the last product
+       stwu    r7,8(r3)
+       adde    r5,r11,r12
+       stw     r5,4(r3)
+       addze   r3,r0           C return last high word + cy
+       blr
+L(end3):
+       mullw   r11,r8,r6       C low word of up[0] * vl
+       stw     r11,4(r3)       C rp[0], stored before r3 is overwritten
+       mulhwu  r3,r8,r6        C return the high word
+       blr
+EPILOGUE(mpn_mul_1)
diff --git a/mpn/powerpc32/powerpc-defs.m4 b/mpn/powerpc32/powerpc-defs.m4

new file mode 100644 (file)

index 0000000..33cf97e
--- /dev/null
+++ b/mpn/powerpc32/powerpc-defs.m4
@@ -0,0 +1,93 @@
+divert(-1)
+
+dnl  m4 macros for PowerPC assembler (32 and 64 bit).
+
+dnl  Copyright 2000, 2002, 2003 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+dnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)
+dnl
+dnl  Emits TEXT, ALIGN(4), GLOBL, TYPE and the entry label.  This is the same
+dnl  as the default in mpn/asm-defs.m4, but with ALIGN(4) not 8.
+dnl
+dnl  4-byte alignment is normally enough, certainly it's what gcc gives.  We
+dnl  don't want bigger alignment within PROLOGUE since it can introduce
+dnl  padding into multiple-entrypoint routines, and with gas such padding is
+dnl  zero words, which are not valid instructions.
+
+define(`PROLOGUE_cpu',
+m4_assert_numargs(1)
+`      TEXT
+       ALIGN(4)
+       GLOBL   `$1' GLOBL_ATTR
+       TYPE(`$1',`function')
+`$1'LABEL_SUFFIX')
+
+
+dnl  Usage: r0 ... r31, v0 ... v31, f0 ... f31, cr0 ... cr7
+dnl
+dnl  Register names, either left as "r0" etc or mapped to plain 0 etc,
+dnl  according to the result of the GMP_ASM_POWERPC_REGISTERS configure
+dnl  test.
+
+ifelse(WANT_R_REGISTERS,no,`
+forloop(i,0,31,`deflit(`r'i,i)')
+forloop(i,0,31,`deflit(`v'i,i)')
+forloop(i,0,31,`deflit(`f'i,i)')
+forloop(i,0,7, `deflit(`cr'i,i)')
+')
+
+
+dnl  Usage: ASSERT(cond,instructions)
+dnl
+dnl  If WANT_ASSERT is 1, output the given instructions and expect the given
+dnl  flags condition to then be satisfied.  For example,
+dnl
+dnl         ASSERT(eq, `cmpwi r6, 123')
+dnl
+dnl  The instructions can be omitted to just assert a flags condition with
+dnl  no extra calculation.  For example,
+dnl
+dnl         ASSERT(ne)
+dnl
+dnl  The condition can be omitted to just output the given instructions when
+dnl  assertion checking is wanted.  For example,
+dnl
+dnl         ASSERT(, `mr r11, r0')
+dnl
+dnl  Failure executes a zero word as an illegal instruction; not ideal, since
+dnl  it marks the start of a traceback table in the 64-bit ABI, but assertions
+dnl  are development-only.  ASSERT_counter keeps each L(ASSERT_ok) label unique.
+
+define(ASSERT,
+m4_assert_numargs_range(1,2)
+m4_assert_defined(`WANT_ASSERT')
+`ifelse(WANT_ASSERT,1,
+       `C ASSERT
+       $2
+ifelse(`$1',,,
+`      b$1     L(ASSERT_ok`'ASSERT_counter)
+       W32     0       C assertion failed
+L(ASSERT_ok`'ASSERT_counter):
+define(`ASSERT_counter',incr(ASSERT_counter))
+')')')
+
+define(ASSERT_counter,1)
+
+
+divert
diff --git a/mpn/powerpc32/rshift.asm b/mpn/powerpc32/rshift.asm

new file mode 100644 (file)

index 0000000..b069a93
--- /dev/null
+++ b/mpn/powerpc32/rshift.asm
@@ -0,0 +1,154 @@
+dnl  PowerPC-32 mpn_rshift -- Shift a number right.
+
+dnl  Copyright 1995, 1998, 2000, 2002, 2003, 2004, 2005 Free Software
+dnl  Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                cycles/limb
+C 603e:            ?
+C 604e:            3.0
+C 75x (G3):        3.0
+C 7400,7410 (G4):  3.0
+C 7445,7455 (G4+): 2.5
+C 7447,7457 (G4+): 2.25
+C power4/ppc970:   2.5
+C power5:          2.5
+
+C INPUT PARAMETERS
+C rp   r3
+C up   r4
+C n    r5
+C cnt  r6
+
+ASM_START()
+PROLOGUE(mpn_rshift)
+       cmpwi   cr0, r5, 12     C more than 12 limbs?
+       addi    r7, r3, -4      C dst-4
+       bgt     L(BIG)          C branch if more than 12 limbs
+
+       mtctr   r5              C copy size into CTR
+       subfic  r8, r6, 32      C r8 = 32 - cnt, the complementary left shift
+       lwz     r11, 0(r4)      C load first s1 limb
+       slw     r3, r11, r8     C compute function return value
+       bdz     L(end1)         C single limb, only the top store remains
+
+L(oop):        lwzu    r10, 4(r4)      C next higher source limb
+       srw     r9, r11, r6     C low part from the current limb
+       slw     r12, r10, r8    C high part from the next limb
+       or      r9, r9, r12     C combine into one result limb
+       stwu    r9, 4(r7)
+       bdz     L(end2)         C the last loaded limb is in r10
+       lwzu    r11, 4(r4)      C same step with r10 and r11 swapped
+       srw     r9, r10, r6
+       slw     r12, r11, r8
+       or      r9, r9, r12
+       stwu    r9, 4(r7)
+       bdnz    L(oop)
+
+L(end1):
+       srw     r0, r11, r6     C most significant result limb, zero filled
+       stw     r0, 4(r7)
+       blr
+L(end2):
+       srw     r0, r10, r6     C most significant result limb, zero filled
+       stw     r0, 4(r7)
+       blr
+
+L(BIG):
+       stmw    r24, -32(r1)    C save registers we are supposed to preserve
+       lwz     r9, 0(r4)       C first source limb
+       subfic  r8, r6, 32      C r8 = 32 - cnt
+       slw     r3, r9, r8      C compute function return value
+       srw     r0, r9, r6      C r0 = pending low part of the next result limb
+       addi    r5, r5, -1      C limbs still to be loaded
+
+       andi.   r10, r5, 3      C count for spill loop
+       beq     L(e)            C nothing to peel off before the unrolled loop
+       mtctr   r10
+       lwzu    r28, 4(r4)
+       bdz     L(xe0)
+
+L(loop0):
+       srw     r12, r28, r6
+       slw     r24, r28, r8
+       lwzu    r28, 4(r4)
+       or      r24, r0, r24
+       stwu    r24, 4(r7)
+       mr      r0, r12         C carry the low part into the next limb
+       bdnz    L(loop0)        C taken at most once!
+
+L(xe0):        srw     r12, r28, r6
+       slw     r24, r28, r8
+       or      r24, r0, r24
+       stwu    r24, 4(r7)
+       mr      r0, r12         C carry the low part into the unrolled loop
+
+L(e):  srwi    r5, r5, 2       C count for unrolled loop
+       addi    r5, r5, -1      C the final group of four is done after the loop
+       mtctr   r5
+       lwz     r28, 4(r4)      C preload one group of four limbs
+       lwz     r29, 8(r4)
+       lwz     r30, 12(r4)
+       lwzu    r31, 16(r4)
+
+L(loopU):
+       srw     r9, r28, r6
+       slw     r24, r28, r8
+       lwz     r28, 4(r4)      C reload for the next group once r28 is consumed
+       srw     r10, r29, r6
+       slw     r25, r29, r8
+       lwz     r29, 8(r4)
+       srw     r11, r30, r6
+       slw     r26, r30, r8
+       lwz     r30, 12(r4)
+       srw     r12, r31, r6
+       slw     r27, r31, r8
+       lwzu    r31, 16(r4)
+       or      r24, r0, r24
+       stw     r24, 4(r7)
+       or      r25, r9, r25
+       stw     r25, 8(r7)
+       or      r26, r10, r26
+       stw     r26, 12(r7)
+       or      r27, r11, r27
+       stwu    r27, 16(r7)
+       mr      r0, r12         C carry the low part into the next group
+       bdnz    L(loopU)
+
+       srw     r9, r28, r6     C final group, same as the loop body without loads
+       slw     r24, r28, r8
+       srw     r10, r29, r6
+       slw     r25, r29, r8
+       srw     r11, r30, r6
+       slw     r26, r30, r8
+       srw     r12, r31, r6
+       slw     r27, r31, r8
+       or      r24, r0, r24
+       stw     r24, 4(r7)
+       or      r25, r9, r25
+       stw     r25, 8(r7)
+       or      r26, r10, r26
+       stw     r26, 12(r7)
+       or      r27, r11, r27
+       stw     r27, 16(r7)
+
+       stw     r12, 20(r7)     C most significant result limb, zero filled
+       lmw     r24, -32(r1)    C restore registers
+       blr
+EPILOGUE(mpn_rshift)
diff --git a/mpn/powerpc32/sqr_diagonal.asm b/mpn/powerpc32/sqr_diagonal.asm

new file mode 100644 (file)

index 0000000..d315349
--- /dev/null
+++ b/mpn/powerpc32/sqr_diagonal.asm
@@ -0,0 +1,103 @@
+dnl  PowerPC-32 mpn_sqr_diagonal.
+
+dnl  Copyright 2001, 2002, 2003, 2005 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                cycles/limb
+C 603e:             ?
+C 604e:             4.0
+C 75x (G3):        10.5
+C 7400,7410 (G4):  10.5
+C 744x,745x (G4+):  4.0
+C power4/ppc970:    8.6
+C power5:           7.0
+
+C INPUT PARAMETERS
+C rp   r3
+C up   r4
+C n    r5
+
+ASM_START()
+PROLOGUE(mpn_sqr_diagonal)
+       lwz     r6,0(r4)        C r6 = up[0]
+       mtctr   r5              C ctr = n
+
+       addi    r3,r3,-4        C bias rp so that stores use offsets 4 and 8
+       bdz     L(end1)         C n = 1
+
+       lwzu    r7,4(r4)        C r7 = up[1]
+       mullw   r9,r6,r6        C low word of up[0]^2
+       mulhwu  r11,r6,r6       C high word of up[0]^2
+       bdz     L(end2)         C n = 2
+
+       lwzu    r6,4(r4)        C r6 = up[2]
+       mullw   r8,r7,r7        C low word of up[1]^2
+       mulhwu  r10,r7,r7       C high word of up[1]^2
+       bdz     L(ende)         C n = 3
+
+L(loop):
+       lwzu    r7,4(r4)        C load next limb
+       stw     r9,4(r3)        C store older low word
+       mullw   r9,r6,r6        C then reuse r9 for the new low word
+       stwu    r11,8(r3)       C store older high word, rp advances two limbs
+       mulhwu  r11,r6,r6       C then reuse r11 for the new high word
+       bdz     L(endo)         C last limb is in r7
+       lwzu    r6,4(r4)        C same step with the register pairs swapped
+       stw     r8,4(r3)
+       mullw   r8,r7,r7
+       stwu    r10,8(r3)
+       mulhwu  r10,r7,r7
+       bdnz    L(loop)
+
+L(ende):
+       stw     r9,4(r3)        C older pending square, low word
+       mullw   r9,r6,r6        C square of the last limb, low word
+       stw     r11,8(r3)       C older pending square, high word
+       mulhwu  r11,r6,r6       C square of the last limb, high word
+       stw     r8,12(r3)       C newer pending square
+       stw     r10,16(r3)
+       stw     r9,20(r3)       C square of the last limb
+       stw     r11,24(r3)
+       blr
+L(endo):
+       stw     r8,4(r3)        C older pending square, low word
+       mullw   r8,r7,r7        C square of the last limb, low word
+       stw     r10,8(r3)       C older pending square, high word
+       mulhwu  r10,r7,r7       C square of the last limb, high word
+       stw     r9,12(r3)       C newer pending square
+       stw     r11,16(r3)
+       stw     r8,20(r3)       C square of the last limb
+       stw     r10,24(r3)
+       blr
+
+L(end2):
+       mullw   r8,r7,r7        C low word of up[1]^2
+       stw     r9,4(r3)        C rp[0]
+       mulhwu  r10,r7,r7       C high word of up[1]^2
+       stw     r11,8(r3)       C rp[1]
+       stw     r8,12(r3)       C rp[2]
+       stw     r10,16(r3)      C rp[3]
+       blr
+L(end1):
+       mullw   r9,r6,r6        C low word of up[0]^2
+       mulhwu  r11,r6,r6       C high word of up[0]^2
+       stw     r9,4(r3)        C rp[0]
+       stw     r11,8(r3)       C rp[1]
+       blr
+EPILOGUE(mpn_sqr_diagonal)
diff --git a/mpn/powerpc32/sublsh1_n.asm b/mpn/powerpc32/sublsh1_n.asm

new file mode 100644 (file)

index 0000000..c8711d0
--- /dev/null
+++ b/mpn/powerpc32/sublsh1_n.asm
@@ -0,0 +1,90 @@
+dnl  PowerPC-32 mpn_sublsh1_n -- rp[] = up[] - (vp[] << 1)
+
+dnl  Copyright 2003, 2005, 2007 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                cycles/limb
+C 603e:            ?
+C 604e:            4.0
+C 75x (G3):        5.0
+C 7400,7410 (G4):  5.0
+C 744x,745x (G4+): 5.0
+C power4/ppc970:   4.25
+C power5:          5.0
+
+C INPUT PARAMETERS
+C rp   r3
+C up   r4
+C vp   r5
+C n    r6
+
+define(`rp',`r3')
+define(`up',`r4')
+define(`vp',`r5')
+
+define(`s0',`r6')
+define(`s1',`r7')
+define(`u0',`r8')
+define(`v0',`r10')
+define(`v1',`r11')
+
+ASM_START()
+PROLOGUE(mpn_sublsh1_n)
+       mtctr   r6              C copy n in ctr, r6 is then reused as s0
+
+       lwz     v0, 0(vp)       C load v limb
+       lwz     u0, 0(up)       C load u limb
+       addic   up, up, -4      C update up; set cy, meaning no borrow in
+       addi    rp, rp, -4      C update rp
+       slwi    s1, v0, 1       C first shifted v limb, low bit is zero
+       bdz     L(end)          C If done, skip loop
+
+L(loop):
+       lwz     v1, 4(vp)       C load v limb
+       subfe   s1, s1, u0      C u limb - shifted v limb - borrow, set cy
+       srwi    s0, v0, 31      C shift down previous v limb
+       stw     s1, 4(rp)       C store result limb
+       lwzu    u0, 8(up)       C load u limb and update up
+       rlwimi  s0, v1, 1, 0,30 C left shift v limb and merge with prev v limb
+
+       bdz     L(exit)         C decrement ctr and exit if done
+
+       lwzu    v0, 8(vp)       C load v limb and update vp
+       subfe   s0, s0, u0      C u limb - shifted v limb - borrow, set cy
+       srwi    s1, v1, 31      C shift down previous v limb
+       stwu    s0, 8(rp)       C store result limb and update rp
+       lwz     u0, 4(up)       C load u limb
+       rlwimi  s1, v0, 1, 0,30 C left shift v limb and merge with prev v limb
+
+       bdnz    L(loop)         C decrement ctr and loop back
+
+L(end):        subfe   r7, s1, u0      C last limb: u - shifted v - borrow
+       srwi    r4, v0, 31      C bit shifted out of the top v limb
+       stw     r7, 4(rp)       C store last result limb
+       subfze  r3, r4          C -(shifted-out bit) - borrow
+       neg     r3, r3          C return shifted-out bit + borrow
+       blr
+L(exit):
+       subfe   r7, s0, u0      C last limb: u - shifted v - borrow
+       srwi    r4, v1, 31      C bit shifted out of the top v limb
+       stw     r7, 8(rp)       C store last result limb
+       subfze  r3, r4          C -(shifted-out bit) - borrow
+       neg     r3, r3          C return shifted-out bit + borrow
+       blr
+EPILOGUE()
diff --git a/mpn/powerpc32/submul_1.asm b/mpn/powerpc32/submul_1.asm

new file mode 100644 (file)

index 0000000..ae40bb4
--- /dev/null
+++ b/mpn/powerpc32/submul_1.asm
@@ -0,0 +1,136 @@
+dnl  PowerPC-32 mpn_submul_1 -- Multiply a limb vector with a limb and subtract
+dnl  the result from a second limb vector.
+
+dnl  Copyright 1995, 1997, 1998, 2000, 2002, 2005 Free Software Foundation,
+dnl  Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                cycles/limb
+C 603e:            ?
+C 604e:            7.5
+C 75x (G3):        9.3-15
+C 7400,7410 (G4):  9.3-15
+C 744x,745x (G4+): 10.5
+C power4/ppc970:   6.75
+C power5:          6.5
+
+C INPUT PARAMETERS
+C rp   r3
+C up   r4
+C n    r5
+C vl   r6
+
+C This is optimized for the PPC604.  See addmul_1.asm for additional comments.
+
+ASM_START()
+PROLOGUE(mpn_submul_1)
+       cmpwi   cr0,r5,9        C more than 9 limbs?
+       bgt     cr0,L(big)      C branch if more than 9 limbs
+
+       mtctr   r5
+       lwz     r0,0(r4)        C up[0]
+       mullw   r7,r0,r6        C low word of product
+       mulhwu  r10,r0,r6       C high word, kept as cy_limb
+       lwz     r9,0(r3)        C rp[0]
+       subfc   r8,r7,r9        C rp[0] - low word, cy means no borrow
+       addc    r7,r7,r8        C invert cy (r7 is junk)
+       addi    r3,r3,-4
+       bdz     L(end)
+L(loop):
+       lwzu    r0,4(r4)        C next up limb
+       stwu    r8,4(r3)        C store the previous result limb
+       mullw   r8,r0,r6        C low word of product
+       adde    r7,r8,r10       C low word + cy_limb + borrow
+       mulhwu  r10,r0,r6       C high word of product
+       lwz     r9,4(r3)        C next rp limb
+       addze   r10,r10         C new cy_limb, absorbs carry from the adde
+       subfc   r8,r7,r9        C rp limb - subtrahend, cy means no borrow
+       addc    r7,r7,r8        C invert cy (r7 is junk)
+       bdnz    L(loop)
+L(end):        stw     r8,4(r3)        C store the last result limb
+       addze   r3,r10          C return cy_limb + final borrow
+       blr
+
+L(big):        stmw    r30,-32(r1)     C save r30 and r31 below the stack pointer
+       addi    r5,r5,-1        C the first limb is handled before the loop
+       srwi    r0,r5,2         C number of four-limb groups
+       mtctr   r0
+
+       lwz     r7,0(r4)        C up[0]
+       mullw   r8,r7,r6        C low word of product
+       mulhwu  r0,r7,r6        C high word, kept as cy_limb
+       lwz     r7,0(r3)        C rp[0]
+       subfc   r7,r8,r7        C rp[0] - low word, cy means no borrow
+       addc    r8,r8,r7        C invert cy, r8 is junk
+       stw     r7,0(r3)
+
+L(loopU):
+       lwz     r7,4(r4)        C load four up limbs
+       lwz     r12,8(r4)
+       lwz     r30,12(r4)
+       lwzu    r31,16(r4)
+       mullw   r8,r7,r6        C four low words
+       mullw   r9,r12,r6
+       mullw   r10,r30,r6
+       mullw   r11,r31,r6
+       adde    r8,r8,r0        C add cy_limb
+       mulhwu  r0,r7,r6        C each high word feeds the next adde
+       lwz     r7,4(r3)        C rp limbs replace the consumed up limbs
+       adde    r9,r9,r0
+       mulhwu  r0,r12,r6
+       lwz     r12,8(r3)
+       adde    r10,r10,r0
+       mulhwu  r0,r30,r6
+       lwz     r30,12(r3)
+       adde    r11,r11,r0
+       mulhwu  r0,r31,r6
+       lwz     r31,16(r3)
+       addze   r0,r0           C new cy_limb
+       subfc   r7,r8,r7        C subtract the four limbs, borrow chained via cy
+       stw     r7,4(r3)
+       subfe   r12,r9,r12
+       stw     r12,8(r3)
+       subfe   r30,r10,r30
+       stw     r30,12(r3)
+       subfe   r31,r11,r31
+       stwu    r31,16(r3)
+       subfe   r11,r11,r11     C invert ...
+       addic   r11,r11,1       C ... carry
+       bdnz    L(loopU)
+
+       andi.   r31,r5,3        C limbs left over after the four-limb groups
+       mtctr   r31
+       beq     cr0,L(endx)     C none left
+
+L(loopE):
+       lwzu    r7,4(r4)        C next up limb
+       mullw   r8,r7,r6        C low word of product
+       adde    r8,r8,r0        C add cy_limb
+       mulhwu  r0,r7,r6        C high word of product
+       lwz     r7,4(r3)        C next rp limb
+       addze   r0,r0           C new cy_limb
+       subfc   r7,r8,r7        C rp limb - subtrahend, cy means no borrow
+       addc    r8,r8,r7        C invert cy, r8 is junk
+       stwu    r7,4(r3)
+       bdnz    L(loopE)
+L(endx):
+       addze   r3,r0           C return cy_limb + final borrow
+       lmw     r30,-32(r1)     C restore r30 and r31
+       blr
+EPILOGUE(mpn_submul_1)
diff --git a/mpn/powerpc32/umul.asm b/mpn/powerpc32/umul.asm

new file mode 100644 (file)

index 0000000..400f009
--- /dev/null
+++ b/mpn/powerpc32/umul.asm
@@ -0,0 +1,39 @@
+dnl PowerPC-32 umul_ppmm -- support for longlong.h
+
+dnl Copyright 2000, 2001 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser
+dnl General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_umul_ppmm (mp_limb_t *lowptr, mp_limb_t m1, mp_limb_t m2);
+C
+
+ASM_START()
+PROLOGUE(mpn_umul_ppmm)
+
+       C r3    lowptr
+       C r4    m1
+       C r5    m2
+
+       mullw   r0, r4, r5      C low word of m1 * m2
+       mulhwu  r9, r4, r5      C high word of m1 * m2, unsigned
+       stw     r0, 0(r3)       C *lowptr = low word
+       mr      r3, r9          C return the high word
+       blr
+
+EPILOGUE(mpn_umul_ppmm)
diff --git a/mpn/powerpc32/vmx/copyd.asm b/mpn/powerpc32/vmx/copyd.asm

new file mode 100644 (file)

index 0000000..e56f21c
--- /dev/null
+++ b/mpn/powerpc32/vmx/copyd.asm
@@ -0,0 +1,192 @@
+dnl  PowerPC-32/VMX and PowerPC-64/VMX mpn_copyd.
+
+dnl  Copyright 2006 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                16-byte coaligned      unaligned
+C                   cycles/limb        cycles/limb
+C 7400,7410 (G4):       0.5                0.64
+C 744x,745x (G4+):      0.75               0.82
+C 970 (G5):             0.78               1.02                (64-bit limbs)
+
+C STATUS
+C  * Works for all sizes and alignments.
+
+C TODO
+C  * Optimize unaligned case.  Some basic tests with 2-way and 4-way unrolling
+C    indicate that we can reach 0.56 c/l for 7400, 0.75 c/l for 745x, and 0.80
+C    c/l for 970.
+C  * Consider using VMX instructions also for head and tail, by using some
+C    read-modify-write tricks.
+C  * The VMX code is used from the smallest sizes it handles, but measurements
+C    show a large speed bump at the cutoff points.  Small copying (perhaps
+C    using some read-modify-write technique) should be optimized.
+C  * Make a mpn_com based on this code.
+
+define(`GMP_LIMB_BYTES', eval(GMP_LIMB_BITS/8))
+define(`LIMBS_PER_VR',  eval(16/GMP_LIMB_BYTES))
+define(`LIMBS_PER_2VR', eval(32/GMP_LIMB_BYTES))
+
+
+ifelse(GMP_LIMB_BITS,32,`
+       define(`LIMB32',`       $1')
+       define(`LIMB64',`')
+',`
+       define(`LIMB32',`')
+       define(`LIMB64',`       $1')
+')
+
+C INPUT PARAMETERS
+define(`rp',   `r3')
+define(`up',   `r4')
+define(`n',    `r5')
+
+define(`us',   `v4')
+
+
+ASM_START()
+PROLOGUE(mpn_copyd)
+
+LIMB32(`slwi.  r0, n, 2        ')      C r0 = n in bytes, cr0 records n = 0
+LIMB64(`sldi.  r0, n, 3        ')      C r0 = n in bytes, cr0 records n = 0
+       add     rp, rp, r0              C rp = one past the end of the destination
+       add     up, up, r0              C up = one past the end of the source
+
+LIMB32(`cmpi   cr7, n, 11      ')      C VMX path from 11 limbs up
+LIMB64(`cmpdi  cr7, n, 5       ')      C VMX path from 5 limbs up
+       bge     cr7, L(big)
+
+       beqlr   cr0                     C return at once when n = 0
+
+C Handle small cases with plain operations
+       mtctr   n
+L(topS):
+LIMB32(`lwz    r0, -4(up)      ')
+LIMB64(`ld     r0, -8(up)      ')
+       addi    up, up, -GMP_LIMB_BYTES
+LIMB32(`stw    r0, -4(rp)      ')
+LIMB64(`std    r0, -8(rp)      ')
+       addi    rp, rp, -GMP_LIMB_BYTES
+       bdnz    L(topS)
+       blr
+
+C Handle large cases with VMX operations
+L(big):
+       addi    rp, rp, -16             C point at the topmost 16 bytes
+       addi    up, up, -16
+       mfspr   r12, 256                C keep the old VRSAVE for L(ret)
+       oris    r0, r12, 0xf800         C Set VRSAVE bit 0-4
+       mtspr   256, r0
+
+LIMB32(`rlwinm.        r7, rp, 30,30,31')      C (rp >> 2) mod 4
+LIMB64(`rlwinm.        r7, rp, 29,31,31')      C (rp >> 3) mod 2
+       beq     L(rp_aligned)
+
+       subf    n, r7, n                C n -= limbs copied while aligning rp
+L(top0):
+LIMB32(`lwz    r0, 12(up)      ')
+LIMB64(`ld     r0, 8(up)       ')
+       addi    up, up, -GMP_LIMB_BYTES
+LIMB32(`addic. r7, r7, -1      ')
+LIMB32(`stw    r0, 12(rp)      ')
+LIMB64(`std    r0, 8(rp)       ')
+       addi    rp, rp, -GMP_LIMB_BYTES
+LIMB32(`bne    L(top0)         ')      C 64-bit limbs need one pass only
+
+L(rp_aligned):
+
+LIMB32(`rlwinm.        r0, up, 30,30,31')      C (up >> 2) mod 4
+LIMB64(`rlwinm.        r0, up, 29,31,31')      C (up >> 3) mod 2
+
+LIMB64(`srdi   r7, n, 2        ')      C loop count corresponding to n
+LIMB32(`srwi   r7, n, 3        ')      C loop count corresponding to n
+       mtctr   r7                      C copy n to count register
+
+       li      r10, -16                C index of the second vector per iteration
+
+       beq     L(up_aligned)
+
+       lvsl    us, 0, up               C permute control from the alignment of up
+
+       addi    up, up, 16
+LIMB32(`andi.  r0, n, 0x4      ')      C odd number of 16-byte vectors?
+LIMB64(`andi.  r0, n, 0x2      ')      C odd number of 16-byte vectors?
+       beq     L(1)
+       lvx     v0, 0, up
+       lvx     v2, r10, up
+       vperm   v3, v2, v0, us          C realign across the two source vectors
+       stvx    v3, 0, rp               C store the odd vector before the loop
+       addi    up, up, -32
+       addi    rp, rp, -16
+       b       L(lpu)
+L(1):  lvx     v2, 0, up               C prime v2 for the first vperm in L(lpu)
+       addi    up, up, -16
+       b       L(lpu)
+
+       ALIGN(32)
+L(lpu):        lvx     v0, 0, up
+       vperm   v3, v0, v2, us
+       stvx    v3, 0, rp
+       lvx     v2, r10, up
+       addi    up, up, -32
+       vperm   v3, v2, v0, us
+       stvx    v3, r10, rp
+       addi    rp, rp, -32
+       bdnz    L(lpu)
+
+       b       L(tail)
+
+L(up_aligned):
+
+LIMB32(`andi.  r0, n, 0x4      ')      C odd number of 16-byte vectors?
+LIMB64(`andi.  r0, n, 0x2      ')      C odd number of 16-byte vectors?
+       beq     L(lpa)
+       lvx     v0, 0,   up
+       stvx    v0, 0,   rp             C copy the odd vector before the loop
+       addi    up, up, -16
+       addi    rp, rp, -16
+       b       L(lpa)
+
+       ALIGN(32)
+L(lpa):        lvx     v0, 0,   up
+       lvx     v1, r10, up
+       addi    up, up, -32
+       nop
+       stvx    v0, 0,   rp
+       stvx    v1, r10, rp
+       addi    rp, rp, -32
+       bdnz    L(lpa)
+
+L(tail):
+LIMB32(`rlwinm.        r7, n, 0,30,31  ')      C r7 = n mod 4
+LIMB64(`rlwinm.        r7, n, 0,31,31  ')      C r7 = n mod 2
+       beq     L(ret)
+LIMB32(`li     r10, 12         ')      C byte index, walks down from 12
+L(top2):
+LIMB32(`lwzx   r0, r10, up     ')
+LIMB64(`ld     r0, 8(up)       ')
+LIMB32(`addic. r7, r7, -1      ')
+LIMB32(`stwx   r0, r10, rp     ')
+LIMB64(`std    r0, 8(rp)       ')
+LIMB32(`addi   r10, r10, -GMP_LIMB_BYTES')
+LIMB32(`bne    L(top2)         ')      C 64-bit limbs have at most one left
+
+L(ret):        mtspr   256, r12                C restore the VRSAVE value saved at L(big)
+       blr
+EPILOGUE()
diff --git a/mpn/powerpc32/vmx/copyi.asm b/mpn/powerpc32/vmx/copyi.asm

new file mode 100644 (file)

index 0000000..6c8303a
--- /dev/null
+++ b/mpn/powerpc32/vmx/copyi.asm
@@ -0,0 +1,187 @@
+dnl  PowerPC-32/VMX and PowerPC-64/VMX mpn_copyi.
+
+dnl  Copyright 2006 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                16-byte coaligned      unaligned
+C                   cycles/limb        cycles/limb
+C 7400,7410 (G4):       0.5                0.64
+C 744x,745x (G4+):      0.75               0.82
+C 970 (G5):             0.78               1.02                (64-bit limbs)
+
+C STATUS
+C  * Works for all sizes and alignments.
+
+C TODO
+C  * Optimize unaligned case.  Some basic tests with 2-way and 4-way unrolling
+C    indicate that we can reach 0.56 c/l for 7400, 0.75 c/l for 745x, and 0.80
+C    c/l for 970.
+C  * Consider using VMX instructions also for head and tail, by using some
+C    read-modify-write tricks.
+C  * The VMX code is used from the smallest sizes it handles, but measurements
+C    show a large speed bump at the cutoff points.  Small copying (perhaps
+C    using some read-modify-write technique) should be optimized.
+C  * Make a mpn_com based on this code.
+
+define(`GMP_LIMB_BYTES', eval(GMP_LIMB_BITS/8))
+define(`LIMBS_PER_VR',  eval(16/GMP_LIMB_BYTES))
+define(`LIMBS_PER_2VR', eval(32/GMP_LIMB_BYTES))
+
+
+ifelse(GMP_LIMB_BITS,32,`
+       define(`LIMB32',`       $1')
+       define(`LIMB64',`')
+',`
+       define(`LIMB32',`')
+       define(`LIMB64',`       $1')
+')
+
+C INPUT PARAMETERS
+define(`rp',   `r3')
+define(`up',   `r4')
+define(`n',    `r5')
+
+define(`us',   `v4')
+
+
+ASM_START()
+PROLOGUE(mpn_copyi)
+
+LIMB32(`cmpi   cr7, n, 11      ')      C VMX path from 11 limbs up
+LIMB64(`cmpdi  cr7, n, 5       ')      C VMX path from 5 limbs up
+       bge     cr7, L(big)
+
+       or.     r0, n, n                C set cr0 from n
+       beqlr   cr0                     C return at once when n = 0
+
+C Handle small cases with plain operations
+       mtctr   n
+L(topS):
+LIMB32(`lwz    r0, 0(up)       ')
+LIMB64(`ld     r0, 0(up)       ')
+       addi    up, up, GMP_LIMB_BYTES
+LIMB32(`stw    r0, 0(rp)       ')
+LIMB64(`std    r0, 0(rp)       ')
+       addi    rp, rp, GMP_LIMB_BYTES
+       bdnz    L(topS)
+       blr
+
+C Handle large cases with VMX operations
+L(big):
+       mfspr   r12, 256                C keep the old VRSAVE for L(ret)
+       oris    r0, r12, 0xf800         C Set VRSAVE bit 0-4
+       mtspr   256, r0
+
+LIMB32(`rlwinm.        r7, rp, 30,30,31')      C (rp >> 2) mod 4
+LIMB64(`rlwinm.        r7, rp, 29,31,31')      C (rp >> 3) mod 2
+       beq     L(rp_aligned)
+
+       subfic  r7, r7, LIMBS_PER_VR    C limbs up to the next 16-byte boundary
+       subf    n, r7, n                C n -= limbs copied while aligning rp
+L(top0):
+LIMB32(`lwz    r0, 0(up)       ')
+LIMB64(`ld     r0, 0(up)       ')
+       addi    up, up, GMP_LIMB_BYTES
+LIMB32(`addic. r7, r7, -1      ')
+LIMB32(`stw    r0, 0(rp)       ')
+LIMB64(`std    r0, 0(rp)       ')
+       addi    rp, rp, GMP_LIMB_BYTES
+LIMB32(`bne    L(top0)         ')      C 64-bit limbs need one pass only
+
+L(rp_aligned):
+
+LIMB32(`rlwinm.        r0, up, 30,30,31')      C (up >> 2) mod 4
+LIMB64(`rlwinm.        r0, up, 29,31,31')      C (up >> 3) mod 2
+
+LIMB64(`srdi   r7, n, 2        ')      C loop count corresponding to n
+LIMB32(`srwi   r7, n, 3        ')      C loop count corresponding to n
+       mtctr   r7                      C copy n to count register
+
+       li      r10, 16                 C index of the second vector per iteration
+
+       beq     L(up_aligned)
+
+       lvsl    us, 0, up               C permute control from the alignment of up
+
+LIMB32(`andi.  r0, n, 0x4      ')      C odd number of 16-byte vectors?
+LIMB64(`andi.  r0, n, 0x2      ')      C odd number of 16-byte vectors?
+       beq     L(1)
+       lvx     v0, 0, up
+       lvx     v2, r10, up
+       vperm   v3, v0, v2, us          C realign across the two source vectors
+       stvx    v3, 0, rp               C store the odd vector before the loop
+       addi    up, up, 32
+       addi    rp, rp, 16
+       b       L(lpu)
+L(1):  lvx     v2, 0, up               C prime v2 for the first vperm in L(lpu)
+       addi    up, up, 16
+       b       L(lpu)
+
+       ALIGN(32)
+L(lpu):        lvx     v0, 0, up
+       vperm   v3, v2, v0, us
+       stvx    v3, 0, rp
+       lvx     v2, r10, up
+       addi    up, up, 32
+       vperm   v3, v0, v2, us
+       stvx    v3, r10, rp
+       addi    rp, rp, 32
+       bdnz    L(lpu)
+
+       addi    up, up, -16             C undo the one-vector lookahead before the tail
+       b       L(tail)
+
+L(up_aligned):
+
+LIMB32(`andi.  r0, n, 0x4      ')      C odd number of 16-byte vectors?
+LIMB64(`andi.  r0, n, 0x2      ')      C odd number of 16-byte vectors?
+       beq     L(lpa)
+       lvx     v0, 0,   up
+       stvx    v0, 0,   rp             C copy the odd vector before the loop
+       addi    up, up, 16
+       addi    rp, rp, 16
+       b       L(lpa)
+
+       ALIGN(32)
+L(lpa):        lvx     v0, 0,   up
+       lvx     v1, r10, up
+       addi    up, up, 32
+       nop
+       stvx    v0, 0,   rp
+       stvx    v1, r10, rp
+       addi    rp, rp, 32
+       bdnz    L(lpa)
+
+L(tail):
+LIMB32(`rlwinm.        r7, n, 0,30,31  ')      C r7 = n mod 4
+LIMB64(`rlwinm.        r7, n, 0,31,31  ')      C r7 = n mod 2
+       beq     L(ret)
+LIMB32(`li     r10, 0          ')      C byte index, walks up from 0
+L(top2):
+LIMB32(`lwzx   r0, r10, up     ')
+LIMB64(`ld     r0, 0(up)       ')
+LIMB32(`addic. r7, r7, -1      ')
+LIMB32(`stwx   r0, r10, rp     ')
+LIMB64(`std    r0, 0(rp)       ')
+LIMB32(`addi   r10, r10, GMP_LIMB_BYTES')
+LIMB32(`bne    L(top2)         ')      C 64-bit limbs have at most one left
+
+L(ret):        mtspr   256, r12                C restore the VRSAVE value saved at L(big)
+       blr
+EPILOGUE()
diff --git a/mpn/powerpc32/vmx/logops_n.asm b/mpn/powerpc32/vmx/logops_n.asm

new file mode 100644 (file)

index 0000000..7ed731e
--- /dev/null
+++ b/mpn/powerpc32/vmx/logops_n.asm
@@ -0,0 +1,299 @@
+dnl  PowerPC-32/VMX and PowerPC-64/VMX mpn_and_n, mpn_andn_n, mpn_nand_n,
+dnl  mpn_ior_n, mpn_iorn_n, mpn_nior_n, mpn_xor_n, mpn_xnor_n -- mpn bitwise
+dnl  logical operations.
+
+dnl  Copyright 2006 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C               and,ior,andn,nior,xor    iorn,xnor         nand
+C                   cycles/limb         cycles/limb    cycles/limb
+C 7400,7410 (G4):       1.39                 ?              ?
+C 744x,745x (G4+):      1.14                1.39           1.39
+C 970:                  1.7                 2.0            2.0
+
+C STATUS
+C  * Works for all sizes and alignment for 32-bit limbs.
+C  * Works for n >= 4 for 64-bit limbs; untested for smaller operands.
+C  * Current performance makes this pointless for 970
+
+C TODO
+C  * Might want to make variants when just one of the source operands needs
+C    vperm, and when neither needs it.  The latter runs 50% faster on 7400.
+C  * Idea: If the source operands are equally aligned, we could do the logops
+C    first, then vperm before storing!  That means we never need more than one
+C    vperm, ever!
+C  * Perhaps align `rp' after initial alignment loop?
+C  * Instead of having scalar code in the beginning and end, consider using
+C    read-modify-write vector code.
+C  * Software pipeline?  Hopefully not too important, this is hairy enough
+C    already.
+C  * At least be more clever about operand loading, i.e., load v operands before
+C    u operands, since v operands are sometimes negated.
+
+C Sizes derived from the limb width: bytes per limb, and the number of limbs
+C held by one and by two 16-byte vector registers.
+define(`GMP_LIMB_BYTES', eval(GMP_LIMB_BITS/8))
+define(`LIMBS_PER_VR',  eval(16/GMP_LIMB_BYTES))
+define(`LIMBS_PER_2VR', eval(32/GMP_LIMB_BYTES))
+
+C Optional complement hooks around the vector logop.  vnegb is applied to the
+C permuted v operand before logop, vnega to the result after logop.  Both
+C expand to nothing unless the selected operation overrides them below.
+define(`vnegb', `')            C default neg-before to null
+define(`vnega', `')            C default neg-after to null
+
+C Per-operation selection.  Each OPERATION_xxx symbol picks the exported
+C function name (func), the scalar instruction used on integer registers
+C (logopS), the vector instruction (logop), and where needed one of the
+C complement hooks above.
+ifdef(`OPERATION_and_n',
+`      define(`func',  `mpn_and_n')
+       define(`logopS',`and    $1,$2,$3')
+       define(`logop', `vand   $1,$2,$3')')
+ifdef(`OPERATION_andn_n',
+`      define(`func',  `mpn_andn_n')
+       define(`logopS',`andc   $1,$2,$3')
+       define(`logop', `vandc  $1,$2,$3')')
+ifdef(`OPERATION_nand_n',
+`      define(`func',  `mpn_nand_n')
+       define(`logopS',`nand   $1,$2,$3')
+       define(`logop', `vand   $1,$2,$3')
+       define(`vnega', `vnor   $1,$2,$2')')
+ifdef(`OPERATION_ior_n',
+`      define(`func',  `mpn_ior_n')
+       define(`logopS',`or     $1,$2,$3')
+       define(`logop', `vor    $1,$2,$3')')
+ifdef(`OPERATION_iorn_n',
+`      define(`func',  `mpn_iorn_n')
+       define(`logopS',`orc    $1,$2,$3')
+       define(`vnegb', `vnor   $1,$2,$2')
+       define(`logop', `vor    $1,$2,$3')')
+ifdef(`OPERATION_nior_n',
+`      define(`func',  `mpn_nior_n')
+       define(`logopS',`nor    $1,$2,$3')
+       define(`logop', `vnor   $1,$2,$3')')
+ifdef(`OPERATION_xor_n',
+`      define(`func',  `mpn_xor_n')
+       define(`logopS',`xor    $1,$2,$3')
+       define(`logop', `vxor   $1,$2,$3')')
+ifdef(`OPERATION_xnor_n',
+`      define(`func',`mpn_xnor_n')
+       define(`logopS',`eqv    $1,$2,$3')
+       define(`vnegb', `vnor   $1,$2,$2')
+       define(`logop', `vxor   $1,$2,$3')')
+
+C LIMB32(text) emits text only when limbs are 32 bits wide, LIMB64(text) only
+C otherwise, so both variants can be interleaved in one instruction stream.
+ifelse(GMP_LIMB_BITS,`32',`
+       define(`LIMB32',`       $1')
+       define(`LIMB64',`')
+',`
+       define(`LIMB32',`')
+       define(`LIMB64',`       $1')
+')
+
+C INPUT PARAMETERS
+define(`rp',   `r3')
+define(`up',   `r4')
+define(`vp',   `r5')
+define(`n',    `r6')
+
+C Permute control vectors produced by lvsl from up and vp respectively.
+define(`us',   `v8')
+define(`vs',   `v9')
+
+MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
+
+ASM_START()
+PROLOGUE(func)
+C Computes rp[i] = up[i] OP vp[i] for i in 0..n-1, where OP is the operation
+C selected through logopS/logop/vnegb/vnega.
+C
+C Structure:
+C   1. Operands below 8 limbs (32-bit) or 4 limbs (64-bit) use a scalar loop.
+C   2. Otherwise scalar code runs until rp is 16-byte aligned, then whole
+C      16-byte vectors are processed with lvx/vperm/stvx, and finally the
+C      remaining n mod LIMBS_PER_VR limbs are handled by scalar code.
+C VRSAVE is saved in r12 on entry to the vector path and restored at L(ret).
+
+LIMB32(`cmpwi  cr0, n, 8       ')
+LIMB64(`cmpdi  cr0, n, 4       ')
+       bge     L(big)
+
+C Small operand: one limb per iteration, counted by ctr.
+       mtctr   n
+
+C The 32-bit variant handles limb 0 here so that the loop can use the
+C update-form lwzu/stwu with a pre-increment of 4.
+LIMB32(`lwz    r8, 0(up)       ')
+LIMB32(`lwz    r9, 0(vp)       ')
+LIMB32(`logopS(        r0, r8, r9)     ')
+LIMB32(`stw    r0, 0(rp)       ')
+LIMB32(`bdz    L(endS)         ')
+
+L(topS):
+LIMB32(`lwzu   r8, 4(up)       ')
+LIMB64(`ld     r8, 0(up)       ')
+LIMB64(`addi   up, up, GMP_LIMB_BYTES  ')
+LIMB32(`lwzu   r9, 4(vp)       ')
+LIMB64(`ld     r9, 0(vp)       ')
+LIMB64(`addi   vp, vp, GMP_LIMB_BYTES  ')
+       logopS( r0, r8, r9)
+LIMB32(`stwu   r0, 4(rp)       ')
+LIMB64(`std    r0, 0(rp)       ')
+LIMB64(`addi   rp, rp, GMP_LIMB_BYTES  ')
+       bdnz    L(topS)
+L(endS):
+       blr
+
+C Vector path.  r12 keeps the incoming VRSAVE for L(ret).
+L(big):        mfspr   r12, 256
+       oris    r0, r12, 0xfffc         C Set VRSAVE bit 0-13 FIXME
+       mtspr   256, r0
+
+C First loop until the destination is 16-byte aligned.  This will execute 0 or 1
+C times for 64-bit machines, and 0 to 3 times for 32-bit machines.
+
+LIMB32(`rlwinm.        r0, rp, 30,30,31')      C (rp >> 2) mod 4
+LIMB64(`rlwinm.        r0, rp, 29,31,31')      C (rp >> 3) mod 2
+       beq     L(aligned)
+
+C r7 = number of limbs needed to reach the next 16-byte boundary of rp; they
+C are removed from n up front.  The 32-bit loop indexes rp through r10 and
+C leaves rp itself untouched; the 64-bit variant has no loop since r7 is 1.
+       subfic  r7, r0, LIMBS_PER_VR
+LIMB32(`li     r10, 0          ')
+       subf    n, r7, n
+L(top0):
+LIMB32(`lwz    r8, 0(up)       ')
+LIMB64(`ld     r8, 0(up)       ')
+       addi    up, up, GMP_LIMB_BYTES
+LIMB32(`lwz    r9, 0(vp)       ')
+LIMB64(`ld     r9, 0(vp)       ')
+       addi    vp, vp, GMP_LIMB_BYTES
+LIMB32(`addic. r7, r7, -1      ')
+       logopS( r0, r8, r9)
+LIMB32(`stwx   r0, r10, rp     ')
+LIMB64(`std    r0, 0(rp)       ')
+LIMB32(`addi   r10, r10, GMP_LIMB_BYTES')
+LIMB32(`bne    L(top0)         ')
+
+C rp keeps its low four bits from here on; the stvx stores below ignore them,
+C and L(tail) rounds rp up to a 16-byte boundary before the scalar stores.
+       addi    rp, rp, 16              C update rp, but preserve its alignment
+
+L(aligned):
+LIMB64(`srdi   r7, n, 1        ')      C loop count corresponding to n
+LIMB32(`srwi   r7, n, 2        ')      C loop count corresponding to n
+       mtctr   r7                      C copy n to count register
+
+       li      r10, 16
+       lvsl    us, 0, up
+       lvsl    vs, 0, vp
+
+C Prime the pipeline with the first 16-byte block of each source.
+       lvx     v2, 0, up
+       lvx     v3, 0, vp
+       bdnz    L(gt1)
+C Exactly one vector to produce: combine the primed blocks with the next ones.
+       lvx     v0, r10, up
+       lvx     v1, r10, vp
+       vperm   v4, v2, v0, us
+       vperm   v5, v3, v1, vs
+       vnegb(  v5, v5)
+       logop(  v6, v4, v5)
+       vnega(  v6, v6)
+       stvx    v6, 0, rp
+       addi    up, up, 16
+       addi    vp, vp, 16
+C Step rp just past the start of the stored vector so that the round-up in
+C L(tail) lands on the following 16-byte boundary.
+       addi    rp, rp, 4
+       b       L(tail)
+
+L(gt1):        addi    up, up, 16
+       addi    vp, vp, 16
+
+C Main loop, two vectors per iteration.  The block pairs alternate between
+C (v2,v0)/(v3,v1) and (v0,v2)/(v1,v3), so each source block is loaded once.
+L(top):        lvx     v0, 0, up
+       lvx     v1, 0, vp
+       vperm   v4, v2, v0, us
+       vperm   v5, v3, v1, vs
+       vnegb(  v5, v5)
+       logop(  v6, v4, v5)
+       vnega(  v6, v6)
+       stvx    v6, 0, rp
+       bdz     L(end)
+       lvx     v2, r10, up
+       lvx     v3, r10, vp
+       vperm   v4, v0, v2, us
+       vperm   v5, v1, v3, vs
+       vnegb(  v5, v5)
+       logop(  v6, v4, v5)
+       vnega(  v6, v6)
+       stvx    v6, r10, rp
+       addi    up, up, 32
+       addi    vp, vp, 32
+       addi    rp, rp, 32
+       bdnz    L(top)
+
+C Loop exit with one vector left, pending in v2/v3.  The following source
+C block is loaded only when the pointer is not 16-byte aligned; otherwise a
+C zero vector is substituted and the extra load is skipped.
+       andi.   r0, up, 15
+       vxor    v0, v0, v0
+       beq     1f
+       lvx     v0, 0, up
+1:     andi.   r0, vp, 15
+       vxor    v1, v1, v1
+       beq     1f
+       lvx     v1, 0, vp
+1:     vperm   v4, v2, v0, us
+       vperm   v5, v3, v1, vs
+       vnegb(  v5, v5)
+       logop(  v6, v4, v5)
+       vnega(  v6, v6)
+       stvx    v6, 0, rp
+       addi    rp, rp, 4
+       b       L(tail)
+
+C Exit from the middle of the loop: the last vector is pending in v0/v1 and
+C is stored at offset 16 from rp.  Same conditional load as above.
+L(end):        andi.   r0, up, 15
+       vxor    v2, v2, v2
+       beq     1f
+       lvx     v2, r10, up
+1:     andi.   r0, vp, 15
+       vxor    v3, v3, v3
+       beq     1f
+       lvx     v3, r10, vp
+1:     vperm   v4, v0, v2, us
+       vperm   v5, v1, v3, vs
+       vnegb(  v5, v5)
+       logop(  v6, v4, v5)
+       vnega(  v6, v6)
+       stvx    v6, r10, rp
+
+       addi    up, up, 16
+       addi    vp, vp, 16
+       addi    rp, rp, 20
+
+C Scalar tail for the limbs that do not fill a whole vector.
+L(tail):
+LIMB32(`rlwinm.        r7, n, 0,30,31  ')      C r7 = n mod 4
+LIMB64(`rlwinm.        r7, n, 0,31,31  ')      C r7 = n mod 2
+       beq     L(ret)
+C Round rp up to the next 16-byte boundary.
+       addi    rp, rp, 15
+LIMB32(`rlwinm rp, rp, 0,0,27  ')
+LIMB64(`rldicr rp, rp, 0,59    ')
+       li      r10, 0
+L(top2):
+LIMB32(`lwzx   r8, r10, up     ')
+LIMB64(`ldx    r8, r10, up     ')
+LIMB32(`lwzx   r9, r10, vp     ')
+LIMB64(`ldx    r9, r10, vp     ')
+LIMB32(`addic. r7, r7, -1      ')
+       logopS( r0, r8, r9)
+LIMB32(`stwx   r0, r10, rp     ')
+LIMB64(`std    r0, 0(rp)       ')
+LIMB32(`addi   r10, r10, GMP_LIMB_BYTES')
+LIMB32(`bne    L(top2)         ')
+
+L(ret):        mtspr   256, r12
+       blr
+EPILOGUE()
+
+C This works for 64-bit PowerPC, since a limb ptr can only be aligned
+C in 2 relevant ways, which means we can always find a pair of aligned
+C pointers of rp, up, and vp.
+C process words until rp is 16-byte aligned
+C if (((up | vp) & 15) == 0)
+C   process with VMX without any vperm
+C else if ((up & 15) != 0 && (vp & 15) != 0)
+C   process with VMX using vperm on store data
+C else if ((up & 15) != 0)
+C   process with VMX using vperm on up data
+C else
+C   process with VMX using vperm on vp data
+C
+C      rlwinm. r0, up, 0,28,31
+C      rlwinm  r0, vp, 0,28,31
+C      cmpwi   cr7, r0, 0
+C      cror    cr6, cr0, cr7
+C      crand   cr0, cr0, cr7
diff --git a/mpn/powerpc32/vmx/mod_34lsub1.asm b/mpn/powerpc32/vmx/mod_34lsub1.asm

new file mode 100644 (file)

index 0000000..8aee6f8
--- /dev/null
+++ b/mpn/powerpc32/vmx/mod_34lsub1.asm
@@ -0,0 +1,375 @@
+dnl  PowerPC-32 mpn_mod_34lsub1 -- mpn remainder mod 2^24-1.
+
+dnl  Copyright 2002, 2003, 2005, 2006, 2007 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+
+C                cycles/limb
+C 603e:              -
+C 604e:              -
+C 75x (G3):          -
+C 7400,7410 (G4):    1          simple load-use scheduling results in 0.75
+C 744x,745x (G4+):   0.75
+C ppc970:            0.75
+C power4:            -
+C power5:            -
+
+C TODO
+C  * Either start using the low-end masking constants, or remove them.
+C  * Merge multiple feed-in cases into a parameterized code block.
+C  * Reduce register usage.  It should be possible to almost halve it.
+
+C INPUT PARAMETERS
+define(`up', `r3')
+define(`n', `r4')
+
+C Vector register roles in the large-operand path: a0-a2 accumulate the three
+C 16-byte lanes of each 48-byte group, c0-c2 accumulate the matching vaddcuw
+C carry-outs, z is kept all-zero, and pv holds the current vperm control
+C vector.  x0-x3 and y0-y3 are temporaries for the final summation; y0-y2
+C alias v0-v2, which the main loop uses as load destinations.
+define(`a0', `v3')
+define(`a1', `v4')
+define(`a2', `v5')
+define(`c0', `v6')
+define(`c1', `v7')
+define(`c2', `v8')
+define(`z',  `v9')
+define(`x0', `v10')
+define(`x1', `v11')
+define(`x2', `v12')
+define(`x3', `v13')
+define(`pv', `v14')
+define(`y0', `v0')
+define(`y1', `v1')
+define(`y2', `v2')
+define(`y3', `v15')
+
+ASM_START()
+PROLOGUE(mpn_mod_34lsub1)
+C Returns in r3 a value congruent to {up,n} modulo 2^24-1 (not necessarily
+C fully reduced).
+C
+C Operands below 20 limbs are summed with integer code, three limbs (four
+C 24-bit fields) per iteration.  Larger operands are summed 12 limbs at a
+C time into the vector accumulators a0-a2 with carries collected in c0-c2;
+C the accumulators are then split into 24-bit fields with vperm and added.
+C
+C Register use in the large path: r10 holds the caller's VRSAVE from
+C L(large) until it is restored before blr and must not be used as scratch;
+C r11 points at cnsts; r5 and r6 are the constants 16 and 32; r7 and r8 are
+C scratch.
+       cmpwi   cr0, n, 20              C tuned cutoff point
+       bge     L(large)
+
+       li      r9, 0                   C result accumulator
+       mulli   r10, n, 0xb             C 0xb = ceil(32/3)
+       srwi.   r10, r10, 5             C r10 = floor(n/3), n < 32
+       beq     L(small_tail)
+       mtctr   r10
+       lwz     r6, 0(up)
+       lwz     r7, 4(up)
+       lwzu    r8, 8(up)
+C n -= 3*floor(n/3), leaving the 0..2 limbs for L(small_tail)
+       subf    n, r10, n
+       subf    n, r10, n
+       subf    n, r10, n
+       bdz     L(small_end)
+
+       ALIGN(16)
+L(los):        rlwinm  r0, r6, 0,8,31
+       add     r9, r9, r0              C add 24b from u0
+       srwi    r0, r6, 24
+       lwz     r6, 4(up)
+       rlwimi  r0, r7, 8, 0x00ffff00   C --111100
+       add     r9, r9, r0              C add 8b from u0 and 16b from u1
+       srwi    r0, r7, 16
+       lwz     r7, 8(up)
+       rlwimi  r0, r8, 16, 0x00ff0000  C --221111
+       add     r9, r9, r0              C add 16b from u1 and 8b from u2
+       srwi    r0, r8, 8               C --222222
+       lwzu    r8, 12(up)
+       add     r9, r9, r0              C add 24b from u2
+       bdnz    L(los)
+L(small_end):
+       rlwinm  r0, r6, 0,8,31
+       add     r9, r9, r0              C add 24b from u0
+       srwi    r0, r6, 24
+       rlwimi  r0, r7, 8, 0x00ffff00   C --111100
+       add     r9, r9, r0              C add 8b from u0 and 16b from u1
+       srwi    r0, r7, 16
+       rlwimi  r0, r8, 16, 0x00ff0000  C --221111
+       add     r9, r9, r0              C add 16b from u1 and 8b from u2
+       srwi    r0, r8, 8               C --222222
+       add     r9, r9, r0              C add 24b from u2
+
+C up points at the last limb loaded; step to the first unprocessed limb and
+C fold the bits above bit 23 of the sum back into the low 24 bits.
+       addi    up, up, 4
+       rlwinm  r0, r9, 0,8,31
+       srwi    r9, r9, 24
+       add     r9, r9, r0
+
+C 0, 1 or 2 limbs remain.  cr0 from the compare below is still valid at the
+C beq, since none of the instructions in between is a record form.
+L(small_tail):
+       cmpi    cr0, n, 1
+       blt     L(ret)
+
+       lwz     r6, 0(up)
+       rlwinm  r0, r6, 0,8,31
+       srwi    r6, r6, 24
+       add     r9, r9, r0
+       add     r9, r9, r6
+
+       beq     L(ret)
+
+       lwz     r6, 4(up)
+       rlwinm  r0, r6, 8,8,23
+       srwi    r6, r6, 16
+       add     r9, r9, r0
+       add     r9, r9, r6
+
+L(ret):        mr      r3, r9
+       blr
+
+
+L(large):
+       mfspr   r10, 256                C r10 = caller VRSAVE, kept until return
+       oris    r0, r10, 0xffff         C Set VRSAVE bit 0-15
+       mtspr   256, r0
+
+C Clear the accumulators while setting up integer constants:
+C r0 = 0xaaaaaaab (reciprocal used for the division by 12 below), r5 = 16,
+C r6 = 32, r11 = address of cnsts, r7 = up mod 16.
+       andi.   r7, up, 15
+       vxor    a0, v0, v0
+       lis     r0, 0xaaaa
+       vxor    a1, v0, v0
+       ori     r0, r0, 0xaaab
+       vxor    a2, v0, v0
+       li      r5, 16
+       vxor    c0, v0, v0
+       li      r6, 32
+       vxor    c1, v0, v0
+       LEAL(   r11, cnsts)
+       vxor    c2, v0, v0
+       vxor    z, v0, v0
+
+       beq     L(aligned16)
+
+C up is not 16-byte aligned.  Load the enclosing aligned block, zero the
+C bytes that precede up with two vsldoi shifts, and enter the 48-byte group
+C part-way through.  n is increased by the number of limb slots skipped in
+C the group so that the count and remainder computations still apply.
+       cmpwi   cr7, r7, 8
+       bge     cr7, L(na4)
+
+C up mod 16 = 4: three valid limbs, placed in a2
+       lvx     a2, 0, up
+       addi    up, up, 16
+       vsldoi  a2, a2, z, 4
+       vsldoi  a2, z, a2, 12
+
+       addi    n, n, 9
+       mulhwu  r0, n, r0
+       srwi    r0, r0, 3               C r0 = floor(n/12)
+       mtctr   r0
+
+       mulli   r8, r0, 12
+       subf    n, r8, n
+       b       L(2)
+
+L(na4):        bne     cr7, L(na8)
+
+C up mod 16 = 8: two valid limbs, placed in a1
+       lvx     a1, 0, up
+       addi    up, up, -16
+       vsldoi  a1, a1, z, 8
+       vsldoi  a1, z, a1, 8
+
+       addi    n, n, 6
+       mulhwu  r0, n, r0
+       srwi    r0, r0, 3               C r0 = floor(n/12)
+       mtctr   r0
+
+       mulli   r8, r0, 12
+       subf    n, r8, n
+       b       L(1)
+
+C up mod 16 = 12: one valid limb, placed in a0
+L(na8):
+       lvx     a0, 0, up
+       vsldoi  a0, a0, z, 12
+       vsldoi  a0, z, a0, 4
+
+       addi    n, n, 3
+       mulhwu  r0, n, r0
+       srwi    r0, r0, 3               C r0 = floor(n/12)
+       mtctr   r0
+
+       mulli   r8, r0, 12
+       subf    n, r8, n
+       b       L(0)
+
+L(aligned16):
+       mulhwu  r0, n, r0
+       srwi    r0, r0, 3               C r0 = floor(n/12)
+       mtctr   r0
+
+       mulli   r8, r0, 12
+       subf    n, r8, n
+
+C The first group is loaded straight into the accumulators.
+       lvx     a0, 0, up
+L(0):  lvx     a1, r5, up
+L(1):  lvx     a2, r6, up
+       addi    up, up, 48
+L(2):  bdz     L(end)
+       li      r12, 256                C prefetch distances for dcbt
+       li      r9, 288
+       ALIGN(32)
+L(top):
+       lvx     v0, 0, up
+       vaddcuw v10, a0, v0
+       vadduwm a0, a0, v0
+       vadduwm c0, c0, v10
+
+       lvx     v1, r5, up
+       vaddcuw v10, a1, v1
+       vadduwm a1, a1, v1
+       vadduwm c1, c1, v10
+
+       lvx     v2, r6, up
+       dcbt    up, r12
+       dcbt    up, r9
+       addi    up, up, 48
+       vaddcuw v10, a2, v2
+       vadduwm a2, a2, v2
+       vadduwm c2, c2, v10
+       bdnz    L(top)
+
+L(end):
+C n = 0...11
+       cmpwi   cr0, n, 0
+       beq     L(sum)
+       cmpwi   cr0, n, 4
+       ble     L(tail.1..4)
+       cmpwi   cr0, n, 8
+       ble     L(tail.5..8)
+
+C In each tail the last, possibly partial, vector is ANDed with a high-end
+C mask taken from cnsts+96 at index 16*(n mod 4).
+L(tail.9..11):
+       lvx     v0, 0, up
+       vaddcuw v10, a0, v0
+       vadduwm a0, a0, v0
+       vadduwm c0, c0, v10
+
+       lvx     v1, r5, up
+       vaddcuw v10, a1, v1
+       vadduwm a1, a1, v1
+       vadduwm c1, c1, v10
+
+       lvx     v2, r6, up
+
+       addi    r8, r11, 96
+       rlwinm  r3, n ,4,26,27
+       lvx     v11, r3, r8
+       vand    v2, v2, v11
+
+       vaddcuw v10, a2, v2
+       vadduwm a2, a2, v2
+       vadduwm c2, c2, v10
+       b       L(sum)
+
+L(tail.5..8):
+       lvx     v0, 0, up
+       vaddcuw v10, a0, v0
+       vadduwm a0, a0, v0
+       vadduwm c0, c0, v10
+
+       lvx     v1, r5, up
+
+       addi    r8, r11, 96
+       rlwinm  r3, n ,4,26,27
+       lvx     v11, r3, r8
+       vand    v1, v1, v11
+
+       vaddcuw v10, a1, v1
+       vadduwm a1, a1, v1
+       vadduwm c1, c1, v10
+       b       L(sum)
+
+L(tail.1..4):
+       lvx     v0, 0, up
+
+       addi    r8, r11, 96
+       rlwinm  r3, n ,4,26,27
+       lvx     v11, r3, r8
+       vand    v0, v0, v11
+
+       vaddcuw v10, a0, v0
+       vadduwm a0, a0, v0
+       vadduwm c0, c0, v10
+
+C Split the accumulators into 24-bit fields using the six permutation
+C vectors at the start of cnsts.  The table offsets 48, 64 and 80 go through
+C r7, which is dead here; r10 still holds the saved VRSAVE and must survive
+C until the mtspr before blr.
+L(sum):        lvx     pv, 0, r11
+       vperm   x0, a0, z, pv           C extract 4 24-bit fields from a0
+       vperm   y0, c2, z, pv           C same extraction for carries c2
+       lvx     pv, r5, r11
+       vperm   x1, a1, z, pv           C extract 4 24-bit fields from a1
+       vperm   y1, c0, z, pv           C same extraction for carries c0
+       lvx     pv, r6, r11
+       vperm   x2, a2, z, pv           C extract 4 24-bit fields from a2
+       vperm   y2, c1, z, pv           C same extraction for carries c1
+       li      r7,  48
+       lvx     pv, r7, r11
+       vperm   x3, a0, z, pv           C extract remaining/partial a0 fields
+       vperm   y3, c2, z, pv           C likewise for carries c2
+       li      r7,  64
+       lvx     pv, r7, r11
+       vperm   x3, a1, x3, pv          C insert remaining/partial a1 fields
+       vperm   y3, c0, y3, pv          C likewise for carries c0
+       li      r7,  80
+       lvx     pv, r7, r11
+       vperm   x3, a2, x3, pv          C insert remaining/partial a2 fields
+       vperm   y3, c1, y3, pv          C likewise for carries c1
+
+C We now have 4 128-bit accumulators to sum
+       vadduwm x0, x0, x1
+       vadduwm x2, x2, x3
+       vadduwm x0, x0, x2
+
+       vadduwm y0, y0, y1
+       vadduwm y2, y2, y3
+       vadduwm y0, y0, y2
+
+       vadduwm x0, x0, y0
+
+C Reduce 32-bit fields
+       vsumsws x0, x0, z
+
+C Move the sum to r3 through memory just below the stack pointer.
+       li      r7, -16                 C FIXME: does all ppc32 ABIs...
+       stvx    x0, r7, r1              C FIXME: ...support storing below sp?
+       lwz     r3, -4(r1)
+
+       mtspr   256, r10                C restore caller VRSAVE
+       blr
+EPILOGUE()
+
+C load |      v0       |      v1       |      v2       |
+C acc  |      a0       |      a1       |      a2       |
+C carry        |      c0       |      c1       |      c2       |
+C      | 0   1   2   3 | 4   5   6   7 | 8   9  10  11 |  128
+C      |---|---|---|---|---|---|---|---|---|---|---|---|   32
+C      |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |   24
+C      |     |     |     |     |     |     |     |     |   48
+
+C       $---------------$---------------$---------------$---------------$
+C       |   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   |
+C       |_______________________________________________________________|
+C   |           |           |           |           |           |           |
+C       <-hi16-> <--- 24 --> <--- 24 --> <--- 24 --> <--- 24 --> <-lo16->
+
+
+DEF_OBJECT(cnsts,16)
+C Layout relied upon by the code above: six 16-byte vperm control vectors at
+C byte offsets 0, 16, 32, 48, 64 and 80, followed at offset 96 by four
+C high-end masks.  The tail code indexes the masks with 16*(n mod 4), so the
+C all-ones mask at index 0 serves a final vector holding four limbs.
+C Permutation vectors in the order they are used above
+C #      00   01   02   03    04   05   06   07    08   09   0a   0b    0c   0d   0e   0f
+ .byte 0x10,0x01,0x02,0x03, 0x10,0x06,0x07,0x00, 0x10,0x0b,0x04,0x05, 0x10,0x08,0x09,0x0a C a0
+ .byte 0x10,0x07,0x00,0x01, 0x10,0x04,0x05,0x06, 0x10,0x09,0x0a,0x0b, 0x10,0x0e,0x0f,0x08 C a1
+ .byte 0x10,0x00,0x01,0x02, 0x10,0x05,0x06,0x07, 0x10,0x0a,0x0b,0x04, 0x10,0x0f,0x08,0x09 C a2
+ .byte 0x10,0x0d,0x0e,0x0f, 0x10,0x10,0x10,0x0c, 0x10,0x10,0x10,0x10, 0x10,0x10,0x10,0x10 C part a0
+ .byte 0x10,0x11,0x12,0x13, 0x10,0x02,0x03,0x17, 0x10,0x10,0x0c,0x0d, 0x10,0x10,0x10,0x10 C part a1
+ .byte 0x10,0x11,0x12,0x13, 0x10,0x15,0x16,0x17, 0x10,0x03,0x1a,0x1b, 0x10,0x0c,0x0d,0x0e C part a2
+C Masks for high end of number
+ .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
+ .byte 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+ .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+ .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00
+C Masks for low end of number
+C .byte        0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
+C .byte        0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
+C .byte        0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
+C .byte        0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff
+END_OBJECT(cnsts)
diff --git a/mpn/powerpc32/vmx/popcount.asm b/mpn/powerpc32/vmx/popcount.asm

new file mode 100644 (file)

index 0000000..62fcaae
--- /dev/null
+++ b/mpn/powerpc32/vmx/popcount.asm
@@ -0,0 +1,28 @@
+dnl  PowerPC-32/VMX mpn_popcount.
+
+dnl  Copyright 2006 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C This file contains no code of its own; the implementation is pulled in
+C from the powerpc64 VMX directory.
+MULFUNC_PROLOGUE(mpn_popcount)
+include_mpn(`powerpc64/vmx/popcount.asm')
+
+C                   cycles/limb
+C 7400,7410 (G4):       2.75
+C 744x,745x (G4+):      2.25
+C 970 (G5):             5.3
diff --git a/mpn/powerpc64/README b/mpn/powerpc64/README

new file mode 100644 (file)

index 0000000..757357b
--- /dev/null
+++ b/mpn/powerpc64/README
@@ -0,0 +1,155 @@
+Copyright 1999, 2000, 2001, 2003, 2004, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+
+                    POWERPC-64 MPN SUBROUTINES
+
+
+This directory contains mpn functions for 64-bit PowerPC chips.
+
+
+CODE ORGANIZATION
+
+       mpn/powerpc64          mode-neutral code
+       mpn/powerpc64/mode32   code for mode32
+       mpn/powerpc64/mode64   code for mode64
+
+
+The mode32 and mode64 sub-directories contain code which is for use in the
+respective chip mode, 32 or 64.  The top-level directory is code that's
+unaffected by the mode.
+
+The "adde" instruction is the main difference between mode32 and mode64.  It
+operates on either a 32-bit or a 64-bit quantity according to the chip mode.
+Other instructions have an operand size in their opcode and hence don't vary.
+
+
+
+POWER3/PPC630 pipeline information:
+
+Decoding is 4-way + branch and issue is 8-way with some out-of-order
+capability.
+
+Functional units:
+LS1  - ld/st unit 1
+LS2  - ld/st unit 2
+FXU1 - integer unit 1, handles any simple integer instruction
+FXU2 - integer unit 2, handles any simple integer instruction
+FXU3 - integer unit 3, handles integer multiply and divide
+FPU1 - floating-point unit 1
+FPU2 - floating-point unit 2
+
+Memory:                  Any two memory operations can issue, but memory subsystem
+                 can sustain just one store per cycle.  No need for data
+                 prefetch; the hardware has very sophisticated prefetch logic.
+Simple integer:          2 operations (such as add, rl*)
+Integer multiply: 1 operation every 9th cycle worst case; exact timing depends
+                 on 2nd operand's most significant bit position (10 bits per
+                 cycle).  Multiply unit is not pipelined, only one multiply
+                 operation in progress is allowed.
+Integer divide:          ?
+Floating-point:          Any plain 2 arithmetic instructions (such as fmul, fadd, and
+                 fmadd), latency 4 cycles.
+Floating-point divide:
+                 ?
+Floating-point square root:
+                 ?
+
+POWER3/PPC630 best possible times for the main loops:
+shift:       1.5 cycles limited by integer unit contention.
+             With 63 special loops, one for each shift count, we could
+             reduce the needed integer instructions to 2, which would
+             reduce the best possible time to 1 cycle.
+add/sub:      1.5 cycles, limited by ld/st unit contention.
+mul:         18 cycles (average) unless floating-point operations are used,
+             but that would only help for multiplies of perhaps 10 and more
+             limbs.
+addmul/submul:Same situation as for mul.
+
+
+POWER4/PPC970 and POWER5 pipeline information:
+
+This is a very odd pipeline, it is basically a VLIW masquerading as a plain
+architecture.  Its issue rules are not made public, and since it is so weird,
+it is very hard to figure out any useful information from experimentation.
+An example:
+
+  A well-aligned loop with nop's takes 3, 4, 6, 7, ... cycles.
+    3 cycles for  0,  1,  2,  3,  4,  5,  6,  7 nop's
+    4 cycles for  8,  9, 10, 11, 12, 13, 14, 15 nop's
+    6 cycles for 16, 17, 18, 19, 20, 21, 22, 23 nop's
+    7 cycles for 24, 25, 26, 27 nop's
+    8 cycles for 28, 29, 30, 31 nop's
+    ... continues regularly
+
+
+Functional units:
+LS1  - ld/st unit 1
+LS2  - ld/st unit 2
+FXU1 - integer unit 1, handles any integer instruction
+FXU2 - integer unit 2, handles any integer instruction
+FPU1 - floating-point unit 1
+FPU2 - floating-point unit 2
+
+While this is one integer unit less than POWER3/PPC630, the remaining units
+are more powerful; here they handle multiply and divide.
+
+Memory:                  2 ld/st.  Stores go to the L2 cache, which can sustain just
+                 one store per cycle.
+                 L1 load latency: to gregs 3-4 cycles, to fregs 5-6 cycles.
+                 Operations that modify the address register might be split
+                 to also use an integer issue slot.
+Simple integer:          2 operations every cycle, latency 2.
+Integer multiply: 2 operations every 6th cycle, latency 7 cycles.
+Integer divide:          ?
+Floating-point:          Any plain 2 arithmetic instructions (such as fmul, fadd, and
+                 fmadd), latency 6 cycles.
+Floating-point divide:
+                 ?
+Floating-point square root:
+                 ?
+
+
+IDEAS
+
+*mul_1: Handling one limb using mulld/mulhdu and two limbs using floating-
+point operations should give performance of about 20 cycles for 3 limbs, or 7
+cycles/limb.
+
+We should probably split the single-limb operand in 32-bit chunks, and the
+multi-limb operand in 16-bit chunks, allowing us to accumulate well in fp
+registers.
+
+Problem is to get 32-bit or 16-bit words to the fp registers.  Only 64-bit fp
+memops copy bits without fiddling with them.  We might therefore need to
+load to integer registers with zero extension, store as 64 bits into temp
+space, and then load to fp regs.  Alternatively, load directly to fp space
+and add well-chosen constants to get cancelation.  (Other part after given by
+subsequent subtraction.)
+
+Possible code mix for load-via-intregs variant:
+
+lwz,std,lfd
+fmadd,fmadd,fmul,fmul
+fctidz,stfd,ld,fctidz,stfd,ld
+add,adde
+lwz,std,lfd
+fmadd,fmadd,fmul,fmul
+fctidz,stfd,ld,fctidz,stfd,ld
+add,adde
+srd,sld,add,adde,add,adde
diff --git a/mpn/powerpc64/aix.m4 b/mpn/powerpc64/aix.m4

new file mode 100644 (file)

index 0000000..589686a
--- /dev/null
+++ b/mpn/powerpc64/aix.m4
@@ -0,0 +1,84 @@
+divert(-1)
+dnl  m4 macros for AIX 64-bit assembly.
+
+dnl  Copyright 2000, 2001, 2002, 2005, 2006 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+dnl  ASM_START expands to the directives that select the "ppc64" machine
+dnl  and open the TOC section.
+
+define(`ASM_START',
+       `.machine       "ppc64"
+       .toc')
+
+dnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)
+dnl          EPILOGUE_cpu(GSYM_PREFIX`'foo)
+dnl
+dnl  PROLOGUE_cpu exports both the plain and the dot-prefixed symbol, emits
+dnl  a three-word function descriptor (entry address, TOC anchor, zero)
+dnl  under the plain name in a [DS] csect, then switches to the [PR] csect
+dnl  and places the dot-prefixed code label there.
+dnl
+dnl  Don't want ELF style .size in the epilogue.
+
+define(`PROLOGUE_cpu',
+m4_assert_numargs(1)
+       `
+       .globl  $1
+       .globl  .$1
+       .csect  [DS], 3
+$1:
+       .llong  .$1, TOC[tc0], 0
+       .csect  [PR]
+       .align  4
+.$1:')
+
+define(`EPILOGUE_cpu',
+m4_assert_numargs(1)
+`')
+
+dnl  Usage: LEA(reg,symbol)
+dnl         LEAL(reg,symbol)
+dnl
+dnl  LEA loads the address of symbol into reg from a TOC slot addressed off
+dnl  r2, and redefines TOC_ENTRY to the directives that create that slot.
+dnl  TOC_ENTRY is empty by default and is expanded by ASM_END.  LEAL is an
+dnl  alias for LEA here.
+dnl
+dnl  NOTE(review): each LEA replaces the previous TOC_ENTRY definition, so
+dnl  only the slot for the most recent symbol is emitted by ASM_END; confirm
+dnl  that no file needs slots for two different symbols.
+
+define(`TOC_ENTRY', `')
+
+define(`LEA',
+m4_assert_numargs(2)
+`define(`TOC_ENTRY',
+`      .toc
+..$2:  .tc     $2[TC], $2')'
+       `ld     $1, ..$2(2)')
+
+define(`LEAL',
+m4_assert_numargs(2)
+`LEA($1,$2)')
+
+
+dnl  Usage: EXTERN(symbol)
+dnl         EXTERN_FUNC(symbol)
+dnl
+dnl  Both emit a .globl directive; EXTERN_FUNC names the dot-prefixed code
+dnl  symbol, matching the entry label produced by PROLOGUE_cpu.
+
+define(`EXTERN',
+m4_assert_numargs(1)
+`      .globl  $1')
+
+define(`EXTERN_FUNC',
+m4_assert_numargs(1)
+`      .globl  .$1')
+
+dnl  Usage: DEF_OBJECT(name[,align])
+dnl         END_OBJECT(name)
+dnl
+dnl  DEF_OBJECT switches to a [RO] csect, applies ALIGN with the given value
+dnl  (2 when the second argument is omitted) and defines the label.
+dnl  END_OBJECT only checks its argument count and emits nothing.
+
+define(`DEF_OBJECT',
+m4_assert_numargs_range(1,2)
+`      .csect  [RO], 3
+       ALIGN(ifelse($#,1,2,$2))
+$1:
+')
+
+define(`END_OBJECT',
+m4_assert_numargs(1))
+
+dnl  Usage: CALL(function)
+dnl
+dnl  Branches and links to the dot-prefixed entry point of the function,
+dnl  followed by a nop.
+dnl  NOTE(review): the nop is presumably the slot the linker patches to
+dnl  reload the TOC pointer after a cross-module call; confirm.
+
+define(`CALL',
+       `bl     .$1
+       nop')
+
+dnl  ASM_END emits the TOC entry recorded by the last LEA, if any.
+
+define(`ASM_END', `TOC_ENTRY')
+
+divert
diff --git a/mpn/powerpc64/com.asm b/mpn/powerpc64/com.asm

new file mode 100644 (file)

index 0000000..4fb2e65
--- /dev/null
+++ b/mpn/powerpc64/com.asm
@@ -0,0 +1,74 @@
+dnl  PowerPC-64 mpn_com.
+
+dnl  Copyright 2003, 2004, 2005 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C              cycles/limb
+C POWER3/PPC630:     1?
+C POWER4/PPC970:     1.6
+
+C TODO
+C  * 8-way unrolling brings timing down to about 1.3 cycles/limb.
+
+C INPUT PARAMETERS
+C rp   r3      destination limbs (written)
+C up   r4      source limbs (read)
+C n    r5      limb count; the loop below needs n >= 1
+
+ASM_START()
+PROLOGUE(mpn_com)
+       rldic.  r0, r5, 3, 59   C r0 = (r5 & 3) << 3; cr0 = (n == 4t)?
+       cmpldi  cr6, r0, 16     C cr6 = (n cmp 4t + 2)?
+
+       addi    r5, r5, 3       C compute...
+ifdef(`HAVE_ABI_mode32',
+`      rldicl  r5, r5, 62,34', C ...branch count
+`      rldicl  r5, r5, 62, 2') C ...branch count
+       mtctr   r5              C ctr = (n + 3) / 4 passes through the loop
+
+       add     r4, r4, r0      C offset up
+       add     r3, r3, r0      C offset rp
+
+       beq     cr0, L(L00)     C n = 4t:     start with a full 4-limb pass
+       blt     cr6, L(L01)     C n = 4t + 1: first pass handles 1 limb
+       beq     cr6, L(L10)     C n = 4t + 2: first pass handles 2 limbs
+       b       L(L11)          C n = 4t + 3: first pass handles 3 limbs
+
+L(L00):        addi    r4, r4, 32      C bias pointers so offset -32 is limb 0
+       addi    r3, r3, 32
+
+       ALIGN(16)
+L(oop):        ld      r6, -32(r4)
+       nor     r6, r6, r6      C r6 = ~r6
+       std     r6, -32(r3)
+L(L11):        ld      r6, -24(r4)
+       nor     r6, r6, r6      C r6 = ~r6
+       std     r6, -24(r3)
+L(L10):        ld      r6, -16(r4)
+       nor     r6, r6, r6      C r6 = ~r6
+       std     r6, -16(r3)
+L(L01):        ld      r6, -8(r4)
+       nor     r6, r6, r6      C r6 = ~r6
+       addi    r4, r4, 32      C advance up by 4 limbs
+       std     r6, -8(r3)      C uses rp before it is advanced
+       addi    r3, r3, 32      C advance rp by 4 limbs
+       bdnz    L(oop)          C decrement ctr, loop while nonzero
+
+       blr
+EPILOGUE()
diff --git a/mpn/powerpc64/copyd.asm b/mpn/powerpc64/copyd.asm

new file mode 100644 (file)

index 0000000..6a46a43
--- /dev/null
+++ b/mpn/powerpc64/copyd.asm
@@ -0,0 +1,70 @@
+dnl  PowerPC-64 mpn_copyd
+
+dnl  Copyright 2004, 2005 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C              cycles/limb
+C POWER3/PPC630:     1
+C POWER4/PPC970:     1
+
+C INPUT PARAMETERS
+C rp   r3      destination limbs (written)
+C up   r4      source limbs (read)
+C n    r5      limb count
+
+ASM_START()
+PROLOGUE(mpn_copyd)
+       rldic.  r0, r5, 3, 59   C r0 = (r5 & 3) << 3; cr0 = (n == 4t)?
+       cmpldi  cr6, r0, 16     C cr6 = (n cmp 4t + 2)?
+
+ifdef(`HAVE_ABI_mode32',
+`      rldic   r6, r5, 3, 32', C byte count corresponding to n
+`      rldicr  r6, r5, 3, 60') C byte count corresponding to n
+
+       addi    r5, r5, 4       C compute...
+ifdef(`HAVE_ABI_mode32',
+`      rldicl  r5, r5, 62,34', C ...branch count
+`      rldicl  r5, r5, 62, 2') C ...branch count
+       mtctr   r5              C ctr = (n + 4) / 4
+
+       add     r4, r4, r6      C up + 8*n: just past the top source limb
+       add     r3, r3, r6      C rp + 8*n: just past the top destination limb
+       sub     r4, r4, r0      C offset up
+       sub     r3, r3, r0      C offset rp
+
+       beq     cr0, L(L00)     C n = 4t:     nothing to copy before the loop
+       blt     cr6, L(L01)     C n = 4t + 1: copy 1 limb first
+       beq     cr6, L(L10)     C n = 4t + 2: copy 2 limbs first
+       b       L(L11)          C n = 4t + 3: copy 3 limbs first
+
+       ALIGN(16)
+L(oop):        ld      r6, 24(r4)      C copy from the highest address downwards
+       std     r6, 24(r3)
+L(L11):        ld      r6, 16(r4)
+       std     r6, 16(r3)
+L(L10):        ld      r6, 8(r4)
+       std     r6, 8(r3)
+L(L01):        ld      r6, 0(r4)
+       std     r6, 0(r3)
+L(L00):        addi    r4, r4, -32     C step up down by 4 limbs
+       addi    r3, r3, -32     C step rp down by 4 limbs
+       bdnz    L(oop)          C decrement ctr, loop while nonzero
+
+       blr
+EPILOGUE()
diff --git a/mpn/powerpc64/copyi.asm b/mpn/powerpc64/copyi.asm

new file mode 100644 (file)

index 0000000..5cb7e48
--- /dev/null
+++ b/mpn/powerpc64/copyi.asm
@@ -0,0 +1,64 @@
+dnl  PowerPC-64 mpn_copyi.
+
+dnl  Copyright 2004, 2005 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C              cycles/limb
+C POWER3/PPC630:     1
+C POWER4/PPC970:     1
+
+C INPUT PARAMETERS
+C rp   r3      destination limbs (written)
+C up   r4      source limbs (read)
+C n    r5      limb count
+
+ASM_START()
+PROLOGUE(mpn_copyi)
+       rldic.  r0, r5, 3, 59   C r0 = (r5 & 3) << 3; cr0 = (n == 4t)?
+       cmpldi  cr6, r0, 16     C cr6 = (n cmp 4t + 2)?
+
+       addi    r5, r5, 4       C compute...
+ifdef(`HAVE_ABI_mode32',
+`      rldicl  r5, r5, 62,34', C ...branch count
+`      rldicl  r5, r5, 62, 2') C ...branch count
+       mtctr   r5              C ctr = (n + 4) / 4
+
+       add     r4, r4, r0      C offset up
+       add     r3, r3, r0      C offset rp
+
+       beq     cr0, L(L00)     C n = 4t:     nothing to copy before the loop
+       blt     cr6, L(L01)     C n = 4t + 1: copy 1 limb first
+       beq     cr6, L(L10)     C n = 4t + 2: copy 2 limbs first
+       b       L(L11)          C n = 4t + 3: copy 3 limbs first
+
+       ALIGN(16)
+L(oop):        ld      r6, -32(r4)     C copy from the lowest address upwards
+       std     r6, -32(r3)
+L(L11):        ld      r6, -24(r4)
+       std     r6, -24(r3)
+L(L10):        ld      r6, -16(r4)
+       std     r6, -16(r3)
+L(L01):        ld      r6, -8(r4)
+       std     r6, -8(r3)
+L(L00):        addi    r4, r4, 32      C advance up by 4 limbs
+       addi    r3, r3, 32      C advance rp by 4 limbs
+       bdnz    L(oop)          C decrement ctr, loop while nonzero
+
+       blr
+EPILOGUE()
diff --git a/mpn/powerpc64/darwin.m4 b/mpn/powerpc64/darwin.m4

new file mode 100644 (file)

index 0000000..10055be
--- /dev/null
+++ b/mpn/powerpc64/darwin.m4
@@ -0,0 +1,104 @@
+divert(-1)
+dnl  m4 macros for Mac OS 64-bit assembly.
+
+dnl  Copyright 2005, 2006 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+dnl  ASM_START -- expands to nothing on this target.
+define(`ASM_START',`')
+
+dnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)
+dnl          EPILOGUE_cpu(GSYM_PREFIX`'foo)
+dnl
+
+dnl  DARWIN -- defined (with empty expansion) so that ifdef tests can detect
+dnl  this macro set.
+define(`DARWIN')
+
+dnl  PROLOGUE_cpu(name) -- switch to .text, export name, align, and open
+dnl  label name.
+define(`PROLOGUE_cpu',
+m4_assert_numargs(1)
+`      .text
+       .globl  $1
+       .align  4
+$1:')
+
+dnl  EPILOGUE_cpu(name) -- emits nothing by default; the PIC form of LEA
+dnl  redefines it to emit a non-lazy symbol pointer.
+define(`EPILOGUE_cpu',
+m4_assert_numargs(1))
+
+dnl  LEAL -- Load Effective Address Local.  This is to be used for symbols
+dnl  defined in the same file.  It will not work for externally defined
+dnl  symbols.  The PIC form uses r0 to preserve LR, so r0 is clobbered.
+
+define(`LEAL',
+m4_assert_numargs(2)
+`ifdef(`PIC',
+`
+       mflr    r0                      C save return address
+       bcl     20, 31, 1f
+1:     mflr    $1
+       addis   $1, $1, ha16($2-1b)
+       la      $1, lo16($2-1b)($1)
+       mtlr    r0                      C restore return address
+',`
+       lis     $1, ha16($2)
+       la      $1, lo16($2)($1)
+')')
+
+dnl  LEA -- Load Effective Address.  This is to be used for symbols defined in
+dnl  another file.  It will not work for locally defined symbols.
+dnl  The PIC form redefines EPILOGUE_cpu to emit the non-lazy pointer slot for
+dnl  the symbol, loads the address from that slot, and clobbers r0.
+
+define(`LEA',
+m4_assert_numargs(2)
+`ifdef(`PIC',
+`define(`EPILOGUE_cpu',
+`      .non_lazy_symbol_pointer
+`L'$2`'$non_lazy_ptr:
+       .indirect_symbol $2
+       .quad   0
+')
+       mflr    r0                      C save return address
+       bcl     20, 31, 1f
+1:     mflr    $1
+       addis   $1, $1, ha16(`L'$2`'$non_lazy_ptr-1b)
+       ld      $1, lo16(`L'$2`'$non_lazy_ptr-1b)($1)
+       mtlr    r0                      C restore return address
+',`
+       lis     $1, ha16($2)
+       la      $1, lo16($2)($1)
+')')
+
+dnl  EXTERN(sym) -- no directive is emitted on this target.
+define(`EXTERN',
+m4_assert_numargs(1)
+`dnl')
+
+dnl  EXTERN_FUNC(name) -- no directive is emitted on this target.
+define(`EXTERN_FUNC',
+m4_assert_numargs(1)
+`dnl')
+
+dnl  DEF_OBJECT(name[,align]) -- switch to .const, apply ALIGN (with argument 2
+dnl  when align is omitted, else the given value) and open label name.
+define(`DEF_OBJECT',
+m4_assert_numargs_range(1,2)
+`      .const
+       ALIGN(ifelse($#,1,2,$2))
+$1:
+')
+
+dnl  END_OBJECT(name) -- emits nothing on this target beyond the argument check.
+define(`END_OBJECT',
+m4_assert_numargs(1))
+
+dnl  CALL(name) -- branch-and-link to GSYM_PREFIX followed by name.
+define(`CALL',
+       `bl     GSYM_PREFIX`'$1')
+
+dnl  ASM_END -- emits nothing on this target.
+define(`ASM_END', `dnl')
+
+divert
diff --git a/mpn/powerpc64/elf.m4 b/mpn/powerpc64/elf.m4

new file mode 100644 (file)

index 0000000..e6da11f
--- /dev/null
+++ b/mpn/powerpc64/elf.m4
@@ -0,0 +1,87 @@
+divert(-1)
+dnl  m4 macros for powerpc64 GNU/Linux assembly.
+
+dnl  Copyright 2003, 2005, 2006 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+dnl  ASM_START -- expands to nothing on this target.
+define(`ASM_START',`')
+
+dnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)
+dnl          EPILOGUE_cpu(GSYM_PREFIX`'foo)
+dnl
+
+dnl  PROLOGUE_cpu(name) -- export both name and .name, place a three-doubleword
+dnl  descriptor (.name, .TOC.@tocbase, 0) of size 24 at label name in the .opd
+dnl  section, then switch to .text, mark .name as a function, align, and open
+dnl  the code label .name.
+define(`PROLOGUE_cpu',
+m4_assert_numargs(1)
+       `
+       .globl  $1
+       .globl  .$1
+       .section        ".opd","aw"
+       .align  3
+$1:
+       .llong  .$1, .TOC.@tocbase, 0
+       .size   $1, 24
+       .type   .$1, @function
+       .section        ".text"
+       .align  4
+.$1:')
+
+dnl  EPILOGUE_cpu(name) -- record the size of the code that began at label
+dnl  .name (current location minus .name).
+define(`EPILOGUE_cpu',
+m4_assert_numargs(1)
+`      .size   .$1, .-.$1')
+
+dnl  TOC_ENTRY -- text expanded by ASM_END; empty until LEA redefines it.
+define(`TOC_ENTRY', `')
+
+dnl  LEA(reg,sym) -- expands to a load of reg from the TOC slot labelled ..sym,
+dnl  addressed as ..sym@toc off register 2.  As a side effect TOC_ENTRY is
+dnl  redefined to emit that slot in the .toc section, so a later LEA replaces
+dnl  the slot text recorded by an earlier one.
+define(`LEA',
+m4_assert_numargs(2)
+`define(`TOC_ENTRY',
+`      .section        ".toc", "aw"
+..$2:  .tc     $2[TC], $2')'
+       `ld     $1, ..$2@toc(2)')
+
+dnl  LEAL(reg,sym) -- on this target simply forwards to LEA(reg,sym).
+define(`LEAL',
+m4_assert_numargs(2)
+`LEA($1,$2)')
+
+
+dnl  EXTERN(sym) -- no directive is emitted on this target.
+define(`EXTERN',
+m4_assert_numargs(1)
+`dnl')
+
+dnl  EXTERN_FUNC(name) -- no directive is emitted on this target.
+define(`EXTERN_FUNC',
+m4_assert_numargs(1)
+`dnl')
+
+dnl  DEF_OBJECT(name[,align]) -- switch to .rodata, apply ALIGN (with argument 2
+dnl  when align is omitted, else the given value), mark name as an object and
+dnl  open label name.
+define(`DEF_OBJECT',
+m4_assert_numargs_range(1,2)
+`
+       .section        .rodata
+       ALIGN(ifelse($#,1,2,$2))
+       .type   $1, @object
+$1:
+')
+
+dnl  END_OBJECT(name) -- record the size of the object that began at label name.
+define(`END_OBJECT',
+m4_assert_numargs(1)
+`      .size   $1, .-$1')
+
+dnl  CALL(name) -- branch-and-link to GSYM_PREFIX followed by name, then a nop.
+dnl  NOTE(review): the nop is presumably the slot the linker may patch for
+dnl  cross-module calls -- confirm against the platform ABI.
+define(`CALL',
+       `bl     GSYM_PREFIX`'$1
+       nop')
+
+dnl  ASM_END -- expands to whatever TOC_ENTRY currently holds.
+define(`ASM_END', `TOC_ENTRY')
+
+divert
diff --git a/mpn/powerpc64/logops_n.asm b/mpn/powerpc64/logops_n.asm

new file mode 100644 (file)

index 0000000..917b59f
--- /dev/null
+++ b/mpn/powerpc64/logops_n.asm
@@ -0,0 +1,137 @@
+dnl  PowerPC-64 mpn_and_n, mpn_andn_n, mpn_nand_n, mpn_ior_n, mpn_iorn_n,
+dnl  mpn_nior_n, mpn_xor_n, mpn_xnor_n -- mpn bitwise logical operations.
+
+dnl  Copyright 2003, 2004, 2005 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C              cycles/limb
+C POWER3/PPC630:     1.75
+C POWER4/PPC970:     2.10
+
+C   n     POWER3/PPC630   POWER4/PPC970
+C     1               15.00           15.33
+C     2                7.50            7.99
+C     3                5.33            6.00
+C     4                4.50            4.74
+C     5                4.20            4.39
+C     6                3.50            3.99
+C     7                3.14            3.64
+C     8                3.00            3.36
+C     9                3.00            3.36
+C    10                2.70            3.25
+C    11                2.63            3.11
+C    12                2.58            3.00
+C    13                2.61            3.02
+C    14                2.42            2.82
+C    15                2.40            2.79
+C    50                2.08            2.67
+C   100                1.85            2.31
+C   200                1.80            2.18
+C   400                1.77            2.14
+C  1000                1.76            2.10#
+C  2000                1.75#           2.13
+C  4000                2.30            2.57
+C  8000                2.62            2.58
+C 16000                2.52            4.25
+C 32000                2.49           16.25
+C 64000                2.66           18.76
+
+ifdef(`OPERATION_and_n',
+`      define(`func',`mpn_and_n')
+       define(`logop',         `and')')       C u AND v
+ifdef(`OPERATION_andn_n',
+`      define(`func',`mpn_andn_n')
+       define(`logop',         `andc')')      C u AND NOT v
+ifdef(`OPERATION_nand_n',
+`      define(`func',`mpn_nand_n')
+       define(`logop',         `nand')')      C NOT (u AND v)
+ifdef(`OPERATION_ior_n',
+`      define(`func',`mpn_ior_n')
+       define(`logop',         `or')')        C u OR v
+ifdef(`OPERATION_iorn_n',
+`      define(`func',`mpn_iorn_n')
+       define(`logop',         `orc')')       C u OR NOT v
+ifdef(`OPERATION_nior_n',
+`      define(`func',`mpn_nior_n')
+       define(`logop',         `nor')')       C NOT (u OR v)
+ifdef(`OPERATION_xor_n',
+`      define(`func',`mpn_xor_n')
+       define(`logop',         `xor')')       C u XOR v
+ifdef(`OPERATION_xnor_n',
+`      define(`func',`mpn_xnor_n')
+       define(`logop',         `eqv')')       C NOT (u XOR v)
+
+C INPUT PARAMETERS
+C rp   r3      destination limbs (written)
+C up   r4      first source operand (read)
+C vp   r5      second source operand (read)
+C n    r6      limb count; limb 0 is loaded unconditionally, so n >= 1
+
+MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
+
+ASM_START()
+PROLOGUE(func)
+       ld      r8, 0(r4)       C read lowest u limb
+       ld      r9, 0(r5)       C read lowest v limb
+       addi    r6, r6, 3       C compute branch count (1)
+       rldic.  r0, r6, 3, 59   C r0 = (n-1 & 3) << 3; cr0 = (n == 4(t+1))?
+       cmpldi  cr6, r0, 16     C cr6 = (n cmp 4t + 3)
+
+ifdef(`HAVE_ABI_mode32',
+`      rldicl  r6, r6, 62,34', C ...branch count
+`      rldicl  r6, r6, 62, 2') C ...branch count
+       mtctr   r6              C ctr = (n + 3) / 4
+
+       ld      r6, 0(r4)       C read lowest u limb (again)
+       ld      r7, 0(r5)       C read lowest v limb (again)
+
+       add     r5, r5, r0      C offset vp
+       add     r4, r4, r0      C offset up
+       add     r3, r3, r0      C offset rp
+
+       beq     cr0, L(L01)     C n = 4t + 1
+       blt     cr6, L(L10)     C n = 4t + 2
+       beq     cr6, L(L11)     C n = 4t + 3
+       b       L(L00)          C n = 4t + 4
+
+L(oop):        ld      r8, -24(r4)     C each step loads the next limb pair ...
+       ld      r9, -24(r5)
+       logop   r10, r6, r7     C ... and combines the pair loaded a step earlier
+       std     r10, -32(r3)
+L(L00):        ld      r6, -16(r4)
+       ld      r7, -16(r5)
+       logop   r10, r8, r9
+       std     r10, -24(r3)
+L(L11):        ld      r8, -8(r4)
+       ld      r9, -8(r5)
+       logop   r10, r6, r7
+       std     r10, -16(r3)
+L(L10):        ld      r6, 0(r4)
+       ld      r7, 0(r5)
+       logop   r10, r8, r9
+       std     r10, -8(r3)
+L(L01):        addi    r5, r5, 32      C advance vp by 4 limbs
+       addi    r4, r4, 32      C advance up by 4 limbs
+       addi    r3, r3, 32      C advance rp by 4 limbs
+       bdnz    L(oop)          C decrement ctr, loop while nonzero
+
+       logop   r10, r6, r7     C combine the last pair still held in r6, r7
+       std     r10, -32(r3)    C store the top result limb
+       blr
+EPILOGUE()
diff --git a/mpn/powerpc64/lshift.asm b/mpn/powerpc64/lshift.asm

new file mode 100644 (file)

index 0000000..41e5ddd
--- /dev/null
+++ b/mpn/powerpc64/lshift.asm
@@ -0,0 +1,116 @@
+dnl  PowerPC-64 mpn_lshift -- rp[] = up[] << cnt
+
+dnl  Copyright 2003, 2005 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C              cycles/limb
+C POWER3/PPC630:     1.5
+C POWER4/PPC970:     3.0
+
+C INPUT PARAMETERS
+define(`rp',`r3')      C destination limbs
+define(`up',`r4')      C source limbs
+define(`n',`r5')       C limb count
+define(`cnt',`r6')     C left shift count in bits
+
+define(`tnc',`r5')     C 64 - cnt; reuses r5 once n has been copied to CTR
+define(`v0',`r0')      C bits of u0 shifted right by tnc
+define(`v1',`r7')      C bits of u1 shifted right by tnc
+define(`u0',`r8')      C source limb, even pipeline slot
+define(`u1',`r9')      C source limb, odd pipeline slot
+define(`h0',`r10')     C u0 shifted left by cnt
+define(`h1',`r11')     C u1 shifted left by cnt
+
+
+ASM_START()
+PROLOGUE(mpn_lshift)
+ifdef(`HAVE_ABI_mode32',
+`      rldicl  r7, r5, 0, 32   C zero extend n
+       mtctr   r7',            C copy n to count register
+`      mtctr   n')             C copy n to count register
+
+ifdef(`HAVE_ABI_mode32',
+`      rldic   r0, n, 3, 32',  C byte count corresponding to n
+`      rldicr  r0, n, 3, 60')  C byte count corresponding to n
+
+       add     rp, rp, r0      C rp = rp + 8*n (bytes)
+       add     up, up, r0      C up = up + 8*n (bytes)
+       addi    rp, rp, 8       C rp now 16 bytes above the top result limb
+       addi    up, up, -8      C up now points to last limb
+       subfic  tnc, cnt, 64    C reverse shift count
+
+       ld      u0, 0(up)       C most significant source limb
+       sld     h0, u0, cnt     C its bits that stay in the top result limb
+       srd     r12, u0, tnc    C return value
+       bdz     L(1)            C jump for n = 1
+
+       ld      u1, -8(up)
+       bdz     L(2)            C jump for n = 2
+
+       ldu     u0, -16(up)     C load and step up down two limbs
+       bdz     L(end)          C jump for n = 3
+
+L(oop):        srd     v1, u1, tnc     C bits of u1 that move into the limb above
+       sld     h1, u1, cnt     C bits of u1 that stay in its own limb
+       ld      u1, -8(up)      C fetch the next lower source limb
+       or      h0, v1, h0      C complete the pending result limb
+       stdu    h0, -16(rp)     C store it and step rp down two limbs
+
+       bdz     L(exit)
+
+       srd     v0, u0, tnc     C same steps for the other pipeline slot
+       sld     h0, u0, cnt
+       ldu     u0, -16(up)     C load and step up down two limbs
+       or      h1, v0, h1
+       std     h1, -8(rp)
+
+       bdnz    L(oop)
+
+L(end):        srd     v1, u1, tnc     C wind down: u1 and u0 are both still pending
+       sld     h1, u1, cnt
+       or      h0, v1, h0
+       stdu    h0, -16(rp)
+       srd     v0, u0, tnc
+       sld     h0, u0, cnt
+       or      h1, v0, h1
+       std     h1, -8(rp)
+L(1):  std     h0, -16(rp)     C store least significant result limb
+ifdef(`HAVE_ABI_mode32',
+`      srdi    r3, r12, 32
+       mr      r4, r12
+',`    mr      r3, r12
+')
+       blr                     C shifted-out bits returned (mode32: split r3, r4)
+
+L(exit):       srd     v0, u0, tnc     C wind down: u0 then u1 still pending
+       sld     h0, u0, cnt
+       or      h1, v0, h1
+       std     h1, -8(rp)
+L(2):  srd     v1, u1, tnc
+       sld     h1, u1, cnt
+       or      h0, v1, h0
+       stdu    h0, -16(rp)
+       std     h1, -8(rp)      C store least significant result limb
+ifdef(`HAVE_ABI_mode32',
+`      srdi    r3, r12, 32
+       mr      r4, r12
+',`    mr      r3, r12
+')
+       blr                     C shifted-out bits returned (mode32: split r3, r4)
+EPILOGUE()
diff --git a/mpn/powerpc64/mode32/add_n.asm b/mpn/powerpc64/mode32/add_n.asm

new file mode 100644 (file)

index 0000000..4c62041
--- /dev/null
+++ b/mpn/powerpc64/mode32/add_n.asm
@@ -0,0 +1,75 @@
+dnl  PowerPC-64/mode32 mpn_add_n -- Add two limb vectors of the same length > 0
+dnl  and store sum in a third limb vector.
+
+dnl  Copyright 1999, 2000, 2001, 2003, 2005 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C              cycles/limb
+C POWER3/PPC630:     ?
+C POWER4/PPC970:     4.25
+
+C INPUT PARAMETERS
+C rp   r3      destination limbs (written)
+C up   r4      first source operand, s1 (read)
+C vp   r5      second source operand, s2 (read)
+C n    r6      limb count, copied to CTR
+
+ASM_START()
+PROLOGUE(mpn_add_n)
+       mtctr   r6              C copy size into CTR
+       addic   r0, r0, 0       C clear cy
+       ld      r8, 0(r4)       C load least significant s1 limb
+       ld      r0, 0(r5)       C load least significant s2 limb
+       addi    r3, r3, -8      C offset res_ptr, it's updated before it's used
+       bdz     L(end)          C If done, skip loop
+
+L(oop):        ld      r9, 8(r4)       C load s1 limb
+       ld      r10, 8(r5)      C load s2 limb
+       adde    r7, r0, r8      C add limbs with cy, set cy
+       srdi    r6, r0, 32      C upper 32 bits of s2 limb
+       srdi    r11, r8, 32     C upper 32 bits of s1 limb
+       adde    r6, r6, r11     C add high limb parts, set cy
+       std     r7, 8(r3)       C store result limb
+       bdz     L(exit)         C decrement CTR and exit if done
+       ldu     r8, 16(r4)      C load s1 limb and update s1_ptr
+       ldu     r0, 16(r5)      C load s2 limb and update s2_ptr
+       adde    r7, r10, r9     C add limbs with cy, set cy
+       srdi    r6, r10, 32     C upper 32 bits of s2 limb
+       srdi    r11, r9, 32     C upper 32 bits of s1 limb
+       adde    r6, r6, r11     C add high limb parts, set cy
+       stdu    r7, 16(r3)      C store result limb and update res_ptr
+       bdnz    L(oop)          C decrement CTR and loop back
+
+L(end):        adde    r7, r0, r8      C add limbs with cy, set cy
+       srdi    r6, r0, 32
+       srdi    r11, r8, 32
+       adde    r6, r6, r11     C add high limb parts, set cy (sum in r6 unused)
+       std     r7, 8(r3)       C store ultimate result limb
+       li      r3, 0           C load cy into ...
+       addze   r4, r3          C ... return value register
+       blr
+L(exit):       adde    r7, r10, r9     C add limbs with cy, set cy
+       srdi    r6, r10, 32
+       srdi    r11, r9, 32
+       adde    r6, r6, r11     C add high limb parts, set cy (sum in r6 unused)
+       std     r7, 16(r3)      C store ultimate result limb
+       li      r3, 0           C load cy into ...
+       addze   r4, r3          C ... return value register
+       blr
+EPILOGUE()
diff --git a/mpn/powerpc64/mode32/addmul_1.asm b/mpn/powerpc64/mode32/addmul_1.asm

new file mode 100644 (file)

index 0000000..41a9078
--- /dev/null
+++ b/mpn/powerpc64/mode32/addmul_1.asm
@@ -0,0 +1,68 @@
+dnl  PowerPC-64 mpn_addmul_1 -- Multiply a limb vector with a limb and add
+dnl  the result to a second limb vector.
+
+dnl  Copyright 1999, 2000, 2001, 2003, 2005 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C              cycles/limb
+C POWER3/PPC630:     ?
+C POWER4/PPC970:     12.5
+
+C INPUT PARAMETERS
+C rp   r3      limbs that are read, added to, and written back
+C up   r4      source limbs (read)
+C n    r5      limb count, copied to CTR (r5 is then reused as scratch)
+C v    r6,r7  or  r7,r8
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+
+ifdef(`BROKEN_LONGLONG_PARAM',
+`      rldimi  r8, r7, 32,0    C assemble vlimb from separate 32-bit arguments
+       mr      r6, r8
+',`
+       rldimi  r7, r6, 32,0    C assemble vlimb from separate 32-bit arguments
+       mr      r6, r7
+')
+       li      r7, 0           C cy_limb = 0
+       mtctr   r5              C loop n times
+       addic   r0, r0, 0       C clear cy
+       addi    r3, r3, -8      C pre-bias rp for the update-form store
+       addi    r4, r4, -8      C pre-bias up for the update-form load
+
+L(oop):        ldu     r0, 8(r4)       C next up limb, advance up
+       mulld   r9, r0, r6      C low 64 bits of up limb * v
+       adde    r12, r9, r7     C add old high limb and new low limb
+       srdi    r5, r9, 32
+       srdi    r11, r7, 32
+       adde    r5, r5, r11     C add high limb parts, set cy
+       mulhdu  r7, r0, r6      C high 64 bits of up limb * v
+       addze   r7, r7          C fold cy into the new high limb
+       ld      r10, 8(r3)      C current rp limb
+       addc    r9, r12, r10    C add it to the low product word, set cy
+       srdi    r5, r12, 32
+       srdi    r11, r10, 32
+       adde    r5, r5, r11     C add high limb parts, set cy
+       stdu    r9, 8(r3)       C store result limb, advance rp
+       bdnz    L(oop)
+
+       addze   r4, r7          C final high limb plus cy
+       srdi    r3, r4, 32      C r3 = its upper 32 bits; r4 keeps the value
+       blr
+EPILOGUE()
diff --git a/mpn/powerpc64/mode32/mul_1.asm b/mpn/powerpc64/mode32/mul_1.asm

new file mode 100644 (file)

index 0000000..091be4d
--- /dev/null
+++ b/mpn/powerpc64/mode32/mul_1.asm
@@ -0,0 +1,62 @@
+dnl  PowerPC-64 mpn_mul_1 -- Multiply a limb vector with a limb and store
+dnl  the result in a second limb vector.
+
+dnl  Copyright 1999, 2000, 2001, 2003, 2005 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C              cycles/limb
+C POWER3/PPC630:     ?
+C POWER4/PPC970:     10
+
+C INPUT PARAMETERS
+C rp   r3      destination limbs (written only)
+C up   r4      source limbs (read)
+C n    r5      limb count, copied to CTR (r5 is then reused as scratch)
+C v    r6,r7  or  r7,r8
+
+ASM_START()
+PROLOGUE(mpn_mul_1)
+
+ifdef(`BROKEN_LONGLONG_PARAM',
+`      rldimi  r8, r7, 32,0    C assemble vlimb from separate 32-bit arguments
+       mr      r6, r8
+',`
+       rldimi  r7, r6, 32,0    C assemble vlimb from separate 32-bit arguments
+       mr      r6, r7
+')
+       li      r7, 0           C cy_limb = 0
+       mtctr   r5              C loop n times
+       addic   r0, r0, 0       C clear cy
+       addi    r3, r3, -8      C pre-bias rp for the update-form store
+       addi    r4, r4, -8      C pre-bias up for the update-form load
+
+L(oop):        ldu     r0, 8(r4)       C next up limb, advance up
+       mulld   r9, r0, r6      C low 64 bits of up limb * v
+       adde    r12, r9, r7     C add old high limb and new low limb
+       srdi    r5, r9, 32
+       srdi    r11, r7, 32
+       adde    r5, r5, r11     C add high limb parts, set cy
+       mulhdu  r7, r0, r6      C high 64 bits; cy is carried into the next adde
+       stdu    r12, 8(r3)      C store result limb, advance rp
+       bdnz    L(oop)
+
+       addze   r4, r7          C final high limb plus cy
+       srdi    r3, r4, 32      C r3 = its upper 32 bits; r4 keeps the value
+       blr
+EPILOGUE()
diff --git a/mpn/powerpc64/mode32/sub_n.asm b/mpn/powerpc64/mode32/sub_n.asm

new file mode 100644 (file)

index 0000000..5bcc4a4
--- /dev/null
+++ b/mpn/powerpc64/mode32/sub_n.asm
@@ -0,0 +1,77 @@
+dnl  PowerPC-64/mode32 mpn_sub_n -- Subtract two limb vectors of the same
+dnl  length and store difference in a third limb vector.
+
+dnl  Copyright 1999, 2000, 2001, 2003, 2005 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C              cycles/limb
+C POWER3/PPC630:     ?
+C POWER4/PPC970:     4.25
+
+C INPUT PARAMETERS
+C rp   r3      destination limbs (written)
+C up   r4      minuend limbs, s1 (read)
+C vp   r5      subtrahend limbs, s2 (read)
+C n    r6      limb count, copied to CTR
+
+ASM_START()
+PROLOGUE(mpn_sub_n)
+       mtctr   r6              C copy size into CTR
+       addic   r0, r6, -1      C set cy
+       ld      r8, 0(r4)       C load least significant s1 limb
+       ld      r0, 0(r5)       C load least significant s2 limb
+       addi    r3, r3, -8      C offset res_ptr, it's updated before it's used
+       bdz     L(end)          C If done, skip loop
+
+L(oop):        ld      r9, 8(r4)       C load s1 limb
+       ld      r10, 8(r5)      C load s2 limb
+       subfe   r7, r0, r8      C subtract limbs with cy, set cy
+       srdi    r6, r0, 32      C upper 32 bits of s2 limb
+       srdi    r11, r8, 32     C upper 32 bits of s1 limb
+       subfe   r6, r6, r11     C subtract high limb parts, set cy
+       std     r7, 8(r3)       C store result limb
+       bdz     L(exit)         C decrement CTR and exit if done
+       ldu     r8, 16(r4)      C load s1 limb and update s1_ptr
+       ldu     r0, 16(r5)      C load s2 limb and update s2_ptr
+       subfe   r7, r10, r9     C subtract limbs with cy, set cy
+       srdi    r6, r10, 32     C upper 32 bits of s2 limb
+       srdi    r11, r9, 32     C upper 32 bits of s1 limb
+       subfe   r6, r6, r11     C subtract high limb parts, set cy
+       stdu    r7, 16(r3)      C store result limb and update res_ptr
+       bdnz    L(oop)          C decrement CTR and loop back
+
+L(end):        subfe   r7, r0, r8      C subtract limbs with cy, set cy
+       srdi    r6, r0, 32
+       srdi    r11, r8, 32
+       subfe   r6, r6, r11     C subtract high limb parts, set cy
+       std     r7, 8(r3)       C store ultimate result limb
+       subfe   r3, r0, r0      C load !cy into ...
+       subfic  r4, r3, 0       C ... return value register
+       li      r3, 0           C zero extend return value
+       blr
+L(exit):       subfe   r7, r10, r9     C subtract limbs with cy, set cy
+       srdi    r6, r10, 32
+       srdi    r11, r9, 32
+       subfe   r6, r6, r11     C subtract high limb parts, set cy
+       std     r7, 16(r3)      C store ultimate result limb
+       subfe   r3, r0, r0      C load !cy into ...
+       subfic  r4, r3, 0       C ... return value register
+       li      r3, 0           C zero extend return value
+       blr
+EPILOGUE()
diff --git a/mpn/powerpc64/mode32/submul_1.asm b/mpn/powerpc64/mode32/submul_1.asm

new file mode 100644 (file)

index 0000000..44ac326
--- /dev/null
+++ b/mpn/powerpc64/mode32/submul_1.asm
@@ -0,0 +1,71 @@
+dnl  PowerPC-64 mpn_submul_1 -- Multiply a limb vector with a limb and subtract
+dnl  the result from a second limb vector.
+
+dnl  Copyright 1999, 2000, 2001, 2003, 2005 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C              cycles/limb
+C POWER3/PPC630:     ?
+C POWER4/PPC970:     16
+
+C INPUT PARAMETERS
+C rp   r3
+C up   r4
+C n    r5
+C v    r6,r7  or  r7,r8
+
+ASM_START()
+C mpn_submul_1, mode32 variant: multiply the n-limb vector at up by the
+C 64-bit limb v, subtract the product from the vector at rp, and hand back
+C the final carry limb (r4 = carry limb, r3 = the same value shifted right
+C by 32).  v arrives as two 32-bit register halves and is merged into r6.
+C NOTE(review): each carry-generating add/subtract is followed by the same
+C operation on the operands shifted right by 32, with the result discarded.
+C Presumably the carry bit only reflects the low 32 bits in this mode and the
+C extra step regenerates the true 64-bit carry -- confirm against the
+C PowerPC rules for 32-bit mode.
+PROLOGUE(mpn_submul_1)
+
+ifdef(`BROKEN_LONGLONG_PARAM',
+`      rldimi  r8, r7, 32,0    C assemble vlimb from separate 32-bit arguments
+       mr      r6, r8
+',`
+       rldimi  r7, r6, 32,0    C assemble vlimb from separate 32-bit arguments
+       mr      r6, r7
+')
+       li      r7, 0           C cy_limb = 0
+C Loop count = n.
+       mtctr   r5
+C Adding zero cannot carry, so this only clears the carry bit.
+       addic   r0, r0, 0
+C Bias both pointers by one limb; the ldu/stdu below pre-increment by 8.
+       addi    r3, r3, -8
+       addi    r4, r4, -8
+
+C Per limb: r9 = low half of up[i]*v, r12 = r9 + cy_limb + carry,
+C r7 = high half of up[i]*v + carry (the next cy_limb), and finally
+C rp[i] = rp[i] - r12.  r5 and r11 are scratch for the carry regeneration.
+L(oop):        ldu     r0, 8(r4)
+       mulld   r9, r0, r6
+       adde    r12, r9, r7     C add old high limb and new low limb
+       srdi    r5, r9, 32
+       srdi    r11, r7, 32
+       adde    r5, r5, r11     C add high limb parts, set cy
+       mulhdu  r7, r0, r6
+       addze   r7, r7
+       ld      r10, 8(r3)
+       subfc   r9, r12, r10
+       srdi    r5, r12, 32
+       srdi    r11, r10, 32
+       subfe   r5, r5, r11     C subtract high limb parts, set cy
+       stdu    r9, 8(r3)
+C After a subtract the carry bit means no borrow.  The next two instructions
+C flip it so that carry set means borrow, which is what the adde at the top
+C of the next iteration (and the addze after the loop) adds in.
+       subfe   r11, r11, r11   C invert ...
+       addic   r11, r11, 1     C ... carry
+       bdnz    L(oop)
+
+C Fold the last borrow into the carry limb, then expose its upper half in r3.
+       addze   r4, r7
+       srdi    r3, r4, 32
+       blr
+EPILOGUE()
+
diff --git a/mpn/powerpc64/mode64/addlsh1_n.asm b/mpn/powerpc64/mode64/addlsh1_n.asm

new file mode 100644 (file)

index 0000000..15182e1
--- /dev/null
+++ b/mpn/powerpc64/mode64/addlsh1_n.asm
@@ -0,0 +1,82 @@
+dnl  PowerPC-64 mpn_addlsh1_n -- rp[] = up[] + (vp[] << 1)
+
+dnl  Copyright 2003, 2005 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C              cycles/limb
+C POWER3/PPC630:     2         (1.5 c/l should be possible)
+C POWER4/PPC970:     4         (2.0 c/l should be possible)
+
+C INPUT PARAMETERS
+C rp   r3
+C up   r4
+C vp   r5
+C n    r6
+
+C Register names.  Note that s0 shares r6 with the incoming count n; n is
+C moved to ctr before s0 is first written.
+define(`rp',`r3')
+define(`up',`r4')
+define(`vp',`r5')
+
+define(`s0',`r6')
+define(`s1',`r7')
+define(`u0',`r8')
+define(`v0',`r10')
+define(`v1',`r11')
+
+ASM_START()
+C mpn_addlsh1_n: rp[] = up[] + (vp[] << 1) over n limbs.
+C Returns in r3 the bit shifted out of the most significant v limb plus the
+C carry out of the last addition.
+C The loop is unrolled twice and alternates between the v0/s1 and v1/s0
+C register pairs; L(end) finishes after the first half, L(exit) after the
+C second half.
+PROLOGUE(mpn_addlsh1_n)
+       mtctr   r6              C copy n in ctr
+C Adding zero leaves r31 unchanged; only the carry bit is affected.
+       addic   r31, r31, 0     C clear cy
+
+       ld      v0, 0(vp)       C load v limb
+       ld      u0, 0(up)       C load u limb
+       addi    up, up, -8      C update up
+       addi    rp, rp, -8      C update rp
+C s1 = lowest v limb shifted left by one; nothing is shifted in at the bottom.
+       sldi    s1, v0, 1
+       bdz     L(end)          C If done, skip loop
+
+L(oop):        ld      v1, 8(vp)       C load v limb
+       adde    s1, s1, u0      C add limbs with cy, set cy
+       std     s1, 8(rp)       C store result limb
+       srdi    s0, v0, 63      C shift down previous v limb
+       ldu     u0, 16(up)      C load u limb and update up
+       rldimi  s0, v1, 1, 0    C left shift v limb and merge with prev v limb
+
+       bdz     L(exit)         C decrement ctr and exit if done
+
+       ldu     v0, 16(vp)      C load v limb and update vp
+       adde    s0, s0, u0      C add limbs with cy, set cy
+       stdu    s0, 16(rp)      C store result limb and update rp
+       srdi    s1, v1, 63      C shift down previous v limb
+       ld      u0, 8(up)       C load u limb
+       rldimi  s1, v0, 1, 0    C left shift v limb and merge with prev v limb
+
+       bdnz    L(oop)          C decrement ctr and loop back
+
+C Tail when the pending shifted limb is in s1 and the last v limb is v0.
+C r7 is the same register as s1.
+L(end):        adde    r7, s1, u0
+       std     r7, 8(rp)       C store last result limb
+       srdi    r3, v0, 63
+       addze   r3, r3
+       blr
+C Tail when the pending shifted limb is in s0 and the last v limb is v1.
+L(exit):       adde    r7, s0, u0
+       std     r7, 16(rp)      C store last result limb
+       srdi    r3, v1, 63
+       addze   r3, r3
+       blr
+EPILOGUE()
diff --git a/mpn/powerpc64/mode64/addmul_1.asm b/mpn/powerpc64/mode64/addmul_1.asm

new file mode 100644 (file)

index 0000000..cadab3a
--- /dev/null
+++ b/mpn/powerpc64/mode64/addmul_1.asm
@@ -0,0 +1,185 @@
+dnl  PowerPC-64 mpn_addmul_1 -- Multiply a limb vector with a limb and add
+dnl  the result to a second limb vector.
+
+dnl  Copyright 1999, 2000, 2001, 2003, 2004, 2005, 2006 Free Software
+dnl  Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C              cycles/limb
+C POWER3/PPC630:    6-18
+C POWER4/PPC970:     8
+C POWER5:            8
+
+C TODO
+C  * Reduce the number of registers used.  Some mul destination registers could
+C    be coalesced.
+C  * Delay std for preserving registers, and suppress them for n=1.
+C  * Write faster feed-in code.  If nothing else, avoid one or two up updates.
+
+C INPUT PARAMETERS
+define(`rp', `r3')
+define(`up', `r4')
+define(`n', `r5')
+define(`vl', `r6')
+
+ASM_START()
+C mpn_addmul_1: rp[i] += up[i] * vl for i in 0..n-1.  The value returned in
+C r3 is the most significant product limb plus the final carry.
+C r26-r31 are stored just below the stack pointer (r1 itself is not moved)
+C and reloaded at L(ret).
+C After the first use of n for the loop count, r5 is reused as a product
+C register.
+PROLOGUE(mpn_addmul_1)
+       std     r31, -8(r1)
+       std     r30, -16(r1)
+       std     r29, -24(r1)
+       std     r28, -32(r1)
+       std     r27, -40(r1)
+       std     r26, -48(r1)
+
+C Dispatch on n mod 4 with ctr = (n + 3) / 4.  Each feed-in case consumes
+C enough limbs that the remaining count is 2 mod 4, then joins L(fic).  The
+C main loop handles four limbs per iteration and L(end) the final two.
+       rldicl. r0, n, 0,62     C r0 = n & 3, set cr0
+       cmpdi   cr6, r0, 2
+       addi    n, n, 3         C compute count...
+       srdi    n, n, 2         C ...for ctr
+       mtctr   n               C copy count into ctr
+       beq     cr0, L(b00)
+       blt     cr6, L(b01)
+       beq     cr6, L(b10)
+
+C n mod 4 = 3: handle one limb; r12 carries its high product limb forward.
+L(b11):        ld      r26, 0(up)
+       ld      r28, 0(rp)
+       addi    up, up, 8
+       nop
+       mulld   r0, r26, r6
+       mulhdu  r12, r26, r6
+       addc    r0, r0, r28
+       std     r0, 0(rp)
+       addi    rp, rp, 8
+       b       L(fic)
+
+C n mod 4 = 0: handle two limbs; r12 = high product limb plus carry.
+L(b00):        ld      r26, 0(up)
+       ld      r27, 8(up)
+       ld      r28, 0(rp)
+       ld      r29, 8(rp)
+       addi    up, up, 16
+       nop
+       mulld   r0, r26, r6
+       mulhdu  r5, r26, r6
+       mulld   r7, r27, r6
+       mulhdu  r8, r27, r6
+       addc    r7, r7, r5
+       addze   r12, r8
+       addc    r0, r0, r28
+       std     r0, 0(rp)
+       adde    r7, r7, r29
+       std     r7, 8(rp)
+       addi    rp, rp, 16
+       b       L(fic)
+
+C n mod 4 = 1: the bdnz also pre-decrements ctr.  If ctr reaches zero then
+C n = 1 and the single limb is finished right here; otherwise L(gt1) handles
+C three limbs before joining L(fic).
+L(b01):        bdnz    L(gt1)
+       ld      r26, 0(up)
+       ld      r28, 0(rp)
+       mulld   r0, r26, r6
+       mulhdu  r8, r26, r6
+       addc    r0, r0, r28
+       std     r0, 0(rp)
+       b       L(ret)
+L(gt1):        ld      r26, 0(up)
+       ld      r27, 8(up)
+       mulld   r0, r26, r6
+       mulhdu  r5, r26, r6
+       ld      r26, 16(up)
+       ld      r28, 0(rp)
+       mulld   r7, r27, r6
+       mulhdu  r8, r27, r6
+       ld      r29, 8(rp)
+       ld      r30, 16(rp)
+       mulld   r9, r26, r6
+       mulhdu  r10, r26, r6
+       addc    r7, r7, r5
+       adde    r9, r9, r8
+       addze   r12, r10
+       addc    r0, r0, r28
+       std     r0, 0(rp)
+       adde    r7, r7, r29
+       std     r7, 8(rp)
+       adde    r9, r9, r30
+       std     r9, 16(rp)
+       addi    up, up, 24
+       addi    rp, rp, 24
+       b       L(fic)
+
+C n mod 4 = 2: nothing to pre-process; adding zero clears the carry bit.
+L(b10):        addic   r0, r0, 0
+       li      r12, 0          C cy_limb = 0
+C Common entry: preload the next two up limbs.
+L(fic):        ld      r26, 0(up)
+       ld      r27, 8(up)
+       addi    up, up, 16
+       bdz     L(end)
+C Main loop, four limbs per iteration.  The first adde picks up the carry
+C left by the rp additions of the previous block; the product chain ends
+C with addze into r12 before addc starts the rp chain afresh.
+                               C registers dying
+L(top):        mulld   r0, r26, r6     C
+       mulhdu  r5, r26, r6     C 26
+       ld      r26, 0(up)      C
+       ld      r28, 0(rp)      C
+       mulld   r7, r27, r6     C
+       mulhdu  r8, r27, r6     C 27
+       ld      r27, 8(up)      C
+       ld      r29, 8(rp)      C
+       adde    r0, r0, r12     C 0 12
+       adde    r7, r7, r5      C 5 7
+       mulld   r9, r26, r6     C
+       mulhdu  r10, r26, r6    C 26
+       ld      r26, 16(up)     C
+       ld      r30, 16(rp)     C
+       mulld   r11, r27, r6    C
+       mulhdu  r12, r27, r6    C 27
+       ld      r27, 24(up)     C
+       ld      r31, 24(rp)     C
+       adde    r9, r9, r8      C 8 9
+       adde    r11, r11, r10   C 10 11
+       addze   r12, r12        C 12
+       addc    r0, r0, r28     C 0 28
+       std     r0, 0(rp)       C 0
+       adde    r7, r7, r29     C 7 29
+       std     r7, 8(rp)       C 7
+       adde    r9, r9, r30     C 9 30
+       std     r9, 16(rp)      C 9
+       adde    r11, r11, r31   C 11 31
+       std     r11, 24(rp)     C 11
+       addi    up, up, 32      C
+       addi    rp, rp, 32      C
+       bdnz    L(top)          C
+
+C Wind-down: the last two limbs are already in r26 and r27.
+L(end):        mulld   r0, r26, r6
+       mulhdu  r5, r26, r6
+       ld      r28, 0(rp)
+       nop
+       mulld   r7, r27, r6
+       mulhdu  r8, r27, r6
+       ld      r29, 8(rp)
+       nop
+       adde    r0, r0, r12
+       adde    r7, r7, r5
+       addze   r8, r8
+       addc    r0, r0, r28
+       std     r0, 0(rp)
+       adde    r7, r7, r29
+       std     r7, 8(rp)
+C Return value = top product limb (r8) plus the pending carry, then restore
+C the saved registers.
+L(ret):        addze   r3, r8
+       ld      r31, -8(r1)
+       ld      r30, -16(r1)
+       ld      r29, -24(r1)
+       ld      r28, -32(r1)
+       ld      r27, -40(r1)
+       ld      r26, -48(r1)
+       blr
+EPILOGUE()
diff --git a/mpn/powerpc64/mode64/aors_n.asm b/mpn/powerpc64/mode64/aors_n.asm

new file mode 100644 (file)

index 0000000..42b6d79
--- /dev/null
+++ b/mpn/powerpc64/mode64/aors_n.asm
@@ -0,0 +1,203 @@
+dnl  PowerPC-64 mpn_add_n/mpn_sub_n -- mpn addition and subtraction.
+
+dnl  Copyright 1999, 2000, 2001, 2003, 2004, 2005, 2007 Free Software
+dnl  Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C              cycles/limb
+C POWER3/PPC630:     1.5
+C POWER4/PPC970:     2
+
+C   n     POWER3/PPC630   POWER4/PPC970
+C     1               17.00           19.00
+C     2                9.00           10.49
+C     3                5.33            7.66
+C     4                4.50            5.14
+C     5                4.20            4.80
+C     6                3.83            4.33
+C     7                3.00            3.99
+C     8                2.87            3.55
+C     9                2.89            3.40
+C    10                2.60            3.42
+C    11                2.45            3.15
+C    12                2.41            2.99
+C    13                2.46            3.01
+C    14                2.42            2.97
+C    15                2.20            2.85
+C    50                1.78            2.44
+C   100                1.83            2.20
+C   200                1.55            2.12
+C   400                1.53            2.05
+C  1000                1.98            2.02#
+C  2000                1.50#           2.04
+C  4000                2.55            2.50
+C  8000                2.70            2.45
+C 16000                2.65            5.94
+C 32000                2.62           16.41
+C 64000                2.73           18.94
+
+C This code is a little bit slower for POWER3/PPC630 than the simple code used
+C previously, but it is much faster for POWER4/PPC970.  The reason for the
+C POWER3/PPC630 slowdown can be attributed to the saving and restoring of 4
+C registers.
+
+C INPUT PARAMETERS
+C rp   r3
+C up   r4
+C vp   r5
+C n    r6
+
+C Operation-specific macros, chosen by OPERATION_add_n or OPERATION_sub_n:
+C   ADDSUBC  add/subtract that consumes and produces the carry bit
+C   ADDSUB   add/subtract that only produces it (not used in this file)
+C   func, func_nc  the two entry point names
+C   GENRVAL  turns r3 = cy - 1 into the 0/1 return value
+C   SETCBR   sets the carry bit from a carry-in register (func_nc entry):
+C            for add, carry set when the register is nonzero; for sub, carry
+C            set (meaning no borrow) when the register is zero
+C   CLRCB    puts the carry bit in the no-carry/no-borrow state; the sub
+C            form adds -1 to r1 and so relies on r1 being nonzero
+ifdef(`OPERATION_add_n',`
+  define(ADDSUBC,      adde)
+  define(ADDSUB,       addc)
+  define(func,         mpn_add_n)
+  define(func_nc,      mpn_add_nc)
+  define(GENRVAL,      `addi   r3, r3, 1')
+  define(SETCBR,       `addic  r0, $1, -1')
+  define(CLRCB,                `addic  r0, r0, 0')
+')
+ifdef(`OPERATION_sub_n',`
+  define(ADDSUBC,      subfe)
+  define(ADDSUB,       subfc)
+  define(func,         mpn_sub_n)
+  define(func_nc,      mpn_sub_nc)
+  define(GENRVAL,      `neg    r3, r3')
+  define(SETCBR,       `subfic r0, $1, 0')
+  define(CLRCB,                `addic  r0, r1, -1')
+')
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
+
+ASM_START()
+C func_nc: same as func but with a carry-in taken from r7.  It only sets up
+C the carry bit and then branches to L(ent) inside func.
+PROLOGUE(func_nc)
+       SETCBR(r7)
+       b       L(ent)
+EPILOGUE()
+
+C func: rp[] = up[] +/- vp[] over n limbs (r3 = rp, r4 = up, r5 = vp, r6 = n).
+C Returns the carry or borrow out as 0 or 1 in r3.
+C r28-r31 are stored just below the stack pointer (r1 is not moved) and
+C reloaded at L(ret).
+PROLOGUE(func)
+       CLRCB
+L(ent):        std     r31, -8(r1)
+       std     r30, -16(r1)
+       std     r29, -24(r1)
+       std     r28, -32(r1)
+
+C Dispatch on n mod 4 with ctr = (n + 3) / 4.  The b11/b01/b10 cases handle
+C the 3, 1 or 2 odd limbs, then either enter the 4-limb code at L(go) or,
+C when ctr runs out, return.  The compares and shifts here do not touch the
+C carry bit set up above.
+       rldicl. r0, r6, 0,62    C r0 = n & 3, set cr0
+       cmpdi   cr6, r0, 2
+       addi    r6, r6, 3       C compute count...
+       srdi    r6, r6, 2       C ...for ctr
+       mtctr   r6              C copy count into ctr
+       beq     cr0, L(b00)
+       blt     cr6, L(b01)
+       beq     cr6, L(b10)
+
+L(b11):        ld      r8, 0(r4)       C load s1 limb
+       ld      r9, 0(r5)       C load s2 limb
+       ld      r10, 8(r4)      C load s1 limb
+       ld      r11, 8(r5)      C load s2 limb
+       ld      r12, 16(r4)     C load s1 limb
+       addi    r4, r4, 24
+       ld      r0, 16(r5)      C load s2 limb
+       addi    r5, r5, 24
+       ADDSUBC r29, r9, r8
+       ADDSUBC r30, r11, r10
+       ADDSUBC r31, r0, r12
+       std     r29, 0(r3)
+       std     r30, 8(r3)
+       std     r31, 16(r3)
+       addi    r3, r3, 24
+       bdnz    L(go)
+       b       L(ret)
+
+L(b01):        ld      r12, 0(r4)      C load s1 limb
+       addi    r4, r4, 8
+       ld      r0, 0(r5)       C load s2 limb
+       addi    r5, r5, 8
+       ADDSUBC r31, r0, r12    C add
+       std     r31, 0(r3)
+       addi    r3, r3, 8
+       bdnz    L(go)
+       b       L(ret)
+
+L(b10):        ld      r10, 0(r4)      C load s1 limb
+       ld      r11, 0(r5)      C load s2 limb
+       ld      r12, 8(r4)      C load s1 limb
+       addi    r4, r4, 16
+       ld      r0, 8(r5)       C load s2 limb
+       addi    r5, r5, 16
+       ADDSUBC r30, r11, r10   C add
+       ADDSUBC r31, r0, r12    C add
+       std     r30, 0(r3)
+       std     r31, 8(r3)
+       addi    r3, r3, 16
+       bdnz    L(go)
+       b       L(ret)
+
+L(b00):        C INITCY                C clear/set cy
+C Preload four limb pairs; the loop below overlaps the loads for the next
+C group with the arithmetic and stores of the current one.
+L(go): ld      r6, 0(r4)       C load s1 limb
+       ld      r7, 0(r5)       C load s2 limb
+       ld      r8, 8(r4)       C load s1 limb
+       ld      r9, 8(r5)       C load s2 limb
+       ld      r10, 16(r4)     C load s1 limb
+       ld      r11, 16(r5)     C load s2 limb
+       ld      r12, 24(r4)     C load s1 limb
+       ld      r0, 24(r5)      C load s2 limb
+       bdz     L(end)
+
+       addi    r4, r4, 32
+       addi    r5, r5, 32
+
+L(oop):        ADDSUBC r28, r7, r6
+       ld      r6, 0(r4)       C load s1 limb
+       ld      r7, 0(r5)       C load s2 limb
+       ADDSUBC r29, r9, r8
+       ld      r8, 8(r4)       C load s1 limb
+       ld      r9, 8(r5)       C load s2 limb
+       ADDSUBC r30, r11, r10
+       ld      r10, 16(r4)     C load s1 limb
+       ld      r11, 16(r5)     C load s2 limb
+       ADDSUBC r31, r0, r12
+       ld      r12, 24(r4)     C load s1 limb
+       ld      r0, 24(r5)      C load s2 limb
+       std     r28, 0(r3)
+       addi    r4, r4, 32
+       std     r29, 8(r3)
+       addi    r5, r5, 32
+       std     r30, 16(r3)
+       std     r31, 24(r3)
+       addi    r3, r3, 32
+       bdnz    L(oop)          C decrement ctr and loop back
+
+C Final group of four: operands are already in registers.
+L(end):        ADDSUBC r28, r7, r6
+       ADDSUBC r29, r9, r8
+       ADDSUBC r30, r11, r10
+       ADDSUBC r31, r0, r12
+       std     r28, 0(r3)
+       std     r29, 8(r3)
+       std     r30, 16(r3)
+       std     r31, 24(r3)
+
+L(ret):        ld      r31, -8(r1)
+       ld      r30, -16(r1)
+       ld      r29, -24(r1)
+       ld      r28, -32(r1)
+
+C The value of r0 does not matter here: subtracting a register from itself
+C with carry leaves cy - 1.  GENRVAL then maps that to 0 or 1.
+       subfe   r3, r0, r0      C r3 = cy - 1 (0 if cy set, else -1)
+       GENRVAL
+       blr
+EPILOGUE()
diff --git a/mpn/powerpc64/mode64/bdiv_dbm1c.asm b/mpn/powerpc64/mode64/bdiv_dbm1c.asm

new file mode 100644 (file)

index 0000000..8c1e87e
--- /dev/null
+++ b/mpn/powerpc64/mode64/bdiv_dbm1c.asm
@@ -0,0 +1,116 @@
+dnl  PPC64 mpn_bdiv_dbm1c.
+
+dnl  Copyright 2008 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C              cycles/limb
+C POWER3/PPC630:    6-18
+C POWER4/PPC970:    8.5
+C POWER5:           ?
+
+C TODO
+C  * Nothing to do...
+
+C INPUT PARAMETERS
+C These names document the argument registers; the code below refers to
+C r3-r7 directly.
+define(`rp', `r3')
+define(`up', `r4')
+define(`n',  `r5')
+define(`bd', `r6')
+define(`cy', `r7')
+
+ASM_START()
+C mpn_bdiv_dbm1c: with h starting as the cy argument (r7), for each limb
+C   h = h - low(up[i] * bd);  rp[i] = h;  h = h - high(up[i] * bd) - borrow
+C and the final h is returned in r3.
+C The loop is unrolled four times.  Products alternate between the register
+C pairs r9/r8 and r5/r12 (low/high), r0 holds the next up limb, r11 the limb
+C being stored.
+PROLOGUE(mpn_bdiv_dbm1c)
+C up[0] is loaded before n is examined.
+       ld      r0, 0(r4)
+
+C Dispatch on n mod 4 (cr0, cr6); cr7 records whether n <= 4, which the
+C b01 and b10 cases use to detect n = 1 and n = 2.  ctr = (n + 1) / 4.
+C NOTE(review): srwi shifts only the low 32 bits of n + 1 -- presumably n is
+C assumed to fit in 32 bits; confirm.
+       rldicl. r12, r5, 0,62
+       cmpldi  cr6, r12, 2
+       cmpldi  cr7, r5, 4
+       addi    r5, r5, 1
+       srwi    r5, r5, 2
+       mtctr   r5
+       beq     cr0, L(b00)
+       blt     cr6, L(b01)
+       beq     cr6, L(b10)
+
+C Each feed-in case computes the first product, loads the second limb and
+C biases the pointers so that the fixed offsets at its loop entry point
+C address the right limbs.
+L(b11):        mulld   r5, r0, r6
+       mulhdu  r12, r0, r6
+       ld      r0, 8(r4)
+       addi    r4, r4, -24
+       addi    r3, r3, -24
+       b       L(3)
+
+L(b00):        mulld   r9, r0, r6
+       mulhdu  r8, r0, r6
+       ld      r0, 8(r4)
+       addi    r4, r4, -16
+       addi    r3, r3, -16
+       b       L(0)
+
+L(b01):        mulld   r5, r0, r6
+       mulhdu  r12, r0, r6
+       addi    r3, r3, -8
+       ble     cr7, L(e1)
+       ld      r0, 8(r4)
+       addi    r4, r4, -8
+       b       L(1)
+
+L(b10):        mulld   r9, r0, r6
+       mulhdu  r8, r0, r6
+       ld      r0, 8(r4)
+       ble     cr7, L(e2)
+
+       ALIGN(16)
+L(top):        mulld   r5, r0, r6
+       mulhdu  r12, r0, r6
+       subfc   r11, r9, r7
+       ld      r0, 16(r4)
+       subfe   r7, r8, r11
+       std     r11, 0(r3)
+L(1):  mulld   r9, r0, r6
+       mulhdu  r8, r0, r6
+       subfc   r11, r5, r7
+       ld      r0, 24(r4)
+       subfe   r7, r12, r11
+       std     r11, 8(r3)
+L(0):  mulld   r5, r0, r6
+       mulhdu  r12, r0, r6
+       subfc   r11, r9, r7
+       ld      r0, 32(r4)
+       subfe   r7, r8, r11
+       std     r11, 16(r3)
+L(3):  mulld   r9, r0, r6
+       mulhdu  r8, r0, r6
+       subfc   r11, r5, r7
+       ld      r0, 40(r4)
+       subfe   r7, r12, r11
+       std     r11, 24(r3)
+       addi    r4, r4, 32
+       addi    r3, r3, 32
+       bdnz    L(top)
+
+C Wind-down: L(e2) finishes the last two limbs, L(e1) the very last one and
+C leaves the final h in r3.
+L(e2): mulld   r5, r0, r6
+       mulhdu  r12, r0, r6
+       subfc   r11, r9, r7
+       subfe   r7, r8, r11
+       std     r11, 0(r3)
+L(e1): subfc   r11, r5, r7
+       std     r11, 8(r3)
+       subfe   r3, r12, r11
+       blr
+EPILOGUE()
diff --git a/mpn/powerpc64/mode64/dive_1.asm b/mpn/powerpc64/mode64/dive_1.asm

new file mode 100644 (file)

index 0000000..1f482ba
--- /dev/null
+++ b/mpn/powerpc64/mode64/dive_1.asm
@@ -0,0 +1,118 @@
+dnl  PowerPC-64 mpn_divexact_1 -- mpn by limb exact division.
+
+dnl  Copyright 2006 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C              cycles/limb
+C POWER3/PPC630:    13-19
+C POWER4/PPC970:     16
+C POWER5:           16
+
+C TODO
+C  * Check if n=1 code is really an improvement.  It probably isn't.
+C  * Perhaps remove L(norm) code, it is currently unreachable.
+C  * Make more similar to mode1o.asm.
+
+C INPUT PARAMETERS
+define(`rp', `r3')
+define(`up', `r4')
+define(`n',  `r5')
+define(`d',  `r6')
+
+
+ASM_START()
+
+EXTERN(binvert_limb_table)
+
+C mpn_divexact_1: exact division of the n-limb number at up by the limb d,
+C quotient written to rp.
+C Outline: n = 1 uses a plain divdu.  Otherwise trailing zero bits of d are
+C stripped (shift count kept in r10), the inverse of the now odd d modulo
+C 2^64 is built in r7 from a table lookup plus three refinement steps, and
+C each quotient limb is (shifted source limb - carry) * inverse.
+PROLOGUE(mpn_divexact_1)
+C n becomes n - 1; cr0 tells whether that is zero.
+       addic.  n, n, -1
+       ld      r12, 0(up)
+       bne     cr0, L(2)
+       divdu   r0, r12, d
+       std     r0, 0(rp)
+       blr
+L(2):
+C Test the low bit of d; an odd d needs no shifting (r10 stays 0).
+       rldicl. r0, d, 0, 63
+       li      r10, 0
+       bne     cr0, L(7)
+C d & -d isolates the lowest set bit; 63 - cntlzd of that is the number of
+C trailing zero bits, which becomes the shift count.
+       neg     r0, d
+       and     r0, d, r0
+       cntlzd  r0, r0
+       subfic  r0, r0, 63
+       rldicl  r10, r0, 0, 32
+       srd     d, d, r0
+L(7):
+C ctr = n - 1.  r5 is free after this and is reused as the table address and
+C later as the carry limb.
+       mtctr   n
+       LEA(    r5, binvert_limb_table)
+C Table index = bits 1..7 of d.
+       rldicl  r11, d, 63, 57
+C      cmpdi   cr7, r0, 0
+       lbzx    r0, r5, r11
+C Three steps of inv = 2*inv - inv*inv*d refine the table value.
+       mulld   r9, r0, r0
+       sldi    r0, r0, 1
+       mulld   r9, d, r9
+       subf    r0, r9, r0
+       mulld   r5, r0, r0
+       sldi    r0, r0, 1
+       mulld   r5, d, r5
+       subf    r0, r5, r0
+       mulld   r9, r0, r0
+       sldi    r0, r0, 1
+       mulld   r9, d, r9
+       subf    r7, r9, r0              C r7 = 1/d mod 2^64
+C      beq     cr7, L(norm)
+C r8 = 64 - shift, used to bring in the low bits of the next source limb.
+       subfic  r8, r10, 64             C set carry as side effect
+       li      r5, 0
+
+C Main loop.  r12 = current source limb, r11 = source limb realigned by the
+C shift count, r5 = high product limb from the previous quotient limb.
+C NOTE(review): with a shift count of 0 the sld below shifts by 64; this
+C presumably yields 0 so the or is a no-op -- confirm with the sld definition.
+       ALIGN(16)
+L(loop0):
+       srd     r11, r12, r10
+       ld      r12, 8(up)
+       addi    up, up, 8
+       sld     r0, r12, r8
+       or      r11, r11, r0
+       subfe   r9, r5, r11
+       mulld   r0, r7, r9
+       std     r0, 0(rp)
+       addi    rp, rp, 8
+       mulhdu  r5, r0, d
+       bdnz    L(loop0)
+
+C Most significant limb: there is no following limb to merge in.
+       srd     r0, r12, r10
+       subfe   r0, r5, r0
+       mulld   r0, r7, r0
+       std     r0, 0(rp)
+       blr
+
+C Variant without shifting.  The only branch to L(norm) is commented out
+C above, so nothing in this file reaches it (see the TODO in the header).
+       ALIGN(16)
+L(norm):
+       mulld   r11, r12, r7
+       std     r11, 0(rp)
+       ALIGN(16)
+L(loop1):
+       mulhdu  r5, r11, d
+       ld      r9, 8(up)
+       addi    up, up, 8
+       subfe   r5, r5, r9
+       mulld   r11, r7, r5
+       std     r11, 8(rp)
+       addi    rp, rp, 8
+       bdnz    L(loop1)
+       blr
+EPILOGUE()
+ASM_END()
diff --git a/mpn/powerpc64/mode64/divrem_1.asm b/mpn/powerpc64/mode64/divrem_1.asm

new file mode 100644 (file)

index 0000000..895badf
--- /dev/null
+++ b/mpn/powerpc64/mode64/divrem_1.asm
@@ -0,0 +1,308 @@
+dnl  PowerPC-64 mpn_divrem_1 -- Divide an mpn number by an unnormalized limb.
+
+dnl  Copyright 2003, 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                          cycles/limb
+C                      norm    unorm   frac
+C POWER3/PPC630                16-34   16-34   ~11
+C POWER4/PPC970                 29              19
+C POWER5                29      29     ~20
+
+C INPUT PARAMETERS
+C qp  = r3
+C fn  = r4
+C up  = r5
+C un  = r6
+C d   = r7
+
+C We use a not very predictable branch in the frac code, therefore the cycle
+C count wobbles somewhat.  With the alternative branch-free code, things run
+C considerably slower on POWER4/PPC970 and POWER5.
+
+C Add preinv entry point.
+
+
+ASM_START()
+
+EXTERN_FUNC(mpn_invert_limb)
+
+C mpn_divrem_1: divide the un-limb number at up, extended by fn fraction
+C limbs, by the limb d.  Quotient limbs are stored from the most significant
+C end of qp downwards; the remainder is returned in r3.
+C
+C Register roles after the prologue:
+C   r25 = fn            r26 = up           r27 = normalization shift count
+C   r28 = integer limbs still to process   r29 = quotient store pointer
+C   r30 = d (shifted left by r27 on the unnormalized path)
+C   r31 = running remainder
+C   r3  = value returned by mpn_invert_limb, used as multiplier in the loops
+C
+C Two main paths: d with its top bit clear (falls through, normalizes d and
+C the dividend on the fly) and d with its top bit set (L(162)).
+PROLOGUE(mpn_divrem_1)
+
+C Save cr, lr and r25-r31, copy the arguments to the saved registers, and
+C open a 176-byte frame.  cr0 is set from un + fn.
+       mfcr    r12
+       add.    r10, r6, r4
+       std     r25, -56(r1)
+       mr      r25, r4
+       mflr    r0
+       std     r26, -48(r1)
+       mr      r26, r5
+       std     r28, -32(r1)
+       mr      r28, r6
+       std     r29, -24(r1)
+       mr      r29, r3
+       li      r3, 0
+       std     r30, -16(r1)
+       mr      r30, r7
+       std     r31, -8(r1)
+       li      r31, 0
+       std     r27, -40(r1)
+       std     r0, 16(r1)
+       stw     r12, 8(r1)
+       stdu    r1, -176(r1)
+C un + fn = 0: nothing to do, return 0.
+       beq-    cr0, L(1)
+       cmpdi   cr7, r7, 0
+C r11 = qp + 8*(un+fn); r29 = address of the most significant quotient limb.
+       sldi    r0, r10, 3
+       add     r11, r0, r29
+       addi    r29, r11, -8
+C A negative d as a signed value means its top bit is set.
+       blt-    cr7, L(162)
+       cmpdi   cr4, r6, 0
+       beq+    cr4, L(71)
+L(163):
+C If the top dividend limb is below d, the top quotient limb is zero and
+C that limb becomes the initial remainder.
+       sldi    r9, r6, 3
+       add     r9, r9, r5
+       ld      r7, -8(r9)
+       cmpld   cr7, r7, r30
+       bge-    cr7, L(71)
+       cmpdi   cr7, r10, 1
+       li      r0, 0
+       mr      r31, r7
+       std     r0, -8(r11)
+       addi    r29, r29, -8
+       mr      r3, r7
+       beq-    cr7, L(1)
+       addi    r28, r6, -1
+       cmpdi   cr4, r28, 0
+L(71):
+C Normalize: shift d and the remainder left by the leading zero count of d,
+C then fetch the inverse of the shifted d.  cr4 (no integer limbs left) is
+C tested after the call, so it is expected to survive it.
+C NOTE(review): the nop after CALL is presumably the slot reserved for the
+C linker -- confirm against the ABI.
+       cntlzd  r27, r30
+       sld     r30, r30, r27
+       sld     r31, r31, r27
+       mr      r3, r30
+       CALL(   mpn_invert_limb)
+       nop
+       beq-    cr4, L(110)
+C r8 = top remaining dividend limb, r5 = 64 - shift.  The bits shifted out
+C of r8 join the remainder; r7 keeps the rest for the next step.
+       sldi    r9, r28, 3
+       addic.  r6, r28, -2
+       add     r9, r9, r26
+       subfic  r5, r27, 64
+       ld      r8, -8(r9)
+       srd     r0, r8, r5
+       or      r31, r31, r0
+       sld     r7, r8, r27
+C Only one integer limb left: go straight to the final step.
+       blt-    cr0, L(154)
+       addi    r28, r28, -1
+       mtctr   r28
+       sldi    r6, r6, 3
+C Integer limbs, unnormalized d.  r9 = next shifted dividend limb (nl).
+C Candidate quotient: qh:ql = r31 * inverse + (r31 + 1):nl, remainder
+C r = nl - qh * d, adjusted by the conditional step below and, rarely, by
+C the fix-up at L(164).
+       ALIGN(16)
+L(uloop):
+       addi    r11, r31, 1
+       ldx     r8, r26, r6
+       mulld   r0, r31, r3
+       mulhdu  r10, r31, r3
+       addi    r6, r6, -8
+       srd     r9, r8, r5
+       or      r9, r7, r9
+       addc    r0, r0, r9
+       adde    r10, r10, r11
+       mulld   r31, r10, r30
+       subf    r31, r31, r9
+       subfc   r0, r0, r31     C r >= ql
+       subfe   r0, r0, r0      C r0 = 0 if r >= ql, else -1
+       not     r7, r0
+       add     r10, r7, r10    C qh -= (r >= ql)
+       andc    r0, r30, r0
+       add     r31, r31, r0
+       cmpld   cr7, r31, r30
+       bge-    cr7, L(164)
+L(123):
+       std     r10, 0(r29)
+       addi    r29, r29, -8
+       sld     r7, r8, r27
+       bdnz    L(uloop)
+L(154):
+C Last integer limb: same step, with r7 alone as the dividend limb.
+       addi    r11, r31, 1
+       nop
+       mulld   r0, r31, r3
+       mulhdu  r8, r31, r3
+       addc    r0, r0, r7
+       adde    r8, r8, r11
+       mulld   r31, r8, r30
+       subf    r31, r31, r7
+       subfc   r0, r0, r31     C r >= ql
+       subfe   r0, r0, r0      C r0 = 0 if r >= ql, else -1
+       not     r7, r0
+       add     r8, r7, r8      C qh -= (r >= ql)
+       andc    r0, r30, r0
+       add     r31, r31, r0
+       cmpld   cr7, r31, r30
+       bge-    cr7, L(165)
+L(134):
+       std     r8, 0(r29)
+       addi    r29, r29, -8
+L(110):
+C Fraction limbs (dividend limb is zero): fn iterations, skipped if fn = 0.
+C r9 = -d so that the multiply yields 0 - qh * d directly.  The ifelse
+C selects its second branch, the version with a conditional branch.
+       addic.  r0, r25, -1
+       blt-    cr0, L(156)
+       mtctr   r25
+       neg     r9, r30
+       ALIGN(16)
+L(ufloop):
+       addi    r11, r31, 1
+       nop
+       mulld   r7, r3, r31
+       mulhdu  r10, r3, r31
+       add     r10, r10, r11
+       mulld   r31, r9, r10
+ifelse(0,1,`
+       subfc   r0, r7, r31
+       subfe   r0, r0, r0      C r0 = -(r >= ql)
+       not     r7, r0
+       add     r10, r7, r10    C qh -= (r >= ql)
+       andc    r0, r30, r0
+       add     r31, r31, r0
+',`
+       cmpld   cr7, r31, r7
+       blt     cr7, L(29)
+       add     r31, r30, r31
+       addi    r10, r10, -1
+L(29):
+')
+       std     r10, 0(r29)
+       addi    r29, r29, -8
+       bdnz    L(ufloop)
+L(156):
+C Undo the normalization shift on the remainder.
+       srd     r3, r31, r27
+L(1):
+C Common return: pop the frame, restore lr, cr field 4 and r25-r31.
+       addi    r1, r1, 176
+       ld      r0, 16(r1)
+       lwz     r12, 8(r1)
+       mtlr    r0
+       ld      r25, -56(r1)
+       ld      r26, -48(r1)
+       mtcrf   8, r12
+       ld      r27, -40(r1)
+       ld      r28, -32(r1)
+       ld      r29, -24(r1)
+       ld      r30, -16(r1)
+       ld      r31, -8(r1)
+       blr
+L(162):
+C d has its top bit set.  With un = 0 skip straight to the inverse.
+C Otherwise the top dividend limb gives a first quotient limb of 0 or 1:
+C the carry of (limb - d) is captured in r9, stored as the quotient limb,
+C and d is subtracted from the limb when that quotient limb is 1.
+       cmpdi   cr7, r6, 0
+       beq-    cr7, L(8)
+       sldi    r9, r6, 3
+       addi    r29, r29, -8
+       add     r9, r9, r5
+       addi    r28, r6, -1
+       ld      r31, -8(r9)
+       subfc   r9, r7, r31
+       li      r9, 0
+       adde    r9, r9, r9
+       neg     r0, r9
+       std     r9, -8(r11)
+       and     r0, r0, r7
+       subf    r31, r0, r31
+L(8):
+L(10):
+       mr      r3, r30
+       CALL(   mpn_invert_limb)
+       nop
+C Skip the integer loop when no integer limbs remain.
+       addic.  r6, r28, -1
+       blt-    cr0, L(150)
+       mtctr   r28
+       sldi    r6, r6, 3
+C Integer limbs, top bit of d set: same quotient step as L(uloop) but the
+C dividend limb r8 is used as loaded, without shifting.
+       ALIGN(16)
+L(nloop):
+       addi    r11, r31, 1
+       ldx     r8, r26, r6
+       mulld   r0, r31, r3
+       addi    r6, r6, -8
+       mulhdu  r10, r31, r3
+       addc    r7, r0, r8
+       adde    r10, r10, r11
+       mulld   r31, r10, r30
+       subf    r31, r31, r8    C r = nl - qh * d
+       subfc   r0, r7, r31     C r >= ql
+       subfe   r0, r0, r0      C r0 = 0 if r >= ql, else -1
+       not     r7, r0
+       add     r10, r7, r10    C qh -= (r >= ql)
+       andc    r0, r30, r0
+       add     r31, r31, r0
+       cmpld   cr7, r31, r30
+       bge-    cr7, L(167)
+L(51):
+       std     r10, 0(r29)
+       addi    r29, r29, -8
+       bdnz    L(nloop)
+
+L(150):
+C Fraction limbs for this path; same structure as L(ufloop).
+       addic.  r9, r25, -1
+       blt-    cr0, L(152)
+       mtctr   r25
+       neg     r9, r30
+       ALIGN(16)
+L(nfloop):
+       addi    r11, r31, 1
+       nop
+       mulld   r7, r3, r31
+       mulhdu  r10, r3, r31
+       add     r10, r10, r11
+       mulld   r31, r9, r10
+ifelse(0,1,`
+       subfc   r0, r7, r31
+       subfe   r0, r0, r0      C r0 = -(r >= ql)
+       not     r7, r0
+       add     r10, r7, r10    C qh -= (r >= ql)
+       andc    r0, r30, r0
+       add     r31, r31, r0
+',`
+       cmpld   cr7, r31, r7
+       blt     cr7, L(28)
+       add     r31, r30, r31
+       addi    r10, r10, -1
+L(28):
+')
+       std     r10, 0(r29)
+       addi    r29, r29, -8
+       bdnz    L(nfloop)
+L(152):
+C Return for this path: no shift was applied, so the remainder is returned
+C as is.
+       addi    r1, r1, 176
+       mr      r3, r31
+       ld      r0, 16(r1)
+       lwz     r12, 8(r1)
+       mtlr    r0
+       ld      r25, -56(r1)
+       ld      r26, -48(r1)
+       mtcrf   8, r12
+       ld      r27, -40(r1)
+       ld      r28, -32(r1)
+       ld      r29, -24(r1)
+       ld      r30, -16(r1)
+       ld      r31, -8(r1)
+       blr
+C Out-of-line fix-ups for the unlikely case that the remainder is still
+C >= d after the conditional adjustment: subtract d once more and bump the
+C quotient limb, then rejoin the loop that branched here.
+L(164):
+       subf    r31, r30, r31
+       addi    r10, r10, 1
+       b       L(123)
+L(167):
+       subf    r31, r30, r31
+       addi    r10, r10, 1
+       b       L(51)
+L(165):
+       subf    r31, r30, r31
+       addi    r8, r8, 1
+       b       L(134)
+EPILOGUE()
diff --git a/mpn/powerpc64/mode64/divrem_2.asm b/mpn/powerpc64/mode64/divrem_2.asm

new file mode 100644 (file)

index 0000000..369b5c1
--- /dev/null
+++ b/mpn/powerpc64/mode64/divrem_2.asm
@@ -0,0 +1,178 @@
+dnl  PPC-64 mpn_divrem_2 -- Divide an mpn number by a normalized 2-limb number.
+
+dnl  Copyright 2007, 2008 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                      cycles/limb
+C                      norm    frac
+C POWER3/PPC630
+C POWER4/PPC970                39*     39*
+C POWER5               39*     39*
+
+C STATUS
+C  * Performance fluctuates like crazy
+
+C INPUT PARAMETERS
+C qp  = r3
+C fn  = r4
+C up  = r5
+C un  = r6
+C dp  = r7
+
+
+ifdef(`DARWIN',,`
+define(`r2',`r31')')           C FIXME!
+
+ASM_START()
+
+EXTERN_FUNC(mpn_invert_limb)
+
+PROLOGUE(mpn_divrem_2)                C quotient limbs go out through qp, remainder back through up, r3 = 0 or 1
+       mflr    r0
+       std     r23, -72(r1)    C save r23-r31 below sp; all are reloaded before blr
+       std     r24, -64(r1)
+       std     r25, -56(r1)
+       std     r26, -48(r1)
+       std     r27, -40(r1)
+       std     r28, -32(r1)
+       std     r29, -24(r1)
+       std     r30, -16(r1)
+       std     r31, -8(r1)
+       std     r0, 16(r1)      C save the link register value fetched by mflr
+       stdu    r1, -192(r1)    C push a 192-byte frame
+       mr      r24, r3         C r24 = qp
+       mr      r25, r4         C r25 = fn
+       sldi    r0, r6, 3
+       add     r26, r5, r0
+       addi    r26, r26, -24   C r26 = up + 8*un - 24
+       ld      r30, 8(r7)      C r30 = dp[1]
+       ld      r28, 0(r7)      C r28 = dp[0]
+       ld      r29, 16(r26)    C r29 = up[un-1]
+       ld      r31, 8(r26)     C r31 = up[un-2]
+
+ifelse(0,1,`
+       li      r23, 0
+       cmpld   cr7, r29, r30
+       blt     cr7, L(8)
+       bgt     cr7, L(9)
+       cmpld   cr0, r31, r28
+       blt     cr0, L(8)
+L(9):  subfc   r31, r28, r31
+       subfe   r29, r30, r29
+       li      r23, 1
+',`
+       li      r23, 0          C r23 holds the return value
+       cmpld   cr7, r29, r30
+       blt     cr7, L(8)
+       mfcr    r0
+       rlwinm  r0, r0, 30, 1   C r0 = GT bit of cr7, i.e. r29 > r30
+       subfc   r9, r28, r31    C carry set when r31 >= r28
+       addze.  r0, r0          C zero only if r29 = r30 and r31 < r28
+       nop
+       beq     cr0, L(8)
+       subfc   r31, r28, r31   C r29:r31 -= r30:r28
+       subfe   r29, r30, r29
+       li      r23, 1
+')
+
+L(8):
+       add     r27, r25, r6
+       addic.  r27, r27, -3    C r27 = fn + un - 3
+       blt     cr0, L(18)      C negative count: skip the loop entirely
+       mr      r3, r30
+       CALL(   mpn_invert_limb)        C r3 = mpn_invert_limb(r30)
+       nop                     C NOTE(review): presumably the linker slot after a call - confirm
+       mulld   r10, r3, r30
+       mulhdu  r0, r3, r28
+       addc    r8, r10, r28
+       subfe   r11, r1, r1     C r11 = CA - 1; r1 is only a same-register dummy here
+       addc    r10, r8, r0
+       addze.  r11, r11
+       blt     cr0, L(91)
+L(40):                         C NOTE(review): appears to adjust the inverse for the low divisor limb r28 - confirm
+       subfc   r10, r30, r10
+       addme.  r11, r11
+       addi    r3, r3, -1      C inverse in r3 is decremented once per pass
+       bge     cr0, L(40)
+L(91):
+       addi    r5, r27,  1
+       mtctr   r5              C loop count = fn + un - 2
+       sldi    r0, r27, 3
+       add     r24, r24, r0    C r24 = qp + 8*(fn + un - 3); limbs are stored downwards
+       ALIGN(16)
+L(loop):                       C r29:r31 = running remainder, r3 = inverse
+       mulhdu  r8, r29, r3
+       mulld   r6, r29, r3
+       addc    r6, r6, r31
+       adde    r8, r8, r29     C r8 = candidate quotient limb
+       mulld   r0, r30, r8
+       subf    r31, r0, r31
+       mulhdu  r11, r28, r8
+       mulld   r10, r28, r8
+       li      r7, 0           C next dividend limb is 0 once r27 < fn
+       cmpd    cr7, r27, r25
+       blt     cr7, L(60)
+       ld      r7, 0(r26)      C otherwise fetch it through r26 and step r26 down
+       addi    r26, r26, -8
+       nop
+L(60): subfc   r7, r28, r7
+       subfe   r31, r30, r31
+       subfc   r7, r10, r7
+       subfe   r4, r11, r31
+       subfc   r9, r6, r4
+       subfe   r9, r1, r1      C r9 = 0 or -1 mask from the carry of the subfc above
+       andc    r6, r28, r9     C r30:r28 is added back only when the mask is 0
+       andc    r0, r30, r9
+       addc    r31, r7, r6
+       adde    r29, r4, r0
+       subf    r8, r9, r8      C quotient limb += 1 when the mask is -1
+       cmpld   cr7, r29, r30
+       bge-    cr7, L(fix)     C hinted not-taken: remainder may still be >= divisor
+L(bck):        std     r8, 0(r24)      C store the quotient limb
+       addi    r24, r24, -8
+       addi    r27, r27, -1
+       bdnz    L(loop)
+L(18):
+       std     r31, 8(r26)     C write the two remainder limbs back through r26
+       std     r29, 16(r26)
+       mr      r3, r23         C return value
+       addi    r1, r1, 192     C pop the frame
+       ld      r0, 16(r1)
+       mtlr    r0
+       ld      r23, -72(r1)
+       ld      r24, -64(r1)
+       ld      r25, -56(r1)
+       ld      r26, -48(r1)
+       ld      r27, -40(r1)
+       ld      r28, -32(r1)
+       ld      r29, -24(r1)
+       ld      r30, -16(r1)
+       ld      r31, -8(r1)
+       blr
+L(fix):                        C reached with r29 >= r30; same test as at function entry
+       mfcr    r0
+       rlwinm  r0, r0, 30, 1   C r0 = GT bit of cr7, i.e. r29 > r30
+       subfc   r9, r28, r31    C carry set when r31 >= r28
+       addze.  r0, r0
+       beq     cr0, L(bck)     C r29:r31 < r30:r28, nothing to fix
+       subfc   r31, r28, r31   C subtract the divisor once more
+       subfe   r29, r30, r29
+       addi    r8, r8, 1       C and bump the quotient limb
+       b       L(bck)
+EPILOGUE()
diff --git a/mpn/powerpc64/mode64/gmp-mparam.h b/mpn/powerpc64/mode64/gmp-mparam.h

new file mode 100644 (file)

index 0000000..23d1ce5
--- /dev/null
+++ b/mpn/powerpc64/mode64/gmp-mparam.h
@@ -0,0 +1,71 @@
+/* PowerPC-64 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 2008, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 64          /* = 8 * BYTES_PER_MP_LIMB below */
+#define BYTES_PER_MP_LIMB 8
+
+/* 1600MHz PPC970 */
+
+/* Generated by tuneup.c, 2009-01-14, gcc 4.0 */
+
+#define MUL_TOOM22_THRESHOLD             14
+#define MUL_TOOM33_THRESHOLD             93
+#define MUL_TOOM44_THRESHOLD            135
+
+#define SQR_BASECASE_THRESHOLD            6
+#define SQR_TOOM2_THRESHOLD              32
+#define SQR_TOOM3_THRESHOLD              74
+#define SQR_TOOM4_THRESHOLD             136
+
+#define MULLO_BASECASE_THRESHOLD          0  /* always */
+#define MULLO_DC_THRESHOLD               44
+#define MULLO_MUL_N_THRESHOLD           234
+
+#define DIV_SB_PREINV_THRESHOLD           0  /* always */
+#define DIV_DC_THRESHOLD                 33
+#define POWM_THRESHOLD                   89
+
+#define MATRIX22_STRASSEN_THRESHOLD      15
+#define HGCD_THRESHOLD                   93
+#define GCD_DC_THRESHOLD                237
+#define GCDEXT_DC_THRESHOLD             273
+#define JACOBI_BASE_METHOD                1
+
+#define MOD_1_NORM_THRESHOLD              0  /* always */
+#define MOD_1_UNNORM_THRESHOLD            0  /* always */
+#define MOD_1_1_THRESHOLD                 6
+#define MOD_1_2_THRESHOLD                 9
+#define MOD_1_4_THRESHOLD                23
+#define USE_PREINV_DIVREM_1               0
+#define USE_PREINV_MOD_1                  0
+#define DIVEXACT_1_THRESHOLD              0  /* always (native) */
+#define MODEXACT_1_ODD_THRESHOLD          0  /* always (native) */
+
+#define GET_STR_DC_THRESHOLD             12
+#define GET_STR_PRECOMPUTE_THRESHOLD     24
+#define SET_STR_DC_THRESHOLD            650
+#define SET_STR_PRECOMPUTE_THRESHOLD   1713
+
+#define MUL_FFT_TABLE  { 336, 672, 1856, 2816, 7168, 20480, 81920, 327680, 0 }
+#define MUL_FFT_MODF_THRESHOLD          304
+#define MUL_FFT_THRESHOLD              4224
+
+#define SQR_FFT_TABLE  { 272, 672, 1600, 2816, 7168, 20480, 81920, 327680, 786432, 0 }
+#define SQR_FFT_MODF_THRESHOLD          272
+#define SQR_FFT_THRESHOLD              2688
diff --git a/mpn/powerpc64/mode64/invert_limb.asm b/mpn/powerpc64/mode64/invert_limb.asm

new file mode 100644 (file)

index 0000000..02a67a3
--- /dev/null
+++ b/mpn/powerpc64/mode64/invert_limb.asm
@@ -0,0 +1,109 @@
+dnl  PowerPC-64 mpn_invert_limb -- Invert a normalized limb.
+
+dnl  Copyright 2004, 2005, 2006, 2008 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C              cycles/limb
+C POWER3/PPC630:     ?
+C POWER4/PPC970:     75 (including call+ret)
+
+C TODO:
+C   * Pair multiply instructions.
+
+ASM_START()
+PROLOGUE(mpn_invert_limb)     C in: r3 = d; out: r3 = the computed inverse
+       LEAL(   r12, approx_tab)       C r12 = table base used by lhzx below (LEAL is defined elsewhere)
+
+       srdi    r11, r3, 32             C r11 = d >> 32
+       rlwinm  r9, r11, 10, 23, 30     C r9 = ((d >> 55) & 0xff) << 1
+       lhzx    r0, r12, r9             C load initial approximation
+       rldic   r10, r0, 6, 42          C r10 = table entry << 6
+       mulld   r8, r10, r10            C NOTE(review): the steps below look like successive refinements of the approximation - confirm
+       sldi    r9, r10, 17
+       mulld   r0, r8, r11             C this step uses only the high half of d (r11)
+       srdi    r0, r0, 31
+       subf    r10, r0, r9
+       mulld   r8, r10, r10
+       sldi    r11, r10, 33            C r11 is reused as a temporary from here on
+       mulhdu  r0, r8, r3              C from here the full d in r3 is used
+       sldi    r9, r0, 1
+       subf    r10, r9, r11
+       sldi    r11, r10, 2
+       mulhdu  r0, r10, r10            C r0:r8 = 128-bit square of r10
+       mulld   r8, r10, r10
+       mulhdu  r10, r8, r3
+       mulld   r9, r0, r3
+       mulhdu  r0, r0, r3
+       addc    r8, r9, r10
+       addze   r10, r0
+       srdi    r0, r8, 62
+       rldimi  r0, r10, 2, 0           C r0 = (r10 << 2) | (r8 >> 62)
+       sldi    r9, r8, 2
+       subfic  r10, r9, 0              C only the carry out of 0 - r9 is consumed, by the subfe below
+       subfe   r8, r0, r11
+       mulhdu  r10, r3, r8
+       add     r10, r10, r3
+       mulld   r9, r3, r8
+       subf    r11, r10, r8
+       addi    r0, r10, 1
+       addi    r8, r11, -1
+       and     r0, r3, r0
+       addc    r11, r9, r0
+       addze   r10, r10
+       addc    r0, r11, r3
+       addze   r10, r10
+       subf    r3, r10, r8             C result = r8 - r10
+       blr
+EPILOGUE()
+
+DEF_OBJECT(approx_tab)                C 32 rows of 8 = 256 16-bit entries, read with lhzx by mpn_invert_limb
+       .short  1023,1020,1016,1012,1008,1004,1000,996
+       .short  992,989,985,981,978,974,970,967
+       .short  963,960,956,953,949,946,942,939
+       .short  936,932,929,926,923,919,916,913
+       .short  910,907,903,900,897,894,891,888
+       .short  885,882,879,876,873,870,868,865
+       .short  862,859,856,853,851,848,845,842
+       .short  840,837,834,832,829,826,824,821
+       .short  819,816,814,811,809,806,804,801
+       .short  799,796,794,791,789,787,784,782
+       .short  780,777,775,773,771,768,766,764
+       .short  762,759,757,755,753,751,748,746
+       .short  744,742,740,738,736,734,732,730
+       .short  728,726,724,722,720,718,716,714
+       .short  712,710,708,706,704,702,700,699
+       .short  697,695,693,691,689,688,686,684
+       .short  682,680,679,677,675,673,672,670
+       .short  668,667,665,663,661,660,658,657
+       .short  655,653,652,650,648,647,645,644
+       .short  642,640,639,637,636,634,633,631
+       .short  630,628,627,625,624,622,621,619
+       .short  618,616,615,613,612,611,609,608
+       .short  606,605,604,602,601,599,598,597
+       .short  595,594,593,591,590,589,587,586
+       .short  585,583,582,581,579,578,577,576
+       .short  574,573,572,571,569,568,567,566
+       .short  564,563,562,561,560,558,557,556
+       .short  555,554,553,551,550,549,548,547
+       .short  546,544,543,542,541,540,539,538
+       .short  537,536,534,533,532,531,530,529
+       .short  528,527,526,525,524,523,522,521
+       .short  520,519,518,517,516,515,514,513
+END_OBJECT(approx_tab)
+ASM_END()
diff --git a/mpn/powerpc64/mode64/mod_34lsub1.asm b/mpn/powerpc64/mode64/mod_34lsub1.asm

new file mode 100644 (file)

index 0000000..ca46c39
--- /dev/null
+++ b/mpn/powerpc64/mode64/mod_34lsub1.asm
@@ -0,0 +1,119 @@
+dnl  PowerPC-64 mpn_mod_34lsub1 -- modulo 2^48-1.
+
+dnl  Copyright 2005 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C              cycles/limb
+C POWER3/PPC630:     1.33
+C POWER4/PPC970:     1.5
+C POWER5:           1.57
+
+C INPUT PARAMETERS
+define(`up',`r3')
+define(`n',`r4')
+
+ASM_START()
+PROLOGUE(mpn_mod_34lsub1)     C r3 = a value congruent to {up,n} modulo 2^48-1 (not fully reduced)
+       li      r8, 0           C r8, r9, r10 = running sums of limbs 0, 1, 2 of each 3-limb group
+       li      r9, 0
+       li      r10, 0
+       li      r11, 0          C r11 collects the carries out of r10
+
+       cmpdi   cr6, n, 3
+       blt     cr6, L(lt3)
+
+       li      r0, -0x5556             C 0xFFFFFFFFFFFFAAAA
+       rldimi  r0, r0, 16, 32          C 0xFFFFFFFFAAAAAAAA
+       rldimi  r0, r0, 32, 63          C 0xAAAAAAAAAAAAAAAB
+       mulhdu  r0, r0, n
+       srdi    r0, r0, 1               C r0 = [n / 3]
+       mtctr   r0
+
+       ld      r5, 0(up)
+       ld      r6, 8(up)
+       ld      r7, 16(up)
+       addi    up, up, 24
+       bdz     L(end)                  C only one 3-limb group: skip the loop
+
+       ALIGN(16)
+L(top):        addc    r8, r8, r5      C loop body handles two 3-limb groups per pass
+       nop
+       ld      r5, 0(up)
+       adde    r9, r9, r6
+       ld      r6, 8(up)
+       adde    r10, r10, r7
+       ld      r7, 16(up)
+       addi    up, up, 48
+       addze   r11, r11
+       bdz     L(endx)
+       addc    r8, r8, r5
+       nop
+       ld      r5, -24(up)
+       adde    r9, r9, r6
+       ld      r6, -16(up)
+       adde    r10, r10, r7
+       ld      r7, -8(up)
+       addze   r11, r11
+       bdnz    L(top)
+
+       addi    up, up, 24              C cancels the -24 below on the fall-through exit
+L(endx):
+       addi    up, up, -24             C up now points just past the last loaded group
+
+L(end):        addc    r8, r8, r5      C add the group that is still held in r5, r6, r7
+       adde    r9, r9, r6
+       adde    r10, r10, r7
+       addze   r11, r11
+
+       sldi    r5, r0, 1
+       add     r5, r5, r0              C r5 = n / 3 * 3
+       sub     n, n, r5                C n = n mod 3
+L(lt3):        cmpdi   cr6, n, 1       C 0, 1 or 2 limbs remain
+       blt     cr6, L(2)
+
+       ld      r5, 0(up)
+       addc    r8, r8, r5
+       li      r6, 0                   C second leftover limb defaults to 0
+       beq     cr6, L(1)
+
+       ld      r6, 8(up)
+L(1):  adde    r9, r9, r6
+       addze   r10, r10
+       addze   r11, r11
+
+L(2):  rldicl  r0, r8, 0, 16           C r0 = r8 mod 2^48
+       srdi    r3, r8, 48              C r3 = r8 div 2^48
+       rldic   r4, r9, 16, 16          C r4 = (r9 mod 2^32) << 16
+       srdi    r5, r9, 32              C r5 = r9 div 2^32
+       rldic   r6, r10, 32, 16         C r6 = (r10 mod 2^16) << 32
+       srdi    r7, r10, 16             C r7 = r10 div 2^16
+
+       add     r0, r0, r3
+       add     r4, r4, r5
+       add     r6, r6, r7
+
+       add     r0, r0, r4
+       add     r6, r6, r11
+
+       add     r3, r0, r6              C return the sum of all the folded pieces
+       blr
+EPILOGUE()
+
+C |__r10__|__r9___|__r8___|
+C |-----|-----|-----|-----|
diff --git a/mpn/powerpc64/mode64/mode1o.asm b/mpn/powerpc64/mode64/mode1o.asm

new file mode 100644 (file)

index 0000000..489ca85
--- /dev/null
+++ b/mpn/powerpc64/mode64/mode1o.asm
@@ -0,0 +1,104 @@
+dnl  PowerPC-64 mpn_modexact_1_odd -- mpn by limb exact remainder.
+
+dnl  Copyright 2006 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C               cycles/limb
+C POWER3/PPC630:    13-19
+C POWER4/PPC970:     16
+C POWER5:            16
+
+C TODO
+C  * Check if n=1 code is really an improvement.  It probably isn't.
+C  * Make more similar to dive_1.asm.
+
+C INPUT PARAMETERS
+define(`up', `r3')
+define(`n',  `r4')
+define(`d',  `r5')
+define(`cy', `r6')
+
+
+ASM_START()
+
+EXTERN(binvert_limb_table)
+
+PROLOGUE(mpn_modexact_1c_odd) C in: up r3, n r4, d r5, cy r6; result returned in r3
+       addic.  n, n, -1                C set carry as side effect
+       ld      r8, 0(up)
+       bne     cr0, L(2)               C n != 1: take the general path
+       cmpld   cr7, r6, r8             C single limb: compare cy with up[0]
+       bge     cr7, L(4)
+       subf    r8, r6, r8              C r8 = up[0] - cy
+       divdu   r3, r8, d
+       mulld   r3, r3, d
+       subf.   r3, r3, r8              C r3 = r8 mod d
+       beqlr   cr0                     C return 0 when that remainder is zero
+       subf    r3, r3, d               C otherwise return d - remainder
+       blr
+
+L(4):  subf    r3, r8, r6              C r3 = cy - up[0]
+       divdu   r8, r3, d
+       mulld   r8, r8, d
+       subf    r3, r8, r3              C return r3 mod d
+       blr
+
+L(2):  LEA(    r7, binvert_limb_table)
+       rldicl  r9, d, 63, 57           C r9 = (d >> 1) & 0x7f, the table index
+       mtctr   n                       C n was decremented above, so n-1 iterations
+       lbzx    r0, r7, r9              C one-byte starting value
+       mulld   r7, r0, r0              C three rounds of r0 = 2*r0 - d*r0*r0 follow
+       sldi    r0, r0, 1
+       mulld   r7, d, r7
+       subf    r0, r7, r0
+       mulld   r9, r0, r0
+       sldi    r0, r0, 1
+       mulld   r9, d, r9
+       subf    r0, r9, r0
+       mulld   r7, r0, r0
+       sldi    r0, r0, 1
+       mulld   r7, d, r7
+       subf    r9, r7, r0              C r9 = result of the third round (presumably the inverse of d mod 2^64)
+
+       ALIGN(16)
+L(loop):
+       subfe   r0, r6, r8              C r0 = limb - r6 - borrow, borrow being 1 - CA
+       ld      r8, 8(up)               C fetch the next limb
+       addi    up, up, 8
+       mulld   r0, r9, r0              C low half of r9 * r0
+       mulhdu  r6, r0, d               C r6 = high half of r0 * d, consumed by the next subfe
+       bdnz    L(loop)
+
+       cmpld   cr7, d, r8              C r8 is now the last limb; pick one of two endings
+       blt     cr7, L(10)
+
+       subfe   r0, r0, r0              C r0 = CA - 1, i.e. minus the pending borrow
+       subf    r6, r0, r6              C r6 += borrow
+       cmpld   cr7, r6, r8
+       subf    r3, r8, r6              C r3 = r6 - last limb
+       bgelr   cr7
+       add     r3, d, r3               C r6 was below the last limb: add d back
+       blr
+
+L(10): subfe   r0, r6, r8              C d < last limb: one more step of the loop recurrence
+       mulld   r0, r9, r0
+       mulhdu  r3, r0, d
+       blr
+EPILOGUE()
+ASM_END()
diff --git a/mpn/powerpc64/mode64/mul_1.asm b/mpn/powerpc64/mode64/mul_1.asm

new file mode 100644 (file)

index 0000000..8f644d8
--- /dev/null
+++ b/mpn/powerpc64/mode64/mul_1.asm
@@ -0,0 +1,167 @@
+dnl  PowerPC-64 mpn_mul_1 -- Multiply a limb vector with a limb and store
+dnl  the result in a second limb vector.
+
+dnl  Copyright 1999, 2000, 2001, 2003, 2004, 2005, 2006 Free Software
+dnl  Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C              cycles/limb
+C POWER3/PPC630:     6-18
+C POWER4/PPC970:     7.25
+C POWER5:            7.75
+
+C TODO
+C  * Try to reduce the number of needed live registers (at least r5 and r10
+C    could be combined)
+C  * Optimize feed-in code, for speed and size.
+C  * Clean up r12/r7 usage in feed-in code.
+
+C INPUT PARAMETERS
+define(`rp', `r3')
+define(`up', `r4')
+define(`n', `r5')
+define(`vl', `r6')
+
+ASM_START()
+PROLOGUE(mpn_mul_1c)          C entry that takes the initial carry limb from r7
+       std     r27, -40(r1)    C same two register saves as mpn_mul_1 performs
+       std     r26, -48(r1)
+       mr      r12, r7         C r12 = carry-in limb
+       b       L(ent)          C join the shared body inside mpn_mul_1
+EPILOGUE()
+PROLOGUE(mpn_mul_1)           C {rp,n} = {up,n} * vl + carry-in; r3 returns the final carry limb
+       std     r27, -40(r1)    C r26 and r27 are saved below sp without moving it
+       std     r26, -48(r1)
+       li      r12, 0          C cy_limb = 0
+L(ent):        ld      r26, 0(up)      C shared entry; r12 = carry-in limb
+
+       rldicl. r0, n, 0,62     C r0 = n & 3, set cr0
+       cmpdi   cr6, r0, 2
+       addic   n, n, 3         C compute count...
+       srdi    n, n, 2         C ...for ctr
+       mtctr   n               C copy count into ctr
+       beq     cr0, L(b00)
+       blt     cr6, L(b01)
+       beq     cr6, L(b10)     C this path reaches adde with CA as left by addic above (NOTE(review): presumably why addic, not addi - confirm)
+
+L(b11):        mr      r7, r12         C n & 3 = 3: do one limb first
+       mulld   r0, r26, r6
+       mulhdu  r12, r26, r6
+       addi    up, up, 8
+       addc    r0, r0, r7      C carry out stays pending in CA for the adde chain
+       std     r0, 0(rp)
+       addi    rp, rp, 8
+       b       L(fic)
+
+L(b00):        ld      r27, 8(up)      C n & 3 = 0: do two limbs first
+       addi    up, up, 16
+       mulld   r0, r26, r6
+       mulhdu  r5, r26, r6
+       mulld   r7, r27, r6
+       mulhdu  r8, r27, r6
+       addc    r0, r0, r12
+       adde    r7, r7, r5
+       addze   r12, r8
+       std     r0, 0(rp)
+       std     r7, 8(rp)
+       addi    rp, rp, 16
+       b       L(fic)
+
+       nop                     C alignment
+L(b01):        bdnz    L(gt1)          C n & 3 = 1: falls through only when n = 1
+       mulld   r0, r26, r6
+       mulhdu  r8, r26, r6
+       addc    r0, r0, r12
+       std     r0, 0(rp)
+       b       L(ret)
+L(gt1):        ld      r27, 8(up)      C n & 3 = 1 and n > 1: do three limbs first
+       nop
+       mulld   r0, r26, r6
+       mulhdu  r5, r26, r6
+       ld      r26, 16(up)
+       mulld   r7, r27, r6
+       mulhdu  r8, r27, r6
+       mulld   r9, r26, r6
+       mulhdu  r10, r26, r6
+       addc    r0, r0, r12
+       adde    r7, r7, r5
+       adde    r9, r9, r8
+       addze   r12, r10
+       std     r0, 0(rp)
+       std     r7, 8(rp)
+       std     r9, 16(rp)
+       addi    up, up, 24
+       addi    rp, rp, 24
+       b       L(fic)
+
+       nop
+L(fic):        ld      r26, 0(up)      C preload the next two limbs for the loop
+L(b10):        ld      r27, 8(up)
+       addi    up, up, 16
+       bdz     L(end)
+
+L(top):        mulld   r0, r26, r6     C four limbs per pass; CA carries across passes
+       mulhdu  r5, r26, r6
+       ld      r26, 0(up)
+       nop
+
+       mulld   r7, r27, r6
+       mulhdu  r8, r27, r6
+       ld      r27, 8(up)
+       nop
+
+       adde    r0, r0, r12     C r12 = high limb left by the previous pass or feed-in
+       adde    r7, r7, r5
+
+       mulld   r9, r26, r6
+       mulhdu  r10, r26, r6
+       ld      r26, 16(up)
+       nop
+
+       mulld   r11, r27, r6
+       mulhdu  r12, r27, r6
+       ld      r27, 24(up)
+
+       std     r0, 0(rp)
+       adde    r9, r9, r8
+       std     r7, 8(rp)
+       adde    r11, r11, r10
+       std     r9, 16(rp)
+       addi    up, up, 32
+       std     r11, 24(rp)
+
+       addi    rp, rp, 32
+       bdnz    L(top)
+
+L(end):        mulld   r0, r26, r6     C wind-down: the last two limbs
+       mulhdu  r5, r26, r6
+
+       mulld   r7, r27, r6
+       mulhdu  r8, r27, r6
+
+       adde    r0, r0, r12
+       adde    r7, r7, r5
+
+       std     r0, 0(rp)
+       std     r7, 8(rp)
+L(ret):        addze   r3, r8          C return last high limb plus the pending carry
+       ld      r27, -40(r1)
+       ld      r26, -48(r1)
+       blr
+EPILOGUE()
diff --git a/mpn/powerpc64/mode64/mul_basecase.asm b/mpn/powerpc64/mode64/mul_basecase.asm

new file mode 100644 (file)

index 0000000..cea5417
--- /dev/null
+++ b/mpn/powerpc64/mode64/mul_basecase.asm
@@ -0,0 +1,698 @@
+dnl  PowerPC-64 mpn_mul_basecase.
+
+dnl  Copyright 1999, 2000, 2001, 2003, 2004, 2005, 2006, 2008 Free Software
+dnl  Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C              cycles/limb
+C POWER3/PPC630:    6-18
+C POWER4/PPC970:     8
+C POWER5:            8
+
+
+C INPUT PARAMETERS
+define(`rp', `r3')
+define(`up', `r4')
+define(`un', `r5')
+define(`vp', `r6')
+define(`vn', `r7')
+
+define(`v0',      `r25')
+define(`outer_rp', `r22')
+define(`outer_up', `r23')
+
+ASM_START()
+PROLOGUE(mpn_mul_basecase)
+
+C Special code for un <= 2, for efficiency of these important cases,
+C and since it simplifies the default code.
+       cmpdi   cr0, un, 2
+       bgt     cr0, L(un_gt2)
+       cmpdi   cr6, vn, 1
+       ld      r7, 0(vp)
+       ld      r5, 0(up)
+       mulld   r8, r5, r7      C weight 0
+       mulhdu  r9, r5, r7      C weight 1
+       std     r8, 0(rp)
+       beq     cr0, L(2x)
+       std     r9, 8(rp)
+       blr
+       ALIGN(16)
+L(2x): ld      r0, 8(up)
+       mulld   r8, r0, r7      C weight 1
+       mulhdu  r10, r0, r7     C weight 2
+       addc    r9, r9, r8
+       addze   r10, r10
+       bne     cr6, L(2x2)
+       std     r9, 8(rp)
+       std     r10, 16(rp)
+       blr
+       ALIGN(16)
+L(2x2):        ld      r6, 8(vp)
+       nop
+       mulld   r8, r5, r6      C weight 1
+       mulhdu  r11, r5, r6     C weight 2
+       addc    r9, r9, r8
+       std     r9, 8(rp)
+       adde    r11, r11, r10
+       mulld   r12, r0, r6     C weight 2
+       mulhdu  r0, r0, r6      C weight 3
+       addze   r0, r0
+       addc    r11, r11, r12
+       addze   r0, r0
+       std     r11, 16(rp)
+       std     r0, 24(rp)
+       blr
+
+L(un_gt2):
+       std     r31, -8(r1)
+       std     r30, -16(r1)
+       std     r29, -24(r1)
+       std     r28, -32(r1)
+       std     r27, -40(r1)
+       std     r26, -48(r1)
+       std     r25, -56(r1)
+       std     r24, -64(r1)
+       std     r23, -72(r1)
+       std     r22, -80(r1)
+
+       mr      outer_rp, rp
+       mr      outer_up, up
+
+       ld      v0, 0(vp)       C new v limb
+       addi    vp, vp, 8
+       ld      r26, 0(up)
+
+       rldicl. r0, un, 0,62    C r0 = n & 3, set cr0
+       cmpdi   cr6, r0, 2
+       addi    un, un, 1       C compute count...
+       srdi    un, un, 2       C ...for ctr
+       mtctr   un              C copy inner loop count into ctr
+       beq     cr0, L(b0)
+       blt     cr6, L(b1)
+       beq     cr6, L(b2)
+
+
+       ALIGN(16)
+L(b3): mulld   r0, r26, v0
+       mulhdu  r12, r26, v0
+       addic   r0, r0, 0
+       std     r0, 0(rp)
+       ld      r26, 8(up)
+       ld      r27, 16(up)
+       bdz     L(end_m_3)
+
+       ALIGN(16)
+L(lo_m_3):
+       mulld   r0, r26, v0
+       mulhdu  r31, r26, v0
+       ld      r26, 24(up)
+       nop
+       mulld   r24, r27, v0
+       mulhdu  r8, r27, v0
+       ld      r27, 32(up)
+       nop
+       adde    r0, r0, r12
+       adde    r24, r24, r31
+       mulld   r9, r26, v0
+       mulhdu  r10, r26, v0
+       ld      r26, 40(up)
+       nop
+       mulld   r11, r27, v0
+       mulhdu  r12, r27, v0
+       ld      r27, 48(up)
+       std     r0, 8(rp)
+       adde    r9, r9, r8
+       std     r24, 16(rp)
+       adde    r11, r11, r10
+       std     r9, 24(rp)
+       addi    up, up, 32
+       std     r11, 32(rp)
+       addi    rp, rp, 32
+       bdnz    L(lo_m_3)
+
+       ALIGN(16)
+L(end_m_3):
+       mulld   r0, r26, v0
+       mulhdu  r31, r26, v0
+
+       mulld   r24, r27, v0
+       mulhdu  r8, r27, v0
+
+       adde    r0, r0, r12
+       adde    r24, r24, r31
+
+       std     r0, 8(rp)
+       std     r24, 16(rp)
+       addze   r8, r8
+       std     r8, 24(rp)
+       addic.  vn, vn, -1
+       beq     L(ret)
+
+       ALIGN(16)
+L(outer_lo_3):
+       mtctr   un              C copy inner loop count into ctr
+       addi    rp, outer_rp, 8
+       mr      up, outer_up
+       addi    outer_rp, outer_rp, 8
+       ld      v0, 0(vp)       C new v limb
+       addi    vp, vp, 8
+       ld      r26, 0(up)
+       ld      r28, 0(rp)
+       mulld   r0, r26, v0
+       mulhdu  r12, r26, v0
+       addc    r0, r0, r28
+       std     r0, 0(rp)
+       ld      r26, 8(up)
+       ld      r27, 16(up)
+       bdz     L(end_3)
+
+       ALIGN(16)               C registers dying
+L(lo_3):
+       mulld   r0, r26, v0     C
+       mulhdu  r10, r26, v0    C 26
+       ld      r26, 24(up)     C
+       ld      r28, 8(rp)      C
+       mulld   r24, r27, v0    C
+       mulhdu  r8, r27, v0     C 27
+       ld      r27, 32(up)     C
+       ld      r29, 16(rp)     C
+       adde    r0, r0, r12     C 0 12
+       adde    r24, r24, r10   C 24 10
+       mulld   r9, r26, v0     C
+       mulhdu  r10, r26, v0    C 26
+       ld      r26, 40(up)     C
+       ld      r30, 24(rp)     C
+       mulld   r11, r27, v0    C
+       mulhdu  r12, r27, v0    C 27
+       ld      r27, 48(up)     C
+       ld      r31, 32(rp)     C
+       adde    r9, r9, r8      C 8 9
+       adde    r11, r11, r10   C 10 11
+       addze   r12, r12        C 12
+       addc    r0, r0, r28     C 0 28
+       std     r0, 8(rp)       C 0
+       adde    r24, r24, r29   C 7 29
+       std     r24, 16(rp)     C 7
+       adde    r9, r9, r30     C 9 30
+       std     r9, 24(rp)      C 9
+       adde    r11, r11, r31   C 11 31
+       std     r11, 32(rp)     C 11
+       addi    up, up, 32      C
+       addi    rp, rp, 32      C
+       bdnz    L(lo_3) C
+
+       ALIGN(16)
+L(end_3):
+       mulld   r0, r26, v0
+       mulhdu  r10, r26, v0
+       ld      r28, 8(rp)
+       nop
+       mulld   r24, r27, v0
+       mulhdu  r8, r27, v0
+       ld      r29, 16(rp)
+       nop
+       adde    r0, r0, r12
+       adde    r24, r24, r10
+       addze   r8, r8
+       addc    r0, r0, r28
+       std     r0, 8(rp)
+       adde    r24, r24, r29
+       std     r24, 16(rp)
+       addze   r8, r8
+       std     r8, 24(rp)
+
+       addic.  vn, vn, -1
+       bne     L(outer_lo_3)
+       b       L(ret)
+
+
+       ALIGN(16)
+L(b0): ld      r27, 8(up)
+       addi    up, up, 8
+       mulld   r0, r26, v0
+       mulhdu  r10, r26, v0
+       mulld   r24, r27, v0
+       mulhdu  r8, r27, v0
+       addc    r24, r24, r10
+       addze   r12, r8
+       std     r0, 0(rp)
+       std     r24, 8(rp)
+       addi    rp, rp, 8
+       ld      r26, 8(up)
+       ld      r27, 16(up)
+       bdz     L(end_m_0)
+
+       ALIGN(16)
+L(lo_m_0):
+       mulld   r0, r26, v0
+       mulhdu  r31, r26, v0
+       ld      r26, 24(up)
+       nop
+       mulld   r24, r27, v0
+       mulhdu  r8, r27, v0
+       ld      r27, 32(up)
+       nop
+       adde    r0, r0, r12
+       adde    r24, r24, r31
+       mulld   r9, r26, v0
+       mulhdu  r10, r26, v0
+       ld      r26, 40(up)
+       nop
+       mulld   r11, r27, v0
+       mulhdu  r12, r27, v0
+       ld      r27, 48(up)
+       std     r0, 8(rp)
+       adde    r9, r9, r8
+       std     r24, 16(rp)
+       adde    r11, r11, r10
+       std     r9, 24(rp)
+       addi    up, up, 32
+       std     r11, 32(rp)
+       addi    rp, rp, 32
+       bdnz    L(lo_m_0)
+
+       ALIGN(16)
+L(end_m_0):
+       mulld   r0, r26, v0
+       mulhdu  r31, r26, v0
+
+       mulld   r24, r27, v0
+       mulhdu  r8, r27, v0
+
+       adde    r0, r0, r12
+       adde    r24, r24, r31
+
+       std     r0, 8(rp)
+       addze   r8, r8
+       std     r24, 16(rp)
+       addic.  vn, vn, -1
+       std     r8, 24(rp)
+       nop
+       beq     L(ret)
+
+       ALIGN(16)
+L(outer_lo_0):
+       mtctr   un              C copy inner loop count into ctr
+       addi    rp, outer_rp, 16
+       addi    up, outer_up, 8
+       addi    outer_rp, outer_rp, 8
+       ld      v0, 0(vp)       C new v limb
+       addi    vp, vp, 8
+       ld      r26, -8(up)
+       ld      r27, 0(up)
+       ld      r28, -8(rp)
+       ld      r29, 0(rp)
+       nop
+       nop
+       mulld   r0, r26, v0
+       mulhdu  r10, r26, v0
+       mulld   r24, r27, v0
+       mulhdu  r8, r27, v0
+       addc    r24, r24, r10
+       addze   r12, r8
+       addc    r0, r0, r28
+       std     r0, -8(rp)
+       adde    r24, r24, r29
+       std     r24, 0(rp)
+       ld      r26, 8(up)
+       ld      r27, 16(up)
+       bdz     L(end_0)
+
+       ALIGN(16)               C registers dying
+L(lo_0):
+       mulld   r0, r26, v0     C
+       mulhdu  r10, r26, v0    C 26
+       ld      r26, 24(up)     C
+       ld      r28, 8(rp)      C
+       mulld   r24, r27, v0    C
+       mulhdu  r8, r27, v0     C 27
+       ld      r27, 32(up)     C
+       ld      r29, 16(rp)     C
+       adde    r0, r0, r12     C 0 12
+       adde    r24, r24, r10   C 24 10
+       mulld   r9, r26, v0     C
+       mulhdu  r10, r26, v0    C 26
+       ld      r26, 40(up)     C
+       ld      r30, 24(rp)     C
+       mulld   r11, r27, v0    C
+       mulhdu  r12, r27, v0    C 27
+       ld      r27, 48(up)     C
+       ld      r31, 32(rp)     C
+       adde    r9, r9, r8      C 8 9
+       adde    r11, r11, r10   C 10 11
+       addze   r12, r12        C 12
+       addc    r0, r0, r28     C 0 28
+       std     r0, 8(rp)       C 0
+       adde    r24, r24, r29   C 24 29
+       std     r24, 16(rp)     C 24
+       adde    r9, r9, r30     C 9 30
+       std     r9, 24(rp)      C 9
+       adde    r11, r11, r31   C 11 31
+       std     r11, 32(rp)     C 11
+       addi    up, up, 32      C
+       addi    rp, rp, 32      C
+       bdnz    L(lo_0) C
+
+       ALIGN(16)
+L(end_0):
+       mulld   r0, r26, v0
+       mulhdu  r10, r26, v0
+       ld      r28, 8(rp)
+       nop
+       mulld   r24, r27, v0
+       mulhdu  r8, r27, v0
+       ld      r29, 16(rp)
+       nop
+       adde    r0, r0, r12
+       adde    r24, r24, r10
+       addze   r8, r8
+       addic.  vn, vn, -1
+       addc    r0, r0, r28
+       std     r0, 8(rp)
+       adde    r24, r24, r29
+       std     r24, 16(rp)
+       addze   r8, r8
+       std     r8, 24(rp)
+       bne     L(outer_lo_0)
+       b       L(ret)
+
+
+       ALIGN(16)
+L(b1): ld      r27, 8(up)
+       nop
+       mulld   r0, r26, v0
+       mulhdu  r31, r26, v0
+       ld      r26, 16(up)
+       mulld   r24, r27, v0
+       mulhdu  r8, r27, v0
+       mulld   r9, r26, v0
+       mulhdu  r10, r26, v0
+       addc    r24, r24, r31
+       adde    r9, r9, r8
+       addze   r12, r10
+       std     r0, 0(rp)
+       std     r24, 8(rp)
+       std     r9, 16(rp)
+       addi    up, up, 16
+       addi    rp, rp, 16
+       ld      r26, 8(up)
+       ld      r27, 16(up)
+       bdz     L(end_m_1)
+
+       ALIGN(16)
+L(lo_m_1):
+       mulld   r0, r26, v0
+       mulhdu  r31, r26, v0
+       ld      r26, 24(up)
+       nop
+       mulld   r24, r27, v0
+       mulhdu  r8, r27, v0
+       ld      r27, 32(up)
+       nop
+       adde    r0, r0, r12
+       adde    r24, r24, r31
+       mulld   r9, r26, v0
+       mulhdu  r10, r26, v0
+       ld      r26, 40(up)
+       nop
+       mulld   r11, r27, v0
+       mulhdu  r12, r27, v0
+       ld      r27, 48(up)
+       std     r0, 8(rp)
+       adde    r9, r9, r8
+       std     r24, 16(rp)
+       adde    r11, r11, r10
+       std     r9, 24(rp)
+       addi    up, up, 32
+       std     r11, 32(rp)
+       addi    rp, rp, 32
+       bdnz    L(lo_m_1)
+
+       ALIGN(16)
+L(end_m_1):
+       mulld   r0, r26, v0
+       mulhdu  r31, r26, v0
+
+       mulld   r24, r27, v0
+       mulhdu  r8, r27, v0
+
+       adde    r0, r0, r12
+       adde    r24, r24, r31
+
+       std     r0, 8(rp)
+       addze   r8, r8
+       std     r24, 16(rp)
+       addic.  vn, vn, -1
+       std     r8, 24(rp)
+       nop
+       beq     L(ret)
+
+       ALIGN(16)
+L(outer_lo_1):
+       mtctr   un              C copy inner loop count into ctr
+       addi    rp, outer_rp, 24
+       addi    up, outer_up, 16
+       addi    outer_rp, outer_rp, 8
+       ld      v0, 0(vp)       C new v limb
+       addi    vp, vp, 8
+       ld      r26, -16(up)
+       ld      r27, -8(up)
+       mulld   r0, r26, v0
+       mulhdu  r31, r26, v0
+       ld      r26, 0(up)
+       ld      r28, -16(rp)
+       mulld   r24, r27, v0
+       mulhdu  r8, r27, v0
+       ld      r29, -8(rp)
+       ld      r30, 0(rp)
+       mulld   r9, r26, v0
+       mulhdu  r10, r26, v0
+       addc    r24, r24, r31
+       adde    r9, r9, r8
+       addze   r12, r10
+       addc    r0, r0, r28
+       std     r0, -16(rp)
+       adde    r24, r24, r29
+       std     r24, -8(rp)
+       adde    r9, r9, r30
+       std     r9, 0(rp)
+       ld      r26, 8(up)
+       ld      r27, 16(up)
+       bdz     L(end_1)
+
+       ALIGN(16)               C registers dying
+L(lo_1):
+       mulld   r0, r26, v0     C
+       mulhdu  r10, r26, v0    C 26
+       ld      r26, 24(up)     C
+       ld      r28, 8(rp)      C
+       mulld   r24, r27, v0    C
+       mulhdu  r8, r27, v0     C 27
+       ld      r27, 32(up)     C
+       ld      r29, 16(rp)     C
+       adde    r0, r0, r12     C 0 12
+       adde    r24, r24, r10   C 24 10
+       mulld   r9, r26, v0     C
+       mulhdu  r10, r26, v0    C 26
+       ld      r26, 40(up)     C
+       ld      r30, 24(rp)     C
+       mulld   r11, r27, v0    C
+       mulhdu  r12, r27, v0    C 27
+       ld      r27, 48(up)     C
+       ld      r31, 32(rp)     C
+       adde    r9, r9, r8      C 8 9
+       adde    r11, r11, r10   C 10 11
+       addze   r12, r12        C 12
+       addc    r0, r0, r28     C 0 28
+       std     r0, 8(rp)       C 0
+       adde    r24, r24, r29   C 24 29
+       std     r24, 16(rp)     C 24
+       adde    r9, r9, r30     C 9 30
+       std     r9, 24(rp)      C 9
+       adde    r11, r11, r31   C 11 31
+       std     r11, 32(rp)     C 11
+       addi    up, up, 32      C
+       addi    rp, rp, 32      C
+       bdnz    L(lo_1) C
+
+       ALIGN(16)
+L(end_1):
+       mulld   r0, r26, v0
+       mulhdu  r10, r26, v0
+       ld      r28, 8(rp)
+       nop
+       mulld   r24, r27, v0
+       mulhdu  r8, r27, v0
+       ld      r29, 16(rp)
+       nop
+       adde    r0, r0, r12
+       adde    r24, r24, r10
+       addze   r8, r8
+       addic.  vn, vn, -1
+       addc    r0, r0, r28
+       std     r0, 8(rp)
+       adde    r24, r24, r29
+       std     r24, 16(rp)
+       addze   r8, r8
+       std     r8, 24(rp)
+       bne     L(outer_lo_1)
+       b       L(ret)
+
+
+       ALIGN(16)
+L(b2): ld      r27, 8(up)
+       addi    up, up, -8
+       addi    rp, rp, -8
+       li      r12, 0
+       addic   r12, r12, 0
+
+       ALIGN(16)
+L(lo_m_2):
+       mulld   r0, r26, v0
+       mulhdu  r31, r26, v0
+       ld      r26, 24(up)
+       nop
+       mulld   r24, r27, v0
+       mulhdu  r8, r27, v0
+       ld      r27, 32(up)
+       nop
+       adde    r0, r0, r12
+       adde    r24, r24, r31
+       mulld   r9, r26, v0
+       mulhdu  r10, r26, v0
+       ld      r26, 40(up)
+       nop
+       mulld   r11, r27, v0
+       mulhdu  r12, r27, v0
+       ld      r27, 48(up)
+       std     r0, 8(rp)
+       adde    r9, r9, r8
+       std     r24, 16(rp)
+       adde    r11, r11, r10
+       std     r9, 24(rp)
+       addi    up, up, 32
+       std     r11, 32(rp)
+
+       addi    rp, rp, 32
+       bdnz    L(lo_m_2)
+
+       ALIGN(16)
+L(end_m_2):
+       mulld   r0, r26, v0
+       mulhdu  r31, r26, v0
+
+       mulld   r24, r27, v0
+       mulhdu  r8, r27, v0
+
+       adde    r0, r0, r12
+       adde    r24, r24, r31
+
+       std     r0, 8(rp)
+       addze   r8, r8
+       std     r24, 16(rp)
+       addic.  vn, vn, -1
+       std     r8, 24(rp)
+       nop
+       beq     L(ret)
+
+       ALIGN(16)
+L(outer_lo_2):
+       mtctr   un              C copy inner loop count into ctr
+       addi    rp, outer_rp, 0
+       addi    up, outer_up, -8
+       addi    outer_rp, outer_rp, 8
+       ld      v0, 0(vp)       C new v limb
+       addi    vp, vp, 8
+       ld      r26, 8(up)
+       ld      r27, 16(up)
+       li      r12, 0
+       addic   r12, r12, 0
+
+       ALIGN(16)               C registers dying
+L(lo_2):
+       mulld   r0, r26, v0     C
+       mulhdu  r10, r26, v0    C 26
+       ld      r26, 24(up)     C
+       ld      r28, 8(rp)      C
+       mulld   r24, r27, v0    C
+       mulhdu  r8, r27, v0     C 27
+       ld      r27, 32(up)     C
+       ld      r29, 16(rp)     C
+       adde    r0, r0, r12     C 0 12
+       adde    r24, r24, r10   C 24 10
+       mulld   r9, r26, v0     C
+       mulhdu  r10, r26, v0    C 26
+       ld      r26, 40(up)     C
+       ld      r30, 24(rp)     C
+       mulld   r11, r27, v0    C
+       mulhdu  r12, r27, v0    C 27
+       ld      r27, 48(up)     C
+       ld      r31, 32(rp)     C
+       adde    r9, r9, r8      C 8 9
+       adde    r11, r11, r10   C 10 11
+       addze   r12, r12        C 12
+       addc    r0, r0, r28     C 0 28
+       std     r0, 8(rp)       C 0
+       adde    r24, r24, r29   C 24 29
+       std     r24, 16(rp)     C 24
+       adde    r9, r9, r30     C 9 30
+       std     r9, 24(rp)      C 9
+       adde    r11, r11, r31   C 11 31
+       std     r11, 32(rp)     C 11
+       addi    up, up, 32      C
+       addi    rp, rp, 32      C
+       bdnz    L(lo_2) C
+
+       ALIGN(16)
+L(end_2):
+       mulld   r0, r26, v0
+       mulhdu  r10, r26, v0
+       ld      r28, 8(rp)
+       nop
+       mulld   r24, r27, v0
+       mulhdu  r8, r27, v0
+       ld      r29, 16(rp)
+       nop
+       adde    r0, r0, r12
+       adde    r24, r24, r10
+       addze   r8, r8
+       addic.  vn, vn, -1
+       addc    r0, r0, r28
+       std     r0, 8(rp)
+       adde    r24, r24, r29
+       std     r24, 16(rp)
+       addze   r8, r8
+       std     r8, 24(rp)
+       bne     L(outer_lo_2)
+       b       L(ret)
+
+
+L(ret):        ld      r31, -8(r1)
+       ld      r30, -16(r1)
+       ld      r29, -24(r1)
+       ld      r28, -32(r1)
+       ld      r27, -40(r1)
+       ld      r26, -48(r1)
+       ld      r25, -56(r1)
+       ld      r24, -64(r1)
+       ld      r23, -72(r1)
+       ld      r22, -80(r1)
+       blr
+EPILOGUE()
diff --git a/mpn/powerpc64/mode64/p3/gmp-mparam.h b/mpn/powerpc64/mode64/p3/gmp-mparam.h

new file mode 100644 (file)

index 0000000..ba195df
--- /dev/null
+++ b/mpn/powerpc64/mode64/p3/gmp-mparam.h
@@ -0,0 +1,157 @@
+/* POWER3/PowerPC630 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 2008, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 64
+#define BYTES_PER_MP_LIMB 8  /* 8 bytes * 8 bits = GMP_LIMB_BITS */
+
+
+#define MOD_1_NORM_THRESHOLD                 0  /* always */
+#define MOD_1_UNNORM_THRESHOLD               0  /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD      MP_SIZE_T_MAX  /* never */
+#define MOD_1U_TO_MOD_1_1_THRESHOLD      MP_SIZE_T_MAX  /* never */
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD     MP_SIZE_T_MAX  /* never */
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD     MP_SIZE_T_MAX  /* never */
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD  MP_SIZE_T_MAX  /* never */
+#define USE_PREINV_DIVREM_1                  0
+#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD        MP_SIZE_T_MAX  /* never */
+
+#define MUL_TOOM22_THRESHOLD                10
+#define MUL_TOOM33_THRESHOLD                33
+#define MUL_TOOM44_THRESHOLD                46
+#define MUL_TOOM6H_THRESHOLD                77
+#define MUL_TOOM8H_THRESHOLD               115
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      49
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD      38
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD      33
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD      32
+
+#define SQR_BASECASE_THRESHOLD               0  /* always */
+#define SQR_TOOM2_THRESHOLD                 14
+#define SQR_TOOM3_THRESHOLD                 49
+#define SQR_TOOM4_THRESHOLD                 64
+#define SQR_TOOM6_THRESHOLD                 84
+#define SQR_TOOM8_THRESHOLD                127
+
+#define MULMOD_BNM1_THRESHOLD                8
+#define SQRMOD_BNM1_THRESHOLD                9
+
+#define MUL_FFT_MODF_THRESHOLD             220  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    220, 5}, {      9, 6}, {      5, 5}, {     11, 6}, \
+    {     13, 7}, {      7, 6}, {     15, 7}, {     13, 8}, \
+    {      7, 7}, {     15, 8}, {     13, 9}, {      7, 8}, \
+    {     19, 9}, {     11, 8}, {     23,10}, {      7, 9}, \
+    {     15, 8}, {     33, 9}, {     23,10}, {     15, 9}, \
+    {     35, 8}, {     71,10}, {     23, 9}, {     47,11}, \
+    {     15,10}, {     31, 9}, {     71,10}, {     39, 9}, \
+    {     79,10}, {     55,11}, {     31,10}, {     63, 9}, \
+    {    127,10}, {     71, 9}, {    143, 8}, {    287,10}, \
+    {     79,11}, {     47,10}, {     95, 9}, {    191,12}, \
+    {     31,11}, {     63,10}, {    127, 9}, {    255, 8}, \
+    {    511,10}, {    143, 9}, {    287,11}, {     79,10}, \
+    {    159, 9}, {    319, 8}, {    639,10}, {    175, 9}, \
+    {    351,11}, {     95,10}, {    191, 9}, {    383,11}, \
+    {    111,10}, {    223,12}, {     63,11}, {    127,10}, \
+    {    255, 9}, {    511,11}, {    143,10}, {    287, 9}, \
+    {    575,11}, {    159,10}, {    319, 9}, {    639,11}, \
+    {    175,10}, {    351,12}, {     95,11}, {    191,10}, \
+    {    383, 9}, {    767,11}, {    223,13}, {     63,12}, \
+    {    127,11}, {    255,10}, {    511,11}, {    287,10}, \
+    {    575, 9}, {   1151,12}, {    159,11}, {    319,10}, \
+    {    639,11}, {    351,12}, {    191,11}, {    383,10}, \
+    {    767,12}, {    223,11}, {    447,10}, {    895,13}, \
+    {    127,12}, {    255,11}, {    511,12}, {    287,11}, \
+    {    575,10}, {   1151,12}, {    319,11}, {    639,12}, \
+    {    351,11}, {    703,13}, {    191,12}, {    383,11}, \
+    {    767,12}, {    415,11}, {    831,10}, {   1663,12}, \
+    {    447,11}, {    895,14}, {  16384,15}, {  32768,16}, \
+    {  65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
+    {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 120  /* number of entries in MUL_FFT_TABLE3 */
+#define MUL_FFT_THRESHOLD                 2688
+
+#define SQR_FFT_MODF_THRESHOLD             188  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    188, 5}, {      9, 6}, {      5, 5}, {     11, 6}, \
+    {     13, 7}, {     13, 8}, {      7, 7}, {     16, 8}, \
+    {      9, 7}, {     19, 8}, {     13, 9}, {      7, 8}, \
+    {     19, 9}, {     11, 8}, {     23,10}, {      7, 9}, \
+    {     15, 8}, {     31, 9}, {     19, 8}, {     39, 9}, \
+    {     23,10}, {     15, 9}, {     39,10}, {     23,11}, \
+    {     15,10}, {     31, 9}, {     67,10}, {     39, 9}, \
+    {     79, 8}, {    159,10}, {     47, 9}, {     95, 8}, \
+    {    191,11}, {     31,10}, {     63, 9}, {    127, 8}, \
+    {    255,10}, {     71, 9}, {    143, 8}, {    287,10}, \
+    {     79, 9}, {    159,11}, {     47,10}, {     95, 9}, \
+    {    191,12}, {     31,11}, {     63,10}, {    127, 9}, \
+    {    255, 8}, {    511,10}, {    143, 9}, {    287,11}, \
+    {     79,10}, {    159, 9}, {    319, 8}, {    639,10}, \
+    {    175,11}, {     95,10}, {    191, 9}, {    383,11}, \
+    {    111,10}, {    223,12}, {     63,11}, {    127,10}, \
+    {    255, 9}, {    511,11}, {    143,10}, {    287, 9}, \
+    {    575,11}, {    159,10}, {    319, 9}, {    639,11}, \
+    {    175,12}, {     95,11}, {    191,10}, {    383, 9}, \
+    {    767,11}, {    223,13}, {     63,12}, {    127,11}, \
+    {    255,10}, {    511,11}, {    287,10}, {    575,12}, \
+    {    159,11}, {    319,10}, {    639,11}, {    351,12}, \
+    {    191,11}, {    383,10}, {    767,12}, {    223,11}, \
+    {    447,10}, {    895,13}, {    127,12}, {    255,11}, \
+    {    511,12}, {    287,11}, {    575,10}, {   1151,12}, \
+    {    319,11}, {    639,12}, {    351,13}, {    191,12}, \
+    {    383,11}, {    767,12}, {    447,11}, {    895,14}, \
+    {  16384,15}, {  32768,16}, {  65536,17}, { 131072,18}, \
+    { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
+    {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 118  /* number of entries in SQR_FFT_TABLE3 */
+#define SQR_FFT_THRESHOLD                 1728
+
+#define MULLO_BASECASE_THRESHOLD             3
+#define MULLO_DC_THRESHOLD                  27
+#define MULLO_MUL_N_THRESHOLD             4940
+
+#define DC_DIV_QR_THRESHOLD                 27
+#define DC_DIVAPPR_Q_THRESHOLD              95
+#define DC_BDIV_QR_THRESHOLD                28
+#define DC_BDIV_Q_THRESHOLD                 62
+
+#define INV_MULMOD_BNM1_THRESHOLD           35
+#define INV_NEWTON_THRESHOLD                97
+#define INV_APPR_THRESHOLD                  94
+
+#define BINV_NEWTON_THRESHOLD              115
+#define REDC_1_TO_REDC_N_THRESHOLD          30
+
+#define MU_DIV_QR_THRESHOLD                551
+#define MU_DIVAPPR_Q_THRESHOLD             551
+#define MUPI_DIV_QR_THRESHOLD               49
+#define MU_BDIV_QR_THRESHOLD               492
+#define MU_BDIV_Q_THRESHOLD                492
+
+#define MATRIX22_STRASSEN_THRESHOLD          9
+#define HGCD_THRESHOLD                      55
+#define GCD_DC_THRESHOLD                   162
+#define GCDEXT_DC_THRESHOLD                124
+#define JACOBI_BASE_METHOD                   1
+
+#define GET_STR_DC_THRESHOLD                17
+#define GET_STR_PRECOMPUTE_THRESHOLD        27
+#define SET_STR_DC_THRESHOLD               354
+#define SET_STR_PRECOMPUTE_THRESHOLD       812
diff --git a/mpn/powerpc64/mode64/p4/gmp-mparam.h b/mpn/powerpc64/mode64/p4/gmp-mparam.h

new file mode 100644 (file)

index 0000000..1606fab
--- /dev/null
+++ b/mpn/powerpc64/mode64/p4/gmp-mparam.h
@@ -0,0 +1,202 @@
+/* POWER4/PowerPC970 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 2008, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 64
+#define BYTES_PER_MP_LIMB 8  /* 8 bytes * 8 bits = GMP_LIMB_BITS */
+
+
+#define MOD_1_NORM_THRESHOLD                 0  /* always */
+#define MOD_1_UNNORM_THRESHOLD               0  /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD         10
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          6
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         8
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        23
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     16
+#define USE_PREINV_DIVREM_1                  0
+#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD           43
+
+#define MUL_TOOM22_THRESHOLD                14
+#define MUL_TOOM33_THRESHOLD                54
+#define MUL_TOOM44_THRESHOLD               154
+#define MUL_TOOM6H_THRESHOLD               206
+#define MUL_TOOM8H_THRESHOLD               309
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      89
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD      99
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD      97
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD      97
+
+#define SQR_BASECASE_THRESHOLD               5
+#define SQR_TOOM2_THRESHOLD                 36
+#define SQR_TOOM3_THRESHOLD                 61
+#define SQR_TOOM4_THRESHOLD                154
+#define SQR_TOOM6_THRESHOLD                206
+#define SQR_TOOM8_THRESHOLD                309
+
+#define MULMOD_BNM1_THRESHOLD               12
+#define SQRMOD_BNM1_THRESHOLD               14
+
+#define MUL_FFT_MODF_THRESHOLD             380  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    380, 5}, {     17, 6}, {      9, 5}, {     19, 6}, \
+    {     10, 5}, {     21, 6}, {     11, 5}, {     23, 6}, \
+    {     23, 7}, {     12, 6}, {     25, 7}, {     25, 8}, \
+    {     13, 7}, {     30, 6}, {     61, 7}, {     32, 8}, \
+    {     17, 7}, {     35, 8}, {     29, 9}, {     15, 8}, \
+    {     35, 9}, {     19, 8}, {     41, 9}, {     23, 8}, \
+    {     47, 9}, {     27,10}, {     15, 9}, {     39,10}, \
+    {     23, 9}, {     55,11}, {     15,10}, {     31, 9}, \
+    {     71,10}, {     39, 9}, {     83,10}, {     47, 9}, \
+    {     99,10}, {     55,11}, {     31,10}, {     63, 9}, \
+    {    127,10}, {     79,11}, {     47,10}, {    103,12}, \
+    {     31,11}, {     63,10}, {    127, 9}, {    255,10}, \
+    {    135, 9}, {    271,11}, {     79,10}, {    159, 9}, \
+    {    319,10}, {    167,11}, {     95,10}, {    191, 9}, \
+    {    383, 8}, {    767,10}, {    207,11}, {    111,12}, \
+    {     63,11}, {    127,10}, {    255, 9}, {    511,10}, \
+    {    271,11}, {    143,10}, {    287, 9}, {    575,10}, \
+    {    303, 9}, {    607,11}, {    159,10}, {    319, 9}, \
+    {    639,10}, {    335, 9}, {    671,12}, {     95,11}, \
+    {    191,10}, {    383, 9}, {    767,11}, {    207,10}, \
+    {    415, 9}, {    831,13}, {     63,12}, {    127,11}, \
+    {    255,10}, {    511,11}, {    271,10}, {    543, 9}, \
+    {   1087,11}, {    287,10}, {    575,11}, {    303,10}, \
+    {    607,12}, {    159,11}, {    319,10}, {    639,11}, \
+    {    335,10}, {    671,11}, {    351,10}, {    703,11}, \
+    {    367,12}, {    191,11}, {    383,10}, {    767,11}, \
+    {    415,10}, {    831,12}, {    223,11}, {    447,10}, \
+    {    895,13}, {    127,12}, {    255,11}, {    511,10}, \
+    {   1023,11}, {    543,10}, {   1087,12}, {    287,11}, \
+    {    575,10}, {   1151,11}, {    607,10}, {   1215,12}, \
+    {    319,11}, {    639,10}, {   1279,11}, {    671,12}, \
+    {    351,11}, {    703,10}, {   1407,13}, {    191,12}, \
+    {    383,11}, {    767,12}, {    415,11}, {    831,10}, \
+    {   1663,12}, {    447,11}, {    895,12}, {    479,14}, \
+    {    127,13}, {    255,12}, {    511,11}, {   1023,12}, \
+    {    543,11}, {   1087,10}, {   2175,12}, {    575,11}, \
+    {   1151,12}, {    607,11}, {   1215,13}, {    319,12}, \
+    {    639,11}, {   1279,12}, {    671,11}, {   1343,10}, \
+    {   2687,12}, {    703,11}, {   1407,12}, {    735,13}, \
+    {    383,12}, {    767,11}, {   1535,12}, {    799,11}, \
+    {   1599,12}, {    831,11}, {   1663,13}, {    447,12}, \
+    {    959,14}, {    255,13}, {    511,12}, {   1087,11}, \
+    {   2175,13}, {    575,12}, {   1215,11}, {   2431,13}, \
+    {    639,12}, {   1343,11}, {   2687,13}, {    703,12}, \
+    {   1407,14}, {    383,13}, {    767,12}, {   1599,13}, \
+    {    831,12}, {   1663,13}, {    959,15}, {    255,14}, \
+    {    511,13}, {   1087,12}, {   2175,13}, {   1215,12}, \
+    {   2431,14}, {    639,13}, {   1343,12}, {   2687,13}, \
+    {   1471,12}, {   2943,14}, {    767,13}, {   1599,12}, \
+    {   3199,13}, {   1663,14}, {    895,13}, {   1855,15}, \
+    {  32768,16}, {  65536,17}, { 131072,18}, { 262144,19}, \
+    { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
+    {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 209  /* number of entries in MUL_FFT_TABLE3 */
+#define MUL_FFT_THRESHOLD                 7296
+
+#define SQR_FFT_MODF_THRESHOLD             308  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    308, 5}, {     17, 6}, {      9, 5}, {     19, 6}, \
+    {     19, 7}, {     10, 6}, {     21, 7}, {     11, 6}, \
+    {     23, 7}, {     21, 8}, {     11, 7}, {     24, 8}, \
+    {     13, 7}, {     29, 8}, {     15, 7}, {     31, 8}, \
+    {     21, 9}, {     11, 8}, {     27, 9}, {     15, 8}, \
+    {     33, 9}, {     19, 8}, {     39, 9}, {     23, 8}, \
+    {     47, 9}, {     27,10}, {     15, 9}, {     39,10}, \
+    {     23, 9}, {     51,11}, {     15,10}, {     31, 9}, \
+    {     67,10}, {     39, 9}, {     83,10}, {     47, 9}, \
+    {     95,10}, {     55,11}, {     31,10}, {     79,11}, \
+    {     47,10}, {     95, 9}, {    191, 8}, {    383,12}, \
+    {     31,11}, {     63,10}, {    127, 9}, {    255, 8}, \
+    {    511,10}, {    135, 9}, {    271,11}, {     79,10}, \
+    {    159, 9}, {    319,10}, {    175, 9}, {    351,11}, \
+    {     95,10}, {    191, 9}, {    383,10}, {    207, 9}, \
+    {    415,11}, {    111,12}, {     63,11}, {    127,10}, \
+    {    255, 9}, {    511, 8}, {   1023,10}, {    271, 9}, \
+    {    543,10}, {    287, 9}, {    575, 8}, {   1151,10}, \
+    {    303,11}, {    159,10}, {    319, 9}, {    639,11}, \
+    {    175,10}, {    351,12}, {     95,11}, {    191,10}, \
+    {    383, 9}, {    767,11}, {    207,10}, {    415, 9}, \
+    {    831,11}, {    223,13}, {     63,12}, {    127,11}, \
+    {    255,10}, {    511, 9}, {   1023,11}, {    271,10}, \
+    {    543,11}, {    287,10}, {    575, 9}, {   1151,11}, \
+    {    303,12}, {    159,11}, {    319,10}, {    639,11}, \
+    {    351,10}, {    703,12}, {    191,11}, {    383,10}, \
+    {    767,11}, {    415,10}, {    831,12}, {    223,11}, \
+    {    447,10}, {    895,11}, {    479,10}, {    959,13}, \
+    {    127,12}, {    255,11}, {    511,10}, {   1023,11}, \
+    {    543,12}, {    287,11}, {    575,10}, {   1151,11}, \
+    {    607,12}, {    319,11}, {    639,10}, {   1279,12}, \
+    {    351,11}, {    703,13}, {    191,12}, {    383,11}, \
+    {    767,12}, {    415,11}, {    831,10}, {   1663,12}, \
+    {    447,11}, {    895,12}, {    479,11}, {    959,14}, \
+    {    127,13}, {    255,12}, {    511,11}, {   1023,12}, \
+    {    543,11}, {   1087,10}, {   2175,12}, {    575,11}, \
+    {   1151,12}, {    607,13}, {    319,12}, {    639,11}, \
+    {   1279,12}, {    671,11}, {   1343,12}, {    703,11}, \
+    {   1407,13}, {    383,12}, {    767,11}, {   1535,12}, \
+    {    831,11}, {   1663,13}, {    447,12}, {    959,11}, \
+    {   1919,14}, {    255,13}, {    511,12}, {   1087,11}, \
+    {   2175,13}, {    575,12}, {   1215,11}, {   2431,13}, \
+    {    639,12}, {   1343,13}, {    703,12}, {   1407,14}, \
+    {    383,13}, {    767,12}, {   1535,13}, {    831,12}, \
+    {   1663,13}, {    959,12}, {   1919,15}, {    255,14}, \
+    {    511,13}, {   1087,12}, {   2175,13}, {   1215,12}, \
+    {   2431,14}, {    639,13}, {   1343,12}, {   2687,13}, \
+    {   1407,12}, {   2815,13}, {   1471,14}, {    767,13}, \
+    {   1535,12}, {   3071,13}, {   1663,14}, {    895,13}, \
+    {   1791,12}, {   3839,15}, {  32768,16}, {  65536,17}, \
+    { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
+    {2097152,22}, {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 207  /* number of entries in SQR_FFT_TABLE3 */
+#define SQR_FFT_THRESHOLD                 2752
+
+#define MULLO_BASECASE_THRESHOLD             5
+#define MULLO_DC_THRESHOLD                  34
+#define MULLO_MUL_N_THRESHOLD            10950
+
+#define DC_DIV_QR_THRESHOLD                 30
+#define DC_DIVAPPR_Q_THRESHOLD             103
+#define DC_BDIV_QR_THRESHOLD                48
+#define DC_BDIV_Q_THRESHOLD                120
+
+#define INV_MULMOD_BNM1_THRESHOLD           50
+#define INV_NEWTON_THRESHOLD               131
+#define INV_APPR_THRESHOLD                 115
+
+#define BINV_NEWTON_THRESHOLD              204
+#define REDC_1_TO_REDC_N_THRESHOLD          55
+
+#define MU_DIV_QR_THRESHOLD                998
+#define MU_DIVAPPR_Q_THRESHOLD             998
+#define MUPI_DIV_QR_THRESHOLD               61
+#define MU_BDIV_QR_THRESHOLD               889
+#define MU_BDIV_Q_THRESHOLD               1078
+
+#define MATRIX22_STRASSEN_THRESHOLD         11
+#define HGCD_THRESHOLD                      96
+#define GCD_DC_THRESHOLD                   249
+#define GCDEXT_DC_THRESHOLD                209
+#define JACOBI_BASE_METHOD                   1
+
+#define GET_STR_DC_THRESHOLD                11
+#define GET_STR_PRECOMPUTE_THRESHOLD        23
+#define SET_STR_DC_THRESHOLD               532
+#define SET_STR_PRECOMPUTE_THRESHOLD      1781
diff --git a/mpn/powerpc64/mode64/p5/gmp-mparam.h b/mpn/powerpc64/mode64/p5/gmp-mparam.h

new file mode 100644 (file)

index 0000000..89e1534
--- /dev/null
+++ b/mpn/powerpc64/mode64/p5/gmp-mparam.h
@@ -0,0 +1,199 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2009, 2010 Free
+Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 64
+#define BYTES_PER_MP_LIMB 8
+
+/* POWER5 (friggms.hpc.ntnu.no) */
+
+#define MOD_1_NORM_THRESHOLD                 0  /* always */
+#define MOD_1_UNNORM_THRESHOLD               0  /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          9
+#define MOD_1U_TO_MOD_1_1_THRESHOLD         10
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0  /* never mpn_mod_1_1p */
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        22
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     16
+#define USE_PREINV_DIVREM_1                  0
+#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD           59
+
+#define MUL_TOOM22_THRESHOLD                16
+#define MUL_TOOM33_THRESHOLD                56
+#define MUL_TOOM44_THRESHOLD               118
+#define MUL_TOOM6H_THRESHOLD               206
+#define MUL_TOOM8H_THRESHOLD               309
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      82
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD      91
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD      81
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD      88
+
+#define SQR_BASECASE_THRESHOLD              10
+#define SQR_TOOM2_THRESHOLD                 51
+#define SQR_TOOM3_THRESHOLD                 78
+#define SQR_TOOM4_THRESHOLD                100
+#define SQR_TOOM6_THRESHOLD                150
+#define SQR_TOOM8_THRESHOLD                309
+
+#define MULMOD_BNM1_THRESHOLD                5
+#define SQRMOD_BNM1_THRESHOLD                7
+
+#define MUL_FFT_MODF_THRESHOLD             348  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    348, 5}, {     17, 6}, {      9, 5}, {     19, 6}, \
+    {     10, 5}, {     21, 6}, {     21, 7}, {     11, 6}, \
+    {     23, 7}, {     12, 6}, {     25, 7}, {     21, 8}, \
+    {     11, 7}, {     25, 8}, {     13, 7}, {     27, 8}, \
+    {     15, 7}, {     31, 8}, {     21, 9}, {     11, 8}, \
+    {     27, 9}, {     15, 8}, {     33, 9}, {     19, 8}, \
+    {     39, 9}, {     23, 8}, {     47, 9}, {     27,10}, \
+    {     15, 9}, {     39,10}, {     23, 9}, {     51,11}, \
+    {     15,10}, {     31, 9}, {     67,10}, {     39, 9}, \
+    {     79,10}, {     47, 9}, {     95,10}, {     55,11}, \
+    {     31,10}, {     79,11}, {     47,10}, {     95,12}, \
+    {     31,11}, {     63,10}, {    127, 9}, {    255,10}, \
+    {    135,11}, {     79,10}, {    159, 9}, {    319,11}, \
+    {     95,10}, {    191, 9}, {    383,12}, {     63,11}, \
+    {    127,10}, {    255, 9}, {    511,10}, {    271,11}, \
+    {    143,10}, {    287, 9}, {    575,10}, {    319,12}, \
+    {     95,11}, {    191,10}, {    383,13}, {     63,12}, \
+    {    127,11}, {    255,10}, {    511,11}, {    271,10}, \
+    {    543,11}, {    287,10}, {    575, 9}, {   1151,11}, \
+    {    319,10}, {    639,11}, {    351,10}, {    703,12}, \
+    {    191,11}, {    383,10}, {    767,11}, {    415,12}, \
+    {    223,11}, {    447,13}, {    127,12}, {    255,11}, \
+    {    511,10}, {   1023,11}, {    543,10}, {   1087,12}, \
+    {    287,11}, {    575,10}, {   1151,12}, {    319,11}, \
+    {    639,12}, {    351,11}, {    703,13}, {    191,12}, \
+    {    383,11}, {    767,12}, {    415,11}, {    831,12}, \
+    {    447,11}, {    895,14}, {    127,13}, {    255,12}, \
+    {    511,11}, {   1023,12}, {    543,11}, {   1087,10}, \
+    {   2175,12}, {    575,11}, {   1151,12}, {    607,13}, \
+    {    319,12}, {    639,11}, {   1279,12}, {    671,11}, \
+    {   1343,12}, {    703,11}, {   1407,13}, {    383,12}, \
+    {    767,11}, {   1535,12}, {    831,13}, {    447,12}, \
+    {    959,11}, {   1919,14}, {    255,13}, {    511,12}, \
+    {   1087,11}, {   2175,13}, {    575,12}, {   1215,11}, \
+    {   2431,10}, {   4863,13}, {    639,12}, {   1343,13}, \
+    {    703,12}, {   1407,14}, {    383,13}, {    767,12}, \
+    {   1535,13}, {    831,12}, {   1663,13}, {    959,12}, \
+    {   1919,11}, {   3839,15}, {    255,14}, {    511,13}, \
+    {   1087,12}, {   2175,13}, {   1215,12}, {   2431,11}, \
+    {   4863,14}, {    639,13}, {   1343,12}, {   2687,13}, \
+    {   1407,12}, {   2815,13}, {   1471,12}, {   2943,14}, \
+    {    767,13}, {   1599,12}, {   3199,13}, {   1663,14}, \
+    {    895,13}, {   1919,12}, {   3839,15}, {    511,14}, \
+    {   1023,13}, {   2175,14}, {   1151,13}, {   2431,12}, \
+    {   4863,14}, {   1279,13}, {   2687,14}, {   1407,13}, \
+    {   2943,15}, {    767,14}, {   1535,13}, {   3199,14}, \
+    {   1663,13}, {   3327,14}, {   1919,13}, {   3839,16}, \
+    {    511,15}, {   1023,14}, {   2431,13}, {   4863,15}, \
+    {   1279,14}, {   2943,12}, {  11775,15}, {   1535,14}, \
+    {   3327,15}, {   1791,14}, {  16384,15}, {  32768,16}, \
+    {  65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
+    {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 208
+#define MUL_FFT_THRESHOLD                 3712
+
+#define SQR_FFT_MODF_THRESHOLD             272  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    272, 5}, {     15, 6}, {      8, 5}, {     17, 6}, \
+    {     19, 7}, {     17, 8}, {      9, 7}, {     21, 8}, \
+    {     11, 7}, {     24, 8}, {     13, 7}, {     27, 8}, \
+    {     15, 7}, {     31, 8}, {     21, 9}, {     11, 8}, \
+    {     27, 9}, {     15, 8}, {     33, 9}, {     19, 8}, \
+    {     39, 9}, {     23, 8}, {     47, 9}, {     27,10}, \
+    {     15, 9}, {     39,10}, {     23, 9}, {     47,11}, \
+    {     15,10}, {     31, 9}, {     63,10}, {     47,11}, \
+    {     31,10}, {     71, 9}, {    143,10}, {     79,11}, \
+    {     47,10}, {     95,12}, {     31,11}, {     63,10}, \
+    {    127, 9}, {    255, 8}, {    511, 9}, {    271,10}, \
+    {    143,11}, {     79,10}, {    159, 9}, {    319,10}, \
+    {    175, 9}, {    351,11}, {     95,10}, {    191, 9}, \
+    {    383,10}, {    207, 9}, {    415,12}, {     63,11}, \
+    {    127,10}, {    255, 9}, {    511,10}, {    271,11}, \
+    {    143,10}, {    287, 9}, {    575,11}, {    159,10}, \
+    {    319,11}, {    175,10}, {    351,12}, {     95,11}, \
+    {    191,10}, {    383,11}, {    207,10}, {    415,13}, \
+    {     63,12}, {    127,11}, {    255,10}, {    511,11}, \
+    {    271,10}, {    543,11}, {    287,10}, {    575,12}, \
+    {    159,11}, {    319,10}, {    639,11}, {    351,10}, \
+    {    703,12}, {    191,11}, {    383,10}, {    767,11}, \
+    {    415,12}, {    223,11}, {    447,10}, {    895,11}, \
+    {    479,10}, {    959,12}, {    255,11}, {    511,10}, \
+    {   1023,11}, {    543,12}, {    287,11}, {    575,12}, \
+    {    319,11}, {    639,12}, {    351,11}, {    703,13}, \
+    {    191,12}, {    383,11}, {    767,12}, {    415,11}, \
+    {    831,12}, {    447,11}, {    895,12}, {    479,11}, \
+    {    959,13}, {    255,12}, {    511,11}, {   1023,12}, \
+    {    543,11}, {   1087,12}, {    575,13}, {    319,12}, \
+    {    639,11}, {   1279,12}, {    703,11}, {   1407,13}, \
+    {    383,12}, {    831,13}, {    447,12}, {    959,14}, \
+    {    255,13}, {    511,12}, {   1087,13}, {    575,12}, \
+    {   1215,13}, {    639,12}, {   1279,13}, {    703,12}, \
+    {   1407,14}, {    383,13}, {    831,12}, {   1663,13}, \
+    {    959,12}, {   1919,15}, {    255,14}, {    511,13}, \
+    {   1023,12}, {   2047,13}, {   1087,12}, {   2175,13}, \
+    {   1215,14}, {    639,13}, {   1407,12}, {   2815,14}, \
+    {    767,13}, {   1663,14}, {    895,13}, {   1919,15}, \
+    {    511,14}, {   1023,13}, {   2175,14}, {   1151,13}, \
+    {   2431,12}, {   4863,14}, {   1407,13}, {   2815,15}, \
+    {    767,14}, {   1663,13}, {   3327,14}, {   1919,13}, \
+    {   3839,16}, {    511,15}, {   1023,14}, {   2431,13}, \
+    {   4863,15}, {   1279,14}, {   2943,13}, {   5887,12}, \
+    {  11775,15}, {   1535,14}, {   3327,15}, {   1791,14}, \
+    {  16384,15}, {  32768,16}, {  65536,17}, { 131072,18}, \
+    { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
+    {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 190
+#define SQR_FFT_THRESHOLD                 2752
+
+#define MULLO_BASECASE_THRESHOLD             5
+#define MULLO_DC_THRESHOLD                  25
+#define MULLO_MUL_N_THRESHOLD             6633
+
+#define DC_DIV_QR_THRESHOLD                 29
+#define DC_DIVAPPR_Q_THRESHOLD             102
+#define DC_BDIV_QR_THRESHOLD                47
+#define DC_BDIV_Q_THRESHOLD                112
+
+#define INV_MULMOD_BNM1_THRESHOLD           76
+#define INV_NEWTON_THRESHOLD               129
+#define INV_APPR_THRESHOLD                 109
+
+#define BINV_NEWTON_THRESHOLD              197
+#define REDC_1_TO_REDC_N_THRESHOLD          54
+
+#define MU_DIV_QR_THRESHOLD                872
+#define MU_DIVAPPR_Q_THRESHOLD             855
+#define MUPI_DIV_QR_THRESHOLD               53
+#define MU_BDIV_QR_THRESHOLD               792
+#define MU_BDIV_Q_THRESHOLD                942
+
+#define MATRIX22_STRASSEN_THRESHOLD         15
+#define HGCD_THRESHOLD                      86
+#define GCD_DC_THRESHOLD                   241
+#define GCDEXT_DC_THRESHOLD                229
+#define JACOBI_BASE_METHOD                   1
+
+#define GET_STR_DC_THRESHOLD                12
+#define GET_STR_PRECOMPUTE_THRESHOLD        21
+#define SET_STR_DC_THRESHOLD               532
+#define SET_STR_PRECOMPUTE_THRESHOLD      1655
diff --git a/mpn/powerpc64/mode64/p6/gmp-mparam.h b/mpn/powerpc64/mode64/p6/gmp-mparam.h

new file mode 100644 (file)

index 0000000..bedb270
--- /dev/null
+++ b/mpn/powerpc64/mode64/p6/gmp-mparam.h
@@ -0,0 +1,189 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2009, 2010, 2012 Free
+Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 64
+#define BYTES_PER_MP_LIMB 8
+
+/* 3500 MHz POWER6 (kolga.bibsys.no) */
+
+#define MOD_1_NORM_THRESHOLD                 0  /* always */
+#define MOD_1_UNNORM_THRESHOLD               0  /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD      MP_SIZE_T_MAX  /* never */
+#define MOD_1U_TO_MOD_1_1_THRESHOLD      MP_SIZE_T_MAX
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD     MP_SIZE_T_MAX
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD     MP_SIZE_T_MAX
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     55
+#define USE_PREINV_DIVREM_1                  0
+#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD        MP_SIZE_T_MAX  /* never */
+
+#define MUL_TOOM22_THRESHOLD                14
+#define MUL_TOOM33_THRESHOLD                37
+#define MUL_TOOM44_THRESHOLD               160
+#define MUL_TOOM6H_THRESHOLD               177
+#define MUL_TOOM8H_THRESHOLD               321
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      73
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD      86
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     103
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD      90
+
+#define SQR_BASECASE_THRESHOLD               5
+#define SQR_TOOM2_THRESHOLD                 22
+#define SQR_TOOM3_THRESHOLD                 43
+#define SQR_TOOM4_THRESHOLD                296
+#define SQR_TOOM6_THRESHOLD                309
+#define SQR_TOOM8_THRESHOLD                562
+
+#define MULMOD_BNM1_THRESHOLD               12
+#define SQRMOD_BNM1_THRESHOLD               14
+
+#define MUL_FFT_MODF_THRESHOLD             272  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    272, 5}, {     13, 6}, {      7, 5}, {     15, 6}, \
+    {      8, 5}, {     17, 6}, {      9, 5}, {     19, 6}, \
+    {     13, 7}, {      7, 6}, {     17, 7}, {      9, 6}, \
+    {     19, 7}, {     17, 8}, {      9, 7}, {     20, 8}, \
+    {     11, 7}, {     24, 8}, {     21, 9}, {     11, 8}, \
+    {     25, 9}, {     15, 8}, {     33, 9}, {     19, 8}, \
+    {     39, 9}, {     23, 8}, {     47,10}, {     15, 9}, \
+    {     39,10}, {     23, 9}, {     47,11}, {     15,10}, \
+    {     31, 9}, {     63,10}, {     47,11}, {     31,10}, \
+    {     71,11}, {     47,12}, {     31,11}, {     63,10}, \
+    {    127, 9}, {    255, 8}, {    511,10}, {    143,11}, \
+    {     79,10}, {    159, 9}, {    319, 8}, {    639,10}, \
+    {    175, 9}, {    351,11}, {     95,10}, {    191, 9}, \
+    {    383,12}, {     63,11}, {    127,10}, {    255, 9}, \
+    {    511,11}, {    143,10}, {    287, 9}, {    575,11}, \
+    {    159,10}, {    319, 9}, {    639,11}, {    175,10}, \
+    {    351,12}, {     95,11}, {    191,10}, {    383, 9}, \
+    {    767,13}, {     63,12}, {    127,11}, {    255,10}, \
+    {    511, 9}, {   1023,11}, {    271,10}, {    543,11}, \
+    {    287,10}, {    575,12}, {    159,11}, {    319,10}, \
+    {    639,11}, {    351,10}, {    703, 9}, {   1407,12}, \
+    {    191,11}, {    383,10}, {    767,11}, {    415,10}, \
+    {    831, 9}, {   1663,12}, {    223,11}, {    447,10}, \
+    {    959, 9}, {   1919,13}, {    127,12}, {    255,11}, \
+    {    511,10}, {   1023,12}, {    287,11}, {    575,10}, \
+    {   1151,12}, {    319,11}, {    639,12}, {    351,11}, \
+    {    703,10}, {   1407,13}, {    191,12}, {    383,11}, \
+    {    767,12}, {    415,11}, {    831,10}, {   1663,12}, \
+    {    447,11}, {    959,10}, {   1919, 9}, {   3839,13}, \
+    {    255,12}, {    511,11}, {   1023,12}, {    543,11}, \
+    {   1087,10}, {   2175,12}, {    575,11}, {   1151,13}, \
+    {    319,12}, {    639,11}, {   1279,12}, {    703,11}, \
+    {   1407,10}, {   2815,13}, {    383,12}, {    831,11}, \
+    {   1663,13}, {    447,12}, {    959,11}, {   1919,10}, \
+    {   3839,14}, {    255,13}, {    511,12}, {   1087,11}, \
+    {   2175,13}, {    575,12}, {   1151,13}, {    639,12}, \
+    {   1279,13}, {    703,12}, {   1407,11}, {   2815,14}, \
+    {    383,13}, {    831,12}, {   1663,13}, {    959,12}, \
+    {   1919,11}, {   3839,15}, {    255,14}, {    511,13}, \
+    {   1087,12}, {   2175,13}, {   1151,14}, {    639,13}, \
+    {   1407,12}, {   2815,13}, {   1471,14}, {    767,13}, \
+    {   1663,14}, {    895,13}, {   1919,12}, {   3839,11}, \
+    {   7679,15}, {  32768,16}, {  65536,17}, { 131072,18}, \
+    { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
+    {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 178
+#define MUL_FFT_THRESHOLD                 1856
+
+#define SQR_FFT_MODF_THRESHOLD             208  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    208, 5}, {      7, 4}, {     15, 5}, {     13, 6}, \
+    {      7, 5}, {     15, 6}, {      8, 5}, {     17, 6}, \
+    {     17, 7}, {      9, 6}, {     19, 7}, {     17, 8}, \
+    {      9, 7}, {     20, 8}, {     11, 7}, {     23, 8}, \
+    {     19, 9}, {     11, 8}, {     25, 9}, {     15, 8}, \
+    {     33, 9}, {     19, 8}, {     39, 9}, {     23, 8}, \
+    {     47,10}, {     15, 9}, {     39,10}, {     23, 9}, \
+    {     47,11}, {     15,10}, {     31, 9}, {     63,10}, \
+    {     47,11}, {     31,10}, {     63, 9}, {    127, 8}, \
+    {    255,10}, {     71, 9}, {    143, 8}, {    287,11}, \
+    {     47,12}, {     31,11}, {     63,10}, {    127, 9}, \
+    {    255, 8}, {    511,10}, {    143, 9}, {    287,11}, \
+    {     79,10}, {    159, 9}, {    319, 8}, {    639,10}, \
+    {    175, 9}, {    351, 8}, {    703,11}, {     95,10}, \
+    {    191, 9}, {    383, 8}, {    767,10}, {    207, 9}, \
+    {    415,12}, {     63,11}, {    127,10}, {    255, 9}, \
+    {    511, 8}, {   1023,11}, {    143,10}, {    287, 9}, \
+    {    575,11}, {    159,10}, {    319, 9}, {    639,11}, \
+    {    175,10}, {    351, 9}, {    703,12}, {     95,11}, \
+    {    191,10}, {    383, 9}, {    767,11}, {    207,10}, \
+    {    415,13}, {     63,12}, {    127,11}, {    255,10}, \
+    {    511, 9}, {   1023,11}, {    287,10}, {    575,12}, \
+    {    159,11}, {    319,10}, {    639,11}, {    351,10}, \
+    {    703,12}, {    191,11}, {    383,10}, {    767,11}, \
+    {    415,10}, {    831, 9}, {   1663,12}, {    223,11}, \
+    {    447,10}, {    895,13}, {    127,12}, {    255,11}, \
+    {    511,10}, {   1023,12}, {    287,11}, {    575,10}, \
+    {   1151,12}, {    319,11}, {    639,12}, {    351,11}, \
+    {    703,10}, {   1407,13}, {    191,12}, {    383,11}, \
+    {    767,12}, {    415,11}, {    831,10}, {   1663,12}, \
+    {    447,11}, {    959,14}, {    127,13}, {    255,12}, \
+    {    511,11}, {   1023,12}, {    543,11}, {   1087,10}, \
+    {   2175,12}, {    575,11}, {   1151,13}, {    319,12}, \
+    {    639,11}, {   1279,12}, {    703,11}, {   1407,13}, \
+    {    383,12}, {    831,11}, {   1663,13}, {    447,12}, \
+    {    959,14}, {    255,13}, {    511,12}, {   1087,11}, \
+    {   2175,13}, {    575,12}, {   1215,13}, {    639,12}, \
+    {   1279,13}, {    703,12}, {   1407,14}, {    383,13}, \
+    {    831,12}, {   1663,13}, {    959,15}, {    255,14}, \
+    {    511,13}, {   1087,12}, {   2303,13}, {   1215,14}, \
+    {    639,13}, {   1407,12}, {   2815,14}, {    767,13}, \
+    {   1663,14}, {    895,13}, {   1919,12}, {   3839,15}, \
+    {  32768,16}, {  65536,17}, { 131072,18}, { 262144,19}, \
+    { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
+    {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 177
+#define SQR_FFT_THRESHOLD                 1856
+
+#define MULLO_BASECASE_THRESHOLD             3
+#define MULLO_DC_THRESHOLD                  37
+#define MULLO_MUL_N_THRESHOLD             3574
+
+#define DC_DIV_QR_THRESHOLD                 23
+#define DC_DIVAPPR_Q_THRESHOLD              95
+#define DC_BDIV_QR_THRESHOLD                41
+#define DC_BDIV_Q_THRESHOLD                 90
+
+#define INV_MULMOD_BNM1_THRESHOLD           45
+#define INV_NEWTON_THRESHOLD                85
+#define INV_APPR_THRESHOLD                  85
+
+#define BINV_NEWTON_THRESHOLD              151
+#define REDC_1_TO_REDC_N_THRESHOLD          43
+
+#define MU_DIV_QR_THRESHOLD                748
+#define MU_DIVAPPR_Q_THRESHOLD            1210
+#define MUPI_DIV_QR_THRESHOLD               42
+#define MU_BDIV_QR_THRESHOLD               618
+#define MU_BDIV_Q_THRESHOLD                807
+
+#define MATRIX22_STRASSEN_THRESHOLD         10
+#define HGCD_THRESHOLD                      77
+#define GCD_DC_THRESHOLD                   358
+#define GCDEXT_DC_THRESHOLD                241
+#define JACOBI_BASE_METHOD                   3
+
+#define GET_STR_DC_THRESHOLD                12
+#define GET_STR_PRECOMPUTE_THRESHOLD        25
+#define SET_STR_DC_THRESHOLD               552
+#define SET_STR_PRECOMPUTE_THRESHOLD      1416
diff --git a/mpn/powerpc64/mode64/p7/gmp-mparam.h b/mpn/powerpc64/mode64/p7/gmp-mparam.h

new file mode 100644 (file)

index 0000000..884bf13
--- /dev/null
+++ b/mpn/powerpc64/mode64/p7/gmp-mparam.h
@@ -0,0 +1,202 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2009, 2010, 2012 Free
+Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 64
+#define BYTES_PER_MP_LIMB 8
+
+/* 3550 MHz POWER7 (gcc110.fsffrance.org) */
+
+#define MOD_1_NORM_THRESHOLD                 0  /* always */
+#define MOD_1_UNNORM_THRESHOLD               0  /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD         12
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          6
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         7
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        34
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     16
+#define USE_PREINV_DIVREM_1                  1
+#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD           28
+
+#define MUL_TOOM22_THRESHOLD                22
+#define MUL_TOOM33_THRESHOLD                73
+#define MUL_TOOM44_THRESHOLD               202
+#define MUL_TOOM6H_THRESHOLD               393
+#define MUL_TOOM8H_THRESHOLD               592
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD     137
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     149
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     137
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD     149
+
+#define SQR_BASECASE_THRESHOLD              18
+#define SQR_TOOM2_THRESHOLD                 64
+#define SQR_TOOM3_THRESHOLD                 89
+#define SQR_TOOM4_THRESHOLD                184
+#define SQR_TOOM6_THRESHOLD                294
+#define SQR_TOOM8_THRESHOLD                430
+
+#define MULMOD_BNM1_THRESHOLD               17
+#define SQRMOD_BNM1_THRESHOLD               13
+
+#define MUL_FFT_MODF_THRESHOLD             408  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    408, 5}, {     21, 6}, {     11, 5}, {     23, 6}, \
+    {     12, 5}, {     25, 6}, {     21, 7}, {     11, 6}, \
+    {     23, 7}, {     12, 6}, {     25, 7}, {     13, 6}, \
+    {     27, 7}, {     25, 8}, {     13, 7}, {     28, 8}, \
+    {     15, 7}, {     31, 8}, {     17, 7}, {     35, 8}, \
+    {     19, 7}, {     39, 8}, {     21, 9}, {     11, 8}, \
+    {     27, 9}, {     15, 8}, {     35, 9}, {     19, 8}, \
+    {     43, 9}, {     23, 8}, {     49, 9}, {     27,10}, \
+    {     15, 9}, {     31, 8}, {     63, 9}, {     43,10}, \
+    {     23, 9}, {     51,11}, {     15,10}, {     31, 9}, \
+    {     67,10}, {     39, 9}, {     79,10}, {     55,11}, \
+    {     31,10}, {     79,11}, {     47,10}, {     95,12}, \
+    {     31,11}, {     63,10}, {    135,11}, {     79,10}, \
+    {    167,11}, {     95,10}, {    191,11}, {    111,12}, \
+    {     63,11}, {    127,10}, {    255, 9}, {    511,11}, \
+    {    143, 7}, {   2303,10}, {    303,11}, {    159,10}, \
+    {    319, 9}, {    639,12}, {     95,11}, {    191,10}, \
+    {    383,13}, {     63,12}, {    127,11}, {    255,10}, \
+    {    511,11}, {    271,10}, {    543,11}, {    287,10}, \
+    {    575,12}, {    159,11}, {    319,10}, {    639,11}, \
+    {    335,10}, {    671, 9}, {   1343,11}, {    351,10}, \
+    {    703,12}, {    191,11}, {    383,10}, {    799,11}, \
+    {    415,10}, {    831,12}, {    223,11}, {    447,13}, \
+    {    127,12}, {    255,11}, {    511,10}, {   1023,11}, \
+    {    543,10}, {   1087,12}, {    287,11}, {    575,10}, \
+    {   1151,11}, {    607,10}, {   1215,12}, {    319,11}, \
+    {    639,10}, {   1279,11}, {    671,10}, {   1343,12}, \
+    {    351,11}, {    703,13}, {    191,12}, {    383,11}, \
+    {    799,10}, {   1599,12}, {    415,11}, {    831,10}, \
+    {   1663,12}, {    447,11}, {    895,14}, {    127,13}, \
+    {    255,12}, {    543,13}, {    319,12}, {    671,11}, \
+    {   1343,12}, {    703,11}, {   1407,12}, {    735,13}, \
+    {    447,12}, {    959,11}, {   1919,14}, {    255,12}, \
+    {   1087,13}, {    575,12}, {   1215,13}, {    639,12}, \
+    {   1343,11}, {   2687,12}, {   1471,14}, {    383,13}, \
+    {    767,12}, {   1599,13}, {    831,10}, {   6655,12}, \
+    {   1727,13}, {    959,12}, {   1919,11}, {   3839,14}, \
+    {    511,11}, {   4095,13}, {   1087,12}, {   2303,13}, \
+    {   1215,12}, {   2431,14}, {    639,13}, {   1343,12}, \
+    {   2687,13}, {   1471,12}, {   2943,14}, {    767,13}, \
+    {   1599,12}, {   3199,13}, {   1663,14}, {    895,13}, \
+    {   1919,12}, {   3839,15}, {    511,14}, {   1023,13}, \
+    {   2175,14}, {   1151,13}, {   2431,12}, {   4863,14}, \
+    {   1407,13}, {   2943,15}, {    767,14}, {   1663,13}, \
+    {   3327,12}, {   6655,14}, {   1919,13}, {   3839,16}, \
+    {    511,15}, {   1023,14}, {   2175,13}, {   4351,14}, \
+    {   2303,12}, {   9215,13}, {   4863,15}, {   1279,13}, \
+    {   5119,14}, {   2815,13}, {   5887,15}, {   1535,14}, \
+    {  16384,15}, {  32768,16}, {  65536,17}, { 131072,18}, \
+    { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
+    {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 202
+#define MUL_FFT_THRESHOLD                 3712
+
+#define SQR_FFT_MODF_THRESHOLD             332  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    332, 5}, {     19, 6}, {     10, 5}, {     21, 6}, \
+    {     21, 7}, {     11, 6}, {     23, 7}, {     21, 8}, \
+    {     11, 7}, {     24, 8}, {     13, 7}, {     27, 8}, \
+    {     15, 7}, {     31, 8}, {     21, 9}, {     11, 8}, \
+    {     27, 9}, {     15, 8}, {     33, 9}, {     19, 8}, \
+    {     39, 9}, {     23, 8}, {     47, 9}, {     27,10}, \
+    {     15, 9}, {     39,10}, {     23, 9}, {     47,11}, \
+    {     15,10}, {     31, 9}, {     67,10}, {     39, 9}, \
+    {     79,10}, {     55,11}, {     31,10}, {     79,11}, \
+    {     47,10}, {     95,12}, {     31,11}, {     63,10}, \
+    {    127, 9}, {    255,11}, {     79,10}, {    159, 9}, \
+    {    319,11}, {     95,10}, {    191, 9}, {    383,12}, \
+    {     63,11}, {    127,10}, {    255, 9}, {    511,10}, \
+    {    271, 9}, {    543,11}, {    143,10}, {    287, 9}, \
+    {    575,10}, {    303, 9}, {    607,10}, {    319, 9}, \
+    {    639,12}, {     95,11}, {    191,10}, {    383,13}, \
+    {     63,12}, {    127,11}, {    255,10}, {    511,11}, \
+    {    271,10}, {    543, 9}, {   1087,11}, {    287,10}, \
+    {    575,11}, {    303,10}, {    607, 9}, {   1215,11}, \
+    {    319,10}, {    639,11}, {    335,10}, {    671,11}, \
+    {    351,10}, {    703,12}, {    191,11}, {    383,10}, \
+    {    767,11}, {    415,10}, {    831,12}, {    223,11}, \
+    {    447,10}, {    895,11}, {    479,10}, {    959,12}, \
+    {    255,11}, {    511,10}, {   1023,11}, {    543,10}, \
+    {   1087,12}, {    287,11}, {    575,10}, {   1151,11}, \
+    {    607,10}, {   1215,12}, {    319,11}, {    639,10}, \
+    {   1279,11}, {    671,12}, {    351,11}, {    703,13}, \
+    {    191,12}, {    383,11}, {    767,10}, {   1535,11}, \
+    {    831,10}, {   1663,12}, {    447,11}, {    895,12}, \
+    {    479,11}, {    959,14}, {    127,13}, {    255,12}, \
+    {    511,11}, {   1023,12}, {    543,11}, {   1087,12}, \
+    {    575,11}, {   1151,12}, {    607,11}, {   1215,13}, \
+    {    319,12}, {    639,11}, {   1279,12}, {    671,11}, \
+    {   1343,12}, {    703,11}, {   1407,13}, {    383,12}, \
+    {    767,11}, {   1535,12}, {    831,11}, {   1663,13}, \
+    {    447,12}, {    959,11}, {   1919,14}, {    255,13}, \
+    {    511,12}, {   1087,13}, {    639,12}, {   1343,13}, \
+    {    703,12}, {   1407,14}, {    383,13}, {    767,12}, \
+    {   1535,13}, {    831,12}, {   1663,13}, {    959,12}, \
+    {   1919,15}, {    255,13}, {   1151,12}, {   2303,13}, \
+    {   1215,12}, {   2431,14}, {    639,13}, {   1343,12}, \
+    {   2687,13}, {   1407,12}, {   2815,13}, {   1471,11}, \
+    {   5887,13}, {   1663,14}, {    895,13}, {   1919,15}, \
+    {    511,14}, {   1023,13}, {   2175,14}, {   1151,13}, \
+    {   2431,12}, {   4863,13}, {   2687,14}, {   1407,13}, \
+    {   2815,15}, {    767,14}, {   1663,13}, {   3455,14}, \
+    {   1791,13}, {   3583,14}, {   1919,13}, {   3839,16}, \
+    {    511,15}, {   1023,14}, {   2175,13}, {   4351,15}, \
+    {   1279,14}, {   2943,13}, {   5887,15}, {   1535,14}, \
+    {  16384,15}, {  32768,16}, {  65536,17}, { 131072,18}, \
+    { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
+    {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 206
+#define SQR_FFT_THRESHOLD                 2752
+
+#define MULLO_BASECASE_THRESHOLD             5
+#define MULLO_DC_THRESHOLD                  23
+#define MULLO_MUL_N_THRESHOLD             7246
+
+#define DC_DIV_QR_THRESHOLD                 16
+#define DC_DIVAPPR_Q_THRESHOLD              64
+#define DC_BDIV_QR_THRESHOLD                62
+#define DC_BDIV_Q_THRESHOLD                156
+
+#define INV_MULMOD_BNM1_THRESHOLD           62
+#define INV_NEWTON_THRESHOLD                93
+#define INV_APPR_THRESHOLD                  66
+
+#define BINV_NEWTON_THRESHOLD              294
+#define REDC_1_TO_REDC_N_THRESHOLD          74
+
+#define MU_DIV_QR_THRESHOLD               1387
+#define MU_DIVAPPR_Q_THRESHOLD            1414
+#define MUPI_DIV_QR_THRESHOLD               31
+#define MU_BDIV_QR_THRESHOLD              1210
+#define MU_BDIV_Q_THRESHOLD               1558
+
+#define MATRIX22_STRASSEN_THRESHOLD         14
+#define HGCD_THRESHOLD                     108
+#define GCD_DC_THRESHOLD                   333
+#define GCDEXT_DC_THRESHOLD                333
+#define JACOBI_BASE_METHOD                   1
+
+#define GET_STR_DC_THRESHOLD                10
+#define GET_STR_PRECOMPUTE_THRESHOLD        22
+#define SET_STR_DC_THRESHOLD              1532
+#define SET_STR_PRECOMPUTE_THRESHOLD      3850
diff --git a/mpn/powerpc64/mode64/rsh1add_n.asm b/mpn/powerpc64/mode64/rsh1add_n.asm

new file mode 100644 (file)

index 0000000..0cd6cf4
--- /dev/null
+++ b/mpn/powerpc64/mode64/rsh1add_n.asm
@@ -0,0 +1,104 @@
+dnl  PowerPC-64 mpn_rsh1add_n -- rp[] = (up[] + vp[]) >> 1
+
+dnl  Copyright 2003, 2005 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C              cycles/limb
+C POWER3/PPC630:     2         (1.5 c/l should be possible)
+C POWER4/PPC970:     4         (2.0 c/l should be possible)
+
+C INPUT PARAMETERS
+C rp   r3
+C up   r4
+C vp   r5
+C n    r6
+
+define(`rp',`r3')
+define(`up',`r4')
+define(`vp',`r5')
+
+define(`s0',`r6')
+define(`s1',`r7')
+define(`x',`r0')
+define(`u0',`r8')
+define(`u1',`r9')
+define(`v0',`r10')
+define(`v1',`r11')
+
+
+ASM_START()
+PROLOGUE(mpn_rsh1add_n)
+       mtctr   r6              C copy size to count register
+       addi    rp, rp, -8      C bias rp so that stores can use offsets 8 and 16
+C Each result limb is the sum shifted right one bit with the low bit of the next sum on top
+       ld      u1, 0(up)
+       ld      v1, 0(vp)
+       addc    x, v1, u1       C first limb sum, starts the carry chain
+       rldicl  r12, x, 0, 63   C return value
+       srdi    s1, x, 1        C result limb 0 without its top bit, which comes from limb 1
+
+       bdz     L(1)            C n = 1
+
+       ld      u0, 8(up)
+       ld      v0, 8(vp)
+
+       bdz     L(end)          C n = 2
+
+L(oop):        ldu     u1, 16(up)      C load the limb after u0 and advance up by two limbs
+       ldu     v1, 16(vp)      C likewise for vp
+       adde    x, v0, u0       C sum with carry in, keeps the carry chain going
+       srdi    s0, x, 1        C start of the next result limb
+       rldimi  s1, x, 63, 0    C least significant bit of this sum becomes top bit of s1
+       std     s1, 8(rp)       C store the completed limb, rp not updated
+
+       bdz     L(exit)
+
+       ld      u0, 8(up)
+       ld      v0, 8(vp)
+       adde    x, v1, u1
+       srdi    s1, x, 1
+       rldimi  s0, x, 63, 0    C complete s0 with the least significant bit of this sum
+       stdu    s0, 16(rp)      C store it and advance rp by two limbs
+
+       bdnz    L(oop)
+
+L(end):        adde    x, v0, u0       C last sum when the pending pair is u0 and v0
+       srdi    s0, x, 1
+       rldimi  s1, x, 63, 0
+       std     s1, 8(rp)
+
+       li      x, 0
+       addze   x, x            C x = carry out of the whole addition
+       rldimi  s0, x, 63, 0    C carry out becomes the top bit of the last limb
+       std     s0, 16(rp)
+       mr      r3, r12         C return the bit shifted out at the low end
+       blr
+
+L(exit):       adde    x, v1, u1       C last sum when the pending pair is u1 and v1
+       srdi    s1, x, 1
+       rldimi  s0, x, 63, 0
+       stdu    s0, 16(rp)
+
+L(1):  li      x, 0
+       addze   x, x            C x = carry out of the whole addition
+       rldimi  s1, x, 63, 0    C carry out becomes the top bit of the last limb
+       std     s1, 8(rp)
+       mr      r3, r12         C return the bit shifted out at the low end
+       blr
+EPILOGUE()
diff --git a/mpn/powerpc64/mode64/rsh1sub_n.asm b/mpn/powerpc64/mode64/rsh1sub_n.asm

new file mode 100644 (file)

index 0000000..e4c78ff
--- /dev/null
+++ b/mpn/powerpc64/mode64/rsh1sub_n.asm
@@ -0,0 +1,102 @@
+dnl  PowerPC-64 mpn_rsh1sub_n -- rp[] = (up[] - vp[]) >> 1
+
+dnl  Copyright 2003, 2005 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C              cycles/limb
+C POWER3/PPC630:     2         (1.5 c/l should be possible)
+C POWER4/PPC970:     4         (2.0 c/l should be possible)
+
+C INPUT PARAMETERS
+C rp   r3
+C up   r4
+C vp   r5
+C n    r6
+
+define(`rp',`r3')
+define(`up',`r4')
+define(`vp',`r5')
+
+define(`s0',`r6')
+define(`s1',`r7')
+define(`x',`r0')
+define(`u0',`r8')
+define(`u1',`r9')
+define(`v0',`r10')
+define(`v1',`r11')
+
+
+ASM_START()
+PROLOGUE(mpn_rsh1sub_n)
+       mtctr   r6              C copy size to count register
+       addi    rp, rp, -8      C bias rp so that stores can use offsets 8 and 16
+C Each result limb is the difference shifted right one bit with the low bit of the next one on top
+       ld      u1, 0(up)
+       ld      v1, 0(vp)
+       subfc   x, v1, u1       C x = u1 - v1, starts the borrow chain
+       rldicl  r12, x, 0, 63   C return value
+       srdi    s1, x, 1        C result limb 0 without its top bit, which comes from limb 1
+
+       bdz     L(1)            C n = 1
+
+       ld      u0, 8(up)
+       ld      v0, 8(vp)
+
+       bdz     L(end)          C n = 2
+
+L(oop):        ldu     u1, 16(up)      C load the limb after u0 and advance up by two limbs
+       ldu     v1, 16(vp)      C likewise for vp
+       subfe   x, v0, u0       C x = u0 - v0 with borrow in, keeps the chain going
+       srdi    s0, x, 1        C start of the next result limb
+       rldimi  s1, x, 63, 0    C least significant bit of this difference becomes top bit of s1
+       std     s1, 8(rp)       C store the completed limb, rp not updated
+
+       bdz     L(exit)
+
+       ld      u0, 8(up)
+       ld      v0, 8(vp)
+       subfe   x, v1, u1
+       srdi    s1, x, 1
+       rldimi  s0, x, 63, 0    C complete s0 with the least significant bit of this difference
+       stdu    s0, 16(rp)      C store it and advance rp by two limbs
+
+       bdnz    L(oop)
+
+L(end):        subfe   x, v0, u0       C last difference when the pending pair is u0 and v0
+       srdi    s0, x, 1
+       rldimi  s1, x, 63, 0
+       std     s1, 8(rp)
+
+       subfe   x, x, x         C x = CA - 1, i.e. all ones after a final borrow, else zero
+       rldimi  s0, x, 63, 0    C final borrow becomes the top bit of the last limb
+       std     s0, 16(rp)
+       mr      r3, r12         C return the bit shifted out at the low end
+       blr
+
+L(exit):       subfe   x, v1, u1       C last difference when the pending pair is u1 and v1
+       srdi    s1, x, 1
+       rldimi  s0, x, 63, 0
+       stdu    s0, 16(rp)
+
+L(1):  subfe   x, x, x         C x = CA - 1, i.e. all ones after a final borrow, else zero
+       rldimi  s1, x, 63, 0    C final borrow becomes the top bit of the last limb
+       std     s1, 8(rp)
+       mr      r3, r12         C return the bit shifted out at the low end
+       blr
+EPILOGUE()
diff --git a/mpn/powerpc64/mode64/sublsh1_n.asm b/mpn/powerpc64/mode64/sublsh1_n.asm

new file mode 100644 (file)

index 0000000..69e0dfa
--- /dev/null
+++ b/mpn/powerpc64/mode64/sublsh1_n.asm
@@ -0,0 +1,83 @@
+dnl  PowerPC-64 mpn_sublsh1_n -- rp[] = up[] - (vp[] << 1)
+
+dnl  Copyright 2003, 2005 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C              cycles/limb
+C POWER3/PPC630:     2         (1.5 c/l should be possible)
+C POWER4/PPC970:     4         (2.0 c/l should be possible)
+
+C INPUT PARAMETERS
+C rp   r3
+C up   r4
+C vp   r5
+C n    r6
+
+define(`rp',`r3')
+define(`up',`r4')
+define(`vp',`r5')
+
+define(`s0',`r6')
+define(`s1',`r7')
+define(`u0',`r8')
+define(`v0',`r10')
+define(`v1',`r11')
+
+ASM_START()
+PROLOGUE(mpn_sublsh1_n)
+       mtctr   r6              C put n in ctr
+C rp[] = up[] - 2*vp[]; the loop is unrolled twice and alternates between s1 and s0
+       ld      v0, 0(vp)       C load v limb
+       ld      u0, 0(up)       C load u limb
+       addic   up, up, -8      C update up; set cy
+       addi    rp, rp, -8      C update rp
+       sldi    s1, v0, 1       C lowest limb of 2*v, nothing to shift in
+       bdz     L(end)          C If done, skip loop
+
+L(oop):        ld      v1, 8(vp)       C load v limb
+       subfe   s1, s1, u0      C subtract limbs with cy, set cy
+       std     s1, 8(rp)       C store result limb
+       srdi    s0, v0, 63      C shift down previous v limb
+       ldu     u0, 16(up)      C load u limb and update up
+       rldimi  s0, v1, 1, 0    C left shift v limb and merge with prev v limb
+
+       bdz     L(exit)         C decrement ctr and exit if done
+
+       ldu     v0, 16(vp)      C load v limb and update vp
+       subfe   s0, s0, u0      C subtract limbs with cy, set cy
+       stdu    s0, 16(rp)      C store result limb and update rp
+       srdi    s1, v1, 63      C shift down previous v limb
+       ld      u0, 8(up)       C load u limb
+       rldimi  s1, v0, 1, 0    C left shift v limb and merge with prev v limb
+
+       bdnz    L(oop)          C decrement ctr and loop back
+
+L(end):        subfe   r7, s1, u0      C last limb: u0 - s1 with cy
+       std     r7, 8(rp)       C store last result limb
+       srdi    r3, v0, 63      C r3 = bit shifted out of the top v limb
+       subfze  r3, r3          C r3 = ~r3 + CA
+       neg     r3, r3          C r3 = shifted out bit + 1 - CA, i.e. bit plus final borrow
+       blr
+L(exit):       subfe   r7, s0, u0      C last limb: u0 - s0 with cy
+       std     r7, 16(rp)      C store last result limb
+       srdi    r3, v1, 63      C r3 = bit shifted out of the top v limb
+       subfze  r3, r3          C r3 = ~r3 + CA
+       neg     r3, r3          C r3 = shifted out bit + 1 - CA, i.e. bit plus final borrow
+       blr
+EPILOGUE()
diff --git a/mpn/powerpc64/mode64/submul_1.asm b/mpn/powerpc64/mode64/submul_1.asm

new file mode 100644 (file)

index 0000000..3c1e8a5
--- /dev/null
+++ b/mpn/powerpc64/mode64/submul_1.asm
@@ -0,0 +1,62 @@
+dnl  PowerPC-64 mpn_submul_1 -- Multiply a limb vector with a limb and subtract
+dnl  the result from a second limb vector.
+
+dnl  Copyright 1999, 2000, 2001, 2003, 2004, 2005, 2006 Free Software
+dnl  Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C              cycles/limb
+C POWER3/PPC630:    6-18
+C POWER4/PPC970:    10
+C POWER5:           10.5
+
+C INPUT PARAMETERS
+define(`rp', `r3')
+define(`up', `r4')
+define(`n', `r5')
+define(`vl', `r6')
+define(`cy', `r7')
+
+ASM_START()
+PROLOGUE(mpn_submul_1)
+       li      cy, 0                   C cy_limb = 0
+C mpn_submul_1 falls through into mpn_submul_1c with a zero carry-in limb
+PROLOGUE(mpn_submul_1c)
+       mtctr   n                       C loop count = n
+       addic   r0, r0, 0               C adding zero cannot carry, so this clears CA
+       addi    rp, rp, -8              C bias rp for the 8(rp) load and the stdu below
+       ALIGN(16)
+L(top):
+       ld      r0, 0(up)               C next up limb
+       ld      r10, 8(rp)              C next rp limb
+       mulld   r9, r0, vl              C low half of the product
+       mulhdu  r5, r0, vl              C high half; r5 held n, which is already in ctr
+       adde    r9, r9, cy              C low half + carry limb + borrow kept in CA
+       addi    up, up, 8
+       addze   cy, r5                  C new carry limb = high half + carry out of the adde
+       subf    r12, r9, r10            C r12 = rp limb - r9
+       not     r0, r10
+       addc    r11, r9, r0             C inverted carry from subf
+       stdu    r12, 8(rp)              C store the result limb and advance rp
+       bdnz    L(top)
+
+       addze   r3, cy                  C return carry limb plus the last borrow in CA
+       blr
+EPILOGUE(mpn_submul_1)
+EPILOGUE(mpn_submul_1c)
diff --git a/mpn/powerpc64/rshift.asm b/mpn/powerpc64/rshift.asm

new file mode 100644 (file)

index 0000000..e73640d
--- /dev/null
+++ b/mpn/powerpc64/rshift.asm
@@ -0,0 +1,107 @@
+dnl  PowerPC-64 mpn_rshift -- rp[] = up[] >> cnt
+
+dnl  Copyright 2003, 2005 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C              cycles/limb
+C POWER3/PPC630:     1.5
+C POWER4/PPC970:     3.0
+
+C INPUT PARAMETERS
+define(`rp',`r3')
+define(`up',`r4')
+define(`n',`r5')
+define(`cnt',`r6')
+
+define(`tnc',`r5')
+define(`v0',`r0')
+define(`v1',`r7')
+define(`u0',`r8')
+define(`u1',`r9')
+define(`h0',`r10')
+define(`h1',`r11')
+
+
+ASM_START()
+PROLOGUE(mpn_rshift)
+ifdef(`HAVE_ABI_mode32',
+`      rldicl  n, n, 0, 32')   C zero extend n
+       mtctr   n               C copy n to count register
+       addi    rp, rp, -16     C bias rp so that stores can use offsets 8 and 16
+       subfic  tnc, cnt, 64    C reverse shift count
+
+       ld      u0, 0(up)
+       srd     h0, u0, cnt     C low part of result limb 0
+       sld     r12, u0, tnc    C return value
+       bdz     L(1)            C jump for n = 1
+
+       ld      u1, 8(up)
+       bdz     L(2)            C jump for n = 2
+
+       ldu     u0, 16(up)
+       bdz     L(end)          C jump for n = 3
+
+L(oop):        sld     v1, u1, tnc     C bits of u1 that move down into the previous limb
+       srd     h1, u1, cnt     C bits of u1 that stay in its own limb
+       ld      u1, 8(up)
+       or      h0, v1, h0      C complete the previous result limb
+       stdu    h0, 16(rp)      C store it and advance rp by two limbs
+
+       bdz     L(exit)
+
+       sld     v0, u0, tnc     C same steps for the other limb of the unrolled pair
+       srd     h0, u0, cnt
+       ldu     u0, 16(up)
+       or      h1, v0, h1
+       std     h1, 8(rp)
+
+       bdnz    L(oop)
+
+L(end):        sld     v1, u1, tnc     C wind down with u1 and then u0 still pending
+       srd     h1, u1, cnt
+       or      h0, v1, h0
+       stdu    h0, 16(rp)
+       sld     v0, u0, tnc
+       srd     h0, u0, cnt
+       or      h1, v0, h1
+       std     h1, 8(rp)
+L(1):  std     h0, 16(rp)      C top result limb, nothing is shifted in from above
+ifdef(`HAVE_ABI_mode32',
+`      srdi    r3, r12, 32
+       mr      r4, r12
+',`    mr      r3, r12
+')
+       blr
+
+L(exit):       sld     v0, u0, tnc     C wind down with u0 and then u1 still pending
+       srd     h0, u0, cnt
+       or      h1, v0, h1
+       std     h1, 8(rp)
+L(2):  sld     v1, u1, tnc
+       srd     h1, u1, cnt
+       or      h0, v1, h0
+       stdu    h0, 16(rp)
+       std     h1, 8(rp)       C top result limb, nothing is shifted in from above
+ifdef(`HAVE_ABI_mode32',
+`      srdi    r3, r12, 32
+       mr      r4, r12
+',`    mr      r3, r12
+')
+       blr
+EPILOGUE()
diff --git a/mpn/powerpc64/sqr_diagonal.asm b/mpn/powerpc64/sqr_diagonal.asm

new file mode 100644 (file)

index 0000000..07f60e0
--- /dev/null
+++ b/mpn/powerpc64/sqr_diagonal.asm
@@ -0,0 +1,55 @@
+dnl  PowerPC-64 mpn_sqr_diagonal.
+
+dnl  Copyright 2001, 2002, 2003, 2005, 2006 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C              cycles/limb
+C POWER3/PPC630:    18
+C POWER4/PPC970:     8
+
+C INPUT PARAMETERS
+C rp   r3
+C up   r4
+C n    r5
+
+ASM_START()
+PROLOGUE(mpn_sqr_diagonal)
+ifdef(`HAVE_ABI_mode32',
+`      rldicl  r5, r5, 0, 32')         C zero extend n
+       mtctr   r5              C loop count = n
+       ld      r0, 0(r4)       C preload the first up limb
+       bdz     L(end)          C n = 1: only the tail below is needed
+       ALIGN(16)
+C Each up limb is squared and stored as two limbs, low half first
+L(top):        mulld   r5, r0, r0      C low half of the square
+       mulhdu  r6, r0, r0      C high half of the square
+       ld      r0, 8(r4)       C load the next limb for the following iteration
+       addi    r4, r4, 8
+       std     r5, 0(r3)
+       std     r6, 8(r3)
+       addi    r3, r3, 16      C two result limbs per source limb
+       bdnz    L(top)
+
+L(end):        mulld   r5, r0, r0      C square of the last limb, no further load
+       mulhdu  r6, r0, r0
+       std     r5, 0(r3)
+       std     r6, 8(r3)
+
+       blr
+EPILOGUE()
diff --git a/mpn/powerpc64/umul.asm b/mpn/powerpc64/umul.asm

new file mode 100644 (file)

index 0000000..516be3d
--- /dev/null
+++ b/mpn/powerpc64/umul.asm
@@ -0,0 +1,42 @@
+dnl PowerPC-64 umul_ppmm -- support for longlong.h
+
+dnl Copyright 2000, 2001, 2005 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser
+dnl General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_umul_ppmm (mp_limb_t *lowptr, mp_limb_t m1, mp_limb_t m2);
+C
+
+ASM_START()
+PROLOGUE(mpn_umul_ppmm)
+
+       C r3    lowptr
+       C r4    m1
+       C r5    m2
+
+       mulld   r0, r4, r5      C low 64 bits of m1 * m2
+       mulhdu  r4, r4, r5      C high 64 bits of the unsigned product
+       std     r0, 0(r3)       C store the low limb through lowptr
+ifdef(`HAVE_ABI_mode32',
+`      srdi    r3, r4, 32
+',`    mr      r3, r4
+')
+       blr
+
+EPILOGUE(mpn_umul_ppmm)
diff --git a/mpn/powerpc64/vmx/popcount.asm b/mpn/powerpc64/vmx/popcount.asm

new file mode 100644 (file)

index 0000000..b9f5896
--- /dev/null
+++ b/mpn/powerpc64/vmx/popcount.asm
@@ -0,0 +1,260 @@
+dnl  PowerPC-32/VMX and PowerPC-64/VMX mpn_popcount.
+
+dnl  Copyright 2006 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                   cycles/limb
+C 7400,7410 (G4):       2.75
+C 744x,745x (G4+):      2.25
+C 970 (G5):             5.3
+
+C STATUS
+C  * Works for all sizes and alignments.
+
+C TODO
+C  * Tune the awkward huge n outer loop code.
+C  * Two lvx, two vperm, and two vxor could make us a similar hamdist.
+C  * For the 970, a combined VMX+intop approach might be best.
+C  * Compress cnsts table in 64-bit mode, only half the values are needed.
+
+define(`GMP_LIMB_BYTES', eval(GMP_LIMB_BITS/8))
+define(`LIMBS_PER_VR',  eval(16/GMP_LIMB_BYTES))
+define(`LIMBS_PER_2VR', eval(32/GMP_LIMB_BYTES))
+
+define(`OPERATION_popcount')
+
+ifdef(`OPERATION_popcount',`
+  define(`func',`mpn_popcount')
+  define(`up',         `r3')
+  define(`n',          `r4')
+  define(`HAM',                `dnl')
+')
+ifdef(`OPERATION_hamdist',`
+  define(`func',`mpn_hamdist')
+  define(`up',         `r3')
+  define(`vp',         `r4')
+  define(`n',          `r5')
+  define(`HAM',                `$1')
+')
+
+define(`x01010101',`v2')
+define(`x00110011',`v7')
+define(`x00001111',`v10')
+define(`cnt1',`v11')
+define(`cnt2',`v12')
+define(`cnt4',`v13')
+
+ifelse(GMP_LIMB_BITS,32,`
+       define(`LIMB32',`       $1')
+       define(`LIMB64',`')
+',`
+       define(`LIMB32',`')
+       define(`LIMB64',`       $1')
+')
+
+C The inner loop handles up to 2^34 bits, i.e., 2^28 64-bit limbs, due to overflow
+C in vsum4ubs.  Large operands are done in chunks; LIMBS_PER_CHUNK is the lis/addis halfword, so a chunk is 0x1000<<16 limbs.
+define(`LIMBS_PER_CHUNK', 0x1000)
+define(`LIMBS_CHUNK_THRES', 0x1001)
+
+ASM_START()
+PROLOGUE(mpn_popcount)
+       mfspr   r10, 256                C save VRSAVE in r10, it is restored before returning
+       oris    r0, r10, 0xfffc         C Set VRSAVE bit 0-13
+       mtspr   256, r0
+
+ifdef(`HAVE_ABI_mode32',
+`      rldicl  n, n, 0, 32')           C zero extend n
+
+C Load various constants into vector registers
+       LEAL(   r11, cnsts)
+       li      r12, 16                 C offset of the second 16-byte vector
+       vspltisb cnt1, 1                C 0x0101...01 used as shift count
+       vspltisb cnt2, 2                C 0x0202...02 used as shift count
+       vspltisb cnt4, 4                C 0x0404...04 used as shift count
+       lvx     x01010101, 0, r11       C 0x5555...55
+       lvx     x00110011, r12, r11     C 0x3333...33
+       vspltisb x00001111, 15          C 0x0f0f...0f
+
+LIMB64(`lis    r0, LIMBS_CHUNK_THRES   ')
+LIMB64(`cmpd   cr7, n, r0              ')
+
+       lvx     v0, 0, up               C first vector; lvx ignores the low 4 address bits
+       addi    r7, r11, 96             C r7 = low end masks, at offset 96 in cnsts
+       rlwinm  r6, up, 2,26,29         C r6 = 4 * (up mod 16), selects the mask for this alignment
+       lvx     v8, r7, r6
+       vand    v0, v0, v8              C clear the bytes that precede the operand
+
+LIMB32(`rlwinm r8, up, 30,30,31        ')
+LIMB64(`rlwinm r8, up, 29,31,31        ')
+       add     n, n, r8                C compensate n for rounded down `up'
+
+       vxor    v1, v1, v1
+       li      r8, 0                   C grand total count
+
+       vxor    v3, v3, v3              C zero total count
+
+       addic.  n, n, -LIMBS_PER_VR
+       ble     L(sum)                  C at most one vector: only v0 holds data
+
+       addic.  n, n, -LIMBS_PER_VR
+       ble     L(lsum)                 C at most two vectors
+
+C For 64-bit machines, handle huge n that would overflow vsum4ubs
+LIMB64(`ble    cr7, L(small)           ')
+LIMB64(`addis  r9, n, -LIMBS_PER_CHUNK ') C remaining n
+LIMB64(`lis    n, LIMBS_PER_CHUNK      ')
+L(small):
+
+
+LIMB32(`srwi   r7, n, 3        ')      C loop count corresponding to n
+LIMB64(`srdi   r7, n, 2        ')      C loop count corresponding to n
+       addi    r7, r7, 1
+       mtctr   r7                      C copy loop count to count register
+       b       L(ent)                  C v0 is already loaded and masked
+
+       ALIGN(8)
+L(top):        lvx     v0, 0, up
+       li      r7, 128                 C prefetch distance
+L(ent):        lvx     v1, r12, up
+       addi    up, up, 32              C two vectors per iteration
+       vsr     v4, v0, cnt1
+       vsr     v5, v1, cnt1
+       dcbt    up, r7                  C prefetch
+       vand    v8, v4, x01010101
+       vand    v9, v5, x01010101
+       vsububm v0, v0, v8              C 64 2-bit accumulators (0..2)
+       vsububm v1, v1, v9              C 64 2-bit accumulators (0..2)
+       vsr     v4, v0, cnt2
+       vsr     v5, v1, cnt2
+       vand    v8, v0, x00110011
+       vand    v9, v1, x00110011
+       vand    v4, v4, x00110011
+       vand    v5, v5, x00110011
+       vaddubm v0, v4, v8              C 32 4-bit accumulators (0..4)
+       vaddubm v1, v5, v9              C 32 4-bit accumulators (0..4)
+       vaddubm v8, v0, v1              C 32 4-bit accumulators (0..8)
+       vsr     v9, v8, cnt4
+       vand    v6, v8, x00001111
+       vand    v9, v9, x00001111
+       vaddubm v6, v9, v6              C 16 8-bit accumulators (0..16)
+       vsum4ubs v3, v6, v3             C sum 4 x 4 bytes into 4 32-bit fields
+       bdnz    L(top)
+
+       andi.   n, n, eval(LIMBS_PER_2VR-1)
+       beq     L(rt)                   C no partial vector pair left
+
+       lvx     v0, 0, up
+       vxor    v1, v1, v1
+       cmpwi   n, LIMBS_PER_VR
+       ble     L(sum)
+L(lsum):
+       vor     v1, v0, v0              C keep the full vector in v1
+       lvx     v0, r12, up             C v0 = last vector, masked below
+L(sum):
+LIMB32(`rlwinm r6, n, 4,26,27  ')
+LIMB64(`rlwinm r6, n, 5,26,26  ')
+       addi    r7, r11, 32             C r7 = high end masks, at offset 32 in cnsts
+       lvx     v8, r7, r6
+       vand    v0, v0, v8              C clear the bytes beyond the end of the operand
+
+       vsr     v4, v0, cnt1
+       vsr     v5, v1, cnt1
+       vand    v8, v4, x01010101
+       vand    v9, v5, x01010101
+       vsububm v0, v0, v8              C 64 2-bit accumulators (0..2)
+       vsububm v1, v1, v9              C 64 2-bit accumulators (0..2)
+       vsr     v4, v0, cnt2
+       vsr     v5, v1, cnt2
+       vand    v8, v0, x00110011
+       vand    v9, v1, x00110011
+       vand    v4, v4, x00110011
+       vand    v5, v5, x00110011
+       vaddubm v0, v4, v8              C 32 4-bit accumulators (0..4)
+       vaddubm v1, v5, v9              C 32 4-bit accumulators (0..4)
+       vaddubm v8, v0, v1              C 32 4-bit accumulators (0..8)
+       vsr     v9, v8, cnt4
+       vand    v6, v8, x00001111
+       vand    v9, v9, x00001111
+       vaddubm v6, v9, v6              C 16 8-bit accumulators (0..16)
+       vsum4ubs v3, v6, v3             C sum 4 x 4 bytes into 4 32-bit fields
+
+L(rt):
+       li      r7, -16                 C FIXME: does all ppc32 and ppc64 ABIs
+       stvx    v3, r7, r1              C FIXME: ...support storing below sp?
+
+       lwz     r7, -16(r1)             C add the four 32-bit partial sums into r8
+       add     r8, r8, r7
+       lwz     r7, -12(r1)
+       add     r8, r8, r7
+       lwz     r7, -8(r1)
+       add     r8, r8, r7
+       lwz     r7, -4(r1)
+       add     r8, r8, r7
+
+C Handle outer loop for huge n.  We inherit cr7 and r0 from above.
+LIMB64(`ble    cr7, L(ret)
+       vxor    v3, v3, v3              C zero total count
+       mr      n, r9
+       cmpd    cr7, n, r0
+       ble     cr7, L(2)
+       addis   r9, n, -LIMBS_PER_CHUNK C remaining n
+       lis     n, LIMBS_PER_CHUNK
+L(2):  srdi    r7, n, 2                C loop count corresponding to n
+       mtctr   r7                      C copy n to count register
+       b       L(top)
+')
+
+L(ret):        mr      r3, r8          C return the grand total
+       mtspr   256, r10                C restore the VRSAVE value saved on entry
+       blr
+EPILOGUE()
+
+DEF_OBJECT(cnsts,16)
+       .byte   0x55,0x55,0x55,0x55,0x55,0x55,0x55,0x55     C offset 0: loaded into x01010101
+       .byte   0x55,0x55,0x55,0x55,0x55,0x55,0x55,0x55
+
+       .byte   0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33     C offset 16: loaded into x00110011
+       .byte   0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33
+C Masks for high end of number, four 16-byte entries starting at offset 32
+       .byte   0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
+       .byte   0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
+
+       .byte   0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00
+       .byte   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+
+       .byte   0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
+       .byte   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+
+       .byte   0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
+       .byte   0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00
+C Masks for low end of number, four 16-byte entries starting at offset 96
+       .byte   0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
+       .byte   0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
+
+       .byte   0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff
+       .byte   0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
+
+       .byte   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+       .byte   0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
+
+       .byte   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+       .byte   0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff
+END_OBJECT(cnsts)
+ASM_END()
diff --git a/mpn/pyr/add_n.s b/mpn/pyr/add_n.s

new file mode 100644 (file)

index 0000000..7ac02e6
--- /dev/null
+++ b/mpn/pyr/add_n.s
@@ -0,0 +1,74 @@
+# Pyramid __gmpn_add_n -- Add two limb vectors of the same length > 0 and store
+# sum in a third limb vector.
+
+# Copyright 1995, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+.text
+       .align  2
+.globl ___gmpn_add_n
+___gmpn_add_n:
+       movw    $-1,tr0         # representation for carry clear
+
+       movw    pr3,tr2
+       andw    $3,tr2          # tr2 = pr3 mod 4, the limbs handled one at a time in Loop0
+       beq     Lend0           # none of those: go straight to the unrolled loop
+       subw    tr2,pr3         # pr3 = limbs left for the 4-way unrolled loop
+
+Loop0: rsubw   $0,tr0          # restore carry bit from carry-save register
+
+       movw    (pr1),tr1       # tr1 = limb at pr1
+       addwc   (pr2),tr1       # add limb at pr2, presumably with carry in - confirm
+       movw    tr1,(pr0)       # store the sum limb at pr0
+
+       subwb   tr0,tr0         # presumably saves the carry in tr0 across the addw below - confirm
+       addw    $4,pr0          # advance all three pointers by one limb
+       addw    $4,pr1
+       addw    $4,pr2
+       addw    $-1,tr2
+       bne     Loop0
+
+       mtstw   pr3,pr3         # presumably tests pr3 - confirm
+       beq     Lend            # no limbs left for the unrolled loop
+Lend0:
+Loop:  rsubw   $0,tr0          # restore carry bit from carry-save register
+
+       movw    (pr1),tr1       # four limbs per iteration, same steps as in Loop0
+       addwc   (pr2),tr1
+       movw    tr1,(pr0)
+
+       movw    4(pr1),tr1
+       addwc   4(pr2),tr1
+       movw    tr1,4(pr0)
+
+       movw    8(pr1),tr1
+       addwc   8(pr2),tr1
+       movw    tr1,8(pr0)
+
+       movw    12(pr1),tr1
+       addwc   12(pr2),tr1
+       movw    tr1,12(pr0)
+
+       subwb   tr0,tr0         # presumably saves the carry in tr0 across the addw below - confirm
+       addw    $16,pr0         # advance all three pointers by four limbs
+       addw    $16,pr1
+       addw    $16,pr2
+       addw    $-4,pr3
+       bne     Loop
+Lend:
+       mnegw   tr0,pr0         # pr0 = negated carry-save register, presumably the return value
+       ret
diff --git a/mpn/pyr/addmul_1.s b/mpn/pyr/addmul_1.s

new file mode 100644 (file)

index 0000000..d40a9e7
--- /dev/null
+++ b/mpn/pyr/addmul_1.s
@@ -0,0 +1,43 @@
+# Pyramid __gmpn_addmul_1 -- Multiply a limb vector with a limb and add
+# the result to a second limb vector.
+
+# Copyright 1995, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+.text
+       .align  2
+.globl ___gmpn_addmul_1
+___gmpn_addmul_1:
+       mova    (pr0)[pr2*4],pr0        # pr0 = pr0 + 4*pr2, just past the vector that is updated
+       mova    (pr1)[pr2*4],pr1        # pr1 = pr1 + 4*pr2, just past the vector that is read
+       mnegw   pr2,pr2                 # negative index that counts up towards zero
+       movw    $0,tr3                  # tr3 = carry limb, starts at zero
+
+Loop:  movw    (pr1)[pr2*4],tr1        # tr1 = next limb to multiply
+       uemul   pr3,tr0                 # presumably tr0:tr1 = tr1 * pr3, high:low - confirm
+       addw    tr3,tr1                 # low product += carry limb
+       movw    $0,tr3                  # presumably leaves the carry flag alone - confirm
+       addwc   tr0,tr3                 # new carry limb = high product + carry out
+       movw    (pr0)[pr2*0x4],tr0      # tr0 = limb being updated
+       addw    tr0,tr1                 # add it to the low product
+       addwc   $0,tr3                  # fold that carry out into the carry limb
+       movw    tr1,(pr0)[pr2*4]        # store the result limb
+       addw    $1,pr2
+       bne     Loop                    # until the index reaches zero
+
+       movw    tr3,pr0                 # final carry limb, presumably the return value
+       ret
diff --git a/mpn/pyr/mul_1.s b/mpn/pyr/mul_1.s

new file mode 100644 (file)

index 0000000..453727f
--- /dev/null
+++ b/mpn/pyr/mul_1.s
@@ -0,0 +1,40 @@
+# Pyramid __gmpn_mul_1 -- Multiply a limb vector with a limb and store
+# the result in a second limb vector.
+
+# Copyright 1995, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+.text
+       .align  2
+.globl ___gmpn_mul_1
+___gmpn_mul_1:
+       mova    (pr0)[pr2*4],pr0        # pr0 = pr0 + 4*pr2, just past the vector that is written
+       mova    (pr1)[pr2*4],pr1        # pr1 = pr1 + 4*pr2, just past the vector that is read
+       mnegw   pr2,pr2                 # negative index that counts up towards zero
+       movw    $0,tr3                  # tr3 = carry limb, starts at zero
+
+Loop:  movw    (pr1)[pr2*4],tr1        # tr1 = next limb to multiply
+       uemul   pr3,tr0                 # presumably tr0:tr1 = tr1 * pr3, high:low - confirm
+       addw    tr3,tr1                 # low product += carry limb
+       movw    $0,tr3                  # presumably leaves the carry flag alone - confirm
+       addwc   tr0,tr3                 # new carry limb = high product + carry out
+       movw    tr1,(pr0)[pr2*4]        # store the result limb
+       addw    $1,pr2
+       bne     Loop                    # until the index reaches zero
+
+       movw    tr3,pr0                 # final carry limb, presumably the return value
+       ret
diff --git a/mpn/pyr/sub_n.s b/mpn/pyr/sub_n.s

new file mode 100644 (file)

index 0000000..11f185a
--- /dev/null
+++ b/mpn/pyr/sub_n.s
@@ -0,0 +1,74 @@
+# Pyramid __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+# store difference in a third limb vector.
+
+# Copyright 1995, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+.text
+       .align  2
+.globl ___gmpn_sub_n
+___gmpn_sub_n:                 # as used below: pr0 = dst, pr1 = first source, pr2 = second source, pr3 = limb count
+       movw    $-1,tr0         # representation for carry clear
+
+       movw    pr3,tr2
+       andw    $3,tr2          # tr2 = count mod 4 = limbs to handle one at a time
+       beq     Lend0           # count is already a multiple of 4: only the unrolled loop is needed
+       subw    tr2,pr3         # pr3 = what is left for the unrolled loop, a multiple of 4
+
+Loop0: rsubw   $0,tr0          # restore carry bit from carry-save register
+
+       movw    (pr1),tr1
+       subwb   (pr2),tr1       # tr1 = *pr1 - *pr2, taking the restored carry bit into account
+       movw    tr1,(pr0)
+
+       subwb   tr0,tr0         # park the carry bit in tr0 again; the addw instructions below would lose it
+       addw    $4,pr0          # advance all three pointers by one limb
+       addw    $4,pr1
+       addw    $4,pr2
+       addw    $-1,tr2         # one fewer single limb to go
+       bne     Loop0
+
+       mtstw   pr3,pr3         # presumably only sets the flags from pr3 - TODO confirm
+       beq     Lend            # nothing left for the unrolled loop
+Lend0:
+Loop:  rsubw   $0,tr0          # restore carry bit from carry-save register
+
+       movw    (pr1),tr1       # limb 0 of this group of four
+       subwb   (pr2),tr1
+       movw    tr1,(pr0)
+
+       movw    4(pr1),tr1      # limb 1
+       subwb   4(pr2),tr1
+       movw    tr1,4(pr0)
+
+       movw    8(pr1),tr1      # limb 2
+       subwb   8(pr2),tr1
+       movw    tr1,8(pr0)
+
+       movw    12(pr1),tr1     # limb 3
+       subwb   12(pr2),tr1
+       movw    tr1,12(pr0)
+
+       subwb   tr0,tr0         # park the carry bit in tr0 again
+       addw    $16,pr0         # advance all three pointers by four limbs
+       addw    $16,pr1
+       addw    $16,pr2
+       addw    $-4,pr3         # four fewer limbs to go
+       bne     Loop
+Lend:
+       mnegw   tr0,pr0         # pr0 = -tr0, i.e. derived from the parked carry (presumably the return value)
+       ret
diff --git a/mpn/s390_32/README b/mpn/s390_32/README

new file mode 100644 (file)

index 0000000..59519ba
--- /dev/null
+++ b/mpn/s390_32/README
@@ -0,0 +1,37 @@
+All current (2001) S/390 and z/Architecture machines are single-issue,
+but some newer machines have a deep pipeline.  Software-pipelining is
+therefore beneficial.
+
+* mpn_add_n, mpn_sub_n: Use code along the lines below.  Two-way unrolling
+  would be adequate.
+
+  mp_limb_t
+  mpn_add_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+  {
+    mp_limb_t a, b, r, cy;
+    mp_size_t i;
+    mp_limb_t mm = -1;
+
+    cy = 0;
+    up += n;
+    vp += n;
+    rp += n;
+    i = -n;
+    do
+      {
+       a = up[i];
+       b = vp[i];
+       r = a + b + cy;
+       rp[i] = r;
+       cy = (((a & b) | ((a | b) & (r ^ mm)))) >> 31;
+       i++;
+      }
+    while (i < 0);
+    return cy;
+  }
+
+* mpn_lshift, mpn_rshift: Use SLDL/SRDL, and two-way unrolling.
+
+* mpn_mul_1, mpn_addmul_1, mpn_submul_1: For machines with just signed
+  multiply (MR), use two loops, similar to the corresponding VAX or
+  POWER functions.  Handle carry like for mpn_add_n.
diff --git a/mpn/s390_32/addmul_1.asm b/mpn/s390_32/addmul_1.asm

new file mode 100644 (file)

index 0000000..71d49bb
--- /dev/null
+++ b/mpn/s390_32/addmul_1.asm
@@ -0,0 +1,82 @@
+dnl  S/390 mpn_addmul_1 -- Multiply a limb vector with a limb and add the
+dnl  result to a second limb vector.
+
+dnl  Copyright 2001 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+define(`rp',2)
+define(`up',3)
+define(`n',4)
+define(`vlimb',5)
+define(`cylimb',7)
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+       stm     6,7,24(15)      # save r6 and r7; the lm before each return restores them
+       slr     cylimb,cylimb   # clear cylimb
+       ltr     vlimb,vlimb     # set the condition code from the sign of vlimb
+       jnl     .Loopp          # sign bit of vlimb clear: use the loop without the extra correction
+# mr multiplies signed, so both loops patch phi (r0) to obtain the unsigned high word.
+.Loopn:        l       1,0(up)         # load from u
+       lr      6,1             # keep a copy of the u limb in r6
+       mr      0,vlimb         # multiply signed
+       alr     0,6             # add u limb to phi (correction for the sign bit of vlimb)
+       sra     6,31            # make mask: all ones if the u limb has its sign bit set, else 0
+       nr      6,vlimb         # 0 or vlimb
+       alr     0,6             # conditionally add vlimb to phi
+       alr     1,cylimb        # add carry limb to plo
+       brc     8+4,+8          # branch if not carry (skips the ahi)
+       ahi     0,1             # increment phi
+       l       6,0(rp)         # load r limb
+       alr     6,1             # add plo to r limb
+       brc     8+4,+8          # branch if not carry (skips the ahi)
+       ahi     0,1             # increment phi
+       lr      cylimb,0        # new cylimb
+       st      6,0(rp)         # store updated r limb
+       la      up,4(,up)       # advance up by one limb
+       la      rp,4(,rp)       # advance rp by one limb
+       brct    n,.Loopn        # decrement n, loop while it is nonzero
+
+       lr      2,cylimb        # return the final carry limb in r2
+       lm      6,7,24(15)      # restore r6 and r7
+       br      14              # return through r14
+
+.Loopp:        l       1,0(up)         # load from u
+       lr      6,1             # keep a copy of the u limb in r6
+       mr      0,vlimb         # multiply signed
+       sra     6,31            # make mask: all ones if the u limb has its sign bit set, else 0
+       nr      6,vlimb         # 0 or vlimb
+       alr     0,6             # conditionally add vlimb to phi
+       alr     1,cylimb        # add carry limb to plo
+       brc     8+4,+8          # branch if not carry (skips the ahi)
+       ahi     0,1             # increment phi
+       l       6,0(rp)         # load r limb
+       alr     6,1             # add plo to r limb
+       brc     8+4,+8          # branch if not carry (skips the ahi)
+       ahi     0,1             # increment phi
+       lr      cylimb,0        # new cylimb
+       st      6,0(rp)         # store updated r limb
+       la      up,4(,up)       # advance up by one limb
+       la      rp,4(,rp)       # advance rp by one limb
+       brct    n,.Loopp        # decrement n, loop while it is nonzero
+
+       lr      2,cylimb        # return the final carry limb in r2
+       lm      6,7,24(15)      # restore r6 and r7
+       br      14              # return through r14
+EPILOGUE(mpn_addmul_1)
diff --git a/mpn/s390_32/gmp-mparam.h b/mpn/s390_32/gmp-mparam.h

new file mode 100644 (file)

index 0000000..858d940
--- /dev/null
+++ b/mpn/s390_32/gmp-mparam.h
@@ -0,0 +1,121 @@
+/* S/390-32 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
+2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#define GMP_LIMB_BITS 32
+#define BYTES_PER_MP_LIMB 4
+
+#define DIVREM_1_NORM_THRESHOLD              0  /* always */
+#define DIVREM_1_UNNORM_THRESHOLD            5
+#define MOD_1_NORM_THRESHOLD                 0  /* always */
+#define MOD_1_UNNORM_THRESHOLD               3
+#define MOD_1N_TO_MOD_1_1_THRESHOLD         13
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          6
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0  /* never mpn_mod_1_1p */
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        35
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     21
+#define USE_PREINV_DIVREM_1                  1
+#define DIVREM_2_THRESHOLD                   0  /* always */
+#define DIVEXACT_1_THRESHOLD                 0  /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD           30
+
+#define MUL_TOOM22_THRESHOLD                22
+#define MUL_TOOM33_THRESHOLD                89
+#define MUL_TOOM44_THRESHOLD               202
+#define MUL_TOOM6H_THRESHOLD               270
+#define MUL_TOOM8H_THRESHOLD               406
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD     129
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     139
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     127
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD     113
+
+#define SQR_BASECASE_THRESHOLD               8
+#define SQR_TOOM2_THRESHOLD                 52
+#define SQR_TOOM3_THRESHOLD                125
+#define SQR_TOOM4_THRESHOLD                226
+#define SQR_TOOM6_THRESHOLD                306
+#define SQR_TOOM8_THRESHOLD                430
+
+#define MULMOD_BNM1_THRESHOLD               13
+#define SQRMOD_BNM1_THRESHOLD               17
+
+#define MUL_FFT_MODF_THRESHOLD             308  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    308, 5}, {     17, 6}, {      9, 5}, {     19, 6}, \
+    {     17, 7}, {      9, 6}, {     20, 7}, {     11, 6}, \
+    {     23, 7}, {     13, 8}, {      7, 7}, {     15, 6}, \
+    {     31, 7}, {     19, 8}, {     11, 7}, {     27, 9}, \
+    {      7, 8}, {     15, 7}, {     33, 8}, {     19, 7}, \
+    {     39, 8}, {     23, 7}, {     47, 8}, {     27, 9}, \
+    {     15, 8}, {     39, 9}, {     23, 8}, {     47,10}, \
+    {     15, 9}, {     31, 8}, {     63, 9}, {     39, 8}, \
+    {     83, 9}, {     47,10}, {     31, 9}, {     79,10}, \
+    {     47,11}, {   2048,12}, {   4096,13}, {   8192,14}, \
+    {  16384,15}, {  32768,16} }
+#define MUL_FFT_TABLE3_SIZE 42
+#define MUL_FFT_THRESHOLD                 3520
+
+#define SQR_FFT_MODF_THRESHOLD             276  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    276, 5}, {     19, 6}, {     17, 7}, {      9, 6}, \
+    {     20, 7}, {     11, 6}, {     23, 7}, {     19, 8}, \
+    {     11, 7}, {     27, 8}, {     15, 7}, {     33, 8}, \
+    {     19, 7}, {     39, 8}, {     23, 7}, {     47, 8}, \
+    {     27, 9}, {     15, 8}, {     39, 9}, {     23, 8}, \
+    {     47,10}, {     15, 9}, {     31, 8}, {     63, 9}, \
+    {     39, 8}, {     79, 9}, {     47,10}, {     31, 9}, \
+    {     79,10}, {     47,11}, {   2048,12}, {   4096,13}, \
+    {   8192,14}, {  16384,15}, {  32768,16} }
+#define SQR_FFT_TABLE3_SIZE 35
+#define SQR_FFT_THRESHOLD                 2688
+
+#define MULLO_BASECASE_THRESHOLD             0  /* always */
+#define MULLO_DC_THRESHOLD                  54
+#define MULLO_MUL_N_THRESHOLD             6633
+
+#define DC_DIV_QR_THRESHOLD                 52
+#define DC_DIVAPPR_Q_THRESHOLD             185
+#define DC_BDIV_QR_THRESHOLD                53
+#define DC_BDIV_Q_THRESHOLD                122
+
+#define INV_MULMOD_BNM1_THRESHOLD           29
+#define INV_NEWTON_THRESHOLD               260
+#define INV_APPR_THRESHOLD                 220
+
+#define BINV_NEWTON_THRESHOLD              230
+#define REDC_1_TO_REDC_N_THRESHOLD          56
+
+#define MU_DIV_QR_THRESHOLD               1142
+#define MU_DIVAPPR_Q_THRESHOLD            1234
+#define MUPI_DIV_QR_THRESHOLD              114
+#define MU_BDIV_QR_THRESHOLD               792
+#define MU_BDIV_Q_THRESHOLD               1099
+
+#define MATRIX22_STRASSEN_THRESHOLD         15
+#define HGCD_THRESHOLD                     151
+#define GCD_DC_THRESHOLD                   599
+#define GCDEXT_DC_THRESHOLD                460
+#define JACOBI_BASE_METHOD                   2
+
+#define GET_STR_DC_THRESHOLD                15
+#define GET_STR_PRECOMPUTE_THRESHOLD        35
+#define SET_STR_DC_THRESHOLD               915
+#define SET_STR_PRECOMPUTE_THRESHOLD      1670
diff --git a/mpn/s390_32/mul_1.asm b/mpn/s390_32/mul_1.asm

new file mode 100644 (file)

index 0000000..649671b
--- /dev/null
+++ b/mpn/s390_32/mul_1.asm
@@ -0,0 +1,74 @@
+dnl  S/390 mpn_mul_1 -- Multiply a limb vector with a limb and store the
+dnl  result in a second limb vector.
+
+dnl  Copyright 2001 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+define(`rp',2)
+define(`up',3)
+define(`n',4)
+define(`vlimb',5)
+define(`cylimb',7)
+
+ASM_START()
+PROLOGUE(mpn_mul_1)
+       stm     6,7,24(15)      # save r6 and r7; the lm before each return restores them
+       slr     cylimb,cylimb   # clear cylimb
+       ltr     vlimb,vlimb     # set the condition code from the sign of vlimb
+       jnl     .Loopp          # sign bit of vlimb clear: use the loop without the extra correction
+# mr multiplies signed, so both loops patch phi (r0) to obtain the unsigned high word.
+.Loopn:        l       1,0(up)         # load from u
+       lr      6,1             # keep a copy of the u limb in r6
+       mr      0,vlimb         # multiply signed
+       alr     0,6             # add u limb to phi (correction for the sign bit of vlimb)
+       sra     6,31            # make mask: all ones if the u limb has its sign bit set, else 0
+       nr      6,vlimb         # 0 or vlimb
+       alr     0,6             # conditionally add vlimb to phi
+       alr     1,cylimb        # add carry limb to plo
+       brc     8+4,+8          # branch if not carry (skips the ahi)
+       ahi     0,1             # increment phi
+       lr      cylimb,0        # new cylimb
+       st      1,0(rp)         # store plo as the result limb
+       la      up,4(,up)       # advance up by one limb
+       la      rp,4(,rp)       # advance rp by one limb
+       brct    n,.Loopn        # decrement n, loop while it is nonzero
+
+       lr      2,cylimb        # return the final carry limb in r2
+       lm      6,7,24(15)      # restore r6 and r7
+       br      14              # return through r14
+
+.Loopp:        l       1,0(up)         # load from u
+       lr      6,1             # keep a copy of the u limb in r6
+       mr      0,vlimb         # multiply signed
+       sra     6,31            # make mask: all ones if the u limb has its sign bit set, else 0
+       nr      6,vlimb         # 0 or vlimb
+       alr     0,6             # conditionally add vlimb to phi
+       alr     1,cylimb        # add carry limb to plo
+       brc     8+4,+8          # branch if not carry (skips the ahi)
+       ahi     0,1             # increment phi
+       lr      cylimb,0        # new cylimb
+       st      1,0(rp)         # store plo as the result limb
+       la      up,4(,up)       # advance up by one limb
+       la      rp,4(,rp)       # advance rp by one limb
+       brct    n,.Loopp        # decrement n, loop while it is nonzero
+
+       lr      2,cylimb        # return the final carry limb in r2
+       lm      6,7,24(15)      # restore r6 and r7
+       br      14              # return through r14
+EPILOGUE(mpn_mul_1)
diff --git a/mpn/s390_32/submul_1.asm b/mpn/s390_32/submul_1.asm

new file mode 100644 (file)

index 0000000..5301096
--- /dev/null
+++ b/mpn/s390_32/submul_1.asm
@@ -0,0 +1,82 @@
+dnl  S/390 mpn_submul_1 -- Multiply a limb vector with a limb and subtract the
+dnl  result from a second limb vector.
+
+dnl  Copyright 2001 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+define(`rp',2)
+define(`up',3)
+define(`n',4)
+define(`vlimb',5)
+define(`cylimb',7)
+
+ASM_START()
+PROLOGUE(mpn_submul_1)
+       stm     6,7,24(15)      # save r6 and r7; the lm before each return restores them
+       slr     cylimb,cylimb   # clear cylimb
+       ltr     vlimb,vlimb     # set the condition code from the sign of vlimb
+       jnl     .Loopp          # sign bit of vlimb clear: use the loop without the extra correction
+# mr multiplies signed, so both loops patch phi (r0) to obtain the unsigned high word.
+.Loopn:        l       1,0(up)         # load from u
+       lr      6,1             # keep a copy of the u limb in r6
+       mr      0,vlimb         # multiply signed
+       alr     0,6             # add u limb to phi (correction for the sign bit of vlimb)
+       sra     6,31            # make mask: all ones if the u limb has its sign bit set, else 0
+       nr      6,vlimb         # 0 or vlimb
+       alr     0,6             # conditionally add vlimb to phi
+       alr     1,cylimb        # add carry limb to plo
+       brc     8+4,+8          # branch if not carry (skips the ahi)
+       ahi     0,1             # increment phi
+       l       6,0(rp)         # load r limb
+       slr     6,1             # subtract plo from r limb
+       brc     2+1,+8          # branch if no borrow (skips the ahi)
+       ahi     0,1             # borrow: increment phi
+       lr      cylimb,0        # new cylimb
+       st      6,0(rp)         # store updated r limb
+       la      up,4(,up)       # advance up by one limb
+       la      rp,4(,rp)       # advance rp by one limb
+       brct    n,.Loopn        # decrement n, loop while it is nonzero
+
+       lr      2,cylimb        # return the final carry limb in r2
+       lm      6,7,24(15)      # restore r6 and r7
+       br      14              # return through r14
+
+.Loopp:        l       1,0(up)         # load from u
+       lr      6,1             # keep a copy of the u limb in r6
+       mr      0,vlimb         # multiply signed
+       sra     6,31            # make mask: all ones if the u limb has its sign bit set, else 0
+       nr      6,vlimb         # 0 or vlimb
+       alr     0,6             # conditionally add vlimb to phi
+       alr     1,cylimb        # add carry limb to plo
+       brc     8+4,+8          # branch if not carry (skips the ahi)
+       ahi     0,1             # increment phi
+       l       6,0(rp)         # load r limb
+       slr     6,1             # subtract plo from r limb
+       brc     2+1,+8          # branch if no borrow (skips the ahi)
+       ahi     0,1             # borrow: increment phi
+       lr      cylimb,0        # new cylimb
+       st      6,0(rp)         # store updated r limb
+       la      up,4(,up)       # advance up by one limb
+       la      rp,4(,rp)       # advance rp by one limb
+       brct    n,.Loopp        # decrement n, loop while it is nonzero
+
+       lr      2,cylimb        # return the final carry limb in r2
+       lm      6,7,24(15)      # restore r6 and r7
+       br      14              # return through r14
+EPILOGUE(mpn_submul_1)
diff --git a/mpn/s390_64/gmp-mparam.h b/mpn/s390_64/gmp-mparam.h

new file mode 100644 (file)

index 0000000..237dc28
--- /dev/null
+++ b/mpn/s390_64/gmp-mparam.h
@@ -0,0 +1,125 @@
+/* S/390-64 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
+2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#define GMP_LIMB_BITS 64
+#define BYTES_PER_MP_LIMB 8
+
+#define DIVREM_1_NORM_THRESHOLD              0  /* always */
+#define DIVREM_1_UNNORM_THRESHOLD            0  /* always */
+#define MOD_1_NORM_THRESHOLD                 0  /* always */
+#define MOD_1_UNNORM_THRESHOLD               0  /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD      MP_SIZE_T_MAX  /* never */
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          9
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0  /* never mpn_mod_1_1p */
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        19
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD  MP_SIZE_T_MAX  /* never */
+#define USE_PREINV_DIVREM_1                  1
+#define DIVREM_2_THRESHOLD                   0  /* always */
+#define DIVEXACT_1_THRESHOLD                 0  /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD          101
+
+#define MUL_TOOM22_THRESHOLD                14
+#define MUL_TOOM33_THRESHOLD                74
+#define MUL_TOOM44_THRESHOLD               118
+#define MUL_TOOM6H_THRESHOLD               157
+#define MUL_TOOM8H_THRESHOLD               236
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      73
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD      84
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD      81
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD      72
+
+#define SQR_BASECASE_THRESHOLD               4
+#define SQR_TOOM2_THRESHOLD                 26
+#define SQR_TOOM3_THRESHOLD                 87
+#define SQR_TOOM4_THRESHOLD                136
+#define SQR_TOOM6_THRESHOLD                171
+#define SQR_TOOM8_THRESHOLD                246
+
+#define MULMOD_BNM1_THRESHOLD                9
+#define SQRMOD_BNM1_THRESHOLD               11
+
+#define MUL_FFT_MODF_THRESHOLD             212  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    212, 5}, {      9, 6}, {      5, 5}, {     11, 6}, \
+    {      6, 5}, {     13, 6}, {     13, 7}, {      7, 6}, \
+    {     17, 7}, {     13, 8}, {      7, 7}, {     17, 8}, \
+    {      9, 7}, {     19, 8}, {     11, 7}, {     23, 8}, \
+    {     13, 9}, {      7, 8}, {     19, 9}, {     11, 8}, \
+    {     23,10}, {      7, 9}, {     15, 8}, {     31, 9}, \
+    {     19, 8}, {     41, 9}, {     23,10}, {     15, 9}, \
+    {     39,10}, {     23, 9}, {     47,11}, {     15,10}, \
+    {     31, 9}, {     67,10}, {     39, 9}, {     79,10}, \
+    {     47,11}, {   2048,12}, {   4096,13}, {   8192,14}, \
+    {  16384,15}, {  32768,16}, {  65536,17}, { 131072,18}, \
+    { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
+    {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 50
+#define MUL_FFT_THRESHOLD                 2240
+
+#define SQR_FFT_MODF_THRESHOLD             184  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    184, 5}, {     11, 6}, {     13, 7}, {      7, 6}, \
+    {     15, 7}, {     13, 8}, {      7, 7}, {     16, 8}, \
+    {      9, 7}, {     19, 8}, {     11, 7}, {     23, 8}, \
+    {     13, 9}, {      7, 8}, {     19, 9}, {     11, 8}, \
+    {     23,10}, {      7, 9}, {     15, 8}, {     31, 9}, \
+    {     23,10}, {     15, 9}, {     39,10}, {     23,11}, \
+    {     15,10}, {     31, 9}, {     63, 8}, {    127,10}, \
+    {     47,11}, {   2048,12}, {   4096,13}, {   8192,14}, \
+    {  16384,15}, {  32768,16}, {  65536,17}, { 131072,18}, \
+    { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
+    {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 42
+#define SQR_FFT_THRESHOLD                 1728
+
+#define MULLO_BASECASE_THRESHOLD             2
+#define MULLO_DC_THRESHOLD                  45
+#define MULLO_MUL_N_THRESHOLD             4392
+
+#define DC_DIV_QR_THRESHOLD                 40
+#define DC_DIVAPPR_Q_THRESHOLD             154
+#define DC_BDIV_QR_THRESHOLD                42
+#define DC_BDIV_Q_THRESHOLD                102
+
+#define INV_MULMOD_BNM1_THRESHOLD           26
+#define INV_NEWTON_THRESHOLD               226
+#define INV_APPR_THRESHOLD                 171
+
+#define BINV_NEWTON_THRESHOLD              222
+#define REDC_1_TO_REDC_N_THRESHOLD          46
+
+#define MU_DIV_QR_THRESHOLD                855
+#define MU_DIVAPPR_Q_THRESHOLD             942
+#define MUPI_DIV_QR_THRESHOLD               99
+#define MU_BDIV_QR_THRESHOLD               680
+#define MU_BDIV_Q_THRESHOLD                855
+
+#define MATRIX22_STRASSEN_THRESHOLD         15
+#define HGCD_THRESHOLD                      89
+#define GCD_DC_THRESHOLD                   273
+#define GCDEXT_DC_THRESHOLD                209
+#define JACOBI_BASE_METHOD                   2
+
+#define GET_STR_DC_THRESHOLD                32
+#define GET_STR_PRECOMPUTE_THRESHOLD        47
+#define SET_STR_DC_THRESHOLD               532
+#define SET_STR_PRECOMPUTE_THRESHOLD      1336
diff --git a/mpn/sh/add_n.asm b/mpn/sh/add_n.asm

new file mode 100644 (file)

index 0000000..a838451
--- /dev/null
+++ b/mpn/sh/add_n.asm
@@ -0,0 +1,48 @@
+dnl  SH mpn_add_n -- Add two limb vectors of the same length > 0 and store sum
+dnl  in a third limb vector.
+
+dnl  Copyright 1995, 1997, 2000, 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C rp           r4
+C up           r5
+C vp           r6
+C n            r7
+
+changecom(blah)                        C disable # to make all C comments below work
+
+ASM_START()
+PROLOGUE(mpn_add_n)
+       mov     #0,r3           C clear cy save reg
+C One limb per pass. The carry is parked in r3 because dt overwrites T.
+L(top):        mov.l   @r5+,r1         C r1 = next up limb, up advances
+       mov.l   @r6+,r2         C r2 = next vp limb, vp advances
+       shlr    r3              C restore cy
+       addc    r2,r1           C r1 = r1 + r2 + T, T = carry out
+       movt    r3              C save cy
+       mov.l   r1,@r4          C store the sum limb at rp
+       dt      r7              C count down n, T = 1 once it reaches zero
+       bf.s    L(top)          C loop while T = 0
+        add    #4,r4           C delay slot, runs on both paths: advance rp
+
+       rts                     C return, the mov below sits in the delay slot
+       mov     r3,r0           C return carry-out from most significant limb
+EPILOGUE()
diff --git a/mpn/sh/sh2/addmul_1.asm b/mpn/sh/sh2/addmul_1.asm

new file mode 100644 (file)

index 0000000..a4f922a
--- /dev/null
+++ b/mpn/sh/sh2/addmul_1.asm
@@ -0,0 +1,54 @@
+dnl  SH2 mpn_addmul_1 -- Multiply a limb vector with a limb and add the result
+dnl  to a second limb vector.
+
+dnl  Copyright 1995, 2000, 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr      r4
+C s1_ptr       r5
+C size         r6
+C s2_limb      r7
+
+changecom(blah)                        C disable # to make all C comments below work
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+       mov     #0,r2           C cy_limb = 0
+       mov     #0,r0           C Keep r0 = 0 for entire loop
+       clrt                    C T = 0 on entry to the loop
+C Per limb: res limb += low word of s1 limb * s2_limb + cy_limb, high word plus carries is the next cy_limb.
+L(top):        mov.l   @r5+,r3         C r3 = next s1 limb, s1_ptr advances
+       dmulu.l r3,r7           C mach:macl = r3 * s2_limb, unsigned
+       sts     macl,r1         C r1 = lo_prod
+       addc    r2,r1           C lo_prod += old cy_limb
+       sts     mach,r2         C new cy_limb = hi_prod
+       mov.l   @r4,r3          C r3 = current res limb
+       addc    r0,r2           C cy_limb += T, T = 0
+       addc    r3,r1           C lo_prod += res limb, T = carry out
+       addc    r0,r2           C cy_limb += T, T = 0
+       dt      r6              C count down size, T = 1 once it reaches zero
+       mov.l   r1,@r4          C store the updated res limb
+       bf.s    L(top)          C loop while T = 0, so T = 0 again at the top
+       add     #4,r4           C delay slot, runs on both paths: advance res_ptr
+
+       rts                     C return, the mov below sits in the delay slot
+       mov     r2,r0           C return cy_limb in r0
+EPILOGUE()
diff --git a/mpn/sh/sh2/mul_1.asm b/mpn/sh/sh2/mul_1.asm

new file mode 100644 (file)

index 0000000..02f341f
--- /dev/null
+++ b/mpn/sh/sh2/mul_1.asm
@@ -0,0 +1,51 @@
+dnl  SH2 mpn_mul_1 -- Multiply a limb vector with a limb and store the result
+dnl  in a second limb vector.
+
+dnl  Copyright 1995, 2000, 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr      r4
+C s1_ptr       r5
+C size         r6
+C s2_limb      r7
+
+changecom(blah)                        C disable # to make all C comments below work
+
+ASM_START()
+PROLOGUE(mpn_mul_1)
+       mov     #0,r2           C cy_limb = 0
+       mov     #0,r0           C Keep r0 = 0 for entire loop
+       clrt                    C T = 0 on entry to the loop
+C Per limb: res limb = low word of s1 limb * s2_limb + cy_limb, high word plus carry is the next cy_limb.
+L(top):        mov.l   @r5+,r3         C r3 = next s1 limb, s1_ptr advances
+       dmulu.l r3,r7           C mach:macl = r3 * s2_limb, unsigned
+       sts     macl,r1         C r1 = lo_prod
+       addc    r2,r1           C lo_prod += old cy_limb, T = carry out
+       sts     mach,r2         C new cy_limb = hi_prod
+       addc    r0,r2           C propagate carry to cy_limb (dt clobbers T)
+       dt      r6              C count down size, T = 1 once it reaches zero
+       mov.l   r1,@r4          C store the result limb
+       bf.s    L(top)          C loop while T = 0, so T = 0 again at the top
+       add     #4,r4           C delay slot, runs on both paths: advance res_ptr
+
+       rts                     C return, the mov below sits in the delay slot
+       mov     r2,r0           C return cy_limb in r0
+EPILOGUE()
diff --git a/mpn/sh/sh2/submul_1.asm b/mpn/sh/sh2/submul_1.asm

new file mode 100644 (file)

index 0000000..c701eba
--- /dev/null
+++ b/mpn/sh/sh2/submul_1.asm
@@ -0,0 +1,54 @@
+dnl  SH2 mpn_submul_1 -- Multiply a limb vector with a limb and subtract the
+dnl  result from a second limb vector.
+
+dnl  Copyright 1995, 2000, 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr      r4
+C s1_ptr       r5
+C size         r6
+C s2_limb      r7
+
+changecom(blah)                        C disable # to make all C comments below work
+
+ASM_START()
+PROLOGUE(mpn_submul_1)
+       mov     #0,r2           C cy_limb = 0
+       mov     #0,r0           C Keep r0 = 0 for entire loop
+       clrt                    C T = 0 on entry to the loop
+C Per limb: res limb -= low word of s1 limb * s2_limb + cy_limb, high word plus carry and borrow is the next cy_limb.
+L(top):        mov.l   @r5+,r3         C r3 = next s1 limb, s1_ptr advances
+       dmulu.l r3,r7           C mach:macl = r3 * s2_limb, unsigned
+       sts     macl,r1         C r1 = lo_prod
+       addc    r2,r1           C lo_prod += old cy_limb
+       sts     mach,r2         C new cy_limb = hi_prod
+       mov.l   @r4,r3          C r3 = current res limb
+       addc    r0,r2           C cy_limb += T, T = 0
+       subc    r1,r3           C r3 = res limb - lo_prod, T = borrow out
+       addc    r0,r2           C cy_limb += T, T = 0
+       dt      r6              C count down size, T = 1 once it reaches zero
+       mov.l   r3,@r4          C store the updated res limb
+       bf.s    L(top)          C loop while T = 0, so T = 0 again at the top
+       add     #4,r4           C delay slot, runs on both paths: advance res_ptr
+
+       rts                     C return, the mov below sits in the delay slot
+       mov     r2,r0           C return cy_limb in r0
+EPILOGUE()
diff --git a/mpn/sh/sub_n.asm b/mpn/sh/sub_n.asm

new file mode 100644 (file)

index 0000000..493f1a5
--- /dev/null
+++ b/mpn/sh/sub_n.asm
@@ -0,0 +1,48 @@
+dnl  SH mpn_sub_n -- Subtract two limb vectors of the same length > 0 and store
+dnl  difference in a third limb vector.
+
+dnl  Copyright 1995, 1997, 2000, 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C rp           r4
+C up           r5
+C vp           r6
+C n            r7
+
+changecom(blah)                        C disable # to make all C comments below work
+
+ASM_START()
+PROLOGUE(mpn_sub_n)
+       mov     #0,r3           C clear cy save reg
+
+L(top):        mov.l   @r5+,r1         C r1 = next up limb, advance up
+       mov.l   @r6+,r2         C r2 = next vp limb, advance vp
+       shlr    r3              C restore cy
+       subc    r2,r1           C r1 = up limb - vp limb - cy, T = borrow
+       movt    r3              C save cy
+       mov.l   r1,@r4          C store difference limb
+       dt      r7              C n -= 1, T = 1 once it reaches zero
+       bf.s    L(top)          C loop while T = 0, i.e. limbs remain
+        add    #4,r4           C delay slot: advance rp
+
+       rts
+       mov     r3,r0           C return carry-out from most significant limb
+EPILOGUE()
diff --git a/mpn/sparc32/README b/mpn/sparc32/README

new file mode 100644 (file)

index 0000000..825a1ac
--- /dev/null
+++ b/mpn/sparc32/README
@@ -0,0 +1,60 @@
+Copyright 1996, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+
+
+
+This directory contains mpn functions for various SPARC chips.  Code that
+runs only on version 8 SPARC implementations, is in the v8 subdirectory.
+
+RELEVANT OPTIMIZATION ISSUES
+
+  Load and Store timing
+
+On most early SPARC implementations, the ST instructions takes multiple
+cycles, while a STD takes just a single cycle more than an ST.  For the CPUs
+in SPARCstation I and II, the times are 3 and 4 cycles, respectively.
+Therefore, combining two ST instructions into a STD when possible is a
+significant optimization.
+
+Later SPARC implementations have single cycle ST.
+
+For SuperSPARC, we can perform just one memory instruction per cycle, even
+if up to two integer instructions can be executed in its pipeline.  For
+programs that perform so many memory operations that there are not enough
+non-memory operations to issue in parallel with all memory operations, using
+LDD and STD when possible helps.
+
+UltraSPARC-1/2 has very slow integer multiplication.  In the v9 subdirectory,
+we therefore use floating-point multiplication.
+
+STATUS
+
+1. On a SuperSPARC, mpn_lshift and mpn_rshift run at 3 cycles/limb, or 2.5
+   cycles/limb asymptotically.  We could optimize speed for special counts
+   by using ADDXCC.
+
+2. On a SuperSPARC, mpn_add_n and mpn_sub_n run at 2.5 cycles/limb, or 2
+   cycles/limb asymptotically.
+
+3. mpn_mul_1 runs at what is believed to be optimal speed.
+
+4. On SuperSPARC, mpn_addmul_1 and mpn_submul_1 could both be improved by a
+   cycle by avoiding one of the add instructions.  See a29k/addmul_1.
+
+The speed of the code for other SPARC implementations is uncertain.
diff --git a/mpn/sparc32/add_n.asm b/mpn/sparc32/add_n.asm

new file mode 100644 (file)

index 0000000..7c8a9c4
--- /dev/null
+++ b/mpn/sparc32/add_n.asm
@@ -0,0 +1,234 @@
+dnl  SPARC mpn_add_n -- Add two limb vectors of the same length > 0 and store
+dnl  sum in a third limb vector.
+
+dnl  Copyright 1995, 1996, 2000 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+define(res_ptr,%o0)
+define(s1_ptr,%o1)
+define(s2_ptr,%o2)
+define(n,%o3)
+
+ASM_START()
+PROLOGUE(mpn_add_n)
+       xor     s2_ptr,res_ptr,%g1      C bit 2 of g1 set iff the pointers differ in bit 2
+       andcc   %g1,4,%g0
+       bne     L(1)                    C branch if alignment differs
+       nop
+C **  V1a  **
+L(0):  andcc   res_ptr,4,%g0           C res_ptr unaligned? Side effect: cy=0
+       be      L(v1)                   C if no, branch
+       nop
+C Add least significant limb separately to align res_ptr and s2_ptr
+       ld      [s1_ptr],%g4
+       add     s1_ptr,4,s1_ptr
+       ld      [s2_ptr],%g2
+       add     s2_ptr,4,s2_ptr
+       add     n,-1,n
+       addcc   %g4,%g2,%o4             C first limb: plain add, produces cy
+       st      %o4,[res_ptr]
+       add     res_ptr,4,res_ptr
+L(v1): addx    %g0,%g0,%o4             C save cy in register
+       cmp     n,2                     C if n < 2 ...
+       bl      L(end2)                 C ... branch to tail code
+       subcc   %g0,%o4,%g0             C restore cy
+
+       ld      [s1_ptr+0],%g4          C preload s1 limbs into g4,g1 ...
+       addcc   n,-10,n                 C n >= 0 iff 8 limbs plus 2 preloaded remain
+       ld      [s1_ptr+4],%g1
+       ldd     [s2_ptr+0],%g2          C ... and s2 limbs into g2,g3
+       blt     L(fin1)
+       subcc   %g0,%o4,%g0             C restore cy
+C Add blocks of 8 limbs until less than 8 limbs remain
+L(loop1):
+       addxcc  %g4,%g2,%o4
+       ld      [s1_ptr+8],%g4
+       addxcc  %g1,%g3,%o5
+       ld      [s1_ptr+12],%g1
+       ldd     [s2_ptr+8],%g2
+       std     %o4,[res_ptr+0]         C store the o4,o5 pair with one std
+       addxcc  %g4,%g2,%o4
+       ld      [s1_ptr+16],%g4
+       addxcc  %g1,%g3,%o5
+       ld      [s1_ptr+20],%g1
+       ldd     [s2_ptr+16],%g2
+       std     %o4,[res_ptr+8]
+       addxcc  %g4,%g2,%o4
+       ld      [s1_ptr+24],%g4
+       addxcc  %g1,%g3,%o5
+       ld      [s1_ptr+28],%g1
+       ldd     [s2_ptr+24],%g2
+       std     %o4,[res_ptr+16]
+       addxcc  %g4,%g2,%o4
+       ld      [s1_ptr+32],%g4         C preload the pair for the next pass
+       addxcc  %g1,%g3,%o5
+       ld      [s1_ptr+36],%g1
+       ldd     [s2_ptr+32],%g2
+       std     %o4,[res_ptr+24]
+       addx    %g0,%g0,%o4             C save cy in register
+       addcc   n,-8,n                  C clobbers cy, hence the save/restore
+       add     s1_ptr,32,s1_ptr
+       add     s2_ptr,32,s2_ptr
+       add     res_ptr,32,res_ptr
+       bge     L(loop1)
+       subcc   %g0,%o4,%g0             C restore cy
+
+L(fin1):
+       addcc   n,8-2,n                 C n = (limbs not yet stored) - 4
+       blt     L(end1)
+       subcc   %g0,%o4,%g0             C restore cy
+C Add blocks of 2 limbs until less than 2 limbs remain
+L(loope1):
+       addxcc  %g4,%g2,%o4
+       ld      [s1_ptr+8],%g4
+       addxcc  %g1,%g3,%o5
+       ld      [s1_ptr+12],%g1
+       ldd     [s2_ptr+8],%g2
+       std     %o4,[res_ptr+0]
+       addx    %g0,%g0,%o4             C save cy in register
+       addcc   n,-2,n
+       add     s1_ptr,8,s1_ptr
+       add     s2_ptr,8,s2_ptr
+       add     res_ptr,8,res_ptr
+       bge     L(loope1)
+       subcc   %g0,%o4,%g0             C restore cy
+L(end1):
+       addxcc  %g4,%g2,%o4             C add the two preloaded limb pairs
+       addxcc  %g1,%g3,%o5
+       std     %o4,[res_ptr+0]
+       addx    %g0,%g0,%o4             C save cy in register
+
+       andcc   n,1,%g0                 C one odd limb left over?
+       be      L(ret1)
+       subcc   %g0,%o4,%g0             C restore cy
+C Add last limb
+       ld      [s1_ptr+8],%g4          C offset 8: pointers not advanced past the pair
+       ld      [s2_ptr+8],%g2
+       addxcc  %g4,%g2,%o4
+       st      %o4,[res_ptr+8]
+
+L(ret1):
+       retl
+       addx    %g0,%g0,%o0     C return carry-out from most sign. limb
+
+L(1):  xor     s1_ptr,res_ptr,%g1
+       andcc   %g1,4,%g0
+       bne     L(2)                    C s1_ptr alignment differs from res_ptr too
+       nop
+C **  V1b  **
+       mov     s2_ptr,%g1              C swap s1_ptr and s2_ptr via g1 ...
+       mov     s1_ptr,s2_ptr
+       b       L(0)                    C ... and reuse the V1a code
+       mov     %g1,s1_ptr              C delay slot: completes the swap
+
+C **  V2  **
+C If we come here, the alignment of s1_ptr and res_ptr as well as the
+C alignment of s2_ptr and res_ptr differ.  Since there are only two ways
+C things can be aligned (that we care about) we now know that the alignment
+C of s1_ptr and s2_ptr are the same.
+
+L(2):  cmp     n,1                     C for n = 1 this compare also leaves cy=0
+       be      L(jone)                 C single limb: add it directly
+       nop
+       andcc   s1_ptr,4,%g0            C s1_ptr unaligned? Side effect: cy=0
+       be      L(v2)                   C if no, branch
+       nop
+C Add least significant limb separately to align s1_ptr and s2_ptr
+       ld      [s1_ptr],%g4
+       add     s1_ptr,4,s1_ptr
+       ld      [s2_ptr],%g2
+       add     s2_ptr,4,s2_ptr
+       add     n,-1,n
+       addcc   %g4,%g2,%o4             C first limb: plain add, produces cy
+       st      %o4,[res_ptr]
+       add     res_ptr,4,res_ptr
+
+L(v2): addx    %g0,%g0,%o4             C save cy in register
+       addcc   n,-8,n                  C n >= 0 iff a full block of 8 limbs remains
+       blt     L(fin2)
+       subcc   %g0,%o4,%g0             C restore cy
+C Add blocks of 8 limbs until less than 8 limbs remain
+L(loop2):
+       ldd     [s1_ptr+0],%g2          C both sources are loaded pairwise
+       ldd     [s2_ptr+0],%o4
+       addxcc  %g2,%o4,%g2
+       st      %g2,[res_ptr+0]         C res_ptr has bit 2 set here: single-word stores
+       addxcc  %g3,%o5,%g3
+       st      %g3,[res_ptr+4]
+       ldd     [s1_ptr+8],%g2
+       ldd     [s2_ptr+8],%o4
+       addxcc  %g2,%o4,%g2
+       st      %g2,[res_ptr+8]
+       addxcc  %g3,%o5,%g3
+       st      %g3,[res_ptr+12]
+       ldd     [s1_ptr+16],%g2
+       ldd     [s2_ptr+16],%o4
+       addxcc  %g2,%o4,%g2
+       st      %g2,[res_ptr+16]
+       addxcc  %g3,%o5,%g3
+       st      %g3,[res_ptr+20]
+       ldd     [s1_ptr+24],%g2
+       ldd     [s2_ptr+24],%o4
+       addxcc  %g2,%o4,%g2
+       st      %g2,[res_ptr+24]
+       addxcc  %g3,%o5,%g3
+       st      %g3,[res_ptr+28]
+       addx    %g0,%g0,%o4             C save cy in register
+       addcc   n,-8,n
+       add     s1_ptr,32,s1_ptr
+       add     s2_ptr,32,s2_ptr
+       add     res_ptr,32,res_ptr
+       bge     L(loop2)
+       subcc   %g0,%o4,%g0             C restore cy
+
+L(fin2):
+       addcc   n,8-2,n                 C n = (limbs remaining) - 2
+       blt     L(end2)
+       subcc   %g0,%o4,%g0             C restore cy
+L(loope2):
+       ldd     [s1_ptr+0],%g2          C add blocks of 2 limbs
+       ldd     [s2_ptr+0],%o4
+       addxcc  %g2,%o4,%g2
+       st      %g2,[res_ptr+0]
+       addxcc  %g3,%o5,%g3
+       st      %g3,[res_ptr+4]
+       addx    %g0,%g0,%o4             C save cy in register
+       addcc   n,-2,n
+       add     s1_ptr,8,s1_ptr
+       add     s2_ptr,8,s2_ptr
+       add     res_ptr,8,res_ptr
+       bge     L(loope2)
+       subcc   %g0,%o4,%g0             C restore cy
+L(end2):
+       andcc   n,1,%g0                 C one odd limb left over?
+       be      L(ret2)
+       subcc   %g0,%o4,%g0             C restore cy
+C Add last limb
+L(jone):
+       ld      [s1_ptr],%g4
+       ld      [s2_ptr],%g2
+       addxcc  %g4,%g2,%o4
+       st      %o4,[res_ptr]
+
+L(ret2):
+       retl
+       addx    %g0,%g0,%o0     C return carry-out from most sign. limb
+EPILOGUE(mpn_add_n)
diff --git a/mpn/sparc32/addmul_1.asm b/mpn/sparc32/addmul_1.asm

new file mode 100644 (file)

index 0000000..d73529e
--- /dev/null
+++ b/mpn/sparc32/addmul_1.asm
@@ -0,0 +1,144 @@
+dnl  SPARC mpn_addmul_1 -- Multiply a limb vector with a limb and add the
+dnl  result to a second limb vector.
+
+dnl  Copyright 1992, 1993, 1994, 2000 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr      o0
+C s1_ptr       o1
+C size         o2
+C s2_limb      o3
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+       C Make S1_PTR and RES_PTR point at the end of their blocks
+       C and put (- 4 x SIZE) in index/loop counter.
+       sll     %o2,2,%o2
+       add     %o0,%o2,%o4     C RES_PTR in o4 since o0 is retval
+       add     %o1,%o2,%o1
+       sub     %g0,%o2,%o2
+
+       cmp     %o3,0xfff
+       bgu     L(large)        C S2_LIMB > 0xfff (unsigned): use the 32-step multiply
+       nop
+
+       ld      [%o1+%o2],%o5   C o5 = first S1 limb
+       mov     0,%o0           C carry limb = 0
+       b       L(0)
+        add    %o4,-4,%o4      C delay slot: bias RES_PTR, o2 is bumped before use
+L(loop0):
+       addcc   %o5,%g1,%g1     C res limb + low product limb
+       ld      [%o1+%o2],%o5   C o5 = next S1 limb
+       addx    %o0,%g0,%o0     C fold the carry into the carry limb
+       st      %g1,[%o4+%o2]   C store updated res limb
+L(0):  wr      %g0,%o3,%y      C Y = S2_LIMB
+       sra     %o5,31,%g2
+       and     %o3,%g2,%g2     C g2 = S2_LIMB iff S1 limb < 0, else 0
+       andcc   %g1,0,%g1       C g1 = 0, also clears N and V
+       mulscc  %g1,%o5,%g1     C 12 multiply steps, S2_LIMB <= 0xfff here
+       mulscc  %g1,%o5,%g1
+       mulscc  %g1,%o5,%g1
+       mulscc  %g1,%o5,%g1
+       mulscc  %g1,%o5,%g1
+       mulscc  %g1,%o5,%g1
+       mulscc  %g1,%o5,%g1
+       mulscc  %g1,%o5,%g1
+       mulscc  %g1,%o5,%g1
+       mulscc  %g1,%o5,%g1
+       mulscc  %g1,%o5,%g1
+       mulscc  %g1,%o5,%g1
+       mulscc  %g1,0,%g1       C final shift-only step
+       sra     %g1,20,%g4      C g4 = high limb, before sign compensation
+       sll     %g1,12,%g1
+       rd      %y,%g3
+       srl     %g3,20,%g3
+       or      %g1,%g3,%g1     C g1 = low limb: (g1 << 12) | (Y >> 20)
+
+       addcc   %g1,%o0,%g1     C low limb += carry limb
+       addx    %g2,%g4,%o0     C add sign-compensation and cy to hi limb
+       addcc   %o2,4,%o2       C loop counter
+       bne     L(loop0)
+        ld     [%o4+%o2],%o5   C delay slot: o5 = res limb to add into
+
+       addcc   %o5,%g1,%g1     C last limb: res limb + low product limb
+       addx    %o0,%g0,%o0     C o0 = carry limb returned to the caller
+       retl
+       st      %g1,[%o4+%o2]   C delay slot: store last res limb
+
+L(large):
+       ld      [%o1+%o2],%o5   C o5 = first S1 limb
+       mov     0,%o0           C carry limb = 0
+       sra     %o3,31,%g4      C g4 = mask of ones iff S2_LIMB < 0
+       b       L(1)
+        add    %o4,-4,%o4      C delay slot: bias RES_PTR, o2 is bumped before use
+L(loop):
+       addcc   %o5,%g3,%g3     C res limb + low product limb
+       ld      [%o1+%o2],%o5   C o5 = next S1 limb
+       addx    %o0,%g0,%o0     C fold the carry into the carry limb
+       st      %g3,[%o4+%o2]   C store updated res limb
+L(1):  wr      %g0,%o5,%y      C Y = S1 limb
+       and     %o5,%g4,%g2     C g2 = S1 limb iff S2_LIMB < 0, else 0
+       andcc   %g0,%g0,%g1     C g1 = 0, also clears N and V
+       mulscc  %g1,%o3,%g1     C 32 multiply steps, one per bit of Y
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%g0,%g1     C final shift-only step
+       rd      %y,%g3          C g3 = low limb, g1 holds the high limb
+       addcc   %g3,%o0,%g3     C low limb += carry limb
+       addx    %g2,%g1,%o0     C add sign-compensation and cy to hi limb
+       addcc   %o2,4,%o2       C loop counter
+       bne     L(loop)
+        ld     [%o4+%o2],%o5   C delay slot: o5 = res limb to add into
+
+       addcc   %o5,%g3,%g3     C last limb: res limb + low product limb
+       addx    %o0,%g0,%o0     C o0 = carry limb returned to the caller
+       retl
+       st      %g3,[%o4+%o2]   C delay slot: store last res limb
+EPILOGUE(mpn_addmul_1)
diff --git a/mpn/sparc32/gmp-mparam.h b/mpn/sparc32/gmp-mparam.h

new file mode 100644 (file)

index 0000000..55432ea
--- /dev/null
+++ b/mpn/sparc32/gmp-mparam.h
@@ -0,0 +1,57 @@
+/* SPARC v7 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+/* Generated by tuneup.c, 2002-03-13, gcc 2.95, Weitek 8701 */
+
+#define MUL_TOOM22_THRESHOLD              8
+#define MUL_TOOM33_THRESHOLD            466
+
+#define SQR_BASECASE_THRESHOLD            4
+#define SQR_TOOM2_THRESHOLD              16
+#define SQR_TOOM3_THRESHOLD             258
+
+#define DIV_SB_PREINV_THRESHOLD           4
+#define DIV_DC_THRESHOLD                 28
+#define POWM_THRESHOLD                   28
+
+#define GCD_ACCEL_THRESHOLD               3
+#define JACOBI_BASE_METHOD                2
+
+#define DIVREM_1_NORM_THRESHOLD           3
+#define DIVREM_1_UNNORM_THRESHOLD         4
+#define MOD_1_NORM_THRESHOLD              3
+#define MOD_1_UNNORM_THRESHOLD            4
+#define USE_PREINV_DIVREM_1               1
+#define USE_PREINV_MOD_1                  1
+#define DIVREM_2_THRESHOLD                0  /* always */
+#define DIVEXACT_1_THRESHOLD            120
+#define MODEXACT_1_ODD_THRESHOLD      MP_SIZE_T_MAX  /* never */
+
+#define GET_STR_DC_THRESHOLD             21
+#define GET_STR_PRECOMPUTE_THRESHOLD     25
+#define SET_STR_THRESHOLD              1012
+
+#define MUL_FFT_TABLE  { 272, 672, 1152, 3584, 10240, 24576, 0 }
+#define MUL_FFT_MODF_THRESHOLD          264
+#define MUL_FFT_THRESHOLD              2304
+
+#define SQR_FFT_TABLE  { 304, 736, 1152, 3584, 10240, 24576, 0 }
+#define SQR_FFT_MODF_THRESHOLD          248
+#define SQR_FFT_THRESHOLD              2304
diff --git a/mpn/sparc32/lshift.asm b/mpn/sparc32/lshift.asm

new file mode 100644 (file)

index 0000000..00004f8
--- /dev/null
+++ b/mpn/sparc32/lshift.asm
@@ -0,0 +1,94 @@
+dnl  SPARC mpn_lshift -- Shift a number left.
+
+dnl  Copyright 1995, 1996, 2000 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr      %o0
+C src_ptr      %o1
+C size         %o2
+C cnt          %o3
+
+ASM_START()
+PROLOGUE(mpn_lshift)
+       sll     %o2,2,%g1       C g1 = size in bytes
+       add     %o1,%g1,%o1     C make %o1 point at end of src
+       ld      [%o1-4],%g2     C load first limb
+       sub     %g0,%o3,%o5     C negate shift count
+       add     %o0,%g1,%o0     C make %o0 point at end of res
+       add     %o2,-1,%o2
+       andcc   %o2,4-1,%g4     C number of limbs in first loop
+       srl     %g2,%o5,%g1     C compute function result
+       be      L(0)            C if multiple of 4 limbs, skip first loop
+       st      %g1,[%sp+80]    C delay slot: park the result on the stack
+
+       sub     %o2,%g4,%o2     C adjust count for main loop
+
+L(loop0):
+       ld      [%o1-8],%g3     C g3 = next lower limb
+       add     %o0,-4,%o0
+       add     %o1,-4,%o1
+       addcc   %g4,-1,%g4
+       sll     %g2,%o3,%o4     C high part from the current limb
+       srl     %g3,%o5,%g1     C low part from the next lower limb
+       mov     %g3,%g2         C next lower limb becomes the current limb
+       or      %o4,%g1,%o4
+       bne     L(loop0)
+        st     %o4,[%o0+0]     C delay slot: store result limb
+
+L(0):  tst     %o2             C any limbs left for the main loop?
+       be      L(end)
+        nop
+
+L(loop):
+       ld      [%o1-8],%g3     C main loop: 4 limbs per iteration
+       add     %o0,-16,%o0
+       addcc   %o2,-4,%o2
+       sll     %g2,%o3,%o4
+       srl     %g3,%o5,%g1
+
+       ld      [%o1-12],%g2
+       sll     %g3,%o3,%g4
+       or      %o4,%g1,%o4
+       st      %o4,[%o0+12]
+       srl     %g2,%o5,%g1
+
+       ld      [%o1-16],%g3
+       sll     %g2,%o3,%o4
+       or      %g4,%g1,%g4
+       st      %g4,[%o0+8]
+       srl     %g3,%o5,%g1
+
+       ld      [%o1-20],%g2
+       sll     %g3,%o3,%g4
+       or      %o4,%g1,%o4
+       st      %o4,[%o0+4]
+       srl     %g2,%o5,%g1
+
+       add     %o1,-16,%o1
+       or      %g4,%g1,%g4
+       bne     L(loop)         C tests the flags set by addcc above
+        st     %g4,[%o0+0]
+
+L(end):        sll     %g2,%o3,%g2     C lowest limb: zeros are shifted in
+       st      %g2,[%o0-4]
+       retl
+       ld      [%sp+80],%o0    C delay slot: reload the function result
+EPILOGUE(mpn_lshift)
diff --git a/mpn/sparc32/mul_1.asm b/mpn/sparc32/mul_1.asm

new file mode 100644 (file)

index 0000000..147db11
--- /dev/null
+++ b/mpn/sparc32/mul_1.asm
@@ -0,0 +1,135 @@
+dnl  SPARC mpn_mul_1 -- Multiply a limb vector with a limb and store
+dnl  the result in a second limb vector.
+
+dnl  Copyright 1992, 1993, 1994, 2000 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr      o0
+C s1_ptr       o1
+C size         o2
+C s2_limb      o3
+
+ASM_START()
+PROLOGUE(mpn_mul_1)
+       C Make S1_PTR and RES_PTR point at the end of their blocks
+       C and put (- 4 x SIZE) in index/loop counter.
+       sll     %o2,2,%o2
+       add     %o0,%o2,%o4     C RES_PTR in o4 since o0 is retval
+       add     %o1,%o2,%o1
+       sub     %g0,%o2,%o2
+
+       cmp     %o3,0xfff
+       bgu     L(large)        C S2_LIMB > 0xfff (unsigned): use the 32-step multiply
+       nop
+
+       ld      [%o1+%o2],%o5   C o5 = first S1 limb
+       mov     0,%o0           C carry limb = 0
+       b       L(0)
+        add    %o4,-4,%o4      C delay slot: bias RES_PTR, o2 is bumped before use
+L(loop0):
+       st      %g1,[%o4+%o2]   C store result limb of the previous pass
+L(0):  wr      %g0,%o3,%y      C Y = S2_LIMB
+       sra     %o5,31,%g2
+       and     %o3,%g2,%g2     C g2 = S2_LIMB iff S1 limb < 0, else 0
+       andcc   %g1,0,%g1       C g1 = 0, also clears N and V
+       mulscc  %g1,%o5,%g1     C 12 multiply steps, S2_LIMB <= 0xfff here
+       mulscc  %g1,%o5,%g1
+       mulscc  %g1,%o5,%g1
+       mulscc  %g1,%o5,%g1
+       mulscc  %g1,%o5,%g1
+       mulscc  %g1,%o5,%g1
+       mulscc  %g1,%o5,%g1
+       mulscc  %g1,%o5,%g1
+       mulscc  %g1,%o5,%g1
+       mulscc  %g1,%o5,%g1
+       mulscc  %g1,%o5,%g1
+       mulscc  %g1,%o5,%g1
+       mulscc  %g1,0,%g1       C final shift-only step
+       sra     %g1,20,%g4      C g4 = high limb, before sign compensation
+       sll     %g1,12,%g1
+       rd      %y,%g3
+       srl     %g3,20,%g3
+       or      %g1,%g3,%g1     C g1 = low limb: (g1 << 12) | (Y >> 20)
+
+       addcc   %g1,%o0,%g1     C low limb += carry limb
+       addx    %g2,%g4,%o0     C add sign-compensation and cy to hi limb
+       addcc   %o2,4,%o2       C loop counter
+       bne,a   L(loop0)        C annulled: the load runs only when looping
+        ld     [%o1+%o2],%o5   C delay slot: o5 = next S1 limb
+
+       retl
+       st      %g1,[%o4+%o2]   C delay slot: store last result limb
+
+
+L(large):
+       ld      [%o1+%o2],%o5   C o5 = first S1 limb
+       mov     0,%o0           C carry limb = 0
+       sra     %o3,31,%g4      C g4 = mask of ones iff S2_LIMB < 0
+       b       L(1)
+        add    %o4,-4,%o4      C delay slot: bias RES_PTR, o2 is bumped before use
+L(loop):
+       st      %g3,[%o4+%o2]   C store result limb of the previous pass
+L(1):  wr      %g0,%o5,%y      C Y = S1 limb
+       and     %o5,%g4,%g2     C g2 = S1_LIMB iff S2_LIMB < 0, else 0
+       andcc   %g0,%g0,%g1     C g1 = 0, also clears N and V
+       mulscc  %g1,%o3,%g1     C 32 multiply steps, one per bit of Y
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%g0,%g1     C final shift-only step
+       rd      %y,%g3          C g3 = low limb, g1 holds the high limb
+       addcc   %g3,%o0,%g3     C low limb += carry limb
+       addx    %g2,%g1,%o0     C add sign-compensation and cy to hi limb
+       addcc   %o2,4,%o2       C loop counter
+       bne,a   L(loop)         C annulled: the load runs only when looping
+        ld     [%o1+%o2],%o5   C delay slot: o5 = next S1 limb
+
+       retl
+       st      %g3,[%o4+%o2]   C delay slot: store last result limb
+EPILOGUE(mpn_mul_1)
diff --git a/mpn/sparc32/rshift.asm b/mpn/sparc32/rshift.asm

new file mode 100644 (file)

index 0000000..26db441
--- /dev/null
+++ b/mpn/sparc32/rshift.asm
@@ -0,0 +1,91 @@
+dnl  SPARC mpn_rshift -- Shift a number right.
+
+dnl  Copyright 1995, 1996, 2000 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr      %o0
+C src_ptr      %o1
+C size         %o2
+C cnt          %o3
+
+ASM_START()
+PROLOGUE(mpn_rshift)
+       ld      [%o1],%g2       C load first limb
+       sub     %g0,%o3,%o5     C negate shift count
+       add     %o2,-1,%o2
+       andcc   %o2,4-1,%g4     C number of limbs in first loop
+       sll     %g2,%o5,%g1     C compute function result
+       be      L(0)            C if multiple of 4 limbs, skip first loop
+       st      %g1,[%sp+80]    C delay slot: park the result on the stack
+
+       sub     %o2,%g4,%o2     C adjust count for main loop
+
+L(loop0):
+       ld      [%o1+4],%g3     C g3 = next higher limb
+       add     %o0,4,%o0
+       add     %o1,4,%o1
+       addcc   %g4,-1,%g4
+       srl     %g2,%o3,%o4     C low part from the current limb
+       sll     %g3,%o5,%g1     C high part from the next higher limb
+       mov     %g3,%g2         C next higher limb becomes the current limb
+       or      %o4,%g1,%o4
+       bne     L(loop0)
+        st     %o4,[%o0-4]     C delay slot: store result limb
+
+L(0):  tst     %o2             C any limbs left for the main loop?
+       be      L(end)
+        nop
+
+L(loop):
+       ld      [%o1+4],%g3     C main loop: 4 limbs per iteration
+       add     %o0,16,%o0
+       addcc   %o2,-4,%o2
+       srl     %g2,%o3,%o4
+       sll     %g3,%o5,%g1
+
+       ld      [%o1+8],%g2
+       srl     %g3,%o3,%g4
+       or      %o4,%g1,%o4
+       st      %o4,[%o0-16]
+       sll     %g2,%o5,%g1
+
+       ld      [%o1+12],%g3
+       srl     %g2,%o3,%o4
+       or      %g4,%g1,%g4
+       st      %g4,[%o0-12]
+       sll     %g3,%o5,%g1
+
+       ld      [%o1+16],%g2
+       srl     %g3,%o3,%g4
+       or      %o4,%g1,%o4
+       st      %o4,[%o0-8]
+       sll     %g2,%o5,%g1
+
+       add     %o1,16,%o1
+       or      %g4,%g1,%g4
+       bne     L(loop)         C tests the flags set by addcc above
+        st     %g4,[%o0-4]
+
+L(end):        srl     %g2,%o3,%g2     C highest limb: zeros are shifted in
+       st      %g2,[%o0-0]
+       retl
+       ld      [%sp+80],%o0    C delay slot: reload the function result
+EPILOGUE(mpn_rshift)
diff --git a/mpn/sparc32/sparc-defs.m4 b/mpn/sparc32/sparc-defs.m4

new file mode 100644 (file)

index 0000000..36d7301
--- /dev/null
+++ b/mpn/sparc32/sparc-defs.m4
@@ -0,0 +1,39 @@
+divert(-1)
+
+dnl  m4 macros for SPARC assembler (32 and 64 bit).
+
+
+dnl  Copyright 2002, 2011 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+changecom(;)   dnl cannot use default # since that's used in REGISTER decls
+
+
+dnl  Usage: REGISTER(reg,attr)
+dnl
+dnl  Expand to a ".register reg,attr" directive when HAVE_REGISTER is "yes",
+dnl  and to nothing otherwise (set by the GMP_ASM_SPARC_REGISTER configure test).
+
+define(REGISTER,
+m4_assert_numargs(2)
+m4_assert_defined(`HAVE_REGISTER')
+`ifelse(HAVE_REGISTER,yes,
+`.register `$1',`$2'')')
+
+
+divert
diff --git a/mpn/sparc32/sub_n.asm b/mpn/sparc32/sub_n.asm

new file mode 100644 (file)

index 0000000..4fc759d
--- /dev/null
+++ b/mpn/sparc32/sub_n.asm
@@ -0,0 +1,324 @@
+dnl  SPARC mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+dnl  store difference in a third limb vector.
+
+dnl  Copyright 1995, 1996, 2000 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+define(res_ptr,%o0)
+define(s1_ptr,%o1)
+define(s2_ptr,%o2)
+define(n,%o3)
+
+ASM_START()
+PROLOGUE(mpn_sub_n)
+       xor     s2_ptr,res_ptr,%g1      C g1 = s2_ptr ^ res_ptr
+       andcc   %g1,4,%g0               C do they differ in address bit 2?
+       bne     L(1)                    C branch if alignment differs
+       nop
+C **  V1a  **  s2_ptr and res_ptr agree in bit 2: ldd from s2_ptr, std to res_ptr
+       andcc   res_ptr,4,%g0           C res_ptr unaligned? Side effect: cy=0
+       be      L(v1)                   C if no, branch
+       nop
+C Subtract least significant limb separately to align res_ptr and s2_ptr
+       ld      [s1_ptr],%g4
+       add     s1_ptr,4,s1_ptr
+       ld      [s2_ptr],%g2
+       add     s2_ptr,4,s2_ptr
+       add     n,-1,n
+       subcc   %g4,%g2,%o4             C o4 = s1 limb - s2 limb, cy = borrow
+       st      %o4,[res_ptr]
+       add     res_ptr,4,res_ptr
+L(v1): addx    %g0,%g0,%o4             C save cy in register
+       cmp     n,2                     C if n < 2 ...
+       bl      L(end2)                 C ... branch to tail code
+       subcc   %g0,%o4,%g0             C restore cy
+
+       ld      [s1_ptr+0],%g4          C preload two s1 limbs into g4 and g1
+       addcc   n,-10,n                 C n -= 10: 8 for a loop pass plus 2 preloaded limbs
+       ld      [s1_ptr+4],%g1
+       ldd     [s2_ptr+0],%g2          C preload two s2 limbs into g2 and g3
+       blt     L(fin1)
+       subcc   %g0,%o4,%g0             C restore cy
+C Subtract blocks of 8 limbs until less than 8 limbs remain
+L(loop1):
+       subxcc  %g4,%g2,%o4
+       ld      [s1_ptr+8],%g4
+       subxcc  %g1,%g3,%o5
+       ld      [s1_ptr+12],%g1
+       ldd     [s2_ptr+8],%g2
+       std     %o4,[res_ptr+0]
+       subxcc  %g4,%g2,%o4
+       ld      [s1_ptr+16],%g4
+       subxcc  %g1,%g3,%o5
+       ld      [s1_ptr+20],%g1
+       ldd     [s2_ptr+16],%g2
+       std     %o4,[res_ptr+8]
+       subxcc  %g4,%g2,%o4
+       ld      [s1_ptr+24],%g4
+       subxcc  %g1,%g3,%o5
+       ld      [s1_ptr+28],%g1
+       ldd     [s2_ptr+24],%g2
+       std     %o4,[res_ptr+16]
+       subxcc  %g4,%g2,%o4
+       ld      [s1_ptr+32],%g4
+       subxcc  %g1,%g3,%o5
+       ld      [s1_ptr+36],%g1
+       ldd     [s2_ptr+32],%g2
+       std     %o4,[res_ptr+24]
+       addx    %g0,%g0,%o4             C save cy in register
+       addcc   n,-8,n
+       add     s1_ptr,32,s1_ptr
+       add     s2_ptr,32,s2_ptr
+       add     res_ptr,32,res_ptr
+       bge     L(loop1)
+       subcc   %g0,%o4,%g0             C restore cy
+
+L(fin1):
+       addcc   n,8-2,n                 C undo the 8-limb bias, keep 2 for the preloaded limbs
+       blt     L(end1)
+       subcc   %g0,%o4,%g0             C restore cy
+C Subtract blocks of 2 limbs until less than 2 limbs remain
+L(loope1):
+       subxcc  %g4,%g2,%o4
+       ld      [s1_ptr+8],%g4
+       subxcc  %g1,%g3,%o5
+       ld      [s1_ptr+12],%g1
+       ldd     [s2_ptr+8],%g2
+       std     %o4,[res_ptr+0]
+       addx    %g0,%g0,%o4             C save cy in register
+       addcc   n,-2,n
+       add     s1_ptr,8,s1_ptr
+       add     s2_ptr,8,s2_ptr
+       add     res_ptr,8,res_ptr
+       bge     L(loope1)
+       subcc   %g0,%o4,%g0             C restore cy
+L(end1):
+       subxcc  %g4,%g2,%o4             C finish the two limbs still held in registers
+       subxcc  %g1,%g3,%o5
+       std     %o4,[res_ptr+0]
+       addx    %g0,%g0,%o4             C save cy in register
+
+       andcc   n,1,%g0                 C one odd limb left over?
+       be      L(ret1)
+       subcc   %g0,%o4,%g0             C restore cy
+C Subtract last limb
+       ld      [s1_ptr+8],%g4          C pointers still trail the 2 limbs just stored, hence +8
+       ld      [s2_ptr+8],%g2
+       subxcc  %g4,%g2,%o4
+       st      %o4,[res_ptr+8]
+
+L(ret1):
+       retl
+       addx    %g0,%g0,%o0     C return borrow (cy) from most sign. limb
+
+L(1):  xor     s1_ptr,res_ptr,%g1      C g1 = s1_ptr ^ res_ptr
+       andcc   %g1,4,%g0               C do they differ in address bit 2?
+       bne     L(2)
+       nop
+C **  V1b  **  s1_ptr and res_ptr agree in bit 2: ldd from s1_ptr, std to res_ptr
+       andcc   res_ptr,4,%g0           C res_ptr unaligned? Side effect: cy=0
+       be      L(v1b)                  C if no, branch
+       nop
+C Subtract least significant limb separately to align res_ptr and s1_ptr
+       ld      [s2_ptr],%g4
+       add     s2_ptr,4,s2_ptr
+       ld      [s1_ptr],%g2
+       add     s1_ptr,4,s1_ptr
+       add     n,-1,n
+       subcc   %g2,%g4,%o4             C o4 = s1 limb - s2 limb, cy = borrow
+       st      %o4,[res_ptr]
+       add     res_ptr,4,res_ptr
+L(v1b):        addx    %g0,%g0,%o4             C save cy in register
+       cmp     n,2                     C if n < 2 ...
+       bl      L(end2)                 C ... branch to tail code
+       subcc   %g0,%o4,%g0             C restore cy
+
+       ld      [s2_ptr+0],%g4          C preload two s2 limbs into g4 and g1
+       addcc   n,-10,n                 C n -= 10: 8 for a loop pass plus 2 preloaded limbs
+       ld      [s2_ptr+4],%g1
+       ldd     [s1_ptr+0],%g2          C preload two s1 limbs into g2 and g3
+       blt     L(fin1b)
+       subcc   %g0,%o4,%g0             C restore cy
+C Subtract blocks of 8 limbs until less than 8 limbs remain
+L(loop1b):
+       subxcc  %g2,%g4,%o4
+       ld      [s2_ptr+8],%g4
+       subxcc  %g3,%g1,%o5
+       ld      [s2_ptr+12],%g1
+       ldd     [s1_ptr+8],%g2
+       std     %o4,[res_ptr+0]
+       subxcc  %g2,%g4,%o4
+       ld      [s2_ptr+16],%g4
+       subxcc  %g3,%g1,%o5
+       ld      [s2_ptr+20],%g1
+       ldd     [s1_ptr+16],%g2
+       std     %o4,[res_ptr+8]
+       subxcc  %g2,%g4,%o4
+       ld      [s2_ptr+24],%g4
+       subxcc  %g3,%g1,%o5
+       ld      [s2_ptr+28],%g1
+       ldd     [s1_ptr+24],%g2
+       std     %o4,[res_ptr+16]
+       subxcc  %g2,%g4,%o4
+       ld      [s2_ptr+32],%g4
+       subxcc  %g3,%g1,%o5
+       ld      [s2_ptr+36],%g1
+       ldd     [s1_ptr+32],%g2
+       std     %o4,[res_ptr+24]
+       addx    %g0,%g0,%o4             C save cy in register
+       addcc   n,-8,n
+       add     s1_ptr,32,s1_ptr
+       add     s2_ptr,32,s2_ptr
+       add     res_ptr,32,res_ptr
+       bge     L(loop1b)
+       subcc   %g0,%o4,%g0             C restore cy
+
+L(fin1b):
+       addcc   n,8-2,n                 C undo the 8-limb bias, keep 2 for the preloaded limbs
+       blt     L(end1b)
+       subcc   %g0,%o4,%g0             C restore cy
+C Subtract blocks of 2 limbs until less than 2 limbs remain
+L(loope1b):
+       subxcc  %g2,%g4,%o4
+       ld      [s2_ptr+8],%g4
+       subxcc  %g3,%g1,%o5
+       ld      [s2_ptr+12],%g1
+       ldd     [s1_ptr+8],%g2
+       std     %o4,[res_ptr+0]
+       addx    %g0,%g0,%o4             C save cy in register
+       addcc   n,-2,n
+       add     s1_ptr,8,s1_ptr
+       add     s2_ptr,8,s2_ptr
+       add     res_ptr,8,res_ptr
+       bge     L(loope1b)
+       subcc   %g0,%o4,%g0             C restore cy
+L(end1b):
+       subxcc  %g2,%g4,%o4             C finish the two limbs still held in registers
+       subxcc  %g3,%g1,%o5
+       std     %o4,[res_ptr+0]
+       addx    %g0,%g0,%o4             C save cy in register
+
+       andcc   n,1,%g0                 C one odd limb left over?
+       be      L(ret1b)
+       subcc   %g0,%o4,%g0             C restore cy
+C Subtract last limb
+       ld      [s2_ptr+8],%g4          C pointers still trail the 2 limbs just stored, hence +8
+       ld      [s1_ptr+8],%g2
+       subxcc  %g2,%g4,%o4
+       st      %o4,[res_ptr+8]
+
+L(ret1b):
+       retl
+       addx    %g0,%g0,%o0             C return borrow (cy) from most sign. limb
+
+C **  V2  **
+C If we come here, the alignment of s1_ptr and res_ptr as well as the
+C alignment of s2_ptr and res_ptr differ.  Since there are only two ways
+C things can be aligned (that we care about) we now know that the alignment
+C of s1_ptr and s2_ptr are the same.
+
+L(2):  cmp     n,1                     C single limb? equal compare leaves cy=0 for jone
+       be      L(jone)
+       nop
+       andcc   s1_ptr,4,%g0            C s1_ptr unaligned? Side effect: cy=0
+       be      L(v2)                   C if no, branch
+       nop
+C Subtract least significant limb separately to align s1_ptr and s2_ptr
+       ld      [s1_ptr],%g4
+       add     s1_ptr,4,s1_ptr
+       ld      [s2_ptr],%g2
+       add     s2_ptr,4,s2_ptr
+       add     n,-1,n
+       subcc   %g4,%g2,%o4             C o4 = s1 limb - s2 limb, cy = borrow
+       st      %o4,[res_ptr]
+       add     res_ptr,4,res_ptr
+
+L(v2): addx    %g0,%g0,%o4             C save cy in register
+       addcc   n,-8,n                  C n -= 8: negative means no full 8-limb block
+       blt     L(fin2)
+       subcc   %g0,%o4,%g0             C restore cy
+C Subtract blocks of 8 limbs until less than 8 limbs remain
+L(loop2):
+       ldd     [s1_ptr+0],%g2          C both sources via ldd, results via single-word st
+       ldd     [s2_ptr+0],%o4
+       subxcc  %g2,%o4,%g2
+       st      %g2,[res_ptr+0]
+       subxcc  %g3,%o5,%g3
+       st      %g3,[res_ptr+4]
+       ldd     [s1_ptr+8],%g2
+       ldd     [s2_ptr+8],%o4
+       subxcc  %g2,%o4,%g2
+       st      %g2,[res_ptr+8]
+       subxcc  %g3,%o5,%g3
+       st      %g3,[res_ptr+12]
+       ldd     [s1_ptr+16],%g2
+       ldd     [s2_ptr+16],%o4
+       subxcc  %g2,%o4,%g2
+       st      %g2,[res_ptr+16]
+       subxcc  %g3,%o5,%g3
+       st      %g3,[res_ptr+20]
+       ldd     [s1_ptr+24],%g2
+       ldd     [s2_ptr+24],%o4
+       subxcc  %g2,%o4,%g2
+       st      %g2,[res_ptr+24]
+       subxcc  %g3,%o5,%g3
+       st      %g3,[res_ptr+28]
+       addx    %g0,%g0,%o4             C save cy in register
+       addcc   n,-8,n
+       add     s1_ptr,32,s1_ptr
+       add     s2_ptr,32,s2_ptr
+       add     res_ptr,32,res_ptr
+       bge     L(loop2)
+       subcc   %g0,%o4,%g0             C restore cy
+
+L(fin2):
+       addcc   n,8-2,n                 C undo the 8-limb bias, test for a 2-limb block
+       blt     L(end2)
+       subcc   %g0,%o4,%g0             C restore cy
+L(loope2):                             C blocks of 2 limbs until less than 2 remain
+       ldd     [s1_ptr+0],%g2
+       ldd     [s2_ptr+0],%o4
+       subxcc  %g2,%o4,%g2
+       st      %g2,[res_ptr+0]
+       subxcc  %g3,%o5,%g3
+       st      %g3,[res_ptr+4]
+       addx    %g0,%g0,%o4             C save cy in register
+       addcc   n,-2,n
+       add     s1_ptr,8,s1_ptr
+       add     s2_ptr,8,s2_ptr
+       add     res_ptr,8,res_ptr
+       bge     L(loope2)
+       subcc   %g0,%o4,%g0             C restore cy
+L(end2):
+       andcc   n,1,%g0                 C one odd limb left over?
+       be      L(ret2)
+       subcc   %g0,%o4,%g0             C restore cy
+C Subtract last limb
+L(jone):
+       ld      [s1_ptr],%g4
+       ld      [s2_ptr],%g2
+       subxcc  %g4,%g2,%o4
+       st      %o4,[res_ptr]
+
+L(ret2):
+       retl
+       addx    %g0,%g0,%o0             C return borrow (cy) from most sign. limb
+EPILOGUE(mpn_sub_n)
diff --git a/mpn/sparc32/submul_1.asm b/mpn/sparc32/submul_1.asm

new file mode 100644 (file)

index 0000000..9cde45f
--- /dev/null
+++ b/mpn/sparc32/submul_1.asm
@@ -0,0 +1,144 @@
+dnl  SPARC mpn_submul_1 -- Multiply a limb vector with a limb and subtract
+dnl  the result from a second limb vector.
+
+dnl  Copyright 1992, 1993, 1994, 2000 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr      o0
+C s1_ptr       o1
+C size         o2
+C s2_limb      o3
+
+ASM_START()
+PROLOGUE(mpn_submul_1)
+       C Make S1_PTR and RES_PTR point at the end of their blocks
+       C and put (- 4 x SIZE) in index/loop counter.
+       sll     %o2,2,%o2       C o2 = 4 x SIZE
+       add     %o0,%o2,%o4     C RES_PTR in o4 since o0 is retval
+       add     %o1,%o2,%o1
+       sub     %g0,%o2,%o2     C o2 = -4 x SIZE, counts up towards 0
+
+       cmp     %o3,0xfff       C does S2_LIMB fit in 12 bits?
+       bgu     L(large)        C no: use the 32-step multiply
+       nop
+
+       ld      [%o1+%o2],%o5   C o5 = first S1 limb
+       mov     0,%o0           C o0 = carry limb, initially 0
+       b       L(0)
+        add    %o4,-4,%o4      C bias o4 by -4: stores happen after o2 has advanced
+L(loop0):
+       subcc   %o5,%g1,%g1     C g1 = res limb - low product limb
+       ld      [%o1+%o2],%o5   C o5 = next S1 limb
+       addx    %o0,%g0,%o0     C fold the borrow into the carry limb
+       st      %g1,[%o4+%o2]
+L(0):  wr      %g0,%o3,%y      C Y = S2_LIMB, the multiplier for mulscc
+       sra     %o5,31,%g2      C g2 = all ones iff S1 limb has bit 31 set
+       and     %o3,%g2,%g2     C g2 = sign-compensation term
+       andcc   %g1,0,%g1       C g1 = 0 and condition codes cleared
+       mulscc  %g1,%o5,%g1     C 12 multiply steps, one per multiplier bit
+       mulscc  %g1,%o5,%g1
+       mulscc  %g1,%o5,%g1
+       mulscc  %g1,%o5,%g1
+       mulscc  %g1,%o5,%g1
+       mulscc  %g1,%o5,%g1
+       mulscc  %g1,%o5,%g1
+       mulscc  %g1,%o5,%g1
+       mulscc  %g1,%o5,%g1
+       mulscc  %g1,%o5,%g1
+       mulscc  %g1,%o5,%g1
+       mulscc  %g1,%o5,%g1
+       mulscc  %g1,0,%g1       C final shift step
+       sra     %g1,20,%g4      C g4 = high limb of product
+       sll     %g1,12,%g1
+       rd      %y,%g3
+       srl     %g3,20,%g3      C top 12 bits of Y are the low product bits
+       or      %g1,%g3,%g1     C g1 = low limb of product
+
+       addcc   %g1,%o0,%g1     C add carry limb from previous limb
+       addx    %g2,%g4,%o0     C add sign-compensation and cy to hi limb
+       addcc   %o2,4,%o2       C loop counter
+       bne     L(loop0)
+        ld     [%o4+%o2],%o5   C o5 = res limb to subtract from (delay slot)
+
+       subcc   %o5,%g1,%g1     C last limb: res limb - low product limb
+       addx    %o0,%g0,%o0     C return value = carry limb + borrow
+       retl
+       st      %g1,[%o4+%o2]   C store last result limb (delay slot)
+
+L(large):
+       ld      [%o1+%o2],%o5   C o5 = first S1 limb
+       mov     0,%o0           C o0 = carry limb, initially 0
+       sra     %o3,31,%g4      C g4 = mask of ones iff S2_LIMB < 0
+       b       L(1)
+        add    %o4,-4,%o4      C bias o4 by -4: stores happen after o2 has advanced
+L(loop):
+       subcc   %o5,%g3,%g3     C g3 = res limb - low product limb
+       ld      [%o1+%o2],%o5   C o5 = next S1 limb
+       addx    %o0,%g0,%o0     C fold the borrow into the carry limb
+       st      %g3,[%o4+%o2]
+L(1):  wr      %g0,%o5,%y      C Y = S1 limb, the multiplier for mulscc
+       and     %o5,%g4,%g2     C g2 = sign-compensation term
+       andcc   %g0,%g0,%g1     C g1 = 0 and condition codes cleared
+       mulscc  %g1,%o3,%g1     C 32 multiply steps, one per multiplier bit
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%o3,%g1
+       mulscc  %g1,%g0,%g1     C final shift step
+       rd      %y,%g3          C g3 = low limb of product
+       addcc   %g3,%o0,%g3     C add carry limb from previous limb
+       addx    %g2,%g1,%o0     C o0 = high limb + sign-compensation + cy
+       addcc   %o2,4,%o2       C loop counter
+       bne     L(loop)
+        ld     [%o4+%o2],%o5   C o5 = res limb to subtract from (delay slot)
+
+       subcc   %o5,%g3,%g3     C last limb: res limb - low product limb
+       addx    %o0,%g0,%o0     C return value = carry limb + borrow
+       retl
+       st      %g3,[%o4+%o2]   C store last result limb (delay slot)
+EPILOGUE(mpn_submul_1)
diff --git a/mpn/sparc32/udiv.asm b/mpn/sparc32/udiv.asm

new file mode 100644 (file)

index 0000000..fc520f6
--- /dev/null
+++ b/mpn/sparc32/udiv.asm
@@ -0,0 +1,156 @@
+dnl  SPARC v7 __udiv_qrnnd division support, used from longlong.h.
+dnl  This is for v7 CPUs with a floating-point unit.
+
+dnl  Copyright 1993, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C rem_ptr      i0
+C n1           i1
+C n0           i2
+C d            i3
+
+ASM_START()
+
+ifdef(`PIC',
+`      TEXT
+L(getpc):
+       retl
+       nop')
+
+       TEXT
+       ALIGN(8)
+L(C0): .double 0r4294967296    C 2^32
+L(C1): .double 0r2147483648    C 2^31
+
+PROLOGUE(mpn_udiv_qrnnd)
+       save    %sp,-104,%sp    C new register window and 104-byte frame
+       st      %i1,[%fp-8]     C pass n1 to the FPU through the stack slot
+       ld      [%fp-8],%f10
+
+ifdef(`PIC',
+`L(pc):        call    L(getpc)                C put address of this insn in %o7
+       ldd     [%o7+L(C0)-L(pc)],%f8',
+`      sethi   %hi(L(C0)),%o7
+       ldd     [%o7+%lo(L(C0))],%f8')
+
+       fitod   %f10,%f4        C f4 = n1 converted as a signed integer
+       cmp     %i1,0
+       bge     L(248)
+       mov     %i0,%i5         C i5 = rem_ptr (delay slot)
+       faddd   %f4,%f8,%f4     C n1 had bit 31 set: add 2^32 for the unsigned value
+L(248):
+       st      %i2,[%fp-8]     C pass n0 to the FPU
+       ld      [%fp-8],%f10
+       fmuld   %f4,%f8,%f6     C f6 = n1 * 2^32
+       cmp     %i2,0
+       bge     L(249)
+       fitod   %f10,%f2        C f2 = n0 converted as signed (delay slot)
+       faddd   %f2,%f8,%f2     C n0 had bit 31 set: add 2^32
+L(249):
+       st      %i3,[%fp-8]     C pass d to the FPU
+       faddd   %f6,%f2,%f2     C f2 = n1 * 2^32 + n0
+       ld      [%fp-8],%f10
+       cmp     %i3,0
+       bge     L(250)
+       fitod   %f10,%f4        C f4 = d converted as signed (delay slot)
+       faddd   %f4,%f8,%f4     C d had bit 31 set: add 2^32
+L(250):
+       fdivd   %f2,%f4,%f2     C f2 = floating point quotient estimate
+
+ifdef(`PIC',
+`      ldd     [%o7+L(C1)-L(pc)],%f4',
+`      sethi   %hi(L(C1)),%o7
+       ldd     [%o7+%lo(L(C1))],%f4')
+
+       fcmped  %f2,%f4         C quotient estimate >= 2^31 ?
+       nop                     C keeps one insn between fcmped and the fbge that reads fcc
+       fbge,a  L(251)
+       fsubd   %f2,%f4,%f2     C annulled slot: subtract 2^31 only when the branch is taken
+       fdtoi   %f2,%f2         C estimate fits a signed int: convert directly
+       st      %f2,[%fp-8]
+       b       L(252)
+       ld      [%fp-8],%i4     C i4 = quotient estimate q (delay slot)
+L(251):
+       fdtoi   %f2,%f2         C convert estimate - 2^31
+       st      %f2,[%fp-8]
+       ld      [%fp-8],%i4
+       sethi   %hi(-2147483648),%g2    C g2 = 0x80000000
+       xor     %i4,%g2,%i4     C put the 2^31 bit back into q
+L(252):
+       wr      %g0,%i4,%y      C Y = q, the multiplier for mulscc
+       sra     %i3,31,%g2      C g2 = all ones iff d has bit 31 set
+       and     %i4,%g2,%g2     C g2 = sign-compensation term
+       andcc   %g0,0,%g1       C g1 = 0 and condition codes cleared
+       mulscc  %g1,%i3,%g1     C 32 multiply steps computing q * d
+       mulscc  %g1,%i3,%g1
+       mulscc  %g1,%i3,%g1
+       mulscc  %g1,%i3,%g1
+       mulscc  %g1,%i3,%g1
+       mulscc  %g1,%i3,%g1
+       mulscc  %g1,%i3,%g1
+       mulscc  %g1,%i3,%g1
+       mulscc  %g1,%i3,%g1
+       mulscc  %g1,%i3,%g1
+       mulscc  %g1,%i3,%g1
+       mulscc  %g1,%i3,%g1
+       mulscc  %g1,%i3,%g1
+       mulscc  %g1,%i3,%g1
+       mulscc  %g1,%i3,%g1
+       mulscc  %g1,%i3,%g1
+       mulscc  %g1,%i3,%g1
+       mulscc  %g1,%i3,%g1
+       mulscc  %g1,%i3,%g1
+       mulscc  %g1,%i3,%g1
+       mulscc  %g1,%i3,%g1
+       mulscc  %g1,%i3,%g1
+       mulscc  %g1,%i3,%g1
+       mulscc  %g1,%i3,%g1
+       mulscc  %g1,%i3,%g1
+       mulscc  %g1,%i3,%g1
+       mulscc  %g1,%i3,%g1
+       mulscc  %g1,%i3,%g1
+       mulscc  %g1,%i3,%g1
+       mulscc  %g1,%i3,%g1
+       mulscc  %g1,%i3,%g1
+       mulscc  %g1,%i3,%g1
+       mulscc  %g1,0,%g1       C final shift step
+       add     %g1,%g2,%i0     C i0 = high limb of q * d
+       rd      %y,%g3          C g3 = low limb of q * d
+       subcc   %i2,%g3,%o7     C o7 = low limb of n1n0 - q * d
+       subxcc  %i1,%i0,%g0     C is the high limb of that difference zero?
+       be      L(253)
+       cmp     %o7,%i3         C compare candidate remainder with d (delay slot)
+
+       add     %i4,-1,%i0      C high limb nonzero: return q - 1 ...
+       add     %o7,%i3,%o7     C ... and remainder + d
+       st      %o7,[%i5]       C store remainder through rem_ptr
+       ret
+       restore
+L(253):
+       blu     L(246)          C remainder < d: q is the quotient
+       mov     %i4,%i0         C return q (delay slot)
+       add     %i4,1,%i0       C otherwise return q + 1 ...
+       sub     %o7,%i3,%o7     C ... and remainder - d
+L(246):
+       st      %o7,[%i5]       C store remainder through rem_ptr
+       ret
+       restore
+EPILOGUE(mpn_udiv_qrnnd)
diff --git a/mpn/sparc32/udiv_nfp.asm b/mpn/sparc32/udiv_nfp.asm

new file mode 100644 (file)

index 0000000..a7513ea
--- /dev/null
+++ b/mpn/sparc32/udiv_nfp.asm
@@ -0,0 +1,191 @@
+dnl  SPARC v7 __udiv_qrnnd division support, used from longlong.h.
+dnl  This is for v7 CPUs without a floating-point unit.
+
+dnl  Copyright 1993, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C rem_ptr      o0
+C n1           o1
+C n0           o2
+C d            o3
+
+ASM_START()
+PROLOGUE(mpn_udiv_qrnnd)
+       tst     %o3             C sign of d; also leaves cy=0
+       bneg    L(largedivisor) C d has bit 31 set
+       mov     8,%g1           C g1 = 8 passes of 4 division steps (delay slot)
+
+       b       L(p1)
+       addxcc  %o2,%o2,%o2     C shift n0 left, its top bit goes to cy (delay slot)
+
+L(plop):                       C p path: remainder in o1, trial difference in o4
+       bcc     L(n1)           C no borrow: difference becomes the remainder, switch to n path
+       addxcc  %o2,%o2,%o2     C shift borrow into o2 and next dividend bit out to cy
+L(p1): addx    %o1,%o1,%o1     C remainder = 2 * remainder + next dividend bit
+       subcc   %o1,%o3,%o4     C trial subtract of d, cy = borrow
+       bcc     L(n2)
+       addxcc  %o2,%o2,%o2
+L(p2): addx    %o1,%o1,%o1
+       subcc   %o1,%o3,%o4
+       bcc     L(n3)
+       addxcc  %o2,%o2,%o2
+L(p3): addx    %o1,%o1,%o1
+       subcc   %o1,%o3,%o4
+       bcc     L(n4)
+       addxcc  %o2,%o2,%o2
+L(p4): addx    %o1,%o1,%o1
+       addcc   %g1,-1,%g1      C pass counter
+       bne     L(plop)
+       subcc   %o1,%o3,%o4     C trial subtract in the delay slot, sets cy again
+       bcc     L(n5)
+       addxcc  %o2,%o2,%o2
+L(p5): st      %o1,[%o0]       C store remainder through rem_ptr
+       retl
+       xnor    %g0,%o2,%o0     C return quotient = complement of collected borrow bits
+
+L(nlop):                       C n path: remainder in o4, trial difference in o1
+       bcc     L(p1)           C no borrow: difference becomes the remainder, switch to p path
+       addxcc  %o2,%o2,%o2
+L(n1): addx    %o4,%o4,%o4
+       subcc   %o4,%o3,%o1
+       bcc     L(p2)
+       addxcc  %o2,%o2,%o2
+L(n2): addx    %o4,%o4,%o4
+       subcc   %o4,%o3,%o1
+       bcc     L(p3)
+       addxcc  %o2,%o2,%o2
+L(n3): addx    %o4,%o4,%o4
+       subcc   %o4,%o3,%o1
+       bcc     L(p4)
+       addxcc  %o2,%o2,%o2
+L(n4): addx    %o4,%o4,%o4
+       addcc   %g1,-1,%g1      C pass counter
+       bne     L(nlop)
+       subcc   %o4,%o3,%o1     C trial subtract in the delay slot, sets cy again
+       bcc     L(p5)
+       addxcc  %o2,%o2,%o2
+L(n5): st      %o4,[%o0]       C store remainder through rem_ptr
+       retl
+       xnor    %g0,%o2,%o0     C return quotient = complement of collected borrow bits
+
+L(largedivisor):
+       and     %o2,1,%o5       C %o5 = n0 & 1
+
+       srl     %o2,1,%o2
+       sll     %o1,31,%g2
+       or      %g2,%o2,%o2     C %o2 = lo(n1n0 >> 1)
+       srl     %o1,1,%o1       C %o1 = hi(n1n0 >> 1)
+
+       and     %o3,1,%g2       C %g2 = d & 1
+       srl     %o3,1,%g3       C %g3 = floor(d / 2)
+       add     %g3,%g2,%g3     C %g3 = ceil(d / 2)
+
+       b       L(Lp1)
+       addxcc  %o2,%o2,%o2     C shift dividend left, its top bit goes to cy (delay slot)
+
+L(Lplop):                      C same stepping as plop/nlop, dividing by g3
+       bcc     L(Ln1)
+       addxcc  %o2,%o2,%o2
+L(Lp1):        addx    %o1,%o1,%o1
+       subcc   %o1,%g3,%o4
+       bcc     L(Ln2)
+       addxcc  %o2,%o2,%o2
+L(Lp2):        addx    %o1,%o1,%o1
+       subcc   %o1,%g3,%o4
+       bcc     L(Ln3)
+       addxcc  %o2,%o2,%o2
+L(Lp3):        addx    %o1,%o1,%o1
+       subcc   %o1,%g3,%o4
+       bcc     L(Ln4)
+       addxcc  %o2,%o2,%o2
+L(Lp4):        addx    %o1,%o1,%o1
+       addcc   %g1,-1,%g1      C pass counter
+       bne     L(Lplop)
+       subcc   %o1,%g3,%o4
+       bcc     L(Ln5)
+       addxcc  %o2,%o2,%o2
+L(Lp5):        add     %o1,%o1,%o1     C << 1
+       tst     %g2             C was d odd?
+       bne     L(oddp)
+       add     %o5,%o1,%o1     C r = 2 * r + (n0 & 1) (delay slot)
+       st      %o1,[%o0]       C d even: store remainder, no further correction
+       retl
+       xnor    %g0,%o2,%o0     C return quotient = complement of collected borrow bits
+
+L(Lnlop):
+       bcc     L(Lp1)
+       addxcc  %o2,%o2,%o2
+L(Ln1):        addx    %o4,%o4,%o4
+       subcc   %o4,%g3,%o1
+       bcc     L(Lp2)
+       addxcc  %o2,%o2,%o2
+L(Ln2):        addx    %o4,%o4,%o4
+       subcc   %o4,%g3,%o1
+       bcc     L(Lp3)
+       addxcc  %o2,%o2,%o2
+L(Ln3):        addx    %o4,%o4,%o4
+       subcc   %o4,%g3,%o1
+       bcc     L(Lp4)
+       addxcc  %o2,%o2,%o2
+L(Ln4):        addx    %o4,%o4,%o4
+       addcc   %g1,-1,%g1      C pass counter
+       bne     L(Lnlop)
+       subcc   %o4,%g3,%o1
+       bcc     L(Lp5)
+       addxcc  %o2,%o2,%o2
+L(Ln5):        add     %o4,%o4,%o4     C << 1
+       tst     %g2             C was d odd?
+       bne     L(oddn)
+       add     %o5,%o4,%o4     C r = 2 * r + (n0 & 1) (delay slot)
+       st      %o4,[%o0]       C d even: store remainder, no further correction
+       retl
+       xnor    %g0,%o2,%o0     C return quotient = complement of collected borrow bits
+
+L(oddp):
+       xnor    %g0,%o2,%o2     C complement the collected bits to get q
+       C q' in %o2. r' in %o1
+       addcc   %o1,%o2,%o1     C r = r + q, cy on overflow
+       bcc     L(Lp6)
+       addx    %o2,0,%o2       C q += cy (delay slot)
+       sub     %o1,%o3,%o1     C overflowed: r -= d
+L(Lp6):        subcc   %o1,%o3,%g0     C compare r with d
+       bcs     L(Lp7)          C r < d: done
+       subx    %o2,-1,%o2      C q += 1 - cy, so q grows only when r >= d (delay slot)
+       sub     %o1,%o3,%o1     C r >= d: r -= d
+L(Lp7):        st      %o1,[%o0]       C store remainder through rem_ptr
+       retl
+       mov     %o2,%o0         C return quotient (delay slot)
+
+L(oddn):
+       xnor    %g0,%o2,%o2     C complement the collected bits to get q
+       C q' in %o2. r' in %o4
+       addcc   %o4,%o2,%o4     C r = r + q, cy on overflow
+       bcc     L(Ln6)
+       addx    %o2,0,%o2       C q += cy (delay slot)
+       sub     %o4,%o3,%o4     C overflowed: r -= d
+L(Ln6):        subcc   %o4,%o3,%g0     C compare r with d
+       bcs     L(Ln7)          C r < d: done
+       subx    %o2,-1,%o2      C q += 1 - cy, so q grows only when r >= d (delay slot)
+       sub     %o4,%o3,%o4     C r >= d: r -= d
+L(Ln7):        st      %o4,[%o0]       C store remainder through rem_ptr
+       retl
+       mov     %o2,%o0         C return quotient (delay slot)
+EPILOGUE(mpn_udiv_qrnnd)
diff --git a/mpn/sparc32/umul.asm b/mpn/sparc32/umul.asm

new file mode 100644 (file)

index 0000000..80c8212
--- /dev/null
+++ b/mpn/sparc32/umul.asm
@@ -0,0 +1,66 @@
+dnl  SPARC mpn_umul_ppmm -- support for longlong.h for non-gcc.
+
+dnl  Copyright 1995, 1996, 2000 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_umul_ppmm)
+       wr      %g0,%o1,%y      C Y = o1, the multiplier for mulscc
+       sra     %o2,31,%g2      C Don't move this insn
+       and     %o1,%g2,%g2     C Don't move this insn
+       andcc   %g0,0,%g1       C Don't move this insn
+       mulscc  %g1,%o2,%g1     C 32 multiply steps, one per multiplier bit
+       mulscc  %g1,%o2,%g1
+       mulscc  %g1,%o2,%g1
+       mulscc  %g1,%o2,%g1
+       mulscc  %g1,%o2,%g1
+       mulscc  %g1,%o2,%g1
+       mulscc  %g1,%o2,%g1
+       mulscc  %g1,%o2,%g1
+       mulscc  %g1,%o2,%g1
+       mulscc  %g1,%o2,%g1
+       mulscc  %g1,%o2,%g1
+       mulscc  %g1,%o2,%g1
+       mulscc  %g1,%o2,%g1
+       mulscc  %g1,%o2,%g1
+       mulscc  %g1,%o2,%g1
+       mulscc  %g1,%o2,%g1
+       mulscc  %g1,%o2,%g1
+       mulscc  %g1,%o2,%g1
+       mulscc  %g1,%o2,%g1
+       mulscc  %g1,%o2,%g1
+       mulscc  %g1,%o2,%g1
+       mulscc  %g1,%o2,%g1
+       mulscc  %g1,%o2,%g1
+       mulscc  %g1,%o2,%g1
+       mulscc  %g1,%o2,%g1
+       mulscc  %g1,%o2,%g1
+       mulscc  %g1,%o2,%g1
+       mulscc  %g1,%o2,%g1
+       mulscc  %g1,%o2,%g1
+       mulscc  %g1,%o2,%g1
+       mulscc  %g1,%o2,%g1
+       mulscc  %g1,%o2,%g1
+       mulscc  %g1,0,%g1       C final shift step
+       rd      %y,%g3          C g3 = low limb of product
+       st      %g3,[%o0]       C store low limb through the pointer in o0
+       retl
+       add     %g1,%g2,%o0     C return high limb + sign-compensation (delay slot)
+EPILOGUE(mpn_umul_ppmm)
diff --git a/mpn/sparc32/v8/addmul_1.asm b/mpn/sparc32/v8/addmul_1.asm

new file mode 100644 (file)

index 0000000..6e5e788
--- /dev/null
+++ b/mpn/sparc32/v8/addmul_1.asm
@@ -0,0 +1,120 @@
+dnl  SPARC v8 mpn_addmul_1 -- Multiply a limb vector with a limb and
+dnl  add the result to a second limb vector.
+
+dnl  Copyright 1992, 1993, 1994, 1995, 2000 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr      o0
+C s1_ptr       o1
+C size         o2
+C s2_limb      o3
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+       orcc    %g0,%g0,%g2     C g2 = 0 (carry limb), cy cleared
+       ld      [%o1+0],%o4     C 1
+
+       sll     %o2,4,%g1       C each dispatch entry below is 4 insns = 16 bytes
+       and     %g1,(4-1)<<4,%g1        C g1 = 16 x (size mod 4)
+ifdef(`PIC',
+`      mov     %o7,%g4         C Save return address register
+0:     call    1f
+       add     %o7,L(1)-0b,%g3
+1:     mov     %g4,%o7         C Restore return address register
+',
+`      sethi   %hi(L(1)),%g3
+       or      %g3,%lo(L(1)),%g3
+')
+       jmp     %g3+%g1         C dispatch on size mod 4 into the 4-way unrolled loop
+       nop
+L(1):
+L(L00):        add     %o0,-4,%o0
+       b       L(loop00)       C 4, 8, 12, ...
+       add     %o1,-4,%o1
+       nop
+L(L01):        b       L(loop01)       C 1, 5, 9, ...
+       nop
+       nop
+       nop
+L(L10):        add     %o0,-12,%o0     C 2, 6, 10, ...
+       b       L(loop10)
+       add     %o1,4,%o1
+       nop
+L(L11):        add     %o0,-8,%o0      C 3, 7, 11, ...
+       b       L(loop11)
+       add     %o1,-8,%o1
+       nop
+
+L(loop):
+       addcc   %g3,%g2,%g3     C 1
+       ld      [%o1+4],%o4     C 2
+       rd      %y,%g2          C 1
+       addx    %g0,%g2,%g2
+       ld      [%o0+0],%g1     C 2
+       addcc   %g1,%g3,%g3
+       st      %g3,[%o0+0]     C 1
+L(loop00):
+       umul    %o4,%o3,%g3     C 2
+       ld      [%o0+4],%g1     C 2
+       addxcc  %g3,%g2,%g3     C 2
+       ld      [%o1+8],%o4     C 3
+       rd      %y,%g2          C 2
+       addx    %g0,%g2,%g2
+       nop
+       addcc   %g1,%g3,%g3
+       st      %g3,[%o0+4]     C 2
+L(loop11):
+       umul    %o4,%o3,%g3     C 3
+       addxcc  %g3,%g2,%g3     C 3
+       ld      [%o1+12],%o4    C 4
+       rd      %y,%g2          C 3
+       add     %o1,16,%o1
+       addx    %g0,%g2,%g2
+       ld      [%o0+8],%g1     C 2
+       addcc   %g1,%g3,%g3
+       st      %g3,[%o0+8]     C 3
+L(loop10):
+       umul    %o4,%o3,%g3     C 4
+       addxcc  %g3,%g2,%g3     C 4
+       ld      [%o1+0],%o4     C 1
+       rd      %y,%g2          C 4
+       addx    %g0,%g2,%g2
+       ld      [%o0+12],%g1    C 2
+       addcc   %g1,%g3,%g3
+       st      %g3,[%o0+12]    C 4
+       add     %o0,16,%o0
+       addx    %g0,%g2,%g2
+L(loop01):
+       addcc   %o2,-4,%o2      C size -= 4, loop while still positive
+       bg      L(loop)
+       umul    %o4,%o3,%g3     C 1
+
+       addcc   %g3,%g2,%g3     C 4
+       rd      %y,%g2          C 4
+       addx    %g0,%g2,%g2
+       ld      [%o0+0],%g1     C 2
+       addcc   %g1,%g3,%g3
+       st      %g3,[%o0+0]     C 4
+       addx    %g0,%g2,%o0     C return carry limb: high product limb + cy
+
+       retl
+        nop
+EPILOGUE(mpn_addmul_1)
diff --git a/mpn/sparc32/v8/gmp-mparam.h b/mpn/sparc32/v8/gmp-mparam.h

new file mode 100644 (file)

index 0000000..0962732
--- /dev/null
+++ b/mpn/sparc32/v8/gmp-mparam.h
@@ -0,0 +1,63 @@
+/* SPARC v8 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2004 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#define GMP_LIMB_BITS 32
+#define BYTES_PER_MP_LIMB 4
+
+/* Generated by tuneup.c, 2004-02-07, gcc 2.95 */
+
+#define MUL_TOOM22_THRESHOLD             10
+#define MUL_TOOM33_THRESHOLD             65
+
+#define SQR_BASECASE_THRESHOLD            4
+#define SQR_TOOM2_THRESHOLD              18
+#define SQR_TOOM3_THRESHOLD              65
+
+#define DIV_SB_PREINV_THRESHOLD           5
+#define DIV_DC_THRESHOLD                 24
+#define POWM_THRESHOLD                   38
+
+#define HGCD_THRESHOLD                   69
+#define GCD_ACCEL_THRESHOLD               3
+#define GCD_DC_THRESHOLD                498
+#define JACOBI_BASE_METHOD                2
+
+#define DIVREM_1_NORM_THRESHOLD           6
+#define DIVREM_1_UNNORM_THRESHOLD        11
+#define MOD_1_NORM_THRESHOLD              5
+#define MOD_1_UNNORM_THRESHOLD            9
+#define USE_PREINV_DIVREM_1               1
+#define USE_PREINV_MOD_1                  1
+#define DIVREM_2_THRESHOLD                0  /* always */
+#define DIVEXACT_1_THRESHOLD              0  /* always */
+#define MODEXACT_1_ODD_THRESHOLD          4
+
+#define GET_STR_DC_THRESHOLD             14
+#define GET_STR_PRECOMPUTE_THRESHOLD     23
+#define SET_STR_THRESHOLD              1679
+
+#define MUL_FFT_TABLE  { 272, 672, 1152, 2560, 10240, 24576, 0 }
+#define MUL_FFT_MODF_THRESHOLD          264
+#define MUL_FFT_THRESHOLD              1792
+
+#define SQR_FFT_TABLE  { 304, 672, 1152, 3584, 10240, 24576, 0 }
+#define SQR_FFT_MODF_THRESHOLD          264
+#define SQR_FFT_THRESHOLD              1728
diff --git a/mpn/sparc32/v8/mul_1.asm b/mpn/sparc32/v8/mul_1.asm

new file mode 100644 (file)

index 0000000..d428deb
--- /dev/null
+++ b/mpn/sparc32/v8/mul_1.asm
@@ -0,0 +1,101 @@
+dnl  SPARC v8 mpn_mul_1 -- Multiply a limb vector with a single limb and
+dnl  store the product in a second limb vector.
+
+dnl  Copyright 1992, 1994, 1995, 2000 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr      o0      destination; one product limb is stored per source limb
+C s1_ptr       o1      source limb vector
+C size         o2      limb count; size mod 4 selects the loop entry point
+C s2_limb      o3      multiplier limb
+C Returns in o0 the high word of the last product plus the pending carry.
+ASM_START()
+PROLOGUE(mpn_mul_1)
+       sll     %o2,4,%g1               C g1 = size * 16
+       and     %g1,(4-1)<<4,%g1        C g1 = (size mod 4) * 16, a byte offset from L(1)
+ifdef(`PIC',
+`      mov     %o7,%g4         C Save return address register
+0:     call    1f
+       add     %o7,L(1)-0b,%g3
+1:     mov     %g4,%o7         C Restore return address register
+',
+`      sethi   %hi(L(1)),%g3
+       or      %g3,%lo(L(1)),%g3
+')
+       jmp     %g3+%g1                 C computed jump into the entry stubs below
+       ld      [%o1+0],%o4     C 1 (delay slot: first source limb)
+L(1):
+L(L00):        add     %o0,-4,%o0      C bias res_ptr: loop00 stores at o0+4
+       add     %o1,-4,%o1              C bias s1_ptr: loop00 loads from o1+8
+       b       L(loop00)       C 4, 8, 12, ...
+       orcc    %g0,%g0,%g2             C delay slot: carry limb g2 = 0 and carry flag clear
+L(L01):        b       L(loop01)       C 1, 5, 9, ...
+       orcc    %g0,%g0,%g2
+       nop                             C padding up to the next 16-byte entry
+       nop
+L(L10):        add     %o0,-12,%o0     C 2, 6, 10, ...
+       add     %o1,4,%o1
+       b       L(loop10)
+       orcc    %g0,%g0,%g2
+       nop                             C offset 48 from L(1): the size mod 4 = 3 jump lands here and falls into L11
+L(L11):        add     %o0,-8,%o0      C 3, 7, 11, ...
+       add     %o1,-8,%o1
+       b       L(loop11)
+       orcc    %g0,%g0,%g2
+
+L(loop):
+       addcc   %g3,%g2,%g3     C 1: low product + carry limb (carry flag was already folded into g2)
+       ld      [%o1+4],%o4     C 2
+       st      %g3,[%o0+0]     C 1
+       rd      %y,%g2          C 1: g2 = high product word, the next carry limb
+L(loop00):
+       umul    %o4,%o3,%g3     C 2
+       addxcc  %g3,%g2,%g3     C 2: low product + carry limb + carry flag
+       ld      [%o1+8],%o4     C 3
+       st      %g3,[%o0+4]     C 2
+       rd      %y,%g2          C 2
+L(loop11):
+       umul    %o4,%o3,%g3     C 3
+       addxcc  %g3,%g2,%g3     C 3
+       ld      [%o1+12],%o4    C 4
+       add     %o1,16,%o1      C advance s1_ptr by four limbs
+       st      %g3,[%o0+8]     C 3
+       rd      %y,%g2          C 3
+L(loop10):
+       umul    %o4,%o3,%g3     C 4
+       addxcc  %g3,%g2,%g3     C 4
+       ld      [%o1+0],%o4     C 1
+       st      %g3,[%o0+12]    C 4
+       add     %o0,16,%o0      C advance res_ptr by four limbs
+       rd      %y,%g2          C 4
+       addx    %g0,%g2,%g2     C fold the carry flag into g2; the addcc below overwrites the flag
+L(loop01):
+       addcc   %o2,-4,%o2      C four fewer limbs to go
+       bg      L(loop)
+       umul    %o4,%o3,%g3     C 1
+
+       addcc   %g3,%g2,%g3     C 4
+       st      %g3,[%o0+0]     C 4
+       rd      %y,%g2          C 4
+
+       retl
+       addx    %g0,%g2,%o0     C delay slot: return high word + carry flag
+EPILOGUE(mpn_mul_1)
diff --git a/mpn/sparc32/v8/submul_1.asm b/mpn/sparc32/v8/submul_1.asm

new file mode 100644 (file)

index 0000000..4dde012
--- /dev/null
+++ b/mpn/sparc32/v8/submul_1.asm
@@ -0,0 +1,56 @@
+dnl  SPARC v8 mpn_submul_1 -- Multiply a limb vector with a limb and
+dnl  subtract the result from a second limb vector.
+
+dnl  Copyright 1992, 1993, 1994, 2000 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr      o0      limb vector that is read, reduced and written back
+C s1_ptr       o1      source limb vector
+C size         o2      limb count; the loop body runs before the index is tested
+C s2_limb      o3      multiplier limb
+C Returns cy_limb in o0: high product word plus carry and borrow of the last limb.
+ASM_START()
+PROLOGUE(mpn_submul_1)
+       sub     %g0,%o2,%o2             C negate ...
+       sll     %o2,2,%o2               C ... and scale size
+       sub     %o1,%o2,%o1             C o1 = s1_ptr + 4*size, indexed with the negative offset in o2
+       sub     %o0,%o2,%g1             C g1 = res_ptr + 4*size, indexed the same way
+
+       mov     0,%o0                   C clear cy_limb
+
+L(loop):
+       ld      [%o1+%o2],%o4           C o4 = current s1 limb
+       ld      [%g1+%o2],%g2           C g2 = current res limb
+       umul    %o4,%o3,%o5             C o5 = low product word, Y = high product word
+       rd      %y,%g3                  C g3 = high product word
+       addcc   %o5,%o0,%o5             C low word + incoming cy_limb
+       addx    %g3,0,%o0               C cy_limb = high word + carry of that add
+       subcc   %g2,%o5,%g2             C res limb - low word
+       addx    %o0,0,%o0               C cy_limb += borrow of the subtract
+       st      %g2,[%g1+%o2]           C write the res limb back
+
+       addcc   %o2,4,%o2               C step the negative offset toward zero
+       bne     L(loop)                 C zero offset: all limbs done
+        nop
+
+       retl                            C cy_limb is already in o0
+        nop
+EPILOGUE(mpn_submul_1)
diff --git a/mpn/sparc32/v8/supersparc/gmp-mparam.h b/mpn/sparc32/v8/supersparc/gmp-mparam.h

new file mode 100644 (file)

index 0000000..130e9a5
--- /dev/null
+++ b/mpn/sparc32/v8/supersparc/gmp-mparam.h
@@ -0,0 +1,63 @@
+/* SuperSPARC gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2004 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#define GMP_LIMB_BITS 32
+#define BYTES_PER_MP_LIMB 4
+
+/* Generated by tuneup.c, 2004-02-10, gcc 3.3 */
+
+#define MUL_TOOM22_THRESHOLD             14
+#define MUL_TOOM33_THRESHOLD             81
+
+#define SQR_BASECASE_THRESHOLD            5
+#define SQR_TOOM2_THRESHOLD              28
+#define SQR_TOOM3_THRESHOLD              86
+
+#define DIV_SB_PREINV_THRESHOLD           0  /* always */
+#define DIV_DC_THRESHOLD                 26
+#define POWM_THRESHOLD                   79
+
+#define HGCD_THRESHOLD                   97
+#define GCD_ACCEL_THRESHOLD               3
+#define GCD_DC_THRESHOLD                470
+#define JACOBI_BASE_METHOD                2
+
+#define DIVREM_1_NORM_THRESHOLD           0  /* always */
+#define DIVREM_1_UNNORM_THRESHOLD         3
+#define MOD_1_NORM_THRESHOLD              0  /* always */
+#define MOD_1_UNNORM_THRESHOLD            3
+#define USE_PREINV_DIVREM_1               1
+#define USE_PREINV_MOD_1                  1
+#define DIVREM_2_THRESHOLD                0  /* always */
+#define DIVEXACT_1_THRESHOLD              0  /* always */
+#define MODEXACT_1_ODD_THRESHOLD          0  /* always */
+
+#define GET_STR_DC_THRESHOLD             19
+#define GET_STR_PRECOMPUTE_THRESHOLD     34
+#define SET_STR_THRESHOLD              3524
+
+#define MUL_FFT_TABLE  { 304, 800, 1408, 3584, 10240, 24576, 0 }
+#define MUL_FFT_MODF_THRESHOLD          264
+#define MUL_FFT_THRESHOLD              2304
+
+#define SQR_FFT_TABLE  { 336, 800, 1408, 3584, 10240, 24576, 0 }
+#define SQR_FFT_MODF_THRESHOLD          280
+#define SQR_FFT_THRESHOLD              2304
diff --git a/mpn/sparc32/v8/supersparc/udiv.asm b/mpn/sparc32/v8/supersparc/udiv.asm

new file mode 100644 (file)

index 0000000..2ce3b8f
--- /dev/null
+++ b/mpn/sparc32/v8/supersparc/udiv.asm
@@ -0,0 +1,120 @@
+dnl  SuperSPARC mpn_udiv_qrnnd division support, used from longlong.h.
+dnl  This is for SuperSPARC only, to compensate for its semi-functional
+dnl  udiv instruction.
+
+dnl  Copyright 1993, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C rem_ptr      i0      where the remainder word is stored
+C n1           i1      high word of the dividend
+C n0           i2      low word of the dividend
+C d            i3      divisor
+C Returns the quotient word in i0 (the o0 of the caller once restore has run).
+ASM_START()
+
+ifdef(`PIC',
+`      TEXT
+L(getpc):
+       retl
+       nop')
+
+       TEXT
+       ALIGN(8)
+L(C0): .double 0r4294967296            C 2^32
+L(C1): .double 0r2147483648            C 2^31
+
+PROLOGUE(mpn_udiv_qrnnd)
+       save    %sp,-104,%sp            C new window and frame; the word at fp-8 is the int to FP transfer slot
+       st      %i1,[%fp-8]
+       ld      [%fp-8],%f10            C f10 = n1 bits
+
+ifdef(`PIC',
+`L(pc):        call    L(getpc)                C put address of this insn in %o7
+       ldd     [%o7+L(C0)-L(pc)],%f8',
+`      sethi   %hi(L(C0)),%o7
+       ldd     [%o7+%lo(L(C0))],%f8')
+
+       fitod   %f10,%f4                C f4 = n1 converted as a signed integer
+       cmp     %i1,0
+       bge     L(248)                  C bit 31 clear: the signed conversion is already right
+       mov     %i0,%i5                 C delay slot: keep rem_ptr in i5, i0 is reused below
+       faddd   %f4,%f8,%f4             C bit 31 set: add 2^32 to get the unsigned value
+L(248):
+       st      %i2,[%fp-8]
+       ld      [%fp-8],%f10            C f10 = n0 bits
+       fmuld   %f4,%f8,%f6             C f6 = n1 * 2^32
+       cmp     %i2,0
+       bge     L(249)
+       fitod   %f10,%f2                C delay slot: f2 = n0 converted as signed
+       faddd   %f2,%f8,%f2             C bit 31 set: add 2^32
+L(249):
+       st      %i3,[%fp-8]
+       faddd   %f6,%f2,%f2             C f2 = n1 * 2^32 + n0
+       ld      [%fp-8],%f10            C f10 = d bits
+       cmp     %i3,0
+       bge     L(250)
+       fitod   %f10,%f4                C delay slot: f4 = d converted as signed
+       faddd   %f4,%f8,%f4             C bit 31 set: add 2^32
+L(250):
+       fdivd   %f2,%f4,%f2             C f2 = floating-point quotient
+
+ifdef(`PIC',
+`      ldd     [%o7+L(C1)-L(pc)],%f4',
+`      sethi   %hi(L(C1)),%o7
+       ldd     [%o7+%lo(L(C1))],%f4')
+
+       fcmped  %f2,%f4                 C compare the quotient with 2^31
+       nop                             C presumably the gap SPARC V8 wants between an FP compare and the FP branch
+       fbge,a  L(251)                  C quotient >= 2^31: take the biased conversion path
+       fsubd   %f2,%f4,%f2             C annulled delay slot, runs only when the branch is taken: subtract 2^31
+       fdtoi   %f2,%f2                 C quotient below 2^31: convert directly
+       st      %f2,[%fp-8]
+       b       L(252)
+       ld      [%fp-8],%i4             C delay slot: i4 = quotient estimate
+L(251):
+       fdtoi   %f2,%f2                 C convert quotient - 2^31
+       st      %f2,[%fp-8]
+       ld      [%fp-8],%i4
+       sethi   %hi(-2147483648),%g2    C g2 = 0x80000000
+       xor     %i4,%g2,%i4             C flip bit 31, putting back the 2^31 subtracted above
+L(252):
+       umul    %i3,%i4,%g3             C g3 = low word of d * estimate, Y = high word
+       rd      %y,%i0                  C i0 = high word of d * estimate
+       subcc   %i2,%g3,%o7             C o7 = n0 - low word, the candidate remainder
+       subxcc  %i1,%i0,%g0             C flags only: n1 - high word - borrow
+       be      L(253)                  C high part of the difference is zero
+       cmp     %o7,%i3                 C delay slot: compare candidate remainder with d for L(253)
+
+       add     %i4,-1,%i0              C high part nonzero: return estimate - 1 ...
+       add     %o7,%i3,%o7             C ... and add d back into the remainder
+       st      %o7,[%i5]
+       ret
+       restore
+L(253):
+       blu     L(246)                  C remainder below d (unsigned): estimate is final
+       mov     %i4,%i0                 C delay slot: quotient = estimate
+       add     %i4,1,%i0               C otherwise return estimate + 1 ...
+       sub     %o7,%i3,%o7             C ... and take d off the remainder
+L(246):
+       st      %o7,[%i5]               C store the remainder through the saved rem_ptr
+       ret
+       restore
+EPILOGUE(mpn_udiv_qrnnd)
diff --git a/mpn/sparc32/v8/udiv.asm b/mpn/sparc32/v8/udiv.asm

new file mode 100644 (file)

index 0000000..2ce3b8f
--- /dev/null
+++ b/mpn/sparc32/v8/udiv.asm
@@ -0,0 +1,120 @@
+dnl  SuperSPARC mpn_udiv_qrnnd division support, used from longlong.h.
+dnl  This is for SuperSPARC only, to compensate for its semi-functional
+dnl  udiv instruction.
+
+dnl  Copyright 1993, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C rem_ptr      i0      where the remainder word is stored
+C n1           i1      high word of the dividend
+C n0           i2      low word of the dividend
+C d            i3      divisor
+C Returns the quotient word in i0 (the o0 of the caller once restore has run).
+ASM_START()
+
+ifdef(`PIC',
+`      TEXT
+L(getpc):
+       retl
+       nop')
+
+       TEXT
+       ALIGN(8)
+L(C0): .double 0r4294967296            C 2^32
+L(C1): .double 0r2147483648            C 2^31
+
+PROLOGUE(mpn_udiv_qrnnd)
+       save    %sp,-104,%sp            C new window and frame; the word at fp-8 is the int to FP transfer slot
+       st      %i1,[%fp-8]
+       ld      [%fp-8],%f10            C f10 = n1 bits
+
+ifdef(`PIC',
+`L(pc):        call    L(getpc)                C put address of this insn in %o7
+       ldd     [%o7+L(C0)-L(pc)],%f8',
+`      sethi   %hi(L(C0)),%o7
+       ldd     [%o7+%lo(L(C0))],%f8')
+
+       fitod   %f10,%f4                C f4 = n1 converted as a signed integer
+       cmp     %i1,0
+       bge     L(248)                  C bit 31 clear: the signed conversion is already right
+       mov     %i0,%i5                 C delay slot: keep rem_ptr in i5, i0 is reused below
+       faddd   %f4,%f8,%f4             C bit 31 set: add 2^32 to get the unsigned value
+L(248):
+       st      %i2,[%fp-8]
+       ld      [%fp-8],%f10            C f10 = n0 bits
+       fmuld   %f4,%f8,%f6             C f6 = n1 * 2^32
+       cmp     %i2,0
+       bge     L(249)
+       fitod   %f10,%f2                C delay slot: f2 = n0 converted as signed
+       faddd   %f2,%f8,%f2             C bit 31 set: add 2^32
+L(249):
+       st      %i3,[%fp-8]
+       faddd   %f6,%f2,%f2             C f2 = n1 * 2^32 + n0
+       ld      [%fp-8],%f10            C f10 = d bits
+       cmp     %i3,0
+       bge     L(250)
+       fitod   %f10,%f4                C delay slot: f4 = d converted as signed
+       faddd   %f4,%f8,%f4             C bit 31 set: add 2^32
+L(250):
+       fdivd   %f2,%f4,%f2             C f2 = floating-point quotient
+
+ifdef(`PIC',
+`      ldd     [%o7+L(C1)-L(pc)],%f4',
+`      sethi   %hi(L(C1)),%o7
+       ldd     [%o7+%lo(L(C1))],%f4')
+
+       fcmped  %f2,%f4                 C compare the quotient with 2^31
+       nop                             C presumably the gap SPARC V8 wants between an FP compare and the FP branch
+       fbge,a  L(251)                  C quotient >= 2^31: take the biased conversion path
+       fsubd   %f2,%f4,%f2             C annulled delay slot, runs only when the branch is taken: subtract 2^31
+       fdtoi   %f2,%f2                 C quotient below 2^31: convert directly
+       st      %f2,[%fp-8]
+       b       L(252)
+       ld      [%fp-8],%i4             C delay slot: i4 = quotient estimate
+L(251):
+       fdtoi   %f2,%f2                 C convert quotient - 2^31
+       st      %f2,[%fp-8]
+       ld      [%fp-8],%i4
+       sethi   %hi(-2147483648),%g2    C g2 = 0x80000000
+       xor     %i4,%g2,%i4             C flip bit 31, putting back the 2^31 subtracted above
+L(252):
+       umul    %i3,%i4,%g3             C g3 = low word of d * estimate, Y = high word
+       rd      %y,%i0                  C i0 = high word of d * estimate
+       subcc   %i2,%g3,%o7             C o7 = n0 - low word, the candidate remainder
+       subxcc  %i1,%i0,%g0             C flags only: n1 - high word - borrow
+       be      L(253)                  C high part of the difference is zero
+       cmp     %o7,%i3                 C delay slot: compare candidate remainder with d for L(253)
+
+       add     %i4,-1,%i0              C high part nonzero: return estimate - 1 ...
+       add     %o7,%i3,%o7             C ... and add d back into the remainder
+       st      %o7,[%i5]
+       ret
+       restore
+L(253):
+       blu     L(246)                  C remainder below d (unsigned): estimate is final
+       mov     %i4,%i0                 C delay slot: quotient = estimate
+       add     %i4,1,%i0               C otherwise return estimate + 1 ...
+       sub     %o7,%i3,%o7             C ... and take d off the remainder
+L(246):
+       st      %o7,[%i5]               C store the remainder through the saved rem_ptr
+       ret
+       restore
+EPILOGUE(mpn_udiv_qrnnd)
diff --git a/mpn/sparc32/v8/umul.asm b/mpn/sparc32/v8/umul.asm

new file mode 100644 (file)

index 0000000..569a4e8
--- /dev/null
+++ b/mpn/sparc32/v8/umul.asm
@@ -0,0 +1,29 @@
+dnl  SPARC v8 mpn_umul_ppmm -- support for longlong.h for non-gcc.
+
+dnl  Copyright 1995, 1996, 2000 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_umul_ppmm)
+       umul    %o1,%o2,%g2             C g2 = low word of o1 * o2; the high word goes to Y
+       st      %g2,[%o0]               C store the low word through the pointer in o0
+       retl                            C leaf return, no register window was opened
+       rd      %y,%o0                  C delay slot: the high word is the return value
+EPILOGUE(mpn_umul_ppmm)
diff --git a/mpn/sparc32/v9/README b/mpn/sparc32/v9/README

new file mode 100644 (file)

index 0000000..9b39713
--- /dev/null
+++ b/mpn/sparc32/v9/README
@@ -0,0 +1,4 @@
+Code for SPARC processors implementing version 9 of the SPARC architecture.
+This code is for systems that do not preserve the full 64-bit contents of
+integer registers at context switch.  For other systems (such as Solaris 7 or
+later) use the code in ../../sparc64.
diff --git a/mpn/sparc32/v9/add_n.asm b/mpn/sparc32/v9/add_n.asm

new file mode 100644 (file)

index 0000000..a21cf10
--- /dev/null
+++ b/mpn/sparc32/v9/add_n.asm
@@ -0,0 +1,118 @@
+dnl  SPARC mpn_add_n -- Add two limb vectors of the same length > 0 and store
+dnl  sum in a third limb vector.
+
+dnl  Copyright 2001 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+define(rp,%o0)                         C destination limb vector
+define(s1p,%o1)                        C first source limb vector
+define(s2p,%o2)                        C second source limb vector
+define(n,%o3)                          C limb count
+define(cy,%g1)                         C scratch: carry between limbs
+C Returns the carry out of the top limb in o0.
+C This code uses 64-bit operations on `o' and `g' registers.  It doesn't
+C require that `o' registers' upper 32 bits are preserved by the operating
+C system, but if they are not, they must be zeroed.  That is indeed what
+C happens at least on Slowaris 2.5 and 2.6.
+
+C On UltraSPARC 1 and 2, this code runs at 3 cycles/limb from the Dcache and at
+C about 10 cycles/limb from the Ecache.
+
+ASM_START()
+PROLOGUE(mpn_add_n)
+       lduw    [s1p+0],%o4             C o4 = first limb of s1, loaded unsigned
+       lduw    [s2p+0],%o5             C o5 = first limb of s2
+       addcc   n,-2,n                  C n -= 2; the flags pick the short paths below
+       bl,pn   %icc,L(end1)            C n was 1
+       lduw    [s1p+4],%g2             C delay slot, not annulled: the second s1 limb is read even when n was 1
+       lduw    [s2p+4],%g3             C g3 = second limb of s2
+       be,pn   %icc,L(end2)            C n was 2 (flags still come from the addcc)
+       mov     0,cy                    C delay slot: clear the carry
+
+       .align  16
+L(loop):
+       add     %o4,%o5,%g4             C 64-bit sum of the even limb pair
+       add     rp,8,rp                 C two limbs are handled per pass
+       lduw    [s1p+8],%o4             C preload the next even limb of s1
+       fitod   %f0,%f2                 C FP filler op; f2 is not read anywhere in this routine
+C ---
+       add     cy,%g4,%g4              C add the carry in
+       addcc   n,-1,n
+       lduw    [s2p+8],%o5             C preload the next even limb of s2
+       fitod   %f0,%f2
+C ---
+       srlx    %g4,32,cy               C carry out = bit 32 of the sum
+       add     s2p,8,s2p
+       stw     %g4,[rp-8]              C store the low 32 bits
+       be,pn   %icc,L(exito)+4         C n reached zero; +4 skips the first insn of exito, which the delay slot performs
+C ---
+       add     %g2,%g3,%g4             C delay slot and odd pair sum (same insn as the first one of exito)
+       addcc   n,-1,n
+       lduw    [s1p+12],%g2            C preload the next odd limb of s1
+       fitod   %f0,%f2
+C ---
+       add     cy,%g4,%g4
+       add     s1p,8,s1p
+       lduw    [s2p+4],%g3             C s2p was already advanced, so +4 is the next odd limb of s2
+       fitod   %f0,%f2
+C ---
+       srlx    %g4,32,cy
+       bne,pt  %icc,L(loop)
+       stw     %g4,[rp-4]              C delay slot: store the odd limb
+C ---
+L(exite):
+       add     %o4,%o5,%g4             C tail with two limbs pending: even pair first
+       add     cy,%g4,%g4
+       srlx    %g4,32,cy
+       stw     %g4,[rp+0]
+       add     %g2,%g3,%g4             C then the odd pair
+       add     cy,%g4,%g4
+       stw     %g4,[rp+4]
+       retl
+       srlx    %g4,32,%o0              C delay slot: return the carry out
+
+L(exito):
+       add     %g2,%g3,%g4             C tail entered from the loop at +4: odd pair first
+       add     cy,%g4,%g4
+       srlx    %g4,32,cy
+       stw     %g4,[rp-4]
+       add     %o4,%o5,%g4             C then the preloaded even pair
+       add     cy,%g4,%g4
+       stw     %g4,[rp+0]
+       retl
+       srlx    %g4,32,%o0              C delay slot: return the carry out
+
+L(end1):
+       add     %o4,%o5,%g4             C single limb: no carry in
+       stw     %g4,[rp+0]
+       retl
+       srlx    %g4,32,%o0              C delay slot: return the carry out
+
+L(end2):
+       add     %o4,%o5,%g4             C two limbs: low limb first
+       srlx    %g4,32,cy
+       stw     %g4,[rp+0]
+       add     %g2,%g3,%g4
+       add     cy,%g4,%g4
+       stw     %g4,[rp+4]
+       retl
+       srlx    %g4,32,%o0              C delay slot: return the carry out
+EPILOGUE(mpn_add_n)
diff --git a/mpn/sparc32/v9/addmul_1.asm b/mpn/sparc32/v9/addmul_1.asm

new file mode 100644 (file)

index 0000000..18b9a72
--- /dev/null
+++ b/mpn/sparc32/v9/addmul_1.asm
@@ -0,0 +1,295 @@
+dnl  SPARC v9 32-bit mpn_addmul_1 -- Multiply a limb vector with a limb and add
+dnl  the result to a second limb vector.
+
+dnl  Copyright 1998, 2000, 2001, 2003 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C Algorithm: We use two floating-point multiplies per limb product, with the
+C invariant v operand split into two 16-bit pieces, and the u operand split
+C into 32-bit pieces.  We convert the two 48-bit products and transfer them to
+C the integer unit.
+
+C                 cycles/limb
+C UltraSPARC 1&2:     6.5
+C UltraSPARC 3:              ?
+
+C Possible optimizations:
+C   1. Combine 32-bit memory operations into 64-bit operations.  Since we're
+C      memory bandwidth limited, this could save 1.5 cycles/limb.
+C   2. Unroll the inner loop.  Since we already use alternate temporary areas,
+C      it is very straightforward to unroll, using an exit branch midways.
+C      Unrolling would allow deeper scheduling which could improve speed for L2
+C      cache case.
+C   3. For mpn_mul_1: Use more alternating temp areas.  The std'es and ldx'es
+C      aren't sufficiently apart-scheduled with just two temp areas.
+C   4. Specialize for particular v values.  If its upper 16 bits are zero, we
+C      could save many operations.
+
+C INPUT PARAMETERS (no save is done, so they are read from the o registers)
+C rp   o0      limb vector that is read, added to and written back
+C up   o1      source limb vector
+C n    o2      limb count
+C v    o3      multiplier limb
+C Returns the final carry limb in o0.
+define(`FSIZE',224)
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+       add     %sp, -FSIZE, %sp        C reserve FSIZE bytes of stack; released in the final delay slot
+       sethi   %hi(0xffff), %g1
+       srl     %o3, 16, %g2            C g2 = upper 16 bits of v
+       or      %g1, %lo(0xffff), %g1   C g1 = 0xffff
+       and     %o3, %g1, %g1           C g1 = lower 16 bits of v
+       stx     %g1, [%sp+104]          C hand both halves to the FPU through memory
+       stx     %g2, [%sp+112]
+       ldd     [%sp+104], %f6
+       ldd     [%sp+112], %f8
+       fxtod   %f6, %f6                C f6 = lower half of v as a double
+       fxtod   %f8, %f8                C f8 = upper half of v as a double
+       ld      [%sp+104], %f10         C zero f10
+
+       mov     0, %g3                  C cy = 0
+
+define(`fanop', `fitod %f18, %f0')     C  A quasi nop running in the FA pipe
+
+       add     %sp, 160, %o5           C point in scratch area
+       and     %o5, -32, %o5           C align at 0 (mod 32) in scratch area
+
+       subcc   %o2, 1, %o2             C n -= 1; zero means a single limb
+       ld      [%o1], %f11             C read up[i]
+       add     %o1, 4, %o1             C up++
+       bne,pt  %icc, .L_two_or_more
+       fxtod   %f10, %f2               C delay slot: f2 = up[i] as a double, taken from the f10:f11 pair
+
+       fmuld   %f2, %f8, %f16          C f16 = up[i] * upper half of v
+       fmuld   %f2, %f6, %f4           C f4 = up[i] * lower half of v
+       fdtox   %f16, %f14              C convert both products to 64-bit integers
+       fdtox   %f4, %f12
+       std     %f14, [%o5+16]          C move them to the integer unit through the scratch area
+       std     %f12, [%o5+24]
+       ldx     [%o5+16], %g2           C p16
+       ldx     [%o5+24], %g1           C p0
+       lduw    [%o0], %g5              C read rp[i]
+       b       .L1
+       add     %o0, -16, %o0           C delay slot: bias rp so the store at +16 in .L1 hits this limb
+
+       .align  16
+.L_two_or_more:
+       subcc   %o2, 1, %o2
+       ld      [%o1], %f11             C read up[i]
+       fmuld   %f2, %f8, %f16
+       fmuld   %f2, %f6, %f4
+       add     %o1, 4, %o1             C up++
+       bne,pt  %icc, .L_three_or_more
+       fxtod   %f10, %f2
+
+       fdtox   %f16, %f14
+       fdtox   %f4, %f12
+       std     %f14, [%o5+16]
+       fmuld   %f2, %f8, %f16
+       std     %f12, [%o5+24]
+       fmuld   %f2, %f6, %f4
+       fdtox   %f16, %f14
+       fdtox   %f4, %f12
+       std     %f14, [%o5+0]
+       std     %f12, [%o5+8]
+       lduw    [%o0], %g5              C read rp[i]
+       ldx     [%o5+16], %g2           C p16
+       ldx     [%o5+24], %g1           C p0
+       b       .L2
+       add     %o0, -12, %o0           C delay slot: bias rp for the store at +12 in .L2
+
+       .align  16
+.L_three_or_more:
+       subcc   %o2, 1, %o2
+       ld      [%o1], %f11             C read up[i]
+       fdtox   %f16, %f14
+       fdtox   %f4, %f12
+       std     %f14, [%o5+16]
+       fmuld   %f2, %f8, %f16
+       std     %f12, [%o5+24]
+       fmuld   %f2, %f6, %f4
+       add     %o1, 4, %o1             C up++
+       bne,pt  %icc, .L_four_or_more
+       fxtod   %f10, %f2
+
+       fdtox   %f16, %f14
+       fdtox   %f4, %f12
+       std     %f14, [%o5+0]
+       fmuld   %f2, %f8, %f16
+       std     %f12, [%o5+8]
+       fmuld   %f2, %f6, %f4
+       fdtox   %f16, %f14
+       ldx     [%o5+16], %g2           C p16
+       fdtox   %f4, %f12
+       ldx     [%o5+24], %g1           C p0
+       std     %f14, [%o5+16]
+       std     %f12, [%o5+24]
+       lduw    [%o0], %g5              C read rp[i]
+       b       .L3
+       add     %o0, -8, %o0            C delay slot: bias rp for the store at +8 in .L3
+
+       .align  16
+.L_four_or_more:
+       subcc   %o2, 1, %o2
+       ld      [%o1], %f11             C read up[i]
+       fdtox   %f16, %f14
+       fdtox   %f4, %f12
+       std     %f14, [%o5+0]
+       fmuld   %f2, %f8, %f16
+       std     %f12, [%o5+8]
+       fmuld   %f2, %f6, %f4
+       add     %o1, 4, %o1             C up++
+       bne,pt  %icc, .L_five_or_more
+       fxtod   %f10, %f2
+
+       fdtox   %f16, %f14
+       ldx     [%o5+16], %g2           C p16
+       fdtox   %f4, %f12
+       ldx     [%o5+24], %g1           C p0
+       std     %f14, [%o5+16]
+       fmuld   %f2, %f8, %f16
+       std     %f12, [%o5+24]
+       fmuld   %f2, %f6, %f4
+       add     %o1, 4, %o1             C up++
+       lduw    [%o0], %g5              C read rp[i]
+       b       .L4
+       add     %o0, -4, %o0            C delay slot: bias rp for the store at +4 in .L4
+
+       .align  16
+.L_five_or_more:
+       subcc   %o2, 1, %o2
+       ld      [%o1], %f11             C read up[i]
+       fdtox   %f16, %f14
+       ldx     [%o5+16], %g2           C p16
+       fdtox   %f4, %f12
+       ldx     [%o5+24], %g1           C p0
+       std     %f14, [%o5+16]
+       fmuld   %f2, %f8, %f16
+       std     %f12, [%o5+24]
+       fmuld   %f2, %f6, %f4
+       add     %o1, 4, %o1             C up++
+       lduw    [%o0], %g5              C read rp[i]
+       bne,pt  %icc, .Loop
+       fxtod   %f10, %f2
+       b,a     .L5
+
+C BEGIN MAIN LOOP
+       .align 16
+C -- 0
+.Loop: nop
+       subcc   %o2, 1, %o2
+       ld      [%o1], %f11             C read up[i]
+       fdtox   %f16, %f14
+C -- 1
+       sllx    %g2, 16, %g4            C (p16 << 16)
+       add     %o0, 4, %o0             C rp++
+       ldx     [%o5+0], %g2            C p16
+       fdtox   %f4, %f12
+C -- 2
+       nop
+       add     %g1, %g4, %g4           C p = p0 + (p16 << 16)
+       ldx     [%o5+8], %g1            C p0
+       fanop
+C -- 3
+       nop
+       add     %g3, %g4, %g4           C p += cy
+       std     %f14, [%o5+0]
+       fmuld   %f2, %f8, %f16
+C -- 4
+       nop
+       add     %g5, %g4, %g4           C p += rp[i]
+       std     %f12, [%o5+8]
+       fmuld   %f2, %f6, %f4
+C -- 5
+       xor     %o5, 16, %o5            C alternate scratch variables
+       add     %o1, 4, %o1             C up++
+       stw     %g4, [%o0-4]
+       fanop
+C -- 6
+       srlx    %g4, 32, %g3            C new cy
+       lduw    [%o0], %g5              C read rp[i]
+       bne,pt  %icc, .Loop
+       fxtod   %f10, %f2
+C END MAIN LOOP
+
+.L5:   fdtox   %f16, %f14
+       sllx    %g2, 16, %g4            C (p16 << 16)
+       ldx     [%o5+0], %g2            C p16
+       fdtox   %f4, %f12
+       add     %g1, %g4, %g4           C p = p0 + (p16 << 16)
+       ldx     [%o5+8], %g1            C p0
+       add     %g4, %g3, %g4           C p += cy
+       std     %f14, [%o5+0]
+       fmuld   %f2, %f8, %f16
+       add     %g5, %g4, %g4           C p += rp[i]
+       std     %f12, [%o5+8]
+       fmuld   %f2, %f6, %f4
+       xor     %o5, 16, %o5
+       stw     %g4, [%o0+0]
+       srlx    %g4, 32, %g3            C new cy
+       lduw    [%o0+4], %g5            C read rp[i]
+
+.L4:   fdtox   %f16, %f14
+       sllx    %g2, 16, %g4            C (p16 << 16)
+       ldx     [%o5+0], %g2            C p16
+       fdtox   %f4, %f12
+       add     %g1, %g4, %g4           C p = p0 + (p16 << 16)
+       ldx     [%o5+8], %g1            C p0
+       add     %g3, %g4, %g4           C p += cy
+       std     %f14, [%o5+0]
+       add     %g5, %g4, %g4           C p += rp[i]
+       std     %f12, [%o5+8]
+       xor     %o5, 16, %o5
+       stw     %g4, [%o0+4]
+       srlx    %g4, 32, %g3            C new cy
+       lduw    [%o0+8], %g5            C read rp[i]
+
+.L3:   sllx    %g2, 16, %g4            C (p16 << 16)
+       ldx     [%o5+0], %g2            C p16
+       add     %g1, %g4, %g4           C p = p0 + (p16 << 16)
+       ldx     [%o5+8], %g1            C p0
+       add     %g3, %g4, %g4           C p += cy
+       add     %g5, %g4, %g4           C p += rp[i]
+       xor     %o5, 16, %o5
+       stw     %g4, [%o0+8]
+       srlx    %g4, 32, %g3            C new cy
+       lduw    [%o0+12], %g5           C read rp[i]
+
+.L2:   sllx    %g2, 16, %g4            C (p16 << 16)
+       ldx     [%o5+0], %g2            C p16
+       add     %g1, %g4, %g4           C p = p0 + (p16 << 16)
+       ldx     [%o5+8], %g1            C p0
+       add     %g3, %g4, %g4           C p += cy
+       add     %g5, %g4, %g4           C p += rp[i]
+       stw     %g4, [%o0+12]
+       srlx    %g4, 32, %g3            C new cy
+       lduw    [%o0+16], %g5           C read rp[i]
+
+.L1:   sllx    %g2, 16, %g4            C (p16 << 16)
+       add     %g1, %g4, %g4           C p = p0 + (p16 << 16)
+       add     %g3, %g4, %g4           C p += cy
+       add     %g5, %g4, %g4           C p += rp[i]
+       stw     %g4, [%o0+16]
+       srlx    %g4, 32, %g3            C new cy
+
+       mov     %g3, %o0                C return the final carry limb
+       retl
+       sub     %sp, -FSIZE, %sp        C delay slot: release the stack area reserved at entry
+EPILOGUE(mpn_addmul_1)
diff --git a/mpn/sparc32/v9/gmp-mparam.h b/mpn/sparc32/v9/gmp-mparam.h

new file mode 100644 (file)

index 0000000..ab401ff
--- /dev/null
+++ b/mpn/sparc32/v9/gmp-mparam.h
@@ -0,0 +1,98 @@
+/* SPARC v9 32-bit gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2004, 2009, 2010 Free
+Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 32  /* width of one limb in bits */
+#define BYTES_PER_MP_LIMB 4  /* GMP_LIMB_BITS / 8 */
+
+#define DIVREM_1_NORM_THRESHOLD              3
+#define DIVREM_1_UNNORM_THRESHOLD            5
+#define MOD_1_NORM_THRESHOLD                 4
+#define MOD_1_UNNORM_THRESHOLD               7
+#define MOD_1N_TO_MOD_1_1_THRESHOLD      MP_SIZE_T_MAX  /* never */
+#define MOD_1U_TO_MOD_1_1_THRESHOLD      MP_SIZE_T_MAX  /* never */
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD     MP_SIZE_T_MAX  /* never */
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD     MP_SIZE_T_MAX  /* never */
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD  MP_SIZE_T_MAX  /* never */
+#define USE_PREINV_DIVREM_1                  1
+#define DIVREM_2_THRESHOLD                   0  /* always */
+#define DIVEXACT_1_THRESHOLD                 0  /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD        MP_SIZE_T_MAX  /* never */
+
+#define MUL_TOOM22_THRESHOLD                32
+#define MUL_TOOM33_THRESHOLD                96
+#define MUL_TOOM44_THRESHOLD               143
+#define MUL_TOOM6H_THRESHOLD               216
+#define MUL_TOOM8H_THRESHOLD               494
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      96
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     145
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD      97
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD      92
+
+#define SQR_BASECASE_THRESHOLD              12
+#define SQR_TOOM2_THRESHOLD                 62
+#define SQR_TOOM3_THRESHOLD                103
+#define SQR_TOOM4_THRESHOLD                274
+#define SQR_TOOM6_THRESHOLD                274  /* NOTE(review): same value as SQR_TOOM4_THRESHOLD */
+#define SQR_TOOM8_THRESHOLD                542
+
+#define MULMOD_BNM1_THRESHOLD               14
+#define SQRMOD_BNM1_THRESHOLD               21
+
+#define MUL_FFT_TABLE  { 272, 736, 1152, 3584, 10240, 24576, 98304, 917504, 0 }
+#define MUL_FFT_MODF_THRESHOLD             248
+#define MUL_FFT_THRESHOLD                 2112
+
+#define SQR_FFT_TABLE  { 336, 800, 1408, 3584, 10240, 24576, 98304, 393216, 0 }
+#define SQR_FFT_MODF_THRESHOLD             248
+#define SQR_FFT_THRESHOLD                 2112
+
+#define MULLO_BASECASE_THRESHOLD             0  /* always */
+#define MULLO_DC_THRESHOLD                 106
+#define MULLO_MUL_N_THRESHOLD             3493
+
+#define DC_DIV_QR_THRESHOLD                123
+#define DC_DIVAPPR_Q_THRESHOLD             396
+#define DC_BDIV_QR_THRESHOLD               121
+#define DC_BDIV_Q_THRESHOLD                280
+
+#define INV_MULMOD_BNM1_THRESHOLD           62
+#define INV_NEWTON_THRESHOLD               351
+#define INV_APPR_THRESHOLD                 357
+
+#define BINV_NEWTON_THRESHOLD              324
+#define REDC_1_TO_REDC_N_THRESHOLD          78
+
+#define MU_DIV_QR_THRESHOLD               1895
+#define MU_DIVAPPR_Q_THRESHOLD            1895
+#define MUPI_DIV_QR_THRESHOLD              122
+#define MU_BDIV_QR_THRESHOLD               872
+#define MU_BDIV_Q_THRESHOLD               2801
+
+#define MATRIX22_STRASSEN_THRESHOLD         13
+#define HGCD_THRESHOLD                     144
+#define GCD_DC_THRESHOLD                   630
+#define GCDEXT_DC_THRESHOLD                416
+#define JACOBI_BASE_METHOD                   2
+
+#define GET_STR_DC_THRESHOLD                 9
+#define GET_STR_PRECOMPUTE_THRESHOLD        17
+#define SET_STR_DC_THRESHOLD               537
+#define SET_STR_PRECOMPUTE_THRESHOLD      1576
diff --git a/mpn/sparc32/v9/mul_1.asm b/mpn/sparc32/v9/mul_1.asm

new file mode 100644 (file)

index 0000000..881f46f
--- /dev/null
+++ b/mpn/sparc32/v9/mul_1.asm
@@ -0,0 +1,276 @@
+dnl  SPARC v9 32-bit mpn_mul_1 -- Multiply a limb vector with a limb and store
+dnl  the result in a second limb vector.
+
+dnl  Copyright 1998, 2000, 2001, 2003 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C Algorithm: We use two floating-point multiplies per limb product, with the
+C invariant v operand split into two 16-bit pieces, and the u operand split
+C into 32-bit pieces.  We convert the two 48-bit products and transfer them to
+C the integer unit.
+
+C                 cycles/limb
+C UltraSPARC 1&2:     6.5
+C UltraSPARC 3:              ?
+
+C Possible optimizations:
+C   1. Combine 32-bit memory operations into 64-bit operations.  Since we're
+C      memory bandwidth limited, this could save 1.5 cycles/limb.
+C   2. Unroll the inner loop.  Since we already use alternate temporary areas,
+C      it is very straightforward to unroll, using an exit branch midway.
+C      Unrolling would allow deeper scheduling which could improve speed for L2
+C      cache case.
+C   3. For mpn_mul_1: Use more alternating temp areas.  The std'es and ldx'es
+C      aren't sufficiently apart-scheduled with just two temp areas.
+C   4. Specialize for particular v values.  If its upper 16 bits are zero, we
+C      could save many operations.
+
+C INPUT PARAMETERS
+C rp   o0
+C up   o1
+C n    o2
+C v    o3
+
+define(`FSIZE',224)
+
+ASM_START()
+PROLOGUE(mpn_mul_1)                    C leaf routine (no save): rp=%o0 up=%o1 n=%o2 v=%o3
+       add     %sp, -FSIZE, %sp        C reserve FSIZE bytes of stack scratch
+       sethi   %hi(0xffff), %g1
+       srl     %o3, 16, %g2            C g2 = v >> 16 (high 16 bits of v)
+       or      %g1, %lo(0xffff), %g1   C g1 = 0xffff mask
+       and     %o3, %g1, %g1           C g1 = v & 0xffff (low 16 bits of v)
+       stx     %g1, [%sp+104]          C spill both halves as 64-bit integers ...
+       stx     %g2, [%sp+112]
+       ldd     [%sp+104], %f6          C ... and reload them into FP registers
+       ldd     [%sp+112], %f8
+       fxtod   %f6, %f6                C f6 = (double) low half of v
+       fxtod   %f8, %f8                C f8 = (double) high half of v
+       ld      [%sp+104], %f10         C zero f10
+
+       mov     0, %g3                  C cy = 0
+
+define(`fanop', `fitod %f18, %f0')     C  A quasi nop running in the FA pipe
+
+       add     %sp, 160, %o5           C point in scratch area
+       and     %o5, -32, %o5           C align at 0 (mod 32) in scratch area
+
+       subcc   %o2, 1, %o2             C n--, sets icc for the branch below
+       ld      [%o1], %f11             C read up[i]
+       add     %o1, 4, %o1             C up++
+       bne,pt  %icc, .L_two_or_more    C taken when more limbs remain
+       fxtod   %f10, %f2               C (delay slot) f2 = (double) up[i], f10 being zero
+
+       fmuld   %f2, %f8, %f16          C f16 = up[i] * high half of v
+       fmuld   %f2, %f6, %f4           C f4 = up[i] * low half of v
+       fdtox   %f16, %f14              C back to 64-bit integers
+       fdtox   %f4, %f12
+       std     %f14, [%o5+16]          C pass products to the integer unit via scratch
+       std     %f12, [%o5+24]
+       ldx     [%o5+16], %g2           C p16
+       ldx     [%o5+24], %g1           C p0
+       b       .L1                     C n = 1: only the last wind-down stage is needed
+       add     %o0, -16, %o0           C (delay slot) bias rp so .L1 stores at rp[0]
+
+       .align  16
+.L_two_or_more:
+       subcc   %o2, 1, %o2             C n--
+       ld      [%o1], %f11             C read up[i]
+       fmuld   %f2, %f8, %f16
+       fmuld   %f2, %f6, %f4
+       add     %o1, 4, %o1             C up++
+       bne,pt  %icc, .L_three_or_more  C taken when more limbs remain
+       fxtod   %f10, %f2               C (delay slot) f2 = (double) up[i]
+
+       fdtox   %f16, %f14
+       fdtox   %f4, %f12
+       std     %f14, [%o5+16]
+       fmuld   %f2, %f8, %f16
+       std     %f12, [%o5+24]
+       fmuld   %f2, %f6, %f4
+       fdtox   %f16, %f14
+       fdtox   %f4, %f12
+       std     %f14, [%o5+0]
+       std     %f12, [%o5+8]
+       ldx     [%o5+16], %g2           C p16
+       ldx     [%o5+24], %g1           C p0
+       b       .L2                     C n = 2: join the wind-down at .L2
+       add     %o0, -12, %o0           C (delay slot) bias rp so .L2 stores at rp[0]
+
+       .align  16
+.L_three_or_more:
+       subcc   %o2, 1, %o2             C n--
+       ld      [%o1], %f11             C read up[i]
+       fdtox   %f16, %f14
+       fdtox   %f4, %f12
+       std     %f14, [%o5+16]
+       fmuld   %f2, %f8, %f16
+       std     %f12, [%o5+24]
+       fmuld   %f2, %f6, %f4
+       add     %o1, 4, %o1             C up++
+       bne,pt  %icc, .L_four_or_more   C taken when more limbs remain
+       fxtod   %f10, %f2               C (delay slot) f2 = (double) up[i]
+
+       fdtox   %f16, %f14
+       fdtox   %f4, %f12
+       std     %f14, [%o5+0]
+       fmuld   %f2, %f8, %f16
+       std     %f12, [%o5+8]
+       fmuld   %f2, %f6, %f4
+       fdtox   %f16, %f14
+       ldx     [%o5+16], %g2           C p16
+       fdtox   %f4, %f12
+       ldx     [%o5+24], %g1           C p0
+       std     %f14, [%o5+16]
+       std     %f12, [%o5+24]
+       b       .L3                     C n = 3: join the wind-down at .L3
+       add     %o0, -8, %o0            C (delay slot) bias rp so .L3 stores at rp[0]
+
+       .align  16
+.L_four_or_more:
+       subcc   %o2, 1, %o2             C n--
+       ld      [%o1], %f11             C read up[i]
+       fdtox   %f16, %f14
+       fdtox   %f4, %f12
+       std     %f14, [%o5+0]
+       fmuld   %f2, %f8, %f16
+       std     %f12, [%o5+8]
+       fmuld   %f2, %f6, %f4
+       add     %o1, 4, %o1             C up++
+       bne,pt  %icc, .L_five_or_more   C taken when more limbs remain
+       fxtod   %f10, %f2               C (delay slot) f2 = (double) up[i]
+
+       fdtox   %f16, %f14
+       ldx     [%o5+16], %g2           C p16
+       fdtox   %f4, %f12
+       ldx     [%o5+24], %g1           C p0
+       std     %f14, [%o5+16]
+       fmuld   %f2, %f8, %f16
+       std     %f12, [%o5+24]
+       fmuld   %f2, %f6, %f4
+       add     %o1, 4, %o1             C up++
+       b       .L4                     C n = 4: join the wind-down at .L4
+       add     %o0, -4, %o0            C (delay slot) bias rp so .L4 stores at rp[0]
+
+       .align  16
+.L_five_or_more:
+       subcc   %o2, 1, %o2             C n--
+       ld      [%o1], %f11             C read up[i]
+       fdtox   %f16, %f14
+       ldx     [%o5+16], %g2           C p16
+       fdtox   %f4, %f12
+       ldx     [%o5+24], %g1           C p0
+       std     %f14, [%o5+16]
+       fmuld   %f2, %f8, %f16
+       std     %f12, [%o5+24]
+       fmuld   %f2, %f6, %f4
+       add     %o1, 4, %o1             C up++
+       bne,pt  %icc, .Loop             C more than five limbs: run the main loop
+       fxtod   %f10, %f2               C (delay slot) f2 = (double) up[i]
+       b,a     .L5                     C n = 5: skip the loop (annulled, no delay slot)
+
+C BEGIN MAIN LOOP
+       .align 16
+C -- 0
+.Loop: nop
+       subcc   %o2, 1, %o2             C n--, tested by the bne at the loop bottom
+       ld      [%o1], %f11             C read up[i]
+       fdtox   %f16, %f14
+C -- 1
+       sllx    %g2, 16, %g4            C (p16 << 16)
+       add     %o0, 4, %o0             C rp++
+       ldx     [%o5+0], %g2            C p16
+       fdtox   %f4, %f12
+C -- 2
+       nop
+       add     %g1, %g4, %g4           C p = p0 + (p16 << 16)
+       ldx     [%o5+8], %g1            C p0
+       fanop
+C -- 3
+       nop
+       add     %g3, %g4, %g4           C p += cy
+       std     %f14, [%o5+0]
+       fmuld   %f2, %f8, %f16
+C -- 4
+       srlx    %g4, 32, %g3            C new cy
+       add     %o1, 4, %o1             C up++
+       std     %f12, [%o5+8]
+       fmuld   %f2, %f6, %f4
+C -- 5
+       xor     %o5, 16, %o5            C alternate scratch variables
+       stw     %g4, [%o0-4]            C store low 32 bits of p (rp was already advanced)
+       bne,pt  %icc, .Loop
+       fxtod   %f10, %f2               C (delay slot) f2 = (double) up[i]
+C END MAIN LOOP
+
+.L5:   fdtox   %f16, %f14
+       sllx    %g2, 16, %g4            C (p16 << 16)
+       ldx     [%o5+0], %g2            C p16
+       fdtox   %f4, %f12
+       add     %g1, %g4, %g4           C p = p0 + (p16 << 16)
+       ldx     [%o5+8], %g1            C p0
+       add     %g4, %g3, %g4           C p += cy
+       std     %f14, [%o5+0]
+       fmuld   %f2, %f8, %f16
+       std     %f12, [%o5+8]
+       fmuld   %f2, %f6, %f4
+       xor     %o5, 16, %o5            C alternate scratch variables
+       stw     %g4, [%o0+0]
+       srlx    %g4, 32, %g3            C new cy
+
+.L4:   fdtox   %f16, %f14
+       sllx    %g2, 16, %g4            C (p16 << 16)
+       ldx     [%o5+0], %g2            C p16
+       fdtox   %f4, %f12
+       add     %g1, %g4, %g4           C p = p0 + (p16 << 16)
+       ldx     [%o5+8], %g1            C p0
+       add     %g3, %g4, %g4           C p += cy
+       std     %f14, [%o5+0]
+       std     %f12, [%o5+8]
+       xor     %o5, 16, %o5            C alternate scratch variables
+       stw     %g4, [%o0+4]
+       srlx    %g4, 32, %g3            C new cy
+
+.L3:   sllx    %g2, 16, %g4            C (p16 << 16)
+       ldx     [%o5+0], %g2            C p16
+       add     %g1, %g4, %g4           C p = p0 + (p16 << 16)
+       ldx     [%o5+8], %g1            C p0
+       add     %g3, %g4, %g4           C p += cy
+       xor     %o5, 16, %o5            C alternate scratch variables
+       stw     %g4, [%o0+8]
+       srlx    %g4, 32, %g3            C new cy
+
+.L2:   sllx    %g2, 16, %g4            C (p16 << 16)
+       ldx     [%o5+0], %g2            C p16
+       add     %g1, %g4, %g4           C p = p0 + (p16 << 16)
+       ldx     [%o5+8], %g1            C p0
+       add     %g3, %g4, %g4           C p += cy
+       stw     %g4, [%o0+12]
+       srlx    %g4, 32, %g3            C new cy
+
+.L1:   sllx    %g2, 16, %g4            C (p16 << 16)
+       add     %g1, %g4, %g4           C p = p0 + (p16 << 16)
+       add     %g3, %g4, %g4           C p += cy
+       stw     %g4, [%o0+16]           C last result limb
+       srlx    %g4, 32, %g3            C new cy
+
+       mov     %g3, %o0                C return the final carry
+       retl
+       sub     %sp, -FSIZE, %sp        C (delay slot) release the stack scratch
+EPILOGUE(mpn_mul_1)
diff --git a/mpn/sparc32/v9/sqr_diagonal.asm b/mpn/sparc32/v9/sqr_diagonal.asm

new file mode 100644 (file)

index 0000000..e4a78c5
--- /dev/null
+++ b/mpn/sparc32/v9/sqr_diagonal.asm
@@ -0,0 +1,451 @@
+dnl  SPARC v9 32-bit mpn_sqr_diagonal.
+
+dnl  Copyright 2001, 2003 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C rp   i0
+C up   i1
+C n    i2
+
+C This code uses a very deep software pipeline, due to the need for moving data
+C back and forth between the integer registers and floating-point registers.
+C
+C A VIS variant of this code would make the pipeline less deep, since the
+C masking now done in the integer unit could take place in the floating-point
+C unit using the FAND instruction.  It would be possible to save several cycles
+C too.
+C
+C On UltraSPARC 1 and 2, this code runs at 11 cycles/limb from the Dcache and
+C not much slower from the Ecache.  It would perhaps be possible to shave off
+C one cycle, but not easily.  We cannot do better than 10 cycles/limb with the
+C used instructions, since we have 10 memory operations per limb.  But a VIS
+C variant could run three cycles faster than the corresponding non-VIS code.
+
+C This is non-pipelined code showing the algorithm:
+C
+C .Loop:
+C      lduw    [up+0],%g4              C 00000000hhhhllll
+C      sllx    %g4,16,%g3              C 0000hhhhllll0000
+C      or      %g3,%g4,%g2             C 0000hhhhXXXXllll
+C      andn    %g2,%g5,%g2             C 0000hhhh0000llll
+C      stx     %g2,[%fp+80]
+C      ldd     [%fp+80],%f0
+C      fitod   %f0,%f4                 C hi16
+C      fitod   %f1,%f6                 C lo16
+C      ld      [up+0],%f9
+C      fxtod   %f8,%f2
+C      fmuld   %f2,%f4,%f4
+C      fmuld   %f2,%f6,%f6
+C      fdtox   %f4,%f4
+C      fdtox   %f6,%f6
+C      std     %f4,[%fp-24]
+C      std     %f6,[%fp-16]
+C      ldx     [%fp-24],%g2
+C      ldx     [%fp-16],%g1
+C      sllx    %g2,16,%g2
+C      add     %g2,%g1,%g1
+C      stw     %g1,[rp+0]
+C      srlx    %g1,32,%l0
+C      stw     %l0,[rp+4]
+C      add     up,4,up
+C      subcc   n,1,n
+C      bne,pt  %icc,.Loop
+C      add     rp,8,rp
+
+define(`fanop',`fitod %f12,%f10')      dnl  A quasi nop running in the FA pipe
+
+ASM_START()
+
+       TEXT
+       ALIGN(4)
+.Lnoll:
+       .word   0
+
+PROLOGUE(mpn_sqr_diagonal)             C rp=%i0 up=%i1 n=%i2; writes two result limbs per input limb
+       save    %sp,-256,%sp            C new register window, 256-byte frame
+
+ifdef(`PIC',
+`.Lpc: rd      %pc,%o7
+       ld      [%o7+.Lnoll-.Lpc],%f8',
+`      sethi   %hi(.Lnoll),%g1
+       ld      [%g1+%lo(.Lnoll)],%f8')
+
+       sethi   %hi(0xffff0000),%g5     C g5 = 0xffff0000, the XXXX field cleared by andn
+       add     %i1,-8,%i1              C bias up so that [%i1+8] is the next limb for lduw
+
+       lduw    [%i1+8],%g4             C g4 = up[0] = 00000000hhhhllll
+       add     %i1,4,%i1               C s1_ptr++
+       sllx    %g4,16,%g3              C 0000hhhhllll0000
+       or      %g3,%g4,%g2             C 0000hhhhXXXXllll
+       subcc   %i2,1,%i2               C n--
+       bne,pt  %icc,.L_grt_1           C taken when n > 1
+       andn    %g2,%g5,%g2             C 0000hhhh0000llll
+
+       add     %i1,4,%i1               C s1_ptr++
+       stx     %g2,[%fp+80]            C spill split limb; reloaded below as f0 (hi16), f1 (lo16)
+       ld      [%i1],%f9               C low word of f8:f9 = up[0]
+       ldd     [%fp+80],%f0
+       fxtod   %f8,%f2                 C f2 = (double) up[0]; f8 is the zero word from .Lnoll
+       fitod   %f0,%f4                 C hi16
+       fitod   %f1,%f6                 C lo16
+       fmuld   %f2,%f4,%f4             C f4 = up[0] * hi16
+       fmuld   %f2,%f6,%f6             C f6 = up[0] * lo16
+       fdtox   %f4,%f4
+       fdtox   %f6,%f6
+       std     %f4,[%fp-24]            C p16
+       std     %f6,[%fp-16]            C p0
+
+       add     %fp, 80, %l3            C l3..l6 select the scratch slots used by the shared tail
+       add     %fp, -24, %l4
+       add     %fp, 72, %l5
+       b       .L1                     C n = 1: only the final store stage remains
+       add     %fp, -40, %l6
+
+.L_grt_1:
+       stx     %g2,[%fp+80]
+       lduw    [%i1+8],%g4
+       add     %i1,4,%i1               C s1_ptr++
+       sllx    %g4,16,%g3              C 0000hhhhllll0000
+       or      %g3,%g4,%g2             C 0000hhhhXXXXllll
+       subcc   %i2,1,%i2               C n--
+       bne,pt  %icc,.L_grt_2           C taken when n > 2
+       andn    %g2,%g5,%g2             C 0000hhhh0000llll
+
+       stx     %g2,[%fp+72]
+       ld      [%i1],%f9
+       add     %i1,4,%i1               C s1_ptr++
+       ldd     [%fp+80],%f0
+       fxtod   %f8,%f2
+       fitod   %f0,%f4
+       fitod   %f1,%f6
+       fmuld   %f2,%f4,%f4
+       ld      [%i1],%f9
+       fmuld   %f2,%f6,%f6
+       ldd     [%fp+72],%f0
+       fdtox   %f4,%f4
+       fdtox   %f6,%f6
+       std     %f4,[%fp-24]
+       fxtod   %f8,%f2
+       std     %f6,[%fp-16]
+       fitod   %f0,%f4
+       fitod   %f1,%f6
+       fmuld   %f2,%f4,%f4
+       fmuld   %f2,%f6,%f6
+       fdtox   %f4,%f4
+
+       add     %fp, 72, %l3
+       add     %fp, -40, %l4
+       add     %fp, 80, %l5
+       b       .L2                     C n = 2: join the tail at .L2
+       add     %fp, -24, %l6
+
+.L_grt_2:
+       stx     %g2,[%fp+72]
+       lduw    [%i1+8],%g4
+       ld      [%i1],%f9
+       add     %i1,4,%i1               C s1_ptr++
+       ldd     [%fp+80],%f0
+       sllx    %g4,16,%g3              C 0000hhhhllll0000
+       or      %g3,%g4,%g2             C 0000hhhhXXXXllll
+       subcc   %i2,1,%i2               C n--
+       fxtod   %f8,%f2
+       bne,pt  %icc,.L_grt_3           C taken when n > 3
+       andn    %g2,%g5,%g2             C 0000hhhh0000llll
+
+       stx     %g2,[%fp+80]
+       fitod   %f0,%f4
+       fitod   %f1,%f6
+       fmuld   %f2,%f4,%f4
+       ld      [%i1],%f9
+       fmuld   %f2,%f6,%f6
+       add     %i1,4,%i1               C s1_ptr++
+       ldd     [%fp+72],%f0
+       fdtox   %f4,%f4
+       fdtox   %f6,%f6
+       std     %f4,[%fp-24]
+       fxtod   %f8,%f2
+       std     %f6,[%fp-16]
+       fitod   %f0,%f4
+       fitod   %f1,%f6
+       fmuld   %f2,%f4,%f4
+       ld      [%i1],%f9
+       add     %fp, 80, %l3
+       fmuld   %f2,%f6,%f6
+       add     %fp, -24, %l4
+       ldd     [%fp+80],%f0
+       add     %fp, 72, %l5
+       fdtox   %f4,%f4
+       b       .L3                     C n = 3: join the tail at .L3
+       add     %fp, -40, %l6
+
+.L_grt_3:
+       stx     %g2,[%fp+80]
+       fitod   %f0,%f4
+       lduw    [%i1+8],%g4
+       fitod   %f1,%f6
+       fmuld   %f2,%f4,%f4
+       ld      [%i1],%f9
+       fmuld   %f2,%f6,%f6
+       add     %i1,4,%i1               C s1_ptr++
+       ldd     [%fp+72],%f0
+       fdtox   %f4,%f4
+       sllx    %g4,16,%g3              C 0000hhhhllll0000
+       fdtox   %f6,%f6
+       or      %g3,%g4,%g2             C 0000hhhhXXXXllll
+       subcc   %i2,1,%i2               C n--
+       std     %f4,[%fp-24]
+       fxtod   %f8,%f2
+       std     %f6,[%fp-16]
+       bne,pt  %icc,.L_grt_4           C taken when n > 4
+       andn    %g2,%g5,%g2             C 0000hhhh0000llll
+
+       stx     %g2,[%fp+72]
+       fitod   %f0,%f4
+       fitod   %f1,%f6
+       add     %fp, 72, %l3
+       fmuld   %f2,%f4,%f4
+       add     %fp, -40, %l4
+       ld      [%i1],%f9
+       fmuld   %f2,%f6,%f6
+       add     %i1,4,%i1               C s1_ptr++
+       ldd     [%fp+80],%f0
+       add     %fp, 80, %l5
+       fdtox   %f4,%f4
+       b       .L4                     C n = 4: join the tail at .L4
+       add     %fp, -24, %l6
+
+.L_grt_4:
+       stx     %g2,[%fp+72]
+       fitod   %f0,%f4
+       lduw    [%i1+8],%g4
+       fitod   %f1,%f6
+       fmuld   %f2,%f4,%f4
+       ld      [%i1],%f9
+       fmuld   %f2,%f6,%f6
+       add     %i1,4,%i1               C s1_ptr++
+       ldd     [%fp+80],%f0
+       fdtox   %f4,%f4
+       sllx    %g4,16,%g3              C 0000hhhhllll0000
+       fdtox   %f6,%f6
+       or      %g3,%g4,%g2             C 0000hhhhXXXXllll
+       subcc   %i2,1,%i2               C n--
+       std     %f4,[%fp-40]
+       fxtod   %f8,%f2
+       std     %f6,[%fp-32]
+       be,pn   %icc,.L5                C n = 5: go straight to the tail
+       andn    %g2,%g5,%g2             C 0000hhhh0000llll
+
+       b,a     .Loop                   C enter the loop (annulled: no delay slot executed)
+
+       .align  16
+C --- LOOP BEGIN
+.Loop: nop
+       nop
+       stx     %g2,[%fp+80]
+       fitod   %f0,%f4
+C ---
+       nop
+       nop
+       lduw    [%i1+8],%g4
+       fitod   %f1,%f6
+C ---
+       nop
+       nop
+       ldx     [%fp-24],%g2            C p16
+       fanop
+C ---
+       nop
+       nop
+       ldx     [%fp-16],%g1            C p0
+       fmuld   %f2,%f4,%f4
+C ---
+       sllx    %g2,16,%g2              C align p16
+       add     %i0,8,%i0               C res_ptr++
+       ld      [%i1],%f9
+       fmuld   %f2,%f6,%f6
+C ---
+       add     %g2,%g1,%g1             C add p16 to p0 (ADD1)
+       add     %i1,4,%i1               C s1_ptr++
+       ldd     [%fp+72],%f0
+       fanop
+C ---
+       srlx    %g1,32,%l0
+       nop
+       stw     %g1,[%i0-8]
+       fdtox   %f4,%f4
+C ---
+       sllx    %g4,16,%g3              C 0000hhhhllll0000
+       nop
+       stw     %l0,[%i0-4]
+       fdtox   %f6,%f6
+C ---
+       or      %g3,%g4,%g2             C 0000hhhhXXXXllll
+       subcc   %i2,1,%i2
+       std     %f4,[%fp-24]
+       fxtod   %f8,%f2
+C ---
+       std     %f6,[%fp-16]
+       andn    %g2,%g5,%g2             C 0000hhhh0000llll
+       be,pn   %icc,.Lend              C count exhausted at the loop midpoint
+       fanop
+C ---  LOOP MIDDLE
+       nop
+       nop
+       stx     %g2,[%fp+72]
+       fitod   %f0,%f4
+C ---
+       nop
+       nop
+       lduw    [%i1+8],%g4
+       fitod   %f1,%f6
+C ---
+       nop
+       nop
+       ldx     [%fp-40],%g2            C p16
+       fanop
+C ---
+       nop
+       nop
+       ldx     [%fp-32],%g1            C p0
+       fmuld   %f2,%f4,%f4
+C ---
+       sllx    %g2,16,%g2              C align p16
+       add     %i0,8,%i0               C res_ptr++
+       ld      [%i1],%f9
+       fmuld   %f2,%f6,%f6
+C ---
+       add     %g2,%g1,%g1             C add p16 to p0 (ADD1)
+       add     %i1,4,%i1               C s1_ptr++
+       ldd     [%fp+80],%f0
+       fanop
+C ---
+       srlx    %g1,32,%l0
+       nop
+       stw     %g1,[%i0-8]
+       fdtox   %f4,%f4
+C ---
+       sllx    %g4,16,%g3              C 0000hhhhllll0000
+       nop
+       stw     %l0,[%i0-4]
+       fdtox   %f6,%f6
+C ---
+       or      %g3,%g4,%g2             C 0000hhhhXXXXllll
+       subcc   %i2,1,%i2
+       std     %f4,[%fp-40]
+       fxtod   %f8,%f2
+C ---
+       std     %f6,[%fp-32]
+       andn    %g2,%g5,%g2             C 0000hhhh0000llll
+       bne,pt  %icc,.Loop
+       fanop
+C --- LOOP END
+
+.L5:   add     %fp, 80, %l3            C reached from .L_grt_4 or by falling out of the loop bottom
+       add     %fp, -24, %l4
+       add     %fp, 72, %l5
+       b       .Ltail
+       add     %fp, -40, %l6
+
+.Lend: add     %fp, 72, %l3            C loop left at its midpoint: slot roles swapped relative to .L5
+       add     %fp, -40, %l4
+       add     %fp, 80, %l5
+       add     %fp, -24, %l6
+.Ltail:        stx     %g2,[%l3]
+       fitod   %f0,%f4
+       fitod   %f1,%f6
+       ldx     [%l4],%g2               C p16
+       ldx     [%l4+8],%g1             C p0
+       fmuld   %f2,%f4,%f4
+       sllx    %g2,16,%g2              C align p16
+       add     %i0,8,%i0               C res_ptr++
+       ld      [%i1],%f9
+       fmuld   %f2,%f6,%f6
+       add     %g2,%g1,%g1             C add p16 to p0 (ADD1)
+       add     %i1,4,%i1               C s1_ptr++
+       ldd     [%l5],%f0
+       srlx    %g1,32,%l0
+       stw     %g1,[%i0-8]
+       fdtox   %f4,%f4
+       stw     %l0,[%i0-4]
+.L4:   fdtox   %f6,%f6
+       std     %f4,[%l4]
+       fxtod   %f8,%f2
+       std     %f6,[%l4+8]
+
+       fitod   %f0,%f4
+       fitod   %f1,%f6
+       ldx     [%l6],%g2               C p16
+       ldx     [%l6+8],%g1             C p0
+       fmuld   %f2,%f4,%f4
+       sllx    %g2,16,%g2              C align p16
+       add     %i0,8,%i0               C res_ptr++
+       ld      [%i1],%f9
+       fmuld   %f2,%f6,%f6
+       add     %g2,%g1,%g1             C add p16 to p0 (ADD1)
+       ldd     [%l3],%f0
+       srlx    %g1,32,%l0
+       stw     %g1,[%i0-8]
+       fdtox   %f4,%f4
+       stw     %l0,[%i0-4]
+.L3:   fdtox   %f6,%f6
+       std     %f4,[%l6]
+       fxtod   %f8,%f2
+       std     %f6,[%l6+8]
+
+       fitod   %f0,%f4
+       fitod   %f1,%f6
+       ldx     [%l4],%g2               C p16
+       ldx     [%l4+8],%g1             C p0
+       fmuld   %f2,%f4,%f4
+       sllx    %g2,16,%g2              C align p16
+       add     %i0,8,%i0               C res_ptr++
+       fmuld   %f2,%f6,%f6
+       add     %g2,%g1,%g1             C add p16 to p0 (ADD1)
+       srlx    %g1,32,%l0
+       stw     %g1,[%i0-8]
+       fdtox   %f4,%f4
+       stw     %l0,[%i0-4]
+.L2:   fdtox   %f6,%f6
+       std     %f4,[%l4]
+       std     %f6,[%l4+8]
+
+       ldx     [%l6],%g2               C p16
+       ldx     [%l6+8],%g1             C p0
+       sllx    %g2,16,%g2              C align p16
+       add     %i0,8,%i0               C res_ptr++
+       add     %g2,%g1,%g1             C add p16 to p0 (ADD1)
+       srlx    %g1,32,%l0
+       stw     %g1,[%i0-8]
+       stw     %l0,[%i0-4]
+
+.L1:   ldx     [%l4],%g2               C p16
+       ldx     [%l4+8],%g1             C p0
+       sllx    %g2,16,%g2              C align p16
+       add     %i0,8,%i0               C res_ptr++
+       add     %g2,%g1,%g1             C add p16 to p0 (ADD1)
+       srlx    %g1,32,%l0              C l0 = high 32 bits of the last square
+       stw     %g1,[%i0-8]             C low 32 bits of the last square
+       stw     %l0,[%i0-4]
+
+       ret
+       restore %g0,%g0,%o0             C (delay slot) pop the window; caller's %o0 = 0
+
+EPILOGUE(mpn_sqr_diagonal)
diff --git a/mpn/sparc32/v9/sub_n.asm b/mpn/sparc32/v9/sub_n.asm

new file mode 100644 (file)

index 0000000..cea4743
--- /dev/null
+++ b/mpn/sparc32/v9/sub_n.asm
@@ -0,0 +1,118 @@
+dnl  SPARC mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+dnl  store difference in a third limb vector.
+
+dnl  Copyright 2001 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+define(rp,%o0)
+define(s1p,%o1)
+define(s2p,%o2)
+define(n,%o3)
+define(cy,%g1)
+
+C This code uses 64-bit operations on `o' and `g' registers.  It doesn't
+C require that `o' registers' upper 32 bits are preserved by the operating
+C system, but if they are not, they must be zeroed.  That is indeed what
+C happens at least on Solaris 2.5 and 2.6.
+
+C On UltraSPARC 1 and 2, this code runs at 3 cycles/limb from the Dcache and at
+C about 10 cycles/limb from the Ecache.
+
+ASM_START()
+PROLOGUE(mpn_sub_n)                    C rp[] = s1p[] - s2p[] over n limbs; borrow returned in %o0
+       lduw    [s1p+0],%o4             C o4 = s1p[0], zero-extended
+       lduw    [s2p+0],%o5             C o5 = s2p[0], zero-extended
+       addcc   n,-2,n                  C n -= 2; both branches below test these flags
+       bl,pn   %icc,L(end1)            C n was 1
+       lduw    [s1p+4],%g2             C (delay slot) NOTE(review): not annulled, so s1p[1] is read even when n was 1
+       lduw    [s2p+4],%g3             C g3 = s2p[1]
+       be,pn   %icc,L(end2)            C n was 2
+       mov     0,cy                    C (delay slot) borrow = 0
+
+       .align  16
+L(loop):
+       sub     %o4,%o5,%g4             C 64-bit difference of two zero-extended limbs
+       add     rp,8,rp
+       lduw    [s1p+8],%o4
+       fitod   %f0,%f2                 C f2 is never read in this routine; presumably a scheduling filler
+C ---
+       sub     %g4,cy,%g4              C subtract incoming borrow
+       addcc   n,-1,n
+       lduw    [s2p+8],%o5
+       fitod   %f0,%f2
+C ---
+       srlx    %g4,63,cy               C borrow out = bit 63 of the difference
+       add     s2p,8,s2p
+       stw     %g4,[rp-8]              C store low 32 bits
+       be,pn   %icc,L(exito)+4         C odd limb left: enter exito past its first sub, done in the delay slot below
+C ---
+       sub     %g2,%g3,%g4
+       addcc   n,-1,n
+       lduw    [s1p+12],%g2
+       fitod   %f0,%f2
+C ---
+       sub     %g4,cy,%g4
+       add     s1p,8,s1p
+       lduw    [s2p+4],%g3             C s2p was already advanced by 8 above
+       fitod   %f0,%f2
+C ---
+       srlx    %g4,63,cy
+       bne,pt  %icc,L(loop)
+       stw     %g4,[rp-4]              C (delay slot) store second limb of this iteration
+C ---
+L(exite):
+       sub     %o4,%o5,%g4
+       sub     %g4,cy,%g4
+       srlx    %g4,63,cy
+       stw     %g4,[rp+0]
+       sub     %g2,%g3,%g4
+       sub     %g4,cy,%g4
+       stw     %g4,[rp+4]
+       retl
+       srlx    %g4,63,%o0              C (delay slot) return final borrow
+
+L(exito):
+       sub     %g2,%g3,%g4             C skipped when entered at L(exito)+4
+       sub     %g4,cy,%g4
+       srlx    %g4,63,cy
+       stw     %g4,[rp-4]
+       sub     %o4,%o5,%g4
+       sub     %g4,cy,%g4
+       stw     %g4,[rp+0]
+       retl
+       srlx    %g4,63,%o0              C (delay slot) return final borrow
+
+L(end1):
+       sub     %o4,%o5,%g4             C single limb, no incoming borrow
+       stw     %g4,[rp+0]
+       retl
+       srlx    %g4,63,%o0              C (delay slot) return borrow
+
+L(end2):
+       sub     %o4,%o5,%g4             C first of two limbs, no incoming borrow
+       srlx    %g4,63,cy
+       stw     %g4,[rp+0]
+       sub     %g2,%g3,%g4
+       sub     %g4,cy,%g4
+       stw     %g4,[rp+4]
+       retl
+       srlx    %g4,63,%o0              C (delay slot) return final borrow
+EPILOGUE(mpn_sub_n)
diff --git a/mpn/sparc32/v9/submul_1.asm b/mpn/sparc32/v9/submul_1.asm

new file mode 100644 (file)

index 0000000..e5823b1
--- /dev/null
+++ b/mpn/sparc32/v9/submul_1.asm
@@ -0,0 +1,305 @@
+dnl  SPARC v9 32-bit mpn_submul_1 -- Multiply a limb vector with a limb and
+dnl  subtract the result from a second limb vector.
+
+dnl  Copyright 1998, 2000, 2001, 2003 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C Algorithm: We use two floating-point multiplies per limb product, with the
+C invariant v operand split into two 16-bit pieces, and the u operand split
+C into 32-bit pieces.  We convert the two 48-bit products and transfer them to
+C the integer unit.
+
+C                 cycles/limb
+C UltraSPARC 1&2:     6.5
+C UltraSPARC 3:              ?
+
+C Possible optimizations:
+C   1. Combine 32-bit memory operations into 64-bit operations.  Since we're
+C      memory bandwidth limited, this could save 1.5 cycles/limb.
+C   2. Unroll the inner loop.  Since we already use alternate temporary areas,
+C      it is very straightforward to unroll, using an exit branch midways.
+C      Unrolling would allow deeper scheduling which could improve speed for L2
+C      cache case.
+C   3. For mpn_mul_1: Use more alternating temp areas.  The std'es and ldx'es
+C      aren't sufficiently apart-scheduled with just two temp areas.
+C   4. Specialize for particular v values.  If its upper 16 bits are zero, we
+C      could save many operations.
+
+C INPUT PARAMETERS
+C rp   o0
+C up   o1
+C n    o2
+C v    o3
+
+define(`FSIZE',224)
+
+ASM_START()
+PROLOGUE(mpn_submul_1)
+       add     %sp, -FSIZE, %sp        C allocate FSIZE bytes of stack scratch (no register window save)
+       sethi   %hi(0xffff), %g1
+       srl     %o3, 16, %g2            C g2 = upper 16 bits of v
+       or      %g1, %lo(0xffff), %g1   C g1 = 0xffff
+       and     %o3, %g1, %g1           C g1 = lower 16 bits of v
+       stx     %g1, [%sp+104]
+       stx     %g2, [%sp+112]
+       ldd     [%sp+104], %f6          C move both halves of v to the FPU through memory
+       ldd     [%sp+112], %f8
+       fxtod   %f6, %f6                C f6 = (double) low half of v
+       fxtod   %f8, %f8                C f8 = (double) high half of v
+       ld      [%sp+104], %f10         C zero f10
+
+       mov     0, %g3                  C cy = 0
+
+define(`fanop', `fitod %f18, %f0')     C  A quasi nop running in the FA pipe
+
+       add     %sp, 160, %o5           C point in scratch area
+       and     %o5, -32, %o5           C align at 0 (mod 32) in scratch area
+
+       subcc   %o2, 1, %o2             C n--
+       ld      [%o1], %f11             C read up[i]
+       add     %o1, 4, %o1             C up++
+       bne,pt  %icc, .L_two_or_more
+       fxtod   %f10, %f2               C delay slot: f2 = (double) up[i], from the f10/f11 pair
+
+       fmuld   %f2, %f8, %f16
+       fmuld   %f2, %f6, %f4
+       fdtox   %f16, %f14
+       fdtox   %f4, %f12
+       std     %f14, [%o5+16]
+       std     %f12, [%o5+24]
+       ldx     [%o5+16], %g2           C p16
+       ldx     [%o5+24], %g1           C p0
+       lduw    [%o0], %g5              C read rp[i]
+       b       .L1
+       add     %o0, -16, %o0           C delay slot: bias rp so that [%o0+16] at .L1 is rp[0]
+
+       .align  16
+.L_two_or_more:
+       subcc   %o2, 1, %o2
+       ld      [%o1], %f11             C read up[i]
+       fmuld   %f2, %f8, %f16
+       fmuld   %f2, %f6, %f4
+       add     %o1, 4, %o1             C up++
+       bne,pt  %icc, .L_three_or_more
+       fxtod   %f10, %f2
+
+       fdtox   %f16, %f14
+       fdtox   %f4, %f12
+       std     %f14, [%o5+16]
+       fmuld   %f2, %f8, %f16
+       std     %f12, [%o5+24]
+       fmuld   %f2, %f6, %f4
+       fdtox   %f16, %f14
+       fdtox   %f4, %f12
+       std     %f14, [%o5+0]
+       std     %f12, [%o5+8]
+       lduw    [%o0], %g5              C read rp[i]
+       ldx     [%o5+16], %g2           C p16
+       ldx     [%o5+24], %g1           C p0
+       b       .L2
+       add     %o0, -12, %o0           C delay slot: bias rp so that [%o0+12] at .L2 is rp[0]
+
+       .align  16
+.L_three_or_more:
+       subcc   %o2, 1, %o2
+       ld      [%o1], %f11             C read up[i]
+       fdtox   %f16, %f14
+       fdtox   %f4, %f12
+       std     %f14, [%o5+16]
+       fmuld   %f2, %f8, %f16
+       std     %f12, [%o5+24]
+       fmuld   %f2, %f6, %f4
+       add     %o1, 4, %o1             C up++
+       bne,pt  %icc, .L_four_or_more
+       fxtod   %f10, %f2
+
+       fdtox   %f16, %f14
+       fdtox   %f4, %f12
+       std     %f14, [%o5+0]
+       fmuld   %f2, %f8, %f16
+       std     %f12, [%o5+8]
+       fmuld   %f2, %f6, %f4
+       fdtox   %f16, %f14
+       ldx     [%o5+16], %g2           C p16
+       fdtox   %f4, %f12
+       ldx     [%o5+24], %g1           C p0
+       std     %f14, [%o5+16]
+       std     %f12, [%o5+24]
+       lduw    [%o0], %g5              C read rp[i]
+       b       .L3
+       add     %o0, -8, %o0            C delay slot: bias rp so that [%o0+8] at .L3 is rp[0]
+
+       .align  16
+.L_four_or_more:
+       subcc   %o2, 1, %o2
+       ld      [%o1], %f11             C read up[i]
+       fdtox   %f16, %f14
+       fdtox   %f4, %f12
+       std     %f14, [%o5+0]
+       fmuld   %f2, %f8, %f16
+       std     %f12, [%o5+8]
+       fmuld   %f2, %f6, %f4
+       add     %o1, 4, %o1             C up++
+       bne,pt  %icc, .L_five_or_more
+       fxtod   %f10, %f2
+
+       fdtox   %f16, %f14
+       ldx     [%o5+16], %g2           C p16
+       fdtox   %f4, %f12
+       ldx     [%o5+24], %g1           C p0
+       std     %f14, [%o5+16]
+       fmuld   %f2, %f8, %f16
+       std     %f12, [%o5+24]
+       fmuld   %f2, %f6, %f4
+       add     %o1, 4, %o1             C up++
+       lduw    [%o0], %g5              C read rp[i]
+       b       .L4
+       add     %o0, -4, %o0            C delay slot: bias rp so that [%o0+4] at .L4 is rp[0]
+
+       .align  16
+.L_five_or_more:
+       subcc   %o2, 1, %o2
+       ld      [%o1], %f11             C read up[i]
+       fdtox   %f16, %f14
+       ldx     [%o5+16], %g2           C p16
+       fdtox   %f4, %f12
+       ldx     [%o5+24], %g1           C p0
+       std     %f14, [%o5+16]
+       fmuld   %f2, %f8, %f16
+       std     %f12, [%o5+24]
+       fmuld   %f2, %f6, %f4
+       add     %o1, 4, %o1             C up++
+       lduw    [%o0], %g5              C read rp[i]
+       bne,pt  %icc, .Loop
+       fxtod   %f10, %f2
+       b,a     .L5
+
+C BEGIN MAIN LOOP
+       .align 16
+C -- 0
+.Loop: sub     %g0, %g3, %g3           C negate high word of previous difference => borrow
+       subcc   %o2, 1, %o2
+       ld      [%o1], %f11             C read up[i]
+       fdtox   %f16, %f14
+C -- 1
+       sllx    %g2, 16, %g4            C (p16 << 16)
+       add     %o0, 4, %o0             C rp++
+       ldx     [%o5+0], %g2            C p16
+       fdtox   %f4, %f12
+C -- 2
+       srl     %g3, 0, %g3             C zero most significant 32 bits
+       add     %g1, %g4, %g4           C p = p0 + (p16 << 16)
+       ldx     [%o5+8], %g1            C p0
+       fanop
+C -- 3
+       nop
+       add     %g3, %g4, %g4           C p += cy
+       std     %f14, [%o5+0]
+       fmuld   %f2, %f8, %f16
+C -- 4
+       nop
+       sub     %g5, %g4, %g4           C p = rp[i] - p
+       std     %f12, [%o5+8]
+       fmuld   %f2, %f6, %f4
+C -- 5
+       xor     %o5, 16, %o5            C alternate scratch variables
+       add     %o1, 4, %o1             C up++
+       stw     %g4, [%o0-4]
+       fanop
+C -- 6
+       srlx    %g4, 32, %g3            C new cy
+       lduw    [%o0], %g5              C read rp[i]
+       bne,pt  %icc, .Loop
+       fxtod   %f10, %f2
+C END MAIN LOOP
+
+.L5:   sub     %g0, %g3, %g3           C negate high word of previous difference => borrow
+       fdtox   %f16, %f14
+       sllx    %g2, 16, %g4            C (p16 << 16)
+       ldx     [%o5+0], %g2            C p16
+       fdtox   %f4, %f12
+       srl     %g3, 0, %g3             C zero most significant 32 bits
+       add     %g1, %g4, %g4           C p = p0 + (p16 << 16)
+       ldx     [%o5+8], %g1            C p0
+       add     %g4, %g3, %g4           C p += cy
+       std     %f14, [%o5+0]
+       fmuld   %f2, %f8, %f16
+       sub     %g5, %g4, %g4           C p = rp[i] - p
+       std     %f12, [%o5+8]
+       fmuld   %f2, %f6, %f4
+       xor     %o5, 16, %o5
+       stw     %g4, [%o0+0]
+       srlx    %g4, 32, %g3            C new cy
+       lduw    [%o0+4], %g5            C read rp[i]
+
+       sub     %g0, %g3, %g3           C skipped when entering at .L4, where cy is still 0
+.L4:   fdtox   %f16, %f14
+       sllx    %g2, 16, %g4            C (p16 << 16)
+       ldx     [%o5+0], %g2            C p16
+       fdtox   %f4, %f12
+       srl     %g3, 0, %g3             C zero most significant 32 bits
+       add     %g1, %g4, %g4           C p = p0 + (p16 << 16)
+       ldx     [%o5+8], %g1            C p0
+       add     %g3, %g4, %g4           C p += cy
+       std     %f14, [%o5+0]
+       sub     %g5, %g4, %g4           C p = rp[i] - p
+       std     %f12, [%o5+8]
+       xor     %o5, 16, %o5
+       stw     %g4, [%o0+4]
+       srlx    %g4, 32, %g3            C new cy
+       lduw    [%o0+8], %g5            C read rp[i]
+
+       sub     %g0, %g3, %g3
+.L3:   sllx    %g2, 16, %g4            C (p16 << 16)
+       ldx     [%o5+0], %g2            C p16
+       srl     %g3, 0, %g3             C zero most significant 32 bits
+       add     %g1, %g4, %g4           C p = p0 + (p16 << 16)
+       ldx     [%o5+8], %g1            C p0
+       add     %g3, %g4, %g4           C p += cy
+       sub     %g5, %g4, %g4           C p = rp[i] - p
+       xor     %o5, 16, %o5
+       stw     %g4, [%o0+8]
+       srlx    %g4, 32, %g3            C new cy
+       lduw    [%o0+12], %g5           C read rp[i]
+
+       sub     %g0, %g3, %g3
+.L2:   sllx    %g2, 16, %g4            C (p16 << 16)
+       ldx     [%o5+0], %g2            C p16
+       srl     %g3, 0, %g3             C zero most significant 32 bits
+       add     %g1, %g4, %g4           C p = p0 + (p16 << 16)
+       ldx     [%o5+8], %g1            C p0
+       add     %g3, %g4, %g4           C p += cy
+       sub     %g5, %g4, %g4           C p = rp[i] - p
+       stw     %g4, [%o0+12]
+       srlx    %g4, 32, %g3            C new cy
+       lduw    [%o0+16], %g5           C read rp[i]
+
+       sub     %g0, %g3, %g3
+.L1:   sllx    %g2, 16, %g4            C (p16 << 16)
+       srl     %g3, 0, %g3             C zero most significant 32 bits
+       add     %g1, %g4, %g4           C p = p0 + (p16 << 16)
+       add     %g3, %g4, %g4           C p += cy
+       sub     %g5, %g4, %g4           C p = rp[i] - p
+       stw     %g4, [%o0+16]
+       srlx    %g4, 32, %g3            C new cy
+
+       sub     %g0, %g3, %o0           C return -(high word of last difference), i.e. the borrow
+       retl
+       sub     %sp, -FSIZE, %sp        C delay slot: release the stack scratch
+EPILOGUE(mpn_submul_1)
diff --git a/mpn/sparc32/v9/udiv.asm b/mpn/sparc32/v9/udiv.asm

new file mode 100644 (file)

index 0000000..0957b8c
--- /dev/null
+++ b/mpn/sparc32/v9/udiv.asm
@@ -0,0 +1,41 @@
+dnl  SPARC v9 32-bit mpn_udiv_qrnnd - division support for longlong.h.
+
+dnl  Copyright 2002, 2003 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C rem_ptr      o0
+C n1           o1
+C n0           o2
+C d            o3
+
+ASM_START()
+PROLOGUE(mpn_udiv_qrnnd)
+       sllx    %o1, 32, %g1            C shift upper dividend limb
+       srl     %o2, 0, %g2             C zero extend lower dividend limb
+       srl     %o3, 0, %g3             C zero extend divisor
+       or      %g2, %g1, %g1           C assemble 64-bit dividend
+       udivx   %g1, %g3, %g1           C q = 64-bit dividend / divisor
+       mulx    %g1, %g3, %g4           C q * d
+       sub     %g2, %g4, %g2           C n0 - q*d; only the low 32 bits are stored below
+       st      %g2, [%o0]              C store remainder
+       retl
+       mov     %g1, %o0                C return quotient
+EPILOGUE(mpn_udiv_qrnnd)
diff --git a/mpn/sparc64/README b/mpn/sparc64/README

new file mode 100644 (file)

index 0000000..1907299
--- /dev/null
+++ b/mpn/sparc64/README
@@ -0,0 +1,114 @@
+Copyright 1997, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+
+
+
+This directory contains mpn functions for 64-bit V9 SPARC
+
+RELEVANT OPTIMIZATION ISSUES
+
+Notation:
+  IANY = shift/add/sub/logical/sethi
+  IADDLOG = add/sub/logical/sethi
+  MEM = ld*/st*
+  FA = fadd*/fsub*/f*to*/fmov*
+  FM = fmul*
+
+UltraSPARC can issue four instructions per cycle, with these restrictions:
+* Two IANY instructions, but only one of these may be a shift.  If there is a
+  shift and an IANY instruction, the shift must precede the IANY instruction.
+* One FA.
+* One FM.
+* One branch.
+* One MEM.
+* IANY/IADDLOG/MEM must be insn 1, 2, or 3 in an issue bundle.  Taken branches
+  should not be in slot 4, since that makes the delay insn come from separate
+  bundle.
+* If two IANY/IADDLOG instructions are to be executed in the same cycle and one
+  of these is setting the condition codes, that instruction must be the second
+  one.
+
+To summarize, ignoring branches, these are the bundles that can reach the peak
+execution speed:
+
+insn1  iany    iany    mem     iany    iany    mem     iany    iany    mem
+insn2  iaddlog mem     iany    mem     iaddlog iany    mem     iaddlog iany
+insn3  mem     iaddlog iaddlog fa      fa      fa      fm      fm      fm
+insn4  fa/fm   fa/fm   fa/fm   fm      fm      fm      fa      fa      fa
+
+The 64-bit integer multiply instruction mulx takes from 5 cycles to 35 cycles,
+depending on the position of the most significant bit of the first source
+operand.  When used for 32x32->64 multiplication, it needs 20 cycles.
+Furthermore, it stalls the processor while executing.  We stay away from that
+instruction, and instead use floating-point operations.
+
+Floating-point add and multiply units are fully pipelined.  The latency for
+UltraSPARC-1/2 is 3 cycles and for UltraSPARC-3 it is 4 cycles.
+
+Integer conditional move instructions cannot dual-issue with other integer
+instructions.  No conditional move can issue 1-5 cycles after a load.  (This
+might have been fixed for UltraSPARC-3.)
+
+The UltraSPARC-3 pipeline is very similar to the one of UltraSPARC-1/2, but is
+somewhat slower.  Branches execute slower, and there may be other new stalls.
+But integer multiply doesn't stall the entire CPU and also has a much lower
+latency.  But it's still not pipelined, and thus useless for our needs.
+
+STATUS
+
+* mpn_lshift, mpn_rshift: The current code runs at 2.0 cycles/limb on
+  UltraSPARC-1/2 and 2.65 on UltraSPARC-3.  For UltraSPARC-1/2, the IEU0
+  functional unit is saturated with shifts.
+
+* mpn_add_n, mpn_sub_n: The current code runs at 4 cycles/limb on
+  UltraSPARC-1/2 and 4.5 cycles/limb on UltraSPARC-3.  The 4 instruction
+  recurrency is the speed limiter.
+
+* mpn_addmul_1: The current code runs at 14 cycles/limb asymptotically on
+  UltraSPARC-1/2 and 17.5 cycles/limb on UltraSPARC-3.  On UltraSPARC-1/2, the
+  code sustains 4 instructions/cycle.  It might be possible to invent a better
+  way of summing the intermediate 49-bit operands, but it is unlikely that it
+  will save enough instructions to save an entire cycle.
+
+  The load-use of the u operand is not enough scheduled for good L2 cache
+  performance.  The UltraSPARC-1/2 L1 cache is direct mapped, and since we use
+  temporary stack slots that will conflict with the u and r operands, we miss
+  to L2 very often.  The load-use of the std/ldx pairs via the stack are
+  perhaps over-scheduled.
+
+  It would be possible to save two instructions: (1) The mov could be avoided
+  if the std/ldx were less scheduled.  (2) The ldx of the r operand could be
+  split into two ld instructions, saving the shifts/masks.
+
+  It should be possible to reach 14 cycles/limb for UltraSPARC-3 if the fp
+  operations were rescheduled for this processor's 4-cycle latency.
+
+* mpn_mul_1: The current code is a straightforward edit of the mpn_addmul_1
+  code.  It would be possible to shave one or two cycles from it, with some
+  labour.
+
+* mpn_submul_1: Simpleminded code just calling mpn_mul_1 + mpn_sub_n.  This
+  means that it runs at 18 cycles/limb on UltraSPARC-1/2 and 23 cycles/limb on
+  UltraSPARC-3.  It would be possible to either match the mpn_addmul_1
+  performance, or in the worst case use one more instruction group.
+
+* US1/US2 cache conflict resolving.  The direct mapped L1 data cache of US1/US2
+  is a problem for mul_1, addmul_1 (and a prospective submul_1).  We should
+  allocate a larger cache area, and put the stack temp area in a place that
+  doesn't cause cache conflicts.
diff --git a/mpn/sparc64/add_n.asm b/mpn/sparc64/add_n.asm

new file mode 100644 (file)

index 0000000..c3e5b46
--- /dev/null
+++ b/mpn/sparc64/add_n.asm
@@ -0,0 +1,220 @@
+dnl  SPARC v9 mpn_add_n -- Add two limb vectors of the same length > 0 and
+dnl  store sum in a third limb vector.
+
+dnl  Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                 cycles/limb
+C UltraSPARC 1&2:     4
+C UltraSPARC 3:              4.5
+
+C Compute carry-out from the most significant bits of u,v, and r, where
+C r=u+v+carry_in, using logic operations.
+
+C This code runs at 4 cycles/limb on UltraSPARC 1 and 2.  It has a 4 insn
+C recurrency, and on UltraSPARC 1 and 2 the IE units are 100% saturated.
+C Therefore, it seems futile to try to optimize this any further...
+
+C INPUT PARAMETERS
+define(`rp',`%i0')
+define(`up',`%i1')
+define(`vp',`%i2')
+define(`n',`%i3')
+
+define(`u0',`%l0')
+define(`u1',`%l2')
+define(`u2',`%l4')
+define(`u3',`%l6')
+define(`v0',`%l1')
+define(`v1',`%l3')
+define(`v2',`%l5')
+define(`v3',`%l7')
+
+define(`cy',`%i4')
+
+define(`fanop',`fitod %f0,%f2')                dnl  A quasi nop running in the FA pipe
+define(`fmnop',`fmuld %f0,%f0,%f4')    dnl  A quasi nop running in the FM pipe
+
+ASM_START()
+       REGISTER(%g2,#scratch)
+       REGISTER(%g3,#scratch)
+PROLOGUE(mpn_add_n)
+       save    %sp,-160,%sp    C new register window; arguments are read from %i0-%i3
+
+       fitod   %f0,%f0         C make sure f0 contains small, quiet number
+       subcc   n,4,%g0         C compare n with 4
+       bl,pn   %icc,.Loop0     C fewer than 4 limbs: one-limb-at-a-time loop only
+       mov     0,cy            C delay slot: clear carry
+
+       ldx     [up+0],u0       C preload four limbs from each operand
+       ldx     [vp+0],v0
+       add     up,32,up
+       ldx     [up-24],u1
+       ldx     [vp+8],v1
+       add     vp,32,vp
+       ldx     [up-16],u2
+       ldx     [vp-16],v2
+       ldx     [up-8],u3
+       ldx     [vp-8],v3
+       subcc   n,8,n           C n -= 8; negative means no second group of four to preload
+       add     u0,v0,%g1       C main add
+       add     %g1,cy,%g4      C carry add
+       or      u0,v0,%g2       C u|v, first term of the carry-out logic
+       bl,pn   %icc,.Lend4567
+       fanop
+       b,a     .Loop
+
+       .align  16
+C START MAIN LOOP
+.Loop: andn    %g2,%g4,%g2     C (u|v) & ~r
+       and     u0,v0,%g3       C u & v
+       ldx     [up+0],u0
+       fanop
+C --
+       or      %g3,%g2,%g2     C bit 63 is now the carry out of this limb
+       ldx     [vp+0],v0
+       add     up,32,up
+       fanop
+C --
+       srlx    %g2,63,cy       C cy = carry out
+       add     u1,v1,%g1
+       stx     %g4,[rp+0]
+       fanop
+C --
+       add     %g1,cy,%g4
+       or      u1,v1,%g2
+       fmnop
+       fanop
+C --
+       andn    %g2,%g4,%g2
+       and     u1,v1,%g3
+       ldx     [up-24],u1
+       fanop
+C --
+       or      %g3,%g2,%g2
+       ldx     [vp+8],v1
+       add     vp,32,vp
+       fanop
+C --
+       srlx    %g2,63,cy
+       add     u2,v2,%g1
+       stx     %g4,[rp+8]
+       fanop
+C --
+       add     %g1,cy,%g4
+       or      u2,v2,%g2
+       fmnop
+       fanop
+C --
+       andn    %g2,%g4,%g2
+       and     u2,v2,%g3
+       ldx     [up-16],u2
+       fanop
+C --
+       or      %g3,%g2,%g2
+       ldx     [vp-16],v2
+       add     rp,32,rp
+       fanop
+C --
+       srlx    %g2,63,cy
+       add     u3,v3,%g1
+       stx     %g4,[rp-16]
+       fanop
+C --
+       add     %g1,cy,%g4
+       or      u3,v3,%g2
+       fmnop
+       fanop
+C --
+       andn    %g2,%g4,%g2
+       and     u3,v3,%g3
+       ldx     [up-8],u3
+       fanop
+C --
+       or      %g3,%g2,%g2
+       subcc   n,4,n           C n -= 4; icc is tested by the bge below
+       ldx     [vp-8],v3
+       fanop
+C --
+       srlx    %g2,63,cy
+       add     u0,v0,%g1
+       stx     %g4,[rp-8]
+       fanop
+C --
+       add     %g1,cy,%g4
+       or      u0,v0,%g2
+       bge,pt  %icc,.Loop
+       fanop
+C END MAIN LOOP
+.Lend4567:
+       andn    %g2,%g4,%g2     C wind down: finish the four limbs already loaded, no further loads
+       and     u0,v0,%g3
+       or      %g3,%g2,%g2
+       srlx    %g2,63,cy
+       add     u1,v1,%g1
+       stx     %g4,[rp+0]
+       add     %g1,cy,%g4
+       or      u1,v1,%g2
+       andn    %g2,%g4,%g2
+       and     u1,v1,%g3
+       or      %g3,%g2,%g2
+       srlx    %g2,63,cy
+       add     u2,v2,%g1
+       stx     %g4,[rp+8]
+       add     %g1,cy,%g4
+       or      u2,v2,%g2
+       andn    %g2,%g4,%g2
+       and     u2,v2,%g3
+       or      %g3,%g2,%g2
+       add     rp,32,rp
+       srlx    %g2,63,cy
+       add     u3,v3,%g1
+       stx     %g4,[rp-16]
+       add     %g1,cy,%g4
+       or      u3,v3,%g2
+       andn    %g2,%g4,%g2
+       and     u3,v3,%g3
+       or      %g3,%g2,%g2
+       srlx    %g2,63,cy
+       stx     %g4,[rp-8]
+
+       addcc   n,4,n           C undo the bias: n = limbs still to do
+       bz,pn   %icc,.Lret      C none left
+       fanop
+
+.Loop0:        ldx     [up],u0 C one limb per iteration
+       add     up,8,up
+       ldx     [vp],v0
+       add     vp,8,vp
+       add     rp,8,rp
+       subcc   n,1,n
+       add     u0,v0,%g1
+       or      u0,v0,%g2
+       add     %g1,cy,%g4
+       and     u0,v0,%g3
+       andn    %g2,%g4,%g2
+       stx     %g4,[rp-8]
+       or      %g3,%g2,%g2
+       bnz,pt  %icc,.Loop0
+       srlx    %g2,63,cy       C delay slot: cy = carry out of this limb
+
+.Lret: mov     cy,%i0          C return carry; %i0 becomes the caller's %o0 after restore
+       ret
+       restore
+EPILOGUE(mpn_add_n)
diff --git a/mpn/sparc64/addmul_1.asm b/mpn/sparc64/addmul_1.asm

new file mode 100644 (file)

index 0000000..bd83c65
--- /dev/null
+++ b/mpn/sparc64/addmul_1.asm
@@ -0,0 +1,596 @@
+dnl  SPARC v9 64-bit mpn_addmul_1 -- Multiply a limb vector with a limb and add
+dnl  the result to a second limb vector.
+
+dnl  Copyright 1998, 2000, 2001, 2002, 2003, 2004 Free Software Foundation,
+dnl  Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                 cycles/limb
+C UltraSPARC 1&2:     14
+C UltraSPARC 3:              17.5
+
+C Algorithm: We use eight floating-point multiplies per limb product, with the
+C invariant v operand split into four 16-bit pieces, and the up operand split
+C into 32-bit pieces.  We sum pairs of 48-bit partial products using
+C floating-point add, then convert the four 49-bit product-sums and transfer
+C them to the integer unit.
+
+C Possible optimizations:
+C   0. Rewrite to use algorithm of mpn_addmul_2.
+C   1. Align the stack area where we transfer the four 49-bit product-sums
+C      to a 32-byte boundary.  That would minimize the cache collision.
+C      (UltraSPARC-1/2 use a direct-mapped cache.)  (Perhaps even better would
+C      be to align the area to map to the area immediately before up?)
+C   2. Sum the 4 49-bit quantities using 32-bit operations, as in the
+C      develop mpn_addmul_2.  This would save many integer instructions.
+C   3. Unrolling.  Questionable if it is worth the code expansion, given that
+C      it could only save 1 cycle/limb.
+C   4. Specialize for particular v values.  If its upper 32 bits are zero, we
+C      could save many operations, in the FPU (fmuld), but more so in the IEU
+C      since we'll be summing 48-bit quantities, which might be simpler.
+C   5. Ideally, we should schedule the f2/f3 and f4/f5 RAW further apart, and
+C      the i00,i16,i32,i48 RAW less apart.  The latter apart-scheduling should
+C      not be greater than needed for L2 cache latency, and also not so great
+C      that i16 needs to be copied.
+C   6. Avoid performing mem+fa+fm in the same cycle, at least not when we want
+C      to get high IEU bandwidth.  (12 of the 14 cycles will be free for 2 IEU
+C      ops.)
+
+C Instruction classification (as per UltraSPARC-1/2 functional units):
+C    8 FM
+C   10 FA
+C   12 MEM
+C   10 ISHIFT + 14 IADDLOG
+C    1 BRANCH
+C   55 insns totally (plus one mov insn that should be optimized out)
+
+C The loop executes 56 instructions in 14 cycles on UltraSPARC-1/2, i.e we
+C sustain the peak execution rate of 4 instructions/cycle.
+
+C INPUT PARAMETERS
+C rp   i0
+C up   i1
+C n    i2
+C v    i3
+
+ASM_START()
+       REGISTER(%g2,#scratch)
+       REGISTER(%g3,#scratch)
+
+define(`p00', `%f8') define(`p16',`%f10') define(`p32',`%f12') define(`p48',`%f14')
+define(`r32',`%f16') define(`r48',`%f18') define(`r64',`%f20') define(`r80',`%f22')
+define(`v00',`%f24') define(`v16',`%f26') define(`v32',`%f28') define(`v48',`%f30')
+define(`u00',`%f32') define(`u32', `%f34')
+define(`a00',`%f36') define(`a16',`%f38') define(`a32',`%f40') define(`a48',`%f42')
+define(`cy',`%g1')
+define(`rlimb',`%g3')
+define(`i00',`%l0') define(`i16',`%l1') define(`i32',`%l2') define(`i48',`%l3')
+define(`xffffffff',`%l7')
+define(`xffff',`%o0')
+
+PROLOGUE(mpn_addmul_1)
+
+C Initialization.  (1) Split v operand into four 16-bit chunks and store them
+C as IEEE double in fp registers.  (2) Clear upper 32 bits of fp register pairs
+C f2 and f4.  (3) Store masks in registers aliased to `xffff' and `xffffffff'.
+
+       save    %sp, -256, %sp
+       mov     -1, %g4
+       srlx    %g4, 48, xffff          C store mask in register `xffff'
+       and     %i3, xffff, %g2
+       stx     %g2, [%sp+2223+0]
+       srlx    %i3, 16, %g3
+       and     %g3, xffff, %g3
+       stx     %g3, [%sp+2223+8]
+       srlx    %i3, 32, %g2
+       and     %g2, xffff, %g2
+       stx     %g2, [%sp+2223+16]
+       srlx    %i3, 48, %g3
+       stx     %g3, [%sp+2223+24]
+       srlx    %g4, 32, xffffffff      C store mask in register `xffffffff'
+
+       sllx    %i2, 3, %i2
+       mov     0, cy                   C clear cy
+       add     %i0, %i2, %i0
+       add     %i1, %i2, %i1
+       neg     %i2
+       add     %i1, 4, %i5
+       add     %i0, -32, %i4
+       add     %i0, -16, %i0
+
+       ldd     [%sp+2223+0], v00
+       ldd     [%sp+2223+8], v16
+       ldd     [%sp+2223+16], v32
+       ldd     [%sp+2223+24], v48
+       ld      [%sp+2223+0],%f2        C zero f2
+       ld      [%sp+2223+0],%f4        C zero f4
+       ld      [%i5+%i2], %f3          C read low 32 bits of up[i]
+       ld      [%i1+%i2], %f5          C read high 32 bits of up[i]
+       fxtod   v00, v00
+       fxtod   v16, v16
+       fxtod   v32, v32
+       fxtod   v48, v48
+
+C Start real work.  (We sneakingly read f3 and f5 above...)
+C The software pipeline is very deep, requiring 4 feed-in stages.
+
+       fxtod   %f2, u00
+       fxtod   %f4, u32
+       fmuld   u00, v00, a00
+       fmuld   u00, v16, a16
+       fmuld   u00, v32, p32
+       fmuld   u32, v00, r32
+       fmuld   u00, v48, p48
+       addcc   %i2, 8, %i2
+       bnz,pt  %icc, .L_two_or_more
+       fmuld   u32, v16, r48
+
+.L_one:
+       fmuld   u32, v32, r64   C FIXME not urgent
+       faddd   p32, r32, a32
+       fdtox   a00, a00
+       faddd   p48, r48, a48
+       fmuld   u32, v48, r80   C FIXME not urgent
+       fdtox   a16, a16
+       fdtox   a32, a32
+       fdtox   a48, a48
+       std     a00, [%sp+2223+0]
+       std     a16, [%sp+2223+8]
+       std     a32, [%sp+2223+16]
+       std     a48, [%sp+2223+24]
+       add     %i2, 8, %i2
+
+       fdtox   r64, a00
+       ldx     [%i0+%i2], rlimb        C read rp[i]
+       fdtox   r80, a16
+       ldx     [%sp+2223+0], i00
+       ldx     [%sp+2223+8], i16
+       ldx     [%sp+2223+16], i32
+       ldx     [%sp+2223+24], i48
+       std     a00, [%sp+2223+0]
+       std     a16, [%sp+2223+8]
+       add     %i2, 8, %i2
+
+       srlx    rlimb, 32, %g4          C HI(rlimb)
+       and     rlimb, xffffffff, %g5   C LO(rlimb)
+       add     i00, %g5, %g5           C i00+ now in g5
+       ldx     [%sp+2223+0], i00
+       srlx    i16, 48, %l4            C (i16 >> 48)
+       mov     i16, %g2
+       ldx     [%sp+2223+8], i16
+       srlx    i48, 16, %l5            C (i48 >> 16)
+       add     i32, %g4, %g4           C i32+ now in g4
+       sllx    i48, 32, %l6            C (i48 << 32)
+       srlx    %g4, 32, %o3            C (i32 >> 32)
+       add     %l5, %l4, %o1           C hi64- in %o1
+       std     a00, [%sp+2223+0]
+       sllx    %g4, 16, %o2            C (i32 << 16)
+       add     %o3, %o1, %o1           C hi64 in %o1   1st ASSIGNMENT
+       std     a16, [%sp+2223+8]
+       sllx    %o1, 48, %o3            C (hi64 << 48)
+       add     %g2, %o2, %o2           C mi64- in %o2
+       add     %l6, %o2, %o2           C mi64- in %o2
+       sub     %o2, %o3, %o2           C mi64 in %o2   1st ASSIGNMENT
+       add     cy, %g5, %o4            C x = prev(i00) + cy
+       b       .L_out_1
+       add     %i2, 8, %i2
+
+.L_two_or_more:
+       ld      [%i5+%i2], %f3          C read low 32 bits of up[i]
+       fmuld   u32, v32, r64   C FIXME not urgent
+       faddd   p32, r32, a32
+       ld      [%i1+%i2], %f5          C read high 32 bits of up[i]
+       fdtox   a00, a00
+       faddd   p48, r48, a48
+       fmuld   u32, v48, r80   C FIXME not urgent
+       fdtox   a16, a16
+       fdtox   a32, a32
+       fxtod   %f2, u00
+       fxtod   %f4, u32
+       fdtox   a48, a48
+       std     a00, [%sp+2223+0]
+       fmuld   u00, v00, p00
+       std     a16, [%sp+2223+8]
+       fmuld   u00, v16, p16
+       std     a32, [%sp+2223+16]
+       fmuld   u00, v32, p32
+       std     a48, [%sp+2223+24]
+       faddd   p00, r64, a00
+       fmuld   u32, v00, r32
+       faddd   p16, r80, a16
+       fmuld   u00, v48, p48
+       addcc   %i2, 8, %i2
+       bnz,pt  %icc, .L_three_or_more
+       fmuld   u32, v16, r48
+
+.L_two:
+       fmuld   u32, v32, r64   C FIXME not urgent
+       faddd   p32, r32, a32
+       fdtox   a00, a00
+       ldx     [%i0+%i2], rlimb        C read rp[i]
+       faddd   p48, r48, a48
+       fmuld   u32, v48, r80   C FIXME not urgent
+       fdtox   a16, a16
+       ldx     [%sp+2223+0], i00
+       fdtox   a32, a32
+       ldx     [%sp+2223+8], i16
+       ldx     [%sp+2223+16], i32
+       ldx     [%sp+2223+24], i48
+       fdtox   a48, a48
+       std     a00, [%sp+2223+0]
+       std     a16, [%sp+2223+8]
+       std     a32, [%sp+2223+16]
+       std     a48, [%sp+2223+24]
+       add     %i2, 8, %i2
+
+       fdtox   r64, a00
+       srlx    rlimb, 32, %g4          C HI(rlimb)
+       and     rlimb, xffffffff, %g5   C LO(rlimb)
+       ldx     [%i0+%i2], rlimb        C read rp[i]
+       add     i00, %g5, %g5           C i00+ now in g5
+       fdtox   r80, a16
+       ldx     [%sp+2223+0], i00
+       srlx    i16, 48, %l4            C (i16 >> 48)
+       mov     i16, %g2
+       ldx     [%sp+2223+8], i16
+       srlx    i48, 16, %l5            C (i48 >> 16)
+       add     i32, %g4, %g4           C i32+ now in g4
+       ldx     [%sp+2223+16], i32
+       sllx    i48, 32, %l6            C (i48 << 32)
+       ldx     [%sp+2223+24], i48
+       srlx    %g4, 32, %o3            C (i32 >> 32)
+       add     %l5, %l4, %o1           C hi64- in %o1
+       std     a00, [%sp+2223+0]
+       sllx    %g4, 16, %o2            C (i32 << 16)
+       add     %o3, %o1, %o1           C hi64 in %o1   1st ASSIGNMENT
+       std     a16, [%sp+2223+8]
+       sllx    %o1, 48, %o3            C (hi64 << 48)
+       add     %g2, %o2, %o2           C mi64- in %o2
+       add     %l6, %o2, %o2           C mi64- in %o2
+       sub     %o2, %o3, %o2           C mi64 in %o2   1st ASSIGNMENT
+       add     cy, %g5, %o4            C x = prev(i00) + cy
+       b       .L_out_2
+       add     %i2, 8, %i2
+
+.L_three_or_more:
+       ld      [%i5+%i2], %f3          C read low 32 bits of up[i]
+       fmuld   u32, v32, r64   C FIXME not urgent
+       faddd   p32, r32, a32
+       ld      [%i1+%i2], %f5          C read high 32 bits of up[i]
+       fdtox   a00, a00
+       ldx     [%i0+%i2], rlimb        C read rp[i]
+       faddd   p48, r48, a48
+       fmuld   u32, v48, r80   C FIXME not urgent
+       fdtox   a16, a16
+       ldx     [%sp+2223+0], i00
+       fdtox   a32, a32
+       ldx     [%sp+2223+8], i16
+       fxtod   %f2, u00
+       ldx     [%sp+2223+16], i32
+       fxtod   %f4, u32
+       ldx     [%sp+2223+24], i48
+       fdtox   a48, a48
+       std     a00, [%sp+2223+0]
+       fmuld   u00, v00, p00
+       std     a16, [%sp+2223+8]
+       fmuld   u00, v16, p16
+       std     a32, [%sp+2223+16]
+       fmuld   u00, v32, p32
+       std     a48, [%sp+2223+24]
+       faddd   p00, r64, a00
+       fmuld   u32, v00, r32
+       faddd   p16, r80, a16
+       fmuld   u00, v48, p48
+       addcc   %i2, 8, %i2
+       bnz,pt  %icc, .L_four_or_more
+       fmuld   u32, v16, r48
+
+.L_three:
+       fmuld   u32, v32, r64   C FIXME not urgent
+       faddd   p32, r32, a32
+       fdtox   a00, a00
+       srlx    rlimb, 32, %g4          C HI(rlimb)
+       and     rlimb, xffffffff, %g5   C LO(rlimb)
+       ldx     [%i0+%i2], rlimb        C read rp[i]
+       faddd   p48, r48, a48
+       add     i00, %g5, %g5           C i00+ now in g5
+       fmuld   u32, v48, r80   C FIXME not urgent
+       fdtox   a16, a16
+       ldx     [%sp+2223+0], i00
+       fdtox   a32, a32
+       srlx    i16, 48, %l4            C (i16 >> 48)
+       mov     i16, %g2
+       ldx     [%sp+2223+8], i16
+       srlx    i48, 16, %l5            C (i48 >> 16)
+       add     i32, %g4, %g4           C i32+ now in g4
+       ldx     [%sp+2223+16], i32
+       sllx    i48, 32, %l6            C (i48 << 32)
+       ldx     [%sp+2223+24], i48
+       fdtox   a48, a48
+       srlx    %g4, 32, %o3            C (i32 >> 32)
+       add     %l5, %l4, %o1           C hi64- in %o1
+       std     a00, [%sp+2223+0]
+       sllx    %g4, 16, %o2            C (i32 << 16)
+       add     %o3, %o1, %o1           C hi64 in %o1   1st ASSIGNMENT
+       std     a16, [%sp+2223+8]
+       sllx    %o1, 48, %o3            C (hi64 << 48)
+       add     %g2, %o2, %o2           C mi64- in %o2
+       std     a32, [%sp+2223+16]
+       add     %l6, %o2, %o2           C mi64- in %o2
+       std     a48, [%sp+2223+24]
+       sub     %o2, %o3, %o2           C mi64 in %o2   1st ASSIGNMENT
+       add     cy, %g5, %o4            C x = prev(i00) + cy
+       b       .L_out_3
+       add     %i2, 8, %i2
+
+.L_four_or_more:
+       ld      [%i5+%i2], %f3          C read low 32 bits of up[i]
+       fmuld   u32, v32, r64   C FIXME not urgent
+       faddd   p32, r32, a32
+       ld      [%i1+%i2], %f5          C read high 32 bits of up[i]
+       fdtox   a00, a00
+       srlx    rlimb, 32, %g4          C HI(rlimb)
+       and     rlimb, xffffffff, %g5   C LO(rlimb)
+       ldx     [%i0+%i2], rlimb        C read rp[i]
+       faddd   p48, r48, a48
+       add     i00, %g5, %g5           C i00+ now in g5
+       fmuld   u32, v48, r80   C FIXME not urgent
+       fdtox   a16, a16
+       ldx     [%sp+2223+0], i00
+       fdtox   a32, a32
+       srlx    i16, 48, %l4            C (i16 >> 48)
+       mov     i16, %g2
+       ldx     [%sp+2223+8], i16
+       fxtod   %f2, u00
+       srlx    i48, 16, %l5            C (i48 >> 16)
+       add     i32, %g4, %g4           C i32+ now in g4
+       ldx     [%sp+2223+16], i32
+       fxtod   %f4, u32
+       sllx    i48, 32, %l6            C (i48 << 32)
+       ldx     [%sp+2223+24], i48
+       fdtox   a48, a48
+       srlx    %g4, 32, %o3            C (i32 >> 32)
+       add     %l5, %l4, %o1           C hi64- in %o1
+       std     a00, [%sp+2223+0]
+       fmuld   u00, v00, p00
+       sllx    %g4, 16, %o2            C (i32 << 16)
+       add     %o3, %o1, %o1           C hi64 in %o1   1st ASSIGNMENT
+       std     a16, [%sp+2223+8]
+       fmuld   u00, v16, p16
+       sllx    %o1, 48, %o3            C (hi64 << 48)
+       add     %g2, %o2, %o2           C mi64- in %o2
+       std     a32, [%sp+2223+16]
+       fmuld   u00, v32, p32
+       add     %l6, %o2, %o2           C mi64- in %o2
+       std     a48, [%sp+2223+24]
+       faddd   p00, r64, a00
+       fmuld   u32, v00, r32
+       sub     %o2, %o3, %o2           C mi64 in %o2   1st ASSIGNMENT
+       faddd   p16, r80, a16
+       fmuld   u00, v48, p48
+       add     cy, %g5, %o4            C x = prev(i00) + cy
+       addcc   %i2, 8, %i2
+       bnz,pt  %icc, .Loop
+       fmuld   u32, v16, r48
+
+.L_four:
+       b,a     .L_out_4
+
+C BEGIN MAIN LOOP
+       .align  16
+.Loop:
+C 00
+       srlx    %o4, 16, %o5            C (x >> 16)
+       ld      [%i5+%i2], %f3          C read low 32 bits of up[i]
+       fmuld   u32, v32, r64   C FIXME not urgent
+       faddd   p32, r32, a32
+C 01
+       add     %o5, %o2, %o2           C mi64 in %o2   2nd ASSIGNMENT
+       and     %o4, xffff, %o5         C (x & 0xffff)
+       ld      [%i1+%i2], %f5          C read high 32 bits of up[i]
+       fdtox   a00, a00
+C 02
+       srlx    rlimb, 32, %g4          C HI(rlimb)
+       and     rlimb, xffffffff, %g5   C LO(rlimb)
+       ldx     [%i0+%i2], rlimb        C read rp[i]
+       faddd   p48, r48, a48
+C 03
+       srlx    %o2, 48, %o7            C (mi64 >> 48)
+       add     i00, %g5, %g5           C i00+ now in g5
+       fmuld   u32, v48, r80   C FIXME not urgent
+       fdtox   a16, a16
+C 04
+       sllx    %o2, 16, %i3            C (mi64 << 16)
+       add     %o7, %o1, cy            C new cy
+       ldx     [%sp+2223+0], i00
+       fdtox   a32, a32
+C 05
+       srlx    i16, 48, %l4            C (i16 >> 48)
+       mov     i16, %g2
+       ldx     [%sp+2223+8], i16
+       fxtod   %f2, u00
+C 06
+       srlx    i48, 16, %l5            C (i48 >> 16)
+       add     i32, %g4, %g4           C i32+ now in g4
+       ldx     [%sp+2223+16], i32
+       fxtod   %f4, u32
+C 07
+       sllx    i48, 32, %l6            C (i48 << 32)
+       or      %i3, %o5, %o5
+       ldx     [%sp+2223+24], i48
+       fdtox   a48, a48
+C 08
+       srlx    %g4, 32, %o3            C (i32 >> 32)
+       add     %l5, %l4, %o1           C hi64- in %o1
+       std     a00, [%sp+2223+0]
+       fmuld   u00, v00, p00
+C 09
+       sllx    %g4, 16, %o2            C (i32 << 16)
+       add     %o3, %o1, %o1           C hi64 in %o1   1st ASSIGNMENT
+       std     a16, [%sp+2223+8]
+       fmuld   u00, v16, p16
+C 10
+       sllx    %o1, 48, %o3            C (hi64 << 48)
+       add     %g2, %o2, %o2           C mi64- in %o2
+       std     a32, [%sp+2223+16]
+       fmuld   u00, v32, p32
+C 11
+       add     %l6, %o2, %o2           C mi64- in %o2
+       std     a48, [%sp+2223+24]
+       faddd   p00, r64, a00
+       fmuld   u32, v00, r32
+C 12
+       sub     %o2, %o3, %o2           C mi64 in %o2   1st ASSIGNMENT
+       stx     %o5, [%i4+%i2]
+       faddd   p16, r80, a16
+       fmuld   u00, v48, p48
+C 13
+       add     cy, %g5, %o4            C x = prev(i00) + cy
+       addcc   %i2, 8, %i2
+       bnz,pt  %icc, .Loop
+       fmuld   u32, v16, r48
+C END MAIN LOOP
+
+.L_out_4:
+       srlx    %o4, 16, %o5            C (x >> 16)
+       fmuld   u32, v32, r64   C FIXME not urgent
+       faddd   p32, r32, a32
+       add     %o5, %o2, %o2           C mi64 in %o2   2nd ASSIGNMENT
+       and     %o4, xffff, %o5         C (x & 0xffff)
+       fdtox   a00, a00
+       srlx    rlimb, 32, %g4          C HI(rlimb)
+       and     rlimb, xffffffff, %g5   C LO(rlimb)
+       ldx     [%i0+%i2], rlimb        C read rp[i]
+       faddd   p48, r48, a48
+       srlx    %o2, 48, %o7            C (mi64 >> 48)
+       add     i00, %g5, %g5           C i00+ now in g5
+       fmuld   u32, v48, r80   C FIXME not urgent
+       fdtox   a16, a16
+       sllx    %o2, 16, %i3            C (mi64 << 16)
+       add     %o7, %o1, cy            C new cy
+       ldx     [%sp+2223+0], i00
+       fdtox   a32, a32
+       srlx    i16, 48, %l4            C (i16 >> 48)
+       mov     i16, %g2
+       ldx     [%sp+2223+8], i16
+       srlx    i48, 16, %l5            C (i48 >> 16)
+       add     i32, %g4, %g4           C i32+ now in g4
+       ldx     [%sp+2223+16], i32
+       sllx    i48, 32, %l6            C (i48 << 32)
+       or      %i3, %o5, %o5
+       ldx     [%sp+2223+24], i48
+       fdtox   a48, a48
+       srlx    %g4, 32, %o3            C (i32 >> 32)
+       add     %l5, %l4, %o1           C hi64- in %o1
+       std     a00, [%sp+2223+0]
+       sllx    %g4, 16, %o2            C (i32 << 16)
+       add     %o3, %o1, %o1           C hi64 in %o1   1st ASSIGNMENT
+       std     a16, [%sp+2223+8]
+       sllx    %o1, 48, %o3            C (hi64 << 48)
+       add     %g2, %o2, %o2           C mi64- in %o2
+       std     a32, [%sp+2223+16]
+       add     %l6, %o2, %o2           C mi64- in %o2
+       std     a48, [%sp+2223+24]
+       sub     %o2, %o3, %o2           C mi64 in %o2   1st ASSIGNMENT
+       stx     %o5, [%i4+%i2]
+       add     cy, %g5, %o4            C x = prev(i00) + cy
+       add     %i2, 8, %i2
+.L_out_3:
+       srlx    %o4, 16, %o5            C (x >> 16)
+       add     %o5, %o2, %o2           C mi64 in %o2   2nd ASSIGNMENT
+       and     %o4, xffff, %o5         C (x & 0xffff)
+       fdtox   r64, a00
+       srlx    rlimb, 32, %g4          C HI(rlimb)
+       and     rlimb, xffffffff, %g5   C LO(rlimb)
+       ldx     [%i0+%i2], rlimb        C read rp[i]
+       srlx    %o2, 48, %o7            C (mi64 >> 48)
+       add     i00, %g5, %g5           C i00+ now in g5
+       fdtox   r80, a16
+       sllx    %o2, 16, %i3            C (mi64 << 16)
+       add     %o7, %o1, cy            C new cy
+       ldx     [%sp+2223+0], i00
+       srlx    i16, 48, %l4            C (i16 >> 48)
+       mov     i16, %g2
+       ldx     [%sp+2223+8], i16
+       srlx    i48, 16, %l5            C (i48 >> 16)
+       add     i32, %g4, %g4           C i32+ now in g4
+       ldx     [%sp+2223+16], i32
+       sllx    i48, 32, %l6            C (i48 << 32)
+       or      %i3, %o5, %o5
+       ldx     [%sp+2223+24], i48
+       srlx    %g4, 32, %o3            C (i32 >> 32)
+       add     %l5, %l4, %o1           C hi64- in %o1
+       std     a00, [%sp+2223+0]
+       sllx    %g4, 16, %o2            C (i32 << 16)
+       add     %o3, %o1, %o1           C hi64 in %o1   1st ASSIGNMENT
+       std     a16, [%sp+2223+8]
+       sllx    %o1, 48, %o3            C (hi64 << 48)
+       add     %g2, %o2, %o2           C mi64- in %o2
+       add     %l6, %o2, %o2           C mi64- in %o2
+       sub     %o2, %o3, %o2           C mi64 in %o2   1st ASSIGNMENT
+       stx     %o5, [%i4+%i2]
+       add     cy, %g5, %o4            C x = prev(i00) + cy
+       add     %i2, 8, %i2
+.L_out_2:
+       srlx    %o4, 16, %o5            C (x >> 16)
+       add     %o5, %o2, %o2           C mi64 in %o2   2nd ASSIGNMENT
+       and     %o4, xffff, %o5         C (x & 0xffff)
+       srlx    rlimb, 32, %g4          C HI(rlimb)
+       and     rlimb, xffffffff, %g5   C LO(rlimb)
+       srlx    %o2, 48, %o7            C (mi64 >> 48)
+       add     i00, %g5, %g5           C i00+ now in g5
+       sllx    %o2, 16, %i3            C (mi64 << 16)
+       add     %o7, %o1, cy            C new cy
+       ldx     [%sp+2223+0], i00
+       srlx    i16, 48, %l4            C (i16 >> 48)
+       mov     i16, %g2
+       ldx     [%sp+2223+8], i16
+       srlx    i48, 16, %l5            C (i48 >> 16)
+       add     i32, %g4, %g4           C i32+ now in g4
+       sllx    i48, 32, %l6            C (i48 << 32)
+       or      %i3, %o5, %o5
+       srlx    %g4, 32, %o3            C (i32 >> 32)
+       add     %l5, %l4, %o1           C hi64- in %o1
+       sllx    %g4, 16, %o2            C (i32 << 16)
+       add     %o3, %o1, %o1           C hi64 in %o1   1st ASSIGNMENT
+       sllx    %o1, 48, %o3            C (hi64 << 48)
+       add     %g2, %o2, %o2           C mi64- in %o2
+       add     %l6, %o2, %o2           C mi64- in %o2
+       sub     %o2, %o3, %o2           C mi64 in %o2   1st ASSIGNMENT
+       stx     %o5, [%i4+%i2]
+       add     cy, %g5, %o4            C x = prev(i00) + cy
+       add     %i2, 8, %i2
+.L_out_1:
+       srlx    %o4, 16, %o5            C (x >> 16)
+       add     %o5, %o2, %o2           C mi64 in %o2   2nd ASSIGNMENT
+       and     %o4, xffff, %o5         C (x & 0xffff)
+       srlx    %o2, 48, %o7            C (mi64 >> 48)
+       sllx    %o2, 16, %i3            C (mi64 << 16)
+       add     %o7, %o1, cy            C new cy
+       or      %i3, %o5, %o5
+       stx     %o5, [%i4+%i2]
+
+       sllx    i00, 0, %g2
+       add     %g2, cy, cy
+       sllx    i16, 16, %g3
+       add     %g3, cy, cy
+
+       return  %i7+8
+       mov     cy, %o0
+EPILOGUE(mpn_addmul_1)
diff --git a/mpn/sparc64/addmul_2.asm b/mpn/sparc64/addmul_2.asm

new file mode 100644 (file)

index 0000000..65efb51
--- /dev/null
+++ b/mpn/sparc64/addmul_2.asm
@@ -0,0 +1,540 @@
+dnl  SPARC v9 64-bit mpn_addmul_2 -- Multiply an n-limb number by a 2-limb
+dnl  number and add the result to an n-limb vector.
+
+dnl  Copyright 2002, 2003 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                  cycles/limb
+C UltraSPARC 1&2:      9
+C UltraSPARC 3:       10
+
+C Algorithm: We use 16 floating-point multiplies per limb product, with the
+C 2-limb v operand split into eight 16-bit pieces, and the n-limb u operand
+C split into 32-bit pieces.  We sum four 48-bit partial products using
+C floating-point add, then convert the resulting four 50-bit quantities and
+C transfer them to the integer unit.
+
+C Possible optimizations:
+C   1. Align the stack area where we transfer the four 50-bit product-sums
+C      to a 32-byte boundary.  That would minimize the cache collision.
+C      (UltraSPARC-1/2 use a direct-mapped cache.)  (Perhaps even better would
+C      be to align the area to map to the area immediately before up?)
+C   2. Perform two of the fp->int conversions with integer instructions.  We
+C      can get almost ten free IEU slots, if we clean up bookkeeping and the
+C      silly carry-limb code.
+C   3. For an mpn_addmul_1 based on this, we need to fix the silly carry-limb
+C      code.
+
+C OSP (Overlapping software pipeline) version of mpn_mul_basecase:
+C Operand swap will require 8 LDDA and 8 FXTOD, which will mean 8 cycles.
+C FI   = 20
+C L    =  9 x un * vn
+C WDFI = 10 x vn / 2
+C WD   = 4
+
+C Instruction classification (as per UltraSPARC functional units).
+C Assuming silly carry code is fixed.  Includes bookkeeping.
+C
+C               mpn_addmul_X     mpn_mul_X
+C                1       2       1       2
+C               ==========      ==========
+C      FM        8      16       8      16
+C      FA       10      18      10      18
+C     MEM       12      12      10      10
+C  ISHIFT        6       6       6       6
+C IADDLOG       11      11      10      10
+C  BRANCH        1       1       1       1
+C
+C TOTAL IEU     17      17      16      16
+C TOTAL         48      64      45      61
+C
+C IEU cycles     8.5     8.5     8       8
+C MEM cycles    12      12      10      10
+C ISSUE cycles  12      16      11.25   15.25
+C FPU cycles    10      18      10      18
+C cycles/loop   12      18      12      18
+C cycles/limb   12       9      12       9
+
+
+C INPUT PARAMETERS
+C rp[n + 1]    i0
+C up[n]                i1
+C n            i2
+C vp[2]                i3
+
+
+ASM_START()
+C NOTE(review): REGISTER is defined outside this file; it presumably tells
+C the assembler that %g2 and %g3 are used as scratch -- confirm.  Both are
+C written by the code below (%g3 is also aliased as rlimb).
+       REGISTER(%g2,#scratch)
+       REGISTER(%g3,#scratch)
+
+C Combine registers:
+C u00_hi= u32_hi
+C u00_lo= u32_lo
+C a000  = out000
+C a016  = out016
+C Free: f52 f54
+
+
+C Floating-point aliases.  pNNN receive the fmuld partial products; the
+C 096/112 products have an a copy (written by the u00 multiplies) and a b
+C copy (written by the u32 multiplies).
+define(`p000', `%f8')  define(`p016',`%f10')
+define(`p032',`%f12')  define(`p048',`%f14')
+define(`p064',`%f16')  define(`p080',`%f18')
+define(`p096a',`%f20') define(`p112a',`%f22')
+define(`p096b',`%f56') define(`p112b',`%f58')
+
+C outNNN receive the fdtox results that are stored to the stack with std.
+define(`out000',`%f0') define(`out016',`%f6')
+
+C vNNN hold the eight 16-bit pieces of v as doubles: v000..v048 come from
+C vp[0], v064..v112 from vp[1].
+define(`v000',`%f24')  define(`v016',`%f26')
+define(`v032',`%f28')  define(`v048',`%f30')
+define(`v064',`%f44')  define(`v080',`%f46')
+define(`v096',`%f48')  define(`v112',`%f50')
+
+C u00 and u32 are the fxtod results for the low and high 32-bit half of the
+C current up limb.
+define(`u00',`%f32')   define(`u32', `%f34')
+
+C aNNN hold the accumulated sums of partial products.
+define(`a000',`%f36')  define(`a016',`%f38')
+define(`a032',`%f40')  define(`a048',`%f42')
+define(`a064',`%f60')  define(`a080',`%f62')
+
+C Single-precision halves of %f2 and %f4: the _lo halves are loaded with 32
+C bits of up, the _hi halves are zeroed during initialization, and fxtod is
+C applied to the register named by the _hi alias.
+define(`u00_hi',`%f2') define(`u32_hi',`%f4')
+define(`u00_lo',`%f3') define(`u32_lo',`%f5')
+
+C Integer aliases.  cy is the running carry, i00/i16 are converted sums
+C reloaded from the stack, r00/r32 are the low/high 32-bit words of rp read
+C with lduw, and rlimb accumulates them.  xffffffff and xffff are the masks
+C set up during initialization; xffff is only written and used on the
+C non-VIS initialization path.
+define(`cy',`%g1')
+define(`rlimb',`%g3')
+define(`i00',`%l0')    define(`i16',`%l1')
+define(`r00',`%l2')    define(`r32',`%l3')
+define(`xffffffff',`%l7')
+define(`xffff',`%o0')
+
+
+PROLOGUE(mpn_addmul_2)
+
+C Overview (as implemented below): each up[] limb is handled as two 32-bit
+C halves (u00 = low half, u32 = high half).  Every half is multiplied by the
+C eight 16-bit pieces of v in the FPU, the products are summed with faddd,
+C converted with fdtox and handed to the integer unit through the stack slots
+C at %sp+2223+0..24.  The integer code adds the matching 32-bit word of rp[]
+C and the running carry, and stores one 32-bit result word per half with stw.
+C The final carry limb is returned in %o0.
+
+C Initialization.  (1) Split v operand into eight 16-bit chunks and store them
+C as IEEE double in fp registers.  (2) Clear upper 32 bits of fp register pairs
+C f2 and f4.  (3) Store masks in registers aliased to `xffff' and `xffffffff'.
+C This code could be better scheduled.
+
+C New register window with a 256-byte frame.  The scratch slots used below
+C are %sp+2223+0 .. %sp+2223+56.
+C NOTE(review): 2223 is presumably the 2047 stack bias plus 176 -- confirm
+C against the SPARC V9 ABI.
+       save    %sp, -256, %sp
+
+C With HAVE_VIS the pieces of v are loaded directly into fp registers with
+C ldda through %asi (NOTE(review): ASI 0xD2 is presumably a 16-bit short
+C load -- confirm).  Otherwise vp[0] and vp[1] are split with integer shifts
+C and masks, written to the stack and reloaded with ldd.
+ifdef(`HAVE_VIS',
+`      mov     -1, %g4
+       wr      %g0, 0xD2, %asi
+       srlx    %g4, 32, xffffffff      C store mask in register `xffffffff'
+       ldda    [%i3+6] %asi, v000
+       ldda    [%i3+4] %asi, v016
+       ldda    [%i3+2] %asi, v032
+       ldda    [%i3+0] %asi, v048
+       fxtod   v000, v000
+       ldda    [%i3+14] %asi, v064
+       fxtod   v016, v016
+       ldda    [%i3+12] %asi, v080
+       fxtod   v032, v032
+       ldda    [%i3+10] %asi, v096
+       fxtod   v048, v048
+       ldda    [%i3+8] %asi, v112
+       fxtod   v064, v064
+       fxtod   v080, v080
+       fxtod   v096, v096
+       fxtod   v112, v112
+       fzero   u00_hi
+       fzero   u32_hi
+',
+`      mov     -1, %g4
+       ldx     [%i3+0], %l0            C vp[0]
+       srlx    %g4, 48, xffff          C store mask in register `xffff'
+       ldx     [%i3+8], %l1            C vp[1]
+
+       and     %l0, xffff, %g2
+       stx     %g2, [%sp+2223+0]
+       srlx    %l0, 16, %g3
+       and     %g3, xffff, %g3
+       stx     %g3, [%sp+2223+8]
+       srlx    %l0, 32, %g2
+       and     %g2, xffff, %g2
+       stx     %g2, [%sp+2223+16]
+       srlx    %l0, 48, %g3
+       stx     %g3, [%sp+2223+24]
+       and     %l1, xffff, %g2
+       stx     %g2, [%sp+2223+32]
+       srlx    %l1, 16, %g3
+       and     %g3, xffff, %g3
+       stx     %g3, [%sp+2223+40]
+       srlx    %l1, 32, %g2
+       and     %g2, xffff, %g2
+       stx     %g2, [%sp+2223+48]
+       srlx    %l1, 48, %g3
+       stx     %g3, [%sp+2223+56]
+
+       srlx    %g4, 32, xffffffff      C store mask in register `xffffffff'
+
+       ldd     [%sp+2223+0], v000
+       ldd     [%sp+2223+8], v016
+       ldd     [%sp+2223+16], v032
+       ldd     [%sp+2223+24], v048
+       fxtod   v000, v000
+       ldd     [%sp+2223+32], v064
+       fxtod   v016, v016
+       ldd     [%sp+2223+40], v080
+       fxtod   v032, v032
+       ldd     [%sp+2223+48], v096
+       fxtod   v048, v048
+       ldd     [%sp+2223+56], v112
+       fxtod   v064, v064
+       ld      [%sp+2223+0], u00_hi    C zero u00_hi
+       fxtod   v080, v080
+       ld      [%sp+2223+0], u32_hi    C zero u32_hi
+       fxtod   v096, v096
+       fxtod   v112, v112
+')
+C Initialization done.
+C Clear %g2, rlimb and %g4, which form the carry state read at the top of
+C the integer code, and bias rp by one limb: the integer code addresses the
+C current limb at %i0+8 before its add %i0, 8, %i0 and at %i0+0 and %i0+4
+C after it.
+       mov     0, %g2
+       mov     0, rlimb
+       mov     0, %g4
+       add     %i0, -8, %i0            C BOOKKEEPING
+
+C Start software pipeline.
+C First half-step: the products of the low half of up[0] go straight into
+C a000..a048 since there is nothing to accumulate yet.  %i2 becomes n-1, so
+C the branch below falls into .L1 only when n is 1.
+
+       ld      [%i1+4], u00_lo         C read low 32 bits of up[i]
+       fxtod   u00_hi, u00
+C mid
+       ld      [%i1+0], u32_lo         C read high 32 bits of up[i]
+       fmuld   u00, v000, a000
+       fmuld   u00, v016, a016
+       fmuld   u00, v032, a032
+       fmuld   u00, v048, a048
+       add     %i2, -1, %i2            C BOOKKEEPING
+       fmuld   u00, v064, p064
+       add     %i1, 8, %i1             C BOOKKEEPING
+       fxtod   u32_hi, u32
+       fmuld   u00, v080, p080
+       fmuld   u00, v096, p096a
+       brnz,pt %i2, .L_2_or_more
+        fmuld  u00, v112, p112a
+
+C n is 1: finish the high-half products of the only limb, store the first
+C converted sums and continue directly at wind-down phase 2.
+.L1:   fdtox   a000, out000
+       fmuld   u32, v000, p000
+       fdtox   a016, out016
+       fmuld   u32, v016, p016
+       fmovd   p064, a064
+       fmuld   u32, v032, p032
+       fmovd   p080, a080
+       fmuld   u32, v048, p048
+       std     out000, [%sp+2223+16]
+       faddd   p000, a032, a000
+       fmuld   u32, v064, p064
+       std     out016, [%sp+2223+24]
+       fxtod   u00_hi, u00
+       faddd   p016, a048, a016
+       fmuld   u32, v080, p080
+       faddd   p032, a064, a032
+       fmuld   u32, v096, p096b
+       faddd   p048, a080, a048
+       fmuld   u32, v112, p112b
+C mid
+       fdtox   a000, out000
+       fdtox   a016, out016
+       faddd   p064, p096a, a064
+       faddd   p080, p112a, a080
+       std     out000, [%sp+2223+0]
+       b       .L_wd2
+        std    out016, [%sp+2223+8]
+
+C n is at least 2: same floating-point work as .L1, plus loading and
+C multiplying the next limb of up.  No integer carry work is done yet.
+.L_2_or_more:
+       ld      [%i1+4], u00_lo         C read low 32 bits of up[i]
+       fdtox   a000, out000
+       fmuld   u32, v000, p000
+       fdtox   a016, out016
+       fmuld   u32, v016, p016
+       fmovd   p064, a064
+       fmuld   u32, v032, p032
+       fmovd   p080, a080
+       fmuld   u32, v048, p048
+       std     out000, [%sp+2223+16]
+       faddd   p000, a032, a000
+       fmuld   u32, v064, p064
+       std     out016, [%sp+2223+24]
+       fxtod   u00_hi, u00
+       faddd   p016, a048, a016
+       fmuld   u32, v080, p080
+       faddd   p032, a064, a032
+       fmuld   u32, v096, p096b
+       faddd   p048, a080, a048
+       fmuld   u32, v112, p112b
+C mid
+       ld      [%i1+0], u32_lo         C read high 32 bits of up[i]
+       fdtox   a000, out000
+       fmuld   u00, v000, p000
+       fdtox   a016, out016
+       fmuld   u00, v016, p016
+       faddd   p064, p096a, a064
+       fmuld   u00, v032, p032
+       faddd   p080, p112a, a080
+       fmuld   u00, v048, p048
+       add     %i2, -1, %i2            C BOOKKEEPING
+       std     out000, [%sp+2223+0]
+       faddd   p000, a032, a000
+       fmuld   u00, v064, p064
+       add     %i1, 8, %i1             C BOOKKEEPING
+       std     out016, [%sp+2223+8]
+       fxtod   u32_hi, u32
+       faddd   p016, a048, a016
+       fmuld   u00, v080, p080
+       faddd   p032, a064, a032
+       fmuld   u00, v096, p096a
+       faddd   p048, a080, a048
+       brnz,pt %i2, .L_3_or_more
+        fmuld  u00, v112, p112a
+
+C n is 2: no pass through the main loop is needed, go to wind-down phase 1.
+       b       .Lend
+        nop
+
+C  64      32       0
+C   .       .       .
+C   .       |__rXXX_|  32
+C   .      |___cy___|  34
+C   .  |_______i00__|  50
+C  |_______i16__|   .  50
+
+
+C BEGIN MAIN LOOP
+C Each iteration loads one limb of up and stores one limb of rp as two
+C 32-bit words.  The first half uses stack slots +16/+24 and the low word of
+C rp; the second half (from midloop) uses slots +0/+8 and the high word.
+C In both halves the word stored is the low 32 bits of
+C i00 + rp word + cy + (i16 << 16); the carry into the next half is rebuilt
+C from %g2, rlimb and %g4.
+       .align  16
+.L_3_or_more:
+.Loop: ld      [%i1+4], u00_lo         C read low 32 bits of up[i]
+       and     %g2, xffffffff, %g2
+       fdtox   a000, out000
+       fmuld   u32, v000, p000
+C
+       lduw    [%i0+4+8], r00          C read low 32 bits of rp[i]
+       add     %g2, rlimb, %l5
+       fdtox   a016, out016
+       fmuld   u32, v016, p016
+C
+       srlx    %l5, 32, cy
+       ldx     [%sp+2223+16], i00
+       faddd   p064, p096b, a064
+       fmuld   u32, v032, p032
+C
+       add     %g4, cy, cy             C new cy
+       ldx     [%sp+2223+24], i16
+       faddd   p080, p112b, a080
+       fmuld   u32, v048, p048
+C
+       nop
+       std     out000, [%sp+2223+16]
+       faddd   p000, a032, a000
+       fmuld   u32, v064, p064
+C
+       add     i00, r00, rlimb
+       add     %i0, 8, %i0             C BOOKKEEPING
+       std     out016, [%sp+2223+24]
+       fxtod   u00_hi, u00
+C
+       sllx    i16, 16, %g2
+       add     cy, rlimb, rlimb
+       faddd   p016, a048, a016
+       fmuld   u32, v080, p080
+C
+       srlx    i16, 16, %g4
+       add     %g2, rlimb, %l5
+       faddd   p032, a064, a032
+       fmuld   u32, v096, p096b
+C
+       stw     %l5, [%i0+4]
+       nop
+       faddd   p048, a080, a048
+       fmuld   u32, v112, p112b
+C midloop
+       ld      [%i1+0], u32_lo         C read high 32 bits of up[i]
+       and     %g2, xffffffff, %g2
+       fdtox   a000, out000
+       fmuld   u00, v000, p000
+C
+       lduw    [%i0+0], r32            C read high 32 bits of rp[i]
+       add     %g2, rlimb, %l5
+       fdtox   a016, out016
+       fmuld   u00, v016, p016
+C
+       srlx    %l5, 32, cy
+       ldx     [%sp+2223+0], i00
+       faddd   p064, p096a, a064
+       fmuld   u00, v032, p032
+C
+       add     %g4, cy, cy             C new cy
+       ldx     [%sp+2223+8], i16
+       faddd   p080, p112a, a080
+       fmuld   u00, v048, p048
+C
+       add     %i2, -1, %i2            C BOOKKEEPING
+       std     out000, [%sp+2223+0]
+       faddd   p000, a032, a000
+       fmuld   u00, v064, p064
+C
+       add     i00, r32, rlimb
+       add     %i1, 8, %i1             C BOOKKEEPING
+       std     out016, [%sp+2223+8]
+       fxtod   u32_hi, u32
+C
+       sllx    i16, 16, %g2
+       add     cy, rlimb, rlimb
+       faddd   p016, a048, a016
+       fmuld   u00, v080, p080
+C
+       srlx    i16, 16, %g4
+       add     %g2, rlimb, %l5
+       faddd   p032, a064, a032
+       fmuld   u00, v096, p096a
+C
+       stw     %l5, [%i0+0]
+       faddd   p048, a080, a048
+       brnz,pt %i2, .Loop
+        fmuld  u00, v112, p112a
+C END MAIN LOOP
+
+C WIND-DOWN PHASE 1
+C No more loads from up.  The u32 products of the last limb are computed in
+C the first half; after mid only sums and conversions remain on the FP side.
+C The integer code is the same as in the loop body.
+.Lend: and     %g2, xffffffff, %g2
+       fdtox   a000, out000
+       fmuld   u32, v000, p000
+       lduw    [%i0+4+8], r00          C read low 32 bits of rp[i]
+       add     %g2, rlimb, %l5
+       fdtox   a016, out016
+       fmuld   u32, v016, p016
+       srlx    %l5, 32, cy
+       ldx     [%sp+2223+16], i00
+       faddd   p064, p096b, a064
+       fmuld   u32, v032, p032
+       add     %g4, cy, cy             C new cy
+       ldx     [%sp+2223+24], i16
+       faddd   p080, p112b, a080
+       fmuld   u32, v048, p048
+       std     out000, [%sp+2223+16]
+       faddd   p000, a032, a000
+       fmuld   u32, v064, p064
+       add     i00, r00, rlimb
+       add     %i0, 8, %i0             C BOOKKEEPING
+       std     out016, [%sp+2223+24]
+       sllx    i16, 16, %g2
+       add     cy, rlimb, rlimb
+       faddd   p016, a048, a016
+       fmuld   u32, v080, p080
+       srlx    i16, 16, %g4
+       add     %g2, rlimb, %l5
+       faddd   p032, a064, a032
+       fmuld   u32, v096, p096b
+       stw     %l5, [%i0+4]
+       faddd   p048, a080, a048
+       fmuld   u32, v112, p112b
+C mid
+       and     %g2, xffffffff, %g2
+       fdtox   a000, out000
+       lduw    [%i0+0], r32            C read high 32 bits of rp[i]
+       add     %g2, rlimb, %l5
+       fdtox   a016, out016
+       srlx    %l5, 32, cy
+       ldx     [%sp+2223+0], i00
+       faddd   p064, p096a, a064
+       add     %g4, cy, cy             C new cy
+       ldx     [%sp+2223+8], i16
+       faddd   p080, p112a, a080
+       std     out000, [%sp+2223+0]
+       add     i00, r32, rlimb
+       std     out016, [%sp+2223+8]
+       sllx    i16, 16, %g2
+       add     cy, rlimb, rlimb
+       srlx    i16, 16, %g4
+       add     %g2, rlimb, %l5
+       stw     %l5, [%i0+0]
+
+C WIND-DOWN PHASE 2
+C Only conversions remain on the FP side: a032/a048 in the first half and
+C a064/a080 after mid.  rp is still read and added here.
+.L_wd2:        and     %g2, xffffffff, %g2
+       fdtox   a032, out000
+       lduw    [%i0+4+8], r00          C read low 32 bits of rp[i]
+       add     %g2, rlimb, %l5
+       fdtox   a048, out016
+       srlx    %l5, 32, cy
+       ldx     [%sp+2223+16], i00
+       add     %g4, cy, cy             C new cy
+       ldx     [%sp+2223+24], i16
+       std     out000, [%sp+2223+16]
+       add     i00, r00, rlimb
+       add     %i0, 8, %i0             C BOOKKEEPING
+       std     out016, [%sp+2223+24]
+       sllx    i16, 16, %g2
+       add     cy, rlimb, rlimb
+       srlx    i16, 16, %g4
+       add     %g2, rlimb, %l5
+       stw     %l5, [%i0+4]
+C mid
+       and     %g2, xffffffff, %g2
+       fdtox   a064, out000
+       lduw    [%i0+0], r32            C read high 32 bits of rp[i]
+       add     %g2, rlimb, %l5
+       fdtox   a080, out016
+       srlx    %l5, 32, cy
+       ldx     [%sp+2223+0], i00
+       add     %g4, cy, cy             C new cy
+       ldx     [%sp+2223+8], i16
+       std     out000, [%sp+2223+0]
+       add     i00, r32, rlimb
+       std     out016, [%sp+2223+8]
+       sllx    i16, 16, %g2
+       add     cy, rlimb, rlimb
+       srlx    i16, 16, %g4
+       add     %g2, rlimb, %l5
+       stw     %l5, [%i0+4-4]
+
+C WIND-DOWN PHASE 3
+C Convert the last two products p096b/p112b.  rp is no longer read in this
+C phase: the values reloaded from the stack slots are combined with the
+C carry only, and the two words stored here are written without adding the
+C previous contents of rp.
+.L_wd3:        and     %g2, xffffffff, %g2
+       fdtox   p096b, out000
+       add     %g2, rlimb, %l5
+       fdtox   p112b, out016
+       srlx    %l5, 32, cy
+       ldx     [%sp+2223+16], rlimb
+       add     %g4, cy, cy             C new cy
+       ldx     [%sp+2223+24], i16
+       std     out000, [%sp+2223+16]
+       add     %i0, 8, %i0             C BOOKKEEPING
+       std     out016, [%sp+2223+24]
+       sllx    i16, 16, %g2
+       add     cy, rlimb, rlimb
+       srlx    i16, 16, %g4
+       add     %g2, rlimb, %l5
+       stw     %l5, [%i0+4]
+C mid
+       and     %g2, xffffffff, %g2
+       add     %g2, rlimb, %l5
+       srlx    %l5, 32, cy
+       ldx     [%sp+2223+0], rlimb
+       add     %g4, cy, cy             C new cy
+       ldx     [%sp+2223+8], i16
+       sllx    i16, 16, %g2
+       add     cy, rlimb, rlimb
+       srlx    i16, 16, %g4
+       add     %g2, rlimb, %l5
+       stw     %l5, [%i0+0]
+
+C Final carry: rebuild cy once more, reload the last converted pair and
+C return cy + i00 + (i16 << 16).  The last add sits in the delay slot of
+C return.
+       and     %g2, xffffffff, %g2
+       add     %g2, rlimb, %l5
+       srlx    %l5, 32, cy
+       ldx     [%sp+2223+16], i00
+       add     %g4, cy, cy             C new cy
+       ldx     [%sp+2223+24], i16
+
+       sllx    i16, 16, %g2
+       add     i00, cy, cy
+       return  %i7+8
+       add     %g2, cy, %o0
+EPILOGUE(mpn_addmul_2)
diff --git a/mpn/sparc64/copyd.asm b/mpn/sparc64/copyd.asm

new file mode 100644 (file)

index 0000000..8a73dba
--- /dev/null
+++ b/mpn/sparc64/copyd.asm
@@ -0,0 +1,75 @@
+dnl  SPARC v9 mpn_copyd -- Copy a limb vector, decrementing.
+
+dnl  Copyright 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                 cycles/limb
+C UltraSPARC 1&2:     2
+C UltraSPARC 3:              2.5
+
+C INPUT PARAMETERS
+C rptr %o0
+C sptr %o1
+C n    %o2
+
+ASM_START()
+       REGISTER(%g2,#scratch)
+       REGISTER(%g3,#scratch)
+PROLOGUE(mpn_copyd)
+       sllx    %o2,3,%g1               C g1 = n*8 = operand size in bytes
+       add     %g1,%o0,%o0             C rptr now points just past the end of dst
+       add     %g1,%o1,%o1             C sptr now points just past the end of src
+       addcc   %o2,-8,%o2              C bias n by -8 for the unrolled loop
+       bl,pt   %icc,L(end01234567)     C fewer than 8 limbs so skip the unrolled loop
+       nop
+L(loop1):                              C copy 8 limbs per pass from high addresses to low
+       ldx     [%o1-8],%g1
+       ldx     [%o1-16],%g2
+       ldx     [%o1-24],%g3
+       ldx     [%o1-32],%g4
+       ldx     [%o1-40],%g5
+       ldx     [%o1-48],%o3
+       ldx     [%o1-56],%o4
+       ldx     [%o1-64],%o5
+       add     %o1,-64,%o1             C step sptr down by 8 limbs
+       stx     %g1,[%o0-8]
+       stx     %g2,[%o0-16]
+       stx     %g3,[%o0-24]
+       stx     %g4,[%o0-32]
+       stx     %g5,[%o0-40]
+       stx     %o3,[%o0-48]
+       stx     %o4,[%o0-56]
+       stx     %o5,[%o0-64]
+       addcc   %o2,-8,%o2              C loop again while at least 8 limbs remain
+       bge,pt  %icc,L(loop1)
+       add     %o0,-64,%o0             C delay slot: step rptr down by 8 limbs
+L(end01234567):
+       addcc   %o2,8,%o2               C remove the bias: 0 to 7 limbs are left
+       bz,pn   %icc,L(end)
+       nop
+L(loop2):                              C copy the leftover limbs one at a time
+       ldx     [%o1-8],%g1
+       add     %o1,-8,%o1
+       addcc   %o2,-1,%o2
+       stx     %g1,[%o0-8]
+       bg,pt   %icc,L(loop2)
+       add     %o0,-8,%o0              C delay slot: step rptr down by one limb
+L(end):        retl
+       nop
+EPILOGUE(mpn_copyd)
diff --git a/mpn/sparc64/copyi.asm b/mpn/sparc64/copyi.asm

new file mode 100644 (file)

index 0000000..3158357
--- /dev/null
+++ b/mpn/sparc64/copyi.asm
@@ -0,0 +1,72 @@
+dnl  SPARC v9 mpn_copyi -- Copy a limb vector, incrementing.
+
+dnl  Copyright 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                 cycles/limb
+C UltraSPARC 1&2:     2
+C UltraSPARC 3:              2.5
+
+C INPUT PARAMETERS
+C rptr %o0
+C sptr %o1
+C n    %o2
+
+ASM_START()
+       REGISTER(%g2,#scratch)
+       REGISTER(%g3,#scratch)
+PROLOGUE(mpn_copyi)
+       addcc   %o2,-8,%o2              C bias n by -8 for the unrolled loop
+       bl,pt   %icc,L(end01234567)     C fewer than 8 limbs so skip the unrolled loop
+       nop
+L(loop1):                              C copy 8 limbs per pass from low addresses to high
+       ldx     [%o1+0],%g1
+       ldx     [%o1+8],%g2
+       ldx     [%o1+16],%g3
+       ldx     [%o1+24],%g4
+       ldx     [%o1+32],%g5
+       ldx     [%o1+40],%o3
+       ldx     [%o1+48],%o4
+       ldx     [%o1+56],%o5
+       add     %o1,64,%o1              C step sptr up by 8 limbs
+       stx     %g1,[%o0+0]
+       stx     %g2,[%o0+8]
+       stx     %g3,[%o0+16]
+       stx     %g4,[%o0+24]
+       stx     %g5,[%o0+32]
+       stx     %o3,[%o0+40]
+       stx     %o4,[%o0+48]
+       stx     %o5,[%o0+56]
+       addcc   %o2,-8,%o2              C loop again while at least 8 limbs remain
+       bge,pt  %icc,L(loop1)
+       add     %o0,64,%o0              C delay slot: step rptr up by 8 limbs
+L(end01234567):
+       addcc   %o2,8,%o2               C remove the bias: 0 to 7 limbs are left
+       bz,pn   %icc,L(end)
+       nop
+L(loop2):                              C copy the leftover limbs one at a time
+       ldx     [%o1+0],%g1
+       add     %o1,8,%o1
+       addcc   %o2,-1,%o2
+       stx     %g1,[%o0+0]
+       bg,pt   %icc,L(loop2)
+       add     %o0,8,%o0               C delay slot: step rptr up by one limb
+L(end):        retl
+       nop
+EPILOGUE(mpn_copyi)
diff --git a/mpn/sparc64/dive_1.c b/mpn/sparc64/dive_1.c

new file mode 100644 (file)

index 0000000..6f3d7c4
--- /dev/null
+++ b/mpn/sparc64/dive_1.c
@@ -0,0 +1,147 @@
+/* UltraSPARC 64 mpn_divexact_1 -- mpn by limb exact division.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2000, 2001, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#include "mpn/sparc64/sparc64.h"
+
+
+/*                 64-bit divisor   32-bit divisor
+                    cycles/limb      cycles/limb
+                     (approx)         (approx)
+   Ultrasparc 2i:      110               70
+*/
+
+
+/* There are two key ideas here to reduce mulx's.  Firstly when the divisor
+   is 32-bits the high of q*d can be calculated without the two 32x32->64
+   cross-products involving the high 32-bits of the divisor, that being zero
+   of course.  Secondly umul_ppmm_lowequal and umul_ppmm_half_lowequal save
+   one mulx (each) knowing the low of q*d is equal to the input limb l.
+
+   For size==1, a simple udivx is used.  This is faster than calculating an
+   inverse.
+
+   For a 32-bit divisor and small sizes, an attempt was made at a simple
+   udivx loop (two per 64-bit limb), but it turned out to be slower than
+   mul-by-inverse.  At size==2 the inverse is about 260 cycles total
+   compared to a udivx at 291.  Perhaps the latter would suit when size==2
+   but the high 32-bits of the second limb is zero (saving one udivx), but
+   it doesn't seem worth a special case just for that.  */
+
+void  /* Write the quotient {src,size} / divisor to {dst,size}; the division is meant to be exact.  */
+mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor)
+{
+  mp_limb_t  inverse, s, s_next, c, l, ls, q;
+  unsigned   rshift, lshift;
+  mp_limb_t  lshift_mask;
+  mp_limb_t  divisor_h;
+
+  ASSERT (size >= 1);
+  ASSERT (divisor != 0);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size));
+  ASSERT_MPN (src, size);
+  ASSERT_LIMB (divisor);
+
+  s = *src++;                 /* src low limb */
+  size--;                     /* size now counts the limbs not yet read */
+  if (size == 0)
+    {
+      *dst = s / divisor;     /* single limb: plain division, no inverse needed */
+      return;
+    }
+
+  if ((divisor & 1) == 0)
+    {
+      count_trailing_zeros (rshift, divisor);
+      divisor >>= rshift;     /* make divisor odd; src is shifted right by rshift below to match */
+    }
+  else
+    rshift = 0;
+
+  binvert_limb (inverse, divisor);  /* NOTE(review): presumably inverse*divisor == 1 mod 2^64 -- confirm in gmp-impl.h */
+
+  lshift = 64 - rshift;
+
+  /* lshift==64 means no shift, so must mask out other part in this case */
+  lshift_mask = (rshift == 0 ? 0 : MP_LIMB_T_MAX);
+
+  c = 0;                      /* borrow carried from one limb to the next */
+  divisor_h = HIGH32 (divisor);
+
+  if (divisor_h == 0)
+    {
+      /* 32-bit divisor */
+      do
+        {
+          s_next = *src++;
+          ls = (s >> rshift) | ((s_next << lshift) & lshift_mask);  /* next limb of src >> rshift */
+          s = s_next;
+
+          SUBC_LIMB (c, l, ls, c);
+
+          q = l * inverse;    /* low limb of the product is the quotient limb */
+          *dst++ = q;
+
+          umul_ppmm_half_lowequal (l, q, divisor, l);
+          c += l;             /* high limb of q*divisor joins the borrow for the next limb */
+
+          size--;
+        }
+      while (size != 0);
+
+      ls = s >> rshift;       /* top limb: there is no s_next to shift in */
+      l = ls - c;
+      q = l * inverse;
+      *dst = q;
+    }
+  else
+    {
+      /* 64-bit divisor */
+      mp_limb_t  divisor_l = LOW32 (divisor);
+      do
+        {
+          s_next = *src++;
+          ls = (s >> rshift) | ((s_next << lshift) & lshift_mask);  /* next limb of src >> rshift */
+          s = s_next;
+
+          SUBC_LIMB (c, l, ls, c);
+
+          q = l * inverse;    /* low limb of the product is the quotient limb */
+          *dst++ = q;
+
+          umul_ppmm_lowequal (l, q, divisor, divisor_h, divisor_l, l);
+          c += l;             /* high limb of q*divisor joins the borrow for the next limb */
+
+          size--;
+        }
+      while (size != 0);
+
+      ls = s >> rshift;       /* top limb: there is no s_next to shift in */
+      l = ls - c;
+      q = l * inverse;
+      *dst = q;
+    }
+}
diff --git a/mpn/sparc64/divrem_1.c b/mpn/sparc64/divrem_1.c

new file mode 100644 (file)

index 0000000..06de9a6
--- /dev/null
+++ b/mpn/sparc64/divrem_1.c
@@ -0,0 +1,232 @@
+/* UltraSparc 64 mpn_divrem_1 -- mpn by limb division.
+
+Copyright 1991, 1993, 1994, 1996, 1998, 1999, 2000, 2001, 2003 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#include "mpn/sparc64/sparc64.h"
+
+
+/*                   64-bit divisor       32-bit divisor
+                       cycles/limb          cycles/limb
+                        (approx)             (approx)
+                   integer  fraction    integer  fraction
+   Ultrasparc 2i:    160      160          122      96
+*/
+
+
+/* 32-bit divisors are treated in special case code.  This requires 4 mulx
+   per limb instead of 8 in the general case.
+
+   For big endian systems we need HALF_ENDIAN_ADJ included in the src[i]
+   addressing, to get the two halves of each limb read in the correct order.
+   This is kept in an adj variable.  Doing that measures about 4 c/l faster
+   than just writing HALF_ENDIAN_ADJ(i) in the integer loop.  The latter
+   shouldn't be 6 cycles worth of work, but perhaps it doesn't schedule well
+   (on gcc 3.2.1 at least).  The fraction loop doesn't seem affected, but we
+   still use a variable since that ought to work out best.  */
+
+mp_limb_t  /* returns the remainder; quotient limbs go to qp_limbptr, xsize_limbs fraction limbs lowest */
+mpn_divrem_1 (mp_ptr qp_limbptr, mp_size_t xsize_limbs,
+              mp_srcptr ap_limbptr, mp_size_t size_limbs, mp_limb_t d_limb)
+{
+  mp_size_t  total_size_limbs;
+  mp_size_t  i;
+
+  ASSERT (xsize_limbs >= 0);
+  ASSERT (size_limbs >= 0);
+  ASSERT (d_limb != 0);
+  /* FIXME: What's the correct overlap rule when xsize!=0? */
+  ASSERT (MPN_SAME_OR_SEPARATE_P (qp_limbptr + xsize_limbs,
+                                  ap_limbptr, size_limbs));
+
+  total_size_limbs = size_limbs + xsize_limbs;
+  if (UNLIKELY (total_size_limbs == 0))
+    return 0;
+
+  /* udivx is good for total_size==1, and no need to bother checking
+     limb<divisor, since if that's likely the caller should check */
+  if (UNLIKELY (total_size_limbs == 1))
+    {
+      mp_limb_t  a, q;
+      a = (LIKELY (size_limbs != 0) ? ap_limbptr[0] : 0);  /* a lone fraction limb divides zero */
+      q = a / d_limb;
+      qp_limbptr[0] = q;
+      return a - q*d_limb;
+    }
+
+  if (d_limb <= CNST_LIMB(0xFFFFFFFF))
+    {
+      /* 32-bit divisor: operands are processed as 32-bit halfwords */
+      mp_size_t  size, xsize, total_size, adj;
+      unsigned   *qp, n1, n0, q, r, nshift, norm_rmask;
+      mp_limb_t  dinv_limb;
+      const unsigned *ap;
+      int        norm, norm_rshift;
+
+      size = 2 * size_limbs;        /* sizes in halfwords from here on */
+      xsize = 2 * xsize_limbs;
+      total_size = size + xsize;
+
+      ap = (unsigned *) ap_limbptr;
+      qp = (unsigned *) qp_limbptr;
+
+      qp += xsize;  /* integer quotient halfwords sit above the xsize fraction halfwords */
+      r = 0;        /* initial remainder */
+
+      if (LIKELY (size != 0))
+        {
+          n1 = ap[size-1 + HALF_ENDIAN_ADJ(1)];
+
+          /* If the length of the source is uniformly distributed, then
+             there's a 50% chance of the high 32-bits being zero, which we
+             can skip.  */
+          if (n1 == 0)
+            {
+              n1 = ap[size-2 + HALF_ENDIAN_ADJ(0)];
+              total_size--;
+              size--;
+              ASSERT (size > 0);  /* because always even */
+              qp[size + HALF_ENDIAN_ADJ(1)] = 0;
+            }
+
+          /* Skip a division if high < divisor (high quotient 0).  Testing
+             here before normalizing will still skip as often as
+             possible.  */
+          if (n1 < d_limb)
+            {
+              r = n1;
+              size--;
+              qp[size + HALF_ENDIAN_ADJ(size)] = 0;
+              total_size--;
+              if (total_size == 0)
+                return r;
+            }
+        }
+
+      count_leading_zeros_32 (norm, d_limb);
+      norm -= 32;  /* NOTE(review): presumably the macro counts within 64 bits, hence the adjustment -- confirm in sparc64.h */
+      d_limb <<= norm;
+      r <<= norm;
+
+      norm_rshift = 32 - norm;
+      norm_rmask = (norm == 0 ? 0 : 0xFFFFFFFF);  /* norm==0 gives norm_rshift==32, so mask the shifted-in part to zero */
+
+      invert_half_limb (dinv_limb, d_limb);
+
+      if (LIKELY (size != 0))
+        {
+          i = size - 1;
+          adj = HALF_ENDIAN_ADJ (i);
+          n1 = ap[i + adj];
+          adj = -adj;               /* the adjustment alternates sign with each halfword index */
+          r |= ((n1 >> norm_rshift) & norm_rmask);
+          for ( ; i > 0; i--)
+            {
+              n0 = ap[i-1 + adj];
+              adj = -adj;
+              nshift = (n1 << norm) | ((n0 >> norm_rshift) & norm_rmask);
+              udiv_qrnnd_half_preinv (q, r, r, nshift, d_limb, dinv_limb);
+              qp[i + adj] = q;
+              n1 = n0;
+            }
+          nshift = n1 << norm;      /* lowest halfword: nothing further to shift in */
+          udiv_qrnnd_half_preinv (q, r, r, nshift, d_limb, dinv_limb);
+          qp[0 + HALF_ENDIAN_ADJ(0)] = q;
+        }
+      qp -= xsize;                  /* back to the base for the fraction halfwords */
+      adj = HALF_ENDIAN_ADJ (0);
+      for (i = xsize-1; i >= 0; i--)
+        {
+          udiv_qrnnd_half_preinv (q, r, r, 0, d_limb, dinv_limb);  /* fraction: divide with a zero halfword appended */
+          adj = -adj;
+          qp[i + adj] = q;
+        }
+
+      return r >> norm;             /* undo the normalization shift on the remainder */
+    }
+  else
+    {
+      /* 64-bit divisor: operands are processed as whole limbs */
+      mp_srcptr  ap;
+      mp_ptr     qp;
+      mp_size_t  size, xsize, total_size;
+      mp_limb_t  d, n1, n0, q, r, dinv, nshift, norm_rmask;
+      int        norm, norm_rshift;
+
+      ap = ap_limbptr;
+      qp = qp_limbptr;
+      size = size_limbs;
+      xsize = xsize_limbs;
+      total_size = total_size_limbs;
+      d = d_limb;
+
+      qp += total_size;   /* above high limb */
+      r = 0;              /* initial remainder */
+
+      if (LIKELY (size != 0))
+        {
+          /* Skip a division if high < divisor (high quotient 0).  Testing
+             here before normalizing will still skip as often as
+             possible.  */
+          n1 = ap[size-1];
+          if (n1 < d)
+            {
+              r = n1;
+              *--qp = 0;
+              total_size--;
+              if (total_size == 0)
+                return r;
+              size--;
+            }
+        }
+
+      count_leading_zeros (norm, d);
+      d <<= norm;
+      r <<= norm;
+
+      norm_rshift = GMP_LIMB_BITS - norm;
+      norm_rmask = (norm == 0 ? 0 : ~CNST_LIMB(0));  /* norm==0 gives a full-width shift, so mask the shifted-in part to zero */
+
+      invert_limb (dinv, d);
+
+      if (LIKELY (size != 0))
+        {
+          n1 = ap[size-1];
+          r |= ((n1 >> norm_rshift) & norm_rmask);
+          for (i = size-2; i >= 0; i--)
+            {
+              n0 = ap[i];
+              nshift = (n1 << norm) | ((n0 >> norm_rshift) & norm_rmask);
+              udiv_qrnnd_preinv (q, r, r, nshift, d, dinv);
+              *--qp = q;
+              n1 = n0;
+            }
+          nshift = n1 << norm;      /* lowest limb: nothing further to shift in */
+          udiv_qrnnd_preinv (q, r, r, nshift, d, dinv);
+          *--qp = q;
+        }
+      for (i = 0; i < xsize; i++)   /* fraction limbs: divide with zero limbs appended */
+        {
+          udiv_qrnnd_preinv (q, r, r, CNST_LIMB(0), d, dinv);
+          *--qp = q;
+        }
+      return r >> norm;             /* undo the normalization shift on the remainder */
+    }
+}
diff --git a/mpn/sparc64/gmp-mparam.h b/mpn/sparc64/gmp-mparam.h

new file mode 100644 (file)

index 0000000..91bed9e
--- /dev/null
+++ b/mpn/sparc64/gmp-mparam.h
@@ -0,0 +1,129 @@
+/* Sparc64 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2004, 2006, 2008, 2009,
+2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 64
+#define BYTES_PER_MP_LIMB 8
+
+/* 500 MHz ultrasparc2 running GNU/Linux */
+
+#define DIVREM_1_NORM_THRESHOLD              3
+#define DIVREM_1_UNNORM_THRESHOLD            4
+#define MOD_1_NORM_THRESHOLD                 3
+#define MOD_1_UNNORM_THRESHOLD               3
+#define MOD_1N_TO_MOD_1_1_THRESHOLD      MP_SIZE_T_MAX  /* never */
+#define MOD_1U_TO_MOD_1_1_THRESHOLD      MP_SIZE_T_MAX
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD     MP_SIZE_T_MAX
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD     MP_SIZE_T_MAX
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD  MP_SIZE_T_MAX  /* never */
+#define USE_PREINV_DIVREM_1                  1
+#define DIVREM_2_THRESHOLD                   7
+#define DIVEXACT_1_THRESHOLD                 0  /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD        MP_SIZE_T_MAX  /* never */
+
+#define MUL_TOOM22_THRESHOLD                30
+#define MUL_TOOM33_THRESHOLD               187
+#define MUL_TOOM44_THRESHOLD               278
+#define MUL_TOOM6H_THRESHOLD               278
+#define MUL_TOOM8H_THRESHOLD               357
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD     201
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     199
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     154
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD     107
+
+#define SQR_BASECASE_THRESHOLD              13
+#define SQR_TOOM2_THRESHOLD                 69
+#define SQR_TOOM3_THRESHOLD                116
+#define SQR_TOOM4_THRESHOLD                336
+#define SQR_TOOM6_THRESHOLD                336
+#define SQR_TOOM8_THRESHOLD                454
+
+#define MULMOD_BNM1_THRESHOLD               17
+#define SQRMOD_BNM1_THRESHOLD               23
+
+#define MUL_FFT_MODF_THRESHOLD             248  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    248, 5}, {      9, 4}, {     19, 6}, {      5, 5}, \
+    {     15, 6}, {      8, 5}, {     17, 6}, {     21, 7}, \
+    {     19, 8}, {     11, 7}, {     25, 8}, {     15, 7}, \
+    {     31, 8}, {     27, 9}, {     15, 8}, {     33, 9}, \
+    {     19, 8}, {     39, 9}, {     27,10}, {     15, 9}, \
+    {     39,10}, {     23, 9}, {     47,11}, {     15,10}, \
+    {     31, 9}, {     67,10}, {     39, 9}, {     79,10}, \
+    {     47,11}, {     31,10}, {     63, 9}, {    127, 8}, \
+    {    255,10}, {     71, 9}, {    143, 8}, {    287,10}, \
+    {     79,11}, {     47,12}, {   4096,13}, {   8192,14}, \
+    {  16384,15}, {  32768,16}, {  65536,17}, { 131072,18}, \
+    { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
+    {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 50
+#define MUL_FFT_THRESHOLD                 1984
+
+#define SQR_FFT_MODF_THRESHOLD             236  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    236, 5}, {      8, 4}, {     17, 5}, {     19, 6}, \
+    {     10, 5}, {     21, 6}, {     19, 7}, {     10, 6}, \
+    {     21, 7}, {     21, 8}, {     21, 9}, {     11, 8}, \
+    {     23, 9}, {     19, 8}, {     43, 9}, {     23,10}, \
+    {     15, 9}, {     43,10}, {     23,11}, {     15,10}, \
+    {     31, 9}, {     63,10}, {     47, 8}, {    191,11}, \
+    {     31,10}, {     63, 8}, {    255, 7}, {    511, 9}, \
+    {    135, 8}, {    271,10}, {     71, 9}, {    143, 8}, \
+    {    287, 7}, {    575,11}, {     47, 9}, {    191, 8}, \
+    {    383,12}, {   4096,13}, {   8192,14}, {  16384,15}, \
+    {  32768,16}, {  65536,17}, { 131072,18}, { 262144,19}, \
+    { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
+    {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 49
+#define SQR_FFT_THRESHOLD                 1120
+
+#define MULLO_BASECASE_THRESHOLD            16
+#define MULLO_DC_THRESHOLD                  41
+#define MULLO_MUL_N_THRESHOLD             3791
+
+#define DC_DIV_QR_THRESHOLD                 27
+#define DC_DIVAPPR_Q_THRESHOLD             100
+#define DC_BDIV_QR_THRESHOLD                47
+#define DC_BDIV_Q_THRESHOLD                174
+
+#define INV_MULMOD_BNM1_THRESHOLD           58
+#define INV_NEWTON_THRESHOLD                13
+#define INV_APPR_THRESHOLD                   9
+
+#define BINV_NEWTON_THRESHOLD              187
+#define REDC_1_TO_REDC_2_THRESHOLD          10
+#define REDC_2_TO_REDC_N_THRESHOLD         115
+
+#define MU_DIV_QR_THRESHOLD                680
+#define MU_DIVAPPR_Q_THRESHOLD             618
+#define MUPI_DIV_QR_THRESHOLD                0  /* always */
+#define MU_BDIV_QR_THRESHOLD               748
+#define MU_BDIV_Q_THRESHOLD                889
+
+#define MATRIX22_STRASSEN_THRESHOLD         13
+#define HGCD_THRESHOLD                      53
+#define GCD_DC_THRESHOLD                   283
+#define GCDEXT_DC_THRESHOLD                186
+#define JACOBI_BASE_METHOD                   2
+
+#define GET_STR_DC_THRESHOLD                13
+#define GET_STR_PRECOMPUTE_THRESHOLD        16
+#define SET_STR_DC_THRESHOLD               390
+#define SET_STR_PRECOMPUTE_THRESHOLD      1665
diff --git a/mpn/sparc64/lshift.asm b/mpn/sparc64/lshift.asm

new file mode 100644 (file)

index 0000000..b3bbd9d
--- /dev/null
+++ b/mpn/sparc64/lshift.asm
@@ -0,0 +1,152 @@
+dnl  SPARC v9 mpn_lshift
+
+dnl  Copyright 1996, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C                 cycles/limb
+C UltraSPARC 1&2:     2
+C UltraSPARC 3:              3.25
+
+C INPUT PARAMETERS
+define(`rp',`%i0')
+define(`up',`%i1')
+define(`n',`%i2')
+define(`cnt',`%i3')
+
+define(`u0',`%l0')
+define(`u1',`%l2')
+define(`u2',`%l4')
+define(`u3',`%l6')
+
+define(`tnc',`%i4')
+
+define(`fanop',`fitod %f0,%f2')                dnl  A quasi nop running in the FA pipe
+define(`fmnop',`fmuld %f0,%f0,%f4')    dnl  A quasi nop running in the FM pipe
+
+ASM_START()
+       REGISTER(%g2,#scratch)
+       REGISTER(%g3,#scratch)
+PROLOGUE(mpn_lshift)
+       save    %sp,-160,%sp
+
+       sllx    n,3,%g1                 C g1 = operand size in bytes
+       sub     %g0,cnt,tnc             C negate shift count: shifts use only its low 6 bits
+       add     up,%g1,up               C point up (%i1) just past the end of src
+       add     rp,%g1,rp               C point rp (%i0) just past the end of res
+       ldx     [up-8],u3               C load first limb
+       subcc   n,5,n
+       srlx    u3,tnc,%i5              C compute function result
+       sllx    u3,cnt,%g3              C g3 = pending high part of the top result limb
+       bl,pn   %icc,.Lend1234          C fewer than 5 limbs: use only the one-limb loop
+       fanop
+
+       subcc   n,4,n
+       ldx     [up-16],u0
+       ldx     [up-24],u1
+       add     up,-32,up
+       ldx     [up-0],u2
+       ldx     [up-8],u3
+       srlx    u0,tnc,%g2
+
+       bl,pn   %icc,.Lend5678          C not enough limbs for a full loop pass: wind down
+       fanop
+
+       b,a     .Loop
+       .align  16
+.Loop:                                 C 4 limbs per pass in groups of 4 instructions
+       sllx    u0,cnt,%g1
+       or      %g3,%g2,%g3
+       ldx     [up-16],u0
+       fanop
+C --
+       srlx    u1,tnc,%g2
+       subcc   n,4,n
+       stx     %g3,[rp-8]
+       fanop
+C --
+       sllx    u1,cnt,%g3
+       or      %g1,%g2,%g1
+       ldx     [up-24],u1
+       fanop
+C --
+       srlx    u2,tnc,%g2
+       stx     %g1,[rp-16]
+       add     up,-32,up
+       fanop
+C --
+       sllx    u2,cnt,%g1
+       or      %g3,%g2,%g3
+       ldx     [up-0],u2
+       fanop
+C --
+       srlx    u3,tnc,%g2
+       stx     %g3,[rp-24]
+       add     rp,-32,rp
+       fanop
+C --
+       sllx    u3,cnt,%g3
+       or      %g1,%g2,%g1
+       ldx     [up-8],u3
+       fanop
+C --
+       srlx    u0,tnc,%g2
+       stx     %g1,[rp-0]
+       bge,pt  %icc,.Loop
+       fanop
+C --
+.Lend5678:                             C finish the 4 limbs already loaded without loading more
+       sllx    u0,cnt,%g1
+       or      %g3,%g2,%g3
+       srlx    u1,tnc,%g2
+       stx     %g3,[rp-8]
+       sllx    u1,cnt,%g3
+       or      %g1,%g2,%g1
+       srlx    u2,tnc,%g2
+       stx     %g1,[rp-16]
+       sllx    u2,cnt,%g1
+       or      %g3,%g2,%g3
+       srlx    u3,tnc,%g2
+       stx     %g3,[rp-24]
+       add     rp,-32,rp
+       sllx    u3,cnt,%g3              C carry...
+       or      %g1,%g2,%g1
+       stx     %g1,[rp-0]
+
+.Lend1234:
+       addcc   n,4,n                   C remove the bias: n = limbs still to load
+       bz,pn   %icc,.Lret
+       fanop
+.Loop0:                                C one limb per pass
+       add     rp,-8,rp
+       subcc   n,1,n
+       ldx     [up-16],u3
+       add     up,-8,up
+       srlx    u3,tnc,%g2
+       or      %g3,%g2,%g3
+       stx     %g3,[rp]
+       sllx    u3,cnt,%g3
+       bnz,pt  %icc,.Loop0
+       fanop
+.Lret:
+       stx     %g3,[rp-8]              C store the lowest result limb
+       mov     %i5,%i0                 C return the value computed at entry
+       ret
+       restore
+EPILOGUE(mpn_lshift)
diff --git a/mpn/sparc64/mod_1.c b/mpn/sparc64/mod_1.c

new file mode 100644 (file)

index 0000000..757ae01
--- /dev/null
+++ b/mpn/sparc64/mod_1.c
@@ -0,0 +1,177 @@
+/* UltraSPARC 64 mpn_mod_1 -- mpn by limb remainder.
+
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2003 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#include "mpn/sparc64/sparc64.h"
+
+
+/*                 64-bit divisor   32-bit divisor
+                    cycles/limb      cycles/limb
+                     (approx)         (approx)
+   Ultrasparc 2i:      160               120
+*/
+
+
+/* 32-bit divisors are treated in special case code.  This requires 4 mulx
+   per limb instead of 8 in the general case.
+
+   For big endian systems we need HALF_ENDIAN_ADJ included in the src[i]
+   addressing, to get the two halves of each limb read in the correct order.
+   This is kept in an adj variable.  Doing that measures about 6 c/l faster
+   than just writing HALF_ENDIAN_ADJ(i) in the loop.  The latter shouldn't
+   be 6 cycles worth of work, but perhaps it doesn't schedule well (on gcc
+   3.2.1 at least).
+
+   A simple udivx/umulx loop for the 32-bit case was attempted for small
+   sizes, but at size==2 it was only about the same speed and at size==3 was
+   slower.  */
+
+mp_limb_t  /* returns the remainder of {src_limbptr,size_limbs} divided by d_limb */
+mpn_mod_1 (mp_srcptr src_limbptr, mp_size_t size_limbs, mp_limb_t d_limb)
+{
+  int        norm, norm_rshift;
+  mp_limb_t  src_high_limb;
+  mp_size_t  i;
+
+  ASSERT (size_limbs >= 0);
+  ASSERT (d_limb != 0);
+
+  if (UNLIKELY (size_limbs == 0))
+    return 0;
+
+  src_high_limb = src_limbptr[size_limbs-1];
+
+  /* udivx is good for size==1, and no need to bother checking limb<divisor,
+     since if that's likely the caller should check */
+  if (UNLIKELY (size_limbs == 1))
+    return src_high_limb % d_limb;
+
+  if (d_limb <= CNST_LIMB(0xFFFFFFFF))
+    {
+      /* 32-bit divisor: the source is processed as 32-bit halfwords */
+      unsigned   *src, n1, n0, r, dummy_q, nshift, norm_rmask;
+      mp_size_t  size, adj;
+      mp_limb_t  dinv_limb;
+
+      size = 2 * size_limbs;    /* halfwords */
+      src = (unsigned *) src_limbptr;
+
+      /* prospective initial remainder, if < d */
+      r = src_high_limb >> 32;
+
+      /* If the length of the source is uniformly distributed, then there's
+         a 50% chance of the high 32-bits being zero, which we can skip.  */
+      if (r == 0)
+        {
+          r = (unsigned) src_high_limb;
+          size--;
+          ASSERT (size > 0);  /* because always even */
+        }
+
+      /* Skip a division if high < divisor.  Having the test here before
+         normalizing will still skip as often as possible.  */
+      if (r < d_limb)
+        {
+          size--;
+          ASSERT (size > 0);  /* because size==1 handled above */
+        }
+      else
+        r = 0;                /* high halfword is not below d: start from zero and divide it normally */
+
+      count_leading_zeros_32 (norm, d_limb);
+      norm -= 32;  /* NOTE(review): presumably the macro counts within 64 bits, hence the adjustment -- confirm in sparc64.h */
+      d_limb <<= norm;
+
+      norm_rshift = 32 - norm;
+      norm_rmask = (norm == 0 ? 0 : 0xFFFFFFFF);  /* norm==0 gives norm_rshift==32, so mask the shifted-in part to zero */
+      i = size-1;
+      adj = HALF_ENDIAN_ADJ (i);
+      n1 = src [i + adj];
+      r = (r << norm) | ((n1 >> norm_rshift) & norm_rmask);
+
+      invert_half_limb (dinv_limb, d_limb);
+      adj = -adj;             /* the adjustment alternates sign with each halfword index */
+
+      for (i--; i >= 0; i--)
+        {
+          n0 = src [i + adj];
+          adj = -adj;
+          nshift = (n1 << norm) | ((n0 >> norm_rshift) & norm_rmask);
+          udiv_qrnnd_half_preinv (dummy_q, r, r, nshift, d_limb, dinv_limb);
+          n1 = n0;
+        }
+
+      /* same as loop, but without n0 */
+      nshift = n1 << norm;
+      udiv_qrnnd_half_preinv (dummy_q, r, r, nshift, d_limb, dinv_limb);
+
+      ASSERT ((r & ((1 << norm) - 1)) == 0);
+      return r >> norm;       /* undo the normalization shift on the remainder */
+    }
+  else
+    {
+      /* 64-bit divisor: the source is processed as whole limbs */
+      mp_srcptr  src;
+      mp_size_t  size;
+      mp_limb_t  n1, n0, r, dinv, dummy_q, nshift, norm_rmask;
+
+      src = src_limbptr;
+      size = size_limbs;
+      r = src_high_limb;  /* initial remainder */
+
+      /* Skip a division if high < divisor.  Having the test here before
+         normalizing will still skip as often as possible.  */
+      if (r < d_limb)
+        {
+          size--;
+          ASSERT (size > 0);  /* because size==1 handled above */
+        }
+      else
+        r = 0;                /* high limb is not below d: start from zero and divide it normally */
+
+      count_leading_zeros (norm, d_limb);
+      d_limb <<= norm;
+
+      norm_rshift = GMP_LIMB_BITS - norm;
+      norm_rmask = (norm == 0 ? 0 : 0xFFFFFFFF);  /* 32 bits suffice: d_limb > 0xFFFFFFFF here, so norm <= 31 and n1 >> norm_rshift is below 2^31 */
+
+      src += size;
+      n1 = *--src;
+      r = (r << norm) | ((n1 >> norm_rshift) & norm_rmask);
+
+      invert_limb (dinv, d_limb);
+
+      for (i = size-2; i >= 0; i--)
+        {
+          n0 = *--src;
+          nshift = (n1 << norm) | ((n0 >> norm_rshift) & norm_rmask);
+          udiv_qrnnd_preinv (dummy_q, r, r, nshift, d_limb, dinv);
+          n1 = n0;
+        }
+
+      /* same as loop, but without n0 */
+      nshift = n1 << norm;
+      udiv_qrnnd_preinv (dummy_q, r, r, nshift, d_limb, dinv);
+
+      ASSERT ((r & ((CNST_LIMB(1) << norm) - 1)) == 0);
+      return r >> norm;       /* undo the normalization shift on the remainder */
+    }
+}
diff --git a/mpn/sparc64/mode1o.c b/mpn/sparc64/mode1o.c

new file mode 100644 (file)

index 0000000..5ec97c5
--- /dev/null
+++ b/mpn/sparc64/mode1o.c
@@ -0,0 +1,186 @@
+/* UltraSPARC 64 mpn_modexact_1c_odd -- mpn by limb exact style remainder.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#include "mpn/sparc64/sparc64.h"
+
+
+/*                 64-bit divisor   32-bit divisor
+                    cycles/limb      cycles/limb
+                     (approx)         (approx)
+   Ultrasparc 2i:       ?                ?
+*/
+
+
+/* This implementation reduces the number of multiplies done, knowing that
+   on ultrasparc 1 and 2 the mulx instruction stalls the whole chip.
+
+   The key idea is to use the fact that the low limb of q*d equals l, this
+   being the whole purpose of the q calculated.  It means there's no need to
+   calculate the lowest 32x32->64 part of the q*d, instead it can be
+   inferred from l and the other three 32x32->64 parts.  See sparc64.h for
+   details.
+
+   When d is 32-bits, the same applies, but in this case there's only one
+   other 32x32->64 part (ie. HIGH(q)*d).
+
+   The net effect is that for 64-bit divisor each limb is 4 mulx, or for
+   32-bit divisor each is 2 mulx.
+
+   Enhancements:
+
+   No doubt this could be done in assembler, if that helped the scheduling,
+   or perhaps guaranteed good code irrespective of the compiler.
+
+   Alternatives:
+
+   It might be possible to use floating point.  The loop is dominated by
+   multiply latency, so not sure if floats would improve that.  One
+   possibility would be to take two limbs at a time, with a 128 bit inverse,
+   if there's enough registers, which could effectively use float throughput
+   to reduce total latency across two limbs.  */
+
+/* Range check for a value about to be returned: strictly below d when the
+   incoming carry was below d, otherwise d itself is allowed too.  Expands
+   in terms of orig_c and d, which must be in scope where it is used.  */
+#define ASSERT_RETVAL(r)                \
+  ASSERT (orig_c < d ? (r) < d : (r) <= d)
+
+/* Exact-division-style remainder of {src,size} by the odd limb d, with
+   carry-in orig_c.
+
+   src, size  operand limbs, least significant first as consumed here;
+              size must be at least 1.
+   d          divisor, must be odd.
+   orig_c     initial carry limb.
+
+   For size==1 the result is (orig_c - src[0]) mod d, computed with a plain
+   hardware remainder.  For larger sizes each limb step does
+
+       l = s - c          (SUBC_LIMB, which presumably leaves the borrow in c)
+       q = l * inverse    (so that the low limb of q*d equals l)
+       c += high limb of q*d
+
+   and the value returned satisfies ASSERT_RETVAL: it is < d when
+   orig_c < d, and <= d otherwise.  The precise congruence the result obeys
+   is the one documented for mpn_modexact_1c_odd in gmp-impl.h (not visible
+   here -- TODO confirm when editing).
+
+   Two copies of the loop exist because a divisor that fits in 32 bits lets
+   the high limb of q*d be recovered with fewer multiplies
+   (umul_ppmm_half_lowequal) than a full 64-bit divisor
+   (umul_ppmm_lowequal).  */
+mp_limb_t
+mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size, mp_limb_t d, mp_limb_t orig_c)
+{
+  mp_limb_t  c = orig_c;
+  mp_limb_t  s, l, q, h, inverse;
+
+  ASSERT (size >= 1);
+  ASSERT (d & 1);
+  ASSERT_MPN (src, size);
+  ASSERT_LIMB (d);
+  ASSERT_LIMB (c);
+
+  /* udivx is faster than 10 or 12 mulx's for one limb via an inverse */
+  if (size == 1)
+    {
+      s = src[0];
+      if (s > c)
+        {
+          /* c-s would be negative: take the remainder of s-c and negate it
+             modulo d, keeping 0 as 0.  */
+          l = s-c;
+          h = l % d;
+          if (h != 0)
+            h = d - h;
+        }
+      else
+        {
+          l = c-s;
+          h = l % d;
+        }
+      return h;
+    }
+
+  binvert_limb (inverse, d);
+
+  if (d <= 0xFFFFFFFF)
+    {
+      /* Divisor fits in 32 bits.  The loop handles all limbs but the
+         highest; on exit s holds the highest limb, still unprocessed.  */
+      s = *src++;
+      size--;
+      do
+        {
+          SUBC_LIMB (c, l, s, c);
+          s = *src++;
+          q = l * inverse;
+          umul_ppmm_half_lowequal (h, q, d, l);
+          c += h;
+          size--;
+        }
+      while (size != 0);
+
+      if (s <= d)
+        {
+          /* With high s <= d the final step can be a subtract and addback.
+             If c==0 then the addback will restore to l>=0.  If c==d then
+             will get l==d if s==0, but that's ok per the function
+             definition.  */
+
+          l = c - s;
+          l += (l > c ? d : 0);   /* l > c means the subtract wrapped */
+
+          ASSERT_RETVAL (l);
+          return l;
+        }
+      else
+        {
+          /* Can't skip a divide, just do the loop code once more. */
+          SUBC_LIMB (c, l, s, c);
+          q = l * inverse;
+          umul_ppmm_half_lowequal (h, q, d, l);
+          c += h;
+
+          ASSERT_RETVAL (c);
+          return c;
+        }
+    }
+  else
+    {
+      /* Full 64-bit divisor: same structure as above, but the high limb of
+         q*d needs both 32-bit halves of d.  */
+      mp_limb_t  dl = LOW32 (d);
+      mp_limb_t  dh = HIGH32 (d);
+
+      s = *src++;
+      size--;
+      do
+        {
+          SUBC_LIMB (c, l, s, c);
+          s = *src++;
+          q = l * inverse;
+          umul_ppmm_lowequal (h, q, d, dh, dl, l);
+          c += h;
+          size--;
+        }
+      while (size != 0);
+
+      if (s <= d)
+        {
+          /* With high s <= d the final step can be a subtract and addback.
+             If c==0 then the addback will restore to l>=0.  If c==d then
+             will get l==d if s==0, but that's ok per the function
+             definition.  */
+
+          l = c - s;
+          l += (l > c ? d : 0);   /* l > c means the subtract wrapped */
+
+          ASSERT_RETVAL (l);
+          return l;
+        }
+      else
+        {
+          /* Can't skip a divide, just do the loop code once more. */
+          SUBC_LIMB (c, l, s, c);
+          q = l * inverse;
+          umul_ppmm_lowequal (h, q, d, dh, dl, l);
+          c += h;
+
+          ASSERT_RETVAL (c);
+          return c;
+        }
+    }
+}
diff --git a/mpn/sparc64/mul_1.asm b/mpn/sparc64/mul_1.asm

new file mode 100644 (file)

index 0000000..e57e822
--- /dev/null
+++ b/mpn/sparc64/mul_1.asm
@@ -0,0 +1,569 @@
+dnl  SPARC v9 64-bit mpn_mul_1 -- Multiply a limb vector with a limb and store
+dnl  the result in a second limb vector.
+
+dnl  Copyright 1998, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                 cycles/limb
+C UltraSPARC 1&2:     14
+C UltraSPARC 3:              18.5
+
+C Algorithm: We use eight floating-point multiplies per limb product, with the
+C invariant v operand split into four 16-bit pieces, and the s1 operand split
+C into 32-bit pieces.  We sum pairs of 48-bit partial products using
+C floating-point add, then convert the four 49-bit product-sums and transfer
+C them to the integer unit.
+
+C Possible optimizations:
+C   1. Align the stack area where we transfer the four 49-bit product-sums
+C      to a 32-byte boundary.  That would minimize the cache collision.
+C      (UltraSPARC-1/2 use a direct-mapped cache.)  (Perhaps even better would
+C      be to align the area to map to the area immediately before s1?)
+C   2. Sum the 4 49-bit quantities using 32-bit operations, as in the
+C      develop mpn_addmul_2.  This would save many integer instructions.
+C   3. Unrolling.  Questionable if it is worth the code expansion, given that
+C      it could only save 1 cycle/limb.
+C   4. Specialize for particular v values.  If its upper 32 bits are zero, we
+C      could save many operations, in the FPU (fmuld), but more so in the IEU
+C      since we'll be summing 48-bit quantities, which might be simpler.
+C   5. Ideally, we should schedule the f2/f3 and f4/f5 RAW further apart, and
+C      the i00,i16,i32,i48 RAW less apart.  The latter apart-scheduling should
+C      not be greater than needed for L2 cache latency, and also not so great
+C      that i16 needs to be copied.
+C   6. Avoid performing mem+fa+fm in the same cycle, at least not when we want
+C      to get high IEU bandwidth.  (12 of the 14 cycles will be free for 2 IEU
+C      ops.)
+
+C Instruction classification (as per UltraSPARC-1/2 functional units):
+C    8 FM
+C   10 FA
+C   11 MEM
+C   9 ISHIFT + 10? IADDLOG
+C    1 BRANCH
+C   49 insns in total (plus three mov insns that should be optimized out)
+
+C The loop executes 53 instructions in 14 cycles on UltraSPARC-1/2, i.e. we
+C sustain 3.79 instructions/cycle.
+
+C INPUT PARAMETERS
+C rp   i0      destination limb vector
+C up   i1      source limb vector
+C n    i2      number of limbs
+C v    i3      multiplier limb
+C
+C RETURN VALUE
+C The final carry limb, moved from cy to %o0 in the delay slot of the
+C closing return.
+C
+C SCRATCH MEMORY
+C Four 8-byte slots at [%sp+2223+0] .. [%sp+2223+24] carry values between
+C the floating-point and integer register files (stx/ldd on the way in,
+C std/ldx for the product-sums).  NOTE(review): 2223 is presumably the V9
+C stack bias 2047 plus 176 -- confirm against the ABI before changing the
+C frame size.
+
+ASM_START()
+       REGISTER(%g2,#scratch)
+       REGISTER(%g3,#scratch)
+
+define(`p00', `%f8') define(`p16',`%f10') define(`p32',`%f12') define(`p48',`%f14')
+define(`r32',`%f16') define(`r48',`%f18') define(`r64',`%f20') define(`r80',`%f22')
+define(`v00',`%f24') define(`v16',`%f26') define(`v32',`%f28') define(`v48',`%f30')
+define(`u00',`%f32') define(`u32', `%f34')
+define(`a00',`%f36') define(`a16',`%f38') define(`a32',`%f40') define(`a48',`%f42')
+define(`cy',`%g1')
+define(`rlimb',`%g3')
+define(`i00',`%l0') define(`i16',`%l1') define(`i32',`%l2') define(`i48',`%l3')
+define(`xffffffff',`%l7')
+define(`xffff',`%o0')
+
+PROLOGUE(mpn_mul_1)
+
+C Initialization.  (1) Split v operand into four 16-bit chunks and store them
+C as IEEE double in fp registers.  (2) Clear upper 32 bits of fp register pairs
+C f2 and f4.  (3) Store masks in registers aliased to `xffff' and `xffffffff'.
+
+       save    %sp, -256, %sp
+       mov     -1, %g4                 C all ones, source of both masks
+       srlx    %g4, 48, xffff          C store mask in register `xffff'
+       and     %i3, xffff, %g2
+       stx     %g2, [%sp+2223+0]       C v bits 0-15
+       srlx    %i3, 16, %g3
+       and     %g3, xffff, %g3
+       stx     %g3, [%sp+2223+8]       C v bits 16-31
+       srlx    %i3, 32, %g2
+       and     %g2, xffff, %g2
+       stx     %g2, [%sp+2223+16]      C v bits 32-47
+       srlx    %i3, 48, %g3
+       stx     %g3, [%sp+2223+24]      C v bits 48-63
+       srlx    %g4, 32, xffffffff      C store mask in register `xffffffff'
+
+C Pointer and index setup.  After this block %i2 is -8*n and counts up by 8
+C per limb until it reaches zero; all limb accesses are base+%i2.
+
+       sllx    %i2, 3, %i2             C n limbs -> byte count
+       mov     0, cy                   C clear cy
+       add     %i0, %i2, %i0           C rp + 8*n
+       add     %i1, %i2, %i1           C up + 8*n
+       neg     %i2                     C index = -8*n
+       add     %i1, 4, %i5             C base for the second word of each limb
+       add     %i0, -32, %i4           C store base, 4 limbs behind the index
+       add     %i0, -16, %i0
+
+       ldd     [%sp+2223+0], v00
+       ldd     [%sp+2223+8], v16
+       ldd     [%sp+2223+16], v32
+       ldd     [%sp+2223+24], v48
+       ld      [%sp+2223+0],%f2        C zero f2
+       ld      [%sp+2223+0],%f4        C zero f4
+       ld      [%i5+%i2], %f3          C read low 32 bits of up[i]
+       ld      [%i1+%i2], %f5          C read high 32 bits of up[i]
+       fxtod   v00, v00
+       fxtod   v16, v16
+       fxtod   v32, v32
+       fxtod   v48, v48
+
+C Start real work.  (We sneakingly read f3 and f5 above...)
+C The software pipeline is very deep, requiring 4 feed-in stages.
+C Each feed-in stage advances the index by one limb and branches onward if
+C limbs remain; falling through means n equals the stage number, and the
+C code then jumps into the matching wind-down stage .L_out_<n>.
+
+       fxtod   %f2, u00
+       fxtod   %f4, u32
+       fmuld   u00, v00, a00
+       fmuld   u00, v16, a16
+       fmuld   u00, v32, p32
+       fmuld   u32, v00, r32
+       fmuld   u00, v48, p48
+       addcc   %i2, 8, %i2             C zero iff n == 1
+       bnz,pt  %icc, .L_two_or_more
+       fmuld   u32, v16, r48           C (branch delay slot)
+
+C n == 1
+.L_one:
+       fmuld   u32, v32, r64   C FIXME not urgent
+       faddd   p32, r32, a32
+       fdtox   a00, a00
+       faddd   p48, r48, a48
+       fmuld   u32, v48, r80   C FIXME not urgent
+       fdtox   a16, a16
+       fdtox   a32, a32
+       fdtox   a48, a48
+       std     a00, [%sp+2223+0]
+       std     a16, [%sp+2223+8]
+       std     a32, [%sp+2223+16]
+       std     a48, [%sp+2223+24]
+       add     %i2, 8, %i2
+
+       fdtox   r64, a00
+       fdtox   r80, a16
+       ldx     [%sp+2223+0], i00
+       ldx     [%sp+2223+8], i16
+       ldx     [%sp+2223+16], i32
+       ldx     [%sp+2223+24], i48
+       std     a00, [%sp+2223+0]
+       std     a16, [%sp+2223+8]
+       add     %i2, 8, %i2
+
+       mov     i00, %g5                C i00+ now in g5
+       ldx     [%sp+2223+0], i00
+       srlx    i16, 48, %l4            C (i16 >> 48)
+       mov     i16, %g2
+       ldx     [%sp+2223+8], i16
+       srlx    i48, 16, %l5            C (i48 >> 16)
+       mov     i32, %g4                C i32+ now in g4
+       sllx    i48, 32, %l6            C (i48 << 32)
+       srlx    %g4, 32, %o3            C (i32 >> 32)
+       add     %l5, %l4, %o1           C hi64- in %o1
+       std     a00, [%sp+2223+0]
+       sllx    %g4, 16, %o2            C (i32 << 16)
+       add     %o3, %o1, %o1           C hi64 in %o1   1st ASSIGNMENT
+       std     a16, [%sp+2223+8]
+       sllx    %o1, 48, %o3            C (hi64 << 48)
+       add     %g2, %o2, %o2           C mi64- in %o2
+       add     %l6, %o2, %o2           C mi64- in %o2
+       sub     %o2, %o3, %o2           C mi64 in %o2   1st ASSIGNMENT
+       add     cy, %g5, %o4            C x = prev(i00) + cy
+       b       .L_out_1
+       add     %i2, 8, %i2             C (branch delay slot)
+
+C n >= 2: second feed-in stage, loads up[1]
+.L_two_or_more:
+       ld      [%i5+%i2], %f3          C read low 32 bits of up[i]
+       fmuld   u32, v32, r64   C FIXME not urgent
+       faddd   p32, r32, a32
+       ld      [%i1+%i2], %f5          C read high 32 bits of up[i]
+       fdtox   a00, a00
+       faddd   p48, r48, a48
+       fmuld   u32, v48, r80   C FIXME not urgent
+       fdtox   a16, a16
+       fdtox   a32, a32
+       fxtod   %f2, u00
+       fxtod   %f4, u32
+       fdtox   a48, a48
+       std     a00, [%sp+2223+0]
+       fmuld   u00, v00, p00
+       std     a16, [%sp+2223+8]
+       fmuld   u00, v16, p16
+       std     a32, [%sp+2223+16]
+       fmuld   u00, v32, p32
+       std     a48, [%sp+2223+24]
+       faddd   p00, r64, a00
+       fmuld   u32, v00, r32
+       faddd   p16, r80, a16
+       fmuld   u00, v48, p48
+       addcc   %i2, 8, %i2             C zero iff n == 2
+       bnz,pt  %icc, .L_three_or_more
+       fmuld   u32, v16, r48           C (branch delay slot)
+
+C n == 2
+.L_two:
+       fmuld   u32, v32, r64   C FIXME not urgent
+       faddd   p32, r32, a32
+       fdtox   a00, a00
+       faddd   p48, r48, a48
+       fmuld   u32, v48, r80   C FIXME not urgent
+       fdtox   a16, a16
+       ldx     [%sp+2223+0], i00
+       fdtox   a32, a32
+       ldx     [%sp+2223+8], i16
+       ldx     [%sp+2223+16], i32
+       ldx     [%sp+2223+24], i48
+       fdtox   a48, a48
+       std     a00, [%sp+2223+0]
+       std     a16, [%sp+2223+8]
+       std     a32, [%sp+2223+16]
+       std     a48, [%sp+2223+24]
+       add     %i2, 8, %i2
+
+       fdtox   r64, a00
+       mov     i00, %g5                C i00+ now in g5
+       fdtox   r80, a16
+       ldx     [%sp+2223+0], i00
+       srlx    i16, 48, %l4            C (i16 >> 48)
+       mov     i16, %g2
+       ldx     [%sp+2223+8], i16
+       srlx    i48, 16, %l5            C (i48 >> 16)
+       mov     i32, %g4                C i32+ now in g4
+       ldx     [%sp+2223+16], i32
+       sllx    i48, 32, %l6            C (i48 << 32)
+       ldx     [%sp+2223+24], i48
+       srlx    %g4, 32, %o3            C (i32 >> 32)
+       add     %l5, %l4, %o1           C hi64- in %o1
+       std     a00, [%sp+2223+0]
+       sllx    %g4, 16, %o2            C (i32 << 16)
+       add     %o3, %o1, %o1           C hi64 in %o1   1st ASSIGNMENT
+       std     a16, [%sp+2223+8]
+       sllx    %o1, 48, %o3            C (hi64 << 48)
+       add     %g2, %o2, %o2           C mi64- in %o2
+       add     %l6, %o2, %o2           C mi64- in %o2
+       sub     %o2, %o3, %o2           C mi64 in %o2   1st ASSIGNMENT
+       add     cy, %g5, %o4            C x = prev(i00) + cy
+       b       .L_out_2
+       add     %i2, 8, %i2             C (branch delay slot)
+
+C n >= 3: third feed-in stage, loads up[2]
+.L_three_or_more:
+       ld      [%i5+%i2], %f3          C read low 32 bits of up[i]
+       fmuld   u32, v32, r64   C FIXME not urgent
+       faddd   p32, r32, a32
+       ld      [%i1+%i2], %f5          C read high 32 bits of up[i]
+       fdtox   a00, a00
+       faddd   p48, r48, a48
+       fmuld   u32, v48, r80   C FIXME not urgent
+       fdtox   a16, a16
+       ldx     [%sp+2223+0], i00
+       fdtox   a32, a32
+       ldx     [%sp+2223+8], i16
+       fxtod   %f2, u00
+       ldx     [%sp+2223+16], i32
+       fxtod   %f4, u32
+       ldx     [%sp+2223+24], i48
+       fdtox   a48, a48
+       std     a00, [%sp+2223+0]
+       fmuld   u00, v00, p00
+       std     a16, [%sp+2223+8]
+       fmuld   u00, v16, p16
+       std     a32, [%sp+2223+16]
+       fmuld   u00, v32, p32
+       std     a48, [%sp+2223+24]
+       faddd   p00, r64, a00
+       fmuld   u32, v00, r32
+       faddd   p16, r80, a16
+       fmuld   u00, v48, p48
+       addcc   %i2, 8, %i2             C zero iff n == 3
+       bnz,pt  %icc, .L_four_or_more
+       fmuld   u32, v16, r48           C (branch delay slot)
+
+C n == 3
+.L_three:
+       fmuld   u32, v32, r64   C FIXME not urgent
+       faddd   p32, r32, a32
+       fdtox   a00, a00
+       faddd   p48, r48, a48
+       mov     i00, %g5                C i00+ now in g5
+       fmuld   u32, v48, r80   C FIXME not urgent
+       fdtox   a16, a16
+       ldx     [%sp+2223+0], i00
+       fdtox   a32, a32
+       srlx    i16, 48, %l4            C (i16 >> 48)
+       mov     i16, %g2
+       ldx     [%sp+2223+8], i16
+       srlx    i48, 16, %l5            C (i48 >> 16)
+       mov     i32, %g4                C i32+ now in g4
+       ldx     [%sp+2223+16], i32
+       sllx    i48, 32, %l6            C (i48 << 32)
+       ldx     [%sp+2223+24], i48
+       fdtox   a48, a48
+       srlx    %g4, 32, %o3            C (i32 >> 32)
+       add     %l5, %l4, %o1           C hi64- in %o1
+       std     a00, [%sp+2223+0]
+       sllx    %g4, 16, %o2            C (i32 << 16)
+       add     %o3, %o1, %o1           C hi64 in %o1   1st ASSIGNMENT
+       std     a16, [%sp+2223+8]
+       sllx    %o1, 48, %o3            C (hi64 << 48)
+       add     %g2, %o2, %o2           C mi64- in %o2
+       std     a32, [%sp+2223+16]
+       add     %l6, %o2, %o2           C mi64- in %o2
+       std     a48, [%sp+2223+24]
+       sub     %o2, %o3, %o2           C mi64 in %o2   1st ASSIGNMENT
+       add     cy, %g5, %o4            C x = prev(i00) + cy
+       b       .L_out_3
+       add     %i2, 8, %i2             C (branch delay slot)
+
+C n >= 4: fourth feed-in stage, loads up[3]
+.L_four_or_more:
+       ld      [%i5+%i2], %f3          C read low 32 bits of up[i]
+       fmuld   u32, v32, r64   C FIXME not urgent
+       faddd   p32, r32, a32
+       ld      [%i1+%i2], %f5          C read high 32 bits of up[i]
+       fdtox   a00, a00
+       faddd   p48, r48, a48
+       mov     i00, %g5                C i00+ now in g5
+       fmuld   u32, v48, r80   C FIXME not urgent
+       fdtox   a16, a16
+       ldx     [%sp+2223+0], i00
+       fdtox   a32, a32
+       srlx    i16, 48, %l4            C (i16 >> 48)
+       mov     i16, %g2
+       ldx     [%sp+2223+8], i16
+       fxtod   %f2, u00
+       srlx    i48, 16, %l5            C (i48 >> 16)
+       mov     i32, %g4                C i32+ now in g4
+       ldx     [%sp+2223+16], i32
+       fxtod   %f4, u32
+       sllx    i48, 32, %l6            C (i48 << 32)
+       ldx     [%sp+2223+24], i48
+       fdtox   a48, a48
+       srlx    %g4, 32, %o3            C (i32 >> 32)
+       add     %l5, %l4, %o1           C hi64- in %o1
+       std     a00, [%sp+2223+0]
+       fmuld   u00, v00, p00
+       sllx    %g4, 16, %o2            C (i32 << 16)
+       add     %o3, %o1, %o1           C hi64 in %o1   1st ASSIGNMENT
+       std     a16, [%sp+2223+8]
+       fmuld   u00, v16, p16
+       sllx    %o1, 48, %o3            C (hi64 << 48)
+       add     %g2, %o2, %o2           C mi64- in %o2
+       std     a32, [%sp+2223+16]
+       fmuld   u00, v32, p32
+       add     %l6, %o2, %o2           C mi64- in %o2
+       std     a48, [%sp+2223+24]
+       faddd   p00, r64, a00
+       fmuld   u32, v00, r32
+       sub     %o2, %o3, %o2           C mi64 in %o2   1st ASSIGNMENT
+       faddd   p16, r80, a16
+       fmuld   u00, v48, p48
+       add     cy, %g5, %o4            C x = prev(i00) + cy
+       addcc   %i2, 8, %i2             C zero iff n == 4
+       bnz,pt  %icc, .Loop
+       fmuld   u32, v16, r48           C (branch delay slot)
+
+C n == 4: nothing left to load, skip the loop
+.L_four:
+       b,a     .L_out_4
+
+C BEGIN MAIN LOOP
+C One iteration per limb: loads the next up limb, advances every pipeline
+C stage, and stores the limb that entered four iterations earlier (hence
+C the -32 bias in %i4).  Runs n-4 times, leaving when the index hits zero.
+       .align  16
+.Loop:
+C 00
+       srlx    %o4, 16, %o5            C (x >> 16)
+       ld      [%i5+%i2], %f3          C read low 32 bits of up[i]
+       fmuld   u32, v32, r64   C FIXME not urgent
+       faddd   p32, r32, a32
+C 01
+       add     %o5, %o2, %o2           C mi64 in %o2   2nd ASSIGNMENT
+       and     %o4, xffff, %o5         C (x & 0xffff)
+       ld      [%i1+%i2], %f5          C read high 32 bits of up[i]
+       fdtox   a00, a00
+C 02
+       faddd   p48, r48, a48
+C 03
+       srlx    %o2, 48, %o7            C (mi64 >> 48)
+       mov     i00, %g5                C i00+ now in g5
+       fmuld   u32, v48, r80   C FIXME not urgent
+       fdtox   a16, a16
+C 04
+       sllx    %o2, 16, %i3            C (mi64 << 16)
+       add     %o7, %o1, cy            C new cy
+       ldx     [%sp+2223+0], i00
+       fdtox   a32, a32
+C 05
+       srlx    i16, 48, %l4            C (i16 >> 48)
+       mov     i16, %g2
+       ldx     [%sp+2223+8], i16
+       fxtod   %f2, u00
+C 06
+       srlx    i48, 16, %l5            C (i48 >> 16)
+       mov     i32, %g4                C i32+ now in g4
+       ldx     [%sp+2223+16], i32
+       fxtod   %f4, u32
+C 07
+       sllx    i48, 32, %l6            C (i48 << 32)
+       or      %i3, %o5, %o5
+       ldx     [%sp+2223+24], i48
+       fdtox   a48, a48
+C 08
+       srlx    %g4, 32, %o3            C (i32 >> 32)
+       add     %l5, %l4, %o1           C hi64- in %o1
+       std     a00, [%sp+2223+0]
+       fmuld   u00, v00, p00
+C 09
+       sllx    %g4, 16, %o2            C (i32 << 16)
+       add     %o3, %o1, %o1           C hi64 in %o1   1st ASSIGNMENT
+       std     a16, [%sp+2223+8]
+       fmuld   u00, v16, p16
+C 10
+       sllx    %o1, 48, %o3            C (hi64 << 48)
+       add     %g2, %o2, %o2           C mi64- in %o2
+       std     a32, [%sp+2223+16]
+       fmuld   u00, v32, p32
+C 11
+       add     %l6, %o2, %o2           C mi64- in %o2
+       std     a48, [%sp+2223+24]
+       faddd   p00, r64, a00
+       fmuld   u32, v00, r32
+C 12
+       sub     %o2, %o3, %o2           C mi64 in %o2   1st ASSIGNMENT
+       stx     %o5, [%i4+%i2]
+       faddd   p16, r80, a16
+       fmuld   u00, v48, p48
+C 13
+       add     cy, %g5, %o4            C x = prev(i00) + cy
+       addcc   %i2, 8, %i2
+       bnz,pt  %icc, .Loop
+       fmuld   u32, v16, r48
+C END MAIN LOOP
+
+C Wind-down.  No further limbs are loaded.  On entry to .L_out_<k> exactly k
+C result limbs remain to be stored; each stage stores one and falls through
+C to the next.
+.L_out_4:
+       srlx    %o4, 16, %o5            C (x >> 16)
+       fmuld   u32, v32, r64   C FIXME not urgent
+       faddd   p32, r32, a32
+       add     %o5, %o2, %o2           C mi64 in %o2   2nd ASSIGNMENT
+       and     %o4, xffff, %o5         C (x & 0xffff)
+       fdtox   a00, a00
+       faddd   p48, r48, a48
+       srlx    %o2, 48, %o7            C (mi64 >> 48)
+       mov     i00, %g5                C i00+ now in g5
+       fmuld   u32, v48, r80   C FIXME not urgent
+       fdtox   a16, a16
+       sllx    %o2, 16, %i3            C (mi64 << 16)
+       add     %o7, %o1, cy            C new cy
+       ldx     [%sp+2223+0], i00
+       fdtox   a32, a32
+       srlx    i16, 48, %l4            C (i16 >> 48)
+       mov     i16, %g2
+       ldx     [%sp+2223+8], i16
+       srlx    i48, 16, %l5            C (i48 >> 16)
+       mov     i32, %g4                C i32+ now in g4
+       ldx     [%sp+2223+16], i32
+       sllx    i48, 32, %l6            C (i48 << 32)
+       or      %i3, %o5, %o5
+       ldx     [%sp+2223+24], i48
+       fdtox   a48, a48
+       srlx    %g4, 32, %o3            C (i32 >> 32)
+       add     %l5, %l4, %o1           C hi64- in %o1
+       std     a00, [%sp+2223+0]
+       sllx    %g4, 16, %o2            C (i32 << 16)
+       add     %o3, %o1, %o1           C hi64 in %o1   1st ASSIGNMENT
+       std     a16, [%sp+2223+8]
+       sllx    %o1, 48, %o3            C (hi64 << 48)
+       add     %g2, %o2, %o2           C mi64- in %o2
+       std     a32, [%sp+2223+16]
+       add     %l6, %o2, %o2           C mi64- in %o2
+       std     a48, [%sp+2223+24]
+       sub     %o2, %o3, %o2           C mi64 in %o2   1st ASSIGNMENT
+       stx     %o5, [%i4+%i2]
+       add     cy, %g5, %o4            C x = prev(i00) + cy
+       add     %i2, 8, %i2
+.L_out_3:
+       srlx    %o4, 16, %o5            C (x >> 16)
+       add     %o5, %o2, %o2           C mi64 in %o2   2nd ASSIGNMENT
+       and     %o4, xffff, %o5         C (x & 0xffff)
+       fdtox   r64, a00
+       srlx    %o2, 48, %o7            C (mi64 >> 48)
+       mov     i00, %g5                C i00+ now in g5
+       fdtox   r80, a16
+       sllx    %o2, 16, %i3            C (mi64 << 16)
+       add     %o7, %o1, cy            C new cy
+       ldx     [%sp+2223+0], i00
+       srlx    i16, 48, %l4            C (i16 >> 48)
+       mov     i16, %g2
+       ldx     [%sp+2223+8], i16
+       srlx    i48, 16, %l5            C (i48 >> 16)
+       mov     i32, %g4                C i32+ now in g4
+       ldx     [%sp+2223+16], i32
+       sllx    i48, 32, %l6            C (i48 << 32)
+       or      %i3, %o5, %o5
+       ldx     [%sp+2223+24], i48
+       srlx    %g4, 32, %o3            C (i32 >> 32)
+       add     %l5, %l4, %o1           C hi64- in %o1
+       std     a00, [%sp+2223+0]
+       sllx    %g4, 16, %o2            C (i32 << 16)
+       add     %o3, %o1, %o1           C hi64 in %o1   1st ASSIGNMENT
+       std     a16, [%sp+2223+8]
+       sllx    %o1, 48, %o3            C (hi64 << 48)
+       add     %g2, %o2, %o2           C mi64- in %o2
+       add     %l6, %o2, %o2           C mi64- in %o2
+       sub     %o2, %o3, %o2           C mi64 in %o2   1st ASSIGNMENT
+       stx     %o5, [%i4+%i2]
+       add     cy, %g5, %o4            C x = prev(i00) + cy
+       add     %i2, 8, %i2
+.L_out_2:
+       srlx    %o4, 16, %o5            C (x >> 16)
+       add     %o5, %o2, %o2           C mi64 in %o2   2nd ASSIGNMENT
+       and     %o4, xffff, %o5         C (x & 0xffff)
+       srlx    %o2, 48, %o7            C (mi64 >> 48)
+       mov     i00, %g5                C i00+ now in g5
+       sllx    %o2, 16, %i3            C (mi64 << 16)
+       add     %o7, %o1, cy            C new cy
+       ldx     [%sp+2223+0], i00
+       srlx    i16, 48, %l4            C (i16 >> 48)
+       mov     i16, %g2
+       ldx     [%sp+2223+8], i16
+       srlx    i48, 16, %l5            C (i48 >> 16)
+       mov     i32, %g4                C i32+ now in g4
+       sllx    i48, 32, %l6            C (i48 << 32)
+       or      %i3, %o5, %o5
+       srlx    %g4, 32, %o3            C (i32 >> 32)
+       add     %l5, %l4, %o1           C hi64- in %o1
+       sllx    %g4, 16, %o2            C (i32 << 16)
+       add     %o3, %o1, %o1           C hi64 in %o1   1st ASSIGNMENT
+       sllx    %o1, 48, %o3            C (hi64 << 48)
+       add     %g2, %o2, %o2           C mi64- in %o2
+       add     %l6, %o2, %o2           C mi64- in %o2
+       sub     %o2, %o3, %o2           C mi64 in %o2   1st ASSIGNMENT
+       stx     %o5, [%i4+%i2]
+       add     cy, %g5, %o4            C x = prev(i00) + cy
+       add     %i2, 8, %i2
+.L_out_1:
+       srlx    %o4, 16, %o5            C (x >> 16)
+       add     %o5, %o2, %o2           C mi64 in %o2   2nd ASSIGNMENT
+       and     %o4, xffff, %o5         C (x & 0xffff)
+       srlx    %o2, 48, %o7            C (mi64 >> 48)
+       sllx    %o2, 16, %i3            C (mi64 << 16)
+       add     %o7, %o1, cy            C new cy
+       or      %i3, %o5, %o5
+       stx     %o5, [%i4+%i2]          C last result limb
+
+C Fold the two product-sums still pending for the last limb (its r64 and r80
+C terms, reloaded into i00 and i16) into the carry limb that is returned.
+       sllx    i00, 0, %g2
+       add     %g2, cy, cy
+       sllx    i16, 16, %g3
+       add     %g3, cy, cy
+
+       return  %i7+8
+       mov     cy, %o0                 C (delay slot) return value
+EPILOGUE(mpn_mul_1)
diff --git a/mpn/sparc64/rshift.asm b/mpn/sparc64/rshift.asm

new file mode 100644 (file)

index 0000000..691fe01
--- /dev/null
+++ b/mpn/sparc64/rshift.asm
@@ -0,0 +1,149 @@
+dnl  SPARC v9 mpn_rshift
+
+dnl  Copyright 1996, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C                 cycles/limb
+C UltraSPARC 1&2:     2
+C UltraSPARC 3:              3.25
+
+C INPUT PARAMETERS
+C rp   destination limb vector
+C up   source limb vector
+C n    number of limbs
+C cnt  shift count in bits
+C
+C RETURN VALUE
+C The first source limb shifted left by tnc, computed at entry into %i5 and
+C moved to %i0 before the closing ret/restore.
+define(`rp',`%i0')
+define(`up',`%i1')
+define(`n',`%i2')
+define(`cnt',`%i3')
+
+define(`u0',`%l0')
+define(`u1',`%l2')
+define(`u2',`%l4')
+define(`u3',`%l6')
+
+define(`tnc',`%i4')
+
+define(`fanop',`fitod %f0,%f2')                dnl  A quasi nop running in the FA pipe
+define(`fmnop',`fmuld %f0,%f0,%f4')    dnl  A quasi nop running in the FM pipe
+
+ASM_START()
+       REGISTER(%g2,#scratch)
+       REGISTER(%g3,#scratch)
+PROLOGUE(mpn_rshift)
+       save    %sp,-160,%sp
+
+C tnc is -cnt.  NOTE(review): this relies on sllx using only the low six
+C bits of a register shift count, so that tnc acts as 64-cnt -- confirm in
+C the V9 manual before reusing tnc elsewhere.
+C Throughout, %g3 and %g1 alternate as the right-shifted part of the limb
+C being assembled and %g2 holds the left-shifted part of the next limb.
+
+       sub     %g0,cnt,tnc             C negate shift count
+       ldx     [up],u3                 C load first limb
+       subcc   n,5,n                   C n-5, negative when n < 5
+       sllx    u3,tnc,%i5              C compute function result
+       srlx    u3,cnt,%g3
+       bl,pn   %icc,.Lend1234          C fewer than 5 limbs: simple loop only
+       fanop
+
+       subcc   n,4,n                   C n-9, negative when n < 9
+       ldx     [up+8],u0
+       ldx     [up+16],u1
+       add     up,32,up
+       ldx     [up-8],u2
+       ldx     [up+0],u3
+       sllx    u0,tnc,%g2
+
+       bl,pn   %icc,.Lend5678          C no full loop iteration: finish the 4 loaded limbs
+       fanop
+
+       b,a     .Loop
+       .align  16
+C Main loop, unrolled to four limbs per iteration: stores four result limbs
+C and loads the next four source limbs.
+.Loop:
+       srlx    u0,cnt,%g1
+       or      %g3,%g2,%g3
+       ldx     [up+8],u0
+       fanop
+C --
+       sllx    u1,tnc,%g2
+       subcc   n,4,n
+       stx     %g3,[rp+0]
+       fanop
+C --
+       srlx    u1,cnt,%g3
+       or      %g1,%g2,%g1
+       ldx     [up+16],u1
+       fanop
+C --
+       sllx    u2,tnc,%g2
+       stx     %g1,[rp+8]
+       add     up,32,up
+       fanop
+C --
+       srlx    u2,cnt,%g1
+       or      %g3,%g2,%g3
+       ldx     [up-8],u2
+       fanop
+C --
+       sllx    u3,tnc,%g2
+       stx     %g3,[rp+16]
+       add     rp,32,rp
+       fanop
+C --
+       srlx    u3,cnt,%g3
+       or      %g1,%g2,%g1
+       ldx     [up+0],u3
+       fanop
+C --
+       sllx    u0,tnc,%g2
+       stx     %g1,[rp-8]
+       bge,pt  %icc,.Loop
+       fanop
+C --
+C Same as one loop iteration but without loads: stores the four result limbs
+C that depend on u0..u3 and leaves the carry-out part of u3 in %g3.
+.Lend5678:
+       srlx    u0,cnt,%g1
+       or      %g3,%g2,%g3
+       sllx    u1,tnc,%g2
+       stx     %g3,[rp+0]
+       srlx    u1,cnt,%g3
+       or      %g1,%g2,%g1
+       sllx    u2,tnc,%g2
+       stx     %g1,[rp+8]
+       srlx    u2,cnt,%g1
+       or      %g3,%g2,%g3
+       sllx    u3,tnc,%g2
+       stx     %g3,[rp+16]
+       add     rp,32,rp
+       srlx    u3,cnt,%g3              C carry...
+       or      %g1,%g2,%g1
+       stx     %g1,[rp-8]
+
+C Tail.  Adding 4 back gives the number of source limbs still to be read
+C (0 to 3); .Loop0 consumes them one at a time.
+.Lend1234:
+       addcc   n,4,n
+       bz,pn   %icc,.Lret
+       fanop
+.Loop0:
+       add     rp,8,rp
+       subcc   n,1,n
+       ldx     [up+8],u3
+       add     up,8,up
+       sllx    u3,tnc,%g2
+       or      %g3,%g2,%g3
+       stx     %g3,[rp-8]
+       srlx    u3,cnt,%g3
+       bnz,pt  %icc,.Loop0
+       fanop
+.Lret:
+       stx     %g3,[rp+0]              C top result limb, zero-filled from above
+       mov     %i5,%i0                 C function result
+       ret
+       restore
+EPILOGUE(mpn_rshift)
diff --git a/mpn/sparc64/sparc64.h b/mpn/sparc64/sparc64.h

new file mode 100644 (file)

index 0000000..945e422
--- /dev/null
+++ b/mpn/sparc64/sparc64.h
@@ -0,0 +1,190 @@
+/* UltraSPARC 64 support macros.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#define LOW32(x)   ((x) & 0xFFFFFFFF)  /* x masked to its low 32 bits */
+#define HIGH32(x)  ((x) >> 32)         /* x shifted right by 32 bits */
+
+
+/* Halfword number i in src is accessed as src[i+HALF_ENDIAN_ADJ(i)].
+   Plain src[i] would be incorrect in big endian; there HALF_ENDIAN_ADJ
+   swaps the two halves of each pair (even i -> i+1, odd i -> i-1).  */
+#if HAVE_LIMB_BIG_ENDIAN
+#define HALF_ENDIAN_ADJ(i)  (1 - (((i) & 1) << 1))   /* +1 even, -1 odd */
+#endif
+#if HAVE_LIMB_LITTLE_ENDIAN
+#define HALF_ENDIAN_ADJ(i)  0                        /* no adjust */
+#endif
+#ifndef HALF_ENDIAN_ADJ
+Error, error, unknown limb endianness;  /* deliberate compile failure */
+#endif
+
+
+/* umul_ppmm_lowequal sets h to the high limb of q*d, assuming the low limb
+   of that product is equal to l.  dh and dl are the 32-bit halves of d.
+
+   |-----high----||----low-----|
+   +------+------+
+   |             |                 ph = qh * dh
+   +------+------+
+          +------+------+
+          |             |          pm1 = ql * dh
+          +------+------+
+          +------+------+
+          |             |          pm2 = qh * dl
+          +------+------+
+                 +------+------+
+                 |             |   pl = ql * dl (not calculated)
+                 +------+------+
+
+   Knowing that the low 64 bits are equal to l means that LOW(pm1) + LOW(pm2)
+   + HIGH(pl) == HIGH(l) (mod 2^32).  All we need from those product parts
+   is whether they produce a carry into the high.
+
+   pm_l = LOW(pm1)+LOW(pm2) is done to contribute its carry, then the only
+   time there's a further carry from LOW(pm_l)+HIGH(pl) is if LOW(pm_l) >
+   HIGH(l).  pl is never calculated.  Args may be evaluated repeatedly.  */
+
+#define umul_ppmm_lowequal(h, q, d, dh, dl, l)  \
+  do {                                          \
+    mp_limb_t  ql, qh, ph, pm1, pm2, pm_l;      \
+    ASSERT ((dh) == HIGH32(d));                 \
+    ASSERT ((dl) == LOW32(d));                  \
+    ASSERT ((q)*(d) == (l));                    \
+                                                \
+    ql = LOW32 (q);                             \
+    qh = HIGH32 (q);                            \
+                                                \
+    pm1 = ql * (dh);                            \
+    pm2 = qh * (dl);                            \
+    ph  = qh * (dh);                            \
+                                                \
+    pm_l = LOW32 (pm1) + LOW32 (pm2);           \
+                                                \
+    (h) = ph + HIGH32 (pm1) + HIGH32 (pm2)      \
+      + HIGH32 (pm_l) + ((pm_l << 32) > (l));   \
+                                                \
+    ASSERT_HIGH_PRODUCT (h, q, d);              \
+  } while (0)
+
+
+/* Set h to the high of q*d, assuming the low limb of that product is equal
+   to l, and that d fits in 32-bits.  Args may be evaluated more than once.
+
+   |-----high----||----low-----|
+          +------+------+
+          |             |          pm = qh * dl
+          +------+------+
+                 +------+------+
+                 |             |   pl = ql * dl (not calculated)
+                 +------+------+
+
+   Knowing that LOW(pm) + HIGH(pl) == HIGH(l) (mod 2^32) means that the only
+   time there's a carry from that sum is when LOW(pm) > HIGH(l).  There's no
+   need to calculate pl to determine this.  */
+
+#define umul_ppmm_half_lowequal(h, q, d, l)     \
+  do {                                          \
+    mp_limb_t pm;                               \
+    ASSERT ((q)*(d) == (l));                    \
+    ASSERT (HIGH32(d) == 0);                    \
+                                                \
+    pm = HIGH32(q) * (d);                       \
+    (h) = HIGH32(pm) + ((pm << 32) > (l));      \
+    ASSERT_HIGH_PRODUCT (h, q, d);              \
+  } while (0)
+
+
+/* check that h is the high limb of x*y; expands to nothing without asserts */
+#if WANT_ASSERT
+#define ASSERT_HIGH_PRODUCT(h, x, y)    \
+  do {                                  \
+    mp_limb_t  want_h, dummy;           \
+    umul_ppmm (want_h, dummy, x, y);    \
+    ASSERT ((h) == want_h);             \
+  } while (0)
+#else
+#define ASSERT_HIGH_PRODUCT(h, x, y)    \
+  do { } while (0)
+#endif
+
+
+/* Set count to the number of leading zeros of limb x, which is ASSERTed to
+   be non-zero and to fit in 32 bits; the count is in the range 32 to 63.
+   This is the 32-bit generic C count_leading_zeros from longlong.h.  */
+#define count_leading_zeros_32(count, x)                                      \
+  do {                                                                        \
+    mp_limb_t  __xr = (x);                                                    \
+    unsigned   __a;                                                           \
+    ASSERT ((x) != 0);                                                        \
+    ASSERT ((x) <= CNST_LIMB(0xFFFFFFFF));                                    \
+    __a = __xr < ((UWtype) 1 << 16) ? (__xr < ((UWtype) 1 << 8) ? 1 : 8 + 1)  \
+      : (__xr < ((UWtype) 1 << 24)  ? 16 + 1 : 24 + 1);                       \
+                                                                              \
+    (count) = W_TYPE_SIZE + 1 - __a - __clz_tab[__xr >> __a];                 \
+  } while (0)
+
+
+/* Set inv to a 32-bit inverse floor((b*(b-d)-1) / d) with b = 2^32, knowing
+   that d fits in 32 bits and is normalized (bit 31 set); both are ASSERTed.  */
+#define invert_half_limb(inv, d)                \
+  do {                                          \
+    mp_limb_t  _n;                              \
+    ASSERT ((d) <= 0xFFFFFFFF);                 \
+    ASSERT ((d) & 0x80000000);                  \
+    _n = (((mp_limb_t) -(d)) << 32) - 1;        \
+    (inv) = (mp_limb_t) (unsigned) (_n / (d));  \
+  } while (0)
+
+
+/* Divide nh:nl by d, setting q to the quotient and r to the remainder.
+   q, r, nh and nl are 32-bits each, d_limb is 32-bits but in an mp_limb_t,
+   dinv_limb is similarly a 32-bit inverse but in an mp_limb_t.  */
+
+#define udiv_qrnnd_half_preinv(q, r, nh, nl, d_limb, dinv_limb)         \
+  do {                                                                  \
+    unsigned   _n2, _n10, _n1, _nadj, _q11n, _xh, _r, _q;               \
+    mp_limb_t  _n, _x;                                                  \
+    ASSERT ((d_limb) <= 0xFFFFFFFF);                                    \
+    ASSERT ((dinv_limb) <= 0xFFFFFFFF);                                 \
+    ASSERT ((d_limb) & 0x80000000);                                     \
+    ASSERT ((nh) < (d_limb));                                           \
+    _n10 = (nl);                                                        \
+    _n2 = (nh);                                                         \
+    _n1 = (int) _n10 >> 31;                                             \
+    _nadj = _n10 + (_n1 & (d_limb));                                    \
+    _x = (dinv_limb) * (_n2 - _n1) + _nadj;                             \
+    _q11n = ~(_n2 + HIGH32 (_x));             /* -q1-1 */               \
+    _n = ((mp_limb_t) _n2 << 32) + _n10;                                \
+    _x = _n + (d_limb) * _q11n;               /* n-q1*d-d */            \
+    _xh = HIGH32 (_x) - (d_limb);             /* high(n-q1*d-d) */      \
+    ASSERT (_xh == 0 || _xh == ~0);                                     \
+    _r = _x + ((d_limb) & _xh);               /* addback */             \
+    _q = _xh - _q11n;                         /* q1+1-addback */        \
+    ASSERT (_r < (d_limb));                                             \
+    ASSERT ((d_limb) * _q + _r == _n);                                  \
+    (r) = _r;                                                           \
+    (q) = _q;                                                           \
+  } while (0)
+
+
diff --git a/mpn/sparc64/sqr_diagonal.asm b/mpn/sparc64/sqr_diagonal.asm

new file mode 100644 (file)

index 0000000..fbbb4ff
--- /dev/null
+++ b/mpn/sparc64/sqr_diagonal.asm
@@ -0,0 +1,331 @@
+dnl  SPARC v9 64-bit mpn_sqr_diagonal.
+
+dnl  Copyright 2001, 2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                 cycles/limb
+C UltraSPARC 1&2:     22
+C UltraSPARC 3:              36
+
+C This was generated by the Sun C compiler.  It runs at 22 cycles/limb on the
+C UltraSPARC-1/2, three cycles slower than theoretically possible for optimal
+C code using the same algorithm.  For 1-3 limbs, a special loop was generated,
+C which causes performance problems in particular for 2 and 3 limbs.
+C Ultimately, this should be replaced by hand-written code in the same software
+C pipeline style as e.g., addmul_1.asm.
+
+ASM_START()
+       REGISTER(%g2,#scratch)
+       REGISTER(%g3,#scratch)
+PROLOGUE(mpn_sqr_diagonal)
+       save    %sp, -240, %sp          C new register window, 240-byte frame
+
+       sethi   %hi(0x1ffc00), %o0
+       sethi   %hi(0x3ffc00), %o1
+       add     %o0, 1023, %o7          C o7 = 0x1fffff, 21-bit mask
+       cmp     %i2, 4                  C i2 is the limb count
+       add     %o1, 1023, %o4          C o4 = 0x3fffff, 22-bit mask
+       or      %g0, %i1, %g1           C g1 = source pointer (i1 is scratch later)
+       or      %g0, %i0, %o0           C o0 = result pointer (i0 is scratch later)
+       bl,pn   %xcc, .Lsmall           C fewer than 4 limbs: simple loop
+       or      %g0, 0, %g2             C (delay slot) g2 = 0
+
+       ldx     [%i1], %o1              C first source limb
+       add     %i1, 24, %g1
+       or      %g0, 3, %g2             C g2 = 3, limbs fetched by this preamble
+       srlx    %o1, 42, %g3            C hi = bits 42..63 of the limb
+       stx     %g3, [%sp+2279]         C slot 2279 holds hi
+       and     %o1, %o7, %o2           C lo = bits 0..20
+       stx     %o2, [%sp+2263]         C slot 2263 holds lo
+       srlx    %o1, 21, %o1
+       ldd     [%sp+2279], %f0
+       and     %o1, %o7, %o1           C mid = bits 21..41
+       stx     %o1, [%sp+2271]         C slot 2271 holds mid
+       ldx     [%i1+8], %o2            C second source limb
+       fxtod   %f0, %f12               C f12 = hi as a double
+       srlx    %o2, 21, %o1
+       and     %o2, %o7, %g3
+       ldd     [%sp+2263], %f2
+       fmuld   %f12, %f12, %f10        C hi*hi
+       srlx    %o2, 42, %o2
+       ldd     [%sp+2271], %f0
+       and     %o1, %o7, %o1
+       fxtod   %f2, %f8                C f8 = lo as a double
+       stx     %o2, [%sp+2279]
+       stx     %o1, [%sp+2271]
+       fxtod   %f0, %f0                C f0 = mid as a double
+       stx     %g3, [%sp+2263]
+       fdtox   %f10, %f14
+       fmuld   %f12, %f8, %f6          C hi*lo
+       ldx     [%i1+16], %o2           C third source limb
+       std     %f14, [%sp+2255]        C slot 2255 = hi*hi
+       fmuld   %f0, %f0, %f2           C mid*mid
+       fmuld   %f8, %f8, %f10          C lo*lo
+       srlx    %o2, 42, %o1
+       faddd   %f6, %f6, %f6           C 2*hi*lo
+       fmuld   %f12, %f0, %f12         C hi*mid
+       fmuld   %f0, %f8, %f8           C mid*lo
+       ldd     [%sp+2279], %f0
+       ldd     [%sp+2263], %f4
+       fdtox   %f10, %f10
+       std     %f10, [%sp+2239]        C slot 2239 = lo*lo
+       faddd   %f2, %f6, %f6           C mid*mid + 2*hi*lo
+       ldd     [%sp+2271], %f2
+       fdtox   %f12, %f12
+       std     %f12, [%sp+2247]        C slot 2247 = hi*mid
+       fdtox   %f8, %f8
+       std     %f8, [%sp+2231]         C slot 2231 = mid*lo
+       fdtox   %f6, %f6
+       std     %f6, [%sp+2223]         C slot 2223 = mid*mid + 2*hi*lo
+
+.Loop: srlx    %o2, 21, %g3
+       stx     %o1, [%sp+2279]
+       add     %g2, 1, %g2             C one more limb fetched
+       and     %g3, %o7, %o1
+       ldx     [%sp+2255], %g4         C g4 = hi*hi of an earlier limb
+       cmp     %g2, %i2                C sets the flags for the bl at loop end
+       stx     %o1, [%sp+2271]
+       add     %g1, 8, %g1
+       add     %o0, 16, %o0            C two result limbs per source limb
+       ldx     [%sp+2239], %o1         C o1 = lo*lo
+       fxtod   %f0, %f10
+       fxtod   %f4, %f14
+       ldx     [%sp+2231], %i0         C i0 = mid*lo
+       ldx     [%sp+2223], %g5         C g5 = mid*mid + 2*hi*lo
+       ldx     [%sp+2247], %g3         C g3 = hi*mid
+       and     %o2, %o7, %o2
+       fxtod   %f2, %f8
+       fmuld   %f10, %f10, %f0
+       stx     %o2, [%sp+2263]
+       fmuld   %f10, %f14, %f6
+       ldx     [%g1-8], %o2            C next source limb
+       fmuld   %f10, %f8, %f12
+       fdtox   %f0, %f2
+       ldd     [%sp+2279], %f0
+       fmuld   %f8, %f8, %f4
+       faddd   %f6, %f6, %f6
+       fmuld   %f14, %f14, %f10
+       std     %f2, [%sp+2255]
+       sllx    %g4, 20, %g4            C (hi*hi) << 20
+       ldd     [%sp+2271], %f2
+       fmuld   %f8, %f14, %f8
+       sllx    %i0, 22, %i1            C (mid*lo) << 22
+       fdtox   %f12, %f12
+       std     %f12, [%sp+2247]
+       sllx    %g5, 42, %i0            C (mid*mid + 2*hi*lo) << 42
+       add     %o1, %i1, %o1
+       faddd   %f4, %f6, %f6
+       ldd     [%sp+2263], %f4
+       add     %o1, %i0, %o1           C o1 = low result limb
+       add     %g3, %g4, %g3
+       fdtox   %f10, %f10
+       std     %f10, [%sp+2239]
+       srlx    %o1, 42, %g4            C top 22 bits of the low limb
+       and     %g5, %o4, %i0           C low 22 bits of slot-2223 value
+       fdtox   %f8, %f8
+       std     %f8, [%sp+2231]
+       srlx    %g5, 22, %g5
+       sub     %g4, %i0, %g4
+       fdtox   %f6, %f6
+       std     %f6, [%sp+2223]
+       srlx    %g4, 63, %g4            C sign bit of that difference, used as carry
+       add     %g3, %g5, %g3
+       add     %g3, %g4, %g3           C g3 = high result limb
+       stx     %o1, [%o0-16]           C store low result limb
+       srlx    %o2, 42, %o1
+       bl,pt   %xcc, .Loop             C loop while g2 < i2
+       stx     %g3, [%o0-8]            C (delay slot) store high result limb
+
+       stx     %o1, [%sp+2279]         C wind-down: no further source loads
+       srlx    %o2, 21, %o1
+       fxtod   %f0, %f16
+       ldx     [%sp+2223], %g3
+       fxtod   %f4, %f6
+       and     %o2, %o7, %o3
+       stx     %o3, [%sp+2263]
+       fxtod   %f2, %f4
+       and     %o1, %o7, %o1
+       ldx     [%sp+2231], %o2
+       sllx    %g3, 42, %g4
+       fmuld   %f16, %f16, %f14
+       stx     %o1, [%sp+2271]
+       fmuld   %f16, %f6, %f8
+       add     %o0, 48, %o0            C six result limbs are stored below
+       ldx     [%sp+2239], %o1
+       sllx    %o2, 22, %o2
+       fmuld   %f4, %f4, %f10
+       ldx     [%sp+2255], %o3
+       fdtox   %f14, %f14
+       fmuld   %f4, %f6, %f2
+       std     %f14, [%sp+2255]
+       faddd   %f8, %f8, %f12
+       add     %o1, %o2, %o2
+       fmuld   %f16, %f4, %f4
+       ldd     [%sp+2279], %f0
+       sllx    %o3, 20, %g5
+       add     %o2, %g4, %o2
+       fmuld   %f6, %f6, %f6
+       srlx    %o2, 42, %o3
+       and     %g3, %o4, %g4
+       srlx    %g3, 22, %g3
+       faddd   %f10, %f12, %f16
+       ldd     [%sp+2271], %f12
+       ldd     [%sp+2263], %f8
+       fxtod   %f0, %f0
+       sub     %o3, %g4, %o3
+       ldx     [%sp+2247], %o1
+       srlx    %o3, 63, %o3
+       fdtox   %f2, %f10
+       fxtod   %f8, %f8
+       std     %f10, [%sp+2231]
+       fdtox   %f6, %f6
+       std     %f6, [%sp+2239]
+       add     %o1, %g5, %o1
+       fmuld   %f0, %f0, %f2
+       fdtox   %f16, %f16
+       std     %f16, [%sp+2223]
+       add     %o1, %g3, %o1
+       fdtox   %f4, %f4
+       std     %f4, [%sp+2247]
+       fmuld   %f0, %f8, %f10
+       fxtod   %f12, %f12
+       add     %o1, %o3, %o1
+       stx     %o2, [%o0-48]           C low limb, third-from-last square
+       fmuld   %f8, %f8, %f6
+       stx     %o1, [%o0-40]           C high limb, third-from-last square
+       fdtox   %f2, %f2
+       ldx     [%sp+2231], %o2
+       faddd   %f10, %f10, %f10
+       ldx     [%sp+2223], %g3
+       fmuld   %f12, %f12, %f4
+       fdtox   %f6, %f6
+       ldx     [%sp+2239], %o1
+       sllx    %o2, 22, %o2
+       fmuld   %f12, %f8, %f8
+       sllx    %g3, 42, %g5
+       ldx     [%sp+2255], %o3
+       fmuld   %f0, %f12, %f0
+       add     %o1, %o2, %o2
+       faddd   %f4, %f10, %f4
+       ldx     [%sp+2247], %o1
+       add     %o2, %g5, %o2
+       and     %g3, %o4, %g4
+       fdtox   %f8, %f8
+       sllx    %o3, 20, %g5
+       std     %f8, [%sp+2231]
+       fdtox   %f0, %f0
+       srlx    %o2, 42, %o3
+       add     %o1, %g5, %o1
+       fdtox   %f4, %f4
+       srlx    %g3, 22, %g3
+       sub     %o3, %g4, %o3
+       std     %f6, [%sp+2239]
+       std     %f4, [%sp+2223]
+       srlx    %o3, 63, %o3
+       add     %o1, %g3, %o1
+       std     %f2, [%sp+2255]
+       add     %o1, %o3, %o1
+       std     %f0, [%sp+2247]
+       stx     %o2, [%o0-32]           C low limb, second-from-last square
+       stx     %o1, [%o0-24]           C high limb, second-from-last square
+       ldx     [%sp+2231], %o2
+       ldx     [%sp+2223], %o3
+       ldx     [%sp+2239], %o1
+       sllx    %o2, 22, %o2
+       sllx    %o3, 42, %g5
+       ldx     [%sp+2255], %g4
+       and     %o3, %o4, %g3
+       add     %o1, %o2, %o2
+       ldx     [%sp+2247], %o1
+       add     %o2, %g5, %o2
+       stx     %o2, [%o0-16]           C low limb, last square
+       sllx    %g4, 20, %g4
+       srlx    %o2, 42, %o2
+       add     %o1, %g4, %o1
+       srlx    %o3, 22, %o3
+       sub     %o2, %g3, %o2
+       srlx    %o2, 63, %o2
+       add     %o1, %o3, %o1
+       add     %o1, %o2, %o1
+       stx     %o1, [%o0-8]            C high limb, last square
+       ret
+       restore %g0, %g0, %g0
+.Lsmall:
+       ldx     [%g1], %o2              C first source limb
+.Loop0:
+       and     %o2, %o7, %o1           C lo = bits 0..20
+       stx     %o1, [%sp+2263]
+       add     %g2, 1, %g2             C count this limb
+       srlx    %o2, 21, %o1
+       add     %g1, 8, %g1
+       srlx    %o2, 42, %o2            C hi = bits 42..63
+       stx     %o2, [%sp+2279]
+       and     %o1, %o7, %o1           C mid = bits 21..41
+       ldd     [%sp+2263], %f0
+       cmp     %g2, %i2                C sets the flags for the bl at loop end
+       stx     %o1, [%sp+2271]
+       fxtod   %f0, %f6                C f6 = lo
+       ldd     [%sp+2279], %f0
+       ldd     [%sp+2271], %f4
+       fxtod   %f0, %f2                C f2 = hi
+       fmuld   %f6, %f6, %f0           C lo*lo
+       fxtod   %f4, %f10               C f10 = mid
+       fmuld   %f2, %f6, %f4           C hi*lo
+       fdtox   %f0, %f0
+       std     %f0, [%sp+2239]
+       fmuld   %f10, %f6, %f8          C mid*lo
+       fmuld   %f10, %f10, %f0         C mid*mid
+       faddd   %f4, %f4, %f6           C 2*hi*lo
+       fmuld   %f2, %f2, %f4           C hi*hi
+       fdtox   %f8, %f8
+       std     %f8, [%sp+2231]
+       fmuld   %f2, %f10, %f2          C hi*mid
+       faddd   %f0, %f6, %f0           C mid*mid + 2*hi*lo
+       fdtox   %f4, %f4
+       std     %f4, [%sp+2255]
+       fdtox   %f2, %f2
+       std     %f2, [%sp+2247]
+       fdtox   %f0, %f0
+       std     %f0, [%sp+2223]
+       ldx     [%sp+2239], %o1
+       ldx     [%sp+2255], %g4
+       ldx     [%sp+2231], %o2
+       sllx    %g4, 20, %g4
+       ldx     [%sp+2223], %o3
+       sllx    %o2, 22, %o2
+       sllx    %o3, 42, %g5
+       add     %o1, %o2, %o2
+       ldx     [%sp+2247], %o1
+       add     %o2, %g5, %o2           C o2 = low result limb
+       stx     %o2, [%o0]
+       and     %o3, %o4, %g3
+       srlx    %o2, 42, %o2
+       add     %o1, %g4, %o1
+       srlx    %o3, 22, %o3
+       sub     %o2, %g3, %o2
+       srlx    %o2, 63, %o2            C sign bit of that difference, used as carry
+       add     %o1, %o3, %o1
+       add     %o1, %o2, %o1           C o1 = high result limb
+       stx     %o1, [%o0+8]
+       add     %o0, 16, %o0
+       bl,a,pt %xcc, .Loop0            C loop while g2 < i2
+       ldx     [%g1], %o2              C (annulled delay slot) next source limb
+       ret
+       restore %g0, %g0, %g0
+EPILOGUE(mpn_sqr_diagonal)
diff --git a/mpn/sparc64/sub_n.asm b/mpn/sparc64/sub_n.asm

new file mode 100644 (file)

index 0000000..e6fe9ee
--- /dev/null
+++ b/mpn/sparc64/sub_n.asm
@@ -0,0 +1,220 @@
+dnl  SPARC v9 mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+dnl  store difference in a third limb vector.
+
+dnl  Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                 cycles/limb
+C UltraSPARC 1&2:     4
+C UltraSPARC 3:              4.5
+
+C Compute carry-out from the most significant bits of u,v, and r, where
+C r=u-v-carry_in, using logic operations.
+
+C This code runs at 4 cycles/limb on UltraSPARC 1 and 2.  It has a 4 insn
+C recurrence, and on UltraSPARC 1 and 2 the IE units are 100% saturated.
+C Therefore, it seems futile to try to optimize this any further...
+
+C INPUT PARAMETERS
+define(`rp',`%i0')
+define(`up',`%i1')
+define(`vp',`%i2')
+define(`n',`%i3')
+
+define(`u0',`%l0')
+define(`u1',`%l2')
+define(`u2',`%l4')
+define(`u3',`%l6')
+define(`v0',`%l1')
+define(`v1',`%l3')
+define(`v2',`%l5')
+define(`v3',`%l7')
+
+define(`cy',`%i4')
+
+define(`fanop',`fitod %f0,%f2')                dnl  A quasi nop running in the FA pipe
+define(`fmnop',`fmuld %f0,%f0,%f4')    dnl  A quasi nop running in the FM pipe
+
+ASM_START()
+       REGISTER(%g2,#scratch)
+       REGISTER(%g3,#scratch)
+PROLOGUE(mpn_sub_n)
+       save    %sp,-160,%sp    C new register window, 160-byte frame
+
+       fitod   %f0,%f0         C make sure f0 contains small, quiet number
+       subcc   n,4,%g0         C fewer than 4 limbs?
+       bl,pn   %icc,.Loop0     C then use only the one-limb loop
+       mov     0,cy            C (delay slot) start with no borrow
+
+       ldx     [up+0],u0       C preload four limbs from each operand
+       ldx     [vp+0],v0
+       add     up,32,up
+       ldx     [up-24],u1
+       ldx     [vp+8],v1
+       add     vp,32,vp
+       ldx     [up-16],u2
+       ldx     [vp-16],v2
+       ldx     [up-8],u3
+       ldx     [vp-8],v3
+       subcc   n,8,n           C negative when fewer than 8 limbs in total
+       sub     u0,v0,%g1       C main sub
+       sub     %g1,cy,%g4      C carry sub
+       orn     u0,v0,%g2       C g2 = u0 | ~v0
+       bl,pn   %icc,.Lend4567  C then only finish the preloaded four
+       fanop
+       b,a     .Loop
+
+       .align  16
+C START MAIN LOOP
+.Loop: orn     %g4,%g2,%g2     C g2 = r | (~u & v), r being the difference
+       andn    u0,v0,%g3       C g3 = u & ~v
+       ldx     [up+0],u0
+       fanop
+C --
+       andn    %g2,%g3,%g2     C top bit of g2 is now the borrow out
+       ldx     [vp+0],v0
+       add     up,32,up
+       fanop
+C --
+       srlx    %g2,63,cy       C cy = borrow into the next limb
+       sub     u1,v1,%g1
+       stx     %g4,[rp+0]
+       fanop
+C --
+       sub     %g1,cy,%g4
+       orn     u1,v1,%g2
+       fmnop
+       fanop
+C --
+       orn     %g4,%g2,%g2
+       andn    u1,v1,%g3
+       ldx     [up-24],u1
+       fanop
+C --
+       andn    %g2,%g3,%g2
+       ldx     [vp+8],v1
+       add     vp,32,vp
+       fanop
+C --
+       srlx    %g2,63,cy
+       sub     u2,v2,%g1
+       stx     %g4,[rp+8]
+       fanop
+C --
+       sub     %g1,cy,%g4
+       orn     u2,v2,%g2
+       fmnop
+       fanop
+C --
+       orn     %g4,%g2,%g2
+       andn    u2,v2,%g3
+       ldx     [up-16],u2
+       fanop
+C --
+       andn    %g2,%g3,%g2
+       ldx     [vp-16],v2
+       add     rp,32,rp
+       fanop
+C --
+       srlx    %g2,63,cy
+       sub     u3,v3,%g1
+       stx     %g4,[rp-16]
+       fanop
+C --
+       sub     %g1,cy,%g4
+       orn     u3,v3,%g2
+       fmnop
+       fanop
+C --
+       orn     %g4,%g2,%g2
+       andn    u3,v3,%g3
+       ldx     [up-8],u3
+       fanop
+C --
+       andn    %g2,%g3,%g2
+       subcc   n,4,n           C four more limbs consumed; flags for the bge
+       ldx     [vp-8],v3
+       fanop
+C --
+       srlx    %g2,63,cy
+       sub     u0,v0,%g1
+       stx     %g4,[rp-8]
+       fanop
+C --
+       sub     %g1,cy,%g4
+       orn     u0,v0,%g2
+       bge,pt  %icc,.Loop
+       fanop
+C END MAIN LOOP
+.Lend4567:
+       orn     %g4,%g2,%g2     C wind-down: same steps, no further loads
+       andn    u0,v0,%g3
+       andn    %g2,%g3,%g2
+       srlx    %g2,63,cy
+       sub     u1,v1,%g1
+       stx     %g4,[rp+0]
+       sub     %g1,cy,%g4
+       orn     u1,v1,%g2
+       orn     %g4,%g2,%g2
+       andn    u1,v1,%g3
+       andn    %g2,%g3,%g2
+       srlx    %g2,63,cy
+       sub     u2,v2,%g1
+       stx     %g4,[rp+8]
+       sub     %g1,cy,%g4
+       orn     u2,v2,%g2
+       orn     %g4,%g2,%g2
+       andn    u2,v2,%g3
+       andn    %g2,%g3,%g2
+       add     rp,32,rp
+       srlx    %g2,63,cy
+       sub     u3,v3,%g1
+       stx     %g4,[rp-16]
+       sub     %g1,cy,%g4
+       orn     u3,v3,%g2
+       orn     %g4,%g2,%g2
+       andn    u3,v3,%g3
+       andn    %g2,%g3,%g2
+       srlx    %g2,63,cy
+       stx     %g4,[rp-8]
+
+       addcc   n,4,n           C limbs still to do, 0 to 3
+       bz,pn   %icc,.Lret      C none left
+       fanop
+
+.Loop0:        ldx     [up],u0  C one limb per iteration
+       add     up,8,up
+       ldx     [vp],v0
+       add     vp,8,vp
+       add     rp,8,rp
+       subcc   n,1,n           C flags for the bnz below
+       sub     u0,v0,%g1       C difference without borrow
+       orn     u0,v0,%g2
+       sub     %g1,cy,%g4      C difference with borrow in
+       andn    u0,v0,%g3
+       orn     %g4,%g2,%g2
+       stx     %g4,[rp-8]
+       andn    %g2,%g3,%g2
+       bnz,pt  %icc,.Loop0
+       srlx    %g2,63,cy       C (delay slot) cy = borrow out of this limb
+
+.Lret: mov     cy,%i0          C return the final borrow
+       ret
+       restore
+EPILOGUE(mpn_sub_n)
diff --git a/mpn/sparc64/submul_1.asm b/mpn/sparc64/submul_1.asm

new file mode 100644 (file)

index 0000000..ba91200
--- /dev/null
+++ b/mpn/sparc64/submul_1.asm
@@ -0,0 +1,57 @@
+dnl  SPARC v9 64-bit mpn_submul_1 -- Multiply a limb vector with a limb and
+dnl  subtract the result from a second limb vector.
+
+dnl  Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                 cycles/limb
+C UltraSPARC 1&2:     18
+C UltraSPARC 3:              23
+
+C INPUT PARAMETERS
+C rp   i0
+C up   i1
+C n    i2
+C v    i3
+
+ASM_START()
+       REGISTER(%g2,#scratch)
+
+PROLOGUE(mpn_submul_1)
+       save    %sp,-176,%sp            C new register window, 176-byte frame
+
+       sllx    %i2, 3, %g2             C g2 = 8*n, bytes in n limbs
+       or      %g0, %i1, %o1           C mpn_mul_1 arg 2 = up
+       add     %g2, 15, %o0
+       or      %g0, %i2, %o2           C mpn_mul_1 arg 3 = n
+       and     %o0, -16, %o0           C size rounded up to a multiple of 16
+       sub     %sp, %o0, %sp           C carve a temporary off the stack
+       add     %sp, 2223, %o0          C arg 1 = temporary (2223 = 2047 + 176)
+       or      %g0, %o0, %l0           C keep its address for mpn_sub_n
+       call    mpn_mul_1
+       or      %g0, %i3, %o3           C (delay slot) mpn_mul_1 arg 4 = v
+       or      %g0, %o0, %l1           C preserve carry value from mpn_mul_1
+       or      %g0, %i0, %o0           C mpn_sub_n arg 1 = rp
+       or      %g0, %i0, %o1           C mpn_sub_n arg 2 = rp
+       or      %g0, %l0, %o2           C mpn_sub_n arg 3 = the temporary
+       call    mpn_sub_n
+       or      %g0, %i2, %o3           C (delay slot) mpn_sub_n arg 4 = n
+       ret
+       restore %l1, %o0, %o0           C sum carry values
+EPILOGUE(mpn_submul_1)
diff --git a/mpn/sparc64/ultrasparc34/gmp-mparam.h b/mpn/sparc64/ultrasparc34/gmp-mparam.h

new file mode 100644 (file)

index 0000000..cd1f89a
--- /dev/null
+++ b/mpn/sparc64/ultrasparc34/gmp-mparam.h
@@ -0,0 +1,192 @@
+/* ultrasparc3/4 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2004, 2006, 2008, 2009,
+2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 64
+#define BYTES_PER_MP_LIMB 8
+
+/* 1593 MHz ultrasparc3 running Solaris 10 (swift.nada.kth.se) */
+
+#define DIVREM_1_NORM_THRESHOLD              0  /* always */
+#define DIVREM_1_UNNORM_THRESHOLD            0  /* always */
+#define MOD_1_NORM_THRESHOLD                 0  /* always */
+#define MOD_1_UNNORM_THRESHOLD               0  /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD      MP_SIZE_T_MAX  /* never */
+#define MOD_1U_TO_MOD_1_1_THRESHOLD      MP_SIZE_T_MAX
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD     MP_SIZE_T_MAX
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD     MP_SIZE_T_MAX
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD  MP_SIZE_T_MAX  /* never */
+#define USE_PREINV_DIVREM_1                  1
+#define DIVREM_2_THRESHOLD                   0  /* always */
+#define DIVEXACT_1_THRESHOLD                 0  /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD        MP_SIZE_T_MAX  /* never */
+
+#define MUL_TOOM22_THRESHOLD                30
+#define MUL_TOOM33_THRESHOLD                93
+#define MUL_TOOM44_THRESHOLD               143
+#define MUL_TOOM6H_THRESHOLD               165
+#define MUL_TOOM8H_THRESHOLD               303
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      93
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD      95
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD      85
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD      50
+
+#define SQR_BASECASE_THRESHOLD              10
+#define SQR_TOOM2_THRESHOLD                 72
+#define SQR_TOOM3_THRESHOLD                 97
+#define SQR_TOOM4_THRESHOLD                179
+#define SQR_TOOM6_THRESHOLD                191
+#define SQR_TOOM8_THRESHOLD                339
+
+#define MULMOD_BNM1_THRESHOLD               14
+#define SQRMOD_BNM1_THRESHOLD                9
+
+#define MUL_FFT_MODF_THRESHOLD             212  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    212, 5}, {     13, 6}, {     17, 7}, {      9, 6}, \
+    {     19, 7}, {     17, 8}, {      9, 7}, {     20, 8}, \
+    {     11, 7}, {     24, 8}, {     13, 9}, {      7, 8}, \
+    {     19, 9}, {     11, 8}, {     25,10}, {      7, 9}, \
+    {     15, 8}, {     33, 9}, {     19, 8}, {     39, 9}, \
+    {     23, 8}, {     47, 9}, {     27,10}, {     15, 9}, \
+    {     39,10}, {     23, 9}, {     47,11}, {     15,10}, \
+    {     31, 9}, {     67,10}, {     39, 9}, {     79, 8}, \
+    {    159, 9}, {     83,10}, {     47, 9}, {     95, 8}, \
+    {    191, 7}, {    383, 9}, {     99,10}, {     55,11}, \
+    {     31,10}, {     63, 9}, {    127, 8}, {    255,10}, \
+    {     71, 9}, {    143, 8}, {    287,10}, {     79, 9}, \
+    {    159, 8}, {    319,11}, {     47,10}, {     95, 9}, \
+    {    191, 8}, {    383,10}, {    103, 9}, {    207, 8}, \
+    {    415,10}, {    111,12}, {     31,11}, {     63,10}, \
+    {    127, 9}, {    255,10}, {    143, 9}, {    287,11}, \
+    {     79,10}, {    159, 9}, {    319, 8}, {    639,10}, \
+    {    175, 9}, {    351,11}, {     95,10}, {    191, 9}, \
+    {    383,10}, {    207, 9}, {    415,11}, {    111,10}, \
+    {    223, 9}, {    447,12}, {     63,11}, {    127,10}, \
+    {    255,11}, {    143,10}, {    287, 9}, {    575,11}, \
+    {    159,10}, {    319,11}, {    175,10}, {    351,12}, \
+    {     95,11}, {    191,10}, {    383,11}, {    207,10}, \
+    {    415,11}, {    223,10}, {    447,13}, {     63,12}, \
+    {    127,11}, {    287,10}, {    575,12}, {    159,11}, \
+    {    351,10}, {    703,12}, {    191,11}, {    415,12}, \
+    {    223,11}, {    479,10}, {    959,13}, {    127,12}, \
+    {    287,11}, {    575,12}, {    351,13}, {    191,12}, \
+    {    479,14}, {    127,13}, {    255,12}, {    575,13}, \
+    {    319,12}, {    639,11}, {   1279,12}, {    703,13}, \
+    {    383,12}, {    831,13}, {    447,12}, {    895,14}, \
+    {    255,13}, {    511,12}, {   1087,13}, {    575,12}, \
+    {   1215,13}, {    639,12}, {   1279,13}, {    703,14}, \
+    {    383,13}, {    831,12}, {   1663,13}, {    895,15}, \
+    {    255,14}, {    511,13}, {   1151,14}, {    639,13}, \
+    {   1407,14}, {    767,13}, {   1663,14}, {    895,13}, \
+    {   1791,15}, {    511,14}, {   1023,13}, {   2047,14}, \
+    {   1151,13}, {   2303,14}, {   1407,15}, {    767,14}, \
+    {   1791,16}, {    511,15}, {   1023,14}, {   2303,15}, \
+    {   1279,14}, {   2815,15}, {   1535,14}, {   3199,15}, \
+    {   1791,14}, {   3583,16}, {  65536,17}, { 131072,18}, \
+    { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
+    {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 170
+#define MUL_FFT_THRESHOLD                 2240
+
+#define SQR_FFT_MODF_THRESHOLD             244  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    244, 5}, {      8, 4}, {     17, 5}, {     17, 6}, \
+    {     17, 7}, {      9, 6}, {     19, 7}, {     17, 8}, \
+    {      9, 7}, {     20, 8}, {     11, 7}, {     23, 8}, \
+    {     21, 9}, {     11, 8}, {     25, 9}, {     15, 8}, \
+    {     31, 9}, {     19, 8}, {     39, 9}, {     27,10}, \
+    {     15, 9}, {     39,10}, {     23, 9}, {     47,11}, \
+    {     15,10}, {     31, 9}, {     67,10}, {     39, 9}, \
+    {     79,10}, {     55,11}, {     31,10}, {     63, 9}, \
+    {    127, 8}, {    255,10}, {     71, 9}, {    143, 8}, \
+    {    287,10}, {     79, 9}, {    159,11}, {     47,10}, \
+    {     95, 9}, {    191, 8}, {    383,12}, {     31,11}, \
+    {     63,10}, {    127, 9}, {    255, 8}, {    511,10}, \
+    {    135, 9}, {    271,10}, {    143, 9}, {    287,11}, \
+    {     79,10}, {    159, 9}, {    319, 8}, {    639,10}, \
+    {    175, 9}, {    351, 8}, {    703,11}, {     95,10}, \
+    {    191, 9}, {    383, 8}, {    767,10}, {    207, 9}, \
+    {    415, 8}, {    831,10}, {    223, 9}, {    447,12}, \
+    {     63,11}, {    127,10}, {    271, 9}, {    543,11}, \
+    {    143,10}, {    287, 9}, {    575, 8}, {   1151, 9}, \
+    {    607,11}, {    159,10}, {    319, 9}, {    639,11}, \
+    {    175,10}, {    351, 9}, {    703, 8}, {   1407,12}, \
+    {     95,11}, {    191,10}, {    383,11}, {    207,10}, \
+    {    415, 9}, {    831,11}, {    223,10}, {    447,13}, \
+    {     63,12}, {    127,11}, {    271,10}, {    543,11}, \
+    {    287,10}, {    575, 9}, {   1151,10}, {    607,12}, \
+    {    159,11}, {    319,10}, {    639,11}, {    351,10}, \
+    {    703,12}, {    191,11}, {    415,10}, {    831,12}, \
+    {    223,11}, {    479,13}, {    127,12}, {    255,11}, \
+    {    543,12}, {    287,11}, {    607,12}, {    319,11}, \
+    {    639,12}, {    351,11}, {    703,13}, {    191,12}, \
+    {    415,11}, {    831,12}, {    479,11}, {    959,14}, \
+    {    127,13}, {    255,12}, {    543,11}, {   1087,12}, \
+    {    575,13}, {    319,12}, {    639,11}, {   1279,12}, \
+    {    703,13}, {    383,12}, {    831,13}, {    447,12}, \
+    {    895,14}, {    255,13}, {    511,12}, {   1023,13}, \
+    {    575,12}, {   1151,13}, {    703,14}, {    383,13}, \
+    {    831,12}, {   1663,13}, {    959,15}, {    255,14}, \
+    {    511,13}, {   1087,12}, {   2175,13}, {   1151,14}, \
+    {    639,13}, {   1407,14}, {    767,13}, {   1663,14}, \
+    {    895,13}, {   1791,15}, {    511,14}, {   1023,13}, \
+    {   2047,14}, {   1151,13}, {   2431,14}, {   1407,15}, \
+    {    767,14}, {   1791,16}, {    511,15}, {   1023,14}, \
+    {   2303,15}, {   1279,14}, {   2815,15}, {   1535,14}, \
+    {   3199,15}, {   1791,16}, {  65536,17}, { 131072,18}, \
+    { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
+    {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 182
+#define SQR_FFT_THRESHOLD                 1984
+
+#define MULLO_BASECASE_THRESHOLD            13
+#define MULLO_DC_THRESHOLD                   0  /* never mpn_mullo_basecase */
+#define MULLO_MUL_N_THRESHOLD             3791
+
+#define DC_DIV_QR_THRESHOLD                 16
+#define DC_DIVAPPR_Q_THRESHOLD              66
+#define DC_BDIV_QR_THRESHOLD                26
+#define DC_BDIV_Q_THRESHOLD                 92
+
+#define INV_MULMOD_BNM1_THRESHOLD           58
+#define INV_NEWTON_THRESHOLD                17
+#define INV_APPR_THRESHOLD                  17
+
+#define BINV_NEWTON_THRESHOLD              134
+#define REDC_1_TO_REDC_2_THRESHOLD          10
+#define REDC_2_TO_REDC_N_THRESHOLD         117
+
+#define MU_DIV_QR_THRESHOLD                748
+#define MU_DIVAPPR_Q_THRESHOLD             630
+#define MUPI_DIV_QR_THRESHOLD                0  /* always */
+#define MU_BDIV_QR_THRESHOLD               748
+#define MU_BDIV_Q_THRESHOLD                807
+
+#define MATRIX22_STRASSEN_THRESHOLD         12
+#define HGCD_THRESHOLD                      39
+#define GCD_DC_THRESHOLD                   130
+#define GCDEXT_DC_THRESHOLD                134
+#define JACOBI_BASE_METHOD                   2
+
+#define GET_STR_DC_THRESHOLD                18
+#define GET_STR_PRECOMPUTE_THRESHOLD        27
+#define SET_STR_DC_THRESHOLD               315
+#define SET_STR_PRECOMPUTE_THRESHOLD      1037
diff --git a/mpn/vax/add_n.s b/mpn/vax/add_n.s

new file mode 100644 (file)

index 0000000..60773cc
--- /dev/null
+++ b/mpn/vax/add_n.s
@@ -0,0 +1,59 @@
+# VAX __gmpn_add_n -- Add two limb vectors of the same length > 0 and store
+# sum in a third limb vector.
+
+# Copyright 1999, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+# INPUT PARAMETERS
+# res_ptr      (sp + 4)
+# s1_ptr       (sp + 8)
+# s2_ptr       (sp + 12)
+# size         (sp + 16)
+
+.text
+       .align 1
+.globl ___gmpn_add_n
+___gmpn_add_n:
+       .word   0x0             # procedure entry mask: no registers saved
+       movl    16(ap),r0       # r0 = size
+       movl    12(ap),r1       # r1 = s2_ptr
+       movl    8(ap),r2        # r2 = s1_ptr
+       movl    4(ap),r3        # r3 = res_ptr
+       mnegl   r0,r5           # r5 = -size
+       addl2   $3,r0
+       ashl    $-2,r0,r0       # r0 = (size + 3) / 4 = passes through the 4x unrolled loop
+       bicl2   $-4,r5          # keep only the low 2 bits: r5 = (-size) & 3 = limb groups to skip
+       movaq   (r5)[r5],r5     # r5 = 9 * r5; presumably each limb group below is 9 bytes of code -- confirm
+       jmp     Loop(r5)        # enter the first pass r5 bytes past Loop
+
+Loop:  movl    (r2)+,r4        # r4 = next s1 limb
+       adwc    (r1)+,r4        # r4 += next s2 limb + carry from the previous limb
+       movl    r4,(r3)+        # store the sum limb
+       movl    (r2)+,r4
+       adwc    (r1)+,r4
+       movl    r4,(r3)+
+       movl    (r2)+,r4
+       adwc    (r1)+,r4
+       movl    r4,(r3)+
+       movl    (r2)+,r4
+       adwc    (r1)+,r4
+       movl    r4,(r3)+
+       sobgtr  r0,Loop         # another pass while --r0 > 0
+
+       adwc    r0,r0           # r0 is 0 once the loop falls through (size > 0), so r0 = carry out
+       ret
diff --git a/mpn/vax/addmul_1.s b/mpn/vax/addmul_1.s

new file mode 100644 (file)

index 0000000..e2f86e0
--- /dev/null
+++ b/mpn/vax/addmul_1.s
@@ -0,0 +1,124 @@
+# VAX __gmpn_addmul_1 -- Multiply a limb vector with a limb and add
+# the result to a second limb vector.
+
+# Copyright 1992, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+# INPUT PARAMETERS
+# res_ptr      (sp + 4)
+# s1_ptr       (sp + 8)
+# size         (sp + 12)
+# s2_limb      (sp + 16)
+
+.text
+       .align 1
+.globl ___gmpn_addmul_1
+___gmpn_addmul_1:
+       .word   0xfc0           # procedure entry mask: save r6-r11
+       movl    12(ap),r4       # r4 = size
+       movl    8(ap),r8        # r8 = s1_ptr
+       movl    4(ap),r9        # r9 = res_ptr
+       movl    16(ap),r6       # r6 = s2_limb; the flags of this move drive the jlss below
+       jlss    s2_big          # s2_limb has its top bit set: use the Loop2 variant
+
+       clrl    r3              # r3 = carry limb into the L1 half of the loop
+       incl    r4
+       ashl    $-1,r4,r7       # r7 = (size + 1) / 2 = loop passes, two limbs per pass
+       jlbc    r4,L1           # size odd: start in the second half of the loop body
+       clrl    r11             # r11 = carry limb into the Loop1 half of the loop
+
+# Loop for S2_LIMB < 0x80000000
+Loop1: movl    (r8)+,r1        # r1 = next s1 limb
+       jlss    L1n0            # top bit set: take the path with the signed-multiply fix-up
+       emul    r1,r6,$0,r2     # r3:r2 = r1 * r6 (emul is a signed multiply)
+       addl2   r11,r2          # add the carry limb from the previous product
+       adwc    $0,r3           # propagate into the new carry limb
+       addl2   r2,(r9)+        # add the low product limb into the result limb
+       adwc    $0,r3           # fold the carry of that add into the carry limb
+L1:    movl    (r8)+,r1
+       jlss    L1n1
+L1p1:  emul    r1,r6,$0,r10    # r11:r10 = r1 * r6
+       addl2   r3,r10
+       adwc    $0,r11
+       addl2   r10,(r9)+
+       adwc    $0,r11
+
+       sobgtr  r7,Loop1
+       movl    r11,r0          # return the final carry limb
+       ret
+
+L1n0:  emul    r1,r6,$0,r2
+       addl2   r11,r2
+       adwc    r6,r3           # r1 was multiplied as a negative value: add r6 back into the high limb
+       addl2   r2,(r9)+
+       adwc    $0,r3
+       movl    (r8)+,r1
+       jgeq    L1p1            # next limb has its top bit clear: rejoin the plain path
+L1n1:  emul    r1,r6,$0,r10
+       addl2   r3,r10
+       adwc    r6,r11          # same fix-up for the second limb of the pass
+       addl2   r10,(r9)+
+       adwc    $0,r11
+
+       sobgtr  r7,Loop1
+       movl    r11,r0          # return the final carry limb
+       ret
+
+
+s2_big:        clrl    r3      # same set-up as above, for the Loop2 variant
+       incl    r4
+       ashl    $-1,r4,r7
+       jlbc    r4,L2
+       clrl    r11
+
+# Loop for S2_LIMB >= 0x80000000
+Loop2: movl    (r8)+,r1
+       jlss    L2n0
+       emul    r1,r6,$0,r2
+       addl2   r11,r2
+       adwc    r1,r3           # r6 was multiplied as a negative value: add r1 back into the high limb
+       addl2   r2,(r9)+
+       adwc    $0,r3
+L2:    movl    (r8)+,r1
+       jlss    L2n1
+L2p1:  emul    r1,r6,$0,r10
+       addl2   r3,r10
+       adwc    r1,r11
+       addl2   r10,(r9)+
+       adwc    $0,r11
+
+       sobgtr  r7,Loop2
+       movl    r11,r0          # return the final carry limb
+       ret
+
+L2n0:  emul    r1,r6,$0,r2
+       addl2   r11,r2
+       adwc    r6,r3           # both operands were taken as negative: add r6 back ...
+       addl2   r2,(r9)+
+       adwc    r1,r3           # ... and r1, together with the carry of the memory add
+       movl    (r8)+,r1
+       jgeq    L2p1
+L2n1:  emul    r1,r6,$0,r10
+       addl2   r3,r10
+       adwc    r6,r11
+       addl2   r10,(r9)+
+       adwc    r1,r11
+
+       sobgtr  r7,Loop2
+       movl    r11,r0          # return the final carry limb
+       ret
diff --git a/mpn/vax/gmp-mparam.h b/mpn/vax/gmp-mparam.h

new file mode 100644 (file)

index 0000000..f436282
--- /dev/null
+++ b/mpn/vax/gmp-mparam.h
@@ -0,0 +1,49 @@
+/* VAX gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+/* These numbers were measured manually using the tune/speed program.
+   The standard tune/tuneup takes too long.  (VAX 8800) */
+
+#define MUL_TOOM22_THRESHOLD             14
+#define MUL_TOOM33_THRESHOLD            110
+
+#define SQR_BASECASE_THRESHOLD            6
+#define SQR_TOOM2_THRESHOLD              42
+#define SQR_TOOM3_THRESHOLD             250
+
+/* #define DIV_SB_PREINV_THRESHOLD         */
+/* #define DIV_DC_THRESHOLD                */
+/* #define POWM_THRESHOLD                  */
+
+/* #define GCD_ACCEL_THRESHOLD             */
+/* #define JACOBI_BASE_METHOD              */
+
+/* #define DIVREM_1_NORM_THRESHOLD         */
+/* #define DIVREM_1_UNNORM_THRESHOLD       */
+/* #define MOD_1_NORM_THRESHOLD            */
+/* #define MOD_1_UNNORM_THRESHOLD          */
+/* #define USE_PREINV_DIVREM_1             */
+/* #define USE_PREINV_MOD_1                */
+/* #define DIVREM_2_THRESHOLD              */
+/* #define DIVEXACT_1_THRESHOLD            */
+/* #define MODEXACT_1_ODD_THRESHOLD        */
+
+/* #define GET_STR_DC_THRESHOLD            */
+/* #define GET_STR_PRECOMPUTE_THRESHOLD    */
+#define SET_STR_THRESHOLD              3400
diff --git a/mpn/vax/lshift.s b/mpn/vax/lshift.s

new file mode 100644 (file)

index 0000000..6f3d600
--- /dev/null
+++ b/mpn/vax/lshift.s
@@ -0,0 +1,56 @@
+# VAX mpn_lshift -- left shift.
+
+# Copyright 1999, 2000, 2001 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+# INPUT PARAMETERS
+# rptr         (sp + 4)
+# sptr         (sp + 8)
+# size         (sp + 12)
+# cnt          (sp + 16)
+# r0=retval r1=size r2,r3=itmp r4,r5=otmp      call-used registers
+# r6=sptr r7=rptr r8=cnt r9 r10 r11            call-saved registers
+
+.text
+       .align 1
+.globl ___gmpn_lshift
+___gmpn_lshift:
+       .word   0x1c0           # procedure entry mask: save r6-r8
+       movl    4(ap),r7        # r7 = rptr
+       movl    8(ap),r6        # r6 = sptr
+       movl    12(ap),r1       # r1 = size
+       movl    16(ap),r8       # r8 = cnt
+
+       moval   (r6)[r1],r6     # r6 = sptr + 4*size, just past the top source limb
+       moval   (r7)[r1],r7     # r7 = rptr + 4*size, just past the top result limb
+       clrl    r3              # high half of the r3:r2 pair starts as 0
+       movl    -(r6),r2        # r2 = most significant source limb
+       ashq    r8,r2,r4        # r5:r4 = r3:r2 << cnt
+       movl    r5,r0           # return value: the bits shifted out of the top limb
+       movl    r2,r3           # this limb becomes the high half for the next shift
+       decl    r1
+       jeql    Lend            # size == 1: only the final store remains
+
+Loop:  movl    -(r6),r2        # r2 = next lower source limb
+       ashq    r8,r2,r4        # r5:r4 = r3:r2 << cnt
+       movl    r5,-(r7)        # high half is the next result limb, stored top down
+       movl    r2,r3
+       sobgtr  r1,Loop
+
+Lend:  movl    r4,-4(r7)       # lowest result limb = low half of the last shift
+       ret
diff --git a/mpn/vax/mul_1.s b/mpn/vax/mul_1.s

new file mode 100644 (file)

index 0000000..c6f4594
--- /dev/null
+++ b/mpn/vax/mul_1.s
@@ -0,0 +1,121 @@
+# VAX __gmpn_mul_1 -- Multiply a limb vector with a limb and store
+# the result in a second limb vector.
+
+# Copyright 1992, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+# INPUT PARAMETERS
+# res_ptr      (sp + 4)
+# s1_ptr       (sp + 8)
+# size         (sp + 12)
+# s2_limb      (sp + 16)
+
+.text
+       .align 1
+.globl ___gmpn_mul_1
+___gmpn_mul_1:
+       .word   0xfc0           # procedure entry mask: save r6-r11
+       movl    12(ap),r4       # r4 = size
+       movl    8(ap),r8        # r8 = s1_ptr
+       movl    4(ap),r9        # r9 = res_ptr
+       movl    16(ap),r6       # r6 = s2_limb; the flags of this move drive the jlss below
+       jlss    s2_big          # s2_limb has its top bit set: use the Loop2 variant
+
+# One might want to combine the addl2 and the store below, but that
+# is actually just slower according to my timing tests.  (VAX 3600)
+
+       clrl    r3              # r3 = carry limb into the L1 half of the loop
+       incl    r4
+       ashl    $-1,r4,r7       # r7 = (size + 1) / 2 = loop passes, two limbs per pass
+       jlbc    r4,L1           # size odd: start in the second half of the loop body
+       clrl    r11             # r11 = carry limb into the Loop1 half of the loop
+
+# Loop for S2_LIMB < 0x80000000
+Loop1: movl    (r8)+,r1        # r1 = next s1 limb
+       jlss    L1n0            # top bit set: take the path with the signed-multiply fix-up
+       emul    r1,r6,$0,r2     # r3:r2 = r1 * r6 (emul is a signed multiply)
+       addl2   r11,r2          # add the carry limb from the previous product
+       adwc    $0,r3           # propagate into the new carry limb
+       movl    r2,(r9)+        # store the result limb
+L1:    movl    (r8)+,r1
+       jlss    L1n1
+L1p1:  emul    r1,r6,$0,r10    # r11:r10 = r1 * r6
+       addl2   r3,r10
+       adwc    $0,r11
+       movl    r10,(r9)+
+
+       sobgtr  r7,Loop1
+       movl    r11,r0          # return the final carry limb
+       ret
+
+L1n0:  emul    r1,r6,$0,r2
+       addl2   r11,r2
+       adwc    r6,r3           # r1 was multiplied as a negative value: add r6 back into the high limb
+       movl    r2,(r9)+
+       movl    (r8)+,r1
+       jgeq    L1p1            # next limb has its top bit clear: rejoin the plain path
+L1n1:  emul    r1,r6,$0,r10
+       addl2   r3,r10
+       adwc    r6,r11          # same fix-up for the second limb of the pass
+       movl    r10,(r9)+
+
+       sobgtr  r7,Loop1
+       movl    r11,r0          # return the final carry limb
+       ret
+
+
+s2_big:        clrl    r3      # same set-up as above, for the Loop2 variant
+       incl    r4
+       ashl    $-1,r4,r7
+       jlbc    r4,L2
+       clrl    r11
+
+# Loop for S2_LIMB >= 0x80000000
+Loop2: movl    (r8)+,r1
+       jlss    L2n0
+       emul    r1,r6,$0,r2
+       addl2   r11,r2
+       adwc    r1,r3           # r6 was multiplied as a negative value: add r1 back into the high limb
+       movl    r2,(r9)+
+L2:    movl    (r8)+,r1
+       jlss    L2n1
+L2p1:  emul    r1,r6,$0,r10
+       addl2   r3,r10
+       adwc    r1,r11
+       movl    r10,(r9)+
+
+       sobgtr  r7,Loop2
+       movl    r11,r0          # return the final carry limb
+       ret
+
+L2n0:  emul    r1,r6,$0,r2
+       addl2   r1,r3           # both operands were taken as negative: add r1 back ...
+       addl2   r11,r2
+       adwc    r6,r3           # ... and r6, together with the carry from the low limb
+       movl    r2,(r9)+
+       movl    (r8)+,r1
+       jgeq    L2p1
+L2n1:  emul    r1,r6,$0,r10
+       addl2   r1,r11
+       addl2   r3,r10
+       adwc    r6,r11
+       movl    r10,(r9)+
+
+       sobgtr  r7,Loop2
+       movl    r11,r0          # return the final carry limb
+       ret
diff --git a/mpn/vax/rshift.s b/mpn/vax/rshift.s

new file mode 100644 (file)

index 0000000..ae27208
--- /dev/null
+++ b/mpn/vax/rshift.s
@@ -0,0 +1,54 @@
+# VAX mpn_rshift -- right shift.
+
+# Copyright 1999, 2000, 2001 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+# INPUT PARAMETERS
+# rptr         (sp + 4)
+# sptr         (sp + 8)
+# size         (sp + 12)
+# cnt          (sp + 16)
+# r0=retval r1=size r2,r3=itmp r4,r5=otmp      call-used registers
+# r6=sptr r7=rptr r8=cnt r9 r10 r11            call-saved registers
+
+.text
+       .align 1
+.globl ___gmpn_rshift
+___gmpn_rshift:
+       .word   0x1c0           # procedure entry mask: save r6-r8
+       movl    4(ap),r7        # r7 = rptr
+       movl    8(ap),r6        # r6 = sptr
+       movl    12(ap),r1       # r1 = size
+       movl    16(ap),r8       # r8 = cnt
+
+       movl    (r6)+,r2        # r2 = least significant source limb
+       subl3   r8,$32,r8       # r8 = 32 - cnt: the right shift is done as a left shift of a limb pair
+       ashl    r8,r2,r0        # return value: low limb << (32 - cnt), the bits shifted out
+       decl    r1
+       jeql    Lend            # size == 1: only the top limb remains
+
+Loop:  movl    (r6)+,r3        # r3 = next higher source limb
+       ashq    r8,r2,r4        # r5:r4 = r3:r2 << (32 - cnt)
+       movl    r5,(r7)+        # high half is the next result limb, stored bottom up
+       movl    r3,r2           # this limb becomes the low half for the next shift
+       sobgtr  r1,Loop
+
+Lend:  clrl    r3              # nothing above the top limb: shift in zeros
+       ashq    r8,r2,r4
+       movl    r5,(r7)         # store the top result limb
+       ret
diff --git a/mpn/vax/sub_n.s b/mpn/vax/sub_n.s

new file mode 100644 (file)

index 0000000..c9ad1ec
--- /dev/null
+++ b/mpn/vax/sub_n.s
@@ -0,0 +1,59 @@
+# VAX __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and store
+# difference in a third limb vector.
+
+# Copyright 1999, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+# INPUT PARAMETERS
+# res_ptr      (sp + 4)
+# s1_ptr       (sp + 8)
+# s2_ptr       (sp + 12)
+# size         (sp + 16)
+
+.text
+       .align 1
+.globl ___gmpn_sub_n
+___gmpn_sub_n:
+       .word   0x0             # procedure entry mask: no registers saved
+       movl    16(ap),r0       # r0 = size
+       movl    12(ap),r1       # r1 = s2_ptr
+       movl    8(ap),r2        # r2 = s1_ptr
+       movl    4(ap),r3        # r3 = res_ptr
+       mnegl   r0,r5           # r5 = -size
+       addl2   $3,r0
+       ashl    $-2,r0,r0       # r0 = (size + 3) / 4 = passes through the 4x unrolled loop
+       bicl2   $-4,r5          # keep only the low 2 bits: r5 = (-size) & 3 = limb groups to skip
+       movaq   (r5)[r5],r5     # r5 = 9 * r5; presumably each limb group below is 9 bytes of code -- confirm
+       jmp     Loop(r5)        # enter the first pass r5 bytes past Loop
+
+Loop:  movl    (r2)+,r4        # r4 = next s1 limb
+       sbwc    (r1)+,r4        # r4 -= next s2 limb + borrow from the previous limb
+       movl    r4,(r3)+        # store the difference limb
+       movl    (r2)+,r4
+       sbwc    (r1)+,r4
+       movl    r4,(r3)+
+       movl    (r2)+,r4
+       sbwc    (r1)+,r4
+       movl    r4,(r3)+
+       movl    (r2)+,r4
+       sbwc    (r1)+,r4
+       movl    r4,(r3)+
+       sobgtr  r0,Loop         # another pass while --r0 > 0
+
+       adwc    r0,r0           # r0 is 0 once the loop falls through (size > 0), so r0 = borrow out
+       ret
diff --git a/mpn/vax/submul_1.s b/mpn/vax/submul_1.s

new file mode 100644 (file)

index 0000000..ad0ddbb
--- /dev/null
+++ b/mpn/vax/submul_1.s
@@ -0,0 +1,124 @@
+# VAX __gmpn_submul_1 -- Multiply a limb vector with a limb and subtract
+# the result from a second limb vector.
+
+# Copyright 1992, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+# INPUT PARAMETERS
+# res_ptr      (sp + 4)
+# s1_ptr       (sp + 8)
+# size         (sp + 12)
+# s2_limb      (sp + 16)
+
+.text
+       .align 1
+.globl ___gmpn_submul_1
+___gmpn_submul_1:
+       .word   0xfc0           # procedure entry mask: save r6-r11
+       movl    12(ap),r4       # r4 = size
+       movl    8(ap),r8        # r8 = s1_ptr
+       movl    4(ap),r9        # r9 = res_ptr
+       movl    16(ap),r6       # r6 = s2_limb; the flags of this move drive the jlss below
+       jlss    s2_big          # s2_limb has its top bit set: use the Loop2 variant
+
+       clrl    r3              # r3 = carry limb into the L1 half of the loop
+       incl    r4
+       ashl    $-1,r4,r7       # r7 = (size + 1) / 2 = loop passes, two limbs per pass
+       jlbc    r4,L1           # size odd: start in the second half of the loop body
+       clrl    r11             # r11 = carry limb into the Loop1 half of the loop
+
+# Loop for S2_LIMB < 0x80000000
+Loop1: movl    (r8)+,r1        # r1 = next s1 limb
+       jlss    L1n0            # top bit set: take the path with the signed-multiply fix-up
+       emul    r1,r6,$0,r2     # r3:r2 = r1 * r6 (emul is a signed multiply)
+       addl2   r11,r2          # add the carry limb from the previous product
+       adwc    $0,r3           # propagate into the new carry limb
+       subl2   r2,(r9)+        # subtract the low product limb from the result limb
+       adwc    $0,r3           # fold the borrow of that subtract into the carry limb
+L1:    movl    (r8)+,r1
+       jlss    L1n1
+L1p1:  emul    r1,r6,$0,r10    # r11:r10 = r1 * r6
+       addl2   r3,r10
+       adwc    $0,r11
+       subl2   r10,(r9)+
+       adwc    $0,r11
+
+       sobgtr  r7,Loop1
+       movl    r11,r0          # return the final carry (borrow) limb
+       ret
+
+L1n0:  emul    r1,r6,$0,r2
+       addl2   r11,r2
+       adwc    r6,r3           # r1 was multiplied as a negative value: add r6 back into the high limb
+       subl2   r2,(r9)+
+       adwc    $0,r3
+       movl    (r8)+,r1
+       jgeq    L1p1            # next limb has its top bit clear: rejoin the plain path
+L1n1:  emul    r1,r6,$0,r10
+       addl2   r3,r10
+       adwc    r6,r11          # same fix-up for the second limb of the pass
+       subl2   r10,(r9)+
+       adwc    $0,r11
+
+       sobgtr  r7,Loop1
+       movl    r11,r0          # return the final carry (borrow) limb
+       ret
+
+
+s2_big:        clrl    r3      # same set-up as above, for the Loop2 variant
+       incl    r4
+       ashl    $-1,r4,r7
+       jlbc    r4,L2
+       clrl    r11
+
+# Loop for S2_LIMB >= 0x80000000
+Loop2: movl    (r8)+,r1
+       jlss    L2n0
+       emul    r1,r6,$0,r2
+       addl2   r11,r2
+       adwc    r1,r3           # r6 was multiplied as a negative value: add r1 back into the high limb
+       subl2   r2,(r9)+
+       adwc    $0,r3
+L2:    movl    (r8)+,r1
+       jlss    L2n1
+L2p1:  emul    r1,r6,$0,r10
+       addl2   r3,r10
+       adwc    r1,r11
+       subl2   r10,(r9)+
+       adwc    $0,r11
+
+       sobgtr  r7,Loop2
+       movl    r11,r0          # return the final carry (borrow) limb
+       ret
+
+L2n0:  emul    r1,r6,$0,r2
+       addl2   r11,r2
+       adwc    r6,r3           # both operands were taken as negative: add r6 back ...
+       subl2   r2,(r9)+
+       adwc    r1,r3           # ... and r1, together with the borrow of the memory subtract
+       movl    (r8)+,r1
+       jgeq    L2p1
+L2n1:  emul    r1,r6,$0,r10
+       addl2   r3,r10
+       adwc    r6,r11
+       subl2   r10,(r9)+
+       adwc    r1,r11
+
+       sobgtr  r7,Loop2
+       movl    r11,r0          # return the final carry (borrow) limb
+       ret
diff --git a/mpn/x86/README b/mpn/x86/README

new file mode 100644 (file)

index 0000000..883db22
--- /dev/null
+++ b/mpn/x86/README
@@ -0,0 +1,514 @@
+Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+
+
+
+                      X86 MPN SUBROUTINES
+
+
+This directory contains mpn functions for various 80x86 chips.
+
+
+CODE ORGANIZATION
+
+       x86               i386, generic
+       x86/i486          i486
+       x86/pentium       Intel Pentium (P5, P54)
+       x86/pentium/mmx   Intel Pentium with MMX (P55)
+       x86/p6            Intel Pentium Pro
+       x86/p6/mmx        Intel Pentium II, III
+       x86/p6/p3mmx      Intel Pentium III
+       x86/k6            \ AMD K6
+       x86/k6/mmx        /
+       x86/k6/k62mmx     AMD K6-2
+       x86/k7            \ AMD Athlon
+       x86/k7/mmx        /
+       x86/pentium4      \
+       x86/pentium4/mmx  | Intel Pentium 4
+       x86/pentium4/sse2 /
+
+
+The top-level x86 directory contains blended style code, meant to be
+reasonable on all x86s.
+
+
+
+STATUS
+
+The code is well-optimized for AMD and Intel chips, but there's nothing
+specific for Cyrix chips, nor for actual 80386 and 80486 chips.
+
+
+
+ASM FILES
+
+The x86 .asm files are BSD style assembler code, first put through m4 for
+macro processing.  The generic mpn/asm-defs.m4 is used, together with
+mpn/x86/x86-defs.m4.  See comments in those files.
+
+The code is meant for use with GNU "gas" or a system "as".  There's no
+support for assemblers that demand Intel style code.
+
+
+
+STACK FRAME
+
+m4 macros are used to define the parameters passed on the stack, and these
+act like comments on what the stack frame looks like too.  For example,
+mpn_mul_1() has the following.
+
+        defframe(PARAM_MULTIPLIER, 16)
+        defframe(PARAM_SIZE,       12)
+        defframe(PARAM_SRC,         8)
+        defframe(PARAM_DST,         4)
+
+PARAM_MULTIPLIER becomes `FRAME+16(%esp)', and the others similarly.  The
+return address is at offset 0, but there's not normally any need to access
+that.
+
+FRAME is redefined as necessary through the code so it's the number of bytes
+pushed on the stack, and hence the offsets in the parameter macros stay
+correct.  At the start of a routine FRAME should be zero.
+
+        deflit(`FRAME',0)
+       ...
+       deflit(`FRAME',4)
+       ...
+       deflit(`FRAME',8)
+       ...
+
+Helper macros FRAME_pushl(), FRAME_popl(), FRAME_addl_esp() and
+FRAME_subl_esp() exist to adjust FRAME for the effect of those instructions,
+and can be used instead of explicit definitions if preferred.
+defframe_pushl() is a combination of FRAME_pushl() and defframe().
+
+There's generally some slackness in redefining FRAME.  If new values aren't
+going to get used then the redefinitions are omitted to keep from cluttering
+up the code.  This happens for instance at the end of a routine, where there
+might be just four pops and then a ret, so FRAME isn't getting used.
+
+Local variables and saved registers can be similarly defined, with negative
+offsets representing stack space below the initial stack pointer.  For
+example,
+
+       defframe(SAVE_ESI,   -4)
+       defframe(SAVE_EDI,   -8)
+       defframe(VAR_COUNTER,-12)
+
+       deflit(STACK_SPACE, 12)
+
+Here STACK_SPACE gets used in a "subl $STACK_SPACE, %esp" to allocate the
+space, and that instruction must be followed by a redefinition of FRAME
+(setting it equal to STACK_SPACE) to reflect the change in %esp.
+
+Definitions for pushed registers are only put in when they're going to be
+used.  If registers are just saved and restored with pushes and pops then
+definitions aren't made.
+
+
+
+ASSEMBLER EXPRESSIONS
+
+Only addition and subtraction seem to be universally available, certainly
+that's all the Solaris 8 "as" seems to accept.  If expressions are wanted
+then m4 eval() should be used.
+
+In particular note that a "/" anywhere in a line starts a comment in Solaris
+"as", and in some configurations of gas too.
+
+       addl    $32/2, %eax           <-- wrong
+
+       addl    $eval(32/2), %eax     <-- right
+
+Binutils gas/config/tc-i386.c has a choice between "/" being a comment
+anywhere in a line, or only at the start.  FreeBSD patches 2.9.1 to select
+the latter, and from 2.9.5 it's the default for GNU/Linux too.
+
+
+
+ASSEMBLER COMMENTS
+
+Solaris "as" doesn't support "#" commenting, using /* */ instead.  For that
+reason "C" commenting is used (see asm-defs.m4) and the intermediate ".s"
+files have no comments.
+
+Any comments before include(`../config.m4') must use m4 "dnl", since it's
+only after the include that "C" is available.  By convention "dnl" is also
+used for comments about m4 macros.
+
+
+
+TEMPORARY LABELS
+
+Temporary numbered labels like "1:" used as "1f" or "1b" are available in
+"gas" and Solaris "as", but not in SCO "as".  Normal L() labels should be
+used instead, possibly with a counter to make them unique, see jadcl0() in
+x86-defs.m4 for instance.  A separate counter for each macro makes it
+possible to nest them, for instance movl_text_address() can be used within
+an ASSERT().
+
+"1:" etc must be avoided in gcc __asm__ blocks too.  "%=" for generating a
+unique number looks like a good alternative, but is that actually a
+documented feature?  In any case this problem doesn't currently arise.
+
+
+
+ZERO DISPLACEMENTS
+
+In a couple of places addressing modes like 0(%ebx) with a byte-sized zero
+displacement are wanted, rather than (%ebx) with no displacement.  These are
+either for computed jumps or to get desirable code alignment.  Explicit
+.byte sequences are used to ensure the assembler doesn't turn 0(%ebx) into
+(%ebx).  The Zdisp() macro in x86-defs.m4 is used for this.
+
+Current gas 2.9.5 or recent 2.9.1 leave 0(%ebx) as written, but old gas
+1.92.3 changes it.  In general changing would be the sort of "optimization"
+an assembler might perform, hence explicit ".byte"s are used where
+necessary.
+
+
+
+SHLD/SHRD INSTRUCTIONS
+
+The %cl count forms of double shift instructions like "shldl %cl,%eax,%ebx"
+must be written "shldl %eax,%ebx" for some assemblers.  gas takes either,
+Solaris "as" doesn't allow %cl, gcc generates %cl for gas and NeXT (which is
+gas), and omits %cl elsewhere.
+
+For GMP an autoconf test GMP_ASM_X86_SHLDL_CL is used to determine whether
+%cl should be used, and the macros shldl, shrdl, shldw and shrdw in
+mpn/x86/x86-defs.m4 pass through or omit %cl as necessary.  See the comments
+with those macros for usage.
+
+
+
+IMUL INSTRUCTION
+
+GCC config/i386/i386.md (cvs rev 1.187, 21 Oct 00) under *mulsi3_1 notes
+that the following two forms produce identical object code
+
+       imul    $12, %eax
+       imul    $12, %eax, %eax
+
+but that the former isn't accepted by some assemblers, in particular the SCO
+OSR5 COFF assembler.  GMP follows GCC and uses only the latter form.
+
+(This applies only to immediate operands, the three operand form is only
+valid with an immediate.)
+
+
+
+DIRECTION FLAG
+
+The x86 calling conventions say that the direction flag should be clear at
+function entry and exit.  (See iBCS2 and SVR4 ABI books, references below.)
+Although this has been so since the year dot, it's not absolutely clear
+whether it's universally respected.  Since it's better to be safe than
+sorry, GMP follows glibc and does a "cld" if it depends on the direction
+flag being clear.  This happens only in a few places.
+
+
+
+POSITION INDEPENDENT CODE
+
+  Coding Style
+
+    Defining the symbol PIC in m4 processing selects SVR4 / ELF style
+    position independent code.  This is necessary for shared libraries
+    because they can be mapped into different processes at different virtual
+    addresses.  Actually, relocations are allowed but text pages with
+    relocations aren't shared, defeating the purpose of a shared library.
+
+    The GOT is used to access global data, and the PLT is used for
+    functions.  The use of the PLT adds a fixed cost to every function call,
+    and the GOT adds a cost to any function accessing global variables.
+    These are small but might be noticeable when working with small
+    operands.
+
+  Scope
+
+    It's intended, as a matter of policy, that references within libgmp are
+    resolved within libgmp.  Certainly there's no need for an application to
+    replace any internals, and we take the view that there's no value in an
+    application subverting anything documented either.
+
+    Resolving references within libgmp in theory means calls can be made with a
+    plain PC-relative call instruction, which is faster and smaller than going
+    through the PLT, and data references can be similarly PC-relative, saving a
+    GOT entry and fetch from there.  Unfortunately the normal linker behaviour
+    doesn't allow us to do this.
+
+    By default an R_386_PC32 PC-relative reference, either for a call or for
+    data, is left in libgmp.so by the linker so that it can be resolved at
+    runtime to a location in the application or another shared library.  This
+    means a text segment relocation which we don't want.
+
+  -Bsymbolic
+
+    Under the "-Bsymbolic" option, the linker resolves references to symbols
+    within libgmp.so.  This gives us the desired effect for R_386_PC32,
+    ie. it's resolved at link time.  It also resolves R_386_PLT32 calls
+    directly to their target without creating a PLT entry (though if this is
+    done to normal compiler-generated code it still leaves a setup of %ebx
+    to _GLOBAL_OFFSET_TABLE_ which may then be unnecessary).
+
+    Unfortunately -Bsymbolic does bad things to global variables defined in
+    a shared library but accessed by non-PIC code from the mainline (or a
+    static library).
+
+    The problem is that the mainline needs a fixed data address to avoid
+    text segment relocations, so space is allocated in its data segment and
+    the value from the variable is copied from the shared library's data
+    segment when the library is loaded.  Under -Bsymbolic, however,
+    references in the shared library are then resolved still to the shared
+    library data area.  Not surprisingly it bombs badly to have mainline
+    code and library code accessing different locations for what should be
+    one variable.
+
+    Note that this -Bsymbolic effect for the shared library is not just for
+    R_386_PC32 offsets which might have been cooked up in assembler, but is
+    done also for the contents of GOT entries.  -Bsymbolic simply applies a
+    general rule that symbols are resolved first from the local module.
+
+  Visibility Attributes
+
+    GCC __attribute__ ((visibility ("protected"))), which is available in
+    recent versions, eg. 3.3, is probably what we'd like to use.  It makes
+    gcc generate plain PC-relative calls to indicated functions, and directs
+    the linker to resolve references to the given function within the link
+    module.
+
+    Unfortunately, as of debian binutils 2.13.90.0.16 at least, the
+    resulting libgmp.so comes out with text segment relocations, references
+    are not resolved at link time.  If the gcc description is to be believed
+    this is not how it should work.  If a symbol cannot be overridden
+    by another module then surely references within that module can be
+    resolved immediately (ie. at link time).
+
+  Present
+
+    In any case, all this means that we have no optimizations we can
+    usefully make to function or variable usages, neither for assembler nor
+    C code.  Perhaps in the future the visibility attribute will work as
+    we'd like.
+
+
+
+
+GLOBAL OFFSET TABLE
+
+The magic _GLOBAL_OFFSET_TABLE_ used by code establishing the address of the
+GOT sometimes requires an extra underscore prefix.  SVR4 systems and NetBSD
+don't need a prefix, OpenBSD does need one.  Note that NetBSD and OpenBSD
+are both a.out underscore systems, so the prefix for _GLOBAL_OFFSET_TABLE_
+is not simply the same as the prefix for ordinary globals.
+
+In any case in the asm code we write _GLOBAL_OFFSET_TABLE_ and let a macro
+in x86-defs.m4 add an extra underscore if required (according to a configure
+test).
+
+Old gas 1.92.3 which comes with FreeBSD 2.2.8 gets a segmentation fault when
+asked to assemble the following,
+
+        L1:
+            addl  $_GLOBAL_OFFSET_TABLE_+[.-L1], %ebx
+
+It seems that using the label in the same instruction it refers to is the
+problem, since a nop in between works.  But the simplest workaround is to
+follow gcc and omit the +[.-L1] since it does nothing,
+
+            addl  $_GLOBAL_OFFSET_TABLE_, %ebx
+
+Current gas 2.10 generates incorrect object code when %eax is used in such a
+construction (with or without +[.-L1]),
+
+            addl  $_GLOBAL_OFFSET_TABLE_, %eax
+
+The R_386_GOTPC gets a displacement of 2 rather than the 1 appropriate for
+the 1 byte opcode of "addl $n,%eax".  The best workaround is just to use any
+other register, since then it's a two byte opcode+mod/rm.  GCC for example
+always uses %ebx (which is needed for calls through the PLT).
+
+A similar problem occurs in an leal (again with or without a +[.-L1]),
+
+            leal  _GLOBAL_OFFSET_TABLE_(%edi), %ebx
+
+This time the R_386_GOTPC gets a displacement of 0 rather than the 2
+appropriate for the opcode and mod/rm, making this form unusable.
+
+
+
+
+SIMPLE LOOPS
+
+The overheads in setting up for an unrolled loop can mean that at small
+sizes a simple loop is faster.  Making small sizes go fast is important,
+even if it adds a cycle or two to bigger sizes.  To this end various
+routines choose between a simple loop and an unrolled loop according to
+operand size.  The path to the simple loop, or to special case code for
+small sizes, is always as fast as possible.
+
+Adding a simple loop requires a conditional jump to choose between the
+simple and unrolled code.  The size of a branch misprediction penalty
+affects whether a simple loop is worthwhile.
+
+The convention is for an m4 definition UNROLL_THRESHOLD to set the crossover
+point, with sizes < UNROLL_THRESHOLD using the simple loop, sizes >=
+UNROLL_THRESHOLD using the unrolled loop.  If position independent code adds
+a couple of cycles to an unrolled loop setup, the threshold will vary with
+PIC or non-PIC.  Something like the following is typical.
+
+       deflit(UNROLL_THRESHOLD, ifdef(`PIC',10,8))
+
+There's no automated way to determine the threshold.  Setting it to a small
+value and then to a big value makes it possible to measure the simple and
+unrolled loops each over a range of sizes, from which the crossover point
+can be determined.  Alternately, just adjust the threshold up or down until
+there are no more speedups.
+
+
+
+UNROLLED LOOP CODING
+
+The x86 addressing modes allow a byte displacement of -128 to +127, making
+it possible to access 256 bytes, which is 64 limbs, without adjusting
+pointer registers within the loop.  Dword sized displacements can be used
+too, but they increase code size, and unrolling to 64 ought to be enough.
+
+When unrolling to the full 64 limbs/loop, the limb at the top of the loop
+will have a displacement of -128, so pointers have to have a corresponding
++128 added before entering the loop.  When unrolling to 32 limbs/loop
+displacements 0 to 127 can be used with 0 at the top of the loop and no
+adjustment needed to the pointers.
+
+Where 64 limbs/loop is supported, the +128 adjustment is done only when 64
+limbs/loop is selected.  Usually the gain in speed using 64 instead of 32 or
+16 is small, so support for 64 limbs/loop is generally only for comparison.
+
+
+
+COMPUTED JUMPS
+
+When working from least significant limb to most significant limb (most
+routines) the computed jump and pointer calculations in preparation for an
+unrolled loop are as follows.
+
+       S = operand size in limbs
+       N = number of limbs per loop (UNROLL_COUNT)
+       L = log2 of unrolling (UNROLL_LOG2)
+       M = mask for unrolling (UNROLL_MASK)
+       C = code bytes per limb in the loop
+       B = bytes per limb (4 for x86)
+
+       computed jump            (-S & M) * C + entrypoint
+       subtract from pointers   (-S & M) * B
+       initial loop counter     (S-1) >> L
+       displacements            0 to B*(N-1)
+
+The loop counter is decremented at the end of each loop, and the looping
+stops when the decrement takes the counter to -1.  The displacements are for
+the addressing accessing each limb, eg. a load with "movl disp(%ebx), %eax".
+
+Usually the multiply by "C" can be handled without an imul, using instead an
+leal, or a shift and subtract.
+
+When working from most significant to least significant limb (eg. mpn_lshift
+and mpn_copyd), the calculations change as follows.
+
+       add to pointers          (-S & M) * B
+       displacements            0 to -B*(N-1)
+
+
+
+OLD GAS 1.92.3
+
+This version comes with FreeBSD 2.2.8 and has a couple of gremlins that
+affect GMP code.
+
+Firstly, an expression involving two forward references to labels comes out
+as zero.  For example,
+
+               addl    $bar-foo, %eax
+       foo:
+               nop
+       bar:
+
+This should lead to "addl $1, %eax", but it comes out as "addl $0, %eax".
+When only one forward reference is involved, it works correctly, as for
+example,
+
+       foo:
+               addl    $bar-foo, %eax
+               nop
+       bar:
+
+Secondly, an expression involving two labels can't be used as the
+displacement for an leal.  For example,
+
+       foo:
+               nop
+       bar:
+               leal    bar-foo(%eax,%ebx,8), %ecx
+
+A slightly cryptic error is given, "Unimplemented segment type 0 in
+parse_operand".  When only one label is used it's ok, and the label can be a
+forward reference too, as for example,
+
+               leal    foo(%eax,%ebx,8), %ecx
+               nop
+       foo:
+
+These problems only affect PIC computed jump calculations.  The workarounds
+are just to do an leal without a displacement and then an addl, and to make
+sure the code is placed so that there's at most one forward reference in the
+addl.
+
+
+
+REFERENCES
+
+"Intel Architecture Software Developer's Manual", volumes 1, 2a, 2b, 3a, 3b,
+2006, order numbers 253665 through 253669.  Available on-line,
+
+       ftp://download.intel.com/design/Pentium4/manuals/25366518.pdf
+       ftp://download.intel.com/design/Pentium4/manuals/25366618.pdf
+       ftp://download.intel.com/design/Pentium4/manuals/25366718.pdf
+       ftp://download.intel.com/design/Pentium4/manuals/25366818.pdf
+       ftp://download.intel.com/design/Pentium4/manuals/25366918.pdf
+
+
+"System V Application Binary Interface", Unix System Laboratories Inc, 1992,
+published by Prentice Hall, ISBN 0-13-880410-9.  And the "Intel386 Processor
+Supplement", AT&T, 1991, ISBN 0-13-877689-X.  These have details of calling
+conventions and ELF shared library PIC coding.  Versions of both available
+on-line,
+
+       http://www.sco.com/developer/devspecs
+
+"Intel386 Family Binary Compatibility Specification 2", Intel Corporation,
+published by McGraw-Hill, 1991, ISBN 0-07-031219-2.  (Same as the above 386
+ABI supplement.)
+
+
+
+----------------
+Local variables:
+mode: text
+fill-column: 76
+End:
diff --git a/mpn/x86/aors_n.asm b/mpn/x86/aors_n.asm

new file mode 100644 (file)

index 0000000..c896999
--- /dev/null
+++ b/mpn/x86/aors_n.asm
@@ -0,0 +1,192 @@
+dnl  x86 mpn_add_n/mpn_sub_n -- mpn addition and subtraction.
+
+dnl  Copyright 1992, 1994, 1995, 1996, 1999, 2000, 2001, 2002 Free Software
+dnl  Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C     cycles/limb
+C P5:   3.375
+C P6:   3.125
+C K6:   3.5
+C K7:   2.25
+C P4:   8.75
+
+
+dnl  Select the per-limb instruction and the two entry point names according
+dnl  to which OPERATION_ symbol is defined.  If neither OPERATION_add_n nor
+dnl  OPERATION_sub_n is defined, m4_error is invoked with a message.
+ifdef(`OPERATION_add_n',`
+       define(M4_inst,        adcl)
+       define(M4_function_n,  mpn_add_n)
+       define(M4_function_nc, mpn_add_nc)
+
+',`ifdef(`OPERATION_sub_n',`
+       define(M4_inst,        sbbl)
+       define(M4_function_n,  mpn_sub_n)
+       define(M4_function_nc, mpn_sub_nc)
+
+',`m4_error(`Need OPERATION_add_n or OPERATION_sub_n
+')')')
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
+
+
+C mp_limb_t M4_function_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C                          mp_size_t size);
+C mp_limb_t M4_function_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C                          mp_size_t size, mp_limb_t carry);
+
+C Stack parameters.  Each name is an offset from %esp that is adjusted by
+C FRAME, the number of bytes pushed so far, so the names stay valid after
+C the pushes below as long as FRAME is kept up to date.
+defframe(PARAM_CARRY,20)
+defframe(PARAM_SIZE, 16)
+defframe(PARAM_SRC2, 12)
+defframe(PARAM_SRC1, 8)
+defframe(PARAM_DST,  4)
+
+       TEXT
+       ALIGN(8)
+
+C M4_function_nc -- same operation as M4_function_n, but starting with a
+C carry taken from bit 0 of PARAM_CARRY.
+C
+C There is no loop in this routine.  After the setup it transfers control
+C into the unrolled loop L(oop) inside M4_function_n below, and that code
+C also restores %esi and %edi and returns.
+C
+C State on entry to the loop:
+C   edi = dst, esi = src1, edx = src2, each moved back by 4*skip bytes
+C   ecx = number of passes through the 8-limb loop
+C   CF  = incoming carry
+C where skip = (-size) & 7 is the number of limb slots of the first pass
+C that are jumped over.
+PROLOGUE(M4_function_nc)
+deflit(`FRAME',0)
+
+       pushl   %edi            FRAME_pushl()
+       pushl   %esi            FRAME_pushl()
+
+       movl    PARAM_DST,%edi
+       movl    PARAM_SRC1,%esi
+       movl    PARAM_SRC2,%edx
+       movl    PARAM_SIZE,%ecx
+
+       movl    %ecx,%eax
+       shrl    $3,%ecx                 C compute count for unrolled loop
+       negl    %eax
+       andl    $7,%eax                 C get index where to start loop
+       C skip = 0 means size is a multiple of 8: enter at L(oopgo), which
+       C loads the carry and falls into the top of the loop.
+       jz      L(oopgo)                C necessary special case for 0
+       incl    %ecx                    C adjust loop count
+       shll    $2,%eax                 C adjustment for pointers...
+       subl    %eax,%edi               C ... since they are offset ...
+       subl    %eax,%esi               C ... by a constant when we ...
+       subl    %eax,%edx               C ... enter the loop
+       shrl    $2,%eax                 C restore previous value
+
+C Start address = L(oop) + 9*skip - 3.  NOTE(review): the 9 and the -3
+C appear to be the code size in bytes of one limb group in L(oop), with
+C the first group 3 bytes shorter since its operands need no displacement
+C byte.  This relies on the assembler emitting byte-sized displacements
+C for the other groups -- confirm before touching the loop body.
+ifdef(`PIC',`
+       C Calculate start address in loop for PIC.  Due to limitations in
+       C old gas, L(oop)-L(0a)-3 cannot be put into the leal
+       call    L(0a)
+L(0a): leal    (%eax,%eax,8),%eax
+       addl    (%esp),%eax             C add address of L(0a) pushed by call
+       addl    $L(oop)-L(0a)-3,%eax
+       addl    $4,%esp                 C discard the pushed address
+',`
+       C Calculate start address in loop for non-PIC.
+       leal    L(oop)-3(%eax,%eax,8),%eax
+')
+
+       C These lines initialize carry from the 5th parameter.  Should be
+       C possible to simplify.
+       C This is done after the address arithmetic above, which alters the
+       C flags.  popl and jmp leave the carry flag as set by the shrl.
+       pushl   %ebp            FRAME_pushl()
+       movl    PARAM_CARRY,%ebp
+       shrl    $1,%ebp                 C shift bit 0 into carry
+       popl    %ebp            FRAME_popl()
+
+       jmp     *%eax                   C jump into loop
+
+EPILOGUE()
+
+
+C M4_function_n -- apply M4_inst limb by limb to src1 and src2, storing
+C the results at dst, and return the carry (or borrow) out in eax.
+C
+C The work is done by an 8-limb unrolled loop at L(oop).  When size is not
+C a multiple of 8 the pointers are moved back by 4*skip bytes, with
+C skip = (-size) & 7, and the loop is entered part way through its first
+C pass via a computed jump.
+C
+C Register use in the loop:
+C   edi = dst, esi = src1, edx = src2, ecx = passes remaining
+C   eax = limb being processed
+C   CF  = carry chained from one limb to the next
+       ALIGN(16)
+PROLOGUE(M4_function_n)
+deflit(`FRAME',0)
+
+       pushl   %edi            FRAME_pushl()
+       pushl   %esi            FRAME_pushl()
+
+       movl    PARAM_DST,%edi
+       movl    PARAM_SRC1,%esi
+       movl    PARAM_SRC2,%edx
+       movl    PARAM_SIZE,%ecx
+
+       movl    %ecx,%eax
+       shrl    $3,%ecx                 C compute count for unrolled loop
+       negl    %eax
+       andl    $7,%eax                 C get index where to start loop
+       C The andl has cleared the carry flag, so when jumping straight to
+       C the top of the loop the first M4_inst sees no carry in.
+       jz      L(oop)                  C necessary special case for 0
+       incl    %ecx                    C adjust loop count
+       shll    $2,%eax                 C adjustment for pointers...
+       subl    %eax,%edi               C ... since they are offset ...
+       subl    %eax,%esi               C ... by a constant when we ...
+       subl    %eax,%edx               C ... enter the loop
+       shrl    $2,%eax                 C restore previous value
+
+ifdef(`PIC',`
+       C Calculate start address in loop for PIC.  Due to limitations in
+       C some assemblers, L(oop)-L(0b)-3 cannot be put into the leal
+       call    L(0b)
+L(0b): leal    (%eax,%eax,8),%eax
+       addl    (%esp),%eax             C add address of L(0b) pushed by call
+       addl    $L(oop)-L(0b)-3,%eax
+       addl    $4,%esp                 C discard the pushed address
+',`
+       C Calculate start address in loop for non-PIC.
+       leal    L(oop)-3(%eax,%eax,8),%eax
+')
+       C NOTE(review): the loop needs the carry flag clear at this point.
+       C The last flag-setting instruction is the shrl above (non-PIC) or
+       C the addl to esp (PIC), both presumably leaving it clear -- confirm
+       C before reordering anything here.
+       jmp     *%eax                   C jump into loop
+
+C Entry used by M4_function_nc when its skip count is zero: set the carry
+C flag from bit 0 of PARAM_CARRY, then fall through into the loop.
+L(oopgo):
+       pushl   %ebp            FRAME_pushl()
+       movl    PARAM_CARRY,%ebp
+       shrl    $1,%ebp                 C shift bit 0 into carry
+       popl    %ebp            FRAME_popl()
+
+C Main loop, 8 limbs per pass.  The carry flag links each M4_inst to the
+C next one, also from one pass to the following pass: the pointer and
+C counter updates use leal and decl, which leave the carry flag unchanged.
+       ALIGN(16)
+L(oop):        movl    (%esi),%eax
+       M4_inst (%edx),%eax
+       movl    %eax,(%edi)
+       movl    4(%esi),%eax
+       M4_inst 4(%edx),%eax
+       movl    %eax,4(%edi)
+       movl    8(%esi),%eax
+       M4_inst 8(%edx),%eax
+       movl    %eax,8(%edi)
+       movl    12(%esi),%eax
+       M4_inst 12(%edx),%eax
+       movl    %eax,12(%edi)
+       movl    16(%esi),%eax
+       M4_inst 16(%edx),%eax
+       movl    %eax,16(%edi)
+       movl    20(%esi),%eax
+       M4_inst 20(%edx),%eax
+       movl    %eax,20(%edi)
+       movl    24(%esi),%eax
+       M4_inst 24(%edx),%eax
+       movl    %eax,24(%edi)
+       movl    28(%esi),%eax
+       M4_inst 28(%edx),%eax
+       movl    %eax,28(%edi)
+       leal    32(%edi),%edi
+       leal    32(%esi),%esi
+       leal    32(%edx),%edx
+       decl    %ecx
+       jnz     L(oop)
+
+       sbbl    %eax,%eax               C eax = 0 or -1 according to carry
+       negl    %eax                    C return value 0 or 1
+
+       popl    %esi
+       popl    %edi
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/aorsmul_1.asm b/mpn/x86/aorsmul_1.asm

new file mode 100644 (file)

index 0000000..b4db427
--- /dev/null
+++ b/mpn/x86/aorsmul_1.asm
@@ -0,0 +1,145 @@
+dnl  x86 mpn_addmul_1/mpn_submul_1 (for 386 and 486) -- Multiply a limb vector
+dnl  with a limb and add/subtract the result to/from a second limb vector.
+
+dnl  Copyright 1992, 1994, 1997, 1999, 2000, 2001, 2002, 2005 Free Software
+dnl  Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C                           cycles/limb
+C P5:                           14.75
+C P6 model 0-8,10-12             7.5
+C P6 model 9  (Banias)
+C P6 model 13 (Dothan)           6.75
+C P4 model 0  (Willamette)      24.0
+C P4 model 1  (?)               24.0
+C P4 model 2  (Northwood)       24.0
+C P4 model 3  (Prescott)
+C P4 model 4  (Nocona)
+C K6:                           12.5
+C K7:                            5.25
+C K8:
+
+
+dnl  Select the instruction applied to the destination and the entry point
+dnl  name according to which OPERATION_ symbol is defined.  If neither
+dnl  OPERATION_addmul_1 nor OPERATION_submul_1 is defined, m4_error is
+dnl  invoked with a message.
+ifdef(`OPERATION_addmul_1',`
+      define(M4_inst,        addl)
+      define(M4_function_1,  mpn_addmul_1)
+
+',`ifdef(`OPERATION_submul_1',`
+      define(M4_inst,        subl)
+      define(M4_function_1,  mpn_submul_1)
+
+',`m4_error(`Need OPERATION_addmul_1 or OPERATION_submul_1
+')')')
+
+MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
+
+
+C mp_limb_t M4_function_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                          mp_limb_t mult);
+
+C Stack parameters, relative to %esp.  FRAME must equal the number of bytes
+C pushed since entry for these to address the right slots.
+define(PARAM_MULTIPLIER, `FRAME+16(%esp)')
+define(PARAM_SIZE,       `FRAME+12(%esp)')
+define(PARAM_SRC,        `FRAME+8(%esp)')
+define(PARAM_DST,        `FRAME+4(%esp)')
+
+       TEXT
+       ALIGN(8)
+
+C M4_function_1 -- multiply each limb at src by the multiplier and apply
+C the low product limb, plus the running carry limb, to the matching limb
+C at dst with M4_inst.  The final carry limb is returned in eax.
+C
+C Register use:
+C   edi = dst pointer, esi = src pointer, ecx = loop counter
+C   edx:eax = product from mull
+C   ebx, ebp = carry limb (cylimb); the two alternate in the unrolled loop
+C
+C The first loop handles size mod 4 limbs one at a time, the second loop
+C handles the remaining limbs four per pass.  edi, esi, ebx and ebp are
+C saved on entry and restored before returning.
+PROLOGUE(M4_function_1)
+deflit(`FRAME',0)
+
+       pushl   %edi
+       pushl   %esi
+       pushl   %ebx
+       pushl   %ebp
+C Four registers pushed above, so the parameters are now 16 bytes further
+C from %esp.
+deflit(`FRAME',16)
+
+       movl    PARAM_DST,%edi
+       movl    PARAM_SRC,%esi
+       movl    PARAM_SIZE,%ecx
+
+       xorl    %ebx,%ebx               C cylimb = 0
+       andl    $3,%ecx                 C limbs for the one-at-a-time loop
+       jz      L(end0)
+
+L(oop0):
+       movl    (%esi),%eax
+       mull    PARAM_MULTIPLIER
+       leal    4(%esi),%esi
+       addl    %ebx,%eax               C low limb += cylimb
+       movl    $0,%ebx                 C movl keeps the carry from the addl
+       adcl    %ebx,%edx               C high limb += that carry
+       M4_inst %eax,(%edi)
+       adcl    %edx,%ebx       C propagate carry into cylimb
+
+       leal    4(%edi),%edi
+       decl    %ecx
+       jnz     L(oop0)
+
+L(end0):
+       movl    PARAM_SIZE,%ecx
+       shrl    $2,%ecx                 C passes of the 4-limb loop
+       jz      L(end)
+
+C Unrolled loop, 4 limbs per pass.  mull overwrites the carry flag, so each
+C product is formed before the M4_inst whose carry the following adcl
+C picks up.  The cylimb register is cleared with movl, which leaves the
+C flags alone, between the two adcl of each step.
+       ALIGN(8)
+L(oop):        movl    (%esi),%eax
+       mull    PARAM_MULTIPLIER
+       addl    %eax,%ebx
+       movl    $0,%ebp
+       adcl    %edx,%ebp
+
+       movl    4(%esi),%eax
+       mull    PARAM_MULTIPLIER
+       M4_inst %ebx,(%edi)
+       adcl    %eax,%ebp       C new lo + cylimb
+       movl    $0,%ebx
+       adcl    %edx,%ebx
+
+       movl    8(%esi),%eax
+       mull    PARAM_MULTIPLIER
+       M4_inst %ebp,4(%edi)
+       adcl    %eax,%ebx       C new lo + cylimb
+       movl    $0,%ebp
+       adcl    %edx,%ebp
+
+       movl    12(%esi),%eax
+       mull    PARAM_MULTIPLIER
+       M4_inst %ebx,8(%edi)
+       adcl    %eax,%ebp       C new lo + cylimb
+       movl    $0,%ebx
+       adcl    %edx,%ebx
+
+       M4_inst %ebp,12(%edi)
+       adcl    $0,%ebx         C propagate carry into cylimb
+
+       leal    16(%esi),%esi
+       leal    16(%edi),%edi
+       decl    %ecx
+       jnz     L(oop)
+
+L(end):        movl    %ebx,%eax       C return the carry limb
+
+       popl    %ebp
+       popl    %ebx
+       popl    %esi
+       popl    %edi
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/atom/gmp-mparam.h b/mpn/x86/atom/gmp-mparam.h

new file mode 100644 (file)

index 0000000..daadd41
--- /dev/null
+++ b/mpn/x86/atom/gmp-mparam.h
@@ -0,0 +1,179 @@
+/* Intel Atom/32 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
+2008, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 32       /* limb width in bits */
+#define BYTES_PER_MP_LIMB 4    /* limb width in bytes (32 / 8) */
+
+/* Generated by tuneup.c */
+
+#define MOD_1_NORM_THRESHOLD                 3
+#define MOD_1_UNNORM_THRESHOLD               9
+#define MOD_1N_TO_MOD_1_1_THRESHOLD         13
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          6
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD        16
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD         0  /* never mpn_mod_1s_2p */
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     31
+#define USE_PREINV_DIVREM_1                  1
+#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD          102
+
+#define MUL_TOOM22_THRESHOLD                16
+#define MUL_TOOM33_THRESHOLD                66
+#define MUL_TOOM44_THRESHOLD               171
+#define MUL_TOOM6H_THRESHOLD               258
+#define MUL_TOOM8H_THRESHOLD               357
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD     113
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     129
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     115
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD     129
+
+#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
+#define SQR_TOOM2_THRESHOLD                 16
+#define SQR_TOOM3_THRESHOLD                113
+#define SQR_TOOM4_THRESHOLD                193
+#define SQR_TOOM6_THRESHOLD                254
+#define SQR_TOOM8_THRESHOLD                381
+
+#define MULMOD_BNM1_THRESHOLD               13
+#define SQRMOD_BNM1_THRESHOLD               11
+
+#define MUL_FFT_MODF_THRESHOLD             332  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    332, 5}, {     17, 6}, {      9, 5}, {     19, 6}, \
+    {     11, 5}, {     23, 6}, {     17, 7}, {      9, 6}, \
+    {     19, 7}, {     11, 6}, {     25, 7}, {     15, 6}, \
+    {     31, 7}, {     19, 8}, {     11, 7}, {     27, 8}, \
+    {     15, 7}, {     33, 8}, {     19, 7}, {     39, 8}, \
+    {     23, 7}, {     47, 8}, {     27, 9}, {     15, 8}, \
+    {     35, 7}, {     71, 8}, {     39, 9}, {     23, 8}, \
+    {     51,10}, {     15, 9}, {     31, 8}, {     67, 9}, \
+    {     39, 8}, {     79, 9}, {     55,10}, {     31, 9}, \
+    {     79,10}, {     47, 9}, {     95,11}, {     31,10}, \
+    {     63, 9}, {    127, 8}, {    255, 9}, {    135,10}, \
+    {     79, 9}, {    159,10}, {     95, 9}, {    191, 8}, \
+    {    383,11}, {     63,10}, {    127, 9}, {    255, 8}, \
+    {    511, 9}, {    271,10}, {    143, 9}, {    287, 8}, \
+    {    607,10}, {    159, 9}, {    319,11}, {     95,10}, \
+    {    191, 9}, {    383,12}, {     63,11}, {    127,10}, \
+    {    255, 9}, {    511,10}, {    271, 9}, {    543, 8}, \
+    {   1087,10}, {    287, 9}, {    607,11}, {    159,10}, \
+    {    351, 9}, {    703, 8}, {   1407,11}, {    191,10}, \
+    {    415, 9}, {    831,11}, {    223,10}, {    479, 9}, \
+    {    959,12}, {    127,11}, {    255,10}, {    543, 9}, \
+    {   1087,11}, {    287,10}, {    607, 9}, {   1215,11}, \
+    {    351,10}, {    703, 9}, {   1407,12}, {    191,11}, \
+    {    383,10}, {    767,11}, {    415,10}, {    831,11}, \
+    {    479,10}, {    959,13}, {    127,12}, {    255,11}, \
+    {    543,10}, {   1087,11}, {    607,10}, {   1215,12}, \
+    {    319,11}, {    703,10}, {   1407,11}, {    735,10}, \
+    {   1471,12}, {    383,11}, {    831,12}, {    447,11}, \
+    {    959,10}, {   1919,13}, {    255,12}, {    511,11}, \
+    {   1087,12}, {    575,11}, {   1215,10}, {   2431,12}, \
+    {    703,11}, {   1471,13}, {    383,12}, {    959,11}, \
+    {   1919,14}, {    255,13}, {    511,12}, {   1215,11}, \
+    {   2431,13}, {    639,12}, {   1471,11}, {   2943,10}, \
+    {   5887,13}, {    767,12}, {   1599,13}, {    895,12}, \
+    {   1919,11}, {   3839,14}, {    511,13}, {   1023,12}, \
+    {   2111,13}, {   1151,12}, {   2431,13}, {   1407,12}, \
+    {   2943,11}, {   5887,14}, {    767,13}, {   1919,12}, \
+    {   3839,15}, {    511,14}, {   1023,13}, {   2431,14}, \
+    {   1279,13}, {   2943,12}, {   5887,14}, {   1535,13}, \
+    {   3199,14}, {   1791,13}, {   3839,12}, {   7679,15}, \
+    {   1023,14}, {  16384,15}, {  32768,16} }
+#define MUL_FFT_TABLE3_SIZE 163
+#define MUL_FFT_THRESHOLD                 3456
+
+#define SQR_FFT_MODF_THRESHOLD             308  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    308, 5}, {     13, 6}, {      7, 5}, {     17, 6}, \
+    {      9, 5}, {     19, 6}, {     13, 7}, {      7, 6}, \
+    {     17, 7}, {      9, 6}, {     19, 7}, {     11, 6}, \
+    {     24, 7}, {     15, 6}, {     31, 7}, {     21, 8}, \
+    {     11, 7}, {     25, 8}, {     15, 7}, {     31, 8}, \
+    {     19, 7}, {     39, 8}, {     23, 7}, {     47, 8}, \
+    {     27, 9}, {     15, 8}, {     31, 4}, {    607, 5}, \
+    {    319, 7}, {     95, 8}, {     55, 9}, {     31, 8}, \
+    {     63, 9}, {     39, 8}, {     79, 9}, {     47,10}, \
+    {     31, 9}, {     79,10}, {     47,11}, {     31,10}, \
+    {     63, 9}, {    127, 8}, {    255, 7}, {    511,10}, \
+    {     79, 9}, {    159, 8}, {    319,10}, {     95, 9}, \
+    {    191, 8}, {    383,11}, {     63,10}, {    127, 9}, \
+    {    255, 8}, {    543, 7}, {   1087, 9}, {    287, 8}, \
+    {    607,10}, {    159, 9}, {    319,11}, {     95,10}, \
+    {    191, 9}, {    383,12}, {     63,11}, {    127,10}, \
+    {    255, 9}, {    543, 8}, {   1087,10}, {    287, 9}, \
+    {    607,11}, {    159,10}, {    351, 9}, {    703, 8}, \
+    {   1407, 9}, {    735,11}, {    191,10}, {    415, 9}, \
+    {    831,11}, {    223,10}, {    479, 9}, {    959, 8}, \
+    {   1919,12}, {    127,11}, {    255,10}, {    543, 9}, \
+    {   1087,11}, {    287,10}, {    607, 9}, {   1215,11}, \
+    {    351,10}, {    703, 9}, {   1407,12}, {    191,11}, \
+    {    415,10}, {    831,11}, {    479,10}, {    959, 9}, \
+    {   1919,13}, {    127,12}, {    255,11}, {    543,10}, \
+    {   1087,11}, {    607,10}, {   1215,12}, {    319,11}, \
+    {    703,10}, {   1407,11}, {    735,12}, {    383,11}, \
+    {    831,12}, {    447,11}, {    959,10}, {   1919, 9}, \
+    {   3839,13}, {    255,12}, {    511,11}, {   1087,12}, \
+    {    575,11}, {   1215,10}, {   2431,12}, {    703,11}, \
+    {   1407,13}, {    383,12}, {    959,11}, {   1919,10}, \
+    {   3839,14}, {    255,13}, {    511,12}, {   1215,11}, \
+    {   2431,13}, {    639,12}, {   1471,11}, {   2943,13}, \
+    {    767,12}, {   1599,13}, {    895,12}, {   1919,11}, \
+    {   3839,14}, {    511,13}, {   1151,12}, {   2431,13}, \
+    {   1407,12}, {   2943,14}, {    767,13}, {   1919,12}, \
+    {   3839,15}, {    511,14}, {   1023,13}, {   2431,14}, \
+    {   1279,13}, {   2943,14}, {   1791,13}, {   3839,15}, \
+    {   1023,14}, {  16384,15}, {  32768,16} }
+#define SQR_FFT_TABLE3_SIZE 155
+#define SQR_FFT_THRESHOLD                 2368
+
+#define MULLO_BASECASE_THRESHOLD             0  /* always */
+#define MULLO_DC_THRESHOLD                  56
+#define MULLO_MUL_N_THRESHOLD             5240
+
+#define DC_DIV_QR_THRESHOLD                 59
+#define DC_DIVAPPR_Q_THRESHOLD             216
+#define DC_BDIV_QR_THRESHOLD                56
+#define DC_BDIV_Q_THRESHOLD                136
+
+#define INV_MULMOD_BNM1_THRESHOLD           30
+#define INV_NEWTON_THRESHOLD               260
+#define INV_APPR_THRESHOLD                 244
+
+#define BINV_NEWTON_THRESHOLD              266
+#define REDC_1_TO_REDC_N_THRESHOLD          62
+
+#define MU_DIV_QR_THRESHOLD               1308
+#define MU_DIVAPPR_Q_THRESHOLD            1334
+#define MUPI_DIV_QR_THRESHOLD              130
+#define MU_BDIV_QR_THRESHOLD              1017
+#define MU_BDIV_Q_THRESHOLD               1308
+
+#define MATRIX22_STRASSEN_THRESHOLD         15
+#define HGCD_THRESHOLD                     111
+#define GCD_DC_THRESHOLD                   606
+#define GCDEXT_DC_THRESHOLD                273
+#define JACOBI_BASE_METHOD                   3
+
+#define GET_STR_DC_THRESHOLD                14
+#define GET_STR_PRECOMPUTE_THRESHOLD        26
+#define SET_STR_DC_THRESHOLD               270
+#define SET_STR_PRECOMPUTE_THRESHOLD       860
diff --git a/mpn/x86/bdiv_dbm1c.asm b/mpn/x86/bdiv_dbm1c.asm

new file mode 100644 (file)

index 0000000..dbee28f
--- /dev/null
+++ b/mpn/x86/bdiv_dbm1c.asm
@@ -0,0 +1,112 @@
+dnl  x86 mpn_bdiv_dbm1c.
+
+dnl  Copyright 2008 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C          cycles/limb
+C K7:           3.5
+C P4 m0:         ?
+C P4 m1:         ?
+C P4 m2:       13.67
+C P4 m3:         ?
+C P4 m4:         ?
+C P6-13:        5.1
+
+C TODO
+C  * Optimize for more x86 processors
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_bdiv_dbm1c)
+       mov     16(%esp), %ecx          C d
+       push    %esi
+       mov     12(%esp), %esi          C ap
+       push    %edi
+       mov     12(%esp), %edi          C qp
+       push    %ebp
+       mov     24(%esp), %ebp          C n
+       push    %ebx
+
+       mov     (%esi), %eax
+       mul     %ecx                    C edx:eax = ap[0] * d
+       mov     36(%esp), %ebx          C fifth stack argument, the initial running value
+       sub     %eax, %ebx              C running value -= low product
+       mov     %ebx, (%edi)            C qp[0] = running value, mov keeps the borrow flag
+       sbb     %edx, %ebx              C running value -= high product + borrow
+
+       mov     %ebp, %eax
+       and     $3, %eax                C dispatch on n mod 4
+       jz      L(b0)
+       cmp     $2, %eax
+       jc      L(b1)                   C n mod 4 = 1
+       jz      L(b2)                   C n mod 4 = 2
+       jmp     L(b3)                   C n mod 4 = 3
+
+L(b0): mov     4(%esi), %eax
+       lea     -4(%esi), %esi
+       lea     12(%edi), %edi
+       add     $-4, %ebp
+       jmp     L(0)
+L(b3):
+       lea     -8(%esi), %esi
+       lea     8(%edi), %edi
+       add     $-3, %ebp
+       jmp     L(3)
+
+L(b2): mov     4(%esi), %eax
+       lea     4(%esi), %esi
+       lea     4(%edi), %edi
+       add     $-2, %ebp
+       jmp     L(2)
+
+       ALIGN(8)
+L(top):
+       mov     4(%esi), %eax
+       mul     %ecx
+       lea     16(%edi), %edi
+       sub     %eax, %ebx
+       mov     8(%esi), %eax
+       mov     %ebx, -12(%edi)
+       sbb     %edx, %ebx
+L(0):  mul     %ecx
+       sub     %eax, %ebx
+       mov     %ebx, -8(%edi)
+       sbb     %edx, %ebx
+L(3):  mov     12(%esi), %eax
+       mul     %ecx
+       sub     %eax, %ebx
+       mov     %ebx, -4(%edi)
+       mov     16(%esi), %eax
+       lea     16(%esi), %esi
+       sbb     %edx, %ebx
+L(2):  mul     %ecx
+       sub     %eax, %ebx
+       mov     %ebx, 0(%edi)
+       sbb     %edx, %ebx
+L(b1): add     $-4, %ebp
+       jns     L(top)                  C loop until the limb count goes negative
+
+       mov     %ebx, %eax              C return the final running value
+       pop     %ebx
+       pop     %ebp
+       pop     %edi
+       pop     %esi
+       ret
+EPILOGUE()
diff --git a/mpn/x86/copyd.asm b/mpn/x86/copyd.asm

new file mode 100644 (file)

index 0000000..4ce3bbb
--- /dev/null
+++ b/mpn/x86/copyd.asm
@@ -0,0 +1,80 @@
+dnl  x86 mpn_copyd -- copy limb vector, decrementing.
+
+dnl  Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C     cycles/limb  startup (approx)
+C P5:     1.0         40
+C P6      2.4         70
+C K6      1.0         55
+C K7:     1.3         75
+C P4:     2.6        175
+C
+C (Startup time includes some function call overheads.)
+
+
+C void mpn_copyd (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C
+C Copy src,size to dst,size, working from high to low addresses.
+C
+C The code here is very generic and can be expected to be reasonable on all
+C the x86 family.
+
+defframe(PARAM_SIZE,12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+deflit(`FRAME',0)
+
+       TEXT
+       ALIGN(32)
+
+PROLOGUE(mpn_copyd)
+       C eax   saved esi
+       C ebx
+       C ecx   counter
+       C edx   saved edi
+       C esi   src
+       C edi   dst
+       C ebp
+
+       movl    PARAM_SIZE, %ecx
+       movl    %esi, %eax              C keep esi in eax instead of pushing it
+
+       movl    PARAM_SRC, %esi
+       movl    %edi, %edx              C keep edi in edx instead of pushing it
+
+       movl    PARAM_DST, %edi
+       leal    -4(%esi,%ecx,4), %esi   C esi = address of src[size-1]
+
+       leal    -4(%edi,%ecx,4), %edi   C edi = address of dst[size-1]
+
+       std                             C direction flag set so movsl steps downward
+
+       rep
+       movsl                           C copy ecx limbs from high to low addresses
+
+       cld                             C leave the direction flag clear on return
+
+       movl    %eax, %esi              C restore esi
+       movl    %edx, %edi              C restore edi
+
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/copyi.asm b/mpn/x86/copyi.asm

new file mode 100644 (file)

index 0000000..c6bbaee
--- /dev/null
+++ b/mpn/x86/copyi.asm
@@ -0,0 +1,88 @@
+dnl  x86 mpn_copyi -- copy limb vector, incrementing.
+
+dnl  Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C     cycles/limb  startup (approx)
+C P5:     1.0         35
+C P6      0.75        45
+C K6      1.0         30
+C K7:     1.3         65
+C P4:     1.0        120
+C
+C (Startup time includes some function call overheads.)
+
+
+C void mpn_copyi (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C
+C Copy src,size to dst,size, working from low to high addresses.
+C
+C The code here is very generic and can be expected to be reasonable on all
+C the x86 family.
+C
+C P6 -  An MMX based copy was tried, but was found to be slower than a rep
+C       movs in all cases.  The fastest MMX found was 0.8 cycles/limb (when
+C       fully aligned).  A rep movs seems to have a startup time of about 15
+C       cycles, but doing something special for small sizes could lead to a
+C       branch misprediction that would destroy any saving.  For now a plain
+C       rep movs seems ok.
+C
+C K62 - We used to have a big chunk of code doing an MMX copy at 0.56 c/l if
+C       aligned or a 1.0 rep movs if not.  But that seemed excessive since
+C       it only got an advantage half the time, and even then only showed it
+C       above 50 limbs or so.
+
+defframe(PARAM_SIZE,12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+deflit(`FRAME',0)
+
+       TEXT
+       ALIGN(32)
+
+       C eax   saved esi
+       C ebx
+       C ecx   counter
+       C edx   saved edi
+       C esi   src
+       C edi   dst
+       C ebp
+
+PROLOGUE(mpn_copyi)
+
+       movl    PARAM_SIZE, %ecx
+       movl    %esi, %eax              C keep esi in eax instead of pushing it
+
+       movl    PARAM_SRC, %esi
+       movl    %edi, %edx              C keep edi in edx instead of pushing it
+
+       movl    PARAM_DST, %edi
+
+       cld     C better safe than sorry, see mpn/x86/README
+
+       rep
+       movsl                           C copy ecx limbs from low to high addresses
+
+       movl    %eax, %esi              C restore esi
+       movl    %edx, %edi              C restore edi
+
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/darwin.m4 b/mpn/x86/darwin.m4

new file mode 100644 (file)

index 0000000..7ef8dfc
--- /dev/null
+++ b/mpn/x86/darwin.m4
@@ -0,0 +1,40 @@
+divert(-1)
+dnl  Copyright 2007 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+define(`DARWIN')
+
+dnl  Usage: LEA(symbol,reg) -- load reg from a non-lazy pointer to symbol.
+dnl
+dnl  FIXME: Only handles one symbol per assembly file because of the
+dnl  way EPILOGUE_cpu is handled.
+
+define(`LEA',`
+define(`EPILOGUE_cpu',
+`      L(movl_eip_`'substr($2,1)):
+       movl    (%esp), $2
+       ret_internal
+       .section __IMPORT,__pointers,non_lazy_symbol_pointers
+L($1`'$non_lazy_ptr):
+       .indirect_symbol $1
+       .long    0
+')
+       call    L(movl_eip_`'substr($2,1))
+       movl    L($1`'$non_lazy_ptr)-.($2), $2
+')
+
+divert`'dnl
diff --git a/mpn/x86/dive_1.asm b/mpn/x86/dive_1.asm

new file mode 100644 (file)

index 0000000..cb927e5
--- /dev/null
+++ b/mpn/x86/dive_1.asm
@@ -0,0 +1,178 @@
+dnl  x86 mpn_divexact_1 -- mpn by limb exact division.
+
+dnl  Copyright 2001, 2002, 2007 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C     cycles/limb
+C P54    30.0
+C P55    29.0
+C P6     13.0 odd divisor, 12.0 even (strangely)
+C K6     14.0
+C K7     12.0
+C P4     42.0
+
+
+C mp_limb_t mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                           mp_limb_t divisor);
+C
+
+defframe(PARAM_DIVISOR,16)
+defframe(PARAM_SIZE,   12)
+defframe(PARAM_SRC,    8)
+defframe(PARAM_DST,    4)
+
+dnl  re-use parameter space
+define(VAR_INVERSE,`PARAM_SRC')
+
+       TEXT
+
+       ALIGN(16)
+PROLOGUE(mpn_divexact_1)
+deflit(`FRAME',0)
+
+       movl    PARAM_DIVISOR, %eax
+       pushl   %ebp    FRAME_pushl()
+
+       movl    PARAM_SIZE, %ebp
+       pushl   %edi    FRAME_pushl()
+
+       pushl   %ebx    FRAME_pushl()
+       movl    $-1, %ecx               C shift count
+
+       pushl   %esi    FRAME_pushl()
+
+L(strip_twos):                         C count the trailing zero bits of d into ecx
+       incl    %ecx
+
+       shrl    %eax                    C carry flag = the bit shifted out
+       jnc     L(strip_twos)           C repeat until a one bit has been shifted out
+
+       leal    1(%eax,%eax), %ebx      C d without twos
+       andl    $127, %eax              C d/2, 7 bits
+
+ifdef(`PIC',`
+       LEA(    binvert_limb_table, %edx)
+       movzbl  (%eax,%edx), %eax               C inv 8 bits
+',`
+       movzbl  binvert_limb_table(%eax), %eax  C inv 8 bits
+')
+
+       leal    (%eax,%eax), %edx       C 2*inv
+       movl    %ebx, PARAM_DIVISOR     C d without twos
+
+       imull   %eax, %eax              C inv*inv
+
+       movl    PARAM_SRC, %esi
+       movl    PARAM_DST, %edi
+
+       imull   %ebx, %eax              C inv*inv*d
+
+       subl    %eax, %edx              C inv = 2*inv - inv*inv*d
+       leal    (%edx,%edx), %eax       C 2*inv
+
+       imull   %edx, %edx              C inv*inv
+
+       leal    (%esi,%ebp,4), %esi     C src end
+       leal    (%edi,%ebp,4), %edi     C dst end
+       negl    %ebp                    C -size
+
+       imull   %ebx, %edx              C inv*inv*d
+
+       subl    %edx, %eax              C inv = 2*inv - inv*inv*d
+
+       ASSERT(e,`      C expect d*inv == 1 mod 2^GMP_LIMB_BITS
+       pushl   %eax    FRAME_pushl()
+       imull   PARAM_DIVISOR, %eax
+       cmpl    $1, %eax
+       popl    %eax    FRAME_popl()')
+
+       movl    %eax, VAR_INVERSE
+       movl    (%esi,%ebp,4), %eax     C src[0]
+
+       xorl    %ebx, %ebx              C carry bit = 0
+       xorl    %edx, %edx              C carry limb = 0
+
+       incl    %ebp
+       jz      L(one)                  C size is 1 so only the final limb step runs
+
+       movl    (%esi,%ebp,4), %edx     C src[1]
+
+       shrdl(  %cl, %edx, %eax)
+
+       movl    VAR_INVERSE, %edx
+       jmp     L(entry)
+
+
+       ALIGN(8)
+       nop     C k6 code alignment
+       nop
+L(top):
+       C eax   q
+       C ebx   carry bit, 0 or -1
+       C ecx   shift
+       C edx   carry limb
+       C esi   src end
+       C edi   dst end
+       C ebp   counter, limbs, negative
+
+       movl    -4(%esi,%ebp,4), %eax
+       subl    %ebx, %edx              C accumulate carry bit
+
+       movl    (%esi,%ebp,4), %ebx
+
+       shrdl(  %cl, %ebx, %eax)
+
+       subl    %edx, %eax              C apply carry limb
+       movl    VAR_INVERSE, %edx
+
+       sbbl    %ebx, %ebx              C ebx = 0 or -1 from the borrow of the subl above
+
+L(entry):
+       imull   %edx, %eax              C q = limb * inverse
+
+       movl    %eax, -4(%edi,%ebp,4)   C store the quotient limb
+       movl    PARAM_DIVISOR, %edx
+
+       mull    %edx                    C edx = high limb of q*d which is the next carry limb
+
+       incl    %ebp
+       jnz     L(top)
+
+
+       movl    -4(%esi), %eax          C src high limb
+L(one):
+       shrl    %cl, %eax
+       popl    %esi    FRAME_popl()
+
+       addl    %ebx, %eax              C apply carry bit
+       popl    %ebx    FRAME_popl()
+
+       subl    %edx, %eax              C apply carry limb
+
+       imull   VAR_INVERSE, %eax
+
+       movl    %eax, -4(%edi)          C store the top quotient limb
+
+       popl    %edi
+       popl    %ebp
+
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/divrem_1.asm b/mpn/x86/divrem_1.asm

new file mode 100644 (file)

index 0000000..a5fb880
--- /dev/null
+++ b/mpn/x86/divrem_1.asm
@@ -0,0 +1,223 @@
+dnl  x86 mpn_divrem_1 -- mpn by limb division extending to fractional quotient.
+
+dnl  Copyright 1999, 2000, 2001, 2002, 2003, 2007 Free Software Foundation,
+dnl  Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C       cycles/limb
+C 486   approx 43 maybe
+C P5        44
+C P6        39
+C P6MMX     39
+C K6        22
+C K7        42
+C P4        58
+
+
+C mp_limb_t mpn_divrem_1 (mp_ptr dst, mp_size_t xsize,
+C                         mp_srcptr src, mp_size_t size, mp_limb_t divisor);
+C mp_limb_t mpn_divrem_1c (mp_ptr dst, mp_size_t xsize,
+C                          mp_srcptr src, mp_size_t size, mp_limb_t divisor,
+C                          mp_limb_t carry);
+C
+C Divide src,size by divisor and store the quotient in dst+xsize,size.
+C Extend the division to fractional quotient limbs in dst,xsize.  Return the
+C remainder.  Either or both xsize and size can be 0.
+C
+C mpn_divrem_1c takes a carry parameter which is an initial high limb,
+C effectively one extra limb at the top of src,size.  Must have
+C carry<divisor.
+C
+C
+C Essentially the code is the same as the division based part of
+C mpn/generic/divrem_1.c, but has the advantage that we get the desired divl
+C instruction even when gcc is not being used (when longlong.h only has the
+C rather slow generic C udiv_qrnnd()).
+C
+C A test is done to see if the high limb is less than the divisor, and if so
+C one less div is done.  A div is between 20 and 40 cycles on the various
+C x86s, so assuming high<divisor about half the time, then this test saves
+C half that amount.  The branch misprediction penalty on each chip is less
+C than half a div.
+C
+C
+C Notes for P5:
+C
+C It might be thought that moving the load down to pair with the store would
+C save 1 cycle, but that doesn't seem to happen in practice, and in any case
+C would be a mere 2.2% saving, so it's hardly worth bothering about.
+C
+C A mul-by-inverse might be a possibility for P5, as done in
+C mpn/x86/pentium/mod_1.asm.  The number of auxiliary instructions required
+C is a hindrance, but there could be a 10-15% speedup available.
+C
+C
+C Notes for K6:
+C
+C K6 has its own version of this code, using loop and paying attention to
+C cache line boundary crossings.  The target 20 c/l can be had with the
+C decl+jnz of the present code by pairing up the load and store in the
+C loops.  But it's considered easier not to introduce complexity just for
+C that, but instead let k6 have its own code.
+C
+
+defframe(PARAM_CARRY,  24)
+defframe(PARAM_DIVISOR,20)
+defframe(PARAM_SIZE,   16)
+defframe(PARAM_SRC,    12)
+defframe(PARAM_XSIZE,  8)
+defframe(PARAM_DST,    4)
+
+       TEXT
+       ALIGN(16)
+
+PROLOGUE(mpn_divrem_1c)
+deflit(`FRAME',0)
+
+       movl    PARAM_SIZE, %ecx
+       pushl   %edi            FRAME_pushl()
+
+       movl    PARAM_SRC, %edi
+       pushl   %esi            FRAME_pushl()
+
+       movl    PARAM_DIVISOR, %esi
+       pushl   %ebx            FRAME_pushl()
+
+       movl    PARAM_DST, %ebx
+       pushl   %ebp            FRAME_pushl()
+
+       movl    PARAM_XSIZE, %ebp
+       orl     %ecx, %ecx              C set the zero flag if size is 0
+
+       movl    PARAM_CARRY, %edx       C initial remainder = carry, movl keeps the flags
+       jz      L(fraction)             C size is 0 so only fraction limbs are produced
+
+       leal    -4(%ebx,%ebp,4), %ebx   C dst one limb below integer part
+       jmp     L(integer_top)          C continue in the loop shared with mpn_divrem_1
+
+EPILOGUE()
+
+
+PROLOGUE(mpn_divrem_1)
+deflit(`FRAME',0)
+
+       movl    PARAM_SIZE, %ecx
+       pushl   %edi            FRAME_pushl()
+
+       movl    PARAM_SRC, %edi
+       pushl   %esi            FRAME_pushl()
+
+       movl    PARAM_DIVISOR, %esi
+       orl     %ecx,%ecx               C set the zero flag if size is 0
+
+       jz      L(size_zero)            C only edi and esi have been pushed at this point
+       pushl   %ebx            FRAME_pushl()
+
+       movl    -4(%edi,%ecx,4), %eax   C src high limb
+       xorl    %edx, %edx              C high half of the first dividend = 0
+
+       movl    PARAM_DST, %ebx
+       pushl   %ebp            FRAME_pushl()
+
+       movl    PARAM_XSIZE, %ebp
+       cmpl    %esi, %eax              C compare high limb with divisor
+
+       leal    -4(%ebx,%ebp,4), %ebx   C dst one limb below integer part
+       jae     L(integer_entry)        C high >= divisor so divide the high limb too
+
+
+       C high<divisor, so high of dst is zero, and avoid one div
+
+       movl    %edx, (%ebx,%ecx,4)
+       decl    %ecx
+
+       movl    %eax, %edx              C high limb becomes the initial remainder
+       jz      L(fraction)
+
+
+L(integer_top):
+       C eax   scratch (quotient)
+       C ebx   dst+4*xsize-4
+       C ecx   counter
+       C edx   scratch (remainder)
+       C esi   divisor
+       C edi   src
+       C ebp   xsize
+
+       movl    -4(%edi,%ecx,4), %eax
+L(integer_entry):
+
+       divl    %esi                    C eax = quotient and edx = remainder of edx:eax / divisor
+
+       movl    %eax, (%ebx,%ecx,4)
+       decl    %ecx
+       jnz     L(integer_top)
+
+
+L(fraction):
+       orl     %ebp, %ecx              C ecx is 0 on every path here so ecx = xsize
+       jz      L(done)                 C no fraction limbs wanted
+
+       movl    PARAM_DST, %ebx
+
+
+L(fraction_top):
+       C eax   scratch (quotient)
+       C ebx   dst
+       C ecx   counter
+       C edx   scratch (remainder)
+       C esi   divisor
+       C edi
+       C ebp
+
+       xorl    %eax, %eax              C low dividend limb is zero for fraction limbs
+
+       divl    %esi
+
+       movl    %eax, -4(%ebx,%ecx,4)
+       decl    %ecx
+       jnz     L(fraction_top)
+
+
+L(done):
+       popl    %ebp
+       movl    %edx, %eax              C return the remainder
+       popl    %ebx
+       popl    %esi
+       popl    %edi
+       ret
+
+
+L(size_zero):
+deflit(`FRAME',8)
+       movl    PARAM_XSIZE, %ecx
+       xorl    %eax, %eax              C zero is both the fill value and the return value
+
+       movl    PARAM_DST, %edi
+
+       cld     C better safe than sorry, see mpn/x86/README
+
+       rep
+       stosl                           C store xsize zero limbs at dst
+
+       popl    %esi
+       popl    %edi
+       ret
+EPILOGUE()
diff --git a/mpn/x86/divrem_2.asm b/mpn/x86/divrem_2.asm

new file mode 100644 (file)

index 0000000..2ccaae9
--- /dev/null
+++ b/mpn/x86/divrem_2.asm
@@ -0,0 +1,188 @@
+dnl  x86 mpn_divrem_2 -- Divide an mpn number by a normalized 2-limb number.
+
+dnl  Copyright 2007, 2008 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C              norm    frac
+C 486
+C P5
+C P6-13                29.2
+C P6-15                *26
+C K6
+C K7           22
+C K8           *19
+C P4-f1
+C P4-f2                *65
+C P4-f3
+C P4-f4                *72
+
+C A star means numbers not updated for the latest version of the code.
+
+
+C TODO
+C  * Perhaps keep ecx or esi in stack slot, freeing up a reg for q0.
+C  * The loop has not been carefully tuned.  We should at the very least do
+C    some local insn swapping.
+C  * The code outside the main loop is what gcc generated.  Clean up!
+C  * Clean up stack slot usage.
+
+C INPUT PARAMETERS
+C qp
+C fn
+C up_param
+C un_param
+C dp
+
+
+C eax ebx ecx edx esi edi ebp
+C         cnt         qp
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_divrem_2)
+       push    %ebp
+       push    %edi
+       push    %esi
+       push    %ebx
+       sub     $36, %esp               C locals, so the args start at 56(%esp)
+       mov     68(%esp), %ecx          C un
+       mov     72(%esp), %esi          C dp
+       movl    $0, 32(%esp)            C msl, the return value, starts as 0
+       lea     0(,%ecx,4), %edi
+       add     64(%esp), %edi          C up, so edi = up + 4*un
+       mov     (%esi), %ebx            C d0
+       mov     4(%esi), %eax           C d1
+       mov     %ebx, 20(%esp)
+       sub     $12, %edi               C edi = address of up[un-3]
+       mov     %eax, 24(%esp)
+       mov     %edi, 12(%esp)
+       mov     8(%edi), %ebx           C n2 = up[un-1]
+       mov     4(%edi), %ebp           C n1 = up[un-2]
+       cmp     %eax, %ebx
+       jb      L(8)                    C n2 < d1, no initial subtract
+       seta    %dl
+       cmp     20(%esp), %ebp
+       setae   %al
+       orb     %dl, %al
+       jne     L(35)                   C n2 > d1, or n2 = d1 with n1 >= d0
+L(8):
+       mov     60(%esp), %esi          C fn
+       lea     -3(%esi,%ecx), %edi     C fn + un - 3
+       test    %edi, %edi
+       js      L(9)                    C negative, skip the quotient loop
+       mov     24(%esp), %edx
+       mov     $-1, %esi
+       mov     %esi, %eax
+       mov     %esi, %ecx
+       not     %edx                    C edx = ~d1
+       divl    24(%esp)                C eax = (~d1 * 2^32 + 2^32 - 1) / d1
+       mov     %eax, %esi
+       imul    24(%esp), %eax
+       mov     %eax, (%esp)
+       mov     %esi, %eax
+       mull    20(%esp)
+       mov     (%esp), %eax
+       add     20(%esp), %eax
+       adc     $0, %ecx
+       add     %eax, %edx
+       adc     $0, %ecx
+       mov     %ecx, %eax
+       js      L(32)
+L(36): dec     %esi
+       sub     24(%esp), %edx
+       sbb     $0, %eax
+       jns     L(36)
+L(32):
+       mov     %esi, 16(%esp)          C di
+       mov     %edi, %ecx              C un
+       mov     12(%esp), %esi          C up
+       mov     24(%esp), %eax
+       neg     %eax
+       mov     %eax, 4(%esp)           C -d1
+       ALIGN(16)
+       nop
+
+C eax ebx ecx edx esi edi ebp  0    4   8   12  16  20  24  28  32   56  60
+C     n2  un      up      n1   q0  -d1          di  d0  d1      msl  qp  fn
+
+L(loop):
+       mov     16(%esp), %eax          C di
+       mul     %ebx
+       add     %ebp, %eax
+       mov     %eax, (%esp)            C q0
+       adc     %ebx, %edx
+       mov     %edx, %edi              C q
+       imul    4(%esp), %edx
+       mov     20(%esp), %eax
+       lea     (%edx, %ebp), %ebx      C n1 -= ...
+       mul     %edi
+       xor     %ebp, %ebp
+       cmp     60(%esp), %ecx
+       jl      L(19)
+       mov     (%esi), %ebp
+       sub     $4, %esi
+L(19): sub     20(%esp), %ebp
+       sbb     24(%esp), %ebx
+       sub     %eax, %ebp
+       sbb     %edx, %ebx
+       mov     20(%esp), %eax          C d0
+       inc     %edi
+       xor     %edx, %edx
+       cmp     (%esp), %ebx            C compare against the saved q0
+       adc     $-1, %edx               C mask
+       add     %edx, %edi              C q--
+       and     %edx, %eax              C d0 or 0
+       and     24(%esp), %edx          C d1 or 0
+       add     %eax, %ebp
+       adc     %edx, %ebx
+       cmp     24(%esp), %ebx
+       jae     L(fix)
+L(bck):        mov     56(%esp), %edx          C qp
+       mov     %edi, (%edx, %ecx, 4)   C store q at qp[ecx]
+       dec     %ecx
+       jns     L(loop)
+
+L(9):  mov     64(%esp), %esi          C up
+       mov     %ebp, (%esi)            C the two remaining limbs go to up[0]
+       mov     %ebx, 4(%esi)           C and up[1]
+       mov     32(%esp), %eax          C return msl
+       add     $36, %esp
+       pop     %ebx
+       pop     %esi
+       pop     %edi
+       pop     %ebp
+       ret
+
+L(fix):        seta    %dl
+       cmp     20(%esp), %ebp
+       setae   %al
+       orb     %dl, %al
+       je      L(bck)
+       inc     %edi
+       sub     20(%esp), %ebp
+       sbb     24(%esp), %ebx
+       jmp     L(bck)
+
+L(35): sub     20(%esp), %ebp          C subtract d0 and d1 from the top limbs
+       sbb     24(%esp), %ebx
+       movl    $1, 32(%esp)            C msl = 1
+       jmp     L(8)
+EPILOGUE()
diff --git a/mpn/x86/fat/diveby3.c b/mpn/x86/fat/diveby3.c

new file mode 100644 (file)

index 0000000..7ea0161
--- /dev/null
+++ b/mpn/x86/fat/diveby3.c
@@ -0,0 +1,21 @@
+/* Fat binary fallback mpn_divexact_by3c.
+
+Copyright 2003, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "mpn/generic/diveby3.c"
diff --git a/mpn/x86/fat/fat.c b/mpn/x86/fat/fat.c

new file mode 100644 (file)

index 0000000..8349afc
--- /dev/null
+++ b/mpn/x86/fat/fat.c
@@ -0,0 +1,317 @@
+/* x86 fat binary initializers.
+
+   THE FUNCTIONS AND VARIABLES IN THIS FILE ARE FOR INTERNAL USE ONLY.
+   THEY'RE ALMOST CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR
+   COMPLETELY IN FUTURE GNU MP RELEASES.
+
+Copyright 2003, 2004, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>    /* for printf */
+#include <stdlib.h>   /* for getenv */
+#include <string.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Change this to "#define TRACE(x) x" for some traces. */
+#define TRACE(x)
+
+/* Change this to 1 to take the cpuid from GMP_CPU_TYPE env var. */
+#define WANT_FAKE_CPUID  0
+
+
+/* fat_entry.asm */
+long __gmpn_cpuid __GMP_PROTO ((char dst[12], int id));
+int  __gmpn_cpuid_available __GMP_PROTO ((void));
+
+
+#if WANT_FAKE_CPUID
+/* The "name"s in the table are values for the GMP_CPU_TYPE environment
+   variable.  Anything can be used, but for now it's the canonical cpu types
+   as per config.guess/config.sub.  */
+
+#define __gmpn_cpuid            fake_cpuid
+#define __gmpn_cpuid_available  fake_cpuid_available
+
+#define MAKE_FMS(family, model)                                                \
+  ((((family) & 0xf) << 8) + (((family) & 0xff0) << 20)                        \
+   + (((model) & 0xf) << 4) + (((model)  &  0xf0) << 12))
+
+static struct {
+  const char  *name;
+  const char  vendor[13];
+  unsigned    fms;
+} fake_cpuid_table[] = {
+  { "i386",       "" },
+  { "i486",       "GenuineIntel", MAKE_FMS (4, 0) },
+  { "pentium",    "GenuineIntel", MAKE_FMS (5, 0) },
+  { "pentiummmx", "GenuineIntel", MAKE_FMS (5, 4) },
+  { "pentiumpro", "GenuineIntel", MAKE_FMS (6, 0) },
+  { "pentium2",   "GenuineIntel", MAKE_FMS (6, 2) },
+  { "pentium3",   "GenuineIntel", MAKE_FMS (6, 7) },
+  { "pentium4",   "GenuineIntel", MAKE_FMS (7, 0) },
+
+  { "k5",         "AuthenticAMD", MAKE_FMS (5, 0) },
+  { "k6",         "AuthenticAMD", MAKE_FMS (5, 3) },
+  { "k62",        "AuthenticAMD", MAKE_FMS (5, 8) },
+  { "k63",        "AuthenticAMD", MAKE_FMS (5, 9) },
+  { "athlon",     "AuthenticAMD", MAKE_FMS (6, 0) },
+  { "x86_64",     "AuthenticAMD", MAKE_FMS (15, 0) },
+
+  { "viac3",      "CentaurHauls", MAKE_FMS (6, 0) },
+  { "viac32",     "CentaurHauls", MAKE_FMS (6, 9) },
+};
+
+static int
+fake_cpuid_lookup (void)
+{
+  const char  *cpu_type = getenv ("GMP_CPU_TYPE");
+  int         idx = 0;
+  /* Returns the fake_cpuid_table index named by GMP_CPU_TYPE, else aborts.  */
+  if (cpu_type == NULL)
+    {
+      printf ("Need GMP_CPU_TYPE environment variable for fake cpuid\n");
+      abort ();
+    }
+
+  /* Linear scan, the first entry with a matching name wins.  */
+  for ( ; idx < numberof (fake_cpuid_table); idx++)
+    if (strcmp (cpu_type, fake_cpuid_table[idx].name) == 0)
+      return idx;
+
+  printf ("GMP_CPU_TYPE=%s unknown\n", cpu_type);
+  abort ();
+}
+
+static int
+fake_cpuid_available (void)
+{  /* Nonzero when the selected table entry has a non-empty vendor string.  */
+  return *fake_cpuid_table[fake_cpuid_lookup ()].vendor != '\0';
+}
+
+static long
+fake_cpuid (char dst[12], int id)
+{
+  int  entry = fake_cpuid_lookup ();
+
+  if (id == 0)          /* id 0: copy out the 12 byte vendor string */
+    {
+      memcpy (dst, fake_cpuid_table[entry].vendor, 12);
+      return 0;
+    }
+  if (id == 1)          /* id 1: family/model word, dst is left untouched */
+    return fake_cpuid_table[entry].fms;
+
+  printf ("fake_cpuid(): oops, unknown id %d\n", id);
+  abort ();
+}
+#endif
+
+
+typedef DECL_preinv_divrem_1 ((*preinv_divrem_1_t));  /* cast target used in __gmpn_cpuvec_init */
+typedef DECL_preinv_mod_1    ((*preinv_mod_1_t));     /* likewise, for the mod_1 fallback */
+
+struct cpuvec_t __gmpn_cpuvec = {   /* every pointer starts at its _init stub from fat_entry.asm */
+  __MPN(add_n_init),
+  __MPN(addmul_1_init),
+  __MPN(copyd_init),
+  __MPN(copyi_init),
+  __MPN(divexact_1_init),
+  __MPN(divexact_by3c_init),
+  __MPN(divrem_1_init),
+  __MPN(gcd_1_init),
+  __MPN(lshift_init),
+  __MPN(mod_1_init),
+  __MPN(mod_34lsub1_init),
+  __MPN(modexact_1c_odd_init),
+  __MPN(mul_1_init),
+  __MPN(mul_basecase_init),
+  __MPN(preinv_divrem_1_init),
+  __MPN(preinv_mod_1_init),
+  __MPN(rshift_init),
+  __MPN(sqr_basecase_init),
+  __MPN(sub_n_init),
+  __MPN(submul_1_init),
+  0   /* the next member starts as zero, as do any members after it */
+};
+
+
+/* The following setups start with generic x86, then overwrite with
+   specifics for a chip, and higher versions of that chip.
+
+   The arrangement of the setups here will normally be the same as the $path
+   selections in configure.in for the respective chips.
+
+   This code is reentrant and thread safe.  We always calculate the same
+   decided_cpuvec, so if two copies of the code are running it doesn't
+   matter which completes first, both write the same to __gmpn_cpuvec.
+
+   We need to go via decided_cpuvec because if one thread has completed
+   __gmpn_cpuvec then it may be making use of the threshold values in that
+   vector.  If another thread is still running __gmpn_cpuvec_init then we
+   don't want it to write different values to those fields since some of the
+   asm routines only operate correctly up to their own defined threshold,
+   not an arbitrary value.  */
+
+void
+__gmpn_cpuvec_init (void)
+{
+  struct cpuvec_t  decided_cpuvec;  /* built up here, installed in one go at the end */
+
+  TRACE (printf ("__gmpn_cpuvec_init:\n"));
+
+  memset (&decided_cpuvec, '\0', sizeof (decided_cpuvec));  /* unset entries stay NULL */
+
+  CPUVEC_SETUP_x86;  /* generic x86 first, chip specifics below overwrite it */
+  CPUVEC_SETUP_fat;
+
+  if (! __gmpn_cpuid_available ())
+    {
+      TRACE (printf ("  80386, or early 80486 without cpuid\n"));
+    }
+  else
+    {
+      char vendor_string[13];  /* 12 bytes from cpuid id 0 plus a terminator */
+      char dummy_string[12];   /* scratch dst for the id 1 call, never read */
+      long fms;
+      int family, model;
+
+      __gmpn_cpuid (vendor_string, 0);
+      vendor_string[12] = 0;
+
+      fms = __gmpn_cpuid (dummy_string, 1);
+      family = ((fms >> 8) & 0xf) + ((fms >> 20) & 0xff);  /* bits 8-11 plus bits 20-27 */
+      model = ((fms >> 4) & 0xf) + ((fms >> 12) & 0xf0);   /* bits 4-7, bits 16-19 as high nibble */
+
+      if (strcmp (vendor_string, "GenuineIntel") == 0)
+        {
+          switch (family)
+            {
+            case 4:
+              TRACE (printf ("  80486 with cpuid\n"));
+              break;
+
+            case 5:
+              TRACE (printf ("  pentium\n"));
+              CPUVEC_SETUP_pentium;
+              if (model >= 4)
+                {
+                  TRACE (printf ("  pentiummmx\n"));
+                  CPUVEC_SETUP_pentium_mmx;
+                }
+              break;
+
+            case 6:
+              TRACE (printf ("  p6\n"));
+              CPUVEC_SETUP_p6;
+              if (model >= 2)
+                {
+                  TRACE (printf ("  pentium2\n"));
+                  CPUVEC_SETUP_p6_mmx;
+                }
+              if (model >= 7)
+                {
+                  TRACE (printf ("  pentium3\n"));
+                  CPUVEC_SETUP_p6_p3mmx;
+                }
+              if (model >= 0xD || model == 9)
+                {
+                  TRACE (printf ("  p6 with sse2\n"));
+                  CPUVEC_SETUP_p6_sse2;
+                }
+              break;
+
+            case 15:
+              TRACE (printf ("  pentium4\n"));
+              CPUVEC_SETUP_pentium4;
+              CPUVEC_SETUP_pentium4_mmx;
+              CPUVEC_SETUP_pentium4_sse2;
+              break;
+            }
+        }
+      else if (strcmp (vendor_string, "AuthenticAMD") == 0)
+        {
+          switch (family)
+            {
+            case 5:
+              if (model <= 3)
+                {
+                  TRACE (printf ("  k5\n"));
+                }
+              else
+                {
+                  TRACE (printf ("  k6\n"));
+                  CPUVEC_SETUP_k6;
+                  CPUVEC_SETUP_k6_mmx;
+                  if (model >= 8)
+                    {
+                      TRACE (printf ("  k62\n"));
+                      CPUVEC_SETUP_k6_k62mmx;
+                    }
+                  if (model >= 9)
+                    {
+                      TRACE (printf ("  k63\n"));
+                    }
+                }
+              break;
+            case 6:
+              TRACE (printf ("  athlon\n"));
+            athlon:  /* also reached from case 15 below by goto */
+              CPUVEC_SETUP_k7;
+              CPUVEC_SETUP_k7_mmx;
+              break;
+            case 15:
+              TRACE (printf ("  x86_64\n"));
+              goto athlon;
+            }
+        }
+      else if (strcmp (vendor_string, "CentaurHauls") == 0)
+        {
+          switch (family)
+            {
+            case 6:
+              TRACE (printf ("  viac3\n"));
+              if (model >= 9)
+                {
+                  TRACE (printf ("  viac32\n"));
+                }
+              break;
+            }
+        }
+      else if (strcmp (vendor_string, "CyrixInstead") == 0)
+        {
+          /* Should recognize Cyrix' processors too.  */
+          TRACE (printf ("  cyrix something\n"));
+        }
+    }
+
+  /* There's no x86 generic mpn_preinv_divrem_1 or mpn_preinv_mod_1.
+     Instead default to the plain versions from whichever CPU we detected.
+     The function arguments are compatible, no need for any glue code.  */
+  if (decided_cpuvec.preinv_divrem_1 == NULL)
+    decided_cpuvec.preinv_divrem_1 =(preinv_divrem_1_t)decided_cpuvec.divrem_1;
+  if (decided_cpuvec.preinv_mod_1 == NULL)
+    decided_cpuvec.preinv_mod_1    =(preinv_mod_1_t)   decided_cpuvec.mod_1;
+
+  ASSERT_CPUVEC (decided_cpuvec);
+  CPUVEC_INSTALL (decided_cpuvec);  /* presumably copies into __gmpn_cpuvec, confirm in gmp-impl.h */
+
+  /* Set this once the threshold fields are ready.
+     Use volatile to prevent it getting moved.  */
+  ((volatile struct cpuvec_t *) &__gmpn_cpuvec)->initialized = 1;
+}
diff --git a/mpn/x86/fat/fat_entry.asm b/mpn/x86/fat/fat_entry.asm

new file mode 100644 (file)

index 0000000..bd46e4e
--- /dev/null
+++ b/mpn/x86/fat/fat_entry.asm
@@ -0,0 +1,209 @@
+dnl  x86 fat binary entrypoints.
+
+dnl  Copyright 2003 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+dnl  Forcibly disable profiling.
+dnl
+dnl  The entrypoints and inits are small enough not to worry about, the real
+dnl  routines arrived at will have any profiling.  Also, the way the code
+dnl  here ends with a jump means we won't work properly with the
+dnl  "instrument" profiling scheme anyway.
+
+define(`WANT_PROFILING',no)
+
+
+       TEXT
+
+
+dnl  Usage: FAT_ENTRY(name, offset)
+dnl
+dnl  Emit a fat binary entrypoint function of the given name.  This is the
+dnl  normal entry for applications, eg. __gmpn_add_n.
+dnl
+dnl  The code simply jumps through the function pointer in __gmpn_cpuvec at
+dnl  the given "offset" (in bytes).
+dnl
+dnl  For non-PIC, the jumps are 5 bytes each, aligning them to 8 should be
+dnl  fine for all x86s.
+dnl
+dnl  For PIC, the jumps are 20 bytes each, and are best aligned to 16 to
+dnl  ensure at least the first two instructions don't cross a cache line
+dnl  boundary.
+dnl
+dnl  Note the extra `' ahead of PROLOGUE obscures it from the HAVE_NATIVE
+dnl  grepping in configure, stopping that code trying to eval something with
+dnl  $1 in it.
+
+define(FAT_ENTRY,
+m4_assert_numargs(2)
+`      ALIGN(ifdef(`PIC',16,8))
+`'PROLOGUE($1)
+ifdef(`PIC',
+`      call    L(movl_eip_edx)         C edx = address of the label below
+L(entry_here$2):
+       addl    $_GLOBAL_OFFSET_TABLE_+[.-L(entry_here$2)], %edx
+       movl    GSYM_PREFIX`'__gmpn_cpuvec@GOT(%edx), %edx      C edx = address of the vector
+       jmp     *m4_empty_if_zero($2)(%edx)     C tail jump through the slot
+',`dnl non-PIC, a single indirect jump through the absolute slot address
+       jmp     *GSYM_PREFIX`'__gmpn_cpuvec+$2
+')
+EPILOGUE()
+')
+
+
+dnl  FAT_ENTRY for each CPUVEC_FUNCS_LIST
+dnl  CPUVEC_offset is the byte offset passed to FAT_ENTRY, advanced by 4 per name.
+
+define(`CPUVEC_offset',0)
+foreach(i,
+`FAT_ENTRY(MPN(i),CPUVEC_offset)
+define(`CPUVEC_offset',eval(CPUVEC_offset + 4))',
+CPUVEC_FUNCS_LIST)
+
+ifdef(`PIC',`
+       ALIGN(8)
+L(movl_eip_edx):
+       movl    (%esp), %edx    C fetch our own return address for the caller
+       ret_internal
+')
+
+
+dnl  Usage: FAT_INIT(name, offset)
+dnl
+dnl  Emit a fat binary initializer function of the given name.  These
+dnl  functions are the initial values for the pointers in __gmpn_cpuvec.
+dnl
+dnl  The code simply calls __gmpn_cpuvec_init, and then jumps back through
+dnl  the __gmpn_cpuvec pointer, at the given "offset" (in bytes).
+dnl  __gmpn_cpuvec_init will have stored the address of the selected
+dnl  implementation there.
+dnl
+dnl  Only one of these routines will be executed, and only once, since after
+dnl  that all the __gmpn_cpuvec pointers go to real routines.  So there's no
+dnl  need for anything special here, just something small and simple.  To
+dnl  keep code size down, "fat_init" is a shared bit of code, arrived at
+dnl  with the offset in %al.  %al is used since the movb instruction is 2
+dnl  bytes where %eax would be 4.
+dnl
+dnl  Note having `PROLOGUE in FAT_INIT obscures that PROLOGUE from the
+dnl  HAVE_NATIVE grepping in configure, preventing that code trying to eval
+dnl  something with $1 in it.
+
+define(FAT_INIT,
+m4_assert_numargs(2)
+`PROLOGUE($1)
+       movb    $`'$2, %al      C al = byte offset of this slot in the vector
+       jmp     L(fat_init)     C shared code does the init and the onward jump
+EPILOGUE()
+')
+
+L(fat_init):
+       C al    __gmpn_cpuvec byte offset, an unsigned value
+
+       movzbl  %al, %eax       C zero extend so offsets of 128 and up stay positive
+       pushl   %eax            C keep the offset across the init call
+
+ifdef(`PIC',`
+       pushl   %ebx            C ebx is borrowed as the GOT pointer
+       call    L(movl_eip_ebx)
+L(init_here):
+       addl    $_GLOBAL_OFFSET_TABLE_+[.-L(init_here)], %ebx
+       call    GSYM_PREFIX`'__gmpn_cpuvec_init@PLT
+       movl    GSYM_PREFIX`'__gmpn_cpuvec@GOT(%ebx), %edx
+       popl    %ebx
+       popl    %eax            C the saved byte offset
+       jmp     *(%edx,%eax)    C enter through the pointer now in that slot
+
+L(movl_eip_ebx):
+       movl    (%esp), %ebx    C fetch our own return address for the caller
+       ret_internal
+
+',`dnl non-PIC
+       call    GSYM_PREFIX`'__gmpn_cpuvec_init
+       popl    %eax            C the saved byte offset
+       jmp     *GSYM_PREFIX`'__gmpn_cpuvec(%eax)
+')
+
+dnl  FAT_INIT for each CPUVEC_FUNCS_LIST
+dnl  Each name gets an _init suffix, and CPUVEC_offset again advances by 4 per name.
+
+define(`CPUVEC_offset',0)
+foreach(i,
+`FAT_INIT(MPN(i`'_init),CPUVEC_offset)
+define(`CPUVEC_offset',eval(CPUVEC_offset + 4))',
+CPUVEC_FUNCS_LIST)
+
+
+
+C long __gmpn_cpuid (char dst[12], int id);
+C
+C This is called only once, so just something simple and compact is fine.
+
+defframe(PARAM_ID,  8)
+defframe(PARAM_DST, 4)
+deflit(`FRAME',0)
+
+PROLOGUE(__gmpn_cpuid)
+       pushl   %ebx            FRAME_pushl()
+       pushl   %esi            FRAME_pushl()
+       movl    PARAM_DST, %esi         C esi is not written by cpuid
+       movl    PARAM_ID, %eax
+       cpuid
+       movl    %ebx, (%esi)            C dst gets ebx then edx then ecx
+       movl    %edx, 4(%esi)
+       movl    %ecx, 8(%esi)
+       popl    %esi
+       popl    %ebx
+       ret                             C eax from cpuid is the return value
+EPILOGUE()
+
+
+C int __gmpn_cpuid_available (void);
+C
+C Return non-zero if the cpuid instruction is available, which means late
+C model 80486 and higher.  80386 and early 80486 don't have cpuid.
+C
+C The test follows Intel AP-485 application note, namely that if bit 21 is
+C modifiable then cpuid is supported.  This test is reentrant and thread
+C safe, since of course any interrupt or context switch will preserve the
+C flags while we're tinkering with them.
+C
+C This is called only once, so just something simple and compact is fine.
+
+PROLOGUE(__gmpn_cpuid_available)
+       pushf
+       popl    %ecx            C eflags as found
+
+       movl    $0x200000, %edx C bit 21
+       xorl    %ecx, %edx      C copy of eflags with bit 21 flipped
+       pushl   %edx
+       popf
+
+       pushf
+       popl    %edx            C eflags as read back
+
+       C eax = 1 if the read back value differs from the old flags, else 0
+       xorl    %eax, %eax
+       xorl    %ecx, %edx      C nonzero only when something changed
+       setnz   %al
+
+       ret
+EPILOGUE()
diff --git a/mpn/x86/fat/gcd_1.c b/mpn/x86/fat/gcd_1.c

new file mode 100644 (file)

index 0000000..5bd0006
--- /dev/null
+++ b/mpn/x86/fat/gcd_1.c
@@ -0,0 +1,21 @@
+/* Fat binary fallback mpn_gcd_1.
+
+Copyright 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "mpn/generic/gcd_1.c"
diff --git a/mpn/x86/fat/gmp-mparam.h b/mpn/x86/fat/gmp-mparam.h

new file mode 100644 (file)

index 0000000..45680ed
--- /dev/null
+++ b/mpn/x86/fat/gmp-mparam.h
@@ -0,0 +1,59 @@
+/* Fat binary x86 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 32
+#define BYTES_PER_MP_LIMB 4
+
+
+/* mpn_divexact_1 is faster than mpn_divrem_1 at all sizes.  The only time
+   this might not be true currently is for actual 80386 and 80486 chips,
+   where mpn/x86/dive_1.asm might be slower than mpn/x86/divrem_1.asm, but
+   that's not worth worrying about.  */
+#define DIVEXACT_1_THRESHOLD  0
+
+/* Only some of the x86s have an mpn_preinv_divrem_1, but we set
+   USE_PREINV_DIVREM_1 so that all callers use it, and then let the
+   __gmpn_cpuvec pointer go to plain mpn_divrem_1 if there's not an actual
+   preinv.  */
+#define USE_PREINV_DIVREM_1   1
+
+/* mpn_sqr_basecase is faster than mpn_mul_basecase at all sizes, no need
+   for mpn_sqr to call the latter.  */
+#define SQR_BASECASE_THRESHOLD 0
+
+/* Sensible fallbacks for these, when not taken from a cpu-specific
+   gmp-mparam.h.  */
+#define MUL_TOOM22_THRESHOLD      20
+#define MUL_TOOM33_THRESHOLD     130
+#define SQR_TOOM2_THRESHOLD       30
+#define SQR_TOOM3_THRESHOLD      200
+
+/* These are values more or less in the middle of what the typical x86 chips
+   come out as.  For a fat binary it's necessary to have values for these,
+   since the defaults for MUL_FFT_TABLE and SQR_FFT_TABLE otherwise come out
+   as non-constant array initializers.  FIXME: Perhaps these should be done
+   in the cpuvec structure like other thresholds.  */
+#define MUL_FFT_TABLE  { 464, 928, 1920, 3584, 10240, 40960, 0 }
+#define MUL_FFT_MODF_THRESHOLD          400
+#define MUL_FFT_THRESHOLD              2000
+
+#define SQR_FFT_TABLE  { 528, 1184, 1920, 4608, 14336, 40960, 0 }
+#define SQR_FFT_MODF_THRESHOLD          500
+#define SQR_FFT_THRESHOLD              3000
diff --git a/mpn/x86/fat/mod_1.c b/mpn/x86/fat/mod_1.c

new file mode 100644 (file)

index 0000000..a79359d
--- /dev/null
+++ b/mpn/x86/fat/mod_1.c
@@ -0,0 +1,21 @@
+/* Fat binary fallback mpn_mod_1.
+
+Copyright 2003, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "mpn/generic/mod_1.c"
diff --git a/mpn/x86/fat/mode1o.c b/mpn/x86/fat/mode1o.c

new file mode 100644 (file)

index 0000000..a5244ca
--- /dev/null
+++ b/mpn/x86/fat/mode1o.c
@@ -0,0 +1,21 @@
+/* Fat binary fallback mpn_modexact_1c_odd.
+
+Copyright 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "mpn/generic/mode1o.c"
diff --git a/mpn/x86/gmp-mparam.h b/mpn/x86/gmp-mparam.h

new file mode 100644 (file)

index 0000000..4f91bd2
--- /dev/null
+++ b/mpn/x86/gmp-mparam.h
@@ -0,0 +1,27 @@
+/* Generic x86 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 32
+#define BYTES_PER_MP_LIMB 4
+
+
+/* Generic x86 mpn_divexact_1 is faster than generic x86 mpn_divrem_1 on all
+   of p5, p6, k6 and k7, so use it always.  It's probably slower on 386 and
+   486, but that's too bad.  */
+#define DIVEXACT_1_THRESHOLD  0
diff --git a/mpn/x86/i486/gmp-mparam.h b/mpn/x86/i486/gmp-mparam.h

new file mode 100644 (file)

index 0000000..30084ed
--- /dev/null
+++ b/mpn/x86/i486/gmp-mparam.h
@@ -0,0 +1,58 @@
+/* 80486 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 32
+#define BYTES_PER_MP_LIMB 4
+
+
+/* 100MHz DX4 */
+
+/* Generated by tuneup.c, 2003-02-13, gcc 2.95 */
+
+#define MUL_TOOM22_THRESHOLD             18
+#define MUL_TOOM33_THRESHOLD            228
+
+#define SQR_BASECASE_THRESHOLD           13
+#define SQR_TOOM2_THRESHOLD              49
+#define SQR_TOOM3_THRESHOLD             238
+
+#define DIV_SB_PREINV_THRESHOLD       MP_SIZE_T_MAX  /* never */
+#define DIV_DC_THRESHOLD                 72
+#define POWM_THRESHOLD                   38
+
+#define GCD_ACCEL_THRESHOLD               3
+#define JACOBI_BASE_METHOD                2
+
+#define USE_PREINV_DIVREM_1               0
+#define USE_PREINV_MOD_1                  0
+#define DIVREM_2_THRESHOLD            MP_SIZE_T_MAX  /* never */
+#define DIVEXACT_1_THRESHOLD              0  /* always (native) */
+#define MODEXACT_1_ODD_THRESHOLD         17
+
+#define GET_STR_DC_THRESHOLD             32
+#define GET_STR_PRECOMPUTE_THRESHOLD     82
+#define SET_STR_THRESHOLD              3524
+
+#define MUL_FFT_TABLE  { 464, 928, 1920, 4608, 10240, 40960, 0 }
+#define MUL_FFT_MODF_THRESHOLD          392
+#define MUL_FFT_THRESHOLD              2816
+
+#define SQR_FFT_TABLE  { 432, 928, 1920, 4608, 14336, 40960, 0 }
+#define SQR_FFT_MODF_THRESHOLD          392
+#define SQR_FFT_THRESHOLD              2816
diff --git a/mpn/x86/invert_limb.asm b/mpn/x86/invert_limb.asm

new file mode 100644 (file)

index 0000000..ff77128
--- /dev/null
+++ b/mpn/x86/invert_limb.asm
@@ -0,0 +1,169 @@
+dnl  x86 mpn_invert_limb
+
+dnl  Contributed to the GNU project by Niels Möller
+
+dnl  Copyright 2009, 2011 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles (approx)    div
+C K7:           46             53
+
+C Register usage:
+C   Input D in %edi
+C   Current approximation is in %eax and/or %ecx
+C   %ebx and %edx are temporaries
+C   %esi and %ebp are unused
+
+defframe(PARAM_DIVISOR,4)
+C The single stack argument is the divisor D; it is loaded into %edi below.
+ASM_START()
+
+C Make approx_tab global to work around Apple relocation bug.
+ifdef(`DARWIN',`
+       define(`approx_tab', MPN(invert_limb_tab))
+       GLOBL   approx_tab')
+C The routine refines a table-driven approximation in three steps: v0, v1, v2, v3.
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_invert_limb)
+deflit(`FRAME', 0)
+       C Adding the unnecessary push of %ebp and the corresponding pop seems
+       C to *reduce* running time from 46 to 43 cycles on K7.  Don't know if
+       C this is a benchmark artefact or some alignment issue.
+
+       push    %ebx    FRAME_pushl()
+       C push  %ebp    FRAME_pushl()
+       push    %edi    FRAME_pushl()
+
+       mov     PARAM_DIVISOR, %edi
+       mov     %edi, %eax
+       shr     $22, %eax                               C %eax = top 10 bits of d
+ifdef(`PIC',`
+       LEA(    approx_tab, %ebx)
+       movzwl  -1024(%ebx, %eax, 2), %eax
+',`
+       movzwl  -1024+approx_tab`'(%eax, %eax), %eax    C %eax = v0
+')
+       C The -1024 byte bias makes index 512 hit entry 0; assumes d has its top bit set (TODO confirm).
+       C v1 = (v0 << 4) - ((v0*v0*d_21) >> 32) - 1
+       mov     %eax, %ecx
+       imul    %eax, %eax                              C %eax = v0*v0
+       mov     %edi, %ebx
+       shr     $11, %ebx
+       inc     %ebx                                    C %ebx = d_21 = (d >> 11) + 1
+       mul     %ebx                                    C %edx = (v0*v0*d_21) >> 32
+       mov     %edi, %ebx                              C Prepare d_31 and mask for the v2 step
+       shr     %ebx                                    C %ebx = d >> 1, CF = low bit of d
+       sbb     %eax, %eax                              C %eax = -CF: all ones if d is odd, else 0
+       sub     %eax, %ebx                              C %ebx = d_31 = (d >> 1) + (d & 1), %eax = mask
+       shl     $4, %ecx                                C %ecx = v0 << 4
+       dec     %ecx
+       sub     %edx, %ecx                              C %ecx = v1
+
+       C v2 = (v1 << 15) + ((v1 * (2^48 - v1 * d_31 + ((v1 >> 1) & mask))) >> 33)
+       imul    %ecx, %ebx                              C %ebx = low 32 bits of v1 * d_31
+       and     %ecx, %eax                              C %eax = v1 & mask
+       shr     %eax                                    C %eax = (v1 & mask) >> 1
+       sub     %ebx, %eax                              C %eax = the inner factor, reduced mod 2^32
+       mul     %ecx                                    C %edx = (v1 * inner factor) >> 32
+       mov     %edi, %eax                              C Prepare for next mul
+       shl     $15, %ecx
+       shr     %edx                                    C one more bit, 33 in total
+       add     %edx, %ecx                              C %ecx = v2
+       C v3 = v2 - (((v2 + 2^32 + 1) * d) >> 32), computed mod 2^32
+       mul     %ecx                                    C %edx:%eax = v2 * d
+       add     %edi, %eax                              C low limb + d: only the carry is used
+       mov     %ecx, %eax                              C mov leaves the carry flag intact
+       adc     %edi, %edx                              C %edx = high limb + d + carry
+       sub     %edx, %eax                              C %eax = v3
+
+       pop     %edi
+       C pop   %ebp
+       pop     %ebx
+
+       ret                                             C v3 is returned in %eax
+
+EPILOGUE()
+C Table of 512 16-bit v0 values; entry i serves d >> 22 == 512 + i.  Spot checks match floor(0xffc200 / (512 + i)).
+DEF_OBJECT(approx_tab,2)
+       .value  0x7fe1,0x7fa1,0x7f61,0x7f22,0x7ee3,0x7ea4,0x7e65,0x7e27
+       .value  0x7de9,0x7dab,0x7d6d,0x7d30,0x7cf3,0x7cb6,0x7c79,0x7c3d
+       .value  0x7c00,0x7bc4,0x7b89,0x7b4d,0x7b12,0x7ad7,0x7a9c,0x7a61
+       .value  0x7a27,0x79ec,0x79b2,0x7979,0x793f,0x7906,0x78cc,0x7894
+       .value  0x785b,0x7822,0x77ea,0x77b2,0x777a,0x7742,0x770b,0x76d3
+       .value  0x769c,0x7665,0x762f,0x75f8,0x75c2,0x758c,0x7556,0x7520
+       .value  0x74ea,0x74b5,0x7480,0x744b,0x7416,0x73e2,0x73ad,0x7379
+       .value  0x7345,0x7311,0x72dd,0x72aa,0x7277,0x7243,0x7210,0x71de
+       .value  0x71ab,0x7179,0x7146,0x7114,0x70e2,0x70b1,0x707f,0x704e
+       .value  0x701c,0x6feb,0x6fba,0x6f8a,0x6f59,0x6f29,0x6ef9,0x6ec8
+       .value  0x6e99,0x6e69,0x6e39,0x6e0a,0x6ddb,0x6dab,0x6d7d,0x6d4e
+       .value  0x6d1f,0x6cf1,0x6cc2,0x6c94,0x6c66,0x6c38,0x6c0a,0x6bdd
+       .value  0x6bb0,0x6b82,0x6b55,0x6b28,0x6afb,0x6acf,0x6aa2,0x6a76
+       .value  0x6a49,0x6a1d,0x69f1,0x69c6,0x699a,0x696e,0x6943,0x6918
+       .value  0x68ed,0x68c2,0x6897,0x686c,0x6842,0x6817,0x67ed,0x67c3
+       .value  0x6799,0x676f,0x6745,0x671b,0x66f2,0x66c8,0x669f,0x6676
+       .value  0x664d,0x6624,0x65fc,0x65d3,0x65aa,0x6582,0x655a,0x6532
+       .value  0x650a,0x64e2,0x64ba,0x6493,0x646b,0x6444,0x641c,0x63f5
+       .value  0x63ce,0x63a7,0x6381,0x635a,0x6333,0x630d,0x62e7,0x62c1
+       .value  0x629a,0x6275,0x624f,0x6229,0x6203,0x61de,0x61b8,0x6193
+       .value  0x616e,0x6149,0x6124,0x60ff,0x60da,0x60b6,0x6091,0x606d
+       .value  0x6049,0x6024,0x6000,0x5fdc,0x5fb8,0x5f95,0x5f71,0x5f4d
+       .value  0x5f2a,0x5f07,0x5ee3,0x5ec0,0x5e9d,0x5e7a,0x5e57,0x5e35
+       .value  0x5e12,0x5def,0x5dcd,0x5dab,0x5d88,0x5d66,0x5d44,0x5d22
+       .value  0x5d00,0x5cde,0x5cbd,0x5c9b,0x5c7a,0x5c58,0x5c37,0x5c16
+       .value  0x5bf5,0x5bd4,0x5bb3,0x5b92,0x5b71,0x5b51,0x5b30,0x5b10
+       .value  0x5aef,0x5acf,0x5aaf,0x5a8f,0x5a6f,0x5a4f,0x5a2f,0x5a0f
+       .value  0x59ef,0x59d0,0x59b0,0x5991,0x5972,0x5952,0x5933,0x5914
+       .value  0x58f5,0x58d6,0x58b7,0x5899,0x587a,0x585b,0x583d,0x581f
+       .value  0x5800,0x57e2,0x57c4,0x57a6,0x5788,0x576a,0x574c,0x572e
+       .value  0x5711,0x56f3,0x56d5,0x56b8,0x569b,0x567d,0x5660,0x5643
+       .value  0x5626,0x5609,0x55ec,0x55cf,0x55b2,0x5596,0x5579,0x555d
+       .value  0x5540,0x5524,0x5507,0x54eb,0x54cf,0x54b3,0x5497,0x547b
+       .value  0x545f,0x5443,0x5428,0x540c,0x53f0,0x53d5,0x53b9,0x539e
+       .value  0x5383,0x5368,0x534c,0x5331,0x5316,0x52fb,0x52e0,0x52c6
+       .value  0x52ab,0x5290,0x5276,0x525b,0x5240,0x5226,0x520c,0x51f1
+       .value  0x51d7,0x51bd,0x51a3,0x5189,0x516f,0x5155,0x513b,0x5121
+       .value  0x5108,0x50ee,0x50d5,0x50bb,0x50a2,0x5088,0x506f,0x5056
+       .value  0x503c,0x5023,0x500a,0x4ff1,0x4fd8,0x4fbf,0x4fa6,0x4f8e
+       .value  0x4f75,0x4f5c,0x4f44,0x4f2b,0x4f13,0x4efa,0x4ee2,0x4eca
+       .value  0x4eb1,0x4e99,0x4e81,0x4e69,0x4e51,0x4e39,0x4e21,0x4e09
+       .value  0x4df1,0x4dda,0x4dc2,0x4daa,0x4d93,0x4d7b,0x4d64,0x4d4d
+       .value  0x4d35,0x4d1e,0x4d07,0x4cf0,0x4cd8,0x4cc1,0x4caa,0x4c93
+       .value  0x4c7d,0x4c66,0x4c4f,0x4c38,0x4c21,0x4c0b,0x4bf4,0x4bde
+       .value  0x4bc7,0x4bb1,0x4b9a,0x4b84,0x4b6e,0x4b58,0x4b41,0x4b2b
+       .value  0x4b15,0x4aff,0x4ae9,0x4ad3,0x4abd,0x4aa8,0x4a92,0x4a7c
+       .value  0x4a66,0x4a51,0x4a3b,0x4a26,0x4a10,0x49fb,0x49e5,0x49d0
+       .value  0x49bb,0x49a6,0x4990,0x497b,0x4966,0x4951,0x493c,0x4927
+       .value  0x4912,0x48fe,0x48e9,0x48d4,0x48bf,0x48ab,0x4896,0x4881
+       .value  0x486d,0x4858,0x4844,0x482f,0x481b,0x4807,0x47f3,0x47de
+       .value  0x47ca,0x47b6,0x47a2,0x478e,0x477a,0x4766,0x4752,0x473e
+       .value  0x472a,0x4717,0x4703,0x46ef,0x46db,0x46c8,0x46b4,0x46a1
+       .value  0x468d,0x467a,0x4666,0x4653,0x4640,0x462c,0x4619,0x4606
+       .value  0x45f3,0x45e0,0x45cd,0x45ba,0x45a7,0x4594,0x4581,0x456e
+       .value  0x455b,0x4548,0x4536,0x4523,0x4510,0x44fe,0x44eb,0x44d8
+       .value  0x44c6,0x44b3,0x44a1,0x448f,0x447c,0x446a,0x4458,0x4445
+       .value  0x4433,0x4421,0x440f,0x43fd,0x43eb,0x43d9,0x43c7,0x43b5
+       .value  0x43a3,0x4391,0x437f,0x436d,0x435c,0x434a,0x4338,0x4327
+       .value  0x4315,0x4303,0x42f2,0x42e0,0x42cf,0x42bd,0x42ac,0x429b
+       .value  0x4289,0x4278,0x4267,0x4256,0x4244,0x4233,0x4222,0x4211
+       .value  0x4200,0x41ef,0x41de,0x41cd,0x41bc,0x41ab,0x419a,0x418a
+       .value  0x4179,0x4168,0x4157,0x4147,0x4136,0x4125,0x4115,0x4104
+       .value  0x40f4,0x40e3,0x40d3,0x40c2,0x40b2,0x40a2,0x4091,0x4081
+       .value  0x4071,0x4061,0x4050,0x4040,0x4030,0x4020,0x4010,0x4000
+END_OBJECT(approx_tab)
diff --git a/mpn/x86/k6/README b/mpn/x86/k6/README

new file mode 100644 (file)

index 0000000..f488cbd
--- /dev/null
+++ b/mpn/x86/k6/README
@@ -0,0 +1,240 @@
+Copyright 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+
+
+                       AMD K6 MPN SUBROUTINES
+
+
+
+This directory contains code optimized for AMD K6 CPUs, meaning K6, K6-2 and
+K6-3.
+
+The mmx subdirectory has MMX code suiting plain K6, the k62mmx subdirectory
+has MMX code suiting K6-2 and K6-3.  All chips in the K6 family have MMX;
+the separate directories exist only so that ./configure can omit them if
+the assembler doesn't support MMX.
+
+
+
+
+STATUS
+
+Times for the loops, with all code and data in L1 cache, are as follows.
+
+                                 cycles/limb
+
+       mpn_add_n/sub_n            3.25 normal, 2.75 in-place
+
+       mpn_mul_1                  6.25
+       mpn_add/submul_1           7.65-8.4  (varying with data values)
+
+       mpn_mul_basecase           9.25 cycles/crossproduct (approx)
+       mpn_sqr_basecase           4.7  cycles/crossproduct (approx)
+                                   or 9.2 cycles/triangleproduct (approx)
+
+       mpn_l/rshift               3.0
+
+       mpn_divrem_1              20.0
+       mpn_mod_1                 20.0
+       mpn_divexact_by3          11.0
+
+       mpn_copyi                  1.0
+       mpn_copyd                  1.0
+
+
+K6-2 and K6-3 have dual-issue MMX and get the following improvements.
+
+       mpn_l/rshift               1.75
+
+
+Prefetching of sources hasn't yet given any joy.  With the 3DNow "prefetch"
+instruction, code seems to run slower, and with just "mov" loads it doesn't
+seem faster.  Results so far are inconsistent.  The K6 does a hardware
+prefetch of the second cache line in a sector, so the penalty for not
+prefetching in software is reduced.
+
+
+
+
+NOTES
+
+All K6 family chips have MMX, but only K6-2 and K6-3 have 3DNow.
+
+Plain K6 executes MMX instructions only in the X pipe, but K6-2 and K6-3 can
+execute them in both X and Y (and in both together).
+
+Branch misprediction penalty is 1 to 4 cycles (Optimization Manual
+chapter 6 table 12).
+
+Write-allocate L1 data cache means prefetching of destinations is unnecessary.
+Store queue is 7 entries of 64 bits each.
+
+Floating point multiplications can be done in parallel with integer
+multiplications, but there doesn't seem to be any way to make use of this.
+
+
+
+OPTIMIZATIONS
+
+Unrolled loops are used to reduce looping overhead.  The unrolling is
+configurable up to 32 limbs/loop for most routines, up to 64 for some.
+
+Sometimes computed jumps into the unrolling are used to handle sizes not a
+multiple of the unrolling.  An attractive feature of this is that times
+smoothly increase with operand size, but an indirect jump is about 6 cycles
+and the setups about another 6, so it depends on how much the unrolled code
+is faster than a simple loop as to whether a computed jump ought to be used.
+
+Position independent code uses a call to get eip for computed jumps.  The
+return address is always consumed with a ret, rather than an addl $4,%esp or
+a popl, so the CPU return address branch prediction stack stays synchronised
+with the actual stack in memory.  Such a call however still costs 4 to 7
+cycles.
+
+Branch prediction, in absence of any history, will guess forward jumps are
+not taken and backward jumps are taken.  Where possible it's arranged that
+the less likely or less important case is under a taken forward jump.
+
+
+
+MMX
+
+Putting emms or femms as late as possible in a routine seems to be fastest.
+Perhaps an emms or femms stalls until all outstanding MMX instructions have
+completed, so putting it later gives them a chance to complete on their own,
+in parallel with other operations (like register popping).
+
+The Optimization Manual chapter 5 recommends using a femms on K6-2 and K6-3
+at the start of a routine, in case it's been preceded by x87 floating point
+operations.  This isn't done because in gmp programs it's expected that x87
+floating point won't be much used and that chances are an mpn routine won't
+have been preceded by any x87 code.
+
+
+
+CODING
+
+Instructions in general code are shown paired if they can decode and execute
+together, meaning two short decode instructions with the second not
+depending on the first, only the first using the shifter, no more than one
+load, and no more than one store.
+
+K6 does some out of order execution so the pairings aren't essential, they
+just show what slots might be available.  When decoding is the limiting
+factor things can be scheduled that might not execute until later.
+
+
+
+NOTES
+
+Code alignment
+
+- if an opcode/modrm or 0Fh/opcode/modrm crosses a cache line boundary,
+  short decode is inhibited.  The cross.pl script detects this.
+
+- loops and branch targets should be aligned to 16 bytes, or ensure at least
+  2 instructions before a 32 byte boundary.  This makes use of the 16 byte
+  cache in the BTB.
+
+Addressing modes
+
+- (%esi) degrades decoding from short to vector.  0(%esi) doesn't have this
+  problem, and can be used as an equivalent, or easier is just to use a
+  different register, like %ebx.
+
+- K6 and pre-CXT core K6-2 have the following problem.  (K6-2 CXT and K6-3
+  have it fixed, these being cpuid function 1 signatures 0x588 to 0x58F).
+
+  If more than 3 bytes are needed to determine instruction length then
+  decoding degrades from direct to long, or from long to vector.  This
+  happens with forms like "0F opcode mod/rm" with mod/rm=00-xxx-100 since
+  with mod=00 the sib determines whether there's a displacement.
+
+  This affects all MMX and 3DNow instructions, and others with an 0F prefix,
+  like movzbl.  The modes affected are anything with an index and no
+  displacement, or an index but no base, and this includes (%esp) which is
+  really (,%esp,1).
+
+  The cross.pl script detects problem cases.  The workaround is to always
+  use a displacement, and to do this with Zdisp if it's zero so the
+  assembler doesn't discard it.
+
+  See Optimization Manual rev D page 67 and 3DNow Porting Guide rev B pages
+  13-14 and 36-37.
+
+Calls
+
+- indirect jumps and calls are not branch predicted, they measure about 6
+  cycles.
+
+Various
+
+- adcl      2 cycles of decode, maybe 2 cycles executing in the X pipe
+- bsf       12-27 cycles
+- emms      5 cycles
+- femms     3 cycles
+- jecxz     2 cycles taken, 13 not taken (optimization manual says 7 not taken)
+- divl      20 cycles back-to-back
+- imull     2 decode, 3 execute
+- mull      2 decode, 3 execute (optimization manual decoding sample)
+- prefetch  2 cycles
+- rcll/rcrl implicit by one bit: 2 cycles
+            immediate or %cl count: 11 + 2 per bit for dword
+                                    13 + 4 per bit for byte
+- setCC     2 cycles
+- xchgl     %eax,reg  1.5 cycles, back-to-back (strange)
+            reg,reg   2 cycles, back-to-back
+
+
+
+
+REFERENCES
+
+"AMD-K6 Processor Code Optimization Application Note", AMD publication
+number 21924, revision D amendment 0, January 2000.  This describes K6-2 and
+K6-3.  Available on-line,
+
+http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/21924.pdf
+
+"AMD-K6 MMX Enhanced Processor x86 Code Optimization Application Note", AMD
+publication number 21828, revision A amendment 0, August 1997.  This is an
+older edition of the above document, describing plain K6.  Available
+on-line,
+
+http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/21828.pdf
+
+"3DNow Technology Manual", AMD publication number 21928G/0-March 2000.
+This describes the femms and prefetch instructions, but nothing else from
+3DNow has been used.  Available on-line,
+
+http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/21928.pdf
+
+"3DNow Instruction Porting Guide", AMD publication number 22621, revision B,
+August 1999.  This has some notes on general K6 optimizations as well as
+3DNow.  Available on-line,
+
+http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/22621.pdf
+
+
+
+----------------
+Local variables:
+mode: text
+fill-column: 76
+End:
diff --git a/mpn/x86/k6/aors_n.asm b/mpn/x86/k6/aors_n.asm

new file mode 100644 (file)

index 0000000..09afd8f
--- /dev/null
+++ b/mpn/x86/k6/aors_n.asm
@@ -0,0 +1,326 @@
+dnl  AMD K6 mpn_add/sub_n -- mpn addition or subtraction.
+
+dnl  Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C K6: normal 3.25 cycles/limb, in-place 2.75 cycles/limb.
+
+C One source, two builds: OPERATION_add_n selects adcl and the mpn_add names, OPERATION_sub_n selects sbbl and the mpn_sub names.
+ifdef(`OPERATION_add_n', `
+       define(M4_inst,        adcl)
+       define(M4_function_n,  mpn_add_n)
+       define(M4_function_nc, mpn_add_nc)
+       define(M4_description, add)
+',`ifdef(`OPERATION_sub_n', `
+       define(M4_inst,        sbbl)
+       define(M4_function_n,  mpn_sub_n)
+       define(M4_function_nc, mpn_sub_nc)
+       define(M4_description, subtract)
+',`m4_error(`Need OPERATION_add_n or OPERATION_sub_n
+')')')
+C NOTE(review): the next line presumably lists every entry point this file can provide; confirm against the build system.
+MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
+
+
+C mp_limb_t M4_function_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C                          mp_size_t size);
+C mp_limb_t M4_function_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C                            mp_size_t size, mp_limb_t carry);
+C
+C Calculate src1,size M4_description src2,size, and store the result in
+C dst,size.  The return value is the carry bit from the top of the result
+C (1 or 0).
+C
+C The _nc version accepts 1 or 0 for an initial carry into the low limb of
+C the calculation.  Note values other than 1 or 0 here will lead to garbage
+C results.
+C
+C Instruction decoding limits a normal dst=src1+src2 operation to 3 c/l, and
+C an in-place dst+=src to 2.5 c/l.  The unrolled loops have 1 cycle/loop of
+C loop control, which with 4 limbs/loop means an extra 0.25 c/l.
+C Stack arguments in declaration order: dst, src1, src2, size, carry.  FRAME presumably tracks bytes pushed so far (TODO confirm).
+define(PARAM_CARRY, `FRAME+20(%esp)')
+define(PARAM_SIZE,  `FRAME+16(%esp)')
+define(PARAM_SRC2,  `FRAME+12(%esp)')
+define(PARAM_SRC1,  `FRAME+8(%esp)')
+define(PARAM_DST,   `FRAME+4(%esp)')
+deflit(`FRAME',0)
+
+dnl  minimum 5 because the unrolled code can't handle less: the in-place loop count (size-1)&-4 must be nonzero
+deflit(UNROLL_THRESHOLD, 5)
+
+       TEXT
+       ALIGN(32)
+
+PROLOGUE(M4_function_nc)
+       movl    PARAM_CARRY, %eax       C caller-supplied carry-in, documented as 0 or 1
+       jmp     L(start)                C join the common code of the entry point without carry
+EPILOGUE()
+
+
+PROLOGUE(M4_function_n)
+       xorl    %eax, %eax              C plain entry point: carry-in is 0
+L(start):
+       movl    PARAM_SIZE, %ecx
+       pushl   %ebx
+FRAME_pushl()
+
+       movl    PARAM_SRC1, %ebx
+       pushl   %edi
+FRAME_pushl()
+
+       movl    PARAM_SRC2, %edx
+       cmpl    $UNROLL_THRESHOLD, %ecx
+
+       movl    PARAM_DST, %edi
+       jae     L(unroll)               C movl above leaves the cmpl flags intact
+
+
+       shrl    %eax            C initial carry flag
+
+       C offset 0x21 here, close enough to aligned
+L(simple):
+       C eax   scratch
+       C ebx   src1
+       C ecx   counter
+       C edx   src2
+       C esi
+       C edi   dst
+       C ebp
+       C
+       C The store to (%edi) could be done with a stosl; it'd be smaller
+       C code, but there's no speed gain and a cld would have to be added
+       C (per mpn/x86/README).
+
+       movl    (%ebx), %eax
+       leal    4(%ebx), %ebx           C leal steps the pointer without touching CF
+
+       M4_inst (%edx), %eax
+
+       movl    %eax, (%edi)
+       leal    4(%edi), %edi
+
+       leal    4(%edx), %edx
+       loop    L(simple)               C loop decrements %ecx and leaves the flags alone
+
+
+       movl    $0, %eax                C movl rather than xorl: CF is still needed
+       popl    %edi
+
+       setc    %al                     C return the final carry or borrow
+
+       popl    %ebx
+       ret
+
+
+C -----------------------------------------------------------------------------
+L(unroll):
+       C eax   carry
+       C ebx   src1
+       C ecx   counter
+       C edx   src2
+       C esi
+       C edi   dst
+       C ebp
+
+       cmpl    %edi, %ebx              C is dst the same as src1
+       pushl   %esi
+
+       je      L(inplace)
+
+ifdef(`OPERATION_add_n',`
+       cmpl    %edi, %edx
+
+       je      L(inplace_reverse)
+')
+
+       movl    %ecx, %esi
+
+       andl    $-4, %ecx               C limbs handled by the unrolled loop
+       andl    $3, %esi                C leftover limbs, 0 to 3
+
+       leal    (%ebx,%ecx,4), %ebx     C point just past the unrolled part
+       leal    (%edx,%ecx,4), %edx
+       leal    (%edi,%ecx,4), %edi
+
+       negl    %ecx                    C negative index, counts up to zero
+       shrl    %eax                    C carry-in bit into CF
+
+       ALIGN(32)
+L(normal_top):
+       C eax   scratch
+       C ebx   src1
+       C ecx   counter, limbs, negative
+       C edx   src2
+       C esi
+       C edi   dst
+       C ebp
+
+       movl    (%ebx,%ecx,4), %eax
+       leal    5(%ecx), %ecx           C +5 here and -1 from loop: 4 limbs per pass
+       M4_inst -20(%edx,%ecx,4), %eax  C -20 undoes the 5 limb bias
+       movl    %eax, -20(%edi,%ecx,4)
+
+       movl    4-20(%ebx,%ecx,4), %eax
+       M4_inst 4-20(%edx,%ecx,4), %eax
+       movl    %eax, 4-20(%edi,%ecx,4)
+
+       movl    8-20(%ebx,%ecx,4), %eax
+       M4_inst 8-20(%edx,%ecx,4), %eax
+       movl    %eax, 8-20(%edi,%ecx,4)
+
+       movl    12-20(%ebx,%ecx,4), %eax
+       M4_inst 12-20(%edx,%ecx,4), %eax
+       movl    %eax, 12-20(%edi,%ecx,4)
+
+       loop    L(normal_top)           C leaves CF alone, exits with %ecx zero
+
+
+       decl    %esi                    C decl leaves CF alone
+       jz      L(normal_finish_one)    C exactly one limb left, %ecx is 0
+       js      L(normal_done)          C nothing left
+
+       C two or three more limbs
+
+       movl    (%ebx), %eax
+       M4_inst (%edx), %eax
+       movl    %eax, (%edi)
+
+       movl    4(%ebx), %eax
+       M4_inst 4(%edx), %eax
+       decl    %esi
+       movl    %eax, 4(%edi)
+
+       jz      L(normal_done)
+       movl    $2, %ecx                C index of the third leftover limb
+
+L(normal_finish_one):
+       movl    (%ebx,%ecx,4), %eax
+       M4_inst (%edx,%ecx,4), %eax
+       movl    %eax, (%edi,%ecx,4)
+
+L(normal_done):
+       popl    %esi
+       popl    %edi
+
+       movl    $0, %eax                C movl rather than xorl: CF is still needed
+       popl    %ebx
+
+       setc    %al                     C return the final carry or borrow
+
+       ret
+
+
+C -----------------------------------------------------------------------------
+
+ifdef(`OPERATION_add_n',`
+L(inplace_reverse):
+       C dst==src2
+
+       movl    %ebx, %edx
+')
+
+L(inplace):
+       C eax   initial carry
+       C ebx
+       C ecx   size
+       C edx   src
+       C esi
+       C edi   dst
+       C ebp
+
+       leal    -1(%ecx), %esi
+       decl    %ecx                    C size-1: the loop always leaves 1 to 4 limbs for the tail
+
+       andl    $-4, %ecx
+       andl    $3, %esi
+
+       movl    (%edx), %ebx            C src low limb
+       leal    (%edx,%ecx,4), %edx
+
+       leal    (%edi,%ecx,4), %edi
+       negl    %ecx
+
+       shrl    %eax                    C carry-in bit into CF
+
+
+       ALIGN(32)
+L(inplace_top):
+       C eax   scratch
+       C ebx   next src limb
+       C ecx   counter, limbs, negative
+       C edx   src
+       C esi
+       C edi   dst
+       C ebp
+
+       M4_inst %ebx, (%edi,%ecx,4)
+
+       movl    4(%edx,%ecx,4), %eax
+       leal    5(%ecx), %ecx           C +5 here and -1 from loop: 4 limbs per pass
+
+       M4_inst %eax, 4-20(%edi,%ecx,4)
+
+       movl    8-20(%edx,%ecx,4), %eax
+       movl    12-20(%edx,%ecx,4), %ebx
+
+       M4_inst %eax, 8-20(%edi,%ecx,4)
+       M4_inst %ebx, 12-20(%edi,%ecx,4)
+
+       movl    16-20(%edx,%ecx,4), %ebx        C preload the first limb of the next pass
+       loop    L(inplace_top)
+
+
+       C now %esi is 0 to 3 representing respectively 1 to 4 limbs more
+
+       M4_inst %ebx, (%edi)
+
+       decl    %esi                    C decl leaves CF alone
+       jz      L(inplace_finish_one)   C one more limb, %ecx is 0
+       js      L(inplace_done)
+
+       C two or three more limbs
+
+       movl    4(%edx), %eax
+       movl    8(%edx), %ebx
+       M4_inst %eax, 4(%edi)
+       M4_inst %ebx, 8(%edi)
+
+       decl    %esi
+       movl    $2, %ecx                C index used if a last limb remains
+
+       jz      L(normal_done)          C same pops and return as the in-place epilogue
+
+L(inplace_finish_one):
+       movl    4(%edx,%ecx,4), %eax
+       M4_inst %eax, 4(%edi,%ecx,4)
+
+L(inplace_done):
+       popl    %esi
+       popl    %edi
+
+       movl    $0, %eax                C movl rather than xorl: CF is still needed
+       popl    %ebx
+
+       setc    %al                     C return the final carry or borrow
+
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/k6/aorsmul_1.asm b/mpn/x86/k6/aorsmul_1.asm

new file mode 100644 (file)

index 0000000..5f1cd9c
--- /dev/null
+++ b/mpn/x86/k6/aorsmul_1.asm
@@ -0,0 +1,381 @@
+dnl  AMD K6 mpn_addmul_1/mpn_submul_1 -- add or subtract mpn multiple.
+
+dnl  Copyright 1999, 2000, 2001, 2002, 2003, 2005 Free Software Foundation,
+dnl  Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C                           cycles/limb
+C P5:
+C P6 model 0-8,10-12             5.94
+C P6 model 9  (Banias)
+C P6 model 13 (Dothan)           5.57
+C P4 model 0  (Willamette)
+C P4 model 1  (?)
+C P4 model 2  (Northwood)
+C P4 model 3  (Prescott)
+C P4 model 4  (Nocona)
+C K6:                           7.65-8.5 (data dependent)
+C K7:
+C K8:
+
+
+dnl  K6:           large multipliers  small multipliers
+dnl  UNROLL_COUNT    cycles/limb       cycles/limb
+dnl        4             9.5              7.78
+dnl        8             9.0              7.78
+dnl       16             8.4              7.65
+dnl       32             8.4              8.2
+dnl
+dnl  Maximum possible unrolling with the current code is 32.
+dnl
+dnl  Unrolling to 16 limbs/loop makes the unrolled loop fit exactly in a 256
+dnl  byte block, which might explain the good speed at that unrolling.
+
+deflit(UNROLL_COUNT, 16)
+
+
+dnl  Select the read-modify-write instruction (M4_inst) and the names of the
+dnl  two entry points according to which OPERATION_xxx symbol is defined.
+dnl  If neither symbol is defined, m4_error is invoked with a message.
+ifdef(`OPERATION_addmul_1', `
+       define(M4_inst,        addl)
+       define(M4_function_1,  mpn_addmul_1)
+       define(M4_function_1c, mpn_addmul_1c)
+',`ifdef(`OPERATION_submul_1', `
+       define(M4_inst,        subl)
+       define(M4_function_1,  mpn_submul_1)
+       define(M4_function_1c, mpn_submul_1c)
+',`m4_error(`Need OPERATION_addmul_1 or OPERATION_submul_1
+')')')
+
+MULFUNC_PROLOGUE(mpn_addmul_1 mpn_addmul_1c mpn_submul_1 mpn_submul_1c)
+
+
+C mp_limb_t mpn_addmul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                         mp_limb_t mult);
+C mp_limb_t mpn_addmul_1c (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                          mp_limb_t mult, mp_limb_t carry);
+C mp_limb_t mpn_submul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                         mp_limb_t mult);
+C mp_limb_t mpn_submul_1c (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                          mp_limb_t mult, mp_limb_t carry);
+C
+C The jadcl0()s in the unrolled loop make the speed data dependent.  Small
+C multipliers (most significant few bits clear) result in few carry bits and
+C speeds up to 7.65 cycles/limb are attained.  Large multipliers (most
+C significant few bits set) make the carry bits 50/50 and lead to something
+C more like 8.4 c/l.  With adcl's both of these would be 9.3 c/l.
+C
+C It's important that the gains for jadcl0 on small multipliers don't come
+C at the cost of slowing down other data.  Tests on uniformly distributed
+C random data, designed to confound branch prediction, show about a 7%
+C speed-up using jadcl0 over adcl (8.93 versus 9.57 cycles/limb, with all
+C overheads included).
+C
+C In the simple loop, jadcl0() measures slower than adcl (11.9-14.7 versus
+C 11.0 cycles/limb), and hence isn't used.
+C
+C In the simple loop, note that running ecx from negative to zero and using
+C it as an index in the two movs wouldn't help.  It would save one
+C instruction (2*addl+loop becoming incl+jnz), but there's nothing unpaired
+C that would be collapsed by this.
+C
+C Attempts at a simpler main loop, with less unrolling, haven't yielded much
+C success, generally running over 9 c/l.
+C
+C
+C jadcl0
+C ------
+C
+C jadcl0() being faster than adcl $0 seems to be an artifact of two things,
+C firstly the instruction decoding and secondly the fact that there's a
+C carry bit for the jadcl0 only on average about 1/4 of the time.
+C
+C The code in the unrolled loop decodes something like the following.
+C
+C                                         decode cycles
+C              mull    %ebp                    2
+C              M4_inst %esi, disp(%edi)        1
+C              adcl    %eax, %ecx              2
+C              movl    %edx, %esi            \ 1
+C              jnc     1f                    /
+C              incl    %esi                  \ 1
+C      1:      movl    disp(%ebx), %eax      /
+C                                              ---
+C                                               7
+C
+C In a back-to-back style test this measures 7 with the jnc not taken, or 8
+C with it taken (both when correctly predicted).  This is opposite to the
+C measurements showing small multipliers running faster than large ones.
+C Don't really know why.
+C
+C It's not clear how much branch misprediction might be costing.  The K6
+C doco says it will be 1 to 4 cycles, but presumably it's near the low end
+C of that range to get the measured results.
+C
+C
+C In the code the two carries are more or less the preceding mul product and
+C the calculation is roughly
+C
+C      x*y + u*b+v
+C
+C where b=2^32 is the size of a limb, x*y is the two carry limbs, and u and
+C v are the two limbs it's added to (being the low of the next mul, and a
+C limb from the destination).
+C
+C To get a carry requires x*y+u*b+v >= b^2, which is u*b+v >= b^2-x*y, and
+C there are b^2-(b^2-x*y) = x*y many such values, giving a probability of
+C x*y/b^2.  If x, y, u and v are random and uniformly distributed between 0
+C and b-1, then the total probability can be summed over x and y,
+C
+C       1    b-1 b-1 x*y    1    b*(b-1)   b*(b-1)
+C      --- * sum sum --- = --- * ------- * ------- = 1/4
+C      b^2   x=0 y=0 b^2   b^4      2         2
+C
+C Actually it's a very tiny bit less than 1/4 of course.  If y is fixed,
+C then the probability is 1/2*y/b thus varying linearly between 0 and 1/2.
+
+
+ifdef(`PIC',`
+deflit(UNROLL_THRESHOLD, 9)
+',`
+deflit(UNROLL_THRESHOLD, 6)
+')
+
+defframe(PARAM_CARRY,     20)
+defframe(PARAM_MULTIPLIER,16)
+defframe(PARAM_SIZE,      12)
+defframe(PARAM_SRC,       8)
+defframe(PARAM_DST,       4)
+
+       TEXT
+       ALIGN(32)
+
+C Carry-in entry point.  Saves esi, loads the caller supplied carry limb
+C (PARAM_CARRY) into it, and joins the common code at L(start_nc) with
+C FRAME equal to 4, the same state the plain entry point reaches there.
+PROLOGUE(M4_function_1c)
+       pushl   %esi
+deflit(`FRAME',4)
+       movl    PARAM_CARRY, %esi
+       jmp     L(start_nc)
+EPILOGUE()
+
+C Plain entry point: identical to the carry-in entry except that the
+C initial carry in esi is zero.
+C
+C On return eax holds the final high limb (carry for addmul, borrow for
+C submul).  esi, ebx, edi and ebp are pushed here and popped before return.
+PROLOGUE(M4_function_1)
+       pushl   %esi
+deflit(`FRAME',4)
+       xorl    %esi, %esi      C initial carry
+
+L(start_nc):
+       C esi   initial carry, FRAME is 4 on arrival from either entry
+
+       movl    PARAM_SIZE, %ecx
+       pushl   %ebx
+deflit(`FRAME',8)
+
+       movl    PARAM_SRC, %ebx
+       pushl   %edi
+deflit(`FRAME',12)
+
+       cmpl    $UNROLL_THRESHOLD, %ecx
+       movl    PARAM_DST, %edi
+
+       pushl   %ebp
+deflit(`FRAME',16)
+       jae     L(unroll)       C flags are still those of the cmpl above
+
+
+       C simple loop
+
+       movl    PARAM_MULTIPLIER, %ebp
+
+L(simple):
+       C eax   scratch
+       C ebx   src
+       C ecx   counter
+       C edx   scratch
+       C esi   carry
+       C edi   dst
+       C ebp   multiplier
+
+       movl    (%ebx), %eax
+       addl    $4, %ebx
+
+       mull    %ebp            C edx:eax = src limb * multiplier
+
+       addl    $4, %edi
+       addl    %esi, %eax      C add carry limb from the previous step
+
+       adcl    $0, %edx
+
+       M4_inst %eax, -4(%edi)  C apply low limb to dst
+
+       adcl    $0, %edx        C carry or borrow out of the dst update
+
+       movl    %edx, %esi      C high limb is the next carry
+       loop    L(simple)
+
+
+       popl    %ebp
+       popl    %edi
+
+       popl    %ebx
+       movl    %esi, %eax      C return value is the final carry limb
+
+       popl    %esi
+       ret
+
+
+
+C -----------------------------------------------------------------------------
+C The unrolled loop uses a "two carry limbs" scheme.  At the top of the loop
+C the carries are ecx=lo, esi=hi, then they swap for each limb processed.
+C For the computed jump an odd size means they start one way around, an even
+C size the other.
+C
+C VAR_JUMP holds the computed jump temporarily because there's not enough
+C registers at the point of doing the mul for the initial two carry limbs.
+C
+C The add/adc for the initial carry in %esi is necessary only for the
+C mpn_addmul/submul_1c entry points.  Duplicating the startup code to
+C eliminate this for the plain mpn_add/submul_1 doesn't seem like a good
+C idea.
+
+dnl  overlapping with parameters already fetched
+define(VAR_COUNTER, `PARAM_SIZE')
+define(VAR_JUMP,    `PARAM_DST')
+
+L(unroll):
+       C eax
+       C ebx   src
+       C ecx   size
+       C edx
+       C esi   initial carry
+       C edi   dst
+       C ebp
+       C
+       C The low limb is multiplied below, ahead of the loop, leaving size-1
+       C limbs for the unrolled code.  VAR_COUNTER is set to
+       C (size-2)>>UNROLL_LOG2, and n = (-(size-1)) & UNROLL_MASK is the
+       C number of limb slots skipped on the first pass through the loop.
+
+       movl    %ecx, %edx
+       decl    %ecx
+
+       subl    $2, %edx
+       negl    %ecx
+
+       shrl    $UNROLL_LOG2, %edx
+       andl    $UNROLL_MASK, %ecx
+
+       movl    %edx, VAR_COUNTER
+       movl    %ecx, %edx
+
+       shll    $4, %edx
+       negl    %ecx
+
+       C Now edx is 16*n and ecx is -n, so edx+ecx is 15*n, the byte offset
+       C into the unrolled code.
+       C
+       C 15 code bytes per limb
+ifdef(`PIC',`
+       call    L(pic_calc)
+L(here):
+',`
+       leal    L(entry) (%edx,%ecx,1), %edx
+')
+       movl    (%ebx), %eax            C src low limb
+
+       movl    PARAM_MULTIPLIER, %ebp
+       movl    %edx, VAR_JUMP          C mull is about to overwrite edx
+
+       mull    %ebp
+
+       addl    %esi, %eax      C initial carry (from _1c)
+       jadcl0( %edx)
+
+
+       C ecx is -n, so src and dst are moved back by n limbs to match the
+       C displacements used at the computed entry point.  src is also
+       C stepped past the low limb already multiplied.
+
+       leal    4(%ebx,%ecx,4), %ebx
+       movl    %edx, %esi      C high carry
+
+       movl    VAR_JUMP, %edx
+       leal    (%edi,%ecx,4), %edi
+
+       testl   $1, %ecx        C parity of the skip count picks carry roles
+       movl    %eax, %ecx      C low carry
+
+       jz      L(noswap)
+       movl    %esi, %ecx      C high,low carry other way around
+
+       movl    %eax, %esi
+L(noswap):
+
+       jmp     *%edx
+
+
+C PIC helper for the computed jump.  It is reached by the call placed
+C immediately before L(here), so the return address at (%esp) is L(here).
+C The result in edx is edx + ecx + (L(entry)-L(here)) + L(here), that is
+C L(entry) plus the 15 bytes per limb offset prepared by the caller.
+C NOTE(review): ret_internal is a macro defined outside this file, presumably
+C a return for calls made within the function.  Confirm in x86-defs.m4.
+ifdef(`PIC',`
+L(pic_calc):
+       C See mpn/x86/README about old gas bugs
+       leal    (%edx,%ecx,1), %edx
+       addl    $L(entry)-L(here), %edx
+       addl    (%esp), %edx
+       ret_internal
+')
+
+
+C -----------------------------------------------------------
+       ALIGN(32)
+L(top):
+deflit(`FRAME',16)
+       C eax   scratch
+       C ebx   src
+       C ecx   carry lo
+       C edx   scratch
+       C esi   carry hi
+       C edi   dst
+       C ebp   multiplier
+       C
+       C 15 code bytes per limb
+       C
+       C dst is advanced here, on re-entry only.  The computed jump lands
+       C at or after L(entry) and so skips this leal on the first pass.
+
+       leal    UNROLL_BYTES(%edi), %edi
+
+C Each forloop iteration below handles two limbs.  The roles of ecx and esi
+C as pending low and high carry swap after each limb, so two limbs bring
+C them back to the state documented at L(top).
+L(entry):
+forloop(`i', 0, UNROLL_COUNT/2-1, `
+       deflit(`disp0', eval(2*i*4))
+       deflit(`disp1', eval(disp0 + 4))
+
+Zdisp( movl,   disp0,(%ebx), %eax)
+       mull    %ebp
+Zdisp( M4_inst,%ecx, disp0,(%edi))
+       adcl    %eax, %esi
+       movl    %edx, %ecx
+       jadcl0( %ecx)
+
+       movl    disp1(%ebx), %eax
+       mull    %ebp
+       M4_inst %esi, disp1(%edi)
+       adcl    %eax, %ecx
+       movl    %edx, %esi
+       jadcl0( %esi)
+')
+
+       decl    VAR_COUNTER
+
+       leal    UNROLL_BYTES(%ebx), %ebx        C leal leaves the decl flags
+       jns     L(top)
+
+
+       C edi was not advanced for the final pass, so the last pending low
+       C carry belongs at UNROLL_BYTES(%edi).
+
+       popl    %ebp
+       M4_inst %ecx, UNROLL_BYTES(%edi)
+
+       popl    %edi
+       movl    %esi, %eax      C return value: pending high carry ...
+
+       popl    %ebx
+       jadcl0( %eax)           C ... plus the carry from the final M4_inst
+
+       popl    %esi
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/k6/cross.pl b/mpn/x86/k6/cross.pl

new file mode 100755 (executable)

index 0000000..cf476d6
--- /dev/null
+++ b/mpn/x86/k6/cross.pl
@@ -0,0 +1,171 @@
+#! /usr/bin/perl
+
+# Copyright 2000, 2001 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation; either version 3 of the License, or (at
+# your option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+# Usage: cross.pl [filename.o]...
+#
+# Produce an annotated disassembly of the given object files, indicating
+# certain code alignment and addressing mode problems afflicting K6 chips.
+# "ZZ" is used on all annotations, so this can be searched for.
+#
+# With no arguments, all .o files corresponding to .asm files are processed.
+# This is good in the mpn object directory of a k6*-*-* build.
+#
+# Code alignments of 8 bytes or more are handled.  When 32 is used, cache
+# line boundaries will fall in at offsets 0x20,0x40,etc and problems are
+# flagged at those locations.  When 16 is used, the line boundaries can also
+# fall at offsets 0x10,0x30,0x50,etc, depending where the file is loaded, so
+# problems are identified there too.  Likewise when 8 byte alignment is used
+# problems are flagged additionally at 0x08,0x18,0x28,etc.
+#
+# Usually 32 byte alignment is used for k6 routines, but less is certainly
+# possible if through good luck, or a little tweaking, cache line crossing
+# problems can be avoided at the extra locations.
+#
+# Bugs:
+#
+# Instructions without mod/rm bytes or which are already vector decoded are
+# unaffected by cache line boundary crossing, but not all of these have yet
+# been put in as exceptions.  All that occur in practice in GMP are present
+# though.
+#
+# There are no messages for using the vector decoded addressing mode (%esi),
+# but that's easy to avoid when coding.
+#
+# Future:
+#
+# Warn about jump targets that are poorly aligned (less than 2 instructions
+# before a cache line boundary).
+
+use strict;
+
+# disassemble($file)
+#
+# Run "objdump -Srfh" on $file, echo every output line unchanged, and after
+# any instruction line that matches one of the problem patterns below print
+# an extra line starting with "ZZ".
+#
+# The section alignment is read from the .text line of the objdump section
+# headers.  Only the first three code bytes of each instruction line are
+# examined.  If no .text header line has been seen, $align stays undefined
+# and none of the boundary checks match.
+sub disassemble {
+    my ($file) = @_;
+    my ($addr,$b1,$b2,$b3, $prefix,$opcode,$modrm);
+    my $align;
+
+    open (IN, "objdump -Srfh $file |")
+       || die "Cannot open pipe from objdump\n";
+    while (<IN>) {
+       print;
+
+       # Section header line for .text, ending in the alignment as 2**N.
+       if (/^[ \t]*[0-9]+[ \t]+\.text[ \t]/ && /2\*\*([0-9]+)$/) {
+           $align = 1 << $1;
+           if ($align < 8) {
+               print "ZZ cross.pl cannot handle alignment < 2**3\n";
+               $align = 8
+           }
+       }
+
+       # Instruction line: "addr: b1 b2 b3 ...", tried with three, two and
+       # then one code byte.  Any other line needs no further checking.
+       if (/^[ \t]*([0-9a-f]*):[ \t]*([0-9a-f]+)[ \t]+([0-9a-f]+)[ \t]+([0-9a-f]+)/) {
+           ($addr,$b1,$b2,$b3) = ($1,$2,$3,$4);
+
+       } elsif (/^[ \t]*([0-9a-f]*):[ \t]*([0-9a-f]+)[ \t]+([0-9a-f]+)/) {
+           ($addr,$b1,$b2,$b3) = ($1,$2,$3,'');
+
+       } elsif (/^[ \t]*([0-9a-f]*):[ \t]*([0-9a-f]+)/) {
+           ($addr,$b1,$b2,$b3) = ($1,$2,'','');
+
+       } else {
+           next;
+       }
+
+       # Split into optional 0f prefix, opcode and mod/rm byte.
+       if ($b1 =~ /0f/) {
+           $prefix = $b1;
+           $opcode = $b2;
+           $modrm = $b3;
+       } else {
+           $prefix = '';
+           $opcode = $b1;
+           $modrm = $b2;
+       }
+
+       # modrm of the form 00-xxx-100 with an 0F prefix is the problem case
+       # for K6 and pre-CXT K6-2
+       if ($prefix =~ /0f/
+           && $opcode !~ /^8/         # jcond disp32
+           && $modrm =~ /^[0-3][4c]/) {
+           print "ZZ ($file) >3 bytes to determine instruction length [K6]\n";
+       }
+
+       # with just an opcode, starting 1f mod 20h
+       # (the opcode patterns listed are the exceptions that are not flagged)
+       if (($align==32 && $addr =~ /[13579bdf]f$/
+            || $align==16 && $addr =~ /f$/
+            || $align==8 && $addr =~ /[7f]$/)
+           && $prefix !~ /0f/
+           && $opcode !~ /1[012345]/ # adc
+           && $opcode !~ /1[89abcd]/ # sbb
+           && $opcode !~ /^4/        # inc/dec reg
+           && $opcode !~ /^5/        # push/pop reg
+           && $opcode !~ /68/        # push $imm32
+           && $opcode !~ /^7/        # jcond disp8
+           && $opcode !~ /a[89]/     # test+imm
+           && $opcode !~ /a[a-f]/    # stos/lods/scas
+           && $opcode !~ /b8/        # movl $imm32,%eax
+           && $opcode !~ /d[0123]/   # rcl
+           && $opcode !~ /e[0123]/   # loop/loopz/loopnz/jcxz
+           && $opcode !~ /e8/        # call disp32
+           && $opcode !~ /e[9b]/     # jmp disp32/disp8
+           && $opcode !~ /f[89abcd]/ # clc,stc,cli,sti,cld,std
+           && !($opcode =~ /f[67]/          # grp 1
+                && $modrm =~ /^[2367abef]/) # mul, imul, div, idiv
+           && $modrm !~ /^$/) {
+           print "ZZ ($file) opcode/modrm cross 32-byte boundary\n";
+       }
+
+       # with an 0F prefix, anything starting at 1f mod 20h
+       if (($align==32 && $addr =~ /[13579bdf][f]$/
+            || $align==16 && $addr =~ /f$/
+            || $align==8 && $addr =~ /[7f]$/)
+           && $prefix =~ /0f/
+           && $opcode !~ /af/        # imul
+           && $opcode !~ /a[45]/     # shldl
+           && $opcode !~ /a[cd]/     # shrdl
+           ) {
+           print "ZZ ($file) prefix/opcode cross 32-byte boundary\n";
+       }
+
+       # with an 0F prefix, anything with mod/rm starting at 1e mod 20h
+       if (($align==32 && $addr =~ /[13579bdf][e]$/
+            || $align==16 && $addr =~ /[e]$/
+            || $align==8 && $addr =~ /[6e]$/)
+           && $prefix =~ /0f/
+            && $opcode !~ /^8/        # jcond disp32
+            && $opcode !~ /af/        # imull reg,reg
+            && $opcode !~ /a[45]/     # shldl
+            && $opcode !~ /a[cd]/     # shrdl
+           && $modrm !~ /^$/) {
+           print "ZZ ($file) prefix/opcode/modrm cross 32-byte boundary\n";
+       }
+    }
+    close IN || die "Error from objdump (or objdump not available)\n";
+}
+
+
+# Files to examine: the command line arguments if any were given, otherwise
+# the .o file corresponding to each .asm file in the current directory.
+my @files;
+if ($#ARGV >= 0) {
+    @files = @ARGV;
+} else {
+    @files = glob "*.asm";
+    # Replace only a literal ".asm" suffix.  An unescaped, unanchored
+    # pattern would also rewrite "asm" earlier in a name, turning
+    # divasm.asm into di.o.asm.
+    s/\.asm$/.o/ foreach @files;
+}
+
+foreach (@files)  {
+    disassemble($_);
+}
diff --git a/mpn/x86/k6/divrem_1.asm b/mpn/x86/k6/divrem_1.asm

new file mode 100644 (file)

index 0000000..1c86d9b
--- /dev/null
+++ b/mpn/x86/k6/divrem_1.asm
@@ -0,0 +1,193 @@
+dnl  AMD K6 mpn_divrem_1 -- mpn by limb division.
+
+dnl  Copyright 1999, 2000, 2001, 2002, 2003, 2007 Free Software Foundation,
+dnl  Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C K6: 20 cycles/limb
+
+
+C mp_limb_t mpn_divrem_1 (mp_ptr dst, mp_size_t xsize,
+C                         mp_srcptr src, mp_size_t size, mp_limb_t divisor);
+C mp_limb_t mpn_divrem_1c (mp_ptr dst, mp_size_t xsize,
+C                          mp_srcptr src, mp_size_t size, mp_limb_t divisor,
+C                          mp_limb_t carry);
+C
+C The code here is basically the same as mpn/x86/divrem_1.asm, but uses loop
+C instead of decl+jnz, since it comes out 2 cycles/limb faster.
+C
+C A test is done to see if the high limb is less than the divisor, and if so
+C one less div is done.  A div is 20 cycles, so assuming high<divisor about
+C half the time, then this test saves half that amount.  The branch
+C misprediction penalty is less than that.
+C
+C Back-to-back div instructions run at 20 cycles, the same as the loop here,
+C so it seems there's nothing to gain by rearranging the loop.  Pairing the
+C mov and loop instructions was found to gain nothing.
+C
+C Enhancements:
+C
+C The low-latency K6 multiply might be thought to suit a mul-by-inverse, but
+C that algorithm has been found to suffer from the relatively poor carry
+C handling on K6 and too many auxiliary instructions.  The fractional part
+C however could be done at about 13 c/l, if it mattered enough.
+
+defframe(PARAM_CARRY,  24)
+defframe(PARAM_DIVISOR,20)
+defframe(PARAM_SIZE,   16)
+defframe(PARAM_SRC,    12)
+defframe(PARAM_XSIZE,  8)
+defframe(PARAM_DST,    4)
+
+       TEXT
+
+       ALIGN(32)
+C Carry-in entry point.  Pushes edi, esi, ebx and ebp, loads the same
+C registers that L(integer_top) documents, and puts PARAM_CARRY in edx as
+C the initial remainder.  With size==0 it goes straight to the fraction
+C part, otherwise it joins the integer loop.
+PROLOGUE(mpn_divrem_1c)
+deflit(`FRAME',0)
+
+       movl    PARAM_SIZE, %ecx
+       pushl   %edi            FRAME_pushl()
+
+       movl    PARAM_SRC, %edi
+       pushl   %esi            FRAME_pushl()
+
+       movl    PARAM_DIVISOR, %esi
+       pushl   %ebx            FRAME_pushl()
+
+       movl    PARAM_DST, %ebx
+       pushl   %ebp            FRAME_pushl()
+
+       movl    PARAM_XSIZE, %ebp
+       orl     %ecx, %ecx              C size
+
+       movl    PARAM_CARRY, %edx       C movl leaves the flags from orl
+       jz      L(fraction)             C if size==0
+
+       leal    -4(%ebx,%ebp,4), %ebx   C dst one limb below integer part
+       jmp     L(integer_top)
+
+EPILOGUE()
+
+
+       ALIGN(16)
+C Plain entry point, initial remainder zero.  Quotient limbs are stored at
+C dst, xsize fraction limbs at the low end followed by size integer limbs,
+C and the final remainder is returned in eax.
+PROLOGUE(mpn_divrem_1)
+deflit(`FRAME',0)
+
+       movl    PARAM_SIZE, %ecx
+       pushl   %edi            FRAME_pushl()
+
+       movl    PARAM_SRC, %edi
+       pushl   %esi            FRAME_pushl()
+
+       movl    PARAM_DIVISOR, %esi
+       orl     %ecx,%ecx               C size
+
+       jz      L(size_zero)            C only edi and esi pushed so far
+       pushl   %ebx            FRAME_pushl()
+
+       movl    -4(%edi,%ecx,4), %eax   C src high limb
+       xorl    %edx, %edx
+
+       movl    PARAM_DST, %ebx
+       pushl   %ebp            FRAME_pushl()
+
+       movl    PARAM_XSIZE, %ebp
+       cmpl    %esi, %eax
+
+       leal    -4(%ebx,%ebp,4), %ebx   C dst one limb below integer part
+       jae     L(integer_entry)        C high>=divisor, divide it normally
+
+
+       C high<divisor, so high of dst is zero, and avoid one div
+
+       movl    %edx, (%ebx,%ecx,4)     C edx is zero here
+       decl    %ecx
+
+       movl    %eax, %edx              C high limb becomes the remainder
+       jz      L(fraction)             C flags from decl, size was 1
+
+
+L(integer_top):
+       C eax   scratch (quotient)
+       C ebx   dst+4*xsize-4
+       C ecx   counter
+       C edx   scratch (remainder)
+       C esi   divisor
+       C edi   src
+       C ebp   xsize
+
+       movl    -4(%edi,%ecx,4), %eax
+L(integer_entry):
+
+       divl    %esi
+
+       movl    %eax, (%ebx,%ecx,4)
+       loop    L(integer_top)
+
+
+L(fraction):
+       C ecx is zero on every path reaching here, so the orl copies xsize
+       C into ecx and sets the zero flag when xsize==0.
+
+       orl     %ebp, %ecx
+       jz      L(done)
+
+       movl    PARAM_DST, %ebx
+
+
+L(fraction_top):
+       C eax   scratch (quotient)
+       C ebx   dst
+       C ecx   counter
+       C edx   scratch (remainder)
+       C esi   divisor
+       C edi
+       C ebp
+
+       xorl    %eax, %eax              C dividend low limb is zero
+
+       divl    %esi
+
+       movl    %eax, -4(%ebx,%ecx,4)
+       loop    L(fraction_top)
+
+
+L(done):
+       popl    %ebp
+       movl    %edx, %eax              C return the remainder
+       popl    %ebx
+       popl    %esi
+       popl    %edi
+       ret
+
+
+L(size_zero):
+       C size==0 with no carry-in: store xsize zero limbs at dst and
+       C return zero.
+deflit(`FRAME',8)
+       movl    PARAM_XSIZE, %ecx
+       xorl    %eax, %eax
+
+       movl    PARAM_DST, %edi
+
+       cld     C better safe than sorry, see mpn/x86/README
+
+       rep
+       stosl
+
+       popl    %esi
+       popl    %edi
+       ret
+EPILOGUE()
diff --git a/mpn/x86/k6/gcd_1.asm b/mpn/x86/k6/gcd_1.asm

new file mode 100644 (file)

index 0000000..58aff08
--- /dev/null
+++ b/mpn/x86/k6/gcd_1.asm
@@ -0,0 +1,351 @@
+dnl  AMD K6 mpn_gcd_1 -- mpn by 1 gcd.
+
+dnl  Copyright 2000, 2001, 2002, 2004 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C K6: 9.5 cycles/bit (approx)   1x1 gcd
+C     11.0 cycles/limb          Nx1 reduction (modexact_1_odd)
+
+
+C mp_limb_t mpn_gcd_1 (mp_srcptr src, mp_size_t size, mp_limb_t y);
+C
+C This code is nothing very special, but offers a speedup over what gcc 2.95
+C can do with mpn/generic/gcd_1.c.
+C
+C Future:
+C
+C Using a lookup table to count trailing zeros seems a touch quicker, but
+C after a slightly longer startup.  Might be worthwhile if an mpn_gcd_2 used
+C it too.
+
+
+dnl  If size==1 and x (the larger operand) is more than DIV_THRESHOLD bits
+dnl  bigger than y, then a division x%y is done to reduce it.
+dnl
+dnl  A divl is 20 cycles and the loop runs at about 9.5 cycles/bitpair so
+dnl  there should be an advantage in the divl at about 4 or 5 bits, which is
+dnl  what's found.
+
+deflit(DIV_THRESHOLD, 5)
+
+
+defframe(PARAM_LIMB, 12)
+defframe(PARAM_SIZE,  8)
+defframe(PARAM_SRC,   4)
+
+       TEXT
+       ALIGN(16)
+
+C Entry.  The common trailing zero bits of the src low limb and y are
+C counted into ecx and stripped from y.  A single limb operand is handled
+C by the code that follows, larger sizes at L(size_two_or_more).  The
+C result returned in eax is the odd part of the gcd shifted left by the
+C common twos count.
+PROLOGUE(mpn_gcd_1)
+deflit(`FRAME',0)
+
+       ASSERT(ne, `cmpl $0, PARAM_LIMB')
+       ASSERT(ae, `cmpl $1, PARAM_SIZE')
+
+
+       movl    PARAM_SRC, %eax
+       pushl   %ebx                    FRAME_pushl()
+
+       movl    PARAM_LIMB, %edx
+       movl    $-1, %ecx               C count starts at -1, loop runs once
+
+       movl    (%eax), %ebx            C src low limb
+
+       movl    %ebx, %eax              C src low limb
+       orl     %edx, %ebx              C lowest set bit of either operand
+
+L(common_twos):
+       shrl    %ebx
+       incl    %ecx                    C incl leaves the carry from shrl
+
+       jnc     L(common_twos)          C 1/4 chance on random data
+       shrl    %cl, %edx               C y
+
+       cmpl    $1, PARAM_SIZE
+       ja      L(size_two_or_more)
+
+
+       ASSERT(nz, `orl %eax, %eax')    C should have src limb != 0
+
+       shrl    %cl, %eax               C x
+
+
+       C Swap if necessary to make x>=y.  Measures a touch quicker as a
+       C jump than a branch free calculation.
+       C
+       C eax   x
+       C ebx
+       C ecx   common twos
+       C edx   y
+
+       movl    %eax, %ebx
+       cmpl    %eax, %edx
+
+       jb      L(noswap)
+       movl    %edx, %eax
+
+       movl    %ebx, %edx
+       movl    %eax, %ebx
+L(noswap):
+
+
+       C See if it's worth reducing x with a divl.
+       C
+       C eax   x
+       C ebx   x
+       C ecx   common twos
+       C edx   y
+
+       shrl    $DIV_THRESHOLD, %ebx
+
+       cmpl    %ebx, %edx
+       ja      L(nodiv)                C y > x>>DIV_THRESHOLD, skip the divl
+
+
+       C Reduce x to x%y.
+       C
+       C eax   x
+       C ebx
+       C ecx   common twos
+       C edx   y
+
+       movl    %edx, %ebx
+       xorl    %edx, %edx
+
+       divl    %ebx
+
+       orl     %edx, %edx      C y
+       nop     C code alignment
+
+       movl    %ebx, %eax      C x
+       jz      L(done_shll)    C remainder zero, old y in eax is the answer
+L(nodiv):
+
+
+       C eax   x
+       C ebx
+       C ecx   common twos
+       C edx   y
+       C esi
+       C edi
+       C ebp
+
+L(strip_y):
+       shrl    %edx
+       jnc     L(strip_y)
+
+       leal    1(%edx,%edx), %edx      C restore the 1 bit shifted out
+       movl    %ecx, %ebx      C common twos
+
+       leal    1(%eax), %ecx
+       jmp     L(strip_x_and)
+
+
+C Calculating a %cl shift based on the low bit 0 or 1 avoids doing a branch
+C on a 50/50 chance of 0 or 1.  The chance of the next bit also being 0 is
+C only 1/4.
+C
+C A second computed %cl shift was tried, but that measured a touch slower
+C than branching back.
+C
+C A branch-free abs(x-y) and min(x,y) calculation was tried, but that
+C measured about 1 cycle/bit slower.
+
+       C eax   x
+       C ebx   common twos
+       C ecx   scratch
+       C edx   y
+
+       ALIGN(4)
+L(swap):
+       addl    %eax, %edx      C x-y+y = x
+       negl    %eax            C -(x-y) = y-x
+
+L(strip_x):
+       shrl    %eax            C odd-odd = even, so always one to strip
+       ASSERT(nz)
+
+L(strip_x_leal):
+       leal    1(%eax), %ecx
+
+L(strip_x_and):
+       andl    $1, %ecx        C (x^1)&1
+
+       shrl    %cl, %eax       C shift if x even
+
+       testb   $1, %al
+       jz      L(strip_x)
+
+       ASSERT(nz,`testl $1, %eax')     C x, y odd
+       ASSERT(nz,`testl $1, %edx')
+
+       subl    %edx, %eax
+       jb      L(swap)         C x<y, exchange roles
+       ja      L(strip_x)      C x>y, keep stripping the difference
+
+
+       C x==y, so y is the odd part of the gcd
+
+       movl    %edx, %eax
+       movl    %ebx, %ecx
+
+L(done_shll):
+       shll    %cl, %eax       C put back the common twos
+       popl    %ebx
+
+       ret
+
+
+C -----------------------------------------------------------------------------
+C Two or more limbs.
+C
+C x={src,size} is reduced modulo y using either a plain mod_1 style
+C remainder, or a modexact_1 style exact division.
+
+dnl  Limb count, after the optional high limb skip, at or above which the
+dnl  L(modexact) path is taken in preference to the divl loop.
+deflit(MODEXACT_THRESHOLD, ifdef(`PIC', 4, 4))
+
+       ALIGN(8)
+L(size_two_or_more):
+       C eax
+       C ebx
+       C ecx   common twos
+       C edx   y, without common twos
+       C esi
+       C edi
+       C ebp
+
+deflit(FRAME_TWO_OR_MORE, FRAME)
+
+       pushl   %edi            defframe_pushl(SAVE_EDI)
+       movl    PARAM_SRC, %ebx
+
+L(y_twos):
+       shrl    %edx
+       jnc     L(y_twos)
+
+       C edx is now y_odd>>1, the loop having shifted out the low 1 bit
+
+       movl    %ecx, %edi              C common twos
+       movl    PARAM_SIZE, %ecx
+
+       pushl   %esi            defframe_pushl(SAVE_ESI)
+       leal    1(%edx,%edx), %esi      C y (odd)
+
+       movl    -4(%ebx,%ecx,4), %eax   C src high limb
+
+       C The compare is against edx = y>>1, not the divisor in esi, so the
+       C skip is taken only when high < y>>1.  That implies high < divisor,
+       C which makes high a valid initial remainder.
+
+       cmpl    %edx, %eax              C carry if high < y>>1
+
+       sbbl    %edx, %edx              C -1 if carry, else 0
+
+       addl    %edx, %ecx              C skip one limb if carry
+       andl    %eax, %edx              C initial remainder: high or 0
+
+       cmpl    $MODEXACT_THRESHOLD, %ecx
+       jae     L(modexact)
+
+
+L(divide_top):
+       C eax   scratch (quotient)
+       C ebx   src
+       C ecx   counter, size-1 to 1
+       C edx   carry (remainder)
+       C esi   divisor (odd)
+       C edi
+       C ebp
+
+       movl    -4(%ebx,%ecx,4), %eax
+       divl    %esi
+       loop    L(divide_top)
+
+
+       C Set up the registers as L(strip_x_and) expects them and restore
+       C esi and edi.  ebx stays on the stack for the final popl.
+
+       movl    %edx, %eax      C x
+       movl    %esi, %edx      C y (odd)
+
+       movl    %edi, %ebx      C common twos
+       popl    %esi
+
+       popl    %edi
+       leal    1(%eax), %ecx
+
+       orl     %eax, %eax
+       jnz     L(strip_x_and)
+
+
+       C remainder zero, so the odd part of the gcd is y
+
+       movl    %ebx, %ecx
+       movl    %edx, %eax
+
+       shll    %cl, %eax
+       popl    %ebx
+
+       ret
+
+
+       ALIGN(8)
+L(modexact):
+       C eax
+       C ebx   src ptr
+       C ecx   size or size-1
+       C edx
+       C esi   y odd
+       C edi   common twos
+       C ebp
+       C
+       C Calls mpn_modexact_1_odd with stack arguments src, PARAM_SIZE and
+       C y odd (pushed in reverse order).  The full PARAM_SIZE is passed,
+       C not the possibly reduced count in ecx.  The code after the call
+       C relies on esi and edi still holding y odd and the common twos.
+
+       movl    PARAM_SIZE, %eax
+       pushl   %esi            FRAME_pushl()
+
+       pushl   %eax            FRAME_pushl()
+
+       pushl   %ebx            FRAME_pushl()
+
+ifdef(`PIC',`
+       nop     C code alignment
+       call    L(movl_eip_ebx)
+L(here):
+       addl    $_GLOBAL_OFFSET_TABLE_, %ebx
+       call    GSYM_PREFIX`'mpn_modexact_1_odd@PLT
+',`
+       call    GSYM_PREFIX`'mpn_modexact_1_odd
+')
+
+       movl    %esi, %edx              C y odd
+       movl    SAVE_ESI, %esi
+
+       movl    %edi, %ebx              C common twos
+       movl    SAVE_EDI, %edi
+
+       C Drop everything pushed since L(size_two_or_more): the three call
+       C arguments and the two register save slots.
+
+       addl    $eval(FRAME - FRAME_TWO_OR_MORE), %esp
+       orl     %eax, %eax              C x is the call result in eax
+
+       leal    1(%eax), %ecx           C leal leaves the flags from orl
+       jnz     L(strip_x_and)
+
+
+       C result zero, so the odd part of the gcd is y
+
+       movl    %ebx, %ecx
+       movl    %edx, %eax
+
+       shll    %cl, %eax
+       popl    %ebx
+
+       ret
+
+
+C PIC helper: loads the return address of the call, which is the address
+C of L(here), into ebx.
+ifdef(`PIC',`
+L(movl_eip_ebx):
+       movl    (%esp), %ebx
+       ret_internal
+')
+
+EPILOGUE()
diff --git a/mpn/x86/k6/gmp-mparam.h b/mpn/x86/k6/gmp-mparam.h

new file mode 100644 (file)

index 0000000..168ea06
--- /dev/null
+++ b/mpn/x86/k6/gmp-mparam.h
@@ -0,0 +1,155 @@
+/* AMD K6 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2009, 2010
+Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 32
+#define BYTES_PER_MP_LIMB 4
+
+
+/* 450MHz K6-2 */
+
+#define MOD_1_NORM_THRESHOLD                12
+#define MOD_1_UNNORM_THRESHOLD           MP_SIZE_T_MAX  /* never */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD         28
+#define MOD_1U_TO_MOD_1_1_THRESHOLD         18
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD     MP_SIZE_T_MAX  /* never */
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     82
+#define USE_PREINV_DIVREM_1                  0
+#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD        MP_SIZE_T_MAX  /* never */
+
+#define MUL_TOOM22_THRESHOLD                20
+#define MUL_TOOM33_THRESHOLD                69
+#define MUL_TOOM44_THRESHOLD               106
+#define MUL_TOOM6H_THRESHOLD               157
+#define MUL_TOOM8H_THRESHOLD               199
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      73
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD      69
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD      65
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD      64
+
+#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
+#define SQR_TOOM2_THRESHOLD                 32
+#define SQR_TOOM3_THRESHOLD                 97
+#define SQR_TOOM4_THRESHOLD                143
+#define SQR_TOOM6_THRESHOLD                222
+#define SQR_TOOM8_THRESHOLD                272
+
+#define MULMOD_BNM1_THRESHOLD               13
+#define SQRMOD_BNM1_THRESHOLD               17
+
+#define MUL_FFT_MODF_THRESHOLD             476  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    476, 5}, {     17, 6}, {      9, 5}, {     19, 6}, \
+    {     11, 5}, {     23, 6}, {     17, 7}, {      9, 6}, \
+    {     19, 7}, {     11, 6}, {     23, 7}, {     13, 6}, \
+    {     27, 7}, {     15, 6}, {     31, 7}, {     17, 6}, \
+    {     35, 7}, {     21, 8}, {     11, 7}, {     27, 8}, \
+    {     15, 7}, {     35, 8}, {     19, 7}, {     39, 8}, \
+    {     23, 7}, {     47, 8}, {     27, 9}, {     15, 8}, \
+    {     31, 7}, {     63, 8}, {     39, 9}, {     23, 8}, \
+    {     51,10}, {     15, 9}, {     31, 8}, {     67, 9}, \
+    {     47,10}, {     31, 9}, {     79,10}, {     47, 9}, \
+    {     95,11}, {     31,10}, {     63, 9}, {    135,10}, \
+    {     79, 9}, {    167,10}, {     95, 9}, {    191,10}, \
+    {    111,11}, {     63,10}, {    127, 9}, {    255,10}, \
+    {    143, 9}, {    287,10}, {    159,11}, {     95,10}, \
+    {    191, 9}, {    383,12}, {     63,11}, {    127,10}, \
+    {    255, 9}, {    511,10}, {    271, 9}, {    543,10}, \
+    {    287,11}, {    159,10}, {    351,11}, {    191,10}, \
+    {    415, 9}, {    831,11}, {    223,12}, {    127,11}, \
+    {    255,10}, {    543,11}, {    287,10}, {    575,11}, \
+    {    351,10}, {    703,12}, {    191,11}, {    415,10}, \
+    {    831,13}, {    127,12}, {    255,11}, {    543,10}, \
+    {   1087,11}, {    575,12}, {    319,11}, {    703,12}, \
+    {    383,11}, {    831,12}, {    447,11}, {    895,13}, \
+    {    255,12}, {    511,11}, {   1087,12}, {    575,11}, \
+    {   1151,12}, {    703,13}, {    383,12}, {    959,14}, \
+    {    255,13}, {    511,12}, {   1215,13}, {   8192,14}, \
+    {  16384,15}, {  32768,16} }
+#define MUL_FFT_TABLE3_SIZE 106
+#define MUL_FFT_THRESHOLD                 7424
+
+#define SQR_FFT_MODF_THRESHOLD             432  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    432, 5}, {     17, 6}, {      9, 5}, {     19, 6}, \
+    {     11, 5}, {     23, 6}, {     21, 7}, {     11, 6}, \
+    {     24, 7}, {     13, 6}, {     27, 7}, {     15, 6}, \
+    {     31, 7}, {     21, 8}, {     11, 7}, {     29, 8}, \
+    {     15, 7}, {     35, 8}, {     19, 7}, {     39, 8}, \
+    {     23, 7}, {     49, 8}, {     27, 9}, {     15, 8}, \
+    {     39, 9}, {     23, 7}, {     93, 8}, {     47, 7}, \
+    {     95, 8}, {     51,10}, {     15, 9}, {     31, 8}, \
+    {     67, 9}, {     39, 8}, {     79, 9}, {     47, 8}, \
+    {     95, 9}, {     55,10}, {     31, 9}, {     71, 8}, \
+    {    143, 9}, {     79,10}, {     47, 9}, {     95,11}, \
+    {     31,10}, {     63, 9}, {    135,10}, {     79, 9}, \
+    {    167,10}, {     95, 9}, {    191,11}, {     63,10}, \
+    {    127, 9}, {    255,10}, {    143, 9}, {    287, 8}, \
+    {    575,10}, {    159, 9}, {    319,11}, {     95,10}, \
+    {    191,12}, {     63,11}, {    127,10}, {    255, 9}, \
+    {    511,10}, {    271, 9}, {    543,10}, {    287,11}, \
+    {    159,10}, {    319, 9}, {    639,10}, {    351, 9}, \
+    {    703,11}, {    191,10}, {    415,11}, {    223,12}, \
+    {    127,11}, {    255,10}, {    543,11}, {    287,10}, \
+    {    607,11}, {    319,10}, {    639,11}, {    351,10}, \
+    {    703,12}, {    191,11}, {    415,10}, {    831,13}, \
+    {    127,12}, {    255,11}, {    543,10}, {   1087,11}, \
+    {    607,12}, {    319,11}, {    703,12}, {    383,11}, \
+    {    831,12}, {    447,13}, {    255,12}, {    511,11}, \
+    {   1087,12}, {    575,11}, {   1215,12}, {    703,13}, \
+    {    383,12}, {    895,14}, {    255,13}, {    511,12}, \
+    {   1215,13}, {   8192,14}, {  16384,15}, {  32768,16} }
+#define SQR_FFT_TABLE3_SIZE 112
+#define SQR_FFT_THRESHOLD                 7040
+
+#define MULLO_BASECASE_THRESHOLD             3
+#define MULLO_DC_THRESHOLD                  60
+#define MULLO_MUL_N_THRESHOLD            13463
+
+#define DC_DIV_QR_THRESHOLD                 78
+#define DC_DIVAPPR_Q_THRESHOLD             252
+#define DC_BDIV_QR_THRESHOLD                84
+#define DC_BDIV_Q_THRESHOLD                171
+
+#define INV_MULMOD_BNM1_THRESHOLD           55
+#define INV_NEWTON_THRESHOLD               234
+#define INV_APPR_THRESHOLD                 236
+
+#define BINV_NEWTON_THRESHOLD              268
+#define REDC_1_TO_REDC_N_THRESHOLD          67
+
+#define MU_DIV_QR_THRESHOLD               1308
+#define MU_DIVAPPR_Q_THRESHOLD            1142
+#define MUPI_DIV_QR_THRESHOLD              134
+#define MU_BDIV_QR_THRESHOLD              1164
+#define MU_BDIV_Q_THRESHOLD               1164
+
+#define MATRIX22_STRASSEN_THRESHOLD         15
+#define HGCD_THRESHOLD                     182
+#define GCD_DC_THRESHOLD                   591
+#define GCDEXT_DC_THRESHOLD                472
+#define JACOBI_BASE_METHOD                   2
+
+#define GET_STR_DC_THRESHOLD                24
+#define GET_STR_PRECOMPUTE_THRESHOLD        40
+#define SET_STR_DC_THRESHOLD               834
+#define SET_STR_PRECOMPUTE_THRESHOLD      2042
diff --git a/mpn/x86/k6/k62mmx/copyd.asm b/mpn/x86/k6/k62mmx/copyd.asm

new file mode 100644 (file)

index 0000000..227ed78
--- /dev/null
+++ b/mpn/x86/k6/k62mmx/copyd.asm
@@ -0,0 +1,107 @@
+dnl  AMD K6-2 mpn_copyd -- copy limb vector, decrementing.
+
+dnl  Copyright 2001, 2002 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C K6-2: 1.0 cycles/limb
+
+
+C void mpn_copyd (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C
+C The loop here is no faster than a rep movsl at 1.0 c/l, but it avoids a 30
+C cycle startup time, which amounts for instance to a 2x speedup at 15
+C limbs.
+C
+C If dst is 4mod8 the loop would be 1.17 c/l, but that's avoided by
+C processing one limb separately to make it aligned.  This and a final odd
+C limb are handled in a branch-free fashion, ending up re-copying if the
+C special case isn't needed.
+C
+C Alternatives:
+C
+C There used to be a big unrolled version of this, running at 0.56 c/l if
+C the destination was aligned, but that seemed rather excessive for the
+C relative importance of copyd.
+C
+C If the destination alignment is ignored and just left to run at 1.17 c/l
+C some code size and a fixed few cycles can be saved.  Considering how few
+C uses copyd finds perhaps that should be favoured.  The current code has
+C the attraction of being no slower than a basic rep movsl though.
+
+defframe(PARAM_SIZE,12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+dnl  re-using parameter space
+define(SAVE_EBX,`PARAM_SIZE')
+
+       TEXT
+       ALIGN(16)
+
+PROLOGUE(mpn_copyd)
+deflit(`FRAME',0)
+       C Copy size limbs from src to dst, working down from the high limb.
+       movl    PARAM_SIZE, %ecx
+       movl    %ebx, SAVE_EBX          C slot is PARAM_SIZE, already read
+
+       movl    PARAM_SRC, %eax
+       movl    PARAM_DST, %edx
+
+       subl    $1, %ecx                C better code alignment than decl
+       jb      L(zero)                 C size==0, nothing to copy
+
+       jz      L(one_more)             C size==1, flags still from the subl
+       leal    4(%edx,%ecx,4), %ebx    C &dst[size]
+
+Zdisp( movd,   0,(%eax,%ecx,4), %mm0)  C high limb
+Zdisp( movd,   %mm0, 0,(%edx,%ecx,4))  C Zdisp for good code alignment
+
+       cmpl    $1, %ecx
+       je      L(one_more)             C size==2, only the low limb is left
+
+       shrl    $2, %ebx
+       andl    $1, %ebx                C 1 if dst[size-2] unaligned
+
+       subl    %ebx, %ecx              C if so the movq pairs start a limb lower
+       nop                             C code alignment
+
+L(top):
+       C eax   src
+       C ebx
+       C ecx   counter, index of the upper limb of the pair, step by -2
+       C edx   dst
+
+       movq    -4(%eax,%ecx,4), %mm0
+       subl    $2, %ecx
+
+       movq    %mm0, 4(%edx,%ecx,4)
+       ja      L(top)                  C flags from the subl, movq leaves them
+
+
+L(one_more):
+       movd    (%eax), %mm0            C low limb, maybe a re-copy
+       movd    %mm0, (%edx)
+
+       movl    SAVE_EBX, %ebx
+       emms_or_femms
+L(zero):
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/k6/k62mmx/lshift.asm b/mpn/x86/k6/k62mmx/lshift.asm

new file mode 100644 (file)

index 0000000..e48e73e
--- /dev/null
+++ b/mpn/x86/k6/k62mmx/lshift.asm
@@ -0,0 +1,283 @@
+dnl  AMD K6-2 mpn_lshift -- mpn left shift.
+
+dnl  Copyright 1999, 2000, 2002 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C K6-2: 1.75 cycles/limb
+
+
+C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                       unsigned shift);
+C
+
+defframe(PARAM_SHIFT,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC,  8)
+defframe(PARAM_DST,  4)
+deflit(`FRAME',0)
+
+dnl  used after src has been fetched
+define(VAR_RETVAL,`PARAM_SRC')
+
+dnl  minimum 9, because unrolled loop can't handle less
+deflit(UNROLL_THRESHOLD, 9)
+
+       TEXT
+       ALIGN(32)
+
+PROLOGUE(mpn_lshift)
+deflit(`FRAME',0)
+
+       C The 1 limb case can be done without the push %ebx, but it's then
+       C still the same speed.  The push is left as a free helping hand for
+       C the two_or_more code.
+
+       movl    PARAM_SIZE, %eax
+       pushl   %ebx                    FRAME_pushl()
+
+       movl    PARAM_SRC, %ebx
+       decl    %eax
+
+       movl    PARAM_SHIFT, %ecx
+       jnz     L(two_or_more)          C flags from the decl, movl leaves them
+
+       movl    (%ebx), %edx            C src limb
+       movl    PARAM_DST, %ebx
+
+       shldl(  %cl, %edx, %eax)        C return value (eax is zero on entry)
+
+       shll    %cl, %edx
+
+       movl    %edx, (%ebx)            C dst limb
+       popl    %ebx
+
+       ret
+
+
+C -----------------------------------------------------------------------------
+       ALIGN(16)       C avoid offset 0x1f
+L(two_or_more):
+       C eax   size-1
+       C ebx   src
+       C ecx   shift
+       C edx
+
+       movl    (%ebx,%eax,4), %edx     C src high limb
+       negl    %ecx
+
+       movd    PARAM_SHIFT, %mm6
+       addl    $32, %ecx               C 32-shift
+
+       shrl    %cl, %edx               C bits shifted out of the high limb
+       cmpl    $UNROLL_THRESHOLD-1, %eax
+
+       movl    %edx, VAR_RETVAL
+       jae     L(unroll)               C flags from the cmpl, movl leaves them
+
+
+       movd    %ecx, %mm7
+       movl    %eax, %ecx
+
+       movl    PARAM_DST, %eax
+
+L(simple):
+       C eax   dst
+       C ebx   src
+       C ecx   counter, size-1 to 1
+       C edx   retval
+       C
+       C mm0   scratch
+       C mm6   shift
+       C mm7   32-shift
+
+       movq    -4(%ebx,%ecx,4), %mm0
+
+       psrlq   %mm7, %mm0
+
+Zdisp( movd,   %mm0, 0,(%eax,%ecx,4))
+       loop    L(simple)
+
+
+       movd    (%ebx), %mm0            C src[0]
+       popl    %ebx
+
+       psllq   %mm6, %mm0
+
+       movd    %mm0, (%eax)
+       movl    %edx, %eax
+
+       femms
+       ret
+
+
+C -----------------------------------------------------------------------------
+       ALIGN(16)
+L(unroll):
+       C eax   size-1
+       C ebx   src
+       C ecx   32-shift
+       C edx   retval (but instead VAR_RETVAL is used)
+       C
+       C mm6   shift
+
+       addl    $32, %ecx               C 64-shift
+       movl    PARAM_DST, %edx
+
+       movd    %ecx, %mm7
+       subl    $7, %eax                        C size-8
+
+       leal    (%edx,%eax,4), %ecx             C alignment of dst
+
+       movq    32-8(%ebx,%eax,4), %mm2         C src high qword
+       testb   $4, %cl
+
+       jz      L(dst_aligned)          C flags from the testb, movq leaves them
+       psllq   %mm6, %mm2
+
+       psrlq   $32, %mm2
+       decl    %eax
+
+       movd    %mm2, 32(%edx,%eax,4)           C dst high limb
+       movq    32-8(%ebx,%eax,4), %mm2         C new src high qword
+L(dst_aligned):
+
+       movq    32-16(%ebx,%eax,4), %mm0        C src second highest qword
+
+
+       C This loop is the important bit, the rest is just support for it.
+       C Four src limbs are held at the start, and four more will be read.
+       C Four dst limbs will be written.  This schedule seems necessary for
+       C full speed.
+       C
+       C The use of size-8 lets the loop stop when %eax goes negative and
+       C leaves -4 to -1 which can be tested with test $1 and $2.
+
+L(top):
+       C eax   counter, size-8 step by -4 until <0
+       C ebx   src
+       C ecx
+       C edx   dst
+       C
+       C mm0   src next qword
+       C mm1   scratch
+       C mm2   src prev qword
+       C mm6   shift
+       C mm7   64-shift
+
+       psllq   %mm6, %mm2
+       subl    $4, %eax
+
+       movq    %mm0, %mm1
+       psrlq   %mm7, %mm0
+
+       por     %mm0, %mm2
+       movq    24(%ebx,%eax,4), %mm0
+
+       psllq   %mm6, %mm1
+       movq    %mm2, 40(%edx,%eax,4)
+
+       movq    %mm0, %mm2
+       psrlq   %mm7, %mm0
+
+       por     %mm0, %mm1
+       movq    16(%ebx,%eax,4), %mm0
+
+       movq    %mm1, 32(%edx,%eax,4)
+       jnc     L(top)                  C carry from the subl, MMX ops leave flags
+
+
+       C Now have four limbs in mm2 (prev) and mm0 (next), plus eax mod 4.
+       C
+       C 8(%ebx) is the next source, and 24(%edx) is the next destination.
+       C %eax is between -4 and -1, representing respectively 0 to 3 extra
+       C limbs that must be read.
+
+
+       testl   $2, %eax        C testl to avoid bad cache line crossing
+       jz      L(finish_nottwo)
+
+       C Two more limbs: lshift mm2, OR it with rshifted mm0, mm0 becomes
+       C new mm2 and a new mm0 is loaded.
+
+       psllq   %mm6, %mm2
+       movq    %mm0, %mm1
+
+       psrlq   %mm7, %mm0
+       subl    $2, %eax
+
+       por     %mm0, %mm2
+       movq    16(%ebx,%eax,4), %mm0
+
+       movq    %mm2, 32(%edx,%eax,4)
+       movq    %mm1, %mm2
+L(finish_nottwo):
+
+
+       C lshift mm2, OR with rshifted mm0, mm1 becomes lshifted mm0
+
+       testb   $1, %al
+       psllq   %mm6, %mm2
+
+       movq    %mm0, %mm1
+       psrlq   %mm7, %mm0
+
+       por     %mm0, %mm2
+       psllq   %mm6, %mm1
+
+       movq    %mm2, 24(%edx,%eax,4)
+       jz      L(finish_even)          C flags from the testb above
+
+
+       C Size is odd, so mm1 and one extra limb to process.
+
+       movd    (%ebx), %mm0            C src[0]
+       popl    %ebx
+deflit(`FRAME',0)
+
+       movq    %mm0, %mm2
+       psllq   $32, %mm0
+
+       psrlq   %mm7, %mm0
+
+       psllq   %mm6, %mm2
+       por     %mm0, %mm1
+
+       movq    %mm1, 4(%edx)           C dst[1,2]
+       movd    %mm2, (%edx)            C dst[0]
+
+       movl    VAR_RETVAL, %eax
+
+       femms
+       ret
+
+
+       nop     C avoid bad cache line crossing
+L(finish_even):
+deflit(`FRAME',4)
+       C Size is even, so only mm1 left to process.
+
+       movq    %mm1, (%edx)            C dst[0,1]
+       movl    VAR_RETVAL, %eax
+
+       popl    %ebx
+       femms
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/k6/k62mmx/rshift.asm b/mpn/x86/k6/k62mmx/rshift.asm

new file mode 100644 (file)

index 0000000..b3114d0
--- /dev/null
+++ b/mpn/x86/k6/k62mmx/rshift.asm
@@ -0,0 +1,282 @@
+dnl  AMD K6-2 mpn_rshift -- mpn right shift.
+
+dnl  Copyright 1999, 2000, 2002 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C K6-2: 1.75 cycles/limb
+
+
+C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                       unsigned shift);
+C
+
+defframe(PARAM_SHIFT,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC,  8)
+defframe(PARAM_DST,  4)
+deflit(`FRAME',0)
+
+dnl  Minimum 9, because the unrolled loop can't handle less.
+dnl
+deflit(UNROLL_THRESHOLD, 9)
+
+       TEXT
+       ALIGN(32)
+
+PROLOGUE(mpn_rshift)
+deflit(`FRAME',0)
+
+       C The 1 limb case can be done without the push %ebx, but it's then
+       C still the same speed.  The push is left as a free helping hand for
+       C the two_or_more code.
+
+       movl    PARAM_SIZE, %eax
+       pushl   %ebx                    FRAME_pushl()
+
+       movl    PARAM_SRC, %ebx
+       decl    %eax
+
+       movl    PARAM_SHIFT, %ecx
+       jnz     L(two_or_more)          C flags from the decl, movl leaves them
+
+       movl    (%ebx), %edx            C src limb
+       movl    PARAM_DST, %ebx
+
+       shrdl(  %cl, %edx, %eax)        C return value (eax is zero on entry)
+
+       shrl    %cl, %edx
+
+       movl    %edx, (%ebx)            C dst limb
+       popl    %ebx
+
+       ret
+
+
+C -----------------------------------------------------------------------------
+       ALIGN(16)       C avoid offset 0x1f
+L(two_or_more):
+       C eax   size-1
+       C ebx   src
+       C ecx   shift
+       C edx
+
+       movl    (%ebx), %edx    C src low limb
+       negl    %ecx
+
+       addl    $32, %ecx
+       movd    PARAM_SHIFT, %mm6
+
+       shll    %cl, %edx               C bits shifted out of the low limb
+       cmpl    $UNROLL_THRESHOLD-1, %eax
+
+       jae     L(unroll)
+
+
+       C eax   size-1
+       C ebx   src
+       C ecx   32-shift
+       C edx   retval
+       C
+       C mm6   shift
+
+       movl    PARAM_DST, %ecx
+       leal    (%ebx,%eax,4), %ebx
+
+       leal    -4(%ecx,%eax,4), %ecx
+       negl    %eax
+
+       C This loop runs at about 3 cycles/limb, which is the amount of
+       C decoding, and this is despite every second access being unaligned.
+
+L(simple):
+       C eax   counter, -(size-1) to -1
+       C ebx   &src[size-1]
+       C ecx   &dst[size-2]
+       C edx   retval
+       C
+       C mm0   scratch
+       C mm6   shift
+
+Zdisp( movq,   0,(%ebx,%eax,4), %mm0)
+       incl    %eax
+
+       psrlq   %mm6, %mm0
+
+Zdisp( movd,   %mm0, 0,(%ecx,%eax,4))
+       jnz     L(simple)               C flags from the incl, MMX ops leave them
+
+
+       movq    %mm0, (%ecx)            C dst[size-2,size-1]
+       movl    %edx, %eax
+
+       popl    %ebx
+
+       femms
+       ret
+
+
+C -----------------------------------------------------------------------------
+       ALIGN(16)
+L(unroll):
+       C eax   size-1
+       C ebx   src
+       C ecx   32-shift
+       C edx   retval
+       C
+       C mm6   shift
+
+       addl    $32, %ecx               C 64-shift
+       subl    $7, %eax                C size-8
+
+       movd    %ecx, %mm7
+       movl    PARAM_DST, %ecx
+
+       movq    (%ebx), %mm2            C src low qword
+       leal    (%ebx,%eax,4), %ebx     C src end - 32
+
+       testb   $4, %cl
+       leal    (%ecx,%eax,4), %ecx     C dst end - 32
+
+       notl    %eax                    C -(size-7)
+       jz      L(dst_aligned)          C flags from the testb, leal/notl leave them
+
+       psrlq   %mm6, %mm2
+       incl    %eax
+
+Zdisp( movd,   %mm2, 0,(%ecx,%eax,4))  C dst low limb
+       movq    4(%ebx,%eax,4), %mm2    C new src low qword
+L(dst_aligned):
+
+       movq    12(%ebx,%eax,4), %mm0   C src second lowest qword
+       nop     C avoid bad cache line crossing
+
+
+       C This loop is the important bit, the rest is just support for it.
+       C Four src limbs are held at the start, and four more will be read.
+       C Four dst limbs will be written.  This schedule seems necessary for
+       C full speed.
+       C
+       C The use of -(size-7) lets the loop stop when %eax becomes >= 0 and
+       C leaves 0 to 3 which can be tested with test $1 and $2.
+
+L(top):
+       C eax   counter, -(size-7) step by +4 until >=0
+       C ebx   src end - 32
+       C ecx   dst end - 32
+       C edx   retval
+       C
+       C mm0   src next qword
+       C mm1   scratch
+       C mm2   src prev qword
+       C mm6   shift
+       C mm7   64-shift
+
+       psrlq   %mm6, %mm2
+       addl    $4, %eax
+
+       movq    %mm0, %mm1
+       psllq   %mm7, %mm0
+
+       por     %mm0, %mm2
+       movq    4(%ebx,%eax,4), %mm0
+
+       psrlq   %mm6, %mm1
+       movq    %mm2, -12(%ecx,%eax,4)
+
+       movq    %mm0, %mm2
+       psllq   %mm7, %mm0
+
+       por     %mm0, %mm1
+       movq    12(%ebx,%eax,4), %mm0
+
+       movq    %mm1, -4(%ecx,%eax,4)
+       ja      L(top)          C jump if no carry and not zero
+
+
+
+       C Now have the four limbs in mm2 (low) and mm0 (high), and %eax is 0
+       C to 3 representing respectively 3 to 0 further limbs.
+
+       testl   $2, %eax        C testl to avoid bad cache line crossings
+       jnz     L(finish_nottwo)
+
+       C Two or three extra limbs: rshift mm2, OR it with lshifted mm0, mm0
+       C becomes new mm2 and a new mm0 is loaded.
+
+       psrlq   %mm6, %mm2
+       movq    %mm0, %mm1
+
+       psllq   %mm7, %mm0
+       addl    $2, %eax
+
+       por     %mm0, %mm2
+       movq    12(%ebx,%eax,4), %mm0
+
+       movq    %mm2, -4(%ecx,%eax,4)
+       movq    %mm1, %mm2
+L(finish_nottwo):
+
+
+       testb   $1, %al
+       psrlq   %mm6, %mm2
+
+       movq    %mm0, %mm1
+       psllq   %mm7, %mm0
+
+       por     %mm0, %mm2
+       psrlq   %mm6, %mm1
+
+       movq    %mm2, 4(%ecx,%eax,4)
+       jnz     L(finish_even)          C flags from the testb above
+
+
+       C one further extra limb to process
+
+       movd    32-4(%ebx), %mm0        C src[size-1], most significant limb
+       popl    %ebx
+
+       movq    %mm0, %mm2
+       psllq   %mm7, %mm0
+
+       por     %mm0, %mm1
+       psrlq   %mm6, %mm2
+
+       movq    %mm1, 32-12(%ecx)       C dst[size-3,size-2]
+       movd    %mm2, 32-4(%ecx)        C dst[size-1]
+
+       movl    %edx, %eax              C retval
+
+       femms
+       ret
+
+
+       nop     C avoid bad cache line crossing
+L(finish_even):
+       C no further extra limbs
+
+       movq    %mm1, 32-8(%ecx)        C dst[size-2,size-1]
+       movl    %edx, %eax              C retval
+
+       popl    %ebx
+
+       femms
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/k6/mmx/com.asm b/mpn/x86/k6/mmx/com.asm

new file mode 100644 (file)

index 0000000..3dcf539
--- /dev/null
+++ b/mpn/x86/k6/mmx/com.asm
@@ -0,0 +1,92 @@
+dnl  AMD K6-2 mpn_com -- mpn bitwise one's complement.
+
+dnl  Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+NAILS_SUPPORT(0-31)
+
+
+C    alignment dst/src, A=0mod8 N=4mod8
+C       A/A   A/N   N/A   N/N
+C K6-2  1.0   1.18  1.18  1.18  cycles/limb
+C K6    1.5   1.85  1.75  1.85
+
+
+C void mpn_com (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C
+C Take the bitwise ones-complement of src,size and write it to dst,size.
+
+defframe(PARAM_SIZE,12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_com)
+deflit(`FRAME',0)
+       C dst = complement of src, by qword pairs plus a possible odd limb.
+       movl    PARAM_SIZE, %ecx
+       movl    PARAM_SRC, %eax
+       movl    PARAM_DST, %edx
+       shrl    %ecx                    C floor(size/2), carry = low bit of size
+       jnz     L(two_or_more)
+
+       movl    (%eax), %eax            C single limb (a size of 0 also gets here)
+       notl_or_xorl_GMP_NUMB_MASK(     %eax)
+       movl    %eax, (%edx)
+       ret
+
+
+L(two_or_more):
+       pushl   %ebx    FRAME_pushl()
+       pcmpeqd %mm7, %mm7              C all ones
+
+       movl    %ecx, %ebx              C kept to index the odd limb after the loop
+ifelse(GMP_NAIL_BITS,0,,
+`      psrld   $GMP_NAIL_BITS, %mm7')  C clear nails
+
+
+
+       ALIGN(8)
+L(top):
+       C eax   src
+       C ebx   floor(size/2)
+       C ecx   counter, floor(size/2) to 1
+       C edx   dst
+       C
+       C mm0   scratch
+       C mm7   mask
+
+       movq    -8(%eax,%ecx,8), %mm0
+       pxor    %mm7, %mm0
+       movq    %mm0, -8(%edx,%ecx,8)
+       loop    L(top)
+
+
+       jnc     L(no_extra)             C carry is still the one from the shrl
+       movl    (%eax,%ebx,8), %eax     C odd size, src[size-1]
+       notl_or_xorl_GMP_NUMB_MASK(     %eax)
+       movl    %eax, (%edx,%ebx,8)
+L(no_extra):
+
+       popl    %ebx
+       emms_or_femms
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/k6/mmx/dive_1.asm b/mpn/x86/k6/mmx/dive_1.asm

new file mode 100644 (file)

index 0000000..f169bfc
--- /dev/null
+++ b/mpn/x86/k6/mmx/dive_1.asm
@@ -0,0 +1,270 @@
+dnl  AMD K6 mpn_divexact_1 -- mpn by limb exact division.
+
+dnl  Copyright 2000, 2001, 2002, 2007 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C         divisor
+C       odd   even
+C K6:   10.0  12.0  cycles/limb
+C K6-2: 10.0  11.5
+
+
+C void mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                      mp_limb_t divisor);
+C
+C A simple divl is used for size==1.  This is about 10 cycles faster for an
+C odd divisor or 20 cycles for an even divisor.
+C
+C The loops are quite sensitive to code alignment, speeds should be
+C rechecked (odd and even divisor, pic and non-pic) if contemplating
+C changing anything.
+
+defframe(PARAM_DIVISOR,16)
+defframe(PARAM_SIZE,   12)
+defframe(PARAM_SRC,    8)
+defframe(PARAM_DST,    4)
+
+dnl  re-use parameter space, dst is already in edi when the inverse is stored
+define(VAR_INVERSE,`PARAM_DST')
+
+       TEXT
+
+       ALIGN(32)
+PROLOGUE(mpn_divexact_1)
+deflit(`FRAME',0)
+
+       movl    PARAM_SIZE, %ecx
+
+       movl    PARAM_SRC, %eax
+       xorl    %edx, %edx              C high half for the divl, and twos count for strip_twos
+
+       cmpl    $1, %ecx
+       jnz     L(two_or_more)
+
+       movl    (%eax), %eax
+
+       divl    PARAM_DIVISOR           C edx:eax / divisor, quotient in eax
+
+       movl    PARAM_DST, %ecx
+       movl    %eax, (%ecx)
+
+       ret
+
+
+L(two_or_more):
+       movl    PARAM_DIVISOR, %eax
+       pushl   %ebx            FRAME_pushl()
+
+       movl    PARAM_SRC, %ebx
+       pushl   %ebp            FRAME_pushl()
+
+L(strip_twos):
+       shrl    %eax                    C low bit of d goes to carry
+       incl    %edx                    C will get shift+1
+
+       jnc     L(strip_twos)           C carry from the shrl, incl does not change it
+       pushl   %esi            FRAME_pushl()
+
+       leal    1(%eax,%eax), %esi      C d without twos
+       andl    $127, %eax              C d/2, 7 bits
+
+ifdef(`PIC',`
+       LEA(    binvert_limb_table, %ebp)
+Zdisp( movzbl, 0,(%eax,%ebp), %eax)
+',`
+       movzbl  binvert_limb_table(%eax), %eax  C inv 8 bits
+')
+       pushl   %edi            FRAME_pushl()
+
+       leal    (%eax,%eax), %ebp       C 2*inv
+
+       imull   %eax, %eax              C inv*inv
+
+       movl    PARAM_DST, %edi
+
+       imull   %esi, %eax              C inv*inv*d
+
+       subl    %eax, %ebp              C inv = 2*inv - inv*inv*d
+       leal    (%ebp,%ebp), %eax       C 2*inv
+
+       imull   %ebp, %ebp              C inv*inv
+
+       movl    %esi, PARAM_DIVISOR     C d without twos
+       leal    (%ebx,%ecx,4), %ebx     C src end
+
+       imull   %esi, %ebp              C inv*inv*d
+
+       leal    (%edi,%ecx,4), %edi     C dst end
+       negl    %ecx                    C -size
+
+       subl    %ebp, %eax              C inv = 2*inv - inv*inv*d
+       subl    $1, %edx                C shift amount, and clear carry
+
+       ASSERT(e,`      C expect d*inv == 1 mod 2^GMP_LIMB_BITS
+       pushl   %eax    FRAME_pushl()
+       imull   PARAM_DIVISOR, %eax
+       cmpl    $1, %eax
+       popl    %eax    FRAME_popl()')
+
+       movl    %eax, VAR_INVERSE
+       jnz     L(even)                 C zero flag from the subl, assumes ASSERT keeps flags -- TODO confirm
+
+       movl    (%ebx,%ecx,4), %esi     C src low limb
+       jmp     L(odd_entry)
+
+
+       ALIGN(16)
+       nop     C code alignment
+L(odd_top):
+       C eax   scratch
+       C ebx   src end
+       C ecx   counter, limbs, negative
+       C edx   inverse
+       C esi   next limb, adjusted for carry
+       C edi   dst end
+       C ebp   carry bit, 0 or -1
+
+       imull   %edx, %esi              C quotient limb = adjusted src limb * inverse
+
+       movl    PARAM_DIVISOR, %eax
+       movl    %esi, -4(%edi,%ecx,4)
+
+       mull    %esi                    C carry limb in edx
+
+       subl    %ebp, %edx              C apply carry bit
+       movl    (%ebx,%ecx,4), %esi
+
+L(odd_entry):
+       subl    %edx, %esi              C apply carry limb
+       movl    VAR_INVERSE, %edx
+
+       sbbl    %ebp, %ebp              C 0 or -1
+
+       incl    %ecx
+       jnz     L(odd_top)
+
+
+       imull   %edx, %esi              C final quotient limb
+
+       movl    %esi, -4(%edi,%ecx,4)   C ecx is zero here, so this is the last dst limb
+
+       popl    %edi
+       popl    %esi
+
+       popl    %ebp
+       popl    %ebx
+
+       ret
+
+
+L(even):
+       C eax
+       C ebx   src end
+       C ecx   -size
+       C edx   twos
+       C esi
+       C edi   dst end
+       C ebp
+
+       xorl    %ebp, %ebp              C no carry bit yet
+Zdisp( movq,   0,(%ebx,%ecx,4), %mm0)  C src[0,1]
+
+       movd    %edx, %mm7
+       movl    VAR_INVERSE, %edx
+
+       addl    $2, %ecx
+       psrlq   %mm7, %mm0
+
+       movd    %mm0, %esi
+       jz      L(even_two)             C if only two limbs
+
+
+C Out-of-order execution is good enough to hide the load/rshift/movd
+C latency.  Having imul at the top of the loop gives 11.5 c/l instead of 12,
+C on K6-2.  In fact there's only 11 of decode, but nothing running at 11 has
+C been found.  Maybe the fact every second movq is unaligned costs the extra
+C 0.5.
+
+L(even_top):
+       C eax   scratch
+       C ebx   src end
+       C ecx   counter, limbs, negative
+       C edx   inverse
+       C esi   next limb, adjusted for carry
+       C edi   dst end
+       C ebp   carry bit, 0 or -1
+       C
+       C mm0   scratch, source limbs
+       C mm7   twos
+
+       imull   %edx, %esi
+
+       movl    %esi, -8(%edi,%ecx,4)
+       movl    PARAM_DIVISOR, %eax
+
+       mull    %esi                    C carry limb in edx
+
+       movq    -4(%ebx,%ecx,4), %mm0   C next two src limbs, shifted as a 64-bit unit
+       psrlq   %mm7, %mm0
+
+       movd    %mm0, %esi
+       subl    %ebp, %edx              C apply carry bit
+
+       subl    %edx, %esi              C apply carry limb
+       movl    VAR_INVERSE, %edx
+
+       sbbl    %ebp, %ebp              C 0 or -1
+
+       incl    %ecx
+       jnz     L(even_top)
+
+
+L(even_two):
+       movd    -4(%ebx), %mm0          C src high limb
+       psrlq   %mm7, %mm0
+
+       imull   %edx, %esi
+
+       movl    %esi, -8(%edi)
+       movl    PARAM_DIVISOR, %eax
+
+       mull    %esi                    C carry limb in edx
+
+       movd    %mm0, %esi
+       subl    %ebp, %edx              C apply carry bit
+
+       movl    VAR_INVERSE, %eax
+       subl    %edx, %esi              C apply carry limb
+
+       imull   %eax, %esi              C final quotient limb
+
+       movl    %esi, -4(%edi)
+
+       popl    %edi
+       popl    %esi
+
+       popl    %ebp
+       popl    %ebx
+
+       emms_or_femms
+
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/k6/mmx/logops_n.asm b/mpn/x86/k6/mmx/logops_n.asm

new file mode 100644 (file)

index 0000000..a627213
--- /dev/null
+++ b/mpn/x86/k6/mmx/logops_n.asm
@@ -0,0 +1,215 @@
+dnl  AMD K6-2 mpn_and_n, mpn_andn_n, mpn_nand_n, mpn_ior_n, mpn_iorn_n,
+dnl  mpn_nior_n, mpn_xor_n, mpn_xnor_n -- mpn bitwise logical operations.
+
+dnl  Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+NAILS_SUPPORT(0-31)
+
+
+C         alignment dst/src1/src2, A=0mod8, N=4mod8
+C      A/A/A A/A/N A/N/A A/N/N N/A/A N/A/N N/N/A N/N/N
+C
+C K6-2  1.2   1.5   1.5   1.2   1.2   1.5   1.5   1.2   and,andn,ior,xor
+C K6-2  1.5   1.75  2.0   1.75  1.75  2.0   1.75  1.5   iorn,xnor
+C K6-2  1.75  2.0   2.0   2.0   2.0   2.0   2.0   1.75  nand,nior
+C
+C K6    1.5   1.68  1.75  1.2   1.75  1.75  1.68  1.5   and,andn,ior,xor
+C K6    2.0   2.0   2.25  2.25  2.25  2.25  2.0   2.0   iorn,xnor
+C K6    2.0   2.25  2.25  2.25  2.25  2.25  2.25  2.0   nand,nior
+
+
+dnl  M4_p and M4_i are the MMX and integer instructions
+dnl  M4_*_neg_dst means whether to complement the final result before writing
+dnl  M4_*_neg_src2 means whether to complement the src2 values before using them
+dnl  M4_choose_op args: operation, M4_p, p_neg_dst, p_neg_src2, M4_i, i_neg_dst, i_neg_src2
+define(M4_choose_op,
+m4_assert_numargs(7)
+`ifdef(`OPERATION_$1',`
+define(`M4_function',  `mpn_$1')
+define(`M4_operation', `$1')
+define(`M4_p',         `$2')
+define(`M4_p_neg_dst', `$3')
+define(`M4_p_neg_src2',`$4')
+define(`M4_i',         `$5')
+define(`M4_i_neg_dst', `$6')
+define(`M4_i_neg_src2',`$7')
+')')
+
+dnl  xnor is done in "iorn" style because it's a touch faster than "nior"
+dnl  style (the two are equivalent for xor).
+dnl
+dnl  pandn can't be used with nails, so andn_n then complements src2 itself.
+
+M4_choose_op( and_n,  pand,0,0,  andl,0,0)
+ifelse(GMP_NAIL_BITS,0,
+`M4_choose_op(andn_n, pandn,0,0, andl,0,1)',
+`M4_choose_op(andn_n, pand,0,1,  andl,0,1)')
+M4_choose_op( nand_n, pand,1,0,  andl,1,0)
+M4_choose_op( ior_n,  por,0,0,   orl,0,0)
+M4_choose_op( iorn_n, por,0,1,   orl,0,1)
+M4_choose_op( nior_n, por,1,0,   orl,1,0)
+M4_choose_op( xor_n,  pxor,0,0,  xorl,0,0)
+M4_choose_op( xnor_n, pxor,0,1,  xorl,0,1)
+
+ifdef(`M4_function',,
+`m4_error(`Unrecognised or undefined OPERATION symbol
+')')
+
+MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
+
+
+C void M4_function (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C                   mp_size_t size);
+C
+C Do src1,size M4_operation src2,size, storing the result in dst,size.
+C
+C Unaligned movq loads and stores are a bit slower than aligned ones.  The
+C test at the start of the routine checks the alignment of src1 and if
+C necessary processes one limb separately at the low end to make it aligned.
+C
+C The raw speeds without this alignment switch are as follows.
+C
+C           alignment dst/src1/src2, A=0mod8, N=4mod8
+C     A/A/A  A/A/N  A/N/A  A/N/N  N/A/A  N/A/N  N/N/A  N/N/N
+C
+C K6                 1.5    2.0                 1.5    2.0    and,andn,ior,xor
+C K6                 1.75   2.2                 2.0    2.28   iorn,xnor
+C K6                 2.0    2.25                2.35   2.28   nand,nior
+C
+C
+C Future:
+C
+C K6 can do one 64-bit load per cycle so each of these routines should be
+C able to approach 1.0 c/l, if aligned.  The basic and/andn/ior/xor might be
+C able to get 1.0 with just a 4 limb loop, being 3 instructions per 2 limbs.
+C The others are 4 instructions per 2 limbs, and so can only approach 1.0
+C because there's nowhere to hide some loop control.
+
+defframe(PARAM_SIZE,16)
+defframe(PARAM_SRC2,12)
+defframe(PARAM_SRC1,8)
+defframe(PARAM_DST, 4)
+deflit(`FRAME',0)
+
+       TEXT
+       ALIGN(32)
+PROLOGUE(M4_function)
+                       movl    PARAM_SIZE, %ecx
+                       pushl   %ebx            FRAME_pushl()
+
+                       movl    PARAM_SRC1, %eax
+
+                       movl    PARAM_SRC2, %ebx
+                       cmpl    $1, %ecx
+
+                       movl    PARAM_DST, %edx
+                       ja      L(two_or_more)  C flags from the cmpl, size > 1 unsigned
+
+
+                       movl    (%ebx), %ecx    C single limb, done with the integer instruction
+                       popl    %ebx
+ifelse(M4_i_neg_src2,1,`notl_or_xorl_GMP_NUMB_MASK(    %ecx)')
+                       M4_i    (%eax), %ecx
+ifelse(M4_i_neg_dst,1,`        notl_or_xorl_GMP_NUMB_MASK(     %ecx)')
+                       movl    %ecx, (%edx)
+
+                       ret
+
+
+L(two_or_more):
+                       C eax   src1
+                       C ebx   src2
+                       C ecx   size
+                       C edx   dst
+                       C esi
+                       C edi
+                       C ebp
+
+                       pushl   %esi            FRAME_pushl()
+                       testl   $4, %eax        C is src1 4mod8
+                       jz      L(alignment_ok)
+
+                       movl    (%ebx), %esi    C one limb at the low end to make src1 0mod8
+                       addl    $4, %ebx
+ifelse(M4_i_neg_src2,1,`notl_or_xorl_GMP_NUMB_MASK(    %esi)')
+                       M4_i    (%eax), %esi
+                       addl    $4, %eax
+ifelse(M4_i_neg_dst,1,`        notl_or_xorl_GMP_NUMB_MASK(     %esi)')
+                       movl    %esi, (%edx)
+                       addl    $4, %edx
+                       decl    %ecx
+
+L(alignment_ok):
+                       movl    %ecx, %esi      C remaining size, used to address an odd last limb
+                       shrl    %ecx            C qword count, carry = low bit of remaining size
+                       jnz     L(still_two_or_more)
+
+                       movl    (%ebx), %ecx    C only one limb remains after the alignment limb
+                       popl    %esi
+ifelse(M4_i_neg_src2,1,`notl_or_xorl_GMP_NUMB_MASK(    %ecx)')
+                       M4_i    (%eax), %ecx
+ifelse(M4_i_neg_dst,1,`        notl_or_xorl_GMP_NUMB_MASK(     %ecx)')
+                       popl    %ebx
+                       movl    %ecx, (%edx)
+                       ret
+
+
+L(still_two_or_more):
+ifelse(eval(M4_p_neg_src2 || M4_p_neg_dst),1,`
+                       pcmpeqd %mm7, %mm7              C all ones
+ifelse(GMP_NAIL_BITS,0,,`psrld $GMP_NAIL_BITS, %mm7')  C clear nails
+')
+
+                       ALIGN(16)
+L(top):
+                       C eax   src1
+                       C ebx   src2
+                       C ecx   counter, qwords, counted down by the loop instruction
+                       C edx   dst
+                       C esi   size remaining at alignment_ok
+                       C edi
+                       C ebp
+                       C
+                       C carry bit is low of size
+
+                       movq    -8(%ebx,%ecx,8), %mm0
+ifelse(M4_p_neg_src2,1,`pxor   %mm7, %mm0')
+                       M4_p    -8(%eax,%ecx,8), %mm0
+ifelse(M4_p_neg_dst,1,`        pxor    %mm7, %mm0')
+                       movq    %mm0, -8(%edx,%ecx,8)
+
+                       loop    L(top)
+
+
+                       jnc     L(no_extra)     C carry still from the shrl at alignment_ok
+
+                       movl    -4(%ebx,%esi,4), %ebx   C odd size, last limb is index esi-1
+ifelse(M4_i_neg_src2,1,`notl_or_xorl_GMP_NUMB_MASK(    %ebx)')
+                       M4_i    -4(%eax,%esi,4), %ebx
+ifelse(M4_i_neg_dst,1,`        notl_or_xorl_GMP_NUMB_MASK(     %ebx)')
+                       movl    %ebx, -4(%edx,%esi,4)
+L(no_extra):
+
+                       popl    %esi
+                       popl    %ebx
+                       emms_or_femms
+                       ret
+
+EPILOGUE()
diff --git a/mpn/x86/k6/mmx/lshift.asm b/mpn/x86/k6/mmx/lshift.asm

new file mode 100644 (file)

index 0000000..1492025
--- /dev/null
+++ b/mpn/x86/k6/mmx/lshift.asm
@@ -0,0 +1,119 @@
+dnl  AMD K6 mpn_lshift -- mpn left shift.
+
+dnl  Copyright 1999, 2000, 2002 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C K6: 3.0 cycles/limb
+
+
+C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                       unsigned shift);
+C
+C The loop runs at 3 cycles/limb, limited by decoding and by having 3 mmx
+C instructions.  This is despite every second fetch being unaligned.
+
+
+defframe(PARAM_SHIFT,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC,  8)
+defframe(PARAM_DST,  4)
+
+       TEXT
+       ALIGN(32)
+
+PROLOGUE(mpn_lshift)
+deflit(`FRAME',0)
+
+       C The 1 limb case can be done without the push %ebx, but it's then
+       C still the same speed.  The push is left as a free helping hand for
+       C the two_or_more code.
+
+       movl    PARAM_SIZE, %eax
+       pushl   %ebx                    FRAME_pushl()
+
+       movl    PARAM_SRC, %ebx
+       decl    %eax                    C size-1, zero for the 1 limb case
+
+       movl    PARAM_SHIFT, %ecx
+       jnz     L(two_or_more)          C flags from the decl
+
+       movl    (%ebx), %edx            C src limb
+       movl    PARAM_DST, %ebx
+
+       shldl(  %cl, %edx, %eax)        C return value, eax was zero so it gets the bits shifted out
+
+       shll    %cl, %edx
+
+       movl    %edx, (%ebx)            C dst limb
+       popl    %ebx
+
+       ret
+
+
+       ALIGN(16)       C avoid offset 0x1f
+       nop             C avoid bad cache line crossing
+L(two_or_more):
+       C eax   size-1
+       C ebx   src
+       C ecx   shift
+       C edx
+
+       movl    (%ebx,%eax,4), %edx     C src high limb
+       negl    %ecx
+
+       movd    PARAM_SHIFT, %mm6
+       addl    $32, %ecx               C 32-shift
+
+       shrl    %cl, %edx               C retval, the bits shifted out of the high limb
+
+       movd    %ecx, %mm7
+       movl    PARAM_DST, %ecx
+
+L(top):
+       C eax   counter, size-1 to 1
+       C ebx   src
+       C ecx   dst
+       C edx   retval
+       C
+       C mm0   scratch
+       C mm6   shift
+       C mm7   32-shift
+
+       movq    -4(%ebx,%eax,4), %mm0   C src[eax-1] and src[eax] as one qword
+       decl    %eax
+
+       psrlq   %mm7, %mm0              C low dword is now the shifted limb for dst[eax+1]
+
+       movd    %mm0, 4(%ecx,%eax,4)
+       jnz     L(top)                  C flags from the decl
+
+
+       movd    (%ebx), %mm0            C src low limb
+       popl    %ebx
+
+       psllq   %mm6, %mm0
+       movl    %edx, %eax
+
+       movd    %mm0, (%ecx)            C dst low limb
+
+       emms
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/k6/mmx/popham.asm b/mpn/x86/k6/mmx/popham.asm

new file mode 100644 (file)

index 0000000..a0a651d
--- /dev/null
+++ b/mpn/x86/k6/mmx/popham.asm
@@ -0,0 +1,225 @@
+dnl  AMD K6-2 mpn_popcount, mpn_hamdist -- mpn bit population count and
+dnl  hamming distance.
+
+dnl  Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C        popcount  hamdist
+C K6-2:    9.0       11.5   cycles/limb
+C K6:      12.5      13.0
+
+
+C unsigned long mpn_popcount (mp_srcptr src, mp_size_t size);
+C unsigned long mpn_hamdist (mp_srcptr src, mp_srcptr src2, mp_size_t size);
+C
+C The code here isn't optimal, but it's already a 2x speedup over the plain
+C integer mpn/generic/popcount.c,hamdist.c.
+
+
+ifdef(`OPERATION_popcount',,
+`ifdef(`OPERATION_hamdist',,
+`m4_error(`Need OPERATION_popcount or OPERATION_hamdist
+')m4exit(1)')')
+
+define(HAM,
+m4_assert_numargs(1)
+`ifdef(`OPERATION_hamdist',`$1')')
+
+define(POP,
+m4_assert_numargs(1)
+`ifdef(`OPERATION_popcount',`$1')')
+
+HAM(`
+defframe(PARAM_SIZE,   12)
+defframe(PARAM_SRC2,   8)
+defframe(PARAM_SRC,    4)
+define(M4_function,mpn_hamdist)
+')
+POP(`
+defframe(PARAM_SIZE,   8)
+defframe(PARAM_SRC,    4)
+define(M4_function,mpn_popcount)
+')
+
+MULFUNC_PROLOGUE(mpn_popcount mpn_hamdist)
+
+
+ifdef(`PIC',,`
+       dnl  non-PIC
+
+       RODATA
+       ALIGN(8)
+
+L(rodata_AAAAAAAAAAAAAAAA):
+       .long   0xAAAAAAAA
+       .long   0xAAAAAAAA
+
+L(rodata_3333333333333333):
+       .long   0x33333333
+       .long   0x33333333
+
+L(rodata_0F0F0F0F0F0F0F0F):
+       .long   0x0F0F0F0F
+       .long   0x0F0F0F0F
+
+L(rodata_000000FF000000FF):
+       .long   0x000000FF
+       .long   0x000000FF
+')
+
+       TEXT
+       ALIGN(32)
+
+POP(`ifdef(`PIC', `
+       C avoid shrl crossing a 32-byte boundary
+       nop')')
+
+PROLOGUE(M4_function)
+deflit(`FRAME',0)
+
+       movl    PARAM_SIZE, %ecx
+
+ifdef(`PIC',`
+       movl    $0xAAAAAAAA, %eax
+       movl    $0x33333333, %edx
+
+       movd    %eax, %mm7
+       movd    %edx, %mm6
+
+       movl    $0x0F0F0F0F, %eax
+       movl    $0x000000FF, %edx
+
+       punpckldq %mm7, %mm7
+       punpckldq %mm6, %mm6
+
+       movd    %eax, %mm5
+       movd    %edx, %mm4
+
+       punpckldq %mm5, %mm5
+       punpckldq %mm4, %mm4
+',`
+
+       movq    L(rodata_AAAAAAAAAAAAAAAA), %mm7
+       movq    L(rodata_3333333333333333), %mm6
+       movq    L(rodata_0F0F0F0F0F0F0F0F), %mm5
+       movq    L(rodata_000000FF000000FF), %mm4
+')
+
+define(REG_AAAAAAAAAAAAAAAA, %mm7)
+define(REG_3333333333333333, %mm6)
+define(REG_0F0F0F0F0F0F0F0F, %mm5)
+define(REG_000000FF000000FF, %mm4)
+
+
+       movl    PARAM_SRC, %eax
+HAM(`  movl    PARAM_SRC2, %edx')
+
+       pxor    %mm2, %mm2      C total
+
+       shrl    %ecx            C qword count, carry = low bit of size
+       jnc     L(top)          C even size, straight into the qword loop
+
+Zdisp( movd,   0,(%eax,%ecx,8), %mm1)  C odd size, top limb alone, high dword zero
+
+HAM(`
+Zdisp( movd,   0,(%edx,%ecx,8), %mm0)
+       pxor    %mm0, %mm1
+')
+
+       incl    %ecx            C the lone limb takes one pass through the loop body
+       jmp     L(loaded)
+
+
+       ALIGN(16)
+POP(`  nop     C alignment to avoid crossing 32-byte boundaries')
+
+L(top):
+       C eax   src
+       C ebx
+       C ecx   counter, qwords, decrementing
+       C edx   [hamdist] src2
+       C
+       C mm0   (scratch)
+       C mm1   (scratch)
+       C mm2   total (low dword)
+       C mm3
+       C mm4   \
+       C mm5   | special constants
+       C mm6   |
+       C mm7   /
+
+       movq    -8(%eax,%ecx,8), %mm1
+HAM(`  pxor    -8(%edx,%ecx,8), %mm1')
+
+L(loaded):
+       movq    %mm1, %mm0
+       pand    REG_AAAAAAAAAAAAAAAA, %mm1
+
+       psrlq   $1, %mm1
+HAM(`  nop                     C code alignment')
+
+       psubd   %mm1, %mm0      C bit pairs
+HAM(`  nop                     C code alignment')
+
+
+       movq    %mm0, %mm1
+       psrlq   $2, %mm0
+
+       pand    REG_3333333333333333, %mm0
+       pand    REG_3333333333333333, %mm1
+
+       paddd   %mm1, %mm0      C nibbles
+
+
+       movq    %mm0, %mm1
+       psrlq   $4, %mm0
+
+       pand    REG_0F0F0F0F0F0F0F0F, %mm0
+       pand    REG_0F0F0F0F0F0F0F0F, %mm1
+
+       paddd   %mm1, %mm0      C bytes
+
+       movq    %mm0, %mm1
+       psrlq   $8, %mm0
+
+
+       paddb   %mm1, %mm0      C words
+
+
+       movq    %mm0, %mm1
+       psrlq   $16, %mm0
+
+       paddd   %mm1, %mm0      C dwords
+
+       pand    REG_000000FF000000FF, %mm0      C keep only the low byte of each dword
+
+       paddd   %mm0, %mm2      C low to total
+       psrlq   $32, %mm0
+
+       paddd   %mm0, %mm2      C high to total
+       loop    L(top)
+
+
+
+       movd    %mm2, %eax      C return the low dword of the total
+       emms_or_femms
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/k6/mmx/rshift.asm b/mpn/x86/k6/mmx/rshift.asm

new file mode 100644 (file)

index 0000000..80cd6fb
--- /dev/null
+++ b/mpn/x86/k6/mmx/rshift.asm
@@ -0,0 +1,119 @@
+dnl  AMD K6 mpn_rshift -- mpn right shift.
+
+dnl  Copyright 1999, 2000, 2002 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C K6: 3.0 cycles/limb
+
+
+C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                       unsigned shift);
+C
+C The loop runs at 3 cycles/limb, limited by decoding and by having 3 mmx
+C instructions.  This is despite every second fetch being unaligned.
+
+
+defframe(PARAM_SHIFT,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC,  8)
+defframe(PARAM_DST,  4)
+deflit(`FRAME',0)
+
+       TEXT
+       ALIGN(32)
+
+PROLOGUE(mpn_rshift)
+deflit(`FRAME',0)
+
+       C The 1 limb case can be done without the push %ebx, but it's then
+       C still the same speed.  The push is left as a free helping hand for
+       C the two_or_more code.
+
+       movl    PARAM_SIZE, %eax
+       pushl   %ebx                    FRAME_pushl()
+
+       movl    PARAM_SRC, %ebx
+       decl    %eax                    C size-1, zero for the 1 limb case
+
+       movl    PARAM_SHIFT, %ecx
+       jnz     L(two_or_more)          C flags from the decl
+
+       movl    (%ebx), %edx            C src limb
+       movl    PARAM_DST, %ebx
+
+       shrdl(  %cl, %edx, %eax)        C return value, eax was zero so it gets the bits shifted out
+
+       shrl    %cl, %edx
+
+       movl    %edx, (%ebx)            C dst limb
+       popl    %ebx
+
+       ret
+
+
+       ALIGN(16)       C avoid offset 0x1f
+L(two_or_more):
+       C eax   size-1
+       C ebx   src
+       C ecx   shift
+       C edx
+
+       movl    (%ebx), %edx    C src low limb
+       negl    %ecx
+
+       addl    $32, %ecx       C 32-shift
+       movd    PARAM_SHIFT, %mm6
+
+       shll    %cl, %edx       C retval
+       movl    PARAM_DST, %ecx
+
+       leal    (%ebx,%eax,4), %ebx     C &src[size-1]
+
+       leal    -4(%ecx,%eax,4), %ecx   C &dst[size-2]
+       negl    %eax                    C -(size-1)
+
+
+L(simple):
+       C eax   counter (negative)
+       C ebx   &src[size-1]
+       C ecx   &dst[size-2]
+       C edx   retval
+       C
+       C mm0   scratch
+       C mm6   shift
+
+Zdisp( movq,   0,(%ebx,%eax,4), %mm0)  C two adjacent src limbs as one qword
+       incl    %eax
+
+       psrlq   %mm6, %mm0
+
+Zdisp( movd,   %mm0, 0,(%ecx,%eax,4))  C low dword is the finished dst limb
+       jnz     L(simple)               C flags from the incl
+
+
+       movq    %mm0, (%ecx)    C dst[size-2] again plus dst[size-1], the shifted high limb
+       movl    %edx, %eax
+
+       popl    %ebx
+
+       emms
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/k6/mod_34lsub1.asm b/mpn/x86/k6/mod_34lsub1.asm

new file mode 100644 (file)

index 0000000..a5b7ee1
--- /dev/null
+++ b/mpn/x86/k6/mod_34lsub1.asm
@@ -0,0 +1,179 @@
+dnl  AMD K6 mpn_mod_34lsub1 -- mpn remainder modulo 2**24-1.
+
+dnl  Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C K6: 2.66 cycles/limb
+
+
+C mp_limb_t mpn_mod_34lsub1 (mp_srcptr src, mp_size_t size)
+C
+C An attempt was made to use a loop like
+C
+C L(top):
+C      adcl    (%edx), %eax
+C      adcl    4(%edx), %ebx
+C      adcl    8(%edx), %esi
+C      leal    12(%edx), %edx
+C      loop    L(top)
+C
+C with %ecx starting from floor(size/3), but it still measured 2.66 c/l.
+C The form used instead can save about 6 cycles by not dividing by 3.
+C
+C In the code used, putting the "leal"s at the top of the loop is necessary
+C for the claimed speed, anywhere else costs an extra cycle per loop.
+C Perhaps a tight loop like this needs short decode instructions at the
+C branch target, which would explain the leal/loop form above taking 8
+C cycles instead of 7 too.
+
+defframe(PARAM_SIZE, 8)
+defframe(PARAM_SRC,  4)
+
+dnl  re-use parameter space, size and src are in registers before the saves
+define(SAVE_EBX, `PARAM_SIZE')
+define(SAVE_ESI, `PARAM_SRC')
+
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_mod_34lsub1)
+deflit(`FRAME',0)
+
+       movl    PARAM_SIZE, %eax
+       movl    PARAM_SRC, %edx
+
+       subl    $2, %eax
+       ja      L(three_or_more)        C size > 2, unsigned
+
+Zdisp( movl,   0,(%edx), %eax)         C avoid code cache line boundary
+       jne     L(one)                  C flags still from the subl, size != 2 returns src[0] as is
+
+       movl    %eax, %ecx
+       movl    4(%edx), %edx
+
+       shrl    $24, %eax               C src[0] high
+       andl    $0x00FFFFFF, %ecx       C src[0] low
+
+       addl    %ecx, %eax
+       movl    %edx, %ecx
+
+       shll    $8, %edx
+       andl    $0x00FFFF00, %edx       C src[1] low 16 bits, times 2^8
+
+       shrl    $16, %ecx               C src[1] high 16 bits
+       addl    %ecx, %eax
+
+       addl    %edx, %eax
+
+L(one):
+       ret
+
+
+L(three_or_more):
+       C eax   size-2
+       C ebx
+       C ecx
+       C edx   src
+
+       movl    %ebx, SAVE_EBX
+       xorl    %ebx, %ebx
+
+       movl    %esi, SAVE_ESI
+       pushl   %edi    FRAME_pushl()
+
+       xorl    %esi, %esi
+       xorl    %edi, %edi              C and clear carry flag
+
+L(top):
+       C eax   counter, limbs
+       C ebx   acc 0mod3
+       C ecx
+       C edx   src, incrementing
+       C esi   acc 1mod3
+       C edi   acc 2mod3
+       C ebp
+
+       leal    -2(%eax), %eax          C leal leaves the carry flag alone
+       leal    12(%edx), %edx
+
+       adcl    -12(%edx), %ebx
+       adcl    -8(%edx), %esi
+       adcl    -4(%edx), %edi
+
+       decl    %eax                    C with the leal, eax -= 3 per pass, carry is kept
+       jg      L(top)
+
+
+       C eax is -2, -1 or 0 representing 0, 1 or 2 more limbs, respectively
+
+       movb    $0, %cl
+       incl    %eax
+
+       js      L(combine)              C 0 more
+
+Zdisp( adcl,   0,(%edx), %ebx)         C avoid code cache line crossings
+
+       movb    $8, %cl
+       decl    %eax
+
+       js      L(combine)              C 1 more
+
+       adcl    4(%edx), %esi
+
+       movb    $16, %cl
+
+
+L(combine):
+       sbbl    %edx, %edx              C 0 or -1 from the carry out of the last adcl
+
+       shll    %cl, %edx               C carry
+       movl    %ebx, %eax              C 0mod3
+
+       shrl    $24, %eax               C 0mod3 high
+       andl    $0x00FFFFFF, %ebx       C 0mod3 low
+
+       subl    %edx, %eax              C apply carry
+       movl    %esi, %ecx              C 1mod3
+
+       shrl    $16, %esi               C 1mod3 high
+       addl    %ebx, %eax              C apply 0mod3 low
+
+       andl    $0x0000FFFF, %ecx
+       addl    %esi, %eax              C apply 1mod3 high
+
+       shll    $8, %ecx                C 1mod3 low
+       movl    %edi, %edx              C 2mod3
+
+       shrl    $8, %edx                C 2mod3 high
+       addl    %ecx, %eax              C apply 1mod3 low
+
+       addl    %edx, %eax              C apply 2mod3 high
+       andl    $0x000000FF, %edi
+
+       shll    $16, %edi               C 2mod3 low
+       movl    SAVE_EBX, %ebx
+
+       addl    %edi, %eax              C apply 2mod3 low
+       movl    SAVE_ESI, %esi
+
+       popl    %edi
+
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/k6/mode1o.asm b/mpn/x86/k6/mode1o.asm

new file mode 100644 (file)

index 0000000..34f3536
--- /dev/null
+++ b/mpn/x86/k6/mode1o.asm
@@ -0,0 +1,164 @@
+dnl  AMD K6 mpn_modexact_1_odd -- exact division style remainder.
+
+dnl  Copyright 2000, 2001, 2002, 2003, 2007 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C K6: 10.0 cycles/limb
+
+
+C mp_limb_t mpn_modexact_1_odd (mp_srcptr src, mp_size_t size,
+C                               mp_limb_t divisor);
+C mp_limb_t mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size,
+C                                mp_limb_t divisor, mp_limb_t carry);
+C
+C A special case for high<divisor at the end measured only about 4 cycles
+C faster, and so isn't used.
+C
+C A special case for size==1 using a divl rather than the inverse measured
+C only about 5 cycles faster, and so isn't used.  When size==1 and
+C high<divisor it can skip a division and be a full 24 cycles faster, but
+C this isn't an important case.
+
+defframe(PARAM_CARRY,  16)
+defframe(PARAM_DIVISOR,12)
+defframe(PARAM_SIZE,   8)
+defframe(PARAM_SRC,    4)
+
+       TEXT
+
+       ALIGN(32)
+PROLOGUE(mpn_modexact_1c_odd)
+deflit(`FRAME',0)
+
+       movl    PARAM_DIVISOR, %ecx     C ecx = divisor, as the shared code expects
+       pushl   %esi            FRAME_pushl()
+
+       movl    PARAM_CARRY, %edx       C edx = initial carry limb from the caller
+       jmp     L(start_1c)             C continue in mpn_modexact_1_odd
+
+EPILOGUE()
+
+
+       ALIGN(16)
+PROLOGUE(mpn_modexact_1_odd)
+deflit(`FRAME',0)
+
+       movl    PARAM_DIVISOR, %ecx     C ecx = divisor
+       pushl   %esi            FRAME_pushl()
+
+       xorl    %edx, %edx              C carry limb starts at zero for this entry
+L(start_1c):
+       pushl   %edi            FRAME_pushl()
+
+       shrl    %ecx                    C d/2
+       movl    PARAM_DIVISOR, %esi     C esi = divisor, held for the whole routine
+
+       andl    $127, %ecx              C d/2, 7 bits
+       pushl   %ebp            FRAME_pushl()
+
+ifdef(`PIC',`
+       LEA(    binvert_limb_table, %edi)
+Zdisp( movzbl, 0,(%ecx,%edi), %edi)            C inv 8 bits
+',`
+       movzbl  binvert_limb_table(%ecx), %edi  C inv 8 bits
+')
+       leal    (%edi,%edi), %ecx       C 2*inv
+
+       imull   %edi, %edi              C inv*inv
+
+       movl    PARAM_SRC, %eax         C eax = src
+       movl    PARAM_SIZE, %ebp        C ebp = size
+
+       imull   %esi, %edi              C inv*inv*d
+
+       pushl   %ebx            FRAME_pushl()
+       leal    (%eax,%ebp,4), %ebx     C src end
+
+       subl    %edi, %ecx              C inv = 2*inv - inv*inv*d
+       leal    (%ecx,%ecx), %edi       C 2*inv
+
+       imull   %ecx, %ecx              C inv*inv
+
+       movl    (%eax), %eax            C src low limb
+       negl    %ebp                    C -size
+
+       imull   %esi, %ecx              C inv*inv*d
+
+       subl    %ecx, %edi              C inv = 2*inv - inv*inv*d
+
+       ASSERT(e,`      C d*inv == 1 mod 2^GMP_LIMB_BITS
+       pushl   %eax
+       movl    %esi, %eax
+       imull   %edi, %eax
+       cmpl    $1, %eax
+       popl    %eax')
+
+       jmp     L(entry)                C low limb is already in eax, skip the load
+
+
+C Rotating the mul to the top of the loop saves 1 cycle, presumably by
+C hiding the loop control under the imul latency.
+C
+C The run time is 10 cycles, but decoding is only 9 (and the dependent chain
+C only 8).  It's not clear how to get down to 9 cycles.
+C
+C The xor and rcl to handle the carry bit could be an sbb instead, with the
+C carry bit add becoming a sub, but that doesn't save anything.
+
+L(top):
+       C eax   (low product)
+       C ebx   src end
+       C ecx   carry bit, 0 or 1
+       C edx   (high product, being carry limb)
+       C esi   divisor
+       C edi   inverse
+       C ebp   counter, limbs, negative
+
+       mull    %esi                    C edx = high half of q*d, eax is reloaded next
+
+       movl    (%ebx,%ebp,4), %eax     C next src limb
+       addl    %ecx, %edx              C apply carry bit to carry limb
+
+L(entry):
+       xorl    %ecx, %ecx              C ecx = 0, ready to take the borrow
+       subl    %edx, %eax              C apply carry limb
+
+       rcll    %ecx                    C ecx = borrow from the subl, 0 or 1
+
+       imull   %edi, %eax              C q = (limb - carry) * inverse, low 32 bits
+
+       incl    %ebp                    C counter runs up to zero
+       jnz     L(top)
+
+
+
+       popl    %ebx
+       popl    %ebp
+
+       mull    %esi                    C final q*d, high half lands in edx
+
+       popl    %edi
+       popl    %esi
+
+       leal    (%ecx,%edx), %eax       C return value = carry bit + high product
+
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/k6/mul_1.asm b/mpn/x86/k6/mul_1.asm

new file mode 100644 (file)

index 0000000..e1c468f
--- /dev/null
+++ b/mpn/x86/k6/mul_1.asm
@@ -0,0 +1,281 @@
+dnl  AMD K6 mpn_mul_1 -- mpn by limb multiply.
+
+dnl  Copyright 1999, 2000, 2002, 2005 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C                           cycles/limb
+C P5:
+C P6 model 0-8,10-12             5.5
+C P6 model 9  (Banias)
+C P6 model 13 (Dothan)           4.87
+C P4 model 0  (Willamette)
+C P4 model 1  (?)
+C P4 model 2  (Northwood)
+C P4 model 3  (Prescott)
+C P4 model 4  (Nocona)
+C K6:                            6.25
+C K7:
+C K8:
+
+
+C mp_limb_t mpn_mul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                      mp_limb_t multiplier);
+C mp_limb_t mpn_mul_1c (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                       mp_limb_t multiplier, mp_limb_t carry);
+C
+C Multiply src,size by mult and store the result in dst,size.
+C Return the carry limb from the top of the result.
+C
+C mpn_mul_1c() accepts an initial carry for the calculation, it's added into
+C the low limb of the result.
+
+defframe(PARAM_CARRY,     20)
+defframe(PARAM_MULTIPLIER,16)
+defframe(PARAM_SIZE,      12)
+defframe(PARAM_SRC,       8)
+defframe(PARAM_DST,       4)
+
+dnl  minimum 5 because the unrolled code can't handle less
+deflit(UNROLL_THRESHOLD, 5)
+
+       TEXT
+       ALIGN(32)
+
+PROLOGUE(mpn_mul_1c)
+       pushl   %esi                    C same single push as the mpn_mul_1 entry
+deflit(`FRAME',4)
+       movl    PARAM_CARRY, %esi       C esi = initial carry from the caller
+       jmp     L(start_nc)             C continue in mpn_mul_1
+EPILOGUE()
+
+
+PROLOGUE(mpn_mul_1)
+       push    %esi
+deflit(`FRAME',4)
+       xorl    %esi, %esi      C initial carry
+
+L(start_nc):
+       mov     PARAM_SIZE, %ecx        C ecx = size
+       push    %ebx
+FRAME_pushl()
+
+       movl    PARAM_SRC, %ebx         C ebx = src
+       push    %edi
+FRAME_pushl()
+
+       movl    PARAM_DST, %edi         C edi = dst
+       pushl   %ebp
+FRAME_pushl()
+
+       cmpl    $UNROLL_THRESHOLD, %ecx
+       movl    PARAM_MULTIPLIER, %ebp  C ebp = multiplier, movl keeps the cmpl flags
+
+       jae     L(unroll)               C size >= UNROLL_THRESHOLD
+
+
+       C code offset 0x22 here, close enough to aligned
+L(simple):
+       C eax   scratch
+       C ebx   src
+       C ecx   counter
+       C edx   scratch
+       C esi   carry
+       C edi   dst
+       C ebp   multiplier
+       C
+       C this loop 8 cycles/limb
+
+       movl    (%ebx), %eax            C next src limb
+       addl    $4, %ebx
+
+       mull    %ebp                    C edx:eax = limb * multiplier
+
+       addl    %esi, %eax              C low half plus carry in
+       movl    $0, %esi                C movl leaves the carry flag from addl alone
+
+       adcl    %edx, %esi              C new carry = high half + carry flag
+
+       movl    %eax, (%edi)            C store result limb
+       addl    $4, %edi
+
+       loop    L(simple)               C decrement ecx, repeat while non-zero
+
+
+       popl    %ebp
+
+       popl    %edi
+       popl    %ebx
+
+       movl    %esi, %eax              C return the final carry limb
+       popl    %esi
+
+       ret
+
+
+C -----------------------------------------------------------------------------
+C The code for each limb is 6 cycles, with instruction decoding being the
+C limiting factor.  At 4 limbs/loop and 1 cycle/loop of overhead it's 6.25
+C cycles/limb in total.
+C
+C The secret ingredient to get 6.25 is to start the loop with the mul and
+C have the load/store pair at the end.  Rotating the load/store to the top
+C is a 0.5 c/l slowdown.  (Some address generation effect probably.)
+C
+C The whole unrolled loop fits nicely in exactly 80 bytes.
+
+
+       ALIGN(16)       C already aligned to 16 here actually
+L(unroll):
+       movl    (%ebx), %eax            C first src limb, read before ebx is moved
+       leal    -16(%ebx,%ecx,4), %ebx  C &src[size-4]
+
+       leal    -16(%edi,%ecx,4), %edi  C &dst[size-4]
+       subl    $4, %ecx
+
+       negl    %ecx                    C ecx = -(size-4), counts upwards
+
+
+       ALIGN(16)       C one byte nop for this alignment
+L(top):
+       C eax   scratch
+       C ebx   &src[size-4]
+       C ecx   counter
+       C edx   scratch
+       C esi   carry
+       C edi   &dst[size-4]
+       C ebp   multiplier
+
+       mull    %ebp                    C limb 0 of this group of four
+
+       addl    %esi, %eax
+       movl    $0, %esi
+
+       adcl    %edx, %esi
+
+       movl    %eax, (%edi,%ecx,4)     C store, then fetch the next src limb
+       movl    4(%ebx,%ecx,4), %eax
+
+
+       mull    %ebp                    C limb 1
+
+       addl    %esi, %eax
+       movl    $0, %esi
+
+       adcl    %edx, %esi
+
+       movl    %eax, 4(%edi,%ecx,4)
+       movl    8(%ebx,%ecx,4), %eax
+
+
+       mull    %ebp                    C limb 2
+
+       addl    %esi, %eax
+       movl    $0, %esi
+
+       adcl    %edx, %esi
+
+       movl    %eax, 8(%edi,%ecx,4)
+       movl    12(%ebx,%ecx,4), %eax
+
+
+       mull    %ebp                    C limb 3
+
+       addl    %esi, %eax
+       movl    $0, %esi
+
+       adcl    %edx, %esi
+
+       movl    %eax, 12(%edi,%ecx,4)
+       movl    16(%ebx,%ecx,4), %eax   C src limb for the next group or the tail
+
+
+       addl    $4, %ecx
+       js      L(top)                  C loop while the counter is still negative
+
+
+
+       C eax   next src limb
+       C ebx   &src[size-4]
+       C ecx   0 to 3 representing respectively 4 to 1 further limbs
+       C edx
+       C esi   carry
+       C edi   &dst[size-4]
+
+       testb   $2, %cl                 C ecx 2 or 3 means only 2 or 1 limbs remain
+       jnz     L(finish_not_two)
+
+       mull    %ebp
+
+       addl    %esi, %eax
+       movl    $0, %esi
+
+       adcl    %edx, %esi
+
+       movl    %eax, (%edi,%ecx,4)
+       movl    4(%ebx,%ecx,4), %eax
+
+
+       mull    %ebp
+
+       addl    %esi, %eax
+       movl    $0, %esi
+
+       adcl    %edx, %esi
+
+       movl    %eax, 4(%edi,%ecx,4)
+       movl    8(%ebx,%ecx,4), %eax
+
+       addl    $2, %ecx                C two more limbs done, ecx is now 2 or 3
+L(finish_not_two):
+
+
+       testb   $1, %cl                 C odd ecx means just the last limb remains
+       jnz     L(finish_not_one)
+
+       mull    %ebp
+
+       addl    %esi, %eax
+       movl    $0, %esi
+
+       adcl    %edx, %esi
+
+       movl    %eax, 8(%edi)           C dst[size-2]
+       movl    12(%ebx), %eax          C src[size-1]
+L(finish_not_one):
+
+
+       mull    %ebp                    C last limb
+
+       addl    %esi, %eax
+       popl    %ebp                    C popl does not disturb the carry flag
+
+       adcl    $0, %edx                C fold the carry flag into the high half
+
+       movl    %eax, 12(%edi)          C dst[size-1]
+       popl    %edi
+
+       popl    %ebx
+       movl    %edx, %eax              C return the carry limb
+
+       popl    %esi
+
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/k6/mul_basecase.asm b/mpn/x86/k6/mul_basecase.asm

new file mode 100644 (file)

index 0000000..dcd4d70
--- /dev/null
+++ b/mpn/x86/k6/mul_basecase.asm
@@ -0,0 +1,601 @@
+dnl  AMD K6 mpn_mul_basecase -- multiply two mpn numbers.
+
+dnl  Copyright 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C K6: approx 9.0 cycles per cross product on 30x30 limbs (with 16 limbs/loop
+C     unrolling).
+
+
+
+dnl  K6: UNROLL_COUNT cycles/product (approx)
+dnl           8           9.75
+dnl          16           9.3
+dnl          32           9.3
+dnl  Maximum possible with the current code is 32.
+dnl
+dnl  With 16 the inner unrolled loop fits exactly in a 256 byte block, which
+dnl  might explain its good performance.
+
+deflit(UNROLL_COUNT, 16)
+
+
+C void mpn_mul_basecase (mp_ptr wp,
+C                        mp_srcptr xp, mp_size_t xsize,
+C                        mp_srcptr yp, mp_size_t ysize);
+C
+C Calculate xp,xsize multiplied by yp,ysize, storing the result in
+C wp,xsize+ysize.
+C
+C This routine is essentially the same as mpn/generic/mul_basecase.c, but
+C it's faster because it does most of the mpn_addmul_1() entry code only
+C once.  The saving is about 10-20% on typical sizes coming from the
+C Karatsuba multiply code.
+C
+C Enhancements:
+C
+C The mul_1 loop is about 8.5 c/l, which is slower than mpn_mul_1 at 6.25
+C c/l.  Could call mpn_mul_1 when ysize is big enough to make it worthwhile.
+C
+C The main unrolled addmul loop could be shared by mpn_addmul_1, using some
+C extra stack setups and maybe 2 or 3 wasted cycles at the end.  Code saving
+C would be 256 bytes.
+
+ifdef(`PIC',`
+deflit(UNROLL_THRESHOLD, 8)
+',`
+deflit(UNROLL_THRESHOLD, 8)
+')
+
+defframe(PARAM_YSIZE,20)
+defframe(PARAM_YP,   16)
+defframe(PARAM_XSIZE,12)
+defframe(PARAM_XP,   8)
+defframe(PARAM_WP,   4)
+
+       TEXT
+       ALIGN(32)
+PROLOGUE(mpn_mul_basecase)
+deflit(`FRAME',0)
+
+       movl    PARAM_XSIZE, %ecx       C ecx = xsize
+       movl    PARAM_YP, %eax
+
+       movl    PARAM_XP, %edx          C edx = xp
+       movl    (%eax), %eax    C yp low limb
+
+       cmpl    $2, %ecx
+       ja      L(xsize_more_than_two_limbs)    C xsize > 2
+       je      L(two_by_something)             C xsize == 2
+
+
+       C one limb by one limb
+
+       movl    (%edx), %edx    C xp low limb
+       movl    PARAM_WP, %ecx
+
+       mull    %edx                    C edx:eax = yp[0] * xp[0]
+
+       movl    %eax, (%ecx)            C wp[0] = low half
+       movl    %edx, 4(%ecx)           C wp[1] = high half
+       ret
+
+
+C -----------------------------------------------------------------------------
+L(two_by_something):
+       decl    PARAM_YSIZE             C zero flag set when ysize is 1, used by jnz
+       pushl   %ebx
+deflit(`FRAME',4)
+
+       movl    PARAM_WP, %ebx
+       pushl   %esi
+deflit(`FRAME',8)
+
+       movl    %eax, %ecx      C yp low limb
+       movl    (%edx), %eax    C xp low limb
+
+       movl    %edx, %esi      C xp
+       jnz     L(two_by_two)           C flags still from the decl above
+
+
+       C two limbs by one limb
+
+       mull    %ecx                    C xp[0] * yp[0]
+
+       movl    %eax, (%ebx)            C wp[0]
+       movl    4(%esi), %eax           C xp[1]
+
+       movl    %edx, %esi      C carry
+
+       mull    %ecx                    C xp[1] * yp[0]
+
+       addl    %eax, %esi
+       movl    %esi, 4(%ebx)           C wp[1], movl keeps the carry flag
+
+       adcl    $0, %edx
+
+       movl    %edx, 8(%ebx)           C wp[2]
+       popl    %esi
+
+       popl    %ebx
+       ret
+
+
+
+C -----------------------------------------------------------------------------
+       ALIGN(16)
+L(two_by_two):
+       C eax   xp low limb
+       C ebx   wp
+       C ecx   yp low limb
+       C edx
+       C esi   xp
+       C edi
+       C ebp
+deflit(`FRAME',8)
+
+       mull    %ecx            C xp[0] * yp[0]
+
+       push    %edi
+deflit(`FRAME',12)
+       movl    %eax, (%ebx)            C wp[0]
+
+       movl    4(%esi), %eax
+       movl    %edx, %edi      C carry, for wp[1]
+
+       mull    %ecx            C xp[1] * yp[0]
+
+       addl    %eax, %edi
+       movl    PARAM_YP, %ecx
+
+       adcl    $0, %edx
+
+       movl    %edi, 4(%ebx)           C provisional wp[1], added to again below
+       movl    4(%ecx), %ecx   C yp[1]
+
+       movl    4(%esi), %eax   C xp[1]
+       movl    %edx, %edi      C carry, for wp[2]
+
+       mull    %ecx            C xp[1] * yp[1]
+
+       addl    %eax, %edi
+
+       adcl    $0, %edx
+
+       movl    (%esi), %eax    C xp[0]
+       movl    %edx, %esi      C carry, for wp[3]
+
+       mull    %ecx            C xp[0] * yp[1]
+
+       addl    %eax, 4(%ebx)           C finish wp[1]
+       adcl    %edx, %edi              C propagate into wp[2]
+       adcl    $0, %esi                C and into wp[3]
+
+       movl    %edi, 8(%ebx)
+       popl    %edi
+
+       movl    %esi, 12(%ebx)
+       popl    %esi
+
+       popl    %ebx
+       ret
+
+
+C -----------------------------------------------------------------------------
+       ALIGN(16)
+L(xsize_more_than_two_limbs):
+
+C The first limb of yp is processed with a simple mpn_mul_1 style loop
+C inline.  Unrolling this doesn't seem worthwhile since it's only run once
+C (whereas the addmul below is run ysize-1 many times).  A call to the
+C actual mpn_mul_1 will be slowed down by the call and parameter pushing and
+C popping, and doesn't seem likely to be worthwhile on the typical 10-20
+C limb operations the Karatsuba code calls here with.
+
+       C eax   yp[0]
+       C ebx
+       C ecx   xsize
+       C edx   xp
+       C esi
+       C edi
+       C ebp
+deflit(`FRAME',0)
+
+       pushl   %edi            defframe_pushl(SAVE_EDI)
+       pushl   %ebp            defframe_pushl(SAVE_EBP)
+
+       movl    PARAM_WP, %edi
+       pushl   %esi            defframe_pushl(SAVE_ESI)
+
+       movl    %eax, %ebp              C ebp = yp[0], the mul1 multiplier
+       pushl   %ebx            defframe_pushl(SAVE_EBX)
+
+       leal    (%edx,%ecx,4), %ebx     C xp end
+       xorl    %esi, %esi              C carry starts at zero
+
+       leal    (%edi,%ecx,4), %edi     C wp end of mul1
+       negl    %ecx                    C -xsize, counts up to zero
+
+
+L(mul1):
+       C eax   scratch
+       C ebx   xp end
+       C ecx   counter, negative
+       C edx   scratch
+       C esi   carry
+       C edi   wp end of mul1
+       C ebp   multiplier
+
+       movl    (%ebx,%ecx,4), %eax     C next xp limb
+
+       mull    %ebp
+
+       addl    %esi, %eax              C low half plus carry in
+       movl    $0, %esi                C movl leaves the carry flag alone
+
+       adcl    %edx, %esi              C new carry = high half + carry flag
+
+       movl    %eax, (%edi,%ecx,4)
+       incl    %ecx
+
+       jnz     L(mul1)
+
+
+       movl    PARAM_YSIZE, %edx
+       movl    %esi, (%edi)            C final carry
+
+       movl    PARAM_XSIZE, %ecx
+       decl    %edx                    C ysize-1 limbs of yp still to process
+
+       jnz     L(ysize_more_than_one_limb)
+
+       popl    %ebx                    C ysize was 1, the mul1 pass is the result
+       popl    %esi
+       popl    %ebp
+       popl    %edi
+       ret
+
+
+L(ysize_more_than_one_limb):
+       cmpl    $UNROLL_THRESHOLD, %ecx
+       movl    PARAM_YP, %eax
+
+       jae     L(unroll)               C xsize >= UNROLL_THRESHOLD
+
+
+C -----------------------------------------------------------------------------
+C Simple addmul loop.
+C
+C Using ebx and edi pointing at the ends of their respective locations saves
+C a couple of instructions in the outer loop.  The inner loop is still 11
+C cycles, the same as the simple loop in aorsmul_1.asm.
+
+       C eax   yp
+       C ebx   xp end
+       C ecx   xsize
+       C edx   ysize-1
+       C esi
+       C edi   wp end of mul1
+       C ebp
+
+       movl    4(%eax), %ebp           C multiplier
+       negl    %ecx
+
+       movl    %ecx, PARAM_XSIZE       C -xsize
+       xorl    %esi, %esi              C initial carry
+
+       leal    4(%eax,%edx,4), %eax    C yp end
+       negl    %edx
+
+       movl    %eax, PARAM_YP
+       movl    %edx, PARAM_YSIZE       C -(ysize-1), outer counter
+
+       jmp     L(simple_outer_entry)
+
+
+       C aligning here saves a couple of cycles
+       ALIGN(16)
+L(simple_outer_top):
+       C edx   ysize counter, negative
+
+       movl    PARAM_YP, %eax          C yp end
+       xorl    %esi, %esi              C carry
+
+       movl    PARAM_XSIZE, %ecx       C -xsize
+       movl    %edx, PARAM_YSIZE
+
+       movl    (%eax,%edx,4), %ebp     C yp limb multiplier
+L(simple_outer_entry):
+       addl    $4, %edi                C destination moves up one limb per yp limb
+
+
+L(simple_inner):
+       C eax   scratch
+       C ebx   xp end
+       C ecx   counter, negative
+       C edx   scratch
+       C esi   carry
+       C edi   wp end of this addmul
+       C ebp   multiplier
+
+       movl    (%ebx,%ecx,4), %eax
+
+       mull    %ebp
+
+       addl    %esi, %eax              C low half plus carry in
+       movl    $0, %esi
+
+       adcl    $0, %edx                C carry from that add goes into the high half
+       addl    %eax, (%edi,%ecx,4)     C accumulate into wp
+       adcl    %edx, %esi              C new carry = high half + carry from wp add
+
+       incl    %ecx
+       jnz     L(simple_inner)
+
+
+       movl    PARAM_YSIZE, %edx
+       movl    %esi, (%edi)            C carry becomes the next wp limb
+
+       incl    %edx
+       jnz     L(simple_outer_top)
+
+
+       popl    %ebx
+       popl    %esi
+       popl    %ebp
+       popl    %edi
+       ret
+
+
+C -----------------------------------------------------------------------------
+C Unrolled loop.
+C
+C The unrolled inner loop is the same as in aorsmul_1.asm, see that code for
+C some comments.
+C
+C VAR_COUNTER is for the inner loop, running from VAR_COUNTER_INIT down to
+C 0, inclusive.
+C
+C VAR_JMP is the computed jump into the unrolled loop.
+C
+C PARAM_XP and PARAM_WP get offset appropriately for where the unrolled loop
+C is entered.
+C
+C VAR_XP_LOW is the least significant limb of xp, which is needed at the
+C start of the unrolled loop.  This can't just be fetched through the xp
+C pointer because of the offset applied to it.
+C
+C PARAM_YSIZE is the outer loop counter, going from -(ysize-1) up to -1,
+C inclusive.
+C
+C PARAM_YP is offset appropriately so that the PARAM_YSIZE counter can be
+C added to give the location of the next limb of yp, which is the multiplier
+C in the unrolled loop.
+C
+C PARAM_WP is similarly offset so that the PARAM_YSIZE counter can be added
+C to give the starting point in the destination for each unrolled loop (this
+C point is one limb upwards for each limb of yp processed).
+C
+C Having PARAM_YSIZE count negative to zero means it's not necessary to
+C store new values of PARAM_YP and PARAM_WP on each loop.  Those values on
+C the stack remain constant and on each loop an leal adjusts them with the
+C PARAM_YSIZE counter value.
+
+
+defframe(VAR_COUNTER,      -20)
+defframe(VAR_COUNTER_INIT, -24)
+defframe(VAR_JMP,          -28)
+defframe(VAR_XP_LOW,       -32)
+deflit(VAR_STACK_SPACE, 16)
+
+dnl  For some strange reason using (%esp) instead of 0(%esp) is a touch
+dnl  slower in this code, hence the defframe empty-if-zero feature is
+dnl  disabled.
+dnl
+dnl  If VAR_COUNTER is at (%esp), the effect is worse.  In this case the
+dnl  unrolled loop is 255 instead of 256 bytes, but quite how this affects
+dnl  anything isn't clear.
+dnl
+define(`defframe_empty_if_zero_disabled',1)
+
+L(unroll):
+       C eax   yp (not used)
+       C ebx   xp end (not used)
+       C ecx   xsize
+       C edx   ysize-1
+       C esi
+       C edi   wp end of mul1 (not used)
+       C ebp
+deflit(`FRAME', 16)
+
+       leal    -2(%ecx), %ebp  C one limb processed at start,
+       decl    %ecx            C and ebp is one less
+
+       shrl    $UNROLL_LOG2, %ebp      C number of full passes of the unrolled loop
+       negl    %ecx
+
+       subl    $VAR_STACK_SPACE, %esp  C room for the four VAR_ slots
+deflit(`FRAME', 16+VAR_STACK_SPACE)
+       andl    $UNROLL_MASK, %ecx
+
+       movl    %ecx, %esi
+       shll    $4, %ecx
+
+       movl    %ebp, VAR_COUNTER_INIT
+       negl    %esi
+
+       C 15 code bytes per limb
+ifdef(`PIC',`
+       call    L(pic_calc)
+L(unroll_here):
+',`
+       leal    L(unroll_entry) (%ecx,%esi,1), %ecx
+')
+
+       movl    PARAM_XP, %ebx
+       movl    %ebp, VAR_COUNTER
+
+       movl    PARAM_WP, %edi
+       movl    %ecx, VAR_JMP           C entry address inside the unrolled code
+
+       movl    (%ebx), %eax            C xp[0], read before xp is offset
+       leal    4(%edi,%esi,4), %edi    C wp adjust for unrolling and mul1
+
+       leal    (%ebx,%esi,4), %ebx     C xp adjust for unrolling
+
+       movl    %eax, VAR_XP_LOW
+
+       movl    %ebx, PARAM_XP
+       movl    PARAM_YP, %ebx
+
+       leal    (%edi,%edx,4), %ecx     C wp adjust for ysize indexing
+       movl    4(%ebx), %ebp           C multiplier (yp second limb)
+
+       leal    4(%ebx,%edx,4), %ebx    C yp adjust for ysize indexing
+
+       movl    %ecx, PARAM_WP
+
+       leal    1(%esi), %ecx   C adjust parity for decl %ecx above
+
+       movl    %ebx, PARAM_YP
+       negl    %edx
+
+       movl    %edx, PARAM_YSIZE       C -(ysize-1), outer counter
+       jmp     L(unroll_outer_entry)
+
+
+ifdef(`PIC',`
+L(pic_calc):
+       C See mpn/x86/README about old gas bugs
+       leal    (%ecx,%esi,1), %ecx
+       addl    $L(unroll_entry)-L(unroll_here), %ecx
+       addl    (%esp), %ecx
+       ret_internal
+')
+
+
+C -----------------------------------------------------------------------------
+       C Aligning here saves a couple of cycles per loop.  Using 32 doesn't
+       C cost any extra space, since the inner unrolled loop below is
+       C aligned to 32.
+       ALIGN(32)
+L(unroll_outer_top):
+       C edx   ysize
+
+       movl    PARAM_YP, %eax
+       movl    %edx, PARAM_YSIZE       C incremented ysize counter
+
+       movl    PARAM_WP, %edi
+
+       movl    VAR_COUNTER_INIT, %ebx
+       movl    (%eax,%edx,4), %ebp     C next multiplier
+
+       movl    PARAM_XSIZE, %ecx
+       leal    (%edi,%edx,4), %edi     C adjust wp for where we are in yp
+
+       movl    VAR_XP_LOW, %eax
+       movl    %ebx, VAR_COUNTER       C reload the inner loop counter
+
+L(unroll_outer_entry):
+       mull    %ebp                    C xp low limb * multiplier
+
+       C using testb is a tiny bit faster than testl
+       testb   $1, %cl
+
+       movl    %eax, %ecx      C low carry
+       movl    VAR_JMP, %eax
+
+       movl    %edx, %esi      C high carry
+       movl    PARAM_XP, %ebx
+
+       jnz     L(unroll_noswap)        C flags still from the testb, movl keeps them
+       movl    %ecx, %esi      C high,low carry other way around
+
+       movl    %edx, %ecx
+L(unroll_noswap):
+
+       jmp     *%eax                   C computed jump to the VAR_JMP address
+
+
+
+C -----------------------------------------------------------------------------
+       ALIGN(32)
+L(unroll_top):
+       C eax   scratch
+       C ebx   xp
+       C ecx   carry low
+       C edx   scratch
+       C esi   carry high
+       C edi   wp
+       C ebp   multiplier
+       C VAR_COUNTER  loop counter
+       C
+       C 15 code bytes each limb
+
+       leal    UNROLL_BYTES(%edi), %edi
+
+L(unroll_entry):
+deflit(CHUNK_COUNT,2)
+forloop(`i', 0, UNROLL_COUNT/CHUNK_COUNT-1, `
+       deflit(`disp0', eval(i*CHUNK_COUNT*4))
+       deflit(`disp1', eval(disp0 + 4))
+       deflit(`disp2', eval(disp1 + 4))
+
+       movl    disp1(%ebx), %eax
+       mull    %ebp
+Zdisp( addl,   %ecx, disp0,(%edi))
+       adcl    %eax, %esi
+       movl    %edx, %ecx
+       jadcl0( %ecx)
+
+       movl    disp2(%ebx), %eax
+       mull    %ebp
+       addl    %esi, disp1(%edi)
+       adcl    %eax, %ecx
+       movl    %edx, %esi
+       jadcl0( %esi)
+')
+
+       decl    VAR_COUNTER
+       leal    UNROLL_BYTES(%ebx), %ebx        C leal keeps the flags from the decl
+
+       jns     L(unroll_top)
+
+
+       movl    PARAM_YSIZE, %edx
+       addl    %ecx, UNROLL_BYTES(%edi)        C add the pending low carry into wp
+
+       adcl    $0, %esi
+
+       incl    %edx
+       movl    %esi, UNROLL_BYTES+4(%edi)      C store the high carry, flags kept
+
+       jnz     L(unroll_outer_top)             C tests the incl of the outer counter
+
+
+       movl    SAVE_ESI, %esi
+       movl    SAVE_EBP, %ebp
+       movl    SAVE_EDI, %edi
+       movl    SAVE_EBX, %ebx
+
+       addl    $FRAME, %esp            C drop locals and saved registers together
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/k6/pre_mod_1.asm b/mpn/x86/k6/pre_mod_1.asm

new file mode 100644 (file)

index 0000000..3231539
--- /dev/null
+++ b/mpn/x86/k6/pre_mod_1.asm
@@ -0,0 +1,135 @@
+dnl  AMD K6 mpn_preinv_mod_1 -- mpn by 1 remainder, with pre-inverted divisor.
+
+dnl  Copyright 2000, 2002, 2003 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C K6: 18.0 cycles/limb
+
+
+C mp_limb_t mpn_preinv_mod_1 (mp_srcptr src, mp_size_t size, mp_limb_t divisor,
+C                             mp_limb_t inverse);
+C
+C This code is only 2 c/l faster than a simple divl, but that's 10% so it's
+C considered worthwhile (just).
+
+defframe(PARAM_INVERSE,16)
+defframe(PARAM_DIVISOR,12)
+defframe(PARAM_SIZE,    8)
+defframe(PARAM_SRC,     4)
+
+       TEXT
+       ALIGN(32)
+PROLOGUE(mpn_preinv_mod_1)
+deflit(`FRAME',0)
+
+       ASSERT(ae,`cmpl $1, PARAM_SIZE')
+       ASSERT(nz,`testl $0x80000000, PARAM_DIVISOR')
+
+       movl    PARAM_SIZE, %ecx        C ecx = size
+       pushl   %ebp    FRAME_pushl()
+
+       movl    PARAM_SRC, %ebp         C ebp = src
+       pushl   %edi    FRAME_pushl()
+
+       movl    PARAM_DIVISOR, %eax     C eax = d
+       pushl   %esi    FRAME_pushl()
+
+       movl    -4(%ebp,%ecx,4), %esi   C src high limb
+       pushl   %ebx    FRAME_pushl()
+
+       movl    %edx, %edi              C first n2 to cancel, edi = edx so sbbl gives 0 or -1
+       subl    %eax, %esi              C first n1 = high-divisor
+
+       decl    %ecx                    C limbs left below the high one, carry flag kept
+       jz      L(done_sbbl)            C size was 1
+
+L(top):
+       C eax   scratch
+       C ebx   n10, nadj, q1
+       C ecx   counter, size to 1
+       C edx   scratch
+       C esi   n2
+       C edi   old high, for underflow test
+       C ebp   src
+
+       sbbl    %edx, %edi          C high n-(q1+1)*d, 0 or -1
+
+L(entry):
+       andl    PARAM_DIVISOR, %edi C d if the subtract went negative, else 0
+L(q1_ff_top):
+       movl    -4(%ebp,%ecx,4), %ebx
+
+       addl    %esi, %edi          C possible addback
+       movl    %ebx, %esi          C n10
+
+       sarl    $31, %ebx           C -n1 = 0 or -1
+       movl    %edi, %eax          C n2
+
+       movl    PARAM_INVERSE, %edx
+       subl    %ebx, %eax          C n2+n1
+
+       mull    %edx                C m*(n2+n1)
+
+       andl    PARAM_DIVISOR, %ebx C -n1 & d
+       addl    %esi, %ebx          C nadj = n10 + (-n1&d), ignoring overflow
+
+       addl    %ebx, %eax          C low m*(n2+n1) + nadj, giving carry flag
+       leal    1(%edi), %ebx       C n2+1
+
+       adcl    %ebx, %edx          C 1+high(n2<<32+m*(n2+n1)+nadj) = q1+1
+
+       movl    PARAM_DIVISOR, %eax C d
+       jz      L(q1_ff)            C q1+1 wrapped to zero, flags from the adcl
+
+       mull    %edx                C (q1+1)*d
+
+       subl    %eax, %esi          C low  n-(q1+1)*d
+       loop    L(top)              C loop leaves flags alone, borrow reaches sbbl
+
+
+
+L(done_sbbl):
+       sbbl    %edx, %edi          C high n-(q1+1)*d, 0 or -1
+
+       andl    PARAM_DIVISOR, %edi C d if an addback is needed, else 0
+L(done_esi_edi):
+       popl    %ebx
+
+       leal    (%esi,%edi), %eax   C return esi plus the addback in edi
+       popl    %esi
+
+       popl    %edi
+       popl    %ebp
+
+       ret
+
+
+C Special case for q1=0xFFFFFFFF, giving q=0xFFFFFFFF meaning the low dword
+C of q*d is simply -d and the remainder n-q*d = n10+d.  This is rarely
+C reached.
+
+L(q1_ff):
+       movl    PARAM_DIVISOR, %edi C edi = d, added to n10 at q1_ff_top or at exit
+       loop    L(q1_ff_top)
+
+       jmp     L(done_esi_edi)
+
+
+EPILOGUE()
diff --git a/mpn/x86/k6/sqr_basecase.asm b/mpn/x86/k6/sqr_basecase.asm

new file mode 100644 (file)

index 0000000..1a2ad34
--- /dev/null
+++ b/mpn/x86/k6/sqr_basecase.asm
@@ -0,0 +1,669 @@
+dnl  AMD K6 mpn_sqr_basecase -- square an mpn number.
+
+dnl  Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C K6: approx 4.7 cycles per cross product, or 9.2 cycles per triangular
+C     product (measured on the speed difference between 17 and 33 limbs,
+C     which is roughly the Karatsuba recursing range).
+
+
+dnl  SQR_TOOM2_THRESHOLD_MAX is the maximum SQR_TOOM2_THRESHOLD this
+dnl  code supports.  This value is used only by the tune program to know
+dnl  what it can go up to.  (An attempt to compile with a bigger value will
+dnl  trigger some m4_assert()s in the code, making the build fail.)
+dnl
+dnl  The value is determined by requiring the displacements in the unrolled
+dnl  addmul to fit in single bytes.  This means a maximum UNROLL_COUNT of
+dnl  63, giving a maximum SQR_TOOM2_THRESHOLD of 66.
+
+deflit(SQR_TOOM2_THRESHOLD_MAX, 66)
+
+
+dnl  Allow a value from the tune program to override config.m4.
+
+ifdef(`SQR_TOOM2_THRESHOLD_OVERRIDE',
+`define(`SQR_TOOM2_THRESHOLD',SQR_TOOM2_THRESHOLD_OVERRIDE)')
+
+
+dnl  UNROLL_COUNT is the number of code chunks in the unrolled addmul.  The
+dnl  number required is determined by SQR_TOOM2_THRESHOLD, since
+dnl  mpn_sqr_basecase only needs to handle sizes < SQR_TOOM2_THRESHOLD.
+dnl
+dnl  The first addmul is the biggest, and this takes the second least
+dnl  significant limb and multiplies it by the third least significant and
+dnl  up.  Hence for a maximum operand size of SQR_TOOM2_THRESHOLD-1
+dnl  limbs, UNROLL_COUNT needs to be SQR_TOOM2_THRESHOLD-3.
+
+m4_config_gmp_mparam(`SQR_TOOM2_THRESHOLD')
+deflit(UNROLL_COUNT, eval(SQR_TOOM2_THRESHOLD-3))
+
+
+C void mpn_sqr_basecase (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C
+C The algorithm is essentially the same as mpn/generic/sqr_basecase.c, but a
+C lot of function call overheads are avoided, especially when the given size
+C is small.
+C
+C The code size might look a bit excessive, but not all of it is executed
+C and so won't fill up the code cache.  The 1x1, 2x2 and 3x3 special cases
+C clearly apply only to those sizes; mid sizes like 10x10 only need part of
+C the unrolled addmul; and big sizes like 35x35 that do need all of it will
+C at least be getting value for money, because 35x35 spends something like
+C 5780 cycles here.
+C
+C Different values of UNROLL_COUNT give slightly different speeds, between
+C 9.0 and 9.2 c/tri-prod measured on the difference between 17 and 33 limbs.
+C This isn't a big difference, but it's presumably some alignment effect
+C which if understood could give a simple speedup.
+
+defframe(PARAM_SIZE,12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+       TEXT
+       ALIGN(32)
+PROLOGUE(mpn_sqr_basecase)
+deflit(`FRAME',0)
+
+       movl    PARAM_SIZE, %ecx
+       movl    PARAM_SRC, %eax
+
+       cmpl    $2, %ecx            C flags select the size 2 and size above 2 paths
+       je      L(two_limbs)
+
+       movl    PARAM_DST, %edx     C movl keeps the cmpl flags for the ja
+       ja      L(three_or_more)
+
+
+C -----------------------------------------------------------------------------
+C one limb only (fall through when size is below 2)
+       C eax   src
+       C ebx
+       C ecx   size
+       C edx   dst
+
+       movl    (%eax), %eax        C src[0]
+       movl    %edx, %ecx          C dst moved to ecx, mull overwrites edx
+
+       mull    %eax                C edx:eax = src[0]^2
+
+       movl    %eax, (%ecx)        C dst[0] = low limb
+       movl    %edx, 4(%ecx)       C dst[1] = high limb
+       ret
+
+
+C -----------------------------------------------------------------------------
+       ALIGN(16)
+L(two_limbs):
+       C eax   src
+       C ebx
+       C ecx   size
+       C edx   not yet loaded, dst is fetched from PARAM_DST below
+
+       pushl   %ebx
+       movl    %eax, %ebx      C src
+deflit(`FRAME',4)
+
+       movl    (%ebx), %eax
+       movl    PARAM_DST, %ecx C dst, FRAME accounts for the push above
+
+       mull    %eax            C src[0]^2
+
+       movl    %eax, (%ecx)    C dst[0]
+       movl    4(%ebx), %eax
+
+       movl    %edx, 4(%ecx)   C dst[1]
+
+       mull    %eax            C src[1]^2
+
+       movl    %eax, 8(%ecx)   C dst[2]
+       movl    (%ebx), %eax
+
+       movl    %edx, 12(%ecx)  C dst[3]
+       movl    4(%ebx), %edx
+
+       mull    %edx            C src[0]*src[1]
+
+       addl    %eax, 4(%ecx)   C add the cross product at dst[1..3] once ...
+
+       adcl    %edx, 8(%ecx)
+       adcl    $0, 12(%ecx)
+
+       popl    %ebx
+       addl    %eax, 4(%ecx)   C ... and a second time, giving 2*src[0]*src[1]
+
+       adcl    %edx, 8(%ecx)
+       adcl    $0, 12(%ecx)
+
+       ret
+
+
+C -----------------------------------------------------------------------------
+L(three_or_more):
+deflit(`FRAME',0)
+       cmpl    $4, %ecx        C size 3 falls through
+       jae     L(four_or_more)
+
+
+C -----------------------------------------------------------------------------
+C three limbs
+       C eax   src
+       C ecx   size
+       C edx   dst
+
+       pushl   %ebx
+       movl    %eax, %ebx      C src
+
+       movl    (%ebx), %eax
+       movl    %edx, %ecx      C dst
+
+       mull    %eax            C src[0] ^ 2
+
+       movl    %eax, (%ecx)
+       movl    4(%ebx), %eax
+
+       movl    %edx, 4(%ecx)
+       pushl   %esi
+
+       mull    %eax            C src[1] ^ 2
+
+       movl    %eax, 8(%ecx)
+       movl    8(%ebx), %eax
+
+       movl    %edx, 12(%ecx)
+       pushl   %edi
+
+       mull    %eax            C src[2] ^ 2
+
+       movl    %eax, 16(%ecx)
+       movl    (%ebx), %eax
+
+       movl    %edx, 20(%ecx)
+       movl    4(%ebx), %edx
+
+       mull    %edx            C src[0] * src[1]
+
+       movl    %eax, %esi      C low limb, weight of dst[1]
+       movl    (%ebx), %eax
+
+       movl    %edx, %edi      C high limb, weight of dst[2]
+       movl    8(%ebx), %edx
+
+       pushl   %ebp
+       xorl    %ebp, %ebp
+
+       mull    %edx            C src[0] * src[2]
+
+       addl    %eax, %edi
+       movl    4(%ebx), %eax
+
+       adcl    %edx, %ebp
+
+       movl    8(%ebx), %edx
+
+       mull    %edx            C src[1] * src[2]
+
+       addl    %eax, %ebp
+
+       adcl    $0, %edx
+
+
+       C eax   will be dst[5]
+       C ebx
+       C ecx   dst
+       C edx   dst[4]
+       C esi   dst[1]
+       C edi   dst[2]
+       C ebp   dst[3]
+
+       xorl    %eax, %eax      C zero, receives the bit doubled out of edx
+       addl    %esi, %esi      C double the cross products, carry chained upwards
+       adcl    %edi, %edi
+       adcl    %ebp, %ebp
+       adcl    %edx, %edx
+       adcl    $0, %eax
+
+       addl    %esi, 4(%ecx)   C add doubled cross products to the stored squares
+       adcl    %edi, 8(%ecx)
+       adcl    %ebp, 12(%ecx)
+
+       popl    %ebp            C popl leaves the carry flag alone
+       popl    %edi
+
+       adcl    %edx, 16(%ecx)
+
+       popl    %esi
+       popl    %ebx
+
+       adcl    %eax, 20(%ecx)
+       ASSERT(nc)
+
+       ret
+
+
+C -----------------------------------------------------------------------------
+
+defframe(SAVE_EBX,   -4)
+defframe(SAVE_ESI,   -8)
+defframe(SAVE_EDI,   -12)
+defframe(SAVE_EBP,   -16)
+defframe(VAR_COUNTER,-20)
+defframe(VAR_JMP,    -24)
+deflit(STACK_SPACE, 24)
+
+       ALIGN(16)
+L(four_or_more):
+
+       C eax   src
+       C ebx
+       C ecx   size
+       C edx   dst
+       C esi
+       C edi
+       C ebp
+
+C First multiply src[0]*src[1..size-1] and store at dst[1..size].
+C
+C A test was done calling mpn_mul_1 here to get the benefit of its unrolled
+C loop, but this was only a tiny speedup; at 35 limbs it took 24 cycles off
+C a 5780 cycle operation, which is not surprising since the loop here is 8
+C c/l and mpn_mul_1 is 6.25 c/l.
+
+       subl    $STACK_SPACE, %esp      deflit(`FRAME',STACK_SPACE)
+
+       movl    %edi, SAVE_EDI
+       leal    4(%edx), %edi   C &dst[1]
+
+       movl    %ebx, SAVE_EBX
+       leal    4(%eax), %ebx   C &src[1]
+
+       movl    %esi, SAVE_ESI
+       xorl    %esi, %esi      C initial carry limb is zero
+
+       movl    %ebp, SAVE_EBP
+
+       C eax
+       C ebx   src+4
+       C ecx   size
+       C edx
+       C esi
+       C edi   dst+4
+       C ebp
+
+       movl    (%eax), %ebp    C multiplier
+       leal    -1(%ecx), %ecx  C size-1, and pad to a 16 byte boundary
+
+
+       ALIGN(16)
+L(mul_1):
+       C eax   scratch
+       C ebx   src ptr
+       C ecx   counter
+       C edx   scratch
+       C esi   carry
+       C edi   dst ptr
+       C ebp   multiplier
+
+       movl    (%ebx), %eax
+       addl    $4, %ebx
+
+       mull    %ebp
+
+       addl    %esi, %eax      C low limb plus incoming carry limb
+       movl    $0, %esi        C movl rather than xorl, so the carry flag survives
+
+       adcl    %edx, %esi      C next carry limb = high limb plus carry flag
+
+       movl    %eax, (%edi)
+       addl    $4, %edi
+
+       loop    L(mul_1)        C decrement ecx, repeat while nonzero
+
+
+C Addmul src[n]*src[n+1..size-1] at dst[2*n-1...], for each n=1..size-2.
+C
+C The last two addmuls, which are the bottom right corner of the product
+C triangle, are left to the end.  These are src[size-3]*src[size-2,size-1]
+C and src[size-2]*src[size-1].  If size is 4 then it's only these corner
+C cases that need to be done.
+C
+C The unrolled code is the same as mpn_addmul_1(), see that routine for some
+C comments.
+C
+C VAR_COUNTER is the outer loop, running from -(size-4) to -1, inclusive.
+C
+C VAR_JMP is the computed jump into the unrolled code, stepped by one code
+C chunk each outer loop.
+C
+C K6 doesn't do any branch prediction on indirect jumps, which is good
+C actually because it's a different target each time.  The unrolled addmul
+C is about 3 cycles/limb faster than a simple loop, so the 6 cycle cost of
+C the indirect jump is quickly recovered.
+
+
+dnl  This value is also implicitly encoded in a shift and add.
+dnl
+deflit(CODE_BYTES_PER_LIMB, 15)
+
+dnl  With the unmodified &src[size] and &dst[size] pointers, the
+dnl  displacements in the unrolled code fit in a byte for UNROLL_COUNT
+dnl  values up to 31.  Above that an offset must be added to them.
+dnl
+deflit(OFFSET,
+ifelse(eval(UNROLL_COUNT>31),1,
+eval((UNROLL_COUNT-31)*4),
+0))
+
+       C eax
+       C ebx   &src[size]
+       C ecx
+       C edx
+       C esi   carry
+       C edi   &dst[size]
+       C ebp
+
+       movl    PARAM_SIZE, %ecx
+       movl    %esi, (%edi)    C store the final carry limb from L(mul_1)
+
+       subl    $4, %ecx        C size-4, the number of unrolled addmul passes
+       jz      L(corner)       C size 4 needs only the corner products
+
+       movl    %ecx, %edx      C size-4
+ifelse(OFFSET,0,,
+`      subl    $OFFSET, %ebx')
+
+       shll    $4, %ecx        C 16*(size-4)
+ifelse(OFFSET,0,,
+`      subl    $OFFSET, %edi')
+
+       negl    %ecx            C -16*(size-4), adding edx below gives -15*(size-4)
+
+ifdef(`PIC',`
+       call    L(pic_calc)
+L(here):
+',`
+       leal    L(unroll_inner_end)-eval(2*CODE_BYTES_PER_LIMB)(%ecx,%edx), %ecx
+')
+       negl    %edx            C -(size-4), the initial VAR_COUNTER
+
+
+       C The calculated jump mustn't be before the start of the available
+       C code.  This is the limitation UNROLL_COUNT puts on the src operand
+       C size, but checked here using the jump address directly.
+       C
+       ASSERT(ae,`
+       movl_text_address( L(unroll_inner_start), %eax)
+       cmpl    %eax, %ecx
+       ')
+
+
+C -----------------------------------------------------------------------------
+       ALIGN(16)
+L(unroll_outer_top):
+       C eax
+       C ebx   &src[size], constant
+       C ecx   VAR_JMP
+       C edx   VAR_COUNTER, limbs, negative
+       C esi   high limb to store
+       C edi   dst ptr, high of last addmul
+       C ebp
+
+       movl    -12+OFFSET(%ebx,%edx,4), %ebp   C multiplier
+       movl    %edx, VAR_COUNTER
+
+       movl    -8+OFFSET(%ebx,%edx,4), %eax    C first limb of multiplicand
+
+       mull    %ebp            C first product of this pass, done outside the chunks
+
+       testb   $1, %cl         C low bit of jump address, used by the jz or jnz below
+
+       movl    %edx, %esi      C high carry
+       movl    %ecx, %edx      C jump
+
+       movl    %eax, %ecx      C low carry
+       leal    CODE_BYTES_PER_LIMB(%edx), %edx C one chunk later each pass, flags kept
+
+       movl    %edx, VAR_JMP
+       leal    4(%edi), %edi   C dst ptr advances one limb each pass
+
+       C A branch-free version of this using some xors was found to be a
+       C touch slower than just a conditional jump, despite the jump
+       C switching between taken and not taken on every loop.
+
+ifelse(eval(UNROLL_COUNT%2),0,
+       jz,jnz) L(unroll_noswap)
+       movl    %esi, %eax      C high,low carry other way around
+
+       movl    %ecx, %esi
+       movl    %eax, %ecx
+L(unroll_noswap):
+
+       jmp     *%edx           C computed jump into the unrolled chunks
+
+
+       C Must be on an even address here so the low bit of the jump address
+       C will indicate which way around ecx/esi should start.
+       C
+       C An attempt was made at padding here to get the end of the unrolled
+       C code to come out on a good alignment, to save padding before
+       C L(corner).  This worked, but turned out to run slower than just an
+       C ALIGN(2).  The reason for this is not clear, it might be related
+       C to the different speeds on different UNROLL_COUNTs noted above.
+
+       ALIGN(2)
+
+L(unroll_inner_start):
+       C eax   scratch
+       C ebx   src
+       C ecx   carry low
+       C edx   scratch
+       C esi   carry high
+       C edi   dst
+       C ebp   multiplier
+       C
+       C 15 code bytes each limb
+       C ecx/esi swapped on each chunk
+
+forloop(`i', UNROLL_COUNT, 1, `
+       deflit(`disp_src', eval(-i*4 + OFFSET))
+       deflit(`disp_dst', eval(disp_src - 4))
+
+       m4_assert(`disp_src>=-128 && disp_src<128')
+       m4_assert(`disp_dst>=-128 && disp_dst<128')
+
+ifelse(eval(i%2),0,`
+Zdisp( movl,   disp_src,(%ebx), %eax)
+       mull    %ebp
+Zdisp( addl,   %esi, disp_dst,(%edi))
+       adcl    %eax, %ecx
+       movl    %edx, %esi
+       jadcl0( %esi)
+',`
+       dnl  this one comes out last
+Zdisp( movl,   disp_src,(%ebx), %eax)
+       mull    %ebp
+Zdisp( addl,   %ecx, disp_dst,(%edi))
+       adcl    %eax, %esi
+       movl    %edx, %ecx
+       jadcl0( %ecx)
+')
+')
+L(unroll_inner_end):
+
+       addl    %esi, -4+OFFSET(%edi)   C pending low carry from the last chunk
+
+       movl    VAR_COUNTER, %edx
+       jadcl0( %ecx)
+
+       movl    %ecx, m4_empty_if_zero(OFFSET)(%edi)    C top limb of this pass
+       movl    VAR_JMP, %ecx
+
+       incl    %edx            C negative counter runs up to zero
+       jnz     L(unroll_outer_top)
+
+
+ifelse(OFFSET,0,,`
+       addl    $OFFSET, %ebx
+       addl    $OFFSET, %edi
+')
+
+
+C -----------------------------------------------------------------------------
+       ALIGN(16)
+L(corner):
+       C ebx   &src[size]
+       C edi   &dst[2*size-5]
+
+       movl    -12(%ebx), %ebp C src[size-3]
+
+       movl    -8(%ebx), %eax  C src[size-2]
+       movl    %eax, %ecx      C keep src[size-2] for the last product
+
+       mull    %ebp            C src[size-3]*src[size-2]
+
+       addl    %eax, -4(%edi)
+       adcl    $0, %edx
+
+       movl    -4(%ebx), %eax  C src[size-1]
+       movl    %edx, %esi      C carry limb
+       movl    %eax, %ebx      C keep src[size-1], the src pointer is finished with
+
+       mull    %ebp            C src[size-3]*src[size-1]
+
+       addl    %esi, %eax
+       adcl    $0, %edx
+
+       addl    %eax, (%edi)
+       adcl    $0, %edx
+
+       movl    %edx, %esi      C carry limb
+       movl    %ebx, %eax
+
+       mull    %ecx            C src[size-2]*src[size-1]
+
+       addl    %esi, %eax
+       movl    %eax, 4(%edi)   C stored not added, movl keeps the carry flag
+
+       adcl    $0, %edx
+
+       movl    %edx, 8(%edi)
+
+
+C -----------------------------------------------------------------------------
+C Left shift of dst[1..2*size-2], the bit shifted out becomes dst[2*size-1].
+C The loop measures about 6 cycles/iteration, though it looks like it should
+C decode in 5.
+
+L(lshift_start):
+       movl    PARAM_SIZE, %ecx
+
+       movl    PARAM_DST, %edi
+       subl    $1, %ecx                C size-1 and clear carry
+
+       movl    PARAM_SRC, %ebx
+       movl    %ecx, %edx
+
+       xorl    %eax, %eax              C ready for adcl
+
+
+       ALIGN(16)
+L(lshift):
+       C eax   zero
+       C ebx   src (for later use)
+       C ecx   counter, decrementing
+       C edx   size-1 (for later use)
+       C esi
+       C edi   dst, incrementing
+       C ebp
+
+       rcll    4(%edi)                 C shift left one bit, carry flag links the limbs
+       rcll    8(%edi)
+       leal    8(%edi), %edi           C two limbs per pass, leal keeps the carry flag
+       loop    L(lshift)               C loop does not alter the flags either
+
+
+       adcl    %eax, %eax              C eax = the bit shifted out of the top limb
+
+       movl    %eax, 4(%edi)           C dst most significant limb
+       movl    (%ebx), %eax            C src[0]
+
+       leal    4(%ebx,%edx,4), %ebx    C &src[size]
+       subl    %edx, %ecx              C -(size-1)
+
+
+C -----------------------------------------------------------------------------
+C Now add in the squares on the diagonal, src[0]^2, src[1]^2, ...,
+C src[size-1]^2.  dst[0] hasn't been set at all yet, and just gets the
+C low limb of src[0]^2.
+
+
+       mull    %eax                    C src[0]^2
+
+       movl    %eax, (%edi,%ecx,8)     C dst[0]
+
+
+       ALIGN(16)
+L(diag):
+       C eax   scratch
+       C ebx   &src[size]
+       C ecx   counter, negative
+       C edx   carry
+       C esi   scratch
+       C edi   dst[2*size-2]
+       C ebp
+
+       movl    (%ebx,%ecx,4), %eax     C next src limb to square
+       movl    %edx, %esi              C high limb of the previous square
+
+       mull    %eax
+
+       addl    %esi, 4(%edi,%ecx,8)    C previous high limb into the odd dst limb
+       adcl    %eax, 8(%edi,%ecx,8)    C low limb of this square into the even dst limb
+       adcl    $0, %edx
+
+       incl    %ecx
+       jnz     L(diag)
+
+
+       movl    SAVE_EBX, %ebx
+       movl    SAVE_ESI, %esi
+
+       addl    %edx, 4(%edi)           C dst most significant limb
+
+       movl    SAVE_EDI, %edi
+       movl    SAVE_EBP, %ebp
+       addl    $FRAME, %esp            C FRAME is STACK_SPACE on this path
+       ret
+
+
+
+C -----------------------------------------------------------------------------
+ifdef(`PIC',`
+L(pic_calc):
+       C See mpn/x86/README about old gas bugs
+       addl    (%esp), %ecx            C return address, which is L(here)
+       addl    $L(unroll_inner_end)-L(here)-eval(2*CODE_BYTES_PER_LIMB), %ecx
+       addl    %edx, %ecx              C same total as the non-PIC leal
+       ret_internal
+')
+
+
+EPILOGUE()
diff --git a/mpn/x86/k7/README b/mpn/x86/k7/README

new file mode 100644 (file)

index 0000000..e2c5e0c
--- /dev/null
+++ b/mpn/x86/k7/README
@@ -0,0 +1,163 @@
+Copyright 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+
+
+                      AMD K7 MPN SUBROUTINES
+
+
+This directory contains code optimized for the AMD Athlon CPU.
+
+The mmx subdirectory has routines using MMX instructions.  All Athlons have
+MMX, the separate directory is just so that configure can omit it if the
+assembler doesn't support MMX.
+
+
+
+STATUS
+
+Times for the loops, with all code and data in L1 cache.
+
+                               cycles/limb
+       mpn_add/sub_n             1.6
+
+       mpn_copyi                 0.75 or 1.0   \ varying with data alignment
+       mpn_copyd                 0.75 or 1.0   /
+
+       mpn_divrem_1             17.0 integer part, 15.0 fractional part
+       mpn_mod_1                17.0
+       mpn_divexact_by3          8.0
+
+       mpn_l/rshift              1.2
+
+       mpn_mul_1                 3.4
+       mpn_addmul/submul_1       3.9
+
+       mpn_mul_basecase          4.42 cycles/crossproduct (approx)
+        mpn_sqr_basecase          2.3 cycles/crossproduct (approx)
+                                 or 4.55 cycles/triangleproduct (approx)
+
+Prefetching of sources hasn't yet been tried.
+
+
+
+NOTES
+
+cmov, MMX, 3DNow and some extensions to MMX and 3DNow are available.
+
+Write-allocate L1 data cache means prefetching of destinations is unnecessary.
+
+Floating point multiplications can be done in parallel with integer
+multiplications, but there doesn't seem to be any way to make use of this.
+
+Unsigned "mul"s can be issued every 3 cycles.  This suggests 3 is a limit on
+the speed of the multiplication routines.  The documentation shows mul
+executing in IEU0 (or maybe in IEU0 and IEU1 together), so it might be that,
+to get near 3 cycles code has to be arranged so that nothing else is issued
+to IEU0.  A busy IEU0 could explain why some code takes 4 cycles and other
+apparently equivalent code takes 5.
+
+
+
+OPTIMIZATIONS
+
+Unrolled loops are used to reduce looping overhead.  The unrolling is
+configurable up to 32 limbs/loop for most routines and up to 64 for some.
+The K7 has 64k L1 code cache so quite big unrolling is allowable.
+
+Computed jumps into the unrolling are used to handle sizes not a multiple of
+the unrolling.  An attractive feature of this is that times increase
+smoothly with operand size, but it may be that some routines should just
+have simple loops to finish up, especially when PIC adds between 2 and 16
+cycles to get %eip.
+
+Position independent code is implemented using a call to get %eip for the
+computed jumps and a ret is always done, rather than an addl $4,%esp or a
+popl, so the CPU return address branch prediction stack stays synchronised
+with the actual stack in memory.
+
+Branch prediction, in the absence of any history, will guess forward jumps are
+not taken and backward jumps are taken.  Where possible it's arranged that
+the less likely or less important case is under a taken forward jump.
+
+
+
+CODING
+
+Instructions in general code have been shown grouped if they can execute
+together, which means up to three direct-path instructions which have no
+successive dependencies.  K7 always decodes three and has out-of-order
+execution, but the groupings show what slots might be available and what
+dependency chains exist.
+
+When there are vector-path instructions an effort is made to get triplets of
+direct-path instructions in between them, even if there's dependencies,
+since this maximizes decoding throughput and might save a cycle or two if
+decoding is the limiting factor.
+
+
+
+INSTRUCTIONS
+
+adcl       direct
+divl       39 cycles back-to-back
+lodsl,etc  vector
+loop       1 cycle vector (decl/jnz opens up one decode slot)
+movd reg   vector
+movd mem   direct
+mull       issue every 3 cycles, latency 4 cycles low word, 6 cycles high word
+popl      vector (use movl for more than one pop)
+pushl     direct, will pair with a load
+shrdl %cl  vector, 3 cycles, seems to be 3 decode too
+xorl r,r   false read dependency recognised
+
+
+
+REFERENCES
+
+"AMD Athlon Processor X86 Code Optimization Guide", AMD publication number
+22007, revision K, February 2002.  Available on-line,
+
+http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/22007.pdf
+
+"3DNow Technology Manual", AMD publication number 21928G/0-March 2000.
+This describes the femms and prefetch instructions.  Available on-line,
+
+http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/21928.pdf
+
+"AMD Extensions to the 3DNow and MMX Instruction Sets Manual", AMD
+publication number 22466, revision D, March 2000.  This describes
+instructions added in the Athlon processor, such as pswapd and the extra
+prefetch forms.  Available on-line,
+
+http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/22466.pdf
+
+"3DNow Instruction Porting Guide", AMD publication number 22621, revision B,
+August 1999.  This has some notes on general Athlon optimizations as well as
+3DNow.  Available on-line,
+
+http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/22621.pdf
+
+
+
+
+----------------
+Local variables:
+mode: text
+fill-column: 76
+End:
diff --git a/mpn/x86/k7/aors_n.asm b/mpn/x86/k7/aors_n.asm

new file mode 100644 (file)

index 0000000..d84de3e
--- /dev/null
+++ b/mpn/x86/k7/aors_n.asm
@@ -0,0 +1,247 @@
+dnl  AMD K7 mpn_add_n/mpn_sub_n -- mpn add or subtract.
+
+dnl  Copyright 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C K7: 1.64 cycles/limb (at 16 limbs/loop).
+
+
+
+dnl  K7: UNROLL_COUNT cycles/limb
+dnl           8           1.9
+dnl          16           1.64
+dnl          32           1.7
+dnl          64           2.0
+dnl  Maximum possible with the current code is 64.
+
+deflit(UNROLL_COUNT, 16)
+
+
+ifdef(`OPERATION_add_n', `
+       define(M4_inst,        adcl)
+       define(M4_function_n,  mpn_add_n)
+       define(M4_function_nc, mpn_add_nc)
+       define(M4_description, add)
+',`ifdef(`OPERATION_sub_n', `
+       define(M4_inst,        sbbl)
+       define(M4_function_n,  mpn_sub_n)
+       define(M4_function_nc, mpn_sub_nc)
+       define(M4_description, subtract)
+',`m4_error(`Need OPERATION_add_n or OPERATION_sub_n
+')')')
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
+
+
+C mp_limb_t M4_function_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C                         mp_size_t size);
+C mp_limb_t M4_function_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C                         mp_size_t size, mp_limb_t carry);
+C
+C Calculate src1,size M4_description src2,size, and store the result in
+C dst,size.  The return value is the carry bit from the top of the result (1
+C or 0).
+C
+C The _nc version accepts 1 or 0 for an initial carry into the low limb of
+C the calculation.  Note values other than 1 or 0 here will lead to garbage
+C results.
+C
+C This code runs at 1.64 cycles/limb, which might be the best possible with
+C plain integer operations.  Each limb is 2 loads and 1 store, any 2 of
+C which can be done each cycle, leading to 1.5 c/l.
+
+dnl  Must have UNROLL_THRESHOLD >= 2, since the unrolled loop can't handle 1.
+ifdef(`PIC',`
+deflit(UNROLL_THRESHOLD, 8)
+',`
+deflit(UNROLL_THRESHOLD, 8)
+')
+
+defframe(PARAM_CARRY,20)
+defframe(PARAM_SIZE, 16)
+defframe(PARAM_SRC2, 12)
+defframe(PARAM_SRC1, 8)
+defframe(PARAM_DST,  4)
+
+defframe(SAVE_EBP, -4)
+defframe(SAVE_ESI, -8)
+defframe(SAVE_EBX, -12)
+defframe(SAVE_EDI, -16)
+deflit(STACK_SPACE, 16)
+
+       TEXT
+       ALIGN(32)
+deflit(`FRAME',0)
+
+PROLOGUE(M4_function_nc)
+       movl    PARAM_CARRY, %eax       C initial carry, then share the code below
+       jmp     L(start)
+EPILOGUE()
+
+PROLOGUE(M4_function_n)
+
+       xorl    %eax, %eax      C carry
+L(start):
+       movl    PARAM_SIZE, %ecx
+       subl    $STACK_SPACE, %esp
+deflit(`FRAME',STACK_SPACE)
+
+       movl    %edi, SAVE_EDI
+       movl    %ebx, SAVE_EBX
+       cmpl    $UNROLL_THRESHOLD, %ecx
+
+       movl    PARAM_SRC2, %edx
+       movl    PARAM_SRC1, %ebx        C movl keeps the cmpl flags for the jae
+       jae     L(unroll)
+
+       movl    PARAM_DST, %edi
+       leal    (%ebx,%ecx,4), %ebx     C &src1[size]
+       leal    (%edx,%ecx,4), %edx     C &src2[size]
+
+       leal    (%edi,%ecx,4), %edi     C &dst[size]
+       negl    %ecx                    C negative index counting up to zero
+       shrl    %eax                    C initial carry into the carry flag
+
+       C This loop is in a single 16 byte code block already, so no
+       C alignment necessary.
+L(simple):
+       C eax   scratch
+       C ebx   src1
+       C ecx   counter
+       C edx   src2
+       C esi
+       C edi   dst
+       C ebp
+
+       movl    (%ebx,%ecx,4), %eax
+       M4_inst (%edx,%ecx,4), %eax
+       movl    %eax, (%edi,%ecx,4)
+       incl    %ecx                    C incl leaves the carry flag for the next limb
+       jnz     L(simple)
+
+       movl    $0, %eax                C movl rather than xorl, to keep the carry flag
+       movl    SAVE_EDI, %edi
+
+       movl    SAVE_EBX, %ebx
+       setc    %al                     C return value is the final carry
+       addl    $STACK_SPACE, %esp
+
+       ret
+
+
+C -----------------------------------------------------------------------------
+       C This is at 0x55, close enough to aligned.
+L(unroll):
+deflit(`FRAME',STACK_SPACE)
+       movl    %ebp, SAVE_EBP
+       andl    $-2, %ecx               C size low bit masked out
+       andl    $1, PARAM_SIZE          C size low bit kept
+
+       movl    %ecx, %edi
+       decl    %ecx
+       movl    PARAM_DST, %ebp
+
+       shrl    $UNROLL_LOG2, %ecx      C loop counter, from the even size less one
+       negl    %edi
+       movl    %esi, SAVE_ESI
+
+       andl    $UNROLL_MASK, %edi      C limbs to skip on the first pass
+
+ifdef(`PIC',`
+       call    L(pic_calc)
+L(here):
+',`
+       leal    L(entry) (%edi,%edi,8), %esi    C 9 bytes per
+')
+       negl    %edi
+       shrl    %eax                    C initial carry into the carry flag
+
+       leal    ifelse(UNROLL_BYTES,256,128) (%ebx,%edi,4), %ebx        C leal keeps carry
+       leal    ifelse(UNROLL_BYTES,256,128) (%edx,%edi,4), %edx
+       leal    ifelse(UNROLL_BYTES,256,128) (%ebp,%edi,4), %edi
+
+       jmp     *%esi                   C computed jump into the unrolled code
+
+
+ifdef(`PIC',`
+L(pic_calc):
+       C See mpn/x86/README about old gas bugs
+       leal    (%edi,%edi,8), %esi     C 9 bytes per, as in the non-PIC leal
+       addl    $L(entry)-L(here), %esi
+       addl    (%esp), %esi            C return address, which is L(here)
+       ret_internal
+')
+
+
+C -----------------------------------------------------------------------------
+       ALIGN(32)
+L(top):
+       C eax   zero
+       C ebx   src1
+       C ecx   counter
+       C edx   src2
+       C esi   scratch (was computed jump)
+       C edi   dst
+       C ebp   scratch
+
+       leal    UNROLL_BYTES(%edx), %edx        C src2 steps here, leal keeps carry
+
+L(entry):
+deflit(CHUNK_COUNT, 2)
+forloop(i, 0, UNROLL_COUNT/CHUNK_COUNT-1, `
+       deflit(`disp0', eval(i*CHUNK_COUNT*4 ifelse(UNROLL_BYTES,256,-128)))
+       deflit(`disp1', eval(disp0 + 4))
+
+Zdisp( movl,   disp0,(%ebx), %esi)
+       movl    disp1(%ebx), %ebp
+Zdisp( M4_inst,disp0,(%edx), %esi)
+Zdisp( movl,   %esi, disp0,(%edi))
+       M4_inst disp1(%edx), %ebp
+       movl    %ebp, disp1(%edi)
+')
+
+       decl    %ecx                    C decl leaves the carry flag alone
+       leal    UNROLL_BYTES(%ebx), %ebx
+       leal    UNROLL_BYTES(%edi), %edi
+       jns     L(top)
+
+
+       mov     PARAM_SIZE, %esi        C size low bit, as left by the andl at L(unroll)
+       movl    SAVE_EBP, %ebp
+       movl    $0, %eax                C movl rather than xorl, to keep the carry flag
+
+       decl    %esi
+       js      L(even)                 C no odd limb to process
+
+       movl    (%ebx), %ecx
+       M4_inst UNROLL_BYTES(%edx), %ecx        C edx was not stepped after the last pass
+       movl    %ecx, (%edi)
+L(even):
+
+       movl    SAVE_EDI, %edi
+       movl    SAVE_EBX, %ebx
+       setc    %al                     C return value is the final carry
+
+       movl    SAVE_ESI, %esi
+       addl    $STACK_SPACE, %esp
+
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/k7/aorsmul_1.asm b/mpn/x86/k7/aorsmul_1.asm

new file mode 100644 (file)

index 0000000..b247c29
--- /dev/null
+++ b/mpn/x86/k7/aorsmul_1.asm
@@ -0,0 +1,157 @@
+dnl  AMD K7 mpn_addmul_1/mpn_submul_1 -- add or subtract mpn multiple.
+
+dnl  Copyright 1999, 2000, 2001, 2002, 2005, 2008 Free Software Foundation,
+dnl  Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C                           cycles/limb
+C P5:
+C P6 model 0-8,10-12
+C P6 model 9  (Banias)
+C P6 model 13 (Dothan)
+C P4 model 0  (Willamette)
+C P4 model 1  (?)
+C P4 model 2  (Northwood)
+C P4 model 3  (Prescott)
+C P4 model 4  (Nocona)
+C K6:
+C K7:                            3.75
+C K8:
+
+C TODO
+C  * Improve feed-in and wind-down code.  We beat the old code for all n != 1,
+C    but lose by 2x for n == 1.
+
+ifdef(`OPERATION_addmul_1',`
+      define(`ADDSUB',        `add')
+      define(`func',  `mpn_addmul_1')
+')
+ifdef(`OPERATION_submul_1',`
+      define(`ADDSUB',        `sub')
+      define(`func',  `mpn_submul_1')
+')
+
+C ADDSUB (add or sub) and func (the entry point name) as chosen above are
+C the only things that differ between the two routines built from this file.
+C NOTE(review): presumably exactly one OPERATION_ symbol is defined per
+C build of this file -- confirm against the build machinery.
+MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(func)
+       C Multiply a limb vector by a single limb and ADDSUB the products
+       C into a second vector in place.
+       C
+       C Arguments are on the stack.  Once the 16-byte register save area
+       C below has been allocated they sit at 20(%esp) upward:
+       C   20(%esp)  pointer to the limbs updated in place by ADDSUB
+       C   24(%esp)  pointer to the limbs that get multiplied
+       C   28(%esp)  limb count (the slot is reused as the loop counter)
+       C   32(%esp)  multiplier limb
+       C The carry (or borrow) limb out of the top is returned in eax.
+       add     $-16, %esp
+       mov     %ebp, (%esp)
+       mov     %ebx, 4(%esp)
+       mov     %esi, 8(%esp)
+       mov     %edi, 12(%esp)
+
+       mov     20(%esp), %edi          C edi = pointer to limbs being updated
+       mov     24(%esp), %esi          C esi = pointer to limbs being multiplied
+       mov     28(%esp), %eax          C eax = limb count
+       mov     32(%esp), %ecx          C ecx = multiplier, used by every mul
+       mov     %eax, %ebx
+       shr     $2, %eax
+       mov     %eax, 28(%esp)          C loop counter = count / 4
+       mov     (%esi), %eax            C first limb to multiply
+       and     $3, %ebx                C count mod 4 selects the feed-in path
+       jz      L(b0)
+       cmp     $2, %ebx
+       jz      L(b2)
+       jg      L(b3)
+
+       C count mod 4 == 1.  Both pointers are biased by one limb so that the
+       C offsets used at L(1) and L(cj1) address the first limb.
+L(b1): lea     -4(%esi), %esi
+       lea     -4(%edi), %edi
+       mul     %ecx
+       mov     %eax, %ebx
+       mov     %edx, %ebp
+       cmpl    $0, 28(%esp)
+       jz      L(cj1)                  C count / 4 == 0: single limb only
+       mov     8(%esi), %eax
+       jmp     L(1)
+
+       C count mod 4 == 2.  No pointer bias needed, enter at L(2), or go
+       C straight to the wind-down code when count / 4 == 0.
+L(b2): mul     %ecx
+       mov     %eax, %ebp
+       mov     4(%esi), %eax
+       mov     %edx, %ebx
+       cmpl    $0, 28(%esp)
+       jne     L(2)
+       jmp     L(cj2)
+
+       C count mod 4 == 3.  Enter at L(3), the last stage before the decl of
+       C the loop counter, so the counter is bumped to pay for that decl.
+L(b3): lea     -12(%esi), %esi
+       lea     -12(%edi), %edi
+       mul     %ecx
+       mov     %eax, %ebx
+       mov     %edx, %ebp
+       mov     16(%esi), %eax
+       incl    28(%esp)
+       jmp     L(3)
+
+       C count mod 4 == 0.  Enter at L(0).
+L(b0): lea     -8(%esi), %esi
+       lea     -8(%edi), %edi
+       mul     %ecx
+       mov     %eax, %ebp
+       mov     12(%esi), %eax
+       mov     %edx, %ebx
+       jmp     L(0)
+
+       C Main loop, four limbs per pass.  ebx and ebp alternate roles: one
+       C holds the limb about to be applied to memory, the other collects
+       C the high half of the newest product.  The carry flag produced by
+       C each ADDSUB is consumed by the adc pair that follows it.
+       ALIGN(16)
+L(top):        lea     16(%edi), %edi
+L(2):  mul     %ecx                    C edx:eax = limb * multiplier
+       ADDSUB  %ebp, 0(%edi)           C apply pending limb, CF = carry out
+       mov     $0, %ebp                C clear with mov so CF is left alone
+       adc     %eax, %ebx              C pending high + new low + CF
+       mov     8(%esi), %eax           C next limb to multiply
+       adc     %edx, %ebp              C new high + CF
+L(1):  mul     %ecx
+       ADDSUB  %ebx, 4(%edi)
+       mov     $0, %ebx
+       adc     %eax, %ebp
+       mov     12(%esi), %eax
+       adc     %edx, %ebx
+L(0):  mul     %ecx
+       ADDSUB  %ebp, 8(%edi)
+       mov     $0, %ebp
+       adc     %eax, %ebx
+       adc     %edx, %ebp
+       mov     16(%esi), %eax
+L(3):  mul     %ecx
+       ADDSUB  %ebx, 12(%edi)
+       adc     %eax, %ebp
+       mov     20(%esi), %eax          C loaded before esi is advanced
+       lea     16(%esi), %esi          C lea and mov keep CF for the adc below
+       mov     $0, %ebx
+       adc     %edx, %ebx
+       decl    28(%esp)
+       jnz     L(top)
+
+       C Wind-down: one more product, then the last two limbs are applied.
+L(end):        lea     16(%edi), %edi
+L(cj2):        mul     %ecx
+       ADDSUB  %ebp, (%edi)
+       adc     %eax, %ebx
+       adc     $0, %edx                C edx = high half of last product + CF
+L(cj1):        ADDSUB  %ebx, 4(%edi)           C final limb written
+       adc     $0, %edx
+       mov     %edx, %eax              C return value = carry limb
+       mov     (%esp), %ebp
+       mov     4(%esp), %ebx
+       mov     8(%esp), %esi
+       mov     12(%esp), %edi
+       add     $16, %esp
+       ret
+EPILOGUE()
+ASM_END()
diff --git a/mpn/x86/k7/dive_1.asm b/mpn/x86/k7/dive_1.asm

new file mode 100644 (file)

index 0000000..a754d09
--- /dev/null
+++ b/mpn/x86/k7/dive_1.asm
@@ -0,0 +1,196 @@
+dnl  AMD K7 mpn_divexact_1 -- mpn by limb exact division.
+
+dnl  Copyright 2001, 2002, 2004, 2007 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C          cycles/limb
+C Athlon:     11.0
+C Hammer:      9.0
+
+
+C void mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                      mp_limb_t divisor);
+C
+C The dependent chain is mul+imul+sub for 11 cycles and that speed is
+C achieved with no special effort.  The load and shrld latencies are hidden
+C by out of order execution.
+C
+C It's a touch faster on size==1 to use the mul-by-inverse than divl.
+
+defframe(PARAM_DIVISOR,16)
+defframe(PARAM_SIZE,   12)
+defframe(PARAM_SRC,    8)
+defframe(PARAM_DST,    4)
+
+C The six negative-offset slots below add up to the STACK_SPACE bytes
+C (6 * 4 = 24) that the prologue reserves with subl.
+defframe(SAVE_EBX,     -4)
+defframe(SAVE_ESI,     -8)
+defframe(SAVE_EDI,    -12)
+defframe(SAVE_EBP,    -16)
+defframe(VAR_INVERSE, -20)
+defframe(VAR_DST_END, -24)
+
+deflit(STACK_SPACE, 24)
+
+       TEXT
+
+       ALIGN(16)
+PROLOGUE(mpn_divexact_1)
+deflit(`FRAME',0)
+
+       C Outline: strip the low zero bits from the divisor (count kept in
+       C ecx), build an inverse of the odd part, then produce one quotient
+       C limb per source limb by shifting, subtracting the running carry and
+       C multiplying by the inverse.
+
+       movl    PARAM_DIVISOR, %eax     C eax = d
+       subl    $STACK_SPACE, %esp      deflit(`FRAME',STACK_SPACE)
+       movl    $-1, %ecx               C shift count
+
+       movl    %ebp, SAVE_EBP
+       movl    PARAM_SIZE, %ebp        C ebp = size
+
+       movl    %esi, SAVE_ESI
+       movl    %edi, SAVE_EDI
+
+       C If there's usually only one or two trailing zero bits then this
+       C should be faster than bsfl.
+L(strip_twos):
+       incl    %ecx
+       shrl    %eax
+       jnc     L(strip_twos)           C stop once a 1 bit has been shifted out
+
+       movl    %ebx, SAVE_EBX
+       leal    1(%eax,%eax), %ebx      C d without twos
+       andl    $127, %eax              C d/2, 7 bits
+
+ifdef(`PIC',`
+       LEA(    binvert_limb_table, %edx)
+       movzbl  (%eax,%edx), %eax               C inv 8 bits
+',`
+       movzbl  binvert_limb_table(%eax), %eax  C inv 8 bits
+')
+
+       C Two rounds of inv = 2*inv - inv*inv*d refine the table value.  The
+       C pointer setup for the loop is interleaved with the imull chain.
+
+       leal    (%eax,%eax), %edx       C 2*inv
+       movl    %ebx, PARAM_DIVISOR     C d without twos
+
+       imull   %eax, %eax              C inv*inv
+
+       movl    PARAM_SRC, %esi
+       movl    PARAM_DST, %edi
+
+       imull   %ebx, %eax              C inv*inv*d
+
+       subl    %eax, %edx              C inv = 2*inv - inv*inv*d
+       leal    (%edx,%edx), %eax       C 2*inv
+
+       imull   %edx, %edx              C inv*inv
+
+       leal    (%esi,%ebp,4), %esi     C src end
+       leal    (%edi,%ebp,4), %edi     C dst end
+       negl    %ebp                    C -size
+
+       imull   %ebx, %edx              C inv*inv*d
+
+       subl    %edx, %eax              C inv = 2*inv - inv*inv*d
+
+       ASSERT(e,`      C expect d*inv == 1 mod 2^GMP_LIMB_BITS
+       pushl   %eax    FRAME_pushl()
+       imull   PARAM_DIVISOR, %eax
+       cmpl    $1, %eax
+       popl    %eax    FRAME_popl()')
+
+       movl    %eax, VAR_INVERSE
+       movl    (%esi,%ebp,4), %eax     C src[0]
+
+       incl    %ebp
+       jz      L(one)                  C size == 1
+
+       movl    (%esi,%ebp,4), %edx     C src[1]
+
+       shrdl(  %cl, %edx, %eax)        C low limb of src >> shift
+
+       movl    %edi, VAR_DST_END       C edi doubles as scratch inside the loop
+       xorl    %ebx, %ebx              C no carry into the first limb
+       jmp     L(entry)
+
+       ALIGN(8)
+L(top):
+       C eax   q
+       C ebx   carry bit, 0 or 1
+       C ecx   shift
+       C edx
+       C esi   src end
+       C edi   dst end
+       C ebp   counter, limbs, negative
+
+       mull    PARAM_DIVISOR           C carry limb in edx
+
+       movl    -4(%esi,%ebp,4), %eax
+       movl    (%esi,%ebp,4), %edi     C next higher limb, edi used as scratch
+
+       shrdl(  %cl, %edi, %eax)
+
+       subl    %ebx, %eax              C apply carry bit
+       setc    %bl
+       movl    VAR_DST_END, %edi       C edi back to dst end (mov keeps flags)
+
+       subl    %edx, %eax              C apply carry limb
+       adcl    $0, %ebx                C add the borrow from this subtract
+
+L(entry):
+       imull   VAR_INVERSE, %eax       C q = limb * inverse
+
+       movl    %eax, -4(%edi,%ebp,4)
+       incl    %ebp
+       jnz     L(top)
+
+
+       C Last limb: there is no higher source limb, so a plain shrl is used
+       C and the callee-saved registers are restored along the way.
+
+       mull    PARAM_DIVISOR           C carry limb in edx
+
+       movl    -4(%esi), %eax          C src high limb
+       shrl    %cl, %eax
+       movl    SAVE_ESI, %esi
+
+       subl    %ebx, %eax              C apply carry bit
+       movl    SAVE_EBX, %ebx
+       movl    SAVE_EBP, %ebp
+
+       subl    %edx, %eax              C apply carry limb
+
+       imull   VAR_INVERSE, %eax
+
+       movl    %eax, -4(%edi)
+       movl    SAVE_EDI, %edi
+       addl    $STACK_SPACE, %esp
+
+       ret
+
+
+       C size == 1.  eax holds src[0] and edi still holds the dst end set up
+       C above (VAR_DST_END is not written on this path).
+L(one):
+       shrl    %cl, %eax
+       movl    SAVE_ESI, %esi
+       movl    SAVE_EBX, %ebx
+
+       imull   VAR_INVERSE, %eax
+
+       movl    SAVE_EBP, %ebp
+       movl    %eax, -4(%edi)
+
+       movl    SAVE_EDI, %edi
+       addl    $STACK_SPACE, %esp
+
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/k7/gcd_1.asm b/mpn/x86/k7/gcd_1.asm

new file mode 100644 (file)

index 0000000..e90d6bb
--- /dev/null
+++ b/mpn/x86/k7/gcd_1.asm
@@ -0,0 +1,369 @@
+dnl  AMD K7 mpn_gcd_1 -- mpn by 1 gcd.
+
+dnl  Copyright 2000, 2001, 2002, 2009 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C K7: 6.75 cycles/bit (approx)  1x1 gcd
+C     11.0 cycles/limb          Nx1 reduction (modexact_1_odd)
+
+
+dnl  Reduce using x%y if x is more than DIV_THRESHOLD bits bigger than y,
+dnl  where x is the larger of the two.  See tune/README for more.
+dnl
+dnl  divl at 40 cycles compared to the gcd at about 7 cycles/bitpair
+dnl  suggests 40/7*2=11.4 but 7 seems to be about right.
+
+deflit(DIV_THRESHOLD, 7)
+
+
+C table[n] is the number of trailing zeros on n, or MAXSHIFT if n==0.
+C
+C This is mixed in with the code, but as per the k7 optimization manual it's
+C a full cache line and suitably aligned so it won't get swapped between
+C code and data.  Having it in TEXT rather than RODATA saves needing a GOT
+C entry when PIC.
+C
+C Actually, there doesn't seem to be a measurable difference between this in
+C its own cache line or plonked in the middle of the code.  Presumably
+C since TEXT is read-only there's no worries about coherency.
+
+deflit(MAXSHIFT, 6)
+deflit(MASK, eval((m4_lshift(1,MAXSHIFT))-1))
+
+       TEXT
+       ALIGN(64)
+C Byte table indexed by the low bits of a value (index range 0 to MASK).
+C Entry 0 is MAXSHIFT, every other entry i is the trailing zero count of i.
+L(table):
+       .byte   MAXSHIFT
+forloop(i,1,MASK,
+`      .byte   m4_count_trailing_zeros(i)
+')
+
+
+C mp_limb_t mpn_gcd_1 (mp_srcptr src, mp_size_t size, mp_limb_t limb);
+C
+
+defframe(PARAM_LIMB,   12)
+defframe(PARAM_SIZE,    8)
+defframe(PARAM_SRC,     4)
+
+C The seven negative-offset slots below add up to the STACK_SPACE bytes
+C (7 * 4 = 28) reserved in the prologue.  The three CALL_ slots are the
+C lowest ones and are filled just before the call to mpn_modexact_1_odd.
+C NOTE(review): presumably they land at the stack top and so act as the
+C outgoing arguments -- confirm against the defframe macro.
+defframe(SAVE_EBX,     -4)
+defframe(SAVE_ESI,     -8)
+defframe(SAVE_EDI,    -12)
+defframe(SAVE_EBP,    -16)
+defframe(CALL_DIVISOR,-20)
+defframe(CALL_SIZE,   -24)
+defframe(CALL_SRC,    -28)
+
+deflit(STACK_SPACE, 28)
+
+       TEXT
+       ALIGN(16)
+
+PROLOGUE(mpn_gcd_1)
+deflit(`FRAME',0)
+
+       ASSERT(ne, `cmpl $0, PARAM_LIMB')       C y!=0
+       ASSERT(ae, `cmpl $1, PARAM_SIZE')       C size>=1
+
+       mov     PARAM_SRC, %eax
+       mov     PARAM_LIMB, %edx        C edx = y
+       sub     $STACK_SPACE, %esp      deflit(`FRAME',STACK_SPACE)
+
+       mov     %esi, SAVE_ESI
+       mov     %ebx, SAVE_EBX
+
+       mov     (%eax), %esi            C src low limb
+
+ifdef(`PIC',`
+       mov     %edi, SAVE_EDI
+       call    L(movl_eip_to_edi)      C edi = return address, that is L(here)
+L(here):
+       add     $L(table)-L(here), %edi C edi = address of L(table)
+')
+
+       mov     %esi, %ebx              C ebx = x (src low limb)
+       or      %edx, %esi      C x|y
+       mov     $-1, %ecx
+
+       C Count the low zero bits shared by x and y.  The loop ends once a 1
+       C bit has been shifted out of x|y, leaving the count in ecx.
+L(twos):
+       inc     %ecx
+       shr     %esi
+       jnc     L(twos)         C 3/4 chance of x or y odd already
+
+       shr     %cl, %ebx
+       shr     %cl, %edx
+       mov     %ecx, %esi      C common twos
+
+       mov     PARAM_SIZE, %ecx
+       cmp     $1, %ecx
+       ja      L(divide)       C size > 1: reduce src modulo y first
+
+
+       C eax
+       C ebx   x
+       C ecx
+       C edx   y
+       C esi   common twos
+       C edi   [PIC] L(table)
+       C ebp
+
+       mov     %edx, %eax
+       cmp     %ebx, %edx
+
+       cmovb(  %ebx, %eax)     C swap to make x bigger than y
+       cmovb(  %edx, %ebx)
+
+
+L(strip_y):
+       C eax   x
+       C ebx   y
+       C ecx
+       C edx
+       C esi   common twos
+       C edi   [PIC] L(table)
+       C ebp
+
+       ASSERT(nz,`orl %ebx,%ebx')
+       shr     %ebx
+       jnc     L(strip_y)
+       rcl     %ebx            C put the shifted-out 1 bit back: y is now odd
+
+
+       C eax   x
+       C ebx   y (odd)
+       C ecx
+       C edx
+       C esi   common twos
+       C edi   [PIC] L(table)
+       C ebp
+
+       mov     %eax, %ecx
+       mov     %ebx, %edx
+       shr     $DIV_THRESHOLD, %eax
+
+       cmp     %eax, %ebx              C y against x >> DIV_THRESHOLD
+       mov     %ecx, %eax              C eax = x again (mov keeps the flags)
+       ja      L(strip_x_entry)        C do x%y if x much bigger than y
+
+       C Fall through only when y <= x >> DIV_THRESHOLD, the case where one
+       C div is used to bring x down to x mod y.
+
+       xor     %edx, %edx
+
+       div     %ebx
+
+       or      %edx, %edx              C set ZF on a zero remainder
+       mov     %edx, %ecx              C remainder -> x
+       mov     %ebx, %edx              C y
+
+       jz      L(done_ebx)             C remainder zero: y is the answer
+       jmp     L(strip_x)
+
+
+       C Offset 0x9D here for non-PIC.  About 0.4 cycles/bit is saved by
+       C ensuring the end of the jnz at the end of this loop doesn't cross
+       C into the next cache line at 0xC0.
+       C
+       C PIC on the other hand is offset 0xAC here and extends to 0xC9, so
+       C it crosses but doesn't suffer any measurable slowdown.
+
+L(top):
+       C eax   x
+       C ebx   y-x
+       C ecx   x-y
+       C edx   y
+       C esi   twos, for use at end
+       C edi   [PIC] L(table)
+
+       cmovc(  %ebx, %ecx)             C if x-y gave carry, use x and y-x
+       cmovc(  %eax, %edx)
+
+L(strip_x):
+       mov     %ecx, %eax
+L(strip_x_entry):
+       and     $MASK, %ecx             C low bits of x index the table
+
+       ASSERT(nz, `orl %eax, %eax')
+
+ifdef(`PIC',`
+       mov     (%ecx,%edi), %cl
+',`
+       mov     L(table) (%ecx), %cl
+')
+
+       shr     %cl, %eax               C drop the low zero bits found
+       cmp     $MAXSHIFT, %cl          C MAXSHIFT means all the low bits were 0
+
+       mov     %eax, %ecx
+       mov     %edx, %ebx
+       je      L(strip_x)              C so there may be more zeros to strip
+
+       ASSERT(nz, `test $1, %eax')     C both odd
+       ASSERT(nz, `test $1, %edx')
+
+       sub     %eax, %ebx              C ebx = y-x
+       sub     %edx, %ecx              C ecx = x-y, CF set if x < y
+       jnz     L(top)                  C x == y ends the loop, answer in eax
+
+
+L(done):
+       mov     %esi, %ecx
+       mov     SAVE_ESI, %esi
+ifdef(`PIC',`
+       mov     SAVE_EDI, %edi
+')
+
+       shl     %cl, %eax               C put the common twos back
+       mov     SAVE_EBX, %ebx
+       add     $FRAME, %esp
+
+       ret
+
+
+
+C -----------------------------------------------------------------------------
+C two or more limbs
+
+dnl  MODEXACT_THRESHOLD is the size at which it's better to call
+dnl  mpn_modexact_1_odd than do an inline loop.
+
+deflit(MODEXACT_THRESHOLD, ifdef(`PIC',6,5))
+
+L(divide):
+       C eax   src
+       C ebx
+       C ecx   size
+       C edx   y
+       C esi   common twos
+       C edi   [PIC] L(table)
+       C ebp
+
+L(divide_strip_y):
+       ASSERT(nz,`or %edx,%edx')
+       shr     %edx
+       jnc     L(divide_strip_y)
+       lea     1(%edx,%edx), %ebx              C y now odd
+
+       mov     %ebp, SAVE_EBP
+       mov     %eax, %ebp                      C ebp = src
+       mov     -4(%eax,%ecx,4), %eax           C src high limb
+
+       cmp     $MODEXACT_THRESHOLD, %ecx
+       jae     L(modexact)
+
+       cmp     %ebx, %eax                      C high cmp divisor
+       mov     $0, %edx                        C mov so the cmp flags survive
+
+       cmovc(  %eax, %edx)                     C skip a div if high<divisor
+       sbb     $0, %ecx                        C one limb fewer in that case
+
+
+L(divide_top):
+       C eax   scratch (quotient)
+       C ebx   y
+       C ecx   counter (size to 1, inclusive)
+       C edx   carry (remainder)
+       C esi   common twos
+       C edi   [PIC] L(table)
+       C ebp   src
+
+       mov     -4(%ebp,%ecx,4), %eax
+
+       div     %ebx
+
+       dec     %ecx
+       jnz     L(divide_top)
+
+
+       C eax
+       C ebx   y (odd)
+       C ecx
+       C edx   x
+       C esi   common twos
+       C edi   [PIC] L(table)
+       C ebp
+
+       or      %edx, %edx              C set ZF on a zero remainder
+       mov     SAVE_EBP, %ebp
+       mov     %edx, %eax
+
+       mov     %edx, %ecx
+       mov     %ebx, %edx
+       jnz     L(strip_x_entry)        C enters with eax = ecx = x and edx = y
+
+
+L(done_ebx):
+       mov     %ebx, %eax
+       jmp     L(done)
+
+
+
+L(modexact):
+       C eax
+       C ebx   y
+       C ecx   size
+       C edx
+       C esi   common twos
+       C edi   [PIC] L(table)
+       C ebp   src
+
+ifdef(`PIC',`
+       mov     %ebp, CALL_SRC
+       mov     %ebx, %ebp              C y
+       mov     %edi, %ebx              C L(table)
+
+       C NOTE(review): presumably this turns the L(table) address into the
+       C GOT pointer that the PLT call expects in ebx -- confirm.
+       add     $_GLOBAL_OFFSET_TABLE_+[.-L(table)], %ebx
+       mov     %ebp, CALL_DIVISOR
+       mov     %ecx, CALL_SIZE
+
+       call    GSYM_PREFIX`'mpn_modexact_1_odd@PLT
+',`
+dnl non-PIC
+       mov     %ebx, CALL_DIVISOR
+       mov     %ebp, CALL_SRC
+       mov     %ecx, CALL_SIZE
+
+       call    GSYM_PREFIX`'mpn_modexact_1_odd
+')
+
+       C eax   x
+       C ebx   [non-PIC] y
+       C ecx
+       C edx
+       C esi   common twos
+       C edi   [PIC] L(table)
+       C ebp   [PIC] y
+
+       or      %eax, %eax              C set ZF on a zero result
+       mov     ifdef(`PIC',`%ebp',`%ebx'), %edx
+       mov     SAVE_EBP, %ebp
+
+       mov     %eax, %ecx
+       jnz     L(strip_x_entry)        C enters with eax = ecx = x and edx = y
+
+       mov     %edx, %eax              C zero result: y is the answer
+       jmp     L(done)
+
+
+ifdef(`PIC', `
+L(movl_eip_to_edi):
+       mov     (%esp), %edi
+       ret_internal
+')
+
+EPILOGUE()
diff --git a/mpn/x86/k7/gmp-mparam.h b/mpn/x86/k7/gmp-mparam.h

new file mode 100644 (file)

index 0000000..f18940f
--- /dev/null
+++ b/mpn/x86/k7/gmp-mparam.h
@@ -0,0 +1,185 @@
+/* AMD K7 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009,
+2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 32
+#define BYTES_PER_MP_LIMB 4
+
+
+#define MOD_1_NORM_THRESHOLD                 0  /* always */
+#define MOD_1_UNNORM_THRESHOLD               4
+#define MOD_1N_TO_MOD_1_1_THRESHOLD         14
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          6
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        20
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     26
+#define USE_PREINV_DIVREM_1                  1  /* native */
+#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD           28
+
+#define MUL_TOOM22_THRESHOLD                28
+#define MUL_TOOM33_THRESHOLD                85
+#define MUL_TOOM44_THRESHOLD               148
+#define MUL_TOOM6H_THRESHOLD               204
+#define MUL_TOOM8H_THRESHOLD               309
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      85
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD      99
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD      93
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD     101
+
+#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
+#define SQR_TOOM2_THRESHOLD                 50
+#define SQR_TOOM3_THRESHOLD                 87
+#define SQR_TOOM4_THRESHOLD                208
+#define SQR_TOOM6_THRESHOLD                306
+#define SQR_TOOM8_THRESHOLD                430
+
+#define MULMOD_BNM1_THRESHOLD               18
+#define SQRMOD_BNM1_THRESHOLD               19
+
+#define MUL_FFT_MODF_THRESHOLD             888  /* k = 6 */
+#define MUL_FFT_TABLE3                                      \
+  { {    888, 6}, {     25, 7}, {     13, 6}, {     27, 7}, \
+    {     15, 6}, {     32, 7}, {     17, 6}, {     35, 7}, \
+    {     19, 6}, {     39, 7}, {     23, 6}, {     47, 7}, \
+    {     27, 8}, {     15, 7}, {     31, 6}, {     63, 7}, \
+    {     35, 8}, {     19, 7}, {     39, 8}, {     23, 7}, \
+    {     47, 8}, {     31, 7}, {     63, 8}, {     39, 7}, \
+    {     79, 9}, {     23, 8}, {     47, 7}, {     95, 8}, \
+    {     51, 9}, {     31, 8}, {     71, 9}, {     39, 8}, \
+    {     79, 9}, {     47, 8}, {     95, 9}, {     55,10}, \
+    {     31, 9}, {     63, 8}, {    127, 9}, {     71, 8}, \
+    {    143, 9}, {     79,10}, {     47,11}, {     31,10}, \
+    {     63, 9}, {    127,10}, {     79, 9}, {    167,10}, \
+    {     95, 9}, {    207,10}, {    111,11}, {     63,10}, \
+    {    127, 9}, {    255,10}, {    159, 9}, {    319,11}, \
+    {     95,10}, {    191,12}, {     63,11}, {    127,10}, \
+    {    271, 9}, {    543,10}, {    287,11}, {    159,10}, \
+    {    319, 9}, {    671,11}, {    191,10}, {    383, 9}, \
+    {    767,11}, {    223,12}, {    127,11}, {    255,10}, \
+    {    511, 9}, {   1023,10}, {    543, 9}, {   1087,11}, \
+    {    287,10}, {    575, 9}, {   1151,10}, {    607, 9}, \
+    {   1215, 8}, {   2431,11}, {    319,10}, {    639, 9}, \
+    {   1279,10}, {    671, 9}, {   1343,12}, {    191,11}, \
+    {    383,10}, {    767, 9}, {   1535,10}, {    799, 9}, \
+    {   1599,11}, {    415,10}, {    831, 9}, {   1663,13}, \
+    {    127,12}, {    255,11}, {    511,10}, {   1023,11}, \
+    {    543,10}, {   1087,11}, {    575,10}, {   1151,11}, \
+    {    607,10}, {   1215,12}, {    319,11}, {    639,10}, \
+    {   1279,11}, {    671,10}, {   1407, 9}, {   2815,11}, \
+    {    735,10}, {   1471, 9}, {   2943,12}, {    383,11}, \
+    {    767,10}, {   1535,11}, {    799,10}, {   1599,11}, \
+    {    831,10}, {   1663,11}, {    863,10}, {   1727,11}, \
+    {    895,10}, {   1791,11}, {    959,13}, {    255,12}, \
+    {    511,11}, {   1023,10}, {   2047,11}, {   1087,12}, \
+    {    575,11}, {   1151,10}, {   2303,11}, {   1215,10}, \
+    {   2431,12}, {    639,11}, {   1407,10}, {   2815,11}, \
+    {   1471,10}, {   2943,13}, {    383,12}, {    767,11}, \
+    {   1599,12}, {    831,11}, {   1663,10}, {   3327,11}, \
+    {   1727,12}, {    895,11}, {   1791,10}, {   3583,12}, \
+    {    959,11}, {   1919,14}, {    255,13}, {    511,12}, \
+    {   1023,11}, {   2047,12}, {   1087,11}, {   2239,12}, \
+    {   1151,11}, {   2303,12}, {   1215,11}, {   2431,13}, \
+    {   8192,14}, {  16384,15}, {  32768,16} }
+#define MUL_FFT_TABLE3_SIZE 167  /* entries in MUL_FFT_TABLE3: 41 rows of 4, plus 3 */
+#define MUL_FFT_THRESHOLD                 7808
+
+#define SQR_FFT_MODF_THRESHOLD             786  /* k = 6 */
+#define SQR_FFT_TABLE3                                      \
+  { {    786, 6}, {     25, 7}, {     13, 6}, {     27, 7}, \
+    {     15, 6}, {     31, 7}, {     17, 6}, {     35, 7}, \
+    {     19, 6}, {     39, 7}, {     23, 6}, {     47, 7}, \
+    {     27, 8}, {     15, 7}, {     31, 6}, {     63, 7}, \
+    {     35, 8}, {     19, 7}, {     39, 8}, {     23, 7}, \
+    {     47, 8}, {     31, 7}, {     63, 8}, {     39, 9}, \
+    {     23, 8}, {     47, 7}, {     95, 8}, {     51, 9}, \
+    {     31, 8}, {     67, 9}, {     39, 8}, {     79, 9}, \
+    {     47, 8}, {     95,10}, {     31, 9}, {     63, 8}, \
+    {    127, 9}, {     79,10}, {     47, 9}, {     95, 8}, \
+    {    191,11}, {     31,10}, {     63, 9}, {    135,10}, \
+    {     79, 9}, {    167,10}, {     95, 9}, {    191,10}, \
+    {    111,11}, {     63,10}, {    143, 9}, {    287, 8}, \
+    {    607,10}, {    159, 9}, {    319,10}, {    175,11}, \
+    {     95,10}, {    191, 9}, {    383,10}, {    207,12}, \
+    {     63,11}, {    127,10}, {    255, 9}, {    511,10}, \
+    {    287,11}, {    159,10}, {    319, 9}, {    639, 8}, \
+    {   1279, 9}, {    671, 8}, {   1343,11}, {    191,10}, \
+    {    383, 9}, {    767, 8}, {   1535, 9}, {    799, 8}, \
+    {   1599,10}, {    415,11}, {    223,12}, {    127,11}, \
+    {    255,10}, {    511, 9}, {   1023,10}, {    543, 9}, \
+    {   1087,11}, {    287,10}, {    575, 9}, {   1151,10}, \
+    {    607, 9}, {   1215, 8}, {   2431,11}, {    319,10}, \
+    {    639, 9}, {   1279,10}, {    671, 9}, {   1343,12}, \
+    {    191,11}, {    383,10}, {    767, 9}, {   1535,10}, \
+    {    799, 9}, {   1599,11}, {    415,10}, {    863,13}, \
+    {    127,12}, {    255,11}, {    511,10}, {   1023,11}, \
+    {    543,10}, {   1087,11}, {    575,10}, {   1151, 9}, \
+    {   2303,11}, {    607,10}, {   1215, 9}, {   2431,12}, \
+    {    319,11}, {    639,10}, {   1279,11}, {    671,10}, \
+    {   1407, 9}, {   2815,11}, {    735,10}, {   1471, 9}, \
+    {   2943,11}, {    767,10}, {   1535,11}, {    799,10}, \
+    {   1599,11}, {    831,10}, {   1663,11}, {    863,10}, \
+    {   1727,11}, {    895,10}, {   1791,11}, {    959,10}, \
+    {   1919,13}, {    255,12}, {    511,11}, {   1023,10}, \
+    {   2047,11}, {   1087,10}, {   2175,12}, {    575,11}, \
+    {   1151,10}, {   2303,11}, {   1215,10}, {   2431,12}, \
+    {    639,11}, {   1407,10}, {   2815,11}, {   1471,10}, \
+    {   2943,12}, {    767,11}, {   1599,12}, {    831,11}, \
+    {   1663,10}, {   3327,12}, {    895,11}, {   1791,12}, \
+    {    959,11}, {   1919,10}, {   3839,11}, {   1983,14}, \
+    {    255,13}, {    511,12}, {   1023,11}, {   2047,12}, \
+    {   1087,11}, {   2239,12}, {   1151,11}, {   2303,12}, \
+    {   1215,11}, {   2431,13}, {   8192,14}, {  16384,15}, \
+    {  32768,16} }
+#define SQR_FFT_TABLE3_SIZE 177  /* entries in SQR_FFT_TABLE3: 44 rows of 4, plus 1 */
+#define SQR_FFT_THRESHOLD                 7552
+
+#define MULLO_BASECASE_THRESHOLD            10
+#define MULLO_DC_THRESHOLD                  50
+#define MULLO_MUL_N_THRESHOLD            13463
+
+#define DC_DIV_QR_THRESHOLD                 60
+#define DC_DIVAPPR_Q_THRESHOLD             333
+#define DC_BDIV_QR_THRESHOLD                82
+#define DC_BDIV_Q_THRESHOLD                268
+
+#define INV_MULMOD_BNM1_THRESHOLD           62
+#define INV_NEWTON_THRESHOLD               284
+#define INV_APPR_THRESHOLD                 290
+
+#define BINV_NEWTON_THRESHOLD              264
+#define REDC_1_TO_REDC_N_THRESHOLD          86
+
+#define MU_DIV_QR_THRESHOLD               1858
+#define MU_DIVAPPR_Q_THRESHOLD            1718
+#define MUPI_DIV_QR_THRESHOLD              114
+#define MU_BDIV_QR_THRESHOLD              1387
+#define MU_BDIV_Q_THRESHOLD               1470
+
+#define MATRIX22_STRASSEN_THRESHOLD         15
+#define HGCD_THRESHOLD                     154
+#define GCD_DC_THRESHOLD                   599
+#define GCDEXT_DC_THRESHOLD                443
+#define JACOBI_BASE_METHOD                   1
+
+#define GET_STR_DC_THRESHOLD                17
+#define GET_STR_PRECOMPUTE_THRESHOLD        34
+#define SET_STR_DC_THRESHOLD               542
+#define SET_STR_PRECOMPUTE_THRESHOLD      1615
diff --git a/mpn/x86/k7/mmx/com.asm b/mpn/x86/k7/mmx/com.asm

new file mode 100644 (file)

index 0000000..3c6704b
--- /dev/null
+++ b/mpn/x86/k7/mmx/com.asm
@@ -0,0 +1,114 @@
+dnl  AMD Athlon mpn_com -- mpn bitwise one's complement.
+
+dnl  Copyright 2002 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C K7: 1.0 cycles/limb
+
+
+C void mpn_com (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C
+C The loop form below is necessary for the claimed speed.  It needs to be
+C aligned to a 16 byte boundary and only 16 bytes long.  Maybe that's so it
+C fits in a BTB entry.  The adjustments to %eax and %edx avoid offsets on
+C the movq's and achieve the necessary size.
+C
+C If both src and dst are 4mod8, the loop runs at 1.5 c/l.  So long as one
+C of the two is 0mod8, it runs at 1.0 c/l.  On that basis dst is checked
+C (offset by the size, as per the loop addressing) and one high limb
+C processed separately to get alignment.
+C
+C The padding for the nails case is unattractive, but shouldn't cost any
+C cycles.  Explicit .byte's guarantee the desired instructions, at a point
+C where we're probably stalled waiting for loads anyway.
+C
+C Enhancements:
+C
+C The combination load/pxor/store might be able to be unrolled to approach
+C 0.5 c/l if desired.
+
+defframe(PARAM_SIZE,12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+       TEXT
+       ALIGN(16)
+
+PROLOGUE(mpn_com)
+deflit(`FRAME',0)
+
+       C Outline: if dst + 4*size has bit 2 set, the high limb is done on
+       C its own first.  The remaining limbs are then complemented two at a
+       C time from the top down, with a final single low limb if one is left.
+
+       movl    PARAM_DST, %edx
+       movl    PARAM_SIZE, %ecx
+       pcmpeqd %mm7, %mm7                      C mm7 = all ones, the xor mask
+
+       leal    (%edx,%ecx,4), %eax             C dst + 4*size
+       andl    $4, %eax                        C ZF is consumed by the jz below
+ifelse(GMP_NAIL_BITS,0,,
+`      psrld   $GMP_NAIL_BITS, %mm7')          C GMP_NUMB_MASK
+
+       movl    PARAM_SRC, %eax                 C only the flags from andl are kept
+       movd    -4(%eax,%ecx,4), %mm0           C src high limb
+
+ifelse(GMP_NAIL_BITS,0,,
+`      C padding for alignment below
+       .byte   0x8d, 0xb6, 0x00, 0x00, 0x00, 0x00      C lea 0(%esi),%esi
+       .byte   0x8d, 0xbf, 0x00, 0x00, 0x00, 0x00      C lea 0(%edi),%edi
+')
+
+       jz      L(aligned)
+
+       pxor    %mm7, %mm0
+       movd    %mm0, -4(%edx,%ecx,4)           C dst high limb
+       decl    %ecx
+       jz      L(done)                         C that was the only limb
+L(aligned):
+
+       C Bias both pointers by one limb and the counter by one, so that the
+       C movq pair in the loop needs no displacement.
+       addl    $4, %eax
+       addl    $4, %edx
+       decl    %ecx
+       jz      L(one)                          C exactly one limb left
+
+       C offset 0x30 for no nails, or 0x40 for nails
+       ALIGN(16)
+L(top):
+       C eax   src
+       C ebx
+       C ecx   counter
+       C edx   dst
+
+       subl    $2, %ecx                        C flags from here drive the jg
+       movq    (%eax,%ecx,4), %mm0
+       pxor    %mm7, %mm0
+       movq    %mm0, (%edx,%ecx,4)
+       jg      L(top)
+
+       jnz     L(done)                         C if size even
+
+L(one):
+       movd    -4(%eax), %mm0                  C src low limb
+       pxor    %mm7, %mm0
+       movd    %mm0, -4(%edx)                  C dst low limb
+
+L(done):
+       emms
+
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/k7/mmx/copyd.asm b/mpn/x86/k7/mmx/copyd.asm

new file mode 100644 (file)

index 0000000..4601fcd
--- /dev/null
+++ b/mpn/x86/k7/mmx/copyd.asm
@@ -0,0 +1,133 @@
+dnl  AMD K7 mpn_copyd -- copy limb vector, decrementing.
+
+dnl  Copyright 1999, 2000, 2002 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C    alignment dst/src, A=0mod8 N=4mod8
+C       A/A   A/N   N/A   N/N
+C K7    0.75  1.0   1.0   0.75
+
+
+C void mpn_copyd (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C
+C The various comments in mpn/x86/k7/mmx/copyi.asm apply here too.
+
+defframe(PARAM_SIZE,12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+deflit(`FRAME',0)
+
+dnl  parameter space reused
+define(SAVE_EBX,`PARAM_SIZE')
+define(SAVE_ESI,`PARAM_SRC')
+
+dnl  minimum 5 since the unrolled code can't handle less than 5
+deflit(UNROLL_THRESHOLD, 5)
+
+       TEXT
+       ALIGN(32)
+PROLOGUE(mpn_copyd)
+       C Copy size limbs from src to dst, starting at the high end.
+       movl    PARAM_SIZE, %ecx
+       movl    %ebx, SAVE_EBX          C the PARAM_SIZE slot, size is now in ecx
+
+       movl    PARAM_SRC, %eax
+       movl    PARAM_DST, %edx
+
+       cmpl    $UNROLL_THRESHOLD, %ecx
+       jae     L(unroll)
+
+       orl     %ecx, %ecx
+       jz      L(simple_done)          C size==0, nothing to copy
+
+L(simple):
+       C eax   src
+       C ebx   scratch
+       C ecx   counter
+       C edx   dst
+       C
+       C this loop is 2 cycles/limb
+
+       movl    -4(%eax,%ecx,4), %ebx   C src[counter-1]
+       movl    %ebx, -4(%edx,%ecx,4)
+       decl    %ecx
+       jnz     L(simple)
+
+L(simple_done):
+       movl    SAVE_EBX, %ebx
+       ret                             C no MMX used on this path, so no emms
+
+
+L(unroll):
+       movl    %esi, SAVE_ESI          C the PARAM_SRC slot, src is now in eax
+       leal    (%eax,%ecx,4), %ebx     C src end
+       leal    (%edx,%ecx,4), %esi     C dst end
+
+       andl    %esi, %ebx              C bit 2 set only if set in both ends
+       movl    SAVE_ESI, %esi          C esi was only scratch
+       subl    $4, %ecx                C size-4
+
+       testl   $4, %ebx   C testl to pad code closer to 16 bytes for L(top)
+       jz      L(aligned)
+
+       C both src and dst unaligned, process one limb to align them
+       movl    12(%eax,%ecx,4), %ebx   C src[size-1]
+       movl    %ebx, 12(%edx,%ecx,4)
+       decl    %ecx
+L(aligned):
+
+
+       ALIGN(16)
+L(top):
+       C eax   src
+       C ebx
+       C ecx   counter, limbs
+       C edx   dst
+
+       movq    8(%eax,%ecx,4), %mm0    C upper pair of the top four limbs left
+       movq    (%eax,%ecx,4), %mm1     C lower pair
+       subl    $4, %ecx                C flags for the jns, movq leaves them alone
+       movq    %mm0, 16+8(%edx,%ecx,4) C the 16 undoes the subl
+       movq    %mm1, 16(%edx,%ecx,4)
+       jns     L(top)
+
+
+       C now %ecx is -4 to -1 representing respectively 0 to 3 limbs remaining
+
+       testb   $2, %cl
+       jz      L(finish_not_two)
+
+       movq    8(%eax,%ecx,4), %mm0    C src[0],src[1] if ecx is -2, src[1],src[2] if -1
+       movq    %mm0, 8(%edx,%ecx,4)
+L(finish_not_two):
+
+       testb   $1, %cl
+       jz      L(done)
+
+       movl    (%eax), %ebx            C src[0], the last limb
+       movl    %ebx, (%edx)
+
+L(done):
+       movl    SAVE_EBX, %ebx
+       emms                            C clear the MMX state before returning
+       ret
+
+
+EPILOGUE()
diff --git a/mpn/x86/k7/mmx/copyi.asm b/mpn/x86/k7/mmx/copyi.asm

new file mode 100644 (file)

index 0000000..a17d575
--- /dev/null
+++ b/mpn/x86/k7/mmx/copyi.asm
@@ -0,0 +1,146 @@
+dnl  AMD K7 mpn_copyi -- copy limb vector, incrementing.
+
+dnl  Copyright 1999, 2000, 2002, 2003 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C    alignment dst/src, A=0mod8 N=4mod8
+C       A/A   A/N   N/A   N/N
+C K7    0.75  1.0   1.0   0.75
+
+
+C void mpn_copyi (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C
+C Copy src,size to dst,size.
+C
+C This code at 0.75 or 1.0 c/l is always faster than a plain rep movsl at
+C 1.33 c/l.
+C
+C The K7 can do a 64-bit load and 64-bit store in one cycle (optimization
+C guide 22007 appendix B), so 0.5 c/l should be possible, however nothing
+C under 0.7 c/l is known.  Apparently only two 32-bit stores can be done in
+C one cycle, so perhaps some scheduling is needed to ensure it's a
+C load+store in each cycle, not store+store.
+C
+C If both source and destination are unaligned then one limb is processed at
+C the start to make them aligned and so get 0.75 c/l, whereas if they'd been
+C used unaligned it would be 1.5 c/l.
+
+defframe(PARAM_SIZE,12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+dnl  parameter space reused
+define(SAVE_EBX,`PARAM_SIZE')
+
+dnl  minimum 5 since the unrolled code can't handle less than 5
+deflit(UNROLL_THRESHOLD, 5)
+
+       TEXT
+       ALIGN(32)
+PROLOGUE(mpn_copyi)
+deflit(`FRAME',0)
+       C Copy size limbs from src to dst, starting at the low end.
+       movl    PARAM_SIZE, %ecx
+       movl    %ebx, SAVE_EBX          C the PARAM_SIZE slot, size is now in ecx
+
+       movl    PARAM_SRC, %eax
+       movl    PARAM_DST, %edx
+
+       cmpl    $UNROLL_THRESHOLD, %ecx
+       jae     L(unroll)
+
+       orl     %ecx, %ecx
+       jz      L(simple_done)          C size==0, nothing to copy
+
+L(simple):
+       C eax   src, incrementing
+       C ebx   scratch
+       C ecx   counter
+       C edx   dst, incrementing
+       C
+       C this loop is 2 cycles/limb
+
+       movl    (%eax), %ebx
+       movl    %ebx, (%edx)
+       decl    %ecx
+       leal    4(%eax), %eax           C leal not addl, to keep the decl flags
+       leal    4(%edx), %edx
+       jnz     L(simple)
+
+L(simple_done):
+       movl    SAVE_EBX, %ebx
+       ret                             C no MMX used on this path, so no emms
+
+
+L(unroll):
+       movl    %eax, %ebx              C src start, for the alignment test
+       leal    -12(%eax,%ecx,4), %eax  C src end - 12
+       subl    $3, %ecx                C size-3
+
+       andl    %edx, %ebx              C bit 2 set only if set in both src and dst
+       leal    (%edx,%ecx,4), %edx     C dst end - 12
+       negl    %ecx                    C -(size-3)
+
+       testl   $4, %ebx   C testl to pad code closer to 16 bytes for L(top)
+       jz      L(aligned)
+
+       C both src and dst unaligned, process one limb to align them
+       movl    (%eax,%ecx,4), %ebx     C src[0]
+       movl    %ebx, (%edx,%ecx,4)
+       incl    %ecx
+L(aligned):
+
+
+       ALIGN(16)
+L(top):
+       C eax   src end - 12
+       C ebx
+       C ecx   counter, negative, limbs
+       C edx   dst end - 12
+
+       movq    (%eax,%ecx,4), %mm0     C lower pair of the next four limbs
+       movq    8(%eax,%ecx,4), %mm1    C upper pair
+       addl    $4, %ecx                C carry out once the counter reaches 0 to 3
+       movq    %mm0, -16(%edx,%ecx,4)  C the -16 undoes the addl
+       movq    %mm1, -16+8(%edx,%ecx,4)
+       ja      L(top)          C jump no carry and not zero
+
+
+       C now %ecx is 0 to 3 representing respectively 3 to 0 limbs remaining
+
+       testb   $2, %cl
+       jnz     L(finish_not_two)
+
+       movq    (%eax,%ecx,4), %mm0     C two limbs, ecx is 0 or 1 here
+       movq    %mm0, (%edx,%ecx,4)
+L(finish_not_two):
+
+       testb   $1, %cl
+       jnz     L(done)
+
+       movl    8(%eax), %ebx           C src[size-1], the last limb
+       movl    %ebx, 8(%edx)
+
+L(done):
+       movl    SAVE_EBX, %ebx
+       emms                            C clear the MMX state before returning
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/k7/mmx/divrem_1.asm b/mpn/x86/k7/mmx/divrem_1.asm

new file mode 100644 (file)

index 0000000..fa5824c
--- /dev/null
+++ b/mpn/x86/k7/mmx/divrem_1.asm
@@ -0,0 +1,821 @@
+dnl  AMD K7 mpn_divrem_1, mpn_divrem_1c, mpn_preinv_divrem_1 -- mpn by limb
+dnl  division.
+
+dnl  Copyright 1999, 2000, 2001, 2002, 2004 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C K7: 17.0 cycles/limb integer part, 15.0 cycles/limb fraction part.
+
+
+C mp_limb_t mpn_divrem_1 (mp_ptr dst, mp_size_t xsize,
+C                         mp_srcptr src, mp_size_t size,
+C                         mp_limb_t divisor);
+C mp_limb_t mpn_divrem_1c (mp_ptr dst, mp_size_t xsize,
+C                          mp_srcptr src, mp_size_t size,
+C                          mp_limb_t divisor, mp_limb_t carry);
+C mp_limb_t mpn_preinv_divrem_1 (mp_ptr dst, mp_size_t xsize,
+C                                mp_srcptr src, mp_size_t size,
+C                                mp_limb_t divisor, mp_limb_t inverse,
+C                                unsigned shift);
+C
+C Algorithm:
+C
+C The method and nomenclature follow part 8 of "Division by Invariant
+C Integers using Multiplication" by Granlund and Montgomery, reference in
+C gmp.texi.
+C
+C The "and"s shown in the paper are done here with "cmov"s.  "m" is written
+C for m', and "d" for d_norm, which won't cause any confusion since it's
+C only the normalized divisor that's of any use in the code.  "b" is written
+C for 2^N, the size of a limb, N being 32 here.
+C
+C The step "sdword dr = n - 2^N*d + (2^N-1-q1) * d" is instead done as
+C "n-(q1+1)*d"; this rearrangement gives the same two-limb answer.  If
+C q1==0xFFFFFFFF, then q1+1 would overflow.  We branch to a special case
+C "q1_ff" if this occurs.  Since the true quotient is either q1 or q1+1 then
+C if q1==0xFFFFFFFF that must be the right value.
+C
+C For the last and second last steps q1==0xFFFFFFFF is instead handled by an
+C sbbl to go back to 0xFFFFFFFF if an overflow occurs when adding 1.  This
+C then goes through as normal, and finding no addback required.  sbbl costs
+C an extra cycle over what the main loop code does, but it keeps code size
+C and complexity down.
+C
+C Notes:
+C
+C mpn_divrem_1 and mpn_preinv_divrem_1 avoid one division if the src high
+C limb is less than the divisor.  mpn_divrem_1c doesn't check for a zero
+C carry, since in normal circumstances that will be a very rare event.
+C
+C The test for skipping a division is branch free (once size>=1 is tested).
+C The store to the destination high limb is 0 when a divide is skipped, or
+C if it's not skipped then a copy of the src high limb is used.  The latter
+C is in case src==dst.
+C
+C There's a small bias towards expecting xsize==0, by having code for
+C xsize==0 in a straight line and xsize!=0 under forward jumps.
+C
+C Alternatives:
+C
+C If the divisor is normalized (high bit set) then a division step can
+C always be skipped, since the high destination limb is always 0 or 1 in
+C that case.  It doesn't seem worth checking for this though, since it
+C probably occurs infrequently, in particular note that big_base for a
+C decimal mpn_get_str is not normalized in a 32-bit limb.
+
+
+dnl  MUL_THRESHOLD is the value of xsize+size at which the multiply by
+dnl  inverse method is used, rather than plain "divl"s.  Minimum value 1.
+dnl
+dnl  The inverse takes about 50 cycles to calculate, but after that the
+dnl  multiply is 17 c/l versus division at 42 c/l.
+dnl
+dnl  At 3 limbs the mul is a touch faster than div on the integer part, and
+dnl  even more so on the fractional part.
+
+deflit(MUL_THRESHOLD, 3)
+
+
+defframe(PARAM_PREINV_SHIFT,   28)  dnl mpn_preinv_divrem_1
+defframe(PARAM_PREINV_INVERSE, 24)  dnl mpn_preinv_divrem_1
+defframe(PARAM_CARRY,  24)          dnl mpn_divrem_1c
+defframe(PARAM_DIVISOR,20)
+defframe(PARAM_SIZE,   16)
+defframe(PARAM_SRC,    12)
+defframe(PARAM_XSIZE,  8)
+defframe(PARAM_DST,    4)
+
+defframe(SAVE_EBX,    -4)
+defframe(SAVE_ESI,    -8)
+defframe(SAVE_EDI,    -12)
+defframe(SAVE_EBP,    -16)
+
+defframe(VAR_NORM,    -20)  dnl normalization shift count, l
+defframe(VAR_INVERSE, -24)  dnl inverse of the normalized divisor, m
+defframe(VAR_SRC,     -28)  dnl src pointer for the integer loop
+defframe(VAR_DST,     -32)  dnl dst pointer for the integer loop
+defframe(VAR_DST_STOP,-36)  dnl &dst[xsize+2], where the integer loop stops
+
+deflit(STACK_SPACE, 36)
+
+       TEXT
+       ALIGN(32)
+
+PROLOGUE(mpn_preinv_divrem_1)
+deflit(`FRAME',0)
+       movl    PARAM_XSIZE, %ecx
+       movl    PARAM_DST, %edx
+       subl    $STACK_SPACE, %esp      FRAME_subl_esp(STACK_SPACE)
+
+       movl    %esi, SAVE_ESI
+       movl    PARAM_SRC, %esi
+
+       movl    %ebx, SAVE_EBX
+       movl    PARAM_SIZE, %ebx
+
+       leal    8(%edx,%ecx,4), %edx    C &dst[xsize+2]
+       movl    %ebp, SAVE_EBP
+       movl    PARAM_DIVISOR, %ebp
+
+       movl    %edx, VAR_DST_STOP      C &dst[xsize+2]
+       movl    %edi, SAVE_EDI
+       xorl    %edi, %edi              C carry
+
+       movl    -4(%esi,%ebx,4), %eax   C src high limb (read unconditionally, presumably size>=1 - confirm)
+       xor     %ecx, %ecx              C 0, stored to dst high if the division is skipped
+
+       C
+
+       C
+
+       cmpl    %ebp, %eax              C high cmp divisor
+
+       cmovc(  %eax, %edi)             C high is carry if high<divisor
+       cmovnc( %eax, %ecx)             C 0 if skip div, src high if not
+                                       C (the latter in case src==dst)
+
+       movl    %ecx, -12(%edx,%ebx,4)  C dst high limb
+       sbbl    $0, %ebx                C skip one division if high<divisor
+       movl    PARAM_PREINV_SHIFT, %ecx
+
+       leal    -8(%edx,%ebx,4), %edx   C &dst[xsize+size]
+       movl    $32, %eax               C for 32-shift below
+
+       movl    %edx, VAR_DST           C &dst[xsize+size]
+
+       shll    %cl, %ebp               C d normalized
+       subl    %ecx, %eax              C 32-shift
+       movl    %ecx, VAR_NORM          C shift, reloaded to denormalize the remainder
+
+       movd    %eax, %mm7              C rshift
+       movl    PARAM_PREINV_INVERSE, %eax
+       jmp     L(start_preinv)         C join mpn_divrem_1 after its inverse calculation
+
+EPILOGUE()
+
+
+       ALIGN(16)
+
+PROLOGUE(mpn_divrem_1c)
+deflit(`FRAME',0)
+       movl    PARAM_CARRY, %edx       C initial carry, supplied by the caller
+       movl    PARAM_SIZE, %ecx
+       subl    $STACK_SPACE, %esp
+deflit(`FRAME',STACK_SPACE)
+
+       movl    %ebx, SAVE_EBX
+       movl    PARAM_XSIZE, %ebx
+
+       movl    %edi, SAVE_EDI
+       movl    PARAM_DST, %edi
+
+       movl    %ebp, SAVE_EBP
+       movl    PARAM_DIVISOR, %ebp
+
+       movl    %esi, SAVE_ESI
+       movl    PARAM_SRC, %esi
+
+       leal    -4(%edi,%ebx,4), %edi   C &dst[xsize-1]
+       jmp     L(start_1c)             C join mpn_divrem_1, no skip-a-division test here
+
+EPILOGUE()
+
+
+       C offset 0xa1, close enough to aligned
+PROLOGUE(mpn_divrem_1)
+deflit(`FRAME',0)
+
+       movl    PARAM_SIZE, %ecx
+       movl    $0, %edx                C initial carry (if can't skip a div)
+       subl    $STACK_SPACE, %esp
+deflit(`FRAME',STACK_SPACE)
+
+       movl    %esi, SAVE_ESI
+       movl    PARAM_SRC, %esi
+
+       movl    %ebx, SAVE_EBX
+       movl    PARAM_XSIZE, %ebx
+
+       movl    %ebp, SAVE_EBP
+       movl    PARAM_DIVISOR, %ebp
+       orl     %ecx, %ecx              C size
+
+       movl    %edi, SAVE_EDI          C mov and lea keep the orl flags for the jz
+       movl    PARAM_DST, %edi
+       leal    -4(%edi,%ebx,4), %edi   C &dst[xsize-1]
+
+       jz      L(no_skip_div)          C if size==0
+       movl    -4(%esi,%ecx,4), %eax   C src high limb
+       xorl    %esi, %esi              C esi borrowed as scratch, src reloaded below
+
+       cmpl    %ebp, %eax              C high cmp divisor
+
+       cmovc(  %eax, %edx)             C high is carry if high<divisor
+       cmovnc( %eax, %esi)             C 0 if skip div, src high if not
+
+       movl    %esi, (%edi,%ecx,4)     C dst high limb
+       sbbl    $0, %ecx                C size-1 if high<divisor
+       movl    PARAM_SRC, %esi         C reload
+L(no_skip_div):
+
+
+L(start_1c):
+       C eax
+       C ebx   xsize
+       C ecx   size
+       C edx   carry
+       C esi   src
+       C edi   &dst[xsize-1]
+       C ebp   divisor
+
+       leal    (%ebx,%ecx), %eax       C size+xsize
+       cmpl    $MUL_THRESHOLD, %eax
+       jae     L(mul_by_inverse)       C enough limbs to use the inverse method
+
+
+C With MUL_THRESHOLD set to 3, the simple loops here only do 0 to 2 limbs.
+C It'd be possible to write them out without the looping, but no speedup
+C would be expected.
+C
+C Using PARAM_DIVISOR instead of %ebp measures 1 cycle/loop faster on the
+C integer part, but curiously not on the fractional part, where %ebp is a
+C (fixed) couple of cycles faster.
+
+       orl     %ecx, %ecx              C size
+       jz      L(divide_no_integer)
+
+L(divide_integer):
+       C eax   scratch (quotient)
+       C ebx   xsize
+       C ecx   counter
+       C edx   scratch (remainder)
+       C esi   src
+       C edi   &dst[xsize-1]
+       C ebp   divisor
+
+       movl    -4(%esi,%ecx,4), %eax   C src[counter-1], high to low
+
+       divl    PARAM_DIVISOR           C edx:eax / d, remainder in edx carries on
+
+       movl    %eax, (%edi,%ecx,4)     C dst[xsize+counter-1]
+       decl    %ecx
+       jnz     L(divide_integer)
+
+
+L(divide_no_integer):
+       movl    PARAM_DST, %edi         C dst, fraction limbs go in dst[0] to dst[xsize-1]
+       orl     %ebx, %ebx              C xsize
+       jnz     L(divide_fraction)
+
+L(divide_done):
+       movl    SAVE_ESI, %esi
+       movl    SAVE_EDI, %edi
+       movl    %edx, %eax              C return the remainder
+
+       movl    SAVE_EBX, %ebx
+       movl    SAVE_EBP, %ebp
+       addl    $STACK_SPACE, %esp
+
+       ret                             C MMX not touched on this path, so no emms
+
+
+L(divide_fraction):
+       C eax   scratch (quotient)
+       C ebx   counter
+       C ecx
+       C edx   scratch (remainder)
+       C esi
+       C edi   dst
+       C ebp   divisor
+
+       movl    $0, %eax                C zero limb below the remainder
+
+       divl    %ebp                    C edx:0 / d
+
+       movl    %eax, -4(%edi,%ebx,4)   C dst[counter-1]
+       decl    %ebx
+       jnz     L(divide_fraction)
+
+       jmp     L(divide_done)
+
+
+
+C -----------------------------------------------------------------------------
+
+L(mul_by_inverse):
+       C eax
+       C ebx   xsize
+       C ecx   size
+       C edx   carry
+       C esi   src
+       C edi   &dst[xsize-1]
+       C ebp   divisor
+
+       bsrl    %ebp, %eax              C 31-l (bsrl is undefined for 0, presumably d!=0 - confirm)
+
+       leal    12(%edi), %ebx          C &dst[xsize+2], loop dst stop
+       leal    4(%edi,%ecx,4), %edi    C &dst[xsize+size]
+
+       movl    %edi, VAR_DST
+       movl    %ebx, VAR_DST_STOP
+
+       movl    %ecx, %ebx              C size
+       movl    $31, %ecx
+
+       movl    %edx, %edi              C carry
+       movl    $-1, %edx
+
+       C
+
+       xorl    %eax, %ecx              C l
+       incl    %eax                    C 32-l
+
+       shll    %cl, %ebp               C d normalized
+       movl    %ecx, VAR_NORM          C l, reloaded to denormalize the remainder
+
+       movd    %eax, %mm7              C rshift = 32-l
+
+       movl    $-1, %eax
+       subl    %ebp, %edx              C (b-d)-1 giving edx:eax = b*(b-d)-1
+
+       divl    %ebp                    C floor (b*(b-d)-1) / d
+
+L(start_preinv):
+       C eax   inverse
+       C ebx   size
+       C ecx   shift
+       C edx
+       C esi   src
+       C edi   carry
+       C ebp   divisor
+       C
+       C mm7   rshift
+
+       orl     %ebx, %ebx              C size
+       movl    %eax, VAR_INVERSE
+       leal    -12(%esi,%ebx,4), %eax  C &src[size-3]
+
+       jz      L(start_zero)           C size==0, flags from the orl
+       movl    %eax, VAR_SRC
+       cmpl    $1, %ebx
+
+       movl    8(%eax), %esi           C src high limb
+       jz      L(start_one)            C size==1, flags from the cmpl
+
+L(start_two_or_more):
+       movl    4(%eax), %edx           C src second highest limb
+
+       shldl(  %cl, %esi, %edi)        C n2 = carry,high << l
+
+       shldl(  %cl, %edx, %esi)        C n10 = high,second << l
+
+       cmpl    $2, %ebx
+       je      L(integer_two_left)
+       jmp     L(integer_top)
+
+
+L(start_one):
+       shldl(  %cl, %esi, %edi)        C n2 = carry,high << l
+
+       shll    %cl, %esi               C n10 = high << l
+       movl    %eax, VAR_SRC           C same value as stored before the branch
+       jmp     L(integer_one_left)
+
+
+L(start_zero):
+       C Can be here with xsize==0 if mpn_preinv_divrem_1 had size==1 and
+       C skipped a division.
+
+       shll    %cl, %edi               C n2 = carry << l
+       movl    %edi, %eax              C return value for zero_done
+       cmpl    $0, PARAM_XSIZE
+
+       je      L(zero_done)
+       jmp     L(fraction_some)
+
+
+
+C -----------------------------------------------------------------------------
+C
+C The multiply by inverse loop is 17 cycles, and relies on some out-of-order
+C execution.  The instruction scheduling is important, with various
+C apparently equivalent forms running 1 to 5 cycles slower.
+C
+C A lower bound for the time would seem to be 16 cycles, based on the
+C following successive dependencies.
+C
+C                    cycles
+C              n2+n1   1
+C              mul     6
+C              q1+1    1
+C              mul     6
+C              sub     1
+C              addback 1
+C                     ---
+C                     16
+C
+C This chain is what the loop has already, but 16 cycles isn't achieved.
+C K7 has enough decode, and probably enough execute (depending maybe on what
+C a mul actually consumes), but nothing running under 17 has been found.
+C
+C In theory n2+n1 could be done in the sub and addback stages (by
+C calculating both n2 and n2+n1 there), but lack of registers makes this an
+C unlikely proposition.
+C
+C The jz in the loop keeps the q1+1 stage to 1 cycle.  Handling an overflow
+C from q1+1 with an "sbbl $0, %ebx" would add a cycle to the dependent
+C chain, and nothing better than 18 cycles has been found when using it.
+C The jump is taken only when q1 is 0xFFFFFFFF, and on random data this will
+C be an extremely rare event.
+C
+C Branch mispredictions will hit random occurrences of q1==0xFFFFFFFF, but
+C if some special data is coming out with this always, the q1_ff special
+C case actually runs at 15 c/l.  0x2FFF...FFFD divided by 3 is a good way to
+C induce the q1_ff case, for speed measurements or testing.  Note that
+C 0xFFF...FFF divided by 1 or 2 doesn't induce it.
+C
+C The instruction groupings and empty comments show the cycles for a naive
+C in-order view of the code (conveniently ignoring the load latency on
+C VAR_INVERSE).  This shows some of where the time is going, but is nonsense
+C to the extent that out-of-order execution rearranges it.  In this case
+C there's 19 cycles shown, but it executes at 17.
+
+       ALIGN(16)
+L(integer_top):
+       C eax   scratch
+       C ebx   scratch (nadj, q1)
+       C ecx   scratch (src, dst)
+       C edx   scratch
+       C esi   n10
+       C edi   n2
+       C ebp   divisor
+       C
+       C mm0   scratch (src qword)
+       C mm7   rshift for normalization
+
+       cmpl    $0x80000000, %esi  C n1 as 0=c, 1=nc
+       movl    %edi, %eax         C n2
+       movl    VAR_SRC, %ecx
+
+       leal    (%ebp,%esi), %ebx  C n10 + d
+       cmovc(  %esi, %ebx)        C nadj = n10 + (-n1 & d), ignoring overflow
+       sbbl    $-1, %eax          C n2+n1
+
+       mull    VAR_INVERSE        C m*(n2+n1)
+
+       movq    (%ecx), %mm0       C next limb and the one below it
+       subl    $4, %ecx
+
+       movl    %ecx, VAR_SRC      C src stepped down one limb
+
+       C
+
+       addl    %ebx, %eax         C m*(n2+n1) + nadj, low giving carry flag
+       leal    1(%edi), %ebx      C n2+1
+       movl    %ebp, %eax         C d
+
+       C
+
+       adcl    %edx, %ebx         C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1
+       jz      L(q1_ff)           C q1+1 wrapped to zero
+       movl    VAR_DST, %ecx
+
+       mull    %ebx               C (q1+1)*d
+
+       psrlq   %mm7, %mm0         C low dword becomes the next n10
+
+       leal    -4(%ecx), %ecx     C dst stepped down one limb
+
+       C
+
+       subl    %eax, %esi
+       movl    VAR_DST_STOP, %eax
+
+       C
+
+       sbbl    %edx, %edi         C n - (q1+1)*d
+       movl    %esi, %edi         C remainder -> n2
+       leal    (%ebp,%esi), %edx  C remainder + d, for the addback
+
+       movd    %mm0, %esi         C next n10
+
+       cmovc(  %edx, %edi)        C n - q1*d if underflow from using q1+1
+       sbbl    $0, %ebx           C q
+       cmpl    %eax, %ecx         C reached the dst stop point?
+
+       movl    %ebx, (%ecx)       C store q, mov keeps the cmpl flags
+       movl    %ecx, VAR_DST
+       jne     L(integer_top)
+
+
+L(integer_loop_done):
+
+
+C -----------------------------------------------------------------------------
+C
+C Here, and in integer_one_left below, an sbbl $0 is used rather than a jz
+C q1_ff special case.  This makes the code a bit smaller and simpler, and
+C costs only 1 cycle (each).
+
+L(integer_two_left):
+       C eax   scratch
+       C ebx   scratch (nadj, q1)
+       C ecx   scratch (src, dst)
+       C edx   scratch
+       C esi   n10
+       C edi   n2
+       C ebp   divisor
+       C
+       C mm7   rshift
+
+       cmpl    $0x80000000, %esi  C n1 as 0=c, 1=nc
+       movl    %edi, %eax         C n2
+       movl    PARAM_SRC, %ecx    C &src[0]
+
+       leal    (%ebp,%esi), %ebx  C n10 + d
+       cmovc(  %esi, %ebx)        C nadj = n10 + (-n1 & d), ignoring overflow
+       sbbl    $-1, %eax          C n2+n1
+
+       mull    VAR_INVERSE        C m*(n2+n1)
+
+       movd    (%ecx), %mm0       C src low limb
+
+       movl    VAR_DST_STOP, %ecx C &dst[xsize+2]
+
+       C
+
+       addl    %ebx, %eax         C m*(n2+n1) + nadj, low giving carry flag
+       leal    1(%edi), %ebx      C n2+1
+       movl    %ebp, %eax         C d
+
+       adcl    %edx, %ebx         C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1
+
+       sbbl    $0, %ebx           C q1 if q1+1 overflowed
+
+       mull    %ebx               C (q1+1)*d
+
+       psllq   $32, %mm0          C low limb to the high dword, zeros below it
+
+       psrlq   %mm7, %mm0         C low dword becomes the next n10
+
+       C
+
+       subl    %eax, %esi
+
+       C
+
+       sbbl    %edx, %edi         C n - (q1+1)*d
+       movl    %esi, %edi         C remainder -> n2
+       leal    (%ebp,%esi), %edx  C remainder + d, for the addback
+
+       movd    %mm0, %esi         C next n10
+
+       cmovc(  %edx, %edi)        C n - q1*d if underflow from using q1+1
+       sbbl    $0, %ebx           C q
+
+       movl    %ebx, -4(%ecx)     C dst[xsize+1]
+
+
+C -----------------------------------------------------------------------------
+L(integer_one_left):
+       C eax   scratch
+       C ebx   scratch (nadj, q1)
+       C ecx   dst
+       C edx   scratch
+       C esi   n10
+       C edi   n2
+       C ebp   divisor
+       C
+       C mm7   rshift
+
+       movl    VAR_DST_STOP, %ecx C &dst[xsize+2]
+       cmpl    $0x80000000, %esi  C n1 as 0=c, 1=nc
+       movl    %edi, %eax         C n2
+
+       leal    (%ebp,%esi), %ebx  C n10 + d
+       cmovc(  %esi, %ebx)        C nadj = n10 + (-n1 & d), ignoring overflow
+       sbbl    $-1, %eax          C n2+n1
+
+       mull    VAR_INVERSE        C m*(n2+n1)
+
+       C
+
+       C
+
+       C
+
+       addl    %ebx, %eax         C m*(n2+n1) + nadj, low giving carry flag
+       leal    1(%edi), %ebx      C n2+1
+       movl    %ebp, %eax         C d
+
+       C
+
+       adcl    %edx, %ebx         C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1
+
+       sbbl    $0, %ebx           C q1 if q1+1 overflowed
+
+       mull    %ebx               C (q1+1)*d
+
+       C
+
+       C
+
+       C
+
+       subl    %eax, %esi
+
+       C
+
+       sbbl    %edx, %edi         C n - (q1+1)*d
+       movl    %esi, %edi         C remainder -> n2
+       leal    (%ebp,%esi), %edx  C remainder + d, for the addback
+
+       cmovc(  %edx, %edi)        C n - q1*d if underflow from using q1+1
+       sbbl    $0, %ebx           C q
+
+       movl    %ebx, -8(%ecx)     C dst[xsize]
+       subl    $8, %ecx           C &dst[xsize]
+
+
+
+L(integer_none):
+       cmpl    $0, PARAM_XSIZE
+       jne     L(fraction_some)
+
+       movl    %edi, %eax         C remainder, still shifted left by l
+L(fraction_done):
+       movl    VAR_NORM, %ecx     C l
+L(zero_done):
+       movl    SAVE_EBP, %ebp
+
+       movl    SAVE_EDI, %edi
+       movl    SAVE_ESI, %esi
+
+       movl    SAVE_EBX, %ebx
+       addl    $STACK_SPACE, %esp
+
+       shrl    %cl, %eax          C denormalized remainder, the return value
+       emms                       C clear the MMX state before returning
+
+       ret
+
+
+C -----------------------------------------------------------------------------
+C
+C Special case for q1=0xFFFFFFFF, giving q=0xFFFFFFFF meaning the low dword
+C of q*d is simply -d and the remainder n-q*d = n10+d
+
+L(q1_ff):
+       C eax   (divisor)
+       C ebx   (q1+1 == 0)
+       C ecx
+       C edx
+       C esi   n10
+       C edi   n2
+       C ebp   divisor
+
+       movl    VAR_DST, %ecx
+       movl    VAR_DST_STOP, %edx
+       subl    $4, %ecx                C dst stepped down one limb
+
+       psrlq   %mm7, %mm0
+       leal    (%ebp,%esi), %edi       C n-q*d remainder -> next n2
+       movl    %ecx, VAR_DST
+
+       movd    %mm0, %esi              C next n10
+
+       movl    $-1, (%ecx)             C store q = 0xFFFFFFFF
+       cmpl    %ecx, %edx              C reached the dst stop point?
+       jne     L(integer_top)
+
+       jmp     L(integer_loop_done)
+
+
+
+C -----------------------------------------------------------------------------
+C
+C Being the fractional part, the "source" limbs are all zero, meaning
+C n10=0, n1=0, and hence nadj=0, leading to many instructions eliminated.
+C
+C The loop runs at 15 cycles.  The dependent chain is the same as the
+C general case above, but without the n2+n1 stage (due to n1==0), so 15
+C would seem to be the lower bound.
+C
+C A not entirely obvious simplification is that q1+1 never overflows a limb,
+C and so there's no need for the sbbl $0 or jz q1_ff from the general case.
+C q1 is the high word of m*n2+b*n2 and the following shows q1<=b-2 always.
+C rnd() means rounding down to a multiple of d.
+C
+C      m*n2 + b*n2 <= m*(d-1) + b*(d-1)
+C                   = m*d + b*d - m - b
+C                   = floor((b(b-d)-1)/d)*d + b*d - m - b
+C                   = rnd(b(b-d)-1) + b*d - m - b
+C                   = rnd(b(b-d)-1 + b*d) - m - b
+C                   = rnd(b*b-1) - m - b
+C                   <= (b-2)*b
+C
+C Unchanged from the general case is that the final quotient limb q can be
+C either q1 or q1+1, and the q1+1 case occurs often.  This can be seen from
+C equation 8.4 of the paper which simplifies as follows when n1==0 and
+C n0==0.
+C
+C      n-q1*d = (n2*k+q0*d)/b <= d + (d*d-2d)/b
+C
+C As before, the instruction groupings and empty comments show a naive
+C in-order view of the code, which is made a nonsense by out of order
+C execution.  There's 17 cycles shown, but it executes at 15.
+C
+C Rotating the store q and remainder->n2 instructions up to the top of the
+C loop gets the run time down from 16 to 15.
+
+       ALIGN(16)
+L(fraction_some):
+       C eax
+       C ebx
+       C ecx
+       C edx
+       C esi
+       C edi   carry
+       C ebp   divisor
+
+       movl    PARAM_DST, %esi         C &dst[0], the stop point
+       movl    VAR_DST_STOP, %ecx      C &dst[xsize+2]
+       movl    %edi, %eax              C n2 = carry, for the first mull
+
+       subl    $8, %ecx                C &dst[xsize]
+       jmp     L(fraction_entry)
+
+
+       ALIGN(16)
+L(fraction_top):
+       C eax   n2 carry, then scratch
+       C ebx   scratch (nadj, q1)
+       C ecx   dst, decrementing
+       C edx   scratch
+       C esi   dst stop point
+       C edi   (will be n2)
+       C ebp   divisor
+
+       movl    %ebx, (%ecx)    C previous q
+       movl    %eax, %edi      C remainder->n2
+
+L(fraction_entry):
+       mull    VAR_INVERSE     C m*n2
+
+       movl    %ebp, %eax      C d
+       subl    $4, %ecx        C dst
+       leal    1(%edi), %ebx   C n2+1
+
+       C
+
+       C
+
+       C
+
+       C
+
+       addl    %edx, %ebx      C 1 + high(n2<<32 + m*n2) = q1+1
+
+       mull    %ebx            C (q1+1)*d
+
+       C
+
+       C
+
+       C
+
+       negl    %eax            C low of n - (q1+1)*d
+
+       C
+
+       sbbl    %edx, %edi      C high of n - (q1+1)*d, caring only about carry
+       leal    (%ebp,%eax), %edx       C remainder + d, for the addback
+
+       cmovc(  %edx, %eax)     C n - q1*d if underflow from using q1+1
+       sbbl    $0, %ebx        C q
+       cmpl    %esi, %ecx      C reached &dst[0]?
+
+       jne     L(fraction_top)
+
+
+       movl    %ebx, (%ecx)    C final q, to dst[0]
+       jmp     L(fraction_done)
+
+EPILOGUE()
diff --git a/mpn/x86/k7/mmx/lshift.asm b/mpn/x86/k7/mmx/lshift.asm

new file mode 100644 (file)

index 0000000..b3bff8f
--- /dev/null
+++ b/mpn/x86/k7/mmx/lshift.asm
@@ -0,0 +1,470 @@
+dnl  AMD K7 mpn_lshift -- mpn left shift.
+
+dnl  Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C K7: 1.21 cycles/limb (at 16 limbs/loop).
+
+
+
+dnl  K7: UNROLL_COUNT cycles/limb
+dnl           4           1.51
+dnl           8           1.26
+dnl          16           1.21
+dnl          32           1.2
+dnl  Maximum possible with the current code is 64.
+
+deflit(UNROLL_COUNT, 16)
+
+
+C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                       unsigned shift);
+C
+C Shift src,size left by shift many bits and store the result in dst,size.
+C Zeros are shifted in at the right.  The bits shifted out at the left are
+C the return value.
+C
+C The comments in mpn_rshift apply here too.
+C
+C Three paths follow.  A single limb is shifted with plain integer
+C instructions.  Sizes below UNROLL_THRESHOLD use an MMX loop producing one
+C limb per pass.  Larger sizes use the unrolled MMX loop at L(top), which is
+C entered through a computed jump.
+
+C The threshold is the same for PIC and non-PIC builds at present.
+ifdef(`PIC',`
+deflit(UNROLL_THRESHOLD, 10)
+',`
+deflit(UNROLL_THRESHOLD, 10)
+')
+
+C Incoming arguments.
+defframe(PARAM_SHIFT,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC,  8)
+defframe(PARAM_DST,  4)
+
+C Save slots for edi, esi and ebx.  The prologue reserves SAVE_SIZE bytes
+C for them.
+defframe(SAVE_EDI, -4)
+defframe(SAVE_ESI, -8)
+defframe(SAVE_EBX, -12)
+deflit(SAVE_SIZE, 12)
+
+       TEXT
+       ALIGN(32)
+
+PROLOGUE(mpn_lshift)
+deflit(`FRAME',0)
+
+       movl    PARAM_SIZE, %eax
+       movl    PARAM_SRC, %edx
+       subl    $SAVE_SIZE, %esp        C room for the SAVE_ slots
+deflit(`FRAME',SAVE_SIZE)
+
+       movl    PARAM_SHIFT, %ecx
+       movl    %edi, SAVE_EDI
+
+       movl    PARAM_DST, %edi
+       decl    %eax                    C size-1, zero when size is 1
+       jnz     L(more_than_one_limb)
+
+       C size==1: dst[0] = src[0] << shift using integer shifts only.  The
+       C bits shifted out of the top are collected in eax as the return
+       C value.  No MMX register has been touched on this path, so it
+       C returns without emms.
+
+       movl    (%edx), %edx
+
+       shldl(  %cl, %edx, %eax)        C eax was decremented to zero
+
+       shll    %cl, %edx
+
+       movl    %edx, (%edi)
+       movl    SAVE_EDI, %edi
+       addl    $SAVE_SIZE, %esp
+
+       ret
+
+
+C -----------------------------------------------------------------------------
+L(more_than_one_limb):
+       C eax   size-1
+       C ebx
+       C ecx   shift
+       C edx   src
+       C esi
+       C edi   dst
+       C ebp
+
+       C Load the shift count and the high source limb into MMX registers,
+       C then pick the unrolled loop for size >= UNROLL_THRESHOLD or fall
+       C into the simple loop below.
+
+       movd    PARAM_SHIFT, %mm6
+       movd    (%edx,%eax,4), %mm5     C src high limb
+       cmp     $UNROLL_THRESHOLD-1, %eax
+
+       jae     L(unroll)
+       negl    %ecx
+       movd    (%edx), %mm4            C src low limb
+
+       addl    $32, %ecx               C 32-shift
+
+       movd    %ecx, %mm7
+
+L(simple_top):
+       C eax   loop counter, limbs
+       C ebx
+       C ecx
+       C edx   src
+       C esi
+       C edi   dst
+       C ebp
+       C
+       C mm0   scratch
+       C mm4   src low limb
+       C mm5   src high limb
+       C mm6   shift
+       C mm7   32-shift
+
+       C Each pass loads the limb pair src[eax-1],src[eax] as one qword.
+       C Shifting that qword right by 32-shift leaves dst[eax] in its low
+       C dword.  The jnz tests the flags left by the decl.
+
+       movq    -4(%edx,%eax,4), %mm0
+       decl    %eax
+
+       psrlq   %mm7, %mm0
+
+       movd    %mm0, 4(%edi,%eax,4)
+       jnz     L(simple_top)
+
+
+       C Finish: the high limb shifted left leaves the outgoing bits in the
+       C upper dword of mm5, and the low limb shifted left is dst[0].
+
+       psllq   %mm6, %mm5
+       psllq   %mm6, %mm4
+
+       psrlq   $32, %mm5
+       movd    %mm4, (%edi)            C dst low limb
+
+       movd    %mm5, %eax              C return value
+
+       movl    SAVE_EDI, %edi
+       addl    $SAVE_SIZE, %esp
+       emms
+
+       ret
+
+
+C -----------------------------------------------------------------------------
+       ALIGN(16)
+L(unroll):
+       C eax   size-1
+       C ebx   (saved)
+       C ecx   shift
+       C edx   src
+       C esi
+       C edi   dst
+       C ebp
+       C
+       C mm5   src high limb, for return value
+       C mm6   lshift
+
+       C Setup for the unrolled loop.  One limb is peeled off the high end
+       C if src is not qword aligned, another adjustment is made if dst is
+       C not qword aligned, and then a computed jump enters L(top) part way
+       C through its first pass.
+       C
+       C NOTE(review): UNROLL_MASK, UNROLL_LOG2 and UNROLL_BYTES are not
+       C defined in this file, presumably they are derived from UNROLL_COUNT
+       C by the shared m4 definitions -- confirm.
+
+       movl    %esi, SAVE_ESI
+       movl    %ebx, SAVE_EBX
+       leal    -4(%edx,%eax,4), %edx   C &src[size-2]
+
+       testb   $4, %dl
+       movq    (%edx), %mm1            C src high qword
+
+       jz      L(start_src_aligned)
+
+
+       C src isn't aligned, process high limb (marked xxx) separately to
+       C make it so
+       C
+       C  source    -4(edx,%eax,4)
+       C                  |
+       C  +-------+-------+-------+--
+       C  |  xxx          |
+       C  +-------+-------+-------+--
+       C        0mod8   4mod8   0mod8
+       C
+       C  dest      -4(edi,%eax,4)
+       C                  |
+       C  +-------+-------+--
+       C  |  xxx  |       |
+       C  +-------+-------+--
+
+       psllq   %mm6, %mm1
+       subl    $4, %edx
+       movl    %eax, PARAM_SIZE        C size-1
+
+       psrlq   $32, %mm1
+       decl    %eax                    C size-2 is new size-1
+
+       movd    %mm1, 4(%edi,%eax,4)
+       movq    (%edx), %mm1            C new src high qword
+L(start_src_aligned):
+
+
+       leal    -4(%edi,%eax,4), %edi   C &dst[size-2]
+       psllq   %mm6, %mm5
+
+       testl   $4, %edi
+       psrlq   $32, %mm5               C return value
+
+       jz      L(start_dst_aligned)
+
+
+       C dst isn't aligned, subtract 4 bytes to make it so, and pretend the
+       C shift is 32 bits extra.  High limb of dst (marked xxx) handled
+       C here separately.
+       C
+       C  source       %edx
+       C  +-------+-------+--
+       C  |      mm1      |
+       C  +-------+-------+--
+       C                0mod8   4mod8
+       C
+       C  dest         %edi
+       C  +-------+-------+-------+--
+       C  |  xxx  |
+       C  +-------+-------+-------+--
+       C        0mod8   4mod8   0mod8
+
+       movq    %mm1, %mm0
+       psllq   %mm6, %mm1
+       addl    $32, %ecx               C shift+32
+
+       psrlq   $32, %mm1
+
+       movd    %mm1, 4(%edi)
+       movq    %mm0, %mm1
+       subl    $4, %edi
+
+       movd    %ecx, %mm6              C new lshift
+L(start_dst_aligned):
+
+       C Computed jump.  ebx keeps the even limb count for the loop counter.
+       C eax becomes twice the masked negated count, and the entry address
+       C is L(entry) plus five times that, which is 10 code bytes for each
+       C limb of the first pass that is skipped.  edx and edi are offset by
+       C 2*eax to match.
+
+       decl    %eax                    C size-2, two last limbs handled at end
+       movq    %mm1, %mm2              C copy of src high qword
+       negl    %ecx
+
+       andl    $-2, %eax               C round size down to even
+       addl    $64, %ecx
+
+       movl    %eax, %ebx
+       negl    %eax
+
+       andl    $UNROLL_MASK, %eax
+       decl    %ebx
+
+       shll    %eax
+
+       movd    %ecx, %mm7              C rshift = 64-lshift
+
+ifdef(`PIC',`
+       call    L(pic_calc)
+L(here):
+',`
+       leal    L(entry) (%eax,%eax,4), %esi
+')
+       shrl    $UNROLL_LOG2, %ebx      C loop counter
+
+       leal    ifelse(UNROLL_BYTES,256,128) -8(%edx,%eax,2), %edx
+       leal    ifelse(UNROLL_BYTES,256,128) (%edi,%eax,2), %edi
+       movl    PARAM_SIZE, %eax        C for use at end
+       jmp     *%esi
+
+
+ifdef(`PIC',`
+L(pic_calc):
+       C See mpn/x86/README about old gas bugs
+       C
+       C Same entry address as the non-PIC leal, formed from the return
+       C address on the stack, which is the address of L(here).
+       leal    (%eax,%eax,4), %esi
+       addl    $L(entry)-L(here), %esi
+       addl    (%esp), %esi
+
+       ret_internal
+')
+
+
+C -----------------------------------------------------------------------------
+       ALIGN(32)
+L(top):
+       C eax   size (for use at end)
+       C ebx   loop counter
+       C ecx   rshift
+       C edx   src
+       C esi   computed jump
+       C edi   dst
+       C ebp
+       C
+       C mm0   scratch
+       C mm1   \ carry (alternating, mm2 first)
+       C mm2   /
+       C mm6   lshift
+       C mm7   rshift
+       C
+       C 10 code bytes/limb
+       C
+       C The two chunks differ in whether mm1 or mm2 hold the carry.
+       C The computed jump puts the initial carry in both mm1 and mm2.
+       C
+       C Each forloop chunk handles CHUNK_COUNT limbs as two qword steps,
+       C working from high addresses down (disp1 is disp0 - 8).  In each
+       C step the previously loaded source qword is shifted left by mm6, the
+       C newly loaded one is shifted right by mm7, and the two are ORed to
+       C form the destination qword.
+       C
+       C The computed jump into this code assumes the 10 code bytes per limb
+       C stated above, so instructions inside the chunks must not be added,
+       C removed or change encoding size.
+
+L(entry):
+deflit(CHUNK_COUNT, 4)
+forloop(i, 0, UNROLL_COUNT/CHUNK_COUNT-1, `
+       deflit(`disp0', eval(-i*CHUNK_COUNT*4 ifelse(UNROLL_BYTES,256,-128)))
+       deflit(`disp1', eval(disp0 - 8))
+
+Zdisp( movq,   disp0,(%edx), %mm0)
+       psllq   %mm6, %mm2
+
+       movq    %mm0, %mm1
+       psrlq   %mm7, %mm0
+
+       por     %mm2, %mm0
+Zdisp( movq,   %mm0, disp0,(%edi))
+
+
+Zdisp( movq,   disp1,(%edx), %mm0)
+       psllq   %mm6, %mm1
+
+       movq    %mm0, %mm2
+       psrlq   %mm7, %mm0
+
+       por     %mm1, %mm0
+Zdisp( movq,   %mm0, disp1,(%edi))
+')
+
+       C Step both pointers down by one full pass and loop while the pass
+       C counter in ebx stays non-negative.
+
+       subl    $UNROLL_BYTES, %edx
+       subl    $UNROLL_BYTES, %edi
+       decl    %ebx
+
+       jns     L(top)
+
+
+
+C disp(n) applies the same -128 bias as the loop displacements when
+C UNROLL_BYTES is 256, matching the +128 added to edx and edi before the
+C computed jump.
+C NOTE(review): m4_empty_if_zero is defined elsewhere, presumably it drops
+C a zero displacement -- confirm.
+define(`disp', `m4_empty_if_zero(eval($1 ifelse(UNROLL_BYTES,256,-128)))')
+
+L(end):
+       C eax holds PARAM_SIZE as reloaded before the computed jump, so bit
+       C 0 selects the odd or even finish.  The flags from this testb are
+       C still in place at the jz below, as only moves and MMX instructions
+       C come between.
+
+       testb   $1, %al
+       movl    SAVE_EBX, %ebx
+       psllq   %mm6, %mm2      C wanted left shifted in all cases below
+
+       movd    %mm5, %eax
+
+       movl    SAVE_ESI, %esi
+       jz      L(end_even)
+
+
+L(end_odd):
+
+       C Size odd, destination was aligned.
+       C
+       C                 source        edx+8   edx+4
+       C                 --+---------------+-------+
+       C                   |      mm2      |       |
+       C                 --+---------------+-------+
+       C
+       C dest                            edi
+       C --+---------------+---------------+-------+
+       C   |   written     |               |       |
+       C --+---------------+---------------+-------+
+       C
+       C mm6 = shift
+       C mm7 = ecx = 64-shift
+
+
+       C Size odd, destination was unaligned.
+       C
+       C                 source        edx+8   edx+4
+       C                 --+---------------+-------+
+       C                   |      mm2      |       |
+       C                 --+---------------+-------+
+       C
+       C         dest                            edi
+       C         --+---------------+---------------+
+       C           |   written     |               |
+       C         --+---------------+---------------+
+       C
+       C mm6 = shift+32
+       C mm7 = ecx = 64-(shift+32)
+
+
+       C In both cases there's one extra limb of src to fetch and combine
+       C with mm2 to make a qword at (%edi), and in the aligned case
+       C there's an extra limb of dst to be formed from that extra src limb
+       C left shifted.
+       C
+       C Bit 5 of ecx tells the cases apart: it is set for 64-shift, the
+       C aligned case, and clear for 64-(shift+32), the unaligned case.
+
+       movd    disp(4) (%edx), %mm0
+       testb   $32, %cl
+
+       movq    %mm0, %mm1
+       psllq   $32, %mm0
+
+       psrlq   %mm7, %mm0
+       psllq   %mm6, %mm1
+
+       por     %mm2, %mm0
+
+       movq    %mm0, disp(0) (%edi)
+       jz      L(end_odd_unaligned)
+       movd    %mm1, disp(-4) (%edi)
+L(end_odd_unaligned):
+
+       movl    SAVE_EDI, %edi
+       addl    $SAVE_SIZE, %esp
+       emms
+
+       ret
+
+
+L(end_even):
+
+       C Size even, destination was aligned.
+       C
+       C                 source        edx+8
+       C                 --+---------------+
+       C                   |      mm2      |
+       C                 --+---------------+
+       C
+       C dest                            edi
+       C --+---------------+---------------+
+       C   |   written     |               |
+       C --+---------------+---------------+
+       C
+       C mm6 = shift
+       C mm7 = ecx = 64-shift
+
+
+       C Size even, destination was unaligned.
+       C
+       C               source          edx+8
+       C                 --+---------------+
+       C                   |      mm2      |
+       C                 --+---------------+
+       C
+       C         dest                  edi+4
+       C         --+---------------+-------+
+       C           |    written    |       |
+       C         --+---------------+-------+
+       C
+       C mm6 = shift+32
+       C mm7 = ecx = 64-(shift+32)
+
+
+       C The movq for the aligned case overwrites the movd for the
+       C unaligned case.
+
+       movq    %mm2, %mm0
+       psrlq   $32, %mm2
+
+       testb   $32, %cl
+       movd    %mm2, disp(4) (%edi)
+
+       jz      L(end_even_unaligned)
+       movq    %mm0, disp(0) (%edi)
+L(end_even_unaligned):
+
+       movl    SAVE_EDI, %edi
+       addl    $SAVE_SIZE, %esp
+       emms
+
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/k7/mmx/popham.asm b/mpn/x86/k7/mmx/popham.asm

new file mode 100644 (file)

index 0000000..5dc0a78
--- /dev/null
+++ b/mpn/x86/k7/mmx/popham.asm
@@ -0,0 +1,202 @@
+dnl  AMD K7 mpn_popcount, mpn_hamdist -- population count and hamming
+dnl  distance.
+
+dnl  Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C                           popcount        hamdist
+C P3 generic                   6.5             7
+C P6 model 9  (Banias)          ?              ?
+C P6 model 13 (Dothan)         5.75            6
+C K7                           5               6
+
+C unsigned long mpn_popcount (mp_srcptr src, mp_size_t size);
+C unsigned long mpn_hamdist (mp_srcptr src, mp_srcptr src2, mp_size_t size);
+C
+C The code here is almost certainly not optimal, but is already a 3x speedup
+C over the generic C code.  The main improvement would be to interleave
+C processing of two qwords in the loop so as to fully exploit the available
+C execution units, possibly leading to 3.25 c/l (13 cycles for 4 limbs).
+C
+C The loop is based on the example "Efficient 64-bit population count using
+C MMX instructions" in the Athlon Optimization Guide, AMD document 22007,
+C page 158 of rev E (reference in mpn/x86/k7/README).
+
+C Exactly one of OPERATION_popcount and OPERATION_hamdist is expected to be
+C defined by the build.  Stop with an error if neither is.
+ifdef(`OPERATION_popcount',,
+`ifdef(`OPERATION_hamdist',,
+`m4_error(`Need OPERATION_popcount or OPERATION_hamdist defined
+')')')
+
+C HAM(text) expands to text only when building mpn_hamdist, and to nothing
+C otherwise.
+define(HAM,
+m4_assert_numargs(1)
+`ifdef(`OPERATION_hamdist',`$1')')
+
+C POP(text) expands to text only when building mpn_popcount, and to nothing
+C otherwise.
+define(POP,
+m4_assert_numargs(1)
+`ifdef(`OPERATION_popcount',`$1')')
+
+C Parameter offsets and the function name for each variant.  mpn_hamdist
+C takes a second source pointer, which moves the size parameter up.
+HAM(`
+defframe(PARAM_SIZE,   12)
+defframe(PARAM_SRC2,   8)
+defframe(PARAM_SRC,    4)
+define(M4_function,mpn_hamdist)
+')
+POP(`
+defframe(PARAM_SIZE,   8)
+defframe(PARAM_SRC,    4)
+define(M4_function,mpn_popcount)
+')
+
+MULFUNC_PROLOGUE(mpn_popcount mpn_hamdist)
+
+
+C Non-PIC builds load the three 64-bit bit masks from the read-only data
+C below.  PIC builds construct them in registers instead, so no data is
+C emitted for them.
+ifdef(`PIC',,`
+       dnl  non-PIC
+
+       RODATA
+       ALIGN(8)
+
+L(rodata_AAAAAAAAAAAAAAAA):
+       .long   0xAAAAAAAA
+       .long   0xAAAAAAAA
+
+L(rodata_3333333333333333):
+       .long   0x33333333
+       .long   0x33333333
+
+L(rodata_0F0F0F0F0F0F0F0F):
+       .long   0x0F0F0F0F
+       .long   0x0F0F0F0F
+')
+
+       TEXT
+       ALIGN(32)
+
+C M4_function is mpn_popcount or mpn_hamdist, as selected above.
+C
+C The size is in limbs.  An odd size has its top limb counted first, entering
+C the loop body at L(loaded).  The remaining limbs are then counted a qword
+C at a time from high addresses down.  The running total is kept in mm2 and
+C returned in eax.
+PROLOGUE(M4_function)
+deflit(`FRAME',0)
+
+       movl    PARAM_SIZE, %ecx
+
+ifdef(`PIC',`
+       C Build the three 64-bit masks in registers: each 32-bit constant
+       C is moved to the low dword and then duplicated into the high dword
+       C by punpckldq.
+       movl    $0xAAAAAAAA, %eax
+       movl    $0x33333333, %edx
+
+       movd    %eax, %mm7
+       movd    %edx, %mm6
+
+       movl    $0x0F0F0F0F, %eax
+
+       punpckldq %mm7, %mm7
+       punpckldq %mm6, %mm6
+
+       movd    %eax, %mm5
+
+       punpckldq %mm5, %mm5
+
+',`
+       movq    L(rodata_AAAAAAAAAAAAAAAA), %mm7
+       movq    L(rodata_3333333333333333), %mm6
+       movq    L(rodata_0F0F0F0F0F0F0F0F), %mm5
+')
+       C mm4 is the all-zero operand for the psadbw below.  It is zeroed
+       C here for both the PIC and the non-PIC path, so neither path needs
+       C to load it beforehand.
+       pxor    %mm4, %mm4
+
+define(REG_AAAAAAAAAAAAAAAA,%mm7)
+define(REG_3333333333333333,%mm6)
+define(REG_0F0F0F0F0F0F0F0F,%mm5)
+define(REG_0000000000000000,%mm4)
+
+
+       movl    PARAM_SRC, %eax
+HAM(`  movl    PARAM_SRC2, %edx')
+
+       pxor    %mm2, %mm2      C total
+
+       shrl    %ecx            C qword count, carry set if size is odd
+       jnc     L(top)
+
+       C Odd size: count the top limb on its own, zero extended by movd.
+
+       movd    (%eax,%ecx,8), %mm1
+
+HAM(`  movd    (%edx,%ecx,8), %mm0
+       pxor    %mm0, %mm1
+')
+       orl     %ecx, %ecx      C set ZF from the qword count for the jnz below
+       jmp     L(loaded)
+
+
+       ALIGN(16)
+L(top):
+       C eax   src
+       C ebx
+       C ecx   counter, qwords, decrementing
+       C edx   [hamdist] src2
+       C
+       C mm0   (scratch)
+       C mm1   (scratch)
+       C mm2   total (low dword)
+       C mm3
+       C mm4   \
+       C mm5   | special constants
+       C mm6   |
+       C mm7   /
+
+       movq    -8(%eax,%ecx,8), %mm1
+
+HAM(`  pxor    -8(%edx,%ecx,8), %mm1')
+       decl    %ecx            C flags are kept for the jnz at the loop end
+
+L(loaded):
+       C Fold the bit counts upwards: 2-bit fields, then nibbles, then
+       C bytes.  None of the MMX instructions here change the flags.
+
+       movq    %mm1, %mm0
+       pand    REG_AAAAAAAAAAAAAAAA, %mm1
+
+       psrlq   $1, %mm1
+
+       psubd   %mm1, %mm0      C bit pairs
+
+
+       movq    %mm0, %mm1
+       psrlq   $2, %mm0
+
+       pand    REG_3333333333333333, %mm0
+       pand    REG_3333333333333333, %mm1
+
+       paddd   %mm1, %mm0      C nibbles
+
+
+       movq    %mm0, %mm1
+       psrlq   $4, %mm0
+
+       pand    REG_0F0F0F0F0F0F0F0F, %mm0
+       pand    REG_0F0F0F0F0F0F0F0F, %mm1
+
+       paddd   %mm1, %mm0      C bytes
+
+
+       C Sum of absolute differences against zero adds up the eight byte
+       C counts.
+       psadbw( %mm4, %mm0)
+
+       paddd   %mm0, %mm2      C add to total
+       jnz     L(top)
+
+
+       movd    %mm2, %eax
+       emms
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/k7/mmx/rshift.asm b/mpn/x86/k7/mmx/rshift.asm

new file mode 100644 (file)

index 0000000..3566ce8
--- /dev/null
+++ b/mpn/x86/k7/mmx/rshift.asm
@@ -0,0 +1,469 @@
+dnl  AMD K7 mpn_rshift -- mpn right shift.
+
+dnl  Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C K7: 1.21 cycles/limb (at 16 limbs/loop).
+
+
+
+dnl  K7: UNROLL_COUNT cycles/limb
+dnl           4           1.51
+dnl           8           1.26
+dnl          16           1.21
+dnl          32           1.2
+dnl  Maximum possible with the current code is 64.
+
+deflit(UNROLL_COUNT, 16)
+
+
+C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                       unsigned shift);
+C
+C Shift src,size right by shift many bits and store the result in dst,size.
+C Zeros are shifted in at the left.  The bits shifted out at the right are
+C the return value.
+C
+C This code uses 64-bit MMX operations, which makes it possible to handle
+C two limbs at a time, for a theoretical 1.0 cycles/limb.  Plain integer
+C code, on the other hand, suffers from shrd being a vector path decode and
+C running at 3 cycles back-to-back.
+C
+C Full speed depends on source and destination being aligned, and some hairy
+C setups and finish-ups are done to arrange this for the loop.
+C
+C Three paths follow.  A single limb is shifted with plain integer
+C instructions.  Sizes below UNROLL_THRESHOLD use an MMX loop producing one
+C limb per pass.  Larger sizes use the unrolled MMX loop at L(top), which is
+C entered through a computed jump.
+
+C The threshold is the same for PIC and non-PIC builds at present.
+ifdef(`PIC',`
+deflit(UNROLL_THRESHOLD, 10)
+',`
+deflit(UNROLL_THRESHOLD, 10)
+')
+
+C Incoming arguments.
+defframe(PARAM_SHIFT,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC,  8)
+defframe(PARAM_DST,  4)
+
+C Save slots for edi, esi and ebx.  The prologue reserves SAVE_SIZE bytes
+C for them.
+defframe(SAVE_EDI, -4)
+defframe(SAVE_ESI, -8)
+defframe(SAVE_EBX, -12)
+deflit(SAVE_SIZE, 12)
+
+       TEXT
+       ALIGN(32)
+
+PROLOGUE(mpn_rshift)
+deflit(`FRAME',0)
+
+       movl    PARAM_SIZE, %eax
+       movl    PARAM_SRC, %edx
+       subl    $SAVE_SIZE, %esp        C room for the SAVE_ slots
+deflit(`FRAME',SAVE_SIZE)
+
+       movl    PARAM_SHIFT, %ecx
+       movl    %edi, SAVE_EDI
+
+       movl    PARAM_DST, %edi
+       decl    %eax                    C size-1, zero when size is 1
+       jnz     L(more_than_one_limb)
+
+       C size==1: dst[0] = src[0] >> shift using integer shifts only.  The
+       C bits shifted out of the bottom are collected at the top of eax as
+       C the return value.  No MMX register has been touched on this path,
+       C so it returns without emms.
+
+       movl    (%edx), %edx            C src limb
+
+       shrdl(  %cl, %edx, %eax)        C eax was decremented to zero
+
+       shrl    %cl, %edx
+
+       movl    %edx, (%edi)            C dst limb
+       movl    SAVE_EDI, %edi
+       addl    $SAVE_SIZE, %esp
+
+       ret
+
+
+C -----------------------------------------------------------------------------
+L(more_than_one_limb):
+       C eax   size-1
+       C ebx
+       C ecx   shift
+       C edx   src
+       C esi
+       C edi   dst
+       C ebp
+
+       C Load the shift count and the low source limb into MMX registers,
+       C then pick the unrolled loop for size >= UNROLL_THRESHOLD or fall
+       C into the simple loop below.
+
+       movd    PARAM_SHIFT, %mm6       C rshift
+       movd    (%edx), %mm5            C src low limb
+       cmp     $UNROLL_THRESHOLD-1, %eax
+
+       jae     L(unroll)
+       leal    (%edx,%eax,4), %edx     C &src[size-1]
+       leal    -4(%edi,%eax,4), %edi   C &dst[size-2]
+
+       movd    (%edx), %mm4            C src high limb
+       negl    %eax
+
+
+L(simple_top):
+       C eax   loop counter, limbs, negative
+       C ebx
+       C ecx   shift
+       C edx   &src[size-1]
+       C esi
+       C edi   &dst[size-2]
+       C ebp
+       C
+       C mm0   scratch
+       C mm4   src high limb
+       C mm5   src low limb
+       C mm6   shift
+
+       C Each pass loads two adjacent source limbs as one qword.  Shifting
+       C that qword right by the shift count leaves the destination limb
+       C for the lower of the two in its low dword.  The jnz tests the
+       C flags left by the incl.
+
+       movq    (%edx,%eax,4), %mm0
+       incl    %eax
+
+       psrlq   %mm6, %mm0
+
+       movd    %mm0, (%edi,%eax,4)
+       jnz     L(simple_top)
+
+
+       C Finish: the low limb moved to the upper dword and shifted right
+       C leaves the outgoing bits in the low dword of mm5, and the high limb
+       C shifted right is the top destination limb.
+
+       psllq   $32, %mm5
+       psrlq   %mm6, %mm4
+
+       psrlq   %mm6, %mm5
+       movd    %mm4, 4(%edi)           C dst high limb
+
+       movd    %mm5, %eax              C return value
+
+       movl    SAVE_EDI, %edi
+       addl    $SAVE_SIZE, %esp
+       emms
+
+       ret
+
+
+C -----------------------------------------------------------------------------
+       ALIGN(16)
+L(unroll):
+       C eax   size-1
+       C ebx
+       C ecx   shift
+       C edx   src
+       C esi
+       C edi   dst
+       C ebp
+       C
+       C mm5   src low limb
+       C mm6   rshift
+
+       C Setup for the unrolled loop.  One limb is peeled off the low end
+       C if src is not qword aligned, another adjustment is made if dst is
+       C not qword aligned, and then a computed jump enters L(top) part way
+       C through its first pass.
+       C
+       C NOTE(review): UNROLL_MASK, UNROLL_LOG2 and UNROLL_BYTES are not
+       C defined in this file, presumably they are derived from UNROLL_COUNT
+       C by the shared m4 definitions -- confirm.
+
+       testb   $4, %dl
+       movl    %esi, SAVE_ESI
+       movl    %ebx, SAVE_EBX
+
+       psllq   $32, %mm5
+       jz      L(start_src_aligned)
+
+
+       C src isn't aligned, process low limb separately (marked xxx) and
+       C step src and dst by one limb, making src aligned.
+       C
+       C source                  edx
+       C --+-------+-------+-------+
+       C           |          xxx  |
+       C --+-------+-------+-------+
+       C         4mod8   0mod8   4mod8
+       C
+       C         dest            edi
+       C         --+-------+-------+
+       C           |       |  xxx  |
+       C         --+-------+-------+
+
+       movq    (%edx), %mm0            C src low two limbs
+       addl    $4, %edx
+       movl    %eax, PARAM_SIZE        C size-1
+
+       addl    $4, %edi
+       decl    %eax                    C size-2 is new size-1
+
+       psrlq   %mm6, %mm0
+       movl    %edi, PARAM_DST         C new dst
+
+       movd    %mm0, -4(%edi)
+L(start_src_aligned):
+
+
+       movq    (%edx), %mm1            C src low two limbs
+       decl    %eax                    C size-2, two last limbs handled at end
+       testl   $4, %edi
+
+       psrlq   %mm6, %mm5
+       jz      L(start_dst_aligned)
+
+
+       C dst isn't aligned, add 4 to make it so, and pretend the shift is
+       C 32 bits extra.  Low limb of dst (marked xxx) handled here separately.
+       C
+       C          source          edx
+       C          --+-------+-------+
+       C            |      mm1      |
+       C          --+-------+-------+
+       C                  4mod8   0mod8
+       C
+       C  dest                    edi
+       C  --+-------+-------+-------+
+       C                    |  xxx  |
+       C  --+-------+-------+-------+
+       C          4mod8   0mod8   4mod8
+
+       movq    %mm1, %mm0
+       psrlq   %mm6, %mm1
+       addl    $32, %ecx               C shift+32
+
+       movd    %mm1, (%edi)
+       movq    %mm0, %mm1
+       addl    $4, %edi                C new dst
+
+       movd    %ecx, %mm6
+L(start_dst_aligned):
+
+       C Computed jump.  ebx keeps the even limb count for the loop counter.
+       C eax becomes twice the masked negated count, and the entry address
+       C is L(entry) plus five times that, which is 10 code bytes for each
+       C limb of the first pass that is skipped.  eax is then negated again
+       C so that edx and edi are moved back by 2*eax to match.
+
+
+       movq    %mm1, %mm2              C copy of src low two limbs
+       negl    %ecx
+       andl    $-2, %eax               C round size down to even
+
+       movl    %eax, %ebx
+       negl    %eax
+       addl    $64, %ecx
+
+       andl    $UNROLL_MASK, %eax
+       decl    %ebx
+
+       shll    %eax
+
+       movd    %ecx, %mm7              C lshift = 64-rshift
+
+ifdef(`PIC',`
+       call    L(pic_calc)
+L(here):
+',`
+       leal    L(entry) (%eax,%eax,4), %esi
+       negl    %eax
+')
+       shrl    $UNROLL_LOG2, %ebx      C loop counter
+
+       leal    ifelse(UNROLL_BYTES,256,128+) 8(%edx,%eax,2), %edx
+       leal    ifelse(UNROLL_BYTES,256,128) (%edi,%eax,2), %edi
+       movl    PARAM_SIZE, %eax        C for use at end
+
+       jmp     *%esi
+
+
+ifdef(`PIC',`
+L(pic_calc):
+       C See mpn/x86/README about old gas bugs
+       C
+       C Same entry address and eax negation as the non-PIC code, with the
+       C address formed from the return address on the stack, which is the
+       C address of L(here).
+       leal    (%eax,%eax,4), %esi
+       addl    $L(entry)-L(here), %esi
+       addl    (%esp), %esi
+       negl    %eax
+
+       ret_internal
+')
+
+
+C -----------------------------------------------------------------------------
+       ALIGN(64)
+L(top):
+       C eax   size, for use at end
+       C ebx   loop counter
+       C ecx   lshift
+       C edx   src
+       C esi   was computed jump
+       C edi   dst
+       C ebp
+       C
+       C mm0   scratch
+       C mm1   \ carry (alternating)
+       C mm2   /
+       C mm6   rshift
+       C mm7   lshift
+       C
+       C 10 code bytes/limb
+       C
+       C The two chunks differ in whether mm1 or mm2 hold the carry.
+       C The computed jump puts the initial carry in both mm1 and mm2.
+       C
+       C Each forloop chunk handles CHUNK_COUNT limbs as two qword steps,
+       C working from low addresses up (disp1 is disp0 + 8).  In each step
+       C the previously loaded source qword is shifted right by mm6, the
+       C newly loaded one is shifted left by mm7, and the two are ORed to
+       C form the destination qword.
+       C
+       C The computed jump into this code assumes the 10 code bytes per limb
+       C stated above, so instructions inside the chunks must not be added,
+       C removed or change encoding size.
+
+L(entry):
+deflit(CHUNK_COUNT, 4)
+forloop(i, 0, UNROLL_COUNT/CHUNK_COUNT-1, `
+       deflit(`disp0', eval(i*CHUNK_COUNT*4 ifelse(UNROLL_BYTES,256,-128)))
+       deflit(`disp1', eval(disp0 + 8))
+
+Zdisp( movq,   disp0,(%edx), %mm0)
+       psrlq   %mm6, %mm2
+
+       movq    %mm0, %mm1
+       psllq   %mm7, %mm0
+
+       por     %mm2, %mm0
+Zdisp( movq,   %mm0, disp0,(%edi))
+
+
+Zdisp( movq,   disp1,(%edx), %mm0)
+       psrlq   %mm6, %mm1
+
+       movq    %mm0, %mm2
+       psllq   %mm7, %mm0
+
+       por     %mm1, %mm0
+Zdisp( movq,   %mm0, disp1,(%edi))
+')
+
+       C Step both pointers up by one full pass and loop while the pass
+       C counter in ebx stays non-negative.
+
+       addl    $UNROLL_BYTES, %edx
+       addl    $UNROLL_BYTES, %edi
+       decl    %ebx
+
+       jns     L(top)
+
+
+C disp0 and disp1 are redefined for the finish-up code, carrying the same
+C -128 bias as the loop displacements when UNROLL_BYTES is 256.
+deflit(`disp0', ifelse(UNROLL_BYTES,256,-128))
+deflit(`disp1', eval(disp0-0 + 8))
+
+       C eax holds PARAM_SIZE as reloaded before the computed jump, so bit
+       C 0 selects the odd or even finish.  The flags from this testb are
+       C still in place at the jz below, as only moves and MMX instructions
+       C come between.
+
+       testb   $1, %al
+       psrlq   %mm6, %mm2      C wanted rshifted in all cases below
+       movl    SAVE_ESI, %esi
+
+       movd    %mm5, %eax              C return value
+
+       movl    SAVE_EBX, %ebx
+       jz      L(end_even)
+
+
+       C Size odd, destination was aligned.
+       C
+       C source
+       C       edx
+       C +-------+---------------+--
+       C |       |      mm2      |
+       C +-------+---------------+--
+       C
+       C dest                  edi
+       C +-------+---------------+---------------+--
+       C |       |               |    written    |
+       C +-------+---------------+---------------+--
+       C
+       C mm6 = shift
+       C mm7 = ecx = 64-shift
+
+
+       C Size odd, destination was unaligned.
+       C
+       C source
+       C       edx
+       C +-------+---------------+--
+       C |       |      mm2      |
+       C +-------+---------------+--
+       C
+       C dest          edi
+       C +---------------+---------------+--
+       C |               |    written    |
+       C +---------------+---------------+--
+       C
+       C mm6 = shift+32
+       C mm7 = ecx = 64-(shift+32)
+
+
+       C In both cases there's one extra limb of src to fetch and combine
+       C with mm2 to make a qword to store, and in the aligned case there's
+       C a further extra limb of dst to be formed.
+       C
+       C Bit 5 of ecx tells the cases apart: it is set for 64-shift, the
+       C aligned case, and clear for 64-(shift+32), the unaligned case.
+
+
+       movd    disp0(%edx), %mm0
+       movq    %mm0, %mm1
+
+       psllq   %mm7, %mm0
+       testb   $32, %cl
+
+       por     %mm2, %mm0
+       psrlq   %mm6, %mm1
+
+       movq    %mm0, disp0(%edi)
+       jz      L(finish_odd_unaligned)
+
+       movd    %mm1, disp1(%edi)
+L(finish_odd_unaligned):
+
+       movl    SAVE_EDI, %edi
+       addl    $SAVE_SIZE, %esp
+       emms
+
+       ret
+
+
+L(end_even):
+
+       C Size even, destination was aligned.
+       C
+       C source
+       C +---------------+--
+       C |      mm2      |
+       C +---------------+--
+       C
+       C dest          edi
+       C +---------------+---------------+--
+       C |               |      mm3      |
+       C +---------------+---------------+--
+       C
+       C mm6 = shift
+       C mm7 = ecx = 64-shift
+
+
+       C Size even, destination was unaligned.
+       C
+       C source
+       C +---------------+--
+       C |      mm2      |
+       C +---------------+--
+       C
+       C dest  edi
+       C +-------+---------------+--
+       C |       |      mm3      |
+       C +-------+---------------+--
+       C
+       C mm6 = shift+32
+       C mm7 = 64-(shift+32)
+       C
+       C NOTE(review): no instruction in this routine uses mm3.  The boxes
+       C labelled mm3 presumably mark destination limbs already written, as
+       C in the odd-size diagrams -- confirm.
+
+
+       C The movd for the unaligned case is the same data as the movq for
+       C the aligned case, it's just a choice between whether one or two
+       C limbs should be written.
+
+
+       testb   $32, %cl
+       movd    %mm2, disp0(%edi)
+
+       jz      L(end_even_unaligned)
+
+       movq    %mm2, disp0(%edi)
+L(end_even_unaligned):
+
+       movl    SAVE_EDI, %edi
+       addl    $SAVE_SIZE, %esp
+       emms
+
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/k7/mod_1_4.asm b/mpn/x86/k7/mod_1_4.asm

new file mode 100644 (file)

index 0000000..5ecb4fc
--- /dev/null
+++ b/mpn/x86/k7/mod_1_4.asm
@@ -0,0 +1,281 @@
+dnl  x86-32 mpn_mod_1s_4p, requiring cmov.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2009 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                           cycles/limb
+C P5:
+C P6 model 0-8,10-12)
+C P6 model 9  (Banias)
+C P6 model 13 (Dothan)          6.0
+C P4 model 0  (Willamette)
+C P4 model 1  (?)
+C P4 model 2  (Northwood)      15.5
+C P4 model 3  (Prescott)
+C P4 model 4  (Nocona)
+C K6:
+C K7:                            4.75
+C K8:
+
+
+C This inner loop was manually written; it ought to be loopmixed.
+C Presumably, we could get to 4 c/l for K7.
+
+C The cps function was compiler generated.  It can clearly be optimized.
+
+
+ASM_START()
+       TEXT
+
+       ALIGN(16)
+C mpn_mod_1s_4p -- fold the limbs at up, four per loop iteration, into a
+C two-limb accumulator (edi = high, ebp = low) using the constants in the
+C cps table, then reduce that accumulator to a single limb using the
+C inverse cps[0] and the shift count cps[1].
+C
+C Stack arguments once the four pushes and the 28-byte frame are in place:
+C   48(%esp) up     52(%esp) n     56(%esp) b (the modulus)     60(%esp) cps
+C The n slot is decremented in place and doubles as the loop counter.
+PROLOGUE(mpn_mod_1s_4p)
+       push    %ebp
+       push    %edi
+       push    %esi
+       push    %ebx
+       sub     $28, %esp
+       mov     60(%esp), %edi          C cps
+       mov     8(%edi), %eax           C cps[2]
+       mov     12(%edi), %edx          C cps[3]
+       mov     16(%edi), %ecx          C cps[4]
+       mov     20(%edi), %esi          C cps[5]
+       mov     24(%edi), %edi          C cps[6]
+       mov     %eax, 4(%esp)           C local copies: 4(%esp)..20(%esp) = cps[2]..cps[6]
+       mov     %edx, 8(%esp)
+       mov     %ecx, 12(%esp)
+       mov     %esi, 16(%esp)
+       mov     %edi, 20(%esp)
+       mov     52(%esp), %eax          C n
+       xor     %edi, %edi              C accumulator high limb starts at 0
+       mov     48(%esp), %esi          C up
+       lea     -12(%esi,%eax,4), %esi  C esi = &up[n-3]
+       and     $3, %eax                C dispatch on n mod 4
+       je      L(b0)
+       cmp     $2, %eax
+       jc      L(b1)                   C n mod 4 == 1
+       je      L(b2)                   C n mod 4 == 2
+
+L(b3): mov     4(%esi), %eax           C n mod 4 == 3: start from the top three limbs
+       mull    4(%esp)
+       mov     (%esi), %ebp
+       add     %eax, %ebp
+       adc     %edx, %edi
+       mov     8(%esi), %eax
+       mull    8(%esp)                 C this product is added at L(m0)
+       lea     -12(%esi), %esi
+       jmp     L(m0)
+
+L(b0): mov     (%esi), %eax            C n mod 4 == 0: start from the top four limbs
+       mull    4(%esp)
+       mov     -4(%esi), %ebp
+       add     %eax, %ebp
+       adc     %edx, %edi
+       mov     4(%esi), %eax
+       mull    8(%esp)
+       add     %eax, %ebp
+       adc     %edx, %edi
+       mov     8(%esi), %eax
+       mull    12(%esp)                C this product is added at L(m0)
+       lea     -16(%esi), %esi
+       jmp     L(m0)
+
+L(b1): mov     8(%esi), %ebp           C n mod 4 == 1: accumulator = top limb alone
+       lea     -4(%esi), %esi
+       jmp     L(m1)
+
+L(b2): mov     8(%esi), %eax           C n mod 4 == 2: start from the top two limbs
+       mull    4(%esp)
+       mov     4(%esi), %ebp
+       lea     -8(%esi), %esi
+       jmp     L(m0)
+
+       C Main loop: ecx:ebx = limb at -4(%esi) + the limbs at 0, 4 and 8(%esi)
+       C times cps[2], cps[3], cps[4] + ebp*cps[5].  The edi*cps[6] product is
+       C left in edx:eax and added at L(m0) to form the new accumulator edi:ebp.
+       ALIGN(16)
+L(top):        mov     (%esi), %eax
+       mull    4(%esp)
+       mov     -4(%esi), %ebx
+       xor     %ecx, %ecx
+       add     %eax, %ebx
+       adc     %edx, %ecx
+       mov     4(%esi), %eax
+       mull    8(%esp)
+       add     %eax, %ebx
+       adc     %edx, %ecx
+       mov     8(%esi), %eax
+       mull    12(%esp)
+       add     %eax, %ebx
+       adc     %edx, %ecx
+       lea     -16(%esi), %esi         C step down four limbs
+       mov     16(%esp), %eax
+       mul     %ebp                    C old accumulator low * cps[5]
+       add     %eax, %ebx
+       adc     %edx, %ecx
+       mov     20(%esp), %eax
+       mul     %edi                    C old accumulator high * cps[6]
+       mov     %ebx, %ebp
+       mov     %ecx, %edi
+L(m0): add     %eax, %ebp              C add the pending product in edx:eax
+       adc     %edx, %edi
+L(m1): sub     $4, 52(%esp)            C n -= 4, directly in the argument slot
+       ja      L(top)                  C continue while the result is above zero
+
+L(end):        mov     4(%esp), %eax
+       mul     %edi                    C accumulator high * cps[2]
+       mov     60(%esp), %edi          C cps
+       add     %eax, %ebp
+       adc     $0, %edx                C edx:ebp = two-limb value to reduce
+       mov     4(%edi), %ecx           C cps[1], the shift count
+       mov     %edx, %esi
+       mov     %ebp, %eax
+       sal     %cl, %esi
+       mov     %ecx, %ebx              C keep the shift count while ecx is negated
+       neg     %ecx
+       shr     %cl, %eax
+       or      %esi, %eax              C eax = high limb of (edx:ebp << count)
+       lea     1(%eax), %esi           C high limb + 1, added into the quotient estimate
+       mull    (%edi)                  C high limb * cps[0]
+       mov     %ebx, %ecx              C ecx = shift count again
+       mov     %eax, %ebx
+       mov     %ebp, %eax
+       sal     %cl, %eax               C eax = low limb of the shifted value
+       add     %eax, %ebx
+       adc     %esi, %edx              C edx = quotient estimate
+       imul    56(%esp), %edx          C estimate * b
+       mov     56(%esp), %esi          C b
+       sub     %edx, %eax              C candidate remainder
+       lea     (%eax,%esi), %edx       C candidate + b
+       cmp     %eax, %ebx
+       cmovb(  %edx, %eax)             C use candidate + b when ebx < candidate
+       mov     %eax, %edx
+       sub     %esi, %eax              C try subtracting b once more
+       cmovb(  %edx, %eax)             C ... and undo it if that borrowed
+       add     $28, %esp
+       pop     %ebx
+       pop     %esi
+       pop     %edi
+       pop     %ebp
+       shr     %cl, %eax               C ecx still holds the shift count: return remainder >> count
+       ret
+EPILOGUE()
+
+       ALIGN(16)
+C mpn_mod_1s_4p_cps -- fill the 7-word table read by mpn_mod_1s_4p.
+C
+C Stack arguments after the 56-byte frame:
+C   60(%esp) table pointer     64(%esp) divisor b
+C
+C Words written to the table:
+C   [0]      quotient of the div below (the inverse, kept in ebx)
+C   [1]      cnt, the number of leading zero bits of b
+C   [2]..[6] five chained reduction constants, each shifted right by cnt
+C
+C Each constant after the first is derived from the previous one by a
+C multiply with the inverse, a multiply with the normalized divisor and a
+C conditional add of the normalized divisor.
+PROLOGUE(mpn_mod_1s_4p_cps)
+       sub     $56, %esp
+       mov     %esi, 44(%esp)          C callee-saved registers live at 40..52(%esp)
+       mov     64(%esp), %esi          C b
+       mov     %edi, 48(%esp)
+       mov     %ebx, 40(%esp)
+       mov     $-1, %ebx
+       mov     %ebp, 52(%esp)
+       bsr     %esi, %eax              C index of the highest set bit of b
+       xor     $31, %eax               C 31 - index = leading zero count
+       mov     %eax, %ecx
+       mov     %eax, 24(%esp)          C 24(%esp) = cnt
+       mov     %ebx, %eax              C eax = 0xFFFFFFFF, low half of the dividend
+       sal     %cl, %esi               C esi = b << cnt, the normalized divisor
+       mov     %esi, %ecx
+       mov     %esi, %edi
+       mov     %esi, %ebp
+       neg     %ecx
+       not     %edi
+       mov     %ecx, 20(%esp)          C 20(%esp) = -(b << cnt)
+       mov     $32, %ecx
+       sub     24(%esp), %ecx          C ecx = 32 - cnt
+       mov     %edi, %edx              C edx = ~(b << cnt), high half of the dividend
+       mov     %edi, 16(%esp)
+       mov     20(%esp), %edi
+       div     %esi                    C eax = inverse
+       mov     %eax, %ebx              C ebx = inverse for the rest of the routine
+       shr     %cl, %eax
+       movzbl  24(%esp), %ecx          C ecx = cnt
+       mov     %eax, 12(%esp)
+       mov     $1, %eax
+       sal     %cl, %eax
+       or      %eax, 12(%esp)          C (inverse >> (32-cnt)) | (1 << cnt)
+       imul    12(%esp), %edi          C times -(b << cnt): first constant, unshifted
+       mov     %edi, %eax
+       mov     %edi, 20(%esp)          C 20(%esp) = first constant
+       mul     %ebx
+       mov     %eax, %ecx
+       lea     1(%edx,%edi), %eax
+       neg     %eax
+       imul    %eax, %ebp
+       lea     (%ebp,%esi), %eax
+       cmp     %ebp, %ecx
+       cmovb(  %eax, %ebp)             C ebp = second constant, unshifted
+       mov     %ebp, %eax
+       mul     %ebx
+       lea     1(%ebp,%edx), %edi
+       mov     %eax, %ecx
+       neg     %edi
+       mov     %edi, 8(%esp)
+       imul    %esi, %edi
+       mov     %edi, %eax
+       add     %esi, %eax
+       cmp     %edi, %ecx
+       cmovae( %edi, %eax)
+       mov     %eax, 32(%esp)          C 32(%esp) = third constant, unshifted
+       mov     32(%esp), %edi
+       mul     %ebx
+       mov     %eax, 36(%esp)
+       lea     1(%edi,%edx), %eax
+       negl    %eax
+       imul    %esi, %eax
+       mov     %eax, %ecx
+       add     %esi, %ecx
+       cmp     %eax, 36(%esp)
+       cmovae( %eax, %ecx)
+       mov     %ecx, (%esp)            C (%esp) = fourth constant, unshifted
+       mov     %ecx, %eax
+       mul     %ebx
+       mov     %eax, %edi
+       mov     (%esp), %eax            C eax keeps the fourth constant until it is stored
+       lea     1(%eax,%edx), %ecx
+       mov     60(%esp), %edx          C edx = table pointer
+       neg     %ecx
+       imul    %esi, %ecx
+       mov     %ebx, (%edx)            C table[0] = inverse
+       add     %ecx, %esi
+       cmp     %ecx, %edi
+       cmovae( %ecx, %esi)             C esi = fifth constant, unshifted
+       mov     24(%esp), %ecx
+       shrl    %cl, 20(%esp)
+       mov     20(%esp), %edi
+       mov     %esi, 4(%esp)
+       mov     %ecx, 4(%edx)           C table[1] = cnt
+       movzbl  24(%esp), %ecx
+       mov     %edi, 8(%edx)           C table[2] = first constant >> cnt
+       shr     %cl, %ebp
+       shr     %cl, %eax
+       mov     %ebp, 12(%edx)          C table[3] = second constant >> cnt
+       shrl    %cl, 32(%esp)
+       mov     32(%esp), %edi
+       shrl    %cl, 4(%esp)
+       mov     %eax, 20(%edx)          C table[5] = fourth constant >> cnt
+       mov     %edi, 16(%edx)          C table[4] = third constant >> cnt
+       mov     4(%esp), %edi
+       mov     %edi, 24(%edx)          C table[6] = fifth constant >> cnt
+       mov     40(%esp), %ebx
+       mov     44(%esp), %esi
+       mov     48(%esp), %edi
+       mov     52(%esp), %ebp
+       add     $56, %esp
+       ret
+EPILOGUE()
diff --git a/mpn/x86/k7/mod_34lsub1.asm b/mpn/x86/k7/mod_34lsub1.asm

new file mode 100644 (file)

index 0000000..f00e84d
--- /dev/null
+++ b/mpn/x86/k7/mod_34lsub1.asm
@@ -0,0 +1,178 @@
+dnl  AMD K7 mpn_mod_34lsub1 -- remainder modulo 2^24-1.
+
+dnl  Copyright 2000, 2001, 2002, 2004, 2005, 2008 Free Software Foundation,
+dnl  Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C         cycles/limb
+C Athlon:     1
+C Hammer:     1
+
+
+C mp_limb_t mpn_mod_34lsub1 (mp_srcptr src, mp_size_t size)
+C
+C The loop form below and the 64 byte code alignment seem necessary for the
+C claimed speed.  This is a bit strange, since normally k7 isn't very
+C sensitive to such things.  Perhaps there has to be 6 instructions in the
+C first 16 bytes for the BTB entry or something.
+
+defframe(PARAM_SIZE, 8)
+defframe(PARAM_SRC,  4)
+
+dnl  re-use parameter space
+define(SAVE_EDI, `PARAM_SIZE')
+
+       TEXT
+       ALIGN(64)
+C Sizes 1 and 2 are handled inline without touching the stack.  Larger
+C sizes sum the limbs into three accumulators selected by limb index mod 3
+C (eax, ebx, esi), chaining the carry flag from one adcl to the next, and
+C fold the three sums together at L(combine).
+PROLOGUE(mpn_mod_34lsub1)
+deflit(`FRAME',0)
+
+       movl    PARAM_SIZE, %ecx
+       movl    PARAM_SRC, %edx
+
+       subl    $2, %ecx                C flags from here drive both branches below
+       ja      L(three_or_more)
+
+       movl    (%edx), %eax            C src[0]; movl leaves the flags alone
+       jb      L(one)                  C size below 2: return src[0] as is
+
+       movl    4(%edx), %ecx
+       movl    %eax, %edx
+       shrl    $24, %eax               C src[0] high (top 8 bits)
+
+       andl    $0xFFFFFF, %edx         C src[0] low (bottom 24 bits)
+       addl    %edx, %eax
+       movl    %ecx, %edx
+
+       andl    $0xFFFF, %ecx
+       shrl    $16, %edx               C src[1] high
+       addl    %edx, %eax
+
+       shll    $8, %ecx                C src[1] low
+       addl    %ecx, %eax
+
+L(one):
+       ret
+
+
+L(three_or_more):
+       C eax
+       C ebx
+       C ecx   size-2
+       C edx   src
+       C esi
+       C edi
+
+       pushl   %ebx    FRAME_pushl()
+       xorl    %eax, %eax
+       xorl    %ebx, %ebx
+
+       movl    %edi, SAVE_EDI          C edi is parked in the size parameter slot
+       pushl   %esi    FRAME_pushl()
+       xorl    %esi, %esi              C and clear carry flag
+
+
+       C code offset 0x40 at this point
+L(top):
+       C eax   acc 0mod3
+       C ebx   acc 1mod3
+       C ecx   counter, limbs
+       C edx   src
+       C esi   acc 2mod3
+       C edi
+
+       C The pointer and counter are updated with leal and decl only, since
+       C neither modifies the carry flag that links the adcl instructions.
+
+       leal    24(%edx), %edx
+       leal    -2(%ecx), %ecx
+       adcl    -24(%edx), %eax
+       adcl    -20(%edx), %ebx
+       adcl    -16(%edx), %esi
+
+       decl    %ecx
+       jng     L(done_loop)
+
+       leal    -2(%ecx), %ecx
+       adcl    -12(%edx), %eax
+       adcl    -8(%edx), %ebx
+       adcl    -4(%edx), %esi
+
+       decl    %ecx
+       jg      L(top)
+
+
+       leal    12(%edx), %edx          C so -12(%edx) below is the next unread limb
+
+
+L(done_loop):
+       C ecx is -2, -1 or 0 representing 0, 1 or 2 more limbs, respectively
+
+       incl    %ecx
+       movl    $0xFFFFFFFF, %edi       C carry mask if no more limbs
+       js      L(combine)
+
+       adcl    -12(%edx), %eax
+       decl    %ecx
+       movl    $0xFFFFFF00, %edi       C carry mask after one more limb
+       js      L(combine)
+
+       adcl    -8(%edx), %ebx
+       movl    $0xFFFF0000, %edi       C carry mask after two more limbs
+
+
+L(combine):
+       C eax   acc 0mod3
+       C ebx   acc 1mod3
+       C ecx
+       C edx
+       C esi   acc 2mod3
+       C edi   mask
+
+       sbbl    %ecx, %ecx              C carry
+       movl    %eax, %edx              C 0mod3
+       shrl    $24, %eax               C 0mod3 high
+
+       andl    %edi, %ecx              C carry masked
+       andl    $0x00FFFFFF, %edx       C 0mod3 low
+       movl    %ebx, %edi              C 1mod3
+
+       subl    %ecx, %eax              C apply carry
+       shrl    $16, %ebx               C 1mod3 high
+       andl    $0xFFFF, %edi
+
+       addl    %edx, %eax              C apply 0mod3 low
+       movl    %esi, %edx              C 2mod3
+       shll    $8, %edi                C 1mod3 low
+
+       addl    %ebx, %eax              C apply 1mod3 high
+       shrl    $8, %esi                C 2mod3 high
+       movzbl  %dl, %edx               C 2mod3 low
+
+       addl    %edi, %eax              C apply 1mod3 low
+       shll    $16, %edx               C 2mod3 low
+
+       addl    %esi, %eax              C apply 2mod3 high
+       popl    %esi    FRAME_popl()
+
+       movl    SAVE_EDI, %edi
+       addl    %edx, %eax              C apply 2mod3 low
+       popl    %ebx    FRAME_popl()
+
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/k7/mode1o.asm b/mpn/x86/k7/mode1o.asm

new file mode 100644 (file)

index 0000000..f2eaf22
--- /dev/null
+++ b/mpn/x86/k7/mode1o.asm
@@ -0,0 +1,169 @@
+dnl  AMD K7 mpn_modexact_1_odd -- exact division style remainder.
+
+dnl  Copyright 2000, 2001, 2002, 2004, 2007 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C          cycles/limb
+C Athlon:     11.0
+C Hammer:      7.0
+
+
+C mp_limb_t mpn_modexact_1_odd (mp_srcptr src, mp_size_t size,
+C                               mp_limb_t divisor);
+C mp_limb_t mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size,
+C                                mp_limb_t divisor, mp_limb_t carry);
+C
+C With the loop running at just 11 cycles it doesn't seem worth bothering to
+C check for high<divisor to save one step.
+C
+C Using a divl for size==1 measures slower than the modexact method, which
+C is not too surprising since for the latter it's only about 24 cycles to
+C calculate the modular inverse.
+
+defframe(PARAM_CARRY,  16)
+defframe(PARAM_DIVISOR,12)
+defframe(PARAM_SIZE,   8)
+defframe(PARAM_SRC,    4)
+
+defframe(SAVE_EBX,     -4)
+defframe(SAVE_ESI,     -8)
+defframe(SAVE_EDI,    -12)
+defframe(SAVE_EBP,    -16)
+
+deflit(STACK_SPACE, 16)
+
+       TEXT
+
+       ALIGN(16)
+C Entry point taking an explicit initial carry: load it into ecx and join
+C the body of mpn_modexact_1_odd at L(start_1c).
+PROLOGUE(mpn_modexact_1c_odd)
+deflit(`FRAME',0)
+
+       movl    PARAM_CARRY, %ecx       C initial carry, in place of the zero used by the _1_odd entry
+       jmp     L(start_1c)
+
+EPILOGUE()
+
+
+       ALIGN(16)
+C Entry point with a zero initial carry.  L(start_1c) is also the join point
+C for mpn_modexact_1c_odd, which arrives with its carry already in ecx.
+C
+C The setup builds a limb-sized inverse of the divisor from the 8-bit
+C binvert_limb_table entry with two steps of inv = 2*inv - inv*inv*d; the
+C loop then processes one source limb per iteration, and the result
+C returned is the final carry bit plus carry limb.
+PROLOGUE(mpn_modexact_1_odd)
+deflit(`FRAME',0)
+
+       xorl    %ecx, %ecx              C no initial carry
+L(start_1c):
+       movl    PARAM_DIVISOR, %eax
+       subl    $STACK_SPACE, %esp      FRAME_subl_esp(STACK_SPACE)
+
+       movl    %esi, SAVE_ESI
+       movl    PARAM_DIVISOR, %esi
+
+       movl    %edi, SAVE_EDI
+
+       shrl    %eax                    C d/2
+
+       andl    $127, %eax              C table index, 7 bits
+
+ifdef(`PIC',`
+       LEA(    binvert_limb_table, %edi)
+       movzbl  (%eax,%edi), %edi               C inv 8 bits
+',`
+       movzbl  binvert_limb_table(%eax), %edi  C inv 8 bits
+')
+
+       xorl    %edx, %edx              C initial extra carry
+       leal    (%edi,%edi), %eax       C 2*inv
+
+       imull   %edi, %edi              C inv*inv
+
+       movl    %ebp, SAVE_EBP
+       movl    PARAM_SIZE, %ebp
+
+       movl    %ebx, SAVE_EBX
+       movl    PARAM_SRC, %ebx
+
+       imull   %esi, %edi              C inv*inv*d
+
+       subl    %edi, %eax              C inv = 2*inv - inv*inv*d
+       leal    (%eax,%eax), %edi       C 2*inv
+
+       imull   %eax, %eax              C inv*inv
+
+       imull   %esi, %eax              C inv*inv*d
+
+       leal    (%ebx,%ebp,4), %ebx     C src end
+       negl    %ebp                    C -size
+
+       subl    %eax, %edi              C inv = 2*inv - inv*inv*d
+
+       ASSERT(e,`      C d*inv == 1 mod 2^GMP_LIMB_BITS
+       movl    %esi, %eax
+       imull   %edi, %eax
+       cmpl    $1, %eax')
+
+
+C The dependent chain here is
+C
+C                            cycles
+C      subl    %edx, %eax      1
+C      imull   %edi, %eax      4
+C      mull    %esi            6  (high limb)
+C                            ----
+C       total                 11
+C
+C Out of order execution hides the load latency for the source data, so no
+C special scheduling is required.
+
+L(top):
+       C eax   src limb
+       C ebx   src end ptr
+       C ecx   next carry bit, 0 or 1 (or initial carry param)
+       C edx   carry limb, high of last product
+       C esi   divisor
+       C edi   inverse
+       C ebp   counter, limbs, negative
+
+       movl    (%ebx,%ebp,4), %eax
+
+       subl    %ecx, %eax              C apply carry bit
+       movl    $0, %ecx                C movl rather than xorl, so the borrow survives for setc
+
+       setc    %cl                     C new carry bit
+
+       subl    %edx, %eax              C apply carry limb
+       adcl    $0, %ecx                C add the borrow from applying the carry limb
+
+       imull   %edi, %eax              C times the inverse
+
+       mull    %esi                    C times the divisor; edx = next carry limb
+
+       incl    %ebp
+       jnz     L(top)
+
+
+       movl    SAVE_ESI, %esi
+       movl    SAVE_EDI, %edi
+       leal    (%ecx,%edx), %eax       C return carry bit + carry limb
+
+       movl    SAVE_EBX, %ebx
+       movl    SAVE_EBP, %ebp
+       addl    $STACK_SPACE, %esp
+
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/k7/mul_1.asm b/mpn/x86/k7/mul_1.asm

new file mode 100644 (file)

index 0000000..016262d
--- /dev/null
+++ b/mpn/x86/k7/mul_1.asm
@@ -0,0 +1,227 @@
+dnl  AMD K7 mpn_mul_1.
+
+dnl  Copyright 1999, 2000, 2001, 2002, 2005, 2008 Free Software Foundation,
+dnl  Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C                           cycles/limb
+C P5:
+C P6 model 0-8,10-12)
+C P6 model 9  (Banias)
+C P6 model 13 (Dothan)
+C P4 model 0  (Willamette)
+C P4 model 1  (?)
+C P4 model 2  (Northwood)
+C P4 model 3  (Prescott)
+C P4 model 4  (Nocona)
+C K6:
+C K7:                            3.25
+C K8:
+
+C TODO
+C  * Improve feed-in and wind-down code.  We beat the old code for all n != 1,
+C    but we might be able to do even better.
+C  * The feed-in code for mul_1c is crude.
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+C mpn_mul_1c -- mpn_mul_1 with an initial carry limb, taken from the fifth
+C stack argument (36(%esp) once the 16-byte frame is in place).
+C
+C This is feed-in code only.  Every path ends with a jump to a label inside
+C mpn_mul_1: L(0), L(1), L(2), L(3), L(cj2) or L(rt).  The register and
+C frame setup therefore has to match that routine exactly.
+PROLOGUE(mpn_mul_1c)
+       add     $-16, %esp
+       mov     %ebp, (%esp)
+       mov     %ebx, 4(%esp)
+       mov     %esi, 8(%esp)
+       mov     %edi, 12(%esp)
+
+       mov     20(%esp), %edi          C destination pointer
+       mov     24(%esp), %esi          C source pointer
+       mov     28(%esp), %ebp          C n
+       mov     32(%esp), %ecx          C multiplier limb
+       mov     %ebp, %ebx
+       shr     $2, %ebp
+       mov     %ebp, 28(%esp)          C n/4 replaces n in its slot as the loop counter
+       mov     (%esi), %eax            C first source limb
+       and     $3, %ebx                C n mod 4 selects the entry point
+       jz      L(c0)
+       cmp     $2, %ebx
+       mov     36(%esp), %ebx          C carry-in; mov leaves the cmp flags intact
+       jz      L(c2)
+       jg      L(c3)
+
+L(c1): lea     -4(%edi), %edi
+       mul     %ecx
+       test    %ebp, %ebp
+       jnz     1f                      C n/4 nonzero: more limbs follow
+       add     %ebx, %eax              C single limb: product low + carry-in
+       mov     %eax, 4(%edi)           C 4(%edi) is the original destination after the lea above
+       mov     %edx, %eax
+       adc     %ebp, %eax              C ebp is 0 here: return product high + carry
+       jmp     L(rt)
+1:     add     %eax, %ebx
+       mov     $0, %ebp
+       adc     %edx, %ebp
+       mov     4(%esi), %eax
+       jmp     L(1)
+
+L(c2): lea     4(%esi), %esi
+       mul     %ecx
+       test    %ebp, %ebp
+       mov     %ebx, %ebp              C mov leaves the test flags intact for jnz
+       jnz     2f
+       add     %eax, %ebp
+       mov     $0, %ebx
+       adc     %edx, %ebx
+       mov     (%esi), %eax
+       jmp     L(cj2)                  C exactly two limbs: straight to the wind-down
+2:     add     %eax, %ebp
+       mov     $0, %ebx
+       adc     %edx, %ebx
+       mov     (%esi), %eax
+       jmp     L(2)
+
+L(c3): lea     8(%esi), %esi
+       lea     -12(%edi), %edi
+       mul     %ecx
+       add     %eax, %ebx
+       mov     $0, %ebp
+       adc     %edx, %ebp
+       mov     -4(%esi), %eax
+       incl    28(%esp)                C entering at L(3) runs straight into the loop's decl
+       jmp     L(3)
+
+L(c0): mov     36(%esp), %ebx          C carry-in
+       lea     -4(%esi), %esi
+       lea     -8(%edi), %edi
+       mul     %ecx
+       mov     %ebx, %ebp
+       add     %eax, %ebp
+       mov     $0, %ebx
+       adc     %edx, %ebx
+       mov     8(%esi), %eax
+       jmp     L(0)
+
+EPILOGUE()
+       ALIGN(16)
+C mpn_mul_1 -- multiply the n limbs at the source pointer by a single limb,
+C store the n low result limbs at the destination pointer and return the
+C final carry limb in eax.
+C
+C Stack arguments once the 16-byte frame is in place:
+C   20(%esp) destination   24(%esp) source   28(%esp) n   32(%esp) multiplier
+C
+C The loop handles four limbs per pass and is entered at L(0), L(1), L(2)
+C or L(3) according to n mod 4, with esi and edi biased to suit each entry.
+C ebx and ebp take turns holding the limb about to be stored and the limb
+C being accumulated.  The carry flag set by each add is consumed by a later
+C adc, with only mov, lea and decl (none of which change it) in between.
+C The labels below are also the targets of the mpn_mul_1c feed-in code.
+PROLOGUE(mpn_mul_1)
+       add     $-16, %esp
+       mov     %ebp, (%esp)
+       mov     %ebx, 4(%esp)
+       mov     %esi, 8(%esp)
+       mov     %edi, 12(%esp)
+
+       mov     20(%esp), %edi          C destination pointer
+       mov     24(%esp), %esi          C source pointer
+       mov     28(%esp), %ebp          C n
+       mov     32(%esp), %ecx          C multiplier limb
+       mov     %ebp, %ebx
+       shr     $2, %ebp
+       mov     %ebp, 28(%esp)          C n/4 replaces n in its slot as the loop counter
+       mov     (%esi), %eax            C first source limb
+       and     $3, %ebx                C n mod 4 selects the entry point
+       jz      L(b0)
+       cmp     $2, %ebx
+       jz      L(b2)
+       jg      L(b3)
+
+L(b1): lea     -4(%edi), %edi
+       mul     %ecx
+       test    %ebp, %ebp
+       jnz     L(gt1)
+       mov     %eax, 4(%edi)           C single limb: store the low product limb
+       mov     %edx, %eax              C ... and return the high one
+       jmp     L(rt)
+L(gt1):        mov     %eax, %ebx
+       mov     %edx, %ebp
+       mov     4(%esi), %eax
+       jmp     L(1)
+
+L(b2): lea     4(%esi), %esi
+       mul     %ecx
+       test    %ebp, %ebp
+       mov     %eax, %ebp              C the movs leave the test flags intact for jnz
+       mov     %edx, %ebx
+       mov     (%esi), %eax
+       jnz     L(2)
+       jmp     L(cj2)                  C exactly two limbs: straight to the wind-down
+
+L(b3): lea     8(%esi), %esi
+       lea     -12(%edi), %edi
+       mul     %ecx
+       mov     %eax, %ebx
+       mov     %edx, %ebp
+       mov     -4(%esi), %eax
+       incl    28(%esp)                C entering at L(3) runs straight into the loop's decl
+       jmp     L(3)
+
+L(b0): lea     -4(%esi), %esi
+       lea     -8(%edi), %edi
+       mul     %ecx
+       mov     %eax, %ebp
+       mov     %edx, %ebx
+       mov     8(%esi), %eax
+       jmp     L(0)
+
+       ALIGN(16)
+L(top):        mov     $0, %ebx
+       adc     %edx, %ebx              C uses the carry from the add before decl
+L(2):  mul     %ecx
+       add     %eax, %ebx
+       mov     %ebp, 0(%edi)
+       mov     4(%esi), %eax
+       mov     $0, %ebp                C mov, not xor, so the carry from add survives
+       adc     %edx, %ebp
+L(1):  mul     %ecx
+       add     %eax, %ebp
+       mov     8(%esi), %eax
+       mov     %ebx, 4(%edi)
+       mov     $0, %ebx
+       adc     %edx, %ebx
+L(0):  mov     %ebp, 8(%edi)
+       mul     %ecx
+       add     %eax, %ebx
+       mov     12(%esi), %eax
+       lea     16(%esi), %esi          C advance source by four limbs
+       mov     $0, %ebp
+       adc     %edx, %ebp
+L(3):  mov     %ebx, 12(%edi)
+       mul     %ecx
+       lea     16(%edi), %edi          C advance destination by four limbs
+       add     %eax, %ebp
+       decl    28(%esp)                C loop counter; decl does not modify the carry flag
+       mov     0(%esi), %eax
+       jnz     L(top)
+
+L(end):        mov     $0, %ebx
+       adc     %edx, %ebx
+L(cj2):        mul     %ecx                    C last source limb
+       add     %eax, %ebx
+       mov     %ebp, (%edi)
+L(cj1):        mov     %ebx, 4(%edi)
+       adc     $0, %edx
+       mov     %edx, %eax              C return the final carry limb
+
+L(rt): mov     (%esp), %ebp
+       mov     4(%esp), %ebx
+       mov     8(%esp), %esi
+       mov     12(%esp), %edi
+       add     $16, %esp
+       ret
+EPILOGUE()
+ASM_END()
diff --git a/mpn/x86/k7/mul_basecase.asm b/mpn/x86/k7/mul_basecase.asm

new file mode 100644 (file)

index 0000000..7f4c000
--- /dev/null
+++ b/mpn/x86/k7/mul_basecase.asm
@@ -0,0 +1,591 @@
+dnl  AMD K7 mpn_mul_basecase -- multiply two mpn numbers.
+
+dnl  Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C K7: approx 4.42 cycles per cross product at around 20x20 limbs (16
+C     limbs/loop unrolling).
+
+
+
+dnl  K7 UNROLL_COUNT cycles/product (at around 20x20)
+dnl           8           4.67
+dnl          16           4.59
+dnl          32           4.42
+dnl  Maximum possible with the current code is 32.
+dnl
+dnl  At 32 the typical 13-26 limb sizes from the karatsuba code will get
+dnl  done with a straight run through a block of code, no inner loop.  Using
+dnl  32 gives 1k of code, but the k7 has a 64k L1 code cache.
+
+deflit(UNROLL_COUNT, 32)
+
+
+C void mpn_mul_basecase (mp_ptr wp,
+C                        mp_srcptr xp, mp_size_t xsize,
+C                        mp_srcptr yp, mp_size_t ysize);
+C
+C Calculate xp,xsize multiplied by yp,ysize, storing the result in
+C wp,xsize+ysize.
+C
+C This routine is essentially the same as mpn/generic/mul_basecase.c, but
+C it's faster because it does most of the mpn_addmul_1() startup
+C calculations only once.  The saving is 15-25% on typical sizes coming from
+C the Karatsuba multiply code.
+
+ifdef(`PIC',`
+deflit(UNROLL_THRESHOLD, 5)
+',`
+deflit(UNROLL_THRESHOLD, 5)
+')
+
+defframe(PARAM_YSIZE,20)
+defframe(PARAM_YP,   16)
+defframe(PARAM_XSIZE,12)
+defframe(PARAM_XP,   8)
+defframe(PARAM_WP,   4)
+
+       TEXT
+       ALIGN(32)
+PROLOGUE(mpn_mul_basecase)
+deflit(`FRAME',0)
+
+       movl    PARAM_XSIZE, %ecx
+       movl    PARAM_YP, %eax
+
+       movl    PARAM_XP, %edx
+       movl    (%eax), %eax    C yp low limb
+
+       cmpl    $2, %ecx
+       ja      L(xsize_more_than_two)
+       je      L(two_by_something)
+
+
+       C one limb by one limb
+
+       mull    (%edx)
+
+       movl    PARAM_WP, %ecx
+       movl    %eax, (%ecx)
+       movl    %edx, 4(%ecx)
+       ret
+
+
+C -----------------------------------------------------------------------------
+L(two_by_something):
+deflit(`FRAME',0)
+       decl    PARAM_YSIZE
+       pushl   %ebx            defframe_pushl(`SAVE_EBX')
+       movl    %eax, %ecx      C yp low limb
+
+       movl    PARAM_WP, %ebx
+       pushl   %esi            defframe_pushl(`SAVE_ESI')
+       movl    %edx, %esi      C xp
+
+       movl    (%edx), %eax    C xp low limb
+       jnz     L(two_by_two)
+
+
+       C two limbs by one limb
+
+       mull    %ecx
+
+       movl    %eax, (%ebx)
+       movl    4(%esi), %eax
+       movl    %edx, %esi      C carry
+
+       mull    %ecx
+
+       addl    %eax, %esi
+
+       movl    %esi, 4(%ebx)
+       movl    SAVE_ESI, %esi
+
+       adcl    $0, %edx
+
+       movl    %edx, 8(%ebx)
+       movl    SAVE_EBX, %ebx
+       addl    $FRAME, %esp
+
+       ret
+
+
+
+C -----------------------------------------------------------------------------
+C Could load yp earlier into another register.
+
+       ALIGN(16)
+L(two_by_two):
+       C eax   xp low limb
+       C ebx   wp
+       C ecx   yp low limb
+       C edx
+       C esi   xp
+       C edi
+       C ebp
+
+dnl  FRAME carries on from previous
+
+       mull    %ecx            C xp[0] * yp[0]
+
+       push    %edi            defframe_pushl(`SAVE_EDI')
+       movl    %edx, %edi      C carry, for wp[1]
+
+       movl    %eax, (%ebx)
+       movl    4(%esi), %eax
+
+       mull    %ecx            C xp[1] * yp[0]
+
+       addl    %eax, %edi
+       movl    PARAM_YP, %ecx
+
+       adcl    $0, %edx
+       movl    4(%ecx), %ecx   C yp[1]
+       movl    %edi, 4(%ebx)
+
+       movl    4(%esi), %eax   C xp[1]
+       movl    %edx, %edi      C carry, for wp[2]
+
+       mull    %ecx            C xp[1] * yp[1]
+
+       addl    %eax, %edi
+
+       adcl    $0, %edx
+       movl    (%esi), %eax    C xp[0]
+
+       movl    %edx, %esi      C carry, for wp[3]
+
+       mull    %ecx            C xp[0] * yp[1]
+
+       addl    %eax, 4(%ebx)
+       adcl    %edx, %edi
+       movl    %edi, 8(%ebx)
+
+       adcl    $0, %esi
+       movl    SAVE_EDI, %edi
+       movl    %esi, 12(%ebx)
+
+       movl    SAVE_ESI, %esi
+       movl    SAVE_EBX, %ebx
+       addl    $FRAME, %esp
+
+       ret
+
+
+C -----------------------------------------------------------------------------
+       ALIGN(16)
+L(xsize_more_than_two):
+
+C The first limb of yp is processed with a simple mpn_mul_1 style loop
+C inline.  Unrolling this doesn't seem worthwhile since it's only run once
+C (whereas the addmul below is run ysize-1 many times).  A call to the
+C actual mpn_mul_1 will be slowed down by the call and parameter pushing and
+C popping, and doesn't seem likely to be worthwhile on the typical 13-26
+C limb operations the Karatsuba code calls here with.
+
+       C eax   yp[0]
+       C ebx
+       C ecx   xsize
+       C edx   xp
+       C esi
+       C edi
+       C ebp
+
+dnl  FRAME doesn't carry on from previous, no pushes yet here
+defframe(`SAVE_EBX',-4)
+defframe(`SAVE_ESI',-8)
+defframe(`SAVE_EDI',-12)
+defframe(`SAVE_EBP',-16)
+deflit(`FRAME',0)
+
+       subl    $16, %esp
+deflit(`FRAME',16)
+
+       movl    %edi, SAVE_EDI
+       movl    PARAM_WP, %edi
+
+       movl    %ebx, SAVE_EBX
+       movl    %ebp, SAVE_EBP
+       movl    %eax, %ebp
+
+       movl    %esi, SAVE_ESI
+       xorl    %ebx, %ebx
+       leal    (%edx,%ecx,4), %esi     C xp end
+
+       leal    (%edi,%ecx,4), %edi     C wp end of mul1
+       negl    %ecx
+
+
+L(mul1):
+       C eax   scratch
+       C ebx   carry
+       C ecx   counter, negative
+       C edx   scratch
+       C esi   xp end
+       C edi   wp end of mul1
+       C ebp   multiplier
+
+       movl    (%esi,%ecx,4), %eax
+
+       mull    %ebp
+
+       addl    %ebx, %eax
+       movl    %eax, (%edi,%ecx,4)
+       movl    $0, %ebx
+
+       adcl    %edx, %ebx
+       incl    %ecx
+       jnz     L(mul1)
+
+
+       movl    PARAM_YSIZE, %edx
+       movl    PARAM_XSIZE, %ecx
+
+       movl    %ebx, (%edi)            C final carry
+       decl    %edx
+
+       jnz     L(ysize_more_than_one)
+
+
+       movl    SAVE_EDI, %edi
+       movl    SAVE_EBX, %ebx
+
+       movl    SAVE_EBP, %ebp
+       movl    SAVE_ESI, %esi
+       addl    $FRAME, %esp
+
+       ret
+
+
+L(ysize_more_than_one):
+       cmpl    $UNROLL_THRESHOLD, %ecx
+       movl    PARAM_YP, %eax
+
+       jae     L(unroll)
+
+
+C -----------------------------------------------------------------------------
+       C simple addmul looping
+       C
+       C eax   yp
+       C ebx
+       C ecx   xsize
+       C edx   ysize-1
+       C esi   xp end
+       C edi   wp end of mul1
+       C ebp
+
+       leal    4(%eax,%edx,4), %ebp    C yp end
+       negl    %ecx
+       negl    %edx
+
+       movl    (%esi,%ecx,4), %eax     C xp low limb
+       movl    %edx, PARAM_YSIZE       C -(ysize-1)
+       incl    %ecx
+
+       xorl    %ebx, %ebx              C initial carry
+       movl    %ecx, PARAM_XSIZE       C -(xsize-1)
+       movl    %ebp, PARAM_YP
+
+       movl    (%ebp,%edx,4), %ebp     C yp second lowest limb - multiplier
+       jmp     L(simple_outer_entry)
+
+
+       C this is offset 0x121 so close enough to aligned
+L(simple_outer_top):
+       C ebp   ysize counter, negative
+
+       movl    PARAM_YP, %edx
+       movl    PARAM_XSIZE, %ecx       C -(xsize-1)
+       xorl    %ebx, %ebx              C carry
+
+       movl    %ebp, PARAM_YSIZE
+       addl    $4, %edi                C next position in wp
+
+       movl    (%edx,%ebp,4), %ebp     C yp limb - multiplier
+       movl    -4(%esi,%ecx,4), %eax   C xp low limb
+
+
+L(simple_outer_entry):
+
+L(simple_inner):
+       C eax   xp limb
+       C ebx   carry limb
+       C ecx   loop counter (negative)
+       C edx   scratch
+       C esi   xp end
+       C edi   wp end
+       C ebp   multiplier
+
+       mull    %ebp
+
+       addl    %eax, %ebx
+       adcl    $0, %edx
+
+       addl    %ebx, (%edi,%ecx,4)
+       movl    (%esi,%ecx,4), %eax
+       adcl    $0, %edx
+
+       incl    %ecx
+       movl    %edx, %ebx
+       jnz     L(simple_inner)
+
+
+       mull    %ebp
+
+       movl    PARAM_YSIZE, %ebp
+       addl    %eax, %ebx
+
+       adcl    $0, %edx
+       addl    %ebx, (%edi)
+
+       adcl    $0, %edx
+       incl    %ebp
+
+       movl    %edx, 4(%edi)
+       jnz     L(simple_outer_top)
+
+
+       movl    SAVE_EBX, %ebx
+       movl    SAVE_ESI, %esi
+
+       movl    SAVE_EDI, %edi
+       movl    SAVE_EBP, %ebp
+       addl    $FRAME, %esp
+
+       ret
+
+
+
+C -----------------------------------------------------------------------------
+C
+C The unrolled loop is the same as in mpn_addmul_1(), see that code for some
+C comments.
+C
+C VAR_ADJUST is the negative of how many limbs the leals in the inner loop
+C increment xp and wp.  This is used to adjust back xp and wp, and rshifted
+C to give an initial VAR_COUNTER at the top of the outer loop.
+C
+C VAR_COUNTER is for the unrolled loop, running from VAR_ADJUST/UNROLL_COUNT
+C up to -1, inclusive.
+C
+C VAR_JMP is the computed jump into the unrolled loop.
+C
+C VAR_XP_LOW is the least significant limb of xp, which is needed at the
+C start of the unrolled loop.
+C
+C PARAM_YSIZE is the outer loop counter, going from -(ysize-1) up to -1,
+C inclusive.
+C
+C PARAM_YP is offset appropriately so that the PARAM_YSIZE counter can be
+C added to give the location of the next limb of yp, which is the multiplier
+C in the unrolled loop.
+C
+C The trick with VAR_ADJUST means it's only necessary to do one fetch in the
+C outer loop to take care of xp, wp and the inner loop counter.
+
+defframe(VAR_COUNTER,  -20)
+defframe(VAR_ADJUST,   -24)
+defframe(VAR_JMP,      -28)
+defframe(VAR_XP_LOW,   -32)
+deflit(VAR_EXTRA_SPACE, 16)
+
+
+L(unroll):
+       C eax   yp
+       C ebx
+       C ecx   xsize
+       C edx   ysize-1
+       C esi   xp end
+       C edi   wp end of mul1
+       C ebp
+
+       movl    PARAM_XP, %esi
+       movl    4(%eax), %ebp           C multiplier (yp second limb)
+       leal    4(%eax,%edx,4), %eax    C yp adjust for ysize indexing
+
+       movl    PARAM_WP, %edi
+       movl    %eax, PARAM_YP
+       negl    %edx
+
+       movl    %edx, PARAM_YSIZE
+       leal    UNROLL_COUNT-2(%ecx), %ebx      C (xsize-1)+UNROLL_COUNT-1
+       decl    %ecx                            C xsize-1
+
+       movl    (%esi), %eax            C xp low limb
+       andl    $-UNROLL_MASK-1, %ebx
+       negl    %ecx
+
+       subl    $VAR_EXTRA_SPACE, %esp
+deflit(`FRAME',16+VAR_EXTRA_SPACE)
+       negl    %ebx
+       andl    $UNROLL_MASK, %ecx
+
+       movl    %ebx, VAR_ADJUST
+       movl    %ecx, %edx
+       shll    $4, %ecx
+
+       sarl    $UNROLL_LOG2, %ebx
+
+       C 17 code bytes per limb
+ifdef(`PIC',`
+       call    L(pic_calc)
+L(unroll_here):
+',`
+       leal    L(unroll_entry) (%ecx,%edx,1), %ecx
+')
+       negl    %edx
+
+       movl    %eax, VAR_XP_LOW
+       movl    %ecx, VAR_JMP
+       leal    4(%edi,%edx,4), %edi    C wp and xp, adjust for unrolling,
+       leal    4(%esi,%edx,4), %esi    C  and start at second limb
+       jmp     L(unroll_outer_entry)
+
+
+ifdef(`PIC',`
+L(pic_calc):
+       C See mpn/x86/README about old gas bugs
+       leal    (%ecx,%edx,1), %ecx
+       addl    $L(unroll_entry)-L(unroll_here), %ecx
+       addl    (%esp), %ecx
+       ret_internal
+')
+
+
+C --------------------------------------------------------------------------
+       ALIGN(32)
+L(unroll_outer_top):
+       C ebp   ysize counter, negative
+
+       movl    VAR_ADJUST, %ebx
+       movl    PARAM_YP, %edx
+
+       movl    VAR_XP_LOW, %eax
+       movl    %ebp, PARAM_YSIZE       C store incremented ysize counter
+
+       leal    4(%edi,%ebx,4), %edi
+       leal    (%esi,%ebx,4), %esi
+       sarl    $UNROLL_LOG2, %ebx
+
+       movl    (%edx,%ebp,4), %ebp     C yp next multiplier
+       movl    VAR_JMP, %ecx
+
+L(unroll_outer_entry):
+       mull    %ebp
+
+       testb   $1, %cl         C and clear carry bit
+       movl    %ebx, VAR_COUNTER
+       movl    $0, %ebx
+
+       movl    $0, %ecx
+       cmovz(  %eax, %ecx)     C eax into low carry, zero into high carry limb
+       cmovnz( %eax, %ebx)
+
+       C Extra fetch of VAR_JMP is bad, but registers are tight
+       jmp     *VAR_JMP
+
+
+C -----------------------------------------------------------------------------
+       ALIGN(32)
+L(unroll_top):
+       C eax   xp limb
+       C ebx   carry high
+       C ecx   carry low
+       C edx   scratch
+       C esi   xp+8
+       C edi   wp
+       C ebp   yp multiplier limb
+       C
+       C VAR_COUNTER  loop counter, negative
+       C
+       C 17 bytes each limb
+
+L(unroll_entry):
+
+deflit(CHUNK_COUNT,2)
+forloop(`i', 0, UNROLL_COUNT/CHUNK_COUNT-1, `
+       deflit(`disp0', eval(i*CHUNK_COUNT*4 ifelse(UNROLL_BYTES,256,-128)))
+       deflit(`disp1', eval(disp0 + 4))
+
+Zdisp( movl,   disp0,(%esi), %eax)
+       adcl    %edx, %ebx
+
+       mull    %ebp
+
+Zdisp( addl,   %ecx, disp0,(%edi))
+       movl    $0, %ecx
+
+       adcl    %eax, %ebx
+
+
+       movl    disp1(%esi), %eax
+       adcl    %edx, %ecx
+
+       mull    %ebp
+
+       addl    %ebx, disp1(%edi)
+       movl    $0, %ebx
+
+       adcl    %eax, %ecx
+')
+
+
+       incl    VAR_COUNTER
+       leal    UNROLL_BYTES(%esi), %esi
+       leal    UNROLL_BYTES(%edi), %edi
+
+       jnz     L(unroll_top)
+
+
+       C eax
+       C ebx   zero
+       C ecx   low
+       C edx   high
+       C esi
+       C edi   wp, pointing at second last limb
+       C ebp
+       C
+       C carry flag to be added to high
+
+deflit(`disp0', ifelse(UNROLL_BYTES,256,-128))
+deflit(`disp1', eval(disp0-0 + 4))
+
+       movl    PARAM_YSIZE, %ebp
+       adcl    $0, %edx
+       addl    %ecx, disp0(%edi)
+
+       adcl    $0, %edx
+       incl    %ebp
+
+       movl    %edx, disp1(%edi)
+       jnz     L(unroll_outer_top)
+
+
+       movl    SAVE_ESI, %esi
+       movl    SAVE_EBP, %ebp
+
+       movl    SAVE_EDI, %edi
+       movl    SAVE_EBX, %ebx
+       addl    $FRAME, %esp
+
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/k7/sqr_basecase.asm b/mpn/x86/k7/sqr_basecase.asm

new file mode 100644 (file)

index 0000000..850a54e
--- /dev/null
+++ b/mpn/x86/k7/sqr_basecase.asm
@@ -0,0 +1,624 @@
+dnl  AMD K7 mpn_sqr_basecase -- square an mpn number.
+
+dnl  Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C K7: approx 2.3 cycles/crossproduct, or 4.55 cycles/triangular product
+C     (measured on the speed difference between 25 and 50 limbs, which is
+C     roughly the Karatsuba recursing range).
+
+
+dnl  These are the same as mpn/x86/k6/sqr_basecase.asm, see that code for
+dnl  some comments.
+
+deflit(SQR_TOOM2_THRESHOLD_MAX, 66)
+
+ifdef(`SQR_TOOM2_THRESHOLD_OVERRIDE',
+`define(`SQR_TOOM2_THRESHOLD',SQR_TOOM2_THRESHOLD_OVERRIDE)')
+
+m4_config_gmp_mparam(`SQR_TOOM2_THRESHOLD')
+deflit(UNROLL_COUNT, eval(SQR_TOOM2_THRESHOLD-3))
+C UNROLL_COUNT sets how many per-limb chunks the unrolled addmul code gets.
+
+C void mpn_sqr_basecase (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C
+C With a SQR_TOOM2_THRESHOLD around 50 this code is about 1500 bytes,
+C which is quite a bit, but is considered good value since squares big
+C enough to use most of the code will be spending quite a few cycles in it.
+C
+C The square of src[0..size-1] is written to dst[0..2*size-1].
+defframe(PARAM_SIZE,12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+       TEXT
+       ALIGN(32)
+PROLOGUE(mpn_sqr_basecase)
+deflit(`FRAME',0)
+
+       movl    PARAM_SIZE, %ecx
+       movl    PARAM_SRC, %eax
+       cmpl    $2, %ecx                C flags consumed by je/ja below
+
+       movl    PARAM_DST, %edx         C movl leaves the flags alone
+       je      L(two_limbs)
+       ja      L(three_or_more)
+
+
+C------------------------------------------------------------------------------
+C one limb only
+       C eax   src
+       C ecx   size
+       C edx   dst
+
+       movl    (%eax), %eax            C src[0]
+       movl    %edx, %ecx              C dst, since mull overwrites edx
+
+       mull    %eax                    C src[0]^2 in edx:eax
+
+       movl    %edx, 4(%ecx)           C dst[1], high limb
+       movl    %eax, (%ecx)            C dst[0], low limb
+       ret
+
+
+C------------------------------------------------------------------------------
+C
+C Using the read/modify/write "add"s seems to be faster than saving and
+C restoring registers.  Perhaps the loads for the first set hide under the
+C mul latency and the second gets store to load forwarding.
+
+       ALIGN(16)
+L(two_limbs):
+       C eax   src
+       C ebx
+       C ecx   size
+       C edx   dst
+deflit(`FRAME',0)
+
+       pushl   %ebx            FRAME_pushl()
+       movl    %eax, %ebx      C src
+       movl    (%eax), %eax    C src[0]
+
+       movl    %edx, %ecx      C dst
+
+       mull    %eax            C src[0]^2
+
+       movl    %eax, (%ecx)    C dst[0]
+       movl    4(%ebx), %eax   C src[1]
+
+       movl    %edx, 4(%ecx)   C dst[1]
+
+       mull    %eax            C src[1]^2
+
+       movl    %eax, 8(%ecx)   C dst[2]
+       movl    (%ebx), %eax    C src[0] again
+
+       movl    %edx, 12(%ecx)  C dst[3]
+
+       mull    4(%ebx)         C src[0]*src[1]
+
+       popl    %ebx            C src pointer no longer needed
+
+       addl    %eax, 4(%ecx)   C add the cross product into dst[1..3]
+       adcl    %edx, 8(%ecx)
+       adcl    $0, 12(%ecx)
+       ASSERT(nc)
+
+       addl    %eax, 4(%ecx)   C and once more, it counts twice in the square
+       adcl    %edx, 8(%ecx)
+       adcl    $0, 12(%ecx)
+       ASSERT(nc)
+
+       ret
+
+
+C------------------------------------------------------------------------------
+defframe(SAVE_EBX,  -4)
+defframe(SAVE_ESI,  -8)
+defframe(SAVE_EDI, -12)
+defframe(SAVE_EBP, -16)
+deflit(STACK_SPACE, 16)
+
+L(three_or_more):
+       subl    $STACK_SPACE, %esp      C room for the four SAVE_ slots
+       cmpl    $4, %ecx
+       jae     L(four_or_more)         C size 4 and up takes the general code
+deflit(`FRAME',STACK_SPACE)
+
+
+C------------------------------------------------------------------------------
+C Three limbs
+C
+C Writing out the loads and stores separately at the end of this code comes
+C out about 10 cycles faster than using adcls to memory.
+
+       C eax   src
+       C ecx   size
+       C edx   dst
+
+       movl    %ebx, SAVE_EBX
+       movl    %eax, %ebx      C src
+       movl    (%eax), %eax    C src[0]
+
+       movl    %edx, %ecx      C dst
+       movl    %esi, SAVE_ESI
+       movl    %edi, SAVE_EDI
+
+       mull    %eax            C src[0] ^ 2
+
+       movl    %eax, (%ecx)    C dst[0]
+       movl    4(%ebx), %eax
+       movl    %edx, 4(%ecx)   C dst[1]
+
+       mull    %eax            C src[1] ^ 2
+
+       movl    %eax, 8(%ecx)   C dst[2]
+       movl    8(%ebx), %eax
+       movl    %edx, 12(%ecx)  C dst[3]
+
+       mull    %eax            C src[2] ^ 2
+
+       movl    %eax, 16(%ecx)  C dst[4]
+       movl    (%ebx), %eax
+       movl    %edx, 20(%ecx)  C dst[5]
+
+       mull    4(%ebx)         C src[0] * src[1]
+
+       movl    %eax, %esi      C low limb, cross product for dst[1]
+       movl    (%ebx), %eax
+       movl    %edx, %edi      C high limb, starts the dst[2] sum
+
+       mull    8(%ebx)         C src[0] * src[2]
+
+       addl    %eax, %edi
+       movl    %ebp, SAVE_EBP  C ebp saved just before its first use
+       movl    $0, %ebp        C movl keeps the carry from addl
+
+       movl    4(%ebx), %eax
+       adcl    %edx, %ebp      C high limb plus carry, starts the dst[3] sum
+
+       mull    8(%ebx)         C src[1] * src[2]
+
+       xorl    %ebx, %ebx
+       addl    %eax, %ebp
+
+       adcl    $0, %edx
+
+       C eax
+       C ebx   zero, will be dst[5]
+       C ecx   dst
+       C edx   dst[4]
+       C esi   dst[1]
+       C edi   dst[2]
+       C ebp   dst[3]
+
+       adcl    $0, %edx
+       addl    %esi, %esi      C double the cross products, limb by limb
+
+       adcl    %edi, %edi
+       movl    4(%ecx), %eax
+
+       adcl    %ebp, %ebp
+
+       adcl    %edx, %edx
+
+       adcl    $0, %ebx        C bit doubled out of edx, destined for dst[5]
+       addl    %eax, %esi      C now add the squares stored in dst[1..5]
+       movl    8(%ecx), %eax
+
+       adcl    %eax, %edi
+       movl    12(%ecx), %eax
+       movl    %esi, 4(%ecx)
+
+       adcl    %eax, %ebp
+       movl    16(%ecx), %eax
+       movl    %edi, 8(%ecx)
+
+       movl    SAVE_ESI, %esi
+       movl    SAVE_EDI, %edi
+
+       adcl    %eax, %edx
+       movl    20(%ecx), %eax
+       movl    %ebp, 12(%ecx)
+
+       adcl    %ebx, %eax
+       ASSERT(nc)
+       movl    SAVE_EBX, %ebx
+       movl    SAVE_EBP, %ebp
+
+       movl    %edx, 16(%ecx)
+       movl    %eax, 20(%ecx)
+       addl    $FRAME, %esp
+
+       ret
+
+
+C------------------------------------------------------------------------------
+L(four_or_more):
+
+C First multiply src[0]*src[1..size-1] and store at dst[1..size].
+C Further products are added in rather than stored.
+
+       C eax   src
+       C ebx
+       C ecx   size
+       C edx   dst
+       C esi
+       C edi
+       C ebp
+
+defframe(`VAR_COUNTER',-20)
+defframe(`VAR_JMP',    -24)
+deflit(EXTRA_STACK_SPACE, 8)
+
+       movl    %ebx, SAVE_EBX
+       movl    %edi, SAVE_EDI
+       leal    (%edx,%ecx,4), %edi     C &dst[size]
+
+       movl    %esi, SAVE_ESI
+       movl    %ebp, SAVE_EBP
+       leal    (%eax,%ecx,4), %esi     C &src[size]
+
+       movl    (%eax), %ebp            C multiplier, src[0]
+       movl    $0, %ebx                C initial carry
+       decl    %ecx
+
+       negl    %ecx                    C -(size-1), so the loop starts at src[1]
+       subl    $EXTRA_STACK_SPACE, %esp        C room for VAR_COUNTER and VAR_JMP
+FRAME_subl_esp(EXTRA_STACK_SPACE)
+
+L(mul_1):
+       C eax   scratch
+       C ebx   carry
+       C ecx   counter
+       C edx   scratch
+       C esi   &src[size]
+       C edi   &dst[size]
+       C ebp   multiplier
+
+       movl    (%esi,%ecx,4), %eax
+
+       mull    %ebp
+
+       addl    %ebx, %eax              C low product plus carry in
+       movl    %eax, (%edi,%ecx,4)
+       movl    $0, %ebx                C movl keeps the carry flag from addl
+
+       adcl    %edx, %ebx              C carry out, high product plus carry flag
+       incl    %ecx
+       jnz     L(mul_1)
+
+
+C Add products src[n]*src[n+1..size-1] at dst[2*n-1...], for each n=1..size-2.
+C
+C The last two products, which are the bottom right corner of the product
+C triangle, are left to the end.  These are src[size-3]*src[size-2,size-1]
+C and src[size-2]*src[size-1].  If size is 4 then it's only these corner
+C cases that need to be done.
+C
+C The unrolled code is the same as in mpn_addmul_1, see that routine for
+C some comments.
+C
+C VAR_COUNTER is the outer loop, running from -size+4 to -1, inclusive.
+C
+C VAR_JMP is the computed jump into the unrolled code, stepped by one code
+C chunk each outer loop.
+C
+C K7 does branch prediction on indirect jumps, which is bad since it's a
+C different target each time.  There seems no way to avoid this.
+
+dnl  This value also hard coded in some shifts and adds
+deflit(CODE_BYTES_PER_LIMB, 17)
+
+dnl  With the unmodified &src[size] and &dst[size] pointers, the
+dnl  displacements in the unrolled code fit in a byte for UNROLL_COUNT
+dnl  values up to 31, but above that an offset must be added to them.
+
+deflit(OFFSET,
+ifelse(eval(UNROLL_COUNT>31),1,
+eval((UNROLL_COUNT-31)*4),
+0))
+
+dnl  Because the last chunk of code is generated differently, a label placed
+dnl  at the end doesn't work.  Instead calculate the implied end using the
+dnl  start and how many chunks of code there are.
+
+deflit(UNROLL_INNER_END,
+`L(unroll_inner_start)+eval(UNROLL_COUNT*CODE_BYTES_PER_LIMB)')
+
+       C eax
+       C ebx   carry
+       C ecx
+       C edx
+       C esi   &src[size]
+       C edi   &dst[size]
+       C ebp
+
+       movl    PARAM_SIZE, %ecx
+       movl    %ebx, (%edi)            C dst[size], final carry of L(mul_1)
+
+       subl    $4, %ecx
+       jz      L(corner)               C size 4 has only the corner products left
+
+       negl    %ecx                    C -(size-4), the initial VAR_COUNTER
+ifelse(OFFSET,0,,`subl $OFFSET, %edi')
+ifelse(OFFSET,0,,`subl $OFFSET, %esi')
+
+       movl    %ecx, %edx
+       shll    $4, %ecx                C 16*count, adding edx below makes 17*count
+
+ifdef(`PIC',`
+       call    L(pic_calc)
+L(here):
+',`
+       leal    UNROLL_INNER_END-eval(2*CODE_BYTES_PER_LIMB)(%ecx,%edx), %ecx
+')
+
+
+       C The calculated jump mustn't come out to before the start of the
+       C code available.  This is the limit UNROLL_COUNT puts on the src
+       C operand size, but checked here directly using the jump address.
+       ASSERT(ae,
+       `movl_text_address(L(unroll_inner_start), %eax)
+       cmpl    %eax, %ecx')
+
+
+C------------------------------------------------------------------------------
+       ALIGN(16)
+L(unroll_outer_top):
+       C eax
+       C ebx   high limb to store
+       C ecx   VAR_JMP
+       C edx   VAR_COUNTER, limbs, negative
+       C esi   &src[size], constant
+       C edi   dst ptr, high of last addmul
+       C ebp
+
+       movl    -12+OFFSET(%esi,%edx,4), %ebp   C next multiplier
+       movl    -8+OFFSET(%esi,%edx,4), %eax    C first of multiplicand
+
+       movl    %edx, VAR_COUNTER
+
+       mull    %ebp
+
+define(cmovX,`ifelse(eval(UNROLL_COUNT%2),0,`cmovz($@)',`cmovnz($@)')')
+
+       testb   $1, %cl         C low bit of the jump address picks the carry roles
+       movl    %edx, %ebx      C high carry
+       movl    %ecx, %edx      C jump
+
+       movl    %eax, %ecx      C low carry
+       cmovX(  %ebx, %ecx)     C high carry reverse
+       cmovX(  %eax, %ebx)     C low carry reverse
+
+       leal    CODE_BYTES_PER_LIMB(%edx), %eax C entry point one chunk later
+       xorl    %edx, %edx      C edx zero and carry clear for the first adcl
+       leal    4(%edi), %edi   C dst moves up one limb, leal keeps the flags
+
+       movl    %eax, VAR_JMP
+
+       jmp     *%eax
+
+
+ifdef(`PIC',`
+L(pic_calc):
+       addl    (%esp), %ecx
+       addl    $UNROLL_INNER_END-eval(2*CODE_BYTES_PER_LIMB)-L(here), %ecx
+       addl    %edx, %ecx
+       ret_internal
+')
+
+
+       C Must be an even address to preserve the significance of the low
+       C bit of the jump address indicating which way around ecx/ebx should
+       C start.
+       ALIGN(2)
+
+L(unroll_inner_start):
+       C eax   next limb
+       C ebx   carry high
+       C ecx   carry low
+       C edx   scratch
+       C esi   src
+       C edi   dst
+       C ebp   multiplier
+
+forloop(`i', UNROLL_COUNT, 1, `
+       deflit(`disp_src', eval(-i*4 + OFFSET))
+       deflit(`disp_dst', eval(disp_src - 4))
+
+       m4_assert(`disp_src>=-128 && disp_src<128')
+       m4_assert(`disp_dst>=-128 && disp_dst<128')
+
+ifelse(eval(i%2),0,`
+Zdisp( movl,   disp_src,(%esi), %eax)
+       adcl    %edx, %ebx
+
+       mull    %ebp
+
+Zdisp(  addl,  %ecx, disp_dst,(%edi))
+       movl    $0, %ecx
+
+       adcl    %eax, %ebx
+
+',`
+       dnl  this bit comes out last
+Zdisp(  movl,  disp_src,(%esi), %eax)
+       adcl    %edx, %ecx
+
+       mull    %ebp
+
+Zdisp( addl,   %ebx, disp_dst,(%edi))
+
+ifelse(forloop_last,0,
+`      movl    $0, %ebx')
+
+       adcl    %eax, %ecx
+')
+')
+
+       C eax   next limb
+       C ebx   carry high
+       C ecx   carry low
+       C edx   scratch
+       C esi   src
+       C edi   dst
+       C ebp   multiplier
+
+       adcl    $0, %edx                C pending carry goes into the high limb
+       addl    %ecx, -4+OFFSET(%edi)   C last low limb is added into dst
+       movl    VAR_JMP, %ecx           C reloaded for the next outer pass
+
+       adcl    $0, %edx
+
+       movl    %edx, m4_empty_if_zero(OFFSET) (%edi)   C high limb is stored, not added
+       movl    VAR_COUNTER, %edx
+
+       incl    %edx                    C counter is negative, loop until zero
+       jnz     L(unroll_outer_top)
+
+
+ifelse(OFFSET,0,,`
+       addl    $OFFSET, %esi
+       addl    $OFFSET, %edi
+')
+
+
+C------------------------------------------------------------------------------
+L(corner):
+       C esi   &src[size]
+       C edi   &dst[2*size-5]
+
+       movl    -12(%esi), %ebp         C src[size-3]
+       movl    -8(%esi), %eax          C src[size-2]
+       movl    %eax, %ecx              C kept for the last product
+
+       mull    %ebp                    C src[size-3] * src[size-2]
+
+       addl    %eax, -4(%edi)
+       movl    -4(%esi), %eax          C src[size-1]
+
+       adcl    $0, %edx
+       movl    %edx, %ebx              C carry limb
+       movl    %eax, %esi              C src[size-1] kept, esi is no longer a pointer
+
+       mull    %ebp                    C src[size-3] * src[size-1]
+
+       addl    %ebx, %eax
+
+       adcl    $0, %edx
+       addl    %eax, (%edi)
+       movl    %esi, %eax
+
+       adcl    $0, %edx
+       movl    %edx, %ebx              C carry limb
+
+       mull    %ecx                    C src[size-2] * src[size-1]
+
+       addl    %ebx, %eax
+       movl    %eax, 4(%edi)           C stored, not added
+
+       adcl    $0, %edx
+       movl    %edx, 8(%edi)
+
+
+
+C Left shift of dst[1..2*size-2], high bit shifted out becomes dst[2*size-1].
+
+L(lshift_start):
+       movl    PARAM_SIZE, %eax
+       movl    PARAM_DST, %edi
+       xorl    %ecx, %ecx              C clear carry
+
+       leal    (%edi,%eax,8), %edi     C &dst[2*size], leal keeps the carry clear
+       notl    %eax                    C -size-1, preserve carry
+
+       leal    2(%eax), %eax           C -(size-1)
+
+L(lshift):
+       C eax   counter, negative
+       C ebx
+       C ecx
+       C edx
+       C esi
+       C edi   dst, pointing just after last limb
+       C ebp
+
+       rcll    -4(%edi,%eax,8)         C two limbs per pass, starting at dst[1]
+       rcll    (%edi,%eax,8)
+       incl    %eax                    C incl leaves the carry for the next rcll
+       jnz     L(lshift)
+
+       setc    %al                     C eax is zero here, so eax becomes the bit shifted out
+
+       movl    PARAM_SRC, %esi
+       movl    %eax, -4(%edi)          C dst most significant limb
+
+       movl    PARAM_SIZE, %ecx
+
+
+C Now add in the squares on the diagonal, src[0]^2, src[1]^2, ...,
+C src[size-1]^2.  dst[0] hasn't yet been set at all yet, and just gets the
+C low limb of src[0]^2.
+
+       movl    (%esi), %eax            C src[0]
+
+       mull    %eax                    C high limb stays in edx as carry for L(diag)
+
+       leal    (%esi,%ecx,4), %esi     C src point just after last limb
+       negl    %ecx
+
+       movl    %eax, (%edi,%ecx,8)     C dst[0]
+       incl    %ecx                    C -(size-1), the loop starts at src[1]
+
+L(diag):
+       C eax   scratch
+       C ebx   scratch
+       C ecx   counter, negative
+       C edx   carry
+       C esi   src just after last limb
+       C edi   dst just after last limb
+       C ebp
+
+       movl    (%esi,%ecx,4), %eax
+       movl    %edx, %ebx              C high limb of the previous square
+
+       mull    %eax
+
+       addl    %ebx, -4(%edi,%ecx,8)   C previous high limb into the odd dst limb
+       adcl    %eax, (%edi,%ecx,8)     C this low limb into the even dst limb
+       adcl    $0, %edx
+
+       incl    %ecx
+       jnz     L(diag)
+
+
+       movl    SAVE_ESI, %esi
+       movl    SAVE_EBX, %ebx
+
+       addl    %edx, -4(%edi)          C dst most significant limb
+       movl    SAVE_EDI, %edi
+
+       movl    SAVE_EBP, %ebp
+       addl    $FRAME, %esp
+
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/lshift.asm b/mpn/x86/lshift.asm

new file mode 100644 (file)

index 0000000..5598599
--- /dev/null
+++ b/mpn/x86/lshift.asm
@@ -0,0 +1,96 @@
+dnl  x86 mpn_lshift -- mpn left shift.
+
+dnl  Copyright 1992, 1994, 1996, 1999, 2000, 2001, 2002 Free Software
+dnl  Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C     cycles/limb
+C P54:   7.5
+C P55:   7.0
+C P6:    2.5
+C K6:    4.5
+C K7:    5.0
+C P4:   14.5
+
+
+C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                       unsigned shift);
+C Limbs are handled from most to least significant, eax ends as the carry limb.
+defframe(PARAM_SHIFT,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC,  8)
+defframe(PARAM_DST,  4)
+
+       TEXT
+       ALIGN(8)
+PROLOGUE(mpn_lshift)
+
+       pushl   %edi                    C edi, esi and ebx are popped before each ret
+       pushl   %esi
+       pushl   %ebx
+deflit(`FRAME',12)
+
+       movl    PARAM_DST,%edi
+       movl    PARAM_SRC,%esi
+       movl    PARAM_SIZE,%edx
+       movl    PARAM_SHIFT,%ecx
+
+       subl    $4,%esi                 C adjust src, (%esi,%edx,4) is src[edx-1]
+
+       movl    (%esi,%edx,4),%ebx      C read most significant limb
+       xorl    %eax,%eax               C eax zero, receives the bits shifted out
+       shldl(  %cl, %ebx, %eax)        C compute carry limb
+       decl    %edx                    C size-1 limbs still to load
+       jz      L(end)                  C single limb, no loop needed
+       pushl   %eax                    C push carry limb onto stack
+       testb   $1,%dl                  C parity of the remaining count picks the entry
+       jnz     L(1)                    C enter loop in the middle
+       movl    %ebx,%eax               C L(oop) expects the higher limb in eax
+
+       ALIGN(8)
+L(oop):        movl    (%esi,%edx,4),%ebx      C load next lower limb
+       shldl(  %cl, %ebx, %eax)        C compute result limb
+       movl    %eax,(%edi,%edx,4)      C store it
+       decl    %edx
+L(1):  movl    (%esi,%edx,4),%eax      C load next lower limb, roles of eax/ebx swapped
+       shldl(  %cl, %eax, %ebx)        C compute result limb
+       movl    %ebx,(%edi,%edx,4)      C store it
+       decl    %edx
+       jnz     L(oop)
+
+       shll    %cl,%eax                C compute least significant limb
+       movl    %eax,(%edi)             C store it
+
+       popl    %eax                    C pop carry limb
+
+       popl    %ebx
+       popl    %esi
+       popl    %edi
+       ret
+
+L(end):        shll    %cl,%ebx                C compute least significant limb
+       movl    %ebx,(%edi)             C store it
+
+       popl    %ebx                    C eax still holds the carry limb from shldl
+       popl    %esi
+       popl    %edi
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/mod_34lsub1.asm b/mpn/x86/mod_34lsub1.asm

new file mode 100644 (file)

index 0000000..68b4a73
--- /dev/null
+++ b/mpn/x86/mod_34lsub1.asm
@@ -0,0 +1,172 @@
+dnl  Generic x86 mpn_mod_34lsub1 -- mpn remainder modulo 2^24-1.
+
+dnl  Copyright 2000, 2001, 2002, 2004 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C      cycles/limb
+C P5:     3.0
+C P6:     3.66
+C K6:     3.0
+C K7:     1.3
+C P4:     9
+
+
+C mp_limb_t mpn_mod_34lsub1 (mp_srcptr src, mp_size_t size)
+C Limbs are summed with weights 2^0, 2^8, 2^16 by index mod 3, since 2^32 is
+C 2^8 mod 2^24-1.  The folded sum is returned without a final reduction.
+defframe(PARAM_SIZE, 8)
+defframe(PARAM_SRC,  4)
+
+dnl  re-use parameter space
+define(SAVE_EBX, `PARAM_SRC')
+
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_mod_34lsub1)
+deflit(`FRAME',0)
+
+       movl    PARAM_SIZE, %ecx
+       movl    PARAM_SRC, %edx
+
+       subl    $2, %ecx                C size-2, flags used by both ja and jb
+       ja      L(three_or_more)
+
+       movl    (%edx), %eax            C src[0], movl keeps the flags
+       jb      L(one)                  C size below 2, src[0] is returned as is
+
+       movl    4(%edx), %ecx           C src[1]
+       movl    %eax, %edx
+       shrl    $24, %eax               C src[0] high
+
+       andl    $0xFFFFFF, %edx         C src[0] low
+       addl    %edx, %eax
+       movl    %ecx, %edx
+
+       andl    $0xFFFF, %ecx
+       shrl    $16, %edx               C src[1] high
+       addl    %edx, %eax
+
+       shll    $8, %ecx                C src[1] low
+       addl    %ecx, %eax
+
+L(one):
+       ret
+
+
+L(three_or_more):
+       C eax
+       C ebx
+       C ecx   size-2
+       C edx   src
+       C esi
+       C edi
+       C ebp
+
+       movl    %ebx, SAVE_EBX          C and arrange 16-byte loop alignment
+       xorl    %ebx, %ebx
+
+       pushl   %esi    FRAME_pushl()
+       xorl    %esi, %esi
+
+       pushl   %edi    FRAME_pushl()
+       xorl    %eax, %eax              C and clear carry flag
+
+
+       C offset 0x40 here
+L(top):
+       C eax   acc 0mod3
+       C ebx   acc 1mod3
+       C ecx   counter, limbs
+       C edx   src
+       C esi   acc 2mod3
+       C edi
+       C ebp
+
+       leal    12(%edx), %edx          C leal does not touch the carry
+       leal    -2(%ecx), %ecx
+
+       adcl    -12(%edx), %eax         C carry chains from one accumulator to the next
+       adcl    -8(%edx), %ebx
+       adcl    -4(%edx), %esi
+
+       decl    %ecx                    C three limbs done, decl keeps the carry
+       jg      L(top)
+
+
+       C ecx is -2, -1 or 0 representing 0, 1 or 2 more limbs, respectively
+
+       movl    $0xFFFFFFFF, %edi       C minus mask is 1, carry out of acc 2mod3
+       incl    %ecx
+       js      L(combine)
+
+       adcl    (%edx), %eax
+       movl    $0xFFFFFF00, %edi       C minus mask is 2^8, carry out of acc 0mod3
+       decl    %ecx
+       js      L(combine)
+
+       adcl    4(%edx), %ebx
+       movl    $0xFFFF0000, %edi       C minus mask is 2^16, carry out of acc 1mod3
+
+
+L(combine):
+       C eax   acc 0mod3
+       C ebx   acc 1mod3
+       C ecx
+       C edx
+       C esi   acc 2mod3
+       C edi   mask
+       C ebp
+
+       sbbl    %ecx, %ecx              C carry, 0 or -1
+       movl    %eax, %edx              C 0mod3
+
+       shrl    $24, %eax               C 0mod3 high
+       andl    %edi, %ecx              C carry masked
+
+       subl    %ecx, %eax              C apply carry
+       movl    %ebx, %edi              C 1mod3
+
+       shrl    $16, %ebx               C 1mod3 high
+       andl    $0x00FFFFFF, %edx       C 0mod3 low
+
+       addl    %edx, %eax              C apply 0mod3 low
+       andl    $0xFFFF, %edi
+
+       shll    $8, %edi                C 1mod3 low
+       addl    %ebx, %eax              C apply 1mod3 high
+
+       addl    %edi, %eax              C apply 1mod3 low
+       movl    %esi, %edx              C 2mod3
+
+       shrl    $8, %esi                C 2mod3 high
+       andl    $0xFF, %edx             C 2mod3 low
+
+       shll    $16, %edx               C 2mod3 low
+       addl    %esi, %eax              C apply 2mod3 high
+
+       addl    %edx, %eax              C apply 2mod3 low
+       popl    %edi    FRAME_popl()
+
+       movl    SAVE_EBX, %ebx
+       popl    %esi    FRAME_popl()
+
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/mul_1.asm b/mpn/x86/mul_1.asm

new file mode 100644 (file)

index 0000000..1d715ec
--- /dev/null
+++ b/mpn/x86/mul_1.asm
@@ -0,0 +1,130 @@
+dnl  x86 mpn_mul_1 (for 386, 486, and Pentium Pro) -- Multiply a limb vector
+dnl  with a limb and store the result in a second limb vector.
+
+dnl  Copyright 1992, 1994, 1997, 1998, 1999, 2000, 2001, 2002, 2005 Free
+dnl  Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C                           cycles/limb
+C P5:                           12.5
+C P6 model 0-8,10-12             5.5
+C P6 model 9  (Banias)
+C P6 model 13 (Dothan)           5.25
+C P4 model 0  (Willamette)      19.0
+C P4 model 1  (?)               19.0
+C P4 model 2  (Northwood)       19.0
+C P4 model 3  (Prescott)
+C P4 model 4  (Nocona)
+C K6:                           10.5
+C K7:                            4.5
+C K8:
+
+
+C mp_limb_t mpn_mul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                      mp_limb_t multiplier);
+
+defframe(PARAM_MULTIPLIER,16)
+defframe(PARAM_SIZE,      12)
+defframe(PARAM_SRC,       8)
+defframe(PARAM_DST,       4)
+
+       TEXT
+       ALIGN(8)
+PROLOGUE(mpn_mul_1)
+deflit(`FRAME',0)
+C edi=dst  esi=src  ecx=count  ebx/ebp=carry limb  edx:eax=product
+       pushl   %edi                    C these four registers are restored at L(end)
+       pushl   %esi
+       pushl   %ebx
+       pushl   %ebp
+deflit(`FRAME',16)
+
+       movl    PARAM_DST,%edi
+       movl    PARAM_SRC,%esi
+       movl    PARAM_SIZE,%ecx
+
+       xorl    %ebx,%ebx               C carry limb = 0
+       andl    $3,%ecx                 C size mod 4 limbs are done one at a time
+       jz      L(end0)
+
+L(oop0):
+       movl    (%esi),%eax
+       mull    PARAM_MULTIPLIER        C edx:eax = src limb * multiplier
+       leal    4(%esi),%esi
+       addl    %ebx,%eax               C add the incoming carry limb
+       movl    $0,%ebx                 C movl keeps the carry flag from the addl
+       adcl    %ebx,%edx               C fold that carry flag into the high limb
+       movl    %eax,(%edi)             C store the low limb
+       movl    %edx,%ebx       C propagate carry into cylimb
+
+       leal    4(%edi),%edi
+       decl    %ecx
+       jnz     L(oop0)
+
+L(end0):
+       movl    PARAM_SIZE,%ecx
+       shrl    $2,%ecx                 C size/4 passes of the 4-limb loop
+       jz      L(end)
+
+
+       ALIGN(8)
+L(oop):        movl    (%esi),%eax
+       mull    PARAM_MULTIPLIER
+       addl    %eax,%ebx               C low product + cylimb
+       movl    $0,%ebp
+       adcl    %edx,%ebp               C next cylimb = high product + carry flag
+
+       movl    4(%esi),%eax
+       mull    PARAM_MULTIPLIER
+       movl    %ebx,(%edi)
+       addl    %eax,%ebp       C new lo + cylimb
+       movl    $0,%ebx
+       adcl    %edx,%ebx
+
+       movl    8(%esi),%eax
+       mull    PARAM_MULTIPLIER
+       movl    %ebp,4(%edi)
+       addl    %eax,%ebx       C new lo + cylimb
+       movl    $0,%ebp
+       adcl    %edx,%ebp
+
+       movl    12(%esi),%eax
+       mull    PARAM_MULTIPLIER
+       movl    %ebx,8(%edi)
+       addl    %eax,%ebp       C new lo + cylimb
+       movl    $0,%ebx
+       adcl    %edx,%ebx               C cylimb ends each pass back in ebx
+
+       movl    %ebp,12(%edi)
+
+       leal    16(%esi),%esi
+       leal    16(%edi),%edi
+       decl    %ecx
+       jnz     L(oop)
+
+L(end):        movl    %ebx,%eax       C return value is the final carry limb
+
+       popl    %ebp
+       popl    %ebx
+       popl    %esi
+       popl    %edi
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/mul_basecase.asm b/mpn/x86/mul_basecase.asm

new file mode 100644 (file)

index 0000000..7918ea0
--- /dev/null
+++ b/mpn/x86/mul_basecase.asm
@@ -0,0 +1,213 @@
+dnl  x86 mpn_mul_basecase -- Multiply two limb vectors and store the result
+dnl  in a third limb vector.
+
+dnl  Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002 Free Software
+dnl  Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C     cycles/crossproduct
+C P5:     15
+C P6:      7.5
+C K6:     12.5
+C K7:      5.5
+C P4:     24
+
+
+C void mpn_mul_basecase (mp_ptr wp,
+C                        mp_srcptr xp, mp_size_t xsize,
+C                        mp_srcptr yp, mp_size_t ysize);
+C
+C This was written in haste since the Pentium optimized code that was used
+C for all x86 machines was slow for the Pentium II.  This code would benefit
+C from some cleanup.
+C
+C To shave off some percentage of the run-time, one should make 4 variants
+C of the Louter loop, for the four different outcomes of un mod 4.  That
+C would avoid Loop0 altogether.  Code expansion would be > 4-fold for that
+C part of the function, but since it is not very large, that would be
+C acceptable.
+C
+C The mul loop (at L(oopM)) might need some tweaking.  Its current speed is
+C unknown.
+
+defframe(PARAM_YSIZE,20)
+defframe(PARAM_YP,   16)
+defframe(PARAM_XSIZE,12)
+defframe(PARAM_XP,   8)
+defframe(PARAM_WP,   4)
+
+defframe(VAR_MULTIPLIER, -4)
+defframe(VAR_COUNTER,    -8)
+deflit(VAR_STACK_SPACE,  8)
+
+       TEXT
+       ALIGN(8)
+
+PROLOGUE(mpn_mul_basecase)
+deflit(`FRAME',0)
+C First pass stores xp times yp[0] to wp.  Each later y limb is then added in.
+       subl    $VAR_STACK_SPACE,%esp   C room for VAR_MULTIPLIER and VAR_COUNTER
+       pushl   %esi
+       pushl   %ebp
+       pushl   %edi
+deflit(`FRAME',eval(VAR_STACK_SPACE+12))
+
+       movl    PARAM_XP,%esi
+       movl    PARAM_WP,%edi
+       movl    PARAM_YP,%ebp
+
+       movl    (%esi),%eax             C load xp[0]
+       mull    (%ebp)                  C multiply by yp[0]
+       movl    %eax,(%edi)             C store to wp[0]
+       movl    PARAM_XSIZE,%ecx        C xsize
+       decl    %ecx                    C If xsize = 1, ysize = 1 too
+       jz      L(done)
+
+       pushl   %ebx                    C ebx is only saved on this path
+FRAME_pushl()
+       movl    %edx,%ebx               C high limb of xp[0]*yp[0] is the first cylimb
+
+       leal    4(%esi),%esi
+       leal    4(%edi),%edi
+
+L(oopM):
+       movl    (%esi),%eax             C load next limb at xp[j]
+       leal    4(%esi),%esi
+       mull    (%ebp)
+       addl    %ebx,%eax               C low product + cylimb
+       movl    %edx,%ebx
+       adcl    $0,%ebx                 C new cylimb = high product + carry flag
+       movl    %eax,(%edi)
+       leal    4(%edi),%edi
+       decl    %ecx
+       jnz     L(oopM)
+
+       movl    %ebx,(%edi)             C most significant limb of product
+       addl    $4,%edi                 C increment wp
+       movl    PARAM_XSIZE,%eax
+       shll    $2,%eax                 C 4*xsize bytes
+       subl    %eax,%edi               C wp now points at wp[1]
+       subl    %eax,%esi               C xp is back at xp[0]
+
+       movl    PARAM_YSIZE,%eax        C ysize
+       decl    %eax
+       jz      L(skip)
+       movl    %eax,VAR_COUNTER        C ysize-1 y limbs remain to be processed
+
+L(outer):
+       movl    PARAM_YP,%ebp           C yp
+       addl    $4,%ebp                 C make ebp point to next y limb
+       movl    %ebp,PARAM_YP
+       movl    (%ebp),%eax             C copy y limb ...
+       movl    %eax,VAR_MULTIPLIER     C ... to stack slot
+       movl    PARAM_XSIZE,%ecx
+
+       xorl    %ebx,%ebx               C cylimb = 0
+       andl    $3,%ecx                 C xsize mod 4 limbs are done one at a time
+       jz      L(end0)
+
+L(oop0):
+       movl    (%esi),%eax
+       mull    VAR_MULTIPLIER
+       leal    4(%esi),%esi
+       addl    %ebx,%eax
+       movl    $0,%ebx                 C movl keeps the carry flag from the addl
+       adcl    %ebx,%edx
+       addl    %eax,(%edi)
+       adcl    %edx,%ebx               C propagate carry into cylimb
+
+       leal    4(%edi),%edi
+       decl    %ecx
+       jnz     L(oop0)
+
+L(end0):
+       movl    PARAM_XSIZE,%ecx
+       shrl    $2,%ecx                 C xsize/4 passes of the 4-limb loop
+       jz      L(endX)
+
+       ALIGN(8)
+L(oopX):
+       movl    (%esi),%eax
+       mull    VAR_MULTIPLIER
+       addl    %eax,%ebx
+       movl    $0,%ebp
+       adcl    %edx,%ebp
+
+       movl    4(%esi),%eax
+       mull    VAR_MULTIPLIER
+       addl    %ebx,(%edi)
+       adcl    %eax,%ebp       C new lo + cylimb
+       movl    $0,%ebx
+       adcl    %edx,%ebx
+
+       movl    8(%esi),%eax
+       mull    VAR_MULTIPLIER
+       addl    %ebp,4(%edi)
+       adcl    %eax,%ebx       C new lo + cylimb
+       movl    $0,%ebp
+       adcl    %edx,%ebp
+
+       movl    12(%esi),%eax
+       mull    VAR_MULTIPLIER
+       addl    %ebx,8(%edi)
+       adcl    %eax,%ebp       C new lo + cylimb
+       movl    $0,%ebx
+       adcl    %edx,%ebx
+
+       addl    %ebp,12(%edi)
+       adcl    $0,%ebx         C propagate carry into cylimb
+
+       leal    16(%esi),%esi
+       leal    16(%edi),%edi
+       decl    %ecx
+       jnz     L(oopX)
+
+L(endX):
+       movl    %ebx,(%edi)
+       addl    $4,%edi
+
+       C we incremented wp and xp in the loop above; compensate
+       movl    PARAM_XSIZE,%eax
+       shll    $2,%eax
+       subl    %eax,%edi
+       subl    %eax,%esi
+
+       movl    VAR_COUNTER,%eax
+       decl    %eax
+       movl    %eax,VAR_COUNTER
+       jnz     L(outer)
+
+L(skip):
+       popl    %ebx
+       popl    %edi
+       popl    %ebp
+       popl    %esi
+       addl    $VAR_STACK_SPACE,%esp   C release the locals reserved in the prologue
+       ret
+
+L(done):
+       movl    %edx,4(%edi)       C store to wp[1]
+       popl    %edi
+       popl    %ebp
+       popl    %esi
+       addl    $VAR_STACK_SPACE,%esp   C release the locals reserved in the prologue
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/p6/README b/mpn/x86/p6/README

new file mode 100644 (file)

index 0000000..1ded4e7
--- /dev/null
+++ b/mpn/x86/p6/README
@@ -0,0 +1,114 @@
+Copyright 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+
+
+
+                      INTEL P6 MPN SUBROUTINES
+
+
+
+This directory contains code optimized for Intel P6 class CPUs, meaning
+PentiumPro, Pentium II and Pentium III.  The mmx and p3mmx subdirectories
+have routines using MMX instructions.
+
+
+
+STATUS
+
+Times for the loops, with all code and data in L1 cache, are as follows.
+Some of these might be able to be improved.
+
+                               cycles/limb
+
+       mpn_add_n/sub_n           3.7
+
+       mpn_copyi                 0.75
+       mpn_copyd                 1.75 (or 0.75 if no overlap)
+
+       mpn_divrem_1             39.0
+       mpn_mod_1                21.5
+       mpn_divexact_by3          8.5
+
+       mpn_mul_1                 5.5
+       mpn_addmul/submul_1       6.35
+
+       mpn_l/rshift              2.5
+
+       mpn_mul_basecase          8.2 cycles/crossproduct (approx)
+       mpn_sqr_basecase          4.0 cycles/crossproduct (approx)
+                                 or 7.75 cycles/triangleproduct (approx)
+
+Pentium II and III have MMX and get the following improvements.
+
+       mpn_divrem_1             25.0 integer part, 17.5 fractional part
+
+       mpn_l/rshift              1.75
+
+
+
+
+NOTES
+
+Write-allocate L1 data cache means prefetching of destinations is unnecessary.
+
+Mispredicted branches have a penalty of between 9 and 15 cycles, and even up
+to 26 cycles depending on how far speculative execution has gone.  The 9
+cycle minimum penalty comes from the issue pipeline being 9 stages.
+
+A copy with rep movs seems to copy 16 bytes at a time, since speeds for 4,
+5, 6 or 7 limb operations are all the same.  The 0.75 cycles/limb would be 3
+cycles per 16 byte block.
+
+
+
+
+CODING
+
+Instructions in general code have been shown grouped if they can execute
+together, which means up to three instructions with no successive
+dependencies, and with only the first being a multiple micro-op.
+
+P6 has out-of-order execution, so the groupings are really only showing
+dependent paths where some shuffling might allow some latencies to be
+hidden.
+
+
+
+
+REFERENCES
+
+"Intel Architecture Optimization Reference Manual", 1999, revision 001 dated
+02/99, order number 245127 (order number 730795-001 is in the document too).
+Available on-line:
+
+       http://download.intel.com/design/PentiumII/manuals/245127.htm
+
+"Intel Architecture Optimization Manual", 1997, order number 242816.  This
+is an older document mostly about P5 and not as good as the above.
+Available on-line:
+
+       http://download.intel.com/design/PentiumII/manuals/242816.htm
+
+
+
+----------------
+Local variables:
+mode: text
+fill-column: 76
+End:
diff --git a/mpn/x86/p6/aors_n.asm b/mpn/x86/p6/aors_n.asm

new file mode 100644 (file)

index 0000000..784ed08
--- /dev/null
+++ b/mpn/x86/p6/aors_n.asm
@@ -0,0 +1,145 @@
+dnl  Intel P6 mpn_add_n/mpn_sub_n -- mpn add or subtract.
+
+dnl  Copyright 2006 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C TODO:
+C  * Avoid indexed addressing, it makes us stall on the two-ported register
+C    file.
+
+C                           cycles/limb
+C P6 model 0-8,10-12            3.17
+C P6 model 9   (Banias)         ?
+C P6 model 13  (Dothan)         2.25
+
+
+define(`rp',   `%edi')
+define(`up',   `%esi')
+define(`vp',   `%ebx')
+define(`n',    `%ecx')
+
+ifdef(`OPERATION_add_n', `
+       define(ADCSBB,        adc)
+       define(func,          mpn_add_n)
+       define(func_nc,       mpn_add_nc)')
+ifdef(`OPERATION_sub_n', `
+       define(ADCSBB,        sbb)
+       define(func,          mpn_sub_n)
+       define(func_nc,       mpn_sub_nc)')
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
+
+ASM_START()
+
+       TEXT
+       ALIGN(16)
+
+PROLOGUE(func)
+       xor     %edx, %edx                      C no carry-in for the plain entry point
+L(start):
+       push    %edi                            C these three are restored at L(end)
+       push    %esi
+       push    %ebx
+
+       mov     16(%esp), rp                    C args sit above 3 pushes + return address
+       mov     20(%esp), up
+       mov     24(%esp), vp
+       mov     28(%esp), n
+
+       lea     (up,n,4), up                    C point all three at the end of the operands
+       lea     (vp,n,4), vp
+       lea     (rp,n,4), rp
+
+       neg     n                               C index now runs from -size up towards 0
+       mov     n, %eax
+       and     $-8, n                          C round index down to a multiple of 8
+       and     $7, %eax                        C limb slots to skip in the first pass
+       shl     $2, %eax                        C 4x
+ifdef(`PIC',`
+       call    L(pic_calc)
+L(here):
+',`
+       lea     L(ent) (%eax,%eax,2), %eax      C 12x
+')
+
+       shr     %edx                            C set cy flag
+       jmp     *%eax                           C enter the unrolled chain part way in
+
+ifdef(`PIC',`
+L(pic_calc):
+       C See mpn/x86/README about old gas bugs
+       lea     (%eax,%eax,2), %eax
+       add     $L(ent)-L(here), %eax
+       add     (%esp), %eax
+       ret_internal
+')
+
+L(end):
+       sbb     %eax, %eax                      C eax = -1 if the carry flag is set else 0
+       neg     %eax                            C return value is 1 or 0
+       pop     %ebx
+       pop     %esi
+       pop     %edi
+       ret
+
+       ALIGN(16)
+L(top):
+       jecxz   L(end)                          C index reached zero so all limbs are done
+L(ent):
+Zdisp( mov,    0,(up,n,4), %eax)
+Zdisp( ADCSBB, 0,(vp,n,4), %eax)
+Zdisp( mov,    %eax, 0,(rp,n,4))
+
+       mov     4(up,n,4), %edx
+       ADCSBB  4(vp,n,4), %edx
+       mov     %edx, 4(rp,n,4)
+
+       mov     8(up,n,4), %eax
+       ADCSBB  8(vp,n,4), %eax
+       mov     %eax, 8(rp,n,4)
+
+       mov     12(up,n,4), %edx
+       ADCSBB  12(vp,n,4), %edx
+       mov     %edx, 12(rp,n,4)
+
+       mov     16(up,n,4), %eax
+       ADCSBB  16(vp,n,4), %eax
+       mov     %eax, 16(rp,n,4)
+
+       mov     20(up,n,4), %edx
+       ADCSBB  20(vp,n,4), %edx
+       mov     %edx, 20(rp,n,4)
+
+       mov     24(up,n,4), %eax
+       ADCSBB  24(vp,n,4), %eax
+       mov     %eax, 24(rp,n,4)
+
+       mov     28(up,n,4), %edx
+       ADCSBB  28(vp,n,4), %edx
+       mov     %edx, 28(rp,n,4)
+
+       lea     8(n), n                         C lea advances the index without touching the carry flag
+       jmp     L(top)
+
+EPILOGUE()
+
+PROLOGUE(func_nc)
+       movl    20(%esp), %edx          C carry-in is the fifth argument
+       jmp     L(start)                C join func after its carry-in is zeroed
+EPILOGUE()
diff --git a/mpn/x86/p6/aorsmul_1.asm b/mpn/x86/p6/aorsmul_1.asm

new file mode 100644 (file)

index 0000000..d0b0cef
--- /dev/null
+++ b/mpn/x86/p6/aorsmul_1.asm
@@ -0,0 +1,309 @@
+dnl  Intel P6 mpn_addmul_1/mpn_submul_1 -- add or subtract mpn multiple.
+
+dnl  Copyright 1999, 2000, 2001, 2002, 2005 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C                           cycles/limb
+C P5:
+C P6 model 0-8,10-12             6.44
+C P6 model 9  (Banias)
+C P6 model 13 (Dothan)           6.11
+C P4 model 0  (Willamette)
+C P4 model 1  (?)
+C P4 model 2  (Northwood)
+C P4 model 3  (Prescott)
+C P4 model 4  (Nocona)
+C K6:
+C K7:
+C K8:
+
+
+dnl  P6 UNROLL_COUNT cycles/limb
+dnl          8           6.7
+dnl         16           6.35
+dnl         32           6.3
+dnl         64           6.3
+dnl  Maximum possible with the current code is 64.
+
+deflit(UNROLL_COUNT, 16)
+
+
+ifdef(`OPERATION_addmul_1', `
+       define(M4_inst,        addl)
+       define(M4_function_1,  mpn_addmul_1)
+       define(M4_function_1c, mpn_addmul_1c)
+       define(M4_description, add it to)
+       define(M4_desc_retval, carry)
+',`ifdef(`OPERATION_submul_1', `
+       define(M4_inst,        subl)
+       define(M4_function_1,  mpn_submul_1)
+       define(M4_function_1c, mpn_submul_1c)
+       define(M4_description, subtract it from)
+       define(M4_desc_retval, borrow)
+',`m4_error(`Need OPERATION_addmul_1 or OPERATION_submul_1
+')')')
+
+MULFUNC_PROLOGUE(mpn_addmul_1 mpn_addmul_1c mpn_submul_1 mpn_submul_1c)
+
+
+C mp_limb_t M4_function_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                            mp_limb_t mult);
+C mp_limb_t M4_function_1c (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                             mp_limb_t mult, mp_limb_t carry);
+C
+C Calculate src,size multiplied by mult and M4_description dst,size.
+C Return the M4_desc_retval limb from the top of the result.
+C
+C This code is pretty much the same as the K6 code.  The unrolled loop is
+C the same, but there's just a few scheduling tweaks in the setups and the
+C simple loop.
+C
+C A number of variations have been tried for the unrolled loop, with one or
+C two carries, and with loads scheduled earlier, but nothing faster than 6
+C cycles/limb has been found.
+
+ifdef(`PIC',`
+deflit(UNROLL_THRESHOLD, 5)
+',`
+deflit(UNROLL_THRESHOLD, 5)
+')
+
+defframe(PARAM_CARRY,     20)
+defframe(PARAM_MULTIPLIER,16)
+defframe(PARAM_SIZE,      12)
+defframe(PARAM_SRC,       8)
+defframe(PARAM_DST,       4)
+
+       TEXT
+       ALIGN(32)
+
+PROLOGUE(M4_function_1c)
+       pushl   %ebx                    C matches the push at the plain entry point
+deflit(`FRAME',4)
+       movl    PARAM_CARRY, %ebx       C initial carry limb supplied by the caller
+       jmp     L(start_nc)             C join the common code
+EPILOGUE()
+
+PROLOGUE(M4_function_1)
+       push    %ebx
+deflit(`FRAME',4)
+       xorl    %ebx, %ebx      C initial carry
+
+L(start_nc):
+       movl    PARAM_SIZE, %ecx
+       pushl   %esi
+deflit(`FRAME',8)
+
+       movl    PARAM_SRC, %esi
+       pushl   %edi
+deflit(`FRAME',12)
+
+       movl    PARAM_DST, %edi
+       pushl   %ebp
+deflit(`FRAME',16)
+       cmpl    $UNROLL_THRESHOLD, %ecx
+
+       movl    PARAM_MULTIPLIER, %ebp  C movl leaves the flags of the cmpl intact
+       jae     L(unroll)               C size >= UNROLL_THRESHOLD
+
+
+       C simple loop
+       C this is offset 0x22, so close enough to aligned
+L(simple):
+       C eax   scratch
+       C ebx   carry
+       C ecx   counter
+       C edx   scratch
+       C esi   src
+       C edi   dst
+       C ebp   multiplier
+
+       movl    (%esi), %eax
+       addl    $4, %edi
+
+       mull    %ebp
+
+       addl    %ebx, %eax      C low product + carry limb
+       adcl    $0, %edx
+
+       M4_inst %eax, -4(%edi)
+       movl    %edx, %ebx
+
+       adcl    $0, %ebx        C carry or borrow out of M4_inst goes into the carry limb
+       decl    %ecx
+
+       leal    4(%esi), %esi   C leal leaves the flags of the decl intact
+       jnz     L(simple)
+
+
+       popl    %ebp
+       popl    %edi
+
+       popl    %esi
+       movl    %ebx, %eax      C return value is the final carry limb
+
+       popl    %ebx
+       ret
+
+
+
+C------------------------------------------------------------------------------
+C VAR_JUMP holds the computed jump temporarily because there's not enough
+C registers when doing the mul for the initial two carry limbs.
+C
+C The add/adc for the initial carry in %ebx is necessary only for the
+C mpn_add/submul_1c entry points.  Duplicating the startup code to
+C eliminate this for the plain mpn_add/submul_1 doesn't seem like a good
+C idea.
+
+dnl  overlapping with parameters already fetched
+define(VAR_COUNTER,`PARAM_SIZE')
+define(VAR_JUMP,   `PARAM_DST')
+
+       C this is offset 0x43, so close enough to aligned
+L(unroll):
+       C eax
+       C ebx   initial carry
+       C ecx   size
+       C edx
+       C esi   src
+       C edi   dst
+       C ebp
+
+       movl    %ecx, %edx
+       decl    %ecx            C ecx = size-1
+
+       subl    $2, %edx        C edx = size-2
+       negl    %ecx            C ecx = -(size-1)
+
+       shrl    $UNROLL_LOG2, %edx
+       andl    $UNROLL_MASK, %ecx
+
+       movl    %edx, VAR_COUNTER       C loop counter tested with jns after each pass
+       movl    %ecx, %edx
+
+       C 15 code bytes per limb
+ifdef(`PIC',`
+       call    L(pic_calc)
+L(here):
+',`
+       shll    $4, %edx
+       negl    %ecx
+
+       leal    L(entry) (%edx,%ecx,1), %edx
+')
+       movl    (%esi), %eax            C src low limb
+
+       movl    %edx, VAR_JUMP
+       leal    ifelse(UNROLL_BYTES,256,128+) 4(%esi,%ecx,4), %esi
+
+       mull    %ebp
+
+       addl    %ebx, %eax      C initial carry (from _1c)
+       adcl    $0, %edx
+
+       movl    %edx, %ebx      C high carry
+       leal    ifelse(UNROLL_BYTES,256,128) (%edi,%ecx,4), %edi
+
+       movl    VAR_JUMP, %edx
+       testl   $1, %ecx
+       movl    %eax, %ecx      C low carry
+
+       cmovnz( %ebx, %ecx)     C high,low carry other way around
+       cmovnz( %eax, %ebx)
+
+       jmp     *%edx
+
+
+ifdef(`PIC',`
+L(pic_calc):
+       shll    $4, %edx
+       negl    %ecx
+
+       C See mpn/x86/README about old gas bugs
+       leal    (%edx,%ecx,1), %edx
+       addl    $L(entry)-L(here), %edx
+
+       addl    (%esp), %edx
+
+       ret_internal
+')
+
+
+C -----------------------------------------------------------
+       ALIGN(32)
+L(top):
+deflit(`FRAME',16)
+       C eax   scratch
+       C ebx   carry hi
+       C ecx   carry lo
+       C edx   scratch
+       C esi   src
+       C edi   dst
+       C ebp   multiplier
+       C
+       C VAR_COUNTER   loop counter
+       C
+       C 15 code bytes per limb
+
+       addl    $UNROLL_BYTES, %edi
+
+L(entry):
+deflit(CHUNK_COUNT,2)
+forloop(`i', 0, UNROLL_COUNT/CHUNK_COUNT-1, `
+       deflit(`disp0', eval(i*4*CHUNK_COUNT ifelse(UNROLL_BYTES,256,-128)))
+       deflit(`disp1', eval(disp0 + 4))
+
+Zdisp( movl,   disp0,(%esi), %eax)
+       mull    %ebp
+Zdisp( M4_inst,%ecx, disp0,(%edi))
+       adcl    %eax, %ebx
+       movl    %edx, %ecx
+       adcl    $0, %ecx
+
+       movl    disp1(%esi), %eax
+       mull    %ebp
+       M4_inst %ebx, disp1(%edi)
+       adcl    %eax, %ecx
+       movl    %edx, %ebx
+       adcl    $0, %ebx
+')
+
+       decl    VAR_COUNTER
+       leal    UNROLL_BYTES(%esi), %esi        C leal leaves the flags of the decl intact
+
+       jns     L(top)
+
+
+deflit(`disp0',        eval(UNROLL_BYTES ifelse(UNROLL_BYTES,256,-128)))
+
+       M4_inst %ecx, disp0(%edi)       C apply the pending low carry to the top limb
+       movl    %ebx, %eax
+
+       popl    %ebp
+       popl    %edi
+
+       popl    %esi
+       popl    %ebx
+       adcl    $0, %eax        C popl keeps the carry flag set by M4_inst above
+
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/p6/copyd.asm b/mpn/x86/p6/copyd.asm

new file mode 100644 (file)

index 0000000..2946f51
--- /dev/null
+++ b/mpn/x86/p6/copyd.asm
@@ -0,0 +1,167 @@
+dnl  Intel P6 mpn_copyd -- copy limb vector backwards.
+
+dnl  Copyright 2001, 2002 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C P6: 1.75 cycles/limb, or 0.75 if no overlap
+
+
+C void mpn_copyd (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C
+C An explicit loop is used because a decrementing rep movsl is a bit slow at
+C 2.4 c/l.  That rep movsl also has about a 40 cycle startup time, and the
+C code here stands a chance of being faster if the branches predict well.
+C
+C The slightly strange loop form seems necessary for the claimed speed.
+C Maybe load/store ordering affects it.
+C
+C The source and destination are checked to see if they're actually
+C overlapping, since it might be possible to use an incrementing rep movsl
+C at 0.75 c/l.  (It doesn't suffer the bad startup time of the decrementing
+C version.)
+C
+C Enhancements:
+C
+C Top speed for an all-integer copy is probably 1.0 c/l, being one load and
+C one store each cycle.  Unrolling the loop below would approach 1.0, but
+C it'd be good to know why something like store/load/subl + store/load/jnz
+C doesn't already run at 1.0 c/l.  It looks like it should decode in 2
+C cycles, but doesn't run that way.
+
+defframe(PARAM_SIZE,12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+dnl  re-using parameter space
+define(SAVE_ESI,`PARAM_SIZE')
+define(SAVE_EDI,`PARAM_SRC')
+
+       TEXT
+       ALIGN(16)
+
+PROLOGUE(mpn_copyd)
+deflit(`FRAME',0)
+
+       movl    PARAM_SIZE, %ecx
+
+       movl    %esi, SAVE_ESI                  C esi and edi are parked in dead parameter slots
+       movl    PARAM_SRC, %esi
+
+       movl    %edi, SAVE_EDI
+       movl    PARAM_DST, %edi
+
+       subl    $1, %ecx
+       jb      L(zero)                         C size was 0
+
+       movl    (%esi,%ecx,4), %eax             C src[size-1]
+       jz      L(one)                          C size was 1 (movl kept the flags of the subl)
+
+       movl    -4(%esi,%ecx,4), %edx           C src[size-2]
+       subl    $2, %ecx
+       jbe     L(done_loop)                    C 2 or 3 limbs only
+
+
+       C The usual overlap is
+       C
+       C     high                   low
+       C     +------------------+
+       C     |               dst|
+       C     +------------------+
+       C           +------------------+
+       C           |               src|
+       C           +------------------+
+       C
+       C We can use an incrementing copy in the following circumstances.
+       C
+       C     src+4*size<=dst, since then the regions are disjoint
+       C
+       C     src==dst, clearly (though this shouldn't occur normally)
+       C
+       C     src>dst, since in that case it's a requirement of the
+       C              parameters that src>=dst+size*4, and hence the
+       C              regions are disjoint
+       C
+
+       leal    12(%esi,%ecx,4), %edx   C src+4*size (ecx is size-3 here)
+       cmpl    %edi, %esi
+       jae     L(use_movsl)            C src >= dst
+
+       cmpl    %edi, %edx              C compare src+4*size with dst
+       movl    4(%esi,%ecx,4), %edx    C src[size-2] again
+       jbe     L(use_movsl)            C src+4*size <= dst
+
+
+L(top):
+       C eax   prev high limb
+       C ebx
+       C ecx   counter, size-3 down to 0 or -1, inclusive, by 2s
+       C edx   prev low limb
+       C esi   src
+       C edi   dst
+       C ebp
+
+       movl    %eax, 8(%edi,%ecx,4)
+       movl    (%esi,%ecx,4), %eax
+
+       movl    %edx, 4(%edi,%ecx,4)
+       movl    -4(%esi,%ecx,4), %edx
+
+       subl    $2, %ecx
+       jnbe    L(top)
+
+
+L(done_loop):
+       movl    %eax, 8(%edi,%ecx,4)
+       movl    %edx, 4(%edi,%ecx,4)
+
+       C copy low limb (needed if size was odd, but will already have been
+       C done in the loop if size was even)
+       movl    (%esi), %eax
+L(one):
+       movl    %eax, (%edi)
+       movl    SAVE_EDI, %edi
+       movl    SAVE_ESI, %esi
+
+       ret
+
+
+L(use_movsl):
+       C eax
+       C ebx
+       C ecx   size-3
+       C edx
+       C esi   src
+       C edi   dst
+       C ebp
+
+       addl    $3, %ecx        C back to the full limb count for rep movsl
+
+       cld             C better safe than sorry, see mpn/x86/README
+
+       rep
+       movsl
+
+L(zero):
+       movl    SAVE_ESI, %esi
+       movl    SAVE_EDI, %edi
+
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/p6/dive_1.asm b/mpn/x86/p6/dive_1.asm

new file mode 100644 (file)

index 0000000..23d1d53
--- /dev/null
+++ b/mpn/x86/p6/dive_1.asm
@@ -0,0 +1,255 @@
+dnl  Intel P6 mpn_divexact_1 -- mpn by limb exact division.
+
+dnl  Copyright 2001, 2002, 2007 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C       odd  even  divisor
+C P6:  10.0  12.0  cycles/limb
+
+
+C void mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                      mp_limb_t divisor);
+C
+C The odd case is basically the same as mpn_modexact_1_odd, just with an
+C extra store, and it runs at the same 10 cycles which is the dependent
+C chain.
+C
+C The shifts for the even case aren't on the dependent chain so in principle
+C it could run the same too, but nothing running at 10 has been found.
+C Perhaps there's too many uops (an extra 4 over the odd case).
+
+defframe(PARAM_DIVISOR,16)
+defframe(PARAM_SIZE,   12)
+defframe(PARAM_SRC,     8)
+defframe(PARAM_DST,     4)
+
+defframe(SAVE_EBX,     -4)
+defframe(SAVE_ESI,     -8)
+defframe(SAVE_EDI,    -12)
+defframe(SAVE_EBP,    -16)
+defframe(VAR_INVERSE, -20)
+deflit(STACK_SPACE, 20)
+
+       TEXT
+
+       ALIGN(16)
+PROLOGUE(mpn_divexact_1)
+deflit(`FRAME',0)
+       C dst[] = src[] / divisor, exact, using the inverse of d mod 2^GMP_LIMB_BITS
+       movl    PARAM_DIVISOR, %eax
+       subl    $STACK_SPACE, %esp      FRAME_subl_esp(STACK_SPACE)
+
+       movl    %esi, SAVE_ESI
+       movl    PARAM_SRC, %esi
+
+       movl    %ebx, SAVE_EBX
+       movl    PARAM_SIZE, %ebx
+
+       bsfl    %eax, %ecx              C trailing twos
+
+       movl    %ebp, SAVE_EBP
+
+       shrl    %cl, %eax               C d without twos
+
+       movl    %eax, %edx
+       shrl    %eax                    C d/2 without twos
+
+       movl    %edx, PARAM_DIVISOR     C param slot reused for the odd part of d
+       andl    $127, %eax              C 7 bit index into the inverse table
+
+ifdef(`PIC',`
+       LEA(    binvert_limb_table, %ebp)
+       movzbl  (%eax,%ebp), %ebp               C inv 8 bits
+',`
+       movzbl  binvert_limb_table(%eax), %ebp  C inv 8 bits
+')
+
+       leal    (%ebp,%ebp), %eax       C 2*inv
+       C Newton step, the 8 bit inverse becomes good to 16 bits
+       imull   %ebp, %ebp              C inv*inv
+
+       movl    %edi, SAVE_EDI
+       movl    PARAM_DST, %edi
+
+       leal    (%esi,%ebx,4), %esi     C src end
+
+       imull   PARAM_DIVISOR, %ebp     C inv*inv*d
+
+       subl    %ebp, %eax              C inv = 2*inv - inv*inv*d
+       leal    (%eax,%eax), %ebp       C 2*inv
+       C second Newton step, 16 bits become 32
+       imull   %eax, %eax              C inv*inv
+
+       leal    (%edi,%ebx,4), %edi     C dst end
+       negl    %ebx                    C -size
+
+       movl    %edi, PARAM_DST         C param slot reused for &dst[size]
+
+       imull   PARAM_DIVISOR, %eax     C inv*inv*d
+
+       subl    %eax, %ebp              C inv = 2*inv - inv*inv*d
+
+       ASSERT(e,`      C d*inv == 1 mod 2^GMP_LIMB_BITS
+       movl    PARAM_DIVISOR, %eax
+       imull   %ebp, %eax
+       cmpl    $1, %eax')
+
+       movl    %ebp, VAR_INVERSE
+       movl    (%esi,%ebx,4), %eax     C src[0]
+
+       orl     %ecx, %ecx              C shift count, zero means d was odd
+       jnz     L(even)
+
+       C ecx initial carry is zero
+       jmp     L(odd_entry)
+
+
+C The dependent chain here is
+C
+C      subl    %edx, %eax       1
+C      imull   %ebp, %eax       4
+C      mull    PARAM_DIVISOR    5
+C                             ----
+C       total                  10
+C
+C and this is the measured speed.  No special scheduling is necessary, out
+C of order execution hides the load latency.
+
+L(odd_top):
+       C eax   scratch (src limb)
+       C ebx   counter, limbs, negative
+       C ecx   carry bit
+       C edx   carry limb, high of last product
+       C esi   &src[size]
+       C edi   &dst[size]
+       C ebp   (not referenced in this loop)
+
+       mull    PARAM_DIVISOR           C edx = high limb of previous q times d
+
+       movl    (%esi,%ebx,4), %eax
+       subl    %ecx, %eax              C src limb minus carry bit
+
+       sbbl    %ecx, %ecx              C 0 or -1 from that borrow
+       subl    %edx, %eax              C minus carry limb
+
+       sbbl    $0, %ecx                C fold in the second borrow
+
+L(odd_entry):
+       imull   VAR_INVERSE, %eax       C quotient limb
+
+       movl    %eax, (%edi,%ebx,4)
+       negl    %ecx                    C borrow made positive, carry bit for next limb
+
+       incl    %ebx
+       jnz     L(odd_top)
+
+
+       movl    SAVE_ESI, %esi
+
+       movl    SAVE_EDI, %edi
+
+       movl    SAVE_EBP, %ebp
+
+       movl    SAVE_EBX, %ebx
+       addl    $STACK_SPACE, %esp
+
+       ret
+
+
+L(even):
+       C eax   src[0]
+       C ebx   counter, limbs, negative
+       C ecx   shift
+       C edx
+       C esi   &src[size]
+       C edi   &dst[size]
+       C ebp   inverse, also saved in VAR_INVERSE
+
+       xorl    %ebp, %ebp              C initial carry bit
+       xorl    %edx, %edx              C initial carry limb (for size==1)
+
+       incl    %ebx
+       jz      L(even_one)             C size==1, only the top limb to do
+
+       movl    (%esi,%ebx,4), %edi     C src[1]
+
+       shrdl(  %cl, %edi, %eax)        C src[0] >> shift, low bits of src[1] on top
+
+       jmp     L(even_entry)
+
+
+L(even_top):
+       C eax   scratch
+       C ebx   counter, limbs, negative
+       C ecx   shift
+       C edx   scratch
+       C esi   &src[size]
+       C edi   &dst[size] and scratch
+       C ebp   carry bit
+
+       movl    (%esi,%ebx,4), %edi     C limb above the one being divided
+
+       mull    PARAM_DIVISOR           C edx = high limb of previous q times d
+
+       movl    -4(%esi,%ebx,4), %eax
+       shrdl(  %cl, %edi, %eax)        C limb >> shift, bits of the limb above on top
+
+       subl    %ebp, %eax              C minus carry bit
+
+       sbbl    %ebp, %ebp              C 0 or -1 from that borrow
+       subl    %edx, %eax              C minus carry limb
+
+       sbbl    $0, %ebp                C fold in the second borrow
+
+L(even_entry):
+       imull   VAR_INVERSE, %eax       C quotient limb
+
+       movl    PARAM_DST, %edi         C reload &dst[size], edi was used as scratch
+       negl    %ebp                    C borrow made positive, carry bit for next limb
+
+       movl    %eax, -4(%edi,%ebx,4)
+       incl    %ebx
+       jnz     L(even_top)
+
+
+       C Top limb: nothing above it to shift in, so a plain shrl is used.
+       mull    PARAM_DIVISOR
+
+       movl    -4(%esi), %eax          C src[size-1]
+
+L(even_one):
+       shrl    %cl, %eax               C zeros come in from above
+       movl    SAVE_ESI, %esi
+
+       subl    %ebp, %eax              C minus carry bit
+       movl    SAVE_EBP, %ebp
+
+       subl    %edx, %eax              C minus carry limb
+       movl    SAVE_EBX, %ebx
+
+       imull   VAR_INVERSE, %eax
+
+       movl    %eax, -4(%edi)          C dst[size-1], edi is &dst[size] on both paths
+       movl    SAVE_EDI, %edi
+       addl    $STACK_SPACE, %esp
+
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/p6/gmp-mparam.h b/mpn/x86/p6/gmp-mparam.h

new file mode 100644 (file)

index 0000000..8ff8d48
--- /dev/null
+++ b/mpn/x86/p6/gmp-mparam.h
@@ -0,0 +1,66 @@
+/* Intel P6 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#define GMP_LIMB_BITS 32
+#define BYTES_PER_MP_LIMB 4
+
+
+/* NOTE: In a fat binary build SQR_TOOM2_THRESHOLD here cannot be smaller than
+   the value in mpn/x86/p6/mmx/gmp-mparam.h.  The former is used as a hard
+   limit in mpn/x86/p6/sqr_basecase.asm, and that file will be run by the
+   p6/mmx cpus (pentium2, pentium3).  */
+
+
+/* 200MHz Pentium Pro */
+
+/* Generated by tuneup.c, 2003-02-12, gcc 2.95 */
+
+#define MUL_TOOM22_THRESHOLD             23
+#define MUL_TOOM33_THRESHOLD            140
+
+#define SQR_BASECASE_THRESHOLD            0  /* always */
+#define SQR_TOOM2_THRESHOLD              52  /* hard limit in p6/sqr_basecase.asm, see NOTE above */
+#define SQR_TOOM3_THRESHOLD             189
+
+#define DIV_SB_PREINV_THRESHOLD           0  /* always */
+#define DIV_DC_THRESHOLD                116
+#define POWM_THRESHOLD                  131
+
+#define GCD_ACCEL_THRESHOLD               3
+#define JACOBI_BASE_METHOD                1
+
+#define USE_PREINV_DIVREM_1               0
+#define USE_PREINV_MOD_1                  1  /* native */
+#define DIVREM_2_THRESHOLD                0  /* always */
+#define DIVEXACT_1_THRESHOLD              0  /* always */
+#define MODEXACT_1_ODD_THRESHOLD          0  /* always */
+
+#define GET_STR_DC_THRESHOLD             18
+#define GET_STR_PRECOMPUTE_THRESHOLD     23
+#define SET_STR_THRESHOLD              6093
+
+#define MUL_FFT_TABLE  { 464, 928, 1920, 3584, 10240, 40960, 0 }
+#define MUL_FFT_MODF_THRESHOLD          360
+#define MUL_FFT_THRESHOLD              2816
+
+#define SQR_FFT_TABLE  { 528, 1184, 1920, 4608, 14336, 40960, 0 }
+#define SQR_FFT_MODF_THRESHOLD          440
+#define SQR_FFT_THRESHOLD              2816
diff --git a/mpn/x86/p6/lshsub_n.asm b/mpn/x86/p6/lshsub_n.asm

new file mode 100644 (file)

index 0000000..a3086bd
--- /dev/null
+++ b/mpn/x86/p6/lshsub_n.asm
@@ -0,0 +1,158 @@
+dnl  Intel P6 mpn_lshsub_n -- mpn papillion support.
+
+dnl  Copyright 2006 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C P6/13: 3.35 cycles/limb      (separate mpn_sub_n + mpn_lshift needs 4.12)
+
+C (1) The loop is not scheduled in any way, and scheduling attempts have not
+C     improved speed on P6/13.  Presumably, the K7 will want scheduling, if it
+C     at all wants to use MMX.
+C (2) We could save a register by not alternatingly using eax and edx in the
+C     loop.
+
+define(`rp',   `%edi')
+define(`up',   `%esi')
+define(`vp',   `%ebx')
+define(`n',    `%ecx')
+define(`cnt',  `%mm7')
+
+ASM_START()
+
+       TEXT
+       ALIGN(16)
+
+PROLOGUE(mpn_lshsub_n)
+       push    %edi
+       push    %esi
+       push    %ebx
+       C rp[] = (up[] - vp[]) << cnt over n limbs, eax returns the bits shifted out
+       mov     16(%esp), rp
+       mov     20(%esp), up
+       mov     24(%esp), vp
+       mov     28(%esp), n
+       mov     $32, %eax
+       sub     32(%esp), %eax                  C 32-cnt
+       movd    %eax, cnt                       C right shift applied to each limb pair
+       C Pointers are biased one limb low so the loop displacements are 4..32
+       lea     -4(up,n,4), up
+       lea     -4(vp,n,4), vp
+       lea     -4(rp,n,4), rp
+       C x = (-size) & 7 is the number of 22 byte slots to skip on entry
+       neg     n
+       mov     n, %eax
+       and     $-8, n
+       and     $7, %eax
+       shl     %eax                            C eax = 2x
+       lea     (%eax,%eax,4), %edx             C edx = 10x
+ifdef(`PIC',`
+       call    L(pic_calc)
+L(here):
+',`
+       lea     L(ent)(%eax,%edx,2), %eax       C eax = 22x
+')
+
+       pxor    %mm1, %mm1
+       pxor    %mm0, %mm0
+
+       jmp     *%eax
+
+ifdef(`PIC',`
+L(pic_calc):
+       C See mpn/x86/README about old gas bugs
+       lea     (%eax,%edx,2), %eax
+       add     $L(ent)-L(here), %eax
+       add     (%esp), %eax
+       ret_internal
+')
+
+L(end):        C compute (cy<<cnt) | (edx>>(32-cnt))
+       sbb     %eax, %eax
+       neg     %eax                            C eax = final borrow
+       mov     32(%esp), %ecx                  C cnt
+       shld    %cl, %edx, %eax
+
+       emms
+
+       pop     %ebx
+       pop     %esi
+       pop     %edi
+       ret
+       ALIGN(16)
+L(top):        jecxz   L(end)                  C counter at zero, CF still holds the borrow
+L(ent):        mov        4(up,n,4), %eax
+       sbb        4(vp,n,4), %eax
+       movd       %eax, %mm0
+       punpckldq  %mm0, %mm1
+       psrlq      %mm7, %mm1
+       movd       %mm1, 4(rp,n,4)
+       C Every slot must be 22 bytes, a zero displacement could assemble shorter
+       mov        8(up,n,4), %edx
+       sbb        8(vp,n,4), %edx
+       movd       %edx, %mm1
+       punpckldq  %mm1, %mm0
+       psrlq      %mm7, %mm0
+       movd       %mm0, 8(rp,n,4)
+
+       mov        12(up,n,4), %eax
+       sbb        12(vp,n,4), %eax
+       movd       %eax, %mm0
+       punpckldq  %mm0, %mm1
+       psrlq      %mm7, %mm1
+       movd       %mm1, 12(rp,n,4)
+
+       mov        16(up,n,4), %edx
+       sbb        16(vp,n,4), %edx
+       movd       %edx, %mm1
+       punpckldq  %mm1, %mm0
+       psrlq      %mm7, %mm0
+       movd       %mm0, 16(rp,n,4)
+
+       mov        20(up,n,4), %eax
+       sbb        20(vp,n,4), %eax
+       movd       %eax, %mm0
+       punpckldq  %mm0, %mm1
+       psrlq      %mm7, %mm1
+       movd       %mm1, 20(rp,n,4)
+
+       mov        24(up,n,4), %edx
+       sbb        24(vp,n,4), %edx
+       movd       %edx, %mm1
+       punpckldq  %mm1, %mm0
+       psrlq      %mm7, %mm0
+       movd       %mm0, 24(rp,n,4)
+
+       mov        28(up,n,4), %eax
+       sbb        28(vp,n,4), %eax
+       movd       %eax, %mm0
+       punpckldq  %mm0, %mm1
+       psrlq      %mm7, %mm1
+       movd       %mm1, 28(rp,n,4)
+
+       mov        32(up,n,4), %edx
+       sbb        32(vp,n,4), %edx
+       movd       %edx, %mm1
+       punpckldq  %mm1, %mm0
+       psrlq      %mm7, %mm0
+       movd       %mm0, 32(rp,n,4)
+
+       lea        8(n), n                      C lea leaves CF alone for the sbb chain
+       jmp        L(top)
+
+EPILOGUE()
diff --git a/mpn/x86/p6/mmx/divrem_1.asm b/mpn/x86/p6/mmx/divrem_1.asm

new file mode 100644 (file)

index 0000000..8891f3a
--- /dev/null
+++ b/mpn/x86/p6/mmx/divrem_1.asm
@@ -0,0 +1,756 @@
+dnl  Intel Pentium-II mpn_divrem_1 -- mpn by limb division.
+
+dnl  Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C P6MMX: 25.0 cycles/limb integer part, 17.5 cycles/limb fraction part.
+
+
+C mp_limb_t mpn_divrem_1 (mp_ptr dst, mp_size_t xsize,
+C                         mp_srcptr src, mp_size_t size,
+C                         mp_limb_t divisor);
+C mp_limb_t mpn_divrem_1c (mp_ptr dst, mp_size_t xsize,
+C                          mp_srcptr src, mp_size_t size,
+C                          mp_limb_t divisor, mp_limb_t carry);
+C mp_limb_t mpn_preinv_divrem_1 (mp_ptr dst, mp_size_t xsize,
+C                                mp_srcptr src, mp_size_t size,
+C                                mp_limb_t divisor, mp_limb_t inverse,
+C                                unsigned shift);
+C
+C This code is a lightly reworked version of mpn/x86/k7/mmx/divrem_1.asm,
+C see that file for some comments.  It's possible what's here can be improved.
+
+
+dnl  MUL_THRESHOLD is the value of xsize+size at which the multiply by
+dnl  inverse method is used, rather than plain "divl"s.  Minimum value 1.
+dnl
+dnl  The different speeds of the integer and fraction parts means that using
+dnl  xsize+size isn't quite right.  The threshold wants to be a bit higher
+dnl  for the integer part and a bit lower for the fraction part.  (Or what's
+dnl  really wanted is to speed up the integer part!)
+dnl
+dnl  The threshold is set to make the integer part right.  At 4 limbs the
+dnl  div and mul are about the same there, but on the fractional part the
+dnl  mul is much faster.
+
+deflit(MUL_THRESHOLD, 4)
+
+
+defframe(PARAM_PREINV_SHIFT,   28)  dnl mpn_preinv_divrem_1
+defframe(PARAM_PREINV_INVERSE, 24)  dnl mpn_preinv_divrem_1
+defframe(PARAM_CARRY,  24)          dnl mpn_divrem_1c
+defframe(PARAM_DIVISOR,20)
+defframe(PARAM_SIZE,   16)
+defframe(PARAM_SRC,    12)
+defframe(PARAM_XSIZE,  8)
+defframe(PARAM_DST,    4)
+
+defframe(SAVE_EBX,    -4)
+defframe(SAVE_ESI,    -8)
+defframe(SAVE_EDI,    -12)
+defframe(SAVE_EBP,    -16)
+
+defframe(VAR_NORM,    -20)
+defframe(VAR_INVERSE, -24)
+defframe(VAR_SRC,     -28)
+defframe(VAR_DST,     -32)
+defframe(VAR_DST_STOP,-36)
+
+deflit(STACK_SPACE, 36)
+
+       TEXT
+       ALIGN(16)
+
+PROLOGUE(mpn_preinv_divrem_1)
+deflit(`FRAME',0)
+       movl    PARAM_XSIZE, %ecx
+       subl    $STACK_SPACE, %esp      FRAME_subl_esp(STACK_SPACE)
+       C Entry with inverse and shift supplied by the caller, joins start_preinv.
+       movl    %esi, SAVE_ESI
+       movl    PARAM_SRC, %esi
+
+       movl    %ebx, SAVE_EBX
+       movl    PARAM_SIZE, %ebx
+
+       movl    %ebp, SAVE_EBP
+       movl    PARAM_DIVISOR, %ebp
+
+       movl    %edi, SAVE_EDI
+       movl    PARAM_DST, %edx
+       C NOTE(review): src[size-1] is read unconditionally, presumably size>=1
+       movl    -4(%esi,%ebx,4), %eax   C src high limb
+       xorl    %edi, %edi              C initial carry (if can't skip a div)
+
+       C
+
+       leal    8(%edx,%ecx,4), %edx    C &dst[xsize+2]
+       xor     %ecx, %ecx              C zero for the cmovnc below
+
+       movl    %edx, VAR_DST_STOP      C &dst[xsize+2]
+       cmpl    %ebp, %eax              C high cmp divisor
+
+       cmovc(  %eax, %edi)             C high is carry if high<divisor
+
+       cmovnc( %eax, %ecx)             C 0 if skip div, src high if not
+                                       C (the latter in case src==dst)
+
+       movl    %ecx, -12(%edx,%ebx,4)  C dst high limb
+
+       sbbl    $0, %ebx                C skip one division if high<divisor
+       movl    PARAM_PREINV_SHIFT, %ecx        C l, the normalization shift
+
+       leal    -8(%edx,%ebx,4), %edx   C &dst[xsize+size]
+       movl    $32, %eax
+
+       movl    %edx, VAR_DST           C &dst[xsize+size]
+
+       shll    %cl, %ebp               C d normalized
+       subl    %ecx, %eax              C 32-l
+       movl    %ecx, VAR_NORM
+
+       movd    %eax, %mm7              C rshift
+       movl    PARAM_PREINV_INVERSE, %eax      C start_preinv takes the inverse in eax
+       jmp     L(start_preinv)         C code shared with mpn_divrem_1
+
+EPILOGUE()
+
+
+
+       ALIGN(16)
+
+PROLOGUE(mpn_divrem_1c)
+deflit(`FRAME',0)
+       movl    PARAM_CARRY, %edx       C initial carry supplied by the caller
+
+       movl    PARAM_SIZE, %ecx
+       subl    $STACK_SPACE, %esp
+deflit(`FRAME',STACK_SPACE)
+
+       movl    %ebx, SAVE_EBX
+       movl    PARAM_XSIZE, %ebx
+
+       movl    %edi, SAVE_EDI
+       movl    PARAM_DST, %edi
+
+       movl    %ebp, SAVE_EBP
+       movl    PARAM_DIVISOR, %ebp
+
+       movl    %esi, SAVE_ESI
+       movl    PARAM_SRC, %esi
+
+       leal    -4(%edi,%ebx,4), %edi   C &dst[xsize-1]
+       jmp     L(start_1c)             C shared code, registers as listed at start_1c
+
+EPILOGUE()
+
+
+       C offset 0x31, close enough to aligned
+PROLOGUE(mpn_divrem_1)
+deflit(`FRAME',0)
+       C Entry with zero initial carry; tries to skip the high limb division.
+       movl    PARAM_SIZE, %ecx
+       movl    $0, %edx                C initial carry (if can't skip a div)
+       subl    $STACK_SPACE, %esp
+deflit(`FRAME',STACK_SPACE)
+
+       movl    %ebp, SAVE_EBP
+       movl    PARAM_DIVISOR, %ebp
+
+       movl    %ebx, SAVE_EBX
+       movl    PARAM_XSIZE, %ebx
+
+       movl    %esi, SAVE_ESI
+       movl    PARAM_SRC, %esi
+       orl     %ecx, %ecx              C size
+       C movl and leal do not touch the flags, so the jz below still tests size
+       movl    %edi, SAVE_EDI
+       movl    PARAM_DST, %edi
+
+       leal    -4(%edi,%ebx,4), %edi   C &dst[xsize-1]
+       jz      L(no_skip_div)          C if size==0
+
+       movl    -4(%esi,%ecx,4), %eax   C src high limb
+       xorl    %esi, %esi              C esi borrowed as scratch, reloaded below
+       cmpl    %ebp, %eax              C high cmp divisor
+
+       cmovc(  %eax, %edx)             C high is carry if high<divisor
+
+       cmovnc( %eax, %esi)             C 0 if skip div, src high if not
+                                       C (the latter in case src==dst)
+
+       movl    %esi, (%edi,%ecx,4)     C dst high limb
+
+       sbbl    $0, %ecx                C size-1 if high<divisor
+       movl    PARAM_SRC, %esi         C reload
+L(no_skip_div):
+
+
+L(start_1c):
+       C eax
+       C ebx   xsize
+       C ecx   size
+       C edx   carry
+       C esi   src
+       C edi   &dst[xsize-1]
+       C ebp   divisor
+
+       leal    (%ebx,%ecx), %eax       C size+xsize
+       cmpl    $MUL_THRESHOLD, %eax
+       jae     L(mul_by_inverse)       C enough limbs to use the multiply by inverse method
+
+       orl     %ecx, %ecx              C size
+       jz      L(divide_no_integer)    C no integer limbs to divide
+
+L(divide_integer):
+       C eax   scratch (quotient)
+       C ebx   xsize
+       C ecx   counter
+       C edx   scratch (remainder)
+       C esi   src
+       C edi   &dst[xsize-1]
+       C ebp   divisor
+
+       movl    -4(%esi,%ecx,4), %eax   C src[counter-1], working from high to low
+
+       divl    %ebp                    C edx:eax / d, quotient in eax, remainder in edx
+
+       movl    %eax, (%edi,%ecx,4)     C dst[xsize+counter-1]
+       decl    %ecx
+       jnz     L(divide_integer)
+
+
+L(divide_no_integer):
+       movl    PARAM_DST, %edi         C &dst[0] for the fraction limbs
+       orl     %ebx, %ebx              C xsize
+       jnz     L(divide_fraction)
+
+L(divide_done):
+       movl    SAVE_ESI, %esi
+
+       movl    SAVE_EDI, %edi
+
+       movl    SAVE_EBX, %ebx
+       movl    %edx, %eax              C return value is the remainder
+
+       movl    SAVE_EBP, %ebp
+       addl    $STACK_SPACE, %esp
+       C no emms here, the entries reaching this path have not used MMX
+       ret
+
+
+L(divide_fraction):
+       C eax   scratch (quotient)
+       C ebx   counter
+       C ecx
+       C edx   scratch (remainder)
+       C esi
+       C edi   dst
+       C ebp   divisor
+
+       movl    $0, %eax                C fraction limbs of the dividend are zero
+
+       divl    %ebp                    C edx:eax / d, remainder carried on in edx
+
+       movl    %eax, -4(%edi,%ebx,4)   C dst[counter-1]
+       decl    %ebx
+       jnz     L(divide_fraction)
+
+       jmp     L(divide_done)
+
+
+
+C -----------------------------------------------------------------------------
+
+L(mul_by_inverse):
+       C eax
+       C ebx   xsize
+       C ecx   size
+       C edx   carry
+       C esi   src
+       C edi   &dst[xsize-1]
+       C ebp   divisor
+
+       leal    12(%edi), %ebx          C &dst[xsize+2], loop dst stop
+
+       movl    %ebx, VAR_DST_STOP
+       leal    4(%edi,%ecx,4), %edi    C &dst[xsize+size]
+
+       movl    %edi, VAR_DST
+       movl    %ecx, %ebx              C size
+
+       bsrl    %ebp, %ecx              C 31-l
+       movl    %edx, %edi              C carry
+
+       leal    1(%ecx), %eax           C 32-l
+       xorl    $31, %ecx               C l
+
+       movl    %ecx, VAR_NORM
+       movl    $-1, %edx               C b-1, high limb before the subl below
+
+       shll    %cl, %ebp               C d normalized
+       movd    %eax, %mm7              C rshift = 32-l
+
+       movl    $-1, %eax               C b-1, low limb of the dividend
+       subl    %ebp, %edx              C (b-d)-1 giving edx:eax = b*(b-d)-1
+
+       divl    %ebp                    C floor (b*(b-d)-1) / d
+
+L(start_preinv):
+       C eax   inverse
+       C ebx   size
+       C ecx   shift
+       C edx
+       C esi   src
+       C edi   carry
+       C ebp   divisor
+       C
+       C mm7   rshift
+
+       movl    %eax, VAR_INVERSE
+       orl     %ebx, %ebx              C size
+       leal    -12(%esi,%ebx,4), %eax  C &src[size-3]
+
+       movl    %eax, VAR_SRC
+       jz      L(start_zero)           C size==0, flags are still those of the orl
+
+       movl    8(%eax), %esi           C src high limb
+       cmpl    $1, %ebx
+       jz      L(start_one)
+
+L(start_two_or_more):
+       movl    4(%eax), %edx           C src second highest limb
+
+       shldl(  %cl, %esi, %edi)        C n2 = carry,high << l
+
+       shldl(  %cl, %edx, %esi)        C n10 = high,second << l
+
+       cmpl    $2, %ebx
+       je      L(integer_two_left)     C exactly two limbs, skip the main loop
+       jmp     L(integer_top)
+
+
+L(start_one):
+       shldl(  %cl, %esi, %edi)        C n2 = carry,high << l
+
+       shll    %cl, %esi               C n10 = high << l
+       jmp     L(integer_one_left)
+
+
+L(start_zero):
+       C Can be here with xsize==0 if mpn_preinv_divrem_1 had size==1 and
+       C skipped a division.
+
+       shll    %cl, %edi               C n2 = carry << l
+       movl    %edi, %eax              C return value for zero_done
+       cmpl    $0, PARAM_XSIZE
+
+       je      L(zero_done)            C nothing to store, zero_done shifts eax back by cl
+       jmp     L(fraction_some)
+
+
+
+C -----------------------------------------------------------------------------
+C
+C This loop runs at about 25 cycles, which is probably sub-optimal, and
+C certainly more than the dependent chain would suggest.  A better loop, or
+C a better rough analysis of what's possible, would be welcomed.
+C
+C In the current implementation, the following successively dependent
+C micro-ops seem to exist.
+C
+C                     uops
+C              n2+n1   1   (addl)
+C              mul     5
+C              q1+1    3   (addl/adcl)
+C              mul     5
+C              sub     3   (subl/sbbl)
+C              addback 2   (cmov)
+C                     ---
+C                     19
+C
+C Lack of registers hinders explicit scheduling and it might be that the
+C normal out of order execution isn't able to hide enough under the mul
+C latencies.
+C
+C Using sarl/negl to pick out n1 for the n2+n1 stage is a touch faster than
+C cmov (and takes one uop off the dependent chain).  A sarl/andl/addl
+C combination was tried for the addback (despite the fact it would lengthen
+C the dependent chain) but found to be no faster.
+
+
+       ALIGN(16)
+L(integer_top):
+       C eax   scratch
+       C ebx   scratch (nadj, q1)
+       C ecx   scratch (src, dst)
+       C edx   scratch
+       C esi   n10
+       C edi   n2
+       C ebp   d
+       C
+       C mm0   scratch (src qword)
+       C mm7   rshift for normalization
+
+       movl    %esi, %eax
+       movl    %ebp, %ebx
+
+       sarl    $31, %eax          C -n1
+       movl    VAR_SRC, %ecx
+
+       andl    %eax, %ebx         C -n1 & d
+       negl    %eax               C n1
+
+       addl    %esi, %ebx         C nadj = n10 + (-n1 & d), ignoring overflow
+       addl    %edi, %eax         C n2+n1
+       movq    (%ecx), %mm0       C next src limb and the one below it
+
+       mull    VAR_INVERSE        C m*(n2+n1)
+
+       subl    $4, %ecx           C src pointer down one limb
+
+       movl    %ecx, VAR_SRC
+
+       C
+
+       C
+
+       addl    %ebx, %eax         C m*(n2+n1) + nadj, low giving carry flag
+       movl    %ebp, %eax         C d
+       leal    1(%edi), %ebx      C n2+1
+       C movl and leal above leave the carry from the addl intact for the adcl
+       adcl    %edx, %ebx         C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1
+       jz      L(q1_ff)
+
+       mull    %ebx               C (q1+1)*d
+
+       movl    VAR_DST, %ecx
+       psrlq   %mm7, %mm0         C low dword becomes the next n10
+
+       C
+
+       C
+
+       C
+
+       subl    %eax, %esi
+       movl    VAR_DST_STOP, %eax
+
+       sbbl    %edx, %edi         C n - (q1+1)*d
+       movl    %esi, %edi         C remainder -> n2
+       leal    (%ebp,%esi), %edx  C remainder + d, used if the addback is needed
+
+       cmovc(  %edx, %edi)        C n - q1*d if underflow from using q1+1
+       movd    %mm0, %esi         C next n10
+       C the carry from the sbbl above is still live for the sbbl below
+       sbbl    $0, %ebx           C q
+       subl    $4, %ecx
+
+       movl    %ebx, (%ecx)       C store quotient limb
+       cmpl    %eax, %ecx         C stop when dst reaches &dst[xsize+2]
+
+       movl    %ecx, VAR_DST
+       jne     L(integer_top)
+
+
+L(integer_loop_done):
+
+
+C -----------------------------------------------------------------------------
+C
+C Here, and in integer_one_left below, an sbbl $0 is used rather than a jz
+C q1_ff special case.  This makes the code a bit smaller and simpler, and
+C costs only 2 cycles (each).
+
+L(integer_two_left):
+       C eax   scratch
+       C ebx   scratch (nadj, q1)
+       C ecx   scratch (src, dst)
+       C edx   scratch
+       C esi   n10
+       C edi   n2
+       C ebp   divisor
+       C
+       C mm7   rshift
+
+       C Second to last limb: only src[0] is left to be shifted into n10.
+       movl    %esi, %eax
+       movl    %ebp, %ebx
+
+       sarl    $31, %eax          C -n1
+       movl    PARAM_SRC, %ecx    C &src[0]
+
+       andl    %eax, %ebx         C -n1 & d
+       negl    %eax               C n1
+
+       addl    %esi, %ebx         C nadj = n10 + (-n1 & d), ignoring overflow
+       addl    %edi, %eax         C n2+n1
+
+       mull    VAR_INVERSE        C m*(n2+n1)
+
+       movd    (%ecx), %mm0       C src low limb
+
+       movl    VAR_DST_STOP, %ecx
+
+       C
+
+       C
+
+       addl    %ebx, %eax         C m*(n2+n1) + nadj, low giving carry flag
+       leal    1(%edi), %ebx      C n2+1
+       movl    %ebp, %eax         C d
+
+       adcl    %edx, %ebx         C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1
+
+       sbbl    $0, %ebx           C q1 if q1+1 overflowed
+
+       mull    %ebx               C (q1+1)*d
+
+       psllq   $32, %mm0          C src[0] in the high dword, zeros below
+
+       psrlq   %mm7, %mm0         C low dword = src[0] << l
+
+       C
+
+       C
+
+       subl    %eax, %esi
+
+       sbbl    %edx, %edi         C n - (q1+1)*d
+       movl    %esi, %edi         C remainder -> n2
+       leal    (%ebp,%esi), %edx  C remainder + d, used if the addback is needed
+
+       cmovc(  %edx, %edi)        C n - q1*d if underflow from using q1+1
+       movd    %mm0, %esi         C n10 for the last limb
+
+       sbbl    $0, %ebx           C q
+
+       movl    %ebx, -4(%ecx)     C dst[xsize+1]
+
+
+C -----------------------------------------------------------------------------
+L(integer_one_left):
+       C eax   scratch
+       C ebx   scratch (nadj, q1)
+       C ecx   scratch (dst)
+       C edx   scratch
+       C esi   n10
+       C edi   n2
+       C ebp   divisor
+       C
+       C mm7   rshift
+
+       C Last integer limb: no further src limb is fetched.
+       movl    %esi, %eax
+       movl    %ebp, %ebx
+
+       sarl    $31, %eax          C -n1
+       movl    VAR_DST_STOP, %ecx
+
+       andl    %eax, %ebx         C -n1 & d
+       negl    %eax               C n1
+
+       addl    %esi, %ebx         C nadj = n10 + (-n1 & d), ignoring overflow
+       addl    %edi, %eax         C n2+n1
+
+       mull    VAR_INVERSE        C m*(n2+n1)
+
+       C
+
+       C
+
+       C
+
+       addl    %ebx, %eax         C m*(n2+n1) + nadj, low giving carry flag
+       leal    1(%edi), %ebx      C n2+1
+       movl    %ebp, %eax         C d
+
+       C
+
+       adcl    %edx, %ebx         C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1
+
+       sbbl    $0, %ebx           C q1 if q1+1 overflowed
+
+       mull    %ebx               C (q1+1)*d
+
+       C
+
+       C
+
+       C
+
+       C
+
+       subl    %eax, %esi
+       movl    PARAM_XSIZE, %eax
+
+       sbbl    %edx, %edi         C n - (q1+1)*d
+       movl    %esi, %edi         C remainder -> n2
+       leal    (%ebp,%esi), %edx  C remainder + d, used if the addback is needed
+
+       cmovc(  %edx, %edi)        C n - q1*d if underflow from using q1+1
+
+       sbbl    $0, %ebx           C q
+
+       movl    %ebx, -8(%ecx)     C dst[xsize], the lowest integer quotient limb
+       subl    $8, %ecx           C NOTE(review): ecx looks unused, both successors reload it
+
+
+
+       orl     %eax, %eax         C xsize
+       jnz     L(fraction_some)
+
+       movl    %edi, %eax         C remainder, still shifted left by l
+L(fraction_done):
+       movl    VAR_NORM, %ecx     C l
+L(zero_done):
+       movl    SAVE_EBP, %ebp
+
+       movl    SAVE_EDI, %edi
+
+       movl    SAVE_ESI, %esi
+
+       movl    SAVE_EBX, %ebx
+       addl    $STACK_SPACE, %esp
+
+       shrl    %cl, %eax          C undo the normalization shift on the remainder
+       emms
+
+       ret
+
+
+C -----------------------------------------------------------------------------
+C
+C Special case for q1=0xFFFFFFFF, giving q=0xFFFFFFFF meaning the low dword
+C of q*d is simply -d and the remainder n-q*d = n10+d
+
+L(q1_ff):
+       C eax   (divisor)
+       C ebx   (q1+1 == 0)
+       C ecx
+       C edx
+       C esi   n10
+       C edi   n2
+       C ebp   divisor
+       C mm0 still holds the src qword fetched in integer_top, not yet shifted
+       movl    VAR_DST, %ecx
+       movl    VAR_DST_STOP, %edx
+       subl    $4, %ecx
+
+       movl    %ecx, VAR_DST
+       psrlq   %mm7, %mm0
+       leal    (%ebp,%esi), %edi       C n-q*d remainder -> next n2
+
+       movl    $-1, (%ecx)             C q = 0xFFFFFFFF
+       movd    %mm0, %esi              C next n10
+
+       cmpl    %ecx, %edx              C same stop test as the main loop
+       jne     L(integer_top)
+
+       jmp     L(integer_loop_done)
+
+
+
+C -----------------------------------------------------------------------------
+C
+C In the current implementation, the following successively dependent
+C micro-ops seem to exist.
+C
+C                     uops
+C              mul     5
+C              q1+1    1   (addl)
+C              mul     5
+C              sub     3   (negl/sbbl)
+C              addback 2   (cmov)
+C                     ---
+C                     16
+C
+C The loop in fact runs at about 17.5 cycles.  Using a sarl/andl/addl for
+C the addback was found to be a touch slower.
+
+
+       ALIGN(16)
+L(fraction_some):
+       C eax
+       C ebx
+       C ecx
+       C edx
+       C esi
+       C edi   carry
+       C ebp   divisor
+
+       movl    PARAM_DST, %esi         C &dst[0], where the loop stops
+       movl    VAR_DST_STOP, %ecx      C &dst[xsize+2]
+       movl    %edi, %eax              C n2 into eax for the first mull
+
+       subl    $8, %ecx                C &dst[xsize]
+
+
+       ALIGN(16)
+L(fraction_top):
+       C eax   n2, then scratch
+       C ebx   scratch (nadj, q1)
+       C ecx   dst, decrementing
+       C edx   scratch
+       C esi   dst stop point
+       C edi   n2
+       C ebp   divisor
+
+       mull    VAR_INVERSE     C m*n2
+
+       movl    %ebp, %eax      C d
+       subl    $4, %ecx        C dst
+       leal    1(%edi), %ebx   C n2+1
+
+       C
+
+       C
+
+       C
+
+       addl    %edx, %ebx      C 1 + high(n2<<32 + m*n2) = q1+1
+
+       mull    %ebx            C (q1+1)*d
+
+       C
+
+       C
+
+       C
+
+       C
+
+       negl    %eax            C low of n - (q1+1)*d
+
+       sbbl    %edx, %edi      C high of n - (q1+1)*d, caring only about carry
+       leal    (%ebp,%eax), %edx       C remainder + d, used if the addback is needed
+
+       cmovc(  %edx, %eax)     C n - q1*d if underflow from using q1+1
+
+       sbbl    $0, %ebx        C q
+       movl    %eax, %edi      C remainder->n2
+       cmpl    %esi, %ecx      C stop when dst reaches &dst[0]
+
+       movl    %ebx, (%ecx)    C previous q
+       jne     L(fraction_top)
+
+
+       jmp     L(fraction_done)        C eax = remainder, still shifted left by l
+
+EPILOGUE()
diff --git a/mpn/x86/p6/mmx/gmp-mparam.h b/mpn/x86/p6/mmx/gmp-mparam.h

new file mode 100644 (file)

index 0000000..f239422
--- /dev/null
+++ b/mpn/x86/p6/mmx/gmp-mparam.h
@@ -0,0 +1,187 @@
+/* Intel P6/mmx gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2009,
+2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#define GMP_LIMB_BITS 32
+#define BYTES_PER_MP_LIMB 4
+
+
+/* NOTE: In a fat binary build SQR_TOOM2_THRESHOLD here cannot be more than the
+   value in mpn/x86/p6/gmp-mparam.h.  The latter is used as a hard limit in
+   mpn/x86/p6/sqr_basecase.asm.  */
+
+
+/* 800 MHz P6 model 8 */
+
+#define MOD_1_NORM_THRESHOLD                 4
+#define MOD_1_UNNORM_THRESHOLD               5
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          8
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          7
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        12
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     15
+#define USE_PREINV_DIVREM_1                  1  /* native */
+#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD           49
+
+#define MUL_TOOM22_THRESHOLD                22
+#define MUL_TOOM33_THRESHOLD                73
+#define MUL_TOOM44_THRESHOLD               193
+#define MUL_TOOM6H_THRESHOLD               254
+#define MUL_TOOM8H_THRESHOLD               381
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      73
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     122
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD      73
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD      80
+
+#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
+#define SQR_TOOM2_THRESHOLD                 48
+#define SQR_TOOM3_THRESHOLD                 81
+#define SQR_TOOM4_THRESHOLD                142
+#define SQR_TOOM6_THRESHOLD                258
+#define SQR_TOOM8_THRESHOLD                399
+
+#define MULMOD_BNM1_THRESHOLD               15
+#define SQRMOD_BNM1_THRESHOLD               18
+
+#define MUL_FFT_MODF_THRESHOLD             476  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    476, 5}, {     21, 6}, {     11, 5}, {     23, 6}, \
+    {     21, 7}, {     11, 6}, {     25, 7}, {     13, 6}, \
+    {     27, 7}, {     15, 6}, {     31, 7}, {     21, 8}, \
+    {     11, 7}, {     27, 8}, {     15, 7}, {     35, 8}, \
+    {     19, 7}, {     41, 8}, {     23, 7}, {     47, 8}, \
+    {     27, 9}, {     15, 8}, {     31, 7}, {     63, 8}, \
+    {     39, 9}, {     23, 8}, {     51,10}, {     15, 9}, \
+    {     31, 8}, {     67, 9}, {     39, 8}, {     79, 9}, \
+    {     47, 8}, {     95, 9}, {     55,10}, {     31, 9}, \
+    {     63, 8}, {    127, 9}, {     79,10}, {     47, 9}, \
+    {     95,11}, {     31,10}, {     63, 9}, {    135,10}, \
+    {     79, 9}, {    167,10}, {     95, 9}, {    199,10}, \
+    {    111,11}, {     63,10}, {    127, 9}, {    255, 8}, \
+    {    511,10}, {    143, 9}, {    287, 8}, {    575,10}, \
+    {    159,11}, {     95,10}, {    191, 9}, {    383,10}, \
+    {    207,12}, {     63,11}, {    127,10}, {    255, 9}, \
+    {    511,10}, {    271, 9}, {    543, 8}, {   1087,10}, \
+    {    287, 9}, {    575,11}, {    159,10}, {    319, 9}, \
+    {    639,10}, {    351, 9}, {    703,11}, {    191,10}, \
+    {    383, 9}, {    767,10}, {    415, 9}, {    831,11}, \
+    {    223,10}, {    447,12}, {    127,11}, {    255,10}, \
+    {    543, 9}, {   1087,11}, {    287,10}, {    607, 9}, \
+    {   1215,11}, {    319,10}, {    671,11}, {    351,10}, \
+    {    703,12}, {    191,11}, {    383,10}, {    767,11}, \
+    {    415,10}, {    831,11}, {    447,13}, {    127,12}, \
+    {    255,11}, {    543,10}, {   1087,11}, {    607,10}, \
+    {   1215,12}, {    319,11}, {    671,10}, {   1343,11}, \
+    {    703,10}, {   1407,11}, {    735,12}, {    383,11}, \
+    {    831,12}, {    447,11}, {    959,10}, {   1919,13}, \
+    {    255,12}, {    511,11}, {   1087,12}, {    575,11}, \
+    {   1215,10}, {   2431,12}, {    639,11}, {   1343,12}, \
+    {    703,11}, {   1471,13}, {    383,12}, {    767,11}, \
+    {   1535,12}, {    831,11}, {   1727,12}, {    959,11}, \
+    {   1919,14}, {    255,13}, {    511,12}, {   1215,11}, \
+    {   2431,13}, {    639,12}, {   1471,11}, {   2943,13}, \
+    {    767,12}, {   1727,13}, {    895,12}, {   1919,11}, \
+    {   3839,14}, {    511,13}, {   1023,12}, {   2111,13}, \
+    {   1151,12}, {   2431,13}, {   1279,12}, {   2559,13}, \
+    {   1407,12}, {   2943,14}, {    767,13}, {   1663,12}, \
+    {   3327,13}, {   1919,12}, {   3839,15}, {  32768,16} }
+#define MUL_FFT_TABLE3_SIZE 160
+#define MUL_FFT_THRESHOLD                 7040
+
+#define SQR_FFT_MODF_THRESHOLD             376  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    376, 5}, {     21, 6}, {     11, 5}, {     23, 6}, \
+    {     21, 7}, {     11, 6}, {     24, 7}, {     13, 6}, \
+    {     27, 7}, {     15, 6}, {     31, 7}, {     21, 8}, \
+    {     11, 7}, {     27, 8}, {     15, 7}, {     33, 8}, \
+    {     19, 7}, {     39, 8}, {     23, 7}, {     47, 8}, \
+    {     27, 9}, {     15, 8}, {     39, 9}, {     23, 8}, \
+    {     51,10}, {     15, 9}, {     31, 8}, {     67, 9}, \
+    {     39, 8}, {     79, 9}, {     47, 8}, {     95, 9}, \
+    {     55,10}, {     31, 9}, {     79,10}, {     47, 9}, \
+    {     95,11}, {     31,10}, {     63, 9}, {    127, 8}, \
+    {    255, 9}, {    135,10}, {     79, 9}, {    167,10}, \
+    {     95, 9}, {    191, 8}, {    383,10}, {    111,11}, \
+    {     63,10}, {    127, 9}, {    255, 8}, {    511, 9}, \
+    {    271,10}, {    143, 9}, {    287, 8}, {    575, 9}, \
+    {    303, 8}, {    607,10}, {    159, 9}, {    319,11}, \
+    {     95,10}, {    191, 9}, {    383,10}, {    207,12}, \
+    {     63,11}, {    127,10}, {    255, 9}, {    511,10}, \
+    {    271, 9}, {    543,10}, {    287, 9}, {    575,10}, \
+    {    303,11}, {    159,10}, {    319, 9}, {    639,10}, \
+    {    351, 9}, {    703,11}, {    191,10}, {    383, 9}, \
+    {    767,10}, {    415, 9}, {    831,11}, {    223,10}, \
+    {    479,12}, {    127,11}, {    255,10}, {    543, 9}, \
+    {   1087,11}, {    287,10}, {    607, 9}, {   1215,11}, \
+    {    319,10}, {    671,11}, {    351,10}, {    703,12}, \
+    {    191,11}, {    383,10}, {    767,11}, {    415,10}, \
+    {    831,11}, {    479,13}, {    127,12}, {    255,11}, \
+    {    543,10}, {   1087,11}, {    607,10}, {   1215,12}, \
+    {    319,11}, {    671,10}, {   1343,11}, {    703,10}, \
+    {   1407,11}, {    735,12}, {    383,11}, {    831,12}, \
+    {    447,11}, {    959,10}, {   1919,13}, {    255,12}, \
+    {    511,11}, {   1087,12}, {    575,11}, {   1215,10}, \
+    {   2431,12}, {    639,11}, {   1343,12}, {    703,11}, \
+    {   1407,13}, {    383,12}, {    831,11}, {   1727,12}, \
+    {    959,11}, {   1919,14}, {    255,13}, {    511,12}, \
+    {   1215,11}, {   2431,13}, {    639,12}, {   1471,11}, \
+    {   2943,13}, {    767,12}, {   1727,13}, {    895,12}, \
+    {   1919,11}, {   3839,14}, {    511,13}, {   1023,12}, \
+    {   2111,13}, {   1151,12}, {   2431,13}, {   1407,12}, \
+    {   2943,14}, {    767,13}, {   1535,12}, {   3071,13}, \
+    {   1663,12}, {   3455,13}, {   1919,12}, {   3839,15}, \
+    {  32768,16} }
+#define SQR_FFT_TABLE3_SIZE 161
+#define SQR_FFT_THRESHOLD                 3712
+
+#define MULLO_BASECASE_THRESHOLD             8
+#define MULLO_DC_THRESHOLD                  60
+#define MULLO_MUL_N_THRESHOLD            13765
+
+#define DC_DIV_QR_THRESHOLD                 83
+#define DC_DIVAPPR_Q_THRESHOLD             246
+#define DC_BDIV_QR_THRESHOLD                76
+#define DC_BDIV_Q_THRESHOLD                175
+
+#define INV_MULMOD_BNM1_THRESHOLD           82
+#define INV_NEWTON_THRESHOLD               268
+#define INV_APPR_THRESHOLD                 250
+
+#define BINV_NEWTON_THRESHOLD              276
+#define REDC_1_TO_REDC_N_THRESHOLD          74
+
+#define MU_DIV_QR_THRESHOLD               1442
+#define MU_DIVAPPR_Q_THRESHOLD            1442
+#define MUPI_DIV_QR_THRESHOLD              132
+#define MU_BDIV_QR_THRESHOLD              1142
+#define MU_BDIV_Q_THRESHOLD               1334
+
+#define MATRIX22_STRASSEN_THRESHOLD         18
+#define HGCD_THRESHOLD                     121
+#define GCD_DC_THRESHOLD                   478
+#define GCDEXT_DC_THRESHOLD                361
+#define JACOBI_BASE_METHOD                   1
+
+#define GET_STR_DC_THRESHOLD                13
+#define GET_STR_PRECOMPUTE_THRESHOLD        26
+#define SET_STR_DC_THRESHOLD               272
+#define SET_STR_PRECOMPUTE_THRESHOLD      1074
diff --git a/mpn/x86/p6/mmx/lshift.asm b/mpn/x86/p6/mmx/lshift.asm

new file mode 100644 (file)

index 0000000..e325b67
--- /dev/null
+++ b/mpn/x86/p6/mmx/lshift.asm
@@ -0,0 +1,27 @@
+dnl  Intel Pentium-II mpn_lshift -- mpn left shift.
+
+dnl  Copyright 2001 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+dnl  The P55 code runs well on P-II/III, but could stand some minor tweaks
+dnl  at some stage probably.
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_lshift)
+include_mpn(`x86/pentium/mmx/lshift.asm')
diff --git a/mpn/x86/p6/mmx/popham.asm b/mpn/x86/p6/mmx/popham.asm

new file mode 100644 (file)

index 0000000..421daa5
--- /dev/null
+++ b/mpn/x86/p6/mmx/popham.asm
@@ -0,0 +1,28 @@
+dnl  Intel Pentium-II mpn_popcount, mpn_hamdist -- population count and
+dnl  hamming distance.
+
+dnl  Copyright 2000, 2002 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C P6MMX: popcount 11 cycles/limb (approx), hamdist 11.5 cycles/limb (approx)
+
+
+MULFUNC_PROLOGUE(mpn_popcount mpn_hamdist)
+include_mpn(`x86/k6/mmx/popham.asm')
diff --git a/mpn/x86/p6/mmx/rshift.asm b/mpn/x86/p6/mmx/rshift.asm

new file mode 100644 (file)

index 0000000..b1543cd
--- /dev/null
+++ b/mpn/x86/p6/mmx/rshift.asm
@@ -0,0 +1,27 @@
+dnl  Intel Pentium-II mpn_rshift -- mpn right shift.
+
+dnl  Copyright 2001 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+dnl  The P55 code runs well on P-II/III, but could stand some minor tweaks
+dnl  at some stage probably.
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_rshift)
+include_mpn(`x86/pentium/mmx/rshift.asm')
diff --git a/mpn/x86/p6/mod_34lsub1.asm b/mpn/x86/p6/mod_34lsub1.asm

new file mode 100644 (file)

index 0000000..5e854b7
--- /dev/null
+++ b/mpn/x86/p6/mod_34lsub1.asm
@@ -0,0 +1,179 @@
+dnl  Intel P6 mpn_mod_34lsub1 -- remainder modulo 2^24-1.
+
+dnl  Copyright 2000, 2001, 2002, 2004 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C P6: 2.0 cycles/limb
+
+C TODO
+C  Experiments with more unrolling indicate that 1.5 c/l is possible on P6-13
+C  with the current carry handling scheme.
+
+C mp_limb_t mpn_mod_34lsub1 (mp_srcptr src, mp_size_t size)
+C
+C Groups of three limbs are handled, with carry bits from 0mod3 into 1mod3
+C into 2mod3, but at that point going into a separate carries total so we
+C don't keep the carry flag live across the loop control.  Avoiding decl
+C lets us get to 2.0 c/l, as compared to the generic x86 code at 3.66.
+C
+
+defframe(PARAM_SIZE, 8)
+defframe(PARAM_SRC,  4)
+
+dnl  re-use parameter space
+define(SAVE_EBX, `PARAM_SIZE')
+define(SAVE_ESI, `PARAM_SRC')
+
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_mod_34lsub1)
+deflit(`FRAME',0)
+
+       movl    PARAM_SIZE, %ecx        C size
+       movl    PARAM_SRC, %edx         C src
+
+       subl    $2, %ecx                C size-2
+       movl    (%edx), %eax            C src[0], movl leaves the subl flags alone
+       ja      L(three_or_more)        C if size > 2
+       jb      L(one)                  C if size < 2, src[0] is returned as is
+
+       C size==2
+
+       movl    4(%edx), %ecx           C src[1]
+
+       movl    %eax, %edx              C src[0]
+       shrl    $24, %eax               C src[0] high
+
+       andl    $0xFFFFFF, %edx         C src[0] low
+
+       addl    %edx, %eax              C src[0] high + src[0] low
+       movl    %ecx, %edx              C src[1]
+       shrl    $16, %ecx               C src[1] high
+
+       andl    $0xFFFF, %edx           C src[1] low mask
+       addl    %ecx, %eax              C add src[1] high
+
+       shll    $8, %edx                C src[1] low
+
+       addl    %edx, %eax              C add src[1] low
+L(one):
+       ret
+
+
+L(three_or_more):
+       C eax   src[0], initial acc 0mod3
+       C ebx
+       C ecx   size-2
+       C edx   src
+       C esi
+       C edi
+       C ebp
+
+       movl    %ebx, SAVE_EBX          C saved in the PARAM_SIZE slot
+       movl    4(%edx), %ebx           C src[1], initial 1mod3
+       subl    $3, %ecx                C size-5
+
+       movl    %esi, SAVE_ESI          C saved in the PARAM_SRC slot
+       movl    8(%edx), %esi           C src[2], initial 2mod3
+
+       pushl   %edi    FRAME_pushl()
+       movl    $0, %edi                C initial carries 0mod3 (movl keeps subl flags)
+       jng     L(done)                 C if size < 6
+
+
+L(top):
+       C eax   acc 0mod3
+       C ebx   acc 1mod3
+       C ecx   counter, limbs
+       C edx   src
+       C esi   acc 2mod3
+       C edi   carries into 0mod3
+       C ebp
+
+       addl    12(%edx), %eax          C 0mod3
+       adcl    16(%edx), %ebx          C 1mod3, with carry from 0mod3
+       adcl    20(%edx), %esi          C 2mod3, with carry from 1mod3
+       leal    12(%edx), %edx          C src += 3 limbs, leal keeps the carry flag
+       adcl    $0, %edi                C carry out of 2mod3 into carries total
+
+       subl    $3, %ecx
+       jg      L(top)                  C at least 3 more to process
+
+
+L(done):
+       C ecx is -2, -1 or 0 representing 0, 1 or 2 more limbs respectively
+       cmpl    $-1, %ecx
+       jl      L(done_0)               C if -2, meaning 0 more limbs
+
+       C 1 or 2 more limbs
+       movl    $0, %ecx                C zero 1mod3 addend (movl keeps cmpl flags)
+       je      L(done_1)               C if -1, meaning 1 more limb only
+       movl    16(%edx), %ecx          C second remaining limb, for 1mod3
+L(done_1):
+       addl    12(%edx), %eax          C 0mod3
+       adcl    %ecx, %ebx              C 1mod3
+       adcl    $0, %esi                C 2mod3
+       adcl    $0, %edi                C carries 0mod3
+
+L(done_0):
+       C eax   acc 0mod3
+       C ebx   acc 1mod3
+       C ecx
+       C edx
+       C esi   acc 2mod3
+       C edi   carries 0mod3
+       C ebp
+
+       movl    %eax, %ecx              C 0mod3
+       shrl    $24, %eax               C 0mod3 high initial total
+
+       andl    $0xFFFFFF, %ecx         C 0mod3 low
+       movl    %edi, %edx              C carries
+       shrl    $24, %edi               C carries high
+
+       addl    %ecx, %eax              C add 0mod3 low
+       andl    $0xFFFFFF, %edx         C carries 0mod3 low
+       movl    %ebx, %ecx              C 1mod3
+
+       shrl    $16, %ebx               C 1mod3 high
+       addl    %edi, %eax              C add carries high
+       addl    %edx, %eax              C add carries 0mod3 low
+
+       andl    $0xFFFF, %ecx           C 1mod3 low mask
+       addl    %ebx, %eax              C add 1mod3 high
+       movl    SAVE_EBX, %ebx          C restore ebx
+
+       shll    $8, %ecx                C 1mod3 low
+       movl    %esi, %edx              C 2mod3
+       popl    %edi    FRAME_popl()
+
+       shrl    $8, %esi                C 2mod3 high
+       andl    $0xFF, %edx             C 2mod3 low mask
+       addl    %ecx, %eax              C add 1mod3 low
+
+       shll    $16, %edx               C 2mod3 low
+       addl    %esi, %eax              C add 2mod3 high
+       movl    SAVE_ESI, %esi          C restore esi
+
+       addl    %edx, %eax              C add 2mod3 low
+
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/p6/mode1o.asm b/mpn/x86/p6/mode1o.asm

new file mode 100644 (file)

index 0000000..7361164
--- /dev/null
+++ b/mpn/x86/p6/mode1o.asm
@@ -0,0 +1,158 @@
+dnl  Intel P6 mpn_modexact_1_odd -- exact division style remainder.
+
+dnl  Copyright 2000, 2001, 2002, 2007 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C P6: 10.0 cycles/limb
+
+
+C mp_limb_t mpn_modexact_1_odd (mp_srcptr src, mp_size_t size,
+C                               mp_limb_t divisor);
+C mp_limb_t mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size,
+C                                mp_limb_t divisor, mp_limb_t carry);
+C
+C It's not worth skipping a step at the end when high<divisor since the main
+C loop is only 10 cycles.
+
+defframe(PARAM_CARRY,  16)
+defframe(PARAM_DIVISOR,12)
+defframe(PARAM_SIZE,   8)
+defframe(PARAM_SRC,    4)
+
+dnl  Not enough room under modexact_1 to make these re-use the parameter
+dnl  space, unfortunately.
+defframe(SAVE_EBX,     -4)
+defframe(SAVE_ESI,     -8)
+defframe(SAVE_EDI,    -12)
+deflit(STACK_SPACE, 12)
+
+       TEXT
+
+       ALIGN(16)
+PROLOGUE(mpn_modexact_1c_odd)
+deflit(`FRAME',0)
+
+       movl    PARAM_CARRY, %ecx       C initial carry supplied by the caller
+       jmp     L(start_1c)             C join the common code below
+
+EPILOGUE()
+
+       ALIGN(16)
+PROLOGUE(mpn_modexact_1_odd)
+deflit(`FRAME',0)
+
+       xorl    %ecx, %ecx              C no initial carry
+L(start_1c):
+       movl    PARAM_DIVISOR, %eax     C d
+
+       subl    $STACK_SPACE, %esp      FRAME_subl_esp(STACK_SPACE)
+
+       movl    %esi, SAVE_ESI
+       movl    PARAM_SRC, %esi         C src
+
+       shrl    %eax                    C d/2
+       movl    %edi, SAVE_EDI
+
+       andl    $127, %eax              C low 7 bits of d/2, the table index
+
+ifdef(`PIC',`
+       LEA(    binvert_limb_table, %edi)
+       movzbl  (%eax,%edi), %edi               C inv 8 bits
+',`
+       movzbl  binvert_limb_table(%eax), %edi  C inv 8 bits
+')
+
+       xorl    %edx, %edx              C initial extra carry
+       leal    (%edi,%edi), %eax       C 2*inv
+
+       imull   %edi, %edi              C inv*inv
+
+       movl    %ebx, SAVE_EBX
+       movl    PARAM_SIZE, %ebx        C size
+
+       imull   PARAM_DIVISOR, %edi     C inv*inv*d
+
+       subl    %edi, %eax              C inv = 2*inv - inv*inv*d
+       leal    (%eax,%eax), %edi       C 2*inv
+
+       imull   %eax, %eax              C inv*inv
+
+       imull   PARAM_DIVISOR, %eax     C inv*inv*d
+
+       leal    (%esi,%ebx,4), %esi     C src end
+       negl    %ebx                    C -size
+
+       subl    %eax, %edi              C inv = 2*inv - inv*inv*d
+
+       ASSERT(e,`      C d*inv == 1 mod 2^GMP_LIMB_BITS
+       movl    PARAM_DIVISOR, %eax
+       imull   %edi, %eax
+       cmpl    $1, %eax')
+
+
+C The dependent chain here is
+C
+C      subl    %edx, %eax       1
+C      imull   %edi, %eax       4
+C      mull    PARAM_DIVISOR    5
+C                             ----
+C       total                  10
+C
+C and this is the measured speed.  No special scheduling is necessary, out
+C of order execution hides the load latency.
+
+L(top):
+       C eax   scratch (src limb)
+       C ebx   counter, limbs, negative
+       C ecx   carry bit, 0 or 1 (the initial carry limb on the first pass)
+       C edx   carry limb, high of last product
+       C esi   &src[size]
+       C edi   inverse
+       C ebp
+
+       movl    (%esi,%ebx,4), %eax     C src limb
+       subl    %ecx, %eax              C subtract carry bit
+
+       sbbl    %ecx, %ecx              C 0 or -1 from the borrow
+       subl    %edx, %eax              C subtract carry limb
+
+       sbbl    $0, %ecx                C fold in the second borrow
+
+       imull   %edi, %eax              C multiply by the inverse
+
+       negl    %ecx                    C borrows made positive, the next carry bit
+
+       mull    PARAM_DIVISOR           C edx = high of product, the next carry limb
+
+       incl    %ebx
+       jnz     L(top)
+
+
+       movl    SAVE_ESI, %esi
+       leal    (%ecx,%edx), %eax       C return carry bit + carry limb
+
+       movl    SAVE_EDI, %edi
+
+       movl    SAVE_EBX, %ebx
+       addl    $STACK_SPACE, %esp
+
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/p6/mul_basecase.asm b/mpn/x86/p6/mul_basecase.asm

new file mode 100644 (file)

index 0000000..fc1afbd
--- /dev/null
+++ b/mpn/x86/p6/mul_basecase.asm
@@ -0,0 +1,596 @@
+dnl  Intel P6 mpn_mul_basecase -- multiply two mpn numbers.
+
+dnl  Copyright 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C P6: approx 6.5 cycles per cross product (16 limbs/loop unrolling).
+
+
+dnl  P6 UNROLL_COUNT cycles/product (approx)
+dnl           8           7
+dnl          16           6.5
+dnl          32           6.4
+dnl  Maximum possible with the current code is 32.
+
+deflit(UNROLL_COUNT, 16)
+
+
+C void mpn_mul_basecase (mp_ptr wp,
+C                        mp_srcptr xp, mp_size_t xsize,
+C                        mp_srcptr yp, mp_size_t ysize);
+C
+C This routine is essentially the same as mpn/generic/mul_basecase.c, but
+C it's faster because it does most of the mpn_addmul_1() startup
+C calculations only once.
+
+ifdef(`PIC',`
+deflit(UNROLL_THRESHOLD, 5)
+',`
+deflit(UNROLL_THRESHOLD, 5)
+')
+
+defframe(PARAM_YSIZE,20)
+defframe(PARAM_YP,   16)
+defframe(PARAM_XSIZE,12)
+defframe(PARAM_XP,   8)
+defframe(PARAM_WP,   4)
+
+       TEXT
+       ALIGN(16)
+
+PROLOGUE(mpn_mul_basecase)
+deflit(`FRAME',0)
+
+       movl    PARAM_XSIZE, %ecx
+
+       movl    PARAM_YP, %eax
+
+       movl    PARAM_XP, %edx
+
+       movl    (%eax), %eax            C yp[0]
+       cmpl    $2, %ecx
+       ja      L(xsize_more_than_two)
+       je      L(two_by_something)
+
+
+       C one limb by one limb
+
+       mull    (%edx)
+
+       movl    PARAM_WP, %ecx
+       movl    %eax, (%ecx)
+       movl    %edx, 4(%ecx)
+       ret
+
+
+C -----------------------------------------------------------------------------
+L(two_by_something):
+deflit(`FRAME',0)
+
+dnl  re-use parameter space
+define(SAVE_EBX, `PARAM_XSIZE')
+define(SAVE_ESI, `PARAM_YSIZE')
+
+       movl    %ebx, SAVE_EBX
+       cmpl    $1, PARAM_YSIZE
+       movl    %eax, %ecx              C yp[0]
+
+       movl    %esi, SAVE_ESI          C save esi
+       movl    PARAM_WP, %ebx
+       movl    %edx, %esi              C xp
+
+       movl    (%edx), %eax            C xp[0]
+       jne     L(two_by_two)
+
+
+       C two limbs by one limb
+       C
+       C eax   xp[0]
+       C ebx   wp
+       C ecx   yp[0]
+       C edx
+       C esi   xp
+
+       mull    %ecx
+
+       movl    %eax, (%ebx)
+       movl    4(%esi), %eax
+       movl    %edx, %esi              C carry
+
+       mull    %ecx
+
+       addl    %eax, %esi
+
+       movl    %esi, 4(%ebx)
+       movl    SAVE_ESI, %esi
+
+       adcl    $0, %edx
+
+       movl    %edx, 8(%ebx)
+       movl    SAVE_EBX, %ebx
+
+       ret
+
+
+
+C -----------------------------------------------------------------------------
+
+       ALIGN(16)
+L(two_by_two):
+       C eax   xp[0]
+       C ebx   wp
+       C ecx   yp[0]
+       C edx
+       C esi   xp
+       C edi
+       C ebp
+
+dnl  more parameter space re-use
+define(SAVE_EDI, `PARAM_WP')
+
+       mull    %ecx            C xp[0] * yp[0]
+
+       movl    %edi, SAVE_EDI
+       movl    %edx, %edi      C carry, for wp[1]
+
+       movl    %eax, (%ebx)
+       movl    4(%esi), %eax
+
+       mull    %ecx            C xp[1] * yp[0]
+
+       addl    %eax, %edi
+       movl    PARAM_YP, %ecx
+
+       adcl    $0, %edx
+       movl    4(%ecx), %ecx   C yp[1]
+
+       movl    %edi, 4(%ebx)
+       movl    4(%esi), %eax   C xp[1]
+       movl    %edx, %edi      C carry, for wp[2]
+
+       mull    %ecx            C xp[1] * yp[1]
+
+       addl    %eax, %edi
+       movl    (%esi), %eax    C xp[0]
+
+       adcl    $0, %edx
+       movl    %edx, %esi      C carry, for wp[3]
+
+       mull    %ecx            C xp[0] * yp[1]
+
+       addl    %eax, 4(%ebx)
+       movl    %esi, %eax
+
+       adcl    %edx, %edi
+       movl    SAVE_ESI, %esi
+
+       movl    %edi, 8(%ebx)
+
+       adcl    $0, %eax
+       movl    SAVE_EDI, %edi
+
+       movl    %eax, 12(%ebx)
+       movl    SAVE_EBX, %ebx
+
+       ret
+
+
+C -----------------------------------------------------------------------------
+       ALIGN(16)
+L(xsize_more_than_two):
+
+C The first limb of yp is processed with a simple mpn_mul_1 loop running at
+C about 6.2 c/l.  Unrolling this doesn't seem worthwhile since it's only run
+C once (whereas the addmul_1 below is run ysize-1 many times).  A call to
+C mpn_mul_1 would be slowed down by the parameter pushing and popping etc,
+C and doesn't seem likely to be worthwhile on the typical sizes reaching
+C here from the Karatsuba code.
+
+       C eax   yp[0]
+       C ebx
+       C ecx   xsize
+       C edx   xp
+       C esi
+       C edi
+       C ebp
+
+defframe(`SAVE_EBX',    -4)
+defframe(`SAVE_ESI',    -8)
+defframe(`SAVE_EDI',   -12)
+defframe(`SAVE_EBP',   -16)
+defframe(VAR_COUNTER,  -20)  dnl for use in the unroll case
+defframe(VAR_ADJUST,   -24)
+defframe(VAR_JMP,      -28)
+defframe(VAR_SWAP,     -32)
+defframe(VAR_XP_LOW,   -36)
+deflit(STACK_SPACE, 36)
+
+       subl    $STACK_SPACE, %esp
+deflit(`FRAME',STACK_SPACE)
+
+       movl    %edi, SAVE_EDI
+       movl    PARAM_WP, %edi
+
+       movl    %ebx, SAVE_EBX
+
+       movl    %ebp, SAVE_EBP
+       movl    %eax, %ebp
+
+       movl    %esi, SAVE_ESI
+       xorl    %ebx, %ebx
+       leal    (%edx,%ecx,4), %esi     C xp end
+
+       leal    (%edi,%ecx,4), %edi     C wp end of mul1
+       negl    %ecx
+
+
+L(mul1):
+       C eax   scratch
+       C ebx   carry
+       C ecx   counter, negative
+       C edx   scratch
+       C esi   xp end
+       C edi   wp end of mul1
+       C ebp   multiplier
+
+       movl    (%esi,%ecx,4), %eax
+
+       mull    %ebp
+
+       addl    %ebx, %eax
+       movl    %eax, (%edi,%ecx,4)
+       movl    $0, %ebx
+
+       adcl    %edx, %ebx
+       incl    %ecx
+       jnz     L(mul1)
+
+
+       movl    PARAM_YSIZE, %edx
+
+       movl    %ebx, (%edi)            C final carry
+       movl    PARAM_XSIZE, %ecx
+       decl    %edx
+
+       jz      L(done)                 C if ysize==1
+
+       cmpl    $UNROLL_THRESHOLD, %ecx
+       movl    PARAM_YP, %eax
+       jae     L(unroll)
+
+
+C -----------------------------------------------------------------------------
+       C simple addmul looping
+       C
+       C eax   yp
+       C ebx
+       C ecx   xsize
+       C edx   ysize-1
+       C esi   xp end
+       C edi   wp end of mul1
+       C ebp
+
+       leal    4(%eax,%edx,4), %ebp    C yp end
+       negl    %ecx
+       negl    %edx
+
+       movl    %edx, PARAM_YSIZE       C -(ysize-1)
+       movl    (%esi,%ecx,4), %eax     C xp low limb
+       incl    %ecx
+
+       movl    %ecx, PARAM_XSIZE       C -(xsize-1)
+       xorl    %ebx, %ebx              C initial carry
+
+       movl    %ebp, PARAM_YP
+       movl    (%ebp,%edx,4), %ebp     C yp second lowest limb - multiplier
+       jmp     L(simple_outer_entry)
+
+
+L(simple_outer_top):
+       C ebp   ysize counter, negative
+
+       movl    PARAM_YP, %edx
+
+       movl    PARAM_XSIZE, %ecx       C -(xsize-1)
+       xorl    %ebx, %ebx              C carry
+
+       movl    %ebp, PARAM_YSIZE
+       addl    $4, %edi                C next position in wp
+
+       movl    (%edx,%ebp,4), %ebp     C yp limb - multiplier
+
+       movl    -4(%esi,%ecx,4), %eax   C xp low limb
+
+
+L(simple_outer_entry):
+
+L(simple_inner_top):
+       C eax   xp limb
+       C ebx   carry limb
+       C ecx   loop counter (negative)
+       C edx   scratch
+       C esi   xp end
+       C edi   wp end
+       C ebp   multiplier
+
+       mull    %ebp
+
+       addl    %eax, %ebx
+       adcl    $0, %edx
+
+       addl    %ebx, (%edi,%ecx,4)
+       movl    (%esi,%ecx,4), %eax
+       adcl    $0, %edx
+
+       incl    %ecx
+       movl    %edx, %ebx
+       jnz     L(simple_inner_top)
+
+
+       C separate code for last limb so outer loop counter handling can be
+       C interleaved
+
+       mull    %ebp
+
+       movl    PARAM_YSIZE, %ebp
+       addl    %eax, %ebx
+
+       adcl    $0, %edx
+
+       addl    %ebx, (%edi)
+
+       adcl    $0, %edx
+       incl    %ebp
+
+       movl    %edx, 4(%edi)
+       jnz     L(simple_outer_top)
+
+
+L(done):
+       movl    SAVE_EBX, %ebx
+
+       movl    SAVE_ESI, %esi
+
+       movl    SAVE_EDI, %edi
+
+       movl    SAVE_EBP, %ebp
+       addl    $FRAME, %esp
+
+       ret
+
+
+
+C -----------------------------------------------------------------------------
+C
+C The unrolled loop is the same as in mpn_addmul_1, see that code for some
+C comments.
+C
+C VAR_ADJUST is the negative of how many limbs the leals in the inner loop
+C increment xp and wp.  This is used to adjust xp and wp, and is rshifted to
+C give an initial VAR_COUNTER at the top of the outer loop.
+C
+C VAR_COUNTER is for the unrolled loop, running from VAR_ADJUST/UNROLL_COUNT
+C up to -1, inclusive.
+C
+C VAR_JMP is the computed jump into the unrolled loop.
+C
+C VAR_SWAP is 0 if xsize odd or 0xFFFFFFFF if xsize even, used to swap the
+C initial ebx and ecx on entry to the unrolling.
+C
+C VAR_XP_LOW is the least significant limb of xp, which is needed at the
+C start of the unrolled loop.
+C
+C PARAM_YSIZE is the outer loop counter, going from -(ysize-1) up to -1,
+C inclusive.
+C
+C PARAM_YP is offset appropriately so that the PARAM_YSIZE counter can be
+C added to give the location of the next limb of yp, which is the multiplier
+C in the unrolled loop.
+C
+C The trick with the VAR_ADJUST value means it's only necessary to do one
+C fetch in the outer loop to take care of xp, wp and the inner loop counter.
+
+
+L(unroll):
+       C eax   yp
+       C ebx
+       C ecx   xsize
+       C edx   ysize-1
+       C esi   xp end
+       C edi   wp end of mul1
+       C ebp
+
+       movl    PARAM_XP, %esi
+
+       movl    4(%eax), %ebp           C multiplier (yp second limb)
+       leal    4(%eax,%edx,4), %eax    C yp adjust for ysize indexing
+
+       movl    %eax, PARAM_YP
+       movl    PARAM_WP, %edi
+       negl    %edx
+
+       movl    %edx, PARAM_YSIZE
+       leal    UNROLL_COUNT-2(%ecx), %ebx      C (xsize-1)+UNROLL_COUNT-1
+       decl    %ecx                            C xsize-1
+
+       movl    (%esi), %eax            C xp low limb
+       andl    $-UNROLL_MASK-1, %ebx
+       negl    %ecx                    C -(xsize-1)
+
+       negl    %ebx
+       andl    $UNROLL_MASK, %ecx
+
+       movl    %ebx, VAR_ADJUST
+       movl    %ecx, %edx
+       shll    $4, %ecx
+
+       movl    %eax, VAR_XP_LOW
+       sarl    $UNROLL_LOG2, %ebx
+       negl    %edx
+
+       C 15 code bytes per limb
+ifdef(`PIC',`
+       call    L(pic_calc)
+L(unroll_here):
+',`
+       leal    L(unroll_inner_entry) (%ecx,%edx,1), %ecx
+')
+
+       movl    %ecx, VAR_JMP
+       movl    %edx, %ecx
+       shll    $31, %edx
+
+       sarl    $31, %edx               C 0 or -1 as xsize odd or even
+       leal    4(%edi,%ecx,4), %edi    C wp and xp, adjust for unrolling,
+       leal    4(%esi,%ecx,4), %esi    C  and start at second limb
+
+       movl    %edx, VAR_SWAP
+       jmp     L(unroll_outer_entry)
+
+
+ifdef(`PIC',`
+L(pic_calc):
+       C See mpn/x86/README about old gas bugs
+       leal    (%ecx,%edx,1), %ecx
+       addl    $L(unroll_inner_entry)-L(unroll_here), %ecx
+       addl    (%esp), %ecx
+       ret_internal
+')
+
+
+C --------------------------------------------------------------------------
+       ALIGN(16)
+L(unroll_outer_top):
+       C eax
+       C ebx
+       C ecx
+       C edx
+       C esi   xp + offset
+       C edi   wp + offset
+       C ebp   ysize counter, negative
+
+       movl    VAR_ADJUST, %ebx
+       movl    PARAM_YP, %edx
+
+       movl    VAR_XP_LOW, %eax
+       movl    %ebp, PARAM_YSIZE       C store incremented ysize counter
+
+       leal    eval(UNROLL_BYTES + 4) (%edi,%ebx,4), %edi
+       leal    (%esi,%ebx,4), %esi
+       sarl    $UNROLL_LOG2, %ebx
+
+       movl    (%edx,%ebp,4), %ebp     C yp next multiplier
+
+L(unroll_outer_entry):
+       mull    %ebp
+
+       movl    %ebx, VAR_COUNTER
+       movl    %edx, %ebx              C carry high
+       movl    %eax, %ecx              C carry low
+
+       xorl    %edx, %eax
+       movl    VAR_JMP, %edx
+
+       andl    VAR_SWAP, %eax
+
+       xorl    %eax, %ebx              C carries other way for odd index
+       xorl    %eax, %ecx
+
+       jmp     *%edx
+
+
+C -----------------------------------------------------------------------------
+
+L(unroll_inner_top):
+       C eax   xp limb
+       C ebx   carry high
+       C ecx   carry low
+       C edx   scratch
+       C esi   xp+8
+       C edi   wp
+       C ebp   yp multiplier limb
+       C
+       C VAR_COUNTER  loop counter, negative
+       C
+       C 15 bytes each limb
+
+       addl    $UNROLL_BYTES, %edi
+
+L(unroll_inner_entry):
+
+deflit(CHUNK_COUNT,2)
+forloop(`i', 0, UNROLL_COUNT/CHUNK_COUNT-1, `
+       deflit(`disp0', eval(i*CHUNK_COUNT*4 ifelse(UNROLL_BYTES,256,-128)))
+       deflit(`disp1', eval(disp0 + 4))
+
+Zdisp( movl,   disp0,(%esi), %eax)
+       mull    %ebp
+Zdisp( addl,   %ecx, disp0,(%edi))
+       adcl    %eax, %ebx              C new carry low
+       movl    %edx, %ecx
+       adcl    $0, %ecx                C new carry high
+
+       movl    disp1(%esi), %eax
+       mull    %ebp
+       addl    %ebx, disp1(%edi)
+       adcl    %eax, %ecx              C new carry low
+       movl    %edx, %ebx
+       adcl    $0, %ebx                C new carry high
+')
+
+
+       incl    VAR_COUNTER
+       leal    UNROLL_BYTES(%esi), %esi
+       jnz     L(unroll_inner_top)
+
+
+       C eax
+       C ebx   carry high
+       C ecx   carry low
+       C edx
+       C esi
+       C edi   wp, pointing at second last limb
+       C ebp
+
+deflit(`disp0',        eval(UNROLL_BYTES ifelse(UNROLL_BYTES,256,-128)))
+deflit(`disp1', eval(disp0 + 4))
+
+       movl    PARAM_YSIZE, %ebp
+       addl    %ecx, disp0(%edi)       C carry low
+
+       adcl    $0, %ebx
+       incl    %ebp
+
+       movl    %ebx, disp1(%edi)       C carry high
+       jnz     L(unroll_outer_top)
+
+
+       movl    SAVE_ESI, %esi
+
+       movl    SAVE_EBP, %ebp
+
+       movl    SAVE_EDI, %edi
+
+       movl    SAVE_EBX, %ebx
+       addl    $FRAME, %esp
+
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/p6/p3mmx/popham.asm b/mpn/x86/p6/p3mmx/popham.asm

new file mode 100644 (file)

index 0000000..2f58968
--- /dev/null
+++ b/mpn/x86/p6/p3mmx/popham.asm
@@ -0,0 +1,31 @@
+dnl  Intel Pentium-III mpn_popcount, mpn_hamdist -- population count and
+dnl  hamming distance.
+
+dnl  Copyright 2000, 2002, 2004, 2007 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C                           popcount        hamdist
+C P3 generic                   6.5             7
+C P3 model 9  (Banias)         ?               ?
+C P3 model 13 (Dothan)         5.75            6
+
+
+MULFUNC_PROLOGUE(mpn_popcount mpn_hamdist)
+include_mpn(`x86/k7/mmx/popham.asm')
diff --git a/mpn/x86/p6/sqr_basecase.asm b/mpn/x86/p6/sqr_basecase.asm

new file mode 100644 (file)

index 0000000..0b690af
--- /dev/null
+++ b/mpn/x86/p6/sqr_basecase.asm
@@ -0,0 +1,638 @@
+dnl  Intel P6 mpn_sqr_basecase -- square an mpn number.
+
+dnl  Copyright 1999, 2000, 2002 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C P6: approx 4.0 cycles per cross product, or 7.75 cycles per triangular
+C     product (measured on the speed difference between 20 and 40 limbs,
+C     which is the Karatsuba recursing range).
+
+
+dnl  These are the same as in mpn/x86/k6/sqr_basecase.asm, see that file for
+dnl  a description.  The only difference here is that UNROLL_COUNT can go up
+dnl  to 64 (not 63) making SQR_TOOM2_THRESHOLD_MAX 67.
+
+deflit(SQR_TOOM2_THRESHOLD_MAX, 67)
+
+ifdef(`SQR_TOOM2_THRESHOLD_OVERRIDE',
+`define(`SQR_TOOM2_THRESHOLD',SQR_TOOM2_THRESHOLD_OVERRIDE)')
+
+m4_config_gmp_mparam(`SQR_TOOM2_THRESHOLD')
+deflit(UNROLL_COUNT, eval(SQR_TOOM2_THRESHOLD-3))
+
+
+C void mpn_sqr_basecase (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C
+C The algorithm is basically the same as mpn/generic/sqr_basecase.c, but a
+C lot of function call overheads are avoided, especially when the given size
+C is small.
+C
+C The code size might look a bit excessive, but not all of it is executed so
+C it won't all get into the code cache.  The 1x1, 2x2 and 3x3 special cases
+C clearly apply only to those sizes; mid sizes like 10x10 only need part of
+C the unrolled addmul; and big sizes like 40x40 that do use the full
+C unrolling will at least be making good use of it, because 40x40 will take
+C something like 7000 cycles.
+
+defframe(PARAM_SIZE,12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+       TEXT
+       ALIGN(32)
+PROLOGUE(mpn_sqr_basecase)
+deflit(`FRAME',0)
+       C Fetch the three stack parameters and dispatch on size.
+       movl    PARAM_SIZE, %edx        C edx = size
+
+       movl    PARAM_SRC, %eax         C eax = src
+
+       cmpl    $2, %edx
+       movl    PARAM_DST, %ecx         C ecx = dst, movl keeps the cmpl flags
+       je      L(two_limbs)            C size == 2
+
+       movl    (%eax), %eax            C eax = src[0], flags still from the cmpl
+       ja      L(three_or_more)        C size > 2
+
+       C Fall through here when size is below 2.
+C -----------------------------------------------------------------------------
+C one limb only
+       C eax   src limb
+       C ebx
+       C ecx   dst
+       C edx
+
+       mull    %eax                    C edx:eax = src[0]^2
+
+       movl    %eax, (%ecx)            C dst[0] = low limb
+       movl    %edx, 4(%ecx)           C dst[1] = high limb
+
+       ret                             C nothing was saved on this path
+
+
+C -----------------------------------------------------------------------------
+L(two_limbs):
+       C eax   src
+       C ebx
+       C ecx   dst
+       C edx
+       C esi, ebx, edi and ebp are each saved just before first being overwritten.
+defframe(SAVE_ESI, -4)
+defframe(SAVE_EBX, -8)
+defframe(SAVE_EDI, -12)
+defframe(SAVE_EBP, -16)
+deflit(`STACK_SPACE',16)
+
+       subl    $STACK_SPACE, %esp      C room for the four save slots
+deflit(`FRAME',STACK_SPACE)
+
+       movl    %esi, SAVE_ESI
+       movl    %eax, %esi              C esi = src
+       movl    (%eax), %eax            C eax = src[0]
+
+       mull    %eax            C src[0]^2
+
+       movl    %eax, (%ecx)    C dst[0]
+       movl    4(%esi), %eax           C eax = src[1]
+
+       movl    %ebx, SAVE_EBX
+       movl    %edx, %ebx      C dst[1]
+
+       mull    %eax            C src[1]^2
+
+       movl    %edi, SAVE_EDI
+       movl    %eax, %edi      C dst[2]
+       movl    (%esi), %eax            C eax = src[0]
+
+       movl    %ebp, SAVE_EBP
+       movl    %edx, %ebp      C dst[3]
+
+       mull    4(%esi)         C src[0]*src[1]
+       C Add the cross product in twice, with both carries collected in ebp.
+       addl    %eax, %ebx              C first addition, into ebx and edi
+       movl    SAVE_ESI, %esi
+
+       adcl    %edx, %edi
+
+       adcl    $0, %ebp
+       addl    %ebx, %eax              C second addition, sums land in eax and edx
+       movl    SAVE_EBX, %ebx
+
+       adcl    %edi, %edx
+       movl    SAVE_EDI, %edi
+
+       adcl    $0, %ebp
+
+       movl    %eax, 4(%ecx)           C dst[1]
+
+       movl    %ebp, 12(%ecx)          C dst[3]
+       movl    SAVE_EBP, %ebp
+
+       movl    %edx, 8(%ecx)           C dst[2]
+       addl    $FRAME, %esp
+
+       ret
+
+
+C -----------------------------------------------------------------------------
+L(three_or_more):
+       C eax   src low limb
+       C ebx
+       C ecx   dst
+       C edx   size
+deflit(`FRAME',0)
+
+       pushl   %esi    defframe_pushl(`SAVE_ESI')
+       cmpl    $4, %edx
+
+       movl    PARAM_SRC, %esi         C esi = src, flags still from the cmpl
+       jae     L(four_or_more)         C size >= 4
+
+       C Fall through here when size is 3.
+C -----------------------------------------------------------------------------
+C three limbs
+       C Squares go to dst first, doubled cross products are added after.
+       C eax   src low limb
+       C ebx
+       C ecx   dst
+       C edx
+       C esi   src
+       C edi
+       C ebp
+
+       pushl   %ebp    defframe_pushl(`SAVE_EBP')
+       pushl   %edi    defframe_pushl(`SAVE_EDI')
+
+       mull    %eax            C src[0] ^ 2
+
+       movl    %eax, (%ecx)            C dst[0]
+       movl    %edx, 4(%ecx)           C dst[1]
+
+       movl    4(%esi), %eax
+       xorl    %ebp, %ebp
+
+       mull    %eax            C src[1] ^ 2
+
+       movl    %eax, 8(%ecx)           C dst[2]
+       movl    %edx, 12(%ecx)          C dst[3]
+       movl    8(%esi), %eax
+
+       pushl   %ebx    defframe_pushl(`SAVE_EBX')
+
+       mull    %eax            C src[2] ^ 2
+
+       movl    %eax, 16(%ecx)          C dst[4]
+       movl    %edx, 20(%ecx)          C dst[5]
+
+       movl    (%esi), %eax
+
+       mull    4(%esi)         C src[0] * src[1]
+
+       movl    %eax, %ebx              C low limb
+       movl    %edx, %edi              C high limb
+
+       movl    (%esi), %eax
+
+       mull    8(%esi)         C src[0] * src[2]
+
+       addl    %eax, %edi
+       movl    %edx, %ebp
+
+       adcl    $0, %ebp
+       movl    4(%esi), %eax
+
+       mull    8(%esi)         C src[1] * src[2]
+
+       xorl    %esi, %esi
+       addl    %eax, %ebp
+
+       C eax
+       C ebx   dst[1]
+       C ecx   dst
+       C edx   dst[4]
+       C esi   zero, will be dst[5]
+       C edi   dst[2]
+       C ebp   dst[3]
+       C Double the cross products, the bit shifted out is collected in esi.
+       adcl    $0, %edx                C carry from the addl into ebp
+       addl    %ebx, %ebx
+
+       adcl    %edi, %edi
+
+       adcl    %ebp, %ebp
+
+       adcl    %edx, %edx
+       movl    4(%ecx), %eax           C high limb of src[0]^2 stored above
+
+       adcl    $0, %esi
+       addl    %ebx, %eax              C start adding onto the squares in dst
+
+       movl    %eax, 4(%ecx)
+       movl    8(%ecx), %eax
+
+       adcl    %edi, %eax
+       movl    12(%ecx), %ebx
+
+       adcl    %ebp, %ebx
+       movl    16(%ecx), %edi
+
+       movl    %eax, 8(%ecx)
+       movl    SAVE_EBP, %ebp
+
+       movl    %ebx, 12(%ecx)
+       movl    SAVE_EBX, %ebx
+
+       adcl    %edx, %edi
+       movl    20(%ecx), %eax
+
+       movl    %edi, 16(%ecx)
+       movl    SAVE_EDI, %edi
+
+       adcl    %esi, %eax      C no carry out of this
+       movl    SAVE_ESI, %esi
+
+       movl    %eax, 20(%ecx)
+       addl    $FRAME, %esp
+
+       ret
+
+
+
+C -----------------------------------------------------------------------------
+defframe(VAR_COUNTER,-20)
+defframe(VAR_JMP,    -24)
+deflit(`STACK_SPACE',24)
+C VAR_COUNTER and VAR_JMP are extra frame slots used by the unrolled addmul.
+L(four_or_more):
+       C eax   src low limb
+       C ebx
+       C ecx
+       C edx   size
+       C esi   src
+       C edi
+       C ebp
+deflit(`FRAME',4)  dnl  %esi already pushed
+
+C First multiply src[0]*src[1..size-1] and store at dst[1..size].
+
+       subl    $STACK_SPACE-FRAME, %esp
+deflit(`FRAME',STACK_SPACE)
+       movl    $1, %ecx
+
+       movl    %edi, SAVE_EDI
+       movl    PARAM_DST, %edi
+
+       movl    %ebx, SAVE_EBX
+       subl    %edx, %ecx              C -(size-1)
+
+       movl    %ebp, SAVE_EBP
+       movl    $0, %ebx                C initial carry
+
+       leal    (%esi,%edx,4), %esi     C &src[size]
+       movl    %eax, %ebp              C multiplier
+
+       leal    -4(%edi,%edx,4), %edi   C &dst[size-1]
+
+
+C This loop runs at just over 6 c/l.
+
+L(mul_1):
+       C eax   scratch
+       C ebx   carry
+       C ecx   counter, limbs, negative, -(size-1) to -1
+       C edx   scratch
+       C esi   &src[size]
+       C edi   &dst[size-1]
+       C ebp   multiplier
+
+       movl    %ebp, %eax              C eax = multiplier, src[0]
+
+       mull    (%esi,%ecx,4)           C edx:eax = src[0] * src[size+ecx]
+
+       addl    %ebx, %eax              C add the carry limb in
+       movl    $0, %ebx                C movl keeps the carry flag for the adcl
+
+       adcl    %edx, %ebx              C new carry = high limb + carry flag
+       movl    %eax, 4(%edi,%ecx,4)    C dst[size+ecx]
+
+       incl    %ecx
+       jnz     L(mul_1)
+
+
+       movl    %ebx, 4(%edi)           C dst[size] = final carry limb
+
+
+C Addmul src[n]*src[n+1..size-1] at dst[2*n-1...], for each n=1..size-2.
+C
+C The last two addmuls, which are the bottom right corner of the product
+C triangle, are left to the end.  These are src[size-3]*src[size-2,size-1]
+C and src[size-2]*src[size-1].  If size is 4 then it's only these corner
+C cases that need to be done.
+C
+C The unrolled code is the same as mpn_addmul_1(), see that routine for some
+C comments.
+C
+C VAR_COUNTER is the outer loop, running from -(size-4) to -1, inclusive.
+C
+C VAR_JMP is the computed jump into the unrolled code, stepped by one code
+C chunk each outer loop.
+
+dnl  This is also hard-coded in the address calculation below.
+deflit(CODE_BYTES_PER_LIMB, 15)
+
+dnl  With &src[size] and &dst[size-1] pointers, the displacements in the
+dnl  unrolled code fit in a byte for UNROLL_COUNT values up to 32, but above
+dnl  that an offset must be added to them.
+deflit(OFFSET,
+ifelse(eval(UNROLL_COUNT>32),1,
+eval((UNROLL_COUNT-32)*4),
+0))
+
+       C eax
+       C ebx   carry
+       C ecx
+       C edx
+       C esi   &src[size]
+       C edi   &dst[size-1]
+       C ebp
+
+       movl    PARAM_SIZE, %ecx
+
+       subl    $4, %ecx                C size-4, the number of outer loop passes
+       jz      L(corner)               C size == 4, only the corner is left
+
+       movl    %ecx, %edx
+       negl    %ecx
+
+       shll    $4, %ecx                C -16*(size-4), plus edx is -15*(size-4)
+ifelse(OFFSET,0,,`subl $OFFSET, %esi')
+
+ifdef(`PIC',`
+       call    L(pic_calc)
+L(here):
+',`
+       leal    L(unroll_inner_end)-eval(2*CODE_BYTES_PER_LIMB)(%ecx,%edx), %ecx
+')
+       negl    %edx                    C -(size-4), the initial VAR_COUNTER
+
+ifelse(OFFSET,0,,`subl $OFFSET, %edi')
+
+       C The calculated jump mustn't be before the start of the available
+       C code.  This is the limit that UNROLL_COUNT puts on the src operand
+       C size, but checked here using the jump address directly.
+
+       ASSERT(ae,
+       `movl_text_address( L(unroll_inner_start), %eax)
+       cmpl    %eax, %ecx')
+
+
+C -----------------------------------------------------------------------------
+       ALIGN(16)
+L(unroll_outer_top):
+       C eax
+       C ebx   high limb to store
+       C ecx   VAR_JMP
+       C edx   VAR_COUNTER, limbs, negative
+       C esi   &src[size], constant
+       C edi   dst ptr, second highest limb of last addmul
+       C ebp
+
+       movl    -12+OFFSET(%esi,%edx,4), %ebp   C multiplier
+       movl    %edx, VAR_COUNTER
+
+       movl    -8+OFFSET(%esi,%edx,4), %eax    C first limb of multiplicand
+
+       mull    %ebp
+
+define(cmovX,`ifelse(eval(UNROLL_COUNT%2),1,`cmovz($@)',`cmovnz($@)')')
+
+       testb   $1, %cl                 C low bit of the old jump address
+
+       movl    %edx, %ebx      C high carry
+       leal    4(%edi), %edi           C next dst limb, leal keeps the flags
+
+       movl    %ecx, %edx      C jump
+
+       movl    %eax, %ecx      C low carry
+       leal    CODE_BYTES_PER_LIMB(%edx), %edx         C enter one limb later
+
+       cmovX(  %ebx, %ecx)     C high carry reverse
+       cmovX(  %eax, %ebx)     C low carry reverse
+       movl    %edx, VAR_JMP
+       jmp     *%edx
+
+
+       C Must be on an even address here so the low bit of the jump address
+       C will indicate which way around ecx/ebx should start.
+
+       ALIGN(2)
+
+L(unroll_inner_start):
+       C eax   scratch
+       C ebx   carry high
+       C ecx   carry low
+       C edx   scratch
+       C esi   src pointer
+       C edi   dst pointer
+       C ebp   multiplier
+       C
+       C 15 code bytes each limb
+       C ecx/ebx reversed on each chunk
+
+forloop(`i', UNROLL_COUNT, 1, `
+       deflit(`disp_src', eval(-i*4 + OFFSET))
+       deflit(`disp_dst', eval(disp_src))
+
+       m4_assert(`disp_src>=-128 && disp_src<128')
+       m4_assert(`disp_dst>=-128 && disp_dst<128')
+
+ifelse(eval(i%2),0,`
+Zdisp( movl,   disp_src,(%esi), %eax)
+       mull    %ebp
+Zdisp( addl,   %ebx, disp_dst,(%edi))
+       adcl    %eax, %ecx
+       movl    %edx, %ebx
+       adcl    $0, %ebx
+',`
+       dnl  this one comes out last
+Zdisp( movl,   disp_src,(%esi), %eax)
+       mull    %ebp
+Zdisp( addl,   %ecx, disp_dst,(%edi))
+       adcl    %eax, %ebx
+       movl    %edx, %ecx
+       adcl    $0, %ecx
+')
+')
+L(unroll_inner_end):
+
+       addl    %ebx, m4_empty_if_zero(OFFSET)(%edi)    C final low carry limb
+
+       movl    VAR_COUNTER, %edx
+       adcl    $0, %ecx
+
+       movl    %ecx, m4_empty_if_zero(OFFSET+4)(%edi)  C final high carry limb
+       movl    VAR_JMP, %ecx
+
+       incl    %edx                    C outer counter, finished at zero
+       jnz     L(unroll_outer_top)
+
+
+ifelse(OFFSET,0,,`
+       addl    $OFFSET, %esi
+       addl    $OFFSET, %edi
+')
+
+
+C -----------------------------------------------------------------------------
+       ALIGN(16)
+L(corner):
+       C eax
+       C ebx
+       C ecx
+       C edx
+       C esi   &src[size]
+       C edi   &dst[2*size-5]
+       C ebp
+       C From esi: -12 is src[size-3], -8 is src[size-2], -4 is src[size-1].
+       movl    -12(%esi), %eax
+
+       mull    -8(%esi)                C src[size-3] * src[size-2]
+
+       addl    %eax, (%edi)
+       movl    -12(%esi), %eax
+       movl    $0, %ebx                C movl keeps the carry flag for the adcl
+
+       adcl    %edx, %ebx              C ebx = carry limb
+
+       mull    -4(%esi)                C src[size-3] * src[size-1]
+
+       addl    %eax, %ebx
+       movl    -8(%esi), %eax
+
+       adcl    $0, %edx
+
+       addl    %ebx, 4(%edi)
+       movl    $0, %ebx
+
+       adcl    %edx, %ebx              C ebx = carry limb
+
+       mull    -4(%esi)                C src[size-2] * src[size-1]
+
+       movl    PARAM_SIZE, %ecx        C size, wanted by the shift loop
+       addl    %ebx, %eax
+
+       adcl    $0, %edx
+
+       movl    %eax, 8(%edi)           C stored, not added
+
+       movl    %edx, 12(%edi)          C top limb of the cross products
+       movl    PARAM_DST, %edi         C edi = dst, for the shift loop
+
+
+C Left shift of dst[1..2*size-2], the bit shifted out becomes dst[2*size-1].
+
+       subl    $1, %ecx                C size-1
+       xorl    %eax, %eax              C ready for final adcl, and clear carry
+
+       movl    %ecx, %edx
+       movl    PARAM_SRC, %esi
+
+
+L(lshift):
+       C eax
+       C ebx
+       C ecx   counter, size-1 to 1
+       C edx   size-1 (for later use)
+       C esi   src (for later use)
+       C edi   dst, incrementing
+       C ebp
+
+       rcll    4(%edi)                 C two limbs each pass, carry chained
+       rcll    8(%edi)
+
+       leal    8(%edi), %edi           C leal and decl both keep the carry flag
+       decl    %ecx
+       jnz     L(lshift)
+
+
+       adcl    %eax, %eax              C eax was zero, now the bit shifted out
+
+       movl    %eax, 4(%edi)           C dst most significant limb
+       movl    (%esi), %eax            C src[0]
+
+       leal    4(%esi,%edx,4), %esi    C &src[size]
+       subl    %edx, %ecx              C -(size-1)
+
+
+C Now add in the squares on the diagonal, src[0]^2, src[1]^2, ...,
+C src[size-1]^2.  dst[0] hasn't been set at all yet, and just gets the
+C low limb of src[0]^2.
+
+
+       mull    %eax
+
+       movl    %eax, (%edi,%ecx,8)     C dst[0]
+
+
+L(diag):
+       C eax   scratch
+       C ebx   scratch
+       C ecx   counter, negative
+       C edx   carry
+       C esi   &src[size]
+       C edi   dst[2*size-2]
+       C ebp
+
+       movl    (%esi,%ecx,4), %eax     C next src limb to square
+       movl    %edx, %ebx              C high limb of the previous square
+
+       mull    %eax
+
+       addl    %ebx, 4(%edi,%ecx,8)
+       adcl    %eax, 8(%edi,%ecx,8)
+       adcl    $0, %edx                C carried forward to the next pass
+
+       incl    %ecx
+       jnz     L(diag)
+
+
+       movl    SAVE_ESI, %esi
+       movl    SAVE_EBX, %ebx
+
+       addl    %edx, 4(%edi)           C dst most significant limb
+
+       movl    SAVE_EDI, %edi
+       movl    SAVE_EBP, %ebp
+       addl    $FRAME, %esp
+       ret
+
+
+
+C -----------------------------------------------------------------------------
+ifdef(`PIC',`
+L(pic_calc):
+       addl    (%esp), %ecx            C add the return address, which is L(here)
+       addl    $L(unroll_inner_end)-L(here)-eval(2*CODE_BYTES_PER_LIMB), %ecx
+       addl    %edx, %ecx              C same total as the non-PIC leal
+       ret_internal
+')
+
+
+EPILOGUE()
diff --git a/mpn/x86/p6/sse2/addmul_1.asm b/mpn/x86/p6/sse2/addmul_1.asm

new file mode 100644 (file)

index 0000000..b601c54
--- /dev/null
+++ b/mpn/x86/p6/sse2/addmul_1.asm
@@ -0,0 +1,26 @@
+dnl  Intel P6/SSE2 mpn_addmul_1.
+
+dnl  Copyright 2008 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C TODO
+C  * Write P6 specific SSE2 code.
+
+MULFUNC_PROLOGUE(mpn_addmul_1)
+include_mpn(`x86/pentium4/sse2/addmul_1.asm')
diff --git a/mpn/x86/p6/sse2/gmp-mparam.h b/mpn/x86/p6/sse2/gmp-mparam.h

new file mode 100644 (file)

index 0000000..ecef436
--- /dev/null
+++ b/mpn/x86/p6/sse2/gmp-mparam.h
@@ -0,0 +1,176 @@
+/* Intel P6/sse2 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2008, 2009, 2010 Free
+Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#define GMP_LIMB_BITS 32
+#define BYTES_PER_MP_LIMB 4
+
+
+/* NOTE: In a fat binary build SQR_TOOM2_THRESHOLD here cannot be more than the
+   value in mpn/x86/p6/gmp-mparam.h.  The latter is used as a hard limit in
+   mpn/x86/p6/sqr_basecase.asm.  */
+
+
+/* 1867 MHz P6 model 13 */
+
+#define MOD_1_NORM_THRESHOLD                 4
+#define MOD_1_UNNORM_THRESHOLD               6
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          9
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          6
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0  /* never mpn_mod_1_1p */
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD         8
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     15
+#define USE_PREINV_DIVREM_1                  1  /* native */
+#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD           22
+
+#define MUL_TOOM22_THRESHOLD                20
+#define MUL_TOOM33_THRESHOLD                77
+#define MUL_TOOM44_THRESHOLD               182
+#define MUL_TOOM6H_THRESHOLD               252
+#define MUL_TOOM8H_THRESHOLD               381
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      75
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     122
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     115
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD      79
+
+#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
+#define SQR_TOOM2_THRESHOLD                 30
+#define SQR_TOOM3_THRESHOLD                101
+#define SQR_TOOM4_THRESHOLD                154
+#define SQR_TOOM6_THRESHOLD                222
+#define SQR_TOOM8_THRESHOLD                547
+
+#define MULMOD_BNM1_THRESHOLD               13
+#define SQRMOD_BNM1_THRESHOLD               18
+
+#define MUL_FFT_MODF_THRESHOLD             565  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    565, 5}, {     25, 6}, {     13, 5}, {     27, 6}, \
+    {     25, 7}, {     13, 6}, {     28, 7}, {     15, 6}, \
+    {     31, 7}, {     17, 6}, {     35, 7}, {     27, 8}, \
+    {     15, 7}, {     35, 8}, {     19, 7}, {     41, 8}, \
+    {     23, 7}, {     47, 8}, {     27, 9}, {     15, 8}, \
+    {     31, 7}, {     63, 8}, {     39, 9}, {     23, 5}, \
+    {    383, 4}, {    991, 5}, {    511, 6}, {    267, 7}, \
+    {    157, 8}, {     91, 9}, {     47, 8}, {    111, 9}, \
+    {     63, 8}, {    127, 9}, {     79,10}, {     47, 9}, \
+    {     95,11}, {     31,10}, {     63, 9}, {    135,10}, \
+    {     79, 9}, {    159,10}, {     95,11}, {     63,10}, \
+    {    143, 9}, {    287,10}, {    159,11}, {     95,10}, \
+    {    191,12}, {     63,11}, {    127,10}, {    255, 9}, \
+    {    511,10}, {    271, 9}, {    543,10}, {    287,11}, \
+    {    159,10}, {    335, 9}, {    671,11}, {    191,10}, \
+    {    383, 9}, {    767,10}, {    399, 9}, {    799,10}, \
+    {    415,11}, {    223,12}, {    127,11}, {    255,10}, \
+    {    543, 9}, {   1087,11}, {    287,10}, {    607,11}, \
+    {    319,10}, {    671,12}, {    191,11}, {    383,10}, \
+    {    799,11}, {    415,10}, {    831,13}, {    127,12}, \
+    {    255,11}, {    543,10}, {   1087,11}, {    607,10}, \
+    {   1215,12}, {    319,11}, {    671,10}, {   1343,11}, \
+    {    735,10}, {   1471,12}, {    383,11}, {    799,10}, \
+    {   1599,11}, {    863,12}, {    447,11}, {    959,13}, \
+    {    255,12}, {    511,11}, {   1087,12}, {    575,11}, \
+    {   1215,12}, {    639,11}, {   1343,12}, {    703,11}, \
+    {   1471,13}, {    383,12}, {    831,11}, {   1727,12}, \
+    {    959,14}, {    255,13}, {    511,12}, {   1215,13}, \
+    {    639,12}, {   1471,11}, {   2943,13}, {    767,12}, \
+    {   1727,13}, {    895,12}, {   1919,14}, {    511,13}, \
+    {   1023,12}, {   2111,13}, {   1151,12}, {   2431,13}, \
+    {   1407,12}, {   2815,14}, {    767,13}, {   1663,12}, \
+    {   3455,13}, {   8192,14}, {  16384,15}, {  32768,16} }
+#define MUL_FFT_TABLE3_SIZE 132
+#define MUL_FFT_THRESHOLD                 6784
+
+#define SQR_FFT_MODF_THRESHOLD             472  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    472, 5}, {     25, 6}, {     13, 5}, {     27, 6}, \
+    {     25, 7}, {     13, 6}, {     27, 7}, {     15, 6}, \
+    {     31, 7}, {     17, 6}, {     35, 7}, {     27, 8}, \
+    {     15, 7}, {     35, 8}, {     19, 7}, {     41, 8}, \
+    {     23, 7}, {     49, 8}, {     27, 9}, {     15, 8}, \
+    {     39, 9}, {     23, 8}, {     51,10}, {     15, 9}, \
+    {     31, 8}, {     63, 4}, {   1023, 8}, {     67, 9}, \
+    {     39, 5}, {    639, 4}, {   1471, 6}, {    383, 7}, \
+    {    209, 8}, {    119, 9}, {     63, 7}, {    255, 8}, \
+    {    139, 9}, {     71, 8}, {    143, 9}, {     79,10}, \
+    {     47, 9}, {     95,11}, {     31,10}, {     63, 9}, \
+    {    135,10}, {     79, 9}, {    159, 8}, {    319, 9}, \
+    {    167,10}, {     95,11}, {     63,10}, {    143, 9}, \
+    {    287,10}, {    159,11}, {     95,10}, {    191,12}, \
+    {     63,11}, {    127,10}, {    255, 9}, {    543, 8}, \
+    {   1087,10}, {    287, 9}, {    575,11}, {    159,10}, \
+    {    319, 9}, {    639,10}, {    335, 9}, {    671,10}, \
+    {    351, 9}, {    703,11}, {    191,10}, {    383, 9}, \
+    {    767,10}, {    399, 9}, {    799,10}, {    415, 9}, \
+    {    831,11}, {    223,12}, {    127,11}, {    255,10}, \
+    {    543, 9}, {   1087,11}, {    287,10}, {    607, 9}, \
+    {   1215,11}, {    319,10}, {    671, 9}, {   1343,11}, \
+    {    351,10}, {    703,12}, {    191,11}, {    383,10}, \
+    {    799,11}, {    415,10}, {    831,13}, {    127,12}, \
+    {    255,11}, {    543,10}, {   1087,11}, {    607,12}, \
+    {    319,11}, {    671,10}, {   1343,11}, {    735,12}, \
+    {    383,11}, {    799,10}, {   1599,11}, {    863,12}, \
+    {    447,11}, {    959,13}, {    255,12}, {    511,11}, \
+    {   1087,12}, {    575,11}, {   1215,12}, {    639,11}, \
+    {   1343,12}, {    703,11}, {   1471,13}, {    383,12}, \
+    {    767,11}, {   1599,12}, {    831,11}, {   1727,12}, \
+    {    959,14}, {    255,13}, {    511,12}, {   1215,13}, \
+    {    639,12}, {   1471,13}, {    767,12}, {   1727,13}, \
+    {    895,12}, {   1919,14}, {    511,13}, {   1023,12}, \
+    {   2111,13}, {   1151,12}, {   2431,13}, {   1407,14}, \
+    {    767,13}, {   1663,12}, {   3455,13}, {   8192,14}, \
+    {  16384,15}, {  32768,16} }
+#define SQR_FFT_TABLE3_SIZE 146
+#define SQR_FFT_THRESHOLD                 5760
+
+#define MULLO_BASECASE_THRESHOLD             0  /* always */
+#define MULLO_DC_THRESHOLD                  34
+#define MULLO_MUL_N_THRESHOLD            13463
+
+#define DC_DIV_QR_THRESHOLD                 19
+#define DC_DIVAPPR_Q_THRESHOLD              56
+#define DC_BDIV_QR_THRESHOLD                60
+#define DC_BDIV_Q_THRESHOLD                132
+
+#define INV_MULMOD_BNM1_THRESHOLD           38
+#define INV_NEWTON_THRESHOLD                69
+#define INV_APPR_THRESHOLD                  65
+
+#define BINV_NEWTON_THRESHOLD              276
+#define REDC_1_TO_REDC_N_THRESHOLD          63
+
+#define MU_DIV_QR_THRESHOLD               1308
+#define MU_DIVAPPR_Q_THRESHOLD             998
+#define MUPI_DIV_QR_THRESHOLD               62
+#define MU_BDIV_QR_THRESHOLD              1442
+#define MU_BDIV_Q_THRESHOLD               1470
+
+#define MATRIX22_STRASSEN_THRESHOLD         17
+#define HGCD_THRESHOLD                      60
+#define GCD_DC_THRESHOLD                   393
+#define GCDEXT_DC_THRESHOLD                303
+#define JACOBI_BASE_METHOD                   1
+
+#define GET_STR_DC_THRESHOLD                13
+#define GET_STR_PRECOMPUTE_THRESHOLD        22
+#define SET_STR_DC_THRESHOLD               587
+#define SET_STR_PRECOMPUTE_THRESHOLD       983
diff --git a/mpn/x86/p6/sse2/mod_1_4.asm b/mpn/x86/p6/sse2/mod_1_4.asm

new file mode 100644 (file)

index 0000000..e3631db
--- /dev/null
+++ b/mpn/x86/p6/sse2/mod_1_4.asm
@@ -0,0 +1,23 @@
+dnl  Intel P6/SSE2 mpn_mod_1_4.
+
+dnl  Copyright 2009, 2011 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+C No P6-specific code: mpn_mod_1s_4p comes from the Pentium4 SSE2 file below.
+MULFUNC_PROLOGUE(mpn_mod_1s_4p)
+include_mpn(`x86/pentium4/sse2/mod_1_4.asm')
diff --git a/mpn/x86/p6/sse2/mul_1.asm b/mpn/x86/p6/sse2/mul_1.asm

new file mode 100644 (file)

index 0000000..fc3d4e6
--- /dev/null
+++ b/mpn/x86/p6/sse2/mul_1.asm
@@ -0,0 +1,27 @@
+dnl  Intel P6/SSE2 mpn_mul_1.
+
+dnl  Copyright 2008 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C TODO
+C  * Write P6 specific SSE2 code.  It should reach 3 c/l.
+C    The Pentium4 code runs at 4.2 c/l.
+C Meanwhile mpn_mul_1 is the Pentium4 SSE2 code, included unchanged.
+MULFUNC_PROLOGUE(mpn_mul_1)
+include_mpn(`x86/pentium4/sse2/mul_1.asm')
diff --git a/mpn/x86/p6/sse2/mul_basecase.asm b/mpn/x86/p6/sse2/mul_basecase.asm

new file mode 100644 (file)

index 0000000..f52ece0
--- /dev/null
+++ b/mpn/x86/p6/sse2/mul_basecase.asm
@@ -0,0 +1,24 @@
+dnl  Intel P6/SSE2 mpn_mul_basecase.
+
+dnl  Copyright 2008 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C No P6-specific code: mpn_mul_basecase comes from the Pentium4 SSE2 file below.
+MULFUNC_PROLOGUE(mpn_mul_basecase)
+include_mpn(`x86/pentium4/sse2/mul_basecase.asm')
diff --git a/mpn/x86/p6/sse2/popcount.asm b/mpn/x86/p6/sse2/popcount.asm

new file mode 100644 (file)

index 0000000..f818d6e
--- /dev/null
+++ b/mpn/x86/p6/sse2/popcount.asm
@@ -0,0 +1,24 @@
+dnl  Intel P6/SSE2 mpn_popcount -- population count.
+
+dnl  Copyright 2008 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C No P6-specific code: mpn_popcount comes from the Pentium4 SSE2 file below.
+MULFUNC_PROLOGUE(mpn_popcount)
+include_mpn(`x86/pentium4/sse2/popcount.asm')
diff --git a/mpn/x86/p6/sse2/sqr_basecase.asm b/mpn/x86/p6/sse2/sqr_basecase.asm

new file mode 100644 (file)

index 0000000..8a7f249
--- /dev/null
+++ b/mpn/x86/p6/sse2/sqr_basecase.asm
@@ -0,0 +1,24 @@
+dnl  Intel P6/SSE2 mpn_sqr_basecase.
+
+dnl  Copyright 2008 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C No P6-specific code: mpn_sqr_basecase comes from the Pentium4 SSE2 file below.
+MULFUNC_PROLOGUE(mpn_sqr_basecase)
+include_mpn(`x86/pentium4/sse2/sqr_basecase.asm')
diff --git a/mpn/x86/p6/sse2/submul_1.asm b/mpn/x86/p6/sse2/submul_1.asm

new file mode 100644 (file)

index 0000000..ae97fd6
--- /dev/null
+++ b/mpn/x86/p6/sse2/submul_1.asm
@@ -0,0 +1,24 @@
+dnl  Intel P6/SSE2 mpn_submul_1.
+
+dnl  Copyright 2008 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C Same as the other p6/sse2 wrappers: the body is the Pentium4 SSE2 code.
+MULFUNC_PROLOGUE(mpn_submul_1)
+include_mpn(`x86/pentium4/sse2/submul_1.asm')
diff --git a/mpn/x86/pentium/README b/mpn/x86/pentium/README

new file mode 100644 (file)

index 0000000..6c4d872
--- /dev/null
+++ b/mpn/x86/pentium/README
@@ -0,0 +1,170 @@
+Copyright 1996, 1999, 2000, 2001, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+
+
+
+                   INTEL PENTIUM P5 MPN SUBROUTINES
+
+
+This directory contains mpn functions optimized for Intel Pentium (P5,P54)
+processors.  The mmx subdirectory has additional code for Pentium with MMX
+(P55).
+
+
+STATUS
+
+                                cycles/limb
+
+       mpn_add_n/sub_n            2.375
+
+       mpn_mul_1                 12.0
+       mpn_add/submul_1          14.0
+
+       mpn_mul_basecase          14.2 cycles/crossproduct (approx)
+
+       mpn_sqr_basecase           8 cycles/crossproduct (approx)
+                                   or 15.5 cycles/triangleproduct (approx)
+
+       mpn_l/rshift               5.375 normal (6.0 on P54)
+                                  1.875 special shift by 1 bit
+
+       mpn_divrem_1              44.0
+       mpn_mod_1                 28.0
+       mpn_divexact_by3          15.0
+
+       mpn_copyi/copyd            1.25
+
+Pentium MMX gets the following improvements
+
+       mpn_l/rshift               1.75
+
+       mpn_mul_1                 12.0 normal, 7.0 for 16-bit multiplier
+
+
+mpn_add_n and mpn_sub_n run at asymptotically 2 cycles/limb.  Due to loop
+overhead and other delays (cache refill?), they run at or near 2.5
+cycles/limb.
+
+mpn_mul_1, mpn_addmul_1, mpn_submul_1 all run 1 cycle faster than they
+should.  Intel documentation says a mul instruction is 10 cycles, but it
+measures 9 and the routines using it run as 9.
+
+
+
+P55 MMX AND X87
+
+The cost of switching between MMX and x87 floating point on P55 is about 100
+cycles (fld1/por/emms for instance).  To avoid that cost the two are never
+mixed, which currently means using MMX and not x87.
+
+MMX offers a big speedup for lshift and rshift, and a nice speedup for
+16-bit multipliers in mpn_mul_1.  If fast code using x87 is found then
+perhaps the preference for MMX will be reversed.
+
+
+
+
+P54 SHLDL
+
+mpn_lshift and mpn_rshift run at about 6 cycles/limb on P5 and P54, but the
+documentation indicates that they should take only 43/8 = 5.375 cycles/limb,
+or 5 cycles/limb asymptotically.  The P55 runs them at the expected speed.
+
+It seems that on P54 a shldl or shrdl allows pairing in one following cycle,
+but not two.  For example, back to back repetitions of the following
+
+       shldl(  %cl, %eax, %ebx)
+       xorl    %edx, %edx
+       xorl    %esi, %esi
+
+run at 5 cycles, as expected, but repetitions of the following run at 7
+cycles, whereas 6 would be expected (and is achieved on P55),
+
+       shldl(  %cl, %eax, %ebx)
+       xorl    %edx, %edx
+       xorl    %esi, %esi
+       xorl    %edi, %edi
+       xorl    %ebp, %ebp
+
+Three xorls run at 7 cycles too, so it doesn't seem to be just that pairing
+is inhibited only in the second following cycle (or something like that).
+
+Avoiding this problem would bring P54 shifts down from 6.0 c/l to 5.5 with a
+pattern of shift, 2 loads, shift, 2 stores, shift, etc.  A start has been
+made on something like that, but it's not yet complete.
+
+
+
+
+OTHER NOTES
+
+Prefetching Destinations
+
+    Pentium doesn't allocate cache lines on writes, unlike most other modern
+    processors.  Since the functions in the mpn class do array writes, we
+    have to handle allocating the destination cache lines by reading a word
+    from it in the loops, to achieve the best performance.
+
+Prefetching Sources
+
+    Prefetching of sources is pointless since there are no out-of-order
+    loads.  Any load instruction blocks until the line is brought to L1, so
+    it may as well be the load that wants the data which blocks.
+
+Data Cache Bank Clashes
+
+    Pairing of memory operations requires that the two issued operations
+    refer to different cache banks (ie. different addresses modulo 32
+    bytes).  The simplest way to ensure this is to read/write two words from
+    the same object.  If we make operations on different objects, they might
+    or might not be to the same cache bank.
+
+PIC %eip Fetching
+
+    A simple call $+5 and popl can be used to get %eip, there's no need to
+    balance calls and returns since P5 doesn't have any return stack branch
+    prediction.
+
+Float Multiplies
+
+    fmul is pairable and can be issued every 2 cycles (with a 4 cycle
+    latency for data ready to use).  This is a lot better than integer mull
+    or imull at 9 cycles non-pairing.  Unfortunately the advantage is
+    quickly eaten away by needing to throw data through memory back to the
+    integer registers to adjust for fild and fist being signed, and to do
+    things like propagating carry bits.
+
+
+
+
+
+REFERENCES
+
+"Intel Architecture Optimization Manual", 1997, order number 242816.  This
+is mostly about P5, the parts about P6 aren't relevant.  Available on-line:
+
+        http://download.intel.com/design/PentiumII/manuals/242816.htm
+
+
+
+----------------
+Local variables:
+mode: text
+fill-column: 76
+End:
diff --git a/mpn/x86/pentium/aors_n.asm b/mpn/x86/pentium/aors_n.asm

new file mode 100644 (file)

index 0000000..30d0df7
--- /dev/null
+++ b/mpn/x86/pentium/aors_n.asm
@@ -0,0 +1,193 @@
+dnl  Intel Pentium mpn_add_n/mpn_sub_n -- mpn addition and subtraction.
+
+dnl  Copyright 1992, 1994, 1995, 1996, 1999, 2000, 2002 Free Software
+dnl  Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C P5: 2.375 cycles/limb
+
+C Pick the add-with-carry or subtract-with-borrow opcode and matching names.
+ifdef(`OPERATION_add_n',`
+       define(M4_inst,        adcl)
+       define(M4_function_n,  mpn_add_n)
+       define(M4_function_nc, mpn_add_nc)
+
+',`ifdef(`OPERATION_sub_n',`
+       define(M4_inst,        sbbl)
+       define(M4_function_n,  mpn_sub_n)
+       define(M4_function_nc, mpn_sub_nc)
+
+',`m4_error(`Need OPERATION_add_n or OPERATION_sub_n
+')')')
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
+
+
+C mp_limb_t M4_function_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C                          mp_size_t size);
+C mp_limb_t M4_function_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C                           mp_size_t size, mp_limb_t carry);
+
+defframe(PARAM_CARRY,20)
+defframe(PARAM_SIZE, 16)
+defframe(PARAM_SRC2, 12)
+defframe(PARAM_SRC1, 8)
+defframe(PARAM_DST,  4)
+
+       TEXT
+       ALIGN(8)
+PROLOGUE(M4_function_nc)
+C Carry-in entry: same setup as the plain entry point, then joins its code.
+       pushl   %edi
+       pushl   %esi
+       pushl   %ebx
+       pushl   %ebp
+deflit(`FRAME',16)
+
+       movl    PARAM_DST,%edi
+       movl    PARAM_SRC1,%esi
+       movl    PARAM_SRC2,%ebp
+       movl    PARAM_SIZE,%ecx
+
+       movl    (%ebp),%ebx             C preload src2[0]
+
+       decl    %ecx                    C size-1, last limb is done at L(end2)
+       movl    %ecx,%edx
+       shrl    $3,%ecx                 C (size-1)/8 passes of the 8-limb loop
+       andl    $7,%edx                 C (size-1)%8 limbs for L(oop2)
+       testl   %ecx,%ecx               C zero carry flag
+       jz      L(endgo)
+
+       pushl   %edx                    C L(oop) uses edx as scratch, keep count
+FRAME_pushl()
+       movl    PARAM_CARRY,%eax
+       shrl    $1,%eax                 C shift bit 0 into carry
+       jmp     L(oop)                  C enter the 8-limb loop with carry-in set
+
+L(endgo):
+deflit(`FRAME',16)
+       movl    PARAM_CARRY,%eax
+       shrl    $1,%eax                 C shift bit 0 into carry
+       jmp     L(end)                  C fewer than 9 limbs, only the tail code
+
+EPILOGUE()
+
+
+       ALIGN(8)
+PROLOGUE(M4_function_n)
+C 8 limbs per L(oop) pass, (size-1)%8 limbs in L(oop2), final limb at L(end2).
+       pushl   %edi
+       pushl   %esi
+       pushl   %ebx
+       pushl   %ebp
+deflit(`FRAME',16)
+
+       movl    PARAM_DST,%edi
+       movl    PARAM_SRC1,%esi
+       movl    PARAM_SRC2,%ebp
+       movl    PARAM_SIZE,%ecx
+
+       movl    (%ebp),%ebx             C preload src2[0]
+
+       decl    %ecx                    C size-1, last limb is done at L(end2)
+       movl    %ecx,%edx
+       shrl    $3,%ecx                 C (size-1)/8 passes of the 8-limb loop
+       andl    $7,%edx                 C (size-1)%8 limbs for L(oop2)
+       testl   %ecx,%ecx               C zero carry flag
+       jz      L(end)
+       pushl   %edx                    C L(oop) uses edx as scratch, keep count
+FRAME_pushl()
+
+       ALIGN(8)
+L(oop):        movl    28(%edi),%eax           C fetch destination cache line
+       leal    32(%edi),%edi           C leal does not disturb the carry flag
+
+L(1):  movl    (%esi),%eax
+       movl    4(%esi),%edx
+       M4_inst %ebx,%eax               C limb 0, ebx was preloaded
+       movl    4(%ebp),%ebx
+       M4_inst %ebx,%edx               C limb 1
+       movl    8(%ebp),%ebx            C preload src2 for limb 2
+       movl    %eax,-32(%edi)
+       movl    %edx,-28(%edi)
+
+L(2):  movl    8(%esi),%eax
+       movl    12(%esi),%edx
+       M4_inst %ebx,%eax               C limb 2
+       movl    12(%ebp),%ebx
+       M4_inst %ebx,%edx               C limb 3
+       movl    16(%ebp),%ebx           C preload src2 for limb 4
+       movl    %eax,-24(%edi)
+       movl    %edx,-20(%edi)
+
+L(3):  movl    16(%esi),%eax
+       movl    20(%esi),%edx
+       M4_inst %ebx,%eax               C limb 4
+       movl    20(%ebp),%ebx
+       M4_inst %ebx,%edx               C limb 5
+       movl    24(%ebp),%ebx           C preload src2 for limb 6
+       movl    %eax,-16(%edi)
+       movl    %edx,-12(%edi)
+
+L(4):  movl    24(%esi),%eax
+       movl    28(%esi),%edx
+       M4_inst %ebx,%eax               C limb 6
+       movl    28(%ebp),%ebx
+       M4_inst %ebx,%edx               C limb 7
+       movl    32(%ebp),%ebx           C preload src2 for the next limb
+       movl    %eax,-8(%edi)
+       movl    %edx,-4(%edi)
+
+       leal    32(%esi),%esi
+       leal    32(%ebp),%ebp
+       decl    %ecx                    C decl leaves the carry flag intact
+       jnz     L(oop)
+
+       popl    %edx                    C recover the (size-1)%8 count
+FRAME_popl()
+L(end):
+       decl    %edx                    C test %edx w/o clobbering carry
+       js      L(end2)                 C count was zero, only the last limb left
+       incl    %edx                    C restore the count, carry still intact
+L(oop2):
+       leal    4(%edi),%edi
+       movl    (%esi),%eax
+       M4_inst %ebx,%eax               C one limb per pass
+       movl    4(%ebp),%ebx            C preload src2 for the next limb
+       movl    %eax,-4(%edi)
+       leal    4(%esi),%esi
+       leal    4(%ebp),%ebp
+       decl    %edx
+       jnz     L(oop2)
+L(end2):
+       movl    (%esi),%eax
+       M4_inst %ebx,%eax               C final limb, src2 already in ebx
+       movl    %eax,(%edi)
+
+       sbbl    %eax,%eax               C eax = 0 or -1 from the carry flag
+       negl    %eax                    C return value 0 or 1
+
+       popl    %ebp
+       popl    %ebx
+       popl    %esi
+       popl    %edi
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/pentium/aorsmul_1.asm b/mpn/x86/pentium/aorsmul_1.asm

new file mode 100644 (file)

index 0000000..a50299b
--- /dev/null
+++ b/mpn/x86/pentium/aorsmul_1.asm
@@ -0,0 +1,133 @@
+dnl  Intel Pentium mpn_addmul_1, mpn_submul_1 -- add or subtract mpn multiple.
+
+dnl  Copyright 1992, 1994, 1996, 1999, 2000, 2002 Free Software Foundation,
+dnl  Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C P5: 14.0 cycles/limb
+
+C Pick the accumulate opcode (addl or subl) and the matching function names.
+ifdef(`OPERATION_addmul_1', `
+      define(M4_inst,        addl)
+      define(M4_function_1,  mpn_addmul_1)
+      define(M4_function_1c, mpn_addmul_1c)
+
+',`ifdef(`OPERATION_submul_1', `
+      define(M4_inst,        subl)
+      define(M4_function_1,  mpn_submul_1)
+      define(M4_function_1c, mpn_submul_1c)
+
+',`m4_error(`Need OPERATION_addmul_1 or OPERATION_submul_1
+')')')
+
+MULFUNC_PROLOGUE(mpn_addmul_1 mpn_addmul_1c mpn_submul_1 mpn_submul_1c)
+
+
+C mp_limb_t mpn_addmul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                         mp_limb_t mult);
+C mp_limb_t mpn_addmul_1c (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                          mp_limb_t mult, mp_limb_t carry);
+C
+C mp_limb_t mpn_submul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                         mp_limb_t mult);
+C mp_limb_t mpn_submul_1c (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                          mp_limb_t mult, mp_limb_t carry);
+C
+
+defframe(PARAM_CARRY,     20)
+defframe(PARAM_MULTIPLIER,16)
+defframe(PARAM_SIZE,      12)
+defframe(PARAM_SRC,       8)
+defframe(PARAM_DST,       4)
+
+       TEXT
+
+       ALIGN(8)
+PROLOGUE(M4_function_1c)
+deflit(`FRAME',0)
+C Carry-in entry: load the initial carry limb, then share the main code.
+       movl    PARAM_CARRY, %ecx       C initial carry limb
+       pushl   %esi            FRAME_pushl()
+
+       jmp     L(start_1c)             C same stack layout as the plain entry
+
+EPILOGUE()
+
+
+       ALIGN(8)
+PROLOGUE(M4_function_1)
+deflit(`FRAME',0)
+C One limb per pass: dst[i] = dst[i] +/- low(src[i]*mult + carry), new carry.
+       xorl    %ecx, %ecx              C no carry-in on this entry point
+       pushl   %esi            FRAME_pushl()
+
+L(start_1c):
+       movl    PARAM_SRC, %esi
+       movl    PARAM_SIZE, %eax
+
+       pushl   %edi            FRAME_pushl()
+       pushl   %ebx            FRAME_pushl()
+
+       movl    PARAM_DST, %edi
+       leal    -1(%eax), %ebx          C size-1
+
+       leal    (%esi,%eax,4), %esi     C &src[size]
+       xorl    $-1, %ebx               C -size, and clear carry
+
+       leal    (%edi,%eax,4), %edi     C &dst[size]
+
+L(top):
+       C eax
+       C ebx   counter, negative
+       C ecx   carry
+       C edx
+       C esi   src end
+       C edi   dst end
+       C ebp
+
+       adcl    $0, %ecx                C add carry/borrow left by the last M4_inst
+       movl    (%esi,%ebx,4), %eax     C next src limb
+
+       mull    PARAM_MULTIPLIER        C edx:eax = src limb * multiplier
+
+       addl    %ecx, %eax              C low product + carry limb
+       movl    (%edi,%ebx,4), %ecx     C dst limb
+
+       adcl    $0, %edx                C propagate into the high product
+       M4_inst %eax, %ecx              C dst limb +/- low product
+
+       movl    %ecx, (%edi,%ebx,4)
+       incl    %ebx                    C incl leaves the carry flag intact
+
+       movl    %edx, %ecx              C high product is the next carry limb
+       jnz     L(top)
+
+
+       adcl    $0, %ecx                C fold in the last carry/borrow
+       popl    %ebx
+
+       movl    %ecx, %eax              C return the carry limb
+       popl    %edi
+
+       popl    %esi
+
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/pentium/com.asm b/mpn/x86/pentium/com.asm

new file mode 100644 (file)

index 0000000..fbb4ffd
--- /dev/null
+++ b/mpn/x86/pentium/com.asm
@@ -0,0 +1,170 @@
+dnl  Intel Pentium mpn_com -- mpn ones complement.
+
+dnl  Copyright 1996, 2001, 2002, 2006 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C P5: 1.75 cycles/limb
+
+
+NAILS_SUPPORT(0-31)
+
+
+C void mpn_com (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C
+C This code is similar to mpn_copyi, basically there's just some "xorl
+C $GMP_NUMB_MASK"s inserted.
+C
+C Alternatives:
+C
+C On P55 some MMX code could be 1.25 c/l (8 limb unrolled) if src and dst
+C are the same alignment mod 8, but it doesn't seem worth the trouble for
+C just that case (there'd need to be some plain integer available too for
+C the unaligned case).
+
+defframe(PARAM_SIZE,12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+       TEXT
+       ALIGN(8)
+PROLOGUE(mpn_com)
+deflit(`FRAME',0)
+C 8 limbs per L(top) pass, then 4, 2 and 1 limb steps for what is left over.
+       movl    PARAM_SRC, %eax
+       movl    PARAM_SIZE, %ecx
+
+       pushl   %esi    FRAME_pushl()
+       pushl   %edi    FRAME_pushl()
+
+       leal    (%eax,%ecx,4), %eax     C &src[size]
+       xorl    $-1, %ecx               C -size-1
+
+       movl    PARAM_DST, %edx
+       addl    $8, %ecx                C -size+7
+
+       jns     L(end)                  C fewer than 8 limbs, no unrolled pass
+
+       movl    (%edx), %esi            C fetch destination cache line
+       nop
+
+L(top):
+       C eax   &src[size]
+       C ebx
+       C ecx   counter, limbs, negative
+       C edx   dst, incrementing
+       C esi   scratch
+       C edi   scratch
+       C ebp
+
+       movl    28(%edx), %esi          C destination prefetch
+       addl    $32, %edx
+
+       movl    -28(%eax,%ecx,4), %esi
+       movl    -24(%eax,%ecx,4), %edi
+       xorl    $GMP_NUMB_MASK, %esi
+       xorl    $GMP_NUMB_MASK, %edi
+       movl    %esi, -32(%edx)
+       movl    %edi, -28(%edx)
+
+       movl    -20(%eax,%ecx,4), %esi
+       movl    -16(%eax,%ecx,4), %edi
+       xorl    $GMP_NUMB_MASK, %esi
+       xorl    $GMP_NUMB_MASK, %edi
+       movl    %esi, -24(%edx)
+       movl    %edi, -20(%edx)
+
+       movl    -12(%eax,%ecx,4), %esi
+       movl    -8(%eax,%ecx,4), %edi
+       xorl    $GMP_NUMB_MASK, %esi
+       xorl    $GMP_NUMB_MASK, %edi
+       movl    %esi, -16(%edx)
+       movl    %edi, -12(%edx)
+
+       movl    -4(%eax,%ecx,4), %esi
+       movl    (%eax,%ecx,4), %edi
+       xorl    $GMP_NUMB_MASK, %esi
+       xorl    $GMP_NUMB_MASK, %edi
+       movl    %esi, -8(%edx)
+       movl    %edi, -4(%edx)
+
+       addl    $8, %ecx
+       js      L(top)                  C loop while 8 or more limbs remain
+
+
+L(end):
+       C eax   &src[size]
+       C ecx   0 to 7, representing respectively 7 to 0 limbs remaining
+       C edx   dst, next location to store
+
+       subl    $4, %ecx                C negative if 4 or more limbs remain
+       nop
+
+       jns     L(no4)
+
+       movl    -12(%eax,%ecx,4), %esi
+       movl    -8(%eax,%ecx,4), %edi
+       xorl    $GMP_NUMB_MASK, %esi
+       xorl    $GMP_NUMB_MASK, %edi
+       movl    %esi, (%edx)
+       movl    %edi, 4(%edx)
+
+       movl    -4(%eax,%ecx,4), %esi
+       movl    (%eax,%ecx,4), %edi
+       xorl    $GMP_NUMB_MASK, %esi
+       xorl    $GMP_NUMB_MASK, %edi
+       movl    %esi, 8(%edx)
+       movl    %edi, 12(%edx)
+
+       addl    $16, %edx
+       addl    $4, %ecx                C back to 0..3, same range as the jns path
+L(no4):
+
+       subl    $2, %ecx                C negative if 2 or 3 limbs remain
+       nop
+
+       jns     L(no2)
+
+       movl    -4(%eax,%ecx,4), %esi
+       movl    (%eax,%ecx,4), %edi
+       xorl    $GMP_NUMB_MASK, %esi
+       xorl    $GMP_NUMB_MASK, %edi
+       movl    %esi, (%edx)
+       movl    %edi, 4(%edx)
+
+       addl    $8, %edx
+       addl    $2, %ecx                C 0 if one limb remains, else 1
+L(no2):
+
+       popl    %edi                    C popl does not change the flags
+       jnz     L(done)                 C nonzero ecx means nothing is left
+
+       movl    -4(%eax), %ecx          C src[size-1], the last limb
+
+       xorl    $GMP_NUMB_MASK, %ecx
+       popl    %esi
+
+       movl    %ecx, (%edx)
+       ret
+
+L(done):
+       popl    %esi
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/pentium/copyd.asm b/mpn/x86/pentium/copyd.asm

new file mode 100644 (file)

index 0000000..2be8c76
--- /dev/null
+++ b/mpn/x86/pentium/copyd.asm
@@ -0,0 +1,135 @@
+dnl  Intel Pentium mpn_copyd -- copy limb vector, decrementing.
+
+dnl  Copyright 1996, 2001, 2002, 2006 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C P5: 1.25 cycles/limb
+
+
+C void mpn_copyd (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C
+C See comments in copyi.asm.
+
+defframe(PARAM_SIZE,12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+       TEXT
+       ALIGN(8)
+PROLOGUE(mpn_copyd)
+deflit(`FRAME',0)
+C 8 limbs per L(top) pass from the high end down, then 4, 2 and 1 limb steps.
+       movl    PARAM_SRC, %eax
+       movl    PARAM_SIZE, %ecx
+
+       pushl   %esi    FRAME_pushl()
+       pushl   %edi    FRAME_pushl()
+
+       leal    -4(%eax,%ecx,4), %eax           C &src[size-1]
+       movl    PARAM_DST, %edx
+
+       subl    $7, %ecx                        C size-7
+       jle     L(end)                          C 7 or fewer limbs, no unrolled pass
+
+       movl    28-4(%edx,%ecx,4), %esi         C prefetch cache, dst[size-1]
+       nop
+
+L(top):
+       C eax   src, decrementing
+       C ebx
+       C ecx   counter, limbs
+       C edx   dst
+       C esi   scratch
+       C edi   scratch
+       C ebp
+
+       movl    28-32(%edx,%ecx,4), %esi        C prefetch dst cache line
+       subl    $8, %ecx                        C flags are tested by the jg below
+
+       movl    (%eax), %esi                    C read words pairwise
+       movl    -4(%eax), %edi
+       movl    %esi, 56(%edx,%ecx,4)           C store words pairwise
+       movl    %edi, 52(%edx,%ecx,4)
+
+       movl    -8(%eax), %esi
+       movl    -12(%eax), %edi
+       movl    %esi, 48(%edx,%ecx,4)
+       movl    %edi, 44(%edx,%ecx,4)
+
+       movl    -16(%eax), %esi
+       movl    -20(%eax), %edi
+       movl    %esi, 40(%edx,%ecx,4)
+       movl    %edi, 36(%edx,%ecx,4)
+
+       movl    -24(%eax), %esi
+       movl    -28(%eax), %edi
+       movl    %esi, 32(%edx,%ecx,4)
+       movl    %edi, 28(%edx,%ecx,4)
+
+       leal    -32(%eax), %eax                 C leal leaves the flags alone
+       jg      L(top)                          C loop while 8 or more limbs remain
+
+
+L(end):
+       C ecx   -7 to 0, representing respectively 0 to 7 limbs remaining
+       C eax   src end
+       C edx   dst, next location to store
+
+       addl    $4, %ecx                C positive if 4 or more limbs remain
+       jle     L(no4)
+
+       movl    (%eax), %esi
+       movl    -4(%eax), %edi
+       movl    %esi, 8(%edx,%ecx,4)
+       movl    %edi, 4(%edx,%ecx,4)
+
+       movl    -8(%eax), %esi
+       movl    -12(%eax), %edi
+       movl    %esi, (%edx,%ecx,4)
+       movl    %edi, -4(%edx,%ecx,4)
+
+       subl    $16, %eax
+       subl    $4, %ecx                C back to -3..0, same range as the jle path
+L(no4):
+
+       addl    $2, %ecx                C positive if 2 or 3 limbs remain
+       jle     L(no2)
+
+       movl    (%eax), %esi
+       movl    -4(%eax), %edi
+       movl    %esi, (%edx,%ecx,4)
+       movl    %edi, -4(%edx,%ecx,4)
+
+       subl    $8, %eax
+       subl    $2, %ecx                C 0 if one limb remains, else -1
+L(no2):
+
+       jnz     L(done)                 C nonzero ecx means nothing is left
+
+       movl    (%eax), %ecx            C final limb, src[0]
+       movl    %ecx, (%edx)    C risk of cache bank clash here
+
+L(done):
+       popl    %edi
+       popl    %esi
+
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/pentium/copyi.asm b/mpn/x86/pentium/copyi.asm

new file mode 100644 (file)

index 0000000..9da08e2
--- /dev/null
+++ b/mpn/x86/pentium/copyi.asm
@@ -0,0 +1,153 @@
+dnl  Intel Pentium mpn_copyi -- copy limb vector, incrementing.
+
+dnl  Copyright 1996, 2001, 2002, 2006 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C P5: 1.25 cycles/limb
+
+
+C void mpn_copyi (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C
+C Destination prefetching is done to avoid repeated write-throughs on lines
+C not already in L1.
+C
+C At least one of the src or dst pointer needs to be incremented rather than
+C using indexing, so that there's somewhere to put the loop control without
+C an AGI.  Incrementing one and not two lets us keep loop overhead to 2
+C cycles.  Making it the src pointer incremented avoids an AGI on the %ecx
+C subtracts in the finishup code.
+C
+C The block of finishup code is almost as big as the main loop itself, which
+C is unfortunate, but it's faster that way than with say rep movsl, by about
+C 10 cycles for instance on P55.
+C
+C There's nothing to be gained from MMX on P55, since it can do only one
+C movq load (or store) per cycle, so the throughput would be the same as the
+C code here (and even then only if src and dst have the same alignment mod
+C 8).
+
+defframe(PARAM_SIZE,12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+       TEXT
+       ALIGN(8)
+PROLOGUE(mpn_copyi)
+deflit(`FRAME',0)
+
+       movl    PARAM_SIZE, %ecx
+       movl    PARAM_DST, %edx
+
+       pushl   %ebx    FRAME_pushl()
+       pushl   %esi    FRAME_pushl()
+
+       leal    (%edx,%ecx,4), %edx     C &dst[size], one past the last limb
+       xorl    $-1, %ecx               C -size-1
+
+       movl    PARAM_SRC, %esi
+       addl    $8, %ecx                C -size+7
+
+       jns     L(end)                  C size of 7 or less, no full 8 limb block
+
+       movl    -28(%edx,%ecx,4), %eax  C fetch destination cache line, dst[0]
+       nop
+
+L(top):
+       C eax   scratch
+       C ebx   scratch
+       C ecx   counter, limbs, negative
+       C edx   &dst[size]
+       C esi   src, incrementing
+       C edi
+       C ebp
+
+       movl    (%edx,%ecx,4), %eax     C fetch destination cache line
+       addl    $8, %ecx                C sets the flags tested by js below
+
+       movl    (%esi), %eax            C read words pairwise
+       movl    4(%esi), %ebx
+       movl    %eax, -60(%edx,%ecx,4)  C store words pairwise
+       movl    %ebx, -56(%edx,%ecx,4)
+
+       movl    8(%esi), %eax
+       movl    12(%esi), %ebx
+       movl    %eax, -52(%edx,%ecx,4)
+       movl    %ebx, -48(%edx,%ecx,4)
+
+       movl    16(%esi), %eax
+       movl    20(%esi), %ebx
+       movl    %eax, -44(%edx,%ecx,4)
+       movl    %ebx, -40(%edx,%ecx,4)
+
+       movl    24(%esi), %eax
+       movl    28(%esi), %ebx
+       movl    %eax, -36(%edx,%ecx,4)
+       movl    %ebx, -32(%edx,%ecx,4)
+
+       leal    32(%esi), %esi          C leal and movl keep the addl flags intact
+       js      L(top)                  C loop while 8 or more limbs remain
+
+
+L(end):
+       C ecx   0 to 7, representing respectively 7 to 0 limbs remaining
+       C esi   src, next location to read
+       C edx   &dst[size], dst end
+
+       subl    $4, %ecx
+       jns     L(no4)                  C skip if 3 or fewer limbs remain
+
+       movl    (%esi), %eax
+       movl    4(%esi), %ebx
+       movl    %eax, -12(%edx,%ecx,4)
+       movl    %ebx, -8(%edx,%ecx,4)
+
+       movl    8(%esi), %eax
+       movl    12(%esi), %ebx
+       movl    %eax, -4(%edx,%ecx,4)
+       movl    %ebx, (%edx,%ecx,4)
+
+       addl    $16, %esi
+       addl    $4, %ecx
+L(no4):
+       C ecx   0 to 3, representing respectively 3 to 0 limbs remaining
+       subl    $2, %ecx
+       jns     L(no2)                  C skip if 1 or 0 limbs remain
+
+       movl    (%esi), %eax
+       movl    4(%esi), %ebx
+       movl    %eax, -4(%edx,%ecx,4)
+       movl    %ebx, (%edx,%ecx,4)
+
+       addl    $8, %esi
+       addl    $2, %ecx
+L(no2):
+       C ecx   0 or 1, representing respectively 1 or 0 limbs remaining
+       jnz     L(done)                 C zero flag is from the last subl or addl
+
+       movl    (%esi), %eax
+       movl    %eax, -4(%edx,%ecx,4)   C risk of cache bank clash here
+
+L(done):
+       popl    %esi
+       popl    %ebx
+
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/pentium/dive_1.asm b/mpn/x86/pentium/dive_1.asm

new file mode 100644 (file)

index 0000000..253d128
--- /dev/null
+++ b/mpn/x86/pentium/dive_1.asm
@@ -0,0 +1,261 @@
+dnl  Intel Pentium mpn_divexact_1 -- mpn by limb exact division.
+
+dnl  Copyright 2001, 2002 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C         divisor
+C       odd   even
+C P54:  24.5  30.5   cycles/limb
+C P55:  23.0  28.0
+
+
+C void mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                      mp_limb_t divisor);
+C
+C Plain divl is used for small sizes, since the inverse takes a while to
+C set up.  Multiplying works out faster for size>=3 when the divisor is odd,
+C or size>=4 when the divisor is even.  Actually on P55 size==2 for odd or
+C size==3 for even are about the same speed with either divl or mul, but
+C divl is used since it will use up less code cache.
+C
+C The P55 speeds noted above, 23 cycles odd or 28 cycles even, are as
+C expected.  On P54 in the even case the shrdl pairing nonsense (see
+C mpn/x86/pentium/README) costs 1 cycle, but it's not clear why there's a
+C further 1.5 slowdown for both odd and even.
+
+defframe(PARAM_DIVISOR,16)
+defframe(PARAM_SIZE,   12)
+defframe(PARAM_SRC,    8)
+defframe(PARAM_DST,    4)
+
+dnl  re-use parameter space
+define(VAR_INVERSE,`PARAM_DST')
+
+       TEXT
+
+       ALIGN(32)
+PROLOGUE(mpn_divexact_1)
+deflit(`FRAME',0)
+
+       movl    PARAM_DIVISOR, %eax
+       movl    PARAM_SIZE, %ecx
+
+       pushl   %esi            FRAME_pushl()
+       push    %edi            FRAME_pushl()
+
+       movl    PARAM_SRC, %esi
+       andl    $1, %eax        C low bit of divisor, 1 if odd
+
+       movl    PARAM_DST, %edi
+       addl    %ecx, %eax      C size if even, size+1 if odd
+
+       cmpl    $4, %eax
+       jae     L(mul_by_inverse)       C size 3 or more if odd, 4 or more if even
+
+
+       xorl    %edx, %edx      C high limb of the first dividend is zero
+L(div_top):
+       movl    -4(%esi,%ecx,4), %eax   C src limb, working from high to low
+
+       divl    PARAM_DIVISOR   C quotient in eax, remainder stays in edx
+
+       movl    %eax, -4(%edi,%ecx,4)
+       decl    %ecx
+
+       jnz     L(div_top)
+
+       popl    %edi
+       popl    %esi
+
+       ret
+
+
+
+L(mul_by_inverse):
+       movl    PARAM_DIVISOR, %eax
+       movl    $-1, %ecx               C becomes 0 on the first incl below
+
+L(strip_twos):
+       ASSERT(nz, `orl %eax, %eax')
+       shrl    %eax
+       incl    %ecx                    C shift count
+
+       jnc     L(strip_twos)           C carry is from shrl, incl leaves it alone
+
+       leal    1(%eax,%eax), %edx      C d
+       andl    $127, %eax              C d/2, 7 bits
+
+       pushl   %ebx            FRAME_pushl()
+       pushl   %ebp            FRAME_pushl()
+
+ifdef(`PIC',`
+       call    L(here)
+L(here):
+       popl    %ebp                    C eip
+
+       addl    $_GLOBAL_OFFSET_TABLE_+[.-L(here)], %ebp
+       C AGI
+       movl    binvert_limb_table@GOT(%ebp), %ebp
+       C AGI
+       movzbl  (%eax,%ebp), %eax       C inv 8 bits
+',`
+
+dnl non-PIC
+       movzbl  binvert_limb_table(%eax), %eax  C inv 8 bits
+')
+
+       movl    %eax, %ebp              C inv
+       addl    %eax, %eax              C 2*inv
+
+       imull   %ebp, %ebp              C inv*inv
+
+       imull   %edx, %ebp              C inv*inv*d
+
+       subl    %ebp, %eax              C inv = 2*inv - inv*inv*d
+       movl    PARAM_SIZE, %ebx
+
+       movl    %eax, %ebp
+       addl    %eax, %eax              C 2*inv
+
+       imull   %ebp, %ebp              C inv*inv
+
+       imull   %edx, %ebp              C inv*inv*d
+
+       subl    %ebp, %eax              C inv = 2*inv - inv*inv*d
+       movl    %edx, PARAM_DIVISOR     C d without twos
+
+       leal    (%esi,%ebx,4), %esi     C src end
+       leal    (%edi,%ebx,4), %edi     C dst end
+
+       negl    %ebx                    C -size
+
+       ASSERT(e,`      C expect d*inv == 1 mod 2^GMP_LIMB_BITS
+       pushl   %eax    FRAME_pushl()
+       imull   PARAM_DIVISOR, %eax
+       cmpl    $1, %eax
+       popl    %eax    FRAME_popl()')
+
+       movl    %eax, VAR_INVERSE       C overwrites the dst parameter slot
+       xorl    %ebp, %ebp              C initial carry bit
+
+       movl    (%esi,%ebx,4), %eax     C src low limb
+       orl     %ecx, %ecx              C shift
+
+       movl    4(%esi,%ebx,4), %edx    C src second limb (for even)
+       jz      L(odd_entry)            C shift count zero, divisor was odd
+
+       shrdl(  %cl, %edx, %eax)
+
+       incl    %ebx
+       jmp     L(even_entry)
+
+
+       ALIGN(8)
+L(odd_top):
+       C eax   scratch
+       C ebx   counter, limbs, negative
+       C ecx
+       C edx
+       C esi   src end
+       C edi   dst end
+       C ebp   carry bit, 0 or -1
+
+       mull    PARAM_DIVISOR           C edx = high limb of quotient limb times d
+
+       movl    (%esi,%ebx,4), %eax     C next src limb
+       subl    %ebp, %edx              C ebp is 0 or -1, so this adds the carry bit
+
+       subl    %edx, %eax              C src limb minus carry limb
+
+       sbbl    %ebp, %ebp              C new carry bit, 0 or -1
+
+L(odd_entry):
+       imull   VAR_INVERSE, %eax       C quotient limb
+
+       movl    %eax, (%edi,%ebx,4)
+
+       incl    %ebx
+       jnz     L(odd_top)
+
+
+       popl    %ebp
+       popl    %ebx
+
+       popl    %edi
+       popl    %esi
+
+       ret
+
+
+L(even_top):
+       C eax   scratch
+       C ebx   counter, limbs, negative
+       C ecx   twos
+       C edx
+       C esi   src end
+       C edi   dst end
+       C ebp   carry bit, 0 or -1
+
+       mull    PARAM_DIVISOR           C edx = high limb of quotient limb times d
+
+       subl    %ebp, %edx              C carry bit
+       movl    -4(%esi,%ebx,4), %eax   C src limb
+
+       movl    (%esi,%ebx,4), %ebp     C and one above it
+
+       shrdl(  %cl, %ebp, %eax)
+
+       subl    %edx, %eax              C carry limb
+
+       sbbl    %ebp, %ebp              C new carry bit, 0 or -1
+
+L(even_entry):
+       imull   VAR_INVERSE, %eax       C quotient limb
+
+       movl    %eax, -4(%edi,%ebx,4)
+       incl    %ebx
+
+       jnz     L(even_top)
+
+
+
+       mull    PARAM_DIVISOR           C eax is the quotient limb just stored
+
+       movl    -4(%esi), %eax          C src high limb
+       subl    %ebp, %edx
+
+       shrl    %cl, %eax
+
+       subl    %edx, %eax              C no carry if division is exact
+
+       imull   VAR_INVERSE, %eax
+
+       movl    %eax, -4(%edi)          C dst high limb
+       nop                             C protect against cache bank clash
+
+       popl    %ebp
+       popl    %ebx
+
+       popl    %edi
+       popl    %esi
+
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/pentium/gmp-mparam.h b/mpn/x86/pentium/gmp-mparam.h

new file mode 100644 (file)

index 0000000..de293c4
--- /dev/null
+++ b/mpn/x86/pentium/gmp-mparam.h
@@ -0,0 +1,66 @@
+/* Intel P54 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2004 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#define GMP_LIMB_BITS 32
+#define BYTES_PER_MP_LIMB 4
+
+
+/* For mpn/x86/pentium/mod_1.asm */
+#define COUNT_LEADING_ZEROS_NEED_CLZ_TAB
+
+
+/* 166MHz P54 */
+
+/* Generated by tuneup.c, 2004-02-10, gcc 2.95 */
+
+#define MUL_TOOM22_THRESHOLD             16
+#define MUL_TOOM33_THRESHOLD             90
+
+#define SQR_BASECASE_THRESHOLD            0  /* always */
+#define SQR_TOOM2_THRESHOLD              22
+#define SQR_TOOM3_THRESHOLD             122
+
+#define DIV_SB_PREINV_THRESHOLD       MP_SIZE_T_MAX  /* never */
+#define DIV_DC_THRESHOLD                 52
+#define POWM_THRESHOLD                   77
+
+#define HGCD_THRESHOLD                  121
+#define GCD_ACCEL_THRESHOLD               3
+#define GCD_DC_THRESHOLD                615
+#define JACOBI_BASE_METHOD                2
+
+#define USE_PREINV_DIVREM_1               0
+#define USE_PREINV_MOD_1                  1  /* native */
+#define DIVREM_2_THRESHOLD            MP_SIZE_T_MAX  /* never */
+#define DIVEXACT_1_THRESHOLD              0  /* always (native) */
+#define MODEXACT_1_ODD_THRESHOLD          0  /* always (native) */
+
+#define GET_STR_DC_THRESHOLD             23
+#define GET_STR_PRECOMPUTE_THRESHOLD     33
+#define SET_STR_THRESHOLD              2788
+
+#define MUL_FFT_TABLE  { 432, 928, 1664, 3584, 10240, 40960, 0 }
+#define MUL_FFT_MODF_THRESHOLD          448
+#define MUL_FFT_THRESHOLD              3328
+
+#define SQR_FFT_TABLE  { 496, 928, 1920, 4608, 10240, 40960, 0 }
+#define SQR_FFT_MODF_THRESHOLD          512
+#define SQR_FFT_THRESHOLD              3328
diff --git a/mpn/x86/pentium/hamdist.asm b/mpn/x86/pentium/hamdist.asm

new file mode 100644 (file)

index 0000000..a129030
--- /dev/null
+++ b/mpn/x86/pentium/hamdist.asm
@@ -0,0 +1,132 @@
+dnl  Intel P5 mpn_hamdist -- mpn hamming distance.
+
+dnl  Copyright 2001, 2002 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C P5: 14.0 cycles/limb
+
+
+C unsigned long mpn_hamdist (mp_srcptr src1, mp_srcptr src2, mp_size_t size);
+C
+C It might be possible to shave 1 cycle from the loop, and hence 2
+C cycles/limb.  The xorb is taking 2 cycles, but a separate load and xor
+C would be 1, if the right schedule could be found (not found so far).
+C Wanting to avoid potential cache bank clashes makes it tricky.
+
+C The slightly strange quoting here helps the renaming done by tune/many.pl.
+deflit(TABLE_NAME,
+m4_assert_defined(`GSYM_PREFIX')
+GSYM_PREFIX`'mpn_popcount``'_table')
+
+defframe(PARAM_SIZE,12)
+defframe(PARAM_SRC2, 8)
+defframe(PARAM_SRC1, 4)
+
+       TEXT
+       ALIGN(8)
+
+PROLOGUE(mpn_hamdist)
+deflit(`FRAME',0)
+
+       movl    PARAM_SIZE, %ecx
+       pushl   %esi    FRAME_pushl()
+
+       shll    %ecx            C size in byte pairs
+       pushl   %edi    FRAME_pushl()
+
+ifdef(`PIC',`
+       pushl   %ebx    FRAME_pushl()
+       pushl   %ebp    FRAME_pushl()
+
+       call    L(here) FRAME_pushl()
+L(here):
+       movl    PARAM_SRC1, %esi
+       popl    %ebp    FRAME_popl()
+
+       movl    PARAM_SRC2, %edi
+       addl    $_GLOBAL_OFFSET_TABLE_+[.-L(here)], %ebp
+
+       xorl    %ebx, %ebx      C byte
+       xorl    %edx, %edx      C byte
+
+       movl    TABLE_NAME@GOT(%ebp), %ebp
+       xorl    %eax, %eax      C total
+define(TABLE,`(%ebp,$1)')
+
+',`
+dnl non-PIC
+       movl    PARAM_SRC1, %esi
+       movl    PARAM_SRC2, %edi
+
+       xorl    %eax, %eax      C total
+       pushl   %ebx    FRAME_pushl()
+
+       xorl    %edx, %edx      C byte
+       xorl    %ebx, %ebx      C byte
+
+define(TABLE,`TABLE_NAME($1)')
+')
+
+
+       C The nop after the xorb seems necessary.  Although a movb might be
+       C expected to go down the V pipe in the second cycle of the xorb, it
+       C doesn't and costs an extra 2 cycles.
+L(top):
+       C eax   total
+       C ebx   byte
+       C ecx   counter, 2*size to 1
+       C edx   byte
+       C esi   src1
+       C edi   src2
+       C ebp   [PIC] table
+
+       addl    %ebx, %eax              C table value from previous pass, 0 at first
+       movb    -1(%esi,%ecx,2), %bl    C src1, upper byte of the pair
+
+       addl    %edx, %eax              C table value from previous pass, 0 at first
+       movb    -1(%edi,%ecx,2), %dl    C src2, upper byte of the pair
+
+       xorb    %dl, %bl                C differing bits, upper byte
+       movb    -2(%esi,%ecx,2), %dl    C src1, lower byte of the pair
+
+       xorb    -2(%edi,%ecx,2), %dl    C differing bits, lower byte
+       nop
+
+       movb    TABLE(%ebx), %bl        C look up the xor byte in the popcount table
+       decl    %ecx
+
+       movb    TABLE(%edx), %dl        C movb leaves the decl flags for jnz
+       jnz     L(top)
+
+
+ifdef(`PIC',`
+       popl    %ebp
+')
+       addl    %ebx, %eax              C table values from the final pass
+       popl    %ebx
+
+       addl    %edx, %eax
+       popl    %edi
+
+       popl    %esi
+
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/pentium/logops_n.asm b/mpn/x86/pentium/logops_n.asm

new file mode 100644 (file)

index 0000000..0552e55
--- /dev/null
+++ b/mpn/x86/pentium/logops_n.asm
@@ -0,0 +1,165 @@
+dnl  Intel Pentium mpn_and_n,...,mpn_xnor_n -- bitwise logical operations.
+
+dnl  Copyright 2001, 2002 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C P5: 3.0 c/l  and, ior, xor
+C     3.5 c/l  andn, iorn, nand, nior, xnor
+
+
+define(M4_choose_op,
+`ifdef(`OPERATION_$1',`
+define(`M4_function', `mpn_$1')
+define(`M4_want_pre', `$4')
+define(`M4op',        `$3')
+define(`M4_want_post',`$2')
+')')
+define(M4pre, `ifelse(M4_want_pre, yes,`$1')')
+define(M4post,`ifelse(M4_want_post,yes,`$1')')
+
+M4_choose_op( and_n,     , andl,    )
+M4_choose_op( andn_n,    , andl, yes)
+M4_choose_op( nand_n, yes, andl,    )
+M4_choose_op( ior_n,     ,  orl,    )
+M4_choose_op( iorn_n,    ,  orl, yes)
+M4_choose_op( nior_n, yes,  orl,    )
+M4_choose_op( xor_n,     , xorl,    )
+M4_choose_op( xnor_n, yes, xorl,    )
+
+ifdef(`M4_function',,
+`m4_error(`Unrecognised or undefined OPERATION symbol
+')')
+
+MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
+
+NAILS_SUPPORT(0-31)
+
+
+C void M4_function (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size);
+C
+C Nothing complicated here, just some care to avoid data cache bank clashes
+C and AGIs.
+C
+C We're one register short of being able to do a simple 4 loads, 2 ops, 2
+C stores.  Instead %ebp is juggled a bit and nops are introduced to keep the
+C pairings as intended.  An in-place operation would free up a register, for
+C an 0.5 c/l speedup, if that's worth bothering with.
+C
+C This code seems best for P55 too.  Data alignment is a big problem for MMX
+C and the pairing restrictions on movq and integer instructions make life
+C difficult.
+
+defframe(PARAM_SIZE,16)
+defframe(PARAM_YP,  12)
+defframe(PARAM_XP,   8)
+defframe(PARAM_WP,   4)
+
+       TEXT
+       ALIGN(8)
+
+PROLOGUE(M4_function)
+deflit(`FRAME',0)
+
+       pushl   %ebx    FRAME_pushl()
+       pushl   %esi    FRAME_pushl()
+
+       pushl   %edi    FRAME_pushl()
+       pushl   %ebp    FRAME_pushl()
+
+       movl    PARAM_SIZE, %ecx
+       movl    PARAM_XP, %ebx
+
+       movl    PARAM_YP, %esi
+       movl    PARAM_WP, %edi
+
+       shrl    %ecx                    C size/2 limb pairs, carry set if size odd
+       jnc     L(entry)                C even size, no odd high limb to do first
+
+       movl    (%ebx,%ecx,8), %eax     C risk of data cache bank clash here
+       movl    (%esi,%ecx,8), %edx     C yp high limb, the odd one
+
+M4pre(`        notl_or_xorl_GMP_NUMB_MASK(%edx)')
+
+       M4op    %edx, %eax
+
+M4post(`xorl   $GMP_NUMB_MASK, %eax')
+       orl     %ecx, %ecx              C test whether any limb pairs remain
+
+       movl    %eax, (%edi,%ecx,8)     C movl leaves the orl flags for jz
+       jz      L(done)                 C size was 1
+
+       jmp     L(entry)
+
+
+L(top):
+       C eax   high limb of pair, op already applied
+       C ebx   xp
+       C ecx   counter, limb pairs, decrementing
+       C edx   yp low limb of pair, op still to be applied
+       C esi   yp
+       C edi   wp
+       C ebp   xp low limb of pair
+
+       M4op    %ebp, %edx
+       nop
+
+M4post(`xorl   $GMP_NUMB_MASK, %eax')
+M4post(`xorl   $GMP_NUMB_MASK, %edx')
+
+       movl    %eax, 4(%edi,%ecx,8)
+       movl    %edx, (%edi,%ecx,8)
+
+L(entry):
+       movl    -4(%ebx,%ecx,8), %ebp   C xp high limb of pair
+       nop
+
+       movl    -4(%esi,%ecx,8), %eax   C yp high limb of pair
+       movl    -8(%esi,%ecx,8), %edx   C yp low limb of pair
+
+M4pre(`        xorl    $GMP_NUMB_MASK, %eax')
+M4pre(`        xorl    $GMP_NUMB_MASK, %edx')
+
+       M4op    %ebp, %eax
+       movl    -8(%ebx,%ecx,8), %ebp   C xp low limb of pair
+
+       decl    %ecx
+       jnz     L(top)
+
+
+       M4op    %ebp, %edx              C finish the last pair, ecx is now zero
+       nop
+
+M4post(`xorl   $GMP_NUMB_MASK, %eax')
+M4post(`xorl   $GMP_NUMB_MASK, %edx')
+
+       movl    %eax, 4(%edi,%ecx,8)
+       movl    %edx, (%edi,%ecx,8)
+
+
+L(done):
+       popl    %ebp
+       popl    %edi
+
+       popl    %esi
+       popl    %ebx
+
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/pentium/lshift.asm b/mpn/x86/pentium/lshift.asm

new file mode 100644 (file)

index 0000000..ece51e0
--- /dev/null
+++ b/mpn/x86/pentium/lshift.asm
@@ -0,0 +1,233 @@
+dnl  Intel Pentium mpn_lshift -- mpn left shift.
+
+dnl  Copyright 1992, 1994, 1995, 1996, 1999, 2000, 2002 Free Software
+dnl  Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C         cycles/limb
+C P5,P54:    6.0
+C P55:       5.375
+
+
+C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                       unsigned shift);
+C
+C The main shift-by-N loop should run at 5.375 c/l and that's what P55 does,
+C but P5 and P54 run only at 6.0 c/l, which is 4 cycles lost somewhere.
+
+defframe(PARAM_SHIFT,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC,  8)
+defframe(PARAM_DST,  4)
+
+       TEXT
+       ALIGN(8)
+PROLOGUE(mpn_lshift)
+
+       pushl   %edi
+       pushl   %esi
+       pushl   %ebx
+       pushl   %ebp
+deflit(`FRAME',16)
+
+       movl    PARAM_DST,%edi
+       movl    PARAM_SRC,%esi
+       movl    PARAM_SIZE,%ebp
+       movl    PARAM_SHIFT,%ecx
+
+C We can use faster code for shift-by-1 under certain conditions.
+       cmp     $1,%ecx
+       jne     L(normal)               C general shift count, use the shldl code
+       leal    4(%esi),%eax
+       cmpl    %edi,%eax
+       jnc     L(special)              C jump if s_ptr + 1 >= res_ptr
+       leal    (%esi,%ebp,4),%eax
+       cmpl    %eax,%edi
+       jnc     L(special)              C jump if res_ptr >= s_ptr + size
+
+L(normal):
+       leal    -4(%edi,%ebp,4),%edi    C &dst[size-1]
+       leal    -4(%esi,%ebp,4),%esi    C &src[size-1]
+
+       movl    (%esi),%edx             C src high limb
+       subl    $4,%esi
+       xorl    %eax,%eax
+       shldl(  %cl, %edx, %eax)        C compute carry limb
+       pushl   %eax                    C push carry limb onto stack
+
+       decl    %ebp                    C size-1 limbs still to read
+       pushl   %ebp                    C saved for the leftover count at L(end)
+       shrl    $3,%ebp                 C number of 8 limb blocks
+       jz      L(end)
+
+       movl    (%edi),%eax             C fetch destination cache line
+
+       ALIGN(4)
+L(oop):        movl    -28(%edi),%eax          C fetch destination cache line
+       movl    %edx,%ebx
+
+       movl    (%esi),%eax
+       movl    -4(%esi),%edx
+       shldl(  %cl, %eax, %ebx)
+       shldl(  %cl, %edx, %eax)
+       movl    %ebx,(%edi)
+       movl    %eax,-4(%edi)
+
+       movl    -8(%esi),%ebx
+       movl    -12(%esi),%eax
+       shldl(  %cl, %ebx, %edx)
+       shldl(  %cl, %eax, %ebx)
+       movl    %edx,-8(%edi)
+       movl    %ebx,-12(%edi)
+
+       movl    -16(%esi),%edx
+       movl    -20(%esi),%ebx
+       shldl(  %cl, %edx, %eax)
+       shldl(  %cl, %ebx, %edx)
+       movl    %eax,-16(%edi)
+       movl    %edx,-20(%edi)
+
+       movl    -24(%esi),%eax
+       movl    -28(%esi),%edx
+       shldl(  %cl, %eax, %ebx)
+       shldl(  %cl, %edx, %eax)
+       movl    %ebx,-24(%edi)
+       movl    %eax,-28(%edi)
+
+       subl    $32,%esi
+       subl    $32,%edi
+       decl    %ebp
+       jnz     L(oop)
+
+L(end):        popl    %ebp
+       andl    $7,%ebp                 C size-1 mod 8 limbs left over
+       jz      L(end2)
+L(oop2):
+       movl    (%esi),%eax
+       shldl(  %cl,%eax,%edx)
+       movl    %edx,(%edi)
+       movl    %eax,%edx               C this limb is the high part next time
+       subl    $4,%esi
+       subl    $4,%edi
+       decl    %ebp
+       jnz     L(oop2)
+
+L(end2):
+       shll    %cl,%edx                C compute least significant limb
+       movl    %edx,(%edi)             C store it
+
+       popl    %eax                    C pop carry limb
+
+       popl    %ebp
+       popl    %ebx
+       popl    %esi
+       popl    %edi
+       ret
+
+
+C We loop from the least significant end of the arrays, which is only
+C permissible if the source and destination do not overlap, since the
+C function is documented to work for overlapping source and destination.
+
+L(special):
+       movl    (%esi),%edx             C src low limb
+       addl    $4,%esi
+
+       decl    %ebp                    C size-1 limbs still to read
+       pushl   %ebp                    C saved for the leftover count at L(Lend)
+       shrl    $3,%ebp                 C number of 8 limb blocks
+
+       addl    %edx,%edx               C shift low limb left by 1, top bit to carry
+       incl    %ebp                    C inc then dec sets the zero flag from ebp
+       decl    %ebp                    C and neither of them changes the carry
+       jz      L(Lend)
+
+       movl    (%edi),%eax             C fetch destination cache line
+
+       ALIGN(4)
+L(Loop):
+       movl    28(%edi),%eax           C fetch destination cache line
+       movl    %edx,%ebx
+
+       movl    (%esi),%eax
+       movl    4(%esi),%edx
+       adcl    %eax,%eax
+       movl    %ebx,(%edi)
+       adcl    %edx,%edx
+       movl    %eax,4(%edi)
+
+       movl    8(%esi),%ebx
+       movl    12(%esi),%eax
+       adcl    %ebx,%ebx
+       movl    %edx,8(%edi)
+       adcl    %eax,%eax
+       movl    %ebx,12(%edi)
+
+       movl    16(%esi),%edx
+       movl    20(%esi),%ebx
+       adcl    %edx,%edx
+       movl    %eax,16(%edi)
+       adcl    %ebx,%ebx
+       movl    %edx,20(%edi)
+
+       movl    24(%esi),%eax
+       movl    28(%esi),%edx
+       adcl    %eax,%eax
+       movl    %ebx,24(%edi)
+       adcl    %edx,%edx
+       movl    %eax,28(%edi)
+
+       leal    32(%esi),%esi           C use leal not to clobber carry
+       leal    32(%edi),%edi
+       decl    %ebp
+       jnz     L(Loop)
+
+L(Lend):
+       popl    %ebp
+       sbbl    %eax,%eax               C save carry in %eax
+       andl    $7,%ebp                 C size-1 mod 8 limbs left over, clobbers carry
+       jz      L(Lend2)
+       addl    %eax,%eax               C restore carry from eax
+L(Loop2):
+       movl    %edx,%ebx
+       movl    (%esi),%edx
+       adcl    %edx,%edx
+       movl    %ebx,(%edi)
+
+       leal    4(%esi),%esi            C use leal not to clobber carry
+       leal    4(%edi),%edi
+       decl    %ebp
+       jnz     L(Loop2)
+
+       jmp     L(L1)
+L(Lend2):
+       addl    %eax,%eax               C restore carry from eax
+L(L1): movl    %edx,(%edi)             C store last limb
+
+       sbbl    %eax,%eax               C 0 or -1 from the final carry
+       negl    %eax                    C return value 0 or 1
+
+       popl    %ebp
+       popl    %ebx
+       popl    %esi
+       popl    %edi
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/pentium/mmx/gmp-mparam.h b/mpn/x86/pentium/mmx/gmp-mparam.h

new file mode 100644 (file)

index 0000000..958a322
--- /dev/null
+++ b/mpn/x86/pentium/mmx/gmp-mparam.h
@@ -0,0 +1,152 @@
+/* Intel P55 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2004, 2009, 2010 Free
+Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#define GMP_LIMB_BITS 32
+#define BYTES_PER_MP_LIMB 4
+
+
+/* For mpn/x86/pentium/mod_1.asm */
+#define COUNT_LEADING_ZEROS_NEED_CLZ_TAB
+
+
+/* 233MHz P55 */
+
+#define MOD_1_NORM_THRESHOLD                 5
+#define MOD_1_UNNORM_THRESHOLD           MP_SIZE_T_MAX  /* never */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD      MP_SIZE_T_MAX  /* never */
+#define MOD_1U_TO_MOD_1_1_THRESHOLD         12
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        11
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     63
+#define USE_PREINV_DIVREM_1                  0
+#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD           51
+
+#define MUL_TOOM22_THRESHOLD                16
+#define MUL_TOOM33_THRESHOLD                53
+#define MUL_TOOM44_THRESHOLD               128
+#define MUL_TOOM6H_THRESHOLD               189
+#define MUL_TOOM8H_THRESHOLD               260
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      89
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD      91
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD      90
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD      88
+
+#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
+#define SQR_TOOM2_THRESHOLD                 20
+#define SQR_TOOM3_THRESHOLD                 73
+#define SQR_TOOM4_THRESHOLD                178
+#define SQR_TOOM6_THRESHOLD                210
+#define SQR_TOOM8_THRESHOLD                375
+
+#define MULMOD_BNM1_THRESHOLD               11
+#define SQRMOD_BNM1_THRESHOLD               12
+
+#define MUL_FFT_MODF_THRESHOLD             364  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    364, 5}, {     15, 6}, {      8, 5}, {     17, 6}, \
+    {      9, 5}, {     19, 6}, {     17, 7}, {      9, 6}, \
+    {     21, 7}, {     11, 6}, {     23, 7}, {     15, 6}, \
+    {     31, 7}, {     21, 8}, {     11, 7}, {     27, 8}, \
+    {     15, 7}, {     33, 8}, {     19, 7}, {     39, 8}, \
+    {     23, 7}, {     47, 8}, {     27, 9}, {     15, 8}, \
+    {     31, 7}, {     63, 8}, {     39, 9}, {     23, 8}, \
+    {     47,10}, {     15, 9}, {     31, 8}, {     67, 9}, \
+    {     39, 8}, {     79, 9}, {     47, 8}, {     95, 9}, \
+    {     55,10}, {     31, 9}, {     79,10}, {     47, 9}, \
+    {     95,11}, {     31,10}, {     63, 9}, {    135,10}, \
+    {     79, 9}, {    159, 8}, {    319, 9}, {    167,10}, \
+    {     95, 9}, {    191, 8}, {    383,11}, {     63,10}, \
+    {    127, 9}, {    255,10}, {    143, 9}, {    287,10}, \
+    {    159, 9}, {    319,11}, {     95,10}, {    191, 9}, \
+    {    383,12}, {     63,11}, {    127,10}, {    271, 9}, \
+    {    543,10}, {    287,11}, {    159,10}, {    351,11}, \
+    {    191,10}, {    415,11}, {    223,12}, {    127,11}, \
+    {    255,10}, {    511,11}, {    287,10}, {    575,11}, \
+    {    351,12}, {    191,11}, {    415,13}, {    127,12}, \
+    {    255,11}, {    575,12}, {    319,11}, {    703,12}, \
+    {    383,11}, {    831,12}, {    447,13}, {   8192,14}, \
+    {  16384,15}, {  32768,16} }
+#define MUL_FFT_TABLE3_SIZE 90
+#define MUL_FFT_THRESHOLD                 3520
+
+#define SQR_FFT_MODF_THRESHOLD             340  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    340, 5}, {     17, 6}, {      9, 5}, {     19, 6}, \
+    {     17, 7}, {      9, 6}, {     21, 7}, {     11, 6}, \
+    {     23, 7}, {     15, 6}, {     31, 7}, {     21, 8}, \
+    {     11, 7}, {     29, 8}, {     15, 7}, {     33, 8}, \
+    {     19, 7}, {     39, 8}, {     27, 7}, {     55, 9}, \
+    {     15, 8}, {     31, 7}, {     65, 8}, {     43, 9}, \
+    {     23, 8}, {     47,10}, {     15, 9}, {     31, 8}, \
+    {     67, 9}, {     39, 8}, {     83, 9}, {     47, 8}, \
+    {     95,10}, {     31, 9}, {     63, 8}, {    127, 9}, \
+    {     79,10}, {     47, 9}, {     95,11}, {     31,10}, \
+    {     63, 9}, {    127, 8}, {    255, 9}, {    135,10}, \
+    {     79, 9}, {    159, 8}, {    319,10}, {     95, 9}, \
+    {    191,11}, {     63,10}, {    127, 9}, {    255, 8}, \
+    {    511, 9}, {    271,10}, {    143, 9}, {    287, 8}, \
+    {    575, 9}, {    303,10}, {    159, 9}, {    319,11}, \
+    {     95,10}, {    191, 9}, {    383,10}, {    207,12}, \
+    {     63,11}, {    127,10}, {    271, 9}, {    543,10}, \
+    {    287, 9}, {    575,10}, {    303,11}, {    159,10}, \
+    {    351,11}, {    191,10}, {    415,11}, {    223,10}, \
+    {    447,12}, {    127,11}, {    255,10}, {    543,11}, \
+    {    287,10}, {    607,11}, {    351,12}, {    191,11}, \
+    {    479,13}, {    127,12}, {    255,11}, {    575,12}, \
+    {    319,11}, {    703,12}, {    383,11}, {    767,12}, \
+    {    447,13}, {   8192,14}, {  16384,15}, {  32768,16} }
+#define SQR_FFT_TABLE3_SIZE 96
+#define SQR_FFT_THRESHOLD                 5504
+
+#define MULLO_BASECASE_THRESHOLD             0  /* always */
+#define MULLO_DC_THRESHOLD                  48
+#define MULLO_MUL_N_THRESHOLD             6633
+
+#define DC_DIV_QR_THRESHOLD                 43
+#define DC_DIVAPPR_Q_THRESHOLD             170
+#define DC_BDIV_QR_THRESHOLD                43
+#define DC_BDIV_Q_THRESHOLD                110
+
+#define INV_MULMOD_BNM1_THRESHOLD           30
+#define INV_NEWTON_THRESHOLD               177
+#define INV_APPR_THRESHOLD                 171
+
+#define BINV_NEWTON_THRESHOLD              194
+#define REDC_1_TO_REDC_N_THRESHOLD          50
+
+#define MU_DIV_QR_THRESHOLD               1142
+#define MU_DIVAPPR_Q_THRESHOLD            1142
+#define MUPI_DIV_QR_THRESHOLD               90
+#define MU_BDIV_QR_THRESHOLD               942
+#define MU_BDIV_Q_THRESHOLD               1017
+
+#define MATRIX22_STRASSEN_THRESHOLD         13
+#define HGCD_THRESHOLD                      92
+#define GCD_DC_THRESHOLD                   283
+#define GCDEXT_DC_THRESHOLD                221
+#define JACOBI_BASE_METHOD                   2
+
+#define GET_STR_DC_THRESHOLD                18
+#define GET_STR_PRECOMPUTE_THRESHOLD        31
+#define SET_STR_DC_THRESHOLD               490
+#define SET_STR_PRECOMPUTE_THRESHOLD       994
diff --git a/mpn/x86/pentium/mmx/hamdist.asm b/mpn/x86/pentium/mmx/hamdist.asm

new file mode 100644 (file)

index 0000000..185eeae
--- /dev/null
+++ b/mpn/x86/pentium/mmx/hamdist.asm
@@ -0,0 +1,29 @@
+dnl  Intel P55 mpn_hamdist -- mpn hamming distance.
+
+dnl  Copyright 2000, 2002 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C P55: hamdist 12.0 cycles/limb
+
+C For reference, this code runs at 11.5 cycles/limb for popcount, which is
+C slower than the plain integer mpn/x86/pentium/popcount.asm.
+
+MULFUNC_PROLOGUE(mpn_hamdist)
+include_mpn(`x86/k6/mmx/popham.asm')
diff --git a/mpn/x86/pentium/mmx/lshift.asm b/mpn/x86/pentium/mmx/lshift.asm

new file mode 100644 (file)

index 0000000..012d794
--- /dev/null
+++ b/mpn/x86/pentium/mmx/lshift.asm
@@ -0,0 +1,452 @@
+dnl  Intel P5 mpn_lshift -- mpn left shift.
+
+dnl  Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C P5: 1.75 cycles/limb.
+
+
+C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                       unsigned shift);
+C
+C Shift src,size left by shift many bits and store the result in dst,size.
+C Zeros are shifted in at the right.  Return the bits shifted out at the
+C left.
+C
+C The comments in mpn_rshift apply here too.
+
+defframe(PARAM_SHIFT,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC,  8)
+defframe(PARAM_DST,  4)
+deflit(`FRAME',0)
+
+dnl  minimum 5, because the unrolled loop can't handle less
+deflit(UNROLL_THRESHOLD, 5)
+
+       TEXT
+       ALIGN(8)
+
+PROLOGUE(mpn_lshift)
+
+       pushl   %ebx
+       pushl   %edi
+deflit(`FRAME',8)
+
+       movl    PARAM_SIZE, %eax
+       movl    PARAM_DST, %edx
+
+       movl    PARAM_SRC, %ebx
+       movl    PARAM_SHIFT, %ecx
+
+       cmp     $UNROLL_THRESHOLD, %eax
+       jae     L(unroll)
+
+       movl    -4(%ebx,%eax,4), %edi   C src high limb
+       decl    %eax
+
+       jnz     L(simple)               C size-1 non-zero, more than one limb
+
+       shldl(  %cl, %edi, %eax)        C eax was zero, gets bits shifted out of edi (retval)
+
+       shll    %cl, %edi
+
+       movl    %edi, (%edx)            C dst low (and only) limb
+       popl    %edi                    C risk of data cache bank clash
+
+       popl    %ebx
+
+       ret
+
+
+C -----------------------------------------------------------------------------
+L(simple):
+       C eax   size-1
+       C ebx   src
+       C ecx   shift
+       C edx   dst
+       C esi
+       C edi
+       C ebp
+deflit(`FRAME',8)
+
+       movd    (%ebx,%eax,4), %mm5     C src high limb
+
+       movd    %ecx, %mm6              C lshift
+       negl    %ecx
+
+       psllq   %mm6, %mm5
+       addl    $32, %ecx
+
+       movd    %ecx, %mm7              C 32-shift
+       psrlq   $32, %mm5               C retval
+
+
+L(simple_top):
+       C eax   counter, limbs, counting down from size-1
+       C ebx   src
+       C ecx
+       C edx   dst
+       C esi
+       C edi
+       C
+       C mm0   scratch
+       C mm5   return value
+       C mm6   shift
+       C mm7   32-shift
+
+       movq    -4(%ebx,%eax,4), %mm0
+       decl    %eax
+
+       psrlq   %mm7, %mm0
+
+       C
+
+       movd    %mm0, 4(%edx,%eax,4)
+       jnz     L(simple_top)           C flags still from the decl
+
+
+       movd    (%ebx), %mm0            C src low limb
+
+       movd    %mm5, %eax
+       psllq   %mm6, %mm0
+
+       popl    %edi
+       popl    %ebx
+
+       movd    %mm0, (%edx)            C dst low limb
+
+       emms
+
+       ret
+
+
+C -----------------------------------------------------------------------------
+       ALIGN(8)
+L(unroll):
+       C eax   size
+       C ebx   src
+       C ecx   shift
+       C edx   dst
+       C esi
+       C edi
+       C ebp
+deflit(`FRAME',8)
+
+       movd    -4(%ebx,%eax,4), %mm5   C src high limb
+       leal    (%ebx,%eax,4), %edi     C src end, for the alignment test
+
+       movd    %ecx, %mm6              C lshift
+       andl    $4, %edi
+
+       psllq   %mm6, %mm5
+       jz      L(start_src_aligned)    C flags from the andl, psllq leaves them alone
+
+
+       C src isn't aligned, process high limb separately (marked xxx) to
+       C make it so.
+       C
+       C  source     -8(ebx,%eax,4)
+       C                  |
+       C  +-------+-------+-------+--
+       C  |               |
+       C  +-------+-------+-------+--
+       C        0mod8   4mod8   0mod8
+       C
+       C  dest
+       C     -4(edx,%eax,4)
+       C          |
+       C  +-------+-------+--
+       C  |  xxx  |       |
+       C  +-------+-------+--
+
+       movq    -8(%ebx,%eax,4), %mm0   C unaligned load
+
+       psllq   %mm6, %mm0
+       decl    %eax
+
+       psrlq   $32, %mm0
+
+       C
+
+       movd    %mm0, (%edx,%eax,4)
+L(start_src_aligned):
+
+       movq    -8(%ebx,%eax,4), %mm1   C src high qword
+       leal    (%edx,%eax,4), %edi     C dst end, for the alignment test
+
+       andl    $4, %edi
+       psrlq   $32, %mm5               C return value
+
+       movq    -16(%ebx,%eax,4), %mm3  C src second highest qword
+       jz      L(start_dst_aligned)    C flags from the andl
+
+       C dst isn't aligned, subtract 4 to make it so, and pretend the shift
+       C is 32 bits extra.  High limb of dst (marked xxx) handled here
+       C separately.
+       C
+       C  source     -8(ebx,%eax,4)
+       C                  |
+       C  +-------+-------+--
+       C  |      mm1      |
+       C  +-------+-------+--
+       C                0mod8   4mod8
+       C
+       C  dest
+       C     -4(edx,%eax,4)
+       C          |
+       C  +-------+-------+-------+--
+       C  |  xxx  |               |
+       C  +-------+-------+-------+--
+       C        0mod8   4mod8   0mod8
+
+       movq    %mm1, %mm0
+       addl    $32, %ecx               C new shift
+
+       psllq   %mm6, %mm0
+
+       movd    %ecx, %mm6
+       psrlq   $32, %mm0
+
+       C wasted cycle here waiting for %mm0
+
+       movd    %mm0, -4(%edx,%eax,4)
+       subl    $4, %edx
+L(start_dst_aligned):
+
+
+       psllq   %mm6, %mm1
+       negl    %ecx                    C -shift
+
+       addl    $64, %ecx               C 64-shift
+       movq    %mm3, %mm2
+
+       movd    %ecx, %mm7
+       subl    $8, %eax                C size-8
+
+       psrlq   %mm7, %mm3
+
+       por     %mm1, %mm3              C mm3 ready to store
+       jc      L(finish)               C carry from the subl, eax was under 8
+
+
+       C The comments in mpn_rshift apply here too.
+
+       ALIGN(8)
+L(unroll_loop):
+       C eax   counter, limbs
+       C ebx   src
+       C ecx
+       C edx   dst
+       C esi
+       C edi
+       C
+       C mm0
+       C mm1
+       C mm2   src qword from 16(%ebx,%eax,4)
+       C mm3   dst qword ready to store to 24(%edx,%eax,4)
+       C
+       C mm5   return value
+       C mm6   lshift
+       C mm7   rshift
+
+       movq    8(%ebx,%eax,4), %mm0
+       psllq   %mm6, %mm2
+
+       movq    %mm0, %mm1
+       psrlq   %mm7, %mm0
+
+       movq    %mm3, 24(%edx,%eax,4)   C prev
+       por     %mm2, %mm0
+
+       movq    (%ebx,%eax,4), %mm3     C
+       psllq   %mm6, %mm1              C
+
+       movq    %mm0, 16(%edx,%eax,4)
+       movq    %mm3, %mm2              C
+
+       psrlq   %mm7, %mm3              C
+       subl    $4, %eax
+
+       por     %mm1, %mm3              C
+       jnc     L(unroll_loop)          C carry from the subl
+
+
+
+L(finish):
+       C eax   -4 to -1 representing respectively 0 to 3 limbs remaining
+
+       testb   $2, %al
+
+       jz      L(finish_no_two)
+
+       movq    8(%ebx,%eax,4), %mm0
+       psllq   %mm6, %mm2
+
+       movq    %mm0, %mm1
+       psrlq   %mm7, %mm0
+
+       movq    %mm3, 24(%edx,%eax,4)   C prev
+       por     %mm2, %mm0
+
+       movq    %mm1, %mm2
+       movq    %mm0, %mm3
+
+       subl    $2, %eax
+L(finish_no_two):
+
+
+       C eax   -4 or -3 representing respectively 0 or 1 limbs remaining
+       C
+       C mm2   src prev qword, from 16(%ebx,%eax,4)
+       C mm3   dst qword, for 24(%edx,%eax,4)
+
+       testb   $1, %al
+       movd    %mm5, %eax      C retval
+
+       popl    %edi
+       jz      L(finish_zero)          C flags from the testb
+
+
+       C One extra src limb, destination was aligned.
+       C
+       C                 source                  ebx
+       C                 --+---------------+-------+
+       C                   |      mm2      |       |
+       C                 --+---------------+-------+
+       C
+       C dest         edx+12           edx+4     edx
+       C --+---------------+---------------+-------+
+       C   |      mm3      |               |       |
+       C --+---------------+---------------+-------+
+       C
+       C mm6 = shift
+       C mm7 = ecx = 64-shift
+
+
+       C One extra src limb, destination was unaligned.
+       C
+       C                 source                  ebx
+       C                 --+---------------+-------+
+       C                   |      mm2      |       |
+       C                 --+---------------+-------+
+       C
+       C         dest         edx+12           edx+4
+       C         --+---------------+---------------+
+       C           |      mm3      |               |
+       C         --+---------------+---------------+
+       C
+       C mm6 = shift+32
+       C mm7 = ecx = 64-(shift+32)
+
+
+       C In both cases there's one extra limb of src to fetch and combine
+       C with mm2 to make a qword at 4(%edx), and in the aligned case
+       C there's an extra limb of dst to be formed from that extra src limb
+       C left shifted.
+
+
+       movd    (%ebx), %mm0
+       psllq   %mm6, %mm2
+
+       movq    %mm3, 12(%edx)
+       psllq   $32, %mm0
+
+       movq    %mm0, %mm1
+       psrlq   %mm7, %mm0
+
+       por     %mm2, %mm0
+       psllq   %mm6, %mm1
+
+       movq    %mm0, 4(%edx)
+       psrlq   $32, %mm1
+
+       andl    $32, %ecx               C non-zero if dst was aligned (assuming shift 1 to 31)
+       popl    %ebx
+
+       jz      L(finish_one_unaligned) C flags from the andl
+
+       movd    %mm1, (%edx)            C extra dst low limb, aligned case only
+L(finish_one_unaligned):
+
+       emms
+
+       ret
+
+
+L(finish_zero):
+
+       C No extra src limbs, destination was aligned.
+       C
+       C                 source          ebx
+       C                 --+---------------+
+       C                   |      mm2      |
+       C                 --+---------------+
+       C
+       C dest          edx+8             edx
+       C --+---------------+---------------+
+       C   |      mm3      |               |
+       C --+---------------+---------------+
+       C
+       C mm6 = shift
+       C mm7 = ecx = 64-shift
+
+
+       C No extra src limbs, destination was unaligned.
+       C
+       C               source            ebx
+       C                 --+---------------+
+       C                   |      mm2      |
+       C                 --+---------------+
+       C
+       C         dest          edx+8   edx+4
+       C         --+---------------+-------+
+       C           |      mm3      |       |
+       C         --+---------------+-------+
+       C
+       C mm6 = shift+32
+       C mm7 = ecx = 64-(shift+32)
+
+
+       C The movd for the unaligned case writes the same data to 4(%edx)
+       C that the movq does for the aligned case.
+
+
+       movq    %mm3, 8(%edx)
+       andl    $32, %ecx               C non-zero if dst was aligned (assuming shift 1 to 31)
+
+       psllq   %mm6, %mm2
+       jz      L(finish_zero_unaligned)        C flags from the andl
+
+       movq    %mm2, (%edx)
+L(finish_zero_unaligned):
+
+       psrlq   $32, %mm2
+       popl    %ebx
+
+       movd    %mm5, %eax      C retval again, eax already has it from finish_no_two
+
+       movd    %mm2, 4(%edx)
+
+       emms
+
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/pentium/mmx/mul_1.asm b/mpn/x86/pentium/mmx/mul_1.asm

new file mode 100644 (file)

index 0000000..b9fe77e
--- /dev/null
+++ b/mpn/x86/pentium/mmx/mul_1.asm
@@ -0,0 +1,360 @@
+dnl  Intel Pentium MMX mpn_mul_1 -- mpn by limb multiplication.
+
+dnl  Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C    cycles/limb
+C P5:   12.0   for 32-bit multiplier
+C        7.0   for 16-bit multiplier
+
+
+C mp_limb_t mpn_mul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                      mp_limb_t multiplier);
+C
+C When the multiplier is 16 bits some special case MMX code is used.  Small
+C multipliers might arise reasonably often from mpz_mul_ui etc.  If the size
+C is odd there's roughly a 5 cycle penalty, so times for say size==7 and
+C size==8 end up being quite close.  If src isn't aligned to an 8 byte
+C boundary then one limb is processed separately with roughly a 5 cycle
+C penalty, so in that case it's say size==8 and size==9 which are close.
+C
+C Alternatives:
+C
+C MMX is not believed to be of any use for 32-bit multipliers, since for
+C instance the current method would just have to be more or less duplicated
+C for the high and low halves of the multiplier, and would probably
+C therefore run at about 14 cycles, which is slower than the plain integer
+C at 12.
+C
+C Adding the high and low MMX products using integer code seems best.  An
+C attempt at using paddd and carry bit propagation with pcmpgtd didn't give
+C any joy.  Perhaps something could be done keeping the values signed and
+C thereby avoiding adjustments to make pcmpgtd into an unsigned compare, or
+C perhaps not.
+C
+C Future:
+C
+C An mpn_mul_1c entrypoint would need a double carry out of the low result
+C limb in the 16-bit code, unless it could be assumed the carry fits in 16
+C bits, possibly as carry<multiplier, this being true of a big calculation
+C done piece by piece.  But let's worry about that if/when mul_1c is
+C actually used.
+
+defframe(PARAM_MULTIPLIER,16)
+defframe(PARAM_SIZE,      12)
+defframe(PARAM_SRC,       8)
+defframe(PARAM_DST,       4)
+
+       TEXT
+
+       ALIGN(8)
+PROLOGUE(mpn_mul_1)
+deflit(`FRAME',0)
+
+       movl    PARAM_SIZE, %ecx
+       movl    PARAM_SRC, %edx
+
+       cmpl    $1, %ecx
+       jne     L(two_or_more)
+
+       C one limb only, no registers need saving on this path
+
+       movl    PARAM_MULTIPLIER, %eax
+       movl    PARAM_DST, %ecx
+
+       mull    (%edx)                  C edx:eax = m * src[0]
+
+       movl    %eax, (%ecx)            C dst[0], low limb of the product
+       movl    %edx, %eax              C return the high limb
+
+       ret
+
+
+L(two_or_more):
+       C eax
+       C ebx
+       C ecx   size
+       C edx   src
+       C esi
+       C edi
+       C ebp
+
+       pushl   %esi            FRAME_pushl()
+       pushl   %edi            FRAME_pushl()
+
+       movl    %edx, %esi              C src
+       movl    PARAM_DST, %edi
+
+       movl    PARAM_MULTIPLIER, %eax
+       pushl   %ebx            FRAME_pushl()
+
+       leal    (%esi,%ecx,4), %esi     C src end
+       leal    (%edi,%ecx,4), %edi     C dst end
+
+       negl    %ecx                    C -size
+
+       pushl   %ebp            FRAME_pushl()
+       cmpl    $65536, %eax
+
+       jb      L(small)                C multiplier fits in 16 bits
+
+
+L(big):
+       xorl    %ebx, %ebx              C carry limb
+       sarl    %ecx                    C -size/2, carry set if size odd
+
+       jnc     L(top)                  C with carry flag clear
+
+
+       C size was odd, process one limb separately
+
+       mull    4(%esi,%ecx,8)          C m * src[0]
+
+       movl    %eax, 4(%edi,%ecx,8)
+       incl    %ecx
+
+       orl     %edx, %ebx              C carry limb, and clear carry flag
+
+
+L(top):
+       C eax
+       C ebx   carry
+       C ecx   counter, negative
+       C edx
+       C esi   src end
+       C edi   dst end
+       C ebp   (scratch carry)
+
+       adcl    $0, %ebx
+       movl    (%esi,%ecx,8), %eax
+
+       mull    PARAM_MULTIPLIER
+
+       movl    %edx, %ebp
+       addl    %eax, %ebx
+
+       adcl    $0, %ebp
+       movl    4(%esi,%ecx,8), %eax
+
+       mull    PARAM_MULTIPLIER
+
+       movl    %ebx, (%edi,%ecx,8)
+       addl    %ebp, %eax
+
+       movl    %eax, 4(%edi,%ecx,8)
+       incl    %ecx                    C incl leaves the carry from the addl intact
+
+       movl    %edx, %ebx
+       jnz     L(top)
+
+
+       adcl    $0, %ebx                C fold in the carry from the last addl
+       popl    %ebp
+
+       movl    %ebx, %eax
+       popl    %ebx
+
+       popl    %edi
+       popl    %esi
+
+       ret
+
+
+L(small):
+       C Special case for 16-bit multiplier.
+       C
+       C eax   multiplier
+       C ebx
+       C ecx   -size
+       C edx   src
+       C esi   src end
+       C edi   dst end
+       C ebp
+
+       C size<3 not supported here.  At size==3 we're already a couple of
+       C cycles faster, so there's no threshold as such, just use the MMX
+       C as soon as possible.
+
+       cmpl    $-3, %ecx
+       ja      L(big)
+
+       movd    %eax, %mm7              C m
+       pxor    %mm6, %mm6              C initial carry word
+
+       punpcklwd %mm7, %mm7            C m replicated 2 times
+       addl    $2, %ecx                C -size+2
+
+       punpckldq %mm7, %mm7            C m replicated 4 times
+       andl    $4, %edx                C test alignment, clear carry flag
+
+       movq    %mm7, %mm0              C m
+       jz      L(small_entry)
+
+
+       C Source is unaligned, process one limb separately.
+       C
+       C Plain integer code is used here, since it's smaller and is about
+       C the same 13 cycles as an mmx block would be.
+       C
+       C An "addl $1,%ecx" doesn't clear the carry flag when size==3, hence
+       C the use of separate incl and orl.
+
+       mull    -8(%esi,%ecx,4)         C m * src[0]
+
+       movl    %eax, -8(%edi,%ecx,4)   C dst[0]
+       incl    %ecx                    C one limb processed
+
+       movd    %edx, %mm6              C initial carry
+
+       orl     %eax, %eax              C clear carry flag
+       jmp     L(small_entry)
+
+
+C The scheduling here is quite tricky, since so many instructions have
+C pairing restrictions.  In particular the js won't pair with a movd, and
+C can't be paired with an adc since it wants flags from the inc, so
+C instructions are rotated to the top of the loop to find somewhere useful
+C for it.
+C
+C Trouble has been taken to avoid overlapping successive loop iterations,
+C since that would greatly increase the size of the startup and finishup
+C code.  Actually there's probably not much advantage to be had from
+C overlapping anyway, since the difficulties are mostly with pairing, not
+C with latencies as such.
+C
+C In the comments x represents the src data and m the multiplier (16
+C bits, but replicated 4 times).
+C
+C The m signs calculated in %mm3 are a loop invariant and could be held in
+C say %mm5, but that would save only one instruction and hence be no faster.
+
+L(small_top):
+       C eax   l.low, then l.high
+       C ebx   (h.low)
+       C ecx   counter, -size+2 to 0 or 1
+       C edx   (h.high)
+       C esi   &src[size]
+       C edi   &dst[size]
+       C ebp
+       C
+       C %mm0  (high products)
+       C %mm1  (low products)
+       C %mm2  (adjust for m using x signs)
+       C %mm3  (adjust for x using m signs)
+       C %mm4
+       C %mm5
+       C %mm6  h.low, then carry
+       C %mm7  m replicated 4 times
+
+       movd    %mm6, %ebx              C h.low
+       psrlq   $32, %mm1               C l.high
+
+       movd    %mm0, %edx              C h.high
+       movq    %mm0, %mm6              C new c
+
+       adcl    %eax, %ebx
+       incl    %ecx
+
+       movd    %mm1, %eax              C l.high
+       movq    %mm7, %mm0
+
+       adcl    %eax, %edx
+       movl    %ebx, -16(%edi,%ecx,4)
+
+       movl    %edx, -12(%edi,%ecx,4)
+       psrlq   $32, %mm6               C c
+
+L(small_entry):
+       pmulhw  -8(%esi,%ecx,4), %mm0   C h = (x*m).high
+       movq    %mm7, %mm1
+
+       pmullw  -8(%esi,%ecx,4), %mm1   C l = (x*m).low
+       movq    %mm7, %mm3
+
+       movq    -8(%esi,%ecx,4), %mm2   C x
+       psraw   $15, %mm3               C m signs
+
+       pand    -8(%esi,%ecx,4), %mm3   C x selected by m signs
+       psraw   $15, %mm2               C x signs
+
+       paddw   %mm3, %mm0              C add x to h if m neg
+       pand    %mm7, %mm2              C m selected by x signs
+
+       paddw   %mm2, %mm0              C add m to h if x neg
+       incl    %ecx
+
+       movd    %mm1, %eax              C l.low
+       punpcklwd %mm0, %mm6            C c + h.low << 16
+
+       psrlq   $16, %mm0               C h.high
+       js      L(small_top)            C sign flag from the incl
+
+
+
+
+       movd    %mm6, %ebx              C h.low
+       psrlq   $32, %mm1               C l.high
+
+       adcl    %eax, %ebx
+       popl    %ebp            FRAME_popl()
+
+       movd    %mm0, %edx              C h.high
+       psrlq   $32, %mm0               C c
+
+       movd    %mm1, %eax              C l.high
+
+       adcl    %eax, %edx
+       movl    %ebx, -12(%edi,%ecx,4)
+
+       movd    %mm0, %eax              C c
+
+       adcl    $0, %eax
+       movl    %edx, -8(%edi,%ecx,4)
+
+       orl     %ecx, %ecx
+       jnz     L(small_done)           C final %ecx==1 means even, ==0 odd
+
+
+       C Size odd, one extra limb to process.
+       C Plain integer code is used here, since it's smaller and is about
+       C the same speed as another mmx block would be.
+
+       movl    %eax, %ecx              C carry limb so far
+       movl    PARAM_MULTIPLIER, %eax
+
+       mull    -4(%esi)                C m * src[size-1]
+
+       addl    %ecx, %eax
+
+       adcl    $0, %edx
+       movl    %eax, -4(%edi)
+
+       movl    %edx, %eax              C return the high limb
+L(small_done):
+       popl    %ebx
+
+       popl    %edi
+       popl    %esi
+
+       emms
+
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/pentium/mmx/rshift.asm b/mpn/x86/pentium/mmx/rshift.asm

new file mode 100644 (file)

index 0000000..f50b8ab
--- /dev/null
+++ b/mpn/x86/pentium/mmx/rshift.asm
@@ -0,0 +1,457 @@
+dnl  Intel P5 mpn_rshift -- mpn right shift.
+
+dnl  Copyright 2000, 2002 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C P5: 1.75 cycles/limb.
+
+
+C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                       unsigned shift);
+C
+C Shift src,size right by shift many bits and store the result in dst,size.
+C Zeros are shifted in at the left.  Return the bits shifted out at the
+C right.
+C
+C It takes 6 mmx instructions to process 2 limbs, making 1.5 cycles/limb,
+C and with a 4 limb loop and 1 cycle of loop overhead the total is 1.75 c/l.
+C
+C Full speed depends on source and destination being aligned.  Unaligned mmx
+C loads and stores on P5 don't pair and have a 2 cycle penalty.  Some hairy
+C setups and finish-ups are done to ensure alignment for the loop.
+C
+C MMX shifts work out a bit faster even for the simple loop.
+
+defframe(PARAM_SHIFT,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC,  8)
+defframe(PARAM_DST,  4)
+deflit(`FRAME',0)
+
+dnl  Minimum 5, because the unrolled loop can't handle less.
+deflit(UNROLL_THRESHOLD, 5)
+
+       TEXT
+       ALIGN(8)
+
+PROLOGUE(mpn_rshift)
+
+       pushl   %ebx                    C saved, restored before each ret
+       pushl   %edi                    C saved, restored before each ret
+deflit(`FRAME',8)
+
+       movl    PARAM_SIZE, %eax        C eax = size
+       movl    PARAM_DST, %edx         C edx = dst
+
+       movl    PARAM_SRC, %ebx         C ebx = src
+       movl    PARAM_SHIFT, %ecx       C ecx = shift
+
+       cmp     $UNROLL_THRESHOLD, %eax
+       jae     L(unroll)               C size >= UNROLL_THRESHOLD, unsigned
+
+       decl    %eax                    C size-1, sets flags for the jnz below
+       movl    (%ebx), %edi            C src low limb
+
+       jnz     L(simple)               C size-1 != 0
+
+       shrdl(  %cl, %edi, %eax)        C eax was decremented to zero
+
+       shrl    %cl, %edi               C src[0] >> shift
+
+       movl    %edi, (%edx)            C dst low limb
+       popl    %edi                    C risk of data cache bank clash
+
+       popl    %ebx
+
+       ret
+
+
+C -----------------------------------------------------------------------------
+       ALIGN(8)
+L(simple):
+       C eax   size-1
+       C ebx   src
+       C ecx   shift
+       C edx   dst
+       C esi
+       C edi
+       C ebp
+deflit(`FRAME',8)
+
+       movd    (%ebx), %mm5            C src[0]
+       leal    (%ebx,%eax,4), %ebx     C &src[size-1]
+
+       movd    %ecx, %mm6              C rshift
+       leal    -4(%edx,%eax,4), %edx   C &dst[size-2]
+
+       psllq   $32, %mm5               C src[0] to the high dword, for retval
+       negl    %eax                    C -(size-1), loop counts up to zero
+
+
+C This loop is 5 or 8 cycles, with every second load unaligned and a wasted
+C cycle waiting for the mm0 result to be ready.  For comparison a shrdl is 4
+C cycles and would be 8 in a simple loop.  Using mmx helps the return value
+C and last limb calculations too.
+
+L(simple_top):
+       C eax   counter, limbs, negative
+       C ebx   &src[size-1]
+       C ecx   shift (unused here, a copy is in mm6)
+       C edx   &dst[size-2]
+       C
+       C mm0   scratch
+       C mm5   return value
+       C mm6   shift
+
+       movq    (%ebx,%eax,4), %mm0     C two src limbs, lower one in low dword
+       incl    %eax                    C sets flags for the jnz below
+
+       psrlq   %mm6, %mm0              C low dword is now the dst limb
+
+       movd    %mm0, (%edx,%eax,4)
+       jnz     L(simple_top)
+
+
+       movd    (%ebx), %mm0            C src[size-1], the top limb
+       psrlq   %mm6, %mm5              C return value
+
+       psrlq   %mm6, %mm0              C top dst limb, zeros shifted in
+       popl    %edi
+
+       movd    %mm5, %eax              C low dword holds the bits shifted out
+       popl    %ebx
+
+       movd    %mm0, 4(%edx)           C dst[size-1]
+
+       emms
+
+       ret
+
+
+C -----------------------------------------------------------------------------
+       ALIGN(8)
+L(unroll):
+       C eax   size
+       C ebx   src
+       C ecx   shift
+       C edx   dst
+       C esi
+       C edi
+       C ebp
+deflit(`FRAME',8)
+
+       movd    (%ebx), %mm5            C src[0]
+       movl    $4, %edi                C mask for address bit 2
+
+       movd    %ecx, %mm6              C rshift
+       testl   %edi, %ebx              C test src & 4
+
+       psllq   $32, %mm5               C src[0] to the high dword, for retval
+       jz      L(start_src_aligned)
+
+
+       C src isn't aligned, process low limb separately (marked xxx) and
+       C step src and dst by one limb, making src aligned.
+       C
+       C source                  ebx
+       C --+-------+-------+-------+
+       C           |          xxx  |
+       C --+-------+-------+-------+
+       C         4mod8   0mod8   4mod8
+       C
+       C         dest            edx
+       C         --+-------+-------+
+       C           |       |  xxx  |
+       C         --+-------+-------+
+
+       movq    (%ebx), %mm0            C unaligned load
+
+       psrlq   %mm6, %mm0              C low dword is dst[0]
+       addl    $4, %ebx                C step src by one limb
+
+       decl    %eax                    C one limb less to do
+
+       movd    %mm0, (%edx)
+       addl    $4, %edx                C step dst by one limb
+L(start_src_aligned):
+
+
+       movq    (%ebx), %mm1            C first aligned src qword
+       testl   %edi, %edx              C test dst & 4
+
+       psrlq   %mm6, %mm5              C retval
+       jz      L(start_dst_aligned)
+
+       C dst isn't aligned, add 4 to make it so, and pretend the shift is
+       C 32 bits extra.  Low limb of dst (marked xxx) handled here
+       C separately.
+       C
+       C          source          ebx
+       C          --+-------+-------+
+       C            |      mm1      |
+       C          --+-------+-------+
+       C                  4mod8   0mod8
+       C
+       C  dest                    edx
+       C  --+-------+-------+-------+
+       C                    |  xxx  |
+       C  --+-------+-------+-------+
+       C          4mod8   0mod8   4mod8
+
+       movq    %mm1, %mm0
+       addl    $32, %ecx               C new shift
+
+       psrlq   %mm6, %mm0              C mm6 is still the original shift here
+
+       movd    %ecx, %mm6              C mm6 = shift+32 from here on
+
+       movd    %mm0, (%edx)
+       addl    $4, %edx
+L(start_dst_aligned):
+
+
+       movq    8(%ebx), %mm3
+       negl    %ecx
+
+       movq    %mm3, %mm2              C mm2 src qword
+       addl    $64, %ecx               C ecx = 64 minus the shift held in mm6
+
+       movd    %ecx, %mm7              C lshift
+       psrlq   %mm6, %mm1
+
+       leal    -12(%ebx,%eax,4), %ebx
+       leal    -20(%edx,%eax,4), %edx
+
+       psllq   %mm7, %mm3
+       subl    $7, %eax                C size-7
+
+       por     %mm1, %mm3              C mm3 ready to store
+       negl    %eax                    C -(size-7)
+
+       jns     L(finish)               C skip the loop if -(size-7) >= 0
+
+
+       C This loop is the important bit, the rest is just support.  Careful
+       C instruction scheduling achieves the claimed 1.75 c/l.  The
+       C relevant parts of the pairing rules are:
+       C
+       C - mmx loads and stores execute only in the U pipe
+       C - only one mmx shift in a pair
+       C - wait one cycle before storing an mmx register result
+       C - the usual address generation interlock
+       C
+       C Two qword calculations are slightly interleaved.  The instructions
+       C marked "C" belong to the second qword, and the "C prev" one is for
+       C the second qword from the previous iteration.
+
+       ALIGN(8)
+L(unroll_loop):
+       C eax   counter, limbs, negative
+       C ebx   adjusted src + 4*size - 12 bytes (leal before the loop)
+       C ecx
+       C edx   adjusted dst + 4*size - 20 bytes (leal before the loop)
+       C esi
+       C edi
+       C
+       C mm0
+       C mm1
+       C mm2   src qword from -8(%ebx,%eax,4)
+       C mm3   dst qword ready to store to -8(%edx,%eax,4)
+       C
+       C mm5   return value
+       C mm6   rshift
+       C mm7   lshift
+
+       movq    (%ebx,%eax,4), %mm0
+       psrlq   %mm6, %mm2
+
+       movq    %mm0, %mm1
+       psllq   %mm7, %mm0
+
+       movq    %mm3, -8(%edx,%eax,4)   C prev
+       por     %mm2, %mm0
+
+       movq    8(%ebx,%eax,4), %mm3    C
+       psrlq   %mm6, %mm1              C
+
+       movq    %mm0, (%edx,%eax,4)
+       movq    %mm3, %mm2              C
+
+       psllq   %mm7, %mm3              C
+       addl    $4, %eax
+
+       por     %mm1, %mm3              C
+       js      L(unroll_loop)
+
+
+L(finish):
+       C eax   0 to 3 representing respectively 3 to 0 limbs remaining
+
+       testb   $2, %al
+
+       jnz     L(finish_no_two)        C eax is 2 or 3, no two-limb step needed
+
+       movq    (%ebx,%eax,4), %mm0
+       psrlq   %mm6, %mm2
+
+       movq    %mm0, %mm1
+       psllq   %mm7, %mm0
+
+       movq    %mm3, -8(%edx,%eax,4)   C prev
+       por     %mm2, %mm0
+
+       movq    %mm1, %mm2
+       movq    %mm0, %mm3
+
+       addl    $2, %eax
+L(finish_no_two):
+
+
+       C eax   2 or 3 representing respectively 1 or 0 limbs remaining
+       C
+       C mm2   src prev qword, from -8(%ebx,%eax,4)
+       C mm3   dst qword, for -8(%edx,%eax,4)
+
+       testb   $1, %al
+       popl    %edi
+
+       movd    %mm5, %eax      C retval
+       jnz     L(finish_zero)          C flags from the testb, eax was 3
+
+
+       C One extra limb, destination was aligned.
+       C
+       C source                ebx
+       C +-------+---------------+--
+       C |       |      mm2      |
+       C +-------+---------------+--
+       C
+       C dest                                  edx
+       C +-------+---------------+---------------+--
+       C |       |               |      mm3      |
+       C +-------+---------------+---------------+--
+       C
+       C mm6 = shift
+       C mm7 = ecx = 64-shift
+
+
+       C One extra limb, destination was unaligned.
+       C
+       C source                ebx
+       C +-------+---------------+--
+       C |       |      mm2      |
+       C +-------+---------------+--
+       C
+       C dest                          edx
+       C +---------------+---------------+--
+       C |               |      mm3      |
+       C +---------------+---------------+--
+       C
+       C mm6 = shift+32
+       C mm7 = ecx = 64-(shift+32)
+
+
+       C In both cases there's one extra limb of src to fetch and combine
+       C with mm2 to make a qword at 8(%edx), and in the aligned case
+       C there's a further extra limb of dst to be formed.
+
+
+       movd    8(%ebx), %mm0           C the one extra src limb
+       psrlq   %mm6, %mm2
+
+       movq    %mm0, %mm1
+       psllq   %mm7, %mm0
+
+       movq    %mm3, (%edx)
+       por     %mm2, %mm0
+
+       psrlq   %mm6, %mm1
+       andl    $32, %ecx               C nonzero if dst was aligned, given shift 1..31
+
+       popl    %ebx
+       jz      L(finish_one_unaligned) C flags from the andl
+
+       C dst was aligned, must store one extra limb
+       movd    %mm1, 16(%edx)
+L(finish_one_unaligned):
+
+       movq    %mm0, 8(%edx)
+
+       emms
+
+       ret
+
+
+L(finish_zero):
+
+       C No extra limbs, destination was aligned.
+       C
+       C source        ebx
+       C +---------------+--
+       C |      mm2      |
+       C +---------------+--
+       C
+       C dest                        edx+4
+       C +---------------+---------------+--
+       C |               |      mm3      |
+       C +---------------+---------------+--
+       C
+       C mm6 = shift
+       C mm7 = ecx = 64-shift
+
+
+       C No extra limbs, destination was unaligned.
+       C
+       C source        ebx
+       C +---------------+--
+       C |      mm2      |
+       C +---------------+--
+       C
+       C dest                edx+4
+       C +-------+---------------+--
+       C |       |      mm3      |
+       C +-------+---------------+--
+       C
+       C mm6 = shift+32
+       C mm7 = 64-(shift+32)
+
+
+       C The movd for the unaligned case is clearly the same data as the
+       C movq for the aligned case, it's just a choice between whether one
+       C or two limbs should be written.
+
+
+       movq    %mm3, 4(%edx)
+       psrlq   %mm6, %mm2
+
+       movd    %mm2, 12(%edx)          C one limb, enough if dst was unaligned
+       andl    $32, %ecx               C nonzero if dst was aligned, given shift 1..31
+
+       popl    %ebx
+       jz      L(finish_zero_unaligned)        C flags from the andl
+
+       movq    %mm2, 12(%edx)          C aligned dst, write both limbs
+L(finish_zero_unaligned):
+
+       emms
+
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/pentium/mod_34lsub1.asm b/mpn/x86/pentium/mod_34lsub1.asm

new file mode 100644 (file)

index 0000000..201081a
--- /dev/null
+++ b/mpn/x86/pentium/mod_34lsub1.asm
@@ -0,0 +1,181 @@
+dnl  Intel P5 mpn_mod_34lsub1 -- mpn remainder modulo 2**24-1.
+
+dnl  Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C P5: 1.66 cycles/limb
+
+
+C mp_limb_t mpn_mod_34lsub1 (mp_srcptr src, mp_size_t size)
+C Return a value congruent to src,size mod 2^24-1, not necessarily reduced.
+
+defframe(PARAM_SIZE, 8)
+defframe(PARAM_SRC,  4)
+
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_mod_34lsub1)
+deflit(`FRAME',0)
+
+       movl    PARAM_SIZE, %ecx
+       movl    PARAM_SRC, %edx
+
+       subl    $2, %ecx                C size-2
+       ja      L(three_or_more)        C size > 2, unsigned
+
+       movl    (%edx), %eax            C src[0], flags from the subl still live
+       jne     L(one)                  C size-2 != 0, return src[0] unchanged
+
+
+       movl    4(%edx), %ecx           C src[1]
+       movl    %eax, %edx
+
+       shrl    $24, %edx               C src[0] bits 24-31, weight 2^24 == 1
+       andl    $0xFFFFFF, %eax         C src[0] low 24 bits
+
+       addl    %edx, %eax
+       movl    %ecx, %edx
+
+       shrl    $16, %ecx               C src[1] high 16 bits, weight 2^48 == 1
+       andl    $0xFFFF, %edx           C src[1] low 16 bits
+
+       shll    $8, %edx                C their weight is 2^32 == 2^8
+       addl    %ecx, %eax
+
+       addl    %edx, %eax
+
+L(one):
+       ret
+
+
+L(three_or_more):
+       C eax
+       C ebx
+       C ecx   size-2
+       C edx   src
+       C esi
+       C edi
+       C ebp
+
+       pushl   %ebx    FRAME_pushl()
+       pushl   %esi    FRAME_pushl()
+
+       pushl   %edi    FRAME_pushl()
+       pushl   %ebp    FRAME_pushl()
+
+       xorl    %esi, %esi              C 0mod3
+       xorl    %edi, %edi              C 1mod3
+
+       xorl    %ebp, %ebp              C 2mod3, and clear carry
+
+L(top):
+       C eax   scratch
+       C ebx   scratch
+       C ecx   counter, limbs
+       C edx   src
+       C esi   0mod3
+       C edi   1mod3
+       C ebp   2mod3
+
+       movl    (%edx), %eax
+       movl    4(%edx), %ebx
+
+       adcl    %eax, %esi              C carry in is from the previous pass
+       movl    8(%edx), %eax
+
+       adcl    %ebx, %edi
+       leal    12(%edx), %edx          C src += 3 limbs, leal keeps the carry
+
+       adcl    %eax, %ebp
+       leal    -2(%ecx), %ecx          C with the decl below, counter -= 3
+
+       decl    %ecx                    C sets flags for the jg, carry untouched
+       jg      L(top)
+
+
+       C ecx is -2, -1 or 0, representing 0, 1 or 2 more limbs, respectively
+
+       movl    $0xFFFFFFFF, %ebx       C mask
+       incl    %ecx
+
+       js      L(combine)              C 0 more
+
+       movl    (%edx), %eax
+       movl    $0xFFFFFF00, %ebx       C mask for a carry out of a 0mod3 add
+
+       adcl    %eax, %esi
+       decl    %ecx
+
+       js      L(combine)              C 1 more
+
+       movl    4(%edx), %eax
+       movl    $0xFFFF0000, %ebx       C mask for a carry out of a 1mod3 add
+
+       adcl    %eax, %edi
+
+
+
+L(combine):
+       C eax
+       C ebx   mask
+       C ecx
+       C edx
+       C esi   0mod3
+       C edi   1mod3
+       C ebp   2mod3
+
+       sbbl    %ecx, %ecx              C carry, -1 if set else 0
+       movl    %esi, %eax              C 0mod3
+
+       andl    %ebx, %ecx              C masked for position
+       andl    $0xFFFFFF, %eax         C 0mod3 low
+
+       shrl    $24, %esi               C 0mod3 high
+       subl    %ecx, %eax              C apply carry
+
+       addl    %esi, %eax              C apply 0mod3
+       movl    %edi, %ebx              C 1mod3
+
+       shrl    $16, %edi               C 1mod3 high
+       andl    $0x0000FFFF, %ebx
+
+       shll    $8, %ebx                C 1mod3 low
+       addl    %edi, %eax              C apply 1mod3 high
+
+       addl    %ebx, %eax              C apply 1mod3 low
+       movl    %ebp, %ebx              C 2mod3
+
+       shrl    $8, %ebp                C 2mod3 high
+       andl    $0xFF, %ebx
+
+       shll    $16, %ebx               C 2mod3 low
+       addl    %ebp, %eax              C apply 2mod3 high
+
+       addl    %ebx, %eax              C apply 2mod3 low
+
+       popl    %ebp
+       popl    %edi
+
+       popl    %esi
+       popl    %ebx
+
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/pentium/mode1o.asm b/mpn/x86/pentium/mode1o.asm

new file mode 100644 (file)

index 0000000..afb0ebe
--- /dev/null
+++ b/mpn/x86/pentium/mode1o.asm
@@ -0,0 +1,255 @@
+dnl  Intel Pentium mpn_modexact_1_odd -- exact division style remainder.
+
+dnl  Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C P5: 23.0 cycles/limb
+
+
+C mp_limb_t mpn_modexact_1_odd (mp_srcptr src, mp_size_t size,
+C                               mp_limb_t divisor);
+C mp_limb_t mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size,
+C                                mp_limb_t divisor, mp_limb_t carry);
+C
+C There seems no way to pair up the two lone instructions in the main loop.
+C
+C The special case for size==1 saves about 20 cycles (non-PIC), making it
+C the same as mpn_mod_1, and in fact making modexact faster than mod_1 at
+C all sizes.
+C
+C Alternatives:
+C
+C Using mmx for the multiplies might be possible, with pmullw and pmulhw
+C having just 3 cycle latencies, but carry bit handling would probably be
+C complicated.
+
+defframe(PARAM_CARRY,  16)
+defframe(PARAM_DIVISOR,12)
+defframe(PARAM_SIZE,   8)
+defframe(PARAM_SRC,    4)
+
+dnl  re-using parameter space
+define(VAR_INVERSE,`PARAM_SIZE')
+
+       TEXT
+
+       ALIGN(16)
+PROLOGUE(mpn_modexact_1c_odd)
+deflit(`FRAME',0)
+
+       movl    PARAM_DIVISOR, %eax
+       movl    PARAM_CARRY, %edx
+
+       jmp     L(start_1c)             C shared code in mpn_modexact_1_odd
+
+EPILOGUE()
+
+       ALIGN(16)
+PROLOGUE(mpn_modexact_1_odd)
+deflit(`FRAME',0)
+
+       movl    PARAM_DIVISOR, %eax
+       xorl    %edx, %edx              C carry
+
+L(start_1c):
+
+ifdef(`PIC',`
+       call    L(here)         FRAME_pushl()
+L(here):
+
+       shrl    %eax                    C d/2
+       movl    (%esp), %ecx            C eip
+
+       addl    $_GLOBAL_OFFSET_TABLE_+[.-L(here)], %ecx
+       movl    %ebx, (%esp)            C push ebx
+
+       andl    $127, %eax
+       movl    PARAM_SIZE, %ebx
+
+       movl    binvert_limb_table@GOT(%ecx), %ecx
+       subl    $2, %ebx
+
+       movb    (%eax,%ecx), %cl                        C inv 8 bits
+       jc      L(one_limb)
+
+',`
+dnl non-PIC
+       shrl    %eax                    C d/2
+       pushl   %ebx            FRAME_pushl()
+
+       movl    PARAM_SIZE, %ebx
+       andl    $127, %eax
+
+       subl    $2, %ebx                C size-2
+       jc      L(one_limb)             C borrow when size is below 2
+
+       movb    binvert_limb_table(%eax), %cl           C inv 8 bits
+')
+
+       movl    %ecx, %eax
+       addl    %ecx, %ecx              C 2*inv
+
+       imull   %eax, %eax              C inv*inv
+
+       imull   PARAM_DIVISOR, %eax     C inv*inv*d
+
+       subl    %eax, %ecx              C inv = 2*inv - inv*inv*d
+
+       movl    %ecx, %eax
+       addl    %ecx, %ecx              C 2*inv
+
+       imull   %eax, %eax              C inv*inv
+
+       imull   PARAM_DIVISOR, %eax     C inv*inv*d
+
+       subl    %eax, %ecx              C inv = 2*inv - inv*inv*d
+       pushl   %esi            FRAME_pushl()
+
+       ASSERT(e,`      C d*inv == 1 mod 2^GMP_LIMB_BITS
+       movl    %ecx, %eax
+       imull   PARAM_DIVISOR, %eax
+       cmpl    $1, %eax')
+
+       movl    PARAM_SRC, %esi
+       movl    %ecx, VAR_INVERSE       C stored in the PARAM_SIZE slot, size is in ebx
+
+       movl    (%esi), %eax            C src[0]
+       leal    4(%esi,%ebx,4), %esi    C &src[size-1]
+
+       xorl    $-1, %ebx               C -(size-1)
+       ASSERT(nz)
+       jmp     L(entry)                C eax = src[0], edx = initial carry
+
+
+C The use of VAR_INVERSE means only a store is needed for that value, rather
+C than a push and pop of say %edi.
+
+       ALIGN(16)
+L(top):
+       C eax   scratch, low product
+       C ebx   counter, limbs, negative
+       C ecx   carry bit
+       C edx   scratch, high product
+       C esi   &src[size-1]
+       C edi
+       C ebp
+
+       mull    PARAM_DIVISOR           C h:dummy = q*d
+
+       movl    (%esi,%ebx,4), %eax     C src[i]
+       subl    %ecx, %edx              C h -= -c
+
+L(entry):
+       subl    %edx, %eax              C s = src[i] - h
+
+       sbbl    %ecx, %ecx              C new -c (0 or -1)
+
+       imull   VAR_INVERSE, %eax       C q = s*i
+
+       incl    %ebx
+       jnz     L(top)
+
+
+       mull    PARAM_DIVISOR
+
+       movl    (%esi), %eax            C src high
+       subl    %ecx, %edx              C h -= -c
+
+       cmpl    PARAM_DIVISOR, %eax
+
+       jbe     L(skip_last)            C src high <= divisor, unsigned
+deflit(FRAME_LAST,FRAME)
+
+
+       subl    %edx, %eax              C s = src[i] - h
+       popl    %esi            FRAME_popl()
+
+       sbbl    %ecx, %ecx              C c (0 or -1)
+       popl    %ebx            FRAME_popl()
+
+       imull   VAR_INVERSE, %eax       C q = s*i
+
+       mull    PARAM_DIVISOR           C h:dummy = q*d
+
+       movl    %edx, %eax
+
+       subl    %ecx, %eax              C return h + c
+
+       ret
+
+
+C When high<=divisor (the jbe above) can skip last step.
+
+L(skip_last):
+deflit(`FRAME',FRAME_LAST)
+       C eax   src high
+       C ebx
+       C ecx
+       C edx   r
+       C esi
+
+       subl    %eax, %edx      C r-s
+       popl    %esi            FRAME_popl()
+
+       sbbl    %eax, %eax      C -1 if underflow
+       movl    PARAM_DIVISOR, %ebx
+
+       andl    %ebx, %eax      C divisor if underflow
+       popl    %ebx            FRAME_popl()
+
+       addl    %edx, %eax      C addback if underflow
+
+       ret
+
+
+C Special case for size==1 using a division for r = c-a mod d.
+C Could look for a-c<d and save a division sometimes, but that doesn't seem
+C worth bothering about.
+
+L(one_limb):
+deflit(`FRAME',4)
+       C eax
+       C ebx   size-2 (==-1)
+       C ecx
+       C edx   carry
+       C esi
+       C edi
+       C ebp
+
+       movl    %edx, %eax              C c, the carry
+       movl    PARAM_SRC, %edx
+
+       movl    PARAM_DIVISOR, %ecx
+       popl    %ebx            FRAME_popl()
+
+       subl    (%edx), %eax            C c-a
+
+       sbbl    %edx, %edx              C -1 if c<a, else 0
+       decl    %ecx                    C d-1
+
+       andl    %ecx, %edx              C b*d+c-a if c<a, or c-a if c>=a
+
+       divl    PARAM_DIVISOR           C edx:eax / d, high word is below d
+
+       movl    %edx, %eax              C return the remainder
+
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/pentium/mul_1.asm b/mpn/x86/pentium/mul_1.asm

new file mode 100644 (file)

index 0000000..c6b255c
--- /dev/null
+++ b/mpn/x86/pentium/mul_1.asm
@@ -0,0 +1,166 @@
+dnl  Intel Pentium mpn_mul_1 -- mpn by limb multiplication.
+
+dnl  Copyright 1992, 1994, 1996, 1999, 2000, 2002 Free Software Foundation,
+dnl  Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C P5: 12.0 cycles/limb
+
+
+C mp_limb_t mpn_mul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                      mp_limb_t multiplier);
+C mp_limb_t mpn_mul_1c (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                       mp_limb_t multiplier, mp_limb_t carry);
+C dst,size = src,size * multiplier (+ carry for mul_1c), high limb returned.
+
+defframe(PARAM_CARRY,     20)
+defframe(PARAM_MULTIPLIER,16)
+defframe(PARAM_SIZE,      12)
+defframe(PARAM_SRC,       8)
+defframe(PARAM_DST,       4)
+
+       TEXT
+       ALIGN(8)
+PROLOGUE(mpn_mul_1c)
+deflit(`FRAME',0)
+
+       movl    PARAM_CARRY, %ecx
+       pushl   %esi            FRAME_pushl()
+
+       jmp     L(start_1c)             C shared code in mpn_mul_1
+
+EPILOGUE()
+
+
+       ALIGN(8)
+PROLOGUE(mpn_mul_1)
+deflit(`FRAME',0)
+
+       xorl    %ecx, %ecx              C carry in is zero
+       pushl   %esi            FRAME_pushl()
+
+L(start_1c):
+       movl    PARAM_SRC, %esi
+       movl    PARAM_SIZE, %eax
+
+       shrl    %eax                    C size/2, carry flag = low bit of size
+       jnz     L(two_or_more)          C size/2 != 0
+
+
+       C one limb only
+
+       movl    (%esi), %eax
+
+       mull    PARAM_MULTIPLIER
+
+       addl    %eax, %ecx              C low product plus carry in
+       movl    PARAM_DST, %eax
+
+       adcl    $0, %edx                C high limb
+       popl    %esi
+
+       movl    %ecx, (%eax)
+       movl    %edx, %eax              C return the high limb
+
+       ret
+
+
+L(two_or_more):
+       C eax   size/2
+       C ebx
+       C ecx   carry
+       C edx
+       C esi   src
+       C edi
+       C ebp
+
+       pushl   %edi            FRAME_pushl()
+       pushl   %ebx            FRAME_pushl()
+
+       movl    PARAM_DST, %edi
+       leal    -1(%eax), %ebx          C size/2-1
+
+       notl    %ebx                    C -(size/2), preserve carry
+
+       leal    (%esi,%eax,8), %esi     C src end
+       leal    (%edi,%eax,8), %edi     C dst end
+
+       pushl   %ebp            FRAME_pushl()
+       jnc     L(top)                  C carry from the shrl clear, size even
+
+
+       C size was odd, process one limb separately
+
+       movl    (%esi,%ebx,8), %eax     C src[0]
+       addl    $4, %esi
+
+       mull    PARAM_MULTIPLIER
+
+       addl    %ecx, %eax              C add carry in
+       movl    %edx, %ecx              C high limb becomes the carry
+
+       movl    %eax, (%edi,%ebx,8)     C dst[0]
+       leal    4(%edi), %edi           C leal keeps the addl carry for L(top)
+
+
+L(top):
+       C eax
+       C ebx   counter, negative
+       C ecx   carry
+       C edx
+       C esi   src end
+       C edi   dst end
+       C ebp
+
+       adcl    $0, %ecx                C add the pending carry flag
+       movl    (%esi,%ebx,8), %eax
+
+       mull    PARAM_MULTIPLIER
+
+       movl    %edx, %ebp
+       addl    %eax, %ecx              C first result limb
+
+       adcl    $0, %ebp                C high limb plus carry
+       movl    4(%esi,%ebx,8), %eax
+
+       mull    PARAM_MULTIPLIER
+
+       movl    %ecx, (%edi,%ebx,8)
+       addl    %ebp, %eax              C carry out is taken by the next adcl
+
+       movl    %eax, 4(%edi,%ebx,8)
+       incl    %ebx                    C sets flags for the jnz, carry untouched
+
+       movl    %edx, %ecx
+       jnz     L(top)
+
+
+       adcl    $0, %ecx                C carry flag from the last addl
+       popl    %ebp
+
+       movl    %ecx, %eax              C return the carry limb
+       popl    %ebx
+
+       popl    %edi
+       popl    %esi
+
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/pentium/mul_2.asm b/mpn/x86/pentium/mul_2.asm

new file mode 100644 (file)

index 0000000..36a025c
--- /dev/null
+++ b/mpn/x86/pentium/mul_2.asm
@@ -0,0 +1,139 @@
+dnl  Intel Pentium mpn_mul_2 -- mpn by 2-limb multiplication.
+
+dnl  Copyright 2001, 2002 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C P5: 24.0 cycles/limb
+
+
+C mp_limb_t mpn_mul_2 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                      mp_srcptr mult);
+C
+C At 24 c/l this is only 2 cycles faster than a separate mul_1 and addmul_1,
+C but has the advantage of making just one pass over the operands.
+C
+C There's not enough registers to use PARAM_MULT directly, so the multiplier
+C limbs are transferred to local variables on the stack.
+
+defframe(PARAM_MULT, 16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC,   8)
+defframe(PARAM_DST,   4)
+
+dnl  re-use parameter space
+define(VAR_MULT_LOW, `PARAM_SRC')
+define(VAR_MULT_HIGH,`PARAM_DST')
+
+       TEXT
+       ALIGN(8)
+PROLOGUE(mpn_mul_2)
+deflit(`FRAME',0)
+
+       pushl   %esi            FRAME_pushl()
+       pushl   %edi            FRAME_pushl()
+
+       movl    PARAM_SRC, %esi
+       movl    PARAM_DST, %edi
+
+       movl    PARAM_MULT, %eax
+       movl    PARAM_SIZE, %ecx
+
+       movl    4(%eax), %edx           C mult high
+       movl    (%eax), %eax            C mult low
+
+       movl    %eax, VAR_MULT_LOW      C reuses the PARAM_SRC slot, src is in esi
+       movl    %edx, VAR_MULT_HIGH     C reuses the PARAM_DST slot, dst is in edi
+
+       pushl   %ebx            FRAME_pushl()
+       pushl   %ebp            FRAME_pushl()
+
+       mull    (%esi)                  C src[0] * mult[0]
+
+       movl    %eax, %ebp              C in case src==dst
+       movl    (%esi), %eax            C src[0]
+
+       movl    %ebp, (%edi)            C dst[0]
+       movl    %edx, %ebx              C initial low carry
+
+       xorl    %ebp, %ebp              C initial high carry
+       leal    (%edi,%ecx,4), %edi     C dst end
+
+       mull    VAR_MULT_HIGH           C src[0] * mult[1]
+
+       subl    $2, %ecx                C size-2
+       js      L(done)                 C size-2 negative, no more src limbs
+
+       leal    8(%esi,%ecx,4), %esi    C &src[size]
+       xorl    $-1, %ecx               C -(size-1)
+
+
+
+L(top):
+       C eax   low prod
+       C ebx   low carry
+       C ecx   counter, negative
+       C edx   high prod
+       C esi   src end
+       C edi   dst end
+       C ebp   high carry (0 or -1)
+
+       andl    $1, %ebp                C 1 or 0
+       addl    %eax, %ebx
+
+       adcl    %edx, %ebp
+       ASSERT(nc)
+       movl    (%esi,%ecx,4), %eax     C next src limb
+
+       mull    VAR_MULT_LOW
+
+       addl    %eax, %ebx              C low carry
+       movl    (%esi,%ecx,4), %eax     C same src limb again, for mult high
+
+       adcl    %ebp, %edx              C high carry
+       movl    %ebx, (%edi,%ecx,4)
+
+       sbbl    %ebp, %ebp              C new high carry, -1 or 0
+       movl    %edx, %ebx              C new low carry
+
+       mull    VAR_MULT_HIGH
+
+       incl    %ecx
+       jnz     L(top)
+
+
+L(done):
+       andl    $1, %ebp                C 1 or 0
+       addl    %ebx, %eax
+
+       adcl    %ebp, %edx
+       ASSERT(nc)
+       movl    %eax, (%edi)            C store carry low
+
+       movl    %edx, %eax              C return carry high
+
+       popl    %ebp
+       popl    %ebx
+
+       popl    %edi
+       popl    %esi
+
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/pentium/mul_basecase.asm b/mpn/x86/pentium/mul_basecase.asm

new file mode 100644 (file)

index 0000000..fd24fdf
--- /dev/null
+++ b/mpn/x86/pentium/mul_basecase.asm
@@ -0,0 +1,132 @@
+dnl  Intel Pentium mpn_mul_basecase -- mpn by mpn multiplication.
+
+dnl  Copyright 1996, 1998, 1999, 2000, 2002 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C P5: 14.2 cycles/crossproduct (approx)
+
+
+C void mpn_mul_basecase (mp_ptr wp,
+C                        mp_srcptr xp, mp_size_t xsize,
+C                        mp_srcptr yp, mp_size_t ysize);
+
+defframe(PARAM_YSIZE, 20)
+defframe(PARAM_YP,    16)
+defframe(PARAM_XSIZE, 12)
+defframe(PARAM_XP,    8)
+defframe(PARAM_WP,    4)
+
+defframe(VAR_COUNTER, -4)
+
+       TEXT
+       ALIGN(8)
+PROLOGUE(mpn_mul_basecase)
+
+       pushl   %eax                    C dummy push for allocating stack slot
+       pushl   %esi                    C esi, ebp, edi are restored before ret
+       pushl   %ebp
+       pushl   %edi
+deflit(`FRAME',16)
+
+       movl    PARAM_XP,%esi           C esi = xp
+       movl    PARAM_WP,%edi           C edi = wp
+       movl    PARAM_YP,%ebp           C ebp = yp
+
+       movl    (%esi),%eax             C load xp[0]
+       mull    (%ebp)                  C multiply by yp[0]
+       movl    %eax,(%edi)             C store to wp[0]
+       movl    PARAM_XSIZE,%ecx        C xsize
+       decl    %ecx                    C If xsize = 1, ysize = 1 too
+       jz      L(done)
+
+       movl    PARAM_XSIZE,%eax
+       pushl   %ebx                    C ebx is only saved on this path
+FRAME_pushl()
+       movl    %edx,%ebx               C high limb of xp[0]*yp[0] is first carry
+       leal    (%esi,%eax,4),%esi      C make xp point at end
+       leal    (%edi,%eax,4),%edi      C offset wp by xsize
+       negl    %ecx                    C negate j size/index for inner loop
+       xorl    %eax,%eax               C clear carry
+
+       ALIGN(8)
+L(oop1):       adcl    $0,%ebx         C add carry flag left by the addl below
+       movl    (%esi,%ecx,4),%eax      C load next limb at xp[j]
+       mull    (%ebp)                  C edx:eax = xp[j] * yp[0]
+       addl    %ebx,%eax               C add carry limb, carry flag feeds next adcl
+       movl    %eax,(%edi,%ecx,4)      C store wp[j]
+       incl    %ecx                    C incl leaves the carry flag alone
+       movl    %edx,%ebx               C high limb becomes the next carry limb
+       jnz     L(oop1)
+
+       adcl    $0,%ebx                 C carry flag from the last addl
+       movl    PARAM_YSIZE,%eax
+       movl    %ebx,(%edi)             C most significant limb of product
+       addl    $4,%edi                 C increment wp
+       decl    %eax                    C ysize-1 rows still to do
+       jz      L(skip)
+       movl    %eax,VAR_COUNTER        C set outer counter to ysize-1
+
+L(outer):
+       addl    $4,%ebp                 C make ebp point to next y limb
+       movl    PARAM_XSIZE,%ecx
+       negl    %ecx                    C -xsize, indexes back from the xp end
+       xorl    %ebx,%ebx               C zero carry limb and clear carry flag
+
+       C code at 0x61 here, close enough to aligned
+L(oop2):
+       adcl    $0,%ebx                 C add carry flag left by the addl below
+       movl    (%esi,%ecx,4),%eax      C load xp[j]
+       mull    (%ebp)                  C edx:eax = xp[j] * yp[i]
+       addl    %ebx,%eax               C add carry limb to low product
+       movl    (%edi,%ecx,4),%ebx      C fetch wp limb accumulated so far
+       adcl    $0,%edx                 C propagate into high product
+       addl    %eax,%ebx               C accumulate, carry flag feeds next adcl
+       movl    %ebx,(%edi,%ecx,4)      C store updated wp limb
+       incl    %ecx                    C incl leaves the carry flag alone
+       movl    %edx,%ebx               C high limb becomes the next carry limb
+       jnz     L(oop2)
+
+       adcl    $0,%ebx                 C carry flag from the last addl
+
+       movl    %ebx,(%edi)             C store the carry-out limb of this row
+       addl    $4,%edi                 C advance wp for the next row
+       movl    VAR_COUNTER,%eax
+       decl    %eax                    C one row fewer remaining
+       movl    %eax,VAR_COUNTER
+       jnz     L(outer)
+
+L(skip):
+       popl    %ebx
+       popl    %edi
+       popl    %ebp
+       popl    %esi
+       addl    $4,%esp                 C drop the dummy VAR_COUNTER slot
+       ret
+
+L(done):
+       movl    %edx,4(%edi)    C store to wp[1]
+       popl    %edi
+       popl    %ebp
+       popl    %esi
+       popl    %eax            C dummy pop for deallocating stack slot
+       ret
+
+EPILOGUE()
+
diff --git a/mpn/x86/pentium/popcount.asm b/mpn/x86/pentium/popcount.asm

new file mode 100644 (file)

index 0000000..df53bb8
--- /dev/null
+++ b/mpn/x86/pentium/popcount.asm
@@ -0,0 +1,123 @@
+dnl  Intel P5 mpn_popcount -- mpn bit population count.
+
+dnl  Copyright 2001, 2002 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C P5: 8.0 cycles/limb
+
+
+C unsigned long mpn_popcount (mp_srcptr src, mp_size_t size);
+C
+C An arithmetic approach has been found to be slower than the table lookup,
+C due to needing too many instructions.
+
+C The slightly strange quoting here helps the renaming done by tune/many.pl.
+deflit(TABLE_NAME,
+m4_assert_defined(`GSYM_PREFIX')
+GSYM_PREFIX`'mpn_popcount``'_table')
+
+       RODATA
+       ALIGN(8)
+       GLOBL   TABLE_NAME
+TABLE_NAME:
+forloop(i,0,255,
+`      .byte   m4_popcount(i)
+')
+
+defframe(PARAM_SIZE,8)
+defframe(PARAM_SRC, 4)
+
+       TEXT
+       ALIGN(8)
+
+PROLOGUE(mpn_popcount)
+deflit(`FRAME',0)
+
+       movl    PARAM_SIZE, %ecx
+       pushl   %esi    FRAME_pushl()
+
+ifdef(`PIC',`
+       pushl   %ebx    FRAME_pushl()
+       pushl   %ebp    FRAME_pushl()
+
+       call    L(here)         C pushes the address of L(here)
+L(here):
+       popl    %ebp            C ebp = address of L(here)
+       shll    %ecx            C size in byte pairs
+
+       addl    $_GLOBAL_OFFSET_TABLE_+[.-L(here)], %ebp        C ebp = GOT
+       movl    PARAM_SRC, %esi
+
+       xorl    %eax, %eax      C total
+       xorl    %ebx, %ebx      C byte
+
+       movl    TABLE_NAME@GOT(%ebp), %ebp      C table address from the GOT
+       xorl    %edx, %edx      C byte
+define(TABLE,`(%ebp,$1)')
+',`
+dnl non-PIC
+       shll    %ecx            C size in byte pairs
+       movl    PARAM_SRC, %esi
+
+       pushl   %ebx    FRAME_pushl()
+       xorl    %eax, %eax      C total
+
+       xorl    %ebx, %ebx      C byte
+       xorl    %edx, %edx      C byte
+
+define(TABLE,`TABLE_NAME`'($1)')
+')
+
+
+       ALIGN(8)        C necessary on P55 for claimed speed
+L(top):
+       C eax   total
+       C ebx   byte
+       C ecx   counter, 2*size to 1
+       C edx   byte
+       C esi   src
+       C edi
+       C ebp   [PIC] table
+
+       addl    %ebx, %eax              C add count from the previous pass
+       movb    -1(%esi,%ecx,2), %bl    C source byte at offset 2*ecx-1
+
+       addl    %edx, %eax              C add count from the previous pass
+       movb    -2(%esi,%ecx,2), %dl    C source byte at offset 2*ecx-2
+
+       movb    TABLE(%ebx), %bl        C replace byte by its table entry
+       decl    %ecx
+
+       movb    TABLE(%edx), %dl        C movb keeps the flags from decl
+       jnz     L(top)
+
+
+ifdef(`PIC',`
+       popl    %ebp
+')
+       addl    %ebx, %eax              C counts from the final pass
+       popl    %ebx
+
+       addl    %edx, %eax
+       popl    %esi
+
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/pentium/rshift.asm b/mpn/x86/pentium/rshift.asm

new file mode 100644 (file)

index 0000000..949b0d2
--- /dev/null
+++ b/mpn/x86/pentium/rshift.asm
@@ -0,0 +1,233 @@
+dnl  Intel Pentium mpn_rshift -- mpn right shift.
+
+dnl  Copyright 1992, 1994, 1995, 1996, 1999, 2000, 2002 Free Software
+dnl  Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C         cycles/limb
+C P5,P54:    6.0
+C P55:       5.375
+
+
+C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                       unsigned shift);
+C
+C The main shift-by-N loop should run at 5.375 c/l and that's what P55 does,
+C but P5 and P54 run only at 6.0 c/l, which is 4 cycles lost somewhere.
+
+defframe(PARAM_SHIFT,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC,  8)
+defframe(PARAM_DST,  4)
+
+       TEXT
+       ALIGN(8)
+PROLOGUE(mpn_rshift)
+
+       pushl   %edi
+       pushl   %esi
+       pushl   %ebx
+       pushl   %ebp
+deflit(`FRAME',16)
+
+       movl    PARAM_DST,%edi
+       movl    PARAM_SRC,%esi
+       movl    PARAM_SIZE,%ebp
+       movl    PARAM_SHIFT,%ecx
+
+C We can use faster code for shift-by-1 under certain conditions.
+       cmp     $1,%ecx
+       jne     L(normal)
+       leal    4(%edi),%eax
+       cmpl    %esi,%eax
+       jnc     L(special)              C jump if res_ptr + 1 >= s_ptr
+       leal    (%edi,%ebp,4),%eax
+       cmpl    %eax,%esi
+       jnc     L(special)              C jump if s_ptr >= res_ptr + size
+
+L(normal):
+       movl    (%esi),%edx             C edx = src[0]
+       addl    $4,%esi
+       xorl    %eax,%eax
+       shrdl(  %cl, %edx, %eax)        C compute carry limb
+       pushl   %eax                    C push carry limb onto stack
+
+       decl    %ebp                    C size-1 limbs go through the loops
+       pushl   %ebp                    C keep size-1 for the remainder loop
+       shrl    $3,%ebp                 C number of 8-limb chunks
+       jz      L(end)
+
+       movl    (%edi),%eax             C fetch destination cache line
+
+       ALIGN(4)
+L(oop):        movl    28(%edi),%eax           C fetch destination cache line
+       movl    %edx,%ebx               C ebx = limb carried over from last pass
+
+       movl    (%esi),%eax
+       movl    4(%esi),%edx
+       shrdl(  %cl, %eax, %ebx)
+       shrdl(  %cl, %edx, %eax)
+       movl    %ebx,(%edi)
+       movl    %eax,4(%edi)
+
+       movl    8(%esi),%ebx
+       movl    12(%esi),%eax
+       shrdl(  %cl, %ebx, %edx)
+       shrdl(  %cl, %eax, %ebx)
+       movl    %edx,8(%edi)
+       movl    %ebx,12(%edi)
+
+       movl    16(%esi),%edx
+       movl    20(%esi),%ebx
+       shrdl(  %cl, %edx, %eax)
+       shrdl(  %cl, %ebx, %edx)
+       movl    %eax,16(%edi)
+       movl    %edx,20(%edi)
+
+       movl    24(%esi),%eax
+       movl    28(%esi),%edx
+       shrdl(  %cl, %eax, %ebx)
+       shrdl(  %cl, %edx, %eax)
+       movl    %ebx,24(%edi)
+       movl    %eax,28(%edi)
+
+       addl    $32,%esi
+       addl    $32,%edi
+       decl    %ebp
+       jnz     L(oop)
+
+L(end):        popl    %ebp                    C size-1
+       andl    $7,%ebp                 C limbs left over after the 8-limb chunks
+       jz      L(end2)
+L(oop2):
+       movl    (%esi),%eax
+       shrdl(  %cl,%eax,%edx)          C compute result limb
+       movl    %edx,(%edi)
+       movl    %eax,%edx               C next limb becomes the pending one
+       addl    $4,%esi
+       addl    $4,%edi
+       decl    %ebp
+       jnz     L(oop2)
+
+L(end2):
+       shrl    %cl,%edx                C compute most significant limb
+       movl    %edx,(%edi)             C store it
+
+       popl    %eax                    C pop carry limb
+
+       popl    %ebp
+       popl    %ebx
+       popl    %esi
+       popl    %edi
+       ret
+
+
+C We loop from most significant end of the arrays, which is only
+C permissible if the source and destination don't overlap, since the
+C function is documented to work for overlapping source and destination.
+
+L(special):
+       leal    -4(%edi,%ebp,4),%edi    C &dst[size-1]
+       leal    -4(%esi,%ebp,4),%esi    C &src[size-1]
+
+       movl    (%esi),%edx             C most significant source limb
+       subl    $4,%esi
+
+       decl    %ebp                    C size-1 limbs go through the loops
+       pushl   %ebp                    C keep size-1 for the remainder loop
+       shrl    $3,%ebp                 C number of 8-limb chunks
+
+       shrl    %edx                    C shift top limb, low bit goes to carry
+       incl    %ebp                    C incl/decl pair tests ebp for zero
+       decl    %ebp                    C without disturbing the carry flag
+       jz      L(Lend)
+
+       movl    (%edi),%eax             C fetch destination cache line
+
+       ALIGN(4)
+L(Loop):
+       movl    -28(%edi),%eax          C fetch destination cache line
+       movl    %edx,%ebx               C ebx = limb already shifted, to be stored
+
+       movl    (%esi),%eax
+       movl    -4(%esi),%edx
+       rcrl    %eax
+       movl    %ebx,(%edi)
+       rcrl    %edx
+       movl    %eax,-4(%edi)
+
+       movl    -8(%esi),%ebx
+       movl    -12(%esi),%eax
+       rcrl    %ebx
+       movl    %edx,-8(%edi)
+       rcrl    %eax
+       movl    %ebx,-12(%edi)
+
+       movl    -16(%esi),%edx
+       movl    -20(%esi),%ebx
+       rcrl    %edx
+       movl    %eax,-16(%edi)
+       rcrl    %ebx
+       movl    %edx,-20(%edi)
+
+       movl    -24(%esi),%eax
+       movl    -28(%esi),%edx
+       rcrl    %eax
+       movl    %ebx,-24(%edi)
+       rcrl    %edx
+       movl    %eax,-28(%edi)
+
+       leal    -32(%esi),%esi          C use leal not to clobber carry
+       leal    -32(%edi),%edi
+       decl    %ebp
+       jnz     L(Loop)
+
+L(Lend):
+       popl    %ebp                    C size-1
+       sbbl    %eax,%eax               C save carry in %eax
+       andl    $7,%ebp                 C leftover limbs, this clobbers carry
+       jz      L(Lend2)
+       addl    %eax,%eax               C restore carry from eax
+L(Loop2):
+       movl    %edx,%ebx               C ebx = limb already shifted, to be stored
+       movl    (%esi),%edx
+       rcrl    %edx
+       movl    %ebx,(%edi)
+
+       leal    -4(%esi),%esi           C use leal not to clobber carry
+       leal    -4(%edi),%edi
+       decl    %ebp
+       jnz     L(Loop2)
+
+       jmp     L(L1)
+L(Lend2):
+       addl    %eax,%eax               C restore carry from eax
+L(L1): movl    %edx,(%edi)             C store last limb
+
+       movl    $0,%eax                 C movl rather than xorl keeps the carry
+       rcrl    %eax                    C bit shifted out becomes top bit of eax
+
+       popl    %ebp
+       popl    %ebx
+       popl    %esi
+       popl    %edi
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/pentium/sqr_basecase.asm b/mpn/x86/pentium/sqr_basecase.asm

new file mode 100644 (file)

index 0000000..e4fca7c
--- /dev/null
+++ b/mpn/x86/pentium/sqr_basecase.asm
@@ -0,0 +1,517 @@
+dnl  Intel P5 mpn_sqr_basecase -- square an mpn number.
+
+dnl  Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C P5: approx 8 cycles per crossproduct, or 15.5 cycles per triangular
+C product at around 20x20 limbs.
+
+
+C void mpn_sqr_basecase (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C
+C Calculate src,size squared, storing the result in dst,2*size.
+C
+C The algorithm is basically the same as mpn/generic/sqr_basecase.c, but a
+C lot of function call overheads are avoided, especially when the size is
+C small.
+
+defframe(PARAM_SIZE,12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+       TEXT
+       ALIGN(8)
+PROLOGUE(mpn_sqr_basecase)
+deflit(`FRAME',0)
+
+       movl    PARAM_SIZE, %edx
+       movl    PARAM_SRC, %eax
+
+       cmpl    $2, %edx
+       movl    PARAM_DST, %ecx
+
+       je      L(two_limbs)
+
+       movl    (%eax), %eax            C src[0], movl keeps the cmpl flags
+       ja      L(three_or_more)
+
+C -----------------------------------------------------------------------------
+C one limb only
+       C eax   src low limb
+       C ebx
+       C ecx   dst
+       C edx
+
+       mull    %eax
+
+       movl    %eax, (%ecx)
+       movl    %edx, 4(%ecx)
+
+       ret
+
+C -----------------------------------------------------------------------------
+       ALIGN(8)
+L(two_limbs):
+       C eax   src
+       C ebx
+       C ecx   dst
+       C edx   size
+
+       pushl   %ebp                    C ebp, edi, esi, ebx restored before ret
+       pushl   %edi
+
+       pushl   %esi
+       pushl   %ebx
+
+       movl    %eax, %ebx              C ebx = src
+       movl    (%eax), %eax
+
+       mull    %eax            C src[0]^2
+
+       movl    %eax, (%ecx)    C dst[0]
+       movl    %edx, %esi      C dst[1]
+
+       movl    4(%ebx), %eax
+
+       mull    %eax            C src[1]^2
+
+       movl    %eax, %edi      C dst[2]
+       movl    %edx, %ebp      C dst[3]
+
+       movl    (%ebx), %eax
+
+       mull    4(%ebx)         C src[0]*src[1]
+
+       addl    %eax, %esi              C add the cross product once ...
+       popl    %ebx
+
+       adcl    %edx, %edi
+
+       adcl    $0, %ebp
+       addl    %esi, %eax              C ... and a second time
+
+       adcl    %edi, %edx
+       movl    %eax, 4(%ecx)
+
+       adcl    $0, %ebp
+       popl    %esi
+
+       movl    %edx, 8(%ecx)
+       movl    %ebp, 12(%ecx)
+
+       popl    %edi
+       popl    %ebp
+
+       ret
+
+
+C -----------------------------------------------------------------------------
+       ALIGN(8)
+L(three_or_more):
+       C eax   src low limb
+       C ebx
+       C ecx   dst
+       C edx   size
+
+       cmpl    $4, %edx
+       pushl   %ebx
+deflit(`FRAME',4)
+
+       movl    PARAM_SRC, %ebx
+       jae     L(four_or_more)
+
+
+C -----------------------------------------------------------------------------
+C three limbs
+       C eax   src low limb
+       C ebx   src
+       C ecx   dst
+       C edx   size
+
+       pushl   %ebp
+       pushl   %edi
+
+       mull    %eax            C src[0] ^ 2
+
+       movl    %eax, (%ecx)
+       movl    %edx, 4(%ecx)
+
+       movl    4(%ebx), %eax
+       xorl    %ebp, %ebp
+
+       mull    %eax            C src[1] ^ 2
+
+       movl    %eax, 8(%ecx)
+       movl    %edx, 12(%ecx)
+
+       movl    8(%ebx), %eax
+       pushl   %esi            C risk of cache bank clash
+
+       mull    %eax            C src[2] ^ 2
+
+       movl    %eax, 16(%ecx)
+       movl    %edx, 20(%ecx)
+
+       movl    (%ebx), %eax
+
+       mull    4(%ebx)         C src[0] * src[1]
+
+       movl    %eax, %esi
+       movl    %edx, %edi
+
+       movl    (%ebx), %eax
+
+       mull    8(%ebx)         C src[0] * src[2]
+
+       addl    %eax, %edi
+       movl    %edx, %ebp
+
+       adcl    $0, %ebp
+       movl    4(%ebx), %eax
+
+       mull    8(%ebx)         C src[1] * src[2]
+
+       xorl    %ebx, %ebx
+       addl    %eax, %ebp
+
+       C eax
+       C ebx   zero, will be dst[5]
+       C ecx   dst
+       C edx   dst[4]
+       C esi   dst[1]
+       C edi   dst[2]
+       C ebp   dst[3]
+
+       adcl    $0, %edx
+       addl    %esi, %esi              C double the cross products esi..edx
+
+       adcl    %edi, %edi
+
+       adcl    %ebp, %ebp
+
+       adcl    %edx, %edx
+       movl    4(%ecx), %eax
+
+       adcl    $0, %ebx                C bit doubled out of edx
+       addl    %esi, %eax              C add them to the stored squares
+
+       movl    %eax, 4(%ecx)
+       movl    8(%ecx), %eax
+
+       adcl    %edi, %eax
+       movl    12(%ecx), %esi
+
+       adcl    %ebp, %esi
+       movl    16(%ecx), %edi
+
+       movl    %eax, 8(%ecx)
+       movl    %esi, 12(%ecx)
+
+       adcl    %edx, %edi
+       popl    %esi
+
+       movl    20(%ecx), %eax
+       movl    %edi, 16(%ecx)
+
+       popl    %edi
+       popl    %ebp
+
+       adcl    %ebx, %eax      C no carry out of this
+       popl    %ebx
+
+       movl    %eax, 20(%ecx)
+
+       ret
+
+
+C -----------------------------------------------------------------------------
+       ALIGN(8)
+L(four_or_more):
+       C eax   src low limb
+       C ebx   src
+       C ecx   dst
+       C edx   size
+       C esi
+       C edi
+       C ebp
+       C
+       C First multiply src[0]*src[1..size-1] and store at dst[1..size].
+
+deflit(`FRAME',4)
+
+       pushl   %edi
+FRAME_pushl()
+       pushl   %esi
+FRAME_pushl()
+
+       pushl   %ebp
+FRAME_pushl()
+       leal    (%ecx,%edx,4), %edi     C dst end of this mul1
+
+       leal    (%ebx,%edx,4), %esi     C src end
+       movl    %ebx, %ebp              C src
+
+       negl    %edx                    C -size
+       xorl    %ebx, %ebx              C clear carry limb and carry flag
+
+       leal    1(%edx), %ecx           C -(size-1)
+
+L(mul1):
+       C eax   scratch
+       C ebx   carry
+       C ecx   counter, negative
+       C edx   scratch
+       C esi   &src[size]
+       C edi   &dst[size]
+       C ebp   src
+
+       adcl    $0, %ebx
+       movl    (%esi,%ecx,4), %eax
+
+       mull    (%ebp)
+
+       addl    %eax, %ebx
+
+       movl    %ebx, (%edi,%ecx,4)
+       incl    %ecx
+
+       movl    %edx, %ebx
+       jnz     L(mul1)
+
+
+       C Add products src[n]*src[n+1..size-1] at dst[2*n-1...], for
+       C n=1..size-2.
+       C
+       C The last two products, which are the end corner of the product
+       C triangle, are handled separately to save looping overhead.  These
+       C are src[size-3]*src[size-2,size-1] and src[size-2]*src[size-1].
+       C If size is 4 then it's only these that need to be done.
+       C
+       C In the outer loop %esi is a constant, and %edi just advances by 1
+       C limb each time.  The size of the operation decreases by 1 limb
+       C each time.
+
+       C eax
+       C ebx   carry (needing carry flag added)
+       C ecx
+       C edx
+       C esi   &src[size]
+       C edi   &dst[size]
+       C ebp
+
+       adcl    $0, %ebx
+       movl    PARAM_SIZE, %edx
+
+       movl    %ebx, (%edi)
+       subl    $4, %edx
+
+       negl    %edx                    C -(size-4), zero when size is 4
+       jz      L(corner)
+
+
+L(outer):
+       C ebx   previous carry limb to store
+       C edx   outer loop counter (negative)
+       C esi   &src[size]
+       C edi   dst, pointing at stored carry limb of previous loop
+
+       pushl   %edx                    C new outer loop counter
+       leal    -2(%edx), %ecx
+
+       movl    %ebx, (%edi)
+       addl    $4, %edi
+
+       addl    $4, %ebp
+       xorl    %ebx, %ebx              C initial carry limb, clear carry flag
+
+L(inner):
+       C eax   scratch
+       C ebx   carry (needing carry flag added)
+       C ecx   counter, negative
+       C edx   scratch
+       C esi   &src[size]
+       C edi   dst end of this addmul
+       C ebp   &src[j]
+
+       adcl    $0, %ebx
+       movl    (%esi,%ecx,4), %eax
+
+       mull    (%ebp)
+
+       addl    %ebx, %eax
+       movl    (%edi,%ecx,4), %ebx
+
+       adcl    $0, %edx
+       addl    %eax, %ebx
+
+       movl    %ebx, (%edi,%ecx,4)
+       incl    %ecx
+
+       movl    %edx, %ebx
+       jnz     L(inner)
+
+
+       adcl    $0, %ebx
+       popl    %edx            C outer loop counter
+
+       incl    %edx
+       jnz     L(outer)
+
+
+       movl    %ebx, (%edi)
+
+L(corner):
+       C esi   &src[size]
+       C edi   &dst[2*size-4]
+
+       movl    -8(%esi), %eax
+       movl    -4(%edi), %ebx          C risk of data cache bank clash here
+
+       mull    -12(%esi)               C src[size-2]*src[size-3]
+
+       addl    %eax, %ebx
+       movl    %edx, %ecx
+
+       adcl    $0, %ecx
+       movl    -4(%esi), %eax
+
+       mull    -12(%esi)               C src[size-1]*src[size-3]
+
+       addl    %ecx, %eax
+       movl    (%edi), %ecx
+
+       adcl    $0, %edx
+       movl    %ebx, -4(%edi)
+
+       addl    %eax, %ecx
+       movl    %edx, %ebx
+
+       adcl    $0, %ebx
+       movl    -4(%esi), %eax
+
+       mull    -8(%esi)                C src[size-1]*src[size-2]
+
+       movl    %ecx, (%edi)
+       addl    %eax, %ebx
+
+       adcl    $0, %edx
+       movl    PARAM_SIZE, %eax
+
+       negl    %eax
+       movl    %ebx, 4(%edi)
+
+       addl    $1, %eax                C -(size-1) and clear carry
+       movl    %edx, 8(%edi)
+
+
+C -----------------------------------------------------------------------------
+C Left shift of dst[1..2*size-2], high bit shifted out becomes dst[2*size-1].
+
+L(lshift):
+       C eax   counter, negative
+       C ebx   next limb
+       C ecx
+       C edx
+       C esi
+       C edi   &dst[2*size-4]
+       C ebp
+
+       movl    12(%edi,%eax,8), %ebx
+
+       rcll    %ebx
+       movl    16(%edi,%eax,8), %ecx
+
+       rcll    %ecx
+       movl    %ebx, 12(%edi,%eax,8)
+
+       movl    %ecx, 16(%edi,%eax,8)
+       incl    %eax                    C incl leaves the carry for the next rcll
+
+       jnz     L(lshift)
+
+
+       adcl    %eax, %eax              C high bit out
+       movl    PARAM_SRC, %esi
+
+       movl    PARAM_SIZE, %ecx        C risk of cache bank clash
+       movl    %eax, 12(%edi)          C dst most significant limb
+
+
+C -----------------------------------------------------------------------------
+C Now add in the squares on the diagonal, namely src[0]^2, src[1]^2, ...,
+C src[size-1]^2.  dst[0] hasn't yet been set at all yet, and just gets the
+C low limb of src[0]^2.
+
+       movl    (%esi), %eax            C src[0]
+       leal    (%esi,%ecx,4), %esi     C src end
+
+       negl    %ecx
+
+       mull    %eax
+
+       movl    %eax, 16(%edi,%ecx,8)   C dst[0]
+       movl    %edx, %ebx
+
+       addl    $1, %ecx                C -(size-1) and clear carry
+
+L(diag):
+       C eax   scratch (low product)
+       C ebx   carry limb
+       C ecx   counter, negative
+       C edx   scratch (high product)
+       C esi   &src[size]
+       C edi   &dst[2*size-4]
+       C ebp   scratch (fetched dst limbs)
+
+       movl    (%esi,%ecx,4), %eax
+       adcl    $0, %ebx
+
+       mull    %eax
+
+       movl    16-4(%edi,%ecx,8), %ebp
+
+       addl    %ebp, %ebx
+       movl    16(%edi,%ecx,8), %ebp
+
+       adcl    %eax, %ebp
+       movl    %ebx, 16-4(%edi,%ecx,8)
+
+       movl    %ebp, 16(%edi,%ecx,8)
+       incl    %ecx
+
+       movl    %edx, %ebx
+       jnz     L(diag)
+
+
+       adcl    $0, %edx
+       movl    16-4(%edi), %eax        C dst most significant limb
+
+       addl    %eax, %edx
+       popl    %ebp
+
+       movl    %edx, 16-4(%edi)
+       popl    %esi            C risk of cache bank clash
+
+       popl    %edi
+       popl    %ebx
+
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/pentium4/README b/mpn/x86/pentium4/README

new file mode 100644 (file)

index 0000000..8dc0479
--- /dev/null
+++ b/mpn/x86/pentium4/README
@@ -0,0 +1,113 @@
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+
+
+                   INTEL PENTIUM-4 MPN SUBROUTINES
+
+
+This directory contains mpn functions optimized for Intel Pentium-4.
+
+The mmx subdirectory has routines using MMX instructions, the sse2
+subdirectory has routines using SSE2 instructions.  All P4s have these, the
+separate directories are just so configure can omit that code if the
+assembler doesn't support it.
+
+
+STATUS
+
+                                cycles/limb
+
+       mpn_add_n/sub_n            4 normal, 6 in-place
+
+       mpn_mul_1                  4 normal, 6 in-place
+       mpn_addmul_1               6
+       mpn_submul_1               7
+
+       mpn_mul_basecase           6 cycles/crossproduct (approx)
+
+       mpn_sqr_basecase           3.5 cycles/crossproduct (approx)
+                                   or 7.0 cycles/triangleproduct (approx)
+
+       mpn_l/rshift               1.75
+
+
+
+The shifts ought to be able to go at 1.5 c/l, but not much effort has been
+applied to them yet.
+
+In-place operations, and all addmul, submul, mul_basecase and sqr_basecase
+calls, suffer from pipeline anomalies associated with write combining and
+movd reads and writes to the same or nearby locations.  The movq
+instructions do not trigger the same hardware problems.  Unfortunately,
+using movq and splitting/combining seems to require too many extra
+instructions to help.  Perhaps future chip steppings will be better.
+
+
+
+NOTES
+
The Pentium-4 pipeline, "Netburst", provides quite a number of surprises.
Many traditional x86 instructions run very slowly, requiring use of
alternative instructions for acceptable performance.
+
+adcl and sbbl are quite slow at 8 cycles for reg->reg.  paddq of 32-bits
+within a 64-bit mmx register seems better, though the combination
+paddq/psrlq when propagating a carry is still a 4 cycle latency.
+
+incl and decl should be avoided, instead use add $1 and sub $1.  Apparently
+the carry flag is not separately renamed, so incl and decl depend on all
+previous flags-setting instructions.
+
+shll and shrl have a 4 cycle latency, or 8 times the latency of the fastest
+integer instructions (addl, subl, orl, andl, and some more).  shldl and
+shrdl seem to have 13 and 15 cycles latency, respectively.  Bizarre.
+
+movq mmx -> mmx does have 6 cycle latency, as noted in the documentation.
+pxor/por or similar combination at 2 cycles latency can be used instead.
+The movq however executes in the float unit, thereby saving MMX execution
+resources.  With the right juggling, data moves shouldn't be on a dependent
+chain.
+
+L1 is write-through, but the write-combining sounds like it does enough to
+not require explicit destination prefetching.
+
+xmm registers so far haven't found a use, but not much effort has been
+expended.  A configure test for whether the operating system knows
fxsave/fxrstor will be needed if they're used.
+
+
+
+REFERENCES
+
+Intel Pentium-4 processor manuals,
+
+       http://developer.intel.com/design/pentium4/manuals
+
+"Intel Pentium 4 Processor Optimization Reference Manual", Intel, 2001,
+order number 248966.  Available on-line:
+
+       http://developer.intel.com/design/pentium4/manuals/248966.htm
+
+
+
+----------------
+Local variables:
+mode: text
+fill-column: 76
+End:
diff --git a/mpn/x86/pentium4/copyd.asm b/mpn/x86/pentium4/copyd.asm

new file mode 100644 (file)

index 0000000..491ad60
--- /dev/null
+++ b/mpn/x86/pentium4/copyd.asm
@@ -0,0 +1,61 @@
+dnl  Pentium-4 mpn_copyd -- copy limb vector, decrementing.
+dnl
+
+dnl  Copyright 1999, 2000, 2001 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+dnl  The std/rep/movsl/cld is very slow for small blocks on pentium4.  Its
+dnl  startup time seems to be about 165 cycles.  It then needs 2.6 c/l.
+dnl  We therefore use an open-coded 2 c/l copying loop.
+
+dnl  Ultimately, we may want to use 64-bit movq or 128-bit movdqu in some
+dnl  nifty unrolled arrangement.  Clearly, that could reach much higher
+dnl  speeds, at least for large blocks.
+
+include(`../config.m4')
+
+
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST,  4)
+
+       TEXT
+       ALIGN(8)
+
+PROLOGUE(mpn_copyd)
+deflit(`FRAME',0)
+
+       movl    PARAM_SIZE, %ecx
+
+       movl    PARAM_SRC, %eax
+       movl    PARAM_DST, %edx
+       movl    %ebx, PARAM_SIZE        C save ebx in the size slot, size is in ecx
+       addl    $-1, %ecx               C index of the top limb, size-1
+       js      L(end)                  C negative means size was 0
+
+L(loop):
+       movl    (%eax,%ecx,4), %ebx     C copy limb ecx via ebx
+       movl    %ebx, (%edx,%ecx,4)
+       addl    $-1, %ecx               C step down, sign set once past index 0
+
+       jns     L(loop)
+L(end):
+       movl    PARAM_SIZE, %ebx        C restore ebx from the size slot
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/pentium4/copyi.asm b/mpn/x86/pentium4/copyi.asm

new file mode 100644 (file)

index 0000000..bf812c8
--- /dev/null
+++ b/mpn/x86/pentium4/copyi.asm
@@ -0,0 +1,83 @@
+dnl  Pentium-4 mpn_copyi -- copy limb vector, incrementing.
+dnl
+
+dnl  Copyright 1999, 2000, 2001 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+dnl  The rep/movsl is very slow for small blocks on pentium4.  Its startup
+dnl  time seems to be about 110 cycles.  It then copies at a rate of one
+dnl  limb per cycle.  We therefore fall back to an open-coded 2 c/l copying
+dnl  loop for smaller sizes.
+
+dnl  Ultimately, we may want to use 64-bit movq or 128-bit movdqu in some
+dnl  nifty unrolled arrangement.  Clearly, that could reach much higher
+dnl  speeds, at least for large blocks.
+
+include(`../config.m4')
+
+
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST,  4)
+
+       TEXT
+       ALIGN(8)
+
+PROLOGUE(mpn_copyi)
+deflit(`FRAME',0)
+C Copy PARAM_SIZE limbs from PARAM_SRC to PARAM_DST, lowest address first.
+       movl    PARAM_SIZE, %ecx
+       cmpl    $150, %ecx
+       jg      L(replmovs)             C more than 150 limbs: use rep movsl
+
+       movl    PARAM_SRC, %eax
+       movl    PARAM_DST, %edx
+       movl    %ebx, PARAM_SIZE        C size is in ecx now, reuse its slot to save ebx
+       testl   %ecx, %ecx
+       jz      L(end)                  C size 0: nothing to copy
+
+L(loop):
+       movl    (%eax), %ebx            C ebx = next source limb
+       leal    4(%eax), %eax           C advance src, leal leaves the flags alone
+       addl    $-1, %ecx               C one limb fewer, sets ZF for the jnz below
+       movl    %ebx, (%edx)            C store the limb
+       leal    4(%edx), %edx           C advance dst, again without touching ZF
+
+       jnz     L(loop)                 C flags still come from the addl above
+
+L(end):
+       movl    PARAM_SIZE, %ebx        C restore the saved ebx
+       ret
+
+L(replmovs):
+       cld     C better safe than sorry, see mpn/x86/README
+
+       movl    %esi, %eax              C keep esi in eax across the copy
+       movl    PARAM_SRC, %esi
+       movl    %edi, %edx              C keep edi in edx across the copy
+       movl    PARAM_DST, %edi
+
+       rep                             C ecx still holds the limb count
+       movsl
+
+       movl    %eax, %esi              C put esi and edi back
+       movl    %edx, %edi
+
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/pentium4/mmx/lshift.asm b/mpn/x86/pentium4/mmx/lshift.asm

new file mode 100644 (file)

index 0000000..5d316d5
--- /dev/null
+++ b/mpn/x86/pentium4/mmx/lshift.asm
@@ -0,0 +1,28 @@
+dnl  Intel Pentium-4 mpn_lshift -- left shift.
+
+dnl  Copyright 2001, 2002 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C P4 Willamette, Northwood: 1.75 cycles/limb
+C P4 Prescott:             2.0 cycles/limb
+
+C No code of its own: the x86/pentium/mmx lshift source is included below.
+MULFUNC_PROLOGUE(mpn_lshift)
+include_mpn(`x86/pentium/mmx/lshift.asm')
diff --git a/mpn/x86/pentium4/mmx/popham.asm b/mpn/x86/pentium4/mmx/popham.asm

new file mode 100644 (file)

index 0000000..2e79816
--- /dev/null
+++ b/mpn/x86/pentium4/mmx/popham.asm
@@ -0,0 +1,192 @@
+dnl  Intel Pentium 4 mpn_popcount, mpn_hamdist -- population count and
+dnl  hamming distance.
+
+dnl  Copyright 2000, 2001, 2002, 2007 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C                           popcount        hamdist
+C P3 model 9  (Banias)         ?               ?
+C P3 model 13 (Dothan)         6               6
+C P4 model 0  (Willamette)
+C P4 model 1  (?)
+C P4 model 2  (Northwood)      8               9
+C P4 model 3  (Prescott)       8               9
+C P4 model 4  (Nocona)
+
+C unsigned long mpn_popcount (mp_srcptr src, mp_size_t size);
+C unsigned long mpn_hamdist (mp_srcptr src, mp_srcptr src2, mp_size_t size);
+C
+C Loading with unaligned movq's costs an extra 1 c/l and hence is avoided.
+C Two movd's and a punpckldq seems to be the same speed as an aligned movq,
+C and using them saves fiddling about with alignment testing on entry.
+C
+C For popcount there's 13 mmx instructions in the loop, so perhaps 6.5 c/l
+C might be possible, but 8 c/l relying on out-of-order execution is already
+C quite reasonable.
+
+ifdef(`OPERATION_popcount',,
+`ifdef(`OPERATION_hamdist',,
+`m4_error(`Need OPERATION_popcount or OPERATION_hamdist defined
+')')')
+C HAM(text) expands to text only when OPERATION_hamdist is defined.
+define(HAM,
+m4_assert_numargs(1)
+`ifdef(`OPERATION_hamdist',`$1')')
+C POP(text) expands to text only when OPERATION_popcount is defined.
+define(POP,
+m4_assert_numargs(1)
+`ifdef(`OPERATION_popcount',`$1')')
+C Parameter frame offsets and the function name for the selected operation.
+HAM(`
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC2,  8)
+defframe(PARAM_SRC,   4)
+define(M4_function,mpn_hamdist)
+')
+POP(`
+defframe(PARAM_SIZE,  8)
+defframe(PARAM_SRC,   4)
+define(M4_function,mpn_popcount)
+')
+
+MULFUNC_PROLOGUE(mpn_popcount mpn_hamdist)
+
+
+ifdef(`PIC',,`
+       dnl  non-PIC: the three 64-bit masks are loaded from this read-only data
+       RODATA
+       ALIGN(8)
+L(rodata_AAAAAAAAAAAAAAAA):
+       .long   0xAAAAAAAA
+       .long   0xAAAAAAAA
+L(rodata_3333333333333333):
+       .long   0x33333333
+       .long   0x33333333
+L(rodata_0F0F0F0F0F0F0F0F):
+       .long   0x0F0F0F0F
+       .long   0x0F0F0F0F
+')
+
+       TEXT
+       ALIGN(16)
+
+PROLOGUE(M4_function)
+deflit(`FRAME',0)
+C Count the 1 bits in the size limbs at src; hamdist counts src XOR src2.
+       movl    PARAM_SIZE, %ecx
+       movl    PARAM_SRC, %eax
+
+ifdef(`PIC',`
+       movl    $0xAAAAAAAA, %edx
+       movd    %edx, %mm7
+       punpckldq %mm7, %mm7            C mm7 = 0xAAAAAAAAAAAAAAAA
+
+       movl    $0x33333333, %edx
+       movd    %edx, %mm6
+       punpckldq %mm6, %mm6            C mm6 = 0x3333333333333333
+
+       movl    $0x0F0F0F0F, %edx
+       movd    %edx, %mm5
+       punpckldq %mm5, %mm5            C mm5 = 0x0F0F0F0F0F0F0F0F
+
+HAM(`  movl    PARAM_SRC2, %edx')
+
+',`
+       dnl non-PIC
+HAM(`  movl    PARAM_SRC2, %edx')
+       movq    L(rodata_AAAAAAAAAAAAAAAA), %mm7
+       movq    L(rodata_3333333333333333), %mm6
+       movq    L(rodata_0F0F0F0F0F0F0F0F), %mm5
+')
+
+       pxor    %mm4, %mm4              C zero
+       pxor    %mm0, %mm0              C total
+
+       subl    $1, %ecx
+       ja      L(top)                  C size >= 2: take the limbs in pairs
+
+L(last):
+       movd    (%eax,%ecx,4), %mm1             C src high limb
+HAM(`  movd    (%edx,%ecx,4), %mm2
+       pxor    %mm2, %mm1
+')
+       jmp     L(loaded)               C count it alone, movd zeroed the high dword
+
+
+L(top):
+       C eax   src
+       C ebx
+       C ecx   counter, size-1 to 2 or 1, inclusive
+       C edx   [hamdist] src2
+       C
+       C mm0   total (low dword)
+       C mm1   (scratch)
+       C mm2   (scratch)
+       C mm3   [hamdist] (scratch)
+       C mm4   0x0000000000000000
+       C mm5   0x0F0F0F0F0F0F0F0F
+       C mm6   0x3333333333333333
+       C mm7   0xAAAAAAAAAAAAAAAA
+
+       movd    (%eax), %mm1
+       movd    4(%eax), %mm2
+       punpckldq %mm2, %mm1            C mm1 = two src limbs as one qword
+       addl    $8, %eax
+
+HAM(`  movd    (%edx), %mm2
+       movd    4(%edx), %mm3
+       punpckldq %mm3, %mm2
+       pxor    %mm2, %mm1
+       addl    $8, %edx
+')
+
+L(loaded):
+       movq    %mm7, %mm2
+       pand    %mm1, %mm2              C upper bit of each bit pair
+       psrlq   $1, %mm2
+       psubd   %mm2, %mm1      C bit pairs
+
+       movq    %mm6, %mm2
+       pand    %mm1, %mm2              C low pair of each nibble
+       psrlq   $2, %mm1
+       pand    %mm6, %mm1              C high pair of each nibble
+       paddd   %mm2, %mm1      C nibbles
+
+       movq    %mm5, %mm2
+       pand    %mm1, %mm2              C low nibble of each byte
+       psrlq   $4, %mm1
+       pand    %mm5, %mm1              C high nibble of each byte
+       paddd   %mm2, %mm1      C bytes
+
+       psadbw( %mm4, %mm1)             C add up the eight byte counts
+       paddd   %mm1, %mm0      C to total
+
+       subl    $2, %ecx
+       jg      L(top)                  C at least two limbs remain
+
+       C ecx is 0 or -1 representing respectively 1 or 0 further limbs
+       jz      L(last)
+
+
+       movd    %mm0, %eax              C return the total
+       emms
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/pentium4/mmx/rshift.asm b/mpn/x86/pentium4/mmx/rshift.asm

new file mode 100644 (file)

index 0000000..a7dec54
--- /dev/null
+++ b/mpn/x86/pentium4/mmx/rshift.asm
@@ -0,0 +1,28 @@
+dnl  Intel Pentium-4 mpn_rshift -- right shift.
+
+dnl  Copyright 2001, 2002 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C P4 Willamette, Northwood: 1.75 cycles/limb
+C P4 Prescott:             2.0 cycles/limb
+
+C No code of its own: the x86/pentium/mmx rshift source is included below.
+MULFUNC_PROLOGUE(mpn_rshift)
+include_mpn(`x86/pentium/mmx/rshift.asm')
diff --git a/mpn/x86/pentium4/sse2/add_n.asm b/mpn/x86/pentium4/sse2/add_n.asm

new file mode 100644 (file)

index 0000000..04c0c68
--- /dev/null
+++ b/mpn/x86/pentium4/sse2/add_n.asm
@@ -0,0 +1,98 @@
+dnl  Intel Pentium-4 mpn_add_n -- mpn addition.
+
+dnl  Copyright 2001, 2002 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C P4 Willamette, Northwood: 4.0 cycles/limb if dst!=src1 and dst!=src2
+C                          6.0 cycles/limb if dst==src1 or dst==src2
+C P4 Prescott:             >= 5 cycles/limb
+
+C mp_limb_t mpn_add_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C                      mp_size_t size);
+C mp_limb_t mpn_add_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C                       mp_size_t size, mp_limb_t carry);
+C
+C The 4 c/l achieved here isn't particularly good, but is better than 9 c/l
+C for a basic adc loop.
+
+defframe(PARAM_CARRY,20)
+defframe(PARAM_SIZE, 16)
+defframe(PARAM_SRC2, 12)
+defframe(PARAM_SRC1, 8)
+defframe(PARAM_DST,  4)
+
+dnl  re-use parameter space
+define(SAVE_EBX,`PARAM_SRC1')
+
+       TEXT
+       ALIGN(8)
+
+PROLOGUE(mpn_add_nc)
+deflit(`FRAME',0)
+C Entry with a caller-supplied carry-in: preload it and join mpn_add_n.
+       movd    PARAM_CARRY, %mm0
+       jmp     L(start_nc)
+
+EPILOGUE()
+
+       ALIGN(8)
+PROLOGUE(mpn_add_n)
+deflit(`FRAME',0)
+C dst = src1 + src2 over size limbs; the carry out is returned in eax.
+       pxor    %mm0, %mm0              C no carry-in
+
+L(start_nc):
+       movl    PARAM_SRC1, %eax        C read src1 before its slot is reused
+       movl    %ebx, SAVE_EBX          C SAVE_EBX is the PARAM_SRC1 slot
+       movl    PARAM_SRC2, %ebx
+       movl    PARAM_DST, %edx
+       movl    PARAM_SIZE, %ecx
+
+       leal    (%eax,%ecx,4), %eax     C src1 end
+       leal    (%ebx,%ecx,4), %ebx     C src2 end
+       leal    (%edx,%ecx,4), %edx     C dst end
+       negl    %ecx                    C -size
+
+L(top):
+       C eax   src1 end
+       C ebx   src2 end
+       C ecx   counter, limbs, negative
+       C edx   dst end
+       C mm0   carry bit
+
+       movd    (%eax,%ecx,4), %mm1     C movd zero-extends the limb to 64 bits
+       movd    (%ebx,%ecx,4), %mm2
+       paddq   %mm2, %mm1              C 64-bit sum of the two limbs
+
+       paddq   %mm1, %mm0              C plus the incoming carry
+       movd    %mm0, (%edx,%ecx,4)     C low 32 bits are the result limb
+
+       psrlq   $32, %mm0               C what is left is the carry out
+
+       addl    $1, %ecx                C index counts up from -size to zero
+       jnz     L(top)
+
+
+       movd    %mm0, %eax              C return the final carry
+       movl    SAVE_EBX, %ebx
+       emms
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/pentium4/sse2/addlsh1_n.asm b/mpn/x86/pentium4/sse2/addlsh1_n.asm

new file mode 100644 (file)

index 0000000..46b0903
--- /dev/null
+++ b/mpn/x86/pentium4/sse2/addlsh1_n.asm
@@ -0,0 +1,96 @@
+dnl  Intel Pentium-4 mpn_addlsh1_n -- mpn x+2*y.
+
+dnl  Copyright 2001, 2002, 2003, 2004, 2006 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C          cycles/limb (approx)
+C          dst!=src1,2  dst==src1  dst==src2
+C P4 m2:      4.5         ?7.25      ?6.75
+C P4 m3:      5.3         ?         ?
+
+C mp_limb_t mpn_addlsh1_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C                          mp_size_t size);
+C
+C The slightly strange combination of indexing and pointer incrementing
+C that's used seems to work best.  Not sure why, but %ecx,4 with src1 and/or
+C src2 is a slowdown.
+C
+C The dependent chain is simply the paddq of x+2*y to the previous carry,
+C then psrlq to get the new carry.  That makes 4 c/l the target speed, which
+C is almost achieved for separate src/dst but when src==dst the write
+C combining anomalies slow it down.
+
+defframe(PARAM_SIZE, 16)
+defframe(PARAM_SRC2, 12)
+defframe(PARAM_SRC1, 8)
+defframe(PARAM_DST,  4)
+
+dnl  re-use parameter space
+define(SAVE_EBX,`PARAM_SRC1')
+
+       TEXT
+       ALIGN(8)
+
+PROLOGUE(mpn_addlsh1_n)
+deflit(`FRAME',0)
+C dst = src1 + 2*src2 over size limbs; the carry out is returned in eax.
+       movl    PARAM_SRC1, %eax        C read src1 before its slot is reused
+       movl    %ebx, SAVE_EBX          C SAVE_EBX is the PARAM_SRC1 slot
+
+       movl    PARAM_SRC2, %ebx
+       pxor    %mm0, %mm0              C initial carry
+
+       movl    PARAM_DST, %edx
+
+       movl    PARAM_SIZE, %ecx
+
+       leal    (%edx,%ecx,4), %edx     C dst end
+       negl    %ecx                    C -size
+
+L(top):
+       C eax   src1, incrementing
+       C ebx   src2, incrementing
+       C ecx   counter, limbs, negative
+       C edx   dst end
+       C mm0   previous 64-bit limb sum, the carry is its high dword
+
+       movd    (%eax), %mm1
+       movd    (%ebx), %mm2
+       psrlq   $32, %mm0               C reduce the previous sum to its carry
+       leal    4(%eax), %eax
+       leal    4(%ebx), %ebx
+
+       paddq   %mm2, %mm1              C x + y
+       paddq   %mm2, %mm1              C x + 2*y
+
+       paddq   %mm1, %mm0              C plus the carry
+
+       movd    %mm0, (%edx,%ecx,4)     C low 32 bits are the result limb
+       addl    $1, %ecx
+       jnz     L(top)
+
+
+       psrlq   $32, %mm0               C carry out of the top limb
+       movl    SAVE_EBX, %ebx
+       movd    %mm0, %eax
+       emms
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/pentium4/sse2/addmul_1.asm b/mpn/x86/pentium4/sse2/addmul_1.asm

new file mode 100644 (file)

index 0000000..3a8d0bb
--- /dev/null
+++ b/mpn/x86/pentium4/sse2/addmul_1.asm
@@ -0,0 +1,183 @@
+dnl  mpn_addmul_1 for Pentium 4 and P6 models with SSE2 (i.e., 9,D,E,F).
+
+dnl  Copyright 2005, 2007 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C TODO:
+C  * Tweak eax/edx offsets in loop as to save some lea's
+C  * Perhaps software pipeline small-case code
+
+C                           cycles/limb
+C P6 model 0-8,10-12)           -
+C P6 model 9   (Banias)         ?
+C P6 model 13  (Dothan)         5.24
+C P4 model 0-1 (Willamette):    5
+C P4 model 2   (Northwood):     5
+C P4 model 3-4 (Prescott):      5
+
+C INPUT PARAMETERS
+C rp           sp + 4
+C up           sp + 8
+C n            sp + 12
+C v0           sp + 16
+
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_addmul_1c)
+       mov     4(%esp), %edx           C rp
+       mov     8(%esp), %eax           C up
+       mov     12(%esp), %ecx          C n
+       movd    16(%esp), %mm7          C v0
+       movd    20(%esp), %mm6          C carry-in limb, the fifth argument
+       jmp     L(ent)                  C share the body of mpn_addmul_1
+EPILOGUE()
+       ALIGN(16)
+PROLOGUE(mpn_addmul_1)
+       mov     4(%esp), %edx           C rp
+       mov     8(%esp), %eax           C up
+       mov     12(%esp), %ecx          C n
+       movd    16(%esp), %mm7          C v0
+       pxor    %mm6, %mm6              C carry starts at zero
+L(ent):        cmp     $4, %ecx
+       jnc     L(big)                  C n >= 4: use the unrolled loop
+
+L(lp0):        movd    (%eax), %mm0            C small n: one limb per pass
+       lea     4(%eax), %eax
+       movd    (%edx), %mm4            C rp limb
+       lea     4(%edx), %edx
+       pmuludq %mm7, %mm0              C up limb times v0, 64-bit product
+       paddq   %mm0, %mm4              C plus the rp limb
+       paddq   %mm4, %mm6              C plus the carry
+       movd    %mm6, -4(%edx)          C store the low half, edx has already moved on
+       psrlq   $32, %mm6               C high half is the next carry
+       dec     %ecx
+       jnz     L(lp0)
+       movd    %mm6, %eax              C return the carry limb
+       emms
+       ret
+
+L(big):        and     $3, %ecx                C dispatch on n mod 4
+       je      L(0)
+       cmp     $2, %ecx
+       jc      L(1)
+       je      L(2)
+       jmp     L(3)                    C FIXME: one case should fall through
+
+L(0):  movd    (%eax), %mm3            C n mod 4 == 0: enter the loop at L(00)
+       sub     12(%esp), %ecx          C loop count
+       lea     -16(%eax), %eax         C bias up and rp to suit the offsets at L(00)
+       lea     -12(%edx), %edx
+       pmuludq %mm7, %mm3
+       movd    20(%eax), %mm0
+       movd    12(%edx), %mm5
+       pmuludq %mm7, %mm0
+       movd    24(%eax), %mm1
+       paddq   %mm3, %mm5
+       movd    16(%edx), %mm4
+       jmp     L(00)
+
+L(1):  movd    (%eax), %mm2            C n mod 4 == 1: enter the loop at L(01)
+       sub     12(%esp), %ecx          C ecx = n mod 4 - n, a negative multiple of 4
+       lea     -12(%eax), %eax         C bias up and rp to suit the offsets at L(01)
+       lea     -8(%edx), %edx
+       movd    8(%edx), %mm4
+       pmuludq %mm7, %mm2
+       movd    16(%eax), %mm3
+       pmuludq %mm7, %mm3
+       movd    20(%eax), %mm0
+       paddq   %mm2, %mm4
+       movd    12(%edx), %mm5
+       jmp     L(01)
+
+L(2):  movd    (%eax), %mm1            C n mod 4 == 2: enter the loop at L(10)
+       sub     12(%esp), %ecx          C ecx = n mod 4 - n, a negative multiple of 4
+       lea     -8(%eax), %eax          C bias up and rp to suit the offsets at L(10)
+       lea     -4(%edx), %edx
+       pmuludq %mm7, %mm1
+       movd    12(%eax), %mm2
+       movd    4(%edx), %mm5
+       pmuludq %mm7, %mm2
+       movd    16(%eax), %mm3
+       paddq   %mm1, %mm5
+       movd    8(%edx), %mm4
+       jmp     L(10)
+
+L(3):  movd    (%eax), %mm0            C n mod 4 == 3: falls into L(top)
+       sub     12(%esp), %ecx          C ecx = n mod 4 - n, a negative multiple of 4
+       lea     -4(%eax), %eax          C only up needs a bias here
+       pmuludq %mm7, %mm0
+       movd    8(%eax), %mm1
+       movd    (%edx), %mm4
+       pmuludq %mm7, %mm1
+       movd    12(%eax), %mm2
+       paddq   %mm0, %mm4
+       movd    4(%edx), %mm5
+
+       ALIGN(16)
+L(top):        pmuludq %mm7, %mm2              C 4 limbs per pass, mm6 carries between them
+       paddq   %mm4, %mm6
+       movd    16(%eax), %mm3
+       paddq   %mm1, %mm5
+       movd    8(%edx), %mm4
+       movd    %mm6, 0(%edx)           C result limb 0 of this pass
+       psrlq   $32, %mm6
+L(10): pmuludq %mm7, %mm3
+       paddq   %mm5, %mm6
+       movd    20(%eax), %mm0
+       paddq   %mm2, %mm4
+       movd    12(%edx), %mm5
+       movd    %mm6, 4(%edx)           C result limb 1 of this pass
+       psrlq   $32, %mm6
+L(01): pmuludq %mm7, %mm0
+       paddq   %mm4, %mm6
+       movd    24(%eax), %mm1
+       paddq   %mm3, %mm5
+       movd    16(%edx), %mm4
+       movd    %mm6, 8(%edx)           C result limb 2 of this pass
+       psrlq   $32, %mm6
+L(00): pmuludq %mm7, %mm1
+       paddq   %mm5, %mm6
+       movd    28(%eax), %mm2
+       paddq   %mm0, %mm4
+       movd    20(%edx), %mm5
+       movd    %mm6, 12(%edx)          C result limb 3 of this pass
+       psrlq   $32, %mm6
+       lea     16(%eax), %eax
+       lea     16(%edx), %edx
+       add     $4, %ecx                C negative count climbs to zero
+       jnz     L(top)
+
+L(end):        pmuludq %mm7, %mm2              C wind down: the last three limbs
+       paddq   %mm4, %mm6
+       paddq   %mm1, %mm5
+       movd    8(%edx), %mm4
+       movd    %mm6, 0(%edx)
+       psrlq   $32, %mm6
+       paddq   %mm5, %mm6
+       paddq   %mm2, %mm4
+       movd    %mm6, 4(%edx)
+       psrlq   $32, %mm6
+       paddq   %mm4, %mm6
+       movd    %mm6, 8(%edx)
+       psrlq   $32, %mm6
+       movd    %mm6, %eax              C return the carry limb
+       emms
+       ret
+EPILOGUE()
diff --git a/mpn/x86/pentium4/sse2/dive_1.asm b/mpn/x86/pentium4/sse2/dive_1.asm

new file mode 100644 (file)

index 0000000..5e0e38e
--- /dev/null
+++ b/mpn/x86/pentium4/sse2/dive_1.asm
@@ -0,0 +1,204 @@
+dnl  Intel Pentium-4 mpn_divexact_1 -- mpn by limb exact division.
+
+dnl  Copyright 2001, 2002, 2007 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C P4: 19.0 cycles/limb
+
+
+C void mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                      mp_limb_t divisor);
+C
+C Pairs of movd's are used to avoid unaligned loads.  Despite the loads not
+C being on the dependent chain and there being plenty of cycles available,
+C using an unaligned movq on every second iteration measured about 23 c/l.
+C
+C Using divl for size==1 seems a touch quicker than mul-by-inverse.  The mul
+C will be about 9+2*4+2*2+10*4+19+12 = 92 cycles latency, though some of
+C that might be hidden by out-of-order execution, whereas divl is around 60.
+C At size==2 an extra 19 for the mul versus 60 for the divl will see the mul
+C faster.
+
+defframe(PARAM_DIVISOR,16)
+defframe(PARAM_SIZE,   12)
+defframe(PARAM_SRC,    8)
+defframe(PARAM_DST,    4)
+
+       TEXT
+
+       ALIGN(16)
+PROLOGUE(mpn_divexact_1)
+deflit(`FRAME',0)
+C Store the size quotient limbs of src divided by divisor at dst.
+       movl    PARAM_SIZE, %edx
+
+       movl    PARAM_SRC, %eax
+
+       movl    PARAM_DIVISOR, %ecx
+       subl    $1, %edx
+       jnz     L(two_or_more)
+
+       movl    (%eax), %eax            C size==1: the single src limb
+       xorl    %edx, %edx              C zero high half of the dividend for divl
+
+       divl    %ecx                    C eax = src[0] / divisor
+       movl    PARAM_DST, %ecx
+
+       movl    %eax, (%ecx)
+       ret
+
+
+L(two_or_more):
+       C eax   src
+       C ebx
+       C ecx   divisor
+       C edx   size-1
+
+       movl    %ecx, %eax
+       bsfl    %ecx, %ecx              C trailing twos
+
+       shrl    %cl, %eax               C d = divisor without twos
+       movd    %eax, %mm6              C mm6 = d
+       movd    %ecx, %mm7              C shift
+
+       shrl    %eax                    C d/2
+
+       andl    $127, %eax              C d/2, 7 bits
+
+ifdef(`PIC',`
+       LEA(    binvert_limb_table, %ecx)
+       movzbl  (%eax,%ecx), %eax               C inv 8 bits
+',`
+       movzbl  binvert_limb_table(%eax), %eax  C inv 8 bits
+')
+
+       C
+
+       movd    %eax, %mm5              C inv
+
+       movd    %eax, %mm0              C inv
+
+       pmuludq %mm5, %mm5              C inv*inv
+
+       C
+
+       pmuludq %mm6, %mm5              C inv*inv*d
+       paddd   %mm0, %mm0              C 2*inv
+
+       C
+
+       psubd   %mm5, %mm0              C inv = 2*inv - inv*inv*d
+       pxor    %mm5, %mm5
+
+       paddd   %mm0, %mm5              C mm5 = copy of the refined inv
+       pmuludq %mm0, %mm0              C inv*inv
+
+       pcmpeqd %mm4, %mm4
+       psrlq   $32, %mm4               C 0x00000000FFFFFFFF
+
+       C
+
+       pmuludq %mm6, %mm0              C inv*inv*d
+       paddd   %mm5, %mm5              C 2*inv
+
+       movl    PARAM_SRC, %eax
+       movl    PARAM_DST, %ecx
+       pxor    %mm1, %mm1              C initial carry limb
+
+       C
+
+       psubd   %mm0, %mm5              C inv = 2*inv - inv*inv*d
+
+       ASSERT(e,`      C expect d*inv == 1 mod 2^GMP_LIMB_BITS
+       pushl   %eax    FRAME_pushl()
+       movq    %mm6, %mm0
+       pmuludq %mm5, %mm0
+       movd    %mm0, %eax
+       cmpl    $1, %eax
+       popl    %eax    FRAME_popl()')
+
+       pxor    %mm0, %mm0              C initial carry bit
+
+
+C The dependent chain here is as follows.
+C
+C                                      latency
+C      psubq    s = (src-cbit) - climb    2
+C      pmuludq  q = s*inverse             8
+C      pmuludq  prod = q*divisor          8
+C      psrlq    climb = high(prod)        2
+C                                        --
+C                                        20
+C
+C Yet the loop measures 19.0 c/l, so obviously there's something gained
+C there over a straight reading of the chip documentation.
+
+L(top):
+       C eax   src, incrementing
+       C ebx
+       C ecx   dst, incrementing
+       C edx   counter, size-1 iterations
+       C
+       C mm0   carry bit
+       C mm1   carry limb
+       C mm4   0x00000000FFFFFFFF
+       C mm5   inverse
+       C mm6   divisor
+       C mm7   shift
+
+       movd    (%eax), %mm2
+       movd    4(%eax), %mm3
+       addl    $4, %eax
+       punpckldq %mm3, %mm2            C this limb and the next one as a qword
+
+       psrlq   %mm7, %mm2              C drop the twos, bits come in from the next limb
+       pand    %mm4, %mm2              C src
+       psubq   %mm0, %mm2              C src - cbit
+
+       psubq   %mm1, %mm2              C src - cbit - climb
+       movq    %mm2, %mm0
+       psrlq   $63, %mm0               C new cbit
+
+       pmuludq %mm5, %mm2              C s*inverse
+       movd    %mm2, (%ecx)            C q
+       addl    $4, %ecx
+
+       movq    %mm6, %mm1
+       pmuludq %mm2, %mm1              C q*divisor
+       psrlq   $32, %mm1               C new climb
+
+       subl    $1, %edx
+       jnz     L(top)
+
+
+L(done):
+       movd    (%eax), %mm2            C high limb, no further limb is read
+       psrlq   %mm7, %mm2              C src
+       psubq   %mm0, %mm2              C src - cbit
+
+       psubq   %mm1, %mm2              C src - cbit - climb
+
+       pmuludq %mm5, %mm2              C s*inverse
+       movd    %mm2, (%ecx)            C q
+
+       emms
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/pentium4/sse2/divrem_1.asm b/mpn/x86/pentium4/sse2/divrem_1.asm

new file mode 100644 (file)

index 0000000..7f973db
--- /dev/null
+++ b/mpn/x86/pentium4/sse2/divrem_1.asm
@@ -0,0 +1,635 @@
+dnl  Intel Pentium-4 mpn_divrem_1 -- mpn by limb division.
+
+dnl  Copyright 1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation,
+dnl  Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C P4: 32 cycles/limb integer part, 30 cycles/limb fraction part.
+
+
+C mp_limb_t mpn_divrem_1 (mp_ptr dst, mp_size_t xsize,
+C                         mp_srcptr src, mp_size_t size,
+C                         mp_limb_t divisor);
+C mp_limb_t mpn_divrem_1c (mp_ptr dst, mp_size_t xsize,
+C                          mp_srcptr src, mp_size_t size,
+C                          mp_limb_t divisor, mp_limb_t carry);
+C mp_limb_t mpn_preinv_divrem_1 (mp_ptr dst, mp_size_t xsize,
+C                                mp_srcptr src, mp_size_t size,
+C                                mp_limb_t divisor, mp_limb_t inverse,
+C                                unsigned shift);
+C
+C Algorithm:
+C
+C The method and nomenclature follow part 8 of "Division by Invariant
+C Integers using Multiplication" by Granlund and Montgomery, reference in
+C gmp.texi.
+C
+C "m" is written for what is m' in the paper, and "d" for d_norm, which
+C won't cause any confusion since it's only the normalized divisor that's of
+C any use in the code.  "b" is written for 2^N, the size of a limb, N being
+C 32 here.
+C
+C The step "sdword dr = n - 2^N*d + (2^N-1-q1) * d" is instead done as
+C "n-d - q1*d".  This rearrangement gives the same two-limb answer but lets
+C us have just a psubq on the dependent chain.
+C
+C For reference, the way the k7 code uses "n-(q1+1)*d" would not suit here,
+C detecting an overflow of q1+1 when q1=0xFFFFFFFF would cost too much.
+C
+C Notes:
+C
+C mpn_divrem_1 and mpn_preinv_divrem_1 avoid one division if the src high
+C limb is less than the divisor.  mpn_divrem_1c doesn't check for a zero
+C carry, since in normal circumstances that will be a very rare event.
+C
+C The test for skipping a division is branch free (once size>=1 is tested).
+C The store to the destination high limb is 0 when a divide is skipped, or
+C if it's not skipped then a copy of the src high limb is stored.  The
+C latter is in case src==dst.
+C
+C There's a small bias towards expecting xsize==0, by having code for
+C xsize==0 in a straight line and xsize!=0 under forward jumps.
+C
+C Enhancements:
+C
+C The loop measures 32 cycles, but the dependent chain would suggest it
+C could be done with 30.  Not sure where to start looking for the extras.
+C
+C Alternatives:
+C
+C If the divisor is normalized (high bit set) then a division step can
+C always be skipped, since the high destination limb is always 0 or 1 in
+C that case.  It doesn't seem worth checking for this though, since it
+C probably occurs infrequently.
+
+
+dnl  MUL_THRESHOLD is the value of xsize+size at which the multiply by
+dnl  inverse method is used, rather than plain "divl"s.  Minimum value 1.
+dnl
+dnl  The inverse takes about 80-90 cycles to calculate, but after that the
+dnl  multiply is 32 c/l versus division at about 58 c/l.
+dnl
+dnl  At 4 limbs the div is a touch faster than the mul (and of course
+dnl  simpler), so start the mul from 5 limbs.
+
+deflit(MUL_THRESHOLD, 5)
+
+
+defframe(PARAM_PREINV_SHIFT,   28)  dnl mpn_preinv_divrem_1
+defframe(PARAM_PREINV_INVERSE, 24)  dnl mpn_preinv_divrem_1
+defframe(PARAM_CARRY,  24)          dnl mpn_divrem_1c
+defframe(PARAM_DIVISOR,20)
+defframe(PARAM_SIZE,   16)
+defframe(PARAM_SRC,    12)
+defframe(PARAM_XSIZE,  8)
+defframe(PARAM_DST,    4)
+
+dnl  re-use parameter space
+define(SAVE_ESI,`PARAM_SIZE')
+define(SAVE_EBP,`PARAM_SRC')
+define(SAVE_EDI,`PARAM_DIVISOR')
+define(SAVE_EBX,`PARAM_DST')
+
+       TEXT
+
+       ALIGN(16)
+PROLOGUE(mpn_preinv_divrem_1)
+deflit(`FRAME',0)
+
+C Entry point for callers that supply a precomputed inverse and shift.
+C Loads the parameters, performs the branch-free high<divisor test that
+C may skip one division step, sets up the MMX registers expected at
+C L(start_preinv) and jumps there.
+C
+C The callee-saved registers are stored into parameter slots (SAVE_xxx
+C alias PARAM_xxx above), each one only after the parameter sharing that
+C slot has been loaded.
+C
+C src[size-1] is read unconditionally below, so this entry assumes
+C size>=1.
+
+       movl    PARAM_SIZE, %ecx
+       xorl    %edx, %edx              C carry if can't skip a div
+
+       movl    %esi, SAVE_ESI
+       movl    PARAM_SRC, %esi
+
+       movl    %ebp, SAVE_EBP
+       movl    PARAM_DIVISOR, %ebp
+
+       movl    %edi, SAVE_EDI
+       movl    PARAM_DST, %edi
+
+       movl    -4(%esi,%ecx,4), %eax   C src high limb
+
+       movl    %ebx, SAVE_EBX
+       movl    PARAM_XSIZE, %ebx
+
+       movd    PARAM_PREINV_INVERSE, %mm4
+
+       movd    PARAM_PREINV_SHIFT, %mm7  C l
+       cmpl    %ebp, %eax              C high cmp divisor
+
+       C The carry flag set by the cmpl above is consumed by cmovc,
+       C cmovnc and sbbl below.  The mov, movd and lea instructions in
+       C between leave the flags alone.
+
+       cmovc(  %eax, %edx)             C high is carry if high<divisor
+       movd    %edx, %mm0              C carry
+
+       movd    %edx, %mm1              C carry
+       movl    $0, %edx                C mov rather than xor, to keep flags
+
+       movd    %ebp, %mm5              C d
+       cmovnc( %eax, %edx)             C 0 if skip div, src high if not
+                                       C (the latter in case src==dst)
+       leal    -4(%edi,%ebx,4), %edi   C &dst[xsize-1]
+
+       movl    %edx, (%edi,%ecx,4)     C dst high limb
+       sbbl    $0, %ecx                C skip one division if high<divisor
+       movl    $32, %eax
+
+       subl    PARAM_PREINV_SHIFT, %eax
+       psllq   %mm7, %mm5              C d normalized
+       leal    (%edi,%ecx,4), %edi     C &dst[xsize+size-1]
+       leal    -4(%esi,%ecx,4), %esi   C &src[size-1]
+
+       movd    %eax, %mm6              C 32-l
+       jmp     L(start_preinv)         C shared code inside mpn_divrem_1
+
+EPILOGUE()
+
+
+       ALIGN(16)
+PROLOGUE(mpn_divrem_1c)
+deflit(`FRAME',0)
+
+C Entry point with an explicit carry-in limb.  Loads the carry into edx
+C and the other parameters into the registers listed at L(start_1c), then
+C joins the shared code in mpn_divrem_1.  No high<divisor test is made
+C here, and no MMX register is touched.
+C
+C The carry becomes the high half of the first dividend (edx for divl, or
+C n2 on the multiply path), so presumably it must be less than the
+C divisor -- confirm against callers.
+
+       movl    PARAM_CARRY, %edx
+
+       movl    PARAM_SIZE, %ecx
+
+       movl    %esi, SAVE_ESI
+       movl    PARAM_SRC, %esi
+
+       movl    %ebp, SAVE_EBP
+       movl    PARAM_DIVISOR, %ebp
+
+       movl    %edi, SAVE_EDI
+       movl    PARAM_DST, %edi
+
+       movl    %ebx, SAVE_EBX
+       movl    PARAM_XSIZE, %ebx
+
+       leal    -4(%edi,%ebx,4), %edi   C &dst[xsize-1]
+       jmp     L(start_1c)             C shared code inside mpn_divrem_1
+
+EPILOGUE()
+
+
+       ALIGN(16)
+PROLOGUE(mpn_divrem_1)
+deflit(`FRAME',0)
+
+C Entry point without carry-in.  This function body also holds the code
+C shared with mpn_divrem_1c (joining at L(start_1c)) and with
+C mpn_preinv_divrem_1 (joining at L(start_preinv)).
+C
+C Outline:
+C   - optional skip of one division step when src high limb < divisor
+C   - size+xsize < MUL_THRESHOLD: plain divl loops, integer then fraction
+C   - otherwise: compute the inverse m of the normalized divisor and run
+C     the multiply-by-inverse loops, integer then fraction
+C
+C The remainder is returned in eax.  Callee-saved registers are kept in
+C the parameter slots named SAVE_xxx and restored on both exit paths.
+
+       movl    PARAM_SIZE, %ecx
+       xorl    %edx, %edx              C initial carry (if can't skip a div)
+
+       movl    %esi, SAVE_ESI
+       movl    PARAM_SRC, %esi
+
+       movl    %ebp, SAVE_EBP
+       movl    PARAM_DIVISOR, %ebp
+
+       movl    %edi, SAVE_EDI
+       movl    PARAM_DST, %edi
+
+       movl    %ebx, SAVE_EBX
+       movl    PARAM_XSIZE, %ebx
+       leal    -4(%edi,%ebx,4), %edi   C &dst[xsize-1]
+
+       orl     %ecx, %ecx              C size
+       jz      L(no_skip_div)          C if size==0
+       movl    -4(%esi,%ecx,4), %eax   C src high limb
+
+       cmpl    %ebp, %eax              C high cmp divisor
+
+       C The carry flag from the cmpl above stays live down to the sbbl,
+       C since cmov and mov do not modify flags.
+
+       cmovnc( %eax, %edx)             C 0 if skip div, src high if not
+       movl    %edx, (%edi,%ecx,4)     C dst high limb
+
+       movl    $0, %edx                C mov rather than xor, to keep flags
+       cmovc(  %eax, %edx)             C high is carry if high<divisor
+
+       sbbl    $0, %ecx                C size-1 if high<divisor
+L(no_skip_div):
+
+
+L(start_1c):
+       C eax
+       C ebx   xsize
+       C ecx   size
+       C edx   carry
+       C esi   src
+       C edi   &dst[xsize-1]
+       C ebp   divisor
+
+       leal    (%ebx,%ecx), %eax       C size+xsize
+       leal    -4(%esi,%ecx,4), %esi   C &src[size-1]
+       leal    (%edi,%ecx,4), %edi     C &dst[size+xsize-1]
+
+       cmpl    $MUL_THRESHOLD, %eax
+       jae     L(mul_by_inverse)
+
+
+       C Plain divl method.  No MMX register is used on this path, which
+       C is why L(divide_done) returns without an emms.
+
+       orl     %ecx, %ecx
+       jz      L(divide_no_integer)    C if size==0
+
+L(divide_integer):
+       C eax   scratch (quotient)
+       C ebx   xsize
+       C ecx   counter
+       C edx   carry
+       C esi   src, decrementing
+       C edi   dst, decrementing
+       C ebp   divisor
+
+       movl    (%esi), %eax
+       subl    $4, %esi
+
+       divl    %ebp                    C edx:eax / divisor, remainder in edx
+
+       movl    %eax, (%edi)
+       subl    $4, %edi
+
+       subl    $1, %ecx
+       jnz     L(divide_integer)
+
+
+L(divide_no_integer):
+       orl     %ebx, %ebx
+       jnz     L(divide_fraction)      C if xsize!=0
+
+L(divide_done):
+       movl    SAVE_ESI, %esi
+       movl    SAVE_EDI, %edi
+       movl    SAVE_EBX, %ebx
+       movl    SAVE_EBP, %ebp
+       movl    %edx, %eax              C return remainder
+       ret
+
+
+L(divide_fraction):
+       C eax   scratch (quotient)
+       C ebx   counter
+       C ecx
+       C edx   carry
+       C esi
+       C edi   dst, decrementing
+       C ebp   divisor
+
+       movl    $0, %eax                C fraction limbs of the dividend are 0
+
+       divl    %ebp
+
+       movl    %eax, (%edi)
+       subl    $4, %edi
+
+       subl    $1, %ebx
+       jnz     L(divide_fraction)
+
+       jmp     L(divide_done)
+
+
+
+C -----------------------------------------------------------------------------
+
+L(mul_by_inverse):
+       C eax
+       C ebx   xsize
+       C ecx   size
+       C edx   carry
+       C esi   &src[size-1]
+       C edi   &dst[size+xsize-1]
+       C ebp   divisor
+
+       C Normalize the divisor and compute its inverse m with one divl.
+       C ecx is needed for the shift count, so size is parked in edx
+       C once the carry has been copied to mm0 and mm1.
+
+       bsrl    %ebp, %eax              C 31-l
+       movd    %edx, %mm0              C carry
+       movd    %edx, %mm1              C carry
+       movl    %ecx, %edx              C size
+       movl    $31, %ecx
+
+       C
+
+       xorl    %eax, %ecx              C l = leading zeros on d
+       addl    $1, %eax
+
+       shll    %cl, %ebp               C d normalized
+       movd    %ecx, %mm7              C l
+       movl    %edx, %ecx              C size
+
+       movd    %eax, %mm6              C 32-l
+       movl    $-1, %edx
+       movl    $-1, %eax
+
+       C
+
+       subl    %ebp, %edx              C (b-d)-1 so  edx:eax = b*(b-d)-1
+
+       divl    %ebp                    C floor (b*(b-d)-1 / d)
+       movd    %ebp, %mm5              C d
+
+       C
+
+       movd    %eax, %mm4              C m
+
+
+L(start_preinv):
+       C eax   inverse (from L(mul_by_inverse)), or 32-l (from
+       C       mpn_preinv_divrem_1); only used as scratch from here on
+       C ebx   xsize
+       C ecx   size
+       C edx
+       C esi   &src[size-1]
+       C edi   &dst[size+xsize-1]
+       C ebp
+       C
+       C mm0   carry
+       C mm1   carry
+       C mm2
+       C mm4   m
+       C mm5   d
+       C mm6   32-l
+       C mm7   l
+
+       psllq   %mm7, %mm0              C n2 = carry << l, for size==0
+
+       subl    $1, %ecx
+       jb      L(integer_none)         C borrow means size was 0
+
+       movd    (%esi), %mm0            C src high limb
+       punpckldq %mm1, %mm0
+       psrlq   %mm6, %mm0              C n2 = high (carry:srchigh << l)
+       jz      L(integer_last)         C ZF is still from the subl above,
+                                       C MMX instructions do not alter flags
+
+
+C The dependent chain here consists of
+C
+C      2   paddd    n1+n2
+C      8   pmuludq  m*(n1+n2)
+C      2   paddq    n2:nadj + m*(n1+n2)
+C      2   psrlq    q1
+C      8   pmuludq  d*q1
+C      2   psubq    (n-d)-q1*d
+C      2   psrlq    high n-(q1+1)*d mask
+C      2   pand     d masked
+C      2   paddd    n2+d addback
+C      --
+C      30
+C
+C But it seems to run at 32 cycles, so presumably there's something else
+C going on.
+
+       ALIGN(16)
+L(integer_top):
+       C eax
+       C ebx
+       C ecx   counter, size-1 to 1
+       C edx
+       C esi   src, decrementing
+       C edi   dst, decrementing
+       C
+       C mm0   n2
+       C mm4   m
+       C mm5   d
+       C mm6   32-l
+       C mm7   l
+
+       ASSERT(b,`C n2<d
+        movd   %mm0, %eax
+        movd   %mm5, %edx
+        cmpl   %edx, %eax')
+
+       movd    -4(%esi), %mm1          C next src limbs
+       movd    (%esi), %mm2
+       leal    -4(%esi), %esi
+
+       punpckldq %mm2, %mm1
+       psrlq   %mm6, %mm1              C n10
+
+       movq    %mm1, %mm2              C n10
+       movq    %mm1, %mm3              C n10
+       psrad   $31, %mm1               C -n1
+       pand    %mm5, %mm1              C -n1 & d
+       paddd   %mm2, %mm1              C nadj = n10+(-n1&d), ignore overflow
+
+       psrld   $31, %mm2               C n1
+       paddd   %mm0, %mm2              C n2+n1
+       punpckldq %mm0, %mm1            C n2:nadj
+
+       pmuludq %mm4, %mm2              C m*(n2+n1)
+
+       C
+
+       paddq   %mm2, %mm1              C n2:nadj + m*(n2+n1)
+       pxor    %mm2, %mm2              C break dependency, saves 4 cycles
+       pcmpeqd %mm2, %mm2              C FF...FF
+       psrlq   $63, %mm2               C 1
+
+       psrlq   $32, %mm1               C q1 = high(n2:nadj + m*(n2+n1))
+
+       paddd   %mm1, %mm2              C q1+1
+       pmuludq %mm5, %mm1              C q1*d
+
+       punpckldq %mm0, %mm3            C n = n2:n10
+       pxor    %mm0, %mm0
+
+       psubq   %mm5, %mm3              C n - d
+
+       C
+
+       psubq   %mm1, %mm3              C n - (q1+1)*d
+
+       por     %mm3, %mm0              C copy remainder -> new n2
+       psrlq   $32, %mm3               C high n - (q1+1)*d, 0 or -1
+
+       ASSERT(be,`C 0 or -1
+        movd   %mm3, %eax
+        addl   $1, %eax
+        cmpl   $1, %eax')
+
+       paddd   %mm3, %mm2              C q
+       pand    %mm5, %mm3              C mask & d
+
+       paddd   %mm3, %mm0              C addback if necessary
+       movd    %mm2, (%edi)
+       leal    -4(%edi), %edi
+
+       subl    $1, %ecx
+       ja      L(integer_top)          C loop until the counter reaches 0
+
+
+L(integer_last):
+       C eax
+       C ebx   xsize
+       C ecx
+       C edx
+       C esi   &src[0]
+       C edi   &dst[xsize]
+       C
+       C mm0   n2
+       C mm4   m
+       C mm5   d
+       C mm6
+       C mm7   l
+
+       C Same step as L(integer_top), except there is no lower src limb
+       C to shift in, so n10 is just src[0] << l.
+
+       ASSERT(b,`C n2<d
+        movd   %mm0, %eax
+        movd   %mm5, %edx
+        cmpl   %edx, %eax')
+
+       movd    (%esi), %mm1            C src[0]
+       psllq   %mm7, %mm1              C n10
+
+       movq    %mm1, %mm2              C n10
+       movq    %mm1, %mm3              C n10
+       psrad   $31, %mm1               C -n1
+       pand    %mm5, %mm1              C -n1 & d
+       paddd   %mm2, %mm1              C nadj = n10+(-n1&d), ignore overflow
+
+       psrld   $31, %mm2               C n1
+       paddd   %mm0, %mm2              C n2+n1
+       punpckldq %mm0, %mm1            C n2:nadj
+
+       pmuludq %mm4, %mm2              C m*(n2+n1)
+
+       C
+
+       paddq   %mm2, %mm1              C n2:nadj + m*(n2+n1)
+       pcmpeqd %mm2, %mm2              C FF...FF
+       psrlq   $63, %mm2               C 1
+
+       psrlq   $32, %mm1               C q1 = high(n2:nadj + m*(n2+n1))
+       paddd   %mm1, %mm2              C q1+1
+
+       pmuludq %mm5, %mm1              C q1*d
+       punpckldq %mm0, %mm3            C n
+       psubq   %mm5, %mm3              C n - d
+       pxor    %mm0, %mm0
+
+       C
+
+       psubq   %mm1, %mm3              C n - (q1+1)*d
+
+       por     %mm3, %mm0              C remainder -> n2
+       psrlq   $32, %mm3               C high n - (q1+1)*d, 0 or -1
+
+       ASSERT(be,`C 0 or -1
+        movd   %mm3, %eax
+        addl   $1, %eax
+        cmpl   $1, %eax')
+
+       paddd   %mm3, %mm2              C q
+       pand    %mm5, %mm3              C mask & d
+
+       paddd   %mm3, %mm0              C addback if necessary
+       movd    %mm2, (%edi)
+       leal    -4(%edi), %edi
+
+
+L(integer_none):
+       C eax
+       C ebx   xsize
+
+       orl     %ebx, %ebx
+       jnz     L(fraction_some)        C if xsize!=0
+
+
+L(fraction_done):
+       C mm0 holds the remainder still shifted left by l.  Shift it back
+       C down and return it, restoring the saved registers and leaving
+       C MMX state with emms.
+
+       movl    SAVE_EBP, %ebp
+       psrld   %mm7, %mm0              C remainder
+
+       movl    SAVE_EDI, %edi
+       movd    %mm0, %eax
+
+       movl    SAVE_ESI, %esi
+       movl    SAVE_EBX, %ebx
+       emms
+       ret
+
+
+
+C -----------------------------------------------------------------------------
+C
+
+L(fraction_some):
+       C eax
+       C ebx   xsize
+       C ecx
+       C edx
+       C esi
+       C edi   &dst[xsize-1]
+       C ebp
+
+
+L(fraction_top):
+       C eax
+       C ebx   counter, xsize iterations
+       C ecx
+       C edx
+       C esi
+       C edi   dst, decrementing
+       C
+       C mm0   n2
+       C mm4   m
+       C mm5   d
+       C mm6   32-l
+       C mm7   l
+
+       C The low dividend limb is zero in the fraction part, so the n1
+       C and nadj terms of the integer loop drop out and n is just n2:0.
+
+       ASSERT(b,`C n2<d
+        movd   %mm0, %eax
+        movd   %mm5, %edx
+        cmpl   %edx, %eax')
+
+       movq    %mm0, %mm1              C n2
+       pmuludq %mm4, %mm0              C m*n2
+
+       pcmpeqd %mm2, %mm2
+       psrlq   $63, %mm2               C 1
+
+       C
+
+       psrlq   $32, %mm0               C high(m*n2)
+
+       paddd   %mm1, %mm0              C q1 = high(n2:0 + m*n2)
+
+       paddd   %mm0, %mm2              C q1+1
+       pmuludq %mm5, %mm0              C q1*d
+
+       psllq   $32, %mm1               C n = n2:0
+       psubq   %mm5, %mm1              C n - d
+
+       C
+
+       psubq   %mm0, %mm1              C r = n - (q1+1)*d
+       pxor    %mm0, %mm0
+
+       por     %mm1, %mm0              C r -> n2
+       psrlq   $32, %mm1               C high n - (q1+1)*d, 0 or -1
+
+       ASSERT(be,`C 0 or -1
+        movd   %mm1, %eax
+        addl   $1, %eax
+        cmpl   $1, %eax')
+
+       paddd   %mm1, %mm2              C q
+       pand    %mm5, %mm1              C mask & d
+
+       paddd   %mm1, %mm0              C addback if necessary
+       movd    %mm2, (%edi)
+       leal    -4(%edi), %edi
+
+       subl    $1, %ebx
+       jne     L(fraction_top)
+
+
+       jmp     L(fraction_done)
+
+EPILOGUE()
diff --git a/mpn/x86/pentium4/sse2/gmp-mparam.h b/mpn/x86/pentium4/sse2/gmp-mparam.h

new file mode 100644 (file)

index 0000000..7091981
--- /dev/null
+++ b/mpn/x86/pentium4/sse2/gmp-mparam.h
@@ -0,0 +1,171 @@
+/* Intel Pentium-4 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008,
+2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 32
+#define BYTES_PER_MP_LIMB 4
+
+
+#define MOD_1_NORM_THRESHOLD                24
+#define MOD_1_UNNORM_THRESHOLD           MP_SIZE_T_MAX  /* never */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD         26
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          9
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0  /* never mpn_mod_1_1p */
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD         0  /* never mpn_mod_1s_2p */
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     34
+#define USE_PREINV_DIVREM_1                  1  /* native */
+#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD           22
+
+#define MUL_TOOM22_THRESHOLD                30
+#define MUL_TOOM33_THRESHOLD               120
+#define MUL_TOOM44_THRESHOLD               296
+#define MUL_TOOM6H_THRESHOLD               414
+#define MUL_TOOM8H_THRESHOLD               620
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD     198
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     216
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     194
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD     209
+
+#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
+#define SQR_TOOM2_THRESHOLD                 48
+#define SQR_TOOM3_THRESHOLD                170
+#define SQR_TOOM4_THRESHOLD                454
+#define SQR_TOOM6_THRESHOLD                454
+#define SQR_TOOM8_THRESHOLD                915
+
+#define MULMOD_BNM1_THRESHOLD               19
+#define SQRMOD_BNM1_THRESHOLD               24
+
+#define MUL_FFT_MODF_THRESHOLD             904  /* k = 6 */
+#define MUL_FFT_TABLE3                                      \
+  { {    904, 6}, {     15, 5}, {     32, 6}, {     17, 5}, \
+    {     35, 6}, {     19, 5}, {     39, 6}, {     28, 7}, \
+    {     15, 6}, {     33, 7}, {     17, 6}, {     35, 7}, \
+    {     19, 6}, {     41, 7}, {     21, 6}, {     43, 7}, \
+    {     23, 6}, {     47, 7}, {     27, 6}, {     55, 8}, \
+    {     15, 7}, {     31, 6}, {     63, 7}, {     35, 8}, \
+    {     19, 7}, {     43, 8}, {     23, 7}, {     51, 8}, \
+    {     27, 7}, {     55, 8}, {     31, 7}, {     63, 8}, \
+    {     43, 9}, {     23, 8}, {     55, 9}, {     31, 8}, \
+    {     71, 9}, {     39, 8}, {     79, 9}, {     47, 8}, \
+    {     95, 9}, {     55,10}, {     31, 9}, {     63, 8}, \
+    {    127, 9}, {     79,10}, {     47, 9}, {    103,11}, \
+    {     31,10}, {     63, 9}, {    135,10}, {     79, 9}, \
+    {    159,10}, {     95,11}, {     63,10}, {    127, 9}, \
+    {    263,10}, {    143, 9}, {    287,10}, {    159,11}, \
+    {     95,10}, {    207,12}, {     63,11}, {    127,10}, \
+    {    271,11}, {    159,10}, {    319,11}, {    191,10}, \
+    {    383,11}, {    223,12}, {    127,11}, {    287,10}, \
+    {    607,11}, {    319,12}, {    191,11}, {    383,10}, \
+    {    767,13}, {    127,12}, {    255,11}, {    511,10}, \
+    {   1055,11}, {    543,10}, {   1119, 9}, {   2239,11}, \
+    {    607,12}, {    319,11}, {    671,10}, {   1407,11}, \
+    {    735,10}, {   1471, 9}, {   2943,12}, {    383,11}, \
+    {    799,10}, {   1663,11}, {    863,10}, {   1727,12}, \
+    {    447,13}, {    255,12}, {    511,11}, {   1055,10}, \
+    {   2111,11}, {   1119,10}, {   2239, 9}, {   4479,12}, \
+    {    575,11}, {   1247,10}, {   2495, 9}, {   4991,12}, \
+    {    639,11}, {   1471,10}, {   2943,13}, {    383,12}, \
+    {    767,11}, {   1599,12}, {    831,11}, {   1727,10}, \
+    {   3455,14}, {    255,13}, {    511,12}, {   1023,11}, \
+    {   2111,12}, {   1087,11}, {   2239,10}, {   4479,12}, \
+    {   1215,11}, {   2495,10}, {   4991,13}, {    639,12}, \
+    {   1471,11}, {   2943,10}, {   5887,11}, {   3007,13}, \
+    {    767,12}, {   1727,11}, {   3455,13}, {    895,11}, \
+    {   3839,12}, {   4096,13}, {   8192,14}, {  16384,15}, \
+    {  32768,16} }
+#define MUL_FFT_TABLE3_SIZE 141
+#define MUL_FFT_THRESHOLD                 7552
+
+#define SQR_FFT_MODF_THRESHOLD             793  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    793, 5}, {     28, 6}, {     15, 5}, {     33, 6}, \
+    {     17, 5}, {     35, 6}, {     19, 5}, {     39, 6}, \
+    {     29, 7}, {     15, 6}, {     33, 7}, {     17, 6}, \
+    {     35, 7}, {     19, 6}, {     41, 7}, {     23, 6}, \
+    {     47, 7}, {     27, 6}, {     55, 7}, {     31, 6}, \
+    {     63, 7}, {     37, 8}, {     19, 7}, {     43, 8}, \
+    {     23, 7}, {     49, 8}, {     31, 7}, {     63, 8}, \
+    {     39, 7}, {     79, 8}, {     43, 9}, {     23, 8}, \
+    {     55, 9}, {     31, 8}, {     71, 9}, {     39, 8}, \
+    {     79, 9}, {     47, 8}, {     95, 9}, {     55,10}, \
+    {     31, 9}, {     79,10}, {     47, 9}, {    103,11}, \
+    {     31,10}, {     63, 9}, {    135,10}, {     79, 9}, \
+    {    159,10}, {     95, 9}, {    191,11}, {     63,10}, \
+    {    159,11}, {     95,10}, {    191,12}, {     63,11}, \
+    {    127,10}, {    255, 9}, {    511,10}, {    271,11}, \
+    {    159,10}, {    335,11}, {    191,10}, {    383, 9}, \
+    {    767,10}, {    399, 9}, {    799,11}, {    223,12}, \
+    {    127,11}, {    255,10}, {    527, 9}, {   1055,10}, \
+    {    543,11}, {    287,10}, {    607, 9}, {   1215,11}, \
+    {    319,12}, {    191,11}, {    383,10}, {    799,13}, \
+    {    127,12}, {    255,11}, {    511,10}, {   1055,11}, \
+    {    543,10}, {   1119, 9}, {   2239,11}, {    607,10}, \
+    {   1215,12}, {    319,11}, {    671,10}, {   1407,11}, \
+    {    735,10}, {   1471, 9}, {   2943,10}, {   1503,12}, \
+    {    383,11}, {    799,10}, {   1599,11}, {    863,10}, \
+    {   1727,12}, {    447,11}, {    991,13}, {    255,12}, \
+    {    511,11}, {   1055,10}, {   2111,11}, {   1119,10}, \
+    {   2239,12}, {    575,11}, {   1247,10}, {   2495,12}, \
+    {    639,11}, {   1471,10}, {   2943,13}, {    383,12}, \
+    {    767,11}, {   1599,12}, {    831,11}, {   1727,10}, \
+    {   3455,12}, {    959,11}, {   1919,14}, {    255,13}, \
+    {    511,12}, {   1023,11}, {   2111,12}, {   1087,11}, \
+    {   2239,10}, {   4479,12}, {   1215,11}, {   2495,13}, \
+    {    639,12}, {   1471,11}, {   2943,10}, {   5887,13}, \
+    {    767,12}, {   1727,11}, {   3455,13}, {    895,12}, \
+    {   1791,11}, {   3711,12}, {   1919,11}, {   3839,12}, \
+    {   4096,13}, {   8192,14}, {  16384,15}, {  32768,16} }
+#define SQR_FFT_TABLE3_SIZE 148
+#define SQR_FFT_THRESHOLD                 5760
+
+#define MULLO_BASECASE_THRESHOLD            12
+#define MULLO_DC_THRESHOLD                  51
+#define MULLO_MUL_N_THRESHOLD            13463
+
+#define DC_DIV_QR_THRESHOLD                 28
+#define DC_DIVAPPR_Q_THRESHOLD              61
+#define DC_BDIV_QR_THRESHOLD                55
+#define DC_BDIV_Q_THRESHOLD                 82
+
+#define INV_MULMOD_BNM1_THRESHOLD           60
+#define INV_NEWTON_THRESHOLD                94
+#define INV_APPR_THRESHOLD                  78
+
+#define BINV_NEWTON_THRESHOLD              327
+#define REDC_1_TO_REDC_N_THRESHOLD          63
+
+#define MU_DIV_QR_THRESHOLD               2350
+#define MU_DIVAPPR_Q_THRESHOLD            2089
+#define MUPI_DIV_QR_THRESHOLD                7
+#define MU_BDIV_QR_THRESHOLD              2089
+#define MU_BDIV_Q_THRESHOLD               2089
+
+#define MATRIX22_STRASSEN_THRESHOLD         34
+#define HGCD_THRESHOLD                      74
+#define GCD_DC_THRESHOLD                   321
+#define GCDEXT_DC_THRESHOLD                209
+#define JACOBI_BASE_METHOD                   1
+
+#define GET_STR_DC_THRESHOLD                12
+#define GET_STR_PRECOMPUTE_THRESHOLD        28
+#define SET_STR_DC_THRESHOLD               123
+#define SET_STR_PRECOMPUTE_THRESHOLD      1265
diff --git a/mpn/x86/pentium4/sse2/mod_1_4.asm b/mpn/x86/pentium4/sse2/mod_1_4.asm

new file mode 100644 (file)

index 0000000..cedbab1
--- /dev/null
+++ b/mpn/x86/pentium4/sse2/mod_1_4.asm
@@ -0,0 +1,259 @@
+dnl  mpn_mod_1_4 for Pentium 4 and P6 models with SSE2 (i.e., 9,D,E,F).
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2009 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C TODO:
+C  * Optimize.  The present code was written quite straightforwardly.
+C  * Optimize post-loop reduction code.
+
+C                           cycles/limb
+C P6 model 0-8,10-12            -
+C P6 model 9   (Banias)         ?
+C P6 model 13  (Dothan)         3.4
+C P4 model 0-1 (Willamette):    ?
+C P4 model 2   (Northwood):     4
+C P4 model 3-4 (Prescott):      ?
+
+C INPUT PARAMETERS
+C ap           sp + 4
+C n            sp + 8
+C b            sp + 12
+C cps          sp + 16
+
+define(`B1modb', `%mm1')
+define(`B2modb', `%mm2')
+define(`B3modb', `%mm3')
+define(`B4modb', `%mm4')
+define(`B5modb', `%mm5')
+define(`ap', `%edx')
+define(`n', `%eax')
+
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_mod_1s_4p)
+C Processes the n limbs at ap from the most significant end, four limbs
+C per loop iteration, accumulating into the 64-bit register mm7, then
+C reduces the accumulated value by b and returns the result in eax.
+C
+C Table at cps, as read below:
+C   0(cps) bi     4(cps) cnt     8(cps)..24(cps) B1modb..B5modb
+C
+C NOTE(review): b is used as is against the remainder shifted left by
+C cnt, so presumably callers pass b already shifted left by cnt --
+C confirm against the C callers.
+       push    %ebx                    C after this: ap 8, n 12, b 16, cps 20
+       mov     8(%esp), ap
+       mov     12(%esp), n
+       mov     20(%esp), %ecx          C cps
+
+       movd    8(%ecx), B1modb
+       movd    12(%ecx), B2modb
+       movd    16(%ecx), B3modb
+       movd    20(%ecx), B4modb
+       movd    24(%ecx), B5modb
+
+       C Dispatch on n mod 4 so that the main loop always has a multiple
+       C of four limbs left.  Each lead-in leaves ap pointing at the
+       C lowest of the next four limbs.
+
+       mov     n, %ebx
+       lea     -4(ap,n,4), ap          C point at the most significant limb
+       and     $3, %ebx
+       je      L(b0)
+       cmp     $2, %ebx
+       jc      L(b1)                   C n mod 4 == 1
+       je      L(b2)                   C n mod 4 == 2, else fall into b3
+
+L(b3): movd    -4(ap), %mm7            C three leading limbs
+       pmuludq B1modb, %mm7
+       movd    -8(ap), %mm6
+       paddq   %mm6, %mm7
+       movd    (ap), %mm6
+       pmuludq B2modb, %mm6
+       paddq   %mm6, %mm7
+       lea     -24(ap), ap
+       add     $-3, n
+       jz      L(end)
+       jmp     L(top)
+
+L(b0): movd    -8(ap), %mm7            C four leading limbs
+       pmuludq B1modb, %mm7
+       movd    -12(ap), %mm6
+       paddq   %mm6, %mm7
+       movd    -4(ap), %mm6
+       pmuludq B2modb, %mm6
+       paddq   %mm6, %mm7
+       movd    (ap), %mm6
+       pmuludq B3modb, %mm6
+       paddq   %mm6, %mm7
+       lea     -28(ap), ap
+       add     $-4, n
+       jz      L(end)
+       jmp     L(top)
+
+L(b1): movd    (ap), %mm7              C one leading limb
+       lea     -16(ap), ap
+       dec     n
+       jz      L(x)                    C n==1, mm7 is a single limb already
+       jmp     L(top)
+
+L(b2): movd    (ap), %mm7              C two leading limbs
+       pmuludq B1modb, %mm7
+       movd    -4(ap), %mm6
+       paddq   %mm6, %mm7
+       lea     -20(ap), ap
+       add     $-2, n
+       jz      L(end)
+
+       ALIGN(8)
+L(top):        movd    4(ap), %mm0
+       pmuludq B1modb, %mm0
+       movd    0(ap), %mm6
+       paddq   %mm6, %mm0
+
+       movd    8(ap), %mm6
+       pmuludq B2modb, %mm6
+       paddq   %mm6, %mm0
+
+       movd    12(ap), %mm6
+       pmuludq B3modb, %mm6
+       paddq   %mm6, %mm0
+
+       C Fold the previous accumulator in: low half times B4modb, high
+       C half times B5modb.
+
+       movq    %mm7, %mm6
+       psrlq   $32, %mm7               C rh
+       pmuludq B5modb, %mm7
+       pmuludq B4modb, %mm6
+
+       paddq   %mm0, %mm7
+       paddq   %mm6, %mm7
+
+       add     $-16, ap
+       add     $-4, n
+       jnz     L(top)
+L(end):
+
+       C mm4 (B4modb) is no longer needed and is reused for the mask.
+
+       pcmpeqd %mm4, %mm4
+       psrlq   $32, %mm4               C 0x00000000FFFFFFFF
+       pand    %mm7, %mm4              C rl
+       psrlq   $32, %mm7               C rh
+       pmuludq B1modb, %mm7            C rh,cl
+       paddq   %mm4, %mm7              C rh,rl
+
+       C Final reduction of rh:rl by b, using the inverse bi.
+
+L(x):  movd    4(%ecx), %mm4           C cnt
+       psllq   %mm4, %mm7              C rh,rl normalized
+       movq    %mm7, %mm2              C rl in low half
+       psrlq   $32, %mm7               C rh
+       movd    (%ecx), %mm1            C bi
+       pmuludq %mm7, %mm1              C qh,ql
+       paddq   %mm2, %mm1              C qh-1,ql
+       movd    %mm1, %ecx              C ql
+       psrlq   $32, %mm1               C qh-1
+       movd    16(%esp), %mm3          C b
+       pmuludq %mm1, %mm3              C (qh-1) * b
+       psubq   %mm3, %mm2              C r in low half (could use psubd)
+       movd    %mm2, %eax              C r
+       mov     16(%esp), %ebx
+       sub     %ebx, %eax              C r
+       cmp     %eax, %ecx              C carry if ql < r
+       lea     (%eax,%ebx), %edx
+       cmovc(  %edx, %eax)             C add b back in that case
+       movd    %mm4, %ecx              C cnt
+       cmp     %ebx, %eax
+       jae     L(fix)                  C r >= b needs one more subtract
+       emms
+       pop     %ebx
+       shr     %cl, %eax               C undo the normalization shift
+       ret
+
+L(fix):        sub     %ebx, %eax
+       emms
+       pop     %ebx
+       shr     %cl, %eax               C undo the normalization shift
+       ret
+EPILOGUE()
+
+PROLOGUE(mpn_mod_1s_4p_cps)
+C Fills the seven-limb table used by mpn_mod_1s_4p.
+C
+C Stack arguments at entry: table pointer at sp+4, b at sp+8.
+C
+C Table written:
+C   0 bi     inverse of b shifted left by cnt, from a single div
+C   4 cnt    leading zero count of b
+C   8..24    five constants, each derived from the previous one by a
+C            multiply by bi and a conditional add of b, then stored
+C            shifted right by cnt
+C
+C Locals: 4(%esp) cnt, 8(%esp) first constant before the final shift.
+       push    %ebp
+       push    %edi
+       push    %esi
+       push    %ebx
+       sub     $12, %esp               C after this: table 32, b 36
+       mov     36(%esp), %ebx          C b
+       bsr     %ebx, %ecx
+       xor     $31, %ecx               C cnt = 31 - index of highest set bit
+       mov     %ecx, 4(%esp)
+       sal     %cl, %ebx               C b normalized
+       mov     %ebx, %edx
+       not     %edx
+       mov     $-1, %eax
+       div     %ebx                    C bi, from dividend ~b:FFFFFFFF
+       mov     %eax, %esi              C bi
+       mov     $1, %ebp
+       sal     %cl, %ebp               C 1 << cnt
+       neg     %ecx
+       C The shift count below is the low 5 bits of -cnt, i.e. 32-cnt
+       C when cnt is nonzero.
+       C NOTE(review): for cnt==0 the count becomes 0 and bi is or-ed in
+       C unshifted; presumably callers never pass a b with the high bit
+       C set -- confirm.
+       shr     %cl, %eax               C bi >> (32-cnt)
+       or      %eax, %ebp
+       mov     %ebx, %eax
+       neg     %eax                    C -b
+       imul    %ebp, %eax              C first constant
+       mov     %esi, %ecx              C bi
+       mov     %eax, 8(%esp)
+       mul     %ecx                    C edx:eax = first constant * bi
+       mov     %edx, %esi
+       not     %esi
+       sub     8(%esp), %esi           C ~high - first constant
+       imul    %ebx, %esi              C candidate for second constant
+       lea     (%esi,%ebx), %edx
+       cmp     %esi, %eax
+       cmovb(  %edx, %esi)             C add b if low product < candidate
+       mov     %esi, %eax
+       mul     %ecx                    C same step again, giving third
+       lea     (%esi,%edx), %edi
+       not     %edi
+       imul    %ebx, %edi
+       lea     (%edi,%ebx), %edx
+       cmp     %edi, %eax
+       cmovb(  %edx, %edi)
+       mov     %edi, %eax
+       mul     %ecx                    C same step again, giving fourth
+       lea     (%edi,%edx), %ebp
+       not     %ebp
+       imul    %ebx, %ebp
+       lea     (%ebp,%ebx), %edx
+       cmp     %ebp, %eax
+       cmovb(  %edx, %ebp)
+       mov     %ebp, %eax
+       mul     %ecx                    C same step again, giving fifth
+       add     %ebp, %edx
+       not     %edx
+       imul    %ebx, %edx
+       add     %edx, %ebx
+       cmp     %edx, %eax
+       cmovb(  %ebx, %edx)
+       mov     32(%esp), %eax          C table pointer
+       mov     %ecx, (%eax)            C bi
+       mov     4(%esp), %ecx
+       mov     %ecx, 4(%eax)           C cnt
+       mov     8(%esp), %ebx
+       shr     %cl, %ebx
+       mov     %ebx, 8(%eax)           C first constant >> cnt
+       shr     %cl, %esi
+       mov     %esi, 12(%eax)          C second
+       shr     %cl, %edi
+       mov     %edi, 16(%eax)          C third
+       shr     %cl, %ebp
+       mov     %ebp, 20(%eax)          C fourth
+       shr     %cl, %edx
+       mov     %edx, 24(%eax)          C fifth
+       add     $12, %esp
+       pop     %ebx
+       pop     %esi
+       pop     %edi
+       pop     %ebp
+       ret
+EPILOGUE()
diff --git a/mpn/x86/pentium4/sse2/mod_34lsub1.asm b/mpn/x86/pentium4/sse2/mod_34lsub1.asm

new file mode 100644 (file)

index 0000000..1598b41
--- /dev/null
+++ b/mpn/x86/pentium4/sse2/mod_34lsub1.asm
@@ -0,0 +1,164 @@
+dnl  Intel Pentium 4 mpn_mod_34lsub1 -- remainder modulo 2^24-1.
+
+dnl  Copyright 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C Pentium4: 1.0 cycles/limb
+
+
+C mp_limb_t mpn_mod_34lsub1 (mp_srcptr src, mp_size_t size)
+C Returns a value congruent to {src,size} mod 2^24-1, not fully reduced.
+C Enhancements:
+C
+C There might be a couple of cycles to save by using plain integer code for
+C more small sizes.  2 limbs measures about 20 cycles, but 3 limbs jumps to
+C about 46 (inclusive of some function call overheads).
+
+defframe(PARAM_SIZE, 8)
+defframe(PARAM_SRC,  4)
+
+dnl  re-use parameter space (neither SAVE_ macro is referenced in this file)
+define(SAVE_EBX, `PARAM_SRC')
+define(SAVE_ESI, `PARAM_SIZE')
+
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_mod_34lsub1)
+deflit(`FRAME',0)
+
+       movl    PARAM_SIZE, %ecx
+       movl    PARAM_SRC, %edx
+       movl    (%edx), %eax            C src[0], read before size is tested
+
+       subl    $2, %ecx                C size-2
+       ja      L(three_or_more)        C size >= 3 (unsigned compare)
+       jne     L(one)                  C size != 2, return src[0] unchanged
+
+       movl    4(%edx), %edx           C src[1]
+       movl    %eax, %ecx
+       shrl    $24, %eax               C src[0] high 8 bits, weight 2^24 == 1
+
+       andl    $0x00FFFFFF, %ecx       C src[0] low 24 bits
+       addl    %ecx, %eax
+
+       movl    %edx, %ecx
+       shll    $8, %edx
+
+       shrl    $16, %ecx               C src[1] high 16 bits, weight 2^48 == 1
+       addl    %ecx, %eax
+
+       andl    $0x00FFFF00, %edx       C src[1] low 16 bits << 8, 2^32 == 2^8
+       addl    %edx, %eax
+
+L(one):
+       ret
+
+
+L(three_or_more):
+       pxor    %mm0, %mm0              C clear the three 64-bit sums
+       pxor    %mm1, %mm1
+       pxor    %mm2, %mm2
+
+       pcmpeqd %mm7, %mm7
+       psrlq   $32, %mm7       C 0x00000000FFFFFFFF, low 32 bits
+
+       pcmpeqd %mm6, %mm6
+       psrlq   $40, %mm6       C 0x0000000000FFFFFF, low 24 bits
+
+L(top):
+       C eax
+       C ebx
+       C ecx   counter, size-2 to 0, -1 or -2
+       C edx   src, incrementing
+       C
+       C mm0   sum 0mod3
+       C mm1   sum 1mod3
+       C mm2   sum 2mod3
+       C mm3
+       C mm4
+       C mm5
+       C mm6   0x0000000000FFFFFF
+       C mm7   0x00000000FFFFFFFF
+
+       movd    (%edx), %mm3            C limb 3k, zero extended to 64 bits
+       paddq   %mm3, %mm0
+
+       movd    4(%edx), %mm3           C limb 3k+1
+       paddq   %mm3, %mm1
+
+       movd    8(%edx), %mm3           C limb 3k+2
+       paddq   %mm3, %mm2
+
+       addl    $12, %edx
+       subl    $3, %ecx
+       ja      L(top)                  C loop while more than 2 limbs remain
+
+
+       C ecx is -2, -1 or 0 representing 0, 1 or 2 more limbs, respectively
+
+       addl    $1, %ecx                C flags from here drive both js and jz
+       js      L(combine)              C 0 more
+
+       movd    (%edx), %mm3            C movd and paddq leave the flags alone
+       paddq   %mm3, %mm0
+
+       jz      L(combine)              C 1 more
+
+       movd    4(%edx), %mm3
+       paddq   %mm3, %mm1
+
+L(combine):
+       movq    %mm7, %mm3              C low halves
+       pand    %mm0, %mm3
+
+       movq    %mm7, %mm4
+       pand    %mm1, %mm4
+
+       movq    %mm7, %mm5
+       pand    %mm2, %mm5
+
+       psrlq   $32, %mm0               C high halves
+       psrlq   $32, %mm1
+       psrlq   $32, %mm2
+
+       paddq   %mm0, %mm4              C fold high halves to give 33 bits each
+       paddq   %mm1, %mm5              C high of sum N joins low of sum N+1
+       paddq   %mm2, %mm3              C high of sum 2 wraps to sum 0, 2^96 == 1
+
+       psllq   $8, %mm4                C combine at respective offsets
+       psllq   $16, %mm5
+       paddq   %mm4, %mm3
+       paddq   %mm5, %mm3              C 0x000cxxxxxxxxxxxx, 50 bits
+
+       pand    %mm3, %mm6              C fold at 24 bits
+       psrlq   $24, %mm3
+
+       paddq   %mm6, %mm3
+       movd    %mm3, %eax              C return the low 32 bits
+
+       ASSERT(z,       C nothing left in high dword
+       `psrlq  $32, %mm3
+       movd    %mm3, %ecx
+       orl     %ecx, %ecx')
+
+       emms
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/pentium4/sse2/mode1o.asm b/mpn/x86/pentium4/sse2/mode1o.asm

new file mode 100644 (file)

index 0000000..f9d1f14
--- /dev/null
+++ b/mpn/x86/pentium4/sse2/mode1o.asm
@@ -0,0 +1,163 @@
+dnl  Intel Pentium-4 mpn_modexact_1_odd -- mpn by limb exact remainder.
+
+dnl  Copyright 2001, 2002, 2007 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C P4: 19.0 cycles/limb
+
+
+C mp_limb_t mpn_modexact_1_odd (mp_srcptr src, mp_size_t size,
+C                               mp_limb_t divisor);
+C mp_limb_t mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size,
+C                                mp_limb_t divisor, mp_limb_t carry);
+C mpn_modexact_1_odd is the same computation with an initial carry limb of 0.
+
+defframe(PARAM_CARRY,  16)
+defframe(PARAM_DIVISOR,12)
+defframe(PARAM_SIZE,   8)
+defframe(PARAM_SRC,    4)
+
+       TEXT
+
+       ALIGN(16)
+PROLOGUE(mpn_modexact_1c_odd)
+deflit(`FRAME',0)
+
+       movd    PARAM_CARRY, %mm1       C initial carry limb from the caller
+       jmp     L(start_1c)             C join mpn_modexact_1_odd past its pxor
+
+EPILOGUE()
+
+
+       ALIGN(16)
+PROLOGUE(mpn_modexact_1_odd)
+deflit(`FRAME',0)
+
+       pxor    %mm1, %mm1              C carry limb
+L(start_1c):
+       movl    PARAM_DIVISOR, %eax     C d, used for the table index
+
+       movd    PARAM_DIVISOR, %mm7     C d, kept in mm7 for the whole loop
+
+       shrl    %eax                    C d/2
+
+       andl    $127, %eax              C d/2, 7 bits
+
+ifdef(`PIC',`
+       LEA(    binvert_limb_table, %edx)
+       movzbl  (%eax,%edx), %eax               C inv 8 bits
+',`
+       movzbl  binvert_limb_table(%eax), %eax  C inv 8 bits
+')
+
+       C
+
+       movd    %eax, %mm6              C inv
+
+       movd    %eax, %mm0              C inv
+
+       pmuludq %mm6, %mm6              C inv*inv
+
+       C
+
+       pmuludq %mm7, %mm6              C inv*inv*d
+       paddd   %mm0, %mm0              C 2*inv
+
+       C
+
+       psubd   %mm6, %mm0              C inv = 2*inv - inv*inv*d
+       pxor    %mm6, %mm6
+
+       paddd   %mm0, %mm6              C mm6 = copy of the refined inv
+       pmuludq %mm0, %mm0              C inv*inv
+
+       C
+
+       pmuludq %mm7, %mm0              C inv*inv*d
+       paddd   %mm6, %mm6              C 2*inv
+
+
+       movl    PARAM_SRC, %eax
+       movl    PARAM_SIZE, %ecx
+
+       C
+
+       psubd   %mm0, %mm6              C inv = 2*inv - inv*inv*d
+
+       ASSERT(e,`      C expect d*inv == 1 mod 2^GMP_LIMB_BITS
+       pushl   %eax    FRAME_pushl()
+       movd    %mm6, %eax
+       imul    PARAM_DIVISOR, %eax
+       cmpl    $1, %eax
+       popl    %eax    FRAME_popl()')
+
+       pxor    %mm0, %mm0              C carry bit
+
+
+C The dependent chain here is as follows.
+C
+C                                      latency
+C      psubq    s = (src-cbit) - climb    2
+C      pmuludq  q = s*inverse             8
+C      pmuludq  prod = q*divisor          8
+C      psrlq    climb = high(prod)        2
+C                                        --
+C                                        20
+C
+C Yet the loop measures 19.0 c/l, so obviously there's something gained
+C there over a straight reading of the chip documentation.
+
+L(top):
+       C eax   src, incrementing
+       C ebx
+       C ecx   counter, limbs
+       C edx
+       C
+       C mm0   carry bit
+       C mm1   carry limb
+       C mm6   inverse
+       C mm7   divisor
+
+       movd    (%eax), %mm2            C next src limb, zero extended
+       addl    $4, %eax
+
+       psubq   %mm0, %mm2              C src - cbit
+
+       psubq   %mm1, %mm2              C src - cbit - climb
+       movq    %mm2, %mm0
+       psrlq   $63, %mm0               C new cbit
+
+       pmuludq %mm6, %mm2              C s*inverse
+
+       movq    %mm7, %mm1
+       pmuludq %mm2, %mm1              C q*divisor
+       psrlq   $32, %mm1               C new climb
+
+       subl    $1, %ecx                C tested after the body, so size >= 1
+       jnz     L(top)
+
+
+L(done):
+       paddq   %mm1, %mm0              C result = climb + cbit
+       movd    %mm0, %eax
+       emms
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/pentium4/sse2/mul_1.asm b/mpn/x86/pentium4/sse2/mul_1.asm

new file mode 100644 (file)

index 0000000..07be951
--- /dev/null
+++ b/mpn/x86/pentium4/sse2/mul_1.asm
@@ -0,0 +1,158 @@
+dnl  mpn_mul_1 for Pentium 4 and P6 models with SSE2 (i.e., 9,D,E,F).
+
+dnl  Copyright 2005, 2007 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C TODO:
+C  * Tweak eax/edx offsets in loop as to save some lea's
+C  * Perhaps software pipeline small-case code
+
+C                           cycles/limb
+C P6 model 0-8,10-12            -
+C P6 model 9   (Banias)                ?
+C P6 model 13  (Dothan)         4.17
+C P4 model 0-1 (Willamette):   4
+C P4 model 2   (Northwood):     4
+C P4 model 3-4 (Prescott):      4.55
+
+C INPUT PARAMETERS
+C rp           sp + 4
+C up           sp + 8
+C n            sp + 12
+C v0           sp + 16         (mpn_mul_1c: carry-in limb at sp + 20)
+
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_mul_1c)
+       mov     4(%esp), %edx           C rp
+       mov     8(%esp), %eax           C up
+       mov     12(%esp), %ecx          C n
+       movd    16(%esp), %mm7          C v0, the multiplier limb
+       movd    20(%esp), %mm6          C carry-in limb, seeds the accumulator
+       jmp     L(ent)                  C join mpn_mul_1 after its carry clear
+EPILOGUE()
+       ALIGN(16)
+PROLOGUE(mpn_mul_1)
+       mov     4(%esp), %edx           C rp
+       mov     8(%esp), %eax           C up
+       mov     12(%esp), %ecx          C n
+       movd    16(%esp), %mm7          C v0, the multiplier limb
+       pxor    %mm6, %mm6              C carry accumulator starts at 0
+L(ent):        cmp     $4, %ecx
+       jnc     L(big)                  C n >= 4 (unsigned) uses the unrolled loop
+
+L(lp0):        movd    (%eax), %mm0            C simple loop, one limb per pass
+       lea     4(%eax), %eax
+       lea     4(%edx), %edx
+       pmuludq %mm7, %mm0              C up[i] * v0, 64-bit product
+       paddq   %mm0, %mm6              C add carry from the previous limb
+       movd    %mm6, -4(%edx)          C store low 32 bits to rp[i]
+       psrlq   $32, %mm6               C high 32 bits become the next carry
+       dec     %ecx                    C tested after the body, so n >= 1
+       jnz     L(lp0)
+       movd    %mm6, %eax              C return the final carry limb
+       emms
+       ret
+
+L(big):        and     $3, %ecx                C n mod 4 selects the feed-in path
+       je      L(0)
+       cmp     $2, %ecx
+       jc      L(1)
+       je      L(2)
+       jmp     L(3)                    C FIXME: one case should fall through
+
+L(0):  movd    (%eax), %mm3            C up[0]
+       sub     12(%esp), %ecx          C loop count
+       lea     -16(%eax), %eax         C bias pointers for the L(00) offsets
+       lea     -12(%edx), %edx
+       pmuludq %mm7, %mm3
+       movd    20(%eax), %mm0          C up[1]
+       pmuludq %mm7, %mm0
+       movd    24(%eax), %mm1          C up[2]
+       jmp     L(00)
+
+L(1):  movd    (%eax), %mm2            C up[0]
+       sub     12(%esp), %ecx          C ecx = 1 - n, a negative multiple of 4
+       lea     -12(%eax), %eax         C bias pointers for the L(01) offsets
+       lea     -8(%edx), %edx
+       pmuludq %mm7, %mm2
+       movd    16(%eax), %mm3          C up[1]
+       pmuludq %mm7, %mm3
+       movd    20(%eax), %mm0          C up[2]
+       jmp     L(01)
+
+L(2):  movd    (%eax), %mm1            C up[0]
+       sub     12(%esp), %ecx          C ecx = 2 - n, a negative multiple of 4
+       lea     -8(%eax), %eax          C bias pointers for the L(10) offsets
+       lea     -4(%edx), %edx
+       pmuludq %mm7, %mm1
+       movd    12(%eax), %mm2          C up[1]
+       pmuludq %mm7, %mm2
+       movd    16(%eax), %mm3          C up[2]
+       jmp     L(10)
+
+L(3):  movd    (%eax), %mm0            C up[0]
+       sub     12(%esp), %ecx          C ecx = 3 - n, a negative multiple of 4
+       lea     -4(%eax), %eax          C only up is biased, rp offset is 0 at L(top)
+       pmuludq %mm7, %mm0
+       movd    8(%eax), %mm1           C up[1]
+       pmuludq %mm7, %mm1
+       movd    12(%eax), %mm2          C up[2], falls through into L(top)
+
+       ALIGN(16)
+L(top):        pmuludq %mm7, %mm2              C 4 limbs per pass, products in mm0..mm3
+       paddq   %mm0, %mm6
+       movd    16(%eax), %mm3
+       movd    %mm6, 0(%edx)
+       psrlq   $32, %mm6
+L(10): pmuludq %mm7, %mm3
+       paddq   %mm1, %mm6
+       movd    20(%eax), %mm0
+       movd    %mm6, 4(%edx)
+       psrlq   $32, %mm6
+L(01): pmuludq %mm7, %mm0
+       paddq   %mm2, %mm6
+       movd    24(%eax), %mm1
+       movd    %mm6, 8(%edx)
+       psrlq   $32, %mm6
+L(00): pmuludq %mm7, %mm1
+       paddq   %mm3, %mm6
+       movd    28(%eax), %mm2
+       movd    %mm6, 12(%edx)
+       psrlq   $32, %mm6
+       lea     16(%eax), %eax
+       lea     16(%edx), %edx
+       add     $4, %ecx                C counter climbs towards 0
+       ja      L(top)                  C loop until the add carries out at 0
+
+L(end):        pmuludq %mm7, %mm2              C wind-down: last limb still to multiply
+       paddq   %mm0, %mm6
+       movd    %mm6, 0(%edx)
+       psrlq   $32, %mm6
+       paddq   %mm1, %mm6
+       movd    %mm6, 4(%edx)
+       psrlq   $32, %mm6
+       paddq   %mm2, %mm6
+       movd    %mm6, 8(%edx)
+       psrlq   $32, %mm6
+       movd    %mm6, %eax              C return the final carry limb
+       emms
+       ret
+EPILOGUE()
diff --git a/mpn/x86/pentium4/sse2/mul_basecase.asm b/mpn/x86/pentium4/sse2/mul_basecase.asm

new file mode 100644 (file)

index 0000000..2628e5e
--- /dev/null
+++ b/mpn/x86/pentium4/sse2/mul_basecase.asm
@@ -0,0 +1,651 @@
+dnl  mpn_mul_basecase for Pentium 4 and P6 models with SSE2 (i.e., 9,D,E,F).
+
+dnl  Copyright 2001, 2002, 2005, 2007 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C TODO:
+C  * Improve ad-hoc outer loop code and register handling.  Some feed-in
+C    scheduling could improve things by several cycles per outer iteration.
+C  * In code for un <= 3, try keeping accumulation operands in registers,
+C    without storing intermediates to rp.
+C  * We might want to keep 32 in a free mm register, since the register form is
+C    3 bytes and the immediate form is 4 bytes.  About 70 bytes to save.
+C  * Look into different loop alignment, we now expand the code about 50 bytes
+C    with possibly needless alignment.
+C  * Perhaps rewrap loops 00,01,02 (6 loops) to allow fall-through entry.
+C  * Use OSP, should solve feed-in latency problems.
+C  * Save a few tens of bytes by doing cross-jumping for Loel0, etc.
+C  * Save around 120 bytes by remapping "m 0", "m 1", "m 2" and "m 3" registers
+C    so that they can share feed-in code, and changing the branch targets from
+C    L<n> to Lm<nn>.
+
+C                           cycles/limb
+C P6 model 9   (Banias)         ?
+C P6 model 13  (Dothan)         5.24
+C P6 model 14  (Yonah)          ?
+C P4 model 0-1 (Willamette):    5
+C P4 model 2   (Northwood):     4.60 at 32 limbs
+C P4 model 3-4 (Prescott):      4.94 at 32 limbs
+
+C INPUT PARAMETERS
+C rp           sp + 4
+C up           sp + 8
+C un           sp + 12
+C vp           sp + 16
+C vn           sp + 20
+
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_mul_basecase)
+       push    %esi
+       push    %ebx
+       mov     12(%esp), %edx          C rp
+       mov     16(%esp), %eax          C up
+       mov     20(%esp), %ecx          C un
+       mov     24(%esp), %esi          C vp
+       mov     28(%esp), %ebx          C vn
+       movd    (%esi), %mm7            C
+L(ent):        cmp     $3, %ecx
+       ja      L(big)
+       movd    (%eax), %mm6
+       pmuludq %mm7, %mm6
+       jz      L(un3)
+       cmp     $2, %ecx
+       jz      L(un2)
+
+L(un1):        movd    %mm6, (%edx)            C                               un=1
+       psrlq   $32, %mm6               C                               un=1
+       movd    %mm6, 4(%edx)           C                               un=1
+       jmp     L(rtr)                  C                               un=1
+
+L(un2):        movd    4(%eax), %mm1           C                               un=2
+       pmuludq %mm7, %mm1              C                               un=2
+       movd    %mm6, (%edx)            C                               un=2
+       psrlq   $32, %mm6               C                               un=2
+       paddq   %mm1, %mm6              C                               un=2
+       movd    %mm6, 4(%edx)           C                               un=2
+       psrlq   $32, %mm6               C                               un=2
+       movd    %mm6, 8(%edx)           C                               un=2
+      dec      %ebx                    C                               un=2
+      jz       L(rtr)                  C                               un=2
+       movd    4(%esi), %mm7           C                               un=2
+       movd    (%eax), %mm6            C                               un=2
+       pmuludq %mm7, %mm6              C                               un=2
+       movd    4(%eax), %mm1           C                               un=2
+       movd    4(%edx), %mm4           C                               un=2
+       pmuludq %mm7, %mm1              C                               un=2
+       movd    8(%edx), %mm5           C                               un=2
+       paddq   %mm4, %mm6              C                               un=2
+       paddq   %mm1, %mm5              C                               un=2
+       movd    %mm6, 4(%edx)           C                               un=2
+       psrlq   $32, %mm6               C                               un=2
+       paddq   %mm5, %mm6              C                               un=2
+       movd    %mm6, 8(%edx)           C                               un=2
+       psrlq   $32, %mm6               C                               un=2
+       movd    %mm6, 12(%edx)          C                               un=2
+L(rtr):        emms
+       pop     %ebx
+       pop     %esi
+       ret
+
+L(un3):        movd    4(%eax), %mm1           C                               un=3
+       pmuludq %mm7, %mm1              C                               un=3
+       movd    8(%eax), %mm2           C                               un=3
+       pmuludq %mm7, %mm2              C                               un=3
+       movd    %mm6, (%edx)            C                               un=3
+       psrlq   $32, %mm6               C                               un=3
+       paddq   %mm1, %mm6              C                               un=3
+       movd    %mm6, 4(%edx)           C                               un=3
+       psrlq   $32, %mm6               C                               un=3
+       paddq   %mm2, %mm6              C                               un=3
+       movd    %mm6, 8(%edx)           C                               un=3
+       psrlq   $32, %mm6               C                               un=3
+       movd    %mm6, 12(%edx)          C                               un=3
+      dec      %ebx                    C                               un=3
+      jz       L(rtr)                  C                               un=3
+       movd    4(%esi), %mm7           C                               un=3
+       movd    (%eax), %mm6            C                               un=3
+       pmuludq %mm7, %mm6              C                               un=3
+       movd    4(%eax), %mm1           C                               un=3
+       movd    4(%edx), %mm4           C                               un=3
+       pmuludq %mm7, %mm1              C                               un=3
+       movd    8(%eax), %mm2           C                               un=3
+       movd    8(%edx), %mm5           C                               un=3
+       pmuludq %mm7, %mm2              C                               un=3
+       paddq   %mm4, %mm6              C                               un=3
+       paddq   %mm1, %mm5              C                               un=3
+       movd    12(%edx), %mm4          C                               un=3
+       movd    %mm6, 4(%edx)           C                               un=3
+       psrlq   $32, %mm6               C                               un=3
+       paddq   %mm5, %mm6              C                               un=3
+       paddq   %mm2, %mm4              C                               un=3
+       movd    %mm6, 8(%edx)           C                               un=3
+       psrlq   $32, %mm6               C                               un=3
+       paddq   %mm4, %mm6              C                               un=3
+       movd    %mm6, 12(%edx)          C                               un=3
+       psrlq   $32, %mm6               C                               un=3
+       movd    %mm6, 16(%edx)          C                               un=3
+      dec      %ebx                    C                               un=3
+      jz       L(rtr)                  C                               un=3
+       movd    8(%esi), %mm7           C                               un=3
+       movd    (%eax), %mm6            C                               un=3
+       pmuludq %mm7, %mm6              C                               un=3
+       movd    4(%eax), %mm1           C                               un=3
+       movd    8(%edx), %mm4           C                               un=3
+       pmuludq %mm7, %mm1              C                               un=3
+       movd    8(%eax), %mm2           C                               un=3
+       movd    12(%edx), %mm5          C                               un=3
+       pmuludq %mm7, %mm2              C                               un=3
+       paddq   %mm4, %mm6              C                               un=3
+       paddq   %mm1, %mm5              C                               un=3
+       movd    16(%edx), %mm4          C                               un=3
+       movd    %mm6, 8(%edx)           C                               un=3
+       psrlq   $32, %mm6               C                               un=3
+       paddq   %mm5, %mm6              C                               un=3
+       paddq   %mm2, %mm4              C                               un=3
+       movd    %mm6, 12(%edx)          C                               un=3
+       psrlq   $32, %mm6               C                               un=3
+       paddq   %mm4, %mm6              C                               un=3
+       movd    %mm6, 16(%edx)          C                               un=3
+       psrlq   $32, %mm6               C                               un=3
+       movd    %mm6, 20(%edx)          C                               un=3
+       jmp     L(rtr)
+
+
+L(big):        push    %edi
+       pxor    %mm6, %mm6
+       lea     4(%esi), %esi
+       and     $3, %ecx
+       jz      L(0)
+       cmp     $2, %ecx
+       jc      L(1)
+       jz      L(2)
+       jmp     L(3)                    C FIXME: one case should fall through
+
+
+L(0):  movd    (%eax), %mm3            C                               m 0
+       sub     24(%esp), %ecx          C inner loop count              m 0
+       mov     %ecx, 24(%esp)          C update loop count for later   m 0
+       pmuludq %mm7, %mm3              C                               m 0
+       movd    4(%eax), %mm0           C                               m 0
+       pmuludq %mm7, %mm0              C                               m 0
+       movd    8(%eax), %mm1           C                               m 0
+       jmp     L(m00)                  C                               m 0
+       ALIGN(16)                       C                               m 0
+L(lpm0):
+       pmuludq %mm7, %mm4              C                               m 0
+       paddq   %mm0, %mm6              C                               m 0
+       movd    (%eax), %mm3            C                               m 0
+       movd    %mm6, -12(%edx)         C                               m 0
+       psrlq   $32, %mm6               C                               m 0
+       pmuludq %mm7, %mm3              C                               m 0
+       paddq   %mm1, %mm6              C                               m 0
+       movd    4(%eax), %mm0           C                               m 0
+       movd    %mm6, -8(%edx)          C                               m 0
+       psrlq   $32, %mm6               C                               m 0
+       pmuludq %mm7, %mm0              C                               m 0
+       paddq   %mm4, %mm6              C                               m 0
+       movd    8(%eax), %mm1           C                               m 0
+       movd    %mm6, -4(%edx)          C                               m 0
+       psrlq   $32, %mm6               C                               m 0
+L(m00):        pmuludq %mm7, %mm1              C                               m 0
+       paddq   %mm3, %mm6              C                               m 0
+       movd    12(%eax), %mm4          C                               m 0
+       movd    %mm6, (%edx)            C                               m 0
+       psrlq   $32, %mm6               C                               m 0
+       lea     16(%eax), %eax          C                               m 0
+       lea     16(%edx), %edx          C                               m 0
+       add     $4, %ecx                C                               m 0
+       ja      L(lpm0)                 C                               m 0
+       pmuludq %mm7, %mm4              C                               m 0
+       paddq   %mm0, %mm6              C                               m 0
+       movd    %mm6, -12(%edx)         C                               m 0
+       psrlq   $32, %mm6               C                               m 0
+       paddq   %mm1, %mm6              C                               m 0
+       mov     16(%esp), %edi          C rp                              0
+       jmp     L(x0)
+
+L(olp0):
+       lea     4(%edi), %edi           C                               am 0
+       movd    (%esi), %mm7            C                               am 0
+       lea     4(%esi), %esi           C                               am 0
+       mov     %edi, %edx              C rp                            am 0
+       mov     20(%esp), %eax          C up                            am 0
+       movd    (%eax), %mm3            C                               am 0
+       mov     24(%esp), %ecx          C inner loop count              am 0
+       pxor    %mm6, %mm6              C                               am 0
+       pmuludq %mm7, %mm3              C                               am 0
+       movd    4(%eax), %mm0           C                               am 0
+       movd    (%edx), %mm5            C                               am 0
+       pmuludq %mm7, %mm0              C                               am 0
+       movd    8(%eax), %mm1           C                               am 0
+       paddq   %mm3, %mm5              C                               am 0
+       movd    4(%edx), %mm4           C                               am 0
+       jmp     L(am00)                 C                               am 0
+       ALIGN(16)                       C                               mm 0
+L(lam0):
+       pmuludq %mm7, %mm2              C                               am 0
+       paddq   %mm4, %mm6              C                               am 0
+       movd    (%eax), %mm3            C                               am 0
+       paddq   %mm1, %mm5              C                               am 0
+       movd    -4(%edx), %mm4          C                               am 0
+       movd    %mm6, -12(%edx)         C                               am 0
+       psrlq   $32, %mm6               C                               am 0
+       pmuludq %mm7, %mm3              C                               am 0
+       paddq   %mm5, %mm6              C                               am 0
+       movd    4(%eax), %mm0           C                               am 0
+       paddq   %mm2, %mm4              C                               am 0
+       movd    (%edx), %mm5            C                               am 0
+       movd    %mm6, -8(%edx)          C                               am 0
+       psrlq   $32, %mm6               C                               am 0
+       pmuludq %mm7, %mm0              C                               am 0
+       paddq   %mm4, %mm6              C                               am 0
+       movd    8(%eax), %mm1           C                               am 0
+       paddq   %mm3, %mm5              C                               am 0
+       movd    4(%edx), %mm4           C                               am 0
+       movd    %mm6, -4(%edx)          C                               am 0
+       psrlq   $32, %mm6               C                               am 0
+L(am00):
+       pmuludq %mm7, %mm1              C                               am 0
+       paddq   %mm5, %mm6              C                               am 0
+       movd    12(%eax), %mm2          C                               am 0
+       paddq   %mm0, %mm4              C                               am 0
+       movd    8(%edx), %mm5           C                               am 0
+       movd    %mm6, (%edx)            C                               am 0
+       psrlq   $32, %mm6               C                               am 0
+       lea     16(%eax), %eax          C                               am 0
+       lea     16(%edx), %edx          C                               am 0
+       add     $4, %ecx                C                               am 0
+       jnz     L(lam0)                 C                               am 0
+       pmuludq %mm7, %mm2              C                               am 0
+       paddq   %mm4, %mm6              C                               am 0
+       paddq   %mm1, %mm5              C                               am 0
+       movd    -4(%edx), %mm4          C                               am 0
+       movd    %mm6, -12(%edx)         C                               am 0
+       psrlq   $32, %mm6               C                               am 0
+       paddq   %mm5, %mm6              C                               am 0
+       paddq   %mm2, %mm4              C                               am 0
+L(x0): movd    %mm6, -8(%edx)          C                               am 0
+       psrlq   $32, %mm6               C                               am 0
+       paddq   %mm4, %mm6              C                               am 0
+       movd    %mm6, -4(%edx)          C                               am 0
+       psrlq   $32, %mm6               C                               am 0
+       movd    %mm6, (%edx)            C                               am 0
+       dec     %ebx                    C                               am 0
+       jnz     L(olp0)                 C                               am 0
+L(oel0):
+       emms                            C                                  0
+       pop     %edi                    C                                  0
+       pop     %ebx                    C                                  0
+       pop     %esi                    C                                  0
+       ret                             C                                  0
+
+
+L(1):  movd    (%eax), %mm4            C                               m 1
+       sub     24(%esp), %ecx          C                               m 1
+       mov     %ecx, 24(%esp)          C update loop count for later   m 1
+       pmuludq %mm7, %mm4              C                               m 1
+       movd    4(%eax), %mm3           C                               m 1
+       pmuludq %mm7, %mm3              C                               m 1
+       movd    8(%eax), %mm0           C                               m 1
+       jmp     L(m01)                  C                               m 1
+       ALIGN(16)                       C                               m 1
+L(lpm1):
+       pmuludq %mm7, %mm4              C                               m 1
+       paddq   %mm0, %mm6              C                               m 1
+       movd    4(%eax), %mm3           C                               m 1
+       movd    %mm6, -8(%edx)          C                               m 1
+       psrlq   $32, %mm6               C                               m 1
+       pmuludq %mm7, %mm3              C                               m 1
+       paddq   %mm1, %mm6              C                               m 1
+       movd    8(%eax), %mm0           C                               m 1
+       movd    %mm6, -4(%edx)          C                               m 1
+       psrlq   $32, %mm6               C                               m 1
+L(m01):        pmuludq %mm7, %mm0              C                               m 1
+       paddq   %mm4, %mm6              C                               m 1
+       movd    12(%eax), %mm1          C                               m 1
+       movd    %mm6, (%edx)            C                               m 1
+       psrlq   $32, %mm6               C                               m 1
+       pmuludq %mm7, %mm1              C                               m 1
+       paddq   %mm3, %mm6              C                               m 1
+       movd    16(%eax), %mm4          C                               m 1
+       movd    %mm6, 4(%edx)           C                               m 1
+       psrlq   $32, %mm6               C                               m 1
+       lea     16(%eax), %eax          C                               m 1
+       lea     16(%edx), %edx          C                               m 1
+       add     $4, %ecx                C                               m 1
+       ja      L(lpm1)                 C                               m 1
+       pmuludq %mm7, %mm4              C                               m 1
+       paddq   %mm0, %mm6              C                               m 1
+       movd    %mm6, -8(%edx)          C                               m 1
+       psrlq   $32, %mm6               C                               m 1
+       paddq   %mm1, %mm6              C                               m 1
+       mov     16(%esp), %edi          C rp                              1
+       jmp     L(x1)
+
+L(olp1):
+       lea     4(%edi), %edi           C                               am 1
+       movd    (%esi), %mm7            C                               am 1
+       lea     4(%esi), %esi           C                               am 1
+       mov     %edi, %edx              C rp                            am 1
+       mov     20(%esp), %eax          C up                            am 1
+       movd    (%eax), %mm2            C                               am 1
+       mov     24(%esp), %ecx          C inner loop count              am 1
+       pxor    %mm6, %mm6              C                               am 1
+       pmuludq %mm7, %mm2              C                               am 1
+       movd    4(%eax), %mm3           C                               am 1
+       movd    (%edx), %mm4            C                               am 1
+       pmuludq %mm7, %mm3              C                               am 1
+       movd    8(%eax), %mm0           C                               am 1
+       paddq   %mm2, %mm4              C                               am 1
+       movd    4(%edx), %mm5           C                               am 1
+       jmp     L(am01)                 C                               am 1
+       ALIGN(16)                       C                               am 1
+L(lam1):
+       pmuludq %mm7, %mm2              C                               am 1
+       paddq   %mm4, %mm6              C                               am 1
+       movd    4(%eax), %mm3           C                               am 1
+       paddq   %mm1, %mm5              C                               am 1
+       movd    (%edx), %mm4            C                               am 1
+       movd    %mm6, -8(%edx)          C                               am 1
+       psrlq   $32, %mm6               C                               am 1
+       pmuludq %mm7, %mm3              C                               am 1
+       paddq   %mm5, %mm6              C                               am 1
+       movd    8(%eax), %mm0           C                               am 1
+       paddq   %mm2, %mm4              C                               am 1
+       movd    4(%edx), %mm5           C                               am 1
+       movd    %mm6, -4(%edx)          C                               am 1
+       psrlq   $32, %mm6               C                               am 1
+L(am01):
+       pmuludq %mm7, %mm0              C                               am 1
+       paddq   %mm4, %mm6              C                               am 1
+       movd    12(%eax), %mm1          C                               am 1
+       paddq   %mm3, %mm5              C                               am 1
+       movd    8(%edx), %mm4           C                               am 1
+       movd    %mm6, (%edx)            C                               am 1
+       psrlq   $32, %mm6               C                               am 1
+       pmuludq %mm7, %mm1              C                               am 1
+       paddq   %mm5, %mm6              C                               am 1
+       movd    16(%eax), %mm2          C                               am 1
+       paddq   %mm0, %mm4              C                               am 1
+       movd    12(%edx), %mm5          C                               am 1
+       movd    %mm6, 4(%edx)           C                               am 1
+       psrlq   $32, %mm6               C                               am 1
+       lea     16(%eax), %eax          C                               am 1
+       lea     16(%edx), %edx          C                               am 1
+       add     $4, %ecx                C                               am 1
+       jnz     L(lam1)                 C                               am 1
+       pmuludq %mm7, %mm2              C                               am 1
+       paddq   %mm4, %mm6              C                               am 1
+       paddq   %mm1, %mm5              C                               am 1
+       movd    (%edx), %mm4            C                               am 1
+       movd    %mm6, -8(%edx)          C                               am 1
+       psrlq   $32, %mm6               C                               am 1
+       paddq   %mm5, %mm6              C                               am 1
+       paddq   %mm2, %mm4              C                               am 1
+L(x1): movd    %mm6, -4(%edx)          C                               am 1
+       psrlq   $32, %mm6               C                               am 1
+       paddq   %mm4, %mm6              C                               am 1
+       movd    %mm6, (%edx)            C                               am 1
+       psrlq   $32, %mm6               C                               am 1
+       movd    %mm6, 4(%edx)           C                               am 1
+       dec     %ebx                    C                               am 1
+       jnz     L(olp1)                 C                               am 1
+L(oel1):
+       emms                            C                                  1
+       pop     %edi                    C                                  1
+       pop     %ebx                    C                                  1
+       pop     %esi                    C                                  1
+       ret                             C                                  1
+
+
+L(2):  movd    (%eax), %mm1            C                               m 2
+       sub     24(%esp), %ecx          C                               m 2
+       mov     %ecx, 24(%esp)          C update loop count for later   m 2
+       pmuludq %mm7, %mm1              C                               m 2
+       movd    4(%eax), %mm4           C                               m 2
+       pmuludq %mm7, %mm4              C                               m 2
+       movd    8(%eax), %mm3           C                               m 2
+       jmp     L(m10)                  C                               m 2
+       ALIGN(16)                       C                               m 2
+L(lpm2):
+       pmuludq %mm7, %mm4              C                               m 2
+       paddq   %mm0, %mm6              C                               m 2
+       movd    8(%eax), %mm3           C                               m 2
+       movd    %mm6, -4(%edx)          C                               m 2
+       psrlq   $32, %mm6               C                               m 2
+L(m10):        pmuludq %mm7, %mm3              C                               m 2
+       paddq   %mm1, %mm6              C                               m 2
+       movd    12(%eax), %mm0          C                               m 2
+       movd    %mm6, (%edx)            C                               m 2
+       psrlq   $32, %mm6               C                               m 2
+       pmuludq %mm7, %mm0              C                               m 2
+       paddq   %mm4, %mm6              C                               m 2
+       movd    16(%eax), %mm1          C                               m 2
+       movd    %mm6, 4(%edx)           C                               m 2
+       psrlq   $32, %mm6               C                               m 2
+       pmuludq %mm7, %mm1              C                               m 2
+       paddq   %mm3, %mm6              C                               m 2
+       movd    20(%eax), %mm4          C                               m 2
+       movd    %mm6, 8(%edx)           C                               m 2
+       psrlq   $32, %mm6               C                               m 2
+       lea     16(%eax), %eax          C                               m 2
+       lea     16(%edx), %edx          C                               m 2
+       add     $4, %ecx                C                               m 2
+       ja      L(lpm2)                 C                               m 2
+       pmuludq %mm7, %mm4              C                               m 2
+       paddq   %mm0, %mm6              C                               m 2
+       movd    %mm6, -4(%edx)          C                               m 2
+       psrlq   $32, %mm6               C                               m 2
+       paddq   %mm1, %mm6              C                               m 2
+       mov     16(%esp), %edi          C rp                              2
+       jmp     L(x2)
+
+L(olp2):
+       lea     4(%edi), %edi           C                               am 2
+       movd    (%esi), %mm7            C                               am 2
+       lea     4(%esi), %esi           C                               am 2
+       mov     %edi, %edx              C rp                            am 2
+       mov     20(%esp), %eax          C up                            am 2
+       movd    (%eax), %mm1            C                               am 2
+       mov     24(%esp), %ecx          C inner loop count              am 2
+       pxor    %mm6, %mm6              C                               am 2
+       pmuludq %mm7, %mm1              C                               am 2
+       movd    4(%eax), %mm2           C                               am 2
+       movd    (%edx), %mm5            C                               am 2
+       pmuludq %mm7, %mm2              C                               am 2
+       movd    8(%eax), %mm3           C                               am 2
+       paddq   %mm1, %mm5              C                               am 2
+       movd    4(%edx), %mm4           C                               am 2
+       jmp     L(am10)                 C                               am 2
+       ALIGN(16)                       C                               am 2
+L(lam2):
+       pmuludq %mm7, %mm2              C                               am 2
+       paddq   %mm4, %mm6              C                               am 2
+       movd    8(%eax), %mm3           C                               am 2
+       paddq   %mm1, %mm5              C                               am 2
+       movd    4(%edx), %mm4           C                               am 2
+       movd    %mm6, -4(%edx)          C                               am 2
+       psrlq   $32, %mm6               C                               am 2
+L(am10):
+       pmuludq %mm7, %mm3              C                               am 2
+       paddq   %mm5, %mm6              C                               am 2
+       movd    12(%eax), %mm0          C                               am 2
+       paddq   %mm2, %mm4              C                               am 2
+       movd    8(%edx), %mm5           C                               am 2
+       movd    %mm6, (%edx)            C                               am 2
+       psrlq   $32, %mm6               C                               am 2
+       pmuludq %mm7, %mm0              C                               am 2
+       paddq   %mm4, %mm6              C                               am 2
+       movd    16(%eax), %mm1          C                               am 2
+       paddq   %mm3, %mm5              C                               am 2
+       movd    12(%edx), %mm4          C                               am 2
+       movd    %mm6, 4(%edx)           C                               am 2
+       psrlq   $32, %mm6               C                               am 2
+       pmuludq %mm7, %mm1              C                               am 2
+       paddq   %mm5, %mm6              C                               am 2
+       movd    20(%eax), %mm2          C                               am 2
+       paddq   %mm0, %mm4              C                               am 2
+       movd    16(%edx), %mm5          C                               am 2
+       movd    %mm6, 8(%edx)           C                               am 2
+       psrlq   $32, %mm6               C                               am 2
+       lea     16(%eax), %eax          C                               am 2
+       lea     16(%edx), %edx          C                               am 2
+       add     $4, %ecx                C                               am 2
+       jnz     L(lam2)                 C                               am 2
+       pmuludq %mm7, %mm2              C                               am 2
+       paddq   %mm4, %mm6              C                               am 2
+       paddq   %mm1, %mm5              C                               am 2
+       movd    4(%edx), %mm4           C                               am 2
+       movd    %mm6, -4(%edx)          C                               am 2
+       psrlq   $32, %mm6               C                               am 2
+       paddq   %mm5, %mm6              C                               am 2
+       paddq   %mm2, %mm4              C                               am 2
+L(x2): movd    %mm6, (%edx)            C                               am 2
+       psrlq   $32, %mm6               C                               am 2
+       paddq   %mm4, %mm6              C                               am 2
+       movd    %mm6, 4(%edx)           C                               am 2
+       psrlq   $32, %mm6               C                               am 2
+       movd    %mm6, 8(%edx)           C                               am 2
+       dec     %ebx                    C                               am 2
+       jnz     L(olp2)                 C                               am 2
+L(oel2):
+       emms                            C                                  2
+       pop     %edi                    C                                  2
+       pop     %ebx                    C                                  2
+       pop     %esi                    C                                  2
+       ret                             C                                  2
+
+
+L(3):  movd    (%eax), %mm0            C                               m 3
+       sub     24(%esp), %ecx          C                               m 3
+       mov     %ecx, 24(%esp)          C update loop count for later   m 3
+       pmuludq %mm7, %mm0              C                               m 3
+       movd    4(%eax), %mm1           C                               m 3
+       pmuludq %mm7, %mm1              C                               m 3
+       movd    8(%eax), %mm4           C                               m 3
+       jmp     L(lpm3)                 C                               m 3
+       ALIGN(16)                       C                               m 3
+L(lpm3):
+       pmuludq %mm7, %mm4              C                               m 3
+       paddq   %mm0, %mm6              C                               m 3
+       movd    12(%eax), %mm3          C                               m 3
+       movd    %mm6, (%edx)            C                               m 3
+       psrlq   $32, %mm6               C                               m 3
+       pmuludq %mm7, %mm3              C                               m 3
+       paddq   %mm1, %mm6              C                               m 3
+       movd    16(%eax), %mm0          C                               m 3
+       movd    %mm6, 4(%edx)           C                               m 3
+       psrlq   $32, %mm6               C                               m 3
+       pmuludq %mm7, %mm0              C                               m 3
+       paddq   %mm4, %mm6              C                               m 3
+       movd    20(%eax), %mm1          C                               m 3
+       movd    %mm6, 8(%edx)           C                               m 3
+       psrlq   $32, %mm6               C                               m 3
+       pmuludq %mm7, %mm1              C                               m 3
+       paddq   %mm3, %mm6              C                               m 3
+       movd    24(%eax), %mm4          C                               m 3
+       movd    %mm6, 12(%edx)          C                               m 3
+       psrlq   $32, %mm6               C                               m 3
+       lea     16(%eax), %eax          C                               m 3
+       lea     16(%edx), %edx          C                               m 3
+       add     $4, %ecx                C                               m 3
+       ja      L(lpm3)                 C                               m 3
+       pmuludq %mm7, %mm4              C                               m 3
+       paddq   %mm0, %mm6              C                               m 3
+       movd    %mm6, (%edx)            C                               m 3
+       psrlq   $32, %mm6               C                               m 3
+       paddq   %mm1, %mm6              C                               m 3
+       mov     16(%esp), %edi          C rp                              3
+       jmp     L(x3)
+
+L(olp3):
+       lea     4(%edi), %edi           C                               am 3
+       movd    (%esi), %mm7            C                               am 3
+       lea     4(%esi), %esi           C                               am 3
+       mov     %edi, %edx              C rp                            am 3
+       mov     20(%esp), %eax          C up                            am 3
+       movd    (%eax), %mm0            C                               am 3
+       mov     24(%esp), %ecx          C inner loop count              am 3
+       pxor    %mm6, %mm6              C                               am 3
+       pmuludq %mm7, %mm0              C                               am 3
+       movd    4(%eax), %mm1           C                               am 3
+       movd    (%edx), %mm4            C                               am 3
+       pmuludq %mm7, %mm1              C                               am 3
+       movd    8(%eax), %mm2           C                               am 3
+       paddq   %mm0, %mm4              C                               am 3
+       movd    4(%edx), %mm5           C                               am 3
+       jmp     L(lam3)                 C                               am 3
+       ALIGN(16)                       C                               am 3
+L(lam3):
+       pmuludq %mm7, %mm2              C                               am 3
+       paddq   %mm4, %mm6              C                               am 3
+       movd    12(%eax), %mm3          C                               am 3
+       paddq   %mm1, %mm5              C                               am 3
+       movd    8(%edx), %mm4           C                               am 3
+       movd    %mm6, (%edx)            C                               am 3
+       psrlq   $32, %mm6               C                               am 3
+       pmuludq %mm7, %mm3              C                               am 3
+       paddq   %mm5, %mm6              C                               am 3
+       movd    16(%eax), %mm0          C                               am 3
+       paddq   %mm2, %mm4              C                               am 3
+       movd    12(%edx), %mm5          C                               am 3
+       movd    %mm6, 4(%edx)           C                               am 3
+       psrlq   $32, %mm6               C                               am 3
+       pmuludq %mm7, %mm0              C                               am 3
+       paddq   %mm4, %mm6              C                               am 3
+       movd    20(%eax), %mm1          C                               am 3
+       paddq   %mm3, %mm5              C                               am 3
+       movd    16(%edx), %mm4          C                               am 3
+       movd    %mm6, 8(%edx)           C                               am 3
+       psrlq   $32, %mm6               C                               am 3
+       pmuludq %mm7, %mm1              C                               am 3
+       paddq   %mm5, %mm6              C                               am 3
+       movd    24(%eax), %mm2          C                               am 3
+       paddq   %mm0, %mm4              C                               am 3
+       movd    20(%edx), %mm5          C                               am 3
+       movd    %mm6, 12(%edx)          C                               am 3
+       psrlq   $32, %mm6               C                               am 3
+       lea     16(%eax), %eax          C                               am 3
+       lea     16(%edx), %edx          C                               am 3
+       add     $4, %ecx                C                               am 3
+       jnz     L(lam3)                 C                               am 3
+       pmuludq %mm7, %mm2              C                               am 3
+       paddq   %mm4, %mm6              C                               am 3
+       paddq   %mm1, %mm5              C                               am 3
+       movd    8(%edx), %mm4           C                               am 3
+       movd    %mm6, (%edx)            C                               am 3
+       psrlq   $32, %mm6               C                               am 3
+       paddq   %mm5, %mm6              C                               am 3
+       paddq   %mm2, %mm4              C                               am 3
+L(x3): movd    %mm6, 4(%edx)           C                               am 3
+       psrlq   $32, %mm6               C                               am 3
+       paddq   %mm4, %mm6              C                               am 3
+       movd    %mm6, 8(%edx)           C                               am 3
+       psrlq   $32, %mm6               C                               am 3
+       movd    %mm6, 12(%edx)          C                               am 3
+       dec     %ebx                    C                               am 3
+       jnz     L(olp3)                 C                               am 3
+L(oel3):
+       emms                            C                                  3
+       pop     %edi                    C                                  3
+       pop     %ebx                    C                                  3
+       pop     %esi                    C                                  3
+       ret                             C                                  3
+EPILOGUE()
diff --git a/mpn/x86/pentium4/sse2/popcount.asm b/mpn/x86/pentium4/sse2/popcount.asm

new file mode 100644 (file)

index 0000000..41c86ec
--- /dev/null
+++ b/mpn/x86/pentium4/sse2/popcount.asm
@@ -0,0 +1,267 @@
+dnl  X86-32 and X86-64 mpn_popcount using SSE2.
+
+dnl  Copyright 2006, 2007 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+
+C 32-bit                     popcount        hamdist
+C                           cycles/limb     cycles/limb
+C P5:                           -
+C P6 model 0-8,10-12)           -
+C P6 model 9  (Banias)          ?
+C P6 model 13 (Dothan)          4
+C P4 model 0  (Willamette)      ?
+C P4 model 1  (?)               ?
+C P4 model 2  (Northwood)       3.9
+C P4 model 3  (Prescott)        ?
+C P4 model 4  (Nocona)          ?
+C K6:                           -
+C K7:                           -
+C K8:                           ?
+
+C 64-bit                     popcount        hamdist
+C                           cycles/limb     cycles/limb
+C P4 model 4 (Nocona):          8
+C K8:                           7.5
+C K10:                         3.5
+C P6 core2:                    3.68
+C P6 corei7:                   3.15
+
+C TODO
+C  * Make a mpn_hamdist based on this.  Alignment could either be handled by
+C    using movdqu for one operand and movdqa for the other, or by painfully
+C    shifting as we go.  Unfortunately, there seems to be no usable shift
+C    instruction, except for one that takes an immediate count.
+C  * It would probably be possible to cut a few cycles/limb using software
+C    pipelining.
+C  * There are 35 decode slots unused by the SSE2 instructions.  Loop control
+C    needs just 2 or 3 slots, leaving around 32 slots.  This allows a parallel
+C    integer based popcount.  Such a combined loop would handle 6 limbs in
+C    about 30 cycles on K8.
+C  * We could save a byte or two by using 32-bit operations on areg.
+C  * Check if using movdqa to a temp register and then register-based pand is faster.
+
+ifelse(GMP_LIMB_BITS,`32',
+`      define(`up',  `%edx')
+       define(`n',   `%ecx')
+       define(`areg',`%eax')
+       define(`breg',`%ebx')
+       define(`zero',`%xmm4')
+       define(`LIMB32',`       $1')
+       define(`LIMB64',`dnl')
+',`
+       define(`up',  `%rdi')
+       define(`n',   `%rsi')
+       define(`areg',`%rax')
+       define(`breg',`%rdx')
+       define(`zero',`%xmm8')
+       define(`LIMB32',`dnl')
+       define(`LIMB64',`       $1')
+')     C LIMB32 and LIMB64 emit their argument only for the matching limb size
+
+define(`mm01010101',`%xmm6')           C register holding the 0x55 byte pattern
+define(`mm00110011',`%xmm7')           C register holding the 0x33 byte pattern
+define(`mm00001111',`%xmm2')           C register holding the 0x0f byte pattern
+
+define(`GMP_LIMB_BYTES', eval(GMP_LIMB_BITS/8))        C bytes per limb
+define(`LIMBS_PER_XMM',  eval(16/GMP_LIMB_BYTES))      C limbs in one 16-byte block
+define(`LIMBS_PER_2XMM', eval(32/GMP_LIMB_BYTES))      C limbs in two 16-byte blocks
+
+undefine(`psadbw')                     C override inherited m4 version
+
+ASM_START()
+
+C Make cnsts global to work around Apple relocation bug.
+ifdef(`DARWIN',`
+       define(`cnsts', MPN(popccnsts))
+       GLOBL   cnsts')
+
+       TEXT
+       ALIGN(32)
+PROLOGUE(mpn_popcount)
+C Return in areg the number of 1 bits in the n limbs starting at up.
+LIMB32(`mov    4(%esp), up     ')      C 32-bit: fetch up from the stack
+LIMB32(`mov    8(%esp), n      ')      C 32-bit: fetch n from the stack
+LIMB32(`push   %ebx            ')      C 32-bit: save the register used as breg
+
+       pxor    %xmm3, %xmm3            C zero grand total count
+LIMB64(`pxor   zero, zero      ')      C 64-bit: zero has its own register, clear it once
+ifdef(`PIC',`
+       LEA(    cnsts, breg)
+',`
+LIMB32(`mov    $cnsts, breg    ')
+LIMB64(`movabs $cnsts, breg    ')
+')
+
+       movdqa  -48(breg), mm01010101   C 0x55 pattern, stored 48 bytes below cnsts
+       movdqa  -32(breg), mm00110011   C 0x33 pattern, stored 32 bytes below cnsts
+       movdqa  -16(breg), mm00001111   C 0x0f pattern, stored 16 bytes below cnsts
+
+       mov     up, areg                C keep a copy of the unrounded pointer
+       and     $-16, up                C round `up' down to 128-bit boundary
+       and     $12, areg               C 32:areg = 0, 4, 8, 12
+                                       C 64:areg = 0, 8
+       movdqa  (up), %xmm0             C first aligned 16-byte block
+       pand    64(breg,areg,4), %xmm0  C low-end mask: clear bytes below the original up
+       shr     $m4_log2(GMP_LIMB_BYTES), %eax  C byte offset becomes a limb count
+       add     areg, n                 C compensate n for rounded down `up'
+
+       pxor    %xmm4, %xmm4            C second block is empty unless loaded later
+       sub     $LIMBS_PER_XMM, n
+       jbe     L(sum)                  C everything lies inside the first block
+
+       sub     $LIMBS_PER_XMM, n
+       ja      L(ent)                  C more than two blocks: enter loop, xmm0 already loaded
+       jmp     L(lsum)                 C data ends inside the second block
+
+       ALIGN(16)
+L(top):        movdqa  (up), %xmm0             C first block of this iteration
+L(ent):        movdqa  16(up), %xmm4           C second block of this iteration
+
+       movdqa  %xmm0, %xmm1
+       movdqa  %xmm4, %xmm5
+       psrld   $1, %xmm0
+       psrld   $1, %xmm4
+       pand    mm01010101, %xmm0
+       pand    mm01010101, %xmm4
+       psubd   %xmm0, %xmm1            C x - ((x >> 1) & 0x55..): counts per 2-bit field
+       psubd   %xmm4, %xmm5            C same for the second block
+
+       movdqa  %xmm1, %xmm0
+       movdqa  %xmm5, %xmm4
+       psrlq   $2, %xmm1
+       psrlq   $2, %xmm5
+       pand    mm00110011, %xmm0
+       pand    mm00110011, %xmm4
+       pand    mm00110011, %xmm1
+       pand    mm00110011, %xmm5
+       paddq   %xmm0, %xmm1            C (x & 0x33..) + ((x >> 2) & 0x33..): counts per 4-bit field
+       paddq   %xmm4, %xmm5            C same for the second block
+
+LIMB32(`pxor   zero, zero      ')      C 32-bit: zero is %xmm4, which held data above
+
+       add     $32, up                 C step past both blocks
+       sub     $LIMBS_PER_2XMM, n      C flags from this sub are tested by jnc below
+
+       paddq   %xmm5, %xmm1            C merge the 4-bit counts of the two blocks
+       movdqa  %xmm1, %xmm0
+       psrlq   $4, %xmm1
+       pand    mm00001111, %xmm0
+       pand    mm00001111, %xmm1
+       paddq   %xmm0, %xmm1            C counts per byte
+
+       psadbw  zero, %xmm1             C add up the byte counts within each 64-bit half
+       paddq   %xmm1, %xmm3            C add to grand total
+
+       jnc     L(top)                  C continue while the sub above did not borrow
+L(end):
+       add     $LIMBS_PER_2XMM, n      C undo the final sub
+       jz      L(rt)                   C no limbs remain
+       movdqa  (up), %xmm0             C next block
+       pxor    %xmm4, %xmm4            C empty second operand if this is the last block
+       sub     $LIMBS_PER_XMM, n
+       jbe     L(sum)                  C at most one block remains
+L(lsum):
+       movdqa  %xmm0, %xmm4            C the complete block becomes the second operand
+       movdqa  16(up), %xmm0           C final block, masked at L(sum)
+L(sum):
+       shl     $m4_log2(GMP_LIMB_BYTES), n     C limb count to byte count
+       and     $12, n                  C low bits pick the high-end mask
+       pand    (breg,n,4), %xmm0       C clear bytes beyond the end of the operand
+
+       movdqa  %xmm0, %xmm1
+       movdqa  %xmm4, %xmm5
+       psrld   $1, %xmm0
+       psrld   $1, %xmm4
+       pand    mm01010101, %xmm0
+       pand    mm01010101, %xmm4
+       psubd   %xmm0, %xmm1            C counts per 2-bit field, as in the loop
+       psubd   %xmm4, %xmm5
+
+       movdqa  %xmm1, %xmm0
+       movdqa  %xmm5, %xmm4
+       psrlq   $2, %xmm1
+       psrlq   $2, %xmm5
+       pand    mm00110011, %xmm0
+       pand    mm00110011, %xmm4
+       pand    mm00110011, %xmm1
+       pand    mm00110011, %xmm5
+       paddq   %xmm0, %xmm1            C counts per 4-bit field, as in the loop
+       paddq   %xmm4, %xmm5
+
+LIMB32(`pxor   zero, zero      ')      C 32-bit: zero is %xmm4, which held data above
+
+       paddq   %xmm5, %xmm1            C merge the 4-bit counts of the two blocks
+       movdqa  %xmm1, %xmm0
+       psrlq   $4, %xmm1
+       pand    mm00001111, %xmm0
+       pand    mm00001111, %xmm1
+       paddq   %xmm0, %xmm1            C counts per byte
+
+       psadbw  zero, %xmm1             C add up the byte counts within each 64-bit half
+       paddq   %xmm1, %xmm3            C add to grand total
+
+
+C Add the two 64-bit halves of the grand total counter
+L(rt): movdqa  %xmm3, %xmm0
+       psrldq  $8, %xmm3               C move the upper half down to the lower half
+       paddq   %xmm3, %xmm0
+       movd    %xmm0, areg             C movq avoided due to gas bug
+
+LIMB32(`pop    %ebx            ')      C 32-bit: restore the register used as breg
+       ret
+
+EPILOGUE()
+DEF_OBJECT(dummy,16)                   C presumably 16 is the alignment; the code reads these with movdqa/pand
+C Three magic constants used for masking out bits
+       .byte   0x55,0x55,0x55,0x55,0x55,0x55,0x55,0x55         C cnsts-48: loaded into mm01010101
+       .byte   0x55,0x55,0x55,0x55,0x55,0x55,0x55,0x55
+
+       .byte   0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33         C cnsts-32: loaded into mm00110011
+       .byte   0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33
+
+       .byte   0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f         C cnsts-16: loaded into mm00001111
+       .byte   0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f
+cnsts:
+C Masks for high end of number
+       .byte   0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff         C cnsts+0: keep all 16 bytes
+       .byte   0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
+
+       .byte   0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00         C cnsts+16: keep the first 4 bytes
+       .byte   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+
+       .byte   0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff         C cnsts+32: keep the first 8 bytes
+       .byte   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+
+       .byte   0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff         C cnsts+48: keep the first 12 bytes
+       .byte   0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00
+C Masks for low end of number
+       .byte   0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff         C cnsts+64: keep all 16 bytes
+       .byte   0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
+
+       .byte   0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff         C cnsts+80: clear the first 4 bytes
+       .byte   0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
+
+       .byte   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00         C cnsts+96: clear the first 8 bytes
+       .byte   0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
+
+       .byte   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00         C cnsts+112: clear the first 12 bytes
+       .byte   0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff
+END_OBJECT(dummy)
diff --git a/mpn/x86/pentium4/sse2/rsh1add_n.asm b/mpn/x86/pentium4/sse2/rsh1add_n.asm

new file mode 100644 (file)

index 0000000..bbf4324
--- /dev/null
+++ b/mpn/x86/pentium4/sse2/rsh1add_n.asm
@@ -0,0 +1,115 @@
+dnl  Intel Pentium-4 mpn_rsh1add_n -- mpn (x+y)/2
+
+dnl  Copyright 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C        cycles/limb (approx)
+C      dst!=src1,2  dst==src1  dst==src2
+C P4:      4.5         6.5        6.5
+
+
+C mp_limb_t mpn_rsh1add_n (mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
+C                          mp_size_t size);
+C
+C The slightly strange combination of indexing and pointer incrementing
+C that's used seems to work best.  Not sure why, but for instance leal
+C incrementing on %esi is a 1 or 2 cycle slowdown.
+C
+C The dependent chain is paddq combining the carry and next (shifted) part,
+C plus psrlq to move the new carry down.  That, and just 4 mmx arithmetic
+C instructions in total (2 paddq, psllq, psrlq), makes 4 c/l the target
+C speed, which is almost achieved for separate src/dst but when src==dst the
+C write combining anomalies slow it down.
+
+defframe(PARAM_SIZE, 16)
+defframe(PARAM_YP,   12)
+defframe(PARAM_XP,   8)
+defframe(PARAM_WP,   4)
+
+dnl  re-use parameter space
+define(SAVE_EBX,`PARAM_XP')
+define(SAVE_ESI,`PARAM_YP')
+
+       TEXT
+       ALIGN(8)
+
+PROLOGUE(mpn_rsh1add_n)
+deflit(`FRAME',0)
+       C {wp,size} = ({xp,size} + {yp,size}) >> 1, returning the bit shifted out
+       movl    PARAM_XP, %edx          C edx = xp
+       movl    %ebx, SAVE_EBX          C xp slot already read, reuse it to save ebx
+
+       movl    PARAM_YP, %ebx          C ebx = yp
+       movl    %esi, SAVE_ESI          C yp slot already read, reuse it to save esi
+
+       movl    PARAM_WP, %esi          C esi = wp
+
+       movd    (%edx), %mm0            C xp[0]
+
+       movd    (%ebx), %mm1            C yp[0]
+       movl    PARAM_SIZE, %ecx        C ecx = size
+
+       movl    (%edx), %eax            C xp[0]
+
+       addl    (%ebx), %eax            C xp[0]+yp[0]
+
+       paddq   %mm1, %mm0              C xp[0]+yp[0]
+       leal    (%esi,%ecx,4), %esi     C wp end, ie. wp+4*size
+       negl    %ecx                    C -size
+
+       psrlq   $1, %mm0                C (xp[0]+yp[0])/2
+       and     $1, %eax                C return value, rsh1 bit of xp[0]+yp[0]
+       addl    $1, %ecx                C -(size-1)
+       jz      L(done)                 C size==1, no further limbs to add
+
+
+L(top):
+       C eax   return value
+       C ebx   yp, advanced one limb per iteration
+       C ecx   counter, limbs, -(size-1) to -1 inclusive
+       C edx   xp, advanced one limb per iteration
+       C esi   wp end
+       C mm0   carry (32 bits)
+
+       movd    4(%edx), %mm1   C xp[i+1]
+       movd    4(%ebx), %mm2   C yp[i+1]
+       leal    4(%edx), %edx           C xp++
+       leal    4(%ebx), %ebx           C yp++
+       paddq   %mm2, %mm1              C xp[i+1]+yp[i+1]
+       psllq   $31, %mm1               C low bit at 31, further 32 above
+
+       paddq   %mm1, %mm0              C 31 and carry from prev add
+       movd    %mm0, -4(%esi,%ecx,4)   C low ready to store dst[i]
+
+       psrlq   $32, %mm0               C high becomes new carry
+
+       addl    $1, %ecx                C step counter towards zero
+       jnz     L(top)
+
+
+L(done):
+       movd    %mm0, -4(%esi)          C dst[size-1]
+       movl    SAVE_EBX, %ebx          C restore ebx from the xp slot
+
+       movl    SAVE_ESI, %esi          C restore esi from the yp slot
+       emms                            C clear MMX state before returning
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/pentium4/sse2/sqr_basecase.asm b/mpn/x86/pentium4/sse2/sqr_basecase.asm

new file mode 100644 (file)

index 0000000..a10859a
--- /dev/null
+++ b/mpn/x86/pentium4/sse2/sqr_basecase.asm
@@ -0,0 +1,694 @@
+dnl  mpn_sqr_basecase for Pentium 4 and P6 models with SSE2 (i.e., 9,D,E,F).
+
+dnl  Copyright 2001, 2002, 2007 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C TODO:
+C  * Improve ad-hoc outer loop code and register handling.  Some feed-in
+C    scheduling could improve things by several cycles per outer iteration.
+C  * In the lam3...lam1 code, keep accumulation operands in registers, without
+C    storing intermediates to rp.
+C  * We might want to keep 32 in a free mm register, since the register form is
+C    3 bytes and the immediate form is 4 bytes.  About 80 bytes to save.
+C  * Look into different loop alignment, we now expand the code about 50 bytes
+C    with possibly needless alignment.
+C  * Use OSP, should solve feed-in latency problems.
+C  * Address relative slowness for un<=3 for Pentium M.  The old code is
+C    considerably faster there.  (1:20/14, 2:34/32, 3:66/57)
+
+C INPUT PARAMETERS
+C rp           sp + 4
+C up           sp + 8
+C un           sp + 12
+
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_sqr_basecase)
+       mov     4(%esp), %edx           C rp
+       mov     8(%esp), %eax           C up
+       mov     12(%esp), %ecx          C un
+
+       cmp     $2, %ecx
+       jc      L(un1)
+       jz      L(un2)
+       cmp     $4, %ecx
+       jc      L(un3)
+       jz      L(un4)
+       jmp     L(big)
+
+L(un1):        mov     (%eax), %eax
+       mov     %edx, %ecx
+       mul     %eax
+       mov     %eax, (%ecx)
+       mov     %edx, 4(%ecx)
+       ret
+L(un2):        movd    (%eax), %mm0            C                               un=2
+       movd    (%eax), %mm2            C                               un=2
+       movd    4(%eax), %mm1           C                               un=2
+       pmuludq %mm0, %mm0              C 64b weight 0                  un=2
+       pmuludq %mm1, %mm2              C 64b weight 32                 un=2
+       pmuludq %mm1, %mm1              C 64b weight 64                 un=2
+       movd    %mm0, (%edx)            C                               un=2
+       psrlq   $32, %mm0               C 32b weight 32                 un=2
+       pcmpeqd %mm7, %mm7              C                               un=2
+       psrlq   $33, %mm7               C 0x000000007FFFFFFF            un=2
+       pand    %mm2, %mm7              C 31b weight 32                 un=2
+       psrlq   $31, %mm2               C 33b weight 65                 un=2
+       psllq   $1, %mm7                C 31b weight 33                 un=2
+       paddq   %mm7, %mm0              C                               un=2
+       movd    %mm0, 4(%edx)           C                               un=2
+       psrlq   $32, %mm0               C                               un=2
+       paddq   %mm2, %mm1              C                               un=2
+       paddq   %mm0, %mm1              C                               un=2
+       movd    %mm1, 8(%edx)           C                               un=2
+       psrlq   $32, %mm1               C                               un=2
+       movd    %mm1, 12(%edx)          C                               un=2
+       emms
+       ret
+L(un3):        movd    (%eax), %mm7            C                               un=3
+       movd    4(%eax), %mm6           C                               un=3
+       pmuludq %mm7, %mm6              C                               un=3
+       movd    8(%eax), %mm2           C                               un=3
+       pmuludq %mm7, %mm2              C                               un=3
+       movd    %mm6, 4(%edx)           C                               un=3
+       psrlq   $32, %mm6               C                               un=3
+       paddq   %mm2, %mm6              C                               un=3
+       movd    %mm6, 8(%edx)           C                               un=3
+       psrlq   $32, %mm6               C                               un=3
+       movd    %mm6, 12(%edx)          C                               un=3
+       lea     4(%edx), %edx           C                               un=3
+       lea     4(%eax), %eax           C                               un=3
+       jmp     L(am1)
+L(un4):        movd    (%eax), %mm7            C                               un=4
+       movd    4(%eax), %mm6           C                               un=4
+       pmuludq %mm7, %mm6              C                               un=4
+       movd    8(%eax), %mm0           C                               un=4
+       pmuludq %mm7, %mm0              C                               un=4
+       movd    12(%eax), %mm1          C                               un=4
+       pmuludq %mm7, %mm1              C                               un=4
+       movd    %mm6, 4(%edx)           C                               un=4
+       psrlq   $32, %mm6               C                               un=4
+       paddq   %mm0, %mm6              C                               un=4
+       movd    %mm6, 8(%edx)           C                               un=4
+       psrlq   $32, %mm6               C                               un=4
+       paddq   %mm1, %mm6              C                               un=4
+       movd    %mm6, 12(%edx)          C                               un=4
+       psrlq   $32, %mm6               C                               un=4
+       movd    %mm6, 16(%edx)          C                               un=4
+       lea     4(%edx), %edx           C                               un=4
+       lea     4(%eax), %eax           C                               un=4
+       jmp     L(am2)
+
+L(big):        push    %esi
+       push    %ebx
+       push    %edi
+       pxor    %mm6, %mm6
+       movd    (%eax), %mm7            C
+       lea     4(%eax), %esi           C init up, up++
+       lea     4(%eax), %eax           C up2++  FIXME: should fix offsets
+       lea     4(%edx), %edi           C init rp, rp++
+       lea     4(%edx), %edx           C rp2++
+       lea     -4(%ecx), %ebx          C loop count
+       and     $3, %ecx
+       jz      L(3m)
+       cmp     $2, %ecx
+       ja      L(2m)
+       jb      L(0m)
+
+L(1m):
+       movd    (%eax), %mm4            C                               m 1
+       lea     (%ebx), %ecx            C inner loop count              m 1
+       pmuludq %mm7, %mm4              C                               m 1
+       movd    4(%eax), %mm3           C                               m 1
+       pmuludq %mm7, %mm3              C                               m 1
+       movd    8(%eax), %mm0           C                               m 1
+       jmp     L(m01)                  C                               m 1
+       ALIGN(16)                       C                               m 1
+L(lpm1):
+       pmuludq %mm7, %mm4              C                               m 1
+       paddq   %mm0, %mm6              C                               m 1
+       movd    4(%eax), %mm3           C                               m 1
+       movd    %mm6, -8(%edx)          C                               m 1
+       psrlq   $32, %mm6               C                               m 1
+       pmuludq %mm7, %mm3              C                               m 1
+       paddq   %mm1, %mm6              C                               m 1
+       movd    8(%eax), %mm0           C                               m 1
+       movd    %mm6, -4(%edx)          C                               m 1
+       psrlq   $32, %mm6               C                               m 1
+L(m01):        pmuludq %mm7, %mm0              C                               m 1
+       paddq   %mm4, %mm6              C                               m 1
+       movd    12(%eax), %mm1          C                               m 1
+       movd    %mm6, (%edx)            C                               m 1
+       psrlq   $32, %mm6               C                               m 1
+       pmuludq %mm7, %mm1              C                               m 1
+       paddq   %mm3, %mm6              C                               m 1
+       movd    16(%eax), %mm4          C                               m 1
+       movd    %mm6, 4(%edx)           C                               m 1
+       psrlq   $32, %mm6               C                               m 1
+       lea     16(%eax), %eax          C                               m 1
+       lea     16(%edx), %edx          C                               m 1
+       sub     $4, %ecx                C                               m 1
+       ja      L(lpm1)                 C                               m 1
+       pmuludq %mm7, %mm4              C                               m 1
+       paddq   %mm0, %mm6              C                               m 1
+       movd    %mm6, -8(%edx)          C                               m 1
+       psrlq   $32, %mm6               C                               m 1
+       paddq   %mm1, %mm6              C                               m 1
+       jmp     L(0)
+
+L(2m):
+       movd    (%eax), %mm1            C                               m 2
+       lea     (%ebx), %ecx            C inner loop count              m 2
+       pmuludq %mm7, %mm1              C                               m 2
+       movd    4(%eax), %mm4           C                               m 2
+       pmuludq %mm7, %mm4              C                               m 2
+       movd    8(%eax), %mm3           C                               m 2
+       jmp     L(m10)                  C                               m 2
+       ALIGN(16)                       C                               m 2
+L(lpm2):
+       pmuludq %mm7, %mm4              C                               m 2
+       paddq   %mm0, %mm6              C                               m 2
+       movd    8(%eax), %mm3           C                               m 2
+       movd    %mm6, -4(%edx)          C                               m 2
+       psrlq   $32, %mm6               C                               m 2
+L(m10):        pmuludq %mm7, %mm3              C                               m 2
+       paddq   %mm1, %mm6              C                               m 2
+       movd    12(%eax), %mm0          C                               m 2
+       movd    %mm6, (%edx)            C                               m 2
+       psrlq   $32, %mm6               C                               m 2
+       pmuludq %mm7, %mm0              C                               m 2
+       paddq   %mm4, %mm6              C                               m 2
+       movd    16(%eax), %mm1          C                               m 2
+       movd    %mm6, 4(%edx)           C                               m 2
+       psrlq   $32, %mm6               C                               m 2
+       pmuludq %mm7, %mm1              C                               m 2
+       paddq   %mm3, %mm6              C                               m 2
+       movd    20(%eax), %mm4          C                               m 2
+       movd    %mm6, 8(%edx)           C                               m 2
+       psrlq   $32, %mm6               C                               m 2
+       lea     16(%eax), %eax          C                               m 2
+       lea     16(%edx), %edx          C                               m 2
+       sub     $4, %ecx                C                               m 2
+       ja      L(lpm2)                 C                               m 2
+       pmuludq %mm7, %mm4              C                               m 2
+       paddq   %mm0, %mm6              C                               m 2
+       movd    %mm6, -4(%edx)          C                               m 2
+       psrlq   $32, %mm6               C                               m 2
+       paddq   %mm1, %mm6              C                               m 2
+       jmp     L(1)
+
+L(3m):
+       movd    (%eax), %mm0            C                               m 3
+       lea     (%ebx), %ecx            C inner loop count              m 3
+       pmuludq %mm7, %mm0              C                               m 3
+       movd    4(%eax), %mm1           C                               m 3
+       pmuludq %mm7, %mm1              C                               m 3
+       movd    8(%eax), %mm4           C                               m 3
+       jmp     L(lpm3)                 C                               m 3
+       ALIGN(16)                       C                               m 3
+L(lpm3):
+       pmuludq %mm7, %mm4              C                               m 3
+       paddq   %mm0, %mm6              C                               m 3
+       movd    12(%eax), %mm3          C                               m 3
+       movd    %mm6, (%edx)            C                               m 3
+       psrlq   $32, %mm6               C                               m 3
+       pmuludq %mm7, %mm3              C                               m 3
+       paddq   %mm1, %mm6              C                               m 3
+       movd    16(%eax), %mm0          C                               m 3
+       movd    %mm6, 4(%edx)           C                               m 3
+       psrlq   $32, %mm6               C                               m 3
+       pmuludq %mm7, %mm0              C                               m 3
+       paddq   %mm4, %mm6              C                               m 3
+       movd    20(%eax), %mm1          C                               m 3
+       movd    %mm6, 8(%edx)           C                               m 3
+       psrlq   $32, %mm6               C                               m 3
+       pmuludq %mm7, %mm1              C                               m 3
+       paddq   %mm3, %mm6              C                               m 3
+       movd    24(%eax), %mm4          C                               m 3
+       movd    %mm6, 12(%edx)          C                               m 3
+       psrlq   $32, %mm6               C                               m 3
+       lea     16(%eax), %eax          C                               m 3
+       lea     16(%edx), %edx          C                               m 3
+       sub     $4, %ecx                C                               m 3
+       ja      L(lpm3)                 C                               m 3
+       pmuludq %mm7, %mm4              C                               m 3
+       paddq   %mm0, %mm6              C                               m 3
+       movd    %mm6, (%edx)            C                               m 3
+       psrlq   $32, %mm6               C                               m 3
+       paddq   %mm1, %mm6              C                               m 3
+       jmp     L(2)
+
+L(0m):
+       movd    (%eax), %mm3            C                               m 0
+       lea     (%ebx), %ecx            C inner loop count              m 0
+       pmuludq %mm7, %mm3              C                               m 0
+       movd    4(%eax), %mm0           C                               m 0
+       pmuludq %mm7, %mm0              C                               m 0
+       movd    8(%eax), %mm1           C                               m 0
+       jmp     L(m00)                  C                               m 0
+       ALIGN(16)                       C                               m 0
+L(lpm0):
+       pmuludq %mm7, %mm4              C                               m 0
+       paddq   %mm0, %mm6              C                               m 0
+       movd    (%eax), %mm3            C                               m 0
+       movd    %mm6, -12(%edx)         C                               m 0
+       psrlq   $32, %mm6               C                               m 0
+       pmuludq %mm7, %mm3              C                               m 0
+       paddq   %mm1, %mm6              C                               m 0
+       movd    4(%eax), %mm0           C                               m 0
+       movd    %mm6, -8(%edx)          C                               m 0
+       psrlq   $32, %mm6               C                               m 0
+       pmuludq %mm7, %mm0              C                               m 0
+       paddq   %mm4, %mm6              C                               m 0
+       movd    8(%eax), %mm1           C                               m 0
+       movd    %mm6, -4(%edx)          C                               m 0
+       psrlq   $32, %mm6               C                               m 0
+L(m00):        pmuludq %mm7, %mm1              C                               m 0
+       paddq   %mm3, %mm6              C                               m 0
+       movd    12(%eax), %mm4          C                               m 0
+       movd    %mm6, (%edx)            C                               m 0
+       psrlq   $32, %mm6               C                               m 0
+       lea     16(%eax), %eax          C                               m 0
+       lea     16(%edx), %edx          C                               m 0
+       sub     $4, %ecx                C                               m 0
+       ja      L(lpm0)                 C                               m 0
+       pmuludq %mm7, %mm4              C                               m 0
+       paddq   %mm0, %mm6              C                               m 0
+       movd    %mm6, -12(%edx)         C                               m 0
+       psrlq   $32, %mm6               C                               m 0
+       paddq   %mm1, %mm6              C                               m 0
+       jmp     L(3)
+
+L(outer):
+       lea     8(%edi), %edi           C rp += 2
+       movd    (%esi), %mm7            C                               am 3
+       mov     %edi, %edx              C rp2 = rp                      am 3
+       lea     4(%esi), %esi           C up++                          am 3
+       lea     (%esi), %eax            C up2 = up                      am 3
+       movd    (%eax), %mm0            C                               am 3
+       lea     (%ebx), %ecx            C inner loop count              am 3
+       pxor    %mm6, %mm6              C                               am 3
+       pmuludq %mm7, %mm0              C                               am 3
+       movd    4(%eax), %mm1           C                               am 3
+       movd    (%edx), %mm4            C                               am 3
+       pmuludq %mm7, %mm1              C                               am 3
+       movd    8(%eax), %mm2           C                               am 3
+       paddq   %mm0, %mm4              C                               am 3
+       movd    4(%edx), %mm5           C                               am 3
+       jmp     L(lam3)                 C                               am 3
+       ALIGN(16)                       C                               am 3
+L(lam3):
+       pmuludq %mm7, %mm2              C                               am 3
+       paddq   %mm4, %mm6              C                               am 3
+       movd    12(%eax), %mm3          C                               am 3
+       paddq   %mm1, %mm5              C                               am 3
+       movd    8(%edx), %mm4           C                               am 3
+       movd    %mm6, (%edx)            C                               am 3
+       psrlq   $32, %mm6               C                               am 3
+       pmuludq %mm7, %mm3              C                               am 3
+       paddq   %mm5, %mm6              C                               am 3
+       movd    16(%eax), %mm0          C                               am 3
+       paddq   %mm2, %mm4              C                               am 3
+       movd    12(%edx), %mm5          C                               am 3
+       movd    %mm6, 4(%edx)           C                               am 3
+       psrlq   $32, %mm6               C                               am 3
+       pmuludq %mm7, %mm0              C                               am 3
+       paddq   %mm4, %mm6              C                               am 3
+       movd    20(%eax), %mm1          C                               am 3
+       paddq   %mm3, %mm5              C                               am 3
+       movd    16(%edx), %mm4          C                               am 3
+       movd    %mm6, 8(%edx)           C                               am 3
+       psrlq   $32, %mm6               C                               am 3
+       pmuludq %mm7, %mm1              C                               am 3
+       paddq   %mm5, %mm6              C                               am 3
+       movd    24(%eax), %mm2          C                               am 3
+       paddq   %mm0, %mm4              C                               am 3
+       movd    20(%edx), %mm5          C                               am 3
+       movd    %mm6, 12(%edx)          C                               am 3
+       psrlq   $32, %mm6               C                               am 3
+       lea     16(%eax), %eax          C                               am 3
+       lea     16(%edx), %edx          C                               am 3
+       sub     $4, %ecx                C                               am 3
+       ja      L(lam3)                 C                               am 3
+       pmuludq %mm7, %mm2              C                               am 3
+       paddq   %mm4, %mm6              C                               am 3
+       paddq   %mm1, %mm5              C                               am 3
+       movd    8(%edx), %mm4           C                               am 3
+       movd    %mm6, (%edx)            C                               am 3
+       psrlq   $32, %mm6               C                               am 3
+       paddq   %mm5, %mm6              C                               am 3
+       paddq   %mm2, %mm4              C                               am 3
+L(2):  movd    %mm6, 4(%edx)           C                               am 3
+       psrlq   $32, %mm6               C                               am 3
+       paddq   %mm4, %mm6              C                               am 3
+       movd    %mm6, 8(%edx)           C                               am 3
+       psrlq   $32, %mm6               C                               am 3
+       movd    %mm6, 12(%edx)          C                               am 3
+
+       lea     8(%edi), %edi           C rp += 2
+       movd    (%esi), %mm7            C                               am 2
+       mov     %edi, %edx              C rp2 = rp                      am 2
+       lea     4(%esi), %esi           C up++                          am 2
+       lea     (%esi), %eax            C up2 = up                      am 2
+       movd    (%eax), %mm1            C                               am 2
+       lea     (%ebx), %ecx            C inner loop count              am 2
+       pxor    %mm6, %mm6              C                               am 2
+       pmuludq %mm7, %mm1              C                               am 2
+       movd    4(%eax), %mm2           C                               am 2
+       movd    (%edx), %mm5            C                               am 2
+       pmuludq %mm7, %mm2              C                               am 2
+       movd    8(%eax), %mm3           C                               am 2
+       paddq   %mm1, %mm5              C                               am 2
+       movd    4(%edx), %mm4           C                               am 2
+       jmp     L(am10)                 C                               am 2
+       ALIGN(16)                       C                               am 2
+L(lam2):
+       pmuludq %mm7, %mm2              C                               am 2
+       paddq   %mm4, %mm6              C                               am 2
+       movd    8(%eax), %mm3           C                               am 2
+       paddq   %mm1, %mm5              C                               am 2
+       movd    4(%edx), %mm4           C                               am 2
+       movd    %mm6, -4(%edx)          C                               am 2
+       psrlq   $32, %mm6               C                               am 2
+L(am10):
+       pmuludq %mm7, %mm3              C                               am 2
+       paddq   %mm5, %mm6              C                               am 2
+       movd    12(%eax), %mm0          C                               am 2
+       paddq   %mm2, %mm4              C                               am 2
+       movd    8(%edx), %mm5           C                               am 2
+       movd    %mm6, (%edx)            C                               am 2
+       psrlq   $32, %mm6               C                               am 2
+       pmuludq %mm7, %mm0              C                               am 2
+       paddq   %mm4, %mm6              C                               am 2
+       movd    16(%eax), %mm1          C                               am 2
+       paddq   %mm3, %mm5              C                               am 2
+       movd    12(%edx), %mm4          C                               am 2
+       movd    %mm6, 4(%edx)           C                               am 2
+       psrlq   $32, %mm6               C                               am 2
+       pmuludq %mm7, %mm1              C                               am 2
+       paddq   %mm5, %mm6              C                               am 2
+       movd    20(%eax), %mm2          C                               am 2
+       paddq   %mm0, %mm4              C                               am 2
+       movd    16(%edx), %mm5          C                               am 2
+       movd    %mm6, 8(%edx)           C                               am 2
+       psrlq   $32, %mm6               C                               am 2
+       lea     16(%eax), %eax          C                               am 2
+       lea     16(%edx), %edx          C                               am 2
+       sub     $4, %ecx                C                               am 2
+       ja      L(lam2)                 C                               am 2
+       pmuludq %mm7, %mm2              C                               am 2
+       paddq   %mm4, %mm6              C                               am 2
+       paddq   %mm1, %mm5              C                               am 2
+       movd    4(%edx), %mm4           C                               am 2
+       movd    %mm6, -4(%edx)          C                               am 2
+       psrlq   $32, %mm6               C                               am 2
+       paddq   %mm5, %mm6              C                               am 2
+       paddq   %mm2, %mm4              C                               am 2
+L(1):  movd    %mm6, (%edx)            C                               am 2
+       psrlq   $32, %mm6               C                               am 2
+       paddq   %mm4, %mm6              C                               am 2
+       movd    %mm6, 4(%edx)           C                               am 2
+       psrlq   $32, %mm6               C                               am 2
+       movd    %mm6, 8(%edx)           C                               am 2
+
+       lea     8(%edi), %edi           C rp += 2
+       movd    (%esi), %mm7            C                               am 1
+       mov     %edi, %edx              C rp2 = rp                      am 1
+       lea     4(%esi), %esi           C up++                          am 1
+       lea     (%esi), %eax            C up2 = up                      am 1
+       movd    (%eax), %mm2            C                               am 1
+       lea     (%ebx), %ecx            C inner loop count              am 1
+       pxor    %mm6, %mm6              C                               am 1
+       pmuludq %mm7, %mm2              C                               am 1
+       movd    4(%eax), %mm3           C                               am 1
+       movd    (%edx), %mm4            C                               am 1
+       pmuludq %mm7, %mm3              C                               am 1
+       movd    8(%eax), %mm0           C                               am 1
+       paddq   %mm2, %mm4              C                               am 1
+       movd    4(%edx), %mm5           C                               am 1
+       jmp     L(am01)                 C                               am 1
+       ALIGN(16)                       C                               am 1
+L(lam1):
+       pmuludq %mm7, %mm2              C                               am 1
+       paddq   %mm4, %mm6              C                               am 1
+       movd    4(%eax), %mm3           C                               am 1
+       paddq   %mm1, %mm5              C                               am 1
+       movd    (%edx), %mm4            C                               am 1
+       movd    %mm6, -8(%edx)          C                               am 1
+       psrlq   $32, %mm6               C                               am 1
+       pmuludq %mm7, %mm3              C                               am 1
+       paddq   %mm5, %mm6              C                               am 1
+       movd    8(%eax), %mm0           C                               am 1
+       paddq   %mm2, %mm4              C                               am 1
+       movd    4(%edx), %mm5           C                               am 1
+       movd    %mm6, -4(%edx)          C                               am 1
+       psrlq   $32, %mm6               C                               am 1
+L(am01):
+       pmuludq %mm7, %mm0              C                               am 1
+       paddq   %mm4, %mm6              C                               am 1
+       movd    12(%eax), %mm1          C                               am 1
+       paddq   %mm3, %mm5              C                               am 1
+       movd    8(%edx), %mm4           C                               am 1
+       movd    %mm6, (%edx)            C                               am 1
+       psrlq   $32, %mm6               C                               am 1
+       pmuludq %mm7, %mm1              C                               am 1
+       paddq   %mm5, %mm6              C                               am 1
+       movd    16(%eax), %mm2          C                               am 1
+       paddq   %mm0, %mm4              C                               am 1
+       movd    12(%edx), %mm5          C                               am 1
+       movd    %mm6, 4(%edx)           C                               am 1
+       psrlq   $32, %mm6               C                               am 1
+       lea     16(%eax), %eax          C                               am 1
+       lea     16(%edx), %edx          C                               am 1
+       sub     $4, %ecx                C                               am 1
+       ja      L(lam1)                 C                               am 1
+       pmuludq %mm7, %mm2              C                               am 1
+       paddq   %mm4, %mm6              C                               am 1
+       paddq   %mm1, %mm5              C                               am 1
+       movd    (%edx), %mm4            C                               am 1
+       movd    %mm6, -8(%edx)          C                               am 1
+       psrlq   $32, %mm6               C                               am 1
+       paddq   %mm5, %mm6              C                               am 1
+       paddq   %mm2, %mm4              C                               am 1
+L(0):  movd    %mm6, -4(%edx)          C                               am 1
+       psrlq   $32, %mm6               C                               am 1
+       paddq   %mm4, %mm6              C                               am 1
+       movd    %mm6, (%edx)            C                               am 1
+       psrlq   $32, %mm6               C                               am 1
+       movd    %mm6, 4(%edx)           C                               am 1
+
+       lea     8(%edi), %edi           C rp += 2
+       movd    (%esi), %mm7            C                               am 0
+       mov     %edi, %edx              C rp2 = rp                      am 0
+       lea     4(%esi), %esi           C up++                          am 0
+       lea     (%esi), %eax            C up2 = up                      am 0
+       movd    (%eax), %mm3            C                               am 0
+       lea     (%ebx), %ecx            C inner loop count              am 0
+       pxor    %mm6, %mm6              C                               am 0
+       pmuludq %mm7, %mm3              C                               am 0
+       movd    4(%eax), %mm0           C                               am 0
+       movd    (%edx), %mm5            C                               am 0
+       pmuludq %mm7, %mm0              C                               am 0
+       movd    8(%eax), %mm1           C                               am 0
+       paddq   %mm3, %mm5              C                               am 0
+       movd    4(%edx), %mm4           C                               am 0
+       jmp     L(am00)                 C                               am 0
+       ALIGN(16)                       C                               am 0
+L(lam0):
+       pmuludq %mm7, %mm2              C                               am 0
+       paddq   %mm4, %mm6              C                               am 0
+       movd    (%eax), %mm3            C                               am 0
+       paddq   %mm1, %mm5              C                               am 0
+       movd    -4(%edx), %mm4          C                               am 0
+       movd    %mm6, -12(%edx)         C                               am 0
+       psrlq   $32, %mm6               C                               am 0
+       pmuludq %mm7, %mm3              C                               am 0
+       paddq   %mm5, %mm6              C                               am 0
+       movd    4(%eax), %mm0           C                               am 0
+       paddq   %mm2, %mm4              C                               am 0
+       movd    (%edx), %mm5            C                               am 0
+       movd    %mm6, -8(%edx)          C                               am 0
+       psrlq   $32, %mm6               C                               am 0
+       pmuludq %mm7, %mm0              C                               am 0
+       paddq   %mm4, %mm6              C                               am 0
+       movd    8(%eax), %mm1           C                               am 0
+       paddq   %mm3, %mm5              C                               am 0
+       movd    4(%edx), %mm4           C                               am 0
+       movd    %mm6, -4(%edx)          C                               am 0
+       psrlq   $32, %mm6               C                               am 0
+L(am00):
+       pmuludq %mm7, %mm1              C                               am 0
+       paddq   %mm5, %mm6              C                               am 0
+       movd    12(%eax), %mm2          C                               am 0
+       paddq   %mm0, %mm4              C                               am 0
+       movd    8(%edx), %mm5           C                               am 0
+       movd    %mm6, (%edx)            C                               am 0
+       psrlq   $32, %mm6               C                               am 0
+       lea     16(%eax), %eax          C                               am 0
+       lea     16(%edx), %edx          C                               am 0
+       sub     $4, %ecx                C                               am 0
+       ja      L(lam0)                 C                               am 0
+       pmuludq %mm7, %mm2              C                               am 0
+       paddq   %mm4, %mm6              C                               am 0
+       paddq   %mm1, %mm5              C                               am 0
+       movd    -4(%edx), %mm4          C                               am 0
+       movd    %mm6, -12(%edx)         C                               am 0
+       psrlq   $32, %mm6               C                               am 0
+       paddq   %mm5, %mm6              C                               am 0
+       paddq   %mm2, %mm4              C                               am 0
+L(3):  movd    %mm6, -8(%edx)          C                               am 0
+       psrlq   $32, %mm6               C                               am 0
+       paddq   %mm4, %mm6              C                               am 0
+       movd    %mm6, -4(%edx)          C                               am 0
+       psrlq   $32, %mm6               C                               am 0
+       movd    %mm6, (%edx)            C                               am 0
+       sub     $4, %ebx                C                               am 0
+       ja      L(outer)                        C                               am 0
+
+       mov     %edi, %edx
+       mov     %esi, %eax
+       pop     %edi
+       pop     %ebx
+       pop     %esi
+
+L(am3):        C up[un-1..un-3] x up[un-4]
+       lea     8(%edx), %edx           C rp2 += 2
+       movd    (%eax), %mm7
+       movd    4(%eax), %mm1
+       movd    8(%eax), %mm2
+       movd    12(%eax), %mm3
+       movd    (%edx), %mm4
+       pmuludq %mm7, %mm1
+       movd    4(%edx), %mm5
+       pmuludq %mm7, %mm2
+       movd    8(%edx), %mm6
+       pmuludq %mm7, %mm3
+       paddq   %mm1, %mm4
+       paddq   %mm2, %mm5
+       paddq   %mm3, %mm6
+       movd    %mm4, (%edx)
+       psrlq   $32, %mm4
+       paddq   %mm5, %mm4
+       movd    %mm4, 4(%edx)
+       psrlq   $32, %mm4
+       paddq   %mm6, %mm4
+       movd    %mm4, 8(%edx)
+       psrlq   $32, %mm4
+       movd    %mm4, 12(%edx)          C FIXME feed through!
+       lea     4(%eax), %eax
+
+L(am2):        C up[un-1..un-2] x up[un-3]
+       lea     8(%edx), %edx           C rp2 += 2
+       movd    (%eax), %mm7
+       movd    4(%eax), %mm1
+       movd    8(%eax), %mm2
+       movd    (%edx), %mm4
+       movd    4(%edx), %mm5
+       pmuludq %mm7, %mm1
+       pmuludq %mm7, %mm2
+       paddq   %mm1, %mm4
+       paddq   %mm2, %mm5
+       movd    %mm4, (%edx)
+       psrlq   $32, %mm4
+       paddq   %mm5, %mm4
+       movd    %mm4, 4(%edx)
+       psrlq   $32, %mm4
+       movd    %mm4, 8(%edx)           C FIXME feed through!
+       lea     4(%eax), %eax
+
+L(am1):        C up[un-1] x up[un-2]
+       lea     8(%edx), %edx           C rp2 += 2
+       movd    (%eax), %mm7
+       movd    4(%eax), %mm2
+       movd    (%edx), %mm4
+       pmuludq %mm7, %mm2
+       paddq   %mm2, %mm4
+       movd    %mm4, (%edx)
+       psrlq   $32, %mm4
+       movd    %mm4, 4(%edx)
+
+C *** diag stuff, use elementary code for now
+
+       mov     4(%esp), %edx           C rp
+       mov     8(%esp), %eax           C up
+       mov     12(%esp), %ecx          C un
+
+       movd    (%eax), %mm2
+       pmuludq %mm2, %mm2              C src[0]^2
+
+       pcmpeqd %mm7, %mm7
+       psrlq   $32, %mm7
+
+       movd    4(%edx), %mm3           C dst[1]
+
+       movd    %mm2, (%edx)
+       psrlq   $32, %mm2
+
+       psllq   $1, %mm3                C 2*dst[1]
+       paddq   %mm3, %mm2
+       movd    %mm2, 4(%edx)
+       psrlq   $32, %mm2
+
+       sub     $2, %ecx
+
+L(diag):
+       movd    4(%eax), %mm0           C src limb
+       add     $4, %eax
+       pmuludq %mm0, %mm0
+       movq    %mm7, %mm1
+       pand    %mm0, %mm1              C diagonal low
+       psrlq   $32, %mm0               C diagonal high
+
+       movd    8(%edx), %mm3
+       psllq   $1, %mm3                C 2*dst[i]
+       paddq   %mm3, %mm1
+       paddq   %mm1, %mm2
+       movd    %mm2, 8(%edx)
+       psrlq   $32, %mm2
+
+       movd    12(%edx), %mm3
+       psllq   $1, %mm3                C 2*dst[i+1]
+       paddq   %mm3, %mm0
+       paddq   %mm0, %mm2
+       movd    %mm2, 12(%edx)
+       add     $8, %edx
+       psrlq   $32, %mm2
+
+       sub     $1, %ecx
+       jnz     L(diag)
+
+       movd    4(%eax), %mm0           C src[size-1]
+       pmuludq %mm0, %mm0
+       pand    %mm0, %mm7              C diagonal low
+       psrlq   $32, %mm0               C diagonal high
+
+       movd    8(%edx), %mm3           C dst[2*size-2]
+       psllq   $1, %mm3
+       paddq   %mm3, %mm7
+       paddq   %mm7, %mm2
+       movd    %mm2, 8(%edx)
+       psrlq   $32, %mm2
+
+       paddq   %mm0, %mm2
+       movd    %mm2, 12(%edx)          C dst[2*size-1]
+
+       emms
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/pentium4/sse2/sub_n.asm b/mpn/x86/pentium4/sse2/sub_n.asm

new file mode 100644 (file)

index 0000000..02d5f01
--- /dev/null
+++ b/mpn/x86/pentium4/sse2/sub_n.asm
@@ -0,0 +1,115 @@
+dnl  Intel Pentium-4 mpn_sub_n -- mpn subtraction.
+
+dnl  Copyright 2001, 2002 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C P4 Willamette, Northwood: 4.0 cycles/limb if dst!=src1 and dst!=src2
+C                          6.0 cycles/limb if dst==src1 or dst==src2
+C P4 Prescott:             >= 5 cycles/limb
+
+
+C mp_limb_t mpn_sub_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C                      mp_size_t size);
+C mp_limb_t mpn_sub_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C                       mp_size_t size, mp_limb_t carry);
+C
+C The main loop code is 2x unrolled so that the carry bit can alternate
+C between mm0 and mm1.
+
+defframe(PARAM_CARRY,20)
+defframe(PARAM_SIZE, 16)
+defframe(PARAM_SRC2, 12)
+defframe(PARAM_SRC1, 8)
+defframe(PARAM_DST,  4)
+
+dnl  re-use parameter space
+define(SAVE_EBX,`PARAM_SRC1')
+
+       TEXT
+       ALIGN(8)
+
+PROLOGUE(mpn_sub_nc)
+deflit(`FRAME',0)
+
+       movd    PARAM_CARRY, %mm0       C mm0 = initial borrow supplied by caller
+       jmp     L(start_nc)             C join mpn_sub_n just past its borrow clear
+
+EPILOGUE()
+
+       ALIGN(8)
+PROLOGUE(mpn_sub_n)
+deflit(`FRAME',0)
+       pxor    %mm0, %mm0              C no incoming borrow
+L(start_nc):
+       movl    PARAM_SRC1, %eax
+       movl    %ebx, SAVE_EBX          C src1 slot already read, park ebx there
+       movl    PARAM_SRC2, %ebx
+       movl    PARAM_DST, %edx
+       movl    PARAM_SIZE, %ecx
+
+       leal    (%eax,%ecx,4), %eax     C src1 end
+       leal    (%ebx,%ecx,4), %ebx     C src2 end
+       leal    (%edx,%ecx,4), %edx     C dst end
+       negl    %ecx                    C -size
+
+L(top):
+       C eax   src1 end
+       C ebx   src2 end
+       C ecx   counter, limbs, negative (tested only after a limb is done)
+       C edx   dst end
+       C mm0   incoming borrow for the first limb of this pass
+
+       movd    (%eax,%ecx,4), %mm1     C src1 limb, zero extended
+       movd    (%ebx,%ecx,4), %mm2     C src2 limb, zero extended
+       psubq   %mm2, %mm1              C 64-bit difference
+
+       psubq   %mm0, %mm1              C less the incoming borrow
+       movd    %mm1, (%edx,%ecx,4)     C store low 32 bits as the result limb
+
+       psrlq   $63, %mm1               C bit 63 of the difference is the new borrow
+
+       addl    $1, %ecx
+       jz      L(done_mm1)             C finished with the borrow held in mm1
+
+       movd    (%eax,%ecx,4), %mm0     C second limb: same steps, mm0/mm1 swapped
+       movd    (%ebx,%ecx,4), %mm2
+       psubq   %mm2, %mm0
+
+       psubq   %mm1, %mm0              C less the borrow from the first limb
+       movd    %mm0, (%edx,%ecx,4)
+
+       psrlq   $63, %mm0               C borrow is back in mm0 for L(top)
+
+       addl    $1, %ecx
+       jnz     L(top)
+
+
+       movd    %mm0, %eax              C return the final borrow (held in mm0)
+       movl    SAVE_EBX, %ebx
+       emms
+       ret
+
+L(done_mm1):
+       movd    %mm1, %eax              C return the final borrow (held in mm1)
+       movl    SAVE_EBX, %ebx
+       emms
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/pentium4/sse2/submul_1.asm b/mpn/x86/pentium4/sse2/submul_1.asm

new file mode 100644 (file)

index 0000000..ceb41f2
--- /dev/null
+++ b/mpn/x86/pentium4/sse2/submul_1.asm
@@ -0,0 +1,128 @@
+dnl  Intel Pentium-4 mpn_submul_1 -- Multiply a limb vector with a limb and
+dnl  subtract the result from a second limb vector.
+
+dnl  Copyright 2001, 2002 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C P4: 7 cycles/limb, unstable timing, at least on early Pentium4 silicon
+C     (stepping 10).
+
+
+C mp_limb_t mpn_submul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                         mp_limb_t mult);
+C mp_limb_t mpn_submul_1c (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                          mp_limb_t mult, mp_limb_t carry);
+C
+C This code is not particularly good at 7 c/l.  The dependent chain is only
+C 4 c/l and there are only 4 MMX unit instructions, so it's not clear why that
+C speed isn't achieved.
+C
+C The arrangements made here to get a two instruction dependent chain are
+C slightly subtle.  In the loop the carry (or borrow rather) is a negative
+C so that a paddq can be used to give a low limb ready to store, and a high
+C limb ready to become the new carry after a psrlq.
+C
+C If the carry was a simple twos complement negative then the psrlq shift
+C would need to bring in 0 bits or 1 bits according to whether the high was
+C zero or non-zero, since a non-zero value would represent a negative
+C needing sign extension.  That wouldn't be particularly easy to arrange and
+C certainly would add an instruction to the dependent chain, so instead an
+C offset is applied so that the high limb will be 0xFFFFFFFF+c.  With c in
+C the range -0xFFFFFFFF to 0, the value 0xFFFFFFFF+c is in the range 0 to
+C 0xFFFFFFFF and is therefore always positive and can always have 0 bits
+C shifted in, which is what psrlq does.
+C
+C The extra 0xFFFFFFFF must be subtracted before c is used, but that can be
+C done off the dependent chain.  The total adjustment then is to add
+C 0xFFFFFFFF00000000 to offset the new carry, and subtract
+C 0x00000000FFFFFFFF to remove the offset from the current carry, for a net
+C add of 0xFFFFFFFE00000001.  In the code this is applied to the destination
+C limb when fetched.
+C
+C It's also possible to view the 0xFFFFFFFF adjustment as a ones-complement
+C negative, which is how it's undone for the return value, but that doesn't
+C seem as clear.
+
+defframe(PARAM_CARRY,     20)
+defframe(PARAM_MULTIPLIER,16)
+defframe(PARAM_SIZE,      12)
+defframe(PARAM_SRC,       8)
+defframe(PARAM_DST,       4)
+
+       TEXT
+       ALIGN(16)
+
+PROLOGUE(mpn_submul_1c)
+deflit(`FRAME',0)
+       movd    PARAM_CARRY, %mm1       C mm1 = initial borrow supplied by caller
+       jmp     L(start_1c)             C join mpn_submul_1 past its borrow clear
+EPILOGUE()
+
+PROLOGUE(mpn_submul_1)
+deflit(`FRAME',0)
+       pxor    %mm1, %mm1              C initial borrow
+
+L(start_1c):
+       movl    PARAM_SRC, %eax
+       pcmpeqd %mm0, %mm0              C all ones
+
+       movd    PARAM_MULTIPLIER, %mm7
+       pcmpeqd %mm6, %mm6              C all ones
+
+       movl    PARAM_DST, %edx
+       psrlq   $32, %mm0               C 0x00000000FFFFFFFF
+
+       movl    PARAM_SIZE, %ecx
+       psllq   $32, %mm6               C 0xFFFFFFFF00000000
+
+       psubq   %mm0, %mm6              C 0xFFFFFFFE00000001
+
+       psubq   %mm1, %mm0              C 0xFFFFFFFF - borrow
+
+
+       C eax   src, incrementing
+       C ebx
+       C ecx   loop counter, decrementing
+       C edx   dst, incrementing
+       C
+       C mm0   0xFFFFFFFF - borrow
+       C mm6   0xFFFFFFFE00000001
+       C mm7   multiplier
+
+L(loop):
+       movd    (%eax), %mm1            C src limb
+       leal    4(%eax), %eax
+       movd    (%edx), %mm2            C dst limb
+       paddq   %mm6, %mm2              C add 0xFFFFFFFE00000001
+       pmuludq %mm7, %mm1              C src limb * multiplier, 64-bit product
+       psubq   %mm1, %mm2              C offset dst minus the product
+       paddq   %mm2, %mm0              C fold in the offset borrow
+       subl    $1, %ecx                C sets the flags tested by jnz below
+       movd    %mm0, (%edx)            C result limb, low 32 bits
+       psrlq   $32, %mm0               C high half is the next 0xFFFFFFFF - borrow
+       leal    4(%edx), %edx           C leal leaves the subl flags intact
+       jnz     L(loop)
+
+       movd    %mm0, %eax              C 0xFFFFFFFF - borrow
+       notl    %eax                    C ones complement recovers the borrow
+       emms
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/rshift.asm b/mpn/x86/rshift.asm

new file mode 100644 (file)

index 0000000..8e33eab
--- /dev/null
+++ b/mpn/x86/rshift.asm
@@ -0,0 +1,98 @@
+dnl  x86 mpn_rshift -- mpn right shift.
+
+dnl  Copyright 1992, 1994, 1996, 1999, 2000, 2001, 2002 Free Software
+dnl  Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C     cycles/limb
+C P54:   7.5
+C P55:   7.0
+C P6:    2.5
+C K6:    4.5
+C K7:    5.0
+C P4:   16.5
+
+
+C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                       unsigned shift);
+
+defframe(PARAM_SHIFT,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC,  8)
+defframe(PARAM_DST,  4)
+
+       TEXT
+       ALIGN(8)
+PROLOGUE(mpn_rshift)
+
+       pushl   %edi
+       pushl   %esi
+       pushl   %ebx
+deflit(`FRAME',12)
+
+       movl    PARAM_DST,%edi
+       movl    PARAM_SRC,%esi
+       movl    PARAM_SIZE,%edx
+       movl    PARAM_SHIFT,%ecx
+
+       leal    -4(%edi,%edx,4),%edi    C &dst[size-1]
+       leal    (%esi,%edx,4),%esi      C &src[size]
+       negl    %edx                    C -size, counts up towards zero
+
+       movl    (%esi,%edx,4),%ebx      C read least significant limb
+       xorl    %eax,%eax               C eax = 0, receives the bits shifted out
+       shrdl(  %cl, %ebx, %eax)        C compute carry limb
+       incl    %edx                    C -(size-1)
+       jz      L(end)                  C size == 1, no further limbs to load
+       pushl   %eax                    C push carry limb onto stack
+       testb   $1,%dl                  C parity of the limbs still to load
+       jnz     L(1)                    C enter loop in the middle
+       movl    %ebx,%eax               C L(oop) expects the lower limb in eax
+
+       ALIGN(8)
+L(oop):        movl    (%esi,%edx,4),%ebx      C load next higher limb
+       shrdl(  %cl, %ebx, %eax)        C compute result limb
+       movl    %eax,(%edi,%edx,4)      C store it
+       incl    %edx
+L(1):  movl    (%esi,%edx,4),%eax      C load next higher limb, roles swapped
+       shrdl(  %cl, %eax, %ebx)        C compute result limb
+       movl    %ebx,(%edi,%edx,4)      C store it
+       incl    %edx
+       jnz     L(oop)                  C exits with the top src limb in eax
+
+       shrl    %cl,%eax                C compute most significant limb
+       movl    %eax,(%edi)             C store it
+
+       popl    %eax                    C pop carry limb, the return value
+
+       popl    %ebx
+       popl    %esi
+       popl    %edi
+       ret
+
+L(end):        shrl    %cl,%ebx                C size == 1: src[0] is the top limb too
+       movl    %ebx,(%edi)             C store it, eax still holds the carry limb
+
+       popl    %ebx
+       popl    %esi
+       popl    %edi
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/sqr_basecase.asm b/mpn/x86/sqr_basecase.asm

new file mode 100644 (file)

index 0000000..9a7e133
--- /dev/null
+++ b/mpn/x86/sqr_basecase.asm
@@ -0,0 +1,348 @@
+dnl  x86 generic mpn_sqr_basecase -- square an mpn number.
+
+dnl  Copyright 1999, 2000, 2002, 2003 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+
+C     cycles/crossproduct  cycles/triangleproduct
+C P5:
+C P6:
+C K6:
+C K7:
+C P4:
+
+
+C void mpn_sqr_basecase (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C
+C The algorithm is basically the same as mpn/generic/sqr_basecase.c, but a
+C lot of function call overheads are avoided, especially when the size is
+C small.
+C
+C The mul1 loop is not unrolled like mul_1.asm, it doesn't seem worth the
+C code size to do so here.
+C
+C Enhancements:
+C
+C The addmul loop here is also not unrolled like aorsmul_1.asm and
+C mul_basecase.asm are.  Perhaps it should be done.  It'd add to the
+C complexity, but if it's worth doing in the other places then it should be
+C worthwhile here.
+C
+C A fully-unrolled style like other sqr_basecase.asm versions (k6, k7, p6)
+C might be worth considering.  That'd add quite a bit to the code size, but
+C only as much as is used would be dragged into L1 cache.
+
+defframe(PARAM_SIZE,12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+       TEXT
+       ALIGN(8)
+PROLOGUE(mpn_sqr_basecase)
+deflit(`FRAME',0)
+
+       movl    PARAM_SIZE, %edx
+
+       movl    PARAM_SRC, %eax
+
+       cmpl    $2, %edx                C flags are kept by the movl below
+       movl    PARAM_DST, %ecx
+
+       je      L(two_limbs)            C size == 2
+       ja      L(three_or_more)        C size > 2, otherwise fall through
+
+
+C -----------------------------------------------------------------------------
+C one limb only
+       C eax   src
+       C ebx
+       C ecx   dst
+       C edx
+
+       movl    (%eax), %eax
+       mull    %eax                    C edx:eax = src[0]^2
+       movl    %eax, (%ecx)            C dst[0]
+       movl    %edx, 4(%ecx)           C dst[1]
+       ret
+
+
+C -----------------------------------------------------------------------------
+       ALIGN(8)
+L(two_limbs):
+       C eax   src
+       C ebx
+       C ecx   dst
+       C edx
+
+       pushl   %ebx
+       pushl   %ebp
+
+       movl    %eax, %ebx      C keep src, eax is needed by mull
+       movl    (%eax), %eax
+
+       mull    %eax            C src[0]^2
+
+       pushl   %esi
+       pushl   %edi
+
+       movl    %edx, %esi      C dst[1]
+       movl    %eax, (%ecx)    C dst[0]
+
+       movl    4(%ebx), %eax
+       mull    %eax            C src[1]^2
+
+       movl    %eax, %edi      C dst[2]
+       movl    %edx, %ebp      C dst[3]
+
+       movl    (%ebx), %eax
+       mull    4(%ebx)         C src[0]*src[1]
+
+       addl    %eax, %esi      C add the cross product once ...
+
+       adcl    %edx, %edi
+
+       adcl    $0, %ebp
+       addl    %esi, %eax      C ... and a second time, into edx:eax
+
+       adcl    %edi, %edx
+       movl    %eax, 4(%ecx)   C dst[1], movl keeps the carry flag
+
+       adcl    $0, %ebp        C carry out of the adcl into edx
+
+       movl    %edx, 8(%ecx)   C dst[2]
+       movl    %ebp, 12(%ecx)  C dst[3]
+
+       popl    %edi
+       popl    %esi
+
+       popl    %ebp
+       popl    %ebx
+
+       ret
+
+
+C -----------------------------------------------------------------------------
+       ALIGN(8)
+L(three_or_more):
+deflit(`FRAME',0)
+       C eax   src
+       C ebx
+       C ecx   dst
+       C edx   size
+
+       pushl   %ebx    FRAME_pushl()
+       pushl   %edi    FRAME_pushl()
+
+       pushl   %esi    FRAME_pushl()
+       pushl   %ebp    FRAME_pushl()
+
+       leal    (%ecx,%edx,4), %edi     C &dst[size], end of this mul1
+       leal    (%eax,%edx,4), %esi     C &src[size]
+
+C First multiply src[0]*src[1..size-1] and store at dst[1..size].
+
+       movl    (%eax), %ebp            C src[0], multiplier
+       movl    %edx, %ecx
+
+       negl    %ecx                    C -size
+       xorl    %ebx, %ebx              C clear carry limb
+
+       incl    %ecx                    C -(size-1)
+
+L(mul1):
+       C eax   scratch
+       C ebx   carry
+       C ecx   counter, limbs, negative
+       C edx   scratch
+       C esi   &src[size]
+       C edi   &dst[size]
+       C ebp   multiplier
+
+       movl    (%esi,%ecx,4), %eax
+       mull    %ebp
+       addl    %eax, %ebx              C low product plus carry in
+       adcl    $0, %edx
+       movl    %ebx, (%edi,%ecx,4)
+       movl    %edx, %ebx              C high product is the next carry
+       incl    %ecx
+       jnz     L(mul1)
+
+       movl    %ebx, (%edi)            C final carry limb goes to dst[size]
+
+
+       C Add products src[n]*src[n+1..size-1] at dst[2*n+1...], for
+       C n=1..size-2.
+       C
+       C The last product src[size-2]*src[size-1], which is the end corner
+       C of the product triangle, is handled separately at the end to save
+       C looping overhead.  If size is 3 then it's only this that needs to
+       C be done.
+       C
+       C In the outer loop %esi is a constant, and %edi just advances by 1
+       C limb each time.  The size of the operation decreases by 1 limb
+       C each time.
+
+       C eax
+       C ebx   carry limb, already stored at (%edi)
+       C ecx
+       C edx
+       C esi   &src[size]
+       C edi   &dst[size]
+       C ebp
+
+       movl    PARAM_SIZE, %ecx
+       subl    $3, %ecx
+       jz      L(corner)               C size == 3, only the corner remains
+
+       negl    %ecx                    C -(size-3)
+
+dnl  re-use parameter space
+define(VAR_OUTER,`PARAM_DST')
+
+L(outer):
+       C eax
+       C ebx
+       C ecx   outer loop counter, -(size-3) to -1
+       C edx
+       C esi   &src[size]
+       C edi   dst, pointing at stored carry limb of previous loop
+       C ebp
+
+       movl    %ecx, VAR_OUTER         C kept in the PARAM_DST stack slot
+       addl    $4, %edi                C advance dst end
+
+       movl    -8(%esi,%ecx,4), %ebp   C next multiplier
+       subl    $1, %ecx                C inner count: limbs above the multiplier
+
+       xorl    %ebx, %ebx              C initial carry limb
+
+L(inner):
+       C eax   scratch
+       C ebx   carry limb
+       C ecx   counter, -n-1 to -1
+       C edx   scratch
+       C esi   &src[size]
+       C edi   dst end of this addmul
+       C ebp   multiplier
+
+       movl    (%esi,%ecx,4), %eax
+       mull    %ebp
+       addl    %ebx, %eax              C low product plus carry in
+       adcl    $0, %edx
+       addl    %eax, (%edi,%ecx,4)     C accumulate into dst
+       adcl    $0, %edx
+       movl    %edx, %ebx              C next carry limb, flag already included
+       addl    $1, %ecx
+       jl      L(inner)                C loop while the counter is negative
+
+
+       movl    %ebx, (%edi)            C store the carry limb at dst end
+       movl    VAR_OUTER, %ecx
+       incl    %ecx
+       jnz     L(outer)
+
+
+L(corner):
+       C esi   &src[size]
+       C edi   &dst[2*size-3]
+
+       movl    -4(%esi), %eax
+       mull    -8(%esi)                C src[size-1]*src[size-2]
+       addl    %eax, 0(%edi)
+       adcl    $0, %edx
+       movl    %edx, 4(%edi)           C dst high limb
+
+
+C -----------------------------------------------------------------------------
+C Left shift of dst[1..2*size-2], high bit shifted out becomes dst[2*size-1].
+
+       movl    PARAM_SIZE, %eax
+       negl    %eax
+       addl    $1, %eax                C -(size-1) and clear carry
+
+L(lshift):
+       C eax   counter, negative
+       C ebx
+       C ecx
+       C edx
+       C esi
+       C edi   &dst[2*size-3]
+       C ebp
+
+       rcll    8(%edi,%eax,8)
+       rcll    12(%edi,%eax,8)
+       incl    %eax                    C incl leaves the carry flag for rcll
+       jnz     L(lshift)
+
+
+       adcl    %eax, %eax              C high bit out
+       movl    %eax, 8(%edi)           C dst most significant limb
+
+
+C Now add in the squares on the diagonal, namely src[0]^2, src[1]^2, ...,
+C src[size-1]^2.  dst[0] hasn't been set at all yet, and just gets the
+C low limb of src[0]^2.
+
+       movl    PARAM_SRC, %esi
+       movl    (%esi), %eax            C src[0]
+       mull    %eax                    C src[0]^2
+
+       movl    PARAM_SIZE, %ecx
+       leal    (%esi,%ecx,4), %esi     C src end
+
+       negl    %ecx                    C -size
+       movl    %edx, %ebx              C initial carry
+
+       movl    %eax, 12(%edi,%ecx,8)   C dst[0]
+       incl    %ecx                    C -(size-1)
+
+L(diag):
+       C eax   scratch (low product)
+       C ebx   carry limb
+       C ecx   counter, -(size-1) to -1
+       C edx   scratch (high product)
+       C esi   &src[size]
+       C edi   &dst[2*size-3]
+       C ebp
+
+       movl    (%esi,%ecx,4), %eax
+       mull    %eax
+
+       addl    %ebx, 8(%edi,%ecx,8)    C previous high limb into the odd dst limb
+       movl    %edx, %ebx
+
+       adcl    %eax, 12(%edi,%ecx,8)   C low limb of this square, plus carry
+       adcl    $0, %ebx                C high limb plus carry, for the next pass
+
+       incl    %ecx
+       jnz     L(diag)
+
+
+       addl    %ebx, 8(%edi)           C dst most significant limb
+
+       popl    %ebp
+       popl    %esi
+
+       popl    %edi
+       popl    %ebx
+
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/t-zdisp.sh b/mpn/x86/t-zdisp.sh

new file mode 100755 (executable)

index 0000000..6c55067
--- /dev/null
+++ b/mpn/x86/t-zdisp.sh
@@ -0,0 +1,60 @@
+#! /bin/sh
+#
+# Copyright 2000 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation; either version 3 of the License, or (at
+# your option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+# Usage: cd $(builddir)/mpn
+#        $(srcdir)/x86/t-zdisp.sh
+#
+# Run the Zdisp() macro instructions through the assembler to check
+# the encodings used.  Mismatches are printed, no output means all ok.
+#
+# This program is only meant for use during development.  It can be
+# run in the mpn build directory of any x86 configuration.
+#
+# For this test the assembler needs to generate byte sized 0
+# displacements when given something like 0(%eax).  Recent versions of
+# gas are suitable (eg. 2.9.x or 2.10.x).
+#
+# The work files tmp-zdisptest.asm, tmp-zdisptest.s and tmp-zdisptest.o
+# are written in the current directory and are not removed afterwards.
+
+# Give up as soon as m4 or as fails.
+set -e
+
+# Write the m4 input.  The heredoc delimiter is quoted (\EOF) so the shell
+# leaves $1, $2, backquotes etc alone for m4 to see.
+cat >tmp-zdisptest.asm <<\EOF
+
+include(`../config.m4')
+
+dnl  Redefine Zdisp_match to output its pattern and encoding.
+define(`Zdisp_match',
+`define(`Zdisp_found',1)dnl
+ifelse(`$2',0,`        $1      $2$3, $4')`'dnl
+ifelse(`$3',0,`        $1      $2, $3$4')`'dnl
+
+       .byte   $5
+')
+       .text
+       Zdisp()
+EOF
+
+# Expand the macros and assemble the result.
+m4 tmp-zdisptest.asm >tmp-zdisptest.s
+as -o tmp-zdisptest.o tmp-zdisptest.s
+
+# Demand duplicates from the instruction patterns and byte encodings.
+# The redefined Zdisp_match above emits every table entry twice, once as
+# an ordinary instruction and once as its .byte sequence.  The awk script
+# keeps only the disassembly lines and strips the leading "address:", so
+# the two forms give identical text when the encodings agree.  uniq -u
+# then prints just the lines which have no twin.
+objdump -d tmp-zdisptest.o | awk '
+/^ *[a-z0-9]+:/ {
+       sub(/^ *[a-z0-9]+:/,"")
+        print
+}' | sort | uniq -u
diff --git a/mpn/x86/t-zdisp2.pl b/mpn/x86/t-zdisp2.pl

new file mode 100755 (executable)

index 0000000..f32d070
--- /dev/null
+++ b/mpn/x86/t-zdisp2.pl
@@ -0,0 +1,136 @@
+#!/usr/bin/perl -w
+#
+# Copyright 2001, 2002 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation; either version 3 of the License, or (at
+# your option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+# Usage: cd $(builddir)/mpn
+#        $(srcdir)/x86/t-zdisp2.pl
+#
+# Grep for any "0(reg...)" addressing modes coming out of the x86 .asm
+# files.  Additive expressions like "12+4-16" are recognised too.
+#
+# Old gas doesn't preserve the "0" displacement, so if it's wanted then
+# Zdisp ought to be used to give explicit .byte sequences.  See
+# mpn/x86/README.
+#
+# No output apart from the closing "total N processed" line means everything
+# is ok.  All the asm files are put through m4 in PIC and non-PIC modes, and
+# in each multi-function form, all of which can take a while to run.
+#
+# This program is only meant for use during development.
+
+use strict;
+use File::Find;
+use File::Basename;
+use Getopt::Std;
+
+# Command line: -t traces the gmp-mparam.h files read, the defines written
+# and the m4 commands run.
+my %opt;
+getopts('t', \%opt);
+
+
+# Pick up srcdir from the Makefile in the current directory.
+my $srcdir;
+open IN, '<Makefile' or die;
+while (<IN>) {
+  if (/^srcdir[ \t]*=[ \t]*(.*)/) {
+    $srcdir = $1;
+    last;
+  }
+}
+close IN or die;
+defined $srcdir or die "Cannot find \$srcdir in Makefile\n";
+
+# Number of m4 runs made, reported at the end.
+my $filecount = 0;
+
+# m4 input holding the SQR_TOOM2_THRESHOLD define for the part of the tree
+# being processed.  It starts off empty so m4 always has a file to read.
+my $tempfile = 't-zdisp2.tmp';
+open KARA, ">$tempfile" or die;
+close KARA or die;
+
+# Walk $srcdir/x86.  process_mparam is the preprocess hook, process is run
+# on each entry.  no_chdir leaves the current directory alone, so the
+# relative ../config.m4 and $tempfile names handed to m4 stay valid.
+find({ wanted => \&process, preprocess => \&process_mparam, no_chdir => 1 },
+     "$srcdir/x86");
+
+# File::Find "wanted" callback, called with $_ set to the entry being
+# visited.  A gmp-mparam.h refreshes the SQR_TOOM2_THRESHOLD define, a .asm
+# file is checked for zero displacements, anything else is ignored.
+#
+# The "." in each pattern is escaped so that only a literal dot matches.
+sub process {
+  if (/gmp-mparam\.h$/) {
+    process_mparam($_);
+  } elsif (/\.asm$/) {
+    process_asm($_);
+  }
+}
+
+# Ensure we're using the right SQR_TOOM2_THRESHOLD for the part of the
+# tree being processed.
+#
+# If the directory File::Find is currently in has a gmp-mparam.h with a
+# "#define SQR_TOOM2_THRESHOLD <digits>" line, $tempfile is rewritten to
+# hold the matching m4 define.  Otherwise $tempfile is left as it was.
+#
+# This is used as the File::Find preprocess hook, hence the argument list
+# (the directory entries) is returned unchanged.  Lines are read into a
+# lexical rather than $_, since $_ belongs to File::Find during the walk
+# and must not be modified.
+sub process_mparam {
+  my $file = "$File::Find::dir/gmp-mparam.h";
+  if (-f $file) {
+    print "$file\n" if $opt{'t'};
+    open MPARAM, "<$file" or die;
+    while (defined (my $line = <MPARAM>)) {
+      if ($line =~ /^#define SQR_TOOM2_THRESHOLD[ \t]*([0-9][0-9]*)/) {
+        open KARA, ">$tempfile" or die;
+        print KARA "define(\`SQR_TOOM2_THRESHOLD',$1)\n\n";
+        print "define(\`SQR_TOOM2_THRESHOLD',$1)\n" if $opt{'t'};
+        close KARA or die;
+        last;
+      }
+    }
+    close MPARAM or die;
+  }
+  return @_;
+}
+
+# Put one .asm file through m4 and print any line using a zero displacement.
+#
+# Files holding more than one function are expanded once per function, by
+# means of -DOPERATION_<fun>, and every expansion is done both without and
+# with -DPIC.  $filecount is incremented for each m4 run.
+#
+# Within the m4 output the first "<expression>(%" on a line is taken as a
+# displacement.  The expression consists only of digits, whitespace, "+"
+# and "-", and is evaluated with eval.  Lines where it comes to 0 are
+# printed, preceded the first time by a "$file: 0: $pic" header.
+#
+# Lines are read into a lexical rather than $_, since this runs inside the
+# File::Find wanted callback, where $_ must not be modified.
+sub process_asm {
+  my ($file) = @_;
+  my $base = basename ($file, '.asm');
+
+  my @funs;
+  if    ($base eq 'aors_n')    { @funs = qw(add_n sub_n); }
+  elsif ($base eq 'aorsmul_1') { @funs = qw(addmul_1 submul_1); }
+  elsif ($base eq 'popham')    { @funs = qw(popcount hamdist); }
+  elsif ($base eq 'logops_n')  { @funs = qw(and_n andn_n nand_n ior_n iorn_n nior_n xor_n xnor_n); }
+  elsif ($base eq 'lorrshift') { @funs = qw(lshift rshift); }
+  else                         { @funs = ($base); }
+
+  foreach my $fun (@funs) {
+    foreach my $pic ('', ' -DPIC') {
+      my $header = "$file: 0: $pic\n";
+      $filecount++;
+
+      my $m4 = "m4 -DHAVE_HOST_CPU_athlon -DOPERATION_$fun $pic ../config.m4 $tempfile $file";
+      print "$m4\n" if $opt{'t'};
+
+      open IN, "$m4 |" or die;
+      while (defined (my $line = <IN>)) {
+        next unless $line =~ /([0-9+-][0-9 \t+-]*)\(%/;
+        my $pat=$1;
+        $pat = eval($pat);
+        next if ($pat != 0);
+        print "$header$line";
+        $header='';
+      }
+      close IN or die;
+    }
+  }
+}
+
+# Remove the scratch m4 file and report how many m4 runs were made.
+unlink($tempfile);
+print "total $filecount processed\n";
+exit 0;
+
+
+# Local variables:
+# perl-indent-level: 2
+# End:
diff --git a/mpn/x86/udiv.asm b/mpn/x86/udiv.asm

new file mode 100644 (file)

index 0000000..5c7d3f3
--- /dev/null
+++ b/mpn/x86/udiv.asm
@@ -0,0 +1,41 @@
+dnl  x86 mpn_udiv_qrnnd -- 2 by 1 limb division
+
+dnl  Copyright 1999, 2000, 2002 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_udiv_qrnnd (mp_limb_t *remptr, mp_limb_t high, mp_limb_t low,
+C                           mp_limb_t divisor);
+C
+C Divide the two-limb value high:low by divisor with a single divl.  The
+C quotient is returned in eax and the remainder is stored at remptr.
+C Only eax, ecx and edx are written and no stack is used beyond the
+C incoming parameters.
+
+defframe(PARAM_REMPTR,  4)
+defframe(PARAM_HIGH,    8)
+defframe(PARAM_LOW,     12)
+defframe(PARAM_DIVISOR, 16)
+
+       TEXT
+       ALIGN(8)
+PROLOGUE(mpn_udiv_qrnnd)
+deflit(`FRAME',0)
+       movl    PARAM_HIGH, %edx        C edx:eax is the dividend
+       movl    PARAM_LOW, %eax
+       movl    PARAM_REMPTR, %ecx      C divl leaves ecx alone
+       divl    PARAM_DIVISOR           C eax quotient, edx remainder
+       movl    %edx, (%ecx)
+       ret
+EPILOGUE()
diff --git a/mpn/x86/umul.asm b/mpn/x86/umul.asm

new file mode 100644 (file)

index 0000000..d0116de
--- /dev/null
+++ b/mpn/x86/umul.asm
@@ -0,0 +1,40 @@
+dnl  mpn_umul_ppmm -- 1x1->2 limb multiplication
+
+dnl  Copyright 1999, 2000, 2002 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_umul_ppmm (mp_limb_t *lowptr, mp_limb_t m1, mp_limb_t m2);
+C
+C Multiply m1 by m2 with a single mull.  The low limb of the product is
+C stored at lowptr and the high limb is returned in eax.  Only eax, ecx
+C and edx are written.
+
+defframe(PARAM_LOWPTR, 4)
+defframe(PARAM_M1,     8)
+defframe(PARAM_M2,    12)
+
+       TEXT
+       ALIGN(8)
+PROLOGUE(mpn_umul_ppmm)
+deflit(`FRAME',0)
+       movl    PARAM_M1, %eax
+       movl    PARAM_LOWPTR, %ecx      C mull leaves ecx alone
+       mull    PARAM_M2                C edx:eax is the product
+       movl    %eax, (%ecx)            C low limb out through the pointer
+       movl    %edx, %eax              C high limb is the return value
+       ret
+EPILOGUE()
diff --git a/mpn/x86/x86-defs.m4 b/mpn/x86/x86-defs.m4

new file mode 100644 (file)

index 0000000..b1f36dd
--- /dev/null
+++ b/mpn/x86/x86-defs.m4
@@ -0,0 +1,956 @@
+divert(-1)
+
+
+dnl  m4 macros for x86 assembler.
+
+
+dnl  Copyright 1999, 2000, 2001, 2002, 2003, 2007 Free Software Foundation,
+dnl  Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+dnl  Notes:
+dnl
+dnl  m4 isn't perfect for processing BSD style x86 assembler code, the main
+dnl  problems are,
+dnl
+dnl  1. Doing define(foo,123) and then using foo in an addressing mode like
+dnl     foo(%ebx) expands as a macro rather than a constant.  This is worked
+dnl     around by using deflit() from asm-defs.m4, instead of define().
+dnl
+dnl  2. Immediates in macro definitions need a space or `' to stop the $
+dnl     looking like a macro parameter.  For example,
+dnl
+dnl            define(foo, `mov $ 123, %eax')
+dnl
+dnl     This is only a problem in macro definitions, not in ordinary text,
+dnl     and not in macro parameters like text passed to forloop() or ifdef().
+
+
+dnl  Bytes per limb, 4 here.  deflit rather than define is used so the name
+dnl  is still usable directly in front of a parenthesized operand.
+deflit(BYTES_PER_MP_LIMB, 4)
+
+
+dnl  Libtool gives -DPIC -DDLL_EXPORT to indicate a cygwin or mingw DLL.  We
+dnl  undefine PIC since we don't need to be position independent in this
+dnl  case and definitely don't want the ELF style _GLOBAL_OFFSET_TABLE_ etc.
+
+ifdef(`DLL_EXPORT',`undefine(`PIC')')
+
+
+dnl  Usage: CPUVEC_FUNCS_LIST
+dnl
+dnl  A list of the functions from gmp-impl.h x86 struct cpuvec_t, in the
+dnl  order they appear in that structure.
+dnl
+dnl  Each name is quoted individually and the list as a whole is quoted
+dnl  again, so expanding CPUVEC_FUNCS_LIST gives a comma separated list of
+dnl  still-quoted names.
+
+define(CPUVEC_FUNCS_LIST,
+``add_n',
+`addmul_1',
+`copyd',
+`copyi',
+`divexact_1',
+`divexact_by3c',
+`divrem_1',
+`gcd_1',
+`lshift',
+`mod_1',
+`mod_34lsub1',
+`modexact_1c_odd',
+`mul_1',
+`mul_basecase',
+`preinv_divrem_1',
+`preinv_mod_1',
+`rshift',
+`sqr_basecase',
+`sub_n',
+`submul_1'')
+
+
+dnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)
+dnl
+dnl  In the x86 code we use explicit TEXT and ALIGN() calls in the code,
+dnl  since different alignments are wanted in various circumstances.  So for
+dnl  instance,
+dnl
+dnl                  TEXT
+dnl                  ALIGN(16)
+dnl          PROLOGUE(mpn_add_n)
+dnl          ...
+dnl          EPILOGUE()
+dnl
+dnl  The expansion is the GLOBL, TYPE and COFF_TYPE declarations, then the
+dnl  label, then a profiling hook selected by WANT_PROFILING: call_mcount
+dnl  for `prof' or `gprof', call_instrument(enter) for `instrument', and
+dnl  nothing for any other value.
+
+define(`PROLOGUE_cpu',
+m4_assert_numargs(1)
+m4_assert_defined(`WANT_PROFILING')
+       `GLOBL  $1
+       TYPE($1,`function')
+       COFF_TYPE($1)
+$1:
+ifelse(WANT_PROFILING,`prof',      `   call_mcount')
+ifelse(WANT_PROFILING,`gprof',     `   call_mcount')
+ifelse(WANT_PROFILING,`instrument',`   call_instrument(enter)')
+')
+
+
+dnl  Usage: COFF_TYPE(GSYM_PREFIX`'foo)
+dnl
+dnl  Emit COFF style ".def ... .endef" type information for a function, when
+dnl  supported.  The argument should include any GSYM_PREFIX.
+dnl
+dnl  See autoconf macro GMP_ASM_COFF_TYPE for HAVE_COFF_TYPE.
+dnl
+dnl  Nothing is emitted unless HAVE_COFF_TYPE is exactly `yes'.  The .scl 2
+dnl  and .type 32 values are presumably the COFF codes for an external
+dnl  symbol and a function respectively (NOTE(review): confirm against the
+dnl  assembler documentation).
+
+define(COFF_TYPE,
+m4_assert_numargs(1)
+m4_assert_defined(`HAVE_COFF_TYPE')
+`ifelse(HAVE_COFF_TYPE,yes,
+       `.def   $1
+       .scl    2
+       .type   32
+       .endef')')
+
+
+dnl  Usage: call_mcount
+dnl
+dnl  For `gprof' style profiling, %ebp is setup as a frame pointer.  None of
+dnl  the assembler routines use %ebp this way, so it's done only for the
+dnl  benefit of mcount.  glibc sysdeps/i386/i386-mcount.S shows how mcount
+dnl  gets the current function from (%esp) and the parent from 4(%ebp).
+dnl
+dnl  For `prof' style profiling gcc generates mcount calls without setting
+dnl  up %ebp, and the same is done here.
+dnl
+dnl  A counter word is put in DATA only when the applicable register macro
+dnl  (MCOUNT_PIC_REG under PIC, MCOUNT_NONPIC_REG otherwise) is non-empty.
+dnl  In the PIC case %ebx is saved, pointed at the GOT and restored around
+dnl  the call, with the counter address loaded only if MCOUNT_PIC_REG is
+dnl  non-empty; in the non-PIC case likewise only if MCOUNT_NONPIC_REG is
+dnl  non-empty.
+dnl
+dnl  mcount_counter is incremented at the end of each expansion, which keeps
+dnl  the L(mcount_data_N) and L(mcount_here_N) labels distinct.
+
+define(`call_mcount',
+m4_assert_numargs(-1)
+m4_assert_defined(`WANT_PROFILING')
+m4_assert_defined(`MCOUNT_PIC_REG')
+m4_assert_defined(`MCOUNT_NONPIC_REG')
+m4_assert_defined(`MCOUNT_PIC_CALL')
+m4_assert_defined(`MCOUNT_NONPIC_CALL')
+`ifelse(ifdef(`PIC',`MCOUNT_PIC_REG',`MCOUNT_NONPIC_REG'),,,
+`      DATA
+       ALIGN(4)
+L(mcount_data_`'mcount_counter):
+       W32     0
+       TEXT
+')dnl
+ifelse(WANT_PROFILING,`gprof',
+`      pushl   %ebp
+       movl    %esp, %ebp
+')dnl
+ifdef(`PIC',
+`      pushl   %ebx
+       call_movl_eip_to_ebx
+L(mcount_here_`'mcount_counter):
+       addl    $_GLOBAL_OFFSET_TABLE_+[.-L(mcount_here_`'mcount_counter)], %ebx
+ifelse(MCOUNT_PIC_REG,,,
+`      leal    L(mcount_data_`'mcount_counter)@GOTOFF(%ebx), MCOUNT_PIC_REG')
+MCOUNT_PIC_CALL
+       popl    %ebx
+',`dnl non-PIC
+ifelse(MCOUNT_NONPIC_REG,,,
+`      movl    `$'L(mcount_data_`'mcount_counter), MCOUNT_NONPIC_REG
+')dnl
+MCOUNT_NONPIC_CALL
+')dnl
+ifelse(WANT_PROFILING,`gprof',
+`      popl    %ebp
+')
+define(`mcount_counter',incr(mcount_counter))
+')
+
+define(mcount_counter,1)
+
+
+dnl  Usage: call_instrument(enter|exit)
+dnl
+dnl  Call __cyg_profile_func_enter or __cyg_profile_func_exit.
+dnl
+dnl  For PIC, most routines don't require _GLOBAL_OFFSET_TABLE_ themselves
+dnl  so %ebx is just setup for these calls.  It's a bit wasteful to repeat
+dnl  the setup for the exit call having done it earlier for the enter, but
+dnl  there's nowhere very convenient to hold %ebx through the length of a
+dnl  routine, in general.
+dnl
+dnl  For PIC, because instrument_current_function will be within the current
+dnl  object file we can get it just as an offset from %eip, there's no need
+dnl  to use the GOT.
+dnl
+dnl  No attempt is made to maintain the stack alignment gcc generates with
+dnl  -mpreferred-stack-boundary.  This wouldn't be hard, but it seems highly
+dnl  unlikely the instrumenting functions would be doing anything that'd
+dnl  benefit from alignment, in particular they're unlikely to be using
+dnl  doubles or long doubles on the stack.
+dnl
+dnl  The FRAME scheme is used to conveniently account for the register saves
+dnl  before accessing the return address.  Any previous value is saved and
+dnl  restored, since plenty of code keeps a value across a "ret" in the
+dnl  middle of a routine.
+dnl
+dnl  Two words are pushed for the call, the return address of the current
+dnl  function and then the address of the function itself, and they're
+dnl  dropped again with the addl to %esp.  For `exit' only, %eax is saved
+dnl  and restored around the whole sequence.  In the PIC form %ecx is used
+dnl  as a scratch register and isn't saved.
+dnl
+dnl  instrument_count is incremented at the end of each expansion, which
+dnl  keeps the L(instrument_here_N) labels distinct.
+
+define(call_instrument,
+m4_assert_numargs(1)
+`      pushdef(`FRAME',0)
+ifelse($1,exit,
+`      pushl   %eax    FRAME_pushl()   C return value
+')
+ifdef(`PIC',
+`      pushl   %ebx    FRAME_pushl()
+       call_movl_eip_to_ebx
+L(instrument_here_`'instrument_count):
+       movl    %ebx, %ecx
+       addl    $_GLOBAL_OFFSET_TABLE_+[.-L(instrument_here_`'instrument_count)], %ebx
+       C use addl rather than leal to avoid old gas bugs, see mpn/x86/README
+       addl    $instrument_current_function-L(instrument_here_`'instrument_count), %ecx
+       pushl   m4_empty_if_zero(FRAME)(%esp)   FRAME_pushl()   C return addr
+       pushl   %ecx                            FRAME_pushl()   C this function
+       call    GSYM_PREFIX`'__cyg_profile_func_$1@PLT
+       addl    $`'8, %esp
+       popl    %ebx
+',
+`      C non-PIC
+       pushl   m4_empty_if_zero(FRAME)(%esp)   FRAME_pushl()   C return addr
+       pushl   $instrument_current_function    FRAME_pushl()   C this function
+       call    GSYM_PREFIX`'__cyg_profile_func_$1
+       addl    $`'8, %esp
+')
+ifelse($1,exit,
+`      popl    %eax                    C return value
+')
+       popdef(`FRAME')
+define(`instrument_count',incr(instrument_count))
+')
+define(instrument_count,1)
+
+
+dnl  Usage: instrument_current_function
+dnl
+dnl  Return the current function name for instrumenting purposes.  This is
+dnl  PROLOGUE_current_function, but it sticks at the first such name seen.
+dnl
+dnl  Sticking to the first name seen ensures that multiple-entrypoint
+dnl  functions like mpn_add_nc and mpn_add_n will make enter and exit calls
+dnl  giving the same function address.
+dnl
+dnl  The first use records the name in instrument_current_function_seen, and
+dnl  every later use gives that recorded value.
+
+define(instrument_current_function,
+m4_assert_numargs(-1)
+`ifdef(`instrument_current_function_seen',
+`instrument_current_function_seen',
+`define(`instrument_current_function_seen',PROLOGUE_current_function)dnl
+PROLOGUE_current_function')')
+
+
+dnl  Usage: call_movl_eip_to_ebx
+dnl
+dnl  Generate a call to L(movl_eip_to_ebx), and record the need for that
+dnl  routine.
+dnl
+dnl  The need is recorded by defining movl_eip_to_ebx_needed as 1, which is
+dnl  what generate_movl_eip_to_ebx tests.
+
+define(call_movl_eip_to_ebx,
+m4_assert_numargs(-1)
+`call  L(movl_eip_to_ebx)
+define(`movl_eip_to_ebx_needed',1)')
+
+dnl  Usage: generate_movl_eip_to_ebx
+dnl
+dnl  Emit a L(movl_eip_to_ebx) routine, if needed and not already generated.
+dnl
+dnl  The routine copies its own return address from (%esp) into %ebx and
+dnl  returns with ret_internal, so no profiling exit call is attached to it.
+dnl  movl_eip_to_ebx_done is set to 1 once the routine has been emitted,
+dnl  making later expansions of this macro empty.
+
+define(generate_movl_eip_to_ebx,
+m4_assert_numargs(-1)
+`ifelse(movl_eip_to_ebx_needed,1,
+`ifelse(movl_eip_to_ebx_done,1,,
+`L(movl_eip_to_ebx):
+       movl    (%esp), %ebx
+       ret_internal
+define(`movl_eip_to_ebx_done',1)
+')')')
+
+
+dnl  Usage: ret
+dnl
+dnl  Generate a "ret", but if doing instrumented profiling then call
+dnl  __cyg_profile_func_exit first.
+dnl
+dnl  Each ret is followed by generate_movl_eip_to_ebx, so when the
+dnl  L(movl_eip_to_ebx) routine has been asked for it comes out after the
+dnl  next ret, a place execution doesn't fall into.
+
+define(ret,
+m4_assert_numargs(-1)
+m4_assert_defined(`WANT_PROFILING')
+`ifelse(WANT_PROFILING,instrument,
+`ret_instrument',
+`ret_internal')
+generate_movl_eip_to_ebx
+')
+
+
+dnl  Usage: ret_internal
+dnl
+dnl  A plain "ret", without any __cyg_profile_func_exit call.  This can be
+dnl  used for a return which is internal to some function, such as when
+dnl  getting %eip for PIC.
+dnl
+dnl  The body is double quoted so the expansion is a still-quoted `ret',
+dnl  which comes out as literal text instead of invoking the ret macro.
+
+define(ret_internal,
+m4_assert_numargs(-1)
+``ret'')
+
+
+dnl  Usage: ret_instrument
+dnl
+dnl  Generate call to __cyg_profile_func_exit and then a ret.  If a ret has
+dnl  already been seen from this function then jump to that chunk of code,
+dnl  rather than emitting it again.
+dnl
+dnl  "Already seen" is tracked with a ret_instrument_seen_<function> macro,
+dnl  and the shared exit code carries an L(instrument_exit_<function>)
+dnl  label, <function> in both cases being instrument_current_function.
+
+define(ret_instrument,
+m4_assert_numargs(-1)
+`ifelse(m4_unquote(ret_instrument_seen_`'instrument_current_function),1,
+`jmp   L(instrument_exit_`'instrument_current_function)',
+`define(ret_instrument_seen_`'instrument_current_function,1)
+L(instrument_exit_`'instrument_current_function):
+call_instrument(exit)
+       ret_internal')')
+
+
+dnl  Usage: _GLOBAL_OFFSET_TABLE_
+dnl
+dnl  Expand to _GLOBAL_OFFSET_TABLE_ plus any necessary underscore prefix.
+dnl  This lets us write plain _GLOBAL_OFFSET_TABLE_ in SVR4 style, but still
+dnl  work with systems requiring an extra underscore such as OpenBSD.
+dnl
+dnl  deflit is used so "leal _GLOBAL_OFFSET_TABLE_(%eax), %ebx" will come
+dnl  out right, though that form doesn't work properly in gas (see
+dnl  mpn/x86/README).
+dnl
+dnl  The prefix comes from GOT_GSYM_PREFIX.  The name itself is quoted
+dnl  within the body so it's output as text and not expanded again.
+
+deflit(_GLOBAL_OFFSET_TABLE_,
+m4_assert_defined(`GOT_GSYM_PREFIX')
+`GOT_GSYM_PREFIX`_GLOBAL_OFFSET_TABLE_'')
+
+
+dnl  --------------------------------------------------------------------------
+dnl  Various x86 macros.
+dnl
+
+
+dnl  Usage: ALIGN_OFFSET(bytes,offset)
+dnl
+dnl  Align to `offset' away from a multiple of `bytes'.
+dnl
+dnl  This is useful for testing, for example align to something very strict
+dnl  and see what effect offsets from it have, "ALIGN_OFFSET(256,32)".
+dnl
+dnl  Generally you wouldn't execute across the padding, but it's done with
+dnl  nop's so it'll work.
+dnl
+dnl  One nop is emitted for each value of the forloop from 1 to `offset'.
+dnl  NOTE(review): what happens for an offset of 0 depends on how forloop
+dnl  treats an empty range, which isn't visible here -- confirm before
+dnl  relying on it.
+
+define(ALIGN_OFFSET,
+m4_assert_numargs(2)
+`ALIGN($1)
+forloop(`i',1,$2,`     nop
+')')
+
+
+dnl  Usage: defframe(name,offset)
+dnl
+dnl  Make a definition like the following with which to access a parameter
+dnl  or variable on the stack.
+dnl
+dnl         define(name,`FRAME+offset(%esp)')
+dnl
+dnl  Actually m4_empty_if_zero(FRAME+offset) is used, which will save one
+dnl  byte if FRAME+offset is zero, by putting (%esp) rather than 0(%esp).
+dnl  Use define(`defframe_empty_if_zero_disabled',1) if for some reason the
+dnl  zero offset is wanted.
+dnl
+dnl  The new macro also gets a check that when it's used FRAME is actually
+dnl  defined, and that the final %esp offset isn't negative, which would
+dnl  mean an attempt to access something below the current %esp.
+dnl
+dnl  deflit() is used rather than a plain define(), so the new macro won't
+dnl  delete any following parenthesized expression.  name(%edi) will come
+dnl  out say as 16(%esp)(%edi).  This isn't valid assembler and should
+dnl  provoke an error, which is better than silently giving just 16(%esp).
+dnl
+dnl  See README for more on the suggested way to access the stack frame.
+dnl
+dnl  FRAME is left unexpanded in the body of the new macro, so each use of
+dnl  name picks up the value FRAME has at that point in the code.
+
+define(defframe,
+m4_assert_numargs(2)
+`deflit(`$1',
+m4_assert_defined(`FRAME')
+`defframe_check_notbelow(`$1',$2,FRAME)dnl
+defframe_empty_if_zero(FRAME+($2))(%esp)')')
+
+dnl  Called: defframe_empty_if_zero(expression)
+dnl
+dnl  Gives eval(expression) when defframe_empty_if_zero_disabled is 1, and
+dnl  m4_empty_if_zero(expression) otherwise.
+define(defframe_empty_if_zero,
+m4_assert_numargs(1)
+`ifelse(defframe_empty_if_zero_disabled,1,
+`eval($1)',
+`m4_empty_if_zero($1)')')
+
+dnl  Called: defframe_check_notbelow(`name',offset,FRAME)
+dnl
+dnl  Raise an m4_error naming the frame macro if FRAME+offset is negative,
+dnl  otherwise expand to nothing.
+define(defframe_check_notbelow,
+m4_assert_numargs(3)
+`ifelse(eval(($3)+($2)<0),1,
+`m4_error(`$1 at frame offset $2 used when FRAME is only $3 bytes
+')')')
+
+
+dnl  Usage: FRAME_pushl()
+dnl         FRAME_popl()
+dnl         FRAME_addl_esp(n)
+dnl         FRAME_subl_esp(n)
+dnl
+dnl  Adjust FRAME appropriately for a pushl or popl, or for an addl or subl
+dnl  %esp of n bytes.
+dnl
+dnl  Using these macros is completely optional.  Sometimes it makes more
+dnl  sense to put explicit deflit(`FRAME',N) forms, especially when there's
+dnl  jumps and different sequences of FRAME values need to be used in
+dnl  different places.
+dnl
+dnl  A pushl or subl increases FRAME (by 4 or n), a popl or addl decreases
+dnl  it.  In each case FRAME is redefined with deflit to the evaluated
+dnl  number.  Only the m4 bookkeeping is done, no instruction is emitted.
+
+define(FRAME_pushl,
+m4_assert_numargs(0)
+m4_assert_defined(`FRAME')
+`deflit(`FRAME',eval(FRAME+4))')
+
+define(FRAME_popl,
+m4_assert_numargs(0)
+m4_assert_defined(`FRAME')
+`deflit(`FRAME',eval(FRAME-4))')
+
+define(FRAME_addl_esp,
+m4_assert_numargs(1)
+m4_assert_defined(`FRAME')
+`deflit(`FRAME',eval(FRAME-($1)))')
+
+define(FRAME_subl_esp,
+m4_assert_numargs(1)
+m4_assert_defined(`FRAME')
+`deflit(`FRAME',eval(FRAME+($1)))')
+
+
+dnl  Usage: defframe_pushl(name)
+dnl
+dnl  Do a combination FRAME_pushl() and a defframe() to name the stack
+dnl  location just pushed.  This should come after a pushl instruction.
+dnl  Putting it on the same line works and avoids lengthening the code.  For
+dnl  example,
+dnl
+dnl         pushl   %eax     defframe_pushl(VAR_COUNTER)
+dnl
+dnl  Notice the defframe() is done with an unquoted -FRAME thus giving its
+dnl  current value without tracking future changes.
+dnl
+dnl  So the offset stored is minus the FRAME value just after the push.
+dnl  Immediately afterwards name is (%esp), and it moves up by however much
+dnl  FRAME grows later on.
+
+define(defframe_pushl,
+m4_assert_numargs(1)
+`FRAME_pushl()defframe(`$1',-FRAME)')
+
+
+dnl  --------------------------------------------------------------------------
+dnl  Assembler instruction macros.
+dnl
+
+
+dnl  Usage: emms_or_femms
+dnl         femms_available_p
+dnl
+dnl  femms_available_p expands to 1 or 0 according to whether the AMD 3DNow
+dnl  femms instruction is available.  emms_or_femms expands to femms if
+dnl  available, or emms if not.
+dnl
+dnl  emms_or_femms is meant for use in the K6 directory where plain K6
+dnl  (without femms) and K6-2 and K6-3 (with a slightly faster femms) are
+dnl  supported together.
+dnl
+dnl  On K7 femms is no longer faster and is just an alias for emms, so plain
+dnl  emms may as well be used.
+dnl
+dnl  Availability is decided from the HAVE_HOST_CPU_k62, HAVE_HOST_CPU_k63
+dnl  and HAVE_HOST_CPU_athlon symbols, using m4_ifdef_anyof_p (defined
+dnl  elsewhere, presumably giving 1 when any of the names is defined).
+
+define(femms_available_p,
+m4_assert_numargs(-1)
+`m4_ifdef_anyof_p(
+       `HAVE_HOST_CPU_k62',
+       `HAVE_HOST_CPU_k63',
+       `HAVE_HOST_CPU_athlon')')
+
+define(emms_or_femms,
+m4_assert_numargs(-1)
+`ifelse(femms_available_p,1,`femms',`emms')')
+
+
+dnl  Usage: femms
+dnl
+dnl  Gas 2.9.1 which comes with FreeBSD 3.4 doesn't support femms, so the
+dnl  following is a replacement using .byte.
+dnl
+dnl  The bytes are given in decimal, 15,14 being 0x0F,0x0E.
+
+define(femms,
+m4_assert_numargs(-1)
+`.byte 15,14   C AMD 3DNow femms')
+
+
+dnl  Usage: jadcl0(op)
+dnl
+dnl  Generate a jnc/incl as a substitute for adcl $0,op.  Note this isn't an
+dnl  exact replacement, since it doesn't set the flags like adcl does.
+dnl
+dnl  This finds a use in K6 mpn_addmul_1, mpn_submul_1, mpn_mul_basecase and
+dnl  mpn_sqr_basecase because on K6 an adcl is slow, the branch
+dnl  misprediction penalty is small, and the multiply algorithm used leads
+dnl  to a carry bit on average only 1/4 of the time.
+dnl
+dnl  jadcl0_disabled can be set to 1 to instead generate an ordinary adcl
+dnl  for comparison.  For example,
+dnl
+dnl            define(`jadcl0_disabled',1)
+dnl
+dnl  When using a register operand, eg. "jadcl0(%edx)", the jnc/incl code is
+dnl  the same size as an adcl.  This makes it possible to use the exact same
+dnl  computed jump code when testing the relative speed of the two.
+dnl
+dnl  Each jnc/incl expansion gets its own L(jadcl0_N) label, N being
+dnl  jadcl0_counter, which is incremented afterwards.  The adcl form uses no
+dnl  label and leaves the counter alone.
+
+define(jadcl0,
+m4_assert_numargs(1)
+`ifelse(jadcl0_disabled,1,
+       `adcl   $`'0, $1',
+       `jnc    L(jadcl0_`'jadcl0_counter)
+       incl    $1
+L(jadcl0_`'jadcl0_counter):
+define(`jadcl0_counter',incr(jadcl0_counter))')')
+
+define(jadcl0_counter,1)
+
+
+dnl  Usage: x86_lookup(target, key,value, key,value, ...)
+dnl         x86_lookup_p(target, key,value, key,value, ...)
+dnl
+dnl  Look for `target' among the `key' parameters.
+dnl
+dnl  x86_lookup expands to the corresponding `value', or generates an error
+dnl  if `target' isn't found.
+dnl
+dnl  x86_lookup_p expands to 1 if `target' is found, or 0 if not.
+dnl
+dnl  Both work recursively.  With fewer than 3 arguments left the search has
+dnl  failed.  Otherwise the target is compared to the first key, and on a
+dnl  mismatch the macro calls itself with that key,value pair dropped.
+
+define(x86_lookup,
+m4_assert_numargs_range(1,999)
+`ifelse(eval($#<3),1,
+`m4_error(`unrecognised part of x86 instruction: $1
+')',
+`ifelse(`$1',`$2', `$3',
+`x86_lookup(`$1',shift(shift(shift($@))))')')')
+
+define(x86_lookup_p,
+m4_assert_numargs_range(1,999)
+`ifelse(eval($#<3),1, `0',
+`ifelse(`$1',`$2',    `1',
+`x86_lookup_p(`$1',shift(shift(shift($@))))')')')
+
+
+dnl  Usage: x86_opcode_reg32(reg)
+dnl         x86_opcode_reg32_p(reg)
+dnl
+dnl  x86_opcode_reg32 expands to the standard 3 bit encoding for the given
+dnl  32-bit register, eg. `%ebp' turns into 5.
+dnl
+dnl  x86_opcode_reg32_p expands to 1 if reg is a valid 32-bit register, or 0
+dnl  if not.
+dnl
+dnl  x86_opcode_reg32_list is written unquoted in the x86_lookup calls, so
+dnl  it expands there into the separate key,value arguments wanted.
+
+define(x86_opcode_reg32,
+m4_assert_numargs(1)
+`x86_lookup(`$1',x86_opcode_reg32_list)')
+
+define(x86_opcode_reg32_p,
+m4_assert_onearg()
+`x86_lookup_p(`$1',x86_opcode_reg32_list)')
+
+define(x86_opcode_reg32_list,
+``%eax',0,
+`%ecx',1,
+`%edx',2,
+`%ebx',3,
+`%esp',4,
+`%ebp',5,
+`%esi',6,
+`%edi',7')
+
+
+dnl  Usage: x86_opcode_tttn(cond)
+dnl
+dnl  Expand to the 4-bit "tttn" field value for the given x86 branch
+dnl  condition (like `c', `ae', etc).
+dnl
+dnl  The condition is looked up in x86_opcode_tttn_list, which is written
+dnl  unquoted in the x86_lookup call so as to expand into separate
+dnl  key,value arguments.  An unknown condition gives the x86_lookup error.
+dnl
+dnl  x86_opcode_tttn_list gives several spellings for most values, and is
+dnl  also what define_cmov_many is run over to create the cmovCC macros.
+
+define(x86_opcode_tttn,
+m4_assert_numargs(1)
+`x86_lookup(`$1',x86_opcode_tttn_list)')
+
+define(x86_opcode_tttn_list,
+``o',  0,
+`no',  1,
+`b',   2, `c',  2, `nae',2,
+`nb',  3, `nc', 3, `ae', 3,
+`e',   4, `z',  4,
+`ne',  5, `nz', 5,
+`be',  6, `na', 6,
+`nbe', 7, `a',  7,
+`s',   8,
+`ns',  9,
+`p',  10, `pe', 10, `npo',10,
+`np', 11, `npe',11, `po', 11,
+`l',  12, `nge',12,
+`nl', 13, `ge', 13,
+`le', 14, `ng', 14,
+`nle',15, `g',  15')
+
+
+dnl  Usage: cmovCC(%srcreg,%dstreg)
+dnl
+dnl  Emit a cmov instruction, using a .byte sequence, since various past
+dnl  versions of gas don't know cmov.  For example,
+dnl
+dnl         cmovz(  %eax, %ebx)
+dnl
+dnl  The source operand can only be a plain register.  (m4 code implementing
+dnl  full memory addressing modes exists, believe it or not, but isn't
+dnl  currently needed and isn't included.)
+dnl
+dnl  All the standard conditions are defined.  Attempting to use one without
+dnl  the macro parentheses, such as just "cmovbe %eax, %ebx", will provoke
+dnl  an error.  This protects against writing something old gas wouldn't
+dnl  understand.
+
+dnl  Called: define_cmov_many(cond,tttn,cond,tttn,...)
+dnl  Define a cmov macro for each cond,tttn pair, stopping at an empty cond.
+define(define_cmov_many,
+`ifelse(m4_length(`$1'),0,,
+`define_cmov(`$1',`$2')define_cmov_many(shift(shift($@)))')')
+
+dnl  Called: define_cmov(cond,tttn)
+dnl  Emit basically define(cmov<cond>,`cmov_internal(<cond>,<tttn>,`$1',`$2')')
+define(define_cmov,
+m4_assert_numargs(2)
+`define(`cmov$1',
+m4_instruction_wrapper()
+m4_assert_numargs(2)
+`cmov_internal'(m4_doublequote($`'0),``$2'',dnl
+m4_doublequote($`'1),m4_doublequote($`'2)))')
+
+define_cmov_many(x86_opcode_tttn_list)
+
+dnl  Called: cmov_internal(name,tttn,src,dst)
+dnl
+dnl  The bytes emitted are 15 (0x0F), then 64+tttn (0x40+tttn), then
+dnl  192 + 8*dst + src.  The 192 is 0xC0, dst goes in bits 3-5 and src in
+dnl  bits 0-2, using the x86_opcode_reg32 numbers.  The instruction as
+dnl  written is appended in a comment.
+define(cmov_internal,
+m4_assert_numargs(4)
+`.byte dnl
+15, dnl
+eval(64+$2), dnl
+eval(192+8*x86_opcode_reg32(`$4')+x86_opcode_reg32(`$3')) dnl
+       C `$1 $3, $4'')
+
+
+dnl  Usage: x86_opcode_regmmx(reg)
+dnl
+dnl  Validate the given mmx register, and return its number, 0 to 7.
+dnl
+dnl  Validation is by x86_lookup, which raises its error for anything not
+dnl  in x86_opcode_regmmx_list.
+
+define(x86_opcode_regmmx,
+m4_assert_numargs(1)
+`x86_lookup(`$1',x86_opcode_regmmx_list)')
+
+define(x86_opcode_regmmx_list,
+``%mm0',0,
+`%mm1',1,
+`%mm2',2,
+`%mm3',3,
+`%mm4',4,
+`%mm5',5,
+`%mm6',6,
+`%mm7',7')
+
+
+dnl  Usage: psadbw(%srcreg,%dstreg)
+dnl
+dnl  Oldish versions of gas don't know psadbw, in particular gas 2.9.1 on
+dnl  FreeBSD 3.3 and 3.4 doesn't, so instead emit .byte sequences.  For
+dnl  example,
+dnl
+dnl         psadbw( %mm1, %mm2)
+dnl
+dnl  Only register->register forms are supported here, which suffices for
+dnl  the current code.
+dnl
+dnl  The bytes emitted are 0x0f, 0xf6 and then 192 + 8*dst + src, using the
+dnl  x86_opcode_regmmx numbers, so dst is in bits 3-5 and src in bits 0-2.
+dnl  The instruction as written is appended in a comment.
+
+define(psadbw,
+m4_instruction_wrapper()
+m4_assert_numargs(2)
+`.byte 0x0f,0xf6,dnl
+eval(192+x86_opcode_regmmx(`$2')*8+x86_opcode_regmmx(`$1')) dnl
+       C `psadbw $1, $2'')
+
+
+dnl  Usage: Zdisp(inst,op,op,op)
+dnl
+dnl  Generate explicit .byte sequences if necessary to force a byte-sized
+dnl  zero displacement on an instruction.  For example,
+dnl
+dnl         Zdisp(  movl,   0,(%esi), %eax)
+dnl
+dnl  expands to
+dnl
+dnl                 .byte   139,70,0  C movl 0(%esi), %eax
+dnl
+dnl  If the displacement given isn't 0, then normal assembler code is
+dnl  generated.  For example,
+dnl
+dnl         Zdisp(  movl,   4,(%esi), %eax)
+dnl
+dnl  expands to
+dnl
+dnl                 movl    4(%esi), %eax
+dnl
+dnl  This means a single Zdisp() form can be used with an expression for the
+dnl  displacement, and .byte will be used only if necessary.  The
+dnl  displacement argument is eval()ed.
+dnl
+dnl  Because there aren't many places a 0(reg) form is wanted, Zdisp is
+dnl  implemented with a table of instructions and encodings.  A new entry is
+dnl  needed for any different operation or registers.  The table is split
+dnl  into separate macros to avoid overflowing BSD m4 macro expansion space.
+dnl
+dnl  Zdisp_found is set to 0, the arguments are handed to each of Zdisp_1 to
+dnl  Zdisp_4 in turn, and if Zdisp_found is still 0 after that an error is
+dnl  raised.  Setting Zdisp_found is left to Zdisp_match, which is defined
+dnl  elsewhere (presumably it does so on a matching table entry).
+
+define(Zdisp,
+m4_assert_numargs(4)
+`define(`Zdisp_found',0)dnl
+Zdisp_1($@)dnl
+Zdisp_2($@)dnl
+Zdisp_3($@)dnl
+Zdisp_4($@)dnl
+ifelse(Zdisp_found,0,
+`m4_error(`unrecognised instruction in Zdisp: $1 $2 $3 $4
+')')')
+
+dnl  Called: Zdisp_1(inst,op,op,op)
+dnl
+dnl  First part of the Zdisp table: adcl, addl, sbbl, subl, movzbl, adc and
+dnl  sbb forms.  Each row gives an instruction, its three operand pieces in
+dnl  the form they're passed to Zdisp, and the bytes encoding it with a one
+dnl  byte zero displacement.  The arguments of the Zdisp call being
+dnl  processed are passed along after that as $@.
+define(Zdisp_1,`dnl
+Zdisp_match( adcl, 0,(%edx), %eax,        `0x13,0x42,0x00',           $@)`'dnl
+Zdisp_match( adcl, 0,(%edx), %ebx,        `0x13,0x5a,0x00',           $@)`'dnl
+Zdisp_match( adcl, 0,(%edx), %esi,        `0x13,0x72,0x00',           $@)`'dnl
+Zdisp_match( addl, %ebx, 0,(%edi),        `0x01,0x5f,0x00',           $@)`'dnl
+Zdisp_match( addl, %ecx, 0,(%edi),        `0x01,0x4f,0x00',           $@)`'dnl
+Zdisp_match( addl, %esi, 0,(%edi),        `0x01,0x77,0x00',           $@)`'dnl
+Zdisp_match( sbbl, 0,(%edx), %eax,        `0x1b,0x42,0x00',           $@)`'dnl
+Zdisp_match( sbbl, 0,(%edx), %esi,        `0x1b,0x72,0x00',           $@)`'dnl
+Zdisp_match( subl, %ecx, 0,(%edi),        `0x29,0x4f,0x00',           $@)`'dnl
+Zdisp_match( movzbl, 0,(%eax,%ebp), %eax, `0x0f,0xb6,0x44,0x28,0x00', $@)`'dnl
+Zdisp_match( movzbl, 0,(%ecx,%edi), %edi, `0x0f,0xb6,0x7c,0x39,0x00', $@)`'dnl
+Zdisp_match( adc, 0,(%ebx,%ecx,4), %eax,  `0x13,0x44,0x8b,0x00',      $@)`'dnl
+Zdisp_match( sbb, 0,(%ebx,%ecx,4), %eax,  `0x1b,0x44,0x8b,0x00',      $@)`'dnl
+')
+define(Zdisp_2,`dnl
+Zdisp_match( movl, %eax, 0,(%edi),        `0x89,0x47,0x00',           $@)`'dnl
+Zdisp_match( movl, %ebx, 0,(%edi),        `0x89,0x5f,0x00',           $@)`'dnl
+Zdisp_match( movl, %esi, 0,(%edi),        `0x89,0x77,0x00',           $@)`'dnl
+Zdisp_match( movl, 0,(%ebx), %eax,        `0x8b,0x43,0x00',           $@)`'dnl
+Zdisp_match( movl, 0,(%ebx), %esi,        `0x8b,0x73,0x00',           $@)`'dnl
+Zdisp_match( movl, 0,(%edx), %eax,        `0x8b,0x42,0x00',           $@)`'dnl
+Zdisp_match( movl, 0,(%esi), %eax,        `0x8b,0x46,0x00',           $@)`'dnl
+Zdisp_match( movl, 0,(%esi,%ecx,4), %eax, `0x8b,0x44,0x8e,0x00',      $@)`'dnl
+Zdisp_match( mov, 0,(%esi,%ecx,4), %eax,  `0x8b,0x44,0x8e,0x00',      $@)`'dnl
+Zdisp_match( mov, %eax, 0,(%edi,%ecx,4),  `0x89,0x44,0x8f,0x00',      $@)`'dnl
+')
+define(Zdisp_3,`dnl
+Zdisp_match( movq, 0,(%eax,%ecx,8), %mm0, `0x0f,0x6f,0x44,0xc8,0x00', $@)`'dnl
+Zdisp_match( movq, 0,(%ebx,%eax,4), %mm0, `0x0f,0x6f,0x44,0x83,0x00', $@)`'dnl
+Zdisp_match( movq, 0,(%ebx,%eax,4), %mm2, `0x0f,0x6f,0x54,0x83,0x00', $@)`'dnl
+Zdisp_match( movq, 0,(%ebx,%ecx,4), %mm0, `0x0f,0x6f,0x44,0x8b,0x00', $@)`'dnl
+Zdisp_match( movq, 0,(%edx), %mm0,        `0x0f,0x6f,0x42,0x00',      $@)`'dnl
+Zdisp_match( movq, 0,(%esi), %mm0,        `0x0f,0x6f,0x46,0x00',      $@)`'dnl
+Zdisp_match( movq, %mm0, 0,(%edi),        `0x0f,0x7f,0x47,0x00',      $@)`'dnl
+Zdisp_match( movq, %mm2, 0,(%ecx,%eax,4), `0x0f,0x7f,0x54,0x81,0x00', $@)`'dnl
+Zdisp_match( movq, %mm2, 0,(%edx,%eax,4), `0x0f,0x7f,0x54,0x82,0x00', $@)`'dnl
+Zdisp_match( movq, %mm0, 0,(%edx,%ecx,8), `0x0f,0x7f,0x44,0xca,0x00', $@)`'dnl
+')
+define(Zdisp_4,`dnl
+Zdisp_match( movd, 0,(%eax,%ecx,4), %mm0, `0x0f,0x6e,0x44,0x88,0x00', $@)`'dnl
+Zdisp_match( movd, 0,(%eax,%ecx,8), %mm1, `0x0f,0x6e,0x4c,0xc8,0x00', $@)`'dnl
+Zdisp_match( movd, 0,(%edx,%ecx,8), %mm0, `0x0f,0x6e,0x44,0xca,0x00', $@)`'dnl
+Zdisp_match( movd, %mm0, 0,(%eax,%ecx,4), `0x0f,0x7e,0x44,0x88,0x00', $@)`'dnl
+Zdisp_match( movd, %mm0, 0,(%ecx,%eax,4), `0x0f,0x7e,0x44,0x81,0x00', $@)`'dnl
+Zdisp_match( movd, %mm2, 0,(%ecx,%eax,4), `0x0f,0x7e,0x54,0x81,0x00', $@)`'dnl
+Zdisp_match( movd, %mm0, 0,(%edx,%ecx,4), `0x0f,0x7e,0x44,0x8a,0x00', $@)`'dnl
+')
+
+dnl  Called: Zdisp_match(inst,op,op,op,bytes,inst,op,op,op)
+dnl
+dnl  Arguments 1 to 4 are a table entry, argument 5 is its encoding, and
+dnl  arguments 6 to 9 are the instruction given to Zdisp.  A table entry has
+dnl  the literal 0 displacement as either its second or its third argument,
+dnl  and both layouts are tried.  On a match Zdisp_found is set to 1, and
+dnl  either the .byte form (displacement evaluates to 0) or the plain
+dnl  instruction is generated.
+define(Zdisp_match,
+m4_assert_numargs(9)
+`ifelse(eval(m4_stringequal_p(`$1',`$6')
+       && m4_stringequal_p(`$2',0)
+       && m4_stringequal_p(`$3',`$8')
+       && m4_stringequal_p(`$4',`$9')),1,
+`define(`Zdisp_found',1)dnl
+ifelse(eval(`$7'),0,
+`      .byte   $5  C `$1 0$3, $4'',
+`      $6      $7$8, $9')',
+
+`ifelse(eval(m4_stringequal_p(`$1',`$6')
+       && m4_stringequal_p(`$2',`$7')
+       && m4_stringequal_p(`$3',0)
+       && m4_stringequal_p(`$4',`$9')),1,
+`define(`Zdisp_found',1)dnl
+ifelse(eval(`$8'),0,
+`      .byte   $5  C `$1 $2, 0$4'',
+`      $6      $7, $8$9')')')')
+
+
+dnl  Usage: shldl(count,src,dst)
+dnl         shrdl(count,src,dst)
+dnl         shldw(count,src,dst)
+dnl         shrdw(count,src,dst)
+dnl
+dnl  Generate a double-shift instruction, possibly omitting a %cl count
+dnl  parameter if that's what the assembler requires, as indicated by
+dnl  WANT_SHLDL_CL in config.m4.  For example,
+dnl
+dnl         shldl(  %cl, %eax, %ebx)
+dnl
+dnl  turns into either
+dnl
+dnl         shldl   %cl, %eax, %ebx
+dnl  or
+dnl         shldl   %eax, %ebx
+dnl
+dnl  Immediate counts are always passed through unchanged.  For example,
+dnl
+dnl         shrdl(  $2, %esi, %edi)
+dnl  becomes
+dnl         shrdl   $2, %esi, %edi
+dnl
+dnl
+dnl  If you forget to use the macro form "shldl( ...)" and instead write
+dnl  just a plain "shldl ...", an error results.  This ensures the necessary
+dnl  variant treatment of %cl isn't accidentally bypassed.
+
+dnl  Usage: define_shd_instruction(name)
+dnl
+dnl  Define name as a three argument macro which hands its own name and its
+dnl  arguments on to shd_instruction.
+define(define_shd_instruction,
+m4_assert_numargs(1)
+`define($1,
+m4_instruction_wrapper()
+m4_assert_numargs(3)
+`shd_instruction'(m4_doublequote($`'0),m4_doublequote($`'1),dnl
+m4_doublequote($`'2),m4_doublequote($`'3)))')
+
+dnl  Effectively: define(shldl,`shd_instruction(`$0',`$1',`$2',`$3')') etc
+define_shd_instruction(shldl)
+define_shd_instruction(shrdl)
+define_shd_instruction(shldw)
+define_shd_instruction(shrdw)
+
+dnl  Called: shd_instruction(op,count,src,dst)
+dnl
+dnl  The count is left out of the generated instruction only when it is %cl
+dnl  and WANT_SHLDL_CL is 0, otherwise all three operands are emitted.
+define(shd_instruction,
+m4_assert_numargs(4)
+m4_assert_defined(`WANT_SHLDL_CL')
+`ifelse(eval(m4_stringequal_p(`$2',`%cl') && !WANT_SHLDL_CL),1,
+``$1'  `$3', `$4'',
+``$1'  `$2', `$3', `$4'')')
+
+
+dnl  Usage: ASSERT([cond][,instructions])
+dnl
+dnl  If WANT_ASSERT is 1, output the given instructions and expect the given
+dnl  flags condition to then be satisfied.  For example,
+dnl
+dnl         ASSERT(ne, `cmpl %eax, %ebx')
+dnl
+dnl  The instructions can be omitted to just assert a flags condition with
+dnl  no extra calculation.  For example,
+dnl
+dnl         ASSERT(nc)
+dnl
+dnl  When `instructions' is not empty, a pushf/popf is added to preserve the
+dnl  flags, but the instructions themselves must preserve any registers that
+dnl  matter.  FRAME is adjusted for the push and pop, so the instructions
+dnl  given can use defframe() stack variables.
+dnl
+dnl  The condition can be omitted to just output the given instructions when
+dnl  assertion checking is wanted.  In this case the pushf/popf is omitted.
+dnl  For example,
+dnl
+dnl         ASSERT(, `movl %eax, VAR_KEEPVAL')
+
+define(ASSERT,
+m4_assert_numargs_range(1,2)
+m4_assert_defined(`WANT_ASSERT')
+`ifelse(WANT_ASSERT,1,
+`ifelse(`$1',,
+       `$2',
+       `C ASSERT
+ifelse(`$2',,,`        pushf   ifdef(`FRAME',`FRAME_pushl()')')
+       $2
+       j`$1'   L(ASSERT_ok`'ASSERT_counter)
+       ud2     C assertion failed
+L(ASSERT_ok`'ASSERT_counter):
+ifelse(`$2',,,`        popf    ifdef(`FRAME',`FRAME_popl()')')
+define(`ASSERT_counter',incr(ASSERT_counter))')')')
+
+dnl  ASSERT_counter gives each L(ASSERT_okN) label a distinct number.  It is
+dnl  incremented each time an ASSERT with a condition is expanded.
+define(ASSERT_counter,1)
+
+
+dnl  Usage: movl_text_address(label,register)
+dnl
+dnl  Get the address of a text segment label, using either a plain movl or a
+dnl  position-independent calculation, as necessary.  For example,
+dnl
+dnl         movl_text_address(L(foo),%eax)
+dnl
+dnl  This macro is only meant for use in ASSERT()s or when testing, since
+dnl  the PIC sequence it generates will want to be done with a ret balancing
+dnl  the call on CPUs with return address branch prediction.
+dnl
+dnl  The addl generated here has a backward reference to the label, and so
+dnl  won't suffer from the two forwards references bug in old gas (described
+dnl  in mpn/x86/README).
+
+define(movl_text_address,
+m4_assert_numargs(2)
+`ifdef(`PIC',
+       `call   L(movl_text_address_`'movl_text_address_counter)
+L(movl_text_address_`'movl_text_address_counter):
+       popl    $2      C %eip
+       addl    `$'$1-L(movl_text_address_`'movl_text_address_counter), $2
+define(`movl_text_address_counter',incr(movl_text_address_counter))',
+       `movl   `$'$1, $2')')
+
+define(movl_text_address_counter,1)
+
+
+dnl  Usage: notl_or_xorl_GMP_NUMB_MASK(reg)
+dnl
+dnl  Expand to either "notl `reg'" or "xorl $GMP_NUMB_MASK,`reg'" as
+dnl  appropriate for nails in use or not.
+
+define(notl_or_xorl_GMP_NUMB_MASK,
+m4_assert_numargs(1)
+`ifelse(GMP_NAIL_BITS,0,
+`notl  `$1'',
+`xorl  $GMP_NUMB_MASK, `$1'')')
+
+
+dnl  Usage: LEA(symbol,reg)
+dnl
+dnl  Load the address of symbol into reg through the GOT.  A helper labelled
+dnl  L(movl_eip_REG), REG being reg without its leading %, is called to fetch
+dnl  the return address into reg.  The helper code is stored in EPILOGUE_cpu
+dnl  (presumably output later by EPILOGUE, which is not visible here).
+
+define(`LEA',`
+define(`EPILOGUE_cpu',
+`
+L(movl_eip_`'substr($2,1)):
+       movl    (%esp), $2
+       ret_internal
+       SIZE($'`1, .-$'`1)')
+
+        call    L(movl_eip_`'substr($2,1))
+        addl    $_GLOBAL_OFFSET_TABLE_, $2
+        movl    $1@GOT($2), $2
+')
+
+
+dnl  Usage: DEF_OBJECT(name[,align])
+dnl
+dnl  Switch to RODATA, emit ALIGN(align), or ALIGN(2) when only the name is
+dnl  given, and then emit the label for name.
+define(`DEF_OBJECT',
+m4_assert_numargs_range(1,2)
+       `RODATA
+       ALIGN(ifelse($#,1,2,$2))
+$1:
+')
+
+dnl  Usage: END_OBJECT(name)
+dnl
+dnl  Emit SIZE(name,.-name).  Presumably pairs with DEF_OBJECT.
+define(`END_OBJECT',
+m4_assert_numargs(1)
+`      SIZE(`$1',.-`$1')')
+
+divert`'dnl
diff --git a/mpn/x86_64/README b/mpn/x86_64/README

new file mode 100644 (file)

index 0000000..c89f841
--- /dev/null
+++ b/mpn/x86_64/README
@@ -0,0 +1,63 @@
+Copyright 2003, 2004, 2006, 2008 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+
+
+
+                       AMD64 MPN SUBROUTINES
+
+
+This directory contains mpn functions for AMD64 chips.  It is also useful
+for 64-bit Pentiums, and "Core 2".
+
+
+                    RELEVANT OPTIMIZATION ISSUES
+
+The Opteron and Athlon64 can sustain up to 3 instructions per cycle, but in
+practice that is only possible for integer instructions.  But almost any
+three integer instructions can issue simultaneously, including any 3 ALU
+operations, including shifts.  Up to two memory operations can issue each
+cycle.
+
+Scheduling typically requires that load-use instructions are split into
+separate load and use instructions.  That requires more decode resources,
+and it is rarely a win.  Opteron/Athlon64 have a deep out-of-order core.
+
+
+Optimizing for 64-bit Pentium4 is probably a waste of time, as the most
+critical instructions are very poorly implemented here.  Perhaps we could
+save a cycle or two, but the most common loops now run at between 10 and 22
+cycles, so a saved cycle isn't too exciting.
+
+
+The new spin of the venerable P6 core, the "Core 2" is much better than the
+Pentium4 for the GMP loops.  Its integer pipeline is somewhat similar to
+the Opteron/Athlon64 pipeline, except that the GMP favourites ADC/SBB and
+MUL are slower.  Furthermore, an INC/DEC followed by ADC/SBB incur a
+pipeline stall of around 10 cycles.  The default mpn_add_n and mpn_sub_n
+code suffers badly from the stall.  The code in the core2 subdirectory uses
+the almost forgotten instruction JRCXZ for loop control, and updates the
+induction variable using LEA.
+
+
+
+REFERENCES
+
+"System V Application Binary Interface AMD64 Architecture Processor
+Supplement", draft version 0.99, December 2007.
+http://www.x86-64.org/documentation/abi.pdf
diff --git a/mpn/x86_64/addaddmul_1msb0.asm b/mpn/x86_64/addaddmul_1msb0.asm

new file mode 100644 (file)

index 0000000..89e7bed
--- /dev/null
+++ b/mpn/x86_64/addaddmul_1msb0.asm
@@ -0,0 +1,155 @@
+dnl  AMD64 mpn_addaddmul_1msb0, R = Au + Bv, u,v < 2^63.
+
+dnl  Copyright 2008 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C K8:           2.167
+C P4:          12.0
+C P6-15:        4.0
+
+C TODO
+C  * Perhaps handle various n mod 3 sizes better.  The code now is too large.
+
+C INPUT PARAMETERS
+define(`rp',   `%rdi')
+define(`ap',   `%rsi')
+define(`bp_param', `%rdx')
+define(`n',    `%rcx')
+define(`u0',   `%r8')
+define(`v0',   `%r9')
+
+
+define(`bp', `%rbp')
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_addaddmul_1msb0)
+       push    %r12                    C callee-saved, used as accumulator
+       push    %rbp                    C callee-saved, holds bp below
+
+       lea     (ap,n,8), ap            C ap, bp and rp now point just past
+       lea     (bp_param,n,8), bp      C their operands and are indexed
+       lea     (rp,n,8), rp            C with a negative n
+       neg     n
+
+       mov     (ap,n,8), %rax
+       mul     %r8                     C rdx:rax = ap[0] * u0
+       mov     %rax, %r12
+       mov     (bp,n,8), %rax
+       mov     %rdx, %r10
+       add     $3, n
+       jns     L(end)                  C 3 limbs or fewer, skip the loop
+
+       ALIGN(16)
+L(top):        mul     %r9             C loop handles 3 limbs per pass
+       add     %rax, %r12
+       mov     -16(ap,n,8), %rax
+       adc     %rdx, %r10
+       mov     %r12, -24(rp,n,8)
+       mul     %r8
+       add     %rax, %r10
+       mov     -16(bp,n,8), %rax
+       mov     $0, %r11d
+       adc     %rdx, %r11
+       mul     %r9
+       add     %rax, %r10
+       mov     -8(ap,n,8), %rax
+       adc     %rdx, %r11
+       mov     %r10, -16(rp,n,8)
+       mul     %r8
+       add     %rax, %r11
+       mov     -8(bp,n,8), %rax
+       mov     $0, %r12d
+       adc     %rdx, %r12
+       mul     %r9
+       add     %rax, %r11
+       adc     %rdx, %r12
+       mov     (ap,n,8), %rax
+       mul     %r8
+       add     %rax, %r12
+       mov     %r11, -8(rp,n,8)
+       mov     (bp,n,8), %rax
+       mov     $0, %r10d
+       adc     %rdx, %r10
+       add     $3, n
+       js      L(top)
+
+L(end):        cmp     $1, R32(n)      C n is 0, 1 or 2 here
+       ja      2f                      C n = 2, one limb left
+       jz      1f                      C n = 1, two limbs left
+
+       mul     %r9                     C n = 0, three limbs left
+       add     %rax, %r12
+       mov     -16(ap), %rax
+       adc     %rdx, %r10
+       mov     %r12, -24(rp)
+       mul     %r8
+       add     %rax, %r10
+       mov     -16(bp), %rax
+       mov     $0, %r11d
+       adc     %rdx, %r11
+       mul     %r9
+       add     %rax, %r10
+       mov     -8(ap), %rax
+       adc     %rdx, %r11
+       mov     %r10, -16(rp)
+       mul     %r8
+       add     %rax, %r11
+       mov     -8(bp), %rax
+       mov     $0, %r12d
+       adc     %rdx, %r12
+       mul     %r9
+       add     %rax, %r11
+       adc     %rdx, %r12
+       mov     %r11, -8(rp)
+       mov     %r12, %rax              C return the high limb
+       pop     %rbp
+       pop     %r12
+       ret
+
+1:     mul     %r9
+       add     %rax, %r12
+       mov     -8(ap), %rax
+       adc     %rdx, %r10
+       mov     %r12, -16(rp)
+       mul     %r8
+       add     %rax, %r10
+       mov     -8(bp), %rax
+       mov     $0, %r11d
+       adc     %rdx, %r11
+       mul     %r9
+       add     %rax, %r10
+       adc     %rdx, %r11
+       mov     %r10, -8(rp)
+       mov     %r11, %rax              C return the high limb
+       pop     %rbp
+       pop     %r12
+       ret
+
+2:     mul     %r9
+       add     %rax, %r12
+       mov     %r12, -8(rp)
+       adc     %rdx, %r10
+       mov     %r10, %rax              C return the high limb
+       pop     %rbp
+       pop     %r12
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/addmul_2.asm b/mpn/x86_64/addmul_2.asm

new file mode 100644 (file)

index 0000000..e762113
--- /dev/null
+++ b/mpn/x86_64/addmul_2.asm
@@ -0,0 +1,168 @@
+dnl  AMD64 mpn_addmul_2 -- Multiply an n-limb vector with a 2-limb vector and
+dnl  add the result to a third limb vector.
+
+dnl  Copyright 2008 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C K8,K9:        2.375
+C K10:          2.375
+C P4:           ?
+C P6 core2:     4.45
+C P6 corei7:    4.35
+
+C This code is the result of running a code generation and optimization tool
+C suite written by David Harvey and Torbjorn Granlund.
+
+C TODO
+C  * Work on feed-in and wind-down code.
+C  * Convert "mov $0" to "xor".
+C  * Adjust initial lea to save some bytes.
+C  * Perhaps adjust n from n_param&3 value?
+
+C INPUT PARAMETERS
+define(`rp',     `%rdi')
+define(`up',     `%rsi')
+define(`n_param',`%rdx')
+define(`vp',     `%rcx')
+
+define(`v0', `%r8')
+define(`v1', `%r9')
+define(`w0', `%rbx')
+define(`w1', `%rcx')
+define(`w2', `%rbp')
+define(`w3', `%r10')
+define(`n',  `%r11')
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_addmul_2)
+       push    %rbx                    C callee-saved, used as w0
+       push    %rbp                    C callee-saved, used as w2
+
+       mov     (vp), v0
+       mov     8(vp), v1               C last read of vp, %rcx is w1 from here
+
+       mov     n_param, n
+       neg     n
+       lea     -32(up,n_param,8), up
+       lea     -32(rp,n_param,8), rp
+
+       and     $3, R32(n_param)        C n mod 4 picks the loop entry point
+       jz      L(am2p0)
+       cmp     $2, R32(n_param)
+       jc      L(am2p1)
+       jz      L(am2p2)
+L(am2p3):
+       mov     32(up,n,8), %rax
+       mul     v0
+       mov     %rax, w1
+       mov     32(up,n,8), %rax
+       mov     %rdx, w2
+       xor     R32(w3), R32(w3)
+       add     $2, n
+       jmp     L(am3)
+L(am2p0):
+       mov     32(up,n,8), %rax
+       mul     v0
+       mov     %rax, w0
+       mov     32(up,n,8), %rax
+       mov     %rdx, w1
+       xor     R32(w2), R32(w2)
+       add     $3, n
+       jmp     L(am0)
+L(am2p1):
+       mov     32(up,n,8), %rax
+       mul     v0
+       mov     %rax, w3
+       mov     32(up,n,8), %rax
+       mov     %rdx, w0
+       xor     R32(w1), R32(w1)
+       jmp     L(am1)
+L(am2p2):
+       mov     32(up,n,8), %rax
+       mul     v0
+       mov     %rax, w2
+       mov     32(up,n,8), %rax
+       mov     %rdx, w3
+       xor     R32(w0), R32(w0)
+       xor     R32(w1), R32(w1)
+       add     $1, n
+       jmp     L(am2)
+
+       ALIGN(32)
+L(top):                                C main loop, n advances by 4 per pass
+       add     w3, (rp,n,8)            C 0 21
+       adc     %rax, w0                C 1 24
+       mov     8(up,n,8), %rax
+       adc     %rdx, w1                C 3 26
+       mov     $0, R32(w2)
+       mul     v0
+       add     %rax, w0                C 2 26
+       mov     8(up,n,8), %rax
+       adc     %rdx, w1                C 4 28
+       adc     $0, R32(w2)             C 6 30
+L(am0):        mul     v1
+       add     w0, 8(rp,n,8)           C 3 27
+       adc     %rax, w1                C 6 30
+       adc     %rdx, w2                C 8 32
+       mov     16(up,n,8), %rax
+       mov     $0, R32(w3)
+       mul     v0
+       add     %rax, w1                C 8
+       mov     16(up,n,8), %rax
+       adc     %rdx, w2                C 10
+       adc     $0, R32(w3)             C 12
+L(am3):        mul     v1
+       add     w1, 16(rp,n,8)          C 9
+       adc     %rax, w2                C 12
+       mov     24(up,n,8), %rax
+       adc     %rdx, w3                C 14
+       mul     v0
+       mov     $0, R32(w0)
+       add     %rax, w2                C 14
+       adc     %rdx, w3                C 16
+       mov     $0, R32(w1)
+       mov     24(up,n,8), %rax
+       adc     $0, R32(w0)             C 18
+L(am2):        mul     v1
+       add     w2, 24(rp,n,8)          C 15
+       adc     %rax, w3                C 18
+       adc     %rdx, w0                C 20
+       mov     32(up,n,8), %rax
+       mul     v0
+       add     %rax, w3                C 20
+       mov     32(up,n,8), %rax
+       adc     %rdx, w0                C 22
+       adc     $0, R32(w1)             C 24
+L(am1):        mul     v1
+       add     $4, n
+       js      L(top)
+
+       add     w3, (rp,n,8)            C wind-down, last limb that is added to
+       adc     %rax, w0
+       adc     %rdx, w1
+       mov     w0, 8(rp,n,8)           C next limb is stored, not added
+       mov     w1, %rax                C return the most significant limb
+
+       pop     %rbp
+       pop     %rbx
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/aorrlsh1_n.asm b/mpn/x86_64/aorrlsh1_n.asm

new file mode 100644 (file)

index 0000000..75fd009
--- /dev/null
+++ b/mpn/x86_64/aorrlsh1_n.asm
@@ -0,0 +1,150 @@
+dnl  AMD64 mpn_addlsh1_n -- rp[] = up[] + (vp[] << 1)
+dnl  AMD64 mpn_rsblsh1_n -- rp[] = (vp[] << 1) - up[]
+
+dnl  Copyright 2003, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C           cycles/limb
+C K8,K9:        2
+C K10:          2
+C P4:           13
+C P6 core2:     3.45
+C P6 corei7:    3.45
+C P6 atom:      ?
+
+
+C Sometimes speed degenerates, supposedly related to that some operand
+C alignments cause cache conflicts.
+
+C The speed is limited by decoding/issue bandwidth.  There are 22 instructions
+C in the loop, which corresponds to ceil(22/3)/4 = 2 c/l.
+
+C INPUT PARAMETERS
+define(`rp',`%rdi')
+define(`up',`%rsi')
+define(`vp',`%rdx')
+define(`n', `%rcx')
+
+ifdef(`OPERATION_addlsh1_n', `
+       define(ADDSUB,        add)
+       define(ADCSBB,        adc)
+       define(func,          mpn_addlsh1_n)')
+ifdef(`OPERATION_rsblsh1_n', `
+       define(ADDSUB,        sub)
+       define(ADCSBB,        sbb)
+       define(func,          mpn_rsblsh1_n)')
+
+MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_rsblsh1_n)
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(func)
+       push    %rbp                    C callee-saved, holds the saved acy
+
+       mov     (vp), %r8               C vp[0], fetched before vp is advanced
+       mov     R32(n), R32(%rax)
+       lea     (rp,n,8), rp
+       lea     (up,n,8), up
+       lea     (vp,n,8), vp
+       neg     n
+       xor     R32(%rbp), R32(%rbp)
+       and     $3, R32(%rax)           C n mod 4 picks the feed-in path
+       je      L(b00)
+       cmp     $2, R32(%rax)
+       jc      L(b01)
+       je      L(b10)
+
+L(b11):        add     %r8, %r8        C n mod 4 = 3
+       mov     8(vp,n,8), %r9
+       adc     %r9, %r9
+       mov     16(vp,n,8), %r10
+       adc     %r10, %r10
+       sbb     R32(%rax), R32(%rax)    C save scy
+       ADDSUB  (up,n,8), %r8
+       ADCSBB  8(up,n,8), %r9
+       mov     %r8, (rp,n,8)
+       mov     %r9, 8(rp,n,8)
+       ADCSBB  16(up,n,8), %r10
+       mov     %r10, 16(rp,n,8)
+       sbb     R32(%rbp), R32(%rbp)    C save acy
+       add     $3, n
+       jmp     L(ent)
+
+L(b10):        add     %r8, %r8        C n mod 4 = 2
+       mov     8(vp,n,8), %r9
+       adc     %r9, %r9
+       sbb     R32(%rax), R32(%rax)    C save scy
+       ADDSUB  (up,n,8), %r8
+       ADCSBB  8(up,n,8), %r9
+       mov     %r8, (rp,n,8)
+       mov     %r9, 8(rp,n,8)
+       sbb     R32(%rbp), R32(%rbp)    C save acy
+       add     $2, n
+       jmp     L(ent)
+
+L(b01):        add     %r8, %r8        C n mod 4 = 1
+       sbb     R32(%rax), R32(%rax)    C save scy
+       ADDSUB  (up,n,8), %r8
+       mov     %r8, (rp,n,8)
+       sbb     R32(%rbp), R32(%rbp)    C save acy
+       inc     n
+L(ent):        jns     L(end)          C nothing left for the 4-way loop
+
+       ALIGN(16)
+L(top):        add     R32(%rax), R32(%rax)    C restore scy
+
+       mov     (vp,n,8), %r8
+L(b00):        adc     %r8, %r8        C entry for n mod 4 = 0, CF is 0 after the and
+       mov     8(vp,n,8), %r9
+       adc     %r9, %r9
+       mov     16(vp,n,8), %r10
+       adc     %r10, %r10
+       mov     24(vp,n,8), %r11
+       adc     %r11, %r11
+
+       sbb     R32(%rax), R32(%rax)    C save scy
+       add     R32(%rbp), R32(%rbp)    C restore acy
+
+       ADCSBB  (up,n,8), %r8
+       nop                             C Hammer speedup!
+       ADCSBB  8(up,n,8), %r9
+       mov     %r8, (rp,n,8)
+       mov     %r9, 8(rp,n,8)
+       ADCSBB  16(up,n,8), %r10
+       ADCSBB  24(up,n,8), %r11
+       mov     %r10, 16(rp,n,8)
+       mov     %r11, 24(rp,n,8)
+
+       sbb     R32(%rbp), R32(%rbp)    C save acy
+       add     $4, n
+       js      L(top)
+
+L(end):                                C eax = -scy, ebp = -acy, combined below
+ifdef(`OPERATION_addlsh1_n',`
+       add     R32(%rbp), R32(%rax)
+       neg     R32(%rax)')
+ifdef(`OPERATION_rsblsh1_n',`
+       sub     R32(%rax), R32(%rbp)
+       movslq  R32(%rbp), %rax')
+
+       pop     %rbp
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/aorrlsh2_n.asm b/mpn/x86_64/aorrlsh2_n.asm

new file mode 100644 (file)

index 0000000..16cecef
--- /dev/null
+++ b/mpn/x86_64/aorrlsh2_n.asm
@@ -0,0 +1,154 @@
+dnl  AMD64 mpn_addlsh2_n and mpn_rsblsh2_n.  R = 4*V +- U.
+dnl  ("rsb" means reversed subtract, name mandated by mpn_sublsh2_n which
+dnl  subtracts the shifted operand from the unshifted operand.)
+
+dnl  Copyright 2009 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C           cycles/limb
+C K8,K9:        2
+C K10:          2
+C P4:           ?
+C P6 core2:     3
+C P6 corei7:    2.75
+C P6 atom:      ?
+
+C INPUT PARAMETERS
+define(`rp',   `%rdi')
+define(`up',   `%rsi')
+define(`vp',   `%rdx')
+define(`n',    `%rcx')
+
+ifdef(`OPERATION_addlsh2_n',`
+  define(ADDSUB,        `add')
+  define(ADCSBB,       `adc')
+  define(func, mpn_addlsh2_n)')
+ifdef(`OPERATION_rsblsh2_n',`
+  define(ADDSUB,        `sub')
+  define(ADCSBB,       `sbb')
+  define(func, mpn_rsblsh2_n)')
+
+MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_rsblsh2_n)
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(func)
+       push    %r12                    C r12 to r15 are callee-saved
+       push    %r13
+       push    %r14
+       push    %r15
+
+       mov     (vp), %r8
+       lea     (,%r8,4), %r12          C r12 = vp[0] << 2
+       shr     $62, %r8                C r8 = the 2 bits shifted out
+
+       mov     R32(n), R32(%rax)
+       lea     (rp,n,8), rp
+       lea     (up,n,8), up
+       lea     (vp,n,8), vp
+       neg     n
+       and     $3, R32(%rax)           C mask all of eax, L(e00) doubles it as carry
+       je      L(b00)
+       cmp     $2, R8(%rax)
+       jc      L(b01)
+       je      L(b10)
+
+L(b11):        mov     8(vp,n,8), %r10 C n mod 4 = 3
+       lea     (%r8,%r10,4), %r14
+       shr     $62, %r10
+       mov     16(vp,n,8), %r11
+       lea     (%r10,%r11,4), %r15
+       shr     $62, %r11
+       ADDSUB  (up,n,8), %r12
+       ADCSBB  8(up,n,8), %r14
+       ADCSBB  16(up,n,8), %r15
+       sbb     R32(%rax), R32(%rax)              C save carry for next
+       mov     %r12, (rp,n,8)
+       mov     %r14, 8(rp,n,8)
+       mov     %r15, 16(rp,n,8)
+       add     $3, n
+       js      L(top)
+       jmp     L(end)
+
+L(b01):        mov     %r8, %r11       C n mod 4 = 1, r11 feeds the bits to L(top)
+       ADDSUB  (up,n,8), %r12
+       sbb     R32(%rax), R32(%rax)              C save carry for next
+       mov     %r12, (rp,n,8)
+       add     $1, n
+       js      L(top)
+       jmp     L(end)
+
+L(b10):        mov     8(vp,n,8), %r11 C n mod 4 = 2
+       lea     (%r8,%r11,4), %r15
+       shr     $62, %r11
+       ADDSUB  (up,n,8), %r12
+       ADCSBB  8(up,n,8), %r15
+       sbb     R32(%rax), R32(%rax)              C save carry for next
+       mov     %r12, (rp,n,8)
+       mov     %r15, 8(rp,n,8)
+       add     $2, n
+       js      L(top)
+       jmp     L(end)
+
+L(b00):        mov     8(vp,n,8), %r9  C n mod 4 = 0, eax is 0 here
+       mov     16(vp,n,8), %r10
+       jmp     L(e00)
+
+       ALIGN(16)
+L(top):        mov     16(vp,n,8), %r10
+       mov     (vp,n,8), %r8
+       mov     8(vp,n,8), %r9
+       lea     (%r11,%r8,4), %r12
+       shr     $62, %r8
+L(e00):        lea     (%r8,%r9,4), %r13
+       shr     $62, %r9
+       mov     24(vp,n,8), %r11
+       lea     (%r9,%r10,4), %r14
+       shr     $62, %r10
+       lea     (%r10,%r11,4), %r15
+       shr     $62, %r11
+       add     R32(%rax), R32(%rax)              C restore carry
+       ADCSBB  (up,n,8), %r12
+       ADCSBB  8(up,n,8), %r13
+       ADCSBB  16(up,n,8), %r14
+       ADCSBB  24(up,n,8), %r15
+       mov     %r12, (rp,n,8)
+       mov     %r13, 8(rp,n,8)
+       mov     %r14, 16(rp,n,8)
+       sbb     R32(%rax), R32(%rax)              C save carry for next
+       mov     %r15, 24(rp,n,8)
+       add     $4, n
+       js      L(top)
+L(end):                                C eax = -carry, r11 = bits out of last limb
+
+ifdef(`OPERATION_addlsh2_n',`
+       sub     R32(%r11), R32(%rax)
+       neg     R32(%rax)')
+ifdef(`OPERATION_rsblsh2_n',`
+       add     R32(%r11), R32(%rax)
+       movslq  R32(%rax), %rax')
+
+       pop     %r15
+       pop     %r14
+       pop     %r13
+       pop     %r12
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/aorrlsh_n.asm b/mpn/x86_64/aorrlsh_n.asm

new file mode 100644 (file)

index 0000000..9aa8af9
--- /dev/null
+++ b/mpn/x86_64/aorrlsh_n.asm
@@ -0,0 +1,161 @@
+dnl  AMD64 mpn_addlsh_n and mpn_rsblsh_n.  R = V2^k +- U.
+dnl  ("rsb" means reversed subtract, name mandated by mpn_sublsh1_n which
+dnl  subtracts the shifted operand from the unshifted operand.)
+
+dnl  Copyright 2006 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C           cycles/limb
+C K8,K9:        3.25   (mpn_lshift + mpn_add_n costs about 4.1 c/l)
+C K10:          3.25   (mpn_lshift + mpn_add_n costs about 4.1 c/l)
+C P4:          14
+C P6-15:        4
+
+C This was written quickly and not optimized at all.  Surely one could get
+C closer to 3 c/l or perhaps even under 3 c/l.  Ideas:
+C   1) Use indexing to save the 3 LEA
+C   2) Write reasonable feed-in code
+C   3) Be more clever about register usage
+C   4) Unroll more, handling CL negation, carry save/restore cost much now
+C   5) Reschedule
+
+C INPUT PARAMETERS
+define(`rp',   `%rdi')
+define(`up',   `%rsi')
+define(`vp',   `%rdx')
+define(`n',    `%rcx')
+define(`cnt',  `%r8')
+
+ifdef(`OPERATION_addlsh_n',`
+  define(ADDSUBC,       `adc')
+  define(func, mpn_addlsh_n)
+')
+ifdef(`OPERATION_rsblsh_n',`
+  define(ADDSUBC,       `sbb')
+  define(func, mpn_rsblsh_n)
+')
+
+MULFUNC_PROLOGUE(mpn_addlsh_n mpn_rsblsh_n)
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(func)
+
+       push    %r12                    C save the callee-saved registers used below
+       push    %r13
+       push    %r14
+       push    %r15
+       push    %rbx
+
+       mov     n, %rax                 C rax = limb count, rcx is about to hold the shift count
+       xor     %ebx, %ebx              C clear carry save register
+       mov     %r8d, %ecx              C shift count
+       xor     %r15d, %r15d            C limb carry, bits shifted out of the previous v limb
+
+       mov     %eax, %r11d
+       and     $3, %r11d               C r11d = n mod 4, limbs for the one-limb loop
+       je      L(4)                    C nothing left over, go straight to the 4-way loop
+       sub     $1, %r11d               C bias so the jnc below runs the loop n mod 4 times
+
+L(oopette):
+       mov     0(vp), %r8              C r8 = next v limb
+       mov     %r8, %r12               C keep an unshifted copy for the high bits
+       shl     %cl, %r8                C low part of v shifted left by cnt
+       or      %r15, %r8               C bring in bits carried from the previous limb
+       neg     %cl                     C cl = -cnt, shr uses the count mod 64 so it shifts by 64-cnt
+       mov     %r12, %r15
+       shr     %cl, %r15               C r15 = bits shifted out, used by the next limb
+       neg     %cl                     C back to cl = cnt
+       add     %ebx, %ebx              C restore carry flag from ebx, which is 0 or -1
+       ADDSUBC 0(up), %r8              C r8 = shifted v limb +- u limb, with carry or borrow in
+       mov     %r8, 0(rp)
+       sbb     %ebx, %ebx              C save carry flag as 0 or -1, sub below clobbers it
+       lea     8(up), up               C advance the three pointers by one limb
+       lea     8(vp), vp
+       lea     8(rp), rp
+       sub     $1, %r11d
+       jnc     L(oopette)              C borrow out of the counter ends the loop
+
+L(4):
+       sub     $4, %rax                C rax still holds the full n, borrow means no group of 4
+       jc      L(end)
+
+L(oop):
+       mov     0(vp), %r8              C load four v limbs, r12-r14 keep unshifted copies
+       mov     %r8, %r12
+       mov     8(vp), %r9
+       mov     %r9, %r13
+       mov     16(vp), %r10
+       mov     %r10, %r14
+       mov     24(vp), %r11
+
+       shl     %cl, %r8
+       shl     %cl, %r9
+       shl     %cl, %r10
+       or      %r15, %r8               C bring in bits carried from the previous round
+       mov     %r11, %r15              C unshifted top limb, its high bits carry to the next round
+       shl     %cl, %r11
+
+       neg     %cl                     C cl = -cnt. NOTE(review): looks like this needs 0 < cnt < 64, confirm
+
+       shr     %cl, %r12
+       shr     %cl, %r13
+       shr     %cl, %r14
+       shr     %cl, %r15               C used next loop
+
+       or      %r12, %r9               C each limb takes the high bits of the limb below it
+       or      %r13, %r10
+       or      %r14, %r11
+
+       neg     %cl                     C back to cl = cnt
+
+       add     %ebx, %ebx              C restore carry flag
+
+       ADDSUBC 0(up), %r8
+       ADDSUBC 8(up), %r9
+       ADDSUBC 16(up), %r10
+       ADDSUBC 24(up), %r11
+
+       mov     %r8, 0(rp)
+       mov     %r9, 8(rp)
+       mov     %r10, 16(rp)
+       mov     %r11, 24(rp)
+
+       sbb     %ebx, %ebx              C save carry flag
+
+       lea     32(up), up              C advance the three pointers by four limbs
+       lea     32(vp), vp
+       lea     32(rp), rp
+
+       sub     $4, %rax
+       jnc     L(oop)                  C no borrow means another full group of 4 remains
+L(end):
+       add     %ebx, %ebx              C restore carry flag one last time
+       ADDSUBC $0, %r15                C fold it into the bits shifted out of the top limb
+       mov     %r15, %rax              C return value
+       pop     %rbx                    C restore in reverse order of the pushes
+       pop     %r15
+       pop     %r14
+       pop     %r13
+       pop     %r12
+
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/aors_n.asm b/mpn/x86_64/aors_n.asm

new file mode 100644 (file)

index 0000000..7928f61
--- /dev/null
+++ b/mpn/x86_64/aors_n.asm
@@ -0,0 +1,145 @@
+dnl  AMD64 mpn_add_n, mpn_sub_n
+
+dnl  Copyright 2003, 2004, 2005, 2007, 2008, 2010 Free Software Foundation,
+dnl  Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C AMD K8,K9     1.5
+C AMD K10       1.5
+C Intel P4      ?
+C Intel core2   4.9
+C Intel corei   ?
+C Intel atom    4
+C VIA nano      3.25
+
+C The inner loop of this code is the result of running a code generation and
+C optimization tool suite written by David Harvey and Torbjorn Granlund.
+
+C INPUT PARAMETERS
+define(`rp',   `%rdi')
+define(`up',   `%rsi')
+define(`vp',   `%rdx')
+define(`n',    `%rcx')
+define(`cy',   `%r8')          C (only for mpn_add_nc)
+
+ifdef(`OPERATION_add_n', `
+       define(ADCSBB,        adc)
+       define(func,          mpn_add_n)
+       define(func_nc,       mpn_add_nc)')
+ifdef(`OPERATION_sub_n', `
+       define(ADCSBB,        sbb)
+       define(func,          mpn_sub_n)
+       define(func_nc,       mpn_sub_nc)')
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(func_nc)
+       mov     R32(n), R32(%rax)
+       shr     $2, n                   C n = number of full 4-limb groups
+       and     $3, R32(%rax)           C eax = leftover limb count
+       bt      $0, %r8                 C cy flag <- carry parameter
+       jrcxz   L(lt4)                  C no full group, jrcxz tests rcx without touching flags
+
+       mov     (up), %r8               C preload the first two u limbs
+       mov     8(up), %r9
+       dec     n                       C dec keeps CF, its ZF is consumed by the jnz after L(mid)
+       jmp     L(mid)                  C join the shared body inside the second entry point
+
+EPILOGUE()
+       ALIGN(16)
+PROLOGUE(func)
+       mov     R32(n), R32(%rax)
+       shr     $2, n                   C n = number of full 4-limb groups
+       and     $3, R32(%rax)           C eax = leftover limb count, and also clears CF
+       jrcxz   L(lt4)                  C no full group
+
+       mov     (up), %r8               C preload the first two u limbs
+       mov     8(up), %r9
+       dec     n                       C dec keeps CF, its ZF is consumed by the jnz after L(mid)
+       jmp     L(mid)
+
+L(lt4):        dec     R32(%rax)               C count down the leftover limbs, CF is preserved
+       mov     (up), %r8
+       jnz     L(2)                    C more than one limb left
+       ADCSBB  (vp), %r8
+       mov     %r8, (rp)
+       adc     %eax, %eax              C eax is 0 here, so this returns the carry flag
+       ret
+
+L(2):  dec     R32(%rax)
+       mov     8(up), %r9
+       jnz     L(3)                    C more than two limbs left
+       ADCSBB  (vp), %r8
+       ADCSBB  8(vp), %r9
+       mov     %r8, (rp)
+       mov     %r9, 8(rp)
+       adc     %eax, %eax              C eax is 0 here, so this returns the carry flag
+       ret
+
+L(3):  mov     16(up), %r10
+       ADCSBB  (vp), %r8
+       ADCSBB  8(vp), %r9
+       ADCSBB  16(vp), %r10
+       mov     %r8, (rp)
+       mov     %r9, 8(rp)
+       mov     %r10, 16(rp)
+       setc    R8(%rax)                C low byte = carry, eax was 1 for a leftover of 3 so the rest is zero
+       ret
+
+       ALIGN(16)
+L(top):        ADCSBB  (vp), %r8
+       ADCSBB  8(vp), %r9
+       ADCSBB  16(vp), %r10
+       ADCSBB  24(vp), %r11
+       mov     %r8, (rp)
+       lea     32(up), up
+       mov     %r9, 8(rp)
+       mov     %r10, 16(rp)
+       dec     n                       C group counter, CF from the chain above is kept
+       mov     %r11, 24(rp)
+       lea     32(vp), vp
+       mov     (up), %r8               C start loading the next group
+       mov     8(up), %r9
+       lea     32(rp), rp
+L(mid):        mov     16(up), %r10
+       mov     24(up), %r11
+       jnz     L(top)                  C ZF is still from dec n, mov and lea leave flags alone
+
+L(end):        lea     32(up), up              C last full group, its limbs are already loaded
+       ADCSBB  (vp), %r8
+       ADCSBB  8(vp), %r9
+       ADCSBB  16(vp), %r10
+       ADCSBB  24(vp), %r11
+       lea     32(vp), vp
+       mov     %r8, (rp)
+       mov     %r9, 8(rp)
+       mov     %r10, 16(rp)
+       mov     %r11, 24(rp)
+       lea     32(rp), rp
+
+       inc     R32(%rax)               C inc then dec sets ZF from eax without disturbing CF
+       dec     R32(%rax)
+       jnz     L(lt4)                  C handle the 1 to 3 leftover limbs
+       adc     %eax, %eax              C eax is 0 here, so this returns the carry flag
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/aorsmul_1.asm b/mpn/x86_64/aorsmul_1.asm

new file mode 100644 (file)

index 0000000..cbf4b46
--- /dev/null
+++ b/mpn/x86_64/aorsmul_1.asm
@@ -0,0 +1,148 @@
+dnl  AMD64 mpn_addmul_1 and mpn_submul_1.
+
+dnl  Copyright 2003, 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C K8,K9:        2.5
+C K10:          2.5
+C P4:          14.9
+C P6 core2:     5.09
+C P6 corei7:
+C P6 atom:     21.3
+
+C The inner loop of this code is the result of running a code generation and
+C optimization tool suite written by David Harvey and Torbjorn Granlund.
+
+C TODO:
+C  * The inner loop is great, but the prologue and epilogue code was
+C    quickly written.  Tune it!
+
+C INPUT PARAMETERS
+define(`rp',    `%rdi')
+define(`up',    `%rsi')
+define(`n_param',`%rdx')
+define(`vl',    `%rcx')
+
+define(`n',    `%r11')
+
+ifdef(`OPERATION_addmul_1',`
+      define(`ADDSUB',        `add')
+      define(`func',  `mpn_addmul_1')
+')
+ifdef(`OPERATION_submul_1',`
+      define(`ADDSUB',        `sub')
+      define(`func',  `mpn_submul_1')
+')
+
+MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(func)
+       mov     (up), %rax              C read first u limb early
+       push    %rbx                    C callee-saved, used below as one of the accumulators
+       mov     n_param, %rbx           C move away n from rdx, mul uses it
+       mul     vl                      C rdx:rax = first u limb times vl
+       mov     %rbx, %r11              C n lives in r11 from here on
+
+       and     $3, R32(%rbx)           C dispatch on n mod 4 to pick the loop entry point
+       jz      L(b0)
+       cmp     $2, R32(%rbx)
+       jz      L(b2)
+       jg      L(b3)
+
+L(b1): dec     n
+       jne     L(gt1)
+       ADDSUB  %rax, (rp)              C n was 1, a single limb and no loop
+       jmp     L(ret)
+L(gt1):        lea     8(up,n,8), up           C bias up and rp toward the operand ends
+       lea     -8(rp,n,8), rp
+       neg     n                       C n is now a negative index that counts up to zero
+       xor     %r10, %r10
+       xor     R32(%rbx), R32(%rbx)
+       mov     %rax, %r9               C first product goes into the r9 and r8 slots used at L(L1)
+       mov     (up,n,8), %rax
+       mov     %rdx, %r8
+       jmp     L(L1)
+
+L(b0): lea     (up,n,8), up
+       lea     -16(rp,n,8), rp
+       neg     n                       C n is now a negative index that counts up to zero
+       xor     %r10, %r10
+       mov     %rax, %r8               C first product goes into the r8 and rbx slots used at L(L0)
+       mov     %rdx, %rbx
+       jmp      L(L0)
+
+L(b3): lea     -8(up,n,8), up
+       lea     -24(rp,n,8), rp
+       neg     n                       C n is now a negative index that counts up to zero
+       mov     %rax, %rbx              C first product goes into the rbx and r10 slots used at L(L3)
+       mov     %rdx, %r10
+       jmp     L(L3)
+
+L(b2): lea     -16(up,n,8), up
+       lea     -32(rp,n,8), rp
+       neg     n                       C n is now a negative index that counts up to zero
+       xor     %r8, %r8
+       xor     R32(%rbx), R32(%rbx)
+       mov     %rax, %r10              C first product goes into the r10 and r9 slots used at L(L2)
+       mov     24(up,n,8), %rax
+       mov     %rdx, %r9
+       jmp     L(L2)
+
+       ALIGN(16)
+L(top):        ADDSUB  %r10, (rp,n,8)          C four limbs per pass, each product overlaps the previous store
+       adc     %rax, %r9
+       mov     (up,n,8), %rax
+       adc     %rdx, %r8
+       mov     $0, %r10d               C mov keeps the flags, unlike xor
+L(L1): mul     vl
+       ADDSUB  %r9, 8(rp,n,8)
+       adc     %rax, %r8
+       adc     %rdx, %rbx
+L(L0): mov     8(up,n,8), %rax
+       mul     vl
+       ADDSUB  %r8, 16(rp,n,8)
+       adc     %rax, %rbx
+       adc     %rdx, %r10
+L(L3): mov     16(up,n,8), %rax
+       mul     vl
+       ADDSUB  %rbx, 24(rp,n,8)
+       mov     $0, %r8d                # zero
+       mov     %r8, %rbx               # zero
+       adc     %rax, %r10
+       mov     24(up,n,8), %rax
+       mov     %r8, %r9                # zero
+       adc     %rdx, %r9
+L(L2): mul     vl
+       add     $4, n
+       js       L(top)                 C loop while the index is still negative
+
+       ADDSUB  %r10, (rp,n,8)          C wind down, store the last two pending limbs
+       adc     %rax, %r9
+       adc     %r8, %rdx               C r8 is zero on every path here, so this only adds the carry
+       ADDSUB  %r9, 8(rp,n,8)
+L(ret):        adc     $0, %rdx                C fold the final carry or borrow into the high limb
+       mov     %rdx, %rax              C return value
+
+       pop     %rbx
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/atom/aors_n.asm b/mpn/x86_64/atom/aors_n.asm

new file mode 100644 (file)

index 0000000..6319ae8
--- /dev/null
+++ b/mpn/x86_64/atom/aors_n.asm
@@ -0,0 +1,145 @@
+dnl  X86-64 mpn_add_n, mpn_sub_n, optimized for Intel Atom.
+
+dnl  Copyright 2003, 2004, 2005, 2007, 2008, 2010 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C           cycles/limb
+C K8,K9:        1.85
+C K10:          ?
+C P4:           ?
+C P6-15 (Core2): ?
+C P6-28 (Atom):         3
+
+C INPUT PARAMETERS
+define(`rp',   `%rdi')
+define(`up',   `%rsi')
+define(`vp',   `%rdx')
+define(`n',    `%rcx')
+define(`cy',   `%r8')          C (only for mpn_add_nc)
+
+ifdef(`OPERATION_add_n', `
+       define(ADCSBB,        adc)
+       define(func,          mpn_add_n)
+       define(func_nc,       mpn_add_nc)')
+ifdef(`OPERATION_sub_n', `
+       define(ADCSBB,        sbb)
+       define(func,          mpn_sub_n)
+       define(func_nc,       mpn_sub_nc)')
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(func_nc)
+       jmp     L(ent)                  C carry-in is already in r8, skip the clearing below
+EPILOGUE()
+PROLOGUE(func)
+       xor     %r8, %r8                C plain entry point, carry-in is zero
+L(ent):
+       mov     R32(%rcx), R32(%rax)
+       shr     $2, %rcx                C rcx = number of full 4-limb groups
+       and     $3, R32(%rax)           C eax = n mod 4, selects the feed-in path
+       jz      L(b0)
+       cmp     $2, R32(%rax)
+       jz      L(b2)
+       jg      L(b3)
+
+L(b1): mov     (%rsi), %r10
+       test    %rcx, %rcx              C any full groups after the odd limb
+       jnz     L(gt1)
+       shr     R32(%r8)                        C Set CF from argument
+       ADCSBB  (%rdx), %r10
+       mov     %r10, (%rdi)
+       mov     R32(%rcx), R32(%rax)            C zero rax
+       adc     R32(%rax), R32(%rax)    C return the carry flag
+       ret
+L(gt1):        shr     R32(%r8)                C CF = bit 0 of the carry-in
+       ADCSBB  (%rdx), %r10
+       mov     8(%rsi), %r11
+       lea     16(%rsi), %rsi          C bias the pointers to match the offsets used from L(m1)
+       lea     -16(%rdx), %rdx
+       lea     -16(%rdi), %rdi
+       jmp     L(m1)
+
+L(b2): mov     (%rsi), %r9
+       mov     8(%rsi), %r10
+       lea     -8(%rdx), %rdx          C bias vp so that 8(vp) is the first v limb
+       test    %rcx, %rcx              C any full groups after the two odd limbs
+       jnz     L(gt2)
+       shr     R32(%r8)                C CF = bit 0 of the carry-in
+       lea     -40(%rdi), %rdi         C bias rp so the store at L(e2) lands on the first r limb
+       jmp     L(e2)
+L(gt2):        shr     R32(%r8)                C CF = bit 0 of the carry-in
+       ADCSBB  8(%rdx), %r9
+       mov     16(%rsi), %r11
+       lea     -8(%rsi), %rsi          C bias the pointers to match the offsets used from L(m2)
+       lea     -8(%rdi), %rdi
+       jmp     L(m2)
+
+L(b3): mov     (%rsi), %rax
+       mov     8(%rsi), %r9
+       mov     16(%rsi), %r10
+       test    %rcx, %rcx              C any full groups after the three odd limbs
+       jnz     L(gt3)
+       shr     R32(%r8)                C CF = bit 0 of the carry-in
+       lea     -32(%rdi), %rdi         C bias rp so the store at L(e3) lands on the first r limb
+       jmp     L(e3)
+L(gt3):        shr     R32(%r8)                C CF = bit 0 of the carry-in
+       ADCSBB  (%rdx), %rax
+       jmp     L(m3)
+
+L(b0): mov     (%rsi), %r11
+       neg     R32(%r8)                C CF = 1 when the carry-in is nonzero
+       lea     -24(%rdx), %rdx         C bias the pointers to match the offsets used from L(m0)
+       lea     -24(%rdi), %rdi
+       lea     8(%rsi), %rsi
+       jmp     L(m0)
+
+       ALIGN(8)
+L(top):        mov     %r11, 24(%rdi)          C stores trail the adc or sbb chain by one limb
+       ADCSBB  (%rdx), %rax
+       lea     32(%rdi), %rdi
+L(m3): mov     %rax, (%rdi)
+       ADCSBB  8(%rdx), %r9
+       mov     24(%rsi), %r11
+L(m2): mov     %r9, 8(%rdi)
+       ADCSBB  16(%rdx), %r10
+       lea     32(%rsi), %rsi
+L(m1): mov     %r10, 16(%rdi)
+L(m0): ADCSBB  24(%rdx), %r11
+       mov     (%rsi), %rax            C preload the u limbs for the next pass or the tail
+       mov     8(%rsi), %r9
+       lea     32(%rdx), %rdx
+       dec     %rcx                    C group counter, dec leaves CF for the chain
+       mov     16(%rsi), %r10
+       jnz     L(top)                  C ZF is still from dec, mov leaves flags alone
+
+       mov     %r11, 24(%rdi)
+L(e3): ADCSBB  (%rdx), %rax
+       mov     %rax, 32(%rdi)
+L(e2): ADCSBB  8(%rdx), %r9
+       mov     %r9, 40(%rdi)
+L(e1): ADCSBB  16(%rdx), %r10          C NOTE(review): no branch to this label is visible in this routine
+       mov     %r10, 48(%rdi)
+       mov     R32(%rcx), R32(%rax)            C zero rax
+       adc     R32(%rax), R32(%rax)    C return the carry flag
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/atom/gmp-mparam.h b/mpn/x86_64/atom/gmp-mparam.h

new file mode 100644 (file)

index 0000000..a124f3c
--- /dev/null
+++ b/mpn/x86_64/atom/gmp-mparam.h
@@ -0,0 +1,187 @@
+/* Intel Atom/64 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
+2008, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 64
+#define BYTES_PER_MP_LIMB 8
+
+#define MOD_1_NORM_THRESHOLD                 0  /* always */
+#define MOD_1_UNNORM_THRESHOLD               0  /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD         37
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          8
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0  /* never mpn_mod_1_1p */
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        14
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     69
+#define USE_PREINV_DIVREM_1                  1  /* native */
+#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD           32
+
+#define MUL_TOOM22_THRESHOLD                10
+#define MUL_TOOM33_THRESHOLD                66
+#define MUL_TOOM44_THRESHOLD               118
+#define MUL_TOOM6H_THRESHOLD               157
+#define MUL_TOOM8H_THRESHOLD               236
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      65
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD      76
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD      73
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD      66
+
+#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
+#define SQR_TOOM2_THRESHOLD                 16
+#define SQR_TOOM3_THRESHOLD                 65
+#define SQR_TOOM4_THRESHOLD                166
+#define SQR_TOOM6_THRESHOLD                226
+#define SQR_TOOM8_THRESHOLD                333
+
+#define MULMOD_BNM1_THRESHOLD                9
+#define SQRMOD_BNM1_THRESHOLD                9
+
+#define MUL_FFT_MODF_THRESHOLD             208  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    208, 5}, {      7, 4}, {     15, 5}, {     11, 6}, \
+    {      6, 5}, {     13, 6}, {      7, 5}, {     15, 6}, \
+    {     13, 7}, {      7, 6}, {     15, 7}, {      9, 6}, \
+    {     19, 7}, {     13, 8}, {      7, 7}, {     16, 8}, \
+    {      9, 7}, {     19, 8}, {     11, 7}, {     23, 8}, \
+    {     13, 9}, {      7, 8}, {     15, 7}, {     31, 8}, \
+    {     19, 9}, {     11, 8}, {     25,10}, {      7, 9}, \
+    {     15, 8}, {     33, 9}, {     19, 8}, {     39, 9}, \
+    {     23,10}, {     15, 9}, {     39,10}, {     23, 9}, \
+    {     47,11}, {     15,10}, {     31, 9}, {     63, 8}, \
+    {    127, 9}, {     67,10}, {     39, 9}, {     79, 8}, \
+    {    159,10}, {     47,11}, {     31,10}, {     63, 9}, \
+    {    127, 8}, {    255, 7}, {    511,10}, {     71, 9}, \
+    {    143, 8}, {    287, 7}, {    575,10}, {     79, 9}, \
+    {    159, 8}, {    319,11}, {     47, 9}, {    191,12}, \
+    {     31,11}, {     63,10}, {    127, 9}, {    255, 8}, \
+    {    511,10}, {    143, 9}, {    287, 8}, {    575,11}, \
+    {     79,10}, {    159, 9}, {    319,10}, {    175, 9}, \
+    {    351, 8}, {    703, 7}, {   1407,10}, {    191, 9}, \
+    {    415,11}, {    111,10}, {    223, 9}, {    447,12}, \
+    {     63,11}, {    127,10}, {    255, 9}, {    511,11}, \
+    {    143,10}, {    287, 9}, {    575, 8}, {   1151,10}, \
+    {    319,11}, {    175,10}, {    351, 9}, {    703, 8}, \
+    {   1407,11}, {    191,10}, {    383, 9}, {    767,10}, \
+    {    415,11}, {    223,10}, {    447, 9}, {    895,13}, \
+    {     63,12}, {    127,11}, {    255,10}, {    511,11}, \
+    {    287,10}, {    575, 9}, {   1151,12}, {    159,11}, \
+    {    319,10}, {    639,11}, {    351,10}, {    703, 9}, \
+    {   1407,12}, {    191,11}, {    383,10}, {    767,11}, \
+    {    415,12}, {    223,11}, {    447,10}, {    895,11}, \
+    {    479,13}, {    127,12}, {    255,11}, {    511,12}, \
+    {    287,11}, {    575,10}, {   1151,12}, {    319,11}, \
+    {    639,12}, {    351,11}, {    703,10}, {   1407,13}, \
+    {    191,12}, {    383,11}, {    767,12}, {    415,11}, \
+    {    831,10}, {   1663,12}, {    447,11}, {    895,14}, \
+    {    127,13}, {    255,12}, {    511,11}, {   1023,12}, \
+    {    575,11}, {   1151,13}, {    319,12}, {    703,11}, \
+    {   1407,13}, {    383,12}, {    831,13}, {    447,12}, \
+    {    895,14}, {    255,13}, {    511,12}, {   1023,13}, \
+    {    575,12}, {   1151,13}, {    703,12}, {   1407,14}, \
+    {    383,13}, {    831,12}, {   1663,13}, {    895,15}, \
+    {    255,14}, {    511,13}, {   1023,12}, {   2175,13}, \
+    {   1151,14}, {    639,13}, {   1407,12}, {   2815,14}, \
+    {    767,13}, {   1663,14}, {    895,13}, {   1919,15}, \
+    {    511,14}, {   1023,13}, {   2047,14}, {   1151,13}, \
+    {   2431,14}, {   1407,13}, {   2815,15}, {    767,14}, \
+    {   1663,16}, {  65536,17}, { 131072,18}, { 262144,19}, \
+    { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
+    {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 193
+#define MUL_FFT_THRESHOLD                 1728
+
+#define SQR_FFT_MODF_THRESHOLD             208  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    208, 5}, {      7, 4}, {     15, 5}, {     11, 6}, \
+    {      6, 5}, {     13, 6}, {      7, 5}, {     15, 6}, \
+    {     13, 7}, {      7, 6}, {     15, 7}, {      8, 6}, \
+    {     17, 7}, {     17, 8}, {      9, 7}, {     19, 8}, \
+    {     11, 7}, {     23, 8}, {     13, 9}, {      7, 8}, \
+    {     19, 9}, {     11, 8}, {     25,10}, {      7, 9}, \
+    {     15, 8}, {     31, 9}, {     19, 8}, {     39, 9}, \
+    {     23,10}, {     15, 9}, {     39,10}, {     23, 9}, \
+    {     47,11}, {     15,10}, {     31, 9}, {     67,10}, \
+    {     39, 9}, {     79, 8}, {    159,10}, {     47,11}, \
+    {     31,10}, {     63, 9}, {    127, 8}, {    255,10}, \
+    {     71, 9}, {    143, 8}, {    287, 7}, {    575, 9}, \
+    {    159, 8}, {    319,11}, {     47, 9}, {    191,12}, \
+    {     31,11}, {     63,10}, {    127, 9}, {    255, 8}, \
+    {    511,10}, {    143, 9}, {    287, 8}, {    575,10}, \
+    {    159, 9}, {    319, 8}, {    639, 9}, {    351, 8}, \
+    {    703,10}, {    191, 9}, {    383,10}, {    207, 9}, \
+    {    415,11}, {    111,10}, {    223,12}, {     63,11}, \
+    {    127,10}, {    255, 9}, {    511,11}, {    143,10}, \
+    {    287, 9}, {    575,11}, {    159,10}, {    319, 9}, \
+    {    639,11}, {    175,10}, {    351, 9}, {    703,11}, \
+    {    191,10}, {    383,11}, {    207,10}, {    415,11}, \
+    {    223,10}, {    447,13}, {     63,12}, {    127,11}, \
+    {    255,10}, {    511,11}, {    287,10}, {    575,12}, \
+    {    159,11}, {    319,10}, {    639,11}, {    351,10}, \
+    {    703,12}, {    191,11}, {    383,10}, {    767,11}, \
+    {    415,12}, {    223,11}, {    447,13}, {    127,12}, \
+    {    255,11}, {    543,12}, {    287,11}, {    575,12}, \
+    {    319,11}, {    639,12}, {    351,13}, {    191,12}, \
+    {    383,11}, {    767,12}, {    415,11}, {    831,12}, \
+    {    479,13}, {    255,10}, {   2047,12}, {    575,13}, \
+    {    319,11}, {   1279,12}, {    703,13}, {    383,12}, \
+    {    831,13}, {    447,12}, {    895,14}, {    255,13}, \
+    {    511,12}, {   1023,13}, {    575,12}, {   1151,13}, \
+    {    703,14}, {    383,13}, {    831,12}, {   1663,13}, \
+    {    895,15}, {    255,14}, {    511,13}, {   1151,14}, \
+    {    639,13}, {   1407,12}, {   2815,14}, {    767,13}, \
+    {   1663,14}, {    895,13}, {   1791,15}, {  32768,16}, \
+    {  65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
+    {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 160
+#define SQR_FFT_THRESHOLD                 1600
+
+#define MULLO_BASECASE_THRESHOLD             0
+#define MULLO_DC_THRESHOLD                  22
+#define MULLO_MUL_N_THRESHOLD             3176
+
+#define DC_DIV_QR_THRESHOLD                 26
+#define DC_DIVAPPR_Q_THRESHOLD              93
+#define DC_BDIV_QR_THRESHOLD                27
+#define DC_BDIV_Q_THRESHOLD                 62
+
+#define INV_MULMOD_BNM1_THRESHOLD           18
+#define INV_NEWTON_THRESHOLD               131
+#define INV_APPR_THRESHOLD                 110
+
+#define BINV_NEWTON_THRESHOLD              165
+#define REDC_1_TO_REDC_2_THRESHOLD          12
+#define REDC_2_TO_REDC_N_THRESHOLD          36
+
+#define MU_DIV_QR_THRESHOLD                792
+#define MU_DIVAPPR_Q_THRESHOLD             807
+#define MUPI_DIV_QR_THRESHOLD               67
+#define MU_BDIV_QR_THRESHOLD               654
+#define MU_BDIV_Q_THRESHOLD                792
+
+#define MATRIX22_STRASSEN_THRESHOLD         13
+#define HGCD_THRESHOLD                      83
+#define GCD_DC_THRESHOLD                   198
+#define GCDEXT_DC_THRESHOLD                198
+#define JACOBI_BASE_METHOD                   2
+
+#define GET_STR_DC_THRESHOLD                15
+#define GET_STR_PRECOMPUTE_THRESHOLD        27
+#define SET_STR_DC_THRESHOLD               254
+#define SET_STR_PRECOMPUTE_THRESHOLD      1122
diff --git a/mpn/x86_64/bd1/gmp-mparam.h b/mpn/x86_64/bd1/gmp-mparam.h

new file mode 100644 (file)

index 0000000..73890fe
--- /dev/null
+++ b/mpn/x86_64/bd1/gmp-mparam.h
@@ -0,0 +1,182 @@
+/* AMD Bulldozer-1 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
+2008, 2009, 2010, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 64
+#define BYTES_PER_MP_LIMB 8
+
+#define MOD_1_NORM_THRESHOLD                 0  /* always */
+#define MOD_1_UNNORM_THRESHOLD               0  /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          7
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          5
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0  /* never mpn_mod_1_1p */
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        12
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     14
+#define USE_PREINV_DIVREM_1                  1  /* native */
+#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD           24
+
+#define MUL_TOOM22_THRESHOLD                18
+#define MUL_TOOM33_THRESHOLD                53
+#define MUL_TOOM44_THRESHOLD               154
+#define MUL_TOOM6H_THRESHOLD               274
+#define MUL_TOOM8H_THRESHOLD               466
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      97
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     140
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     105
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD     109
+
+#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
+#define SQR_TOOM2_THRESHOLD                 24
+#define SQR_TOOM3_THRESHOLD                 85
+#define SQR_TOOM4_THRESHOLD                119
+#define SQR_TOOM6_THRESHOLD                318
+#define SQR_TOOM8_THRESHOLD                502
+
+#define MULMOD_BNM1_THRESHOLD               11
+#define SQRMOD_BNM1_THRESHOLD               16
+
+#define MUL_FFT_MODF_THRESHOLD             412  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    412, 5}, {     19, 6}, {     10, 5}, {     21, 6}, \
+    {     11, 5}, {     23, 6}, {     21, 7}, {     11, 6}, \
+    {     23, 7}, {     21, 8}, {     11, 7}, {     25, 8}, \
+    {     13, 7}, {     28, 8}, {     15, 7}, {     31, 8}, \
+    {     21, 9}, {     11, 8}, {     27, 9}, {     15, 8}, \
+    {     33, 9}, {     19, 8}, {     41, 9}, {     23, 8}, \
+    {     47, 9}, {     27,10}, {     15, 9}, {     31, 8}, \
+    {     63, 9}, {     39,10}, {     23, 9}, {     51,11}, \
+    {     15,10}, {     31, 9}, {     67,10}, {     39, 9}, \
+    {     79,10}, {     47, 9}, {     95,10}, {     55,11}, \
+    {     31,10}, {     79,11}, {     47,10}, {    103,12}, \
+    {     31,11}, {     63,10}, {    127,11}, {     79,10}, \
+    {    175,11}, {     95,10}, {    191,12}, {     63,11}, \
+    {    127,10}, {    255,11}, {    143,10}, {    287,11}, \
+    {    159,12}, {     95,13}, {     63,12}, {    127,11}, \
+    {    271, 9}, {   1087,11}, {    287,10}, {    575,11}, \
+    {    303,12}, {    159,11}, {    319,10}, {    671,11}, \
+    {    351,12}, {    191,11}, {    383,10}, {    767,11}, \
+    {    415,12}, {    223,11}, {    447,13}, {    127,12}, \
+    {    255,11}, {    543,12}, {    287,11}, {    575,10}, \
+    {   1215,12}, {    319,11}, {    639,12}, {    351,13}, \
+    {    191,12}, {    383,11}, {    767,12}, {    415,11}, \
+    {    831,10}, {   1663,12}, {    447,14}, {    127,13}, \
+    {    255,12}, {    543,11}, {   1087,10}, {   2175,12}, \
+    {    575,11}, {   1151,12}, {    607,11}, {   1215,13}, \
+    {    319,12}, {    639,11}, {   1279,12}, {    671,11}, \
+    {   1343,10}, {   2687,12}, {    703,13}, {    383,12}, \
+    {    767,11}, {   1535,12}, {    831,13}, {    447,12}, \
+    {    959,14}, {    255,13}, {    511,12}, {   1087,11}, \
+    {   2175,13}, {    575,12}, {   1215,11}, {   2431,10}, \
+    {   4863,13}, {    639,12}, {   1343,11}, {   2687,13}, \
+    {    703,12}, {   1407,14}, {    383,13}, {    767,12}, \
+    {   1535,13}, {    831,12}, {   1663,13}, {    959,15}, \
+    {    255,14}, {    511,13}, {   1087,12}, {   2175,13}, \
+    {   1215,12}, {   2431,11}, {   4863,14}, {    639,13}, \
+    {   1343,12}, {   2687,13}, {   1471,12}, {   2943,11}, \
+    {   5887,14}, {    767,13}, {   1599,12}, {   3199,13}, \
+    {   1727,14}, {    895,13}, {   1919,12}, {   3839,15}, \
+    {    511,14}, {   1023,13}, {   2175,14}, {   1151,13}, \
+    {   2431,12}, {   4863,14}, {  16384,15}, {  32768,16}, \
+    {  65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
+    {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 168
+#define MUL_FFT_THRESHOLD                 4736
+
+#define SQR_FFT_MODF_THRESHOLD             368  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    368, 5}, {     19, 6}, {     10, 5}, {     21, 6}, \
+    {     21, 7}, {     11, 6}, {     23, 7}, {     21, 8}, \
+    {     11, 7}, {     25, 8}, {     13, 7}, {     28, 8}, \
+    {     15, 7}, {     31, 8}, {     17, 7}, {     35, 8}, \
+    {     19, 7}, {     39, 8}, {     27, 9}, {     15, 8}, \
+    {     35, 9}, {     19, 8}, {     41, 9}, {     23, 8}, \
+    {     47, 9}, {     27,10}, {     15, 9}, {     39,10}, \
+    {     23, 9}, {     51,11}, {     15,10}, {     31, 9}, \
+    {     67,10}, {     39, 9}, {     79,10}, {     47, 9}, \
+    {     95,10}, {     55,11}, {     31,10}, {     79,11}, \
+    {     47,10}, {     95,12}, {     31,11}, {     63,10}, \
+    {    135,11}, {     79,10}, {    159,11}, {     95,10}, \
+    {    191,11}, {    111,12}, {     63,11}, {    127,10}, \
+    {    255, 9}, {    543,11}, {    143, 9}, {    575,12}, \
+    {     95,11}, {    191,13}, {     63,12}, {    127,11}, \
+    {    255,10}, {    511,11}, {    271,10}, {    543,11}, \
+    {    287,10}, {    575,11}, {    303,12}, {    159,11}, \
+    {    335,12}, {    191,11}, {    415,12}, {    223,11}, \
+    {    447,10}, {    895,13}, {    127,12}, {    255,11}, \
+    {    319,11}, {    639,10}, {   1279,12}, {    351,13}, \
+    {    191,12}, {    383,11}, {    767,12}, {    415,11}, \
+    {    831,10}, {   1663,12}, {    447,11}, {    895,14}, \
+    {    127,13}, {    255,12}, {    511,11}, {   1023,12}, \
+    {    543,11}, {   1087,10}, {   2175,12}, {    575,11}, \
+    {   1151,12}, {    607,13}, {    319,12}, {    639,11}, \
+    {   1279,12}, {    671,11}, {   1343,10}, {   2687,12}, \
+    {    703,13}, {    383,12}, {    767,11}, {   1599,12}, \
+    {    831,13}, {    447,12}, {    959,14}, {    255,13}, \
+    {    511,12}, {   1087,11}, {   2175,13}, {    575,12}, \
+    {   1151,11}, {   2303,12}, {   1215,11}, {   2431,10}, \
+    {   4863,13}, {    639,12}, {   1343,11}, {   2687,13}, \
+    {    703,12}, {   1407,14}, {    383,13}, {    767,12}, \
+    {   1599,13}, {    831,12}, {   1727,13}, {    895,15}, \
+    {    255,14}, {    511,13}, {   1087,12}, {   2175,13}, \
+    {   1215,12}, {   2431,11}, {   4863,14}, {    639,13}, \
+    {   1343,12}, {   2687,13}, {   1471,12}, {   2943,11}, \
+    {   5887,14}, {    767,13}, {   1599,12}, {   3199,13}, \
+    {   1727,14}, {    895,13}, {   1919,12}, {   3839,15}, \
+    {    511,14}, {   1023,13}, {   2175,14}, {   1151,13}, \
+    {   2431,12}, {   4863,14}, {  16384,15}, {  32768,16}, \
+    {  65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
+    {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 168  /* entries in SQR_FFT_TABLE3: 42 rows x 4 */
+#define SQR_FFT_THRESHOLD                 3264
+
+#define MULLO_BASECASE_THRESHOLD             4
+#define MULLO_DC_THRESHOLD                  30
+#define MULLO_MUL_N_THRESHOLD             8648
+
+#define DC_DIV_QR_THRESHOLD                 38
+#define DC_DIVAPPR_Q_THRESHOLD             187
+#define DC_BDIV_QR_THRESHOLD                48
+#define DC_BDIV_Q_THRESHOLD                 92
+
+#define INV_MULMOD_BNM1_THRESHOLD           49
+#define INV_NEWTON_THRESHOLD               202
+#define INV_APPR_THRESHOLD                 197
+
+#define BINV_NEWTON_THRESHOLD              246
+#define REDC_1_TO_REDC_2_THRESHOLD          55
+#define REDC_2_TO_REDC_N_THRESHOLD           0  /* anomaly: never REDC_2 */
+
+#define MU_DIV_QR_THRESHOLD               1470
+#define MU_DIVAPPR_Q_THRESHOLD            1470
+#define MUPI_DIV_QR_THRESHOLD               90
+#define MU_BDIV_QR_THRESHOLD              1187
+#define MU_BDIV_Q_THRESHOLD               1470
+
+#define MATRIX22_STRASSEN_THRESHOLD         15
+#define HGCD_THRESHOLD                      96
+#define GCD_DC_THRESHOLD                   400
+#define GCDEXT_DC_THRESHOLD                288
+#define JACOBI_BASE_METHOD                   1
+
+#define GET_STR_DC_THRESHOLD                12
+#define GET_STR_PRECOMPUTE_THRESHOLD        27
+#define SET_STR_DC_THRESHOLD               172
+#define SET_STR_PRECOMPUTE_THRESHOLD      1341
diff --git a/mpn/x86_64/bdiv_dbm1c.asm b/mpn/x86_64/bdiv_dbm1c.asm

new file mode 100644 (file)

index 0000000..d6775ae
--- /dev/null
+++ b/mpn/x86_64/bdiv_dbm1c.asm
@@ -0,0 +1,99 @@
+dnl  x86_64 mpn_bdiv_dbm1c.
+
+dnl  Copyright 2008 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C K8,K9:        2.25
+C K10:           ?
+C P4:          12.5
+C P6 core2:     4.0
+C P6 corei7:    3.8
+C P6 atom:     20
+
+C TODO
+C  * Do proper 4-way feed-in instead of the current epilogue
+
+C INPUT PARAMETERS shared
+define(`qp',   `%rdi')
+define(`up',   `%rsi')
+define(`n',    `%rdx')
+define(`bd',   `%rcx')
+define(`cy',   `%r8')
+
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_bdiv_dbm1c)
+       mov     (%rsi), %rax            C rax = up[0]
+       mov     %rdx, %r9               C r9 = n, copied because mul overwrites rdx
+
+       mul     %rcx                    C rdx:rax = up[0] * bd
+       sub     %rax, %r8               C cy -= low product limb
+       mov     %r8, (%rdi)             C qp[0] = cy after the low subtract
+       sbb     %rdx, %r8               C cy -= high product limb + borrow
+
+       lea     (%rsi,%r9,8), %rsi      C rsi = up end
+       lea     (%rdi,%r9,8), %rdi      C rdi = qp end
+       neg     %r9
+       add     $4, %r9                 C r9 = 4 - n
+       jns     L(end)                  C n <= 4: skip the 4-way loop
+       ALIGN(16)
+L(top):
+       mov     -24(%rsi,%r9,8), %rax   C limb r9-3 counted from up end
+       mul     %rcx
+       sub     %rax, %r8
+       mov     %r8, -24(%rdi,%r9,8)
+       sbb     %rdx, %r8
+L(3):
+       mov     -16(%rsi,%r9,8), %rax   C limb r9-2 counted from up end
+       mul     %rcx
+       sub     %rax, %r8
+       mov     %r8, -16(%rdi,%r9,8)
+       sbb     %rdx, %r8
+L(2):
+       mov     -8(%rsi,%r9,8), %rax    C limb r9-1 counted from up end
+       mul     %rcx
+       sub     %rax, %r8
+       mov     %r8, -8(%rdi,%r9,8)
+       sbb     %rdx, %r8
+L(1):
+       mov     (%rsi,%r9,8), %rax      C limb r9 counted from up end
+       mul     %rcx
+       sub     %rax, %r8
+       mov     %r8, (%rdi,%r9,8)
+       sbb     %rdx, %r8
+
+       add     $4, %r9
+       js      L(top)                  C r9 still negative: another full round
+L(end):
+       je      L(3x)                   C r9 = 0 from the last add: three limbs left
+       cmp     $2, %r9
+       jg      L(ret)                  C r9 > 2: no limbs left
+       mov     $-1, %r9                C mov keeps the flags set by cmp
+       je      L(1)                    C r9 was 2: one limb left
+       jmp     L(2)                    C r9 was 1: two limbs left
+L(3x):
+       dec     %r9                     C r9 = -1, so L(3) addresses the last three limbs
+       jmp     L(3)
+
+L(ret):        mov     %r8, %rax       C return the final cy value
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/bdiv_q_1.asm b/mpn/x86_64/bdiv_q_1.asm

new file mode 100644 (file)

index 0000000..2356f2b
--- /dev/null
+++ b/mpn/x86_64/bdiv_q_1.asm
@@ -0,0 +1,153 @@
+dnl  AMD64 mpn_bdiv_q_1, mpn_pi1_bdiv_q_1 -- schoolbook Hensel division by
+dnl  1-limb divisor, returning quotient only.
+
+dnl  Copyright 2001, 2002, 2004, 2005, 2006, 2009 Free Software Foundation,
+dnl  Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C           cycles/limb
+C K8,K9:       10
+C K10:         10
+C P4:          33
+C P6 core2:    13.25
+C P6 corei7:   14
+C P6 atom:     42
+
+
+C INPUT PARAMETERS
+C rp           rdi
+C up           rsi
+C n            rdx
+C d            rcx
+C di           r8      just mpn_pi1_bdiv_q_1
+C shift                r9      just mpn_pi1_bdiv_q_1
+
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_bdiv_q_1)
+       push    %rbx                    C popped again on the exit paths after L(com)
+
+       mov     %rcx, %rax              C rax = d
+       xor     R32(%rcx), R32(%rcx)    C shift count
+       mov     %rdx, %r10              C r10 = n
+
+       bt      $0, R32(%rax)           C CF = low bit of d
+       jnc     L(evn)                  C skip bsfq unless divisor is even
+
+L(odd):        mov     %rax, %rbx      C rbx = d with low zero bits removed
+       shr     R32(%rax)               C halve, discarding the low bit
+       and     $127, R32(%rax)         C d/2, 7 bits
+
+ifdef(`PIC',`
+       mov     binvert_limb_table@GOTPCREL(%rip), %rdx
+',`
+       movabs  $binvert_limb_table, %rdx
+')
+
+       movzbl  (%rdx,%rax), R32(%rax)  C inv 8 bits
+
+       mov     %rbx, %r11              C d without twos
+
+       lea     (%rax,%rax), R32(%rdx)  C 2*inv
+       imul    R32(%rax), R32(%rax)    C inv*inv
+       imul    R32(%rbx), R32(%rax)    C inv*inv*d
+       sub     R32(%rax), R32(%rdx)    C inv = 2*inv - inv*inv*d, 16 bits
+
+       lea     (%rdx,%rdx), R32(%rax)  C 2*inv
+       imul    R32(%rdx), R32(%rdx)    C inv*inv
+       imul    R32(%rbx), R32(%rdx)    C inv*inv*d
+       sub     R32(%rdx), R32(%rax)    C inv = 2*inv - inv*inv*d, 32 bits
+
+       lea     (%rax,%rax), %r8        C 2*inv
+       imul    %rax, %rax              C inv*inv
+       imul    %rbx, %rax              C inv*inv*d
+       sub     %rax, %r8               C inv = 2*inv - inv*inv*d, 64 bits
+
+       jmp     L(com)                  C join mpn_pi1_bdiv_q_1: r11 = d, r8 = inv, rcx = shift, r10 = n
+
+L(evn):        bsf     %rax, %rcx      C rcx = index of the lowest set bit of d
+       shr     R8(%rcx), %rax          C shift the low zero bits out of d
+       jmp     L(odd)
+EPILOGUE()
+
+PROLOGUE(mpn_pi1_bdiv_q_1)
+       push    %rbx                    C rbx holds the carry bit below
+
+       mov     %rcx, %r11              C d
+       mov     %rdx, %r10              C n
+       mov     %r9, %rcx               C shift
+L(com):
+       mov     (%rsi), %rax            C up[0]
+
+       dec     %r10                    C r10 = n - 1
+       jz      L(one)                  C single limb: no loop needed
+
+       mov     8(%rsi), %rdx           C up[1]
+       lea     (%rsi,%r10,8), %rsi     C up end, address of the last limb
+       lea     (%rdi,%r10,8), %rdi     C rp end, address of the last limb
+       neg     %r10                    C -(n-1)
+
+       shrd    R8(%rcx), %rdx, %rax    C rax = up[0] >> shift, top bits from up[1]
+
+       xor     R32(%rbx), R32(%rbx)    C carry bit = 0
+       jmp     L(ent)                  C first limb has no carry to apply
+
+       ALIGN(8)
+L(top):
+       C rax   q
+       C rbx   carry bit, 0 or 1
+       C rcx   shift
+       C rdx
+       C rsi   up end
+       C rdi   rp end
+       C r10   counter, limbs, negative
+
+       mul     %r11                    C carry limb in rdx
+       mov     (%rsi,%r10,8), %rax     C next source limb
+       mov     8(%rsi,%r10,8), %r9     C the limb above it
+       shrd    R8(%rcx), %r9, %rax     C shifted limb, top bits from the limb above
+       nop                             C NOTE(review): purpose not visible here, presumably scheduling padding
+       sub     %rbx, %rax              C apply carry bit
+       setc    R8(%rbx)                C carry bit = borrow from that subtract
+       sub     %rdx, %rax              C apply carry limb
+       adc     $0, %rbx                C add the borrow from the carry limb subtract
+L(ent):        imul    %r8, %rax       C q = limb * di
+       mov     %rax, (%rdi,%r10,8)     C store quotient limb
+       inc     %r10
+       jnz     L(top)                  C loop until r10 reaches 0
+
+       mul     %r11                    C carry limb in rdx
+       mov     (%rsi), %rax            C up high limb
+       shr     R8(%rcx), %rax          C plain shift, there is no limb above
+       sub     %rbx, %rax              C apply carry bit
+       sub     %rdx, %rax              C apply carry limb
+       imul    %r8, %rax               C last quotient limb
+       mov     %rax, (%rdi)
+       pop     %rbx
+       ret
+
+L(one):        shr     R8(%rcx), %rax  C n = 1: shift the only limb
+       imul    %r8, %rax               C q = limb * di
+       mov     %rax, (%rdi)            C rdi is still rp here
+       pop     %rbx
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/bobcat/gmp-mparam.h b/mpn/x86_64/bobcat/gmp-mparam.h

new file mode 100644 (file)

index 0000000..02ce938
--- /dev/null
+++ b/mpn/x86_64/bobcat/gmp-mparam.h
@@ -0,0 +1,179 @@
+/* AMD Bobcat gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
+2008, 2009, 2010, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 64
+#define BYTES_PER_MP_LIMB 8
+
+
+#define MOD_1_NORM_THRESHOLD                 0  /* always */
+#define MOD_1_UNNORM_THRESHOLD               0  /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD         10
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          4
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         6
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        34
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     18
+#define USE_PREINV_DIVREM_1                  1  /* native */
+#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD           24
+
+#define MUL_TOOM22_THRESHOLD                16
+#define MUL_TOOM33_THRESHOLD                45
+#define MUL_TOOM44_THRESHOLD               336
+#define MUL_TOOM6H_THRESHOLD               426
+#define MUL_TOOM8H_THRESHOLD               446
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      89
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD      98
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD      89
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD     103
+
+#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
+#define SQR_TOOM2_THRESHOLD                 24
+#define SQR_TOOM3_THRESHOLD                 77
+#define SQR_TOOM4_THRESHOLD                354
+#define SQR_TOOM6_THRESHOLD                366
+#define SQR_TOOM8_THRESHOLD                430
+
+#define MULMOD_BNM1_THRESHOLD               11
+#define SQRMOD_BNM1_THRESHOLD               13
+
+#define MUL_FFT_MODF_THRESHOLD             400  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    400, 5}, {     19, 6}, {     10, 5}, {     21, 6}, \
+    {     11, 5}, {     23, 6}, {     12, 5}, {     25, 6}, \
+    {     21, 7}, {     11, 6}, {     23, 7}, {     12, 6}, \
+    {     25, 7}, {     15, 6}, {     31, 7}, {     21, 8}, \
+    {     11, 7}, {     25, 8}, {     13, 7}, {     28, 8}, \
+    {     15, 7}, {     31, 8}, {     17, 7}, {     35, 8}, \
+    {     21, 9}, {     11, 8}, {     27, 9}, {     15, 8}, \
+    {     35, 9}, {     19, 8}, {     41, 9}, {     23, 8}, \
+    {     49, 9}, {     27,10}, {     15, 9}, {     31, 8}, \
+    {     63, 9}, {     39,10}, {     23, 9}, {     55,11}, \
+    {     15,10}, {     31, 9}, {     71,10}, {     39, 9}, \
+    {     83,10}, {     47, 9}, {     95,10}, {     55,11}, \
+    {     31,10}, {     87,11}, {     47,10}, {    103,12}, \
+    {     31,11}, {     63,10}, {    135,11}, {     79,10}, \
+    {    159, 9}, {    319,10}, {    167,11}, {     95,10}, \
+    {    191,11}, {    111,12}, {     63,11}, {    127,10}, \
+    {    271,11}, {    143,10}, {    303, 9}, {    607,11}, \
+    {    159,10}, {    319,12}, {     95,11}, {    191,10}, \
+    {    383,11}, {    207,13}, {     63,12}, {    127,11}, \
+    {    271,10}, {    543,11}, {    287,10}, {    575,11}, \
+    {    303,10}, {    607,12}, {    159,11}, {    319,10}, \
+    {    639,11}, {    351,10}, {    703,11}, {    367,12}, \
+    {    191,11}, {    415,12}, {    223,11}, {    447,13}, \
+    {    127,12}, {    255,11}, {    543,12}, {    287,11}, \
+    {    607,12}, {    319,11}, {    639,12}, {    351,11}, \
+    {    703,13}, {    191,12}, {    383,11}, {    767,12}, \
+    {    415,11}, {    831,12}, {    447,14}, {    127,13}, \
+    {    255,12}, {    607,13}, {    319,12}, {    703,13}, \
+    {    383,12}, {    831,13}, {    447,12}, {    895,14}, \
+    {    255,13}, {    511,12}, {   1023,13}, {    575,12}, \
+    {   1151,13}, {    703,14}, {    383,13}, {    831,12}, \
+    {   1663,13}, {    895,15}, {    255,14}, {    511,13}, \
+    {   1087,12}, {   2175,13}, {   1151,14}, {    639,13}, \
+    {   1343,12}, {   2687,13}, {   1407,14}, {    767,13}, \
+    {   1663,14}, {    895,15}, {    511,14}, {   1023,13}, \
+    {   2175,14}, {   1151,13}, {   2431,12}, {   4863,14}, \
+    {   1279,13}, {   2687,14}, {   1407,15}, {    767,14}, \
+    {   1535,13}, {   3199,14}, {   1663,13}, {   3455,16}, \
+    {  65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
+    {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 160
+#define MUL_FFT_THRESHOLD                 4736
+
+#define SQR_FFT_MODF_THRESHOLD             340  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    340, 5}, {     19, 6}, {     10, 5}, {     21, 6}, \
+    {     21, 7}, {     11, 6}, {     23, 7}, {     21, 8}, \
+    {     11, 7}, {     25, 8}, {     13, 7}, {     27, 8}, \
+    {     15, 7}, {     31, 8}, {     17, 7}, {     35, 8}, \
+    {     19, 7}, {     39, 8}, {     21, 9}, {     11, 8}, \
+    {     27, 9}, {     15, 8}, {     35, 9}, {     19, 8}, \
+    {     41, 9}, {     23, 8}, {     47, 9}, {     27,10}, \
+    {     15, 9}, {     39,10}, {     23, 9}, {     51,11}, \
+    {     15,10}, {     31, 9}, {     67,10}, {     39, 9}, \
+    {     79,10}, {     47, 9}, {     95,10}, {     55,11}, \
+    {     31,10}, {     79,11}, {     47,10}, {     95,12}, \
+    {     31,11}, {     63,10}, {    127, 9}, {    255,10}, \
+    {    135, 9}, {    271,11}, {     79,10}, {    159, 9}, \
+    {    319,11}, {     95,10}, {    191, 9}, {    383,12}, \
+    {     63,11}, {    127,10}, {    255, 9}, {    511,10}, \
+    {    271, 9}, {    543,11}, {    143,10}, {    287, 9}, \
+    {    575,10}, {    303, 9}, {    607,11}, {    159,10}, \
+    {    319, 9}, {    639,12}, {     95,11}, {    191,10}, \
+    {    383,11}, {    207,10}, {    415,13}, {     63,12}, \
+    {    127,11}, {    255,10}, {    511,11}, {    271,10}, \
+    {    543,11}, {    287,10}, {    575,11}, {    303,12}, \
+    {    159,11}, {    319,10}, {    639,11}, {    351,10}, \
+    {    703,12}, {    191,11}, {    383,10}, {    767,11}, \
+    {    415,12}, {    223,11}, {    479,13}, {    127,12}, \
+    {    255,11}, {    543,12}, {    287,11}, {    607,12}, \
+    {    319,11}, {    639,12}, {    351,11}, {    703,13}, \
+    {    191,12}, {    383,11}, {    767,12}, {    415,11}, \
+    {    831,12}, {    479,14}, {    127,13}, {    255,12}, \
+    {    607,13}, {    319,12}, {    703,13}, {    383,12}, \
+    {    831,13}, {    447,12}, {    895,14}, {    255,13}, \
+    {    511,12}, {   1023,13}, {    575,12}, {   1151,13}, \
+    {    703,14}, {    383,13}, {    895,15}, {    255,14}, \
+    {    511,13}, {   1087,12}, {   2175,13}, {   1151,14}, \
+    {    639,13}, {   1343,12}, {   2687,14}, {    767,13}, \
+    {   1599,12}, {   3199,13}, {   1663,14}, {    895,15}, \
+    {    511,14}, {   1023,13}, {   2175,14}, {   1151,13}, \
+    {   2431,12}, {   4863,14}, {   1279,13}, {   2687,15}, \
+    {    767,14}, {   1535,13}, {   3199,14}, {   1663,16}, \
+    {  65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
+    {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 160
+#define SQR_FFT_THRESHOLD                 3264
+
+#define MULLO_BASECASE_THRESHOLD             3
+#define MULLO_DC_THRESHOLD                  43
+#define MULLO_MUL_N_THRESHOLD             9174
+
+#define DC_DIV_QR_THRESHOLD                 43
+#define DC_DIVAPPR_Q_THRESHOLD             142
+#define DC_BDIV_QR_THRESHOLD                44
+#define DC_BDIV_Q_THRESHOLD                 80
+
+#define INV_MULMOD_BNM1_THRESHOLD           42
+#define INV_NEWTON_THRESHOLD               181
+#define INV_APPR_THRESHOLD                 157
+
+#define BINV_NEWTON_THRESHOLD              230
+#define REDC_1_TO_REDC_2_THRESHOLD          54
+#define REDC_2_TO_REDC_N_THRESHOLD           0  /* anomaly: never REDC_2 */
+
+#define MU_DIV_QR_THRESHOLD               1442
+#define MU_DIVAPPR_Q_THRESHOLD            1442
+#define MUPI_DIV_QR_THRESHOLD               91
+#define MU_BDIV_QR_THRESHOLD              1142
+#define MU_BDIV_Q_THRESHOLD               1334
+
+#define MATRIX22_STRASSEN_THRESHOLD         14
+#define HGCD_THRESHOLD                      95
+#define GCD_DC_THRESHOLD                   298
+#define GCDEXT_DC_THRESHOLD                283
+#define JACOBI_BASE_METHOD                   1
+
+#define GET_STR_DC_THRESHOLD                15
+#define GET_STR_PRECOMPUTE_THRESHOLD        30
+#define SET_STR_DC_THRESHOLD               306
+#define SET_STR_PRECOMPUTE_THRESHOLD      1628
diff --git a/mpn/x86_64/com.asm b/mpn/x86_64/com.asm

new file mode 100644 (file)

index 0000000..699da11
--- /dev/null
+++ b/mpn/x86_64/com.asm
@@ -0,0 +1,77 @@
+dnl  AMD64 mpn_com.
+
+dnl  Copyright 2004, 2005, 2006 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C          cycles/limb
+C K8,K9:       1.25
+C K10:         1.25
+C P4:          2.78
+C P6-15:       1.1
+
+C INPUT PARAMETERS
+define(`rp',`%rdi')
+define(`up',`%rsi')
+define(`n',`%rdx')
+
+
+ASM_START()
+       TEXT
+       ALIGN(32)
+PROLOGUE(mpn_com)
+       movq    (up), %r8               C r8 = up[0]
+       movl    %edx, %eax              C eax = low 32 bits of n
+       leaq    (up,n,8), up            C up end
+       leaq    (rp,n,8), rp            C rp end
+       negq    n                       C negative index counted from the ends
+       andl    $3, %eax                C eax = n mod 4
+       je      L(b00)                  C n mod 4 = 0
+       cmpl    $2, %eax
+       jc      L(b01)                  C n mod 4 = 1
+       je      L(b10)                  C n mod 4 = 2
+
+L(b11):        notq    %r8             C n mod 4 = 3: complement limb 0 on its own
+       movq    %r8, (rp,n,8)
+       decq    n                       C bias n so the +16 and +24 offsets hit limbs 1 and 2
+       jmp     L(e11)
+L(b10):        addq    $-2, n          C bias n so the +16 and +24 offsets hit limbs 0 and 1
+       jmp     L(e10)                  C r8 already holds limb 0
+       .byte   0x90,0x90,0x90,0x90,0x90,0x90   C six one-byte nops; NOTE(review): presumably alignment padding
+L(b01):        notq    %r8             C complement limb 0 on its own
+       movq    %r8, (rp,n,8)
+       incq    n
+       jz      L(ret)                  C n was 1: done
+
+L(oop):        movq    (up,n,8), %r8
+L(b00):        movq    8(up,n,8), %r9
+       notq    %r8
+       notq    %r9
+       movq    %r8, (rp,n,8)
+       movq    %r9, 8(rp,n,8)
+L(e11):        movq    16(up,n,8), %r8
+L(e10):        movq    24(up,n,8), %r9
+       notq    %r8
+       notq    %r9
+       movq    %r8, 16(rp,n,8)
+       movq    %r9, 24(rp,n,8)
+       addq    $4, n
+       jnc     L(oop)                  C carry is set once n wraps to 0
+L(ret):        ret
+EPILOGUE()
diff --git a/mpn/x86_64/copyd.asm b/mpn/x86_64/copyd.asm

new file mode 100644 (file)

index 0000000..f5c451c
--- /dev/null
+++ b/mpn/x86_64/copyd.asm
@@ -0,0 +1,75 @@
+dnl  AMD64 mpn_copyd -- copy limb vector, decrementing.
+
+dnl  Copyright 2003, 2005, 2007 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C          cycles/limb
+C K8,K9:       1
+C K10:         1
+C P4:          2.8
+C P6 core2:    1.2
+C P6 corei7:   1
+
+
+C INPUT PARAMETERS
+C rp   rdi
+C up   rsi
+C n    rdx
+
+define(`rp',`%rdi')
+define(`up',`%rsi')
+define(`n',`%rdx')
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_copyd)
+       leaq    -8(up,n,8), up          C up = address of the highest source limb
+       leaq    (rp,n,8), rp            C rp = just past the highest destination limb
+       subq    $4, n
+       jc      L(end)                  C fewer than 4 limbs: only the tail code runs
+       ALIGN(16)
+L(oop):        movq    (up), %r8       C load four limbs, highest first
+       movq    -8(up), %r9
+       leaq    -32(rp), rp             C step rp down four limbs
+       movq    -16(up), %r10
+       movq    -24(up), %r11
+       leaq    -32(up), up             C step up down four limbs
+       movq    %r8, 24(rp)
+       movq    %r9, 16(rp)
+       subq    $4, n                   C sets the carry tested by jnc below
+       movq    %r10, 8(rp)
+       movq    %r11, (rp)
+       jnc     L(oop)                  C mov and lea leave the subq flags intact
+
+L(end):        shrl    %edx                    C CF = bit 0 of n, edx = lowpart(n)
+       jnc     1f
+       movq    (up), %r8               C odd count: copy one limb
+       movq    %r8, -8(rp)
+       leaq    -8(rp), rp
+       leaq    -8(up), up
+1:     shrl    %edx                    C CF = bit 1 of n, edx = lowpart(n)
+       jnc     1f
+       movq    (up), %r8               C copy the last two limbs
+       movq    -8(up), %r9
+       movq    %r8, -8(rp)
+       movq    %r9, -16(rp)
+1:     ret
+EPILOGUE()
diff --git a/mpn/x86_64/copyi.asm b/mpn/x86_64/copyi.asm

new file mode 100644 (file)

index 0000000..506142b
--- /dev/null
+++ b/mpn/x86_64/copyi.asm
@@ -0,0 +1,73 @@
+dnl  AMD64 mpn_copyi -- copy limb vector, incrementing.
+
+dnl  Copyright 2003, 2005, 2007 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C          cycles/limb
+C K8,K9:       1
+C K10:         1
+C P4:          2.8
+C P6-15:       1.2
+
+
+C INPUT PARAMETERS
+C rp   rdi
+C up   rsi
+C n    rdx
+
+define(`rp',`%rdi')
+define(`up',`%rsi')
+define(`n',`%rdx')
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_copyi)
+       leaq    -8(rp), rp              C bias rp down one limb, the stores below compensate
+       subq    $4, n
+       jc      L(end)                  C fewer than 4 limbs: only the tail code runs
+       ALIGN(16)
+L(oop):        movq    (up), %r8       C load four limbs, lowest first
+       movq    8(up), %r9
+       leaq    32(rp), rp              C step rp up four limbs
+       movq    16(up), %r10
+       movq    24(up), %r11
+       leaq    32(up), up              C step up up four limbs
+       movq    %r8, -24(rp)
+       movq    %r9, -16(rp)
+       subq    $4, n                   C sets the carry tested by jnc below
+       movq    %r10, -8(rp)
+       movq    %r11, (rp)
+       jnc     L(oop)                  C mov and lea leave the subq flags intact
+
+L(end):        shrl    %edx                    C CF = bit 0 of n, edx = lowpart(n)
+       jnc     1f
+       movq    (up), %r8               C odd count: copy one limb
+       movq    %r8, 8(rp)
+       leaq    8(rp), rp
+       leaq    8(up), up
+1:     shrl    %edx                    C CF = bit 1 of n, edx = lowpart(n)
+       jnc     1f
+       movq    (up), %r8               C copy the last two limbs
+       movq    8(up), %r9
+       movq    %r8, 8(rp)
+       movq    %r9, 16(rp)
+1:     ret
+EPILOGUE()
diff --git a/mpn/x86_64/core2/aors_n.asm b/mpn/x86_64/core2/aors_n.asm

new file mode 100644 (file)

index 0000000..3dc04d0
--- /dev/null
+++ b/mpn/x86_64/core2/aors_n.asm
@@ -0,0 +1,124 @@
+dnl  Intel P6-15 mpn_add_n/mpn_sub_n -- mpn add or subtract.
+
+dnl  Copyright 2006, 2007 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C           cycles/limb
+C K8,K9:        2.25
+C K10:          2
+C P4:          10
+C P6 core2:     2.05
+C P6 corei7:    2.3
+
+C INPUT PARAMETERS
+define(`rp',   `%rdi')
+define(`up',   `%rsi')
+define(`vp',   `%rdx')
+define(`n',    `%rcx')
+define(`cy',   `%r8')
+
+ifdef(`OPERATION_add_n', `
+       define(ADCSBB,        adc)
+       define(func,          mpn_add_n)
+       define(func_nc,       mpn_add_nc)')
+ifdef(`OPERATION_sub_n', `
+       define(ADCSBB,        sbb)
+       define(func,          mpn_sub_n)
+       define(func_nc,       mpn_sub_nc)')
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
+
+ASM_START()
+
+       TEXT
+       ALIGN(16)
+
+PROLOGUE(func_nc)
+       jmp     L(start)                C keep the caller cy in r8 and join the common code
+EPILOGUE()
+
+PROLOGUE(func)
+       xor     %r8, %r8                C no carry-in
+L(start):
+       mov     (up), %r10              C r10 = up[0]
+       mov     (vp), %r11              C r11 = vp[0]
+
+       lea     -8(up,n,8), up          C up = address of the last up limb
+       lea     -8(vp,n,8), vp          C vp = address of the last vp limb
+       lea     -16(rp,n,8), rp         C rp = address of the second to last rp limb
+       mov     %ecx, %eax              C eax = low 32 bits of n
+       neg     n
+       and     $3, %eax                C eax = n mod 4
+       je      L(b00)
+       add     %rax, n         C clear low rcx bits for jrcxz
+       cmp     $2, %eax
+       jl      L(b01)                  C n mod 4 = 1
+       je      L(b10)                  C n mod 4 = 2
+
+L(b11):        shr     %r8                     C set cy
+       jmp     L(e11)
+
+L(b00):        shr     %r8                     C set cy
+       mov     %r10, %r8               C first limb pair moves to r8 and r9 for L(e00)
+       mov     %r11, %r9
+       lea     4(n), n                 C lea adjusts n without touching CF
+       jmp     L(e00)
+
+L(b01):        shr     %r8                     C set cy
+       jmp     L(e01)
+
+L(b10):        shr     %r8                     C set cy
+       mov     %r10, %r8               C first limb pair moves to r8 and r9 for L(e10)
+       mov     %r11, %r9
+       jmp     L(e10)
+
+L(end):        ADCSBB  %r11, %r10      C last limb pair
+       mov     %r10, 8(rp)             C store the top result limb
+       mov     %ecx, %eax              C clear eax, ecx contains 0
+       adc     %eax, %eax              C return value = CF out of the last ADCSBB
+       ret
+
+       ALIGN(16)
+L(top):
+       mov     -24(up,n,8), %r8
+       mov     -24(vp,n,8), %r9
+       ADCSBB  %r11, %r10
+       mov     %r10, -24(rp,n,8)
+L(e00):
+       mov     -16(up,n,8), %r10
+       mov     -16(vp,n,8), %r11
+       ADCSBB  %r9, %r8
+       mov     %r8, -16(rp,n,8)
+L(e11):
+       mov     -8(up,n,8), %r8
+       mov     -8(vp,n,8), %r9
+       ADCSBB  %r11, %r10
+       mov     %r10, -8(rp,n,8)
+L(e10):
+       mov     (up,n,8), %r10
+       mov     (vp,n,8), %r11
+       ADCSBB  %r9, %r8
+       mov     %r8, (rp,n,8)
+L(e01):
+       jrcxz   L(end)                  C exit when n is 0, jrcxz leaves CF alone
+       lea     4(n), n                 C advance four limbs without touching CF
+       jmp     L(top)
+
+EPILOGUE()
diff --git a/mpn/x86_64/core2/aorslsh1_n.asm b/mpn/x86_64/core2/aorslsh1_n.asm

new file mode 100644 (file)

index 0000000..18db7c9
--- /dev/null
+++ b/mpn/x86_64/core2/aorslsh1_n.asm
@@ -0,0 +1,151 @@
+dnl  x86-64 mpn_addlsh1_n and mpn_sublsh1_n, optimized for "Core 2".
+
+dnl  Copyright 2008 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C K8,K9:        4.25
+C K10:          ?
+C P4:           ?
+C P6-15:        3
+
+C INPUT PARAMETERS
+define(`rp',`%rdi')
+define(`up',`%rsi')
+define(`vp',`%rdx')
+define(`n', `%rcx')
+
+ifdef(`OPERATION_addlsh1_n', `
+       define(ADDSUB,  add)
+       define(ADCSBB,  adc)
+       define(func,    mpn_addlsh1_n)')
+ifdef(`OPERATION_sublsh1_n', `
+       define(ADDSUB,  sub)
+       define(ADCSBB,  sbb)
+       define(func,    mpn_sublsh1_n)')
+
+MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)
+
+ASM_START()
+       TEXT
+       ALIGN(8)
+PROLOGUE(func)
+       push    %rbx                    C rbx and r12 are used as scratch; both are restored at L(end)
+       push    %r12
+
+       mov     R32(%rcx), R32(%rax)    C eax = low bits of n, reduced to n mod 4 below
+       lea     24(up,n,8), up          C bias all three pointers past the end; n is negated and counts up to 0
+       lea     24(vp,n,8), vp
+       lea     24(rp,n,8), rp
+       neg     n
+
+       xor     R32(%r11), R32(%r11)    C r11 = 0 so the first shrd shifts in a zero bit
+
+       mov     -24(vp,n,8), %r8        C do first limb early
+       shrd    $63, %r8, %r11          C r11 = (r11 >> 63) | (r8 << 1): v limb doubled, bit from the previous limb shifted in
+
+       and     $3, R32(%rax)           C dispatch on n mod 4
+       je      L(b0)                   C n mod 4 = 0: enter the loop with eax = 0, so the restored carry is clear
+       cmp     $2, R32(%rax)
+       jc      L(b1)                   C n mod 4 = 1
+       je      L(b2)                   C n mod 4 = 2; fall through when it is 3
+
+L(b3): mov     -16(vp,n,8), %r9
+       shrd    $63, %r9, %r8
+       mov     -8(vp,n,8), %r10
+       shrd    $63, %r10, %r9
+       mov     -24(up,n,8), %r12
+       ADDSUB  %r11, %r12              C first limb: plain add/sub, no incoming carry
+       mov     %r12, -24(rp,n,8)
+       mov     -16(up,n,8), %r12
+       ADCSBB  %r8, %r12
+       mov     %r12, -16(rp,n,8)
+       mov     -8(up,n,8), %r12
+       ADCSBB  %r9, %r12
+       mov     %r12, -8(rp,n,8)
+       mov     %r10, %r11              C keep the last unshifted v limb; its top bit feeds the next shrd or L(end)
+       sbb     R32(%rax), R32(%rax)    C save cy
+       add     $3, n
+       js      L(top)                  C n still negative: more limbs remain
+       jmp     L(end)
+
+L(b1): mov     -24(up,n,8), %r12
+       ADDSUB  %r11, %r12              C first limb: plain add/sub, no incoming carry
+       mov     %r12, -24(rp,n,8)
+       mov     %r8, %r11               C keep the last unshifted v limb
+       sbb     R32(%rax), R32(%rax)    C save cy
+       inc     n
+       js      L(top)                  C n still negative: more limbs remain
+       jmp     L(end)
+
+L(b2): mov     -16(vp,n,8), %r9
+       shrd    $63, %r9, %r8
+       mov     -24(up,n,8), %r12
+       ADDSUB  %r11, %r12              C first limb: plain add/sub, no incoming carry
+       mov     %r12, -24(rp,n,8)
+       mov     -16(up,n,8), %r12
+       ADCSBB  %r8, %r12
+       mov     %r12, -16(rp,n,8)
+       mov     %r9, %r11               C keep the last unshifted v limb
+       sbb     R32(%rax), R32(%rax)    C save cy
+       add     $2, n
+       js      L(top)                  C n still negative: more limbs remain
+       jmp     L(end)
+
+       ALIGN(16)
+L(top):        mov     -24(vp,n,8), %r8
+       shrd    $63, %r8, %r11
+L(b0): mov     -16(vp,n,8), %r9
+       shrd    $63, %r9, %r8
+       mov     -8(vp,n,8), %r10
+       shrd    $63, %r10, %r9
+       mov     (vp,n,8), %rbx
+       shrd    $63, %rbx, %r10         C r11, r8, r9, r10 now hold four doubled v limbs
+
+       add     R32(%rax), R32(%rax)    C restore cy
+
+       mov     -24(up,n,8), %r12
+       ADCSBB  %r11, %r12
+       mov     %r12, -24(rp,n,8)
+
+       mov     -16(up,n,8), %r12
+       ADCSBB  %r8, %r12
+       mov     %r12, -16(rp,n,8)
+
+       mov     -8(up,n,8), %r12
+       ADCSBB  %r9, %r12
+       mov     %r12, -8(rp,n,8)
+
+       mov     (up,n,8), %r12
+       ADCSBB  %r10, %r12
+       mov     %r12, (rp,n,8)
+
+       mov     %rbx, %r11              C carry the last unshifted v limb into the next pass
+       sbb     R32(%rax), R32(%rax)    C save cy
+
+       add     $4, n
+       js      L(top)
+
+L(end):        add     %r11, %r11              C CF = top bit of the last v limb, i.e. the bit shifted out by the doubling
+       pop     %r12
+       pop     %rbx
+       sbb     $0, R32(%rax)           C eax = -(saved cy) - (shifted-out bit)
+       neg     R32(%rax)               C eax = saved cy + shifted-out bit
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/core2/aorsmul_1.asm b/mpn/x86_64/core2/aorsmul_1.asm

new file mode 100644 (file)

index 0000000..8dcccd9
--- /dev/null
+++ b/mpn/x86_64/core2/aorsmul_1.asm
@@ -0,0 +1,129 @@
+dnl  x86-64 mpn_addmul_1 and mpn_submul_1, optimized for "Core 2".
+
+dnl  Copyright 2003, 2004, 2005, 2007, 2008, 2009 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C K8,K9:        4
+C K10:          4
+C P4:           ?
+C P6 core2:     4.3-4.5 (fluctuating)
+C P6 corei7:    5
+
+C INPUT PARAMETERS
+define(`rp',   `%rdi')
+define(`up',   `%rsi')
+define(`n',    `%rdx')
+define(`v0',   `%rcx')
+
+ifdef(`OPERATION_addmul_1',`
+      define(`ADDSUB',        `add')
+      define(`func',  `mpn_addmul_1')
+')
+ifdef(`OPERATION_submul_1',`
+      define(`ADDSUB',        `sub')
+      define(`func',  `mpn_submul_1')
+')
+
+MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(func)
+       push    %rbx                    C rbx and rbp are used as scratch; both are restored before ret
+       push    %rbp
+       lea     (%rdx), %rbx            C rbx = n, copied because mul overwrites rdx
+       neg     %rbx                    C rbx = -n, counts up toward zero
+
+       mov     (up), %rax              C rax = up[0]
+       mov     (rp), %r10              C r10 = rp[0]
+
+       lea     -16(rp,%rdx,8), rp      C bias rp and up toward the end so that rbx can index them
+       lea     (up,%rdx,8), up
+       mul     %rcx                    C rdx:rax = up[0] * v0
+
+       bt      $0, R32(%rbx)           C CF = low bit of -n, which is the parity of n
+       jc      L(odd)
+
+       lea     (%rax), %r11            C even n: r11/rbp = low/high half of the first product
+       mov     8(up,%rbx,8), %rax
+       lea     (%rdx), %rbp
+       mul     %rcx
+       add     $2, %rbx
+       jns     L(n2)                   C n = 2: go straight to the wind-down
+
+       lea     (%rax), %r8             C r8/r9 = low/high half of the second product
+       mov     (up,%rbx,8), %rax
+       lea     (%rdx), %r9
+       jmp     L(mid)
+
+L(odd):        add     $1, %rbx
+       jns     L(n1)                   C n = 1: only the final limb step is needed
+
+       lea     (%rax), %r8             C odd n: r8/r9 = low/high half of the first product
+       mov     (up,%rbx,8), %rax
+       lea     (%rdx), %r9
+       mul     %rcx
+       lea     (%rax), %r11            C r11/rbp = low/high half of the second product
+       mov     8(up,%rbx,8), %rax
+       lea     (%rdx), %rbp
+       jmp     L(e)
+
+       ALIGN(16)
+L(top):        mul     %rcx                    C two limbs per pass; each mul overlaps accumulation of the previous product
+       ADDSUB  %r8, %r10               C apply low product half to the rp limb
+       lea     (%rax), %r8
+       mov     (up,%rbx,8), %rax
+       adc     %r9, %r11               C fold high half plus carry/borrow into the next low half
+       mov     %r10, -8(rp,%rbx,8)
+       mov     (rp,%rbx,8), %r10
+       lea     (%rdx), %r9
+       adc     $0, %rbp                C propagate the carry into the next high half
+L(mid):        mul     %rcx
+       ADDSUB  %r11, %r10
+       lea     (%rax), %r11
+       mov     8(up,%rbx,8), %rax
+       adc     %rbp, %r8
+       mov     %r10, (rp,%rbx,8)
+       mov     8(rp,%rbx,8), %r10
+       lea     (%rdx), %rbp
+       adc     $0, %r9
+L(e):  add     $2, %rbx
+       js      L(top)                  C rbx still negative: more limbs to multiply
+
+       mul     %rcx                    C last product; the rest is wind-down
+       ADDSUB  %r8, %r10
+       adc     %r9, %r11
+       mov     %r10, -8(rp)
+       adc     $0, %rbp
+L(n2): mov     (rp), %r10
+       ADDSUB  %r11, %r10
+       adc     %rbp, %rax
+       mov     %r10, (rp)
+       adc     $0, %rdx
+L(n1): mov     8(rp), %r10
+       ADDSUB  %rax, %r10
+       mov     %r10, 8(rp)
+       mov     R32(%rbx), R32(%rax)    C zero rax
+       adc     %rdx, %rax              C rax = high half of last product plus final carry/borrow
+       pop     %rbp
+       pop     %rbx
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/core2/gmp-mparam.h b/mpn/x86_64/core2/gmp-mparam.h

new file mode 100644 (file)

index 0000000..3c78e29
--- /dev/null
+++ b/mpn/x86_64/core2/gmp-mparam.h
@@ -0,0 +1,186 @@
+/* Core 2 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
+2008, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 64
+#define BYTES_PER_MP_LIMB 8
+
+/* 2133 MHz Core 2 (65nm) */
+
+#define MOD_1_NORM_THRESHOLD                 0  /* always */
+#define MOD_1_UNNORM_THRESHOLD               0  /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          5
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          4
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         5
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD         8
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     10
+#define USE_PREINV_DIVREM_1                  1  /* native */
+#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD           26
+
+#define MUL_TOOM22_THRESHOLD                23
+#define MUL_TOOM33_THRESHOLD                65
+#define MUL_TOOM44_THRESHOLD               183
+#define MUL_TOOM6H_THRESHOLD               254
+#define MUL_TOOM8H_THRESHOLD               381
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      69
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     122
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD      73
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD      74
+
+#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
+#define SQR_TOOM2_THRESHOLD                 28
+#define SQR_TOOM3_THRESHOLD                 97
+#define SQR_TOOM4_THRESHOLD                148
+#define SQR_TOOM6_THRESHOLD                254
+#define SQR_TOOM8_THRESHOLD                296
+
+#define MULMOD_BNM1_THRESHOLD               12
+#define SQRMOD_BNM1_THRESHOLD               14
+
+#define MUL_FFT_MODF_THRESHOLD             380  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    380, 5}, {     15, 6}, {      8, 5}, {     17, 6}, \
+    {      9, 5}, {     19, 6}, {     11, 5}, {     23, 6}, \
+    {     19, 7}, {     10, 6}, {     21, 7}, {     11, 6}, \
+    {     23, 7}, {     13, 6}, {     27, 7}, {     24, 8}, \
+    {     13, 7}, {     27, 8}, {     15, 7}, {     31, 8}, \
+    {     17, 7}, {     35, 8}, {     19, 7}, {     39, 8}, \
+    {     21, 9}, {     11, 8}, {     27, 9}, {     15, 8}, \
+    {     35, 9}, {     19, 8}, {     41, 9}, {     23, 8}, \
+    {     47, 9}, {     27,10}, {     15, 9}, {     39,10}, \
+    {     23, 9}, {     51,11}, {     15,10}, {     31, 9}, \
+    {     63,10}, {     39, 9}, {     79,10}, {     47, 9}, \
+    {     95,10}, {     55,11}, {     31,10}, {     87,11}, \
+    {     47,12}, {     31,11}, {     63,10}, {    127, 9}, \
+    {    255,10}, {    143,11}, {     79, 9}, {    319,11}, \
+    {     95,10}, {    207,11}, {    111,12}, {     63,11}, \
+    {    143,10}, {    287,11}, {    159,10}, {    319,11}, \
+    {    175,12}, {     95,11}, {    191,10}, {    383,11}, \
+    {    207,10}, {    415,13}, {     63,12}, {    127,11}, \
+    {    287,10}, {    575,12}, {    159,11}, {    319,10}, \
+    {    639,11}, {    351,10}, {    703,11}, {    367,12}, \
+    {    191,11}, {    415,10}, {    831,12}, {    223,11}, \
+    {    447,10}, {    895,11}, {    479,13}, {    127,12}, \
+    {    255,11}, {    543,12}, {    287,11}, {    607,12}, \
+    {    319,11}, {    639,12}, {    351,11}, {    703,13}, \
+    {    191,12}, {    415,11}, {    831,12}, {    447,11}, \
+    {    895,12}, {    479,14}, {    127,13}, {    255,12}, \
+    {    543,11}, {   1087,12}, {    607,13}, {    319,12}, \
+    {    735,13}, {    383,12}, {    831,13}, {    447,12}, \
+    {    959,14}, {    255,13}, {    511,12}, {   1087,13}, \
+    {    575,12}, {   1215,13}, {    639,12}, {   1279,13}, \
+    {    703,14}, {    383,13}, {    831,12}, {   1663,13}, \
+    {    959,15}, {    255,14}, {    511,13}, {   1087,12}, \
+    {   2175,13}, {   1215,14}, {    639,13}, {   1343,12}, \
+    {   2687,13}, {   1407,12}, {   2815,13}, {   1471,14}, \
+    {    767,13}, {   1535,12}, {   3071,13}, {   1663,14}, \
+    {    895,13}, {   1791,12}, {   3583,13}, {   1919,15}, \
+    {    511,14}, {   1023,13}, {   2175,14}, {   1151,13}, \
+    {   2303,12}, {   4607,13}, {   2431,12}, {   4863,14}, \
+    {   1279,13}, {   2559,14}, {   1407,15}, {    767,14}, \
+    {   1535,13}, {   3199,14}, {   1663,13}, {   3455,12}, \
+    {   6911,14}, {   1791,13}, {   3583,14}, {  16384,15}, \
+    {  32768,16}, {  65536,17}, { 131072,18}, { 262144,19}, \
+    { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
+    {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 173
+#define MUL_FFT_THRESHOLD                 4736
+
+#define SQR_FFT_MODF_THRESHOLD             256  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    256, 5}, {      8, 4}, {     17, 5}, {      9, 4}, \
+    {     19, 5}, {     17, 6}, {      9, 5}, {     19, 6}, \
+    {     21, 7}, {     11, 6}, {     23, 7}, {     12, 6}, \
+    {     25, 7}, {     21, 8}, {     11, 7}, {     25, 8}, \
+    {     13, 7}, {     27, 8}, {     15, 7}, {     31, 8}, \
+    {     21, 9}, {     11, 8}, {     27, 9}, {     15, 8}, \
+    {     35, 9}, {     19, 8}, {     41, 9}, {     23, 8}, \
+    {     47, 9}, {     27,10}, {     15, 9}, {     39,10}, \
+    {     23, 9}, {     47,11}, {     15,10}, {     31, 9}, \
+    {     63,10}, {     39, 9}, {     79,10}, {     55,11}, \
+    {     31,10}, {     79,11}, {     47,10}, {     95,12}, \
+    {     31,11}, {     63, 8}, {    511,10}, {    135, 9}, \
+    {    271,10}, {    143,11}, {     79,10}, {    159, 9}, \
+    {    319,10}, {    175,11}, {     95,10}, {    191, 9}, \
+    {    383,10}, {    207,11}, {    111,12}, {     63,11}, \
+    {    127,10}, {    271,11}, {    143,10}, {    287, 9}, \
+    {    575,10}, {    303,11}, {    159,10}, {    319, 9}, \
+    {    639,12}, {     95,11}, {    191,10}, {    383,11}, \
+    {    207,13}, {     63,12}, {    127,11}, {    271,10}, \
+    {    543,11}, {    287,10}, {    575,12}, {    159,11}, \
+    {    351,12}, {    191,11}, {    415,12}, {    223,11}, \
+    {    447,10}, {    895,11}, {    479,13}, {    127,12}, \
+    {    255,11}, {    543,12}, {    287,11}, {    607,12}, \
+    {    319,11}, {    639,12}, {    351,13}, {    191,12}, \
+    {    415,11}, {    831,12}, {    479,11}, {    959,14}, \
+    {    127,13}, {    255,12}, {    607,13}, {    319,12}, \
+    {    703,13}, {    383,12}, {    831,13}, {    447,12}, \
+    {    895,14}, {    255,13}, {    511,12}, {   1023,13}, \
+    {    575,12}, {   1215,13}, {    639,12}, {   1279,13}, \
+    {    703,14}, {    383,13}, {    767,12}, {   1535,13}, \
+    {    831,12}, {   1663,13}, {    959,15}, {    255,14}, \
+    {    511,13}, {   1087,12}, {   2175,13}, {   1215,14}, \
+    {    639,13}, {   1343,12}, {   2687,13}, {   1407,12}, \
+    {   2815,14}, {    767,13}, {   1663,14}, {    895,13}, \
+    {   1791,15}, {    511,14}, {   1023,13}, {   2175,14}, \
+    {   1151,13}, {   2303,12}, {   4607,13}, {   2431,12}, \
+    {   4863,14}, {   1279,13}, {   2687,14}, {   1407,13}, \
+    {   2815,15}, {    767,14}, {   1535,13}, {   3071,14}, \
+    {   1663,13}, {   3327,12}, {   6655,13}, {   3455,12}, \
+    {   6911,14}, {   1791,16}, {  65536,17}, { 131072,18}, \
+    { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
+    {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 166
+#define SQR_FFT_THRESHOLD                 3200
+
+#define MULLO_BASECASE_THRESHOLD             3
+#define MULLO_DC_THRESHOLD                  20
+#define MULLO_MUL_N_THRESHOLD             8648
+
+#define DC_DIV_QR_THRESHOLD                 46
+#define DC_DIVAPPR_Q_THRESHOLD             190
+#define DC_BDIV_QR_THRESHOLD                57
+#define DC_BDIV_Q_THRESHOLD                156
+
+#define INV_MULMOD_BNM1_THRESHOLD           50
+#define INV_NEWTON_THRESHOLD               172
+#define INV_APPR_THRESHOLD                 172
+
+#define BINV_NEWTON_THRESHOLD              240
+#define REDC_1_TO_REDC_2_THRESHOLD          10
+#define REDC_2_TO_REDC_N_THRESHOLD          63
+
+#define MU_DIV_QR_THRESHOLD               1334
+#define MU_DIVAPPR_Q_THRESHOLD            1334
+#define MUPI_DIV_QR_THRESHOLD               81
+#define MU_BDIV_QR_THRESHOLD              1037
+#define MU_BDIV_Q_THRESHOLD               1334
+
+#define MATRIX22_STRASSEN_THRESHOLD         18
+#define HGCD_THRESHOLD                     138
+#define GCD_DC_THRESHOLD                   465
+#define GCDEXT_DC_THRESHOLD                365
+#define JACOBI_BASE_METHOD                   1
+
+#define GET_STR_DC_THRESHOLD                 9
+#define GET_STR_PRECOMPUTE_THRESHOLD        20
+#define SET_STR_DC_THRESHOLD               552
+#define SET_STR_PRECOMPUTE_THRESHOLD      1790
diff --git a/mpn/x86_64/core2/lshift.asm b/mpn/x86_64/core2/lshift.asm

new file mode 100644 (file)

index 0000000..e3e4008
--- /dev/null
+++ b/mpn/x86_64/core2/lshift.asm
@@ -0,0 +1,128 @@
+dnl  x86-64 mpn_lshift optimized for "Core 2".
+
+dnl  Copyright 2007, 2009 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C           cycles/limb
+C K8,K9:        4.25
+C K10:          4.25
+C P4:          14.7
+C P6 core2:     1.27
+C P6 corei7:    1.5
+
+
+C INPUT PARAMETERS
+define(`rp',   `%rdi')
+define(`up',   `%rsi')
+define(`n',    `%rdx')
+define(`cnt',  `%cl')
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_lshift)
+       lea     -8(rp,n,8), rp          C point rp and up at their most significant limbs; the routine walks downward
+       lea     -8(up,n,8), up
+
+       mov     %edx, %eax
+       and     $3, %eax                C eax = n mod 4, selects the entry point into the 4-way loop
+       jne     L(nb00)
+L(b00):        C n = 4, 8, 12, ...
+       mov     (up), %r10
+       mov     -8(up), %r11
+       xor     %eax, %eax
+       shld    %cl, %r10, %rax         C rax = bits shifted out of the top limb; rax is not written again before ret
+       mov     -16(up), %r8
+       lea     24(rp), rp              C offset rp so that the -24(rp) store at L(00) hits the top limb
+       sub     $4, n
+       jmp     L(00)
+
+L(nb00):C n = 1, 5, 9, ...
+       cmp     $2, %eax
+       jae     L(nb01)                 C n mod 4 is 2 or 3
+L(b01):        mov     (up), %r9               C n mod 4 = 1
+       xor     %eax, %eax
+       shld    %cl, %r9, %rax          C rax = bits shifted out of the top limb
+       sub     $2, n
+       jb      L(le1)                  C n = 1: single limb, no loop
+       mov     -8(up), %r10
+       mov     -16(up), %r11
+       lea     -8(up), up
+       lea     16(rp), rp
+       jmp     L(01)
+L(le1):        shl     %cl, %r9                C lowest limb: zeros are shifted in
+       mov     %r9, (rp)
+       ret
+
+L(nb01):C n = 2, 6, 10, ...
+       jne     L(b11)                  C flags still come from the cmp with 2: not equal means n mod 4 = 3
+L(b10):        mov     (up), %r8
+       mov     -8(up), %r9
+       xor     %eax, %eax
+       shld    %cl, %r8, %rax          C rax = bits shifted out of the top limb
+       sub     $3, n
+       jb      L(le2)                  C n = 2: two limbs, no loop
+       mov     -16(up), %r10
+       lea     -16(up), up
+       lea     8(rp), rp
+       jmp     L(10)
+L(le2):        shld    %cl, %r9, %r8
+       mov     %r8, (rp)
+       shl     %cl, %r9                C lowest limb: zeros are shifted in
+       mov     %r9, -8(rp)
+       ret
+
+       ALIGN(16)                       C performance critical!
+L(b11):        C n = 3, 7, 11, ...
+       mov     (up), %r11
+       mov     -8(up), %r8
+       xor     %eax, %eax
+       shld    %cl, %r11, %rax         C rax = bits shifted out of the top limb
+       mov     -16(up), %r9
+       lea     -24(up), up
+       sub     $4, n
+       jb      L(end)                  C n = 3: only the three-limb tail is needed
+
+       ALIGN(16)
+L(top):        shld    %cl, %r8, %r11          C each shld merges a limb with the top bits of its lower neighbour
+       mov     (up), %r10
+       mov     %r11, (rp)
+L(10): shld    %cl, %r9, %r8
+       mov     -8(up), %r11
+       mov     %r8, -8(rp)
+L(01): shld    %cl, %r10, %r9
+       mov     -16(up), %r8
+       mov     %r9, -16(rp)
+L(00): shld    %cl, %r11, %r10
+       mov     -24(up), %r9
+       mov     %r10, -24(rp)
+       add     $-32, up                C step both pointers down by four limbs
+       lea     -32(rp), rp
+       sub     $4, n
+       jnc     L(top)                  C no borrow: at least four more limbs to process
+
+L(end):        shld    %cl, %r8, %r11          C tail: the last three limbs are already in r11, r8, r9
+       mov     %r11, (rp)
+       shld    %cl, %r9, %r8
+       mov     %r8, -8(rp)
+       shl     %cl, %r9                C lowest limb: zeros are shifted in
+       mov     %r9, -16(rp)
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/core2/lshiftc.asm b/mpn/x86_64/core2/lshiftc.asm

new file mode 100644 (file)

index 0000000..bc014c8
--- /dev/null
+++ b/mpn/x86_64/core2/lshiftc.asm
@@ -0,0 +1,138 @@
+dnl  x86-64 mpn_lshiftc optimized for "Core 2".
+
+dnl  Copyright 2007, 2009 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C           cycles/limb
+C K8,K9:        ?
+C K10:          ?
+C P4:           ?
+C P6 core2:     1.5
+C P6 corei7:    1.75
+
+
+C INPUT PARAMETERS
+define(`rp',   `%rdi')
+define(`up',   `%rsi')
+define(`n',    `%rdx')
+define(`cnt',  `%cl')
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_lshiftc)
+       lea     -8(rp,n,8), rp          C point rp and up at their most significant limbs; the routine walks downward
+       lea     -8(up,n,8), up
+
+       mov     %edx, %eax
+       and     $3, %eax                C eax = n mod 4, selects the entry point into the 4-way loop
+       jne     L(nb00)
+L(b00):        C n = 4, 8, 12, ...
+       mov     (up), %r10
+       mov     -8(up), %r11
+       xor     %eax, %eax
+       shld    %cl, %r10, %rax         C rax = bits shifted out of the top limb; no not is applied to rax
+       mov     -16(up), %r8
+       lea     24(rp), rp              C offset rp so that the -24(rp) store at L(00) hits the top limb
+       sub     $4, n
+       jmp     L(00)
+
+L(nb00):C n = 1, 5, 9, ...
+       cmp     $2, %eax
+       jae     L(nb01)                 C n mod 4 is 2 or 3
+L(b01):        mov     (up), %r9               C n mod 4 = 1
+       xor     %eax, %eax
+       shld    %cl, %r9, %rax          C rax = bits shifted out of the top limb
+       sub     $2, n
+       jb      L(le1)                  C n = 1: single limb, no loop
+       mov     -8(up), %r10
+       mov     -16(up), %r11
+       lea     -8(up), up
+       lea     16(rp), rp
+       jmp     L(01)
+L(le1):        shl     %cl, %r9                C lowest limb: zeros are shifted in before complementing
+       not     %r9
+       mov     %r9, (rp)
+       ret
+
+L(nb01):C n = 2, 6, 10, ...
+       jne     L(b11)                  C flags still come from the cmp with 2: not equal means n mod 4 = 3
+L(b10):        mov     (up), %r8
+       mov     -8(up), %r9
+       xor     %eax, %eax
+       shld    %cl, %r8, %rax          C rax = bits shifted out of the top limb
+       sub     $3, n
+       jb      L(le2)                  C n = 2: two limbs, no loop
+       mov     -16(up), %r10
+       lea     -16(up), up
+       lea     8(rp), rp
+       jmp     L(10)
+L(le2):        shld    %cl, %r9, %r8
+       not     %r8
+       mov     %r8, (rp)
+       shl     %cl, %r9
+       not     %r9
+       mov     %r9, -8(rp)
+       ret
+
+       ALIGN(16)                       C performance critical!
+L(b11):        C n = 3, 7, 11, ...
+       mov     (up), %r11
+       mov     -8(up), %r8
+       xor     %eax, %eax
+       shld    %cl, %r11, %rax         C rax = bits shifted out of the top limb
+       mov     -16(up), %r9
+       lea     -24(up), up
+       sub     $4, n
+       jb      L(end)                  C n = 3: only the three-limb tail is needed
+
+       ALIGN(16)
+L(top):        shld    %cl, %r8, %r11          C each shld merges a limb with the top bits of its lower neighbour
+       mov     (up), %r10
+       not     %r11                    C complement each shifted limb just before it is stored
+       mov     %r11, (rp)
+L(10): shld    %cl, %r9, %r8
+       mov     -8(up), %r11
+       not     %r8
+       mov     %r8, -8(rp)
+L(01): shld    %cl, %r10, %r9
+       mov     -16(up), %r8
+       not     %r9
+       mov     %r9, -16(rp)
+L(00): shld    %cl, %r11, %r10
+       mov     -24(up), %r9
+       not     %r10
+       mov     %r10, -24(rp)
+       add     $-32, up                C step both pointers down by four limbs
+       lea     -32(rp), rp
+       sub     $4, n
+       jnc     L(top)                  C no borrow: at least four more limbs to process
+
+L(end):        shld    %cl, %r8, %r11          C tail: the last three limbs are already in r11, r8, r9
+       not     %r11
+       mov     %r11, (rp)
+       shld    %cl, %r9, %r8
+       not     %r8
+       mov     %r8, -8(rp)
+       shl     %cl, %r9                C lowest limb: zeros are shifted in before complementing
+       not     %r9
+       mov     %r9, -16(rp)
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/core2/popcount.asm b/mpn/x86_64/core2/popcount.asm

new file mode 100644 (file)

index 0000000..97c451f
--- /dev/null
+++ b/mpn/x86_64/core2/popcount.asm
@@ -0,0 +1,24 @@
+dnl  x86-64 mpn_popcount optimized for "Core 2".
+
+dnl  Copyright 2007 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_popcount)
+include_mpn(`x86/pentium4/sse2/popcount.asm')
diff --git a/mpn/x86_64/core2/rshift.asm b/mpn/x86_64/core2/rshift.asm

new file mode 100644 (file)

index 0000000..485fd4b
--- /dev/null
+++ b/mpn/x86_64/core2/rshift.asm
@@ -0,0 +1,126 @@
+dnl  x86-64 mpn_rshift optimized for "Core 2".
+
+dnl  Copyright 2007, 2009 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C           cycles/limb
+C K8,K9:        4.25
+C K10:          4.25
+C P4:          14.7
+C P6 core2:     1.27
+C P6 corei7:    1.5
+
+
+C INPUT PARAMETERS
+define(`rp',   `%rdi')
+define(`up',   `%rsi')
+define(`n',    `%rdx')
+define(`cnt',  `%cl')
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_rshift)
+       mov     %edx, %eax
+       and     $3, %eax                C eax = n mod 4, selects the entry point into the 4-way loop
+       jne     L(nb00)
+L(b00):        C n = 4, 8, 12, ...
+       mov     (up), %r10
+       mov     8(up), %r11
+       xor     %eax, %eax
+       shrd    %cl, %r10, %rax         C rax = bits shifted out of the lowest limb, placed in its high bits
+       mov     16(up), %r8
+       lea     8(up), up
+       lea     -24(rp), rp             C offset rp so that the 24(rp) store at L(00) hits the lowest limb
+       sub     $4, n
+       jmp     L(00)
+
+L(nb00):C n = 1, 5, 9, ...
+       cmp     $2, %eax
+       jae     L(nb01)                 C n mod 4 is 2 or 3
+L(b01):        mov     (up), %r9               C n mod 4 = 1
+       xor     %eax, %eax
+       shrd    %cl, %r9, %rax          C rax = bits shifted out of the lowest limb
+       sub     $2, n
+       jb      L(le1)                  C n = 1: single limb, no loop
+       mov     8(up), %r10
+       mov     16(up), %r11
+       lea     16(up), up
+       lea     -16(rp), rp
+       jmp     L(01)
+L(le1):        shr     %cl, %r9                C highest limb: zeros are shifted in
+       mov     %r9, (rp)
+       ret
+
+L(nb01):C n = 2, 6, 10, ...
+       jne     L(b11)                  C flags still come from the cmp with 2: not equal means n mod 4 = 3
+L(b10):        mov     (up), %r8
+       mov     8(up), %r9
+       xor     %eax, %eax
+       shrd    %cl, %r8, %rax          C rax = bits shifted out of the lowest limb
+       sub     $3, n
+       jb      L(le2)                  C n = 2: two limbs, no loop
+       mov     16(up), %r10
+       lea     24(up), up
+       lea     -8(rp), rp
+       jmp     L(10)
+L(le2):        shrd    %cl, %r9, %r8
+       mov     %r8, (rp)
+       shr     %cl, %r9                C highest limb: zeros are shifted in
+       mov     %r9, 8(rp)
+       ret
+
+       ALIGN(16)
+L(b11):        C n = 3, 7, 11, ...
+       mov     (up), %r11
+       mov     8(up), %r8
+       xor     %eax, %eax
+       shrd    %cl, %r11, %rax         C rax = bits shifted out of the lowest limb
+       mov     16(up), %r9
+       lea     32(up), up
+       sub     $4, n
+       jb      L(end)                  C n = 3: only the three-limb tail is needed
+
+       ALIGN(16)
+L(top):        shrd    %cl, %r8, %r11          C each shrd merges a limb with the low bits of its higher neighbour
+       mov     -8(up), %r10
+       mov     %r11, (rp)
+L(10): shrd    %cl, %r9, %r8
+       mov     (up), %r11
+       mov     %r8, 8(rp)
+L(01): shrd    %cl, %r10, %r9
+       mov     8(up), %r8
+       mov     %r9, 16(rp)
+L(00): shrd    %cl, %r11, %r10
+       mov     16(up), %r9
+       mov     %r10, 24(rp)
+       add     $32, up                 C step both pointers up by four limbs
+       lea     32(rp), rp
+       sub     $4, n
+       jnc     L(top)                  C no borrow: at least four more limbs to process
+
+L(end):        shrd    %cl, %r8, %r11          C tail: the last three limbs are already in r11, r8, r9
+       mov     %r11, (rp)
+       shrd    %cl, %r9, %r8
+       mov     %r8, 8(rp)
+       shr     %cl, %r9                C highest limb: zeros are shifted in
+       mov     %r9, 16(rp)
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/coreinhm/gmp-mparam.h b/mpn/x86_64/coreinhm/gmp-mparam.h

new file mode 100644 (file)

index 0000000..69c1c31
--- /dev/null
+++ b/mpn/x86_64/coreinhm/gmp-mparam.h
@@ -0,0 +1,138 @@
+/* Nehalem gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
+2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 64
+#define BYTES_PER_MP_LIMB 8
+
+/* 2667 MHz Core i7 Nehalem */
+
+#define MOD_1_NORM_THRESHOLD                 0  /* always */
+#define MOD_1_UNNORM_THRESHOLD               0  /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          6
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          6
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0  /* never mpn_mod_1_1p */
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        14
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     19
+#define USE_PREINV_DIVREM_1                  1  /* native */
+#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD           18
+
+#define MUL_TOOM22_THRESHOLD                18
+#define MUL_TOOM33_THRESHOLD                65
+#define MUL_TOOM44_THRESHOLD               166
+#define MUL_TOOM6H_THRESHOLD               254
+#define MUL_TOOM8H_THRESHOLD               333
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      69
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD      91
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD      97
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD      96
+
+#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
+#define SQR_TOOM2_THRESHOLD                 28
+#define SQR_TOOM3_THRESHOLD                105
+#define SQR_TOOM4_THRESHOLD                250
+#define SQR_TOOM6_THRESHOLD                366
+#define SQR_TOOM8_THRESHOLD                478
+
+#define MULMOD_BNM1_THRESHOLD               13
+#define SQRMOD_BNM1_THRESHOLD               13
+
+#define MUL_FFT_MODF_THRESHOLD             380  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    380, 5}, {     17, 6}, {      9, 5}, {     19, 6}, \
+    {     11, 5}, {     23, 6}, {     21, 7}, {     11, 6}, \
+    {     23, 7}, {     24, 8}, {     13, 7}, {     27, 8}, \
+    {     15, 7}, {     31, 8}, {     21, 9}, {     11, 8}, \
+    {     27, 9}, {     15, 8}, {     33, 9}, {     19, 8}, \
+    {     39, 9}, {     23, 8}, {     47, 9}, {     27,10}, \
+    {     15, 9}, {     39,10}, {     23, 9}, {     51,11}, \
+    {     15,10}, {     31, 9}, {     67,10}, {     39, 9}, \
+    {     79,10}, {     47, 9}, {     95,10}, {     55,11}, \
+    {     31,10}, {     79,11}, {     47,10}, {     95,12}, \
+    {     31,11}, {     63,10}, {    135,11}, {     79,10}, \
+    {    159, 9}, {    319, 8}, {    639,10}, {    167,11}, \
+    {     95,10}, {    191,12}, {     63,11}, {    127,10}, \
+    {    255, 9}, {    511,11}, {    143,10}, {    287, 9}, \
+    {    575,11}, {    159,10}, {    319,12}, {     95,11}, \
+    {    191,10}, {    383,11}, {    207,13}, {   8192,14}, \
+    {  16384,15}, {  32768,16}, {  65536,17}, { 131072,18}, \
+    { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
+    {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 74
+#define MUL_FFT_THRESHOLD                 3712
+
+#define SQR_FFT_MODF_THRESHOLD             308  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    308, 5}, {     17, 6}, {      9, 5}, {     19, 6}, \
+    {     21, 7}, {     11, 6}, {     23, 7}, {     21, 8}, \
+    {     11, 7}, {     24, 8}, {     13, 7}, {     27, 8}, \
+    {     15, 7}, {     31, 8}, {     21, 9}, {     11, 8}, \
+    {     27, 9}, {     15, 8}, {     33, 9}, {     19, 8}, \
+    {     41, 9}, {     23, 8}, {     47, 9}, {     27,10}, \
+    {     15, 9}, {     43,10}, {     23, 9}, {     47,11}, \
+    {     15,10}, {     31, 9}, {     67,10}, {     39, 9}, \
+    {     79,10}, {     47, 9}, {     95,10}, {     55,11}, \
+    {     31,10}, {     79,11}, {     47,10}, {     95,12}, \
+    {     31,11}, {     63,10}, {    127, 9}, {    255, 8}, \
+    {    511,10}, {    135,11}, {     79,10}, {    159, 9}, \
+    {    319,11}, {     95,10}, {    191, 9}, {    383, 8}, \
+    {    767,12}, {     63,10}, {    255,11}, {    143, 9}, \
+    {    575, 8}, {   1151,11}, {    159,10}, {    319, 9}, \
+    {    639,11}, {    175,12}, {     95,11}, {    191,10}, \
+    {    383,13}, {   8192,14}, {  16384,15}, {  32768,16}, \
+    {  65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
+    {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 76
+#define SQR_FFT_THRESHOLD                 3200
+
+#define MULLO_BASECASE_THRESHOLD             0  /* always */
+#define MULLO_DC_THRESHOLD                  21
+#define MULLO_MUL_N_THRESHOLD             6633
+
+#define DC_DIV_QR_THRESHOLD                 38
+#define DC_DIVAPPR_Q_THRESHOLD             133
+#define DC_BDIV_QR_THRESHOLD                32
+#define DC_BDIV_Q_THRESHOLD                 70
+
+#define INV_MULMOD_BNM1_THRESHOLD           46
+#define INV_NEWTON_THRESHOLD               195
+#define INV_APPR_THRESHOLD                 147
+
+#define BINV_NEWTON_THRESHOLD              230
+#define REDC_1_TO_REDC_2_THRESHOLD          12
+#define REDC_2_TO_REDC_N_THRESHOLD          59
+
+#define MU_DIV_QR_THRESHOLD               1334
+#define MU_DIVAPPR_Q_THRESHOLD            1360
+#define MUPI_DIV_QR_THRESHOLD               74
+#define MU_BDIV_QR_THRESHOLD              1142
+#define MU_BDIV_Q_THRESHOLD               1308
+
+#define MATRIX22_STRASSEN_THRESHOLD         17
+#define HGCD_THRESHOLD                     125
+#define GCD_DC_THRESHOLD                   330
+#define GCDEXT_DC_THRESHOLD                382
+#define JACOBI_BASE_METHOD                   1
+
+#define GET_STR_DC_THRESHOLD                13
+#define GET_STR_PRECOMPUTE_THRESHOLD        24
+#define SET_STR_DC_THRESHOLD               230
+#define SET_STR_PRECOMPUTE_THRESHOLD      1660
diff --git a/mpn/x86_64/coreisbr/gmp-mparam.h b/mpn/x86_64/coreisbr/gmp-mparam.h

new file mode 100644 (file)

index 0000000..f43388d
--- /dev/null
+++ b/mpn/x86_64/coreisbr/gmp-mparam.h
@@ -0,0 +1,191 @@
+/* Sandy Bridge gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
+2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 64
+#define BYTES_PER_MP_LIMB 8
+
+/* 3300 MHz Core i5 Sandy Bridge */
+
+#define MOD_1_NORM_THRESHOLD                 0  /* always */
+#define MOD_1_UNNORM_THRESHOLD               0  /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          6
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          6
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0  /* never mpn_mod_1_1p */
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        22
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     15
+#define USE_PREINV_DIVREM_1                  1  /* native */
+#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD           34
+
+#define MUL_TOOM22_THRESHOLD                20
+#define MUL_TOOM33_THRESHOLD                57
+#define MUL_TOOM44_THRESHOLD               166
+#define MUL_TOOM6H_THRESHOLD               387
+#define MUL_TOOM8H_THRESHOLD               527
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD     105
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     114
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     113
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD     114
+
+#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
+#define SQR_TOOM2_THRESHOLD                 30
+#define SQR_TOOM3_THRESHOLD                 93
+#define SQR_TOOM4_THRESHOLD                278
+#define SQR_TOOM6_THRESHOLD                369
+#define SQR_TOOM8_THRESHOLD                557
+
+#define MULMOD_BNM1_THRESHOLD               13
+#define SQRMOD_BNM1_THRESHOLD               18
+
+#define MUL_FFT_MODF_THRESHOLD             376  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    376, 5}, {     17, 6}, {      9, 5}, {     21, 6}, \
+    {     11, 5}, {     23, 6}, {     21, 7}, {     11, 6}, \
+    {     23, 7}, {     12, 6}, {     25, 7}, {     13, 6}, \
+    {     27, 7}, {     21, 8}, {     11, 7}, {     25, 8}, \
+    {     13, 7}, {     27, 8}, {     15, 7}, {     31, 8}, \
+    {     17, 7}, {     35, 8}, {     19, 7}, {     39, 8}, \
+    {     21, 9}, {     11, 8}, {     27, 9}, {     15, 8}, \
+    {     35, 9}, {     19, 8}, {     41, 9}, {     23, 8}, \
+    {     47, 9}, {     27,10}, {     15, 9}, {     39,10}, \
+    {     23, 9}, {     51,11}, {     15,10}, {     31, 9}, \
+    {     67,10}, {     39, 9}, {     79,10}, {     47, 9}, \
+    {     95,10}, {     55,11}, {     31,10}, {     79,11}, \
+    {     47,10}, {     95,12}, {     31,11}, {     63,10}, \
+    {    135,11}, {     79,10}, {    159,11}, {     95,10}, \
+    {    191, 8}, {    767,12}, {     63,11}, {    127,10}, \
+    {    255, 9}, {    511,11}, {    143,10}, {    287,11}, \
+    {    159, 9}, {    639,12}, {     95,11}, {    191,13}, \
+    {     63,12}, {    127,10}, {    511,11}, {    271,10}, \
+    {    543, 9}, {   1087,10}, {    607,12}, {    159,11}, \
+    {    319,10}, {    639,11}, {    335,10}, {    671,11}, \
+    {    351,10}, {    703, 9}, {   1407,10}, {    735,12}, \
+    {    191,11}, {    415,10}, {    831,12}, {    223,11}, \
+    {    447,13}, {    127,12}, {    255,11}, {    543,12}, \
+    {    287,11}, {    607,12}, {    319,11}, {    639,12}, \
+    {    351,11}, {    703,13}, {    191,12}, {    383,11}, \
+    {    767,12}, {    415,11}, {    831,12}, {    447,11}, \
+    {    895,12}, {    479,14}, {    127,13}, {    255,12}, \
+    {    543,11}, {   1087,12}, {    607,13}, {    319,12}, \
+    {    735,13}, {    383,12}, {    831,11}, {   1663,13}, \
+    {    447,12}, {    959,11}, {   1919,13}, {    511,12}, \
+    {   1087,11}, {   2175,13}, {    575,12}, {   1215,11}, \
+    {   2431,13}, {    639,12}, {   1279,13}, {    703,12}, \
+    {   1407,14}, {    383,13}, {    767,12}, {   1535,13}, \
+    {    831,12}, {   1727,13}, {    959,12}, {   1919,14}, \
+    {    511,13}, {   1087,12}, {   2175,13}, {   1215,12}, \
+    {   2431,14}, {    639,13}, {   1343,12}, {   2687,13}, \
+    {   1471,12}, {   2943,14}, {    767,13}, {   1663,14}, \
+    {    895,13}, {   1919,15}, {    511,14}, {   1023,13}, \
+    {   2175,14}, {   1151,13}, {   2431,12}, {   4863,14}, \
+    {   1279,13}, {   2687,14}, {   1407,13}, {   2943,15}, \
+    {    767,14}, {   1535,13}, {   3199,14}, {   1663,13}, \
+    {   3455,14}, {   1919,16}, {    511,15}, {   1023,14}, \
+    {   2431,13}, {   4863,15}, {   1279,14}, {   2943,13}, \
+    {   5887,15}, {   1535,14}, {  16384,15}, {  32768,16}, \
+    {  65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
+    {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 184
+#define MUL_FFT_THRESHOLD                 3712
+
+#define SQR_FFT_MODF_THRESHOLD             336  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    336, 5}, {     19, 6}, {     10, 5}, {     21, 6}, \
+    {     21, 7}, {     11, 6}, {     23, 7}, {     12, 6}, \
+    {     25, 7}, {     25, 8}, {     13, 7}, {     27, 8}, \
+    {     15, 7}, {     31, 8}, {     17, 7}, {     35, 8}, \
+    {     21, 9}, {     11, 8}, {     27, 9}, {     15, 8}, \
+    {     35, 9}, {     19, 8}, {     41, 9}, {     23, 8}, \
+    {     47, 9}, {     27,10}, {     15, 9}, {     39,10}, \
+    {     23, 9}, {     51,11}, {     15,10}, {     31, 9}, \
+    {     63,10}, {     39, 9}, {     79,10}, {     47,11}, \
+    {     31,10}, {     79,11}, {     47,10}, {     95,12}, \
+    {     31,11}, {     63,10}, {    127, 9}, {    255,10}, \
+    {    135,11}, {     79, 8}, {    639,11}, {     95,10}, \
+    {    191, 9}, {    383,12}, {     63, 9}, {    511,10}, \
+    {    271,11}, {    143,10}, {    287, 9}, {    575,11}, \
+    {    159,10}, {    319,12}, {     95,11}, {    191,10}, \
+    {    383,11}, {    207,10}, {    415,13}, {     63,12}, \
+    {    127,11}, {    255,10}, {    575,11}, {    303,10}, \
+    {    639,11}, {    351,10}, {    703,12}, {    191,11}, \
+    {    383,10}, {    767,11}, {    415,10}, {    831,12}, \
+    {    223,11}, {    447,10}, {    959,13}, {    127,11}, \
+    {    511,10}, {   1023,11}, {    607,10}, {   1215,12}, \
+    {    319,11}, {    671,12}, {    351,11}, {    703,13}, \
+    {    191,12}, {    383,11}, {    767,12}, {    415,11}, \
+    {    831,12}, {    447,11}, {    895,12}, {    479,11}, \
+    {    959,14}, {    127,13}, {    255,12}, {    543,11}, \
+    {   1087,12}, {    575,11}, {   1151,12}, {    607,13}, \
+    {    319,12}, {    671,11}, {   1343,12}, {    703,13}, \
+    {    383,12}, {    831,13}, {    447,12}, {    959,11}, \
+    {   1919,13}, {    511,12}, {   1023,13}, {    575,12}, \
+    {   1215,13}, {    639,12}, {   1343,13}, {    703,14}, \
+    {    383,13}, {    767,12}, {   1535,13}, {    831,12}, \
+    {   1663,13}, {    959,12}, {   1919,14}, {    511,13}, \
+    {   1087,12}, {   2175,13}, {   1215,14}, {    639,13}, \
+    {   1343,12}, {   2687,13}, {   1407,12}, {   2815,13}, \
+    {   1471,14}, {    767,13}, {   1599,12}, {   3199,13}, \
+    {   1663,14}, {    895,13}, {   1919,15}, {    511,14}, \
+    {   1023,13}, {   2175,14}, {   1151,13}, {   2431,12}, \
+    {   4863,14}, {   1279,13}, {   2687,14}, {   1407,13}, \
+    {   2815,15}, {    767,14}, {   1535,13}, {   3199,14}, \
+    {   1663,13}, {   3455,14}, {   1919,16}, {    511,15}, \
+    {   1023,14}, {   2431,13}, {   4863,15}, {   1279,14}, \
+    {   2943,13}, {   5887,15}, {   1535,14}, {  16384,15}, \
+    {  32768,16}, {  65536,17}, { 131072,18}, { 262144,19}, \
+    { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
+    {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 177
+#define SQR_FFT_THRESHOLD                 3264
+
+#define MULLO_BASECASE_THRESHOLD             5
+#define MULLO_DC_THRESHOLD                  33
+#define MULLO_MUL_N_THRESHOLD             6633
+
+#define DC_DIV_QR_THRESHOLD                 39
+#define DC_DIVAPPR_Q_THRESHOLD             119
+#define DC_BDIV_QR_THRESHOLD                31
+#define DC_BDIV_Q_THRESHOLD                 78
+
+#define INV_MULMOD_BNM1_THRESHOLD           46
+#define INV_NEWTON_THRESHOLD               139
+#define INV_APPR_THRESHOLD                 131
+
+#define BINV_NEWTON_THRESHOLD              198
+#define REDC_1_TO_REDC_2_THRESHOLD          23
+#define REDC_2_TO_REDC_N_THRESHOLD          59
+
+#define MU_DIV_QR_THRESHOLD               1334
+#define MU_DIVAPPR_Q_THRESHOLD            1442
+#define MUPI_DIV_QR_THRESHOLD               66
+#define MU_BDIV_QR_THRESHOLD              1017
+#define MU_BDIV_Q_THRESHOLD               1442
+
+#define MATRIX22_STRASSEN_THRESHOLD         15
+#define HGCD_THRESHOLD                     125 /* hardwired, tuneup crashes */
+#define GCD_DC_THRESHOLD                   396
+#define GCDEXT_DC_THRESHOLD                368
+#define JACOBI_BASE_METHOD                   1
+
+#define GET_STR_DC_THRESHOLD                12
+#define GET_STR_PRECOMPUTE_THRESHOLD        21
+#define SET_STR_DC_THRESHOLD               650
+#define SET_STR_PRECOMPUTE_THRESHOLD      1585
diff --git a/mpn/x86_64/darwin.m4 b/mpn/x86_64/darwin.m4

new file mode 100644 (file)

index 0000000..247b7a6
--- /dev/null
+++ b/mpn/x86_64/darwin.m4
@@ -0,0 +1,34 @@
+divert(-1)
+dnl  Copyright 2008 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+dnl  DARWIN is defined with an empty expansion.  Presumably other m4 code
+dnl  tests for it with ifdef to select Darwin specific variants -- confirm
+dnl  at the users.
+
+define(`DARWIN')
+
+dnl  Usage: LEA(symbol,reg)
+dnl
+dnl  Expands to a lea instruction that loads the RIP-relative address of
+dnl  symbol into reg.
+
+define(`LEA',`
+       lea     $1(%rip), $2
+')
+
+dnl  Usage: CALL(funcname)
+dnl
+dnl  Simply override the definition in x86_64-defs.m4.  This version expands
+dnl  to a plain call of funcname with GSYM_PREFIX prepended.
+
+define(`CALL',`call    GSYM_PREFIX`'$1')
+
+
+dnl  JUMPTABSECT expands to DATA.  Presumably this is the section directive
+dnl  that code with jump tables emits before the table -- confirm at the users.
+
+define(`JUMPTABSECT', `DATA')
+
+divert`'dnl
diff --git a/mpn/x86_64/dive_1.asm b/mpn/x86_64/dive_1.asm

new file mode 100644 (file)

index 0000000..f3b6ac8
--- /dev/null
+++ b/mpn/x86_64/dive_1.asm
@@ -0,0 +1,144 @@
+dnl  AMD64 mpn_divexact_1 -- mpn by limb exact division.
+
+dnl  Copyright 2001, 2002, 2004, 2005, 2006 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C           cycles/limb
+C K8,K9:       10
+C K10:         10
+C P4:          33
+C P6 core2:    13.25
+C P6 corei7:   14
+C P6 atom:     42
+
+C A quick adaptation of the 32-bit K7 code.
+
+
+C INPUT PARAMETERS
+C rp           rdi
+C up           rsi
+C n            rdx
+C divisor      rcx
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+C mpn_divexact_1 (rp, up, n, divisor) -- register assignment as listed under
+C INPUT PARAMETERS above.  Outline of the code below:
+C   1. Strip the trailing zero bits from the divisor and keep their count in
+C      rcx; the source limbs are shifted right by that count as they are read.
+C   2. Build the inverse of the odd divisor mod 2^64, starting from an 8-bit
+C      table entry and doubling the precision three times (16, 32, 64 bits).
+C   3. For each limb, low to high: take the shifted source limb, subtract the
+C      carry bit and carry limb from the previous step, and multiply by the
+C      inverse to get the quotient limb.
+C rbx is the only callee-saved register used; it is pushed on entry and
+C popped on both exit paths.
+PROLOGUE(mpn_divexact_1)
+       push    %rbx
+
+       mov     %rcx, %rax              C rax = divisor
+       xor     R32(%rcx), R32(%rcx)    C shift count
+       mov     %rdx, %r8               C r8 = n
+
+       bt      $0, R32(%rax)           C CF = low bit of divisor
+       jnc     L(evn)                  C skip bsfq unless divisor is even
+
+L(odd):        mov     %rax, %rbx              C rbx = odd divisor
+       shr     R32(%rax)
+       and     $127, R32(%rax)         C d/2, 7 bits
+
+ifdef(`PIC',`
+       mov     binvert_limb_table@GOTPCREL(%rip), %rdx
+',`
+       movabs  $binvert_limb_table, %rdx
+')
+
+       movzbl  (%rdx,%rax), R32(%rax)  C inv 8 bits
+
+       mov     %rbx, %r11              C d without twos
+
+C Three rounds of inv = 2*inv - inv*inv*d, each doubling the number of
+C correct low bits.  The first two rounds use 32-bit operations, the last
+C one uses 64-bit operations and leaves the result in r10.
+       lea     (%rax,%rax), R32(%rdx)  C 2*inv
+       imul    R32(%rax), R32(%rax)    C inv*inv
+       imul    R32(%rbx), R32(%rax)    C inv*inv*d
+       sub     R32(%rax), R32(%rdx)    C inv = 2*inv - inv*inv*d, 16 bits
+
+       lea     (%rdx,%rdx), R32(%rax)  C 2*inv
+       imul    R32(%rdx), R32(%rdx)    C inv*inv
+       imul    R32(%rbx), R32(%rdx)    C inv*inv*d
+       sub     R32(%rdx), R32(%rax)    C inv = 2*inv - inv*inv*d, 32 bits
+
+       lea     (%rax,%rax), %r10       C 2*inv
+       imul    %rax, %rax              C inv*inv
+       imul    %rbx, %rax              C inv*inv*d
+       sub     %rax, %r10              C inv = 2*inv - inv*inv*d, 64 bits
+
+C Point rsi just past the source and rdi at the last destination limb, then
+C index both with the negative counter r8, which counts up towards zero.
+       lea     (%rsi,%r8,8), %rsi      C up end
+       lea     -8(%rdi,%r8,8), %rdi    C rp end
+       neg     %r8                     C -n
+
+       mov     (%rsi,%r8,8), %rax      C up[0]
+
+       inc     %r8
+       jz      L(one)                  C n == 1, single limb case
+
+       mov     (%rsi,%r8,8), %rdx      C up[1]
+
+       shrd    R8(%rcx), %rdx, %rax    C low limb of (up[1]:up[0]) >> shift
+
+       xor     R32(%rbx), R32(%rbx)    C no carry bit yet
+       jmp     L(ent)
+
+L(evn):        bsf     %rax, %rcx              C shift = index of lowest set bit
+       shr     R8(%rcx), %rax          C make the divisor odd
+       jmp     L(odd)
+
+       ALIGN(8)
+L(top):
+       C rax   q
+       C rbx   carry bit, 0 or 1
+       C rcx   shift
+       C rdx
+       C rsi   up end
+       C rdi   rp end
+       C r8    counter, limbs, negative
+       C r10   d^(-1) mod 2^64
+       C r11   d, shifted down
+
+       mul     %r11                    C carry limb in rdx     0 10
+       mov     -8(%rsi,%r8,8), %rax    C current source limb
+       mov     (%rsi,%r8,8), %r9       C next higher source limb
+       shrd    R8(%rcx), %r9, %rax     C shifted source limb
+       nop                             C
+       sub     %rbx, %rax              C apply carry bit
+       setc    %bl                     C borrow from the carry bit subtract
+       sub     %rdx, %rax              C apply carry limb      5
+       adc     $0, %rbx                C                       6
+L(ent):        imul    %r10, %rax              C                       6
+       mov     %rax, (%rdi,%r8,8)      C store quotient limb
+       inc     %r8                     C
+       jnz     L(top)
+
+C Last limb: there is no higher source limb to shift in, so a plain shr is
+C used, and the carry out of this step is not needed.
+       mul     %r11                    C carry limb in rdx
+       mov     -8(%rsi), %rax          C up high limb
+       shr     R8(%rcx), %rax
+       sub     %rbx, %rax              C apply carry bit
+       sub     %rdx, %rax              C apply carry limb
+       imul    %r10, %rax
+       mov     %rax, (%rdi)
+       pop     %rbx
+       ret
+
+C n == 1: shift the only limb and multiply by the inverse.
+L(one):        shr     R8(%rcx), %rax
+       imul    %r10, %rax
+       mov     %rax, (%rdi)
+       pop     %rbx
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86_64/divrem_1.asm b/mpn/x86_64/divrem_1.asm

new file mode 100644 (file)

index 0000000..da0a211
--- /dev/null
+++ b/mpn/x86_64/divrem_1.asm
@@ -0,0 +1,280 @@
+dnl  x86-64 mpn_divrem_1 -- mpn by limb division.
+
+dnl  Copyright 2004, 2005, 2007, 2008, 2009 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C              norm    unorm   frac
+C K8           13      13      12
+C P4           44.2    44.2    42.3
+C P6 core2     25      24.5    19.3
+C P6 corei7    21.5    20.7    18
+C P6 atom      42      52      37
+
+C TODO
+C  * Compute the inverse without relying on the div instruction.
+C    Newton's method and mulq, or perhaps the faster fdiv.
+C  * Tune prologue.
+C  * Optimize for Core 2.
+
+C The code for unnormalized divisors works also for normalized divisors, but
+C for some reason it runs really slowly (on K8) for that case.  Use special
+C code until we can address this.  The Intel Atom is also affected, but
+C understandably (shld slowness).
+define(`SPECIAL_CODE_FOR_NORMALIZED_DIVISOR',1)
+
+C mp_limb_t
+C mpn_divrem_1 (mp_ptr qp, mp_size_t fn,
+C               mp_srcptr np, mp_size_t nn, mp_limb_t d)
+
+C mp_limb_t
+C mpn_preinv_divrem_1 (mp_ptr qp, mp_size_t fn,
+C                      mp_srcptr np, mp_size_t nn, mp_limb_t d,
+C                      mp_limb_t dinv, int cnt)
+
+C INPUT PARAMETERS
+define(`qp',           `%rdi')
+define(`fn_param',     `%rsi')
+define(`up_param',     `%rdx')
+define(`un_param',     `%rcx')
+define(`d',            `%r8')
+define(`dinv',         `%r9')          C only for mpn_preinv_divrem_1
+C       shift passed on stack          C only for mpn_preinv_divrem_1
+
+define(`cnt',          `%rcx')
+define(`up',           `%rsi')
+define(`fn',           `%r12')
+define(`un',           `%rbx')
+
+
+C rax rbx rcx rdx rsi rdi rbp r8  r9  r10 r11 r12 r13 r14 r15
+C         cnt         qp      d  dinv
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+C mpn_preinv_divrem_1: performs the same register setup as mpn_divrem_1
+C below, but takes the inverse (dinv, in r9) and the shift count (on the
+C stack) from the caller and then jumps straight into the division loops of
+C mpn_divrem_1.  Unlike mpn_divrem_1 there is no early exit for fn + nn == 0
+C and no special handling of the high limb; the initial remainder is 0.
+C NOTE(review): with nn == 0 the L(uent) path loads -8(up,un,8), i.e. the
+C limb below np -- presumably callers guarantee nn >= 1; confirm.
+PROLOGUE(mpn_preinv_divrem_1)
+       xor     %eax, %eax              C rax = 0, the initial remainder
+       push    %r13
+       push    %r12
+       push    %rbp
+       push    %rbx
+
+       mov     fn_param, fn
+       mov     un_param, un
+       add     fn_param, un_param      C fn + nn, the number of quotient limbs
+       mov     up_param, up
+
+       lea     -8(qp,un_param,8), qp   C qp = most significant quotient limb
+
+       test    d, d
+       js      L(nent)                 C high bit set: enter the normalized loop
+       mov     40(%rsp), R8(cnt)       C stack argument: 4 pushes + return address
+       shl     R8(cnt), d              C shift d left by the supplied count
+       jmp     L(uent)
+EPILOGUE()
+
+       ALIGN(16)
+C mpn_divrem_1 (qp, fn, np, nn, d) -- see the prototype and the INPUT
+C PARAMETERS defines above.  Writes fn + nn quotient limbs, most significant
+C first, and returns the remainder in rax.  Structure of the code below:
+C   - L(normalized) / L(nloop): divisor with the high bit set, no shifting.
+C   - L(unnormalized) / L(uloop) / L(ulast): other divisors; d and the
+C     dividend limbs are shifted left by cl on the fly.
+C   - L(87) / L(floop): the fn fraction limbs, shared by both paths.
+C rbx, rbp, r12 and r13 are pushed on entry and popped at L(ret).
+PROLOGUE(mpn_divrem_1)
+       xor     %eax, %eax              C rax = 0, returned as is if fn + nn == 0
+       push    %r13
+       push    %r12
+       push    %rbp
+       push    %rbx
+
+       mov     fn_param, fn
+       mov     un_param, un
+       add     fn_param, un_param      C fn + nn, the number of quotient limbs
+       mov     up_param, up
+       je      L(ret)                  C ZF from the add above (mov keeps flags)
+
+       lea     -8(qp,un_param,8), qp   C qp = most significant quotient limb
+       xor     R32(%rbp), R32(%rbp)    C rbp = 0: remainder so far
+
+
+ifdef(`SPECIAL_CODE_FOR_NORMALIZED_DIVISOR',`
+       test    d, d
+       jns     L(unnormalized)
+
+L(normalized):
+       test    un, un
+       je      L(8)                    C un == 0
+       mov     -8(up,un,8), %rbp       C high limb of the dividend
+       dec     un
+       mov     %rbp, %rax
+       sub     d, %rbp                 C high limb - d, CF set if high limb < d
+       cmovb   %rax, %rbp              C keep the high limb itself on borrow
+       sbb     %eax, %eax              C eax = -CF (cmovb leaves the flags alone)
+       inc     %eax                    C 1 if high limb >= d, else 0
+       mov     %rax, (qp)              C store that as the top quotient limb
+       lea     -8(qp), qp
+L(8):
+C dinv = quotient of ((~d) * 2^64 + 2^64 - 1) / d, computed with one div.
+       mov     d, %rdx
+       mov     $-1, %rax
+       not     %rdx
+       div     d                       C FREE rax rdx rcx r9 r10 r11
+       mov     %rax, dinv
+       mov     %rbp, %rax              C rax = remainder so far
+       jmp     L(nent)
+
+C One quotient limb per iteration.  At the top of each step rax = remainder r
+C and r10 = next dividend limb u:
+C   (q1:q0) = r * dinv + ((r + 1):u)        mul, add, adc
+C   rem     = u - q1 * d   (mod 2^64)       imul, sub
+C   if rem >= q0: q1 -= 1, rem += d         cmp, cmovb, adc $-1
+C   if rem >= d:  q1 += 1, rem -= d         handled out of line at L(nfx)
+C The numbers in the right hand columns are the original timing annotations.
+       ALIGN(16)
+L(nloop):                              C                   cycK8  cycP6  cycP4
+       mov     (up,un,8), %r10         C
+       lea     1(%rax), %rbp           C
+       mul     dinv                    C                    0,13   0,19  0,45
+       add     %r10, %rax              C                    4      8     12
+       adc     %rbp, %rdx              C                    5      9     13
+       mov     %rax, %rbp              C                    5      9     13
+       mov     %rdx, %r13              C                    6      11    23
+       imul    d, %rdx                 C                    6      11    23
+       sub     %rdx, %r10              C                    10     16    33
+       mov     d, %rax                 C
+       add     %r10, %rax              C                    11     17    34
+       cmp     %rbp, %r10              C                    11     17    34
+       cmovb   %r10, %rax              C                    12     18    35
+       adc     $-1, %r13               C
+       cmp     d, %rax                 C
+       jae     L(nfx)                  C
+L(nok):        mov     %r13, (qp)              C
+       sub     $8, qp                  C
+L(nent):dec    un                      C
+       jns     L(nloop)                C
+
+       xor     %ecx, %ecx              C shift count 0, so the final shr is a no-op
+       jmp     L(87)
+
+L(nfx):        sub     d, %rax
+       inc     %r13
+       jmp     L(nok)
+')
+
+L(unnormalized):
+       test    un, un
+       je      L(44)                   C no integer limbs
+       mov     -8(up,un,8), %rax       C high limb of the dividend
+       cmp     d, %rax
+       jae     L(44)                   C high limb >= d: leave it to the loop
+       mov     %rbp, (qp)              C high limb < d: top quotient limb is 0
+       mov     %rax, %rbp              C   and the high limb is the remainder so far
+       lea     -8(qp), qp
+C NOTE(review): ZF below is still the one set by the cmp above (mov and lea
+C keep the flags), and the jae fell through, so rax < d and ZF is clear.
+C This je therefore looks like it can never be taken -- confirm.
+       je      L(ret)
+       dec     un
+L(44):
+       bsr     d, %rcx                 C index of the highest set bit of d
+       not     %ecx                    C low 6 bits of cl = 63 - index
+       sal     %cl, d                  C shift d left so that its high bit is set
+       sal     %cl, %rbp               C shift the remainder so far to match
+C dinv = quotient of ((~d) * 2^64 + 2^64 - 1) / d, for the shifted d.
+       mov     d, %rdx
+       mov     $-1, %rax
+       not     %rdx
+       div     d                       C FREE rax rdx r9 r10 r11
+       test    un, un
+       mov     %rax, dinv
+       mov     %rbp, %rax              C rax = shifted remainder so far
+       je      L(87)                   C ZF from the test above: no integer limbs left
+L(uent):
+       mov     -8(up,un,8), %rbp       C highest remaining dividend limb
+       shr     %cl, %rax               C undo the shift of the remainder ...
+       shld    %cl, %rbp, %rax         C ... and redo it, pulling in the top bits of rbp
+       sub     $2, un
+       js      L(ulast)                C only one limb remains
+
+C Same division step as described for the normalized loop, with the dividend
+C limb u formed in rbp by shifting the current limb left by cl and filling
+C from the top bits of the next lower limb (r10).  r11 is the temporary.
+       ALIGN(16)
+L(uloop):
+       nop
+       mov     (up,un,8), %r10         C next lower dividend limb
+       lea     1(%rax), %r11
+       shld    %cl, %r10, %rbp         C u = shifted current limb
+       mul     dinv
+       add     %rbp, %rax
+       adc     %r11, %rdx
+       mov     %rax, %r11
+       mov     %rdx, %r13
+       imul    d, %rdx
+       sub     %rdx, %rbp
+       mov     d, %rax
+       add     %rbp, %rax
+       cmp     %r11, %rbp
+       cmovb   %rbp, %rax
+       adc     $-1, %r13
+       cmp     d, %rax
+       jae     L(ufx)
+L(uok):        mov     %r13, (qp)
+       sub     $8, qp
+       dec     un
+       mov     %r10, %rbp              C the lower limb becomes the current limb
+       jns     L(uloop)                C flags from the dec above (mov keeps flags)
+C Lowest integer limb: nothing below it to shift in, so use sal.
+L(ulast):
+       lea     1(%rax), %r11
+       sal     %cl, %rbp
+       mul     dinv
+       add     %rbp, %rax
+       adc     %r11, %rdx
+       mov     %rax, %r11
+       mov     %rdx, %r13
+       imul    d, %rdx
+       sub     %rdx, %rbp
+       mov     d, %rax
+       add     %rbp, %rax
+       cmp     %r11, %rbp
+       cmovb   %rbp, %rax
+       adc     $-1, %r13
+       cmp     d, %rax
+       jae     L(93)
+L(69): mov     %r13, (qp)
+       sub     $8, qp
+       jmp     L(87)
+
+L(ufx):        sub     d, %rax
+       inc     %r13
+       jmp     L(uok)
+
+L(93): sub     d, %rax
+       inc     %r13
+       jmp     L(69)
+
+L(87): mov     d, %rbp
+       neg     %rbp                    C rbp = -d
+       jmp     L(87b)
+
+C Fraction limbs: the same division step with an all-zero dividend limb, so
+C rem = -q1 * d is formed by multiplying q1 with rbp = -d.  This loop has no
+C second compare of the remainder against d.
+       ALIGN(16)
+L(floop):                              C                   cycK8  cycP6  cycP4
+       lea     1(%rax), %r11           C
+       mul     dinv                    C                    0,12
+       add     %r11, %rdx              C                    5
+       mov     %rax, %r11              C                    4
+       mov     %rdx, %r13              C                    6
+       imul    %rbp, %rdx              C                    6
+       mov     d, %rax                 C
+       add     %rdx, %rax              C                    10
+       cmp     %r11, %rdx              C                    10
+       cmovb   %rdx, %rax              C                    11
+       adc     $-1, %r13               C
+       mov     %r13, (qp)              C
+       sub     $8, qp                  C
+L(87b):        dec     fn                      C
+       jns     L(floop)                C
+
+       shr     %cl, %rax               C shift the remainder back down by cl
+L(ret):        pop     %rbx
+       pop     %rbp
+       pop     %r12
+       pop     %r13
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/divrem_2.asm b/mpn/x86_64/divrem_2.asm

new file mode 100644 (file)

index 0000000..2b3a34c
--- /dev/null
+++ b/mpn/x86_64/divrem_2.asm
@@ -0,0 +1,240 @@
+dnl  x86-64 mpn_divrem_2 -- Divide an mpn number by a normalized 2-limb number.
+
+dnl  Copyright 2007, 2008, 2010 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C              norm    frac
+C K8           20      20
+C P4           73      73
+C P6 core2     37      37
+C P6 corei7    33      33
+
+C TODO
+C  * Perhaps compute the inverse without relying on divq?  Could either use
+C    Newton's method and mulq, or perhaps the faster fdiv.
+C  * The loop has not been carefully tuned, nor analysed for critical path
+C    length.  It seems that 20 c/l is a bit long, compared to the 13 c/l for
+C    mpn_divrem_1.
+C  * Clean up.  This code is really crude.
+
+
+C INPUT PARAMETERS
+define(`qp',           `%rdi')
+define(`fn',           `%rsi')
+define(`up_param',     `%rdx')
+define(`un_param',     `%rcx')
+define(`dp',           `%r8')
+
+define(`dinv',         `%r9')
+
+
+C rax rbx rcx rdx rsi rdi rbp r8  r9  r10 r11 r12 r13 r14 r15
+C         cnt         qp      d  dinv
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_divrem_2)
+
+       push    %r15
+       lea     (%rdx,%rcx,8), %rax     C rax = up + 8*un, one past the top limb
+       push    %r14
+       push    %r13
+       mov     %rsi, %r13              C r13 = fn
+       push    %r12
+       lea     -24(%rax), %r12         C r12 = &up[un-3], next dividend limb to load
+       push    %rbp
+       mov     %rdi, %rbp              C rbp = qp
+       push    %rbx
+       mov     8(%r8), %r11            C r11 = d1 = dp[1]
+       mov     -8(%rax), %r9           C r9  = up[un-1]
+       mov     (%r8), %r8              C r8  = d0 = dp[0]
+       mov     -16(%rax), %r10         C r10 = up[un-2]
+       xor     R32(%r15), R32(%r15)    C r15 = 0, becomes the return value
+       cmp     %r9, %r11
+       ja      L(2)                    C d1 > up[un-1], nothing to subtract
+       setb    %dl                     C dl = (d1 < up[un-1])
+       cmp     %r10, %r8
+       setbe   %al                     C al = (d0 <= up[un-2])
+       orb     %al, %dl
+       jne     L(23)                   C top two limbs >= {d1,d0}
+L(2):
+       lea     -3(%rcx,%r13), %rbx     C un + fn - 3
+       test    %rbx, %rbx
+       js      L(6)                    C negative count: skip the division loop
+       mov     %r11, %rdx
+       mov     $-1, %rax
+       not     %rdx                    C rdx:rax = ~d1 * 2^64 + (2^64 - 1)
+       div     %r11                    C rax = floor (rdx:rax / d1)
+       mov     %r11, %rdx
+       mov     %rax, %rdi              C rdi = inverse candidate, adjusted below
+       imul    %rax, %rdx              C rdx = low limb of d1 * rax
+       mov     %rdx, %r14
+       mul     %r8                     C rdx:rax = d0 * rax
+       mov     %rdx, %rcx              C rcx = high limb of that product
+       mov     $-1, %rdx
+       add     %r8, %r14
+       adc     $0, %rdx
+       add     %rcx, %r14
+       adc     $0, %rdx
+       js      L(8)                    C neither addition carried: keep rdi as is
+L(18):
+       dec     %rdi
+       sub     %r11, %r14
+       sbb     $0, %rdx
+       jns     L(18)                   C loop until rdx:r14 turns negative
+L(8):
+
+C rax rbx rcx rdx rsi rdi rbp r8 r9 r10 r11 r12 r13 r14 r15
+C n2      un      n1 dinv qp  d0        d1  up  fn      msl
+C     n2  un     -d1      n1    dinv XX              XX
+
+ifdef(`NEW',`
+       lea     (%rbp,%rbx,8), %rbp
+       mov     %rbx, %rcx              C un
+       mov     %r9, %rbx
+       mov     %rdi, %r9               C di
+       mov     %r10, %r14
+       mov     %r11, %rsi
+       neg     %rsi                    C -d1
+       ALIGN(16)
+L(loop):
+       mov     %r9, %rax               C di            ncp
+       mul     %rbx                    C               0, 18
+       add     %r14, %rax              C               4
+       mov     %rax, %r10              C q0            5
+       adc     %rbx, %rdx              C               5
+       mov     %rdx, %rdi              C q             6
+       imul    %rsi, %rdx              C               6
+       mov     %r8, %rax               C               ncp
+       lea     (%rdx, %r14), %rbx      C n1 -= ...     7
+       mul     %rdi                    C               7
+       xor     R32(%r14), R32(%r14)    C
+       cmp     %rcx, %r13              C
+       jg      L(19)                   C
+       mov     (%r12), %r14            C
+       sub     $8, %r12                C
+L(19): sub     %r8, %r14               C               ncp
+       sbb     %r11, %rbx              C               9
+       sub     %rax, %r14              C               11
+       sbb     %rdx, %rbx              C               12
+       inc     %rdi                    C               7
+       xor     R32(%rdx), R32(%rdx)    C
+       cmp     %r10, %rbx              C               13
+       mov     %r8, %rax               C d0            ncp
+       adc     $-1, %rdx               C mask          14
+       add     %rdx, %rdi              C q--           15
+       and     %rdx, %rax              C d0 or 0       15
+       and     %r11, %rdx              C d1 or 0       15
+       add     %rax, %r14              C               16
+       adc     %rdx, %rbx              C               16
+       cmp     %r11, %rbx              C               17
+       jae     L(fix)                  C
+L(bck):        mov     %rdi, (%rbp)            C
+       sub     $8, %rbp                C
+       dec     %rcx
+       jns     L(loop)
+
+       mov     %r14, %r10
+       mov     %rbx, %r9
+',`
+       lea     (%rbp,%rbx,8), %rbp     C rbp = &qp[un+fn-3], quotient stored downwards
+       mov     %rbx, %rcx              C rcx = loop counter, un+fn-3 down to 0
+       mov     %r9, %rax               C rax = current high limb
+       mov     %r10, %rsi              C rsi = current low limb
+       ALIGN(16)
+L(loop):
+       mov     %rax, %r14              C               0, 19
+       mul     %rdi                    C               0
+       mov     %r11, %r9               C               1
+       add     %rsi, %rax              C               4
+       mov     %rax, %rbx              C q0            5
+       adc     %r14, %rdx              C q             5
+       lea     1(%rdx), %r10           C               6
+       mov     %rdx, %rax              C               6
+       imul    %rdx, %r9               C               6
+       sub     %r9, %rsi               C               10
+       xor     R32(%r9), R32(%r9)      C zero limb, used when no limb is loaded
+       mul     %r8                     C               7
+       cmp     %rcx, %r13              C fn > counter means fraction part
+       jg      L(13)                   C
+       mov     (%r12), %r9             C next dividend limb
+       sub     $8, %r12                C
+L(13): sub     %r8, %r9                C               ncp
+       sbb     %r11, %rsi              C               11
+       sub     %rax, %r9               C               11
+       sbb     %rdx, %rsi              C               12
+       cmp     %rbx, %rsi              C               13
+       sbb     %rax, %rax              C               14
+       not     %rax                    C               15
+       add     %rax, %r10              C               16
+       mov     %r8, %rbx               C               ncp
+       and     %rax, %rbx              C               16
+       and     %r11, %rax              C               16
+       add     %rbx, %r9               C               17
+       adc     %rsi, %rax              C               18
+       cmp     %rax, %r11              C               19
+       jbe     L(fix)                  C
+L(bck):        mov     %r10, (%rbp)            C store quotient limb
+       sub     $8, %rbp                C
+       mov     %r9, %rsi               C               18
+       dec     %rcx
+       jns     L(loop)
+
+       mov     %rsi, %r10
+       mov     %rax, %r9
+')
+L(6):
+       mov     %r10, 8(%r12)           C r12 = up - 8 here (given un >= 2): up[0]
+       mov     %r9, 16(%r12)           C and this is up[1]
+       pop     %rbx
+       pop     %rbp
+       pop     %r12
+       pop     %r13
+       pop     %r14
+       mov     %r15, %rax              C return 0 or 1, see L(23)
+       pop     %r15
+       ret
+
+L(23): inc     R32(%r15)               C r15 = 1
+       sub     %r8, %r10
+       sbb     %r11, %r9               C r9:r10 -= d1:d0
+       jmp     L(2)
+
+ifdef(`NEW',`
+L(fix):        seta    %dl
+       cmp     %r8, %r14
+       setae   %al
+       orb     %dl, %al
+       je      L(bck)
+       inc     %rdi
+       sub     %r8, %r14
+       sbb     %r11, %rbx
+       jmp     L(bck)
+',`
+L(fix):        jb      L(88)                   C flags from cmp at loop end: d1 < rax
+       cmp     %r8, %r9
+       jb      L(bck)                  C rax == d1 and r9 < d0: no fix needed
+L(88): inc     %r10
+       sub     %r8, %r9
+       sbb     %r11, %rax
+       jmp     L(bck)
+')
+EPILOGUE()
diff --git a/mpn/x86_64/fat/diveby3.c b/mpn/x86_64/fat/diveby3.c

new file mode 100644 (file)

index 0000000..7ea0161
--- /dev/null
+++ b/mpn/x86_64/fat/diveby3.c
@@ -0,0 +1,21 @@
+/* Fat binary fallback mpn_divexact_by3c.
+
+Copyright 2003, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "mpn/generic/diveby3.c"
diff --git a/mpn/x86_64/fat/fat.c b/mpn/x86_64/fat/fat.c

new file mode 100644 (file)

index 0000000..ec0f353
--- /dev/null
+++ b/mpn/x86_64/fat/fat.c
@@ -0,0 +1,319 @@
+/* x86 fat binary initializers.
+
+   Contributed to the GNU project by Kevin Ryde (original x86_32 code) and
+   Torbjorn Granlund (port to x86_64)
+
+   THE FUNCTIONS AND VARIABLES IN THIS FILE ARE FOR INTERNAL USE ONLY.
+   THEY'RE ALMOST CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR
+   COMPLETELY IN FUTURE GNU MP RELEASES.
+
+Copyright 2003, 2004, 2009, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>    /* for printf */
+#include <stdlib.h>   /* for getenv */
+#include <string.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Change this to "#define TRACE(x) x" for some traces. */
+#define TRACE(x)
+
+/* Change this to 1 to take the cpuid from GMP_CPU_TYPE env var. */
+#define WANT_FAKE_CPUID  0
+
+
+/* fat_entry.asm */
+long __gmpn_cpuid __GMP_PROTO ((char dst[12], int id));
+
+
+#if WANT_FAKE_CPUID
+/* The "name"s in the table are values for the GMP_CPU_TYPE environment
+   variable.  Anything can be used, but for now it's the canonical cpu types
+   as per config.guess/config.sub.  */
+
+#define __gmpn_cpuid            fake_cpuid
+#define __gmpn_cpuid_available  fake_cpuid_available
+
+#define MAKE_FMS(family, model)                                                \
+  ((((family) & 0xf) << 8) + (((family) & 0xff0) << 20)                        \
+   + (((model) & 0xf) << 4) + (((model)  &  0xf0) << 12))
+
+static struct {
+  const char  *name;           /* value matched against GMP_CPU_TYPE */
+  const char  vendor[13];      /* 12-character vendor string plus NUL */
+  unsigned    fms;             /* value fake_cpuid returns for id 1 */
+} fake_cpuid_table[] = {
+  { "core2",      "GenuineIntel", MAKE_FMS (6, 0xf) },
+  { "coreinhm",   "GenuineIntel", MAKE_FMS (6, 0x1a) },
+  { "coreiwsm",   "GenuineIntel", MAKE_FMS (6, 0x25) },
+  { "coreisbr",   "GenuineIntel", MAKE_FMS (6, 0x2a) },
+  { "atom",       "GenuineIntel", MAKE_FMS (6, 0x1c) },
+  { "pentium4",   "GenuineIntel", MAKE_FMS (15, 3) },
+
+  { "k8",         "AuthenticAMD", MAKE_FMS (15, 0) },
+  { "k10",        "AuthenticAMD", MAKE_FMS (16, 0) },
+  { "bobcat",     "AuthenticAMD", MAKE_FMS (20, 1) },
+
+  { "nano",       "CentaurHauls", MAKE_FMS (6, 15) },
+};
+
+static int
+fake_cpuid_lookup (void)       /* index of the GMP_CPU_TYPE entry in fake_cpuid_table */
+{
+  char  *s;
+  int   i;
+
+  s = getenv ("GMP_CPU_TYPE");
+  if (s == NULL)               /* variable unset: report and abort */
+    {
+      printf ("Need GMP_CPU_TYPE environment variable for fake cpuid\n");
+      abort ();
+    }
+
+  for (i = 0; i < numberof (fake_cpuid_table); i++)
+    if (strcmp (s, fake_cpuid_table[i].name) == 0)
+      return i;                        /* exact, case-sensitive name match */
+
+  printf ("GMP_CPU_TYPE=%s unknown\n", s);
+  abort ();                    /* no entry matched */
+}
+
+static int
+fake_cpuid_available (void)    /* nonzero when the selected entry has a non-empty vendor */
+{
+  return fake_cpuid_table[fake_cpuid_lookup()].vendor[0] != '\0';
+}
+
+static long
+fake_cpuid (char dst[12], int id)      /* stand-in for __gmpn_cpuid, driven by the table */
+{
+  int  i = fake_cpuid_lookup();
+
+  switch (id) {
+  case 0:                      /* copy the 12 vendor bytes, no terminator */
+    memcpy (dst, fake_cpuid_table[i].vendor, 12);
+    return 0;
+  case 1:                      /* dst is left untouched for this id */
+    return fake_cpuid_table[i].fms;
+  default:                     /* only ids 0 and 1 are emulated */
+    printf ("fake_cpuid(): oops, unknown id %d\n", id);
+    abort ();
+  }
+}
+#endif
+
+
+typedef DECL_preinv_divrem_1 ((*preinv_divrem_1_t));
+typedef DECL_preinv_mod_1    ((*preinv_mod_1_t));
+
+struct cpuvec_t __gmpn_cpuvec = {      /* each function slot starts as its xxx_init routine */
+  __MPN(add_n_init),
+  __MPN(addmul_1_init),
+  __MPN(copyd_init),
+  __MPN(copyi_init),
+  __MPN(divexact_1_init),
+  __MPN(divexact_by3c_init),
+  __MPN(divrem_1_init),
+  __MPN(gcd_1_init),
+  __MPN(lshift_init),
+  __MPN(mod_1_init),
+  __MPN(mod_34lsub1_init),
+  __MPN(modexact_1c_odd_init),
+  __MPN(mul_1_init),
+  __MPN(mul_basecase_init),
+  __MPN(preinv_divrem_1_init),
+  __MPN(preinv_mod_1_init),
+  __MPN(rshift_init),
+  __MPN(sqr_basecase_init),
+  __MPN(sub_n_init),
+  __MPN(submul_1_init),
+  0    /* first member after the function pointers; NOTE(review): struct cpuvec_t layout not visible here */
+};
+
+
+/* The following setups start with generic x86, then overwrite with
+   specifics for a chip, and higher versions of that chip.
+
+   The arrangement of the setups here will normally be the same as the $path
+   selections in configure.in for the respective chips.
+
+   This code is reentrant and thread safe.  We always calculate the same
+   decided_cpuvec, so if two copies of the code are running it doesn't
+   matter which completes first, both write the same to __gmpn_cpuvec.
+
+   We need to go via decided_cpuvec because if one thread has completed
+   __gmpn_cpuvec then it may be making use of the threshold values in that
+   vector.  If another thread is still running __gmpn_cpuvec_init then we
+   don't want it to write different values to those fields since some of the
+   asm routines only operate correctly up to their own defined threshold,
+   not an arbitrary value.  */
+
+void
+__gmpn_cpuvec_init (void)
+{
+  struct cpuvec_t  decided_cpuvec;
+  char vendor_string[13];
+  char dummy_string[12];
+  long fms;
+  int family, model;
+
+  TRACE (printf ("__gmpn_cpuvec_init:\n"));
+
+  memset (&decided_cpuvec, '\0', sizeof (decided_cpuvec));
+
+  CPUVEC_SETUP_x86_64;         /* baseline; chip-specific setups below run after it */
+  CPUVEC_SETUP_fat;
+
+  __gmpn_cpuid (vendor_string, 0);     /* id 0 fills the 12 vendor bytes */
+  vendor_string[12] = 0;               /* terminate for the strcmp calls below */
+
+  fms = __gmpn_cpuid (dummy_string, 1);        /* only the return value is used */
+  family = ((fms >> 8) & 0xf) + ((fms >> 20) & 0xff);  /* bits 8-11 plus bits 20-27 */
+  model = ((fms >> 4) & 0xf) + ((fms >> 12) & 0xf0);   /* bits 4-7, bits 16-19 as high nibble */
+
+  /*********************************************************/
+  /*** WARNING: keep this list in sync with config.guess ***/
+  /*********************************************************/
+  if (strcmp (vendor_string, "GenuineIntel") == 0)
+    {
+      switch (family)
+       {
+       case 4:
+       case 5:
+         abort ();             /* 32-bit processors */
+
+       case 6:
+         switch (model)
+           {
+           case 0x00:
+           case 0x01:
+           case 0x02:
+           case 0x03:
+           case 0x04:
+           case 0x05:
+           case 0x06:
+           case 0x07:
+           case 0x08:
+           case 0x09:          /* Banias */
+           case 0x0a:
+           case 0x0b:
+           case 0x0c:
+           case 0x0d:          /* Dothan */
+           case 0x0e:          /* Yonah */
+             abort ();         /* 32-bit processors */
+
+           case 0x0f:          /* Conroe Merom Kentsfield Allendale */
+           case 0x10:
+           case 0x11:
+           case 0x12:
+           case 0x13:
+           case 0x14:
+           case 0x15:
+           case 0x16:
+           case 0x17:          /* PNR Wolfdale Yorkfield */
+           case 0x18:
+           case 0x19:
+           case 0x1d:          /* PNR Dunnington */
+             CPUVEC_SETUP_core2;
+             break;
+
+           case 0x1c:          /* Silverthorne */
+           case 0x26:          /* Lincroft */
+           case 0x27:          /* Saltwell */
+             CPUVEC_SETUP_atom;
+             break;
+
+           case 0x1a:          /* NHM Gainestown */
+           case 0x1b:
+           case 0x1e:          /* NHM Lynnfield/Jasper */
+           case 0x1f:
+           case 0x20:
+           case 0x21:
+           case 0x22:
+           case 0x23:
+           case 0x24:
+           case 0x25:          /* WSM Clarkdale/Arrandale */
+           case 0x28:
+           case 0x29:
+           case 0x2b:
+           case 0x2c:          /* WSM Gulftown */
+           case 0x2e:          /* NHM Beckton */
+           case 0x2f:          /* WSM Eagleton */
+             CPUVEC_SETUP_core2;
+             CPUVEC_SETUP_coreinhm;
+             break;
+
+           case 0x2a:          /* SB */
+           case 0x2d:          /* SBC-EP */
+             CPUVEC_SETUP_core2;
+             CPUVEC_SETUP_coreisbr;
+             break;
+           }                   /* models not listed keep the baseline setup */
+         break;
+
+       case 15:
+         CPUVEC_SETUP_pentium4;
+         break;
+       }
+    }
+  else if (strcmp (vendor_string, "AuthenticAMD") == 0)
+    {
+      switch (family)
+       {
+       case 5:
+       case 6:
+         abort ();
+
+       case 15:                /* k8 */
+       case 16:                /* k10 */
+         /* CPUVEC_SETUP_athlon */
+         break;                /* no AMD-specific setup runs: baseline stays in place */
+       }
+    }
+  else if (strcmp (vendor_string, "CentaurHauls") == 0)
+    {
+      switch (family)
+       {
+       case 5:
+         abort ();             /* 32-bit processors */
+
+       case 6:
+         if (model < 15)
+           abort ();           /* 32-bit processors */
+
+         CPUVEC_SETUP_nano;
+         break;
+       }
+    }
+
+  /* There's no x86 generic mpn_preinv_divrem_1 or mpn_preinv_mod_1.
+     Instead default to the plain versions from whichever CPU we detected.
+     The function arguments are compatible, no need for any glue code.  */
+  if (decided_cpuvec.preinv_divrem_1 == NULL)
+    decided_cpuvec.preinv_divrem_1 =(preinv_divrem_1_t)decided_cpuvec.divrem_1;
+  if (decided_cpuvec.preinv_mod_1 == NULL)
+    decided_cpuvec.preinv_mod_1    =(preinv_mod_1_t)   decided_cpuvec.mod_1;
+
+  ASSERT_CPUVEC (decided_cpuvec);
+  CPUVEC_INSTALL (decided_cpuvec);
+
+  /* Set this once the threshold fields are ready.
+     Use volatile to prevent it getting moved.  */
+  ((volatile struct cpuvec_t *) &__gmpn_cpuvec)->initialized = 1;
+}
diff --git a/mpn/x86_64/fat/fat_entry.asm b/mpn/x86_64/fat/fat_entry.asm

new file mode 100644 (file)

index 0000000..db64440
--- /dev/null
+++ b/mpn/x86_64/fat/fat_entry.asm
@@ -0,0 +1,175 @@
+dnl  x86 fat binary entrypoints.
+
+dnl  Contributed to the GNU project by Kevin Ryde (original x86_32 code) and
+dnl  Torbjorn Granlund (port to x86_64)
+
+dnl  Copyright 2003, 2009, 2011 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+dnl  Forcibly disable profiling.
+dnl
+dnl  The entrypoints and inits are small enough not to worry about, the real
+dnl  routines arrived at will have any profiling.  Also, the way the code
+dnl  here ends with a jump means we won't work properly with the
+dnl  "instrument" profiling scheme anyway.
+
+define(`WANT_PROFILING',no)
+
+
+dnl  We define PIC_OR_DARWIN as a helper symbol, then use it for suppressing
+dnl  normal, fast call code, since that triggers problems on darwin.
+dnl
+dnl  FIXME: There might be a more elegant solution, adding less overhead.
+
+ifdef(`DARWIN',
+`define(`PIC_OR_DARWIN')')
+ifdef(`PIC',
+`define(`PIC_OR_DARWIN')')
+
+
+       TEXT
+
+
+dnl  Usage: FAT_ENTRY(name, offset)
+dnl
+dnl  Emit a fat binary entrypoint function of the given name.  This is the
+dnl  normal entry for applications, eg. __gmpn_add_n.
+dnl
+dnl  The code simply jumps through the function pointer in __gmpn_cpuvec at
+dnl  the given "offset" (in bytes).
+dnl
+dnl  For non-PIC, the jumps are 5 bytes each, aligning them to 8 should be
+dnl  fine for all x86s.
+dnl
+dnl  For PIC, the jumps are 20 bytes each, and are best aligned to 16 to
+dnl  ensure at least the first two instructions don't cross a cache line
+dnl  boundary.
+dnl
+dnl  Note the extra `' ahead of PROLOGUE obscures it from the HAVE_NATIVE
+dnl  grepping in configure, stopping that code trying to eval something with
+dnl  $1 in it.
+
+define(FAT_ENTRY,
+m4_assert_numargs(2)
+`      ALIGN(ifdef(`PIC',16,8))
+`'PROLOGUE($1)
+ifdef(`PIC_OR_DARWIN',
+`      LEA(    GSYM_PREFIX`'__gmpn_cpuvec, %rax)
+       jmp     *$2(%rax)               C indirect jump through the table slot
+',`dnl non-PIC
+       jmp     *GSYM_PREFIX`'__gmpn_cpuvec+$2          C same slot, absolute address
+')
+EPILOGUE()
+')
+
+
+dnl  FAT_ENTRY for each CPUVEC_FUNCS_LIST
+dnl
+
+define(`CPUVEC_offset',0)
+foreach(i,
+`FAT_ENTRY(MPN(i),CPUVEC_offset)
+define(`CPUVEC_offset',eval(CPUVEC_offset + 8))',
+CPUVEC_FUNCS_LIST)
+
+
+dnl  Usage: FAT_INIT(name, offset)
+dnl
+dnl  Emit a fat binary initializer function of the given name.  These
+dnl  functions are the initial values for the pointers in __gmpn_cpuvec.
+dnl
+dnl  The code simply calls __gmpn_cpuvec_init, and then jumps back through
+dnl  the __gmpn_cpuvec pointer, at the given "offset" (in bytes).
+dnl  __gmpn_cpuvec_init will have stored the address of the selected
+dnl  implementation there.
+dnl
+dnl  Only one of these routines will be executed, and only once, since after
+dnl  that all the __gmpn_cpuvec pointers go to real routines.  So there's no
+dnl  need for anything special here, just something small and simple.  To
+dnl  keep code size down, "fat_init" is a shared bit of code, arrived at
+dnl  with the offset in %al.  %al is used since the movb instruction is 2
+dnl  bytes where %eax would be 4.
+dnl
+dnl  Note having `PROLOGUE in FAT_INIT obscures that PROLOGUE from the
+dnl  HAVE_NATIVE grepping in configure, preventing that code trying to eval
+dnl  something with $1 in it.
+dnl
+dnl  We need to preserve parameter registers over the __gmpn_cpuvec_init call
+
+define(FAT_INIT,
+m4_assert_numargs(2)
+`PROLOGUE($1)
+       mov     $`'$2, %al              C byte offset of this slot, must fit in 8 bits
+       jmp     L(fat_init)             C shared code below
+EPILOGUE()
+')
+
+L(fat_init):
+       C al    __gmpn_cpuvec byte offset
+
+       movzbl  %al, %eax               C zero-extend the offset; clears the rest of rax
+       push    %rdi                    C save the six integer argument registers
+       push    %rsi
+       push    %rdx
+       push    %rcx
+       push    %r8
+       push    %r9
+       push    %rax                    C keep the offset across the call
+       CALL(   __gmpn_cpuvec_init)     C 7 pushes + return address = 64 bytes on the stack
+       pop     %rax
+       pop     %r9
+       pop     %r8
+       pop     %rcx
+       pop     %rdx
+       pop     %rsi
+       pop     %rdi
+ifdef(`PIC_OR_DARWIN',`
+       LEA(    GSYM_PREFIX`'__gmpn_cpuvec, %r10)
+       jmp     *(%r10,%rax)            C enter the routine now stored in the slot
+',`dnl non-PIC
+       jmp     *GSYM_PREFIX`'__gmpn_cpuvec(%rax)       C enter the routine now stored in the slot
+')
+
+dnl  FAT_INIT for each CPUVEC_FUNCS_LIST
+dnl
+
+define(`CPUVEC_offset',0)
+foreach(i,
+`FAT_INIT(MPN(i`'_init),CPUVEC_offset)
+define(`CPUVEC_offset',eval(CPUVEC_offset + 8))',
+CPUVEC_FUNCS_LIST)
+
+
+
+C long __gmpn_cpuid (char dst[12], int id);
+C
+C This is called only once, so just something simple and compact is fine.
+
+
+PROLOGUE(__gmpn_cpuid)
+       mov     %rbx, %r8               C cpuid overwrites ebx, keep rbx in r8
+       mov     %esi, %eax              C eax = id
+       cpuid
+       mov     %ebx, (%rdi)            C dst[0..3]
+       mov     %edx, 4(%rdi)           C dst[4..7]
+       mov     %ecx, 8(%rdi)           C dst[8..11]
+       mov     %r8, %rbx               C restore rbx
+       ret                             C eax as left by cpuid is the return value
+EPILOGUE()
diff --git a/mpn/x86_64/fat/gcd_1.c b/mpn/x86_64/fat/gcd_1.c

new file mode 100644 (file)

index 0000000..5bd0006
--- /dev/null
+++ b/mpn/x86_64/fat/gcd_1.c
@@ -0,0 +1,21 @@
+/* Fat binary fallback mpn_gcd_1.
+
+Copyright 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "mpn/generic/gcd_1.c"
diff --git a/mpn/x86_64/fat/gmp-mparam.h b/mpn/x86_64/fat/gmp-mparam.h

new file mode 100644 (file)

index 0000000..6e744c0
--- /dev/null
+++ b/mpn/x86_64/fat/gmp-mparam.h
@@ -0,0 +1,59 @@
+/* Fat binary x86_64 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2009 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 64
+#define BYTES_PER_MP_LIMB 8
+
+
+/* mpn_divexact_1 is faster than mpn_divrem_1 at all sizes.  The only time
+   this might not be true currently is for actual 80386 and 80486 chips,
+   where mpn/x86/dive_1.asm might be slower than mpn/x86/divrem_1.asm, but
+   that's not worth worrying about.  */
+#define DIVEXACT_1_THRESHOLD  0
+
+/* Only some of the x86s have an mpn_preinv_divrem_1, but we set
+   USE_PREINV_DIVREM_1 so that all callers use it, and then let the
+   __gmpn_cpuvec pointer go to plain mpn_divrem_1 if there's not an actual
+   preinv.  */
+#define USE_PREINV_DIVREM_1   1
+
+/* mpn_sqr_basecase is faster than mpn_mul_basecase at all sizes, no need
+   for mpn_sqr to call the latter.  */
+#define SQR_BASECASE_THRESHOLD 0
+
+/* Sensible fallbacks for these, when not taken from a cpu-specific
+   gmp-mparam.h.  */
+#define MUL_TOOM22_THRESHOLD      20
+#define MUL_TOOM33_THRESHOLD     130
+#define SQR_TOOM2_THRESHOLD       30
+#define SQR_TOOM3_THRESHOLD      200
+
+/* These are values more or less in the middle of what the typical x86 chips
+   come out as.  For a fat binary it's necessary to have values for these,
+   since the defaults for MUL_FFT_TABLE and SQR_FFT_TABLE otherwise come out
+   as non-constant array initializers.  FIXME: Perhaps these should be done
+   in the cpuvec structure like other thresholds.  */
+#define MUL_FFT_TABLE  { 464, 928, 1920, 3584, 10240, 40960, 0 }
+#define MUL_FFT_MODF_THRESHOLD          400
+#define MUL_FFT_THRESHOLD              2000
+
+#define SQR_FFT_TABLE  { 528, 1184, 1920, 4608, 14336, 40960, 0 }
+#define SQR_FFT_MODF_THRESHOLD          500
+#define SQR_FFT_THRESHOLD              3000
diff --git a/mpn/x86_64/fat/mod_1.c b/mpn/x86_64/fat/mod_1.c

new file mode 100644 (file)

index 0000000..a79359d
--- /dev/null
+++ b/mpn/x86_64/fat/mod_1.c
@@ -0,0 +1,21 @@
+/* Fat binary fallback mpn_mod_1.
+
+Copyright 2003, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "mpn/generic/mod_1.c"
diff --git a/mpn/x86_64/fat/mode1o.c b/mpn/x86_64/fat/mode1o.c

new file mode 100644 (file)

index 0000000..a5244ca
--- /dev/null
+++ b/mpn/x86_64/fat/mode1o.c
@@ -0,0 +1,21 @@
+/* Fat binary fallback mpn_modexact_1c_odd.
+
+Copyright 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "mpn/generic/mode1o.c"
diff --git a/mpn/x86_64/gcd_1.asm b/mpn/x86_64/gcd_1.asm

new file mode 100644 (file)

index 0000000..4fe9e17
--- /dev/null
+++ b/mpn/x86_64/gcd_1.asm
@@ -0,0 +1,130 @@
+dnl  AMD64 mpn_gcd_1 -- mpn by 1 gcd.
+
+dnl  Based on the K7 gcd_1.asm, by Kevin Ryde.  Rehacked for AMD64 by Torbjorn
+dnl  Granlund.
+
+dnl  Copyright 2000, 2001, 2002, 2005, 2009 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C K8: 6.75 cycles/bit (approx)  1x1 gcd
+C     10.0 cycles/limb          Nx1 reduction (modexact_1_odd)
+
+
+dnl  Reduce using x%y if x is more than DIV_THRESHOLD bits bigger than y,
+dnl  where x is the larger of the two.  See tune/README for more.
+dnl
+dnl  div at 80 cycles compared to the gcd at about 7 cycles/bitpair
+dnl  suggests 80/7*2=23
+
+deflit(DIV_THRESHOLD, 23)
+
+
+C ctz_table[n] is the number of trailing zeros on n, or MAXSHIFT if n==0.
+C It holds MASK+1 one-byte entries and is indexed by the low MAXSHIFT bits of
+C x; an entry equal to MAXSHIFT makes mpn_gcd_1 shift and look up again.
+deflit(MAXSHIFT, 6)
+deflit(MASK, eval((m4_lshift(1,MAXSHIFT))-1))
+
+DEF_OBJECT(ctz_table,64)
+       .byte   MAXSHIFT
+forloop(i,1,MASK,
+`      .byte   m4_count_trailing_zeros(i)
+')
+END_OBJECT(ctz_table)
+
+C mp_limb_t mpn_gcd_1 (mp_srcptr up, mp_size_t n, mp_limb_t vlimb);
+
+
+C INPUT PARAMETERS
+define(`up',    `%rdi')
+define(`n',     `%rsi')
+define(`vlimb', `%rdx')
+
+       TEXT
+       ALIGN(16)
+C Returns in rax the gcd of the n-limb number at up and the single limb vlimb.
+PROLOGUE(mpn_gcd_1)
+       mov     (%rdi), %r8             C src low limb
+       or      %rdx, %r8               C x | y
+       mov     $-1, R32(%rcx)          C twos counter, first inc makes it 0
+C NOTE(review): the loop below never exits if x | y is 0 - presumably excluded.
+L(twos):
+       inc     R32(%rcx)
+       shr     %r8
+       jnc     L(twos)                 C until the lowest 1 bit is shifted out
+
+       shr     R8(%rcx), %rdx          C remove the common twos from y
+       mov     R32(%rcx), R32(%r8)     C common twos
+
+L(divide_strip_y):
+       shr     %rdx
+       jnc     L(divide_strip_y)       C until the lowest 1 bit is in carry
+       adc     %rdx, %rdx              C shift that bit back in, y is now odd
+
+       push    %r8                     C keep common twos across the call
+       push    %rdx                    C keep odd y across the call
+       sub     $8, %rsp                C maintain ABI required rsp alignment
+
+       CALL(   mpn_modexact_1_odd)     C rdi, rsi, rdx still hold up, n, odd y
+
+       add     $8, %rsp
+       pop     %rdx
+       pop     %r8
+
+       test    %rax, %rax              C did the reduction return zero?
+
+       mov     %rax, %rcx              C x = reduced value, flags unchanged
+       jnz     L(strip_x)
+
+       mov     %rdx, %rax              C x is zero, odd part of the gcd is y
+       jmp     L(done)
+
+L(strip_x):
+       LEA(    ctz_table, %r9)         C r9 = table base
+       jmp     L(strip_x_top)
+
+       ALIGN(16)
+L(top):
+       cmovc   %r10, %rcx              C if x-y gave carry, use x,y-x  0
+       cmovc   %rax, %rdx              C                               0
+
+L(strip_x_top):
+       mov     %rcx, %rax              C                               1
+       and     $MASK, R32(%rcx)        C                               1
+C Look up the trailing zero count of the low MAXSHIFT bits of x.
+       mov     (%r9,%rcx), R8(%rcx)    C                               1
+C Shift them out; a count of MAXSHIFT means they were all zero, so repeat.
+       shr     R8(%rcx), %rax          C                               4
+       cmp     $MAXSHIFT, R8(%rcx)     C                               4
+
+       mov     %rax, %rcx              C                               5
+       mov     %rdx, %r10              C                               5
+       je      L(strip_x_top)          C                               5
+C x (rax, rcx) and y (rdx, r10) are both odd here: form y-x and x-y.
+       sub     %rax, %r10              C                               6
+       sub     %rdx, %rcx              C                               6
+       jnz     L(top)                  C                               6
+
+L(done):
+       mov     %r8, %rcx               C common twos
+       shl     R8(%rcx), %rax          C put the common factor of two back
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86_64/gmp-mparam.h b/mpn/x86_64/gmp-mparam.h

new file mode 100644 (file)

index 0000000..1c6988c
--- /dev/null
+++ b/mpn/x86_64/gmp-mparam.h
@@ -0,0 +1,197 @@
+/* AMD K8 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
+2008, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 64
+#define BYTES_PER_MP_LIMB 8
+
+
+#define MOD_1_NORM_THRESHOLD                 0  /* always */
+#define MOD_1_UNNORM_THRESHOLD               0  /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          7
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          5
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         6
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        11
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     13
+#define USE_PREINV_DIVREM_1                  1  /* native */
+#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD           19
+
+#define MUL_TOOM22_THRESHOLD                28
+#define MUL_TOOM33_THRESHOLD                81
+#define MUL_TOOM44_THRESHOLD               232
+#define MUL_TOOM6H_THRESHOLD               369
+#define MUL_TOOM8H_THRESHOLD               478
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      97
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     160
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     160
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD     187
+
+#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
+#define SQR_TOOM2_THRESHOLD                 32
+#define SQR_TOOM3_THRESHOLD                113
+#define SQR_TOOM4_THRESHOLD                327
+#define SQR_TOOM6_THRESHOLD                446
+#define SQR_TOOM8_THRESHOLD                597
+
+#define MULMOD_BNM1_THRESHOLD               17
+#define SQRMOD_BNM1_THRESHOLD               17
+
+#define MUL_FFT_MODF_THRESHOLD             570  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    570, 5}, {     21, 6}, {     11, 5}, {     23, 6}, \
+    {     25, 7}, {     13, 6}, {     27, 7}, {     15, 6}, \
+    {     31, 7}, {     25, 8}, {     13, 7}, {     29, 8}, \
+    {     15, 7}, {     31, 8}, {     17, 7}, {     35, 8}, \
+    {     19, 7}, {     39, 8}, {     21, 7}, {     43, 8}, \
+    {     23, 7}, {     47, 8}, {     25, 7}, {     51, 8}, \
+    {     29, 9}, {     15, 8}, {     37, 9}, {     19, 8}, \
+    {     43, 9}, {     23, 8}, {     51, 9}, {     27, 8}, \
+    {     55,10}, {     15, 9}, {     43,10}, {     23, 9}, \
+    {     55,10}, {     31, 9}, {     63, 5}, {   1023, 4}, \
+    {   2431, 5}, {   1279, 6}, {    671, 7}, {    367, 8}, \
+    {    189, 9}, {     95, 8}, {    195, 9}, {    111,11}, \
+    {     31, 9}, {    131,10}, {     71, 9}, {    155,10}, \
+    {     79, 9}, {    159,10}, {     87,11}, {     47,10}, \
+    {    111,11}, {     63,10}, {    135,11}, {     79,10}, \
+    {    167,11}, {     95,10}, {    191,11}, {    111,12}, \
+    {     63,11}, {    143,10}, {    287,11}, {    159,10}, \
+    {    319,11}, {    175,12}, {     95,11}, {    207,13}, \
+    {     63,12}, {    127,11}, {    255,10}, {    543,11}, \
+    {    287,12}, {    159,11}, {    319,10}, {    639,11}, \
+    {    335,10}, {    671,11}, {    351,10}, {    703,12}, \
+    {    191,11}, {    383,10}, {    767,11}, {    415,12}, \
+    {    223,13}, {    127,12}, {    255,11}, {    543,12}, \
+    {    287,11}, {    575,10}, {   1151,11}, {    607,12}, \
+    {    319,11}, {    639,10}, {   1279,11}, {    671,12}, \
+    {    351,11}, {    703,13}, {    191,12}, {    383,11}, \
+    {    767,12}, {    415,11}, {    831,12}, {    447,14}, \
+    {    127,13}, {    255,12}, {    543,11}, {   1087,12}, \
+    {    607,11}, {   1215,13}, {    319,12}, {    671,11}, \
+    {   1343,12}, {    735,13}, {    383,12}, {    767,11}, \
+    {   1535,12}, {    799,11}, {   1599,12}, {    831,13}, \
+    {    447,12}, {    895,11}, {   1791,12}, {    959,14}, \
+    {    255,13}, {    511,12}, {   1087,13}, {    575,12}, \
+    {   1215,13}, {    639,12}, {   1343,13}, {    703,12}, \
+    {   1407,14}, {    383,13}, {    767,12}, {   1599,13}, \
+    {    831,12}, {   1663,13}, {    895,12}, {   1791,13}, \
+    {    959,15}, {    255,14}, {    511,13}, {   1087,12}, \
+    {   2175,13}, {   1215,14}, {    639,13}, {   1471,14}, \
+    {    767,13}, {   1663,14}, {    895,13}, {   1855,15}, \
+    {    511,14}, {   1023,13}, {   2175,14}, {   1151,13}, \
+    {   2431,14}, {   1279,13}, {   2687,14}, {   1407,15}, \
+    {    767,14}, {   1535,13}, {   3071,14}, {   1791,16}, \
+    {    511,15}, {   1023,14}, {   2431,15}, {   1279,14}, \
+    {   2815,15}, {   1535,14}, {   3199,15}, {   1791,14}, \
+    {   3583,16}, {  65536,17}, { 131072,18}, { 262144,19}, \
+    { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
+    {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 185
+#define MUL_FFT_THRESHOLD                 7552
+
+#define SQR_FFT_MODF_THRESHOLD             460  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    460, 5}, {     21, 6}, {     11, 5}, {     23, 6}, \
+    {     12, 5}, {     25, 6}, {     27, 7}, {     14, 6}, \
+    {     29, 7}, {     15, 6}, {     31, 7}, {     29, 8}, \
+    {     15, 7}, {     32, 8}, {     17, 7}, {     35, 8}, \
+    {     19, 7}, {     39, 8}, {     21, 7}, {     43, 8}, \
+    {     25, 7}, {     51, 8}, {     29, 9}, {     15, 8}, \
+    {     35, 9}, {     19, 8}, {     43, 9}, {     23, 8}, \
+    {     51, 9}, {     27, 8}, {     55,10}, {     15, 9}, \
+    {     31, 8}, {     63, 9}, {     43,10}, {     23, 9}, \
+    {     55,11}, {     15,10}, {     31, 9}, {     71,10}, \
+    {     39, 9}, {     83,10}, {     47, 6}, {    767, 4}, \
+    {   3263, 5}, {   1727, 4}, {   3455, 5}, {   1791, 6}, \
+    {    927, 7}, {    479, 6}, {    959, 7}, {    511, 8}, \
+    {    271, 9}, {    147,10}, {     87,11}, {     47,10}, \
+    {     95,12}, {     31,11}, {     63,10}, {    135,11}, \
+    {     79,10}, {    167,11}, {     95,10}, {    191,11}, \
+    {    111,12}, {     63,11}, {    127,10}, {    255,11}, \
+    {    143,10}, {    287, 9}, {    575,10}, {    303,11}, \
+    {    159,12}, {     95,11}, {    191,10}, {    383, 9}, \
+    {    767,10}, {    399,11}, {    207,13}, {     63,12}, \
+    {    127,11}, {    255,10}, {    511,11}, {    271,10}, \
+    {    543,11}, {    287,10}, {    575,12}, {    159,11}, \
+    {    319,10}, {    639,11}, {    335,10}, {    671,11}, \
+    {    351,10}, {    703,12}, {    191,11}, {    383,10}, \
+    {    767,11}, {    415,10}, {    831,11}, {    447,13}, \
+    {    127,12}, {    255,11}, {    511,10}, {   1023,11}, \
+    {    543,12}, {    287,11}, {    575,10}, {   1151,11}, \
+    {    607,10}, {   1215,12}, {    319,11}, {    639,10}, \
+    {   1279,11}, {    671,12}, {    351,11}, {    703,13}, \
+    {    191,12}, {    383,11}, {    767,12}, {    415,11}, \
+    {    831,12}, {    447,14}, {    127,13}, {    255,12}, \
+    {    511,11}, {   1023,12}, {    543,11}, {   1087,12}, \
+    {    575,11}, {   1151,12}, {    607,13}, {    319,12}, \
+    {    639,11}, {   1279,12}, {    671,11}, {   1343,12}, \
+    {    703,11}, {   1407,12}, {    735,13}, {    383,12}, \
+    {    767,11}, {   1535,12}, {    799,11}, {   1599,12}, \
+    {    831,13}, {    447,12}, {    959,14}, {    255,13}, \
+    {    511,12}, {   1087,13}, {    575,12}, {   1215,13}, \
+    {    639,12}, {   1343,13}, {    703,12}, {   1407,14}, \
+    {    383,13}, {    767,12}, {   1599,13}, {    831,12}, \
+    {   1663,13}, {    895,12}, {   1791,13}, {    959,15}, \
+    {    255,14}, {    511,13}, {   1087,12}, {   2175,13}, \
+    {   1215,14}, {    639,13}, {   1471,14}, {    767,13}, \
+    {   1663,14}, {    895,13}, {   1855,15}, {    511,14}, \
+    {   1023,13}, {   2175,14}, {   1151,13}, {   2303,14}, \
+    {   1279,13}, {   2559,14}, {   1407,15}, {    767,14}, \
+    {   1535,13}, {   3071,14}, {   1791,16}, {    511,15}, \
+    {   1023,14}, {   2303,15}, {   1279,14}, {   2687,15}, \
+    {   1535,14}, {   3199,15}, {   1791,16}, {  65536,17}, \
+    { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
+    {2097152,22}, {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 203
+#define SQR_FFT_THRESHOLD                 5248
+
+#define MULLO_BASECASE_THRESHOLD             0
+#define MULLO_DC_THRESHOLD                  35
+#define MULLO_MUL_N_THRESHOLD            14709
+
+#define DC_DIV_QR_THRESHOLD                 56
+#define DC_DIVAPPR_Q_THRESHOLD             220
+#define DC_BDIV_QR_THRESHOLD                52
+#define DC_BDIV_Q_THRESHOLD                152
+
+#define INV_MULMOD_BNM1_THRESHOLD           74
+#define INV_NEWTON_THRESHOLD               260
+#define INV_APPR_THRESHOLD                 220
+
+#define BINV_NEWTON_THRESHOLD              345
+#define REDC_1_TO_REDC_2_THRESHOLD           6
+#define REDC_2_TO_REDC_N_THRESHOLD          79
+
+#define MU_DIV_QR_THRESHOLD               1787
+#define MU_DIVAPPR_Q_THRESHOLD            1787
+#define MUPI_DIV_QR_THRESHOLD              126
+#define MU_BDIV_QR_THRESHOLD              1620
+#define MU_BDIV_Q_THRESHOLD               1787
+
+#define MATRIX22_STRASSEN_THRESHOLD         17
+#define HGCD_THRESHOLD                     139
+#define GCD_DC_THRESHOLD                   501
+#define GCDEXT_DC_THRESHOLD                474
+#define JACOBI_BASE_METHOD                   1
+
+#define GET_STR_DC_THRESHOLD                17
+#define GET_STR_PRECOMPUTE_THRESHOLD        23
+#define SET_STR_DC_THRESHOLD               266
+#define SET_STR_PRECOMPUTE_THRESHOLD      1648
diff --git a/mpn/x86_64/invert_limb.asm b/mpn/x86_64/invert_limb.asm

new file mode 100644 (file)

index 0000000..8dcfae0
--- /dev/null
+++ b/mpn/x86_64/invert_limb.asm
@@ -0,0 +1,133 @@
+dnl  AMD64 mpn_invert_limb -- Invert a normalized limb.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund and Niels Möller.
+
+dnl  Copyright 2004, 2007, 2008, 2009 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C           cycles/limb (approx)       div
+C K8,K9:        48                      71
+C K10:          48                      77
+C P4:          135                     161
+C P6 core2:     69                     116
+C P6 corei7:    55                      89
+C P6 atom:     129                     191
+
+C rax rcx rdx rdi rsi r8
+
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_invert_limb)              C                       Kn      C2      Ci
+       mov     %rdi, %rax              C                        0       0       0
+       shr     $55, %rax               C                        1       1       1
+ifdef(`PIC',`
+ifdef(`DARWIN',`
+       mov     approx_tab@GOTPCREL(%rip), %r8
+       add     $-512, %r8
+',`
+       lea     -512+approx_tab(%rip), %r8
+')',`
+       movabs  $-512+approx_tab, %r8
+')
+       movzwl  (%r8,%rax,2), R32(%rcx) C       %rcx = v0
+C v0 = approx_tab[(d >> 55) - 256]; the -512 byte bias on r8 supplies the -256.
+       C v1 = (v0 << 11) - (v0*v0*d40 >> 40) - 1
+       mov     %rdi, %rsi              C                        0       0       0
+       mov     R32(%rcx), R32(%rax)    C                        4       5       5
+       imul    R32(%rcx), R32(%rcx)    C                        4       5       5
+       shr     $24, %rsi               C                        1       1       1
+       inc     %rsi                    C       %rsi = d40
+       imul    %rsi, %rcx              C                        8      10       8
+       shr     $40, %rcx               C                       12      15      11
+       sal     $11, R32(%rax)          C                        5       6       6
+       dec     R32(%rax)
+       sub     R32(%rcx), R32(%rax)    C       %rax = v1
+
+       C v2 = (v1 << 13) + (v1 * (2^60 - v1*d40) >> 47)
+       mov     $0x1000000000000000, %rcx       C 2^60
+       imul    %rax, %rsi              C                       14      17      13
+       sub     %rsi, %rcx
+       imul    %rax, %rcx
+       sal     $13, %rax
+       shr     $47, %rcx
+       add     %rax, %rcx              C       %rcx = v2
+
+       C v3 = (v2 << 31) + (v2 * (2^96 - v2 * d63 + ((v2 >> 1) & mask)) >> 65)
+       mov     %rdi, %rsi              C                        0       0       0
+       shr     $1, %rsi                C d/2
+       sbb     %rax, %rax              C -d0 = -(d mod 2)
+       sub     %rax, %rsi              C d63 = ceil(d/2)
+       imul    %rcx, %rsi              C v2 * d63
+       and     %rcx, %rax              C v2 * d0
+       shr     $1, %rax                C (v2>>1) * d0
+       sub     %rsi, %rax              C (v2>>1) * d0 - v2 * d63
+       mul     %rcx                    C rdx:rax = v2 * that difference
+       sal     $31, %rcx
+       shr     $1, %rdx                C high limb >> 1 gives the >> 65
+       add     %rdx, %rcx              C       %rcx = v3
+C Return v3 - (d + high limb of d*(v3+1)), computed modulo 2^64.
+       mov     %rdi, %rax
+       mul     %rcx                    C rdx:rax = d * v3
+       add     %rdi, %rax              C low limb + d, only the carry is needed
+       mov     %rcx, %rax              C mov keeps that carry intact
+       adc     %rdi, %rdx              C high limb + d + carry
+       sub     %rdx, %rax
+
+       ret
+EPILOGUE()
+
+       RODATA
+       ALIGN(2)
+approx_tab:
+       .value  0x7fd,0x7f5,0x7ed,0x7e5,0x7dd,0x7d5,0x7ce,0x7c6
+       .value  0x7bf,0x7b7,0x7b0,0x7a8,0x7a1,0x79a,0x792,0x78b
+       .value  0x784,0x77d,0x776,0x76f,0x768,0x761,0x75b,0x754
+       .value  0x74d,0x747,0x740,0x739,0x733,0x72c,0x726,0x720
+       .value  0x719,0x713,0x70d,0x707,0x700,0x6fa,0x6f4,0x6ee
+       .value  0x6e8,0x6e2,0x6dc,0x6d6,0x6d1,0x6cb,0x6c5,0x6bf
+       .value  0x6ba,0x6b4,0x6ae,0x6a9,0x6a3,0x69e,0x698,0x693
+       .value  0x68d,0x688,0x683,0x67d,0x678,0x673,0x66e,0x669
+       .value  0x664,0x65e,0x659,0x654,0x64f,0x64a,0x645,0x640
+       .value  0x63c,0x637,0x632,0x62d,0x628,0x624,0x61f,0x61a
+       .value  0x616,0x611,0x60c,0x608,0x603,0x5ff,0x5fa,0x5f6
+       .value  0x5f1,0x5ed,0x5e9,0x5e4,0x5e0,0x5dc,0x5d7,0x5d3
+       .value  0x5cf,0x5cb,0x5c6,0x5c2,0x5be,0x5ba,0x5b6,0x5b2
+       .value  0x5ae,0x5aa,0x5a6,0x5a2,0x59e,0x59a,0x596,0x592
+       .value  0x58e,0x58a,0x586,0x583,0x57f,0x57b,0x577,0x574
+       .value  0x570,0x56c,0x568,0x565,0x561,0x55e,0x55a,0x556
+       .value  0x553,0x54f,0x54c,0x548,0x545,0x541,0x53e,0x53a
+       .value  0x537,0x534,0x530,0x52d,0x52a,0x526,0x523,0x520
+       .value  0x51c,0x519,0x516,0x513,0x50f,0x50c,0x509,0x506
+       .value  0x503,0x500,0x4fc,0x4f9,0x4f6,0x4f3,0x4f0,0x4ed
+       .value  0x4ea,0x4e7,0x4e4,0x4e1,0x4de,0x4db,0x4d8,0x4d5
+       .value  0x4d2,0x4cf,0x4cc,0x4ca,0x4c7,0x4c4,0x4c1,0x4be
+       .value  0x4bb,0x4b9,0x4b6,0x4b3,0x4b0,0x4ad,0x4ab,0x4a8
+       .value  0x4a5,0x4a3,0x4a0,0x49d,0x49b,0x498,0x495,0x493
+       .value  0x490,0x48d,0x48b,0x488,0x486,0x483,0x481,0x47e
+       .value  0x47c,0x479,0x477,0x474,0x472,0x46f,0x46d,0x46a
+       .value  0x468,0x465,0x463,0x461,0x45e,0x45c,0x459,0x457
+       .value  0x455,0x452,0x450,0x44e,0x44b,0x449,0x447,0x444
+       .value  0x442,0x440,0x43e,0x43b,0x439,0x437,0x435,0x432
+       .value  0x430,0x42e,0x42c,0x42a,0x428,0x425,0x423,0x421
+       .value  0x41f,0x41d,0x41b,0x419,0x417,0x414,0x412,0x410
+       .value  0x40e,0x40c,0x40a,0x408,0x406,0x404,0x402,0x400
+ASM_END()
diff --git a/mpn/x86_64/logops_n.asm b/mpn/x86_64/logops_n.asm

new file mode 100644 (file)

index 0000000..1022b61
--- /dev/null
+++ b/mpn/x86_64/logops_n.asm
@@ -0,0 +1,221 @@
+dnl  AMD64 logops.
+
+dnl  Copyright 2004, 2005, 2006 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C           cycles/limb
+C K8,K9:        1.5
+C K10:          1.75-2 (fluctuating)
+C P4:           2.8/3.35/3.60 (variant1/variant2/variant3)
+C P6-15:        2.0
+
+ifdef(`OPERATION_and_n',`
+  define(`func',`mpn_and_n')
+  define(`VARIANT_1')
+  define(`LOGOP',`andq')')
+ifdef(`OPERATION_andn_n',`
+  define(`func',`mpn_andn_n')
+  define(`VARIANT_2')
+  define(`LOGOP',`andq')')
+ifdef(`OPERATION_nand_n',`
+  define(`func',`mpn_nand_n')
+  define(`VARIANT_3')
+  define(`LOGOP',`andq')')
+ifdef(`OPERATION_ior_n',`
+  define(`func',`mpn_ior_n')
+  define(`VARIANT_1')
+  define(`LOGOP',`orq')')
+ifdef(`OPERATION_iorn_n',`
+  define(`func',`mpn_iorn_n')
+  define(`VARIANT_2')
+  define(`LOGOP',`orq')')
+ifdef(`OPERATION_nior_n',`
+  define(`func',`mpn_nior_n')
+  define(`VARIANT_3')
+  define(`LOGOP',`orq')')
+ifdef(`OPERATION_xor_n',`
+  define(`func',`mpn_xor_n')
+  define(`VARIANT_1')
+  define(`LOGOP',`xorq')')
+ifdef(`OPERATION_xnor_n',`
+  define(`func',`mpn_xnor_n')
+  define(`VARIANT_2')
+  define(`LOGOP',`xorq')')
+
+
+MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
+
+C INPUT PARAMETERS
+define(`rp',`%rdi')
+define(`up',`%rsi')
+define(`vp',`%rdx')
+define(`n',`%rcx')
+
+
+ASM_START()
+
+ifdef(`VARIANT_1',`
+       TEXT
+       ALIGN(32)
+PROLOGUE(func)                         C rp[i] = up[i] LOGOP vp[i], n limbs
+       movq    (vp), %r8               C preload vp[0]; NOTE(review): assumes n >= 1
+       movl    %ecx, %eax              C copy of n, only n mod 4 is used
+       leaq    (vp,n,8), vp            C point all three operands past their ends
+       leaq    (up,n,8), up
+       leaq    (rp,n,8), rp
+       negq    n                       C index runs from -n up towards 0
+       andl    $3, %eax                C n mod 4 selects the loop entry
+       je      L(b00)                  C n mod 4 = 0
+       cmpl    $2, %eax
+       jc      L(b01)                  C n mod 4 = 1
+       je      L(b10)                  C n mod 4 = 2
+
+L(b11):        LOGOP   (up,n,8), %r8   C n mod 4 = 3: finish limb 0 here
+       movq    %r8, (rp,n,8)
+       decq    n                       C bias n so e11 continues with limbs 1, 2
+       jmp     L(e11)
+L(b10):        addq    $-2, n          C bias n so e10 handles limbs 0 and 1
+       jmp     L(e10)
+L(b01):        LOGOP   (up,n,8), %r8   C finish limb 0 here
+       movq    %r8, (rp,n,8)
+       incq    n
+       jz      L(ret)                  C n was 1, nothing left
+
+L(oop):        movq    (vp,n,8), %r8   C main loop, four limbs per pass
+L(b00):        movq    8(vp,n,8), %r9
+       LOGOP   (up,n,8), %r8
+       LOGOP   8(up,n,8), %r9
+       nop                             C NOTE(review): presumably scheduling padding
+       movq    %r8, (rp,n,8)
+       movq    %r9, 8(rp,n,8)
+L(e11):        movq    16(vp,n,8), %r8
+L(e10):        movq    24(vp,n,8), %r9
+       LOGOP   16(up,n,8), %r8
+       LOGOP   24(up,n,8), %r9
+       movq    %r8, 16(rp,n,8)
+       movq    %r9, 24(rp,n,8)
+       addq    $4, n
+       jnc     L(oop)                  C carry out means the index reached 0 or more
+L(ret):        ret
+EPILOGUE()
+')
+
+ifdef(`VARIANT_2',`
+       TEXT
+       ALIGN(32)
+PROLOGUE(func)                         C rp[i] = up[i] LOGOP ~vp[i], n limbs
+       movq    (vp), %r8               C preload vp[0]; NOTE(review): assumes n >= 1
+       notq    %r8                     C every vp limb is complemented before LOGOP
+       movl    %ecx, %eax              C copy of n, only n mod 4 is used
+       leaq    (vp,n,8), vp            C point all three operands past their ends
+       leaq    (up,n,8), up
+       leaq    (rp,n,8), rp
+       negq    n                       C index runs from -n up towards 0
+       andl    $3, %eax                C n mod 4 selects the loop entry
+       je      L(b00)                  C n mod 4 = 0
+       cmpl    $2, %eax
+       jc      L(b01)                  C n mod 4 = 1
+       je      L(b10)                  C n mod 4 = 2
+
+L(b11):        LOGOP   (up,n,8), %r8   C n mod 4 = 3: finish limb 0 here
+       movq    %r8, (rp,n,8)
+       decq    n                       C bias n so e11 continues with limbs 1, 2
+       jmp     L(e11)
+L(b10):        addq    $-2, n          C bias n so e10 handles limbs 0 and 1
+       jmp     L(e10)
+       .byte   0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90  C nops; NOTE(review): presumably alignment padding
+L(b01):        LOGOP   (up,n,8), %r8   C finish limb 0 here
+       movq    %r8, (rp,n,8)
+       incq    n
+       jz      L(ret)                  C n was 1, nothing left
+
+L(oop):        movq    (vp,n,8), %r8   C main loop, four limbs per pass
+       notq    %r8
+L(b00):        movq    8(vp,n,8), %r9
+       notq    %r9
+       LOGOP   (up,n,8), %r8
+       LOGOP   8(up,n,8), %r9
+       movq    %r8, (rp,n,8)
+       movq    %r9, 8(rp,n,8)
+L(e11):        movq    16(vp,n,8), %r8
+       notq    %r8
+L(e10):        movq    24(vp,n,8), %r9
+       notq    %r9
+       LOGOP   16(up,n,8), %r8
+       LOGOP   24(up,n,8), %r9
+       movq    %r8, 16(rp,n,8)
+       movq    %r9, 24(rp,n,8)
+       addq    $4, n
+       jnc     L(oop)                  C carry out means the index reached 0 or more
+L(ret):        ret
+EPILOGUE()
+')
+
+ifdef(`VARIANT_3',`
+       TEXT
+       ALIGN(32)
+PROLOGUE(func)                         C rp[i] = ~(up[i] LOGOP vp[i]), n limbs
+       movq    (vp), %r8               C preload vp[0]; NOTE(review): assumes n >= 1
+       movl    %ecx, %eax              C copy of n, only n mod 4 is used
+       leaq    (vp,n,8), vp            C point all three operands past their ends
+       leaq    (up,n,8), up
+       leaq    (rp,n,8), rp
+       negq    n                       C index runs from -n up towards 0
+       andl    $3, %eax                C n mod 4 selects the loop entry
+       je      L(b00)                  C n mod 4 = 0
+       cmpl    $2, %eax
+       jc      L(b01)                  C n mod 4 = 1
+       je      L(b10)                  C n mod 4 = 2
+
+L(b11):        LOGOP   (up,n,8), %r8   C n mod 4 = 3: finish limb 0 here
+       notq    %r8                     C every result limb is complemented
+       movq    %r8, (rp,n,8)
+       decq    n                       C bias n so e11 continues with limbs 1, 2
+       jmp     L(e11)
+L(b10):        addq    $-2, n          C bias n so e10 handles limbs 0 and 1
+       jmp     L(e10)
+       .byte   0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90  C nops; NOTE(review): presumably alignment padding
+L(b01):        LOGOP   (up,n,8), %r8   C finish limb 0 here
+       notq    %r8
+       movq    %r8, (rp,n,8)
+       incq    n
+       jz      L(ret)                  C n was 1, nothing left
+
+L(oop):        movq    (vp,n,8), %r8   C main loop, four limbs per pass
+L(b00):        movq    8(vp,n,8), %r9
+       LOGOP   (up,n,8), %r8
+       notq    %r8
+       LOGOP   8(up,n,8), %r9
+       notq    %r9
+       movq    %r8, (rp,n,8)
+       movq    %r9, 8(rp,n,8)
+L(e11):        movq    16(vp,n,8), %r8
+L(e10):        movq    24(vp,n,8), %r9
+       LOGOP   16(up,n,8), %r8
+       notq    %r8
+       LOGOP   24(up,n,8), %r9
+       notq    %r9
+       movq    %r8, 16(rp,n,8)
+       movq    %r9, 24(rp,n,8)
+       addq    $4, n
+       jnc     L(oop)                  C carry out means the index reached 0 or more
+L(ret):        ret
+EPILOGUE()
+')
diff --git a/mpn/x86_64/lshift.asm b/mpn/x86_64/lshift.asm

new file mode 100644 (file)

index 0000000..d59d825
--- /dev/null
+++ b/mpn/x86_64/lshift.asm
@@ -0,0 +1,224 @@
+dnl  AMD64 mpn_lshift -- mpn left shift.
+
+dnl  Copyright 2003, 2005, 2007, 2009 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C           cycles/limb   cycles/limb cnt=1
+C K8,K9:        2.375           1.375
+C K10:          2.375           1.375
+C P4:           8              10.5
+C P6-15 (Core2): 2.11           4.28
+C P6-28 (Atom):         5.75            3.5
+
+
+C INPUT PARAMETERS
+define(`rp',   `%rdi')
+define(`up',   `%rsi')
+define(`n',    `%rdx')
+define(`cnt',  `%rcx')
+
+ASM_START()
+       TEXT
+       ALIGN(32)
+PROLOGUE(mpn_lshift)
+       cmp     $1, R8(%rcx)            C cnt = 1 has its own add-with-carry path
+       jne     L(gen)
+
+C For cnt=1 we want to work from lowest limb towards higher limbs.
+C Check for bad overlap (up=rp is OK!) rp=up+1..up+n-1 is bad.
+C FIXME: this could surely be done more cleverly.
+
+       mov    rp, %rax
+       sub    up, %rax
+       je     L(fwd)                   C rp = up
+       shr    $3, %rax                 C byte distance to limb distance
+       cmp    n, %rax
+       jb     L(gen)                   C rp less than n limbs above up: go high to low
+
+L(fwd):        mov     R32(n), R32(%rax)       C rax = n, reduced to n mod 4 below
+       shr     $2, n                   C n = number of 4-limb groups
+       je      L(e1)                   C fewer than 4 limbs
+       and     $3, R32(%rax)           C leftover limbs; and also clears carry
+
+       ALIGN(8)
+       nop
+       nop
+L(t1): mov     (up), %r8               C 4 limbs per pass, each doubled with adc
+       mov     8(up), %r9
+       mov     16(up), %r10
+       mov     24(up), %r11
+       lea     32(up), up              C lea and dec leave the carry flag alone
+       adc     %r8, %r8
+       mov     %r8, (rp)
+       adc     %r9, %r9
+       mov     %r9, 8(rp)
+       adc     %r10, %r10
+       mov     %r10, 16(rp)
+       adc     %r11, %r11
+       mov     %r11, 24(rp)
+       lea     32(rp), rp
+       dec     n
+       jne     L(t1)
+
+       inc     R32(%rax)               C inc then dec tests rax for zero
+       dec     R32(%rax)               C without disturbing the carry
+       jne     L(n00)
+       adc     R32(%rax), R32(%rax)    C rax is 0, so return the carry out
+       ret
+L(e1): test    R32(%rax), R32(%rax)    C clear cy
+L(n00):        mov     (up), %r8
+       dec     R32(%rax)
+       jne     L(n01)
+       adc     %r8, %r8                C one limb left
+       mov     %r8, (rp)
+L(ret):        adc     R32(%rax), R32(%rax)    C rax is 0, so return the carry out
+       ret
+L(n01):        dec     R32(%rax)
+       mov     8(up), %r9              C mov keeps the flags from the dec
+       jne     L(n10)
+       adc     %r8, %r8                C two limbs left
+       adc     %r9, %r9
+       mov     %r8, (rp)
+       mov     %r9, 8(rp)
+       adc     R32(%rax), R32(%rax)    C rax is 0, so return the carry out
+       ret
+L(n10):        mov     16(up), %r10            C three limbs left
+       adc     %r8, %r8
+       adc     %r9, %r9
+       adc     %r10, %r10
+       mov     %r8, (rp)
+       mov     %r9, 8(rp)
+       mov     %r10, 16(rp)
+       adc     $-1, R32(%rax)          C rax is 1 here: 1 - 1 + carry out
+       ret
+
+L(gen):        neg     R32(%rcx)               C put rsh count in cl
+       mov     -8(up,n,8), %rax        C highest source limb
+       shr     R8(%rcx), %rax          C function return value
+
+       neg     R32(%rcx)               C put lsh count in cl
+       lea     1(n), R32(%r8)
+       and     $3, R32(%r8)            C (n + 1) mod 4 picks the entry point
+       je      L(rlx)                  C jump for n = 3, 7, 11, ...
+
+       dec     R32(%r8)
+       jne     L(1)
+C      n = 4, 8, 12, ...
+       mov     -8(up,n,8), %r10
+       shl     R8(%rcx), %r10
+       neg     R32(%rcx)               C put rsh count in cl
+       mov     -16(up,n,8), %r8
+       shr     R8(%rcx), %r8
+       or      %r8, %r10
+       mov     %r10, -8(rp,n,8)
+       dec     n
+       jmp     L(rll)
+
+L(1):  dec     R32(%r8)
+       je      L(1x)                   C jump for n = 1, 5, 9, 13, ...
+C      n = 2, 6, 10, 14, ...
+       mov     -8(up,n,8), %r10
+       shl     R8(%rcx), %r10
+       neg     R32(%rcx)               C put rsh count in cl
+       mov     -16(up,n,8), %r8
+       shr     R8(%rcx), %r8
+       or      %r8, %r10
+       mov     %r10, -8(rp,n,8)
+       dec     n
+       neg     R32(%rcx)               C put lsh count in cl
+L(1x):
+       cmp     $1, n
+       je      L(ast)                  C only the lowest limb is left
+       mov     -8(up,n,8), %r10
+       shl     R8(%rcx), %r10
+       mov     -16(up,n,8), %r11
+       shl     R8(%rcx), %r11
+       neg     R32(%rcx)               C put rsh count in cl
+       mov     -16(up,n,8), %r8
+       mov     -24(up,n,8), %r9
+       shr     R8(%rcx), %r8
+       or      %r8, %r10
+       shr     R8(%rcx), %r9
+       or      %r9, %r11
+       mov     %r10, -8(rp,n,8)
+       mov     %r11, -16(rp,n,8)
+       sub     $2, n
+
+L(rll):        neg     R32(%rcx)               C put lsh count in cl
+L(rlx):        mov     -8(up,n,8), %r10
+       shl     R8(%rcx), %r10
+       mov     -16(up,n,8), %r11
+       shl     R8(%rcx), %r11
+
+       sub     $4, n                   C                                     4
+       jb      L(end)                  C                                     2
+       ALIGN(16)
+L(top):
+       C finish stuff from lsh block
+       neg     R32(%rcx)               C put rsh count in cl
+       mov     16(up,n,8), %r8
+       mov     8(up,n,8), %r9
+       shr     R8(%rcx), %r8
+       or      %r8, %r10
+       shr     R8(%rcx), %r9
+       or      %r9, %r11
+       mov     %r10, 24(rp,n,8)
+       mov     %r11, 16(rp,n,8)
+       C start two new rsh
+       mov     0(up,n,8), %r8
+       mov     -8(up,n,8), %r9
+       shr     R8(%rcx), %r8
+       shr     R8(%rcx), %r9
+
+       C finish stuff from rsh block
+       neg     R32(%rcx)               C put lsh count in cl
+       mov     8(up,n,8), %r10
+       mov     0(up,n,8), %r11
+       shl     R8(%rcx), %r10
+       or      %r10, %r8
+       shl     R8(%rcx), %r11
+       or      %r11, %r9
+       mov     %r8, 8(rp,n,8)
+       mov     %r9, 0(rp,n,8)
+       C start two new lsh
+       mov     -8(up,n,8), %r10
+       mov     -16(up,n,8), %r11
+       shl     R8(%rcx), %r10
+       shl     R8(%rcx), %r11
+
+       sub     $4, n
+       jae     L(top)                  C                                     2
+L(end):
+       neg     R32(%rcx)               C put rsh count in cl
+       mov     8(up), %r8
+       shr     R8(%rcx), %r8
+       or      %r8, %r10
+       mov     (up), %r9
+       shr     R8(%rcx), %r9
+       or      %r9, %r11
+       mov     %r10, 16(rp)
+       mov     %r11, 8(rp)
+
+       neg     R32(%rcx)               C put lsh count in cl
+L(ast):        mov     (up), %r10              C lowest limb, nothing shifts in below it
+       shl     R8(%rcx), %r10
+       mov     %r10, (rp)
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/lshiftc.asm b/mpn/x86_64/lshiftc.asm

new file mode 100644 (file)

index 0000000..2423529
--- /dev/null
+++ b/mpn/x86_64/lshiftc.asm
@@ -0,0 +1,164 @@
+dnl  AMD64 mpn_lshiftc -- mpn left shift with complement.
+
+dnl  Copyright 2003, 2005, 2006, 2009 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C           cycles/limb
+C K8,K9:        2.75
+C K10:          2.75
+C P4:           ?
+C P6-15 (Core2): ?
+C P6-28 (Atom):         ?
+
+
+C INPUT PARAMETERS
+define(`rp',   `%rdi')                 C destination limb pointer
+define(`up',   `%rsi')                 C source limb pointer
+define(`n',    `%rdx')                 C number of limbs
+define(`cnt',  `%rcx')                 C shift count, used through cl
+
+ASM_START()
+       TEXT
+       ALIGN(32)
+PROLOGUE(mpn_lshiftc)
+       neg     R32(%rcx)               C put rsh count in cl
+       mov     -8(up,n,8), %rax        C rax = up[n-1]
+       shr     R8(%rcx), %rax          C function return value, the bits shifted out at the top
+
+       neg     R32(%rcx)               C put lsh count in cl
+       lea     1(n), R32(%r8)          C r8d = n + 1
+       and     $3, R32(%r8)            C r8d = (n + 1) mod 4
+       je      L(rlx)                  C jump for n = 3, 7, 11, ...
+
+       dec     R32(%r8)
+       jne     L(1)
+C      n = 4, 8, 12, ...
+       mov     -8(up,n,8), %r10        C r10 = up[n-1]
+       shl     R8(%rcx), %r10
+       neg     R32(%rcx)               C put rsh count in cl
+       mov     -16(up,n,8), %r8        C r8 = up[n-2]
+       shr     R8(%rcx), %r8
+       or      %r8, %r10               C merge in the high bits of up[n-2]
+       not     %r10                    C complement the result limb
+       mov     %r10, -8(rp,n,8)        C rp[n-1] = result
+       dec     n                       C one limb done, n is now 3 mod 4
+       jmp     L(rll)
+
+L(1):  dec     R32(%r8)
+       je      L(1x)                   C jump for n = 1, 5, 9, 13, ...
+C      n = 2, 6, 10, 14, ...
+       mov     -8(up,n,8), %r10        C r10 = up[n-1]
+       shl     R8(%rcx), %r10
+       neg     R32(%rcx)               C put rsh count in cl
+       mov     -16(up,n,8), %r8        C r8 = up[n-2]
+       shr     R8(%rcx), %r8
+       or      %r8, %r10               C merge in the high bits of up[n-2]
+       not     %r10                    C complement the result limb
+       mov     %r10, -8(rp,n,8)        C rp[n-1] = result
+       dec     n                       C one limb done, n is now 1 mod 4
+       neg     R32(%rcx)               C put lsh count in cl
+L(1x):
+       cmp     $1, n
+       je      L(ast)                  C only the lowest limb is left
+       mov     -8(up,n,8), %r10        C handle two limbs: up[n-1] and up[n-2]
+       shl     R8(%rcx), %r10
+       mov     -16(up,n,8), %r11
+       shl     R8(%rcx), %r11
+       neg     R32(%rcx)               C put rsh count in cl
+       mov     -16(up,n,8), %r8        C the next lower limbs supply the low bits
+       mov     -24(up,n,8), %r9
+       shr     R8(%rcx), %r8
+       or      %r8, %r10
+       shr     R8(%rcx), %r9
+       or      %r9, %r11
+       not     %r10                    C complement both result limbs
+       not     %r11
+       mov     %r10, -8(rp,n,8)
+       mov     %r11, -16(rp,n,8)
+       sub     $2, n                   C two limbs done, n is now 3 mod 4
+
+L(rll):        neg     R32(%rcx)               C put lsh count in cl
+L(rlx):        mov     -8(up,n,8), %r10
+       shl     R8(%rcx), %r10
+       mov     -16(up,n,8), %r11
+       shl     R8(%rcx), %r11
+
+       sub     $4, n                   C                                     4
+       jb      L(end)                  C                                     2
+       ALIGN(16)
+L(top):
+       C finish stuff from lsh block
+       neg     R32(%rcx)               C put rsh count in cl
+       mov     16(up,n,8), %r8
+       mov     8(up,n,8), %r9
+       shr     R8(%rcx), %r8
+       or      %r8, %r10
+       shr     R8(%rcx), %r9
+       or      %r9, %r11
+       not     %r10                    C complement both result limbs
+       not     %r11
+       mov     %r10, 24(rp,n,8)
+       mov     %r11, 16(rp,n,8)
+       C start two new rsh
+       mov     0(up,n,8), %r8
+       mov     -8(up,n,8), %r9
+       shr     R8(%rcx), %r8
+       shr     R8(%rcx), %r9
+
+       C finish stuff from rsh block
+       neg     R32(%rcx)               C put lsh count in cl
+       mov     8(up,n,8), %r10
+       mov     0(up,n,8), %r11
+       shl     R8(%rcx), %r10
+       or      %r10, %r8
+       shl     R8(%rcx), %r11
+       or      %r11, %r9
+       not     %r8                     C complement both result limbs
+       not     %r9
+       mov     %r8, 8(rp,n,8)
+       mov     %r9, 0(rp,n,8)
+       C start two new lsh
+       mov     -8(up,n,8), %r10
+       mov     -16(up,n,8), %r11
+       shl     R8(%rcx), %r10
+       shl     R8(%rcx), %r11
+
+       sub     $4, n
+       jae     L(top)                  C                                     2
+L(end):
+       neg     R32(%rcx)               C put rsh count in cl
+       mov     16(up,n,8), %r8         C up[1], since n = -1 here
+       shr     R8(%rcx), %r8
+       or      %r8, %r10
+       mov     8(up,n,8), %r9          C up[0]
+       shr     R8(%rcx), %r9
+       or      %r9, %r11
+       not     %r10                    C complement both result limbs
+       not     %r11
+       mov     %r10, 24(rp,n,8)        C rp[2]
+       mov     %r11, 16(rp,n,8)        C rp[1]
+
+       neg     R32(%rcx)               C put lsh count in cl
+L(ast):        mov     (up), %r10              C lowest limb, no lower bits to merge
+       shl     R8(%rcx), %r10
+       not     %r10                    C complement the result limb
+       mov     %r10, (rp)
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/lshsub_n.asm b/mpn/x86_64/lshsub_n.asm

new file mode 100644 (file)

index 0000000..6ae7c36
--- /dev/null
+++ b/mpn/x86_64/lshsub_n.asm
@@ -0,0 +1,152 @@
+dnl  AMD64 mpn_lshsub_n.  R = 2^k(U - V).
+
+dnl  Copyright 2006 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C           cycles/limb
+C K8,K9:        3.15   (mpn_sub_n + mpn_lshift costs about 4 c/l)
+C K10:          3.15   (mpn_sub_n + mpn_lshift costs about 4 c/l)
+C P4:          16.5
+C P6-15:        4.35
+
+C This was written quickly and not optimized at all, but it runs very well on
+C K8.  But perhaps one could get under 3 c/l.  Ideas:
+C   1) Use indexing to save the 3 LEA
+C   2) Write reasonable feed-in code
+C   3) Be more clever about register usage
+C   4) Unroll more, handling CL negation, carry save/restore cost much now
+C   5) Reschedule
+
+C INPUT PARAMETERS
+define(`rp',   `%rdi')                 C destination limb pointer
+define(`up',   `%rsi')                 C pointer to the limbs subtracted from
+define(`vp',   `%rdx')                 C pointer to the limbs being subtracted
+define(`n',    `%rcx')                 C number of limbs
+define(`cnt',  `%r8')                  C shift count, moved to cl below
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_lshsub_n)
+
+       push    %r12                    C save the callee-saved registers used below
+       push    %r13
+       push    %r14
+       push    %r15
+       push    %rbx
+
+       mov     n, %rax                 C rax = limb count, rcx is needed for the shift count
+       xor     %ebx, %ebx              C clear carry save register
+       mov     %r8d, %ecx              C shift count
+       xor     %r15d, %r15d            C limb carry
+
+       mov     %eax, %r11d
+       and     $3, %r11d               C r11d = n mod 4, limbs for the one-at-a-time loop
+       je      L(4)                    C n is a multiple of 4, skip that loop
+       sub     $1, %r11d               C bias the counter so the loop ends on borrow
+
+L(oopette):
+       add     %ebx, %ebx              C restore carry flag
+       mov     0(up), %r8
+       lea     8(up), up               C lea leaves the carry flag alone
+       sbb     0(vp), %r8              C r8 = up limb - vp limb - borrow
+       mov     %r8, %r12               C keep the unshifted difference
+       sbb     %ebx, %ebx              C save carry flag
+       shl     %cl, %r8
+       or      %r15, %r8               C merge bits carried in from the previous limb
+       mov     %r12, %r15
+       lea     8(vp), vp
+       neg     %cl                     C put rsh count in cl
+       shr     %cl, %r15               C r15 = bits shifted out, used for the next limb
+       neg     %cl                     C put lsh count in cl
+       mov     %r8, 0(rp)
+       lea     8(rp), rp
+       sub     $1, %r11d
+       jnc     L(oopette)
+
+L(4):
+       sub     $4, %rax
+       jc      L(end)                  C fewer than 4 limbs remain
+
+       ALIGN(16)
+L(oop):
+       add     %ebx, %ebx              C restore carry flag
+
+       mov     0(up), %r8
+       mov     8(up), %r9
+       mov     16(up), %r10
+       mov     24(up), %r11
+
+       lea     32(up), up              C lea leaves the carry flag alone
+
+       sbb     0(vp), %r8
+       mov     %r8, %r12               C r12, r13, r14 keep the unshifted differences
+       sbb     8(vp), %r9
+       mov     %r9, %r13
+       sbb     16(vp), %r10
+       mov     %r10, %r14
+       sbb     24(vp), %r11
+
+       sbb     %ebx, %ebx              C save carry flag
+
+       shl     %cl, %r8
+       shl     %cl, %r9
+       shl     %cl, %r10
+       or      %r15, %r8               C merge bits carried in from the previous limb
+       mov     %r11, %r15              C keep the unshifted top difference
+       shl     %cl, %r11
+
+       lea     32(vp), vp
+
+       neg     %cl                     C put rsh count in cl
+
+       shr     %cl, %r12
+       shr     %cl, %r13
+       shr     %cl, %r14
+       shr     %cl, %r15               C used next loop
+
+       or      %r12, %r9               C each limb gets the bits shifted out of the one below
+       or      %r13, %r10
+       or      %r14, %r11
+
+       neg     %cl                     C put lsh count in cl
+
+       mov     %r8, 0(rp)
+       mov     %r9, 8(rp)
+       mov     %r10, 16(rp)
+       mov     %r11, 24(rp)
+
+       lea     32(rp), rp
+
+       sub     $4, %rax
+       jnc     L(oop)
+L(end):
+       neg     %ebx                    C ebx = 1 if a borrow is pending, else 0
+       shl     %cl, %rbx               C borrow shifted left by the shift count
+       adc     %r15, %rbx              C add the bits shifted out of the top limb; NOTE(review): also adds CF left by the shl, presumably 0 -- confirm
+       mov     %rbx, %rax              C return value
+       pop     %rbx                    C restore the callee-saved registers
+       pop     %r15
+       pop     %r14
+       pop     %r13
+       pop     %r12
+
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/mod_1_4.asm b/mpn/x86_64/mod_1_4.asm

new file mode 100644 (file)

index 0000000..bb8a6b2
--- /dev/null
+++ b/mpn/x86_64/mod_1_4.asm
@@ -0,0 +1,235 @@
+dnl  AMD64 mpn_mod_1s_4p
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2009 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C K8,K9:        3.0
+C K10:          3.0
+C P4:          14.5
+C P6 core2:     5.0
+C P6 corei7:    4.3
+C P6 atom:     25.0
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_mod_1s_4p)
+       push    %r14                    C save the callee-saved registers used below
+       push    %r13
+       push    %r12
+       push    %rbp
+       push    %rbx
+
+       mov     %rdx, -16(%rsp)         C stash the 3rd argument below rsp, mul clobbers rdx
+       mov     %rcx, %r14              C r14 = 4th argument, presumably the table filled by mpn_mod_1s_4p_cps
+       mov     16(%rcx), %r11          C r11 = table entry at offset 16
+       mov     24(%rcx), %rbx          C rbx = table entry at offset 24
+       mov     32(%rcx), %rbp          C rbp = table entry at offset 32
+       mov     40(%rcx), %r13          C r13 = table entry at offset 40
+       mov     48(%rcx), %r12          C r12 = table entry at offset 48
+       xor     R32(%r8), R32(%r8)      C clear the high accumulator limb
+       mov     R32(%rsi), R32(%rdx)
+       and     $3, R32(%rdx)           C limb count mod 4 selects the feed-in case
+       je      L(b0)
+       cmp     $2, R32(%rdx)
+       jc      L(b1)
+       je      L(b2)
+
+L(b3): lea     -24(%rdi,%rsi,8), %rdi  C rdi points at the third limb from the top
+       mov     8(%rdi), %rax
+       mul     %r11
+       mov     (%rdi), %r9
+       add     %rax, %r9
+       adc     %rdx, %r8
+       mov     16(%rdi), %rax
+       mul     %rbx                    C product is added at L(m0)
+       jmp     L(m0)
+
+       ALIGN(8)
+L(b0): lea     -32(%rdi,%rsi,8), %rdi  C rdi points at the fourth limb from the top
+       mov     8(%rdi), %rax
+       mul     %r11
+       mov     (%rdi), %r9
+       add     %rax, %r9
+       adc     %rdx, %r8
+       mov     16(%rdi), %rax
+       mul     %rbx
+       add     %rax, %r9
+       adc     %rdx, %r8
+       mov     24(%rdi), %rax
+       mul     %rbp                    C product is added at L(m0)
+       jmp     L(m0)
+
+       ALIGN(8)
+L(b1): lea     -8(%rdi,%rsi,8), %rdi   C rdi points at the top limb
+       mov     (%rdi), %r9             C accumulator = top limb, nothing to add
+       jmp     L(m1)
+
+       ALIGN(8)
+L(b2): lea     -16(%rdi,%rsi,8), %rdi  C rdi points at the second limb from the top
+       mov     8(%rdi), %rax
+       mul     %r11                    C product is added at L(m0)
+       mov     (%rdi), %r9
+       jmp     L(m0)
+
+       ALIGN(16)
+L(top):        mov     -24(%rdi), %rax
+       mov     -32(%rdi), %r10         C lowest of the next four limbs
+       mul     %r11
+       add     %rax, %r10
+       mov     -16(%rdi), %rax
+       mov     %rdx, %rcx
+       adc     $0, %rcx                C rcx:r10 collects the new two-limb sum
+       mul     %rbx
+       add     %rax, %r10
+       mov     -8(%rdi), %rax
+       adc     %rdx, %rcx
+       sub     $32, %rdi               C step down four limbs
+       mul     %rbp
+       add     %rax, %r10
+       mov     %r9, %rax               C old low accumulator limb
+       adc     %rdx, %rcx
+       mul     %r13
+       add     %rax, %r10
+       mov     %r8, %rax               C old high accumulator limb
+       adc     %rdx, %rcx
+       mul     %r12
+       mov     %r10, %r9
+       mov     %rcx, %r8               C r8:r9 = new accumulator, one product still pending
+L(m0): add     %rax, %r9               C add the product pending in rdx:rax
+       adc     %rdx, %r8
+L(m1): sub     $4, %rsi
+       ja      L(top)                  C loop until the limb count is used up
+
+L(end):        mov     8(%r14), R32(%rsi)      C esi = shift count, table entry at offset 8
+       mov     %r8, %rax
+       mul     %r11                    C high accumulator limb times table entry 16
+       mov     %rax, %r8
+       add     %r9, %r8                C rdx:r8 = folded two-limb value
+       adc     $0, %rdx
+       xor     R32(%rcx), R32(%rcx)
+       sub     R32(%rsi), R32(%rcx)    C cl = negated shift count, for the right shift
+       mov     %r8, %rdi
+       shr     R8(%rcx), %rdi
+       mov     R32(%rsi), R32(%rcx)    C cl = shift count
+       sal     R8(%rcx), %rdx
+       or      %rdx, %rdi              C rdi = high limb of the value shifted left
+       mov     %rdi, %rax
+       mulq    (%r14)                  C multiply by table entry 0
+       mov     -16(%rsp), %rbx         C rbx = the 3rd argument saved on entry
+       mov     %rax, %r9
+       sal     R8(%rcx), %r8           C r8 = low limb of the value shifted left
+       inc     %rdi
+       add     %r8, %r9
+       adc     %rdi, %rdx
+       imul    %rbx, %rdx
+       sub     %rdx, %r8
+       lea     (%r8,%rbx), %rax
+       cmp     %r8, %r9
+       cmovb   %rax, %r8               C conditional add-back of rbx
+       mov     %r8, %rax
+       sub     %rbx, %rax
+       cmovb   %r8, %rax               C keep r8 if subtracting rbx borrowed
+       shr     R8(%rcx), %rax          C shift the result back right by the shift count
+       pop     %rbx                    C restore the callee-saved registers
+       pop     %rbp
+       pop     %r12
+       pop     %r13
+       pop     %r14
+       ret
+EPILOGUE()
+
+       ALIGN(16)
+PROLOGUE(mpn_mod_1s_4p_cps)
+       push    %r12
+       bsr     %rsi, %rcx              C rcx = index of the highest set bit of the 2nd argument
+       push    %rbp
+       xor     $63, R32(%rcx)          C ecx = 63 - index = number of leading zero bits
+       mov     %rsi, %rbp
+       mov     R32(%rcx), R32(%r12)    C keep the shift count across the call
+       sal     R8(%rcx), %rbp          C rbp = 2nd argument shifted so its top bit is set
+       push    %rbx
+       mov     %rdi, %rbx              C rbx = 1st argument, the table to fill
+       mov     %rbp, %rdi              C argument for mpn_invert_limb
+       CALL(   mpn_invert_limb)
+       mov     R32(%r12), R32(%rcx)    C cl = shift count again
+       mov     $1, R32(%r10)
+       sal     R8(%rcx), %r10          C r10 = 1 shifted left by the shift count
+       mov     $64, R32(%rcx)
+       mov     %rax, %r9               C r9 = value returned by mpn_invert_limb
+       sub     R32(%r12), R32(%rcx)    C cl = 64 - shift count
+       mov     %r9, (%rbx)             C table entry 0 = inverse
+       shr     R8(%rcx), %rax
+       mov     R32(%r12), R32(%rcx)    C cl = shift count again
+       or      %rax, %r10
+       mov     %rbp, %rax
+       neg     %rax
+       imul    %rax, %r10              C r10 = first table value, stored at offset 16 below
+       mov     %r10, %rax
+       mul     %r9
+       lea     1(%r10,%rdx), %r8
+       neg     %r8
+       imul    %rbp, %r8
+       cmp     %r8, %rax
+       lea     (%r8,%rbp), %rdx
+       cmovb   %rdx, %r8               C r8 = next value, derived from r10; stored at offset 24
+       mov     %r8, %rax
+       mul     %r9
+       lea     1(%r8,%rdx), %rdi
+       neg     %rdi
+       imul    %rbp, %rdi
+       cmp     %rdi, %rax
+       lea     (%rdi,%rbp), %rdx
+       cmovb   %rdx, %rdi              C rdi = next value, derived from r8; stored at offset 32
+       mov     %rdi, %rax
+       mul     %r9
+       lea     1(%rdi,%rdx), %rsi
+       neg     %rsi
+       imul    %rbp, %rsi
+       cmp     %rsi, %rax
+       lea     (%rsi,%rbp), %rdx
+       cmovb   %rdx, %rsi              C rsi = next value, derived from rdi; stored at offset 40
+       mov     %rsi, %rax
+       mul     %r9
+       lea     1(%rsi,%rdx), %rdx
+       neg     %rdx
+       imul    %rbp, %rdx
+       cmp     %rdx, %rax
+       lea     (%rdx,%rbp), %rbp
+       movslq  R32(%r12), %rax         C rax = shift count, sign-extended to 64 bits
+       cmovae  %rdx, %rbp              C rbp = last value, derived from rsi; stored at offset 48
+       shr     R8(%rcx), %r10          C shift all five values right by the shift count
+       shr     R8(%rcx), %r8
+       shr     R8(%rcx), %rbp
+       shr     R8(%rcx), %rdi
+       shr     R8(%rcx), %rsi
+       mov     %rbp, 48(%rbx)
+       mov     %rax, 8(%rbx)           C table entry at offset 8 = shift count
+       mov     %r10, 16(%rbx)
+       mov     %r8, 24(%rbx)
+       mov     %rdi, 32(%rbx)
+       mov     %rsi, 40(%rbx)
+       pop     %rbx
+       pop     %rbp
+       pop     %r12
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/mod_34lsub1.asm b/mpn/x86_64/mod_34lsub1.asm

new file mode 100644 (file)

index 0000000..318fb96
--- /dev/null
+++ b/mpn/x86_64/mod_34lsub1.asm
@@ -0,0 +1,165 @@
+dnl  AMD64 mpn_mod_34lsub1 -- remainder modulo 2^48-1.
+
+dnl  Copyright 2000, 2001, 2002, 2004, 2005, 2007 Free Software Foundation,
+dnl  Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C           cycles/limb
+C K8,K9:        1.0
+C K10:          1.12
+C P4:           3.25
+C P6-15 (Core2): 1.5
+C P6-28 (Atom):         2.5
+
+
+C INPUT PARAMETERS
+C up   rdi
+C n    rsi
+
+C mp_limb_t mpn_mod_34lsub1 (mp_srcptr up, mp_size_t n)
+
+C TODO
+C  * Apply the movzwl tricks to the x86/k7 code
+C  * Review feed-in and wind-down code.  In particular, try to avoid adc and
+C    sbb to placate Pentium4.
+C  * More unrolling and/or index addressing could bring time to under 1 c/l
+C    for Athlon64, approaching 0.67 c/l seems possible.
+C  * There are recurrences on the carry registers (r8, r9, r10) that might
+C    be the limiting factor for the Pentium4 speed.  Splitting these into 6
+C    registers would help.
+C  * For ultimate Athlon64 performance, a sequence like this might be best.
+C    It should reach 0.5 c/l (limited by L1 cache bandwidth).
+C
+C      add     (%rdi), %rax
+C      adc     8(%rdi), %rcx
+C      adc     16(%rdi), %rdx
+C      adc     $0, %r8
+C      add     24(%rdi), %rax
+C      adc     32(%rdi), %rcx
+C      adc     40(%rdi), %rdx
+C      adc     $0, %r8
+C      ...
+
+
+ASM_START()
+       TEXT
+       ALIGN(32)
+PROLOGUE(mpn_mod_34lsub1)
+
+       mov     $0x0000FFFFFFFFFFFF, %r11       C r11 = 2^48-1, mask for the low 48 bits
+
+       sub     $2, %rsi
+       ja      L(gt2)                  C more than two limbs
+
+       mov     (%rdi), %rax
+       nop
+       jb      L(1)                    C flags still from the sub: n = 1, return src[0] as is
+
+       mov     8(%rdi), %rsi
+       mov     %rax, %rdx
+       shr     $48, %rax               C src[0] high
+
+       and     %r11, %rdx              C src[0] low
+       add     %rdx, %rax
+       mov     %esi, %edx              C low 32 bits of src[1]
+
+       shr     $32, %rsi               C src[1] high
+       add     %rsi, %rax
+
+       shl     $16, %rdx               C src[1] low
+       add     %rdx, %rax
+
+L(1):  ret
+
+
+       ALIGN(16)
+L(gt2):        xor     %eax, %eax              C rax = sum of limbs at index 0 mod 3
+       xor     %ecx, %ecx              C rcx = sum of limbs at index 1 mod 3
+       xor     %edx, %edx              C rdx = sum of limbs at index 2 mod 3
+       xor     %r8, %r8                C r8 = carries out of rcx
+       xor     %r9, %r9                C r9 = carries out of rdx
+       xor     %r10, %r10              C r10 = carries out of rax
+
+L(top):        add     (%rdi), %rax
+       adc     $0, %r10
+       add     8(%rdi), %rcx
+       adc     $0, %r8
+       add     16(%rdi), %rdx
+       adc     $0, %r9
+
+       sub     $3,%rsi
+       jng     L(end)                  C leftover limbs, if any, start at 24(%rdi)
+
+       add     24(%rdi), %rax
+       adc     $0, %r10
+       add     32(%rdi), %rcx
+       adc     $0, %r8
+       add     40(%rdi), %rdx
+       lea     48(%rdi), %rdi          C lea leaves the carry flag alone
+       adc     $0, %r9
+
+       sub     $3,%rsi
+       jg      L(top)
+
+
+       add     $-24, %rdi              C make leftover limbs start at 24(%rdi) on this path too
+L(end):        add     %r9, %rax               C each carry count is added to the next sum up, wrapping around
+       adc     %r10, %rcx
+       adc     %r8, %rdx
+
+       inc     %rsi                    C inc keeps the carry flag from the adc above
+       mov     $0x1, %r10d             C r10 = weight of the pending carry
+       js      L(combine)              C no leftover limbs
+
+       mov     $0x10000, %r10d         C a carry out of rax weighs 2^64 = 2^16 mod 2^48-1
+       adc     24(%rdi), %rax          C add the first leftover limb and the pending carry
+       dec     %rsi
+       js      L(combine)              C one leftover limb
+
+       adc     32(%rdi), %rcx          C add the second leftover limb and the pending carry
+       mov     $0x100000000, %r10      C a carry out of rcx weighs 2^128 = 2^32 mod 2^48-1
+
+L(combine):
+       sbb     %rsi, %rsi              C carry
+       mov     %rax, %rdi              C 0mod3
+       shr     $48, %rax               C 0mod3 high
+
+       and     %r10, %rsi              C carry masked
+       and     %r11, %rdi              C 0mod3 low
+       mov     %ecx, %r10d             C 1mod3
+
+       add     %rsi, %rax              C apply carry
+       shr     $32, %rcx               C 1mod3 high
+
+       add     %rdi, %rax              C apply 0mod3 low
+       movzwl  %dx, %edi               C 2mod3
+       shl     $16, %r10               C 1mod3 low
+
+       add     %rcx, %rax              C apply 1mod3 high
+       shr     $16, %rdx               C 2mod3 high
+
+       add     %r10, %rax              C apply 1mod3 low
+       shl     $32, %rdi               C 2mod3 low
+
+       add     %rdx, %rax              C apply 2mod3 high
+       add     %rdi, %rax              C apply 2mod3 low
+
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/mode1o.asm b/mpn/x86_64/mode1o.asm

new file mode 100644 (file)

index 0000000..ae5f83c
--- /dev/null
+++ b/mpn/x86_64/mode1o.asm
@@ -0,0 +1,179 @@
+dnl  AMD64 mpn_modexact_1_odd -- exact division style remainder.
+
+dnl  Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2006 Free Software
+dnl  Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C           cycles/limb
+C K8,K9:       10
+C K10:         10
+C P4:          33
+C P6 core2:    13
+C P6 corei7:   14.5
+C P6 Atom:     35
+
+
+C mp_limb_t mpn_modexact_1_odd (mp_srcptr src, mp_size_t size,
+C                               mp_limb_t divisor);
+C mp_limb_t mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size,
+C                                mp_limb_t divisor, mp_limb_t carry);
+C
+C
+C The dependent chain in the main loop is
+C
+C                            cycles
+C      subq    %rdx, %rax      1
+C      imulq   %r9, %rax       4
+C      mulq    %r8             5
+C                            ----
+C       total                 10
+C
+C The movq load from src seems to need to be scheduled back before the jz to
+C achieve this speed, out-of-order execution apparently can't completely
+C hide the latency otherwise.
+C
+C The l=src[i]-cbit step is rotated back too, since that allows us to avoid
+C it for the first iteration (where there's no cbit).
+C
+C The code alignment used (32-byte) for the loop also seems necessary.
+C Without that the non-PIC case has adcq crossing the 0x60 offset,
+C apparently making it run at 11 cycles instead of 10.
+C
+C Not done:
+C
+C divq for size==1 was measured at about 79 cycles, compared to the inverse
+C at about 25 cycles (both including function call overheads), so that's not
+C used.
+C
+C Enhancements:
+C
+C For PIC, we shouldn't really need the GOT fetch for binvert_limb_table,
+C it'll be in rodata or text in libgmp.so and can be accessed directly %rip
+C relative.  This would be for small model only (something we don't
+C presently detect, but which is all that gcc 3.3.3 supports), since 8-byte
+C PC-relative relocations are apparently not available.  Some rough
+C experiments with binutils 2.13 looked worryingly like it might come out
+C with an unwanted text segment relocation though, even with ".protected".
+
+
+ASM_START()
+       TEXT
+       ALIGN(32)
+PROLOGUE(mpn_modexact_1_odd)
+
+       movl    $0, %ecx                C carry = 0 for the entry point without a carry argument
+
+PROLOGUE(mpn_modexact_1c_odd)
+
+       C rdi   src
+       C rsi   size
+       C rdx   divisor
+       C rcx   carry
+
+       movq    %rdx, %r8               C d
+       shrl    %edx                    C d/2
+ifdef(`PIC',`
+       movq    binvert_limb_table@GOTPCREL(%rip), %r9
+',`
+       movabsq $binvert_limb_table, %r9
+')
+
+       andl    $127, %edx              C table index = low 7 bits of d/2
+       movq    %rcx, %r10              C initial carry
+
+       movzbl  (%r9,%rdx), %edx        C inv 8 bits
+
+       movq    (%rdi), %rax            C src[0]
+       leaq    (%rdi,%rsi,8), %r11     C src end
+       movq    %r8, %rdi               C d, made available to imull
+
+       leal    (%rdx,%rdx), %ecx       C 2*inv
+       imull   %edx, %edx              C inv*inv
+
+       negq    %rsi                    C -size
+
+       imull   %edi, %edx              C inv*inv*d
+
+       subl    %edx, %ecx              C inv = 2*inv - inv*inv*d, 16 bits
+
+       leal    (%rcx,%rcx), %edx       C 2*inv
+       imull   %ecx, %ecx              C inv*inv
+
+       imull   %edi, %ecx              C inv*inv*d
+
+       subl    %ecx, %edx              C inv = 2*inv - inv*inv*d, 32 bits
+       xorl    %ecx, %ecx              C initial cbit
+
+       leaq    (%rdx,%rdx), %r9        C 2*inv
+       imulq   %rdx, %rdx              C inv*inv
+
+       imulq   %r8, %rdx               C inv*inv*d
+
+       subq    %rdx, %r9               C inv = 2*inv - inv*inv*d, 64 bits
+       movq    %r10, %rdx              C initial climb
+
+       ASSERT(e,`      C d*inv == 1 mod 2^64
+       movq    %r8, %r10
+       imulq   %r9, %r10
+       cmpq    $1, %r10')
+
+       incq    %rsi                    C counter = 1 - size
+       jz      L(one)                  C size == 1, no loop iterations
+
+
+       ALIGN(16)
+L(top):
+       C rax   l = src[i]-cbit
+       C rcx   new cbit, 0 or 1
+       C rdx   climb, high of last product
+       C rsi   counter, limbs, negative
+       C rdi
+       C r8    divisor
+       C r9    inverse
+       C r11   src end ptr
+
+       subq    %rdx, %rax              C l = src[i]-cbit - climb
+
+       adcq    $0, %rcx                C more cbit
+       imulq   %r9, %rax               C q = l * inverse
+
+       mulq    %r8                     C climb = high (q * d)
+
+       movq    (%r11,%rsi,8), %rax     C src[i+1]
+       subq    %rcx, %rax              C next l = src[i+1] - cbit
+       setc    %cl                     C new cbit
+
+       incq    %rsi
+       jnz     L(top)                  C loop until the counter reaches zero
+
+
+L(one):
+       subq    %rdx, %rax              C l = src[i]-cbit - climb
+
+       adcq    $0, %rcx                C more cbit
+       imulq   %r9, %rax               C q = l * inverse
+
+       mulq    %r8                     C climb = high (q * d)
+
+       leaq    (%rcx,%rdx), %rax       C climb+cbit
+       ret                             C return value is climb+cbit in rax
+
+EPILOGUE(mpn_modexact_1c_odd)
+EPILOGUE(mpn_modexact_1_odd)
diff --git a/mpn/x86_64/mul_1.asm b/mpn/x86_64/mul_1.asm

new file mode 100644 (file)

index 0000000..a0c4599
--- /dev/null
+++ b/mpn/x86_64/mul_1.asm
@@ -0,0 +1,148 @@
+dnl  AMD64 mpn_mul_1.
+
+dnl  Copyright 2003, 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C K8,K9:        2.5
+C K10:          2.5
+C P4:           12.3
+C P6 core2:     4.0
+C P6 corei7:    3.8
+C Atom:                19.8
+
+C The inner loop of this code is the result of running a code generation and
+C optimization tool suite written by David Harvey and Torbjorn Granlund.
+
+C TODO:
+C  * The inner loop is great, but the prologue and epilogue code was
+C    quickly written.  Tune it!
+
+C INPUT PARAMETERS
+define(`rp',    `%rdi')                C destination limb pointer
+define(`up',    `%rsi')                C source limb pointer
+define(`n_param',`%rdx')               C limb count as passed in
+define(`vl',    `%rcx')                C the single multiplier limb
+
+define(`n',    `%r11')                 C working copy of the limb count, rdx is used by mul
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_mul_1c)
+       push    %rbx
+       mov     %r8, %r10               C r10 = carry-in limb, the 5th argument
+       jmp     L(common)               C join mpn_mul_1 after its carry setup
+EPILOGUE()
+
+PROLOGUE(mpn_mul_1)
+       push    %rbx
+       xor     %r10, %r10              C no carry-in
+L(common):
+       mov     (up), %rax              C read first u limb early
+       mov     n_param, %rbx           C move away n from rdx, mul uses it
+       mul     vl                      C rdx:rax = up[0] * vl
+       mov     %rbx, %r11              C n = limb count
+
+       add     %r10, %rax              C add the carry-in to the first product
+       adc     $0, %rdx
+
+       and     $3, R32(%rbx)           C dispatch on n mod 4
+       jz      L(b0)
+       cmp     $2, R32(%rbx)
+       jz      L(b2)
+       jg      L(b3)
+
+L(b1): dec     n
+       jne     L(gt1)
+       mov     %rax, (rp)              C n = 1: store the single low product limb
+       jmp     L(ret)                  C the high limb in rdx is returned
+L(gt1):        lea     8(up,n,8), up
+       lea     -8(rp,n,8), rp
+       neg     n                       C n becomes a negative index counting up
+       xor     %r10, %r10
+       xor     R32(%rbx), R32(%rbx)
+       mov     %rax, %r9
+       mov     (up,n,8), %rax
+       mov     %rdx, %r8
+       jmp     L(L1)
+
+L(b0): lea     (up,n,8), up
+       lea     -16(rp,n,8), rp
+       neg     n                       C n becomes a negative index counting up
+       xor     %r10, %r10
+       mov     %rax, %r8
+       mov     %rdx, %rbx
+       jmp      L(L0)
+
+L(b3): lea     -8(up,n,8), up
+       lea     -24(rp,n,8), rp
+       neg     n                       C n becomes a negative index counting up
+       mov     %rax, %rbx
+       mov     %rdx, %r10
+       jmp     L(L3)
+
+L(b2): lea     -16(up,n,8), up
+       lea     -32(rp,n,8), rp
+       neg     n                       C n becomes a negative index counting up
+       xor     %r8, %r8
+       xor     R32(%rbx), R32(%rbx)
+       mov     %rax, %r10
+       mov     24(up,n,8), %rax
+       mov     %rdx, %r9
+       jmp     L(L2)
+
+       ALIGN(16)
+L(top):        mov     %r10, (rp,n,8)
+       add     %rax, %r9
+       mov     (up,n,8), %rax
+       adc     %rdx, %r8
+       mov     $0, %r10d
+L(L1): mul     vl
+       mov     %r9, 8(rp,n,8)
+       add     %rax, %r8
+       adc     %rdx, %rbx
+L(L0): mov     8(up,n,8), %rax
+       mul     vl
+       mov     %r8, 16(rp,n,8)
+       add     %rax, %rbx
+       adc     %rdx, %r10
+L(L3): mov     16(up,n,8), %rax
+       mul     vl
+       mov     %rbx, 24(rp,n,8)
+       mov     $0, %r8d                # zero
+       mov     %r8, %rbx               # zero
+       add     %rax, %r10
+       mov     24(up,n,8), %rax
+       mov     %r8, %r9                # zero
+       adc     %rdx, %r9
+L(L2): mul     vl
+       add     $4, n                   C four limbs per iteration
+       js       L(top)
+
+       mov     %r10, (rp,n,8)
+       add     %rax, %r9
+       adc     %r8, %rdx               C r8 is zero here, this only adds the carry
+       mov     %r9, 8(rp,n,8)
+       add     %r8, %rdx               C r8 is zero here, rdx is unchanged
+L(ret):        mov     %rdx, %rax              C return value = final high limb
+
+       pop     %rbx
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/mul_2.asm b/mpn/x86_64/mul_2.asm

new file mode 100644 (file)

index 0000000..ab87aaf
--- /dev/null
+++ b/mpn/x86_64/mul_2.asm
@@ -0,0 +1,174 @@
+dnl  AMD64 mpn_mul_2 -- Multiply an n-limb vector with a 2-limb vector and
+dnl  store the result in a third limb vector.
+
+dnl  Copyright 2008 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C K8,K9:        2.275
+C K10:          2.275
+C P4:           ?
+C P6 core2:     4.0
+C P6 corei7:    3.8
+
+C This code is the result of running a code generation and optimization tool
+C suite written by David Harvey and Torbjorn Granlund.
+
+C TODO
+C  * Work on feed-in and wind-down code.
+C  * Convert "mov $0" to "xor".
+C  * Adjust initial lea to save some bytes.
+C  * Perhaps adjust n from n_param&3 value?
+C  * Replace with 2.25 c/l sequence.
+
+C INPUT PARAMETERS
+define(`rp',    `%rdi')
+define(`up',    `%rsi')
+define(`n_param',`%rdx')
+define(`vp',    `%rcx')
+
+define(`v0', `%r8')
+define(`v1', `%r9')
+define(`w0', `%rbx')
+define(`w1', `%rcx')
+define(`w2', `%rbp')
+define(`w3', `%r10')
+define(`n',  `%r11')
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_mul_2)            C multiply the n_param limbs at up by the two limbs at vp; low limbs are stored at rp, the top limb is returned in %rax
+       push    %rbx            C save %rbx, used below as w0
+       push    %rbp            C save %rbp, used below as w2
+
+       mov     (vp), v0        C v0 = vp[0]
+       mov     8(vp), v1       C v1 = vp[1]; vp is not read again, its register (%rcx) becomes w1
+
+       mov     (up), %rax      C up[0], operand of the first mul in every feed-in path
+
+       mov     n_param, n
+       neg     n               C n = -n_param, a negative index the loop steps up by 4
+       lea     -8(up,n_param,8), up    C up now addresses the last source limb
+       lea     -8(rp,n_param,8), rp    C rp gets the same bias, i.e. rp[n_param-1]
+
+       and     $3, R32(n_param)        C choose the feed-in path from n_param mod 4
+       jz      L(m2p0)         C remainder 0
+       cmp     $2, R32(n_param)
+       jc      L(m2p1)         C remainder 1
+       jz      L(m2p2)         C remainder 2
+L(m2p3):                       C remainder 3 falls through
+       mul     v0              C %rdx:%rax = up[0] * v0
+       xor     R32(w3), R32(w3)
+       mov     %rax, w1
+       mov     %rdx, w2
+       mov     8(up,n,8), %rax C up[0] again, n is still -n_param here
+       add     $-1, n          C bias n so the displacements at L(m23) hit the right limbs
+       mul     v1
+       add     %rax, w2        C the carry out is consumed by the adc at L(m23)
+       jmp     L(m23)
+L(m2p0):
+       mul     v0              C %rdx:%rax = up[0] * v0
+       xor     R32(w2), R32(w2)
+       mov     %rax, w0
+       mov     %rdx, w1
+       jmp     L(m20)
+L(m2p1):
+       mul     v0              C %rdx:%rax = up[0] * v0, added into w3/w0 at L(m2top)
+       xor     R32(w3), R32(w3)
+       xor     R32(w0), R32(w0)
+       xor     R32(w1), R32(w1)
+       add     $1, n
+       jmp     L(m2top)        C NOTE(review): with n_param == 1 this enters the loop with n == 0 and loads 8(up), past the only source limb - presumably callers pass n_param >= 2, confirm
+L(m2p2):
+       mul     v0              C %rdx:%rax = up[0] * v0
+       xor     R32(w0), R32(w0)
+       xor     R32(w1), R32(w1)
+       mov     %rax, w2
+       mov     %rdx, w3
+       mov     8(up,n,8), %rax C up[0] again, n is still -n_param here
+       add     $-2, n
+       jmp     L(m22)
+
+
+       ALIGN(32)
+L(m2top):                      C main loop, four source limbs per pass; w0..w3 rotate as the accumulator window
+       add     %rax, w3
+       adc     %rdx, w0
+       mov     0(up,n,8), %rax
+       adc     $0, R32(w1)     C w1 was zeroed before reaching here, so it only captures the carry
+       mov     $0, R32(w2)     C mov rather than xor: the flags are not disturbed
+       mul     v1
+       add     %rax, w0
+       mov     w3, 0(rp,n,8)   C w3 is complete, store it as a result limb
+       adc     %rdx, w1
+       mov     8(up,n,8), %rax
+       mul     v0
+       add     %rax, w0
+       adc     %rdx, w1
+       adc     $0, R32(w2)
+L(m20):        mov     8(up,n,8), %rax        C same limb as the previous load, this time for v1
+       mul     v1
+       add     %rax, w1
+       adc     %rdx, w2
+       mov     16(up,n,8), %rax
+       mov     $0, R32(w3)
+       mul     v0
+       add     %rax, w1
+       mov     16(up,n,8), %rax
+       adc     %rdx, w2
+       adc     $0, R32(w3)
+       mul     v1
+       add     %rax, w2
+       mov     w0, 8(rp,n,8)   C store finished limb; mov leaves the carry for the adc below
+L(m23):        adc     %rdx, w3
+       mov     24(up,n,8), %rax
+       mul     v0
+       mov     $0, R32(w0)
+       add     %rax, w2
+       adc     %rdx, w3
+       mov     w1, 16(rp,n,8)  C store finished limb
+       mov     24(up,n,8), %rax
+       mov     $0, R32(w1)
+       adc     $0, R32(w0)
+L(m22):        mul     v1
+       add     %rax, w3
+       mov     w2, 24(rp,n,8)  C store finished limb
+       adc     %rdx, w0
+       mov     32(up,n,8), %rax
+       mul     v0              C this product is consumed at L(m2top) or by the wind-down below
+       add     $4, n
+       js      L(m2top)        C loop while n is still negative
+
+                               C wind-down: fold in the pending v0 product, then last limb times v1
+       add     %rax, w3
+       adc     %rdx, w0
+       adc     $0, R32(w1)
+       mov     (up), %rax      C last source limb
+       mul     v1
+       mov     w3, (rp)        C rp[n_param-1]
+       add     %rax, w0
+       adc     %rdx, w1
+       mov     w0, 8(rp)       C rp[n_param]
+       mov     w1, %rax        C most significant limb is the return value
+
+       pop     %rbp
+       pop     %rbx
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/mul_basecase.asm b/mpn/x86_64/mul_basecase.asm

new file mode 100644 (file)

index 0000000..5320766
--- /dev/null
+++ b/mpn/x86_64/mul_basecase.asm
@@ -0,0 +1,450 @@
+dnl  AMD64 mpn_mul_basecase.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund and David Harvey.
+
+dnl  Copyright 2008 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C K8,K9:        2.375
+C K10:          2.375
+C P4:           ?
+C P6-15:        4.45
+
+C The inner loops of this code are the result of running a code generation and
+C optimization tool suite written by David Harvey and Torbjorn Granlund.
+
+C TODO
+C  * Use fewer registers.  (how??? I can't see it -- david)
+C  * Avoid some "mov $0,r" and instead use "xor r,r".
+C  * Can the top of each L(addmul_outer_n) prologue be folded into the
+C    mul_1/mul_2 prologues, saving a LEA (%rip)? It would slow down the
+C    case where vn = 1 or 2; is it worth it?
+
+C INPUT PARAMETERS
+define(`rp',      `%rdi')
+define(`up',      `%rsi')
+define(`un_param',`%rdx')
+define(`vp',      `%rcx')
+define(`vn',      `%r8')
+
+define(`v0', `%r12')
+define(`v1', `%r9')
+
+define(`w0', `%rbx')
+define(`w1', `%r15')
+define(`w2', `%rbp')
+define(`w3', `%r10')
+
+define(`n',  `%r11')
+define(`outer_addr', `%r14')
+define(`un',  `%r13')
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_mul_basecase)     C schoolbook product of the un_param limbs at up and the vn limbs at vp, written to rp
+       push    %rbx            C six registers are saved here and restored in reverse order at L(ret)
+       push    %rbp
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
+
+       xor     R32(un), R32(un)
+       mov     (up), %rax      C up[0]
+       mov     (vp), v0        C vp[0]
+
+       sub     un_param, un            C rdx used by mul
+       mov     un, n           C n = un = -un_param
+       mov     R32(un_param), R32(w0)  C keep the size for the mod-4 dispatch before mul overwrites %rdx
+
+       lea     (rp,un_param,8), rp     C rp and up now point un_param limbs past their start
+       lea     (up,un_param,8), up     C and are indexed with the negative counter n
+
+       mul     v0              C %rdx:%rax = up[0] * vp[0], used by whichever first pass is chosen
+
+       test    $1, R8(vn)
+       jz      L(mul_2)        C even vn: the first pass handles vp[0] and vp[1] together
+
+C ===========================================================
+C     mul_1 for vp[0] if vn is odd
+
+L(mul_1):
+       and     $3, R32(w0)     C choose the loop entry from un_param mod 4
+       jz      L(mul_1_prologue_0)
+       cmp     $2, R32(w0)
+       jc      L(mul_1_prologue_1)
+       jz      L(mul_1_prologue_2)
+       jmp     L(mul_1_prologue_3)
+
+L(mul_1_prologue_0):
+       mov     %rax, w2
+       mov     %rdx, w3                C note: already w0 == 0
+       lea     L(addmul_outer_0)(%rip), outer_addr     C remember the addmul entry matching this residue
+       jmp     L(mul_1_entry_0)
+
+L(mul_1_prologue_1):
+       cmp     $-1, un         C un_param == 1 ?
+       jne     2f
+       mov     %rax, -8(rp)    C rp[0] = low limb of up[0] * vp[0]
+       mov     %rdx, (rp)      C rp[1] = high limb
+       jmp     L(ret)          C NOTE(review): returns without looking at vn, presumably un_param >= vn is required - confirm
+2:     add     $1, n
+       lea     L(addmul_outer_1)(%rip), outer_addr
+       mov     %rax, w1
+       mov     %rdx, w2
+       xor     R32(w3), R32(w3)
+       mov     (up,n,8), %rax  C up[1]
+       jmp     L(mul_1_entry_1)
+
+L(mul_1_prologue_2):
+       add     $-2, n
+       lea     L(addmul_outer_2)(%rip), outer_addr
+       mov     %rax, w0
+       mov     %rdx, w1
+       mov     24(up,n,8), %rax        C up[1]
+       xor     R32(w2), R32(w2)
+       xor     R32(w3), R32(w3)
+       jmp     L(mul_1_entry_2)
+
+L(mul_1_prologue_3):
+       add     $-1, n
+       lea     L(addmul_outer_3)(%rip), outer_addr
+       mov     %rax, w3
+       mov     %rdx, w0
+       jmp     L(mul_1_entry_3)
+
+
+       C this loop is 10 c/loop = 2.5 c/l on K8, for all up/rp alignments
+
+       ALIGN(16)
+L(mul_1_top):
+       mov     w0, -16(rp,n,8) C store a finished result limb
+       add     %rax, w1
+       mov     (up,n,8), %rax
+       adc     %rdx, w2
+L(mul_1_entry_1):
+       xor     R32(w0), R32(w0)
+       mul     v0
+       mov     w1, -8(rp,n,8)
+       add     %rax, w2
+       adc     %rdx, w3
+L(mul_1_entry_0):
+       mov     8(up,n,8), %rax
+       mul     v0
+       mov     w2, (rp,n,8)
+       add     %rax, w3
+       adc     %rdx, w0
+L(mul_1_entry_3):
+       mov     16(up,n,8), %rax
+       mul     v0
+       mov     w3, 8(rp,n,8)
+       xor     R32(w2), R32(w2)        C zero
+       mov     w2, w3                  C zero
+       add     %rax, w0
+       mov     24(up,n,8), %rax
+       mov     w2, w1                  C zero
+       adc     %rdx, w1
+L(mul_1_entry_2):
+       mul     v0
+       add     $4, n           C four limbs per pass
+       js      L(mul_1_top)    C loop while n is still negative
+
+       mov     w0, -16(rp)     C wind-down: store the last limbs of this pass
+       add     %rax, w1
+       mov     w1, -8(rp)
+       adc     %rdx, w2
+       mov     w2, (rp)        C top limb of the un_param by 1 product
+
+       add     $-1, vn                 C vn -= 1
+       jz      L(ret)
+
+       mov     8(vp), v0       C next two multiplier limbs for the addmul passes
+       mov     16(vp), v1
+
+       lea     8(vp), vp               C vp += 1
+       lea     8(rp), rp               C rp += 1
+
+       jmp     *outer_addr     C enter the addmul prologue recorded by the mul_1 prologue
+
+C ===========================================================
+C     mul_2 for vp[0], vp[1] if vn is even
+
+       ALIGN(16)
+L(mul_2):
+       mov     8(vp), v1       C vp[1]
+
+       and     $3, R32(w0)     C choose the loop entry from un_param mod 4
+       jz      L(mul_2_prologue_0)
+       cmp     $2, R32(w0)
+       jz      L(mul_2_prologue_2)
+       jc      L(mul_2_prologue_1)
+
+L(mul_2_prologue_3):
+       lea     L(addmul_outer_3)(%rip), outer_addr
+       add     $2, n
+       mov     %rax, -16(rp,n,8)       C low limb of up[0] * vp[0] is already final: rp[0]
+       mov     %rdx, w2
+       xor     R32(w3), R32(w3)
+       xor     R32(w0), R32(w0)
+       mov     -16(up,n,8), %rax       C up[0] again, for v1
+       jmp     L(mul_2_entry_3)
+
+       ALIGN(16)
+L(mul_2_prologue_0):
+       add     $3, n
+       mov     %rax, w0
+       mov     %rdx, w1
+       xor     R32(w2), R32(w2)
+       mov     -24(up,n,8), %rax       C up[0] again, for v1
+       lea     L(addmul_outer_0)(%rip), outer_addr
+       jmp     L(mul_2_entry_0)
+
+       ALIGN(16)
+L(mul_2_prologue_1):
+       mov     %rax, w3
+       mov     %rdx, w0
+       xor     R32(w1), R32(w1)
+       lea     L(addmul_outer_1)(%rip), outer_addr
+       jmp     L(mul_2_entry_1)
+
+       ALIGN(16)
+L(mul_2_prologue_2):
+       add     $1, n
+       lea     L(addmul_outer_2)(%rip), outer_addr
+       mov     $0, R32(w0)
+       mov     $0, R32(w1)
+       mov     %rax, w2
+       mov     -8(up,n,8), %rax        C up[0] again, for v1
+       mov     %rdx, w3
+       jmp     L(mul_2_entry_2)
+
+       C this loop is 18 c/loop = 2.25 c/l on K8, for all up/rp alignments
+
+       ALIGN(16)
+L(mul_2_top):
+       mov     -32(up,n,8), %rax
+       mul     v1
+       add     %rax, w0
+       adc     %rdx, w1
+       mov     -24(up,n,8), %rax
+       xor     R32(w2), R32(w2)
+       mul     v0
+       add     %rax, w0
+       mov     -24(up,n,8), %rax       C same limb reloaded for the v1 multiply
+       adc     %rdx, w1
+       adc     $0, R32(w2)
+L(mul_2_entry_0):
+       mul     v1
+       add     %rax, w1
+       mov     w0, -24(rp,n,8) C store a finished result limb
+       adc     %rdx, w2
+       mov     -16(up,n,8), %rax
+       mul     v0
+       mov     $0, R32(w3)
+       add     %rax, w1
+       adc     %rdx, w2
+       mov     -16(up,n,8), %rax
+       adc     $0, R32(w3)
+       mov     $0, R32(w0)
+       mov     w1, -16(rp,n,8)
+L(mul_2_entry_3):
+       mul     v1
+       add     %rax, w2
+       mov     -8(up,n,8), %rax
+       adc     %rdx, w3
+       mov     $0, R32(w1)
+       mul     v0
+       add     %rax, w2
+       mov     -8(up,n,8), %rax
+       adc     %rdx, w3
+       adc     R32(w1), R32(w0)        C adc $0, w0
+L(mul_2_entry_2):
+       mul     v1
+       add     %rax, w3
+       mov     w2, -8(rp,n,8)
+       adc     %rdx, w0
+       mov     (up,n,8), %rax
+       mul     v0
+       add     %rax, w3
+       adc     %rdx, w0
+       adc     $0, R32(w1)
+L(mul_2_entry_1):
+       add     $4, n           C four limbs per pass
+       mov     w3, -32(rp,n,8) C mov leaves the sign flag from the add intact for the js
+       js      L(mul_2_top)
+
+       mov     -32(up,n,8), %rax       C wind-down: one more product with v1
+       mul     v1
+       add     %rax, w0
+       mov     w0, (rp)
+       adc     %rdx, w1
+       mov     w1, 8(rp)       C top limb of this pass
+
+       add     $-2, vn                 C vn -= 2
+       jz      L(ret)
+
+       mov     16(vp), v0      C next two multiplier limbs for the addmul passes
+       mov     24(vp), v1
+
+       lea     16(vp), vp              C vp += 2
+       lea     16(rp), rp              C rp += 2
+
+       jmp     *outer_addr     C enter the addmul prologue recorded by the mul_2 prologue
+
+
+C ===========================================================
+C     addmul_2 for remaining vp's
+
+       C in the following prologues, we reuse un to store the
+       C adjusted value of n that is reloaded on each iteration
+
+L(addmul_outer_0):
+       add     $3, un          C one-time bias of un for this loop entry
+       lea     0(%rip), outer_addr     C outer_addr = address of the next instruction, so later passes skip the add above
+
+       mov     un, n
+       mov     -24(up,un,8), %rax      C up[0]
+       mul     v0
+       mov     %rax, w0
+       mov     -24(up,un,8), %rax      C up[0] again, for v1
+       mov     %rdx, w1
+       xor     R32(w2), R32(w2)
+       jmp     L(addmul_entry_0)
+
+L(addmul_outer_1):             C no bias needed for this residue, so outer_addr stays as it is
+       mov     un, n
+       mov     (up,un,8), %rax C up[0]
+       mul     v0
+       mov     %rax, w3
+       mov     (up,un,8), %rax C up[0] again, for v1
+       mov     %rdx, w0
+       xor     R32(w1), R32(w1)
+       jmp     L(addmul_entry_1)
+
+L(addmul_outer_2):
+       add     $1, un          C one-time bias of un for this loop entry
+       lea     0(%rip), outer_addr     C outer_addr = address of the next instruction, so later passes skip the add above
+
+       mov     un, n
+       mov     -8(up,un,8), %rax       C up[0]
+       mul     v0
+       xor     R32(w0), R32(w0)
+       mov     %rax, w2
+       xor     R32(w1), R32(w1)
+       mov     %rdx, w3
+       mov     -8(up,un,8), %rax       C up[0] again, for v1
+       jmp     L(addmul_entry_2)
+
+L(addmul_outer_3):
+       add     $2, un          C one-time bias of un for this loop entry
+       lea     0(%rip), outer_addr     C outer_addr = address of the next instruction, so later passes skip the add above
+
+       mov     un, n
+       mov     -16(up,un,8), %rax      C up[0]
+       xor     R32(w3), R32(w3)
+       mul     v0
+       mov     %rax, w1
+       mov     -16(up,un,8), %rax      C up[0] again, for v1
+       mov     %rdx, w2
+       jmp     L(addmul_entry_3)
+
+       C this loop is 19 c/loop = 2.375 c/l on K8, for all up/rp alignments
+
+       ALIGN(16)
+L(addmul_top):
+       add     w3, -32(rp,n,8) C accumulate into the limb already stored at rp
+       adc     %rax, w0
+       mov     -24(up,n,8), %rax
+       adc     %rdx, w1
+       xor     R32(w2), R32(w2)
+       mul     v0
+       add     %rax, w0
+       mov     -24(up,n,8), %rax
+       adc     %rdx, w1
+       adc     R32(w2), R32(w2)        C adc $0, w2
+L(addmul_entry_0):
+       mul     v1
+       xor     R32(w3), R32(w3)
+       add     w0, -24(rp,n,8)
+       adc     %rax, w1
+       mov     -16(up,n,8), %rax
+       adc     %rdx, w2
+       mul     v0
+       add     %rax, w1
+       mov     -16(up,n,8), %rax
+       adc     %rdx, w2
+       adc     $0, R32(w3)
+L(addmul_entry_3):
+       mul     v1
+       add     w1, -16(rp,n,8)
+       adc     %rax, w2
+       mov     -8(up,n,8), %rax
+       adc     %rdx, w3
+       mul     v0
+       xor     R32(w0), R32(w0)
+       add     %rax, w2
+       adc     %rdx, w3
+       mov     $0, R32(w1)     C mov rather than xor: the carry must survive to the adc below
+       mov     -8(up,n,8), %rax
+       adc     R32(w1), R32(w0)        C adc $0, w0
+L(addmul_entry_2):
+       mul     v1
+       add     w2, -8(rp,n,8)
+       adc     %rax, w3
+       adc     %rdx, w0
+       mov     (up,n,8), %rax
+       mul     v0
+       add     %rax, w3
+       mov     (up,n,8), %rax
+       adc     %rdx, w0
+       adc     $0, R32(w1)
+L(addmul_entry_1):
+       mul     v1
+       add     $4, n           C four limbs per pass
+       js      L(addmul_top)
+
+       add     w3, -8(rp)      C wind-down: last accumulate, then two newly written top limbs
+       adc     %rax, w0
+       mov     w0, (rp)
+       adc     %rdx, w1
+       mov     w1, 8(rp)
+
+       add     $-2, vn                 C vn -= 2
+       jz      L(ret)
+
+       lea     16(rp), rp              C rp += 2
+       lea     16(vp), vp              C vp += 2
+
+       mov     (vp), v0        C multiplier limbs for the next pass
+       mov     8(vp), v1
+
+       jmp     *outer_addr     C next addmul pass, entering after the one-time un bias
+
+       ALIGN(16)
+L(ret):        pop     %r15           C restore in reverse push order
+       pop     %r14
+       pop     %r13
+       pop     %r12
+       pop     %rbp
+       pop     %rbx
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86_64/nano/gmp-mparam.h b/mpn/x86_64/nano/gmp-mparam.h

new file mode 100644 (file)

index 0000000..e029a3a
--- /dev/null
+++ b/mpn/x86_64/nano/gmp-mparam.h
@@ -0,0 +1,208 @@
+/* VIA Nano gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
+2008, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 64
+#define BYTES_PER_MP_LIMB 8
+
+/* 1600 MHz Nano 2xxx */
+
+#define MOD_1_NORM_THRESHOLD                 0  /* always */
+#define MOD_1_UNNORM_THRESHOLD               0  /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          7
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          5
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         7
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        14
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     13
+#define USE_PREINV_DIVREM_1                  1  /* native */
+#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD           24
+
+#define MUL_TOOM22_THRESHOLD                28
+#define MUL_TOOM33_THRESHOLD                33
+#define MUL_TOOM44_THRESHOLD               292
+#define MUL_TOOM6H_THRESHOLD               746
+#define MUL_TOOM8H_THRESHOLD               866
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      73
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     201
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     211
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD     219
+
+#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
+#define SQR_TOOM2_THRESHOLD                 38
+#define SQR_TOOM3_THRESHOLD                 77
+#define SQR_TOOM4_THRESHOLD                620
+#define SQR_TOOM6_THRESHOLD                996
+#define SQR_TOOM8_THRESHOLD               1138
+
+#define MULMOD_BNM1_THRESHOLD               15
+#define SQRMOD_BNM1_THRESHOLD               17
+
+#define MUL_FFT_MODF_THRESHOLD             468  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    468, 5}, {     21, 6}, {     11, 5}, {     23, 6}, \
+    {     12, 5}, {     25, 6}, {     13, 5}, {     27, 6}, \
+    {     15, 5}, {     31, 6}, {     21, 7}, {     11, 6}, \
+    {     24, 7}, {     13, 6}, {     27, 7}, {     15, 6}, \
+    {     31, 7}, {     19, 6}, {     39, 7}, {     21, 8}, \
+    {     11, 7}, {     25, 8}, {     13, 7}, {     27, 8}, \
+    {     15, 7}, {     32, 8}, {     17, 7}, {     35, 8}, \
+    {     19, 7}, {     40, 8}, {     23, 7}, {     47, 8}, \
+    {     27, 9}, {     15, 8}, {     35, 9}, {     19, 8}, \
+    {     39, 4}, {    767, 5}, {    399, 6}, {    201, 5}, \
+    {    415, 6}, {    208, 7}, {    105, 6}, {    214, 7}, \
+    {    127, 8}, {     71, 9}, {     39, 8}, {     87, 9}, \
+    {     47, 8}, {     97, 9}, {     55,11}, {     15,10}, \
+    {     31, 9}, {     67, 8}, {    135, 9}, {     75,10}, \
+    {     39, 9}, {     87,10}, {     47, 9}, {     99,10}, \
+    {     55,11}, {     31,10}, {     63, 9}, {    127,10}, \
+    {     87,11}, {     47,10}, {    103,12}, {     31,11}, \
+    {     63,10}, {    143,11}, {     79,10}, {    167,11}, \
+    {     95,10}, {    199,11}, {    111,12}, {     63,11}, \
+    {    127, 9}, {    511,11}, {    143,10}, {    287,11}, \
+    {    159, 9}, {    639,11}, {    175,12}, {     95,11}, \
+    {    191,10}, {    383, 9}, {    767,11}, {    207,10}, \
+    {    415, 9}, {    831,13}, {     63,12}, {    127,11}, \
+    {    255,10}, {    511, 9}, {   1023,11}, {    271,10}, \
+    {    543, 9}, {   1087,11}, {    287,10}, {    575,12}, \
+    {    159,11}, {    319,10}, {    639, 9}, {   1279,11}, \
+    {    335,10}, {    671, 9}, {   1343,10}, {    703, 9}, \
+    {   1407,12}, {    191,11}, {    383,10}, {    767, 9}, \
+    {   1535,11}, {    415,10}, {    831, 9}, {   1663,12}, \
+    {    223,11}, {    447,13}, {    127,12}, {    255,11}, \
+    {    511,10}, {   1023,11}, {    543,10}, {   1087,12}, \
+    {    287,11}, {    575,10}, {   1151,11}, {    607,10}, \
+    {   1215,12}, {    319,11}, {    671,10}, {   1343,12}, \
+    {    351,11}, {    703,10}, {   1407,13}, {    191,12}, \
+    {    383,11}, {    767,10}, {   1599,12}, {    415,11}, \
+    {    831,10}, {   1663,12}, {    447,11}, {    895,14}, \
+    {    127,13}, {    255,12}, {    511,11}, {   1023,12}, \
+    {    543,11}, {   1087,10}, {   2175,12}, {    575,11}, \
+    {   1151,12}, {    607,11}, {   1215,13}, {    319,12}, \
+    {    639,11}, {   1343,12}, {    703,11}, {   1407,12}, \
+    {    735,11}, {   1471,13}, {    383,12}, {    767,11}, \
+    {   1535,12}, {    831,11}, {   1663,13}, {    447,12}, \
+    {    895,11}, {   1791,12}, {    959,11}, {   1919,13}, \
+    {    511,12}, {   1023,11}, {   2047,12}, {   1087,11}, \
+    {   2175,13}, {    575,12}, {   1215,13}, {    639,12}, \
+    {   1343,13}, {    703,12}, {   1471,13}, {    767,12}, \
+    {   1535,13}, {    831,12}, {   1663,13}, {    895,12}, \
+    {   1791,13}, {    959,12}, {   1919,14}, {    511,13}, \
+    {   1023,12}, {   2047,13}, {   1087,12}, {   2175,13}, \
+    {   1215,14}, {    639,13}, {   1471,14}, {    767,13}, \
+    {   1727,14}, {    895,13}, {   1791,12}, {   3583,13}, \
+    {   8192,14}, {  16384,15}, {  32768,16}, {  65536,17}, \
+    { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
+    {2097152,22}, {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 215  /* number of {..} entries in MUL_FFT_TABLE3: 53 rows of 4 plus a final row of 3 */
+#define MUL_FFT_THRESHOLD                 3712
+
+#define SQR_FFT_MODF_THRESHOLD             432  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    432, 5}, {     21, 6}, {     11, 5}, {     23, 6}, \
+    {     12, 5}, {     25, 6}, {     21, 7}, {     11, 6}, \
+    {     25, 7}, {     13, 6}, {     27, 7}, {     25, 8}, \
+    {     13, 7}, {     28, 8}, {     15, 7}, {     32, 8}, \
+    {     17, 7}, {     35, 8}, {     19, 7}, {     39, 8}, \
+    {     27, 9}, {     15, 8}, {     35, 9}, {     19, 8}, \
+    {     41, 9}, {     23, 8}, {     47, 9}, {     27,10}, \
+    {     15, 6}, {    255, 4}, {   1151, 5}, {    607, 7}, \
+    {    167, 8}, {     99, 9}, {     55,10}, {     31, 9}, \
+    {     75,10}, {     39, 9}, {     87,10}, {     47, 9}, \
+    {    103,10}, {     55, 9}, {    111,11}, {     31,10}, \
+    {     63, 9}, {    131,10}, {     71, 9}, {    143,10}, \
+    {     79,11}, {     47,10}, {    103,12}, {     31,11}, \
+    {     63,10}, {    135, 9}, {    271,10}, {    143,11}, \
+    {     79,10}, {    159, 9}, {    319,10}, {    167,11}, \
+    {     95,10}, {    191, 9}, {    383, 8}, {    767,10}, \
+    {    199,11}, {    111,12}, {     63,11}, {    127,10}, \
+    {    255, 9}, {    511, 8}, {   1023,11}, {    143, 9}, \
+    {    575, 8}, {   1151,11}, {    159,10}, {    319, 9}, \
+    {    639, 8}, {   1279,12}, {     95,11}, {    191,10}, \
+    {    383, 9}, {    767, 8}, {   1535, 9}, {    799,11}, \
+    {    207,10}, {    415, 9}, {    831,13}, {     63,12}, \
+    {    127,11}, {    255,10}, {    511, 9}, {   1023,11}, \
+    {    271,10}, {    543, 9}, {   1087,10}, {    575, 9}, \
+    {   1151,11}, {    303,12}, {    159,10}, {    639, 9}, \
+    {   1279,11}, {    335,10}, {    671, 9}, {   1343,11}, \
+    {    351,10}, {    703, 9}, {   1471,12}, {    191,11}, \
+    {    383,10}, {    767, 9}, {   1535,11}, {    399,10}, \
+    {    799,11}, {    415,10}, {    831, 9}, {   1663,12}, \
+    {    223,11}, {    447,10}, {    895,13}, {    127,12}, \
+    {    255,11}, {    511,10}, {   1023,11}, {    543,10}, \
+    {   1087,12}, {    287,11}, {    575,10}, {   1215,11}, \
+    {    639,10}, {   1279,11}, {    671,10}, {   1343,11}, \
+    {    703,10}, {   1407,11}, {    735,10}, {   1471,13}, \
+    {    191,12}, {    383,11}, {    767,10}, {   1535,11}, \
+    {    799,10}, {   1599,12}, {    415,11}, {    831,10}, \
+    {   1663,11}, {    863,12}, {    447,11}, {    895,10}, \
+    {   1791,11}, {    959,14}, {    127,12}, {    511,11}, \
+    {   1023,12}, {    543,11}, {   1087,12}, {    575,11}, \
+    {   1215,12}, {    639,11}, {   1279,12}, {    671,11}, \
+    {   1343,12}, {    703,11}, {   1471,13}, {    383,12}, \
+    {    767,11}, {   1535,12}, {    799,11}, {   1599,12}, \
+    {    831,11}, {   1663,12}, {    863,13}, {    447,12}, \
+    {    895,11}, {   1791,12}, {    959,13}, {    511,12}, \
+    {   1023,11}, {   2047,12}, {   1087,13}, {    575,12}, \
+    {   1215,13}, {    639,12}, {   1343,13}, {    703,12}, \
+    {   1471,13}, {    767,12}, {   1599,13}, {    831,12}, \
+    {   1727,13}, {    895,12}, {   1791,13}, {    959,12}, \
+    {   1919,14}, {    511,13}, {   1023,12}, {   2047,13}, \
+    {   1215,14}, {    639,13}, {   1471,14}, {    767,13}, \
+    {   1727,14}, {    895,13}, {   1791,12}, {   3583,13}, \
+    {   8192,14}, {  16384,15}, {  32768,16}, {  65536,17}, \
+    { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
+    {2097152,22}, {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 215  /* number of {..} entries in SQR_FFT_TABLE3: 53 rows of 4 plus a final row of 3 */
+#define SQR_FFT_THRESHOLD                 3264
+
+#define MULLO_BASECASE_THRESHOLD            11
+#define MULLO_DC_THRESHOLD                   0  /* never mpn_mullo_basecase */
+#define MULLO_MUL_N_THRESHOLD             6253
+
+#define DC_DIV_QR_THRESHOLD                 53
+#define DC_DIVAPPR_Q_THRESHOLD             151
+#define DC_BDIV_QR_THRESHOLD                51
+#define DC_BDIV_Q_THRESHOLD                 79
+
+#define INV_MULMOD_BNM1_THRESHOLD           82
+#define INV_NEWTON_THRESHOLD               149
+#define INV_APPR_THRESHOLD                 155
+
+#define BINV_NEWTON_THRESHOLD              228
+#define REDC_1_TO_REDC_2_THRESHOLD          12
+#define REDC_2_TO_REDC_N_THRESHOLD          77
+
+#define MU_DIV_QR_THRESHOLD               1787
+#define MU_DIVAPPR_Q_THRESHOLD            1970
+#define MUPI_DIV_QR_THRESHOLD               74
+#define MU_BDIV_QR_THRESHOLD              1334
+#define MU_BDIV_Q_THRESHOLD               1652
+
+#define MATRIX22_STRASSEN_THRESHOLD         17
+#define HGCD_THRESHOLD                      93
+#define GCD_DC_THRESHOLD                   245
+#define GCDEXT_DC_THRESHOLD                456
+#define JACOBI_BASE_METHOD                   1
+
+#define GET_STR_DC_THRESHOLD                11
+#define GET_STR_PRECOMPUTE_THRESHOLD        24
+#define SET_STR_DC_THRESHOLD               552
+#define SET_STR_PRECOMPUTE_THRESHOLD      1898
diff --git a/mpn/x86_64/pentium4/aors_n.asm b/mpn/x86_64/pentium4/aors_n.asm

new file mode 100644 (file)

index 0000000..90f5a21
--- /dev/null
+++ b/mpn/x86_64/pentium4/aors_n.asm
@@ -0,0 +1,178 @@
+dnl  x86-64 mpn_add_n/mpn_sub_n optimized for Pentium 4.
+
+dnl  Copyright 2007, 2008 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C           cycles/limb
+C K8,K9:        2.8
+C K10:          2.8
+C P4:           4
+C P6-15:        3.6-5  (fluctuating)
+
+
+C INPUT PARAMETERS
+define(`rp',   `%rdi')
+define(`up',   `%rsi')
+define(`vp',   `%rdx')
+define(`n',    `%rcx')
+define(`cy',   `%r8')
+
+ifdef(`OPERATION_add_n', `
+       define(ADDSUB,        add)
+       define(func,          mpn_add_n)
+       define(func_nc,       mpn_add_nc)')
+ifdef(`OPERATION_sub_n', `
+       define(ADDSUB,        sub)
+       define(func,          mpn_sub_n)
+       define(func_nc,       mpn_sub_nc)')
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
+
+ASM_START()
+
+       TEXT
+       ALIGN(16)
+
+PROLOGUE(func_nc)              C variant with carry-in: cy (%r8) is supplied by the caller
+       jmp     L(ent)          C join the common body with cy left as passed
+EPILOGUE()
+
+PROLOGUE(func)                 C rp[] = up[] ADDSUB vp[] over n limbs; the carry or borrow out is returned in %rax
+       xor     %r8, %r8        C no carry-in for the plain entry point
+L(ent):        push    %rbx           C %rbx and %rax alternate as saved carry bits below
+       push    %r12
+
+       mov     (vp), %r9       C vp[0]
+
+       mov     R32(n), R32(%rax)
+       and     $3, R32(%rax)   C choose the loop entry from n mod 4
+       jne     L(n00)          C n = 0, 4, 8, ...
+       mov     R32(%r8), R32(%rbx)     C carry-in goes to %rbx, it is applied at L(L00)
+       mov     (up), %r8
+       mov     8(up), %r10
+       ADDSUB  %r9, %r8
+       mov     8(vp), %r9
+       setc    R8(%rax)        C save the carry of limb 0 (%rax was 0 from the and above)
+       lea     -16(rp), rp     C bias rp so the store offsets inside the loop fit this entry
+       jmp     L(L00)
+
+L(n00):        cmp     $2, R32(%rax)
+       jnc     L(n01)          C n = 1, 5, 9, ...
+       mov     (up), %r11
+       mov     R32(%r8), R32(%rax)     C carry-in goes to %rax, it is applied at L(L01)
+       xor     R32(%rbx), R32(%rbx)
+       dec     n
+       jnz     L(gt1)
+       ADDSUB  %r9, %r11       C n == 1: single limb handled right here
+       setc    R8(%rbx)
+       ADDSUB  %rax, %r11      C apply the carry-in
+       adc     $0, R32(%rbx)   C fold in a carry produced by the carry-in itself
+       mov     %r11, (rp)
+       jmp     L(ret)
+L(gt1):        mov     8(up), %r8
+       ADDSUB  %r9, %r11
+       mov     8(vp), %r9
+       setc    R8(%rbx)
+       lea     -8(rp), rp      C bias the pointers so the loop offsets fit this entry
+       lea     8(up), up
+       lea     8(vp), vp
+       jmp     L(L01)
+
+L(n01):        jne     L(n10)          C n = 2, 6, 10, ...
+       mov     (up), %r12
+       mov     R32(%r8), R32(%rbx)     C carry-in goes to %rbx, it is applied at L(L10)
+       mov     8(up), %r11
+       ADDSUB  %r9, %r12
+       mov     8(vp), %r9
+       setc    R8(%rax)
+       lea     -32(rp), rp     C bias the pointers so the loop offsets fit this entry
+       lea     16(up), up
+       lea     16(vp), vp
+       jmp     L(L10)
+
+L(n10):        mov     (up), %r10      C n = 3, 7, 11, ...
+       mov     R32(%r8), R32(%rax)     C carry-in goes to %rax, it is applied at L(L11)
+       xor     R32(%rbx), R32(%rbx)
+       mov     8(up), %r12
+       ADDSUB  %r9, %r10
+       mov     8(vp), %r9
+       setc    R8(%rbx)
+       lea     -24(rp), rp     C bias the pointers so the loop offsets fit this entry
+       lea     -8(up), up
+       lea     -8(vp), vp
+       jmp     L(L11)
+
+L(c0): mov     $1, R8(%rbx)   C out of line: applying the previous carry itself carried, force the saved carry to 1
+       jmp     L(rc0)
+L(c1): mov     $1, R8(%rax)   C same fix-up for the limb handled at L(L00)
+       jmp     L(rc1)
+L(c2): mov     $1, R8(%rbx)   C same fix-up for the limb handled at L(L11)
+       jmp     L(rc2)
+L(c3): mov     $1, R8(%rax)   C same fix-up for the limb handled at L(L10)
+       jmp     L(rc3)
+
+       ALIGN(16)
+L(top):        mov     (up), %r8       C not on critical path
+       ADDSUB  %r9, %r11       C not on critical path
+       mov     (vp), %r9       C not on critical path
+       setc    R8(%rbx)        C save carry out
+       mov     %r12, (rp)
+L(L01):        ADDSUB  %rax, %r11      C apply previous carry out
+       jc      L(c0)           C jump if ripple
+L(rc0):        mov     8(up), %r10
+       ADDSUB  %r9, %r8
+       mov     8(vp), %r9
+       setc    R8(%rax)
+       mov     %r11, 8(rp)
+L(L00):        ADDSUB  %rbx, %r8      C apply the carry saved in %rbx
+       jc      L(c1)
+L(rc1):        mov     16(up), %r12
+       ADDSUB  %r9, %r10
+       mov     16(vp), %r9
+       setc    R8(%rbx)
+       mov     %r8, 16(rp)
+L(L11):        ADDSUB  %rax, %r10     C apply the carry saved in %rax
+       jc      L(c2)
+L(rc2):        mov     24(up), %r11
+       ADDSUB  %r9, %r12
+       lea     32(up), up      C advance both sources by four limbs
+       mov     24(vp), %r9
+       lea     32(vp), vp
+       setc    R8(%rax)
+       mov     %r10, 24(rp)
+L(L10):        ADDSUB  %rbx, %r12     C apply the carry saved in %rbx
+       jc      L(c3)
+L(rc3):        lea     32(rp), rp     C lea leaves the flags alone
+       sub     $4, n
+       ja      L(top)          C continue while more than four limbs were left before the sub
+
+L(end):        ADDSUB  %r9, %r11      C wind-down: the last limb pair is already loaded
+       setc    R8(%rbx)
+       mov     %r12, (rp)
+       ADDSUB  %rax, %r11      C apply the carry saved in %rax
+       jnc     L(1)
+       mov     $1, R8(%rbx)    C the carry-in rippled out of the top limb
+L(1):  mov     %r11, 8(rp)
+
+L(ret):        mov     R32(%rbx), R32(%rax)   C return value: final carry or borrow
+       pop     %r12
+       pop     %rbx
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/pentium4/aorslsh1_n.asm b/mpn/x86_64/pentium4/aorslsh1_n.asm

new file mode 100644 (file)

index 0000000..0723f3e
--- /dev/null
+++ b/mpn/x86_64/pentium4/aorslsh1_n.asm
@@ -0,0 +1,192 @@
+dnl  AMD64 mpn_addlsh1_n, mpn_sublsh1_n -- rp[] = up[] +- (vp[] << 1),
+dnl  optimized for Pentium 4.
+
+dnl  Copyright 2008 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C K8,K9:        3.8
+C K10:          4.8
+C P4:           5.8
+C P6-15:        ?
+
+
+C INPUT PARAMETERS
+define(`rp',`%rdi')
+define(`up',`%rsi')
+define(`vp',`%rdx')
+define(`n', `%rcx')
+
+ifdef(`OPERATION_addlsh1_n', `
+       define(ADDSUB,        add)
+       define(func,          mpn_addlsh1_n)')
+ifdef(`OPERATION_sublsh1_n', `
+       define(ADDSUB,        sub)
+       define(func,          mpn_sublsh1_n)')
+
+MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+C mpn_addlsh1_n / mpn_sublsh1_n (selected through ADDSUB and func above):
+C   rp[0..n-1] = up[0..n-1] ADDSUB (vp[0..n-1] << 1)
+C The value returned in eax is the carry (borrow) out of the last limb plus
+C the bit shifted out of the top of the last vp limb.
+C
+C Register use:
+C   r9              current vp limb shifted left one bit, with the top bit of
+C                   the previous vp limb inserted at the bottom
+C   ebp             high 32 bits of a vp limb; shr $31 leaves just its top bit
+C   r8,r10,r11,r12  up limbs / result limbs in flight
+C   al, bl          alternately hold the carry saved by setc for the next limb
+C
+C Each limb gets two ADDSUBs: first the shifted vp limb (carry saved with
+C setc), then the carry saved for the previous limb.  When the second one
+C carries, the out-of-line L(c0)..L(c3) stubs set the saved carry to 1.
+C Only mov and lea sit between an ADDSUB and its setc, since those leave the
+C flags untouched.
+PROLOGUE(func)
+       push    %rbx
+       push    %r12
+       push    %rbp
+
+       mov     (vp), %r9
+       shl     %r9                     C r9 = vp[0] << 1
+       mov     4(vp), R32(%rbp)        C high half of vp[0], for its top bit
+
+       xor     R32(%rbx), R32(%rbx)
+
+       mov     R32(n), R32(%rax)
+       and     $3, R32(%rax)           C dispatch on n mod 4
+       jne     L(n00)          C fall through for n = 0, 4, 8, ...
+
+       mov     (up), %r8
+       mov     8(up), %r10
+       shr     $31, R32(%rbp)          C ebp = top bit of vp[0]
+       ADDSUB  %r9, %r8
+       mov     8(vp), %r9
+       lea     (%rbp,%r9,2), %r9       C r9 = (vp[1] << 1) + top bit of vp[0]
+       setc    R8(%rax)
+       mov     12(vp), R32(%rbp)
+       lea     -16(rp), rp             C so that 16(rp) addresses rp[0]
+       jmp     L(L00)
+
+L(n00):        cmp     $2, R32(%rax)
+       jnc     L(n01)          C fall through for n = 1, 5, 9, ...
+       mov     (up), %r11
+       lea     -8(rp), rp              C so that 8(rp) addresses rp[0]
+       shr     $31, R32(%rbp)
+       ADDSUB  %r9, %r11
+       setc    R8(%rbx)
+       dec     n
+       jz      L(1)            C jump for n = 1
+       mov     8(up), %r8
+       mov     8(vp), %r9
+       lea     (%rbp,%r9,2), %r9
+       mov     12(vp), R32(%rbp)
+       lea     8(up), up
+       lea     8(vp), vp
+       jmp     L(L01)
+
+L(n01):        jne     L(n10)          C fall through for n = 2, 6, 10, ...
+       mov     (up), %r12
+       mov     8(up), %r11
+       shr     $31, R32(%rbp)
+       ADDSUB  %r9, %r12
+       mov     8(vp), %r9
+       lea     (%rbp,%r9,2), %r9
+       setc    R8(%rax)
+       mov     12(vp), R32(%rbp)
+       lea     16(up), up
+       lea     16(vp), vp
+       jmp     L(L10)
+
+L(n10):        mov     (up), %r10      C n = 3, 7, 11, ...
+       mov     8(up), %r12
+       shr     $31, R32(%rbp)
+       ADDSUB  %r9, %r10
+       mov     8(vp), %r9
+       lea     (%rbp,%r9,2), %r9
+       setc    R8(%rbx)
+       mov     12(vp), R32(%rbp)
+       lea     -24(rp), rp             C so that 24(rp) addresses rp[0]
+       lea     -8(up), up
+       lea     -8(vp), vp
+       jmp     L(L11)
+
+C Ripple stubs: adding the carry-in produced a carry itself, so record a
+C carry for the next limb and resume.
+L(c0): mov     $1, R8(%rbx)
+       jmp     L(rc0)
+L(c1): mov     $1, R8(%rax)
+       jmp     L(rc1)
+L(c2): mov     $1, R8(%rbx)
+       jmp     L(rc2)
+
+       ALIGN(16)
+L(top):        mov     (up), %r8       C not on critical path
+       shr     $31, R32(%rbp)
+       ADDSUB  %r9, %r11       C not on critical path
+       mov     (vp), %r9
+       lea     (%rbp,%r9,2), %r9
+       setc    R8(%rbx)        C save carry out
+       mov     4(vp), R32(%rbp)
+       mov     %r12, (rp)
+       ADDSUB  %rax, %r11      C apply previous carry out
+       jc      L(c0)           C jump if ripple
+L(rc0):
+L(L01):        mov     8(up), %r10
+       shr     $31, R32(%rbp)
+       ADDSUB  %r9, %r8
+       mov     8(vp), %r9
+       lea     (%rbp,%r9,2), %r9
+       setc    R8(%rax)
+       mov     12(vp), R32(%rbp)
+       mov     %r11, 8(rp)
+       ADDSUB  %rbx, %r8
+       jc      L(c1)
+L(rc1):
+L(L00):        mov     16(up), %r12
+       shr     $31, R32(%rbp)
+       ADDSUB  %r9, %r10
+       mov     16(vp), %r9
+       lea     (%rbp,%r9,2), %r9
+       setc    R8(%rbx)
+       mov     20(vp), R32(%rbp)
+       mov     %r8, 16(rp)
+       ADDSUB  %rax, %r10
+       jc      L(c2)
+L(rc2):
+L(L11):        mov     24(up), %r11
+       shr     $31, R32(%rbp)
+       ADDSUB  %r9, %r12
+       mov     24(vp), %r9
+       lea     (%rbp,%r9,2), %r9
+       lea     32(up), up
+       lea     32(vp), vp
+       setc    R8(%rax)
+       mov     -4(vp), R32(%rbp)       C high half of the limb loaded from 24(vp)
+       mov     %r10, 24(rp)
+       ADDSUB  %rbx, %r12
+       jc      L(c3)
+L(rc3):        lea     32(rp), rp
+L(L10):        sub     $4, n
+       ja      L(top)
+
+C Wind down: r12 and r11 still hold the last two limbs in flight.
+L(end):
+       shr     $31, R32(%rbp)          C ebp = bit shifted out of the last vp limb
+       ADDSUB  %r9, %r11
+       setc    R8(%rbx)
+       mov     %r12, (rp)
+       ADDSUB  %rax, %r11
+       jnc     L(1)
+       mov     $1, R8(%rbx)
+L(1):  mov     %r11, 8(rp)
+       lea     (%rbx,%rbp), R32(%rax)  C return carry + bit shifted out
+       pop     %rbp
+       pop     %r12
+       pop     %rbx
+       ret
+L(c3): mov     $1, R8(%rax)
+       jmp     L(rc3)
+EPILOGUE()
+ASM_END()
diff --git a/mpn/x86_64/pentium4/gmp-mparam.h b/mpn/x86_64/pentium4/gmp-mparam.h

new file mode 100644 (file)

index 0000000..899a455
--- /dev/null
+++ b/mpn/x86_64/pentium4/gmp-mparam.h
@@ -0,0 +1,218 @@
+/* Pentium 4-64 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
+2008, 2009, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 64
+#define BYTES_PER_MP_LIMB 8
+
+/* These routines exist for all x86_64 chips, but they are slower on Pentium4
+   than separate add/sub and shift.  Make sure they are not really used.  */
+#undef HAVE_NATIVE_mpn_rsh1add_n
+#undef HAVE_NATIVE_mpn_rsh1sub_n
+
+/* 3200 MHz Pentium / 2048 Kibyte cache / socket 775 */
+
+#define MOD_1_NORM_THRESHOLD                 0  /* always */
+#define MOD_1_UNNORM_THRESHOLD               0  /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          6
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          4
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         9
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        16
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     11
+#define USE_PREINV_DIVREM_1                  1  /* native */
+#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD           24
+
+#define MUL_TOOM22_THRESHOLD                12
+#define MUL_TOOM33_THRESHOLD                81
+#define MUL_TOOM44_THRESHOLD               121
+#define MUL_TOOM6H_THRESHOLD               270
+#define MUL_TOOM8H_THRESHOLD               430
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      81
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     138
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     144
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD      88
+
+#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
+#define SQR_TOOM2_THRESHOLD                 20
+#define SQR_TOOM3_THRESHOLD                 81
+#define SQR_TOOM4_THRESHOLD                226
+#define SQR_TOOM6_THRESHOLD                303
+#define SQR_TOOM8_THRESHOLD                454
+
+#define MULMOD_BNM1_THRESHOLD                9
+#define SQRMOD_BNM1_THRESHOLD               11
+
+
+#define MUL_FFT_MODF_THRESHOLD             240  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    240, 5}, {      9, 4}, {     19, 5}, {     11, 6}, \
+    {      6, 5}, {     13, 6}, {      7, 5}, {     15, 6}, \
+    {      8, 5}, {     17, 6}, {      9, 5}, {     19, 6}, \
+    {     13, 7}, {      7, 6}, {     15, 7}, {      8, 6}, \
+    {     17, 7}, {      9, 6}, {     19, 7}, {     11, 6}, \
+    {     23, 7}, {     13, 8}, {      7, 7}, {     17, 8}, \
+    {      9, 7}, {     21, 8}, {     11, 7}, {     23, 8}, \
+    {     13, 9}, {      7, 8}, {     15, 7}, {     31, 8}, \
+    {     21, 9}, {     11, 8}, {     25,10}, {      7, 9}, \
+    {     15, 8}, {     33, 9}, {     19, 8}, {     39, 9}, \
+    {     23, 8}, {     47, 9}, {     27,10}, {     15, 9}, \
+    {     39,10}, {     23, 9}, {     51,11}, {     15,10}, \
+    {     31, 9}, {     63,10}, {     39, 9}, {     79,10}, \
+    {     47,11}, {     31,10}, {     63, 9}, {    127, 8}, \
+    {    255,10}, {     79, 9}, {    159,11}, {     47,10}, \
+    {     95,12}, {     31,11}, {     63,10}, {    127, 9}, \
+    {    287,11}, {     79,10}, {    159, 9}, {    319,10}, \
+    {    175,11}, {     95,10}, {    191, 9}, {    383,10}, \
+    {    207, 9}, {    415,11}, {    111,10}, {    223,12}, \
+    {     63,11}, {    127,10}, {    255,11}, {    143,10}, \
+    {    287,11}, {    159,10}, {    319,11}, {    175,12}, \
+    {     95,11}, {    223,13}, {     63,12}, {    127,11}, \
+    {    287,10}, {    575,12}, {    159,11}, {    319,10}, \
+    {    639,11}, {    351,12}, {    191,11}, {    383,12}, \
+    {    223,11}, {    447,13}, {    127,12}, {    255,11}, \
+    {    511,12}, {    287,11}, {    575,12}, {    319,11}, \
+    {    639,12}, {    351,13}, {    191,12}, {    415,11}, \
+    {    831,12}, {    447,14}, {    127,13}, {    255,12}, \
+    {    511,11}, {   1023,12}, {    543,11}, {   1087,10}, \
+    {   2175,12}, {    575,13}, {    319,12}, {    639,11}, \
+    {   1279,12}, {    703,11}, {   1407,13}, {    383,12}, \
+    {    767,11}, {   1535,12}, {    831,11}, {   1663,13}, \
+    {    447,14}, {    255,13}, {    511,12}, {   1023,11}, \
+    {   2047,12}, {   1087,11}, {   2175,13}, {    575,12}, \
+    {   1151,11}, {   2303,12}, {   1215,11}, {   2431,10}, \
+    {   4863,13}, {    639,12}, {   1279,11}, {   2559,13}, \
+    {    703,12}, {   1407,14}, {    383,13}, {    767,12}, \
+    {   1535,13}, {    831,12}, {   1663,13}, {    895,15}, \
+    {    255,14}, {    511,13}, {   1023,12}, {   2047,13}, \
+    {   1087,12}, {   2175,13}, {   1215,12}, {   2431,11}, \
+    {   4863,14}, {    639,13}, {   1407,12}, {   2815,13}, \
+    {   1471,14}, {    767,13}, {   1663,14}, {    895,13}, \
+    {   1791,12}, {   3583,13}, {   1919,12}, {   3839,15}, \
+    {    511,14}, {   1023,13}, {   2175,14}, {   1151,13}, \
+    {   2303,12}, {   4607,13}, {   2431,12}, {   4863,14}, \
+    {   1279,13}, {   2687,14}, {   1407,13}, {   2815,15}, \
+    {    767,14}, {   1791,13}, {   3583,14}, {   1919,13}, \
+    {   3839,12}, {   7679,16}, {    511,15}, {   1023,14}, \
+    {   2303,13}, {   4607,14}, {   2431,13}, {   4863,15}, \
+    {   1279,14}, {   2943,13}, {   5887,15}, {   1535,14}, \
+    {   3199,15}, {   1791,14}, {   3839,13}, {   7679,16}, \
+    {   1023,15}, {   2047,14}, {   4351,15}, {   2303,14}, \
+    {   4863,15}, {   2815,14}, {   5887,13}, {  11775,16}, \
+    {   1535,15}, {   3071,14}, {   6655,15}, {  32768,16}, \
+    {  65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
+    {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 224
+#define MUL_FFT_THRESHOLD                 2752
+
+#define SQR_FFT_MODF_THRESHOLD             240  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    240, 5}, {     11, 6}, {      6, 5}, {     13, 6}, \
+    {     15, 7}, {      8, 6}, {     19, 7}, {     10, 6}, \
+    {     21, 7}, {     13, 8}, {      7, 7}, {     21, 8}, \
+    {     11, 7}, {     25, 8}, {     13, 9}, {      7, 8}, \
+    {     15, 7}, {     31, 8}, {     21, 9}, {     11, 8}, \
+    {     25,10}, {      7, 9}, {     15, 8}, {     33, 9}, \
+    {     19, 8}, {     39, 9}, {     23, 8}, {     47, 9}, \
+    {     27,10}, {     15, 9}, {     39,10}, {     23, 9}, \
+    {     51,11}, {     15,10}, {     31, 9}, {     63, 8}, \
+    {    127,10}, {     39, 9}, {     79,10}, {     47,11}, \
+    {     31,10}, {     63, 9}, {    127, 8}, {    255,10}, \
+    {     71, 9}, {    143, 7}, {    575,10}, {     79,11}, \
+    {     47,12}, {     31,11}, {     63,10}, {    127, 9}, \
+    {    255,10}, {    143, 9}, {    287,11}, {     79, 9}, \
+    {    319,10}, {    191, 9}, {    383,10}, {    207,12}, \
+    {     63,11}, {    127,10}, {    255,11}, {    143,10}, \
+    {    287, 9}, {    575,11}, {    159,10}, {    319,11}, \
+    {    175,10}, {    351,12}, {     95,11}, {    191,10}, \
+    {    383,11}, {    223,13}, {     63,12}, {    127,11}, \
+    {    287,12}, {    159,11}, {    351,12}, {    191,11}, \
+    {    383,10}, {    767,11}, {    415,12}, {    223,11}, \
+    {    447,13}, {    127,12}, {    255,11}, {    511,12}, \
+    {    287,11}, {    575,12}, {    319,11}, {    639,12}, \
+    {    351,13}, {    191,12}, {    383,11}, {    767,12}, \
+    {    415,11}, {    831,12}, {    447,14}, {    127,13}, \
+    {    255,12}, {    511,11}, {   1023,12}, {    543,11}, \
+    {   1087,10}, {   2175,12}, {    575,13}, {    319,12}, \
+    {    639,11}, {   1279,12}, {    671,11}, {   1343,12}, \
+    {    703,13}, {    383,12}, {    767,11}, {   1535,12}, \
+    {    831,13}, {    447,14}, {    255,13}, {    511,12}, \
+    {   1023,11}, {   2047,12}, {   1087,11}, {   2175,13}, \
+    {    575,12}, {   1151,11}, {   2303,12}, {   1215,13}, \
+    {    639,12}, {   1279,11}, {   2559,12}, {   1343,13}, \
+    {    703,14}, {    383,13}, {    767,12}, {   1535,13}, \
+    {    831,12}, {   1663,15}, {    255,14}, {    511,13}, \
+    {   1023,12}, {   2047,13}, {   1087,12}, {   2175,13}, \
+    {   1151,12}, {   2303,13}, {   1215,14}, {    639,13}, \
+    {   1343,12}, {   2687,13}, {   1407,12}, {   2815,14}, \
+    {    767,13}, {   1663,14}, {    895,13}, {   1791,12}, \
+    {   3583,13}, {   1919,12}, {   3839,15}, {    511,14}, \
+    {   1023,13}, {   2175,14}, {   1151,13}, {   2303,12}, \
+    {   4607,13}, {   2431,12}, {   4863,14}, {   1279,13}, \
+    {   2687,14}, {   1407,13}, {   2815,15}, {    767,14}, \
+    {   1535,13}, {   3071,14}, {   1791,13}, {   3583,14}, \
+    {   1919,13}, {   3839,12}, {   7679,16}, {    511,15}, \
+    {   1023,14}, {   2175,13}, {   4351,14}, {   2303,13}, \
+    {   4607,14}, {   2431,13}, {   4863,15}, {   1279,14}, \
+    {   2815,13}, {   5631,14}, {   2943,13}, {   5887,12}, \
+    {  11775,15}, {   1535,14}, {   3199,15}, {   1791,14}, \
+    {   3583,13}, {   7167,14}, {   3839,13}, {   7679,16}, \
+    {   1023,15}, {   2047,14}, {   4351,15}, {   2303,14}, \
+    {   4863,15}, {   2815,14}, {   5887,13}, {  11775,16}, \
+    {   1535,15}, {   3071,14}, {   6655,15}, {   3583,14}, \
+    {   7167,15}, {  32768,16}, {  65536,17}, { 131072,18}, \
+    { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
+    {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 222
+#define SQR_FFT_THRESHOLD                 2240
+
+#define MULLO_BASECASE_THRESHOLD             0  /* always */
+#define MULLO_DC_THRESHOLD                  27
+#define MULLO_MUL_N_THRESHOLD             5240
+
+#define DC_DIV_QR_THRESHOLD                 28
+#define DC_DIVAPPR_Q_THRESHOLD              60
+#define DC_BDIV_QR_THRESHOLD                31
+#define DC_BDIV_Q_THRESHOLD                 49
+
+#define INV_MULMOD_BNM1_THRESHOLD           22
+#define INV_NEWTON_THRESHOLD               226
+#define INV_APPR_THRESHOLD                 108
+
+#define BINV_NEWTON_THRESHOLD              262
+#define REDC_1_TO_REDC_2_THRESHOLD          11
+#define REDC_2_TO_REDC_N_THRESHOLD          44
+
+#define MU_DIV_QR_THRESHOLD                979
+#define MU_DIVAPPR_Q_THRESHOLD            1078
+#define MUPI_DIV_QR_THRESHOLD               91
+#define MU_BDIV_QR_THRESHOLD               792
+#define MU_BDIV_Q_THRESHOLD                942
+
+#define MATRIX22_STRASSEN_THRESHOLD         21
+#define HGCD_THRESHOLD                      97
+#define GCD_DC_THRESHOLD                   217
+#define GCDEXT_DC_THRESHOLD                237
+#define JACOBI_BASE_METHOD                   1
+
+#define GET_STR_DC_THRESHOLD                12
+#define GET_STR_PRECOMPUTE_THRESHOLD        23
+#define SET_STR_DC_THRESHOLD               572
+#define SET_STR_PRECOMPUTE_THRESHOLD      1588
diff --git a/mpn/x86_64/pentium4/lshift.asm b/mpn/x86_64/pentium4/lshift.asm

new file mode 100644 (file)

index 0000000..7596d9c
--- /dev/null
+++ b/mpn/x86_64/pentium4/lshift.asm
@@ -0,0 +1,148 @@
+dnl  x86-64 mpn_lshift optimized for Pentium 4.
+
+dnl  Copyright 2003, 2005, 2007, 2008 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C           cycles/limb
+C K8,K9:        2.5
+C K10:          ?
+C P4:           3.29
+C P6-15 (Core2): 2.1 (fluctuates, presumably cache related)
+C P6-28 (Atom):        14.3
+
+C INPUT PARAMETERS
+define(`rp',`%rdi')
+define(`up',`%rsi')
+define(`n',`%rdx')
+define(`cnt',`%cl')
+
+ASM_START()
+       TEXT
+       ALIGN(32)
+C mpn_lshift: store up[0..n-1] shifted left by cnt bits at rp[0..n-1] and
+C return, in rax, the bits shifted out of the most significant limb.
+C NOTE(review): presumably 1 <= cnt <= 63 and n >= 1; the code has no special
+C handling for cnt = 0 or n = 0 -- confirm against the callers.
+C
+C Limbs are processed from the most significant end downwards.  Each result
+C limb is (up[i] << cnt) | (up[i-1] >> (64-cnt)), built in MMX registers with
+C mm4 = cnt and mm5 = 64-cnt.  The feed-in code handles 0 to 3 limbs so that
+C L(rol) is always reached with n = 3 mod 4; L(ast) stores rp[0].
+PROLOGUE(mpn_lshift)
+       mov     -8(up,n,8), %rax        C rax = up[n-1], the top limb
+       movd    %ecx, %mm4              C mm4 = cnt, the lsh count
+       neg     %ecx                    C put rsh count in cl
+       and     $63, %ecx
+       movd    %ecx, %mm5              C mm5 = -cnt mod 64, the rsh count
+
+       lea     1(n), %r8d
+
+       shr     %cl, %rax               C function return value
+
+       and     $3, %r8d                C r8d = (n+1) mod 4
+       je      L(rol)                  C jump for n = 3, 7, 11, ...
+
+       dec     %r8d
+       jne     L(1)
+C      n = 4, 8, 12, ...
+C      Do the top limb alone, leaving n = 3 mod 4.
+       movq    -8(up,n,8), %mm2
+       psllq   %mm4, %mm2
+       movq    -16(up,n,8), %mm0
+       psrlq   %mm5, %mm0
+       por     %mm0, %mm2
+       movq    %mm2, -8(rp,n,8)
+       dec     n
+       jmp     L(rol)
+
+L(1):  dec     %r8d
+       je      L(1x)                   C jump for n = 1, 5, 9, 13, ...
+C      n = 2, 6, 10, 14, ...
+C      Do the top limb alone, leaving n = 1 mod 4.
+       movq    -8(up,n,8), %mm2
+       psllq   %mm4, %mm2
+       movq    -16(up,n,8), %mm0
+       psrlq   %mm5, %mm0
+       por     %mm0, %mm2
+       movq    %mm2, -8(rp,n,8)
+       dec     n
+L(1x):
+       cmp     $1, n
+       je      L(ast)                  C only the lowest limb is left
+C      Do two more limbs, leaving n = 3 mod 4.
+       movq    -8(up,n,8), %mm2
+       psllq   %mm4, %mm2
+       movq    -16(up,n,8), %mm3
+       psllq   %mm4, %mm3
+       movq    -16(up,n,8), %mm0
+       movq    -24(up,n,8), %mm1
+       psrlq   %mm5, %mm0
+       por     %mm0, %mm2
+       psrlq   %mm5, %mm1
+       por     %mm1, %mm3
+       movq    %mm2, -8(rp,n,8)
+       movq    %mm3, -16(rp,n,8)
+       sub     $2, n
+
+C Start the lsh halves of the next two result limbs in mm2 and mm3.
+L(rol):        movq    -8(up,n,8), %mm2
+       psllq   %mm4, %mm2
+       movq    -16(up,n,8), %mm3
+       psllq   %mm4, %mm3
+
+       sub     $4, n                   C                                     4
+       jb      L(end)                  C                                     2
+       ALIGN(32)
+C Main loop, four limbs per iteration; n was already decremented by 4, hence
+C the positive displacements.
+L(top):
+       C finish stuff from lsh block
+       movq    16(up,n,8), %mm0
+       movq    8(up,n,8), %mm1
+       psrlq   %mm5, %mm0
+       por     %mm0, %mm2
+       psrlq   %mm5, %mm1
+       movq    (up,n,8), %mm0
+       por     %mm1, %mm3
+       movq    -8(up,n,8), %mm1
+       movq    %mm2, 24(rp,n,8)
+       movq    %mm3, 16(rp,n,8)
+       C start two new rsh
+       psrlq   %mm5, %mm0
+       psrlq   %mm5, %mm1
+
+       C finish stuff from rsh block
+       movq    8(up,n,8), %mm2
+       movq    (up,n,8), %mm3
+       psllq   %mm4, %mm2
+       por     %mm2, %mm0
+       psllq   %mm4, %mm3
+       movq    -8(up,n,8), %mm2
+       por     %mm3, %mm1
+       movq    -16(up,n,8), %mm3
+       movq    %mm0, 8(rp,n,8)
+       movq    %mm1, (rp,n,8)
+       C start two new lsh
+       sub     $4, n
+       psllq   %mm4, %mm2
+       psllq   %mm4, %mm3
+
+       jae     L(top)                  C                                     2
+C Complete the two limbs whose lsh halves are pending in mm2 and mm3.
+L(end):
+       movq    16(up,n,8), %mm0
+       psrlq   %mm5, %mm0
+       por     %mm0, %mm2
+       movq    8(up,n,8), %mm1
+       psrlq   %mm5, %mm1
+       por     %mm1, %mm3
+       movq    %mm2, 24(rp,n,8)
+       movq    %mm3, 16(rp,n,8)
+
+C The lowest limb has nothing shifted in from below.
+L(ast):        movq    (up), %mm2
+       psllq   %mm4, %mm2
+       movq    %mm2, (rp)
+       emms                            C leave MMX state before returning
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/pentium4/popcount.asm b/mpn/x86_64/pentium4/popcount.asm

new file mode 100644 (file)

index 0000000..7b52044
--- /dev/null
+++ b/mpn/x86_64/pentium4/popcount.asm
@@ -0,0 +1,24 @@
+dnl  x86-64 mpn_popcount optimized for Pentium 4.
+
+dnl  Copyright 2007 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_popcount)
+include_mpn(`x86/pentium4/sse2/popcount.asm')
diff --git a/mpn/x86_64/pentium4/rshift.asm b/mpn/x86_64/pentium4/rshift.asm

new file mode 100644 (file)

index 0000000..61899c5
--- /dev/null
+++ b/mpn/x86_64/pentium4/rshift.asm
@@ -0,0 +1,151 @@
+dnl  x86-64 mpn_rshift optimized for Pentium 4.
+
+dnl  Copyright 2003, 2005, 2007, 2008 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C           cycles/limb
+C K8,K9:        2.5
+C K10:          ?
+C P4:           3.29
+C P6-15 (Core2): 2.1 (fluctuates, presumably cache related)
+C P6-28 (Atom):        14.3
+
+C INPUT PARAMETERS
+define(`rp',`%rdi')
+define(`up',`%rsi')
+define(`n',`%rdx')
+define(`cnt',`%cl')
+
+ASM_START()
+       TEXT
+       ALIGN(32)
+C mpn_rshift: store up[0..n-1] shifted right by cnt bits at rp[0..n-1] and
+C return, in rax, up[0] << (64-cnt), i.e. the bits shifted out at the low end.
+C NOTE(review): presumably 1 <= cnt <= 63 and n >= 1; the code has no special
+C handling for cnt = 0 or n = 0 -- confirm against the callers.
+C
+C Limbs are processed from the least significant end upwards.  up and rp are
+C first advanced to their last limb and n is negated, so 8(up,n,8) addresses
+C the lowest limb not yet done and n counts up towards zero.  Each result limb
+C is (up[i] >> cnt) | (up[i+1] << (64-cnt)), built in MMX registers with
+C mm4 = cnt and mm5 = 64-cnt.  L(ast) stores the most significant limb.
+PROLOGUE(mpn_rshift)
+       mov     (up), %rax              C rax = up[0], the lowest limb
+       movd    %ecx, %mm4              C mm4 = cnt, the rsh count
+       neg     %ecx                    C put lsh count in cl
+       and     $63, %ecx
+       movd    %ecx, %mm5              C mm5 = -cnt mod 64, the lsh count
+
+       lea     -8(up,n,8), up          C up = address of the top source limb
+       lea     -8(rp,n,8), rp          C rp = address of the top result limb
+       lea     1(n), %r8d
+       neg     n
+
+       shl     %cl, %rax               C function return value
+
+       and     $3, %r8d                C r8d = (n+1) mod 4, for the original n
+       je      L(rol)                  C jump for n = 3, 7, 11, ...
+
+       dec     %r8d
+       jne     L(1)
+C      n = 4, 8, 12, ...
+C      Do the lowest limb alone, leaving 3 mod 4 limbs.
+       movq    8(up,n,8), %mm2
+       psrlq   %mm4, %mm2
+       movq    16(up,n,8), %mm0
+       psllq   %mm5, %mm0
+       por     %mm0, %mm2
+       movq    %mm2, 8(rp,n,8)
+       inc     n
+       jmp     L(rol)
+
+L(1):  dec     %r8d
+       je      L(1x)                   C jump for n = 1, 5, 9, 13, ...
+C      n = 2, 6, 10, 14, ...
+C      Do the lowest limb alone, leaving 1 mod 4 limbs.
+       movq    8(up,n,8), %mm2
+       psrlq   %mm4, %mm2
+       movq    16(up,n,8), %mm0
+       psllq   %mm5, %mm0
+       por     %mm0, %mm2
+       movq    %mm2, 8(rp,n,8)
+       inc     n
+L(1x):
+       cmp     $-1, n
+       je      L(ast)                  C only the top limb is left
+C      Do two more limbs, leaving 3 mod 4 limbs.
+       movq    8(up,n,8), %mm2
+       psrlq   %mm4, %mm2
+       movq    16(up,n,8), %mm3
+       psrlq   %mm4, %mm3
+       movq    16(up,n,8), %mm0
+       movq    24(up,n,8), %mm1
+       psllq   %mm5, %mm0
+       por     %mm0, %mm2
+       psllq   %mm5, %mm1
+       por     %mm1, %mm3
+       movq    %mm2, 8(rp,n,8)
+       movq    %mm3, 16(rp,n,8)
+       add     $2, n
+
+C Start the rsh halves of the next two result limbs in mm2 and mm3.
+L(rol):        movq    8(up,n,8), %mm2
+       psrlq   %mm4, %mm2
+       movq    16(up,n,8), %mm3
+       psrlq   %mm4, %mm3
+
+       add     $4, n                   C                                     4
+       jb      L(end)                  C                                     2
+       ALIGN(32)
+C Main loop, four limbs per iteration; n was already advanced by 4, hence the
+C negative displacements.
+L(top):
+       C finish stuff from rsh block
+       movq    -16(up,n,8), %mm0
+       movq    -8(up,n,8), %mm1
+       psllq   %mm5, %mm0
+       por     %mm0, %mm2
+       psllq   %mm5, %mm1
+       movq    (up,n,8), %mm0
+       por     %mm1, %mm3
+       movq    8(up,n,8), %mm1
+       movq    %mm2, -24(rp,n,8)
+       movq    %mm3, -16(rp,n,8)
+       C start two new lsh
+       psllq   %mm5, %mm0
+       psllq   %mm5, %mm1
+
+       C finish stuff from lsh block
+       movq    -8(up,n,8), %mm2
+       movq    (up,n,8), %mm3
+       psrlq   %mm4, %mm2
+       por     %mm2, %mm0
+       psrlq   %mm4, %mm3
+       movq    8(up,n,8), %mm2
+       por     %mm3, %mm1
+       movq    16(up,n,8), %mm3
+       movq    %mm0, -8(rp,n,8)
+       movq    %mm1, (rp,n,8)
+       C start two new rsh
+       add     $4, n
+       psrlq   %mm4, %mm2
+       psrlq   %mm4, %mm3
+
+       jae     L(top)                  C                                     2
+C Complete the two limbs whose rsh halves are pending in mm2 and mm3.
+L(end):
+       movq    -16(up,n,8), %mm0
+       psllq   %mm5, %mm0
+       por     %mm0, %mm2
+       movq    -8(up,n,8), %mm1
+       psllq   %mm5, %mm1
+       por     %mm1, %mm3
+       movq    %mm2, -24(rp,n,8)
+       movq    %mm3, -16(rp,n,8)
+
+C The top limb has nothing shifted in from above.
+L(ast):        movq    (up), %mm2
+       psrlq   %mm4, %mm2
+       movq    %mm2, (rp)
+       emms                            C leave MMX state before returning
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/popham.asm b/mpn/x86_64/popham.asm

new file mode 100644 (file)

index 0000000..e2bdb1a
--- /dev/null
+++ b/mpn/x86_64/popham.asm
@@ -0,0 +1,157 @@
+dnl  AMD64 mpn_popcount, mpn_hamdist -- population count and hamming distance.
+
+dnl  Copyright 2004, 2005, 2007 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+
+C                   popcount         hamdist
+C                  cycles/limb     cycles/limb
+C K8,K9:                6               7
+C K10:                  6               7
+C P4:                  12              14.3
+C P6-15:                7               8
+
+C TODO
+C  * Tune.  It should be possible to reach 5 c/l for popcount and 6 c/l for
+C    hamdist for K8/K9.
+
+
+ifdef(`OPERATION_popcount',`
+  define(`func',`mpn_popcount')
+  define(`up',         `%rdi')
+  define(`n',          `%rsi')
+  define(`h55555555',  `%r10')
+  define(`h33333333',  `%r11')
+  define(`h0f0f0f0f',  `%rcx')
+  define(`h01010101',  `%rdx')
+  define(`HAM',                `dnl')
+')
+ifdef(`OPERATION_hamdist',`
+  define(`func',`mpn_hamdist')
+  define(`up',         `%rdi')
+  define(`vp',         `%rsi')
+  define(`n',          `%rdx')
+  define(`h55555555',  `%r10')
+  define(`h33333333',  `%r11')
+  define(`h0f0f0f0f',  `%rcx')
+  define(`h01010101',  `%r14')
+  define(`HAM',                `$1')
+')
+
+
+MULFUNC_PROLOGUE(mpn_popcount mpn_hamdist)
+
+ASM_START()
+       TEXT
+       ALIGN(32)
+C func is mpn_popcount or mpn_hamdist, as selected above.  It returns in rax
+C the number of 1 bits in up[0..n-1] (popcount) or in up[0..n-1] XOR
+C vp[0..n-1] (hamdist); the HAM lines are only assembled for hamdist.
+C NOTE(review): n = 0 is not special-cased and would enter the two-limb loop;
+C presumably callers guarantee n >= 1 -- confirm.
+C
+C Bits are summed in parallel within each limb: 1-bit fields are folded to
+C 2-bit, 4-bit and 8-bit fields using the h555.., h333.. and h0f0f.. masks,
+C and the multiply by h0101.. adds the eight byte fields into the top byte.
+C An odd n is handled by doing one limb first; the loop then takes two limbs
+C per iteration and merges them at the 4-bit field stage.
+PROLOGUE(func)
+
+       pushq   %r12
+       pushq   %r13
+ HAM(` pushq   %r14            ')
+
+       movq    $0x5555555555555555, h55555555
+       movq    $0x3333333333333333, h33333333
+       movq    $0x0f0f0f0f0f0f0f0f, h0f0f0f0f
+       movq    $0x0101010101010101, h01010101
+
+       leaq    (up,n,8), up            C point just past the last limb
+ HAM(` leaq    (vp,n,8), vp    ')
+       negq    n                       C n counts up from -n towards zero
+
+       xorl    %eax, %eax              C running total
+
+       btq     $0, n
+       jnc     L(oop)                  C even n, go straight to the loop
+
+C Odd n: count one limb on its own first.
+       movq    (up,n,8), %r8
+ HAM(` xorq    (vp,n,8), %r8   ')
+
+       movq    %r8, %r9
+       shrq    %r8
+       andq    h55555555, %r8
+       subq    %r8, %r9                C 32 2-bit fields (0..2)
+
+       movq    %r9, %r8
+       shrq    $2, %r9
+       andq    h33333333, %r8
+       andq    h33333333, %r9
+       addq    %r8, %r9                C 16 4-bit fields (0..4)
+
+       movq    %r9, %r8
+       shrq    $4, %r9
+       andq    h0f0f0f0f, %r8
+       andq    h0f0f0f0f, %r9
+       addq    %r8, %r9                C 8 8-bit fields (0..8)
+
+       imulq   h01010101, %r9          C sum the 8 fields in high 8 bits
+       shrq    $56, %r9
+
+       addq    %r9, %rax               C add to total
+       addq    $1, n
+       jz      L(done)                 C that was the only limb
+
+       ALIGN(16)
+C Two limbs per iteration, in r8/r9 and r12/r13.
+L(oop):        movq    (up,n,8), %r8
+       movq    8(up,n,8), %r12
+ HAM(` xorq    (vp,n,8), %r8   ')
+ HAM(` xorq    8(vp,n,8), %r12 ')
+
+       movq    %r8, %r9
+       movq    %r12, %r13
+       shrq    %r8
+       shrq    %r12
+       andq    h55555555, %r8
+       andq    h55555555, %r12
+       subq    %r8, %r9                C 32 2-bit fields (0..2)
+       subq    %r12, %r13              C 32 2-bit fields (0..2)
+
+       movq    %r9, %r8
+       movq    %r13, %r12
+       shrq    $2, %r9
+       shrq    $2, %r13
+       andq    h33333333, %r8
+       andq    h33333333, %r9
+       andq    h33333333, %r12
+       andq    h33333333, %r13
+       addq    %r8, %r9                C 16 4-bit fields (0..4)
+       addq    %r12, %r13              C 16 4-bit fields (0..4)
+
+       addq    %r13, %r9               C 16 4-bit fields (0..8)
+       movq    %r9, %r8
+       shrq    $4, %r9
+       andq    h0f0f0f0f, %r8
+       andq    h0f0f0f0f, %r9
+       addq    %r8, %r9                C 8 8-bit fields (0..16)
+
+       imulq   h01010101, %r9          C sum the 8 fields in high 8 bits
+       shrq    $56, %r9
+
+       addq    %r9, %rax               C add to total
+       addq    $2, n
+       jnc     L(oop)                  C carry out means n has reached zero
+
+L(done):
+ HAM(` popq    %r14            ')
+       popq    %r13
+       popq    %r12
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86_64/redc_1.asm b/mpn/x86_64/redc_1.asm

new file mode 100644 (file)

index 0000000..ceaadea
--- /dev/null
+++ b/mpn/x86_64/redc_1.asm
@@ -0,0 +1,335 @@
+dnl  AMD64 mpn_redc_1 -- Montgomery reduction with a one-limb modular inverse.
+
+dnl  Copyright 2004, 2008 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C           cycles/limb
+C           cycles/limb
+C K8,K9:        2.5
+C K10:          2.5
+C P4:           ?
+C P6-15 (Core2): 5.3
+C P6-28 (Atom):         ?
+
+C TODO
+C  * Handle certain sizes, e.g., 1, 2, 3, 4, 8, with single-loop code.
+C    The code for 1, 2, 3, 4 should perhaps be completely register based.
+C  * Perhaps align outer loops.
+C  * The sub_n at the end leaks side-channel data.  How do we fix that?
+C  * Write mpn_add_n_sub_n computing R = A + B - C.  It should run at 2 c/l.
+C  * We could software pipeline the IMUL stuff, by putting it before the
+C    outer loops and before the end of the outer loops.  The last outer
+C    loop iteration would then compute an unneeded product, but it is at
+C    least not a stray read from up[], since it is at up[n].
+C  * Can we combine both the add_n and sub_n into the loops, somehow?
+
+C INPUT PARAMETERS
+define(`rp',     `%rdi')
+define(`up',     `%rsi')
+define(`param_mp',`%rdx')
+define(`n',      `%rcx')
+define(`invm',   `%r8')
+
+define(`mp',     `%r13')
+define(`i',      `%r11')
+define(`nneg',   `%r12')
+
+ASM_START()
+       TEXT
+       ALIGN(32)
+PROLOGUE(mpn_redc_1)
+       push    %rbp                    C save the callee-saved registers used below
+       push    %rbx
+       push    %r12
+       push    %r13
+       push    %r14
+       push    n                       C keep entry n, reloaded from 8(%rsp) at L(common)
+       sub     $8, %rsp                C maintain ABI required rsp alignment
+
+       lea     (param_mp,n,8), mp      C mp += n
+       lea     (up,n,8), up            C up += n
+
+       mov     n, nneg
+       neg     nneg                    C nneg = -n, stays constant over the outer loops
+
+       mov     R32(n), R32(%rax)
+       and     $3, R32(%rax)           C dispatch on n mod 4
+       jz      L(b0)
+       cmp     $2, R32(%rax)
+       jz      L(b2)
+       jg      L(b3)                   C flags still from the cmp, taken for n mod 4 = 3
+
+L(b1): C lea   (mp), mp
+       lea     -16(up), up             C bias up for the offsets used in this variant
+L(o1): mov     nneg, i                 C outer loop, n mod 4 = 1: i = -n
+       mov     16(up,nneg,8), %rbp     C up[0]
+       imul    invm, %rbp              C q = up[0] * invm, low 64 bits
+
+       mov     (mp,i,8), %rax
+       xor     %ebx, %ebx
+       mul     %rbp
+       add     $1, i
+       jnz     1f                      C i reaches 0 here only when entry n is 1
+       add     %rax, 8(up,i,8)
+       adc     $0, %rdx
+       mov     %rdx, %r14
+       jmp     L(n1)
+
+1:     mov     %rax, %r9
+       mov     (mp,i,8), %rax
+       mov     %rdx, %r14
+       jmp     L(mi1)                  C enter the unrolled loop in the middle
+
+       ALIGN(16)
+L(lo1):        add     %r10, (up,i,8)  C 4-way unrolled up[] += q * mp[]
+       adc     %rax, %r9
+       mov     (mp,i,8), %rax
+       adc     %rdx, %r14
+L(mi1):        xor     %r10d, %r10d
+       mul     %rbp
+       add     %r9, 8(up,i,8)
+       adc     %rax, %r14
+       adc     %rdx, %rbx
+       mov     8(mp,i,8), %rax
+       mul     %rbp
+       add     %r14, 16(up,i,8)
+       adc     %rax, %rbx
+       adc     %rdx, %r10
+       mov     16(mp,i,8), %rax
+       mul     %rbp
+       xor     %r9d, %r9d
+       xor     %r14d, %r14d
+       add     %rbx, 24(up,i,8)
+       adc     %rax, %r10
+       mov     24(mp,i,8), %rax
+       adc     %rdx, %r9
+       xor     %ebx, %ebx
+       mul     %rbp
+       add     $4, i
+       js      L(lo1)                  C loop while i is negative
+L(ed1):        add     %r10, (up)
+       adc     %rax, %r9
+       adc     %rdx, %r14
+       xor     %r10d, %r10d
+       add     %r9, 8(up)
+       adc     $0, %r14
+L(n1): mov     %r14, 16(up,nneg,8)     C up[0] = carry limb of this pass
+       add     $8, up                  C next pass works one limb higher
+       dec     n                       C one outer pass per limb of entry n
+       jnz     L(o1)
+C      lea     (mp), mp
+       lea     16(up), up              C undo the L(b1) bias of up
+       jmp     L(common)
+
+L(b0): C lea   (mp), mp
+       lea     -16(up), up             C bias up for the offsets used in this variant
+L(o0): mov     nneg, i                 C outer loop, n mod 4 = 0: i = -n
+       mov     16(up,nneg,8), %rbp     C up[0]
+       imul    invm, %rbp              C q = up[0] * invm, low 64 bits
+
+       mov     (mp,i,8), %rax
+       xor     %r10d, %r10d
+       mul     %rbp
+       mov     %rax, %r14
+       mov     %rdx, %rbx
+       jmp     L(mi0)                  C enter the unrolled loop in the middle
+
+       ALIGN(16)
+L(lo0):        add     %r10, (up,i,8)  C 4-way unrolled up[] += q * mp[]
+       adc     %rax, %r9
+       mov     (mp,i,8), %rax
+       adc     %rdx, %r14
+       xor     %r10d, %r10d
+       mul     %rbp
+       add     %r9, 8(up,i,8)
+       adc     %rax, %r14
+       adc     %rdx, %rbx
+L(mi0):        mov     8(mp,i,8), %rax
+       mul     %rbp
+       add     %r14, 16(up,i,8)
+       adc     %rax, %rbx
+       adc     %rdx, %r10
+       mov     16(mp,i,8), %rax
+       mul     %rbp
+       xor     %r9d, %r9d
+       xor     %r14d, %r14d
+       add     %rbx, 24(up,i,8)
+       adc     %rax, %r10
+       mov     24(mp,i,8), %rax
+       adc     %rdx, %r9
+       xor     %ebx, %ebx
+       mul     %rbp
+       add     $4, i
+       js      L(lo0)                  C loop while i is negative
+L(ed0):        add     %r10, (up)
+       adc     %rax, %r9
+       adc     %rdx, %r14
+       xor     %r10d, %r10d
+       add     %r9, 8(up)
+       adc     $0, %r14
+       mov     %r14, 16(up,nneg,8)     C up[0] = carry limb of this pass
+       add     $8, up                  C next pass works one limb higher
+       dec     n                       C one outer pass per limb of entry n
+       jnz     L(o0)
+C      lea     (mp), mp
+       lea     16(up), up              C undo the L(b0) bias of up
+       jmp     L(common)
+
+
+L(b3): lea     -8(mp), mp              C bias mp and up for the offsets used in this variant
+       lea     -24(up), up
+L(o3): mov     nneg, i                 C outer loop, n mod 4 = 3: i = -n
+       mov     24(up,nneg,8), %rbp     C up[0]
+       imul    invm, %rbp              C q = up[0] * invm, low 64 bits
+
+       mov     8(mp,i,8), %rax
+       mul     %rbp
+       mov     %rax, %rbx
+       mov     %rdx, %r10
+       jmp     L(mi3)                  C enter the unrolled loop in the middle
+
+       ALIGN(16)
+L(lo3):        add     %r10, (up,i,8)  C 4-way unrolled up[] += q * mp[]
+       adc     %rax, %r9
+       mov     (mp,i,8), %rax
+       adc     %rdx, %r14
+       xor     %r10d, %r10d
+       mul     %rbp
+       add     %r9, 8(up,i,8)
+       adc     %rax, %r14
+       adc     %rdx, %rbx
+       mov     8(mp,i,8), %rax
+       mul     %rbp
+       add     %r14, 16(up,i,8)
+       adc     %rax, %rbx
+       adc     %rdx, %r10
+L(mi3):        mov     16(mp,i,8), %rax
+       mul     %rbp
+       xor     %r9d, %r9d
+       xor     %r14d, %r14d
+       add     %rbx, 24(up,i,8)
+       adc     %rax, %r10
+       mov     24(mp,i,8), %rax
+       adc     %rdx, %r9
+       xor     %ebx, %ebx
+       mul     %rbp
+       add     $4, i
+       js      L(lo3)                  C loop while i is negative
+L(ed3):        add     %r10, 8(up)
+       adc     %rax, %r9
+       adc     %rdx, %r14
+       xor     %r10d, %r10d
+       add     %r9, 16(up)
+       adc     $0, %r14
+       mov     %r14, 24(up,nneg,8)     C up[0] = carry limb of this pass
+       add     $8, up                  C next pass works one limb higher
+       dec     n                       C one outer pass per limb of entry n
+       jnz     L(o3)
+       lea     8(mp), mp               C undo the L(b3) bias of mp and up
+       lea     24(up), up
+       jmp     L(common)
+
+L(b2): lea     -16(mp), mp             C bias mp and up for the offsets used in this variant
+       lea     -32(up), up
+L(o2): mov     nneg, i                 C outer loop, n mod 4 = 2: i = -n
+       mov     32(up,nneg,8), %rbp     C up[0]
+       imul    invm, %rbp              C q = up[0] * invm, low 64 bits
+
+       mov     16(mp,i,8), %rax
+       mul     %rbp
+       xor     %r14d, %r14d
+       mov     %rax, %r10
+       mov     24(mp,i,8), %rax
+       mov     %rdx, %r9
+       jmp     L(mi2)                  C enter the unrolled loop in the middle
+
+       ALIGN(16)
+L(lo2):        add     %r10, (up,i,8)  C 4-way unrolled up[] += q * mp[]
+       adc     %rax, %r9
+       mov     (mp,i,8), %rax
+       adc     %rdx, %r14
+       xor     %r10d, %r10d
+       mul     %rbp
+       add     %r9, 8(up,i,8)
+       adc     %rax, %r14
+       adc     %rdx, %rbx
+       mov     8(mp,i,8), %rax
+       mul     %rbp
+       add     %r14, 16(up,i,8)
+       adc     %rax, %rbx
+       adc     %rdx, %r10
+       mov     16(mp,i,8), %rax
+       mul     %rbp
+       xor     %r9d, %r9d
+       xor     %r14d, %r14d
+       add     %rbx, 24(up,i,8)
+       adc     %rax, %r10
+       mov     24(mp,i,8), %rax
+       adc     %rdx, %r9
+L(mi2):        xor     %ebx, %ebx
+       mul     %rbp
+       add     $4, i
+       js      L(lo2)                  C loop while i is negative
+L(ed2):        add     %r10, 16(up)
+       adc     %rax, %r9
+       adc     %rdx, %r14
+       xor     %r10d, %r10d
+       add     %r9, 24(up)
+       adc     $0, %r14
+       mov     %r14, 32(up,nneg,8)     C up[0] = carry limb of this pass
+       add     $8, up                  C next pass works one limb higher
+       dec     n                       C one outer pass per limb of entry n
+       jnz     L(o2)
+       lea     16(mp), mp              C undo the L(b2) bias of mp and up
+       lea     32(up), up
+
+
+L(common):
+       lea     (mp,nneg,8), mp         C restore entry mp
+
+C   cy = mpn_add_n (rp, up, up - n, n);
+C                  rdi rsi  rdx    rcx
+       lea     (up,nneg,8), up         C up -= n
+       lea     (up,nneg,8), %rdx       C rdx = up - n [up entry value]
+       mov     rp, nneg                C preserve rp over first call
+       mov     8(%rsp), %rcx           C pass entry n
+C      mov     rp, %rdi
+       CALL(   mpn_add_n)
+       test    R32(%rax), R32(%rax)    C cy returned by mpn_add_n
+       jz      L(ret)                  C no carry out, skip the subtraction
+
+C     mpn_sub_n (rp, rp, mp, n);
+C               rdi rsi rdx rcx
+       mov     nneg, %rdi              C nneg holds the preserved rp here
+       mov     nneg, %rsi
+       mov     mp, %rdx
+       mov     8(%rsp), %rcx           C pass entry n
+       CALL(   mpn_sub_n)
+
+L(ret):
+       add     $8, %rsp                C drop the alignment padding
+       pop     n                       C just increment rsp
+       pop     %r14
+       pop     %r13
+       pop     %r12
+       pop     %rbx
+       pop     %rbp
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/rsh1aors_n.asm b/mpn/x86_64/rsh1aors_n.asm

new file mode 100644 (file)

index 0000000..41e67e3
--- /dev/null
+++ b/mpn/x86_64/rsh1aors_n.asm
@@ -0,0 +1,170 @@
+dnl  AMD64 mpn_rsh1add_n, mpn_rsh1sub_n -- rp[] = (up[] +- vp[]) >> 1
+
+dnl  Copyright 2003, 2005, 2009 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C           cycles/limb
+C K8,K9:        2.14   (mpn_add_n + mpn_rshift need 4.125)
+C K10:          2.14   (mpn_add_n + mpn_rshift need 4.125)
+C P4:          12.75
+C P6-15:        3.75
+
+C TODO
+C  * Rewrite to use indexed addressing, like addlsh1.asm and sublsh1.asm.
+C  * Try to approach the cache bandwidth 1.5 c/l.  It should be possible.
+
+C INPUT PARAMETERS
+define(`rp',`%rdi')
+define(`up',`%rsi')
+define(`vp',`%rdx')
+define(`n',`%rcx')
+define(`n32',`%ecx')
+
+ifdef(`OPERATION_rsh1add_n', `
+       define(ADDSUB,        add)
+       define(ADCSBB,        adc)
+       define(func_n,        mpn_rsh1add_n)
+       define(func_nc,       mpn_rsh1add_nc)')
+ifdef(`OPERATION_rsh1sub_n', `
+       define(ADDSUB,        sub)
+       define(ADCSBB,        sbb)
+       define(func_n,        mpn_rsh1sub_n)
+       define(func_nc,       mpn_rsh1sub_nc)')
+
+MULFUNC_PROLOGUE(mpn_rsh1add_n mpn_rsh1add_nc mpn_rsh1sub_n mpn_rsh1sub_nc)
+
+ASM_START()
+       TEXT
+
+       ALIGN(16)
+PROLOGUE(func_nc)
+       push    %rbx                    C rbx is callee-saved, popped at L(end)
+
+       xor     %eax, %eax              C clear the return value register
+       neg     %r8                     C set C flag from parameter (C = 1 iff r8 is nonzero)
+       mov     (up), %rbx
+       ADCSBB  (vp), %rbx              C low limb, including the carry or borrow in
+       jmp     L(ent)                  C continue in the common code of func_n
+EPILOGUE()
+
+       ALIGN(16)
+PROLOGUE(func_n)
+       push    %rbx                    C rbx is callee-saved, used as the pending limb
+
+       xor     %eax, %eax              C clear the return value register
+       mov     (up), %rbx
+       ADDSUB  (vp), %rbx              C low limb of up[] +- vp[]
+L(ent):
+       rcr     %rbx                    C rotate, save acy in bit 63, low bit goes to C flag
+       adc     %eax, %eax              C return value = bit shifted out at the low end
+
+       mov     n32, R32(%r11)
+       and     $3, R32(%r11)           C r11 = n mod 4
+
+       cmp     $1, R32(%r11)
+       je      L(do)                   C jump if n = 1 5 9 ...
+
+L(n1): cmp     $2, R32(%r11)
+       jne     L(n2)                   C jump unless n = 2 6 10 ...
+       add     %rbx, %rbx              C rotate carry limb, restore acy
+       mov     8(up), %r10
+       ADCSBB  8(vp), %r10
+       lea     8(up), up
+       lea     8(vp), vp
+       lea     8(rp), rp
+       rcr     %r10
+       rcr     %rbx                    C low bit of the next limb becomes bit 63
+       mov     %rbx, -8(rp)
+       jmp     L(cj1)
+
+L(n2): cmp     $3, R32(%r11)
+       jne     L(n3)                   C jump unless n = 3 7 11 ...
+       add     %rbx, %rbx              C rotate carry limb, restore acy
+       mov     8(up), %r9
+       mov     16(up), %r10
+       ADCSBB  8(vp), %r9
+       ADCSBB  16(vp), %r10
+       lea     16(up), up
+       lea     16(vp), vp
+       lea     16(rp), rp
+       rcr     %r10
+       rcr     %r9
+       rcr     %rbx
+       mov     %rbx, -16(rp)
+       jmp     L(cj2)
+
+L(n3): dec     n                       C come here for n = 4 8 12 ...
+       add     %rbx, %rbx              C rotate carry limb, restore acy
+       mov     8(up), %r8
+       mov     16(up), %r9
+       ADCSBB  8(vp), %r8
+       ADCSBB  16(vp), %r9
+       mov     24(up), %r10
+       ADCSBB  24(vp), %r10
+       lea     24(up), up
+       lea     24(vp), vp
+       lea     24(rp), rp
+       rcr     %r10
+       rcr     %r9
+       rcr     %r8
+       rcr     %rbx
+       mov     %rbx, -24(rp)
+       mov     %r8, -16(rp)
+L(cj2):        mov     %r9, -8(rp)
+L(cj1):        mov     %r10, %rbx      C topmost limb so far becomes the pending limb
+
+L(do):
+       shr     $2, n                   C                               4
+       je      L(end)                  C                               2
+       ALIGN(16)
+L(top):        add     %rbx, %rbx              C rotate carry limb, restore acy
+
+       mov     8(up), %r8
+       mov     16(up), %r9
+       ADCSBB  8(vp), %r8
+       ADCSBB  16(vp), %r9
+       mov     24(up), %r10
+       mov     32(up), %r11
+       ADCSBB  24(vp), %r10
+       ADCSBB  32(vp), %r11
+
+       lea     32(up), up
+       lea     32(vp), vp
+
+       rcr     %r11                    C rotate, save acy
+       rcr     %r10
+       rcr     %r9
+       rcr     %r8
+
+       rcr     %rbx                    C complete the limb pending from the previous round
+       mov     %rbx, (rp)
+       mov     %r8, 8(rp)
+       mov     %r9, 16(rp)
+       mov     %r10, 24(rp)
+       mov     %r11, %rbx              C topmost limb stays pending, acy in bit 63
+
+       lea     32(rp), rp
+       dec     n
+       jne     L(top)
+
+L(end):        mov     %rbx, (rp)      C store the topmost result limb
+       pop     %rbx
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/rshift.asm b/mpn/x86_64/rshift.asm

new file mode 100644 (file)

index 0000000..3b15868
--- /dev/null
+++ b/mpn/x86_64/rshift.asm
@@ -0,0 +1,158 @@
+dnl  AMD64 mpn_rshift -- mpn right shift.
+
+dnl  Copyright 2003, 2005, 2009 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C           cycles/limb
+C K8,K9:        2.375
+C K10:          2.375
+C P4:           8
+C P6-15 (Core2): 2.11
+C P6-28 (Atom):         5.75
+
+
+C INPUT PARAMETERS
+define(`rp',   `%rdi')
+define(`up',   `%rsi')
+define(`n',    `%rdx')
+define(`cnt',  `%rcx')
+
+ASM_START()
+       TEXT
+       ALIGN(32)
+PROLOGUE(mpn_rshift)
+       neg     R32(%rcx)               C put lsh count in cl (64-cnt, shifts use cl mod 64)
+       mov     (up), %rax
+       shl     R8(%rcx), %rax          C function return value: bits shifted out of up[0]
+       neg     R32(%rcx)               C put rsh count in cl
+
+       lea     1(n), R32(%r8)          C r8 = n + 1, its low 2 bits select the lead-in below
+
+       lea     -8(up,n,8), up          C up points at the last source limb
+       lea     -8(rp,n,8), rp          C rp points at the last result limb
+       neg     n                       C n = -n, negative index counting up
+
+       and     $3, R32(%r8)
+       je      L(rlx)                  C jump for n = 3, 7, 11, ...
+
+       dec     R32(%r8)
+       jne     L(1)
+C      n = 4, 8, 12, ...
+       mov     8(up,n,8), %r10
+       shr     R8(%rcx), %r10
+       neg     R32(%rcx)               C put lsh count in cl
+       mov     16(up,n,8), %r8
+       shl     R8(%rcx), %r8
+       or      %r8, %r10
+       mov     %r10, 8(rp,n,8)
+       inc     n
+       jmp     L(rll)
+
+L(1):  dec     R32(%r8)
+       je      L(1x)                   C jump for n = 1, 5, 9, 13, ...
+C      n = 2, 6, 10, 14, ...
+       mov     8(up,n,8), %r10
+       shr     R8(%rcx), %r10
+       neg     R32(%rcx)               C put lsh count in cl
+       mov     16(up,n,8), %r8
+       shl     R8(%rcx), %r8
+       or      %r8, %r10
+       mov     %r10, 8(rp,n,8)
+       inc     n
+       neg     R32(%rcx)               C put rsh count in cl
+L(1x):
+       cmp     $-1, n
+       je      L(ast)                  C only the last limb is left
+       mov     8(up,n,8), %r10
+       shr     R8(%rcx), %r10
+       mov     16(up,n,8), %r11
+       shr     R8(%rcx), %r11
+       neg     R32(%rcx)               C put lsh count in cl
+       mov     16(up,n,8), %r8
+       mov     24(up,n,8), %r9
+       shl     R8(%rcx), %r8
+       or      %r8, %r10
+       shl     R8(%rcx), %r9
+       or      %r9, %r11
+       mov     %r10, 8(rp,n,8)
+       mov     %r11, 16(rp,n,8)
+       add     $2, n
+
+L(rll):        neg     R32(%rcx)               C put rsh count in cl
+L(rlx):        mov     8(up,n,8), %r10
+       shr     R8(%rcx), %r10
+       mov     16(up,n,8), %r11
+       shr     R8(%rcx), %r11
+
+       add     $4, n                   C                                     4
+       jb      L(end)                  C                                     2
+       ALIGN(16)
+L(top):
+       C finish the two limbs whose rsh part is in r10, r11
+       neg     R32(%rcx)               C put lsh count in cl
+       mov     -16(up,n,8), %r8
+       mov     -8(up,n,8), %r9
+       shl     R8(%rcx), %r8
+       or      %r8, %r10
+       shl     R8(%rcx), %r9
+       or      %r9, %r11
+       mov     %r10, -24(rp,n,8)
+       mov     %r11, -16(rp,n,8)
+       C start two new limbs with their lsh part
+       mov     (up,n,8), %r8
+       mov     8(up,n,8), %r9
+       shl     R8(%rcx), %r8
+       shl     R8(%rcx), %r9
+
+       C finish the two limbs whose lsh part is in r8, r9
+       neg     R32(%rcx)               C put rsh count in cl
+       mov     -8(up,n,8), %r10
+       mov     0(up,n,8), %r11
+       shr     R8(%rcx), %r10
+       or      %r10, %r8
+       shr     R8(%rcx), %r11
+       or      %r11, %r9
+       mov     %r8, -8(rp,n,8)
+       mov     %r9, 0(rp,n,8)
+       C start two new limbs with their rsh part
+       mov     8(up,n,8), %r10
+       mov     16(up,n,8), %r11
+       shr     R8(%rcx), %r10
+       shr     R8(%rcx), %r11
+
+       add     $4, n
+       jae     L(top)                  C                                     2
+L(end):
+       neg     R32(%rcx)               C put lsh count in cl
+       mov     -8(up), %r8
+       shl     R8(%rcx), %r8
+       or      %r8, %r10
+       mov     (up), %r9
+       shl     R8(%rcx), %r9
+       or      %r9, %r11
+       mov     %r10, -16(rp)
+       mov     %r11, -8(rp)
+
+       neg     R32(%rcx)               C put rsh count in cl
+L(ast):        mov     (up), %r10      C last limb has no higher neighbour to merge in
+       shr     R8(%rcx), %r10
+       mov     %r10, (rp)
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/sqr_basecase.asm b/mpn/x86_64/sqr_basecase.asm

new file mode 100644 (file)

index 0000000..cfab923
--- /dev/null
+++ b/mpn/x86_64/sqr_basecase.asm
@@ -0,0 +1,786 @@
+dnl  AMD64 mpn_sqr_basecase.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2008, 2009 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C The inner loops of this code are the result of running a code generation and
+C optimization tool suite written by David Harvey and Torbjorn Granlund.
+
+C NOTES
+C   * This code only handles operands up to SQR_TOOM2_THRESHOLD_MAX.  That
+C     means we can safely use 32-bit operations for all sizes, unlike in e.g.,
+C     mpn_addmul_1.
+C   * The jump table could probably be optimized, at least for non-pic.
+C   * The special code for n=1,2,3 was quickly written.  It is probably too
+C     large and unnecessarily slow.
+C   * Consider combining small cases code so that the n=k-1 code jumps into
+C     the middle of the n=k code.
+C   * Avoid saving registers for small cases code.
+C   * Needed variables:
+C    n   r11  input size
+C    i   r8   work left, initially n
+C    j   r9   inner loop count
+C        r15  unused
+C    v0  r13
+C    v1  r14
+C    rp  rdi
+C    up  rsi
+C    w0  rbx
+C    w1  rcx
+C    w2  rbp
+C    w3  r10
+C    tp  r12
+C    lo  rax
+C    hi  rdx
+C        rsp
+
+C INPUT PARAMETERS
+define(`rp',     `%rdi')
+define(`up',     `%rsi')
+define(`n_param', `%rdx')
+
+C We should really trim this, for better spatial locality.  Alternatively,
+C we could grab the upper part of the stack area, leaving the lower part
+C instead of the upper part unused.
+deflit(SQR_TOOM2_THRESHOLD_MAX, 80)
+define(`STACK_ALLOC', eval(8*2*SQR_TOOM2_THRESHOLD_MAX))
+
+define(`n',    `%r11')
+define(`tp',   `%r12')
+define(`i',    `%r8')
+define(`j',    `%r9')
+define(`v0',   `%r13')
+define(`v1',   `%r14')
+define(`w0',   `%rbx')
+define(`w1',   `%rcx')
+define(`w2',   `%rbp')
+define(`w3',   `%r10')
+
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+
+PROLOGUE(mpn_sqr_basecase)
+       add     $-48, %rsp
+       mov     %rbx, 40(%rsp)
+       mov     %rbp, 32(%rsp)
+       mov     %r12, 24(%rsp)
+       mov     %r13, 16(%rsp)
+       mov     %r14, 8(%rsp)
+
+       mov     R32(n_param), R32(n)            C free original n register (rdx)
+       mov     R32(n_param), R32(%rcx)
+       and     $3, R32(%rcx)
+       lea     4(%rcx), %rbx
+       cmp     $4, R32(n_param)
+       cmovg   %rbx, %rcx
+       lea     L(jmptab)(%rip), %rax
+       jmp     *(%rax,%rcx,8)
+       JUMPTABSECT
+       ALIGN(8)
+L(jmptab):
+       .quad   L(4)
+       .quad   L(1)
+       .quad   L(2)
+       .quad   L(3)
+       .quad   L(0m4)
+       .quad   L(1m4)
+       .quad   L(2m4)
+       .quad   L(3m4)
+       TEXT
+
+L(1):  mov     (up), %rax
+       mul     %rax
+       mov     %rax, (rp)
+       mov     %rdx, 8(rp)
+       add     $40, %rsp
+       pop     %rbx
+       ret
+
+L(2):  mov     (up), %rax
+       mul     %rax
+       mov     %rax, (rp)
+       mov     %rdx, %r9
+       mov     8(up), %rax
+       mul     %rax
+       mov     %rax, %r10
+       mov     %rdx, %r11
+       mov     8(up), %rax
+       mov     (up), %rbx
+       mul     %rbx
+       add     %rax, %r9
+       adc     %rdx, %r10
+       adc     $0, %r11
+       add     %rax, %r9
+       mov     %r9, 8(rp)
+       adc     %rdx, %r10
+       mov     %r10, 16(rp)
+       adc     $0, %r11
+       mov     %r11, 24(rp)
+       add     $40, %rsp
+       pop     %rbx
+       ret
+
+L(3):  mov     (up), %rax
+       mul     %rax
+       mov     %rax, (rp)
+       mov     %rdx, 8(rp)
+       mov     8(up), %rax
+       mul     %rax
+       mov     %rax, 16(rp)
+       mov     %rdx, 24(rp)
+       mov     16(up), %rax
+       mul     %rax
+       mov     %rax, 32(rp)
+       mov     %rdx, 40(rp)
+
+       mov     (up), %rbx
+       mov     8(up), %rax
+       mul     %rbx
+       mov     %rax, %r8
+       mov     %rdx, %r9
+       mov     16(up), %rax
+       mul     %rbx
+       xor     R32(%r10), R32(%r10)
+       add     %rax, %r9
+       adc     %rdx, %r10
+
+       mov     8(up), %rbx
+       mov     16(up), %rax
+       mul     %rbx
+       xor     R32(%r11), R32(%r11)
+       add     %rax, %r10
+       adc     %rdx, %r11
+       add     %r8, %r8
+       adc     %r9, %r9
+       adc     %r10, %r10
+       adc     %r11, %r11
+       mov     $0, R32(%rbx)
+       adc     %rbx, %rbx
+       add     %r8, 8(rp)
+       adc     %r9, 16(rp)
+       adc     %r10, 24(rp)
+       adc     %r11, 32(rp)
+       adc     %rbx, 40(rp)
+       add     $40, %rsp
+       pop     %rbx
+       ret
+
+L(4):  mov     (up), %rax
+       mul     %rax
+       mov     %rax, (rp)
+       mov     %rdx, 8(rp)
+       mov     8(up), %rax
+       mul     %rax
+       mov     %rax, 16(rp)
+       mov     %rdx, 24(rp)
+       mov     16(up), %rax
+       mul     %rax
+       mov     %rax, 32(rp)
+       mov     %rdx, 40(rp)
+       mov     24(up), %rax
+       mul     %rax
+       mov     %rax, 48(rp)
+       mov     %rdx, 56(rp)
+
+       mov     (up), %rbx
+       mov     8(up), %rax
+       mul     %rbx
+       mov     %rax, %r8
+       mov     %rdx, %r9
+       mov     16(up), %rax
+       mul     %rbx
+       xor     R32(%r10), R32(%r10)
+       add     %rax, %r9
+       adc     %rdx, %r10
+       mov     24(up), %rax
+       mul     %rbx
+       xor     R32(%r11), R32(%r11)
+       add     %rax, %r10
+       adc     %rdx, %r11
+       mov     8(up), %rbx
+       mov     16(up), %rax
+       mul     %rbx
+       xor     R32(%r12), R32(%r12)
+       add     %rax, %r10
+       adc     %rdx, %r11
+       adc     $0, %r12
+       mov     24(up), %rax
+       mul     %rbx
+       add     %rax, %r11
+       adc     %rdx, %r12
+       mov     16(up), %rbx
+       mov     24(up), %rax
+       mul     %rbx
+       xor     R32(%rbp), R32(%rbp)
+       add     %rax, %r12
+       adc     %rdx, %rbp
+
+       add     %r8, %r8
+       adc     %r9, %r9
+       adc     %r10, %r10
+       adc     %r11, %r11
+       adc     %r12, %r12
+       mov     $0, R32(%rbx)
+       adc     %rbp, %rbp
+
+       adc     %rbx, %rbx
+       add     %r8, 8(rp)
+       adc     %r9, 16(rp)
+       adc     %r10, 24(rp)
+       adc     %r11, 32(rp)
+       adc     %r12, 40(rp)
+       adc     %rbp, 48(rp)
+       adc     %rbx, 56(rp)
+       add     $24, %rsp
+       pop     %r12
+       pop     %rbp
+       pop     %rbx
+       ret
+
+
+L(0m4):        add     $-STACK_ALLOC, %rsp
+       lea     -24(%rsp,n,8), tp               C point tp in middle of result operand
+       mov     (up), v0
+       mov     8(up), %rax
+       lea     (up,n,8), up            C point up at end of input operand
+
+       lea     -4(n), i
+C Function mpn_mul_1_m3(tp, up - i, i, up[-i - 1])
+       xor     R32(j), R32(j)
+       sub     n, j
+
+       mul     v0
+       xor     R32(w2), R32(w2)
+       mov     %rax, w0
+       mov     16(up,j,8), %rax
+       mov     %rdx, w3
+       jmp     L(L3)
+
+       ALIGN(16)
+L(mul_1_m3_top):
+       add     %rax, w2
+       mov     w3, (tp,j,8)
+       mov     (up,j,8), %rax
+       adc     %rdx, w1
+       xor     R32(w0), R32(w0)
+       mul     v0
+       xor     R32(w3), R32(w3)
+       mov     w2, 8(tp,j,8)
+       add     %rax, w1
+       adc     %rdx, w0
+       mov     8(up,j,8), %rax
+       mov     w1, 16(tp,j,8)
+       xor     R32(w2), R32(w2)
+       mul     v0
+       add     %rax, w0
+       mov     16(up,j,8), %rax
+       adc     %rdx, w3
+L(L3): xor     R32(w1), R32(w1)
+       mul     v0
+       add     %rax, w3
+       mov     24(up,j,8), %rax
+       adc     %rdx, w2
+       mov     w0, 24(tp,j,8)
+       mul     v0
+       add     $4, j
+       js      L(mul_1_m3_top)
+
+       add     %rax, w2
+       mov     w3, (tp)
+       adc     %rdx, w1
+       mov     w2, 8(tp)
+       mov     w1, 16(tp)
+
+       lea     eval(2*8)(tp), tp       C tp += 2
+       lea     -8(up), up
+       jmp     L(dowhile)
+
+
+L(1m4):        add     $-STACK_ALLOC, %rsp
+       lea     (%rsp,n,8), tp          C point tp in middle of result operand
+       mov     (up), v0                C u0
+       mov     8(up), %rax             C u1
+       lea     8(up,n,8), up           C point up at end of input operand
+
+       lea     -3(n), i
+C Function mpn_mul_2s_m0(tp, up - i, i, up - i - 1)
+       lea     -3(n), j
+       neg     j
+
+       mov     %rax, v1                C u1
+       mul     v0                      C u0 * u1
+       mov     %rdx, w1
+       xor     R32(w2), R32(w2)
+       mov     %rax, (%rsp)
+       jmp     L(m0)
+
+       ALIGN(16)
+L(mul_2_m0_top):
+       mul     v1
+       add     %rax, w0
+       adc     %rdx, w1
+       mov     -24(up,j,8), %rax
+       mov     $0, R32(w2)
+       mul     v0
+       add     %rax, w0
+       mov     -24(up,j,8), %rax
+       adc     %rdx, w1
+       adc     $0, R32(w2)
+       mul     v1                      C v1 * u0
+       add     %rax, w1
+       mov     w0, -24(tp,j,8)
+       adc     %rdx, w2
+L(m0): mov     -16(up,j,8), %rax       C u2, u6 ...
+       mul     v0                      C u0 * u2
+       mov     $0, R32(w3)
+       add     %rax, w1
+       adc     %rdx, w2
+       mov     -16(up,j,8), %rax
+       adc     $0, R32(w3)
+       mov     $0, R32(w0)
+       mov     w1, -16(tp,j,8)
+       mul     v1
+       add     %rax, w2
+       mov     -8(up,j,8), %rax
+       adc     %rdx, w3
+       mov     $0, R32(w1)
+       mul     v0
+       add     %rax, w2
+       mov     -8(up,j,8), %rax
+       adc     %rdx, w3
+       adc     $0, R32(w0)
+       mul     v1
+       add     %rax, w3
+       mov     w2, -8(tp,j,8)
+       adc     %rdx, w0
+L(m2x):        mov     (up,j,8), %rax
+       mul     v0
+       add     %rax, w3
+       adc     %rdx, w0
+       adc     $0, R32(w1)
+       add     $4, j
+       mov     -32(up,j,8), %rax
+       mov     w3, -32(tp,j,8)
+       js      L(mul_2_m0_top)
+
+       mul     v1
+       add     %rax, w0
+       adc     %rdx, w1
+       mov     w0, -8(tp)
+       mov     w1, (tp)
+
+       lea     -16(up), up
+       lea     eval(3*8-24)(tp), tp    C tp += 3
+       jmp     L(dowhile_end)
+
+
+L(2m4):        add     $-STACK_ALLOC, %rsp
+       lea     -24(%rsp,n,8), tp       C point tp in middle of result operand
+       mov     (up), v0
+       mov     8(up), %rax
+       lea     (up,n,8), up            C point up at end of input operand
+
+       lea     -4(n), i
+C Function mpn_mul_1_m1(tp, up - (i - 1), i - 1, up[-i])
+       lea     -2(n), j
+       neg     j
+
+       mul     v0
+       mov     %rax, w2
+       mov     (up,j,8), %rax
+       mov     %rdx, w1
+       jmp     L(L1)
+
+       ALIGN(16)
+L(mul_1_m1_top):
+       add     %rax, w2
+       mov     w3, (tp,j,8)
+       mov     (up,j,8), %rax
+       adc     %rdx, w1
+L(L1): xor     R32(w0), R32(w0)
+       mul     v0
+       xor     R32(w3), R32(w3)
+       mov     w2, 8(tp,j,8)
+       add     %rax, w1
+       adc     %rdx, w0
+       mov     8(up,j,8), %rax
+       mov     w1, 16(tp,j,8)
+       xor     R32(w2), R32(w2)
+       mul     v0
+       add     %rax, w0
+       mov     16(up,j,8), %rax
+       adc     %rdx, w3
+       xor     R32(w1), R32(w1)
+       mul     v0
+       add     %rax, w3
+       mov     24(up,j,8), %rax
+       adc     %rdx, w2
+       mov     w0, 24(tp,j,8)
+       mul     v0
+       add     $4, j
+       js      L(mul_1_m1_top)
+
+       add     %rax, w2
+       mov     w3, (tp)
+       adc     %rdx, w1
+       mov     w2, 8(tp)
+       mov     w1, 16(tp)
+
+       lea     eval(2*8)(tp), tp       C tp += 2
+       lea     -8(up), up
+       jmp     L(dowhile_mid)
+
+
+L(3m4):        add     $-STACK_ALLOC, %rsp
+       lea     (%rsp,n,8), tp          C point tp in middle of result operand
+       mov     (up), v0                C u0
+       mov     8(up), %rax             C u1
+       lea     8(up,n,8), up           C point up at end of input operand
+
+       lea     -5(n), i
+C Function mpn_mul_2s_m2(tp, up - i + 1, i - 1, up - i)
+       lea     -1(n), j
+       neg     j
+
+       mov     %rax, v1                C u1
+       mul     v0                      C u0 * u1
+       mov     %rdx, w3
+       xor     R32(w0), R32(w0)
+       xor     R32(w1), R32(w1)
+       mov     %rax, (%rsp)
+       jmp     L(m2)
+
+       ALIGN(16)
+L(mul_2_m2_top):
+       mul     v1
+       add     %rax, w0
+       adc     %rdx, w1
+       mov     -24(up,j,8), %rax
+       mov     $0, R32(w2)
+       mul     v0
+       add     %rax, w0
+       mov     -24(up,j,8), %rax
+       adc     %rdx, w1
+       adc     $0, R32(w2)
+       mul     v1                      C v1 * u0
+       add     %rax, w1
+       mov     w0, -24(tp,j,8)
+       adc     %rdx, w2
+       mov     -16(up,j,8), %rax
+       mul     v0
+       mov     $0, R32(w3)
+       add     %rax, w1
+       adc     %rdx, w2
+       mov     -16(up,j,8), %rax
+       adc     $0, R32(w3)
+       mov     $0, R32(w0)
+       mov     w1, -16(tp,j,8)
+       mul     v1
+       add     %rax, w2
+       mov     -8(up,j,8), %rax
+       adc     %rdx, w3
+       mov     $0, R32(w1)
+       mul     v0
+       add     %rax, w2
+       mov     -8(up,j,8), %rax
+       adc     %rdx, w3
+       adc     $0, R32(w0)
+       mul     v1
+       add     %rax, w3
+       mov     w2, -8(tp,j,8)
+       adc     %rdx, w0
+L(m2): mov     (up,j,8), %rax
+       mul     v0
+       add     %rax, w3
+       adc     %rdx, w0
+       adc     $0, R32(w1)
+       add     $4, j
+       mov     -32(up,j,8), %rax
+       mov     w3, -32(tp,j,8)
+       js      L(mul_2_m2_top)
+
+       mul     v1
+       add     %rax, w0
+       adc     %rdx, w1
+       mov     w0, -8(tp)
+       mov     w1, (tp)
+
+       lea     -16(up), up
+       jmp     L(dowhile_mid)
+
+L(dowhile):
+C Function mpn_addmul_2s_m2(tp, up - (i - 1), i - 1, up - i)
+       lea     4(i), j
+       neg     j
+
+       mov     16(up,j,8), v0
+       mov     24(up,j,8), v1
+       mov     24(up,j,8), %rax
+       mul     v0
+       xor     R32(w3), R32(w3)
+       add     %rax, 24(tp,j,8)
+       adc     %rdx, w3
+       xor     R32(w0), R32(w0)
+       xor     R32(w1), R32(w1)
+       jmp     L(am2)
+
+       ALIGN(16)
+L(addmul_2_m2_top):
+       add     w3, (tp,j,8)
+       adc     %rax, w0
+       mov     8(up,j,8), %rax
+       adc     %rdx, w1
+       mov     $0, R32(w2)
+       mul     v0
+       add     %rax, w0
+       mov     8(up,j,8), %rax
+       adc     %rdx, w1
+       adc     $0, R32(w2)
+       mul     v1                              C v1 * u0
+       add     w0, 8(tp,j,8)
+       adc     %rax, w1
+       adc     %rdx, w2
+       mov     16(up,j,8), %rax
+       mov     $0, R32(w3)
+       mul     v0                              C v0 * u1
+       add     %rax, w1
+       mov     16(up,j,8), %rax
+       adc     %rdx, w2
+       adc     $0, R32(w3)
+       mul     v1                              C v1 * u1
+       add     w1, 16(tp,j,8)
+       adc     %rax, w2
+       mov     24(up,j,8), %rax
+       adc     %rdx, w3
+       mul     v0
+       mov     $0, R32(w0)
+       add     %rax, w2
+       adc     %rdx, w3
+       mov     $0, R32(w1)
+       mov     24(up,j,8), %rax
+       adc     $0, R32(w0)
+       mul     v1
+       add     w2, 24(tp,j,8)
+       adc     %rax, w3
+       adc     %rdx, w0
+L(am2):        mov     32(up,j,8), %rax
+       mul     v0
+       add     %rax, w3
+       mov     32(up,j,8), %rax
+       adc     %rdx, w0
+       adc     $0, R32(w1)
+       mul     v1
+       add     $4, j
+       js      L(addmul_2_m2_top)
+
+       add     w3, (tp)
+       adc     %rax, w0
+       adc     %rdx, w1
+       mov     w0, 8(tp)
+       mov     w1, 16(tp)
+
+       lea     eval(2*8)(tp), tp       C tp += 2
+
+       add     $-2, R32(i)             C i -= 2
+
+L(dowhile_mid):
+C Function mpn_addmul_2s_m0(tp, up - (i - 1), i - 1, up - i)
+       lea     2(i), j
+       neg     j
+
+       mov     (up,j,8), v0
+       mov     8(up,j,8), v1
+       mov     8(up,j,8), %rax
+       mul     v0
+       xor     R32(w1), R32(w1)
+       add     %rax, 8(tp,j,8)
+       adc     %rdx, w1
+       xor     R32(w2), R32(w2)
+       jmp     L(20)
+
+       ALIGN(16)
+L(addmul_2_m0_top):
+       add     w3, (tp,j,8)
+       adc     %rax, w0
+       mov     8(up,j,8), %rax
+       adc     %rdx, w1
+       mov     $0, R32(w2)
+       mul     v0
+       add     %rax, w0
+       mov     8(up,j,8), %rax
+       adc     %rdx, w1
+       adc     $0, R32(w2)
+       mul     v1                              C v1 * u0
+       add     w0, 8(tp,j,8)
+       adc     %rax, w1
+       adc     %rdx, w2
+L(20): mov     16(up,j,8), %rax
+       mov     $0, R32(w3)
+       mul     v0                              C v0 * u1
+       add     %rax, w1
+       mov     16(up,j,8), %rax
+       adc     %rdx, w2
+       adc     $0, R32(w3)
+       mul     v1                              C v1 * u1
+       add     w1, 16(tp,j,8)
+       adc     %rax, w2
+       mov     24(up,j,8), %rax
+       adc     %rdx, w3
+       mul     v0
+       mov     $0, R32(w0)
+       add     %rax, w2
+       adc     %rdx, w3
+       mov     $0, R32(w1)
+       mov     24(up,j,8), %rax
+       adc     $0, R32(w0)
+       mul     v1
+       add     w2, 24(tp,j,8)
+       adc     %rax, w3
+       adc     %rdx, w0
+       mov     32(up,j,8), %rax
+       mul     v0
+       add     %rax, w3
+       mov     32(up,j,8), %rax
+       adc     %rdx, w0
+       adc     $0, R32(w1)
+       mul     v1
+       add     $4, j
+       js      L(addmul_2_m0_top)
+
+       add     w3, (tp)
+       adc     %rax, w0
+       adc     %rdx, w1
+       mov     w0, 8(tp)
+       mov     w1, 16(tp)
+
+       lea     eval(2*8)(tp), tp       C tp += 2
+L(dowhile_end):
+
+       add     $-2, R32(i)             C i -= 2
+       jne     L(dowhile)
+
+C Function mpn_addmul_2s_2
+       mov     -16(up), v0
+       mov     -8(up), v1
+       mov     -8(up), %rax
+       mul     v0
+       xor     R32(w3), R32(w3)
+       add     %rax, -8(tp)
+       adc     %rdx, w3
+       xor     R32(w0), R32(w0)
+       xor     R32(w1), R32(w1)
+       mov     (up), %rax
+       mul     v0
+       add     %rax, w3
+       mov     (up), %rax
+       adc     %rdx, w0
+       mul     v1
+       add     w3, (tp)
+       adc     %rax, w0
+       adc     %rdx, w1
+       mov     w0, 8(tp)
+       mov     w1, 16(tp)
+
+C Function mpn_sqr_diag_addlsh1
+       lea     -4(n,n), j
+
+       mov     (%rsp), %r11
+
+       lea     (rp,j,8), rp
+       lea     -8(up), up
+       lea     8(%rsp,j,8), tp
+       neg     j
+       mov     (up,j,4), %rax
+       mul     %rax
+       test    $2, R8(j)
+       jnz     L(odd)
+
+L(evn):        add     %r11, %r11
+       sbb     R32(%rbx), R32(%rbx)            C save CF
+       add     %rdx, %r11
+       mov     %rax, (rp,j,8)
+       jmp     L(d0)
+
+L(odd):        add     %r11, %r11
+       sbb     R32(%rbp), R32(%rbp)            C save CF
+       add     %rdx, %r11
+       mov     %rax, (rp,j,8)
+       lea     -2(j), j
+       jmp     L(d1)
+
+       ALIGN(16)
+L(top):        mov     (up,j,4), %rax
+       mul     %rax
+       add     R32(%rbp), R32(%rbp)            C restore carry
+       adc     %rax, %r10
+       adc     %rdx, %r11
+       mov     %r10, (rp,j,8)
+L(d0): mov     %r11, 8(rp,j,8)
+       mov     (tp,j,8), %r10
+       adc     %r10, %r10
+       mov     8(tp,j,8), %r11
+       adc     %r11, %r11
+       nop
+       sbb     R32(%rbp), R32(%rbp)            C save CF
+       mov     8(up,j,4), %rax
+       mul     %rax
+       add     R32(%rbx), R32(%rbx)            C restore carry
+       adc     %rax, %r10
+       adc     %rdx, %r11
+       mov     %r10, 16(rp,j,8)
+L(d1): mov     %r11, 24(rp,j,8)
+       mov     16(tp,j,8), %r10
+       adc     %r10, %r10
+       mov     24(tp,j,8), %r11
+       adc     %r11, %r11
+       sbb     R32(%rbx), R32(%rbx)            C save CF
+       add     $4, j
+       js      L(top)
+
+       mov     (up), %rax
+       mul     %rax
+       add     R32(%rbp), R32(%rbp)            C restore carry
+       adc     %rax, %r10
+       adc     %rdx, %r11
+       mov     %r10, (rp)
+       mov     %r11, 8(rp)
+       mov     (tp), %r10
+       adc     %r10, %r10
+       sbb     R32(%rbp), R32(%rbp)            C save CF
+       neg     R32(%rbp)
+       mov     8(up), %rax
+       mul     %rax
+       add     R32(%rbx), R32(%rbx)            C restore carry
+       adc     %rax, %r10
+       adc     %rbp, %rdx
+       mov     %r10, 16(rp)
+       mov     %rdx, 24(rp)
+
+       add     $eval(8+STACK_ALLOC), %rsp
+       pop     %r14
+       pop     %r13
+       pop     %r12
+       pop     %rbp
+       pop     %rbx
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/sublsh1_n.asm b/mpn/x86_64/sublsh1_n.asm

new file mode 100644 (file)

index 0000000..6f67fae
--- /dev/null
+++ b/mpn/x86_64/sublsh1_n.asm
@@ -0,0 +1,144 @@
+dnl  AMD64 mpn_sublsh1_n -- rp[] = up[] - (vp[] << 1)
+
+dnl  Copyright 2003, 2005, 2006, 2007 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C           cycles/limb
+C K8,K9:        2.2
+C K10:          2.2
+C P4:          12.75
+C P6 core2:     3.45
+C P6 corei7:    3.45
+C P6 atom:      ?
+
+
+C Sometimes speed degenerates, supposedly because some operand alignments
+C cause cache conflicts.
+
+C The speed is limited by decoding/issue bandwidth.  There are 26 instructions
+C in the loop, which corresponds to 26/3/4 = 2.167 c/l.
+
+C INPUT PARAMETERS
+define(`rp',`%rdi')
+define(`up',`%rsi')
+define(`vp',`%rdx')
+define(`n', `%rcx')
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_sublsh1_n)
+       push    %rbx                    C saved here, restored before ret; scratch below
+       push    %rbp                    C saved here, restored before ret; holds saved borrow (acy)
+
+       mov     (vp), %r8               C preload vp[0] while vp still points at the start
+       mov     R32(n), R32(%rax)
+       lea     (rp,n,8), rp            C point rp, up, vp just past their last limb
+       lea     (up,n,8), up
+       lea     (vp,n,8), vp
+       neg     n                       C n is now a negative index that counts up to 0
+       xor     R32(%rbp), R32(%rbp)    C acy = 0
+       and     $3, R32(%rax)           C n mod 4 selects the entry point; leaves CF = 0
+       je      L(b00)                  C n mod 4 = 0: enter the loop at its first adc
+       cmp     $2, R32(%rax)
+       jc      L(b01)                  C n mod 4 = 1
+       je      L(b10)                  C n mod 4 = 2
+
+L(b11):        add     %r8, %r8                C n mod 4 = 3: double three vp limbs, carry chained
+       mov     8(vp,n,8), %r9
+       adc     %r9, %r9
+       mov     16(vp,n,8), %r10
+       adc     %r10, %r10
+       sbb     R32(%rax), R32(%rax)    C save scy
+       mov     (up,n,8), %rbp
+       mov     8(up,n,8), %rbx
+       sub     %r8, %rbp               C up limb minus doubled vp limb; borrow chained via sbb
+       sbb     %r9, %rbx
+       mov     %rbp, (rp,n,8)
+       mov     %rbx, 8(rp,n,8)
+       mov     16(up,n,8), %rbp
+       sbb     %r10, %rbp
+       mov     %rbp, 16(rp,n,8)
+       sbb     R32(%rbp), R32(%rbp)    C save acy
+       add     $3, n                   C three limbs done
+       jmp     L(ent)
+
+L(b10):        add     %r8, %r8                C n mod 4 = 2: double two vp limbs
+       mov     8(vp,n,8), %r9
+       adc     %r9, %r9
+       sbb     R32(%rax), R32(%rax)    C save scy
+       mov     (up,n,8), %rbp
+       mov     8(up,n,8), %rbx
+       sub     %r8, %rbp
+       sbb     %r9, %rbx
+       mov     %rbp, (rp,n,8)
+       mov     %rbx, 8(rp,n,8)
+       sbb     R32(%rbp), R32(%rbp)    C save acy
+       add     $2, n                   C two limbs done
+       jmp     L(ent)
+
+L(b01):        add     %r8, %r8                C n mod 4 = 1: double one vp limb
+       sbb     R32(%rax), R32(%rax)    C save scy
+       mov     (up,n,8), %rbp
+       sub     %r8, %rbp
+       mov     %rbp, (rp,n,8)
+       sbb     R32(%rbp), R32(%rbp)    C save acy
+       inc     n                       C one limb done
+L(ent):        jns     L(end)                  C index reached 0: no full group of four remains
+
+       ALIGN(16)
+L(top):        add     R32(%rax), R32(%rax)    C restore scy
+
+       mov     (vp,n,8), %r8           C mov does not touch CF, so the restored scy survives
+L(b00):        adc     %r8, %r8                C double four vp limbs, carry chained through
+       mov     8(vp,n,8), %r9
+       adc     %r9, %r9
+       mov     16(vp,n,8), %r10
+       adc     %r10, %r10
+       mov     24(vp,n,8), %r11
+       adc     %r11, %r11
+
+       sbb     R32(%rax), R32(%rax)    C save scy
+       add     R32(%rbp), R32(%rbp)    C restore acy
+
+       mov     (up,n,8), %rbp          C rbp is free for data once acy is back in CF
+       mov     8(up,n,8), %rbx
+       sbb     %r8, %rbp               C four limbs of up minus doubled vp, borrow chained
+       sbb     %r9, %rbx
+       mov     %rbp, (rp,n,8)
+       mov     %rbx, 8(rp,n,8)
+       mov     16(up,n,8), %rbp
+       mov     24(up,n,8), %rbx
+       sbb     %r10, %rbp
+       sbb     %r11, %rbx
+       mov     %rbp, 16(rp,n,8)
+       mov     %rbx, 24(rp,n,8)
+
+       sbb     R32(%rbp), R32(%rbp)    C save acy
+       add     $4, n                   C four limbs done
+       js      L(top)                  C loop while the index is still negative
+
+L(end):        add     R32(%rbp), R32(%rax)    C each is 0 or -1, so eax = -(acy + scy)
+       neg     R32(%rax)               C return value = acy + scy, in the range 0..2
+
+       pop     %rbp
+       pop     %rbx
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/x86_64-defs.m4 b/mpn/x86_64/x86_64-defs.m4

new file mode 100644 (file)

index 0000000..6942a78
--- /dev/null
+++ b/mpn/x86_64/x86_64-defs.m4
@@ -0,0 +1,172 @@
+divert(-1)
+
+dnl  m4 macros for amd64 assembler.
+
+dnl  Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009 Free
+dnl  Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+dnl  Usage: CPUVEC_FUNCS_LIST
+dnl
+dnl  A list of the functions from gmp-impl.h x86 struct cpuvec_t, in the
+dnl  order they appear in that structure.
+
+define(CPUVEC_FUNCS_LIST,
+``add_n',
+`addmul_1',
+`copyd',
+`copyi',
+`divexact_1',
+`divexact_by3c',
+`divrem_1',
+`gcd_1',
+`lshift',
+`mod_1',
+`mod_34lsub1',
+`modexact_1c_odd',
+`mul_1',
+`mul_basecase',
+`preinv_divrem_1',
+`preinv_mod_1',
+`rshift',
+`sqr_basecase',
+`sub_n',
+`submul_1'') dnl  20 quoted names, comma separated, wrapped in one outer quote pair
+
+
+dnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)
+dnl
+dnl  In the amd64 code we use explicit TEXT and ALIGN() calls in the code,
+dnl  since different alignments are wanted in various circumstances.  So for
+dnl  instance,
+dnl
+dnl                  TEXT
+dnl                  ALIGN(16)
+dnl          PROLOGUE(mpn_add_n)
+dnl                  ...
+dnl          EPILOGUE()
+
+define(`PROLOGUE_cpu',
+m4_assert_numargs(1)
+`      GLOBL   $1
+       TYPE($1,`function')
+$1:
+') dnl  expands to a GLOBL line, a TYPE(...,function) line and then the label for the given symbol
+
+
+dnl  Usage: ASSERT([cond][,instructions])
+dnl
+dnl  If WANT_ASSERT is 1, output the given instructions and expect the given
+dnl  flags condition to then be satisfied.  For example,
+dnl
+dnl         ASSERT(ne, `cmpq %rax, %rbx')
+dnl
+dnl  The instructions can be omitted to just assert a flags condition with
+dnl  no extra calculation.  For example,
+dnl
+dnl         ASSERT(nc)
+dnl
+dnl  When `instructions' is not empty, a pushfq/popfq is added for
+dnl  convenience to preserve the flags, but the instructions themselves must
+dnl  preserve any registers that matter.
+dnl
+dnl  The condition can be omitted to just output the given instructions when
+dnl  assertion checking is wanted.  In this case the pushfq/popfq is omitted.
+dnl  For example,
+dnl
+dnl         ASSERT(, `movq %rax, VAR_KEEPVAL')
+
+define(ASSERT,
+m4_assert_numargs_range(1,2)
+m4_assert_defined(`WANT_ASSERT')
+`ifelse(WANT_ASSERT,1,
+`ifelse(`$1',,
+`      $2',
+`ifelse(`$2',,,
+`      pushfq')
+       $2
+       j`$1'   L(ASSERT_ok`'ASSERT_counter)
+       ud2     C assertion failed
+L(ASSERT_ok`'ASSERT_counter):
+ifelse(`$2',,,`        popfq')
+define(`ASSERT_counter',incr(ASSERT_counter))')')') dnl  expands to nothing unless WANT_ASSERT is 1; each checked expansion bumps ASSERT_counter
+
+define(ASSERT_counter,1) dnl  running number appended to ASSERT_ok so every expansion gets its own label
+
+define(`LEA',`
+       mov     $1@GOTPCREL(%rip), $2
+') dnl  LEA(sym,reg): load the rip-relative GOTPCREL entry of sym into reg
+
+
+define(`DEF_OBJECT',
+m4_assert_numargs_range(1,2)
+`      RODATA
+       ALIGN(ifelse($#,1,2,$2))
+$1:
+') dnl  DEF_OBJECT(name[,align]): emit RODATA, ALIGN (2 when no alignment is given) and the label
+
+define(`END_OBJECT',
+m4_assert_numargs(1)
+`      SIZE(`$1',.-`$1')') dnl  END_OBJECT(name): SIZE of name = current location minus the name label
+
+
+define(`R32',
+       `ifelse($1,`%rax',`%eax',
+               $1,`%rbx',`%ebx',
+               $1,`%rcx',`%ecx',
+               $1,`%rdx',`%edx',
+               $1,`%rsi',`%esi',
+               $1,`%rdi',`%edi',
+               $1,`%rbp',`%ebp',
+               $1,`%r8',`%r8d',
+               $1,`%r9',`%r9d',
+               $1,`%r10',`%r10d',
+               $1,`%r11',`%r11d',
+               $1,`%r12',`%r12d',
+               $1,`%r13',`%r13d',
+               $1,`%r14',`%r14d',
+               $1,`%r15',`%r15d')') dnl  R32(reg): 32-bit name of a 64-bit register; no default branch, so an unlisted argument (e.g. %rsp) expands to nothing
+define(`R8',
+       `ifelse($1,`%rax',`%al',
+               $1,`%rbx',`%bl',
+               $1,`%rcx',`%cl',
+               $1,`%rdx',`%dl',
+               $1,`%rsi',`%sil',
+               $1,`%rdi',`%dil',
+               $1,`%rbp',`%bpl',
+               $1,`%r8',`%r8b',
+               $1,`%r9',`%r9b',
+               $1,`%r10',`%r10b',
+               $1,`%r11',`%r11b',
+               $1,`%r12',`%r12b',
+               $1,`%r13',`%r13b',
+               $1,`%r14',`%r14b',
+               $1,`%r15',`%r15b')') dnl  R8(reg): low-byte name of a 64-bit register; no default branch, so an unlisted argument (e.g. %rsp) expands to nothing
+
+
+dnl  Usage: CALL(funcname)
+dnl
+
+ifdef(`PIC',
+  `define(`CALL',`call GSYM_PREFIX`'$1@PLT')',
+  `define(`CALL',`call GSYM_PREFIX`'$1')') dnl  with PIC defined the call target gets an @PLT suffix; otherwise it is a plain call to the prefixed symbol
+
+
+define(`JUMPTABSECT', `.section        .data.rel.ro.local,"aw",@progbits') dnl  expands to a .section directive; presumably where jump tables are placed (name-based, confirm at the use sites)
+
+divert`'dnl
diff --git a/mpn/z8000/README b/mpn/z8000/README

new file mode 100644 (file)

index 0000000..e1cf22d
--- /dev/null
+++ b/mpn/z8000/README
@@ -0,0 +1,45 @@
+Copyright 2003, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+
+
+
+                      Z8000 MPN SUBROUTINES
+
+
+This directory contains mpn functions for the Zilog Z8000.
+
+
+STATUS
+
+This code is old and has not been used for a long time.
+
+mpn/z8000 uses a 16-bit limb; it's possible this doesn't really work, on
+account of various bits of C code assuming limb>=long, and of course long is
+invariably at least 32 bits.
+
+mpn/z8000x uses a 32-bit limb; this could perhaps be an ABI choice.
+Currently it's reached only by an MPN_PATH override.
+
+
+
+----------------
+Local variables:
+mode: text
+fill-column: 76
+End:
diff --git a/mpn/z8000/add_n.s b/mpn/z8000/add_n.s

new file mode 100644 (file)

index 0000000..89fbb1a
--- /dev/null
+++ b/mpn/z8000/add_n.s
@@ -0,0 +1,51 @@
+! Z8000 __gmpn_add_n -- Add two limb vectors of equal, non-zero length.
+
+! Copyright 1993, 1994, 2000 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 3 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+! License for more details.
+
+! You should have received a copy of the GNU Lesser General Public License
+! along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+! INPUT PARAMETERS
+! res_ptr      r7
+! s1_ptr       r6
+! s2_ptr       r5
+! size         r4
+
+! If we are really crazy, we can use push to write a few result words
+! backwards, using push just because it is faster than reg+disp.  We'd
+! then add 2x the number of words written to r7...
+
+       unseg
+       .text
+       even
+       global ___gmpn_add_n
+___gmpn_add_n:
+       pop     r0,@r6          ! r0 = next s1 limb; r6 is never stepped explicitly, pop advances it
+       pop     r1,@r5          ! r1 = next s2 limb, r5 advanced the same way
+       add     r0,r1           ! lowest limb: plain add, no carry-in
+       ld      @r7,r0          ! store result limb 0
+       dec     r4              ! one limb done
+       jr      eq,Lend         ! size was 1: nothing left to add
+Loop:  pop     r0,@r6
+       pop     r1,@r5
+       adc     r0,r1           ! add including the carry left by the previous limb
+       inc     r7,#2           ! step res_ptr by one 2-byte limb before storing
+       ld      @r7,r0
+       dec     r4              ! loop relies on inc/ld/dec/jr leaving the carry flag alone
+       jr      ne,Loop
+Lend:  ld      r2,r4           ! use 0 already in r4
+       adc     r2,r2           ! r2 = 0 + 0 + carry, i.e. the carry out of the top limb
+       ret     t               ! cc "t": return unconditionally, carry in r2
diff --git a/mpn/z8000/gmp-mparam.h b/mpn/z8000/gmp-mparam.h

new file mode 100644 (file)

index 0000000..1b25c9b
--- /dev/null
+++ b/mpn/z8000/gmp-mparam.h
@@ -0,0 +1,21 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 16  /* width of one limb in bits for this port */
+#define BYTES_PER_MP_LIMB 2  /* the same width in bytes: 16 / 8 */
diff --git a/mpn/z8000/mul_1.s b/mpn/z8000/mul_1.s

new file mode 100644 (file)

index 0000000..fa92bc3
--- /dev/null
+++ b/mpn/z8000/mul_1.s
@@ -0,0 +1,66 @@
+! Z8000 __gmpn_mul_1 -- Multiply a limb vector with a limb and store
+! the result in a second limb vector.
+
+! Copyright 1993, 1994, 1995, 2000 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 3 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+! License for more details.
+
+! You should have received a copy of the GNU Lesser General Public License
+! along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+! INPUT PARAMETERS
+! res_ptr      r7
+! s1_ptr       r6
+! size         r5
+! s2_limb      r4
+
+       unseg
+       .text
+       even
+       global ___gmpn_mul_1
+___gmpn_mul_1:
+       sub     r2,r2           ! zero carry limb
+       and     r4,r4           ! set flags from s2_limb
+       jr      mi,Lneg         ! s2_limb msb set: use the loop with the extra sign_comp1 fixup
+
+Lpos:  pop     r1,@r6          ! r1 = next s1 limb (pop also advances r6)
+       ld      r9,r1
+       mult    rr8,r4          ! SIGNED multiply: rr8 = r9 * r4, r8 = hi_limb, r9 = lo_limb
+       and     r1,r1           ! set sign flag from msb of loaded limb
+       jr      pl,Lp           ! msb clear: limb was read as non-negative, no fixup needed
+       add     r8,r4           ! hi_limb += sign_comp2 (s2_limb; undoes reading the limb as negative)
+Lp:    add     r9,r2           ! lo_limb += cy_limb
+       xor     r2,r2           ! clear r2; the adc below relies on xor keeping the carry flag
+       adc     r2,r8           ! new cy_limb = hi_limb + carry from the lo_limb add
+       ld      @r7,r9          ! store result limb
+       inc     r7,#2           ! step res_ptr by one 2-byte limb
+       dec     r5
+       jr      ne,Lpos
+       ret t                   ! returns with the final carry limb in r2
+
+Lneg:  pop     r1,@r6          ! r1 = next s1 limb (pop also advances r6)
+       ld      r9,r1
+       mult    rr8,r4          ! SIGNED multiply, s2_limb is read as negative in this loop
+       add     r8,r1           ! hi_limb += sign_comp1
+       and     r1,r1           ! set sign flag from msb of loaded limb
+       jr      pl,Ln           ! msb clear: second fixup not needed
+       add     r8,r4           ! hi_limb += sign_comp2
+Ln:    add     r9,r2           ! lo_limb += cy_limb
+       xor     r2,r2           ! clear r2; the adc below relies on xor keeping the carry flag
+       adc     r2,r8           ! new cy_limb = hi_limb + carry from the lo_limb add
+       ld      @r7,r9          ! store result limb
+       inc     r7,#2           ! step res_ptr by one 2-byte limb
+       dec     r5
+       jr      ne,Lneg
+       ret t                   ! returns with the final carry limb in r2
diff --git a/mpn/z8000/sub_n.s b/mpn/z8000/sub_n.s

new file mode 100644 (file)

index 0000000..1dbd837
--- /dev/null
+++ b/mpn/z8000/sub_n.s
@@ -0,0 +1,52 @@
+! Z8000 __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+! store difference in a third limb vector.
+
+! Copyright 1993, 1994, 2000 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 3 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+! License for more details.
+
+! You should have received a copy of the GNU Lesser General Public License
+! along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+! INPUT PARAMETERS
+! res_ptr      r7
+! s1_ptr       r6
+! s2_ptr       r5
+! size         r4
+
+! If we are really crazy, we can use push to write a few result words
+! backwards, using push just because it is faster than reg+disp.  We'd
+! then add 2x the number of words written to r7...
+
+       unseg
+       .text
+       even
+       global ___gmpn_sub_n
+___gmpn_sub_n:
+       pop     r0,@r6          ! r0 = next s1 limb; r6 is never stepped explicitly, pop advances it
+       pop     r1,@r5          ! r1 = next s2 limb, r5 advanced the same way
+       sub     r0,r1           ! lowest limb: plain subtract, no borrow-in
+       ld      @r7,r0          ! store result limb 0
+       dec     r4              ! one limb done
+       jr      eq,Lend         ! size was 1: nothing left to subtract
+Loop:  pop     r0,@r6
+       pop     r1,@r5
+       sbc     r0,r1           ! subtract including the carry flag left by the previous limb
+       inc     r7,#2           ! step res_ptr by one 2-byte limb before storing
+       ld      @r7,r0
+       dec     r4              ! loop relies on inc/ld/dec/jr leaving the carry flag alone
+       jr      ne,Loop
+Lend:  ld      r2,r4           ! use 0 already in r4
+       adc     r2,r2           ! r2 = carry flag after the last sub/sbc (the borrow, assuming C is set on borrow -- confirm)
+       ret     t               ! cc "t": return unconditionally, result in r2
diff --git a/mpn/z8000x/add_n.s b/mpn/z8000x/add_n.s

new file mode 100644 (file)

index 0000000..26b47e2
--- /dev/null
+++ b/mpn/z8000x/add_n.s
@@ -0,0 +1,54 @@
+! Z8000 (32 bit limb version) __gmpn_add_n -- Add two limb vectors of equal,
+! non-zero length.
+
+! Copyright 1993, 1994, 2000 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 3 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+! License for more details.
+
+! You should have received a copy of the GNU Lesser General Public License
+! along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+! INPUT PARAMETERS
+! res_ptr      r7
+! s1_ptr       r6
+! s2_ptr       r5
+! size         r4
+
+! If we are really crazy, we can use push to write a few result words
+! backwards, using push just because it is faster than reg+disp.  We'd
+! then add 2x the number of words written to r7...
+
+       segm
+       .text
+       even
+       global ___gmpn_add_n
+___gmpn_add_n:
+       popl    rr0,@r6         ! rr0 = next 32-bit s1 limb; r6 is never stepped explicitly, popl advances it
+       popl    rr8,@r5         ! rr8 = next 32-bit s2 limb
+       addl    rr0,rr8         ! lowest limb: 32-bit add, no carry-in
+       ldl     @r7,rr0         ! store result limb 0
+       dec     r4              ! one limb done
+       jr      eq,Lend         ! size was 1
+Loop:  popl    rr0,@r6
+       popl    rr8,@r5
+       adc     r1,r9           ! first 16-bit halves, with the carry from the previous limb
+       adc     r0,r8           ! then the other halves, carry chained
+       inc     r7,#4           ! step res_ptr by one 4-byte limb before storing
+       ldl     @r7,rr0
+       dec     r4              ! loop relies on inc/ldl/dec/jr leaving the carry flag alone
+       jr      ne,Loop
+Lend:  ld      r2,r4           ! use 0 already in r4
+       ld      r3,r4
+       adc     r2,r2           ! r2 = carry, r3 stays 0; NOTE(review): confirm r2 is the half of rr2 the caller reads
+       ret     t               ! cc "t": return unconditionally
diff --git a/mpn/z8000x/sub_n.s b/mpn/z8000x/sub_n.s

new file mode 100644 (file)

index 0000000..837ecef
--- /dev/null
+++ b/mpn/z8000x/sub_n.s
@@ -0,0 +1,54 @@
+! Z8000 (32 bit limb version) __gmpn_sub_n -- Subtract two limb vectors of the
+! same length > 0 and store difference in a third limb vector.
+
+! Copyright 1993, 1994, 2000 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 3 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+! License for more details.
+
+! You should have received a copy of the GNU Lesser General Public License
+! along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+! INPUT PARAMETERS
+! res_ptr      r7
+! s1_ptr       r6
+! s2_ptr       r5
+! size         r4
+
+! If we are really crazy, we can use push to write a few result words
+! backwards, using push just because it is faster than reg+disp.  We'd
+! then add 2x the number of words written to r7...
+
+       segm
+       .text
+       even
+       global ___gmpn_sub_n
+___gmpn_sub_n:
+       popl    rr0,@r6         ! rr0 = next 32-bit s1 limb; r6 is never stepped explicitly, popl advances it
+       popl    rr8,@r5         ! rr8 = next 32-bit s2 limb
+       subl    rr0,rr8         ! lowest limb: 32-bit subtract, no borrow-in
+       ldl     @r7,rr0         ! store result limb 0
+       dec     r4              ! one limb done
+       jr      eq,Lend         ! size was 1
+Loop:  popl    rr0,@r6
+       popl    rr8,@r5
+       sbc     r1,r9           ! first 16-bit halves, with the carry flag from the previous limb
+       sbc     r0,r8           ! then the other halves, borrow chained
+       inc     r7,#4           ! step res_ptr by one 4-byte limb before storing
+       ldl     @r7,rr0
+       dec     r4              ! loop relies on inc/ldl/dec/jr leaving the carry flag alone
+       jr      ne,Loop
+Lend:  ld      r2,r4           ! use 0 already in r4
+       ld      r3,r4
+       adc     r2,r2           ! r2 = carry flag (borrow), r3 stays 0; NOTE(review): confirm r2 is the half of rr2 the caller reads
+       ret     t               ! cc "t": return unconditionally
diff --git a/mpq/Makefile.am b/mpq/Makefile.am

new file mode 100644 (file)

index 0000000..f5db4e5
--- /dev/null
+++ b/mpq/Makefile.am
@@ -0,0 +1,30 @@
+## Process this file with automake to generate Makefile.in
+
+# Copyright 1996, 1998, 2000, 2001, 2002 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+INCLUDES = -D__GMP_WITHIN_GMP -I$(top_srcdir)
+
+noinst_LTLIBRARIES = libmpq.la
+libmpq_la_SOURCES =                                                    \
+  abs.c aors.c canonicalize.c clear.c clears.c                         \
+  cmp.c cmp_si.c cmp_ui.c div.c equal.c                                        \
+  get_d.c get_den.c get_num.c get_str.c                                        \
+  init.c inits.c inp_str.c inv.c md_2exp.c mul.c neg.c out_str.c       \
+  set.c set_den.c set_num.c set_si.c set_str.c set_ui.c set_z.c set_d.c        \
+  set_f.c swap.c
diff --git a/mpq/Makefile.in b/mpq/Makefile.in

new file mode 100644 (file)

index 0000000..d3e90a7
--- /dev/null
+++ b/mpq/Makefile.in
@@ -0,0 +1,613 @@
+# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation,
+# Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+# Copyright 1996, 1998, 2000, 2001, 2002 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+ANSI2KNR = $(top_builddir)/ansi2knr
+subdir = mpq
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
+       $(top_srcdir)/configure.in
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+       $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+LTLIBRARIES = $(noinst_LTLIBRARIES)
+libmpq_la_LIBADD =
+am_libmpq_la_OBJECTS = abs$U.lo aors$U.lo canonicalize$U.lo clear$U.lo \
+       clears$U.lo cmp$U.lo cmp_si$U.lo cmp_ui$U.lo div$U.lo \
+       equal$U.lo get_d$U.lo get_den$U.lo get_num$U.lo get_str$U.lo \
+       init$U.lo inits$U.lo inp_str$U.lo inv$U.lo md_2exp$U.lo \
+       mul$U.lo neg$U.lo out_str$U.lo set$U.lo set_den$U.lo \
+       set_num$U.lo set_si$U.lo set_str$U.lo set_ui$U.lo set_z$U.lo \
+       set_d$U.lo set_f$U.lo swap$U.lo
+libmpq_la_OBJECTS = $(am_libmpq_la_OBJECTS)
+DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
+depcomp =
+am__depfiles_maybe =
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+       $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+       $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
+       $(LDFLAGS) -o $@
+SOURCES = $(libmpq_la_SOURCES)
+DIST_SOURCES = $(libmpq_la_SOURCES)
+ETAGS = etags
+CTAGS = ctags
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ABI = @ABI@
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AS = @AS@
+ASMFLAGS = @ASMFLAGS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CALLING_CONVENTIONS_OBJS = @CALLING_CONVENTIONS_OBJS@
+CC = @CC@
+CCAS = @CCAS@
+CC_FOR_BUILD = @CC_FOR_BUILD@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CPP_FOR_BUILD = @CPP_FOR_BUILD@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFN_LONG_LONG_LIMB = @DEFN_LONG_LONG_LIMB@
+DEFS = @DEFS@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+EXEEXT_FOR_BUILD = @EXEEXT_FOR_BUILD@
+FGREP = @FGREP@
+GMP_LDFLAGS = @GMP_LDFLAGS@
+GMP_LIMB_BITS = @GMP_LIMB_BITS@
+GMP_NAIL_BITS = @GMP_NAIL_BITS@
+GREP = @GREP@
+HAVE_CLOCK_01 = @HAVE_CLOCK_01@
+HAVE_CPUTIME_01 = @HAVE_CPUTIME_01@
+HAVE_GETRUSAGE_01 = @HAVE_GETRUSAGE_01@
+HAVE_GETTIMEOFDAY_01 = @HAVE_GETTIMEOFDAY_01@
+HAVE_HOST_CPU_FAMILY_power = @HAVE_HOST_CPU_FAMILY_power@
+HAVE_HOST_CPU_FAMILY_powerpc = @HAVE_HOST_CPU_FAMILY_powerpc@
+HAVE_SIGACTION_01 = @HAVE_SIGACTION_01@
+HAVE_SIGALTSTACK_01 = @HAVE_SIGALTSTACK_01@
+HAVE_SIGSTACK_01 = @HAVE_SIGSTACK_01@
+HAVE_STACK_T_01 = @HAVE_STACK_T_01@
+HAVE_SYS_RESOURCE_H_01 = @HAVE_SYS_RESOURCE_H_01@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LEX = @LEX@
+LEXLIB = @LEXLIB@
+LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
+LIBCURSES = @LIBCURSES@
+LIBGMPXX_LDFLAGS = @LIBGMPXX_LDFLAGS@
+LIBGMP_DLL = @LIBGMP_DLL@
+LIBGMP_LDFLAGS = @LIBGMP_LDFLAGS@
+LIBM = @LIBM@
+LIBM_FOR_BUILD = @LIBM_FOR_BUILD@
+LIBOBJS = @LIBOBJS@
+LIBREADLINE = @LIBREADLINE@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+M4 = @M4@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
+STRIP = @STRIP@
+TAL_OBJECT = @TAL_OBJECT@
+TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
+U = @U@
+U_FOR_BUILD = @U_FOR_BUILD@
+VERSION = @VERSION@
+WITH_READLINE_01 = @WITH_READLINE_01@
+YACC = @YACC@
+YFLAGS = @YFLAGS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__leading_dot = @am__leading_dot@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+gmp_srclinks = @gmp_srclinks@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+mpn_objects = @mpn_objects@
+mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
+mpn_objs_in_libmp = @mpn_objs_in_libmp@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+INCLUDES = -D__GMP_WITHIN_GMP -I$(top_srcdir)
+noinst_LTLIBRARIES = libmpq.la
+libmpq_la_SOURCES = \
+  abs.c aors.c canonicalize.c clear.c clears.c                         \
+  cmp.c cmp_si.c cmp_ui.c div.c equal.c                                        \
+  get_d.c get_den.c get_num.c get_str.c                                        \
+  init.c inits.c inp_str.c inv.c md_2exp.c mul.c neg.c out_str.c       \
+  set.c set_den.c set_num.c set_si.c set_str.c set_ui.c set_z.c set_d.c        \
+  set_f.c swap.c
+
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps)
+       @for dep in $?; do \
+         case '$(am__configure_deps)' in \
+           *$$dep*) \
+             ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+               && { if test -f $@; then exit 0; else break; fi; }; \
+             exit 1;; \
+         esac; \
+       done; \
+       echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps mpq/Makefile'; \
+       $(am__cd) $(top_srcdir) && \
+         $(AUTOMAKE) --gnu --ignore-deps mpq/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+       @case '$?' in \
+         *config.status*) \
+           cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+         *) \
+           echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+           cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+       esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+clean-noinstLTLIBRARIES:
+       -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES)
+       @list='$(noinst_LTLIBRARIES)'; for p in $$list; do \
+         dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \
+         test "$$dir" != "$$p" || dir=.; \
+         echo "rm -f \"$${dir}/so_locations\""; \
+         rm -f "$${dir}/so_locations"; \
+       done
+libmpq.la: $(libmpq_la_OBJECTS) $(libmpq_la_DEPENDENCIES) 
+       $(LINK)  $(libmpq_la_OBJECTS) $(libmpq_la_LIBADD) $(LIBS)
+
+mostlyclean-compile:
+       -rm -f *.$(OBJEXT)
+
+distclean-compile:
+       -rm -f *.tab.c
+$(top_builddir)/ansi2knr:
+       $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
+
+mostlyclean-kr:
+       -test "$U" = "" || rm -f *_.c
+
+.c.o:
+       $(COMPILE) -c $<
+
+.c.obj:
+       $(COMPILE) -c `$(CYGPATH_W) '$<'`
+
+.c.lo:
+       $(LTCOMPILE) -c -o $@ $<
+abs_.c: abs.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/abs.c; then echo $(srcdir)/abs.c; else echo abs.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+aors_.c: aors.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/aors.c; then echo $(srcdir)/aors.c; else echo aors.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+canonicalize_.c: canonicalize.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/canonicalize.c; then echo $(srcdir)/canonicalize.c; else echo canonicalize.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+clear_.c: clear.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/clear.c; then echo $(srcdir)/clear.c; else echo clear.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+clears_.c: clears.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/clears.c; then echo $(srcdir)/clears.c; else echo clears.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+cmp_.c: cmp.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cmp.c; then echo $(srcdir)/cmp.c; else echo cmp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+cmp_si_.c: cmp_si.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cmp_si.c; then echo $(srcdir)/cmp_si.c; else echo cmp_si.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+cmp_ui_.c: cmp_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cmp_ui.c; then echo $(srcdir)/cmp_ui.c; else echo cmp_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+div_.c: div.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/div.c; then echo $(srcdir)/div.c; else echo div.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+equal_.c: equal.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/equal.c; then echo $(srcdir)/equal.c; else echo equal.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+get_d_.c: get_d.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_d.c; then echo $(srcdir)/get_d.c; else echo get_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+get_den_.c: get_den.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_den.c; then echo $(srcdir)/get_den.c; else echo get_den.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+get_num_.c: get_num.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_num.c; then echo $(srcdir)/get_num.c; else echo get_num.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+get_str_.c: get_str.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_str.c; then echo $(srcdir)/get_str.c; else echo get_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+init_.c: init.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/init.c; then echo $(srcdir)/init.c; else echo init.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+inits_.c: inits.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/inits.c; then echo $(srcdir)/inits.c; else echo inits.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+inp_str_.c: inp_str.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/inp_str.c; then echo $(srcdir)/inp_str.c; else echo inp_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+inv_.c: inv.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/inv.c; then echo $(srcdir)/inv.c; else echo inv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+md_2exp_.c: md_2exp.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/md_2exp.c; then echo $(srcdir)/md_2exp.c; else echo md_2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mul_.c: mul.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul.c; then echo $(srcdir)/mul.c; else echo mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+neg_.c: neg.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/neg.c; then echo $(srcdir)/neg.c; else echo neg.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+out_str_.c: out_str.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/out_str.c; then echo $(srcdir)/out_str.c; else echo out_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+set_.c: set.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set.c; then echo $(srcdir)/set.c; else echo set.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+set_d_.c: set_d.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_d.c; then echo $(srcdir)/set_d.c; else echo set_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+set_den_.c: set_den.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_den.c; then echo $(srcdir)/set_den.c; else echo set_den.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+set_f_.c: set_f.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_f.c; then echo $(srcdir)/set_f.c; else echo set_f.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+set_num_.c: set_num.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_num.c; then echo $(srcdir)/set_num.c; else echo set_num.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+set_si_.c: set_si.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_si.c; then echo $(srcdir)/set_si.c; else echo set_si.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+set_str_.c: set_str.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_str.c; then echo $(srcdir)/set_str.c; else echo set_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+set_ui_.c: set_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_ui.c; then echo $(srcdir)/set_ui.c; else echo set_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+set_z_.c: set_z.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_z.c; then echo $(srcdir)/set_z.c; else echo set_z.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+swap_.c: swap.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/swap.c; then echo $(srcdir)/swap.c; else echo swap.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+abs_.$(OBJEXT) abs_.lo aors_.$(OBJEXT) aors_.lo \
+canonicalize_.$(OBJEXT) canonicalize_.lo clear_.$(OBJEXT) clear_.lo \
+clears_.$(OBJEXT) clears_.lo cmp_.$(OBJEXT) cmp_.lo cmp_si_.$(OBJEXT) \
+cmp_si_.lo cmp_ui_.$(OBJEXT) cmp_ui_.lo div_.$(OBJEXT) div_.lo \
+equal_.$(OBJEXT) equal_.lo get_d_.$(OBJEXT) get_d_.lo \
+get_den_.$(OBJEXT) get_den_.lo get_num_.$(OBJEXT) get_num_.lo \
+get_str_.$(OBJEXT) get_str_.lo init_.$(OBJEXT) init_.lo \
+inits_.$(OBJEXT) inits_.lo inp_str_.$(OBJEXT) inp_str_.lo \
+inv_.$(OBJEXT) inv_.lo md_2exp_.$(OBJEXT) md_2exp_.lo mul_.$(OBJEXT) \
+mul_.lo neg_.$(OBJEXT) neg_.lo out_str_.$(OBJEXT) out_str_.lo \
+set_.$(OBJEXT) set_.lo set_d_.$(OBJEXT) set_d_.lo set_den_.$(OBJEXT) \
+set_den_.lo set_f_.$(OBJEXT) set_f_.lo set_num_.$(OBJEXT) set_num_.lo \
+set_si_.$(OBJEXT) set_si_.lo set_str_.$(OBJEXT) set_str_.lo \
+set_ui_.$(OBJEXT) set_ui_.lo set_z_.$(OBJEXT) set_z_.lo \
+swap_.$(OBJEXT) swap_.lo : $(ANSI2KNR)
+
+mostlyclean-libtool:
+       -rm -f *.lo
+
+clean-libtool:
+       -rm -rf .libs _libs
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+       list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       mkid -fID $$unique
+tags: TAGS
+
+TAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+               $(TAGS_FILES) $(LISP)
+       set x; \
+       here=`pwd`; \
+       list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       shift; \
+       if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+         test -n "$$unique" || unique=$$empty_fix; \
+         if test $$# -gt 0; then \
+           $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+             "$$@" $$unique; \
+         else \
+           $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+             $$unique; \
+         fi; \
+       fi
+ctags: CTAGS
+CTAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+               $(TAGS_FILES) $(LISP)
+       list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       test -z "$(CTAGS_ARGS)$$unique" \
+         || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+            $$unique
+
+GTAGS:
+       here=`$(am__cd) $(top_builddir) && pwd` \
+         && $(am__cd) $(top_srcdir) \
+         && gtags -i $(GTAGS_ARGS) "$$here"
+
+distclean-tags:
+       -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+       @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+       topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+       list='$(DISTFILES)'; \
+         dist_files=`for file in $$list; do echo $$file; done | \
+         sed -e "s|^$$srcdirstrip/||;t" \
+             -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+       case $$dist_files in \
+         */*) $(MKDIR_P) `echo "$$dist_files" | \
+                          sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+                          sort -u` ;; \
+       esac; \
+       for file in $$dist_files; do \
+         if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+         if test -d $$d/$$file; then \
+           dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+           if test -d "$(distdir)/$$file"; then \
+             find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+           fi; \
+           if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+             cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+             find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+           fi; \
+           cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+         else \
+           test -f "$(distdir)/$$file" \
+           || cp -p $$d/$$file "$(distdir)/$$file" \
+           || exit 1; \
+         fi; \
+       done
+check-am: all-am
+check: check-am
+all-am: Makefile $(LTLIBRARIES)
+installdirs:
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+       @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+       $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+         install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+         `test -z '$(STRIP)' || \
+           echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+       -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+       -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+       @echo "This command is intended for maintainers to use"
+       @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \
+       mostlyclean-am
+
+distclean: distclean-am
+       -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+       distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+       -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+       mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: $(top_builddir)/ansi2knr install-am install-strip
+
+.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
+       clean-libtool clean-noinstLTLIBRARIES ctags distclean \
+       distclean-compile distclean-generic distclean-libtool \
+       distclean-tags distdir dvi dvi-am html html-am info info-am \
+       install install-am install-data install-data-am install-dvi \
+       install-dvi-am install-exec install-exec-am install-html \
+       install-html-am install-info install-info-am install-man \
+       install-pdf install-pdf-am install-ps install-ps-am \
+       install-strip installcheck installcheck-am installdirs \
+       maintainer-clean maintainer-clean-generic mostlyclean \
+       mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+       mostlyclean-libtool pdf pdf-am ps ps-am tags uninstall \
+       uninstall-am
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/mpq/abs.c b/mpq/abs.c

new file mode 100644 (file)

index 0000000..04b8e34
--- /dev/null
+++ b/mpq/abs.c
@@ -0,0 +1,46 @@
+/* mpq_abs -- absolute value of a rational.
+
+Copyright 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define __GMP_FORCE_mpq_abs 1
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+void
+mpq_abs (mpq_ptr dst, mpq_srcptr src)
+{
+  mp_size_t  num_size = src->_mp_num._mp_size;
+  mp_size_t  num_abs_size = ABS (num_size);  /* numerator limb count with the sign removed */
+  /* DST may be SRC itself; in that case only the size store at the end is needed.  */
+  if (dst != src)
+    {
+      mp_size_t  den_size = src->_mp_den._mp_size;  /* taken as-is, no ABS: assumes a positive denominator -- TODO confirm */
+      /* MPZ_REALLOC is defined in gmp-impl.h; presumably it ensures room for that many limbs.  */
+      MPZ_REALLOC (mpq_numref(dst), num_abs_size);
+      MPZ_REALLOC (mpq_denref(dst), den_size);
+      /* Copy the limb data of both the numerator and the denominator.  */
+      MPN_COPY (dst->_mp_num._mp_d, src->_mp_num._mp_d, num_abs_size);
+      MPN_COPY (dst->_mp_den._mp_d, src->_mp_den._mp_d, den_size);
+
+      dst->_mp_den._mp_size = den_size;
+    }
+  /* Storing the non-negative size is what turns the result into an absolute value.  */
+  dst->_mp_num._mp_size = num_abs_size;
+}
diff --git a/mpq/aors.c b/mpq/aors.c

new file mode 100644 (file)

index 0000000..5e09de5
--- /dev/null
+++ b/mpq/aors.c
@@ -0,0 +1,102 @@
+/* mpq_add, mpq_sub -- add or subtract rational numbers.
+
+Copyright 1991, 1994, 1995, 1996, 1997, 2000, 2001, 2004, 2005 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+static void __gmpq_aors __GMP_PROTO ((REGPARM_3_1 (mpq_ptr, mpq_srcptr, mpq_srcptr, void (*) __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr))))) REGPARM_ATTR (1);
+#define mpq_aors(w,x,y,fun)  __gmpq_aors (REGPARM_3_1 (w, x, y, fun))
+/* Worker shared by mpq_add and mpq_sub: ROP = OP1 FUN OP2, where FUN is mpz_add or mpz_sub.  */
+REGPARM_ATTR (1) static void  /* the macro above also rewrites the name below, so this defines __gmpq_aors */
+mpq_aors (mpq_ptr rop, mpq_srcptr op1, mpq_srcptr op2,
+          void (*fun) __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)))
+{
+  mpz_t gcd;
+  mpz_t tmp1, tmp2;
+  mp_size_t op1_num_size = ABS (op1->_mp_num._mp_size);
+  mp_size_t op1_den_size =      op1->_mp_den._mp_size;
+  mp_size_t op2_num_size = ABS (op2->_mp_num._mp_size);
+  mp_size_t op2_den_size =      op2->_mp_den._mp_size;
+  TMP_DECL;
+
+  TMP_MARK;
+  MPZ_TMP_INIT (gcd, MIN (op1_den_size, op2_den_size));
+  MPZ_TMP_INIT (tmp1, op1_num_size + op2_den_size);  /* sized for num(OP1) * den(OP2) */
+  MPZ_TMP_INIT (tmp2, op2_num_size + op1_den_size);  /* sized for num(OP2) * den(OP1) */
+
+  /* ROP might be identical to either operand, so don't store the
+     result there until we are finished with the input operands.  We
+     dare to overwrite the numerator of ROP when we are finished
+     with the numerators of OP1 and OP2.  */
+
+  mpz_gcd (gcd, &(op1->_mp_den), &(op2->_mp_den));
+  if (! MPZ_EQUAL_1_P (gcd))
+    {
+      mpz_t t;
+
+      mpz_divexact_gcd (tmp1, &(op2->_mp_den), gcd);
+      mpz_mul (tmp1, &(op1->_mp_num), tmp1);  /* tmp1 = num(OP1) * (den(OP2) / gcd) */
+
+      mpz_divexact_gcd (tmp2, &(op1->_mp_den), gcd);
+      mpz_mul (tmp2, &(op2->_mp_num), tmp2);  /* tmp2 = num(OP2) * (den(OP1) / gcd) */
+
+      MPZ_TMP_INIT (t, MAX (ABS (tmp1->_mp_size), ABS (tmp2->_mp_size)) + 1);  /* one limb more than the larger term */
+
+      (*fun) (t, tmp1, tmp2);  /* t = tmp1 + tmp2 or tmp1 - tmp2: the unreduced numerator */
+      mpz_divexact_gcd (tmp2, &(op1->_mp_den), gcd);  /* tmp2 reused: den(OP1) / gcd, a factor of the result denominator */
+
+      mpz_gcd (gcd, t, gcd);  /* gcd is now gcd (t, previous gcd) */
+      if (MPZ_EQUAL_1_P (gcd))
+        {
+          mpz_set (&(rop->_mp_num), t);
+          mpz_mul (&(rop->_mp_den), &(op2->_mp_den), tmp2);  /* den = den(OP2) * (den(OP1) / first gcd) */
+        }
+      else
+        {
+          mpz_divexact_gcd (&(rop->_mp_num), t, gcd);  /* num = t / second gcd */
+          mpz_divexact_gcd (tmp1, &(op2->_mp_den), gcd);
+          mpz_mul (&(rop->_mp_den), tmp1, tmp2);  /* den = (den(OP2) / second gcd) * (den(OP1) / first gcd) */
+        }
+    }
+  else
+    {
+      /* The common divisor is 1.  This is the case (for random input) with
+        probability 6/(pi**2), which is about 60.8%.  */
+      mpz_mul (tmp1, &(op1->_mp_num), &(op2->_mp_den));
+      mpz_mul (tmp2, &(op2->_mp_num), &(op1->_mp_den));
+      (*fun) (&(rop->_mp_num), tmp1, tmp2);  /* numerator stored first; only the denominators are read after this */
+      mpz_mul (&(rop->_mp_den), &(op1->_mp_den), &(op2->_mp_den));
+    }
+  TMP_FREE;
+}
+
+
+void
+mpq_add (mpq_ptr rop, mpq_srcptr op1, mpq_srcptr op2)
+{
+  mpq_aors (rop, op1, op2, mpz_add);  /* rop = op1 + op2 via the shared worker */
+}
+
+void
+mpq_sub (mpq_ptr rop, mpq_srcptr op1, mpq_srcptr op2)
+{
+  mpq_aors (rop, op1, op2, mpz_sub);  /* rop = op1 - op2 via the shared worker */
+}
diff --git a/mpq/canonicalize.c b/mpq/canonicalize.c

new file mode 100644 (file)

index 0000000..e5bedec
--- /dev/null
+++ b/mpq/canonicalize.c
@@ -0,0 +1,53 @@
+/* mpq_canonicalize(op) -- Remove common factors of the denominator and
+   numerator in OP.
+
+Copyright 1991, 1994, 1995, 1996, 2000, 2001, 2005 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Divide the numerator and denominator of OP by their gcd, in place, and
+   make the denominator positive.  A zero denominator triggers
+   DIVIDE_BY_ZERO before anything else is done.  */
+void
+mpq_canonicalize (MP_RAT *op)
+{
+  mpz_t gcd;
+  TMP_DECL;
+
+  if (op->_mp_den._mp_size == 0)
+    DIVIDE_BY_ZERO;
+
+  TMP_MARK;
+
+  /* ??? Dunno if the 1+ is needed.  */
+  MPZ_TMP_INIT (gcd, 1 + MAX (ABS (op->_mp_num._mp_size),
+                             ABS (op->_mp_den._mp_size)));
+
+  mpz_gcd (gcd, &(op->_mp_num), &(op->_mp_den));
+  /* Both divisions are skipped when the gcd is 1.  */
+  if (! MPZ_EQUAL_1_P (gcd))
+    {
+      mpz_divexact_gcd (&(op->_mp_num), &(op->_mp_num), gcd);
+      mpz_divexact_gcd (&(op->_mp_den), &(op->_mp_den), gcd);
+    }
+
+  /* A negative denominator is fixed by flipping the sign of both size
+     fields, which leaves the value of the fraction unchanged.  */
+  if (op->_mp_den._mp_size < 0)
+    {
+      op->_mp_num._mp_size = -op->_mp_num._mp_size;
+      op->_mp_den._mp_size = -op->_mp_den._mp_size;
+    }
+  TMP_FREE;
+}
diff --git a/mpq/clear.c b/mpq/clear.c

new file mode 100644 (file)

index 0000000..d6f7d7f
--- /dev/null
+++ b/mpq/clear.c
@@ -0,0 +1,30 @@
+/* mpq_clear -- free the space occupied by a MP_RAT.
+
+Copyright 1991, 1994, 1995, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Hand the limb storage of M's numerator and denominator back to the
+   installed free function.  Each block is released with the byte size
+   implied by its _mp_alloc field.  */
+void
+mpq_clear (MP_RAT *m)
+{
+  MP_INT *num = &(m->_mp_num);
+  MP_INT *den = &(m->_mp_den);
+
+  (*__gmp_free_func) (num->_mp_d, num->_mp_alloc * BYTES_PER_MP_LIMB);
+  (*__gmp_free_func) (den->_mp_d, den->_mp_alloc * BYTES_PER_MP_LIMB);
+}
diff --git a/mpq/clears.c b/mpq/clears.c

new file mode 100644 (file)

index 0000000..0aa8b57
--- /dev/null
+++ b/mpq/clears.c
@@ -0,0 +1,56 @@
+/* mpq_clears() -- Clear multiple mpq_t variables.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#if HAVE_STDARG
+#include <stdarg.h>
+#else
+#include <varargs.h>
+#endif
+
+#include <stdio.h>             /* for NULL */
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Call mpq_clear on X and then on each following mpq_ptr argument in turn,
+   stopping at the first NULL pointer.  The argument list must therefore be
+   terminated by NULL.  Built with either <stdarg.h> or old-style
+   <varargs.h> argument handling depending on HAVE_STDARG.  */
+void
+#if HAVE_STDARG
+mpq_clears (mpq_ptr x, ...)
+#else
+mpq_clears (va_alist)
+     va_dcl
+#endif
+{
+  va_list  ap;
+
+#if HAVE_STDARG
+  va_start (ap, x);
+#else
+  /* With varargs there is no named parameter, so the first pointer has to
+     be fetched from the list explicitly.  */
+  mpq_ptr x;
+  va_start (ap);
+  x = va_arg (ap, mpq_ptr);
+#endif
+
+  while (x != NULL)
+    {
+      mpq_clear (x);
+      x = va_arg (ap, mpq_ptr);
+    }
+  va_end (ap);
+}
diff --git a/mpq/cmp.c b/mpq/cmp.c

new file mode 100644 (file)

index 0000000..1844c29
--- /dev/null
+++ b/mpq/cmp.c
@@ -0,0 +1,115 @@
+/* mpq_cmp(u,v) -- Compare U, V.  Return positive, zero, or negative
+   based on if U > V, U == V, or U < V.
+
+Copyright 1991, 1994, 1996, 2001, 2002, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Compare OP1 and OP2.  The return value is positive, zero or negative
+   according to whether OP1 > OP2, OP1 == OP2 or OP1 < OP2; only its sign is
+   meaningful.  Both denominators must be positive.
+
+   The comparison is of NUM1 x DEN2 against NUM2 x DEN1, trying cheap size
+   and bit-count tests first and doing the two multiplications only when
+   those tests cannot decide.  */
+int
+mpq_cmp (const MP_RAT *op1, const MP_RAT *op2)
+{
+  mp_size_t num1_size = op1->_mp_num._mp_size;
+  mp_size_t den1_size = op1->_mp_den._mp_size;
+  mp_size_t num2_size = op2->_mp_num._mp_size;
+  mp_size_t den2_size = op2->_mp_den._mp_size;
+  mp_size_t tmp1_size, tmp2_size;
+  mp_ptr tmp1_ptr, tmp2_ptr;
+  mp_size_t num1_sign;
+  int cc;
+  TMP_DECL;
+
+  /* need canonical signs to get right result */
+  ASSERT (den1_size > 0);
+  ASSERT (den2_size > 0);
+
+  /* With positive denominators the signed numerator sizes carry the signs
+     of the operands, so zero or opposite-sign cases are decided here.  */
+  if (num1_size == 0)
+    return -num2_size;
+  if (num2_size == 0)
+    return num1_size;
+  if ((num1_size ^ num2_size) < 0) /* I.e. are the signs different? */
+    return num1_size;
+
+  /* From here on both operands have the same sign, kept in NUM1_SIGN, and
+     the work is done on magnitudes.  */
+  num1_sign = num1_size;
+  num1_size = ABS (num1_size);
+  num2_size = ABS (num2_size);
+
+  tmp1_size = num1_size + den2_size;
+  tmp2_size = num2_size + den1_size;
+
+  /* 1. Check to see if we can tell which operand is larger by just looking at
+     the number of limbs.  */
+
+  /* NUM1 x DEN2 is either TMP1_SIZE limbs or TMP1_SIZE-1 limbs.
+     Same for NUM2 x DEN1 with respect to TMP2_SIZE.  */
+  if (tmp1_size > tmp2_size + 1)
+    /* NUM1 x DEN2 is surely larger in magnitude than NUM2 x DEN1.  */
+    return num1_sign;
+  if (tmp2_size > tmp1_size + 1)
+    /* NUM1 x DEN2 is surely smaller in magnitude than NUM2 x DEN1.  */
+    return -num1_sign;
+
+  /* 2. Same, but compare the number of significant bits.  */
+  {
+    int cnt1, cnt2;
+    mp_bitcnt_t bits1, bits2;
+
+    count_leading_zeros (cnt1, op1->_mp_num._mp_d[num1_size - 1]);
+    count_leading_zeros (cnt2, op2->_mp_den._mp_d[den2_size - 1]);
+    bits1 = tmp1_size * GMP_NUMB_BITS - cnt1 - cnt2 + 2 * GMP_NAIL_BITS;
+
+    count_leading_zeros (cnt1, op2->_mp_num._mp_d[num2_size - 1]);
+    count_leading_zeros (cnt2, op1->_mp_den._mp_d[den1_size - 1]);
+    bits2 = tmp2_size * GMP_NUMB_BITS - cnt1 - cnt2 + 2 * GMP_NAIL_BITS;
+
+    if (bits1 > bits2 + 1)
+      return num1_sign;
+    if (bits2 > bits1 + 1)
+      return -num1_sign;
+  }
+
+  /* 3. Finally, cross multiply and compare.  */
+
+  TMP_MARK;
+  TMP_ALLOC_LIMBS_2 (tmp1_ptr,tmp1_size, tmp2_ptr,tmp2_size);
+
+  /* Each mpn_mul call is given its longer operand first, hence the size
+     tests.  The value returned by mpn_mul is compared with zero to drop one
+     limb from the recorded product size (presumably the returned value is
+     the high limb of the product -- see the mpn_mul documentation).  */
+  if (num1_size >= den2_size)
+    tmp1_size -= 0 == mpn_mul (tmp1_ptr,
+                              op1->_mp_num._mp_d, num1_size,
+                              op2->_mp_den._mp_d, den2_size);
+  else
+    tmp1_size -= 0 == mpn_mul (tmp1_ptr,
+                              op2->_mp_den._mp_d, den2_size,
+                              op1->_mp_num._mp_d, num1_size);
+
+   if (num2_size >= den1_size)
+     tmp2_size -= 0 == mpn_mul (tmp2_ptr,
+                               op2->_mp_num._mp_d, num2_size,
+                               op1->_mp_den._mp_d, den1_size);
+   else
+     tmp2_size -= 0 == mpn_mul (tmp2_ptr,
+                               op1->_mp_den._mp_d, den1_size,
+                               op2->_mp_num._mp_d, num2_size);
+
+
+  /* Products of different length are ordered by length; equal lengths need
+     a limb comparison.  */
+  cc = tmp1_size - tmp2_size != 0
+    ? tmp1_size - tmp2_size : mpn_cmp (tmp1_ptr, tmp2_ptr, tmp1_size);
+  TMP_FREE;
+  /* CC compares magnitudes; for two negative operands the order is
+     reversed.  */
+  return num1_sign < 0 ? -cc : cc;
+}
diff --git a/mpq/cmp_si.c b/mpq/cmp_si.c

new file mode 100644 (file)

index 0000000..a744a98
--- /dev/null
+++ b/mpq/cmp_si.c
@@ -0,0 +1,56 @@
+/* _mpq_cmp_si -- compare mpq and long/ulong fraction.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Something like mpq_cmpabs_ui would be more useful for the neg/neg case,
+   and perhaps a version accepting a parameter to reverse the test, to make
+   it a tail call here.  */
+
+/* Compare Q with the fraction N/D.  The return value is positive, zero or
+   negative according to whether Q > N/D, Q == N/D or Q < N/D.  Q must have
+   a positive denominator.
+
+   Mixed-sign cases are decided from the signs alone.  When both sides are
+   non-negative the work is passed straight to _mpq_cmp_ui; when both are
+   negative the magnitudes are compared with _mpq_cmp_ui and the result is
+   negated.  */
+int
+_mpq_cmp_si (mpq_srcptr q, long n, unsigned long d)
+{
+  /* need canonical sign to get right result */
+  ASSERT (q->_mp_den._mp_size > 0);
+
+  if (q->_mp_num._mp_size >= 0)
+    {
+      if (n >= 0)
+        return _mpq_cmp_ui (q, n, d);            /* >=0 cmp >=0 */
+      else
+        return 1;                                /* >=0 cmp <0 */
+    }
+  else
+    {
+      if (n >= 0)
+        return -1;                               /* <0 cmp >=0 */
+      else
+        {
+          mpq_t  qabs;
+          unsigned long  nabs;
+
+          /* QABS is a shallow alias of Q with a positive numerator size; it
+             shares Q's limb data and only the fields set here are valid.  */
+          qabs->_mp_num._mp_size = ABS (q->_mp_num._mp_size);
+          qabs->_mp_num._mp_d    = q->_mp_num._mp_d;
+          qabs->_mp_den._mp_size = q->_mp_den._mp_size;
+          qabs->_mp_den._mp_d    = q->_mp_den._mp_d;
+
+          /* Form |n| in unsigned arithmetic.  A plain -n overflows (and is
+             undefined) when n is LONG_MIN, whereas the unsigned subtraction
+             is well defined and yields the right magnitude for every
+             negative n.  */
+          nabs = 0UL - (unsigned long) n;
+
+          return - _mpq_cmp_ui (qabs, nabs, d);  /* <0 cmp <0 */
+        }
+    }
+}
diff --git a/mpq/cmp_ui.c b/mpq/cmp_ui.c

new file mode 100644 (file)

index 0000000..8e0b1af
--- /dev/null
+++ b/mpq/cmp_ui.c
@@ -0,0 +1,90 @@
+/* mpq_cmp_ui(u,vn,vd) -- Compare U with Vn/Vd.  Return positive, zero, or
+   negative based on if U > V, U == V, or U < V.  Vn and Vd may have
+   common factors.
+
+Copyright 1993, 1994, 1996, 2000, 2001, 2002, 2003, 2005 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Compare OP1 with the fraction NUM2/DEN2.  The return value is positive,
+   zero or negative according to whether OP1 > NUM2/DEN2, OP1 == NUM2/DEN2
+   or OP1 < NUM2/DEN2; only its sign is meaningful.  OP1 must have a
+   positive denominator.  DEN2 == 0 triggers DIVIDE_BY_ZERO.  */
+int
+_mpq_cmp_ui (const MP_RAT *op1, unsigned long int num2, unsigned long int den2)
+{
+  mp_size_t num1_size = op1->_mp_num._mp_size;
+  mp_size_t den1_size = op1->_mp_den._mp_size;
+  mp_size_t tmp1_size, tmp2_size;
+  mp_ptr tmp1_ptr, tmp2_ptr;
+  mp_limb_t cy_limb;
+  int cc;
+  TMP_DECL;
+
+#if GMP_NAIL_BITS != 0
+  /* With nails, a value above GMP_NUMB_MAX cannot be used as a single limb
+     multiplier below, so build a temporary mpq and use mpq_cmp instead.  */
+  if ((num2 | den2) > GMP_NUMB_MAX)
+    {
+      mpq_t op2;
+      mpq_init (op2);
+      mpz_set_ui (mpq_numref (op2), num2);
+      mpz_set_ui (mpq_denref (op2), den2);
+      cc = mpq_cmp (op1, op2);
+      mpq_clear (op2);
+      return cc;
+    }
+#endif
+
+  /* need canonical sign to get right result */
+  ASSERT (den1_size > 0);
+
+  if (den2 == 0)
+    DIVIDE_BY_ZERO;
+
+  /* NUM2/DEN2 is never negative, so a zero or negative OP1, or a zero NUM2,
+     is decided from the signs alone.  */
+  if (num1_size == 0)
+    return -(num2 != 0);
+  if (num1_size < 0)
+    return num1_size;
+  if (num2 == 0)
+    return num1_size;
+
+  /* DEN2 and NUM2 are used as single limb multipliers below, so NUM1 x DEN2
+     is either NUM1_SIZE or NUM1_SIZE+1 limbs, and NUM2 x DEN1 is either
+     DEN1_SIZE or DEN1_SIZE+1 limbs.  */
+  if (num1_size > den1_size + 1)
+    /* NUM1 x DEN2 is surely larger in magnitude than NUM2 x DEN1.  */
+    return num1_size;
+  if (den1_size > num1_size + 1)
+    /* NUM1 x DEN2 is surely smaller in magnitude than NUM2 x DEN1.  */
+    return -num1_size;
+
+  TMP_MARK;
+  tmp1_ptr = TMP_ALLOC_LIMBS (num1_size + 1);
+  tmp2_ptr = TMP_ALLOC_LIMBS (den1_size + 1);
+
+  /* tmp1 = NUM1 x DEN2; the carry-out limb is stored on top and counted in
+     the size only when it is non-zero.  */
+  cy_limb = mpn_mul_1 (tmp1_ptr, op1->_mp_num._mp_d, num1_size,
+                       (mp_limb_t) den2);
+  tmp1_ptr[num1_size] = cy_limb;
+  tmp1_size = num1_size + (cy_limb != 0);
+
+  /* tmp2 = NUM2 x DEN1, handled the same way.  */
+  cy_limb = mpn_mul_1 (tmp2_ptr, op1->_mp_den._mp_d, den1_size,
+                       (mp_limb_t) num2);
+  tmp2_ptr[den1_size] = cy_limb;
+  tmp2_size = den1_size + (cy_limb != 0);
+
+  /* Products of different length are ordered by length; equal lengths need
+     a limb comparison.  */
+  cc = tmp1_size - tmp2_size != 0
+    ? tmp1_size - tmp2_size : mpn_cmp (tmp1_ptr, tmp2_ptr, tmp1_size);
+  TMP_FREE;
+  return cc;
+}
diff --git a/mpq/div.c b/mpq/div.c

new file mode 100644 (file)

index 0000000..efba32a
--- /dev/null
+++ b/mpq/div.c
@@ -0,0 +1,102 @@
+/* mpq_div -- divide two rational numbers.
+
+Copyright 1991, 1994, 1995, 1996, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Set QUOT to OP1 / OP2.  A zero numerator in OP2 triggers DIVIDE_BY_ZERO.
+   QUOT may be the same object as OP1 or OP2.
+
+   The result is formed as (NUM1/g1 * DEN2/g2) / (NUM2/g1 * DEN1/g2), where
+   g1 = gcd (NUM1, NUM2) and g2 = gcd (DEN1, DEN2), and the denominator is
+   then made positive.  */
+void
+mpq_div (mpq_ptr quot, mpq_srcptr op1, mpq_srcptr op2)
+{
+  mpz_t gcd1, gcd2;
+  mpz_t tmp1, tmp2;
+  mpz_t numtmp;
+  mp_size_t op1_num_size;
+  mp_size_t op1_den_size;
+  mp_size_t op2_num_size;
+  mp_size_t op2_den_size;
+  mp_size_t alloc;
+  TMP_DECL;
+
+  /* Denominator sizes are used without ABS.
+     NOTE(review): this relies on the operands having positive
+     denominators -- confirm callers guarantee it.  */
+  op1_num_size = ABS (op1->_mp_num._mp_size);
+  op1_den_size =      op1->_mp_den._mp_size;
+  op2_num_size = ABS (op2->_mp_num._mp_size);
+  op2_den_size =      op2->_mp_den._mp_size;
+
+  if (op2_num_size == 0)
+    DIVIDE_BY_ZERO;
+
+  if (op1_num_size == 0)
+    {
+      /* We special case this to simplify allocation logic; gcd(0,x) = x
+        is a singular case for the allocations.  */
+      quot->_mp_num._mp_size = 0;
+      quot->_mp_den._mp_d[0] = 1;
+      quot->_mp_den._mp_size = 1;
+      return;
+    }
+
+  TMP_MARK;
+
+  /* Temporaries are sized from the operand sizes: each gcd from the smaller
+     of its two inputs, each quotient holder from the larger, and NUMTMP
+     from the sum of the sizes of the two factors multiplied into it.  */
+  alloc = MIN (op1_num_size, op2_num_size);
+  MPZ_TMP_INIT (gcd1, alloc);
+
+  alloc = MIN (op1_den_size, op2_den_size);
+  MPZ_TMP_INIT (gcd2, alloc);
+
+  alloc = MAX (op1_num_size, op2_num_size);
+  MPZ_TMP_INIT (tmp1, alloc);
+
+  alloc = MAX (op1_den_size, op2_den_size);
+  MPZ_TMP_INIT (tmp2, alloc);
+
+  alloc = op1_num_size + op2_den_size;
+  MPZ_TMP_INIT (numtmp, alloc);
+
+  /* QUOT might be identical to either operand, so don't store the result there
+     until we are finished with the input operands.  We can overwrite the
+     numerator of QUOT when we are finished with the numerators of OP1 and
+     OP2.  */
+
+  mpz_gcd (gcd1, &(op1->_mp_num), &(op2->_mp_num));
+  mpz_gcd (gcd2, &(op2->_mp_den), &(op1->_mp_den));
+
+  /* New numerator: (NUM1 / gcd1) * (DEN2 / gcd2), kept in NUMTMP for now.  */
+  mpz_divexact_gcd (tmp1, &(op1->_mp_num), gcd1);
+  mpz_divexact_gcd (tmp2, &(op2->_mp_den), gcd2);
+
+  mpz_mul (numtmp, tmp1, tmp2);
+
+  /* New denominator: (NUM2 / gcd1) * (DEN1 / gcd2).  Both quotients are
+     taken before QUOT is written, so all inputs have been read by then.  */
+  mpz_divexact_gcd (tmp1, &(op2->_mp_num), gcd1);
+  mpz_divexact_gcd (tmp2, &(op1->_mp_den), gcd2);
+
+  mpz_mul (&(quot->_mp_den), tmp1, tmp2);
+
+  /* We needed to go via NUMTMP to take care of QUOT being the same as OP2.
+     Now move NUMTMP to QUOT->_mp_num.  */
+  mpz_set (&(quot->_mp_num), numtmp);
+
+  /* Keep the denominator positive.  */
+  if (quot->_mp_den._mp_size < 0)
+    {
+      quot->_mp_den._mp_size = -quot->_mp_den._mp_size;
+      quot->_mp_num._mp_size = -quot->_mp_num._mp_size;
+    }
+
+  TMP_FREE;
+}
diff --git a/mpq/equal.c b/mpq/equal.c

new file mode 100644 (file)

index 0000000..36f7d37
--- /dev/null
+++ b/mpq/equal.c
@@ -0,0 +1,58 @@
+/* mpq_equal(u,v) -- Compare U, V.  Return non-zero if they are equal, zero
+   if they are non-equal.
+
+Copyright 1996, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Return 1 if OP1 and OP2 hold identical numerators and denominators
+   (same signed sizes, same limbs), 0 otherwise.  */
+int
+mpq_equal (mpq_srcptr op1, mpq_srcptr op2) __GMP_NOTHROW
+{
+  mp_srcptr  up, vp;
+  mp_size_t  n, k;
+
+  /* A limb-by-limb test only decides equality of the values when both
+     operands are fully canonical.  */
+  ASSERT_MPQ_CANONICAL (op1);
+  ASSERT_MPQ_CANONICAL (op2);
+
+  /* Numerators: the signed sizes must agree, which covers both the sign
+     and the limb count ...  */
+  n = op1->_mp_num._mp_size;
+  if (n != op2->_mp_num._mp_size)
+    return 0;
+
+  /* ... and then every limb of the magnitude must agree.  */
+  n = ABS (n);
+  up = op1->_mp_num._mp_d;
+  vp = op2->_mp_num._mp_d;
+  for (k = 0; k < n; k++)
+    {
+      if (up[k] != vp[k])
+        return 0;
+    }
+
+  /* Denominators: same test, with the size used as stored.  */
+  n = op1->_mp_den._mp_size;
+  if (n != op2->_mp_den._mp_size)
+    return 0;
+
+  up = op1->_mp_den._mp_d;
+  vp = op2->_mp_den._mp_d;
+  for (k = 0; k < n; k++)
+    {
+      if (up[k] != vp[k])
+        return 0;
+    }
+
+  return 1;
+}
diff --git a/mpq/get_d.c b/mpq/get_d.c

new file mode 100644 (file)

index 0000000..0caefed
--- /dev/null
+++ b/mpq/get_d.c
@@ -0,0 +1,165 @@
+/* double mpq_get_d (mpq_t src) -- mpq to double, rounding towards zero.
+
+Copyright 1995, 1996, 2001, 2002, 2003, 2004, 2005 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>  /* for NULL */
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* All that's needed is to get the high 53 bits of the quotient num/den,
+   rounded towards zero.  More than 53 bits is fine, any excess is ignored
+   by mpn_get_d.
+
+   N_QLIMBS is how many quotient limbs we need to satisfy the mantissa of a
+   double, assuming the highest of those limbs is non-zero.  The target
+   qsize for mpn_tdiv_qr is then 1 more than this, since that function may
+   give a zero in the high limb (and non-zero in the second highest).
+
+   The use of 8*sizeof(double) in N_QLIMBS is an overestimate of the
+   mantissa bits, but it gets the same result as the true value (53 or 48 or
+   whatever) when rounded up to a multiple of GMP_NUMB_BITS, for non-nails.
+
+   Enhancements:
+
+   Use the true mantissa size in the N_QLIMBS formula, to save a divide step
+   in nails.
+
+   Examine the high limbs of num and den to see if the highest 1 bit of the
+   quotient will fall high enough that just N_QLIMBS-1 limbs is enough to
+   get the necessary bits, thereby saving a division step.
+
+   Bit shift either num or den to arrange for the above condition on the
+   high 1 bit of the quotient, to save a division step always.  A shift to
+   save a division step is definitely worthwhile with mpn_tdiv_qr, though we
+   may want to reassess this on big num/den when a quotient-only division
+   exists.
+
+   Maybe we could estimate the final exponent using nsize-dsize (and
+   possibly the high limbs of num and den), so as to detect overflow and
+   return infinity or zero quickly.  Overflow is never very helpful to an
+   application, and can therefore probably be regarded as abnormal, but we
+   may still like to optimize it if the conditions are easy.  (This would
+   only be for float formats we know, unknown formats are not important and
+   can be left to mpn_get_d.)
+
+   Future:
+
+   If/when mpn_tdiv_qr supports its qxn parameter we can use that instead of
+   padding n with zeros in temporary space.
+
+   If/when a quotient-only division exists it can be used here immediately.
+   remp is only to satisfy mpn_tdiv_qr, the remainder is not used.
+
+   Alternatives:
+
+   An alternative algorithm, that may be faster:
+   0. Let n be somewhat larger than the number of significant bits in a double.
+   1. Extract the most significant n bits of the denominator, and an equal
+      number of bits from the numerator.
+   2. Interpret the extracted numbers as integers, call them a and b
+      respectively, and develop n bits of the fractions ((a + 1) / b) and
+      (a / (b + 1)) using mpn_divrem.
+   3. If the computed values are identical UP TO THE POSITION WE CARE ABOUT,
+      we are done.  If they are different, repeat the algorithm from step 1,
+      but first let n = n * 2.
+   4. If we end up using all bits from the numerator and denominator, fall
+      back to a plain division.
+   5. Just to make life harder, The computation of a + 1 and b + 1 above
+      might give carry-out...  Needs special handling.  It might work to
+      subtract 1 in both cases instead.
+
+   Not certain if this approach would be faster than a quotient-only
+   division.  Presumably such optimizations are the sort of thing we would
+   like to have helping everywhere that uses a quotient-only division. */
+
+/* Return SRC converted to a double.  SRC must have a positive denominator.
+
+   A zero numerator returns 0.0 at once.  Otherwise the numerator is
+   shortened or zero-padded so that mpn_tdiv_qr produces exactly
+   N_QLIMBS+1 quotient limbs, and that quotient is passed to mpn_get_d
+   together with the sign of the numerator and the exponent adjustment
+   that accounts for the shortening or padding.  */
+double
+mpq_get_d (const MP_RAT *src)
+{
+  double res;
+  mp_srcptr np, dp;
+  mp_ptr remp, tp;
+  mp_size_t nsize = src->_mp_num._mp_size;
+  mp_size_t dsize = src->_mp_den._mp_size;
+  mp_size_t qsize, prospective_qsize, zeros, chop, tsize;
+  mp_size_t sign_quotient = nsize;
+  long exp;
+#define N_QLIMBS (1 + (sizeof (double) + BYTES_PER_MP_LIMB-1) / BYTES_PER_MP_LIMB)
+  mp_limb_t qarr[N_QLIMBS + 1];
+  mp_ptr qp = qarr;
+  TMP_DECL;
+
+  ASSERT (dsize > 0);    /* canonical src */
+
+  /* mpn_get_d below requires a non-zero operand */
+  if (UNLIKELY (nsize == 0))
+    return 0.0;
+
+  TMP_MARK;
+  nsize = ABS (nsize);
+  dsize = ABS (dsize);
+  np = src->_mp_num._mp_d;
+  dp = src->_mp_den._mp_d;
+
+  prospective_qsize = nsize - dsize + 1;   /* from using given n,d */
+  qsize = N_QLIMBS + 1;                    /* desired qsize */
+
+  /* EXP is taken from ZEROS before ZEROS is clamped below, so it is
+     positive when low limbs of n are dropped and negative when n is
+     padded.  */
+  zeros = qsize - prospective_qsize;       /* padding n to get qsize */
+  exp = (long) -zeros * GMP_NUMB_BITS;     /* relative to low of qp */
+
+  chop = MAX (-zeros, 0);                  /* negative zeros means shorten n */
+  np += chop;
+  nsize -= chop;
+  zeros += chop;                           /* now zeros >= 0 */
+
+  tsize = nsize + zeros;                   /* size for possible copy of n */
+
+  if (WANT_TMP_DEBUG)
+    {
+      /* separate blocks, for malloc debugging */
+      remp = TMP_ALLOC_LIMBS (dsize);
+      tp = (zeros > 0 ? TMP_ALLOC_LIMBS (tsize) : NULL);
+    }
+  else
+    {
+      /* one block with conditionalized size, for efficiency */
+      remp = TMP_ALLOC_LIMBS (dsize + (zeros > 0 ? tsize : 0));
+      tp = remp + dsize;
+    }
+
+  /* zero extend n into temporary space, if necessary */
+  if (zeros > 0)
+    {
+      MPN_ZERO (tp, zeros);
+      MPN_COPY (tp+zeros, np, nsize);
+      np = tp;
+      nsize = tsize;
+    }
+
+  /* After the adjustment the operand sizes give exactly QSIZE quotient
+     limbs, which is what QARR has room for.  */
+  ASSERT (qsize == nsize - dsize + 1);
+  mpn_tdiv_qr (qp, remp, (mp_size_t) 0, np, nsize, dp, dsize);
+
+  /* strip possible zero high limb */
+  qsize -= (qp[qsize-1] == 0);
+
+  res = mpn_get_d (qp, qsize, sign_quotient, exp);
+  TMP_FREE;
+  return res;
+}
diff --git a/mpq/get_den.c b/mpq/get_den.c

new file mode 100644 (file)

index 0000000..c3104e6
--- /dev/null
+++ b/mpq/get_den.c
@@ -0,0 +1,33 @@
+/* mpq_get_den(den,rat_src) -- Set DEN to the denominator of RAT_SRC.
+
+Copyright 1991, 1994, 1995, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Copy the denominator of SRC into DEN, growing DEN's limb storage first
+   if it is too small.  The size is taken as stored, without ABS.  */
+void
+mpq_get_den (MP_INT *den, const MP_RAT *src)
+{
+  const mp_size_t nlimbs = src->_mp_den._mp_size;
+
+  /* Make room before copying.  */
+  if (nlimbs > den->_mp_alloc)
+    _mpz_realloc (den, nlimbs);
+
+  MPN_COPY (den->_mp_d, src->_mp_den._mp_d, nlimbs);
+  den->_mp_size = nlimbs;
+}
diff --git a/mpq/get_num.c b/mpq/get_num.c

new file mode 100644 (file)

index 0000000..c45e7ed
--- /dev/null
+++ b/mpq/get_num.c
@@ -0,0 +1,34 @@
+ /* mpq_get_num(num,rat_src) -- Set NUM to the numerator of RAT_SRC.
+
+Copyright 1991, 1994, 1995, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Copy the numerator of SRC, sign included, into NUM, growing NUM's limb
+   storage first if it is too small.  */
+void
+mpq_get_num (MP_INT *num, const MP_RAT *src)
+{
+  const mp_size_t signed_size = src->_mp_num._mp_size;
+  const mp_size_t nlimbs = ABS (signed_size);
+
+  /* Make room for the magnitude before copying.  */
+  if (nlimbs > num->_mp_alloc)
+    _mpz_realloc (num, nlimbs);
+
+  MPN_COPY (num->_mp_d, src->_mp_num._mp_d, nlimbs);
+
+  /* The sign travels in the size field.  */
+  num->_mp_size = signed_size;
+}
diff --git a/mpq/get_str.c b/mpq/get_str.c

new file mode 100644 (file)

index 0000000..68ca34f
--- /dev/null
+++ b/mpq/get_str.c
@@ -0,0 +1,65 @@
+/* mpq_get_str -- mpq to string conversion.
+
+Copyright 2001, 2002, 2006 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <string.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Write Q into STR as a string in base BASE and return STR.  The output is
+   the numerator alone when the denominator is 1, otherwise the numerator, a
+   '/' and the denominator.
+
+   If STR is NULL a buffer is obtained from __gmp_allocate_func and, once
+   the final length is known, resized to exactly len+1 bytes.  Otherwise
+   the caller's buffer is written directly; no size check is made here, so
+   it must already be large enough (the last ASSERT states the recommended
+   size).  ABS(BASE) is asserted to lie in 2..62.  */
+char *
+mpq_get_str (char *str, int base, mpq_srcptr q)
+{
+  size_t  str_alloc, len;
+
+  ASSERT (ABS(base) >= 2);
+  ASSERT (ABS(base) <= 62);
+
+  /* STR_ALLOC stays 0 when the caller supplied the buffer; a non-zero value
+     marks a buffer allocated here and records its size.  */
+  str_alloc = 0;
+  if (str == NULL)
+    {
+      /* This is an overestimate since we don't bother checking how much of
+         the high limbs of num and den are used.  +2 for rounding up the
+         chars per bit of num and den.  +3 for sign, slash and '\0'.  */
+      str_alloc = ((size_t) ((ABS (q->_mp_num._mp_size) + q->_mp_den._mp_size)
+                             * GMP_LIMB_BITS
+                             * mp_bases[ABS(base)].chars_per_bit_exactly))
+                   + 5;
+      str = (char *) (*__gmp_allocate_func) (str_alloc);
+    }
+
+  mpz_get_str (str, base, mpq_numref(q));
+  len = strlen (str);
+  if (! MPZ_EQUAL_1_P (mpq_denref (q)))
+    {
+      /* Overwrite the numerator's terminating '\0' with the slash and
+         continue with the denominator right after it.  */
+      str[len++] = '/';
+      mpz_get_str (str+len, base, mpq_denref(q));
+      len += strlen (str+len);
+    }
+
+  ASSERT (len == strlen(str));
+  ASSERT (str_alloc == 0 || len+1 <= str_alloc);
+  ASSERT (len+1 <=  /* size recommended to applications */
+          mpz_sizeinbase (mpq_numref(q), ABS(base)) +
+          mpz_sizeinbase (mpq_denref(q), ABS(base)) + 3);
+
+  /* Only a buffer allocated here is resized.  */
+  if (str_alloc != 0)
+    __GMP_REALLOCATE_FUNC_MAYBE_TYPE (str, str_alloc, len+1, char);
+
+  return str;
+}
diff --git a/mpq/init.c b/mpq/init.c

new file mode 100644 (file)

index 0000000..4cec0c1
--- /dev/null
+++ b/mpq/init.c
@@ -0,0 +1,38 @@
+/* mpq_init -- Make a new rational number with value 0/1.
+
+Copyright 1991, 1994, 1995, 2000, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Initialize X to the value 0/1.  Numerator and denominator each get one
+   limb of storage from the installed allocation function.  */
+void
+mpq_init (MP_RAT *x)
+{
+  MP_INT *num = &(x->_mp_num);
+  MP_INT *den = &(x->_mp_den);
+
+  /* Numerator: one limb allocated, size 0, i.e. the value zero.  */
+  num->_mp_alloc = 1;
+  num->_mp_d = (mp_ptr) (*__gmp_allocate_func) (BYTES_PER_MP_LIMB);
+  num->_mp_size = 0;
+
+  /* Denominator: one limb allocated, holding the value 1.  */
+  den->_mp_alloc = 1;
+  den->_mp_d = (mp_ptr) (*__gmp_allocate_func) (BYTES_PER_MP_LIMB);
+  den->_mp_d[0] = 1;
+  den->_mp_size = 1;
+
+#ifdef __CHECKER__
+  /* let the low limb look initialized, for the benefit of mpz_get_ui etc */
+  num->_mp_d[0] = 0;
+#endif
+}
diff --git a/mpq/inits.c b/mpq/inits.c

new file mode 100644 (file)

index 0000000..851daaa
--- /dev/null
+++ b/mpq/inits.c
@@ -0,0 +1,56 @@
+/* mpq_inits() -- Initialize multiple mpq_t variables and set them to 0.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#if HAVE_STDARG
+#include <stdarg.h>
+#else
+#include <varargs.h>
+#endif
+
+#include <stdio.h>             /* for NULL */
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if HAVE_STDARG
+mpq_inits (mpq_ptr x, ...)
+#else
+mpq_inits (va_alist)
+     va_dcl
+#endif
+{
+  /* Run mpq_init on each argument in turn; a NULL argument marks the end
+     of the list and is not itself initialized.  */
+  va_list  args;
+
+#if HAVE_STDARG
+  va_start (args, x);
+#else
+  mpq_ptr x;
+  va_start (args);
+  x = va_arg (args, mpq_ptr);
+#endif
+
+  for (; x != NULL; x = va_arg (args, mpq_ptr))
+    mpq_init (x);
+
+  va_end (args);
+}
diff --git a/mpq/inp_str.c b/mpq/inp_str.c

new file mode 100644 (file)

index 0000000..9df6d80
--- /dev/null
+++ b/mpq/inp_str.c
@@ -0,0 +1,65 @@
+/* mpq_inp_str -- read an mpq from a FILE.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <ctype.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+size_t
+mpq_inp_str (mpq_ptr q, FILE *fp, int base)
+{
+  /* Reads "num" or "num/den" in BASE from FP (stdin when FP is NULL) into Q.
+     Returns the count from the mpz readers, or 0 when either returns 0.  */
+  size_t  count;
+  int     ch;
+
+  if (fp == NULL)
+    fp = stdin;
+
+  /* Denominator defaults to 1 for input without a "/den" part.  */
+  SIZ(mpq_denref (q)) = 1;
+  PTR(mpq_denref (q))[0] = 1;
+
+  count = mpz_inp_str (mpq_numref (q), fp, base);
+  if (count == 0)
+    return 0;
+
+  ch = getc (fp);
+  if (ch != '/')
+    {
+      /* Not ours: push the character back and leave it uncounted.  */
+      ungetc (ch, fp);
+      return count;
+    }
+
+  /* The "+ 2" covers the '/' and the character fetched just below.  */
+  ch = getc (fp);
+  count = mpz_inp_str_nowhite (mpq_denref (q), fp, base, ch, count + 2);
+  if (count == 0)
+    {
+      /* No usable denominator: Q is reset to 0/1.  */
+      SIZ(mpq_numref (q)) = 0;
+      SIZ(mpq_denref (q)) = 1;
+      PTR(mpq_denref (q))[0] = 1;
+    }
+
+  return count;
+}
diff --git a/mpq/inv.c b/mpq/inv.c

new file mode 100644 (file)

index 0000000..5143686
--- /dev/null
+++ b/mpq/inv.c
@@ -0,0 +1,67 @@
+/* mpq_inv(dest,src) -- invert a rational number, i.e. set DEST to SRC
+   with the numerator and denominator swapped.
+
+Copyright 1991, 1994, 1995, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpq_inv (MP_RAT *dest, const MP_RAT *src)
+{
+  mp_size_t  new_den_size = SIZ(mpq_numref (src));   /* parts trade places */
+  mp_size_t  new_num_size = SIZ(mpq_denref (src));
+
+  if (new_den_size == 0)
+    DIVIDE_BY_ZERO;
+
+  /* The new denominator has to be positive, so a negative source numerator
+     passes its sign on to the new numerator.  */
+  if (new_den_size < 0)
+    {
+      new_den_size = -new_den_size;
+      new_num_size = -new_num_size;
+    }
+  SIZ(mpq_denref (dest)) = new_den_size;
+  SIZ(mpq_numref (dest)) = new_num_size;
+
+  if (dest != src)
+    {
+      /* Separate operands: grow DEST where needed and copy the limbs over.  */
+      mp_size_t  num_limbs = ABS (new_num_size);
+
+      MPZ_REALLOC (mpq_numref (dest), num_limbs);
+      MPZ_REALLOC (mpq_denref (dest), new_den_size);
+
+      MPN_COPY (PTR(mpq_numref (dest)), PTR(mpq_denref (src)), num_limbs);
+      MPN_COPY (PTR(mpq_denref (dest)), PTR(mpq_numref (src)), new_den_size);
+    }
+  else
+    {
+      /* In place: the size fields were exchanged above, so all that is left
+         is to trade the allocation counts and limb pointers.  */
+      mp_size_t  saved_alloc = dest->_mp_num._mp_alloc;
+      mp_ptr     saved_limbs = dest->_mp_num._mp_d;
+
+      dest->_mp_num._mp_alloc = dest->_mp_den._mp_alloc;
+      dest->_mp_num._mp_d = dest->_mp_den._mp_d;
+
+      dest->_mp_den._mp_alloc = saved_alloc;
+      dest->_mp_den._mp_d = saved_limbs;
+    }
+}
diff --git a/mpq/md_2exp.c b/mpq/md_2exp.c

new file mode 100644 (file)

index 0000000..6179ca3
--- /dev/null
+++ b/mpq/md_2exp.c
@@ -0,0 +1,103 @@
+/* mpq_mul_2exp, mpq_div_2exp - multiply or divide by 2^N */
+
+/*
+Copyright 2000, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Apply the multiplier/divisor 2^n to the fraction l/r.  "r" is shifted
+   right, by at most n bits, until it's odd (if it isn't already), and "l"
+   is shifted left by whatever is then left of n.  */
+
+static void
+mord_2exp (mpz_ptr ldst, mpz_ptr rdst, mpz_srcptr lsrc, mpz_srcptr rsrc,
+           mp_bitcnt_t n)
+{
+  mp_size_t  rsrc_size = SIZ(rsrc);
+  mp_size_t  len = ABS (rsrc_size);
+  mp_ptr     rsrc_ptr = PTR(rsrc);
+  mp_ptr     p, rdst_ptr;
+  mp_limb_t  plow;
+
+  p = rsrc_ptr;                 /* skip zero low limbs while n covers them */
+  plow = *p;
+  while (n >= GMP_NUMB_BITS && plow == 0)
+    {
+      n -= GMP_NUMB_BITS;
+      p++;
+      plow = *p;
+    }
+
+  /* no realloc here if rsrc==rdst, so p and rsrc_ptr remain valid */
+  len -= (p - rsrc_ptr);
+  MPZ_REALLOC (rdst, len);
+  rdst_ptr = PTR(rdst);
+
+  if ((plow & 1) || n == 0)
+    {
+      /* src==dst puts rdst_ptr at or below p, so the copy must ascend */
+      if (p != rdst_ptr)
+        MPN_COPY_INCR (rdst_ptr, p, len);
+    }
+  else
+    {
+      unsigned long  shift;
+      if (plow == 0)
+        shift = n;              /* n < GMP_NUMB_BITS once the loop is done */
+      else
+        {
+          count_trailing_zeros (shift, plow);
+          shift = MIN (shift, n);
+        }
+      mpn_rshift (rdst_ptr, p, len, shift);
+      len -= (rdst_ptr[len-1] == 0);    /* the shift may clear the top limb */
+      n -= shift;
+    }
+  SIZ(rdst) = (rsrc_size >= 0) ? len : -len;    /* keep the sign of rsrc */
+
+  if (n)
+    mpz_mul_2exp (ldst, lsrc, n);       /* remaining bits go to the "l" side */
+  else if (ldst != lsrc)
+    mpz_set (ldst, lsrc);
+}
+
+
+void
+mpq_mul_2exp (mpq_ptr dst, mpq_srcptr src, mp_bitcnt_t n)
+{
+  /* dst = src * 2^n: the denominator is the part shifted right first.  */
+  mord_2exp (&dst->_mp_num, &dst->_mp_den, &src->_mp_num, &src->_mp_den, n);
+}
+
+void
+mpq_div_2exp (mpq_ptr dst, mpq_srcptr src, mp_bitcnt_t n)
+{
+  if (src->_mp_num._mp_size != 0)
+    {
+      /* dst = src / 2^n: here the numerator is the part shifted right.  */
+      mord_2exp (&dst->_mp_den, &dst->_mp_num, &src->_mp_den, &src->_mp_num, n);
+      return;
+    }
+  /* A zero source is stored as 0/1 without going through the helper.  */
+  SIZ(mpq_numref (dst)) = 0;
+  SIZ(mpq_denref (dst)) = 1;
+  PTR(mpq_denref (dst))[0] = 1;
+}
diff --git a/mpq/mul.c b/mpq/mul.c

new file mode 100644 (file)

index 0000000..0214b31
--- /dev/null
+++ b/mpq/mul.c
@@ -0,0 +1,93 @@
+/* mpq_mul -- multiply two rational numbers.
+
+Copyright 1991, 1994, 1995, 1996, 2000, 2001, 2002 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+void
+mpq_mul (mpq_ptr prod, mpq_srcptr op1, mpq_srcptr op2)
+{
+  /* prod = op1 * op2, dividing out the two cross GCDs before multiplying.  */
+  mpz_t      gcd_n1d2, gcd_n2d1;      /* the two cross GCDs */
+  mpz_t      quot_a, quot_b;          /* scratch for the exact quotients */
+  mp_size_t  n1_limbs, d1_limbs;      /* limb counts of op1's parts */
+  mp_size_t  n2_limbs, d2_limbs;      /* limb counts of op2's parts */
+  mp_size_t  limbs;
+  TMP_DECL;
+
+  if (op1 == op2)
+    {
+      /* Squaring: numerator and denominator are squared separately, with
+         no GCD work at all.  */
+      mpz_mul (mpq_numref (prod), mpq_numref (op1), mpq_numref (op1));
+      mpz_mul (mpq_denref (prod), mpq_denref (op1), mpq_denref (op1));
+      return;
+    }
+
+  n1_limbs = ABS (SIZ(mpq_numref (op1)));
+  d1_limbs = SIZ(mpq_denref (op1));
+  n2_limbs = ABS (SIZ(mpq_numref (op2)));
+  d2_limbs = SIZ(mpq_denref (op2));
+
+  if (n1_limbs == 0 || n2_limbs == 0)
+    {
+      /* A zero operand is dealt with up front to keep the temporary sizes
+         below simple; gcd(0,x) = x is a singular case for them.  The
+         product is stored as 0/1.  */
+      SIZ(mpq_numref (prod)) = 0;
+      PTR(mpq_denref (prod))[0] = 1;
+      SIZ(mpq_denref (prod)) = 1;
+      return;
+    }
+
+  TMP_MARK;
+
+  /* GCD temporaries: sized by the shorter input of each pair.  */
+  limbs = MIN (n1_limbs, d2_limbs);
+  MPZ_TMP_INIT (gcd_n1d2, limbs);
+  limbs = MIN (n2_limbs, d1_limbs);
+  MPZ_TMP_INIT (gcd_n2d1, limbs);
+
+  /* Quotient temporaries: reused for numerator and denominator parts, so
+     sized by the longer input of each pair.  */
+  limbs = MAX (n1_limbs, d2_limbs);
+  MPZ_TMP_INIT (quot_a, limbs);
+  limbs = MAX (n2_limbs, d1_limbs);
+  MPZ_TMP_INIT (quot_b, limbs);
+
+  /* PROD may be the same object as OP1 or OP2.  Its numerator is written
+     only once both input numerators have been fully used, and its
+     denominator only at the very end.  */
+  mpz_gcd (gcd_n1d2, mpq_numref (op1), mpq_denref (op2));
+  mpz_gcd (gcd_n2d1, mpq_numref (op2), mpq_denref (op1));
+
+  /* numerator = (n1 / gcd(n1,d2)) * (n2 / gcd(n2,d1)) */
+  mpz_divexact_gcd (quot_a, mpq_numref (op1), gcd_n1d2);
+  mpz_divexact_gcd (quot_b, mpq_numref (op2), gcd_n2d1);
+  mpz_mul (mpq_numref (prod), quot_a, quot_b);
+
+  /* denominator = (d2 / gcd(n1,d2)) * (d1 / gcd(n2,d1)) */
+  mpz_divexact_gcd (quot_a, mpq_denref (op2), gcd_n1d2);
+  mpz_divexact_gcd (quot_b, mpq_denref (op1), gcd_n2d1);
+  mpz_mul (mpq_denref (prod), quot_a, quot_b);
+
+  TMP_FREE;
+}
diff --git a/mpq/neg.c b/mpq/neg.c

new file mode 100644 (file)

index 0000000..972f334
--- /dev/null
+++ b/mpq/neg.c
@@ -0,0 +1,46 @@
+/* mpq_neg -- negate a rational.
+
+Copyright 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define __GMP_FORCE_mpq_neg 1
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+void
+mpq_neg (mpq_ptr dst, mpq_srcptr src)
+{
+  /* Fetched before any store, so that dst == src works too.  */
+  mp_size_t  src_num_size = SIZ(mpq_numref (src));
+
+  if (src != dst)
+    {
+      /* Separate destination: duplicate the limbs of both parts.  */
+      mp_size_t  num_limbs = ABS (src_num_size);
+      mp_size_t  den_limbs = SIZ(mpq_denref (src));
+
+      MPZ_REALLOC (mpq_numref (dst), num_limbs);
+      MPN_COPY (PTR(mpq_numref (dst)), PTR(mpq_numref (src)), num_limbs);
+      MPZ_REALLOC (mpq_denref (dst), den_limbs);
+      MPN_COPY (PTR(mpq_denref (dst)), PTR(mpq_denref (src)), den_limbs);
+      SIZ(mpq_denref (dst)) = den_limbs;
+    }
+  /* The sign lives in the numerator's size field; flipping it negates.  */
+  SIZ(mpq_numref (dst)) = -src_num_size;
+}
diff --git a/mpq/out_str.c b/mpq/out_str.c

new file mode 100644 (file)

index 0000000..ade8e2b
--- /dev/null
+++ b/mpq/out_str.c
@@ -0,0 +1,43 @@
+/* mpq_out_str(stream,base,rational) -- write an mpq to a FILE. */
+
+/*
+Copyright 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+size_t
+mpq_out_str (FILE *stream, int base, mpq_srcptr q)
+{
+  /* Prints "num", or "num/den" when the denominator is not 1.  Returns the
+     summed mpz_out_str counts (plus one for '/'), or 0 on a stream error.  */
+  FILE    *out = (stream != NULL) ? stream : stdout;
+  size_t  total = mpz_out_str (out, base, mpq_numref (q));
+
+  if (mpz_cmp_ui (mpq_denref (q), 1) != 0)
+    {
+      putc ('/', out);
+      total += 1 + mpz_out_str (out, base, mpq_denref (q));
+    }
+
+  if (ferror (out))
+    return 0;
+  return total;
+}
diff --git a/mpq/set.c b/mpq/set.c

new file mode 100644 (file)

index 0000000..5d527be
--- /dev/null
+++ b/mpq/set.c
@@ -0,0 +1,41 @@
+/* mpq_set(dest,src) -- Set DEST to SRC.
+
+Copyright 1991, 1994, 1995, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpq_set (MP_RAT *dest, const MP_RAT *src)
+{
+  /* Numerator and denominator are handled alike: make sure DEST has room,
+     copy the limbs, then store the size field.  The numerator's size
+     carries the sign, so its limb count is the absolute value; the
+     denominator's size is used as it stands.  */
+  mp_size_t  num_size = SIZ(mpq_numref (src));
+  mp_size_t  num_limbs = ABS (num_size);
+  mp_size_t  den_limbs = SIZ(mpq_denref (src));
+
+  MPZ_REALLOC (mpq_numref (dest), num_limbs);
+  MPN_COPY (PTR(mpq_numref (dest)), PTR(mpq_numref (src)), num_limbs);
+  SIZ(mpq_numref (dest)) = num_size;
+
+  MPZ_REALLOC (mpq_denref (dest), den_limbs);
+  MPN_COPY (PTR(mpq_denref (dest)), PTR(mpq_denref (src)), den_limbs);
+  SIZ(mpq_denref (dest)) = den_limbs;
+}
diff --git a/mpq/set_d.c b/mpq/set_d.c

new file mode 100644 (file)

index 0000000..1e806f8
--- /dev/null
+++ b/mpq/set_d.c
@@ -0,0 +1,158 @@
+/* mpq_set_d(mpq_t q, double d) -- Set q to d without rounding.
+
+Copyright 2000, 2002, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#if HAVE_FLOAT_H
+#include <float.h>  /* for DBL_MAX */
+#endif
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#if LIMBS_PER_DOUBLE > 4
+  choke me
+#endif
+
+void
+mpq_set_d (mpq_ptr dest, double d)
+{
+  int negative;
+  mp_exp_t exp;
+  mp_limb_t tp[LIMBS_PER_DOUBLE];
+  mp_ptr np, dp;
+  mp_size_t nn, dn;
+  int c;
+  /* Set dest to exactly d.  NaN and Inf both take the invalid-operation action.  */
+  DOUBLE_NAN_INF_ACTION (d,
+                         __gmp_invalid_operation (),
+                         __gmp_invalid_operation ());
+
+  negative = d < 0;
+  d = ABS (d);
+
+  exp = __gmp_extract_double (tp, d);   /* exp is used below as a limb count */
+  /* There are two main versions of the conversion.  The `then' arm handles
+     numbers with a fractional part (some limb tp[i], i < LIMBS_PER_DOUBLE -
+     exp, is non-zero), while the `else' arm handles integers.  */
+#if LIMBS_PER_DOUBLE == 4
+  if (exp <= 1 || (exp == 2 && (tp[0] | tp[1]) != 0) || (exp == 3 && tp[0] != 0))
+#endif
+#if LIMBS_PER_DOUBLE == 3
+  if (exp <= 1 || (exp == 2 && tp[0] != 0))
+#endif
+#if LIMBS_PER_DOUBLE == 2
+  if (exp <= 1)
+#endif
+    {
+      if (d == 0.0)
+       {
+         SIZ(&(dest->_mp_num)) = 0;
+         SIZ(&(dest->_mp_den)) = 1;
+         PTR(&(dest->_mp_den))[0] = 1;
+         return;
+       }
+      /* up to LIMBS_PER_DOUBLE limbs are stored through np below */
+      dn = -exp;
+      MPZ_REALLOC (&(dest->_mp_num), LIMBS_PER_DOUBLE);
+      np = PTR(&(dest->_mp_num));
+#if LIMBS_PER_DOUBLE == 4
+      if ((tp[0] | tp[1] | tp[2]) == 0)
+       np[0] = tp[3], nn = 1;
+      else if ((tp[0] | tp[1]) == 0)
+       np[1] = tp[3], np[0] = tp[2], nn = 2;
+      else if (tp[0] == 0)
+       np[2] = tp[3], np[1] = tp[2], np[0] = tp[1], nn = 3;
+      else
+       np[3] = tp[3], np[2] = tp[2], np[1] = tp[1], np[0] = tp[0], nn = 4;
+#endif
+#if LIMBS_PER_DOUBLE == 3
+      if ((tp[0] | tp[1]) == 0)
+       np[0] = tp[2], nn = 1;
+      else if (tp[0] == 0)
+       np[1] = tp[2], np[0] = tp[1], nn = 2;
+      else
+       np[2] = tp[2], np[1] = tp[1], np[0] = tp[0], nn = 3;
+#endif
+#if LIMBS_PER_DOUBLE == 2
+      if (tp[0] == 0)
+       np[0] = tp[1], nn = 1;
+      else
+       np[1] = tp[1], np[0] = tp[0], nn = 2;
+#endif
+      dn += nn + 1;     /* denominator is one limb-radix power: nn-exp+1 limbs */
+      ASSERT_ALWAYS (dn > 0);
+      MPZ_REALLOC (&(dest->_mp_den), dn);
+      dp = PTR(&(dest->_mp_den));
+      MPN_ZERO (dp, dn - 1);
+      dp[dn - 1] = 1;
+      count_trailing_zeros (c, np[0] | dp[0]);  /* twos common to both parts */
+      if (c != 0)
+       {
+         mpn_rshift (np, np, nn, c);
+         nn -= np[nn - 1] == 0;
+         mpn_rshift (dp, dp, dn, c);
+         dn -= dp[dn - 1] == 0;
+       }
+      SIZ(&(dest->_mp_den)) = dn;
+      SIZ(&(dest->_mp_num)) = negative ? -nn : nn;
+    }
+  else
+    {
+      nn = exp;
+      MPZ_REALLOC (&(dest->_mp_num), nn);
+      np = PTR(&(dest->_mp_num));
+      switch (nn)
+        {
+       default:
+         MPN_ZERO (np, nn - LIMBS_PER_DOUBLE);
+         np += nn - LIMBS_PER_DOUBLE;
+         /* fall through */
+#if LIMBS_PER_DOUBLE == 2
+       case 2:
+         np[1] = tp[1], np[0] = tp[0];
+         break;
+#endif
+#if LIMBS_PER_DOUBLE == 3
+       case 3:
+         np[2] = tp[2], np[1] = tp[1], np[0] = tp[0];
+         break;
+       case 2:
+         np[1] = tp[2], np[0] = tp[1];
+         break;
+#endif
+#if LIMBS_PER_DOUBLE == 4
+       case 4:
+         np[3] = tp[3], np[2] = tp[2], np[1] = tp[1], np[0] = tp[0];
+         break;
+       case 3:
+         np[2] = tp[3], np[1] = tp[2], np[0] = tp[1];
+         break;
+       case 2:
+         np[1] = tp[3], np[0] = tp[2];
+         break;
+#endif
+       }
+      dp = PTR(&(dest->_mp_den));
+      dp[0] = 1;
+      SIZ(&(dest->_mp_den)) = 1;
+      SIZ(&(dest->_mp_num)) = negative ? -nn : nn;
+    }
+}
diff --git a/mpq/set_den.c b/mpq/set_den.c

new file mode 100644 (file)

index 0000000..641c97d
--- /dev/null
+++ b/mpq/set_den.c
@@ -0,0 +1,34 @@
+/* mpq_set_den(dest,den) -- Set the denominator of DEST from DEN.
+
+Copyright 1991, 1994, 1995, 1996, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpq_set_den (MP_RAT *dest, const MP_INT *den)
+{
+  /* Plain limb copy of DEN into the denominator of DEST, size field (and so
+     sign) included; nothing here reduces the fraction or checks the sign.  */
+  mp_size_t  den_size = SIZ(den);
+  mp_size_t  limbs = ABS (den_size);
+
+  MPZ_REALLOC (mpq_denref (dest), limbs);
+  MPN_COPY (PTR(mpq_denref (dest)), PTR(den), limbs);
+  SIZ(mpq_denref (dest)) = den_size;
+}
diff --git a/mpq/set_f.c b/mpq/set_f.c

new file mode 100644 (file)

index 0000000..ba15844
--- /dev/null
+++ b/mpq/set_f.c
@@ -0,0 +1,99 @@
+/* mpq_set_f -- set an mpq from an mpf.
+
+Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Set q to the exact value of f; the denominator is 1 or a power of 2.  */
+void
+mpq_set_f (mpq_ptr q, mpf_srcptr f)
+{
+  mp_size_t  fexp = EXP(f);             /* exponent, used as a limb count */
+  mp_ptr     fptr = PTR(f);
+  mp_size_t  fsize = SIZ(f);            /* signed; the sign is carried to q */
+  mp_size_t  abs_fsize = ABS(fsize);
+  mp_limb_t  flow;
+
+  if (fsize == 0)
+    {
+      /* set q=0, stored as 0/1 */
+      q->_mp_num._mp_size = 0;
+      q->_mp_den._mp_size = 1;
+      q->_mp_den._mp_d[0] = 1;
+      return;
+    }
+
+  /* strip low zero limbs from f; presumably adjusts fptr, abs_fsize, flow */
+  flow = *fptr;
+  MPN_STRIP_LOW_ZEROS_NOT_ZERO (fptr, abs_fsize, flow);
+
+  if (fexp >= abs_fsize)
+    {
+      /* radix point is to the right of the limbs, no denominator */
+      mp_ptr  num_ptr;
+
+      MPZ_REALLOC (mpq_numref (q), fexp);
+      num_ptr = q->_mp_num._mp_d;
+      MPN_ZERO (num_ptr, fexp - abs_fsize);     /* zero limbs below f's limbs */
+      MPN_COPY (num_ptr + fexp - abs_fsize, fptr, abs_fsize);
+
+      q->_mp_num._mp_size = fsize >= 0 ? fexp : -fexp;
+      q->_mp_den._mp_size = 1;
+      q->_mp_den._mp_d[0] = 1;
+    }
+  else
+    {
+      /* radix point is within or to the left of the limbs, use denominator */
+      mp_ptr     num_ptr, den_ptr;
+      mp_size_t  den_size;
+
+      den_size = abs_fsize - fexp;      /* limbs of f below the radix point */
+      MPZ_REALLOC (mpq_numref (q), abs_fsize);
+      MPZ_REALLOC (mpq_denref (q), den_size+1);
+      num_ptr = q->_mp_num._mp_d;
+      den_ptr = q->_mp_den._mp_d;
+
+      if (flow & 1)
+        {
+          /* no powers of two to strip from numerator */
+
+          MPN_COPY (num_ptr, fptr, abs_fsize);
+          MPN_ZERO (den_ptr, den_size);
+          den_ptr[den_size] = 1;        /* a 1 above den_size zero limbs */
+        }
+      else
+        {
+          /* right shift numerator, adjust denominator accordingly */
+          int  shift;
+
+          den_size--;                   /* the set bit moves one limb down */
+          count_trailing_zeros (shift, flow);
+
+          mpn_rshift (num_ptr, fptr, abs_fsize, shift);
+          abs_fsize -= (num_ptr[abs_fsize-1] == 0);
+          /* denominator divided by 2^shift: a single bit in its new top limb */
+          MPN_ZERO (den_ptr, den_size);
+          den_ptr[den_size] = GMP_LIMB_HIGHBIT >> (shift-1);
+        }
+
+      q->_mp_num._mp_size = fsize >= 0 ? abs_fsize : -abs_fsize;
+      q->_mp_den._mp_size = den_size + 1;
+    }
+}
diff --git a/mpq/set_num.c b/mpq/set_num.c

new file mode 100644 (file)

index 0000000..6c3c564
--- /dev/null
+++ b/mpq/set_num.c
@@ -0,0 +1,34 @@
+/* mpq_set_num(dest,num) -- Set the numerator of DEST from NUM.
+
+Copyright 1991, 1994, 1995, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpq_set_num (MP_RAT *dest, const MP_INT *num)
+{
+  /* Plain limb copy of NUM into the numerator of DEST, size field (and so
+     sign) included; the denominator of DEST is not touched.  */
+  mp_size_t  num_size = SIZ(num);
+  mp_size_t  limbs = ABS (num_size);
+
+  MPZ_REALLOC (mpq_numref (dest), limbs);
+  MPN_COPY (PTR(mpq_numref (dest)), PTR(num), limbs);
+  SIZ(mpq_numref (dest)) = num_size;
+}
diff --git a/mpq/set_si.c b/mpq/set_si.c

new file mode 100644 (file)

index 0000000..2d2bd4a
--- /dev/null
+++ b/mpq/set_si.c
@@ -0,0 +1,54 @@
+/* mpq_set_si(dest,long_num,ulong_den) -- Set DEST to the rational number
+   LONG_NUM/ULONG_DEN.
+
+Copyright 1991, 1994, 1995, 2001, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpq_set_si (MP_RAT *dest, signed long int num, unsigned long int den)
+{
+  /* Set DEST to NUM/DEN.  The only normalization done here is that a zero
+     numerator forces the denominator to 1; other values are stored as
+     given, without reducing to lowest terms.  */
+  if (num == 0)
+    den = 1;
+
+  if (GMP_NUMB_BITS < BITS_PER_ULONG)
+    {
+      /* A long can be wider than the numb part of a limb, so let the mpz
+         setters store the values.  */
+      mpz_set_si (mpq_numref (dest), num);
+      mpz_set_ui (mpq_denref (dest), den);
+      return;
+    }
+
+  /* Otherwise each value fits one limb and is stored directly.  */
+  if (num == 0)
+    SIZ(mpq_numref (dest)) = 0;
+  else
+    {
+      PTR(mpq_numref (dest))[0] = ABS_CAST (unsigned long, num);
+      SIZ(mpq_numref (dest)) = (num > 0) ? 1 : -1;
+    }
+
+  /* DEN == 0 is stored as a size-0 denominator rather than rejected.  */
+  PTR(mpq_denref (dest))[0] = den;
+  SIZ(mpq_denref (dest)) = (den != 0);
+}
diff --git a/mpq/set_str.c b/mpq/set_str.c

new file mode 100644 (file)

index 0000000..fd1c415
--- /dev/null
+++ b/mpq/set_str.c
@@ -0,0 +1,58 @@
+/* mpq_set_str -- string to mpq conversion.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <string.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* FIXME: Would like an mpz_set_mem (or similar) accepting a pointer and
+   length so we wouldn't have to copy the numerator just to null-terminate
+   it.  */
+
+int
+mpq_set_str (mpq_ptr q, const char *str, int base)
+{
+  /* Parse "num" or "num/den" from STR.  Returns mpz_set_str's result for the
+     last part parsed; a non-zero numerator result ends the parse early.  */
+  const char  *sep = strchr (str, '/');
+  char        *num_copy;
+  size_t      num_len;
+  int         status;
+
+  if (sep != NULL)
+    {
+      /* mpz_set_str takes a NUL-terminated string: copy the numerator out */
+      num_len = sep - str;
+      num_copy = __GMP_ALLOCATE_FUNC_TYPE (num_len+1, char);
+      memcpy (num_copy, str, num_len);
+      num_copy[num_len] = '\0';
+      status = mpz_set_str (mpq_numref (q), num_copy, base);
+      (*__gmp_free_func) (num_copy, num_len+1);
+      if (status == 0)
+        status = mpz_set_str (mpq_denref (q), sep+1, base);
+      return status;
+    }
+
+  /* No '/': the string is an integer, so the denominator becomes 1.  */
+  SIZ(mpq_denref (q)) = 1;
+  PTR(mpq_denref (q))[0] = 1;
+  return mpz_set_str (mpq_numref (q), str, base);
+}
diff --git a/mpq/set_ui.c b/mpq/set_ui.c

new file mode 100644 (file)

index 0000000..8f0a9cd
--- /dev/null
+++ b/mpq/set_ui.c
@@ -0,0 +1,50 @@
+/* mpq_set_ui(dest,ulong_num,ulong_den) -- Set DEST to the rational number
+   ULONG_NUM/ULONG_DEN.
+
+Copyright 1991, 1994, 1995, 2001, 2002, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpq_set_ui (MP_RAT *dest, unsigned long int num, unsigned long int den)
+{
+  /* Set DEST to NUM/DEN.  A zero numerator forces the denominator to 1;
+     apart from that the values are stored as given, with no reduction to
+     lowest terms.  */
+  if (num == 0)
+    den = 1;
+
+  if (GMP_NUMB_BITS < BITS_PER_ULONG)
+    {
+      /* A value may need more than one limb: leave it to mpz_set_ui.  */
+      mpz_set_ui (mpq_numref (dest), num);
+      mpz_set_ui (mpq_denref (dest), den);
+      return;
+    }
+
+  /* One limb holds each value.  A size of 0 stands for zero, and the
+     numerator limb is not written in that case.  */
+  if (num != 0)
+    PTR(mpq_numref (dest))[0] = num;
+  SIZ(mpq_numref (dest)) = (num != 0);
+
+  /* DEN == 0 is stored as a size-0 denominator rather than rejected.  */
+  PTR(mpq_denref (dest))[0] = den;
+  SIZ(mpq_denref (dest)) = (den != 0);
+}
diff --git a/mpq/set_z.c b/mpq/set_z.c

new file mode 100644 (file)

index 0000000..8ca980c
--- /dev/null
+++ b/mpq/set_z.c
@@ -0,0 +1,38 @@
+/* mpq_set_z (dest,src) -- Set DEST to SRC.
+
+Copyright 1996, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpq_set_z (mpq_ptr dest, mpz_srcptr src)
+{
+  mpz_ptr num = mpq_numref (dest);
+  mp_size_t ssize = src->_mp_size;
+  mp_size_t nlimbs = ABS (ssize);
+
+  /* Numerator := SRC, growing the limb array first when it is too small.  */
+  if (num->_mp_alloc < nlimbs)
+    _mpz_realloc (num, nlimbs);
+  MPN_COPY (num->_mp_d, src->_mp_d, nlimbs);
+  num->_mp_size = ssize;
+
+  dest->_mp_den._mp_d[0] = 1;	/* denominator := 1 */
+  dest->_mp_den._mp_size = 1;
+}
diff --git a/mpq/swap.c b/mpq/swap.c

new file mode 100644 (file)

index 0000000..e1d96cc
--- /dev/null
+++ b/mpq/swap.c
@@ -0,0 +1,60 @@
+/* mpq_swap (U, V) -- Swap U and V.
+
+Copyright 1997, 1998, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpq_swap (mpq_ptr u, mpq_ptr v) __GMP_NOTHROW
+{
+  /* Only the _mp_alloc, _mp_size and _mp_d fields of each numerator and
+     denominator are exchanged; the limbs _mp_d points at are not copied.  */
+  mpz_ptr un = mpq_numref (u);
+  mpz_ptr vn = mpq_numref (v);
+  mpz_ptr ud = mpq_denref (u);
+  mpz_ptr vd = mpq_denref (v);
+  mp_size_t tmp_alloc, tmp_size;
+  mp_ptr tmp_limbs;
+
+  /* Numerators.  */
+  tmp_alloc = un->_mp_alloc;
+  un->_mp_alloc = vn->_mp_alloc;
+  vn->_mp_alloc = tmp_alloc;
+
+  tmp_size = un->_mp_size;
+  un->_mp_size = vn->_mp_size;
+  vn->_mp_size = tmp_size;
+
+  tmp_limbs = un->_mp_d;
+  un->_mp_d = vn->_mp_d;
+  vn->_mp_d = tmp_limbs;
+
+  /* Denominators.  */
+  tmp_alloc = ud->_mp_alloc;
+  ud->_mp_alloc = vd->_mp_alloc;
+  vd->_mp_alloc = tmp_alloc;
+
+  tmp_size = ud->_mp_size;
+  ud->_mp_size = vd->_mp_size;
+  vd->_mp_size = tmp_size;
+
+  tmp_limbs = ud->_mp_d;
+  ud->_mp_d = vd->_mp_d;
+  vd->_mp_d = tmp_limbs;
+}
diff --git a/mpz/Makefile.am b/mpz/Makefile.am

new file mode 100644 (file)

index 0000000..74c2c34
--- /dev/null
+++ b/mpz/Makefile.am
@@ -0,0 +1,60 @@
+## Process this file with automake to generate Makefile.in
+
+# Copyright 1996, 1998, 1999, 2000, 2001, 2002, 2003 Free Software
+# Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+INCLUDES = -D__GMP_WITHIN_GMP -I$(top_srcdir)
+
+noinst_LTLIBRARIES = libmpz.la
+libmpz_la_SOURCES = aors.h aors_ui.h fits_s.h mul_i.h \
+  add.c add_ui.c abs.c aorsmul.c aorsmul_i.c and.c array_init.c \
+  bin_ui.c bin_uiui.c cdiv_q.c \
+  cdiv_q_ui.c cdiv_qr.c cdiv_qr_ui.c cdiv_r.c cdiv_r_ui.c cdiv_ui.c \
+  cfdiv_q_2exp.c cfdiv_r_2exp.c \
+  clear.c clears.c clrbit.c \
+  cmp.c cmp_d.c cmp_si.c cmp_ui.c cmpabs.c cmpabs_d.c cmpabs_ui.c \
+  com.c combit.c \
+  cong.c cong_2exp.c cong_ui.c \
+  divexact.c divegcd.c dive_ui.c divis.c divis_ui.c divis_2exp.c \
+  dump.c export.c fac_ui.c fdiv_q.c fdiv_q_ui.c \
+  fdiv_qr.c fdiv_qr_ui.c fdiv_r.c fdiv_r_ui.c fdiv_ui.c \
+  fib_ui.c fib2_ui.c \
+  fits_sint.c fits_slong.c fits_sshort.c \
+  fits_uint.c fits_ulong.c fits_ushort.c \
+  gcd.c gcd_ui.c gcdext.c get_d.c get_d_2exp.c get_si.c \
+  get_str.c get_ui.c getlimbn.c hamdist.c \
+  import.c init.c init2.c inits.c inp_raw.c inp_str.c \
+  invert.c ior.c iset.c iset_d.c iset_si.c iset_str.c iset_ui.c \
+  jacobi.c kronsz.c kronuz.c kronzs.c kronzu.c \
+  lcm.c lcm_ui.c lucnum_ui.c lucnum2_ui.c millerrabin.c \
+  mod.c mul.c mul_2exp.c mul_si.c mul_ui.c n_pow_ui.c neg.c nextprime.c \
+  out_raw.c out_str.c perfpow.c perfsqr.c popcount.c pow_ui.c powm.c \
+  powm_sec.c powm_ui.c pprime_p.c random.c random2.c \
+  realloc.c realloc2.c remove.c root.c rootrem.c rrandomb.c \
+  scan0.c scan1.c set.c set_d.c set_f.c set_q.c set_si.c set_str.c \
+  set_ui.c setbit.c size.c sizeinbase.c sqrt.c sqrtrem.c sub.c sub_ui.c \
+  swap.c tdiv_ui.c tdiv_q.c tdiv_q_2exp.c tdiv_q_ui.c tdiv_qr.c \
+  tdiv_qr_ui.c tdiv_r.c tdiv_r_2exp.c tdiv_r_ui.c tstbit.c ui_pow_ui.c \
+  ui_sub.c urandomb.c urandomm.c xor.c
+
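+# fac_ui.h is one of the BUILT_SOURCES at the top level, so normally it has
+# already been built before make recurses into this directory.  If it is
+# needed from here anyway, rebuild it via the parent directory's Makefile.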
+fac_ui.h:
+       cd ..; $(MAKE) $(AM_MAKEFLAGS) mpz/fac_ui.h
diff --git a/mpz/Makefile.in b/mpz/Makefile.in

new file mode 100644 (file)

index 0000000..b0b75ba
--- /dev/null
+++ b/mpz/Makefile.in
@@ -0,0 +1,960 @@
+# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation,
+# Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+# Copyright 1996, 1998, 1999, 2000, 2001, 2002, 2003 Free Software
+# Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+ANSI2KNR = $(top_builddir)/ansi2knr
+subdir = mpz
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
+       $(top_srcdir)/configure.in
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+       $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+LTLIBRARIES = $(noinst_LTLIBRARIES)
+libmpz_la_LIBADD =
+am_libmpz_la_OBJECTS = add$U.lo add_ui$U.lo abs$U.lo aorsmul$U.lo \
+       aorsmul_i$U.lo and$U.lo array_init$U.lo bin_ui$U.lo \
+       bin_uiui$U.lo cdiv_q$U.lo cdiv_q_ui$U.lo cdiv_qr$U.lo \
+       cdiv_qr_ui$U.lo cdiv_r$U.lo cdiv_r_ui$U.lo cdiv_ui$U.lo \
+       cfdiv_q_2exp$U.lo cfdiv_r_2exp$U.lo clear$U.lo clears$U.lo \
+       clrbit$U.lo cmp$U.lo cmp_d$U.lo cmp_si$U.lo cmp_ui$U.lo \
+       cmpabs$U.lo cmpabs_d$U.lo cmpabs_ui$U.lo com$U.lo combit$U.lo \
+       cong$U.lo cong_2exp$U.lo cong_ui$U.lo divexact$U.lo \
+       divegcd$U.lo dive_ui$U.lo divis$U.lo divis_ui$U.lo \
+       divis_2exp$U.lo dump$U.lo export$U.lo fac_ui$U.lo fdiv_q$U.lo \
+       fdiv_q_ui$U.lo fdiv_qr$U.lo fdiv_qr_ui$U.lo fdiv_r$U.lo \
+       fdiv_r_ui$U.lo fdiv_ui$U.lo fib_ui$U.lo fib2_ui$U.lo \
+       fits_sint$U.lo fits_slong$U.lo fits_sshort$U.lo fits_uint$U.lo \
+       fits_ulong$U.lo fits_ushort$U.lo gcd$U.lo gcd_ui$U.lo \
+       gcdext$U.lo get_d$U.lo get_d_2exp$U.lo get_si$U.lo \
+       get_str$U.lo get_ui$U.lo getlimbn$U.lo hamdist$U.lo \
+       import$U.lo init$U.lo init2$U.lo inits$U.lo inp_raw$U.lo \
+       inp_str$U.lo invert$U.lo ior$U.lo iset$U.lo iset_d$U.lo \
+       iset_si$U.lo iset_str$U.lo iset_ui$U.lo jacobi$U.lo \
+       kronsz$U.lo kronuz$U.lo kronzs$U.lo kronzu$U.lo lcm$U.lo \
+       lcm_ui$U.lo lucnum_ui$U.lo lucnum2_ui$U.lo millerrabin$U.lo \
+       mod$U.lo mul$U.lo mul_2exp$U.lo mul_si$U.lo mul_ui$U.lo \
+       n_pow_ui$U.lo neg$U.lo nextprime$U.lo out_raw$U.lo \
+       out_str$U.lo perfpow$U.lo perfsqr$U.lo popcount$U.lo \
+       pow_ui$U.lo powm$U.lo powm_sec$U.lo powm_ui$U.lo pprime_p$U.lo \
+       random$U.lo random2$U.lo realloc$U.lo realloc2$U.lo \
+       remove$U.lo root$U.lo rootrem$U.lo rrandomb$U.lo scan0$U.lo \
+       scan1$U.lo set$U.lo set_d$U.lo set_f$U.lo set_q$U.lo \
+       set_si$U.lo set_str$U.lo set_ui$U.lo setbit$U.lo size$U.lo \
+       sizeinbase$U.lo sqrt$U.lo sqrtrem$U.lo sub$U.lo sub_ui$U.lo \
+       swap$U.lo tdiv_ui$U.lo tdiv_q$U.lo tdiv_q_2exp$U.lo \
+       tdiv_q_ui$U.lo tdiv_qr$U.lo tdiv_qr_ui$U.lo tdiv_r$U.lo \
+       tdiv_r_2exp$U.lo tdiv_r_ui$U.lo tstbit$U.lo ui_pow_ui$U.lo \
+       ui_sub$U.lo urandomb$U.lo urandomm$U.lo xor$U.lo
+libmpz_la_OBJECTS = $(am_libmpz_la_OBJECTS)
+DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
+depcomp =
+am__depfiles_maybe =
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+       $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+       $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
+       $(LDFLAGS) -o $@
+SOURCES = $(libmpz_la_SOURCES)
+DIST_SOURCES = $(libmpz_la_SOURCES)
+ETAGS = etags
+CTAGS = ctags
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ABI = @ABI@
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AS = @AS@
+ASMFLAGS = @ASMFLAGS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CALLING_CONVENTIONS_OBJS = @CALLING_CONVENTIONS_OBJS@
+CC = @CC@
+CCAS = @CCAS@
+CC_FOR_BUILD = @CC_FOR_BUILD@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CPP_FOR_BUILD = @CPP_FOR_BUILD@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFN_LONG_LONG_LIMB = @DEFN_LONG_LONG_LIMB@
+DEFS = @DEFS@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+EXEEXT_FOR_BUILD = @EXEEXT_FOR_BUILD@
+FGREP = @FGREP@
+GMP_LDFLAGS = @GMP_LDFLAGS@
+GMP_LIMB_BITS = @GMP_LIMB_BITS@
+GMP_NAIL_BITS = @GMP_NAIL_BITS@
+GREP = @GREP@
+HAVE_CLOCK_01 = @HAVE_CLOCK_01@
+HAVE_CPUTIME_01 = @HAVE_CPUTIME_01@
+HAVE_GETRUSAGE_01 = @HAVE_GETRUSAGE_01@
+HAVE_GETTIMEOFDAY_01 = @HAVE_GETTIMEOFDAY_01@
+HAVE_HOST_CPU_FAMILY_power = @HAVE_HOST_CPU_FAMILY_power@
+HAVE_HOST_CPU_FAMILY_powerpc = @HAVE_HOST_CPU_FAMILY_powerpc@
+HAVE_SIGACTION_01 = @HAVE_SIGACTION_01@
+HAVE_SIGALTSTACK_01 = @HAVE_SIGALTSTACK_01@
+HAVE_SIGSTACK_01 = @HAVE_SIGSTACK_01@
+HAVE_STACK_T_01 = @HAVE_STACK_T_01@
+HAVE_SYS_RESOURCE_H_01 = @HAVE_SYS_RESOURCE_H_01@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LEX = @LEX@
+LEXLIB = @LEXLIB@
+LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
+LIBCURSES = @LIBCURSES@
+LIBGMPXX_LDFLAGS = @LIBGMPXX_LDFLAGS@
+LIBGMP_DLL = @LIBGMP_DLL@
+LIBGMP_LDFLAGS = @LIBGMP_LDFLAGS@
+LIBM = @LIBM@
+LIBM_FOR_BUILD = @LIBM_FOR_BUILD@
+LIBOBJS = @LIBOBJS@
+LIBREADLINE = @LIBREADLINE@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+M4 = @M4@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
+STRIP = @STRIP@
+TAL_OBJECT = @TAL_OBJECT@
+TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
+U = @U@
+U_FOR_BUILD = @U_FOR_BUILD@
+VERSION = @VERSION@
+WITH_READLINE_01 = @WITH_READLINE_01@
+YACC = @YACC@
+YFLAGS = @YFLAGS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__leading_dot = @am__leading_dot@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+gmp_srclinks = @gmp_srclinks@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+mpn_objects = @mpn_objects@
+mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
+mpn_objs_in_libmp = @mpn_objs_in_libmp@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+INCLUDES = -D__GMP_WITHIN_GMP -I$(top_srcdir)
+noinst_LTLIBRARIES = libmpz.la
+libmpz_la_SOURCES = aors.h aors_ui.h fits_s.h mul_i.h \
+  add.c add_ui.c abs.c aorsmul.c aorsmul_i.c and.c array_init.c \
+  bin_ui.c bin_uiui.c cdiv_q.c \
+  cdiv_q_ui.c cdiv_qr.c cdiv_qr_ui.c cdiv_r.c cdiv_r_ui.c cdiv_ui.c \
+  cfdiv_q_2exp.c cfdiv_r_2exp.c \
+  clear.c clears.c clrbit.c \
+  cmp.c cmp_d.c cmp_si.c cmp_ui.c cmpabs.c cmpabs_d.c cmpabs_ui.c \
+  com.c combit.c \
+  cong.c cong_2exp.c cong_ui.c \
+  divexact.c divegcd.c dive_ui.c divis.c divis_ui.c divis_2exp.c \
+  dump.c export.c fac_ui.c fdiv_q.c fdiv_q_ui.c \
+  fdiv_qr.c fdiv_qr_ui.c fdiv_r.c fdiv_r_ui.c fdiv_ui.c \
+  fib_ui.c fib2_ui.c \
+  fits_sint.c fits_slong.c fits_sshort.c \
+  fits_uint.c fits_ulong.c fits_ushort.c \
+  gcd.c gcd_ui.c gcdext.c get_d.c get_d_2exp.c get_si.c \
+  get_str.c get_ui.c getlimbn.c hamdist.c \
+  import.c init.c init2.c inits.c inp_raw.c inp_str.c \
+  invert.c ior.c iset.c iset_d.c iset_si.c iset_str.c iset_ui.c \
+  jacobi.c kronsz.c kronuz.c kronzs.c kronzu.c \
+  lcm.c lcm_ui.c lucnum_ui.c lucnum2_ui.c millerrabin.c \
+  mod.c mul.c mul_2exp.c mul_si.c mul_ui.c n_pow_ui.c neg.c nextprime.c \
+  out_raw.c out_str.c perfpow.c perfsqr.c popcount.c pow_ui.c powm.c \
+  powm_sec.c powm_ui.c pprime_p.c random.c random2.c \
+  realloc.c realloc2.c remove.c root.c rootrem.c rrandomb.c \
+  scan0.c scan1.c set.c set_d.c set_f.c set_q.c set_si.c set_str.c \
+  set_ui.c setbit.c size.c sizeinbase.c sqrt.c sqrtrem.c sub.c sub_ui.c \
+  swap.c tdiv_ui.c tdiv_q.c tdiv_q_2exp.c tdiv_q_ui.c tdiv_qr.c \
+  tdiv_qr_ui.c tdiv_r.c tdiv_r_2exp.c tdiv_r_ui.c tstbit.c ui_pow_ui.c \
+  ui_sub.c urandomb.c urandomm.c xor.c
+
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps)
+       @for dep in $?; do \
+         case '$(am__configure_deps)' in \
+           *$$dep*) \
+             ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+               && { if test -f $@; then exit 0; else break; fi; }; \
+             exit 1;; \
+         esac; \
+       done; \
+       echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps mpz/Makefile'; \
+       $(am__cd) $(top_srcdir) && \
+         $(AUTOMAKE) --gnu --ignore-deps mpz/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+       @case '$?' in \
+         *config.status*) \
+           cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+         *) \
+           echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+           cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+       esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+clean-noinstLTLIBRARIES:
+       -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES)
+       @list='$(noinst_LTLIBRARIES)'; for p in $$list; do \
+         dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \
+         test "$$dir" != "$$p" || dir=.; \
+         echo "rm -f \"$${dir}/so_locations\""; \
+         rm -f "$${dir}/so_locations"; \
+       done
+libmpz.la: $(libmpz_la_OBJECTS) $(libmpz_la_DEPENDENCIES) 
+       $(LINK)  $(libmpz_la_OBJECTS) $(libmpz_la_LIBADD) $(LIBS)
+
+mostlyclean-compile:
+       -rm -f *.$(OBJEXT)
+
+distclean-compile:
+       -rm -f *.tab.c
+$(top_builddir)/ansi2knr:
+       $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
+
+mostlyclean-kr:
+       -test "$U" = "" || rm -f *_.c
+
+.c.o:
+       $(COMPILE) -c $<
+
+.c.obj:
+       $(COMPILE) -c `$(CYGPATH_W) '$<'`
+
+.c.lo:
+       $(LTCOMPILE) -c -o $@ $<
+abs_.c: abs.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/abs.c; then echo $(srcdir)/abs.c; else echo abs.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+add_.c: add.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/add.c; then echo $(srcdir)/add.c; else echo add.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+add_ui_.c: add_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/add_ui.c; then echo $(srcdir)/add_ui.c; else echo add_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+and_.c: and.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/and.c; then echo $(srcdir)/and.c; else echo and.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+aorsmul_.c: aorsmul.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/aorsmul.c; then echo $(srcdir)/aorsmul.c; else echo aorsmul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+aorsmul_i_.c: aorsmul_i.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/aorsmul_i.c; then echo $(srcdir)/aorsmul_i.c; else echo aorsmul_i.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+array_init_.c: array_init.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/array_init.c; then echo $(srcdir)/array_init.c; else echo array_init.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+bin_ui_.c: bin_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/bin_ui.c; then echo $(srcdir)/bin_ui.c; else echo bin_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+bin_uiui_.c: bin_uiui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/bin_uiui.c; then echo $(srcdir)/bin_uiui.c; else echo bin_uiui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+cdiv_q_.c: cdiv_q.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cdiv_q.c; then echo $(srcdir)/cdiv_q.c; else echo cdiv_q.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+cdiv_q_ui_.c: cdiv_q_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cdiv_q_ui.c; then echo $(srcdir)/cdiv_q_ui.c; else echo cdiv_q_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+cdiv_qr_.c: cdiv_qr.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cdiv_qr.c; then echo $(srcdir)/cdiv_qr.c; else echo cdiv_qr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+cdiv_qr_ui_.c: cdiv_qr_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cdiv_qr_ui.c; then echo $(srcdir)/cdiv_qr_ui.c; else echo cdiv_qr_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+cdiv_r_.c: cdiv_r.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cdiv_r.c; then echo $(srcdir)/cdiv_r.c; else echo cdiv_r.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+cdiv_r_ui_.c: cdiv_r_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cdiv_r_ui.c; then echo $(srcdir)/cdiv_r_ui.c; else echo cdiv_r_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+cdiv_ui_.c: cdiv_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cdiv_ui.c; then echo $(srcdir)/cdiv_ui.c; else echo cdiv_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+cfdiv_q_2exp_.c: cfdiv_q_2exp.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cfdiv_q_2exp.c; then echo $(srcdir)/cfdiv_q_2exp.c; else echo cfdiv_q_2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+cfdiv_r_2exp_.c: cfdiv_r_2exp.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cfdiv_r_2exp.c; then echo $(srcdir)/cfdiv_r_2exp.c; else echo cfdiv_r_2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+clear_.c: clear.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/clear.c; then echo $(srcdir)/clear.c; else echo clear.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+clears_.c: clears.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/clears.c; then echo $(srcdir)/clears.c; else echo clears.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+clrbit_.c: clrbit.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/clrbit.c; then echo $(srcdir)/clrbit.c; else echo clrbit.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+cmp_.c: cmp.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cmp.c; then echo $(srcdir)/cmp.c; else echo cmp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+cmp_d_.c: cmp_d.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cmp_d.c; then echo $(srcdir)/cmp_d.c; else echo cmp_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+cmp_si_.c: cmp_si.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cmp_si.c; then echo $(srcdir)/cmp_si.c; else echo cmp_si.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+cmp_ui_.c: cmp_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cmp_ui.c; then echo $(srcdir)/cmp_ui.c; else echo cmp_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+cmpabs_.c: cmpabs.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cmpabs.c; then echo $(srcdir)/cmpabs.c; else echo cmpabs.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+cmpabs_d_.c: cmpabs_d.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cmpabs_d.c; then echo $(srcdir)/cmpabs_d.c; else echo cmpabs_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+cmpabs_ui_.c: cmpabs_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cmpabs_ui.c; then echo $(srcdir)/cmpabs_ui.c; else echo cmpabs_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+com_.c: com.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/com.c; then echo $(srcdir)/com.c; else echo com.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+combit_.c: combit.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/combit.c; then echo $(srcdir)/combit.c; else echo combit.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+cong_.c: cong.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cong.c; then echo $(srcdir)/cong.c; else echo cong.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+cong_2exp_.c: cong_2exp.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cong_2exp.c; then echo $(srcdir)/cong_2exp.c; else echo cong_2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+cong_ui_.c: cong_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cong_ui.c; then echo $(srcdir)/cong_ui.c; else echo cong_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+dive_ui_.c: dive_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/dive_ui.c; then echo $(srcdir)/dive_ui.c; else echo dive_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+divegcd_.c: divegcd.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divegcd.c; then echo $(srcdir)/divegcd.c; else echo divegcd.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+divexact_.c: divexact.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divexact.c; then echo $(srcdir)/divexact.c; else echo divexact.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+divis_.c: divis.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divis.c; then echo $(srcdir)/divis.c; else echo divis.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+divis_2exp_.c: divis_2exp.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divis_2exp.c; then echo $(srcdir)/divis_2exp.c; else echo divis_2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+divis_ui_.c: divis_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divis_ui.c; then echo $(srcdir)/divis_ui.c; else echo divis_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+dump_.c: dump.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/dump.c; then echo $(srcdir)/dump.c; else echo dump.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+export_.c: export.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/export.c; then echo $(srcdir)/export.c; else echo export.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+fac_ui_.c: fac_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fac_ui.c; then echo $(srcdir)/fac_ui.c; else echo fac_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+fdiv_q_.c: fdiv_q.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fdiv_q.c; then echo $(srcdir)/fdiv_q.c; else echo fdiv_q.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+fdiv_q_ui_.c: fdiv_q_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fdiv_q_ui.c; then echo $(srcdir)/fdiv_q_ui.c; else echo fdiv_q_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+fdiv_qr_.c: fdiv_qr.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fdiv_qr.c; then echo $(srcdir)/fdiv_qr.c; else echo fdiv_qr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+fdiv_qr_ui_.c: fdiv_qr_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fdiv_qr_ui.c; then echo $(srcdir)/fdiv_qr_ui.c; else echo fdiv_qr_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+fdiv_r_.c: fdiv_r.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fdiv_r.c; then echo $(srcdir)/fdiv_r.c; else echo fdiv_r.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+fdiv_r_ui_.c: fdiv_r_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fdiv_r_ui.c; then echo $(srcdir)/fdiv_r_ui.c; else echo fdiv_r_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+fdiv_ui_.c: fdiv_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fdiv_ui.c; then echo $(srcdir)/fdiv_ui.c; else echo fdiv_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+fib2_ui_.c: fib2_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fib2_ui.c; then echo $(srcdir)/fib2_ui.c; else echo fib2_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+fib_ui_.c: fib_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fib_ui.c; then echo $(srcdir)/fib_ui.c; else echo fib_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+fits_sint_.c: fits_sint.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fits_sint.c; then echo $(srcdir)/fits_sint.c; else echo fits_sint.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+fits_slong_.c: fits_slong.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fits_slong.c; then echo $(srcdir)/fits_slong.c; else echo fits_slong.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+fits_sshort_.c: fits_sshort.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fits_sshort.c; then echo $(srcdir)/fits_sshort.c; else echo fits_sshort.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+fits_uint_.c: fits_uint.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fits_uint.c; then echo $(srcdir)/fits_uint.c; else echo fits_uint.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+fits_ulong_.c: fits_ulong.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fits_ulong.c; then echo $(srcdir)/fits_ulong.c; else echo fits_ulong.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+fits_ushort_.c: fits_ushort.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fits_ushort.c; then echo $(srcdir)/fits_ushort.c; else echo fits_ushort.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+gcd_.c: gcd.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcd.c; then echo $(srcdir)/gcd.c; else echo gcd.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+gcd_ui_.c: gcd_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcd_ui.c; then echo $(srcdir)/gcd_ui.c; else echo gcd_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+gcdext_.c: gcdext.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcdext.c; then echo $(srcdir)/gcdext.c; else echo gcdext.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+get_d_.c: get_d.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_d.c; then echo $(srcdir)/get_d.c; else echo get_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+get_d_2exp_.c: get_d_2exp.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_d_2exp.c; then echo $(srcdir)/get_d_2exp.c; else echo get_d_2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+get_si_.c: get_si.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_si.c; then echo $(srcdir)/get_si.c; else echo get_si.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+get_str_.c: get_str.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_str.c; then echo $(srcdir)/get_str.c; else echo get_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+get_ui_.c: get_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_ui.c; then echo $(srcdir)/get_ui.c; else echo get_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+getlimbn_.c: getlimbn.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/getlimbn.c; then echo $(srcdir)/getlimbn.c; else echo getlimbn.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+hamdist_.c: hamdist.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/hamdist.c; then echo $(srcdir)/hamdist.c; else echo hamdist.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+import_.c: import.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/import.c; then echo $(srcdir)/import.c; else echo import.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+init_.c: init.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/init.c; then echo $(srcdir)/init.c; else echo init.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+init2_.c: init2.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/init2.c; then echo $(srcdir)/init2.c; else echo init2.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+inits_.c: inits.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/inits.c; then echo $(srcdir)/inits.c; else echo inits.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+inp_raw_.c: inp_raw.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/inp_raw.c; then echo $(srcdir)/inp_raw.c; else echo inp_raw.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+inp_str_.c: inp_str.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/inp_str.c; then echo $(srcdir)/inp_str.c; else echo inp_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+invert_.c: invert.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/invert.c; then echo $(srcdir)/invert.c; else echo invert.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+ior_.c: ior.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/ior.c; then echo $(srcdir)/ior.c; else echo ior.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+iset_.c: iset.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/iset.c; then echo $(srcdir)/iset.c; else echo iset.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+iset_d_.c: iset_d.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/iset_d.c; then echo $(srcdir)/iset_d.c; else echo iset_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+iset_si_.c: iset_si.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/iset_si.c; then echo $(srcdir)/iset_si.c; else echo iset_si.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+iset_str_.c: iset_str.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/iset_str.c; then echo $(srcdir)/iset_str.c; else echo iset_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+iset_ui_.c: iset_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/iset_ui.c; then echo $(srcdir)/iset_ui.c; else echo iset_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+jacobi_.c: jacobi.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/jacobi.c; then echo $(srcdir)/jacobi.c; else echo jacobi.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+kronsz_.c: kronsz.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/kronsz.c; then echo $(srcdir)/kronsz.c; else echo kronsz.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+kronuz_.c: kronuz.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/kronuz.c; then echo $(srcdir)/kronuz.c; else echo kronuz.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+kronzs_.c: kronzs.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/kronzs.c; then echo $(srcdir)/kronzs.c; else echo kronzs.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+kronzu_.c: kronzu.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/kronzu.c; then echo $(srcdir)/kronzu.c; else echo kronzu.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+lcm_.c: lcm.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/lcm.c; then echo $(srcdir)/lcm.c; else echo lcm.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+lcm_ui_.c: lcm_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/lcm_ui.c; then echo $(srcdir)/lcm_ui.c; else echo lcm_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+lucnum2_ui_.c: lucnum2_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/lucnum2_ui.c; then echo $(srcdir)/lucnum2_ui.c; else echo lucnum2_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+lucnum_ui_.c: lucnum_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/lucnum_ui.c; then echo $(srcdir)/lucnum_ui.c; else echo lucnum_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+millerrabin_.c: millerrabin.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/millerrabin.c; then echo $(srcdir)/millerrabin.c; else echo millerrabin.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mod_.c: mod.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mod.c; then echo $(srcdir)/mod.c; else echo mod.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mul_.c: mul.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul.c; then echo $(srcdir)/mul.c; else echo mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mul_2exp_.c: mul_2exp.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_2exp.c; then echo $(srcdir)/mul_2exp.c; else echo mul_2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mul_si_.c: mul_si.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_si.c; then echo $(srcdir)/mul_si.c; else echo mul_si.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mul_ui_.c: mul_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_ui.c; then echo $(srcdir)/mul_ui.c; else echo mul_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+n_pow_ui_.c: n_pow_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/n_pow_ui.c; then echo $(srcdir)/n_pow_ui.c; else echo n_pow_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+neg_.c: neg.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/neg.c; then echo $(srcdir)/neg.c; else echo neg.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+nextprime_.c: nextprime.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/nextprime.c; then echo $(srcdir)/nextprime.c; else echo nextprime.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+out_raw_.c: out_raw.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/out_raw.c; then echo $(srcdir)/out_raw.c; else echo out_raw.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+out_str_.c: out_str.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/out_str.c; then echo $(srcdir)/out_str.c; else echo out_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+perfpow_.c: perfpow.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/perfpow.c; then echo $(srcdir)/perfpow.c; else echo perfpow.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+perfsqr_.c: perfsqr.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/perfsqr.c; then echo $(srcdir)/perfsqr.c; else echo perfsqr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+popcount_.c: popcount.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/popcount.c; then echo $(srcdir)/popcount.c; else echo popcount.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+pow_ui_.c: pow_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/pow_ui.c; then echo $(srcdir)/pow_ui.c; else echo pow_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+powm_.c: powm.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/powm.c; then echo $(srcdir)/powm.c; else echo powm.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+powm_sec_.c: powm_sec.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/powm_sec.c; then echo $(srcdir)/powm_sec.c; else echo powm_sec.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+powm_ui_.c: powm_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/powm_ui.c; then echo $(srcdir)/powm_ui.c; else echo powm_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+pprime_p_.c: pprime_p.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/pprime_p.c; then echo $(srcdir)/pprime_p.c; else echo pprime_p.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+random_.c: random.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/random.c; then echo $(srcdir)/random.c; else echo random.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+random2_.c: random2.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/random2.c; then echo $(srcdir)/random2.c; else echo random2.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+realloc_.c: realloc.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/realloc.c; then echo $(srcdir)/realloc.c; else echo realloc.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+realloc2_.c: realloc2.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/realloc2.c; then echo $(srcdir)/realloc2.c; else echo realloc2.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+remove_.c: remove.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/remove.c; then echo $(srcdir)/remove.c; else echo remove.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+root_.c: root.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/root.c; then echo $(srcdir)/root.c; else echo root.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+rootrem_.c: rootrem.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/rootrem.c; then echo $(srcdir)/rootrem.c; else echo rootrem.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+rrandomb_.c: rrandomb.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/rrandomb.c; then echo $(srcdir)/rrandomb.c; else echo rrandomb.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+scan0_.c: scan0.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/scan0.c; then echo $(srcdir)/scan0.c; else echo scan0.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+scan1_.c: scan1.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/scan1.c; then echo $(srcdir)/scan1.c; else echo scan1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+set_.c: set.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set.c; then echo $(srcdir)/set.c; else echo set.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+set_d_.c: set_d.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_d.c; then echo $(srcdir)/set_d.c; else echo set_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+set_f_.c: set_f.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_f.c; then echo $(srcdir)/set_f.c; else echo set_f.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+set_q_.c: set_q.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_q.c; then echo $(srcdir)/set_q.c; else echo set_q.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+set_si_.c: set_si.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_si.c; then echo $(srcdir)/set_si.c; else echo set_si.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+set_str_.c: set_str.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_str.c; then echo $(srcdir)/set_str.c; else echo set_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+set_ui_.c: set_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_ui.c; then echo $(srcdir)/set_ui.c; else echo set_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+setbit_.c: setbit.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/setbit.c; then echo $(srcdir)/setbit.c; else echo setbit.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+size_.c: size.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/size.c; then echo $(srcdir)/size.c; else echo size.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+sizeinbase_.c: sizeinbase.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sizeinbase.c; then echo $(srcdir)/sizeinbase.c; else echo sizeinbase.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+sqrt_.c: sqrt.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sqrt.c; then echo $(srcdir)/sqrt.c; else echo sqrt.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+sqrtrem_.c: sqrtrem.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sqrtrem.c; then echo $(srcdir)/sqrtrem.c; else echo sqrtrem.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+sub_.c: sub.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sub.c; then echo $(srcdir)/sub.c; else echo sub.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+sub_ui_.c: sub_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sub_ui.c; then echo $(srcdir)/sub_ui.c; else echo sub_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+swap_.c: swap.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/swap.c; then echo $(srcdir)/swap.c; else echo swap.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+tdiv_q_.c: tdiv_q.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tdiv_q.c; then echo $(srcdir)/tdiv_q.c; else echo tdiv_q.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+tdiv_q_2exp_.c: tdiv_q_2exp.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tdiv_q_2exp.c; then echo $(srcdir)/tdiv_q_2exp.c; else echo tdiv_q_2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+tdiv_q_ui_.c: tdiv_q_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tdiv_q_ui.c; then echo $(srcdir)/tdiv_q_ui.c; else echo tdiv_q_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+tdiv_qr_.c: tdiv_qr.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tdiv_qr.c; then echo $(srcdir)/tdiv_qr.c; else echo tdiv_qr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+tdiv_qr_ui_.c: tdiv_qr_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tdiv_qr_ui.c; then echo $(srcdir)/tdiv_qr_ui.c; else echo tdiv_qr_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+tdiv_r_.c: tdiv_r.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tdiv_r.c; then echo $(srcdir)/tdiv_r.c; else echo tdiv_r.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+tdiv_r_2exp_.c: tdiv_r_2exp.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tdiv_r_2exp.c; then echo $(srcdir)/tdiv_r_2exp.c; else echo tdiv_r_2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+tdiv_r_ui_.c: tdiv_r_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tdiv_r_ui.c; then echo $(srcdir)/tdiv_r_ui.c; else echo tdiv_r_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+tdiv_ui_.c: tdiv_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tdiv_ui.c; then echo $(srcdir)/tdiv_ui.c; else echo tdiv_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+tstbit_.c: tstbit.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tstbit.c; then echo $(srcdir)/tstbit.c; else echo tstbit.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+ui_pow_ui_.c: ui_pow_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/ui_pow_ui.c; then echo $(srcdir)/ui_pow_ui.c; else echo ui_pow_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+ui_sub_.c: ui_sub.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/ui_sub.c; then echo $(srcdir)/ui_sub.c; else echo ui_sub.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+urandomb_.c: urandomb.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/urandomb.c; then echo $(srcdir)/urandomb.c; else echo urandomb.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+urandomm_.c: urandomm.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/urandomm.c; then echo $(srcdir)/urandomm.c; else echo urandomm.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+xor_.c: xor.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/xor.c; then echo $(srcdir)/xor.c; else echo xor.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+abs_.$(OBJEXT) abs_.lo add_.$(OBJEXT) add_.lo add_ui_.$(OBJEXT) \
+add_ui_.lo and_.$(OBJEXT) and_.lo aorsmul_.$(OBJEXT) aorsmul_.lo \
+aorsmul_i_.$(OBJEXT) aorsmul_i_.lo array_init_.$(OBJEXT) \
+array_init_.lo bin_ui_.$(OBJEXT) bin_ui_.lo bin_uiui_.$(OBJEXT) \
+bin_uiui_.lo cdiv_q_.$(OBJEXT) cdiv_q_.lo cdiv_q_ui_.$(OBJEXT) \
+cdiv_q_ui_.lo cdiv_qr_.$(OBJEXT) cdiv_qr_.lo cdiv_qr_ui_.$(OBJEXT) \
+cdiv_qr_ui_.lo cdiv_r_.$(OBJEXT) cdiv_r_.lo cdiv_r_ui_.$(OBJEXT) \
+cdiv_r_ui_.lo cdiv_ui_.$(OBJEXT) cdiv_ui_.lo cfdiv_q_2exp_.$(OBJEXT) \
+cfdiv_q_2exp_.lo cfdiv_r_2exp_.$(OBJEXT) cfdiv_r_2exp_.lo \
+clear_.$(OBJEXT) clear_.lo clears_.$(OBJEXT) clears_.lo \
+clrbit_.$(OBJEXT) clrbit_.lo cmp_.$(OBJEXT) cmp_.lo cmp_d_.$(OBJEXT) \
+cmp_d_.lo cmp_si_.$(OBJEXT) cmp_si_.lo cmp_ui_.$(OBJEXT) cmp_ui_.lo \
+cmpabs_.$(OBJEXT) cmpabs_.lo cmpabs_d_.$(OBJEXT) cmpabs_d_.lo \
+cmpabs_ui_.$(OBJEXT) cmpabs_ui_.lo com_.$(OBJEXT) com_.lo \
+combit_.$(OBJEXT) combit_.lo cong_.$(OBJEXT) cong_.lo \
+cong_2exp_.$(OBJEXT) cong_2exp_.lo cong_ui_.$(OBJEXT) cong_ui_.lo \
+dive_ui_.$(OBJEXT) dive_ui_.lo divegcd_.$(OBJEXT) divegcd_.lo \
+divexact_.$(OBJEXT) divexact_.lo divis_.$(OBJEXT) divis_.lo \
+divis_2exp_.$(OBJEXT) divis_2exp_.lo divis_ui_.$(OBJEXT) divis_ui_.lo \
+dump_.$(OBJEXT) dump_.lo export_.$(OBJEXT) export_.lo \
+fac_ui_.$(OBJEXT) fac_ui_.lo fdiv_q_.$(OBJEXT) fdiv_q_.lo \
+fdiv_q_ui_.$(OBJEXT) fdiv_q_ui_.lo fdiv_qr_.$(OBJEXT) fdiv_qr_.lo \
+fdiv_qr_ui_.$(OBJEXT) fdiv_qr_ui_.lo fdiv_r_.$(OBJEXT) fdiv_r_.lo \
+fdiv_r_ui_.$(OBJEXT) fdiv_r_ui_.lo fdiv_ui_.$(OBJEXT) fdiv_ui_.lo \
+fib2_ui_.$(OBJEXT) fib2_ui_.lo fib_ui_.$(OBJEXT) fib_ui_.lo \
+fits_sint_.$(OBJEXT) fits_sint_.lo fits_slong_.$(OBJEXT) \
+fits_slong_.lo fits_sshort_.$(OBJEXT) fits_sshort_.lo \
+fits_uint_.$(OBJEXT) fits_uint_.lo fits_ulong_.$(OBJEXT) \
+fits_ulong_.lo fits_ushort_.$(OBJEXT) fits_ushort_.lo gcd_.$(OBJEXT) \
+gcd_.lo gcd_ui_.$(OBJEXT) gcd_ui_.lo gcdext_.$(OBJEXT) gcdext_.lo \
+get_d_.$(OBJEXT) get_d_.lo get_d_2exp_.$(OBJEXT) get_d_2exp_.lo \
+get_si_.$(OBJEXT) get_si_.lo get_str_.$(OBJEXT) get_str_.lo \
+get_ui_.$(OBJEXT) get_ui_.lo getlimbn_.$(OBJEXT) getlimbn_.lo \
+hamdist_.$(OBJEXT) hamdist_.lo import_.$(OBJEXT) import_.lo \
+init_.$(OBJEXT) init_.lo init2_.$(OBJEXT) init2_.lo inits_.$(OBJEXT) \
+inits_.lo inp_raw_.$(OBJEXT) inp_raw_.lo inp_str_.$(OBJEXT) \
+inp_str_.lo invert_.$(OBJEXT) invert_.lo ior_.$(OBJEXT) ior_.lo \
+iset_.$(OBJEXT) iset_.lo iset_d_.$(OBJEXT) iset_d_.lo \
+iset_si_.$(OBJEXT) iset_si_.lo iset_str_.$(OBJEXT) iset_str_.lo \
+iset_ui_.$(OBJEXT) iset_ui_.lo jacobi_.$(OBJEXT) jacobi_.lo \
+kronsz_.$(OBJEXT) kronsz_.lo kronuz_.$(OBJEXT) kronuz_.lo \
+kronzs_.$(OBJEXT) kronzs_.lo kronzu_.$(OBJEXT) kronzu_.lo \
+lcm_.$(OBJEXT) lcm_.lo lcm_ui_.$(OBJEXT) lcm_ui_.lo \
+lucnum2_ui_.$(OBJEXT) lucnum2_ui_.lo lucnum_ui_.$(OBJEXT) \
+lucnum_ui_.lo millerrabin_.$(OBJEXT) millerrabin_.lo mod_.$(OBJEXT) \
+mod_.lo mul_.$(OBJEXT) mul_.lo mul_2exp_.$(OBJEXT) mul_2exp_.lo \
+mul_si_.$(OBJEXT) mul_si_.lo mul_ui_.$(OBJEXT) mul_ui_.lo \
+n_pow_ui_.$(OBJEXT) n_pow_ui_.lo neg_.$(OBJEXT) neg_.lo \
+nextprime_.$(OBJEXT) nextprime_.lo out_raw_.$(OBJEXT) out_raw_.lo \
+out_str_.$(OBJEXT) out_str_.lo perfpow_.$(OBJEXT) perfpow_.lo \
+perfsqr_.$(OBJEXT) perfsqr_.lo popcount_.$(OBJEXT) popcount_.lo \
+pow_ui_.$(OBJEXT) pow_ui_.lo powm_.$(OBJEXT) powm_.lo \
+powm_sec_.$(OBJEXT) powm_sec_.lo powm_ui_.$(OBJEXT) powm_ui_.lo \
+pprime_p_.$(OBJEXT) pprime_p_.lo random_.$(OBJEXT) random_.lo \
+random2_.$(OBJEXT) random2_.lo realloc_.$(OBJEXT) realloc_.lo \
+realloc2_.$(OBJEXT) realloc2_.lo remove_.$(OBJEXT) remove_.lo \
+root_.$(OBJEXT) root_.lo rootrem_.$(OBJEXT) rootrem_.lo \
+rrandomb_.$(OBJEXT) rrandomb_.lo scan0_.$(OBJEXT) scan0_.lo \
+scan1_.$(OBJEXT) scan1_.lo set_.$(OBJEXT) set_.lo set_d_.$(OBJEXT) \
+set_d_.lo set_f_.$(OBJEXT) set_f_.lo set_q_.$(OBJEXT) set_q_.lo \
+set_si_.$(OBJEXT) set_si_.lo set_str_.$(OBJEXT) set_str_.lo \
+set_ui_.$(OBJEXT) set_ui_.lo setbit_.$(OBJEXT) setbit_.lo \
+size_.$(OBJEXT) size_.lo sizeinbase_.$(OBJEXT) sizeinbase_.lo \
+sqrt_.$(OBJEXT) sqrt_.lo sqrtrem_.$(OBJEXT) sqrtrem_.lo sub_.$(OBJEXT) \
+sub_.lo sub_ui_.$(OBJEXT) sub_ui_.lo swap_.$(OBJEXT) swap_.lo \
+tdiv_q_.$(OBJEXT) tdiv_q_.lo tdiv_q_2exp_.$(OBJEXT) tdiv_q_2exp_.lo \
+tdiv_q_ui_.$(OBJEXT) tdiv_q_ui_.lo tdiv_qr_.$(OBJEXT) tdiv_qr_.lo \
+tdiv_qr_ui_.$(OBJEXT) tdiv_qr_ui_.lo tdiv_r_.$(OBJEXT) tdiv_r_.lo \
+tdiv_r_2exp_.$(OBJEXT) tdiv_r_2exp_.lo tdiv_r_ui_.$(OBJEXT) \
+tdiv_r_ui_.lo tdiv_ui_.$(OBJEXT) tdiv_ui_.lo tstbit_.$(OBJEXT) \
+tstbit_.lo ui_pow_ui_.$(OBJEXT) ui_pow_ui_.lo ui_sub_.$(OBJEXT) \
+ui_sub_.lo urandomb_.$(OBJEXT) urandomb_.lo urandomm_.$(OBJEXT) \
+urandomm_.lo xor_.$(OBJEXT) xor_.lo : $(ANSI2KNR)
+
+mostlyclean-libtool:  # delete the *.lo files in this directory
+       -rm -f *.lo
+
+clean-libtool:  # remove the .libs and _libs directories
+       -rm -rf .libs _libs
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)  # run mkid over the de-duplicated file list
+       list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       mkid -fID $$unique
+tags: TAGS
+
+TAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+               $(TAGS_FILES) $(LISP)  # run the ETAGS program over the de-duplicated file list
+       set x; \
+       here=`pwd`; \
+       list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       shift; \
+       if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+         test -n "$$unique" || unique=$$empty_fix; \
+         if test $$# -gt 0; then \
+           $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+             "$$@" $$unique; \
+         else \
+           $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+             $$unique; \
+         fi; \
+       fi
+ctags: CTAGS
+CTAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+               $(TAGS_FILES) $(LISP)  # run the CTAGS program unless there is nothing to index
+       list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       test -z "$(CTAGS_ARGS)$$unique" \
+         || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+            $$unique
+
+GTAGS:  # run gtags from the top source dir, passing it the absolute top build dir
+       here=`$(am__cd) $(top_builddir) && pwd` \
+         && $(am__cd) $(top_srcdir) \
+         && gtags -i $(GTAGS_ARGS) "$$here"
+
+distclean-tags:  # remove the generated tag and ID database files
+       -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)  # copy every DISTFILES entry (file or directory) into the dist directory
+       @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+       topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+       list='$(DISTFILES)'; \
+         dist_files=`for file in $$list; do echo $$file; done | \
+         sed -e "s|^$$srcdirstrip/||;t" \
+             -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+       case $$dist_files in \
+         */*) $(MKDIR_P) `echo "$$dist_files" | \
+                          sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+                          sort -u` ;; \
+       esac; \
+       for file in $$dist_files; do \
+         if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+         if test -d $$d/$$file; then \
+           dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+           if test -d "$(distdir)/$$file"; then \
+             find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+           fi; \
+           if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+             cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+             find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+           fi; \
+           cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+         else \
+           test -f "$(distdir)/$$file" \
+           || cp -p $$d/$$file "$(distdir)/$$file" \
+           || exit 1; \
+         fi; \
+       done
+check-am: all-am
+check: check-am
+all-am: Makefile $(LTLIBRARIES)
+installdirs:
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+       @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:  # re-run "install" with INSTALL_PROGRAM overridden by INSTALL_STRIP_PROGRAM
+       $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+         install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+         `test -z '$(STRIP)' || \
+           echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:  # remove CONFIG_CLEAN_FILES, plus the VPATH list when srcdir is not "."
+       -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+       -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:  # only prints a warning; this rule removes nothing itself
+       @echo "This command is intended for maintainers to use"
+       @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \
+       mostlyclean-am
+
+distclean: distclean-am
+       -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+       distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+       -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+       mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: $(top_builddir)/ansi2knr install-am install-strip
+
+.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
+       clean-libtool clean-noinstLTLIBRARIES ctags distclean \
+       distclean-compile distclean-generic distclean-libtool \
+       distclean-tags distdir dvi dvi-am html html-am info info-am \
+       install install-am install-data install-data-am install-dvi \
+       install-dvi-am install-exec install-exec-am install-html \
+       install-html-am install-info install-info-am install-man \
+       install-pdf install-pdf-am install-ps install-ps-am \
+       install-strip installcheck installcheck-am installdirs \
+       maintainer-clean maintainer-clean-generic mostlyclean \
+       mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+       mostlyclean-libtool pdf pdf-am ps ps-am tags uninstall \
+       uninstall-am
+
+
+# These are BUILT_SOURCES at the top-level, so normally they're built before
+# recursing into this directory.
+#
+fac_ui.h:  # fallback: have the parent directory's Makefile build mpz/fac_ui.h
+       cd ..; $(MAKE) $(AM_MAKEFLAGS) mpz/fac_ui.h
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/mpz/abs.c b/mpz/abs.c

new file mode 100644 (file)

index 0000000..1ce4c81
--- /dev/null
+++ b/mpz/abs.c
@@ -0,0 +1,45 @@
+/* mpz_abs(dst, src) -- Assign the absolute value of SRC to DST.
+
+Copyright 1991, 1993, 1994, 1995, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define __GMP_FORCE_mpz_abs 1
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpz_abs (mpz_ptr w, mpz_srcptr u)
+{
+  /* Limb count of |u|: the magnitude of the signed size field.  */
+  mp_size_t nlimbs = ABS (u->_mp_size);
+
+  /* When w and u are the same object the limbs are already in place.  */
+  if (w != u)
+    {
+      mp_ptr dst, src;
+
+      /* Make room before the limb pointers are read.  */
+      if (nlimbs > w->_mp_alloc)
+        _mpz_realloc (w, nlimbs);
+      dst = w->_mp_d;
+      src = u->_mp_d;
+      MPN_COPY (dst, src, nlimbs);
+    }
+
+  w->_mp_size = nlimbs;    /* never negative, so w now holds |u| */
+}
diff --git a/mpz/add.c b/mpz/add.c

new file mode 100644 (file)

index 0000000..60cc416
--- /dev/null
+++ b/mpz/add.c
@@ -0,0 +1,22 @@
+/* mpz_add -- add integers.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#define OPERATION_add
+#include "aors.h"
diff --git a/mpz/add_ui.c b/mpz/add_ui.c

new file mode 100644 (file)

index 0000000..cccb929
--- /dev/null
+++ b/mpz/add_ui.c
@@ -0,0 +1,22 @@
+/* mpz_add_ui -- Add an mpz_t and an unsigned one-word integer.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#define OPERATION_add_ui
+#include "aors_ui.h"
diff --git a/mpz/and.c b/mpz/and.c

new file mode 100644 (file)

index 0000000..d6355e9
--- /dev/null
+++ b/mpz/and.c
@@ -0,0 +1,268 @@
+/* mpz_and -- Logical and.
+
+Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2003, 2005 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpz_and (mpz_ptr res, mpz_srcptr op1, mpz_srcptr op2)
+{
+  mp_srcptr op1_ptr, op2_ptr;
+  mp_size_t op1_size, op2_size;
+  mp_ptr res_ptr;
+  mp_size_t res_size;
+  mp_size_t i;
+  TMP_DECL;
+  /* Set RES to OP1 & OP2, viewing negative operands as two's complement.  */
+  TMP_MARK;
+  op1_size = SIZ(op1);
+  op2_size = SIZ(op2);
+
+  op1_ptr = PTR(op1);
+  op2_ptr = PTR(op2);
+  res_ptr = PTR(res);
+  /* Choose a code path from the signs of the two operands.  */
+  if (op1_size >= 0)
+    {
+      if (op2_size >= 0)
+       {
+         res_size = MIN (op1_size, op2_size);
+         /* First loop finds the size of the result.  */
+         for (i = res_size - 1; i >= 0; i--)
+           if ((op1_ptr[i] & op2_ptr[i]) != 0)
+             break;
+         res_size = i + 1;
+
+         /* Handle allocation, now that we know exactly how much space is
+            needed for the result.  */
+         if (UNLIKELY (ALLOC(res) < res_size))
+           {
+             _mpz_realloc (res, res_size);
+             res_ptr = PTR(res);
+             /* Don't re-read op1_ptr and op2_ptr.  Since res_size <=
+                MIN(op1_size, op2_size), we will not reach this code when op1
+                is identical to res or op2 is identical to res.  */
+           }
+
+         SIZ(res) = res_size;
+          if (LIKELY (res_size != 0))
+            mpn_and_n (res_ptr, op1_ptr, op2_ptr, res_size);
+         return;
+       }
+      else /* op2_size < 0 */
+       {
+         /* Fall through to the code at the end of the function.  */
+       }
+    }
+  else
+    {
+      if (op2_size < 0)
+       {
+         mp_ptr opx;
+         mp_limb_t cy;
+         mp_size_t res_alloc;
+
+         /* Both operands are negative, so will be the result.
+            -((-OP1) & (-OP2)) = -(~(OP1 - 1) & ~(OP2 - 1)) =
+            = ~(~(OP1 - 1) & ~(OP2 - 1)) + 1 =
+            = ((OP1 - 1) | (OP2 - 1)) + 1      */
+
+         /* It might seem as if we could end up with an (invalid) result
+            with a leading zero-limb here when one of the operands is of the
+            type 1,,0,,..,,.0.  But some analysis shows that we surely
+            would get carry into the zero-limb in this situation...  */
+
+         op1_size = -op1_size;
+         op2_size = -op2_size;
+
+         res_alloc = 1 + MAX (op1_size, op2_size);
+
+         opx = TMP_ALLOC_LIMBS (op1_size);
+         mpn_sub_1 (opx, op1_ptr, op1_size, (mp_limb_t) 1);
+         op1_ptr = opx;
+
+         opx = TMP_ALLOC_LIMBS (op2_size);
+         mpn_sub_1 (opx, op2_ptr, op2_size, (mp_limb_t) 1);
+         op2_ptr = opx;
+
+         if (ALLOC(res) < res_alloc)
+           {
+             _mpz_realloc (res, res_alloc);
+             res_ptr = PTR(res);
+             /* Don't re-read OP1_PTR and OP2_PTR.  They point to temporary
+                space--never to the space PTR(res) used to point to before
+                reallocation.  */
+           }
+
+         if (op1_size >= op2_size)
+           {
+             MPN_COPY (res_ptr + op2_size, op1_ptr + op2_size,
+                       op1_size - op2_size);
+             for (i = op2_size - 1; i >= 0; i--)
+               res_ptr[i] = op1_ptr[i] | op2_ptr[i];
+             res_size = op1_size;
+           }
+         else
+           {
+             MPN_COPY (res_ptr + op1_size, op2_ptr + op1_size,
+                       op2_size - op1_size);
+             for (i = op1_size - 1; i >= 0; i--)
+               res_ptr[i] = op1_ptr[i] | op2_ptr[i];
+             res_size = op2_size;
+           }
+
+         cy = mpn_add_1 (res_ptr, res_ptr, res_size, (mp_limb_t) 1);
+         if (cy)
+           {
+             res_ptr[res_size] = cy;
+             res_size++;
+           }
+
+         SIZ(res) = -res_size;
+         TMP_FREE;
+         return;
+       }
+      else
+       {
+         /* We should compute -OP1 & OP2.  Swap OP1 and OP2 and fall
+            through to the code that handles OP1 & -OP2.  */
+          MPZ_SRCPTR_SWAP (op1, op2);
+          MPN_SRCPTR_SWAP (op1_ptr,op1_size, op2_ptr,op2_size);
+       }
+
+    }
+  /* Mixed signs: from here on OP1 >= 0 and OP2 < 0.  */
+  {
+#if ANDNEW
+    mp_size_t op2_lim;
+    mp_size_t count;
+
+    /* OP2 must be negated as with infinite precision.
+
+       Scan from the low end for a non-zero limb.  The first non-zero
+       limb is simply negated (two's complement).  Any subsequent
+       limbs are one's complemented.  Of course, we don't need to
+       handle more limbs than there are limbs in the other, positive
+       operand as the result for those limbs is going to become zero
+       anyway.  */
+
+    /* Scan for the least significant non-zero OP2 limb, and zero the
+       result meanwhile for those limb positions.  (We will surely
+       find a non-zero limb, so we can write the loop with one
+       termination condition only.)  */
+    for (i = 0; op2_ptr[i] == 0; i++)
+      res_ptr[i] = 0;
+    op2_lim = i;
+
+    op2_size = -op2_size;
+
+    if (op1_size <= op2_size)
+      {
+       /* The ones-extended OP2 is >= the zero-extended OP1.
+          RES_SIZE <= OP1_SIZE.  Find the exact size.  */
+       for (i = op1_size - 1; i > op2_lim; i--)
+         if ((op1_ptr[i] & ~op2_ptr[i]) != 0)
+           break;
+       res_size = i + 1;
+       for (i = res_size - 1; i > op2_lim; i--)
+         res_ptr[i] = op1_ptr[i] & ~op2_ptr[i];
+       res_ptr[op2_lim] = op1_ptr[op2_lim] & -op2_ptr[op2_lim];
+       /* Yes, this *can* happen!  */
+       MPN_NORMALIZE (res_ptr, res_size);
+      }
+    else
+      {
+       /* The ones-extended OP2 is < the zero-extended OP1.
+          RES_SIZE == OP1_SIZE, since OP1 is normalized.  */
+       res_size = op1_size;
+       MPN_COPY (res_ptr + op2_size, op1_ptr + op2_size, op1_size - op2_size);
+       for (i = op2_size - 1; i > op2_lim; i--)
+         res_ptr[i] = op1_ptr[i] & ~op2_ptr[i];
+       res_ptr[op2_lim] = op1_ptr[op2_lim] & -op2_ptr[op2_lim];
+      }
+
+    SIZ(res) = res_size;
+#else
+
+    /* OP1 is positive and zero-extended,
+       OP2 is negative and ones-extended.
+       The result will be positive.
+       OP1 & -OP2 = OP1 & ~(OP2 - 1).  */
+
+    mp_ptr opx;
+
+    op2_size = -op2_size;
+    opx = TMP_ALLOC_LIMBS (op2_size);
+    mpn_sub_1 (opx, op2_ptr, op2_size, (mp_limb_t) 1);
+    op2_ptr = opx;
+
+    if (op1_size > op2_size)
+      {
+       /* The result has the same size as OP1, since OP1 is normalized
+          and longer than the ones-extended OP2.  */
+       res_size = op1_size;
+
+       /* Handle allocation, now that we know exactly how much space is
+          needed for the result.  */
+       if (ALLOC(res) < res_size)
+         {
+           _mpz_realloc (res, res_size);
+           res_ptr = PTR(res);
+           /* Don't re-read OP1_PTR or OP2_PTR.  Since res_size = op1_size,
+              we will not reach this code when op1 is identical to res.
+              OP2_PTR points to temporary space.  */
+         }
+
+       MPN_COPY (res_ptr + op2_size, op1_ptr + op2_size, res_size - op2_size);
+       for (i = op2_size - 1; i >= 0; i--)
+         res_ptr[i] = op1_ptr[i] & ~op2_ptr[i];
+
+       SIZ(res) = res_size;
+      }
+    else
+      {
+       /* Find out the exact result size.  Ignore the high limbs of OP2,
+          OP1 is zero-extended and would make the result zero.  */
+       for (i = op1_size - 1; i >= 0; i--)
+         if ((op1_ptr[i] & ~op2_ptr[i]) != 0)
+           break;
+       res_size = i + 1;
+
+       /* Handle allocation, now that we know exactly how much space is
+          needed for the result.  */
+       if (ALLOC(res) < res_size)
+         {
+           _mpz_realloc (res, res_size);
+           res_ptr = PTR(res);
+           /* Don't re-read OP1_PTR.  Since res_size <= op1_size, we will
+              not reach this code when op1 is identical to res.  */
+           /* Don't re-read OP2_PTR.  It points to temporary space--never
+              to the space PTR(res) used to point to before reallocation.  */
+         }
+
+       for (i = res_size - 1; i >= 0; i--)
+         res_ptr[i] = op1_ptr[i] & ~op2_ptr[i];
+
+       SIZ(res) = res_size;
+      }
+#endif
+  }
+  TMP_FREE;
+}
diff --git a/mpz/aors.h b/mpz/aors.h

new file mode 100644 (file)

index 0000000..208c515
--- /dev/null
+++ b/mpz/aors.h
@@ -0,0 +1,132 @@
+/* mpz_add, mpz_sub -- add or subtract integers.
+
+Copyright 1991, 1993, 1994, 1996, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+#ifdef BERKELEY_MP
+
+#include "mp.h"
+#ifdef OPERATION_add
+#define FUNCTION     madd
+#define VARIATION
+#endif
+#ifdef OPERATION_sub
+#define FUNCTION     msub
+#define VARIATION    -
+#endif
+#define ARGUMENTS    mpz_srcptr u, mpz_srcptr v, mpz_ptr w
+
+#else /* normal GMP */
+
+#ifdef OPERATION_add
+#define FUNCTION     mpz_add
+#define VARIATION
+#endif
+#ifdef OPERATION_sub
+#define FUNCTION     mpz_sub
+#define VARIATION    -
+#endif
+#define ARGUMENTS    mpz_ptr w, mpz_srcptr u, mpz_srcptr v
+
+#endif
+
+#ifndef FUNCTION
+Error, need OPERATION_add or OPERATION_sub
+#endif
+
+
+/* Set W to U + V or U - V, whichever the OPERATION_ macro in force selects.
+   VARIATION is applied to V's size field, so a subtraction is carried out
+   as the addition of -V.
+
+   Size fields are signed limb counts whose sign is the sign of the number.
+   W may be the same object as U and/or V; that is why W is grown before
+   any limb pointer is read.  */
+void
+FUNCTION (ARGUMENTS)
+{
+  mp_srcptr up, vp;
+  mp_ptr wp;
+  mp_size_t un, vn;          /* signed sizes; vn already includes VARIATION */
+  mp_size_t ulen, vlen;      /* limb counts: ABS of the above */
+  mp_size_t wn;              /* size of the result */
+  int v_larger = 0;          /* set when |V| > |U| in the opposite-sign case */
+
+  un = u->_mp_size;
+  vn = VARIATION v->_mp_size;
+  ulen = ABS (un);
+  vlen = ABS (vn);
+
+  /* Make U the operand with the larger limb count, so that ULEN >= VLEN
+     holds from here on.  */
+  if (vlen > ulen)
+    {
+      MPZ_SRCPTR_SWAP (u, v);
+      MP_SIZE_T_SWAP (un, vn);
+      MP_SIZE_T_SWAP (ulen, vlen);
+    }
+
+  /* The result can be one limb longer than U (carry out of an addition),
+     so make sure W has room for ULEN + 1 limbs.  */
+  if (ulen + 1 > w->_mp_alloc)
+    _mpz_realloc (w, ulen + 1);
+
+  /* Read the limb pointers only after the realloc: U or V may be the same
+     object as W.  */
+  up = u->_mp_d;
+  vp = v->_mp_d;
+  wp = w->_mp_d;
+
+  if ((un ^ vn) >= 0)
+    {
+      /* Same sign: add the magnitudes.  The carry-out is stored as the top
+         limb and is counted in the size only when it is non-zero.  */
+      mp_limb_t carry = mpn_add (wp, up, ulen, vp, vlen);
+
+      wp[ulen] = carry;
+      wn = ulen + carry;
+    }
+  else
+    {
+      /* Opposite signs: subtract the smaller magnitude from the larger.
+         Only when the limb counts are equal do the limbs themselves have
+         to be compared to find out which operand is larger.  */
+      v_larger = (ulen == vlen && mpn_cmp (up, vp, ulen) < 0);
+
+      if (v_larger)
+        mpn_sub_n (wp, vp, up, ulen);
+      else if (ulen == vlen)
+        mpn_sub_n (wp, up, vp, ulen);
+      else
+        mpn_sub (wp, up, ulen, vp, vlen);
+
+      /* Strip any high zero limbs left by the subtraction.  */
+      wn = ulen;
+      MPN_NORMALIZE (wp, wn);
+    }
+
+  /* The result has U's sign, unless V was found to be the larger operand
+     above, in which case it has the opposite sign.  */
+  if ((un < 0) != v_larger)
+    wn = -wn;
+
+  w->_mp_size = wn;
+}
diff --git a/mpz/aors_ui.h b/mpz/aors_ui.h

new file mode 100644 (file)

index 0000000..b438b32
--- /dev/null
+++ b/mpz/aors_ui.h
@@ -0,0 +1,113 @@
+/* mpz_add_ui, mpz_sub_ui -- Add or subtract an mpz_t and an unsigned
+   one-word integer.
+
+Copyright 1991, 1993, 1994, 1996, 1999, 2000, 2001, 2002, 2004 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+#ifdef OPERATION_add_ui
+#define FUNCTION          mpz_add_ui
+#define FUNCTION2         mpz_add
+#define VARIATION_CMP     >=
+#define VARIATION_NEG
+#define VARIATION_UNNEG   -
+#endif
+
+#ifdef OPERATION_sub_ui
+#define FUNCTION          mpz_sub_ui
+#define FUNCTION2         mpz_sub
+#define VARIATION_CMP     <
+#define VARIATION_NEG     -
+#define VARIATION_UNNEG
+#endif
+
+#ifndef FUNCTION
+Error, need OPERATION_add_ui or OPERATION_sub_ui
+#endif
+
+
+void
+FUNCTION (mpz_ptr w, mpz_srcptr u, unsigned long int vval)
+{
+  mp_srcptr up;
+  mp_ptr wp;
+  mp_size_t usize, wsize;
+  mp_size_t abs_usize;
+  /* Set W to U + VVAL or U - VVAL, according to the OPERATION_ macro in use.  */
+#if BITS_PER_ULONG > GMP_NUMB_BITS  /* avoid warnings about shift amount */
+  if (vval > GMP_NUMB_MAX)  /* VVAL needs two limbs: defer to FUNCTION2 */
+    {
+      mpz_t v;
+      mp_limb_t vl[2];
+      PTR(v) = vl;
+      vl[0] = vval & GMP_NUMB_MASK;
+      vl[1] = vval >> GMP_NUMB_BITS;
+      SIZ(v) = 2;
+      FUNCTION2 (w, u, v);
+      return;
+    }
+#endif
+
+  usize = u->_mp_size;
+  abs_usize = ABS (usize);
+
+  /* If not space for W (and possible carry), increase space.  */
+  wsize = abs_usize + 1;
+  if (w->_mp_alloc < wsize)
+    _mpz_realloc (w, wsize);
+
+  /* These must be after realloc (U may be the same as W).  */
+  up = u->_mp_d;
+  wp = w->_mp_d;
+  /* U is zero: the result is just VVAL with the operation's sign.  */
+  if (abs_usize == 0)
+    {
+      wp[0] = vval;
+      w->_mp_size = VARIATION_NEG (vval != 0);
+      return;
+    }
+  /* U's sign is such that the magnitudes add: |W| = |U| + VVAL.  */
+  if (usize VARIATION_CMP 0)
+    {
+      mp_limb_t cy;
+      cy = mpn_add_1 (wp, up, abs_usize, (mp_limb_t) vval);
+      wp[abs_usize] = cy;
+      wsize = VARIATION_NEG (abs_usize + cy);
+    }
+  else
+    {
+      /* The signs are different.  Need exact comparison to determine
+        which operand to subtract from which.  */
+      if (abs_usize == 1 && up[0] < vval)
+       {
+         wp[0] = vval - up[0];
+         wsize = VARIATION_NEG 1;
+       }
+      else
+       {
+         mpn_sub_1 (wp, up, abs_usize, (mp_limb_t) vval);
+         /* Size can decrease by at most one limb.  */
+         wsize = VARIATION_UNNEG (abs_usize - (wp[abs_usize - 1] == 0));
+       }
+    }
+
+  w->_mp_size = wsize;
+}
diff --git a/mpz/aorsmul.c b/mpz/aorsmul.c

new file mode 100644 (file)

index 0000000..8b145b3
--- /dev/null
+++ b/mpz/aorsmul.c
@@ -0,0 +1,154 @@
+/* mpz_addmul, mpz_submul -- add or subtract multiple.
+
+Copyright 2001, 2004, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* True if {xp,xsize} < {yp,ysize}; expects non-zero high limbs in x and y.  */
+#define mpn_cmp_twosizes_lt(xp,xsize, yp,ysize)                 \
+  ((xsize) < (ysize)                                            \
+   || ((xsize) == (ysize) && mpn_cmp (xp, yp, xsize) < 0))
+
+
+/* sub>=0 means an addmul w += x*y, sub<0 means a submul w -= x*y.
+
+   The signs of w, x and y are fully accounted for by each flipping "sub".
+
+   The sign of w is retained for the result, unless the absolute value
+   submul underflows, in which case it flips.  */
+
+static void __gmpz_aorsmul __GMP_PROTO ((REGPARM_3_1 (mpz_ptr w, mpz_srcptr x, mpz_srcptr y, mp_size_t sub))) REGPARM_ATTR (1);
+#define mpz_aorsmul(w,x,y,sub)  __gmpz_aorsmul (REGPARM_3_1 (w, x, y, sub))
+
+REGPARM_ATTR (1) static void
+mpz_aorsmul (mpz_ptr w, mpz_srcptr x, mpz_srcptr y, mp_size_t sub)
+{
+  mp_size_t  xsize, ysize, tsize, wsize, wsize_signed;
+  mp_ptr     wp, tp;
+  mp_limb_t  c, high;
+  TMP_DECL;
+
+  /* w unaffected if x==0 or y==0 */
+  xsize = SIZ(x);
+  ysize = SIZ(y);
+  if (xsize == 0 || ysize == 0)
+    return;
+
+  /* make x the bigger of the two */
+  if (ABS(ysize) > ABS(xsize))
+    {
+      MPZ_SRCPTR_SWAP (x, y);
+      MP_SIZE_T_SWAP (xsize, ysize);
+    }
+  /* Fold y's sign into sub (a negative ysize flips sub's sign bit).  */
+  sub ^= ysize;
+  ysize = ABS(ysize);
+
+  /* use mpn_addmul_1/mpn_submul_1 if possible */
+  if (ysize == 1)
+    {
+      mpz_aorsmul_1 (w, x, PTR(y)[0], sub);
+      return;
+    }
+  /* Likewise fold in the sign of x here, and the sign of w just below.  */
+  sub ^= xsize;
+  xsize = ABS(xsize);
+
+  wsize_signed = SIZ(w);
+  sub ^= wsize_signed;
+  wsize = ABS(wsize_signed);
+
+  tsize = xsize + ysize;
+  MPZ_REALLOC (w, MAX (wsize, tsize) + 1);
+  wp = PTR(w);
+
+  if (wsize_signed == 0)
+    {
+      /* Nothing to add to, just set w=x*y.  No w==x or w==y overlap here,
+         since we know x,y!=0 but w==0.  */
+      high = mpn_mul (wp, PTR(x),xsize, PTR(y),ysize);
+      tsize -= (high == 0);
+      SIZ(w) = (sub >= 0 ? tsize : -tsize);
+      return;
+    }
+  /* General case: form x*y in temporary space, then combine it with w.  */
+  TMP_MARK;
+  tp = TMP_ALLOC_LIMBS (tsize);
+
+  high = mpn_mul (tp, PTR(x),xsize, PTR(y),ysize);
+  tsize -= (high == 0);
+  ASSERT (tp[tsize-1] != 0);
+  if (sub >= 0)
+    {
+      mp_srcptr up    = wp;
+      mp_size_t usize = wsize;
+      /* Order the operands so that the longer one is passed first.  */
+      if (usize < tsize)
+        {
+          up    = tp;
+          usize = tsize;
+          tp    = wp;
+          tsize = wsize;
+
+          wsize = usize;
+        }
+
+      c = mpn_add (wp, up,usize, tp,tsize);
+      wp[wsize] = c;
+      wsize += (c != 0);
+    }
+  else
+    {
+      mp_srcptr up    = wp;
+      mp_size_t usize = wsize;
+      /* Subtract smaller from larger; w's sign flips if x*y is the larger.  */
+      if (mpn_cmp_twosizes_lt (up,usize, tp,tsize))
+        {
+          up    = tp;
+          usize = tsize;
+          tp    = wp;
+          tsize = wsize;
+
+          wsize = usize;
+          wsize_signed = -wsize_signed;
+        }
+
+      ASSERT_NOCARRY (mpn_sub (wp, up,usize, tp,tsize));
+      wsize = usize;
+      MPN_NORMALIZE (wp, wsize);
+    }
+
+  SIZ(w) = (wsize_signed >= 0 ? wsize : -wsize);
+
+  TMP_FREE;
+}
+
+
+void
+mpz_addmul (mpz_ptr w, mpz_srcptr u, mpz_srcptr v)
+{
+  mpz_aorsmul (w, u, v, (mp_size_t) 0);  /* sub >= 0 selects w += u*v */
+}
+
+void
+mpz_submul (mpz_ptr w, mpz_srcptr u, mpz_srcptr v)
+{
+  mpz_aorsmul (w, u, v, (mp_size_t) -1);  /* sub < 0 selects w -= u*v */
+}
diff --git a/mpz/aorsmul_i.c b/mpz/aorsmul_i.c

new file mode 100644 (file)

index 0000000..b3c2efa
--- /dev/null
+++ b/mpz/aorsmul_i.c
@@ -0,0 +1,246 @@
+/* mpz_addmul_ui, mpz_submul_ui - add or subtract small multiple.
+
+   THE mpz_aorsmul_1 FUNCTION IN THIS FILE IS FOR INTERNAL USE ONLY AND IS
+   ALMOST CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR
+   COMPLETELY IN FUTURE GNU MP RELEASES.
+
+Copyright 2001, 2002, 2004, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+#if HAVE_NATIVE_mpn_mul_1c  /* MPN_MUL_1C: dst = src*n + cin, carry in cout */
+#define MPN_MUL_1C(cout, dst, src, size, n, cin)        \
+  do {                                                  \
+    (cout) = mpn_mul_1c (dst, src, size, n, cin);       \
+  } while (0)
+#else  /* no native mpn_mul_1c: mpn_mul_1, then add cin in with mpn_add_1 */
+#define MPN_MUL_1C(cout, dst, src, size, n, cin)        \
+  do {                                                  \
+    mp_limb_t __cy;                                     \
+    __cy = mpn_mul_1 (dst, src, size, n);               \
+    (cout) = __cy + mpn_add_1 (dst, dst, size, cin);    \
+  } while (0)
+#endif  /* HAVE_NATIVE_mpn_mul_1c */
+
+
+/* sub>=0 means an addmul w += x*y, sub<0 means a submul w -= x*y.
+
+   All that's needed to account for negative w or x is to flip "sub".
+
+   The final w will retain its sign, unless an underflow occurs in a submul
+   of absolute values, in which case it's flipped.
+
+   If x has more limbs than w, then mpn_submul_1 followed by mpn_com is
+   used.  The alternative would be mpn_mul_1 into temporary space followed
+   by mpn_sub_n.  Avoiding temporary space seems good, and submul+com stands
+   a chance of being faster since it involves only one set of carry
+   propagations, not two.  Note that doing an addmul_1 with a
+   twos-complement negative y doesn't work, because it effectively adds an
+   extra x * 2^GMP_LIMB_BITS.  */
+
+REGPARM_ATTR(1) void
+mpz_aorsmul_1 (mpz_ptr w, mpz_srcptr x, mp_limb_t y, mp_size_t sub)
+{
+  mp_size_t  xsize, wsize, wsize_signed, new_wsize, min_size, dsize;
+  mp_srcptr  xp;
+  mp_ptr     wp;
+  mp_limb_t  cy;
+
+  /* w += x*y if sub>=0, else w -= x*y.  w unaffected if x==0 or y==0 */
+  xsize = SIZ (x);
+  if (xsize == 0 || y == 0)
+    return;
+  /* only the sign of "sub" is ever tested, so xor folds x's sign into it */
+  sub ^= xsize;
+  xsize = ABS (xsize);
+
+  wsize_signed = SIZ (w);
+  if (wsize_signed == 0)
+    {
+      /* nothing to add to, just set x*y, "sub" gives the sign */
+      MPZ_REALLOC (w, xsize+1);
+      wp = PTR (w);
+      cy = mpn_mul_1 (wp, PTR(x), xsize, y);
+      wp[xsize] = cy;
+      xsize += (cy != 0);
+      SIZ (w) = (sub >= 0 ? xsize : -xsize);
+      return;
+    }
+  /* with w's sign folded in too, sub>=0 means the magnitudes are added */
+  sub ^= wsize_signed;
+  wsize = ABS (wsize_signed);
+
+  new_wsize = MAX (wsize, xsize);
+  MPZ_REALLOC (w, new_wsize+1);
+  wp = PTR (w);
+  xp = PTR (x);
+  min_size = MIN (wsize, xsize);
+
+  if (sub >= 0)
+    {
+      /* addmul of absolute values */
+
+      cy = mpn_addmul_1 (wp, xp, min_size, y);
+      wp += min_size;
+      xp += min_size;
+
+      dsize = xsize - wsize;
+#if HAVE_NATIVE_mpn_mul_1c
+      if (dsize > 0)
+        cy = mpn_mul_1c (wp, xp, dsize, y, cy);
+      else if (dsize < 0)
+        {
+          dsize = -dsize;
+          cy = mpn_add_1 (wp, wp, dsize, cy);
+        }
+#else
+      if (dsize != 0)
+        {
+          mp_limb_t  cy2;
+          if (dsize > 0)
+            cy2 = mpn_mul_1 (wp, xp, dsize, y);
+          else
+            {
+              dsize = -dsize;
+              cy2 = 0;
+            }
+          cy = cy2 + mpn_add_1 (wp, wp, dsize, cy);
+        }
+#endif
+      /* wp was advanced by min_size, so wp[dsize] is limb new_wsize of w */
+      wp[dsize] = cy;
+      new_wsize += (cy != 0);
+    }
+  else
+    {
+      /* submul of absolute values */
+
+      cy = mpn_submul_1 (wp, xp, min_size, y);
+      if (wsize >= xsize)
+        {
+          /* if w bigger than x, then propagate borrow through it */
+          if (wsize != xsize)
+            cy = mpn_sub_1 (wp+xsize, wp+xsize, wsize-xsize, cy);
+
+          if (cy != 0)
+            {
+              /* Borrow out of w, take twos complement negative to get
+                 absolute value, flip sign of w.  */
+              wp[new_wsize] = ~-cy;  /* ~(0-cy); mpn_com below skips it */
+              mpn_com (wp, wp, new_wsize);
+              new_wsize++;
+              MPN_INCR_U (wp, new_wsize, CNST_LIMB(1));
+              wsize_signed = -wsize_signed;
+            }
+        }
+      else /* wsize < xsize */
+        {
+          /* x bigger than w, so want x*y-w.  Submul has given w-x*y, so
+             take twos complement and use an mpn_mul_1 for the rest.  */
+
+          mp_limb_t  cy2;
+
+          /* -(-cy*b^n + w-x*y) = (cy-1)*b^n + ~(w-x*y) + 1 */
+          mpn_com (wp, wp, wsize);
+          cy += mpn_add_1 (wp, wp, wsize, CNST_LIMB(1));
+          cy -= 1;
+
+          /* If cy-1 == -1 then hold that -1 for later.  mpn_submul_1 never
+             returns cy==MP_LIMB_T_MAX so that value always indicates a -1. */
+          cy2 = (cy == MP_LIMB_T_MAX);
+          cy += cy2;
+          MPN_MUL_1C (cy, wp+wsize, xp+wsize, xsize-wsize, y, cy);
+          wp[new_wsize] = cy;
+          new_wsize += (cy != 0);
+
+          /* Apply any -1 from above.  The value at wp+wsize is non-zero
+             because y!=0 and the high limb of x will be non-zero.  */
+          if (cy2)
+            MPN_DECR_U (wp+wsize, new_wsize-wsize, CNST_LIMB(1));
+
+          wsize_signed = -wsize_signed;
+        }
+
+      /* submul can produce high zero limbs due to cancellation, both when w
+         has more limbs or x has more  */
+      MPN_NORMALIZE (wp, new_wsize);
+    }
+
+  SIZ (w) = (wsize_signed >= 0 ? new_wsize : -new_wsize);
+
+  ASSERT (new_wsize == 0 || PTR(w)[new_wsize-1] != 0);
+}
+
+
+void
+mpz_addmul_ui (mpz_ptr w, mpz_srcptr x, unsigned long y)
+{  /* w += x*y for an unsigned long y */
+#if BITS_PER_ULONG > GMP_NUMB_BITS  /* y may span two limbs */
+  if (UNLIKELY (y > GMP_NUMB_MAX && SIZ(x) != 0))
+    {
+      mpz_t t;
+      mp_ptr tp;
+      mp_size_t xn;
+      TMP_DECL;
+      TMP_MARK;
+      xn = SIZ (x);
+      MPZ_TMP_INIT (t, ABS (xn) + 1);
+      tp = PTR (t);
+      tp[0] = 0;                       /* low limb of the shifted copy */
+      MPN_COPY (tp + 1, PTR(x), ABS (xn));
+      SIZ(t) = xn >= 0 ? xn + 1 : xn - 1;  /* t = x * 2^GMP_NUMB_BITS */
+      mpz_aorsmul_1 (w, t, (mp_limb_t) y >> GMP_NUMB_BITS, (mp_size_t) 0);
+      PTR(t) = tp + 1;                 /* skip the zero limb: t = x again */
+      SIZ(t) = xn;
+      mpz_aorsmul_1 (w, t, (mp_limb_t) y & GMP_NUMB_MASK, (mp_size_t) 0);
+      TMP_FREE;
+      return;
+    }
+#endif
+  mpz_aorsmul_1 (w, x, (mp_limb_t) y, (mp_size_t) 0);  /* one-limb y, or x==0 */
+}
+
+void
+mpz_submul_ui (mpz_ptr w, mpz_srcptr x, unsigned long y)
+{  /* w -= x*y for an unsigned long y */
+#if BITS_PER_ULONG > GMP_NUMB_BITS  /* y may span two limbs */
+  if (UNLIKELY (y > GMP_NUMB_MAX && SIZ(x) != 0))  /* same hint as addmul_ui */
+    {
+      mpz_t t;
+      mp_ptr tp;
+      mp_size_t xn;
+      TMP_DECL;
+      TMP_MARK;
+      xn = SIZ (x);
+      MPZ_TMP_INIT (t, ABS (xn) + 1);
+      tp = PTR (t);
+      tp[0] = 0;                       /* low limb of the shifted copy */
+      MPN_COPY (tp + 1, PTR(x), ABS (xn));
+      SIZ(t) = xn >= 0 ? xn + 1 : xn - 1;  /* t = x * 2^GMP_NUMB_BITS */
+      mpz_aorsmul_1 (w, t, (mp_limb_t) y >> GMP_NUMB_BITS, (mp_size_t) -1);
+      PTR(t) = tp + 1;                 /* skip the zero limb: t = x again */
+      SIZ(t) = xn;
+      mpz_aorsmul_1 (w, t, (mp_limb_t) y & GMP_NUMB_MASK, (mp_size_t) -1);
+      TMP_FREE;
+      return;
+    }
+#endif
+  mpz_aorsmul_1 (w, x, (mp_limb_t) y & GMP_NUMB_MASK, (mp_size_t) -1);
+}
diff --git a/mpz/array_init.c b/mpz/array_init.c

new file mode 100644 (file)

index 0000000..0e2f9aa
--- /dev/null
+++ b/mpz/array_init.c
@@ -0,0 +1,40 @@
+/* mpz_array_init (array, array_size, size_per_elem) --
+
+Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2002 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpz_array_init (mpz_ptr arr, mp_size_t arr_size, mp_size_t nbits)
+{
+  /* Limbs per element: nbits rounded up to whole limbs.  */
+  mp_size_t per_elem = (nbits + GMP_NUMB_BITS - 1) / GMP_NUMB_BITS;
+  mp_size_t left = arr_size;
+  /* One shared block is carved into arr_size slices of per_elem limbs.  */
+  mp_ptr slice
+    = (mp_ptr) (*__gmp_allocate_func) (arr_size * per_elem * BYTES_PER_MP_LIMB);
+
+  for (; left > 0; left--, arr++, slice += per_elem)
+    {
+      ALLOC (arr) = per_elem + 1;  /* advertised as one more than the slice */
+      SIZ (arr) = 0;
+      PTR (arr) = slice;
+    }
+}
diff --git a/mpz/bin_ui.c b/mpz/bin_ui.c

new file mode 100644 (file)

index 0000000..c97ed7c
--- /dev/null
+++ b/mpz/bin_ui.c
@@ -0,0 +1,133 @@
+/* mpz_bin_ui - compute n over k.
+
+Copyright 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* This is a poor implementation.  Look at bin_uiui.c for improvement ideas.
+   In fact consider calling mpz_bin_uiui() when the arguments fit, leaving
+   the code here only for big n.
+
+   The identity bin(n,k) = (-1)^k * bin(-n+k-1,k) can be found in Knuth vol
+   1 section 1.2.6 part G. */
+
+
+#define DIVIDE()  /* divide step; r and kacc come from the expansion site */  \
+  do {                                                                        \
+    ASSERT (SIZ(r) > 0);                                                      \
+    MPN_DIVREM_OR_DIVEXACT_1 (PTR(r), PTR(r), (mp_size_t) SIZ(r), kacc);      \
+    SIZ(r) -= (PTR(r)[SIZ(r)-1] == 0);  /* drop the top limb if now zero */   \
+  } while (0)
+
+void
+mpz_bin_ui (mpz_ptr r, mpz_srcptr n, unsigned long int k)
+{
+  mpz_t      ni;        /* n-k (or -n-1 for n<0); incremented per iteration */
+  mp_limb_t  i;
+  mpz_t      nacc;      /* numerator factors not yet multiplied into r */
+  mp_limb_t  kacc;      /* denominator factors not yet divided out of r */
+  mp_size_t  negate;    /* 1 if the result is to be negated, else 0 */
+
+  if (mpz_sgn (n) < 0)
+    {
+      /* bin(n,k) = (-1)^k * bin(-n+k-1,k), and set ni = -n+k-1 - k = -n-1 */
+      mpz_init (ni);
+      mpz_neg (ni, n);
+      mpz_sub_ui (ni, ni, 1L);
+      negate = (k & 1);   /* (-1)^k */
+    }
+  else
+    {
+      /* bin(n,k) == 0 if k>n
+         (no test for this under the n<0 case, since -n+k-1 >= k there) */
+      if (mpz_cmp_ui (n, k) < 0)
+        {
+          mpz_set_ui (r, 0L);
+          return;
+        }
+
+      /* set ni = n-k */
+      mpz_init (ni);
+      mpz_sub_ui (ni, n, k);
+      negate = 0;
+    }
+
+  /* Now wanting bin(ni+k,k), with ni non-negative, and "negate" is the sign
+     (0 for positive, 1 for negative). */
+  mpz_set_ui (r, 1L);
+
+  /* Rewrite bin(n,k) as bin(n,n-k) if that is smaller.  In this case it's
+     whether ni+k-k < k meaning ni<k, and if so change to denominator ni+k-k
+     = ni, and new ni of ni+k-ni = k.  */
+  if (mpz_cmp_ui (ni, k) < 0)
+    {
+      unsigned long  tmp;
+      tmp = k;
+      k = mpz_get_ui (ni);
+      mpz_set_ui (ni, tmp);
+    }
+
+  kacc = 1;
+  mpz_init_set_ui (nacc, 1L);
+
+  for (i = 1; i <= k; i++)
+    {
+      mp_limb_t k1, k0;
+
+#if 0  /* disabled: strip common factors of 2 from kacc and nacc */
+      mp_limb_t nacclow;
+      int c;
+
+      nacclow = PTR(nacc)[0];
+      for (c = 0; (((kacc | nacclow) & 1) == 0); c++)
+       {
+         kacc >>= 1;
+         nacclow >>= 1;
+       }
+      mpz_div_2exp (nacc, nacc, c);
+#endif
+
+      mpz_add_ui (ni, ni, 1L);
+      mpz_mul (nacc, nacc, ni);
+      umul_ppmm (k1, k0, kacc, i << GMP_NAIL_BITS);
+      k0 >>= GMP_NAIL_BITS;
+      if (k1 != 0)
+       {
+         /* Accumulator overflow.  Perform bignum step.  */
+         mpz_mul (r, r, nacc);
+         mpz_set_ui (nacc, 1L);
+          DIVIDE ();
+         kacc = i;     /* i was not in the divisor just used; carry it on */
+       }
+      else
+       {
+         /* Save new products in accumulators to keep accumulating.  */
+         kacc = k0;
+       }
+    }
+
+  mpz_mul (r, r, nacc);
+  DIVIDE ();
+  SIZ(r) = (SIZ(r) ^ -negate) + negate;  /* negates SIZ(r) iff negate==1 */
+
+  mpz_clear (nacc);
+  mpz_clear (ni);
+}
diff --git a/mpz/bin_uiui.c b/mpz/bin_uiui.c

new file mode 100644 (file)

index 0000000..29bbd60
--- /dev/null
+++ b/mpz/bin_uiui.c
@@ -0,0 +1,123 @@
+/* mpz_bin_uiui - compute n over k.
+
+Copyright 1998, 1999, 2000, 2001, 2002, 2003, 2006 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Enhancement: It ought to be possible to calculate the size of the final
+   result in advance, to a rough approximation at least, and use it to do
+   just one realloc.  Stirling's approximation n! ~= sqrt(2*pi*n)*(n/e)^n
+   (Knuth section 1.2.5) might be of use.  */
+
+/* "inc" in the main loop allocates a chunk more space if not already
+   enough, so as to avoid repeated reallocs.  The final step on the other
+   hand requires only one more limb.  */
+#define MULDIV(inc)  /* uses rp, rsize, ralloc, nacc, kacc in scope */  \
+  do {                                                                  \
+    ASSERT (rsize <= ralloc);                                           \
+    /* grow first so rp[rsize] below is writable */                     \
+    if (rsize == ralloc)                                                \
+      {                                                                 \
+        mp_size_t  new_ralloc = ralloc + (inc);                         \
+        rp = __GMP_REALLOCATE_FUNC_LIMBS (rp, ralloc, new_ralloc);      \
+        ralloc = new_ralloc;                                            \
+      }                                                                 \
+    /* multiply by nacc, carry into rp[rsize]; then the kacc step */    \
+    rp[rsize] = mpn_mul_1 (rp, rp, rsize, nacc);                        \
+    MPN_DIVREM_OR_DIVEXACT_1 (rp, rp, rsize+1, kacc);                   \
+    rsize += (rp[rsize] != 0);                                          \
+                                                                        \
+} while (0)
+
+void
+mpz_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k)
+{
+  unsigned long int  i, j;
+  mp_limb_t          nacc, kacc;
+  unsigned long int  cnt;
+  mp_size_t          rsize, ralloc;
+  mp_ptr             rp;
+  /* Sets r = bin(n,k), building the limbs directly through PTR(r).  */
+  /* bin(n,k) = 0 if k>n. */
+  if (n < k)
+    {
+      SIZ(r) = 0;
+      return;
+    }
+
+  rp = PTR(r);
+
+  /* Rewrite bin(n,k) as bin(n,n-k) if that is smaller. */
+  k = MIN (k, n-k);
+
+  /* bin(n,0) = 1 */
+  if (k == 0)
+    {
+      SIZ(r) = 1;
+      rp[0] = 1;
+      return;
+    }
+
+  j = n - k + 1;        /* first numerator factor, seeds the result */
+  rp[0] = j;
+  rsize = 1;
+  ralloc = ALLOC(r);
+
+  /* Initialize accumulators.  */
+  nacc = 1;
+  kacc = 1;
+
+  for (i = 2; i <= k; i++)
+    {
+      mp_limb_t n1, n0;
+
+      /* Remove common 2 factors.  */
+      cnt = ((nacc | kacc) & 1) ^ 1;    /* 1 only when both are even */
+      nacc >>= cnt;
+      kacc >>= cnt;
+
+      j++;
+      /* Accumulate next multiples.  */
+      umul_ppmm (n1, n0, nacc, (mp_limb_t) j << GMP_NAIL_BITS);
+      n0 >>= GMP_NAIL_BITS;
+      if (n1 == 0)
+        {
+          /* Save new products in accumulators to keep accumulating.  */
+          nacc = n0;
+          kacc = kacc * i;      /* cannot overflow: i < j keeps kacc <= nacc */
+        }
+      else
+        {
+          /* Accumulator overflow.  Perform bignum step.  */
+          MULDIV (32);
+          nacc = j;
+          kacc = i;
+        }
+    }
+
+  /* Take care of whatever is left in accumulators.  */
+  MULDIV (1);
+  /* MULDIV may have reallocated, so store the buffer back into r.  */
+  ALLOC(r) = ralloc;
+  SIZ(r) = rsize;
+  PTR(r) = rp;
+}
diff --git a/mpz/cdiv_q.c b/mpz/cdiv_q.c

new file mode 100644 (file)

index 0000000..2e66363
--- /dev/null
+++ b/mpz/cdiv_q.c
@@ -0,0 +1,42 @@
+/* mpz_cdiv_q -- Division rounding the quotient towards +infinity.  The
+   remainder gets the opposite sign as the denominator.
+
+Copyright 1994, 1995, 1996, 2000, 2001, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpz_cdiv_q (mpz_ptr quot, mpz_srcptr dividend, mpz_srcptr divisor)
+{
+  /* Sample the operand signs before the division writes quot.  */
+  mp_size_t dsize = SIZ (divisor);
+  int same_sign = (SIZ (dividend) ^ dsize) >= 0;
+  mpz_t r;
+  TMP_DECL;
+
+  TMP_MARK;
+  MPZ_TMP_INIT (r, ABS (dsize));
+
+  /* Truncation rounds toward zero, so an inexact quotient of like-signed
+     operands is one short of the ceiling.  */
+  mpz_tdiv_qr (quot, r, dividend, divisor);
+  if (same_sign && SIZ (r) != 0)
+    mpz_add_ui (quot, quot, 1L);
+  TMP_FREE;
+}
diff --git a/mpz/cdiv_q_ui.c b/mpz/cdiv_q_ui.c

new file mode 100644 (file)

index 0000000..b757ea5
--- /dev/null
+++ b/mpz/cdiv_q_ui.c
@@ -0,0 +1,92 @@
+/* mpz_cdiv_q_ui -- Division rounding the quotient towards +infinity.  The
+   remainder gets the opposite sign as the denominator.  In order to make it
+   always fit into the return type, the negative of the true remainder is
+   returned.
+
+Copyright 1994, 1996, 1999, 2001, 2002, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+unsigned long int
+mpz_cdiv_q_ui (mpz_ptr quot, mpz_srcptr dividend, unsigned long int divisor)
+{
+  mp_size_t ns, nn, qn;
+  mp_ptr np, qp;
+  mp_limb_t rl;
+  /* quot = ceiling of dividend/divisor; returns the negated remainder.  */
+  if (divisor == 0)
+    DIVIDE_BY_ZERO;
+
+  ns = SIZ(dividend);
+  if (ns == 0)
+    {
+      SIZ(quot) = 0;
+      return 0;
+    }
+
+  nn = ABS(ns);
+  MPZ_REALLOC (quot, nn);
+  qp = PTR(quot);
+  np = PTR(dividend);
+
+#if BITS_PER_ULONG > GMP_NUMB_BITS  /* avoid warnings about shift amount */
+  if (divisor > GMP_NUMB_MAX)
+    {
+      mp_limb_t dp[2], rp[2];
+
+      if (nn == 1)             /* tdiv_qr requirements; tested above for 0 */
+       {
+         qp[0] = 0;
+         rl = np[0];
+         qn = 1;               /* a white lie, fixed below */
+       }
+      else
+       {
+         dp[0] = divisor & GMP_NUMB_MASK;
+         dp[1] = divisor >> GMP_NUMB_BITS;
+         mpn_tdiv_qr (qp, rp, (mp_size_t) 0, np, nn, dp, (mp_size_t) 2);
+         rl = rp[0] + (rp[1] << GMP_NUMB_BITS);
+         qn = nn - 2 + 1;
+       }
+      /* positive inexact: round the quotient up; remainder flips to d-r */
+      if (rl != 0 && ns >= 0)
+       {
+         mpn_incr_u (qp, (mp_limb_t) 1);
+         rl = divisor - rl;
+       }
+      /* strip up to two high zero limbs from the quotient */
+      qn -= qp[qn - 1] == 0; qn -= qn != 0 && qp[qn - 1] == 0;
+    }
+  else
+#endif
+    {
+      rl = mpn_divrem_1 (qp, (mp_size_t) 0, np, nn, (mp_limb_t) divisor);
+      /* positive inexact: round the quotient up; remainder flips to d-r */
+      if (rl != 0 && ns >= 0)
+       {
+         mpn_incr_u (qp, (mp_limb_t) 1);
+         rl = divisor - rl;
+       }
+
+      qn = nn - (qp[nn - 1] == 0);
+    }
+
+  SIZ(quot) = ns >= 0 ? qn : -qn;
+  return rl;
+}
diff --git a/mpz/cdiv_qr.c b/mpz/cdiv_qr.c

new file mode 100644 (file)

index 0000000..197ae50
--- /dev/null
+++ b/mpz/cdiv_qr.c
@@ -0,0 +1,54 @@
+/* mpz_cdiv_qr -- Division rounding the quotient towards +infinity.  The
+   remainder gets the opposite sign as the denominator.
+
+Copyright 1994, 1995, 1996, 2000, 2001, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpz_cdiv_qr (mpz_ptr quot, mpz_ptr rem, mpz_srcptr dividend, mpz_srcptr divisor)
+{
+  mp_size_t dsize = SIZ (divisor);
+  int same_sign;
+  mpz_t saved;         /* function scope: divisor may be repointed at it */
+  TMP_DECL;
+
+  TMP_MARK;
+  /* The fix-up below subtracts the divisor after quot and rem have been
+     written, so a divisor sharing either output is copied aside first.  */
+  if (divisor == quot || divisor == rem)
+    {
+      MPZ_TMP_INIT (saved, ABS (dsize));
+      mpz_set (saved, divisor);
+      divisor = saved;
+    }
+
+  same_sign = (SIZ (dividend) ^ dsize) >= 0;
+  mpz_tdiv_qr (quot, rem, dividend, divisor);
+
+  /* Inexact with like-signed operands: step quot up to the ceiling and
+     take the divisor back out of rem.  */
+  if (same_sign && SIZ (rem) != 0)
+    {
+      mpz_add_ui (quot, quot, 1L);
+      mpz_sub (rem, rem, divisor);
+    }
+
+  TMP_FREE;
+}
diff --git a/mpz/cdiv_qr_ui.c b/mpz/cdiv_qr_ui.c

new file mode 100644 (file)

index 0000000..67e80b7
--- /dev/null
+++ b/mpz/cdiv_qr_ui.c
@@ -0,0 +1,110 @@
+/* mpz_cdiv_qr_ui -- Division rounding the quotient towards +infinity.  The
+   remainder gets the opposite sign as the denominator.  In order to make it
+   always fit into the return type, the negative of the true remainder is
+   returned.
+
+Copyright 1994, 1995, 1996, 1999, 2001, 2002, 2004 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+unsigned long int
+mpz_cdiv_qr_ui (mpz_ptr quot, mpz_ptr rem, mpz_srcptr dividend, unsigned long int divisor)
+{
+  mp_size_t ns, nn, qn;
+  mp_ptr np, qp;
+  mp_limb_t rl;
+  /* quot = ceiling quotient, rem = its remainder (<= 0); returns |rem| */
+  if (divisor == 0)
+    DIVIDE_BY_ZERO;
+
+  ns = SIZ(dividend);
+  if (ns == 0)
+    {
+      SIZ(quot) = 0;
+      SIZ(rem) = 0;
+      return 0;
+    }
+
+  nn = ABS(ns);
+  MPZ_REALLOC (quot, nn);
+  qp = PTR(quot);
+  np = PTR(dividend);
+
+#if BITS_PER_ULONG > GMP_NUMB_BITS  /* avoid warnings about shift amount */
+  if (divisor > GMP_NUMB_MAX)
+    {
+      mp_limb_t dp[2];
+      mp_ptr rp;
+      mp_size_t rn;
+
+      MPZ_REALLOC (rem, 2);
+      rp = PTR(rem);
+
+      if (nn == 1)             /* tdiv_qr requirements; tested above for 0 */
+       {
+         qp[0] = 0;
+         qn = 1;               /* a white lie, fixed below */
+         rl = np[0];
+         rp[0] = rl;
+       }
+      else
+       {
+         dp[0] = divisor & GMP_NUMB_MASK;
+         dp[1] = divisor >> GMP_NUMB_BITS;
+         mpn_tdiv_qr (qp, rp, (mp_size_t) 0, np, nn, dp, (mp_size_t) 2);
+         rl = rp[0] + (rp[1] << GMP_NUMB_BITS);
+         qn = nn - 2 + 1;
+       }
+      /* positive inexact: round quot up; remainder magnitude becomes d-r */
+      if (rl != 0 && ns >= 0)
+       {
+         mpn_incr_u (qp, (mp_limb_t) 1);
+         rl = divisor - rl;
+         rp[0] = rl & GMP_NUMB_MASK;
+         rp[1] = rl >> GMP_NUMB_BITS;
+       }
+
+      qn -= qp[qn - 1] == 0; qn -= qn != 0 && qp[qn - 1] == 0;
+      rn = 1 + (rl > GMP_NUMB_MAX);  rn -= (rp[rn - 1] == 0);
+      SIZ(rem) = -rn;          /* rem is never positive */
+    }
+  else
+#endif
+    {
+      rl = mpn_divrem_1 (qp, (mp_size_t) 0, np, nn, (mp_limb_t) divisor);
+      if (rl == 0)
+       SIZ(rem) = 0;
+      else
+       {
+         if (ns >= 0)
+           {
+             mpn_incr_u (qp, (mp_limb_t) 1);
+             rl = divisor - rl;
+           }
+
+         PTR(rem)[0] = rl;
+         SIZ(rem) = -(rl != 0);
+       }
+      qn = nn - (qp[nn - 1] == 0);
+    }
+
+  SIZ(quot) = ns >= 0 ? qn : -qn;
+  return rl;
+}
diff --git a/mpz/cdiv_r.c b/mpz/cdiv_r.c

new file mode 100644 (file)

index 0000000..749276f
--- /dev/null
+++ b/mpz/cdiv_r.c
@@ -0,0 +1,50 @@
+/* mpz_cdiv_r -- Division rounding the quotient towards +infinity.  The
+   remainder gets the opposite sign as the denominator.
+
+Copyright 1994, 1995, 1996, 2001, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpz_cdiv_r (mpz_ptr rem, mpz_srcptr dividend, mpz_srcptr divisor)
+{
+  mp_size_t dsize = SIZ (divisor);
+  mpz_t saved;         /* function scope: divisor may be repointed at it */
+  TMP_DECL;
+
+  TMP_MARK;
+
+  /* The fix-up below subtracts the divisor after rem has been written, so
+     a divisor sharing rem's storage is copied aside first.  */
+  if (divisor == rem)
+    {
+      MPZ_TMP_INIT (saved, ABS (dsize));
+      mpz_set (saved, divisor);
+      divisor = saved;
+    }
+
+  mpz_tdiv_r (rem, dividend, divisor);
+
+  /* A non-zero truncated remainder of like-signed operands means the
+     ceiling quotient is one higher, so one divisor comes off rem.  */
+  if (SIZ (rem) != 0 && (SIZ (dividend) ^ dsize) >= 0)
+    mpz_sub (rem, rem, divisor);
+
+  TMP_FREE;
+}
diff --git a/mpz/cdiv_r_ui.c b/mpz/cdiv_r_ui.c

new file mode 100644 (file)

index 0000000..e889d74
--- /dev/null
+++ b/mpz/cdiv_r_ui.c
@@ -0,0 +1,100 @@
+/* mpz_cdiv_r_ui -- Division rounding the quotient towards +infinity.  The
+   remainder gets the opposite sign as the denominator.  In order to make it
+   always fit into the return type, the negative of the true remainder is
+   returned.
+
+Copyright 1994, 1995, 1996, 2001, 2002, 2004, 2005 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+unsigned long int
+mpz_cdiv_r_ui (mpz_ptr rem, mpz_srcptr dividend, unsigned long int divisor)
+{
+  mp_size_t ns, nn;
+  mp_ptr np;
+  mp_limb_t rl;
+  /* rem = remainder for a ceiling quotient (<= 0); returns its magnitude */
+  if (divisor == 0)
+    DIVIDE_BY_ZERO;
+
+  ns = SIZ(dividend);
+  if (ns == 0)
+    {
+      SIZ(rem) = 0;
+      return 0;
+    }
+
+  nn = ABS(ns);
+  np = PTR(dividend);
+#if BITS_PER_ULONG > GMP_NUMB_BITS  /* avoid warnings about shift amount */
+  if (divisor > GMP_NUMB_MAX)
+    {
+      mp_limb_t dp[2];
+      mp_ptr rp, qp;
+      mp_size_t rn;
+      TMP_DECL;
+
+      MPZ_REALLOC (rem, 2);
+      rp = PTR(rem);
+
+      if (nn == 1)             /* tdiv_qr requirements; tested above for 0 */
+       {
+         rl = np[0];
+         rp[0] = rl;
+       }
+      else
+       {
+         TMP_MARK;
+         dp[0] = divisor & GMP_NUMB_MASK;
+         dp[1] = divisor >> GMP_NUMB_BITS;
+         qp = TMP_ALLOC_LIMBS (nn - 2 + 1);  /* quotient is discarded */
+         mpn_tdiv_qr (qp, rp, (mp_size_t) 0, np, nn, dp, (mp_size_t) 2);
+         TMP_FREE;
+         rl = rp[0] + (rp[1] << GMP_NUMB_BITS);
+       }
+      /* positive inexact: true remainder is r-d, so keep magnitude d-r */
+      if (rl != 0 && ns >= 0)
+       {
+         rl = divisor - rl;
+         rp[0] = rl & GMP_NUMB_MASK;
+         rp[1] = rl >> GMP_NUMB_BITS;
+       }
+
+      rn = 1 + (rl > GMP_NUMB_MAX);  rn -= (rp[rn - 1] == 0);
+      SIZ(rem) = -rn;          /* rem is never positive */
+    }
+  else
+#endif
+    {
+      rl = mpn_mod_1 (np, nn, (mp_limb_t) divisor);
+      if (rl == 0)
+       SIZ(rem) = 0;
+      else
+       {
+         if (ns >= 0)
+           rl = divisor - rl;
+
+         PTR(rem)[0] = rl;
+         SIZ(rem) = -1;        /* rem is never positive */
+       }
+    }
+
+  return rl;
+}
diff --git a/mpz/cdiv_ui.c b/mpz/cdiv_ui.c

new file mode 100644 (file)

index 0000000..7b99bee
--- /dev/null
+++ b/mpz/cdiv_ui.c
@@ -0,0 +1,92 @@
+/* mpz_cdiv_ui -- Division rounding the quotient towards +infinity.  The
+   remainder gets the opposite sign as the denominator.  In order to make it
+   always fit into the return type, the negative of the true remainder is
+   returned.
+
+Copyright 1994, 1995, 1996, 2001, 2002, 2004, 2005 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+unsigned long int
+mpz_cdiv_ui (mpz_srcptr dividend, unsigned long int divisor)
+{
+  mp_size_t ns, nn;
+  mp_ptr np;
+  mp_limb_t rl;
+
+  /* Returns the magnitude of the remainder left by a quotient rounded
+     towards +infinity.  Nothing is stored, so unlike mpz_cdiv_r_ui only
+     the remainder value RL is tracked; no result limbs or result size
+     are kept.  */
+
+  if (divisor == 0)
+    DIVIDE_BY_ZERO;
+
+  ns = SIZ(dividend);
+  if (ns == 0)
+    return 0;
+
+  nn = ABS(ns);
+  np = PTR(dividend);
+#if BITS_PER_ULONG > GMP_NUMB_BITS  /* avoid warnings about shift amount */
+  if (divisor > GMP_NUMB_MAX)
+    {
+      /* The divisor needs two limbs, so the single-limb mpn_mod_1 cannot
+         be used; split it up and call the general mpn division.  */
+      mp_limb_t dp[2], rp[2];
+      mp_ptr qp;
+      TMP_DECL;
+
+      if (nn == 1)             /* tdiv_qr requirements; tested above for 0 */
+       {
+         /* A one-limb dividend is below the divisor: it is the remainder.  */
+         rl = np[0];
+       }
+      else
+       {
+         TMP_MARK;
+         dp[0] = divisor & GMP_NUMB_MASK;
+         dp[1] = divisor >> GMP_NUMB_BITS;
+         qp = TMP_ALLOC_LIMBS (nn - 2 + 1);
+         mpn_tdiv_qr (qp, rp, (mp_size_t) 0, np, nn, dp, (mp_size_t) 2);
+         TMP_FREE;
+         rl = rp[0] + (rp[1] << GMP_NUMB_BITS);
+       }
+
+      /* RL is now abs(dividend) mod divisor, which is already the answer
+         for a negative dividend.  A non-negative dividend with a non-zero
+         remainder has its quotient rounded up, which leaves divisor - RL
+         as the magnitude of the (negative) remainder.  */
+      if (rl != 0 && ns >= 0)
+       rl = divisor - rl;
+    }
+  else
+#endif
+    {
+      rl = mpn_mod_1 (np, nn, (mp_limb_t) divisor);
+
+      /* Rounding the quotient up for a non-negative dividend turns a
+         non-zero remainder RL into divisor - RL.  */
+      if (rl != 0 && ns >= 0)
+       rl = divisor - rl;
+    }
+
+  return rl;
+}
diff --git a/mpz/cfdiv_q_2exp.c b/mpz/cfdiv_q_2exp.c

new file mode 100644 (file)

index 0000000..1d32691
--- /dev/null
+++ b/mpz/cfdiv_q_2exp.c
@@ -0,0 +1,102 @@
+/* mpz_cdiv_q_2exp, mpz_fdiv_q_2exp -- quotient from mpz divided by 2^n.
+
+Copyright 1991, 1993, 1994, 1996, 1998, 1999, 2001, 2002, 2004 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* dir==1 for ceil, dir==-1 for floor */
+
+static void __gmpz_cfdiv_q_2exp __GMP_PROTO ((REGPARM_3_1 (mpz_ptr, mpz_srcptr, mp_bitcnt_t, int))) REGPARM_ATTR (1);
+#define cfdiv_q_2exp(w,u,cnt,dir)  __gmpz_cfdiv_q_2exp (REGPARM_3_1 (w,u,cnt,dir))
+/* Set w to u / 2^cnt with the quotient rounded up (dir==1) or down (dir==-1).  */
+REGPARM_ATTR (1) static void
+cfdiv_q_2exp (mpz_ptr w, mpz_srcptr u, mp_bitcnt_t cnt, int dir)
+{
+  mp_size_t  wsize, usize, abs_usize, limb_cnt, i;
+  mp_srcptr  up;
+  mp_ptr     wp;
+  mp_limb_t  round, rmask;
+
+  usize = SIZ (u);
+  abs_usize = ABS (usize);
+  limb_cnt = cnt / GMP_NUMB_BITS;  /* whole limbs dropped by the shift */
+  wsize = abs_usize - limb_cnt;
+  if (wsize <= 0)
+    {
+      /* u < 2**cnt, so result 1, 0 or -1 according to rounding */
+      PTR(w)[0] = 1;  /* NOTE(review): no MPZ_REALLOC here, assumes w has a limb allocated -- confirm */
+      SIZ(w) = (usize == 0 || (usize ^ dir) < 0 ? 0 : dir);
+      return;
+    }
+
+  /* +1 limb to allow for mpn_add_1 below */
+  MPZ_REALLOC (w, wsize+1);
+
+  /* Check for rounding if direction matches u sign.
+     Set round if we're skipping non-zero limbs.  */
+  up = PTR(u);  /* read after MPZ_REALLOC; presumably so that w==u still works -- confirm */
+  round = 0;
+  rmask = ((usize ^ dir) >= 0 ? MP_LIMB_T_MAX : 0);  /* all ones when usize and dir agree in sign */
+  if (rmask != 0)
+    for (i = 0; i < limb_cnt && round == 0; i++)
+      round = up[i];
+
+  wp = PTR(w);
+  cnt %= GMP_NUMB_BITS;  /* bit shift left over within a limb */
+  if (cnt != 0)
+    {
+      round |= rmask & mpn_rshift (wp, up + limb_cnt, wsize, cnt);
+      wsize -= (wp[wsize - 1] == 0);  /* drop a high limb that became zero */
+    }
+  else
+    MPN_COPY_INCR (wp, up + limb_cnt, wsize);
+
+  if (round != 0)
+    {
+      if (wsize != 0)
+       {
+          mp_limb_t cy;
+         cy = mpn_add_1 (wp, wp, wsize, CNST_LIMB(1));
+         wp[wsize] = cy;  /* this store is why wsize+1 limbs were allocated */
+         wsize += cy;
+       }
+      else
+       {
+         /* We shifted something to zero.  */
+         wp[0] = 1;
+         wsize = 1;
+       }
+    }
+  SIZ(w) = (usize >= 0 ? wsize : -wsize);  /* w takes the sign of u */
+}
+
+
+void
+mpz_cdiv_q_2exp (mpz_ptr w, mpz_srcptr u, mp_bitcnt_t cnt)
+{
+  cfdiv_q_2exp (w, u, cnt, 1);  /* dir==1 selects ceiling rounding */
+}
+
+void
+mpz_fdiv_q_2exp (mpz_ptr w, mpz_srcptr u, mp_bitcnt_t cnt)
+{
+  cfdiv_q_2exp (w, u, cnt, -1);  /* dir==-1 selects floor rounding */
+}
diff --git a/mpz/cfdiv_r_2exp.c b/mpz/cfdiv_r_2exp.c

new file mode 100644 (file)

index 0000000..5611ad6
--- /dev/null
+++ b/mpz/cfdiv_r_2exp.c
@@ -0,0 +1,155 @@
+/* mpz_cdiv_r_2exp, mpz_fdiv_r_2exp -- remainder from mpz divided by 2^n.
+
+Copyright 2001, 2002, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Bit mask of "n" least significant bits of a limb. */
+#define LOW_MASK(n)   ((CNST_LIMB(1) << (n)) - 1)
+
+
+/* dir==1 for ceil, dir==-1 for floor */
+
+static void __gmpz_cfdiv_r_2exp __GMP_PROTO ((REGPARM_3_1 (mpz_ptr, mpz_srcptr, mp_bitcnt_t, int))) REGPARM_ATTR (1);
+#define cfdiv_r_2exp(w,u,cnt,dir)  __gmpz_cfdiv_r_2exp (REGPARM_3_1 (w, u, cnt, dir))
+/* Set w to the remainder of u / 2^cnt for a ceil (dir==1) or floor (dir==-1) quotient.  */
+REGPARM_ATTR (1) static void
+cfdiv_r_2exp (mpz_ptr w, mpz_srcptr u, mp_bitcnt_t cnt, int dir)
+{
+  mp_size_t  usize, abs_usize, limb_cnt, i;
+  mp_srcptr  up;
+  mp_ptr     wp;
+  mp_limb_t  high;
+
+  usize = SIZ(u);
+  if (usize == 0)
+    {
+      SIZ(w) = 0;
+      return;
+    }
+
+  limb_cnt = cnt / GMP_NUMB_BITS;  /* whole limbs below bit cnt */
+  cnt %= GMP_NUMB_BITS;  /* from here on cnt is the bit count within limb limb_cnt */
+  abs_usize = ABS (usize);
+
+  /* In the truncate case MPZ_REALLOC(w) is only done when w!=u, and the
+     negate case re-fetches PTR(u) afterwards, so fetch PTR(u) early here */
+  up = PTR(u);
+
+  if ((usize ^ dir) < 0)
+    {
+      /* Round towards zero, means just truncate */
+
+      if (w == u)
+        {
+          /* if already smaller than limb_cnt then do nothing */
+          if (abs_usize <= limb_cnt)
+            return;
+          wp = PTR(w);
+        }
+      else
+        {
+          i = MIN (abs_usize, limb_cnt+1);
+          MPZ_REALLOC (w, i);
+          wp = PTR(w);
+          MPN_COPY (wp, up, i);
+
+          /* if smaller than limb_cnt then only the copy is needed */
+          if (abs_usize <= limb_cnt)
+            {
+              SIZ(w) = usize;
+              return;
+            }
+        }
+    }
+  else
+    {
+      /* Round away from zero, means twos complement if non-zero */
+
+      /* if u!=0 and smaller than divisor, then must negate */
+      if (abs_usize <= limb_cnt)
+        goto negate;
+
+      /* if non-zero low limb, then must negate */
+      for (i = 0; i < limb_cnt; i++)
+        if (up[i] != 0)
+          goto negate;
+
+      /* if non-zero partial limb, then must negate */
+      if ((up[limb_cnt] & LOW_MASK (cnt)) != 0)
+        goto negate;
+
+      /* otherwise low bits of u are zero, so that's the result */
+      SIZ(w) = 0;
+      return;
+
+    negate:
+      /* twos complement negation to get 2**cnt-u */
+
+      MPZ_REALLOC (w, limb_cnt+1);
+      up = PTR(u);  /* fetched again after the MPZ_REALLOC above */
+      wp = PTR(w);
+
+      /* Ones complement */
+      i = MIN (abs_usize, limb_cnt+1);
+      mpn_com (wp, up, i);
+      for ( ; i <= limb_cnt; i++)
+        wp[i] = GMP_NUMB_MAX;  /* limbs above the top of u complement to all ones */
+
+      /* Twos complement.  Since u!=0 in the relevant part, the twos
+         complement never gives 0 and a carry, so can use MPN_INCR_U. */
+      MPN_INCR_U (wp, limb_cnt+1, CNST_LIMB(1));
+
+      usize = -usize;  /* the sign stored below is the opposite of u's */
+    }
+
+  /* Mask the high limb */
+  high = wp[limb_cnt];
+  high &= LOW_MASK (cnt);
+  wp[limb_cnt] = high;
+
+  /* Strip any consequent high zeros */
+  while (high == 0)
+    {
+      limb_cnt--;
+      if (limb_cnt < 0)
+        {
+          SIZ(w) = 0;
+          return;
+        }
+      high = wp[limb_cnt];
+    }
+
+  limb_cnt++;  /* turn the index of the top non-zero limb into a limb count */
+  SIZ(w) = (usize >= 0 ? limb_cnt : -limb_cnt);
+}
+
+
+void
+mpz_cdiv_r_2exp (mpz_ptr w, mpz_srcptr u, mp_bitcnt_t cnt)
+{
+  cfdiv_r_2exp (w, u, cnt, 1);  /* dir==1 selects the ceiling variant */
+}
+
+void
+mpz_fdiv_r_2exp (mpz_ptr w, mpz_srcptr u, mp_bitcnt_t cnt)
+{
+  cfdiv_r_2exp (w, u, cnt, -1);  /* dir==-1 selects the floor variant */
+}
diff --git a/mpz/clear.c b/mpz/clear.c

new file mode 100644 (file)

index 0000000..0902256
--- /dev/null
+++ b/mpz/clear.c
@@ -0,0 +1,28 @@
+/* mpz_clear -- de-allocate the space occupied by the dynamic digit space of
+   an integer.
+
+Copyright 1991, 1993, 1994, 1995, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpz_clear (mpz_ptr m)
+{
+  (*__gmp_free_func) (PTR (m), ALLOC (m) * BYTES_PER_MP_LIMB);  /* free the limb block, sized in bytes */
+}
diff --git a/mpz/clears.c b/mpz/clears.c

new file mode 100644 (file)

index 0000000..e9f2cf4
--- /dev/null
+++ b/mpz/clears.c
@@ -0,0 +1,56 @@
+/* mpz_clears() -- Clear multiple mpz_t variables.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#if HAVE_STDARG
+#include <stdarg.h>
+#else
+#include <varargs.h>
+#endif
+
+#include <stdio.h>             /* for NULL */
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if HAVE_STDARG
+mpz_clears (mpz_ptr x, ...)
+#else
+mpz_clears (va_alist)
+     va_dcl
+#endif
+{
+  va_list  ap;
+
+#if HAVE_STDARG
+  va_start (ap, x);
+#else
+  mpz_ptr x;
+  va_start (ap);
+  x = va_arg (ap, mpz_ptr);
+#endif
+
+  while (x != NULL)
+    {
+      mpz_clear (x);
+      x = va_arg (ap, mpz_ptr);
+    }
+  va_end (ap);
+}
diff --git a/mpz/clrbit.c b/mpz/clrbit.c

new file mode 100644 (file)

index 0000000..d08d684
--- /dev/null
+++ b/mpz/clrbit.c
@@ -0,0 +1,113 @@
+/* mpz_clrbit -- clear a specified bit.
+
+Copyright 1991, 1993, 1994, 1995, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpz_clrbit (mpz_ptr d, mp_bitcnt_t bit_index)
+{
+  mp_size_t dsize = d->_mp_size;
+  mp_ptr dp = d->_mp_d;
+  mp_size_t limb_index;
+  /* Clear bit bit_index of d, treating a negative d as two's complement.  */
+  limb_index = bit_index / GMP_NUMB_BITS;
+  if (dsize >= 0)
+    {
+      if (limb_index < dsize)
+       {
+          mp_limb_t  dlimb;
+          dlimb = dp[limb_index];
+          dlimb &= ~((mp_limb_t) 1 << (bit_index % GMP_NUMB_BITS));
+          dp[limb_index] = dlimb;
+
+          if (UNLIKELY (dlimb == 0 && limb_index == dsize-1))
+            {
+              /* high limb became zero, must normalize */
+              do {
+                dsize--;
+              } while (dsize > 0 && dp[dsize-1] == 0);
+              d->_mp_size = dsize;
+            }
+       }
+      else
+       ;  /* the bit lies above the top limb of a non-negative d: nothing to clear */
+    }
+  else
+    {
+      mp_size_t zero_bound;
+
+      /* Simulate two's complement arithmetic, i.e. simulate
+        1. Set OP = ~(OP - 1) [with infinitely many leading ones].
+        2. clear the bit.
+        3. Set OP = ~OP + 1.  */
+
+      dsize = -dsize;
+
+      /* No upper bound on this loop, we're sure there's a non-zero limb
+        sooner or later.  */
+      for (zero_bound = 0; ; zero_bound++)
+       if (dp[zero_bound] != 0)
+         break;
+
+      if (limb_index > zero_bound)
+       {
+         if (limb_index < dsize)
+           dp[limb_index] |= (mp_limb_t) 1 << (bit_index % GMP_NUMB_BITS);  /* sets the bit in the magnitude */
+         else
+           {
+             /* Ugh.  The bit should be cleared outside of the end of the
+                number.  We have to increase the size of the number.  */
+             if (UNLIKELY (d->_mp_alloc < limb_index + 1))
+                dp = _mpz_realloc (d, limb_index + 1);
+
+             MPN_ZERO (dp + dsize, limb_index - dsize);  /* zero the new limbs below limb_index */
+             dp[limb_index] = (mp_limb_t) 1 << (bit_index % GMP_NUMB_BITS);
+             d->_mp_size = -(limb_index + 1);
+           }
+       }
+      else if (limb_index == zero_bound)
+       {
+         dp[limb_index] = ((((dp[limb_index] - 1)
+                             | ((mp_limb_t) 1 << (bit_index % GMP_NUMB_BITS))) + 1)
+                           & GMP_NUMB_MASK);
+         if (dp[limb_index] == 0)  /* the +1 wrapped around: carry into the higher limbs */
+           {
+             mp_size_t i;
+             for (i = limb_index + 1; i < dsize; i++)
+               {
+                 dp[i] = (dp[i] + 1) & GMP_NUMB_MASK;
+                 if (dp[i] != 0)
+                   goto fin;
+               }
+             /* We got carry all way out beyond the end of D.  Increase
+                its size (and allocation if necessary).  */
+             dsize++;
+             if (UNLIKELY (d->_mp_alloc < dsize))
+                dp = _mpz_realloc (d, dsize);
+
+             dp[i] = 1;  /* i is the old dsize here, i.e. the new top limb */
+             d->_mp_size = -dsize;
+           fin:;
+           }
+       }
+      else
+       ;  /* limb_index is below the lowest non-zero limb: d is left unchanged */
+    }
+}
diff --git a/mpz/cmp.c b/mpz/cmp.c

new file mode 100644 (file)

index 0000000..a6d9d38
--- /dev/null
+++ b/mpz/cmp.c
@@ -0,0 +1,49 @@
+/* mpz_cmp(u,v) -- Compare U, V.  Return positive, zero, or negative
+   based on if U > V, U == V, or U < V.
+
+Copyright 1991, 1993, 1994, 1996, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#ifdef BERKELEY_MP
+#include "mp.h"
+#endif
+#include "gmp.h"
+#include "gmp-impl.h"
+
+int
+#ifdef BERKELEY_MP
+mcmp (mpz_srcptr u, mpz_srcptr v)
+#else
+mpz_cmp (mpz_srcptr u, mpz_srcptr v) __GMP_NOTHROW
+#endif
+{
+  mp_size_t  usize, vsize, asize;
+  mp_srcptr  up, vp;
+  int        cmp;
+
+  usize = SIZ(u);
+  vsize = SIZ(v);
+  /* Unequal signed sizes decide the order; usize - vsize may not fit an int */
+  if (usize != vsize)
+    return (usize > vsize ? 1 : -1);
+
+  asize = ABS (usize);  /* same sign and limb count: compare the limb data */
+  up = PTR(u);
+  vp = PTR(v);
+  MPN_CMP (cmp, up, vp, asize);
+  return (usize >= 0 ? cmp : -cmp);  /* for negatives the larger magnitude is the smaller value */
+}
diff --git a/mpz/cmp_d.c b/mpz/cmp_d.c

new file mode 100644 (file)

index 0000000..fd635a6
--- /dev/null
+++ b/mpz/cmp_d.c
@@ -0,0 +1,134 @@
+/* mpz_cmp_d -- compare mpz and double.
+
+Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#if HAVE_FLOAT_H
+#include <float.h>  /* for DBL_MAX */
+#endif
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+#define RETURN_CMP(zl, dl)                      \
+  do {                                          \
+    zlimb = (zl);                               \
+    dlimb = (dl);                               \
+    if (zlimb != dlimb)                         \
+      return (zlimb >= dlimb ? ret : -ret);     \
+  } while (0)
+/* RETURN_CMP falls through when the limbs are equal; RETURN_NONZERO always returns.  */
+#define RETURN_NONZERO(ptr, size, val)          \
+  do {                                          \
+    mp_size_t __i;                              \
+    for (__i = (size)-1; __i >= 0; __i--)       \
+      if ((ptr)[__i] != 0)                      \
+        return val;                             \
+    return 0;                                   \
+  } while (0)
+
+/* Compare z with d; the result is positive, zero or negative as z > d, z == d or z < d.  */
+int
+mpz_cmp_d (mpz_srcptr z, double d)
+{
+  mp_limb_t  darray[LIMBS_PER_DOUBLE], zlimb, dlimb;
+  mp_srcptr  zp;
+  mp_size_t  zsize;
+  int        dexp, ret;
+
+  /* d=NaN is an invalid operation, there's no sensible return value.
+     d=Inf or -Inf is bigger than any z in magnitude, so only its sign counts.  */
+  DOUBLE_NAN_INF_ACTION (d, __gmp_invalid_operation (), goto z_zero);
+
+  /* 1. Either operand zero. */
+  zsize = SIZ(z);
+  if (d == 0.0)
+    return zsize;
+  if (zsize == 0)
+    {
+    z_zero:
+      return (d < 0.0 ? 1 : -1);
+    }
+
+  /* 2. Opposite signs. */
+  if (zsize >= 0)
+    {
+      if (d < 0.0)
+        return 1;    /* >=0 cmp <0 */
+      ret = 1;
+    }
+  else
+    {
+      if (d >= 0.0)
+        return -1;   /* <0 cmp >=0 */
+      ret = -1;  /* both negative: magnitudes are compared and the result is flipped */
+      d = -d;
+      zsize = -zsize;
+    }
+
+  /* 3. Small d, knowing abs(z) >= 1. */
+  if (d < 1.0)
+    return ret;
+
+  dexp = __gmp_extract_double (darray, d);  /* presumably dexp is the limb count of d's integer part -- confirm */
+  ASSERT (dexp >= 1);
+
+  /* 4. Check for different high limb positions. */
+  if (zsize != dexp)
+    return (zsize >= dexp ? ret : -ret);
+
+  /* 5. Limb data. */
+  zp = PTR(z);
+
+#if LIMBS_PER_DOUBLE == 2
+  RETURN_CMP (zp[zsize-1], darray[1]);
+  if (zsize == 1)
+    return (darray[0] != 0 ? -ret : 0);  /* z has no more limbs; any left in d make d bigger */
+
+  RETURN_CMP (zp[zsize-2], darray[0]);
+  RETURN_NONZERO (zp, zsize-2, ret);  /* d has no more limbs; any left in z make z bigger */
+#endif
+
+#if LIMBS_PER_DOUBLE == 3
+  RETURN_CMP (zp[zsize-1], darray[2]);
+  if (zsize == 1)
+    return ((darray[0] | darray[1]) != 0 ? -ret : 0);
+
+  RETURN_CMP (zp[zsize-2], darray[1]);
+  if (zsize == 2)
+    return (darray[0] != 0 ? -ret : 0);
+
+  RETURN_CMP (zp[zsize-3], darray[0]);
+  RETURN_NONZERO (zp, zsize-3, ret);
+#endif
+
+#if LIMBS_PER_DOUBLE >= 4
+  {
+    int i;
+    for (i = 1; i <= LIMBS_PER_DOUBLE; i++)
+      {
+       RETURN_CMP (zp[zsize-i], darray[LIMBS_PER_DOUBLE-i]);
+       if (i >= zsize)
+         RETURN_NONZERO (darray, LIMBS_PER_DOUBLE-i, -ret);
+      }
+    RETURN_NONZERO (zp, zsize-LIMBS_PER_DOUBLE, ret);
+  }
+#endif
+}
diff --git a/mpz/cmp_si.c b/mpz/cmp_si.c

new file mode 100644 (file)

index 0000000..1919bd3
--- /dev/null
+++ b/mpz/cmp_si.c
@@ -0,0 +1,67 @@
+/* mpz_cmp_si(u,v) -- Compare an integer U with a single-word int V.
+   Return positive, zero, or negative based on if U > V, U == V, or U < V.
+
+Copyright 1991, 1993, 1994, 1995, 1996, 2000, 2001, 2002 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+int
+_mpz_cmp_si (mpz_srcptr u, signed long int v_digit) __GMP_NOTHROW
+{
+  mp_size_t un = SIZ (u);
+  mp_size_t vn;
+  mp_limb_t ul, vl;
+  unsigned long int vabs = (unsigned long int) v_digit;
+
+#if GMP_NAIL_BITS != 0
+  /* FIXME.  This isn't very pretty.  */
+  mpz_t tmp;
+  mp_limb_t tt[2];
+  PTR(tmp) = tt;
+  ALLOC(tmp) = 2;
+  mpz_set_si (tmp, v_digit);
+  return mpz_cmp (u, tmp);
+#endif
+
+  /* vn is the size v_digit would have as an mpz: -1, 0 or 1.  */
+  if (v_digit < 0)
+    {
+      vn = -1;
+      vabs = -vabs;  /* negate as unsigned to get the magnitude */
+    }
+  else
+    vn = (v_digit != 0);
+
+  /* Different sizes settle the comparison.  */
+  if (un != vn)
+    return un - vn;
+
+  /* Equal sizes, so either both operands are zero ...  */
+  if (un == 0)
+    return 0;
+
+  /* ... or both are one limb of the same sign; compare the magnitudes.  */
+  ul = PTR (u)[0];
+  vl = (mp_limb_t) vabs;
+  if (ul == vl)
+    return 0;
+
+  return (ul > vl ? un : -un);
+}
diff --git a/mpz/cmp_ui.c b/mpz/cmp_ui.c

new file mode 100644 (file)

index 0000000..f9f27ba
--- /dev/null
+++ b/mpz/cmp_ui.c
@@ -0,0 +1,68 @@
+/* mpz_cmp_ui.c -- Compare a mpz_t a with an unsigned long b.  Return positive,
+  zero, or negative based on if a > b, a == b, or a < b.
+
+Copyright 1991, 1993, 1994, 1995, 1996, 2001, 2002 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+int
+_mpz_cmp_ui (mpz_srcptr u, unsigned long int v_digit) __GMP_NOTHROW
+{
+  mp_ptr up;
+  mp_size_t un;
+  mp_limb_t ul;
+
+  up = PTR(u);
+  un = SIZ(u);
+
+  if (un == 0)
+    return -(v_digit != 0);
+
+  if (un == 1)
+    {
+      ul = up[0];
+      if (ul > v_digit)
+       return 1;
+      if (ul < v_digit)
+       return -1;
+      return 0;
+    }
+
+#if GMP_NAIL_BITS != 0
+  if (v_digit > GMP_NUMB_MAX)
+    {
+      if (un == 2)
+       {
+         ul = up[0] + (up[1] << GMP_NUMB_BITS);
+
+         if ((up[1] >> GMP_NAIL_BITS) != 0)
+           return 1;
+
+         if (ul > v_digit)
+           return 1;
+         if (ul < v_digit)
+           return -1;
+         return 0;
+       }
+    }
+#endif
+
+  return un > 0 ? 1 : -1;
+}
diff --git a/mpz/cmpabs.c b/mpz/cmpabs.c

new file mode 100644 (file)

index 0000000..f7bbcd7
--- /dev/null
+++ b/mpz/cmpabs.c
@@ -0,0 +1,43 @@
+/* mpz_cmpabs(u,v) -- Compare abs(U), abs(V).  Return positive, zero, or negative
+   based on if abs(U) > abs(V), abs(U) == abs(V), or abs(U) < abs(V).
+
+Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+int
+mpz_cmpabs (mpz_srcptr u, mpz_srcptr v) __GMP_NOTHROW
+{
+  mp_size_t  un = ABSIZ (u);
+  mp_size_t  vn = ABSIZ (v);
+  int        result;
+
+  /* With unequal limb counts the difference of the counts is returned;
+     its sign is that of the comparison.  */
+  if (un != vn)
+    return un - vn;
+
+  /* Equal limb counts: the limb data decides, and MPN_CMP leaves the
+     outcome in result.  */
+  MPN_CMP (result, PTR(u), PTR(v), un);
+
+  return result;
+}
diff --git a/mpz/cmpabs_d.c b/mpz/cmpabs_d.c

new file mode 100644 (file)

index 0000000..f7aadc2
--- /dev/null
+++ b/mpz/cmpabs_d.c
@@ -0,0 +1,119 @@
+/* mpz_cmpabs_d -- compare absolute values of mpz and double.
+
+Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#if HAVE_FLOAT_H
+#include <float.h>  /* for DBL_MAX */
+#endif
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+#define RETURN_CMP(zl, dl)              \
+  do {                                  \
+    zlimb = (zl);                       \
+    dlimb = (dl);                       \
+    if (zlimb != dlimb)                 \
+      return (zlimb >= dlimb ? 1 : -1); \
+  } while (0)
+/* RETURN_CMP falls through when the limbs are equal; RETURN_NONZERO always returns.  */
+#define RETURN_NONZERO(ptr, size, val)          \
+  do {                                          \
+    mp_size_t __i;                              \
+    for (__i = (size)-1; __i >= 0; __i--)       \
+      if ((ptr)[__i] != 0)                      \
+        return val;                             \
+    return 0;                                   \
+  } while (0)
+
+/* Compare abs(z) with abs(d); the result is 1, 0 or -1.  */
+int
+mpz_cmpabs_d (mpz_srcptr z, double d)
+{
+  mp_limb_t  darray[LIMBS_PER_DOUBLE], zlimb, dlimb;
+  mp_srcptr  zp;
+  mp_size_t  zsize;
+  int        dexp;
+
+  /* d=NaN is an invalid operation, there's no sensible return value.
+     d=Inf or -Inf is always bigger than z.  */
+  DOUBLE_NAN_INF_ACTION (d, __gmp_invalid_operation (), return -1);
+
+  /* 1. Check for either operand zero. */
+  zsize = SIZ(z);
+  if (d == 0.0)
+    return (zsize != 0);
+  if (zsize == 0)
+    return (d != 0 ? -1 : 0);  /* d is known non-zero here, so this yields -1 */
+
+  /* 2. Ignore signs. */
+  zsize = ABS(zsize);
+  d = ABS(d);
+
+  /* 3. Small d, knowing abs(z) >= 1. */
+  if (d < 1.0)
+    return 1;
+
+  dexp = __gmp_extract_double (darray, d);  /* presumably dexp is the limb count of d's integer part -- confirm */
+  ASSERT (dexp >= 1);
+
+  /* 4. Check for different high limb positions. */
+  if (zsize != dexp)
+    return (zsize >= dexp ? 1 : -1);
+
+  /* 5. Limb data. */
+  zp = PTR(z);
+
+#if LIMBS_PER_DOUBLE == 2
+  RETURN_CMP (zp[zsize-1], darray[1]);
+  if (zsize == 1)
+    return (darray[0] != 0 ? -1 : 0);  /* z has no more limbs; any left in d make d bigger */
+
+  RETURN_CMP (zp[zsize-2], darray[0]);
+  RETURN_NONZERO (zp, zsize-2, 1);  /* d has no more limbs; any left in z make z bigger */
+#endif
+
+#if LIMBS_PER_DOUBLE == 3
+  RETURN_CMP (zp[zsize-1], darray[2]);
+  if (zsize == 1)
+    return ((darray[0] | darray[1]) != 0 ? -1 : 0);
+
+  RETURN_CMP (zp[zsize-2], darray[1]);
+  if (zsize == 2)
+    return (darray[0] != 0 ? -1 : 0);
+
+  RETURN_CMP (zp[zsize-3], darray[0]);
+  RETURN_NONZERO (zp, zsize-3, 1);
+#endif
+
+#if LIMBS_PER_DOUBLE >= 4
+  {
+    int i;
+    for (i = 1; i <= LIMBS_PER_DOUBLE; i++)
+      {
+       RETURN_CMP (zp[zsize-i], darray[LIMBS_PER_DOUBLE-i]);
+       if (i >= zsize)
+         RETURN_NONZERO (darray, LIMBS_PER_DOUBLE-i, -1);
+      }
+    RETURN_NONZERO (zp, zsize-LIMBS_PER_DOUBLE, 1);
+  }
+#endif
+}
diff --git a/mpz/cmpabs_ui.c b/mpz/cmpabs_ui.c

new file mode 100644 (file)

index 0000000..f8df4b8
--- /dev/null
+++ b/mpz/cmpabs_ui.c
@@ -0,0 +1,67 @@
+/* mpz_cmpabs_ui.c -- Compare abs(a) of a mpz_t a with an unsigned long b.  Return
+  positive, zero, or negative based on if abs(a) > b, abs(a) == b, or abs(a) < b.
+
+Copyright 1991, 1993, 1994, 1995, 1997, 2000, 2001, 2002 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+int
+mpz_cmpabs_ui (mpz_srcptr u, unsigned long int v_digit) __GMP_NOTHROW
+{
+  mp_ptr up;
+  mp_size_t un;
+  mp_limb_t ul;
+
+  up = PTR(u);
+  un = SIZ(u);
+
+  if (un == 0)
+    return -(v_digit != 0);
+
+  un = ABS (un);
+
+  if (un == 1)
+    {
+      ul = up[0];
+      if (ul > v_digit)
+       return 1;
+      if (ul < v_digit)
+       return -1;
+      return 0;
+    }
+
+#if GMP_NAIL_BITS != 0
+  if (v_digit > GMP_NUMB_MAX)
+    {
+      if (un == 2)
+       {
+         ul = up[0] + (up[1] << GMP_NUMB_BITS);
+
+         if (ul > v_digit)
+           return 1;
+         if (ul < v_digit)
+           return -1;
+         return 0;
+       }
+    }
+#endif
+
+  return 1;
+}
diff --git a/mpz/com.c b/mpz/com.c

new file mode 100644 (file)

index 0000000..c403b8c
--- /dev/null
+++ b/mpz/com.c
@@ -0,0 +1,84 @@
+/* mpz_com(mpz_ptr dst, mpz_ptr src) -- Assign the bit-complemented value of
+   SRC to DST.
+
+Copyright 1991, 1993, 1994, 1996, 2001, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpz_com (mpz_ptr dst, mpz_srcptr src)
+{
+  mp_size_t size = src->_mp_size;
+  mp_srcptr src_ptr;
+  mp_ptr dst_ptr;
+
+  if (size >= 0)
+    {
+      /* As with infinite precision: one's complement, two's complement.
+        But this can be simplified using the identity -x = ~x + 1.
+        So we're going to compute (~~x) + 1 = x + 1!  */
+
+      if (dst->_mp_alloc < size + 1)
+       _mpz_realloc (dst, size + 1);
+
+      src_ptr = src->_mp_d;
+      dst_ptr = dst->_mp_d;
+
+      if (UNLIKELY (size == 0))
+       {
+         /* special case, as mpn_add_1 wants size!=0 */
+         dst_ptr[0] = 1;
+         dst->_mp_size = -1;
+         return;
+       }
+
+      {
+       mp_limb_t cy;
+
+       cy = mpn_add_1 (dst_ptr, src_ptr, size, (mp_limb_t) 1);
+       if (cy)
+         {
+           dst_ptr[size] = cy;
+           size++;
+         }
+      }
+
+      /* Store a negative size, to indicate ones-extension.  */
+      dst->_mp_size = -size;
+    }
+  else
+    {
+      /* As with infinite precision: two's complement, then one's complement.
+        But that can be simplified using the identity -x = ~(x - 1).
+        So we're going to compute ~~(x - 1) = x - 1!  */
+      size = -size;
+
+      if (dst->_mp_alloc < size)
+       _mpz_realloc (dst, size);
+
+      src_ptr = src->_mp_d;
+      dst_ptr = dst->_mp_d;
+
+      mpn_sub_1 (dst_ptr, src_ptr, size, (mp_limb_t) 1);
+      size -= dst_ptr[size - 1] == 0;
+
+      /* Store a positive size, to indicate zero-extension.  */
+      dst->_mp_size = size;
+    }
+}
diff --git a/mpz/combit.c b/mpz/combit.c

new file mode 100644 (file)

index 0000000..8a0ce3a
--- /dev/null
+++ b/mpz/combit.c
@@ -0,0 +1,80 @@
+/* mpz_combit -- complement a specified bit.
+
+Copyright 2002, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpz_combit (mpz_ptr d, mp_bitcnt_t bit_index)
+{
+  mp_size_t dsize = ABSIZ(d);
+  mp_ptr dp = LIMBS(d);
+
+  mp_size_t limb_index = bit_index / GMP_NUMB_BITS;
+  mp_limb_t bit = ((mp_limb_t) 1 << (bit_index % GMP_NUMB_BITS));
+  /* Bit lies at or above the top limb: grow and zero-fill up to limb_index.  */
+  if (limb_index >= dsize)
+    {
+      MPZ_REALLOC(d, limb_index + 1);
+      dp = LIMBS(d);
+
+      MPN_ZERO(dp + dsize, limb_index + 1 - dsize);
+      dsize = limb_index + 1;
+    }
+  /* Non-negative operand: flip the bit directly in the stored magnitude.  */
+  if (SIZ(d) >= 0)
+    {
+      dp[limb_index] ^= bit;
+      MPN_NORMALIZE (dp, dsize);
+      SIZ(d) = dsize;
+    }
+  else
+    {
+      mp_limb_t x = -dp[limb_index];
+      mp_size_t i;
+
+      /* non-zero limb below us means ones-complement */
+      for (i = limb_index-1; i >= 0; i--)
+       if (dp[i] != 0)
+         {
+           x--;  /* change twos comp to ones comp */
+           break;
+         }
+
+      if (x & bit)
+       {
+         mp_limb_t  c;
+
+         /* Clearing the bit increases the magnitude. We might need a carry. */
+         MPZ_REALLOC(d, dsize + 1);
+         dp = LIMBS(d);
+
+         __GMPN_ADD_1 (c, dp+limb_index, dp+limb_index,
+                       dsize - limb_index, bit);
+         dp[dsize] = c;
+         dsize += c;
+       }
+      else
+       /* Setting the bit decreases the magnitude; dsize-limb_index limbs remain */
+       mpn_sub_1(dp+limb_index, dp+limb_index, dsize - limb_index, bit);
+
+      MPN_NORMALIZE (dp, dsize);
+      SIZ(d) = -dsize;
+    }
+}
diff --git a/mpz/cong.c b/mpz/cong.c

new file mode 100644 (file)

index 0000000..127f5cd
--- /dev/null
+++ b/mpz/cong.c
@@ -0,0 +1,172 @@
+/* mpz_congruent_p -- test congruence of two mpz's.
+
+Copyright 2001, 2002, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* For big divisors this code is only very slightly better than the user
+   doing a combination of mpz_sub and mpz_tdiv_r, but it's quite convenient,
+   and perhaps in the future can be improved, in similar ways to
+   mpn_divisible_p perhaps.
+
+   The csize==1 / dsize==1 special case makes mpz_congruent_p as good as
+   mpz_congruent_ui_p on relevant operands, though such a combination
+   probably doesn't occur often.
+
+   Alternatives:
+
+   If c<d then it'd work to just form a%d and compare a and c (either as
+   a==c or a+c==d depending on the signs), but the saving from avoiding the
+   abs(a-c) calculation would be small compared to the division.
+
+   Similarly if both a<d and c<d then it would work to just compare a and c
+   (a==c or a+c==d), but this isn't considered a particularly important case
+   and so isn't done for the moment.
+
+   Low zero limbs on d could be stripped and the corresponding limbs of a
+   and c tested and skipped, but doing so would introduce a borrow when a
+   and c differ in sign and have non-zero skipped limbs.  It doesn't seem
+   worth the complications to do this, since low zero limbs on d should
+   occur only rarely.  */
+
+int
+mpz_congruent_p (mpz_srcptr a, mpz_srcptr c, mpz_srcptr d)
+{
+  mp_size_t  asize, csize, dsize, sign;
+  mp_srcptr  ap, cp, dp;
+  mp_ptr     xp;
+  mp_limb_t  alow, clow, dlow, dmask, r;
+  int        result;
+  TMP_DECL;
+
+  dsize = SIZ(d);
+  if (UNLIKELY (dsize == 0))
+    return (mpz_cmp (a, c) == 0);
+
+  dsize = ABS(dsize);
+  dp = PTR(d);
+
+  if (ABSIZ(a) < ABSIZ(c))
+    MPZ_SRCPTR_SWAP (a, c);
+
+  asize = SIZ(a);
+  csize = SIZ(c);
+  sign = (asize ^ csize);
+
+  asize = ABS(asize);
+  ap = PTR(a);
+
+  if (csize == 0)
+    return mpn_divisible_p (ap, asize, dp, dsize);
+
+  csize = ABS(csize);
+  cp = PTR(c);
+
+  alow = ap[0];
+  clow = cp[0];
+  dlow = dp[0];
+
+  /* Check a==c mod low zero bits of dlow.  This might catch a few cases of
+     a!=c quickly, and it helps the csize==1 special cases below.  */
+  dmask = LOW_ZEROS_MASK (dlow) & GMP_NUMB_MASK;
+  alow = (sign >= 0 ? alow : -alow);
+  if (((alow-clow) & dmask) != 0)
+    return 0;
+
+  if (csize == 1)
+    {
+      if (dsize == 1)
+        {
+        cong_1:
+          if (sign < 0)
+            NEG_MOD (clow, clow, dlow);
+
+          if (ABOVE_THRESHOLD (asize, BMOD_1_TO_MOD_1_THRESHOLD))
+            {
+              r = mpn_mod_1 (ap, asize, dlow);
+              if (clow < dlow)
+                return r == clow;
+              else
+                return r == (clow % dlow);
+            }
+
+          if ((dlow & 1) == 0)
+            {
+              /* Strip low zero bits to get odd d required by modexact.  If
+                 d==e*2^n then a==c mod d if and only if both a==c mod e and
+                 a==c mod 2^n, the latter having been done above.  */
+              unsigned  twos;
+              count_trailing_zeros (twos, dlow);
+              dlow >>= twos;
+            }
+
+          r = mpn_modexact_1c_odd (ap, asize, dlow, clow);
+          return r == 0 || r == dlow;
+        }
+
+      /* dlow==0 is avoided since we don't want to bother handling extra low
+         zero bits if dsecond is even (would involve borrow if a,c differ in
+         sign and alow,clow!=0).  */
+      if (dsize == 2 && dlow != 0)
+        {
+          mp_limb_t  dsecond = dp[1];
+
+          if (dsecond <= dmask)
+            {
+              unsigned   twos;
+              count_trailing_zeros (twos, dlow);
+              dlow = (dlow >> twos) | (dsecond << (GMP_NUMB_BITS-twos));
+              ASSERT_LIMB (dlow);
+
+              /* dlow will be odd here, so the test for it even under cong_1
+                 is unnecessary, but the rest of that code is wanted. */
+              goto cong_1;
+            }
+        }
+    }
+
+  TMP_MARK;
+  xp = TMP_ALLOC_LIMBS (asize+1);
+
+  /* calculate abs(a-c) */
+  if (sign >= 0)
+    {
+      /* same signs, subtract */
+      if (asize > csize || mpn_cmp (ap, cp, asize) >= 0)
+        ASSERT_NOCARRY (mpn_sub (xp, ap, asize, cp, csize));
+      else
+        ASSERT_NOCARRY (mpn_sub_n (xp, cp, ap, asize));
+      MPN_NORMALIZE (xp, asize);
+    }
+  else
+    {
+      /* different signs, add */
+      mp_limb_t  carry;
+      carry = mpn_add (xp, ap, asize, cp, csize);
+      xp[asize] = carry;
+      asize += (carry != 0);
+    }
+
+  result = mpn_divisible_p (xp, asize, dp, dsize);
+
+  TMP_FREE;
+  return result;
+}
diff --git a/mpz/cong_2exp.c b/mpz/cong_2exp.c

new file mode 100644 (file)

index 0000000..bf3ae54
--- /dev/null
+++ b/mpz/cong_2exp.c
@@ -0,0 +1,144 @@
+/* mpz_congruent_2exp_p -- test congruence of mpz mod 2^n.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+int
+mpz_congruent_2exp_p (mpz_srcptr a, mpz_srcptr c, mp_bitcnt_t d) __GMP_NOTHROW
+{
+  mp_size_t      i, dlimbs;
+  unsigned       dbits;
+  mp_ptr         ap, cp;
+  mp_limb_t      dmask, alimb, climb, sum;
+  mp_size_t      asize_signed, csize_signed, asize, csize;
+
+  if (ABSIZ(a) < ABSIZ(c))
+    MPZ_SRCPTR_SWAP (a, c);
+
+  dlimbs = d / GMP_NUMB_BITS;
+  dbits = d % GMP_NUMB_BITS;
+  dmask = (CNST_LIMB(1) << dbits) - 1;
+
+  ap = PTR(a);
+  cp = PTR(c);
+
+  asize_signed = SIZ(a);
+  asize = ABS(asize_signed);
+
+  csize_signed = SIZ(c);
+  csize = ABS(csize_signed);
+
+  if (csize_signed == 0)
+    goto a_zeros;
+
+  if ((asize_signed ^ csize_signed) >= 0)
+    {
+      /* same signs, direct comparison */
+
+      /* a==c for limbs in common */
+      if (mpn_cmp (ap, cp, MIN (csize, dlimbs)) != 0)
+        return 0;
+
+      /* if that's all of dlimbs, then a==c for remaining bits */
+      if (csize > dlimbs)
+        return ((ap[dlimbs]-cp[dlimbs]) & dmask) == 0;
+
+    a_zeros:
+      /* a remains, need all zero bits */
+
+      /* if d covers all of a and c, then must be exactly equal */
+      if (asize <= dlimbs)
+        return asize == csize;
+
+      /* whole limbs zero */
+      for (i = csize; i < dlimbs; i++)
+        if (ap[i] != 0)
+          return 0;
+
+      /* partial limb zero */
+      return (ap[dlimbs] & dmask) == 0;
+    }
+  else
+    {
+      /* different signs, negated comparison */
+
+      /* common low zero limbs, stopping at first non-zeros, which must
+         match twos complement */
+      i = 0;
+      for (;;)
+        {
+          ASSERT (i < csize);  /* always have a non-zero limb on c */
+          alimb = ap[i];
+          climb = cp[i];
+          sum = (alimb + climb) & GMP_NUMB_MASK;
+
+          if (i >= dlimbs)
+            return (sum & dmask) == 0;
+          i++;
+
+          /* require both zero, or first non-zeros as twos-complements */
+          if (sum != 0)
+            return 0;
+
+          if (alimb != 0)
+            break;
+        }
+
+      /* further limbs matching as ones-complement */
+      for (;;)
+        {
+          if (i >= csize)
+            break;
+
+          alimb = ap[i];
+          climb = cp[i];
+          sum = (alimb + climb + 1) & GMP_NUMB_MASK;
+
+          if (i >= dlimbs)
+            return (sum & dmask) == 0;
+
+          if (sum != 0)
+            return 0;
+
+          i++;
+        }
+
+      /* no more c, so require all 1 bits in a */
+
+      if (asize < dlimbs)
+        return 0;   /* not enough a */
+
+      /* whole limbs */
+      for ( ; i < dlimbs; i++)
+        if (ap[i] != GMP_NUMB_MAX)
+          return 0;
+
+      /* if only whole limbs, no further fetches from a */
+      if (dbits == 0)
+        return 1;
+
+      /* need enough a */
+      if (asize == dlimbs)
+        return 0;
+
+      return ((ap[dlimbs]+1) & dmask) == 0;
+    }
+}
diff --git a/mpz/cong_ui.c b/mpz/cong_ui.c

new file mode 100644 (file)

index 0000000..ee68c10
--- /dev/null
+++ b/mpz/cong_ui.c
@@ -0,0 +1,105 @@
+/* mpz_congruent_ui_p -- test congruence of mpz and ulong.
+
+Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* There's some explicit checks for c<d since it seems reasonably likely an
+   application might use that in a test.
+
+   Hopefully the compiler can generate something good for r==(c%d), though
+   if modexact is being used exclusively then that's not reached.  */
+
+int
+mpz_congruent_ui_p (mpz_srcptr a, unsigned long cu, unsigned long du)
+{
+  mp_srcptr  ap;
+  mp_size_t  asize;
+  mp_limb_t  c, d, r;
+
+  if (UNLIKELY (du == 0))
+    return (mpz_cmp_ui (a, cu) == 0);
+
+  asize = SIZ(a);
+  if (asize == 0)
+    {
+      if (cu < du)
+        return cu == 0;
+      else
+        return (cu % du) == 0;
+    }
+
+  /* For nails don't try to be clever if c or d is bigger than a limb, just
+     fake up some mpz_t's and go to the main mpz_congruent_p.  */
+  if (du > GMP_NUMB_MAX || cu > GMP_NUMB_MAX)
+    {
+      mp_limb_t  climbs[2], dlimbs[2];
+      mpz_t      cz, dz;
+
+      ALLOC(cz) = 2;
+      PTR(cz) = climbs;
+      ALLOC(dz) = 2;
+      PTR(dz) = dlimbs;
+
+      mpz_set_ui (cz, cu);
+      mpz_set_ui (dz, du);
+      return mpz_congruent_p (a, cz, dz);
+    }
+
+  /* NEG_MOD works on limbs, so convert ulong to limb */
+  c = cu;
+  d = du;
+
+  if (asize < 0)
+    {
+      asize = -asize;
+      NEG_MOD (c, c, d);
+    }
+
+  ap = PTR (a);
+
+  if (ABOVE_THRESHOLD (asize, BMOD_1_TO_MOD_1_THRESHOLD))
+    {
+      r = mpn_mod_1 (ap, asize, d);
+      if (c < d)
+        return r == c;
+      else
+        return r == (c % d);
+    }
+
+  if ((d & 1) == 0)
+    {
+      /* Strip low zero bits to get odd d required by modexact.  If
+         d==e*2^n then a==c mod d if and only if both a==c mod 2^n
+         and a==c mod e.  */
+
+      unsigned  twos;
+
+      if ((ap[0]-c) & LOW_ZEROS_MASK (d))
+        return 0;
+
+      count_trailing_zeros (twos, d);
+      d >>= twos;
+    }
+
+  r = mpn_modexact_1c_odd (ap, asize, d, c);
+  return r == 0 || r == d;
+}
diff --git a/mpz/dive_ui.c b/mpz/dive_ui.c

new file mode 100644 (file)

index 0000000..53709aa
--- /dev/null
+++ b/mpz/dive_ui.c
@@ -0,0 +1,59 @@
+/* mpz_divexact_ui -- exact division mpz by ulong.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpz_divexact_ui (mpz_ptr dst, mpz_srcptr src, unsigned long divisor)
+{
+  mp_size_t  size, abs_size;
+  mp_ptr     dst_ptr;
+
+  if (divisor == 0)
+    DIVIDE_BY_ZERO;
+
+  /* For nails don't try to be clever if d is bigger than a limb, just fake
+     up an mpz_t and go to the main mpz_divexact.  */
+  if (divisor > GMP_NUMB_MAX)
+    {
+      mp_limb_t  dlimbs[2];
+      mpz_t      dz;
+      ALLOC(dz) = 2;
+      PTR(dz) = dlimbs;
+      mpz_set_ui (dz, divisor);
+      mpz_divexact (dst, src, dz);
+      return;
+    }
+
+  size = SIZ(src);
+  if (size == 0)
+    {
+      SIZ(dst) = 0;
+      return;
+    }
+  abs_size = ABS (size);
+
+  MPZ_REALLOC (dst, abs_size);
+  dst_ptr = PTR(dst);
+
+  MPN_DIVREM_OR_DIVEXACT_1 (dst_ptr, PTR(src), abs_size, (mp_limb_t) divisor);
+  abs_size -= (dst_ptr[abs_size-1] == 0);
+  SIZ(dst) = (size >= 0 ? abs_size : -abs_size);
+}
diff --git a/mpz/divegcd.c b/mpz/divegcd.c

new file mode 100644 (file)

index 0000000..e4bf431
--- /dev/null
+++ b/mpz/divegcd.c
@@ -0,0 +1,110 @@
+/* mpz_divexact_gcd -- exact division optimized for GCDs.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE AND ARE ALMOST CERTAIN TO
+   BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE GNU MP RELEASES.
+
+Copyright 2000, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Set q to a/d, expecting d to be from a GCD and therefore usually small.
+
+   The distribution of GCDs of random numbers can be found in Knuth volume 2
+   section 4.5.2 theorem D.
+
+            GCD     chance
+             1       60.8%
+            2^k      20.2%     (1<=k<32)
+           3*2^k      9.0%     (1<=k<32)
+           other     10.1%
+
+   Only the low limb is examined for optimizations, since GCDs bigger than
+   2^32 (or 2^64) will occur very infrequently.
+
+   Future: This could change to an mpn_divexact_gcd, possibly partly
+   inlined, if/when the relevant mpq functions change to an mpn based
+   implementation.  */
+
+
+static void
+mpz_divexact_by3 (mpz_ptr q, mpz_srcptr a)
+{
+  mp_size_t  size = SIZ(a);
+  if (size == 0)
+    {
+      SIZ(q) = 0;
+      return;
+    }
+  else
+    {
+      mp_size_t  abs_size = ABS(size);
+      mp_ptr     qp;
+
+      MPZ_REALLOC (q, abs_size);
+
+      qp = PTR(q);
+      mpn_divexact_by3 (qp, PTR(a), abs_size);
+
+      abs_size -= (qp[abs_size-1] == 0);
+      SIZ(q) = (size>0 ? abs_size : -abs_size);
+    }
+}
+
+void
+mpz_divexact_gcd (mpz_ptr q, mpz_srcptr a, mpz_srcptr d)
+{
+  ASSERT (mpz_sgn (d) > 0);
+
+  if (SIZ(d) == 1)
+    {
+      mp_limb_t  dl = PTR(d)[0];
+      int        twos;
+
+      if (dl == 1)
+        {
+          if (q != a)
+            mpz_set (q, a);
+          return;
+        }
+      if (dl == 3)
+        {
+          mpz_divexact_by3 (q, a);
+          return;
+        }
+
+      count_trailing_zeros (twos, dl);
+      dl >>= twos;
+
+      if (dl == 1)
+        {
+          mpz_tdiv_q_2exp (q, a, twos);
+          return;
+        }
+      if (dl == 3)
+        {
+          mpz_tdiv_q_2exp (q, a, twos);
+          mpz_divexact_by3 (q, q);
+          return;
+        }
+    }
+
+  mpz_divexact (q, a, d);
+}
diff --git a/mpz/divexact.c b/mpz/divexact.c

new file mode 100644 (file)

index 0000000..95ba311
--- /dev/null
+++ b/mpz/divexact.c
@@ -0,0 +1,81 @@
+/* mpz_divexact -- finds quotient when known that quot * den == num && den != 0.
+
+Contributed to the GNU project by Niels Möller.
+
+Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1998, 2000, 2001, 2002, 2005,
+2006, 2007, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+void
+mpz_divexact (mpz_ptr quot, mpz_srcptr num, mpz_srcptr den)
+{
+  mp_ptr qp;
+  mp_size_t qn;
+  mp_srcptr np, dp;
+  mp_size_t nn, dn;
+  TMP_DECL;
+
+#if WANT_ASSERT
+  {
+    mpz_t  rem;
+    mpz_init (rem);
+    mpz_tdiv_r (rem, num, den);
+    ASSERT (SIZ(rem) == 0);
+    mpz_clear (rem);
+  }
+#endif
+
+  nn = ABSIZ (num);
+  dn = ABSIZ (den);
+
+  qn = nn - dn + 1;
+  MPZ_REALLOC (quot, qn);
+
+  if (nn < dn)
+    {
+      /* This special case avoids segfaults below when the function is
+        incorrectly called with |N| < |D|, N != 0.  It also handles the
+        well-defined case N = 0.  */
+      SIZ(quot) = 0;
+      return;
+    }
+
+  TMP_MARK;
+
+  qp = PTR(quot);
+
+  if (quot == num || quot == den)
+    qp = TMP_ALLOC_LIMBS (qn);
+
+  np = PTR(num);
+  dp = PTR(den);
+
+  mpn_divexact (qp, np, nn, dp, dn);
+  MPN_NORMALIZE (qp, qn);
+
+  SIZ(quot) = (SIZ(num) ^ SIZ(den)) >= 0 ? qn : -qn;
+
+  if (qp != PTR(quot))
+    MPN_COPY (PTR(quot), qp, qn);
+
+  TMP_FREE;
+}
diff --git a/mpz/divis.c b/mpz/divis.c

new file mode 100644 (file)

index 0000000..78ac38f
--- /dev/null
+++ b/mpz/divis.c
@@ -0,0 +1,33 @@
+/* mpz_divisible_p -- mpz by mpz divisibility test
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+int
+mpz_divisible_p (mpz_srcptr a, mpz_srcptr d)
+{
+  mp_size_t dsize = SIZ(d);
+  mp_size_t asize = SIZ(a);
+
+  if (UNLIKELY (dsize == 0))
+    return (asize == 0);
+
+  return mpn_divisible_p (PTR(a), ABS(asize), PTR(d), ABS(dsize));
+}
diff --git a/mpz/divis_2exp.c b/mpz/divis_2exp.c

new file mode 100644 (file)

index 0000000..814037f
--- /dev/null
+++ b/mpz/divis_2exp.c
@@ -0,0 +1,50 @@
+/* mpz_divisible_2exp_p -- mpz by 2^n divisibility test
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+int
+mpz_divisible_2exp_p (mpz_srcptr a, mp_bitcnt_t d) __GMP_NOTHROW
+{
+  mp_size_t      i, dlimbs;
+  unsigned       dbits;
+  mp_ptr         ap;
+  mp_limb_t      dmask;
+  mp_size_t      asize;
+
+  asize = ABSIZ(a);
+  dlimbs = d / GMP_NUMB_BITS;
+
+  /* if d covers the whole of a, then only a==0 is divisible */
+  if (asize <= dlimbs)
+    return asize == 0;
+
+  /* whole limbs must be zero */
+  ap = PTR(a);
+  for (i = 0; i < dlimbs; i++)
+    if (ap[i] != 0)
+      return 0;
+
+  /* left over bits must be zero */
+  dbits = d % GMP_NUMB_BITS;
+  dmask = (CNST_LIMB(1) << dbits) - 1;
+  return (ap[dlimbs] & dmask) == 0;
+}
diff --git a/mpz/divis_ui.c b/mpz/divis_ui.c

new file mode 100644 (file)

index 0000000..69dc21c
--- /dev/null
+++ b/mpz/divis_ui.c
@@ -0,0 +1,70 @@
+/* mpz_divisible_ui_p -- mpz by ulong divisibility test.
+
+Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+int
+mpz_divisible_ui_p (mpz_srcptr a, unsigned long d)
+{
+  mp_size_t  asize;
+  mp_ptr     ap;
+  unsigned   twos;
+
+  asize = SIZ(a);
+  if (UNLIKELY (d == 0))
+    return (asize == 0);
+
+  if (asize == 0)  /* 0 divisible by any d */
+    return 1;
+
+  /* For nails don't try to be clever if d is bigger than a limb, just fake
+     up an mpz_t and go to the main mpz_divisible_p.  */
+  if (d > GMP_NUMB_MAX)
+    {
+      mp_limb_t  dlimbs[2];
+      mpz_t      dz;
+      ALLOC(dz) = 2;
+      PTR(dz) = dlimbs;
+      mpz_set_ui (dz, d);
+      return mpz_divisible_p (a, dz);
+    }
+
+  ap = PTR(a);
+  asize = ABS(asize);  /* ignore sign of a */
+
+  if (ABOVE_THRESHOLD (asize, BMOD_1_TO_MOD_1_THRESHOLD))
+    return mpn_mod_1 (ap, asize, (mp_limb_t) d) == 0;
+
+  if (! (d & 1))
+    {
+      /* Strip low zero bits to get odd d required by modexact.  If d==e*2^n
+         and a is divisible by 2^n and by e, then it's divisible by d. */
+
+      if ((ap[0] & LOW_ZEROS_MASK (d)) != 0)
+        return 0;
+
+      count_trailing_zeros (twos, (mp_limb_t) d);
+      d >>= twos;
+    }
+
+  return mpn_modexact_1_odd (ap, asize, (mp_limb_t) d) == 0;
+}
diff --git a/mpz/dump.c b/mpz/dump.c

new file mode 100644 (file)

index 0000000..c5656a7
--- /dev/null
+++ b/mpz/dump.c
@@ -0,0 +1,38 @@
+/* mpz_dump - Dump an integer to stdout.
+
+   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS NOT SAFE TO
+   CALL THIS FUNCTION DIRECTLY.  IN FACT, IT IS ALMOST GUARANTEED THAT THIS
+   FUNCTION WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+
+Copyright 1999, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <string.h> /* for strlen */
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpz_dump (mpz_srcptr u)
+{
+  char *str;
+
+  str = mpz_get_str (0, 10, u);
+  printf ("%s\n", str);
+  (*__gmp_free_func) (str, strlen (str) + 1);
+}
diff --git a/mpz/export.c b/mpz/export.c

new file mode 100644 (file)

index 0000000..484b9d9
--- /dev/null
+++ b/mpz/export.c
@@ -0,0 +1,192 @@
+/* mpz_export -- create word data from mpz.
+
+Copyright 2002, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>  /* for NULL */
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* HOST_ENDIAN is 1 when limbs are stored big-endian and -1 when little-endian.  */
+#if HAVE_LIMB_BIG_ENDIAN
+#define HOST_ENDIAN     1
+#endif
+#if HAVE_LIMB_LITTLE_ENDIAN
+#define HOST_ENDIAN     (-1)
+#endif
+#ifndef HOST_ENDIAN  /* neither HAVE_LIMB_* macro was set: probe at run time */
+static const mp_limb_t  endian_test = (CNST_LIMB(1) << (GMP_LIMB_BITS-7)) - 1;  /* top byte 0x01, low byte 0xFF */
+#define HOST_ENDIAN     (* (signed char *) &endian_test)  /* first byte in memory: 1 if big-endian, -1 if little */
+#endif
+
+/* result = ceil (significant bits of {ptr,size} / base2exp); needs size>0 and a non-zero top limb */
+#define MPN_SIZEINBASE_2EXP(result, ptr, size, base2exp)                \
+  do {                                                                  \
+    int            __cnt;                                               \
+    unsigned long  __totbits;                                           \
+    ASSERT ((size) > 0);                                                \
+    ASSERT ((ptr)[(size)-1] != 0);                                      \
+    count_leading_zeros (__cnt, (ptr)[(size)-1]);                       \
+    __totbits = (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS);       \
+    (result) = (__totbits + (base2exp)-1) / (base2exp);                 \
+  } while (0)
+/* Store the absolute value of Z in DATA as words of SIZE bytes, each holding
+   8*SIZE-NAIL value bits; the word count goes to *COUNTP.  Returns the buffer.  */
+void *
+mpz_export (void *data, size_t *countp, int order,
+            size_t size, int endian, size_t nail, mpz_srcptr z)
+{
+  mp_size_t      zsize;
+  mp_srcptr      zp;
+  size_t         count, dummy;  /* dummy stands in for *countp when countp is NULL */
+  unsigned long  numb;          /* value bits per output word */
+  unsigned       align;         /* address of data modulo the limb size */
+  /* order: 1 = most significant word first, -1 = least significant first.  */
+  ASSERT (order == 1 || order == -1);
+  ASSERT (endian == 1 || endian == 0 || endian == -1);  /* big, host, little */
+  ASSERT (nail <= 8*size);
+  ASSERT (8*size-nail > 0);
+  /* A NULL countp is allowed; redirect it so the stores below are safe.  */
+  if (countp == NULL)
+    countp = &dummy;
+  /* Zero yields no words: data comes back as given, even if it is NULL.  */
+  zsize = SIZ(z);
+  if (zsize == 0)
+    {
+      *countp = 0;
+      return data;
+    }
+  /* Only the magnitude is exported; the sign of z is dropped here.  */
+  zsize = ABS (zsize);
+  zp = PTR(z);
+  numb = 8*size - nail;
+  MPN_SIZEINBASE_2EXP (count, zp, zsize, numb);
+  *countp = count;
+  /* No buffer supplied: get count*size bytes from the GMP allocation function.  */
+  if (data == NULL)
+    data = (*__gmp_allocate_func) (count*size);
+
+  if (endian == 0)
+    endian = HOST_ENDIAN;
+
+  align = ((char *) data - (char *) NULL) % sizeof (mp_limb_t);
+  /* Fast paths: limb-sized, limb-aligned words with the limb's own nail count.  */
+  if (nail == GMP_NAIL_BITS)
+    {
+      if (size == sizeof (mp_limb_t) && align == 0)
+        {
+          if (order == -1 && endian == HOST_ENDIAN)
+            {
+              MPN_COPY ((mp_ptr) data, zp, (mp_size_t) count);
+              return data;
+            }
+          if (order == 1 && endian == HOST_ENDIAN)
+            {
+              MPN_REVERSE ((mp_ptr) data, zp, (mp_size_t) count);
+              return data;
+            }
+
+          if (order == -1 && endian == -HOST_ENDIAN)
+            {
+              MPN_BSWAP ((mp_ptr) data, zp, (mp_size_t) count);
+              return data;
+            }
+          if (order == 1 && endian == -HOST_ENDIAN)
+            {
+              MPN_BSWAP_REVERSE ((mp_ptr) data, zp, (mp_size_t) count);
+              return data;
+            }
+        }
+    }
+  /* General path: stream the bits of z out one byte at a time.  */
+  {
+    mp_limb_t      limb, wbitsmask;  /* limb: bits fetched from z but not yet stored */
+    size_t         i, numb;          /* this numb shadows the outer one, same value */
+    mp_size_t      j, wbytes, woffset;
+    unsigned char  *dp;              /* output position, moved by -endian per byte */
+    int            lbits, wbits;     /* lbits: count of pending bits held in limb */
+    mp_srcptr      zend;
+
+    numb = size * 8 - nail;
+
+    /* whole bytes per word */
+    wbytes = numb / 8;
+
+    /* possible partial byte */
+    wbits = numb % 8;
+    wbitsmask = (CNST_LIMB(1) << wbits) - 1;
+
+    /* offset to get to the next word: undo the walk through this word's
+       bytes, then step one word in the direction given by order */
+    woffset = (endian >= 0 ? size : - (mp_size_t) size)
+      + (order < 0 ? size : - (mp_size_t) size);
+
+    /* least significant byte of the least significant word */
+    dp = (unsigned char *) data
+      + (order >= 0 ? (count-1)*size : 0) + (endian >= 0 ? size-1 : 0);
+    /* EXTRACT stores the next N bits of z (N <= 8 as used below) at *dp,
+       fetching a new limb, or 0 once past zend, when too few are pending.  */
+#define EXTRACT(N, MASK)                                \
+    do {                                                \
+      if (lbits >= (N))                                 \
+        {                                               \
+          *dp = limb MASK;                              \
+          limb >>= (N);                                 \
+          lbits -= (N);                                 \
+        }                                               \
+      else                                              \
+        {                                               \
+          mp_limb_t  newlimb;                           \
+          newlimb = (zp == zend ? 0 : *zp++);           \
+          *dp = (limb | (newlimb << lbits)) MASK;       \
+          limb = newlimb >> ((N)-lbits);                \
+          lbits += GMP_NUMB_BITS - (N);                 \
+        }                                               \
+    } while (0)
+
+    zend = zp + zsize;
+    lbits = 0;
+    limb = 0;
+    for (i = 0; i < count; i++)
+      {
+        for (j = 0; j < wbytes; j++)
+          {
+            EXTRACT (8, + 0);
+            dp -= endian;
+          }
+        if (wbits != 0)
+          {
+            EXTRACT (wbits, & wbitsmask);
+            dp -= endian;
+            j++;                      /* the partial byte counts as written */
+          }
+        for ( ; j < size; j++)        /* remaining bytes of the word are all nail */
+          {
+            *dp = '\0';
+            dp -= endian;
+          }
+        dp += woffset;
+      }
+
+    ASSERT (zp == PTR(z) + ABSIZ(z));
+
+    /* low byte of word after most significant */
+    ASSERT (dp == (unsigned char *) data
+            + (order < 0 ? count*size : - (mp_size_t) size)
+            + (endian >= 0 ? (mp_size_t) size - 1 : 0));
+  }
+  return data;
+}
diff --git a/mpz/fac_ui.c b/mpz/fac_ui.c

new file mode 100644 (file)

index 0000000..7e394fc
--- /dev/null
+++ b/mpz/fac_ui.c
@@ -0,0 +1,396 @@
+/* mpz_fac_ui(result, n) -- Set RESULT to N!.
+
+Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2002, 2003 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#include "fac_ui.h"
+
+
+static void odd_product __GMP_PROTO ((unsigned long, unsigned long, mpz_t *));
+static void ap_product_small __GMP_PROTO ((mpz_t, mp_limb_t, mp_limb_t, unsigned long, unsigned long));
+/* ap_product_small asserts count <= 2^APCONST; mpz_fac_ui also selects its
+   strategy by comparing n with 2^APCONST, 2^(APCONST+1) and 2^(APCONST+2).  */
+/* must be >=2 */
+#define APCONST        5
+
+/* set z to the single non-zero limb n; PTR(z)[0] is written with no reallocation */
+#define MPZ_SET_1_NZ(z,n)      \
+  do {                         \
+    mpz_ptr  __z = (z);                \
+    ASSERT ((n) != 0);         \
+    PTR(__z)[0] = (n);         \
+    SIZ(__z) = 1;              \
+  } while (0)
+
+/* dst = src*n for src>0 and limb n>0; dst is first grown to SIZ(src)+1 limbs */
+#define MPZ_MUL_1_POS(dst,src,n)                       \
+  do {                                                 \
+    mpz_ptr    __dst = (dst);                          \
+    mpz_srcptr __src = (src);                          \
+    mp_size_t  __size = SIZ(__src);                    \
+    mp_ptr     __dst_p;                                        \
+    mp_limb_t  __c;                                    \
+                                                       \
+    ASSERT (__size > 0);                               \
+    ASSERT ((n) != 0);                                 \
+                                                       \
+    MPZ_REALLOC (__dst, __size+1);                     \
+    __dst_p = PTR(__dst);                              \
+                                                       \
+    __c = mpn_mul_1 (__dst_p, PTR(__src), __size, n);  \
+    __dst_p[__size] = __c;                             \
+    SIZ(__dst) = __size + (__c != 0);                  \
+  } while (0)
+
+
+#if BITS_PER_ULONG == GMP_LIMB_BITS
+#define BSWAP_ULONG(x,y)       BSWAP_LIMB(x,y)  /* ulong and limb have equal width: reuse the limb swap */
+#endif
+
+/* We used to have a case here for limb==2*long, doing a BSWAP_LIMB followed
+   by a shift down to get the high part.  But it provoked incorrect code
+   from "HP aC++/ANSI C B3910B A.05.52 [Sep 05 2003]" in ILP32 mode.  This
+   case would have been nice for gcc ia64 where BSWAP_LIMB is a mux1, but we
+   can get that directly muxing a 4-byte ulong if it matters enough.  */
+/* Generic fallback: dst = src with its bytes in reverse order, one byte per pass.  */
+#if ! defined (BSWAP_ULONG)
+#define BSWAP_ULONG(dst, src)                                          \
+  do {                                                                 \
+    unsigned long  __bswapl_src = (src);                               \
+    unsigned long  __bswapl_dst = 0;                                   \
+    int               __i;                                                     \
+    for (__i = 0; __i < sizeof(unsigned long); __i++)                  \
+      {                                                                        \
+       __bswapl_dst = (__bswapl_dst << 8) | (__bswapl_src & 0xFF);     \
+       __bswapl_src >>= 8;                                             \
+      }                                                                        \
+    (dst) = __bswapl_dst;                                              \
+  } while (0)
+#endif
+
+/* x is bit reverse of y: swap bytes, then nibbles, then bit pairs, then single bits */
+/* Note the divides below are all exact: ULONG_MAX/17, /5, /3 are the 0x0F.., 0x33.., 0x55.. masks */
+#define BITREV_ULONG(x,y)                                                 \
+  do {                                                                    \
+   unsigned long __dst;                                                           \
+   BSWAP_ULONG(__dst,y);                                                  \
+   __dst = ((__dst>>4)&(ULONG_MAX/17)) | ((__dst<<4)&((ULONG_MAX/17)*16)); \
+   __dst = ((__dst>>2)&(ULONG_MAX/5) ) | ((__dst<<2)&((ULONG_MAX/5)*4)  ); \
+   __dst = ((__dst>>1)&(ULONG_MAX/3) ) | ((__dst<<1)&((ULONG_MAX/3)*2)  ); \
+   (x) = __dst;                                                                   \
+  } while(0)
+/* above could be improved if cpu has a nibble/bit swap/muxing instruction */
+/* above code is serialized, possible to write as a big parallel expression */
+
+/* Set x to n!.  Small n come straight from a one-limb table; larger n are
+   assembled from ap_product_small/odd_product results and a power of 2.  */
+void
+mpz_fac_ui (mpz_ptr x, unsigned long n)
+{
+  unsigned long z, stt;        /* z: bit estimate, later a bit count of n/3; stt: st[] entries used */
+  int i, j;
+  mpz_t t1, st[8 * sizeof (unsigned long) + 1 - APCONST];
+  mp_limb_t d[4];              /* only d[0] (low) and d[1] (high) are used */
+
+  static const mp_limb_t table[] = { ONE_LIMB_FACTORIAL_TABLE };
+  /* n small enough to index the table: the result is that single limb */
+  if (n < numberof (table))
+    {
+      MPZ_SET_1_NZ (x, table[n]);
+      return;
+    }
+
+  /*  NOTE : MUST have n>=3 here */
+  ASSERT (n >= 3);
+  /* for estimating the alloc sizes the calculation of these formulas is not
+     exact and also the formulas are only approximations, also we ignore
+     the few "side" calculations, correct allocation seems to speed up the
+     small sizes better, having very little effect on the large sizes */
+
+  /* estimate space for stack entries see below
+     number of bits for n! is
+     (1+log_2(2*pi)/2)-n*log_2(exp(1))+(n+1/2)*log_2(n)=
+     2.325748065-n*1.442695041+(n+0.5)*log_2(n)  */
+  umul_ppmm (d[1], d[0], (mp_limb_t) n, (mp_limb_t) FAC2OVERE);
+  /* d[1] is 2n/e, d[0] ignored        */
+  count_leading_zeros (z, d[1]);
+  z = GMP_LIMB_BITS - z - 1;   /* z=floor(log_2(2n/e))   */
+  umul_ppmm (d[1], d[0], (mp_limb_t) n, (mp_limb_t) z);
+  /* d=n*floor(log_2(2n/e))   */
+  d[0] = (d[0] >> 2) | (d[1] << (GMP_LIMB_BITS - 2));
+  d[1] >>= 2;
+  /* d=n*floor(log_2(2n/e))/4   */
+  z = d[0] + 1;                        /* have to ignore any overflow */
+  /* so z is the number of bits wanted for st[0]    */
+
+  /* n <= 2^APCONST: a single direct product 2*3*...*n */
+  if (n <= ((unsigned long) 1) << (APCONST))
+    {
+      mpz_realloc2 (x, 4 * z);
+      ap_product_small (x, CNST_LIMB(2), CNST_LIMB(1), n - 1, 4L);
+      return;
+    }
+  if (n <= ((unsigned long) 1) << (APCONST + 1))
+    {                          /*  use n!=odd(1,n)*(n/2)!*2^(n/2)         */
+      mpz_init2 (t1, 2 * z);
+      mpz_realloc2 (x, 4 * z);
+      ap_product_small (x, CNST_LIMB(2), CNST_LIMB(1), n / 2 - 1, 4L);
+      ap_product_small (t1, CNST_LIMB(3), CNST_LIMB(2), (n - 1) / 2, 4L);
+      mpz_mul (x, x, t1);
+      mpz_clear (t1);
+      mpz_mul_2exp (x, x, n / 2);
+      return;
+    }
+  if (n <= ((unsigned long) 1) << (APCONST + 2))
+    {
+      /* use n!=C_2(1,n/2)^2*C_2(n/2,n)*(n/4)!*2^(n/2+n/4) all int divs
+        so need (BITS_IN_N-APCONST+1)=(APCONST+3-APCONST+1)=4 stack entries */
+      mpz_init2 (t1, 2 * z);
+      mpz_realloc2 (x, 4 * z);
+      for (i = 0; i < 4; i++)
+       {
+         mpz_init2 (st[i], z);
+         z >>= 1;
+       }
+      odd_product (1, n / 2, st);
+      mpz_set (x, st[0]);
+      odd_product (n / 2, n, st);
+      mpz_mul (x, x, x);
+      ASSERT (n / 4 <= FACMUL4 + 6);
+      ap_product_small (t1, CNST_LIMB(2), CNST_LIMB(1), n / 4 - 1, 4L);
+      /* must have 2^APCONST odd numbers max */
+      mpz_mul (t1, t1, st[0]);
+      for (i = 0; i < 4; i++)
+       mpz_clear (st[i]);
+      mpz_mul (x, x, t1);
+      mpz_clear (t1);
+      mpz_mul_2exp (x, x, n / 2 + n / 4);
+      return;
+    }
+  /* general case: stt = (bits in n) + 1 - APCONST stack entries, each half the size of the last */
+  count_leading_zeros (stt, (mp_limb_t) n);
+  stt = GMP_LIMB_BITS - stt + 1 - APCONST;
+
+  for (i = 0; i < (signed long) stt; i++)
+    {
+      mpz_init2 (st[i], z);
+      z >>= 1;
+    }
+
+  count_leading_zeros (z, (mp_limb_t) (n / 3));
+  /* find z st 2^z>n/3 range for z is 1 <= z <= 8 * sizeof(unsigned long)-1 */
+  z = GMP_LIMB_BITS - z;
+
+  /*
+     n! = 2^e * PRODUCT_{i=0}^{i=z-1} C_2( n/2^{i+1}, n/2^i )^{i+1}
+     where 2^e || n!   3.2^z>n   C_2(a,b)=PRODUCT of odd z such that a<z<=b
+   */
+
+
+  mpz_init_set_ui (t1, 1);
+  for (j = 8 * sizeof (unsigned long) / 2; j != 0; j >>= 1)
+    {
+      MPZ_SET_1_NZ (x, 1);
+      for (i = 8 * sizeof (unsigned long) - j; i >= j; i -= 2 * j)
+       if ((signed long) z >= i)
+         {
+           odd_product (n >> i, n >> (i - 1), st);
+           /* largest odd product when j=i=1 then we have
+              odd_product(n/2,n,st) which is approx (2n/e)^(n/4)
+              so log_base2(largest oddproduct)=n*log_base2(2n/e)/4
+              number of bits is n*log_base2(2n/e)/4+1  */
+           if (i != j)
+             mpz_pow_ui (st[0], st[0], i / j);
+           mpz_mul (x, x, st[0]);
+         }
+      if ((signed long) z >= j && j != 1)
+       {
+         mpz_mul (t1, t1, x);
+         mpz_mul (t1, t1, t1);
+       }
+    }
+  for (i = 0; i < (signed long) stt; i++)
+    mpz_clear (st[i]);
+  mpz_mul (x, x, t1);
+  mpz_clear (t1);
+  popc_limb (i, (mp_limb_t) n);        /* i = number of 1 bits in n */
+  mpz_mul_2exp (x, x, n - i);          /* apply the power of 2, exponent n - i */
+  return;
+}
+
+/* ret = start*(start+step)*... over count terms; start,step are limbs but fit in ulong */
+static void
+ap_product_small (mpz_t ret, mp_limb_t start, mp_limb_t step,
+                 unsigned long count, unsigned long nm)
+{
+  unsigned long a;             /* loop counter */
+  mp_limb_t b;                 /* next term (or group of terms) still to be multiplied in */
+
+  ASSERT (count <= (((unsigned long) 1) << APCONST));
+/* count can never be zero ? check this and remove test below */
+  if (count == 0)
+    {
+      MPZ_SET_1_NZ (ret, 1);
+      return;
+    }
+  if (count == 1)
+    {
+      MPZ_SET_1_NZ (ret, start);
+      return;
+    }
+  switch (nm)                  /* nm terms are pre-multiplied into one limb per step; presumably callers pick nm so that fits */
+    {
+    case 1:                    /* one term per MPZ_MUL_1_POS */
+      MPZ_SET_1_NZ (ret, start);
+      b = start + step;
+      for (a = 0; a < count - 1; b += step, a++)
+       MPZ_MUL_1_POS (ret, ret, b);
+      return;
+    case 2:                    /* terms taken in pairs */
+      MPZ_SET_1_NZ (ret, start * (start + step));
+      if (count == 2)
+       return;
+      for (b = start + 2 * step, a = count / 2 - 1; a != 0;
+          a--, b += 2 * step)
+       MPZ_MUL_1_POS (ret, ret, b * (b + step));
+      if (count % 2 == 1)      /* one leftover term */
+       MPZ_MUL_1_POS (ret, ret, b);
+      return;
+    case 3:                    /* terms taken in triples */
+      if (count == 2)
+       {
+         MPZ_SET_1_NZ (ret, start * (start + step));
+         return;
+       }
+      MPZ_SET_1_NZ (ret, start * (start + step) * (start + 2 * step));
+      if (count == 3)
+       return;
+      for (b = start + 3 * step, a = count / 3 - 1; a != 0;
+          a--, b += 3 * step)
+       MPZ_MUL_1_POS (ret, ret, b * (b + step) * (b + 2 * step));
+      if (count % 3 == 2)      /* two leftover terms: combine them first */
+       b = b * (b + step);
+      if (count % 3 != 0)
+       MPZ_MUL_1_POS (ret, ret, b);
+      return;
+    default:                   /* ie nm=4      */
+      if (count == 2)
+       {
+         MPZ_SET_1_NZ (ret, start * (start + step));
+         return;
+       }
+      if (count == 3)
+       {
+         MPZ_SET_1_NZ (ret, start * (start + step) * (start + 2 * step));
+         return;
+       }
+      MPZ_SET_1_NZ (ret,
+                   start * (start + step) * (start + 2 * step) * (start +
+                                                                  3 * step));
+      if (count == 4)
+       return;
+      for (b = start + 4 * step, a = count / 4 - 1; a != 0;
+          a--, b += 4 * step)
+       MPZ_MUL_1_POS (ret, ret,
+                      b * (b + step) * (b + 2 * step) * (b + 3 * step));
+      if (count % 4 == 2)      /* two or three leftover terms: combine them first */
+       b = b * (b + step);
+      if (count % 4 == 3)
+       b = b * (b + step) * (b + 2 * step);
+      if (count % 4 != 0)
+       MPZ_MUL_1_POS (ret, ret, b);
+      return;
+    }
+}
+
+/* return value in st[0] = product of the odd integers i with low < i <= high
+   odd_product(l,h)=sqrt((h/e)^h/(l/e)^l) using Stirling approx and e=exp(1)
+   so st[0] needs enough bits for above, st[1] needs half these bits and
+   st[2] needs 1/4 of these bits etc   */
+static void
+odd_product (unsigned long low, unsigned long high, mpz_t * st)
+{
+  unsigned long stc = 1, stn = 0, n, y, mask, a, nm = 1;  /* stc: sub-products done + 1; stn: stack depth */
+  signed long z;
+
+  low++;                       /* smallest odd number above the given low */
+  if (low % 2 == 0)
+    low++;
+  if (high == 0)
+    high = 1;
+  if (high % 2 == 0)           /* largest odd number not above the given high */
+    high--;
+/* must have high>=low ? check this and remove test below */
+  if (high < low)
+    {
+      MPZ_SET_1_NZ (st[0], 1);
+      return;
+    }
+  if (high == low)
+    {
+      MPZ_SET_1_NZ (st[0], low);
+      return;
+    }
+  if (high <= FACMUL2 + 2)     /* pick nm, the terms combined per limb multiply; FACMULn are defined outside this file */
+    {
+      nm = 2;
+      if (high <= FACMUL3 + 4)
+       {
+         nm = 3;
+         if (high <= FACMUL4 + 6)
+           nm = 4;
+       }
+    }
+  high = (high - low) / 2 + 1; /* high is now count,high<=2^(BITS_PER_ULONG-1) */
+  if (high <= (((unsigned long) 1) << APCONST))
+    {
+      ap_product_small (st[0], (mp_limb_t) low, CNST_LIMB(2), high, nm);
+      return;
+    }
+  count_leading_zeros (n, (mp_limb_t) high);
+/* assumes clz above is LIMB based not NUMB based */
+  n = GMP_LIMB_BITS - n - APCONST;     /* split into 2^n sub-products of at most 2^APCONST terms */
+  mask = (((unsigned long) 1) << n);
+  a = mask << 1;                       /* step between terms of one sub-product: 2*2^n */
+  mask--;
+/* have 2^(BITS_IN_N-APCONST) iterations so need
+   (BITS_IN_N-APCONST+1) stack entries */
+  for (z = mask; z >= 0; z--)
+    {
+      BITREV_ULONG (y, z);
+      y >>= (BITS_PER_ULONG - n);      /* y = the low n bits of z, reversed */
+      ap_product_small (st[stn],
+                       (mp_limb_t) (low + 2 * ((~y) & mask)), (mp_limb_t) a,
+                       (high + y) >> n, nm);
+      ASSERT (((high + y) >> n) <= (((unsigned long) 1) << APCONST));
+      stn++;
+      y = stc++;
+      while ((y & 1) == 0)             /* merge the top two entries once per trailing zero bit of the call count */
+       {
+         mpz_mul (st[stn - 2], st[stn - 2], st[stn - 1]);
+         stn--;
+         y >>= 1;
+       }
+    }
+  ASSERT (stn == 1);
+  return;
+}
diff --git a/mpz/fdiv_q.c b/mpz/fdiv_q.c

new file mode 100644 (file)

index 0000000..6b4c2c4
--- /dev/null
+++ b/mpz/fdiv_q.c
@@ -0,0 +1,42 @@
+/* mpz_fdiv_q -- Division rounding the quotient towards -infinity.
+   The remainder gets the same sign as the denominator.
+
+Copyright 1994, 1995, 1996, 2000, 2001, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpz_fdiv_q (mpz_ptr quot, mpz_srcptr dividend, mpz_srcptr divisor)
+{
+  /* Sizes carry the signs; both are read here, before QUOT is written.  */
+  mp_size_t den_size = SIZ (divisor);
+  mp_size_t num_size = SIZ (dividend);
+  int signs_differ = (den_size ^ num_size) < 0;
+  mpz_t r;
+  TMP_DECL;
+
+  TMP_MARK;
+  MPZ_TMP_INIT (r, ABS (den_size));
+
+  /* Truncate first; a negative inexact quotient was rounded up, so fix it.  */
+  mpz_tdiv_qr (quot, r, dividend, divisor);
+  if (signs_differ && SIZ (r) != 0)
+    mpz_sub_ui (quot, quot, 1L);
+  TMP_FREE;
+}
diff --git a/mpz/fdiv_q_ui.c b/mpz/fdiv_q_ui.c

new file mode 100644 (file)

index 0000000..9554185
--- /dev/null
+++ b/mpz/fdiv_q_ui.c
@@ -0,0 +1,91 @@
+/* mpz_fdiv_q_ui -- Division rounding the quotient towards -infinity.
+   The remainder gets the same sign as the denominator.
+
+Copyright 1994, 1995, 1996, 1999, 2001, 2002, 2004 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+unsigned long int
+mpz_fdiv_q_ui (mpz_ptr quot, mpz_srcptr dividend, unsigned long int divisor)
+{
+  mp_size_t ns, nn, qn;
+  mp_ptr np, qp;
+  mp_limb_t rl;
+  /* Sets QUOT = floor (DIVIDEND / DIVISOR) and returns the remainder.  */
+  if (divisor == 0)
+    DIVIDE_BY_ZERO;
+
+  ns = SIZ(dividend);
+  if (ns == 0)
+    {
+      SIZ(quot) = 0;
+      return 0;
+    }
+
+  nn = ABS(ns);
+  MPZ_REALLOC (quot, nn);
+  qp = PTR(quot);
+  np = PTR(dividend);
+
+#if BITS_PER_ULONG > GMP_NUMB_BITS  /* avoid warnings about shift amount */
+  if (divisor > GMP_NUMB_MAX)
+    {
+      mp_limb_t dp[2], rp[2];
+      /* NOTE(review): with QUOT == DIVIDEND the tdiv_qr call below gets qp == np; confirm that overlap is allowed.  */
+      if (nn == 1)             /* tdiv_qr requirements; tested above for 0 */
+       {
+         rl = np[0];           /* read before clearing qp[0]: QUOT may be DIVIDEND, so qp may equal np */
+         qp[0] = 0;
+         qn = 1;               /* a white lie, fixed below */
+       }
+      else
+       {
+         dp[0] = divisor & GMP_NUMB_MASK;
+         dp[1] = divisor >> GMP_NUMB_BITS;
+         mpn_tdiv_qr (qp, rp, (mp_size_t) 0, np, nn, dp, (mp_size_t) 2);
+         rl = rp[0] + (rp[1] << GMP_NUMB_BITS);
+         qn = nn - 2 + 1;
+       }
+      /* floor: a negative inexact quotient grows by one in magnitude */
+      if (rl != 0 && ns < 0)
+       {
+         mpn_incr_u (qp, (mp_limb_t) 1);
+         rl = divisor - rl;
+       }
+      /* strip up to two high zero limbs from the quotient */
+      qn -= qp[qn - 1] == 0; qn -= qn != 0 && qp[qn - 1] == 0;
+    }
+  else
+#endif
+    {
+      rl = mpn_divrem_1 (qp, (mp_size_t) 0, np, nn, (mp_limb_t) divisor);
+      /* floor: a negative inexact quotient grows by one in magnitude */
+      if (rl != 0 && ns < 0)
+       {
+         mpn_incr_u (qp, (mp_limb_t) 1);
+         rl = divisor - rl;
+       }
+
+      qn = nn - (qp[nn - 1] == 0);
+    }
+  /* the quotient takes the sign of the dividend */
+  SIZ(quot) = ns >= 0 ? qn : -qn;
+  return rl;
+}
diff --git a/mpz/fdiv_qr.c b/mpz/fdiv_qr.c

new file mode 100644 (file)

index 0000000..0230db1
--- /dev/null
+++ b/mpz/fdiv_qr.c
@@ -0,0 +1,54 @@
+/* mpz_fdiv_qr -- Division rounding the quotient towards -infinity.
+   The remainder gets the same sign as the denominator.
+
+Copyright 1994, 1995, 1996, 2000, 2001, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpz_fdiv_qr (mpz_ptr quot, mpz_ptr rem, mpz_srcptr dividend, mpz_srcptr divisor)
+{
+  mp_size_t den_size = SIZ (divisor);
+  int signs_differ;
+  mpz_t saved_divisor;         /* must stay valid until the final mpz_add */
+  TMP_DECL;
+
+  TMP_MARK;
+
+  /* The floor correction below adds the divisor to REM after the truncating
+     division has already stored into QUOT and REM.  If either output is
+     the divisor variable itself, work from a temporary copy of it.  */
+  if (rem == divisor || quot == divisor)
+    {
+      MPZ_TMP_INIT (saved_divisor, ABS (den_size));
+      mpz_set (saved_divisor, divisor);
+      divisor = saved_divisor;
+    }
+
+  /* Compare the signs before the division writes QUOT and REM.  */
+  signs_differ = (SIZ (dividend) ^ den_size) < 0;
+  mpz_tdiv_qr (quot, rem, dividend, divisor);
+
+  if (signs_differ && SIZ (rem) != 0)
+    {
+      mpz_sub_ui (quot, quot, 1L);
+      mpz_add (rem, rem, divisor);
+    }
+  TMP_FREE;
+}
diff --git a/mpz/fdiv_qr_ui.c b/mpz/fdiv_qr_ui.c

new file mode 100644 (file)

index 0000000..7c41fc8
--- /dev/null
+++ b/mpz/fdiv_qr_ui.c
@@ -0,0 +1,108 @@
+/* mpz_fdiv_qr_ui -- Division rounding the quotient towards -infinity.
+   The remainder gets the same sign as the denominator.
+
+Copyright 1994, 1995, 1996, 1999, 2001, 2002, 2004 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+unsigned long int
+mpz_fdiv_qr_ui (mpz_ptr quot, mpz_ptr rem, mpz_srcptr dividend, unsigned long int divisor)
+{
+  mp_size_t ns, nn, qn;
+  mp_ptr np, qp;
+  mp_limb_t rl;
+
+  if (divisor == 0)
+    DIVIDE_BY_ZERO;
+
+  ns = SIZ(dividend);
+  if (ns == 0)
+    {
+      SIZ(quot) = 0;
+      SIZ(rem) = 0;
+      return 0;
+    }
+
+  nn = ABS(ns);
+  MPZ_REALLOC (quot, nn);
+  qp = PTR(quot);
+  np = PTR(dividend);
+
+#if BITS_PER_ULONG > GMP_NUMB_BITS  /* avoid warnings about shift amount */
+  if (divisor > GMP_NUMB_MAX)
+    {
+      mp_limb_t dp[2];
+      mp_ptr rp;
+      mp_size_t rn;
+
+      MPZ_REALLOC (rem, 2);
+      rp = PTR(rem);
+
+      if (nn == 1)             /* tdiv_qr requirements; tested above for 0 */
+       {
+         qp[0] = 0;
+         qn = 1;               /* a white lie, fixed below */
+         rl = np[0];
+         rp[0] = rl;
+       }
+      else
+       {
+         dp[0] = divisor & GMP_NUMB_MASK;
+         dp[1] = divisor >> GMP_NUMB_BITS;
+         mpn_tdiv_qr (qp, rp, (mp_size_t) 0, np, nn, dp, (mp_size_t) 2);
+         rl = rp[0] + (rp[1] << GMP_NUMB_BITS);
+         qn = nn - 2 + 1;
+       }
+
+      if (rl != 0 && ns < 0)
+       {
+         mpn_incr_u (qp, (mp_limb_t) 1);
+         rl = divisor - rl;
+         rp[0] = rl & GMP_NUMB_MASK;
+         rp[1] = rl >> GMP_NUMB_BITS;
+       }
+
+      qn -= qp[qn - 1] == 0; qn -= qn != 0 && qp[qn - 1] == 0;
+      rn = 1 + (rl > GMP_NUMB_MAX);  rn -= (rp[rn - 1] == 0);
+      SIZ(rem) = rn;
+    }
+  else
+#endif
+    {
+      rl = mpn_divrem_1 (qp, (mp_size_t) 0, np, nn, (mp_limb_t) divisor);
+      if (rl == 0)
+       SIZ(rem) = 0;
+      else
+       {
+         if (ns < 0)
+           {
+             mpn_incr_u (qp, (mp_limb_t) 1);
+             rl = divisor - rl;
+           }
+
+         PTR(rem)[0] = rl;
+         SIZ(rem) = rl != 0;
+       }
+      qn = nn - (qp[nn - 1] == 0);
+    }
+
+  SIZ(quot) = ns >= 0 ? qn : -qn;
+  return rl;
+}
diff --git a/mpz/fdiv_r.c b/mpz/fdiv_r.c

new file mode 100644 (file)

index 0000000..56bcf4c
--- /dev/null
+++ b/mpz/fdiv_r.c
@@ -0,0 +1,49 @@
+/* mpz_fdiv_r -- Division rounding the quotient towards -infinity.
+   The remainder gets the same sign as the denominator.
+
+Copyright 1994, 1995, 1996, 2001, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpz_fdiv_r (mpz_ptr rem, mpz_srcptr dividend, mpz_srcptr divisor)
+{
+  mp_size_t den_size = SIZ (divisor);
+  mpz_t saved_divisor;         /* must stay valid until the final mpz_add */
+  TMP_DECL;
+
+  TMP_MARK;
+  /* The floor correction below adds the divisor to REM after the truncating
+     division has already stored into REM.  If REM is the divisor variable
+     itself, work from a temporary copy of it.  */
+  if (rem == divisor)
+    {
+      MPZ_TMP_INIT (saved_divisor, ABS (den_size));
+      mpz_set (saved_divisor, divisor);
+      divisor = saved_divisor;
+    }
+
+  mpz_tdiv_r (rem, dividend, divisor);
+
+  /* A non-zero remainder whose dividend and divisor differ in sign gets
+     the divisor added once.  */
+  if (SIZ (rem) != 0 && (den_size ^ SIZ (dividend)) < 0)
+    mpz_add (rem, rem, divisor);
+  TMP_FREE;
+}
diff --git a/mpz/fdiv_r_ui.c b/mpz/fdiv_r_ui.c

new file mode 100644 (file)

index 0000000..d16e432
--- /dev/null
+++ b/mpz/fdiv_r_ui.c
@@ -0,0 +1,104 @@
+/* mpz_fdiv_r_ui -- Division rounding the quotient towards -infinity.
+   The remainder gets the same sign as the denominator.
+
+Copyright 1994, 1995, 1996, 2001, 2002, 2004, 2005 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Set REM to DIVIDEND mod DIVISOR, the remainder of a division rounding
+   towards -infinity, and return it.  The result lies in 0..DIVISOR-1.
+   REM may be the same variable as DIVIDEND.  */
+unsigned long int
+mpz_fdiv_r_ui (mpz_ptr rem, mpz_srcptr dividend, unsigned long int divisor)
+{
+  mp_size_t ns, nn;
+  mp_ptr np;
+  mp_limb_t rl;
+
+  if (divisor == 0)
+    DIVIDE_BY_ZERO;
+
+  ns = SIZ(dividend);
+  if (ns == 0)
+    {
+      SIZ(rem) = 0;
+      return 0;
+    }
+
+  nn = ABS(ns);
+  np = PTR(dividend);
+#if BITS_PER_ULONG > GMP_NUMB_BITS  /* avoid warnings about shift amount */
+  if (divisor > GMP_NUMB_MAX)
+    {
+      mp_limb_t dp[2];
+      mp_ptr rp, qp;
+      mp_size_t rn;
+      TMP_DECL;
+
+      MPZ_REALLOC (rem, 2);
+      rp = PTR(rem);
+      /* When REM and DIVIDEND are the same variable the reallocation may
+         have moved the limbs, so the pointer fetched above can be stale.  */
+      np = PTR(dividend);
+
+      if (nn == 1)             /* tdiv_qr requirements; tested above for 0 */
+       {
+         rl = np[0];
+         rp[0] = rl;
+       }
+      else
+       {
+         TMP_MARK;
+         dp[0] = divisor & GMP_NUMB_MASK;
+         dp[1] = divisor >> GMP_NUMB_BITS;
+         qp = TMP_ALLOC_LIMBS (nn - 2 + 1);
+         mpn_tdiv_qr (qp, rp, (mp_size_t) 0, np, nn, dp, (mp_size_t) 2);
+         TMP_FREE;
+         rl = rp[0] + (rp[1] << GMP_NUMB_BITS);
+       }
+
+      if (rl != 0 && ns < 0)   /* negative dividend: floor remainder */
+       {
+         rl = divisor - rl;
+         rp[0] = rl & GMP_NUMB_MASK;
+         rp[1] = rl >> GMP_NUMB_BITS;
+       }
+
+      rn = 1 + (rl > GMP_NUMB_MAX);  rn -= (rp[rn - 1] == 0);
+      SIZ(rem) = rn;
+    }
+  else
+#endif
+    {
+      rl = mpn_mod_1 (np, nn, (mp_limb_t) divisor);
+      if (rl == 0)
+       SIZ(rem) = 0;
+      else
+       {
+         if (ns < 0)          /* negative dividend: floor remainder */
+           rl = divisor - rl;
+
+         PTR(rem)[0] = rl;
+         SIZ(rem) = 1;
+       }
+    }
+
+  return rl;
+}
diff --git a/mpz/fdiv_ui.c b/mpz/fdiv_ui.c

new file mode 100644 (file)

index 0000000..566b6e7
--- /dev/null
+++ b/mpz/fdiv_ui.c
@@ -0,0 +1,79 @@
+/* mpz_fdiv_ui -- Division rounding the quotient towards -infinity.
+   The remainder gets the same sign as the denominator.
+
+Copyright 1994, 1995, 1996, 2001, 2002, 2004, 2005 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Return DIVIDEND mod DIVISOR, the remainder of a division rounding towards
+   -infinity; the result lies in 0..DIVISOR-1.  Nothing is stored, DIVIDEND
+   is only read.  */
+unsigned long int
+mpz_fdiv_ui (mpz_srcptr dividend, unsigned long int divisor)
+{
+  mp_size_t ns, nn;
+  mp_ptr np;
+  mp_limb_t rl;
+
+  if (divisor == 0)
+    DIVIDE_BY_ZERO;
+
+  ns = SIZ(dividend);
+  if (ns == 0)
+    return 0;
+
+  nn = ABS(ns);
+  np = PTR(dividend);
+#if BITS_PER_ULONG > GMP_NUMB_BITS  /* avoid warnings about shift amount */
+  if (divisor > GMP_NUMB_MAX)
+    {
+      mp_limb_t dp[2], rp[2];
+      mp_ptr qp;
+      TMP_DECL;
+
+      if (nn == 1)             /* tdiv_qr requirements; tested above for 0 */
+       {
+         /* One limb is below DIVISOR, so it is the remainder already.  */
+         rl = np[0];
+       }
+      else
+       {
+         TMP_MARK;
+         dp[0] = divisor & GMP_NUMB_MASK;
+         dp[1] = divisor >> GMP_NUMB_BITS;
+         qp = TMP_ALLOC_LIMBS (nn - 2 + 1);
+         mpn_tdiv_qr (qp, rp, (mp_size_t) 0, np, nn, dp, (mp_size_t) 2);
+         TMP_FREE;
+         rl = rp[0] + (rp[1] << GMP_NUMB_BITS);
+       }
+    }
+  else
+#endif
+    {
+      rl = mpn_mod_1 (np, nn, (mp_limb_t) divisor);
+    }
+
+  /* Both paths give the remainder of |DIVIDEND|.  For a negative dividend a
+     nonzero remainder r turns into DIVISOR - r; zero stays zero.  */
+  if (rl != 0 && ns < 0)
+    rl = divisor - rl;
+
+  return rl;
+}
diff --git a/mpz/fib2_ui.c b/mpz/fib2_ui.c

new file mode 100644 (file)

index 0000000..8521136
--- /dev/null
+++ b/mpz/fib2_ui.c
@@ -0,0 +1,41 @@
+/* mpz_fib2_ui -- calculate Fibonacci numbers.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+void
+mpz_fib2_ui (mpz_ptr fn, mpz_ptr fnsub1, unsigned long n)
+{
+  mp_size_t  limbs = MPN_FIB2_SIZE (n);
+  mp_ptr     cur, prev;
+
+  /* Size both results first, then hand their limb arrays to mpn_fib2_ui,
+     which returns the limb count actually used.  */
+  MPZ_REALLOC (fn, limbs);
+  MPZ_REALLOC (fnsub1, limbs);
+  cur = PTR (fn);
+  prev = PTR (fnsub1);
+  limbs = mpn_fib2_ui (cur, prev, n);
+
+  SIZ(fn) = (n == 0) ? limbs - 1 : limbs;         /* n == 0: one limb less */
+  SIZ(fnsub1) = (prev[limbs-1] == 0) ? limbs - 1 : limbs;  /* zero top limb */
+}
diff --git a/mpz/fib_ui.c b/mpz/fib_ui.c

new file mode 100644 (file)

index 0000000..8c13a8f
--- /dev/null
+++ b/mpz/fib_ui.c
@@ -0,0 +1,142 @@
+/* mpz_fib_ui -- calculate Fibonacci numbers.
+
+Copyright 2000, 2001, 2002, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Tracing hook, expands to nothing; change to "#define TRACE(x) x" to get some traces */
+#define TRACE(x)
+
+
+/* In the F[2k+1] below for k odd, the -2 won't give a borrow from the low
+   limb because the result F[2k+1] is an F[4m+3] and such numbers are always
+   == 1, 2 or 5 mod 8, whereas an underflow would leave 6 or 7.  (This is
+   the same as in mpn_fib2_ui.)
+
+   In the F[2k+1] for k even, the +2 won't give a carry out of the low limb
+   in normal circumstances.  This is an F[4m+1] and we claim that F[3*2^b+1]
+   == 1 mod 2^b is the first F[4m+1] congruent to 0 or 1 mod 2^b, and hence
+   if n < 2^GMP_NUMB_BITS then F[n] cannot have a low limb of 0 or 1.  No
+   proof for this claim, but it's been verified up to b==32 and has such a
+   nice pattern it must be true :-).  Of interest is that F[3*2^b] == 0 mod
+   2^(b+1) seems to hold too.
+
+   When n >= 2^GMP_NUMB_BITS, which can arise in a nails build, then the low
+   limb of F[4m+1] can certainly be 1, and an mpn_add_1 must be used.  */
+
+void
+mpz_fib_ui (mpz_ptr fn, unsigned long n)
+{
+  mp_ptr         fp, xp, yp;
+  mp_size_t      size, xalloc;
+  unsigned long  n2;
+  mp_limb_t      c, c2;
+  TMP_DECL;
+
+  if (n <= FIB_TABLE_LIMIT)    /* small n: answer comes straight from the table */
+    {
+      PTR(fn)[0] = FIB_TABLE (n);
+      SIZ(fn) = (n != 0);      /* F[0]==0, others are !=0 */
+      return;
+    }
+
+  n2 = n/2;                    /* the k of the F[2k] / F[2k+1] formulas below */
+  xalloc = MPN_FIB2_SIZE (n2) + 1;     /* +1 limb: carries get stored at [size] */
+  MPZ_REALLOC (fn, 2*xalloc+1);        /* room for the product formed below */
+  fp = PTR (fn);
+
+  TMP_MARK;
+  TMP_ALLOC_LIMBS_2 (xp,xalloc, yp,xalloc);
+  size = mpn_fib2_ui (xp, yp, n2);     /* xp = F[k], yp = F[k-1], per the formulas below */
+
+  TRACE (printf ("mpz_fib_ui last step n=%lu size=%ld bit=%lu\n",
+                 n >> 1, size, n&1);
+         mpn_trace ("xp", xp, size);
+         mpn_trace ("yp", yp, size));
+
+  if (n & 1)
+    {
+      /* F[2k+1] = (2F[k]+F[k-1])*(2F[k]-F[k-1]) + 2*(-1)^k  */
+      mp_size_t  xsize, ysize;
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+      xp[size] = mpn_lshift (xp, xp, size, 1);
+      yp[size] = 0;
+      ASSERT_NOCARRY (mpn_add_n_sub_n (xp, yp, xp, yp, size+1));
+      xsize = size + (xp[size] != 0);
+      ysize = size + (yp[size] != 0);
+#else
+      c2 = mpn_lshift (fp, xp, size, 1);       /* fp = 2F[k], fp serving as scratch */
+      c = c2 + mpn_add_n (xp, fp, yp, size);   /* xp = 2F[k]+F[k-1] */
+      xp[size] = c;
+      xsize = size + (c != 0);
+      c2 -= mpn_sub_n (yp, fp, yp, size);      /* yp = 2F[k]-F[k-1] */
+      yp[size] = c2;
+      ASSERT (c2 <= 1);
+      ysize = size + c2;
+#endif
+
+      size = xsize + ysize;                    /* limb count of the product */
+      c = mpn_mul (fp, xp, xsize, yp, ysize);  /* c = high limb of the product */
+
+#if GMP_NUMB_BITS >= BITS_PER_ULONG
+      /* no overflow, see comments above */
+      ASSERT (n & 2 ? fp[0] >= 2 : fp[0] <= GMP_NUMB_MAX-2);
+      fp[0] += (n & 2 ? -CNST_LIMB(2) : CNST_LIMB(2));
+#else
+      if (n & 2)
+        {
+          ASSERT (fp[0] >= 2);
+          fp[0] -= 2;
+        }
+      else
+        {
+          ASSERT (c != GMP_NUMB_MAX); /* because it's the high of a mul */
+          c += mpn_add_1 (fp, fp, size-1, CNST_LIMB(2));
+          fp[size-1] = c;
+        }
+#endif
+    }
+  else
+    {
+      /* F[2k] = F[k]*(F[k]+2F[k-1]) */
+
+      mp_size_t  xsize, ysize;
+      c = mpn_lshift (yp, yp, size, 1);        /* yp = 2F[k-1] */
+      c += mpn_add_n (yp, yp, xp, size);       /* yp = F[k]+2F[k-1] */
+      yp[size] = c;
+      xsize = size;
+      ysize = size + (c != 0);
+      size += ysize;                           /* limb count of the product */
+      c = mpn_mul (fp, yp, ysize, xp, xsize);  /* c = high limb of the product */
+    }
+
+  /* one or two high zeros */
+  size -= (c == 0);
+  size -= (fp[size-1] == 0);
+  SIZ(fn) = size;
+
+  TRACE (printf ("done special, size=%ld\n", size);
+         mpn_trace ("fp ", fp, size));
+
+  TMP_FREE;
+}
diff --git a/mpz/fits_s.h b/mpz/fits_s.h

new file mode 100644 (file)

index 0000000..d690c08
--- /dev/null
+++ b/mpz/fits_s.h
@@ -0,0 +1,50 @@
+/* int mpz_fits_X_p (mpz_t z) -- test whether z fits signed type X.
+
+Copyright 1997, 2000, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+int
+FUNCTION (mpz_srcptr z) __GMP_NOTHROW
+{
+  mp_ptr zp = PTR(z);
+  mp_size_t zn = SIZ(z);
+  mp_limb_t low = zp[0];
+
+  /* Zero always fits; a single limb is compared against the type limits.  */
+  switch (zn)
+    {
+    case 0:  return 1;
+    case 1:  return low <= MAXIMUM;
+    case -1: return low <= - (mp_limb_t) MINIMUM;
+    }
+#if GMP_NAIL_BITS != 0
+  /* Nails build: a two-limb value can still fit, so fold in the second limb.  */
+  if ((zp[1] >> GMP_NAIL_BITS) == 0)
+    {
+      low += zp[1] << GMP_NUMB_BITS;
+      if (zn == 2)
+       return low <= MAXIMUM;
+      if (zn == -2)
+       return low <= - (mp_limb_t) MINIMUM;
+    }
+#endif
+  return 0;
+}
diff --git a/mpz/fits_sint.c b/mpz/fits_sint.c

new file mode 100644 (file)

index 0000000..6730b6c
--- /dev/null
+++ b/mpz/fits_sint.c
@@ -0,0 +1,25 @@
+/* int mpz_fits_sint_p (mpz_t z) -- test whether z fits an int.
+
+Copyright 1997, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#define FUNCTION  mpz_fits_sint_p     /* name the function in fits_s.h gets */
+#define MAXIMUM   INT_MAX             /* upper bound tested in fits_s.h */
+#define MINIMUM   INT_MIN             /* lower bound tested in fits_s.h */
+
+#include "fits_s.h"                   /* function body, written in terms of the macros above */
diff --git a/mpz/fits_slong.c b/mpz/fits_slong.c

new file mode 100644 (file)

index 0000000..e9c88e1
--- /dev/null
+++ b/mpz/fits_slong.c
@@ -0,0 +1,25 @@
+/* int mpz_fits_slong_p (mpz_t z) -- test whether z fits a long.
+
+Copyright 1997, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#define FUNCTION  mpz_fits_slong_p    /* name the function in fits_s.h gets */
+#define MAXIMUM   LONG_MAX            /* upper bound tested in fits_s.h */
+#define MINIMUM   LONG_MIN            /* lower bound tested in fits_s.h */
+
+#include "fits_s.h"                   /* function body, written in terms of the macros above */
diff --git a/mpz/fits_sshort.c b/mpz/fits_sshort.c

new file mode 100644 (file)

index 0000000..f973c41
--- /dev/null
+++ b/mpz/fits_sshort.c
@@ -0,0 +1,25 @@
+/* int mpz_fits_sshort_p (mpz_t z) -- test whether z fits a short.
+
+Copyright 1997, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#define FUNCTION  mpz_fits_sshort_p   /* name the function in fits_s.h gets */
+#define MAXIMUM   SHRT_MAX            /* upper bound tested in fits_s.h */
+#define MINIMUM   SHRT_MIN            /* lower bound tested in fits_s.h */
+
+#include "fits_s.h"                   /* function body, written in terms of the macros above */
diff --git a/mpz/fits_uint.c b/mpz/fits_uint.c

new file mode 100644 (file)

index 0000000..c2b95e5
--- /dev/null
+++ b/mpz/fits_uint.c
@@ -0,0 +1,23 @@
+/* mpz_fits_uint_p -- test whether z fits an unsigned int.
+
+Copyright 1997, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define __GMP_FORCE_mpz_fits_uint_p 1  /* NOTE(review): presumably makes gmp.h emit mpz_fits_uint_p in this file -- confirm in gmp.h */
+
+#include "gmp.h"
+#include "gmp-impl.h"
diff --git a/mpz/fits_ulong.c b/mpz/fits_ulong.c

new file mode 100644 (file)

index 0000000..71013e0
--- /dev/null
+++ b/mpz/fits_ulong.c
@@ -0,0 +1,23 @@
+/* mpz_fits_ulong_p -- test whether z fits an unsigned long.
+
+Copyright 1997, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define __GMP_FORCE_mpz_fits_ulong_p 1  /* NOTE(review): presumably makes gmp.h emit mpz_fits_ulong_p in this file -- confirm in gmp.h */
+
+#include "gmp.h"
+#include "gmp-impl.h"
diff --git a/mpz/fits_ushort.c b/mpz/fits_ushort.c

new file mode 100644 (file)

index 0000000..47d37cd
--- /dev/null
+++ b/mpz/fits_ushort.c
@@ -0,0 +1,23 @@
+/* mpz_fits_ushort_p -- test whether z fits an unsigned short.
+
+Copyright 1997, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define __GMP_FORCE_mpz_fits_ushort_p 1  /* NOTE(review): presumably makes gmp.h emit mpz_fits_ushort_p in this file -- confirm in gmp.h */
+
+#include "gmp.h"
+#include "gmp-impl.h"
diff --git a/mpz/gcd.c b/mpz/gcd.c

new file mode 100644 (file)

index 0000000..1878769
--- /dev/null
+++ b/mpz/gcd.c
@@ -0,0 +1,163 @@
+/* mpz/gcd.c:   Calculate the greatest common divisor of two integers.
+
+Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2002, 2005 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+#ifdef BERKELEY_MP
+#include "mp.h"
+#endif
+
+
+void
+#ifndef BERKELEY_MP
+mpz_gcd (mpz_ptr g, mpz_srcptr u, mpz_srcptr v)
+#else /* BERKELEY_MP */
+gcd (mpz_srcptr u, mpz_srcptr v, mpz_ptr g)
+#endif /* BERKELEY_MP */
+{
+  unsigned long int g_zero_bits, u_zero_bits, v_zero_bits;
+  mp_size_t g_zero_limbs, u_zero_limbs, v_zero_limbs;
+  mp_ptr tp;
+  mp_ptr up = u->_mp_d;
+  mp_size_t usize = ABS (u->_mp_size);
+  mp_ptr vp = v->_mp_d;
+  mp_size_t vsize = ABS (v->_mp_size);
+  mp_size_t gsize;
+  TMP_DECL;
+
+  /* GCD(0, V) == V.  */
+  if (usize == 0)
+    {
+      g->_mp_size = vsize;     /* non-negative size: the result is |V| */
+      if (g == v)              /* limbs are already in place */
+       return;
+      if (g->_mp_alloc < vsize)
+       _mpz_realloc (g, vsize);
+      MPN_COPY (g->_mp_d, vp, vsize);
+      return;
+    }
+
+  /* GCD(U, 0) == U.  */
+  if (vsize == 0)
+    {
+      g->_mp_size = usize;     /* non-negative size: the result is |U| */
+      if (g == u)              /* limbs are already in place */
+       return;
+      if (g->_mp_alloc < usize)
+       _mpz_realloc (g, usize);
+      MPN_COPY (g->_mp_d, up, usize);
+      return;
+    }
+
+  if (usize == 1)              /* one-limb U: mpn_gcd_1 gives a one-limb result */
+    {
+      g->_mp_size = 1;
+      g->_mp_d[0] = mpn_gcd_1 (vp, vsize, up[0]);
+      return;
+    }
+
+  if (vsize == 1)              /* one-limb V: same with the operands swapped */
+    {
+      g->_mp_size = 1;
+      g->_mp_d[0] = mpn_gcd_1 (up, usize, vp[0]);
+      return;
+    }
+
+  TMP_MARK;
+
+  /*  Eliminate low zero bits from U and V and move to temporary storage.  */
+  while (*up == 0)             /* skip whole zero limbs */
+    up++;
+  u_zero_limbs = up - u->_mp_d;
+  usize -= u_zero_limbs;
+  count_trailing_zeros (u_zero_bits, *up);
+  tp = up;
+  up = TMP_ALLOC_LIMBS (usize);        /* working copy; U's own limbs are not modified */
+  if (u_zero_bits != 0)
+    {
+      mpn_rshift (up, tp, usize, u_zero_bits);
+      usize -= up[usize - 1] == 0;     /* the shift may have emptied the top limb */
+    }
+  else
+    MPN_COPY (up, tp, usize);
+
+  while (*vp == 0)             /* skip whole zero limbs */
+    vp++;
+  v_zero_limbs = vp - v->_mp_d;
+  vsize -= v_zero_limbs;
+  count_trailing_zeros (v_zero_bits, *vp);
+  tp = vp;
+  vp = TMP_ALLOC_LIMBS (vsize);        /* working copy; V's own limbs are not modified */
+  if (v_zero_bits != 0)
+    {
+      mpn_rshift (vp, tp, vsize, v_zero_bits);
+      vsize -= vp[vsize - 1] == 0;     /* the shift may have emptied the top limb */
+    }
+  else
+    MPN_COPY (vp, tp, vsize);
+
+  if (u_zero_limbs > v_zero_limbs)     /* G takes the smaller count of low zero bits */
+    {
+      g_zero_limbs = v_zero_limbs;
+      g_zero_bits = v_zero_bits;
+    }
+  else if (u_zero_limbs < v_zero_limbs)
+    {
+      g_zero_limbs = u_zero_limbs;
+      g_zero_bits = u_zero_bits;
+    }
+  else  /*  Equal.  */
+    {
+      g_zero_limbs = u_zero_limbs;
+      g_zero_bits = MIN (u_zero_bits, v_zero_bits);
+    }
+
+  /*  Call mpn_gcd.  The 2nd argument must not have more bits than the 1st.  */
+  vsize = (usize < vsize || (usize == vsize && up[usize-1] < vp[vsize-1]))
+    ? mpn_gcd (vp, vp, vsize, up, usize)
+    : mpn_gcd (vp, up, usize, vp, vsize);
+
+  /*  Here G <-- V << (g_zero_limbs*GMP_LIMB_BITS + g_zero_bits).  */
+  gsize = vsize + g_zero_limbs;
+  if (g_zero_bits != 0)
+    {
+      mp_limb_t cy_limb;
+      gsize += (vp[vsize - 1] >> (GMP_NUMB_BITS - g_zero_bits)) != 0;  /* shift spills into one more limb */
+      if (g->_mp_alloc < gsize)
+       _mpz_realloc (g, gsize);
+      MPN_ZERO (g->_mp_d, g_zero_limbs);
+
+      tp = g->_mp_d + g_zero_limbs;
+      cy_limb = mpn_lshift (tp, vp, vsize, g_zero_bits);
+      if (cy_limb != 0)
+       tp[vsize] = cy_limb;
+    }
+  else
+    {
+      if (g->_mp_alloc < gsize)
+       _mpz_realloc (g, gsize);
+      MPN_ZERO (g->_mp_d, g_zero_limbs);
+      MPN_COPY (g->_mp_d + g_zero_limbs, vp, vsize);
+    }
+
+  g->_mp_size = gsize;
+  TMP_FREE;
+}
diff --git a/mpz/gcd_ui.c b/mpz/gcd_ui.c

new file mode 100644 (file)

index 0000000..d1a7cec
--- /dev/null
+++ b/mpz/gcd_ui.c
@@ -0,0 +1,88 @@
+/* mpz_gcd_ui -- Calculate the greatest common divisor of two integers.
+
+Copyright 1994, 1996, 1999, 2000, 2001, 2002, 2003, 2004 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h> /* for NULL */
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Set W (when W is not NULL) to the greatest common divisor of U and V and
+   return that value.  When V is zero the result is |U|, and 0 is returned
+   if that does not fit an unsigned long.  */
+unsigned long int
+mpz_gcd_ui (mpz_ptr w, mpz_srcptr u, unsigned long int v)
+{
+  mp_size_t un;
+  mp_limb_t res;
+
+#if BITS_PER_ULONG > GMP_NUMB_BITS  /* avoid warnings about shift amount */
+  if (v > GMP_NUMB_MAX)
+    {
+      mpz_t vz, lw;
+      mp_limb_t vlimbs[2], wlimbs[2];
+
+      /* W is optional, but mpz_gcd needs somewhere to store the result.
+         Two limbs suffice because the gcd cannot exceed V.  */
+      if (w == NULL)
+       {
+         PTR(lw) = wlimbs;
+         ALLOC(lw) = 2;
+         SIZ(lw) = 0;
+         w = lw;
+       }
+      vlimbs[0] = v & GMP_NUMB_MASK;
+      vlimbs[1] = v >> GMP_NUMB_BITS;
+      PTR(vz) = vlimbs;
+      SIZ(vz) = 2;
+      mpz_gcd (w, u, vz);
+      /* because v!=0 we will have w<=v hence fitting a ulong */
+      ASSERT (mpz_fits_ulong_p (w));
+      return mpz_get_ui (w);
+    }
+#endif
+
+  un = ABSIZ(u);
+
+  if (un == 0)                 /* gcd (0, v) = v */
+    res = v;
+  else if (v == 0)             /* gcd (u, 0) = |u|, which may need many limbs */
+    {
+      if (w != NULL)
+       {
+         if (u != w)
+           {
+             MPZ_REALLOC (w, un);
+             MPN_COPY (PTR(w), PTR(u), un);
+           }
+         SIZ(w) = un;
+       }
+      /* Return u if it fits a ulong, otherwise 0. */
+      res = PTR(u)[0];
+      return (un == 1 && res <= ULONG_MAX ? res : 0);
+    }
+  else
+    res = mpn_gcd_1 (PTR(u), un, (mp_limb_t) v);
+
+  if (w != NULL)
+    {
+      PTR(w)[0] = res;
+      SIZ(w) = res != 0;
+    }
+  return res;
+}
diff --git a/mpz/gcdext.c b/mpz/gcdext.c

new file mode 100644 (file)

index 0000000..2419e2f
--- /dev/null
+++ b/mpz/gcdext.c
@@ -0,0 +1,126 @@
+/* mpz_gcdext(g, s, t, a, b) -- Set G to gcd(a, b), and S and T such that
+   g = as + bt.
+
+Copyright 1991, 1993, 1994, 1995, 1996, 1997, 2000, 2001, 2005 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h> /* for NULL */
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpz_gcdext (mpz_ptr g, mpz_ptr s, mpz_ptr t, mpz_srcptr a, mpz_srcptr b)
+{
+  mp_size_t asize, bsize, usize, vsize;
+  mp_srcptr ap, bp;
+  mp_ptr up, vp;
+  mp_size_t gsize, ssize, tmp_ssize;
+  mp_ptr gp, sp, tmp_gp, tmp_sp;
+  mpz_srcptr u, v;
+  mpz_ptr ss, tt;
+  __mpz_struct stmp, gtmp;
+  TMP_DECL;
+
+  TMP_MARK;
+
+  /* mpn_gcdext requires that U >= V.  Therefore, we often have to swap U and
+     V.  This in turn leads to a lot of complications.  The computed cofactor
+     will be the wrong one, so we have to fix that up at the end.  */
+
+  asize = ABS (SIZ (a));
+  bsize = ABS (SIZ (b));
+  ap = PTR (a);
+  bp = PTR (b);
+  if (asize > bsize || (asize == bsize && mpn_cmp (ap, bp, asize) > 0))  /* |a| > |b|: no swap */
+    {
+      usize = asize;
+      vsize = bsize;
+      up = TMP_ALLOC_LIMBS (usize + 1);
+      vp = TMP_ALLOC_LIMBS (vsize + 1);
+      MPN_COPY (up, ap, usize);
+      MPN_COPY (vp, bp, vsize);
+      u = a;
+      v = b;
+      ss = s;
+      tt = t;
+    }
+  else                         /* |a| <= |b|: b becomes U, a becomes V */
+    {
+      usize = bsize;
+      vsize = asize;
+      up = TMP_ALLOC_LIMBS (usize + 1);
+      vp = TMP_ALLOC_LIMBS (vsize + 1);
+      MPN_COPY (up, bp, usize);
+      MPN_COPY (vp, ap, vsize);
+      u = b;
+      v = a;
+      ss = t;                  /* the cofactor outputs swap with the operands */
+      tt = s;
+    }
+
+  tmp_gp = TMP_ALLOC_LIMBS (usize + 1);
+  tmp_sp = TMP_ALLOC_LIMBS (usize + 1);
+
+  if (vsize == 0)              /* V == 0: gcd is |U| with cofactor 1, mpn_gcdext not called */
+    {
+      tmp_sp[0] = 1;
+      tmp_ssize = 1;
+      MPN_COPY (tmp_gp, up, usize);
+      gsize = usize;
+    }
+  else
+    gsize = mpn_gcdext (tmp_gp, tmp_sp, &tmp_ssize, up, usize, vp, vsize);
+  ssize = ABS (tmp_ssize);
+
+  PTR (&gtmp) = tmp_gp;
+  SIZ (&gtmp) = gsize;
+
+  PTR (&stmp) = tmp_sp;
+  SIZ (&stmp) = (tmp_ssize ^ SIZ (u)) >= 0 ? ssize : -ssize;  /* positive when tmp_ssize and U agree in sign */
+
+  if (tt != NULL)
+    {
+      if (SIZ (v) == 0)
+       SIZ (tt) = 0;
+      else
+       {
+         mpz_t x;
+         MPZ_TMP_INIT (x, ssize + usize + 1);
+         mpz_mul (x, &stmp, u);        /* x = s*u */
+         mpz_sub (x, &gtmp, x);        /* x = g - s*u */
+         mpz_tdiv_q (tt, x, v);        /* second cofactor = (g - s*u) / v */
+       }
+    }
+
+  if (ss != NULL)
+    {
+      if (ALLOC (ss) < ssize)
+       _mpz_realloc (ss, ssize);
+      sp = PTR (ss);
+      MPN_COPY (sp, tmp_sp, ssize);
+      SIZ (ss) = SIZ (&stmp);
+    }
+
+  if (ALLOC (g) < gsize)
+    _mpz_realloc (g, gsize);
+  gp = PTR (g);
+  MPN_COPY (gp, tmp_gp, gsize);
+  SIZ (g) = gsize;             /* gsize is never negative here */
+
+  TMP_FREE;
+}
diff --git a/mpz/get_d.c b/mpz/get_d.c

new file mode 100644 (file)

index 0000000..5643e21
--- /dev/null
+++ b/mpz/get_d.c
@@ -0,0 +1,33 @@
+/* double mpz_get_d (mpz_t src) -- Return the double approximation to SRC.
+
+Copyright 1996, 1997, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+double
+mpz_get_d (mpz_srcptr z)
+{
+  /* Return Z as a double; the conversion itself is done by mpn_get_d.  */
+  mp_size_t zn = SIZ (z);
+  mp_size_t zabs = ABS (zn);
+
+  /* Zero is answered directly, without calling mpn_get_d.  */
+  return UNLIKELY (zn == 0) ? 0.0
+                            : mpn_get_d (PTR (z), zabs, zn, 0L);
+}
diff --git a/mpz/get_d_2exp.c b/mpz/get_d_2exp.c

new file mode 100644 (file)

index 0000000..c3cf60c
--- /dev/null
+++ b/mpz/get_d_2exp.c
@@ -0,0 +1,45 @@
+/* double mpz_get_d_2exp (signed long int *exp, mpz_t src).
+
+Copyright 2001, 2003, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+double
+mpz_get_d_2exp (signed long int *exp2, mpz_srcptr src)
+{
+  mp_size_t sn = SIZ(src);
+  mp_size_t limbs;
+  mp_srcptr limb_ptr;
+  long bits;
+  int lead;
+
+  if (UNLIKELY (sn == 0))
+    {
+      *exp2 = 0;               /* zero: exponent 0 and value 0.0 */
+      return 0.0;
+    }
+
+  limbs = ABS(sn);
+  limb_ptr = PTR(src);
+  count_leading_zeros (lead, limb_ptr[limbs - 1]);
+  bits = limbs * GMP_NUMB_BITS - (lead - GMP_NAIL_BITS);  /* bit length of |src| */
+  *exp2 = bits;
+  return mpn_get_d (limb_ptr, limbs, sn, -bits);
+}
diff --git a/mpz/get_si.c b/mpz/get_si.c

new file mode 100644 (file)

index 0000000..2f8a473
--- /dev/null
+++ b/mpz/get_si.c
@@ -0,0 +1,43 @@
+/* mpz_get_si(integer) -- Return the least significant digit from INTEGER.
+
+Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2002, 2006 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+signed long int
+mpz_get_si (mpz_srcptr z) __GMP_NOTHROW
+{
+  mp_size_t n = z->_mp_size;
+  mp_ptr dp = z->_mp_d;
+  mp_limb_t low = dp[0];        /* read unconditionally, even for n == 0 */
+
+#if GMP_NAIL_BITS != 0
+  /* a limb's numb bits alone may not fill a long: merge in the next limb */
+  if (ULONG_MAX > GMP_NUMB_MAX && ABS (n) >= 2)
+    low |= dp[1] << GMP_NUMB_BITS;
+#endif
+
+  if (n == 0)
+    return 0;
+  if (n > 0)
+    return low & LONG_MAX;
+  /* n < 0: this expression is necessary to properly handle 0x80000000 */
+  return -1 - (long) ((low - 1) & LONG_MAX);
+}
diff --git a/mpz/get_str.c b/mpz/get_str.c

new file mode 100644 (file)

index 0000000..cce5193
--- /dev/null
+++ b/mpz/get_str.c
@@ -0,0 +1,115 @@
+/* mpz_get_str (string, base, mp_src) -- Convert the multiple precision
+   number MP_SRC to a string STRING of base BASE.  If STRING is NULL
+   allocate space for the result.  In any case, return a pointer to the
+   result.  If STRING is not NULL, the caller must ensure enough space is
+   available to store the result.
+
+Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2002, 2005 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <string.h> /* for strlen */
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+char *
+mpz_get_str (char *res_str, int base, mpz_srcptr x)
+{
+  mp_ptr xp;
+  mp_size_t x_size = x->_mp_size;
+  char *str;
+  char *return_str;
+  size_t str_size;
+  size_t alloc_size = 0;
+  const char *num_to_text;
+  size_t i;                     /* same type as str_size, its loop bound */
+  TMP_DECL;
+
+  /* Pick the digit alphabet: 0 and +-1 mean decimal, a negative base means
+     upper case, and bases beyond 62 or -36 return NULL up front.  */
+  if (base > -2 && base < 2)
+    base = 10;
+  if (base < 0)
+    {
+      base = -base;
+      if (base > 36)
+        return NULL;
+      num_to_text = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+    }
+  else if (base <= 36)
+    num_to_text = "0123456789abcdefghijklmnopqrstuvwxyz";
+  else if (base <= 62)
+    num_to_text = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
+  else
+    return NULL;
+
+  /* allocate string for the user if necessary */
+  if (res_str == NULL)
+    {
+      /* digits, null terminator, possible minus sign */
+      MPN_SIZEINBASE (alloc_size, PTR(x), ABS(x_size), base);
+      alloc_size += 1 + (x_size<0);
+      res_str = (char *) (*__gmp_allocate_func) (alloc_size);
+    }
+  return_str = res_str;
+
+  if (x_size < 0)
+    {
+      *res_str++ = '-';
+      x_size = -x_size;
+    }
+
+  /* mpn_get_str clobbers its input on non power-of-2 bases */
+  TMP_MARK;
+  xp = x->_mp_d;
+  if (! POW2_P (base))
+    {
+      xp = TMP_ALLOC_LIMBS (x_size + 1);  /* +1 in case x_size==0 */
+      MPN_COPY (xp, x->_mp_d, x_size);
+    }
+
+  str_size = mpn_get_str ((unsigned char *) res_str, base, xp, x_size);
+  ASSERT (alloc_size == 0 || str_size <= alloc_size - (SIZ(x) < 0));
+
+  /* might have a leading zero, skip it */
+  str = res_str;
+  if (*res_str == 0 && str_size != 1)
+    {
+      str_size--;
+      str++;
+      ASSERT (*str != 0);  /* at most one leading zero */
+    }
+
+  /* Convert result to printable chars, and move down if there was a leading
+     zero.  */
+  for (i = 0; i < str_size; i++)
+    res_str[i] = num_to_text[(int) str[i]];
+  res_str[str_size] = 0;
+
+  TMP_FREE;
+
+  /* if allocated then resize down to the actual space required */
+  if (alloc_size != 0)
+    {
+      size_t  actual_size = str_size + 1 + (res_str - return_str);
+      ASSERT (actual_size == strlen (return_str) + 1);
+      __GMP_REALLOCATE_FUNC_MAYBE_TYPE (return_str, alloc_size, actual_size,
+                                        char);
+    }
+  return return_str;
+}
diff --git a/mpz/get_ui.c b/mpz/get_ui.c

new file mode 100644 (file)

index 0000000..a94af63
--- /dev/null
+++ b/mpz/get_ui.c
@@ -0,0 +1,23 @@
+/* mpz_get_ui(integer) -- Return the least significant digit from INTEGER.
+
+Copyright 1991, 1993, 1994, 1995, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define __GMP_FORCE_mpz_get_ui 1
+
+#include "gmp.h"
+#include "gmp-impl.h"
diff --git a/mpz/getlimbn.c b/mpz/getlimbn.c

new file mode 100644 (file)

index 0000000..dc7771e
--- /dev/null
+++ b/mpz/getlimbn.c
@@ -0,0 +1,23 @@
+/* mpz_getlimbn(integer,n) -- Return the N:th limb from INTEGER.
+
+Copyright 1993, 1994, 1995, 1996, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define __GMP_FORCE_mpz_getlimbn 1
+
+#include "gmp.h"
+#include "gmp-impl.h"
diff --git a/mpz/hamdist.c b/mpz/hamdist.c

new file mode 100644 (file)

index 0000000..5c62e0b
--- /dev/null
+++ b/mpz/hamdist.c
@@ -0,0 +1,165 @@
+/* mpz_hamdist -- calculate hamming distance.
+
+Copyright 1994, 1996, 2001, 2002, 2009, 2010, 2011 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+mp_bitcnt_t
+mpz_hamdist (mpz_srcptr u, mpz_srcptr v) __GMP_NOTHROW
+{
+  mp_srcptr      up, vp;
+  mp_size_t      usize, vsize;
+  mp_bitcnt_t    count;
+  /* Same-sign operands are counted below; differing signs return ~0.  */
+  usize = SIZ(u);
+  vsize = SIZ(v);
+
+  up = PTR(u);
+  vp = PTR(v);
+
+  if (usize >= 0)
+    {
+      if (vsize < 0)
+       return ~ (mp_bitcnt_t) 0;
+
+      /* positive/positive */
+
+      if (usize < vsize)
+       MPN_SRCPTR_SWAP (up,usize, vp,vsize);
+      /* now usize >= vsize: hamdist on the common part, popcount above it */
+      count = 0;
+      if (vsize != 0)
+       count = mpn_hamdist (up, vp, vsize);
+
+      usize -= vsize;
+      if (usize != 0)
+       count += mpn_popcount (up + vsize, usize);
+
+      return count;
+    }
+  else
+    {
+      mp_limb_t  ulimb, vlimb;
+      mp_size_t  old_vsize, step;
+
+      if (vsize >= 0)
+       return ~ (mp_bitcnt_t) 0;
+
+      /* negative/negative */
+
+      usize = -usize;
+      vsize = -vsize;
+
+      /* skip common low zeros */
+      for (;;)
+       {
+         ASSERT (usize > 0);
+         ASSERT (vsize > 0);
+
+         usize--;
+         vsize--;
+
+         ulimb = *up++;
+         vlimb = *vp++;
+
+         if (ulimb != 0)
+           break;
+
+         if (vlimb != 0)
+           {
+             MPN_SRCPTR_SWAP (up,usize, vp,vsize);  /* make u the non-zero one */
+             ulimb = vlimb;
+             vlimb = 0;
+             break;
+           }
+       }
+
+      /* twos complement first non-zero limbs (ulimb is non-zero, but vlimb
+        might be zero) */
+      ulimb = -ulimb;
+      vlimb = -vlimb;
+      popc_limb (count, (ulimb ^ vlimb) & GMP_NUMB_MASK);
+
+      if (vlimb == 0)
+       {
+         mp_bitcnt_t  twoscount;
+
+         /* first non-zero of v */
+         old_vsize = vsize;
+         do
+           {
+             ASSERT (vsize > 0);
+             vsize--;
+             vlimb = *vp++;
+           }
+         while (vlimb == 0);
+
+         /* part of u corresponding to skipped v zeros */
+         step = old_vsize - vsize - 1;   /* zero limbs of v just skipped */
+         count += step * GMP_NUMB_BITS;  /* all bits set, less those in u */
+         step = MIN (step, usize);
+         if (step != 0)
+           {
+             count -= mpn_popcount (up, step);
+             usize -= step;
+             up += step;
+           }
+
+         /* First non-zero vlimb as twos complement, xor with ones
+            complement ulimb.  Note -v^(~0^u) == (v-1)^u. */
+         vlimb--;
+         if (usize != 0)
+           {
+             usize--;
+             vlimb ^= *up++;
+           }
+         popc_limb (twoscount, vlimb);
+         count += twoscount;
+       }
+
+      /* Overlapping part of u and v, if any.  Ones complement both, so just
+        plain hamdist. */
+      step = MIN (usize, vsize);
+      if (step != 0)
+       {
+         count += mpn_hamdist (up, vp, step);
+         usize -= step;
+         vsize -= step;
+         up += step;
+         vp += step;
+       }
+
+      /* Remaining high part of u or v, if any, ones complement but xor
+        against all ones in the other, so plain popcount. */
+      if (usize != 0)
+       {
+       remaining:
+         count += mpn_popcount (up, usize);
+       }
+      else if (vsize != 0)
+       {
+         up = vp;                /* reuse the popcount above for v's tail */
+         usize = vsize;
+         goto remaining;
+       }
+      return count;
+    }
+}
diff --git a/mpz/import.c b/mpz/import.c

new file mode 100644 (file)

index 0000000..17e3d58
--- /dev/null
+++ b/mpz/import.c
@@ -0,0 +1,170 @@
+/* mpz_import -- set mpz from word data.
+
+Copyright 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* HOST_ENDIAN: 1 if limbs are stored most significant byte first, -1 if least.
+   The fallback reads the first byte of a limb with high byte 0x01, low 0xFF.  */
+#if HAVE_LIMB_BIG_ENDIAN
+#define HOST_ENDIAN     1
+#endif
+#if HAVE_LIMB_LITTLE_ENDIAN
+#define HOST_ENDIAN     (-1)
+#endif
+#ifndef HOST_ENDIAN     /* neither HAVE_LIMB_* macro set: test at run time */
+static const mp_limb_t  endian_test = (CNST_LIMB(1) << (GMP_LIMB_BITS-7)) - 1;
+#define HOST_ENDIAN     (* (signed char *) &endian_test)
+#endif
+
+
+void
+mpz_import (mpz_ptr z, size_t count, int order,
+            size_t size, int endian, size_t nail, const void *data)
+{
+  mp_size_t  zsize;
+  mp_ptr     zp;
+  /* Sets z from count words of size bytes each, read from data.  */
+  ASSERT (order == 1 || order == -1);
+  ASSERT (endian == 1 || endian == 0 || endian == -1);
+  ASSERT (nail <= 8*size);
+  /* limbs needed for count words of 8*size-nail bits each */
+  zsize = (count * (8*size - nail) + GMP_NUMB_BITS-1) / GMP_NUMB_BITS;
+  MPZ_REALLOC (z, zsize);
+  zp = PTR(z);
+  /* endian 0 selects the byte order of the host */
+  if (endian == 0)
+    endian = HOST_ENDIAN;
+
+  /* Can't use these special cases with nails currently, since they don't
+     mask out the nail bits in the input data.  */
+  if (nail == 0 && GMP_NAIL_BITS == 0)
+    {
+      unsigned  align = ((char *) data - (char *) NULL) % sizeof (mp_limb_t);
+
+      if (order == -1
+          && size == sizeof (mp_limb_t)
+          && endian == HOST_ENDIAN
+          && align == 0)
+        {                       /* data is already laid out like zp: copy */
+          MPN_COPY (zp, (mp_srcptr) data, (mp_size_t) count);
+          goto done;
+        }
+
+      if (order == -1
+          && size == sizeof (mp_limb_t)
+          && endian == - HOST_ENDIAN
+          && align == 0)
+        {                       /* limb-sized words, opposite byte order */
+          MPN_BSWAP (zp, (mp_srcptr) data, (mp_size_t) count);
+          goto done;
+        }
+
+      if (order == 1
+          && size == sizeof (mp_limb_t)
+          && endian == HOST_ENDIAN
+          && align == 0)
+        {                       /* host-order limbs, words in reverse order */
+          MPN_REVERSE (zp, (mp_srcptr) data, (mp_size_t) count);
+          goto done;
+        }
+    }
+
+  {
+    mp_limb_t      limb, byte, wbitsmask;
+    size_t         i, j, numb, wbytes;
+    mp_size_t      woffset;
+    unsigned char  *dp;
+    int            lbits, wbits;
+    /* bits taken from each word; the remaining nail bits are not read */
+    numb = size * 8 - nail;
+
+    /* whole bytes to process */
+    wbytes = numb / 8;
+
+    /* partial byte to process */
+    wbits = numb % 8;
+    wbitsmask = (CNST_LIMB(1) << wbits) - 1;
+
+    /* offset to get to the next word after processing wbytes and wbits */
+    woffset = (numb + 7) / 8;
+    woffset = (endian >= 0 ? woffset : -woffset)
+      + (order < 0 ? size : - (mp_size_t) size);
+    /* order >= 0: start at the last word; endian >= 0: at its last byte */
+    /* least significant byte */
+    dp = (unsigned char *) data
+      + (order >= 0 ? (count-1)*size : 0) + (endian >= 0 ? size-1 : 0);
+
+#define ACCUMULATE(N)                                   \
+    do {                                                \
+      ASSERT (lbits < GMP_NUMB_BITS);                   \
+      ASSERT (limb <= (CNST_LIMB(1) << lbits) - 1);     \
+                                                        \
+      limb |= (mp_limb_t) byte << lbits;                \
+      lbits += (N);                                     \
+      if (lbits >= GMP_NUMB_BITS)                       \
+        {                                               \
+          *zp++ = limb & GMP_NUMB_MASK;                 \
+          lbits -= GMP_NUMB_BITS;                       \
+          ASSERT (lbits < (N));                         \
+          limb = byte >> ((N) - lbits);                 \
+        }                                               \
+    } while (0)
+    /* limb collects lbits pending bits that are not yet stored through zp */
+    limb = 0;
+    lbits = 0;
+    for (i = 0; i < count; i++)
+      {
+        for (j = 0; j < wbytes; j++)
+          {
+            byte = *dp;
+            dp -= endian;
+            ACCUMULATE (8);
+          }
+        if (wbits != 0)
+          {
+            byte = *dp & wbitsmask;
+            dp -= endian;
+            ACCUMULATE (wbits);
+          }
+        dp += woffset;
+      }
+
+    if (lbits != 0)
+      {
+        ASSERT (lbits <= GMP_NUMB_BITS);
+        ASSERT_LIMB (limb);
+        *zp++ = limb;
+      }
+
+    ASSERT (zp == PTR(z) + zsize);
+
+    /* low byte of word after most significant */
+    ASSERT (dp == (unsigned char *) data
+            + (order < 0 ? count*size : - (mp_size_t) size)
+            + (endian >= 0 ? (mp_size_t) size - 1 : 0));
+
+  }
+
+ done:
+  zp = PTR(z);
+  MPN_NORMALIZE (zp, zsize);
+  SIZ(z) = zsize;               /* stored as is, so never negative */
+}
diff --git a/mpz/init.c b/mpz/init.c

new file mode 100644 (file)

index 0000000..5fc0ed9
--- /dev/null
+++ b/mpz/init.c
@@ -0,0 +1,35 @@
+/* mpz_init() -- Make a new multiple precision number with value 0.
+
+Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2002 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpz_init (mpz_ptr x)
+{
+  /* value zero, with room reserved for a single limb */
+  x->_mp_size = 0;
+  x->_mp_alloc = 1;
+  x->_mp_d = (mp_ptr) (*__gmp_allocate_func) (BYTES_PER_MP_LIMB);
+#ifdef __CHECKER__
+  /* let the low limb look initialized, for the benefit of mpz_get_ui etc */
+  x->_mp_d[0] = 0;
+#endif
+}
diff --git a/mpz/init2.c b/mpz/init2.c

new file mode 100644 (file)

index 0000000..a516b0a
--- /dev/null
+++ b/mpz/init2.c
@@ -0,0 +1,50 @@
+/* mpz_init2 -- initialize mpz, with requested size in bits.
+
+Copyright 2001, 2002, 2008 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpz_init2 (mpz_ptr x, mp_bitcnt_t bits)
+{
+  mp_size_t  new_alloc;
+
+  bits -= (bits != 0);         /* Round down, except if 0 */
+  new_alloc = 1 + bits / GMP_NUMB_BITS;
+
+  if (sizeof (unsigned long) > sizeof (int)) /* param vs _mp_size field */
+    {
+      if (UNLIKELY (new_alloc > INT_MAX))
+       {
+         fprintf (stderr, "gmp: overflow in mpz type\n");
+         abort ();
+       }
+    }
+
+  PTR(x) = __GMP_ALLOCATE_FUNC_LIMBS (new_alloc);
+  ALLOC(x) = new_alloc;
+  SIZ(x) = 0;
+
+#ifdef __CHECKER__
+  /* let the low limb look initialized, for the benefit of mpz_get_ui etc */
+  PTR(x)[0] = 0;
+#endif
+}
diff --git a/mpz/inits.c b/mpz/inits.c

new file mode 100644 (file)

index 0000000..9ca0e8c
--- /dev/null
+++ b/mpz/inits.c
@@ -0,0 +1,56 @@
+/* mpz_inits() -- Initialize multiple mpz_t variables and set them to 0.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#if HAVE_STDARG
+#include <stdarg.h>
+#else
+#include <varargs.h>
+#endif
+
+#include <stdio.h>             /* for NULL */
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if HAVE_STDARG
+mpz_inits (mpz_ptr x, ...)
+#else
+mpz_inits (va_alist)
+     va_dcl
+#endif
+{
+  va_list  ap;
+
+#if HAVE_STDARG
+  va_start (ap, x);
+#else
+  mpz_ptr x;
+  va_start (ap);
+  x = va_arg (ap, mpz_ptr);
+#endif
+
+  while (x != NULL)
+    {
+      mpz_init (x);
+      x = va_arg (ap, mpz_ptr);
+    }
+  va_end (ap);
+}
diff --git a/mpz/inp_raw.c b/mpz/inp_raw.c

new file mode 100644 (file)

index 0000000..497207e
--- /dev/null
+++ b/mpz/inp_raw.c
@@ -0,0 +1,163 @@
+/* mpz_inp_raw -- read an mpz_t in raw format.
+
+Copyright 2001, 2002, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* NTOH_LIMB_FETCH fetches a limb which is in network byte order (ie. big
+   endian) and produces a normal host byte order result. */
+
+#if HAVE_LIMB_BIG_ENDIAN
+#define NTOH_LIMB_FETCH(limb, src)  do { (limb) = *(src); } while (0)
+#endif
+
+#if HAVE_LIMB_LITTLE_ENDIAN
+#define NTOH_LIMB_FETCH(limb, src)  BSWAP_LIMB_FETCH (limb, src)
+#endif
+/* Otherwise build the limb a byte at a time, first byte most significant.  */
+#ifndef NTOH_LIMB_FETCH
+#define NTOH_LIMB_FETCH(limb, src)                              \
+  do {                                                          \
+    const unsigned char  *__p = (const unsigned char *) (src);  \
+    mp_limb_t  __limb;                                          \
+    int        __i;                                             \
+    __limb = 0;                                                 \
+    for (__i = 0; __i < BYTES_PER_MP_LIMB; __i++)               \
+      __limb = (__limb << 8) | __p[__i];                        \
+    (limb) = __limb;                                            \
+  } while (0)
+#endif
+
+
+/* Enhancement: The byte swap loop ought to be safe to vectorize on Cray
+   etc, but someone who knows what they're doing needs to check it.  */
+
+size_t
+mpz_inp_raw (mpz_ptr x, FILE *fp)
+{
+  unsigned char  csize_bytes[4];
+  mp_size_t      csize, abs_xsize, i;
+  size_t         abs_csize;
+  char           *cp;
+  mp_ptr         xp, sp, ep;
+  mp_limb_t      slimb, elimb;
+  /* Format: 4-byte signed big-endian byte count, then the magnitude bytes.  */
+  if (fp == 0)
+    fp = stdin;
+
+  /* 4 bytes for size; a short read makes the function return 0 */
+  if (fread (csize_bytes, sizeof (csize_bytes), 1, fp) != 1)
+    return 0;
+
+  csize = ((mp_size_t) csize_bytes[0] << 24)
+    + ((mp_size_t) csize_bytes[1] << 16)
+    + ((mp_size_t) csize_bytes[2] << 8)
+    + ((mp_size_t) csize_bytes[3]);
+
+  /* Sign extend if necessary.  An "if" is used because a branch-free form
+     tickled a bug in gcc 3.0 for powerpc64 on AIX, and two subtractions
+     because "0x80000000L << 1" is 0 where long has only 32 bits.  */
+  if (sizeof (csize) > 4 && csize & 0x80000000L)
+    csize = csize - 0x80000000L - 0x80000000L;
+
+  abs_csize = ABS (csize);
+  if (UNLIKELY (abs_csize > ~(mp_bitcnt_t) 0 / 8))
+    return 0;                   /* the size in bits would overflow */
+
+  /* round up to a multiple of limbs, with no addition that could wrap */
+  abs_xsize = abs_csize*8 / GMP_NUMB_BITS + (abs_csize*8 % GMP_NUMB_BITS != 0);
+
+  if (abs_xsize != 0)
+    {
+      MPZ_REALLOC (x, abs_xsize);
+      xp = PTR(x);
+
+      /* Get limb boundaries right in the read, for the non-nails case.  */
+      xp[0] = 0;
+      cp = (char *) (xp + abs_xsize) - abs_csize;
+      if (fread (cp, abs_csize, 1, fp) != 1)
+        return 0;
+
+      if (GMP_NAIL_BITS == 0)
+        {
+          /* Reverse limbs to least significant first, and byte swap.  If
+             abs_xsize is odd then on the last iteration elimb and slimb are
+             the same.  It doesn't seem extra code to handle that case
+             separately, to save an NTOH.  */
+          sp = xp;
+          ep = xp + abs_xsize-1;
+          for (i = 0; i < (abs_xsize+1)/2; i++)
+            {
+              NTOH_LIMB_FETCH (elimb, ep);
+              NTOH_LIMB_FETCH (slimb, sp);
+              *sp++ = elimb;
+              *ep-- = slimb;
+            }
+        }
+      else
+        {
+          /* It ought to be possible to do the transformation in-place, but
+             for now it's easier to use an extra temporary area.  */
+          mp_limb_t  byte, limb;
+          int        bits;
+          mp_size_t  tpos;
+          mp_ptr     tp;
+          TMP_DECL;
+
+          TMP_MARK;
+          tp = TMP_ALLOC_LIMBS (abs_xsize);
+          limb = 0;
+          bits = 0;
+          tpos = 0;
+          for (i = abs_csize-1; i >= 0; i--)
+            {
+              byte = (unsigned char) cp[i];
+              limb |= (byte << bits);
+              bits += 8;
+              if (bits >= GMP_NUMB_BITS)
+                {
+                  ASSERT (tpos < abs_xsize);
+                  tp[tpos++] = limb & GMP_NUMB_MASK;
+                  bits -= GMP_NUMB_BITS;
+                  ASSERT (bits < 8);
+                  limb = byte >> (8 - bits);
+                }
+            }
+          if (bits != 0)
+            {
+              ASSERT (tpos < abs_xsize);
+              tp[tpos++] = limb;
+            }
+          ASSERT (tpos == abs_xsize);
+
+          MPN_COPY (xp, tp, abs_xsize);
+          TMP_FREE;
+        }
+
+      /* GMP 1.x mpz_out_raw wrote high zero bytes, strip any high zero
+         limbs resulting from this.  Should be a non-zero value here, but
+         for safety don't assume that. */
+      MPN_NORMALIZE (xp, abs_xsize);
+    }
+
+  SIZ(x) = (csize >= 0 ? abs_xsize : -abs_xsize);
+  return abs_csize + 4;         /* bytes consumed: data plus the size field */
+}
diff --git a/mpz/inp_str.c b/mpz/inp_str.c

new file mode 100644 (file)

index 0000000..05c8cde
--- /dev/null
+++ b/mpz/inp_str.c
@@ -0,0 +1,164 @@
+/* mpz_inp_str(dest_integer, stream, base) -- Input a number in base
+   BASE from stdio stream STREAM and store the result in DEST_INTEGER.
+
+   OF THE FUNCTIONS IN THIS FILE, ONLY mpz_inp_str IS FOR EXTERNAL USE, THE
+   REST ARE INTERNALS AND ARE ALMOST CERTAIN TO BE SUBJECT TO INCOMPATIBLE
+   CHANGES OR DISAPPEAR COMPLETELY IN FUTURE GNU MP RELEASES.
+
+Copyright 1991, 1993, 1994, 1996, 1998, 2000, 2001, 2002, 2003 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <ctype.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+extern const unsigned char __gmp_digit_value_tab[];
+#define digit_value_tab __gmp_digit_value_tab
+
+size_t
+mpz_inp_str (mpz_ptr x, FILE *stream, int base)
+{
+  size_t consumed = 0;
+  int ch;
+
+  if (stream == 0)
+    stream = stdin;
+
+  /* Read up to and including the first character that is not whitespace;
+     every getc is counted, and that character is handed on unparsed.  */
+  for (;;)
+    {
+      ch = getc (stream);
+      consumed++;
+      if (! isspace (ch))
+        break;
+    }
+
+  return mpz_inp_str_nowhite (x, stream, base, ch, consumed);
+}
+
+/* shared by mpq_inp_str */
+size_t
+mpz_inp_str_nowhite (mpz_ptr x, FILE *stream, int base, int c, size_t nread)
+{
+  char *str;
+  size_t alloc_size, str_size;
+  int negative;
+  mp_size_t xsize;
+  const unsigned char *digit_value;
+  /* c is the character to start parsing at; mpz_inp_str counts it in nread */
+  ASSERT_ALWAYS (EOF == -1);   /* FIXME: handle this by adding explicit */
+                               /* comparisons of c and EOF before each  */
+                               /* read of digit_value[].  */
+
+  digit_value = digit_value_tab;
+  if (base > 36)
+    {
+      /* For bases > 36, use the collating sequence
+        0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz.  */
+      digit_value += 224;
+      if (base > 62)
+       return 0;               /* too large base */
+    }
+
+  negative = 0;
+  if (c == '-')
+    {
+      negative = 1;
+      c = getc (stream);
+      nread++;
+    }
+
+  if (c == EOF || digit_value[c] >= (base == 0 ? 10 : base))
+    return 0;                  /* error if no digits */
+
+  /* If BASE is 0, try to find out the base by looking at the initial
+     characters.  */
+  if (base == 0)
+    {
+      base = 10;
+      if (c == '0')
+       {
+         base = 8;             /* leading 0: octal unless x/X or b/B follows */
+         c = getc (stream);
+         nread++;
+         if (c == 'x' || c == 'X')
+           {
+             base = 16;
+             c = getc (stream);
+             nread++;
+           }
+         else if (c == 'b' || c == 'B')
+           {
+             base = 2;
+             c = getc (stream);
+             nread++;
+           }
+       }
+    }
+
+  /* Skip leading zeros.  */
+  while (c == '0')
+    {
+      c = getc (stream);
+      nread++;
+    }
+
+  alloc_size = 100;
+  str = (char *) (*__gmp_allocate_func) (alloc_size);
+  str_size = 0;
+  /* collect digit values (not characters) until EOF or a non-digit */
+  while (c != EOF)
+    {
+      int dig;
+      dig = digit_value[c];
+      if (dig >= base)
+       break;
+      if (str_size >= alloc_size)
+       {
+         size_t old_alloc_size = alloc_size;
+         alloc_size = alloc_size * 3 / 2;
+         str = (char *) (*__gmp_reallocate_func) (str, old_alloc_size, alloc_size);
+       }
+      str[str_size++] = dig;
+      c = getc (stream);
+    }
+  nread += str_size;           /* one getc was done per stored digit */
+
+  ungetc (c, stream);          /* give back the character that ended the scan */
+  nread--;
+
+  /* Make sure the string is not empty, mpn_set_str would fail.  */
+  if (str_size == 0)
+    {
+      x->_mp_size = 0;
+    }
+  else
+    {
+      xsize = 2 + (mp_size_t)
+       (str_size / (GMP_NUMB_BITS * mp_bases[base].chars_per_bit_exactly));
+      MPZ_REALLOC (x, xsize);
+
+      /* Convert the byte array in base BASE to our bignum format.  */
+      xsize = mpn_set_str (x->_mp_d, (unsigned char *) str, str_size, base);
+      x->_mp_size = negative ? -xsize : xsize;
+    }
+  (*__gmp_free_func) (str, alloc_size);
+  return nread;
+}
diff --git a/mpz/invert.c b/mpz/invert.c

new file mode 100644 (file)

index 0000000..009a03c
--- /dev/null
+++ b/mpz/invert.c
@@ -0,0 +1,70 @@
+/* mpz_invert (inv, x, n).  Find multiplicative inverse of X in Z(N).
+   If X has an inverse, return non-zero and store inverse in INVERSE,
+   otherwise, return 0 and put garbage in INVERSE.
+
+Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2005 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+int
+mpz_invert (mpz_ptr inverse, mpz_srcptr x, mpz_srcptr n)
+{
+  mpz_t g, s;                   /* gcd (x, n) and the cofactor of x */
+  mp_size_t xn, nn, limbs;
+  int invertible;
+  TMP_DECL;
+
+  /* Only magnitudes matter for the early checks and for sizing the
+     temporaries, so work with absolute limb counts.  */
+  xn = ABS (SIZ (x));
+  nn = ABS (SIZ (n));
+
+  /* x == 0 has no inverse, and nothing has an inverse modulo +1 or -1;
+     both cases are answered without running the gcd.  */
+  if (xn == 0)
+    return 0;
+  if (nn == 1 && PTR (n)[0] == 1)
+    return 0;
+
+  TMP_MARK;
+
+  /* Each temporary gets one limb more than the larger operand.  The
+     cofactor belonging to n is not requested from mpz_gcdext.  */
+  limbs = MAX (xn, nn) + 1;
+  MPZ_TMP_INIT (g, limbs);
+  MPZ_TMP_INIT (s, limbs);
+  mpz_gcdext (g, s, (mpz_ptr) 0, x, n);
+
+  /* X is invertible exactly when the gcd came back as 1.  */
+  invertible = (SIZ (g) == 1 && PTR (g)[0] == 1);
+  if (invertible)
+    {
+      /* A negative cofactor is moved up by |n| before being stored.  */
+      if (SIZ (s) >= 0)
+       mpz_set (inverse, s);
+      else if (SIZ (n) >= 0)
+       mpz_add (inverse, s, n);
+      else
+       mpz_sub (inverse, s, n);
+    }
+
+  TMP_FREE;
+  return invertible;
+}
diff --git a/mpz/ior.c b/mpz/ior.c

new file mode 100644 (file)

index 0000000..26362c9
--- /dev/null
+++ b/mpz/ior.c
@@ -0,0 +1,232 @@
+/* mpz_ior -- Logical inclusive or.
+
+Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2005 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpz_ior (mpz_ptr res, mpz_srcptr op1, mpz_srcptr op2)
+{
+  mp_srcptr op1_ptr, op2_ptr;
+  mp_size_t op1_size, op2_size;
+  mp_ptr res_ptr;
+  mp_size_t res_size;
+  mp_size_t i;
+  TMP_DECL;
+  /* RES = OP1 | OP2, in three cases selected by the signs of the operands.  */
+  TMP_MARK;
+  op1_size = SIZ(op1);         /* signed: below zero for a negative operand */
+  op2_size = SIZ(op2);
+
+  op1_ptr = PTR(op1);
+  op2_ptr = PTR(op2);
+  res_ptr = PTR(res);
+
+  if (op1_size >= 0)
+    {
+      if (op2_size >= 0)       /* both non-negative: plain limb-wise OR */
+       {
+         if (op1_size >= op2_size)
+           {
+             if (ALLOC(res) < op1_size)        /* RES must hold the longer one */
+               {
+                 _mpz_realloc (res, op1_size);
+                 /* No overlapping possible: op1_ptr = PTR(op1); */
+                 op2_ptr = PTR(op2);
+                 res_ptr = PTR(res);
+               }
+
+             if (res_ptr != op1_ptr)   /* limbs of OP1 above OP2 pass through */
+               MPN_COPY (res_ptr + op2_size, op1_ptr + op2_size,
+                         op1_size - op2_size);
+             for (i = op2_size - 1; i >= 0; i--)
+               res_ptr[i] = op1_ptr[i] | op2_ptr[i];
+             res_size = op1_size;
+           }
+         else
+           {
+             if (ALLOC(res) < op2_size)        /* mirror image, OP2 is longer */
+               {
+                 _mpz_realloc (res, op2_size);
+                 op1_ptr = PTR(op1);
+                 /* No overlapping possible: op2_ptr = PTR(op2); */
+                 res_ptr = PTR(res);
+               }
+
+             if (res_ptr != op2_ptr)
+               MPN_COPY (res_ptr + op1_size, op2_ptr + op1_size,
+                         op2_size - op1_size);
+             for (i = op1_size - 1; i >= 0; i--)
+               res_ptr[i] = op1_ptr[i] | op2_ptr[i];
+             res_size = op2_size;
+           }
+
+         SIZ(res) = res_size;
+         return;       /* NOTE(review): no TMP_FREE; no TMP_ALLOC on this path */
+       }
+      else /* op2_size < 0 */
+       {
+         /* Fall through to the code at the end of the function.  */
+       }
+    }
+  else
+    {
+      if (op2_size < 0)
+       {
+         mp_ptr opx;
+         mp_limb_t cy;
+
+         /* Both operands are negative, so will be the result.
+            -((-OP1) | (-OP2)) = -(~(OP1 - 1) | ~(OP2 - 1)) =
+            = ~(~(OP1 - 1) | ~(OP2 - 1)) + 1 =
+            = ((OP1 - 1) & (OP2 - 1)) + 1      */
+
+         op1_size = -op1_size;         /* limb counts of the magnitudes */
+         op2_size = -op2_size;
+
+         res_size = MIN (op1_size, op2_size);  /* the AND is no longer than this */
+
+         /* Possible optimization: Decrease mpn_sub precision,
+            as we won't use the entire res of both.  */
+         opx = TMP_ALLOC_LIMBS (res_size);
+         mpn_sub_1 (opx, op1_ptr, res_size, (mp_limb_t) 1);    /* OP1 - 1 */
+         op1_ptr = opx;
+
+         opx = TMP_ALLOC_LIMBS (res_size);
+         mpn_sub_1 (opx, op2_ptr, res_size, (mp_limb_t) 1);    /* OP2 - 1 */
+         op2_ptr = opx;
+
+         if (ALLOC(res) < res_size)
+           {
+             _mpz_realloc (res, res_size);
+             /* op1_ptr and op2_ptr point to temporary space.  */
+             res_ptr = PTR(res);
+           }
+
+         /* First loop finds the size of the result.  */
+         for (i = res_size - 1; i >= 0; i--)
+           if ((op1_ptr[i] & op2_ptr[i]) != 0)
+             break;
+         res_size = i + 1;
+
+         if (res_size != 0)
+           {
+             /* Second loop computes the real result.  */
+             for (i = res_size - 1; i >= 0; i--)
+               res_ptr[i] = op1_ptr[i] & op2_ptr[i];
+
+             cy = mpn_add_1 (res_ptr, res_ptr, res_size, (mp_limb_t) 1);       /* the "+ 1" */
+             if (cy)
+               {
+                 res_ptr[res_size] = cy;
+                 res_size++;
+               }
+           }
+         else
+           {
+             res_ptr[0] = 1;   /* the AND was zero, so the magnitude is 0 + 1 */
+             res_size = 1;
+           }
+
+         SIZ(res) = -res_size;         /* negative result */
+         TMP_FREE;
+         return;
+       }
+      else
+       {
+         /* We should compute -OP1 | OP2.  Swap OP1 and OP2 and fall
+            through to the code that handles OP1 | -OP2.  */
+          MPZ_SRCPTR_SWAP (op1, op2);
+          MPN_SRCPTR_SWAP (op1_ptr,op1_size, op2_ptr,op2_size);
+       }
+    }
+
+  {
+    mp_ptr opx;
+    mp_limb_t cy;
+    mp_size_t res_alloc;
+    mp_size_t count;
+
+    /* Operand 2 negative, so will be the result.
+       -(OP1 | (-OP2)) = -(OP1 | ~(OP2 - 1)) =
+       = ~(OP1 | ~(OP2 - 1)) + 1 =
+       = (~OP1 & (OP2 - 1)) + 1      */
+
+    op2_size = -op2_size;      /* limb count of the magnitude of OP2 */
+
+    res_alloc = op2_size;      /* taken before op2_size can shrink below */
+
+    opx = TMP_ALLOC_LIMBS (op2_size);
+    mpn_sub_1 (opx, op2_ptr, op2_size, (mp_limb_t) 1); /* OP2 - 1 */
+    op2_ptr = opx;
+    op2_size -= op2_ptr[op2_size - 1] == 0;    /* top limb may have become 0 */
+
+    if (ALLOC(res) < res_alloc)
+      {
+       _mpz_realloc (res, res_alloc);
+       op1_ptr = PTR(op1);
+       /* op2_ptr points to temporary space.  */
+       res_ptr = PTR(res);
+      }
+
+    if (op1_size >= op2_size)
+      {
+       /* We can just ignore the part of OP1 that stretches above OP2,
+          because the result limbs are zero there.  */
+
+       /* First loop finds the size of the result.  */
+       for (i = op2_size - 1; i >= 0; i--)
+         if ((~op1_ptr[i] & op2_ptr[i]) != 0)
+           break;
+       res_size = i + 1;
+       count = res_size;       /* limbs the second loop has to combine */
+      }
+    else
+      {
+       res_size = op2_size;
+
+       /* Copy the part of OP2 that stretches above OP1, to RES.  */
+       MPN_COPY (res_ptr + op1_size, op2_ptr + op1_size, op2_size - op1_size);
+       count = op1_size;       /* only the limbs both operands have */
+      }
+
+    if (res_size != 0)
+      {
+       /* Second loop computes the real result.  */
+       for (i = count - 1; i >= 0; i--)
+         res_ptr[i] = ~op1_ptr[i] & op2_ptr[i];
+
+       cy = mpn_add_1 (res_ptr, res_ptr, res_size, (mp_limb_t) 1);     /* the "+ 1" */
+       if (cy)
+         {
+           res_ptr[res_size] = cy;
+           res_size++;
+         }
+      }
+    else
+      {
+       res_ptr[0] = 1;         /* ~OP1 & (OP2 - 1) was zero: magnitude is 1 */
+       res_size = 1;
+      }
+
+    SIZ(res) = -res_size;      /* negative result */
+  }
+  TMP_FREE;
+}
diff --git a/mpz/iset.c b/mpz/iset.c

new file mode 100644 (file)

index 0000000..384ca79
--- /dev/null
+++ b/mpz/iset.c
@@ -0,0 +1,48 @@
+/* mpz_init_set (src_integer) -- Make a new multiple precision number with
+   a value copied from SRC_INTEGER.
+
+Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2002 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpz_init_set (mpz_ptr w, mpz_srcptr u)
+{
+  mp_size_t signed_n = SIZ (u);         /* size field of U, sign included */
+  mp_size_t limbs = ABS (signed_n);     /* limbs in the magnitude of U */
+  mp_ptr dst;
+
+  /* Allocate exactly what the value needs, but never less than one limb,
+     so that W always owns storage.  */
+  ALLOC (w) = MAX (limbs, 1);
+  dst = (mp_ptr) (*__gmp_allocate_func) (ALLOC (w) * BYTES_PER_MP_LIMB);
+  PTR (w) = dst;
+
+  /* Copy the magnitude across, then take over size and sign in one go.  */
+  MPN_COPY (dst, PTR (u), limbs);
+  SIZ (w) = signed_n;
+
+#ifdef __CHECKER__
+  /* Nothing is copied for a zero value, so give the low limb a defined
+     content, for the benefit of mpz_get_ui etc.  */
+  if (limbs == 0)
+    dst[0] = 0;
+#endif
+}
diff --git a/mpz/iset_d.c b/mpz/iset_d.c

new file mode 100644 (file)

index 0000000..004b087
--- /dev/null
+++ b/mpz/iset_d.c
@@ -0,0 +1,31 @@
+/* mpz_init_set_d(integer, val) -- Initialize and assign INTEGER with a double
+   value VAL.
+
+Copyright 1996, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpz_init_set_d (mpz_ptr dest, double val)
+{
+  SIZ (dest) = 0;               /* DEST is zero until mpz_set_d assigns VAL */
+  ALLOC (dest) = 1;             /* a single limb to begin with */
+  PTR (dest) = (mp_ptr) (*__gmp_allocate_func) (BYTES_PER_MP_LIMB);
+  mpz_set_d (dest, val);
+}
diff --git a/mpz/iset_si.c b/mpz/iset_si.c

new file mode 100644 (file)

index 0000000..64e51b3
--- /dev/null
+++ b/mpz/iset_si.c
@@ -0,0 +1,49 @@
+/* mpz_init_set_si(dest,val) -- Make a new multiple precision in DEST and
+   assign VAL to the new number.
+
+Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2002 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpz_init_set_si (mpz_ptr dest, signed long int val)
+{
+  mp_size_t size;
+  mp_limb_t vl;
+
+  dest->_mp_alloc = 1;
+  dest->_mp_d = (mp_ptr) (*__gmp_allocate_func) (BYTES_PER_MP_LIMB);
+  dest->_mp_size = 0;   /* valid zero before MPZ_REALLOC can be handed DEST */
+
+  vl = (mp_limb_t) ABS_CAST (unsigned long int, val);  /* magnitude of VAL */
+  dest->_mp_d[0] = vl & GMP_NUMB_MASK;
+  size = vl != 0;
+
+#if GMP_NAIL_BITS != 0
+  if (vl > GMP_NUMB_MAX)       /* magnitude does not fit in one limb */
+    {
+      MPZ_REALLOC (dest, 2);
+      dest->_mp_d[1] = vl >> GMP_NUMB_BITS;
+      size = 2;
+    }
+#endif
+
+  dest->_mp_size = val >= 0 ? size : -size;    /* sign follows VAL */
+}
diff --git a/mpz/iset_str.c b/mpz/iset_str.c

new file mode 100644 (file)

index 0000000..302126f
--- /dev/null
+++ b/mpz/iset_str.c
@@ -0,0 +1,44 @@
+/* mpz_init_set_str(x, str, base) -- Initialize X, then convert the
+   \0-terminated string STR in base BASE to a multiple precision integer
+   and store it in X.  The return value is that of mpz_set_str.  Allow
+   white space in the string.  If BASE == 0 determine the base in the C
+   standard way, i.e.  0xhh...h means base 16, 0oo...o means base 8,
+   otherwise assume base 10.
+
+Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2002 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+int
+mpz_init_set_str (mpz_ptr x, const char *str, int base)
+{
+  mp_ptr limb = (mp_ptr) (*__gmp_allocate_func) (BYTES_PER_MP_LIMB);
+
+#ifdef __CHECKER__
+  /* give the low limb a defined content, for the benefit of mpz_get_ui etc */
+  limb[0] = 0;
+#endif
+
+  /* mpz_set_str stores no size when STR has no digits, so start at zero */
+  SIZ (x) = 0;
+  PTR (x) = limb;
+  ALLOC (x) = 1;
+  return mpz_set_str (x, str, base);
+}
diff --git a/mpz/iset_ui.c b/mpz/iset_ui.c

new file mode 100644 (file)

index 0000000..841d91f
--- /dev/null
+++ b/mpz/iset_ui.c
@@ -0,0 +1,46 @@
+/* mpz_init_set_ui(dest,val) -- Make a new multiple precision in DEST and
+   assign VAL to the new number.
+
+Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2002, 2004 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpz_init_set_ui (mpz_ptr dest, unsigned long int val)
+{
+  mp_size_t size;
+
+  dest->_mp_alloc = 1;
+  dest->_mp_d = (mp_ptr) (*__gmp_allocate_func) (BYTES_PER_MP_LIMB);
+  dest->_mp_size = 0;   /* valid zero before MPZ_REALLOC can be handed DEST */
+  dest->_mp_d[0] = val & GMP_NUMB_MASK;        /* low limb of VAL */
+  size = val != 0;
+
+#if BITS_PER_ULONG > GMP_NUMB_BITS  /* avoid warnings about shift amount */
+  if (val > GMP_NUMB_MAX)      /* VAL does not fit in one limb */
+    {
+      MPZ_REALLOC (dest, 2);
+      dest->_mp_d[1] = val >> GMP_NUMB_BITS;
+      size = 2;
+    }
+#endif
+
+  dest->_mp_size = size;
+}
diff --git a/mpz/jacobi.c b/mpz/jacobi.c

new file mode 100644 (file)

index 0000000..cab11f5
--- /dev/null
+++ b/mpz/jacobi.c
@@ -0,0 +1,309 @@
+/* mpz_jacobi, mpz_legendre, mpz_kronecker -- mpz/mpz Jacobi symbols.
+
+Copyright 2000, 2001, 2002, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by the
+Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License along
+with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Change this to "#define TRACE(x) x" for some traces. */
+#define TRACE(x)
+
+
+#define MPN_RSHIFT_OR_COPY(dst,src,size,shift)                  \
+  do {  /* SIZE is an lvalue, adjusted in place */              \
+    if ((shift) != 0)                                           \
+      {                                                         \
+        ASSERT_NOCARRY (mpn_rshift (dst, src, size, shift));    \
+        (size) -= ((dst)[(size)-1] == 0); /* top limb gone? */  \
+      }                                                         \
+    else                                                        \
+      MPN_COPY (dst, src, size);  /* nothing to shift */        \
+  } while (0)
+
+
+/* This code does triple duty as mpz_jacobi, mpz_legendre and mpz_kronecker.
+
+   mpz_jacobi could assume b is odd, but the improvements from that seem
+   small compared to other operations, and anything significant should be
+   checked at run-time since we'd like odd b to go fast in mpz_kronecker
+   too.
+
+   mpz_legendre could assume b is an odd prime, but knowing this doesn't
+   present any obvious benefits.  Result 0 wouldn't arise (unless "a" is a
+   multiple of b), but the checking for that takes little time compared to
+   other operations.
+
+   The main loop is just a simple binary GCD with the jacobi symbol result
+   tracked during the reduction.
+
+   The special cases for a or b fitting in one limb let mod_1 or modexact_1
+   get used, without any copying, and end up just as efficient as the mixed
+   precision mpz_kronecker_ui etc.
+
+   When tdiv_qr is called it's not necessary to make "a" odd or make a
+   working copy of it, but tdiv_qr is going to be pretty slow so it's not
+   worth bothering trying to save anything for that case.
+
+   Enhancements:
+
+   mpn_bdiv_qr should be used instead of mpn_tdiv_qr.
+
+   Some sort of multi-step algorithm should be used.  The current subtract
+   and shift for every bit is very inefficient.  Lehmer (per current gcdext)
+   would need some low bits included in its calculation to apply the sign
+   change for reciprocity.  Binary Lehmer keeps low bits to strip twos
+   anyway, so might be better suited.  Maybe the accelerated GCD style k-ary
+   reduction would work, if sign changes due to the extra factors it
+   introduces can be accounted for (or maybe they can be ignored).  */
+
+
+int
+mpz_jacobi (mpz_srcptr a, mpz_srcptr b)
+{
+  mp_srcptr  asrcp, bsrcp;      /* limbs of the operands, only read */
+  mp_size_t  asize, bsize;
+  mp_ptr     ap, bp;            /* TMP allocated working copies */
+  mp_limb_t  alow, blow, ahigh, bhigh, asecond, bsecond;
+  unsigned   atwos, btwos;      /* trailing zero bits counted in alow, blow */
+  int        result_bit1;       /* sign so far, decoded by JACOBI_BIT1_TO_PN */
+  TMP_DECL;
+
+  TRACE (printf ("start asize=%d bsize=%d\n", SIZ(a), SIZ(b));
+         mpz_trace (" a", a);
+         mpz_trace (" b", b));
+
+  asize = SIZ(a);
+  asrcp = PTR(a);
+  alow = asrcp[0];
+
+  bsize = SIZ(b);
+  if (bsize == 0)
+    return JACOBI_LS0 (alow, asize);  /* (a/0) */
+
+  bsrcp = PTR(b);
+  blow = bsrcp[0];
+
+  if (asize == 0)
+    return JACOBI_0LS (blow, bsize);  /* (0/b) */
+
+  /* (even/even)=0 */
+  if (((alow | blow) & 1) == 0)
+    return 0;
+
+  /* account for effect of sign of b, then ignore it */
+  result_bit1 = JACOBI_BSGN_SS_BIT1 (asize, bsize);
+  bsize = ABS (bsize);
+
+  /* low zero limbs on b can be discarded */
+  JACOBI_STRIP_LOW_ZEROS (result_bit1, alow, bsrcp, bsize, blow);
+
+  count_trailing_zeros (btwos, blow);
+  TRACE (printf ("b twos %u\n", btwos));
+
+  /* establish shifted blow */
+  blow >>= btwos;
+  if (bsize > 1)
+    {
+      bsecond = bsrcp[1];       /* supplies the bits shifted in from above */
+      if (btwos != 0)
+        blow |= (bsecond << (GMP_NUMB_BITS - btwos)) & GMP_NUMB_MASK;
+    }
+
+  /* account for effect of sign of a, then ignore it */
+  result_bit1 ^= JACOBI_ASGN_SU_BIT1 (asize, blow);
+  asize = ABS (asize);
+
+  /* bsecond is only read for bsize == 2, where it was assigned above */
+  if (bsize == 1 || (bsize == 2 && (bsecond >> btwos) == 0))
+    {
+      /* special case one limb b, use modexact and no copying */
+
+      /* (a/2)=(2/a) with a odd, and if b is even then a is odd here */
+      result_bit1 ^= JACOBI_TWOS_U_BIT1 (btwos, alow);
+
+      if (blow == 1)   /* (a/1)=1 always */
+        return JACOBI_BIT1_TO_PN (result_bit1);
+
+      JACOBI_MOD_OR_MODEXACT_1_ODD (result_bit1, alow, asrcp, asize, blow);
+      TRACE (printf ("base (%lu/%lu) with %d\n",
+                     alow, blow, JACOBI_BIT1_TO_PN (result_bit1)));
+      return mpn_jacobi_base (alow, blow, result_bit1);
+    }
+
+  /* Discard low zero limbs of a.  Usually there won't be anything to
+     strip, hence not bothering with it for the bsize==1 case.  */
+  JACOBI_STRIP_LOW_ZEROS (result_bit1, blow, asrcp, asize, alow);
+
+  count_trailing_zeros (atwos, alow);
+  TRACE (printf ("a twos %u\n", atwos));
+  result_bit1 ^= JACOBI_TWOS_U_BIT1 (atwos, blow);
+
+  /* establish shifted alow */
+  alow >>= atwos;
+  if (asize > 1)
+    {
+      asecond = asrcp[1];       /* supplies the bits shifted in from above */
+      if (atwos != 0)
+        alow |= (asecond << (GMP_NUMB_BITS - atwos)) & GMP_NUMB_MASK;
+    }
+
+  /* (a/2)=(2/a) with a odd */
+  result_bit1 ^= JACOBI_TWOS_U_BIT1 (btwos, alow);
+
+  if (asize == 1 || (asize == 2 && (asecond >> atwos) == 0))
+    {
+      /* another special case with modexact and no copying */
+
+      if (alow == 1)  /* (1/b)=1 always */
+        return JACOBI_BIT1_TO_PN (result_bit1);
+
+      /* b still has its twos, so cancel out their effect */
+      result_bit1 ^= JACOBI_TWOS_U_BIT1 (btwos, alow);
+
+      result_bit1 ^= JACOBI_RECIP_UU_BIT1 (alow, blow);  /* now (b/a) */
+      JACOBI_MOD_OR_MODEXACT_1_ODD (result_bit1, blow, bsrcp, bsize, alow);
+      TRACE (printf ("base (%lu/%lu) with %d\n",
+                     blow, alow, JACOBI_BIT1_TO_PN (result_bit1)));
+      return mpn_jacobi_base (blow, alow, result_bit1);
+    }
+
+  /* General case: from here on work on shifted copies of a and b.  */
+  TMP_MARK;
+  TMP_ALLOC_LIMBS_2 (ap, asize, bp, bsize);
+
+  MPN_RSHIFT_OR_COPY (ap, asrcp, asize, atwos);         /* may lower asize */
+  ASSERT (alow == ap[0]);
+  TRACE (mpn_trace ("stripped a", ap, asize));
+
+  MPN_RSHIFT_OR_COPY (bp, bsrcp, bsize, btwos);         /* may lower bsize */
+  ASSERT (blow == bp[0]);
+  TRACE (mpn_trace ("stripped b", bp, bsize));
+
+  /* swap if necessary to make a longer than b */
+  if (asize < bsize)
+    {
+      TRACE (printf ("swap\n"));
+      MPN_PTR_SWAP (ap,asize, bp,bsize);
+      MP_LIMB_T_SWAP (alow, blow);
+      result_bit1 ^= JACOBI_RECIP_UU_BIT1 (alow, blow);
+    }
+
+  /* If a is bigger than b then reduce to a mod b.
+     Division is much faster than chipping away at "a" bit-by-bit. */
+  if (asize > bsize)
+    {
+      mp_ptr  rp, qp;
+
+      TRACE (printf ("tdiv_qr asize=%ld bsize=%ld\n", asize, bsize));
+
+      TMP_ALLOC_LIMBS_2 (rp, bsize, qp, asize-bsize+1);
+      mpn_tdiv_qr (qp, rp, (mp_size_t) 0, ap, asize, bp, bsize);
+      ap = rp;                  /* the remainder takes the place of a */
+      asize = bsize;
+      MPN_NORMALIZE (ap, asize);
+
+      TRACE (printf ("tdiv_qr asize=%ld bsize=%ld\n", asize, bsize);
+             mpn_trace (" a", ap, asize);
+             mpn_trace (" b", bp, bsize));
+
+      if (asize == 0)  /* (0/b)=0 for b!=1 */
+        goto zero;
+
+      alow = ap[0];
+      goto strip_a;             /* enters the loop below part way through */
+    }
+
+  for (;;)
+    {
+      ASSERT (asize >= 1);         /* a,b non-empty */
+      ASSERT (bsize >= 1);
+      ASSERT (ap[asize-1] != 0);   /* a,b normalized (and hence non-zero) */
+      ASSERT (bp[bsize-1] != 0);
+      ASSERT (alow == ap[0]);      /* low limb copies should be correct */
+      ASSERT (blow == bp[0]);
+      ASSERT (alow & 1);           /* a,b odd */
+      ASSERT (blow & 1);
+
+      TRACE (printf ("top asize=%ld bsize=%ld\n", asize, bsize);
+             mpn_trace (" a", ap, asize);
+             mpn_trace (" b", bp, bsize));
+
+      /* swap if necessary to make a>=b, applying reciprocity
+         high limbs are almost always enough to tell which is bigger */
+      if (asize < bsize
+          || (asize == bsize
+              && ((ahigh=ap[asize-1]) < (bhigh=bp[asize-1])
+                  || (ahigh == bhigh
+                      && mpn_cmp (ap, bp, asize-1) < 0))))
+        {
+          TRACE (printf ("swap\n"));
+          MPN_PTR_SWAP (ap,asize, bp,bsize);
+          MP_LIMB_T_SWAP (alow, blow);
+          result_bit1 ^= JACOBI_RECIP_UU_BIT1 (alow, blow);
+        }
+
+      if (asize == 1)           /* a >= b now, so b is a single limb too */
+        break;
+
+      /* a = a-b */
+      ASSERT (asize >= bsize);
+      ASSERT_NOCARRY (mpn_sub (ap, ap, asize, bp, bsize));
+      MPN_NORMALIZE (ap, asize);
+      alow = ap[0];
+
+      /* (0/b)=0 for b!=1.  b!=1 when a==0 because otherwise would have had
+         a==1 which is asize==1 and would have exited above.  */
+      if (asize == 0)
+        goto zero;
+
+    strip_a:                    /* also reached by goto after the tdiv_qr step */
+      /* low zero limbs on a can be discarded */
+      JACOBI_STRIP_LOW_ZEROS (result_bit1, blow, ap, asize, alow);
+
+      if ((alow & 1) == 0)
+        {
+          /* factors of 2 from a */
+          unsigned  twos;
+          count_trailing_zeros (twos, alow);
+          TRACE (printf ("twos %u\n", twos));
+          result_bit1 ^= JACOBI_TWOS_U_BIT1 (twos, blow);
+          ASSERT_NOCARRY (mpn_rshift (ap, ap, asize, twos));
+          asize -= (ap[asize-1] == 0);
+          alow = ap[0];
+        }
+    }
+
+  ASSERT (asize == 1 && bsize == 1);  /* just alow and blow left */
+  TMP_FREE;
+
+  /* (1/b)=1 always (in this case have b==1 because a>=b) */
+  if (alow == 1)
+    return JACOBI_BIT1_TO_PN (result_bit1);
+
+  /* swap with reciprocity and do (b/a) */
+  result_bit1 ^= JACOBI_RECIP_UU_BIT1 (alow, blow);
+  TRACE (printf ("base (%lu/%lu) with %d\n",
+                 blow, alow, JACOBI_BIT1_TO_PN (result_bit1)));
+  return mpn_jacobi_base (blow, alow, result_bit1);
+
+ zero:
+  TMP_FREE;
+  return 0;
+}
diff --git a/mpz/kronsz.c b/mpz/kronsz.c

new file mode 100644 (file)

index 0000000..50bf7f6
--- /dev/null
+++ b/mpz/kronsz.c
@@ -0,0 +1,127 @@
+/* mpz_si_kronecker -- long+mpz Kronecker/Jacobi symbol.
+
+Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+int
+mpz_si_kronecker (long a, mpz_srcptr b)
+{
+  mp_srcptr  b_ptr;
+  mp_limb_t  b_low;
+  mp_size_t  b_size;
+  mp_size_t  b_abs_size;
+  mp_limb_t  a_limb, b_rem;
+  unsigned   twos;
+  int        result_bit1;
+  /* Evaluates the Kronecker symbol (a/b) for a signed long A and an mpz B.  */
+#if GMP_NUMB_BITS < BITS_PER_ULONG
+  if (a > GMP_NUMB_MAX || a < -GMP_NUMB_MAX)   /* A does not fit in one limb */
+    {
+      mp_limb_t  alimbs[2];
+      mpz_t      az;
+      ALLOC(az) = numberof (alimbs);
+      PTR(az) = alimbs;
+      mpz_set_si (az, a);
+      return mpz_kronecker (az, b);
+    }
+#endif
+
+  b_size = SIZ (b);
+  if (b_size == 0)
+    return JACOBI_S0 (a);  /* (a/0) */
+
+  /* account for the effect of the sign of b, then ignore it */
+  result_bit1 = JACOBI_BSGN_SS_BIT1 (a, b_size);
+
+  b_ptr = PTR(b);
+  b_low = b_ptr[0];
+  b_abs_size = ABS (b_size);
+
+  if ((b_low & 1) != 0)
+    {
+      /* b odd */
+
+      result_bit1 ^= JACOBI_ASGN_SU_BIT1 (a, b_low);
+      a_limb = ABS_CAST (unsigned long, a);  /* not ABS: -a overflows at LONG_MIN */
+
+      if ((a_limb & 1) == 0)
+        {
+          /* (0/b)=1 for b=+/-1, 0 otherwise */
+          if (a_limb == 0)
+            return (b_abs_size == 1 && b_low == 1);
+
+          /* a even, b odd */
+          count_trailing_zeros (twos, a_limb);
+          a_limb >>= twos;
+          /* (a*2^n/b) = (a/b) * twos(n,a) */
+          result_bit1 ^= JACOBI_TWOS_U_BIT1 (twos, b_low);
+        }
+    }
+  else
+    {
+      /* (even/even)=0, and (0/b)=0 for b!=+/-1 */
+      if ((a & 1) == 0)
+        return 0;
+
+      /* a odd, b even
+
+         Establish shifted b_low with valid bit1 for ASGN and RECIP below.
+         Zero limbs stripped are accounted for, but zero bits on b_low are
+         not because they remain in {b_ptr,b_abs_size} for the
+         JACOBI_MOD_OR_MODEXACT_1_ODD. */
+
+      JACOBI_STRIP_LOW_ZEROS (result_bit1, a, b_ptr, b_abs_size, b_low);
+      if ((b_low & 1) == 0)
+        {
+          if (UNLIKELY (b_low == GMP_NUMB_HIGHBIT))
+            {
+              /* need b_ptr[1] to get bit1 in b_low */
+              if (b_abs_size == 1)
+                {
+                  /* (a/0x80000000) = (a/2)^(BPML-1) */
+                  if ((GMP_NUMB_BITS % 2) == 0)
+                    result_bit1 ^= JACOBI_TWO_U_BIT1 (a);
+                  return JACOBI_BIT1_TO_PN (result_bit1);
+                }
+
+              /* b_abs_size > 1 */
+              b_low = b_ptr[1] << 1;
+            }
+          else
+            {
+              count_trailing_zeros (twos, b_low);
+              b_low >>= twos;
+            }
+        }
+
+      result_bit1 ^= JACOBI_ASGN_SU_BIT1 (a, b_low);
+      a_limb = ABS_CAST (unsigned long, a);  /* as above, safe for LONG_MIN */
+    }
+
+  if (a_limb == 1)
+    return JACOBI_BIT1_TO_PN (result_bit1);  /* (1/b)=1 */
+
+  /* (a/b*2^n) = (b*2^n mod a / a) * recip(a,b) */
+  JACOBI_MOD_OR_MODEXACT_1_ODD (result_bit1, b_rem, b_ptr, b_abs_size, a_limb);
+  result_bit1 ^= JACOBI_RECIP_UU_BIT1 (a_limb, b_low);
+  return mpn_jacobi_base (b_rem, a_limb, result_bit1);
+}
diff --git a/mpz/kronuz.c b/mpz/kronuz.c

new file mode 100644 (file)

index 0000000..82a9962
--- /dev/null
+++ b/mpz/kronuz.c
@@ -0,0 +1,119 @@
+/* mpz_ui_kronecker -- ulong+mpz Kronecker/Jacobi symbol.
+
+Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Return the Kronecker symbol (a/b) for an unsigned long A and an mpz B.
+
+   Values of A that don't fit a single limb are passed on to mpz_kronecker
+   through a stack-based mpz.  Otherwise the factors of two in whichever
+   operand is even are accounted for, B is reduced modulo A, and the
+   remaining single-limb problem is finished off by mpn_jacobi_base.  Sign
+   adjustments picked up on the way are accumulated in result_bit1.  */
+
+int
+mpz_ui_kronecker (unsigned long a, mpz_srcptr b)
+{
+  mp_srcptr  b_ptr;
+  mp_limb_t  b_low;
+  int        b_abs_size;
+  mp_limb_t  b_rem;
+  int        twos;
+  int        result_bit1;
+
+  /* (a/-1)=1 when a>=0, so the sign of b is ignored */
+  b_abs_size = ABSIZ (b);
+
+  if (b_abs_size == 0)
+    return JACOBI_U0 (a);  /* (a/0) */
+
+  if (a > GMP_NUMB_MAX)
+    {
+      /* az borrows alimbs for its limb data, so there is nothing to free */
+      mp_limb_t  alimbs[2];
+      mpz_t      az;
+      ALLOC(az) = numberof (alimbs);
+      PTR(az) = alimbs;
+      mpz_set_ui (az, a);
+      return mpz_kronecker (az, b);
+    }
+
+  b_ptr = PTR(b);
+  b_low = b_ptr[0];
+  result_bit1 = 0;
+
+  if (! (b_low & 1))
+    {
+      /* (0/b)=0 for b!=+/-1; and (even/even)=0 */
+      if (! (a & 1))
+        return 0;
+
+      /* a odd, b even
+
+         Establish shifted b_low with valid bit1 for the RECIP below.  Zero
+         limbs stripped are accounted for, but zero bits on b_low are not
+         because they remain in {b_ptr,b_abs_size} for
+         JACOBI_MOD_OR_MODEXACT_1_ODD. */
+
+      JACOBI_STRIP_LOW_ZEROS (result_bit1, a, b_ptr, b_abs_size, b_low);
+      if (! (b_low & 1))
+        {
+          if (UNLIKELY (b_low == GMP_NUMB_HIGHBIT))
+            {
+              /* need b_ptr[1] to get bit1 in b_low */
+              if (b_abs_size == 1)
+                {
+                  /* (a/0x80...00) == (a/2)^(NUMB-1) */
+                  if ((GMP_NUMB_BITS % 2) == 0)
+                    {
+                      /* JACOBI_STRIP_LOW_ZEROS does nothing to result_bit1
+                         when GMP_NUMB_BITS is even, so it's still 0. */
+                      ASSERT (result_bit1 == 0);
+                      result_bit1 = JACOBI_TWO_U_BIT1 (a);
+                    }
+                  return JACOBI_BIT1_TO_PN (result_bit1);
+                }
+
+              /* b_abs_size > 1 */
+              /* the only set bit of b_low is its top one, so bit 0 of the
+                 next limb becomes bit 1 of the value used from here on */
+              b_low = b_ptr[1] << 1;
+            }
+          else
+            {
+              count_trailing_zeros (twos, b_low);
+              b_low >>= twos;
+            }
+        }
+    }
+  else
+    {
+      if (a == 0)        /* (0/b)=1 for b=+/-1, 0 otherwise */
+        return (b_abs_size == 1 && b_low == 1);
+
+      if (! (a & 1))
+        {
+          /* a even, b odd */
+          count_trailing_zeros (twos, a);
+          a >>= twos;
+          /* (a*2^n/b) = (a/b) * (2/b)^n */
+          result_bit1 = JACOBI_TWOS_U_BIT1 (twos, b_low);
+        }
+    }
+
+  /* a is odd from here on */
+  if (a == 1)
+    return JACOBI_BIT1_TO_PN (result_bit1);  /* (1/b)=1 */
+
+  /* (a/b*2^n) = (b*2^n mod a / a) * RECIP(a,b) */
+  JACOBI_MOD_OR_MODEXACT_1_ODD (result_bit1, b_rem, b_ptr, b_abs_size, a);
+  result_bit1 ^= JACOBI_RECIP_UU_BIT1 (a, b_low);
+  return mpn_jacobi_base (b_rem, (mp_limb_t) a, result_bit1);
+}
diff --git a/mpz/kronzs.c b/mpz/kronzs.c

new file mode 100644 (file)

index 0000000..045e556
--- /dev/null
+++ b/mpz/kronzs.c
@@ -0,0 +1,82 @@
+/* mpz_kronecker_si -- mpz+long Kronecker/Jacobi symbol.
+
+Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Return the Kronecker symbol (a/b) for an mpz A and a signed long B.
+
+   Once the sign of b has been folded into result_bit1, its absolute value
+   is treated as an unsigned long, because 0x80..00 doesn't fit in a signed
+   long.  For the same reason the absolute value is formed with ABS_CAST
+   rather than ABS: negating LONG_MIN as a signed long would overflow.  */
+
+int
+mpz_kronecker_si (mpz_srcptr a, long b)
+{
+  mp_srcptr  a_ptr;
+  mp_size_t  a_size;
+  mp_limb_t  a_rem, b_limb;
+  int        result_bit1;
+
+  a_size = SIZ(a);
+  if (a_size == 0)
+    return JACOBI_0S (b);
+
+#if GMP_NUMB_BITS < BITS_PER_ULONG
+  /* b doesn't fit a single limb, use the general mpz/mpz code with b held
+     in a stack-based mpz */
+  if (b > GMP_NUMB_MAX || b < -GMP_NUMB_MAX)
+    {
+      mp_limb_t  blimbs[2];
+      mpz_t      bz;
+      ALLOC(bz) = numberof (blimbs);
+      PTR(bz) = blimbs;
+      mpz_set_si (bz, b);
+      return mpz_kronecker (a, bz);
+    }
+#endif
+
+  /* the signs of a and b are consumed here, while b is still signed */
+  result_bit1 = JACOBI_BSGN_SS_BIT1 (a_size, b);
+  b_limb = ABS_CAST (unsigned long, b);   /* safe for b == LONG_MIN */
+  a_ptr = PTR(a);
+
+  if ((b_limb & 1) == 0)
+    {
+      mp_limb_t  a_low = a_ptr[0];
+      int        twos;
+
+      if (b_limb == 0)
+        return JACOBI_LS0 (a_low, a_size);   /* (a/0) */
+
+      if (! (a_low & 1))
+        return 0;  /* (even/even)=0 */
+
+      /* (a/2)=(2/a) for a odd */
+      count_trailing_zeros (twos, b_limb);
+      b_limb >>= twos;
+      result_bit1 ^= JACOBI_TWOS_U_BIT1 (twos, a_low);
+    }
+
+  /* b_limb is odd from here on */
+  if (b_limb == 1)
+    return JACOBI_BIT1_TO_PN (result_bit1);  /* (a/1)=1 for any a */
+
+  result_bit1 ^= JACOBI_ASGN_SU_BIT1 (a_size, b_limb);
+  a_size = ABS(a_size);
+
+  /* (a/b) = (a mod b / b) */
+  JACOBI_MOD_OR_MODEXACT_1_ODD (result_bit1, a_rem, a_ptr, a_size, b_limb);
+  return mpn_jacobi_base (a_rem, b_limb, result_bit1);
+}
diff --git a/mpz/kronzu.c b/mpz/kronzu.c

new file mode 100644 (file)

index 0000000..e73a0f8
--- /dev/null
+++ b/mpz/kronzu.c
@@ -0,0 +1,78 @@
+/* mpz_kronecker_ui -- mpz+ulong Kronecker/Jacobi symbol.
+
+Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Return the Kronecker symbol (a/b) for an mpz A and an unsigned long B.  */
+
+int
+mpz_kronecker_ui (mpz_srcptr a, unsigned long b)
+{
+  mp_size_t  asize = SIZ(a);
+  mp_srcptr  ap;
+  mp_limb_t  rem;
+  int        bit1;
+
+  /* (0/b) */
+  if (asize == 0)
+    return JACOBI_0U (b);
+
+  /* b too big for one limb: hand over to the general mpz/mpz routine, with
+     b placed in a stack-based mpz */
+  if (b > GMP_NUMB_MAX)
+    {
+      mp_limb_t  blimbs[2];
+      mpz_t      bz;
+      PTR(bz) = blimbs;
+      ALLOC(bz) = numberof (blimbs);
+      mpz_set_ui (bz, b);
+      return mpz_kronecker (a, bz);
+    }
+
+  ap = PTR(a);
+  bit1 = 0;
+
+  if ((b & 1) == 0)
+    {
+      mp_limb_t  alow = ap[0];
+      int        shift;
+
+      if (b == 0)
+        return JACOBI_LS0 (alow, asize);   /* (a/0) */
+
+      if ((alow & 1) == 0)
+        return 0;  /* (even/even)=0 */
+
+      /* a is odd here, so (a/2)=(2/a); take the twos out of b */
+      count_trailing_zeros (shift, b);
+      b >>= shift;
+      bit1 = JACOBI_TWOS_U_BIT1 (shift, alow);
+    }
+
+  /* sign of a against the (now odd) b */
+  bit1 ^= JACOBI_ASGN_SU_BIT1 (asize, b);
+
+  if (b == 1)
+    return JACOBI_BIT1_TO_PN (bit1);  /* (a/1)=1 for any a */
+
+  asize = ABS (asize);
+
+  /* (a/b) = (a mod b / b) */
+  JACOBI_MOD_OR_MODEXACT_1_ODD (bit1, rem, ap, asize, b);
+  return mpn_jacobi_base (rem, (mp_limb_t) b, bit1);
+}
diff --git a/mpz/lcm.c b/mpz/lcm.c

new file mode 100644 (file)

index 0000000..22ac041
--- /dev/null
+++ b/mpz/lcm.c
@@ -0,0 +1,82 @@
+/* mpz_lcm -- mpz/mpz least common multiple.
+
+Copyright 1996, 2000, 2001, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Set R to the least common multiple of U and V.  The result is zero if
+   either operand is zero, and is otherwise always positive.  */
+
+void
+mpz_lcm (mpz_ptr r, mpz_srcptr u, mpz_srcptr v)
+{
+  mpz_t      g;
+  mp_size_t  un, vn, gn;
+  TMP_DECL;
+
+  un = SIZ (u);
+  vn = SIZ (v);
+  if (un == 0 || vn == 0)
+    {
+      SIZ (r) = 0;
+      return;
+    }
+  un = ABS (un);
+  vn = ABS (vn);
+
+  /* One operand is a single limb: lcm = big * (small / gcd(big,small)),
+     done directly at the mpn level.  */
+  if (un == 1 || vn == 1)
+    {
+      mp_limb_t  vlimb, glimb, carry;
+      mp_srcptr  big_p;
+      mp_ptr     res_p;
+
+      /* arrange for v to be the single-limb operand */
+      if (vn != 1)
+        {
+          MPZ_SRCPTR_SWAP (u, v);
+          un = vn;
+        }
+
+      /* grow r first; the operand pointers are fetched afterwards so that
+         r overlapping u or v keeps working */
+      MPZ_REALLOC (r, un+1);
+
+      big_p = PTR(u);
+      vlimb = PTR(v)[0];
+      glimb = mpn_gcd_1 (big_p, un, vlimb);
+      vlimb /= glimb;
+
+      res_p = PTR(r);
+      carry = mpn_mul_1 (res_p, big_p, un, vlimb);
+      res_p[un] = carry;
+      SIZ(r) = un + (carry != 0);
+      return;
+    }
+
+  /* General case: r = (u / gcd(u,v)) * v */
+  TMP_MARK;
+  gn = MAX (un, vn);
+  MPZ_TMP_INIT (g, gn);
+
+  mpz_gcd (g, u, v);
+  mpz_divexact (g, u, g);
+  mpz_mul (r, g, v);
+
+  SIZ (r) = ABS (SIZ (r));     /* result always positive */
+
+  TMP_FREE;
+}
diff --git a/mpz/lcm_ui.c b/mpz/lcm_ui.c

new file mode 100644 (file)

index 0000000..2249aac
--- /dev/null
+++ b/mpz/lcm_ui.c
@@ -0,0 +1,68 @@
+/* mpz_lcm_ui -- least common multiple of mpz and ulong.
+
+Copyright 2001, 2002, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Set R to the least common multiple of U and the unsigned long V.  The
+   result is zero if either operand is zero.  */
+
+void
+mpz_lcm_ui (mpz_ptr r, mpz_srcptr u, unsigned long v)
+{
+  mp_size_t      un;
+  mp_srcptr      src;
+  mp_ptr         dst;
+  unsigned long  common;
+  mp_limb_t      carry;
+
+#if BITS_PER_ULONG > GMP_NUMB_BITS  /* avoid warnings about shift amount */
+  /* v needs two limbs, so wrap it in a read-only mpz and use mpz_lcm */
+  if (v > GMP_NUMB_MAX)
+    {
+      mp_limb_t vlimbs[2];
+      mpz_t vz;
+      vlimbs[1] = v >> GMP_NUMB_BITS;
+      vlimbs[0] = v & GMP_NUMB_MASK;
+      SIZ(vz) = 2;
+      PTR(vz) = vlimbs;
+      mpz_lcm (r, u, vz);
+      return;
+    }
+#endif
+
+  un = SIZ(u);
+
+  /* result zero if either operand zero */
+  if (v == 0 || un == 0)
+    {
+      SIZ(r) = 0;
+      return;
+    }
+
+  un = ABS(un);
+
+  /* grow r before fetching u's limbs, in case r and u are the same */
+  MPZ_REALLOC (r, un+1);
+  src = PTR(u);
+
+  /* lcm = u * (v / gcd(u,v)) */
+  common = (unsigned long) mpn_gcd_1 (src, un, (mp_limb_t) v);
+  v /= common;
+
+  dst = PTR(r);
+  carry = mpn_mul_1 (dst, src, un, (mp_limb_t) v);
+  dst[un] = carry;
+  SIZ(r) = un + (carry != 0);
+}
diff --git a/mpz/lucnum2_ui.c b/mpz/lucnum2_ui.c

new file mode 100644 (file)

index 0000000..a6b6cfb
--- /dev/null
+++ b/mpz/lucnum2_ui.c
@@ -0,0 +1,81 @@
+/* mpz_lucnum2_ui -- calculate Lucas numbers.
+
+Copyright 2001, 2003, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Set LN to the Lucas number L[n] and LNSUB1 to L[n-1].
+
+   LN and LNSUB1 must be different variables (see the ASSERT).  Small N is
+   answered from FIB_TABLE; otherwise F[n] and F[n-1] come from mpn_fib2_ui
+   and are combined with
+       L[n]   = F[n] + 2F[n-1]
+       L[n-1] = 2F[n] - F[n-1]  */
+
+void
+mpz_lucnum2_ui (mpz_ptr ln, mpz_ptr lnsub1, unsigned long n)
+{
+  mp_ptr     lp, l1p, f1p;
+  mp_size_t  size;
+  mp_limb_t  c;
+  TMP_DECL;
+
+  ASSERT (ln != lnsub1);
+
+  /* handle small n quickly, and hide the special case for L[-1]=-1 */
+  if (n <= FIB_TABLE_LUCNUM_LIMIT)
+    {
+      mp_limb_t  f  = FIB_TABLE (n);
+      mp_limb_t  f1 = FIB_TABLE ((int) n - 1);
+
+      /* NOTE(review): limb 0 is stored without a realloc, so this relies on
+         every mpz having at least one limb allocated -- confirm against
+         mpz_init.  */
+
+      /* L[n] = F[n] + 2F[n-1] */
+      PTR(ln)[0] = f + 2*f1;
+      SIZ(ln) = 1;
+
+      /* L[n-1] = 2F[n] - F[n-1], but allow for L[-1]=-1 */
+      PTR(lnsub1)[0] = (n == 0 ? 1 : 2*f - f1);
+      SIZ(lnsub1) = (n == 0 ? -1 : 1);
+
+      return;
+    }
+
+  TMP_MARK;
+  size = MPN_FIB2_SIZE (n);
+  f1p = TMP_ALLOC_LIMBS (size);
+
+  /* +1 for the carry limb stored at [size] in each result below */
+  MPZ_REALLOC (ln,     size+1);
+  MPZ_REALLOC (lnsub1, size+1);
+  lp  = PTR(ln);
+  l1p = PTR(lnsub1);
+
+  /* l1p = F[n], f1p = F[n-1]; F[n] is parked in lnsub1's space so that
+     L[n-1] can be formed in place once L[n] has been written to ln */
+  size = mpn_fib2_ui (l1p, f1p, n);
+
+  /* L[n] = F[n] + 2F[n-1] */
+#if HAVE_NATIVE_mpn_addlsh1_n
+  c = mpn_addlsh1_n (lp, l1p, f1p, size);
+#else
+  c = mpn_lshift (lp, f1p, size, 1);
+  c += mpn_add_n (lp, lp, l1p, size);
+#endif
+  lp[size] = c;
+  SIZ(ln) = size + (c != 0);
+
+  /* L[n-1] = 2F[n] - F[n-1] */
+  c = mpn_lshift (l1p, l1p, size, 1);
+  c -= mpn_sub_n (l1p, l1p, f1p, size);
+  /* the shifted-out bit less the borrow must not have gone negative */
+  ASSERT ((mp_limb_signed_t) c >= 0);
+  l1p[size] = c;
+  SIZ(lnsub1) = size + (c != 0);
+
+  TMP_FREE;
+}
diff --git a/mpz/lucnum_ui.c b/mpz/lucnum_ui.c

new file mode 100644 (file)

index 0000000..1fb8ec8
--- /dev/null
+++ b/mpz/lucnum_ui.c
@@ -0,0 +1,198 @@
+/* mpz_lucnum_ui -- calculate Lucas number.
+
+Copyright 2001, 2003, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* change this to "#define TRACE(x) x" for diagnostics */
+#define TRACE(x)
+
+
+/* Notes:
+
+   For the +4 in L[2k+1] when k is odd, all L[4m+3] == 4, 5 or 7 mod 8, so
+   there can't be an overflow applying +4 to just the low limb (since that
+   would leave 0, 1, 2 or 3 mod 8).
+
+   For the -4 in L[2k+1] when k is even, it seems (no proof) that
+   L[3*2^(b-2)-3] == -4 mod 2^b, so for instance with a 32-bit limb
+   L[0xBFFFFFFD] == 0xFFFFFFFC mod 2^32, and this implies a borrow from the
+   low limb.  Obviously L[0xBFFFFFFD] is a huge number, but it's at least
+   conceivable to calculate it, so it probably should be handled.
+
+   For the -2 in L[2k] with k even, it seems (no proof) L[2^(b-1)] == -1 mod
+   2^b, so for instance in 32-bits L[0x80000000] has a low limb of
+   0xFFFFFFFF so there would have been a borrow.  Again L[0x80000000] is
+   obviously huge, but probably should be made to work.  */
+
+/* Set LN to the Lucas number L[n].
+
+   Small N comes straight from FIB_TABLE.  Otherwise N is halved while it's
+   even (the halvings are counted in "zeros") until it is either odd, where
+   L[2k+1] is built from an mpn_fib2_ui pair, or small enough for the table.
+   That starting value is then squared back up "zeros" times using the
+   L[2k] formula.  xp and lp swap roles at every squaring, and the matching
+   swaps made while halving ensure the final value lands in PTR(ln).  */
+
+void
+mpz_lucnum_ui (mpz_ptr ln, unsigned long n)
+{
+  mp_size_t  lalloc, xalloc, lsize, xsize;
+  mp_ptr     lp, xp;
+  mp_limb_t  c;
+  int        zeros;
+  TMP_DECL;
+
+  TRACE (printf ("mpn_lucnum_ui n=%lu\n", n));
+
+  if (n <= FIB_TABLE_LUCNUM_LIMIT)
+    {
+      /* L[n] = F[n] + 2F[n-1] */
+      PTR(ln)[0] = FIB_TABLE(n) + 2 * FIB_TABLE ((int) n - 1);
+      SIZ(ln) = 1;
+      return;
+    }
+
+  /* +1 since L[n]=F[n]+2F[n-1] might be 1 limb bigger than F[n], further +1
+     since square or mul used below might need an extra limb over the true
+     size */
+  lalloc = MPN_FIB2_SIZE (n) + 2;
+  MPZ_REALLOC (ln, lalloc);
+  lp = PTR (ln);
+
+  /* xp is a scratch area of the same size, swapped with lp as we go */
+  TMP_MARK;
+  xalloc = lalloc;
+  xp = TMP_ALLOC_LIMBS (xalloc);
+
+  /* Strip trailing zeros from n, until either an odd number is reached
+     where the L[2k+1] formula can be used, or until n fits within the
+     FIB_TABLE data.  The table is preferred of course.  */
+  zeros = 0;
+  for (;;)
+    {
+      if (n & 1)
+        {
+          /* L[2k+1] = 5*F[k-1]*(2*F[k]+F[k-1]) - 4*(-1)^k */
+
+          mp_size_t  yalloc, ysize;
+          mp_ptr     yp;
+
+          TRACE (printf ("  initial odd n=%lu\n", n));
+
+          yalloc = MPN_FIB2_SIZE (n/2);
+          yp = TMP_ALLOC_LIMBS (yalloc);
+          ASSERT (xalloc >= yalloc);
+
+          /* with k = n/2: xp = F[k], yp = F[k-1] */
+          xsize = mpn_fib2_ui (xp, yp, n/2);
+
+          /* possible high zero on F[k-1] */
+          ysize = xsize;
+          ysize -= (yp[ysize-1] == 0);
+          ASSERT (yp[ysize-1] != 0);
+
+          /* xp = 2*F[k] + F[k-1] */
+#if HAVE_NATIVE_mpn_addlsh1_n
+          c = mpn_addlsh1_n (xp, yp, xp, xsize);
+#else
+          c = mpn_lshift (xp, xp, xsize, 1);
+          c += mpn_add_n (xp, xp, yp, xsize);
+#endif
+          ASSERT (xalloc >= xsize+1);
+          xp[xsize] = c;
+          xsize += (c != 0);
+          ASSERT (xp[xsize-1] != 0);
+
+          /* lp = F[k-1] * (2*F[k]+F[k-1]); mpn_mul returns the high limb,
+             which tells whether the product fills xsize+ysize limbs */
+          ASSERT (lalloc >= xsize + ysize);
+          c = mpn_mul (lp, xp, xsize, yp, ysize);
+          lsize = xsize + ysize;
+          lsize -= (c == 0);
+
+          /* lp = 5*lp */
+#if HAVE_NATIVE_mpn_addlshift
+          c = mpn_addlshift (lp, lp, lsize, 2);
+#else
+          /* xp is free again, use it for 4*lp and then add that to lp */
+          c = mpn_lshift (xp, lp, lsize, 2);
+          c += mpn_add_n (lp, lp, xp, lsize);
+#endif
+          ASSERT (lalloc >= lsize+1);
+          lp[lsize] = c;
+          lsize += (c != 0);
+
+          /* lp = lp - 4*(-1)^k */
+          if (n & 2)
+            {
+              /* n=2k+1 with bit 1 set means k is odd, hence +4 */
+              /* no overflow, see comments above */
+              ASSERT (lp[0] <= MP_LIMB_T_MAX-4);
+              lp[0] += 4;
+            }
+          else
+            {
+              /* won't go negative */
+              MPN_DECR_U (lp, lsize, CNST_LIMB(4));
+            }
+
+          TRACE (mpn_trace ("  l",lp, lsize));
+          break;
+        }
+
+      MP_PTR_SWAP (xp, lp); /* balance the swaps wanted in the L[2k] below */
+      zeros++;
+      n /= 2;
+
+      if (n <= FIB_TABLE_LUCNUM_LIMIT)
+        {
+          /* L[n] = F[n] + 2F[n-1] */
+          lp[0] = FIB_TABLE (n) + 2 * FIB_TABLE ((int) n - 1);
+          lsize = 1;
+
+          TRACE (printf ("  initial small n=%lu\n", n);
+                 mpn_trace ("  l",lp, lsize));
+          break;
+        }
+    }
+
+  /* Undo the halvings: each pass turns L[k] in lp into L[2k].  */
+  for ( ; zeros != 0; zeros--)
+    {
+      /* L[2k] = L[k]^2 - 2*(-1)^k */
+
+      TRACE (printf ("  zeros=%d\n", zeros));
+
+      ASSERT (xalloc >= 2*lsize);
+      mpn_sqr (xp, lp, lsize);
+      lsize *= 2;
+      lsize -= (xp[lsize-1] == 0);
+
+      /* First time around the loop k==n determines (-1)^k, after that k is
+         always even and we set n=0 to indicate that.  */
+      if (n & 1)
+        {
+          /* L[n]^2 == 0 or 1 mod 4, like all squares, so +2 gives no carry */
+          ASSERT (xp[0] <= MP_LIMB_T_MAX-2);
+          xp[0] += 2;
+          n = 0;
+        }
+      else
+        {
+          /* won't go negative */
+          MPN_DECR_U (xp, lsize, CNST_LIMB(2));
+        }
+
+      /* the square was written to xp, make it the new lp */
+      MP_PTR_SWAP (xp, lp);
+      ASSERT (lp[lsize-1] != 0);
+    }
+
+  /* should end up in the right spot after all the xp/lp swaps */
+  ASSERT (lp == PTR(ln));
+  SIZ(ln) = lsize;
+
+  TMP_FREE;
+}
diff --git a/mpz/millerrabin.c b/mpz/millerrabin.c

new file mode 100644 (file)

index 0000000..f717278
--- /dev/null
+++ b/mpz/millerrabin.c
@@ -0,0 +1,112 @@
+/* mpz_millerrabin(n,reps) -- An implementation of the probabilistic primality
+   test found in Knuth's Seminumerical Algorithms book.  If the function
+   mpz_millerrabin() returns 0 then n is not prime.  If it returns 1, then n is
+   'probably' prime.  The probability of a false positive is (1/4)**reps, where
+   reps is the number of internal passes of the probabilistic algorithm.  Knuth
+   indicates that 25 passes are reasonable.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 1991, 1993, 1994, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2005 Free
+Software Foundation, Inc.  Contributed by John Amanatides.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+static int millerrabin __GMP_PROTO ((mpz_srcptr, mpz_srcptr,
+                                    mpz_ptr, mpz_ptr,
+                                    mpz_srcptr, unsigned long int));
+
+/* Run a base-210 Fermat test followed by up to REPS Miller-Rabin passes with
+   random bases on N.  Returns 0 if N is certainly composite, 1 if no pass
+   found a witness.  */
+
+int
+mpz_millerrabin (mpz_srcptr n, int reps)
+{
+  mpz_t              n_minus_1, n_minus_3, base, work, odd_part;
+  unsigned long int  two_exp;
+  gmp_randstate_t    rnd;
+  int                pass, probably_prime;
+  TMP_DECL;
+  TMP_MARK;
+
+  /* n-1: exponent for the Fermat test, and the "-1" residue afterwards */
+  MPZ_TMP_INIT (n_minus_1, SIZ (n) + 1);
+  mpz_sub_ui (n_minus_1, n, 1L);
+
+  MPZ_TMP_INIT (base, SIZ (n) + 1);
+  MPZ_TMP_INIT (work, 2 * SIZ (n)); /* mpz_powm_ui needs excessive memory!!! */
+
+  /* Fermat test to base 210; failing it settles the matter at once.  */
+  mpz_set_ui (base, 210L);
+  mpz_powm (work, base, n_minus_1, n);
+  if (mpz_cmp_ui (work, 1L) != 0)
+    {
+      TMP_FREE;
+      return 0;
+    }
+
+  /* Split n-1 as 2**two_exp * odd_part, with odd_part odd.  */
+  MPZ_TMP_INIT (odd_part, SIZ (n));
+  two_exp = mpz_scan1 (n_minus_1, 0L);
+  mpz_tdiv_q_2exp (odd_part, n_minus_1, two_exp);
+
+  /* n-3, the range of the random draw below */
+  MPZ_TMP_INIT (n_minus_3, SIZ (n) + 1);
+  mpz_sub_ui (n_minus_3, n, 3L);
+  ASSERT (mpz_cmp_ui (n_minus_3, 1L) >= 0);
+
+  gmp_randinit_default (rnd);
+
+  probably_prime = 1;
+  pass = 0;
+  while (pass < reps && probably_prime)
+    {
+      /* base in 2 to n-2 inclusive, don't want 1, 0 or -1 */
+      mpz_urandomm (base, rnd, n_minus_3);
+      mpz_add_ui (base, base, 2L);
+
+      probably_prime = millerrabin (n, n_minus_1, base, work, odd_part,
+                                    two_exp);
+      pass++;
+    }
+
+  gmp_randclear (rnd);
+
+  TMP_FREE;
+  return probably_prime;
+}
+
+/* One Miller-Rabin pass on N with base X, where NM1 = N-1 = 2**K * Q.
+   Y is scratch space.  Returns 1 if X is not a witness to N being
+   composite, 0 if it is.  */
+
+static int
+millerrabin (mpz_srcptr n, mpz_srcptr nm1, mpz_ptr x, mpz_ptr y,
+             mpz_srcptr q, unsigned long int k)
+{
+  unsigned long int  step;
+
+  /* y = x^q mod n */
+  mpz_powm (y, x, q, n);
+
+  if (mpz_cmp_ui (y, 1L) == 0)
+    return 1;
+  if (mpz_cmp (y, nm1) == 0)
+    return 1;
+
+  /* Square up to k-1 times: reaching -1 passes, reaching 1 fails.  */
+  step = 1;
+  while (step < k)
+    {
+      mpz_powm_ui (y, y, 2L, n);
+
+      if (mpz_cmp (y, nm1) == 0)
+        return 1;
+      if (mpz_cmp_ui (y, 1L) == 0)
+        return 0;
+
+      step++;
+    }
+
+  return 0;
+}
diff --git a/mpz/mod.c b/mpz/mod.c

new file mode 100644 (file)

index 0000000..f7b8411
--- /dev/null
+++ b/mpz/mod.c
@@ -0,0 +1,57 @@
+/* mpz_mod -- The mathematical mod function.
+
+Copyright 1991, 1993, 1994, 1995, 1996, 2001, 2002, 2005 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Set REM to DIVIDEND mod DIVISOR.  A non-zero truncating remainder that
+   came from a negative dividend is shifted up by |DIVISOR|.  */
+
+void
+mpz_mod (mpz_ptr rem, mpz_srcptr dividend, mpz_srcptr divisor)
+{
+  mpz_t      saved_divisor;    /* N.B.: lives until function returns! */
+  mp_size_t  dn = SIZ (divisor);
+  TMP_DECL;
+
+  TMP_MARK;
+
+  /* The divisor is still wanted after the truncating remainder has been
+     stored, so take a temporary copy when it's the same variable as REM.  */
+  if (rem == divisor)
+    {
+      MPZ_TMP_INIT (saved_divisor, ABS (dn));
+      mpz_set (saved_divisor, divisor);
+      divisor = saved_divisor;
+    }
+
+  mpz_tdiv_r (rem, dividend, divisor);
+
+  /* Adjust by |divisor|: subtract a negative divisor, add a positive one.  */
+  if (SIZ (rem) != 0 && SIZ (dividend) < 0)
+    {
+      if (SIZ (divisor) < 0)
+        mpz_sub (rem, rem, divisor);
+      else
+        mpz_add (rem, rem, divisor);
+    }
+
+  TMP_FREE;
+}
diff --git a/mpz/mul.c b/mpz/mul.c

new file mode 100644 (file)

index 0000000..ee49aea
--- /dev/null
+++ b/mpz/mul.c
@@ -0,0 +1,155 @@
+/* mpz_mul -- Multiply two integers.
+
+Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2005, 2009, 2011 Free
+Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h> /* for NULL */
+#include "gmp.h"
+#include "gmp-impl.h"
+#ifdef BERKELEY_MP
+#include "mp.h"
+#endif
+
+
+/* Set W to U times V.  (Built as mult (u, v, w) when BERKELEY_MP is
+   defined.)
+
+   The operands are first ordered so that U is the longer.  A short V is
+   handled directly with mpn_mul_1 (or mpn_mul_2 where that's native).  In
+   the general case W is given room for usize+vsize limbs.  If W shares its
+   limb data with U or V, then either the old block is kept alive until
+   after the multiply (when W has to grow), or the aliased operand is copied
+   to temporary space (when W is already big enough).  Identical operands
+   go to mpn_sqr.  */
+
+void
+#ifndef BERKELEY_MP
+mpz_mul (mpz_ptr w, mpz_srcptr u, mpz_srcptr v)
+#else /* BERKELEY_MP */
+mult (mpz_srcptr u, mpz_srcptr v, mpz_ptr w)
+#endif /* BERKELEY_MP */
+{
+  mp_size_t usize;
+  mp_size_t vsize;
+  mp_size_t wsize;
+  mp_size_t sign_product;
+  mp_ptr up, vp;
+  mp_ptr wp;
+  mp_ptr free_me;
+  size_t free_me_size;
+  mp_limb_t cy_limb;
+  TMP_DECL;
+
+  usize = SIZ (u);
+  vsize = SIZ (v);
+  /* negative exactly when the operand signs differ */
+  sign_product = usize ^ vsize;
+  usize = ABS (usize);
+  vsize = ABS (vsize);
+
+  /* make U the operand with more limbs */
+  if (usize < vsize)
+    {
+      MPZ_SRCPTR_SWAP (u, v);
+      MP_SIZE_T_SWAP (usize, vsize);
+    }
+
+  /* the shorter operand is zero, so the product is zero */
+  if (vsize == 0)
+    {
+      SIZ(w) = 0;
+      return;
+    }
+
+#if HAVE_NATIVE_mpn_mul_2
+  if (vsize <= 2)
+    {
+      MPZ_REALLOC (w, usize+vsize);
+      wp = PTR(w);
+      if (vsize == 1)
+        cy_limb = mpn_mul_1 (wp, PTR(u), usize, PTR(v)[0]);
+      else
+        {
+          cy_limb = mpn_mul_2 (wp, PTR(u), usize, PTR(v));
+          usize++;
+        }
+      wp[usize] = cy_limb;
+      usize += (cy_limb != 0);
+      SIZ(w) = (sign_product >= 0 ? usize : -usize);
+      return;
+    }
+#else
+  if (vsize == 1)
+    {
+      MPZ_REALLOC (w, usize+1);
+      wp = PTR(w);
+      cy_limb = mpn_mul_1 (wp, PTR(u), usize, PTR(v)[0]);
+      wp[usize] = cy_limb;
+      usize += (cy_limb != 0);
+      SIZ(w) = (sign_product >= 0 ? usize : -usize);
+      return;
+    }
+#endif
+
+  TMP_MARK;
+  free_me = NULL;
+  up = PTR(u);
+  vp = PTR(v);
+  wp = PTR(w);
+
+  /* Ensure W has space enough to store the result.  */
+  wsize = usize + vsize;
+  if (ALLOC(w) < wsize)
+    {
+      /* W's old contents aren't wanted in the new block, so a fresh
+         allocation is made instead of a realloc.  If the old block is also
+         an input it must survive the multiply, so freeing it is deferred
+         to the end via free_me.  */
+      if (wp == up || wp == vp)
+       {
+         free_me = wp;
+         free_me_size = ALLOC(w);
+       }
+      else
+       (*__gmp_free_func) (wp, ALLOC(w) * BYTES_PER_MP_LIMB);
+
+      ALLOC(w) = wsize;
+      wp = (mp_ptr) (*__gmp_allocate_func) (wsize * BYTES_PER_MP_LIMB);
+      PTR(w) = wp;
+    }
+  else
+    {
+      /* Make U and V not overlap with W.  */
+      if (wp == up)
+       {
+         /* W and U are identical.  Allocate temporary space for U.  */
+         up = TMP_ALLOC_LIMBS (usize);
+         /* Is V identical too?  Keep it identical with U.  */
+         if (wp == vp)
+           vp = up;
+         /* Copy to the temporary space.  */
+         MPN_COPY (up, wp, usize);
+       }
+      else if (wp == vp)
+       {
+         /* W and V are identical.  Allocate temporary space for V.  */
+         vp = TMP_ALLOC_LIMBS (vsize);
+         /* Copy to the temporary space.  */
+         MPN_COPY (vp, wp, vsize);
+       }
+    }
+
+  if (up == vp)
+    {
+      /* same limb data for both operands; mpn_sqr returns nothing, so the
+         top limb is read back from the result */
+      mpn_sqr (wp, up, usize);
+      cy_limb = wp[wsize - 1];
+    }
+  else
+    {
+      cy_limb = mpn_mul (wp, up, usize, vp, vsize);
+    }
+
+  /* drop the top limb if it came out zero */
+  wsize -= cy_limb == 0;
+
+  SIZ(w) = sign_product < 0 ? -wsize : wsize;
+  if (free_me != NULL)
+    (*__gmp_free_func) (free_me, free_me_size * BYTES_PER_MP_LIMB);
+  TMP_FREE;
+}
diff --git a/mpz/mul_2exp.c b/mpz/mul_2exp.c

new file mode 100644 (file)

index 0000000..a152181
--- /dev/null
+++ b/mpz/mul_2exp.c
@@ -0,0 +1,67 @@
+/* mpz_mul_2exp -- Multiply a bignum by 2**CNT
+
+Copyright 1991, 1993, 1994, 1996, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Set W to U times 2**CNT, i.e. U shifted left by CNT bits.  */
+
+void
+mpz_mul_2exp (mpz_ptr w, mpz_srcptr u, mp_bitcnt_t cnt)
+{
+  mp_size_t  un = SIZ (u);
+  mp_size_t  abs_un = ABS (un);
+  mp_size_t  limb_shift, wn;
+  mp_ptr     wp;
+
+  if (un == 0)
+    {
+      SIZ (w) = 0;
+      return;
+    }
+
+  /* whole limbs to shift by; the remaining bits are handled further down */
+  limb_shift = cnt / GMP_NUMB_BITS;
+
+  /* one extra limb for bits shifted out of the top of u */
+  wp = MPZ_REALLOC (w, abs_un + limb_shift + 1);
+  wn = abs_un + limb_shift;
+
+  cnt %= GMP_NUMB_BITS;
+  if (cnt == 0)
+    {
+      /* pure limb move, copying downwards so that U == W works */
+      MPN_COPY_DECR (wp + limb_shift, PTR (u), abs_un);
+    }
+  else
+    {
+      mp_limb_t  out = mpn_lshift (wp + limb_shift, PTR (u), abs_un, cnt);
+      if (out != 0)
+        wp[wn++] = out;
+    }
+
+  /* Zero all whole limbs at low end.  Do it here and not before the
+     shift/copy, not to lose for U == W.  */
+  MPN_ZERO (wp, limb_shift);
+
+  SIZ (w) = (un < 0) ? -wn : wn;
+}
diff --git a/mpz/mul_i.h b/mpz/mul_i.h

new file mode 100644 (file)

index 0000000..2de3fe0
--- /dev/null
+++ b/mpz/mul_i.h
@@ -0,0 +1,96 @@
+/* mpz_mul_ui/si (product, multiplier, small_multiplicand) -- Set PRODUCT to
+   MULTIPLIER times SMALL_MULTIPLICAND.
+
+Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2002, 2005, 2008 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+#ifdef OPERATION_mul_si
+#define FUNCTION               mpz_mul_si
+#define MULTIPLICAND_UNSIGNED  /* empty: the small operand is declared as plain "long int" */
+#define MULTIPLICAND_ABS(x)    ABS_CAST(unsigned long, (x))  /* presumably |x| as unsigned long -- see ABS_CAST in gmp-impl.h */
+#endif
+/* Unsigned variant: the small operand is "unsigned long int" and is used as it is.  */
+#ifdef OPERATION_mul_ui
+#define FUNCTION               mpz_mul_ui
+#define MULTIPLICAND_UNSIGNED  unsigned
+#define MULTIPLICAND_ABS(x)    x
+#endif
+/* The including file must define one of the OPERATION_ macros above before including this header.  */
+#ifndef FUNCTION
+Error, error, unrecognised OPERATION
+#endif
+
+
+void
+FUNCTION (mpz_ptr prod, mpz_srcptr mult,
+          MULTIPLICAND_UNSIGNED long int small_mult)
+{
+  mp_size_t size = SIZ(mult);
+  mp_size_t sign_product = size;   /* remembers the sign of MULT once size is made absolute */
+  mp_limb_t sml;                   /* MULTIPLICAND_ABS (small_mult), held in a limb */
+  mp_limb_t cy;                    /* value returned by mpn_mul_1 / mpn_addmul_1 */
+  mp_ptr pp;
+  /* Set PROD = MULT * SMALL_MULT.  A zero operand gives a zero product without touching limbs.  */
+  if (size == 0 || small_mult == 0)
+    {
+      SIZ(prod) = 0;
+      return;
+    }
+  /* Work on magnitudes from here; the sign is applied by the final statement.  */
+  size = ABS (size);
+
+  sml = MULTIPLICAND_ABS (small_mult);
+  /* Single-limb case: sml does not exceed GMP_NUMB_MAX, so one mpn_mul_1 pass is enough.  */
+  if (sml <= GMP_NUMB_MAX)
+    {
+      MPZ_REALLOC (prod, size + 1);  /* size limbs of product plus one for cy */
+      pp = PTR(prod);
+      cy = mpn_mul_1 (pp, PTR(mult), size, sml & GMP_NUMB_MASK);
+      pp[size] = cy;
+      size += cy != 0;               /* the extra limb counts only when it is non-zero */
+    }
+#if GMP_NAIL_BITS != 0
+  else
+    {
+      /* Operand too large for the current nails size.  Use temporary for
+        intermediate products, to allow prod and mult being identical.  */
+      mp_ptr tp;
+      TMP_DECL;
+      TMP_MARK;
+      /* size limbs for the product plus one limb for each of the two passes below.  */
+      tp = TMP_ALLOC_LIMBS (size + 2);
+      /* Pass 1: multiply by the low part of sml.  Pass 2: add in the high part, one limb up.  */
+      cy = mpn_mul_1 (tp, PTR(mult), size, sml & GMP_NUMB_MASK);
+      tp[size] = cy;
+      cy = mpn_addmul_1 (tp + 1, PTR(mult), size, sml >> GMP_NUMB_BITS);
+      tp[size + 1] = cy;
+      size += 2;
+      MPN_NORMALIZE_NOT_ZERO (tp, size); /* too general, need to trim one or two limb */
+      MPZ_REALLOC (prod, size);
+      pp = PTR(prod);
+      MPN_COPY (pp, tp, size);
+      TMP_FREE;
+    }
+#endif
+  /* Negative result exactly when one operand is negative; for the unsigned variant small_mult < 0 is never true.  */
+  SIZ(prod) = ((sign_product < 0) ^ (small_mult < 0)) ? -size : size;
+}
diff --git a/mpz/mul_si.c b/mpz/mul_si.c

new file mode 100644 (file)

index 0000000..86d5d31
--- /dev/null
+++ b/mpz/mul_si.c
@@ -0,0 +1,23 @@
+/* mpz_mul_si (product, multiplier, small_multiplicand) -- Set PRODUCT to
+   MULTIPLIER times SMALL_MULTIPLICAND.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#define OPERATION_mul_si   /* selects the signed variant: mul_i.h then defines FUNCTION as mpz_mul_si */
+#include "mul_i.h"         /* the function body shared with mul_ui.c */
diff --git a/mpz/mul_ui.c b/mpz/mul_ui.c

new file mode 100644 (file)

index 0000000..9630351
--- /dev/null
+++ b/mpz/mul_ui.c
@@ -0,0 +1,23 @@
+/* mpz_mul_ui (product, multiplier, small_multiplicand) -- Set PRODUCT to
+   MULTIPLIER times SMALL_MULTIPLICAND.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#define OPERATION_mul_ui   /* selects the unsigned variant: mul_i.h then defines FUNCTION as mpz_mul_ui */
+#include "mul_i.h"         /* the function body shared with mul_si.c */
diff --git a/mpz/n_pow_ui.c b/mpz/n_pow_ui.c

new file mode 100644 (file)

index 0000000..6d527c3
--- /dev/null
+++ b/mpz/n_pow_ui.c
@@ -0,0 +1,523 @@
+/* mpz_n_pow_ui -- mpn raised to ulong.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2001, 2002, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Change this to "#define TRACE(x) x" for some traces. */
+#define TRACE(x)   /* empty expansion: every TRACE (...) in this file compiles to nothing */
+
+
+/* Use this to test the mul_2 code on a CPU without a native version of that
+   routine.  */
+#if 0   /* disabled; when enabled, the two defines below route mpn_mul_2 to refmpn_mul_2 */
+#define mpn_mul_2  refmpn_mul_2
+#define HAVE_NATIVE_mpn_mul_2  1
+#endif
+
+
+/* mpz_pow_ui and mpz_ui_pow_ui want to share almost all of this code.
+   ui_pow_ui doesn't need the mpn_mul based powering loop or the tests on
+   bsize==2 or >2, but separating that isn't easy because there's shared
+   code both before and after (the size calculations and the powers of 2
+   handling).
+
+   Alternatives:
+
+   It would work to just use the mpn_mul powering loop for 1 and 2 limb
+   bases, but the current separate loop allows mul_1 and mul_2 to be done
+   in-place, which might help cache locality a bit.  If mpn_mul was relaxed
+   to allow source==dest when vn==1 or 2 then some pointer twiddling might
+   let us get the same effect in one loop.
+
+   The initial powering for bsize==1 into blimb or blimb:blimb_low doesn't
+   form the biggest possible power of b that fits, only the biggest power of
+   2 power, ie. b^(2^n).  It'd be possible to choose a bigger power, perhaps
+   using mp_bases[b].big_base for small b, and thereby get better value
+   from mpn_mul_1 or mpn_mul_2 in the bignum powering.  It's felt that doing
+   so would be more complicated than it's worth, and could well end up being
+   a slowdown for small e.  For big e on the other hand the algorithm is
+   dominated by mpn_sqr so there wouldn't be much of a saving.  The current
+   code can be viewed as simply doing the first few steps of the powering in
+   a single or double limb where possible.
+
+   If r==b, and blow_twos==0, and r must be realloc'ed, then the temporary
+   copy made of b is unnecessary.  We could just use the old alloc'ed block
+   and free it at the end.  But arranging this seems like a lot more trouble
+   than it's worth.  */
+
+
+/* floor(sqrt(GMP_NUMB_MAX)), ie. the biggest value that can be squared in
+   a limb without overflowing.
+   FIXME: This formula is an underestimate when GMP_NUMB_BITS is odd. */
+/* The expression is 2^(GMP_NUMB_BITS/2) - 1, with the exponent rounded down by integer division.  */
+#define GMP_NUMB_HALFMAX  (((mp_limb_t) 1 << GMP_NUMB_BITS/2) - 1)
+
+
+/* The following are for convenience, they update the size and check the
+   alloc.  SIZE must be an lvalue: each macro leaves the new limb count in it.  */
+/* MPN_SQR: dst = src^2; size is doubled, then reduced by one if the top limb of dst is zero.  */
+#define MPN_SQR(dst, alloc, src, size)          \
+  do {                                          \
+    ASSERT (2*(size) <= (alloc));               \
+    mpn_sqr (dst, src, size);                   \
+    (size) *= 2;                                \
+    (size) -= ((dst)[(size)-1] == 0);           \
+  } while (0)
+/* MPN_MUL: dst = src * src2; size becomes size+size2, less one when mpn_mul returns zero.  */
+#define MPN_MUL(dst, alloc, src, size, src2, size2)     \
+  do {                                                  \
+    mp_limb_t  cy;                                      \
+    ASSERT ((size) + (size2) <= (alloc));               \
+    cy = mpn_mul (dst, src, size, src2, size2);         \
+    (size) += (size2) - (cy == 0);                      \
+  } while (0)
+/* MPN_MUL_2: in-place multiply by the two-limb value mult; size grows by one, or by two when cy is non-zero.  */
+#define MPN_MUL_2(ptr, size, alloc, mult)       \
+  do {                                          \
+    mp_limb_t  cy;                              \
+    ASSERT ((size)+2 <= (alloc));               \
+    cy = mpn_mul_2 (ptr, ptr, size, mult);      \
+    (size)++;                                   \
+    (ptr)[(size)] = cy;                         \
+    (size) += (cy != 0);                        \
+  } while (0)
+/* MPN_MUL_1: in-place multiply by one limb; cy is stored above the top and counted only when non-zero.  */
+#define MPN_MUL_1(ptr, size, alloc, limb)       \
+  do {                                          \
+    mp_limb_t  cy;                              \
+    ASSERT ((size)+1 <= (alloc));               \
+    cy = mpn_mul_1 (ptr, ptr, size, limb);      \
+    (ptr)[size] = cy;                           \
+    (size) += (cy != 0);                        \
+  } while (0)
+/* MPN_LSHIFT: in-place left shift by shift bits; cy is stored above the top and counted only when non-zero.  */
+#define MPN_LSHIFT(ptr, size, alloc, shift)     \
+  do {                                          \
+    mp_limb_t  cy;                              \
+    ASSERT ((size)+1 <= (alloc));               \
+    cy = mpn_lshift (ptr, ptr, size, shift);    \
+    (ptr)[size] = cy;                           \
+    (size) += (cy != 0);                        \
+  } while (0)
+/* MPN_RSHIFT_OR_COPY: dst = src >> shift; a zero shift is a plain copy, otherwise size drops by one if the top limb became zero.  */
+#define MPN_RSHIFT_OR_COPY(dst, src, size, shift)       \
+  do {                                                  \
+    if ((shift) == 0)                                   \
+      MPN_COPY (dst, src, size);                        \
+    else                                                \
+      {                                                 \
+        mpn_rshift (dst, src, size, shift);             \
+        (size) -= ((dst)[(size)-1] == 0);               \
+      }                                                 \
+  } while (0)
+
+
+/* ralloc and talloc are only wanted for ASSERTs, after the initial space
+   allocations.  Avoid writing values to them in a normal build, to ensure
+   the compiler lets them go dead.  gcc already figures this out itself
+   actually.  */
+/* SWAP_RP_TP exchanges the result and temporary pointers; it refers to the caller's locals rp, tp, ralloc and talloc by name.  */
+#define SWAP_RP_TP                                      \
+  do {                                                  \
+    MP_PTR_SWAP (rp, tp);                               \
+    ASSERT_CODE (MP_SIZE_T_SWAP (ralloc, talloc));      \
+  } while (0)
+
+
+void
+mpz_n_pow_ui (mpz_ptr r, mp_srcptr bp, mp_size_t bsize, unsigned long int e)
+{
+  mp_ptr         rp;
+  mp_size_t      rtwos_limbs, ralloc, rsize;
+  int            rneg, i, cnt, btwos, r_bp_overlap;
+  mp_limb_t      blimb, rl;
+  mp_bitcnt_t    rtwos_bits;
+#if HAVE_NATIVE_mpn_mul_2
+  mp_limb_t      blimb_low, rl_high;
+#else
+  mp_limb_t      b_twolimbs[2];
+#endif
+  TMP_DECL;
+  /* Set R = B^E, where B is the limb array {bp, |bsize|}; a negative bsize marks a negative B.  */
+  TRACE (printf ("mpz_n_pow_ui rp=0x%lX bp=0x%lX bsize=%ld e=%lu (0x%lX)\n",
+                 PTR(r), bp, bsize, e, e);
+         mpn_trace ("b", bp, bsize));
+  /* Preconditions: a non-zero B has a non-zero top limb; the second check concerns overlap of r's limbs with b.  */
+  ASSERT (bsize == 0 || bp[ABS(bsize)-1] != 0);
+  ASSERT (MPN_SAME_OR_SEPARATE2_P (PTR(r), ABSIZ(r), bp, bsize));
+
+  /* b^0 == 1, including 0^0 == 1 */
+  if (e == 0)
+    {
+      PTR(r)[0] = 1;
+      SIZ(r) = 1;
+      return;
+    }
+
+  /* 0^e == 0 apart from 0^0 above */
+  if (bsize == 0)
+    {
+      SIZ(r) = 0;
+      return;
+    }
+
+  /* Sign of the final result. */
+  rneg = (bsize < 0 && (e & 1) != 0);
+  bsize = ABS (bsize);
+  TRACE (printf ("rneg %d\n", rneg));
+
+  r_bp_overlap = (PTR(r) == bp);   /* recorded now, before bp is advanced by the loop below */
+
+  /* Strip low zero limbs from b. */
+  rtwos_limbs = 0;
+  for (blimb = *bp; blimb == 0; blimb = *++bp)
+    {
+      rtwos_limbs += e;            /* each zero limb of b contributes e zero limbs to b^e */
+      bsize--; ASSERT (bsize >= 1);
+    }
+  TRACE (printf ("trailing zero rtwos_limbs=%ld\n", rtwos_limbs));
+
+  /* Strip low zero bits from b. */
+  count_trailing_zeros (btwos, blimb);
+  blimb >>= btwos;
+  rtwos_bits = e * btwos;          /* total factors of two coming from the low bits of b */
+  rtwos_limbs += rtwos_bits / GMP_NUMB_BITS;
+  rtwos_bits %= GMP_NUMB_BITS;     /* what is left is a sub-limb shift, applied at the very end */
+  TRACE (printf ("trailing zero btwos=%d rtwos_limbs=%ld rtwos_bits=%lu\n",
+                 btwos, rtwos_limbs, rtwos_bits));
+
+  TMP_MARK;
+  /* rl (with rl_high in the mul_2 build) collects a small partial product that is multiplied in after the main loop.  */
+  rl = 1;
+#if HAVE_NATIVE_mpn_mul_2
+  rl_high = 0;
+#endif
+
+  if (bsize == 1)
+    {
+    bsize_1:
+      /* Power up as far as possible within blimb.  We start here with e!=0,
+         but if e is small then we might reach e==0 and the whole b^e in rl.
+         Notice this code works when blimb==1 too, reaching e==0.  */
+
+      while (blimb <= GMP_NUMB_HALFMAX)
+        {
+          TRACE (printf ("small e=0x%lX blimb=0x%lX rl=0x%lX\n",
+                         e, blimb, rl));
+          ASSERT (e != 0);
+          if ((e & 1) != 0)
+            rl *= blimb;
+          e >>= 1;
+          if (e == 0)
+            goto got_rl;
+          blimb *= blimb;
+        }
+
+#if HAVE_NATIVE_mpn_mul_2
+      TRACE (printf ("single power, e=0x%lX b=0x%lX rl=0x%lX\n",
+                     e, blimb, rl));
+
+      /* Can power b once more into blimb:blimb_low */
+      bsize = 2;
+      ASSERT (e != 0);
+      if ((e & 1) != 0)
+       {
+         umul_ppmm (rl_high, rl, rl, blimb << GMP_NAIL_BITS);
+         rl >>= GMP_NAIL_BITS;
+       }
+      e >>= 1;
+      umul_ppmm (blimb, blimb_low, blimb, blimb << GMP_NAIL_BITS);
+      blimb_low >>= GMP_NAIL_BITS;
+
+    got_rl:
+      TRACE (printf ("double power e=0x%lX blimb=0x%lX:0x%lX rl=0x%lX:%lX\n",
+                     e, blimb, blimb_low, rl_high, rl));
+
+      /* Combine left-over rtwos_bits into rl_high:rl to be handled by the
+         final mul_1 or mul_2 rather than a separate lshift.
+         - rl_high:rl mustn't be 1 (since then there's no final mul)
+         - rl_high mustn't overflow
+         - rl_high mustn't change to non-zero, since mul_1+lshift is
+         probably faster than mul_2 (FIXME: is this true?)  */
+
+      if (rtwos_bits != 0
+          && ! (rl_high == 0 && rl == 1)
+          && (rl_high >> (GMP_NUMB_BITS-rtwos_bits)) == 0)
+        {
+          mp_limb_t  new_rl_high = (rl_high << rtwos_bits)
+            | (rl >> (GMP_NUMB_BITS-rtwos_bits));
+          if (! (rl_high == 0 && new_rl_high != 0))
+            {
+              rl_high = new_rl_high;
+              rl <<= rtwos_bits;
+              rtwos_bits = 0;
+              TRACE (printf ("merged rtwos_bits, rl=0x%lX:%lX\n",
+                             rl_high, rl));
+            }
+        }
+#else /* no native mpn_mul_2: rl stays a single limb */
+    got_rl:
+      TRACE (printf ("small power e=0x%lX blimb=0x%lX rl=0x%lX\n",
+                     e, blimb, rl));
+
+      /* Combine left-over rtwos_bits into rl to be handled by the final
+         mul_1 rather than a separate lshift.
+         - rl mustn't be 1 (since then there's no final mul)
+         - rl mustn't overflow  */
+
+      if (rtwos_bits != 0
+          && rl != 1
+          && (rl >> (GMP_NUMB_BITS-rtwos_bits)) == 0)
+        {
+          rl <<= rtwos_bits;
+          rtwos_bits = 0;
+          TRACE (printf ("merged rtwos_bits, rl=0x%lX\n", rl));
+        }
+#endif
+    }
+  else if (bsize == 2)
+    {
+      mp_limb_t  bsecond = bp[1];
+      if (btwos != 0)              /* guard: with btwos==0 the shift count below would be GMP_NUMB_BITS */
+        blimb |= (bsecond << (GMP_NUMB_BITS - btwos)) & GMP_NUMB_MASK;
+      bsecond >>= btwos;
+      if (bsecond == 0)
+        {
+          /* Two limbs became one after rshift. */
+          bsize = 1;
+          goto bsize_1;
+        }
+
+      TRACE (printf ("bsize==2 using b=0x%lX:%lX", bsecond, blimb));
+#if HAVE_NATIVE_mpn_mul_2
+      blimb_low = blimb;
+#else /* keep the shifted base in a local array, to be used through bp by the general mpn_mul loop */
+      bp = b_twolimbs;
+      b_twolimbs[0] = blimb;
+      b_twolimbs[1] = bsecond;
+#endif
+      blimb = bsecond;
+    }
+  else
+    {
+      if (r_bp_overlap || btwos != 0)   /* use a temporary copy when b shares r's limbs or has to be shifted */
+        {
+          mp_ptr tp = TMP_ALLOC_LIMBS (bsize);
+          MPN_RSHIFT_OR_COPY (tp, bp, bsize, btwos);
+          bp = tp;
+          TRACE (printf ("rshift or copy bp,bsize, new bsize=%ld\n", bsize));
+        }
+#if HAVE_NATIVE_mpn_mul_2
+      /* in case 3 limbs rshift to 2 and hence use the mul_2 loop below */
+      blimb_low = bp[0];
+#endif
+      blimb = bp[bsize-1];
+
+      TRACE (printf ("big bsize=%ld  ", bsize);
+             mpn_trace ("b", bp, bsize));
+    }
+
+  /* At this point blimb is the most significant limb of the base to use.
+
+     Each factor of b takes (bsize*BPML-cnt) bits and there's e of them; +1
+     limb to round up the division; +1 for multiplies all using an extra
+     limb over the true size; +2 for rl at the end; +1 for lshift at the
+     end.
+
+     The size calculation here is reasonably accurate.  The base is at least
+     half a limb, so in 32 bits the worst case is 2^16+1 treated as 17 bits
+     when it will power up as just over 16, an overestimate of 17/16 =
+     6.25%.  For a 64-bit limb it's half that.
+
+     If e==0 then blimb won't be anything useful (though it will be
+     non-zero), but that doesn't matter since we just end up with ralloc==5,
+     and that's fine for 2 limbs of rl and 1 of lshift.  */
+
+  ASSERT (blimb != 0);
+  count_leading_zeros (cnt, blimb);
+  ralloc = (bsize*GMP_NUMB_BITS - cnt + GMP_NAIL_BITS) * e / GMP_NUMB_BITS + 5;
+  TRACE (printf ("ralloc %ld, from bsize=%ld blimb=0x%lX cnt=%d\n",
+                 ralloc, bsize, blimb, cnt));
+  MPZ_REALLOC (r, ralloc + rtwos_limbs);   /* ralloc excludes the low zero limbs, so they are added here */
+  rp = PTR(r);
+
+  /* Low zero limbs resulting from powers of 2. */
+  MPN_ZERO (rp, rtwos_limbs);
+  rp += rtwos_limbs;               /* from here rp addresses the non-zero part of the result */
+
+  if (e == 0)
+    {
+      /* Any e==0 other than via bsize==1 or bsize==2 is covered at the
+         start. */
+      rp[0] = rl;
+      rsize = 1;
+#if HAVE_NATIVE_mpn_mul_2
+      rp[1] = rl_high;
+      rsize += (rl_high != 0);
+#endif
+      ASSERT (rp[rsize-1] != 0);
+    }
+  else
+    {
+      mp_ptr     tp;
+      mp_size_t  talloc;
+
+      /* In the mpn_mul_1 or mpn_mul_2 loops or in the mpn_mul loop when the
+         low bit of e is zero, tp only has to hold the second last power
+         step, which is half the size of the final result.  There's no need
+         to round up the divide by 2, since ralloc includes a +2 for rl
+         which is not needed by tp.  In the mpn_mul loop when the low bit of e
+         is 1, tp must hold nearly the full result, so just size it the same
+         as rp.  */
+
+      talloc = ralloc;
+#if HAVE_NATIVE_mpn_mul_2
+      if (bsize <= 2 || (e & 1) == 0)
+        talloc /= 2;
+#else
+      if (bsize <= 1 || (e & 1) == 0)
+        talloc /= 2;
+#endif
+      TRACE (printf ("talloc %ld\n", talloc));
+      tp = TMP_ALLOC_LIMBS (talloc);
+
+      /* Go from high to low over the bits of e, starting with i pointing at
+         the bit below the highest 1 (which will mean i==-1 if e==1).  */
+      count_leading_zeros (cnt, e);
+      i = GMP_LIMB_BITS - cnt - 2;
+      /* Three powering loops follow; each squares into tp, swaps rp/tp, and multiplies by b when bit i of e is set.  */
+#if HAVE_NATIVE_mpn_mul_2
+      if (bsize <= 2)
+        {
+          mp_limb_t  mult[2];
+
+          /* Any bsize==1 will have been powered above to be two limbs. */
+          ASSERT (bsize == 2);
+          ASSERT (blimb != 0);
+
+          /* Arrange the final result ends up in r, not in the temp space */
+          if ((i & 1) == 0)
+            SWAP_RP_TP;
+
+          rp[0] = blimb_low;
+          rp[1] = blimb;
+          rsize = 2;
+
+          mult[0] = blimb_low;
+          mult[1] = blimb;
+
+          for ( ; i >= 0; i--)
+            {
+              TRACE (printf ("mul_2 loop i=%d e=0x%lX, rsize=%ld ralloc=%ld talloc=%ld\n",
+                             i, e, rsize, ralloc, talloc);
+                     mpn_trace ("r", rp, rsize));
+
+              MPN_SQR (tp, talloc, rp, rsize);
+              SWAP_RP_TP;
+              if ((e & (1L << i)) != 0)
+                MPN_MUL_2 (rp, rsize, ralloc, mult);
+            }
+
+          TRACE (mpn_trace ("mul_2 before rl, r", rp, rsize));
+          if (rl_high != 0)        /* apply the partial product collected earlier in rl_high:rl */
+            {
+              mult[0] = rl;
+              mult[1] = rl_high;
+              MPN_MUL_2 (rp, rsize, ralloc, mult);
+            }
+          else if (rl != 1)
+            MPN_MUL_1 (rp, rsize, ralloc, rl);
+        }
+#else /* no native mpn_mul_2: only single-limb bases use the in-place loop */
+      if (bsize == 1)
+        {
+          /* Arrange the final result ends up in r, not in the temp space */
+          if ((i & 1) == 0)
+            SWAP_RP_TP;
+
+          rp[0] = blimb;
+          rsize = 1;
+
+          for ( ; i >= 0; i--)
+            {
+              TRACE (printf ("mul_1 loop i=%d e=0x%lX, rsize=%ld ralloc=%ld talloc=%ld\n",
+                             i, e, rsize, ralloc, talloc);
+                     mpn_trace ("r", rp, rsize));
+
+              MPN_SQR (tp, talloc, rp, rsize);
+              SWAP_RP_TP;
+              if ((e & (1L << i)) != 0)
+                MPN_MUL_1 (rp, rsize, ralloc, blimb);
+            }
+
+          TRACE (mpn_trace ("mul_1 before rl, r", rp, rsize));
+          if (rl != 1)             /* apply the partial product collected earlier in rl */
+            MPN_MUL_1 (rp, rsize, ralloc, rl);
+        }
+#endif
+      else
+        {
+          int  parity;
+
+          /* Arrange the final result ends up in r, not in the temp space */
+          ULONG_PARITY (parity, e);
+          if (((parity ^ i) & 1) != 0)
+            SWAP_RP_TP;
+
+          MPN_COPY (rp, bp, bsize);
+          rsize = bsize;
+
+          for ( ; i >= 0; i--)
+            {
+              TRACE (printf ("mul loop i=%d e=0x%lX, rsize=%ld ralloc=%ld talloc=%ld\n",
+                             i, e, rsize, ralloc, talloc);
+                     mpn_trace ("r", rp, rsize));
+
+              MPN_SQR (tp, talloc, rp, rsize);
+              SWAP_RP_TP;
+              if ((e & (1L << i)) != 0)
+                {
+                  MPN_MUL (tp, talloc, rp, rsize, bp, bsize);   /* the multiply is not in place, hence the second swap */
+                  SWAP_RP_TP;
+                }
+            }
+        }
+    }
+  /* After all the swaps the result must be in r's own limbs, just above the low zero limbs.  */
+  ASSERT (rp == PTR(r) + rtwos_limbs);
+  TRACE (mpn_trace ("end loop r", rp, rsize));
+  TMP_FREE;
+
+  /* Apply any partial limb factors of 2. */
+  if (rtwos_bits != 0)
+    {
+      MPN_LSHIFT (rp, rsize, ralloc, (unsigned) rtwos_bits);
+      TRACE (mpn_trace ("lshift r", rp, rsize));
+    }
+  /* Count the low zero limbs in the final size and apply the sign computed at the top.  */
+  rsize += rtwos_limbs;
+  SIZ(r) = (rneg ? -rsize : rsize);
+}
diff --git a/mpz/neg.c b/mpz/neg.c

new file mode 100644 (file)

index 0000000..6d0f8a9
--- /dev/null
+++ b/mpz/neg.c
@@ -0,0 +1,47 @@
+/* mpz_neg(mpz_ptr dst, mpz_srcptr src) -- Assign the negated value of SRC to DST.
+
+Copyright 1991, 1993, 1994, 1995, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define __GMP_FORCE_mpz_neg 1
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpz_neg (mpz_ptr w, mpz_srcptr u)
+{
+  mp_ptr wp, up;
+  mp_size_t usize, size;
+  /* Set W = -U.  The sign is carried by the size field, so only that field changes when W and U coincide.  */
+  usize = u->_mp_size;
+
+  if (u != w)                      /* distinct variables: copy the magnitude limbs first */
+    {
+      size = ABS (usize);
+
+      if (w->_mp_alloc < size)
+       _mpz_realloc (w, size);
+      /* Limb pointers are read only after the possible reallocation above.  */
+      wp = w->_mp_d;
+      up = u->_mp_d;
+
+      MPN_COPY (wp, up, size);
+    }
+  /* Same limb count as U, opposite sign; zero stays zero.  */
+  w->_mp_size = -usize;
+}
diff --git a/mpz/nextprime.c b/mpz/nextprime.c

new file mode 100644 (file)

index 0000000..9e68ea8
--- /dev/null
+++ b/mpz/nextprime.c
@@ -0,0 +1,120 @@
+/* mpz_nextprime(p,n) - compute the next prime > n and store that in p.
+
+Copyright 1999, 2000, 2001, 2008, 2009 Free Software Foundation, Inc.
+
+Contributed to the GNU project by Niels Möller and Torbjorn Granlund.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+static const unsigned char primegap[] =   /* gaps between successive primes, walked from 3: 3+2=5, 5+2=7, 7+4=11, ... */
+{
+  2,2,4,2,4,2,4,6,2,6,4,2,4,6,6,2,6,4,2,6,4,6,8,4,2,4,2,4,14,4,6,
+  2,10,2,6,6,4,6,6,2,10,2,4,2,12,12,4,2,4,6,2,10,6,6,6,2,6,4,2,10,14,4,2,
+  4,14,6,10,2,4,6,8,6,6,4,6,8,4,8,10,2,10,2,6,4,6,8,4,2,4,12,8,4,8,4,6,
+  12,2,18,6,10,6,6,2,6,10,6,6,2,6,6,4,2,12,10,2,4,6,6,2,12,4,6,8,10,8,10,8,
+  6,6,4,8,6,4,8,4,14,10,12,2,10,2,4,2,10,14,4,2,4,14,4,2,4,20,4,8,10,8,4,6,
+  6,14,4,6,6,8,6,12
+};
+/* Equals the number of entries in primegap[] above (31 + 4*32 + 8); mpz_nextprime uses at most NUMBER_OF_PRIMES - 1 of them.  */
+#define NUMBER_OF_PRIMES 167
+
+void
+mpz_nextprime (mpz_ptr p, mpz_srcptr n)
+{
+  unsigned short *moduli;          /* moduli[i] = p mod (i-th small odd prime), for the p they were computed from */
+  unsigned long difference;        /* offset accumulated since p was last updated */
+  int i;
+  unsigned prime_limit;            /* number of small primes used for trial division */
+  unsigned long prime;
+  int cnt;
+  mp_size_t pn;
+  mp_bitcnt_t nbits;               /* bit length of the first candidate */
+  unsigned incr;                   /* offset of the current candidate from the p used for moduli[] */
+  TMP_SDECL;
+  /* Set P to a value greater than N that passes trial division and mpz_millerrabin (p, 25).  */
+  /* First handle tiny numbers */
+  if (mpz_cmp_ui (n, 2) < 0)
+    {
+      mpz_set_ui (p, 2);
+      return;
+    }
+  mpz_add_ui (p, n, 1);
+  mpz_setbit (p, 0);               /* p is now the smallest odd number greater than n */
+  /* An odd p that is <= 7 here is 3, 5 or 7, all prime.  */
+  if (mpz_cmp_ui (p, 7) <= 0)
+    return;
+  /* Use about one small prime per two bits of p, capped by the size of primegap[].  */
+  pn = SIZ(p);
+  count_leading_zeros (cnt, PTR(p)[pn - 1]);
+  nbits = pn * GMP_NUMB_BITS - (cnt - GMP_NAIL_BITS);
+  if (nbits / 2 >= NUMBER_OF_PRIMES)
+    prime_limit = NUMBER_OF_PRIMES - 1;
+  else
+    prime_limit = nbits / 2;
+
+  TMP_SMARK;
+
+  /* Compute residues modulo small odd primes */
+  moduli = TMP_SALLOC_TYPE (prime_limit * sizeof moduli[0], unsigned short);
+  /* Outer loop: recompute the residues for the current p, then scan a window of INCR_LIMIT above it.  */
+  for (;;)
+    {
+      /* FIXME: Compute lazily? */
+      prime = 3;
+      for (i = 0; i < prime_limit; i++)
+       {
+         moduli[i] = mpz_fdiv_ui (p, prime);
+         prime += primegap[i];
+       }
+
+#define INCR_LIMIT 0x10000     /* deep science */
+      /* Candidates are odd: both incr and difference advance in steps of 2.  */
+      for (difference = incr = 0; incr < INCR_LIMIT; difference += 2)
+       {
+         /* First check residues */
+         prime = 3;
+         for (i = 0; i < prime_limit; i++)
+           {
+             unsigned r;
+             /* FIXME: Reduce moduli + incr and store back, to allow for
+                division-free reductions.  Alternatively, table primes[]'s
+                inverses (mod 2^16).  */
+             r = (moduli[i] + incr) % prime;
+             prime += primegap[i];
+
+             if (r == 0)           /* candidate is divisible by a small prime: skip it */
+               goto next;
+           }
+         /* The candidate survived trial division: bring p up to it before the expensive test.  */
+         mpz_add_ui (p, p, difference);
+         difference = 0;
+
+         /* Miller-Rabin test */
+         if (mpz_millerrabin (p, 25))
+           goto done;
+       next:;
+         incr += 2;
+       }
+      mpz_add_ui (p, p, difference);   /* window exhausted: apply the pending offset before recomputing residues */
+      difference = 0;
+    }
+ done:
+  TMP_SFREE;
+}
diff --git a/mpz/out_raw.c b/mpz/out_raw.c

new file mode 100644 (file)

index 0000000..3eb9fab
--- /dev/null
+++ b/mpz/out_raw.c
@@ -0,0 +1,162 @@
+/* mpz_out_raw -- write an mpz_t in raw format.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* HTON_LIMB_STORE takes a normal host byte order limb and stores it as
+   network byte order (ie. big endian). */
+/* Big-endian limbs: a plain store already gives the wanted byte order.  */
+#if HAVE_LIMB_BIG_ENDIAN
+#define HTON_LIMB_STORE(dst, limb)  do { *(dst) = (limb); } while (0)
+#endif
+/* Little-endian limbs: defer to BSWAP_LIMB_STORE (presumably a byte-reversing store -- see gmp-impl.h).  */
+#if HAVE_LIMB_LITTLE_ENDIAN
+#define HTON_LIMB_STORE(dst, limb)  BSWAP_LIMB_STORE (dst, limb)
+#endif
+/* Fallback when neither case above applied: write the bytes one at a time, most significant first.  */
+#ifndef HTON_LIMB_STORE
+#define HTON_LIMB_STORE(dst, limb)                                      \
+  do {                                                                  \
+    mp_limb_t  __limb = (limb);                                         \
+    char      *__p = (char *) (dst);                                    \
+    int        __i;                                                     \
+    for (__i = 0; __i < BYTES_PER_MP_LIMB; __i++)                       \
+      __p[__i] = (char) (__limb >> ((BYTES_PER_MP_LIMB-1 - __i) * 8));  \
+  } while (0)
+#endif
+
+
+size_t
+mpz_out_raw (FILE *fp, mpz_srcptr x)
+{
+  mp_size_t   xsize, abs_xsize, bytes, i;
+  mp_srcptr   xp;
+  char        *tp, *bp;
+  mp_limb_t   xlimb;
+  int         zeros;
+  size_t      tsize, ssize;
+  /* Write X to FP (stdout when FP is 0): a 4-byte big-endian byte count, negated for negative X, then the magnitude bytes, most significant first.  Returns the bytes written, or 0 if fwrite fails.  */
+  xsize = SIZ(x);
+  abs_xsize = ABS (xsize);
+  bytes = (abs_xsize * GMP_NUMB_BITS + 7) / 8;   /* upper bound; high zero bytes are stripped below */
+  tsize = ROUND_UP_MULTIPLE ((unsigned) 4, BYTES_PER_MP_LIMB) + bytes;
+  /* The buffer starts with room for the 4-byte size field, rounded up to whole limbs (presumably to keep the limb stores at bp aligned).  */
+  tp = __GMP_ALLOCATE_FUNC_TYPE (tsize, char);
+  bp = tp + ROUND_UP_MULTIPLE ((unsigned) 4, BYTES_PER_MP_LIMB);
+
+  if (bytes != 0)
+    {
+      bp += bytes;                 /* fill backwards from the end: least significant data goes last */
+      xp = PTR (x);
+      i = abs_xsize;
+
+      if (GMP_NAIL_BITS == 0)
+        {
+          /* reverse limb order, and byte swap if necessary */
+#ifdef _CRAY
+          _Pragma ("_CRI ivdep");
+#endif
+          do
+            {
+              bp -= BYTES_PER_MP_LIMB;
+              xlimb = *xp;
+              HTON_LIMB_STORE ((mp_ptr) bp, xlimb);
+              xp++;
+            }
+          while (--i > 0);
+
+          /* strip high zero bytes (without fetching from bp) */
+          count_leading_zeros (zeros, xlimb);   /* xlimb is the last limb read, ie. the most significant one */
+          zeros /= 8;
+          bp += zeros;
+          bytes -= zeros;
+        }
+      else
+        {
+          mp_limb_t  new_xlimb;
+          int        bits;         /* number of not yet written bits held in xlimb */
+          ASSERT_CODE (char *bp_orig = bp - bytes);
+
+          ASSERT_ALWAYS (GMP_NUMB_BITS >= 8);
+
+          bits = 0;
+          xlimb = 0;
+          for (;;)
+            {
+              while (bits >= 8)    /* emit whole bytes from the pending bits */
+                {
+                  ASSERT (bp > bp_orig);
+                  *--bp = xlimb & 0xFF;
+                  xlimb >>= 8;
+                  bits -= 8;
+                }
+
+              if (i == 0)
+                break;
+              /* Fewer than 8 bits pending: complete the next byte with the low bits of the next limb.  */
+              new_xlimb = *xp++;
+              i--;
+              ASSERT (bp > bp_orig);
+              *--bp = (xlimb | (new_xlimb << bits)) & 0xFF;
+              xlimb = new_xlimb >> (8 - bits);
+              bits += GMP_NUMB_BITS - 8;
+            }
+
+          if (bits != 0)           /* final partial byte */
+            {
+              ASSERT (bp > bp_orig);
+              *--bp = xlimb;
+            }
+
+          ASSERT (bp == bp_orig);
+          while (*bp == 0)         /* strip high zero bytes */
+            {
+              bp++;
+              bytes--;
+            }
+        }
+    }
+
+  /* total bytes to be written */
+  ssize = 4 + bytes;
+
+  /* twos complement negative for the size value */
+  bytes = (xsize >= 0 ? bytes : -bytes);
+
+  /* so we don't rely on sign extension in ">>" */
+  ASSERT_ALWAYS (sizeof (bytes) >= 4);
+  /* Size field: the four low bytes of the signed count, most significant byte first, placed just below the data.  */
+  bp[-4] = bytes >> 24;
+  bp[-3] = bytes >> 16;
+  bp[-2] = bytes >> 8;
+  bp[-1] = bytes;
+  bp -= 4;
+
+  if (fp == 0)
+    fp = stdout;
+  if (fwrite (bp, ssize, 1, fp) != 1)
+    ssize = 0;                     /* report a failed write as 0 bytes */
+  /* Free with the same size that was passed to the allocation above.  */
+  (*__gmp_free_func) (tp, tsize);
+  return ssize;
+}
diff --git a/mpz/out_str.c b/mpz/out_str.c

new file mode 100644 (file)

index 0000000..8643db8
--- /dev/null
+++ b/mpz/out_str.c
@@ -0,0 +1,105 @@
+/* mpz_out_str(stream, base, integer) -- Output to STREAM the multi prec.
+   integer INTEGER in base BASE.
+
+Copyright 1991, 1993, 1994, 1996, 2001, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Write X to STREAM as a string of digits in base BASE.
+
+   STREAM  Destination; a null pointer selects stdout.
+   BASE    2..36 uses digits and lower-case letters, 37..62 uses digits,
+           upper-case letters, then lower-case letters, and -2..-36 uses
+           digits and upper-case letters.  0, 1 and -1 fall back to base 10.
+   X       The integer to print; a leading '-' is emitted when it is negative.
+
+   Returns the number of bytes written, or 0 if BASE is outside the ranges
+   above or if the stream's error indicator is set afterwards.  */
+size_t
+mpz_out_str (FILE *stream, int base, mpz_srcptr x)
+{
+  mp_ptr xp;
+  mp_size_t x_size = x->_mp_size;
+  unsigned char *str;
+  size_t str_size;
+  size_t i;
+  size_t written;
+  char *num_to_text;
+  TMP_DECL;
+
+  if (stream == 0)
+    stream = stdout;
+
+  /* Validate BASE up front: it indexes both num_to_text and mp_bases below,
+     so an out-of-range value must never get that far.  */
+  if (base >= 0)
+    {
+      num_to_text = "0123456789abcdefghijklmnopqrstuvwxyz";
+      if (base <= 1)
+        base = 10;
+      else if (base > 36)
+        {
+          num_to_text = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
+          if (base > 62)
+            return 0;
+        }
+    }
+  else
+    {
+      /* Reject before negating: the upper-case table has only 36 entries,
+         and testing first also avoids negating the most negative int.  */
+      if (base < -36)
+        return 0;
+      base = -base;
+      if (base == 1)
+        base = 10;
+      num_to_text = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+    }
+
+  if (x_size == 0)
+    {
+      fputc ('0', stream);
+      return ferror (stream) ? 0 : 1;
+    }
+
+  written = 0;
+
+  if (x_size < 0)
+    {
+      fputc ('-', stream);
+      x_size = -x_size;
+      written = 1;
+    }
+
+  TMP_MARK;
+  /* Digit buffer sized from the bit length, plus a little slack.  */
+  str_size = ((size_t) (x_size * GMP_LIMB_BITS
+                        * mp_bases[base].chars_per_bit_exactly)) + 3;
+  str = (unsigned char *) TMP_ALLOC (str_size);
+
+  /* Move the number to convert into temporary space, since mpn_get_str
+     clobbers its argument + needs one extra high limb....  */
+  xp = TMP_ALLOC_LIMBS (x_size + 1);
+  MPN_COPY (xp, x->_mp_d, x_size);
+
+  /* From here on str_size is the number of digit values produced.  */
+  str_size = mpn_get_str (str, base, xp, x_size);
+
+  /* mpn_get_str might make some leading zeros.  Skip them.  */
+  while (*str == 0)
+    {
+      str_size--;
+      str++;
+    }
+
+  /* Translate to printable chars.  */
+  for (i = 0; i < str_size; i++)
+    str[i] = num_to_text[str[i]];
+  str[str_size] = 0;
+
+  /* Count the digits actually written on top of the optional sign.  */
+  written += fwrite ((char *) str, 1, str_size, stream);
+
+  TMP_FREE;
+  return ferror (stream) ? 0 : written;
+}
diff --git a/mpz/perfpow.c b/mpz/perfpow.c

new file mode 100644 (file)

index 0000000..81c8884
--- /dev/null
+++ b/mpz/perfpow.c
@@ -0,0 +1,29 @@
+/* mpz_perfect_power_p(arg) -- Return non-zero if ARG is a perfect power,
+   zero otherwise.
+
+Copyright 1998, 1999, 2000, 2001, 2005, 2008, 2009 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Forward U's limb pointer and signed size field to mpn_perfect_power_p
+   and return that function's result unchanged.  */
+int
+mpz_perfect_power_p (mpz_srcptr u)
+{
+  mp_srcptr limbs = PTR (u);
+  mp_size_t signed_size = SIZ (u);
+
+  return mpn_perfect_power_p (limbs, signed_size);
+}
diff --git a/mpz/perfsqr.c b/mpz/perfsqr.c

new file mode 100644 (file)

index 0000000..b4853b5
--- /dev/null
+++ b/mpz/perfsqr.c
@@ -0,0 +1,24 @@
+/* mpz_perfect_square_p(arg) -- Return non-zero if ARG is a perfect square,
+   zero otherwise.
+
+Copyright 1991, 1993, 1994, 1996, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define __GMP_FORCE_mpz_perfect_square_p 1
+
+#include "gmp.h"
+#include "gmp-impl.h"
diff --git a/mpz/popcount.c b/mpz/popcount.c

new file mode 100644 (file)

index 0000000..c8d9bec
--- /dev/null
+++ b/mpz/popcount.c
@@ -0,0 +1,24 @@
+/* mpz_popcount(mpz_ptr op) -- Population count of OP.  If the operand is
+   negative, return ~0 (a novel representation of infinity).
+
+Copyright 1994, 1996, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define __GMP_FORCE_mpz_popcount 1
+
+#include "gmp.h"
+#include "gmp-impl.h"
diff --git a/mpz/pow_ui.c b/mpz/pow_ui.c

new file mode 100644 (file)

index 0000000..ae0307d
--- /dev/null
+++ b/mpz/pow_ui.c
@@ -0,0 +1,42 @@
+/* mpz_pow_ui -- mpz raised to ulong.
+
+Copyright 2001, 2008 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Set R to B raised to the power E.
+
+   Exponents 0, 1 and 2 are answered directly with mpz_set_ui, mpz_set and
+   mpz_mul; every other exponent is handed to mpz_n_pow_ui together with
+   B's limb pointer and signed size.  */
+void
+mpz_pow_ui (mpz_ptr r, mpz_srcptr b, unsigned long int e)
+{
+  /* The small exponents are picked off first, mainly to avoid the overhead
+     of mpz_n_pow_ui for small bases and exponents.  */
+  if (e == 0)
+    mpz_set_ui (r, 1);
+  else if (e == 1)
+    mpz_set (r, b);
+  else if (e == 2)
+    mpz_mul (r, b, b);
+  else
+    mpz_n_pow_ui (r, PTR(b), (mp_size_t) SIZ(b), e);
+}
diff --git a/mpz/powm.c b/mpz/powm.c

new file mode 100644 (file)

index 0000000..29b0132
--- /dev/null
+++ b/mpz/powm.c
@@ -0,0 +1,279 @@
+/* mpz_powm(res,base,exp,mod) -- Set R to (U^E) mod M.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2005, 2008, 2009
+Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+#ifdef BERKELEY_MP
+#include "mp.h"
+#endif
+
+
+/* TODO
+
+ * Improve handling of buffers.  It is pretty ugly now.
+
+ * For even moduli, we compute a binvert of its odd part both here and in
+   mpn_powm.  How can we avoid this recomputation?
+*/
+
+/*
+  b ^ e mod m   res
+  0   0     0    ?
+  0   e     0    ?
+  0   0     m    ?
+  0   e     m    0
+  b   0     0    ?
+  b   e     0    ?
+  b   0     m    1 mod m
+  b   e     m    b^e mod m
+*/
+
+#define HANDLE_NEGATIVE_EXPONENT 1
+
+/* Set R to (B^E) mod M.
+
+   A zero M invokes DIVIDE_BY_ZERO.  E == 0 gives 1 mod M.  For E < 0 (when
+   HANDLE_NEGATIVE_EXPONENT is enabled) B is first replaced by the inverse
+   that mpz_invert computes modulo M, and DIVIDE_BY_ZERO is invoked when
+   mpz_invert reports failure.  A base that is zero at that point gives 0.
+   For a negative B and odd E a non-zero residue is replaced by |M| minus
+   that residue.
+
+   With BERKELEY_MP defined the same body is compiled as pow (b, e, m, r).  */
+void
+#ifndef BERKELEY_MP
+mpz_powm (mpz_ptr r, mpz_srcptr b, mpz_srcptr e, mpz_srcptr m)
+#else /* BERKELEY_MP */
+pow (mpz_srcptr b, mpz_srcptr e, mpz_srcptr m, mpz_ptr r)
+#endif /* BERKELEY_MP */
+{
+  mp_size_t n, nodd, ncnt;
+  int cnt;
+  mp_ptr rp, tp;
+  mp_srcptr bp, ep, mp;
+  mp_size_t rn, bn, es, en, itch;
+  TMP_DECL;
+
+  n = ABSIZ(m);
+  if (n == 0)
+    DIVIDE_BY_ZERO;
+
+  mp = PTR(m);
+
+  TMP_MARK;
+
+  es = SIZ(e);
+  if (UNLIKELY (es <= 0))
+    {
+      mpz_t new_b;
+      if (es == 0)
+       {
+         /* b^0 mod m,  b is anything and m is non-zero.
+            Result is 1 mod m, i.e., 1 or 0 depending on if m = 1.  */
+         SIZ(r) = n != 1 || mp[0] != 1;
+         PTR(r)[0] = 1;
+         TMP_FREE;     /* we haven't really allocated anything here */
+         return;
+       }
+#if HANDLE_NEGATIVE_EXPONENT
+      /* Negative exponent: continue with the inverse of b and with |e|.  */
+      MPZ_TMP_INIT (new_b, n + 1);
+
+      if (! mpz_invert (new_b, b, m))
+       DIVIDE_BY_ZERO;
+      b = new_b;
+      es = -es;
+#else
+      DIVIDE_BY_ZERO;
+#endif
+    }
+  en = es;
+
+  bn = ABSIZ(b);
+
+  if (UNLIKELY (bn == 0))
+    {
+      SIZ(r) = 0;
+      TMP_FREE;
+      return;
+    }
+
+  ep = PTR(e);
+
+  /* Handle (b^1 mod m) early, since mpn_pow* do not handle that case.  */
+  if (UNLIKELY (en == 1 && ep[0] == 1))
+    {
+      rp = TMP_ALLOC_LIMBS (n);
+      bp = PTR(b);
+      if (bn >= n)
+       {
+         mp_ptr qp = TMP_ALLOC_LIMBS (bn - n + 1);
+         mpn_tdiv_qr (qp, rp, 0L, bp, bn, mp, n);
+         rn = n;
+         MPN_NORMALIZE (rp, rn);
+
+         if (SIZ(b) < 0 && rn != 0)
+           {
+             mpn_sub (rp, mp, n, rp, rn);
+             rn = n;
+             MPN_NORMALIZE (rp, rn);
+           }
+       }
+      else
+       {
+         if (SIZ(b) < 0)
+           {
+             mpn_sub (rp, mp, n, bp, bn);
+             rn = n;
+             /* m - |b| is non-zero here (|b| has fewer limbs than m) but
+                can be much shorter than m, e.g. m = B^(n-1) and
+                |b| = B^(n-1) - 1 leave just 1.  Strip every high zero
+                limb, not only one, so r stays normalized.  */
+             MPN_NORMALIZE (rp, rn);
+           }
+         else
+           {
+             MPN_COPY (rp, bp, bn);
+             rn = bn;
+           }
+       }
+      goto ret;
+    }
+
+  /* Remove low zero limbs from M.  This loop will terminate for correctly
+     represented mpz numbers.  */
+  ncnt = 0;
+  while (UNLIKELY (mp[0] == 0))
+    {
+      mp++;
+      ncnt++;
+    }
+  nodd = n - ncnt;
+  cnt = 0;
+  if (mp[0] % 2 == 0)
+    {
+      /* The lowest remaining limb is still even: shift out its cnt trailing
+         zero bits into a fresh copy, which becomes the odd part of M, and
+         count this partially-used limb in ncnt as well.  */
+      mp_ptr new = TMP_ALLOC_LIMBS (nodd);
+      count_trailing_zeros (cnt, mp[0]);
+      mpn_rshift (new, mp, nodd, cnt);
+      nodd -= new[nodd - 1] == 0;
+      mp = new;
+      ncnt++;
+    }
+
+  if (ncnt != 0)
+    {
+      /* We will call both mpn_powm and mpn_powlo.  */
+      /* rp needs n, mpn_powlo needs 4n, the 2 mpn_binvert might need more */
+      mp_size_t n_largest_binvert = MAX (ncnt, nodd);
+      mp_size_t itch_binvert = mpn_binvert_itch (n_largest_binvert);
+      itch = 3 * n + MAX (itch_binvert, 2 * n);
+    }
+  else
+    {
+      /* We will call just mpn_powm.  */
+      mp_size_t itch_binvert = mpn_binvert_itch (nodd);
+      itch = n + MAX (itch_binvert, 2 * n);
+    }
+  tp = TMP_ALLOC_LIMBS (itch);
+
+  /* The first n limbs of the allocation hold the result, the rest is
+     scratch.  */
+  rp = tp;  tp += n;
+
+  /* Power modulo the odd part {mp,nodd}.  */
+  bp = PTR(b);
+  mpn_powm (rp, bp, bn, ep, en, mp, nodd, tp);
+
+  rn = n;
+
+  if (ncnt != 0)
+    {
+      /* M is even: also compute the power on the low ncnt limbs (r2) and
+         merge it with the odd-part residue in rp.  */
+      mp_ptr r2, xp, yp, odd_inv_2exp;
+      unsigned long t;
+      int bcnt;
+
+      if (bn < ncnt)
+       {
+         /* Zero-extend the base to ncnt limbs for mpn_powlo.  */
+         mp_ptr new = TMP_ALLOC_LIMBS (ncnt);
+         MPN_COPY (new, bp, bn);
+         MPN_ZERO (new + bn, ncnt - bn);
+         bp = new;
+       }
+
+      r2 = tp;
+
+      if (bp[0] % 2 == 0)
+       {
+         /* Even base: with a multi-limb exponent the low part is taken to
+            be zero without calling mpn_powlo.  */
+         if (en > 1)
+           {
+             MPN_ZERO (r2, ncnt);
+             goto zero;
+           }
+
+         ASSERT (en == 1);
+         t = (ncnt - (cnt != 0)) * GMP_NUMB_BITS + cnt;
+
+         /* Count number of low zero bits in B, up to 3.  */
+         bcnt = (0x1213 >> ((bp[0] & 7) << 1)) & 0x3;
+         /* Note that ep[0] * bcnt might overflow, but that just results
+            in a missed optimization.  */
+         if (ep[0] * bcnt >= t)
+           {
+             MPN_ZERO (r2, ncnt);
+             goto zero;
+           }
+       }
+
+      mpn_powlo (r2, bp, ep, en, ncnt, tp + ncnt);
+
+    zero:
+      if (nodd < ncnt)
+       {
+         /* Zero-extend the odd part to ncnt limbs for mpn_binvert.  */
+         mp_ptr new = TMP_ALLOC_LIMBS (ncnt);
+         MPN_COPY (new, mp, nodd);
+         MPN_ZERO (new + nodd, ncnt - nodd);
+         mp = new;
+       }
+
+      odd_inv_2exp = tp + n;
+      mpn_binvert (odd_inv_2exp, mp, ncnt, tp + 2 * n);
+
+      mpn_sub (r2, r2, ncnt, rp, nodd > ncnt ? ncnt : nodd);
+
+      xp = tp + 2 * n;
+      mpn_mullo_n (xp, odd_inv_2exp, r2, ncnt);
+
+      /* Keep only the cnt low bits of the top limb when the power-of-two
+         factor ends inside a limb.  */
+      if (cnt != 0)
+       xp[ncnt - 1] &= (CNST_LIMB(1) << cnt) - 1;
+
+      /* mpn_mul is called with the longer operand first.  */
+      yp = tp;
+      if (ncnt > nodd)
+       mpn_mul (yp, xp, ncnt, mp, nodd);
+      else
+       mpn_mul (yp, mp, nodd, xp, ncnt);
+
+      mpn_add (rp, yp, n, rp, nodd);
+
+      ASSERT (nodd + ncnt >= n);
+      ASSERT (nodd + ncnt <= n + 1);
+    }
+
+  MPN_NORMALIZE (rp, rn);
+
+  /* Negative base with odd exponent: replace a non-zero residue by
+     |m| - residue, using the original (unshifted) modulus.  */
+  if ((ep[0] & 1) && SIZ(b) < 0 && rn != 0)
+    {
+      mpn_sub (rp, PTR(m), n, rp, rn);
+      rn = n;
+      MPN_NORMALIZE (rp, rn);
+    }
+
+ ret:
+  MPZ_REALLOC (r, rn);
+  SIZ(r) = rn;
+  MPN_COPY (PTR(r), rp, rn);
+
+  TMP_FREE;
+}
diff --git a/mpz/powm_sec.c b/mpz/powm_sec.c

new file mode 100644 (file)

index 0000000..2432fe4
--- /dev/null
+++ b/mpz/powm_sec.c
@@ -0,0 +1,90 @@
+/* mpz_powm_sec(res,base,exp,mod) -- Set R to (U^E) mod M.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2005, 2008, 2009
+Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Set R to (B^E) mod M by way of mpn_powm_sec.
+
+   M must be non-zero and odd and E must not be negative; otherwise
+   DIVIDE_BY_ZERO is invoked.  E == 0 gives 1 mod M.  A zero B gives 0
+   without calling the mpn layer, matching mpz_powm.  For a negative B and
+   odd E a non-zero residue is replaced by |M| minus that residue.  */
+void
+mpz_powm_sec (mpz_ptr r, mpz_srcptr b, mpz_srcptr e, mpz_srcptr m)
+{
+  mp_size_t n;
+  mp_ptr rp, tp;
+  mp_srcptr bp, ep, mp;
+  mp_size_t rn, bn, es, en;
+  TMP_DECL;
+
+  n = ABSIZ(m);
+  if (n == 0)
+    DIVIDE_BY_ZERO;
+
+  mp = PTR(m);
+
+  /* Even moduli are rejected outright.  */
+  if (mp[0] % 2 == 0)
+    DIVIDE_BY_ZERO;
+
+  es = SIZ(e);
+  if (UNLIKELY (es <= 0))
+    {
+      if (es == 0)
+        {
+          /* b^0 mod m,  b is anything and m is non-zero.
+             Result is 1 mod m, i.e., 1 or 0 depending on if m = 1.  */
+          SIZ(r) = n != 1 || mp[0] != 1;
+          PTR(r)[0] = 1;
+          return;
+        }
+      /* Negative exponents are not supported by this entry point.  */
+      DIVIDE_BY_ZERO;
+    }
+
+  en = es;
+  bn = ABSIZ(b);
+
+  /* 0^e mod m is 0 for e > 0.  Answer before allocating anything, so that
+     mpn_powm_sec is never handed an empty base operand.  */
+  if (UNLIKELY (bn == 0))
+    {
+      SIZ(r) = 0;
+      return;
+    }
+
+  TMP_MARK;
+  tp = TMP_ALLOC_LIMBS (n + mpn_powm_sec_itch (bn, en, n));
+
+  /* The first n limbs of the allocation hold the result, the rest is
+     scratch for mpn_powm_sec.  */
+  rp = tp;  tp += n;
+
+  bp = PTR(b);
+  ep = PTR(e);
+
+  mpn_powm_sec (rp, bp, bn, ep, en, mp, n, tp);
+
+  rn = n;
+
+  MPN_NORMALIZE (rp, rn);
+
+  if ((ep[0] & 1) && SIZ(b) < 0 && rn != 0)
+    {
+      mpn_sub (rp, PTR(m), n, rp, rn);
+      rn = n;
+      MPN_NORMALIZE (rp, rn);
+    }
+
+  MPZ_REALLOC (r, rn);
+  SIZ(r) = rn;
+  MPN_COPY (PTR(r), rp, rn);
+
+  TMP_FREE;
+}
diff --git a/mpz/powm_ui.c b/mpz/powm_ui.c

new file mode 100644 (file)

index 0000000..64615d1
--- /dev/null
+++ b/mpz/powm_ui.c
@@ -0,0 +1,195 @@
+/* mpz_powm_ui(res,base,exp,mod) -- Set RES to (base**exp) mod MOD.
+
+Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2005 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Store the remainder of {ap,an} divided by {mp,mn} in the limbs at tp.
+   The quotient is written to temporary space that is released again before
+   returning.  */
+static void
+reduce (mp_ptr tp, mp_srcptr ap, mp_size_t an, mp_srcptr mp, mp_size_t mn)
+{
+  mp_size_t quot_limbs = an - mn + 1;
+  mp_ptr quot;
+  TMP_DECL;
+
+  TMP_MARK;
+  quot = TMP_ALLOC_LIMBS (quot_limbs);
+  mpn_tdiv_qr (quot, tp, 0L, ap, an, mp, mn);
+  TMP_FREE;
+}
+
+/* Set R to (B^EL) mod M using left-to-right binary exponentiation.
+
+   A zero M invokes DIVIDE_BY_ZERO.  EL == 0 gives 1 mod M.  The loop works
+   with a copy of M shifted left until its top bit is set, and the result is
+   corrected against that shift at "finishup".  For a negative B and odd EL
+   a non-zero residue is replaced by |M| minus that residue.  */
+void
+mpz_powm_ui (mpz_ptr r, mpz_srcptr b, unsigned long int el, mpz_srcptr m)
+{
+  mp_ptr xp, tp, qp, mp, bp;
+  mp_size_t xn, tn, mn, bn;
+  int m_zero_cnt;
+  int c;
+  mp_limb_t e;
+  TMP_DECL;
+
+  mp = PTR(m);
+  mn = ABSIZ(m);
+  if (mn == 0)
+    DIVIDE_BY_ZERO;
+
+  if (el == 0)
+    {
+      /* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0
+        depending on if MOD equals 1.  */
+      SIZ(r) = (mn == 1 && mp[0] == 1) ? 0 : 1;
+      PTR(r)[0] = 1;
+      return;
+    }
+
+  TMP_MARK;
+
+  /* Normalize m (i.e. make its most significant bit set) as required by
+     division functions below.  */
+  count_leading_zeros (m_zero_cnt, mp[mn - 1]);
+  m_zero_cnt -= GMP_NAIL_BITS;
+  if (m_zero_cnt != 0)
+    {
+      /* From here on mp is the shifted copy; PTR(m) is still the original.  */
+      mp_ptr new_mp = TMP_ALLOC_LIMBS (mn);
+      mpn_lshift (new_mp, mp, mn, m_zero_cnt);
+      mp = new_mp;
+    }
+
+  bn = ABSIZ(b);
+  bp = PTR(b);
+  if (bn > mn)
+    {
+      /* Reduce possibly huge base.  Use a function call to reduce, since we
+        don't want the quotient allocation to live until function return.  */
+      mp_ptr new_bp = TMP_ALLOC_LIMBS (mn);
+      reduce (new_bp, bp, bn, mp, mn);
+      bp = new_bp;
+      bn = mn;
+      /* Canonicalize the base, since we are potentially going to multiply with
+        it quite a few times.  */
+      MPN_NORMALIZE (bp, bn);
+    }
+
+  if (bn == 0)
+    {
+      SIZ(r) = 0;
+      TMP_FREE;
+      return;
+    }
+
+  /* tp receives products, xp holds the running residue, and qp receives
+     the quotients of the divisions, which are not used further.  */
+  tp = TMP_ALLOC_LIMBS (2 * mn + 1);
+  xp = TMP_ALLOC_LIMBS (mn);
+
+  qp = TMP_ALLOC_LIMBS (mn + 1);
+
+  MPN_COPY (xp, bp, bn);
+  xn = bn;
+
+  /* After this, c is the number of exponent bits below the most significant
+     one, and those bits sit at the top of e, highest first.  */
+  e = el;
+  count_leading_zeros (c, e);
+  e = (e << c) << 1;           /* shift the exp bits to the left, lose msb */
+  c = GMP_LIMB_BITS - 1 - c;
+
+  /* Main loop. */
+
+  /* If m is already normalized (high bit of high limb set), and b is the
+     same size, but a bigger value, and e==1, then there's no modular
+     reductions done and we can end up with a result out of range at the
+     end. */
+  if (c == 0)
+    {
+      if (xn == mn && mpn_cmp (xp, mp, mn) >= 0)
+        mpn_sub_n (xp, xp, mp, mn);
+      goto finishup;
+    }
+
+  while (c != 0)
+    {
+      /* Square the running residue, then reduce it if it is long enough.  */
+      mpn_sqr (tp, xp, xn);
+      tn = 2 * xn; tn -= tp[tn - 1] == 0;
+      if (tn < mn)
+       {
+         MPN_COPY (xp, tp, tn);
+         xn = tn;
+       }
+      else
+       {
+         mpn_tdiv_qr (qp, xp, 0L, tp, tn, mp, mn);
+         xn = mn;
+       }
+
+      /* The current exponent bit is the top bit of e; when it is set,
+         multiply by the base as well.  */
+      if ((mp_limb_signed_t) e < 0)
+       {
+         mpn_mul (tp, xp, xn, bp, bn);
+         tn = xn + bn; tn -= tp[tn - 1] == 0;
+         if (tn < mn)
+           {
+             MPN_COPY (xp, tp, tn);
+             xn = tn;
+           }
+         else
+           {
+             mpn_tdiv_qr (qp, xp, 0L, tp, tn, mp, mn);
+             xn = mn;
+           }
+       }
+      e <<= 1;
+      c--;
+    }
+
+ finishup:
+  /* We shifted m left m_zero_cnt steps.  Adjust the result by reducing
+     it with the original MOD.  */
+  if (m_zero_cnt != 0)
+    {
+      /* Shift the residue left by the same amount, reduce by the shifted
+         modulus, and shift the remainder back down.  */
+      mp_limb_t cy;
+      cy = mpn_lshift (tp, xp, xn, m_zero_cnt);
+      tp[xn] = cy; xn += cy != 0;
+
+      if (xn < mn)
+       {
+         MPN_COPY (xp, tp, xn);
+       }
+      else
+       {
+         mpn_tdiv_qr (qp, xp, 0L, tp, xn, mp, mn);
+         xn = mn;
+       }
+      mpn_rshift (xp, xp, xn, m_zero_cnt);
+    }
+  MPN_NORMALIZE (xp, xn);
+
+  /* Negative base with odd exponent: replace a non-zero residue by
+     |m| - residue.  */
+  if ((el & 1) != 0 && SIZ(b) < 0 && xn != 0)
+    {
+      mp = PTR(m);                     /* want original, unnormalized m */
+      mpn_sub (xp, mp, mn, xp, xn);
+      xn = mn;
+      MPN_NORMALIZE (xp, xn);
+    }
+  MPZ_REALLOC (r, xn);
+  SIZ (r) = xn;
+  MPN_COPY (PTR(r), xp, xn);
+
+  TMP_FREE;
+}
diff --git a/mpz/pprime_p.c b/mpz/pprime_p.c

new file mode 100644 (file)

index 0000000..ce501a4
--- /dev/null
+++ b/mpz/pprime_p.c
@@ -0,0 +1,154 @@
+/* mpz_probab_prime_p --
+   An implementation of the probabilistic primality test found in Knuth's
+   Seminumerical Algorithms book.  If the function mpz_probab_prime_p()
+   returns 0 then n is not prime.  If it returns 1, then n is 'probably'
+   prime.  If it returns 2, n is surely prime.  The probability of a false
+   positive is (1/4)**reps, where reps is the number of internal passes of the
+   probabilistic algorithm.  Knuth indicates that 25 passes are reasonable.
+
+Copyright 1991, 1993, 1994, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2005 Free
+Software Foundation, Inc.  Miller-Rabin code contributed by John Amanatides.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+static int isprime __GMP_PROTO ((unsigned long int));
+
+
+/* MPN_MOD_OR_MODEXACT_1_ODD can be used instead of mpn_mod_1 for the trial
+   division.  It gives a result which is not the actual remainder r but a
+   value congruent to r*2^n mod d.  Since all the primes being tested are
+   odd, r*2^n mod p will be 0 if and only if r mod p is 0.  */
+
+/* Test N for primality; REPS is passed through to mpz_millerrabin.
+
+   Returns 0 when N is found to be composite.  When |N| <= 1000000 the answer
+   comes from trial division in isprime and is either 2 or 0.  Otherwise,
+   after the even check and the small-factor checks below, the return value
+   is whatever mpz_millerrabin (n, reps) returns.  A negative N is tested
+   through its absolute value.  */
+int
+mpz_probab_prime_p (mpz_srcptr n, int reps)
+{
+  mp_limb_t r;
+  mpz_t n2;
+
+  /* Handle small and negative n.  */
+  if (mpz_cmp_ui (n, 1000000L) <= 0)
+    {
+      int is_prime;
+      if (mpz_cmpabs_ui (n, 1000000L) <= 0)
+       {
+         is_prime = isprime (mpz_get_ui (n));
+         return is_prime ? 2 : 0;
+       }
+      /* Negative number.  Negate and fall out.  */
+      /* n2 shares n's limbs; only its pointer and size fields are set.  */
+      PTR(n2) = PTR(n);
+      SIZ(n2) = -SIZ(n);
+      n = n2;
+    }
+
+  /* If n is now even, it is not a prime.  */
+  if ((mpz_get_ui (n) & 1) == 0)
+    return 0;
+
+#if defined (PP)
+  /* Check if n has small factors.  */
+  /* r is n reduced modulo PP; which primes are then tested against r
+     depends on GMP_LIMB_BITS.  */
+#if defined (PP_INVERTED)
+  r = MPN_MOD_OR_PREINV_MOD_1 (PTR(n), (mp_size_t) SIZ(n), (mp_limb_t) PP,
+                               (mp_limb_t) PP_INVERTED);
+#else
+  r = mpn_mod_1 (PTR(n), (mp_size_t) SIZ(n), (mp_limb_t) PP);
+#endif
+  if (r % 3 == 0
+#if GMP_LIMB_BITS >= 4
+      || r % 5 == 0
+#endif
+#if GMP_LIMB_BITS >= 8
+      || r % 7 == 0
+#endif
+#if GMP_LIMB_BITS >= 16
+      || r % 11 == 0 || r % 13 == 0
+#endif
+#if GMP_LIMB_BITS >= 32
+      || r % 17 == 0 || r % 19 == 0 || r % 23 == 0 || r % 29 == 0
+#endif
+#if GMP_LIMB_BITS >= 64
+      || r % 31 == 0 || r % 37 == 0 || r % 41 == 0 || r % 43 == 0
+      || r % 47 == 0 || r % 53 == 0
+#endif
+      )
+    {
+      return 0;
+    }
+#endif /* PP */
+
+  /* Do more dividing.  We collect small primes, using umul_ppmm, until we
+     overflow a single limb.  We divide our number by the small primes product,
+     and look for factors in the remainder.  */
+  {
+    unsigned long int ln2;
+    unsigned long int q;
+    mp_limb_t p1, p0, p;
+    unsigned int primes[15];
+    int nprimes;
+
+    /* p is the product of the primes currently stored in primes[].  */
+    nprimes = 0;
+    p = 1;
+    ln2 = mpz_sizeinbase (n, 2);       /* FIXME: tune this limit */
+    for (q = PP_FIRST_OMITTED; q < ln2; q += 2)
+      {
+       if (isprime (q))
+         {
+           umul_ppmm (p1, p0, p, q);
+           if (p1 != 0)
+             {
+               /* p * q no longer fits in one limb: test the batch
+                  collected so far, then start a new batch with q.  */
+               r = MPN_MOD_OR_MODEXACT_1_ODD (PTR(n), (mp_size_t) SIZ(n), p);
+               while (--nprimes >= 0)
+                 if (r % primes[nprimes] == 0)
+                   {
+                     ASSERT_ALWAYS (mpn_mod_1 (PTR(n), (mp_size_t) SIZ(n), (mp_limb_t) primes[nprimes]) == 0);
+                     return 0;
+                   }
+               p = q;
+               nprimes = 0;
+             }
+           else
+             {
+               p = p0;
+             }
+           primes[nprimes++] = q;
+         }
+      }
+    /* NOTE(review): primes collected after the last overflow are not tested
+       here; n still goes through the Miller-Rabin tests below.  */
+  }
+
+  /* Perform a number of Miller-Rabin tests.  */
+  return mpz_millerrabin (n, reps);
+}
+
+/* Return 1 if T is prime and 0 otherwise, by trial division.
+
+   Values below 3 and even values are settled immediately (only 2 is prime
+   among them).  Odd T is divided by 3, 5, 7, ... until the quotient drops
+   below the divisor, which means no divisor was found.  */
+static int
+isprime (unsigned long int t)
+{
+  unsigned long int divisor;
+
+  if ((t & 1) == 0 || t < 3)
+    return t == 2;
+
+  divisor = 3;
+  while (t / divisor >= divisor)
+    {
+      if (t % divisor == 0)
+        return 0;
+      divisor += 2;
+    }
+  return 1;
+}
diff --git a/mpz/random.c b/mpz/random.c

new file mode 100644 (file)

index 0000000..8bf2a36
--- /dev/null
+++ b/mpz/random.c
@@ -0,0 +1,29 @@
+/* mpz_random -- Generate a random mpz_t of specified size in limbs.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Set X through mpz_urandomb, using the RANDS state and a bit count of
+   |SIZE| * GMP_NUMB_BITS, then flip the sign of X when SIZE is negative.  */
+void
+mpz_random (mpz_ptr x, mp_size_t size)
+{
+  mp_size_t limbs = ABS (size);
+  unsigned long nbits = (unsigned long) (limbs * GMP_NUMB_BITS);
+
+  mpz_urandomb (x, RANDS, nbits);
+
+  if (size < 0)
+    SIZ(x) = -SIZ(x);
+}
diff --git a/mpz/random2.c b/mpz/random2.c

new file mode 100644 (file)

index 0000000..f3b8565
--- /dev/null
+++ b/mpz/random2.c
@@ -0,0 +1,40 @@
+/* mpz_random2 -- Generate a random mpz_t of specified size (negative when
+   the size is negative), with long runs of consecutive ones and zeros in
+   the binary representation.  Meant for testing of other MP routines.
+
+Copyright 1991, 1993, 1994, 1996, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Fill X with |SIZE| limbs produced by mpn_random2, growing X's limb array
+   first when it is too small, and store SIZE (sign included) as X's size.
+   A SIZE of 0 touches no limbs and simply makes X zero.  */
+void
+mpz_random2 (mpz_ptr x, mp_size_t size)
+{
+  mp_size_t limbs = ABS (size);
+
+  if (limbs != 0)
+    {
+      if (limbs > ALLOC(x))
+        _mpz_realloc (x, limbs);
+      mpn_random2 (PTR(x), limbs);
+    }
+
+  SIZ(x) = size;
+}
diff --git a/mpz/realloc.c b/mpz/realloc.c

new file mode 100644 (file)

index 0000000..0a6d163
--- /dev/null
+++ b/mpz/realloc.c
@@ -0,0 +1,61 @@
+/* _mpz_realloc -- make the mpz_t have NEW_ALLOC digits allocated.
+
+Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2008 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Resize M's limb array to NEW_ALLOC limbs (at least one) and return the
+   new limb pointer.
+
+   Prints a message on stderr and aborts when NEW_ALLOC exceeds the limit
+   checked below.  If M's current value needs more than NEW_ALLOC limbs, M
+   is set to 0.  */
+void *
+_mpz_realloc (mpz_ptr m, mp_size_t new_alloc)
+{
+  mp_ptr limbs;
+  int too_large;
+
+  /* Never allocate zero space. */
+  if (new_alloc < 1)
+    new_alloc = 1;
+
+  /* The limit that applies depends on how wide mp_size_t is.  */
+  if (sizeof (mp_size_t) == sizeof (int))
+    too_large = new_alloc > ULONG_MAX / GMP_NUMB_BITS;
+  else
+    too_large = new_alloc > INT_MAX;
+
+  if (UNLIKELY (too_large))
+    {
+      fprintf (stderr, "gmp: overflow in mpz type\n");
+      abort ();
+    }
+
+  limbs = __GMP_REALLOCATE_FUNC_LIMBS (PTR(m), ALLOC(m), new_alloc);
+  PTR(m) = limbs;
+  ALLOC(m) = new_alloc;
+
+  /* Don't create an invalid number; if the current value doesn't fit after
+     reallocation, clear it to 0.  */
+  if (new_alloc < ABSIZ(m))
+    SIZ(m) = 0;
+
+  return (void *) limbs;
+}
diff --git a/mpz/realloc2.c b/mpz/realloc2.c

new file mode 100644 (file)

index 0000000..84295f6
--- /dev/null
+++ b/mpz/realloc2.c
@@ -0,0 +1,49 @@
+/* mpz_realloc2 -- change allocated data size.
+
+Copyright 2001, 2002, 2008 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Change the allocation of M to the number of limbs needed for BITS bits
+   (always at least one limb).  Aborts with a message on stderr if the limb
+   count exceeds INT_MAX.  When the value currently held by M needs more
+   limbs than the new allocation provides, M is reset to zero.  */
+void
+mpz_realloc2 (mpz_ptr m, mp_bitcnt_t bits)
+{
+  mp_size_t limbs_wanted;
+
+  /* 1 + (bits-1)/GMP_NUMB_BITS, with bits == 0 treated like bits == 1.  */
+  if (bits != 0)
+    bits--;
+  limbs_wanted = 1 + bits / GMP_NUMB_BITS;
+
+  /* param vs _mp_size field: the check only applies when unsigned long is
+     wider than int.  */
+  if (sizeof (unsigned long) > sizeof (int)
+      && UNLIKELY (limbs_wanted > INT_MAX))
+    {
+      fprintf (stderr, "gmp: overflow in mpz type\n");
+      abort ();
+    }
+
+  PTR(m) = __GMP_REALLOCATE_FUNC_LIMBS (PTR(m), ALLOC(m), limbs_wanted);
+  ALLOC(m) = limbs_wanted;
+
+  /* A value that no longer fits would leave M invalid, so drop it.  */
+  if (limbs_wanted < ABSIZ(m))
+    SIZ(m) = 0;
+}
diff --git a/mpz/remove.c b/mpz/remove.c

new file mode 100644 (file)

index 0000000..21c0023
--- /dev/null
+++ b/mpz/remove.c
@@ -0,0 +1,92 @@
+/* mpz_remove -- divide out a factor and return its multiplicity.
+
+Copyright 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Divide SRC by F as many times as it divides exactly, store what is left
+   in DEST and return the number of factors removed.  F <= 1 triggers
+   DIVIDE_BY_ZERO.  SRC == 0 is copied to DEST unchanged with a count of 0.  */
+mp_bitcnt_t
+mpz_remove (mpz_ptr dest, mpz_srcptr src, mpz_srcptr f)
+{
+  mpz_t fpow[GMP_LIMB_BITS];           /* Really MP_SIZE_T_BITS */
+  mpz_t quot, rem;
+  mp_bitcnt_t count;
+  int k;
+
+  if (mpz_cmp_ui (f, 1) <= 0)
+    DIVIDE_BY_ZERO;
+
+  if (SIZ (src) == 0)
+    {
+      if (src != dest)
+        mpz_set (dest, src);
+      return 0;
+    }
+
+  /* For f == 2 the multiplicity is the index of the lowest set bit, and the
+     division is a plain right shift.  */
+  if (mpz_cmp_ui (f, 2) == 0)
+    {
+      mp_bitcnt_t twos = mpz_scan1 (src, 0);
+      mpz_div_2exp (dest, src, twos);
+      return twos;
+    }
+
+  /* We could perhaps compute mpz_scan1(src,0)/mpz_scan1(f,0).  It is an
+     upper bound of the result we're seeking.  We could also shift down the
+     operands so that they become odd, to make intermediate values smaller.  */
+
+  mpz_init (rem);
+  mpz_init (quot);
+
+  /* fpow[i] holds f^(2^i).  F is copied before DEST is written.  */
+  mpz_init (fpow[0]);
+  mpz_set (fpow[0], f);
+  mpz_set (dest, src);
+
+  /* Ascending phase: divide by f, f^2, f^4, ... and stop at the first
+     divisor f^(2^k) that leaves a remainder.  */
+  k = 0;
+  for (;;)
+    {
+      mpz_tdiv_qr (quot, rem, dest, fpow[k]);
+      if (SIZ (rem) != 0)
+        break;
+      mpz_init (fpow[k + 1]);
+      mpz_mul (fpow[k + 1], fpow[k], fpow[k]);
+      mpz_set (dest, quot);
+      k++;
+    }
+
+  /* The exact divisions so far removed 1 + 2 + ... + 2^(k-1) factors.  */
+  count = (1L << k) - 1;
+
+  mpz_clear (fpow[k]);
+
+  /* Descending phase: retry f^(2^(k-1)), ..., f and keep every division
+     that is exact, releasing each power once it has been tried.  */
+  for (k = k - 1; k >= 0; k--)
+    {
+      mpz_tdiv_qr (quot, rem, dest, fpow[k]);
+      if (SIZ (rem) == 0)
+        {
+          count += 1L << k;
+          mpz_set (dest, quot);
+        }
+      mpz_clear (fpow[k]);
+    }
+
+  mpz_clear (quot);
+  mpz_clear (rem);
+  return count;
+}
diff --git a/mpz/root.c b/mpz/root.c

new file mode 100644 (file)

index 0000000..ece0a99
--- /dev/null
+++ b/mpz/root.c
@@ -0,0 +1,82 @@
+/* mpz_root(root, u, nth) --  Set ROOT to floor(U^(1/nth)).
+   Return an indication if the result is exact.
+
+Copyright 1999, 2000, 2001, 2002, 2003, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>             /* for NULL */
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Compute the integer NTH root of U.  The magnitude comes from mpn_rootrem
+   applied to |U| and the result takes the sign of U.  ROOT may be NULL, in
+   which case only the exactness indication is produced; ROOT may also be
+   the same variable as U.  Returns non-zero when the root is exact.  An
+   even root of a negative U triggers SQRT_OF_NEGATIVE, and NTH == 0
+   triggers DIVIDE_BY_ZERO.  */
+int
+mpz_root (mpz_ptr root, mpz_srcptr u, unsigned long int nth)
+{
+  mp_ptr rp, up;
+  mp_size_t usize, abs_usize, rsize, rem_size;
+  int keep_root;
+  TMP_DECL;
+
+  usize = SIZ(u);
+
+  /* even roots of negatives provoke an exception */
+  if (usize < 0 && (nth & 1) == 0)
+    SQRT_OF_NEGATIVE;
+
+  /* a zeroth root is treated as c^(1/0), so it divides by zero even when
+     c == 0 */
+  if (nth == 0)
+    DIVIDE_BY_ZERO;
+
+  keep_root = (root != NULL);
+
+  if (usize == 0)
+    {
+      if (keep_root)
+        SIZ(root) = 0;
+      return 1;                        /* exact result */
+    }
+
+  abs_usize = ABS (usize);
+  rsize = (abs_usize - 1) / nth + 1;
+
+  TMP_MARK;
+
+  /* FIXME: Perhaps disallow root == NULL */
+  /* Write straight into ROOT unless it is absent or overlaps U; in those
+     cases use scratch space.  */
+  if (keep_root && u != root)
+    rp = MPZ_REALLOC (root, rsize);
+  else
+    rp = TMP_ALLOC_LIMBS (rsize);
+
+  up = PTR(u);
+
+  rem_size = 0;
+  if (nth == 1)
+    {
+      /* First root: the value itself, always exact.  */
+      MPN_COPY (rp, up, abs_usize);
+    }
+  else
+    {
+      rem_size = mpn_rootrem (rp, NULL, up, abs_usize, (mp_limb_t) nth);
+    }
+
+  if (keep_root)
+    {
+      SIZ(root) = (usize < 0) ? -rsize : rsize;
+      if (u == root)
+        {
+          /* Result was built in scratch space; move it into place.  */
+          MPN_COPY (up, rp, rsize);
+        }
+    }
+
+  TMP_FREE;
+  return rem_size == 0;
+}
diff --git a/mpz/rootrem.c b/mpz/rootrem.c

new file mode 100644 (file)

index 0000000..69988d6
--- /dev/null
+++ b/mpz/rootrem.c
@@ -0,0 +1,90 @@
+/* mpz_rootrem(root, rem, u, nth) --  Set ROOT to floor(U^(1/nth)) and
+   set REM to the remainder.
+
+Copyright 1999, 2000, 2001, 2002, 2003, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>             /* for NULL */
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Set ROOT to the integer NTH root of U (magnitude from mpn_rootrem on |U|,
+   sign taken from U) and REM to the remainder U - ROOT^NTH.
+
+   ROOT may be NULL, in which case only the remainder is stored.  ROOT or
+   REM may be the same variable as U.
+   NOTE(review): ROOT == REM does not appear to be supported here.
+
+   An even root of a negative U triggers SQRT_OF_NEGATIVE, and NTH == 0
+   triggers DIVIDE_BY_ZERO.  */
+void
+mpz_rootrem (mpz_ptr root, mpz_ptr rem, mpz_srcptr u, unsigned long int nth)
+{
+  mp_ptr rootp, up, remp;
+  mp_size_t us, un, rootn, remn;
+  TMP_DECL;
+
+  us = SIZ(u);
+
+  /* even roots of negatives provoke an exception */
+  if (us < 0 && (nth & 1) == 0)
+    SQRT_OF_NEGATIVE;
+
+  /* root extraction interpreted as c^(1/nth) means a zeroth root should
+     provoke a divide by zero, do this even if c==0 */
+  if (nth == 0)
+    DIVIDE_BY_ZERO;
+
+  if (us == 0)
+    {
+      if (root != NULL)
+        SIZ(root) = 0;
+      SIZ(rem) = 0;
+      return;
+    }
+
+  un = ABS (us);
+  rootn = (un - 1) / nth + 1;
+
+  TMP_MARK;
+
+  /* FIXME: Perhaps disallow root == NULL */
+  /* Each output is written in place unless it is absent or overlaps U, in
+     which case it is built in scratch space and copied back afterwards.  */
+  if (root != NULL && u != root)
+    rootp = MPZ_REALLOC (root, rootn);
+  else
+    rootp = TMP_ALLOC_LIMBS (rootn);
+
+  if (u != rem)
+    remp = MPZ_REALLOC (rem, un);
+  else
+    remp = TMP_ALLOC_LIMBS (un);
+
+  up = PTR(u);
+
+  if (nth == 1)
+    {
+      /* First root: the value itself, with a zero remainder.  */
+      MPN_COPY (rootp, up, un);
+      remn = 0;
+    }
+  else
+    {
+      remn = mpn_rootrem (rootp, remp, up, un, (mp_limb_t) nth);
+    }
+
+  if (root != NULL)
+    {
+      SIZ(root) = us >= 0 ? rootn : -rootn;
+      if (u == root)
+        MPN_COPY (up, rootp, rootn);
+    }
+
+  /* The copy-back of the remainder must not depend on ROOT being non-NULL:
+     with ROOT == NULL and REM aliasing U the remainder limbs would
+     otherwise never reach REM.  */
+  if (u == rem)
+    MPN_COPY (up, remp, remn);
+
+  /* mpn_rootrem worked on |U|.  For negative U (odd NTH only) the root is
+     negated above, so U - ROOT^NTH is the negated remainder as well.  */
+  SIZ(rem) = us >= 0 ? remn : -remn;
+  TMP_FREE;
+}
diff --git a/mpz/rrandomb.c b/mpz/rrandomb.c

new file mode 100644 (file)

index 0000000..ee8aa35
--- /dev/null
+++ b/mpz/rrandomb.c
@@ -0,0 +1,91 @@
+/* mpz_rrandomb -- Generate a positive random mpz_t of specified bit size, with
+   long runs of consecutive ones and zeros in the binary representation.
+   Meant for testing of other MP routines.
+
+Copyright 2000, 2001, 2002, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+static void gmp_rrandomb __GMP_PROTO ((mp_ptr, gmp_randstate_t, mp_bitcnt_t));
+
+/* Store in X a non-negative random value built by gmp_rrandomb from NBITS
+   bits, drawing randomness from RSTATE.  NBITS == 0 yields zero without
+   touching the limb data.  */
+void
+mpz_rrandomb (mpz_ptr x, gmp_randstate_t rstate, mp_bitcnt_t nbits)
+{
+  mp_size_t limbs_needed;
+
+  if (nbits == 0)
+    {
+      SIZ(x) = 0;
+      return;
+    }
+
+  /* Number of limbs required to hold nbits bits, rounded up.  */
+  limbs_needed = (nbits + GMP_NUMB_BITS - 1) / GMP_NUMB_BITS;
+
+  MPZ_REALLOC (x, limbs_needed);
+  gmp_rrandomb (PTR(x), rstate, nbits);
+
+  SIZ(x) = limbs_needed;
+}
+
+/* Ask _gmp_rand for 32 bits per call unless that's more than a limb can hold.
+   Thus, we get the same random number sequence in the common cases.
+   FIXME: We should always generate the same random number sequence!  */
+#if GMP_NUMB_BITS < 32
+#define BITS_PER_RANDCALL GMP_NUMB_BITS
+#else
+#define BITS_PER_RANDCALL 32
+#endif
+
+/* Fill the ceil(NBITS / GMP_NUMB_BITS) limbs at RP with an NBITS-bit pattern
+   made of alternating runs of ones and zeros whose lengths are drawn from
+   RSTATE.  The caller must pass NBITS > 0 and provide that many limbs (the
+   top limb index is computed as limb count minus one).  */
+static void
+gmp_rrandomb (mp_ptr rp, gmp_randstate_t rstate, mp_bitcnt_t nbits)
+{
+  mp_bitcnt_t bi;
+  mp_limb_t ranm;              /* buffer for random bits */
+  unsigned cap_chunksize, chunksize;
+  mp_size_t i;
+
+  /* Set entire result to 111..1  */
+  /* The top limb only gets its low nbits % GMP_NUMB_BITS bits set; the outer
+     "% GMP_NUMB_BITS" makes the shift count 0 (a full limb) when nbits is a
+     multiple of the limb size.  */
+  i = (nbits + GMP_NUMB_BITS - 1) / GMP_NUMB_BITS - 1;
+  rp[i] = GMP_NUMB_MAX >> (GMP_NUMB_BITS - (nbits % GMP_NUMB_BITS)) % GMP_NUMB_BITS;
+  for (i = i - 1; i >= 0; i--)
+    rp[i] = GMP_NUMB_MAX;
+
+  /* Upper bound for a run length: nbits divided by a random 1..4.  */
+  _gmp_rand (&ranm, rstate, BITS_PER_RANDCALL);
+  cap_chunksize = nbits / (ranm % 4 + 1);
+  cap_chunksize += cap_chunksize == 0; /* make it at least 1 */
+
+  /* bi walks from the top bit position down to 0, one run per step.  */
+  bi = nbits;
+
+  for (;;)
+    {
+      /* Length of the next run of ones, in 1..cap_chunksize; bi is clamped
+         at 0 rather than allowed to wrap.  */
+      _gmp_rand (&ranm, rstate, BITS_PER_RANDCALL);
+      chunksize = 1 + ranm % cap_chunksize;
+      bi = (bi < chunksize) ? 0 : bi - chunksize;
+
+      if (bi == 0)
+       break;                  /* low chunk is ...1 */
+
+      /* Flip bit bi.  Everything at and below bi is still 1 at this point,
+         so this clears it and gives the carry below a place to stop.  */
+      rp[bi / GMP_NUMB_BITS] ^= CNST_LIMB (1) << bi % GMP_NUMB_BITS;
+
+      /* Length of the following run of zeros.  */
+      _gmp_rand (&ranm, rstate, BITS_PER_RANDCALL);
+      chunksize = 1 + ranm % cap_chunksize;
+      bi = (bi < chunksize) ? 0 : bi - chunksize;
+
+      /* Adding 2^bi ripples a carry through the ones from the new bi up to
+         the bit cleared above, turning that stretch into zeros and setting
+         the cleared bit again.  */
+      mpn_incr_u (rp + bi / GMP_NUMB_BITS, CNST_LIMB (1) << bi % GMP_NUMB_BITS);
+
+      if (bi == 0)
+       break;                  /* low chunk is ...0 */
+    }
+}
diff --git a/mpz/scan0.c b/mpz/scan0.c

new file mode 100644 (file)

index 0000000..ac081a6
--- /dev/null
+++ b/mpz/scan0.c
@@ -0,0 +1,119 @@
+/* mpz_scan0 -- search for a 0 bit.
+
+Copyright 2000, 2001, 2002, 2004, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* mpn_scan0 can't be used for the u>0 search since there might not be a 0
+   bit before the end of the data.  mpn_scan1 could be used for the inverted
+   search under u<0, but usually the search won't go very far so it seems
+   reasonable to inline that code.  */
+
+/* Return the index of the lowest 0 bit of U at or above STARTING_BIT.
+   Negative U is examined as if stored in twos complement: the limbs hold
+   the magnitude and the complement is formed on the fly below.  Returns
+   ~(mp_bitcnt_t) 0 when U is negative and no 0 bit exists at or above
+   STARTING_BIT.  */
+mp_bitcnt_t
+mpz_scan0 (mpz_srcptr u, mp_bitcnt_t starting_bit) __GMP_NOTHROW
+{
+  mp_srcptr      u_ptr = PTR(u);
+  mp_size_t      size = SIZ(u);
+  mp_size_t      abs_size = ABS(size);
+  mp_srcptr      u_end = u_ptr + abs_size;
+  mp_size_t      starting_limb = starting_bit / GMP_NUMB_BITS;
+  mp_srcptr      p = u_ptr + starting_limb;
+  mp_limb_t      limb;
+  int            cnt;
+
+  /* When past end, there's an immediate 0 bit for u>=0, or no 0 bits for
+     u<0.  Notice this test picks up all cases of u==0 too. */
+  if (starting_limb >= abs_size)
+    return (size >= 0 ? starting_bit : ~(mp_bitcnt_t) 0);
+
+  limb = *p;
+
+  if (size >= 0)
+    {
+      /* Mask to 1 all bits before starting_bit, thus ignoring them. */
+      limb |= (CNST_LIMB(1) << (starting_bit % GMP_NUMB_BITS)) - 1;
+
+      /* Search for a limb which isn't all ones.  If the end is reached then
+        the zero bit immediately past the end is returned.  */
+      while (limb == GMP_NUMB_MAX)
+       {
+         p++;
+         if (p == u_end)
+           return (mp_bitcnt_t) abs_size * GMP_NUMB_BITS;
+         limb = *p;
+       }
+
+      /* Now seek low 1 bit. */
+      /* Complementing turns the lowest 0 bit into the lowest 1 bit, which
+         the common tail of the function locates.  */
+      limb = ~limb;
+    }
+  else
+    {
+      mp_srcptr  q;
+
+      /* If there's a non-zero limb before ours then we're in the ones
+        complement region.  Search from *(p-1) downwards since that might
+        give better cache locality, and since a non-zero in the middle of a
+        number is perhaps a touch more likely than at the end.  */
+      q = p;
+      while (q != u_ptr)
+       {
+         q--;
+         if (*q != 0)
+           goto inverted;
+       }
+
+      /* Adjust so ~limb implied by searching for 1 bit below becomes -limb.
+        If limb==0 here then this isn't the beginning of twos complement
+        inversion, but that doesn't matter because limb==0 is a zero bit
+        immediately (-1 is all ones for below).  */
+      limb--;
+
+    inverted:
+      /* Now seeking a 1 bit. */
+      /* A 0 bit of the complemented value is a 1 bit of the (adjusted)
+         magnitude limb, so the magnitude is searched for a 1 directly.  */
+
+      /* Mask to 0 all bits before starting_bit, thus ignoring them. */
+      limb &= (MP_LIMB_T_MAX << (starting_bit % GMP_NUMB_BITS));
+
+      if (limb == 0)
+       {
+         /* If the high limb is zero after masking, then no 1 bits past
+            starting_bit.  */
+         p++;
+         if (p == u_end)
+           return ~(mp_bitcnt_t) 0;
+
+         /* Search further for a non-zero limb.  The high limb is non-zero,
+            if nothing else.  */
+         for (;;)
+           {
+             limb = *p;
+             if (limb != 0)
+               break;
+             p++;
+             ASSERT (p < u_end);
+           }
+       }
+    }
+
+  /* Common tail: limb has its lowest 1 bit at the position being sought,
+     and p points at the limb it came from.  */
+  ASSERT (limb != 0);
+  count_trailing_zeros (cnt, limb);
+  return (mp_bitcnt_t) (p - u_ptr) * GMP_NUMB_BITS + cnt;
+}
diff --git a/mpz/scan1.c b/mpz/scan1.c

new file mode 100644 (file)

index 0000000..e7e3c7f
--- /dev/null
+++ b/mpz/scan1.c
@@ -0,0 +1,137 @@
+/* mpz_scan1 -- search for a 1 bit.
+
+Copyright 2000, 2001, 2002, 2004, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* mpn_scan0 can't be used for the inverted u<0 search since there might not
+   be a 0 bit before the end of the data.  mpn_scan1 could be used under u>0
+   (except when in the high limb), but usually the search won't go very far
+   so it seems reasonable to inline that code.  */
+
+/* Return the index of the lowest 1 bit of U at or above STARTING_BIT.
+   Negative U is examined as if stored in twos complement: the limbs hold
+   the magnitude and the complement is formed on the fly below.  Returns
+   ~(mp_bitcnt_t) 0 when U is non-negative and no 1 bit exists at or above
+   STARTING_BIT.  */
+mp_bitcnt_t
+mpz_scan1 (mpz_srcptr u, mp_bitcnt_t starting_bit) __GMP_NOTHROW
+{
+  mp_srcptr      u_ptr = PTR(u);
+  mp_size_t      size = SIZ(u);
+  mp_size_t      abs_size = ABS(size);
+  mp_srcptr      u_end = u_ptr + abs_size;
+  mp_size_t      starting_limb = starting_bit / GMP_NUMB_BITS;
+  mp_srcptr      p = u_ptr + starting_limb;
+  mp_limb_t      limb;
+  int            cnt;
+
+  /* Past the end there's no 1 bits for u>=0, or an immediate 1 bit for u<0.
+     Notice this test picks up any u==0 too. */
+  if (starting_limb >= abs_size)
+    return (size >= 0 ? ~(mp_bitcnt_t) 0 : starting_bit);
+
+  limb = *p;
+
+  if (size >= 0)
+    {
+      /* Mask to 0 all bits before starting_bit, thus ignoring them. */
+      limb &= (MP_LIMB_T_MAX << (starting_bit % GMP_NUMB_BITS));
+
+      if (limb == 0)
+       {
+         /* If it's the high limb which is zero after masking, then there's
+            no 1 bits after starting_bit.  */
+         p++;
+         if (p == u_end)
+           return ~(mp_bitcnt_t) 0;
+
+         /* Otherwise search further for a non-zero limb.  The high limb is
+            non-zero, if nothing else.  */
+         for (;;)
+           {
+             limb = *p;
+             if (limb != 0)
+               break;
+             p++;
+             ASSERT (p < u_end);
+           }
+       }
+    }
+  else
+    {
+      mp_srcptr  q;
+
+      /* If there's a non-zero limb before ours then we're in the ones
+        complement region.  Search from *(p-1) downwards since that might
+        give better cache locality, and since a non-zero in the middle of a
+        number is perhaps a touch more likely than at the end.  */
+      q = p;
+      while (q != u_ptr)
+       {
+         q--;
+         if (*q != 0)
+           goto inverted;
+       }
+
+      /* Reaching here means every limb below p is zero, so p is at or below
+         the limb where the twos complement negation takes effect.  */
+      if (limb == 0)
+       {
+         /* Skip zero limbs, to find the start of twos complement.  The
+            high limb is non-zero, if nothing else.  This search is
+            necessary so the -limb is applied at the right spot. */
+         do
+           {
+             p++;
+             ASSERT (p < u_end);
+             limb = *p;
+           }
+         while (limb == 0);
+
+         /* Apply twos complement, and look for a 1 bit in that.  Since
+            limb!=0 here, also have (-limb)!=0 so there's certainly a 1
+            bit.  */
+         limb = -limb;
+         goto got_limb;
+       }
+
+      /* Adjust so ~limb implied by searching for 0 bit becomes -limb.  */
+      limb--;
+
+    inverted:
+      /* Now seeking a 0 bit. */
+      /* A 1 bit of the complemented value is a 0 bit of the (adjusted)
+         magnitude limb, so the magnitude is searched for a 0 instead.  */
+
+      /* Mask to 1 all bits before starting_bit, thus ignoring them. */
+      limb |= (CNST_LIMB(1) << (starting_bit % GMP_NUMB_BITS)) - 1;
+
+      /* Search for a limb which is not all ones.  If the end is reached
+        then the zero immediately past the end is the result.  */
+      while (limb == GMP_NUMB_MAX)
+       {
+         p++;
+         if (p == u_end)
+           return (mp_bitcnt_t) abs_size * GMP_NUMB_BITS;
+         limb = *p;
+       }
+
+      /* Now seeking low 1 bit. */
+      limb = ~limb;
+    }
+
+ got_limb:
+  /* Common tail: limb has its lowest 1 bit at the position being sought,
+     and p points at the limb it came from.  */
+  ASSERT (limb != 0);
+  count_trailing_zeros (cnt, limb);
+  return (mp_bitcnt_t) (p - u_ptr) * GMP_NUMB_BITS + cnt;
+}
diff --git a/mpz/set.c b/mpz/set.c

new file mode 100644 (file)

index 0000000..d7366c8
--- /dev/null
+++ b/mpz/set.c
@@ -0,0 +1,53 @@
+/* mpz_set (dest_integer, src_integer) -- Assign DEST_INTEGER from SRC_INTEGER.
+
+Copyright 1991, 1993, 1994, 1995, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* The same body is compiled either as the Berkeley MP "move" (source
+   first, destination second) or as mpz_set (destination first).  */
+#ifdef BERKELEY_MP
+#include "mp.h"
+#define FUNCTION   move
+#define ARGUMENTS  mpz_srcptr u, mpz_ptr w
+
+#else
+#define FUNCTION   mpz_set
+#define ARGUMENTS  mpz_ptr w, mpz_srcptr u
+
+#endif
+
+
+/* Copy the value of U into W, enlarging W's limb array first if it has
+   fewer limbs allocated than U uses.  */
+void
+FUNCTION (ARGUMENTS)
+{
+  mp_size_t signed_size = SIZ (u);
+  mp_size_t nlimbs = ABS (signed_size);
+  mp_ptr dst;
+
+  /* Reallocates only when nlimbs exceeds W's current allocation; yields
+     W's limb pointer either way.  */
+  dst = MPZ_REALLOC (w, nlimbs);
+
+  MPN_COPY (dst, PTR (u), nlimbs);
+
+  /* The sign travels in the size field.  */
+  SIZ (w) = signed_size;
+}
diff --git a/mpz/set_d.c b/mpz/set_d.c

new file mode 100644 (file)

index 0000000..2e7fce1
--- /dev/null
+++ b/mpz/set_d.c
@@ -0,0 +1,107 @@
+/* mpz_set_d(integer, val) -- Assign INTEGER with a double value VAL.
+
+Copyright 1995, 1996, 2000, 2001, 2002, 2003, 2006 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#if HAVE_FLOAT_H
+#include <float.h>  /* for DBL_MAX */
+#endif
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* We used to have a special case for d < MP_BASE_AS_DOUBLE, just casting
+   double -> limb.  Unfortunately gcc 3.3 on powerpc970-apple-darwin6.8.5
+   got this wrong.  (It assumed __fixunsdfdi returned its result in a single
+   64-bit register, where instead that function followed the calling
+   conventions and gave the result in two parts r3 and r4.)  Hence the use
+   of __gmp_extract_double in all cases.  */
+
+/* Assign the double D to R.  A NaN or infinite D invokes
+   __gmp_invalid_operation.  The magnitude is split into limbs by
+   __gmp_extract_double and the sign is applied through the size field.
+   NOTE(review): any fractional part is presumably discarded by
+   __gmp_extract_double -- confirm against its definition.  */
+void
+mpz_set_d (mpz_ptr r, double d)
+{
+  int negative;
+  mp_limb_t tp[LIMBS_PER_DOUBLE];
+  mp_ptr rp;
+  mp_size_t rn;
+
+  DOUBLE_NAN_INF_ACTION (d,
+                         __gmp_invalid_operation (),
+                         __gmp_invalid_operation ());
+
+  /* Work on |d|; the sign is reapplied when SIZ(r) is stored.  */
+  negative = d < 0;
+  d = ABS (d);
+
+  /* rn is used below as the limb count of the result; tp[] receives
+     LIMBS_PER_DOUBLE limbs with the most significant one last.  */
+  rn = __gmp_extract_double (tp, d);
+
+  if (ALLOC(r) < rn)
+    _mpz_realloc (r, rn);
+
+  /* A non-positive count means the result is zero.  */
+  if (rn <= 0)
+    rn = 0;
+
+  rp = PTR (r);
+
+  /* Place the limbs of tp[] at the top of the rn-limb result.  When rn is
+     below LIMBS_PER_DOUBLE only the highest rn limbs of tp[] are kept; when
+     it is above, the extra low limbs of the result are zeroed.  */
+  switch (rn)
+    {
+    default:
+      /* rn > LIMBS_PER_DOUBLE: zero the low limbs, then fall into the
+         case that copies all of tp[].  */
+      MPN_ZERO (rp, rn - LIMBS_PER_DOUBLE);
+      rp += rn - LIMBS_PER_DOUBLE;
+      /* fall through */
+#if LIMBS_PER_DOUBLE == 2
+    case 2:
+      rp[1] = tp[1], rp[0] = tp[0];
+      break;
+    case 1:
+      rp[0] = tp[1];
+      break;
+#endif
+#if LIMBS_PER_DOUBLE == 3
+    case 3:
+      rp[2] = tp[2], rp[1] = tp[1], rp[0] = tp[0];
+      break;
+    case 2:
+      rp[1] = tp[2], rp[0] = tp[1];
+      break;
+    case 1:
+      rp[0] = tp[2];
+      break;
+#endif
+#if LIMBS_PER_DOUBLE == 4
+    case 4:
+      rp[3] = tp[3], rp[2] = tp[2], rp[1] = tp[1], rp[0] = tp[0];
+      break;
+    case 3:
+      rp[2] = tp[3], rp[1] = tp[2], rp[0] = tp[1];
+      break;
+    case 2:
+      rp[1] = tp[3], rp[0] = tp[2];
+      break;
+    case 1:
+      rp[0] = tp[3];
+      break;
+#endif
+    case 0:
+      break;
+    }
+
+  SIZ(r) = negative ? -rn : rn;
+}
diff --git a/mpz/set_f.c b/mpz/set_f.c

new file mode 100644 (file)

index 0000000..b939b66
--- /dev/null
+++ b/mpz/set_f.c
@@ -0,0 +1,62 @@
+/* mpz_set_f (dest_integer, src_float) -- Assign DEST_INTEGER from SRC_FLOAT.
+
+Copyright 1996, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Assign to W the integer part of the float U: the top EXP(U) limbs of U's
+   value, with low zero limbs appended when U stores fewer limbs than its
+   exponent.  A U whose exponent is <= 0 gives zero.  */
+void
+mpz_set_f (mpz_ptr w, mpf_srcptr u)
+{
+  mp_ptr    dst, src;
+  mp_size_t usize, ncopy;
+  mp_exp_t  exp;
+
+  /* abs(u)<1 truncates to zero */
+  exp = EXP (u);
+  if (exp <= 0)
+    {
+      SIZ(w) = 0;
+      return;
+    }
+
+  /* The result always occupies exactly "exp" limbs.  */
+  MPZ_REALLOC (w, exp);
+  dst = PTR(w);
+  src = PTR(u);
+
+  usize = SIZ (u);
+  SIZ(w) = (usize < 0 ? -exp : exp);
+  ncopy = ABS (usize);
+
+  if (exp > ncopy)
+    {
+      /* pad with low zeros to get a total "exp" many limbs */
+      mp_size_t  pad = exp - ncopy;
+      MPN_ZERO (dst, pad);
+      dst += pad;
+    }
+  else
+    {
+      /* exp<=size, truncate to the high "exp" many limbs */
+      src += (ncopy - exp);
+      ncopy = exp;
+    }
+
+  MPN_COPY (dst, src, ncopy);
+}
diff --git a/mpz/set_q.c b/mpz/set_q.c

new file mode 100644 (file)

index 0000000..13e59ae
--- /dev/null
+++ b/mpz/set_q.c
@@ -0,0 +1,24 @@
+/* mpz_set_q (dest_integer, src_rational) -- Assign DEST_INTEGER from
+   SRC_rational.
+
+Copyright 1996, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define __GMP_FORCE_mpz_set_q 1
+
+#include "gmp.h"
+#include "gmp-impl.h"
diff --git a/mpz/set_si.c b/mpz/set_si.c

new file mode 100644 (file)

index 0000000..bffb2ee
--- /dev/null
+++ b/mpz/set_si.c
@@ -0,0 +1,45 @@
+/* mpz_set_si(dest,val) -- Assign DEST with a small value VAL.
+
+Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2002 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Assign the signed long VAL to DEST.  The magnitude is stored in limb 0
+   (and limb 1 in nail builds when it does not fit in one limb); the sign
+   goes into the size field.  */
+void
+mpz_set_si (mpz_ptr dest, signed long int val)
+{
+  mp_limb_t magnitude;
+  mp_size_t nlimbs;
+
+  magnitude = (mp_limb_t) ABS_CAST (unsigned long int, val);
+
+  /* Limb 0 is written without a reallocation check.  */
+  PTR(dest)[0] = magnitude & GMP_NUMB_MASK;
+  nlimbs = (magnitude != 0);
+
+#if GMP_NAIL_BITS != 0
+  /* With nails a limb holds fewer bits than the magnitude may need.  */
+  if (magnitude > GMP_NUMB_MAX)
+    {
+      MPZ_REALLOC (dest, 2);
+      PTR(dest)[1] = magnitude >> GMP_NUMB_BITS;
+      nlimbs = 2;
+    }
+#endif
+
+  SIZ(dest) = (val < 0) ? -nlimbs : nlimbs;
+}
diff --git a/mpz/set_str.c b/mpz/set_str.c

new file mode 100644 (file)

index 0000000..550c486
--- /dev/null
+++ b/mpz/set_str.c
@@ -0,0 +1,135 @@
+/* mpz_set_str(mp_dest, string, base) -- Convert the \0-terminated
+   string STRING in base BASE to multiple precision integer in
+   MP_DEST.  Allow white space in the string.  If BASE == 0 determine
+   the base from the prefix: 0xhh...h or 0Xhh...h means base 16,
+   0bdd...d or 0Bdd...d means base 2, 0oo...o means base 8, otherwise
+   assume base 10.
+
+Copyright 1991, 1993, 1994, 1996, 1997, 1998, 2000, 2001, 2002, 2003, 2005
+Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <string.h>
+#include <ctype.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+extern const unsigned char __gmp_digit_value_tab[];
+#define digit_value_tab __gmp_digit_value_tab
+
+/* Parse STR as an integer in base BASE and store it in X.  Returns 0 on
+   success and -1 when BASE is above 62, when the first significant
+   character is not a valid digit, or when any later non-space character is
+   not a digit of the base.  On a -1 return X has not been written.
+
+   Leading white space, one optional '-', and white space between digits
+   are accepted.  With BASE == 0 a "0x"/"0X" prefix selects base 16,
+   "0b"/"0B" base 2, a bare leading "0" base 8, and anything else
+   base 10.  */
+int
+mpz_set_str (mpz_ptr x, const char *str, int base)
+{
+  size_t str_size;
+  char *s, *begs;
+  size_t i;
+  mp_size_t xsize;
+  int c;
+  int negative;
+  const unsigned char *digit_value;
+  TMP_DECL;
+
+  digit_value = digit_value_tab;
+  if (base > 36)
+    {
+      /* For bases > 36, use the collating sequence
+        0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz.  */
+      /* NOTE(review): offset 224 presumably selects a second, case-sensitive
+         section of __gmp_digit_value_tab -- confirm against that table.  */
+      digit_value += 224;
+      if (base > 62)
+       return -1;              /* too large base */
+    }
+
+  /* Throughout, c holds the current character and str already points one
+     past it.  */
+
+  /* Skip whitespace.  */
+  do
+    c = (unsigned char) *str++;
+  while (isspace (c));
+
+  negative = 0;
+  if (c == '-')
+    {
+      negative = 1;
+      c = (unsigned char) *str++;
+    }
+
+  /* With base 0 the real base is not known yet; decimal digits are the
+     widest set that can begin such a number, hence the limit of 10.  */
+  if (digit_value[c] >= (base == 0 ? 10 : base))
+    return -1;                 /* error if no valid digits */
+
+  /* If BASE is 0, try to find out the base by looking at the initial
+     characters.  */
+  if (base == 0)
+    {
+      base = 10;
+      if (c == '0')
+       {
+         base = 8;
+         c = (unsigned char) *str++;
+         if (c == 'x' || c == 'X')
+           {
+             base = 16;
+             c = (unsigned char) *str++;
+           }
+         else if (c == 'b' || c == 'B')
+           {
+             base = 2;
+             c = (unsigned char) *str++;
+           }
+       }
+    }
+
+  /* Skip leading zeros and white space.  */
+  while (c == '0' || isspace (c))
+    c = (unsigned char) *str++;
+  /* Make sure the string does not become empty, mpn_set_str would fail.  */
+  if (c == 0)
+    {
+      x->_mp_size = 0;
+      return 0;
+    }
+
+  TMP_MARK;
+  /* str is one past c, so str - 1 is the first significant character.  */
+  str_size = strlen (str - 1);
+  s = begs = (char *) TMP_ALLOC (str_size + 1);
+
+  /* Remove spaces from the string and convert the result from ASCII to a
+     byte array.  */
+  for (i = 0; i < str_size; i++)
+    {
+      if (!isspace (c))
+       {
+         int dig = digit_value[c];
+         if (dig >= base)
+           {
+             /* Invalid digit: release the scratch buffer and fail, leaving
+                x untouched.  */
+             TMP_FREE;
+             return -1;
+           }
+         *s++ = dig;
+       }
+      c = (unsigned char) *str++;
+    }
+
+  /* Number of digit values actually stored (white space was dropped).  */
+  str_size = s - begs;
+
+  /* Limb estimate from the digit count and the per-base chars-per-bit
+     factor, plus 2 limbs on top of the truncated quotient.  */
+  xsize = 2 + (mp_size_t)
+    (str_size / (GMP_NUMB_BITS * mp_bases[base].chars_per_bit_exactly));
+  MPZ_REALLOC (x, xsize);
+
+  /* Convert the byte array in base BASE to our bignum format.  */
+  xsize = mpn_set_str (x->_mp_d, (unsigned char *) begs, str_size, base);
+  x->_mp_size = negative ? -xsize : xsize;
+
+  TMP_FREE;
+  return 0;
+}
diff --git a/mpz/set_ui.c b/mpz/set_ui.c

new file mode 100644 (file)

index 0000000..13afc6a
--- /dev/null
+++ b/mpz/set_ui.c
@@ -0,0 +1,42 @@
+/* mpz_set_ui(integer, val) -- Assign INTEGER with a small value VAL.
+
+Copyright 1991, 1993, 1994, 1995, 2001, 2002, 2004 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpz_set_ui (mpz_ptr dest, unsigned long int val)
+{
+  /* One limb is in use unless VAL is zero; the low limb is stored always.  */
+  mp_size_t used = (val != 0) ? 1 : 0;
+  dest->_mp_d[0] = val & GMP_NUMB_MASK;
+
+#if BITS_PER_ULONG > GMP_NUMB_BITS  /* avoid warnings about shift amount */
+  /* VAL has bits above the low limb; they go into a second limb.  */
+  if (val > GMP_NUMB_MAX)
+    {
+      MPZ_REALLOC (dest, 2);
+      dest->_mp_d[1] = val >> GMP_NUMB_BITS;
+      used = 2;
+    }
+#endif
+
+  dest->_mp_size = used;
+}
diff --git a/mpz/setbit.c b/mpz/setbit.c

new file mode 100644 (file)

index 0000000..6d9b402
--- /dev/null
+++ b/mpz/setbit.c
@@ -0,0 +1,117 @@
+/* mpz_setbit -- set a specified bit.
+
+Copyright 1991, 1993, 1994, 1995, 1997, 1999, 2001, 2002 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpz_setbit (mpz_ptr d, mp_bitcnt_t bit_index)
+{
+  mp_size_t dsize = d->_mp_size;
+  mp_ptr dp = d->_mp_d;
+  mp_size_t limb_index;
+  /* Set bit BIT_INDEX of D.  A negative D is treated as two's complement.  */
+  limb_index = bit_index / GMP_NUMB_BITS;  /* limb holding the bit */
+  if (dsize >= 0)
+    {
+      if (limb_index < dsize)
+       {
+         dp[limb_index] |= (mp_limb_t) 1 << (bit_index % GMP_NUMB_BITS);
+         d->_mp_size = dsize;  /* size unchanged */
+       }
+      else
+       {
+         /* Ugh.  The bit should be set outside of the end of the
+            number.  We have to increase the size of the number.  */
+         if (UNLIKELY (d->_mp_alloc < limb_index + 1))
+            dp = _mpz_realloc (d, limb_index + 1);
+         MPN_ZERO (dp + dsize, limb_index - dsize);
+         dp[limb_index] = (mp_limb_t) 1 << (bit_index % GMP_NUMB_BITS);
+         d->_mp_size = limb_index + 1;
+       }
+    }
+  else
+    {
+      mp_size_t zero_bound;
+
+      /* Simulate two's complement arithmetic, i.e. simulate
+        1. Set OP = ~(OP - 1) [with infinitely many leading ones].
+        2. Set the bit.
+        3. Set OP = ~OP + 1.  */
+
+      dsize = -dsize;
+
+      /* No upper bound on this loop, we're sure there's a non-zero limb
+        sooner or later.  */
+      for (zero_bound = 0; ; zero_bound++)
+       if (dp[zero_bound] != 0)
+         break;
+      /* zero_bound now indexes the lowest non-zero limb of D.  */
+      if (limb_index > zero_bound)  /* above it: clear the bit in the limb */
+       {
+         if (limb_index < dsize)  /* else: beyond D, nothing to change */
+            {
+              mp_limb_t  dlimb;
+              dlimb = dp[limb_index];
+              dlimb &= ~((mp_limb_t) 1 << (bit_index % GMP_NUMB_BITS));
+              dp[limb_index] = dlimb;
+
+              if (UNLIKELY (dlimb == 0 && limb_index == dsize-1))
+                {
+                  /* high limb became zero, must normalize */
+                  do {
+                    dsize--;
+                  } while (dsize > 0 && dp[dsize-1] == 0);
+                  d->_mp_size = -dsize;
+                }
+            }
+       }
+      else if (limb_index == zero_bound)  /* bit is in the lowest non-zero limb */
+       {
+         dp[limb_index] = ((dp[limb_index] - 1)
+                           & ~((mp_limb_t) 1 << (bit_index % GMP_NUMB_BITS))) + 1;
+         if (dp[limb_index] == 0)  /* the + 1 wrapped around: carry upwards */
+           {
+             mp_size_t i;
+             for (i = limb_index + 1; i < dsize; i++)
+               {
+                 dp[i] += 1;
+                 if (dp[i] != 0)
+                   goto fin;
+               }
+             /* We got carry all way out beyond the end of D.  Increase
+                its size (and allocation if necessary).  */
+             dsize++;
+             if (UNLIKELY (d->_mp_alloc < dsize))
+                dp = _mpz_realloc (d, dsize);
+             dp[i] = 1;
+             d->_mp_size = -dsize;
+           fin:;
+           }
+       }
+      else  /* limb_index < zero_bound */
+       {
+         mpn_decr_u (dp + limb_index,
+                    (mp_limb_t) 1 << (bit_index % GMP_NUMB_BITS));
+         dsize -= dp[dsize - 1] == 0;  /* top limb may now be zero */
+         d->_mp_size = -dsize;
+       }
+    }
+}
diff --git a/mpz/size.c b/mpz/size.c

new file mode 100644 (file)

index 0000000..b310228
--- /dev/null
+++ b/mpz/size.c
@@ -0,0 +1,24 @@
+/* mpz_size(x) -- return the number of limbs currently used by the
+   value of integer X.
+
+Copyright 1991, 1993, 1994, 1995, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define __GMP_FORCE_mpz_size 1
+
+#include "gmp.h"
+#include "gmp-impl.h"
diff --git a/mpz/sizeinbase.c b/mpz/sizeinbase.c

new file mode 100644 (file)

index 0000000..d70e878
--- /dev/null
+++ b/mpz/sizeinbase.c
@@ -0,0 +1,32 @@
+/* mpz_sizeinbase(x, base) -- return an approximation to the number of
+   characters the integer X would have printed in base BASE.  The
+   approximation is never too small.
+
+Copyright 1991, 1993, 1994, 1995, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+size_t
+mpz_sizeinbase (mpz_srcptr x, int base) __GMP_NOTHROW
+{
+  size_t  ndigits;  /* assigned by MPN_SIZEINBASE below */
+  MPN_SIZEINBASE (ndigits, PTR(x), ABSIZ(x), base);
+  return ndigits;
+}
diff --git a/mpz/sqrt.c b/mpz/sqrt.c

new file mode 100644 (file)

index 0000000..6de2120
--- /dev/null
+++ b/mpz/sqrt.c
@@ -0,0 +1,84 @@
+/* mpz_sqrt(root, u) --  Set ROOT to floor(sqrt(U)).
+
+Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2005 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h> /* for NULL */
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpz_sqrt (mpz_ptr root, mpz_srcptr op)
+{
+  mp_size_t op_size, root_size;
+  mp_ptr root_ptr, op_ptr;
+  mp_ptr free_me = NULL;
+  mp_size_t free_me_size;
+  TMP_DECL;
+  /* Compute ROOT = floor(sqrt(OP)); ROOT and OP may share their limbs.  */
+  TMP_MARK;
+  op_size = op->_mp_size;
+  if (op_size <= 0)  /* OP is zero or negative */
+    {
+      if (op_size < 0)
+        SQRT_OF_NEGATIVE;
+      SIZ(root) = 0;
+      return;
+    }
+
+  /* The size of the root is accurate after this simple calculation.  */
+  root_size = (op_size + 1) / 2;
+
+  root_ptr = root->_mp_d;
+  op_ptr = op->_mp_d;
+  /* ROOT's limb area is too small: replace it rather than resize it.  */
+  if (root->_mp_alloc < root_size)
+    {
+      if (root_ptr == op_ptr)
+       {
+         free_me = root_ptr;  /* OP's limbs too: free only after use */
+         free_me_size = root->_mp_alloc;
+       }
+      else
+       (*__gmp_free_func) (root_ptr, root->_mp_alloc * BYTES_PER_MP_LIMB);
+      /* A fresh block is allocated; the old contents are not carried over.  */
+      root->_mp_alloc = root_size;
+      root_ptr = (mp_ptr) (*__gmp_allocate_func) (root_size * BYTES_PER_MP_LIMB);
+      root->_mp_d = root_ptr;
+    }
+  else
+    {
+      /* Make OP not overlap with ROOT.  */
+      if (root_ptr == op_ptr)
+       {
+         /* ROOT and OP are identical.  Allocate temporary space for OP.  */
+         op_ptr = TMP_ALLOC_LIMBS (op_size);
+         /* Copy to the temporary space.  Hack: Avoid temporary variable
+            by using ROOT_PTR.  */
+         MPN_COPY (op_ptr, root_ptr, op_size);
+       }
+    }
+  /* The second argument is NULL: no remainder area is supplied here.  */
+  mpn_sqrtrem (root_ptr, NULL, op_ptr, op_size);
+
+  root->_mp_size = root_size;
+  /* Deferred release of the limbs that OP was still using (aliased case).  */
+  if (free_me != NULL)
+    (*__gmp_free_func) (free_me, free_me_size * BYTES_PER_MP_LIMB);
+  TMP_FREE;
+}
diff --git a/mpz/sqrtrem.c b/mpz/sqrtrem.c

new file mode 100644 (file)

index 0000000..ed8a85b
--- /dev/null
+++ b/mpz/sqrtrem.c
@@ -0,0 +1,101 @@
+/* mpz_sqrtrem(root,rem,x) -- Set ROOT to floor(sqrt(X)) and REM
+   to the remainder, i.e. X - ROOT**2.
+
+Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2005 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h> /* for NULL */
+#include "gmp.h"
+#include "gmp-impl.h"
+#ifdef BERKELEY_MP
+#include "mp.h"
+#endif
+
+void
+#ifndef BERKELEY_MP
+mpz_sqrtrem (mpz_ptr root, mpz_ptr rem, mpz_srcptr op)
+#else /* BERKELEY_MP */
+msqrt (mpz_srcptr op, mpz_ptr root, mpz_ptr rem)
+#endif /* BERKELEY_MP */
+{
+  mp_size_t op_size, root_size, rem_size;
+  mp_ptr root_ptr, op_ptr;
+  mp_ptr free_me = NULL;
+  mp_size_t free_me_size;
+  TMP_DECL;
+  /* Compute ROOT = floor(sqrt(OP)) and REM = OP - ROOT**2.  */
+  TMP_MARK;
+  op_size = op->_mp_size;
+  if (op_size <= 0)  /* OP is zero or negative */
+    {
+      if (op_size < 0)
+        SQRT_OF_NEGATIVE;
+      SIZ(root) = 0;
+      SIZ(rem) = 0;
+      return;
+    }
+  /* Give the remainder area room for op_size limbs.  */
+  if (rem->_mp_alloc < op_size)
+    _mpz_realloc (rem, op_size);
+
+  /* The size of the root is accurate after this simple calculation.  */
+  root_size = (op_size + 1) / 2;
+
+  root_ptr = root->_mp_d;
+  op_ptr = op->_mp_d;
+  /* ROOT's limb area is too small: replace it rather than resize it.  */
+  if (root->_mp_alloc < root_size)
+    {
+      if (root_ptr == op_ptr)
+       {
+         free_me = root_ptr;  /* OP's limbs too: free only after use */
+         free_me_size = root->_mp_alloc;
+       }
+      else
+       (*__gmp_free_func) (root_ptr, root->_mp_alloc * BYTES_PER_MP_LIMB);
+      /* A fresh block is allocated; the old contents are not carried over.  */
+      root->_mp_alloc = root_size;
+      root_ptr = (mp_ptr) (*__gmp_allocate_func) (root_size * BYTES_PER_MP_LIMB);
+      root->_mp_d = root_ptr;
+    }
+  else
+    {
+      /* Make OP not overlap with ROOT.  */
+      if (root_ptr == op_ptr)
+       {
+         /* ROOT and OP are identical.  Allocate temporary space for OP.  */
+         op_ptr = TMP_ALLOC_LIMBS (op_size);
+         /* Copy to the temporary space.  Hack: Avoid temporary variable
+            by using ROOT_PTR.  */
+         MPN_COPY (op_ptr, root_ptr, op_size);
+       }
+    }
+  /* mpn_sqrtrem's return value is stored as the remainder's limb count.  */
+  rem_size = mpn_sqrtrem (root_ptr, rem->_mp_d, op_ptr, op_size);
+
+  root->_mp_size = root_size;
+
+  /* Write remainder size last, to enable us to define this function to
+     give only the square root remainder, if the user calls it with
+     ROOT == REM.  */
+  rem->_mp_size = rem_size;
+  /* Deferred release of the limbs that OP was still using (aliased case).  */
+  if (free_me != NULL)
+    (*__gmp_free_func) (free_me, free_me_size * BYTES_PER_MP_LIMB);
+  TMP_FREE;
+}
diff --git a/mpz/sub.c b/mpz/sub.c

new file mode 100644 (file)

index 0000000..1b7ac49
--- /dev/null
+++ b/mpz/sub.c
@@ -0,0 +1,22 @@
+/* mpz_sub -- subtract integers.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#define OPERATION_sub
+#include "aors.h"
diff --git a/mpz/sub_ui.c b/mpz/sub_ui.c

new file mode 100644 (file)

index 0000000..d0fe369
--- /dev/null
+++ b/mpz/sub_ui.c
@@ -0,0 +1,22 @@
+/* mpz_sub_ui -- Subtract an mpz_t and an unsigned one-word integer.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#define OPERATION_sub_ui
+#include "aors_ui.h"
diff --git a/mpz/swap.c b/mpz/swap.c

new file mode 100644 (file)

index 0000000..de8195a
--- /dev/null
+++ b/mpz/swap.c
@@ -0,0 +1,44 @@
+/* mpz_swap (dest_integer, src_integer) -- Swap U and V.
+
+Copyright 1997, 1998, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpz_swap (mpz_ptr u, mpz_ptr v) __GMP_NOTHROW
+{
+  /* Exchange U and V field by field; no limb data is copied.  */
+  mp_size_t tmp_alloc, tmp_size;
+  mp_ptr tmp_limbs;
+
+  /* Allocation counts.  */
+  tmp_alloc = u->_mp_alloc;
+  u->_mp_alloc = v->_mp_alloc;
+  v->_mp_alloc = tmp_alloc;
+
+  /* Signed sizes.  */
+  tmp_size = u->_mp_size;
+  u->_mp_size = v->_mp_size;
+  v->_mp_size = tmp_size;
+
+  /* Limb pointers.  */
+  tmp_limbs = u->_mp_d;
+  u->_mp_d = v->_mp_d;
+  v->_mp_d = tmp_limbs;
+}
diff --git a/mpz/tdiv_q.c b/mpz/tdiv_q.c

new file mode 100644 (file)

index 0000000..e78dd64
--- /dev/null
+++ b/mpz/tdiv_q.c
@@ -0,0 +1,83 @@
+/* mpz_tdiv_q -- divide two integers and produce a quotient.
+
+Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2005, 2010 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+void
+mpz_tdiv_q (mpz_ptr quot, mpz_srcptr num, mpz_srcptr den)
+{
+  mp_size_t num_siz, den_siz;  /* signed sizes as stored in the operands */
+  mp_size_t nn, dn, qn;        /* limb counts: numerator, denominator, quotient */
+  mp_ptr np, dp, qp, scratch;
+  TMP_DECL;
+
+  num_siz = SIZ (num);
+  den_siz = SIZ (den);
+  nn = ABS (num_siz);
+  dn = ABS (den_siz);
+
+  if (dn == 0)
+    DIVIDE_BY_ZERO;
+
+  /* The quotient is given nn - dn + 1 limbs; when that is not positive the
+     numerator has fewer limbs than the denominator and the result is 0.  */
+  qn = nn - dn + 1;
+  if (qn <= 0)
+    {
+      SIZ (quot) = 0;
+      return;
+    }
+
+  MPZ_REALLOC (quot, qn);
+
+  TMP_MARK;
+  qp = PTR (quot);
+  np = PTR (num);
+  dp = PTR (den);
+
+  /* A denominator that shares its limbs with the quotient is moved to
+     temporary space before the quotient gets written.  */
+  if (dp == qp)
+    {
+      mp_ptr den_copy = TMP_ALLOC_LIMBS (dn);
+      MPN_COPY (den_copy, dp, dn);
+      dp = den_copy;
+    }
+
+  /* Both cases below hand mpn_div_q an (nn + 1)-limb scratch area.  */
+  scratch = TMP_ALLOC_LIMBS (nn + 1);
+  if (np == qp)
+    {
+      /* The numerator shares its limbs with the quotient too: copy it into
+         the scratch area and let dividend and scratch overlap.  */
+      MPN_COPY (scratch, np, nn);
+      np = scratch;
+    }
+  mpn_div_q (qp, np, nn, dp, dn, scratch);
+
+  /* The top quotient limb may be zero; drop it from the size if so.  */
+  qn -= (qp[qn - 1] == 0);
+
+  /* Operand signs differ exactly when the XOR of the sizes is negative.  */
+  SIZ (quot) = ((num_siz ^ den_siz) < 0) ? -qn : qn;
+  TMP_FREE;
+}
diff --git a/mpz/tdiv_q_2exp.c b/mpz/tdiv_q_2exp.c

new file mode 100644 (file)

index 0000000..491d9d0
--- /dev/null
+++ b/mpz/tdiv_q_2exp.c
@@ -0,0 +1,59 @@
+/* mpz_tdiv_q_2exp -- Divide an integer by 2**CNT.  Round the quotient
+   towards zero.
+
+Copyright 1991, 1993, 1994, 1996, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpz_tdiv_q_2exp (mpz_ptr w, mpz_srcptr u, mp_bitcnt_t cnt)
+{
+  mp_size_t usize = u->_mp_size;
+  mp_size_t skip = cnt / GMP_NUMB_BITS;     /* whole limbs shifted out */
+  mp_bitcnt_t bits = cnt % GMP_NUMB_BITS;   /* shift left over, in bits */
+  mp_size_t wsize = ABS (usize) - skip;
+  mp_srcptr up;
+  mp_ptr wp;
+
+  /* No limb of U survives the shift: the quotient is zero.  */
+  if (wsize <= 0)
+    {
+      w->_mp_size = 0;
+      return;
+    }
+
+  if (w->_mp_alloc < wsize)
+    _mpz_realloc (w, wsize);
+
+  wp = w->_mp_d;                /* read only after any reallocation */
+  up = u->_mp_d;
+
+  if (bits == 0)
+    {
+      MPN_COPY_INCR (wp, up + skip, wsize);    /* whole limbs: plain copy */
+    }
+  else
+    {
+      mpn_rshift (wp, up + skip, wsize, bits);
+      wsize -= (wp[wsize - 1] == 0);   /* top limb may have become zero */
+    }
+
+  /* Only the magnitude was shifted; the quotient keeps the sign of U.  */
+  w->_mp_size = (usize < 0) ? -wsize : wsize;
+}
diff --git a/mpz/tdiv_q_ui.c b/mpz/tdiv_q_ui.c

new file mode 100644 (file)

index 0000000..50abb85
--- /dev/null
+++ b/mpz/tdiv_q_ui.c
@@ -0,0 +1,74 @@
+/* mpz_tdiv_q_ui(quot, dividend, divisor_limb)
+   -- Divide DIVIDEND by DIVISOR_LIMB and store the result in QUOT.
+
+Copyright 1991, 1993, 1994, 1996, 1998, 2001, 2002, 2004 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+unsigned long int
+mpz_tdiv_q_ui (mpz_ptr quot, mpz_srcptr dividend, unsigned long int divisor)
+{
+  mp_size_t ns, nn, qn;
+  mp_ptr np, qp;
+  mp_limb_t rl;
+  /* QUOT gets the quotient; the returned rl is presumably |remainder|.  */
+  if (divisor == 0)
+    DIVIDE_BY_ZERO;
+
+  ns = SIZ(dividend);
+  if (ns == 0)
+    {
+      SIZ(quot) = 0;
+      return 0;
+    }
+  /* QUOT is given nn limbs; the size computed below never exceeds nn.  */
+  nn = ABS(ns);
+  MPZ_REALLOC (quot, nn);
+  qp = PTR(quot);
+  np = PTR(dividend);
+
+#if BITS_PER_ULONG > GMP_NUMB_BITS  /* avoid warnings about shift amount */
+  if (divisor > GMP_NUMB_MAX)
+    {
+      mp_limb_t dp[2], rp[2];
+      /* divisor > GMP_NUMB_MAX, so a one-limb dividend gives quotient 0.  */
+      if (nn == 1)             /* tdiv_qr requirements; tested above for 0 */
+       {
+         SIZ(quot) = 0;
+         rl = np[0];
+         return rl;
+       }
+      /* Split the divisor into two limbs for the general division.  */
+      dp[0] = divisor & GMP_NUMB_MASK;
+      dp[1] = divisor >> GMP_NUMB_BITS;
+      mpn_tdiv_qr (qp, rp, (mp_size_t) 0, np, nn, dp, (mp_size_t) 2);
+      rl = rp[0] + (rp[1] << GMP_NUMB_BITS);  /* rejoin the two limbs */
+      qn = nn - 2 + 1; qn -= qp[qn - 1] == 0; qn -= qn != 0 && qp[qn - 1] == 0;
+    }
+  else
+#endif
+    {
+      rl = mpn_divrem_1 (qp, (mp_size_t) 0, np, nn, (mp_limb_t) divisor);
+      qn = nn - (qp[nn - 1] == 0);  /* top limb may be zero */
+    }
+  /* The sign of QUOT follows the dividend; the divisor is unsigned.  */
+  SIZ(quot) = ns >= 0 ? qn : -qn;
+  return rl;
+}
diff --git a/mpz/tdiv_qr.c b/mpz/tdiv_qr.c

new file mode 100644 (file)

index 0000000..64b6e03
--- /dev/null
+++ b/mpz/tdiv_qr.c
@@ -0,0 +1,106 @@
+/* mpz_tdiv_qr(quot,rem,dividend,divisor) -- Set QUOT to DIVIDEND/DIVISOR,
+   and REM to DIVIDEND mod DIVISOR.
+
+Copyright 1991, 1993, 1994, 2000, 2001, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+#ifdef BERKELEY_MP
+#include "mp.h"
+#endif
+
+void
+#ifndef BERKELEY_MP
+mpz_tdiv_qr (mpz_ptr quot, mpz_ptr rem, mpz_srcptr num, mpz_srcptr den)
+#else /* BERKELEY_MP */
+mdiv (mpz_srcptr num, mpz_srcptr den, mpz_ptr quot, mpz_ptr rem)
+#endif /* BERKELEY_MP */
+{
+  mp_size_t num_siz, den_siz;  /* signed sizes as stored in the operands */
+  mp_size_t nn, dn, qn, rn;    /* limb counts */
+  mp_ptr np, dp, qp, rp;
+  TMP_DECL;
+
+  num_siz = SIZ (num);
+  den_siz = SIZ (den);
+  nn = ABS (num_siz);
+  dn = ABS (den_siz);
+
+  if (dn == 0)
+    DIVIDE_BY_ZERO;
+
+  MPZ_REALLOC (rem, dn);
+
+  qn = nn - dn + 1;
+  if (qn <= 0)
+    {
+      /* The numerator has fewer limbs than the denominator: the quotient
+         is zero and the numerator itself is the remainder.  */
+      if (num != rem)
+        {
+          MPN_COPY (PTR (rem), PTR (num), nn);
+          SIZ (rem) = SIZ (num);
+        }
+      /* Clear the quotient only after the remainder is in place; QUOT may
+         be the very same variable as NUM.  */
+      SIZ (quot) = 0;
+      return;
+    }
+
+  MPZ_REALLOC (quot, qn);
+
+  TMP_MARK;
+  qp = PTR (quot);
+  rp = PTR (rem);
+  np = PTR (num);
+  dp = PTR (den);
+
+  /* FIXME: We should think about how to handle the temporary allocation.
+     Perhaps mpn_tdiv_qr should handle it, since it anyway often needs to
+     allocate temp space.  */
+
+  /* An input whose limbs coincide with either output area is first moved
+     to temporary space.  The denominator is dealt with first.  */
+  if (dp == qp || dp == rp)
+    {
+      mp_ptr den_copy = TMP_ALLOC_LIMBS (dn);
+      MPN_COPY (den_copy, dp, dn);
+      dp = den_copy;
+    }
+  /* Then the numerator.  */
+  if (np == qp || np == rp)
+    {
+      mp_ptr num_copy = TMP_ALLOC_LIMBS (nn);
+      MPN_COPY (num_copy, np, nn);
+      np = num_copy;
+    }
+
+  mpn_tdiv_qr (qp, rp, 0L, np, nn, dp, dn);
+
+  /* Drop a zero top limb of the quotient.  MPN_NORMALIZE adjusts rn,
+     presumably by stripping high zero limbs of the remainder.  */
+  qn -= (qp[qn - 1] == 0);
+  rn = dn;
+  MPN_NORMALIZE (rp, rn);
+
+  /* Quotient is negative when operand signs differ; remainder follows NUM.  */
+  SIZ (quot) = ((num_siz ^ den_siz) < 0) ? -qn : qn;
+  SIZ (rem) = (num_siz < 0) ? -rn : rn;
+  TMP_FREE;
+}
diff --git a/mpz/tdiv_qr_ui.c b/mpz/tdiv_qr_ui.c

new file mode 100644 (file)

index 0000000..4f797b1
--- /dev/null
+++ b/mpz/tdiv_qr_ui.c
@@ -0,0 +1,94 @@
+/* mpz_tdiv_qr_ui(quot,rem,dividend,short_divisor) --
+   Set QUOT to DIVIDEND / SHORT_DIVISOR
+   and REM to DIVIDEND mod SHORT_DIVISOR.
+
+Copyright 1991, 1993, 1994, 1996, 1998, 2001, 2002, 2004 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+unsigned long int
+mpz_tdiv_qr_ui (mpz_ptr quot, mpz_ptr rem, mpz_srcptr dividend, unsigned long int divisor)
+{
+  mp_size_t ns, nn, qn;
+  mp_ptr np, qp;
+  mp_limb_t rl;
+  /* QUOT and REM get quotient and remainder; rl is also returned.  */
+  if (divisor == 0)
+    DIVIDE_BY_ZERO;
+
+  ns = SIZ(dividend);
+  if (ns == 0)
+    {
+      SIZ(quot) = 0;
+      SIZ(rem) = 0;
+      return 0;
+    }
+  /* QUOT is given nn limbs; the size computed below never exceeds nn.  */
+  nn = ABS(ns);
+  MPZ_REALLOC (quot, nn);
+  qp = PTR(quot);
+  np = PTR(dividend);
+
+#if BITS_PER_ULONG > GMP_NUMB_BITS  /* avoid warnings about shift amount */
+  if (divisor > GMP_NUMB_MAX)
+    {
+      mp_limb_t dp[2];
+      mp_ptr rp;
+      mp_size_t rn;
+      /* divisor > GMP_NUMB_MAX, so a one-limb dividend gives quotient 0.  */
+      if (nn == 1)             /* tdiv_qr requirements; tested above for 0 */
+       {
+         SIZ(quot) = 0;
+         rl = np[0];
+         SIZ(rem) = ns >= 0 ? 1 : -1;
+         PTR(rem)[0] = rl;
+         return rl;
+       }
+      /* General case: two-limb divisor, remainder limbs written into REM.  */
+      MPZ_REALLOC (rem, 2);
+      rp = PTR(rem);
+
+      dp[0] = divisor & GMP_NUMB_MASK;
+      dp[1] = divisor >> GMP_NUMB_BITS;
+      mpn_tdiv_qr (qp, rp, (mp_size_t) 0, np, nn, dp, (mp_size_t) 2);
+      rl = rp[0] + (rp[1] << GMP_NUMB_BITS);  /* rejoin the two limbs */
+      qn = nn - 2 + 1; qn -= qp[qn - 1] == 0; qn -= qn != 0 && qp[qn - 1] == 0;
+      rn = 2 - (rp[1] == 0);  rn -= (rp[rn - 1] == 0);  /* 0, 1 or 2 limbs */
+      SIZ(rem) = ns >= 0 ? rn : -rn;
+    }
+  else
+#endif
+    {
+      rl = mpn_divrem_1 (qp, (mp_size_t) 0, np, nn, (mp_limb_t) divisor);
+      if (rl == 0)
+       SIZ(rem) = 0;
+      else
+       {
+         /* Store the single-limb remainder.  We don't check if there's space
+            for just one limb, since no function ever makes zero space.  */
+         SIZ(rem) = ns >= 0 ? 1 : -1;
+         PTR(rem)[0] = rl;
+       }
+      qn = nn - (qp[nn - 1] == 0);  /* top limb may be zero */
+    }
+  /* The sign of QUOT follows the dividend; the divisor is unsigned.  */
+  SIZ(quot) = ns >= 0 ? qn : -qn;
+  return rl;
+}
diff --git a/mpz/tdiv_r.c b/mpz/tdiv_r.c

new file mode 100644 (file)

index 0000000..a3b008c
--- /dev/null
+++ b/mpz/tdiv_r.c
@@ -0,0 +1,89 @@
+/* mpz_tdiv_r(rem, dividend, divisor) -- Set REM to DIVIDEND mod DIVISOR.
+
+Copyright 1991, 1993, 1994, 2000, 2001, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+void
+mpz_tdiv_r (mpz_ptr rem, mpz_srcptr num, mpz_srcptr den)
+{
+  mp_size_t num_siz, den_siz;  /* signed sizes as stored in the operands */
+  mp_size_t nn, dn, qn, rn;    /* limb counts */
+  mp_ptr np, dp, qp, rp;
+  TMP_DECL;
+
+  num_siz = SIZ (num);
+  den_siz = SIZ (den);
+  nn = ABS (num_siz);
+  dn = ABS (den_siz);
+
+  if (dn == 0)
+    DIVIDE_BY_ZERO;
+
+  MPZ_REALLOC (rem, dn);
+
+  qn = nn - dn + 1;
+  if (qn <= 0)
+    {
+      /* The numerator has fewer limbs than the denominator, so it is its
+         own remainder; copy it unless REM already is NUM.  */
+      if (num != rem)
+        {
+          MPN_COPY (PTR (rem), PTR (num), nn);
+          SIZ (rem) = SIZ (num);
+        }
+      return;
+    }
+
+  TMP_MARK;
+  qp = TMP_ALLOC_LIMBS (qn);    /* quotient is computed but discarded */
+  rp = PTR (rem);
+  np = PTR (num);
+  dp = PTR (den);
+
+  /* FIXME: We should think about how to handle the temporary allocation.
+     Perhaps mpn_tdiv_qr should handle it, since it anyway often needs to
+     allocate temp space.  */
+
+  /* An input whose limbs coincide with the remainder area is first moved
+     to temporary space.  The denominator is dealt with first.  */
+  if (dp == rp)
+    {
+      mp_ptr den_copy = TMP_ALLOC_LIMBS (dn);
+      MPN_COPY (den_copy, dp, dn);
+      dp = den_copy;
+    }
+  /* Then the numerator.  */
+  if (np == rp)
+    {
+      mp_ptr num_copy = TMP_ALLOC_LIMBS (nn);
+      MPN_COPY (num_copy, np, nn);
+      np = num_copy;
+    }
+
+  mpn_tdiv_qr (qp, rp, 0L, np, nn, dp, dn);
+
+  /* MPN_NORMALIZE adjusts rn; presumably it strips high zero limbs.  */
+  rn = dn;
+  MPN_NORMALIZE (rp, rn);
+
+  SIZ (rem) = (num_siz < 0) ? -rn : rn;   /* remainder takes NUM's sign */
+  TMP_FREE;
+}
diff --git a/mpz/tdiv_r_2exp.c b/mpz/tdiv_r_2exp.c

new file mode 100644 (file)

index 0000000..3828ff1
--- /dev/null
+++ b/mpz/tdiv_r_2exp.c
@@ -0,0 +1,70 @@
+/* mpz_tdiv_r_2exp -- Divide an integer by 2**CNT and produce a remainder.
+
+Copyright 1991, 1993, 1994, 1995, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpz_tdiv_r_2exp (mpz_ptr res, mpz_srcptr in, mp_bitcnt_t cnt)
+{
+  /* Set RES to the low CNT bits of |IN|, carrying the sign of IN.  First
+     work out how many limbs the result has (out_len) and how many whole
+     limbs must be copied from IN (copy_len), then do the storing.  */
+  mp_size_t src_len = ABS (SIZ (in));
+  mp_size_t whole_limbs = cnt / GMP_NUMB_BITS;
+  mp_srcptr src = PTR (in);
+  mp_size_t out_len, copy_len;
+  mp_limb_t top = 0;   /* masked partial limb; stays 0 when there is none */
+
+  if (src_len <= whole_limbs)
+    {
+      /* The input operand is smaller than 2**CNT.  We perform a no-op,
+         apart from that we might need to copy IN to RES.  */
+      out_len = src_len;
+      copy_len = src_len;
+    }
+  else
+    {
+      /* The input operand is (probably) greater than 2**CNT.  */
+      top = src[whole_limbs] & (((mp_limb_t) 1 << cnt % GMP_NUMB_BITS) - 1);
+      if (top != 0)
+        {
+          copy_len = whole_limbs;
+          out_len = whole_limbs + 1;
+        }
+      else
+        {
+          /* Nothing survives of the partial limb; the limbs below it may
+             have high zeros too.  */
+          out_len = whole_limbs;
+          MPN_NORMALIZE (src, out_len);
+          copy_len = out_len;
+        }
+    }
+
+  if (ALLOC (res) < out_len)
+    _mpz_realloc (res, out_len);
+
+  if (top != 0)
+    PTR (res)[whole_limbs] = top;
+  if (res != in)
+    MPN_COPY (PTR (res), PTR (in), copy_len);
+
+  SIZ (res) = SIZ (in) >= 0 ? out_len : -out_len;
+}
diff --git a/mpz/tdiv_r_ui.c b/mpz/tdiv_r_ui.c

new file mode 100644 (file)

index 0000000..64a8b94
--- /dev/null
+++ b/mpz/tdiv_r_ui.c
@@ -0,0 +1,89 @@
+/* mpz_tdiv_r_ui(rem, dividend, divisor_limb)
+   -- Set REM to DIVIDEND mod DIVISOR_LIMB.
+
+Copyright 1991, 1993, 1994, 1996, 1998, 2001, 2002, 2004, 2005 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+unsigned long int
+mpz_tdiv_r_ui (mpz_ptr rem, mpz_srcptr dividend, unsigned long int divisor)
+{
+  /* Store in REM the remainder of DIVIDEND divided by DIVISOR, signed like
+     DIVIDEND, and return the remainder's magnitude.  A zero DIVISOR raises
+     DIVIDE_BY_ZERO.  */
+  mp_size_t signed_len, len;
+  mp_ptr limbs;
+  mp_limb_t low;
+
+  if (divisor == 0)
+    DIVIDE_BY_ZERO;
+
+  signed_len = SIZ(dividend);
+  if (signed_len == 0)
+    {
+      SIZ(rem) = 0;
+      return 0;
+    }
+
+  len = ABS(signed_len);
+  limbs = PTR(dividend);
+#if BITS_PER_ULONG > GMP_NUMB_BITS  /* avoid warnings about shift amount */
+  if (divisor > GMP_NUMB_MAX)
+    {
+      /* The divisor does not fit in one limb: divide by a two-limb value.  */
+      mp_limb_t dlimbs[2];
+      mp_ptr rlimbs, quot;
+      mp_size_t rlen;
+      TMP_DECL;
+
+      if (len == 1)            /* tdiv_qr requirements; tested above for 0 */
+        {
+          /* A one-limb dividend is below the two-limb divisor, so it is
+             its own remainder.  */
+          low = limbs[0];
+          PTR(rem)[0] = low;
+          SIZ(rem) = signed_len >= 0 ? 1 : -1;
+          return low;
+        }
+
+      rlimbs = MPZ_REALLOC (rem, 2);
+
+      TMP_MARK;
+      dlimbs[0] = divisor & GMP_NUMB_MASK;
+      dlimbs[1] = divisor >> GMP_NUMB_BITS;
+      quot = TMP_ALLOC_LIMBS (len - 2 + 1);
+      mpn_tdiv_qr (quot, rlimbs, (mp_size_t) 0,
+                   limbs, len, dlimbs, (mp_size_t) 2);
+      TMP_FREE;
+
+      low = rlimbs[0] + (rlimbs[1] << GMP_NUMB_BITS);
+      /* Normalised length of the two-limb remainder: 2, 1 or 0.  */
+      rlen = rlimbs[1] != 0 ? 2 : (rlimbs[0] != 0);
+      SIZ(rem) = signed_len >= 0 ? rlen : -rlen;
+    }
+  else
+#endif
+    {
+      low = mpn_mod_1 (limbs, len, (mp_limb_t) divisor);
+      if (low != 0)
+        {
+          /* Store the single-limb remainder.  We don't check if there's space
+             for just one limb, since no function ever makes zero space.  */
+          SIZ(rem) = signed_len >= 0 ? 1 : -1;
+          PTR(rem)[0] = low;
+        }
+      else
+        SIZ(rem) = 0;
+    }
+
+  return low;
+}
diff --git a/mpz/tdiv_ui.c b/mpz/tdiv_ui.c

new file mode 100644 (file)

index 0000000..fafd97e
--- /dev/null
+++ b/mpz/tdiv_ui.c
@@ -0,0 +1,74 @@
+/* mpz_tdiv_ui(dividend, divisor_limb) -- Return DIVIDEND mod DIVISOR_LIMB.
+
+Copyright 1991, 1993, 1994, 1996, 1997, 1998, 2001, 2002, 2004, 2005 Free
+Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+unsigned long int
+mpz_tdiv_ui (mpz_srcptr dividend, unsigned long int divisor)
+{
+  /* Return the remainder of |DIVIDEND| divided by DIVISOR.  Nothing is
+     stored and the sign of DIVIDEND does not influence the value returned.
+     A zero DIVISOR raises DIVIDE_BY_ZERO.  */
+  mp_size_t ns, nn;
+  mp_srcptr np;                /* the dividend limbs are only ever read */
+  mp_limb_t rl;
+
+  if (divisor == 0)
+    DIVIDE_BY_ZERO;
+
+  ns = SIZ(dividend);
+  if (ns == 0)
+    {
+      return 0;
+    }
+
+  nn = ABS(ns);
+  np = PTR(dividend);
+
+#if BITS_PER_ULONG > GMP_NUMB_BITS  /* avoid warnings about shift amount */
+  if (divisor > GMP_NUMB_MAX)
+    {
+      /* The divisor needs two limbs, so go through mpn_tdiv_qr with a
+         throw-away quotient.  */
+      mp_limb_t dp[2], rp[2];
+      mp_ptr qp;
+      TMP_DECL;
+
+      if (nn == 1)             /* tdiv_qr requirements; tested above for 0 */
+        {
+          /* A one-limb dividend is below the two-limb divisor.  */
+          rl = np[0];
+          return rl;
+        }
+
+      TMP_MARK;
+      dp[0] = divisor & GMP_NUMB_MASK;
+      dp[1] = divisor >> GMP_NUMB_BITS;
+      qp = TMP_ALLOC_LIMBS (nn - 2 + 1);
+      mpn_tdiv_qr (qp, rp, (mp_size_t) 0, np, nn, dp, (mp_size_t) 2);
+      TMP_FREE;
+      /* Only the value is returned, so the remainder's limb count is not
+         needed here.  */
+      rl = rp[0] + (rp[1] << GMP_NUMB_BITS);
+    }
+  else
+#endif
+    {
+      rl = mpn_mod_1 (np, nn, (mp_limb_t) divisor);
+    }
+
+  return rl;
+}
diff --git a/mpz/tstbit.c b/mpz/tstbit.c

new file mode 100644 (file)

index 0000000..c3006c8
--- /dev/null
+++ b/mpz/tstbit.c
@@ -0,0 +1,70 @@
+/* mpz_tstbit -- test a specified bit.
+
+Copyright 2000, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* For negatives the effective twos complement is achieved by negating the
+   limb tested, either with a ones or twos complement.  Twos complement
+   ("-") is used if there's only zero limbs below the one being tested.
+   Ones complement ("~") is used if there's a non-zero below.  Note that "-"
+   is correct even if the limb examined is 0 (and the true beginning of twos
+   complement is further up).
+
+   Testing the limbs below p is unavoidable on negatives, but will usually
+   need to examine only *(p-1).  The search is done from *(p-1) down to
+   *u_ptr, since that might give better cache locality, and because a
+   non-zero limb is perhaps a touch more likely in the middle of a number
+   than at the low end.
+
+   Bits past the end of available data simply follow sign of u.  Notice that
+   the limb_index >= abs_size test covers u=0 too.  */
+
+int
+mpz_tstbit (mpz_srcptr u, mp_bitcnt_t bit_index) __GMP_NOTHROW
+{
+  /* Return bit BIT_INDEX (0 or 1) of U, with negative U read as an
+     infinitely sign-extended twos complement number.  */
+  mp_srcptr      u_ptr      = PTR(u);
+  mp_size_t      size       = SIZ(u);
+  mp_size_t      abs_size   = ABS(size);
+  mp_size_t      limb_index = bit_index / GMP_NUMB_BITS;
+  mp_srcptr      p;
+  mp_limb_t      limb;
+
+  /* Past the end of the data every bit equals the sign.  */
+  if (limb_index >= abs_size)
+    return (size < 0);
+
+  /* Form the limb pointer only once the index is known to be in range.  */
+  p = u_ptr + limb_index;
+  limb = *p;
+  if (size < 0)
+    {
+      limb = -limb;     /* twos complement */
+
+      /* Look for a non-zero limb below the one being tested.  */
+      while (p != u_ptr)
+        {
+          p--;
+          if (*p != 0)
+            {
+              limb--;   /* make it a ones complement instead */
+              break;
+            }
+        }
+    }
+
+  return (limb >> (bit_index % GMP_NUMB_BITS)) & 1;
+}
diff --git a/mpz/ui_pow_ui.c b/mpz/ui_pow_ui.c

new file mode 100644 (file)

index 0000000..4a0f7bd
--- /dev/null
+++ b/mpz/ui_pow_ui.c
@@ -0,0 +1,48 @@
+/* mpz_ui_pow_ui -- ulong raised to ulong.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+void
+mpz_ui_pow_ui (mpz_ptr r, unsigned long b, unsigned long e)
+{
+  /* Raise B to the power E into R by handing B to mpz_n_pow_ui as a limb
+     array of 0, 1 or 2 limbs.  */
+#if GMP_NAIL_BITS != 0
+  if (b > GMP_NUMB_MAX)
+    {
+      /* B exceeds GMP_NUMB_MAX, so it is split across two limbs.  */
+      mp_limb_t base2[2];
+      base2[1] = b >> GMP_NUMB_BITS;
+      base2[0] = b & GMP_NUMB_MASK;
+      mpz_n_pow_ui (r, base2, (mp_size_t) 2, e);
+    }
+  else
+#endif
+    {
+      mp_size_t  bsize = (mp_size_t) (b != 0);  /* a zero base has size 0 */
+#ifdef _LONG_LONG_LIMB
+      /* i386 gcc 2.95.3 doesn't recognise blimb can be eliminated when
+         mp_limb_t is an unsigned long, so only use a separate blimb when
+         necessary.  */
+      mp_limb_t  blimb = b;
+      mpz_n_pow_ui (r, &blimb, bsize, e);
+#else
+      mpz_n_pow_ui (r, &b, bsize, e);
+#endif
+    }
+}
diff --git a/mpz/ui_sub.c b/mpz/ui_sub.c

new file mode 100644 (file)

index 0000000..76d4b82
--- /dev/null
+++ b/mpz/ui_sub.c
@@ -0,0 +1,85 @@
+/* mpz_ui_sub -- Subtract an mpz_t from an unsigned one-word integer.
+
+Copyright 2002, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpz_ui_sub (mpz_ptr w, unsigned long int uval, mpz_srcptr v)
+{
+  /* Set W to UVAL - V, dispatching on the sign and limb count of V.  */
+  mp_ptr src, dst;
+  mp_size_t vsize, wsize;
+  mp_limb_t carry;
+
+#if BITS_PER_ULONG > GMP_NUMB_BITS  /* avoid warnings about shift amount */
+  if (uval > GMP_NUMB_MAX)
+    {
+      /* UVAL needs two limbs: wrap it in a stack mpz_t and let mpz_sub do
+         the work.  */
+      mpz_t big;
+      mp_limb_t big_limbs[2];
+      big_limbs[0] = uval & GMP_NUMB_MASK;
+      big_limbs[1] = uval >> GMP_NUMB_BITS;
+      PTR(big) = big_limbs;
+      SIZ(big) = 2;
+      mpz_sub (w, big, v);
+      return;
+    }
+#endif
+
+  src = PTR(v);
+  vsize = SIZ(v);
+  dst = PTR(w);
+
+  if (vsize < 0)
+    {
+      /* V is negative: W = UVAL + |V|, possibly one limb longer.  */
+      mp_size_t vlen = -vsize;
+      dst = MPZ_REALLOC (w, vlen + 1);
+      src = PTR(v);            /* re-read in case W and V are the same */
+      carry = mpn_add_1 (dst, src, vlen, (mp_limb_t) uval);
+      dst[vlen] = carry;
+      wsize = vlen + (carry != 0);
+    }
+  else if (vsize == 0)
+    {
+      /* V is zero: W = UVAL.  */
+      dst[0] = uval;
+      wsize = uval != 0;
+    }
+  else if (vsize == 1)
+    {
+      /* Both are single limbs: subtract the smaller from the larger and
+         set the sign accordingly.  */
+      mp_limb_t vlimb = src[0];
+      if (uval >= vlimb)
+        {
+          dst[0] = uval - vlimb;
+          wsize = dst[0] != 0;
+        }
+      else
+        {
+          dst[0] = vlimb - uval;
+          wsize = -1;
+        }
+    }
+  else
+    {
+      /* V has several limbs: W = -(V - UVAL); the top limb may become 0.  */
+      dst = MPZ_REALLOC (w, vsize);
+      src = PTR(v);            /* re-read in case W and V are the same */
+      mpn_sub_1 (dst, src, vsize, (mp_limb_t) uval);
+      wsize = -(vsize - (dst[vsize - 1] == 0));
+    }
+
+  SIZ(w) = wsize;
+}
diff --git a/mpz/urandomb.c b/mpz/urandomb.c

new file mode 100644 (file)

index 0000000..fc7ec55
--- /dev/null
+++ b/mpz/urandomb.c
@@ -0,0 +1,37 @@
+/* mpz_urandomb (rop, state, n) -- Generate a uniform pseudorandom
+   integer in the range 0 to 2^N - 1, inclusive, using STATE as the
+   random state previously initialized by a call to gmp_randinit().
+
+Copyright 1999, 2000, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpz_urandomb (mpz_ptr rop, gmp_randstate_t rstate, mp_bitcnt_t nbits)
+{
+  /* Fill ROP with NBITS bits drawn from RSTATE, then strip high zero
+     limbs so the stored size is normalised.  */
+  mp_size_t limbs = BITS_TO_LIMBS (nbits);
+  mp_ptr dst = MPZ_REALLOC (rop, limbs);
+
+  _gmp_rand (dst, rstate, nbits);
+
+  MPN_NORMALIZE (dst, limbs);
+  SIZ (rop) = limbs;
+}
diff --git a/mpz/urandomm.c b/mpz/urandomm.c

new file mode 100644 (file)

index 0000000..b8a6d68
--- /dev/null
+++ b/mpz/urandomm.c
@@ -0,0 +1,94 @@
+/* mpz_urandomm (rop, state, n) -- Generate a uniform pseudorandom
+   integer in the range 0 to N-1, using STATE as the random state
+   previously initialized by a call to gmp_randinit().
+
+Copyright 2000, 2002  Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h" /* for count_leading_zeros */
+
+
+#define MAX_URANDOMM_ITER  80
+
+void
+mpz_urandomm (mpz_ptr rop, gmp_randstate_t rstate, mpz_srcptr n)
+{
+  /* Draw values of just enough bits to cover |N| - 1 until one falls below
+     |N|, giving up after MAX_URANDOMM_ITER draws.  A zero N raises
+     DIVIDE_BY_ZERO.  */
+  mp_ptr out, bound, top, scan;
+  mp_size_t nbits, size;
+  int lz;
+  int is_pow2;
+  int cmp;
+  int tries;
+  TMP_DECL;
+
+  size = ABSIZ (n);
+  if (size == 0)
+    DIVIDE_BY_ZERO;
+
+  bound = PTR (n);
+  top = bound + (size - 1);
+
+  /* Detect whether n is a power of 2: the top limb must be one, and every
+     limb below it must be zero.  */
+  is_pow2 = POW2_P (*top);
+  for (scan = bound; is_pow2 != 0 && scan < top; scan++)
+    if (*scan != 0)
+      is_pow2 = 0;             /* Mark n as `not a power of two'.  */
+
+  count_leading_zeros (lz, *top);
+  nbits = size * GMP_NUMB_BITS - (lz - GMP_NAIL_BITS) - is_pow2;
+  if (nbits == 0)              /* nbits == 0 means that n was == 1.  */
+    {
+      SIZ (rop) = 0;
+      return;
+    }
+
+  TMP_MARK;
+  if (rop == n)
+    {
+      /* ROP is about to be overwritten, so keep a private copy of N.  */
+      mp_ptr saved;
+      saved = TMP_ALLOC_LIMBS (size);
+      MPN_COPY (saved, bound, size);
+      bound = saved;
+    }
+
+  /* Here the allocated size can be one too much if n is a power of
+     (2^GMP_NUMB_BITS) but it's convenient for using mpn_cmp below.  */
+  out = MPZ_REALLOC (rop, size);
+  /* Clear last limb to prevent the case in which size is one too much.  */
+  out[size - 1] = 0;
+
+  tries = MAX_URANDOMM_ITER;   /* Set iteration count limit.  */
+  for (;;)
+    {
+      _gmp_rand (out, rstate, nbits);
+      MPN_CMP (cmp, out, bound, size);
+      if (cmp < 0)
+        break;
+      if (--tries == 0)
+        {
+          /* Too many iterations; return result mod n == result - n */
+          mpn_sub_n (out, out, bound, size);
+          break;
+        }
+    }
+
+  MPN_NORMALIZE (out, size);
+  SIZ (rop) = size;
+  TMP_FREE;
+}
diff --git a/mpz/xor.c b/mpz/xor.c

new file mode 100644 (file)

index 0000000..18edd81
--- /dev/null
+++ b/mpz/xor.c
@@ -0,0 +1,203 @@
+/* mpz_xor -- Logical xor.
+
+Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2005 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpz_xor (mpz_ptr res, mpz_srcptr op1, mpz_srcptr op2)
+{
+  mp_srcptr op1_ptr, op2_ptr;  /* operand limbs; redirected to temporaries for negative operands */
+  mp_size_t op1_size, op2_size;
+  mp_ptr res_ptr;
+  mp_size_t res_size, res_alloc;
+  mp_size_t i;
+  TMP_DECL;
+  /* Set RES to the bitwise exclusive-or of OP1 and OP2, with negative
+     values treated as twos complement.  Three cases follow: both operands
+     non-negative (plain limb-wise xor), both negative (result is
+     non-negative), and mixed signs (result is negative; the operands are
+     swapped if needed so that OP2 is the negative one).  Limb pointers are
+     re-read after every _mpz_realloc of RES.  */
+  TMP_MARK;  /* temporaries are only allocated when an operand is negative */
+  op1_size = SIZ(op1);
+  op2_size = SIZ(op2);
+
+  op1_ptr = PTR(op1);
+  op2_ptr = PTR(op2);
+  res_ptr = PTR(res);
+
+  if (op1_size >= 0)
+    {
+      if (op2_size >= 0)
+       {
+         if (op1_size >= op2_size)
+           {
+             if (ALLOC(res) < op1_size)
+               {
+                 _mpz_realloc (res, op1_size);
+                 /* No overlapping possible: op1_ptr = PTR(op1); */
+                 op2_ptr = PTR(op2);
+                 res_ptr = PTR(res);
+               }
+
+             if (res_ptr != op1_ptr)  /* skip the copy when RES already holds OP1's limbs */
+               MPN_COPY (res_ptr + op2_size, op1_ptr + op2_size,
+                         op1_size - op2_size);
+             for (i = op2_size - 1; i >= 0; i--)
+               res_ptr[i] = op1_ptr[i] ^ op2_ptr[i];
+             res_size = op1_size;
+           }
+         else
+           {
+             if (ALLOC(res) < op2_size)
+               {
+                 _mpz_realloc (res, op2_size);
+                 op1_ptr = PTR(op1);
+                 /* No overlapping possible: op2_ptr = PTR(op2); */
+                 res_ptr = PTR(res);
+               }
+
+             if (res_ptr != op2_ptr)  /* skip the copy when RES already holds OP2's limbs */
+               MPN_COPY (res_ptr + op1_size, op2_ptr + op1_size,
+                         op2_size - op1_size);
+             for (i = op1_size - 1; i >= 0; i--)
+               res_ptr[i] = op1_ptr[i] ^ op2_ptr[i];
+             res_size = op2_size;
+           }
+
+         MPN_NORMALIZE (res_ptr, res_size);  /* the xor can leave zero limbs at the top */
+         SIZ(res) = res_size;
+         return;  /* no TMP_FREE here: nothing was allocated on this path */
+       }
+      else /* op2_size < 0 */
+       {
+         /* Fall through to the code at the end of the function.  */
+       }
+    }
+  else
+    {
+      if (op2_size < 0)
+       {
+         mp_ptr opx;
+
+         /* Both operands are negative, the result will be positive.
+             (-OP1) ^ (-OP2) =
+            = ~(OP1 - 1) ^ ~(OP2 - 1) =
+            = (OP1 - 1) ^ (OP2 - 1)  */
+
+         op1_size = -op1_size;
+         op2_size = -op2_size;
+
+         /* Possible optimization: Decrease mpn_sub precision,
+            as we won't use the entire res of both.  */
+         opx = TMP_ALLOC_LIMBS (op1_size);
+         mpn_sub_1 (opx, op1_ptr, op1_size, (mp_limb_t) 1);
+         op1_ptr = opx;
+
+         opx = TMP_ALLOC_LIMBS (op2_size);
+         mpn_sub_1 (opx, op2_ptr, op2_size, (mp_limb_t) 1);
+         op2_ptr = opx;
+
+         res_alloc = MAX (op1_size, op2_size);
+         if (ALLOC(res) < res_alloc)
+           {
+             _mpz_realloc (res, res_alloc);
+             res_ptr = PTR(res);
+             /* op1_ptr and op2_ptr point to temporary space.  */
+           }
+
+         if (op1_size > op2_size)
+           {
+             MPN_COPY (res_ptr + op2_size, op1_ptr + op2_size,
+                       op1_size - op2_size);
+             for (i = op2_size - 1; i >= 0; i--)
+               res_ptr[i] = op1_ptr[i] ^ op2_ptr[i];
+             res_size = op1_size;
+           }
+         else
+           {
+             MPN_COPY (res_ptr + op1_size, op2_ptr + op1_size,
+                       op2_size - op1_size);
+             for (i = op1_size - 1; i >= 0; i--)
+               res_ptr[i] = op1_ptr[i] ^ op2_ptr[i];
+             res_size = op2_size;
+           }
+
+         MPN_NORMALIZE (res_ptr, res_size);
+         SIZ(res) = res_size;
+         TMP_FREE;
+         return;
+       }
+      else
+       {
+         /* We should compute -OP1 ^ OP2.  Swap OP1 and OP2 and fall
+            through to the code that handles OP1 ^ -OP2.  */
+          MPZ_SRCPTR_SWAP (op1, op2);
+          MPN_SRCPTR_SWAP (op1_ptr,op1_size, op2_ptr,op2_size);
+       }
+    }
+
+  {
+    mp_ptr opx;
+    mp_limb_t cy;
+
+    /* Operand 2 negative, so will be the result.
+       -(OP1 ^ (-OP2)) = -(OP1 ^ ~(OP2 - 1)) =
+       = ~(OP1 ^ ~(OP2 - 1)) + 1 =
+       = (OP1 ^ (OP2 - 1)) + 1      */
+
+    op2_size = -op2_size;
+
+    opx = TMP_ALLOC_LIMBS (op2_size);
+    mpn_sub_1 (opx, op2_ptr, op2_size, (mp_limb_t) 1);
+    op2_ptr = opx;
+
+    res_alloc = MAX (op1_size, op2_size) + 1;  /* one extra limb for the carry of the final add */
+    if (ALLOC(res) < res_alloc)
+      {
+       _mpz_realloc (res, res_alloc);
+       op1_ptr = PTR(op1);
+       /* op2_ptr points to temporary space.  */
+       res_ptr = PTR(res);
+      }
+
+    if (op1_size > op2_size)
+      {
+       MPN_COPY (res_ptr + op2_size, op1_ptr + op2_size, op1_size - op2_size);
+       for (i = op2_size - 1; i >= 0; i--)
+         res_ptr[i] = op1_ptr[i] ^ op2_ptr[i];
+       res_size = op1_size;
+      }
+    else
+      {
+       MPN_COPY (res_ptr + op1_size, op2_ptr + op1_size, op2_size - op1_size);
+       for (i = op1_size - 1; i >= 0; i--)
+         res_ptr[i] = op1_ptr[i] ^ op2_ptr[i];
+       res_size = op2_size;
+      }
+
+    cy = mpn_add_1 (res_ptr, res_ptr, res_size, (mp_limb_t) 1);  /* the "+ 1" of the identity above */
+    if (cy)
+      {
+       res_ptr[res_size] = cy;
+       res_size++;
+      }
+
+    MPN_NORMALIZE (res_ptr, res_size);
+    SIZ(res) = -res_size;  /* mixed signs give a negative result */
+    TMP_FREE;
+  }
+}
diff --git a/nextprime.c b/nextprime.c

new file mode 100644 (file)

index 0000000..f3e80f6
--- /dev/null
+++ b/nextprime.c
@@ -0,0 +1,156 @@
+/* gmp_nextprime -- generate small primes reasonably efficiently for internal
+   GMP needs.
+
+   Contributed to the GNU project by Torbjorn Granlund.  Miscellaneous
+   improvements by Martin Boij.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.  */
+
+/*
+  Optimisation ideas:
+
+  1. Unroll the sieving loops.  Should reach 1 write/cycle.  That would be a 2x
+     improvement.
+
+  2. Separate sieving with primes p < SIEVESIZE and p >= SIEVESIZE.  The latter
+     will need at most one write, and thus not need any inner loop.
+
+  3. For primes p >= SIEVESIZE, i.e., typically the majority of primes, we
+     perform more than one division per sieving write.  That might dominate the
+     entire run time for the nextprime function.  An incrementally initialised
+     remainder table of Pi(65536) = 6542 16-bit entries could replace that
+     division.
+*/
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include <string.h>            /* for memset */
+
+
+unsigned long int
+gmp_nextprime (gmp_primesieve_t *ps)
+{
+  unsigned long p, d, pi;
+  unsigned char *sp;
+  static unsigned char addtab[] =
+    { 2,4,2,4,6,2,6,4,2,4,6,6,2,6,4,2,6,4,6,8,4,2,4,2,4,8,6,4,6,2,4,6,2,6,6,4,
+      2,4,6,2,6,4,2,4,2,10,2,10 };  /* 48 steps summing to 210 = 2*3*5*7; used below to advance p from 11 */
+  unsigned char *addp = addtab;
+  unsigned long ai;  /* current position in addtab, wraps modulo 48 */
+  /* Return the next prime from the sieve state PS.  Entry ps->s[i] stands
+     for the number ps->s0 + 2 * i; a zero byte is an entry not marked
+     composite, a one byte is a marked multiple.  ps->d is the index at
+     which scanning resumes.  When the block is used up it is resieved for
+     the next 2 * SIEVESIZE numbers and the function calls itself.  */
+  /* Look for already sieved primes.  A sentinel at the end of the sieving
+     area allows us to use a very simple loop here.  */
+  d = ps->d;
+  sp = ps->s + d;
+  while (*sp != 0)
+    sp++;
+  if (sp != ps->s + SIEVESIZE)  /* stopped on a real entry, not on the sentinel */
+    {
+      d = sp - ps->s;
+      ps->d = d + 1;
+      return ps->s0 + 2 * d;
+    }
+
+  /* Handle the number 2 separately.  */
+  if (ps->s0 < 3)
+    {
+      ps->s0 = 3 - 2 * SIEVESIZE; /* Tricky: the resieve below adds 2 * SIEVESIZE, leaving s0 == 3 */
+      return 2;
+    }
+
+  /* Exhausted computed primes.  Resieve, then call ourselves recursively.  */
+
+#if 0
+  for (sp = ps->s; sp < ps->s + SIEVESIZE; sp++)
+    *sp = 0;
+#else
+  memset (ps->s, 0, SIEVESIZE);
+#endif
+
+  ps->s0 += 2 * SIEVESIZE;  /* the block now starts 2 * SIEVESIZE further on */
+
+  /* Update sqrt_s0 as needed.  */
+  while ((ps->sqrt_s0 + 1) * (ps->sqrt_s0 + 1) <= ps->s0 + 2 * SIEVESIZE - 1)
+    ps->sqrt_s0++;
+
+  pi = ((ps->s0 + 3) / 2) % 3;  /* pi becomes the index of the first multiple of 3 in the block */
+  if (pi > 0)
+    pi = 3 - pi;
+  if (ps->s0 + 2 * pi <= 3)  /* do not mark 3 itself */
+    pi += 3;
+  sp = ps->s + pi;
+  while (sp < ps->s + SIEVESIZE)
+    {
+      *sp = 1, sp += 3;
+    }
+
+  pi = ((ps->s0 + 5) / 2) % 5;  /* same computation for 5 */
+  if (pi > 0)
+    pi = 5 - pi;
+  if (ps->s0 + 2 * pi <= 5)
+    pi += 5;
+  sp = ps->s + pi;
+  while (sp < ps->s + SIEVESIZE)
+    {
+      *sp = 1, sp += 5;
+    }
+
+  pi = ((ps->s0 + 7) / 2) % 7;  /* same computation for 7 */
+  if (pi > 0)
+    pi = 7 - pi;
+  if (ps->s0 + 2 * pi <= 7)
+    pi += 7;
+  sp = ps->s + pi;
+  while (sp < ps->s + SIEVESIZE)
+    {
+      *sp = 1, sp += 7;
+    }
+
+  p = 11;
+  ai = 0;
+  while (p <= ps->sqrt_s0)  /* strike multiples of each p up to sqrt_s0; p follows addtab */
+    {
+      pi = ((ps->s0 + p) / 2) % p;
+      if (pi > 0)
+       pi = p - pi;
+      if (ps->s0 + 2 * pi <= p)  /* do not mark p itself */
+         pi += p;
+      sp = ps->s + pi;
+      while (sp < ps->s + SIEVESIZE)
+       {
+         *sp = 1, sp += p;
+       }
+      p += addp[ai];
+      ai = (ai + 1) % 48;
+    }
+  ps->d = 0;  /* restart scanning at the beginning of the new block */
+  return gmp_nextprime (ps);
+}
+
+void
+gmp_init_primesieve (gmp_primesieve_t *ps)
+{
+  /* Reset PS to its starting state: base and square-root bound at zero,
+     and the scan index at SIEVESIZE, i.e. on the sentinel byte.  */
+  ps->s[SIEVESIZE] = 0;                /* sentinel */
+  ps->d = SIEVESIZE;
+  ps->sqrt_s0 = 0;
+  ps->s0 = 0;
+}
diff --git a/printf/Makefile.am b/printf/Makefile.am

new file mode 100644 (file)

index 0000000..c4aead7
--- /dev/null
+++ b/printf/Makefile.am
@@ -0,0 +1,30 @@
+## Process this file with automake to generate Makefile.in
+
+# Copyright 2001, 2002 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+INCLUDES = -D__GMP_WITHIN_GMP -I$(top_srcdir)
+
+noinst_LTLIBRARIES = libprintf.la
+
+libprintf_la_SOURCES =                                                  \
+  asprintf.c asprntffuns.c doprnt.c doprntf.c doprnti.c                         \
+  fprintf.c obprintf.c obvprintf.c obprntffuns.c                        \
+  printf.c printffuns.c snprintf.c snprntffuns.c sprintf.c sprintffuns.c \
+  vasprintf.c vfprintf.c vprintf.c vsnprintf.c vsprintf.c               \
+  repl-vsnprintf.c
diff --git a/printf/Makefile.in b/printf/Makefile.in

new file mode 100644 (file)

index 0000000..ea85437
--- /dev/null
+++ b/printf/Makefile.in
@@ -0,0 +1,586 @@
+# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation,
+# Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+# Copyright 2001, 2002 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+ANSI2KNR = $(top_builddir)/ansi2knr
+subdir = printf
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
+       $(top_srcdir)/configure.in
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+       $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+LTLIBRARIES = $(noinst_LTLIBRARIES)
+libprintf_la_LIBADD =
+am_libprintf_la_OBJECTS = asprintf$U.lo asprntffuns$U.lo doprnt$U.lo \
+       doprntf$U.lo doprnti$U.lo fprintf$U.lo obprintf$U.lo \
+       obvprintf$U.lo obprntffuns$U.lo printf$U.lo printffuns$U.lo \
+       snprintf$U.lo snprntffuns$U.lo sprintf$U.lo sprintffuns$U.lo \
+       vasprintf$U.lo vfprintf$U.lo vprintf$U.lo vsnprintf$U.lo \
+       vsprintf$U.lo repl-vsnprintf$U.lo
+libprintf_la_OBJECTS = $(am_libprintf_la_OBJECTS)
+DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
+depcomp =
+am__depfiles_maybe =
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+       $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+       $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
+       $(LDFLAGS) -o $@
+SOURCES = $(libprintf_la_SOURCES)
+DIST_SOURCES = $(libprintf_la_SOURCES)
+ETAGS = etags
+CTAGS = ctags
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ABI = @ABI@
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AS = @AS@
+ASMFLAGS = @ASMFLAGS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CALLING_CONVENTIONS_OBJS = @CALLING_CONVENTIONS_OBJS@
+CC = @CC@
+CCAS = @CCAS@
+CC_FOR_BUILD = @CC_FOR_BUILD@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CPP_FOR_BUILD = @CPP_FOR_BUILD@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFN_LONG_LONG_LIMB = @DEFN_LONG_LONG_LIMB@
+DEFS = @DEFS@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+EXEEXT_FOR_BUILD = @EXEEXT_FOR_BUILD@
+FGREP = @FGREP@
+GMP_LDFLAGS = @GMP_LDFLAGS@
+GMP_LIMB_BITS = @GMP_LIMB_BITS@
+GMP_NAIL_BITS = @GMP_NAIL_BITS@
+GREP = @GREP@
+HAVE_CLOCK_01 = @HAVE_CLOCK_01@
+HAVE_CPUTIME_01 = @HAVE_CPUTIME_01@
+HAVE_GETRUSAGE_01 = @HAVE_GETRUSAGE_01@
+HAVE_GETTIMEOFDAY_01 = @HAVE_GETTIMEOFDAY_01@
+HAVE_HOST_CPU_FAMILY_power = @HAVE_HOST_CPU_FAMILY_power@
+HAVE_HOST_CPU_FAMILY_powerpc = @HAVE_HOST_CPU_FAMILY_powerpc@
+HAVE_SIGACTION_01 = @HAVE_SIGACTION_01@
+HAVE_SIGALTSTACK_01 = @HAVE_SIGALTSTACK_01@
+HAVE_SIGSTACK_01 = @HAVE_SIGSTACK_01@
+HAVE_STACK_T_01 = @HAVE_STACK_T_01@
+HAVE_SYS_RESOURCE_H_01 = @HAVE_SYS_RESOURCE_H_01@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LEX = @LEX@
+LEXLIB = @LEXLIB@
+LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
+LIBCURSES = @LIBCURSES@
+LIBGMPXX_LDFLAGS = @LIBGMPXX_LDFLAGS@
+LIBGMP_DLL = @LIBGMP_DLL@
+LIBGMP_LDFLAGS = @LIBGMP_LDFLAGS@
+LIBM = @LIBM@
+LIBM_FOR_BUILD = @LIBM_FOR_BUILD@
+LIBOBJS = @LIBOBJS@
+LIBREADLINE = @LIBREADLINE@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+M4 = @M4@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
+STRIP = @STRIP@
+TAL_OBJECT = @TAL_OBJECT@
+TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
+U = @U@
+U_FOR_BUILD = @U_FOR_BUILD@
+VERSION = @VERSION@
+WITH_READLINE_01 = @WITH_READLINE_01@
+YACC = @YACC@
+YFLAGS = @YFLAGS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__leading_dot = @am__leading_dot@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+gmp_srclinks = @gmp_srclinks@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+mpn_objects = @mpn_objects@
+mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
+mpn_objs_in_libmp = @mpn_objs_in_libmp@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+INCLUDES = -D__GMP_WITHIN_GMP -I$(top_srcdir)
+noinst_LTLIBRARIES = libprintf.la
+libprintf_la_SOURCES = \
+  asprintf.c asprntffuns.c doprnt.c doprntf.c doprnti.c                         \
+  fprintf.c obprintf.c obvprintf.c obprntffuns.c                        \
+  printf.c printffuns.c snprintf.c snprntffuns.c sprintf.c sprintffuns.c \
+  vasprintf.c vfprintf.c vprintf.c vsnprintf.c vsprintf.c               \
+  repl-vsnprintf.c
+
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps)
+       @for dep in $?; do \
+         case '$(am__configure_deps)' in \
+           *$$dep*) \
+             ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+               && { if test -f $@; then exit 0; else break; fi; }; \
+             exit 1;; \
+         esac; \
+       done; \
+       echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps printf/Makefile'; \
+       $(am__cd) $(top_srcdir) && \
+         $(AUTOMAKE) --gnu --ignore-deps printf/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+       @case '$?' in \
+         *config.status*) \
+           cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+         *) \
+           echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+           cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+       esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+clean-noinstLTLIBRARIES:
+       -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES)
+       @list='$(noinst_LTLIBRARIES)'; for p in $$list; do \
+         dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \
+         test "$$dir" != "$$p" || dir=.; \
+         echo "rm -f \"$${dir}/so_locations\""; \
+         rm -f "$${dir}/so_locations"; \
+       done
+libprintf.la: $(libprintf_la_OBJECTS) $(libprintf_la_DEPENDENCIES) 
+       $(LINK)  $(libprintf_la_OBJECTS) $(libprintf_la_LIBADD) $(LIBS)
+
+mostlyclean-compile:
+       -rm -f *.$(OBJEXT)
+
+distclean-compile:
+       -rm -f *.tab.c
+$(top_builddir)/ansi2knr:
+       $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
+
+mostlyclean-kr:
+       -test "$U" = "" || rm -f *_.c
+
+.c.o:
+       $(COMPILE) -c $<
+
+.c.obj:
+       $(COMPILE) -c `$(CYGPATH_W) '$<'`
+
+.c.lo:
+       $(LTCOMPILE) -c -o $@ $<
+asprintf_.c: asprintf.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/asprintf.c; then echo $(srcdir)/asprintf.c; else echo asprintf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+asprntffuns_.c: asprntffuns.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/asprntffuns.c; then echo $(srcdir)/asprntffuns.c; else echo asprntffuns.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+doprnt_.c: doprnt.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/doprnt.c; then echo $(srcdir)/doprnt.c; else echo doprnt.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+doprntf_.c: doprntf.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/doprntf.c; then echo $(srcdir)/doprntf.c; else echo doprntf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+doprnti_.c: doprnti.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/doprnti.c; then echo $(srcdir)/doprnti.c; else echo doprnti.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+fprintf_.c: fprintf.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fprintf.c; then echo $(srcdir)/fprintf.c; else echo fprintf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+obprintf_.c: obprintf.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/obprintf.c; then echo $(srcdir)/obprintf.c; else echo obprintf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+obprntffuns_.c: obprntffuns.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/obprntffuns.c; then echo $(srcdir)/obprntffuns.c; else echo obprntffuns.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+obvprintf_.c: obvprintf.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/obvprintf.c; then echo $(srcdir)/obvprintf.c; else echo obvprintf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+printf_.c: printf.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/printf.c; then echo $(srcdir)/printf.c; else echo printf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+printffuns_.c: printffuns.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/printffuns.c; then echo $(srcdir)/printffuns.c; else echo printffuns.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+repl-vsnprintf_.c: repl-vsnprintf.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/repl-vsnprintf.c; then echo $(srcdir)/repl-vsnprintf.c; else echo repl-vsnprintf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+snprintf_.c: snprintf.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/snprintf.c; then echo $(srcdir)/snprintf.c; else echo snprintf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+snprntffuns_.c: snprntffuns.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/snprntffuns.c; then echo $(srcdir)/snprntffuns.c; else echo snprntffuns.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+sprintf_.c: sprintf.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sprintf.c; then echo $(srcdir)/sprintf.c; else echo sprintf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+sprintffuns_.c: sprintffuns.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sprintffuns.c; then echo $(srcdir)/sprintffuns.c; else echo sprintffuns.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+vasprintf_.c: vasprintf.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/vasprintf.c; then echo $(srcdir)/vasprintf.c; else echo vasprintf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+vfprintf_.c: vfprintf.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/vfprintf.c; then echo $(srcdir)/vfprintf.c; else echo vfprintf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+vprintf_.c: vprintf.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/vprintf.c; then echo $(srcdir)/vprintf.c; else echo vprintf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+vsnprintf_.c: vsnprintf.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/vsnprintf.c; then echo $(srcdir)/vsnprintf.c; else echo vsnprintf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+vsprintf_.c: vsprintf.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/vsprintf.c; then echo $(srcdir)/vsprintf.c; else echo vsprintf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+asprintf_.$(OBJEXT) asprintf_.lo asprntffuns_.$(OBJEXT) \
+asprntffuns_.lo doprnt_.$(OBJEXT) doprnt_.lo doprntf_.$(OBJEXT) \
+doprntf_.lo doprnti_.$(OBJEXT) doprnti_.lo fprintf_.$(OBJEXT) \
+fprintf_.lo obprintf_.$(OBJEXT) obprintf_.lo obprntffuns_.$(OBJEXT) \
+obprntffuns_.lo obvprintf_.$(OBJEXT) obvprintf_.lo printf_.$(OBJEXT) \
+printf_.lo printffuns_.$(OBJEXT) printffuns_.lo \
+repl-vsnprintf_.$(OBJEXT) repl-vsnprintf_.lo snprintf_.$(OBJEXT) \
+snprintf_.lo snprntffuns_.$(OBJEXT) snprntffuns_.lo sprintf_.$(OBJEXT) \
+sprintf_.lo sprintffuns_.$(OBJEXT) sprintffuns_.lo \
+vasprintf_.$(OBJEXT) vasprintf_.lo vfprintf_.$(OBJEXT) vfprintf_.lo \
+vprintf_.$(OBJEXT) vprintf_.lo vsnprintf_.$(OBJEXT) vsnprintf_.lo \
+vsprintf_.$(OBJEXT) vsprintf_.lo : $(ANSI2KNR)
+
+mostlyclean-libtool:
+       -rm -f *.lo
+
+clean-libtool:
+       -rm -rf .libs _libs
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+       list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       mkid -fID $$unique
+tags: TAGS
+
+TAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+               $(TAGS_FILES) $(LISP)
+       set x; \
+       here=`pwd`; \
+       list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       shift; \
+       if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+         test -n "$$unique" || unique=$$empty_fix; \
+         if test $$# -gt 0; then \
+           $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+             "$$@" $$unique; \
+         else \
+           $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+             $$unique; \
+         fi; \
+       fi
+ctags: CTAGS
+CTAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+               $(TAGS_FILES) $(LISP)
+       list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       test -z "$(CTAGS_ARGS)$$unique" \
+         || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+            $$unique
+
+GTAGS:
+       here=`$(am__cd) $(top_builddir) && pwd` \
+         && $(am__cd) $(top_srcdir) \
+         && gtags -i $(GTAGS_ARGS) "$$here"
+
+distclean-tags:
+       -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+       @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+       topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+       list='$(DISTFILES)'; \
+         dist_files=`for file in $$list; do echo $$file; done | \
+         sed -e "s|^$$srcdirstrip/||;t" \
+             -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+       case $$dist_files in \
+         */*) $(MKDIR_P) `echo "$$dist_files" | \
+                          sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+                          sort -u` ;; \
+       esac; \
+       for file in $$dist_files; do \
+         if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+         if test -d $$d/$$file; then \
+           dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+           if test -d "$(distdir)/$$file"; then \
+             find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+           fi; \
+           if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+             cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+             find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+           fi; \
+           cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+         else \
+           test -f "$(distdir)/$$file" \
+           || cp -p $$d/$$file "$(distdir)/$$file" \
+           || exit 1; \
+         fi; \
+       done
+check-am: all-am
+check: check-am
+all-am: Makefile $(LTLIBRARIES)
+installdirs:
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+       @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+       $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+         install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+         `test -z '$(STRIP)' || \
+           echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+       -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+       -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+       @echo "This command is intended for maintainers to use"
+       @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \
+       mostlyclean-am
+
+distclean: distclean-am
+       -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+       distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+       -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+       mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: $(top_builddir)/ansi2knr install-am install-strip
+
+.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
+       clean-libtool clean-noinstLTLIBRARIES ctags distclean \
+       distclean-compile distclean-generic distclean-libtool \
+       distclean-tags distdir dvi dvi-am html html-am info info-am \
+       install install-am install-data install-data-am install-dvi \
+       install-dvi-am install-exec install-exec-am install-html \
+       install-html-am install-info install-info-am install-man \
+       install-pdf install-pdf-am install-ps install-ps-am \
+       install-strip installcheck installcheck-am installdirs \
+       maintainer-clean maintainer-clean-generic mostlyclean \
+       mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+       mostlyclean-libtool pdf pdf-am ps ps-am tags uninstall \
+       uninstall-am
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/printf/asprintf.c b/printf/asprintf.c

new file mode 100644 (file)

index 0000000..e2edef6
--- /dev/null
+++ b/printf/asprintf.c
@@ -0,0 +1,56 @@
+/* gmp_asprintf -- formatted output to an allocated space.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#if HAVE_STDARG
+#include <stdarg.h>
+#else
+#include <varargs.h>
+#endif
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+int
+#if HAVE_STDARG
+gmp_asprintf (char **result, const char *fmt, ...)
+#else
+gmp_asprintf (va_alist)
+     va_dcl
+#endif
+{
+  va_list  ap;
+  int      ret;
+
+#if HAVE_STDARG
+  va_start (ap, fmt);
+#else
+  char       **result;
+  const char *fmt;
+  va_start (ap);
+  result = va_arg (ap, char **);
+  fmt = va_arg (ap, const char *);
+#endif
+
+  ret = gmp_vasprintf (result, fmt, ap);
+  va_end (ap);
+  return ret;
+}
diff --git a/printf/asprntffuns.c b/printf/asprntffuns.c

new file mode 100644 (file)

index 0000000..1a2b9e1
--- /dev/null
+++ b/printf/asprntffuns.c
@@ -0,0 +1,68 @@
+/* __gmp_asprintf_memory etc -- formatted output to allocated space.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+/* These routines are in a separate file so that the mpz_t, mpq_t and mpf_t
+   operator<< routines can avoid dragging vsnprintf into the link (via
+   __gmp_asprintf_format).  */
+
+#include "config.h"
+
+#if HAVE_STDARG
+#include <stdarg.h>
+#else
+#include <varargs.h>
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Append the LEN bytes at STR to the buffer described by D, advance d->size
+   by LEN, and return LEN (converted to int).
+   NOTE(review): GMP_ASPRINTF_T_NEED is defined elsewhere; presumably it makes
+   sure d->buf has room for LEN more bytes and may move the buffer, which is
+   why the destination pointer is only formed after it -- confirm in
+   gmp-impl.h.  */
+int
+__gmp_asprintf_memory (struct gmp_asprintf_t *d, const char *str, size_t len)
+{
+  char  *dst;
+
+  GMP_ASPRINTF_T_NEED (d, len);
+
+  dst = d->buf + d->size;
+  memcpy (dst, str, len);
+  d->size += len;
+
+  return len;
+}
+
+/* Append REPS copies of the character C to the buffer described by D,
+   advance d->size by REPS, and return REPS.
+   NOTE(review): GMP_ASPRINTF_T_NEED is defined elsewhere; presumably it makes
+   sure d->buf has room for REPS more bytes and may move the buffer, which is
+   why the destination pointer is only formed after it -- confirm in
+   gmp-impl.h.  */
+int
+__gmp_asprintf_reps (struct gmp_asprintf_t *d, int c, int reps)
+{
+  char  *dst;
+
+  GMP_ASPRINTF_T_NEED (d, reps);
+
+  dst = d->buf + d->size;
+  memset (dst, c, reps);
+  d->size += reps;
+
+  return reps;
+}
+
+/* Finish the string being built in D: store a terminating '\0' at
+   d->buf[d->size], hand the buffer to __GMP_REALLOCATE_FUNC_MAYBE_TYPE with
+   old size d->alloc and new size d->size+1, store the resulting pointer in
+   *d->result, and return 0.
+   NOTE(review): the macro presumably resizes the allocation to exactly
+   d->size+1 bytes -- confirm in gmp-impl.h.  */
+int
+__gmp_asprintf_final (struct gmp_asprintf_t *d)
+{
+  char  *out;
+
+  /* The terminator needs one byte beyond the d->size bytes already used.  */
+  ASSERT (d->alloc >= d->size + 1);
+  d->buf[d->size] = '\0';
+
+  out = d->buf;
+  __GMP_REALLOCATE_FUNC_MAYBE_TYPE (out, d->alloc, d->size+1, char);
+  *d->result = out;
+
+  return 0;
+}
diff --git a/printf/doprnt.c b/printf/doprnt.c

new file mode 100644 (file)

index 0000000..c1ee0a2
--- /dev/null
+++ b/printf/doprnt.c
@@ -0,0 +1,621 @@
+/* __gmp_doprnt -- printf style formatted output.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define _GNU_SOURCE    /* for DECIMAL_POINT in glibc langinfo.h */
+
+#include "config.h"
+
+#if HAVE_STDARG
+#include <stdarg.h>
+#else
+#include <varargs.h>
+#endif
+
+#include <ctype.h>     /* for isdigit */
+#include <stddef.h>    /* for ptrdiff_t */
+#include <string.h>
+#include <stdio.h>     /* for NULL */
+#include <stdlib.h>
+
+#if HAVE_INTTYPES_H
+# include <inttypes.h> /* for intmax_t */
+#else
+# if HAVE_STDINT_H
+#  include <stdint.h>
+# endif
+#endif
+
+#if HAVE_LANGINFO_H
+#include <langinfo.h>  /* for nl_langinfo */
+#endif
+
+#if HAVE_LOCALE_H
+#include <locale.h>    /* for localeconv */
+#endif
+
+#if HAVE_SYS_TYPES_H
+#include <sys/types.h> /* for quad_t */
+#endif
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* change this to "#define TRACE(x) x" for diagnostics */
+#define TRACE(x)
+
+
+/* Bytewise comparison of two va_list objects.  This should be portable, but
+   in any case it's only used under some ASSERTs. */
+#define va_equal(a, b)                           \
+  (memcmp (&(a), &(b), sizeof (va_list)) == 0)
+
+
+/* printf is convenient because it allows various types to be printed in one
+   fairly compact call, so having gmp_printf support the standard types as
+   well as the gmp ones is important.  This ends up meaning all the standard
+   parsing must be duplicated, to get a new routine recognising the gmp
+   extras.
+
+   With the currently favoured handling of mpz etc as Z, Q and F type
+   markers, it's not possible to use glibc register_printf_function since
+   that only accepts new conversion characters, not new types.  If Z was a
+   conversion there'd be no way to specify hex, decimal or octal, or
+   similarly with F no way to specify fixed point or scientific format.
+
+   It seems wisest to pass conversions %f, %e and %g of float, double and
+   long double over to the standard printf.  It'd be hard to be sure of
+   getting the right handling for NaNs, rounding, etc.  Integer conversions
+   %d etc and string conversions %s on the other hand could be easily enough
+   handled within gmp_doprnt, but if floats are going to libc then it's just
+   as easy to send all non-gmp types there.
+
+   "Z" was a type marker for size_t in old glibc, but there seems no need to
+   provide access to that now "z" is standard.
+
+   In GMP 4.1.1 we documented "ll" and "L" as being equivalent, but in C99
+   in fact "ll" is just for long long and "L" just for long double.
+   Apparently GLIBC allows "L" for long long though.  This doesn't affect
+   us as such, since both are passed through to the C library.  To be
+   consistent with what we said before, the two are treated equivalently
+   here, and it's left to the C library to do what it thinks with them.
+
+   Possibilities:
+
+   "b" might be nice for binary output, and could even be supported for the
+   standard C types too if desired.
+
+   POSIX style "%n$" parameter numbering would be possible, but would need
+   to be handled completely within gmp_doprnt, since the numbering will be
+   all different once the format string is cut into pieces.
+
+   Some options for mpq formatting would be good.  Perhaps a non-zero
+   precision field could give a width for the denominator and mean always
+   put a "/".  A form "n+p/q" might be interesting too, though perhaps that's
+   better left to applications.
+
+   Right now there's no way for an application to know whether types like
+   intmax_t are supported here.  If configure is doing its job and the same
+   compiler is used for gmp as for the application then there shouldn't be
+   any problem, but perhaps gmp.h should have some preprocessor symbols to
+   say what libgmp can do.  */
+
+
+
+/* FLUSH() sends the pending non-gmp part of the format, from last_fmt up
+   to but not including this_fmt, to DOPRNT_FORMAT along with last_ap.  The
+   '%' at this_fmt is overwritten with '\0' to terminate that piece.  The
+   macro uses the this_fmt, last_fmt, this_ap and last_ap variables of the
+   function it's expanded in.
+
+   If a gmp format is the very first thing or there are two gmp formats with
+   nothing in between then we'll reach here with this_fmt == last_fmt and we
+   can do nothing in that case.
+
+   last_ap is always replaced after a FLUSH, so it doesn't matter if va_list
+   is a call-by-reference and the funs->format routine modifies it.  */
+
+#define FLUSH()                                         \
+  do {                                                  \
+    if (this_fmt == last_fmt)                           \
+      {                                                 \
+       TRACE (printf ("nothing to flush\n"));          \
+       ASSERT (va_equal (this_ap, last_ap));           \
+      }                                                 \
+    else                                                \
+      {                                                 \
+       ASSERT (*this_fmt == '%');                      \
+       *this_fmt = '\0';                               \
+       TRACE (printf ("flush \"%s\"\n", last_fmt));    \
+       DOPRNT_FORMAT (last_fmt, last_ap);              \
+      }                                                 \
+  } while (0)
+
+
+/* Parse up the given format string and do the appropriate output using the
+   given "funs" routines.  The data parameter is passed through to those
+   routines.
+
+   orig_fmt is copied before being cut up, and orig_ap is copied with
+   va_copy, so neither of the caller's objects is modified here.
+
+   Stretches of the format containing only standard C conversions are
+   handed on through DOPRNT_FORMAT (see FLUSH and the end of the function).
+   The gmp types F, N, Q and Z are converted here and output through
+   __gmp_doprnt_mpf and __gmp_doprnt_integer.
+
+   The return value is retval, which starts at 0 and is presumably added to
+   by the DOPRNT_FORMAT and DOPRNT_ACCUMULATE macros (defined elsewhere), or
+   -1 if the "error" label is reached, for instance when funs->final
+   returns -1.  */
+
+int
+__gmp_doprnt (const struct doprnt_funs_t *funs, void *data,
+             const char *orig_fmt, va_list orig_ap)
+{
+  va_list  ap, this_ap, last_ap;
+  size_t   alloc_fmt_size;
+  char     *fmt, *alloc_fmt, *last_fmt, *this_fmt, *gmp_str;
+  int      retval = 0;
+  int      type, fchar, *value, seen_precision;
+  struct doprnt_params_t param;
+
+  TRACE (printf ("gmp_doprnt \"%s\"\n", orig_fmt));
+
+  /* Don't modify orig_ap, if va_list is actually an array and hence call by
+     reference.  It could be argued that it'd be more efficient to leave the
+     caller to make a copy if it cared, but doing so here is going to be a
+     very small part of the total work, and we may as well keep applications
+     out of trouble.  */
+  va_copy (ap, orig_ap);
+
+  /* The format string is chopped up into pieces to be passed to
+     funs->format.  Unfortunately that means it has to be copied so each
+     piece can be null-terminated.  We're not going to be very fast here, so
+     use __gmp_allocate_func rather than TMP_ALLOC, to avoid overflowing the
+     stack if a long output string is given.  */
+  alloc_fmt_size = strlen (orig_fmt) + 1;
+#if _LONG_LONG_LIMB
+  /* for a long long limb we change %Mx to %llx, so could need an extra 1
+     char for every 3 existing */
+  alloc_fmt_size += alloc_fmt_size / 3;
+#endif
+  alloc_fmt = __GMP_ALLOCATE_FUNC_TYPE (alloc_fmt_size, char);
+  fmt = alloc_fmt;
+  strcpy (fmt, orig_fmt);
+
+  /* last_fmt and last_ap are just after the last output, and hence where
+     the next output will begin, when that's done */
+  last_fmt = fmt;
+  va_copy (last_ap, ap);
+
+  for (;;)
+    {
+      TRACE (printf ("next: \"%s\"\n", fmt));
+
+      fmt = strchr (fmt, '%');
+      if (fmt == NULL)
+       break;
+
+      /* this_fmt and this_ap are the current '%' sequence being considered */
+      this_fmt = fmt;
+      va_copy (this_ap, ap);
+      fmt++; /* skip the '%' */
+
+      TRACE (printf ("considering\n");
+            printf ("  last: \"%s\"\n", last_fmt);
+            printf ("  this: \"%s\"\n", this_fmt));
+
+      /* Defaults for a new conversion.  "value" is where a number or '*'
+        in the format gets stored, it's switched to &param.prec when a '.'
+        is seen.  */
+      type = '\0';
+      value = &param.width;
+
+      param.base = 10;
+      param.conv = 0;
+      param.expfmt = "e%c%02ld";
+      param.exptimes4 = 0;
+      param.fill = ' ';
+      param.justify = DOPRNT_JUSTIFY_RIGHT;
+      param.prec = 6;
+      param.showbase = DOPRNT_SHOWBASE_NO;
+      param.showpoint = 0;
+      param.showtrailing = 1;
+      param.sign = '\0';
+      param.width = 0;
+      seen_precision = 0;
+
+      /* This loop parses a single % sequence.  "break" from the switch
+        means continue with this %, "goto next" means the conversion
+        character has been seen and a new % should be sought.  */
+      for (;;)
+       {
+         fchar = *fmt++;
+         if (fchar == '\0')
+           {
+             /* The format ended in the middle of a conversion.  Step back
+                onto the terminator, so the strchr in the outer loop stops
+                there rather than searching memory beyond the end of the
+                string.  The unfinished piece is then part of the
+                remainder passed to DOPRNT_FORMAT below.  */
+             fmt--;
+             break;
+           }
+
+         switch (fchar) {
+
+         case 'a':
+           /* %a behaves like %e, but defaults to all significant digits,
+              and there's no leading zeros on the exponent (which is in
+              fact bit-based) */
+           param.base = 16;
+           param.expfmt = "p%c%ld";
+           goto conv_a;
+         case 'A':
+           param.base = -16;
+           param.expfmt = "P%c%ld";
+         conv_a:
+           param.conv = DOPRNT_CONV_SCIENTIFIC;
+           param.exptimes4 = 1;
+           if (! seen_precision)
+             param.prec = -1;  /* default to all digits */
+           param.showbase = DOPRNT_SHOWBASE_YES;
+           param.showtrailing = 1;
+           goto floating_a;
+
+         case 'c':
+           /* Let's assume wchar_t will be promoted to "int" in the call,
+              the same as char will be. */
+           (void) va_arg (ap, int);
+           goto next;
+
+         case 'd':
+         case 'i':
+         case 'u':
+         integer:
+           TRACE (printf ("integer, base=%d\n", param.base));
+           if (! seen_precision)
+             param.prec = -1;
+           /* For the standard C types the argument is only stepped over
+              here, the conversion stays in the format for a later
+              DOPRNT_FORMAT.  */
+           switch (type) {
+           case 'j':
+             /* Let's assume uintmax_t is the same size as intmax_t. */
+#if HAVE_INTMAX_T
+             (void) va_arg (ap, intmax_t);
+#else
+             ASSERT_FAIL (intmax_t not available);
+#endif
+             break;
+           case 'l':
+             (void) va_arg (ap, long);
+             break;
+           case 'L':
+#if HAVE_LONG_LONG
+             (void) va_arg (ap, long long);
+#else
+             ASSERT_FAIL (long long not available);
+#endif
+             break;
+           case 'N':
+             {
+               mp_ptr     xp;
+               mp_size_t  xsize, abs_xsize;
+               mpz_t      z;
+               FLUSH ();
+               xp = va_arg (ap, mp_ptr);
+               PTR(z) = xp;
+               xsize = (int) va_arg (ap, mp_size_t);
+               abs_xsize = ABS (xsize);
+               MPN_NORMALIZE (xp, abs_xsize);
+               SIZ(z) = (xsize >= 0 ? abs_xsize : -abs_xsize);
+               ASSERT_CODE (ALLOC(z) = abs_xsize);
+               gmp_str = mpz_get_str (NULL, param.base, z);
+               goto gmp_integer;
+             }
+             /* break; */
+           case 'q':
+             /* quad_t is probably the same as long long, but let's treat
+                it separately just to be sure.  Also let's assume u_quad_t
+                will be the same size as quad_t.  */
+#if HAVE_QUAD_T
+             (void) va_arg (ap, quad_t);
+#else
+             ASSERT_FAIL (quad_t not available);
+#endif
+             break;
+           case 'Q':
+             FLUSH ();
+             gmp_str = mpq_get_str (NULL, param.base, va_arg(ap, mpq_srcptr));
+             goto gmp_integer;
+           case 't':
+#if HAVE_PTRDIFF_T
+             (void) va_arg (ap, ptrdiff_t);
+#else
+             ASSERT_FAIL (ptrdiff_t not available);
+#endif
+             break;
+           case 'z':
+             (void) va_arg (ap, size_t);
+             break;
+           case 'Z':
+             {
+               int   ret;
+               FLUSH ();
+               gmp_str = mpz_get_str (NULL, param.base,
+                                      va_arg (ap, mpz_srcptr));
+             gmp_integer:
+               /* gmp_str is released before DOPRNT_ACCUMULATE looks at
+                  ret */
+               ret = __gmp_doprnt_integer (funs, data, &param, gmp_str);
+               (*__gmp_free_func) (gmp_str, strlen(gmp_str)+1);
+               DOPRNT_ACCUMULATE (ret);
+               va_copy (last_ap, ap);
+               last_fmt = fmt;
+             }
+             break;
+           default:
+             /* default is an "int", and this includes h=short and hh=char
+                since they're promoted to int in a function call */
+             (void) va_arg (ap, int);
+             break;
+           }
+           goto next;
+
+         case 'E':
+           param.base = -10;
+           param.expfmt = "E%c%02ld";
+           /*FALLTHRU*/
+         case 'e':
+           param.conv = DOPRNT_CONV_SCIENTIFIC;
+         floating:
+           if (param.showbase == DOPRNT_SHOWBASE_NONZERO)
+             {
+               /* # in %e, %f and %g */
+               param.showpoint = 1;
+               param.showtrailing = 1;
+             }
+         floating_a:
+           switch (type) {
+           case 'F':
+             FLUSH ();
+             DOPRNT_ACCUMULATE (__gmp_doprnt_mpf (funs, data, &param,
+                                                  GMP_DECIMAL_POINT,
+                                                  va_arg (ap, mpf_srcptr)));
+             va_copy (last_ap, ap);
+             last_fmt = fmt;
+             break;
+           case 'L':
+#if HAVE_LONG_DOUBLE
+             (void) va_arg (ap, long double);
+#else
+             ASSERT_FAIL (long double not available);
+#endif
+             break;
+           default:
+             (void) va_arg (ap, double);
+             break;
+           }
+           goto next;
+
+         case 'f':
+           param.conv = DOPRNT_CONV_FIXED;
+           goto floating;
+
+         case 'F': /* mpf_t     */
+         case 'j': /* intmax_t  */
+         case 'L': /* long long */
+         case 'N': /* mpn       */
+         case 'q': /* quad_t    */
+         case 'Q': /* mpq_t     */
+         case 't': /* ptrdiff_t */
+         case 'z': /* size_t    */
+         case 'Z': /* mpz_t     */
+         set_type:
+           type = fchar;
+           break;
+
+         case 'G':
+           param.base = -10;
+           param.expfmt = "E%c%02ld";
+           /*FALLTHRU*/
+         case 'g':
+           param.conv = DOPRNT_CONV_GENERAL;
+           param.showtrailing = 0;
+           goto floating;
+
+         case 'h':
+           if (type != 'h')
+             goto set_type;
+           type = 'H';   /* internal code for "hh" */
+           break;
+
+         case 'l':
+           if (type != 'l')
+             goto set_type;
+           type = 'L';   /* "ll" means "L" */
+           break;
+
+         case 'm':
+           /* glibc strerror(errno), no argument */
+           goto next;
+
+         case 'M': /* mp_limb_t */
+           /* mung format string to l or ll and let plain printf handle it */
+#if _LONG_LONG_LIMB
+           memmove (fmt+1, fmt, strlen (fmt)+1);
+           fmt[-1] = 'l';
+           fmt[0] = 'l';
+           fmt++;
+           type = 'L';
+#else
+           fmt[-1] = 'l';
+           type = 'l';
+#endif
+           break;
+
+         case 'n':
+           {
+             void  *p;
+             FLUSH ();
+             p = va_arg (ap, void *);
+             switch (type) {
+             case '\0': * (int       *) p = retval; break;
+             case 'F':  mpf_set_si ((mpf_ptr) p, (long) retval); break;
+             case 'H':  * (char      *) p = retval; break;
+             case 'h':  * (short     *) p = retval; break;
+#if HAVE_INTMAX_T
+             case 'j':  * (intmax_t  *) p = retval; break;
+#else
+             case 'j':  ASSERT_FAIL (intmax_t not available); break;
+#endif
+             case 'l':  * (long      *) p = retval; break;
+#if HAVE_QUAD_T && HAVE_LONG_LONG
+             case 'q':
+               ASSERT_ALWAYS (sizeof (quad_t) == sizeof (long long));
+               /*FALLTHRU*/
+#else
+             case 'q':  ASSERT_FAIL (quad_t not available); break;
+#endif
+#if HAVE_LONG_LONG
+             case 'L':  * (long long *) p = retval; break;
+#else
+             case 'L':  ASSERT_FAIL (long long not available); break;
+#endif
+             case 'N':
+               {
+                 mp_size_t  n;
+                 n = va_arg (ap, mp_size_t);
+                 n = ABS (n);
+                 if (n != 0)
+                   {
+                     * (mp_ptr) p = retval;
+                     MPN_ZERO ((mp_ptr) p + 1, n - 1);
+                   }
+               }
+               break;
+             case 'Q':  mpq_set_si ((mpq_ptr) p, (long) retval, 1L); break;
+#if HAVE_PTRDIFF_T
+             case 't':  * (ptrdiff_t *) p = retval; break;
+#else
+             case 't':  ASSERT_FAIL (ptrdiff_t not available); break;
+#endif
+             case 'z':  * (size_t    *) p = retval; break;
+             case 'Z':  mpz_set_si ((mpz_ptr) p, (long) retval); break;
+             }
+           }
+           va_copy (last_ap, ap);
+           last_fmt = fmt;
+           goto next;
+
+         case 'o':
+           param.base = 8;
+           goto integer;
+
+         case 'p':
+         case 's':
+           /* "void *" will be good enough for "char *" or "wchar_t *", no
+              need for separate code.  */
+           (void) va_arg (ap, const void *);
+           goto next;
+
+         case 'x':
+           param.base = 16;
+           goto integer;
+         case 'X':
+           param.base = -16;
+           goto integer;
+
+         case '%':
+           goto next;
+
+         case '#':
+           param.showbase = DOPRNT_SHOWBASE_NONZERO;
+           break;
+
+         case '\'':
+           /* glibc digit grouping, just pass it through, no support for it
+              on gmp types */
+           break;
+
+         case '+':
+         case ' ':
+           param.sign = fchar;
+           break;
+
+         case '-':
+           param.justify = DOPRNT_JUSTIFY_LEFT;
+           break;
+         case '.':
+           seen_precision = 1;
+           param.prec = -1; /* "." alone means all necessary digits */
+           value = &param.prec;
+           break;
+
+         case '*':
+           {
+             int n = va_arg (ap, int);
+
+             if (value == &param.width)
+               {
+                 /* negative width means left justify */
+                 if (n < 0)
+                   {
+                     param.justify = DOPRNT_JUSTIFY_LEFT;
+                     n = -n;
+                   }
+                 param.width = n;
+               }
+             else
+               {
+                 /* don't allow negative precision */
+                 param.prec = MAX (0, n);
+               }
+           }
+           break;
+
+         case '0':
+           if (value == &param.width)
+             {
+               /* in width field, set fill */
+               param.fill = '0';
+
+               /* for right justify, put the fill after any minus sign */
+               if (param.justify == DOPRNT_JUSTIFY_RIGHT)
+                 param.justify = DOPRNT_JUSTIFY_INTERNAL;
+             }
+           else
+             {
+               /* in precision field, set value */
+               *value = 0;
+             }
+           break;
+
+         case '1': case '2': case '3': case '4': case '5':
+         case '6': case '7': case '8': case '9':
+           /* process all digits to form a value */
+           {
+             int  n = 0;
+             do {
+               n = n * 10 + (fchar-'0');
+               fchar = *fmt++;
+             } while (isascii (fchar) && isdigit (fchar));
+             fmt--; /* unget the non-digit */
+             *value = n;
+           }
+           break;
+
+         default:
+           /* something invalid */
+           ASSERT (0);
+           goto next;
+         }
+       }
+
+    next:
+      /* Stop parsing the current "%" format, look for a new one. */
+      ;
+    }
+
+  /* Whatever is left after the last gmp conversion, which is the whole
+     format if there weren't any. */
+  TRACE (printf ("remainder: \"%s\"\n", last_fmt));
+  if (*last_fmt != '\0')
+    DOPRNT_FORMAT (last_fmt, last_ap);
+
+  if (funs->final != NULL)
+    if ((*funs->final) (data) == -1)
+      goto error;
+
+ done:
+  (*__gmp_free_func) (alloc_fmt, alloc_fmt_size);
+  return retval;
+
+ error:
+  retval = -1;
+  goto done;
+}
diff --git a/printf/doprntf.c b/printf/doprntf.c

new file mode 100644 (file)

index 0000000..0a1928b
--- /dev/null
+++ b/printf/doprntf.c
@@ -0,0 +1,385 @@
+/* __gmp_doprnt_mpf -- mpf formatted output.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2001, 2002, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#if HAVE_STDARG
+#include <stdarg.h>    /* for va_list and hence doprnt_funs_t */
+#else
+#include <varargs.h>
+#endif
+
+#include <ctype.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* change this to "#define TRACE(x) x" for diagnostics */
+#define TRACE(x)
+
+
+/* The separation of __gmp_doprnt_float_digits and __gmp_doprnt_float is so
+   some C++ can do the mpf_get_str and release it in case of an exception */
+
+/* Numeric value of the digit character c: '0' to '9' give 0 to 9, and a
+   letter gives 10 upwards counting from 'a' if islower(c) or from 'A'
+   otherwise.  c is evaluated more than once.  */
+#define DIGIT_VALUE(c)                          \
+  (isdigit (c)                                  \
+   ? (c) - '0'                                  \
+   : (c) - (islower (c) ? 'a' : 'A') + 10)
+
+/* Output the mpf value f through the "funs" routines, laid out according
+   to the parameters in p (conv, base, prec, width, fill, justify, sign,
+   showbase, showpoint, showtrailing, expfmt, exptimes4), using "point" as
+   the radix point string.  The data parameter is only handed on to the
+   DOPRNT_ output macros.
+
+   The digits come from mpf_get_str.  The output is assembled from these
+   pieces, in order: padding for right justify, sign, base prefix, padding
+   for internal justify, integer digits, integer zeros, radix point,
+   fraction zeros, fraction digits, precision zeros, exponent, padding for
+   left justify.
+
+   The return value is retval, which starts at 0, or -1 if the "error"
+   label is reached.  The DOPRNT_REPS and DOPRNT_MEMORY macros are defined
+   elsewhere, presumably they add to retval and jump to "error" on failure.
+   The mpf_get_str string is freed on both paths.  */
+int
+__gmp_doprnt_mpf (const struct doprnt_funs_t *funs,
+                 void *data,
+                 const struct doprnt_params_t *p,
+                 const char *point,
+                 mpf_srcptr f)
+{
+  int         prec, ndigits, free_size, len, newlen, justify, justlen, explen;
+  int         showbaselen, sign, signlen, intlen, intzeros, pointlen;
+  int         fraczeros, fraclen, preczeros;
+  char        *s, *free_ptr;
+  mp_exp_t    exp;
+  char        exponent[GMP_LIMB_BITS + 10];
+  const char  *showbase;
+  int         retval = 0;
+
+  TRACE (printf ("__gmp_doprnt_float\n");
+        printf ("  conv=%d prec=%d\n", p->conv, p->prec));
+
+  /* Decide how many digits to ask mpf_get_str for, ndigits==0 is passed
+     for the "all digits" case.  */
+  prec = p->prec;
+  if (prec <= -1)
+    {
+      /* all digits */
+      ndigits = 0;
+
+      /* arrange the fixed/scientific decision on a "prec" implied by how
+        many significant digits there are */
+      if (p->conv == DOPRNT_CONV_GENERAL)
+       MPF_SIGNIFICANT_DIGITS (prec, PREC(f), ABS(p->base));
+    }
+  else
+    {
+      switch (p->conv) {
+      case DOPRNT_CONV_FIXED:
+       /* Precision is digits after the radix point.  Try not to generate
+          too many more than will actually be required.  If f>=1 then
+          overestimate the integer part, and add prec.  If f<1 then
+          underestimate the zeros between the radix point and the first
+          digit and subtract that from prec.  In either case add 2 so the
+          round to nearest can be applied accurately.  Finally, we add 1 to
+          handle the case of 1-eps where EXP(f) = 0 but mpf_get_str returns
+          exp as 1.  */
+       ndigits = prec + 2 + 1
+         + EXP(f) * (mp_bases[ABS(p->base)].chars_per_limb + (EXP(f)>=0));
+       ndigits = MAX (ndigits, 1);
+       break;
+
+      case DOPRNT_CONV_SCIENTIFIC:
+       /* precision is digits after the radix point, and there's one digit
+          before */
+       ndigits = prec + 1;
+       break;
+
+      default:
+       ASSERT (0);
+       /*FALLTHRU*/
+
+      case DOPRNT_CONV_GENERAL:
+       /* precision is total digits, but be sure to ask mpf_get_str for at
+          least 1, not 0 */
+       ndigits = MAX (prec, 1);
+       break;
+      }
+    }
+  TRACE (printf ("  ndigits %d\n", ndigits));
+
+  /* free_ptr and free_size are noted now for the free at "done", since s
+     and len get adjusted below.  */
+  s = mpf_get_str (NULL, &exp, p->base, ndigits, f);
+  len = strlen (s);
+  free_ptr = s;
+  free_size = len + 1;
+  TRACE (printf ("  s   %s\n", s);
+        printf ("  exp %ld\n", exp);
+        printf ("  len %d\n", len));
+
+  /* For fixed mode check the ndigits formed above was in fact enough for
+     the integer part plus p->prec after the radix point. */
+  ASSERT ((p->conv == DOPRNT_CONV_FIXED && p->prec > -1)
+         ? ndigits >= MAX (1, exp + p->prec + 2) : 1);
+
+  /* A '-' at the start of the string replaces any '+' or ' ' asked for in
+     p->sign, and is skipped so s holds just digits.  */
+  sign = p->sign;
+  if (s[0] == '-')
+    {
+      sign = s[0];
+      s++, len--;
+    }
+  signlen = (sign != '\0');
+  TRACE (printf ("  sign %c  signlen %d\n", sign, signlen));
+
+  switch (p->conv) {
+  case DOPRNT_CONV_FIXED:
+    if (prec <= -1)
+      prec = MAX (0, len-exp);   /* retain all digits */
+
+    /* Truncate if necessary so fraction will be at most prec digits. */
+    ASSERT (prec >= 0);
+    newlen = exp + prec;
+    if (newlen < 0)
+      {
+       /* first non-zero digit is below target prec, and at least one zero
+          digit in between, so print zero */
+       len = 0;
+       exp = 0;
+      }
+    else if (len <= newlen)
+      {
+       /* already got few enough digits */
+      }
+    else
+      {
+       /* discard excess digits and round to nearest */
+
+       const char  *num_to_text = (p->base >= 0
+                                   ? "0123456789abcdefghijklmnopqrstuvwxyz"
+                                   : "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ");
+       int  base = ABS(p->base);
+       int  n;
+
+       ASSERT (base <= 36);
+
+       /* n is the first discarded digit, it decides the rounding */
+       len = newlen;
+       n = DIGIT_VALUE (s[len]);
+       TRACE (printf ("  rounding with %d\n", n));
+       if (n >= (base + 1) / 2)
+         {
+           /* propagate a carry */
+           for (;;)
+             {
+               if (len == 0)
+                 {
+                   /* carried out of the top digit, result is a single
+                      "1" one place higher */
+                   s[0] = '1';
+                   len = 1;
+                   exp++;
+                   break;
+                 }
+               n = DIGIT_VALUE (s[len-1]);
+               ASSERT (n >= 0 && n < base);
+               n++;
+               if (n != base)
+                 {
+                   TRACE (printf ("  storing now %d\n", n));
+                   s[len-1] = num_to_text[n];
+                   break;
+                 }
+               /* this digit wrapped to zero, drop it and carry on into
+                  the next one up */
+               len--;
+             }
+         }
+       else
+         {
+           /* truncate only, strip any trailing zeros now exposed */
+           while (len > 0 && s[len-1] == '0')
+             len--;
+         }
+
+       /* Can have newlen==0, in which case the truncate was just to check
+          for a carry turning it into "1".  If we're left with len==0 then
+          adjust exp to match.  */
+       if (len == 0)
+         exp = 0;
+      }
+
+  fixed:
+    /* Also reached by a goto from DOPRNT_CONV_GENERAL below. */
+    ASSERT (len == 0 ? exp == 0 : 1);
+    if (exp <= 0)
+      {
+       TRACE (printf ("  fixed 0.000sss\n"));
+       intlen = 0;
+       intzeros = 1;
+       fraczeros = -exp;
+       fraclen = len;
+      }
+    else
+      {
+       TRACE (printf ("  fixed sss.sss or sss000\n"));
+       intlen = MIN (len, exp);
+       intzeros = exp - intlen;
+       fraczeros = 0;
+       fraclen = len - intlen;
+      }
+    explen = 0;
+    break;
+
+  case DOPRNT_CONV_SCIENTIFIC:
+    {
+      long int expval;
+      char  expsign;
+
+      if (prec <= -1)
+       prec = MAX (0, len-1);   /* retain all digits */
+
+    scientific:
+      /* Also reached by a goto from DOPRNT_CONV_GENERAL below. */
+      TRACE (printf ("  scientific s.sss\n"));
+
+      intlen = MIN (1, len);
+      intzeros = (intlen == 0 ? 1 : 0);
+      fraczeros = 0;
+      fraclen = len - intlen;
+
+      expval = (exp-intlen);
+      if (p->exptimes4)
+       expval <<= 2;
+
+      /* Split out the sign since %o or %x in expfmt give negatives as twos
+        complement, not with a sign. */
+      expsign = (expval >= 0 ? '+' : '-');
+      expval = ABS (expval);
+
+#if HAVE_VSNPRINTF
+      explen = snprintf (exponent, sizeof(exponent),
+                        p->expfmt, expsign, expval);
+      /* test for < sizeof-1 since a glibc 2.0.x return of sizeof-1 might
+        mean truncation */
+      ASSERT (explen >= 0 && explen < sizeof(exponent)-1);
+#else
+      sprintf (exponent, p->expfmt, expsign, expval);
+      explen = strlen (exponent);
+      ASSERT (explen < sizeof(exponent));
+#endif
+      TRACE (printf ("  expfmt %s gives %s\n", p->expfmt, exponent));
+    }
+    break;
+
+  default:
+    ASSERT (0);
+    /*FALLTHRU*/  /* to stop variables looking uninitialized */
+
+  case DOPRNT_CONV_GENERAL:
+    /* The exponent for "scientific" will be exp-1, choose scientific if
+       this is < -4 or >= prec (and minimum 1 for prec).  For f==0 will have
+       exp==0 and get the desired "fixed".  This rule follows glibc.  For
+       fixed there's no need to truncate, the desired ndigits will already
+       be as required.  */
+    if (exp-1 < -4 || exp-1 >= MAX (1, prec))
+      goto scientific;
+    else
+      goto fixed;
+  }
+
+  TRACE (printf ("  intlen %d intzeros %d fraczeros %d fraclen %d\n",
+                intlen, intzeros, fraczeros, fraclen));
+  ASSERT (p->prec <= -1
+         ? intlen + fraclen == strlen (s)
+         : intlen + fraclen <= strlen (s));
+
+  if (p->showtrailing)
+    {
+      /* Pad to requested precision with trailing zeros, for general this is
+        all digits, for fixed and scientific just the fraction.  */
+      preczeros = prec - (fraczeros + fraclen
+                         + (p->conv == DOPRNT_CONV_GENERAL
+                            ? intlen + intzeros : 0));
+      preczeros = MAX (0, preczeros);
+    }
+  else
+    preczeros = 0;
+  TRACE (printf ("  prec=%d showtrailing=%d, pad with preczeros %d\n",
+                prec, p->showtrailing, preczeros));
+
+  /* radix point if needed, or if forced */
+  pointlen = ((fraczeros + fraclen + preczeros) != 0 || p->showpoint != 0)
+    ? strlen (point) : 0;
+  TRACE (printf ("  point |%s|  pointlen %d\n", point, pointlen));
+
+  /* Notice the test for a non-zero value is done after any truncation for
+     DOPRNT_CONV_FIXED. */
+  showbase = NULL;
+  showbaselen = 0;
+  switch (p->showbase) {
+  default:
+    ASSERT (0);
+    /*FALLTHRU*/
+  case DOPRNT_SHOWBASE_NO:
+    break;
+  case DOPRNT_SHOWBASE_NONZERO:
+    if (intlen == 0 && fraclen == 0)
+      break;
+    /*FALLTHRU*/
+  case DOPRNT_SHOWBASE_YES:
+    /* bases other than 16, -16 and 8 get no prefix */
+    switch (p->base) {
+    case 16:  showbase = "0x"; showbaselen = 2; break;
+    case -16: showbase = "0X"; showbaselen = 2; break;
+    case 8:   showbase = "0";  showbaselen = 1; break;
+    }
+    break;
+  }
+  TRACE (printf ("  showbase %s showbaselen %d\n",
+                showbase == NULL ? "" : showbase, showbaselen));
+
+  /* left over field width */
+  justlen = p->width - (signlen + showbaselen + intlen + intzeros + pointlen
+                       + fraczeros + fraclen + preczeros + explen);
+  TRACE (printf ("  justlen %d fill 0x%X\n", justlen, p->fill));
+
+  justify = p->justify;
+  if (justlen <= 0) /* no justifying if exceed width */
+    justify = DOPRNT_JUSTIFY_NONE;
+
+  TRACE (printf ("  justify type %d  intlen %d pointlen %d fraclen %d\n",
+                justify, intlen, pointlen, fraclen));
+
+  if (justify == DOPRNT_JUSTIFY_RIGHT)         /* pad for right */
+    DOPRNT_REPS (p->fill, justlen);
+
+  if (signlen)                                 /* sign */
+    DOPRNT_REPS (sign, 1);
+
+  DOPRNT_MEMORY_MAYBE (showbase, showbaselen); /* base */
+
+  if (justify == DOPRNT_JUSTIFY_INTERNAL)      /* pad for internal */
+    DOPRNT_REPS (p->fill, justlen);
+
+  DOPRNT_MEMORY (s, intlen);                   /* integer */
+  DOPRNT_REPS_MAYBE ('0', intzeros);
+
+  DOPRNT_MEMORY_MAYBE (point, pointlen);       /* point */
+
+  DOPRNT_REPS_MAYBE ('0', fraczeros);          /* frac */
+  DOPRNT_MEMORY_MAYBE (s+intlen, fraclen);
+
+  DOPRNT_REPS_MAYBE ('0', preczeros);          /* prec */
+
+  DOPRNT_MEMORY_MAYBE (exponent, explen);      /* exp */
+
+  if (justify == DOPRNT_JUSTIFY_LEFT)          /* pad for left */
+    DOPRNT_REPS (p->fill, justlen);
+
+ done:
+  /* free_ptr rather than s, since s may have been stepped past a '-' */
+  (*__gmp_free_func) (free_ptr, free_size);
+  return retval;
+
+ error:
+  retval = -1;
+  goto done;
+}
diff --git a/printf/doprnti.c b/printf/doprnti.c

new file mode 100644 (file)

index 0000000..df474df
--- /dev/null
+++ b/printf/doprnti.c
@@ -0,0 +1,133 @@
+/* __gmp_doprnt_integer -- integer style formatted output.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#if HAVE_STDARG
+#include <stdarg.h>    /* for va_list and hence doprnt_funs_t */
+#else
+#include <varargs.h>
+#endif
+
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Emit the digit string S through the output callbacks in FUNS, applying
+   the sign, base prefix, precision zeros and field justification selected
+   by P.  S may start with '-' and may contain a '/'; the text after the
+   slash can get a base prefix of its own.
+
+   Returns retval, or -1 when the "error" label is reached.
+   NOTE(review): the DOPRNT_* macros are defined outside this file;
+   presumably they call the FUNS callbacks with DATA, add each result to
+   retval and "goto error" on failure -- confirm in gmp-impl.h.  */
+int
+__gmp_doprnt_integer (const struct doprnt_funs_t *funs,
+                     void *data,
+                     const struct doprnt_params_t *p,
+                     const char *s)
+{
+  int         retval = 0;
+  int         slen, justlen, showbaselen, sign, signlen, slashlen, zeros;
+  int         justify, den_showbaselen;
+  const char  *slash, *showbase;
+
+  /* '+' or ' ' if wanted, and don't already have '-' */
+  sign = p->sign;
+  if (s[0] == '-')
+    {
+      sign = s[0];
+      s++;
+    }
+  signlen = (sign != '\0');
+
+  /* if the precision was explicitly 0, print nothing for a 0 value */
+  if (*s == '0' && p->prec == 0)
+    s++;
+
+  /* length and slash position of what is left after the steps above */
+  slen = strlen (s);
+  slash = strchr (s, '/');
+
+  showbase = NULL;
+  showbaselen = 0;
+
+  /* only bases 16, -16 and 8 have a prefix, any other base leaves
+     showbase as NULL with a zero length */
+  if (p->showbase != DOPRNT_SHOWBASE_NO)
+    {
+      switch (p->base) {
+      case 16:  showbase = "0x"; showbaselen = 2; break;
+      case -16: showbase = "0X"; showbaselen = 2; break;
+      case 8:   showbase = "0";  showbaselen = 1; break;
+      }
+    }
+
+  /* the part after the slash gets a prefix only if there is a slash and,
+     in NONZERO mode, only if that part doesn't start with '0' */
+  den_showbaselen = showbaselen;
+  if (slash == NULL
+      || (p->showbase == DOPRNT_SHOWBASE_NONZERO && slash[1] == '0'))
+    den_showbaselen = 0;
+
+  /* likewise no leading prefix in NONZERO mode when s starts with '0' */
+  if (p->showbase == DOPRNT_SHOWBASE_NONZERO && s[0] == '0')
+    showbaselen = 0;
+
+  /* the influence of p->prec on mpq is currently undefined */
+  zeros = MAX (0, p->prec - slen);
+
+  /* space left over after actual output length */
+  justlen = p->width
+    - (strlen(s) + signlen + showbaselen + den_showbaselen + zeros);
+
+  justify = p->justify;
+  if (justlen <= 0) /* no justifying if exceed width */
+    justify = DOPRNT_JUSTIFY_NONE;
+
+  if (justify == DOPRNT_JUSTIFY_RIGHT)             /* pad right */
+    DOPRNT_REPS (p->fill, justlen);
+
+  DOPRNT_REPS_MAYBE (sign, signlen);               /* sign */
+
+  DOPRNT_MEMORY_MAYBE (showbase, showbaselen);     /* base */
+
+  DOPRNT_REPS_MAYBE ('0', zeros);                  /* zeros */
+
+  if (justify == DOPRNT_JUSTIFY_INTERNAL)          /* pad internal */
+    DOPRNT_REPS (p->fill, justlen);
+
+  /* if there's a showbase on the denominator, then print the numerator
+     separately so it can be inserted */
+  if (den_showbaselen != 0)
+    {
+      ASSERT (slash != NULL);
+      slashlen = slash+1 - s;
+      DOPRNT_MEMORY (s, slashlen);                 /* numerator and slash */
+      slen -= slashlen;
+      s += slashlen;
+      DOPRNT_MEMORY (showbase, den_showbaselen);
+    }
+
+  DOPRNT_MEMORY (s, slen);                         /* number, or denominator */
+
+  if (justify == DOPRNT_JUSTIFY_LEFT)              /* pad left */
+    DOPRNT_REPS (p->fill, justlen);
+
+  /* common exit; "error" below sets retval to -1 and comes back here */
+ done:
+  return retval;
+
+ error:
+  retval = -1;
+  goto done;
+}
diff --git a/printf/fprintf.c b/printf/fprintf.c

new file mode 100644 (file)

index 0000000..dba30b9
--- /dev/null
+++ b/printf/fprintf.c
@@ -0,0 +1,58 @@
+/* gmp_fprintf -- formatted output.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#if HAVE_STDARG
+#include <stdarg.h>
+#else
+#include <varargs.h>
+#endif
+
+#include <stdio.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Formatted output to the stdio stream FP.  The variable arguments are
+   gathered into a va_list and handed to __gmp_doprnt together with the
+   __gmp_fprintf_funs callback table; its return value is passed back
+   unchanged.  */
+int
+#if HAVE_STDARG
+gmp_fprintf (FILE *fp, const char *fmt, ...)
+#else
+gmp_fprintf (va_alist)
+     va_dcl
+#endif
+{
+#if ! HAVE_STDARG
+  /* old-style varargs: the fixed arguments are fetched by hand below */
+  FILE        *fp;
+  const char  *fmt;
+#endif
+  int      nchars;
+  va_list  args;
+
+#if HAVE_STDARG
+  va_start (args, fmt);
+#else
+  va_start (args);
+  fp = va_arg (args, FILE *);
+  fmt = va_arg (args, const char *);
+#endif
+
+  nchars = __gmp_doprnt (&__gmp_fprintf_funs, fp, fmt, args);
+  va_end (args);
+
+  return nchars;
+}
diff --git a/printf/obprintf.c b/printf/obprintf.c

new file mode 100644 (file)

index 0000000..51ad703
--- /dev/null
+++ b/printf/obprintf.c
@@ -0,0 +1,66 @@
+/* gmp_obstack_printf -- formatted output to an obstack.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#if HAVE_OBSTACK_VPRINTF
+
+#if HAVE_STDARG
+#include <stdarg.h>
+#else
+#include <varargs.h>
+#endif
+
+#include <obstack.h>
+#include <string.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Formatted output appended to the object being grown in the obstack OB.
+   The variable arguments are gathered into a va_list and handed to
+   __gmp_doprnt with the __gmp_obstack_printf_funs callback table; its
+   return value is passed back unchanged.  */
+int
+#if HAVE_STDARG
+gmp_obstack_printf (struct obstack *ob, const char *fmt, ...)
+#else
+gmp_obstack_printf (va_alist)
+     va_dcl
+#endif
+{
+#if ! HAVE_STDARG
+  /* old-style varargs: the fixed arguments are fetched by hand below */
+  struct obstack  *ob;
+  const char      *fmt;
+#endif
+  int      nchars;
+  va_list  args;
+
+#if HAVE_STDARG
+  va_start (args, fmt);
+#else
+  va_start (args);
+  ob = va_arg (args, struct obstack *);
+  fmt = va_arg (args, const char *);
+#endif
+
+  /* the format string must not lie inside the object being grown */
+  ASSERT (! MEM_OVERLAP_P (obstack_base(ob), obstack_object_size(ob),
+                           fmt, strlen(fmt)+1));
+
+  nchars = __gmp_doprnt (&__gmp_obstack_printf_funs, ob, fmt, args);
+  va_end (args);
+
+  return nchars;
+}
+
+#endif /* HAVE_OBSTACK_VPRINTF */
diff --git a/printf/obprntffuns.c b/printf/obprntffuns.c

new file mode 100644 (file)

index 0000000..092e6cc
--- /dev/null
+++ b/printf/obprntffuns.c
@@ -0,0 +1,66 @@
+/* __gmp_obstack_printf_funs -- support for gmp_obstack_printf and
+   gmp_obstack_vprintf.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#if HAVE_OBSTACK_VPRINTF
+
+#define _GNU_SOURCE   /* ask glibc <stdio.h> for obstack_vprintf */
+
+#if HAVE_STDARG
+#include <stdarg.h>
+#else
+#include <varargs.h>
+#endif
+
+#include <stdio.h>    /* for obstack_vprintf */
+#include <string.h>
+#include <obstack.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* doprnt "memory" callback: append the LEN bytes at PTR to the object
+   being grown in OB, and report LEN as the amount output.  */
+static int
+gmp_obstack_memory (struct obstack *ob, const char *ptr, size_t len)
+{
+  int  written = len;
+
+  obstack_grow (ob, ptr, len);
+  return written;
+}
+
+/* doprnt "reps" callback: append REPS copies of the character C to the
+   object being grown in OB, and report REPS as the amount output.  */
+static int
+gmp_obstack_reps (struct obstack *ob, int c, int reps)
+{
+  char  *fill_end;
+
+  /* extend the object by REPS bytes first, then fill the new tail */
+  obstack_blank (ob, reps);
+  fill_end = (char *) obstack_next_free(ob);
+  memset (fill_end - reps, c, reps);
+
+  return reps;
+}
+
+/* Callback table passed to __gmp_doprnt by gmp_obstack_printf and
+   gmp_obstack_vprintf: obstack_vprintf as the format callback, and the two
+   static helpers above as the memory and reps callbacks.  Only three
+   initializers are given, so any further member is zero-initialized.  */
+const struct doprnt_funs_t  __gmp_obstack_printf_funs = {
+  (doprnt_format_t) obstack_vprintf,
+  (doprnt_memory_t) gmp_obstack_memory,
+  (doprnt_reps_t)   gmp_obstack_reps
+};
+
+#endif /* HAVE_OBSTACK_VPRINTF */
diff --git a/printf/obvprintf.c b/printf/obvprintf.c

new file mode 100644 (file)

index 0000000..4e3712f
--- /dev/null
+++ b/printf/obvprintf.c
@@ -0,0 +1,46 @@
+/* gmp_obstack_vprintf -- formatted output to an obstack.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#if HAVE_OBSTACK_VPRINTF
+
+#if HAVE_STDARG
+#include <stdarg.h>
+#else
+#include <varargs.h>
+#endif
+
+#include <obstack.h>
+#include <string.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* va_list form of gmp_obstack_printf: forward FMT and AP to __gmp_doprnt
+   with the obstack callback table and OB as its data, passing back its
+   return value unchanged.  */
+int
+gmp_obstack_vprintf (struct obstack *ob, const char *fmt, va_list ap)
+{
+  int  nchars;
+
+  /* the format string must not lie inside the object being grown */
+  ASSERT (! MEM_OVERLAP_P (obstack_base(ob), obstack_object_size(ob),
+                           fmt, strlen(fmt)+1));
+
+  nchars = __gmp_doprnt (&__gmp_obstack_printf_funs, ob, fmt, ap);
+  return nchars;
+}
+
+#endif /* HAVE_OBSTACK_VPRINTF */
diff --git a/printf/printf.c b/printf/printf.c

new file mode 100644 (file)

index 0000000..bcb0acb
--- /dev/null
+++ b/printf/printf.c
@@ -0,0 +1,56 @@
+/* gmp_printf -- formatted output.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#if HAVE_STDARG
+#include <stdarg.h>
+#else
+#include <varargs.h>
+#endif
+
+#include <stdio.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Formatted output to stdout.  The variable arguments are gathered into a
+   va_list and handed to __gmp_doprnt together with the __gmp_fprintf_funs
+   callback table; its return value is passed back unchanged.  */
+int
+#if HAVE_STDARG
+gmp_printf (const char *fmt, ...)
+#else
+gmp_printf (va_alist)
+     va_dcl
+#endif
+{
+#if ! HAVE_STDARG
+  /* old-style varargs: the format is fetched by hand below */
+  const char  *fmt;
+#endif
+  int      nchars;
+  va_list  args;
+
+#if HAVE_STDARG
+  va_start (args, fmt);
+#else
+  va_start (args);
+  fmt = va_arg (args, const char *);
+#endif
+
+  nchars = __gmp_doprnt (&__gmp_fprintf_funs, stdout, fmt, args);
+  va_end (args);
+
+  return nchars;
+}
diff --git a/printf/printffuns.c b/printf/printffuns.c

new file mode 100644 (file)

index 0000000..4f4e74d
--- /dev/null
+++ b/printf/printffuns.c
@@ -0,0 +1,76 @@
+/* __gmp_fprintf_funs -- support for formatted output to FILEs.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#if HAVE_STDARG
+#include <stdarg.h>
+#else
+#include <varargs.h>
+#endif
+
+#include <stdio.h>
+#include <string.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* SunOS 4 stdio.h doesn't provide a prototype for this */
+#if ! HAVE_DECL_VFPRINTF
+int vfprintf __GMP_PROTO ((FILE *, const char *, va_list));
+#endif
+
+
+/* doprnt "memory" callback: write the LEN bytes at STR to FP.  Returns
+   LEN, or -1 if fwrite could not write them all, -1 being the error value
+   gmp_fprintf_reps also hands back.  */
+static int
+gmp_fprintf_memory (FILE *fp, const char *str, size_t len)
+{
+  /* fwrite never returns a negative value, a failure only shows up as a
+     short item count */
+  if (fwrite (str, 1, len, fp) != len)
+    return -1;
+
+  return len;
+}
+
+/* doprnt "reps" callback: write the character C to FP, REPS times.
+
+   glibc putc is a function, at least when it's in multi-threaded mode or
+   some such, so fwrite chunks instead of making many calls.
+
+   Returns REPS, or -1 if fwrite writes less than it was asked to.  */
+static int
+gmp_fprintf_reps (FILE *fp, int c, int reps)
+{
+  char    buf[256];
+  size_t  piece;
+  int     remaining;
+  ASSERT (reps >= 0);
+
+  memset (buf, c, MIN ((size_t) reps, sizeof (buf)));
+  for (remaining = reps; remaining > 0; remaining -= (int) piece)
+    {
+      piece = MIN ((size_t) remaining, sizeof (buf));
+
+      /* fwrite returns an item count, never -1, so an output error can
+         only be seen as a short count */
+      if (fwrite (buf, 1, piece, fp) != piece)
+        return -1;
+    }
+
+  return reps;
+}
+
+/* Callback table passed to __gmp_doprnt by gmp_printf and gmp_fprintf:
+   vfprintf as the format callback, and the two static helpers above as the
+   memory and reps callbacks.  Only three initializers are given, so any
+   further member is zero-initialized.  */
+const struct doprnt_funs_t  __gmp_fprintf_funs = {
+  (doprnt_format_t) vfprintf,
+  (doprnt_memory_t) gmp_fprintf_memory,
+  (doprnt_reps_t)   gmp_fprintf_reps,
+};
diff --git a/printf/repl-vsnprintf.c b/printf/repl-vsnprintf.c

new file mode 100644 (file)

index 0000000..06809dc
--- /dev/null
+++ b/printf/repl-vsnprintf.c
@@ -0,0 +1,389 @@
+/* __gmp_replacement_vsnprintf -- for systems which don't have vsnprintf, or
+   only have a broken one.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#if ! HAVE_VSNPRINTF   /* only need this file if we don't have vsnprintf */
+
+
+#define _GNU_SOURCE    /* for strnlen prototype */
+
+#if HAVE_STDARG
+#include <stdarg.h>
+#else
+#include <varargs.h>
+#endif
+
+#include <ctype.h>     /* for isdigit */
+#include <stddef.h>    /* for ptrdiff_t */
+#include <string.h>
+#include <stdio.h>     /* for NULL */
+#include <stdlib.h>
+
+#if HAVE_FLOAT_H
+#include <float.h>     /* for DBL_MAX_10_EXP etc */
+#endif
+
+#if HAVE_INTTYPES_H
+# include <inttypes.h> /* for intmax_t */
+#else
+# if HAVE_STDINT_H
+#  include <stdint.h>
+# endif
+#endif
+
+#if HAVE_SYS_TYPES_H
+#include <sys/types.h> /* for quad_t */
+#endif
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Autoconf notes that AIX 4.3 has a broken strnlen, but fortunately it
+   doesn't affect us since __gmp_replacement_vsnprintf is not required on
+   that system.  */
+#if ! HAVE_STRNLEN
+/* Length of S, looking at no more than N bytes: the index of the first
+   '\0' among s[0] .. s[n-1], or N if there is none.  */
+static size_t
+strnlen (const char *s, size_t n)
+{
+  const char  *p = s;
+
+  while (n != 0 && *p != '\0')
+    {
+      p++;
+      n--;
+    }
+  return (size_t) (p - s);
+}
+#endif
+
+
+/* The approach here is to parse the fmt string, and decide how much space
+   it requires, then use vsprintf into a big enough buffer.  The space
+   calculated isn't an exact amount, but it's certainly no less than
+   required.
+
+   This code was inspired by GNU libiberty/vasprintf.c but we support more
+   datatypes, when available.
+
+   mingw32 - doesn't have vsnprintf, it seems.  Because gcc is used a full
+       set of types are available, but "long double" is just a plain IEEE
+       64-bit "double" and LDBL_MAX_10_EXP is correspondingly defined, so we
+       avoid the big 15-bit exponent estimate.  */
+
+/* Replacement for vsnprintf: format ORIG_FMT with the arguments in ORIG_AP
+   into BUF, storing at most BUF_SIZE bytes including the terminating '\0'.
+   Returns the length of the complete formatted output, which is BUF_SIZE
+   or more when the output had to be truncated.
+
+   A first pass walks the format string with a copy of the argument list to
+   get an upper bound on the output size; vsprintf then writes either
+   straight into BUF, or into a temporary block which is copied (truncated)
+   into BUF.  In that first pass every conversion must step over exactly
+   the arguments vsprintf is going to use and must end with "goto next",
+   otherwise later arguments are misread and the bound comes out wrong.  */
+int
+__gmp_replacement_vsnprintf (char *buf, size_t buf_size,
+                             const char *orig_fmt, va_list orig_ap)
+{
+  va_list     ap;
+  const char  *fmt;
+  size_t      total_width, integer_sizeof, floating_sizeof, len;
+  char        fchar, type;
+  int         width, prec, seen_prec, double_digits, long_double_digits;
+  int         *value;
+
+  /* preserve orig_ap for use after size estimation */
+  va_copy (ap, orig_ap);
+
+  fmt = orig_fmt;
+  total_width = strlen (fmt) + 1;   /* 1 extra for the '\0' */
+
+  integer_sizeof = sizeof (long);
+#if HAVE_LONG_LONG
+  integer_sizeof = MAX (integer_sizeof, sizeof (long long));
+#endif
+#if HAVE_QUAD_T
+  integer_sizeof = MAX (integer_sizeof, sizeof (quad_t));
+#endif
+#if HAVE_INTMAX_T
+  integer_sizeof = MAX (integer_sizeof, sizeof (intmax_t));
+#endif
+
+  floating_sizeof = sizeof (double);
+#if HAVE_LONG_DOUBLE
+  floating_sizeof = MAX (floating_sizeof, sizeof (long double));
+#endif
+
+  /* IEEE double or VAX G floats have an 11 bit exponent, so the default is
+     a maximum 308 decimal digits.  VAX D floats have only an 8 bit
+     exponent, but we don't bother trying to detect that directly.  */
+  double_digits = 308;
+#ifdef DBL_MAX_10_EXP
+  /* but in any case prefer a value the compiler says */
+  double_digits = DBL_MAX_10_EXP;
+#endif
+
+  /* IEEE 128-bit quad, Intel 80-bit temporary, or VAX H floats all have 15
+     bit exponents, so the default is a maximum 4932 decimal digits.  */
+  long_double_digits = 4932;
+  /* but if double == long double, then go with that size */
+#if HAVE_LONG_DOUBLE
+  if (sizeof (double) == sizeof (long double))
+    long_double_digits = double_digits;
+#endif
+#ifdef LDBL_MAX_10_EXP
+  /* but in any case prefer a value the compiler says */
+  long_double_digits = LDBL_MAX_10_EXP;
+#endif
+
+  for (;;)
+    {
+      fmt = strchr (fmt, '%');
+      if (fmt == NULL)
+        break;
+      fmt++;
+
+      type = '\0';
+      width = 0;
+      prec = 6;
+      seen_prec = 0;
+      value = &width;
+
+      /* one character of the conversion per iteration; flags, width,
+         precision and modifiers "break" to fetch the next character, a
+         conversion letter leaves through "goto next" */
+      for (;;)
+        {
+          fchar = *fmt++;
+          switch (fchar) {
+
+          case 'c':
+            /* char, its output is already accounted for by strlen(fmt),
+               but the argument (promoted to int) must be stepped over so
+               the arguments after it are read from the right place */
+            (void) va_arg (ap, int);
+            goto next;
+
+          case 'd':
+          case 'i':
+          case 'o':
+          case 'x':
+          case 'X':
+          case 'u':
+            /* at most 3 digits per byte in hex, dec or octal, plus a sign */
+            total_width += 3 * integer_sizeof + 1;
+
+            switch (type) {
+            case 'j':
+              /* Let's assume uintmax_t is the same size as intmax_t. */
+#if HAVE_INTMAX_T
+              (void) va_arg (ap, intmax_t);
+#else
+              ASSERT_FAIL (intmax_t not available);
+#endif
+              break;
+            case 'l':
+              (void) va_arg (ap, long);
+              break;
+            case 'L':
+#if HAVE_LONG_LONG
+              (void) va_arg (ap, long long);
+#else
+              ASSERT_FAIL (long long not available);
+#endif
+              break;
+            case 'q':
+              /* quad_t is probably the same as long long, but let's treat
+                 it separately just to be sure.  Also let's assume u_quad_t
+                 will be the same size as quad_t.  */
+#if HAVE_QUAD_T
+              (void) va_arg (ap, quad_t);
+#else
+              ASSERT_FAIL (quad_t not available);
+#endif
+              break;
+            case 't':
+#if HAVE_PTRDIFF_T
+              (void) va_arg (ap, ptrdiff_t);
+#else
+              ASSERT_FAIL (ptrdiff_t not available);
+#endif
+              break;
+            case 'z':
+              (void) va_arg (ap, size_t);
+              break;
+            default:
+              /* default is an "int", and this includes h=short and hh=char
+                 since they're promoted to int in a function call */
+              (void) va_arg (ap, int);
+              break;
+            }
+            goto next;
+
+          case 'E':
+          case 'e':
+          case 'G':
+          case 'g':
+            /* Requested decimals, sign, point and e, plus an overestimate
+               of exponent digits (the assumption is all the float is
+               exponent!).  */
+            total_width += prec + 3 + floating_sizeof * 3;
+            if (type == 'L')
+              {
+#if HAVE_LONG_DOUBLE
+                (void) va_arg (ap, long double);
+#else
+                ASSERT_FAIL (long double not available);
+#endif
+              }
+            else
+              (void) va_arg (ap, double);
+            /* the conversion is complete; a plain "break" here would keep
+               parsing the text after it as part of this conversion */
+            goto next;
+
+          case 'f':
+            /* Requested decimals, sign and point, and a margin for error,
+               then add the maximum digits that can be in the integer part,
+               based on the maximum exponent value. */
+            total_width += prec + 2 + 10;
+            if (type == 'L')
+              {
+#if HAVE_LONG_DOUBLE
+                (void) va_arg (ap, long double);
+                total_width += long_double_digits;
+#else
+                ASSERT_FAIL (long double not available);
+#endif
+              }
+            else
+              {
+                (void) va_arg (ap, double);
+                total_width += double_digits;
+              }
+            /* conversion complete, as for 'e' and 'g' above */
+            goto next;
+
+          case 'h':  /* short or char */
+          case 'j':  /* intmax_t */
+          case 'L':  /* long long or long double */
+          case 'q':  /* quad_t */
+          case 't':  /* ptrdiff_t */
+          case 'z':  /* size_t */
+          set_type:
+            type = fchar;
+            break;
+
+          case 'l':
+            /* long or long long */
+            if (type != 'l')
+              goto set_type;
+            type = 'L';   /* "ll" means "L" */
+            break;
+
+          case 'n':
+            /* bytes written, no output as such */
+            (void) va_arg (ap, void *);
+            goto next;
+
+          case 's':
+            /* If no precision was given, then determine the string length
+               and put it there, to be added to the total under "next".  If
+               a precision was given then that's already the maximum from
+               this field, but see whether the string is shorter than that,
+               in case the limit was very big.  */
+            {
+              const char  *s = va_arg (ap, const char *);
+              prec = (seen_prec ? strnlen (s, prec) : strlen (s));
+            }
+            goto next;
+
+          case 'p':
+            /* pointer, let's assume at worst it's octal with some padding */
+            (void) va_arg (ap, const void *);
+            total_width += 3 * sizeof (void *) + 16;
+            goto next;
+
+          case '%':
+            /* literal %, already accounted for by strlen(fmt) */
+            goto next;
+
+          case '#':
+            /* showbase, at most 2 for "0x" */
+            total_width += 2;
+            break;
+
+          case '+':
+          case ' ':
+            /* sign, already accounted for under numerics */
+            break;
+
+          case '-':
+            /* left justify, no effect on total width */
+            break;
+
+          case '.':
+            seen_prec = 1;
+            value = &prec;
+            break;
+
+          case '*':
+            {
+              /* negative width means left justify which can be ignored,
+                 negative prec would be invalid, just use absolute value */
+              int n = va_arg (ap, int);
+              *value = ABS (n);
+            }
+            break;
+
+          case '0': case '1': case '2': case '3': case '4':
+          case '5': case '6': case '7': case '8': case '9':
+            /* process all digits to form a value */
+            {
+              int  n = 0;
+              do {
+                n = n * 10 + (fchar-'0');
+                fchar = *fmt++;
+              } while (isascii (fchar) && isdigit (fchar));
+              fmt--; /* unget the non-digit */
+              *value = n;
+            }
+            break;
+
+          default:
+            /* incomplete or invalid % sequence; unget the character so
+               that a '\0' ending the format is not stepped over by the
+               strchr at the top of the outer loop */
+            ASSERT (0);
+            fmt--;
+            goto next;
+          }
+        }
+
+    next:
+      total_width += width;
+      total_width += prec;
+    }
+
+  /* the copy was only needed for the size estimate */
+  va_end (ap);
+
+  if (total_width <= buf_size)
+    {
+      vsprintf (buf, orig_fmt, orig_ap);
+      len = strlen (buf);
+    }
+  else
+    {
+      char  *s;
+
+      s = __GMP_ALLOCATE_FUNC_TYPE (total_width, char);
+      vsprintf (s, orig_fmt, orig_ap);
+      len = strlen (s);
+      if (buf_size != 0)
+        {
+          size_t  copylen = MIN (len, buf_size-1);
+          memcpy (buf, s, copylen);
+          buf[copylen] = '\0';
+        }
+      (*__gmp_free_func) (s, total_width);
+    }
+
+  /* If total_width was somehow wrong then chances are we've already
+     clobbered memory, but maybe this check will still work.  */
+  ASSERT_ALWAYS (len < total_width);
+
+  return len;
+}
+
+#endif /* ! HAVE_VSNPRINTF */
diff --git a/printf/snprintf.c b/printf/snprintf.c

new file mode 100644 (file)

index 0000000..88afc32
--- /dev/null
+++ b/printf/snprintf.c
@@ -0,0 +1,64 @@
+/* gmp_snprintf -- formatted output to a fixed size buffer.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#if HAVE_STDARG
+#include <stdarg.h>
+#else
+#include <varargs.h>
+#endif
+
+#include <string.h>    /* for strlen */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Formatted output into BUF, which has room for SIZE bytes.  BUF and SIZE
+   are packed into a struct gmp_snprintf_t that __gmp_doprnt hands to the
+   __gmp_snprintf_funs callbacks; the return value of __gmp_doprnt is
+   passed back unchanged.  */
+int
+#if HAVE_STDARG
+gmp_snprintf (char *buf, size_t size, const char *fmt, ...)
+#else
+gmp_snprintf (va_alist)
+     va_dcl
+#endif
+{
+  struct gmp_snprintf_t d;
+  va_list  ap;
+  int      ret;
+
+#if HAVE_STDARG
+  va_start (ap, fmt);
+  d.buf = buf;
+  d.size = size;
+
+#else
+  const char *fmt;
+  va_start (ap);
+  d.buf = va_arg (ap, char *);
+  d.size = va_arg (ap, size_t);
+  fmt = va_arg (ap, const char *);
+#endif
+
+  /* check through d, since "buf" and "size" only exist as names in the
+     stdarg build and the varargs build would not compile with them */
+  ASSERT (! MEM_OVERLAP_P (d.buf, d.size, fmt, strlen(fmt)+1));
+
+  ret = __gmp_doprnt (&__gmp_snprintf_funs, &d, fmt, ap);
+  va_end (ap);
+  return ret;
+}
diff --git a/printf/snprntffuns.c b/printf/snprntffuns.c

new file mode 100644 (file)

index 0000000..340ee05
--- /dev/null
+++ b/printf/snprntffuns.c
@@ -0,0 +1,156 @@
+/* __gmp_snprintf_funs -- support for gmp_snprintf and gmp_vsnprintf.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#if HAVE_STDARG
+#include <stdarg.h>
+#else
+#include <varargs.h>
+#endif
+
+#include <stdio.h>
+#include <string.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+#if ! HAVE_VSNPRINTF
+#define vsnprintf  __gmp_replacement_vsnprintf
+#endif
+
+
+/* glibc 2.0.x vsnprintf returns either -1 or size-1 for an overflow, with
+   no indication how big the output would have been.  It's necessary to
+   re-run to determine that size.
+
+   "size-1" would mean success from a C99 vsnprintf, and the re-run is
+   unnecessary in this case, but we don't bother to try to detect what sort
+   of vsnprintf we've got.  size-1 should occur rarely in normal
+   circumstances.
+
+   vsnprintf might trash its given ap (it does for instance in glibc 2.1.3
+   on powerpc), so copy it in case we need to use it to probe for the size
+   output that would have been produced.  Note there's no need to preserve
+   it for our callers, just for ourselves.  */
+
+/* doprnt "format" callback for gmp_snprintf: vsnprintf FMT and ORIG_AP
+   into the space left in D, advance D past what was stored, and return the
+   size of the complete output, probing with temporary blocks of growing
+   size when the first call cannot tell what that size is.
+
+   Each va_copy is paired with a va_end before ap is copied into again.  */
+static int
+gmp_snprintf_format (struct gmp_snprintf_t *d, const char *fmt,
+                     va_list orig_ap)
+{
+  int      ret, step, alloc, avail;
+  va_list  ap;
+  char     *p;
+
+  ASSERT (d->size >= 0);
+
+  avail = d->size;
+  if (avail > 1)
+    {
+      va_copy (ap, orig_ap);
+      ret = vsnprintf (d->buf, avail, fmt, ap);
+      va_end (ap);
+      if (ret == -1)
+        {
+          ASSERT (strlen (d->buf) == avail-1);
+          ret = avail-1;
+        }
+
+      /* never step over more than was actually stored */
+      step = MIN (ret, avail-1);
+      d->size -= step;
+      d->buf += step;
+
+      if (ret != avail-1)
+        return ret;
+
+      /* probably glibc 2.0.x truncated output, probe for actual size */
+      alloc = MAX (128, ret);
+    }
+  else
+    {
+      /* no space to write anything, just probe for size */
+      alloc = 128;
+    }
+
+  /* keep doubling the scratch block until the output is seen to fit */
+  do
+    {
+      alloc *= 2;
+      p = __GMP_ALLOCATE_FUNC_TYPE (alloc, char);
+      va_copy (ap, orig_ap);
+      ret = vsnprintf (p, alloc, fmt, ap);
+      va_end (ap);
+      (*__gmp_free_func) (p, alloc);
+    }
+  while (ret == alloc-1 || ret == -1);
+
+  return ret;
+}
+
+/* doprnt "memory" callback for gmp_snprintf: copy as much of the LEN bytes
+   at STR as fits in D while keeping one byte spare, and advance D past
+   them.  LEN is returned whether or not everything was stored.  */
+static int
+gmp_snprintf_memory (struct gmp_snprintf_t *d, const char *str, size_t len)
+{
+  size_t  ncopy;
+
+  ASSERT (d->size >= 0);
+
+  /* nothing can be stored once only the spare byte (or nothing) is left */
+  if (! (d->size > 1))
+    return len;
+
+  ncopy = MIN (d->size-1, len);
+  memcpy (d->buf, str, ncopy);
+  d->size -= ncopy;
+  d->buf += ncopy;
+
+  return len;
+}
+
+/* doprnt "reps" callback for gmp_snprintf: store as many of the REPS
+   copies of C as fit in D while keeping one byte spare, and advance D past
+   them.  REPS is returned whether or not everything was stored.  */
+static int
+gmp_snprintf_reps (struct gmp_snprintf_t *d, int c, int reps)
+{
+  size_t  nfill;
+
+  ASSERT (reps >= 0);
+  ASSERT (d->size >= 0);
+
+  /* nothing can be stored once only the spare byte (or nothing) is left */
+  if (! (d->size > 1))
+    return reps;
+
+  nfill = MIN (d->size-1, reps);
+  memset (d->buf, c, nfill);
+  d->size -= nfill;
+  d->buf += nfill;
+
+  return reps;
+}
+
+/* doprnt "final" callback for gmp_snprintf: store a '\0' at the current
+   position in D when at least one byte is left there.  Always returns 0.  */
+static int
+gmp_snprintf_final (struct gmp_snprintf_t *d)
+{
+  /* with no space at all there is nowhere to put the terminator */
+  if (d->size >= 1)
+    *d->buf = '\0';
+
+  return 0;
+}
+
+/* Callback table passed to __gmp_doprnt by gmp_snprintf: the four static
+   helpers above, as the format, memory, reps and final callbacks.  */
+const struct doprnt_funs_t  __gmp_snprintf_funs = {
+  (doprnt_format_t) gmp_snprintf_format,
+  (doprnt_memory_t) gmp_snprintf_memory,
+  (doprnt_reps_t)   gmp_snprintf_reps,
+  (doprnt_final_t)  gmp_snprintf_final
+};
diff --git a/printf/sprintf.c b/printf/sprintf.c

new file mode 100644 (file)

index 0000000..0a3294d
--- /dev/null
+++ b/printf/sprintf.c
@@ -0,0 +1,64 @@
+/* gmp_sprintf -- formatted output to an unrestricted string.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#if HAVE_STDARG
+#include <stdarg.h>
+#else
+#include <varargs.h>
+#endif
+
+#include <string.h>    /* for strlen */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+int
+#if HAVE_STDARG
+gmp_sprintf (char *buf, const char *fmt, ...)
+#else
+gmp_sprintf (va_alist)
+     va_dcl
+#endif
+{
+#if WANT_ASSERT
+  int      fmtlen = strlen(fmt);
+#endif
+  va_list  ap;
+  int      ret;
+
+#if HAVE_STDARG
+  va_start (ap, fmt);
+#else
+  char       *buf;
+  const char *fmt;
+  va_start (ap);
+  buf = va_arg (ap, char *);
+  fmt = va_arg (ap, const char *);
+#endif
+
+  ret = __gmp_doprnt (&__gmp_sprintf_funs, &buf, fmt, ap);
+  va_end (ap);
+
+  ASSERT (! MEM_OVERLAP_P (buf, strlen(buf)+1, fmt, fmtlen+1));
+
+  return ret;
+}
diff --git a/printf/sprintffuns.c b/printf/sprintffuns.c

new file mode 100644 (file)

index 0000000..01fb3c9
--- /dev/null
+++ b/printf/sprintffuns.c
@@ -0,0 +1,91 @@
+/* __gmp_sprintf_funs -- support for gmp_sprintf and gmp_vsprintf.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#if HAVE_STDARG
+#include <stdarg.h>
+#else
+#include <varargs.h>
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* The data parameter "bufp" points to a "char *buf" which is the next
+   character to be written, having started as the destination from the
+   application.  This is then increased each time output is produced.  */
+
+
+/* Format with the C library vsprintf at "*bufp" and advance "*bufp" past
+   the output.  The vsprintf return value is not used: the amount written
+   is measured with strlen and that is what gets returned, so a -1 from
+   vsprintf is not passed upwards by this code.  */
+static int
+gmp_sprintf_format (char **bufp, const char *fmt, va_list ap)
+{
+  char  *buf = *bufp;
+  int   ret;
+  vsprintf (buf, fmt, ap);
+  ret = strlen (buf);
+  *bufp = buf + ret;
+  return ret;
+}
+
+/* Copy "len" bytes from "str" to "*bufp" and advance "*bufp" past them.
+   Returns "len".  */
+static int
+gmp_sprintf_memory (char **bufp, const char *str, size_t len)
+{
+  memcpy (*bufp, str, len);
+  *bufp += len;
+  return len;
+}
+
+/* Store "reps" copies of the character "c" at "*bufp" and advance "*bufp"
+   past them.  Returns "reps".  */
+static int
+gmp_sprintf_reps (char **bufp, int c, int reps)
+{
+  ASSERT (reps >= 0);
+  memset (*bufp, c, reps);
+  *bufp += reps;
+  return reps;
+}
+
+/* Finish the output by storing a '\0' at "*bufp", without advancing it.
+   Always returns 0.
+
+   Only the data pointer is taken, the same as gmp_snprintf_final which
+   fills the same doprnt_final_t slot; the previous "c" and "reps"
+   parameters were never used.  */
+static int
+gmp_sprintf_final (char **bufp)
+{
+  char  *buf = *bufp;
+  *buf = '\0';
+  return 0;
+}
+
+/* Table of the "char **bufp" based output routines, each cast to the
+   corresponding doprnt_*_t function pointer type.  gmp_sprintf and
+   gmp_vsprintf pass this table to __gmp_doprnt.  */
+const struct doprnt_funs_t  __gmp_sprintf_funs = {
+  (doprnt_format_t) gmp_sprintf_format,
+  (doprnt_memory_t) gmp_sprintf_memory,
+  (doprnt_reps_t)   gmp_sprintf_reps,
+  (doprnt_final_t)  gmp_sprintf_final
+};
diff --git a/printf/vasprintf.c b/printf/vasprintf.c

new file mode 100644 (file)

index 0000000..4ed4c5d
--- /dev/null
+++ b/printf/vasprintf.c
@@ -0,0 +1,113 @@
+/* gmp_vasprintf -- formatted output to an allocated space.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#if HAVE_STDARG
+#include <stdarg.h>
+#else
+#include <varargs.h>
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#if ! HAVE_VSNPRINTF
+#define vsnprintf  __gmp_replacement_vsnprintf
+#endif
+
+
+/* vasprintf isn't used since we prefer all GMP allocs to go through
+   __gmp_allocate_func, and in particular we don't want the -1 return from
+   vasprintf for out-of-memory, instead __gmp_allocate_func should handle
+   that.  Using vsnprintf unfortunately means we might have to re-run it if
+   our current space is insufficient.
+
+   The initial guess for the needed space is an arbitrary 256 bytes.  If
+   that (and any extra GMP_ASPRINTF_T_NEED might give) isn't enough then an
+   ISO C99 standard vsnprintf will tell us what we really need.
+
+   GLIBC 2.0.x vsnprintf returns either -1 or space-1 to indicate overflow,
+   without giving any indication how much is really needed.  In this case
+   keep trying with double the space each time.
+
+   A return of space-1 is success on a C99 vsnprintf, but we're not
+   bothering to identify which style vsnprintf we've got, so just take the
+   pessimistic option and assume it's glibc 2.0.x.
+
+   Notice the use of ret+2 for the new space in the C99 case.  This ensures
+   the next vsnprintf return value will be space-2, which is unambiguously
+   successful.  But actually GMP_ASPRINTF_T_NEED() will realloc to even
+   bigger than that ret+2.
+
+   vsnprintf might trash its given ap, so copy it in case we need to use it
+   more than once.  See comments with gmp_snprintf_format.  */
+
+/* Append the formatting of "fmt" with "orig_ap" at d->buf + d->size,
+   re-running vsnprintf with more space until the output fits.  Returns the
+   number of characters added, which is also added to d->size.  */
+static int
+gmp_asprintf_format (struct gmp_asprintf_t *d, const char *fmt,
+                     va_list orig_ap)
+{
+  int      ret;
+  va_list  ap;
+  size_t   space = 256;
+
+  for (;;)
+    {
+      /* ask for "space" bytes, then use however much is actually free */
+      GMP_ASPRINTF_T_NEED (d, space);
+      space = d->alloc - d->size;
+      /* fresh copy each time round, orig_ap itself is never consumed */
+      va_copy (ap, orig_ap);
+      ret = vsnprintf (d->buf + d->size, space, fmt, ap);
+      if (ret == -1)
+        {
+          /* treat -1 as a full buffer, which forces a retry below */
+          ASSERT (strlen (d->buf + d->size) == space-1);
+          ret = space-1;
+        }
+
+      /* done if output fits in our space */
+      if (ret < space-1)
+        break;
+
+      if (ret == space-1)
+        space *= 2;     /* possible glibc 2.0.x, so double */
+      else
+        space = ret+2;  /* C99, so now know space required */
+    }
+
+  d->size += ret;
+  return ret;
+}
+
+/* Output routines for a struct gmp_asprintf_t destination, each cast to
+   the corresponding doprnt_*_t function pointer type.  Only the format
+   routine is defined in this file.  gmp_vasprintf passes this table to
+   __gmp_doprnt.  */
+const struct doprnt_funs_t  __gmp_asprintf_funs = {
+  (doprnt_format_t) gmp_asprintf_format,
+  (doprnt_memory_t) __gmp_asprintf_memory,
+  (doprnt_reps_t)   __gmp_asprintf_reps,
+  (doprnt_final_t)  __gmp_asprintf_final
+};
+
+/* Formatted output for "fmt" and "ap" into a struct gmp_asprintf_t set up
+   from "result" by GMP_ASPRINTF_T_INIT.  The work is done by __gmp_doprnt
+   with the __gmp_asprintf_funs table, and its return value is returned.  */
+int
+gmp_vasprintf (char **result, const char *fmt, va_list ap)
+{
+  struct gmp_asprintf_t  d;
+  GMP_ASPRINTF_T_INIT (d, result);
+  return __gmp_doprnt (&__gmp_asprintf_funs, &d, fmt, ap);
+}
diff --git a/printf/vfprintf.c b/printf/vfprintf.c

new file mode 100644 (file)

index 0000000..8bed677
--- /dev/null
+++ b/printf/vfprintf.c
@@ -0,0 +1,38 @@
+/* gmp_vfprintf -- formatted output.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#if HAVE_STDARG
+#include <stdarg.h>
+#else
+#include <varargs.h>
+#endif
+
+#include <stdio.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Formatted output of "fmt" and "ap" to the stream "fp", done by
+   __gmp_doprnt with the __gmp_fprintf_funs table.  The return value is that
+   of __gmp_doprnt.  */
+int
+gmp_vfprintf (FILE *fp, const char *fmt, va_list ap)
+{
+  return __gmp_doprnt (&__gmp_fprintf_funs, fp, fmt, ap);
+}
diff --git a/printf/vprintf.c b/printf/vprintf.c

new file mode 100644 (file)

index 0000000..f8da0ef
--- /dev/null
+++ b/printf/vprintf.c
@@ -0,0 +1,38 @@
+/* gmp_vprintf -- formatted output.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#if HAVE_STDARG
+#include <stdarg.h>
+#else
+#include <varargs.h>
+#endif
+
+#include <stdio.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Formatted output of "fmt" and "ap" to stdout, done by __gmp_doprnt with
+   the __gmp_fprintf_funs table.  The return value is that of
+   __gmp_doprnt.  */
+int
+gmp_vprintf (const char *fmt, va_list ap)
+{
+  return __gmp_doprnt (&__gmp_fprintf_funs, stdout, fmt, ap);
+}
diff --git a/printf/vsnprintf.c b/printf/vsnprintf.c

new file mode 100644 (file)

index 0000000..565fdfd
--- /dev/null
+++ b/printf/vsnprintf.c
@@ -0,0 +1,44 @@
+/* gmp_vsnprintf -- formatted output to a fixed size buffer.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#if HAVE_STDARG
+#include <stdarg.h>
+#else
+#include <varargs.h>
+#endif
+
+#include <string.h>    /* for strlen */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Formatted output of "fmt" and "ap" to "buf", which has "size" bytes of
+   space.  "buf" and "size" are put in a struct gmp_snprintf_t and the work
+   is done by __gmp_doprnt with the __gmp_snprintf_funs table; the return
+   value is that of __gmp_doprnt.  */
+int
+gmp_vsnprintf (char *buf, size_t size, const char *fmt, va_list ap)
+{
+  struct gmp_snprintf_t d;
+
+  /* the destination space and the format string must not overlap */
+  ASSERT (! MEM_OVERLAP_P (buf, size, fmt, strlen(fmt)+1));
+
+  d.buf = buf;
+  d.size = size;
+  return __gmp_doprnt (&__gmp_snprintf_funs, &d, fmt, ap);
+}
diff --git a/printf/vsprintf.c b/printf/vsprintf.c

new file mode 100644 (file)

index 0000000..b1a3045
--- /dev/null
+++ b/printf/vsprintf.c
@@ -0,0 +1,47 @@
+/* gmp_vsprintf -- formatted output to an unrestricted string.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#if HAVE_STDARG
+#include <stdarg.h>
+#else
+#include <varargs.h>
+#endif
+
+#include <string.h>    /* for strlen */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Formatted output of "fmt" and "ap" to "buf", which the caller must make
+   big enough for the result.  Returns whatever __gmp_doprnt returns.
+
+   &buf is handed to __gmp_doprnt as the output position and the
+   __gmp_sprintf_funs routines advance it as they write, so on return "buf"
+   is at the end of the output rather than the start.  The overlap
+   assertion therefore works from a saved copy of the original pointer.  */
+int
+gmp_vsprintf (char *buf, const char *fmt, va_list ap)
+{
+#if WANT_ASSERT
+  int   fmtlen = strlen(fmt);
+  char  *start = buf;
+#endif
+  int  ret;
+
+  ret = __gmp_doprnt (&__gmp_sprintf_funs, &buf, fmt, ap);
+
+  /* buf-start is the amount written, plus 1 for the terminating '\0' */
+  ASSERT (! MEM_OVERLAP_P (start, (buf - start) + 1, fmt, fmtlen+1));
+
+  return ret;
+}
diff --git a/rand.c b/rand.c

new file mode 100644 (file)

index 0000000..31969b2
--- /dev/null
+++ b/rand.c
@@ -0,0 +1,64 @@
+/* gmp_randinit (state, algorithm, ...) -- Initialize a random state.
+
+Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#include <stdio.h> /* for NULL */
+
+#if HAVE_STDARG
+#include <stdarg.h>
+#else
+#include <varargs.h>
+#endif
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Initialize "rstate" for the algorithm "alg", taking any further
+   parameters from the variable arguments.
+
+   GMP_RAND_ALG_LC reads one unsigned long and passes it to
+   gmp_randinit_lc_2exp_size; if that returns zero then
+   GMP_ERROR_INVALID_ARGUMENT is set in gmp_errno.  Any other "alg" sets
+   GMP_ERROR_UNSUPPORTED_ARGUMENT in gmp_errno.  */
+void
+#if HAVE_STDARG
+gmp_randinit (gmp_randstate_t rstate,
+             gmp_randalg_t alg,
+             ...)
+#else
+gmp_randinit (va_alist)
+     va_dcl
+#endif
+{
+  va_list ap;
+#if HAVE_STDARG
+  va_start (ap, alg);
+#else
+  /* varargs.h style, the named parameters are fetched with va_arg too */
+  __gmp_randstate_struct *rstate;
+  gmp_randalg_t alg;
+  va_start (ap);
+  rstate = va_arg (ap, __gmp_randstate_struct *);
+  alg = va_arg (ap, gmp_randalg_t);
+#endif
+
+  switch (alg) {
+  case GMP_RAND_ALG_LC:
+    if (! gmp_randinit_lc_2exp_size (rstate, va_arg (ap, unsigned long)))
+      gmp_errno |= GMP_ERROR_INVALID_ARGUMENT;
+    break;
+  default:
+    gmp_errno |= GMP_ERROR_UNSUPPORTED_ARGUMENT;
+    break;
+  }
+  va_end (ap);
+}
diff --git a/randbui.c b/randbui.c

new file mode 100644 (file)

index 0000000..5fc49b2
--- /dev/null
+++ b/randbui.c
@@ -0,0 +1,46 @@
+/* gmp_urandomb_ui -- random bits returned in a ulong.
+
+Copyright 2003, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Currently bits>=BITS_PER_ULONG is quietly truncated to BITS_PER_ULONG,
+   maybe this should raise an exception or something.  */
+
+/* Return random bits from "rstate" in an unsigned long.  The number of
+   bits requested from _gmp_rand is MIN (bits, BITS_PER_ULONG).  */
+unsigned long
+gmp_urandomb_ui (gmp_randstate_ptr rstate, unsigned long bits)
+{
+  mp_limb_t  a[LIMBS_PER_ULONG];
+
+  /* start with zeros, since if bits==0 then _gmp_rand will store nothing at
+     all, or if bits <= GMP_NUMB_BITS then it will store only a[0] */
+  a[0] = 0;
+#if LIMBS_PER_ULONG > 1
+  a[1] = 0;
+#endif
+
+  _gmp_rand (a, rstate, MIN (bits, BITS_PER_ULONG));
+
+#if LIMBS_PER_ULONG == 1
+  return a[0];
+#else
+  /* a[1] supplies the bits from position GMP_NUMB_BITS upwards */
+  return a[0] | (a[1] << GMP_NUMB_BITS);
+#endif
+}
diff --git a/randclr.c b/randclr.c

new file mode 100644 (file)

index 0000000..a4e8242
--- /dev/null
+++ b/randclr.c
@@ -0,0 +1,27 @@
+/* gmp_randclear (state) -- Clear and deallocate random state STATE.
+
+Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Clear "rstate" by calling the randclear_fn entry of the gmp_randfnptr_t
+   table that RNG_FNPTR (rstate) points to.  */
+void
+gmp_randclear (gmp_randstate_t rstate)
+{
+  (*((gmp_randfnptr_t *) RNG_FNPTR (rstate))->randclear_fn) (rstate);
+}
diff --git a/randdef.c b/randdef.c

new file mode 100644 (file)

index 0000000..171a0bd
--- /dev/null
+++ b/randdef.c
@@ -0,0 +1,27 @@
+/* gmp_randinit_default -- initialize a random state with a default algorithm.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Initialize "rstate" with the default algorithm, which here is whatever
+   gmp_randinit_mt sets up.  */
+void
+gmp_randinit_default (gmp_randstate_t rstate)
+{
+  gmp_randinit_mt (rstate);
+}
diff --git a/randiset.c b/randiset.c

new file mode 100644 (file)

index 0000000..f140a33
--- /dev/null
+++ b/randiset.c
@@ -0,0 +1,28 @@
+/* gmp_randinit_set -- initialize with a copy of another gmp_randstate_t.
+
+Copyright 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Initialize "dst" from "src" by calling the randiset_fn entry of the
+   gmp_randfnptr_t table that RNG_FNPTR (src) points to.  */
+void
+gmp_randinit_set (gmp_randstate_ptr dst, gmp_randstate_srcptr src)
+{
+  (*((gmp_randfnptr_t *) RNG_FNPTR (src))->randiset_fn) (dst, src);
+}
diff --git a/randlc2s.c b/randlc2s.c

new file mode 100644 (file)

index 0000000..4dcde73
--- /dev/null
+++ b/randlc2s.c
@@ -0,0 +1,82 @@
+/* gmp_randinit_lc_2exp_size -- initialize a random state with a linear
+   congruential generator of a requested size.
+
+Copyright 1999, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h> /* for NULL */
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Array of LC-schemes, ordered in increasing order of the first
+   member (the 'm2exp' value).  The end of the array is indicated with
+   an entry containing all zeros.  */
+
+/* All multipliers are between 0.01*m and 0.99*m, and are
+congruent to 5 (mod 8).
+They all pass the spectral test with Vt >= 2^(30/t) and merit >= 1.
+(Up to and including 196 bits, merit is >= 3.)  */
+
+/* One linear congruential scheme: the parameters that
+   gmp_randinit_lc_2exp_size passes on to gmp_randinit_lc_2exp.  */
+struct __gmp_rand_lc_scheme_struct
+{
+  unsigned long int m2exp;     /* Modulus is 2 ^ m2exp. */
+  const char *astr;            /* Multiplier in string form. */
+  unsigned long int c;         /* Addend. */
+};
+
+/* The available schemes, as {m2exp, multiplier, addend}.  The multiplier
+   strings are read in base 16 by gmp_randinit_lc_2exp_size, which scans
+   the table from the start and stops at the m2exp==0 entry.  */
+static const struct __gmp_rand_lc_scheme_struct __gmp_rand_lc_scheme[] =
+{
+  {32, "29CF535",           1},
+  {33, "51F666D",           1},
+  {34, "A3D73AD",           1},
+  {35, "147E5B85",          1},
+  {36, "28F725C5",          1},
+  {37, "51EE3105",          1},
+  {38, "A3DD5CDD",          1},
+  {39, "147AF833D",         1},
+  {40, "28F5DA175",         1},
+  {56, "AA7D735234C0DD",  1},
+  {64, "BAECD515DAF0B49D", 1},
+  {100, "292787EBD3329AD7E7575E2FD", 1},
+  {128, "48A74F367FA7B5C8ACBB36901308FA85", 1},
+  {156, "78A7FDDDC43611B527C3F1D760F36E5D7FC7C45", 1},
+  {196, "41BA2E104EE34C66B3520CE706A56498DE6D44721E5E24F5", 1},
+  {200, "4E5A24C38B981EAFE84CD9D0BEC48E83911362C114F30072C5", 1},
+  {256, "AF66BA932AAF58A071FD8F0742A99A0C76982D648509973DB802303128A14CB5", 1},
+  {0, NULL, 0}                 /* End of array. */
+};
+
+/* Initialize "rstate" as a linear congruential generator using the first
+   table scheme whose m2exp/2 is at least "size".  Returns 1 if such a
+   scheme was found and installed, or 0 (leaving "rstate" untouched) if
+   "size" is bigger than any scheme in the table provides.  */
+int
+gmp_randinit_lc_2exp_size (gmp_randstate_t rstate, mp_bitcnt_t size)
+{
+  const struct __gmp_rand_lc_scheme_struct *scheme = __gmp_rand_lc_scheme;
+  mpz_t mult;
+
+  /* Skip the schemes which are too small.  */
+  while (scheme->m2exp != 0 && scheme->m2exp / 2 < size)
+    scheme++;
+
+  /* Reached the end marker, nothing is big enough.  */
+  if (scheme->m2exp == 0)
+    return 0;
+
+  /* Install the scheme, the multiplier string being hexadecimal.  */
+  mpz_init_set_str (mult, scheme->astr, 16);
+  gmp_randinit_lc_2exp (rstate, mult, scheme->c, scheme->m2exp);
+  mpz_clear (mult);
+  return 1;
+}
diff --git a/randlc2x.c b/randlc2x.c

new file mode 100644 (file)

index 0000000..ba45b60
--- /dev/null
+++ b/randlc2x.c
@@ -0,0 +1,322 @@
+/* Linear Congruential pseudo-random number generator functions.
+
+Copyright 1999, 2000, 2001, 2002, 2003, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* State structure for LC, the RNG_STATE() pointer in a gmp_randstate_t.
+
+   _mp_seed holds the current seed value, in the range 0 to 2^m2exp-1.
+   SIZ(_mp_seed) is fixed at BITS_TO_LIMBS(_mp_m2exp) and the value is
+   padded with high zero limbs if necessary.  ALLOC(_mp_seed) is the current
+   size of PTR(_mp_seed) in the usual way.  There only needs to be
+   BITS_TO_LIMBS(_mp_m2exp) allocated, but the mpz functions in the
+   initialization and seeding end up making it a bit more than this.
+
+   _mp_a is the "a" multiplier, in the range 0 to 2^m2exp-1.  SIZ(_mp_a) is
+   the size of the value in the normal way for an mpz_t, except that a value
+   of zero is held with SIZ(_mp_a)==1 and PTR(_mp_a)[0]==0.  This makes it
+   easy to call mpn_mul, and the case of a==0 is highly un-random and not
+   worth any trouble to optimize.
+
+   {_cp,_cn} is the "c" addend.  Normally _cn is 1, but when nails are in
+   use a ulong can be bigger than one limb, and in this case _cn is 2 if
+   necessary.  c==0 is stored as _cp[0]==0 and _cn==1, which makes it easy
+   to call __GMPN_ADD.  c==0 is fairly un-random so isn't worth optimizing.
+
+   _mp_m2exp gives the modulus, namely 2^m2exp.  We demand m2exp>=1, since
+   m2exp==0 would mean no bits at all out of each iteration, which makes no
+   sense.  */
+
+typedef struct {
+  mpz_t          _mp_seed;              /* current seed, SIZ fixed at BITS_TO_LIMBS(_mp_m2exp) */
+  mpz_t          _mp_a;                 /* multiplier "a", SIZ never 0 */
+  mp_size_t      _cn;                   /* number of limbs of _cp in use */
+  mp_limb_t      _cp[LIMBS_PER_ULONG];  /* addend "c" */
+  unsigned long  _mp_m2exp;             /* modulus is 2^_mp_m2exp */
+} gmp_rand_lc_struct;
+
+
+/* lc (rp, state) -- Generate next number in LC sequence.  Return the
+   number of valid bits in the result.  Discards the lower half of the
+   result.  */
+
+/* Advance the generator in "rstate" one step, seed = (a*seed + c) mod
+   2^m2exp, and store the part of the new seed above its low m2exp/2 bits
+   at "rp".  The return value is (m2exp+1)/2.  */
+static unsigned long int
+lc (mp_ptr rp, gmp_randstate_t rstate)
+{
+  mp_ptr tp, seedp, ap;
+  mp_size_t ta;
+  mp_size_t tn, seedn, an;
+  unsigned long int m2exp;
+  unsigned long int bits;
+  int cy;
+  mp_size_t xn;
+  gmp_rand_lc_struct *p;
+  TMP_DECL;
+
+  p = (gmp_rand_lc_struct *) RNG_STATE (rstate);
+
+  m2exp = p->_mp_m2exp;
+
+  seedp = PTR (p->_mp_seed);
+  seedn = SIZ (p->_mp_seed);
+
+  ap = PTR (p->_mp_a);
+  an = SIZ (p->_mp_a);
+
+  /* Allocate temporary storage.  Let there be room for calculation of
+     (A * seed + C) % M, or M if bigger than that.  */
+
+  TMP_MARK;
+
+  /* ta is the allocation in limbs, tn the limbs needed for m2exp bits */
+  ta = an + seedn + 1;
+  tn = BITS_TO_LIMBS (m2exp);
+  if (ta <= tn) /* that is, if (ta < tn + 1) */
+    {
+      mp_size_t tmp = an + seedn;
+      ta = tn + 1;
+      tp = TMP_ALLOC_LIMBS (ta);
+      MPN_ZERO (&tp[tmp], ta - tmp); /* mpn_mul won't zero it out.  */
+    }
+  else
+    tp = TMP_ALLOC_LIMBS (ta);
+
+  /* t = a * seed.  NOTE: an is always > 0; see initialization.  */
+  ASSERT (seedn >= an && an > 0);
+  mpn_mul (tp, seedp, seedn, ap, an);
+
+  /* t = t + c.  NOTE: tn is always >= p->_cn (precondition for __GMPN_ADD);
+     see initialization.  */
+  ASSERT (tn >= p->_cn);
+  __GMPN_ADD (cy, tp, tp, tn, p->_cp, p->_cn);
+
+  /* t = t % m */
+  /* Only the limb containing bit m2exp is masked; when m2exp is a multiple
+     of GMP_NUMB_BITS that is limb tn and the mask is 0.  */
+  tp[m2exp / GMP_NUMB_BITS] &= (CNST_LIMB (1) << m2exp % GMP_NUMB_BITS) - 1;
+
+  /* Save result as next seed.  */
+  MPN_COPY (PTR (p->_mp_seed), tp, tn);
+
+  /* Discard the lower m2exp/2 of the result.  */
+  bits = m2exp / 2;
+  xn = bits / GMP_NUMB_BITS;
+
+  /* tn becomes the number of limbs from limb xn upwards */
+  tn -= xn;
+  if (tn > 0)
+    {
+      unsigned int cnt = bits % GMP_NUMB_BITS;
+      if (cnt != 0)
+       {
+         /* shift down in place, then copy xn+1 limbs, which is enough for
+            the (m2exp+1)/2 result bits when cnt != 0 */
+         mpn_rshift (tp, tp + xn, tn, cnt);
+         MPN_COPY_INCR (rp, tp, xn + 1);
+       }
+      else                     /* Even limb boundary.  */
+       MPN_COPY_INCR (rp, tp + xn, tn);
+    }
+
+  TMP_FREE;
+
+  /* Return number of valid bits in the result.  */
+  return (m2exp + 1) / 2;
+}
+
+
+/* Obtain a sequence of random numbers.  */
+/* Store "nbits" random bits at "rp".  They're built from successive lc()
+   outputs, taking chunk_nbits = m2exp/2 bits from each, with a final
+   partial chunk if nbits isn't a multiple of that.  */
+static void
+randget_lc (gmp_randstate_t rstate, mp_ptr rp, unsigned long int nbits)
+{
+  unsigned long int rbitpos;
+  int chunk_nbits;
+  mp_ptr tp;
+  mp_size_t tn;
+  gmp_rand_lc_struct *p;
+  TMP_DECL;
+
+  p = (gmp_rand_lc_struct *) RNG_STATE (rstate);
+
+  TMP_MARK;
+
+  chunk_nbits = p->_mp_m2exp / 2;
+  tn = BITS_TO_LIMBS (chunk_nbits);
+
+  /* temporary space for one chunk */
+  tp = TMP_ALLOC_LIMBS (tn);
+
+  /* rbitpos is the bit position in rp of the next chunk */
+  rbitpos = 0;
+  while (rbitpos + chunk_nbits <= nbits)
+    {
+      mp_ptr r2p = rp + rbitpos / GMP_NUMB_BITS;
+
+      if (rbitpos % GMP_NUMB_BITS != 0)
+       {
+         mp_limb_t savelimb, rcy;
+         /* Target of new chunk is not bit aligned.  Use temp space
+            and align things by shifting it up.  */
+         lc (tp, rstate);
+         savelimb = r2p[0];
+         rcy = mpn_lshift (r2p, tp, tn, rbitpos % GMP_NUMB_BITS);
+         r2p[0] |= savelimb;
+         /* bogus */
+         if ((chunk_nbits % GMP_NUMB_BITS + rbitpos % GMP_NUMB_BITS)
+             > GMP_NUMB_BITS)
+           r2p[tn] = rcy;
+       }
+      else
+       {
+         /* Target of new chunk is bit aligned.  Let `lc' put bits
+            directly into our target variable.  */
+         lc (r2p, rstate);
+       }
+      rbitpos += chunk_nbits;
+    }
+
+  /* Handle last [0..chunk_nbits) bits.  */
+  if (rbitpos != nbits)
+    {
+      mp_ptr r2p = rp + rbitpos / GMP_NUMB_BITS;
+      int last_nbits = nbits - rbitpos;
+      /* tn is now the limbs needed for just the remaining bits */
+      tn = BITS_TO_LIMBS (last_nbits);
+      lc (tp, rstate);
+      if (rbitpos % GMP_NUMB_BITS != 0)
+       {
+         mp_limb_t savelimb, rcy;
+         /* Target of new chunk is not bit aligned.  Use temp space
+            and align things by shifting it up.  */
+         savelimb = r2p[0];
+         rcy = mpn_lshift (r2p, tp, tn, rbitpos % GMP_NUMB_BITS);
+         r2p[0] |= savelimb;
+         /* store the shifted-out bits only if nbits extends into r2p[tn] */
+         if (rbitpos + tn * GMP_NUMB_BITS - rbitpos % GMP_NUMB_BITS < nbits)
+           r2p[tn] = rcy;
+       }
+      else
+       {
+         MPN_COPY (r2p, tp, tn);
+       }
+      /* Mask off top bits if needed.  */
+      if (nbits % GMP_NUMB_BITS != 0)
+       rp[nbits / GMP_NUMB_BITS]
+         &= ~(~CNST_LIMB (0) << nbits % GMP_NUMB_BITS);
+    }
+
+  TMP_FREE;
+}
+
+
+/* Set the generator's seed to "seed" reduced modulo 2^m2exp, stored padded
+   with high zero limbs up to BITS_TO_LIMBS(m2exp) limbs.  */
+static void
+randseed_lc (gmp_randstate_t rstate, mpz_srcptr seed)
+{
+  gmp_rand_lc_struct *p = (gmp_rand_lc_struct *) RNG_STATE (rstate);
+  mpz_ptr seedz = p->_mp_seed;
+  mp_size_t seedn = BITS_TO_LIMBS (p->_mp_m2exp);
+
+  /* Store p->_mp_seed as an unnormalized integer with size enough
+     for numbers up to 2^m2exp-1.  That size can't be zero.  */
+  mpz_fdiv_r_2exp (seedz, seed, p->_mp_m2exp);
+  /* zero the limbs above the normalized value, then force the size */
+  MPN_ZERO (&PTR (seedz)[SIZ (seedz)], seedn - SIZ (seedz));
+  SIZ (seedz) = seedn;
+}
+
+
+/* Free the LC state of "rstate": the seed and multiplier mpz_t's, then the
+   gmp_rand_lc_struct itself.  */
+static void
+randclear_lc (gmp_randstate_t rstate)
+{
+  gmp_rand_lc_struct *p = (gmp_rand_lc_struct *) RNG_STATE (rstate);
+
+  mpz_clear (p->_mp_seed);
+  mpz_clear (p->_mp_a);
+  (*__gmp_free_func) (p, sizeof (gmp_rand_lc_struct));
+}
+
+/* randiset_lc is declared ahead of its definition because the table below
+   refers to it, and randiset_lc in turn refers to the table.  */
+static void randiset_lc __GMP_PROTO ((gmp_randstate_ptr dst, gmp_randstate_srcptr src));
+
+/* The routines of the LC generator, installed as RNG_FNPTR by
+   gmp_randinit_lc_2exp and randiset_lc.  */
+static const gmp_randfnptr_t Linear_Congruential_Generator = {
+  randseed_lc,
+  randget_lc,
+  randclear_lc,
+  randiset_lc
+};
+
+/* Make "dst" a new LC generator holding a copy of the state of "src": a
+   freshly allocated gmp_rand_lc_struct with its own seed and multiplier,
+   and the same addend and modulus exponent.  */
+static void
+randiset_lc (gmp_randstate_ptr dst, gmp_randstate_srcptr src)
+{
+  gmp_rand_lc_struct *from, *to;
+
+  from = (gmp_rand_lc_struct *) RNG_STATE (src);
+  to = (*__gmp_allocate_func) (sizeof (gmp_rand_lc_struct));
+
+  RNG_FNPTR (dst) = (void *) &Linear_Congruential_Generator;
+  RNG_STATE (dst) = (void *) to;
+
+  to->_mp_m2exp = from->_mp_m2exp;
+  to->_cn = from->_cn;
+
+  /* the whole addend array, however many of its limbs _cn says are used */
+  MPN_COPY (to->_cp, from->_cp, LIMBS_PER_ULONG);
+
+  /* _mp_seed and _mp_a might have high zero limbs (unnormalized), but
+     mpz_init_set won't worry about that */
+  mpz_init_set (to->_mp_seed, from->_mp_seed);
+  mpz_init_set (to->_mp_a,    from->_mp_a);
+}
+
+
+/* Initialize "rstate" as a linear congruential generator with multiplier
+   "a" (reduced modulo 2^m2exp), addend "c" and modulus 2^m2exp.  The
+   initial seed is 1.  m2exp must be non-zero, this is always checked.  */
+void
+gmp_randinit_lc_2exp (gmp_randstate_t rstate,
+                     mpz_srcptr a,
+                     unsigned long int c,
+                     mp_bitcnt_t m2exp)
+{
+  gmp_rand_lc_struct *p;
+  mp_size_t seedn = BITS_TO_LIMBS (m2exp);
+
+  ASSERT_ALWAYS (m2exp != 0);
+
+  p = __GMP_ALLOCATE_FUNC_TYPE (1, gmp_rand_lc_struct);
+  RNG_STATE (rstate) = (void *) p;
+  RNG_FNPTR (rstate) = (void *) &Linear_Congruential_Generator;
+
+  /* allocate m2exp bits of space for p->_mp_seed, and initial seed "1" */
+  mpz_init2 (p->_mp_seed, m2exp);
+  MPN_ZERO (PTR (p->_mp_seed), seedn);
+  SIZ (p->_mp_seed) = seedn;
+  PTR (p->_mp_seed)[0] = 1;
+
+  /* "a", forced to 0 to 2^m2exp-1 */
+  mpz_init (p->_mp_a);
+  mpz_fdiv_r_2exp (p->_mp_a, a, m2exp);
+
+  /* Avoid SIZ(a) == 0 to avoid checking for special case in lc().  */
+  if (SIZ (p->_mp_a) == 0)
+    {
+      SIZ (p->_mp_a) = 1;
+      PTR (p->_mp_a)[0] = CNST_LIMB (0);
+    }
+
+  /* "c" as limbs in _cp, with _cn the number of limbs */
+  MPN_SET_UI (p->_cp, p->_cn, c);
+
+  /* Internally we may discard any bits of c above m2exp.  The following
+     code ensures that __GMPN_ADD in lc() will always work.  */
+  if (seedn < p->_cn)
+    p->_cn = (p->_cp[0] != 0);
+
+  p->_mp_m2exp = m2exp;
+}
diff --git a/randmt.c b/randmt.c

new file mode 100644 (file)

index 0000000..ccd4a11
--- /dev/null
+++ b/randmt.c
@@ -0,0 +1,405 @@
+/* Mersenne Twister pseudo-random number generator functions.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2002, 2003, 2006 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>   /* for NULL */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "randmt.h"
+
+
+/* This code implements the Mersenne Twister pseudorandom number generator
+   by Takuji Nishimura and Makoto Matsumoto.  The buffer initialization
+   function is different in order to permit seeds greater than 2^32-1.
+
+   This file contains a special __gmp_randinit_mt_noseed which excludes the
+   seeding function from the gmp_randfnptr_t routines.  This is for use by
+   mpn_random and mpn_random2 on the global random generator.  MT seeding
+   uses mpz functions, and we don't want mpn routines dragging mpz functions
+   into the link.  */
+
+
+/* Default seed to use when the generator is not initialized.  */
+#define DEFAULT_SEED 5489 /* was 4357 */
+
+/* Tempering masks.  */
+#define MASK_1 0x9D2C5680
+#define MASK_2 0xEFC60000
+
+/* Initial state of buffer when initialized with default seed.  */
+static const gmp_uint_least32_t default_state[N] =
+{
+  0xD247B233,0x9E5AA8F1,0x0FFA981B,0x9DCB0980,0x74200F2B,0xA576D044,
+  0xE9F05ADF,0x1538BFF5,0x59818BBF,0xCF9E58D8,0x09FCE032,0x6A1C663F,
+  0x5116E78A,0x69B3E0FA,0x6D92D665,0xD0A8BE98,0xF669B734,0x41AC1B68,
+  0x630423F1,0x4B8D6B8A,0xC2C46DD7,0x5680747D,0x43703E8F,0x3B6103D2,
+  0x49E5EB3F,0xCBDAB4C1,0x9C988E23,0x747BEE0B,0x9111E329,0x9F031B5A,
+  0xECCA71B9,0x2AFE4EF8,0x8421C7ED,0xAC89AFF1,0xAED90DF3,0x2DD74F01,
+  0x14906A13,0x75873FA9,0xFF83F877,0x5028A0C9,0x11B4C41D,0x7CAEDBC4,
+  0x8672D0A7,0x48A7C109,0x8320E59F,0xBC0B3D5F,0x75A30886,0xF9E0D128,
+  0x41AF7580,0x239BB94D,0xC67A3C81,0x74EEBD6E,0xBC02B53C,0x727EA449,
+  0x6B8A2806,0x5853B0DA,0xBDE032F4,0xCE234885,0x320D6145,0x48CC053F,
+  0x00DBC4D2,0xD55A2397,0xE1059B6F,0x1C3E05D1,0x09657C64,0xD07CB661,
+  0x6E982E34,0x6DD1D777,0xEDED1071,0xD79DFD65,0xF816DDCE,0xB6FAF1E4,
+  0x1C771074,0x311835BD,0x18F952F7,0xF8F40350,0x4ECED354,0x7C8AC12B,
+  0x31A9994D,0x4FD47747,0xDC227A23,0x6DFAFDDF,0x6796E748,0x0C6F634F,
+  0xF992FA1D,0x4CF670C9,0x067DFD31,0xA7A3E1A5,0x8CD7D9DF,0x972CCB34,
+  0x67C82156,0xD548F6A8,0x045CEC21,0xF3240BFB,0xDEF656A7,0x43DE08C5,
+  0xDAD1F92F,0x3726C56B,0x1409F19A,0x942FD147,0xB926749C,0xADDC31B8,
+  0x53D0D869,0xD1BA52FE,0x6722DF8C,0x22D95A74,0x7DC1B52A,0x1DEC6FD5,
+  0x7262874D,0x0A725DC9,0xE6A8193D,0xA052835A,0xDC9AD928,0xE59EBB90,
+  0x70DBA9FF,0xD612749D,0x5A5A638C,0x6086EC37,0x2A579709,0x1449EA3A,
+  0xBC8E3C06,0x2F900666,0xFBE74FD1,0x6B35B911,0xF8335008,0xEF1E979D,
+  0x738AB29D,0xA2DC0FDC,0x7696305D,0xF5429DAC,0x8C41813B,0x8073E02E,
+  0xBEF83CCD,0x7B50A95A,0x05EE5862,0x00829ECE,0x8CA1958C,0xBE4EA2E2,
+  0x4293BB73,0x656F7B23,0x417316D8,0x4467D7CF,0x2200E63B,0x109050C8,
+  0x814CBE47,0x36B1D4A8,0x36AF9305,0x308327B3,0xEBCD7344,0xA738DE27,
+  0x5A10C399,0x4142371D,0x64A18528,0x0B31E8B2,0x641057B9,0x6AFC363B,
+  0x108AD953,0x9D4DA234,0x0C2D9159,0x1C8A1A1F,0x310C66BA,0x87AA1070,
+  0xDAC832FF,0x0A433422,0x7AF15812,0x2D8D9BD0,0x995A25E9,0x25326CAC,
+  0xA34384DB,0x4C8421CC,0x4F0315EC,0x29E8649E,0xA7732D6F,0x2E94D3E3,
+  0x7D98A340,0x397C4D74,0x659DB4DE,0x747D4E9A,0xD9DB8435,0x4659DBE9,
+  0x313E6DC5,0x29D104DC,0x9F226CBA,0x452F18B0,0xD0BC5068,0x844CA299,
+  0x782B294E,0x4AE2EB7B,0xA4C475F8,0x70A81311,0x4B3E8BCC,0x7E20D4BA,
+  0xABCA33C9,0x57BE2960,0x44F9B419,0x2E567746,0x72EB757A,0x102CC0E8,
+  0xB07F32B9,0xD0DABD59,0xBA85AD6B,0xF3E20667,0x98D77D81,0x197AFA47,
+  0x518EE9AC,0xE10CE5A2,0x01CF2C2A,0xD3A3AF3D,0x16DDFD65,0x669232F8,
+  0x1C50A301,0xB93D9151,0x9354D3F4,0x847D79D0,0xD5FE2EC6,0x1F7B0610,
+  0xFA6B90A5,0xC5879041,0x2E7DC05E,0x423F1F32,0xEF623DDB,0x49C13280,
+  0x98714E92,0xC7B6E4AD,0xC4318466,0x0737F312,0x4D3C003F,0x9ACC1F1F,
+  0x5F1C926D,0x085FA771,0x185A83A2,0xF9AA159D,0x0B0B0132,0xF98E7A43,
+  0xCD9EBDBE,0x0190CB29,0x10D93FB6,0x3B8A4D97,0x66A65A41,0xE43E766F,
+  0x77BE3C41,0xB9686364,0xCB36994D,0x6846A287,0x567E77F7,0x36178DD8,
+  0xBDE6B1F2,0xB6EFDC64,0x82950324,0x42053F47,0xC09BE51C,0x0942D762,
+  0x35F92C7F,0x367DEC61,0x6EE3D983,0xDBAAF78A,0x265D2C47,0x8EB4BF5C,
+  0x33B232D7,0xB0137E77,0x373C39A7,0x8D2B2E76,0xC7510F01,0x50F9E032,
+  0x7B1FDDDB,0x724C2AAE,0xB10ECB31,0xCCA3D1B8,0x7F0BCF10,0x4254BBBD,
+  0xE3F93B97,0x2305039B,0x53120E22,0x1A2F3B9A,0x0FDDBD97,0x0118561E,
+  0x0A798E13,0x9E0B3ACD,0xDB6C9F15,0xF512D0A2,0x9E8C3A28,0xEE2184AE,
+  0x0051EC2F,0x2432F74F,0xB0AA66EA,0x55128D88,0xF7D83A38,0x4DAE8E82,
+  0x3FDC98D6,0x5F0BD341,0x7244BE1D,0xC7B48E78,0x2D473053,0x43892E20,
+  0xBA0F1F2A,0x524D4895,0x2E10BCB1,0x4C372D81,0x5C3E50CD,0xCF61CC2E,
+  0x931709AB,0x81B3AEFC,0x39E9405E,0x7FFE108C,0x4FBB3FF8,0x06ABE450,
+  0x7F5BF51E,0xA4E3CDFD,0xDB0F6C6F,0x159A1227,0x3B9FED55,0xD20B6F7F,
+  0xFBE9CC83,0x64856619,0xBF52B8AF,0x9D7006B0,0x71165BC6,0xAE324AEE,
+  0x29D27F2C,0x794C2086,0x74445CE2,0x782915CC,0xD4CE6886,0x3289AE7C,
+  0x53DEF297,0x4185F7ED,0x88B72400,0x3C09DC11,0xBCE3AAB6,0x6A75934A,
+  0xB267E399,0x000DF1BF,0x193BA5E2,0xFA3E1977,0x179E14F6,0x1EEDE298,
+  0x691F0B06,0xB84F78AC,0xC1C15316,0xFFFF3AD6,0x0B457383,0x518CD612,
+  0x05A00F3E,0xD5B7D275,0x4C5ECCD7,0xE02CD0BE,0x5558E9F2,0x0C89BBF0,
+  0xA3D96227,0x2832D2B2,0xF667B897,0xD4556554,0xF9D2F01F,0xFA1E3FAE,
+  0x52C2E1EE,0xE5451F31,0x7E849729,0xDABDB67A,0x54BF5E7E,0xF831C271,
+  0x5F1A17E3,0x9D140AFE,0x92741C47,0x48CFABCE,0x9CBBE477,0x9C3EE57F,
+  0xB07D4C39,0xCC21BCE2,0x697708B1,0x58DA2A6B,0x2370DB16,0x6E641948,
+  0xACC5BD52,0x868F24CC,0xCA1DB0F5,0x4CADA492,0x3F443E54,0xC4A4D5E9,
+  0xF00AD670,0xE93C86E0,0xFE90651A,0xDDE532A3,0xA66458DF,0xAB7D7151,
+  0x0E2E775F,0xC9109F99,0x8D96D59F,0x73CEF14C,0xC74E88E9,0x02712DC0,
+  0x04F41735,0x2E5914A2,0x59F4B2FB,0x0287FC83,0x80BC0343,0xF6B32559,
+  0xC74178D4,0xF1D99123,0x383CCC07,0xACC0637D,0x0863A548,0xA6FCAC85,
+  0x2A13EFF0,0xAF2EEDB1,0x41E72750,0xE0C6B342,0x5DA22B46,0x635559E0,
+  0xD2EA40AC,0x10AA98C0,0x19096497,0x112C542B,0x2C85040C,0xA868E7D0,
+  0x6E260188,0xF596D390,0xC3BB5D7A,0x7A2AA937,0xDFD15032,0x6780AE3B,
+  0xDB5F9CD8,0x8BD266B0,0x7744AF12,0xB463B1B0,0x589629C9,0xE30DBC6E,
+  0x880F5569,0x209E6E16,0x9DECA50C,0x02987A57,0xBED3EA57,0xD3A678AA,
+  0x70DD030D,0x0CFD9C5D,0x92A18E99,0xF5740619,0x7F6F0A7D,0x134CAF9A,
+  0x70F5BAE4,0x23DCA7B5,0x4D788FCD,0xC7F07847,0xBCF77DA1,0x9071D568,
+  0xFC627EA1,0xAE004B77,0x66B54BCB,0x7EF2DAAC,0xDCD5AC30,0xB9BDF730,
+  0x505A97A7,0x9D881FD3,0xADB796CC,0x94A1D202,0x97535D7F,0x31EC20C0,
+  0xB1887A98,0xC1475069,0xA6F73AF3,0x71E4E067,0x46A569DE,0xD2ADE430,
+  0x6F0762C7,0xF50876F4,0x53510542,0x03741C3E,0x53502224,0xD8E54D60,
+  0x3C44AB1A,0x34972B46,0x74BFA89D,0xD7D768E0,0x37E605DC,0xE13D1BDF,
+  0x5051C421,0xB9E057BE,0xB717A14C,0xA1730C43,0xB99638BE,0xB5D5F36D,
+  0xE960D9EA,0x6B1388D3,0xECB6D3B6,0xBDBE8B83,0x2E29AFC5,0x764D71EC,
+  0x4B8F4F43,0xC21DDC00,0xA63F657F,0x82678130,0xDBF535AC,0xA594FC58,
+  0x942686BC,0xBD9B657B,0x4A0F9B61,0x44FF184F,0x38E10A2F,0x61910626,
+  0x5E247636,0x7106D137,0xC62802F0,0xBD1D1F00,0x7CC0DCB2,0xED634909,
+  0xDC13B24E,0x9799C499,0xD77E3D6A,0x14773B68,0x967A4FB7,0x35EECFB1,
+  0x2A5110B8,0xE2F0AF94,0x9D09DEA5,0x20255D27,0x5771D34B,0xE1089EE4,
+  0x246F330B,0x8F7CAEE5,0xD3064712,0x75CAFBEE,0xB94F7028,0xED953666,
+  0x5D1975B4,0x5AF81271,0x13BE2025,0x85194659,0x30805331,0xEC9D46C0,
+  0xBC027C36,0x2AF84188,0xC2141B80,0xC02B1E4A,0x04D36177,0xFC50E9D7,
+  0x39CE79DA,0x917E0A00,0xEF7A0BF4,0xA98BD8D1,0x19424DD2,0x9439DF1F,
+  0xC42AF746,0xADDBE83E,0x85221F0D,0x45563E90,0x9095EC52,0x77887B25,
+  0x8AE46064,0xBD43B71A,0xBB541956,0x7366CF9D,0xEE8E1737,0xB5A727C9,
+  0x5076B3E7,0xFC70BACA,0xCE135B75,0xC4E91AA3,0xF0341911,0x53430C3F,
+  0x886B0824,0x6BB5B8B7,0x33E21254,0xF193B456,0x5B09617F,0x215FFF50,
+  0x48D97EF1,0x356479AB,0x6EA9DDC4,0x0D352746,0xA2F5CE43,0xB226A1B3,
+  0x1329EA3C,0x7A337CC2,0xB5CCE13D,0x563E3B5B,0x534E8E8F,0x561399C9,
+  0xE1596392,0xB0F03125,0x4586645B,0x1F371847,0x94EAABD1,0x41F97EDD,
+  0xE3E5A39B,0x71C774E2,0x507296F4,0x5960133B,0x7852C494,0x3F5B2691,
+  0xA3F87774,0x5A7AF89E,0x17DA3F28,0xE9D9516D,0xFCC1C1D5,0xE4618628,
+  0x04081047,0xD8E4DB5F,0xDC380416,0x8C4933E2,0x95074D53,0xB1B0032D,
+  0xCC8102EA,0x71641243,0x98D6EB6A,0x90FEC945,0xA0914345,0x6FAB037D,
+  0x70F49C4D,0x05BF5B0E,0x927AAF7F,0xA1940F61,0xFEE0756F,0xF815369F,
+  0x5C00253B,0xF2B9762F,0x4AEB3CCC,0x1069F386,0xFBA4E7B9,0x70332665,
+  0x6BCA810E,0x85AB8058,0xAE4B2B2F,0x9D120712,0xBEE8EACB,0x776A1112
+};
+
+void
+__gmp_mt_recalc_buffer (gmp_uint_least32_t mt[])
+{
+  gmp_uint_least32_t y;
+  int kk;
+
+  for (kk = 0; kk < N - M; kk++)
+    {
+      y = (mt[kk] & 0x80000000) | (mt[kk + 1] & 0x7FFFFFFF);
+      mt[kk] = mt[kk + M] ^ (y >> 1) ^ ((y & 0x01) != 0 ? MATRIX_A : 0);
+    }
+  for (; kk < N - 1; kk++)
+    {
+      y = (mt[kk] & 0x80000000) | (mt[kk + 1] & 0x7FFFFFFF);
+      mt[kk] = mt[kk - (N - M)] ^ (y >> 1) ^ ((y & 0x01) != 0 ? MATRIX_A : 0);
+    }
+
+  y = (mt[N - 1] & 0x80000000) | (mt[0] & 0x7FFFFFFF);
+  mt[N - 1] = mt[M - 1] ^ (y >> 1) ^ ((y & 0x01) != 0 ? MATRIX_A : 0);
+}
+
+
+/* Get nbits bits of output from the generator into dest.
+   Note that Mersenne Twister is designed to produce outputs in
+   32-bit words.
+
+   rstate  generator state; RNG_STATE (rstate) is read as a
+           gmp_rand_mt_struct and its buffer and index are updated.
+   dest    receives nbits / GMP_NUMB_BITS whole limbs, plus one more
+           partial limb when nbits is not a multiple of GMP_NUMB_BITS.
+           Bits of the partial limb above the requested count are zero.
+   nbits   number of random bits wanted.
+
+   Note that the NEXT_RANDOM helper macro defined inside this function is
+   not #undef'd, so it stays defined for the rest of the translation
+   unit.  */
+void
+__gmp_randget_mt (gmp_randstate_t rstate, mp_ptr dest, unsigned long int nbits)
+{
+  gmp_uint_least32_t y;
+  int rbits;
+  mp_size_t i;
+  mp_size_t nlimbs;
+  int *pmti;
+  gmp_uint_least32_t *mt;
+
+  /* Work directly on the index and buffer stored in the state.  */
+  pmti = &((gmp_rand_mt_struct *) RNG_STATE (rstate))->mti;
+  mt = ((gmp_rand_mt_struct *) RNG_STATE (rstate))->mt;
+
+  /* Whole limbs to fill, and bits left over for a final partial limb.  */
+  nlimbs = nbits / GMP_NUMB_BITS;
+  rbits = nbits % GMP_NUMB_BITS;
+
+  /* NEXT_RANDOM leaves the next tempered output word in y.  When the
+     index has reached N the buffer is regenerated first and the index
+     restarts at 0.  */
+#define NEXT_RANDOM                    \
+  do                                   \
+    {                                  \
+      if (*pmti >= N)                  \
+       {                               \
+         __gmp_mt_recalc_buffer (mt);  \
+         *pmti = 0;                    \
+       }                               \
+      y = mt[(*pmti)++];               \
+      y ^= (y >> 11);                  \
+      y ^= (y << 7) & MASK_1;          \
+      y ^= (y << 15) & MASK_2;         \
+      y ^= (y >> 18);                  \
+    }                                  \
+  while (0)
+
+
+  /* Handle the common cases of 32- or 64-bit limbs with fast,
+     optimized routines, and the rest of cases with a general
+     routine.  In all cases, no more than 31 bits are rejected
+     for the last limb so that every version of the code is
+     consistent with the others.  */
+
+#if (GMP_NUMB_BITS == 32)
+
+  /* One generator output per limb.  */
+  for (i = 0; i < nlimbs; i++)
+    {
+      NEXT_RANDOM;
+      dest[i] = (mp_limb_t) y;
+    }
+  if (rbits)
+    {
+      /* Partial limb: keep only the low rbits bits of one output.  */
+      NEXT_RANDOM;
+      dest[nlimbs] = (mp_limb_t) (y & ~(ULONG_MAX << rbits));
+    }
+
+#else /* GMP_NUMB_BITS != 32 */
+#if (GMP_NUMB_BITS == 64)
+
+  /* Two generator outputs per limb: the first fills the low half, the
+     second the high half.  */
+  for (i = 0; i < nlimbs; i++)
+    {
+      NEXT_RANDOM;
+      dest[i] = (mp_limb_t) y;
+      NEXT_RANDOM;
+      dest[i] |= (mp_limb_t) y << 32;
+    }
+  if (rbits)
+    {
+      if (rbits < 32)
+       {
+         /* Fewer than 32 bits wanted: one masked output suffices.  */
+         NEXT_RANDOM;
+         dest[nlimbs] = (mp_limb_t) (y & ~(ULONG_MAX << rbits));
+       }
+      else
+       {
+         /* A full output for the low half; when rbits is exactly 32 that
+            is all that is consumed.  */
+         NEXT_RANDOM;
+         dest[nlimbs] = (mp_limb_t) y;
+         if (rbits > 32)
+           {
+             /* The remaining rbits-32 bits come from a second, masked
+                output placed in the high half.  */
+             NEXT_RANDOM;
+             dest[nlimbs] |=
+               ((mp_limb_t) (y & ~(ULONG_MAX << (rbits-32)))) << 32;
+           }
+       }
+    }
+
+#else /* GMP_NUMB_BITS != 64 */
+
+  {
+    /* Fall back to a general algorithm.  This algorithm works by
+       keeping a pool of up to 64 bits (2 outputs from MT) acting
+       as a shift register from which bits are consumed as needed.
+       Bits are consumed using the LSB bits of bitpool_l, and
+       inserted via bitpool_h and shifted to the right place.  */
+
+    gmp_uint_least32_t bitpool_h = 0;
+    gmp_uint_least32_t bitpool_l = 0;
+    int bits_in_pool = 0;      /* Holds number of valid bits in the pool.  */
+    int bits_to_fill;          /* Holds total number of bits to put in
+                                  destination.  */
+    int bitidx;                        /* Holds the destination bit position.  */
+    mp_size_t nlimbs2;         /* Number of whole+partial limbs to fill.  */
+
+    nlimbs2 = nlimbs + (rbits != 0);
+
+    for (i = 0; i < nlimbs2; i++)
+      {
+       /* Whole limbs take GMP_NUMB_BITS bits; the final partial limb
+          (index nlimbs, if present) takes rbits.  */
+       bitidx = 0;
+       if (i < nlimbs)
+         bits_to_fill = GMP_NUMB_BITS;
+       else
+         bits_to_fill = rbits;
+
+       dest[i] = CNST_LIMB (0);
+       while (bits_to_fill >= 32) /* Process whole 32-bit blocks first.  */
+         {
+           if (bits_in_pool < 32)      /* Need more bits.  */
+             {
+               /* 64-bit right shift.  */
+               /* The new output is appended above the bits already
+                  pooled: its low part completes bitpool_l, the rest
+                  stays in bitpool_h.  The bits_in_pool == 0 case is
+                  handled separately because the shift count would
+                  otherwise be 32.  */
+               NEXT_RANDOM;
+               bitpool_h = y;
+               bitpool_l |= (bitpool_h << bits_in_pool) & 0xFFFFFFFF;
+               if (bits_in_pool == 0)
+                 bitpool_h = 0;
+               else
+                 bitpool_h >>= 32 - bits_in_pool;
+               bits_in_pool += 32;     /* We've got 32 more bits.  */
+             }
+
+           /* Fill a 32-bit chunk.  */
+           dest[i] |= ((mp_limb_t) bitpool_l) << bitidx;
+           bitpool_l = bitpool_h;
+           bits_in_pool -= 32;
+           bits_to_fill -= 32;
+           bitidx += 32;
+         }
+
+       /* Cover the case where GMP_NUMB_BITS is not a multiple of 32.  */
+       if (bits_to_fill != 0)
+         {
+           if (bits_in_pool < bits_to_fill)
+             {
+               /* Same refill step as in the loop above.  */
+               NEXT_RANDOM;
+               bitpool_h = y;
+               bitpool_l |= (bitpool_h << bits_in_pool) & 0xFFFFFFFF;
+               if (bits_in_pool == 0)
+                 bitpool_h = 0;
+               else
+                 bitpool_h >>= 32 - bits_in_pool;
+               bits_in_pool += 32;
+             }
+
+           /* Take the low bits_to_fill bits of the pool, then shift the
+              pool down by that amount, pulling bits across from
+              bitpool_h.  */
+           dest[i] |= (((mp_limb_t) bitpool_l
+                        & ~(~CNST_LIMB (0) << bits_to_fill))
+                       << bitidx);
+           bitpool_l = ((bitpool_l >> bits_to_fill)
+                        | (bitpool_h << (32 - bits_to_fill))) & 0xFFFFFFFF;
+           bitpool_h >>= bits_to_fill;
+           bits_in_pool -= bits_to_fill;
+         }
+      }
+  }
+
+#endif /* GMP_NUMB_BITS != 64 */
+#endif /* GMP_NUMB_BITS != 32 */
+}
+
+/* Release the Mersenne Twister state block attached to RSTATE.
+
+   The size handed to the free function is rebuilt from the limb count
+   stored in the alloc field of rstate->_mp_seed, which the init and copy
+   routines in this file set when they allocate the block.  */
+void
+__gmp_randclear_mt (gmp_randstate_t rstate)
+{
+  void   *block = (void *) RNG_STATE (rstate);
+  size_t  block_bytes = ALLOC (rstate->_mp_seed) * BYTES_PER_MP_LIMB;
+
+  (*__gmp_free_func) (block, block_bytes);
+}
+
+/* Declared here because the function table below refers to the copy
+   routine before its definition further down in this file.  */
+void __gmp_randiset_mt __GMP_PROTO ((gmp_randstate_ptr dst, gmp_randstate_srcptr src));
+
+/* Function table for a Mersenne Twister generator that has no seeding
+   routine, so that using it does not drag the mpz-based seeding code into
+   the link (see the comment at the top of this file).  The seeding slot
+   is NULL, so a state using this table must not be seeded through this
+   table.  */
+static const gmp_randfnptr_t Mersenne_Twister_Generator_Noseed = {
+  NULL,                        /* seeding: deliberately absent */
+  __gmp_randget_mt,            /* produce random bits */
+  __gmp_randclear_mt,          /* free the state block */
+  __gmp_randiset_mt            /* copy a state */
+};
+
+/* Initialize DST as a copy of the Mersenne Twister state in SRC.
+
+   DST gets its own freshly allocated state block holding the same buffer
+   contents and index as SRC.  Note that DST is always given the no-seed
+   function table, whichever table SRC uses.  */
+void
+__gmp_randiset_mt (gmp_randstate_ptr dst, gmp_randstate_srcptr src)
+{
+  /* Size of the state structure, rounded up to whole limbs.  */
+  const mp_size_t state_limbs = ((sizeof (gmp_rand_mt_struct) - 1) / BYTES_PER_MP_LIMB) + 1;
+  gmp_rand_mt_struct *to, *from;
+  mp_size_t word;
+
+  /* Install the function table first.  */
+  RNG_FNPTR (dst) = (void *) &Mersenne_Twister_Generator_Noseed;
+
+  /* Allocate the state block.  The limb count is recorded in the alloc
+     field because __gmp_randclear_mt uses it to compute the size to
+     free.  */
+  to = (gmp_rand_mt_struct *) __GMP_ALLOCATE_FUNC_LIMBS (state_limbs);
+  RNG_STATE (dst) = (mp_ptr) to;
+  ALLOC (dst->_mp_seed) = state_limbs;
+
+  /* Duplicate the buffer word by word, then the current index.  */
+  from = (gmp_rand_mt_struct *) RNG_STATE (src);
+  word = 0;
+  while (word < N)
+    {
+      to->mt[word] = from->mt[word];
+      word++;
+    }
+  to->mti = from->mti;
+}
+
+/* Initialize DST as a Mersenne Twister generator in its default-seed
+   state, using the function table that has no seeding routine.
+
+   The buffer is loaded from the precomputed default_state table instead
+   of being derived from a seed, and the index is set to WARM_UP % N.  */
+void
+__gmp_randinit_mt_noseed (gmp_randstate_ptr dst)
+{
+  /* Size of the state structure, rounded up to whole limbs.  */
+  const mp_size_t state_limbs = ((sizeof (gmp_rand_mt_struct) - 1) / BYTES_PER_MP_LIMB) + 1;
+  gmp_rand_mt_struct *state;
+  mp_size_t word;
+
+  /* Install the function table first.  */
+  RNG_FNPTR (dst) = (void *) &Mersenne_Twister_Generator_Noseed;
+
+  /* Allocate the state block.  The limb count is recorded in the alloc
+     field because __gmp_randclear_mt uses it to compute the size to
+     free.  */
+  state = (gmp_rand_mt_struct *) __GMP_ALLOCATE_FUNC_LIMBS (state_limbs);
+  RNG_STATE (dst) = (mp_ptr) state;
+  ALLOC (dst->_mp_seed) = state_limbs;
+
+  /* Load the precomputed buffer for the default seed.  */
+  word = 0;
+  while (word < N)
+    {
+      state->mt[word] = default_state[word];
+      word++;
+    }
+
+  state->mti = WARM_UP % N;
+}
diff --git a/randmt.h b/randmt.h

new file mode 100644 (file)

index 0000000..fc23381
--- /dev/null
+++ b/randmt.h
@@ -0,0 +1,40 @@
+/* Mersenne Twister pseudo-random number generator defines.
+
+Copyright 2002, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+/* Number of extractions used to warm the buffer up.  The seeding code
+   regenerates the buffer WARM_UP / N times and then starts reading at
+   index WARM_UP % N.  */
+#define WARM_UP 2000
+
+/* Period parameters.  */
+#define N 624                 /* Number of words in the state buffer.  */
+#define M 397                 /* Distance to the partner word used when
+                                 regenerating the buffer.  */
+#define MATRIX_A 0x9908B0DF   /* Constant vector a.  */
+
+/* State structure for MT.  */
+typedef struct
+{
+  gmp_uint_least32_t mt[N];    /* State array.  */
+  int mti;                     /* Index of current value; reaching N makes
+                                  the next extraction regenerate the
+                                  buffer.  */
+} gmp_rand_mt_struct;
+
+
+/* Routines shared between the no-seed and the seedable generator.  */
+void __gmp_mt_recalc_buffer __GMP_PROTO ((gmp_uint_least32_t *));
+void __gmp_randget_mt __GMP_PROTO ((gmp_randstate_t, mp_ptr, unsigned long int));
+void __gmp_randclear_mt __GMP_PROTO ((gmp_randstate_t rstate));
+void __gmp_randiset_mt __GMP_PROTO ((gmp_randstate_ptr, gmp_randstate_srcptr));
diff --git a/randmts.c b/randmts.c

new file mode 100644 (file)

index 0000000..e3b0338
--- /dev/null
+++ b/randmts.c
@@ -0,0 +1,157 @@
+/* Mersenne Twister pseudo-random number generator functions.
+
+Copyright 2002, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "randmt.h"
+
+
+/* Fold R down until it fits in 19937 bits, preserving its residue modulo
+   2^19937-20023: the part above bit 19937 is cut off and added back in
+   multiplied by 20023.  T is scratch space.  */
+static void
+mangle_fold (mpz_ptr r, mpz_ptr t)
+{
+  mpz_tdiv_q_2exp (t, r, 19937L);
+  while (mpz_sgn (t) != 0)
+    {
+      mpz_tdiv_r_2exp (r, r, 19937L);
+      mpz_addmul_ui (r, t, 20023L);
+      mpz_tdiv_q_2exp (t, r, 19937L);
+    }
+}
+
+/* Calculate (b^e) mod (2^n-k) for e=1074888996, n=19937 and k=20023,
+   needed by the seeding function below.
+
+   Plain left-to-right binary exponentiation.  R starts as b, which
+   accounts for the top bit of e (0x40000000); the loop then walks the
+   remaining bits from 0x20000000 downwards, squaring for every bit and
+   multiplying by b where the bit is set.  After each multiplication the
+   value is folded back below 2^19937.  The result is congruent to b^e
+   modulo 2^19937-20023 and less than 2^19937.  */
+static void
+mangle_seed (mpz_ptr r, mpz_srcptr b_orig)
+{
+  const unsigned long  exponent = 0x40118124;
+  unsigned long        mask;
+  mpz_t                high, base;
+
+  mpz_init (high);
+  mpz_init_set (base, b_orig);  /* private copy, in case r==b_orig */
+
+  mpz_set (r, base);
+  for (mask = 0x20000000; mask != 0; mask >>= 1)
+    {
+      mpz_mul (r, r, r);
+      mangle_fold (r, high);
+
+      if ((exponent & mask) != 0)
+        {
+          mpz_mul (r, r, base);
+          mangle_fold (r, high);
+        }
+    }
+
+  mpz_clear (high);
+  mpz_clear (base);
+}
+
+
+/* Seeding function.  Uses powering modulo a non-Mersenne prime to obtain
+   a permutation of the input seed space.  The modulus is 2^19937-20023,
+   which is probably prime.  The power is 1074888996.  In order to avoid
+   seeds 0 and 1 generating invalid or strange output, the input seed is
+   first manipulated as follows:
+
+     seed1 = seed mod (2^19937-20027) + 2
+
+   so that seed1 lies between 2 and 2^19937-20026 inclusive. Then the
+   powering is performed as follows:
+
+     seed2 = (seed1^1074888996) mod (2^19937-20023)
+
+   and then seed2 is used to bootstrap the buffer.
+
+   This method aims to give guarantees that:
+     a) seed2 will never be zero,
+     b) seed2 will very seldom have a very low population of ones in its
+       binary representation, and
+     c) every seed between 0 and 2^19937-20028 (inclusive) will yield a
+       different sequence.
+
+   CAVEATS:
+
+   The period of the seeding function is 2^19937-20027.  This means that
+   with seeds 2^19937-20027, 2^19937-20026, ... the exact same sequences
+   are obtained as with seeds 0, 1, etc.; it also means that seed -1
+   produces the same sequence as seed 2^19937-20028, etc.
+ */
+
+/* Seed the Mersenne Twister state attached to RSTATE from SEED.
+
+   The seed is reduced modulo 2^19937-20027, offset by 2, passed through
+   mangle_seed, and the 19937-bit result is spread over the state buffer:
+   bit 19936 becomes the top bit of mt[0] and the low 19936 bits fill
+   mt[1] onwards in 32-bit chunks.  The buffer is then regenerated
+   WARM_UP / N times and the index set to WARM_UP % N.  */
+static void
+randseed_mt (gmp_randstate_t rstate, mpz_srcptr seed)
+{
+  gmp_rand_mt_struct *state = (gmp_rand_mt_struct *) RNG_STATE (rstate);
+  mpz_t   modulus;   /* 2^19937-20027.  */
+  mpz_t   mangled;   /* Seed after reduction, offset and mangling.  */
+  size_t  words;
+  int     pass;
+
+  /* mpz_init leaves the value 0, so setting bit 19937 gives 2^19937.  */
+  mpz_init (modulus);
+  mpz_init (mangled);
+  mpz_setbit (modulus, 19937L);
+  mpz_sub_ui (modulus, modulus, 20027L);
+
+  mpz_mod (mangled, seed, modulus);
+  mpz_add_ui (mangled, mangled, 2L);
+  mangle_seed (mangled, mangled);
+
+  /* Move bit 19936 into bit 31 of mt[0] and clear it in the number.  */
+  if (mpz_tstbit (mangled, 19936L) != 0)
+    state->mt[0] = 0x80000000;
+  else
+    state->mt[0] = 0;
+  mpz_clrbit (mangled, 19936L);
+
+  /* Export the rest as 32-bit chunks, least significant first, starting
+     at mt[1].  The nails argument discards any bits a buffer word has
+     beyond 32.  */
+  mpz_export (&state->mt[1], &words, -1, sizeof (state->mt[1]), 0,
+              8 * sizeof (state->mt[1]) - 32, mangled);
+  words++;                      /* count mt[0] as well */
+  ASSERT (words <= N);
+
+  /* Zero whatever part of the buffer the export did not reach.  */
+  for (; words < N; words++)
+    state->mt[words] = 0;
+
+  mpz_clear (modulus);
+  mpz_clear (mangled);
+
+  /* Warm the generator up; this does nothing when WARM_UP is less
+     than N.  */
+  for (pass = 0; pass < WARM_UP / N; pass++)
+    __gmp_mt_recalc_buffer (state->mt);
+
+  state->mti = WARM_UP % N;
+}
+
+
+/* Function table for the seedable Mersenne Twister generator.  Only the
+   seeding routine is local to this file; the other three entries are the
+   shared routines declared in randmt.h.  */
+static const gmp_randfnptr_t Mersenne_Twister_Generator = {
+  randseed_mt,                 /* seeding */
+  __gmp_randget_mt,            /* produce random bits */
+  __gmp_randclear_mt,          /* free the state block */
+  __gmp_randiset_mt            /* copy a state */
+};
+
+/* Initialize MT-specific data.
+
+   The no-seed initializer does all the state setup and installs its own
+   function table; that table is then replaced with the one that includes
+   the seeding routine.  The order of the two statements matters, since
+   the second overrides the table pointer written by the first.  */
+void
+gmp_randinit_mt (gmp_randstate_t rstate)
+{
+  __gmp_randinit_mt_noseed (rstate);
+  RNG_FNPTR (rstate) = (void *) &Mersenne_Twister_Generator;
+}
diff --git a/randmui.c b/randmui.c

new file mode 100644 (file)

index 0000000..f349d35
--- /dev/null
+++ b/randmui.c
@@ -0,0 +1,75 @@
+/* gmp_urandomm_ui -- uniform random number 0 to N-1 for ulong N.
+
+Copyright 2003, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* If n is a power of 2 then the test ret<n is always true and the loop is
+   unnecessary, but there's no need to add special code for this.  Just get
+   the "bits" calculation correct and let it go through normally.
+
+   If n is 1 then we will have bits==0, _gmp_rand will produce no output,
+   and we always return 0.  Again there seems no need for a special case;
+   just initialize a[0]=0 and let it go through normally.  */
+
+#define MAX_URANDOMM_ITER  80
+
+/* Return a uniformly distributed random number in the range 0 to n-1.
+
+   rstate  generator to draw bits from.
+   n       exclusive upper bound; n == 0 triggers DIVIDE_BY_ZERO.
+
+   Candidates of just enough bits to cover n-1 are drawn and the first one
+   below n is returned.  After MAX_URANDOMM_ITER failed attempts the last
+   candidate is brought into range by subtracting n instead.  */
+unsigned long
+gmp_urandomm_ui (gmp_randstate_ptr rstate, unsigned long n)
+{
+  mp_limb_t      limbs[LIMBS_PER_ULONG];
+  unsigned long  candidate, nbits, lead_zeros;
+  int            attempt;
+
+  if (UNLIKELY (n == 0))
+    DIVIDE_BY_ZERO;
+
+  /* Pre-clear the limbs: _gmp_rand stores nothing at all when nbits==0
+     (which arises when n==1), and only limbs[0] when
+     nbits <= GMP_NUMB_BITS.  */
+  limbs[0] = 0;
+#if LIMBS_PER_ULONG > 1
+  limbs[1] = 0;
+#endif
+
+  /* Bits needed to represent n-1: the bit length of n, less one when n
+     is a power of 2.  */
+  count_leading_zeros (lead_zeros, (mp_limb_t) n);
+  nbits = GMP_LIMB_BITS - lead_zeros - (POW2_P(n) != 0);
+
+  attempt = 0;
+  do
+    {
+      _gmp_rand (limbs, rstate, nbits);
+#if LIMBS_PER_ULONG == 1
+      candidate = limbs[0];
+#else
+      candidate = limbs[0] | (limbs[1] << GMP_NUMB_BITS);
+#endif
+      if (LIKELY (candidate < n))   /* usually one attempt suffices */
+        return candidate;
+      attempt++;
+    }
+  while (attempt < MAX_URANDOMM_ITER);
+
+  /* Too many attempts, there must be something degenerate about the
+     rstate algorithm.  The candidate has no more bits than n-1 does, so
+     a single subtraction of n brings it into range.  */
+  candidate -= n;
+  ASSERT (candidate < n);
+
+  return candidate;
+}
diff --git a/rands.c b/rands.c

new file mode 100644 (file)

index 0000000..93eb3e7
--- /dev/null
+++ b/rands.c
@@ -0,0 +1,31 @@
+/* __gmp_rands -- global random state for old-style random functions.
+
+   EVERYTHING IN THIS FILE IS FOR INTERNAL USE ONLY.  IT'S ALMOST CERTAIN TO
+   BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN FUTURE GNU
+   MP RELEASES.  */
+
+/*
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Use this via the RANDS macro in gmp-impl.h */
+/* Flag for the global state below; starts out as 0.
+   NOTE(review): presumably RANDS sets it once __gmp_rands has been
+   initialized -- confirm against the macro in gmp-impl.h.  */
+char             __gmp_rands_initialized = 0;
+/* The global random state itself, for the old-style random functions.  */
+gmp_randstate_t  __gmp_rands;
diff --git a/randsd.c b/randsd.c

new file mode 100644 (file)

index 0000000..077382e
--- /dev/null
+++ b/randsd.c
@@ -0,0 +1,28 @@
+/* gmp_randseed (state, seed) -- Set initial seed SEED in random state STATE.
+
+Copyright 2000, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Seed RSTATE from the integer SEED by calling the seeding routine in the
+   function table attached to the state.  No check is made that the table
+   actually provides a seeding routine.  */
+void
+gmp_randseed (gmp_randstate_t rstate,
+             mpz_srcptr seed)
+{
+  gmp_randfnptr_t *table = (gmp_randfnptr_t *) RNG_FNPTR (rstate);
+
+  (*table->randseed_fn) (rstate, seed);
+}
diff --git a/randsdui.c b/randsdui.c

new file mode 100644 (file)

index 0000000..9039eda
--- /dev/null
+++ b/randsdui.c
@@ -0,0 +1,33 @@
+/* gmp_randseed_ui (state, seed) -- Set initial seed SEED in random
+   state STATE.
+
+Copyright 2000, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Seed RSTATE from an unsigned long.  The value is wrapped in a temporary
+   mpz_t backed by a local limb array and handed to gmp_randseed.  */
+void
+gmp_randseed_ui (gmp_randstate_t rstate,
+                 unsigned long int seed)
+{
+  mp_limb_t seed_limbs[LIMBS_PER_ULONG];
+  mpz_t seed_z;
+
+  MPZ_FAKE_UI (seed_z, seed_limbs, seed);
+  gmp_randseed (rstate, seed_z);
+}
diff --git a/scanf/Makefile.am b/scanf/Makefile.am

new file mode 100644 (file)

index 0000000..5359b5e
--- /dev/null
+++ b/scanf/Makefile.am
@@ -0,0 +1,27 @@
+## Process this file with automake to generate Makefile.in
+
+# Copyright 2001, 2002 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+INCLUDES = -D__GMP_WITHIN_GMP -I$(top_srcdir)
+
+noinst_LTLIBRARIES = libscanf.la
+
+libscanf_la_SOURCES = \
+  doscan.c fscanf.c fscanffuns.c scanf.c sscanf.c sscanffuns.c \
+  vfscanf.c vscanf.c vsscanf.c
diff --git a/scanf/Makefile.in b/scanf/Makefile.in

new file mode 100644 (file)

index 0000000..c95eea6
--- /dev/null
+++ b/scanf/Makefile.in
@@ -0,0 +1,549 @@
+# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation,
+# Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+# Copyright 2001, 2002 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+ANSI2KNR = $(top_builddir)/ansi2knr
+subdir = scanf
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
+       $(top_srcdir)/configure.in
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+       $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+LTLIBRARIES = $(noinst_LTLIBRARIES)
+libscanf_la_LIBADD =
+am_libscanf_la_OBJECTS = doscan$U.lo fscanf$U.lo fscanffuns$U.lo \
+       scanf$U.lo sscanf$U.lo sscanffuns$U.lo vfscanf$U.lo \
+       vscanf$U.lo vsscanf$U.lo
+libscanf_la_OBJECTS = $(am_libscanf_la_OBJECTS)
+DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
+depcomp =
+am__depfiles_maybe =
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+       $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+       $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
+       $(LDFLAGS) -o $@
+SOURCES = $(libscanf_la_SOURCES)
+DIST_SOURCES = $(libscanf_la_SOURCES)
+ETAGS = etags
+CTAGS = ctags
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ABI = @ABI@
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AS = @AS@
+ASMFLAGS = @ASMFLAGS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CALLING_CONVENTIONS_OBJS = @CALLING_CONVENTIONS_OBJS@
+CC = @CC@
+CCAS = @CCAS@
+CC_FOR_BUILD = @CC_FOR_BUILD@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CPP_FOR_BUILD = @CPP_FOR_BUILD@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFN_LONG_LONG_LIMB = @DEFN_LONG_LONG_LIMB@
+DEFS = @DEFS@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+EXEEXT_FOR_BUILD = @EXEEXT_FOR_BUILD@
+FGREP = @FGREP@
+GMP_LDFLAGS = @GMP_LDFLAGS@
+GMP_LIMB_BITS = @GMP_LIMB_BITS@
+GMP_NAIL_BITS = @GMP_NAIL_BITS@
+GREP = @GREP@
+HAVE_CLOCK_01 = @HAVE_CLOCK_01@
+HAVE_CPUTIME_01 = @HAVE_CPUTIME_01@
+HAVE_GETRUSAGE_01 = @HAVE_GETRUSAGE_01@
+HAVE_GETTIMEOFDAY_01 = @HAVE_GETTIMEOFDAY_01@
+HAVE_HOST_CPU_FAMILY_power = @HAVE_HOST_CPU_FAMILY_power@
+HAVE_HOST_CPU_FAMILY_powerpc = @HAVE_HOST_CPU_FAMILY_powerpc@
+HAVE_SIGACTION_01 = @HAVE_SIGACTION_01@
+HAVE_SIGALTSTACK_01 = @HAVE_SIGALTSTACK_01@
+HAVE_SIGSTACK_01 = @HAVE_SIGSTACK_01@
+HAVE_STACK_T_01 = @HAVE_STACK_T_01@
+HAVE_SYS_RESOURCE_H_01 = @HAVE_SYS_RESOURCE_H_01@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LEX = @LEX@
+LEXLIB = @LEXLIB@
+LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
+LIBCURSES = @LIBCURSES@
+LIBGMPXX_LDFLAGS = @LIBGMPXX_LDFLAGS@
+LIBGMP_DLL = @LIBGMP_DLL@
+LIBGMP_LDFLAGS = @LIBGMP_LDFLAGS@
+LIBM = @LIBM@
+LIBM_FOR_BUILD = @LIBM_FOR_BUILD@
+LIBOBJS = @LIBOBJS@
+LIBREADLINE = @LIBREADLINE@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+M4 = @M4@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
+STRIP = @STRIP@
+TAL_OBJECT = @TAL_OBJECT@
+TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
+U = @U@
+U_FOR_BUILD = @U_FOR_BUILD@
+VERSION = @VERSION@
+WITH_READLINE_01 = @WITH_READLINE_01@
+YACC = @YACC@
+YFLAGS = @YFLAGS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__leading_dot = @am__leading_dot@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+gmp_srclinks = @gmp_srclinks@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+mpn_objects = @mpn_objects@
+mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
+mpn_objs_in_libmp = @mpn_objs_in_libmp@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+INCLUDES = -D__GMP_WITHIN_GMP -I$(top_srcdir)
+noinst_LTLIBRARIES = libscanf.la
+libscanf_la_SOURCES = \
+  doscan.c fscanf.c fscanffuns.c scanf.c sscanf.c sscanffuns.c \
+  vfscanf.c vscanf.c vsscanf.c
+
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps)
+       @for dep in $?; do \
+         case '$(am__configure_deps)' in \
+           *$$dep*) \
+             ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+               && { if test -f $@; then exit 0; else break; fi; }; \
+             exit 1;; \
+         esac; \
+       done; \
+       echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps scanf/Makefile'; \
+       $(am__cd) $(top_srcdir) && \
+         $(AUTOMAKE) --gnu --ignore-deps scanf/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+       @case '$?' in \
+         *config.status*) \
+           cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+         *) \
+           echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+           cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+       esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+clean-noinstLTLIBRARIES:
+       -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES)
+       @list='$(noinst_LTLIBRARIES)'; for p in $$list; do \
+         dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \
+         test "$$dir" != "$$p" || dir=.; \
+         echo "rm -f \"$${dir}/so_locations\""; \
+         rm -f "$${dir}/so_locations"; \
+       done
+libscanf.la: $(libscanf_la_OBJECTS) $(libscanf_la_DEPENDENCIES) 
+       $(LINK)  $(libscanf_la_OBJECTS) $(libscanf_la_LIBADD) $(LIBS)
+
+mostlyclean-compile:
+       -rm -f *.$(OBJEXT)
+
+distclean-compile:
+       -rm -f *.tab.c
+$(top_builddir)/ansi2knr:
+       $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
+
+mostlyclean-kr:
+       -test "$U" = "" || rm -f *_.c
+
+.c.o:
+       $(COMPILE) -c $<
+
+.c.obj:
+       $(COMPILE) -c `$(CYGPATH_W) '$<'`
+
+.c.lo:
+       $(LTCOMPILE) -c -o $@ $<
+doscan_.c: doscan.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/doscan.c; then echo $(srcdir)/doscan.c; else echo doscan.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+fscanf_.c: fscanf.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fscanf.c; then echo $(srcdir)/fscanf.c; else echo fscanf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+fscanffuns_.c: fscanffuns.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fscanffuns.c; then echo $(srcdir)/fscanffuns.c; else echo fscanffuns.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+scanf_.c: scanf.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/scanf.c; then echo $(srcdir)/scanf.c; else echo scanf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+sscanf_.c: sscanf.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sscanf.c; then echo $(srcdir)/sscanf.c; else echo sscanf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+sscanffuns_.c: sscanffuns.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sscanffuns.c; then echo $(srcdir)/sscanffuns.c; else echo sscanffuns.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+vfscanf_.c: vfscanf.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/vfscanf.c; then echo $(srcdir)/vfscanf.c; else echo vfscanf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+vscanf_.c: vscanf.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/vscanf.c; then echo $(srcdir)/vscanf.c; else echo vscanf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+vsscanf_.c: vsscanf.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/vsscanf.c; then echo $(srcdir)/vsscanf.c; else echo vsscanf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+doscan_.$(OBJEXT) doscan_.lo fscanf_.$(OBJEXT) fscanf_.lo \
+fscanffuns_.$(OBJEXT) fscanffuns_.lo scanf_.$(OBJEXT) scanf_.lo \
+sscanf_.$(OBJEXT) sscanf_.lo sscanffuns_.$(OBJEXT) sscanffuns_.lo \
+vfscanf_.$(OBJEXT) vfscanf_.lo vscanf_.$(OBJEXT) vscanf_.lo \
+vsscanf_.$(OBJEXT) vsscanf_.lo : $(ANSI2KNR)
+
+mostlyclean-libtool:
+       -rm -f *.lo
+
+clean-libtool:
+       -rm -rf .libs _libs
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+       list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       mkid -fID $$unique
+tags: TAGS
+
+TAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+               $(TAGS_FILES) $(LISP)
+       set x; \
+       here=`pwd`; \
+       list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       shift; \
+       if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+         test -n "$$unique" || unique=$$empty_fix; \
+         if test $$# -gt 0; then \
+           $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+             "$$@" $$unique; \
+         else \
+           $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+             $$unique; \
+         fi; \
+       fi
+ctags: CTAGS
+CTAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+               $(TAGS_FILES) $(LISP)
+       list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       test -z "$(CTAGS_ARGS)$$unique" \
+         || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+            $$unique
+
+GTAGS:
+       here=`$(am__cd) $(top_builddir) && pwd` \
+         && $(am__cd) $(top_srcdir) \
+         && gtags -i $(GTAGS_ARGS) "$$here"
+
+distclean-tags:
+       -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+       @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+       topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+       list='$(DISTFILES)'; \
+         dist_files=`for file in $$list; do echo $$file; done | \
+         sed -e "s|^$$srcdirstrip/||;t" \
+             -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+       case $$dist_files in \
+         */*) $(MKDIR_P) `echo "$$dist_files" | \
+                          sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+                          sort -u` ;; \
+       esac; \
+       for file in $$dist_files; do \
+         if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+         if test -d $$d/$$file; then \
+           dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+           if test -d "$(distdir)/$$file"; then \
+             find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+           fi; \
+           if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+             cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+             find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+           fi; \
+           cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+         else \
+           test -f "$(distdir)/$$file" \
+           || cp -p $$d/$$file "$(distdir)/$$file" \
+           || exit 1; \
+         fi; \
+       done
+check-am: all-am
+check: check-am
+all-am: Makefile $(LTLIBRARIES)
+installdirs:
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+       @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+       $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+         install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+         `test -z '$(STRIP)' || \
+           echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+       -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+       -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+       @echo "This command is intended for maintainers to use"
+       @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \
+       mostlyclean-am
+
+distclean: distclean-am
+       -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+       distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+       -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+       mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: $(top_builddir)/ansi2knr install-am install-strip
+
+.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
+       clean-libtool clean-noinstLTLIBRARIES ctags distclean \
+       distclean-compile distclean-generic distclean-libtool \
+       distclean-tags distdir dvi dvi-am html html-am info info-am \
+       install install-am install-data install-data-am install-dvi \
+       install-dvi-am install-exec install-exec-am install-html \
+       install-html-am install-info install-info-am install-man \
+       install-pdf install-pdf-am install-ps install-ps-am \
+       install-strip installcheck installcheck-am installdirs \
+       maintainer-clean maintainer-clean-generic mostlyclean \
+       mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+       mostlyclean-libtool pdf pdf-am ps ps-am tags uninstall \
+       uninstall-am
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/scanf/doscan.c b/scanf/doscan.c

new file mode 100644 (file)

index 0000000..2c5b1d9
--- /dev/null
+++ b/scanf/doscan.c
@@ -0,0 +1,762 @@
+/* __gmp_doscan -- formatted input internals.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define _GNU_SOURCE    /* for DECIMAL_POINT in langinfo.h */
+
+#include "config.h"
+
+#if HAVE_STDARG
+#include <stdarg.h>
+#else
+#include <varargs.h>
+#endif
+
+#include <ctype.h>
+#include <stddef.h>    /* for ptrdiff_t */
+#include <stdio.h>
+#include <stdlib.h>    /* for strtol */
+#include <string.h>
+
+#if HAVE_LANGINFO_H
+#include <langinfo.h>  /* for nl_langinfo */
+#endif
+
+#if HAVE_LOCALE_H
+#include <locale.h>    /* for localeconv */
+#endif
+
+#if HAVE_INTTYPES_H
+# include <inttypes.h> /* for intmax_t */
+#else
+# if HAVE_STDINT_H
+#  include <stdint.h>
+# endif
+#endif
+
+#if HAVE_SYS_TYPES_H
+#include <sys/types.h> /* for quad_t */
+#endif
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Change this to "#define TRACE(x) x" for some traces. */
+#define TRACE(x)
+
+
+/* General:
+
+       It's necessary to parse up the format string to recognise the GMP
+       extra types F, Q and Z.  Other types and conversions are passed
+       across to the standard sscanf or fscanf via funs->scan, for ease of
+       implementation.  This is essential in the case of something like glibc
+       %p where the pointer format isn't actually documented.
+
+       Because funs->scan doesn't get the whole input it can't put the right
+       values in for %n, so that's handled in __gmp_doscan.  Neither sscanf
+       nor fscanf directly indicate how many characters were read, so an
+       extra %n is appended to each run for that.  For fscanf this merely
+       supports our %n output, but for sscanf it lets funs->step move us
+       along the input string.
+
+       Whitespace and literal matches in the format string, including %%,
+       are handled directly within __gmp_doscan.  This is reasonably
+       efficient, and avoids some suspicious behaviour observed in various
+       system libc's.  GLIBC 2.2.4 for instance returns 0 on
+
+          sscanf(" ", " x")
+       or
+          sscanf(" ", " x%d",&n)
+
+       whereas we think they should return EOF, since end-of-string is
+       reached when a match of "x" is required.
+
+       For standard % conversions, funs->scan is called once for each
+       conversion.  If we had vfscanf and vsscanf and could rely on their
+       fixed text matching behaviour then we could call them with multiple
+       consecutive standard conversions.  But plain fscanf and sscanf work
+       fine, and parsing one field at a time shouldn't be too much of a
+       slowdown.
+
+   gmpscan:
+
+       gmpscan reads a gmp type.  It's only used from one place, but is a
+       separate subroutine to avoid a big chunk of complicated code in the
+       middle of __gmp_doscan.  Within gmpscan a couple of loopbacks make it
+       possible to share code for parsing integers, rationals and floats.
+
+       In gmpscan normally one char of lookahead is maintained, but when width
+       is reached that stops, on the principle that an fgetc/ungetc of a char
+       past where we're told to stop would be undesirable.  "chars" is how many
+       characters have been read so far, including the current c.  When
+       chars==width and another character is desired then a jump is done to the
+       "convert" stage.  c is invalid and mustn't be unget'ed in this case;
+       chars is set to width+1 to indicate that.
+
+       gmpscan normally returns the number of characters read.  -1 means an
+       invalid field, -2 means EOF reached before any matching characters
+       were read.
+
+       For hex floats, the mantissa part is passed to mpf_set_str, then the
+       exponent is applied with mpf_mul_2exp or mpf_div_2exp.  This is easier
+       than teaching mpf_set_str about an exponent factor (ie. 2) differing
+       from the mantissa radix point factor (ie. 16).  mpf_mul_2exp and
+       mpf_div_2exp will preserve the application requested precision, so
+       nothing in that respect is lost by making this a two-step process.
+
+   Matching and errors:
+
+       C99 7.19.6.2 paras 9 and 10 say an input item is read as the longest
+       string which is a match for the appropriate type, or a prefix of a
+       match.  With that done, if it's only a prefix then the result is a
+       matching failure, ie. invalid input.
+
+       This rule seems fairly clear, but doesn't seem to be universally
+       applied in system C libraries.  Even GLIBC doesn't seem to get it
+       right, insofar as it seems to accept some apparently invalid forms.
+       Eg. glibc 2.3.1 accepts "0x" for a "%i", where a reading of the
+       standard would suggest a non-empty sequence of digits should be
+       required after an "0x".
+
+       A footnote to 7.19.6.2 para 17 notes how this input item reading can
+       mean inputs acceptable to strtol are not acceptable to fscanf.  We
+       think this confirms our reading of "0x" as invalid.
+
+       Clearly gmp_sscanf could backtrack to a longest input which was a
+       valid match for a given item, but this is not done, since C99 says
+       sscanf is identical to fscanf, so we make gmp_sscanf identical to
+       gmp_fscanf.
+
+   Types:
+
+       C99 says "ll" is for long long, and "L" is for long double floats.
+       Unfortunately in GMP 4.1.1 we documented the two as equivalent.  This
+       doesn't affect us directly, since both are passed through to plain
+       scanf.  It seems wisest not to try to enforce the C99 rule.  This is
+       consistent with what we said before, though whether it actually
+       worked was always up to the C library.
+
+   Alternatives:
+
+       Consideration was given to using separate code for gmp_fscanf and
+       gmp_sscanf.  The sscanf case could zip across a string doing literal
+       matches or recognising digits in gmpscan, rather than making a
+       function call funs->get per character.  The fscanf case could use getc
+       rather than fgetc too, which might help those systems where getc is a
+       macro or otherwise inlined.  But none of this scanning and converting
+       will be particularly fast, so the two are done together to keep it a
+       little simpler for now.
+
+       Various multibyte string issues are not addressed, for a start C99
+       scanf says the format string is multibyte.  Since we pass %c, %s and
+       %[ to the system scanf, they might do multibyte reads already, but
+       it's another matter whether or not that can be used, since our digit
+       and whitespace parsing is only unibyte.  The plan is to quietly
+       ignore multibyte locales for now.  This is not as bad as it sounds,
+       since GMP is presumably used mostly on numbers, which can be
+       perfectly adequately treated in plain ASCII.
+
+*/
+
+
+struct gmp_doscan_params_t {  /* parameters of one GMP conversion, as read by gmpscan */
+  int  base;    /* radix for the digits; 0 means gmpscan picks it from a "0" / "0x" prefix */
+  int  ignore;  /* non-zero: parse the field but don't store a value to the destination */
+  char type;    /* GMP type to read: 'F', 'Q' or 'Z' */
+  int  width;   /* maximum number of characters to read; 0 means no limit */
+};
+
+
+#define GET(c)                 \
+  do { /* fetch the next input char into c, or goto convert once width is used up */ \
+    ASSERT (chars <= width);   \
+    chars++;                   /* counts the char about to be read */ \
+    if (chars > width)         \
+      goto convert;            /* c was not re-read; chars == width+1 records that */ \
+    (c) = (*funs->get) (data); \
+  } while (0)
+
+/* Append c to the buffer "s", extending if necessary.  Uses the enclosing function's s, s_upto (chars stored) and s_alloc (chars allocated). */
+#define STORE(c)                                                       \
+  do {                                                                 \
+    ASSERT (s_upto <= s_alloc);                                                \
+    if (s_upto >= s_alloc)     /* buffer full: grow by S_ALLOC_STEP */ \
+      {                                                                        \
+       size_t  s_alloc_new = s_alloc + S_ALLOC_STEP;                   \
+       s = __GMP_REALLOCATE_FUNC_TYPE (s, s_alloc, s_alloc_new, char); \
+       s_alloc = s_alloc_new;                                          \
+      }                                                                        \
+    s[s_upto++] = c;                                                   \
+  } while (0)
+
+#define S_ALLOC_STEP  512  /* initial size of s in gmpscan, and the amount STORE grows it by */
+
+static int
+gmpscan (const struct gmp_doscan_funs_t *funs, void *data,
+        const struct gmp_doscan_params_t *p, void *dst)
+{
+  int    chars, c, base, first, width, seen_point, seen_digit, hexfloat;
+  size_t  s_upto, s_alloc, hexexp;
+  char   *s;
+  int    invalid = 0;
+
+  TRACE (printf ("gmpscan\n"));
+
+  ASSERT (p->type == 'F' || p->type == 'Q' || p->type == 'Z');
+
+  c = (*funs->get) (data);
+  if (c == EOF)
+    return -2;
+
+  chars = 1;
+  first = 1;
+  seen_point = 0;
+  width = (p->width == 0 ? INT_MAX-1 : p->width);
+  base = p->base;
+  s_alloc = S_ALLOC_STEP;
+  s = __GMP_ALLOCATE_FUNC_TYPE (s_alloc, char);
+  s_upto = 0;
+  hexfloat = 0;
+  hexexp = 0;
+
+ another:
+  seen_digit = 0;
+  if (c == '-')
+    {
+      STORE (c);
+      goto get_for_sign;
+    }
+  else if (c == '+')
+    {
+      /* don't store '+', it's not accepted by mpz_set_str etc */
+    get_for_sign:
+      GET (c);
+    }
+
+  if (base == 0)
+    {
+      base = 10;                 /* decimal if no base indicator */
+      if (c == '0')
+       {
+         seen_digit = 1;         /* 0 alone is a valid number */
+         if (p->type != 'F')
+           base = 8;             /* leading 0 is octal, for non-floats */
+         STORE (c);
+         GET (c);
+         if (c == 'x' || c == 'X')
+           {
+             base = 16;
+             seen_digit = 0;     /* must have digits after an 0x */
+             if (p->type == 'F') /* don't pass 'x' to mpf_set_str_point */
+               hexfloat = 1;
+             else
+               STORE (c);
+             GET (c);
+           }
+       }
+    }
+
+ digits:
+  for (;;)
+    {
+      if (base == 16)
+       {
+         if (! isxdigit (c))
+           break;
+       }
+      else
+       {
+         if (! isdigit (c))
+           break;
+         if (base == 8 && (c == '8' || c == '9'))
+           break;
+       }
+
+      seen_digit = 1;
+      STORE (c);
+      GET (c);
+    }
+
+  if (first)
+    {
+      /* decimal point */
+      if (p->type == 'F' && ! seen_point)
+       {
+         /* For a multi-character decimal point, if the first character is
+            present then all of it must be, otherwise the input is
+            considered invalid.  */
+         const char  *point = GMP_DECIMAL_POINT;
+         int         pc = (unsigned char) *point++;
+         if (c == pc)
+           {
+             for (;;)
+               {
+                 STORE (c);
+                 GET (c);
+                 pc = (unsigned char) *point++;
+                 if (pc == '\0')
+                   break;
+                 if (c != pc)
+                   goto set_invalid;
+               }
+             seen_point = 1;
+             goto digits;
+           }
+       }
+
+      /* exponent */
+      if (p->type == 'F')
+       {
+         if (hexfloat && (c == 'p' || c == 'P'))
+           {
+             hexexp = s_upto; /* exponent location */
+             base = 10;       /* exponent in decimal */
+             goto exponent;
+           }
+         else if (! hexfloat && (c == 'e' || c == 'E'))
+           {
+           exponent:
+             /* must have at least one digit in the mantissa, just an exponent
+                is not good enough */
+             if (! seen_digit)
+               goto set_invalid;
+
+           do_second:
+             first = 0;
+             STORE (c);
+             GET (c);
+             goto another;
+           }
+       }
+
+      /* denominator */
+      if (p->type == 'Q' && c == '/')
+       {
+         /* must have at least one digit in the numerator */
+         if (! seen_digit)
+           goto set_invalid;
+
+         /* now look for at least one digit in the denominator */
+         seen_digit = 0;
+
+         /* allow the base to be redetermined for "%i" */
+         base = p->base;
+         goto do_second;
+       }
+    }
+
+ convert:
+  if (! seen_digit)
+    {
+    set_invalid:
+      invalid = 1;
+      goto done;
+    }
+
+  if (! p->ignore)
+    {
+      STORE ('\0');
+      TRACE (printf (" convert \"%s\"\n", s));
+
+      /* We ought to have parsed out a valid string above, so just test
+        mpz_set_str etc with an ASSERT.  */
+      switch (p->type) {
+      case 'F':
+       {
+         mpf_ptr  f = (mpf_ptr) dst;
+         if (hexexp != 0)
+           s[hexexp] = '\0';
+         ASSERT_NOCARRY (mpf_set_str (f, s, hexfloat ? 16 : 10));
+         if (hexexp != 0)
+           {
+             char *dummy;
+             long  exp;
+             exp = strtol (s + hexexp + 1, &dummy, 10);
+             if (exp >= 0)
+               mpf_mul_2exp (f, f, (unsigned long) exp);
+             else
+               mpf_div_2exp (f, f, - (unsigned long) exp);
+           }
+       }
+       break;
+      case 'Q':
+       ASSERT_NOCARRY (mpq_set_str ((mpq_ptr) dst, s, p->base));
+       break;
+      case 'Z':
+       ASSERT_NOCARRY (mpz_set_str ((mpz_ptr) dst, s, p->base));
+       break;
+      default:
+       ASSERT (0);
+       /*FALLTHRU*/
+       break;
+      }
+    }
+
+ done:
+  ASSERT (chars <= width+1);
+  if (chars != width+1)
+    {
+      (*funs->unget) (c, data);
+      TRACE (printf (" ungetc %d, to give %d chars\n", c, chars-1));
+    }
+  chars--;
+
+  (*__gmp_free_func) (s, s_alloc);
+
+  if (invalid)
+    {
+      TRACE (printf (" invalid\n"));
+      return -1;
+    }
+
+  TRACE (printf ("  return %d chars (cf width %d)\n", chars, width));
+  return chars;
+}
+
+
+/* Discard leading whitespace from the input and return how many characters
+   were dropped.  Whitespace skipping never provokes the EOF return from
+   __gmp_doscan, so an EOF from funs->get is just pushed back, uncounted.  */
+static int
+skip_white (const struct gmp_doscan_funs_t *funs, void *data)
+{
+  int  ch;
+  int  skipped = 0;
+
+  for (;;)
+    {
+      ch = (funs->get) (data);
+      if (! isspace (ch))
+        break;
+      skipped++;
+    }
+
+  (funs->unget) (ch, data);
+
+  TRACE (printf ("  skip white %d\n", skipped));
+  return skipped;
+}
+
+
+/* Scan input obtained through the funs callbacks (with "data" as their
+   handle) as directed by orig_fmt, storing through the pointers in orig_ap.
+   Returns the number of fields stored, or EOF if end of input is reached
+   before any field has been stored.  */
+int
+__gmp_doscan (const struct gmp_doscan_funs_t *funs, void *data,
+             const char *orig_fmt, va_list orig_ap)
+{
+  struct gmp_doscan_params_t  param;
+  va_list     ap;
+  char       *alloc_fmt;
+  const char  *fmt, *this_fmt, *end_fmt;
+  size_t      orig_fmt_len, alloc_fmt_size, len;
+  int        new_fields, new_chars;
+  char       fchar;
+  int        fields = 0;
+  int        chars = 0;
+
+  TRACE (printf ("__gmp_doscan \"%s\"\n", orig_fmt);
+        if (funs->scan == (gmp_doscan_scan_t) sscanf)
+          printf ("  s=\"%s\"\n", * (const char **) data));
+
+  /* Work on a copy so orig_ap isn't modified, which matters if va_list is
+     actually an array and hence call by reference.  Callers could be left
+     to copy if they care, but doing it here is a very small part of the
+     total work.  The copy is released by the va_end at "done".  */
+  va_copy (ap, orig_ap);
+
+  /* Parts of the format string are copied into alloc_fmt so that "%n" can
+     be appended; that needs at most orig_fmt_len+3 bytes.  Speed doesn't
+     matter much here, so use __gmp_allocate_func rather than TMP_ALLOC.  */
+  orig_fmt_len = strlen (orig_fmt);
+  alloc_fmt_size = orig_fmt_len + 4;
+  alloc_fmt = __GMP_ALLOCATE_FUNC_TYPE (alloc_fmt_size, char);
+
+  fmt = orig_fmt;
+  end_fmt = orig_fmt + orig_fmt_len;
+
+  for (;;)
+    {
+    next:
+      fchar = *fmt++;
+
+      if (fchar == '\0')
+       break;
+
+      if (isspace ((unsigned char) fchar))
+       {
+         chars += skip_white (funs, data);
+         continue;
+       }
+
+      if (fchar != '%')
+       {
+         int  c;
+       literal:
+         c = (funs->get) (data);
+         if (c != (unsigned char) fchar)  /* c: unsigned char value or EOF */
+           {
+             (funs->unget) (c, data);
+             if (c == EOF)
+               {
+               eof_no_match:
+                 if (fields == 0)
+                   fields = EOF;
+               }
+             goto done;
+           }
+         chars++;
+         continue;
+       }
+
+      param.type = '\0';
+      param.base = 0;   /* for e,f,g,i */
+      param.ignore = 0;
+      param.width = 0;
+
+      this_fmt = fmt-1;
+      TRACE (printf (" this_fmt \"%s\"\n", this_fmt));
+
+      for (;;)
+       {
+         ASSERT (fmt <= end_fmt);
+
+         fchar = *fmt++;
+         switch (fchar) {
+
+         case '\0':  /* unterminated % sequence */
+           ASSERT (0);
+           goto done;
+
+         case '%':   /* literal % */
+           goto literal;
+
+         case '[':   /* character range */
+           fchar = *fmt++;
+           if (fchar == '^')
+             fchar = *fmt++;
+           /* ']' allowed as the first char (possibly after '^') */
+           if (fchar == ']')
+             fchar = *fmt++;
+           for (;;)
+             {
+               ASSERT (fmt <= end_fmt);
+               if (fchar == '\0')
+                 {
+                   /* unterminated % sequence */
+                   ASSERT (0);
+                   goto done;
+                 }
+               if (fchar == ']')
+                 break;
+               fchar = *fmt++;
+             }
+           /*FALLTHRU*/
+         case 'c':   /* characters */
+         case 's':   /* string of non-whitespace */
+         case 'p':   /* pointer */
+         libc_type:
+           len = fmt - this_fmt;
+           memcpy (alloc_fmt, this_fmt, len);
+           alloc_fmt[len++] = '%';
+           alloc_fmt[len++] = 'n';
+           alloc_fmt[len] = '\0';
+
+           TRACE (printf ("  scan \"%s\"\n", alloc_fmt);
+                  if (funs->scan == (gmp_doscan_scan_t) sscanf)
+                    printf ("  s=\"%s\"\n", * (const char **) data));
+
+           new_chars = -1;
+           if (param.ignore)
+             {
+               new_fields = (*funs->scan) (data, alloc_fmt, &new_chars, NULL);
+               ASSERT (new_fields == 0 || new_fields == EOF);
+             }
+           else
+             {
+               void *arg = va_arg (ap, void *);
+               new_fields = (*funs->scan) (data, alloc_fmt, arg, &new_chars);
+               ASSERT (new_fields==0 || new_fields==1 || new_fields==EOF);
+
+               if (new_fields == 0)
+                 goto done;  /* invalid input */
+
+               if (new_fields == 1)
+                 ASSERT (new_chars != -1);
+             }
+           TRACE (printf ("  new_fields %d   new_chars %d\n",
+                          new_fields, new_chars));
+
+           if (new_fields == EOF)
+             goto eof_no_match;  /* EOF before anything matched */
+
+           /* Under param.ignore, when new_fields==0 we don't know if
+              it's a successful match or an invalid field.  new_chars
+              won't have been assigned if it was an invalid field.  */
+           if (new_chars == -1)
+             goto done;  /* invalid input */
+
+           chars += new_chars;
+           (*funs->step) (data, new_chars);
+
+         increment_fields:
+           if (! param.ignore)
+             fields++;
+           goto next;
+
+         case 'd':   /* decimal */
+         case 'u':   /* decimal */
+           param.base = 10;
+           goto numeric;
+
+         case 'e':   /* float */
+         case 'E':   /* float */
+         case 'f':   /* float */
+         case 'g':   /* float */
+         case 'G':   /* float */
+         case 'i':   /* integer with base marker */
+         numeric:
+           if (param.type != 'F' && param.type != 'Q' && param.type != 'Z')
+             goto libc_type;
+
+           chars += skip_white (funs, data);
+
+           new_chars = gmpscan (funs, data, &param,
+                                param.ignore ? NULL : va_arg (ap, void*));
+           if (new_chars == -2)
+             goto eof_no_match;
+           if (new_chars == -1)
+             goto done;
+
+           ASSERT (new_chars >= 0);
+           chars += new_chars;
+           goto increment_fields;
+
+         case 'a':   /* glibc allocate string */
+         case '\'':  /* glibc digit groupings */
+           break;
+
+         case 'F':   /* mpf_t */
+         case 'j':   /* intmax_t */
+         case 'L':   /* long long */
+         case 'q':   /* quad_t */
+         case 'Q':   /* mpq_t */
+         case 't':   /* ptrdiff_t */
+         case 'z':   /* size_t */
+         case 'Z':   /* mpz_t */
+         set_type:
+           param.type = fchar;
+           break;
+
+         case 'h':   /* short or char */
+           if (param.type != 'h')
+             goto set_type;
+           param.type = 'H';   /* internal code for "hh" */
+           break;
+
+         case 'l':   /* long, long long, double or long double */
+           if (param.type != 'l')
+             goto set_type;
+           param.type = 'L';   /* "ll" means "L" */
+           break;
+
+         case 'n':
+           if (! param.ignore)
+             {
+               void  *p;
+               p = va_arg (ap, void *);
+               TRACE (printf ("  store %%n to %p\n", p));
+               switch (param.type) {
+               case '\0': * (int       *) p = chars; break;
+               case 'F':  mpf_set_si ((mpf_ptr) p, (long) chars); break;
+               case 'H':  * (char      *) p = chars; break;
+               case 'h':  * (short     *) p = chars; break;
+#if HAVE_INTMAX_T
+               case 'j':  * (intmax_t  *) p = chars; break;
+#else
+               case 'j':  ASSERT_FAIL (intmax_t not available); break;
+#endif
+               case 'l':  * (long      *) p = chars; break;
+#if HAVE_QUAD_T && HAVE_LONG_LONG
+               case 'q':
+                 ASSERT_ALWAYS (sizeof (quad_t) == sizeof (long long));
+                 /*FALLTHRU*/
+#else
+               case 'q':  ASSERT_FAIL (quad_t not available); break;
+#endif
+#if HAVE_LONG_LONG
+               case 'L':  * (long long *) p = chars; break;
+#else
+               case 'L':  ASSERT_FAIL (long long not available); break;
+#endif
+               case 'Q':  mpq_set_si ((mpq_ptr) p, (long) chars, 1L); break;
+#if HAVE_PTRDIFF_T
+               case 't':  * (ptrdiff_t *) p = chars; break;
+#else
+               case 't':  ASSERT_FAIL (ptrdiff_t not available); break;
+#endif
+               case 'z':  * (size_t    *) p = chars; break;
+               case 'Z':  mpz_set_si ((mpz_ptr) p, (long) chars); break;
+               default: ASSERT (0); break;
+               }
+             }
+           goto next;
+
+         case 'o':
+           param.base = 8;
+           goto numeric;
+
+         case 'x':
+         case 'X':
+           param.base = 16;
+           goto numeric;
+
+         case '0': case '1': case '2': case '3': case '4':
+         case '5': case '6': case '7': case '8': case '9':
+           param.width = 0;
+           do {
+             param.width = param.width * 10 + (fchar-'0');
+             fchar = *fmt++;
+           } while (isdigit ((unsigned char) fchar));
+           fmt--; /* unget the non-digit */
+           break;
+
+         case '*':
+           param.ignore = 1;
+           break;
+
+         default:
+           /* something invalid in a % sequence */
+           ASSERT (0);
+           goto next;
+         }
+       }
+    }
+
+ done:
+  va_end (ap);
+  (*__gmp_free_func) (alloc_fmt, alloc_fmt_size);
+  return fields;
+}
diff --git a/scanf/fscanf.c b/scanf/fscanf.c

new file mode 100644 (file)

index 0000000..596571e
--- /dev/null
+++ b/scanf/fscanf.c
@@ -0,0 +1,57 @@
+/* gmp_fscanf -- formatted input from a FILE.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#if HAVE_STDARG
+#include <stdarg.h>
+#else
+#include <varargs.h>
+#endif
+
+#include <stdio.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+int
+#if HAVE_STDARG
+gmp_fscanf (FILE *fp, const char *fmt, ...)
+#else
+gmp_fscanf (va_alist)
+     va_dcl
+#endif
+{
+  int      nfields;
+  va_list  args;
+#if HAVE_STDARG
+  va_start (args, fmt);
+#else
+  FILE       *fp;
+  const char *fmt;
+  va_start (args);
+  fp = va_arg (args, FILE *);         /* old-style varargs: the fixed */
+  fmt = va_arg (args, const char *);  /* parameters are fetched by hand */
+#endif
+  /* the FILE callbacks read from fp on behalf of the shared scanner */
+  nfields = __gmp_doscan (&__gmp_fscanf_funs, fp, fmt, args);
+  va_end (args);
+  return nfields;
+}
diff --git a/scanf/fscanffuns.c b/scanf/fscanffuns.c

new file mode 100644 (file)

index 0000000..236aec6
--- /dev/null
+++ b/scanf/fscanffuns.c
@@ -0,0 +1,51 @@
+/* __gmp_fscanf_funs -- support for formatted input from a FILE.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* SunOS 4 stdio.h doesn't provide prototypes for these */
+#if ! HAVE_DECL_FGETC
+int fgetc __GMP_PROTO ((FILE *fp));
+#endif
+#if ! HAVE_DECL_FSCANF
+int fscanf __GMP_PROTO ((FILE *fp, const char *fmt, ...));
+#endif
+#if ! HAVE_DECL_UNGETC
+int ungetc __GMP_PROTO ((int c, FILE *fp));
+#endif
+
+
+static void
+step (FILE *fp, int n)   /* "step" entry of __gmp_fscanf_funs, a no-op */
+{
+}
+
+const struct gmp_doscan_funs_t  __gmp_fscanf_funs = {
+  (gmp_doscan_scan_t)  fscanf,   /* formatted conversions done by libc */
+  (gmp_doscan_step_t)  step,     /* the empty function above */
+  (gmp_doscan_get_t)   fgetc,    /* read one character */
+  (gmp_doscan_unget_t) ungetc,   /* push one character back */
+};
diff --git a/scanf/scanf.c b/scanf/scanf.c

new file mode 100644 (file)

index 0000000..b4840d1
--- /dev/null
+++ b/scanf/scanf.c
@@ -0,0 +1,55 @@
+/* gmp_scanf -- formatted input from stdin.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#if HAVE_STDARG
+#include <stdarg.h>
+#else
+#include <varargs.h>
+#endif
+
+#include <stdio.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+int
+#if HAVE_STDARG
+gmp_scanf (const char *fmt, ...)
+#else
+gmp_scanf (va_alist)
+     va_dcl
+#endif
+{
+  int      nfields;
+  va_list  args;
+#if HAVE_STDARG
+  va_start (args, fmt);
+#else
+  const char *fmt;
+  va_start (args);
+  fmt = va_arg (args, const char *);  /* old-style varargs: fetch by hand */
+#endif
+  /* standard input is scanned through the FILE callbacks */
+  nfields = __gmp_doscan (&__gmp_fscanf_funs, stdin, fmt, args);
+  va_end (args);
+  return nfields;
+}
diff --git a/scanf/sscanf.c b/scanf/sscanf.c

new file mode 100644 (file)

index 0000000..3eb10ee
--- /dev/null
+++ b/scanf/sscanf.c
@@ -0,0 +1,62 @@
+/* gmp_sscanf -- formatted input from a string.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#if HAVE_STDARG
+#include <stdarg.h>
+#else
+#include <varargs.h>
+#endif
+
+#include <stdio.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+int
+#if HAVE_STDARG
+gmp_sscanf (const char *s, const char *fmt, ...)
+#else
+gmp_sscanf (va_alist)
+     va_dcl
+#endif
+{
+  int      nfields;
+  va_list  args;
+#if HAVE_STDARG
+  va_start (args, fmt);
+#else
+  const char *s;
+  const char *fmt;
+  va_start (args);
+  s = va_arg (args, const char *);    /* old-style varargs: the fixed */
+  fmt = va_arg (args, const char *);  /* parameters are fetched by hand */
+#endif
+  /* the string callbacks advance "s" itself, hence its address is passed */
+#if SSCANF_WRITABLE_INPUT
+  /* gmp_vsscanf takes care of the copying */
+  nfields = gmp_vsscanf (s, fmt, args);
+#else
+  nfields = __gmp_doscan (&__gmp_sscanf_funs, (void *) &s, fmt, args);
+#endif
+  va_end (args);
+  return nfields;
+}
diff --git a/scanf/sscanffuns.c b/scanf/sscanffuns.c

new file mode 100644 (file)

index 0000000..008bad7
--- /dev/null
+++ b/scanf/sscanffuns.c
@@ -0,0 +1,113 @@
+/* __gmp_sscanf_funs -- support for formatted input from a string.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2001, 2002, 2003, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdarg.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+#if 0  /* disabled alternative, passing the va_list on to vsscanf */
+static int
+scan (const char **sp, const char *fmt, ...)
+{
+    va_list ap;
+    int ret;
+
+    va_start(ap, fmt);
+    ret = vsscanf(*sp, fmt, ap);
+    va_end(ap);
+
+    return ret;
+}
+#else
+/* Run sscanf on the string at *sp.  Exactly two pointer arguments are
+   picked out of the variable arguments and handed on to sscanf; *sp itself
+   is not advanced here.  */
+static int
+scan (const char **sp, const char *fmt, ...)
+{
+  va_list  args;
+  void     *first, *second;
+
+  va_start (args, fmt);
+  first = va_arg (args, void *);
+  second = va_arg (args, void *);
+  va_end (args);
+
+  return sscanf (*sp, fmt, first, second);
+}
+#endif
+
+static void
+step (const char **sp, int n)
+{
+  const char  *start = *sp;
+
+  ASSERT (n >= 0);
+#if WANT_ASSERT
+  {
+    /* none of the characters stepped over may be the terminating null */
+    const char  *p;
+    for (p = start; p < start + n; p++)
+      ASSERT (*p != '\0');
+  }
+#endif
+  *sp = start + n;
+}
+
+static int
+get (const char **sp)
+{
+  /* read as unsigned char, so no character value can equal EOF */
+  int  ch = (unsigned char) **sp;
+
+  /* the terminating null is end of input, and isn't stepped over */
+  if (ch == '\0')
+    return EOF;
+  (*sp)++;
+  return ch;
+}
+
+static void
+unget (int c, const char **sp)
+{
+  if (c != EOF)
+    {
+      /* back up one place, which must hold the character being returned */
+      const char  *prev = *sp - 1;
+      ASSERT ((unsigned char) *prev == c);
+      *sp = prev;
+      return;
+    }
+  /* get doesn't advance when giving EOF, so there's nothing to undo */
+  ASSERT (**sp == '\0');
+}
+
+const struct gmp_doscan_funs_t  __gmp_sscanf_funs = {
+  (gmp_doscan_scan_t)  scan,    /* sscanf on the string at *sp */
+  (gmp_doscan_step_t)  step,    /* advance *sp by n characters */
+  (gmp_doscan_get_t)   get,     /* next character, or EOF at the null */
+  (gmp_doscan_unget_t) unget,   /* move *sp back one, unless given EOF */
+};
diff --git a/scanf/vfscanf.c b/scanf/vfscanf.c

new file mode 100644 (file)

index 0000000..2f8684c
--- /dev/null
+++ b/scanf/vfscanf.c
@@ -0,0 +1,38 @@
+/* gmp_vfscanf -- formatted input from a FILE.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#if HAVE_STDARG
+#include <stdarg.h>
+#else
+#include <varargs.h>
+#endif
+
+#include <stdio.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+int
+gmp_vfscanf (FILE *fp, const char *fmt, va_list ap)
+{
+  return __gmp_doscan (&__gmp_fscanf_funs, fp, fmt, ap);  /* FILE callbacks */
+}
diff --git a/scanf/vscanf.c b/scanf/vscanf.c

new file mode 100644 (file)

index 0000000..a411a7c
--- /dev/null
+++ b/scanf/vscanf.c
@@ -0,0 +1,38 @@
+/* gmp_vscanf -- formatted input from stdin.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#if HAVE_STDARG
+#include <stdarg.h>
+#else
+#include <varargs.h>
+#endif
+
+#include <stdio.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+int
+gmp_vscanf (const char *fmt, va_list ap)
+{
+  return __gmp_doscan (&__gmp_fscanf_funs, stdin, fmt, ap);  /* from stdin */
+}
diff --git a/scanf/vsscanf.c b/scanf/vsscanf.c

new file mode 100644 (file)

index 0000000..160e62d
--- /dev/null
+++ b/scanf/vsscanf.c
@@ -0,0 +1,56 @@
+/* gmp_vsscanf -- formatted input from a string.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#if HAVE_STDARG
+#include <stdarg.h>
+#else
+#include <varargs.h>
+#endif
+
+#include <string.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+int
+gmp_vsscanf (const char *s, const char *fmt, va_list ap)
+{
+#if SSCANF_WRITABLE_INPUT
+  /* Scan from a private writable copy of the input.  Strictly that's only
+     needed when fmt has standard C types and "s" isn't writable already,
+     but checking is too much trouble, and this writable sscanf input
+     business is only for a few old systems. */
+  size_t      nbytes = strlen (s) + 1;   /* the null is copied too */
+  char        *copy = (char *) (*__gmp_allocate_func) (nbytes);
+  const char  *pos = copy;
+  int         nfields;
+
+  memcpy (copy, s, nbytes);
+  nfields = __gmp_doscan (&__gmp_sscanf_funs, (void *) &pos, fmt, ap);
+  (*__gmp_free_func) (copy, nbytes);
+  return nfields;
+
+#else
+  /* no copy is made in this configuration, "s" is scanned in place */
+  return __gmp_doscan (&__gmp_sscanf_funs, (void *) &s, fmt, ap);
+#endif
+}
diff --git a/tal-debug.c b/tal-debug.c

new file mode 100644 (file)

index 0000000..7e0917f
--- /dev/null
+++ b/tal-debug.c
@@ -0,0 +1,140 @@
+/* TMP_ALLOC routines for debugging.
+
+Copyright 2000, 2001, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* This method aims to help a malloc debugger find problems.  A linked list
+   of allocated blocks is kept for TMP_FREE to release.  This is reentrant
+   and thread safe.
+
+   Each TMP_ALLOC is a separate malloced block, so redzones or sentinels
+   applied by a malloc debugger either above or below can guard against
+   accesses outside the allocated area.
+
+   A marker is a "struct tmp_debug_t *" so that TMP_DECL can initialize it
+   to NULL and we can detect TMP_ALLOC without TMP_MARK.
+
+   It will work to realloc an MPZ_TMP_INIT variable, but when TMP_FREE comes
+   to release the memory it will have the old size, thereby triggering an
+   error from tests/memory.c.
+
+   Possibilities:
+
+   It'd be possible to keep a global list of active "struct tmp_debug_t"
+   records, so at the end of a program any TMP leaks could be printed.  But
+   if only a couple of routines are under test at any one time then the
+   likely culprit should be easy enough to spot.  */
+
+
+void
+__gmp_tmp_debug_mark (const char *file, int line,
+                      struct tmp_debug_t **markp, struct tmp_debug_t *mark,
+                      const char *decl_name, const char *mark_name)
+{
+  if (strcmp (mark_name, decl_name) != 0)
+    {
+      __gmp_assert_header (file, line);
+      fprintf (stderr, "GNU MP: TMP_MARK(%s) but TMP_DECL(%s) is in scope\n",
+               mark_name, decl_name);
+      abort ();
+    }
+
+  if (*markp != NULL)
+    {
+      int  prev_known = (mark->file && mark->file[0] && mark->line != -1);
+      __gmp_assert_header (file, line);
+      fprintf (stderr, "GNU MP: Repeat of TMP_MARK(%s)\n", mark_name);
+      if (prev_known)
+        {
+          __gmp_assert_header (mark->file, mark->line);
+          fprintf (stderr, "previous was here\n");
+        }
+      abort ();
+    }
+  *markp = mark;   /* now the active mark, with no blocks on its list */
+  mark->list = NULL;
+  mark->line = line;
+  mark->file = file;
+}
+
+void *
+__gmp_tmp_debug_alloc (const char *file, int line, int dummy,
+                       struct tmp_debug_t **markp,
+                       const char *decl_name, size_t size)
+{
+  struct tmp_debug_entry_t  *entry;
+  struct tmp_debug_t        *owner = *markp;
+
+  ASSERT_ALWAYS (size >= 1);
+
+  if (owner == NULL)
+    {
+      __gmp_assert_header (file, line);
+      fprintf (stderr, "GNU MP: TMP_ALLOC without TMP_MARK(%s)\n", decl_name);
+      abort ();
+    }
+  /* one record per request, pushed on the front of the mark's list */
+  entry = __GMP_ALLOCATE_FUNC_TYPE (1, struct tmp_debug_entry_t);
+  entry->size = size;
+  entry->block = (*__gmp_allocate_func) (size);
+  entry->next = owner->list;
+  owner->list = entry;
+  return entry->block;
+}
+
+void
+__gmp_tmp_debug_free (const char *file, int line, int dummy,
+                      struct tmp_debug_t **markp,
+                      const char *decl_name, const char *free_name)
+{
+  struct tmp_debug_entry_t  *entry, *following;
+  struct tmp_debug_t        *owner = *markp;
+
+  if (owner == NULL)
+    {
+      __gmp_assert_header (file, line);
+      fprintf (stderr, "GNU MP: TMP_FREE(%s) without TMP_MARK(%s)\n",
+               free_name, decl_name);
+      abort ();
+    }
+
+  if (strcmp (free_name, decl_name) != 0)
+    {
+      __gmp_assert_header (file, line);
+      fprintf (stderr, "GNU MP: TMP_FREE(%s) when TMP_DECL(%s) is in scope\n",
+               free_name, decl_name);
+      abort ();
+    }
+
+  /* release every block on the list, and the record describing each;
+     the link is picked up before its record is freed */
+  for (entry = owner->list; entry != NULL; entry = following)
+    {
+      following = entry->next;
+      (*__gmp_free_func) (entry->block, entry->size);
+      __GMP_FREE_FUNC_TYPE (entry, 1, struct tmp_debug_entry_t);
+    }
+
+  *markp = NULL;   /* no mark is active any more */
+}
diff --git a/tal-notreent.c b/tal-notreent.c

new file mode 100644 (file)

index 0000000..662d4ea
--- /dev/null
+++ b/tal-notreent.c
@@ -0,0 +1,119 @@
+/* Stack allocation routines.  This is intended for machines without support
+   for the `alloca' function.
+
+Copyright 1996, 1997, 1999, 2000, 2001, 2006 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+struct tmp_stack                /* header placed at the start of each chunk */
+{
+  void *end;                    /* one past the last byte of the chunk */
+  void *alloc_point;            /* next unallocated byte in the chunk */
+  struct tmp_stack *prev;       /* chunk that was current before this one */
+};
+typedef struct tmp_stack tmp_stack;
+
+/* Byte totals exclude chunk headers (HSIZ); max is the peak reached.  */
+static unsigned long max_total_allocation = 0;
+static unsigned long current_total_allocation = 0;
+
+static tmp_stack xxx = {&xxx, &xxx, 0};  /* sentinel with no room */
+static tmp_stack *current = &xxx;        /* newest chunk */
+
+/* The rounded size of the header of each allocation block.  */
+#define HSIZ   ROUND_UP_MULTIPLE (sizeof (tmp_stack), __TMP_ALIGN)
+
+
+/* Allocate a block of exactly <size> bytes, carved from the current chunk.
+   Call only through the TMP_ALLOC macro, which handles rounding/alignment.  */
+void *
+__gmp_tmp_alloc (unsigned long size)
+{
+  void *that;
+
+  ASSERT ((size % __TMP_ALIGN) == 0);
+  ASSERT (((unsigned) current->alloc_point % __TMP_ALIGN) == 0);
+  /* Get a new chunk if the current one has under <size> bytes left.  */
+  if (size > (char *) current->end - (char *) current->alloc_point)
+    {
+      void *chunk;
+      tmp_stack *header;
+      unsigned long chunk_size;
+      unsigned long now;
+
+      /* Allocate a chunk that brings the total current allocation up to the
+        maximum ever, or, if <size> does not fit within that, to 3/2 of the
+        newly required total, which then becomes the new maximum.  */
+
+      now = current_total_allocation + size;  /* total if granted */
+      if (now > max_total_allocation)
+       {
+         /* We need more temporary memory than ever before.  Grow by half,
+            rounded up by mask (assumes __TMP_ALIGN is a power of 2).  */
+         now = (now * 3 / 2 + __TMP_ALIGN - 1) & -__TMP_ALIGN;
+         chunk_size = now - current_total_allocation + HSIZ;  /* + header */
+         current_total_allocation = now;
+         max_total_allocation = current_total_allocation;
+       }
+      else
+       {
+         chunk_size = max_total_allocation - current_total_allocation + HSIZ;
+         current_total_allocation = max_total_allocation;  /* now at peak */
+       }
+
+      chunk = (*__gmp_allocate_func) (chunk_size);
+      header = (tmp_stack *) chunk;
+      header->end = (char *) chunk + chunk_size;  /* one past the chunk */
+      header->alloc_point = (char *) chunk + HSIZ;  /* skip the header */
+      header->prev = current;  /* chain to the older chunk */
+      current = header;
+    }
+
+  that = current->alloc_point;  /* hand out the next free bytes */
+  current->alloc_point = (char *) that + size;  /* and advance past them */
+  ASSERT (((unsigned) that % __TMP_ALIGN) == 0);
+  return that;
+}
+
+/* Typically called at function entry.  <mark> is assigned so that
+   __gmp_tmp_free can later be used to reclaim all subsequently allocated
+   storage.  Nothing is allocated here; only the current position is saved.  */
+void
+__gmp_tmp_mark (struct tmp_marker *mark)
+{
+  mark->which_chunk = current;               /* chunk in use right now */
+  mark->alloc_point = current->alloc_point;  /* next free byte in it */
+}
+
+/* Free everything allocated since <mark> was assigned by __gmp_tmp_mark.  */
+void
+__gmp_tmp_free (struct tmp_marker *mark)
+{
+  while (mark->which_chunk != current)  /* pop chunks newer than the mark */
+    {
+      tmp_stack *tmp;
+      /* Unlink the newest chunk, uncount its usable bytes, release it.  */
+      tmp = current;
+      current = tmp->prev;
+      current_total_allocation -= (((char *) (tmp->end) - (char *) tmp) - HSIZ);
+      (*__gmp_free_func) (tmp, (char *) tmp->end - (char *) tmp);
+    }
+  current->alloc_point = mark->alloc_point;  /* rewind the marked chunk */
+}
diff --git a/tal-reent.c b/tal-reent.c

new file mode 100644 (file)

index 0000000..32db934
--- /dev/null
+++ b/tal-reent.c
@@ -0,0 +1,71 @@
+/* TMP_ALLOC routines using malloc in a reentrant fashion.
+
+Copyright 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Each TMP_ALLOC uses __gmp_allocate_func to get a block of memory of the
+   size requested, plus a header at the start which is used to hold the
+   blocks on a linked list in the marker variable, ready for TMP_FREE to
+   release.
+
+   Callers should try to do multiple allocs with one call, in the style of
+   TMP_ALLOC_LIMBS_2 if it's easy to arrange, since that will keep down the
+   number of separate malloc calls.
+
+   Enhancements:
+
+   Could inline both TMP_ALLOC and TMP_FREE, though TMP_ALLOC would need the
+   compiler to have "inline" since it returns a value.  The calls to malloc
+   will be slow though, so it hardly seems worth worrying about one extra
+   level of function call.  */
+
+
+#define HSIZ   ROUND_UP_MULTIPLE (sizeof (struct tmp_reentrant_t), __TMP_ALIGN)
+
+void *
+__gmp_tmp_reentrant_alloc (struct tmp_reentrant_t **markp, size_t size)
+{
+  char    *p;
+  size_t  total_size;
+  /* Allocate size bytes plus a list header; link the block onto *markp.  */
+#define P   ((struct tmp_reentrant_t *) p)  /* p viewed as its header */
+
+  total_size = size + HSIZ;  /* payload plus the rounded header */
+  p = (*__gmp_allocate_func) (total_size);
+  P->size = total_size;      /* full size, passed back to __gmp_free_func */
+  P->next = *markp;          /* push on the front of the list */
+  *markp = P;
+  return p + HSIZ;           /* the caller's bytes start after the header */
+}
+
+void
+__gmp_tmp_reentrant_free (struct tmp_reentrant_t *mark)
+{
+  struct tmp_reentrant_t  *next;
+  /* Release each block on the list that starts at mark; NULL does nothing.  */
+  while (mark != NULL)
+    {
+      next = mark->next;  /* read the link before its holder is freed */
+      (*__gmp_free_func) ((char *) mark, mark->size);  /* header + data */
+      mark = next;
+    }
+}
diff --git a/tests/Makefile.am b/tests/Makefile.am

new file mode 100644 (file)

index 0000000..30975cb
--- /dev/null
+++ b/tests/Makefile.am
@@ -0,0 +1,38 @@
+## Process this file with automake to generate Makefile.in
+
+# Copyright 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+SUBDIRS = . devel mpn mpz mpq mpf rand misc cxx mpbsd
+
+include ../mpn/Makeasm.am
+
+INCLUDES = -I$(top_srcdir)
+LDADD = libtests.la $(top_builddir)/libgmp.la
+
+check_LTLIBRARIES = libtests.la
+
+EXTRA_libtests_la_SOURCES = amd64call.asm amd64check.c x86call.asm x86check.c
+libtests_la_SOURCES = tests.h \
+  memory.c misc.c refmpf.c refmpn.c refmpq.c refmpz.c spinner.c trace.c
+libtests_la_DEPENDENCIES = @CALLING_CONVENTIONS_OBJS@
+libtests_la_LIBADD = $(libtests_la_DEPENDENCIES) $(top_builddir)/libgmp.la
+
+check_PROGRAMS = t-bswap t-constants t-count_zeros t-gmpmax t-hightomask \
+  t-modlinv t-popc t-parity t-sub
+TESTS = $(check_PROGRAMS)
diff --git a/tests/Makefile.in b/tests/Makefile.in

new file mode 100644 (file)

index 0000000..76ac384
--- /dev/null
+++ b/tests/Makefile.in
@@ -0,0 +1,1011 @@
+# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation,
+# Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+# Copyright 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+# Copyright 1996, 1998, 1999, 2000, 2001, 2002 Free Software Foundation,
+# Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+ANSI2KNR = $(top_builddir)/ansi2knr
+DIST_COMMON = $(srcdir)/../mpn/Makeasm.am $(srcdir)/Makefile.am \
+       $(srcdir)/Makefile.in
+check_PROGRAMS = t-bswap$(EXEEXT) t-constants$(EXEEXT) \
+       t-count_zeros$(EXEEXT) t-gmpmax$(EXEEXT) t-hightomask$(EXEEXT) \
+       t-modlinv$(EXEEXT) t-popc$(EXEEXT) t-parity$(EXEEXT) \
+       t-sub$(EXEEXT)
+subdir = tests
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
+       $(top_srcdir)/configure.in
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+       $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+am__DEPENDENCIES_1 =
+am_libtests_la_OBJECTS = memory$U.lo misc$U.lo refmpf$U.lo refmpn$U.lo \
+       refmpq$U.lo refmpz$U.lo spinner$U.lo trace$U.lo
+libtests_la_OBJECTS = $(am_libtests_la_OBJECTS)
+t_bswap_SOURCES = t-bswap.c
+t_bswap_OBJECTS = t-bswap$U.$(OBJEXT)
+t_bswap_LDADD = $(LDADD)
+t_bswap_DEPENDENCIES = libtests.la $(top_builddir)/libgmp.la
+t_constants_SOURCES = t-constants.c
+t_constants_OBJECTS = t-constants$U.$(OBJEXT)
+t_constants_LDADD = $(LDADD)
+t_constants_DEPENDENCIES = libtests.la $(top_builddir)/libgmp.la
+t_count_zeros_SOURCES = t-count_zeros.c
+t_count_zeros_OBJECTS = t-count_zeros$U.$(OBJEXT)
+t_count_zeros_LDADD = $(LDADD)
+t_count_zeros_DEPENDENCIES = libtests.la $(top_builddir)/libgmp.la
+t_gmpmax_SOURCES = t-gmpmax.c
+t_gmpmax_OBJECTS = t-gmpmax$U.$(OBJEXT)
+t_gmpmax_LDADD = $(LDADD)
+t_gmpmax_DEPENDENCIES = libtests.la $(top_builddir)/libgmp.la
+t_hightomask_SOURCES = t-hightomask.c
+t_hightomask_OBJECTS = t-hightomask$U.$(OBJEXT)
+t_hightomask_LDADD = $(LDADD)
+t_hightomask_DEPENDENCIES = libtests.la $(top_builddir)/libgmp.la
+t_modlinv_SOURCES = t-modlinv.c
+t_modlinv_OBJECTS = t-modlinv$U.$(OBJEXT)
+t_modlinv_LDADD = $(LDADD)
+t_modlinv_DEPENDENCIES = libtests.la $(top_builddir)/libgmp.la
+t_parity_SOURCES = t-parity.c
+t_parity_OBJECTS = t-parity$U.$(OBJEXT)
+t_parity_LDADD = $(LDADD)
+t_parity_DEPENDENCIES = libtests.la $(top_builddir)/libgmp.la
+t_popc_SOURCES = t-popc.c
+t_popc_OBJECTS = t-popc$U.$(OBJEXT)
+t_popc_LDADD = $(LDADD)
+t_popc_DEPENDENCIES = libtests.la $(top_builddir)/libgmp.la
+t_sub_SOURCES = t-sub.c
+t_sub_OBJECTS = t-sub$U.$(OBJEXT)
+t_sub_LDADD = $(LDADD)
+t_sub_DEPENDENCIES = libtests.la $(top_builddir)/libgmp.la
+DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
+depcomp =
+am__depfiles_maybe =
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+       $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+       $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
+       $(LDFLAGS) -o $@
+SOURCES = $(libtests_la_SOURCES) $(EXTRA_libtests_la_SOURCES) \
+       t-bswap.c t-constants.c t-count_zeros.c t-gmpmax.c \
+       t-hightomask.c t-modlinv.c t-parity.c t-popc.c t-sub.c
+DIST_SOURCES = $(libtests_la_SOURCES) $(EXTRA_libtests_la_SOURCES) \
+       t-bswap.c t-constants.c t-count_zeros.c t-gmpmax.c \
+       t-hightomask.c t-modlinv.c t-parity.c t-popc.c t-sub.c
+RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \
+       html-recursive info-recursive install-data-recursive \
+       install-dvi-recursive install-exec-recursive \
+       install-html-recursive install-info-recursive \
+       install-pdf-recursive install-ps-recursive install-recursive \
+       installcheck-recursive installdirs-recursive pdf-recursive \
+       ps-recursive uninstall-recursive
+RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive        \
+  distclean-recursive maintainer-clean-recursive
+AM_RECURSIVE_TARGETS = $(RECURSIVE_TARGETS:-recursive=) \
+       $(RECURSIVE_CLEAN_TARGETS:-recursive=) tags TAGS ctags CTAGS \
+       distdir
+ETAGS = etags
+CTAGS = ctags
+am__tty_colors = \
+red=; grn=; lgn=; blu=; std=
+DIST_SUBDIRS = $(SUBDIRS)
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+am__relativize = \
+  dir0=`pwd`; \
+  sed_first='s,^\([^/]*\)/.*$$,\1,'; \
+  sed_rest='s,^[^/]*/*,,'; \
+  sed_last='s,^.*/\([^/]*\)$$,\1,'; \
+  sed_butlast='s,/*[^/]*$$,,'; \
+  while test -n "$$dir1"; do \
+    first=`echo "$$dir1" | sed -e "$$sed_first"`; \
+    if test "$$first" != "."; then \
+      if test "$$first" = ".."; then \
+        dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \
+        dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \
+      else \
+        first2=`echo "$$dir2" | sed -e "$$sed_first"`; \
+        if test "$$first2" = "$$first"; then \
+          dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \
+        else \
+          dir2="../$$dir2"; \
+        fi; \
+        dir0="$$dir0"/"$$first"; \
+      fi; \
+    fi; \
+    dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \
+  done; \
+  reldir="$$dir2"
+ABI = @ABI@
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AS = @AS@
+ASMFLAGS = @ASMFLAGS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CALLING_CONVENTIONS_OBJS = @CALLING_CONVENTIONS_OBJS@
+CC = @CC@
+CCAS = @CCAS@
+CC_FOR_BUILD = @CC_FOR_BUILD@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CPP_FOR_BUILD = @CPP_FOR_BUILD@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFN_LONG_LONG_LIMB = @DEFN_LONG_LONG_LIMB@
+DEFS = @DEFS@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+EXEEXT_FOR_BUILD = @EXEEXT_FOR_BUILD@
+FGREP = @FGREP@
+GMP_LDFLAGS = @GMP_LDFLAGS@
+GMP_LIMB_BITS = @GMP_LIMB_BITS@
+GMP_NAIL_BITS = @GMP_NAIL_BITS@
+GREP = @GREP@
+HAVE_CLOCK_01 = @HAVE_CLOCK_01@
+HAVE_CPUTIME_01 = @HAVE_CPUTIME_01@
+HAVE_GETRUSAGE_01 = @HAVE_GETRUSAGE_01@
+HAVE_GETTIMEOFDAY_01 = @HAVE_GETTIMEOFDAY_01@
+HAVE_HOST_CPU_FAMILY_power = @HAVE_HOST_CPU_FAMILY_power@
+HAVE_HOST_CPU_FAMILY_powerpc = @HAVE_HOST_CPU_FAMILY_powerpc@
+HAVE_SIGACTION_01 = @HAVE_SIGACTION_01@
+HAVE_SIGALTSTACK_01 = @HAVE_SIGALTSTACK_01@
+HAVE_SIGSTACK_01 = @HAVE_SIGSTACK_01@
+HAVE_STACK_T_01 = @HAVE_STACK_T_01@
+HAVE_SYS_RESOURCE_H_01 = @HAVE_SYS_RESOURCE_H_01@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LEX = @LEX@
+LEXLIB = @LEXLIB@
+LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
+LIBCURSES = @LIBCURSES@
+LIBGMPXX_LDFLAGS = @LIBGMPXX_LDFLAGS@
+LIBGMP_DLL = @LIBGMP_DLL@
+LIBGMP_LDFLAGS = @LIBGMP_LDFLAGS@
+LIBM = @LIBM@
+LIBM_FOR_BUILD = @LIBM_FOR_BUILD@
+LIBOBJS = @LIBOBJS@
+LIBREADLINE = @LIBREADLINE@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+M4 = @M4@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
+STRIP = @STRIP@
+TAL_OBJECT = @TAL_OBJECT@
+TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
+U = @U@
+U_FOR_BUILD = @U_FOR_BUILD@
+VERSION = @VERSION@
+WITH_READLINE_01 = @WITH_READLINE_01@
+YACC = @YACC@
+YFLAGS = @YFLAGS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__leading_dot = @am__leading_dot@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+gmp_srclinks = @gmp_srclinks@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+mpn_objects = @mpn_objects@
+mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
+mpn_objs_in_libmp = @mpn_objs_in_libmp@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+SUBDIRS = . devel mpn mpz mpq mpf rand misc cxx mpbsd
+
+# COMPILE minus CC.
+#
+COMPILE_FLAGS = $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+       $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) $(ASMFLAGS)
+
+
+# Flags used for preprocessing (in ansi2knr rules).
+#
+PREPROCESS_FLAGS = $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+       $(CPPFLAGS)
+
+
+# Recent versions of automake (1.5 and up for instance) append automake
+# generated suffixes to this $(SUFFIXES) list.  This is essential for us,
+# since .c must come after .s, .S and .asm.  If .c is before .s, for
+# instance, then in the mpn directory "make" will see add_n.c mentioned in
+# an explicit rule (the ansi2knr stuff) and decide it must have add_n.c,
+# even if add_n.c doesn't exist but add_n.s does.  See GNU make
+# documentation "(make)Implicit Rule Search", part 5c.
+#
+# On IRIX 6 native make this doesn't work properly though.  Somehow .c
+# remains ahead of .s, perhaps because .c.s is a builtin rule.  .asm works
+# fine though, and mpn/mips3 uses this.
+#
+SUFFIXES = .s .S .asm
+
+# can be overridden during development, eg. "make RM_TMP=: mul_1.lo"
+RM_TMP = rm -f
+INCLUDES = -I$(top_srcdir)
+LDADD = libtests.la $(top_builddir)/libgmp.la
+check_LTLIBRARIES = libtests.la
+EXTRA_libtests_la_SOURCES = amd64call.asm amd64check.c x86call.asm x86check.c
+libtests_la_SOURCES = tests.h \
+  memory.c misc.c refmpf.c refmpn.c refmpq.c refmpz.c spinner.c trace.c
+
+libtests_la_DEPENDENCIES = @CALLING_CONVENTIONS_OBJS@
+libtests_la_LIBADD = $(libtests_la_DEPENDENCIES) $(top_builddir)/libgmp.la
+TESTS = $(check_PROGRAMS)
+all: all-recursive
+
+.SUFFIXES:
+.SUFFIXES: .s .S .asm .c .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(srcdir)/../mpn/Makeasm.am $(am__configure_deps)
+       @for dep in $?; do \
+         case '$(am__configure_deps)' in \
+           *$$dep*) \
+             ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+               && { if test -f $@; then exit 0; else break; fi; }; \
+             exit 1;; \
+         esac; \
+       done; \
+       echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps tests/Makefile'; \
+       $(am__cd) $(top_srcdir) && \
+         $(AUTOMAKE) --gnu --ignore-deps tests/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+       @case '$?' in \
+         *config.status*) \
+           cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+         *) \
+           echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+           cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+       esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+clean-checkLTLIBRARIES:
+       -test -z "$(check_LTLIBRARIES)" || rm -f $(check_LTLIBRARIES)
+       @list='$(check_LTLIBRARIES)'; for p in $$list; do \
+         dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \
+         test "$$dir" != "$$p" || dir=.; \
+         echo "rm -f \"$${dir}/so_locations\""; \
+         rm -f "$${dir}/so_locations"; \
+       done
+libtests.la: $(libtests_la_OBJECTS) $(libtests_la_DEPENDENCIES) 
+       $(LINK)  $(libtests_la_OBJECTS) $(libtests_la_LIBADD) $(LIBS)
+
+clean-checkPROGRAMS:
+       @list='$(check_PROGRAMS)'; test -n "$$list" || exit 0; \
+       echo " rm -f" $$list; \
+       rm -f $$list || exit $$?; \
+       test -n "$(EXEEXT)" || exit 0; \
+       list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
+       echo " rm -f" $$list; \
+       rm -f $$list
+t-bswap$(EXEEXT): $(t_bswap_OBJECTS) $(t_bswap_DEPENDENCIES) 
+       @rm -f t-bswap$(EXEEXT)
+       $(LINK) $(t_bswap_OBJECTS) $(t_bswap_LDADD) $(LIBS)
+t-constants$(EXEEXT): $(t_constants_OBJECTS) $(t_constants_DEPENDENCIES) 
+       @rm -f t-constants$(EXEEXT)
+       $(LINK) $(t_constants_OBJECTS) $(t_constants_LDADD) $(LIBS)
+t-count_zeros$(EXEEXT): $(t_count_zeros_OBJECTS) $(t_count_zeros_DEPENDENCIES) 
+       @rm -f t-count_zeros$(EXEEXT)
+       $(LINK) $(t_count_zeros_OBJECTS) $(t_count_zeros_LDADD) $(LIBS)
+t-gmpmax$(EXEEXT): $(t_gmpmax_OBJECTS) $(t_gmpmax_DEPENDENCIES) 
+       @rm -f t-gmpmax$(EXEEXT)
+       $(LINK) $(t_gmpmax_OBJECTS) $(t_gmpmax_LDADD) $(LIBS)
+t-hightomask$(EXEEXT): $(t_hightomask_OBJECTS) $(t_hightomask_DEPENDENCIES) 
+       @rm -f t-hightomask$(EXEEXT)
+       $(LINK) $(t_hightomask_OBJECTS) $(t_hightomask_LDADD) $(LIBS)
+t-modlinv$(EXEEXT): $(t_modlinv_OBJECTS) $(t_modlinv_DEPENDENCIES) 
+       @rm -f t-modlinv$(EXEEXT)
+       $(LINK) $(t_modlinv_OBJECTS) $(t_modlinv_LDADD) $(LIBS)
+t-parity$(EXEEXT): $(t_parity_OBJECTS) $(t_parity_DEPENDENCIES) 
+       @rm -f t-parity$(EXEEXT)
+       $(LINK) $(t_parity_OBJECTS) $(t_parity_LDADD) $(LIBS)
+t-popc$(EXEEXT): $(t_popc_OBJECTS) $(t_popc_DEPENDENCIES) 
+       @rm -f t-popc$(EXEEXT)
+       $(LINK) $(t_popc_OBJECTS) $(t_popc_LDADD) $(LIBS)
+t-sub$(EXEEXT): $(t_sub_OBJECTS) $(t_sub_DEPENDENCIES) 
+       @rm -f t-sub$(EXEEXT)
+       $(LINK) $(t_sub_OBJECTS) $(t_sub_LDADD) $(LIBS)
+
+mostlyclean-compile:
+       -rm -f *.$(OBJEXT)
+
+distclean-compile:
+       -rm -f *.tab.c
+$(top_builddir)/ansi2knr:
+       $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
+
+mostlyclean-kr:
+       -test "$U" = "" || rm -f *_.c
+
+.c.o:
+       $(COMPILE) -c $<
+
+.c.obj:
+       $(COMPILE) -c `$(CYGPATH_W) '$<'`
+
+.c.lo:
+       $(LTCOMPILE) -c -o $@ $<
+amd64check_.c: amd64check.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/amd64check.c; then echo $(srcdir)/amd64check.c; else echo amd64check.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+memory_.c: memory.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/memory.c; then echo $(srcdir)/memory.c; else echo memory.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+misc_.c: misc.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/misc.c; then echo $(srcdir)/misc.c; else echo misc.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+refmpf_.c: refmpf.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/refmpf.c; then echo $(srcdir)/refmpf.c; else echo refmpf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+refmpn_.c: refmpn.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/refmpn.c; then echo $(srcdir)/refmpn.c; else echo refmpn.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+refmpq_.c: refmpq.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/refmpq.c; then echo $(srcdir)/refmpq.c; else echo refmpq.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+refmpz_.c: refmpz.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/refmpz.c; then echo $(srcdir)/refmpz.c; else echo refmpz.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+spinner_.c: spinner.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/spinner.c; then echo $(srcdir)/spinner.c; else echo spinner.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-bswap_.c: t-bswap.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-bswap.c; then echo $(srcdir)/t-bswap.c; else echo t-bswap.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-constants_.c: t-constants.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-constants.c; then echo $(srcdir)/t-constants.c; else echo t-constants.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-count_zeros_.c: t-count_zeros.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-count_zeros.c; then echo $(srcdir)/t-count_zeros.c; else echo t-count_zeros.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-gmpmax_.c: t-gmpmax.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-gmpmax.c; then echo $(srcdir)/t-gmpmax.c; else echo t-gmpmax.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-hightomask_.c: t-hightomask.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-hightomask.c; then echo $(srcdir)/t-hightomask.c; else echo t-hightomask.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-modlinv_.c: t-modlinv.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-modlinv.c; then echo $(srcdir)/t-modlinv.c; else echo t-modlinv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-parity_.c: t-parity.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-parity.c; then echo $(srcdir)/t-parity.c; else echo t-parity.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-popc_.c: t-popc.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-popc.c; then echo $(srcdir)/t-popc.c; else echo t-popc.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-sub_.c: t-sub.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-sub.c; then echo $(srcdir)/t-sub.c; else echo t-sub.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+trace_.c: trace.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/trace.c; then echo $(srcdir)/trace.c; else echo trace.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+x86check_.c: x86check.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/x86check.c; then echo $(srcdir)/x86check.c; else echo x86check.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+amd64check_.$(OBJEXT) amd64check_.lo memory_.$(OBJEXT) memory_.lo \
+misc_.$(OBJEXT) misc_.lo refmpf_.$(OBJEXT) refmpf_.lo \
+refmpn_.$(OBJEXT) refmpn_.lo refmpq_.$(OBJEXT) refmpq_.lo \
+refmpz_.$(OBJEXT) refmpz_.lo spinner_.$(OBJEXT) spinner_.lo \
+t-bswap_.$(OBJEXT) t-bswap_.lo t-constants_.$(OBJEXT) t-constants_.lo \
+t-count_zeros_.$(OBJEXT) t-count_zeros_.lo t-gmpmax_.$(OBJEXT) \
+t-gmpmax_.lo t-hightomask_.$(OBJEXT) t-hightomask_.lo \
+t-modlinv_.$(OBJEXT) t-modlinv_.lo t-parity_.$(OBJEXT) t-parity_.lo \
+t-popc_.$(OBJEXT) t-popc_.lo t-sub_.$(OBJEXT) t-sub_.lo \
+trace_.$(OBJEXT) trace_.lo x86check_.$(OBJEXT) x86check_.lo : \
+$(ANSI2KNR)
+
+mostlyclean-libtool:
+       -rm -f *.lo
+
+clean-libtool:
+       -rm -rf .libs _libs
+
+# This directory's subdirectories are mostly independent; you can cd
+# into them and run `make' without going through this Makefile.
+# To change the values of `make' variables: instead of editing Makefiles,
+# (1) if the variable is set in `config.status', edit `config.status'
+#     (which will cause the Makefiles to be regenerated when you run `make');
+# (2) otherwise, pass the desired values on the `make' command line.
+$(RECURSIVE_TARGETS):
+       @fail= failcom='exit 1'; \
+       for f in x $$MAKEFLAGS; do \
+         case $$f in \
+           *=* | --[!k]*);; \
+           *k*) failcom='fail=yes';; \
+         esac; \
+       done; \
+       dot_seen=no; \
+       target=`echo $@ | sed s/-recursive//`; \
+       list='$(SUBDIRS)'; for subdir in $$list; do \
+         echo "Making $$target in $$subdir"; \
+         if test "$$subdir" = "."; then \
+           dot_seen=yes; \
+           local_target="$$target-am"; \
+         else \
+           local_target="$$target"; \
+         fi; \
+         ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
+         || eval $$failcom; \
+       done; \
+       if test "$$dot_seen" = "no"; then \
+         $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
+       fi; test -z "$$fail"
+
+$(RECURSIVE_CLEAN_TARGETS):
+       @fail= failcom='exit 1'; \
+       for f in x $$MAKEFLAGS; do \
+         case $$f in \
+           *=* | --[!k]*);; \
+           *k*) failcom='fail=yes';; \
+         esac; \
+       done; \
+       dot_seen=no; \
+       case "$@" in \
+         distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \
+         *) list='$(SUBDIRS)' ;; \
+       esac; \
+       rev=''; for subdir in $$list; do \
+         if test "$$subdir" = "."; then :; else \
+           rev="$$subdir $$rev"; \
+         fi; \
+       done; \
+       rev="$$rev ."; \
+       target=`echo $@ | sed s/-recursive//`; \
+       for subdir in $$rev; do \
+         echo "Making $$target in $$subdir"; \
+         if test "$$subdir" = "."; then \
+           local_target="$$target-am"; \
+         else \
+           local_target="$$target"; \
+         fi; \
+         ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
+         || eval $$failcom; \
+       done && test -z "$$fail"
+tags-recursive:
+       list='$(SUBDIRS)'; for subdir in $$list; do \
+         test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \
+       done
+ctags-recursive:
+       list='$(SUBDIRS)'; for subdir in $$list; do \
+         test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \
+       done
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+       list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       mkid -fID $$unique
+tags: TAGS
+
+TAGS: tags-recursive $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+               $(TAGS_FILES) $(LISP)
+       set x; \
+       here=`pwd`; \
+       if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \
+         include_option=--etags-include; \
+         empty_fix=.; \
+       else \
+         include_option=--include; \
+         empty_fix=; \
+       fi; \
+       list='$(SUBDIRS)'; for subdir in $$list; do \
+         if test "$$subdir" = .; then :; else \
+           test ! -f $$subdir/TAGS || \
+             set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \
+         fi; \
+       done; \
+       list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       shift; \
+       if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+         test -n "$$unique" || unique=$$empty_fix; \
+         if test $$# -gt 0; then \
+           $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+             "$$@" $$unique; \
+         else \
+           $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+             $$unique; \
+         fi; \
+       fi
+ctags: CTAGS
+CTAGS: ctags-recursive $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+               $(TAGS_FILES) $(LISP)
+       list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       test -z "$(CTAGS_ARGS)$$unique" \
+         || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+            $$unique
+
+GTAGS:
+       here=`$(am__cd) $(top_builddir) && pwd` \
+         && $(am__cd) $(top_srcdir) \
+         && gtags -i $(GTAGS_ARGS) "$$here"
+
+distclean-tags:
+       -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+check-TESTS: $(TESTS)
+       @failed=0; all=0; xfail=0; xpass=0; skip=0; \
+       srcdir=$(srcdir); export srcdir; \
+       list=' $(TESTS) '; \
+       $(am__tty_colors); \
+       if test -n "$$list"; then \
+         for tst in $$list; do \
+           if test -f ./$$tst; then dir=./; \
+           elif test -f $$tst; then dir=; \
+           else dir="$(srcdir)/"; fi; \
+           if $(TESTS_ENVIRONMENT) $${dir}$$tst; then \
+             all=`expr $$all + 1`; \
+             case " $(XFAIL_TESTS) " in \
+             *[\ \     ]$$tst[\ \      ]*) \
+               xpass=`expr $$xpass + 1`; \
+               failed=`expr $$failed + 1`; \
+               col=$$red; res=XPASS; \
+             ;; \
+             *) \
+               col=$$grn; res=PASS; \
+             ;; \
+             esac; \
+           elif test $$? -ne 77; then \
+             all=`expr $$all + 1`; \
+             case " $(XFAIL_TESTS) " in \
+             *[\ \     ]$$tst[\ \      ]*) \
+               xfail=`expr $$xfail + 1`; \
+               col=$$lgn; res=XFAIL; \
+             ;; \
+             *) \
+               failed=`expr $$failed + 1`; \
+               col=$$red; res=FAIL; \
+             ;; \
+             esac; \
+           else \
+             skip=`expr $$skip + 1`; \
+             col=$$blu; res=SKIP; \
+           fi; \
+           echo "$${col}$$res$${std}: $$tst"; \
+         done; \
+         if test "$$all" -eq 1; then \
+           tests="test"; \
+           All=""; \
+         else \
+           tests="tests"; \
+           All="All "; \
+         fi; \
+         if test "$$failed" -eq 0; then \
+           if test "$$xfail" -eq 0; then \
+             banner="$$All$$all $$tests passed"; \
+           else \
+             if test "$$xfail" -eq 1; then failures=failure; else failures=failures; fi; \
+             banner="$$All$$all $$tests behaved as expected ($$xfail expected $$failures)"; \
+           fi; \
+         else \
+           if test "$$xpass" -eq 0; then \
+             banner="$$failed of $$all $$tests failed"; \
+           else \
+             if test "$$xpass" -eq 1; then passes=pass; else passes=passes; fi; \
+             banner="$$failed of $$all $$tests did not behave as expected ($$xpass unexpected $$passes)"; \
+           fi; \
+         fi; \
+         dashes="$$banner"; \
+         skipped=""; \
+         if test "$$skip" -ne 0; then \
+           if test "$$skip" -eq 1; then \
+             skipped="($$skip test was not run)"; \
+           else \
+             skipped="($$skip tests were not run)"; \
+           fi; \
+           test `echo "$$skipped" | wc -c` -le `echo "$$banner" | wc -c` || \
+             dashes="$$skipped"; \
+         fi; \
+         report=""; \
+         if test "$$failed" -ne 0 && test -n "$(PACKAGE_BUGREPORT)"; then \
+           report="Please report to $(PACKAGE_BUGREPORT)"; \
+           test `echo "$$report" | wc -c` -le `echo "$$banner" | wc -c` || \
+             dashes="$$report"; \
+         fi; \
+         dashes=`echo "$$dashes" | sed s/./=/g`; \
+         if test "$$failed" -eq 0; then \
+           echo "$$grn$$dashes"; \
+         else \
+           echo "$$red$$dashes"; \
+         fi; \
+         echo "$$banner"; \
+         test -z "$$skipped" || echo "$$skipped"; \
+         test -z "$$report" || echo "$$report"; \
+         echo "$$dashes$$std"; \
+         test "$$failed" -eq 0; \
+       else :; fi
+
+distdir: $(DISTFILES)
+       @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+       topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+       list='$(DISTFILES)'; \
+         dist_files=`for file in $$list; do echo $$file; done | \
+         sed -e "s|^$$srcdirstrip/||;t" \
+             -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+       case $$dist_files in \
+         */*) $(MKDIR_P) `echo "$$dist_files" | \
+                          sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+                          sort -u` ;; \
+       esac; \
+       for file in $$dist_files; do \
+         if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+         if test -d $$d/$$file; then \
+           dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+           if test -d "$(distdir)/$$file"; then \
+             find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+           fi; \
+           if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+             cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+             find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+           fi; \
+           cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+         else \
+           test -f "$(distdir)/$$file" \
+           || cp -p $$d/$$file "$(distdir)/$$file" \
+           || exit 1; \
+         fi; \
+       done
+       @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
+         if test "$$subdir" = .; then :; else \
+           test -d "$(distdir)/$$subdir" \
+           || $(MKDIR_P) "$(distdir)/$$subdir" \
+           || exit 1; \
+         fi; \
+       done
+       @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
+         if test "$$subdir" = .; then :; else \
+           dir1=$$subdir; dir2="$(distdir)/$$subdir"; \
+           $(am__relativize); \
+           new_distdir=$$reldir; \
+           dir1=$$subdir; dir2="$(top_distdir)"; \
+           $(am__relativize); \
+           new_top_distdir=$$reldir; \
+           echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \
+           echo "     am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \
+           ($(am__cd) $$subdir && \
+             $(MAKE) $(AM_MAKEFLAGS) \
+               top_distdir="$$new_top_distdir" \
+               distdir="$$new_distdir" \
+               am__remove_distdir=: \
+               am__skip_length_check=: \
+               am__skip_mode_fix=: \
+               distdir) \
+             || exit 1; \
+         fi; \
+       done
+check-am: all-am
+       $(MAKE) $(AM_MAKEFLAGS) $(check_LTLIBRARIES) $(check_PROGRAMS)
+       $(MAKE) $(AM_MAKEFLAGS) check-TESTS
+check: check-recursive
+all-am: Makefile
+installdirs: installdirs-recursive
+installdirs-am:
+install: install-recursive
+install-exec: install-exec-recursive
+install-data: install-data-recursive
+uninstall: uninstall-recursive
+
+install-am: all-am
+       @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-recursive
+install-strip:
+       $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+         install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+         `test -z '$(STRIP)' || \
+           echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+       -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+       -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+       @echo "This command is intended for maintainers to use"
+       @echo "it deletes files that may require special tools to rebuild."
+clean: clean-recursive
+
+clean-am: clean-checkLTLIBRARIES clean-checkPROGRAMS clean-generic \
+       clean-libtool mostlyclean-am
+
+distclean: distclean-recursive
+       -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+       distclean-tags
+
+dvi: dvi-recursive
+
+dvi-am:
+
+html: html-recursive
+
+html-am:
+
+info: info-recursive
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-recursive
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-recursive
+
+install-html-am:
+
+install-info: install-info-recursive
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-recursive
+
+install-pdf-am:
+
+install-ps: install-ps-recursive
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-recursive
+       -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-recursive
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+       mostlyclean-libtool
+
+pdf: pdf-recursive
+
+pdf-am:
+
+ps: ps-recursive
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) \
+       $(top_builddir)/ansi2knr check-am ctags-recursive install-am \
+       install-strip tags-recursive
+
+.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \
+       all all-am check check-TESTS check-am clean \
+       clean-checkLTLIBRARIES clean-checkPROGRAMS clean-generic \
+       clean-libtool ctags ctags-recursive distclean \
+       distclean-compile distclean-generic distclean-libtool \
+       distclean-tags distdir dvi dvi-am html html-am info info-am \
+       install install-am install-data install-data-am install-dvi \
+       install-dvi-am install-exec install-exec-am install-html \
+       install-html-am install-info install-info-am install-man \
+       install-pdf install-pdf-am install-ps install-ps-am \
+       install-strip installcheck installcheck-am installdirs \
+       installdirs-am maintainer-clean maintainer-clean-generic \
+       mostlyclean mostlyclean-compile mostlyclean-generic \
+       mostlyclean-kr mostlyclean-libtool pdf pdf-am ps ps-am tags \
+       tags-recursive uninstall uninstall-am
+
+
+# .s assembler, no preprocessing.
+#
+.s.o:
+       $(CCAS) $(COMPILE_FLAGS) `test -f '$<' || echo '$(srcdir)/'`$<
+.s.obj:
+       $(CCAS) $(COMPILE_FLAGS) `if test -f '$<'; then $(CYGPATH_W) '$<'; else $(CYGPATH_W) '$(srcdir)/$<'; fi`
+.s.lo:
+       $(LIBTOOL) --mode=compile --tag=CC $(CCAS) $(COMPILE_FLAGS) `test -f '$<' || echo '$(srcdir)/'`$<
+
+# .S assembler, preprocessed with cpp.
+#
+# It's necessary to run $(CPP) separately, since it seems not all compilers
+# recognise .S files, in particular "cc" on HP-UX 10 and 11 doesn't (and
+# will silently do nothing if given a .S).
+#
+# For .lo we need a helper script, as described below for .asm.lo.
+#
+.S.o:
+       $(CPP) $(PREPROCESS_FLAGS) `test -f '$<' || echo '$(srcdir)/'`$< | grep -v '^#' >tmp-$*.s
+       $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@
+       $(RM_TMP) tmp-$*.s
+.S.obj:
+       $(CPP) $(PREPROCESS_FLAGS) `if test -f '$<'; then $(CYGPATH_W) '$<'; else $(CYGPATH_W) '$(srcdir)/$<'; fi` | grep -v '^#' >tmp-$*.s
+       $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@
+       $(RM_TMP) tmp-$*.s
+.S.lo:
+       $(LIBTOOL) --mode=compile --tag=CC $(top_srcdir)/mpn/cpp-ccas --cpp="$(CPP) $(PREPROCESS_FLAGS)" $(CCAS) $(COMPILE_FLAGS) `test -f '$<' || echo '$(srcdir)/'`$<
+
+# .asm assembler, preprocessed with m4.
+#
+# .o and .obj are non-PIC and just need m4 followed by a compile.
+#
+# .lo is a bit tricky.  Libtool (as of version 1.5) has foo.lo as a little
+# text file, and .libs/foo.o and foo.o as the PIC and non-PIC objects,
+# respectively.  It'd be asking for lots of trouble to try to create foo.lo
+# ourselves, so instead arrange to invoke libtool like a --mode=compile, but
+# with a special m4-ccas script which first m4 preprocesses, then compiles.
+# --tag=CC is necessary since foo.asm is otherwise unknown to libtool.
+#
+# Libtool adds -DPIC when building a shared object and the .asm files look
+# for that.  But it should be noted that the other PIC flags are on occasion
+# important too, in particular FreeBSD 2.2.8 gas 1.92.3 requires -k before
+# it accepts PIC constructs like @GOT, and gcc adds that flag only under
+# -fPIC.  (Later versions of gas are happy to accept PIC stuff any time.)
+#
+.asm.o:
+       $(M4) -DOPERATION_$* `test -f '$<' || echo '$(srcdir)/'`$< >tmp-$*.s
+       $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@
+       $(RM_TMP) tmp-$*.s
+.asm.obj:
+       $(M4) -DOPERATION_$* `if test -f '$<'; then $(CYGPATH_W) '$<'; else $(CYGPATH_W) '$(srcdir)/$<'; fi` >tmp-$*.s
+       $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@
+       $(RM_TMP) tmp-$*.s
+.asm.lo:
+       $(LIBTOOL) --mode=compile --tag=CC $(top_srcdir)/mpn/m4-ccas --m4="$(M4)" $(CCAS) $(COMPILE_FLAGS) `test -f '$<' || echo '$(srcdir)/'`$<
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/tests/amd64call.asm b/tests/amd64call.asm

new file mode 100644 (file)

index 0000000..f23f476
--- /dev/null
+++ b/tests/amd64call.asm
@@ -0,0 +1,165 @@
+dnl  AMD64 calling conventions checking.
+
+dnl  Copyright 2000, 2003, 2004, 2006, 2007 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+
+C void x86_fldcw (unsigned short cw);
+C
+C Execute an fldcw, setting the x87 control word to cw.
+
+PROLOGUE(x86_fldcw)
+       movq    %rdi, -8(%rsp)
+       fldcw   -8(%rsp)
+       ret
+EPILOGUE()
+
+
+C unsigned short x86_fstcw (void);
+C
+C Execute an fstcw, returning the current x87 control word.
+
+PROLOGUE(x86_fstcw)
+        movq   $0, -8(%rsp)
+        fstcw  -8(%rsp)
+        movq   -8(%rsp), %rax
+       ret
+EPILOGUE()
+
+
+dnl  Instrumented profiling won't come out quite right below, since we don't
+dnl  do an actual "ret".  There's only a few instructions here, so there's
+dnl  no great need to get them separately accounted, just let them get
+dnl  attributed to the caller.
+
+ifelse(WANT_PROFILING,instrument,
+`define(`WANT_PROFILING',no)')
+
+
+C int calling_conventions (...);
+C
+C The global variable "calling_conventions_function" is the function to
+C call, with the arguments as passed here.
+C
+C Perhaps the finit should be done only if the tags word isn't clear, but
+C nothing uses the rounding mode or anything at the moment.
+
+define(`WANT_RBX', eval(8*0)($1))
+define(`WANT_RBP', eval(8*1)($1))
+define(`WANT_R12', eval(8*2)($1))
+define(`WANT_R13', eval(8*3)($1))
+define(`WANT_R14', eval(8*4)($1))
+define(`WANT_R15', eval(8*5)($1))
+
+define(`JUNK_RAX', eval(8*6)($1))
+define(`JUNK_R10', eval(8*7)($1))
+define(`JUNK_R11', eval(8*8)($1))
+
+define(`SAVE_RBX', eval(8*9)($1))
+define(`SAVE_RBP', eval(8*10)($1))
+define(`SAVE_R12', eval(8*11)($1))
+define(`SAVE_R13', eval(8*12)($1))
+define(`SAVE_R14', eval(8*13)($1))
+define(`SAVE_R15', eval(8*14)($1))
+
+define(`RETADDR',  eval(8*15)($1))
+
+define(`RBX',     eval(8*16)($1))
+define(`RBP',     eval(8*17)($1))
+define(`R12',     eval(8*18)($1))
+define(`R13',     eval(8*19)($1))
+define(`R14',     eval(8*20)($1))
+define(`R15',     eval(8*21)($1))
+define(`RFLAGS',   eval(8*22)($1))
+
+
+define(G,
+m4_assert_numargs(1)
+`GSYM_PREFIX`'$1')
+
+       TEXT
+       ALIGN(32)
+PROLOGUE(calling_conventions)
+       push    %rdi
+       movq    G(calling_conventions_values)@GOTPCREL(%rip), %rdi
+
+       movq    8(%rsp), %rax
+       movq    %rax, RETADDR(%rdi)
+
+       leaq    L(return)(%rip), %rax
+       movq    %rax, 8(%rsp)
+
+       movq    %rbx, SAVE_RBX(%rdi)
+       movq    %rbp, SAVE_RBP(%rdi)
+       movq    %r12, SAVE_R12(%rdi)
+       movq    %r13, SAVE_R13(%rdi)
+       movq    %r14, SAVE_R14(%rdi)
+       movq    %r15, SAVE_R15(%rdi)
+
+       C values we expect to see unchanged, as per amd64check.c
+       movq    WANT_RBX(%rdi), %rbx
+       movq    WANT_RBP(%rdi), %rbp
+       movq    WANT_R12(%rdi), %r12
+       movq    WANT_R13(%rdi), %r13
+       movq    WANT_R14(%rdi), %r14
+       movq    WANT_R15(%rdi), %r15
+
+       C Try to provoke a problem by starting with junk in the registers,
+       C especially %rax which will be the return value.
+       C
+       C ENHANCE-ME: If we knew how many of the parameter registers were
+       C actually being used we could put junk in the rest.  Maybe we could
+       C get try.c to communicate this to us.
+C      movq    JUNK_RAX(%rdi), %rax            C overwritten below anyway
+       movq    JUNK_R10(%rdi), %r10
+       movq    JUNK_R11(%rdi), %r11
+
+       movq    G(calling_conventions_function)@GOTPCREL(%rip), %rax
+       pop     %rdi
+       jmp     *(%rax)
+
+L(return):
+       movq    G(calling_conventions_values)@GOTPCREL(%rip), %rdi
+
+       movq    %rbx, RBX(%rdi)
+       movq    %rbp, RBP(%rdi)
+       movq    %r12, R12(%rdi)
+       movq    %r13, R13(%rdi)
+       movq    %r14, R14(%rdi)
+       movq    %r15, R15(%rdi)
+
+       pushfq
+       popq    %rbx
+       movq    %rbx, RFLAGS(%rdi)
+
+       movq    G(calling_conventions_fenv)@GOTPCREL(%rip), %rbx
+       fstenv  (%rbx)
+       finit
+
+       movq    SAVE_RBX(%rdi), %rbx
+       movq    SAVE_RBP(%rdi), %rbp
+       movq    SAVE_R12(%rdi), %r12
+       movq    SAVE_R13(%rdi), %r13
+       movq    SAVE_R14(%rdi), %r14
+       movq    SAVE_R15(%rdi), %r15
+
+       jmp     *RETADDR(%rdi)
+
+EPILOGUE()
diff --git a/tests/amd64check.c b/tests/amd64check.c

new file mode 100644 (file)

index 0000000..46eaae0
--- /dev/null
+++ b/tests/amd64check.c
@@ -0,0 +1,106 @@
+/* AMD64 calling conventions checking.
+
+Copyright 2000, 2001, 2004, 2007 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* Vector of constants and register values.  We use one vector to allow access
+   via a base pointer, very beneficial for the PIC-enabled amd64call.asm.  */
+mp_limb_t calling_conventions_values[23] =
+{
+  CNST_LIMB(0x1234567887654321),       /* want_rbx */
+  CNST_LIMB(0x89ABCDEFFEDCBA98),       /* want_rbp */
+  CNST_LIMB(0xDEADBEEFBADECAFE),       /* want_r12 */
+  CNST_LIMB(0xFFEEDDCCBBAA9988),       /* want_r13 */
+  CNST_LIMB(0x0011223344556677),       /* want_r14 */
+  CNST_LIMB(0x1234432156788765),       /* want_r15 */
+
+  CNST_LIMB(0xFEEDABBACAAFBEED),       /* JUNK_RAX */
+  CNST_LIMB(0xAB78DE89FF5125BB),       /* JUNK_R10 */
+  CNST_LIMB(0x1238901890189031)                /* JUNK_R11 */
+
+  /* rest of array used for dynamic values.  */
+};
+
+/* Index starts for various regions in above vector.  */
+#define WANT   0
+#define JUNK   6
+#define SAVE   9
+#define RETADDR        15
+#define VAL    16
+#define RFLAGS 22
+
+/* values to check */
+struct {
+  int  control;
+  int  status;
+  int  tag;
+  int  other[4];
+} calling_conventions_fenv;
+
+
+char *regname[6] = {"rbx", "rbp", "r12", "r13", "r14", "r15"};
+
+#define DIR_BIT(rflags)   (((rflags) & (1<<10)) != 0)
+
+
+/* Return 1 if ok, 0 if not */
+
+int
+calling_conventions_check (void)
+{
+  const char  *header = "Violated calling conventions:\n";
+  int  ret = 1;
+  int i;
+
+#define CHECK(callreg, regstr, value)                  \
+  if (callreg != value)                                        \
+    {                                                  \
+      printf ("%s   %s got 0x%016lX want 0x%016lX\n",  \
+             header, regstr, callreg, value);          \
+      header = "";                                     \
+      ret = 0;                                         \
+    }
+
+  for (i = 0; i < 6; i++)
+    {
+      CHECK (calling_conventions_values[VAL+i], regname[i], calling_conventions_values[WANT+i]);
+    }
+
+  if (DIR_BIT (calling_conventions_values[RFLAGS]) != 0)
+    {
+      printf ("%s   rflags dir bit  got %d want 0\n",
+             header, DIR_BIT (calling_conventions_values[RFLAGS]));
+      header = "";
+      ret = 0;
+    }
+
+  if ((calling_conventions_fenv.tag & 0xFFFF) != 0xFFFF)
+    {
+      printf ("%s   fpu tags  got 0x%X want 0xFFFF\n",
+             header, calling_conventions_fenv.tag & 0xFFFF);
+      header = "";
+      ret = 0;
+    }
+
+  return ret;
+}
diff --git a/tests/cxx/Makefile.am b/tests/cxx/Makefile.am

new file mode 100644 (file)

index 0000000..41764ee
--- /dev/null
+++ b/tests/cxx/Makefile.am
@@ -0,0 +1,68 @@
+## Process this file with automake to generate Makefile.in
+
+# Copyright 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+# LDADD has an explicit -L of $(top_builddir)/.libs for the benefit of gcc
+# 3.2 on itanium2-hp-hpux11.22.  Without this option, the libgmp.sl.6
+# required by libgmpxx.sl (ie. in its NEEDED records) is not found by the
+# linker.  FIXME: Presumably libtool should do something about this itself.
+#
+INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/tests
+LDADD = -L$(top_builddir)/.libs \
+  $(top_builddir)/tests/libtests.la \
+  $(top_builddir)/libgmpxx.la \
+  $(top_builddir)/libgmp.la
+
+if WANT_CXX
+check_PROGRAMS = t-assign t-binary t-cast t-constr t-headers \
+  t-istream t-locale t-misc t-ops t-ostream t-prec t-rand t-ternary t-unary
+TESTS = $(check_PROGRAMS)
+endif
+
+t_assign_SOURCES  = t-assign.cc
+t_binary_SOURCES  = t-binary.cc
+t_cast_SOURCES    = t-cast.cc
+t_constr_SOURCES  = t-constr.cc
+t_headers_SOURCES = t-headers.cc
+t_istream_SOURCES = t-istream.cc
+t_locale_SOURCES  = t-locale.cc clocale.c
+t_misc_SOURCES    = t-misc.cc
+t_ops_SOURCES     = t-ops.cc
+t_ostream_SOURCES = t-ostream.cc
+t_prec_SOURCES    = t-prec.cc
+t_rand_SOURCES    = t-rand.cc
+t_ternary_SOURCES = t-ternary.cc
+t_unary_SOURCES   = t-unary.cc
+
+$(top_builddir)/tests/libtests.la:
+       cd $(top_builddir)/tests && $(MAKE) $(AM_MAKEFLAGS) libtests.la
+
+
+# Libtool (1.5) somehow botches its uninstalled shared library setups on
+# OpenBSD 3.2, making the C++ test programs here fail.  libgmpxx.so ends up
+# with a NEEDED record asking for ./.libs/libgmp.so.N, but the loader can't
+# find that unless it exists in the current directory.
+#
+# FIXME: Clearly libtool ought to handle this itself, in which case the hack
+# here can be removed.
+#
+# Note this fix applies only when running "make check".  The cp here should
+# be done manually if just one program is to be built and run.
+#
+TESTS_ENVIRONMENT = cp $(top_builddir)/.libs/libgmp.so.* .libs 2>/dev/null || true;
diff --git a/tests/cxx/Makefile.in b/tests/cxx/Makefile.in

new file mode 100644 (file)

index 0000000..86592f9
--- /dev/null
+++ b/tests/cxx/Makefile.in
@@ -0,0 +1,798 @@
+# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation,
+# Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+# Copyright 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+ANSI2KNR = $(top_builddir)/ansi2knr
+@WANT_CXX_TRUE@check_PROGRAMS = t-assign$(EXEEXT) t-binary$(EXEEXT) \
+@WANT_CXX_TRUE@        t-cast$(EXEEXT) t-constr$(EXEEXT) \
+@WANT_CXX_TRUE@        t-headers$(EXEEXT) t-istream$(EXEEXT) \
+@WANT_CXX_TRUE@        t-locale$(EXEEXT) t-misc$(EXEEXT) \
+@WANT_CXX_TRUE@        t-ops$(EXEEXT) t-ostream$(EXEEXT) \
+@WANT_CXX_TRUE@        t-prec$(EXEEXT) t-rand$(EXEEXT) \
+@WANT_CXX_TRUE@        t-ternary$(EXEEXT) t-unary$(EXEEXT)
+subdir = tests/cxx
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
+       $(top_srcdir)/configure.in
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+       $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+am_t_assign_OBJECTS = t-assign.$(OBJEXT)
+t_assign_OBJECTS = $(am_t_assign_OBJECTS)
+t_assign_LDADD = $(LDADD)
+t_assign_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmpxx.la $(top_builddir)/libgmp.la
+am_t_binary_OBJECTS = t-binary.$(OBJEXT)
+t_binary_OBJECTS = $(am_t_binary_OBJECTS)
+t_binary_LDADD = $(LDADD)
+t_binary_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmpxx.la $(top_builddir)/libgmp.la
+am_t_cast_OBJECTS = t-cast.$(OBJEXT)
+t_cast_OBJECTS = $(am_t_cast_OBJECTS)
+t_cast_LDADD = $(LDADD)
+t_cast_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmpxx.la $(top_builddir)/libgmp.la
+am_t_constr_OBJECTS = t-constr.$(OBJEXT)
+t_constr_OBJECTS = $(am_t_constr_OBJECTS)
+t_constr_LDADD = $(LDADD)
+t_constr_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmpxx.la $(top_builddir)/libgmp.la
+am_t_headers_OBJECTS = t-headers.$(OBJEXT)
+t_headers_OBJECTS = $(am_t_headers_OBJECTS)
+t_headers_LDADD = $(LDADD)
+t_headers_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmpxx.la $(top_builddir)/libgmp.la
+am_t_istream_OBJECTS = t-istream.$(OBJEXT)
+t_istream_OBJECTS = $(am_t_istream_OBJECTS)
+t_istream_LDADD = $(LDADD)
+t_istream_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmpxx.la $(top_builddir)/libgmp.la
+am_t_locale_OBJECTS = t-locale.$(OBJEXT) clocale$U.$(OBJEXT)
+t_locale_OBJECTS = $(am_t_locale_OBJECTS)
+t_locale_LDADD = $(LDADD)
+t_locale_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmpxx.la $(top_builddir)/libgmp.la
+am_t_misc_OBJECTS = t-misc.$(OBJEXT)
+t_misc_OBJECTS = $(am_t_misc_OBJECTS)
+t_misc_LDADD = $(LDADD)
+t_misc_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmpxx.la $(top_builddir)/libgmp.la
+am_t_ops_OBJECTS = t-ops.$(OBJEXT)
+t_ops_OBJECTS = $(am_t_ops_OBJECTS)
+t_ops_LDADD = $(LDADD)
+t_ops_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmpxx.la $(top_builddir)/libgmp.la
+am_t_ostream_OBJECTS = t-ostream.$(OBJEXT)
+t_ostream_OBJECTS = $(am_t_ostream_OBJECTS)
+t_ostream_LDADD = $(LDADD)
+t_ostream_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmpxx.la $(top_builddir)/libgmp.la
+am_t_prec_OBJECTS = t-prec.$(OBJEXT)
+t_prec_OBJECTS = $(am_t_prec_OBJECTS)
+t_prec_LDADD = $(LDADD)
+t_prec_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmpxx.la $(top_builddir)/libgmp.la
+am_t_rand_OBJECTS = t-rand.$(OBJEXT)
+t_rand_OBJECTS = $(am_t_rand_OBJECTS)
+t_rand_LDADD = $(LDADD)
+t_rand_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmpxx.la $(top_builddir)/libgmp.la
+am_t_ternary_OBJECTS = t-ternary.$(OBJEXT)
+t_ternary_OBJECTS = $(am_t_ternary_OBJECTS)
+t_ternary_LDADD = $(LDADD)
+t_ternary_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmpxx.la $(top_builddir)/libgmp.la
+am_t_unary_OBJECTS = t-unary.$(OBJEXT)
+t_unary_OBJECTS = $(am_t_unary_OBJECTS)
+t_unary_LDADD = $(LDADD)
+t_unary_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmpxx.la $(top_builddir)/libgmp.la
+DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
+depcomp =
+am__depfiles_maybe =
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+       $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+       $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
+       $(LDFLAGS) -o $@
+CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+       $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
+LTCXXCOMPILE = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+       $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
+CXXLD = $(CXX)
+CXXLINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=link $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
+       $(LDFLAGS) -o $@
+SOURCES = $(t_assign_SOURCES) $(t_binary_SOURCES) $(t_cast_SOURCES) \
+       $(t_constr_SOURCES) $(t_headers_SOURCES) $(t_istream_SOURCES) \
+       $(t_locale_SOURCES) $(t_misc_SOURCES) $(t_ops_SOURCES) \
+       $(t_ostream_SOURCES) $(t_prec_SOURCES) $(t_rand_SOURCES) \
+       $(t_ternary_SOURCES) $(t_unary_SOURCES)
+DIST_SOURCES = $(t_assign_SOURCES) $(t_binary_SOURCES) \
+       $(t_cast_SOURCES) $(t_constr_SOURCES) $(t_headers_SOURCES) \
+       $(t_istream_SOURCES) $(t_locale_SOURCES) $(t_misc_SOURCES) \
+       $(t_ops_SOURCES) $(t_ostream_SOURCES) $(t_prec_SOURCES) \
+       $(t_rand_SOURCES) $(t_ternary_SOURCES) $(t_unary_SOURCES)
+ETAGS = etags
+CTAGS = ctags
+am__tty_colors = \
+red=; grn=; lgn=; blu=; std=
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ABI = @ABI@
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AS = @AS@
+ASMFLAGS = @ASMFLAGS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CALLING_CONVENTIONS_OBJS = @CALLING_CONVENTIONS_OBJS@
+CC = @CC@
+CCAS = @CCAS@
+CC_FOR_BUILD = @CC_FOR_BUILD@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CPP_FOR_BUILD = @CPP_FOR_BUILD@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFN_LONG_LONG_LIMB = @DEFN_LONG_LONG_LIMB@
+DEFS = @DEFS@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+EXEEXT_FOR_BUILD = @EXEEXT_FOR_BUILD@
+FGREP = @FGREP@
+GMP_LDFLAGS = @GMP_LDFLAGS@
+GMP_LIMB_BITS = @GMP_LIMB_BITS@
+GMP_NAIL_BITS = @GMP_NAIL_BITS@
+GREP = @GREP@
+HAVE_CLOCK_01 = @HAVE_CLOCK_01@
+HAVE_CPUTIME_01 = @HAVE_CPUTIME_01@
+HAVE_GETRUSAGE_01 = @HAVE_GETRUSAGE_01@
+HAVE_GETTIMEOFDAY_01 = @HAVE_GETTIMEOFDAY_01@
+HAVE_HOST_CPU_FAMILY_power = @HAVE_HOST_CPU_FAMILY_power@
+HAVE_HOST_CPU_FAMILY_powerpc = @HAVE_HOST_CPU_FAMILY_powerpc@
+HAVE_SIGACTION_01 = @HAVE_SIGACTION_01@
+HAVE_SIGALTSTACK_01 = @HAVE_SIGALTSTACK_01@
+HAVE_SIGSTACK_01 = @HAVE_SIGSTACK_01@
+HAVE_STACK_T_01 = @HAVE_STACK_T_01@
+HAVE_SYS_RESOURCE_H_01 = @HAVE_SYS_RESOURCE_H_01@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LEX = @LEX@
+LEXLIB = @LEXLIB@
+LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
+LIBCURSES = @LIBCURSES@
+LIBGMPXX_LDFLAGS = @LIBGMPXX_LDFLAGS@
+LIBGMP_DLL = @LIBGMP_DLL@
+LIBGMP_LDFLAGS = @LIBGMP_LDFLAGS@
+LIBM = @LIBM@
+LIBM_FOR_BUILD = @LIBM_FOR_BUILD@
+LIBOBJS = @LIBOBJS@
+LIBREADLINE = @LIBREADLINE@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+M4 = @M4@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
+STRIP = @STRIP@
+TAL_OBJECT = @TAL_OBJECT@
+TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
+U = @U@
+U_FOR_BUILD = @U_FOR_BUILD@
+VERSION = @VERSION@
+WITH_READLINE_01 = @WITH_READLINE_01@
+YACC = @YACC@
+YFLAGS = @YFLAGS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__leading_dot = @am__leading_dot@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+gmp_srclinks = @gmp_srclinks@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+mpn_objects = @mpn_objects@
+mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
+mpn_objs_in_libmp = @mpn_objs_in_libmp@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+
+# LDADD has an explicit -L of $(top_builddir)/.libs for the benefit of gcc
+# 3.2 on itanium2-hp-hpux11.22.  Without this option, the libgmp.sl.6
+# required by libgmpxx.sl (ie. in its NEEDED records) is not found by the
+# linker.  FIXME: Presumably libtool should do something about this itself.
+#
+INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/tests
+LDADD = -L$(top_builddir)/.libs \
+  $(top_builddir)/tests/libtests.la \
+  $(top_builddir)/libgmpxx.la \
+  $(top_builddir)/libgmp.la
+
+@WANT_CXX_TRUE@TESTS = $(check_PROGRAMS)
+t_assign_SOURCES = t-assign.cc
+t_binary_SOURCES = t-binary.cc
+t_cast_SOURCES = t-cast.cc
+t_constr_SOURCES = t-constr.cc
+t_headers_SOURCES = t-headers.cc
+t_istream_SOURCES = t-istream.cc
+t_locale_SOURCES = t-locale.cc clocale.c
+t_misc_SOURCES = t-misc.cc
+t_ops_SOURCES = t-ops.cc
+t_ostream_SOURCES = t-ostream.cc
+t_prec_SOURCES = t-prec.cc
+t_rand_SOURCES = t-rand.cc
+t_ternary_SOURCES = t-ternary.cc
+t_unary_SOURCES = t-unary.cc
+
+# Libtool (1.5) somehow botches its uninstalled shared library setups on
+# OpenBSD 3.2, making the C++ test programs here fail.  libgmpxx.so ends up
+# with a NEEDED record asking for ./.libs/libgmp.so.N, but the loader can't
+# find that unless it exists in the current directory.
+#
+# FIXME: Clearly libtool ought to handle this itself, in which case the hack
+# here can be removed.
+#
+# Note this fix applies only when running "make check".  The cp here should
+# be done manually if just one program is to be built and run.
+#
+TESTS_ENVIRONMENT = cp $(top_builddir)/.libs/libgmp.so.* .libs 2>/dev/null || true;
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .cc .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps)
+       @for dep in $?; do \
+         case '$(am__configure_deps)' in \
+           *$$dep*) \
+             ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+               && { if test -f $@; then exit 0; else break; fi; }; \
+             exit 1;; \
+         esac; \
+       done; \
+       echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps tests/cxx/Makefile'; \
+       $(am__cd) $(top_srcdir) && \
+         $(AUTOMAKE) --gnu --ignore-deps tests/cxx/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+       @case '$?' in \
+         *config.status*) \
+           cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+         *) \
+           echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+           cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+       esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+clean-checkPROGRAMS:
+       @list='$(check_PROGRAMS)'; test -n "$$list" || exit 0; \
+       echo " rm -f" $$list; \
+       rm -f $$list || exit $$?; \
+       test -n "$(EXEEXT)" || exit 0; \
+       list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
+       echo " rm -f" $$list; \
+       rm -f $$list
+t-assign$(EXEEXT): $(t_assign_OBJECTS) $(t_assign_DEPENDENCIES) 
+       @rm -f t-assign$(EXEEXT)
+       $(CXXLINK) $(t_assign_OBJECTS) $(t_assign_LDADD) $(LIBS)
+t-binary$(EXEEXT): $(t_binary_OBJECTS) $(t_binary_DEPENDENCIES) 
+       @rm -f t-binary$(EXEEXT)
+       $(CXXLINK) $(t_binary_OBJECTS) $(t_binary_LDADD) $(LIBS)
+t-cast$(EXEEXT): $(t_cast_OBJECTS) $(t_cast_DEPENDENCIES) 
+       @rm -f t-cast$(EXEEXT)
+       $(CXXLINK) $(t_cast_OBJECTS) $(t_cast_LDADD) $(LIBS)
+t-constr$(EXEEXT): $(t_constr_OBJECTS) $(t_constr_DEPENDENCIES) 
+       @rm -f t-constr$(EXEEXT)
+       $(CXXLINK) $(t_constr_OBJECTS) $(t_constr_LDADD) $(LIBS)
+t-headers$(EXEEXT): $(t_headers_OBJECTS) $(t_headers_DEPENDENCIES) 
+       @rm -f t-headers$(EXEEXT)
+       $(CXXLINK) $(t_headers_OBJECTS) $(t_headers_LDADD) $(LIBS)
+t-istream$(EXEEXT): $(t_istream_OBJECTS) $(t_istream_DEPENDENCIES) 
+       @rm -f t-istream$(EXEEXT)
+       $(CXXLINK) $(t_istream_OBJECTS) $(t_istream_LDADD) $(LIBS)
+t-locale$(EXEEXT): $(t_locale_OBJECTS) $(t_locale_DEPENDENCIES) 
+       @rm -f t-locale$(EXEEXT)
+       $(CXXLINK) $(t_locale_OBJECTS) $(t_locale_LDADD) $(LIBS)
+t-misc$(EXEEXT): $(t_misc_OBJECTS) $(t_misc_DEPENDENCIES) 
+       @rm -f t-misc$(EXEEXT)
+       $(CXXLINK) $(t_misc_OBJECTS) $(t_misc_LDADD) $(LIBS)
+t-ops$(EXEEXT): $(t_ops_OBJECTS) $(t_ops_DEPENDENCIES) 
+       @rm -f t-ops$(EXEEXT)
+       $(CXXLINK) $(t_ops_OBJECTS) $(t_ops_LDADD) $(LIBS)
+t-ostream$(EXEEXT): $(t_ostream_OBJECTS) $(t_ostream_DEPENDENCIES) 
+       @rm -f t-ostream$(EXEEXT)
+       $(CXXLINK) $(t_ostream_OBJECTS) $(t_ostream_LDADD) $(LIBS)
+t-prec$(EXEEXT): $(t_prec_OBJECTS) $(t_prec_DEPENDENCIES) 
+       @rm -f t-prec$(EXEEXT)
+       $(CXXLINK) $(t_prec_OBJECTS) $(t_prec_LDADD) $(LIBS)
+t-rand$(EXEEXT): $(t_rand_OBJECTS) $(t_rand_DEPENDENCIES) 
+       @rm -f t-rand$(EXEEXT)
+       $(CXXLINK) $(t_rand_OBJECTS) $(t_rand_LDADD) $(LIBS)
+t-ternary$(EXEEXT): $(t_ternary_OBJECTS) $(t_ternary_DEPENDENCIES) 
+       @rm -f t-ternary$(EXEEXT)
+       $(CXXLINK) $(t_ternary_OBJECTS) $(t_ternary_LDADD) $(LIBS)
+t-unary$(EXEEXT): $(t_unary_OBJECTS) $(t_unary_DEPENDENCIES) 
+       @rm -f t-unary$(EXEEXT)
+       $(CXXLINK) $(t_unary_OBJECTS) $(t_unary_LDADD) $(LIBS)
+
+mostlyclean-compile:
+       -rm -f *.$(OBJEXT)
+
+distclean-compile:
+       -rm -f *.tab.c
+$(top_builddir)/ansi2knr:
+       $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
+
+mostlyclean-kr:
+       -test "$U" = "" || rm -f *_.c
+
+.c.o:
+       $(COMPILE) -c $<
+
+.c.obj:
+       $(COMPILE) -c `$(CYGPATH_W) '$<'`
+
+.c.lo:
+       $(LTCOMPILE) -c -o $@ $<
+clocale_.c: clocale.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/clocale.c; then echo $(srcdir)/clocale.c; else echo clocale.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+clocale_.$(OBJEXT) clocale_.lo : $(ANSI2KNR)
+
+.cc.o:
+       $(CXXCOMPILE) -c -o $@ $<
+
+.cc.obj:
+       $(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.cc.lo:
+       $(LTCXXCOMPILE) -c -o $@ $<
+
+mostlyclean-libtool:
+       -rm -f *.lo
+
+clean-libtool:
+       -rm -rf .libs _libs
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+       list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       mkid -fID $$unique
+tags: TAGS
+
+TAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+               $(TAGS_FILES) $(LISP)
+       set x; \
+       here=`pwd`; \
+       list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       shift; \
+       if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+         test -n "$$unique" || unique=$$empty_fix; \
+         if test $$# -gt 0; then \
+           $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+             "$$@" $$unique; \
+         else \
+           $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+             $$unique; \
+         fi; \
+       fi
+ctags: CTAGS
+CTAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+               $(TAGS_FILES) $(LISP)
+       list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       test -z "$(CTAGS_ARGS)$$unique" \
+         || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+            $$unique
+
+GTAGS:
+       here=`$(am__cd) $(top_builddir) && pwd` \
+         && $(am__cd) $(top_srcdir) \
+         && gtags -i $(GTAGS_ARGS) "$$here"
+
+distclean-tags:
+       -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+check-TESTS: $(TESTS)
+       @failed=0; all=0; xfail=0; xpass=0; skip=0; \
+       srcdir=$(srcdir); export srcdir; \
+       list=' $(TESTS) '; \
+       $(am__tty_colors); \
+       if test -n "$$list"; then \
+         for tst in $$list; do \
+           if test -f ./$$tst; then dir=./; \
+           elif test -f $$tst; then dir=; \
+           else dir="$(srcdir)/"; fi; \
+           if $(TESTS_ENVIRONMENT) $${dir}$$tst; then \
+             all=`expr $$all + 1`; \
+             case " $(XFAIL_TESTS) " in \
+             *[\ \     ]$$tst[\ \      ]*) \
+               xpass=`expr $$xpass + 1`; \
+               failed=`expr $$failed + 1`; \
+               col=$$red; res=XPASS; \
+             ;; \
+             *) \
+               col=$$grn; res=PASS; \
+             ;; \
+             esac; \
+           elif test $$? -ne 77; then \
+             all=`expr $$all + 1`; \
+             case " $(XFAIL_TESTS) " in \
+             *[\ \     ]$$tst[\ \      ]*) \
+               xfail=`expr $$xfail + 1`; \
+               col=$$lgn; res=XFAIL; \
+             ;; \
+             *) \
+               failed=`expr $$failed + 1`; \
+               col=$$red; res=FAIL; \
+             ;; \
+             esac; \
+           else \
+             skip=`expr $$skip + 1`; \
+             col=$$blu; res=SKIP; \
+           fi; \
+           echo "$${col}$$res$${std}: $$tst"; \
+         done; \
+         if test "$$all" -eq 1; then \
+           tests="test"; \
+           All=""; \
+         else \
+           tests="tests"; \
+           All="All "; \
+         fi; \
+         if test "$$failed" -eq 0; then \
+           if test "$$xfail" -eq 0; then \
+             banner="$$All$$all $$tests passed"; \
+           else \
+             if test "$$xfail" -eq 1; then failures=failure; else failures=failures; fi; \
+             banner="$$All$$all $$tests behaved as expected ($$xfail expected $$failures)"; \
+           fi; \
+         else \
+           if test "$$xpass" -eq 0; then \
+             banner="$$failed of $$all $$tests failed"; \
+           else \
+             if test "$$xpass" -eq 1; then passes=pass; else passes=passes; fi; \
+             banner="$$failed of $$all $$tests did not behave as expected ($$xpass unexpected $$passes)"; \
+           fi; \
+         fi; \
+         dashes="$$banner"; \
+         skipped=""; \
+         if test "$$skip" -ne 0; then \
+           if test "$$skip" -eq 1; then \
+             skipped="($$skip test was not run)"; \
+           else \
+             skipped="($$skip tests were not run)"; \
+           fi; \
+           test `echo "$$skipped" | wc -c` -le `echo "$$banner" | wc -c` || \
+             dashes="$$skipped"; \
+         fi; \
+         report=""; \
+         if test "$$failed" -ne 0 && test -n "$(PACKAGE_BUGREPORT)"; then \
+           report="Please report to $(PACKAGE_BUGREPORT)"; \
+           test `echo "$$report" | wc -c` -le `echo "$$banner" | wc -c` || \
+             dashes="$$report"; \
+         fi; \
+         dashes=`echo "$$dashes" | sed s/./=/g`; \
+         if test "$$failed" -eq 0; then \
+           echo "$$grn$$dashes"; \
+         else \
+           echo "$$red$$dashes"; \
+         fi; \
+         echo "$$banner"; \
+         test -z "$$skipped" || echo "$$skipped"; \
+         test -z "$$report" || echo "$$report"; \
+         echo "$$dashes$$std"; \
+         test "$$failed" -eq 0; \
+       else :; fi
+
+distdir: $(DISTFILES)
+       @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+       topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+       list='$(DISTFILES)'; \
+         dist_files=`for file in $$list; do echo $$file; done | \
+         sed -e "s|^$$srcdirstrip/||;t" \
+             -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+       case $$dist_files in \
+         */*) $(MKDIR_P) `echo "$$dist_files" | \
+                          sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+                          sort -u` ;; \
+       esac; \
+       for file in $$dist_files; do \
+         if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+         if test -d $$d/$$file; then \
+           dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+           if test -d "$(distdir)/$$file"; then \
+             find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+           fi; \
+           if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+             cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+             find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+           fi; \
+           cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+         else \
+           test -f "$(distdir)/$$file" \
+           || cp -p $$d/$$file "$(distdir)/$$file" \
+           || exit 1; \
+         fi; \
+       done
+check-am: all-am
+       $(MAKE) $(AM_MAKEFLAGS) $(check_PROGRAMS)
+       $(MAKE) $(AM_MAKEFLAGS) check-TESTS
+check: check-am
+all-am: Makefile
+installdirs:
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+       @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+       $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+         install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+         `test -z '$(STRIP)' || \
+           echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+       -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+       -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+       @echo "This command is intended for maintainers to use"
+       @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-checkPROGRAMS clean-generic clean-libtool \
+       mostlyclean-am
+
+distclean: distclean-am
+       -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+       distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+       -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+       mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: $(top_builddir)/ansi2knr check-am install-am install-strip
+
+.PHONY: CTAGS GTAGS all all-am check check-TESTS check-am clean \
+       clean-checkPROGRAMS clean-generic clean-libtool ctags \
+       distclean distclean-compile distclean-generic \
+       distclean-libtool distclean-tags distdir dvi dvi-am html \
+       html-am info info-am install install-am install-data \
+       install-data-am install-dvi install-dvi-am install-exec \
+       install-exec-am install-html install-html-am install-info \
+       install-info-am install-man install-pdf install-pdf-am \
+       install-ps install-ps-am install-strip installcheck \
+       installcheck-am installdirs maintainer-clean \
+       maintainer-clean-generic mostlyclean mostlyclean-compile \
+       mostlyclean-generic mostlyclean-kr mostlyclean-libtool pdf \
+       pdf-am ps ps-am tags uninstall uninstall-am
+
+
+$(top_builddir)/tests/libtests.la:  # built by a sub-make run in the tests directory; "&&" stops if the cd fails
+       cd $(top_builddir)/tests && $(MAKE) $(AM_MAKEFLAGS) libtests.la
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/tests/cxx/clocale.c b/tests/cxx/clocale.c

new file mode 100644 (file)

index 0000000..9c942b1
--- /dev/null
+++ b/tests/cxx/clocale.c
@@ -0,0 +1,60 @@
+/* Manipulable localeconv and nl_langinfo.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#if HAVE_NL_TYPES_H
+#include <nl_types.h>  /* for nl_item */
+#endif
+
+#if HAVE_LANGINFO_H
+#include <langinfo.h>  /* for nl_langinfo */
+#endif
+
+#if HAVE_LOCALE_H
+#include <locale.h>    /* for lconv */
+#endif
+
+
+/* Replace the libc localeconv and nl_langinfo with ones we can manipulate.
+
+   This is done in a C file since if it was in a C++ file then we'd have to
+   match the "throw" or lack thereof declared for localeconv in <locale.h>.
+   g++ 3.2 gives an error about mismatched throws under "-pedantic", other
+   C++ compilers may very possibly do so too.  */
+
+extern char point_string[];
+
+#if HAVE_LOCALECONV
+struct lconv *
+localeconv (void)  /* test stand-in defined under the libc function's name */
+{
+  static struct lconv  l;  /* static storage, so the returned pointer stays valid */
+  l.decimal_point = point_string;  /* the only field assigned here */
+  return &l;
+}
+#endif
+
+#if HAVE_NL_LANGINFO
+char *
+nl_langinfo (nl_item n)  /* test stand-in defined under the libc function's name */
+{
+  return point_string;  /* same string for every item; n is not examined */
+}
+#endif
diff --git a/tests/cxx/t-assign.cc b/tests/cxx/t-assign.cc

new file mode 100644 (file)

index 0000000..407f726
--- /dev/null
+++ b/tests/cxx/t-assign.cc
@@ -0,0 +1,516 @@
+/* Test mp*_class assignment operators.
+
+Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#include <iostream>
+#include <string>
+
+#include "gmp.h"
+#include "gmpxx.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+using namespace std;
+
+
+void
+check_mpz (void)
+{
+  // operator=(const mpz_class &)
+  {
+    mpz_class a(123), b;
+    b = a; ASSERT_ALWAYS(b == 123);
+  }
+
+  // template <class T, class U> operator=(const __gmp_expr<T, U> &)
+  // not tested here, see t-unary.cc, t-binary.cc
+
+  // operator=(signed char)
+  {
+    signed char a = -127;
+    mpz_class b;
+    b = a; ASSERT_ALWAYS(b == -127);
+  }
+
+  // operator=(unsigned char)
+  {
+    unsigned char a = 255;
+    mpz_class b;
+    b = a; ASSERT_ALWAYS(b == 255);
+  }
+
+  // either signed or unsigned char, machine dependent
+  {
+    mpz_class a;
+    a = 'A'; ASSERT_ALWAYS(a == 65);
+  }
+  {
+    mpz_class a;
+    a = 'z'; ASSERT_ALWAYS(a == 122);
+  }
+
+  // operator=(signed int)
+  {
+    signed int a = 0;
+    mpz_class b;
+    b = a; ASSERT_ALWAYS(b == 0);
+  }
+  {
+    signed int a = -123;
+    mpz_class b;
+    b = a; ASSERT_ALWAYS(b == -123);
+  }
+  {
+    signed int a = 32767;
+    mpz_class b;
+    b = a; ASSERT_ALWAYS(b == 32767);
+  }
+
+  // operator=(unsigned int)
+  {
+    unsigned int a = 65535u;
+    mpz_class b;
+    b = a; ASSERT_ALWAYS(b == 65535u);
+  }
+
+  // operator=(signed short int)
+  {
+    signed short int a = -12345;
+    mpz_class b;
+    b = a; ASSERT_ALWAYS(b == -12345);
+  }
+
+  // operator=(unsigned short int)
+  {
+    unsigned short int a = 54321u;
+    mpz_class b;
+    b = a; ASSERT_ALWAYS(b == 54321u);
+  }
+
+  // operator=(signed long int)
+  {
+    signed long int a = -1234567890L;
+    mpz_class b;
+    b = a; ASSERT_ALWAYS(b == -1234567890L);
+  }
+
+  // operator=(unsigned long int)
+  {
+    unsigned long int a = 3456789012UL;
+    mpz_class b;
+    b = a; ASSERT_ALWAYS(b == 3456789012UL);
+  }
+
+  // operator=(float)
+  {
+    float a = 123.0;
+    mpz_class b;
+    b = a; ASSERT_ALWAYS(b == 123);
+  }
+
+  // operator=(double)
+  {
+    double a = 0.0;
+    mpz_class b;
+    b = a; ASSERT_ALWAYS(b == 0);
+  }
+  {
+    double a = -12.375;
+    mpz_class b;
+    b = a; ASSERT_ALWAYS(b == -12);
+  }
+  {
+    double a = 6.789e+3;
+    mpz_class b;
+    b = a; ASSERT_ALWAYS(b == 6789);
+  }
+  {
+    double a = 9.375e-1;
+    mpz_class b;
+    b = a; ASSERT_ALWAYS(b == 0);
+  }
+
+  // operator=(long double)
+  // currently not implemented
+
+  // operator=(const char *)
+  {
+    const char *a = "1234567890";
+    mpz_class b;
+    b = a; ASSERT_ALWAYS(b == 1234567890L);
+  }
+
+  // operator=(const std::string &)
+  {
+    string a("1234567890");
+    mpz_class b;
+    b = a; ASSERT_ALWAYS(b == 1234567890L);
+  }
+
+  // operator=(const char *) with invalid
+  {
+    try {
+      const char *a = "abc";
+      mpz_class b;
+      b = a;
+      ASSERT_ALWAYS (0);  /* should not be reached */
+    } catch (invalid_argument) {
+    }
+  }
+
+  // operator=(const std::string &) with invalid
+  {
+    try {
+      string a("def");
+      mpz_class b;
+      b = a;
+      ASSERT_ALWAYS (0);  /* should not be reached */
+    } catch (invalid_argument) {
+    }
+  }
+}
+
+void
+check_mpq (void)
+{
+  // operator=(const mpq_class &)
+  {
+    mpq_class a(1, 2), b;
+    b = a; ASSERT_ALWAYS(b == 0.5);
+  }
+
+  // template <class T, class U> operator=(const __gmp_expr<T, U> &)
+  // not tested here, see t-unary.cc, t-binary.cc
+
+  // operator=(signed char)
+  {
+    signed char a = -127;
+    mpq_class b;
+    b = a; ASSERT_ALWAYS(b == -127);
+  }
+
+  // operator=(unsigned char)
+  {
+    unsigned char a = 255;
+    mpq_class b;
+    b = a; ASSERT_ALWAYS(b == 255);
+  }
+
+  // either signed or unsigned char, machine dependent
+  {
+    mpq_class a;
+    a = 'A'; ASSERT_ALWAYS(a == 65);
+  }
+  {
+    mpq_class a;
+    a = 'z'; ASSERT_ALWAYS(a == 122);
+  }
+
+  // operator=(signed int)
+  {
+    signed int a = 0;
+    mpq_class b;
+    b = a; ASSERT_ALWAYS(b == 0);
+  }
+  {
+    signed int a = -123;
+    mpq_class b;
+    b = a; ASSERT_ALWAYS(b == -123);
+  }
+  {
+    signed int a = 32767;
+    mpq_class b;
+    b = a; ASSERT_ALWAYS(b == 32767);
+  }
+
+  // operator=(unsigned int)
+  {
+    unsigned int a = 65535u;
+    mpq_class b;
+    b = a; ASSERT_ALWAYS(b == 65535u);
+  }
+
+  // operator=(signed short int)
+  {
+    signed short int a = -12345;
+    mpq_class b;
+    b = a; ASSERT_ALWAYS(b == -12345);
+  }
+
+  // operator=(unsigned short int)
+  {
+    unsigned short int a = 54321u;
+    mpz_class b;
+    b = a; ASSERT_ALWAYS(b == 54321u);
+  }
+
+  // operator=(signed long int)
+  {
+    signed long int a = -1234567890L;
+    mpq_class b;
+    b = a; ASSERT_ALWAYS(b == -1234567890L);
+  }
+
+  // operator=(unsigned long int)
+  {
+    unsigned long int a = 3456789012UL;
+    mpq_class b;
+    b = a; ASSERT_ALWAYS(b == 3456789012UL);
+  }
+
+  // operator=(float)
+  {
+    float a = 123.0;
+    mpq_class b;
+    b = a; ASSERT_ALWAYS(b == 123);
+  }
+
+  // operator=(double)
+  {
+    double a = 0.0;
+    mpq_class b;
+    b = a; ASSERT_ALWAYS(b == 0);
+  }
+  {
+    double a = -12.375;
+    mpq_class b;
+    b = a; ASSERT_ALWAYS(b == -12.375);
+  }
+  {
+    double a = 6.789e+3;
+    mpq_class b;
+    b = a; ASSERT_ALWAYS(b == 6789);
+  }
+  {
+    double a = 9.375e-1;
+    mpq_class b;
+    b = a; ASSERT_ALWAYS(b == 0.9375);
+  }
+
+  // operator=(long double)
+  // currently not implemented
+
+  // operator=(const char *)
+  {
+    const char *a = "1234567890";
+    mpq_class b;
+    b = a; ASSERT_ALWAYS(b == 1234567890L);
+  }
+
+  // operator=(const std::string &)
+  {
+    string a("1234567890");
+    mpq_class b;
+    b = a; ASSERT_ALWAYS(b == 1234567890L);
+  }
+
+  // operator=(const char *) with invalid
+  {
+    try {
+      const char *a = "abc";
+      mpq_class b;
+      b = a;
+      ASSERT_ALWAYS (0);  /* should not be reached */
+    } catch (invalid_argument) {
+    }
+  }
+
+  // operator=(const std::string &) with invalid
+  {
+    try {
+      string a("def");
+      mpq_class b;
+      b = a;
+      ASSERT_ALWAYS (0);  /* should not be reached */
+    } catch (invalid_argument) {
+    }
+  }
+}
+
+void
+check_mpf (void)
+{
+  // operator=(const mpf_class &)
+  {
+    mpf_class a(123), b;
+    b = a; ASSERT_ALWAYS(b == 123);
+  }
+
+  // template <class T, class U> operator=(const __gmp_expr<T, U> &)
+  // not tested here, see t-unary.cc, t-binary.cc
+
+  // operator=(signed char)
+  {
+    signed char a = -127;
+    mpf_class b;
+    b = a; ASSERT_ALWAYS(b == -127);
+  }
+
+  // operator=(unsigned char)
+  {
+    unsigned char a = 255;
+    mpf_class b;
+    b = a; ASSERT_ALWAYS(b == 255);
+  }
+
+  // either signed or unsigned char, machine dependent
+  {
+    mpf_class a;
+    a = 'A'; ASSERT_ALWAYS(a == 65);
+  }
+  {
+    mpf_class a;
+    a = 'z'; ASSERT_ALWAYS(a == 122);
+  }
+
+  // operator=(signed int)
+  {
+    signed int a = 0;
+    mpf_class b;
+    b = a; ASSERT_ALWAYS(b == 0);
+  }
+  {
+    signed int a = -123;
+    mpf_class b;
+    b = a; ASSERT_ALWAYS(b == -123);
+  }
+  {
+    signed int a = 32767;
+    mpf_class b;
+    b = a; ASSERT_ALWAYS(b == 32767);
+  }
+
+  // operator=(unsigned int)
+  {
+    unsigned int a = 65535u;
+    mpf_class b;
+    b = a; ASSERT_ALWAYS(b == 65535u);
+  }
+
+  // operator=(signed short int)
+  {
+    signed short int a = -12345;
+    mpf_class b;
+    b = a; ASSERT_ALWAYS(b == -12345);
+  }
+
+  // operator=(unsigned short int)
+  {
+    unsigned short int a = 54321u;
+    mpf_class b;
+    b = a; ASSERT_ALWAYS(b == 54321u);
+  }
+
+  // operator=(signed long int)
+  {
+    signed long int a = -1234567890L;
+    mpf_class b;
+    b = a; ASSERT_ALWAYS(b == -1234567890L);
+  }
+
+  // operator=(unsigned long int)
+  {
+    unsigned long int a = 3456789012UL;
+    mpf_class b;
+    b = a; ASSERT_ALWAYS(b == 3456789012UL);
+  }
+
+  // operator=(float)
+  {
+    float a = 123.0;
+    mpf_class b;
+    b = a; ASSERT_ALWAYS(b == 123);
+  }
+
+  // operator=(double)
+  {
+    double a = 0.0;
+    mpf_class b;
+    b = a; ASSERT_ALWAYS(b == 0);
+  }
+  {
+    double a = -12.375;
+    mpf_class b;
+    b = a; ASSERT_ALWAYS(b == -12.375);
+  }
+  {
+    double a = 6.789e+3;
+    mpf_class b;
+    b = a; ASSERT_ALWAYS(b == 6789);
+  }
+  {
+    double a = 9.375e-1;
+    mpf_class b;
+    b = a; ASSERT_ALWAYS(b == 0.9375);
+  }
+
+  // operator=(long double)
+  // currently not implemented
+
+  // operator=(const char *)
+  {
+    const char *a = "1234567890";
+    mpf_class b;
+    b = a; ASSERT_ALWAYS(b == 1234567890L);
+  }
+
+  // operator=(const std::string &)
+  {
+    string a("1234567890");
+    mpf_class b;
+    b = a; ASSERT_ALWAYS(b == 1234567890L);
+  }
+
+  // operator=(const char *) with invalid
+  {
+    try {
+      const char *a = "abc";
+      mpf_class b;
+      b = a;
+      ASSERT_ALWAYS (0);  /* should not be reached */
+    } catch (invalid_argument) {
+    }
+  }
+
+  // operator=(const std::string &) with invalid
+  {
+    try {
+      string a("def");
+      mpf_class b;
+      b = a;
+      ASSERT_ALWAYS (0);  /* should not be reached */
+    } catch (invalid_argument) {
+    }
+  }
+}
+
+
+int
+main (void)
+{
+  tests_start();
+
+  check_mpz();
+  check_mpq();
+  check_mpf();
+
+  tests_end();
+  return 0;
+}
diff --git a/tests/cxx/t-binary.cc b/tests/cxx/t-binary.cc

new file mode 100644 (file)

index 0000000..8099662
--- /dev/null
+++ b/tests/cxx/t-binary.cc
@@ -0,0 +1,447 @@
+/* Test mp*_class binary expressions.
+
+Copyright 2001, 2002, 2003, 2008 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#include <iostream>
+
+#include "gmp.h"
+#include "gmpxx.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+using namespace std;
+
+
+void
+check_mpz (void)
+{
+  // template <class T, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, T>, __gmp_expr<T, T>, Op> >
+  {
+    mpz_class a(1), b(2);
+    mpz_class c(a + b); ASSERT_ALWAYS(c == 3);
+  }
+  {
+    mpz_class a(3), b(4);
+    mpz_class c;
+    c = a * b; ASSERT_ALWAYS(c == 12);
+  }
+  {
+    mpz_class a(5), b(3);
+    mpz_class c;
+    c = a % b; ASSERT_ALWAYS(c == 2);
+  }
+
+  // template <class T, class U, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, T>, U, Op> >
+  {
+    mpz_class a(1);
+    signed int b = 3;
+    mpz_class c(a - b); ASSERT_ALWAYS(c == -2);
+  }
+  {
+    mpz_class a(-8);
+    unsigned int b = 2;
+    mpz_class c;
+    c = a / b; ASSERT_ALWAYS(c == -4);
+  }
+  {
+    mpz_class a(2);
+    double b = 3.0;
+    mpz_class c(a + b); ASSERT_ALWAYS(c == 5);
+  }
+  {
+    mpz_class a(4);
+    mpz_class b;
+    b = a + 0; ASSERT_ALWAYS(b == 4);
+  }
+
+  // template <class T, class U, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<U, __gmp_expr<T, T>, Op> >
+  {
+    mpz_class a(3);
+    signed int b = 9;
+    mpz_class c(b / a); ASSERT_ALWAYS(c == 3);
+  }
+
+  // template <class T, class U, class V, class W, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, U>, __gmp_expr<V, W>, Op> >
+  // type of result can't be mpz
+
+  // template <class T, class U, class V, class W, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<U, V>, __gmp_expr<T, W>, Op> >
+  // type of result can't be mpz
+
+  // template <class T, class U, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, T>, __gmp_expr<T, U>, Op> >
+  {
+    mpz_class a(3), b(4);
+    mpz_class c(a * (-b)); ASSERT_ALWAYS(c == -12);
+  }
+
+  // template <class T, class U, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, U>, __gmp_expr<T, T>, Op> >
+  {
+    mpz_class a(3), b(2), c(1);
+    mpz_class d;
+    d = (a % b) + c; ASSERT_ALWAYS(d == 2);
+  }
+
+  // template <class T, class U, class V, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, U>, V, Op> >
+  {
+    mpz_class a(-5);
+    unsigned int b = 2;
+    mpz_class c((-a) << b); ASSERT_ALWAYS(c == 20);
+  }
+  {
+    mpz_class a(5), b(-4);
+    signed int c = 3;
+    mpz_class d;
+    d = (a * b) >> c; ASSERT_ALWAYS(d == -3);
+  }
+
+  // template <class T, class U, class V, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<U, __gmp_expr<T, V>, Op> >
+  {
+    mpz_class a(2), b(4);
+    double c = 6;
+    mpz_class d(c / (a - b)); ASSERT_ALWAYS(d == -3);
+  }
+  {
+    mpz_class a(3), b(2);
+    double c = 1;
+    mpz_class d;
+    d = c + (a + b); ASSERT_ALWAYS(d == 6);
+  }
+
+  // template <class T, class U, class V, class W, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, U>, __gmp_expr<V, W>, Op> >
+  // type of result can't be mpz
+
+  // template <class T, class U, class V, class W, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<U, V>, __gmp_expr<T, W>, Op> >
+  // type of result can't be mpz
+
+  // template <class T, class U, class V, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, U>, __gmp_expr<T, V>, Op> >
+  {
+    mpz_class a(3), b(5), c(7);
+    mpz_class d;
+    d = (a - b) * (-c); ASSERT_ALWAYS(d == 14);
+  }
+
+  {
+    mpz_class a(0xcafe), b(0xbeef), c, want;
+    c = a & b; ASSERT_ALWAYS (c == 0x8aee);
+    c = a | b; ASSERT_ALWAYS (c == 0xfeff);
+    c = a ^ b; ASSERT_ALWAYS (c == 0x7411);
+    c = a & 0xbeef; ASSERT_ALWAYS (c == 0x8aee);
+    c = a | 0xbeef; ASSERT_ALWAYS (c == 0xfeff);
+    c = a ^ 0xbeef; ASSERT_ALWAYS (c == 0x7411);
+    c = a & -0xbeef; ASSERT_ALWAYS (c == 0x4010);
+    c = a | -0xbeef; ASSERT_ALWAYS (c == -0x3401);
+    c = a ^ -0xbeef; ASSERT_ALWAYS (c == -0x7411);
+    c = a & 48879.0; ASSERT_ALWAYS (c == 0x8aee);
+    c = a | 48879.0; ASSERT_ALWAYS (c == 0xfeff);
+    c = a ^ 48879.0; ASSERT_ALWAYS (c == 0x7411);
+
+    c = a | 1267650600228229401496703205376.0; // 2^100
+    want = "0x1000000000000000000000cafe";
+    ASSERT_ALWAYS (c == want);
+  }
+
+}
+
+void
+check_mpq (void)
+{
+  // template <class T, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, T>, __gmp_expr<T, T>, Op> >
+  {
+    mpq_class a(1, 2), b(3, 4);
+    mpq_class c(a + b); ASSERT_ALWAYS(c == 1.25);
+  }
+
+  // template <class T, class U, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, T>, U, Op> >
+  {
+    mpq_class a(1, 2);
+    signed int b = 3;
+    mpq_class c(a - b); ASSERT_ALWAYS(c == -2.5);
+  }
+  {
+    mpq_class a(1, 2);
+    mpq_class b;
+    b = a + 0; ASSERT_ALWAYS(b == 0.5);
+  }
+
+  // template <class T, class U, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<U, __gmp_expr<T, T>, Op> >
+  {
+    mpq_class a(2, 3);
+    signed int b = 4;
+    mpq_class c;
+    c = b / a; ASSERT_ALWAYS(c == 6);
+  }
+
+  // template <class T, class U, class V, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, T>, __gmp_expr<U, V>, Op> >
+  {
+    mpq_class a(1, 2);
+    mpz_class b(1);
+    mpq_class c(a + b); ASSERT_ALWAYS(c == 1.5);
+  }
+  {
+    mpq_class a(2, 3);
+    mpz_class b(1);
+    double c = 2.0;
+    mpq_class d;
+    d = a * (b + c); ASSERT_ALWAYS(d == 2);
+  }
+
+  // template <class T, class U, class V, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<U, V>, __gmp_expr<T, T>, Op> >
+  {
+    mpq_class a(2, 3);
+    mpz_class b(4);
+    mpq_class c(b / a); ASSERT_ALWAYS(c == 6);
+  }
+  {
+    mpq_class a(2, 3);
+    mpz_class b(1), c(4);
+    mpq_class d;
+    d = (b - c) * a; ASSERT_ALWAYS(d == -2);
+  }
+
+  // template <class T, class U, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, T>, __gmp_expr<T, U>, Op> >
+  {
+    mpq_class a(1, 3), b(3, 4);
+    mpq_class c;
+    c = a * (-b); ASSERT_ALWAYS(c == -0.25);
+  }
+
+  // template <class T, class U, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, U>, __gmp_expr<T, T>, Op> >
+  {
+    mpq_class a(1, 3), b(2, 3), c(1, 4);
+    mpq_class d((a / b) + c); ASSERT_ALWAYS(d == 0.75);
+  }
+
+  // template <class T, class U, class V, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, U>, V, Op> >
+  {
+    mpq_class a(3, 8);
+    unsigned int b = 4;
+    mpq_class c((-a) << b); ASSERT_ALWAYS(c == -6);
+  }
+
+  // template <class T, class U, class V, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<U, __gmp_expr<T, V>, Op> >
+  {
+    mpq_class a(1, 2), b(1, 4);
+    double c = 6.0;
+    mpq_class d;
+    d = c / (a + b); ASSERT_ALWAYS(d == 8);
+  }
+
+  // template <class T, class U, class V, class W, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, U>, __gmp_expr<V, W>, Op> >
+  {
+    mpq_class a(1, 2), b(1, 4);
+    mpz_class c(1);
+    mpq_class d((a + b) - c); ASSERT_ALWAYS(d == -0.25);
+  }
+  {
+    mpq_class a(1, 3), b(3, 2);
+    mpz_class c(2), d(4);
+    mpq_class e;
+    e = (a * b) / (c - d); ASSERT_ALWAYS(e == -0.25);
+  }
+
+  // template <class T, class U, class V, class W, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<U, V>, __gmp_expr<T, W>, Op> >
+  {
+    mpq_class a(1, 3), b(3, 4);
+    mpz_class c(-3);
+    mpq_class d(c * (a * b)); ASSERT_ALWAYS(d == -0.75);
+  }
+  {
+    mpq_class a(1, 3), b(3, 5);
+    mpz_class c(6);
+    signed int d = 4;
+    mpq_class e;
+    e = (c % d) / (a * b); ASSERT_ALWAYS(e == 10);
+  }
+
+  // template <class T, class U, class V, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, U>, __gmp_expr<T, V>, Op> >
+  {
+    mpq_class a(1, 3), b(3, 4), c(2, 5);
+    mpq_class d;
+    d = (a * b) / (-c); ASSERT_ALWAYS(d == -0.625);
+  }
+}
+
+void
+check_mpf (void)
+{
+  // template <class T, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, T>, __gmp_expr<T, T>, Op> >
+  {
+    mpf_class a(1), b(2);
+    mpf_class c(a + b); ASSERT_ALWAYS(c == 3);
+  }
+  {
+    mpf_class a(1.5), b(6);
+    mpf_class c;
+    c = a / b; ASSERT_ALWAYS(c == 0.25);
+  }
+
+  // template <class T, class U, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, T>, U, Op> >
+  {
+    mpf_class a(1);
+    signed int b = -2;
+    mpf_class c(a - b); ASSERT_ALWAYS(c == 3);
+  }
+  {
+    mpf_class a(2);
+    mpf_class b;
+    b = a + 0; ASSERT_ALWAYS(b == 2);
+  }
+
+  // template <class T, class U, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<U, __gmp_expr<T, T>, Op> >
+  {
+    mpf_class a(2);
+    unsigned int b = 3;
+    mpf_class c;
+    c = b / a; ASSERT_ALWAYS(c == 1.5);
+  }
+
+  // template <class T, class U, class V, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, T>, __gmp_expr<U, V>, Op> >
+  {
+    mpf_class a(2);
+    mpz_class b(3);
+    mpf_class c(a - b); ASSERT_ALWAYS(c == -1);
+  }
+  {
+    mpf_class a(3);
+    mpz_class b(2), c(1);
+    mpf_class d;
+    d = a * (b + c); ASSERT_ALWAYS(d == 9);
+  }
+
+  // template <class T, class U, class V, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<U, V>, __gmp_expr<T, T>, Op> >
+  {
+    mpf_class a(6);
+    mpq_class b(3, 4);
+    mpf_class c(a * b); ASSERT_ALWAYS(c == 4.5);
+  }
+
+  // template <class T, class U, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, T>, __gmp_expr<T, U>, Op> >
+  {
+    mpf_class a(2), b(-3);
+    mpf_class c;
+    c = a * (-b); ASSERT_ALWAYS(c == 6);
+  }
+
+  // template <class T, class U, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, U>, __gmp_expr<T, T>, Op> >
+  {
+    mpf_class a(3), b(4), c(5);
+    mpf_class d;
+    d = (a / b) - c; ASSERT_ALWAYS(d == -4.25);
+  }
+
+  // template <class T, class U, class V, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, U>, V, Op> >
+  {
+    mpf_class a(3);
+    unsigned int b = 2;
+    mpf_class c((-a) >> b); ASSERT_ALWAYS(c == -0.75);
+  }
+
+  // template <class T, class U, class V, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<U, __gmp_expr<T, V>, Op> >
+  {
+    mpf_class a(2), b(3);
+    double c = 5.0;
+    mpf_class d;
+    d = c / (a + b); ASSERT_ALWAYS(d == 1);
+  }
+
+  // template <class T, class U, class V, class W, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, U>, __gmp_expr<V, W>, Op> >
+  {
+    mpf_class a(2), b(3);
+    mpz_class c(4);
+    mpf_class d;
+    d = (a + b) * c; ASSERT_ALWAYS(d == 20);
+  }
+  {
+    mpf_class a(2), b(3);
+    mpq_class c(1, 2), d(1, 4);
+    mpf_class e;
+    e = (a * b) / (c + d); ASSERT_ALWAYS(e == 8);
+  }
+
+  // template <class T, class U, class V, class W, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<U, V>, __gmp_expr<T, W>, Op> >
+  {
+    mpf_class a(1), b(2);
+    mpq_class c(3);
+    mpf_class d(c / (a + b)); ASSERT_ALWAYS(d == 1);
+  }
+  {
+    mpf_class a(1);
+    mpz_class b(2);
+    mpq_class c(3, 4);
+    mpf_class d;
+    d = (-c) + (a + b); ASSERT_ALWAYS(d == 2.25);
+  }
+
+  // template <class T, class U, class V, class Op>
+  // __gmp_expr<T, __gmp_binary_expr<__gmp_expr<T, U>, __gmp_expr<T, V>, Op> >
+  {
+    mpf_class a(1), b(2), c(3);
+    mpf_class d;
+    d = (a + b) * (-c); ASSERT_ALWAYS(d == -9);
+  }
+}
+
+
+int
+main (void)
+{
+  tests_start();
+
+  check_mpz();
+  check_mpq();
+  check_mpf();
+
+  tests_end();
+  return 0;
+}
diff --git a/tests/cxx/t-cast.cc b/tests/cxx/t-cast.cc

new file mode 100644 (file)

index 0000000..58fda58
--- /dev/null
+++ b/tests/cxx/t-cast.cc
@@ -0,0 +1,57 @@
+/* Test g++ -Wold-style-cast cleanliness.
+
+Copyright 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmpxx.h"
+
+
+/* This code doesn't do anything when run, it just expands various C macros
+   to see that they don't trigger compile-time warnings from g++
+   -Wold-style-cast.  This option isn't used in a normal build, it has to be
+   added manually to make this test worthwhile.  */
+
+void
+check_macros (void)
+{
+  mpz_t          z;
+  long           l = 123;
+  unsigned long  u = 456;
+  int            i;
+  mp_limb_t      limb;
+
+  mpz_init_set_ui (z, 0L);
+  i = mpz_odd_p (z);
+  i = mpz_even_p (z);
+  i = mpz_cmp_si (z, l);
+  i = mpz_cmp_ui (z, u);
+  mpz_clear (z);
+
+  limb = GMP_NUMB_MASK;
+  limb = GMP_NUMB_MAX;
+  limb = GMP_NAIL_MASK;
+
+  mpn_divmod (&limb, &limb, 1, &limb, 1);
+  mpn_divexact_by3 (&limb, &limb, 1);
+}
+
+int
+main (void)
+{
+  return 0;
+}
diff --git a/tests/cxx/t-constr.cc b/tests/cxx/t-constr.cc

new file mode 100644 (file)

index 0000000..6d588d3
--- /dev/null
+++ b/tests/cxx/t-constr.cc
@@ -0,0 +1,755 @@
+/* Test mp*_class constructors.
+
+Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#include <iostream>
+#include <string>
+
+#include "gmp.h"
+#include "gmpxx.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+using namespace std;
+
+
+void
+check_mpz (void)
+{
+  // mpz_class()
+  {
+    mpz_class a; ASSERT_ALWAYS(a == 0);
+  }
+
+  // mpz_class(const mpz_class &)
+  // see below
+
+  // template <class T, class U> mpz_class(const __gmp_expr<T, U> &)
+  // not tested here, see t-unary.cc, t-binary.cc
+
+  // mpz_class(signed char)
+  {
+    signed char a = -127;
+    mpz_class b(a); ASSERT_ALWAYS(b == -127);
+  }
+
+  // mpz_class(unsigned char)
+  {
+    unsigned char a = 255;
+    mpz_class b(a); ASSERT_ALWAYS(b == 255);
+  }
+
+  // either signed or unsigned char, machine dependent
+  {
+    mpz_class a('A'); ASSERT_ALWAYS(a == 65);
+  }
+  {
+    mpz_class a('z'); ASSERT_ALWAYS(a == 122);
+  }
+
+  // mpz_class(signed int)
+  {
+    signed int a = 0;
+    mpz_class b(a); ASSERT_ALWAYS(b == 0);
+  }
+  {
+    signed int a = -123;
+    mpz_class b(a); ASSERT_ALWAYS(b == -123);
+  }
+  {
+    signed int a = 4567;
+    mpz_class b(a); ASSERT_ALWAYS(b == 4567);
+  }
+
+  // mpz_class(unsigned int)
+  {
+    unsigned int a = 890;
+    mpz_class b(a); ASSERT_ALWAYS(b == 890);
+  }
+
+  // mpz_class(signed short int)
+  {
+    signed short int a = -12345;
+    mpz_class b(a); ASSERT_ALWAYS(b == -12345);
+  }
+
+  // mpz_class(unsigned short int)
+  {
+    unsigned short int a = 54321u;
+    mpz_class b(a); ASSERT_ALWAYS(b == 54321u);
+  }
+
+  // mpz_class(signed long int)
+  {
+    signed long int a = -1234567890L;
+    mpz_class b(a); ASSERT_ALWAYS(b == -1234567890L);
+  }
+
+  // mpz_class(unsigned long int)
+  {
+    unsigned long int a = 1UL << 30;
+    mpz_class b(a); ASSERT_ALWAYS(b == 1073741824L);
+  }
+
+  // mpz_class(float)
+  {
+    float a = 123.45;
+    mpz_class b(a); ASSERT_ALWAYS(b == 123);
+  }
+
+  // mpz_class(double)
+  {
+    double a = 3.141592653589793238;
+    mpz_class b(a); ASSERT_ALWAYS(b == 3);
+  }
+
+  // mpz_class(long double)
+  // currently not implemented
+
+  // mpz_class(const char *)
+  {
+    const char *a = "1234567890";
+    mpz_class b(a); ASSERT_ALWAYS(b == 1234567890L);
+  }
+
+  // mpz_class(const char *, int)
+  {
+    const char *a = "FFFF";
+    int base = 16;
+    mpz_class b(a, base); ASSERT_ALWAYS(b == 65535u);
+  }
+
+  // mpz_class(const std::string &)
+  {
+    string a("1234567890");
+    mpz_class b(a); ASSERT_ALWAYS(b == 1234567890L);
+  }
+
+  // mpz_class(const std::string &, int)
+  {
+    string a("7777");
+    int base = 8;
+    mpz_class b(a, base); ASSERT_ALWAYS(b == 4095);
+  }
+
+  // mpz_class(const char *) with invalid
+  {
+    try {
+      const char *a = "ABC";
+      mpz_class b(a);
+      ASSERT_ALWAYS (0);  /* should not be reached */
+    } catch (invalid_argument) {
+    }
+  }
+
+  // mpz_class(const char *, int) with invalid
+  {
+    try {
+      const char *a = "GHI";
+      int base = 16;
+      mpz_class b(a, base);
+      ASSERT_ALWAYS (0);  /* should not be reached */
+    } catch (invalid_argument) {
+    }
+  }
+
+  // mpz_class(const std::string &) with invalid
+  {
+    try {
+      string a("abc");
+      mpz_class b(a);
+      ASSERT_ALWAYS (0);  /* should not be reached */
+    } catch (invalid_argument) {
+    }
+  }
+
+  // mpz_class(const std::string &, int) with invalid
+  {
+    try {
+      string a("ZZZ");
+      int base = 8;
+      mpz_class b(a, base);
+      ASSERT_ALWAYS (0);  /* should not be reached */
+    } catch (invalid_argument) {
+    }
+  }
+
+  // mpz_class(mpz_srcptr)
+  {
+    mpz_t a;
+    mpz_init_set_ui(a, 100);
+    mpz_class b(a); ASSERT_ALWAYS(b == 100);
+    mpz_clear(a);
+  }
+
+  // mpz_class(const mpz_class &)
+  {
+    mpz_class a(12345); // tested above, assume it works
+    mpz_class b(a); ASSERT_ALWAYS(b == 12345);
+  }
+
+  // no constructor for bool, but it gets casted to int
+  {
+    bool a = true;
+    mpz_class b(a); ASSERT_ALWAYS(b == 1);
+  }
+  {
+    bool a = false;
+    mpz_class b(a); ASSERT_ALWAYS(b == 0);
+  }
+}
+
+void
+check_mpq (void)
+{
+  // mpq_class()
+  {
+    mpq_class a; ASSERT_ALWAYS(a == 0);
+  }
+
+  // mpq_class(const mpq_class &)
+  // see below
+
+  // template <class T, class U> mpq_class(const __gmp_expr<T, U> &)
+  // not tested here, see t-unary.cc, t-binary.cc
+
+  // mpq_class(signed char)
+  {
+    signed char a = -127;
+    mpq_class b(a); ASSERT_ALWAYS(b == -127);
+  }
+
+  // mpq_class(unsigned char)
+  {
+    unsigned char a = 255;
+    mpq_class b(a); ASSERT_ALWAYS(b == 255);
+  }
+
+  // either signed or unsigned char, machine dependent
+  {
+    mpq_class a('A'); ASSERT_ALWAYS(a == 65);
+  }
+  {
+    mpq_class a('z'); ASSERT_ALWAYS(a == 122);
+  }
+
+  // mpq_class(signed int)
+  {
+    signed int a = 0;
+    mpq_class b(a); ASSERT_ALWAYS(b == 0);
+  }
+  {
+    signed int a = -123;
+    mpq_class b(a); ASSERT_ALWAYS(b == -123);
+  }
+  {
+    signed int a = 4567;
+    mpq_class b(a); ASSERT_ALWAYS(b == 4567);
+  }
+
+  // mpq_class(unsigned int)
+  {
+    unsigned int a = 890;
+    mpq_class b(a); ASSERT_ALWAYS(b == 890);
+  }
+
+  // mpq_class(signed short int)
+  {
+    signed short int a = -12345;
+    mpq_class b(a); ASSERT_ALWAYS(b == -12345);
+  }
+
+  // mpq_class(unsigned short int)
+  {
+    unsigned short int a = 54321u;
+    mpq_class b(a); ASSERT_ALWAYS(b == 54321u);
+  }
+
+  // mpq_class(signed long int)
+  {
+    signed long int a = -1234567890L;
+    mpq_class b(a); ASSERT_ALWAYS(b == -1234567890L);
+  }
+
+  // mpq_class(unsigned long int)
+  {
+    unsigned long int a = 1UL << 30;
+    mpq_class b(a); ASSERT_ALWAYS(b == 1073741824L);
+  }
+
+  // mpq_class(float)
+  {
+    float a = 0.625;
+    mpq_class b(a); ASSERT_ALWAYS(b == 0.625);
+  }
+
+  // mpq_class(double)
+  {
+    double a = 1.25;
+    mpq_class b(a); ASSERT_ALWAYS(b == 1.25);
+  }
+
+  // mpq_class(long double)
+  // currently not implemented
+
+  // mpq_class(const char *)
+  {
+    const char *a = "1234567890";
+    mpq_class b(a); ASSERT_ALWAYS(b == 1234567890L);
+  }
+
+  // mpq_class(const char *, int)
+  {
+    const char *a = "FFFF";
+    int base = 16;
+    mpq_class b(a, base); ASSERT_ALWAYS(b == 65535u);
+  }
+
+  // mpq_class(const std::string &)
+  {
+    string a("1234567890");
+    mpq_class b(a); ASSERT_ALWAYS(b == 1234567890L);
+  }
+
+  // mpq_class(const std::string &, int)
+  {
+    string a("7777");
+    int base = 8;
+    mpq_class b(a, base); ASSERT_ALWAYS(b == 4095);
+  }
+
+  // mpq_class(const char *) with invalid
+  {
+    try {
+      const char *a = "abc";
+      mpq_class b(a);
+      ASSERT_ALWAYS (0);  /* should not be reached */
+    } catch (invalid_argument) {
+    }
+  }
+
+  // mpq_class(const char *, int) with invalid
+  {
+    try {
+      const char *a = "ZZZ";
+      int base = 16;
+      mpq_class b (a, base);
+      ASSERT_ALWAYS (0);  /* should not be reached */
+    } catch (invalid_argument) {
+    }
+  }
+
+  // mpq_class(const std::string &) with invalid
+  {
+    try {
+      string a("abc");
+      mpq_class b(a);
+      ASSERT_ALWAYS (0);  /* should not be reached */
+    } catch (invalid_argument) {
+    }
+  }
+
+  // mpq_class(const std::string &, int) with invalid
+  {
+    try {
+      string a("ZZZ");
+      int base = 8;
+      mpq_class b (a, base);
+      ASSERT_ALWAYS (0);  /* should not be reached */
+    } catch (invalid_argument) {
+    }
+  }
+
+  // mpq_class(mpq_srcptr)
+  {
+    mpq_t a;
+    mpq_init(a);
+    mpq_set_ui(a, 100, 1);
+    mpq_class b(a); ASSERT_ALWAYS(b == 100);
+    mpq_clear(a);
+  }
+
+  // mpq_class(const mpz_class &, const mpz_class &)
+  {
+    mpz_class a(123), b(4); // tested above, assume it works
+    mpq_class c(a, b); ASSERT_ALWAYS(c == 30.75);
+  }
+  {
+    mpz_class a(-1), b(2);  // tested above, assume it works
+    mpq_class c(a, b); ASSERT_ALWAYS(c == -0.5);
+  }
+  {
+    mpz_class a(5), b(4); // tested above, assume it works
+    mpq_class c(a, b); ASSERT_ALWAYS(c == 1.25);
+  }
+
+  // mpq_class(const mpz_class &)
+  {
+    mpq_class a(12345); // tested above, assume it works
+    mpq_class b(a); ASSERT_ALWAYS(b == 12345);
+  }
+
+  // no constructor for bool, but it gets casted to int
+  {
+    bool a = true;
+    mpq_class b(a); ASSERT_ALWAYS(b == 1);
+  }
+  {
+    bool a = false;
+    mpq_class b(a); ASSERT_ALWAYS(b == 0);
+  }
+}
+
+// Exercise each mpf_class constructor overload and check the value it builds.
+void check_mpf (void)
+{
+  // mpf_class()
+  {
+    mpf_class a; ASSERT_ALWAYS(a == 0);
+  }
+
+  // mpf_class(const mpf_class &)
+  // mpf_class(const mpf_class &, unsigned long int)
+  // see below
+
+  // template <class T, class U> mpf_class(const __gmp_expr<T, U> &)
+  // template <class T, class U> mpf_class(const __gmp_expr<T, U> &,
+  //                                       unsigned long int)
+  // not tested here, see t-unary.cc, t-binary.cc
+
+  // mpf_class(signed char)
+  {
+    signed char a = -127;
+    mpf_class b(a); ASSERT_ALWAYS(b == -127);
+  }
+
+  // mpf_class(signed char, unsigned long int)
+  {
+    signed char a = -1;
+    int prec = 64;
+    mpf_class b(a, prec); ASSERT_ALWAYS(b == -1);
+  }
+
+  // mpf_class(unsigned char)
+  {
+    unsigned char a = 255;
+    mpf_class b(a); ASSERT_ALWAYS(b == 255);
+  }
+
+  // mpf_class(unsigned char, unsigned long int)
+  {
+    unsigned char a = 128;
+    int prec = 128;
+    mpf_class b(a, prec); ASSERT_ALWAYS(b == 128);
+  }
+
+  // either signed or unsigned char, machine dependent
+  {
+    mpf_class a('A'); ASSERT_ALWAYS(a == 65);
+  }
+  {
+    int prec = 256;
+    mpf_class a('z', prec); ASSERT_ALWAYS(a == 122);
+  }
+
+  // mpf_class(signed int)
+  {
+    signed int a = 0;
+    mpf_class b(a); ASSERT_ALWAYS(b == 0);
+  }
+  {
+    signed int a = -123;
+    mpf_class b(a); ASSERT_ALWAYS(b == -123);
+  }
+  {
+    signed int a = 4567;
+    mpf_class b(a); ASSERT_ALWAYS(b == 4567);
+  }
+
+  // mpf_class(signed int, unsigned long int)
+  {
+    signed int a = -123;
+    int prec = 64;
+    mpf_class b(a, prec); ASSERT_ALWAYS(b == -123);
+  }
+
+  // mpf_class(unsigned int)
+  {
+    unsigned int a = 890;
+    mpf_class b(a); ASSERT_ALWAYS(b == 890);
+  }
+
+  // mpf_class(unsigned int, unsigned long int)
+  {
+    unsigned int a = 890;
+    int prec = 128;
+    mpf_class b(a, prec); ASSERT_ALWAYS(b == 890);
+  }
+
+  // mpf_class(signed short int)
+  {
+    signed short int a = -12345;
+    mpf_class b(a); ASSERT_ALWAYS(b == -12345);
+  }
+
+  // mpf_class(signed short int, unsigned long int)
+  {
+    signed short int a = 6789;
+    int prec = 256;
+    mpf_class b(a, prec); ASSERT_ALWAYS(b == 6789);
+  }
+
+  // mpf_class(unsigned short int)
+  {
+    unsigned short int a = 54321u;
+    mpf_class b(a); ASSERT_ALWAYS(b == 54321u);
+  }
+
+  // mpf_class(unsigned short int, unsigned long int)
+  {
+    unsigned short int a = 54321u;
+    int prec = 64;
+    mpf_class b(a, prec); ASSERT_ALWAYS(b == 54321u);
+  }
+
+  // mpf_class(signed long int)
+  {
+    signed long int a = -1234567890L;
+    mpf_class b(a); ASSERT_ALWAYS(b == -1234567890L);
+  }
+
+  // mpf_class(signed long int, unsigned long int)
+  {
+    signed long int a = -1234567890L;
+    int prec = 128;
+    mpf_class b(a, prec); ASSERT_ALWAYS(b == -1234567890L);
+  }
+
+  // mpf_class(unsigned long int)
+  {
+    unsigned long int a = 3456789012UL;
+    mpf_class b(a); ASSERT_ALWAYS(b == 3456789012UL);
+  }
+
+  // mpf_class(unsigned long int, unsigned long int)
+  {
+    unsigned long int a = 3456789012UL;
+    int prec = 256;
+    mpf_class b(a, prec); ASSERT_ALWAYS(b == 3456789012UL);
+  }
+
+  // mpf_class(float)
+  {
+    float a = 1234.5;
+    mpf_class b(a); ASSERT_ALWAYS(b == 1234.5);
+  }
+
+  // mpf_class(float, unsigned long int)
+  {
+    float a = 1234.5;
+    int prec = 64;
+    mpf_class b(a, prec); ASSERT_ALWAYS(b == 1234.5);
+  }
+
+  // mpf_class(double)
+  {
+    double a = 12345.0;
+    mpf_class b(a); ASSERT_ALWAYS(b == 12345);
+  }
+  {
+    double a = 1.2345e+4;
+    mpf_class b(a); ASSERT_ALWAYS(b == 12345);
+  }
+  {
+    double a = 312.5e-2;
+    mpf_class b(a); ASSERT_ALWAYS(b == 3.125);
+  }
+
+  // mpf_class(double, unsigned long int)
+  {
+    double a = 5.4321e+4;
+    int prec = 128;
+    mpf_class b(a, prec); ASSERT_ALWAYS(b == 54321L);
+  }
+
+  // mpf_class(long double)
+  // mpf_class(long double, unsigned long int)
+  // currently not implemented
+
+  // mpf_class(const char *)
+  {
+    const char *a = "1234567890";
+    mpf_class b(a); ASSERT_ALWAYS(b == 1234567890L);
+  }
+
+  // mpf_class(const char *, unsigned long int, int = 0)
+  {
+    const char *a = "1234567890";
+    int prec = 256;
+    mpf_class b(a, prec); ASSERT_ALWAYS(b == 1234567890L);
+  }
+  {
+    const char *a = "777777";
+    int prec = 64, base = 8;
+    mpf_class b(a, prec, base); ASSERT_ALWAYS(b == 262143L);
+  }
+
+  // mpf_class(const std::string &)
+  {
+    string a("1234567890");
+    mpf_class b(a); ASSERT_ALWAYS(b == 1234567890L);
+  }
+
+  // mpf_class(const std::string &, unsigned long int, int = 0)
+  {
+    string a("1234567890");
+    int prec = 128;
+    mpf_class b(a, prec); ASSERT_ALWAYS(b == 1234567890L);
+  }
+  {
+    string a("FFFF");
+    int prec = 256, base = 16;
+    mpf_class b(a, prec, base); ASSERT_ALWAYS(b == 65535u);
+  }
+
+  // mpf_class(const char *) with invalid input
+  {
+    try {
+      const char *a = "abc";
+      mpf_class b(a);  // expected to throw
+      ASSERT_ALWAYS (0);  /* should not be reached */
+    } catch (const invalid_argument &) {
+    }
+  }
+
+  // mpf_class(const char *, unsigned long int, int = 0) with invalid input
+  {
+    try {
+      const char *a = "def";
+      int prec = 256;
+      mpf_class b(a, prec);  // expected to throw
+      ASSERT_ALWAYS (0);  /* should not be reached */
+    } catch (const invalid_argument &) {
+    }
+  }
+  {
+    try {
+      const char *a = "ghi";
+      int prec = 64, base = 8;
+      mpf_class b(a, prec, base);  // expected to throw
+      ASSERT_ALWAYS (0);  /* should not be reached */
+    } catch (const invalid_argument &) {
+    }
+  }
+
+  // mpf_class(const std::string &) with invalid input
+  {
+    try {
+      string a("abc");
+      mpf_class b(a);  // expected to throw
+      ASSERT_ALWAYS (0);  /* should not be reached */
+    } catch (const invalid_argument &) {
+    }
+  }
+
+  // mpf_class(const std::string &, unsigned long int, int = 0) with invalid input
+  {
+    try {
+      string a("def");
+      int prec = 128;
+      mpf_class b(a, prec);  // expected to throw
+      ASSERT_ALWAYS (0);  /* should not be reached */
+    } catch (const invalid_argument &) {
+    }
+  }
+  {
+    try {
+      string a("ghi");
+      int prec = 256, base = 16;
+      mpf_class b(a, prec, base);  // expected to throw
+      ASSERT_ALWAYS (0);  /* should not be reached */
+    } catch (const invalid_argument &) {
+    }
+  }
+
+  // mpf_class(mpf_srcptr)
+  {
+    mpf_t a;
+    mpf_init_set_ui(a, 100);
+    mpf_class b(a); ASSERT_ALWAYS(b == 100);
+    mpf_clear(a);
+  }
+
+  // mpf_class(mpf_srcptr, unsigned long int)
+  {
+    mpf_t a;
+    int prec = 64;
+    mpf_init_set_ui(a, 100);
+    mpf_class b(a, prec); ASSERT_ALWAYS(b == 100);
+    mpf_clear(a);
+  }
+
+  // mpf_class(const mpf_class &)
+  {
+    mpf_class a(12345); // tested above, assume it works
+    mpf_class b(a); ASSERT_ALWAYS(b == 12345);
+  }
+
+  // mpf_class(const mpf_class &, unsigned long int)
+  {
+    mpf_class a(12345); // tested above, assume it works
+    int prec = 64;
+    mpf_class b(a, prec); ASSERT_ALWAYS(b == 12345);
+  }
+
+  // no constructors for bool, but it gets casted to int
+  {
+    bool a = true;
+    mpf_class b(a); ASSERT_ALWAYS(b == 1);
+  }
+  {
+    bool a = false;
+    mpf_class b(a); ASSERT_ALWAYS(b == 0);
+  }
+  {
+    bool a = true;
+    int prec = 128;
+    mpf_class b(a, prec); ASSERT_ALWAYS(b == 1);
+  }
+  {
+    bool a = false;
+    int prec = 256;
+    mpf_class b(a, prec); ASSERT_ALWAYS(b == 0);
+  }
+}
+
+
+// Test driver: run the mpz, mpq and mpf constructor checks, in that order.
+int main (void)
+{
+  tests_start();
+
+  check_mpz();
+  check_mpq();
+  check_mpf();
+
+  tests_end();
+  return 0;
+}
diff --git a/tests/cxx/t-headers.cc b/tests/cxx/t-headers.cc

new file mode 100644 (file)

index 0000000..0f7b74f
--- /dev/null
+++ b/tests/cxx/t-headers.cc
@@ -0,0 +1,26 @@
+/* Test that gmpxx.h compiles correctly.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmpxx.h"
+
+// Nothing to run: this test passes as soon as including gmpxx.h compiles.
+int main (void)
+{
+  return 0;
+}
diff --git a/tests/cxx/t-istream.cc b/tests/cxx/t-istream.cc

new file mode 100644 (file)

index 0000000..59600be
--- /dev/null
+++ b/tests/cxx/t-istream.cc
@@ -0,0 +1,538 @@
+/* Test istream formatted input.
+
+Copyright 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <iostream>
+#include <cstdlib>
+#include <cstring>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+using namespace std;
+
+
+// Under option_check_standard, the various test cases for mpz operator>>
+// are put through the standard operator>> for long, and likewise mpf
+// operator>> is put through double.
+//
+// In g++ 3.3 this results in some printouts about the final position
+// indicated for something like ".e123".  Our mpf code stops at the "e"
+// since there are no mantissa digits, but g++ reads the whole thing and only
+// then decides it's bad.
+
+int   option_check_standard = 0;   // set to 1 by main() when argv[1] is "-s"
+
+
+// On some versions of g++ 2.96 it's been observed that putback() may leave
+// tellg() unchanged.  We believe this is incorrect and presumably the
+// result of a bug, since for instance it's ok in g++ 2.95 and g++ 3.3.  We
+// detect the problem at runtime and disable affected checks.
+
+int putback_tellg_works = 1;   // cleared by check_putback_tellg() if the bug shows
+
+// Probe for the putback()/tellg() bug; clear putback_tellg_works if it is hit.
+void check_putback_tellg (void)
+{
+  istringstream probe ("hello");
+  char  ch;
+
+  probe.get(ch);
+  const streampos  after_get = probe.tellg();
+  probe.putback(ch);
+  const streampos  after_putback = probe.tellg();
+
+  // A working putback() moves the get position back, so the two must differ.
+  if (after_get != after_putback)
+    return;
+
+  cout << "Warning, istringstream has a bug: putback() doesn't update tellg().\n"
+       << "Tests on tellg() will be skipped.\n";
+  putback_tellg_works = 0;
+}
+
+
+#define WRONG(str)  /* needs i, data, input at call site */     \
+  do {                                                          \
+    cout << str ", data[" << i << "]\n"                         \
+         << "  input: \"" << data[i].input << "\"\n"            \
+         << "  flags: " << hex << input.flags() << dec << "\n"; \
+  } while (0)
+
+// Table-driven test of the mpz_t operator>>: checks status, value and position.
+void check_mpz (void)
+{
+  static const struct {
+    const char     *input;     // text handed to operator>>
+    int            want_pos;   // expected stream offset afterwards; -1 = all of input
+    const char     *want;      // expected value as text; NULL = the read must fail
+    ios::fmtflags  flags;      // stream flags installed before reading
+
+  } data[] = {
+
+    { "0",      -1, "0",    (ios::fmtflags) 0 },
+    { "123",    -1, "123",  (ios::fmtflags) 0 },
+    { "0123",   -1, "83",   (ios::fmtflags) 0 },
+    { "0x123",  -1, "291",  (ios::fmtflags) 0 },
+    { "-123",   -1, "-123", (ios::fmtflags) 0 },
+    { "-0123",  -1, "-83",  (ios::fmtflags) 0 },
+    { "-0x123", -1, "-291", (ios::fmtflags) 0 },
+    { "+123",   -1, "123", (ios::fmtflags) 0 },
+    { "+0123",  -1, "83",  (ios::fmtflags) 0 },
+    { "+0x123", -1, "291", (ios::fmtflags) 0 },
+
+    { "0",     -1, "0",    ios::dec },
+    { "1f",     1, "1",    ios::dec },
+    { "011f",   3, "11",   ios::dec },
+    { "123",   -1, "123",  ios::dec },
+    { "-1f",    2, "-1",   ios::dec },
+    { "-011f",  4, "-11",  ios::dec },
+    { "-123",  -1, "-123", ios::dec },
+    { "+1f",    2, "1",    ios::dec },
+    { "+011f",  4, "11",   ios::dec },
+    { "+123",  -1, "123",  ios::dec },
+
+    { "0",    -1, "0",   ios::oct },
+    { "123",  -1, "83",  ios::oct },
+    { "-123", -1, "-83", ios::oct },
+    { "+123", -1, "83",  ios::oct },
+
+    { "0",    -1, "0",    ios::hex },
+    { "123",  -1, "291",  ios::hex },
+    { "ff",   -1, "255",  ios::hex },
+    { "FF",   -1, "255",  ios::hex },
+    { "-123", -1, "-291", ios::hex },
+    { "-ff",  -1, "-255", ios::hex },
+    { "-FF",  -1, "-255", ios::hex },
+    { "+123", -1, "291",  ios::hex },
+    { "+ff",  -1, "255",  ios::hex },
+    { "+FF",  -1, "255",  ios::hex },
+    { "ab",   -1, "171",  ios::hex },
+    { "cd",   -1, "205",  ios::hex },
+    { "ef",   -1, "239",  ios::hex },
+
+    { " 123",  0, NULL,  (ios::fmtflags) 0 },   // not without skipws
+    { " 123", -1, "123", ios::skipws },
+  };
+
+  mpz_t      got, want;
+  int        got_ok, want_ok;
+  long       got_si, want_si;
+  streampos  init_tellg, got_pos, want_pos;
+
+  mpz_init (got);
+  mpz_init (want);
+
+  for (size_t i = 0; i < numberof (data); i++)   // WRONG() uses i, data, input by name
+    {
+      want_pos = (data[i].want_pos == -1
+                  ? strlen (data[i].input) : data[i].want_pos);
+
+      want_ok = (data[i].want != NULL);
+
+      if (data[i].want != NULL)
+        mpz_set_str_or_abort (want, data[i].want, 0);
+      else
+        mpz_set_ui (want, 0L);
+      // Cross-check against operator>> for long: mismatches are printed, not fatal.
+      if (option_check_standard && mpz_fits_slong_p (want))
+        {
+          istringstream  input (data[i].input);
+          input.flags (data[i].flags);
+          init_tellg = input.tellg();
+          want_si = mpz_get_si (want);
+
+          input >> got_si;
+          got_ok = (input ? 1 : 0);   // record the status before it is cleared
+          input.clear();              // tellg() reports -1 while failbit is set
+          got_pos = input.tellg() - init_tellg;
+
+          if (got_ok != want_ok)
+            {
+              WRONG ("stdc++ operator>> wrong status, check_mpz");
+              cout << "  want_ok: " << want_ok << "\n";
+              cout << "  got_ok:  " << got_ok << "\n";
+            }
+          if (want_ok && got_si != want_si)
+            {
+              WRONG ("stdc++ operator>> wrong result, check_mpz");
+              cout << "  got_si:  " << got_si << "\n";
+              cout << "  want_si: " << want_si << "\n";
+            }
+          if (putback_tellg_works && got_pos != want_pos)
+            {
+              WRONG ("stdc++ operator>> wrong position, check_mpz");
+              cout << "  want_pos: " << want_pos << "\n";
+              cout << "  got_pos:  " << got_pos << "\n";
+            }
+        }
+      // The test proper, on the mpz_t operator>>: any mismatch below aborts.
+      {
+        istringstream  input (data[i].input);
+        input.flags (data[i].flags);
+        init_tellg = input.tellg();
+
+        mpz_set_ui (got, 0xDEAD);   // sentinel; presumably so a read that stores nothing shows
+        input >> got;
+        got_ok = (input ? 1 : 0);   // record the status before it is cleared
+        input.clear();              // tellg() reports -1 while failbit is set
+        got_pos = input.tellg() - init_tellg;
+
+        if (got_ok != want_ok)
+          {
+            WRONG ("mpz operator>> wrong status");
+            cout << "  want_ok: " << want_ok << "\n";
+            cout << "  got_ok:  " << got_ok << "\n";
+            abort ();
+          }
+        if (want_ok && mpz_cmp (got, want) != 0)
+          {
+            WRONG ("mpz operator>> wrong result");
+            mpz_trace ("  got ", got);
+            mpz_trace ("  want", want);
+            abort ();
+          }
+        if (putback_tellg_works && got_pos != want_pos)
+          {
+            WRONG ("mpz operator>> wrong position");
+            cout << "  want_pos: " << want_pos << "\n";
+            cout << "  got_pos:  " << got_pos << "\n";
+            abort ();
+          }
+      }
+    }
+
+  mpz_clear (got);
+  mpz_clear (want);
+}
+
+// Table-driven test of the mpq_t operator>>: checks status, value and position.
+void check_mpq (void)
+{
+  static const struct {
+    const char     *input;     // text handed to operator>>
+    int            want_pos;   // expected stream offset afterwards; -1 = all of input
+    const char     *want;      // expected value as text; NULL = the read must fail
+    ios::fmtflags  flags;      // stream flags installed before reading
+
+  } data[] = {
+
+    { "0",   -1, "0", (ios::fmtflags) 0 },
+    { "00",  -1, "0", (ios::fmtflags) 0 },
+    { "0x0", -1, "0", (ios::fmtflags) 0 },
+
+    { "123/456",   -1, "123/456", ios::dec },
+    { "0123/456",  -1, "123/456", ios::dec },
+    { "123/0456",  -1, "123/456", ios::dec },
+    { "0123/0456", -1, "123/456", ios::dec },
+
+    { "123/456",   -1, "83/302", ios::oct },
+    { "0123/456",  -1, "83/302", ios::oct },
+    { "123/0456",  -1, "83/302", ios::oct },
+    { "0123/0456", -1, "83/302", ios::oct },
+
+    { "ab",   -1, "171",  ios::hex },
+    { "cd",   -1, "205",  ios::hex },
+    { "ef",   -1, "239",  ios::hex },
+
+    { "0/0",     -1, "0/0", (ios::fmtflags) 0 },
+    { "5/8",     -1, "5/8", (ios::fmtflags) 0 },
+    { "0x5/0x8", -1, "5/8", (ios::fmtflags) 0 },
+
+    { "123/456",   -1, "123/456",  (ios::fmtflags) 0 },
+    { "123/0456",  -1, "123/302",  (ios::fmtflags) 0 },
+    { "123/0x456", -1, "123/1110", (ios::fmtflags) 0 },
+    { "123/0X456", -1, "123/1110", (ios::fmtflags) 0 },
+
+    { "0123/123",   -1, "83/123", (ios::fmtflags) 0 },
+    { "0123/0123",  -1, "83/83",  (ios::fmtflags) 0 },
+    { "0123/0x123", -1, "83/291", (ios::fmtflags) 0 },
+    { "0123/0X123", -1, "83/291", (ios::fmtflags) 0 },
+
+    { "0x123/123",   -1, "291/123", (ios::fmtflags) 0 },
+    { "0X123/0123",  -1, "291/83",  (ios::fmtflags) 0 },
+    { "0x123/0x123", -1, "291/291", (ios::fmtflags) 0 },
+
+    { " 123",  0, NULL,  (ios::fmtflags) 0 },   // not without skipws
+    { " 123", -1, "123", ios::skipws },
+  };
+
+  mpq_t      got, want;
+  int        got_ok, want_ok;
+  long       got_si, want_si;
+  streampos  init_tellg, got_pos, want_pos;
+
+  mpq_init (got);
+  mpq_init (want);
+
+  for (size_t i = 0; i < numberof (data); i++)   // WRONG() uses i, data, input by name
+    {
+      want_pos = (data[i].want_pos == -1
+                  ? strlen (data[i].input) : data[i].want_pos);
+
+      want_ok = (data[i].want != NULL);
+
+      if (data[i].want != NULL)
+        mpq_set_str_or_abort (want, data[i].want, 0);
+      else
+        mpq_set_ui (want, 0L, 1L);
+      // Cross-check integer-valued wants against operator>> for long; not fatal.
+      if (option_check_standard
+          && mpz_fits_slong_p (mpq_numref(want))
+          && mpz_cmp_ui (mpq_denref(want), 1L) == 0)
+        {
+          istringstream  input (data[i].input);
+          input.flags (data[i].flags);
+          init_tellg = input.tellg();
+          want_si = mpz_get_si (mpq_numref(want));
+
+          input >> got_si;
+          got_ok = (input ? 1 : 0);   // record the status before it is cleared
+          input.clear();              // tellg() reports -1 while failbit is set
+          got_pos = input.tellg() - init_tellg;
+
+          if (got_ok != want_ok)
+            {
+              WRONG ("stdc++ operator>> wrong status, check_mpq");
+              cout << "  want_ok: " << want_ok << "\n";
+              cout << "  got_ok:  " << got_ok << "\n";
+            }
+          if (want_ok && want_si != got_si)
+            {
+              WRONG ("stdc++ operator>> wrong result, check_mpq");
+              cout << "  got_si:  " << got_si << "\n";
+              cout << "  want_si: " << want_si << "\n";
+            }
+          if (putback_tellg_works && got_pos != want_pos)
+            {
+              WRONG ("stdc++ operator>> wrong position, check_mpq");
+              cout << "  want_pos: " << want_pos << "\n";
+              cout << "  got_pos:  " << got_pos << "\n";
+            }
+        }
+      // The test proper, on the mpq_t operator>>: any mismatch below aborts.
+      {
+        istringstream  input (data[i].input);
+        input.flags (data[i].flags);
+        init_tellg = input.tellg();
+        mpq_set_si (got, 0xDEAD, 0xBEEF);   // sentinel; presumably so a no-op read shows
+
+        input >> got;
+        got_ok = (input ? 1 : 0);   // record the status before it is cleared
+        input.clear();              // tellg() reports -1 while failbit is set
+        got_pos = input.tellg() - init_tellg;
+
+        if (got_ok != want_ok)
+          {
+            WRONG ("mpq operator>> wrong status");
+            cout << "  want_ok: " << want_ok << "\n";
+            cout << "  got_ok:  " << got_ok << "\n";
+            abort ();
+          }
+        // don't use mpq_equal, since we allow non-normalized values to be
+        // read, which can trigger ASSERTs in mpq_equal
+        if (want_ok && (mpz_cmp (mpq_numref (got), mpq_numref(want)) != 0
+                        || mpz_cmp (mpq_denref (got), mpq_denref(want)) != 0))
+          {
+            WRONG ("mpq operator>> wrong result");
+            mpq_trace ("  got ", got);
+            mpq_trace ("  want", want);
+            abort ();
+          }
+        if (putback_tellg_works && got_pos != want_pos)
+          {
+            WRONG ("mpq operator>> wrong position");
+            cout << "  want_pos: " << want_pos << "\n";
+            cout << "  got_pos:  " << got_pos << "\n";
+            abort ();
+          }
+      }
+    }
+
+  mpq_clear (got);
+  mpq_clear (want);
+}
+
+
+// Table-driven test of the mpf_t operator>>: checks status, value and position.
+void check_mpf (void)
+{
+  static const struct {
+    const char     *input;     // text handed to operator>>
+    int            want_pos;   // expected stream offset afterwards; -1 = all of input
+    const char     *want;      // expected value as text; NULL = the read must fail
+    ios::fmtflags  flags;      // stream flags installed before reading
+
+  } data[] = {
+
+    { "0",      -1, "0", (ios::fmtflags) 0 },
+    { "+0",     -1, "0", (ios::fmtflags) 0 },
+    { "-0",     -1, "0", (ios::fmtflags) 0 },
+    { "0.0",    -1, "0", (ios::fmtflags) 0 },
+    { "0.",     -1, "0", (ios::fmtflags) 0 },
+    { ".0",     -1, "0", (ios::fmtflags) 0 },
+    { "+.0",    -1, "0", (ios::fmtflags) 0 },
+    { "-.0",    -1, "0", (ios::fmtflags) 0 },
+    { "+0.00",  -1, "0", (ios::fmtflags) 0 },
+    { "-0.000", -1, "0", (ios::fmtflags) 0 },
+    { "+0.00",  -1, "0", (ios::fmtflags) 0 },
+    { "-0.000", -1, "0", (ios::fmtflags) 0 },
+    { "0.0e0",  -1, "0", (ios::fmtflags) 0 },
+    { "0.e0",   -1, "0", (ios::fmtflags) 0 },
+    { ".0e0",   -1, "0", (ios::fmtflags) 0 },
+    { "0.0e-0", -1, "0", (ios::fmtflags) 0 },
+    { "0.e-0",  -1, "0", (ios::fmtflags) 0 },
+    { ".0e-0",  -1, "0", (ios::fmtflags) 0 },
+    { "0.0e+0", -1, "0", (ios::fmtflags) 0 },
+    { "0.e+0",  -1, "0", (ios::fmtflags) 0 },
+    { ".0e+0",  -1, "0", (ios::fmtflags) 0 },
+
+    { "1",  -1,  "1", (ios::fmtflags) 0 },
+    { "+1", -1,  "1", (ios::fmtflags) 0 },
+    { "-1", -1, "-1", (ios::fmtflags) 0 },
+
+    { " 0",  0,  NULL, (ios::fmtflags) 0 },  // not without skipws
+    { " 0",  -1, "0", ios::skipws },
+    { " +0", -1, "0", ios::skipws },
+    { " -0", -1, "0", ios::skipws },
+
+    { "+-123", 1, NULL, (ios::fmtflags) 0 },
+    { "-+123", 1, NULL, (ios::fmtflags) 0 },
+    { "1e+-123", 3, NULL, (ios::fmtflags) 0 },
+    { "1e-+123", 3, NULL, (ios::fmtflags) 0 },
+
+    { "e123",   0, NULL, (ios::fmtflags) 0 }, // at least one mantissa digit
+    { ".e123",  1, NULL, (ios::fmtflags) 0 },
+    { "+.e123", 2, NULL, (ios::fmtflags) 0 },
+    { "-.e123", 2, NULL, (ios::fmtflags) 0 },
+
+    { "123e",   4, NULL, (ios::fmtflags) 0 }, // at least one exponent digit
+    { "123e-",  5, NULL, (ios::fmtflags) 0 },
+    { "123e+",  5, NULL, (ios::fmtflags) 0 },
+  };
+
+  mpf_t      got, want;
+  int        got_ok, want_ok;
+  double     got_d, want_d;
+  streampos  init_tellg, got_pos, want_pos;
+
+  mpf_init (got);
+  mpf_init (want);
+
+  for (size_t i = 0; i < numberof (data); i++)   // WRONG() uses i, data, input by name
+    {
+      want_pos = (data[i].want_pos == -1
+                  ? strlen (data[i].input) : data[i].want_pos);
+
+      want_ok = (data[i].want != NULL);
+
+      if (data[i].want != NULL)
+        mpf_set_str_or_abort (want, data[i].want, 0);
+      else
+        mpf_set_ui (want, 0L);
+      // Cross-check against operator>> for double when want converts exactly; not fatal.
+      want_d = mpf_get_d (want);
+      if (option_check_standard && mpf_cmp_d (want, want_d) == 0)
+        {
+          istringstream  input (data[i].input);
+          input.flags (data[i].flags);
+          init_tellg = input.tellg();
+
+          input >> got_d;
+          got_ok = (input ? 1 : 0);   // record the status before it is cleared
+          input.clear();              // tellg() reports -1 while failbit is set
+          got_pos = input.tellg() - init_tellg;
+
+          if (got_ok != want_ok)
+            {
+              WRONG ("stdc++ operator>> wrong status, check_mpf");
+              cout << "  want_ok: " << want_ok << "\n";
+              cout << "  got_ok:  " << got_ok << "\n";
+            }
+          if (want_ok && want_d != got_d)
+            {
+              WRONG ("stdc++ operator>> wrong result, check_mpf");
+              cout << "  got:   " << got_d << "\n";
+              cout << "  want:  " << want_d << "\n";
+            }
+          if (putback_tellg_works && got_pos != want_pos)
+            {
+              WRONG ("stdc++ operator>> wrong position, check_mpf");
+              cout << "  want_pos: " << want_pos << "\n";
+              cout << "  got_pos:  " << got_pos << "\n";
+            }
+        }
+      // The test proper, on the mpf_t operator>>: any mismatch below aborts.
+      {
+        istringstream  input (data[i].input);
+        input.flags (data[i].flags);
+        init_tellg = input.tellg();
+
+        mpf_set_ui (got, 0xDEAD);   // sentinel; presumably so a read that stores nothing shows
+        input >> got;
+        got_ok = (input ? 1 : 0);   // record the status before it is cleared
+        input.clear();              // tellg() reports -1 while failbit is set
+        got_pos = input.tellg() - init_tellg;
+
+        if (got_ok != want_ok)
+          {
+            WRONG ("mpf operator>> wrong status");
+            cout << "  want_ok: " << want_ok << "\n";
+            cout << "  got_ok:  " << got_ok << "\n";
+            abort ();
+          }
+        if (want_ok && mpf_cmp (got, want) != 0)
+          {
+            WRONG ("mpf operator>> wrong result");
+            mpf_trace ("  got ", got);
+            mpf_trace ("  want", want);
+            abort ();
+          }
+        if (putback_tellg_works && got_pos != want_pos)
+          {
+            WRONG ("mpf operator>> wrong position");
+            cout << "  want_pos: " << want_pos << "\n";
+            cout << "  got_pos:  " << got_pos << "\n";
+            abort ();
+          }
+      }
+    }
+
+  mpf_clear (got);
+  mpf_clear (want);
+}
+
+
+
+// Driver: "-s" enables option_check_standard; then all checks run in order.
+int main (int argc, char *argv[])
+{
+  void (*const checks[]) (void) = {
+    check_putback_tellg, check_mpz, check_mpq, check_mpf
+  };
+
+  if (argc >= 2 && strcmp (argv[1], "-s") == 0)
+    option_check_standard = 1;
+
+  tests_start ();
+  for (size_t k = 0; k < sizeof checks / sizeof checks[0]; k++)
+    checks[k] ();
+  tests_end ();
+  return 0;
+}
diff --git a/tests/cxx/t-locale.cc b/tests/cxx/t-locale.cc

new file mode 100644 (file)

index 0000000..24e08ff
--- /dev/null
+++ b/tests/cxx/t-locale.cc
@@ -0,0 +1,195 @@
+/* Test locale support in C++ functions.
+
+Copyright 2001, 2002, 2003, 2007 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <clocale>
+#include <iostream>
+#include <cstdlib>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+using namespace std;
+
+
+extern "C" {              // NOTE(review): C linkage presumably lets C code share it -- confirm
+  char point_string[2];   // [0] = decimal point under test, written by set_point()
+}
+
+#if HAVE_STD__LOCALE
+// numpunct<char> facet that reports point_string[0] as the decimal point.
+class my_numpunct : public numpunct<char> {
+ protected:
+  char do_decimal_point() const { return *point_string; }
+ public:
+  explicit my_numpunct (size_t r = 0) : numpunct<char>(r) { }
+};
+#endif
+
+// Make C the test decimal point: store it in point_string[0] and, when
+// HAVE_STD__LOCALE is set, install a global locale that reports it.
+void set_point (char c)
+{
+  *point_string = c;
+#if HAVE_STD__LOCALE
+  // the classic locale with its numpunct facet replaced by my_numpunct
+  locale::global (locale (locale::classic(), new my_numpunct ()));
+#endif
+}
+
+
+// For each test decimal point, read "<str1><point><str2>" and its negation
+// through the mpf_t operator>>, aborting on a failed read or a wrong value.
+void check_input (void)
+{
+  static const struct {
+    const char  *str1;   // text before the decimal point
+    const char  *str2;   // text after it (may carry an exponent)
+    double      want;    // value expected for the non-negated input
+  } data[] = {
+
+    { "1","",   1.0 },
+    { "1","0",  1.0 },
+    { "1","00", 1.0 },
+
+    { "","5",    0.5 },
+    { "0","5",   0.5 },
+    { "00","5",  0.5 },
+    { "00","50", 0.5 },
+
+    { "1","5",    1.5 },
+    { "1","5e1", 15.0 },
+  };
+  static char point[] = {
+    '.', ',', 'x', '\xFF'
+  };
+
+  mpf_t  got;
+  mpf_init (got);
+
+  for (size_t p = 0; p < numberof (point); p++)
+    {
+      const char  dp = point[p];
+      set_point (dp);
+
+      for (int neg = 0; neg <= 1; neg++)
+        {
+          for (size_t d = 0; d < numberof (data); d++)
+            {
+              // assemble [-]<str1><point><str2>
+              string  str (neg ? "-" : "");
+              str += data[d].str1;
+              str += dp;
+              str += data[d].str2;
+              const double  want = (neg ? -data[d].want : data[d].want);
+
+              // first the read itself must succeed ...
+              istringstream is (str.c_str());
+              mpf_set_ui (got, 123);   // dummy initial value
+              if (! (is >> got))
+                {
+                  cout << "istream mpf_t operator>> error\n"
+                       << "  point " << dp << "\n"
+                       << "  str   \"" << str << "\"\n"
+                       << "  localeconv point \""
+                       << localeconv()->decimal_point << "\"\n";
+                  abort ();
+                }
+
+              if (mpf_cmp_d (got, want) != 0)
+                {
+                  cout << "istream mpf_t operator>> wrong\n"
+                       << "  point " << dp << "\n"
+                       << "  str   \"" << str << "\"\n"
+                       << "  got   " << got << "\n"
+                       << "  want  " << want << "\n"
+                       << "  localeconv point \""
+                       << localeconv()->decimal_point << "\"\n";
+                  abort ();
+                }
+            }
+        }
+    }
+
+  mpf_clear (got);
+}
+
+// Write 1.5 through the mpf_t operator<< under each test decimal point and
+// abort unless the text comes out as "1<point>5".
+void check_output (void)
+{
+  static char point[] = { '.', ',', 'x', '\xFF' };
+
+  for (size_t p = 0; p < numberof (point); p++)
+    {
+      const char  dp = point[p];
+      set_point (dp);
+
+      // construct the stream after set_point: a stream copies the global locale
+      ostringstream  got;
+      mpf_t  f;
+      mpf_init (f);
+      mpf_set_d (f, 1.5);
+      got << f;
+      mpf_clear (f);
+
+      string  want = string("1") + dp + "5";
+      if (got.str() == want)
+        continue;
+
+      cout << "ostream mpf_t operator<< doesn't respect locale\n"
+           << "  point " << dp << "\n"
+           << "  got   \"" << got.str() << "\"\n"
+           << "  want  \"" << want      << "\"\n";
+      abort ();
+    }
+}
+
+// Return 1 if setting the decimal point to 'x' makes 1.5 print as "1x5",
+// i.e. if set_point() really affects mpf_t output; otherwise return 0.
+int replacement_works (void)
+{
+  set_point ('x');
+  ostringstream  out;
+  mpf_t  one_and_half;
+  mpf_init (one_and_half);
+  mpf_set_d (one_and_half, 1.5);
+  out << one_and_half;
+  mpf_clear (one_and_half);
+  return out.str() == "1x5" ? 1 : 0;
+}
+
+// Driver: run the locale checks only when the decimal point can really be
+// replaced in this environment; otherwise report the skip.  The exit status
+// is 0 either way.
+int main (void)
+{
+  tests_start ();
+
+  if (! replacement_works())
+    cout << "Replacing decimal point didn't work, tests skipped\n";
+  else
+    {
+      check_input ();
+      check_output ();
+    }
+
+  tests_end ();
+  return 0;
+}
diff --git a/tests/cxx/t-misc.cc b/tests/cxx/t-misc.cc

new file mode 100644 (file)

index 0000000..19c5499
--- /dev/null
+++ b/tests/cxx/t-misc.cc
@@ -0,0 +1,385 @@
+/* Test mp*_class functions.
+
+Copyright 2002, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+/* Note that we don't use <climits> for LONG_MIN, but instead our own
+   definitions in gmp-impl.h.  In g++ 2.95.4 (debian 3.0) under
+   -mcpu=ultrasparc, limits.h sees __sparc_v9__ defined and assumes that
+   means long is 64-bit long, but it's only 32-bits, causing fatal compile
+   errors.  */
+
+#include "config.h"
+
+#include <string>
+
+#include "gmp.h"
+#include "gmpxx.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+using namespace std;
+
+
+void
+check_mpz (void)
+{
+  // mpz_class::fits_sint_p
+  {
+    int        fits;
+    mpz_class  z;
+    z = INT_MIN; fits = z.fits_sint_p(); ASSERT_ALWAYS (fits);
+    z--;         fits = z.fits_sint_p(); ASSERT_ALWAYS (! fits);
+    z = INT_MAX; fits = z.fits_sint_p(); ASSERT_ALWAYS (fits);
+    z++;         fits = z.fits_sint_p(); ASSERT_ALWAYS (! fits);
+  }
+
+  // mpz_class::fits_uint_p
+  {
+    int        fits;
+    mpz_class  z;
+    z = 0;        fits = z.fits_uint_p(); ASSERT_ALWAYS (fits);
+    z--;          fits = z.fits_uint_p(); ASSERT_ALWAYS (! fits);
+    z = UINT_MAX; fits = z.fits_uint_p(); ASSERT_ALWAYS (fits);
+    z++;          fits = z.fits_uint_p(); ASSERT_ALWAYS (! fits);
+  }
+
+  // mpz_class::fits_slong_p
+  {
+    int        fits;
+    mpz_class  z;
+    z = LONG_MIN; fits = z.fits_slong_p(); ASSERT_ALWAYS (fits);
+    z--;          fits = z.fits_slong_p(); ASSERT_ALWAYS (! fits);
+    z = LONG_MAX; fits = z.fits_slong_p(); ASSERT_ALWAYS (fits);
+    z++;          fits = z.fits_slong_p(); ASSERT_ALWAYS (! fits);
+  }
+
+  // mpz_class::fits_ulong_p
+  {
+    int        fits;
+    mpz_class  z;
+    z = 0;         fits = z.fits_ulong_p(); ASSERT_ALWAYS (fits);
+    z--;           fits = z.fits_ulong_p(); ASSERT_ALWAYS (! fits);
+    z = ULONG_MAX; fits = z.fits_ulong_p(); ASSERT_ALWAYS (fits);
+    z++;           fits = z.fits_ulong_p(); ASSERT_ALWAYS (! fits);
+  }
+
+  // mpz_class::fits_sshort_p
+  {
+    int        fits;
+    mpz_class  z;
+    z = SHRT_MIN; fits = z.fits_sshort_p(); ASSERT_ALWAYS (fits);
+    z--;          fits = z.fits_sshort_p(); ASSERT_ALWAYS (! fits);
+    z = SHRT_MAX; fits = z.fits_sshort_p(); ASSERT_ALWAYS (fits);
+    z++;          fits = z.fits_sshort_p(); ASSERT_ALWAYS (! fits);
+  }
+
+  // mpz_class::fits_ushort_p
+  {
+    int        fits;
+    mpz_class  z;
+    z = 0;         fits = z.fits_ushort_p(); ASSERT_ALWAYS (fits);
+    z--;           fits = z.fits_ushort_p(); ASSERT_ALWAYS (! fits);
+    z = USHRT_MAX; fits = z.fits_ushort_p(); ASSERT_ALWAYS (fits);
+    z++;           fits = z.fits_ushort_p(); ASSERT_ALWAYS (! fits);
+  }
+
+  // mpz_class::get_mpz_t
+  {
+    mpz_class  z(0);
+    mpz_ptr    p = z.get_mpz_t();
+    ASSERT_ALWAYS (mpz_cmp_ui (p, 0) == 0);
+  }
+  {
+    mpz_class  z(0);
+    mpz_srcptr p = z.get_mpz_t();
+    ASSERT_ALWAYS (mpz_cmp_ui (p, 0) == 0);
+  }
+
+  // mpz_class::get_d
+  // mpz_class::get_si
+  // mpz_class::get_ui
+  {
+    mpz_class  z(123);
+    { double d = z.get_d();  ASSERT_ALWAYS (d == 123.0); }
+    { long   l = z.get_si(); ASSERT_ALWAYS (l == 123L); }
+    { long   u = z.get_ui(); ASSERT_ALWAYS (u == 123L); }
+  }
+  {
+    mpz_class  z(-123);
+    { double d = z.get_d();  ASSERT_ALWAYS (d == -123.0); }
+    { long   l = z.get_si(); ASSERT_ALWAYS (l == -123L); }
+  }
+
+  // mpz_class::get_str
+  {
+    mpz_class  z(123);
+    string     s;
+    s = z.get_str(); ASSERT_ALWAYS (s == "123");
+    s = z.get_str(16); ASSERT_ALWAYS (s == "7b");
+    s = z.get_str(-16); ASSERT_ALWAYS (s == "7B");
+  }
+
+  // mpz_class::set_str
+  {
+    mpz_class  z;
+    int        ret;
+    ret = z.set_str ("123", 10);  ASSERT_ALWAYS (ret == 0 && z == 123);
+    ret = z.set_str ("7b",  16);  ASSERT_ALWAYS (ret == 0 && z == 123);
+    ret = z.set_str ("7B",  16);  ASSERT_ALWAYS (ret == 0 && z == 123);
+    ret = z.set_str ("0x7B", 0);  ASSERT_ALWAYS (ret == 0 && z == 123);
+
+    ret = z.set_str (string("123"), 10);  ASSERT_ALWAYS (ret == 0 && z == 123);
+    ret = z.set_str (string("7b"),  16);  ASSERT_ALWAYS (ret == 0 && z == 123);
+    ret = z.set_str (string("7B"),  16);  ASSERT_ALWAYS (ret == 0 && z == 123);
+    ret = z.set_str (string("0x7B"), 0);  ASSERT_ALWAYS (ret == 0 && z == 123);
+  }
+}
+
+void
+check_mpq (void)  // exercise the mpq_class member functions one by one
+{
+  // mpq_class::canonicalize -- 12/9 must reduce to 4/3
+  {
+    mpq_class  q(12,9);
+    q.canonicalize();
+    ASSERT_ALWAYS (q.get_num() == 4);
+    ASSERT_ALWAYS (q.get_den() == 3);
+  }
+
+  // mpq_class::get_d -- positive and negative integer values
+  {
+    mpq_class  q(123);
+    { double d = q.get_d();  ASSERT_ALWAYS (d == 123.0); }
+  }
+  {
+    mpq_class  q(-123);
+    { double d = q.get_d();  ASSERT_ALWAYS (d == -123.0); }
+  }
+
+  // mpq_class::get_mpq_t -- result must be assignable to mpq_ptr and mpq_srcptr
+  {
+    mpq_class  q(0);
+    mpq_ptr    p = q.get_mpq_t();
+    ASSERT_ALWAYS (mpq_cmp_ui (p, 0, 1) == 0);
+  }
+  {
+    mpq_class  q(0);
+    mpq_srcptr p = q.get_mpq_t();
+    ASSERT_ALWAYS (mpq_cmp_ui (p, 0, 1) == 0);
+  }
+
+  // mpq_class::get_num, mpq_class::get_den -- results assigned to an mpz_class
+  {
+    mpq_class  q(4,5);
+    mpz_class  z;
+    z = q.get_num(); ASSERT_ALWAYS (z == 4);
+    z = q.get_den(); ASSERT_ALWAYS (z == 5);
+  }
+
+  // mpq_class::get_num_mpz_t, mpq_class::get_den_mpz_t -- as mpz_ptr, mpz_srcptr
+  {
+    mpq_class  q(4,5);
+    mpz_ptr    p;
+    p = q.get_num_mpz_t(); ASSERT_ALWAYS (mpz_cmp_ui (p, 4) == 0);
+    p = q.get_den_mpz_t(); ASSERT_ALWAYS (mpz_cmp_ui (p, 5) == 0);
+  }
+  {
+    mpq_class  q(4,5);
+    mpz_srcptr p;
+    p = q.get_num_mpz_t(); ASSERT_ALWAYS (mpz_cmp_ui (p, 4) == 0);
+    p = q.get_den_mpz_t(); ASSERT_ALWAYS (mpz_cmp_ui (p, 5) == 0);
+  }
+
+  // mpq_class::get_str -- "num/den"; default base matches base 10
+  {
+    mpq_class  q(17,11);
+    string     s;
+    s = q.get_str();    ASSERT_ALWAYS (s == "17/11");
+    s = q.get_str(10);  ASSERT_ALWAYS (s == "17/11");
+    s = q.get_str(16);  ASSERT_ALWAYS (s == "11/b");  // 0x11 = 17, 0xb = 11
+    s = q.get_str(-16); ASSERT_ALWAYS (s == "11/B");  // base -16: upper case
+  }
+
+  // mpq_class::set_str -- const char* and string forms, 0 returned on success
+  {
+    mpq_class  q;
+    int        ret;
+    ret = q.set_str ("123", 10);     ASSERT_ALWAYS (ret == 0 && q == 123);
+    ret = q.set_str ("4/5", 10);     ASSERT_ALWAYS (ret == 0 && q == mpq_class(4,5));
+    ret = q.set_str ("7b",  16);     ASSERT_ALWAYS (ret == 0 && q == 123);
+    ret = q.set_str ("7B",  16);     ASSERT_ALWAYS (ret == 0 && q == 123);
+    ret = q.set_str ("0x7B", 0);     ASSERT_ALWAYS (ret == 0 && q == 123);
+    ret = q.set_str ("0x10/17", 0);  ASSERT_ALWAYS (ret == 0 && q == mpq_class(16,17));
+
+    ret = q.set_str (string("4/5"), 10);  ASSERT_ALWAYS (ret == 0 && q == mpq_class(4,5));
+    ret = q.set_str (string("123"), 10);  ASSERT_ALWAYS (ret == 0 && q == 123);
+    ret = q.set_str (string("7b"),  16);  ASSERT_ALWAYS (ret == 0 && q == 123);
+    ret = q.set_str (string("7B"),  16);  ASSERT_ALWAYS (ret == 0 && q == 123);
+    ret = q.set_str (string("0x7B"), 0);  ASSERT_ALWAYS (ret == 0 && q == 123);
+    ret = q.set_str (string("0x10/17"), 0);  ASSERT_ALWAYS (ret == 0 && q == mpq_class(16,17));
+  }
+}
+
+void
+check_mpf (void)
+{
+  // mpf_class::fits_sint_p
+  {
+    int        fits;
+    mpf_class  f (0, 2*8*sizeof(int));
+    f = INT_MIN; fits = f.fits_sint_p(); ASSERT_ALWAYS (fits);
+    f--;         fits = f.fits_sint_p(); ASSERT_ALWAYS (! fits);
+    f = INT_MAX; fits = f.fits_sint_p(); ASSERT_ALWAYS (fits);
+    f++;         fits = f.fits_sint_p(); ASSERT_ALWAYS (! fits);
+  }
+
+  // mpf_class::fits_uint_p
+  {
+    int        fits;
+    mpf_class  f (0, 2*8*sizeof(int));
+    f = 0;        fits = f.fits_uint_p(); ASSERT_ALWAYS (fits);
+    f--;          fits = f.fits_uint_p(); ASSERT_ALWAYS (! fits);
+    f = UINT_MAX; fits = f.fits_uint_p(); ASSERT_ALWAYS (fits);
+    f++;          fits = f.fits_uint_p(); ASSERT_ALWAYS (! fits);
+  }
+
+  // mpf_class::fits_slong_p
+  {
+    int        fits;
+    mpf_class  f (0, 2*8*sizeof(long));
+    f = LONG_MIN; fits = f.fits_slong_p(); ASSERT_ALWAYS (fits);
+    f--;          fits = f.fits_slong_p(); ASSERT_ALWAYS (! fits);
+    f = LONG_MAX; fits = f.fits_slong_p(); ASSERT_ALWAYS (fits);
+    f++;          fits = f.fits_slong_p(); ASSERT_ALWAYS (! fits);
+  }
+
+  // mpf_class::fits_ulong_p
+  {
+    int        fits;
+    mpf_class  f (0, 2*8*sizeof(long));
+    f = 0;         fits = f.fits_ulong_p(); ASSERT_ALWAYS (fits);
+    f--;           fits = f.fits_ulong_p(); ASSERT_ALWAYS (! fits);
+    f = ULONG_MAX; fits = f.fits_ulong_p(); ASSERT_ALWAYS (fits);
+    f++;           fits = f.fits_ulong_p(); ASSERT_ALWAYS (! fits);
+  }
+
+  // mpf_class::fits_sshort_p
+  {
+    int        fits;
+    mpf_class  f (0, 2*8*sizeof(short));
+    f = SHRT_MIN; fits = f.fits_sshort_p(); ASSERT_ALWAYS (fits);
+    f--;          fits = f.fits_sshort_p(); ASSERT_ALWAYS (! fits);
+    f = SHRT_MAX; fits = f.fits_sshort_p(); ASSERT_ALWAYS (fits);
+    f++;          fits = f.fits_sshort_p(); ASSERT_ALWAYS (! fits);
+  }
+
+  // mpf_class::fits_ushort_p
+  {
+    int        fits;
+    mpf_class  f (0, 2*8*sizeof(short));
+    f = 0;         fits = f.fits_ushort_p(); ASSERT_ALWAYS (fits);
+    f--;           fits = f.fits_ushort_p(); ASSERT_ALWAYS (! fits);
+    f = USHRT_MAX; fits = f.fits_ushort_p(); ASSERT_ALWAYS (fits);
+    f++;           fits = f.fits_ushort_p(); ASSERT_ALWAYS (! fits);
+  }
+
+  // mpf_class::get_d
+  // mpf_class::get_si
+  // mpf_class::get_ui
+  {
+    mpf_class  f(123);
+    { double d = f.get_d();  ASSERT_ALWAYS (d == 123.0); }
+    { long   l = f.get_si(); ASSERT_ALWAYS (l == 123L); }
+    { long   u = f.get_ui(); ASSERT_ALWAYS (u == 123L); }
+  }
+  {
+    mpf_class  f(-123);
+    { double d = f.get_d();  ASSERT_ALWAYS (d == -123.0); }
+    { long   l = f.get_si(); ASSERT_ALWAYS (l == -123L); }
+  }
+
+  // mpf_class::get_prec
+  {
+    mpf_class  f;
+    ASSERT_ALWAYS (f.get_prec() == mpf_get_default_prec());
+  }
+
+  // mpf_class::get_str
+  {
+    mpf_class  f(123);
+    string     s;
+    mp_exp_t   e;
+    s = f.get_str(e);        ASSERT_ALWAYS (s == "123" && e == 3);
+    s = f.get_str(e,  16);   ASSERT_ALWAYS (s == "7b"  && e == 2);
+    s = f.get_str(e, -16);   ASSERT_ALWAYS (s == "7B"  && e == 2);
+    s = f.get_str(e, 10, 2); ASSERT_ALWAYS (s == "12"  && e == 3);
+    s = f.get_str(e, 10, 1); ASSERT_ALWAYS (s == "1"   && e == 3);
+  }
+
+  // mpf_class::set_str
+  {
+    mpf_class  f;
+    int        ret;
+    ret = f.set_str ("123",     10);  ASSERT_ALWAYS (ret == 0 && f == 123);
+    ret = f.set_str ("123e1",   10);  ASSERT_ALWAYS (ret == 0 && f == 1230);
+    ret = f.set_str ("1230e-1", 10);  ASSERT_ALWAYS (ret == 0 && f == 123);
+    ret = f.set_str ("7b",      16);  ASSERT_ALWAYS (ret == 0 && f == 123);
+    ret = f.set_str ("7B",      16);  ASSERT_ALWAYS (ret == 0 && f == 123);
+    ret = f.set_str ("7B@1",    16);  ASSERT_ALWAYS (ret == 0 && f == 1968);
+    ret = f.set_str ("7B0@-1",  16);  ASSERT_ALWAYS (ret == 0 && f == 123);
+
+    ret = f.set_str (string("123"),     10);  ASSERT_ALWAYS (ret == 0 && f == 123);
+    ret = f.set_str (string("123e1"),   10);  ASSERT_ALWAYS (ret == 0 && f == 1230);
+    ret = f.set_str (string("1230e-1"), 10);  ASSERT_ALWAYS (ret == 0 && f == 123);
+    ret = f.set_str (string("7b"),      16);  ASSERT_ALWAYS (ret == 0 && f == 123);
+    ret = f.set_str (string("7B"),      16);  ASSERT_ALWAYS (ret == 0 && f == 123);
+    ret = f.set_str (string("7B@1"),    16);  ASSERT_ALWAYS (ret == 0 && f == 1968);
+    ret = f.set_str (string("7B0@-1"),  16);  ASSERT_ALWAYS (ret == 0 && f == 123);
+  }
+
+  // mpf_class::set_prec
+  {
+    mpf_class  f;
+    f.set_prec (256);
+    ASSERT_ALWAYS (f.get_prec () >= 256);
+  }
+
+  // mpf_class::set_prec_raw
+  {
+    mpf_class  f (0, 100 * GMP_NUMB_BITS);
+    f.set_prec_raw (5 * GMP_NUMB_BITS);
+    ASSERT_ALWAYS (f.get_prec () >= 5 * GMP_NUMB_BITS);
+    ASSERT_ALWAYS (f.get_prec () < 100 * GMP_NUMB_BITS);
+    f.set_prec_raw (100 * GMP_NUMB_BITS);
+  }
+}
+
+
+int
+main (void)
+{
+  tests_start ();
+  // Run the mpz, mpq and mpf member-function checks, in that order.
+  check_mpz ();
+  check_mpq ();
+  check_mpf ();
+  tests_end ();
+
+  return 0;
+}
diff --git a/tests/cxx/t-ops.cc b/tests/cxx/t-ops.cc

new file mode 100644 (file)

index 0000000..01fe3b0
--- /dev/null
+++ b/tests/cxx/t-ops.cc
@@ -0,0 +1,720 @@
+/* Test mp*_class operators and functions.
+
+Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#include <iostream>
+
+#include "gmp.h"
+#include "gmpxx.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+using namespace std;
+
+
+void
+check_mpz (void)  // exercise operators and free functions on mpz_class
+{
+  // unary operators and functions
+
+  // operator+
+  {
+    mpz_class a(1);
+    mpz_class b;
+    b = +a; ASSERT_ALWAYS(b == 1);
+  }
+
+  // operator-
+  {
+    mpz_class a(2);
+    mpz_class b;
+    b = -a; ASSERT_ALWAYS(b == -2);
+  }
+
+  // operator~ (bitwise complement: ~3 is -4)
+  {
+    mpz_class a(3);
+    mpz_class b;
+    b = ~a; ASSERT_ALWAYS(b == -4);
+  }
+
+  // abs
+  {
+    mpz_class a(-123);
+    mpz_class b;
+    b = abs(a); ASSERT_ALWAYS(b == 123);
+  }
+
+  // sqrt (exact for a perfect square, truncated otherwise)
+  {
+    mpz_class a(25);
+    mpz_class b;
+    b = sqrt(a); ASSERT_ALWAYS(b == 5);
+  }
+  {
+    mpz_class a(125);
+    mpz_class b;
+    b = sqrt(a); ASSERT_ALWAYS(b == 11); // round toward zero
+  }
+
+  // sgn (1, 0 or -1 for positive, zero, negative)
+  {
+    mpz_class a(123);
+    int b = sgn(a); ASSERT_ALWAYS(b == 1);
+  }
+  {
+    mpz_class a(0);
+    int b = sgn(a); ASSERT_ALWAYS(b == 0);
+  }
+  {
+    mpz_class a(-123);
+    int b = sgn(a); ASSERT_ALWAYS(b == -1);
+  }
+
+
+  // binary operators and functions
+
+  // operator+ (mpz + mpz, mpz + signed int, double + mpz)
+  {
+    mpz_class a(1), b(2);
+    mpz_class c;
+    c = a + b; ASSERT_ALWAYS(c == 3);
+  }
+  {
+    mpz_class a(3);
+    signed int b = 4;
+    mpz_class c;
+    c = a + b; ASSERT_ALWAYS(c == 7);
+  }
+  {
+    mpz_class a(5);
+    double b = 6.0;
+    mpz_class c;
+    c = b + a; ASSERT_ALWAYS(c == 11);
+  }
+
+  // operator-
+  {
+    mpz_class a(3), b(6);
+    mpz_class c;
+    c = a - b; ASSERT_ALWAYS(c == -3);
+  }
+
+  // operator* (mpz * mpz, and both operand orders with long / unsigned long)
+  {
+    mpz_class a(-2), b(4);
+    mpz_class c;
+    c = a * b; ASSERT_ALWAYS(c == -8);
+  }
+  {
+    mpz_class a(2);
+    long b = -4;
+    mpz_class c;
+    c = a * b; ASSERT_ALWAYS(c == -8);
+    c = b * a; ASSERT_ALWAYS(c == -8);
+  }
+  {
+    mpz_class a(-2);
+    unsigned long b = 4;
+    mpz_class c;
+    c = a * b; ASSERT_ALWAYS(c == -8);
+    c = b * a; ASSERT_ALWAYS(c == -8);
+  }
+
+  // operator/ and operator% (quotient truncates; remainder has dividend's sign)
+  {
+    mpz_class a(12), b(4);
+    mpz_class c;
+    c = a / b; ASSERT_ALWAYS(c == 3);
+    c = a % b; ASSERT_ALWAYS(c == 0);
+  }
+  {
+    mpz_class a(7), b(5);
+    mpz_class c;
+    c = a / b; ASSERT_ALWAYS(c == 1);
+    c = a % b; ASSERT_ALWAYS(c == 2);
+  }
+  {
+    mpz_class a(-10);
+    signed int ai = -10;
+    mpz_class b(3);
+    signed int bi = 3;
+    mpz_class c;
+    c = a / b;  ASSERT_ALWAYS(c == -3);
+    c = a % b;  ASSERT_ALWAYS(c == -1);
+    c = a / bi; ASSERT_ALWAYS(c == -3);
+    c = a % bi; ASSERT_ALWAYS(c == -1);
+    c = ai / b; ASSERT_ALWAYS(c == -3);
+    c = ai % b; ASSERT_ALWAYS(c == -1);
+  }
+  {
+    mpz_class a(-10);
+    signed int ai = -10;
+    mpz_class b(-3);
+    signed int bi = -3;
+    mpz_class c;
+    c = a / b;  ASSERT_ALWAYS(c == 3);
+    c = a % b;  ASSERT_ALWAYS(c == -1);
+    c = a / bi; ASSERT_ALWAYS(c == 3);
+    c = a % bi; ASSERT_ALWAYS(c == -1);
+    c = ai / b; ASSERT_ALWAYS(c == 3);
+    c = ai % b; ASSERT_ALWAYS(c == -1);
+  }
+  {
+    mpz_class a (LONG_MIN);
+    signed long ai = LONG_MIN;
+    mpz_class b = - mpz_class (LONG_MIN);  // -LONG_MIN, held as an mpz_class
+    mpz_class c;
+    c = a / b;  ASSERT_ALWAYS(c == -1);
+    c = a % b;  ASSERT_ALWAYS(c == 0);
+    c = ai / b; ASSERT_ALWAYS(c == -1);
+    c = ai % b; ASSERT_ALWAYS(c == 0);
+  }
+
+  // operator&
+  // operator|
+  // operator^   (the three above have no test in this function)
+
+  // operator<< (3 << 4 is 48)
+  {
+    mpz_class a(3);
+    unsigned int b = 4;
+    mpz_class c;
+    c = a << b; ASSERT_ALWAYS(c == 48);
+  }
+
+  // operator>> (127 >> 4 is 7)
+  {
+    mpz_class a(127);
+    unsigned int b = 4;
+    mpz_class c;
+    c = a >> b; ASSERT_ALWAYS(c == 7);
+  }
+
+  // operator==
+  // operator!=
+  // operator<
+  // operator<=
+  // operator>
+  // operator>=   (the six above have no dedicated test in this function)
+
+  // cmp (sign of the result, both operand orders, against each operand type)
+  {
+    mpz_class a(123), b(45);
+    int c;
+    c = cmp(a, b); ASSERT_ALWAYS(c > 0);
+    c = cmp(b, a); ASSERT_ALWAYS(c < 0);
+  }
+  {
+    mpz_class a(123);
+    unsigned long b = 45;
+    int c;
+    c = cmp(a, b); ASSERT_ALWAYS(c > 0);
+    c = cmp(b, a); ASSERT_ALWAYS(c < 0);
+  }
+  {
+    mpz_class a(123);
+    long b = 45;
+    int c;
+    c = cmp(a, b); ASSERT_ALWAYS(c > 0);
+    c = cmp(b, a); ASSERT_ALWAYS(c < 0);
+  }
+  {
+    mpz_class a(123);
+    double b = 45;
+    int c;
+    c = cmp(a, b); ASSERT_ALWAYS(c > 0);
+    c = cmp(b, a); ASSERT_ALWAYS(c < 0);
+  }
+
+
+  // ternary operators
+
+  // mpz_addmul (a + b*c, then a*b + c; mpz, unsigned, signed, double factor)
+  {
+    mpz_class a(1), b(2), c(3);
+    mpz_class d;
+    d = a + b * c; ASSERT_ALWAYS(d == 7);
+  }
+  {
+    mpz_class a(1), b(2);
+    unsigned int c = 3;
+    mpz_class d;
+    d = a + b * c; ASSERT_ALWAYS(d == 7);
+  }
+  {
+    mpz_class a(1), b(3);
+    unsigned int c = 2;
+    mpz_class d;
+    d = a + c * b; ASSERT_ALWAYS(d == 7);
+  }
+  {
+    mpz_class a(1), b(2);
+    signed int c = 3;
+    mpz_class d;
+    d = a + b * c; ASSERT_ALWAYS(d == 7);
+  }
+  {
+    mpz_class a(1), b(3);
+    signed int c = 2;
+    mpz_class d;
+    d = a + c * b; ASSERT_ALWAYS(d == 7);
+  }
+  {
+    mpz_class a(1), b(2);
+    double c = 3.0;
+    mpz_class d;
+    d = a + b * c; ASSERT_ALWAYS(d == 7);
+  }
+  {
+    mpz_class a(1), b(3);
+    double c = 2.0;
+    mpz_class d;
+    d = a + c * b; ASSERT_ALWAYS(d == 7);
+  }
+
+  {
+    mpz_class a(2), b(3), c(4);
+    mpz_class d;
+    d = a * b + c; ASSERT_ALWAYS(d == 10);
+  }
+  {
+    mpz_class a(2), b(4);
+    unsigned int c = 3;
+    mpz_class d;
+    d = a * c + b; ASSERT_ALWAYS(d == 10);
+  }
+  {
+    mpz_class a(3), b(4);
+    unsigned int c = 2;
+    mpz_class d;
+    d = c * a + b; ASSERT_ALWAYS(d == 10);
+  }
+  {
+    mpz_class a(2), b(4);
+    signed int c = 3;
+    mpz_class d;
+    d = a * c + b; ASSERT_ALWAYS(d == 10);
+  }
+  {
+    mpz_class a(3), b(4);
+    signed int c = 2;
+    mpz_class d;
+    d = c * a + b; ASSERT_ALWAYS(d == 10);
+  }
+  {
+    mpz_class a(2), b(4);
+    double c = 3.0;
+    mpz_class d;
+    d = a * c + b; ASSERT_ALWAYS(d == 10);
+  }
+  {
+    mpz_class a(3), b(4);
+    double c = 2.0;
+    mpz_class d;
+    d = c * a + b; ASSERT_ALWAYS(d == 10);
+  }
+
+  // mpz_submul (a - b*c, then a*b - c; mpz, unsigned, signed, double factor)
+  {
+    mpz_class a(1), b(2), c(3);
+    mpz_class d;
+    d = a - b * c; ASSERT_ALWAYS(d == -5);
+  }
+  {
+    mpz_class a(1), b(2);
+    unsigned int c = 3;
+    mpz_class d;
+    d = a - b * c; ASSERT_ALWAYS(d == -5);
+  }
+  {
+    mpz_class a(1), b(3);
+    unsigned int c = 2;
+    mpz_class d;
+    d = a - c * b; ASSERT_ALWAYS(d == -5);
+  }
+  {
+    mpz_class a(1), b(2);
+    signed int c = 3;
+    mpz_class d;
+    d = a - b * c; ASSERT_ALWAYS(d == -5);
+  }
+  {
+    mpz_class a(1), b(3);
+    signed int c = 2;
+    mpz_class d;
+    d = a - c * b; ASSERT_ALWAYS(d == -5);
+  }
+  {
+    mpz_class a(1), b(2);
+    double c = 3.0;
+    mpz_class d;
+    d = a - b * c; ASSERT_ALWAYS(d == -5);
+  }
+  {
+    mpz_class a(1), b(3);
+    double c = 2.0;
+    mpz_class d;
+    d = a - c * b; ASSERT_ALWAYS(d == -5);
+  }
+
+  {
+    mpz_class a(2), b(3), c(4);
+    mpz_class d;
+    d = a * b - c; ASSERT_ALWAYS(d == 2);
+  }
+  {
+    mpz_class a(2), b(4);
+    unsigned int c = 3;
+    mpz_class d;
+    d = a * c - b; ASSERT_ALWAYS(d == 2);
+  }
+  {
+    mpz_class a(3), b(4);
+    unsigned int c = 2;
+    mpz_class d;
+    d = c * a - b; ASSERT_ALWAYS(d == 2);
+  }
+  {
+    mpz_class a(2), b(4);
+    signed int c = 3;
+    mpz_class d;
+    d = a * c - b; ASSERT_ALWAYS(d == 2);
+  }
+  {
+    mpz_class a(3), b(4);
+    signed int c = 2;
+    mpz_class d;
+    d = c * a - b; ASSERT_ALWAYS(d == 2);
+  }
+  {
+    mpz_class a(2), b(4);
+    double c = 3.0;
+    mpz_class d;
+    d = a * c - b; ASSERT_ALWAYS(d == 2);
+  }
+  {
+    mpz_class a(3), b(4);
+    double c = 2.0;
+    mpz_class d;
+    d = c * a - b; ASSERT_ALWAYS(d == 2);
+  }
+}
+
+void
+check_mpq (void)  // exercise operators and free functions on mpq_class
+{
+  // unary operators and functions
+
+  // operator+ (results are compared against double constants)
+  {
+    mpq_class a(1, 2);
+    mpq_class b;
+    b = +a; ASSERT_ALWAYS(b == 0.5);
+  }
+
+  // operator-
+  {
+    mpq_class a(3, 4);
+    mpq_class b;
+    b = -a; ASSERT_ALWAYS(b == -0.75);
+  }
+
+  // abs
+  {
+    mpq_class a(-123);
+    mpq_class b;
+    b = abs(a); ASSERT_ALWAYS(b == 123);
+  }
+
+  // sgn (1, 0 or -1 for positive, zero, negative)
+  {
+    mpq_class a(123);
+    int b = sgn(a); ASSERT_ALWAYS(b == 1);
+  }
+  {
+    mpq_class a(0);
+    int b = sgn(a); ASSERT_ALWAYS(b == 0);
+  }
+  {
+    mpq_class a(-123);
+    int b = sgn(a); ASSERT_ALWAYS(b == -1);
+  }
+
+
+  // binary operators and functions
+
+  // operator+ (mpq + mpq, mpq + signed int, double + mpq)
+  {
+    mpq_class a(1, 2), b(3, 4);
+    mpq_class c;
+    c = a + b; ASSERT_ALWAYS(c == 1.25);
+  }
+  {
+    mpq_class a(1, 2);
+    signed int b = 2;
+    mpq_class c;
+    c = a + b; ASSERT_ALWAYS(c == 2.5);
+  }
+  {
+    mpq_class a(1, 2);
+    double b = 1.5;
+    mpq_class c;
+    c = b + a; ASSERT_ALWAYS(c == 2);
+  }
+
+  // operator- (1/2 - 3/4)
+  {
+    mpq_class a(1, 2), b(3, 4);
+    mpq_class c;
+    c = a - b; ASSERT_ALWAYS(c == -0.25);
+  }
+
+  // operator* (1/3 * 3/4)
+  {
+    mpq_class a(1, 3), b(3, 4);
+    mpq_class c;
+    c = a * b; ASSERT_ALWAYS(c == 0.25);
+  }
+
+  // operator/ (1/2 divided by 2/3)
+  {
+    mpq_class a(1, 2), b(2, 3);
+    mpq_class c;
+    c = a / b; ASSERT_ALWAYS(c == 0.75);
+  }
+
+  // operator<<
+  // operator>>
+  // operator==
+  // operator!=
+  // operator<
+  // operator<=
+  // operator>
+  // operator>=   (the eight above have no dedicated test in this function)
+
+  // cmp (sign of the result, both operand orders, against each operand type)
+  {
+    mpq_class a(123), b(45);
+    int c;
+    c = cmp(a, b); ASSERT_ALWAYS(c > 0);
+    c = cmp(b, a); ASSERT_ALWAYS(c < 0);
+  }
+  {
+    mpq_class a(123);
+    unsigned long b = 45;
+    int c;
+    c = cmp(a, b); ASSERT_ALWAYS(c > 0);
+    c = cmp(b, a); ASSERT_ALWAYS(c < 0);
+  }
+  {
+    mpq_class a(123);
+    long b = 45;
+    int c;
+    c = cmp(a, b); ASSERT_ALWAYS(c > 0);
+    c = cmp(b, a); ASSERT_ALWAYS(c < 0);
+  }
+  {
+    mpq_class a(123);
+    double b = 45;
+    int c;
+    c = cmp(a, b); ASSERT_ALWAYS(c > 0);
+    c = cmp(b, a); ASSERT_ALWAYS(c < 0);
+  }
+}
+
+void
+check_mpf (void)  // exercise operators and free functions on mpf_class
+{
+  // unary operators and functions
+
+  // operator+
+  {
+    mpf_class a(1);
+    mpf_class b;
+    b = +a; ASSERT_ALWAYS(b == 1);
+  }
+
+  // operator-
+  {
+    mpf_class a(2);
+    mpf_class b;
+    b = -a; ASSERT_ALWAYS(b == -2);
+  }
+
+  // abs
+  {
+    mpf_class a(-123);
+    mpf_class b;
+    b = abs(a); ASSERT_ALWAYS(b == 123);
+  }
+
+  // trunc (toward zero: 1.5 gives 1, -1.5 gives -1)
+  {
+    mpf_class a(1.5);
+    mpf_class b;
+    b = trunc(a); ASSERT_ALWAYS(b == 1);
+  }
+  {
+    mpf_class a(-1.5);
+    mpf_class b;
+    b = trunc(a); ASSERT_ALWAYS(b == -1);
+  }
+
+  // floor (downward: 1.9 gives 1, -1.1 gives -2)
+  {
+    mpf_class a(1.9);
+    mpf_class b;
+    b = floor(a); ASSERT_ALWAYS(b == 1);
+  }
+  {
+    mpf_class a(-1.1);
+    mpf_class b;
+    b = floor(a); ASSERT_ALWAYS(b == -2);
+  }
+
+  // ceil (upward: 1.1 gives 2, -1.9 gives -1)
+  {
+    mpf_class a(1.1);
+    mpf_class b;
+    b = ceil(a); ASSERT_ALWAYS(b == 2);
+  }
+  {
+    mpf_class a(-1.9);
+    mpf_class b;
+    b = ceil(a); ASSERT_ALWAYS(b == -1);
+  }
+
+  // sqrt (inputs chosen so the root, 5 or 1.5, compares equal exactly)
+  {
+    mpf_class a(25);
+    mpf_class b;
+    b = sqrt(a); ASSERT_ALWAYS(b == 5);
+  }
+  {
+    mpf_class a(2.25);
+    mpf_class b;
+    b = sqrt(a); ASSERT_ALWAYS(b == 1.5);
+  }
+
+  // sgn (1, 0 or -1 for positive, zero, negative)
+  {
+    mpf_class a(123);
+    int b = sgn(a); ASSERT_ALWAYS(b == 1);
+  }
+  {
+    mpf_class a(0);
+    int b = sgn(a); ASSERT_ALWAYS(b == 0);
+  }
+  {
+    mpf_class a(-123);
+    int b = sgn(a); ASSERT_ALWAYS(b == -1);
+  }
+
+
+  // binary operators and functions
+
+  // operator+
+  {
+    mpf_class a(1), b(2);
+    mpf_class c;
+    c = a + b; ASSERT_ALWAYS(c == 3);
+  }
+
+  // operator-
+  {
+    mpf_class a(3), b(4);
+    mpf_class c;
+    c = a - b; ASSERT_ALWAYS(c == -1);
+  }
+
+  // operator*
+  {
+    mpf_class a(2), b(5);
+    mpf_class c;
+    c = a * b; ASSERT_ALWAYS(c == 10);
+  }
+
+  // operator/ (7/4 is 1.75)
+  {
+    mpf_class a(7), b(4);
+    mpf_class c;
+    c = a / b; ASSERT_ALWAYS(c == 1.75);
+  }
+
+  // operator<<
+  // operator>>
+  // operator==
+  // operator!=
+  // operator<
+  // operator<=
+  // operator>
+  // operator>=   (the eight above have no dedicated test in this function)
+
+  // hypot (3-4-5 triangle)
+  {
+    mpf_class a(3), b(4);
+    mpf_class c;
+    c = hypot(a, b); ASSERT_ALWAYS(c == 5);
+  }
+
+  // cmp (sign of the result, both operand orders, against each operand type)
+  {
+    mpf_class a(123), b(45);
+    int c;
+    c = cmp(a, b); ASSERT_ALWAYS(c > 0);
+    c = cmp(b, a); ASSERT_ALWAYS(c < 0);
+  }
+  {
+    mpf_class a(123);
+    unsigned long b = 45;
+    int c;
+    c = cmp(a, b); ASSERT_ALWAYS(c > 0);
+    c = cmp(b, a); ASSERT_ALWAYS(c < 0);
+  }
+  {
+    mpf_class a(123);
+    long b = 45;
+    int c;
+    c = cmp(a, b); ASSERT_ALWAYS(c > 0);
+    c = cmp(b, a); ASSERT_ALWAYS(c < 0);
+  }
+  {
+    mpf_class a(123);
+    double b = 45;
+    int c;
+    c = cmp(a, b); ASSERT_ALWAYS(c > 0);
+    c = cmp(b, a); ASSERT_ALWAYS(c < 0);
+  }
+}
+
+
+int
+main (void)
+{
+  tests_start ();
+  // Run the mpz, mpq and mpf operator checks, in that order.
+  check_mpz ();
+  check_mpq ();
+  check_mpf ();
+  tests_end ();
+
+  return 0;
+}
diff --git a/tests/cxx/t-ostream.cc b/tests/cxx/t-ostream.cc

new file mode 100644 (file)

index 0000000..0eeb7be
--- /dev/null
+++ b/tests/cxx/t-ostream.cc
@@ -0,0 +1,450 @@
+/* Test ostream formatted output.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <iostream>
+#include <cstdlib>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+using namespace std;
+
+
+/* Set to 1 by main() when the first command line argument is "-s".  When
+   nonzero, the check functions additionally format an equivalent long or
+   double with the standard ostream operators and report any difference
+   from the expected string.  */
+int   option_check_standard = 0;
+
+
+/* Configure the stream `got' from table entry data[i] (flags, width,
+   precision and fill; a fill of '\0' in the table means "use a space"),
+   then evaluate EXPR, which is expected to be an output expression on
+   `got'.  Aborts if the resulting stream tests false, or if the output
+   operation left the stream width at something other than 0.
+
+   Expands against the names `got', `data' and `i', which must be in scope
+   at the point of use.  */
+#define CALL(expr)                                                     \
+  do {                                                                 \
+    got.flags (data[i].flags);                                         \
+    got.width (data[i].width);                                         \
+    got.precision (data[i].precision);                                 \
+    if (data[i].fill == '\0')                                          \
+      got.fill (' ');                                                  \
+    else                                                               \
+      got.fill (data[i].fill);                                         \
+                                                                       \
+    if (! (expr))                                                      \
+      {                                                                        \
+       cout << "\"got\" output error\n";                               \
+       abort ();                                                       \
+      }                                                                        \
+    if (got.width() != 0)                                              \
+      {                                                                        \
+       cout << "\"got\" width not reset to 0\n";                       \
+       abort ();                                                       \
+      }                                                                        \
+                                                                       \
+  } while (0)
+
+
+#define DUMP()                                                         \
+  do {                                                                 \
+    cout << "  want:  |" << data[i].want << "|\n";                     \
+    cout << "  got:   |" << got.str() << "|\n";                                \
+    cout << "  width: " << data[i].width << "\n";                      \
+    cout << "  prec:  " << got.precision() << "\n";                    \
+    cout << "  flags: " << hex << (unsigned long) got.flags() << "\n"; \
+  } while (0)
+
+/* Print the DUMP() diagnostics for the current table entry and then
+   terminate the program with abort().  */
+#define ABORT() \
+  do {          \
+    DUMP ();    \
+    abort ();   \
+  } while (0)
+
+void
+check_mpz (void)
+{
+  static const struct {
+    const char     *z;
+    const char     *want;
+    ios::fmtflags  flags;
+    int            width;
+    int            precision;
+    char           fill;
+
+  } data[] = {
+
+    { "0", "0", ios::dec },
+
+    { "0", "0", ios::oct },
+    { "0", "0", ios::oct | ios::showbase },
+
+    { "0", "0", ios::hex },
+    { "0", "0x0", ios::hex | ios::showbase },
+    { "0", "0X0", ios::hex | ios::showbase | ios::uppercase },
+
+    { "1", "****1", ios::dec, 5, 0, '*' },
+
+    { "-1", "   -1",  ios::dec | ios::right,    5 },
+    { "-1", "-   1",  ios::dec | ios::internal, 5 },
+    { "-1", "-1   ",  ios::dec | ios::left,     5 },
+
+    { "1", "   0x1", ios::hex | ios::showbase | ios::right,    6 },
+    { "1", "0x   1", ios::hex | ios::showbase | ios::internal, 6 },
+    { "1", "0x1   ", ios::hex | ios::showbase | ios::left,     6 },
+
+    { "1", "   +0x1", ios::hex | ios::showbase | ios::showpos | ios::right,
+      7 },
+    { "1", "+0x   1", ios::hex | ios::showbase | ios::showpos | ios::internal,
+      7 },
+    { "1", "+0x1   ", ios::hex | ios::showbase | ios::showpos | ios::left,
+      7 },
+
+    {  "123",    "7b", ios::hex },
+    {  "123",    "7B", ios::hex | ios::uppercase },
+    {  "123",  "0x7b", ios::hex | ios::showbase },
+    {  "123",  "0X7B", ios::hex | ios::showbase | ios::uppercase },
+    { "-123", "-0x7b", ios::hex | ios::showbase },
+    { "-123", "-0X7B", ios::hex | ios::showbase | ios::uppercase },
+
+    {  "123",   "173", ios::oct },
+    {  "123",   "173", ios::oct | ios::uppercase },
+    {  "123",  "0173", ios::oct | ios::showbase },
+    {  "123",  "0173", ios::oct | ios::showbase | ios::uppercase },
+    { "-123", "-0173", ios::oct | ios::showbase },
+    { "-123", "-0173", ios::oct | ios::showbase | ios::uppercase },
+
+  };
+
+  size_t  i;
+  mpz_t   z;
+
+  mpz_init (z);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      mpz_set_str_or_abort (z, data[i].z, 0);
+
+      if (option_check_standard
+         && mpz_fits_slong_p (z)
+
+         // no negatives or showpos in hex or oct
+         && (((data[i].flags & ios::basefield) == ios::hex
+              || (data[i].flags & ios::basefield) == ios::oct)
+             ? (mpz_sgn (z) >= 0
+                && ! (data[i].flags & ios::showpos))
+             : 1)
+         )
+       {
+         ostringstream  got;
+         long  n = mpz_get_si (z);
+         CALL (got << n);
+         if (got.str().compare (data[i].want) != 0)
+           {
+             cout << "check_mpz data[" << i
+                  << "] doesn't match standard ostream output\n";
+             cout << "  z:     " << data[i].z << "\n";
+             cout << "  n:     " << n << "\n";
+             DUMP ();
+           }
+       }
+
+      {
+       ostringstream  got;
+       CALL (got << z);
+       if (got.str().compare (data[i].want) != 0)
+         {
+           cout << "mpz operator<< wrong, data[" << i << "]\n";
+           cout << "  z:     " << data[i].z << "\n";
+           ABORT ();
+         }
+      }
+    }
+
+  mpz_clear (z);
+}
+
+void
+check_mpq (void)
+{
+  static const struct {
+    const char     *q;
+    const char     *want;
+    ios::fmtflags  flags;
+    int            width;
+    int            precision;
+    char           fill;
+
+  } data[] = {
+
+    { "0", "0", ios::dec },
+    { "0", "0", ios::hex },
+    { "0", "0x0", ios::hex | ios::showbase },
+    { "0", "0X0", ios::hex | ios::showbase | ios::uppercase },
+
+    { "5/8", "5/8", ios::dec },
+    { "5/8", "0X5/0X8", ios::hex | ios::showbase | ios::uppercase },
+
+    // zero denominator with showbase
+    { "0/0",   "       0/0", ios::oct | ios::showbase, 10 },
+    { "0/0",   "       0/0", ios::dec | ios::showbase, 10 },
+    { "0/0",   "   0x0/0x0", ios::hex | ios::showbase, 10 },
+    { "123/0", "    0173/0", ios::oct | ios::showbase, 10 },
+    { "123/0", "     123/0", ios::dec | ios::showbase, 10 },
+    { "123/0", "  0x7b/0x0", ios::hex | ios::showbase, 10 },
+    { "123/0", "  0X7B/0X0", ios::hex | ios::showbase | ios::uppercase, 10 },
+    { "0/123", "    0/0173", ios::oct | ios::showbase, 10 },
+    { "0/123", "     0/123", ios::dec | ios::showbase, 10 },
+    { "0/123", "  0x0/0x7b", ios::hex | ios::showbase, 10 },
+    { "0/123", "  0X0/0X7B", ios::hex | ios::showbase | ios::uppercase, 10 },
+  };
+
+  size_t  i;
+  mpq_t   q;
+
+  mpq_init (q);
+
+#define mpq_integer_p(q)  (mpz_cmp_ui (mpq_denref(q), 1L) == 0)
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      mpq_set_str_or_abort (q, data[i].q, 0);
+      MPZ_CHECK_FORMAT (mpq_numref (q));
+      MPZ_CHECK_FORMAT (mpq_denref (q));
+
+      if (option_check_standard
+         && mpz_fits_slong_p (mpq_numref(q))
+         && mpq_integer_p (q))
+       {
+         ostringstream  got;
+         long  n = mpz_get_si (mpq_numref(q));
+         CALL (got << n);
+         if (got.str().compare (data[i].want) != 0)
+           {
+             cout << "check_mpq data[" << i
+                  << "] doesn't match standard ostream output\n";
+             cout << "  q:     " << data[i].q << "\n";
+             cout << "  n:     " << n << "\n";
+             DUMP ();
+           }
+       }
+
+      {
+       ostringstream  got;
+       CALL (got << q);
+       if (got.str().compare (data[i].want) != 0)
+         {
+           cout << "mpq operator<< wrong, data[" << i << "]\n";
+           cout << "  q:     " << data[i].q << "\n";
+           ABORT ();
+         }
+      }
+    }
+
+  mpq_clear (q);
+}
+
+
+void
+check_mpf (void)
+{
+  static const struct {
+    const char     *f;
+    const char     *want;
+    ios::fmtflags  flags;
+    int            width;
+    int            precision;
+    char           fill;
+
+  } data[] = {
+
+    { "0", "0",            ios::dec },
+    { "0", "+0",           ios::dec | ios::showpos },
+    { "0", "0.00000",      ios::dec | ios::showpoint },
+    { "0", "0",            ios::dec | ios::fixed },
+    { "0", "0.",           ios::dec | ios::fixed | ios::showpoint },
+    { "0", "0.000000e+00", ios::dec | ios::scientific },
+    { "0", "0.000000e+00", ios::dec | ios::scientific | ios::showpoint },
+
+    { "0", "0",          ios::dec, 0, 4 },
+    { "0", "0.000",      ios::dec | ios::showpoint, 0, 4 },
+    { "0", "0.0000",     ios::dec | ios::fixed, 0, 4 },
+    { "0", "0.0000",     ios::dec | ios::fixed | ios::showpoint, 0, 4 },
+    { "0", "0.0000e+00", ios::dec | ios::scientific, 0, 4 },
+    { "0", "0.0000e+00", ios::dec | ios::scientific | ios::showpoint, 0, 4 },
+
+    { "1", "1",       ios::dec },
+    { "1", "+1",      ios::dec | ios::showpos },
+    { "1", "1.00000", ios::dec | ios::showpoint },
+    { "1", "1",       ios::dec | ios::fixed },
+    { "1", "1.",      ios::dec | ios::fixed | ios::showpoint },
+    { "1", "1.000000e+00",   ios::dec | ios::scientific },
+    { "1", "1.000000e+00",  ios::dec | ios::scientific | ios::showpoint },
+
+    { "1", "1",          ios::dec,                   0, 4 },
+    { "1", "1.000",      ios::dec | ios::showpoint,  0, 4 },
+    { "1", "1.0000",     ios::dec | ios::fixed,      0, 4 },
+    { "1", "1.0000",     ios::dec | ios::fixed | ios::showpoint, 0, 4 },
+    { "1", "1.0000e+00", ios::dec | ios::scientific, 0, 4 },
+    { "1", "1.0000e+00", ios::dec | ios::scientific | ios::showpoint, 0, 4 },
+
+    { "-1", "-1",        ios::dec | ios::showpos },
+
+    { "-1", "  -1",      ios::dec, 4 },
+    { "-1", "-  1",      ios::dec | ios::internal, 4 },
+    { "-1", "-1  ",      ios::dec | ios::left, 4 },
+
+    { "-1", "  -0x1",    ios::hex | ios::showbase, 6 },
+    { "-1", "-0x  1",    ios::hex | ios::showbase | ios::internal, 6 },
+    { "-1", "-0x1  ",    ios::hex | ios::showbase | ios::left, 6 },
+
+    {    "1", "*********1", ios::dec, 10, 4, '*' },
+    { "1234", "******1234", ios::dec, 10, 4, '*' },
+    { "1234", "*****1234.", ios::dec | ios::showpoint, 10, 4, '*' },
+
+    { "12345", "1.23e+04", ios::dec, 0, 3 },
+
+    { "12345", "12345.", ios::dec | ios::fixed | ios::showpoint },
+
+    { "1.9999999",    "2",     ios::dec, 0, 1 },
+    { "1.0009999999", "1.001", ios::dec, 0, 4 },
+    { "1.0001",       "1",     ios::dec, 0, 4 },
+    { "1.0004",       "1",     ios::dec, 0, 4 },
+    { "1.000555",     "1.001", ios::dec, 0, 4 },
+
+    { "1.0002",       "1.000", ios::dec | ios::fixed, 0, 3 },
+    { "1.0008",       "1.001", ios::dec | ios::fixed, 0, 3 },
+
+    { "0", "0", ios::hex },
+    { "0", "0x0", ios::hex | ios::showbase },
+    { "0", "0X0", ios::hex | ios::showbase | ios::uppercase },
+    { "123",   "7b", ios::hex },
+    { "123", "0x7b", ios::hex | ios::showbase },
+    { "123", "0X7B", ios::hex | ios::showbase | ios::uppercase },
+
+    { "0", "0.000@+00", ios::hex | ios::scientific, 0, 3 },
+    { "256", "1.000@+02", ios::hex | ios::scientific, 0, 3 },
+
+    { "123",   "7.b@+01", ios::hex | ios::scientific, 0, 1 },
+    { "123",   "7.B@+01", ios::hex | ios::scientific | ios::uppercase, 0, 1 },
+    { "123", "0x7.b@+01", ios::hex | ios::scientific | ios::showbase, 0, 1 },
+    { "123", "0X7.B@+01",
+      ios::hex | ios::scientific | ios::showbase | ios::uppercase, 0, 1 },
+
+    { "1099511627776", "1.0@+10", ios::hex | ios::scientific, 0, 1 },
+    { "1099511627776", "1.0@+10",
+      ios::hex | ios::scientific | ios::uppercase, 0, 1 },
+
+    { "0.0625", "1.00@-01", ios::hex | ios::scientific, 0, 2 },
+
+    { "0", "0", ios::oct },
+    { "123",  "173", ios::oct },
+    { "123", "0173", ios::oct | ios::showbase },
+
+    // octal showbase suppressed for 0
+    { "0", "0", ios::oct | ios::showbase },
+    { ".125",    "00.1",  ios::oct | ios::showbase, 0, 1 },
+    { ".015625", "00.01", ios::oct | ios::showbase, 0, 2 },
+    { ".125",    "00.1",  ios::fixed | ios::oct | ios::showbase, 0, 1 },
+    { ".015625", "0.0",   ios::fixed | ios::oct | ios::showbase, 0, 1 },
+    { ".015625", "00.01", ios::fixed | ios::oct | ios::showbase, 0, 2 },
+
+    {  "0.125",  "1.000000e-01", ios::oct | ios::scientific },
+    {  "0.125", "+1.000000e-01", ios::oct | ios::scientific | ios::showpos },
+    { "-0.125", "-1.000000e-01", ios::oct | ios::scientific },
+    { "-0.125", "-1.000000e-01", ios::oct | ios::scientific | ios::showpos },
+
+    { "0", "0.000e+00", ios::oct | ios::scientific, 0, 3 },
+    { "256",  "4.000e+02", ios::oct | ios::scientific, 0, 3 },
+    { "256", "04.000e+02", ios::oct | ios::scientific | ios::showbase, 0, 3 },
+    { "256",  "4.000E+02", ios::oct | ios::scientific | ios::uppercase, 0, 3 },
+    { "256", "04.000E+02",
+      ios::oct | ios::scientific | ios::showbase | ios::uppercase, 0, 3 },
+
+    { "16777216",    "1.000000e+08", ios::oct | ios::scientific },
+    { "16777216",    "1.000000E+08",
+      ios::oct | ios::scientific | ios::uppercase },
+    { "16777216",   "01.000000e+08",
+      ios::oct | ios::scientific | ios::showbase },
+    { "16777216",   "01.000000E+08",
+      ios::oct | ios::scientific | ios::showbase | ios::uppercase },
+    { "16777216",  "+01.000000e+08",
+      ios::oct | ios::scientific | ios::showbase | ios::showpos },
+    { "16777216",  "+01.000000E+08", ios::oct | ios::scientific
+      | ios::showbase | ios::showpos | ios::uppercase },
+    { "-16777216", "-01.000000e+08",
+      ios::oct | ios::scientific | ios::showbase | ios::showpos },
+    { "-16777216", "-01.000000E+08", ios::oct | ios::scientific
+      | ios::showbase | ios::showpos | ios::uppercase },
+
+  };
+
+  size_t  i;
+  mpf_t   f, f2;
+  double  d;
+
+  mpf_init (f);
+  mpf_init (f2);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      mpf_set_str_or_abort (f, data[i].f, 0);
+
+      d = mpf_get_d (f);
+      mpf_set_d (f2, d);
+      if (option_check_standard && mpf_cmp (f, f2) == 0
+         && ! (data[i].flags & (ios::hex | ios::oct | ios::showbase)))
+       {
+         ostringstream  got;
+         CALL (got << d);
+         if (got.str().compare (data[i].want) != 0)
+           {
+             cout << "check_mpf data[" << i
+                  << "] doesn't match standard ostream output\n";
+             cout << "  f:     " << data[i].f << "\n";
+             cout << "  d:     " << d << "\n";
+             DUMP ();
+           }
+       }
+
+      {
+       ostringstream  got;
+       CALL (got << f);
+       if (got.str().compare (data[i].want) != 0)
+         {
+           cout << "mpf operator<< wrong, data[" << i << "]\n";
+           cout << "  f:     " << data[i].f << "\n";
+           ABORT ();
+         }
+      }
+    }
+
+  mpf_clear (f);
+  mpf_clear (f2);
+}
+
+
+
+/* Test driver.  A first argument of "-s" turns on option_check_standard,
+   which makes the check functions also compare against standard ostream
+   output; the mpz, mpq and mpf checks are then run in that order between
+   tests_start() and tests_end().  */
+int
+main (int argc, char *argv[])
+{
+  const bool  standard_requested = (argc > 1 && strcmp (argv[1], "-s") == 0);
+  if (standard_requested)
+    option_check_standard = 1;
+
+  tests_start ();
+
+  check_mpz ();
+  check_mpq ();
+  check_mpf ();
+
+  tests_end ();
+
+  return 0;
+}
diff --git a/tests/cxx/t-prec.cc b/tests/cxx/t-prec.cc

new file mode 100644 (file)

index 0000000..e9f4e48
--- /dev/null
+++ b/tests/cxx/t-prec.cc
@@ -0,0 +1,217 @@
+/* Test precision of mpf_class expressions.
+
+Copyright 2001, 2002, 2003, 2008 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#include <iostream>
+
+#include "gmp.h"
+#include "gmpxx.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+using namespace std;
+
+
+/* The four precisions used by the tests below, smallest to largest.  They
+   are passed as the precision argument of mpf_class constructors, to
+   mpf_set_default_prec, and as the bit count given to mpf_eq.  */
+const int
+small_prec = 64, medium_prec = 128, large_prec = 192, very_large_prec = 256;
+
+#define ASSERT_ALWAYS_PREC(a, s, prec) \
+{                                      \
+  mpf_srcptr _a = a.get_mpf_t();       \
+  mpf_class _b(s, prec);               \
+  mpf_srcptr _c = _b.get_mpf_t();      \
+  ASSERT_ALWAYS(mpf_eq(_a, _c, prec)); \
+}
+
+
+
+/* Check the precision with which mpf_class expressions are evaluated.
+
+   The default precision is set to medium_prec first.  Each case then
+   builds operands with explicitly chosen precisions, evaluates an
+   expression into a result object, and uses ASSERT_ALWAYS_PREC to compare
+   that result with a decimal string over a stated number of bits.  The
+   number of bits named in each assertion is the precision the result is
+   expected to carry for that case.  */
+void
+check_mpf (void)
+{
+  mpf_set_default_prec(medium_prec);
+
+  // simple expressions
+  {
+    // small_prec operand, result constructed with very_large_prec
+    mpf_class f(3.0, small_prec);
+    mpf_class g(1 / f, very_large_prec);
+    ASSERT_ALWAYS_PREC
+      (g, "0.33333 33333 33333 33333 33333 33333 33333 33333 33333 33333"
+       "     33333 33333 33333 33333 33333 333", very_large_prec);
+  }
+  {
+    // medium_prec operand, assigned to a very_large_prec object
+    mpf_class f(9.0, medium_prec);
+    mpf_class g(0.0, very_large_prec);
+    g = 1 / f;
+    ASSERT_ALWAYS_PREC
+      (g, "0.11111 11111 11111 11111 11111 11111 11111 11111 11111 11111"
+       "     11111 11111 11111 11111 11111 111", very_large_prec);
+  }
+  {
+    // large_prec operand, assigned to a very_large_prec object
+    mpf_class f(15.0, large_prec);
+    mpf_class g(0.0, very_large_prec);
+    g = 1 / f;
+    ASSERT_ALWAYS_PREC
+      (g, "0.06666 66666 66666 66666 66666 66666 66666 66666 66666 66666"
+       "     66666 66666 66666 66666 66666 667", very_large_prec);
+  }
+
+  // compound expressions
+  {
+    // nested unary minus around a division
+    mpf_class f(3.0, small_prec);
+    mpf_class g(-(-(-1 / f)), very_large_prec);
+    ASSERT_ALWAYS_PREC
+      (g, "-0.33333 33333 33333 33333 33333 33333 33333 33333 33333 33333"
+       "      33333 33333 33333 33333 33333 333", very_large_prec);
+  }
+  {
+    // sum of two quotients whose operands have different precisions
+    mpf_class f(3.0, small_prec), g(9.0, medium_prec);
+    mpf_class h(0.0, very_large_prec);
+    h = 1/f + 1/g;
+    ASSERT_ALWAYS_PREC
+      (h, "0.44444 44444 44444 44444 44444 44444 44444 44444 44444 44444"
+       "     44444 44444 44444 44444 44444 444", very_large_prec);
+  }
+  {
+    // three operands, each with a different precision
+    mpf_class f(3.0, small_prec), g(9.0, medium_prec), h(15.0, large_prec);
+    mpf_class i(0.0, very_large_prec);
+    i = f / g + h;
+    ASSERT_ALWAYS_PREC
+      (i, "15.33333 33333 33333 33333 33333 33333 33333 33333 33333 33333"
+       "      33333 33333 33333 33333 33333 3", very_large_prec);
+  }
+  {
+    // -(1 + 3) / 3
+    mpf_class f(3.0, small_prec);
+    mpf_class g(-(1 + f) / 3, very_large_prec);
+    ASSERT_ALWAYS_PREC
+      (g, "-1.33333 33333 33333 33333 33333 33333 33333 33333 33333 33333"
+       "      33333 33333 33333 33333 33333 33", very_large_prec);
+  }
+  {
+    // function applied to an expression: sqrt(1/9)
+    mpf_class f(9.0, medium_prec);
+    mpf_class g(0.0, very_large_prec);
+    g = sqrt(1 / f);
+    ASSERT_ALWAYS_PREC
+      (g, "0.33333 33333 33333 33333 33333 33333 33333 33333 33333 33333"
+       "     33333 33333 33333 33333 33333 333", very_large_prec);
+  }
+  {
+    // hypot(1 + 5/15, 1.0) = hypot(4/3, 1) = 5/3
+    mpf_class f(15.0, large_prec);
+    mpf_class g(0.0, very_large_prec);
+    g = hypot(1 + 5 / f, 1.0);
+    ASSERT_ALWAYS_PREC
+      (g, "1.66666 66666 66666 66666 66666 66666 66666 66666 66666 66666"
+       "     66666 66666 66666 66666 66666 67", very_large_prec);
+  }
+
+  // compound assignments
+  {
+    // 1 - 3/9, accumulated into a very_large_prec object
+    mpf_class f(3.0, small_prec), g(9.0, medium_prec);
+    mpf_class h(1.0, very_large_prec);
+    h -= f / g;
+    ASSERT_ALWAYS_PREC
+      (h, "0.66666 66666 66666 66666 66666 66666 66666 66666 66666 66666"
+       "     66666 66666 66666 66666 66666 667", very_large_prec);
+  }
+
+  // construction from expressions
+  {
+    // temporary built without an explicit precision; only small_prec bits
+    // (the precision of f) are compared
+    mpf_class f(3.0, small_prec);
+    mpf_class g(0.0, very_large_prec);
+    g = mpf_class(1 / f);
+    ASSERT_ALWAYS_PREC(g, "0.33333 33333 33333 33333", small_prec);
+  }
+  {
+    // as above with a medium_prec operand; medium_prec bits are compared
+    mpf_class f(9.0, medium_prec);
+    mpf_class g(0.0, very_large_prec);
+    g = mpf_class(1 / f);
+    ASSERT_ALWAYS_PREC
+      (g, "0.11111 11111 11111 11111 11111 11111 11111 1111", medium_prec);
+  }
+  {
+    // as above with a large_prec operand; large_prec bits are compared
+    mpf_class f(15.0, large_prec);
+    mpf_class g(0.0, very_large_prec);
+    g = mpf_class(1 / f);
+    ASSERT_ALWAYS_PREC
+      (g, "0.06666 66666 66666 66666 66666 66666 66666 66666 66666 66666"
+       "     66666 6667", large_prec);
+  }
+
+  {
+    // temporary built with an explicit large_prec; large_prec bits compared
+    mpf_class f(3.0, small_prec), g(9.0, medium_prec);
+    mpf_class h(0.0, very_large_prec);
+    h = mpf_class(f / g + 1, large_prec);
+    ASSERT_ALWAYS_PREC
+      (h, "1.33333 33333 33333 33333 33333 33333 33333 33333 33333 33333"
+       "     33333 333",
+       large_prec);
+  }
+
+  // mixed mpf/mpq expressions
+  {
+    // 3 - 1/3 assigned straight to a very_large_prec object
+    mpf_class f(3.0, small_prec);
+    mpq_class q(1, 3);
+    mpf_class g(0.0, very_large_prec);
+    g = f - q;
+    ASSERT_ALWAYS_PREC
+      (g, "2.66666 66666 66666 66666 66666 66666 66666 66666 66666 66666"
+       "     66666 66666 66666 66666 66666 67", very_large_prec);
+  }
+
+  {
+    // same expression via a temporary with explicit large_prec
+    mpf_class f(3.0, small_prec);
+    mpq_class q(1, 3);
+    mpf_class g(0.0, very_large_prec);
+    g = mpf_class(f - q, large_prec);
+    ASSERT_ALWAYS_PREC
+      (g, "2.66666 66666 66666 66666 66666 66666 66666 66666 66666 66666"
+       "     66666 667",
+       large_prec);
+  }
+  {
+    // temporary without explicit precision; medium_prec bits are compared
+    // (medium_prec is the default precision set at the top of this function)
+    mpf_class f(3.0, small_prec);
+    mpq_class q(1, 3);
+    mpf_class g(0.0, very_large_prec);
+    g = mpf_class(f - q);
+    ASSERT_ALWAYS_PREC
+      (g, "2.66666 66666 66666 66666 66666 66666 66666 667", medium_prec);
+  }
+  {
+    // temporary without explicit precision, large_prec mpf operand;
+    // large_prec bits are compared
+    mpf_class f(15.0, large_prec);
+    mpq_class q(1, 3);
+    mpf_class g(0.0, very_large_prec);
+    g = mpf_class(f + q);
+    ASSERT_ALWAYS_PREC
+      (g, "15.33333 33333 33333 33333 33333 33333 33333 33333 33333 33333"
+       "      33333 33",
+       large_prec);
+  }
+}
+
+
+/* Test driver: runs check_mpf() between the harness calls tests_start()
+   and tests_end(), and returns 0 once it has returned.  */
+int
+main (void)
+{
+  tests_start();
+
+  check_mpf();
+
+  tests_end();
+  return 0;
+}
diff --git a/tests/cxx/t-rand.cc b/tests/cxx/t-rand.cc

new file mode 100644 (file)

index 0000000..506a137
--- /dev/null
+++ b/tests/cxx/t-rand.cc
@@ -0,0 +1,138 @@
+/* Test gmp_randclass.
+
+Copyright 2002, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmpxx.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+using namespace std;
+
+
+/* all flavours of initialization */
+void
+check_randinit (void)
+{
+  {
+    gmp_randclass r(gmp_randinit_default);
+  }
+
+  {
+    mpz_class a(0);
+    unsigned long c = 0, m2exp = 8;
+    gmp_randclass r(gmp_randinit_lc_2exp, a, c, m2exp);
+  }
+
+  {
+    unsigned long m2exp = 64;
+    gmp_randclass r(gmp_randinit_lc_2exp_size, m2exp);
+  }
+
+  /* gmp_randinit_lc_2exp_size, with excessive size */
+  {
+    try {
+      unsigned long m2exp = ULONG_MAX;
+      gmp_randclass r(gmp_randinit_lc_2exp_size, m2exp);
+      ASSERT_ALWAYS (0);  /* should not be reached */
+    } catch (length_error) {
+    }
+  }
+
+  {
+    gmp_randclass r(gmp_randinit_mt);
+  }
+
+  /* obsolete, but still available */
+  {
+    gmp_randalg_t alg = GMP_RAND_ALG_LC;
+    unsigned long m2exp = 64;
+    gmp_randclass r(alg, m2exp);
+  }
+  {
+    gmp_randalg_t alg = GMP_RAND_ALG_DEFAULT;
+    unsigned long m2exp = 64;
+    gmp_randclass r(alg, m2exp);
+  }
+  {
+    gmp_randalg_t alg = (gmp_randalg_t) 0;
+    unsigned long m2exp = 64;
+    gmp_randclass r(alg, m2exp);
+  }
+}
+
+/* Exercise the integer-producing members of gmp_randclass with different
+   argument types.  The results are stored but not inspected; the cases
+   only check that each call compiles and runs.  */
+void
+check_mpz (void)
+{
+  {
+    // seed from an mpz_class, bit count given as unsigned int
+    gmp_randclass r(gmp_randinit_default);
+    mpz_class a(123);
+    unsigned int b = 256;
+    mpz_class c;
+    r.seed(a);
+    c = r.get_z_bits(b);
+  }
+  {
+    // seed from an unsigned long, bit count given as mpz_class
+    gmp_randclass r(gmp_randinit_default);
+    mpz_class a(256);
+    unsigned long b = 123;
+    mpz_class c;
+    r.seed(b);
+    c = r.get_z_bits(a);
+  }
+  {
+    // seed from an mpz_class, range bound given as mpz_class
+    gmp_randclass r(gmp_randinit_default);
+    mpz_class a(123), b(256);
+    mpz_class c;
+    r.seed(a);
+    c = r.get_z_range(b);
+  }
+}
+
+/* Exercise gmp_randclass::get_f with and without an argument.  The results
+   are stored but not inspected; the cases only check that each call
+   compiles and runs.  */
+void
+check_mpf (void)
+{
+  {
+    // seed from an mpz_class, get_f() with no argument
+    gmp_randclass r(gmp_randinit_default);
+    mpz_class a(123);
+    r.seed(a);
+    mpf_class b;
+    b = r.get_f();
+  }
+  {
+    // seed from an int, get_f() with an int argument
+    gmp_randclass r(gmp_randinit_default);
+    int a = 123, b = 128;
+    r.seed(a);
+    mpf_class c;
+    c = r.get_f(b);
+  }
+}
+
+
+/* Test driver: runs the initialization, mpz and mpf checks in that order
+   between the harness calls tests_start() and tests_end(), and returns 0
+   once they have all returned.  */
+int
+main (void)
+{
+  tests_start();
+
+  check_randinit();
+  check_mpz();
+  check_mpf();
+
+  tests_end();
+  return 0;
+}
diff --git a/tests/cxx/t-ternary.cc b/tests/cxx/t-ternary.cc

new file mode 100644 (file)

index 0000000..39b7347
--- /dev/null
+++ b/tests/cxx/t-ternary.cc
@@ -0,0 +1,735 @@
+/* Test mp*_class ternary expressions.
+
+Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#include <iostream>
+
+#include "gmp.h"
+#include "gmpxx.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+using namespace std;
+
+
+/* The various test cases are broken up into separate functions to keep down
+   compiler memory use.  They're static so that any mistakenly omitted from
+   main() will provoke warnings (under gcc -Wall at least).  */
+
+/* a +/- b * c, with all three operands mpz_class.  */
+static void
+check_mpz_1 (void)
+{
+  // template<class Op1, class Op2>
+  // __gmp_expr<mpz_t, __gmp_binary_expr<mpz_class, __gmp_expr
+  // <mpz_t, __gmp_binary_expr<mpz_class, mpz_class, Op1> >, Op2> >
+  {
+    // 1 + 2*3
+    mpz_class a(1), b(2), c(3);
+    mpz_class d;
+    d = a + b * c; ASSERT_ALWAYS(d == 7);
+  }
+  {
+    // 1 - 2*3
+    mpz_class a(1), b(2), c(3);
+    mpz_class d;
+    d = a - b * c; ASSERT_ALWAYS(d == -5);
+  }
+}
+
+/* a +/- b * c, with a and b mpz_class and c a signed int (builtin type as
+   the right operand of the inner product).  */
+static void
+check_mpz_2 (void)
+{
+  // template <class T, class Op1, class Op2>
+  // __gmp_expr<mpz_t, __gmp_binary_expr<mpz_class, __gmp_expr
+  // <mpz_t, __gmp_binary_expr<mpz_class, T, Op1> >, Op2> >
+  {
+    // 1 + 2*3
+    mpz_class a(1), b(2);
+    signed int c = 3;
+    mpz_class d;
+    d = a + b * c; ASSERT_ALWAYS(d == 7);
+  }
+  {
+    // 1 - 2*3
+    mpz_class a(1), b(2);
+    signed int c = 3;
+    mpz_class d;
+    d = a - b * c; ASSERT_ALWAYS(d == -5);
+  }
+}
+
+/* a +/- c * b, with a and b mpz_class and c an unsigned int (builtin type
+   as the left operand of the inner product).  */
+static void
+check_mpz_3 (void)
+{
+  // template <class T, class Op1, class Op2>
+  // __gmp_expr<mpz_t, __gmp_binary_expr<mpz_class, __gmp_expr
+  // <mpz_t, __gmp_binary_expr<T, mpz_class, Op1> >, Op2> >
+  {
+    // 1 + 3*2
+    mpz_class a(1), b(2);
+    unsigned int c = 3;
+    mpz_class d;
+    d = a + c * b; ASSERT_ALWAYS(d == 7);
+  }
+  {
+    // 1 - 3*2
+    mpz_class a(1), b(2);
+    unsigned int c = 3;
+    mpz_class d;
+    d = a - c * b; ASSERT_ALWAYS(d == -5);
+  }
+}
+
+/* a +/- b * (c + d), with a, b, c mpz_class and d a double: the right
+   operand of the inner product is itself an expression.  */
+static void
+check_mpz_4 (void)
+{
+  // template <class T, class Op1, class Op2>
+  // __gmp_expr<mpz_t, __gmp_binary_expr<mpz_class, __gmp_expr
+  // <mpz_t, __gmp_binary_expr<mpz_class, __gmp_expr<mpz_t, T>, Op1> >, Op2> >
+  {
+    // 1 + 2*(3 + 4)
+    mpz_class a(1), b(2), c(3);
+    double d = 4.0;
+    mpz_class e;
+    e = a + b * (c + d); ASSERT_ALWAYS(e == 15);
+  }
+  {
+    // 1 - 2*(3 + 4)
+    mpz_class a(1), b(2), c(3);
+    double d = 4.0;
+    mpz_class e;
+    e = a - b * (c + d); ASSERT_ALWAYS(e == -13);
+  }
+}
+
+/* a +/- (b - d) * c, with a, b, c mpz_class and d a signed int: the left
+   operand of the inner product is itself an expression.  */
+static void
+check_mpz_5 (void)
+{
+  // template <class T, class Op1, class Op2>
+  // __gmp_expr<mpz_t, __gmp_binary_expr<mpz_class, __gmp_expr
+  // <mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, T>, mpz_class, Op1> >, Op2> >
+  {
+    // 1 + (2 - 4)*3
+    mpz_class a(1), b(2), c(3);
+    signed int d = 4;
+    mpz_class e;
+    e = a + (b - d) * c; ASSERT_ALWAYS(e == -5);
+  }
+  {
+    // 1 - (2 - 4)*3
+    mpz_class a(1), b(2), c(3);
+    signed int d = 4;
+    mpz_class e;
+    e = a - (b - d) * c; ASSERT_ALWAYS(e == 7);
+  }
+}
+
+/* a +/- (b + c) * d, with a and b mpz_class and c, d unsigned int: the
+   inner product multiplies an expression by a builtin type.  */
+static void
+check_mpz_6 (void)
+{
+  // template <class T, class U, class Op1, class Op2>
+  // __gmp_expr<mpz_t, __gmp_binary_expr<mpz_class, __gmp_expr
+  // <mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, T>, U, Op1> >, Op2> >
+  {
+    // 1 + (2 + 3)*4
+    mpz_class a(1), b(2);
+    unsigned int c = 3, d = 4;
+    mpz_class e;
+    e = a + (b + c) * d; ASSERT_ALWAYS(e == 21);
+  }
+  {
+    // 1 - (2 + 3)*4
+    mpz_class a(1), b(2);
+    unsigned int c = 3, d = 4;
+    mpz_class e;
+    e = a - (b + c) * d; ASSERT_ALWAYS(e == -19);
+  }
+}
+
+/* a +/- c * (b + d), with a and b mpz_class and c, d double: the inner
+   product multiplies a builtin type by an expression.  */
+static void
+check_mpz_7 (void)
+{
+  // template <class T, class U, class Op1, class Op2>
+  // __gmp_expr<mpz_t, __gmp_binary_expr<mpz_class, __gmp_expr
+  // <mpz_t, __gmp_binary_expr<T, __gmp_expr<mpz_t, U>, Op1> >, Op2> >
+  {
+    // 1 + 3*(2 + 4)
+    mpz_class a(1), b(2);
+    double c = 3.0, d = 4.0;
+    mpz_class e;
+    e = a + c * (b + d); ASSERT_ALWAYS(e == 19);
+  }
+  {
+    // 1 - 3*(2 + 4)
+    mpz_class a(1), b(2);
+    double c = 3.0, d = 4.0;
+    mpz_class e;
+    e = a - c * (b + d); ASSERT_ALWAYS(e == -17);
+  }
+}
+
+/* a +/- (b - d) * (c + e), with a, b, c mpz_class and d, e signed int:
+   both operands of the inner product are expressions.  */
+static void
+check_mpz_8 (void)
+{
+  // template <class T, class U, class Op1, class Op2>
+  // __gmp_expr<mpz_t, __gmp_binary_expr<mpz_class, __gmp_expr
+  // <mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, T>, __gmp_expr<mpz_t, U>,
+  // Op1> >, Op2> >
+  {
+    // 1 + (2 - 4)*(3 + 5)
+    mpz_class a(1), b(2), c(3);
+    signed int d = 4, e = 5;
+    mpz_class f;
+    f = a + (b - d) * (c + e); ASSERT_ALWAYS(f == -15);
+  }
+  {
+    // 1 - (2 - 4)*(3 + 5)
+    mpz_class a(1), b(2), c(3);
+    signed int d = 4, e = 5;
+    mpz_class f;
+    f = a - (b - d) * (c + e); ASSERT_ALWAYS(f == 17);
+  }
+}
+
+/* (a + d) +/- b * c, with a, b, c mpz_class and d an unsigned int: the
+   left operand of the outer operation is itself an expression.  */
+static void
+check_mpz_9 (void)
+{
+  // template <class T, class Op1, class Op2>
+  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, T>,
+  // __gmp_expr<mpz_t, __gmp_binary_expr<mpz_class, mpz_class, Op1> >, Op2> >
+  {
+    // (1 + 4) + 2*3
+    mpz_class a(1), b(2), c(3);
+    unsigned int d = 4;
+    mpz_class e;
+    e = (a + d) + b * c; ASSERT_ALWAYS(e == 11);
+  }
+  {
+    // (1 + 4) - 2*3
+    mpz_class a(1), b(2), c(3);
+    unsigned int d = 4;
+    mpz_class e;
+    e = (a + d) - b * c; ASSERT_ALWAYS(e == -1);
+  }
+}
+
+/* (a - c) +/- b * d, with a and b mpz_class and c, d double: expression
+   on the left, mpz_class times builtin type on the right.  */
+static void
+check_mpz_10 (void)
+{
+  // template <class T, class U, class Op1, class Op2>
+  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, T>,
+  // __gmp_expr<mpz_t, __gmp_binary_expr<mpz_class, U, Op1> >, Op2> >
+  {
+    // (1 - 3) + 2*4
+    mpz_class a(1), b(2);
+    double c = 3.0, d = 4.0;
+    mpz_class e;
+    e = (a - c) + b * d; ASSERT_ALWAYS(e == 6);
+  }
+  {
+    // (1 - 3) - 2*4
+    mpz_class a(1), b(2);
+    double c = 3.0, d = 4.0;
+    mpz_class e;
+    e = (a - c) - b * d; ASSERT_ALWAYS(e == -10);
+  }
+}
+
+static void
+check_mpz_11 (void)
+{  // Checks sub-expr +/- (scalar * mpz_class); scalars are signed int.
+  // template <class T, class U, class Op1, class Op2>
+  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, T>,
+  // __gmp_expr<mpz_t, __gmp_binary_expr<U, mpz_class, Op1> >, Op2> >
+  {
+    mpz_class a(1), b(2);
+    signed int c = 3, d = 4;
+    mpz_class e;
+    e = (a - c) + d * b; ASSERT_ALWAYS(e == 6);  // (1-3) + 4*2
+  }
+  {
+    mpz_class a(1), b(2);
+    signed int c = 3, d = 4;
+    mpz_class e;
+    e = (a - c) - d * b; ASSERT_ALWAYS(e == -10);  // (1-3) - 4*2
+  }
+}
+
+static void
+check_mpz_12 (void)
+{  // Checks sub-expr +/- (mpz_class * sub-expr); scalars are unsigned int.
+  // template <class T, class U, class Op1, class Op2>
+  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, T>, __gmp_expr
+  // <mpz_t, __gmp_binary_expr<mpz_class, __gmp_expr<mpz_t, U>, Op1> >, Op2> >
+  {
+    mpz_class a(1), b(2), c(3);
+    unsigned int d = 4, e = 5;
+    mpz_class f;
+    f = (a + d) + b * (c - e); ASSERT_ALWAYS(f == 1);  // (1+4) + 2*(3-5); needs 3-5 == -2, no unsigned wrap
+  }
+  {
+    mpz_class a(1), b(2), c(3);
+    unsigned int d = 4, e = 5;
+    mpz_class f;
+    f = (a + d) - b * (c - e); ASSERT_ALWAYS(f == 9);  // (1+4) - 2*(3-5)
+  }
+}
+
+static void
+check_mpz_13 (void)
+{  // Checks sub-expr +/- (sub-expr * mpz_class); scalars are double.
+  // template <class T, class U, class Op1, class Op2>
+  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, T>, __gmp_expr
+  // <mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, U>, mpz_class, Op1> >, Op2> >
+  {
+    mpz_class a(1), b(2), c(3);
+    double d = 4.0, e = 5.0;
+    mpz_class f;
+    f = (a - d) + (b + e) * c; ASSERT_ALWAYS(f == 18);  // (1-4) + (2+5)*3
+  }
+  {
+    mpz_class a(1), b(2), c(3);
+    double d = 4.0, e = 5.0;
+    mpz_class f;
+    f = (a - d) - (b + e) * c; ASSERT_ALWAYS(f == -24);  // (1-4) - (2+5)*3
+  }
+
+}
+
+static void
+check_mpz_14 (void)
+{  // Checks sub-expr +/- (sub-expr * scalar); scalars are signed int.
+  // template <class T, class U, class V, class Op1, class Op2>
+  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, T>, __gmp_expr
+  // <mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, U>, V, Op1> >, Op2> >
+  {
+    mpz_class a(1), b(2);
+    signed int c = 3, d = 4, e = 5;
+    mpz_class f;
+    f = (a + c) + (b + d) * e; ASSERT_ALWAYS(f == 34);  // (1+3) + (2+4)*5
+  }
+  {
+    mpz_class a(1), b(2);
+    signed int c = 3, d = 4, e = 5;
+    mpz_class f;
+    f = (a + c) - (b + d) * e; ASSERT_ALWAYS(f == -26);  // (1+3) - (2+4)*5
+  }
+}
+
+static void
+check_mpz_15 (void)
+{  // Checks sub-expr +/- (scalar * sub-expr); scalars are unsigned int.
+  // template <class T, class U, class V, class Op1, class Op2>
+  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, T>, __gmp_expr
+  // <mpz_t, __gmp_binary_expr<U, __gmp_expr<mpz_t, V>, Op1> >, Op2> >
+  {
+    mpz_class a(1), b(2);
+    unsigned int c = 3, d = 4, e = 5;
+    mpz_class f;
+    f = (a - c) + d * (b - e); ASSERT_ALWAYS(f == -14);  // (1-3) + 4*(2-5); differences must go negative, not wrap
+  }
+  {
+    mpz_class a(1), b(2);
+    unsigned int c = 3, d = 4, e = 5;
+    mpz_class f;
+    f = (a - c) - d * (b - e); ASSERT_ALWAYS(f == 10);  // (1-3) - 4*(2-5)
+  }
+
+}
+
+static void
+check_mpz_16 (void)
+{  // Checks sub-expr +/- (sub-expr * sub-expr); scalars are double.
+  // template <class T, class U, class V, class Op1, class Op2>
+  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, T>, __gmp_expr
+  // <mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, U>, __gmp_expr<mpz_t, V>,
+  // Op1> >, Op2> >
+  {
+    mpz_class a(1), b(2), c(3);
+    double d = 4.0, e = 5.0, f = 6.0;
+    mpz_class g;
+    g = (a + d) + (b - e) * (c + f); ASSERT_ALWAYS(g == -22);  // (1+4) + (2-5)*(3+6)
+  }
+  {
+    mpz_class a(1), b(2), c(3);
+    double d = 4.0, e = 5.0, f = 6.0;
+    mpz_class g;
+    g = (a + d) - (b - e) * (c + f); ASSERT_ALWAYS(g == 32);  // (1+4) - (2-5)*(3+6)
+  }
+}
+
+static void
+check_mpz_17 (void)
+{  // Checks (mpz_class * mpz_class) +/- mpz_class; no scalar operands.
+  // template <class Op1, class Op2>
+  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr
+  // <mpz_t, __gmp_binary_expr<mpz_class, mpz_class, Op1> >, mpz_class, Op2> >
+  {
+    mpz_class a(2), b(3), c(4);
+    mpz_class d;
+    d = a * b + c; ASSERT_ALWAYS(d == 10);  // 2*3 + 4
+  }
+  {
+    mpz_class a(2), b(3), c(4);
+    mpz_class d;
+    d = a * b - c; ASSERT_ALWAYS(d == 2);  // 2*3 - 4
+  }
+}
+
+static void
+check_mpz_18 (void)
+{  // Checks (mpz_class * scalar) +/- mpz_class; the scalar is signed int.
+  // template <class T, class Op1, class Op2>
+  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr
+  // <mpz_t, __gmp_binary_expr<mpz_class, T, Op1> >, mpz_class, Op2> >
+  {
+    mpz_class a(2), b(3);
+    signed int c = 4;
+    mpz_class d;
+    d = a * c + b; ASSERT_ALWAYS(d == 11);  // 2*4 + 3
+  }
+  {
+    mpz_class a(2), b(3);
+    signed int c = 4;
+    mpz_class d;
+    d = a * c - b; ASSERT_ALWAYS(d == 5);  // 2*4 - 3
+  }
+
+}
+
+static void
+check_mpz_19 (void)
+{  // Checks (scalar * mpz_class) +/- mpz_class; the scalar is unsigned int.
+  // template <class T, class Op1, class Op2>
+  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr
+  // <mpz_t, __gmp_binary_expr<T, mpz_class, Op1> >, mpz_class, Op2> >
+  {
+    mpz_class a(2), b(3);
+    unsigned int c = 4;
+    mpz_class d;
+    d = c * a + b; ASSERT_ALWAYS(d == 11);  // 4*2 + 3
+  }
+  {
+    mpz_class a(2), b(3);
+    unsigned int c = 4;
+    mpz_class d;
+    d = c * a - b; ASSERT_ALWAYS(d == 5);  // 4*2 - 3
+  }
+}
+
+static void
+check_mpz_20 (void)
+{  // Checks (mpz_class * sub-expr) +/- mpz_class; the scalar is double.
+  // template <class T, class Op1, class Op2>
+  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, __gmp_binary_expr
+  // <mpz_class, __gmp_expr<mpz_t, T>, Op1> >, mpz_class, Op2> >
+  {
+    mpz_class a(2), b(3), c(4);
+    double d = 5.0;
+    mpz_class e;
+    e = a * (b + d) + c; ASSERT_ALWAYS(e == 20);  // 2*(3+5) + 4
+  }
+  {
+    mpz_class a(2), b(3), c(4);
+    double d = 5.0;
+    mpz_class e;
+    e = a * (b + d) - c; ASSERT_ALWAYS(e == 12);  // 2*(3+5) - 4
+  }
+}
+
+static void
+check_mpz_21 (void)
+{  // Checks (sub-expr * mpz_class) +/- mpz_class; the scalar is signed int.
+  // template <class T, class Op1, class Op2>
+  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, __gmp_binary_expr
+  // <__gmp_expr<mpz_t, T>, mpz_class, Op1> >, mpz_class, Op2> >
+  {
+    mpz_class a(2), b(3), c(4);
+    signed int d = 5;
+    mpz_class e;
+    e = (a - d) * b + c; ASSERT_ALWAYS(e == -5);  // (2-5)*3 + 4
+  }
+  {
+    mpz_class a(2), b(3), c(4);
+    signed int d = 5;
+    mpz_class e;
+    e = (a - d) * b - c; ASSERT_ALWAYS(e == -13);  // (2-5)*3 - 4
+  }
+}
+
+static void
+check_mpz_22 (void)
+{  // Checks (sub-expr * scalar) +/- mpz_class; scalars are unsigned int.
+  // template <class T, class U, class Op1, class Op2>
+  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, __gmp_binary_expr
+  // <__gmp_expr<mpz_t, T>, U, Op1> >, mpz_class, Op2> >
+  {
+    mpz_class a(2), b(3);
+    unsigned int c = 4, d = 5;
+    mpz_class e;
+    e = (a + c) * d + b; ASSERT_ALWAYS(e == 33);  // (2+4)*5 + 3
+  }
+  {
+    mpz_class a(2), b(3);
+    unsigned int c = 4, d = 5;
+    mpz_class e;
+    e = (a + c) * d - b; ASSERT_ALWAYS(e == 27);  // (2+4)*5 - 3
+  }
+}
+
+static void
+check_mpz_23 (void)
+{  // Checks (scalar * sub-expr) +/- mpz_class; scalars are double.
+  // template <class T, class U, class Op1, class Op2>
+  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, __gmp_binary_expr
+  // <T, __gmp_expr<mpz_t, U>, Op1> >, mpz_class, Op2> >
+  {
+    mpz_class a(2), b(3);
+    double c = 4.0, d = 5.0;
+    mpz_class e;
+    e = c * (a + d) + b; ASSERT_ALWAYS(e == 31);  // 4*(2+5) + 3
+  }
+  {
+    mpz_class a(2), b(3);
+    double c = 4.0, d = 5.0;
+    mpz_class e;
+    e = c * (a + d) - b; ASSERT_ALWAYS(e == 25);  // 4*(2+5) - 3
+  }
+
+}
+
+static void
+check_mpz_24 (void)
+{  // Checks (sub-expr * sub-expr) +/- mpz_class; scalars are signed int.
+  // template <class T, class U, class Op1, class Op2>
+  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, __gmp_binary_expr
+  // <__gmp_expr<mpz_t, T>, __gmp_expr<mpz_t, U>, Op1> >, mpz_class, Op2> >
+  {
+    mpz_class a(2), b(3), c(4);
+    signed int d = 5, e = 6;
+    mpz_class f;
+    f = (a - d) * (b + e) + c; ASSERT_ALWAYS(f == -23);  // (2-5)*(3+6) + 4
+  }
+  {
+    mpz_class a(2), b(3), c(4);
+    signed int d = 5, e = 6;
+    mpz_class f;
+    f = (a - d) * (b + e) - c; ASSERT_ALWAYS(f == -31);  // (2-5)*(3+6) - 4
+  }
+}
+
+static void
+check_mpz_25 (void)
+{  // Checks (mpz_class * mpz_class) +/- sub-expr; the scalar is unsigned int.
+  // template <class T, class Op1, class Op2>
+  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, __gmp_binary_expr
+  // <mpz_class, mpz_class, Op1> >, __gmp_expr<mpz_t, T>, Op2> >
+  {
+    mpz_class a(2), b(3), c(4);
+    unsigned int d = 5;
+    mpz_class e;
+    e = a * b + (c - d); ASSERT_ALWAYS(e == 5);  // 2*3 + (4-5); needs 4-5 == -1, no unsigned wrap
+  }
+  {
+    mpz_class a(2), b(3), c(4);
+    unsigned int d = 5;
+    mpz_class e;
+    e = a * b - (c - d); ASSERT_ALWAYS(e == 7);  // 2*3 - (4-5)
+  }
+}
+
+static void
+check_mpz_26 (void)
+{  // Checks (mpz_class * scalar) +/- sub-expr; scalars are double.
+  // template <class T, class U, class Op1, class Op2>
+  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, __gmp_binary_expr
+  // <mpz_class, T, Op1> >, __gmp_expr<mpz_t, U>, Op2> >
+  {
+    mpz_class a(2), b(3);
+    double c = 4.0, d = 5.0;
+    mpz_class e;
+    e = a * c + (b + d); ASSERT_ALWAYS(e == 16);  // 2*4 + (3+5)
+  }
+  {
+    mpz_class a(2), b(3);
+    double c = 4.0, d = 5.0;
+    mpz_class e;
+    e = a * c - (b + d); ASSERT_ALWAYS(e == 0);  // 2*4 - (3+5)
+  }
+}
+
+static void
+check_mpz_27 (void)
+{  // Checks (scalar * mpz_class) +/- sub-expr; scalars are signed int.
+  // template <class T, class U, class Op1, class Op2>
+  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, __gmp_binary_expr
+  // <T, mpz_class, Op1> >, __gmp_expr<mpz_t, U>, Op2> >
+  {
+    mpz_class a(2), b(3);
+    signed int c = 4, d = 5;
+    mpz_class e;
+    e = c * a + (b - d); ASSERT_ALWAYS(e == 6);  // 4*2 + (3-5)
+  }
+  {
+    mpz_class a(2), b(3);
+    signed int c = 4, d = 5;
+    mpz_class e;
+    e = c * a - (b - d); ASSERT_ALWAYS(e == 10);  // 4*2 - (3-5)
+  }
+}
+
+static void
+check_mpz_28 (void)
+{  // Checks (mpz_class * sub-expr) +/- sub-expr; scalars are unsigned int.
+  // template <class T, class U, class Op1, class Op2>
+  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, __gmp_binary_expr
+  // <mpz_class, __gmp_expr<mpz_t, T>, Op1> >, __gmp_expr<mpz_t, U>, Op2> >
+  {
+    mpz_class a(2), b(3), c(4);
+    unsigned int d = 5, e = 6;
+    mpz_class f;
+    f = a * (b - d) + (c + e); ASSERT_ALWAYS(f == 6);  // 2*(3-5) + (4+6); needs 3-5 == -2, no unsigned wrap
+  }
+  {
+    mpz_class a(2), b(3), c(4);
+    unsigned int d = 5, e = 6;
+    mpz_class f;
+    f = a * (b - d) - (c + e); ASSERT_ALWAYS(f == -14);  // 2*(3-5) - (4+6)
+  }
+}
+
+static void
+check_mpz_29 (void)
+{  // Checks (sub-expr * mpz_class) +/- sub-expr; scalars are double.
+  // template <class T, class U, class Op1, class Op2>
+  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, __gmp_binary_expr
+  // <__gmp_expr<mpz_t, T>, mpz_class, Op1> >, __gmp_expr<mpz_t, U>, Op2> >
+  {
+    mpz_class a(2), b(3), c(4);
+    double d = 5.0, e = 6.0;
+    mpz_class f;
+    f = (a + d) * b + (c - e); ASSERT_ALWAYS(f == 19);  // (2+5)*3 + (4-6)
+  }
+  {
+    mpz_class a(2), b(3), c(4);
+    double d = 5.0, e = 6.0;
+    mpz_class f;
+    f = (a + d) * b - (c - e); ASSERT_ALWAYS(f == 23);  // (2+5)*3 - (4-6)
+  }
+}
+
+static void
+check_mpz_30 (void)
+{  // Checks (sub-expr * scalar) +/- sub-expr; scalars are signed int.
+  // template <class T, class U, class V, class Op1, class Op2>
+  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, __gmp_binary_expr
+  // <__gmp_expr<mpz_t, T>, U, Op1> >, __gmp_expr<mpz_t, V>, Op2> >
+  {
+    mpz_class a(2), b(3);
+    signed int c = 4, d = 5, e = 6;
+    mpz_class f;
+    f = (a + c) * d + (b + e); ASSERT_ALWAYS(f == 39);  // (2+4)*5 + (3+6)
+  }
+  {
+    mpz_class a(2), b(3);
+    signed int c = 4, d = 5, e = 6;
+    mpz_class f;
+    f = (a + c) * d - (b + e); ASSERT_ALWAYS(f == 21);  // (2+4)*5 - (3+6)
+  }
+}
+
+static void
+check_mpz_31 (void)
+{  // Checks (scalar * sub-expr) +/- sub-expr; scalars are unsigned int.
+  // template <class T, class U, class V, class Op1, class Op2>
+  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, __gmp_binary_expr
+  // <T, __gmp_expr<mpz_t, U>, Op1> >, __gmp_expr<mpz_t, V>, Op2> >
+  {
+    mpz_class a(2), b(3);
+    unsigned int c = 4, d = 5, e = 6;
+    mpz_class f;
+    f = c * (a + d) + (b - e); ASSERT_ALWAYS(f == 25);  // 4*(2+5) + (3-6); needs 3-6 == -3, no unsigned wrap
+  }
+  {
+    mpz_class a(2), b(3);
+    unsigned int c = 4, d = 5, e = 6;
+    mpz_class f;
+    f = c * (a + d) - (b - e); ASSERT_ALWAYS(f == 31);  // 4*(2+5) - (3-6)
+  }
+}
+
+static void
+check_mpz_32 (void)
+{  // Checks (sub-expr * sub-expr) +/- sub-expr; scalars are double.
+  // template <class T, class U, class V, class Op1, class Op2>
+  // __gmp_expr<mpz_t, __gmp_binary_expr<__gmp_expr<mpz_t, __gmp_binary_expr
+  // <__gmp_expr<mpz_t, T>, __gmp_expr<mpz_t, U>, Op1> >,
+  // __gmp_expr<mpz_t, V>, Op2> >
+  {
+    mpz_class a(2), b(3), c(4);
+    double d = 5.0, e = 6.0, f = 7.0;
+    mpz_class g;
+    g = (a + d) * (b - e) + (c + f); ASSERT_ALWAYS(g == -10);  // (2+5)*(3-6) + (4+7)
+  }
+  {
+    mpz_class a(2), b(3), c(4);
+    double d = 5.0, e = 6.0, f = 7.0;
+    mpz_class g;
+    g = (a + d) * (b - e) - (c + f); ASSERT_ALWAYS(g == -32);  // (2+5)*(3-6) - (4+7)
+  }
+}
+
+void
+check_mpq (void)
+{  // Intentionally empty placeholder; main() still calls it.
+  // currently there's no ternary mpq operation
+}
+
+void
+check_mpf (void)
+{  // Intentionally empty placeholder; main() still calls it.
+  // currently there's no ternary mpf operation
+}
+
+
+int
+main (void)
+{  // Runs every ternary check in order between tests_start() and tests_end(); returns 0.
+  tests_start();
+
+  check_mpz_1 ();
+  check_mpz_2 ();
+  check_mpz_3 ();
+  check_mpz_4 ();
+  check_mpz_5 ();
+  check_mpz_6 ();
+  check_mpz_7 ();
+  check_mpz_8 ();
+  check_mpz_9 ();
+  check_mpz_10 ();
+  check_mpz_11 ();
+  check_mpz_12 ();
+  check_mpz_13 ();
+  check_mpz_14 ();
+  check_mpz_15 ();
+  check_mpz_16 ();
+  check_mpz_17 ();
+  check_mpz_18 ();
+  check_mpz_19 ();
+  check_mpz_20 ();
+  check_mpz_21 ();
+  check_mpz_22 ();
+  check_mpz_23 ();
+  check_mpz_24 ();
+  check_mpz_25 ();
+  check_mpz_26 ();
+  check_mpz_27 ();
+  check_mpz_28 ();
+  check_mpz_29 ();
+  check_mpz_30 ();
+  check_mpz_31 ();
+  check_mpz_32 ();
+
+  check_mpq();  // currently an empty placeholder
+  check_mpf();  // currently an empty placeholder
+
+  tests_end();
+  return 0;
+}
diff --git a/tests/cxx/t-unary.cc b/tests/cxx/t-unary.cc

new file mode 100644 (file)

index 0000000..eda437c
--- /dev/null
+++ b/tests/cxx/t-unary.cc
@@ -0,0 +1,133 @@
+/* Test mp*_class unary expressions.
+
+Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#include <iostream>
+
+#include "gmp.h"
+#include "gmpxx.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+using namespace std;
+
+
+void
+check_mpz (void)
+{  // Checks unary +, - and ~ on mpz_class, applied to plain values and to nested unary expressions.
+  // template <class T, class Op>
+  // __gmp_expr<T, __gmp_unary_expr<__gmp_expr<T, T>, Op> >
+  {
+    mpz_class a(1);
+    mpz_class b(+a); ASSERT_ALWAYS(b == 1);  // expression passed straight to the constructor
+  }
+  {
+    mpz_class a(2);
+    mpz_class b;
+    b = -a; ASSERT_ALWAYS(b == -2);  // expression assigned to an existing object
+  }
+  {
+    mpz_class a(3);
+    mpz_class b;
+    b = ~a; ASSERT_ALWAYS(b == -4);  // complement of 3 is expected to be -4
+  }
+
+  // template <class T, class U, class Op>
+  // __gmp_expr<T, __gmp_unary_expr<__gmp_expr<T, U>, Op> >
+  {
+    mpz_class a(1);
+    mpz_class b(-(-a)); ASSERT_ALWAYS(b == 1);  // double negation
+  }
+  {
+    mpz_class a(2);
+    mpz_class b;
+    b = -(-(-a)); ASSERT_ALWAYS(b == -2);  // triple negation
+  }
+}
+
+void
+check_mpq (void)
+{  // Checks unary + and - on mpq_class, applied to plain values and to nested unary expressions.
+  // template <class T, class Op>
+  // __gmp_expr<T, __gmp_unary_expr<__gmp_expr<T, T>, Op> >
+  {
+    mpq_class a(1);
+    mpq_class b(+a); ASSERT_ALWAYS(b == 1);  // expression passed straight to the constructor
+  }
+  {
+    mpq_class a(2);
+    mpq_class b;
+    b = -a; ASSERT_ALWAYS(b == -2);  // expression assigned to an existing object
+  }
+
+  // template <class T, class U, class Op>
+  // __gmp_expr<T, __gmp_unary_expr<__gmp_expr<T, U>, Op> >
+  {
+    mpq_class a(1);
+    mpq_class b(-(-a)); ASSERT_ALWAYS(b == 1);  // double negation
+  }
+  {
+    mpq_class a(2);
+    mpq_class b;
+    b = -(-(-a)); ASSERT_ALWAYS(b == -2);  // triple negation
+  }
+}
+
+void
+check_mpf (void)
+{  // Checks unary + and - on mpf_class, applied to plain values and to nested unary expressions.
+  // template <class T, class Op>
+  // __gmp_expr<T, __gmp_unary_expr<__gmp_expr<T, T>, Op> >
+  {
+    mpf_class a(1);
+    mpf_class b(+a); ASSERT_ALWAYS(b == 1);  // expression passed straight to the constructor
+  }
+  {
+    mpf_class a(2);
+    mpf_class b;
+    b = -a; ASSERT_ALWAYS(b == -2);  // expression assigned to an existing object
+  }
+
+  // template <class T, class U, class Op>
+  // __gmp_expr<T, __gmp_unary_expr<__gmp_expr<T, U>, Op> >
+  {
+    mpf_class a(1);
+    mpf_class b(-(-a)); ASSERT_ALWAYS(b == 1);  // double negation
+  }
+  {
+    mpf_class a(2);
+    mpf_class b;
+    b = -(-(-a)); ASSERT_ALWAYS(b == -2);  // triple negation
+  }
+}
+
+
+int
+main (void)
+{  // Runs the mpz, mpq and mpf unary checks between tests_start() and tests_end(); returns 0.
+  tests_start();
+
+  check_mpz();
+  check_mpq();
+  check_mpf();
+
+  tests_end();
+  return 0;
+}
diff --git a/tests/devel/Makefile.am b/tests/devel/Makefile.am

new file mode 100644 (file)

index 0000000..5eada53
--- /dev/null
+++ b/tests/devel/Makefile.am
@@ -0,0 +1,34 @@
+## Process this file with automake to generate Makefile.in
+
+# Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/tests
+LDADD = $(top_builddir)/tests/libtests.la $(top_builddir)/libgmp.la
+
+# add_n_sub_n add_n_sub_n_2 not yet built since mpn_add_n_sub_n doesn't yet exist
+# The programs below are listed only in EXTRA_PROGRAMS; "make allprogs" builds all of them.
+EXTRA_PROGRAMS = \
+  aors_n anymul_1 copy divmod_1 divrem shift logops_n tst-addsub try
+
+allprogs: $(EXTRA_PROGRAMS)
+
+CLEANFILES = $(EXTRA_PROGRAMS)
+
+$(top_builddir)/tests/libtests.la:
+       cd $(top_builddir)/tests; $(MAKE) $(AM_MAKEFLAGS) libtests.la
diff --git a/tests/devel/Makefile.in b/tests/devel/Makefile.in

new file mode 100644 (file)

index 0000000..23d443e
--- /dev/null
+++ b/tests/devel/Makefile.in
@@ -0,0 +1,609 @@
+# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation,
+# Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+# Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+ANSI2KNR = $(top_builddir)/ansi2knr
+EXTRA_PROGRAMS = aors_n$(EXEEXT) anymul_1$(EXEEXT) copy$(EXEEXT) \
+       divmod_1$(EXEEXT) divrem$(EXEEXT) shift$(EXEEXT) \
+       logops_n$(EXEEXT) tst-addsub$(EXEEXT) try$(EXEEXT)
+subdir = tests/devel
+DIST_COMMON = README $(srcdir)/Makefile.am $(srcdir)/Makefile.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
+       $(top_srcdir)/configure.in
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+       $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+anymul_1_SOURCES = anymul_1.c
+anymul_1_OBJECTS = anymul_1$U.$(OBJEXT)
+anymul_1_LDADD = $(LDADD)
+anymul_1_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+aors_n_SOURCES = aors_n.c
+aors_n_OBJECTS = aors_n$U.$(OBJEXT)
+aors_n_LDADD = $(LDADD)
+aors_n_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+copy_SOURCES = copy.c
+copy_OBJECTS = copy$U.$(OBJEXT)
+copy_LDADD = $(LDADD)
+copy_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+divmod_1_SOURCES = divmod_1.c
+divmod_1_OBJECTS = divmod_1$U.$(OBJEXT)
+divmod_1_LDADD = $(LDADD)
+divmod_1_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+divrem_SOURCES = divrem.c
+divrem_OBJECTS = divrem$U.$(OBJEXT)
+divrem_LDADD = $(LDADD)
+divrem_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+logops_n_SOURCES = logops_n.c
+logops_n_OBJECTS = logops_n$U.$(OBJEXT)
+logops_n_LDADD = $(LDADD)
+logops_n_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+shift_SOURCES = shift.c
+shift_OBJECTS = shift$U.$(OBJEXT)
+shift_LDADD = $(LDADD)
+shift_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+try_SOURCES = try.c
+try_OBJECTS = try$U.$(OBJEXT)
+try_LDADD = $(LDADD)
+try_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+tst_addsub_SOURCES = tst-addsub.c
+tst_addsub_OBJECTS = tst-addsub$U.$(OBJEXT)
+tst_addsub_LDADD = $(LDADD)
+tst_addsub_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
+depcomp =
+am__depfiles_maybe =
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+       $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+       $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
+       $(LDFLAGS) -o $@
+SOURCES = anymul_1.c aors_n.c copy.c divmod_1.c divrem.c logops_n.c \
+       shift.c try.c tst-addsub.c
+DIST_SOURCES = anymul_1.c aors_n.c copy.c divmod_1.c divrem.c \
+       logops_n.c shift.c try.c tst-addsub.c
+ETAGS = etags
+CTAGS = ctags
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ABI = @ABI@
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AS = @AS@
+ASMFLAGS = @ASMFLAGS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CALLING_CONVENTIONS_OBJS = @CALLING_CONVENTIONS_OBJS@
+CC = @CC@
+CCAS = @CCAS@
+CC_FOR_BUILD = @CC_FOR_BUILD@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CPP_FOR_BUILD = @CPP_FOR_BUILD@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFN_LONG_LONG_LIMB = @DEFN_LONG_LONG_LIMB@
+DEFS = @DEFS@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+EXEEXT_FOR_BUILD = @EXEEXT_FOR_BUILD@
+FGREP = @FGREP@
+GMP_LDFLAGS = @GMP_LDFLAGS@
+GMP_LIMB_BITS = @GMP_LIMB_BITS@
+GMP_NAIL_BITS = @GMP_NAIL_BITS@
+GREP = @GREP@
+HAVE_CLOCK_01 = @HAVE_CLOCK_01@
+HAVE_CPUTIME_01 = @HAVE_CPUTIME_01@
+HAVE_GETRUSAGE_01 = @HAVE_GETRUSAGE_01@
+HAVE_GETTIMEOFDAY_01 = @HAVE_GETTIMEOFDAY_01@
+HAVE_HOST_CPU_FAMILY_power = @HAVE_HOST_CPU_FAMILY_power@
+HAVE_HOST_CPU_FAMILY_powerpc = @HAVE_HOST_CPU_FAMILY_powerpc@
+HAVE_SIGACTION_01 = @HAVE_SIGACTION_01@
+HAVE_SIGALTSTACK_01 = @HAVE_SIGALTSTACK_01@
+HAVE_SIGSTACK_01 = @HAVE_SIGSTACK_01@
+HAVE_STACK_T_01 = @HAVE_STACK_T_01@
+HAVE_SYS_RESOURCE_H_01 = @HAVE_SYS_RESOURCE_H_01@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LEX = @LEX@
+LEXLIB = @LEXLIB@
+LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
+LIBCURSES = @LIBCURSES@
+LIBGMPXX_LDFLAGS = @LIBGMPXX_LDFLAGS@
+LIBGMP_DLL = @LIBGMP_DLL@
+LIBGMP_LDFLAGS = @LIBGMP_LDFLAGS@
+LIBM = @LIBM@
+LIBM_FOR_BUILD = @LIBM_FOR_BUILD@
+LIBOBJS = @LIBOBJS@
+LIBREADLINE = @LIBREADLINE@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+M4 = @M4@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
+STRIP = @STRIP@
+TAL_OBJECT = @TAL_OBJECT@
+TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
+U = @U@
+U_FOR_BUILD = @U_FOR_BUILD@
+VERSION = @VERSION@
+WITH_READLINE_01 = @WITH_READLINE_01@
+YACC = @YACC@
+YFLAGS = @YFLAGS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__leading_dot = @am__leading_dot@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+gmp_srclinks = @gmp_srclinks@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+mpn_objects = @mpn_objects@
+mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
+mpn_objs_in_libmp = @mpn_objs_in_libmp@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/tests
+LDADD = $(top_builddir)/tests/libtests.la $(top_builddir)/libgmp.la
+CLEANFILES = $(EXTRA_PROGRAMS)
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps)
+       @for dep in $?; do \
+         case '$(am__configure_deps)' in \
+           *$$dep*) \
+             ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+               && { if test -f $@; then exit 0; else break; fi; }; \
+             exit 1;; \
+         esac; \
+       done; \
+       echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps tests/devel/Makefile'; \
+       $(am__cd) $(top_srcdir) && \
+         $(AUTOMAKE) --gnu --ignore-deps tests/devel/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+       @case '$?' in \
+         *config.status*) \
+           cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+         *) \
+           echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+           cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+       esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+anymul_1$(EXEEXT): $(anymul_1_OBJECTS) $(anymul_1_DEPENDENCIES) 
+       @rm -f anymul_1$(EXEEXT)
+       $(LINK) $(anymul_1_OBJECTS) $(anymul_1_LDADD) $(LIBS)
+aors_n$(EXEEXT): $(aors_n_OBJECTS) $(aors_n_DEPENDENCIES) 
+       @rm -f aors_n$(EXEEXT)
+       $(LINK) $(aors_n_OBJECTS) $(aors_n_LDADD) $(LIBS)
+copy$(EXEEXT): $(copy_OBJECTS) $(copy_DEPENDENCIES) 
+       @rm -f copy$(EXEEXT)
+       $(LINK) $(copy_OBJECTS) $(copy_LDADD) $(LIBS)
+divmod_1$(EXEEXT): $(divmod_1_OBJECTS) $(divmod_1_DEPENDENCIES) 
+       @rm -f divmod_1$(EXEEXT)
+       $(LINK) $(divmod_1_OBJECTS) $(divmod_1_LDADD) $(LIBS)
+divrem$(EXEEXT): $(divrem_OBJECTS) $(divrem_DEPENDENCIES) 
+       @rm -f divrem$(EXEEXT)
+       $(LINK) $(divrem_OBJECTS) $(divrem_LDADD) $(LIBS)
+logops_n$(EXEEXT): $(logops_n_OBJECTS) $(logops_n_DEPENDENCIES) 
+       @rm -f logops_n$(EXEEXT)
+       $(LINK) $(logops_n_OBJECTS) $(logops_n_LDADD) $(LIBS)
+shift$(EXEEXT): $(shift_OBJECTS) $(shift_DEPENDENCIES) 
+       @rm -f shift$(EXEEXT)
+       $(LINK) $(shift_OBJECTS) $(shift_LDADD) $(LIBS)
+try$(EXEEXT): $(try_OBJECTS) $(try_DEPENDENCIES) 
+       @rm -f try$(EXEEXT)
+       $(LINK) $(try_OBJECTS) $(try_LDADD) $(LIBS)
+tst-addsub$(EXEEXT): $(tst_addsub_OBJECTS) $(tst_addsub_DEPENDENCIES) 
+       @rm -f tst-addsub$(EXEEXT)
+       $(LINK) $(tst_addsub_OBJECTS) $(tst_addsub_LDADD) $(LIBS)
+
+mostlyclean-compile:
+       -rm -f *.$(OBJEXT)
+
+distclean-compile:
+       -rm -f *.tab.c
+$(top_builddir)/ansi2knr:
+       $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
+
+mostlyclean-kr:
+       -test "$U" = "" || rm -f *_.c
+
+.c.o:
+       $(COMPILE) -c $<
+
+.c.obj:
+       $(COMPILE) -c `$(CYGPATH_W) '$<'`
+
+.c.lo:
+       $(LTCOMPILE) -c -o $@ $<
+anymul_1_.c: anymul_1.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/anymul_1.c; then echo $(srcdir)/anymul_1.c; else echo anymul_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+aors_n_.c: aors_n.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/aors_n.c; then echo $(srcdir)/aors_n.c; else echo aors_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+copy_.c: copy.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/copy.c; then echo $(srcdir)/copy.c; else echo copy.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+divmod_1_.c: divmod_1.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divmod_1.c; then echo $(srcdir)/divmod_1.c; else echo divmod_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+divrem_.c: divrem.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divrem.c; then echo $(srcdir)/divrem.c; else echo divrem.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+logops_n_.c: logops_n.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/logops_n.c; then echo $(srcdir)/logops_n.c; else echo logops_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+shift_.c: shift.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/shift.c; then echo $(srcdir)/shift.c; else echo shift.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+try_.c: try.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/try.c; then echo $(srcdir)/try.c; else echo try.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+tst-addsub_.c: tst-addsub.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tst-addsub.c; then echo $(srcdir)/tst-addsub.c; else echo tst-addsub.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+anymul_1_.$(OBJEXT) anymul_1_.lo aors_n_.$(OBJEXT) aors_n_.lo \
+copy_.$(OBJEXT) copy_.lo divmod_1_.$(OBJEXT) divmod_1_.lo \
+divrem_.$(OBJEXT) divrem_.lo logops_n_.$(OBJEXT) logops_n_.lo \
+shift_.$(OBJEXT) shift_.lo try_.$(OBJEXT) try_.lo \
+tst-addsub_.$(OBJEXT) tst-addsub_.lo : $(ANSI2KNR)
+
+mostlyclean-libtool:
+       -rm -f *.lo
+
+clean-libtool:
+       -rm -rf .libs _libs
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+       list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       mkid -fID $$unique
+tags: TAGS
+
+TAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+               $(TAGS_FILES) $(LISP)
+       set x; \
+       here=`pwd`; \
+       list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       shift; \
+       if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+         test -n "$$unique" || unique=$$empty_fix; \
+         if test $$# -gt 0; then \
+           $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+             "$$@" $$unique; \
+         else \
+           $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+             $$unique; \
+         fi; \
+       fi
+ctags: CTAGS
+CTAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+               $(TAGS_FILES) $(LISP)
+       list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       test -z "$(CTAGS_ARGS)$$unique" \
+         || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+            $$unique
+
+GTAGS:
+       here=`$(am__cd) $(top_builddir) && pwd` \
+         && $(am__cd) $(top_srcdir) \
+         && gtags -i $(GTAGS_ARGS) "$$here"
+
+distclean-tags:
+       -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+       @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+       topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+       list='$(DISTFILES)'; \
+         dist_files=`for file in $$list; do echo $$file; done | \
+         sed -e "s|^$$srcdirstrip/||;t" \
+             -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+       case $$dist_files in \
+         */*) $(MKDIR_P) `echo "$$dist_files" | \
+                          sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+                          sort -u` ;; \
+       esac; \
+       for file in $$dist_files; do \
+         if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+         if test -d $$d/$$file; then \
+           dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+           if test -d "$(distdir)/$$file"; then \
+             find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+           fi; \
+           if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+             cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+             find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+           fi; \
+           cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+         else \
+           test -f "$(distdir)/$$file" \
+           || cp -p $$d/$$file "$(distdir)/$$file" \
+           || exit 1; \
+         fi; \
+       done
+check-am: all-am
+check: check-am
+all-am: Makefile
+installdirs:
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+       @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+       $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+         install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+         `test -z '$(STRIP)' || \
+           echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+       -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES)
+
+distclean-generic:
+       -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+       -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+       @echo "This command is intended for maintainers to use"
+       @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-am
+       -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+       distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+       -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+       mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: $(top_builddir)/ansi2knr install-am install-strip
+
+.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
+       clean-libtool ctags distclean distclean-compile \
+       distclean-generic distclean-libtool distclean-tags distdir dvi \
+       dvi-am html html-am info info-am install install-am \
+       install-data install-data-am install-dvi install-dvi-am \
+       install-exec install-exec-am install-html install-html-am \
+       install-info install-info-am install-man install-pdf \
+       install-pdf-am install-ps install-ps-am install-strip \
+       installcheck installcheck-am installdirs maintainer-clean \
+       maintainer-clean-generic mostlyclean mostlyclean-compile \
+       mostlyclean-generic mostlyclean-kr mostlyclean-libtool pdf \
+       pdf-am ps ps-am tags uninstall uninstall-am
+
+
+allprogs: $(EXTRA_PROGRAMS)
+
+$(top_builddir)/tests/libtests.la:     # built by the Makefile in tests/
+       cd $(top_builddir)/tests && $(MAKE) $(AM_MAKEFLAGS) libtests.la
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/tests/devel/README b/tests/devel/README

new file mode 100644 (file)

index 0000000..d224f1a
--- /dev/null
+++ b/tests/devel/README
@@ -0,0 +1,37 @@
+Copyright 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+
+
+                       DEVELOPMENT TEST PROGRAMS
+
+
+This directory contains various programs used during development.  Casual
+GMP users are unlikely to find anything of interest.
+
+Nothing here is built or installed, nor even run in a "make check", but
+there are Makefile rules to build each program, or "allprogs" to build
+everything.
+
+
+
+----------------
+Local variables:
+mode: text
+fill-column: 76
+End:
diff --git a/tests/devel/anymul_1.c b/tests/devel/anymul_1.c

new file mode 100644 (file)

index 0000000..27a1a74
--- /dev/null
+++ b/tests/devel/anymul_1.c
@@ -0,0 +1,251 @@
+/*
+Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2004, 2006, 2007, 2008
+Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+#include "tests.h"
+
+#ifdef OPERATION_mul_1
+#define func __gmpn_mul_1
+#define reffunc refmpn_mul_1
+#define funcname "mpn_mul_1"
+#endif
+
+#ifdef OPERATION_addmul_1
+#define func __gmpn_addmul_1
+#define reffunc refmpn_addmul_1
+#define funcname "mpn_addmul_1"
+#endif
+
+#ifdef OPERATION_submul_1
+#define func __gmpn_submul_1
+#define reffunc refmpn_submul_1
+#define funcname "mpn_submul_1"
+#endif
+
+#if defined (USG) || defined (__SVR4) || defined (_UNICOS) || defined (__hpux)
+#include <time.h>
+
+/* Processor time used so far according to clock(), in milliseconds.  */
+int
+cputime ()
+{
+  return (CLOCKS_PER_SEC < 100000 ? clock () * 1000 / CLOCKS_PER_SEC
+         : clock () / (CLOCKS_PER_SEC / 1000));
+}
+#else
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+
+/* User CPU time (ru_utime from getrusage) so far, in milliseconds.  */
+int
+cputime ()
+{
+  struct rusage usage;
+  getrusage (0, &usage);
+  return usage.ru_utime.tv_usec / 1000 + usage.ru_utime.tv_sec * 1000;
+}
+#endif
+
+static void print_posneg (mp_limb_t);
+static void mpn_print (mp_ptr, mp_size_t);
+
+#define LXW ((int) (2 * sizeof (mp_limb_t)))
+#define M * 1000000
+
+#ifndef CLOCK
+#error "Don't know CLOCK of your machine"
+#endif
+
+#ifndef OPS
+#define OPS (CLOCK/5)
+#endif
+#ifndef SIZE
+#define SIZE 496
+#endif
+#ifndef TIMES
+#define TIMES OPS/(SIZE+1)
+#endif
+
+/* Time func, then check it against reffunc on random operands.  */
+int
+main (int argc, char **argv)
+{
+  mp_ptr s1, ref, rp;
+  mp_limb_t cy_ref, cy_try, xlimb;
+  int i;
+  long t0, t;
+  unsigned int test, ntests;
+  mp_size_t size;
+  double cyc;
+
+  s1 = malloc (SIZE * sizeof (mp_limb_t));
+  ref = malloc (SIZE * sizeof (mp_limb_t));
+  rp = malloc ((SIZE + 2) * sizeof (mp_limb_t));
+  ASSERT_ALWAYS (s1 != NULL && ref != NULL && rp != NULL);
+  rp++;                        /* leave room for the guard limb at rp[-1] */
+
+  ntests = ~(unsigned) 0;      /* default: no practical limit */
+  if (argc == 2)
+    ntests = strtol (argv[1], 0, 0);
+
+  for (test = 1; test <= ntests; test++)
+    {
+#if TIMES == 1 && ! defined (PRINT)
+      if (test % (1 + 0x80000 / (SIZE + 20)) == 0)
+       {
+         printf ("\r%u", test);
+         fflush (stdout);
+       }
+#endif
+
+#ifdef RANDOM
+      size = random () % SIZE + 1;
+#else
+      size = SIZE;
+#endif
+
+      rp[-1] = 0x87654321;     /* guard limbs just outside the result area */
+      rp[size] = 0x12345678;
+
+#ifdef FIXED_XLIMB
+      xlimb = FIXED_XLIMB;
+#else
+      mpn_random2 (&xlimb, 1);
+#endif
+
+#if TIMES != 1
+      mpn_random (s1, size);
+      mpn_random (rp, size);
+
+      MPN_COPY (ref, rp, size);
+      t0 = cputime();
+      for (i = 0; i < TIMES; i++)
+       func (ref, s1, size, xlimb);
+      t = cputime() - t0;
+      cyc = ((double) t * CLOCK) / (TIMES * size * 1000.0);
+      printf (funcname ":    %5ldms (%.3f cycles/limb) [%.2f Gb/s]\n",
+             t, cyc,
+             CLOCK/cyc*GMP_LIMB_BITS*GMP_LIMB_BITS/1e9);
+#endif
+
+#ifndef NOCHECK
+      mpn_random2 (s1, size);
+#ifdef ZERO
+      memset (rp, 0, size * sizeof *rp);
+#else
+      mpn_random2 (rp, size);
+#endif
+#if defined (PRINT) || defined (XPRINT)
+      printf ("xlimb=");
+      mpn_print (&xlimb, 1);
+#endif
+#ifdef PRINT
+#ifndef OPERATION_mul_1
+      printf ("%*s ", (int) (2 * sizeof(mp_limb_t)), "");
+      mpn_print (rp, size);
+#endif
+      printf ("%*s ", (int) (2 * sizeof(mp_limb_t)), "");
+      mpn_print (s1, size);
+#endif
+
+      /* Same inputs to both: ref gets the reference result, rp the tested one.  */
+      MPN_COPY (ref, rp, size);
+      cy_ref = reffunc (ref, s1, size, xlimb);
+      cy_try = func (rp, s1, size, xlimb);
+
+#ifdef PRINT
+      mpn_print (&cy_ref, 1);
+      mpn_print (ref, size);
+      mpn_print (&cy_try, 1);
+      mpn_print (rp, size);
+#endif
+
+      if (cy_ref != cy_try || mpn_cmp (ref, rp, size) != 0
+         || rp[-1] != 0x87654321 || rp[size] != 0x12345678)
+       {
+         printf ("\n        ref%*s try%*s diff\n", LXW - 3, "", 2 * LXW - 6, "");
+         for (i = 0; i < size; i++)
+           {
+             printf ("%6d: ", i);
+             printf ("%0*llX ", LXW, (unsigned long long) ref[i]);
+             printf ("%0*llX ", LXW, (unsigned long long) rp[i]);
+             print_posneg (rp[i] - ref[i]);
+             printf ("\n");
+           }
+         printf ("retval: ");
+         printf ("%0*llX ", LXW, (unsigned long long) cy_ref);
+         printf ("%0*llX ", LXW, (unsigned long long) cy_try);
+         print_posneg (cy_try - cy_ref);
+         printf ("\n");
+         if (rp[-1] != 0x87654321)
+           printf ("clobbered at low end\n");
+         if (rp[size] != 0x12345678)
+           printf ("clobbered at high end\n");
+         printf ("TEST NUMBER %u\n", test);
+         abort();
+       }
+#endif
+    }
+  exit (0);
+}
+
+/* Print D as a sign and hex magnitude, right-justified in LXW+1 columns;
+   values above half the limb range are shown as negative.  */
+static void
+print_posneg (mp_limb_t d)
+{
+  char digits[LXW + 2];
+  int negative = -d < d;
+
+  if (d == 0)
+    {
+      printf (" %*X", LXW, 0);
+      return;
+    }
+  sprintf (digits, "%llX", (unsigned long long) (negative ? -d : d));
+  printf ("%*s%c%s", LXW - (int) strlen (digits), "", negative ? '-' : '+',
+         digits);
+}
+
+/* Print SIZE limbs at P in hex, most significant first, then a newline.  */
+static void
+mpn_print (mp_ptr p, mp_size_t size)
+{
+  int half = (int) sizeof (mp_limb_t); /* hex digits in half a limb */
+  mp_size_t n = size;
+  while (n-- > 0)
+    {
+#ifdef _LONG_LONG_LIMB
+      printf ("%0*lX", half, (unsigned long) (p[n] >> (GMP_LIMB_BITS/2)));
+      printf ("%0*lX", half, (unsigned long) (p[n]));
+#else
+      printf ("%0*lX", 2 * half, p[n]);
+#endif
+#ifdef SPACE
+      if (n > 0)
+       printf (" ");
+#endif
+    }
+  putchar ('\n');
+}
diff --git a/tests/devel/aors_n.c b/tests/devel/aors_n.c

new file mode 100644 (file)

index 0000000..99069f2
--- /dev/null
+++ b/tests/devel/aors_n.c
@@ -0,0 +1,228 @@
+/*
+Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2009
+Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+#ifdef OPERATION_add_n
+#define func __gmpn_add_n
+#define reffunc refmpn_add_n
+#define funcname "mpn_add_n"
+#endif
+
+#ifdef OPERATION_sub_n
+#define func __gmpn_sub_n
+#define reffunc refmpn_sub_n
+#define funcname "mpn_sub_n"
+#endif
+
+#ifdef OPERATION_addlsh1_n
+#define func __gmpn_addlsh1_n
+#define reffunc refmpn_addlsh1_n
+#define funcname "mpn_addlsh1_n"
+#endif
+
+#ifdef OPERATION_sublsh1_n
+#define func __gmpn_sublsh1_n
+#define reffunc refmpn_sublsh1_n
+#define funcname "mpn_sublsh1_n"
+#endif
+
+#ifdef OPERATION_rsh1add_n
+#define func __gmpn_rsh1add_n
+#define reffunc refmpn_rsh1add_n
+#define funcname "mpn_rsh1add_n"
+#endif
+
+#ifdef OPERATION_rsh1sub_n
+#define func __gmpn_rsh1sub_n
+#define reffunc refmpn_rsh1sub_n
+#define funcname "mpn_rsh1sub_n"
+#endif
+
+#if defined (USG) || defined (__SVR4) || defined (_UNICOS) || defined (__hpux)
+#include <time.h>
+
+/* Processor time used so far according to clock(), in milliseconds.  */
+int
+cputime ()
+{
+  return (CLOCKS_PER_SEC < 100000 ? clock () * 1000 / CLOCKS_PER_SEC
+         : clock () / (CLOCKS_PER_SEC / 1000));
+}
+#else
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+
+/* User CPU time (ru_utime from getrusage) so far, in milliseconds.  */
+int
+cputime ()
+{
+  struct rusage usage;
+  getrusage (0, &usage);
+  return usage.ru_utime.tv_usec / 1000 + usage.ru_utime.tv_sec * 1000;
+}
+#endif
+
+static void mpn_print (mp_ptr, mp_size_t);
+
+#define M * 1000000
+
+#ifndef CLOCK
+#error "Don't know CLOCK of your machine"
+#endif
+
+#ifndef OPS
+#define OPS (CLOCK/5)
+#endif
+#ifndef SIZE
+#define SIZE 328
+#endif
+#ifndef TIMES
+#define TIMES OPS/(SIZE+1)
+#endif
+
+/* Time func, then compare it with reffunc on random operands.  dx and dy
+   each carry one guard limb below and one above the result area.  */
+int
+main (int argc, char **argv)
+{
+  mp_ptr s1, s2, dx, dy;
+  mp_limb_t cyx, cyy;
+  int i;
+#if TIMES != 1
+  long t0, t;
+#endif
+  unsigned int test, ntests;
+  mp_size_t size;
+
+  s1 = malloc (SIZE * sizeof (mp_limb_t));
+  s2 = malloc (SIZE * sizeof (mp_limb_t));
+  dx = malloc ((SIZE + 2) * sizeof (mp_limb_t));
+  dy = malloc ((SIZE + 2) * sizeof (mp_limb_t));
+  ASSERT_ALWAYS (s1 != NULL && s2 != NULL && dx != NULL && dy != NULL);
+
+  ntests = ~(unsigned) 0;      /* default: no practical limit */
+  if (argc == 2)
+    ntests = strtol (argv[1], 0, 0);
+
+  for (test = 1; test <= ntests; test++)
+    {
+#if TIMES == 1 && ! defined (PRINT)
+      if (test % (SIZE > 100000 ? 1 : 100000 / SIZE) == 0)
+       {
+         printf ("\r%u", test);
+         fflush (stdout);
+       }
+#endif
+
+#ifdef RANDOM
+      size = random () % SIZE + 1;
+#else
+      size = SIZE;
+#endif
+
+      dx[0] = dy[0] = 0x87654321;
+      dx[size+1] = dy[size+1] = 0x12345678;
+
+#if TIMES != 1
+      mpn_random (s1, size);
+      mpn_random (s2, size);
+
+      t0 = cputime();
+      for (i = 0; i < TIMES; i++)
+       func (dx+1, s1, s2, size);
+      t = cputime() - t0;
+      printf (funcname ":    %5ldms (%.3f cycles/limb)\n",
+             t, ((double) t * CLOCK) / (TIMES * size * 1000.0));
+#endif
+
+#ifndef NOCHECK
+      mpn_random2 (s1, size);
+      mpn_random2 (s2, size);
+
+#ifdef PRINT
+      mpn_print (s1, size);
+      mpn_print (s2, size);
+#endif
+
+      /* Put garbage in the destination.  */
+      for (i = 0; i < size; i++)
+       {
+         dx[i+1] = 0xdead;
+         dy[i+1] = 0xbeef;
+       }
+
+      cyx = reffunc (dx+1, s1, s2, size);
+      cyy = func (dy+1, s1, s2, size);
+
+#ifdef PRINT
+      mpn_print (&cyx, 1);
+      mpn_print (dx+1, size);
+      mpn_print (&cyy, 1);
+      mpn_print (dy+1, size);
+#endif
+
+      if (cyx != cyy || mpn_cmp (dx, dy, size+2) != 0
+         || dx[0] != 0x87654321 || dx[size+1] != 0x12345678)
+       {
+#ifndef PRINT
+         mpn_print (&cyx, 1);
+         mpn_print (dx+1, size);
+         mpn_print (&cyy, 1);
+         mpn_print (dy+1, size);
+#endif
+         printf ("\n");
+         if (dy[0] != 0x87654321)
+           printf ("clobbered at low end\n");
+         if (dy[size+1] != 0x12345678)
+           printf ("clobbered at high end\n");
+         printf ("TEST NUMBER %u\n", test);
+         abort();
+       }
+#endif
+    }
+  exit (0);
+}
+
+/* Print SIZE limbs at P in hex, most significant first, then a newline.  */
+static void
+mpn_print (mp_ptr p, mp_size_t size)
+{
+  int half = (int) sizeof (mp_limb_t); /* hex digits in half a limb */
+  mp_size_t n = size;
+  while (n-- > 0)
+    {
+#ifdef _LONG_LONG_LIMB
+      printf ("%0*lX", half, (unsigned long) (p[n] >> (GMP_LIMB_BITS/2)));
+      printf ("%0*lX", half, (unsigned long) (p[n]));
+#else
+      printf ("%0*lX", 2 * half, p[n]);
+#endif
+#ifdef SPACE
+      if (n > 0)
+       printf (" ");
+#endif
+    }
+  putchar ('\n');
+}
diff --git a/tests/devel/copy.c b/tests/devel/copy.c

new file mode 100644 (file)

index 0000000..d8cbbd9
--- /dev/null
+++ b/tests/devel/copy.c
@@ -0,0 +1,192 @@
+/*
+Copyright 1999, 2000, 2001, 2004, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+#ifdef OPERATION_copyi
+#define func MPN_COPY_INCR
+#define reffunc refmpn_copyi
+#define funcname "MPN_COPY_INCR"
+#endif
+
+#ifdef OPERATION_copyd
+#define func MPN_COPY_DECR
+#define reffunc refmpn_copyd
+#define funcname "MPN_COPY_DECR"
+#endif
+
+#if defined (USG) || defined (__SVR4) || defined (_UNICOS) || defined (__hpux)
+#include <time.h>
+
+/* Processor time used so far according to clock(), in milliseconds.  */
+int
+cputime ()
+{
+  return (CLOCKS_PER_SEC < 100000 ? clock () * 1000 / CLOCKS_PER_SEC
+         : clock () / (CLOCKS_PER_SEC / 1000));
+}
+#else
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+
+/* User CPU time (ru_utime from getrusage) so far, in milliseconds.  */
+int
+cputime ()
+{
+  struct rusage usage;
+  getrusage (0, &usage);
+  return usage.ru_utime.tv_usec / 1000 + usage.ru_utime.tv_sec * 1000;
+}
+#endif
+
+static void mpn_print (mp_ptr, mp_size_t);
+
+#define M * 1000000
+
+#ifndef CLOCK
+#error "Don't know CLOCK of your machine"
+#endif
+
+#ifndef OPS
+#define OPS (CLOCK/2)
+#endif
+#ifndef SIZE
+#define SIZE 496
+#endif
+#ifndef TIMES
+#define TIMES OPS/(SIZE+1)
+#endif
+
+/* Time func, then compare it with reffunc on random operands.  dx and dy
+   each carry one guard limb below and one above the result area.  */
+int
+main (int argc, char **argv)
+{
+  mp_ptr s1, dx, dy;
+  int i;
+  long t0, t;
+  unsigned int test, ntests;
+  mp_size_t size;
+
+  s1 = malloc (SIZE * sizeof (mp_limb_t));
+  dx = malloc ((SIZE + 2) * sizeof (mp_limb_t));
+  dy = malloc ((SIZE + 2) * sizeof (mp_limb_t));
+  ASSERT_ALWAYS (s1 != NULL && dx != NULL && dy != NULL);
+
+  ntests = ~(unsigned) 0;      /* default: no practical limit */
+  if (argc == 2)
+    ntests = strtol (argv[1], 0, 0);
+
+  for (test = 1; test <= ntests; test++)
+    {
+#if TIMES == 1 && ! defined (PRINT)
+      if (test % (SIZE > 100000 ? 1 : 100000 / SIZE) == 0)
+       {
+         printf ("\r%u", test);
+         fflush (stdout);
+       }
+#endif
+
+#ifdef RANDOM
+      size = random () % SIZE + 1;
+#else
+      size = SIZE;
+#endif
+
+      dx[0] = dy[0] = 0x87654321;
+      dx[size+1] = dy[size+1] = 0x12345678;
+
+#if TIMES != 1
+      mpn_random (s1, size);
+
+      t0 = cputime();
+      for (i = 0; i < TIMES; i++)
+       func (dx+1, s1, size);
+      t = cputime() - t0;
+      printf (funcname ":    %5ldms (%.3f cycles/limb)\n",
+             t, ((double) t * CLOCK) / (TIMES * size * 1000.0));
+#endif
+
+#ifndef NOCHECK
+      mpn_random2 (s1, size);
+
+#ifdef PRINT
+      mpn_print (s1, size);
+#endif
+
+      /* Put garbage in the destination.  */
+      for (i = 0; i < size; i++)
+       {
+         dx[i+1] = 0xdead;
+         dy[i+1] = 0xbeef;
+       }
+
+      reffunc (dx+1, s1, size);
+      func (dy+1, s1, size);
+
+#ifdef PRINT
+      mpn_print (dx+1, size);
+      mpn_print (dy+1, size);
+#endif
+
+      if (mpn_cmp (dx, dy, size+2) != 0
+         || dx[0] != 0x87654321 || dx[size+1] != 0x12345678)
+       {
+#ifndef PRINT
+         mpn_print (dx+1, size);
+         mpn_print (dy+1, size);
+#endif
+         printf ("\n");
+         if (dy[0] != 0x87654321)
+           printf ("clobbered at low end\n");
+         if (dy[size+1] != 0x12345678)
+           printf ("clobbered at high end\n");
+         printf ("TEST NUMBER %u\n", test);
+         abort();
+       }
+#endif
+    }
+  exit (0);
+}
+
+/* Print SIZE limbs at P in hex, most significant first, then a newline.  */
+static void
+mpn_print (mp_ptr p, mp_size_t size)
+{
+  int half = (int) sizeof (mp_limb_t); /* hex digits in half a limb */
+  mp_size_t n = size;
+  while (n-- > 0)
+    {
+#ifdef _LONG_LONG_LIMB
+      printf ("%0*lX", half, (unsigned long) (p[n] >> (GMP_LIMB_BITS/2)));
+      printf ("%0*lX", half, (unsigned long) (p[n]));
+#else
+      printf ("%0*lX", 2 * half, p[n]);
+#endif
+#ifdef SPACE
+      if (n > 0)
+       printf (" ");
+#endif
+    }
+  putchar ('\n');
+}
diff --git a/tests/devel/divmod_1.c b/tests/devel/divmod_1.c

new file mode 100644 (file)

index 0000000..8487775
--- /dev/null
+++ b/tests/devel/divmod_1.c
@@ -0,0 +1,200 @@
+/*
+Copyright 1996, 1998, 2000, 2001, 2007 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+#if defined (USG) || defined (__SVR4) || defined (_UNICOS) || defined (__hpux)
+#include <time.h>
+
+/* Processor time used so far according to clock(), in milliseconds.  */
+int
+cputime ()
+{
+  return (CLOCKS_PER_SEC < 100000 ? clock () * 1000 / CLOCKS_PER_SEC
+         : clock () / (CLOCKS_PER_SEC / 1000));
+}
+#else
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+
+/* User CPU time (ru_utime from getrusage) so far, in milliseconds.  */
+int
+cputime ()
+{
+  struct rusage usage;
+  getrusage (0, &usage);
+  return usage.ru_utime.tv_usec / 1000 + usage.ru_utime.tv_sec * 1000;
+}
+#endif
+
+static void mpn_print (mp_ptr, mp_size_t);
+
+#define M * 1000000
+
+#ifndef CLOCK
+#error "Don't know CLOCK of your machine"
+#endif
+
+#ifndef OPS
+#define OPS 20000000
+#endif
+#ifndef SIZE
+#define SIZE 1000
+#endif
+#ifndef TIMES
+#define TIMES OPS/SIZE
+#endif
+
+#ifndef FSIZE
+#define FSIZE SIZE
+#endif
+
+/* Time mpn_divrem_1, then check it against refmpn_divrem_1, forever.  */
+int
+main ()
+{
+  mp_limb_t np[SIZE];
+  mp_limb_t dx[SIZE + FSIZE + 2];
+  mp_limb_t dy[SIZE + FSIZE + 2];
+  mp_limb_t dlimb, retx, rety;
+  mp_size_t nn, fn;
+  int test;
+#if TIMES != 1
+  int i;
+  long t0, t;
+  double cyc;
+#endif
+
+  for (test = 0; ; test++)
+    {
+#if TIMES == 1 && ! defined (PRINT)
+      if (test % (SIZE > 100000 ? 1 : 100000 / SIZE) == 0)
+       {
+         printf ("\r%d", test);
+         fflush (stdout);
+       }
+#endif
+
+#ifdef RANDOM
+      nn = random () % (SIZE + 1);
+      fn = random () % (FSIZE + 1);
+#else
+      nn = SIZE;
+      fn = FSIZE;
+#endif
+
+      /* Guard limbs on either side of the nn+fn quotient limbs.  */
+      dx[0] = 0x87654321;
+      dx[nn + fn + 1] = 0x12345678;
+      dy[0] = 0x87654321;
+      dy[nn + fn + 1] = 0x12345678;
+      mpn_random2 (np, nn);
+
+#ifdef FIXED_DLIMB
+      dlimb = FIXED_DLIMB;
+#else
+      do
+       {
+         mpn_random2 (&dlimb, 1);
+#ifdef FORCE_NORM
+         dlimb |= GMP_NUMB_HIGHBIT;
+#endif
+#ifdef FORCE_UNNORM
+         dlimb &= GMP_NUMB_MAX >> 1;
+#endif
+       }
+      while (dlimb == 0);
+#endif
+
+#if defined (PRINT) || defined (XPRINT)
+      printf ("N=");
+      mpn_print (np, nn);
+      printf ("D=");
+      mpn_print (&dlimb, 1);
+      printf ("nn=%ld\n", (long) nn);
+#endif
+
+#if TIMES != 1
+      t0 = cputime();
+      for (i = 0; i < TIMES; i++)
+       mpn_divrem_1 (dx + 1, 0L, np, nn, dlimb);
+      t = cputime() - t0;
+      cyc = ((double) t * CLOCK) / (TIMES * nn * 1000.0);
+      printf ("mpn_divrem_1 int:    %5ldms (%.3f cycles/limb) [%.2f Gb/s]\n",
+             t, cyc,
+             CLOCK/cyc*GMP_LIMB_BITS*GMP_LIMB_BITS/1e9);
+      t0 = cputime();
+      for (i = 0; i < TIMES; i++)
+       mpn_divrem_1 (dx + 1, fn, np, 0, dlimb);
+      t = cputime() - t0;
+      cyc = ((double) t * CLOCK) / (TIMES * fn * 1000.0);
+      printf ("mpn_divrem_1 frac:   %5ldms (%.3f cycles/limb) [%.2f Gb/s]\n",
+             t, cyc,
+             CLOCK/cyc*GMP_LIMB_BITS*GMP_LIMB_BITS/1e9);
+#endif
+
+      retx = refmpn_divrem_1 (dx + 1, fn, np, nn, dlimb);
+      rety = mpn_divrem_1 (dy + 1, fn, np, nn, dlimb);
+
+#ifndef NOCHECK
+      if (retx != rety || mpn_cmp (dx, dy, fn + nn + 2) != 0)
+       {
+         printf ("ERROR in test %d, nn=%ld, fn=%ld\n", test, (long) nn, (long) fn);
+         mpn_print (np, nn);
+         mpn_print (&dlimb, 1);
+         printf ("rq: ");
+         mpn_print (dx + 1, nn + fn);
+         printf ("rr: %*llX\n", (int) (2 * sizeof(mp_limb_t)), (unsigned long long) retx);
+         printf (" q: ");
+         mpn_print (dy + 1, nn + fn);
+         printf (" r: %*llX\n", (int) (2 * sizeof(mp_limb_t)), (unsigned long long) rety);
+         if (dy[0] != 0x87654321)
+           printf ("clobbered at low end %*llX\n", (int) (2 * sizeof(mp_limb_t)), (unsigned long long) dy[0]);
+         if (dy[nn + fn + 1] != 0x12345678)
+           printf ("clobbered at high end\n");
+         abort ();
+       }
+#endif
+    }
+}
+
+/* Print SIZE limbs at P in hex, most significant first, then a newline.  */
+static void
+mpn_print (mp_ptr p, mp_size_t size)
+{
+  int half = (int) sizeof (mp_limb_t); /* hex digits in half a limb */
+  mp_size_t n = size;
+  while (n-- > 0)
+    {
+#ifdef _LONG_LONG_LIMB
+      printf ("%0*lX", half, (unsigned long) (p[n] >> (GMP_LIMB_BITS/2)));
+      printf ("%0*lX", half, (unsigned long) (p[n]));
+#else
+      printf ("%0*lX", 2 * half, p[n]);
+#endif
+#ifdef SPACE
+      if (n > 0)
+       printf (" ");
+#endif
+    }
+  putchar ('\n');
+}
diff --git a/tests/devel/divrem.c b/tests/devel/divrem.c

new file mode 100644 (file)

index 0000000..8d76227
--- /dev/null
+++ b/tests/devel/divrem.c
@@ -0,0 +1,120 @@
+/*
+Copyright 1996, 1997, 1998, 2000, 2001, 2007, 2009 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#if defined (USG) || defined (__SVR4) || defined (_UNICOS) || defined (__hpux)
+#include <time.h>
+
+/* Processor time used so far, in milliseconds, derived from clock().  With a
+   coarse clock (CLOCKS_PER_SEC below 100000) the tick count is scaled up
+   before dividing; otherwise it is divided by the ticks per millisecond.  */
+int
+cputime ()
+{
+  return (CLOCKS_PER_SEC < 100000
+          ? clock () * 1000 / CLOCKS_PER_SEC
+          : clock () / (CLOCKS_PER_SEC / 1000));
+}
+#else
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+
+/* Return the user CPU time reported by getrusage() for the calling process,
+   in milliseconds (ru_utime only; ru_stime is not added in).  */
+int
+cputime ()
+{
+  struct rusage rus;
+
+  /* Name the "who" argument instead of passing a bare 0.  */
+  getrusage (RUSAGE_SELF, &rus);
+  return rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000;
+}
+#endif
+
+#define M * 1000000
+
+#ifndef CLOCK
+#error "Don't know CLOCK of your machine"
+#endif
+
+#ifndef OPS
+#define OPS 20000000
+#endif
+#ifndef SIZE
+#define SIZE 100
+#endif
+#ifndef TIMES
+#define TIMES OPS/(SIZE+1)
+#endif
+
+/* Endless self-check of mpn_divrem.  Each round prints the test number,
+   divides a random operand by a random divisor whose top bit is forced on,
+   then rebuilds quotient*divisor + remainder and aborts if that differs
+   from the original operand.  The loop has no exit condition of its own.  */
+int
+main ()
+{
+  mp_limb_t nptr[2 * SIZE];       /* original dividend */
+  mp_limb_t dptr[2 * SIZE];       /* divisor */
+  mp_limb_t qptr[2 * SIZE];       /* quotient */
+  mp_limb_t pptr[2 * SIZE + 1];   /* quotient*divisor + remainder */
+  mp_limb_t rptr[2 * SIZE];       /* copy of the dividend given to mpn_divrem */
+  mp_size_t nsize, dsize, qsize, rsize, psize;
+  mp_limb_t qlimb, cy;
+  int test;
+
+  for (test = 0; ; test++)
+    {
+      printf ("%d\n", test);
+
+#ifdef RANDOM
+      nsize = random () % (2 * SIZE) + 1;
+      dsize = random () % nsize + 1;
+#else
+      nsize = 2 * SIZE;
+      dsize = SIZE;
+#endif
+
+      mpn_random2 (nptr, nsize);
+      mpn_random2 (dptr, dsize);
+      /* Force the most significant bit of the divisor on.  */
+      dptr[dsize - 1] |= (mp_limb_t) 1 << (GMP_LIMB_BITS - 1);
+
+      /* Divide a scratch copy so nptr stays available for the comparison.  */
+      MPN_COPY (rptr, nptr, nsize);
+      qlimb = mpn_divrem (qptr, (mp_size_t) 0, rptr, nsize, dptr, dsize);
+
+      /* The low dsize limbs of rptr are used as the remainder below.  */
+      rsize = dsize;
+
+      /* The returned limb is stored as the top quotient limb and counted
+         only when it is non-zero.  */
+      qsize = nsize - dsize;
+      qptr[qsize] = qlimb;
+      qsize += qlimb;
+
+      if (qsize == 0 || qsize > 2 * SIZE)
+        continue;               /* bogus */
+
+      /* mpn_mul is called with the longer operand first.  */
+      if (qsize > dsize)
+        mpn_mul (pptr, qptr, qsize, dptr, dsize);
+      else
+        mpn_mul (pptr, dptr, dsize, qptr, qsize);
+
+      /* Drop a zero top limb from the product, then add the remainder and
+         append any carry out as a new top limb.  */
+      psize = qsize + dsize;
+      if (pptr[psize - 1] == 0)
+        psize--;
+      cy = mpn_add (pptr, pptr, psize, rptr, rsize);
+      pptr[psize] = cy;
+      psize += cy;
+
+      if (nsize != psize || mpn_cmp (nptr, pptr, nsize) != 0)
+        abort ();
+    }
+}
diff --git a/tests/devel/logops_n.c b/tests/devel/logops_n.c

new file mode 100644 (file)

index 0000000..db0be2c
--- /dev/null
+++ b/tests/devel/logops_n.c
@@ -0,0 +1,231 @@
+/*
+Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2009 Free
+Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+#ifdef OPERATION_and_n
+#define func __gmpn_and_n
+#define reffunc refmpn_and_n
+#define funcname "mpn_and_n"
+#endif
+
+#ifdef OPERATION_andn_n
+#define func __gmpn_andn_n
+#define reffunc refmpn_andn_n
+#define funcname "mpn_andn_n"
+#endif
+
+#ifdef OPERATION_nand_n
+#define func __gmpn_nand_n
+#define reffunc refmpn_nand_n
+#define funcname "mpn_nand_n"
+#endif
+
+#ifdef OPERATION_ior_n
+#define func __gmpn_ior_n
+#define reffunc refmpn_ior_n
+#define funcname "mpn_ior_n"
+#endif
+
+#ifdef OPERATION_iorn_n
+#define func __gmpn_iorn_n
+#define reffunc refmpn_iorn_n
+#define funcname "mpn_iorn_n"
+#endif
+
+#ifdef OPERATION_nior_n
+#define func __gmpn_nior_n
+#define reffunc refmpn_nior_n
+#define funcname "mpn_nior_n"
+#endif
+
+#ifdef OPERATION_xor_n
+#define func __gmpn_xor_n
+#define reffunc refmpn_xor_n
+#define funcname "mpn_xor_n"
+#endif
+
+#ifdef OPERATION_xnor_n
+#define func __gmpn_xnor_n
+#define reffunc refmpn_xnor_n
+#define funcname "mpn_xnor_n"
+#endif
+
+#if defined (USG) || defined (__SVR4) || defined (_UNICOS) || defined (__hpux)
+#include <time.h>
+
+/* Processor time used so far, in milliseconds, derived from clock().  With a
+   coarse clock (CLOCKS_PER_SEC below 100000) the tick count is scaled up
+   before dividing; otherwise it is divided by the ticks per millisecond.  */
+int
+cputime ()
+{
+  return (CLOCKS_PER_SEC < 100000
+          ? clock () * 1000 / CLOCKS_PER_SEC
+          : clock () / (CLOCKS_PER_SEC / 1000));
+}
+#else
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+
+/* Return the user CPU time reported by getrusage() for the calling process,
+   in milliseconds (ru_utime only; ru_stime is not added in).  */
+int
+cputime ()
+{
+  struct rusage rus;
+
+  /* Name the "who" argument instead of passing a bare 0.  */
+  getrusage (RUSAGE_SELF, &rus);
+  return rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000;
+}
+#endif
+
+static void mpn_print (mp_ptr, mp_size_t);
+
+#define M * 1000000
+
+#ifndef CLOCK
+#error "Don't know CLOCK of your machine"
+#endif
+
+#ifndef OPS
+#define OPS (CLOCK/5)
+#endif
+#ifndef SIZE
+#define SIZE 328
+#endif
+#ifndef TIMES
+#define TIMES OPS/(SIZE+1)
+#endif
+
+int
+main (int argc, char **argv)
+{
+  mp_ptr s1, s2, dx, dy;
+  int i;
+  long t0, t;
+  unsigned int test;
+  mp_size_t size;
+  unsigned int ntests;
+
+  s1 = malloc (SIZE * sizeof (mp_limb_t));
+  s2 = malloc (SIZE * sizeof (mp_limb_t));
+  dx = malloc ((SIZE + 2) * sizeof (mp_limb_t));
+  dy = malloc ((SIZE + 2) * sizeof (mp_limb_t));
+
+  ntests = ~(unsigned) 0;
+  if (argc == 2)
+    ntests = strtol (argv[1], 0, 0);
+
+  for (test = 1; test <= ntests; test++)
+    {
+#if TIMES == 1 && ! defined (PRINT)
+      if (test % (SIZE > 100000 ? 1 : 100000 / SIZE) == 0)
+       {
+         printf ("\r%d", test);
+         fflush (stdout);
+       }
+#endif
+
+#ifdef RANDOM
+      size = random () % SIZE + 1;
+#else
+      size = SIZE;
+#endif
+
+      dx[0] = 0x87654321;
+      dy[0] = 0x87654321;
+      dx[size+1] = 0x12345678;
+      dy[size+1] = 0x12345678;
+
+#if TIMES != 1
+      mpn_random (s1, size);
+      mpn_random (s2, size);
+
+      t0 = cputime();
+      for (i = 0; i < TIMES; i++)
+       func (dx+1, s1, s2, size);
+      t = cputime() - t0;
+      printf (funcname ":    %5ldms (%.3f cycles/limb)\n",
+             t, ((double) t * CLOCK) / (TIMES * size * 1000.0));
+#endif
+
+#ifndef NOCHECK
+      mpn_random2 (s1, size);
+      mpn_random2 (s2, size);
+
+#ifdef PRINT
+      mpn_print (s1, size);
+      mpn_print (s2, size);
+#endif
+
+      /* Put garbage in the destination.  */
+      for (i = 0; i < size; i++)
+       {
+         dx[i+1] = 0xdead;
+         dy[i+1] = 0xbeef;
+       }
+
+      reffunc (dx+1, s1, s2, size);
+      func (dy+1, s1, s2, size);
+#ifdef PRINT
+      mpn_print (dx+1, size);
+      mpn_print (dy+1, size);
+#endif
+      if (mpn_cmp (dx, dy, size+2) != 0
+         || dx[0] != 0x87654321 || dx[size+1] != 0x12345678)
+       {
+#ifndef PRINT
+         mpn_print (dx+1, size);
+         mpn_print (dy+1, size);
+#endif
+         printf ("\n");
+         if (dy[0] != 0x87654321)
+           printf ("clobbered at low end\n");
+         if (dy[size+1] != 0x12345678)
+           printf ("clobbered at high end\n");
+         printf ("TEST NUMBER %u\n", test);
+         abort();
+       }
+#endif
+    }
+  exit (0);
+}
+
+/* Print the SIZE limbs at P to stdout as one hexadecimal number, most
+   significant limb first, followed by a newline.  Each limb is zero-padded
+   to 2*sizeof(mp_limb_t) hex digits; when SPACE is defined the limbs are
+   separated by single spaces.  Only the newline is printed if SIZE <= 0.  */
+static void
+mpn_print (mp_ptr p, mp_size_t size)
+{
+  mp_size_t i;
+
+  for (i = size - 1; i >= 0; i--)
+    {
+#ifdef _LONG_LONG_LIMB
+      /* The limb is printed as two halves through %lX.  The low half is
+         masked explicitly: relying on the cast alone to drop the high bits
+         only works when unsigned long is exactly half a limb wide, and
+         otherwise the high half would show up twice in the output.  */
+      printf ("%0*lX%0*lX", (int) (sizeof(mp_limb_t)),
+              (unsigned long) (p[i] >> (GMP_LIMB_BITS/2)),
+              (int) (sizeof(mp_limb_t)),
+              (unsigned long) (p[i] & (((mp_limb_t) 1 << (GMP_LIMB_BITS/2)) - 1)));
+#else
+      /* Cast so the argument always has the type %lX expects.  */
+      printf ("%0*lX", (int) (2 * sizeof(mp_limb_t)), (unsigned long) p[i]);
+#endif
+#ifdef SPACE
+      if (i != 0)
+        printf (" ");
+#endif
+    }
+  puts ("");
+}
diff --git a/tests/devel/shift.c b/tests/devel/shift.c

new file mode 100644 (file)

index 0000000..089edd3
--- /dev/null
+++ b/tests/devel/shift.c
@@ -0,0 +1,208 @@
+/*
+Copyright 1996, 1998, 1999, 2000, 2001, 2004, 2007, 2009 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+#ifdef OPERATION_lshift
+#define func __gmpn_lshift
+#define reffunc refmpn_lshift
+#define funcname "mpn_lshift"
+#endif
+
+#ifdef OPERATION_rshift
+#define func __gmpn_rshift
+#define reffunc refmpn_rshift
+#define funcname "mpn_rshift"
+#endif
+
+#if defined (USG) || defined (__SVR4) || defined (_UNICOS) || defined (__hpux)
+#include <time.h>
+
+/* Processor time used so far, in milliseconds, derived from clock().  With a
+   coarse clock (CLOCKS_PER_SEC below 100000) the tick count is scaled up
+   before dividing; otherwise it is divided by the ticks per millisecond.  */
+int
+cputime ()
+{
+  return (CLOCKS_PER_SEC < 100000
+          ? clock () * 1000 / CLOCKS_PER_SEC
+          : clock () / (CLOCKS_PER_SEC / 1000));
+}
+#else
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+
+/* Return the user CPU time reported by getrusage() for the calling process,
+   in milliseconds (ru_utime only; ru_stime is not added in).  */
+int
+cputime ()
+{
+  struct rusage rus;
+
+  /* Name the "who" argument instead of passing a bare 0.  */
+  getrusage (RUSAGE_SELF, &rus);
+  return rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000;
+}
+#endif
+
+static void mpn_print (mp_ptr, mp_size_t);
+
+#define M * 1000000
+
+#ifndef CLOCK
+#error "Don't know CLOCK of your machine"
+#endif
+
+#ifndef OPS
+#define OPS (CLOCK/5)
+#endif
+#ifndef SIZE
+#define SIZE 496
+#endif
+#ifndef TIMES
+#define TIMES OPS/(SIZE+1)
+#endif
+
+#ifndef CNT
+int CNT = 4;
+#endif
+
+int
+main (int argc, char **argv)
+{
+  mp_ptr s1, dx, dy;
+  mp_limb_t cyx, cyy;
+  int i;
+  long t0, t;
+  unsigned int test;
+  int cnt = CNT;
+  mp_size_t size;
+  unsigned int ntests;
+
+  s1 = malloc (SIZE * sizeof (mp_limb_t));
+  dx = malloc ((SIZE + 2) * sizeof (mp_limb_t));
+  dy = malloc ((SIZE + 2) * sizeof (mp_limb_t));
+
+  ntests = ~(unsigned) 0;
+  if (argc == 2)
+    ntests = strtol (argv[1], 0, 0);
+
+  for (test = 1; test <= ntests; test++)
+    {
+#if TIMES == 1 && ! defined (PRINT)
+      if (test % (SIZE > 100000 ? 1 : 100000 / SIZE) == 0)
+       {
+         printf ("\r%u", test);
+         fflush (stdout);
+       }
+#endif
+
+#if TIMES == 1
+      cnt = random () % (GMP_NUMB_BITS - 1) + 1;
+#endif
+
+#ifdef RANDOM
+      size = random () % SIZE + 1;
+#else
+      size = SIZE;
+#endif
+
+      dx[0] = 0x87654321;
+      dy[0] = 0x87654321;
+      dx[size+1] = 0x12345678;
+      dy[size+1] = 0x12345678;
+
+#if TIMES != 1
+      mpn_random (s1, size);
+
+      t0 = cputime();
+      for (i = 0; i < TIMES; i++)
+       func (dx+1, s1, size, cnt);
+      t = cputime() - t0;
+      printf (funcname ":    %5ldms (%.3f cycles/limb)\n",
+             t, ((double) t * CLOCK) / (TIMES * size * 1000.0));
+#endif
+
+#ifndef NOCHECK
+      mpn_random (s1, size);
+
+#ifdef PRINT
+      printf ("cnt=%-*d ", (int) (2 * sizeof(mp_limb_t)) - 4, cnt);
+      mpn_print (s1, size);
+#endif
+
+      /* Put garbage in the destination.  */
+      for (i = 0; i < size; i++)
+       {
+         dx[i+1] = 0xdead;
+         dy[i+1] = 0xbeef;
+       }
+
+      cyx = reffunc (dx+1, s1, size, cnt);
+      cyy = func (dy+1, s1, size, cnt);
+
+#ifdef PRINT
+      mpn_print (&cyx, 1);
+      mpn_print (dx+1, size);
+      mpn_print (&cyy, 1);
+      mpn_print (dy+1, size);
+#endif
+
+      if (cyx != cyy || mpn_cmp (dx, dy, size+2) != 0
+         || dx[0] != 0x87654321 || dx[size+1] != 0x12345678)
+       {
+#ifndef PRINT
+         mpn_print (&cyx, 1);
+         mpn_print (dx+1, size);
+         mpn_print (&cyy, 1);
+         mpn_print (dy+1, size);
+#endif
+         printf ("\n");
+         if (dy[0] != 0x87654321)
+           printf ("clobbered at low end\n");
+         if (dy[size+1] != 0x12345678)
+           printf ("clobbered at high end\n");
+         printf ("TEST NUMBER %u\n", test);
+         abort();
+       }
+#endif
+    }
+  exit (0);
+}
+
+/* Print the SIZE limbs at P to stdout as one hexadecimal number, most
+   significant limb first, followed by a newline.  Each limb is zero-padded
+   to 2*sizeof(mp_limb_t) hex digits; when SPACE is defined the limbs are
+   separated by single spaces.  Only the newline is printed if SIZE <= 0.  */
+static void
+mpn_print (mp_ptr p, mp_size_t size)
+{
+  mp_size_t i;
+
+  for (i = size - 1; i >= 0; i--)
+    {
+#ifdef _LONG_LONG_LIMB
+      /* The limb is printed as two halves through %lX.  The low half is
+         masked explicitly: relying on the cast alone to drop the high bits
+         only works when unsigned long is exactly half a limb wide, and
+         otherwise the high half would show up twice in the output.  */
+      printf ("%0*lX%0*lX", (int) (sizeof(mp_limb_t)),
+              (unsigned long) (p[i] >> (GMP_LIMB_BITS/2)),
+              (int) (sizeof(mp_limb_t)),
+              (unsigned long) (p[i] & (((mp_limb_t) 1 << (GMP_LIMB_BITS/2)) - 1)));
+#else
+      /* Cast so the argument always has the type %lX expects.  */
+      printf ("%0*lX", (int) (2 * sizeof(mp_limb_t)), (unsigned long) p[i]);
+#endif
+#ifdef SPACE
+      if (i != 0)
+        printf (" ");
+#endif
+    }
+  puts ("");
+}
diff --git a/tests/devel/try.c b/tests/devel/try.c

new file mode 100644 (file)

index 0000000..f8d1b0d
--- /dev/null
+++ b/tests/devel/try.c
@@ -0,0 +1,3122 @@
+/* Run some tests on various mpn routines.
+
+   THIS IS A TEST PROGRAM USED ONLY FOR DEVELOPMENT.  IT'S ALMOST CERTAIN TO
+   BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE VERSIONS OF GMP.
+
+Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2008, 2009 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+/* Usage: try [options] <function>...
+
+   For example, "./try mpn_add_n" to run tests of that function.
+
+   Combinations of alignments and overlaps are tested, with redzones above
+   or below the destinations, and with the sources write-protected.
+
+   The number of tests performed becomes ridiculously large with all the
+   combinations, and for that reason this can't be a part of a "make check",
+   it's meant only for development.  The code isn't very pretty either.
+
+   During development it can help to disable the redzones, since seeing the
+   rest of the destination written can show where the wrong part is, or if
+   the dst pointers are off by 1 or whatever.  The magic DEADVAL initial
+   fill (see below) will show locations never written.
+
+   The -s option can be used to test only certain size operands, which is
+   useful if some new code doesn't yet support say sizes less than the
+   unrolling, or whatever.
+
+   When a problem occurs it'll of course be necessary to run the program
+   under gdb to find out quite where, how and why it's going wrong.  Disable
+   the spinner with the -W option when doing this, or single stepping won't
+   work.  Using the "-1" option to run with simple data can be useful.
+
+   New functions to test can be added in try_array[].  If a new TYPE is
+   required then add it to the existing constants, set up its parameters in
+   param_init(), and add it to the call() function.  Extra parameter fields
+   can be added if necessary, or further interpretations given to existing
+   fields.
+
+
+   Portability:
+
+   This program is not designed for use on Cray vector systems under Unicos,
+   it will fail to compile due to missing _SC_PAGE_SIZE.  Those systems
+   don't really have pages or mprotect.  We could arrange to run the tests
+   without the redzones, but we haven't bothered currently.
+
+
+   Enhancements:
+
+   umul_ppmm support is not very good, lots of source data is generated
+   whereas only two limbs are needed.
+
+   Make a little scheme for interpreting the "SIZE" selections uniformly.
+
+   Make tr->size==SIZE_2 work, for the benefit of find_a which wants just 2
+   source limbs.  Possibly increase the default repetitions in that case.
+
+   Automatically detect gdb and disable the spinner (use -W for now).
+
+   Make a way to re-run a failing case in the debugger.  Have an option to
+   snapshot each test case before it's run so the data is available if a
+   segv occurs.  (This should be more reliable than the current print_all()
+   in the signal handler.)
+
+   When alignment means a dst isn't hard against the redzone, check the
+   space in between remains unchanged.
+
+   When a source overlaps a destination, don't run both s[i].high 0 and 1,
+   as s[i].high has no effect.  Maybe encode s[i].high into overlap->s[i].
+
+   When partial overlaps aren't done, don't loop over source alignments
+   during overlaps.
+
+   Try to make the looping code a bit less horrible.  Right now it's pretty
+   hard to see what iterations are actually done.
+
+   Perhaps specific setups and loops for each style of function under test
+   would be clearer than a parameterized general loop.  There's lots of
+   stuff common to all functions, but the exceptions get messy.
+
+   When there's no overlap, run with both src>dst and src<dst.  A subtle
+   calling-conventions violation occurred in a P6 copy which depended on the
+   relative location of src and dst.
+
+   multiplier_N is more or less a third source region for the addmul_N
+   routines, and could be done with the redzoned region scheme.
+
+*/
+
+
+/* always do assertion checking */
+#define WANT_ASSERT 1
+
+#include "config.h"
+
+#include <errno.h>
+#include <limits.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#if HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#if HAVE_SYS_MMAN_H
+#include <sys/mman.h>
+#endif
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+#include "tests.h"
+
+
+#if !HAVE_DECL_OPTARG
+extern char *optarg;
+extern int optind, opterr;
+#endif
+
+#if ! HAVE_DECL_SYS_NERR
+extern int sys_nerr;
+#endif
+
+#if ! HAVE_DECL_SYS_ERRLIST
+extern char *sys_errlist[];
+#endif
+
+#if ! HAVE_STRERROR
+/* Replacement used when the system lacks strerror(): look N up in
+   sys_errlist[], or return a fixed string when N is outside
+   0 .. sys_nerr-1.  */
+char *
+strerror (int n)
+{
+  return (n >= 0 && n < sys_nerr) ? sys_errlist[n] : "errno out of range";
+}
+#endif
+
+/* Rumour has it some systems lack a define of PROT_NONE. */
+#ifndef PROT_NONE
+#define PROT_NONE   0
+#endif
+
+/* Dummy defines for when mprotect doesn't exist. */
+#ifndef PROT_READ
+#define PROT_READ   0
+#endif
+#ifndef PROT_WRITE
+#define PROT_WRITE  0
+#endif
+
+/* _SC_PAGESIZE is standard, but hpux 9 and possibly other systems have
+   _SC_PAGE_SIZE instead. */
+#if defined (_SC_PAGE_SIZE) && ! defined (_SC_PAGESIZE)
+#define _SC_PAGESIZE  _SC_PAGE_SIZE
+#endif
+
+
+#ifdef EXTRA_PROTOS
+EXTRA_PROTOS
+#endif
+#ifdef EXTRA_PROTOS2
+EXTRA_PROTOS2
+#endif
+
+
+#define DEFAULT_REPETITIONS  10
+
+int  option_repetitions = DEFAULT_REPETITIONS;
+int  option_spinner = 1;
+int  option_redzones = 1;
+int  option_firstsize = 0;
+int  option_lastsize = 500;
+int  option_firstsize2 = 0;
+
+#define ALIGNMENTS          4
+#define OVERLAPS            4
+#define CARRY_RANDOMS       5
+#define MULTIPLIER_RANDOMS  5
+#define DIVISOR_RANDOMS     5
+#define FRACTION_COUNT      4
+
+int  option_print = 0;
+
+#define DATA_TRAND  0
+#define DATA_ZEROS  1
+#define DATA_SEQ    2
+#define DATA_FFS    3
+#define DATA_2FD    4
+int  option_data = DATA_TRAND;
+
+
+mp_size_t  pagesize;
+#define PAGESIZE_LIMBS  (pagesize / BYTES_PER_MP_LIMB)
+
+/* must be a multiple of the page size */
+#define REDZONE_BYTES   (pagesize * 16)
+#define REDZONE_LIMBS   (REDZONE_BYTES / BYTES_PER_MP_LIMB)
+
+
+#define MAX3(x,y,z)   (MAX (x, MAX (y, z)))
+
+#if GMP_LIMB_BITS == 32
+#define DEADVAL  CNST_LIMB(0xDEADBEEF)
+#else
+#define DEADVAL  CNST_LIMB(0xDEADBEEFBADDCAFE)
+#endif
+
+
+/* A pointer/size pair describing one block of memory used by the tests
+   (size is presumably a limb count -- TODO confirm against the allocator).  */
+struct region_t {
+  mp_ptr     ptr;
+  mp_size_t  size;
+};
+
+
+#define TRAP_NOWHERE 0
+#define TRAP_REF     1
+#define TRAP_FUN     2
+#define TRAP_SETUPS  3
+int trap_location = TRAP_NOWHERE;
+
+
+#define NUM_SOURCES  2
+#define NUM_DESTS    2
+
+/* State for one source operand; s[] below holds NUM_SOURCES of these.  */
+struct source_t {
+  struct region_t  region;  /* memory block belonging to this source */
+  int        high;          /* presumably selects placement at the high or
+                               low end of the region -- TODO confirm */
+  mp_size_t  align;         /* presumably the alignment offset currently
+                               under test -- TODO confirm */
+  mp_ptr     p;             /* operand pointer; the validate_xxx routines
+                               read the source data through s[i].p */
+};
+
+struct source_t  s[NUM_SOURCES];
+
+/* Settings for one destination operand; d[] below holds NUM_DESTS of these.
+   Unlike source_t there is no pointer here: the per-run destination
+   pointers live in struct dest_each_t.  */
+struct dest_t {
+  int        high;   /* presumably high/low placement, as for sources --
+                        TODO confirm */
+  mp_size_t  align;  /* presumably the alignment offset under test --
+                        TODO confirm */
+  mp_size_t  size;
+};
+
+struct dest_t  d[NUM_DESTS];
+
+/* Per-run source operand pointer; used as the s[] member of struct each_t.  */
+struct source_each_t {
+  mp_ptr     p;
+};
+
+/* Per-run destination operand; used as the d[] member of struct each_t.  */
+struct dest_each_t {
+  struct region_t  region;  /* memory block belonging to this destination */
+  mp_ptr     p;             /* result pointer; validators read fun.d[i].p */
+};
+
+mp_size_t       size;
+mp_size_t       size2;
+unsigned long   shift;
+mp_limb_t       carry;
+mp_limb_t       divisor;
+mp_limb_t       multiplier;
+mp_limb_t       multiplier_N[8];
+
+/* Everything kept separately for each of the two runs compared: the
+   instances below are "ref" (named "Ref") and "fun" (named "Fun").  */
+struct each_t {
+  const char  *name;                     /* "Ref" or "Fun" */
+  struct dest_each_t    d[NUM_DESTS];    /* destination operands of this run */
+  struct source_each_t  s[NUM_SOURCES];  /* source operand pointers of this run */
+  mp_limb_t  retval;                     /* return value; the validators read
+                                            fun.retval */
+};
+
+struct each_t  ref = { "Ref" };
+struct each_t  fun = { "Fun" };
+
+#define SRC_SIZE(n)  ((n) == 1 && tr->size2 ? size2 : size)
+
+void validate_fail __GMP_PROTO ((void));
+
+
+#if HAVE_TRY_NEW_C
+#include "try-new.c"
+#endif
+
+
+typedef mp_limb_t (*tryfun_t) __GMP_PROTO ((ANYARGS));
+
+/* Description of one kind of function under test.  The param[] array holds
+   one of these per TYPE_xxx constant and param_init() fills them in; fields
+   left untouched there stay zero because param[] is a global.  The SIZE_,
+   CARRY_, DIVISOR_, DATA_ and OVERLAP_ constants defined inline are the
+   values stored in the field that follows each group.  */
+struct try_t {
+  char  retval;   /* set to 1 by param_init() for the types it configures;
+                     presumably "function returns a value" -- TODO confirm */
+
+  char  src[2];   /* set to 1 per source operand in use (see param_init) */
+  char  dst[2];   /* set to 1 per destination operand in use */
+
+#define SIZE_YES          1
+#define SIZE_ALLOW_ZERO   2
+#define SIZE_1            3  /* 1 limb  */
+#define SIZE_2            4  /* 2 limbs */
+#define SIZE_3            5  /* 3 limbs */
+#define SIZE_FRACTION     6  /* size2 is fraction for divrem etc */
+#define SIZE_SIZE2        7
+#define SIZE_PLUS_1       8
+#define SIZE_SUM          9
+#define SIZE_DIFF        10
+#define SIZE_DIFF_PLUS_1 11
+#define SIZE_RETVAL      12
+#define SIZE_CEIL_HALF   13
+#define SIZE_GET_STR     14
+#define SIZE_PLUS_MSIZE_SUB_1 15  /* size+msize-1 */
+  char  size;
+  char  size2;        /* when non-zero, SRC_SIZE() makes source 1 use the
+                         global size2 instead of size */
+  char  dst_size[2];  /* one SIZE_xxx code per destination */
+
+  /* multiplier_N size in limbs */
+  mp_size_t  msize;
+
+  char  dst_bytes[2];
+
+  char  dst0_from_src1;
+
+#define CARRY_BIT     1  /* single bit 0 or 1 */
+#define CARRY_3       2  /* 0, 1, 2 */
+#define CARRY_4       3  /* 0 to 3 */
+#define CARRY_LIMB    4  /* any limb value */
+#define CARRY_DIVISOR 5  /* carry<divisor */
+  char  carry;
+
+  /* a fudge to tell the output when to print negatives */
+  char  carry_sign;
+
+  char  multiplier;
+  char  shift;
+
+#define DIVISOR_LIMB  1
+#define DIVISOR_NORM  2
+#define DIVISOR_ODD   3
+  char  divisor;
+
+#define DATA_NON_ZERO         1
+#define DATA_GCD              2
+#define DATA_SRC0_ODD         3
+#define DATA_SRC0_HIGHBIT     4
+#define DATA_SRC1_ODD         5
+#define DATA_SRC1_HIGHBIT     6
+#define DATA_MULTIPLE_DIVISOR 7
+#define DATA_UDIV_QRNND       8
+  char  data;
+
+/* Default is allow full overlap. */
+#define OVERLAP_NONE         1
+#define OVERLAP_LOW_TO_HIGH  2
+#define OVERLAP_HIGH_TO_LOW  3
+#define OVERLAP_NOT_SRCS     4
+#define OVERLAP_NOT_SRC2     8
+  char  overlap;
+
+  /* Reference implementation and its printable name; the REFERENCE() macro
+     in param_init() sets both together.  */
+  tryfun_t    reference;
+  const char  *reference_name;
+
+  /* Optional custom checker and its printable name; the VALIDATE() macro in
+     param_init() sets both together.  */
+  void        (*validate) __GMP_PROTO ((void));
+  const char  *validate_name;
+};
+
+struct try_t  *tr;
+
+
+/* Check fun.retval against refmpn_mod_34lsub1 applied to {s[0].p, size}.
+   The two values only have to agree after reduction modulo CNST_34LSUB1;
+   on a mismatch both pairs are printed and validate_fail() is called.  */
+void
+validate_mod_34lsub1 (void)
+{
+#define CNST_34LSUB1   ((CNST_LIMB(1) << (3 * (GMP_NUMB_BITS / 4))) - 1)
+
+  mp_limb_t  fun_raw, fun_reduced, ref_raw, ref_reduced;
+
+  ASSERT (size >= 1);
+
+  fun_raw = fun.retval;
+  fun_reduced = fun_raw % CNST_34LSUB1;
+
+  ref_raw = refmpn_mod_34lsub1 (s[0].p, size);
+  ref_reduced = ref_raw % CNST_34LSUB1;
+
+  /* Equal residues: nothing to report.  */
+  if (fun_reduced == ref_reduced)
+    return;
+
+  gmp_printf ("got   0x%MX reduced from 0x%MX\n", fun_reduced, fun_raw);
+  gmp_printf ("want  0x%MX reduced from 0x%MX\n", ref_reduced, ref_raw);
+  validate_fail ();
+}
+
+/* Check the quotient in fun.d[0] for {s[0].p, size} divided by the global
+   divisor.  A reference quotient and remainder come from refmpn_divrem_1;
+   a non-zero remainder (operand not exactly divisible) or a quotient
+   differing from the reference is reported, and validate_fail() is called
+   if either problem was seen.  */
+void
+validate_divexact_1 (void)
+{
+  mp_srcptr  dividend = s[0].p;
+  mp_srcptr  quotient = fun.d[0].p;
+  mp_ptr     want;
+  mp_limb_t  rem;
+  int        bad = 0;
+
+  ASSERT (size >= 1);
+
+  want = refmpn_malloc_limbs (size);
+  rem = refmpn_divrem_1 (want, 0, dividend, size, divisor);
+
+  if (rem != 0)
+    {
+      gmp_printf ("Remainder a%%d == 0x%MX, mpn_divexact_1 undefined\n", rem);
+      bad = 1;
+    }
+
+  if (! refmpn_equal_anynail (want, quotient, size))
+    {
+      printf ("Quotient a/d wrong\n");
+      mpn_trace ("fun ", quotient, size);
+      mpn_trace ("want", want, size);
+      bad = 1;
+    }
+
+  free (want);
+
+  if (bad)
+    validate_fail ();
+}
+
+
+/* Check the return value r (fun.retval) for the operand a = {s[0].p, size},
+   the global odd divisor and the global carry.
+
+   Two things are verified: the range of r (r < divisor when
+   carry < divisor, else r <= divisor), and that r*b^k + a - c is a multiple
+   of divisor for k = size-1 or k = size, where c = carry % divisor and b is
+   the limb base.  validate_fail() is called if either check fails.  */
+void
+validate_modexact_1c_odd (void)
+{
+  mp_srcptr  a = s[0].p;
+  mp_limb_t  r = fun.retval;
+  mp_limb_t  c;
+  mp_ptr     tp;
+  mp_size_t  k;
+  int        matched = 0;
+  int        bad = 0;
+
+  ASSERT (size >= 1);
+  ASSERT (divisor & 1);
+
+  /* NOTE(review): a non-zero nail is only reported here, it is not counted
+     as a failure -- confirm that is intended.  */
+  if ((r & GMP_NAIL_MASK) != 0)
+    printf ("r has non-zero nail\n");
+
+  if (carry < divisor)
+    {
+      if (r >= divisor)
+        {
+          printf ("Don't have r < divisor\n");
+          bad = 1;
+        }
+    }
+  else if (r > divisor)   /* carry >= divisor */
+    {
+      printf ("Don't have r <= divisor\n");
+      bad = 1;
+    }
+
+  c = carry % divisor;
+  tp = refmpn_malloc_limbs (size+1);
+
+  for (k = size-1; k <= size && ! matched; k++)
+    {
+      /* set {tp,size+1} to r*b^k + a - c */
+      refmpn_copyi (tp, a, size);
+      tp[size] = 0;
+      ASSERT_NOCARRY (refmpn_add_1 (tp+k, tp+k, size+1-k, r));
+      /* If subtracting c borrows, add divisor back; that leaves the residue
+         modulo divisor unchanged and is expected to carry out.  */
+      if (refmpn_sub_1 (tp, tp, size+1, c))
+        ASSERT_CARRY (mpn_add_1 (tp, tp, size+1, divisor));
+
+      if (refmpn_mod_1 (tp, size+1, divisor) == 0)
+        matched = 1;
+    }
+
+  if (! matched)
+    {
+      printf ("Remainder matches neither r*b^(size-1) nor r*b^size\n");
+      bad = 1;
+    }
+
+  free (tp);
+
+  if (bad)
+    validate_fail ();
+}
+
+/* Validator for the variant without a carry argument: clear the global
+   carry and reuse the carry-in checker.  Note that this overwrites the
+   global carry.  */
+void
+validate_modexact_1_odd (void)
+{
+  carry = 0;
+  validate_modexact_1c_odd ();
+}
+
+
+/* Validate a square-root-with-remainder result for the operand
+   {s[0].p, size}: fun.d[0] holds the root ((size+1)/2 limbs), fun.d[1] the
+   remainder and fun.retval the remainder size in limbs.
+
+   Checks that the remainder size lies in 0..size, that the remainder does
+   not exceed 2*root, and that root^2 + remainder equals the original
+   operand.  validate_fail() is called if any check fails.  */
+void
+validate_sqrtrem (void)
+{
+  mp_srcptr  orig_ptr = s[0].p;
+  mp_size_t  orig_size = size;
+  mp_size_t  root_size = (size+1)/2;
+  mp_srcptr  root_ptr = fun.d[0].p;
+  mp_size_t  rem_size = fun.retval;
+  mp_srcptr  rem_ptr = fun.d[1].p;
+  mp_size_t  prod_size = 2*root_size;
+  mp_ptr     p;
+  int  error = 0;
+
+  /* With size == 0 the scratch area below would have zero limbs and the
+     p[root_size] store would overrun it.  The other validators assert the
+     same precondition.  */
+  ASSERT (size >= 1);
+
+  if (rem_size < 0 || rem_size > size)
+    {
+      printf ("Bad remainder size retval %ld\n", (long) rem_size);
+      validate_fail ();
+      /* Should validate_fail() ever return, don't go on to read rem_size
+         limbs of remainder with a size already known to be bogus.  */
+      return;
+    }
+
+  p = refmpn_malloc_limbs (prod_size);
+
+  /* p = 2*root in root_size+1 limbs; the remainder must not exceed it.  */
+  p[root_size] = refmpn_lshift (p, root_ptr, root_size, 1);
+  if (refmpn_cmp_twosizes (p,root_size+1, rem_ptr,rem_size) < 0)
+    {
+      printf ("Remainder bigger than 2*root\n");
+      error = 1;
+    }
+
+  /* p = root^2 + remainder, which must reproduce the original operand.  */
+  refmpn_sqr (p, root_ptr, root_size);
+  if (rem_size != 0)
+    refmpn_add (p, p, prod_size, rem_ptr, rem_size);
+  if (refmpn_cmp_twosizes (p,prod_size, orig_ptr,orig_size) != 0)
+    {
+      printf ("root^2+rem != original\n");
+      mpn_trace ("prod", p, prod_size);
+      error = 1;
+    }
+  free (p);
+
+  if (error)
+    validate_fail ();
+}
+
+
+/* These types are indexes into the param[] array and are arbitrary so long
+   as they're all distinct and within the size of param[].  Renumber
+   whenever necessary or desired.  */
+
+#define TYPE_ADD               1
+#define TYPE_ADD_N             2
+#define TYPE_ADD_NC            3
+#define TYPE_SUB               4
+#define TYPE_SUB_N             5
+#define TYPE_SUB_NC            6
+
+#define TYPE_MUL_1             7
+#define TYPE_MUL_1C            8
+
+#define TYPE_MUL_2             9
+#define TYPE_MUL_3             92
+#define TYPE_MUL_4             93
+
+#define TYPE_ADDMUL_1         10
+#define TYPE_ADDMUL_1C        11
+#define TYPE_SUBMUL_1         12
+#define TYPE_SUBMUL_1C        13
+
+#define TYPE_ADDMUL_2         14
+#define TYPE_ADDMUL_3         15
+#define TYPE_ADDMUL_4         16
+#define TYPE_ADDMUL_5         17
+#define TYPE_ADDMUL_6         18
+#define TYPE_ADDMUL_7         19
+#define TYPE_ADDMUL_8         20
+
+#define TYPE_ADDSUB_N         21
+#define TYPE_ADDSUB_NC        22
+
+#define TYPE_RSHIFT           23
+#define TYPE_LSHIFT           24
+#define TYPE_LSHIFTC          25
+
+#define TYPE_COPY             26
+#define TYPE_COPYI            27
+#define TYPE_COPYD            28
+#define TYPE_COM              29
+
+#define TYPE_ADDLSH1_N        30
+#define TYPE_ADDLSH2_N        48
+#define TYPE_ADDLSH_N         49
+#define TYPE_SUBLSH1_N        31
+#define TYPE_SUBLSH_N        130
+#define TYPE_RSBLSH1_N        34
+#define TYPE_RSBLSH2_N        46
+#define TYPE_RSBLSH_N         47
+#define TYPE_RSH1ADD_N        32
+#define TYPE_RSH1SUB_N        33
+
+#define TYPE_MOD_1            35
+#define TYPE_MOD_1C           36
+#define TYPE_DIVMOD_1         37
+#define TYPE_DIVMOD_1C        38
+#define TYPE_DIVREM_1         39
+#define TYPE_DIVREM_1C        40
+#define TYPE_PREINV_DIVREM_1  41
+#define TYPE_PREINV_MOD_1     42
+#define TYPE_MOD_34LSUB1      43
+#define TYPE_UDIV_QRNND       44
+#define TYPE_UDIV_QRNND_R     45
+
+#define TYPE_DIVEXACT_1       50
+#define TYPE_DIVEXACT_BY3     51
+#define TYPE_DIVEXACT_BY3C    52
+#define TYPE_MODEXACT_1_ODD   53
+#define TYPE_MODEXACT_1C_ODD  54
+
+#define TYPE_INVERT           55
+#define TYPE_BINVERT          56
+
+#define TYPE_GCD              60
+#define TYPE_GCD_1            61
+#define TYPE_GCD_FINDA        62
+#define TYPE_MPZ_JACOBI       63
+#define TYPE_MPZ_KRONECKER    64
+#define TYPE_MPZ_KRONECKER_UI 65
+#define TYPE_MPZ_KRONECKER_SI 66
+#define TYPE_MPZ_UI_KRONECKER 67
+#define TYPE_MPZ_SI_KRONECKER 68
+
+#define TYPE_AND_N            70
+#define TYPE_NAND_N           71
+#define TYPE_ANDN_N           72
+#define TYPE_IOR_N            73
+#define TYPE_IORN_N           74
+#define TYPE_NIOR_N           75
+#define TYPE_XOR_N            76
+#define TYPE_XNOR_N           77
+
+#define TYPE_MUL_MN           80
+#define TYPE_MUL_N            81
+#define TYPE_SQR              82
+#define TYPE_UMUL_PPMM        83
+#define TYPE_UMUL_PPMM_R      84
+#define TYPE_MULLO_N          85
+
+#define TYPE_SBPI1_DIV_QR     90
+#define TYPE_TDIV_QR          91
+
+#define TYPE_SQRTREM          100
+#define TYPE_ZERO             101
+#define TYPE_GET_STR          102
+#define TYPE_POPCOUNT         103
+#define TYPE_HAMDIST          104
+
+#define TYPE_EXTRA            110
+
+struct try_t  param[150];
+
+
+/* Fill in the param[] table, one struct try_t per TYPE_ constant.  Each
+   entry records which destinations and sources a kind of routine uses,
+   which extra operands apply (carry, multiplier, divisor, shift, msize,
+   ...), how sizes, overlaps and data are constrained, and which reference
+   and/or validate function (together with its printable name) goes with it.
+
+   An entry is either set field by field, or cloned with COPY() from an
+   entry initialized earlier in this function and then adjusted, so the
+   order of the stanzas below matters.  Fields that are never assigned keep
+   whatever value param[] already held.  */
+void
+param_init (void)
+{
+  struct try_t  *p;
+
+/* Overwrite *p with a copy of the already-initialized entry param[index]. */
+#define COPY(index)  memcpy (p, &param[index], sizeof (*p))
+
+/* REFERENCE and VALIDATE store a function pointer in *p together with the
+   function's name as a string.  Both expand to two statements, so they are
+   only usable as complete statements, which is how they appear below.
+   NOTE(review): the non-stringizing fallback presumably relies on a
+   traditional preprocessor substituting "fun" inside the string literal --
+   confirm before depending on the recorded names there.  */
+#if HAVE_STRINGIZE
+#define REFERENCE(fun)                  \
+  p->reference = (tryfun_t) fun;        \
+  p->reference_name = #fun
+#define VALIDATE(fun)           \
+  p->validate = fun;            \
+  p->validate_name = #fun
+#else
+#define REFERENCE(fun)                  \
+  p->reference = (tryfun_t) fun;        \
+  p->reference_name = "fun"
+#define VALIDATE(fun)           \
+  p->validate = fun;            \
+  p->validate_name = "fun"
+#endif
+
+
+  /* Addition and subtraction; TYPE_ADD_N is the template many later
+     entries are copied from.  */
+  p = &param[TYPE_ADD_N];
+  p->retval = 1;
+  p->dst[0] = 1;
+  p->src[0] = 1;
+  p->src[1] = 1;
+  REFERENCE (refmpn_add_n);
+
+  p = &param[TYPE_ADD_NC];
+  COPY (TYPE_ADD_N);
+  p->carry = CARRY_BIT;
+  REFERENCE (refmpn_add_nc);
+
+  p = &param[TYPE_SUB_N];
+  COPY (TYPE_ADD_N);
+  REFERENCE (refmpn_sub_n);
+
+  p = &param[TYPE_SUB_NC];
+  COPY (TYPE_ADD_NC);
+  REFERENCE (refmpn_sub_nc);
+
+  p = &param[TYPE_ADD];
+  COPY (TYPE_ADD_N);
+  p->size = SIZE_ALLOW_ZERO;
+  p->size2 = 1;
+  REFERENCE (refmpn_add);
+
+  p = &param[TYPE_SUB];
+  COPY (TYPE_ADD);
+  REFERENCE (refmpn_sub);
+
+
+  /* Multiplication by a single limb.  */
+  p = &param[TYPE_MUL_1];
+  p->retval = 1;
+  p->dst[0] = 1;
+  p->src[0] = 1;
+  p->multiplier = 1;
+  p->overlap = OVERLAP_LOW_TO_HIGH;
+  REFERENCE (refmpn_mul_1);
+
+  p = &param[TYPE_MUL_1C];
+  COPY (TYPE_MUL_1);
+  p->carry = CARRY_LIMB;
+  REFERENCE (refmpn_mul_1c);
+
+
+  /* Multiplication by an msize-limb multiplier; the variants differ only
+     in msize and reference function.  */
+  p = &param[TYPE_MUL_2];
+  p->retval = 1;
+  p->dst[0] = 1;
+  p->dst_size[0] = SIZE_PLUS_MSIZE_SUB_1;
+  p->src[0] = 1;
+  p->src[1] = 1;
+  p->msize = 2;
+  p->overlap = OVERLAP_NOT_SRC2;
+  REFERENCE (refmpn_mul_2);
+
+  p = &param[TYPE_MUL_3];
+  COPY (TYPE_MUL_2);
+  p->msize = 3;
+  REFERENCE (refmpn_mul_3);
+
+  p = &param[TYPE_MUL_4];
+  COPY (TYPE_MUL_2);
+  p->msize = 4;
+  REFERENCE (refmpn_mul_4);
+
+
+  /* Multiply-accumulate by a single limb.  */
+  p = &param[TYPE_ADDMUL_1];
+  p->retval = 1;
+  p->dst[0] = 1;
+  p->src[0] = 1;
+  p->multiplier = 1;
+  p->dst0_from_src1 = 1;
+  REFERENCE (refmpn_addmul_1);
+
+  p = &param[TYPE_ADDMUL_1C];
+  COPY (TYPE_ADDMUL_1);
+  p->carry = CARRY_LIMB;
+  REFERENCE (refmpn_addmul_1c);
+
+  p = &param[TYPE_SUBMUL_1];
+  COPY (TYPE_ADDMUL_1);
+  REFERENCE (refmpn_submul_1);
+
+  p = &param[TYPE_SUBMUL_1C];
+  COPY (TYPE_ADDMUL_1C);
+  REFERENCE (refmpn_submul_1c);
+
+
+  /* Multiply-accumulate by an msize-limb multiplier, msize 2 to 8.  */
+  p = &param[TYPE_ADDMUL_2];
+  p->retval = 1;
+  p->dst[0] = 1;
+  p->dst_size[0] = SIZE_PLUS_MSIZE_SUB_1;
+  p->src[0] = 1;
+  p->src[1] = 1;
+  p->msize = 2;
+  p->dst0_from_src1 = 1;
+  p->overlap = OVERLAP_NOT_SRC2;
+  REFERENCE (refmpn_addmul_2);
+
+  p = &param[TYPE_ADDMUL_3];
+  COPY (TYPE_ADDMUL_2);
+  p->msize = 3;
+  REFERENCE (refmpn_addmul_3);
+
+  p = &param[TYPE_ADDMUL_4];
+  COPY (TYPE_ADDMUL_2);
+  p->msize = 4;
+  REFERENCE (refmpn_addmul_4);
+
+  p = &param[TYPE_ADDMUL_5];
+  COPY (TYPE_ADDMUL_2);
+  p->msize = 5;
+  REFERENCE (refmpn_addmul_5);
+
+  p = &param[TYPE_ADDMUL_6];
+  COPY (TYPE_ADDMUL_2);
+  p->msize = 6;
+  REFERENCE (refmpn_addmul_6);
+
+  p = &param[TYPE_ADDMUL_7];
+  COPY (TYPE_ADDMUL_2);
+  p->msize = 7;
+  REFERENCE (refmpn_addmul_7);
+
+  p = &param[TYPE_ADDMUL_8];
+  COPY (TYPE_ADDMUL_2);
+  p->msize = 8;
+  REFERENCE (refmpn_addmul_8);
+
+
+  /* Bitwise logical operations: one destination, two sources, no return
+     value; all are copies of TYPE_AND_N with a different reference.  */
+  p = &param[TYPE_AND_N];
+  p->dst[0] = 1;
+  p->src[0] = 1;
+  p->src[1] = 1;
+  REFERENCE (refmpn_and_n);
+
+  p = &param[TYPE_ANDN_N];
+  COPY (TYPE_AND_N);
+  REFERENCE (refmpn_andn_n);
+
+  p = &param[TYPE_NAND_N];
+  COPY (TYPE_AND_N);
+  REFERENCE (refmpn_nand_n);
+
+  p = &param[TYPE_IOR_N];
+  COPY (TYPE_AND_N);
+  REFERENCE (refmpn_ior_n);
+
+  p = &param[TYPE_IORN_N];
+  COPY (TYPE_AND_N);
+  REFERENCE (refmpn_iorn_n);
+
+  p = &param[TYPE_NIOR_N];
+  COPY (TYPE_AND_N);
+  REFERENCE (refmpn_nior_n);
+
+  p = &param[TYPE_XOR_N];
+  COPY (TYPE_AND_N);
+  REFERENCE (refmpn_xor_n);
+
+  p = &param[TYPE_XNOR_N];
+  COPY (TYPE_AND_N);
+  REFERENCE (refmpn_xnor_n);
+
+
+  /* Combined add and subtract: two destinations, two sources.  */
+  p = &param[TYPE_ADDSUB_N];
+  p->retval = 1;
+  p->dst[0] = 1;
+  p->dst[1] = 1;
+  p->src[0] = 1;
+  p->src[1] = 1;
+  REFERENCE (refmpn_add_n_sub_n);
+
+  p = &param[TYPE_ADDSUB_NC];
+  COPY (TYPE_ADDSUB_N);
+  p->carry = CARRY_4;
+  REFERENCE (refmpn_add_n_sub_nc);
+
+
+  /* Copies and complement; the copy variants differ in permitted overlap.  */
+  p = &param[TYPE_COPY];
+  p->dst[0] = 1;
+  p->src[0] = 1;
+  p->overlap = OVERLAP_NONE;
+  p->size = SIZE_ALLOW_ZERO;
+  REFERENCE (refmpn_copy);
+
+  p = &param[TYPE_COPYI];
+  p->dst[0] = 1;
+  p->src[0] = 1;
+  p->overlap = OVERLAP_LOW_TO_HIGH;
+  p->size = SIZE_ALLOW_ZERO;
+  REFERENCE (refmpn_copyi);
+
+  p = &param[TYPE_COPYD];
+  p->dst[0] = 1;
+  p->src[0] = 1;
+  p->overlap = OVERLAP_HIGH_TO_LOW;
+  p->size = SIZE_ALLOW_ZERO;
+  REFERENCE (refmpn_copyd);
+
+  p = &param[TYPE_COM];
+  p->dst[0] = 1;
+  p->src[0] = 1;
+  REFERENCE (refmpn_com);
+
+
+  /* Shift-combined add/subtract routines, all derived from TYPE_ADD_N;
+     the _N (non-1/2) variants also take a shift count.  */
+  p = &param[TYPE_ADDLSH1_N];
+  COPY (TYPE_ADD_N);
+  REFERENCE (refmpn_addlsh1_n);
+
+  p = &param[TYPE_ADDLSH2_N];
+  COPY (TYPE_ADD_N);
+  REFERENCE (refmpn_addlsh2_n);
+
+  p = &param[TYPE_ADDLSH_N];
+  COPY (TYPE_ADD_N);
+  p->shift = 1;
+  REFERENCE (refmpn_addlsh_n);
+
+  p = &param[TYPE_SUBLSH1_N];
+  COPY (TYPE_ADD_N);
+  REFERENCE (refmpn_sublsh1_n);
+
+  p = &param[TYPE_SUBLSH_N];
+  COPY (TYPE_ADDLSH_N);
+  REFERENCE (refmpn_sublsh_n);
+
+  p = &param[TYPE_RSBLSH1_N];
+  COPY (TYPE_ADD_N);
+  REFERENCE (refmpn_rsblsh1_n);
+
+  p = &param[TYPE_RSBLSH2_N];
+  COPY (TYPE_ADD_N);
+  REFERENCE (refmpn_rsblsh2_n);
+
+  p = &param[TYPE_RSBLSH_N];
+  COPY (TYPE_ADDLSH_N);
+  REFERENCE (refmpn_rsblsh_n);
+
+  p = &param[TYPE_RSH1ADD_N];
+  COPY (TYPE_ADD_N);
+  REFERENCE (refmpn_rsh1add_n);
+
+  p = &param[TYPE_RSH1SUB_N];
+  COPY (TYPE_ADD_N);
+  REFERENCE (refmpn_rsh1sub_n);
+
+
+  /* Division and remainder by a single limb.  */
+  p = &param[TYPE_MOD_1];
+  p->retval = 1;
+  p->src[0] = 1;
+  p->size = SIZE_ALLOW_ZERO;
+  p->divisor = DIVISOR_LIMB;
+  REFERENCE (refmpn_mod_1);
+
+  p = &param[TYPE_MOD_1C];
+  COPY (TYPE_MOD_1);
+  p->carry = CARRY_DIVISOR;
+  REFERENCE (refmpn_mod_1c);
+
+  p = &param[TYPE_DIVMOD_1];
+  COPY (TYPE_MOD_1);
+  p->dst[0] = 1;
+  REFERENCE (refmpn_divmod_1);
+
+  p = &param[TYPE_DIVMOD_1C];
+  COPY (TYPE_DIVMOD_1);
+  p->carry = CARRY_DIVISOR;
+  REFERENCE (refmpn_divmod_1c);
+
+  p = &param[TYPE_DIVREM_1];
+  COPY (TYPE_DIVMOD_1);
+  p->size2 = SIZE_FRACTION;
+  p->dst_size[0] = SIZE_SUM;
+  REFERENCE (refmpn_divrem_1);
+
+  p = &param[TYPE_DIVREM_1C];
+  COPY (TYPE_DIVREM_1);
+  p->carry = CARRY_DIVISOR;
+  REFERENCE (refmpn_divrem_1c);
+
+  p = &param[TYPE_PREINV_DIVREM_1];
+  COPY (TYPE_DIVREM_1);
+  p->size = SIZE_YES; /* ie. no size==0 */
+  REFERENCE (refmpn_preinv_divrem_1);
+
+  p = &param[TYPE_PREINV_MOD_1];
+  p->retval = 1;
+  p->src[0] = 1;
+  p->divisor = DIVISOR_NORM;
+  REFERENCE (refmpn_preinv_mod_1);
+
+  /* Only a validate function is recorded for this entry, no reference.  */
+  p = &param[TYPE_MOD_34LSUB1];
+  p->retval = 1;
+  p->src[0] = 1;
+  VALIDATE (validate_mod_34lsub1);
+
+  p = &param[TYPE_UDIV_QRNND];
+  p->retval = 1;
+  p->src[0] = 1;
+  p->dst[0] = 1;
+  p->dst_size[0] = SIZE_1;
+  p->divisor = UDIV_NEEDS_NORMALIZATION ? DIVISOR_NORM : DIVISOR_LIMB;
+  p->data = DATA_UDIV_QRNND;
+  p->overlap = OVERLAP_NONE;
+  REFERENCE (refmpn_udiv_qrnnd);
+
+  p = &param[TYPE_UDIV_QRNND_R];
+  COPY (TYPE_UDIV_QRNND);
+  REFERENCE (refmpn_udiv_qrnnd_r);
+
+
+  /* Exact division; this entry records both a validate and a reference
+     function.  */
+  p = &param[TYPE_DIVEXACT_1];
+  p->dst[0] = 1;
+  p->src[0] = 1;
+  p->divisor = DIVISOR_LIMB;
+  p->data = DATA_MULTIPLE_DIVISOR;
+  VALIDATE (validate_divexact_1);
+  REFERENCE (refmpn_divmod_1);
+
+
+  p = &param[TYPE_DIVEXACT_BY3];
+  p->retval = 1;
+  p->dst[0] = 1;
+  p->src[0] = 1;
+  REFERENCE (refmpn_divexact_by3);
+
+  p = &param[TYPE_DIVEXACT_BY3C];
+  COPY (TYPE_DIVEXACT_BY3);
+  p->carry = CARRY_3;
+  REFERENCE (refmpn_divexact_by3c);
+
+
+  /* Validate-only entries; the 1C copy replaces the validate function
+     inherited from TYPE_MODEXACT_1_ODD.  */
+  p = &param[TYPE_MODEXACT_1_ODD];
+  p->retval = 1;
+  p->src[0] = 1;
+  p->divisor = DIVISOR_ODD;
+  VALIDATE (validate_modexact_1_odd);
+
+  p = &param[TYPE_MODEXACT_1C_ODD];
+  COPY (TYPE_MODEXACT_1_ODD);
+  p->carry = CARRY_LIMB;
+  VALIDATE (validate_modexact_1c_odd);
+
+
+  /* GCD.  */
+  p = &param[TYPE_GCD_1];
+  p->retval = 1;
+  p->src[0] = 1;
+  p->data = DATA_NON_ZERO;
+  p->divisor = DIVISOR_LIMB;
+  REFERENCE (refmpn_gcd_1);
+
+  p = &param[TYPE_GCD];
+  p->retval = 1;
+  p->dst[0] = 1;
+  p->src[0] = 1;
+  p->src[1] = 1;
+  p->size2 = 1;
+  p->dst_size[0] = SIZE_RETVAL;
+  p->overlap = OVERLAP_NOT_SRCS;
+  p->data = DATA_GCD;
+  REFERENCE (refmpn_gcd);
+
+
+  /* mpz Jacobi and Kronecker symbol routines.  */
+  p = &param[TYPE_MPZ_JACOBI];
+  p->retval = 1;
+  p->src[0] = 1;
+  p->size = SIZE_ALLOW_ZERO;
+  p->src[1] = 1;
+  p->data = DATA_SRC1_ODD;
+  p->size2 = 1;
+  p->carry = CARRY_4;
+  p->carry_sign = 1;
+  REFERENCE (refmpz_jacobi);
+
+  p = &param[TYPE_MPZ_KRONECKER];
+  COPY (TYPE_MPZ_JACOBI);
+  p->data = 0;                 /* clear inherited DATA_SRC1_ODD */
+  REFERENCE (refmpz_kronecker);
+
+
+  p = &param[TYPE_MPZ_KRONECKER_UI];
+  p->retval = 1;
+  p->src[0] = 1;
+  p->size = SIZE_ALLOW_ZERO;
+  p->multiplier = 1;
+  p->carry = CARRY_BIT;
+  REFERENCE (refmpz_kronecker_ui);
+
+  p = &param[TYPE_MPZ_KRONECKER_SI];
+  COPY (TYPE_MPZ_KRONECKER_UI);
+  REFERENCE (refmpz_kronecker_si);
+
+  p = &param[TYPE_MPZ_UI_KRONECKER];
+  COPY (TYPE_MPZ_KRONECKER_UI);
+  REFERENCE (refmpz_ui_kronecker);
+
+  p = &param[TYPE_MPZ_SI_KRONECKER];
+  COPY (TYPE_MPZ_KRONECKER_UI);
+  REFERENCE (refmpz_si_kronecker);
+
+
+  /* Squaring and full multiplication.  TYPE_SQR is set up first because
+     TYPE_MUL_N is copied from it, and TYPE_MULLO_N and TYPE_MUL_MN are in
+     turn copied from TYPE_MUL_N.  */
+  p = &param[TYPE_SQR];
+  p->dst[0] = 1;
+  p->src[0] = 1;
+  p->dst_size[0] = SIZE_SUM;
+  p->overlap = OVERLAP_NONE;
+  REFERENCE (refmpn_sqr);
+
+  p = &param[TYPE_MUL_N];
+  COPY (TYPE_SQR);
+  p->src[1] = 1;
+  REFERENCE (refmpn_mul_n);
+
+  p = &param[TYPE_MULLO_N];
+  COPY (TYPE_MUL_N);
+  p->dst_size[0] = 0;
+  REFERENCE (refmpn_mullo_n);
+
+  p = &param[TYPE_MUL_MN];
+  COPY (TYPE_MUL_N);
+  p->size2 = 1;
+  REFERENCE (refmpn_mul_basecase);
+
+  p = &param[TYPE_UMUL_PPMM];
+  p->retval = 1;
+  p->src[0] = 1;
+  p->dst[0] = 1;
+  p->dst_size[0] = SIZE_1;
+  p->overlap = OVERLAP_NONE;
+  REFERENCE (refmpn_umul_ppmm);
+
+  p = &param[TYPE_UMUL_PPMM_R];
+  COPY (TYPE_UMUL_PPMM);
+  REFERENCE (refmpn_umul_ppmm_r);
+
+
+  /* Shifts; the left shifts reverse the permitted overlap direction.  */
+  p = &param[TYPE_RSHIFT];
+  p->retval = 1;
+  p->dst[0] = 1;
+  p->src[0] = 1;
+  p->shift = 1;
+  p->overlap = OVERLAP_LOW_TO_HIGH;
+  REFERENCE (refmpn_rshift);
+
+  p = &param[TYPE_LSHIFT];
+  COPY (TYPE_RSHIFT);
+  p->overlap = OVERLAP_HIGH_TO_LOW;
+  REFERENCE (refmpn_lshift);
+
+  p = &param[TYPE_LSHIFTC];
+  COPY (TYPE_RSHIFT);
+  p->overlap = OVERLAP_HIGH_TO_LOW;
+  REFERENCE (refmpn_lshiftc);
+
+
+  /* Bit counting: return value only, no destination.  */
+  p = &param[TYPE_POPCOUNT];
+  p->retval = 1;
+  p->src[0] = 1;
+  REFERENCE (refmpn_popcount);
+
+  p = &param[TYPE_HAMDIST];
+  COPY (TYPE_POPCOUNT);
+  p->src[1] = 1;
+  REFERENCE (refmpn_hamdist);
+
+
+  /* Multi-limb division, square root and miscellaneous routines.  */
+  p = &param[TYPE_SBPI1_DIV_QR];
+  p->retval = 1;
+  p->dst[0] = 1;
+  p->dst[1] = 1;
+  p->src[0] = 1;
+  p->src[1] = 1;
+  p->data = DATA_SRC1_HIGHBIT;
+  p->size2 = 1;
+  p->dst_size[0] = SIZE_DIFF;
+  p->overlap = OVERLAP_NONE;
+  REFERENCE (refmpn_sb_div_qr);
+
+  p = &param[TYPE_TDIV_QR];
+  p->dst[0] = 1;
+  p->dst[1] = 1;
+  p->src[0] = 1;
+  p->src[1] = 1;
+  p->size2 = 1;
+  p->dst_size[0] = SIZE_DIFF_PLUS_1;
+  p->dst_size[1] = SIZE_SIZE2;
+  p->overlap = OVERLAP_NONE;
+  REFERENCE (refmpn_tdiv_qr);
+
+  p = &param[TYPE_SQRTREM];
+  p->retval = 1;
+  p->dst[0] = 1;
+  p->dst[1] = 1;
+  p->src[0] = 1;
+  p->dst_size[0] = SIZE_CEIL_HALF;
+  p->dst_size[1] = SIZE_RETVAL;
+  p->overlap = OVERLAP_NONE;
+  VALIDATE (validate_sqrtrem);
+  REFERENCE (refmpn_sqrtrem);
+
+  p = &param[TYPE_ZERO];
+  p->dst[0] = 1;
+  p->size = SIZE_ALLOW_ZERO;
+  REFERENCE (refmpn_zero);
+
+  /* d[0] is marked as byte data (dst_bytes) for this entry.  */
+  p = &param[TYPE_GET_STR];
+  p->retval = 1;
+  p->src[0] = 1;
+  p->size = SIZE_ALLOW_ZERO;
+  p->dst[0] = 1;
+  p->dst[1] = 1;
+  p->dst_size[0] = SIZE_GET_STR;
+  p->dst_bytes[0] = 1;
+  p->overlap = OVERLAP_NONE;
+  REFERENCE (refmpn_get_str);
+
+  p = &param[TYPE_BINVERT];
+  p->dst[0] = 1;
+  p->src[0] = 1;
+  p->data = DATA_SRC0_ODD;
+  p->overlap = OVERLAP_NONE;
+  REFERENCE (refmpn_binvert);
+
+  p = &param[TYPE_INVERT];
+  p->dst[0] = 1;
+  p->src[0] = 1;
+  p->data = DATA_SRC0_HIGHBIT;
+  p->overlap = OVERLAP_NONE;
+  REFERENCE (refmpn_invert);
+
+  /* Hook for extra entries: when EXTRA_PARAM_INIT is defined its expansion
+     is inserted here as code.  */
+#ifdef EXTRA_PARAM_INIT
+  EXTRA_PARAM_INIT
+#endif
+}
+
+
+/* The following are macros if there are no native versions, so wrap them in
+   functions that can be put in choice_array[]. */
+
+/* Function form of MPN_COPY, giving it an address usable in choice_array[]. */
+void
+MPN_COPY_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
+{
+  MPN_COPY (rp, sp, size);
+}
+
+/* Function form of MPN_COPY_INCR, giving it an address usable in
+   choice_array[]. */
+void
+MPN_COPY_INCR_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
+{
+  MPN_COPY_INCR (rp, sp, size);
+}
+
+/* Function form of MPN_COPY_DECR, giving it an address usable in
+   choice_array[]. */
+void
+MPN_COPY_DECR_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
+{
+  MPN_COPY_DECR (rp, sp, size);
+}
+
+/* Function form of __GMPN_COPY, giving it an address usable in
+   choice_array[]. */
+void
+__GMPN_COPY_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
+{
+  __GMPN_COPY (rp, sp, size);
+}
+
+#ifdef __GMPN_COPY_INCR
+/* Function form of __GMPN_COPY_INCR; only compiled when that macro is
+   defined. */
+void
+__GMPN_COPY_INCR_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
+{
+  __GMPN_COPY_INCR (rp, sp, size);
+}
+#endif
+
+/* Callable wrapper around mpn_com, which may be only a macro. */
+void
+mpn_com_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
+{
+  mpn_com (rp, sp, size);
+}
+
+/* Callable wrapper around mpn_and_n, which may be only a macro. */
+void
+mpn_and_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
+{
+  mpn_and_n (rp, s1, s2, size);
+}
+
+/* Callable wrapper around mpn_andn_n, which may be only a macro. */
+void
+mpn_andn_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
+{
+  mpn_andn_n (rp, s1, s2, size);
+}
+
+/* Callable wrapper around mpn_nand_n, which may be only a macro. */
+void
+mpn_nand_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
+{
+  mpn_nand_n (rp, s1, s2, size);
+}
+
+/* Callable wrapper around mpn_ior_n, which may be only a macro. */
+void
+mpn_ior_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
+{
+  mpn_ior_n (rp, s1, s2, size);
+}
+
+/* Callable wrapper around mpn_iorn_n, which may be only a macro. */
+void
+mpn_iorn_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
+{
+  mpn_iorn_n (rp, s1, s2, size);
+}
+
+/* Callable wrapper around mpn_nior_n, which may be only a macro. */
+void
+mpn_nior_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
+{
+  mpn_nior_n (rp, s1, s2, size);
+}
+
+/* Callable wrapper around mpn_xor_n, which may be only a macro. */
+void
+mpn_xor_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
+{
+  mpn_xor_n (rp, s1, s2, size);
+}
+
+/* Callable wrapper around mpn_xnor_n, which may be only a macro. */
+void
+mpn_xnor_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
+{
+  mpn_xnor_n (rp, s1, s2, size);
+}
+
+/* Function form of the udiv_qrnnd macro: the quotient is returned and the
+   remainder is stored through remptr. */
+mp_limb_t
+udiv_qrnnd_fun (mp_limb_t *remptr, mp_limb_t n1, mp_limb_t n0, mp_limb_t d)
+{
+  mp_limb_t  quotient;
+
+  udiv_qrnnd (quotient, *remptr, n1, n0, d);
+
+  return quotient;
+}
+
+/* Callable wrapper around mpn_divexact_by3, passing its result back. */
+mp_limb_t
+mpn_divexact_by3_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
+{
+  mp_limb_t  result;
+
+  result = mpn_divexact_by3 (rp, sp, size);
+  return result;
+}
+
+/* Callable wrapper around mpn_modexact_1_odd, passing its result back. */
+mp_limb_t
+mpn_modexact_1_odd_fun (mp_srcptr ptr, mp_size_t size, mp_limb_t divisor)
+{
+  mp_limb_t  result;
+
+  result = mpn_modexact_1_odd (ptr, size, divisor);
+  return result;
+}
+
+/* Run mpn_toom22_mul on two size-limb operands, supplying the scratch
+   space it needs from a temporary allocation released before returning. */
+void
+mpn_toom22_mul_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, mp_size_t size)
+{
+  mp_ptr  scratch;
+  TMP_DECL;
+
+  TMP_MARK;
+
+  scratch = TMP_ALLOC_LIMBS (mpn_toom22_mul_itch (size, size));
+  mpn_toom22_mul (dst, src1, size, src2, size, scratch);
+
+  TMP_FREE;
+}
+/* Run mpn_toom2_sqr on a size-limb operand, supplying the scratch space it
+   needs from a temporary allocation released before returning. */
+void
+mpn_toom2_sqr_fun (mp_ptr dst, mp_srcptr src, mp_size_t size)
+{
+  mp_ptr  scratch;
+  TMP_DECL;
+
+  TMP_MARK;
+
+  scratch = TMP_ALLOC_LIMBS (mpn_toom2_sqr_itch (size));
+  mpn_toom2_sqr (dst, src, size, scratch);
+
+  TMP_FREE;
+}
+/* Run mpn_toom33_mul on two size-limb operands, supplying the scratch
+   space it needs from a temporary allocation released before returning. */
+void
+mpn_toom33_mul_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, mp_size_t size)
+{
+  mp_ptr  scratch;
+  TMP_DECL;
+
+  TMP_MARK;
+
+  scratch = TMP_ALLOC_LIMBS (mpn_toom33_mul_itch (size, size));
+  mpn_toom33_mul (dst, src1, size, src2, size, scratch);
+
+  TMP_FREE;
+}
+/* Run mpn_toom3_sqr on a size-limb operand, supplying the scratch space it
+   needs from a temporary allocation released before returning. */
+void
+mpn_toom3_sqr_fun (mp_ptr dst, mp_srcptr src, mp_size_t size)
+{
+  mp_ptr  scratch;
+  TMP_DECL;
+
+  TMP_MARK;
+
+  scratch = TMP_ALLOC_LIMBS (mpn_toom3_sqr_itch (size));
+  mpn_toom3_sqr (dst, src, size, scratch);
+
+  TMP_FREE;
+}
+/* Run mpn_toom44_mul on two size-limb operands, supplying the scratch
+   space it needs from a temporary allocation released before returning. */
+void
+mpn_toom44_mul_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, mp_size_t size)
+{
+  mp_ptr  scratch;
+  TMP_DECL;
+
+  TMP_MARK;
+
+  scratch = TMP_ALLOC_LIMBS (mpn_toom44_mul_itch (size, size));
+  mpn_toom44_mul (dst, src1, size, src2, size, scratch);
+
+  TMP_FREE;
+}
+/* Run mpn_toom4_sqr on a size-limb operand, supplying the scratch space it
+   needs from a temporary allocation released before returning. */
+void
+mpn_toom4_sqr_fun (mp_ptr dst, mp_srcptr src, mp_size_t size)
+{
+  mp_ptr  scratch;
+  TMP_DECL;
+
+  TMP_MARK;
+
+  scratch = TMP_ALLOC_LIMBS (mpn_toom4_sqr_itch (size));
+  mpn_toom4_sqr (dst, src, size, scratch);
+
+  TMP_FREE;
+}
+
+/* Function form of the umul_ppmm macro: the high limb of m1*m2 is returned
+   and the low limb is stored through lowptr. */
+mp_limb_t
+umul_ppmm_fun (mp_limb_t *lowptr, mp_limb_t m1, mp_limb_t m2)
+{
+  mp_limb_t  hi;
+
+  umul_ppmm (hi, *lowptr, m1, m2);
+
+  return hi;
+}
+
+/* Function form of MPN_ZERO, giving it an address usable in choice_array[]. */
+void
+MPN_ZERO_fun (mp_ptr ptr, mp_size_t size)
+{
+  MPN_ZERO (ptr, size);
+}
+
+
+/* One testable routine, as listed in choice_array[]. */
+struct choice_t {
+  const char  *name;      /* routine name as text, printed in diagnostics */
+  tryfun_t    function;   /* the routine itself, cast to the generic type */
+  int         type;       /* a TYPE_ constant; call() switches on this */
+  mp_size_t   minsize;    /* NOTE(review): presumably the smallest size to
+                             test; zero when omitted from an initializer --
+                             its use is not visible here, confirm */
+};
+
+/* TRY(fun) expands to the first two initializers of a struct choice_t: the
+   name of fun as a string, and fun itself cast to tryfun_t.
+   TRY_FUNFUN(fun) gives the same name string but uses the address of the
+   wrapper function named fun with "_fun" appended.
+   NOTE(review): the fallback branch presumably targets traditional
+   preprocessors (substitution inside strings, empty-comment pasting) --
+   confirm.  */
+#if HAVE_STRINGIZE
+#define TRY(fun)        #fun, (tryfun_t) fun
+#define TRY_FUNFUN(fun) #fun, (tryfun_t) fun##_fun
+#else
+#define TRY(fun)        "fun", (tryfun_t) fun
+#define TRY_FUNFUN(fun) "fun", (tryfun_t) fun/**/_fun
+#endif
+
+/* Table of the routines that can be selected for testing.  Each entry is
+   { name, function, type [, minsize] }: TRY or TRY_FUNFUN supplies the
+   first two members, the TYPE_ constant is one of those param_init() fills
+   in param[] for, and minsize is zero when omitted.  Several entries may
+   share one TYPE_.  Entries for routines that exist only on some builds
+   are guarded by the corresponding HAVE_NATIVE_ or threshold conditions,
+   and EXTRA_ROUTINES, when defined, appends further entries.  */
+const struct choice_t choice_array[] = {
+  { TRY(mpn_add),       TYPE_ADD    },
+  { TRY(mpn_sub),       TYPE_SUB    },
+
+  { TRY(mpn_add_n),     TYPE_ADD_N  },
+  { TRY(mpn_sub_n),     TYPE_SUB_N  },
+
+#if HAVE_NATIVE_mpn_add_nc
+  { TRY(mpn_add_nc),    TYPE_ADD_NC },
+#endif
+#if HAVE_NATIVE_mpn_sub_nc
+  { TRY(mpn_sub_nc),    TYPE_SUB_NC },
+#endif
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  { TRY(mpn_add_n_sub_n),  TYPE_ADDSUB_N  },
+#endif
+#if HAVE_NATIVE_mpn_add_n_sub_nc
+  { TRY(mpn_add_n_sub_nc), TYPE_ADDSUB_NC },
+#endif
+
+  { TRY(mpn_addmul_1),  TYPE_ADDMUL_1  },
+  { TRY(mpn_submul_1),  TYPE_SUBMUL_1  },
+#if HAVE_NATIVE_mpn_addmul_1c
+  { TRY(mpn_addmul_1c), TYPE_ADDMUL_1C },
+#endif
+#if HAVE_NATIVE_mpn_submul_1c
+  { TRY(mpn_submul_1c), TYPE_SUBMUL_1C },
+#endif
+
+#if HAVE_NATIVE_mpn_addmul_2
+  { TRY(mpn_addmul_2), TYPE_ADDMUL_2, 2 },
+#endif
+#if HAVE_NATIVE_mpn_addmul_3
+  { TRY(mpn_addmul_3), TYPE_ADDMUL_3, 3 },
+#endif
+#if HAVE_NATIVE_mpn_addmul_4
+  { TRY(mpn_addmul_4), TYPE_ADDMUL_4, 4 },
+#endif
+#if HAVE_NATIVE_mpn_addmul_5
+  { TRY(mpn_addmul_5), TYPE_ADDMUL_5, 5 },
+#endif
+#if HAVE_NATIVE_mpn_addmul_6
+  { TRY(mpn_addmul_6), TYPE_ADDMUL_6, 6 },
+#endif
+#if HAVE_NATIVE_mpn_addmul_7
+  { TRY(mpn_addmul_7), TYPE_ADDMUL_7, 7 },
+#endif
+#if HAVE_NATIVE_mpn_addmul_8
+  { TRY(mpn_addmul_8), TYPE_ADDMUL_8, 8 },
+#endif
+
+  { TRY_FUNFUN(mpn_com),  TYPE_COM },
+
+  { TRY_FUNFUN(MPN_COPY),      TYPE_COPY },
+  { TRY_FUNFUN(MPN_COPY_INCR), TYPE_COPYI },
+  { TRY_FUNFUN(MPN_COPY_DECR), TYPE_COPYD },
+
+  { TRY_FUNFUN(__GMPN_COPY),      TYPE_COPY },
+#ifdef __GMPN_COPY_INCR
+  { TRY_FUNFUN(__GMPN_COPY_INCR), TYPE_COPYI },
+#endif
+
+#if HAVE_NATIVE_mpn_copyi
+  { TRY(mpn_copyi), TYPE_COPYI },
+#endif
+#if HAVE_NATIVE_mpn_copyd
+  { TRY(mpn_copyd), TYPE_COPYD },
+#endif
+
+#if HAVE_NATIVE_mpn_addlsh1_n
+  { TRY(mpn_addlsh1_n), TYPE_ADDLSH1_N },
+#endif
+#if HAVE_NATIVE_mpn_addlsh2_n
+  { TRY(mpn_addlsh2_n), TYPE_ADDLSH2_N },
+#endif
+#if HAVE_NATIVE_mpn_addlsh_n
+  { TRY(mpn_addlsh_n), TYPE_ADDLSH_N },
+#endif
+#if HAVE_NATIVE_mpn_sublsh1_n
+  { TRY(mpn_sublsh1_n), TYPE_SUBLSH1_N },
+#endif
+#if HAVE_NATIVE_mpn_sublsh_n
+  { TRY(mpn_sublsh_n), TYPE_SUBLSH_N },
+#endif
+#if HAVE_NATIVE_mpn_rsblsh1_n
+  { TRY(mpn_rsblsh1_n), TYPE_RSBLSH1_N },
+#endif
+#if HAVE_NATIVE_mpn_rsblsh2_n
+  { TRY(mpn_rsblsh2_n), TYPE_RSBLSH2_N },
+#endif
+#if HAVE_NATIVE_mpn_rsblsh_n
+  { TRY(mpn_rsblsh_n), TYPE_RSBLSH_N },
+#endif
+#if HAVE_NATIVE_mpn_rsh1add_n
+  { TRY(mpn_rsh1add_n), TYPE_RSH1ADD_N },
+#endif
+#if HAVE_NATIVE_mpn_rsh1sub_n
+  { TRY(mpn_rsh1sub_n), TYPE_RSH1SUB_N },
+#endif
+
+  { TRY_FUNFUN(mpn_and_n),  TYPE_AND_N  },
+  { TRY_FUNFUN(mpn_andn_n), TYPE_ANDN_N },
+  { TRY_FUNFUN(mpn_nand_n), TYPE_NAND_N },
+  { TRY_FUNFUN(mpn_ior_n),  TYPE_IOR_N  },
+  { TRY_FUNFUN(mpn_iorn_n), TYPE_IORN_N },
+  { TRY_FUNFUN(mpn_nior_n), TYPE_NIOR_N },
+  { TRY_FUNFUN(mpn_xor_n),  TYPE_XOR_N  },
+  { TRY_FUNFUN(mpn_xnor_n), TYPE_XNOR_N },
+
+  { TRY(mpn_divrem_1),     TYPE_DIVREM_1 },
+#if USE_PREINV_DIVREM_1
+  { TRY(mpn_preinv_divrem_1), TYPE_PREINV_DIVREM_1 },
+#endif
+  { TRY(mpn_mod_1),        TYPE_MOD_1 },
+#if USE_PREINV_MOD_1
+  { TRY(mpn_preinv_mod_1), TYPE_PREINV_MOD_1 },
+#endif
+#if HAVE_NATIVE_mpn_divrem_1c
+  { TRY(mpn_divrem_1c),    TYPE_DIVREM_1C },
+#endif
+#if HAVE_NATIVE_mpn_mod_1c
+  { TRY(mpn_mod_1c),       TYPE_MOD_1C },
+#endif
+#if GMP_NUMB_BITS % 4 == 0
+  { TRY(mpn_mod_34lsub1),  TYPE_MOD_34LSUB1 },
+#endif
+
+  { TRY_FUNFUN(udiv_qrnnd), TYPE_UDIV_QRNND, 2 },
+#if HAVE_NATIVE_mpn_udiv_qrnnd
+  { TRY(mpn_udiv_qrnnd),    TYPE_UDIV_QRNND, 2 },
+#endif
+#if HAVE_NATIVE_mpn_udiv_qrnnd_r
+  { TRY(mpn_udiv_qrnnd_r),  TYPE_UDIV_QRNND_R, 2 },
+#endif
+
+  { TRY(mpn_divexact_1),          TYPE_DIVEXACT_1 },
+  { TRY_FUNFUN(mpn_divexact_by3), TYPE_DIVEXACT_BY3 },
+  { TRY(mpn_divexact_by3c),       TYPE_DIVEXACT_BY3C },
+
+  { TRY_FUNFUN(mpn_modexact_1_odd), TYPE_MODEXACT_1_ODD },
+  { TRY(mpn_modexact_1c_odd),       TYPE_MODEXACT_1C_ODD },
+
+
+  { TRY(mpn_sbpi1_div_qr), TYPE_SBPI1_DIV_QR, 3},
+  { TRY(mpn_tdiv_qr),      TYPE_TDIV_QR },
+
+  { TRY(mpn_mul_1),      TYPE_MUL_1 },
+#if HAVE_NATIVE_mpn_mul_1c
+  { TRY(mpn_mul_1c),     TYPE_MUL_1C },
+#endif
+#if HAVE_NATIVE_mpn_mul_2
+  { TRY(mpn_mul_2),      TYPE_MUL_2, 2 },
+#endif
+#if HAVE_NATIVE_mpn_mul_3
+  { TRY(mpn_mul_3),      TYPE_MUL_3, 3 },
+#endif
+#if HAVE_NATIVE_mpn_mul_4
+  { TRY(mpn_mul_4),      TYPE_MUL_4, 4 },
+#endif
+
+  { TRY(mpn_rshift),     TYPE_RSHIFT },
+  { TRY(mpn_lshift),     TYPE_LSHIFT },
+  { TRY(mpn_lshiftc),    TYPE_LSHIFTC },
+
+
+  { TRY(mpn_mul_basecase), TYPE_MUL_MN },
+  { TRY(mpn_mullo_basecase), TYPE_MULLO_N },
+#if SQR_TOOM2_THRESHOLD > 0
+  { TRY(mpn_sqr_basecase), TYPE_SQR },
+#endif
+
+  { TRY(mpn_mul),    TYPE_MUL_MN },
+  { TRY(mpn_mul_n),  TYPE_MUL_N },
+  { TRY(mpn_sqr),    TYPE_SQR },
+
+  { TRY_FUNFUN(umul_ppmm), TYPE_UMUL_PPMM, 2 },
+#if HAVE_NATIVE_mpn_umul_ppmm
+  { TRY(mpn_umul_ppmm),    TYPE_UMUL_PPMM, 2 },
+#endif
+#if HAVE_NATIVE_mpn_umul_ppmm_r
+  { TRY(mpn_umul_ppmm_r),  TYPE_UMUL_PPMM_R, 2 },
+#endif
+
+  { TRY_FUNFUN(mpn_toom22_mul),  TYPE_MUL_N,  MPN_TOOM22_MUL_MINSIZE },
+  { TRY_FUNFUN(mpn_toom2_sqr),   TYPE_SQR,    MPN_TOOM2_SQR_MINSIZE },
+  { TRY_FUNFUN(mpn_toom33_mul),  TYPE_MUL_N,  MPN_TOOM33_MUL_MINSIZE },
+  { TRY_FUNFUN(mpn_toom3_sqr),   TYPE_SQR,    MPN_TOOM3_SQR_MINSIZE },
+  { TRY_FUNFUN(mpn_toom44_mul),  TYPE_MUL_N,  MPN_TOOM44_MUL_MINSIZE },
+  { TRY_FUNFUN(mpn_toom4_sqr),   TYPE_SQR,    MPN_TOOM4_SQR_MINSIZE },
+
+  { TRY(mpn_gcd_1),        TYPE_GCD_1            },
+  { TRY(mpn_gcd),          TYPE_GCD              },
+  { TRY(mpz_jacobi),       TYPE_MPZ_JACOBI       },
+  { TRY(mpz_kronecker_ui), TYPE_MPZ_KRONECKER_UI },
+  { TRY(mpz_kronecker_si), TYPE_MPZ_KRONECKER_SI },
+  { TRY(mpz_ui_kronecker), TYPE_MPZ_UI_KRONECKER },
+  { TRY(mpz_si_kronecker), TYPE_MPZ_SI_KRONECKER },
+
+  { TRY(mpn_popcount),   TYPE_POPCOUNT },
+  { TRY(mpn_hamdist),    TYPE_HAMDIST },
+
+  { TRY(mpn_sqrtrem),    TYPE_SQRTREM },
+
+  { TRY_FUNFUN(MPN_ZERO), TYPE_ZERO },
+
+  { TRY(mpn_get_str),    TYPE_GET_STR },
+
+  { TRY(mpn_binvert),    TYPE_BINVERT },
+  { TRY(mpn_invert),     TYPE_INVERT  },
+
+#ifdef EXTRA_ROUTINES
+  EXTRA_ROUTINES
+#endif
+};
+
+/* The entry currently under test; print_each() reads its name and call()
+   its type.  Starts out NULL. */
+const struct choice_t *choice = NULL;
+
+
+/* Apply protection "prot" to the "len" bytes at "addr", but only when the
+   redzones option is enabled; otherwise do nothing.
+
+   With mprotect available, a failure is reported on stderr and the program
+   exits with status 1.  Without it, a warning is printed the first time
+   only and no protection is applied. */
+void
+mprotect_maybe (void *addr, size_t len, int prot)
+{
+  if (option_redzones)
+    {
+#if HAVE_MPROTECT
+      int  status = mprotect (addr, len, prot);
+
+      if (status == 0)
+        return;
+
+      fprintf (stderr, "Cannot mprotect %p 0x%X 0x%X: %s\n",
+               addr, (unsigned) len, prot, strerror (errno));
+      exit (1);
+#else
+      static int  already_warned = 0;
+
+      if (already_warned)
+        return;
+
+      fprintf (stderr,
+               "mprotect not available, bounds testing not performed\n");
+      already_warned = 1;
+#endif
+    }
+}
+
+/* Round "a" up to a multiple of "m": returns "a" unchanged when it already
+   is a multiple, otherwise the next multiple above it.  "m" must be
+   non-zero, and the result is not checked for overflow.
+
+   The remainder is kept in a size_t, the same type as the operands, so it
+   cannot be truncated where unsigned long is narrower than size_t. */
+size_t
+round_up_multiple (size_t a, size_t m)
+{
+  size_t  rem = a % m;
+
+  if (rem == 0)
+    return a;
+
+  return a + (m - rem);
+}
+
+
+/* Allocate a region of at least "n" limbs with a redzone on each side, and
+   record it in "r".
+
+   "n" is first rounded up to a multiple of PAGESIZE_LIMBS.  The allocation
+   covers those limbs, two redzones of REDZONE_BYTES each, and one extra
+   page so the start can be moved to a page boundary.  The low redzone
+   starts at the aligned address, r->ptr points just past it, and the high
+   redzone follows the r->size limbs; both redzones are given PROT_NONE via
+   mprotect_maybe.
+
+   On some systems it seems that only an mmap'ed region can be mprotect'ed,
+   for instance HP-UX 10, so anonymous mmap is used when available and
+   malloc otherwise.  mmap will almost certainly return a pointer already
+   aligned to a page boundary, but it's easy enough to share the alignment
+   handling with the malloc case.  A failed mmap prints a message and exits
+   with status 1. */
+
+void
+malloc_region (struct region_t *r, mp_size_t n)
+{
+  mp_ptr  block;
+  size_t  total;
+
+  ASSERT ((pagesize % BYTES_PER_MP_LIMB) == 0);
+
+  n = round_up_multiple (n, PAGESIZE_LIMBS);
+  total = n*BYTES_PER_MP_LIMB + 2*REDZONE_BYTES + pagesize;
+
+#if defined (MAP_ANONYMOUS) && ! defined (MAP_ANON)
+#define MAP_ANON  MAP_ANONYMOUS
+#endif
+
+#if HAVE_MMAP && defined (MAP_ANON)
+  /* note must pass fd=-1 for MAP_ANON on BSD */
+  block = mmap (NULL, total, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
+  if (block == (void *) -1)
+    {
+      fprintf (stderr, "Cannot mmap %#x anon bytes: %s\n",
+               (unsigned) total, strerror (errno));
+      exit (1);
+    }
+#else
+  block = (mp_ptr) malloc (total);
+  ASSERT_ALWAYS (block != NULL);
+#endif
+
+  /* low redzone sits at the page-aligned start of the allocation */
+  block = align_pointer (block, pagesize);
+  mprotect_maybe (block, REDZONE_BYTES, PROT_NONE);
+
+  /* usable limbs follow the low redzone */
+  block += REDZONE_LIMBS;
+  r->size = n;
+  r->ptr = block;
+
+  /* high redzone follows the usable limbs */
+  mprotect_maybe (block + n, REDZONE_BYTES, PROT_NONE);
+}
+
+/* Set protection "prot" on all the usable limbs of region "r" (redzones
+   excluded), subject to the redzones option as in mprotect_maybe.
+
+   r->size is a count of limbs, as stored by malloc_region, whereas
+   mprotect_maybe takes a length in bytes, so the count is scaled by
+   BYTES_PER_MP_LIMB.  Passing the limb count directly would change the
+   protection of only the first fraction of the region. */
+void
+mprotect_region (const struct region_t *r, int prot)
+{
+  mprotect_maybe (r->ptr, (size_t) r->size * BYTES_PER_MP_LIMB, prot);
+}
+
+
+/* First four entries must be 0,1,2,3 for the benefit of CARRY_BIT, CARRY_3,
+   and CARRY_4 */
+mp_limb_t  carry_array[] = {
+  0, 1, 2, 3,
+  4,
+  CNST_LIMB(1) << 8,
+  CNST_LIMB(1) << 16,
+  GMP_NUMB_MAX
+};
+int        carry_index;
+
+/* Number of carry values CARRY_ITERATION steps through for the current
+   tr->carry: the first 2, 3 or 4 entries of carry_array[] for CARRY_BIT,
+   CARRY_3 and CARRY_4 respectively; all of carry_array[] followed by
+   CARRY_RANDOMS random limbs for CARRY_LIMB and CARRY_DIVISOR; and a
+   single pass for anything else. */
+#define CARRY_COUNT                                             \
+  ((tr->carry == CARRY_BIT) ? 2                                 \
+   : tr->carry == CARRY_3   ? 3                                 \
+   : tr->carry == CARRY_4   ? 4                                 \
+   : (tr->carry == CARRY_LIMB || tr->carry == CARRY_DIVISOR)    \
+     ? numberof(carry_array) + CARRY_RANDOMS                    \
+   : 1)
+
+/* Fill "size" limbs at "dst" with refmpn_random when "index" is odd and
+   with refmpn_random2 when it is even. */
+#define MPN_RANDOM_ALT(index,dst,size) \
+  (((index) & 1) ? refmpn_random (dst, size) : refmpn_random2 (dst, size))
+
+/* Loop header stepping carry_index from 0 up to CARRY_COUNT.  The loop
+   test is a comma expression: it first stores the next value in "carry"
+   (from carry_array[] while the index is within the array, random
+   afterwards), then for CARRY_DIVISOR reduces it modulo "divisor", and only
+   its last operand is the actual bound check.  Because the assignment
+   comes before the bound check, "carry" is also written on the final,
+   failing evaluation.
+
+   The dummy value after MPN_RANDOM_ALT ensures both sides of the ":" have
+   the same type */
+#define CARRY_ITERATION                                                 \
+  for (carry_index = 0;                                                 \
+       (carry_index < numberof (carry_array)                            \
+       ? (carry = carry_array[carry_index])                            \
+       : (MPN_RANDOM_ALT (carry_index, &carry, 1), (mp_limb_t) 0)),    \
+        (tr->carry == CARRY_DIVISOR ? carry %= divisor : 0),           \
+        carry_index < CARRY_COUNT;                                     \
+       carry_index++)
+
+
+/* Fixed multiplier values tried before any random ones. */
+mp_limb_t  multiplier_array[] = {
+  0, 1, 2, 3,
+  CNST_LIMB(1) << 8,
+  CNST_LIMB(1) << 16,
+  GMP_NUMB_MAX - 2,
+  GMP_NUMB_MAX - 1,
+  GMP_NUMB_MAX
+};
+int        multiplier_index;
+
+/* Fixed divisor values tried before any random ones; zero is not among
+   them. */
+mp_limb_t  divisor_array[] = {
+  1, 2, 3,
+  CNST_LIMB(1) << 8,
+  CNST_LIMB(1) << 16,
+  CNST_LIMB(1) << (GMP_NUMB_BITS/2 - 1),
+  GMP_NUMB_MAX >> (GMP_NUMB_BITS/2),
+  GMP_NUMB_HIGHBIT,
+  GMP_NUMB_HIGHBIT + 1,
+  GMP_NUMB_MAX - 2,
+  GMP_NUMB_MAX - 1,
+  GMP_NUMB_MAX
+};
+
+int        divisor_index;
+
+/* Generic loop header: "index" runs from 0 up to "limit", and on each
+   evaluation of the loop test "var" is set to array[index] while the index
+   is within "array", or to a random limb afterwards.  As the assignment
+   precedes the bound check, "var" is also written on the final, failing
+   evaluation.  The "randoms" and "cond" parameters are accepted but not
+   used in the expansion.
+
+   The dummy value after MPN_RANDOM_ALT ensures both sides of the ":" have
+   the same type */
+#define ARRAY_ITERATION(var, index, limit, array, randoms, cond)        \
+  for (index = 0;                                                       \
+       (index < numberof (array)                                        \
+       ? (var = array[index])                                          \
+       : (MPN_RANDOM_ALT (index, &var, 1), (mp_limb_t) 0)),            \
+       index < limit;                                                   \
+       index++)
+
+/* Passes made by MULTIPLIER_ITERATION: the whole of multiplier_array[]
+   plus MULTIPLIER_RANDOMS random values when tr->multiplier is set,
+   otherwise just one. */
+#define MULTIPLIER_COUNT                                \
+  (tr->multiplier                                       \
+    ? numberof (multiplier_array) + MULTIPLIER_RANDOMS  \
+    : 1)
+
+#define MULTIPLIER_ITERATION                                            \
+  ARRAY_ITERATION(multiplier, multiplier_index, MULTIPLIER_COUNT,       \
+                 multiplier_array, MULTIPLIER_RANDOMS, TRY_MULTIPLIER)
+
+/* Passes made by DIVISOR_ITERATION: the whole of divisor_array[] plus
+   DIVISOR_RANDOMS random values when tr->divisor is set, otherwise just
+   one. */
+#define DIVISOR_COUNT                           \
+  (tr->divisor                                  \
+   ? numberof (divisor_array) + DIVISOR_RANDOMS \
+   : 1)
+
+#define DIVISOR_ITERATION                                               \
+  ARRAY_ITERATION(divisor, divisor_index, DIVISOR_COUNT, divisor_array, \
+                 DIVISOR_RANDOMS, TRY_DIVISOR)
+
+
+/* overlap_array[].s[i] is where s[i] should be, 0 or 1 means overlapping
+   d[0] or d[1] respectively, -1 means a separate (write-protected)
+   location.
+
+   The entries are ordered so that a leading run of the table can be used:
+   the first entry has no overlap at all, the first four cover every
+   combination of the sources overlapping d[0] only, and the remaining
+   five involve d[1].  */
+
+struct overlap_t {
+  int  s[NUM_SOURCES];
+} overlap_array[] = {
+  { { -1, -1 } },
+  { {  0, -1 } },
+  { { -1,  0 } },
+  { {  0,  0 } },
+  { {  1, -1 } },
+  { { -1,  1 } },
+  { {  1,  1 } },
+  { {  0,  1 } },
+  { {  1,  0 } },
+};
+
+/* Cursor and end pointer used by OVERLAP_ITERATION. */
+struct overlap_t  *overlap, *overlap_limit;
+
+/* Number of leading overlap_array[] entries to try for the current "tr".
+   The OVERLAP_ flags are tested as bits of tr->overlap and take priority,
+   in the order written; failing those, the count follows from which
+   destinations and sources the routine has. */
+#define OVERLAP_COUNT                   \
+  (tr->overlap & OVERLAP_NONE       ? 1 \
+   : tr->overlap & OVERLAP_NOT_SRCS ? 3 \
+   : tr->overlap & OVERLAP_NOT_SRC2 ? 2 \
+   : tr->dst[1]                     ? 9 \
+   : tr->src[1]                     ? 4 \
+   : tr->dst[0]                     ? 2 \
+   : 1)
+
+/* Loop header walking "overlap" over the first OVERLAP_COUNT entries of
+   overlap_array[]; OVERLAP_COUNT is evaluated once, at loop start. */
+#define OVERLAP_ITERATION                               \
+  for (overlap = &overlap_array[0],                     \
+    overlap_limit = &overlap_array[OVERLAP_COUNT];      \
+    overlap < overlap_limit;                            \
+    overlap++)
+
+
+int  base = 10;
+
+/* t_rand selects the generator t_random() uses for DATA_TRAND: 0 for
+   refmpn_random, 1 for refmpn_random2.  T_RAND_COUNT is the number of such
+   choices. */
+#define T_RAND_COUNT  2
+int  t_rand;
+
+/* Fill the "n" limbs at "ptr" with test data of the kind chosen by
+   option_data.  Nothing is done for n == 0.
+
+     DATA_TRAND  random limbs, from refmpn_random or refmpn_random2
+                 according to t_rand
+     DATA_SEQ    consecutive values from a counter that carries on across
+                 calls
+     DATA_ZEROS  all zero limbs
+     DATA_FFS    all limbs GMP_NUMB_MAX
+     DATA_2FD    the special value described below
+
+   Any other option_data, or a t_rand other than 0 or 1 under DATA_TRAND,
+   aborts the program. */
+void
+t_random (mp_ptr ptr, mp_size_t n)
+{
+  static mp_limb_t  seq_counter = 0;
+  mp_size_t  k;
+
+  if (n == 0)
+    return;
+
+  if (option_data == DATA_TRAND)
+    {
+      if (t_rand == 0)
+        refmpn_random (ptr, n);
+      else if (t_rand == 1)
+        refmpn_random2 (ptr, n);
+      else
+        abort();
+    }
+  else if (option_data == DATA_SEQ)
+    {
+      for (k = 0; k < n; k++)
+        {
+          seq_counter++;
+          ptr[k] = seq_counter;
+        }
+    }
+  else if (option_data == DATA_ZEROS)
+    {
+      refmpn_zero (ptr, n);
+    }
+  else if (option_data == DATA_FFS)
+    {
+      refmpn_fill (ptr, n, GMP_NUMB_MAX);
+    }
+  else if (option_data == DATA_2FD)
+    {
+      /* Special value 0x2FFF...FFFD, which divided by 3 gives 0xFFF...FFF,
+         inducing the q1_ff special case in the mul-by-inverse part of some
+         versions of divrem_1 and mod_1. */
+      refmpn_fill (ptr, n, (mp_limb_t) -1);
+      ptr[n-1] = 2;
+      ptr[0] -= 2;
+    }
+  else
+    {
+      abort();
+    }
+}
+/* Loop header running t_rand over 0 .. T_RAND_COUNT-1. */
+#define T_RAND_ITERATION \
+  for (t_rand = 0; t_rand < T_RAND_COUNT; t_rand++)
+
+
+/* Print the results held in "e" for the current test: a heading with the
+   name in e and the routine's name (the reference name when e is the
+   global "ref", otherwise the chosen routine's name), the return value if
+   the test type has one, the contents and address of each destination in
+   use, and the address of each source in use. */
+void
+print_each (const struct each_t *e)
+{
+  const char  *routine;
+  int  k;
+
+  routine = (e == &ref ? tr->reference_name : choice->name);
+  printf ("%s %s\n", e->name, routine);
+
+  if (tr->retval)
+    mpn_trace ("   retval", &e->retval, 1);
+
+  for (k = 0; k < NUM_DESTS; k++)
+    {
+      if (! tr->dst[k])
+        continue;
+
+      /* byte destinations are dumped bytewise, the rest as limbs */
+      if (tr->dst_bytes[k])
+        byte_tracen ("   d[%d]", k, e->d[k].p, d[k].size);
+      else
+        mpn_tracen ("   d[%d]", k, e->d[k].p, d[k].size);
+
+      printf ("        located %p\n", (void *) (e->d[k].p));
+    }
+
+  for (k = 0; k < NUM_SOURCES; k++)
+    {
+      if (tr->src[k])
+        printf ("   s[%d] located %p\n", k, (void *)  (e->s[k].p));
+    }
+}
+
+
+/* Print the whole state of the current test case: the sizes, whichever of
+   multiplier, divisor, shift, carry and multiplier_N the test type uses,
+   the placement of each destination and source (including how a source
+   overlaps a destination), the source data, and finally the reference
+   results (when there is a reference function) and the tested routine's
+   results via print_each. */
+void
+print_all (void)
+{
+  int  k;
+
+  printf ("\n");
+  printf ("size  %ld\n", (long) size);
+  if (tr->size2)
+    printf ("size2 %ld\n", (long) size2);
+
+  /* only destination sizes that differ from the main size are shown */
+  for (k = 0; k < NUM_DESTS; k++)
+    {
+      if (d[k].size != size)
+        printf ("d[%d].size %ld\n", k, (long) d[k].size);
+    }
+
+  if (tr->multiplier)
+    mpn_trace ("   multiplier", &multiplier, 1);
+  if (tr->divisor)
+    mpn_trace ("   divisor", &divisor, 1);
+  if (tr->shift)
+    printf ("   shift %lu\n", shift);
+  if (tr->carry)
+    mpn_trace ("   carry", &carry, 1);
+  if (tr->msize)
+    mpn_trace ("   multiplier_N", multiplier_N, tr->msize);
+
+  for (k = 0; k < NUM_DESTS; k++)
+    {
+      if (! tr->dst[k])
+        continue;
+
+      printf ("   d[%d] %s, align %ld, size %ld\n",
+              k, d[k].high ? "high" : "low",
+              (long) d[k].align, (long) d[k].size);
+    }
+
+  for (k = 0; k < NUM_SOURCES; k++)
+    {
+      int  where;
+
+      if (! tr->src[k])
+        continue;
+
+      printf ("   s[%d] %s, align %ld, ",
+              k, s[k].high ? "high" : "low", (long) s[k].align);
+
+      where = overlap->s[k];
+      if (where == -1)
+        {
+          printf ("no overlap\n");
+        }
+      else
+        {
+          const char  *direction = "";
+
+          if (tr->overlap == OVERLAP_LOW_TO_HIGH)
+            direction = "+a";
+          else if (tr->overlap == OVERLAP_HIGH_TO_LOW)
+            direction = "-a";
+
+          printf ("==d[%d]%s\n", where, direction);
+        }
+
+      printf ("   s[%d]=", k);
+      /* with carry_sign, bit k of the carry marks source k as negative */
+      if (tr->carry_sign && (carry & (1 << k)))
+        printf ("-");
+      mpn_trace (NULL, s[k].p, SRC_SIZE(k));
+    }
+
+  if (tr->dst0_from_src1)
+    mpn_trace ("   d[0]", s[1].region.ptr, size);
+
+  if (tr->reference)
+    print_each (&ref);
+  print_each (&fun);
+}
+
+/* Compare the results of the tested routine ("fun") against those of the
+   reference ("ref").  The return values are compared when the test type
+   has one, then every destination in use is compared over d[i].size
+   elements: bytewise for byte destinations, otherwise as limbs with
+   refmpn_equal_anynail.  Destinations whose size is SIZE_RETVAL or
+   SIZE_GET_STR first get their size set from the reference return value.
+
+   On any difference the details are printed, print_all() dumps the test
+   case, and the program aborts. */
+void
+compare (void)
+{
+  int  mismatch = 0;
+  int  k;
+
+  if (tr->retval && ref.retval != fun.retval)
+    {
+      gmp_printf ("Different return values (%Mu, %Mu)\n",
+                  ref.retval, fun.retval);
+      mismatch = 1;
+    }
+
+  /* sizes that depend on the return value are only known now */
+  for (k = 0; k < NUM_DESTS; k++)
+    {
+      if (tr->dst_size[k] == SIZE_RETVAL || tr->dst_size[k] == SIZE_GET_STR)
+        d[k].size = ref.retval;
+    }
+
+  for (k = 0; k < NUM_DESTS; k++)
+    {
+      long  low_diff, high_diff;
+
+      if (! tr->dst[k])
+        continue;
+
+      if (tr->dst_bytes[k])
+        {
+          if (memcmp (ref.d[k].p, fun.d[k].p, d[k].size) == 0)
+            continue;
+
+          low_diff = (long) byte_diff_lowest (ref.d[k].p, fun.d[k].p, d[k].size);
+          high_diff = (long) byte_diff_highest (ref.d[k].p, fun.d[k].p, d[k].size);
+        }
+      else
+        {
+          if (d[k].size == 0
+              || refmpn_equal_anynail (ref.d[k].p, fun.d[k].p, d[k].size))
+            continue;
+
+          low_diff = (long) mpn_diff_lowest (ref.d[k].p, fun.d[k].p, d[k].size);
+          high_diff = (long) mpn_diff_highest (ref.d[k].p, fun.d[k].p, d[k].size);
+        }
+
+      printf ("Different d[%d] data results, low diff at %ld, high diff at %ld\n",
+              k, low_diff, high_diff);
+      mismatch = 1;
+    }
+
+  if (mismatch)
+    {
+      print_all();
+      abort();
+    }
+}
+
+
+/* Invoke FUNCTION through CALLING_CONVENTIONS with the argument list that
+   choice->type calls for, taking the operand pointers from E, and store
+   the result in e->retval for those types where a value is returned.  An
+   unrecognised type is reported and aborts.
+
+   The functions are cast if the return value should be a long rather than
+   the default mp_limb_t.  This is necessary under _LONG_LONG_LIMB.  This
+   might not be enough if some actual calling conventions checking is
+   implemented on a long long limb system.  */
+
+void
+call (struct each_t *e, tryfun_t function)
+{
+  switch (choice->type) {
+  case TYPE_ADD:
+  case TYPE_SUB:
+    e->retval = CALLING_CONVENTIONS (function)
+      (e->d[0].p, e->s[0].p, size, e->s[1].p, size2);
+    break;
+
+  case TYPE_ADD_N:
+  case TYPE_SUB_N:
+  case TYPE_ADDLSH1_N:
+  case TYPE_ADDLSH2_N:
+  case TYPE_SUBLSH1_N:
+  case TYPE_RSBLSH1_N:
+  case TYPE_RSBLSH2_N:
+  case TYPE_RSH1ADD_N:
+  case TYPE_RSH1SUB_N:
+    e->retval = CALLING_CONVENTIONS (function)
+      (e->d[0].p, e->s[0].p, e->s[1].p, size);
+    break;
+  case TYPE_ADDLSH_N:
+  case TYPE_SUBLSH_N:
+  case TYPE_RSBLSH_N:
+    e->retval = CALLING_CONVENTIONS (function)
+      (e->d[0].p, e->s[0].p, e->s[1].p, size, shift);
+    break;
+  case TYPE_ADD_NC:
+  case TYPE_SUB_NC:
+    e->retval = CALLING_CONVENTIONS (function)
+      (e->d[0].p, e->s[0].p, e->s[1].p, size, carry);
+    break;
+
+  case TYPE_MUL_1:
+  case TYPE_ADDMUL_1:
+  case TYPE_SUBMUL_1:
+    e->retval = CALLING_CONVENTIONS (function)
+      (e->d[0].p, e->s[0].p, size, multiplier);
+    break;
+  case TYPE_MUL_1C:
+  case TYPE_ADDMUL_1C:
+  case TYPE_SUBMUL_1C:
+    e->retval = CALLING_CONVENTIONS (function)
+      (e->d[0].p, e->s[0].p, size, multiplier, carry);
+    break;
+
+  case TYPE_MUL_2:
+  case TYPE_MUL_3:
+  case TYPE_MUL_4:
+    if (size == 1)  /* size 1 is rejected outright for these types */
+      abort ();
+    e->retval = CALLING_CONVENTIONS (function)
+      (e->d[0].p, e->s[0].p, size, multiplier_N);
+    break;
+
+  case TYPE_ADDMUL_2:
+  case TYPE_ADDMUL_3:
+  case TYPE_ADDMUL_4:
+  case TYPE_ADDMUL_5:
+  case TYPE_ADDMUL_6:
+  case TYPE_ADDMUL_7:
+  case TYPE_ADDMUL_8:
+    if (size == 1)  /* size 1 is rejected outright for these types */
+      abort ();
+    e->retval = CALLING_CONVENTIONS (function)
+      (e->d[0].p, e->s[0].p, size, multiplier_N);
+    break;
+
+  case TYPE_AND_N:
+  case TYPE_ANDN_N:
+  case TYPE_NAND_N:
+  case TYPE_IOR_N:
+  case TYPE_IORN_N:
+  case TYPE_NIOR_N:
+  case TYPE_XOR_N:
+  case TYPE_XNOR_N:
+    CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, e->s[1].p, size);
+    break;
+
+  case TYPE_ADDSUB_N:
+    e->retval = CALLING_CONVENTIONS (function)
+      (e->d[0].p, e->d[1].p, e->s[0].p, e->s[1].p, size);
+    break;
+  case TYPE_ADDSUB_NC:
+    e->retval = CALLING_CONVENTIONS (function)
+      (e->d[0].p, e->d[1].p, e->s[0].p, e->s[1].p, size, carry);
+    break;
+
+  case TYPE_COPY:
+  case TYPE_COPYI:
+  case TYPE_COPYD:
+  case TYPE_COM:
+    CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);
+    break;
+
+
+  case TYPE_DIVEXACT_BY3:
+    e->retval = CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);
+    break;
+  case TYPE_DIVEXACT_BY3C:
+    e->retval = CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size,
+                                               carry);
+    break;
+
+
+  case TYPE_DIVMOD_1:
+  case TYPE_DIVEXACT_1:
+    e->retval = CALLING_CONVENTIONS (function)
+      (e->d[0].p, e->s[0].p, size, divisor);
+    break;
+  case TYPE_DIVMOD_1C:
+    e->retval = CALLING_CONVENTIONS (function)
+      (e->d[0].p, e->s[0].p, size, divisor, carry);
+    break;
+  case TYPE_DIVREM_1:
+    e->retval = CALLING_CONVENTIONS (function)
+      (e->d[0].p, size2, e->s[0].p, size, divisor);
+    break;
+  case TYPE_DIVREM_1C:
+    e->retval = CALLING_CONVENTIONS (function)
+      (e->d[0].p, size2, e->s[0].p, size, divisor, carry);
+    break;
+  case TYPE_PREINV_DIVREM_1:
+    {
+      mp_limb_t  dinv;
+      unsigned   shift;  /* local; hides the outer shift used by other cases */
+      shift = refmpn_count_leading_zeros (divisor);
+      dinv = refmpn_invert_limb (divisor << shift);
+      e->retval = CALLING_CONVENTIONS (function)
+       (e->d[0].p, size2, e->s[0].p, size, divisor, dinv, shift);
+    }
+    break;
+  case TYPE_MOD_1:
+  case TYPE_MODEXACT_1_ODD:
+    e->retval = CALLING_CONVENTIONS (function)
+      (e->s[0].p, size, divisor);
+    break;
+  case TYPE_MOD_1C:
+  case TYPE_MODEXACT_1C_ODD:
+    e->retval = CALLING_CONVENTIONS (function)
+      (e->s[0].p, size, divisor, carry);
+    break;
+  case TYPE_PREINV_MOD_1:
+    e->retval = CALLING_CONVENTIONS (function)
+      (e->s[0].p, size, divisor, refmpn_invert_limb (divisor));
+    break;
+  case TYPE_MOD_34LSUB1:
+    e->retval = CALLING_CONVENTIONS (function) (e->s[0].p, size);
+    break;
+
+  case TYPE_UDIV_QRNND:
+    e->retval = CALLING_CONVENTIONS (function)
+      (e->d[0].p, e->s[0].p[1], e->s[0].p[0], divisor);
+    break;
+  case TYPE_UDIV_QRNND_R:
+    e->retval = CALLING_CONVENTIONS (function)
+      (e->s[0].p[1], e->s[0].p[0], divisor, e->d[0].p);
+    break;
+
+  case TYPE_SBPI1_DIV_QR:
+    {
+      gmp_pi1_t dinv;
+      invert_pi1 (dinv, e->s[1].p[size2-1], e->s[1].p[size2-2]); /* FIXME: use refinvert_pi1 */
+      refmpn_copyi (e->d[1].p, e->s[0].p, size);        /* dividend */
+      refmpn_fill (e->d[0].p, size-size2, 0x98765432);  /* quotient */
+      e->retval = CALLING_CONVENTIONS (function)
+       (e->d[0].p, e->d[1].p, size, e->s[1].p, size2, dinv.inv32);
+      refmpn_zero (e->d[1].p+size2, size-size2);    /* excess over remainder */
+    }
+    break;
+
+  case TYPE_TDIV_QR:
+    CALLING_CONVENTIONS (function) (e->d[0].p, e->d[1].p, 0,
+                                   e->s[0].p, size, e->s[1].p, size2);
+    break;
+
+  case TYPE_GCD_1:
+    /* Must have a non-zero src, but this probably isn't the best way to do
+       it. */
+    if (refmpn_zero_p (e->s[0].p, size))
+      e->retval = 0;
+    else
+      e->retval = CALLING_CONVENTIONS (function) (e->s[0].p, size, divisor);
+    break;
+
+  case TYPE_GCD:
+    /* Sources are destroyed, so they're saved and replaced, but a general
+       approach to this might be better.  Note that it's still e->s[0].p and
+       e->s[1].p that are passed, to get the desired alignments. */
+    {
+      mp_ptr  s0 = refmpn_malloc_limbs (size);
+      mp_ptr  s1 = refmpn_malloc_limbs (size2);
+      refmpn_copyi (s0, e->s[0].p, size);
+      refmpn_copyi (s1, e->s[1].p, size2);
+
+      mprotect_region (&s[0].region, PROT_READ|PROT_WRITE);
+      mprotect_region (&s[1].region, PROT_READ|PROT_WRITE);
+      e->retval = CALLING_CONVENTIONS (function) (e->d[0].p,
+                                                 e->s[0].p, size,
+                                                 e->s[1].p, size2);
+      refmpn_copyi (e->s[0].p, s0, size);
+      refmpn_copyi (e->s[1].p, s1, size2);
+      free (s0);
+      free (s1);
+    }
+    break;
+
+  case TYPE_GCD_FINDA:
+    {
+      /* FIXME: do this with a flag */
+      mp_limb_t  c[2];
+      c[0] = e->s[0].p[0];
+      c[0] += (c[0] == 0);
+      c[1] = e->s[0].p[0];  /* NOTE(review): limb 0 is read again here, so
+                               c[1] == c[0]; confirm whether p[1] was meant */
+      c[1] += (c[1] == 0);
+      e->retval = CALLING_CONVENTIONS (function) (c);
+    }
+    break;
+
+  case TYPE_MPZ_JACOBI:
+  case TYPE_MPZ_KRONECKER:
+    {
+      mpz_t  a, b;
+      PTR(a) = e->s[0].p; SIZ(a) = ((carry&1)==0 ? size : -size);  /* carry bit 0 set: a negative */
+      PTR(b) = e->s[1].p; SIZ(b) = ((carry&2)==0 ? size2 : -size2);  /* carry bit 1 set: b negative */
+      e->retval = CALLING_CONVENTIONS (function) (a, b);
+    }
+    break;
+  case TYPE_MPZ_KRONECKER_UI:
+    {
+      mpz_t  a;
+      PTR(a) = e->s[0].p; SIZ(a) = (carry==0 ? size : -size);
+      e->retval = CALLING_CONVENTIONS(function) (a, (unsigned long)multiplier);
+    }
+    break;
+  case TYPE_MPZ_KRONECKER_SI:
+    {
+      mpz_t  a;
+      PTR(a) = e->s[0].p; SIZ(a) = (carry==0 ? size : -size);
+      e->retval = CALLING_CONVENTIONS (function) (a, (long) multiplier);
+    }
+    break;
+  case TYPE_MPZ_UI_KRONECKER:
+    {
+      mpz_t  b;
+      PTR(b) = e->s[0].p; SIZ(b) = (carry==0 ? size : -size);
+      e->retval = CALLING_CONVENTIONS(function) ((unsigned long)multiplier, b);
+    }
+    break;
+  case TYPE_MPZ_SI_KRONECKER:
+    {
+      mpz_t  b;
+      PTR(b) = e->s[0].p; SIZ(b) = (carry==0 ? size : -size);
+      e->retval = CALLING_CONVENTIONS (function) ((long) multiplier, b);
+    }
+    break;
+
+  case TYPE_MUL_MN:
+    CALLING_CONVENTIONS (function)
+      (e->d[0].p, e->s[0].p, size, e->s[1].p, size2);
+    break;
+  case TYPE_MUL_N:
+  case TYPE_MULLO_N:
+    CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, e->s[1].p, size);
+    break;
+  case TYPE_SQR:
+    CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);
+    break;
+
+  case TYPE_UMUL_PPMM:
+    e->retval = CALLING_CONVENTIONS (function)
+      (e->d[0].p, e->s[0].p[0], e->s[0].p[1]);
+    break;
+  case TYPE_UMUL_PPMM_R:
+    e->retval = CALLING_CONVENTIONS (function)
+      (e->s[0].p[0], e->s[0].p[1], e->d[0].p);
+    break;
+
+  case TYPE_LSHIFT:
+  case TYPE_LSHIFTC:
+  case TYPE_RSHIFT:
+    e->retval = CALLING_CONVENTIONS (function)
+      (e->d[0].p, e->s[0].p, size, shift);
+    break;
+
+  case TYPE_POPCOUNT:
+    e->retval = (* (unsigned long (*)(ANYARGS))
+                CALLING_CONVENTIONS (function)) (e->s[0].p, size);
+    break;
+  case TYPE_HAMDIST:
+    e->retval = (* (unsigned long (*)(ANYARGS))
+                CALLING_CONVENTIONS (function)) (e->s[0].p, e->s[1].p, size);
+    break;
+
+  case TYPE_SQRTREM:
+    e->retval = (* (long (*)(ANYARGS)) CALLING_CONVENTIONS (function))
+      (e->d[0].p, e->d[1].p, e->s[0].p, size);
+    break;
+
+  case TYPE_ZERO:
+    CALLING_CONVENTIONS (function) (e->d[0].p, size);
+    break;
+
+  case TYPE_GET_STR:
+    {
+      size_t  sizeinbase, fill;
+      char    *dst;
+      MPN_SIZEINBASE (sizeinbase, e->s[0].p, size, base);
+      ASSERT_ALWAYS (sizeinbase <= d[0].size);
+      fill = d[0].size - sizeinbase;  /* bytes of d[0] outside the string, set to 0xBA below */
+      if (d[0].high)
+       {
+         memset (e->d[0].p, 0xBA, fill);
+         dst = (char *) e->d[0].p + fill;
+       }
+      else
+       {
+         dst = (char *) e->d[0].p;
+         memset (dst + sizeinbase, 0xBA, fill);
+       }
+      if (POW2_P (base))
+       {
+         e->retval = CALLING_CONVENTIONS (function) (dst, base,
+                                                     e->s[0].p, size);
+       }
+      else
+       {
+         refmpn_copy (e->d[1].p, e->s[0].p, size);  /* the call is given this copy, not the source */
+         e->retval = CALLING_CONVENTIONS (function) (dst, base,
+                                                     e->d[1].p, size);
+       }
+      refmpn_zero (e->d[1].p, size);  /* clobbered or unused */
+    }
+    break;
+
+ case TYPE_INVERT:
+    {
+      mp_ptr scratch;
+      TMP_DECL;
+      TMP_MARK;
+      scratch = TMP_ALLOC_LIMBS (mpn_invert_itch (size));
+      CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size, scratch);
+      TMP_FREE;
+    }
+    break;
+  case TYPE_BINVERT:
+    {
+      mp_ptr scratch;
+      TMP_DECL;
+      TMP_MARK;
+      scratch = TMP_ALLOC_LIMBS (mpn_binvert_itch (size));
+      CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size, scratch);
+      TMP_FREE;
+    }
+    break;
+
+#ifdef EXTRA_CALL
+    EXTRA_CALL
+#endif
+
+  default:
+    printf ("Unknown routine type %d\n", choice->type);
+    abort ();
+    break;
+  }
+}
+
+/* Work out the destination sizes d[].size from tr->dst_size[], then set
+   the e->d[].p destination pointers and e->s[].p source pointers of E
+   from the current high/low, alignment and overlap selections.  An
+   unrecognised dst_size or overlap value aborts.  */
+void
+pointer_setup (struct each_t *e)
+{
+  int  i, j;
+
+  for (i = 0; i < NUM_DESTS; i++)
+    {
+      switch (tr->dst_size[i]) {
+      case 0:
+      case SIZE_RETVAL: /* will be adjusted later */
+       d[i].size = size;
+       break;
+
+      case SIZE_1:
+       d[i].size = 1;
+       break;
+      case SIZE_2:
+       d[i].size = 2;
+       break;
+      case SIZE_3:
+       d[i].size = 3;
+       break;
+
+      case SIZE_PLUS_1:
+       d[i].size = size+1;
+       break;
+      case SIZE_PLUS_MSIZE_SUB_1:
+       d[i].size = size + tr->msize - 1;
+       break;
+
+      case SIZE_SUM:
+       if (tr->size2)
+         d[i].size = size + size2;
+       else
+         d[i].size = 2*size;
+       break;
+
+      case SIZE_SIZE2:
+       d[i].size = size2;
+       break;
+
+      case SIZE_DIFF:
+       d[i].size = size - size2;
+       break;
+
+      case SIZE_DIFF_PLUS_1:
+       d[i].size = size - size2 + 1;
+       break;
+
+      case SIZE_CEIL_HALF:
+       d[i].size = (size+1)/2;
+       break;
+
+      case SIZE_GET_STR:
+       {
+         mp_limb_t ff = GMP_NUMB_MAX;  /* the pointer passed below is offset so
+                                          that its limb [size-1] is ff;
+                                          presumably only that top limb is
+                                          examined -- TODO confirm */
+         MPN_SIZEINBASE (d[i].size, &ff - (size-1), size, base);
+       }
+       break;
+
+      default:
+       printf ("Unrecognised dst_size type %d\n", tr->dst_size[i]);
+       abort ();
+      }
+    }
+
+  /* establish e->d[].p destinations */
+  for (i = 0; i < NUM_DESTS; i++)
+    {
+      mp_size_t  offset = 0;  /* largest align of any source overlapping d[i] */
+
+      /* possible room for overlapping sources */
+      for (j = 0; j < numberof (overlap->s); j++)
+       if (overlap->s[j] == i)
+         offset = MAX (offset, s[j].align);
+
+      if (d[i].high)  /* placed against the end of the region */
+       {
+         if (tr->dst_bytes[i])  /* size and align counted in bytes */
+           {
+             e->d[i].p = (mp_ptr)
+               ((char *) (e->d[i].region.ptr + e->d[i].region.size)
+                - d[i].size - d[i].align);
+           }
+         else
+           {
+             e->d[i].p = e->d[i].region.ptr + e->d[i].region.size
+               - d[i].size - d[i].align;
+             if (tr->overlap == OVERLAP_LOW_TO_HIGH)
+               e->d[i].p -= offset;
+           }
+       }
+      else  /* placed against the start of the region */
+       {
+         if (tr->dst_bytes[i])
+           {
+             e->d[i].p = (mp_ptr) ((char *) e->d[i].region.ptr + d[i].align);
+           }
+         else
+           {
+             e->d[i].p = e->d[i].region.ptr + d[i].align;
+             if (tr->overlap == OVERLAP_HIGH_TO_LOW)
+               e->d[i].p += offset;
+           }
+       }
+    }
+
+  /* establish e->s[].p sources */
+  for (i = 0; i < NUM_SOURCES; i++)
+    {
+      int  o = overlap->s[i];  /* destination index overlapped, or -1 */
+      switch (o) {
+      case -1:
+       /* no overlap */
+       e->s[i].p = s[i].p;
+       break;
+      case 0:
+      case 1:
+       /* overlap with d[o] */
+       if (tr->overlap == OVERLAP_HIGH_TO_LOW)
+         e->s[i].p = e->d[o].p - s[i].align;
+       else if (tr->overlap == OVERLAP_LOW_TO_HIGH)
+         e->s[i].p = e->d[o].p + s[i].align;
+       else if (tr->size2 == SIZE_FRACTION)
+         e->s[i].p = e->d[o].p + size2;
+       else
+         e->s[i].p = e->d[o].p;
+       break;
+      default:
+       abort();
+       break;
+      }
+    }
+}
+
+
+void
+validate_fail (void)
+{
+  /* Run the reference routine first, when there is one, so that
+     print_all() has its results to show too.  */
+  if (tr->reference != NULL)
+    {
+      trap_location = TRAP_REF;
+      call (&ref, tr->reference);
+      trap_location = TRAP_NOWHERE;
+    }
+
+  print_all ();
+  abort ();
+}
+
+/* Run a single test case with the current settings of the iteration
+   variables: adjust the divisor, position and fill the sources, apply the
+   tr->data constraints, pre-fill the destinations, then either call the
+   test function and hand over to tr->validate, or call both the reference
+   and the test function and compare() their results.  */
+void
+try_one (void)
+{
+  int  i;
+
+  if (option_spinner)
+    spinner();
+  spinner_count++;
+
+  trap_location = TRAP_SETUPS;
+
+  if (tr->divisor == DIVISOR_NORM)
+    divisor |= GMP_NUMB_HIGHBIT;
+  if (tr->divisor == DIVISOR_ODD)
+    divisor |= 1;
+
+  for (i = 0; i < NUM_SOURCES; i++)
+    {
+      if (s[i].high)
+       s[i].p = s[i].region.ptr + s[i].region.size - SRC_SIZE(i) - s[i].align;
+      else
+       s[i].p = s[i].region.ptr + s[i].align;
+    }
+
+  pointer_setup (&ref);
+  pointer_setup (&fun);
+
+  ref.retval = 0x04152637;  /* ref and fun start from different preset values */
+  fun.retval = 0x8C9DAEBF;
+
+  t_random (multiplier_N, tr->msize);
+
+  for (i = 0; i < NUM_SOURCES; i++)
+    {
+      if (! tr->src[i])
+       continue;
+
+      mprotect_region (&s[i].region, PROT_READ|PROT_WRITE);  /* writable while the data is set up */
+      t_random (s[i].p, SRC_SIZE(i));
+
+      switch (tr->data) {
+      case DATA_NON_ZERO:
+       if (refmpn_zero_p (s[i].p, SRC_SIZE(i)))
+         s[i].p[0] = 1;
+       break;
+
+      case DATA_MULTIPLE_DIVISOR:
+       /* same number of low zero bits as divisor */
+       s[i].p[0] &= ~ LOW_ZEROS_MASK (divisor);
+       refmpn_sub_1 (s[i].p, s[i].p, size,
+                     refmpn_mod_1 (s[i].p, size, divisor));
+       break;
+
+      case DATA_GCD:
+       /* s[1] no more bits than s[0] */
+       if (i == 1 && size2 == size)
+         s[1].p[size-1] &= refmpn_msbone_mask (s[0].p[size-1]);
+
+       /* high limb non-zero */
+       s[i].p[SRC_SIZE(i)-1] += (s[i].p[SRC_SIZE(i)-1] == 0);
+
+       /* odd */
+       s[i].p[0] |= 1;
+       break;
+
+      case DATA_SRC0_ODD:
+       if (i == 0)
+         s[i].p[0] |= 1;
+       break;
+
+      case DATA_SRC1_ODD:
+       if (i == 1)
+         s[i].p[0] |= 1;
+       break;
+
+      case DATA_SRC1_HIGHBIT:
+       if (i == 1)
+         {
+           if (tr->size2)
+             s[i].p[size2-1] |= GMP_NUMB_HIGHBIT;
+           else
+             s[i].p[size-1] |= GMP_NUMB_HIGHBIT;
+         }
+       break;
+
+      case DATA_SRC0_HIGHBIT:
+       if (i == 0)
+         {
+           s[i].p[size-1] |= GMP_NUMB_HIGHBIT;
+         }
+       break;
+
+      case DATA_UDIV_QRNND:
+       s[i].p[1] %= divisor;  /* high limb is left smaller than the divisor */
+       break;
+      }
+
+      mprotect_region (&s[i].region, PROT_READ);  /* back to read-only for the calls */
+    }
+
+  for (i = 0; i < NUM_DESTS; i++)
+    {
+      if (! tr->dst[i])
+       continue;
+
+      if (tr->dst0_from_src1 && i==0)
+       {
+         mp_size_t  copy = MIN (d[0].size, SRC_SIZE(1));
+         mp_size_t  fill = MAX (0, d[0].size - copy);
+         MPN_COPY (fun.d[0].p, s[1].region.ptr, copy);  /* copied from the start of the
+                                                           region, not from s[1].p */
+         MPN_COPY (ref.d[0].p, s[1].region.ptr, copy);
+         refmpn_fill (fun.d[0].p + copy, fill, DEADVAL);
+         refmpn_fill (ref.d[0].p + copy, fill, DEADVAL);
+       }
+      else if (tr->dst_bytes[i])
+       {
+         memset (ref.d[i].p, 0xBA, d[i].size);
+         memset (fun.d[i].p, 0xBA, d[i].size);
+       }
+      else
+       {
+         refmpn_fill (ref.d[i].p, d[i].size, DEADVAL);
+         refmpn_fill (fun.d[i].p, d[i].size, DEADVAL);
+       }
+    }
+
+  for (i = 0; i < NUM_SOURCES; i++)
+    {
+      if (! tr->src[i])
+       continue;
+
+      if (ref.s[i].p != s[i].p)  /* pointer_setup placed this source relative
+                                    to a destination, so copy the data there */
+       {
+         refmpn_copyi (ref.s[i].p, s[i].p, SRC_SIZE(i));
+         refmpn_copyi (fun.s[i].p, s[i].p, SRC_SIZE(i));
+       }
+    }
+
+  if (option_print)
+    print_all();
+
+  if (tr->validate != NULL)  /* only the test function is called here */
+    {
+      trap_location = TRAP_FUN;
+      call (&fun, choice->function);
+      trap_location = TRAP_NOWHERE;
+
+      if (! CALLING_CONVENTIONS_CHECK ())
+       {
+         print_all();
+         abort();
+       }
+
+      (*tr->validate) ();
+    }
+  else  /* reference first, then the test function, then compare */
+    {
+      trap_location = TRAP_REF;
+      call (&ref, tr->reference);
+      trap_location = TRAP_FUN;
+      call (&fun, choice->function);
+      trap_location = TRAP_NOWHERE;
+
+      if (! CALLING_CONVENTIONS_CHECK ())
+       {
+         print_all();
+         abort();
+       }
+
+      compare ();
+    }
+}
+
+/* Iteration macros for try_many().  Each *_ITERATION macro expands to a
+   "for" header, so they nest simply by being written one after another.
+   SIZE_ITERATION runs size from the largest of option_firstsize,
+   choice->minsize and 0 or 1 (0 only with SIZE_ALLOW_ZERO) up to
+   option_lastsize.  SIZE2_ITERATION runs size2 from SIZE2_FIRST to
+   SIZE2_LAST, both chosen by tr->size2.  ALIGN_ITERATION and
+   HIGH_ITERATION step the align and high fields of operand w[n], over
+   the single value 0 when COND is false.  SHIFT_ITERATION runs shift
+   from 1 to SHIFT_LIMIT.  */
+#define SIZE_ITERATION                                          \
+  for (size = MAX3 (option_firstsize,                           \
+                   choice->minsize,                            \
+                   (tr->size == SIZE_ALLOW_ZERO) ? 0 : 1);     \
+       size <= option_lastsize;                                 \
+       size++)
+
+#define SIZE2_FIRST                                     \
+  (tr->size2 == SIZE_2 ? 2                              \
+   : tr->size2 == SIZE_FRACTION ? option_firstsize2     \
+   : tr->size2 ?                                        \
+   MAX (choice->minsize, (option_firstsize2 != 0        \
+                         ? option_firstsize2 : 1))     \
+   : 0)
+
+#define SIZE2_LAST                                      \
+  (tr->size2 == SIZE_2 ? 2                              \
+   : tr->size2 == SIZE_FRACTION ? FRACTION_COUNT-1      \
+   : tr->size2 ? size                                   \
+   : 0)
+
+#define SIZE2_ITERATION \
+  for (size2 = SIZE2_FIRST; size2 <= SIZE2_LAST; size2++)
+
+#define ALIGN_COUNT(cond)  ((cond) ? ALIGNMENTS : 1)
+#define ALIGN_ITERATION(w,n,cond) \
+  for (w[n].align = 0; w[n].align < ALIGN_COUNT(cond); w[n].align++)
+
+#define HIGH_LIMIT(cond)  ((cond) != 0)
+#define HIGH_COUNT(cond)  (HIGH_LIMIT (cond) + 1)
+#define HIGH_ITERATION(w,n,cond) \
+  for (w[n].high = 0; w[n].high <= HIGH_LIMIT(cond); w[n].high++)
+
+#define SHIFT_LIMIT                                     \
+  ((unsigned long) (tr->shift ? GMP_NUMB_BITS -1 : 1))
+
+#define SHIFT_ITERATION                                 \
+  for (shift = 1; shift <= SHIFT_LIMIT; shift++)
+
+/* Print the routine name together with a case count formed as the product
+   of the iteration ranges, then run try_one() for every combination of
+   the iteration variables, option_repetitions times over.  */
+void
+try_many (void)
+{
+  int   i;
+
+  {
+    unsigned long  total = 1;  /* product of the range counts below */
+
+    total *= option_repetitions;
+    total *= option_lastsize;
+    if (tr->size2 == SIZE_FRACTION) total *= FRACTION_COUNT;
+    else if (tr->size2)             total *= (option_lastsize+1)/2;
+
+    total *= SHIFT_LIMIT;
+    total *= MULTIPLIER_COUNT;
+    total *= DIVISOR_COUNT;
+    total *= CARRY_COUNT;
+    total *= T_RAND_COUNT;
+
+    total *= HIGH_COUNT (tr->dst[0]);
+    total *= HIGH_COUNT (tr->dst[1]);
+    total *= HIGH_COUNT (tr->src[0]);
+    total *= HIGH_COUNT (tr->src[1]);
+
+    total *= ALIGN_COUNT (tr->dst[0]);
+    total *= ALIGN_COUNT (tr->dst[1]);
+    total *= ALIGN_COUNT (tr->src[0]);
+    total *= ALIGN_COUNT (tr->src[1]);
+
+    total *= OVERLAP_COUNT;
+
+    printf ("%s %lu\n", choice->name, total);
+  }
+
+  spinner_count = 0;
+
+  for (i = 0; i < option_repetitions; i++)  /* the lines below are nested loop headers */
+    SIZE_ITERATION
+      SIZE2_ITERATION
+
+      SHIFT_ITERATION
+      MULTIPLIER_ITERATION
+      DIVISOR_ITERATION
+      CARRY_ITERATION /* must be after divisor */
+      T_RAND_ITERATION
+
+      HIGH_ITERATION(d,0, tr->dst[0])
+      HIGH_ITERATION(d,1, tr->dst[1])
+      HIGH_ITERATION(s,0, tr->src[0])
+      HIGH_ITERATION(s,1, tr->src[1])
+
+      ALIGN_ITERATION(d,0, tr->dst[0])
+      ALIGN_ITERATION(d,1, tr->dst[1])
+      ALIGN_ITERATION(s,0, tr->src[0])
+      ALIGN_ITERATION(s,1, tr->src[1])
+
+      OVERLAP_ITERATION
+      try_one();
+
+  printf("\n");
+}
+
+
+/* Usually print_all() doesn't show much, but it might give a hint as to
+   where the function was up to when it died. */
+void
+trap (int sig)
+{
+  const char *signame;
+
+  /* Name the signal, falling back to "noname" for anything unexpected.  */
+  if (sig == SIGILL)
+    signame = "SIGILL";
+#ifdef SIGBUS
+  else if (sig == SIGBUS)
+    signame = "SIGBUS";
+#endif
+  else if (sig == SIGSEGV)
+    signame = "SIGSEGV";
+  else if (sig == SIGFPE)
+    signame = "SIGFPE";
+  else
+    signame = "noname";
+
+  printf ("\n\nSIGNAL TRAP: %s\n", signame);
+
+  /* Say which phase was running; the full case is dumped only for the
+     test function and the parameter setups.  */
+  if (trap_location == TRAP_REF)
+    {
+      printf ("  in reference function: %s\n", tr->reference_name);
+    }
+  else if (trap_location == TRAP_FUN)
+    {
+      printf ("  in test function: %s\n", choice->name);
+      print_all ();
+    }
+  else if (trap_location == TRAP_SETUPS)
+    {
+      printf ("  in parameter setups\n");
+      print_all ();
+    }
+  else
+    {
+      printf ("  somewhere unknown\n");
+    }
+
+  exit (1);
+}
+
+/* Obtain the page size, install trap() as the handler for SIGILL, SIGBUS
+   (where defined), SIGSEGV and SIGFPE, and set up the source regions and
+   the ref/fun destination regions with malloc_region(), printing the
+   address range of each.  */
+void
+try_init (void)
+{
+#if HAVE_GETPAGESIZE
+  /* Prefer getpagesize() over sysconf(), since on SunOS 4 sysconf() doesn't
+     know _SC_PAGESIZE. */
+  pagesize = getpagesize ();
+#else
+#if HAVE_SYSCONF
+  if ((pagesize = sysconf (_SC_PAGESIZE)) == -1)
+    {
+      /* According to the linux man page, sysconf doesn't set errno */
+      fprintf (stderr, "Cannot get sysconf _SC_PAGESIZE\n");
+      exit (1);
+    }
+#else
+Error, error, cannot get page size
+#endif
+#endif
+
+  printf ("pagesize is 0x%lX bytes\n", pagesize);  /* NOTE(review): %lX wants an unsigned
+                                                      long; the type of pagesize is
+                                                      declared elsewhere -- confirm */
+
+  signal (SIGILL,  trap);
+#ifdef SIGBUS
+  signal (SIGBUS,  trap);
+#endif
+  signal (SIGSEGV, trap);
+  signal (SIGFPE,  trap);
+
+  {
+    int  i;
+
+    for (i = 0; i < NUM_SOURCES; i++)
+      {
+       malloc_region (&s[i].region, 2*option_lastsize+ALIGNMENTS-1);
+       printf ("s[%d] %p to %p (0x%lX bytes)\n",
+               i, (void *) (s[i].region.ptr),
+               (void *) (s[i].region.ptr + s[i].region.size),
+               (long) s[i].region.size * BYTES_PER_MP_LIMB);
+      }
+
+#define INIT_EACH(e,es)                                                 \
+    for (i = 0; i < NUM_DESTS; i++)                                     \
+      {                                                                 \
+       malloc_region (&e.d[i].region, 2*option_lastsize+ALIGNMENTS-1); \
+       printf ("%s d[%d] %p to %p (0x%lX bytes)\n",                    \
+               es, i, (void *) (e.d[i].region.ptr),                    \
+               (void *)  (e.d[i].region.ptr + e.d[i].region.size),     \
+               (long) e.d[i].region.size * BYTES_PER_MP_LIMB);         \
+      }
+
+    INIT_EACH(ref, "ref");  /* same region size as the sources, once per each_t */
+    INIT_EACH(fun, "fun");
+  }
+}
+
+/* Return non-zero if STR matches PATTERN.  PATTERN may carry one '*'
+   wildcard, either at its start (the rest must be a suffix of STR) or at
+   its end (the rest must be a prefix of STR).  A lone "*" matches any
+   string.  Without a wildcard the two strings must be equal.  */
+int
+strmatch_wild (const char *pattern, const char *str)
+{
+  size_t  plen, slen;
+
+  /* wildcard at start */
+  if (pattern[0] == '*')
+    {
+      pattern++;
+      plen = strlen (pattern);
+      slen = strlen (str);
+      return (plen == 0
+              || (slen >= plen && memcmp (pattern, str+slen-plen, plen) == 0));
+    }
+
+  /* wildcard at end.  strncmp is used rather than memcmp so that the
+     comparison stops at the terminator of a str shorter than the prefix,
+     instead of reading on past the end of it.  */
+  plen = strlen (pattern);
+  if (plen >= 1 && pattern[plen-1] == '*')
+    return (strncmp (pattern, str, plen-1) == 0);
+
+  /* no wildcards */
+  return (strcmp (pattern, str) == 0);
+}
+
+void
+try_name (const char *name)
+{
+  int  matches = 0;
+  int  k;
+
+  /* NAME may contain a wildcard, so every entry is tried, not just the
+     first that matches.  */
+  for (k = 0; k < numberof (choice_array); k++)
+    {
+      if (! strmatch_wild (name, choice_array[k].name))
+        continue;
+
+      choice = &choice_array[k];
+      tr = &param[choice->type];
+      try_many ();
+      matches++;
+    }
+
+  if (matches == 0)
+    {
+      /* only reported; there is deliberately no exit (1) here */
+      printf ("%s unknown\n", name);
+    }
+}
+
+
+/* Print the command line help for PROG, followed by the names of all the
+   routines in choice_array wrapped to 79 columns, then exit with status 1.
+   The option list covers every option main() accepts.  */
+void
+usage (const char *prog)
+{
+  int  col = 0;
+  int  i;
+
+  printf ("Usage: %s [options] function...\n", prog);
+  printf ("    -1        use limb data 1,2,3,etc\n");
+  printf ("    -9        use limb data all 0xFF..FFs\n");
+  printf ("    -a zeros  use limb data all zeros\n");
+  printf ("    -a seq    use limb data 1,2,3,etc (same as -1)\n");
+  printf ("    -a ffs    use limb data all 0xFF..FFs (same as -9)\n");
+  printf ("    -a 2fd    use data 0x2FFF...FFFD\n");
+  printf ("    -b base   number base for traced output (default -16, upper-case hex)\n");
+  printf ("    -E seed   seed random numbers with the given value\n");
+  printf ("    -p        print each case tried (try this if seg faulting)\n");
+  printf ("    -R        seed random numbers from time()\n");
+  printf ("    -r reps   set repetitions (default %d)\n", DEFAULT_REPETITIONS);
+  printf ("    -s size   starting size to test\n");
+  printf ("    -S size2  starting size2 to test\n");
+  printf ("    -s s1-s2  range of sizes to test\n");
+  printf ("    -W        don't show the spinner (use this in gdb)\n");
+  printf ("    -z        disable mprotect() redzones\n");
+  printf ("Default data is refmpn_random() and refmpn_random2().\n");
+  printf ("\n");
+  printf ("Functions that can be tested:\n");
+
+  for (i = 0; i < numberof (choice_array); i++)
+    {
+      /* start a new line when this name, with its leading space, would
+         run past column 79 */
+      if (col + 1 + strlen (choice_array[i].name) > 79)
+       {
+         printf ("\n");
+         col = 0;
+       }
+      printf (" %s", choice_array[i].name);
+      col += 1 + strlen (choice_array[i].name);
+    }
+  printf ("\n");
+
+  exit(1);
+}
+
+
+/* Entry point of the try program: parse the options, seed the random
+   state, set up the test regions with try_init(), then run try_name() on
+   each remaining argument.  With no function names given the usage
+   message is printed.  */
+int
+main (int argc, char *argv[])
+{
+  int  i;
+
+  /* unbuffered output */
+  setbuf (stdout, NULL);
+  setbuf (stderr, NULL);
+
+  /* default trace in hex, and in upper-case so can paste into bc */
+  mp_trace_base = -16;
+
+  param_init ();
+
+  {
+    unsigned long  seed = 123;
+    int   opt;
+
+    /* POSIX getopt returns -1 at the end of the options; EOF is only
+       guaranteed to be some negative value.  */
+    while ((opt = getopt(argc, argv, "19a:b:E:pRr:S:s:Wz")) != -1)
+      {
+       switch (opt) {
+       case '1':
+         /* use limb data values 1, 2, 3, ... etc */
+         option_data = DATA_SEQ;
+         break;
+       case '9':
+         /* use limb data values 0xFFF...FFF always */
+         option_data = DATA_FFS;
+         break;
+       case 'a':
+         if (strcmp (optarg, "zeros") == 0)     option_data = DATA_ZEROS;
+         else if (strcmp (optarg, "seq") == 0)  option_data = DATA_SEQ;
+         else if (strcmp (optarg, "ffs") == 0)  option_data = DATA_FFS;
+         else if (strcmp (optarg, "2fd") == 0)  option_data = DATA_2FD;
+         else
+           {
+             fprintf (stderr, "unrecognised data option: %s\n", optarg);
+             exit (1);
+           }
+         break;
+       case 'b':
+         mp_trace_base = atoi (optarg);
+         break;
+       case 'E':
+         /* re-seed; a value that doesn't parse is rejected rather than
+            silently carrying on with the previous seed */
+         if (sscanf (optarg, "%lu", &seed) != 1)
+           {
+             fprintf (stderr, "invalid seed: %s\n", optarg);
+             exit (1);
+           }
+         printf ("Re-seeding with %lu\n", seed);
+         break;
+       case 'p':
+         option_print = 1;
+         break;
+       case 'R':
+         /* randomize */
+         seed = time (NULL);
+         printf ("Seeding with %lu, re-run using \"-E %lu\"\n", seed, seed);
+         break;
+       case 'r':
+         option_repetitions = atoi (optarg);
+         break;
+       case 's':
+         {
+           /* "-s N" sets the first size, "-s N-M" the last size too */
+           char  *p;
+           option_firstsize = strtol (optarg, 0, 0);
+           if ((p = strchr (optarg, '-')) != NULL)
+             option_lastsize = strtol (p+1, 0, 0);
+         }
+         break;
+       case 'S':
+         /* -S <size> sets the starting size for the second of a two size
+            routine (like mpn_mul_basecase) */
+         option_firstsize2 = strtol (optarg, 0, 0);
+         break;
+       case 'W':
+         /* use this when running in the debugger */
+         option_spinner = 0;
+         break;
+       case 'z':
+         /* disable redzones */
+         option_redzones = 0;
+         break;
+       case '?':
+         usage (argv[0]);
+         break;
+       }
+      }
+
+    gmp_randinit_default (__gmp_rands);
+    __gmp_rands_initialized = 1;
+    gmp_randseed_ui (__gmp_rands, seed);
+  }
+
+  try_init();
+
+  if (argc <= optind)
+    usage (argv[0]);
+
+  for (i = optind; i < argc; i++)
+    try_name (argv[i]);
+
+  return 0;
+}
diff --git a/tests/devel/tst-addsub.c b/tests/devel/tst-addsub.c

new file mode 100644 (file)

index 0000000..5b819e9
--- /dev/null
+++ b/tests/devel/tst-addsub.c
@@ -0,0 +1,98 @@
+/* Copyright 1996, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+#define ADD 1
+#define SUB 2
+
+#ifndef METHOD
+#define METHOD ADD
+#endif
+
+#if METHOD == ADD
+#define REFCALL refmpn_add_n
+#define TESTCALL mpn_add_n
+#endif
+
+#if METHOD == SUB
+#define REFCALL refmpn_sub_n
+#define TESTCALL mpn_sub_n
+#endif
+
+#define SIZE 100
+
+/* Compare TESTCALL against REFCALL on random operands of random size and
+   alignment for n_passes passes, aborting at the first difference in
+   either the return value or any limb of the destination buffers.  An
+   optional first argument gives the maximum operand size in limbs
+   (default SIZE); it must be at least 1.  */
+int
+main (int argc, char **argv)
+{
+  mp_size_t alloc_size, max_size, size, i, cumul_size;
+  mp_ptr s1, s2, dx, dy;
+  int s1_align, s2_align, d_align;
+  long pass, n_passes;
+  mp_limb_t cx, cy;
+
+  max_size = SIZE;
+  n_passes = 1000000;
+
+  argc--; argv++;
+  if (argc)
+    {
+      max_size = atol (*argv);
+      argc--; argv++;
+    }
+
+  /* size is drawn below as random () % max_size + 1, so anything under 1
+     would mean a zero or negative divisor */
+  if (max_size < 1)
+    {
+      fprintf (stderr, "Maximum size must be at least 1\n");
+      exit (1);
+    }
+
+  /* room for the largest operand plus the largest alignment offset */
+  alloc_size = max_size + 32;
+  s1 = malloc (alloc_size * BYTES_PER_MP_LIMB);
+  s2 = malloc (alloc_size * BYTES_PER_MP_LIMB);
+  dx = malloc (alloc_size * BYTES_PER_MP_LIMB);
+  dy = malloc (alloc_size * BYTES_PER_MP_LIMB);
+  if (s1 == NULL || s2 == NULL || dx == NULL || dy == NULL)
+    {
+      fprintf (stderr, "Cannot allocate 4 blocks of %ld limbs\n",
+               (long) alloc_size);
+      exit (1);
+    }
+
+  cumul_size = 0;
+  for (pass = 0; pass < n_passes; pass++)
+    {
+      size = random () % max_size + 1;
+
+      /* show progress roughly every million limbs processed */
+      cumul_size += size;
+      if (cumul_size >= 1000000)
+       {
+         cumul_size -= 1000000;
+         printf ("\r%ld", pass); fflush (stdout);
+       }
+      s1_align = random () % 32;
+      s2_align = random () % 32;
+      d_align = random () % 32;
+
+      mpn_random2 (s1 + s1_align, size);
+      mpn_random2 (s2 + s2_align, size);
+
+      /* identical known contents in both destinations, so a write outside
+         the result area shows up in the whole-buffer comparison */
+      for (i = 0; i < alloc_size; i++)
+       dx[i] = dy[i] = i + 0x9876500;
+
+      cx = TESTCALL (dx + d_align, s1 + s1_align, s2 + s2_align, size);
+      cy = REFCALL (dy + d_align, s1 + s1_align, s2 + s2_align, size);
+
+      if (cx != cy || mpn_cmp (dx, dy, alloc_size) != 0)
+       abort ();
+    }
+
+  printf ("%ld passes OK\n", n_passes);
+  exit (0);
+}
diff --git a/tests/memory.c b/tests/memory.c

new file mode 100644 (file)

index 0000000..ea12381
--- /dev/null
+++ b/tests/memory.c
@@ -0,0 +1,240 @@
+/* Memory allocation used during tests.
+
+Copyright 2001, 2002, 2007 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>            /* for abort */
+#include <string.h>            /* for memcpy, memcmp */
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#if GMP_LIMB_BITS == 64
+#define PATTERN1 CNST_LIMB(0xcafebabedeadbeef)
+#define PATTERN2 CNST_LIMB(0xabacadabaedeedab)
+#else
+#define PATTERN1 CNST_LIMB(0xcafebabe)
+#define PATTERN2 CNST_LIMB(0xdeadbeef)
+#endif
+
+/* Each block allocated is a separate malloc, for the benefit of a redzoning
+   malloc debugger during development or when bug hunting.
+
+   Sizes passed when reallocating or freeing are checked (the default
+   routines don't care about these).
+
+   Memory leaks are checked by requiring that all blocks have been freed
+   when tests_memory_end() is called.  Test programs must be sure to have
+   "clear"s for all temporary variables used.  */
+
+
+/* Bookkeeping record for one live block handed out by tests_allocate or
+   tests_reallocate.  Records form a singly linked list headed by
+   tests_memory_list.  */
+struct header {
+  void           *ptr;    /* pointer given to the caller (just past the leading redzone limb) */
+  size_t         size;    /* caller-visible size in bytes, redzones not included */
+  struct header  *next;   /* next record, or NULL at the end of the list */
+};
+
+struct header  *tests_memory_list = NULL;
+
+/* Locate the list link that refers to the record for PTR.  The address of
+   the link, not the record itself, is returned so that a caller can unlink
+   the record by storing through it.  NULL means PTR is not a live block.  */
+struct header **
+tests_memory_find (void *ptr)
+{
+  struct header  **link = &tests_memory_list;
+
+  while (*link != NULL)
+    {
+      if ((*link)->ptr == ptr)
+        return link;
+      link = &(*link)->next;
+    }
+
+  return NULL;
+}
+
+/* Non-zero if PTR is a block currently recorded in tests_memory_list.  */
+int
+tests_memory_valid (void *ptr)
+{
+  struct header  **link = tests_memory_find (ptr);
+  return link != NULL;
+}
+
+/* Allocate SIZE bytes with one guard limb in front and one behind, and
+   record the block in tests_memory_list.  A request for 0 bytes is
+   reported and aborts.  The guard values are PATTERN1 and PATTERN2 each
+   minus the block address, so they differ from block to block.  */
+void *
+tests_allocate (size_t size)
+{
+  struct header  *rec;
+  void           *raw, *user;
+  mp_limb_t      *front_guard;
+  mp_limb_t      back_guard;
+
+  if (size == 0)
+    {
+      fprintf (stderr, "tests_allocate(): attempt to allocate 0 bytes\n");
+      abort ();
+    }
+
+  /* push a new record on the front of the list */
+  rec = (struct header *) __gmp_default_allocate (sizeof (*rec));
+  rec->next = tests_memory_list;
+  tests_memory_list = rec;
+
+  /* room for the caller's bytes plus a limb either side */
+  raw = __gmp_default_allocate (size + 2 * sizeof (mp_limb_t));
+  user = (void *) ((gmp_intptr_t) raw + sizeof (mp_limb_t));
+
+  front_guard = (mp_limb_t *) ((gmp_intptr_t) user - sizeof (mp_limb_t));
+  *front_guard = PATTERN1 - ((mp_limb_t) user);
+
+  /* user + size need not be limb aligned, so the trailing guard is
+     stored bytewise */
+  back_guard = PATTERN2 - ((mp_limb_t) user);
+  memcpy ((void *) ((gmp_intptr_t) user + size), &back_guard, sizeof (mp_limb_t));
+
+  rec->size = size;
+  rec->ptr = user;
+  return rec->ptr;
+}
+
+/* Resize the block PTR from OLD_SIZE to NEW_SIZE bytes and return the new
+   block pointer.  Aborts with a message if NEW_SIZE is 0, if PTR is not a
+   live block, if OLD_SIZE does not match the recorded size, or if either
+   redzone of the old block has been overwritten.  */
+void *
+tests_reallocate (void *ptr, size_t old_size, size_t new_size)
+{
+  struct header  **hp, *h;
+  void *rptr;
+  mp_limb_t PATTERN2_var;
+
+  if (new_size == 0)
+    {
+      fprintf (stderr, "tests_reallocate(): attempt to reallocate %p to 0 bytes\n",
+              ptr);
+      abort ();
+    }
+
+  hp = tests_memory_find (ptr);
+  if (hp == NULL)
+    {
+      fprintf (stderr, "tests_reallocate(): attempt to reallocate bad pointer %p\n",
+              ptr);
+      abort ();
+    }
+  h = *hp;
+
+  if (h->size != old_size)
+    {
+      fprintf (stderr, "tests_reallocate(): bad old size %lu, should be %lu\n",
+              (unsigned long) old_size, (unsigned long) h->size);
+      abort ();
+    }
+
+  /* check the limb immediately before the block */
+  if (*((mp_limb_t *) ((gmp_intptr_t) ptr - sizeof (mp_limb_t)))
+      != PATTERN1 - ((mp_limb_t) ptr))
+    {
+      fprintf (stderr, "in realloc: redzone clobbered before block\n");
+      abort ();
+    }
+  /* check the limb immediately after the block, compared bytewise */
+  PATTERN2_var = PATTERN2 - ((mp_limb_t) ptr);
+  if (memcmp ((void *) ((gmp_intptr_t) ptr + h->size), &PATTERN2_var, sizeof (mp_limb_t)))
+    {
+      fprintf (stderr, "in realloc: redzone clobbered after block\n");
+      abort ();
+    }
+
+  /* the underlying allocation starts one limb before ptr and includes
+     both redzone limbs */
+  rptr = __gmp_default_reallocate ((void *) ((gmp_intptr_t) ptr - sizeof (mp_limb_t)),
+                                old_size + 2 * sizeof (mp_limb_t),
+                                new_size + 2 * sizeof (mp_limb_t));
+  ptr = (void *) ((gmp_intptr_t) rptr + sizeof (mp_limb_t));
+
+  /* the redzone values depend on the block address, so both are written
+     afresh for the new pointer and size */
+  *((mp_limb_t *) ((gmp_intptr_t) ptr - sizeof (mp_limb_t)))
+    = PATTERN1 - ((mp_limb_t) ptr);
+  PATTERN2_var = PATTERN2 - ((mp_limb_t) ptr);
+  memcpy ((void *) ((gmp_intptr_t) ptr + new_size), &PATTERN2_var, sizeof (mp_limb_t));
+
+  /* the existing record is reused and keeps its place in the list */
+  h->size = new_size;
+  h->ptr = ptr;
+  return h->ptr;
+}
+
+/* Like tests_memory_find, but a PTR that is not a live block is reported
+   as a bad free and aborts, so the return value is never NULL.  */
+struct header **
+tests_free_find (void *ptr)
+{
+  struct header  **link;
+
+  link = tests_memory_find (ptr);
+  if (! link)
+    {
+      fprintf (stderr, "tests_free(): attempt to free bad pointer %p\n",
+               ptr);
+      abort ();
+    }
+
+  return link;
+}
+
+/* Free the block PTR without a caller-supplied size; the recorded size is
+   used instead.  Aborts if PTR is not a live block (via tests_free_find)
+   or if either redzone has been overwritten.  */
+void
+tests_free_nosize (void *ptr)
+{
+  struct header  **hp = tests_free_find (ptr);
+  struct header  *h = *hp;
+  mp_limb_t PATTERN2_var;
+
+  *hp = h->next;  /* unlink */
+
+  /* check the limb immediately before the block */
+  if (*((mp_limb_t *) ((gmp_intptr_t) ptr - sizeof (mp_limb_t)))
+      != PATTERN1 - ((mp_limb_t) ptr))
+    {
+      fprintf (stderr, "in free: redzone clobbered before block\n");
+      abort ();
+    }
+  /* check the limb immediately after the block, compared bytewise */
+  PATTERN2_var = PATTERN2 - ((mp_limb_t) ptr);
+  if (memcmp ((void *) ((gmp_intptr_t) ptr + h->size), &PATTERN2_var, sizeof (mp_limb_t)))
+    {
+      fprintf (stderr, "in free: redzone clobbered after block\n");
+      abort ();
+    }
+
+  /* release the underlying allocation (which starts one limb before ptr),
+     then the bookkeeping record */
+  __gmp_default_free ((void *) ((gmp_intptr_t) ptr - sizeof(mp_limb_t)),
+                     h->size + 2 * sizeof (mp_limb_t));
+  __gmp_default_free (h, sizeof (*h));
+}
+
+/* Free the block PTR, first checking that SIZE is the size it was
+   allocated or last reallocated with.  A mismatch is reported and
+   aborts.  */
+void
+tests_free (void *ptr, size_t size)
+{
+  struct header  *rec = *tests_free_find (ptr);
+
+  if (rec->size == size)
+    {
+      tests_free_nosize (ptr);
+      return;
+    }
+
+  fprintf (stderr, "tests_free(): bad size %lu, should be %lu\n",
+           (unsigned long) size, (unsigned long) rec->size);
+  abort ();
+}
+
+/* Install tests_allocate, tests_reallocate and tests_free as the GMP
+   memory functions.  */
+void
+tests_memory_start (void)
+{
+  mp_set_memory_functions (tests_allocate, tests_reallocate, tests_free);
+}
+
+/* Leak check: if any block is still recorded in tests_memory_list, print
+   how many remain and abort.  Does nothing when the list is empty.  */
+void
+tests_memory_end (void)
+{
+  struct header  *rec;
+  unsigned  remaining = 0;
+
+  if (tests_memory_list == NULL)
+    return;
+
+  fprintf (stderr, "tests_memory_end(): not all memory freed\n");
+
+  for (rec = tests_memory_list; rec != NULL; rec = rec->next)
+    remaining++;
+
+  fprintf (stderr, "    %u blocks remaining\n", remaining);
+  abort ();
+}
diff --git a/tests/misc.c b/tests/misc.c

new file mode 100644 (file)

index 0000000..3ad27e5
--- /dev/null
+++ b/tests/misc.c
@@ -0,0 +1,565 @@
+/* Miscellaneous test program support routines.
+
+Copyright 2000, 2001, 2002, 2003, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#include <ctype.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>     /* for getenv */
+#include <string.h>
+
+#if HAVE_FLOAT_H
+#include <float.h>      /* for DBL_MANT_DIG */
+#endif
+
+#if TIME_WITH_SYS_TIME
+# include <sys/time.h>  /* for struct timeval */
+# include <time.h>
+#else
+# if HAVE_SYS_TIME_H
+#  include <sys/time.h>
+# else
+#  include <time.h>
+# endif
+#endif
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* The various tests setups and final checks, collected up together.
+
+   tests_start makes stdout and stderr unbuffered, then starts the memory
+   checking and the random state.  */
+void
+tests_start (void)
+{
+  /* don't buffer, so output is not lost if a test causes a segv etc */
+  setbuf (stdout, NULL);
+  setbuf (stderr, NULL);
+
+  tests_memory_start ();
+  tests_rand_start ();
+}
+/* End-of-test checks.  The random state is cleared before
+   tests_memory_end runs its check that all blocks have been freed.  */
+void
+tests_end (void)
+{
+  tests_rand_end ();
+  tests_memory_end ();
+}
+
+
+/* Initialize the global __gmp_rands, aborting if it has already been
+   initialized.
+
+   If the environment variable GMP_CHECK_RANDOMIZE is set and parses to a
+   value other than 0 or 1, that value is used as the seed.  If it parses
+   to 0 or 1, a seed is taken from the current time and printed so the
+   run can be reproduced.  If the variable is not set, no seeding is done
+   here beyond gmp_randinit_default.  */
+void
+tests_rand_start (void)
+{
+  gmp_randstate_ptr  rands;
+  char           *perform_seed;
+  unsigned long  seed;
+
+  if (__gmp_rands_initialized)
+    {
+      printf ("Please let tests_start() initialize the global __gmp_rands.\n");
+      printf ("ie. ensure that function is called before the first use of RANDS.\n");
+      abort ();
+    }
+
+  gmp_randinit_default (__gmp_rands);
+  __gmp_rands_initialized = 1;
+  rands = __gmp_rands;
+
+  perform_seed = getenv ("GMP_CHECK_RANDOMIZE");
+  if (perform_seed != NULL)
+    {
+#ifdef HAVE_STRTOUL
+      seed = strtoul (perform_seed, 0, 0);
+#else
+      /* This will not work right for seeds >= 2^31 on 64-bit machines.
+        Perhaps use atol unconditionally?  Is that ubiquitous?  */
+      seed = atoi (perform_seed);
+#endif
+      if (! (seed == 0 || seed == 1))
+        {
+          /* explicit seed given, reproduce that run */
+          printf ("Re-seeding with GMP_CHECK_RANDOMIZE=%lu\n", seed);
+          gmp_randseed_ui (rands, seed);
+        }
+      else
+        {
+          /* 0 or 1 asks for a fresh time-based seed */
+#if HAVE_GETTIMEOFDAY
+          struct timeval  tv;
+          gettimeofday (&tv, NULL);
+          seed = tv.tv_sec ^ (tv.tv_usec << 12);
+         seed &= 0xffffffff;
+#else
+          time_t  tv;
+          time (&tv);
+          seed = tv;
+#endif
+          gmp_randseed_ui (rands, seed);
+          printf ("Seed GMP_CHECK_RANDOMIZE=%lu (include this in bug reports)\n", seed);
+        }
+      fflush (stdout);
+    }
+}
+/* Release the global random state via RANDS_CLEAR.  */
+void
+tests_rand_end (void)
+{
+  RANDS_CLEAR ();
+}
+
+
+/* Only used if CPU calling conventions checking is available. */
+mp_limb_t (*calling_conventions_function) __GMP_PROTO ((ANYARGS));
+
+
+/* Round P up to the next address that is a multiple of ALIGN bytes; P is
+   returned unchanged when it is already such a multiple.  ALIGN must be a
+   power of 2.  The address is examined through gmp_intptr_t, so nothing is
+   assumed about sizeof(int) versus sizeof(pointer).  */
+void *
+align_pointer (void *p, size_t align)
+{
+  gmp_intptr_t  excess, pad;
+
+  /* bytes by which p overshoots the previous multiple of align */
+  excess = ((gmp_intptr_t) p) & (align-1);
+
+  if (excess != 0)
+    pad = align - excess;
+  else
+    pad = 0;
+
+  return (void *) (((char *) p) + pad);
+}
+
+
+/* Allocate BYTES bytes starting at an address that is a multiple of ALIGN
+   (a power of 2), by over-allocating ALIGN-1 bytes and rounding up.
+
+   Only the rounded-up pointer is returned, so the start address of the
+   underlying block is lost and the memory can never be freed.  */
+void *
+__gmp_allocate_func_aligned (size_t bytes, size_t align)
+{
+  void  *raw = (*__gmp_allocate_func) (bytes + align-1);
+  return align_pointer (raw, align);
+}
+
+
+/* Reallocate PTR from OLDSIZE to NEWSIZE bytes, or make a fresh allocation
+   of NEWSIZE bytes when PTR is NULL (OLDSIZE is then ignored).  */
+void *
+__gmp_allocate_or_reallocate (void *ptr, size_t oldsize, size_t newsize)
+{
+  if (ptr != NULL)
+    return (*__gmp_reallocate_func) (ptr, oldsize, newsize);
+
+  return (*__gmp_allocate_func) (newsize);
+}
+
+/* Return a copy of the string S in memory obtained from
+   __gmp_allocate_func.  The block is strlen(S)+1 bytes.  */
+char *
+__gmp_allocate_strdup (const char *s)
+{
+  size_t  nbytes = strlen (s) + 1;   /* include the terminating null */
+  char    *copy = (*__gmp_allocate_func) (nbytes);
+
+  memcpy (copy, s, nbytes);
+  return copy;
+}
+
+
+/* Convert the string S_ORIG to upper case in place and return it.  Only
+   characters for which isascii is true are passed to toupper; others are
+   left as they are.  */
+char *
+strtoupper (char *s_orig)
+{
+  char  *p = s_orig;
+
+  while (*p != '\0')
+    {
+      if (isascii (*p))
+        *p = toupper (*p);
+      p++;
+    }
+
+  return s_orig;
+}
+
+
+/* Set z to the non-negative value held in the limbs p,size.  The size is
+   passed through MPN_NORMALIZE before use, and z is grown with
+   MPZ_REALLOC as needed to hold the limbs.  */
+void
+mpz_set_n (mpz_ptr z, mp_srcptr p, mp_size_t size)
+{
+  ASSERT (size >= 0);
+  MPN_NORMALIZE (p, size);
+  MPZ_REALLOC (z, size);
+  MPN_COPY (PTR(z), p, size);
+  SIZ(z) = size;
+}
+
+/* Initialize z and set it to the non-negative value held in the limbs
+   p,size.  z must not already be initialized, since its fields are simply
+   overwritten.  At least one limb is always allocated, even when the
+   normalized size is zero.  */
+void
+mpz_init_set_n (mpz_ptr z, mp_srcptr p, mp_size_t size)
+{
+  ASSERT (size >= 0);
+
+  MPN_NORMALIZE (p, size);
+  ALLOC(z) = MAX (size, 1);
+  PTR(z) = __GMP_ALLOCATE_FUNC_LIMBS (ALLOC(z));
+  SIZ(z) = size;
+  MPN_COPY (PTR(z), p, size);
+}
+
+
+/* Return the lowest limb index at which p1,size and p2,size differ, or -1
+   if all SIZE limbs are equal.  */
+mp_size_t
+mpn_diff_lowest (mp_srcptr p1, mp_srcptr p2, mp_size_t size)
+{
+  mp_size_t  pos = 0;
+
+  while (pos < size)
+    {
+      if (p1[pos] != p2[pos])
+        return pos;
+      pos++;
+    }
+
+  return -1;  /* identical */
+}
+
+
+/* Return the highest limb index at which p1,size and p2,size differ, or -1
+   if all SIZE limbs are equal.  */
+mp_size_t
+mpn_diff_highest (mp_srcptr p1, mp_srcptr p2, mp_size_t size)
+{
+  mp_size_t  pos = size;
+
+  while (--pos >= 0)
+    {
+      if (p1[pos] != p2[pos])
+        return pos;
+    }
+
+  return -1;  /* identical */
+}
+
+
+/* Return the lowest byte offset at which the SIZE bytes at p1 and p2
+   differ, or -1 if they are all equal.  */
+mp_size_t
+byte_diff_lowest (const void *p1, const void *p2, mp_size_t size)
+{
+  const char  *a = (const char *) p1;
+  const char  *b = (const char *) p2;
+  mp_size_t   pos = 0;
+
+  while (pos < size)
+    {
+      if (a[pos] != b[pos])
+        return pos;
+      pos++;
+    }
+
+  return -1;  /* identical */
+}
+
+
+/* Return the highest byte offset at which the SIZE bytes at p1 and p2
+   differ, or -1 if they are all equal.  */
+mp_size_t
+byte_diff_highest (const void *p1, const void *p2, mp_size_t size)
+{
+  const char  *a = (const char *) p1;
+  const char  *b = (const char *) p2;
+  mp_size_t   pos = size;
+
+  while (--pos >= 0)
+    {
+      if (a[pos] != b[pos])
+        return pos;
+    }
+
+  return -1;  /* identical */
+}
+
+
+/* Set z from the string STR in the given BASE.  If mpz_set_str reports
+   failure, print the offending string and base to stderr and abort.  */
+void
+mpz_set_str_or_abort (mpz_ptr z, const char *str, int base)
+{
+  if (mpz_set_str (z, str, base) == 0)
+    return;
+
+  fprintf (stderr, "ERROR: mpz_set_str failed\n");
+  fprintf (stderr, "   str  = \"%s\"\n", str);
+  fprintf (stderr, "   base = %d\n", base);
+  abort();
+}
+
+/* Set q from the string STR in the given BASE.  If mpq_set_str reports
+   failure, print the offending string and base to stderr and abort.  */
+void
+mpq_set_str_or_abort (mpq_ptr q, const char *str, int base)
+{
+  if (mpq_set_str (q, str, base) == 0)
+    return;
+
+  fprintf (stderr, "ERROR: mpq_set_str failed\n");
+  fprintf (stderr, "   str  = \"%s\"\n", str);
+  fprintf (stderr, "   base = %d\n", base);
+  abort();
+}
+
+/* Set f from the string STR in the given BASE.  If mpf_set_str reports
+   failure, print the offending string and base to stderr and abort.  The
+   message uses the same "ERROR: " prefix as the mpz and mpq variants.  */
+void
+mpf_set_str_or_abort (mpf_ptr f, const char *str, int base)
+{
+  if (mpf_set_str (f, str, base) != 0)
+    {
+      fprintf (stderr, "ERROR: mpf_set_str failed\n");
+      fprintf (stderr, "   str  = \"%s\"\n", str);
+      fprintf (stderr, "   base = %d\n", base);
+      abort();
+    }
+}
+
+
+/* Return non-zero if |z| is a power of 2, zero otherwise (including for
+   z == 0).  That holds exactly when every limb below the top one is zero
+   and the top limb itself satisfies POW2_P.  */
+int
+mpz_pow2abs_p (mpz_srcptr z)
+{
+  mp_size_t  nlimbs, pos;
+  mp_srcptr  limbs;
+
+  nlimbs = SIZ (z);
+  if (nlimbs == 0)
+    return 0;  /* zero is not a power of 2 */
+  nlimbs = ABS (nlimbs);
+
+  limbs = PTR (z);
+  for (pos = 0; pos < nlimbs-1; pos++)
+    {
+      if (limbs[pos] != 0)
+        return 0;  /* a set bit below the top limb */
+    }
+
+  return POW2_P (limbs[nlimbs-1]);  /* decided by the top limb */
+}
+
+
+/* Exponentially distributed between 0 and 2^nbits-1, meaning the number of
+   bits in the result is uniformly distributed between 0 and nbits-1.
+
+   The bit count is chosen with gmp_urandomm_ui (rstate, nbits) and the
+   value is then generated by mpz_urandomb with that many bits.
+   NOTE(review): nbits is used as the modulus for gmp_urandomm_ui, so it
+   presumably must be non-zero -- confirm against callers.
+
+   FIXME: This is not a proper exponential distribution, since the
+   probability function will have a stepped shape due to using a uniform
+   distribution after choosing how many bits.  */
+
+void
+mpz_erandomb (mpz_ptr rop, gmp_randstate_t rstate, unsigned long nbits)
+{
+  mpz_urandomb (rop, rstate, gmp_urandomm_ui (rstate, nbits));
+}
+
+/* As mpz_erandomb, except that a zero result is replaced by 1, so rop is
+   never zero on return.  */
+void
+mpz_erandomb_nonzero (mpz_ptr rop, gmp_randstate_t rstate, unsigned long nbits)
+{
+  mpz_erandomb (rop, rstate, nbits);
+  if (mpz_sgn (rop) == 0)
+    mpz_set_ui (rop, 1L);
+}
+
+/* Like mpz_erandomb, but the value is generated with mpz_rrandomb instead
+   of mpz_urandomb.  The bit count is again gmp_urandomm_ui (rstate,
+   nbits).  */
+void
+mpz_errandomb (mpz_ptr rop, gmp_randstate_t rstate, unsigned long nbits)
+{
+  mpz_rrandomb (rop, rstate, gmp_urandomm_ui (rstate, nbits));
+}
+
+/* As mpz_errandomb, except that a zero result is replaced by 1, so rop is
+   never zero on return.  */
+void
+mpz_errandomb_nonzero (mpz_ptr rop, gmp_randstate_t rstate, unsigned long nbits)
+{
+  mpz_errandomb (rop, rstate, nbits);
+  if (mpz_sgn (rop) == 0)
+    mpz_set_ui (rop, 1L);
+}
+
+/* Randomly negate rop in place: one random bit is drawn from rstate and
+   rop is negated when that bit is set.  */
+void
+mpz_negrandom (mpz_ptr rop, gmp_randstate_t rstate)
+{
+  mp_limb_t  n;
+  _gmp_rand (&n, rstate, 1);
+  if (n != 0)
+    mpz_neg (rop, rop);
+}
+
+/* Return GMP_LIMB_BITS random bits from RANDS as a single limb value.  */
+mp_limb_t
+urandom (void)
+{
+#if GMP_NAIL_BITS == 0
+  mp_limb_t  n;
+  _gmp_rand (&n, RANDS, GMP_LIMB_BITS);
+  return n;
+#else
+  /* With nails the requested bits are drawn into two limbs, which are
+     then recombined into one full-width value.  */
+  mp_limb_t n[2];
+  _gmp_rand (n, RANDS, GMP_LIMB_BITS);
+  return n[0] + (n[1] << GMP_NUMB_BITS);
+#endif
+}
+
+
+/* Call (*func)() with various random number generators.
+
+   For each generator in turn a fresh state is initialized, FUNC is called
+   with a descriptive name string and that state, and the state is then
+   cleared.  The generators are: the default, Mersenne Twister, linear
+   congruential of sizes 8, 16 and 128, and two degenerate linear
+   congruential setups with multiplier 0.  */
+void
+call_rand_algs (void (*func) __GMP_PROTO ((const char *, gmp_randstate_ptr)))
+{
+  gmp_randstate_t  rstate;
+  mpz_t            a;
+
+  mpz_init (a);
+
+  gmp_randinit_default (rstate);
+  (*func) ("gmp_randinit_default", rstate);
+  gmp_randclear (rstate);
+
+  gmp_randinit_mt (rstate);
+  (*func) ("gmp_randinit_mt", rstate);
+  gmp_randclear (rstate);
+
+  gmp_randinit_lc_2exp_size (rstate, 8L);
+  (*func) ("gmp_randinit_lc_2exp_size 8", rstate);
+  gmp_randclear (rstate);
+
+  gmp_randinit_lc_2exp_size (rstate, 16L);
+  (*func) ("gmp_randinit_lc_2exp_size 16", rstate);
+  gmp_randclear (rstate);
+
+  gmp_randinit_lc_2exp_size (rstate, 128L);
+  (*func) ("gmp_randinit_lc_2exp_size 128", rstate);
+  gmp_randclear (rstate);
+
+  /* degenerate always zeros */
+  mpz_set_ui (a, 0L);
+  gmp_randinit_lc_2exp (rstate, a, 0L, 8L);
+  (*func) ("gmp_randinit_lc_2exp a=0 c=0 m=8", rstate);
+  gmp_randclear (rstate);
+
+  /* degenerate always FFs */
+  mpz_set_ui (a, 0L);
+  gmp_randinit_lc_2exp (rstate, a, 0xFFL, 8L);
+  (*func) ("gmp_randinit_lc_2exp a=0 c=0xFF m=8", rstate);
+  gmp_randclear (rstate);
+
+  mpz_clear (a);
+}
+
+
+/* Return +infinity if available, or 0 if not.
+   We don't want to use libm, so INFINITY or other system values are not
+   used here.
+
+   With _GMP_IEEE_FLOATS the value is built field by field: exponent 2047
+   (all ones in the 11-bit exponent field), zero mantissa, sign clear.  */
+double
+tests_infinity_d (void)
+{
+#if _GMP_IEEE_FLOATS
+  union ieee_double_extract x;
+  x.s.exp = 2047;
+  x.s.manl = 0;
+  x.s.manh = 0;
+  x.s.sig = 0;
+  return x.d;
+#else
+  return 0;
+#endif
+}
+
+
+/* Return non-zero if d is an infinity (either positive or negative).
+   Don't want libm, so don't use isinf() or other system tests.
+
+   With _GMP_IEEE_FLOATS the test is exponent 2047 with both mantissa
+   parts zero; the sign is not examined.  Without it, 0 is always
+   returned.  */
+int
+tests_isinf (double d)
+{
+#if _GMP_IEEE_FLOATS
+  union ieee_double_extract x;
+  x.d = d;
+  return (x.s.exp == 2047 && x.s.manl == 0 && x.s.manh == 0);
+#else
+  return 0;
+#endif
+}
+
+
+/* Set the hardware floating point rounding mode.  Same mode values as mpfr,
+   namely 0=nearest, 1=tozero, 2=up, 3=down.  Return 1 if successful, 0 if
+   not.
+
+   Only x86 is supported here.  On other CPUs, or for a mode outside
+   0..3, nothing is changed and 0 is returned.  */
+int
+tests_hardware_setround (int mode)
+{
+#if HAVE_HOST_CPU_FAMILY_x86
+  int  rc;
+  /* translate the mode to the value of the control word rounding field;
+     note tozero and down swap places between the two numberings */
+  switch (mode) {
+  case 0: rc = 0; break;  /* nearest */
+  case 1: rc = 3; break;  /* tozero  */
+  case 2: rc = 2; break;  /* up      */
+  case 3: rc = 1; break;  /* down    */
+  default:
+    return 0;
+  }
+  /* replace bits 10 and 11 (mask 0xC00) of the control word, keeping the
+     other bits as they are */
+  x86_fldcw ((x86_fstcw () & ~0xC00) | (rc << 10));
+  return 1;
+#endif
+
+  return 0;
+}
+
+/* Return the hardware floating point rounding mode, or -1 if unknown.
+   Mode values are as for tests_hardware_setround, namely 0=nearest,
+   1=tozero, 2=up, 3=down.  Only x86 is supported; elsewhere -1 is
+   returned.  */
+int
+tests_hardware_getround (void)
+{
+#if HAVE_HOST_CPU_FAMILY_x86
+  /* The rounding field is bits 10 and 11 of the control word, the same
+     bits tests_hardware_setround writes, so select them with 0xC00 (not
+     its complement) before shifting them down.  */
+  switch ((x86_fstcw () & 0xC00) >> 10) {
+  case 0: return 0;  /* nearest */
+  case 1: return 3;  /* down    */
+  case 2: return 2;  /* up      */
+  case 3: return 1;  /* tozero  */
+  }
+#endif
+
+  return -1;
+}
+
+
+/* tests_dbl_mant_bits() determines by experiment the number of bits in the
+   mantissa of a "double".  If it's not possible to find a value (perhaps
+   due to the compiler optimizing too aggressively), then return 0.
+
+   This code is used rather than DBL_MANT_DIG from <float.h> since ancient
+   systems like SunOS don't have that file, and since one GNU/Linux ARM
+   system was seen where the float emulation seemed to have only 32 working
+   bits, not the 53 float.h claimed.
+
+   The result is computed once and remembered in a static for later
+   calls.  When <float.h> is available and the radix is 2, a warning is
+   printed if the experiment disagrees with DBL_MANT_DIG.  */
+
+int
+tests_dbl_mant_bits (void)
+{
+  static int n = -1;
+  /* volatile so each intermediate is stored as a genuine double rather
+     than being kept in a wider register or folded by the compiler */
+  volatile double x, y, d;
+
+  if (n != -1)
+    return n;
+
+  n = 1;
+  x = 2.0;
+  for (;;)
+    {
+      /* invariant: x == 2^n.  See if 2^n+1 can be formed without
+         rounding, if so then continue, if not then "n" is the answer */
+      y = x + 1.0;
+      d = y - x;
+      if (d != 1.0)
+        {
+          /* the radix macro in <float.h> is FLT_RADIX, it applies to all
+             the floating types; there is no DBL_RADIX */
+#if defined (DBL_MANT_DIG) && defined (FLT_RADIX) && FLT_RADIX == 2
+          if (n != DBL_MANT_DIG)
+            printf ("Warning, tests_dbl_mant_bits got %d but DBL_MANT_DIG says %d\n", n, DBL_MANT_DIG);
+#endif
+          break;
+        }
+
+      x *= 2;
+      n++;
+
+      if (n > 1000)
+        {
+          printf ("Oops, tests_dbl_mant_bits can't determine mantissa size\n");
+          n = 0;
+          break;
+        }
+    }
+  return n;
+}
+
+
+/* See tests_setjmp_sigfpe in tests.h. */
+
+jmp_buf    tests_sigfpe_target;
+
+/* Signal handler which jumps to tests_sigfpe_target with value 1.  The
+   signal number argument is not used.  */
+RETSIGTYPE
+tests_sigfpe_handler (int sig)
+{
+  longjmp (tests_sigfpe_target, 1);
+}
+
+/* Put SIGFPE back to its default handling.  */
+void
+tests_sigfpe_done (void)
+{
+  signal (SIGFPE, SIG_DFL);
+}
diff --git a/tests/misc/Makefile.am b/tests/misc/Makefile.am

new file mode 100644 (file)

index 0000000..588652e
--- /dev/null
+++ b/tests/misc/Makefile.am
@@ -0,0 +1,33 @@
+## Process this file with automake to generate Makefile.in
+
+# Copyright 2001, 2002 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/tests
+LDADD = $(top_builddir)/tests/libtests.la $(top_builddir)/libgmp.la
+
+check_PROGRAMS = t-printf t-scanf t-locale
+TESTS = $(check_PROGRAMS)
+
+# Temporary files used by the tests.  Removed automatically if the tests
+# pass, but ensure they're cleaned if they fail.
+#
+CLEANFILES = *.tmp
+
+# libtests.la is built in the parent tests directory, so building it from
+# here means running make there.
+$(top_builddir)/tests/libtests.la:
+       cd $(top_builddir)/tests; $(MAKE) $(AM_MAKEFLAGS) libtests.la
diff --git a/tests/misc/Makefile.in b/tests/misc/Makefile.in

new file mode 100644 (file)

index 0000000..3ab448e
--- /dev/null
+++ b/tests/misc/Makefile.in
@@ -0,0 +1,652 @@
+# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation,
+# Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+# Copyright 2001, 2002 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+ANSI2KNR = $(top_builddir)/ansi2knr
+check_PROGRAMS = t-printf$(EXEEXT) t-scanf$(EXEEXT) t-locale$(EXEEXT)
+subdir = tests/misc
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
+       $(top_srcdir)/configure.in
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+       $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+t_locale_SOURCES = t-locale.c
+t_locale_OBJECTS = t-locale$U.$(OBJEXT)
+t_locale_LDADD = $(LDADD)
+t_locale_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_printf_SOURCES = t-printf.c
+t_printf_OBJECTS = t-printf$U.$(OBJEXT)
+t_printf_LDADD = $(LDADD)
+t_printf_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_scanf_SOURCES = t-scanf.c
+t_scanf_OBJECTS = t-scanf$U.$(OBJEXT)
+t_scanf_LDADD = $(LDADD)
+t_scanf_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
+depcomp =
+am__depfiles_maybe =
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+       $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+       $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
+       $(LDFLAGS) -o $@
+SOURCES = t-locale.c t-printf.c t-scanf.c
+DIST_SOURCES = t-locale.c t-printf.c t-scanf.c
+ETAGS = etags
+CTAGS = ctags
+am__tty_colors = \
+red=; grn=; lgn=; blu=; std=
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ABI = @ABI@
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AS = @AS@
+ASMFLAGS = @ASMFLAGS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CALLING_CONVENTIONS_OBJS = @CALLING_CONVENTIONS_OBJS@
+CC = @CC@
+CCAS = @CCAS@
+CC_FOR_BUILD = @CC_FOR_BUILD@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CPP_FOR_BUILD = @CPP_FOR_BUILD@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFN_LONG_LONG_LIMB = @DEFN_LONG_LONG_LIMB@
+DEFS = @DEFS@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+EXEEXT_FOR_BUILD = @EXEEXT_FOR_BUILD@
+FGREP = @FGREP@
+GMP_LDFLAGS = @GMP_LDFLAGS@
+GMP_LIMB_BITS = @GMP_LIMB_BITS@
+GMP_NAIL_BITS = @GMP_NAIL_BITS@
+GREP = @GREP@
+HAVE_CLOCK_01 = @HAVE_CLOCK_01@
+HAVE_CPUTIME_01 = @HAVE_CPUTIME_01@
+HAVE_GETRUSAGE_01 = @HAVE_GETRUSAGE_01@
+HAVE_GETTIMEOFDAY_01 = @HAVE_GETTIMEOFDAY_01@
+HAVE_HOST_CPU_FAMILY_power = @HAVE_HOST_CPU_FAMILY_power@
+HAVE_HOST_CPU_FAMILY_powerpc = @HAVE_HOST_CPU_FAMILY_powerpc@
+HAVE_SIGACTION_01 = @HAVE_SIGACTION_01@
+HAVE_SIGALTSTACK_01 = @HAVE_SIGALTSTACK_01@
+HAVE_SIGSTACK_01 = @HAVE_SIGSTACK_01@
+HAVE_STACK_T_01 = @HAVE_STACK_T_01@
+HAVE_SYS_RESOURCE_H_01 = @HAVE_SYS_RESOURCE_H_01@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LEX = @LEX@
+LEXLIB = @LEXLIB@
+LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
+LIBCURSES = @LIBCURSES@
+LIBGMPXX_LDFLAGS = @LIBGMPXX_LDFLAGS@
+LIBGMP_DLL = @LIBGMP_DLL@
+LIBGMP_LDFLAGS = @LIBGMP_LDFLAGS@
+LIBM = @LIBM@
+LIBM_FOR_BUILD = @LIBM_FOR_BUILD@
+LIBOBJS = @LIBOBJS@
+LIBREADLINE = @LIBREADLINE@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+M4 = @M4@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
+STRIP = @STRIP@
+TAL_OBJECT = @TAL_OBJECT@
+TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
+U = @U@
+U_FOR_BUILD = @U_FOR_BUILD@
+VERSION = @VERSION@
+WITH_READLINE_01 = @WITH_READLINE_01@
+YACC = @YACC@
+YFLAGS = @YFLAGS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__leading_dot = @am__leading_dot@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+gmp_srclinks = @gmp_srclinks@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+mpn_objects = @mpn_objects@
+mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
+mpn_objs_in_libmp = @mpn_objs_in_libmp@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/tests
+LDADD = $(top_builddir)/tests/libtests.la $(top_builddir)/libgmp.la
+TESTS = $(check_PROGRAMS)
+
+# Temporary files used by the tests.  Removed automatically if the tests
+# pass, but ensure they're cleaned if they fail.
+#
+CLEANFILES = *.tmp
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps)
+       @for dep in $?; do \
+         case '$(am__configure_deps)' in \
+           *$$dep*) \
+             ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+               && { if test -f $@; then exit 0; else break; fi; }; \
+             exit 1;; \
+         esac; \
+       done; \
+       echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps tests/misc/Makefile'; \
+       $(am__cd) $(top_srcdir) && \
+         $(AUTOMAKE) --gnu --ignore-deps tests/misc/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+       @case '$?' in \
+         *config.status*) \
+           cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+         *) \
+           echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+           cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+       esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+# Remove the built test programs.  When $(EXEEXT) is non-empty a second
+# pass removes the same names with that extension stripped.
+clean-checkPROGRAMS:
+       @list='$(check_PROGRAMS)'; test -n "$$list" || exit 0; \
+       echo " rm -f" $$list; \
+       rm -f $$list || exit $$?; \
+       test -n "$(EXEEXT)" || exit 0; \
+       list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
+       echo " rm -f" $$list; \
+       rm -f $$list
+# Link rules for the three test programs.  Each removes any stale
+# executable first and then links its objects with its LDADD and $(LIBS).
+t-locale$(EXEEXT): $(t_locale_OBJECTS) $(t_locale_DEPENDENCIES) 
+       @rm -f t-locale$(EXEEXT)
+       $(LINK) $(t_locale_OBJECTS) $(t_locale_LDADD) $(LIBS)
+t-printf$(EXEEXT): $(t_printf_OBJECTS) $(t_printf_DEPENDENCIES) 
+       @rm -f t-printf$(EXEEXT)
+       $(LINK) $(t_printf_OBJECTS) $(t_printf_LDADD) $(LIBS)
+t-scanf$(EXEEXT): $(t_scanf_OBJECTS) $(t_scanf_DEPENDENCIES) 
+       @rm -f t-scanf$(EXEEXT)
+       $(LINK) $(t_scanf_OBJECTS) $(t_scanf_LDADD) $(LIBS)
+
+# Object-file cleanup.
+mostlyclean-compile:
+       -rm -f *.$(OBJEXT)
+
+distclean-compile:
+       -rm -f *.tab.c
+# Build the ansi2knr tool by recursing into the top build directory.
+$(top_builddir)/ansi2knr:
+       $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
+
+# Remove the generated *_.c files; skipped when $U is empty.
+mostlyclean-kr:
+       -test "$U" = "" || rm -f *_.c
+
+# Suffix rules: plain object, object via $(CYGPATH_W) path conversion, and
+# libtool object.
+.c.o:
+       $(COMPILE) -c $<
+
+.c.obj:
+       $(COMPILE) -c `$(CYGPATH_W) '$<'`
+
+.c.lo:
+       $(LTCOMPILE) -c -o $@ $<
+# Generate foo_.c from foo.c: preprocess the source (taken from $(srcdir)
+# when present there), rewrite "# N" line markers to "#line N", and pipe
+# the result through $(ANSI2KNR).  A failed pipeline removes the target.
+t-locale_.c: t-locale.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-locale.c; then echo $(srcdir)/t-locale.c; else echo t-locale.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-printf_.c: t-printf.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-printf.c; then echo $(srcdir)/t-printf.c; else echo t-printf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-scanf_.c: t-scanf.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-scanf.c; then echo $(srcdir)/t-scanf.c; else echo t-scanf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-locale_.$(OBJEXT) t-locale_.lo t-printf_.$(OBJEXT) t-printf_.lo \
+t-scanf_.$(OBJEXT) t-scanf_.lo : $(ANSI2KNR)
+
+# Libtool cleanup: the .lo objects, then the .libs/_libs directories.
+mostlyclean-libtool:
+       -rm -f *.lo
+
+clean-libtool:
+       -rm -rf .libs _libs
+
+# Source-index targets (mkid, etags, ctags, gtags).  The ID/TAGS/CTAGS
+# recipes build a de-duplicated file list, falling back to the $(srcdir)
+# copy of any file that is not present in the current directory.
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+       list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       mkid -fID $$unique
+tags: TAGS
+
+TAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+               $(TAGS_FILES) $(LISP)
+       set x; \
+       here=`pwd`; \
+       list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       shift; \
+       if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+         test -n "$$unique" || unique=$$empty_fix; \
+         if test $$# -gt 0; then \
+           $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+             "$$@" $$unique; \
+         else \
+           $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+             $$unique; \
+         fi; \
+       fi
+ctags: CTAGS
+CTAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+               $(TAGS_FILES) $(LISP)
+       list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       test -z "$(CTAGS_ARGS)$$unique" \
+         || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+            $$unique
+
+GTAGS:
+       here=`$(am__cd) $(top_builddir) && pwd` \
+         && $(am__cd) $(top_srcdir) \
+         && gtags -i $(GTAGS_ARGS) "$$here"
+
+distclean-tags:
+       -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+# Run every program in $(TESTS) and print a coloured summary.
+# Exit status 0 counts as PASS, 77 as SKIP, anything else as FAIL; for
+# tests listed in $(XFAIL_TESTS) the meaning is inverted (a pass is an
+# XPASS failure, a failure is an expected XFAIL).  Each test is looked up
+# as ./name, then name, then $(srcdir)/name.  The recipe succeeds only
+# when the failed count is zero.
+check-TESTS: $(TESTS)
+       @failed=0; all=0; xfail=0; xpass=0; skip=0; \
+       srcdir=$(srcdir); export srcdir; \
+       list=' $(TESTS) '; \
+       $(am__tty_colors); \
+       if test -n "$$list"; then \
+         for tst in $$list; do \
+           if test -f ./$$tst; then dir=./; \
+           elif test -f $$tst; then dir=; \
+           else dir="$(srcdir)/"; fi; \
+           if $(TESTS_ENVIRONMENT) $${dir}$$tst; then \
+             all=`expr $$all + 1`; \
+             case " $(XFAIL_TESTS) " in \
+             *[\ \     ]$$tst[\ \      ]*) \
+               xpass=`expr $$xpass + 1`; \
+               failed=`expr $$failed + 1`; \
+               col=$$red; res=XPASS; \
+             ;; \
+             *) \
+               col=$$grn; res=PASS; \
+             ;; \
+             esac; \
+           elif test $$? -ne 77; then \
+             all=`expr $$all + 1`; \
+             case " $(XFAIL_TESTS) " in \
+             *[\ \     ]$$tst[\ \      ]*) \
+               xfail=`expr $$xfail + 1`; \
+               col=$$lgn; res=XFAIL; \
+             ;; \
+             *) \
+               failed=`expr $$failed + 1`; \
+               col=$$red; res=FAIL; \
+             ;; \
+             esac; \
+           else \
+             skip=`expr $$skip + 1`; \
+             col=$$blu; res=SKIP; \
+           fi; \
+           echo "$${col}$$res$${std}: $$tst"; \
+         done; \
+         if test "$$all" -eq 1; then \
+           tests="test"; \
+           All=""; \
+         else \
+           tests="tests"; \
+           All="All "; \
+         fi; \
+         if test "$$failed" -eq 0; then \
+           if test "$$xfail" -eq 0; then \
+             banner="$$All$$all $$tests passed"; \
+           else \
+             if test "$$xfail" -eq 1; then failures=failure; else failures=failures; fi; \
+             banner="$$All$$all $$tests behaved as expected ($$xfail expected $$failures)"; \
+           fi; \
+         else \
+           if test "$$xpass" -eq 0; then \
+             banner="$$failed of $$all $$tests failed"; \
+           else \
+             if test "$$xpass" -eq 1; then passes=pass; else passes=passes; fi; \
+             banner="$$failed of $$all $$tests did not behave as expected ($$xpass unexpected $$passes)"; \
+           fi; \
+         fi; \
+         dashes="$$banner"; \
+         skipped=""; \
+         if test "$$skip" -ne 0; then \
+           if test "$$skip" -eq 1; then \
+             skipped="($$skip test was not run)"; \
+           else \
+             skipped="($$skip tests were not run)"; \
+           fi; \
+           test `echo "$$skipped" | wc -c` -le `echo "$$banner" | wc -c` || \
+             dashes="$$skipped"; \
+         fi; \
+         report=""; \
+         if test "$$failed" -ne 0 && test -n "$(PACKAGE_BUGREPORT)"; then \
+           report="Please report to $(PACKAGE_BUGREPORT)"; \
+           test `echo "$$report" | wc -c` -le `echo "$$banner" | wc -c` || \
+             dashes="$$report"; \
+         fi; \
+         dashes=`echo "$$dashes" | sed s/./=/g`; \
+         if test "$$failed" -eq 0; then \
+           echo "$$grn$$dashes"; \
+         else \
+           echo "$$red$$dashes"; \
+         fi; \
+         echo "$$banner"; \
+         test -z "$$skipped" || echo "$$skipped"; \
+         test -z "$$report" || echo "$$report"; \
+         echo "$$dashes$$std"; \
+         test "$$failed" -eq 0; \
+       else :; fi
+
+# Copy every file in $(DISTFILES) into $(distdir).  Leading $(srcdir)/
+# and $(top_srcdir)/ prefixes are stripped or remapped first, needed
+# subdirectories are created, and each entry is taken from the build
+# directory when it exists there and from $(srcdir) otherwise.
+# Directories are copied recursively; plain files already present in
+# $(distdir) are left alone.
+distdir: $(DISTFILES)
+       @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+       topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+       list='$(DISTFILES)'; \
+         dist_files=`for file in $$list; do echo $$file; done | \
+         sed -e "s|^$$srcdirstrip/||;t" \
+             -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+       case $$dist_files in \
+         */*) $(MKDIR_P) `echo "$$dist_files" | \
+                          sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+                          sort -u` ;; \
+       esac; \
+       for file in $$dist_files; do \
+         if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+         if test -d $$d/$$file; then \
+           dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+           if test -d "$(distdir)/$$file"; then \
+             find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+           fi; \
+           if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+             cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+             find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+           fi; \
+           cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+         else \
+           test -f "$(distdir)/$$file" \
+           || cp -p $$d/$$file "$(distdir)/$$file" \
+           || exit 1; \
+         fi; \
+       done
+# "make check": build the test programs, then run them via check-TESTS.
+check-am: all-am
+       $(MAKE) $(AM_MAKEFLAGS) $(check_PROGRAMS)
+       $(MAKE) $(AM_MAKEFLAGS) check-TESTS
+check: check-am
+all-am: Makefile
+installdirs:
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+       @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+# Re-run "install" with the stripping install program substituted.
+install-strip:
+       $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+         install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+         `test -z '$(STRIP)' || \
+           echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+# Generic cleanup levels.  clean removes $(CLEANFILES); distclean also
+# removes configure-generated files and the Makefile itself.
+mostlyclean-generic:
+
+clean-generic:
+       -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES)
+
+distclean-generic:
+       -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+       -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+       @echo "This command is intended for maintainers to use"
+       @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-checkPROGRAMS clean-generic clean-libtool \
+       mostlyclean-am
+
+distclean: distclean-am
+       -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+       distclean-tags
+
+# Documentation and install hook targets.  The *-am targets without a
+# recipe do nothing in this directory; they exist so the standard target
+# names resolve.
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+       -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+       mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am:
+
+# .MAKE lists targets whose recipes invoke $(MAKE); .PHONY lists targets
+# that do not name files.
+.MAKE: $(top_builddir)/ansi2knr check-am install-am install-strip
+
+.PHONY: CTAGS GTAGS all all-am check check-TESTS check-am clean \
+       clean-checkPROGRAMS clean-generic clean-libtool ctags \
+       distclean distclean-compile distclean-generic \
+       distclean-libtool distclean-tags distdir dvi dvi-am html \
+       html-am info info-am install install-am install-data \
+       install-data-am install-dvi install-dvi-am install-exec \
+       install-exec-am install-html install-html-am install-info \
+       install-info-am install-man install-pdf install-pdf-am \
+       install-ps install-ps-am install-strip installcheck \
+       installcheck-am installdirs maintainer-clean \
+       maintainer-clean-generic mostlyclean mostlyclean-compile \
+       mostlyclean-generic mostlyclean-kr mostlyclean-libtool pdf \
+       pdf-am ps ps-am tags uninstall uninstall-am
+
+
+# Build the shared test-support library in its own directory.  "&&" is
+# used so that a failing cd stops the recipe instead of running $(MAKE)
+# in the current directory.
+$(top_builddir)/tests/libtests.la:
+       cd $(top_builddir)/tests && $(MAKE) $(AM_MAKEFLAGS) libtests.la
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/tests/misc/t-locale.c b/tests/misc/t-locale.c

new file mode 100644 (file)

index 0000000..09819fa
--- /dev/null
+++ b/tests/misc/t-locale.c
@@ -0,0 +1,201 @@
+/* Test locale support, or attempt to do so.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define _GNU_SOURCE    /* for DECIMAL_POINT in glibc langinfo.h */
+
+#include "config.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if HAVE_NL_TYPES_H
+#include <nl_types.h>  /* for nl_item (on netbsd 1.4.1 at least) */
+#endif
+
+#if HAVE_LANGINFO_H
+#include <langinfo.h>  /* for nl_langinfo */
+#endif
+
+#if HAVE_LOCALE_H
+#include <locale.h>    /* for lconv */
+#endif
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+#ifdef __MINGW32__
+/* Under __MINGW32__ none of the locale checks are compiled; the program
+   simply exits with status 0.  */
+int
+main (void)
+{
+  exit (0);
+}
+#else
+
+char *decimal_point;
+
+/* Replace the libc localeconv with one we can manipulate. */
+#if HAVE_LOCALECONV
+/* Stand-in for the C library's localeconv.  Hands back a function-static
+   struct lconv whose decimal_point member tracks the global
+   decimal_point; no other member is ever assigned.  */
+struct lconv *
+localeconv (void)
+{
+  static struct lconv  fake;
+  struct lconv         *result = &fake;
+
+  result->decimal_point = decimal_point;
+  return result;
+}
+#endif
+
+/* Replace the libc nl_langinfo with one we can manipulate. */
+#if HAVE_NL_LANGINFO
+/* Stand-in for the C library's nl_langinfo.  Answers the global
+   decimal_point for DECIMAL_POINT and RADIXCHAR (whichever of the two the
+   headers define) and an empty string for every other item.  */
+char *
+nl_langinfo (nl_item n)
+{
+  char  *answer = "";
+
+#if defined (DECIMAL_POINT)
+  if (n == DECIMAL_POINT)
+    answer = decimal_point;
+#endif
+#if defined (RADIXCHAR)
+  if (n == RADIXCHAR)
+    answer = decimal_point;
+#endif
+
+  return answer;
+}
+#endif
+
+/* Exercise mpf_set_str and gmp_sscanf with a range of decimal point
+   strings.  Every template in data[] is expanded with the current point
+   string, with and without a leading "-", parsed in base 10 and compared
+   against the expected double.  Any failure prints the inputs and
+   aborts.  */
+void
+check_input (void)
+{
+  static char *point[] = {
+    ".", ",", "WU", "STR", "ZTV***"
+  };
+
+  static const struct {
+    const char  *str;
+    double      d;
+  } data[] = {
+
+    { "1%s",   1.0 },
+    { "1%s0",  1.0 },
+    { "1%s00", 1.0 },
+
+    { "%s5",    0.5 },
+    { "0%s5",   0.5 },
+    { "00%s5",  0.5 },
+    { "00%s50", 0.5 },
+
+    { "1%s5",    1.5 },
+    { "1%s5e1", 15.0 },
+  };
+
+  int     pt, item, negate, nconv;
+  char    text[128];
+  char    *tail;
+  mpf_t   got;
+  double  want;
+
+  mpf_init (got);
+
+  for (pt = 0; pt < numberof (point); pt++)
+    {
+      decimal_point = point[pt];
+
+      for (negate = 0; negate <= 1; negate++)
+        for (item = 0; item < numberof (data); item++)
+          {
+            /* optional sign, then the template with the point filled in */
+            tail = text;
+            if (negate)
+              *tail++ = '-';
+            sprintf (tail, data[item].str, decimal_point);
+
+            want = data[item].d;
+            if (negate)
+              want = -want;
+
+            /* preload a different value so a no-op parse is noticed */
+            mpf_set_d (got, 123.0);
+            if (mpf_set_str (got, text, 10) != 0)
+              {
+                printf ("mpf_set_str error\n");
+                printf ("  point  %s\n", decimal_point);
+                printf ("  str    %s\n", text);
+                abort ();
+              }
+            if (mpf_cmp_d (got, want) != 0)
+              {
+                printf ("mpf_set_str wrong result\n");
+                printf ("  point  %s\n", decimal_point);
+                printf ("  str    %s\n", text);
+                mpf_trace ("  f", got);
+                printf ("  d=%g\n", want);
+                abort ();
+              }
+
+            mpf_set_d (got, 123.0);
+            nconv = gmp_sscanf (text, "%Ff", got);
+            if (nconv != 1)
+              {
+                printf ("gmp_sscanf wrong return value\n");
+                printf ("  point  %s\n", decimal_point);
+                printf ("  str    %s\n", text);
+                printf ("  ret    %d\n", nconv);
+                abort ();
+              }
+            if (mpf_cmp_d (got, want) != 0)
+              {
+                printf ("gmp_sscanf wrong result\n");
+                printf ("  point  %s\n", decimal_point);
+                printf ("  str    %s\n", text);
+                mpf_trace ("  f", got);
+                printf ("  d=%g\n", want);
+                abort ();
+              }
+          }
+    }
+
+  mpf_clear (got);
+}
+
+/* Entry point.  First probe whether our localeconv/nl_langinfo
+   replacements actually take effect, by formatting 1.5 with "," as the
+   decimal point; only when that yields "1,5" is check_input run,
+   otherwise the test reports that it is skipped.  Exits with status 0
+   either way.  */
+int
+main (void)
+{
+  mpf_t  probe;
+  char   out[128];
+
+  /* The localeconv replacement breaks printf "%lu" on SunOS 4, which rules
+     out printing the seed in tests_rand_start().  Nothing here is random
+     anyway, so only the memory tests are started.  */
+  tests_memory_start ();
+
+  mpf_init (probe);
+  decimal_point = ",";
+  mpf_set_d (probe, 1.5);
+  gmp_snprintf (out, sizeof(out), "%.1Ff", probe);
+  mpf_clear (probe);
+
+  if (strcmp (out, "1,5") == 0)
+    check_input ();
+  else
+    printf ("Test skipped, replacing localeconv/nl_langinfo doesn't work\n");
+
+  tests_memory_end ();
+  exit (0);
+}
+#endif
diff --git a/tests/misc/t-printf.c b/tests/misc/t-printf.c

new file mode 100644 (file)

index 0000000..d6026b2
--- /dev/null
+++ b/tests/misc/t-printf.c
@@ -0,0 +1,979 @@
+/* Test gmp_printf and related functions.
+
+Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+/* Usage: t-printf [-s]
+
+   -s  Check the data against the system printf, where possible.  This is
+       only an option since we don't want to fail if the system printf is
+       faulty or strange.  */
+
+
+#include "config.h"
+
+#if HAVE_STDARG
+#include <stdarg.h>
+#else
+#include <varargs.h>
+#endif
+
+#include <stddef.h>    /* for ptrdiff_t */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if HAVE_OBSTACK_VPRINTF
+#define obstack_chunk_alloc tests_allocate
+#define obstack_chunk_free  tests_free_nosize
+#include <obstack.h>
+#endif
+
+#if HAVE_INTTYPES_H
+# include <inttypes.h> /* for intmax_t */
+#else
+# if HAVE_STDINT_H
+#  include <stdint.h>
+# endif
+#endif
+
+#if HAVE_UNISTD_H
+#include <unistd.h>  /* for unlink */
+#endif
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+int   option_check_printf = 0;
+
+
+#define CHECK_VFPRINTF_FILENAME  "t-printf.tmp"
+FILE  *check_vfprintf_fp;
+
+
+/* From any of the tests run here. */
+#define MAX_OUTPUT  1024
+
+
+void
+#if HAVE_STDARG
+check_plain (const char *want, const char *fmt_orig, ...)
+#else
+check_plain (va_alist)
+     va_dcl
+#endif
+{
+  char        got[MAX_OUTPUT];
+  int         got_len, want_len;
+  size_t      fmtsize;
+  char        *fmt, *q;
+  const char  *p;
+  va_list     ap;
+#if HAVE_STDARG
+  va_start (ap, fmt_orig);
+#else
+  const char  *want;
+  const char  *fmt_orig;
+  va_start (ap);
+  want = va_arg (ap, const char *);
+  fmt_orig = va_arg (ap, const char *);
+#endif
+
+  if (! option_check_printf)
+    return;
+
+  fmtsize = strlen (fmt_orig) + 1;
+  fmt = (*__gmp_allocate_func) (fmtsize);
+
+  for (p = fmt_orig, q = fmt; *p != '\0'; p++)
+    {
+      switch (*p) {
+      case 'a':
+      case 'A':
+       /* The exact value of the exponent isn't guaranteed in glibc, and it
+          and gmp_printf do slightly different things, so don't compare
+          directly. */
+       goto done;
+      case 'F':
+       if (p > fmt_orig && *(p-1) == '.')
+         goto done;  /* don't test the "all digits" cases */
+       /* discard 'F' type */
+       break;
+      case 'Z':
+       /* transmute */
+       *q++ = 'l';
+       break;
+      default:
+       *q++ = *p;
+       break;
+      }
+    }
+  *q = '\0';
+
+  want_len = strlen (want);
+  ASSERT_ALWAYS (want_len < sizeof(got));
+
+  got_len = vsprintf (got, fmt, ap);
+
+  if (got_len != want_len || strcmp (got, want) != 0)
+    {
+      printf ("wanted data doesn't match plain vsprintf\n");
+      printf ("  fmt      |%s|\n", fmt);
+      printf ("  got      |%s|\n", got);
+      printf ("  want     |%s|\n", want);
+      printf ("  got_len  %d\n", got_len);
+      printf ("  want_len %d\n", want_len);
+      abort ();
+    }
+
+ done:
+  (*__gmp_free_func) (fmt, fmtsize);
+}
+
+/* Format FMT with AP through gmp_vsprintf and require both the returned
+   length and the produced string to equal WANT.  Prints the details and
+   aborts otherwise.  */
+void
+check_vsprintf (const char *want, const char *fmt, va_list ap)
+{
+  char  out[MAX_OUTPUT];
+  int   want_len = strlen (want);
+  int   out_len = gmp_vsprintf (out, fmt, ap);
+
+  if (out_len == want_len && strcmp (out, want) == 0)
+    return;
+
+  printf ("gmp_vsprintf wrong\n");
+  printf ("  fmt      |%s|\n", fmt);
+  printf ("  got      |%s|\n", out);
+  printf ("  want     |%s|\n", want);
+  printf ("  got_len  %d\n", out_len);
+  printf ("  want_len %d\n", want_len);
+  abort ();
+}
+
+/* Format FMT with AP through gmp_vfprintf into the scratch stream
+   check_vfprintf_fp, then read the bytes back.  The return value, the
+   file position after writing, the number of bytes read back and the
+   bytes themselves must all agree with WANT; otherwise the details are
+   printed and the program aborts.  */
+void
+check_vfprintf (const char *want, const char *fmt, va_list ap)
+{
+  char  got[MAX_OUTPUT];
+  int   got_len, fread_len;
+  long  ftell_len;
+  int   want_len = strlen (want);
+
+  /* write from the start of the stream */
+  rewind (check_vfprintf_fp);
+  got_len = gmp_vfprintf (check_vfprintf_fp, fmt, ap);
+  ASSERT_ALWAYS (got_len != -1);
+  ASSERT_ALWAYS (fflush (check_vfprintf_fp) == 0);
+
+  /* the position after writing is the number of bytes produced */
+  ftell_len = ftell (check_vfprintf_fp);
+  ASSERT_ALWAYS (ftell_len != -1);
+
+  /* read that many bytes back from the start */
+  rewind (check_vfprintf_fp);
+  ASSERT_ALWAYS (ftell_len <= sizeof(got));
+  fread_len = fread (got, 1, ftell_len, check_vfprintf_fp);
+
+  if (got_len == want_len
+      && ftell_len == want_len
+      && fread_len == want_len
+      && memcmp (got, want, want_len) == 0)
+    return;
+
+  printf ("gmp_vfprintf wrong\n");
+  printf ("  fmt       |%s|\n", fmt);
+  printf ("  got       |%.*s|\n", fread_len, got);
+  printf ("  want      |%s|\n", want);
+  printf ("  got_len   %d\n", got_len);
+  printf ("  ftell_len %ld\n", ftell_len);
+  printf ("  fread_len %d\n", fread_len);
+  printf ("  want_len  %d\n", want_len);
+  abort ();
+}
+
+/* Run gmp_vsnprintf on FMT/AP with a series of buffer sizes: 0 to 5, then
+   want-5 to want+5, where "want" is strlen(WANT).  For each size check
+   that the byte just past the buffer is untouched, that the return value
+   is the full length of WANT, and (for a non-zero size) that the buffer
+   holds the correctly truncated, NUL-terminated prefix of WANT.  Prints
+   the details and aborts on any failure.
+
+   The size stepping is done entirely in size_t.  Comparing bufsize with
+   the int expression want_len-5 wrapped to a huge unsigned value whenever
+   WANT was shorter than 5 characters, which ended the loop after size 5
+   and left the sizes up to want+5 untested.  */
+void
+check_vsnprintf (const char *want, const char *fmt, va_list ap)
+{
+  char    got[MAX_OUTPUT+1];
+  int     ret, got_len, want_len;
+  size_t  bufsize, want_size;
+
+  want_len = strlen (want);
+  want_size = want_len;
+
+  bufsize = -1;   /* wraps to 0 on the first increment */
+  for (;;)
+    {
+      /* do 0 to 5, then want-5 to want+5 */
+      bufsize++;
+      if (bufsize > 5 && bufsize + 5 < want_size)
+        bufsize = want_size - 5;
+      if (bufsize > want_size + 5)
+        break;
+      ASSERT_ALWAYS (bufsize+1 <= sizeof (got));
+
+      /* sentinel just past the space gmp_vsnprintf is allowed to use */
+      got[bufsize] = '!';
+      ret = gmp_vsnprintf (got, bufsize, fmt, ap);
+
+      /* number of characters expected in got, excluding the terminator */
+      got_len = MIN (MAX(1,bufsize)-1, want_len);
+
+      if (got[bufsize] != '!')
+        {
+          printf ("gmp_vsnprintf overwrote bufsize sentinel\n");
+          goto error;
+        }
+
+      if (ret != want_len)
+        {
+          printf ("gmp_vsnprintf return value wrong\n");
+          goto error;
+        }
+
+      if (bufsize > 0)
+        {
+          if (memcmp (got, want, got_len) != 0 || got[got_len] != '\0')
+            {
+              printf ("gmp_vsnprintf wrong result string\n");
+            error:
+              printf ("  fmt       |%s|\n", fmt);
+              printf ("  bufsize   %lu\n", (unsigned long) bufsize);
+              printf ("  got       |%s|\n", got);
+              printf ("  want      |%.*s|\n", got_len, want);
+              printf ("  want full |%s|\n", want);
+              printf ("  ret       %d\n", ret);
+              printf ("  want_len  %d\n", want_len);
+              abort ();
+            }
+        }
+    }
+}
+
+/* Format FMT with AP through gmp_vasprintf and require both the returned
+   length and the allocated string to equal WANT, printing the details
+   and aborting otherwise.  The string is released through
+   __gmp_free_func afterwards.  */
+void
+check_vasprintf (const char *want, const char *fmt, va_list ap)
+{
+  char  *out;
+  int   want_len = strlen (want);
+  int   out_len = gmp_vasprintf (&out, fmt, ap);
+  int   ok = (out_len == want_len && strcmp (out, want) == 0);
+
+  if (! ok)
+    {
+      printf ("gmp_vasprintf wrong\n");
+      printf ("  fmt      |%s|\n", fmt);
+      printf ("  got      |%s|\n", out);
+      printf ("  want     |%s|\n", want);
+      printf ("  got_len  %d\n", out_len);
+      printf ("  want_len %d\n", want_len);
+      abort ();
+    }
+
+  (*__gmp_free_func) (out, strlen(out)+1);
+}
+
+/* Format FMT with AP through gmp_obstack_vprintf into a fresh obstack and
+   require the return value, the size of the growing object and its bytes
+   to match WANT.  Prints the details and aborts otherwise.  Compiles to
+   an empty function when HAVE_OBSTACK_VPRINTF is not set.
+
+   The object is handled purely by length (ob_len, memcmp) and nothing
+   here NUL-terminates it, so the failure report prints it with a
+   precision of ob_len instead of a bare %s.  */
+void
+check_obstack_vprintf (const char *want, const char *fmt, va_list ap)
+{
+#if HAVE_OBSTACK_VPRINTF
+  struct obstack  ob;
+  int   got_len, want_len, ob_len;
+  char  *got;
+
+  want_len = strlen (want);
+
+  obstack_init (&ob);
+  got_len = gmp_obstack_vprintf (&ob, fmt, ap);
+  got = obstack_base (&ob);
+  ob_len = obstack_object_size (&ob);
+
+  if (got_len != want_len
+      || ob_len != want_len
+      || memcmp (got, want, want_len) != 0)
+    {
+      printf ("gmp_obstack_vprintf wrong\n");
+      printf ("  fmt      |%s|\n", fmt);
+      printf ("  got      |%.*s|\n", ob_len, got);
+      printf ("  want     |%s|\n", want);
+      printf ("  got_len  %d\n", got_len);
+      printf ("  ob_len   %d\n", ob_len);
+      printf ("  want_len %d\n", want_len);
+      abort ();
+    }
+  obstack_free (&ob, NULL);
+#endif
+}
+
+
+void
+#if HAVE_STDARG
+check_one (const char *want, const char *fmt, ...)
+#else
+check_one (va_alist)
+     va_dcl
+#endif
+{
+  va_list ap;
+#if HAVE_STDARG
+  va_start (ap, fmt);
+#else
+  const char  *want;
+  const char  *fmt;
+  va_start (ap);
+  want = va_arg (ap, const char *);
+  fmt = va_arg (ap, const char *);
+#endif
+
+  /* simplest first */
+  check_vsprintf (want, fmt, ap);
+  check_vfprintf (want, fmt, ap);
+  check_vsnprintf (want, fmt, ap);
+  check_vasprintf (want, fmt, ap);
+  check_obstack_vprintf (want, fmt, ap);
+}
+
+
+/* Non-zero if the format string FMT contains an 'x', 'X' or 'o' anywhere.
+   This is a plain character search, not a parse of the conversion
+   specifiers.  */
+#define hex_or_octal_p(fmt)             \
+  (strchr (fmt, 'x') != NULL            \
+   || strchr (fmt, 'X') != NULL         \
+   || strchr (fmt, 'o') != NULL)
+
+/* Table-driven checks of the %Z (mpz_t) and %N (limb pointer plus size)
+   conversions.  Each entry gives a format, the value as a string, and the
+   expected output.  For every entry:
+     - when the value fits a long, check_plain compares against the system
+       printf (it only does so under -s),
+     - check_one runs the format with the mpz_t,
+     - the format is repeated with 'Z' replaced by 'N' and 0 to 3 extra
+       high limbs appended to the value.  */
+void
+check_z (void)
+{
+  static const struct {
+    const char  *fmt;
+    const char  *z;
+    const char  *want;
+  } data[] = {
+    { "%Zd", "0",    "0" },
+    { "%Zd", "1",    "1" },
+    { "%Zd", "123",  "123" },
+    { "%Zd", "-1",   "-1" },
+    { "%Zd", "-123", "-123" },
+
+    { "%+Zd", "0",      "+0" },
+    { "%+Zd", "123",  "+123" },
+    { "%+Zd", "-123", "-123" },
+
+    { "%Zx",  "123",   "7b" },
+    { "%ZX",  "123",   "7B" },
+    { "%Zx", "-123",  "-7b" },
+    { "%ZX", "-123",  "-7B" },
+    { "%Zo",  "123",  "173" },
+    { "%Zo", "-123", "-173" },
+
+    { "%#Zx",    "0",     "0" },
+    { "%#ZX",    "0",     "0" },
+    { "%#Zx",  "123",  "0x7b" },
+    { "%#ZX",  "123",  "0X7B" },
+    { "%#Zx", "-123", "-0x7b" },
+    { "%#ZX", "-123", "-0X7B" },
+
+    { "%#Zo",    "0",     "0" },
+    { "%#Zo",  "123",  "0173" },
+    { "%#Zo", "-123", "-0173" },
+
+    { "%10Zd",      "0", "         0" },
+    { "%10Zd",    "123", "       123" },
+    { "%10Zd",   "-123", "      -123" },
+
+    { "%-10Zd",     "0", "0         " },
+    { "%-10Zd",   "123", "123       " },
+    { "%-10Zd",  "-123", "-123      " },
+
+    { "%+10Zd",   "123", "      +123" },
+    { "%+-10Zd",  "123", "+123      " },
+    { "%+10Zd",  "-123", "      -123" },
+    { "%+-10Zd", "-123", "-123      " },
+
+    { "%08Zd",    "0", "00000000" },
+    { "%08Zd",  "123", "00000123" },
+    { "%08Zd", "-123", "-0000123" },
+
+    { "%+08Zd",    "0", "+0000000" },
+    { "%+08Zd",  "123", "+0000123" },
+    { "%+08Zd", "-123", "-0000123" },
+
+    { "%#08Zx",    "0", "00000000" },
+    { "%#08Zx",  "123", "0x00007b" },
+    { "%#08Zx", "-123", "-0x0007b" },
+
+    { "%+#08Zx",    "0", "+0000000" },
+    { "%+#08Zx",  "123", "+0x0007b" },
+    { "%+#08Zx", "-123", "-0x0007b" },
+
+    { "%.0Zd", "0", "" },
+    { "%.1Zd", "0", "0" },
+    { "%.2Zd", "0", "00" },
+    { "%.3Zd", "0", "000" },
+  };
+
+  int        i, j;
+  mpz_t      z;
+  char       *nfmt;
+  mp_size_t  nsize, zeros;
+
+  mpz_init (z);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      mpz_set_str_or_abort (z, data[i].z, 0);
+
+      /* don't try negatives or forced sign in hex or octal */
+      if (mpz_fits_slong_p (z)
+         && ! (hex_or_octal_p (data[i].fmt)
+               && (strchr (data[i].fmt, '+') != NULL || mpz_sgn(z) < 0)))
+       {
+         /* check_plain turns 'Z' into 'l', so the value is passed as a long */
+         check_plain (data[i].want, data[i].fmt, mpz_get_si (z));
+       }
+
+      check_one (data[i].want, data[i].fmt, z);
+
+      /* Same again, with %N and possibly some high zero limbs */
+      nfmt = __gmp_allocate_strdup (data[i].fmt);
+      for (j = 0; nfmt[j] != '\0'; j++)
+       if (nfmt[j] == 'Z')
+         nfmt[j] = 'N';
+      for (zeros = 0; zeros <= 3; zeros++)
+       {
+         /* limb count including the extra high limbs */
+         nsize = ABSIZ(z)+zeros;
+         MPZ_REALLOC (z, nsize);
+         /* the size argument carries the sign of z */
+         nsize = (SIZ(z) >= 0 ? nsize : -nsize);
+         /* presumably clears the `zeros' limbs above the value */
+         refmpn_zero (PTR(z)+ABSIZ(z), zeros);
+         check_one (data[i].want, nfmt, PTR(z), nsize);
+       }
+      __gmp_free_func (nfmt, strlen(nfmt)+1);
+    }
+
+  mpz_clear (z);
+}
+
+/* Table-driven checks of the %Q (mpq_t) conversion.  Each entry gives a
+   format, the rational as a string (parsed with base 0), and the output
+   expected from every gmp printf variant exercised by check_one.  */
+void
+check_q (void)
+{
+  static const struct {
+    const char  *fmt;
+    const char  *q;
+    const char  *want;
+  } data[] = {
+    { "%Qd",    "0",    "0" },
+    { "%Qd",    "1",    "1" },
+    { "%Qd",  "123",  "123" },
+    { "%Qd",   "-1",   "-1" },
+    { "%Qd", "-123", "-123" },
+    { "%Qd",  "3/2",  "3/2" },
+    { "%Qd", "-3/2", "-3/2" },
+
+    { "%+Qd", "0",      "+0" },
+    { "%+Qd", "123",  "+123" },
+    { "%+Qd", "-123", "-123" },
+    { "%+Qd", "5/8",  "+5/8" },
+    { "%+Qd", "-5/8", "-5/8" },
+
+    { "%Qx",  "123",   "7b" },
+    { "%QX",  "123",   "7B" },
+    { "%Qx",  "15/16", "f/10" },
+    { "%QX",  "15/16", "F/10" },
+    { "%Qx", "-123",  "-7b" },
+    { "%QX", "-123",  "-7B" },
+    { "%Qx", "-15/16", "-f/10" },
+    { "%QX", "-15/16", "-F/10" },
+    { "%Qo",  "123",  "173" },
+    { "%Qo", "-123", "-173" },
+    { "%Qo",  "16/17",  "20/21" },
+    { "%Qo", "-16/17", "-20/21" },
+
+    { "%#Qx",    "0",     "0" },
+    { "%#QX",    "0",     "0" },
+    { "%#Qx",  "123",  "0x7b" },
+    { "%#QX",  "123",  "0X7B" },
+    { "%#Qx",  "5/8",  "0x5/0x8" },
+    { "%#QX",  "5/8",  "0X5/0X8" },
+    { "%#Qx", "-123", "-0x7b" },
+    { "%#QX", "-123", "-0X7B" },
+    { "%#Qx", "-5/8", "-0x5/0x8" },
+    { "%#QX", "-5/8", "-0X5/0X8" },
+    { "%#Qo",    "0",     "0" },
+    { "%#Qo",  "123",  "0173" },
+    { "%#Qo", "-123", "-0173" },
+    { "%#Qo",  "5/7",  "05/07" },
+    { "%#Qo", "-5/7", "-05/07" },
+
+    /* zero denominator and showbase */
+    { "%#10Qo", "0/0",     "       0/0" },
+    { "%#10Qd", "0/0",     "       0/0" },
+    { "%#10Qx", "0/0",     "       0/0" },
+    { "%#10Qo", "123/0",   "    0173/0" },
+    { "%#10Qd", "123/0",   "     123/0" },
+    { "%#10Qx", "123/0",   "    0x7b/0" },
+    { "%#10QX", "123/0",   "    0X7B/0" },
+    { "%#10Qo", "-123/0",  "   -0173/0" },
+    { "%#10Qd", "-123/0",  "    -123/0" },
+    { "%#10Qx", "-123/0",  "   -0x7b/0" },
+    { "%#10QX", "-123/0",  "   -0X7B/0" },
+
+    { "%10Qd",      "0", "         0" },
+    { "%-10Qd",     "0", "0         " },
+    { "%10Qd",    "123", "       123" },
+    { "%-10Qd",   "123", "123       " },
+    { "%10Qd",   "-123", "      -123" },
+    { "%-10Qd",  "-123", "-123      " },
+
+    { "%+10Qd",   "123", "      +123" },
+    { "%+-10Qd",  "123", "+123      " },
+    { "%+10Qd",  "-123", "      -123" },
+    { "%+-10Qd", "-123", "-123      " },
+
+    { "%08Qd",    "0", "00000000" },
+    { "%08Qd",  "123", "00000123" },
+    { "%08Qd", "-123", "-0000123" },
+
+    { "%+08Qd",    "0", "+0000000" },
+    { "%+08Qd",  "123", "+0000123" },
+    { "%+08Qd", "-123", "-0000123" },
+
+    { "%#08Qx",    "0", "00000000" },
+    { "%#08Qx",  "123", "0x00007b" },
+    { "%#08Qx", "-123", "-0x0007b" },
+
+    { "%+#08Qx",    "0", "+0000000" },
+    { "%+#08Qx",  "123", "+0x0007b" },
+    { "%+#08Qx", "-123", "-0x0007b" },
+  };
+
+  int    entry = 0;
+  mpq_t  value;
+
+  mpq_init (value);
+
+  /* walk the table in order; the first mismatch aborts inside check_one */
+  while (entry < numberof (data))
+    {
+      mpq_set_str_or_abort (value, data[entry].q, 0);
+      check_one (data[entry].want, data[entry].fmt, value);
+      entry++;
+    }
+
+  mpq_clear (value);
+}
+
+/* Run each mpf_t case below through check_one, and also through
+   check_plain as a double whenever that conversion is exact.  */
+void
+check_f (void)
+{
+  static const struct {
+    const char  *fmt;    /* format string under test */
+    const char  *f;      /* operand text, an "0x" prefix meaning hex */
+    const char  *want;   /* expected output */
+  } cases[] = {
+    { "%Ff",    "0",    "0.000000" },
+    { "%Ff",  "123",  "123.000000" },
+    { "%Ff", "-123", "-123.000000" },
+
+    { "%+Ff",    "0",   "+0.000000" },
+    { "%+Ff",  "123", "+123.000000" },
+    { "%+Ff", "-123", "-123.000000" },
+
+    { "%.0Ff",    "0",    "0" },
+    { "%.0Ff",  "123",  "123" },
+    { "%.0Ff", "-123", "-123" },
+
+    { "%8.0Ff",    "0", "       0" },
+    { "%8.0Ff",  "123", "     123" },
+    { "%8.0Ff", "-123", "    -123" },
+
+    { "%08.0Ff",    "0", "00000000" },
+    { "%08.0Ff",  "123", "00000123" },
+    { "%08.0Ff", "-123", "-0000123" },
+
+    { "%10.2Ff",       "0", "      0.00" },
+    { "%10.2Ff",    "0.25", "      0.25" },
+    { "%10.2Ff",  "123.25", "    123.25" },
+    { "%10.2Ff", "-123.25", "   -123.25" },
+
+    { "%-10.2Ff",       "0", "0.00      " },
+    { "%-10.2Ff",    "0.25", "0.25      " },
+    { "%-10.2Ff",  "123.25", "123.25    " },
+    { "%-10.2Ff", "-123.25", "-123.25   " },
+
+    { "%.2Ff", "0.00000000000001", "0.00" },
+    { "%.2Ff", "0.002",            "0.00" },
+    { "%.2Ff", "0.008",            "0.01" },
+
+    { "%.0Ff", "123.00000000000001", "123" },
+    { "%.0Ff", "123.2",              "123" },
+    { "%.0Ff", "123.8",              "124" },
+
+    { "%.0Ff",  "999999.9", "1000000" },
+    { "%.0Ff", "3999999.9", "4000000" },
+
+    { "%Fe",    "0",  "0.000000e+00" },
+    { "%Fe",    "1",  "1.000000e+00" },
+    { "%Fe",  "123",  "1.230000e+02" },
+
+    { "%FE",    "0",  "0.000000E+00" },
+    { "%FE",    "1",  "1.000000E+00" },
+    { "%FE",  "123",  "1.230000E+02" },
+
+    { "%Fe",    "0",  "0.000000e+00" },
+    { "%Fe",    "1",  "1.000000e+00" },
+
+    { "%.0Fe",     "10000000000",    "1e+10" },
+    { "%.0Fe",    "-10000000000",   "-1e+10" },
+
+    { "%.2Fe",     "10000000000",  "1.00e+10" },
+    { "%.2Fe",    "-10000000000", "-1.00e+10" },
+
+    { "%8.0Fe",    "10000000000", "   1e+10" },
+    { "%8.0Fe",   "-10000000000", "  -1e+10" },
+
+    { "%-8.0Fe",   "10000000000", "1e+10   " },
+    { "%-8.0Fe",  "-10000000000", "-1e+10  " },
+
+    { "%12.2Fe",   "10000000000", "    1.00e+10" },
+    { "%12.2Fe",  "-10000000000", "   -1.00e+10" },
+
+    { "%012.2Fe",  "10000000000", "00001.00e+10" },
+    { "%012.2Fe", "-10000000000", "-0001.00e+10" },
+
+    { "%Fg",   "0", "0" },
+    { "%Fg",   "1", "1" },
+    { "%Fg",   "-1", "-1" },
+
+    { "%.0Fg", "0", "0" },
+    { "%.0Fg", "1", "1" },
+    { "%.0Fg", "-1", "-1" },
+
+    { "%.1Fg", "100", "1e+02" },
+    { "%.2Fg", "100", "1e+02" },
+    { "%.3Fg", "100", "100" },
+    { "%.4Fg", "100", "100" },
+
+    { "%Fg", "0.001",    "0.001" },
+    { "%Fg", "0.0001",   "0.0001" },
+    { "%Fg", "0.00001",  "1e-05" },
+    { "%Fg", "0.000001", "1e-06" },
+
+    { "%.4Fg", "1.00000000000001", "1" },
+    { "%.4Fg", "100000000000001",  "1e+14" },
+
+    { "%.4Fg", "12345678", "1.235e+07" },
+
+    { "%Fa", "0","0x0p+0" },
+    { "%FA", "0","0X0P+0" },
+
+    { "%Fa", "1","0x1p+0" },
+    { "%Fa", "65535","0xf.fffp+12" },
+    { "%Fa", "65536","0x1p+16" },
+    { "%F.10a", "65536","0x1.0000000000p+16" },
+    { "%F.1a", "65535","0x1.0p+16" },
+    { "%F.0a", "65535","0x1p+16" },
+
+    { "%.2Ff", "0.99609375", "1.00" },
+    { "%.Ff",  "0.99609375", "0.99609375" },
+    { "%.Fe",  "0.99609375", "9.9609375e-01" },
+    { "%.Fg",  "0.99609375", "0.99609375" },
+    { "%.20Fg",  "1000000", "1000000" },
+    { "%.Fg",  "1000000", "1000000" },
+
+    { "%#.0Ff", "1", "1." },
+    { "%#.0Fe", "1", "1.e+00" },
+    { "%#.0Fg", "1", "1." },
+
+    { "%#.1Ff", "1", "1.0" },
+    { "%#.1Fe", "1", "1.0e+00" },
+    { "%#.1Fg", "1", "1." },
+
+    { "%#.4Ff", "1234", "1234.0000" },
+    { "%#.4Fe", "1234", "1.2340e+03" },
+    { "%#.4Fg", "1234", "1234." },
+
+    { "%#.8Ff", "1234", "1234.00000000" },
+    { "%#.8Fe", "1234", "1.23400000e+03" },
+    { "%#.8Fg", "1234", "1234.0000" },
+
+  };
+
+  mpf_t   value;
+  double  as_double;
+  int     n, base;
+
+  mpf_init2 (value, 256L);
+
+  for (n = 0; n < numberof (cases); n++)
+    {
+      /* an "0x" prefix on the operand selects base 16, otherwise base 10 */
+      base = (cases[n].f[0] == '0' && cases[n].f[1] == 'x') ? 16 : 10;
+      mpf_set_str_or_abort (value, cases[n].f, base);
+
+      /* when the double compares equal to the mpf, the double is handed
+         to check_plain with the same expected text */
+      as_double = mpf_get_d (value);
+      if (mpf_cmp_d (value, as_double) == 0)
+        check_plain (cases[n].want, cases[n].fmt, as_double);
+
+      check_one (cases[n].want, cases[n].fmt, value);
+    }
+
+  mpf_clear (value);
+}
+
+
+/* Check the "M" (mp_limb_t) conversions against mpz_get_str output.  */
+void
+check_limb (void)
+{
+  static const struct {
+    int         base;   /* base handed to mpz_get_str */
+    const char  *fmt;   /* limb format expected to give the same digits */
+  } conv[] = {
+    { 10, "%Mu" }, { 16, "%Mx" }, { -16, "%MX" },
+  };
+  mp_limb_t  ones = 1;
+  mpz_t      ref;
+  char       *text;
+  int        bits, k;
+
+  check_one ("0", "%Md", CNST_LIMB(0));
+  check_one ("1", "%Md", CNST_LIMB(1));
+
+  /* ones and ref both hold "bits" many 1 bits on each pass */
+  mpz_init_set_ui (ref, 1L);
+  for (bits = 1; bits <= GMP_LIMB_BITS; bits++)
+    {
+      for (k = 0; k < numberof (conv); k++)
+        {
+          text = mpz_get_str (NULL, conv[k].base, ref);
+          check_one (text, conv[k].fmt, ones);
+          (*__gmp_free_func) (text, strlen (text) + 1);
+        }
+      ones = 2*ones + 1;
+      mpz_mul_2exp (ref, ref, 1L);
+      mpz_add_ui (ref, ref, 1L);
+    }
+
+  mpz_clear (ref);
+}
+
+
+void
+check_n (void)   /* checks of "%n" in its plain, type-flag and GMP-type forms */
+{
+  {   /* "%n" ahead of all output stores 0 */
+    int  n = -1;
+    check_one ("blah", "%nblah", &n);
+    ASSERT_ALWAYS (n == 0);
+  }
+
+  {   /* "%n" at the end stores the whole output length */
+    int  n = -1;
+    check_one ("hello ", "hello %n", &n);
+    ASSERT_ALWAYS (n == 6);
+  }
+
+  {   /* "%n" in the middle counts only the characters before it */
+    int  n = -1;
+    check_one ("hello  world", "hello %n world", &n);
+    ASSERT_ALWAYS (n == 6);
+  }
+  /* CHECK_N formats "%d%<string>n%d" with "%n" aimed at x[0] of a "type" pair */
+#define CHECK_N(type, string)                           \
+  do {                                                  \
+    type  x[2];                                         \
+    char  fmt[128];                                     \
+                                                       \
+    x[0] = ~ (type) 0;                                  \
+    x[1] = ~ (type) 0;                                  \
+    sprintf (fmt, "%%d%%%sn%%d", string);               \
+    check_one ("123456", fmt, 123, &x[0], 456);         \
+                                                       \
+    /* should write whole of x[0] and none of x[1] */   \
+    ASSERT_ALWAYS (x[0] == 3);                          \
+    ASSERT_ALWAYS (x[1] == (type) ~ (type) 0);         \
+                                                       \
+  } while (0)
+
+  CHECK_N (mp_limb_t, "M");
+  CHECK_N (char,      "hh");
+  CHECK_N (long,      "l");
+#if HAVE_LONG_LONG
+  CHECK_N (long long, "L");
+#endif
+#if HAVE_INTMAX_T
+  CHECK_N (intmax_t,  "j");
+#endif
+#if HAVE_PTRDIFF_T
+  CHECK_N (ptrdiff_t, "t");
+#endif
+  CHECK_N (short,     "h");
+  CHECK_N (size_t,    "z");
+
+  {   /* "%Zn" stores the count 3 in x[0]; x[1] must keep its 654 */
+    mpz_t  x[2];
+    mpz_init_set_si (x[0], -987L);
+    mpz_init_set_si (x[1],  654L);
+    check_one ("123456", "%d%Zn%d", 123, x[0], 456);
+    MPZ_CHECK_FORMAT (x[0]);
+    MPZ_CHECK_FORMAT (x[1]);
+    ASSERT_ALWAYS (mpz_cmp_ui (x[0], 3L) == 0);
+    ASSERT_ALWAYS (mpz_cmp_ui (x[1], 654L) == 0);
+    mpz_clear (x[0]);
+    mpz_clear (x[1]);
+  }
+
+  {   /* "%Qn" stores the count as 3/1 in x[0]; x[1] must keep 4115/226 */
+    mpq_t  x[2];
+    mpq_init (x[0]);
+    mpq_init (x[1]);
+    mpq_set_ui (x[0], 987L, 654L);
+    mpq_set_ui (x[1], 4115L, 226L);
+    check_one ("123456", "%d%Qn%d", 123, x[0], 456);
+    MPQ_CHECK_FORMAT (x[0]);
+    MPQ_CHECK_FORMAT (x[1]);
+    ASSERT_ALWAYS (mpq_cmp_ui (x[0], 3L, 1L) == 0);
+    ASSERT_ALWAYS (mpq_cmp_ui (x[1], 4115L, 226L) == 0);
+    mpq_clear (x[0]);
+    mpq_clear (x[1]);
+  }
+
+  {   /* "%Fn" stores the count 3 in x[0]; x[1] must keep its 654 */
+    mpf_t  x[2];
+    mpf_init (x[0]);
+    mpf_init (x[1]);
+    mpf_set_ui (x[0], 987L);
+    mpf_set_ui (x[1], 654L);
+    check_one ("123456", "%d%Fn%d", 123, x[0], 456);
+    MPF_CHECK_FORMAT (x[0]);
+    MPF_CHECK_FORMAT (x[1]);
+    ASSERT_ALWAYS (mpf_cmp_ui (x[0], 3L) == 0);
+    ASSERT_ALWAYS (mpf_cmp_ui (x[1], 654L) == 0);
+    mpf_clear (x[0]);
+    mpf_clear (x[1]);
+  }
+
+  {   /* "%Nn" takes a limb pointer and a limb count */
+    mp_limb_t  a[5];
+    mp_limb_t  a_want[numberof(a)];
+    mp_size_t  i;
+
+    a[0] = 123;   /* with a count of 0 nothing may be written */
+    check_one ("blah", "bl%Nnah", a, (mp_size_t) 0);
+    ASSERT_ALWAYS (a[0] == 123);
+
+    MPN_ZERO (a_want, numberof (a_want));
+    for (i = 1; i < numberof (a); i++)   /* counts 1 up to numberof(a)-1 */
+      {
+       check_one ("blah", "bl%Nnah", a, i);
+       a_want[0] = 2;   /* "bl" precedes the conversion; higher limbs want 0 */
+       ASSERT_ALWAYS (mpn_cmp (a, a_want, i) == 0);
+      }
+  }
+}
+
+
+/* Mixed checks: plain conversions next to %Zd, "*" field widths, the flag
+   tables from the glibc documentation, and %zd where glibc supports it.  */
+void
+check_misc (void)
+{
+  mpz_t  z;
+
+  mpz_init (z);
+
+  check_one ("!", "%c", '!');
+
+  check_one ("hello world", "hello %s", "world");
+  check_one ("hello:", "%s:", "hello");
+  mpz_set_ui (z, 0L);
+  check_one ("hello0", "%s%Zd", "hello", z);
+
+  {
+    /* 800 'x' characters; xs is static so its last byte stays '\0' */
+    static char  xs[801];
+    memset (xs, 'x', sizeof(xs)-1);
+    check_one (xs, "%s", xs);
+  }
+
+  mpz_set_ui (z, 12345L);
+  check_one ("     12345", "%*Zd", 10, z);
+  check_one ("0000012345", "%0*Zd", 10, z);
+  check_one ("12345     ", "%*Zd", -10, z);
+  check_one ("12345 and 678", "%Zd and %d", z, 678);
+  check_one ("12345,1,12345,2,12345", "%Zd,%d,%Zd,%d,%Zd", z, 1, z, 2, z);
+
+  /* from the glibc info docs */
+  mpz_set_si (z, 0L);
+  check_one ("|    0|0    |   +0|+0   |    0|00000|     |   00|0|",
+            "|%5Zd|%-5Zd|%+5Zd|%+-5Zd|% 5Zd|%05Zd|%5.0Zd|%5.2Zd|%Zd|",
+            /**/ z,    z,    z,     z,    z,    z,     z,     z,  z);
+  mpz_set_si (z, 1L);
+  check_one ("|    1|1    |   +1|+1   |    1|00001|    1|   01|1|",
+            "|%5Zd|%-5Zd|%+5Zd|%+-5Zd|% 5Zd|%05Zd|%5.0Zd|%5.2Zd|%Zd|",
+            /**/ z,    z,    z,     z,    z,    z,     z,     z,  z);
+  mpz_set_si (z, -1L);
+  check_one ("|   -1|-1   |   -1|-1   |   -1|-0001|   -1|  -01|-1|",
+            "|%5Zd|%-5Zd|%+5Zd|%+-5Zd|% 5Zd|%05Zd|%5.0Zd|%5.2Zd|%Zd|",
+            /**/ z,    z,    z,     z,    z,    z,     z,     z,  z);
+  mpz_set_si (z, 100000L);
+  check_one ("|100000|100000|+100000|+100000| 100000|100000|100000|100000|100000|",
+            "|%5Zd|%-5Zd|%+5Zd|%+-5Zd|% 5Zd|%05Zd|%5.0Zd|%5.2Zd|%Zd|",
+            /**/ z,    z,    z,     z,    z,    z,     z,     z,  z);
+  mpz_set_si (z, 0L);
+  check_one ("|    0|    0|    0|    0|    0|    0|  00000000|",
+            "|%5Zo|%5Zx|%5ZX|%#5Zo|%#5Zx|%#5ZX|%#10.8Zx|",
+            /**/ z,   z,   z,    z,    z,    z,       z);
+  mpz_set_si (z, 1L);
+  check_one ("|    1|    1|    1|   01|  0x1|  0X1|0x00000001|",
+            "|%5Zo|%5Zx|%5ZX|%#5Zo|%#5Zx|%#5ZX|%#10.8Zx|",
+            /**/ z,   z,   z,    z,    z,    z,       z);
+  mpz_set_si (z, 100000L);
+  check_one ("|303240|186a0|186A0|0303240|0x186a0|0X186A0|0x000186a0|",
+            "|%5Zo|%5Zx|%5ZX|%#5Zo|%#5Zx|%#5ZX|%#10.8Zx|",
+            /**/ z,   z,   z,    z,    z,    z,       z);
+
+  /* %zd for size_t won't be available on old systems, and running something
+     to see if it works might be bad, so only try it on glibc, and only on a
+     new enough version (glibc 2.0 doesn't have %zd) */
+#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 0)
+  mpz_set_ui (z, 789L);
+  check_one ("456 789 blah", "%zd %Zd blah", (size_t) 456, z);
+#endif
+
+  mpz_clear (z);
+}
+
+
+/* Test driver.  "-s" as the first argument sets option_check_printf.  The
+   vfprintf scratch file stays open for the duration of the checks.  */
+int
+main (int argc, char *argv[])
+{
+  if (argc >= 2 && ! strcmp (argv[1], "-s"))
+    option_check_printf = 1;
+
+  tests_start ();
+  check_vfprintf_fp = fopen (CHECK_VFPRINTF_FILENAME, "w+");
+  ASSERT_ALWAYS (check_vfprintf_fp != NULL);
+  check_z ();
+  check_q ();
+  check_f ();
+  check_limb ();
+  check_n ();
+  check_misc ();
+  ASSERT_ALWAYS (fclose (check_vfprintf_fp) == 0);
+  unlink (CHECK_VFPRINTF_FILENAME);
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/misc/t-scanf.c b/tests/misc/t-scanf.c

new file mode 100644 (file)

index 0000000..5a9eda1
--- /dev/null
+++ b/tests/misc/t-scanf.c
@@ -0,0 +1,1635 @@
+/* Test gmp_scanf and related functions.
+
+Copyright 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+/* Usage: t-scanf [-s]
+
+   -s  Check the data against the system scanf, where possible.  This is
+       only an option since we don't want to fail if the system scanf is
+       faulty or strange.
+
+   There's some fairly unattractive repetition between check_z, check_q and
+   check_f, but enough differences to make a common loop or a set of macros
+   seem like too much trouble. */
+
+
+#include "config.h"
+
+#if HAVE_STDARG
+#include <stdarg.h>
+#else
+#include <varargs.h>
+#endif
+
+#include <stddef.h>    /* for ptrdiff_t */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if HAVE_INTTYPES_H
+# include <inttypes.h> /* for intmax_t */
+#else
+# if HAVE_STDINT_H
+#  include <stdint.h>
+# endif
+#endif
+
+#if HAVE_UNISTD_H
+#include <unistd.h>  /* for unlink */
+#endif
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+#define TEMPFILE  "t-scanf.tmp"   /* scratch file the fscanf wrappers write input to */
+/* while this is zero libc_scanf_convert returns 0 and the libc passes are skipped */
+int   option_libc_scanf = 0;
+/* common signature of the fun_* wrappers: input, format, and destinations a1, a2 */
+typedef int (*fun_t) __GMP_PROTO ((const char *, const char *, void *, void *));
+
+
+/* Report whether the C library sscanf returns EOF on empty input.  On
+   powerpc7450-apple-darwin7.0.0 it was seen returning 0 instead.  Working
+   around that in gmp_sscanf would be tedious, and it is a plain libc bug
+   likely to hurt other programs too, so affected tests are suppressed.  */
+int
+test_sscanf_eof_ok (void)
+{
+  static int  cached = -1;   /* -1 until the probe has been run */
+  int         scratch;
+
+  if (cached != -1)
+    return cached;
+
+  /* First call: probe the library.  The outcome, 1 for EOF and 0 for
+     anything else, is remembered for all later calls.  */
+  cached = (sscanf ("", "%d", &scratch) == EOF);
+
+  if (! cached)
+    {
+      printf ("Warning, sscanf(\"\",\"%%d\",&x) doesn't return EOF.\n");
+      printf ("This affects gmp_sscanf, tests involving it will be suppressed.\n");
+      printf ("You should try to get a fix for your libc.\n");
+    }
+
+  return cached;
+}
+
+
+/* Rewrite fmt in place from GMP scanf syntax to plain libc scanf syntax,
+   for example "%Zd" becomes "%ld".  Every 'F', 'Q' and 'Z' character in
+   the string is replaced by 'l', whether or not it is part of a
+   conversion.  Return 1 if the rewrite was done, or 0 with fmt untouched
+   if option_libc_scanf is zero.  */
+int
+libc_scanf_convert (char *fmt)
+{
+  char  *src, *dst;
+
+  if (option_libc_scanf == 0)
+    return 0;
+
+  /* one character out for each one in, so dst never overtakes src */
+  for (src = dst = fmt; *src != '\0'; src++, dst++)
+    {
+      char  c = *src;
+
+      /* each GMP type letter turns into the libc 'l' flag */
+      if (c == 'F' || c == 'Q' || c == 'Z')
+        c = 'l';
+
+      *dst = c;
+    }
+
+  *dst = '\0';
+  return 1;
+}
+
+
+long  got_ftell;           /* ftell result after a scan, set by the fscanf wrappers */
+int   fromstring_next_c;   /* getc result after a scan, set by the fscanf wrappers */
+/* Call gmp_vfscanf on a temporary file holding the "input" string, record
+   the resulting stream state in the two variables above, return its result. */
+int
+#if HAVE_STDARG
+fromstring_gmp_fscanf (const char *input, const char *fmt, ...)
+#else
+fromstring_gmp_fscanf (va_alist)
+     va_dcl
+#endif
+{
+  va_list  ap;
+  FILE     *fp;
+  int      ret;
+#if HAVE_STDARG
+  va_start (ap, fmt);
+#else   /* <varargs.h> style: the two named arguments are fetched by hand */
+  const char    *input;
+  const char    *fmt;
+  va_start (ap);
+  input = va_arg (ap, const char *);
+  fmt = va_arg (ap, const char *);
+#endif
+  /* put the input text into TEMPFILE, then go back to its start to read */
+  fp = fopen (TEMPFILE, "w+");
+  ASSERT_ALWAYS (fp != NULL);
+  ASSERT_ALWAYS (fputs (input, fp) != EOF);
+  ASSERT_ALWAYS (fflush (fp) == 0);
+  rewind (fp);
+
+  ret = gmp_vfscanf (fp, fmt, ap);
+  got_ftell = ftell (fp);   /* file position reached by the scan */
+  ASSERT_ALWAYS (got_ftell != -1L);
+
+  fromstring_next_c = getc (fp);   /* next unread character, or EOF */
+
+  ASSERT_ALWAYS (fclose (fp) == 0);
+  va_end (ap);
+  return ret;
+}
+
+
+int
+fun_gmp_sscanf (const char *input, const char *fmt, void *a1, void *a2)
+{
+  /* fun_t adaptor: a2 is forwarded to gmp_sscanf only when non-NULL */
+  if (a2 != NULL)
+    return gmp_sscanf (input, fmt, a1, a2);
+  return gmp_sscanf (input, fmt, a1);
+}
+
+int
+fun_gmp_fscanf (const char *input, const char *fmt, void *a1, void *a2)
+{
+  /* fun_t adaptor: a NULL a2 means only a1 is passed on */
+  return (a2 == NULL
+          ? fromstring_gmp_fscanf (input, fmt, a1)
+          : fromstring_gmp_fscanf (input, fmt, a1, a2));
+}
+
+
+int
+fun_fscanf (const char *input, const char *fmt, void *a1, void *a2)
+{
+  FILE  *stream = fopen (TEMPFILE, "w+");
+  int   nconv;
+
+  /* put the input text in the scratch file and go back to its start */
+  ASSERT_ALWAYS (stream != NULL);
+  ASSERT_ALWAYS (fputs (input, stream) != EOF);
+  ASSERT_ALWAYS (fflush (stream) == 0);
+  rewind (stream);
+
+  /* a NULL a2 means only a1 is passed to fscanf */
+  nconv = (a2 == NULL
+           ? fscanf (stream, fmt, a1)
+           : fscanf (stream, fmt, a1, a2));
+
+  /* record where the scan stopped and which character comes next */
+  got_ftell = ftell (stream);
+  ASSERT_ALWAYS (got_ftell != -1L);
+  fromstring_next_c = getc (stream);
+
+  ASSERT_ALWAYS (fclose (stream) == 0);
+  return nconv;
+}
+
+
+/* Some old C libraries, HP-UX 9 for one, have an sscanf that needs to be
+   able to write into its input string, which is a problem when gcc has put
+   the test data in a read-only section.  So scan a writable copy instead.
+
+   Strictly this is only needed under SSCANF_WRITABLE_INPUT from configure,
+   but copying unconditionally is just as easy, and in any case this code
+   is only executed under the -s option.  */
+
+int
+fun_sscanf (const char *input, const char *fmt, void *a1, void *a2)
+{
+  size_t  nbytes = strlen (input) + 1;   /* text plus its terminating NUL */
+  char    *copy;
+  int     nconv;
+
+  copy = (*__gmp_allocate_func) (nbytes);
+  memcpy (copy, input, nbytes);
+
+  /* a NULL a2 means only a1 is passed to sscanf */
+  nconv = (a2 == NULL
+           ? sscanf (copy, fmt, a1)
+           : sscanf (copy, fmt, a1, a2));
+
+  /* the free function is given the same size that was allocated */
+  (*__gmp_free_func) (copy, nbytes);
+  return nconv;
+}
+
+
+/* Return 1 if only the last conversion in fmt stores a value, i.e. each
+   earlier one has '*'; callers append "%n" first.  "%%" is a literal.  */
+int
+fmt_allignore (const char *fmt)
+{
+  int  saw_star = 1;            /* no conversion is pending at the start */
+
+  for ( ; *fmt != '\0'; fmt++)
+    if (fmt[0] == '%' && fmt[1] == '%')
+      fmt++;                    /* step over "%%", it is not a conversion */
+    else if (*fmt == '*')
+      saw_star = 1;             /* the pending conversion is suppressed */
+    else if (*fmt == '%')
+      {
+        if (! saw_star)
+          return 0;             /* the previous conversion stores a value */
+        saw_star = 0;
+      }
+  return 1;                     /* the final conversion is never examined */
+}
+
+void
+check_z (void)   /* run the %Z table through the gmp and, if enabled, libc scanners */
+{
+  static const struct {
+    const char  *fmt;         /* format; the loop appends "%n" to it */
+    const char  *input;       /* text to be scanned */
+    const char  *want;        /* value compared only when want_ret is 1 */
+    int         want_ret;     /* expected return value */
+    long        want_ftell;   /* expected ftell, -1 meaning strlen(input) */
+    int         want_upto;    /* expected "%n" count, -1 meaning strlen(input) */
+    int         not_glibc;    /* nonzero skips the libc passes under __GLIBC__ */
+
+  } data[] = {
+
+    { "%Zd",    "0",    "0", 1, -1, -1 },
+    { "%Zd",    "1",    "1", 1, -1, -1 },
+    { "%Zd",  "123",  "123", 1, -1, -1 },
+    { "%Zd",   "+0",    "0", 1, -1, -1 },
+    { "%Zd",   "+1",    "1", 1, -1, -1 },
+    { "%Zd", "+123",  "123", 1, -1, -1 },
+    { "%Zd",   "-0",    "0", 1, -1, -1 },
+    { "%Zd",   "-1",   "-1", 1, -1, -1 },
+    { "%Zd", "-123", "-123", 1, -1, -1 },
+
+    { "%Zo",    "0",    "0", 1, -1, -1 },
+    { "%Zo",  "173",  "123", 1, -1, -1 },
+    { "%Zo",   "+0",    "0", 1, -1, -1 },
+    { "%Zo", "+173",  "123", 1, -1, -1 },
+    { "%Zo",   "-0",    "0", 1, -1, -1 },
+    { "%Zo", "-173", "-123", 1, -1, -1 },
+
+    { "%Zx",    "0",    "0", 1, -1, -1 },
+    { "%Zx",   "7b",  "123", 1, -1, -1 },
+    { "%Zx",   "7b",  "123", 1, -1, -1 },
+    { "%Zx",   "+0",    "0", 1, -1, -1 },
+    { "%Zx",  "+7b",  "123", 1, -1, -1 },
+    { "%Zx",  "+7b",  "123", 1, -1, -1 },
+    { "%Zx",   "-0",   "-0", 1, -1, -1 },
+    { "%Zx",  "-7b", "-123", 1, -1, -1 },
+    { "%Zx",  "-7b", "-123", 1, -1, -1 },
+    { "%ZX",    "0",    "0", 1, -1, -1 },
+    { "%ZX",   "7b",  "123", 1, -1, -1 },
+    { "%ZX",   "7b",  "123", 1, -1, -1 },
+    { "%ZX",   "+0",    "0", 1, -1, -1 },
+    { "%ZX",  "+7b",  "123", 1, -1, -1 },
+    { "%ZX",  "+7b",  "123", 1, -1, -1 },
+    { "%ZX",   "-0",   "-0", 1, -1, -1 },
+    { "%ZX",  "-7b", "-123", 1, -1, -1 },
+    { "%ZX",  "-7b", "-123", 1, -1, -1 },
+    { "%Zx",    "0",    "0", 1, -1, -1 },
+    { "%Zx",   "7B",  "123", 1, -1, -1 },
+    { "%Zx",   "7B",  "123", 1, -1, -1 },
+    { "%Zx",   "+0",    "0", 1, -1, -1 },
+    { "%Zx",  "+7B",  "123", 1, -1, -1 },
+    { "%Zx",  "+7B",  "123", 1, -1, -1 },
+    { "%Zx",   "-0",   "-0", 1, -1, -1 },
+    { "%Zx",  "-7B", "-123", 1, -1, -1 },
+    { "%Zx",  "-7B", "-123", 1, -1, -1 },
+    { "%ZX",    "0",    "0", 1, -1, -1 },
+    { "%ZX",   "7B",  "123", 1, -1, -1 },
+    { "%ZX",   "7B",  "123", 1, -1, -1 },
+    { "%ZX",   "+0",    "0", 1, -1, -1 },
+    { "%ZX",  "+7B",  "123", 1, -1, -1 },
+    { "%ZX",  "+7B",  "123", 1, -1, -1 },
+    { "%ZX",   "-0",   "-0", 1, -1, -1 },
+    { "%ZX",  "-7B", "-123", 1, -1, -1 },
+    { "%ZX",  "-7B", "-123", 1, -1, -1 },
+
+    { "%Zi",    "0",    "0", 1, -1, -1 },
+    { "%Zi",    "1",    "1", 1, -1, -1 },
+    { "%Zi",  "123",  "123", 1, -1, -1 },
+    { "%Zi",   "+0",    "0", 1, -1, -1 },
+    { "%Zi",   "+1",    "1", 1, -1, -1 },
+    { "%Zi", "+123",  "123", 1, -1, -1 },
+    { "%Zi",   "-0",    "0", 1, -1, -1 },
+    { "%Zi",   "-1",   "-1", 1, -1, -1 },
+    { "%Zi", "-123", "-123", 1, -1, -1 },
+
+    { "%Zi",    "00",    "0", 1, -1, -1 },
+    { "%Zi",  "0173",  "123", 1, -1, -1 },
+    { "%Zi",   "+00",    "0", 1, -1, -1 },
+    { "%Zi", "+0173",  "123", 1, -1, -1 },
+    { "%Zi",   "-00",    "0", 1, -1, -1 },
+    { "%Zi", "-0173", "-123", 1, -1, -1 },
+
+    { "%Zi",    "0x0",    "0", 1, -1, -1 },
+    { "%Zi",   "0x7b",  "123", 1, -1, -1 },
+    { "%Zi",   "0x7b",  "123", 1, -1, -1 },
+    { "%Zi",   "+0x0",    "0", 1, -1, -1 },
+    { "%Zi",  "+0x7b",  "123", 1, -1, -1 },
+    { "%Zi",  "+0x7b",  "123", 1, -1, -1 },
+    { "%Zi",   "-0x0",   "-0", 1, -1, -1 },
+    { "%Zi",  "-0x7b", "-123", 1, -1, -1 },
+    { "%Zi",  "-0x7b", "-123", 1, -1, -1 },
+    { "%Zi",    "0X0",    "0", 1, -1, -1 },
+    { "%Zi",   "0X7b",  "123", 1, -1, -1 },
+    { "%Zi",   "0X7b",  "123", 1, -1, -1 },
+    { "%Zi",   "+0X0",    "0", 1, -1, -1 },
+    { "%Zi",  "+0X7b",  "123", 1, -1, -1 },
+    { "%Zi",  "+0X7b",  "123", 1, -1, -1 },
+    { "%Zi",   "-0X0",   "-0", 1, -1, -1 },
+    { "%Zi",  "-0X7b", "-123", 1, -1, -1 },
+    { "%Zi",  "-0X7b", "-123", 1, -1, -1 },
+    { "%Zi",    "0x0",    "0", 1, -1, -1 },
+    { "%Zi",   "0x7B",  "123", 1, -1, -1 },
+    { "%Zi",   "0x7B",  "123", 1, -1, -1 },
+    { "%Zi",   "+0x0",    "0", 1, -1, -1 },
+    { "%Zi",  "+0x7B",  "123", 1, -1, -1 },
+    { "%Zi",  "+0x7B",  "123", 1, -1, -1 },
+    { "%Zi",   "-0x0",   "-0", 1, -1, -1 },
+    { "%Zi",  "-0x7B", "-123", 1, -1, -1 },
+    { "%Zi",  "-0x7B", "-123", 1, -1, -1 },
+    { "%Zi",    "0X0",    "0", 1, -1, -1 },
+    { "%Zi",   "0X7B",  "123", 1, -1, -1 },
+    { "%Zi",   "0X7B",  "123", 1, -1, -1 },
+    { "%Zi",   "+0X0",    "0", 1, -1, -1 },
+    { "%Zi",  "+0X7B",  "123", 1, -1, -1 },
+    { "%Zi",  "+0X7B",  "123", 1, -1, -1 },
+    { "%Zi",   "-0X0",   "-0", 1, -1, -1 },
+    { "%Zi",  "-0X7B", "-123", 1, -1, -1 },
+    { "%Zi",  "-0X7B", "-123", 1, -1, -1 },
+
+    { "%Zd",    " 0",    "0", 1, -1, -1 },
+    { "%Zd",   "  0",    "0", 1, -1, -1 },
+    { "%Zd",  "   0",    "0", 1, -1, -1 },
+    { "%Zd",   "\t0",    "0", 1, -1, -1 },
+    { "%Zd", "\t\t0",    "0", 1, -1, -1 },
+
+    { "hello%Zd",      "hello0",       "0", 1, -1, -1 },
+    { "hello%Zd",      "hello 0",      "0", 1, -1, -1 },
+    { "hello%Zd",      "hello \t0",    "0", 1, -1, -1 },
+    { "hello%Zdworld", "hello 0world", "0", 1, -1, -1 },
+
+    { "hello%*Zd",      "hello0",       "-999", 0, -1, -1 },
+    { "hello%*Zd",      "hello 0",      "-999", 0, -1, -1 },
+    { "hello%*Zd",      "hello \t0",    "-999", 0, -1, -1 },
+    { "hello%*Zdworld", "hello 0world", "-999", 0, -1, -1 },
+
+    { "%Zd",    "",     "-999", -1, -1, -555 },
+    { "%Zd",    " ",    "-999", -1, -1, -555 },
+    { " %Zd",   "",     "-999", -1, -1, -555 },
+    { "xyz%Zd", "",     "-999", -1, -1, -555 },
+
+    { "%*Zd",    "",     "-999", -1, -1, -555 },
+    { " %*Zd",   "",     "-999", -1, -1, -555 },
+    { "xyz%*Zd", "",     "-999", -1, -1, -555 },
+
+    { "%Zd",    "xyz",  "0",     0, 0, -555 },
+
+    /* match something, but invalid */
+    { "%Zd",    "-",    "-999",  0, 1, -555 },
+    { "%Zd",    "+",    "-999",  0, 1, -555 },
+    { "xyz%Zd", "xyz-", "-999",  0, 4, -555 },
+    { "xyz%Zd", "xyz+", "-999",  0, 4, -555 },
+    { "%Zi",    "0x",   "-999",  0, 2, -555 },
+    { "%Zi",    "0X",   "-999",  0, 2, -555 },
+    { "%Zi",    "0x-",  "-999",  0, 2, -555 },
+    { "%Zi",    "0X+",  "-999",  0, 2, -555 },
+    { "%Zi",    "-0x",  "-999",  0, 3, -555 },
+    { "%Zi",    "-0X",  "-999",  0, 3, -555 },
+    { "%Zi",    "+0x",  "-999",  0, 3, -555 },
+    { "%Zi",    "+0X",  "-999",  0, 3, -555 },
+
+    { "%1Zi",  "1234", "1",    1, 1, 1 },
+    { "%2Zi",  "1234", "12",   1, 2, 2 },
+    { "%3Zi",  "1234", "123",  1, 3, 3 },
+    { "%4Zi",  "1234", "1234", 1, 4, 4 },
+    { "%5Zi",  "1234", "1234", 1, 4, 4 },
+    { "%6Zi",  "1234", "1234", 1, 4, 4 },
+
+    { "%1Zi",  "01234", "0",     1, 1, 1 },
+    { "%2Zi",  "01234", "01",    1, 2, 2 },
+    { "%3Zi",  "01234", "012",   1, 3, 3 },
+    { "%4Zi",  "01234", "0123",  1, 4, 4 },
+    { "%5Zi",  "01234", "01234", 1, 5, 5 },
+    { "%6Zi",  "01234", "01234", 1, 5, 5 },
+    { "%7Zi",  "01234", "01234", 1, 5, 5 },
+
+    { "%1Zi",  "0x1234", "0",      1, 1, 1 },
+    { "%2Zi",  "0x1234", "-999",   0, 2, -555 },
+    { "%3Zi",  "0x1234", "0x1",    1, 3, 3 },
+    { "%4Zi",  "0x1234", "0x12",   1, 4, 4 },
+    { "%5Zi",  "0x1234", "0x123",  1, 5, 5 },
+    { "%6Zi",  "0x1234", "0x1234", 1, 6, 6 },
+    { "%7Zi",  "0x1234", "0x1234", 1, 6, 6 },
+    { "%8Zi",  "0x1234", "0x1234", 1, 6, 6 },
+
+    { "%%xyz%Zd",  "%xyz123",  "123", 1, -1, -1 },
+    { "12%%34%Zd", "12%34567", "567", 1, -1, -1 },
+    { "%%%%%Zd",   "%%123",    "123", 1, -1, -1 },
+
+    /* various subtle EOF cases */
+    { "x",       "",    "-999", EOF, 0, -555 },
+    { " x",      "",    "-999", EOF, 0, -555 },
+    { "xyz",     "",    "-999", EOF, 0, -555 },
+    { " ",       "",    "-999",   0, 0,    0 },
+    { " ",       " ",   "-999",   0, 1,    1 },
+    { "%*Zd%Zd", "",    "-999", EOF, 0, -555 },
+    { "%*Zd%Zd", "123", "-999", EOF, 3, -555 },
+    { "x",       "x",   "-999",   0, 1,    1 },
+    { "xyz",     "x",   "-999", EOF, 1, -555 },
+    { "xyz",     "xy",  "-999", EOF, 2, -555 },
+    { "xyz",     "xyz", "-999",   0, 3,    3 },
+    { "%Zn",     "",    "0",      0, 0,    0 },
+    { " %Zn",    "",    "0",      0, 0,    0 },
+    { " x%Zn",   "",    "-999", EOF, 0, -555 },
+    { "xyz%Zn",  "",    "-999", EOF, 0, -555 },
+    { " x%Zn",   "",    "-999", EOF, 0, -555 },
+    { " %Zn x",  " ",   "-999", EOF, 1, -555 },
+
+    /* these seem to tickle a bug in glibc 2.2.4 */
+    { " x",      " ",   "-999", EOF, 1, -555, 1 },
+    { " xyz",    " ",   "-999", EOF, 1, -555, 1 },
+    { " x%Zn",   " ",   "-999", EOF, 1, -555, 1 },
+  };
+
+  int         i, j, ignore;
+  int         got_ret, want_ret, got_upto, want_upto;
+  mpz_t       got, want;
+  long        got_l, want_ftell;
+  int         error = 0;
+  fun_t       fun;
+  const char  *name;
+  char        fmt[128];   /* working copy of data[i].fmt plus "%n" */
+
+  mpz_init (got);
+  mpz_init (want);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      mpz_set_str_or_abort (want, data[i].want, 0);
+
+      ASSERT_ALWAYS (strlen (data[i].fmt) + 2 < sizeof (fmt));
+      strcpy (fmt, data[i].fmt);
+      strcat (fmt, "%n");   /* so each scanner also reports how far it got */
+
+      ignore = fmt_allignore (fmt);   /* nonzero: only &got_upto gets passed */
+
+      for (j = 0; j <= 3; j++)   /* j picks one of the four scanners */
+        {
+          want_ret = data[i].want_ret;
+
+          want_ftell = data[i].want_ftell;
+          if (want_ftell == -1)
+            want_ftell = strlen (data[i].input);
+
+          want_upto = data[i].want_upto;
+          if (want_upto == -1)
+            want_upto = strlen (data[i].input);
+
+          switch (j) {
+          case 0:
+            name = "gmp_sscanf";
+            fun = fun_gmp_sscanf;
+            break;
+          case 1:
+            name = "gmp_fscanf";
+            fun = fun_gmp_fscanf;
+            break;
+          case 2:   /* libc pass: fmt is converted in place, after the gmp passes */
+#ifdef __GLIBC__
+            if (data[i].not_glibc)
+              continue;
+#endif
+            if (! libc_scanf_convert (fmt))
+              continue;
+            name = "standard sscanf";
+            fun = fun_sscanf;
+            break;
+          case 3:   /* same skipping rules as case 2 */
+#ifdef __GLIBC__
+            if (data[i].not_glibc)
+              continue;
+#endif
+            if (! libc_scanf_convert (fmt))
+              continue;
+            name = "standard fscanf";
+            fun = fun_fscanf;
+            break;
+          default:
+            ASSERT_ALWAYS (0);
+            break;
+          }
+
+          got_upto = -555;   /* stays -555 if "%n" stores nothing */
+          got_ftell = -1L;   /* stays -1 unless an fscanf wrapper sets it */
+
+          switch (j) {
+          case 0:
+          case 1:   /* gmp scanners store directly into the mpz_t */
+            mpz_set_si (got, -999L);
+            if (ignore)
+              got_ret = (*fun) (data[i].input, fmt, &got_upto, NULL);
+            else
+              got_ret = (*fun) (data[i].input, fmt, got, &got_upto);
+            break;
+          case 2:
+          case 3:   /* libc scanners store into a long, copied to got after */
+            got_l = -999L;
+            if (ignore)
+              got_ret = (*fun) (data[i].input, fmt, &got_upto, NULL);
+            else
+              got_ret = (*fun) (data[i].input, fmt, &got_l, &got_upto);
+            mpz_set_si (got, got_l);
+            break;
+          default:
+            ASSERT_ALWAYS (0);
+            break;
+          }
+
+          MPZ_CHECK_FORMAT (got);
+
+          if (got_ret != want_ret)
+            {
+              printf ("%s wrong return value\n", name);
+              error = 1;
+            }
+          if (want_ret == 1 && mpz_cmp (want, got) != 0)
+            {
+              printf ("%s wrong result\n", name);
+              error = 1;
+            }
+          if (got_upto != want_upto)
+            {
+              printf ("%s wrong upto\n", name);
+              error = 1;
+            }
+          if (got_ftell != -1 && want_ftell != -1 && got_ftell != want_ftell)
+            {
+              printf ("%s wrong ftell\n", name);
+              error = 1;
+            }
+          if (error)   /* dump the failing case and abort at the first failure */
+            {
+              printf    ("  fmt   \"%s\"\n", data[i].fmt);
+              printf    ("  input \"%s\"\n", data[i].input);
+              printf    ("  ignore %d\n", ignore);
+              printf    ("  ret   want=%d\n", want_ret);
+              printf    ("        got =%d\n", got_ret);
+              mpz_trace ("  value want", want);
+              mpz_trace ("        got ", got);
+              printf    ("  upto  want =%d\n", want_upto);
+              printf    ("        got  =%d\n", got_upto);
+              if (got_ftell != -1)
+                {
+                  printf    ("  ftell want =%ld\n", want_ftell);
+                  printf    ("        got  =%ld\n", got_ftell);
+                }
+              abort ();
+            }
+        }
+    }
+
+  mpz_clear (got);
+  mpz_clear (want);
+}
+
+void
+check_q (void)
+{ /* table-driven check of the "%Q" scanf conversions */
+  static const struct {
+    const char  *fmt;     /* format under test, "%n" is appended before use */
+    const char  *input;   /* text given to the scanning function */
+    const char  *want;    /* expected value, compared only when ret is 1 */
+    int         ret;      /* expected return value */
+    long        ftell;    /* expected "%n" and ftell value, -1 for length of input */
+
+  } data[] = {
+    /* %Qd: decimal integers, with and without a sign */
+    { "%Qd",    "0",    "0", 1, -1 },
+    { "%Qd",    "1",    "1", 1, -1 },
+    { "%Qd",  "123",  "123", 1, -1 },
+    { "%Qd",   "+0",    "0", 1, -1 },
+    { "%Qd",   "+1",    "1", 1, -1 },
+    { "%Qd", "+123",  "123", 1, -1 },
+    { "%Qd",   "-0",    "0", 1, -1 },
+    { "%Qd",   "-1",   "-1", 1, -1 },
+    { "%Qd", "-123", "-123", 1, -1 },
+    /* %Qo: octal, 173 octal is 123 */
+    { "%Qo",    "0",    "0", 1, -1 },
+    { "%Qo",  "173",  "123", 1, -1 },
+    { "%Qo",   "+0",    "0", 1, -1 },
+    { "%Qo", "+173",  "123", 1, -1 },
+    { "%Qo",   "-0",    "0", 1, -1 },
+    { "%Qo", "-173", "-123", 1, -1 },
+    /* %Qx and %QX: hex digits in either case, 7b hex is 123 */
+    { "%Qx",    "0",    "0", 1, -1 },
+    { "%Qx",   "7b",  "123", 1, -1 },
+    { "%Qx",   "7b",  "123", 1, -1 },
+    { "%Qx",   "+0",    "0", 1, -1 },
+    { "%Qx",  "+7b",  "123", 1, -1 },
+    { "%Qx",  "+7b",  "123", 1, -1 },
+    { "%Qx",   "-0",   "-0", 1, -1 },
+    { "%Qx",  "-7b", "-123", 1, -1 },
+    { "%Qx",  "-7b", "-123", 1, -1 },
+    { "%QX",    "0",    "0", 1, -1 },
+    { "%QX",   "7b",  "123", 1, -1 },
+    { "%QX",   "7b",  "123", 1, -1 },
+    { "%QX",   "+0",    "0", 1, -1 },
+    { "%QX",  "+7b",  "123", 1, -1 },
+    { "%QX",  "+7b",  "123", 1, -1 },
+    { "%QX",   "-0",   "-0", 1, -1 },
+    { "%QX",  "-7b", "-123", 1, -1 },
+    { "%QX",  "-7b", "-123", 1, -1 },
+    { "%Qx",    "0",    "0", 1, -1 },
+    { "%Qx",   "7B",  "123", 1, -1 },
+    { "%Qx",   "7B",  "123", 1, -1 },
+    { "%Qx",   "+0",    "0", 1, -1 },
+    { "%Qx",  "+7B",  "123", 1, -1 },
+    { "%Qx",  "+7B",  "123", 1, -1 },
+    { "%Qx",   "-0",   "-0", 1, -1 },
+    { "%Qx",  "-7B", "-123", 1, -1 },
+    { "%Qx",  "-7B", "-123", 1, -1 },
+    { "%QX",    "0",    "0", 1, -1 },
+    { "%QX",   "7B",  "123", 1, -1 },
+    { "%QX",   "7B",  "123", 1, -1 },
+    { "%QX",   "+0",    "0", 1, -1 },
+    { "%QX",  "+7B",  "123", 1, -1 },
+    { "%QX",  "+7B",  "123", 1, -1 },
+    { "%QX",   "-0",   "-0", 1, -1 },
+    { "%QX",  "-7B", "-123", 1, -1 },
+    { "%QX",  "-7B", "-123", 1, -1 },
+    /* %Qi with no prefix on the input: decimal */
+    { "%Qi",    "0",    "0", 1, -1 },
+    { "%Qi",    "1",    "1", 1, -1 },
+    { "%Qi",  "123",  "123", 1, -1 },
+    { "%Qi",   "+0",    "0", 1, -1 },
+    { "%Qi",   "+1",    "1", 1, -1 },
+    { "%Qi", "+123",  "123", 1, -1 },
+    { "%Qi",   "-0",    "0", 1, -1 },
+    { "%Qi",   "-1",   "-1", 1, -1 },
+    { "%Qi", "-123", "-123", 1, -1 },
+    /* %Qi with a leading 0: octal */
+    { "%Qi",    "00",    "0", 1, -1 },
+    { "%Qi",  "0173",  "123", 1, -1 },
+    { "%Qi",   "+00",    "0", 1, -1 },
+    { "%Qi", "+0173",  "123", 1, -1 },
+    { "%Qi",   "-00",    "0", 1, -1 },
+    { "%Qi", "-0173", "-123", 1, -1 },
+    /* %Qi with a leading 0x or 0X: hex */
+    { "%Qi",    "0x0",    "0", 1, -1 },
+    { "%Qi",   "0x7b",  "123", 1, -1 },
+    { "%Qi",   "0x7b",  "123", 1, -1 },
+    { "%Qi",   "+0x0",    "0", 1, -1 },
+    { "%Qi",  "+0x7b",  "123", 1, -1 },
+    { "%Qi",  "+0x7b",  "123", 1, -1 },
+    { "%Qi",   "-0x0",   "-0", 1, -1 },
+    { "%Qi",  "-0x7b", "-123", 1, -1 },
+    { "%Qi",  "-0x7b", "-123", 1, -1 },
+    { "%Qi",    "0X0",    "0", 1, -1 },
+    { "%Qi",   "0X7b",  "123", 1, -1 },
+    { "%Qi",   "0X7b",  "123", 1, -1 },
+    { "%Qi",   "+0X0",    "0", 1, -1 },
+    { "%Qi",  "+0X7b",  "123", 1, -1 },
+    { "%Qi",  "+0X7b",  "123", 1, -1 },
+    { "%Qi",   "-0X0",   "-0", 1, -1 },
+    { "%Qi",  "-0X7b", "-123", 1, -1 },
+    { "%Qi",  "-0X7b", "-123", 1, -1 },
+    { "%Qi",    "0x0",    "0", 1, -1 },
+    { "%Qi",   "0x7B",  "123", 1, -1 },
+    { "%Qi",   "0x7B",  "123", 1, -1 },
+    { "%Qi",   "+0x0",    "0", 1, -1 },
+    { "%Qi",  "+0x7B",  "123", 1, -1 },
+    { "%Qi",  "+0x7B",  "123", 1, -1 },
+    { "%Qi",   "-0x0",   "-0", 1, -1 },
+    { "%Qi",  "-0x7B", "-123", 1, -1 },
+    { "%Qi",  "-0x7B", "-123", 1, -1 },
+    { "%Qi",    "0X0",    "0", 1, -1 },
+    { "%Qi",   "0X7B",  "123", 1, -1 },
+    { "%Qi",   "0X7B",  "123", 1, -1 },
+    { "%Qi",   "+0X0",    "0", 1, -1 },
+    { "%Qi",  "+0X7B",  "123", 1, -1 },
+    { "%Qi",  "+0X7B",  "123", 1, -1 },
+    { "%Qi",   "-0X0",   "-0", 1, -1 },
+    { "%Qi",  "-0X7B", "-123", 1, -1 },
+    { "%Qi",  "-0X7B", "-123", 1, -1 },
+    /* leading spaces and tabs in the input */
+    { "%Qd",    " 0",    "0", 1, -1 },
+    { "%Qd",   "  0",    "0", 1, -1 },
+    { "%Qd",  "   0",    "0", 1, -1 },
+    { "%Qd",   "\t0",    "0", 1, -1 },
+    { "%Qd", "\t\t0",    "0", 1, -1 },
+    /* fractions */
+    { "%Qd",  "3/2",   "3/2", 1, -1 },
+    { "%Qd", "+3/2",   "3/2", 1, -1 },
+    { "%Qd", "-3/2",  "-3/2", 1, -1 },
+
+    { "%Qx",  "f/10", "15/16", 1, -1 },
+    { "%Qx",  "F/10", "15/16", 1, -1 },
+    { "%QX",  "f/10", "15/16", 1, -1 },
+    { "%QX",  "F/10", "15/16", 1, -1 },
+
+    { "%Qo",  "20/21",  "16/17", 1, -1 },
+    { "%Qo", "-20/21", "-16/17", 1, -1 },
+    /* %Qi: numerator and denominator each go by their own prefix */
+    { "%Qi",    "10/11",  "10/11", 1, -1 },
+    { "%Qi",   "+10/11",  "10/11", 1, -1 },
+    { "%Qi",   "-10/11", "-10/11", 1, -1 },
+    { "%Qi",   "010/11",   "8/11", 1, -1 },
+    { "%Qi",  "+010/11",   "8/11", 1, -1 },
+    { "%Qi",  "-010/11",  "-8/11", 1, -1 },
+    { "%Qi",  "0x10/11",  "16/11", 1, -1 },
+    { "%Qi", "+0x10/11",  "16/11", 1, -1 },
+    { "%Qi", "-0x10/11", "-16/11", 1, -1 },
+
+    { "%Qi",    "10/011",  "10/9", 1, -1 },
+    { "%Qi",   "+10/011",  "10/9", 1, -1 },
+    { "%Qi",   "-10/011", "-10/9", 1, -1 },
+    { "%Qi",   "010/011",   "8/9", 1, -1 },
+    { "%Qi",  "+010/011",   "8/9", 1, -1 },
+    { "%Qi",  "-010/011",  "-8/9", 1, -1 },
+    { "%Qi",  "0x10/011",  "16/9", 1, -1 },
+    { "%Qi", "+0x10/011",  "16/9", 1, -1 },
+    { "%Qi", "-0x10/011", "-16/9", 1, -1 },
+
+    { "%Qi",    "10/0x11",  "10/17", 1, -1 },
+    { "%Qi",   "+10/0x11",  "10/17", 1, -1 },
+    { "%Qi",   "-10/0x11", "-10/17", 1, -1 },
+    { "%Qi",   "010/0x11",   "8/17", 1, -1 },
+    { "%Qi",  "+010/0x11",   "8/17", 1, -1 },
+    { "%Qi",  "-010/0x11",  "-8/17", 1, -1 },
+    { "%Qi",  "0x10/0x11",  "16/17", 1, -1 },
+    { "%Qi", "+0x10/0x11",  "16/17", 1, -1 },
+    { "%Qi", "-0x10/0x11", "-16/17", 1, -1 },
+    /* literal text in the format */
+    { "hello%Qd",      "hello0",         "0", 1, -1 },
+    { "hello%Qd",      "hello 0",        "0", 1, -1 },
+    { "hello%Qd",      "hello \t0",      "0", 1, -1 },
+    { "hello%Qdworld", "hello 0world",   "0", 1, -1 },
+    { "hello%Qd",      "hello3/2",     "3/2", 1, -1 },
+    /* with '*' nothing is assigned and the expected return is 0 */
+    { "hello%*Qd",      "hello0",        "-999/121", 0, -1 },
+    { "hello%*Qd",      "hello 0",       "-999/121", 0, -1 },
+    { "hello%*Qd",      "hello \t0",     "-999/121", 0, -1 },
+    { "hello%*Qdworld", "hello 0world",  "-999/121", 0, -1 },
+    { "hello%*Qdworld", "hello3/2world", "-999/121", 0, -1 },
+    /* empty or blank input: expected return is -1 */
+    { "%Qd",    "",     "-999/121", -1, -1 },
+    { "%Qd",   " ",     "-999/121", -1, -1 },
+    { " %Qd",   "",     "-999/121", -1, -1 },
+    { "xyz%Qd", "",     "-999/121", -1, -1 },
+
+    { "%*Qd",    "",     "-999/121", -1, -1 },
+    { " %*Qd",   "",     "-999/121", -1, -1 },
+    { "xyz%*Qd", "",     "-999/121", -1, -1 },
+
+    /* match something, but invalid */
+    { "%Qd",    "-",     "-999/121",  0, 1 },
+    { "%Qd",    "+",     "-999/121",  0, 1 },
+    { "%Qd",    "/-",    "-999/121",  0, 1 },
+    { "%Qd",    "/+",    "-999/121",  0, 1 },
+    { "%Qd",    "-/",    "-999/121",  0, 1 },
+    { "%Qd",    "+/",    "-999/121",  0, 1 },
+    { "%Qd",    "-/-",   "-999/121",  0, 1 },
+    { "%Qd",    "-/+",   "-999/121",  0, 1 },
+    { "%Qd",    "+/+",   "-999/121",  0, 1 },
+    { "%Qd",    "/123",  "-999/121",  0, 1 },
+    { "%Qd",    "-/123", "-999/121",  0, 1 },
+    { "%Qd",    "+/123", "-999/121",  0, 1 },
+    { "%Qd",    "123/",  "-999/121",  0, 1 },
+    { "%Qd",    "123/-", "-999/121",  0, 1 },
+    { "%Qd",    "123/+", "-999/121",  0, 1 },
+    { "xyz%Qd", "xyz-",  "-999/121",  0, 4 },
+    { "xyz%Qd", "xyz+",  "-999/121",  0, 4 },
+    /* field widths: only that many characters of the input are used */
+    { "%1Qi",  "12/57", "1",        1, 1 },
+    { "%2Qi",  "12/57", "12",       1, 2 },
+    { "%3Qi",  "12/57", "-999/121", 0, -1 },
+    { "%4Qi",  "12/57", "12/5",     1, 4 },
+    { "%5Qi",  "12/57", "12/57",    1, 5 },
+    { "%6Qi",  "12/57", "12/57",    1, 5 },
+    { "%7Qi",  "12/57", "12/57",    1, 5 },
+
+    { "%1Qi",  "012/057", "0",        1, 1 },
+    { "%2Qi",  "012/057", "01",       1, 2 },
+    { "%3Qi",  "012/057", "012",      1, 3 },
+    { "%4Qi",  "012/057", "-999/121", 0, -1 },
+    { "%5Qi",  "012/057", "012/0",    1, 5 },
+    { "%6Qi",  "012/057", "012/5",    1, 6 },
+    { "%7Qi",  "012/057", "012/057",  1, 7 },
+    { "%8Qi",  "012/057", "012/057",  1, 7 },
+    { "%9Qi",  "012/057", "012/057",  1, 7 },
+
+    { "%1Qi",  "0x12/0x57", "0",         1, 1 },
+    { "%2Qi",  "0x12/0x57", "-999",      0, 2 },
+    { "%3Qi",  "0x12/0x57", "0x1",       1, 3 },
+    { "%4Qi",  "0x12/0x57", "0x12",      1, 4 },
+    { "%5Qi",  "0x12/0x57", "-999/121",  0, 5 },
+    { "%6Qi",  "0x12/0x57", "0x12/0",    1, 6 },
+    { "%7Qi",  "0x12/0x57", "-999/121",  0, 7 },
+    { "%8Qi",  "0x12/0x57", "0x12/0x5",  1, 8 },
+    { "%9Qi",  "0x12/0x57", "0x12/0x57", 1, 9 },
+    { "%10Qi", "0x12/0x57", "0x12/0x57", 1, 9 },
+    { "%11Qi", "0x12/0x57", "0x12/0x57", 1, 9 },
+
+    { "%Qd",  "xyz", "0", 0, 0 },
+  };
+
+  int         i, j, ignore, got_ret, want_ret, got_upto, want_upto;
+  mpq_t       got, want;
+  long        got_l, want_ftell;   /* got_l: destination for the libc functions */
+  int         error = 0;
+  fun_t       fun;
+  const char  *name;               /* function name shown in failure messages */
+  char        fmt[128];            /* data[i].fmt with "%n" appended */
+
+  mpq_init (got);
+  mpq_init (want);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      mpq_set_str_or_abort (want, data[i].want, 0);
+      /* append "%n", its result is collected in got_upto */
+      ASSERT_ALWAYS (strlen (data[i].fmt) + 2 < sizeof (fmt));
+      strcpy (fmt, data[i].fmt);
+      strcat (fmt, "%n");
+      /* a '*' anywhere in the format: no destination is passed for the value */
+      ignore = (strchr (fmt, '*') != NULL);
+      /* j picks the scanning function, see the first switch below */
+      for (j = 0; j <= 3; j++)
+        {
+          want_ret = data[i].ret;
+
+          want_ftell = data[i].ftell;
+          if (want_ftell == -1)
+            want_ftell = strlen (data[i].input);
+          want_upto = want_ftell;
+          /* on failure got_upto is expected to keep its -555 preset */
+          if (want_ret == -1 || (want_ret == 0 && ! ignore))
+            {
+              want_ftell = -1;   /* -1 switches off the ftell comparison */
+              want_upto = -555;
+            }
+
+          switch (j) {
+          case 0:
+            name = "gmp_sscanf";
+            fun = fun_gmp_sscanf;
+            break;
+          case 1:
+            name = "gmp_fscanf";
+            fun = fun_gmp_fscanf;
+            break;
+          case 2:
+            if (strchr (data[i].input, '/') != NULL)
+              continue;   /* the libc cases read into a long, so no fractions */
+            if (! libc_scanf_convert (fmt))
+              continue;   /* NOTE(review): helper defined elsewhere, presumably adapts fmt for libc or declines -- confirm */
+            name = "standard sscanf";
+            fun = fun_sscanf;
+            break;
+          case 3:
+            if (strchr (data[i].input, '/') != NULL)
+              continue;
+            if (! libc_scanf_convert (fmt))
+              continue;
+            name = "standard fscanf";
+            fun = fun_fscanf;
+            break;
+          default:
+            ASSERT_ALWAYS (0);
+            break;
+          }
+          /* presets, recognisable afterwards if the call stores nothing */
+          got_upto = -555;
+          got_ftell = -1;
+          /* NOTE(review): got_ftell is declared elsewhere, presumably set by the fscanf wrappers -- confirm */
+          switch (j) {
+          case 0:
+          case 1:
+            mpq_set_si (got, -999L, 121L);   /* sentinel value */
+            if (ignore)
+              got_ret = (*fun) (data[i].input, fmt, &got_upto, NULL);
+            else
+              got_ret = (*fun) (data[i].input, fmt, got, &got_upto);
+            break;
+          case 2:
+          case 3:
+            got_l = -999L;   /* sentinel, turned back into -999/121 below */
+            if (ignore)
+              got_ret = (*fun) (data[i].input, fmt, &got_upto, NULL);
+            else
+              got_ret = (*fun) (data[i].input, fmt, &got_l, &got_upto);
+            mpq_set_si (got, got_l, (got_l == -999L ? 121L : 1L));
+            break;
+          default:
+            ASSERT_ALWAYS (0);
+            break;
+          }
+
+          MPZ_CHECK_FORMAT (mpq_numref (got));
+          MPZ_CHECK_FORMAT (mpq_denref (got));
+
+          if (got_ret != want_ret)
+            {
+              printf ("%s wrong return value\n", name);
+              error = 1;
+            }
+          /* use direct mpz compares, since some of the test data is
+             non-canonical and can trip ASSERTs in mpq_equal */
+          if (want_ret == 1
+              && ! (mpz_cmp (mpq_numref(want), mpq_numref(got)) == 0
+                    && mpz_cmp (mpq_denref(want), mpq_denref(got)) == 0))
+            {
+              printf ("%s wrong result\n", name);
+              error = 1;
+            }
+          if (got_upto != want_upto)
+            {
+              printf ("%s wrong upto\n", name);
+              error = 1;
+            }
+          if (got_ftell != -1 && want_ftell != -1 && got_ftell != want_ftell)
+            {
+              printf ("%s wrong ftell\n", name);
+              error = 1;
+            }
+          if (error)   /* dump the failing case and stop at the first failure */
+            {
+              printf    ("  fmt   \"%s\"\n", data[i].fmt);
+              printf    ("  input \"%s\"\n", data[i].input);
+              printf    ("  ret   want=%d\n", want_ret);
+              printf    ("        got =%d\n", got_ret);
+              mpq_trace ("  value want", want);
+              mpq_trace ("        got ", got);
+              printf    ("  upto  want=%d\n", want_upto);
+              printf    ("        got =%d\n", got_upto);
+              if (got_ftell != -1)
+                {
+                  printf    ("  ftell want =%ld\n", want_ftell);
+                  printf    ("        got  =%ld\n", got_ftell);
+                }
+              abort ();
+            }
+        }
+    }
+
+  mpq_clear (got);
+  mpq_clear (want);
+}
+
+void
+check_f (void)
+{ /* table-driven check of the "%F" float scanf conversions */
+  static const struct {
+    const char  *fmt;     /* format under test, "%n" is appended before use */
+    const char  *input;   /* text given to the scanning function */
+    const char  *want;    /* expected value in decimal, compared only when ret is 1 */
+    int         ret;      /* expected return value */
+    long        ftell;    /* or -1 for length of input string */
+
+  } data[] = {
+    /* zero with each of the conversion letters */
+    { "%Ff",    "0",    "0", 1, -1 },
+    { "%Fe",    "0",    "0", 1, -1 },
+    { "%FE",    "0",    "0", 1, -1 },
+    { "%Fg",    "0",    "0", 1, -1 },
+    { "%FG",    "0",    "0", 1, -1 },
+    /* plain decimal: optional sign, trailing point, leading zero */
+    { "%Ff",  "123",    "123", 1, -1 },
+    { "%Ff", "+123",    "123", 1, -1 },
+    { "%Ff", "-123",   "-123", 1, -1 },
+    { "%Ff",  "123.",   "123", 1, -1 },
+    { "%Ff", "+123.",   "123", 1, -1 },
+    { "%Ff", "-123.",  "-123", 1, -1 },
+    { "%Ff",  "123.0",  "123", 1, -1 },
+    { "%Ff", "+123.0",  "123", 1, -1 },
+    { "%Ff", "-123.0", "-123", 1, -1 },
+    { "%Ff",  "0123",   "123", 1, -1 },
+    { "%Ff", "-0123",  "-123", 1, -1 },
+    /* exponents written with a lower case e */
+    { "%Ff",  "123.456e3",   "123456", 1, -1 },
+    { "%Ff", "-123.456e3",  "-123456", 1, -1 },
+    { "%Ff",  "123.456e+3",  "123456", 1, -1 },
+    { "%Ff", "-123.456e+3", "-123456", 1, -1 },
+    { "%Ff",  "123000e-3",      "123", 1, -1 },
+    { "%Ff", "-123000e-3",     "-123", 1, -1 },
+    { "%Ff",  "123000.e-3",     "123", 1, -1 },
+    { "%Ff", "-123000.e-3",    "-123", 1, -1 },
+    /* exponents written with an upper case E */
+    { "%Ff",  "123.456E3",   "123456", 1, -1 },
+    { "%Ff", "-123.456E3",  "-123456", 1, -1 },
+    { "%Ff",  "123.456E+3",  "123456", 1, -1 },
+    { "%Ff", "-123.456E+3", "-123456", 1, -1 },
+    { "%Ff",  "123000E-3",      "123", 1, -1 },
+    { "%Ff", "-123000E-3",     "-123", 1, -1 },
+    { "%Ff",  "123000.E-3",     "123", 1, -1 },
+    { "%Ff", "-123000.E-3",    "-123", 1, -1 },
+    /* no digits before the point */
+    { "%Ff",  ".456e3",   "456", 1, -1 },
+    { "%Ff", "-.456e3",  "-456", 1, -1 },
+    { "%Ff",  ".456e+3",  "456", 1, -1 },
+    { "%Ff", "-.456e+3", "-456", 1, -1 },
+    /* leading spaces and tabs in the input */
+    { "%Ff",    " 0",    "0", 1, -1 },
+    { "%Ff",   "  0",    "0", 1, -1 },
+    { "%Ff",  "   0",    "0", 1, -1 },
+    { "%Ff",   "\t0",    "0", 1, -1 },
+    { "%Ff", "\t\t0",    "0", 1, -1 },
+    /* literal text in the format */
+    { "hello%Fg",      "hello0",       "0",   1, -1 },
+    { "hello%Fg",      "hello 0",      "0",   1, -1 },
+    { "hello%Fg",      "hello \t0",    "0",   1, -1 },
+    { "hello%Fgworld", "hello 0world", "0",   1, -1 },
+    { "hello%Fg",      "hello3.0",     "3.0", 1, -1 },
+    /* with '*' nothing is assigned and the expected return is 0 */
+    { "hello%*Fg",      "hello0",        "-999", 0, -1 },
+    { "hello%*Fg",      "hello 0",       "-999", 0, -1 },
+    { "hello%*Fg",      "hello \t0",     "-999", 0, -1 },
+    { "hello%*Fgworld", "hello 0world",  "-999", 0, -1 },
+    { "hello%*Fgworld", "hello3.0world", "-999", 0, -1 },
+    /* empty or blank input: expected return is -1 */
+    { "%Ff",     "",   "-999", -1, -1 },
+    { "%Ff",    " ",   "-999", -1, -1 },
+    { "%Ff",   "\t",   "-999", -1, -1 },
+    { "%Ff",  " \t",   "-999", -1, -1 },
+    { " %Ff",    "",   "-999", -1, -1 },
+    { "xyz%Ff",  "",   "-999", -1, -1 },
+
+    { "%*Ff",    "",   "-999", -1, -1 },
+    { " %*Ff",   "",   "-999", -1, -1 },
+    { "xyz%*Ff", "",   "-999", -1, -1 },
+    /* the ftell field is left out of this entry, so it is 0 */
+    { "%Ff",    "xyz", "0", 0 },
+
+    /* various non-empty but invalid */
+    { "%Ff",    "-",      "-999",  0, 1 },
+    { "%Ff",    "+",      "-999",  0, 1 },
+    { "xyz%Ff", "xyz-",   "-999",  0, 4 },
+    { "xyz%Ff", "xyz+",   "-999",  0, 4 },
+    { "%Ff",    "-.",     "-999",  0, 2 },
+    { "%Ff",    "+.",     "-999",  0, 2 },
+    { "%Ff",    ".e",     "-999",  0, 1 },
+    { "%Ff",   "-.e",     "-999",  0, 2 },
+    { "%Ff",   "+.e",     "-999",  0, 2 },
+    { "%Ff",    ".E",     "-999",  0, 1 },
+    { "%Ff",   "-.E",     "-999",  0, 2 },
+    { "%Ff",   "+.E",     "-999",  0, 2 },
+    { "%Ff",    ".e123",  "-999",  0, 1 },
+    { "%Ff",   "-.e123",  "-999",  0, 2 },
+    { "%Ff",   "+.e123",  "-999",  0, 2 },
+    { "%Ff",    "123e",   "-999",  0, 4 },
+    { "%Ff",   "-123e",   "-999",  0, 5 },
+    { "%Ff",    "123e-",  "-999",  0, 5 },
+    { "%Ff",   "-123e-",  "-999",  0, 6 },
+    { "%Ff",    "123e+",  "-999",  0, 5 },
+    { "%Ff",   "-123e+",  "-999",  0, 6 },
+    { "%Ff",   "123e-Z",  "-999",  0, 5 },
+
+    /* hex floats */
+    { "%Ff", "0x123p0",       "291",  1, -1 },
+    { "%Ff", "0x123P0",       "291",  1, -1 },
+    { "%Ff", "0X123p0",       "291",  1, -1 },
+    { "%Ff", "0X123P0",       "291",  1, -1 },
+    { "%Ff", "-0x123p0",     "-291",  1, -1 },
+    { "%Ff", "+0x123p0",      "291",  1, -1 },
+    { "%Ff", "0x123.p0",      "291",  1, -1 },
+    { "%Ff", "0x12.3p4",      "291",  1, -1 },
+    { "%Ff", "-0x12.3p4",    "-291",  1, -1 },
+    { "%Ff", "+0x12.3p4",     "291",  1, -1 },
+    { "%Ff", "0x1230p-4",     "291",  1, -1 },
+    { "%Ff", "-0x1230p-4",   "-291",  1, -1 },
+    { "%Ff", "+0x1230p-4",    "291",  1, -1 },
+    { "%Ff", "+0x.1230p12",   "291",  1, -1 },
+    { "%Ff", "+0x123000p-12", "291",  1, -1 },
+    { "%Ff", "0x123 p12",     "291",  1, 5 },
+    { "%Ff", "0x9 9",           "9",  1, 3 },
+    { "%Ff", "0x01",            "1",  1, 4 },
+    { "%Ff", "0x23",           "35",  1, 4 },
+    { "%Ff", "0x45",           "69",  1, 4 },
+    { "%Ff", "0x67",          "103",  1, 4 },
+    { "%Ff", "0x89",          "137",  1, 4 },
+    { "%Ff", "0xAB",          "171",  1, 4 },
+    { "%Ff", "0xCD",          "205",  1, 4 },
+    { "%Ff", "0xEF",          "239",  1, 4 },
+    { "%Ff", "0xab",          "171",  1, 4 },
+    { "%Ff", "0xcd",          "205",  1, 4 },
+    { "%Ff", "0xef",          "239",  1, 4 },
+    { "%Ff", "0x100p0A",      "256",  1, 7 },
+    { "%Ff", "0x1p9",         "512",  1, -1 },
+
+    /* invalid hex floats */
+    { "%Ff", "0x",     "-999",  0, 2 },
+    { "%Ff", "-0x",    "-999",  0, 3 },
+    { "%Ff", "+0x",    "-999",  0, 3 },
+    { "%Ff", "0x-",    "-999",  0, 2 },
+    { "%Ff", "0x+",    "-999",  0, 2 },
+    { "%Ff", "0x.",    "-999",  0, 3 },
+    { "%Ff", "-0x.",   "-999",  0, 4 },
+    { "%Ff", "+0x.",   "-999",  0, 4 },
+    { "%Ff", "0x.p",   "-999",  0, 3 },
+    { "%Ff", "-0x.p",  "-999",  0, 4 },
+    { "%Ff", "+0x.p",  "-999",  0, 4 },
+    { "%Ff", "0x.P",   "-999",  0, 3 },
+    { "%Ff", "-0x.P",  "-999",  0, 4 },
+    { "%Ff", "+0x.P",  "-999",  0, 4 },
+    { "%Ff", ".p123",  "-999",  0, 1 },
+    { "%Ff", "-.p123", "-999",  0, 2 },
+    { "%Ff", "+.p123", "-999",  0, 2 },
+    { "%Ff", "0x1p",   "-999",  0, 4 },
+    { "%Ff", "0x1p-",  "-999",  0, 5 },
+    { "%Ff", "0x1p+",  "-999",  0, 5 },
+    { "%Ff", "0x123p 12", "291",  0, 6 },
+    { "%Ff", "0x 123p12", "291",  0, 2 },
+
+  };
+
+  int         i, j, ignore, got_ret, want_ret, got_upto, want_upto;
+  mpf_t       got, want;
+  double      got_d;               /* destination for the libc functions */
+  long        want_ftell;
+  int         error = 0;
+  fun_t       fun;
+  const char  *name;               /* function name shown in failure messages */
+  char        fmt[128];            /* data[i].fmt with "%n" appended */
+
+  mpf_init (got);
+  mpf_init (want);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      mpf_set_str_or_abort (want, data[i].want, 10);
+      /* append "%n", its result is collected in got_upto */
+      ASSERT_ALWAYS (strlen (data[i].fmt) + 2 < sizeof (fmt));
+      strcpy (fmt, data[i].fmt);
+      strcat (fmt, "%n");
+      /* a '*' anywhere in the format: no destination is passed for the value */
+      ignore = (strchr (fmt, '*') != NULL);
+      /* j picks the scanning function, see the first switch below */
+      for (j = 0; j <= 3; j++)
+        {
+          want_ret = data[i].ret;
+
+          want_ftell = data[i].ftell;
+          if (want_ftell == -1)
+            want_ftell = strlen (data[i].input);
+          want_upto = want_ftell;
+          /* on failure got_upto should keep its -555 preset; want_ftell stays in force */
+          if (want_ret == -1 || (want_ret == 0 && ! ignore))
+            want_upto = -555;
+
+          switch (j) {
+          case 0:
+            name = "gmp_sscanf";
+            fun = fun_gmp_sscanf;
+            break;
+          case 1:
+            name = "gmp_fscanf";
+            fun = fun_gmp_fscanf;
+            break;
+          case 2:
+            if (! libc_scanf_convert (fmt))
+              continue;   /* NOTE(review): helper defined elsewhere, presumably adapts fmt for libc or declines -- confirm */
+            name = "standard sscanf";
+            fun = fun_sscanf;
+            break;
+          case 3:
+            if (! libc_scanf_convert (fmt))
+              continue;
+            name = "standard fscanf";
+            fun = fun_fscanf;
+            break;
+          default:
+            ASSERT_ALWAYS (0);
+            break;
+          }
+          /* presets, recognisable afterwards if the call stores nothing */
+          got_upto = -555;
+          got_ftell = -1;
+          /* NOTE(review): got_ftell is declared elsewhere, presumably set by the fscanf wrappers -- confirm */
+          switch (j) {
+          case 0:
+          case 1:
+            mpf_set_si (got, -999L);   /* sentinel value */
+            if (ignore)
+              got_ret = (*fun) (data[i].input, fmt, &got_upto, NULL);
+            else
+              got_ret = (*fun) (data[i].input, fmt, got, &got_upto);
+            break;
+          case 2:
+          case 3:
+            got_d = -999L;   /* sentinel value */
+            if (ignore)
+              got_ret = (*fun) (data[i].input, fmt, &got_upto, NULL);
+            else
+              got_ret = (*fun) (data[i].input, fmt, &got_d, &got_upto);
+            mpf_set_d (got, got_d);
+            break;
+          default:
+            ASSERT_ALWAYS (0);
+            break;
+          }
+
+          MPF_CHECK_FORMAT (got);
+
+          if (got_ret != want_ret)
+            {
+              printf ("%s wrong return value\n", name);
+              error = 1;
+            }
+          if (want_ret == 1 && mpf_cmp (want, got) != 0)
+            {
+              printf ("%s wrong result\n", name);
+              error = 1;
+            }
+          if (got_upto != want_upto)
+            {
+              printf ("%s wrong upto\n", name);
+              error = 1;
+            }
+          if (got_ftell != -1 && want_ftell != -1 && got_ftell != want_ftell)
+            {
+              printf ("%s wrong ftell\n", name);
+              error = 1;
+            }
+          if (error)   /* dump the failing case and stop at the first failure */
+            {
+              printf    ("  fmt   \"%s\"\n", data[i].fmt);
+              printf    ("  input \"%s\"\n", data[i].input);
+              printf    ("  ret   want=%d\n", want_ret);
+              printf    ("        got =%d\n", got_ret);
+              mpf_trace ("  value want", want);
+              mpf_trace ("        got ", got);
+              printf    ("  upto  want=%d\n", want_upto);
+              printf    ("        got =%d\n", got_upto);
+              if (got_ftell != -1)
+                {
+                  printf    ("  ftell want =%ld\n", want_ftell);
+                  printf    ("        got  =%ld\n", got_ftell);
+                }
+              abort ();
+            }
+        }
+    }
+
+  mpf_clear (got);
+  mpf_clear (want);
+}
+
+
+void
+check_n (void)
+{ /* check "%n": suppressed, with C length modifiers, and as %Zn %Qn %Fn */
+  int    ret;
+
+  /* %n suppressed */
+  {
+    int n = 123;
+    gmp_sscanf ("   ", " %*n", &n);
+    ASSERT_ALWAYS (n == 123);   /* n must be left as it was */
+  }
+  {
+    int n = 123;
+    fromstring_gmp_fscanf ("   ", " %*n", &n);
+    ASSERT_ALWAYS (n == 123);
+  }
+
+  /* CHECK_N: scan "abc" with "abc%<string>n" storing through a "type" pointer */
+#define CHECK_N(type, string)                           \
+  do {                                                  \
+    type  x[2];                                         \
+    char  fmt[128];                                     \
+    int   ret;                                          \
+                                                        \
+    x[0] = ~ (type) 0;                                  \
+    x[1] = ~ (type) 0;                                  \
+    sprintf (fmt, "abc%%%sn", string);                  \
+    ret = gmp_sscanf ("abc", fmt, &x[0]);               \
+                                                        \
+    ASSERT_ALWAYS (ret == 0);                           \
+                                                        \
+    /* should write whole of x[0] and none of x[1] */   \
+    ASSERT_ALWAYS (x[0] == 3);                          \
+    ASSERT_ALWAYS (x[1] == (type) ~ (type) 0);         \
+                                                        \
+  } while (0)
+
+  CHECK_N (char,      "hh");
+  CHECK_N (long,      "l");
+#if HAVE_LONG_LONG
+  CHECK_N (long long, "L");
+#endif
+#if HAVE_INTMAX_T
+  CHECK_N (intmax_t,  "j");
+#endif
+#if HAVE_PTRDIFF_T
+  CHECK_N (ptrdiff_t, "t");
+#endif
+  CHECK_N (short,     "h");
+  CHECK_N (size_t,    "z");
+
+  /* %Zn */
+  {
+    mpz_t  x[2];
+    mpz_init_set_si (x[0], -987L);
+    mpz_init_set_si (x[1],  654L);   /* guard, asserted unchanged below */
+    ret = gmp_sscanf ("xyz   ", "xyz%Zn", x[0]);
+    MPZ_CHECK_FORMAT (x[0]);
+    MPZ_CHECK_FORMAT (x[1]);
+    ASSERT_ALWAYS (ret == 0);
+    ASSERT_ALWAYS (mpz_cmp_ui (x[0], 3L) == 0);   /* 3 is the length of "xyz" */
+    ASSERT_ALWAYS (mpz_cmp_ui (x[1], 654L) == 0);
+    mpz_clear (x[0]);
+    mpz_clear (x[1]);
+  }
+  { /* same format again via fromstring_gmp_fscanf */
+    mpz_t  x;
+    mpz_init (x);
+    ret = fromstring_gmp_fscanf ("xyz   ", "xyz%Zn", x);
+    ASSERT_ALWAYS (ret == 0);
+    ASSERT_ALWAYS (mpz_cmp_ui (x, 3L) == 0);
+    mpz_clear (x);
+  }
+
+  /* %Qn */
+  {
+    mpq_t  x[2];
+    mpq_init (x[0]);
+    mpq_init (x[1]);
+    mpq_set_ui (x[0], 987L, 654L);
+    mpq_set_ui (x[1], 4115L, 226L);   /* guard, asserted unchanged below */
+    ret = gmp_sscanf ("xyz   ", "xyz%Qn", x[0]);
+    MPQ_CHECK_FORMAT (x[0]);
+    MPQ_CHECK_FORMAT (x[1]);
+    ASSERT_ALWAYS (ret == 0);
+    ASSERT_ALWAYS (mpq_cmp_ui (x[0], 3L, 1L) == 0);
+    ASSERT_ALWAYS (mpq_cmp_ui (x[1], 4115L, 226L) == 0);
+    mpq_clear (x[0]);
+    mpq_clear (x[1]);
+  }
+  { /* same format again via fromstring_gmp_fscanf */
+    mpq_t  x;
+    mpq_init (x);
+    ret = fromstring_gmp_fscanf ("xyz   ", "xyz%Qn", x);
+    ASSERT_ALWAYS (ret == 0);
+    ASSERT_ALWAYS (mpq_cmp_ui (x, 3L, 1L) == 0);
+    mpq_clear (x);
+  }
+
+  /* %Fn */
+  {
+    mpf_t  x[2];
+    mpf_init (x[0]);
+    mpf_init (x[1]);
+    mpf_set_ui (x[0], 987L);
+    mpf_set_ui (x[1], 654L);   /* guard, asserted unchanged below */
+    ret = gmp_sscanf ("xyz   ", "xyz%Fn", x[0]);
+    MPF_CHECK_FORMAT (x[0]);
+    MPF_CHECK_FORMAT (x[1]);
+    ASSERT_ALWAYS (ret == 0);
+    ASSERT_ALWAYS (mpf_cmp_ui (x[0], 3L) == 0);
+    ASSERT_ALWAYS (mpf_cmp_ui (x[1], 654L) == 0);
+    mpf_clear (x[0]);
+    mpf_clear (x[1]);
+  }
+  { /* same format again via fromstring_gmp_fscanf */
+    mpf_t  x;
+    mpf_init (x);
+    ret = fromstring_gmp_fscanf ("xyz   ", "xyz%Fn", x);
+    ASSERT_ALWAYS (ret == 0);
+    ASSERT_ALWAYS (mpf_cmp_ui (x, 3L) == 0);
+    mpf_clear (x);
+  }
+}
+
+
+void
+check_misc (void)
+{ /* mixed C and GMP fields, EOF returns, "%[" sets, and %zd %td %Ld */
+  int  ret, cmp;
+  { /* three ints and an mpz_t, then "%n" */
+    int  a=9, b=8, c=7, n=66;
+    mpz_t  z;
+    mpz_init (z);
+    ret = gmp_sscanf ("1 2 3 4", "%d %d %d %Zd%n",
+                      &a, &b, &c, z, &n);
+    ASSERT_ALWAYS (ret == 4);
+    ASSERT_ALWAYS (a == 1);
+    ASSERT_ALWAYS (b == 2);
+    ASSERT_ALWAYS (c == 3);
+    ASSERT_ALWAYS (n == 7);   /* 7 is the length of "1 2 3 4" */
+    ASSERT_ALWAYS (mpz_cmp_ui (z, 4L) == 0);
+    mpz_clear (z);
+  }
+  { /* same format again via fromstring_gmp_fscanf */
+    int  a=9, b=8, c=7, n=66;
+    mpz_t  z;
+    mpz_init (z);
+    ret = fromstring_gmp_fscanf ("1 2 3 4", "%d %d %d %Zd%n",
+                                 &a, &b, &c, z, &n);
+    ASSERT_ALWAYS (ret == 4);
+    ASSERT_ALWAYS (a == 1);
+    ASSERT_ALWAYS (b == 2);
+    ASSERT_ALWAYS (c == 3);
+    ASSERT_ALWAYS (mpz_cmp_ui (z, 4L) == 0);
+    ASSERT_ALWAYS (n == 7);
+    ASSERT_ALWAYS (got_ftell == 7);   /* NOTE(review): got_ftell is set outside this function, presumably by fromstring_gmp_fscanf -- confirm */
+    mpz_clear (z);
+  }
+  /* fields suppressed with '*' are left out of the return count */
+  {
+    int  a=9, n=8;
+    mpz_t  z;
+    mpz_init (z);
+    ret = gmp_sscanf ("1 2 3 4", "%d %*d %*d %Zd%n", &a, z, &n);
+    ASSERT_ALWAYS (ret == 2);
+    ASSERT_ALWAYS (a == 1);
+    ASSERT_ALWAYS (mpz_cmp_ui (z, 4L) == 0);
+    ASSERT_ALWAYS (n == 7);
+    mpz_clear (z);
+  }
+  {
+    int  a=9, n=8;
+    mpz_t  z;
+    mpz_init (z);
+    ret = fromstring_gmp_fscanf ("1 2 3 4", "%d %*d %*d %Zd%n",
+                                 &a, z, &n);
+    ASSERT_ALWAYS (ret == 2);
+    ASSERT_ALWAYS (a == 1);
+    ASSERT_ALWAYS (mpz_cmp_ui (z, 4L) == 0);
+    ASSERT_ALWAYS (n == 7);
+    ASSERT_ALWAYS (got_ftell == 7);
+    mpz_clear (z);
+  }
+
+  /* EOF for no matching */
+  {
+    char buf[128];
+    ret = gmp_sscanf ("   ", "%s", buf);
+    ASSERT_ALWAYS (ret == EOF);
+    ret = fromstring_gmp_fscanf ("   ", "%s", buf);
+    ASSERT_ALWAYS (ret == EOF);
+    if (option_libc_scanf)   /* set by "-s" on the command line, see main */
+      {
+        ret = sscanf ("   ", "%s", buf);
+        ASSERT_ALWAYS (ret == EOF);
+        ret = fun_fscanf ("   ", "%s", buf, NULL);
+        ASSERT_ALWAYS (ret == EOF);
+      }
+  }
+
+  /* suppressed field, then eof */
+  {
+    int  x;
+    if (test_sscanf_eof_ok ())   /* NOTE(review): defined elsewhere; presumably probes the libc sscanf -- confirm */
+      {
+        ret = gmp_sscanf ("123", "%*d%d", &x);
+        ASSERT_ALWAYS (ret == EOF);
+      }
+    ret = fromstring_gmp_fscanf ("123", "%*d%d", &x);
+    ASSERT_ALWAYS (ret == EOF);
+    if (option_libc_scanf)
+      {
+        ret = sscanf ("123", "%*d%d", &x);
+        ASSERT_ALWAYS (ret == EOF);
+        ret = fun_fscanf ("123", "%*d%d", &x, NULL);
+        ASSERT_ALWAYS (ret == EOF);
+      }
+  }
+  { /* likewise with mpz_t fields */
+    mpz_t  x;
+    mpz_init (x);
+    ret = gmp_sscanf ("123", "%*Zd%Zd", x);
+    ASSERT_ALWAYS (ret == EOF);
+    ret = fromstring_gmp_fscanf ("123", "%*Zd%Zd", x);
+    ASSERT_ALWAYS (ret == EOF);
+    mpz_clear (x);
+  }
+
+  /* %[...], glibc only */
+#ifdef __GLIBC__
+  { /* a range, followed by literal text */
+    char  buf[128];
+    int   n = -1;
+    buf[0] = '\0';
+    ret = gmp_sscanf ("abcdefgh", "%[a-d]ef%n", buf, &n);
+    ASSERT_ALWAYS (ret == 1);
+    cmp = strcmp (buf, "abcd");
+    ASSERT_ALWAYS (cmp == 0);
+    ASSERT_ALWAYS (n == 6);
+  }
+  { /* a negated set */
+    char  buf[128];
+    int   n = -1;
+    buf[0] = '\0';
+    ret = gmp_sscanf ("xyza", "%[^a]a%n", buf, &n);
+    ASSERT_ALWAYS (ret == 1);
+    cmp = strcmp (buf, "xyz");
+    ASSERT_ALWAYS (cmp == 0);
+    ASSERT_ALWAYS (n == 4);
+  }
+  { /* ']' first in the set is an ordinary member */
+    char  buf[128];
+    int   n = -1;
+    buf[0] = '\0';
+    ret = gmp_sscanf ("ab]ab]", "%[]ab]%n", buf, &n);
+    ASSERT_ALWAYS (ret == 1);
+    cmp = strcmp (buf, "ab]ab]");
+    ASSERT_ALWAYS (cmp == 0);
+    ASSERT_ALWAYS (n == 6);
+  }
+  { /* a negated set with ']' as its first member */
+    char  buf[128];
+    int   n = -1;
+    buf[0] = '\0';
+    ret = gmp_sscanf ("xyzb", "%[^]ab]b%n", buf, &n);
+    ASSERT_ALWAYS (ret == 1);
+    cmp = strcmp (buf, "xyz");
+    ASSERT_ALWAYS (cmp == 0);
+    ASSERT_ALWAYS (n == 4);
+  }
+#endif
+
+  /* %zd etc won't be accepted by sscanf on old systems, and running
+     something to see if they work might be bad, so only try it on glibc,
+     and only on a new enough version (glibc 2.0 doesn't have %zd) */
+#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 0)
+  { /* size_t field followed by an mpz_t */
+    mpz_t   z;
+    size_t  s = -1;
+    mpz_init (z);
+    ret = gmp_sscanf ("456 789", "%zd %Zd", &s, z);
+    ASSERT_ALWAYS (ret == 2);
+    ASSERT_ALWAYS (s == 456);
+    ASSERT_ALWAYS (mpz_cmp_ui (z, 789L) == 0);
+    mpz_clear (z);
+  }
+  { /* ptrdiff_t field followed by an mpz_t */
+    mpz_t      z;
+    ptrdiff_t  d = -1;
+    mpz_init (z);
+    ret = gmp_sscanf ("456 789", "%td %Zd", &d, z);
+    ASSERT_ALWAYS (ret == 2);
+    ASSERT_ALWAYS (d == 456);
+    ASSERT_ALWAYS (mpz_cmp_ui (z, 789L) == 0);
+    mpz_clear (z);
+  }
+  { /* long long field followed by an mpz_t */
+    mpz_t      z;
+    long long  ll = -1;
+    mpz_init (z);
+    ret = gmp_sscanf ("456 789", "%Ld %Zd", &ll, z);
+    ASSERT_ALWAYS (ret == 2);
+    ASSERT_ALWAYS (ll == 456);
+    ASSERT_ALWAYS (mpz_cmp_ui (z, 789L) == 0);
+    mpz_clear (z);
+  }
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{ /* run every check_* group in turn; a failed check aborts the program */
+  if (argc > 1 && strcmp (argv[1], "-s") == 0)
+    option_libc_scanf = 1;   /* "-s": enables the libc scanf calls gated on this flag */
+
+  tests_start ();
+
+  mp_trace_base = 16;   /* NOTE(review): presumably the base used by the mp*_trace dumps -- confirm */
+
+  check_z ();
+  check_q ();
+  check_f ();
+  check_n ();
+  check_misc ();
+
+  unlink (TEMPFILE);   /* delete the file named by TEMPFILE (defined elsewhere) */
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpbsd/Makefile.am b/tests/mpbsd/Makefile.am

new file mode 100644 (file)

index 0000000..f609a6a
--- /dev/null
+++ b/tests/mpbsd/Makefile.am
@@ -0,0 +1,35 @@
+## Process this file with automake to generate Makefile.in
+
+# Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/tests
+LDADD = $(top_builddir)/tests/libtests.la $(top_builddir)/libmp.la
+
+if WANT_MPBSD
+MPBSD_check_OPTION = allfuns t-itom t-mtox
+endif
+
+check_PROGRAMS = $(MPBSD_check_OPTION)
+TESTS = $(check_PROGRAMS)
+
+# check linking only against libmp
+allfuns_LDADD = $(top_builddir)/libmp.la
+
+$(top_builddir)/tests/libtests.la:
+       cd $(top_builddir)/tests; $(MAKE) $(AM_MAKEFLAGS) libtests.la
diff --git a/tests/mpbsd/Makefile.in b/tests/mpbsd/Makefile.in

new file mode 100644 (file)

index 0000000..81107ec
--- /dev/null
+++ b/tests/mpbsd/Makefile.in
@@ -0,0 +1,650 @@
+# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation,
+# Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+# Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+ANSI2KNR = $(top_builddir)/ansi2knr
+check_PROGRAMS = $(am__EXEEXT_1)
+subdir = tests/mpbsd
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
+       $(top_srcdir)/configure.in
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+       $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+@WANT_MPBSD_TRUE@am__EXEEXT_1 = allfuns$(EXEEXT) t-itom$(EXEEXT) \
+@WANT_MPBSD_TRUE@      t-mtox$(EXEEXT)
+allfuns_SOURCES = allfuns.c
+allfuns_OBJECTS = allfuns$U.$(OBJEXT)
+allfuns_DEPENDENCIES = $(top_builddir)/libmp.la
+t_itom_SOURCES = t-itom.c
+t_itom_OBJECTS = t-itom$U.$(OBJEXT)
+t_itom_LDADD = $(LDADD)
+t_itom_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libmp.la
+t_mtox_SOURCES = t-mtox.c
+t_mtox_OBJECTS = t-mtox$U.$(OBJEXT)
+t_mtox_LDADD = $(LDADD)
+t_mtox_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libmp.la
+DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
+depcomp =
+am__depfiles_maybe =
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+       $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+       $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
+       $(LDFLAGS) -o $@
+SOURCES = allfuns.c t-itom.c t-mtox.c
+DIST_SOURCES = allfuns.c t-itom.c t-mtox.c
+ETAGS = etags
+CTAGS = ctags
+am__tty_colors = \
+red=; grn=; lgn=; blu=; std=
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ABI = @ABI@
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AS = @AS@
+ASMFLAGS = @ASMFLAGS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CALLING_CONVENTIONS_OBJS = @CALLING_CONVENTIONS_OBJS@
+CC = @CC@
+CCAS = @CCAS@
+CC_FOR_BUILD = @CC_FOR_BUILD@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CPP_FOR_BUILD = @CPP_FOR_BUILD@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFN_LONG_LONG_LIMB = @DEFN_LONG_LONG_LIMB@
+DEFS = @DEFS@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+EXEEXT_FOR_BUILD = @EXEEXT_FOR_BUILD@
+FGREP = @FGREP@
+GMP_LDFLAGS = @GMP_LDFLAGS@
+GMP_LIMB_BITS = @GMP_LIMB_BITS@
+GMP_NAIL_BITS = @GMP_NAIL_BITS@
+GREP = @GREP@
+HAVE_CLOCK_01 = @HAVE_CLOCK_01@
+HAVE_CPUTIME_01 = @HAVE_CPUTIME_01@
+HAVE_GETRUSAGE_01 = @HAVE_GETRUSAGE_01@
+HAVE_GETTIMEOFDAY_01 = @HAVE_GETTIMEOFDAY_01@
+HAVE_HOST_CPU_FAMILY_power = @HAVE_HOST_CPU_FAMILY_power@
+HAVE_HOST_CPU_FAMILY_powerpc = @HAVE_HOST_CPU_FAMILY_powerpc@
+HAVE_SIGACTION_01 = @HAVE_SIGACTION_01@
+HAVE_SIGALTSTACK_01 = @HAVE_SIGALTSTACK_01@
+HAVE_SIGSTACK_01 = @HAVE_SIGSTACK_01@
+HAVE_STACK_T_01 = @HAVE_STACK_T_01@
+HAVE_SYS_RESOURCE_H_01 = @HAVE_SYS_RESOURCE_H_01@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LEX = @LEX@
+LEXLIB = @LEXLIB@
+LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
+LIBCURSES = @LIBCURSES@
+LIBGMPXX_LDFLAGS = @LIBGMPXX_LDFLAGS@
+LIBGMP_DLL = @LIBGMP_DLL@
+LIBGMP_LDFLAGS = @LIBGMP_LDFLAGS@
+LIBM = @LIBM@
+LIBM_FOR_BUILD = @LIBM_FOR_BUILD@
+LIBOBJS = @LIBOBJS@
+LIBREADLINE = @LIBREADLINE@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+M4 = @M4@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
+STRIP = @STRIP@
+TAL_OBJECT = @TAL_OBJECT@
+TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
+U = @U@
+U_FOR_BUILD = @U_FOR_BUILD@
+VERSION = @VERSION@
+WITH_READLINE_01 = @WITH_READLINE_01@
+YACC = @YACC@
+YFLAGS = @YFLAGS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__leading_dot = @am__leading_dot@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+gmp_srclinks = @gmp_srclinks@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+mpn_objects = @mpn_objects@
+mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
+mpn_objs_in_libmp = @mpn_objs_in_libmp@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/tests
+LDADD = $(top_builddir)/tests/libtests.la $(top_builddir)/libmp.la
+@WANT_MPBSD_TRUE@MPBSD_check_OPTION = allfuns t-itom t-mtox
+TESTS = $(check_PROGRAMS)
+
+# check linking only against libmp
+allfuns_LDADD = $(top_builddir)/libmp.la
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps)
+       @for dep in $?; do \
+         case '$(am__configure_deps)' in \
+           *$$dep*) \
+             ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+               && { if test -f $@; then exit 0; else break; fi; }; \
+             exit 1;; \
+         esac; \
+       done; \
+       echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps tests/mpbsd/Makefile'; \
+       $(am__cd) $(top_srcdir) && \
+         $(AUTOMAKE) --gnu --ignore-deps tests/mpbsd/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+       @case '$?' in \
+         *config.status*) \
+           cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+         *) \
+           echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+           cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+       esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+clean-checkPROGRAMS:
+       @list='$(check_PROGRAMS)'; test -n "$$list" || exit 0; \
+       echo " rm -f" $$list; \
+       rm -f $$list || exit $$?; \
+       test -n "$(EXEEXT)" || exit 0; \
+       list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
+       echo " rm -f" $$list; \
+       rm -f $$list
+allfuns$(EXEEXT): $(allfuns_OBJECTS) $(allfuns_DEPENDENCIES) 
+       @rm -f allfuns$(EXEEXT)
+       $(LINK) $(allfuns_OBJECTS) $(allfuns_LDADD) $(LIBS)
+t-itom$(EXEEXT): $(t_itom_OBJECTS) $(t_itom_DEPENDENCIES) 
+       @rm -f t-itom$(EXEEXT)
+       $(LINK) $(t_itom_OBJECTS) $(t_itom_LDADD) $(LIBS)
+t-mtox$(EXEEXT): $(t_mtox_OBJECTS) $(t_mtox_DEPENDENCIES) 
+       @rm -f t-mtox$(EXEEXT)
+       $(LINK) $(t_mtox_OBJECTS) $(t_mtox_LDADD) $(LIBS)
+
+mostlyclean-compile:
+       -rm -f *.$(OBJEXT)
+
+distclean-compile:
+       -rm -f *.tab.c
+$(top_builddir)/ansi2knr:
+       $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
+
+mostlyclean-kr:
+       -test "$U" = "" || rm -f *_.c
+
+.c.o:
+       $(COMPILE) -c $<
+
+.c.obj:
+       $(COMPILE) -c `$(CYGPATH_W) '$<'`
+
+.c.lo:
+       $(LTCOMPILE) -c -o $@ $<
+allfuns_.c: allfuns.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/allfuns.c; then echo $(srcdir)/allfuns.c; else echo allfuns.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-itom_.c: t-itom.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-itom.c; then echo $(srcdir)/t-itom.c; else echo t-itom.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-mtox_.c: t-mtox.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-mtox.c; then echo $(srcdir)/t-mtox.c; else echo t-mtox.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+allfuns_.$(OBJEXT) allfuns_.lo t-itom_.$(OBJEXT) t-itom_.lo \
+t-mtox_.$(OBJEXT) t-mtox_.lo : $(ANSI2KNR)
+
+mostlyclean-libtool:
+       -rm -f *.lo
+
+clean-libtool:
+       -rm -rf .libs _libs
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+       list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       mkid -fID $$unique
+tags: TAGS
+
+TAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+               $(TAGS_FILES) $(LISP)
+       set x; \
+       here=`pwd`; \
+       list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       shift; \
+       if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+         test -n "$$unique" || unique=$$empty_fix; \
+         if test $$# -gt 0; then \
+           $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+             "$$@" $$unique; \
+         else \
+           $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+             $$unique; \
+         fi; \
+       fi
+ctags: CTAGS
+CTAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+               $(TAGS_FILES) $(LISP)
+       list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       test -z "$(CTAGS_ARGS)$$unique" \
+         || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+            $$unique
+
+GTAGS:
+       here=`$(am__cd) $(top_builddir) && pwd` \
+         && $(am__cd) $(top_srcdir) \
+         && gtags -i $(GTAGS_ARGS) "$$here"
+
+distclean-tags:
+       -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+check-TESTS: $(TESTS)
+       @failed=0; all=0; xfail=0; xpass=0; skip=0; \
+       srcdir=$(srcdir); export srcdir; \
+       list=' $(TESTS) '; \
+       $(am__tty_colors); \
+       if test -n "$$list"; then \
+         for tst in $$list; do \
+           if test -f ./$$tst; then dir=./; \
+           elif test -f $$tst; then dir=; \
+           else dir="$(srcdir)/"; fi; \
+           if $(TESTS_ENVIRONMENT) $${dir}$$tst; then \
+             all=`expr $$all + 1`; \
+             case " $(XFAIL_TESTS) " in \
+             *[\ \     ]$$tst[\ \      ]*) \
+               xpass=`expr $$xpass + 1`; \
+               failed=`expr $$failed + 1`; \
+               col=$$red; res=XPASS; \
+             ;; \
+             *) \
+               col=$$grn; res=PASS; \
+             ;; \
+             esac; \
+           elif test $$? -ne 77; then \
+             all=`expr $$all + 1`; \
+             case " $(XFAIL_TESTS) " in \
+             *[\ \     ]$$tst[\ \      ]*) \
+               xfail=`expr $$xfail + 1`; \
+               col=$$lgn; res=XFAIL; \
+             ;; \
+             *) \
+               failed=`expr $$failed + 1`; \
+               col=$$red; res=FAIL; \
+             ;; \
+             esac; \
+           else \
+             skip=`expr $$skip + 1`; \
+             col=$$blu; res=SKIP; \
+           fi; \
+           echo "$${col}$$res$${std}: $$tst"; \
+         done; \
+         if test "$$all" -eq 1; then \
+           tests="test"; \
+           All=""; \
+         else \
+           tests="tests"; \
+           All="All "; \
+         fi; \
+         if test "$$failed" -eq 0; then \
+           if test "$$xfail" -eq 0; then \
+             banner="$$All$$all $$tests passed"; \
+           else \
+             if test "$$xfail" -eq 1; then failures=failure; else failures=failures; fi; \
+             banner="$$All$$all $$tests behaved as expected ($$xfail expected $$failures)"; \
+           fi; \
+         else \
+           if test "$$xpass" -eq 0; then \
+             banner="$$failed of $$all $$tests failed"; \
+           else \
+             if test "$$xpass" -eq 1; then passes=pass; else passes=passes; fi; \
+             banner="$$failed of $$all $$tests did not behave as expected ($$xpass unexpected $$passes)"; \
+           fi; \
+         fi; \
+         dashes="$$banner"; \
+         skipped=""; \
+         if test "$$skip" -ne 0; then \
+           if test "$$skip" -eq 1; then \
+             skipped="($$skip test was not run)"; \
+           else \
+             skipped="($$skip tests were not run)"; \
+           fi; \
+           test `echo "$$skipped" | wc -c` -le `echo "$$banner" | wc -c` || \
+             dashes="$$skipped"; \
+         fi; \
+         report=""; \
+         if test "$$failed" -ne 0 && test -n "$(PACKAGE_BUGREPORT)"; then \
+           report="Please report to $(PACKAGE_BUGREPORT)"; \
+           test `echo "$$report" | wc -c` -le `echo "$$banner" | wc -c` || \
+             dashes="$$report"; \
+         fi; \
+         dashes=`echo "$$dashes" | sed s/./=/g`; \
+         if test "$$failed" -eq 0; then \
+           echo "$$grn$$dashes"; \
+         else \
+           echo "$$red$$dashes"; \
+         fi; \
+         echo "$$banner"; \
+         test -z "$$skipped" || echo "$$skipped"; \
+         test -z "$$report" || echo "$$report"; \
+         echo "$$dashes$$std"; \
+         test "$$failed" -eq 0; \
+       else :; fi
+
+distdir: $(DISTFILES)
+       @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+       topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+       list='$(DISTFILES)'; \
+         dist_files=`for file in $$list; do echo $$file; done | \
+         sed -e "s|^$$srcdirstrip/||;t" \
+             -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+       case $$dist_files in \
+         */*) $(MKDIR_P) `echo "$$dist_files" | \
+                          sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+                          sort -u` ;; \
+       esac; \
+       for file in $$dist_files; do \
+         if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+         if test -d $$d/$$file; then \
+           dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+           if test -d "$(distdir)/$$file"; then \
+             find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+           fi; \
+           if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+             cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+             find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+           fi; \
+           cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+         else \
+           test -f "$(distdir)/$$file" \
+           || cp -p $$d/$$file "$(distdir)/$$file" \
+           || exit 1; \
+         fi; \
+       done
+check-am: all-am
+       $(MAKE) $(AM_MAKEFLAGS) $(check_PROGRAMS)
+       $(MAKE) $(AM_MAKEFLAGS) check-TESTS
+check: check-am
+all-am: Makefile
+installdirs:
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+       @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+       $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+         install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+         `test -z '$(STRIP)' || \
+           echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+       -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+       -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+       @echo "This command is intended for maintainers to use"
+       @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-checkPROGRAMS clean-generic clean-libtool \
+       mostlyclean-am
+
+distclean: distclean-am
+       -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+       distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+       -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+       mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: $(top_builddir)/ansi2knr check-am install-am install-strip
+
+.PHONY: CTAGS GTAGS all all-am check check-TESTS check-am clean \
+       clean-checkPROGRAMS clean-generic clean-libtool ctags \
+       distclean distclean-compile distclean-generic \
+       distclean-libtool distclean-tags distdir dvi dvi-am html \
+       html-am info info-am install install-am install-data \
+       install-data-am install-dvi install-dvi-am install-exec \
+       install-exec-am install-html install-html-am install-info \
+       install-info-am install-man install-pdf install-pdf-am \
+       install-ps install-ps-am install-strip installcheck \
+       installcheck-am installdirs maintainer-clean \
+       maintainer-clean-generic mostlyclean mostlyclean-compile \
+       mostlyclean-generic mostlyclean-kr mostlyclean-libtool pdf \
+       pdf-am ps ps-am tags uninstall uninstall-am
+
+
+$(top_builddir)/tests/libtests.la:
+       cd $(top_builddir)/tests; $(MAKE) $(AM_MAKEFLAGS) libtests.la
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/tests/mpbsd/allfuns.c b/tests/mpbsd/allfuns.c

new file mode 100644 (file)

index 0000000..2aa25d3
--- /dev/null
+++ b/tests/mpbsd/allfuns.c
@@ -0,0 +1,56 @@
+/* A test program doing nothing really, just linking to all the BSD MP
+   functions that're supposed to exist.
+
+Copyright 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "mp.h"
+
+int
+main (int argc, char *argv[])
+{
+  /* Two operands and two result slots; the values are of no interest.  */
+  MINT   *op1, *op2, *res, *aux;
+  short  srem;
+  int    have_arg = (argc > 1);
+
+  mp_set_memory_functions (NULL, NULL, NULL);
+  op1 = itom (123);  op2 = xtom ("DEADBEEF");
+  res = itom (0);    aux = itom (0);
+
+  /* One reference to each entry point; return values are ignored.  */
+  move (op1, op2);
+  madd (op1, op2, res);
+  msub (op1, op2, res);
+  mult (op1, op2, res);
+  mdiv (op2, op1, res, aux);
+  sdiv (op2, 2, res, &srem);
+  msqrt (op1, res, aux);
+  pow (op2, op1, op1, res);
+  rpow (op1, 3, res);
+  gcd (op1, op2, res);
+  mcmp (op1, op2);
+
+  /* min and mout are only called when a command line argument is given.  */
+  if (have_arg)
+    { min (res);  mout (op1); }
+  mtox (op2);
+  mfree (op1);
+  exit (0);
+}
diff --git a/tests/mpbsd/t-itom.c b/tests/mpbsd/t-itom.c

new file mode 100644 (file)

index 0000000..9b49cb0
--- /dev/null
+++ b/tests/mpbsd/t-itom.c
@@ -0,0 +1,74 @@
+/* Test itom.
+
+Copyright 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "mp.h"
+#include "tests.h"
+
+#define SGN(x)       ((x) < 0 ? -1 : (x) == 0 ? 0 : 1)
+
+
+void
+check_data (void)
+{
+  /* Arguments for itom, with the _mp_size and low limb wanted back.  */
+  static const struct {
+    short      m;
+    mp_size_t  want_size;
+    mp_limb_t  want_limb;
+  } data[] = {
+    {  0,  0, 0 },   /* the limb is not examined when the size is 0 */
+    {  1,  1, 1 },
+    { -1, -1, 1 },
+    {  SHRT_MAX,  1,  SHRT_MAX },
+    { -SHRT_MAX, -1,  SHRT_MAX },
+    {  SHRT_MIN, -1, -SHRT_MIN },
+  };
+
+  int  idx;
+
+  for (idx = 0; idx < numberof (data); idx++)
+    {
+      MINT  *got = itom (data[idx].m);
+      /* Good when the size agrees and, if nonzero, so does limb 0.  */
+      if (got->_mp_size == data[idx].want_size
+          && (got->_mp_size == 0 || got->_mp_d[0] == data[idx].want_limb))
+        {
+          mfree (got);
+          continue;
+        }
+      printf ("itom wrong on data[%d]\n", idx);
+      abort();
+    }
+}
+
+
+int
+main (void)
+{
+  tests_start ();
+  /* The only check in this program: the itom table in check_data.  */
+  check_data ();
+  /* Reached only if check_data did not abort.  */
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpbsd/t-mtox.c b/tests/mpbsd/t-mtox.c

new file mode 100644 (file)

index 0000000..1138e76
--- /dev/null
+++ b/tests/mpbsd/t-mtox.c
@@ -0,0 +1,86 @@
+/* Test mtox.
+
+Copyright 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <string.h>            /* for strcmp, strlen */
+#include <stdlib.h>            /* for abort */
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "mp.h"
+#include "tests.h"
+
+
+void
+check_random (void)
+{
+  gmp_randstate_ptr  rands = RANDS;
+  mpz_t  z;
+  int    left;
+
+  /* 1000 rounds, each handing mpz_erandomb a 6-limb bit count.  */
+  mpz_init (z);
+  for (left = 1000; left > 0; left--)
+    {
+      char  *got, *want;
+
+      mpz_erandomb (z, rands, 6 * GMP_LIMB_BITS);
+      got = mtox (z);
+      want = mpz_get_str (NULL, 16, z);   /* base 16 reference string */
+      if (strcmp (got, want) != 0)
+        {
+          printf ("mtox wrong result\n");
+          printf ("  got  \"%s\"\n", got);
+          printf ("  want \"%s\"\n", want);
+          abort ();
+        }
+      (*__gmp_free_func) (got, strlen (got) + 1);
+      (*__gmp_free_func) (want, strlen (want) + 1);
+    }
+  mpz_clear (z);
+}
+
+void
+check_mem (void)
+{
+  MINT  *zero = itom (0);
+  char  *str = mtox (zero);
+
+  /* Proceed only if tests_memory_valid accepts the mtox string.  */
+  if (tests_memory_valid (str))
+    {
+      mfree (zero);
+      (*__gmp_free_func) (str, strlen (str) + 1);
+      return;
+    }
+  printf ("Skipping t-mtox, cannot test libgmp and libmp memory together\n");
+  exit (0);
+}
+
+
+int
+main (void)
+{
+  tests_start ();
+  /* check_mem runs first; it may print a "Skipping" note and exit (0).  */
+  check_mem ();
+  check_random ();
+  /* Reached only when neither check aborted or exited.  */
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpf/Makefile.am b/tests/mpf/Makefile.am

new file mode 100644 (file)

index 0000000..dece8f7
--- /dev/null
+++ b/tests/mpf/Makefile.am
@@ -0,0 +1,32 @@
+## Process this file with automake to generate Makefile.in
+
+# Copyright 1996, 1999, 2000, 2001, 2002, 2003, 2004 Free Software
+# Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/tests
+LDADD = $(top_builddir)/tests/libtests.la $(top_builddir)/libgmp.la
+
+check_PROGRAMS = t-add t-sub t-conv t-sqrt t-sqrt_ui t-muldiv t-dm2exp reuse \
+  t-cmp_d t-cmp_si t-div t-fits t-get_d t-get_d_2exp \
+  t-get_si t-get_ui t-gsprec t-inp_str t-int_p t-mul_ui \
+  t-set t-set_q t-set_si t-set_ui t-trunc t-ui_div t-eq
+TESTS = $(check_PROGRAMS)
+
+$(top_builddir)/tests/libtests.la:
+       cd $(top_builddir)/tests; $(MAKE) $(AM_MAKEFLAGS) libtests.la
diff --git a/tests/mpf/Makefile.in b/tests/mpf/Makefile.in

new file mode 100644 (file)

index 0000000..f56e7de
--- /dev/null
+++ b/tests/mpf/Makefile.in
@@ -0,0 +1,915 @@
+# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation,
+# Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+# Copyright 1996, 1999, 2000, 2001, 2002, 2003, 2004 Free Software
+# Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+ANSI2KNR = $(top_builddir)/ansi2knr
+check_PROGRAMS = t-add$(EXEEXT) t-sub$(EXEEXT) t-conv$(EXEEXT) \
+       t-sqrt$(EXEEXT) t-sqrt_ui$(EXEEXT) t-muldiv$(EXEEXT) \
+       t-dm2exp$(EXEEXT) reuse$(EXEEXT) t-cmp_d$(EXEEXT) \
+       t-cmp_si$(EXEEXT) t-div$(EXEEXT) t-fits$(EXEEXT) \
+       t-get_d$(EXEEXT) t-get_d_2exp$(EXEEXT) t-get_si$(EXEEXT) \
+       t-get_ui$(EXEEXT) t-gsprec$(EXEEXT) t-inp_str$(EXEEXT) \
+       t-int_p$(EXEEXT) t-mul_ui$(EXEEXT) t-set$(EXEEXT) \
+       t-set_q$(EXEEXT) t-set_si$(EXEEXT) t-set_ui$(EXEEXT) \
+       t-trunc$(EXEEXT) t-ui_div$(EXEEXT) t-eq$(EXEEXT)
+subdir = tests/mpf
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
+       $(top_srcdir)/configure.in
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+       $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+reuse_SOURCES = reuse.c
+reuse_OBJECTS = reuse$U.$(OBJEXT)
+reuse_LDADD = $(LDADD)
+reuse_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_add_SOURCES = t-add.c
+t_add_OBJECTS = t-add$U.$(OBJEXT)
+t_add_LDADD = $(LDADD)
+t_add_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_cmp_d_SOURCES = t-cmp_d.c
+t_cmp_d_OBJECTS = t-cmp_d$U.$(OBJEXT)
+t_cmp_d_LDADD = $(LDADD)
+t_cmp_d_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_cmp_si_SOURCES = t-cmp_si.c
+t_cmp_si_OBJECTS = t-cmp_si$U.$(OBJEXT)
+t_cmp_si_LDADD = $(LDADD)
+t_cmp_si_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_conv_SOURCES = t-conv.c
+t_conv_OBJECTS = t-conv$U.$(OBJEXT)
+t_conv_LDADD = $(LDADD)
+t_conv_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_div_SOURCES = t-div.c
+t_div_OBJECTS = t-div$U.$(OBJEXT)
+t_div_LDADD = $(LDADD)
+t_div_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_dm2exp_SOURCES = t-dm2exp.c
+t_dm2exp_OBJECTS = t-dm2exp$U.$(OBJEXT)
+t_dm2exp_LDADD = $(LDADD)
+t_dm2exp_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_eq_SOURCES = t-eq.c
+t_eq_OBJECTS = t-eq$U.$(OBJEXT)
+t_eq_LDADD = $(LDADD)
+t_eq_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_fits_SOURCES = t-fits.c
+t_fits_OBJECTS = t-fits$U.$(OBJEXT)
+t_fits_LDADD = $(LDADD)
+t_fits_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_get_d_SOURCES = t-get_d.c
+t_get_d_OBJECTS = t-get_d$U.$(OBJEXT)
+t_get_d_LDADD = $(LDADD)
+t_get_d_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_get_d_2exp_SOURCES = t-get_d_2exp.c
+t_get_d_2exp_OBJECTS = t-get_d_2exp$U.$(OBJEXT)
+t_get_d_2exp_LDADD = $(LDADD)
+t_get_d_2exp_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_get_si_SOURCES = t-get_si.c
+t_get_si_OBJECTS = t-get_si$U.$(OBJEXT)
+t_get_si_LDADD = $(LDADD)
+t_get_si_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_get_ui_SOURCES = t-get_ui.c
+t_get_ui_OBJECTS = t-get_ui$U.$(OBJEXT)
+t_get_ui_LDADD = $(LDADD)
+t_get_ui_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_gsprec_SOURCES = t-gsprec.c
+t_gsprec_OBJECTS = t-gsprec$U.$(OBJEXT)
+t_gsprec_LDADD = $(LDADD)
+t_gsprec_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_inp_str_SOURCES = t-inp_str.c
+t_inp_str_OBJECTS = t-inp_str$U.$(OBJEXT)
+t_inp_str_LDADD = $(LDADD)
+t_inp_str_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_int_p_SOURCES = t-int_p.c
+t_int_p_OBJECTS = t-int_p$U.$(OBJEXT)
+t_int_p_LDADD = $(LDADD)
+t_int_p_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_mul_ui_SOURCES = t-mul_ui.c
+t_mul_ui_OBJECTS = t-mul_ui$U.$(OBJEXT)
+t_mul_ui_LDADD = $(LDADD)
+t_mul_ui_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_muldiv_SOURCES = t-muldiv.c
+t_muldiv_OBJECTS = t-muldiv$U.$(OBJEXT)
+t_muldiv_LDADD = $(LDADD)
+t_muldiv_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_set_SOURCES = t-set.c
+t_set_OBJECTS = t-set$U.$(OBJEXT)
+t_set_LDADD = $(LDADD)
+t_set_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_set_q_SOURCES = t-set_q.c
+t_set_q_OBJECTS = t-set_q$U.$(OBJEXT)
+t_set_q_LDADD = $(LDADD)
+t_set_q_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_set_si_SOURCES = t-set_si.c
+t_set_si_OBJECTS = t-set_si$U.$(OBJEXT)
+t_set_si_LDADD = $(LDADD)
+t_set_si_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_set_ui_SOURCES = t-set_ui.c
+t_set_ui_OBJECTS = t-set_ui$U.$(OBJEXT)
+t_set_ui_LDADD = $(LDADD)
+t_set_ui_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_sqrt_SOURCES = t-sqrt.c
+t_sqrt_OBJECTS = t-sqrt$U.$(OBJEXT)
+t_sqrt_LDADD = $(LDADD)
+t_sqrt_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_sqrt_ui_SOURCES = t-sqrt_ui.c
+t_sqrt_ui_OBJECTS = t-sqrt_ui$U.$(OBJEXT)
+t_sqrt_ui_LDADD = $(LDADD)
+t_sqrt_ui_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_sub_SOURCES = t-sub.c
+t_sub_OBJECTS = t-sub$U.$(OBJEXT)
+t_sub_LDADD = $(LDADD)
+t_sub_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_trunc_SOURCES = t-trunc.c
+t_trunc_OBJECTS = t-trunc$U.$(OBJEXT)
+t_trunc_LDADD = $(LDADD)
+t_trunc_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_ui_div_SOURCES = t-ui_div.c
+t_ui_div_OBJECTS = t-ui_div$U.$(OBJEXT)
+t_ui_div_LDADD = $(LDADD)
+t_ui_div_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
+depcomp =
+am__depfiles_maybe =
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+       $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+       $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
+       $(LDFLAGS) -o $@
+SOURCES = reuse.c t-add.c t-cmp_d.c t-cmp_si.c t-conv.c t-div.c \
+       t-dm2exp.c t-eq.c t-fits.c t-get_d.c t-get_d_2exp.c t-get_si.c \
+       t-get_ui.c t-gsprec.c t-inp_str.c t-int_p.c t-mul_ui.c \
+       t-muldiv.c t-set.c t-set_q.c t-set_si.c t-set_ui.c t-sqrt.c \
+       t-sqrt_ui.c t-sub.c t-trunc.c t-ui_div.c
+DIST_SOURCES = reuse.c t-add.c t-cmp_d.c t-cmp_si.c t-conv.c t-div.c \
+       t-dm2exp.c t-eq.c t-fits.c t-get_d.c t-get_d_2exp.c t-get_si.c \
+       t-get_ui.c t-gsprec.c t-inp_str.c t-int_p.c t-mul_ui.c \
+       t-muldiv.c t-set.c t-set_q.c t-set_si.c t-set_ui.c t-sqrt.c \
+       t-sqrt_ui.c t-sub.c t-trunc.c t-ui_div.c
+ETAGS = etags
+CTAGS = ctags
+am__tty_colors = \
+red=; grn=; lgn=; blu=; std=
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ABI = @ABI@
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AS = @AS@
+ASMFLAGS = @ASMFLAGS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CALLING_CONVENTIONS_OBJS = @CALLING_CONVENTIONS_OBJS@
+CC = @CC@
+CCAS = @CCAS@
+CC_FOR_BUILD = @CC_FOR_BUILD@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CPP_FOR_BUILD = @CPP_FOR_BUILD@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFN_LONG_LONG_LIMB = @DEFN_LONG_LONG_LIMB@
+DEFS = @DEFS@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+EXEEXT_FOR_BUILD = @EXEEXT_FOR_BUILD@
+FGREP = @FGREP@
+GMP_LDFLAGS = @GMP_LDFLAGS@
+GMP_LIMB_BITS = @GMP_LIMB_BITS@
+GMP_NAIL_BITS = @GMP_NAIL_BITS@
+GREP = @GREP@
+HAVE_CLOCK_01 = @HAVE_CLOCK_01@
+HAVE_CPUTIME_01 = @HAVE_CPUTIME_01@
+HAVE_GETRUSAGE_01 = @HAVE_GETRUSAGE_01@
+HAVE_GETTIMEOFDAY_01 = @HAVE_GETTIMEOFDAY_01@
+HAVE_HOST_CPU_FAMILY_power = @HAVE_HOST_CPU_FAMILY_power@
+HAVE_HOST_CPU_FAMILY_powerpc = @HAVE_HOST_CPU_FAMILY_powerpc@
+HAVE_SIGACTION_01 = @HAVE_SIGACTION_01@
+HAVE_SIGALTSTACK_01 = @HAVE_SIGALTSTACK_01@
+HAVE_SIGSTACK_01 = @HAVE_SIGSTACK_01@
+HAVE_STACK_T_01 = @HAVE_STACK_T_01@
+HAVE_SYS_RESOURCE_H_01 = @HAVE_SYS_RESOURCE_H_01@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LEX = @LEX@
+LEXLIB = @LEXLIB@
+LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
+LIBCURSES = @LIBCURSES@
+LIBGMPXX_LDFLAGS = @LIBGMPXX_LDFLAGS@
+LIBGMP_DLL = @LIBGMP_DLL@
+LIBGMP_LDFLAGS = @LIBGMP_LDFLAGS@
+LIBM = @LIBM@
+LIBM_FOR_BUILD = @LIBM_FOR_BUILD@
+LIBOBJS = @LIBOBJS@
+LIBREADLINE = @LIBREADLINE@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+M4 = @M4@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
+STRIP = @STRIP@
+TAL_OBJECT = @TAL_OBJECT@
+TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
+U = @U@
+U_FOR_BUILD = @U_FOR_BUILD@
+VERSION = @VERSION@
+WITH_READLINE_01 = @WITH_READLINE_01@
+YACC = @YACC@
+YFLAGS = @YFLAGS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__leading_dot = @am__leading_dot@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+gmp_srclinks = @gmp_srclinks@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+mpn_objects = @mpn_objects@
+mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
+mpn_objs_in_libmp = @mpn_objs_in_libmp@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/tests
+LDADD = $(top_builddir)/tests/libtests.la $(top_builddir)/libgmp.la
+TESTS = $(check_PROGRAMS)
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps)
+       @for dep in $?; do \
+         case '$(am__configure_deps)' in \
+           *$$dep*) \
+             ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+               && { if test -f $@; then exit 0; else break; fi; }; \
+             exit 1;; \
+         esac; \
+       done; \
+       echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps tests/mpf/Makefile'; \
+       $(am__cd) $(top_srcdir) && \
+         $(AUTOMAKE) --gnu --ignore-deps tests/mpf/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+       @case '$?' in \
+         *config.status*) \
+           cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+         *) \
+           echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+           cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+       esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+clean-checkPROGRAMS:
+       @list='$(check_PROGRAMS)'; test -n "$$list" || exit 0; \
+       echo " rm -f" $$list; \
+       rm -f $$list || exit $$?; \
+       test -n "$(EXEEXT)" || exit 0; \
+       list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
+       echo " rm -f" $$list; \
+       rm -f $$list
+reuse$(EXEEXT): $(reuse_OBJECTS) $(reuse_DEPENDENCIES) 
+       @rm -f reuse$(EXEEXT)
+       $(LINK) $(reuse_OBJECTS) $(reuse_LDADD) $(LIBS)
+t-add$(EXEEXT): $(t_add_OBJECTS) $(t_add_DEPENDENCIES) 
+       @rm -f t-add$(EXEEXT)
+       $(LINK) $(t_add_OBJECTS) $(t_add_LDADD) $(LIBS)
+t-cmp_d$(EXEEXT): $(t_cmp_d_OBJECTS) $(t_cmp_d_DEPENDENCIES) 
+       @rm -f t-cmp_d$(EXEEXT)
+       $(LINK) $(t_cmp_d_OBJECTS) $(t_cmp_d_LDADD) $(LIBS)
+t-cmp_si$(EXEEXT): $(t_cmp_si_OBJECTS) $(t_cmp_si_DEPENDENCIES) 
+       @rm -f t-cmp_si$(EXEEXT)
+       $(LINK) $(t_cmp_si_OBJECTS) $(t_cmp_si_LDADD) $(LIBS)
+t-conv$(EXEEXT): $(t_conv_OBJECTS) $(t_conv_DEPENDENCIES) 
+       @rm -f t-conv$(EXEEXT)
+       $(LINK) $(t_conv_OBJECTS) $(t_conv_LDADD) $(LIBS)
+t-div$(EXEEXT): $(t_div_OBJECTS) $(t_div_DEPENDENCIES) 
+       @rm -f t-div$(EXEEXT)
+       $(LINK) $(t_div_OBJECTS) $(t_div_LDADD) $(LIBS)
+t-dm2exp$(EXEEXT): $(t_dm2exp_OBJECTS) $(t_dm2exp_DEPENDENCIES) 
+       @rm -f t-dm2exp$(EXEEXT)
+       $(LINK) $(t_dm2exp_OBJECTS) $(t_dm2exp_LDADD) $(LIBS)
+t-eq$(EXEEXT): $(t_eq_OBJECTS) $(t_eq_DEPENDENCIES) 
+       @rm -f t-eq$(EXEEXT)
+       $(LINK) $(t_eq_OBJECTS) $(t_eq_LDADD) $(LIBS)
+t-fits$(EXEEXT): $(t_fits_OBJECTS) $(t_fits_DEPENDENCIES) 
+       @rm -f t-fits$(EXEEXT)
+       $(LINK) $(t_fits_OBJECTS) $(t_fits_LDADD) $(LIBS)
+t-get_d$(EXEEXT): $(t_get_d_OBJECTS) $(t_get_d_DEPENDENCIES) 
+       @rm -f t-get_d$(EXEEXT)
+       $(LINK) $(t_get_d_OBJECTS) $(t_get_d_LDADD) $(LIBS)
+t-get_d_2exp$(EXEEXT): $(t_get_d_2exp_OBJECTS) $(t_get_d_2exp_DEPENDENCIES) 
+       @rm -f t-get_d_2exp$(EXEEXT)
+       $(LINK) $(t_get_d_2exp_OBJECTS) $(t_get_d_2exp_LDADD) $(LIBS)
+t-get_si$(EXEEXT): $(t_get_si_OBJECTS) $(t_get_si_DEPENDENCIES) 
+       @rm -f t-get_si$(EXEEXT)
+       $(LINK) $(t_get_si_OBJECTS) $(t_get_si_LDADD) $(LIBS)
+t-get_ui$(EXEEXT): $(t_get_ui_OBJECTS) $(t_get_ui_DEPENDENCIES) 
+       @rm -f t-get_ui$(EXEEXT)
+       $(LINK) $(t_get_ui_OBJECTS) $(t_get_ui_LDADD) $(LIBS)
+t-gsprec$(EXEEXT): $(t_gsprec_OBJECTS) $(t_gsprec_DEPENDENCIES) 
+       @rm -f t-gsprec$(EXEEXT)
+       $(LINK) $(t_gsprec_OBJECTS) $(t_gsprec_LDADD) $(LIBS)
+t-inp_str$(EXEEXT): $(t_inp_str_OBJECTS) $(t_inp_str_DEPENDENCIES) 
+       @rm -f t-inp_str$(EXEEXT)
+       $(LINK) $(t_inp_str_OBJECTS) $(t_inp_str_LDADD) $(LIBS)
+t-int_p$(EXEEXT): $(t_int_p_OBJECTS) $(t_int_p_DEPENDENCIES) 
+       @rm -f t-int_p$(EXEEXT)
+       $(LINK) $(t_int_p_OBJECTS) $(t_int_p_LDADD) $(LIBS)
+t-mul_ui$(EXEEXT): $(t_mul_ui_OBJECTS) $(t_mul_ui_DEPENDENCIES) 
+       @rm -f t-mul_ui$(EXEEXT)
+       $(LINK) $(t_mul_ui_OBJECTS) $(t_mul_ui_LDADD) $(LIBS)
+t-muldiv$(EXEEXT): $(t_muldiv_OBJECTS) $(t_muldiv_DEPENDENCIES) 
+       @rm -f t-muldiv$(EXEEXT)
+       $(LINK) $(t_muldiv_OBJECTS) $(t_muldiv_LDADD) $(LIBS)
+t-set$(EXEEXT): $(t_set_OBJECTS) $(t_set_DEPENDENCIES) 
+       @rm -f t-set$(EXEEXT)
+       $(LINK) $(t_set_OBJECTS) $(t_set_LDADD) $(LIBS)
+t-set_q$(EXEEXT): $(t_set_q_OBJECTS) $(t_set_q_DEPENDENCIES) 
+       @rm -f t-set_q$(EXEEXT)
+       $(LINK) $(t_set_q_OBJECTS) $(t_set_q_LDADD) $(LIBS)
+t-set_si$(EXEEXT): $(t_set_si_OBJECTS) $(t_set_si_DEPENDENCIES) 
+       @rm -f t-set_si$(EXEEXT)
+       $(LINK) $(t_set_si_OBJECTS) $(t_set_si_LDADD) $(LIBS)
+t-set_ui$(EXEEXT): $(t_set_ui_OBJECTS) $(t_set_ui_DEPENDENCIES) 
+       @rm -f t-set_ui$(EXEEXT)
+       $(LINK) $(t_set_ui_OBJECTS) $(t_set_ui_LDADD) $(LIBS)
+t-sqrt$(EXEEXT): $(t_sqrt_OBJECTS) $(t_sqrt_DEPENDENCIES) 
+       @rm -f t-sqrt$(EXEEXT)
+       $(LINK) $(t_sqrt_OBJECTS) $(t_sqrt_LDADD) $(LIBS)
+t-sqrt_ui$(EXEEXT): $(t_sqrt_ui_OBJECTS) $(t_sqrt_ui_DEPENDENCIES) 
+       @rm -f t-sqrt_ui$(EXEEXT)
+       $(LINK) $(t_sqrt_ui_OBJECTS) $(t_sqrt_ui_LDADD) $(LIBS)
+t-sub$(EXEEXT): $(t_sub_OBJECTS) $(t_sub_DEPENDENCIES) 
+       @rm -f t-sub$(EXEEXT)
+       $(LINK) $(t_sub_OBJECTS) $(t_sub_LDADD) $(LIBS)
+t-trunc$(EXEEXT): $(t_trunc_OBJECTS) $(t_trunc_DEPENDENCIES) 
+       @rm -f t-trunc$(EXEEXT)
+       $(LINK) $(t_trunc_OBJECTS) $(t_trunc_LDADD) $(LIBS)
+t-ui_div$(EXEEXT): $(t_ui_div_OBJECTS) $(t_ui_div_DEPENDENCIES) 
+       @rm -f t-ui_div$(EXEEXT)
+       $(LINK) $(t_ui_div_OBJECTS) $(t_ui_div_LDADD) $(LIBS)
+
+mostlyclean-compile:
+       -rm -f *.$(OBJEXT)
+
+distclean-compile:
+       -rm -f *.tab.c
+$(top_builddir)/ansi2knr:
+       $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
+
+mostlyclean-kr:
+       -test "$U" = "" || rm -f *_.c
+
+.c.o:
+       $(COMPILE) -c $<
+
+.c.obj:
+       $(COMPILE) -c `$(CYGPATH_W) '$<'`
+
+.c.lo:
+       $(LTCOMPILE) -c -o $@ $<
+reuse_.c: reuse.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/reuse.c; then echo $(srcdir)/reuse.c; else echo reuse.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-add_.c: t-add.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-add.c; then echo $(srcdir)/t-add.c; else echo t-add.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-cmp_d_.c: t-cmp_d.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-cmp_d.c; then echo $(srcdir)/t-cmp_d.c; else echo t-cmp_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-cmp_si_.c: t-cmp_si.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-cmp_si.c; then echo $(srcdir)/t-cmp_si.c; else echo t-cmp_si.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-conv_.c: t-conv.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-conv.c; then echo $(srcdir)/t-conv.c; else echo t-conv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-div_.c: t-div.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-div.c; then echo $(srcdir)/t-div.c; else echo t-div.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-dm2exp_.c: t-dm2exp.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-dm2exp.c; then echo $(srcdir)/t-dm2exp.c; else echo t-dm2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-eq_.c: t-eq.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-eq.c; then echo $(srcdir)/t-eq.c; else echo t-eq.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-fits_.c: t-fits.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-fits.c; then echo $(srcdir)/t-fits.c; else echo t-fits.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-get_d_.c: t-get_d.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-get_d.c; then echo $(srcdir)/t-get_d.c; else echo t-get_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-get_d_2exp_.c: t-get_d_2exp.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-get_d_2exp.c; then echo $(srcdir)/t-get_d_2exp.c; else echo t-get_d_2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-get_si_.c: t-get_si.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-get_si.c; then echo $(srcdir)/t-get_si.c; else echo t-get_si.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-get_ui_.c: t-get_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-get_ui.c; then echo $(srcdir)/t-get_ui.c; else echo t-get_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-gsprec_.c: t-gsprec.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-gsprec.c; then echo $(srcdir)/t-gsprec.c; else echo t-gsprec.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-inp_str_.c: t-inp_str.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-inp_str.c; then echo $(srcdir)/t-inp_str.c; else echo t-inp_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-int_p_.c: t-int_p.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-int_p.c; then echo $(srcdir)/t-int_p.c; else echo t-int_p.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-mul_ui_.c: t-mul_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-mul_ui.c; then echo $(srcdir)/t-mul_ui.c; else echo t-mul_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-muldiv_.c: t-muldiv.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-muldiv.c; then echo $(srcdir)/t-muldiv.c; else echo t-muldiv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-set_.c: t-set.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-set.c; then echo $(srcdir)/t-set.c; else echo t-set.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-set_q_.c: t-set_q.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-set_q.c; then echo $(srcdir)/t-set_q.c; else echo t-set_q.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-set_si_.c: t-set_si.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-set_si.c; then echo $(srcdir)/t-set_si.c; else echo t-set_si.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-set_ui_.c: t-set_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-set_ui.c; then echo $(srcdir)/t-set_ui.c; else echo t-set_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-sqrt_.c: t-sqrt.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-sqrt.c; then echo $(srcdir)/t-sqrt.c; else echo t-sqrt.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-sqrt_ui_.c: t-sqrt_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-sqrt_ui.c; then echo $(srcdir)/t-sqrt_ui.c; else echo t-sqrt_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-sub_.c: t-sub.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-sub.c; then echo $(srcdir)/t-sub.c; else echo t-sub.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-trunc_.c: t-trunc.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-trunc.c; then echo $(srcdir)/t-trunc.c; else echo t-trunc.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-ui_div_.c: t-ui_div.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-ui_div.c; then echo $(srcdir)/t-ui_div.c; else echo t-ui_div.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+reuse_.$(OBJEXT) reuse_.lo t-add_.$(OBJEXT) t-add_.lo \
+t-cmp_d_.$(OBJEXT) t-cmp_d_.lo t-cmp_si_.$(OBJEXT) t-cmp_si_.lo \
+t-conv_.$(OBJEXT) t-conv_.lo t-div_.$(OBJEXT) t-div_.lo \
+t-dm2exp_.$(OBJEXT) t-dm2exp_.lo t-eq_.$(OBJEXT) t-eq_.lo \
+t-fits_.$(OBJEXT) t-fits_.lo t-get_d_.$(OBJEXT) t-get_d_.lo \
+t-get_d_2exp_.$(OBJEXT) t-get_d_2exp_.lo t-get_si_.$(OBJEXT) \
+t-get_si_.lo t-get_ui_.$(OBJEXT) t-get_ui_.lo t-gsprec_.$(OBJEXT) \
+t-gsprec_.lo t-inp_str_.$(OBJEXT) t-inp_str_.lo t-int_p_.$(OBJEXT) \
+t-int_p_.lo t-mul_ui_.$(OBJEXT) t-mul_ui_.lo t-muldiv_.$(OBJEXT) \
+t-muldiv_.lo t-set_.$(OBJEXT) t-set_.lo t-set_q_.$(OBJEXT) t-set_q_.lo \
+t-set_si_.$(OBJEXT) t-set_si_.lo t-set_ui_.$(OBJEXT) t-set_ui_.lo \
+t-sqrt_.$(OBJEXT) t-sqrt_.lo t-sqrt_ui_.$(OBJEXT) t-sqrt_ui_.lo \
+t-sub_.$(OBJEXT) t-sub_.lo t-trunc_.$(OBJEXT) t-trunc_.lo \
+t-ui_div_.$(OBJEXT) t-ui_div_.lo : $(ANSI2KNR)
+
+mostlyclean-libtool:
+       -rm -f *.lo
+
+clean-libtool:
+       -rm -rf .libs _libs
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+       list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       mkid -fID $$unique
+tags: TAGS
+
+TAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+               $(TAGS_FILES) $(LISP)
+       set x; \
+       here=`pwd`; \
+       list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       shift; \
+       if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+         test -n "$$unique" || unique=$$empty_fix; \
+         if test $$# -gt 0; then \
+           $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+             "$$@" $$unique; \
+         else \
+           $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+             $$unique; \
+         fi; \
+       fi
+ctags: CTAGS
+CTAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+               $(TAGS_FILES) $(LISP)
+       list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       test -z "$(CTAGS_ARGS)$$unique" \
+         || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+            $$unique
+
+GTAGS:
+       here=`$(am__cd) $(top_builddir) && pwd` \
+         && $(am__cd) $(top_srcdir) \
+         && gtags -i $(GTAGS_ARGS) "$$here"
+
+distclean-tags:
+       -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+check-TESTS: $(TESTS)
+       @failed=0; all=0; xfail=0; xpass=0; skip=0; \
+       srcdir=$(srcdir); export srcdir; \
+       list=' $(TESTS) '; \
+       $(am__tty_colors); \
+       if test -n "$$list"; then \
+         for tst in $$list; do \
+           if test -f ./$$tst; then dir=./; \
+           elif test -f $$tst; then dir=; \
+           else dir="$(srcdir)/"; fi; \
+           if $(TESTS_ENVIRONMENT) $${dir}$$tst; then \
+             all=`expr $$all + 1`; \
+             case " $(XFAIL_TESTS) " in \
+             *[\ \     ]$$tst[\ \      ]*) \
+               xpass=`expr $$xpass + 1`; \
+               failed=`expr $$failed + 1`; \
+               col=$$red; res=XPASS; \
+             ;; \
+             *) \
+               col=$$grn; res=PASS; \
+             ;; \
+             esac; \
+           elif test $$? -ne 77; then \
+             all=`expr $$all + 1`; \
+             case " $(XFAIL_TESTS) " in \
+             *[\ \     ]$$tst[\ \      ]*) \
+               xfail=`expr $$xfail + 1`; \
+               col=$$lgn; res=XFAIL; \
+             ;; \
+             *) \
+               failed=`expr $$failed + 1`; \
+               col=$$red; res=FAIL; \
+             ;; \
+             esac; \
+           else \
+             skip=`expr $$skip + 1`; \
+             col=$$blu; res=SKIP; \
+           fi; \
+           echo "$${col}$$res$${std}: $$tst"; \
+         done; \
+         if test "$$all" -eq 1; then \
+           tests="test"; \
+           All=""; \
+         else \
+           tests="tests"; \
+           All="All "; \
+         fi; \
+         if test "$$failed" -eq 0; then \
+           if test "$$xfail" -eq 0; then \
+             banner="$$All$$all $$tests passed"; \
+           else \
+             if test "$$xfail" -eq 1; then failures=failure; else failures=failures; fi; \
+             banner="$$All$$all $$tests behaved as expected ($$xfail expected $$failures)"; \
+           fi; \
+         else \
+           if test "$$xpass" -eq 0; then \
+             banner="$$failed of $$all $$tests failed"; \
+           else \
+             if test "$$xpass" -eq 1; then passes=pass; else passes=passes; fi; \
+             banner="$$failed of $$all $$tests did not behave as expected ($$xpass unexpected $$passes)"; \
+           fi; \
+         fi; \
+         dashes="$$banner"; \
+         skipped=""; \
+         if test "$$skip" -ne 0; then \
+           if test "$$skip" -eq 1; then \
+             skipped="($$skip test was not run)"; \
+           else \
+             skipped="($$skip tests were not run)"; \
+           fi; \
+           test `echo "$$skipped" | wc -c` -le `echo "$$banner" | wc -c` || \
+             dashes="$$skipped"; \
+         fi; \
+         report=""; \
+         if test "$$failed" -ne 0 && test -n "$(PACKAGE_BUGREPORT)"; then \
+           report="Please report to $(PACKAGE_BUGREPORT)"; \
+           test `echo "$$report" | wc -c` -le `echo "$$banner" | wc -c` || \
+             dashes="$$report"; \
+         fi; \
+         dashes=`echo "$$dashes" | sed s/./=/g`; \
+         if test "$$failed" -eq 0; then \
+           echo "$$grn$$dashes"; \
+         else \
+           echo "$$red$$dashes"; \
+         fi; \
+         echo "$$banner"; \
+         test -z "$$skipped" || echo "$$skipped"; \
+         test -z "$$report" || echo "$$report"; \
+         echo "$$dashes$$std"; \
+         test "$$failed" -eq 0; \
+       else :; fi
+
+distdir: $(DISTFILES)
+       @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+       topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+       list='$(DISTFILES)'; \
+         dist_files=`for file in $$list; do echo $$file; done | \
+         sed -e "s|^$$srcdirstrip/||;t" \
+             -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+       case $$dist_files in \
+         */*) $(MKDIR_P) `echo "$$dist_files" | \
+                          sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+                          sort -u` ;; \
+       esac; \
+       for file in $$dist_files; do \
+         if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+         if test -d $$d/$$file; then \
+           dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+           if test -d "$(distdir)/$$file"; then \
+             find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+           fi; \
+           if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+             cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+             find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+           fi; \
+           cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+         else \
+           test -f "$(distdir)/$$file" \
+           || cp -p $$d/$$file "$(distdir)/$$file" \
+           || exit 1; \
+         fi; \
+       done
+check-am: all-am
+       $(MAKE) $(AM_MAKEFLAGS) $(check_PROGRAMS)
+       $(MAKE) $(AM_MAKEFLAGS) check-TESTS
+check: check-am
+all-am: Makefile
+installdirs:
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+       @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+       $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+         install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+         `test -z '$(STRIP)' || \
+           echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+       -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+       -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+       @echo "This command is intended for maintainers to use"
+       @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-checkPROGRAMS clean-generic clean-libtool \
+       mostlyclean-am
+
+distclean: distclean-am
+       -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+       distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+       -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+       mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: $(top_builddir)/ansi2knr check-am install-am install-strip
+
+.PHONY: CTAGS GTAGS all all-am check check-TESTS check-am clean \
+       clean-checkPROGRAMS clean-generic clean-libtool ctags \
+       distclean distclean-compile distclean-generic \
+       distclean-libtool distclean-tags distdir dvi dvi-am html \
+       html-am info info-am install install-am install-data \
+       install-data-am install-dvi install-dvi-am install-exec \
+       install-exec-am install-html install-html-am install-info \
+       install-info-am install-man install-pdf install-pdf-am \
+       install-ps install-ps-am install-strip installcheck \
+       installcheck-am installdirs maintainer-clean \
+       maintainer-clean-generic mostlyclean mostlyclean-compile \
+       mostlyclean-generic mostlyclean-kr mostlyclean-libtool pdf \
+       pdf-am ps ps-am tags uninstall uninstall-am
+
+
+$(top_builddir)/tests/libtests.la:
+       cd $(top_builddir)/tests; $(MAKE) $(AM_MAKEFLAGS) libtests.la
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/tests/mpf/reuse.c b/tests/mpf/reuse.c

new file mode 100644 (file)

index 0000000..f373ac7
--- /dev/null
+++ b/tests/mpf/reuse.c
@@ -0,0 +1,211 @@
+/* Test that routines allow reusing a source variable as destination.
+
+Copyright 1996, 2000, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+#if __GMP_LIBGMP_DLL
+
+/* FIXME: When linking to a DLL libgmp, mpf_add etc can't be used as
+   initializers for global variables because they're effectively global
+   variables (function pointers) themselves.  Perhaps calling a test
+   function successively with mpf_add etc would be better.  */
+
+int
+main (void)   /* stub entry point compiled when __GMP_LIBGMP_DLL is non-zero */
+{
+  printf ("Test suppressed for windows DLL\n");
+  exit (0);   /* exit status 0: the test is deliberately not run */
+}
+
+
+#else /* ! __GMP_LIBGMP_DLL */
+
+#ifndef SIZE
+#define SIZE 16
+#endif
+
+#ifndef EXPO
+#define EXPO 32
+#endif
+
+void dump_abort __GMP_PROTO ((char *, mpf_t, mpf_t));
+
+typedef void (*dss_func) __GMP_PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr));
+/* Functions called as f (dest, src, src).  Entry i is reported as dss_func_names[i].  */
+dss_func dss_funcs[] =
+{
+  mpf_div, mpf_add, mpf_mul, mpf_sub,
+};
+
+char *dss_func_names[] =
+{
+  "mpf_div", "mpf_add", "mpf_mul", "mpf_sub",
+};
+
+typedef void (*dsi_func) __GMP_PROTO ((mpf_ptr, mpf_srcptr, unsigned long int));
+/* Functions called as f (dest, src, ulong).  Entry i is reported as dsi_func_names[i].  */
+dsi_func dsi_funcs[] =
+{
+  mpf_div_ui, mpf_add_ui, mpf_mul_ui, mpf_sub_ui,
+  mpf_mul_2exp, mpf_div_2exp
+};
+
+char *dsi_func_names[] =
+{
+  "mpf_div_ui", "mpf_add_ui", "mpf_mul_ui", "mpf_sub_ui",
+  "mpf_mul_2exp", "mpf_div_2exp"
+};
+
+typedef void (*dis_func) __GMP_PROTO ((mpf_ptr, unsigned long int, mpf_srcptr));
+/* Functions called as f (dest, ulong, src).  Entry i is reported as dis_func_names[i].  */
+dis_func dis_funcs[] =
+{
+  mpf_ui_div, mpf_ui_sub,
+};
+
+char *dis_func_names[] =
+{
+  "mpf_ui_div", "mpf_ui_sub",
+};
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  int pass, reps = 10000;
+  mpf_t in1, in2, out1;
+  unsigned long int in1i, in2i;
+  mpf_t res1, res2, res3;
+  mp_size_t bprec = 100;
+
+  tests_start ();
+  /* Optional arguments: argv[1] overrides reps, argv[2] overrides bprec.  */
+  if (argc > 1)
+    {
+      reps = strtol (argv[1], 0, 0);
+      if (argc > 2)
+       bprec = strtol (argv[2], 0, 0);
+    }
+
+  mpf_set_default_prec (bprec);
+
+  mpf_init (in1);
+  mpf_init (in2);
+  mpf_init (out1);
+  mpf_init (res1);
+  mpf_init (res2);
+  mpf_init (res3);
+
+  for (pass = 1; pass <= reps; pass++)
+    {
+      mpf_random2 (in1, urandom () % SIZE - SIZE/2, urandom () % EXPO);
+      mpf_random2 (in2, urandom () % SIZE - SIZE/2, urandom () % EXPO);
+      /* f (dest, src, src): the result must not change when dest is also a source.  */
+      for (i = 0; i < numberof (dss_funcs); i++)
+       {
+         /* Don't divide by 0.  Matched by name so the table order does not matter.  */
+         if (strcmp (dss_func_names[i], "mpf_div") == 0
+             && mpf_cmp_ui (in2, 0) == 0)
+           continue;
+         (dss_funcs[i]) (res1, in1, in2);
+
+         mpf_set (out1, in1);
+         (dss_funcs[i]) (out1, out1, in2);
+         mpf_set (res2, out1);
+
+         mpf_set (out1, in2);
+         (dss_funcs[i]) (out1, in1, out1);
+         mpf_set (res3, out1);
+
+         if (mpf_cmp (res1, res2) != 0)
+           dump_abort (dss_func_names[i], res1, res2);
+         if (mpf_cmp (res1, res3) != 0)
+           dump_abort (dss_func_names[i], res1, res3);
+       }
+
+      in2i = urandom ();
+      for (i = 0; i < numberof (dsi_funcs); i++)
+       {
+         /* Don't divide by 0.  */
+         if (strcmp (dsi_func_names[i], "mpf_div_ui") == 0 && in2i == 0)
+           continue;
+
+         (dsi_funcs[i]) (res1, in1, in2i);
+
+         mpf_set (out1, in1);
+         (dsi_funcs[i]) (out1, out1, in2i);
+         mpf_set (res2, out1);
+
+         if (mpf_cmp (res1, res2) != 0)
+           dump_abort (dsi_func_names[i], res1, res2);
+       }
+
+      in1i = urandom ();
+      for (i = 0; i < numberof (dis_funcs); i++)
+       {
+         /* Don't divide by 0.  */
+         if (strcmp (dis_func_names[i], "mpf_ui_div") == 0
+             && mpf_cmp_ui (in2, 0) == 0)
+           continue;
+
+         (dis_funcs[i]) (res1, in1i, in2);
+
+         mpf_set (out1, in2);
+         (dis_funcs[i]) (out1, in1i, out1);
+         mpf_set (res2, out1);
+
+         if (mpf_cmp (res1, res2) != 0)
+           dump_abort (dis_func_names[i], res1, res2);
+       }
+
+    }
+
+  mpf_clear (in1);
+  mpf_clear (in2);
+  mpf_clear (out1);
+  mpf_clear (res1);
+  mpf_clear (res2);
+  mpf_clear (res3);
+
+  tests_end ();
+  exit (0);
+}
+
+void   /* Report a mismatch for the function called NAME, dump both results, abort.  */
+dump_abort (char *name, mpf_t res1, mpf_t res2)
+{
+  printf ("failure in %s:\n", name);
+  mpf_dump (res1);   /* first of the two values that compared unequal */
+  mpf_dump (res2);   /* second of the two values that compared unequal */
+  abort ();
+}
+
+#if 0
+void mpf_abs           __GMP_PROTO ((mpf_ptr, mpf_srcptr));
+void mpf_sqrt          __GMP_PROTO ((mpf_ptr, mpf_srcptr));
+void mpf_neg           __GMP_PROTO ((mpf_ptr, mpf_srcptr));
+#endif
+
+#endif /* ! __GMP_LIBGMP_DLL */
diff --git a/tests/mpf/t-add.c b/tests/mpf/t-add.c

new file mode 100644 (file)

index 0000000..db0dbe5
--- /dev/null
+++ b/tests/mpf/t-add.c
@@ -0,0 +1,108 @@
+/* Test mpf_add.
+
+Copyright 1996, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+#ifndef SIZE
+#define SIZE 16
+#endif
+
+int
+main (int argc, char **argv)
+{
+  mp_size_t size;
+  mp_exp_t exp;
+  int reps = 20000;
+  int i;
+  mpf_t u, v, w, wref;
+  mp_size_t bprec = 100;
+  mpf_t rerr, max_rerr, limit_rerr;
+
+  tests_start ();
+  /* Optional arguments: argv[1] overrides reps, argv[2] overrides bprec.  */
+  if (argc > 1)
+    {
+      reps = strtol (argv[1], 0, 0);
+      if (argc > 2)
+       bprec = strtol (argv[2], 0, 0);
+    }
+
+  mpf_set_default_prec (bprec);
+  /* limit_rerr = 2^-bprec: the largest relative error accepted below.  */
+  mpf_init_set_ui (limit_rerr, 1);
+  mpf_div_2exp (limit_rerr, limit_rerr, bprec);
+#if VERBOSE
+  mpf_dump (limit_rerr);
+#endif
+  mpf_init (rerr);
+  mpf_init_set_ui (max_rerr, 0);
+
+  mpf_init (u);
+  mpf_init (v);
+  mpf_init (w);
+  mpf_init (wref);
+  for (i = 0; i < reps; i++)
+    {
+      size = urandom () % (2 * SIZE) - SIZE;
+      exp = urandom () % SIZE;
+      mpf_random2 (u, size, exp);
+
+      size = urandom () % (2 * SIZE) - SIZE;
+      exp = urandom () % SIZE;
+      mpf_random2 (v, size, exp);
+      /* Add with mpf_add and with refmpf_add (presumably the reference version -- confirm).  */
+      mpf_add (w, u, v);
+      refmpf_add (wref, u, v);
+      /* rerr = relative difference between the two sums.  */
+      mpf_reldiff (rerr, w, wref);
+      if (mpf_cmp (rerr, max_rerr) > 0)
+       {
+         mpf_set (max_rerr, rerr);
+#if VERBOSE
+         mpf_dump (max_rerr);
+#endif
+         if (mpf_cmp (rerr, limit_rerr) > 0)
+           {
+             printf ("ERROR after %d tests\n", i);
+             printf ("   u = "); mpf_dump (u);
+             printf ("   v = "); mpf_dump (v);
+             printf ("wref = "); mpf_dump (wref);
+             printf ("   w = "); mpf_dump (w);
+             abort ();
+           }
+       }
+    }
+
+  mpf_clear (limit_rerr);
+  mpf_clear (rerr);
+  mpf_clear (max_rerr);
+
+  mpf_clear (u);
+  mpf_clear (v);
+  mpf_clear (w);
+  mpf_clear (wref);
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpf/t-cmp_d.c b/tests/mpf/t-cmp_d.c

new file mode 100644 (file)

index 0000000..6b5385f
--- /dev/null
+++ b/tests/mpf/t-cmp_d.c
@@ -0,0 +1,104 @@
+/* Test mpf_cmp_d.
+
+Copyright 2001, 2003, 2003, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+#define SGN(n)  ((n) > 0 ? 1 : (n) < 0 ? -1 : 0)
+
+void
+check_one (const char *name, mpf_srcptr x, double y, int cmp)
+{
+  int   got;
+  /* Only the sign of mpf_cmp_d's result is compared against CMP; NAME labels the caller.  */
+  got = mpf_cmp_d (x, y);
+  if (SGN(got) != cmp)
+    {
+      int i;
+      printf    ("mpf_cmp_d wrong (from %s)\n", name);
+      printf    ("  got  %d\n", got);
+      printf    ("  want %d\n", cmp);
+      mpf_trace ("  x", x);
+      printf    ("  y %g\n", y);
+      mp_trace_base=-16;   /* presumably makes the next mpf_trace print in hex -- confirm */
+      mpf_trace ("  x", x);
+      printf    ("  y %g\n", y);
+      printf    ("  y");
+      for (i = 0; i < sizeof(y); i++)   /* dump the bytes of y in memory order */
+        printf (" %02X", (unsigned) ((unsigned char *) &y)[i]);
+      printf ("\n");
+      abort ();
+    }
+}
+
+void
+check_infinity (void)
+{
+  mpf_t   x;
+  double  y = tests_infinity_d ();
+  if (y == 0.0)   /* presumably means no infinity is available here -- confirm in tests.h */
+    return;
+
+  mpf_init (x);
+
+  /* 0 against +inf, then against -inf */
+  mpf_set_ui (x, 0L);
+  check_one ("check_infinity", x,  y, -1);
+  check_one ("check_infinity", x, -y,  1);
+
+  /* 123 against +inf, then against -inf */
+  mpf_set_ui (x, 123L);
+  check_one ("check_infinity", x,  y, -1);
+  check_one ("check_infinity", x, -y,  1);
+
+  /* -123 against +inf, then against -inf */
+  mpf_set_si (x, -123L);
+  check_one ("check_infinity", x,  y, -1);
+  check_one ("check_infinity", x, -y,  1);
+
+  /* 2^5000 against +inf, then against -inf */
+  mpf_set_ui (x, 1L);
+  mpf_mul_2exp (x, x, 5000L);
+  check_one ("check_infinity", x,  y, -1);
+  check_one ("check_infinity", x, -y,  1);
+
+  /* -2^5000 against +inf, then against -inf */
+  mpf_neg (x, x);
+  check_one ("check_infinity", x,  y, -1);
+  check_one ("check_infinity", x, -y,  1);
+
+  mpf_clear (x);
+}
+
+int
+main (int argc, char *argv[])
+{
+  tests_start ();
+  /* The only check run here compares finite values against the infinities.  */
+  check_infinity ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpf/t-cmp_si.c b/tests/mpf/t-cmp_si.c

new file mode 100644 (file)

index 0000000..e4b9514
--- /dev/null
+++ b/tests/mpf/t-cmp_si.c
@@ -0,0 +1,107 @@
+/* Test mpf_cmp_si.
+
+Copyright 2000, 2001, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+#define SGN(x)       ((x) < 0 ? -1 : (x) == 0 ? 0 : 1)
+
+void
+check_data (void)
+{
+  static const struct {
+    int         a_base;   /* base passed when parsing a */
+    const char  *a;       /* the mpf operand, as text */
+    const char  *b;       /* the long operand, as text (parsed with base argument 0) */
+    int         want;     /* expected sign of mpf_cmp_si (a, b) */
+  } data[] = {
+    { 10, "0",  "1", -1 },
+    { 10, "0",  "0",  0 },
+    { 10, "0", "-1",  1 },
+
+    { 10, "1",  "1", 0 },
+    { 10, "1",  "0", 1 },
+    { 10, "1", "-1", 1 },
+
+    { 10, "-1",  "1", -1 },
+    { 10, "-1",  "0", -1 },
+    { 10, "-1", "-1", 0 },
+    /* b = -2^31 */
+    { 16,         "0", "-0x80000000",  1 },
+    { 16,  "80000000", "-0x80000000",  1 },
+    { 16,  "80000001", "-0x80000000",  1 },
+    { 16, "-80000000", "-0x80000000",  0 },
+    { 16, "-80000001", "-0x80000000", -1 },
+    { 16, "-FF0080000001", "-0x80000000", -1 },
+    /* b = -2^63; these entries are skipped below unless b fits in a long */
+    { 16,                 "0", "-0x8000000000000000",  1 },
+    { 16,  "8000000000000000", "-0x8000000000000000",  1 },
+    { 16,  "8000000000000001", "-0x8000000000000000",  1 },
+    { 16, "-8000000000000000", "-0x8000000000000000",  0 },
+    { 16, "-8000000000000001", "-0x8000000000000000", -1 },
+    { 16, "-FF008000000000000001", "-0x8000000000000000", -1 },
+  };
+
+  mpf_t  a;
+  mpz_t  bz;
+  long   b;
+  int    got;
+  int    i;
+
+  mpf_init (a);
+  mpz_init (bz);
+  for (i = 0; i < numberof (data); i++)
+    {
+      mpf_set_str_or_abort (a, data[i].a, data[i].a_base);
+      mpz_set_str_or_abort (bz, data[i].b, 0);
+
+      if (mpz_fits_slong_p (bz))   /* entries whose b is out of range for long are not tested */
+        {
+          b = mpz_get_si (bz);
+          got = mpf_cmp_si (a, b);
+          if (SGN (got) != data[i].want)
+            {
+              printf ("mpf_cmp_si wrong on data[%d]\n", i);
+              printf ("  a="); mpf_out_str (stdout, 10, 0, a);
+              printf (" (%s)\n", data[i].a);
+              printf ("  b=%ld (%s)\n", b, data[i].b);
+              printf ("  got=%d\n", got);
+              printf ("  want=%d\n", data[i].want);
+              abort();
+            }
+        }
+    }
+
+  mpf_clear (a);
+  mpz_clear (bz);
+}
+
+int
+main (void)
+{
+  tests_start ();
+  /* Run the table-driven mpf_cmp_si comparisons.  */
+  check_data ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpf/t-conv.c b/tests/mpf/t-conv.c

new file mode 100644 (file)

index 0000000..c151174
--- /dev/null
+++ b/tests/mpf/t-conv.c
@@ -0,0 +1,143 @@
+/* Test mpf_get_str and mpf_set_str.
+
+Copyright 1996, 2000, 2001, 2008 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h> /* for strlen */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+#ifndef SIZE
+#define SIZE 10
+#endif
+
+#ifndef EXPO
+#define EXPO 200
+#endif
+
+int
+main (int argc, char **argv)
+{
+  mpf_t x, y;
+  int reps = 20000;
+  int i;
+  mp_size_t bprec = 100;
+  mpf_t d, rerr, max_rerr, limit_rerr;
+  char *str;
+  mp_exp_t bexp;
+  long size, exp;
+  int base;
+  char buf[SIZE * GMP_LIMB_BITS + 5];   /* NOTE(review): filled by unbounded sprintf below; confirm it is large enough for big bprec */
+
+  tests_start ();
+  /* Optional arguments: argv[1] overrides reps, argv[2] overrides bprec.  */
+  if (argc > 1)
+    {
+      reps = strtol (argv[1], 0, 0);
+      if (argc > 2)
+       bprec = strtol (argv[2], 0, 0);
+    }
+
+  mpf_set_default_prec (bprec);
+  /* limit_rerr = 2^-bprec: the largest relative error accepted below.  */
+  mpf_init_set_ui (limit_rerr, 1);
+  mpf_div_2exp (limit_rerr, limit_rerr, bprec);
+#if VERBOSE
+  mpf_dump (limit_rerr);
+#endif
+  mpf_init (rerr);
+  mpf_init_set_ui (max_rerr, 0);
+
+  mpf_init (x);
+  mpf_init (y);
+  mpf_init (d);   /* NOTE(review): d is initialised and cleared but not otherwise used here */
+
+  /* First test some specific values.  */
+
+  mpf_set_str (y, "1.23456e1000", 0);
+
+  mpf_set_str (x, "1.23456e1000", 10);
+  if (mpf_cmp (x, y) != 0)
+    abort ();
+  mpf_set_str (x, "1.23456e+1000", 0);
+  if (mpf_cmp (x, y) != 0)
+    abort ();
+  mpf_set_str (x, "1.23456e+1000", 10);
+  if (mpf_cmp (x, y) != 0)
+    abort ();
+
+  /* Now test random values.  */
+
+  for (i = 0; i < reps; i++)
+    {
+      if (i == 0)
+        {
+          /* exercise the special case in get_str for x==0 */
+          mpf_set_ui (x, 0L);
+          base = 10;
+        }
+      else
+        {
+          size = urandom () % (2 * SIZE) - SIZE;
+          exp = urandom () % EXPO;
+          mpf_random2 (x, size, exp);
+          base = urandom () % 61 + 2;   /* random base in the range 2..62 */
+        }
+      /* Convert x to digits, rebuild the text "0.<digits>@<exp>", and parse it back into y.  */
+      str = mpf_get_str (0, &bexp, base, 0, x);
+
+      if (str[0] == '-')
+       sprintf (buf, "-0.%s@%ld", str + 1, bexp);
+      else
+       sprintf (buf, "0.%s@%ld", str, bexp);
+
+      mpf_set_str_or_abort (y, buf, -base);   /* negative base: presumably makes the exponent be read in decimal -- confirm in the mpf_set_str docs */
+      (*__gmp_free_func) (str, strlen (str) + 1);
+
+      mpf_reldiff (rerr, x, y);
+      if (mpf_cmp (rerr, max_rerr) > 0)
+       {
+         mpf_set (max_rerr, rerr);
+#if VERBOSE
+         mpf_dump (max_rerr);
+#endif
+         if (mpf_cmp (rerr, limit_rerr) > 0)
+           {
+             printf ("ERROR after %d tests\n", i);
+             printf ("base = %d\n", base);
+             printf ("   x = "); mpf_dump (x);
+             printf ("   y = "); mpf_dump (y);
+             abort ();
+           }
+       }
+    }
+
+  mpf_clear (limit_rerr);
+  mpf_clear (rerr);
+  mpf_clear (max_rerr);
+
+  mpf_clear (x);
+  mpf_clear (y);
+  mpf_clear (d);
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpf/t-div.c b/tests/mpf/t-div.c

new file mode 100644 (file)

index 0000000..a88f1cc
--- /dev/null
+++ b/tests/mpf/t-div.c
@@ -0,0 +1,186 @@
+/* Test mpf_div.
+
+Copyright 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+void   /* Abort with a trace of u and v if GOT is rejected as the quotient u/v.  */
+check_one (const char *desc, mpf_ptr got, mpf_srcptr u, mpf_srcptr v)
+{
+  if (! refmpf_validate_division ("mpf_div", got, u, v))
+    {
+      mp_trace_base = -16;   /* presumably selects hex output for mpf_trace -- confirm */
+      mpf_trace ("  u", u);
+      mpf_trace ("  v", v);
+      printf    ("  %s\n", desc);   /* desc names the calling scenario */
+      abort ();
+    }
+}
+
+void
+check_rand (void)
+{
+  unsigned long  min_prec = __GMPF_BITS_TO_PREC (1);
+  gmp_randstate_ptr  rands = RANDS;
+  unsigned long  prec;
+  mpf_t  got, u, v;
+  int    i;
+
+  mpf_init (got);
+  mpf_init (u);
+  mpf_init (v);
+
+  /* 100 random divisions, cycling through separate, dst == u and dst == v */
+  for (i = 0; i < 100; i++)
+    {
+      /* got precision */
+      prec = min_prec + gmp_urandomm_ui (rands, 15L);
+      refmpf_set_prec_limbs (got, prec);
+
+      /* u: random precision, non-zero value, random sign */
+      prec = min_prec + gmp_urandomm_ui (rands, 15L);
+      refmpf_set_prec_limbs (u, prec);
+      do {
+        mpf_random2 (u, PREC(u), (mp_exp_t) 20);
+      } while (SIZ(u) == 0);
+      if (gmp_urandomb_ui (rands, 1L))
+        mpf_neg (u, u);
+
+      /* v: random precision, non-zero value, random sign */
+      prec = min_prec + gmp_urandomm_ui (rands, 15L);
+      refmpf_set_prec_limbs (v, prec);
+      do {
+        mpf_random2 (v, PREC(v), (mp_exp_t) 20);
+      } while (SIZ(v) == 0);
+      if (gmp_urandomb_ui (rands, 1L))
+        mpf_neg (v, v);
+
+      switch (i % 3) {
+      case 0:
+        mpf_div (got, u, v);
+        check_one ("separate", got, u, v);
+        break;
+      case 1:
+        prec = refmpf_set_overlap (got, u);   /* presumably makes got overlap u -- confirm */
+        mpf_div (got, got, v);
+        check_one ("dst == u", got, u, v);
+        mpf_set_prec_raw (got, prec);   /* uses the value returned by refmpf_set_overlap */
+        break;
+      case 2:
+        prec = refmpf_set_overlap (got, v);   /* presumably makes got overlap v -- confirm */
+        mpf_div (got, u, got);
+        check_one ("dst == v", got, u, v);
+        mpf_set_prec_raw (got, prec);   /* uses the value returned by refmpf_set_overlap */
+        break;
+      }
+    }
+
+  mpf_clear (got);
+  mpf_clear (u);
+  mpf_clear (v);
+}
+
+/* Exercise calls mpf_div(x,x,x): destination, dividend and divisor are the same variable */
+void
+check_reuse_three (void)
+{
+  unsigned long  min_prec = __GMPF_BITS_TO_PREC (1);
+  gmp_randstate_ptr  rands = RANDS;
+  unsigned long  result_prec, input_prec, set_prec;
+  mpf_t  got;
+  int    i;
+
+  mpf_init (got);
+
+  for (i = 0; i < 8; i++)
+    {
+      result_prec = min_prec + gmp_urandomm_ui (rands, 15L);
+      input_prec = min_prec + gmp_urandomm_ui (rands, 15L);
+      /* size got for the larger of the two precisions assigned to PREC(got) below */
+      set_prec = MAX (result_prec, input_prec);
+      refmpf_set_prec_limbs (got, set_prec);
+
+      /* input, non-zero, possibly negative */
+      PREC(got) = input_prec;
+      do {
+        mpf_random2 (got, input_prec, (mp_exp_t) 20);
+      } while (SIZ(got) == 0);
+      if (gmp_urandomb_ui (rands, 1L))
+        mpf_neg (got, got);
+
+      PREC(got) = result_prec;   /* precision seen by mpf_div for the result */
+
+      mpf_div (got, got, got);
+
+      /* expect exactly 1.0 always */
+      ASSERT_ALWAYS (mpf_cmp_ui (got, 1L) == 0);
+
+      PREC(got) = set_prec;   /* put the PREC field back to the value sized above */
+    }
+
+  mpf_clear (got);
+}
+
+void   /* Fixed-value checks of mpf_div; aborts on the first failure.  */
+check_various (void)
+{
+  mpf_t got, u, v;
+
+  mpf_init (got);
+  mpf_init (u);
+  mpf_init (v);
+
+  /* 100/4 == 25 */
+  mpf_set_prec (got, 20L);
+  mpf_set_ui (u, 100L);
+  mpf_set_ui (v, 4L);
+  mpf_div (got, u, v);
+  MPF_CHECK_FORMAT (got);
+  ASSERT_ALWAYS (mpf_cmp_ui (got, 25L) == 0);
+
+  /* 1/(2^n+1), a case where truncating the divisor would be wrong */
+  mpf_set_prec (got, 500L);
+  mpf_set_prec (v, 900L);
+  mpf_set_ui (u, 1L);   /* reset the dividend: u still holds 100 from the check above */
+  mpf_set_ui (v, 1L);
+  mpf_mul_2exp (v, v, 800L);   /* v = 2^800 */
+  mpf_add_ui (v, v, 1L);       /* v = 2^800 + 1 */
+  mpf_div (got, u, v);
+  check_one ("1/2^n+1, separate", got, u, v);
+  mpf_clear (got);
+  mpf_clear (u);
+  mpf_clear (v);
+}
+
+int
+main (void)
+{
+  tests_start ();
+  /* Run the fixed cases, the random cases, then the all-operands-equal cases.  */
+  check_various ();
+  check_rand ();
+  check_reuse_three ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpf/t-dm2exp.c b/tests/mpf/t-dm2exp.c

new file mode 100644 (file)

index 0000000..da43e28
--- /dev/null
+++ b/tests/mpf/t-dm2exp.c
@@ -0,0 +1,119 @@
+/* Test mpf_div, mpf_div_2exp, mpf_mul_2exp.
+
+Copyright 1996, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+#ifndef SIZE
+#define SIZE 16
+#endif
+
+int
+main (int argc, char **argv)
+{
+  int reps = 100000;
+  int i;
+  mpf_t u, v, w1, w2, w3;
+  mp_size_t bprec = 100;
+  mpf_t rerr, limit_rerr;
+  mp_size_t un;
+  mp_exp_t ue;
+
+  tests_start ();
+  /* Optional arguments: argv[1] overrides reps, argv[2] overrides bprec.  */
+  if (argc > 1)
+    {
+      reps = strtol (argv[1], 0, 0);
+      if (argc > 2)
+       bprec = strtol (argv[2], 0, 0);
+    }
+
+  mpf_set_default_prec (bprec);
+
+  mpf_init (rerr);
+  mpf_init (limit_rerr);
+
+  mpf_init (u);
+  mpf_init (v);
+  mpf_init (w1);
+  mpf_init (w2);
+  mpf_init (w3);
+
+  for (i = 0; i < reps; i++)
+    {
+      unsigned long int res_prec;
+      unsigned long int pow2;
+
+      res_prec = urandom () % (bprec + 100);
+      mpf_set_prec (w1, res_prec);
+      mpf_set_prec (w2, res_prec);
+      mpf_set_prec (w3, res_prec);
+      /* limit_rerr = 2^-res_prec: the relative error accepted for this iteration.  */
+      mpf_set_ui (limit_rerr, 1);
+      mpf_div_2exp (limit_rerr, limit_rerr, res_prec);
+      /* v = 2^pow2 with pow2 in 0..0xFFFF.  */
+      pow2 = urandom () % 0x10000;
+      mpf_set_ui (v, 1);
+      mpf_mul_2exp (v, v, pow2);
+
+      un = urandom () % (2 * SIZE) - SIZE;
+      ue = urandom () % SIZE;
+      mpf_random2 (u, un, ue);
+      /* u / 2^pow2 computed by mpf_div_2exp and by mpf_div must agree within limit_rerr.  */
+      mpf_div_2exp (w1, u, pow2);
+      mpf_div (w2, u, v);
+      mpf_reldiff (rerr, w1, w2);
+      if (mpf_cmp (rerr, limit_rerr) > 0)
+       {
+         printf ("ERROR in mpf_div or mpf_div_2exp after %d tests\n", i);
+         printf ("   u = "); mpf_dump (u);
+         printf ("   v = "); mpf_dump (v);
+         printf ("  w1 = "); mpf_dump (w1);
+         printf ("  w2 = "); mpf_dump (w2);
+         abort ();
+       }
+      mpf_mul_2exp (w3, w1, pow2);   /* multiplying back by 2^pow2 is compared against u */
+      mpf_reldiff (rerr, u, w3);
+      if (mpf_cmp (rerr, limit_rerr) > 0)
+       {
+         printf ("ERROR in mpf_mul_2exp after %d tests\n", i);
+         printf ("   u = "); mpf_dump (u);
+         printf ("   v = "); mpf_dump (v);
+         printf ("  w1 = "); mpf_dump (w1);
+         printf ("  w3 = "); mpf_dump (w3);
+         abort ();
+       }
+    }
+
+  mpf_clear (rerr);
+  mpf_clear (limit_rerr);
+
+  mpf_clear (u);
+  mpf_clear (v);
+  mpf_clear (w1);
+  mpf_clear (w2);
+  mpf_clear (w3);
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpf/t-eq.c b/tests/mpf/t-eq.c

new file mode 100644 (file)

index 0000000..73fde44
--- /dev/null
+++ b/tests/mpf/t-eq.c
@@ -0,0 +1,141 @@
+/* Test mpf_eq.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+#define SZ (2 * sizeof(mp_limb_t))
+
+void insert_random_low_zero_limbs (mpf_t, gmp_randstate_ptr);
+void dump_abort (mpf_t, mpf_t, int, int, int, int, int, long);
+void hexdump (mpf_t);
+
+int
+main (int argc, char **argv)  /* mpf_eq test driver: random pairs differing in exactly one known bit */
+{
+  unsigned long test, reps = 10000;
+  mpf_t a, b, x;
+  gmp_randstate_ptr rands;
+  mpz_t ds;
+  int hibits, lshift1, lshift2;
+  int xtra;
+
+#define HIBITS 10   /* hibits is drawn as (HIBITS-bit random value) + 1 */
+#define LSHIFT1 10  /* lshift1 is a LSHIFT1-bit random value */
+#define LSHIFT2 10  /* lshift2 is a LSHIFT2-bit random value */
+
+  if (argc > 1)
+    reps = strtol (argv[1], 0, 0);  /* optional first argument overrides the repetition count */
+
+  tests_start ();
+
+  rands = RANDS;
+
+  mpf_set_default_prec ((1 << HIBITS) + (1 << LSHIFT1) + (1 << LSHIFT2));
+
+  mpz_init (ds);
+  mpf_inits (a, b, x, NULL);
+
+  for (test = 0; test < reps; test++)
+    {
+      mpz_urandomb (ds, rands, HIBITS);
+      hibits = mpz_get_ui (ds) + 1;
+      mpz_urandomb (ds, rands, hibits);
+      mpz_setbit (ds, hibits  - 1);    /* make sure msb is set */
+      mpf_set_z (a, ds);  /* a and b start out identical */
+      mpf_set_z (b, ds);
+
+      mpz_urandomb (ds, rands, LSHIFT1);
+      lshift1 = mpz_get_ui (ds);
+      mpf_mul_2exp (a, a, lshift1 + 1);
+      mpf_mul_2exp (b, b, lshift1 + 1);
+      mpf_add_ui (a, a, 1);    /* make a one-bit difference */
+
+      mpz_urandomb (ds, rands, LSHIFT2);
+      lshift2 = mpz_get_ui (ds);
+      mpf_mul_2exp (a, a, lshift2);
+      mpf_mul_2exp (b, b, lshift2);
+      mpz_urandomb (ds, rands, lshift2);
+      mpf_set_z (x, ds);
+      mpf_add (a, a, x);  /* the same random low part is added to both operands */
+      mpf_add (b, b, x);
+
+      insert_random_low_zero_limbs (a, rands);
+      insert_random_low_zero_limbs (b, rands);
+
+      if (mpf_eq (a, b, lshift1 + hibits) == 0)  /* the leading lshift1 + hibits bits are expected to match */
+       {
+         dump_abort (a, b, lshift1 + hibits, lshift1, lshift2, hibits, 1, test);
+       }
+      for (xtra = 1; xtra < 100; xtra++)  /* any longer prefix is expected to compare unequal */
+       if (mpf_eq (a, b, lshift1 + hibits + xtra) != 0)
+         {
+           dump_abort (a, b, lshift1 + hibits + xtra, lshift1, lshift2, hibits, 0, test);
+         }
+    }
+
+  mpf_clears (a, b, x, NULL);
+  mpz_clear (ds);
+  tests_end ();
+  exit (0);
+}
+
+void
+insert_random_low_zero_limbs (mpf_t x, gmp_randstate_ptr rands)  /* pad x at the low end with a random count of zero limbs */
+{
+  mp_size_t max = PREC(x) - SIZ(x);  /* most limbs that may be inserted; uses SIZ(x) unsigned-agnostically, so presumably x > 0 -- confirm */
+  mp_size_t s;
+  mpz_t ds; mpz_init (ds);
+  mpz_urandomb (ds, rands, 32);
+  s = mpz_get_ui (ds) % (max + 1);  /* s lies in 0 .. max */
+  MPN_COPY_DECR (PTR(x) + s, PTR(x), SIZ(x));  /* move the existing limbs up by s positions */
+  MPN_ZERO (PTR(x), s);  /* clear the s vacated low limbs */
+  SIZ(x) += s;  /* EXP(x) is not touched here */
+  mpz_clear (ds);
+}
+
+void
+dump_abort (mpf_t a, mpf_t b, int cmp_prec, int lshift1, int lshift2, int hibits, int want, long test)  /* print the failing case, then abort */
+{
+  printf ("ERROR in test %ld\n", test);
+  printf ("want %d got %d from mpf_eq\n", want, 1-want);  /* "got" is derived as 1-want, not an actual return value */
+  printf ("cmp_prec = %d\n", cmp_prec);
+  printf ("lshift1 = %d\n", lshift1);
+  printf ("lshift2 = %d\n", lshift2);
+  printf ("hibits = %d\n", hibits);
+  hexdump (a); puts ("");  /* both operands as hex limbs, one per line */
+  hexdump (b); puts ("");
+  abort ();
+}
+
+void
+hexdump (mpf_t x)  /* print the limbs of x in hex, separated by single spaces, no newline */
+{
+  mp_size_t i;
+  for (i = ABSIZ(x) - 1; i >= 0; i--)  /* highest-index limb first */
+    {
+      gmp_printf ("%0*MX", (int) SZ, PTR(x)[i]);  /* '*' width must be an int; SZ is a size_t expression */
+      if (i != 0)
+       printf (" ");
+    }
+}
diff --git a/tests/mpf/t-fits.c b/tests/mpf/t-fits.c

new file mode 100644 (file)

index 0000000..5318e47
--- /dev/null
+++ b/tests/mpf/t-fits.c
@@ -0,0 +1,324 @@
+/* Test mpf_fits_*_p
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* Nothing sophisticated here, just exercise mpf_fits_*_p on a small amount
+   of data. */
+
+#define EXPECT_S(fun,name,answer)                                        \
+  got = fun (f);                                                         \
+  if (got != answer)                                                     \
+    {                                                                    \
+      printf ("%s (%s) got %d want %d\n", name, expr, got, answer);      \
+      printf (" f size %d exp %ld\n", SIZ(f), EXP(f));                   \
+      printf (" f dec "); mpf_out_str (stdout, 10, 0, f); printf ("\n"); \
+      printf (" f hex "); mpf_out_str (stdout, 16, 0, f); printf ("\n"); \
+      error = 1;                                                         \
+    }
+
+#if HAVE_STRINGIZE  /* EXPECT_S expands in place and uses the caller's f, got, expr and error */
+#define EXPECT(fun,answer)  EXPECT_S(fun,#fun,answer)
+#else  /* without stringizing, the name printed on failure is the literal text "fun" */
+#define EXPECT(fun,answer)  EXPECT_S(fun,"fun",answer)
+#endif
+
+int
+main (void)  /* run mpf_fits_*_p on values at and just beyond each C integer type limit */
+{
+  mpf_t       f, f0p5;  /* f: value under test; f0p5: the constant 0.5 */
+  int         got;
+  const char  *expr;  /* description of f, printed by EXPECT on a mismatch */
+  int         error = 0;  /* set by EXPECT on any mismatch */
+
+  tests_start ();
+  mpf_init2 (f, 200L);
+  mpf_init2 (f0p5, 200L);
+
+  /* 0.5 */
+  mpf_set_ui (f0p5, 1L);
+  mpf_div_2exp (f0p5, f0p5, 1L);
+
+  mpf_set_ui (f, 0L);
+  expr = "0";
+  EXPECT (mpf_fits_ulong_p, 1);
+  EXPECT (mpf_fits_uint_p, 1);
+  EXPECT (mpf_fits_ushort_p, 1);
+  EXPECT (mpf_fits_slong_p, 1);
+  EXPECT (mpf_fits_sint_p, 1);
+  EXPECT (mpf_fits_sshort_p, 1);
+
+  mpf_set_ui (f, 1L);
+  expr = "1";
+  EXPECT (mpf_fits_ulong_p, 1);
+  EXPECT (mpf_fits_uint_p, 1);
+  EXPECT (mpf_fits_ushort_p, 1);
+  EXPECT (mpf_fits_slong_p, 1);
+  EXPECT (mpf_fits_sint_p, 1);
+  EXPECT (mpf_fits_sshort_p, 1);
+
+  mpf_set_si (f, -1L);
+  expr = "-1";
+  EXPECT (mpf_fits_ulong_p, 0);
+  EXPECT (mpf_fits_uint_p, 0);
+  EXPECT (mpf_fits_ushort_p, 0);
+  EXPECT (mpf_fits_slong_p, 1);
+  EXPECT (mpf_fits_sint_p, 1);
+  EXPECT (mpf_fits_sshort_p, 1);
+
+
+  mpf_set_ui (f, (unsigned long) USHRT_MAX);
+  expr = "USHRT_MAX";
+  EXPECT (mpf_fits_ulong_p, 1);
+  EXPECT (mpf_fits_uint_p, 1);
+  EXPECT (mpf_fits_ushort_p, 1);
+
+  mpf_set_ui (f, (unsigned long) USHRT_MAX);
+  mpf_add (f, f, f0p5);
+  expr = "USHRT_MAX + 0.5";
+  EXPECT (mpf_fits_ulong_p, 1);
+  EXPECT (mpf_fits_uint_p, 1);
+  EXPECT (mpf_fits_ushort_p, 1);
+
+  mpf_set_ui (f, (unsigned long) USHRT_MAX);
+  mpf_add_ui (f, f, 1L);
+  expr = "USHRT_MAX + 1";
+  EXPECT (mpf_fits_ushort_p, 0);
+
+
+  mpf_set_ui (f, (unsigned long) UINT_MAX);
+  expr = "UINT_MAX";
+  EXPECT (mpf_fits_ulong_p, 1);
+  EXPECT (mpf_fits_uint_p, 1);
+
+  mpf_set_ui (f, (unsigned long) UINT_MAX);
+  mpf_add (f, f, f0p5);
+  expr = "UINT_MAX + 0.5";
+  EXPECT (mpf_fits_ulong_p, 1);
+  EXPECT (mpf_fits_uint_p, 1);
+
+  mpf_set_ui (f, (unsigned long) UINT_MAX);
+  mpf_add_ui (f, f, 1L);
+  expr = "UINT_MAX + 1";
+  EXPECT (mpf_fits_uint_p, 0);
+
+
+  mpf_set_ui (f, ULONG_MAX);
+  expr = "ULONG_MAX";
+  EXPECT (mpf_fits_ulong_p, 1);
+
+  mpf_set_ui (f, ULONG_MAX);
+  mpf_add (f, f, f0p5);
+  expr = "ULONG_MAX + 0.5";
+  EXPECT (mpf_fits_ulong_p, 1);
+
+  mpf_set_ui (f, ULONG_MAX);
+  mpf_add_ui (f, f, 1L);
+  expr = "ULONG_MAX + 1";
+  EXPECT (mpf_fits_ulong_p, 0);
+
+
+  mpf_set_si (f, (long) SHRT_MAX);
+  expr = "SHRT_MAX";
+  EXPECT (mpf_fits_slong_p, 1);
+  EXPECT (mpf_fits_sint_p, 1);
+  EXPECT (mpf_fits_sshort_p, 1);
+
+  mpf_set_si (f, (long) SHRT_MAX);
+  expr = "SHRT_MAX + 0.5";
+  mpf_add (f, f, f0p5);
+  EXPECT (mpf_fits_slong_p, 1);
+  EXPECT (mpf_fits_sint_p, 1);
+  EXPECT (mpf_fits_sshort_p, 1);
+
+  mpf_set_si (f, (long) SHRT_MAX);
+  mpf_add_ui (f, f, 1L);
+  expr = "SHRT_MAX + 1";
+  EXPECT (mpf_fits_sshort_p, 0);
+
+
+  mpf_set_si (f, (long) INT_MAX);
+  expr = "INT_MAX";
+  EXPECT (mpf_fits_slong_p, 1);
+  EXPECT (mpf_fits_sint_p, 1);
+
+  mpf_set_si (f, (long) INT_MAX);
+  mpf_add (f, f, f0p5);
+  expr = "INT_MAX + 0.5";
+  EXPECT (mpf_fits_slong_p, 1);
+  EXPECT (mpf_fits_sint_p, 1);
+
+  mpf_set_si (f, (long) INT_MAX);
+  mpf_add_ui (f, f, 1L);
+  expr = "INT_MAX + 1";
+  EXPECT (mpf_fits_sint_p, 0);
+
+
+  mpf_set_si (f, LONG_MAX);
+  expr = "LONG_MAX";
+  EXPECT (mpf_fits_slong_p, 1);
+
+  mpf_set_si (f, LONG_MAX);
+  mpf_add (f, f, f0p5);
+  expr = "LONG_MAX + 0.5";
+  EXPECT (mpf_fits_slong_p, 1);
+
+  mpf_set_si (f, LONG_MAX);
+  mpf_add_ui (f, f, 1L);
+  expr = "LONG_MAX + 1";
+  EXPECT (mpf_fits_slong_p, 0);
+
+
+  mpf_set_si (f, (long) SHRT_MIN);
+  expr = "SHRT_MIN";
+  EXPECT (mpf_fits_slong_p, 1);
+  EXPECT (mpf_fits_sint_p, 1);
+  EXPECT (mpf_fits_sshort_p, 1);
+
+  mpf_set_si (f, (long) SHRT_MIN);
+  mpf_sub (f, f, f0p5);
+  expr = "SHRT_MIN - 0.5";
+  EXPECT (mpf_fits_slong_p, 1);
+  EXPECT (mpf_fits_sint_p, 1);
+  EXPECT (mpf_fits_sshort_p, 1);
+
+  mpf_set_si (f, (long) SHRT_MIN);
+  mpf_sub_ui (f, f, 1L);
+  expr = "SHRT_MIN - 1";  /* label matches the mpf_sub_ui above */
+  EXPECT (mpf_fits_sshort_p, 0);
+
+
+  mpf_set_si (f, (long) INT_MIN);
+  expr = "INT_MIN";
+  EXPECT (mpf_fits_slong_p, 1);
+  EXPECT (mpf_fits_sint_p, 1);
+
+  mpf_set_si (f, (long) INT_MIN);
+  mpf_sub (f, f, f0p5);
+  expr = "INT_MIN - 0.5";
+  EXPECT (mpf_fits_slong_p, 1);
+  EXPECT (mpf_fits_sint_p, 1);
+
+  mpf_set_si (f, (long) INT_MIN);
+  mpf_sub_ui (f, f, 1L);
+  expr = "INT_MIN - 1";  /* label matches the mpf_sub_ui above */
+  EXPECT (mpf_fits_sint_p, 0);
+
+
+  mpf_set_si (f, LONG_MIN);
+  expr = "LONG_MIN";
+  EXPECT (mpf_fits_slong_p, 1);
+
+  mpf_set_si (f, LONG_MIN);
+  mpf_sub (f, f, f0p5);
+  expr = "LONG_MIN - 0.5";
+  EXPECT (mpf_fits_slong_p, 1);
+
+  mpf_set_si (f, LONG_MIN);
+  mpf_sub_ui (f, f, 1L);
+  expr = "LONG_MIN - 1";  /* label matches the mpf_sub_ui above */
+  EXPECT (mpf_fits_slong_p, 0);
+
+
+  mpf_set_str_or_abort (f, "0.5", 10);
+  expr = "0.5";
+  EXPECT (mpf_fits_ulong_p, 1);
+  EXPECT (mpf_fits_uint_p, 1);
+  EXPECT (mpf_fits_ushort_p, 1);
+  EXPECT (mpf_fits_slong_p, 1);
+  EXPECT (mpf_fits_sint_p, 1);
+  EXPECT (mpf_fits_sshort_p, 1);
+
+  mpf_set_str_or_abort (f, "-0.5", 10);
+  expr = "-0.5";
+  EXPECT (mpf_fits_ulong_p, 0);
+  EXPECT (mpf_fits_uint_p, 0);
+  EXPECT (mpf_fits_ushort_p, 0);
+  EXPECT (mpf_fits_slong_p, 1);
+  EXPECT (mpf_fits_sint_p, 1);
+  EXPECT (mpf_fits_sshort_p, 1);
+
+
+  mpf_set_str_or_abort (f, "1.000000000000000000000000000000000001", 16);
+  expr = "1.000000000000000000000000000000000001 base 16";
+  EXPECT (mpf_fits_ulong_p, 1);
+  EXPECT (mpf_fits_uint_p, 1);
+  EXPECT (mpf_fits_ushort_p, 1);
+  EXPECT (mpf_fits_slong_p, 1);
+  EXPECT (mpf_fits_sint_p, 1);
+  EXPECT (mpf_fits_sshort_p, 1);
+
+  mpf_set_str_or_abort (f, "1@1000", 16);
+  expr = "1@1000 base 16";
+  EXPECT (mpf_fits_ulong_p, 0);
+  EXPECT (mpf_fits_uint_p, 0);
+  EXPECT (mpf_fits_ushort_p, 0);
+  EXPECT (mpf_fits_slong_p, 0);
+  EXPECT (mpf_fits_sint_p, 0);
+  EXPECT (mpf_fits_sshort_p, 0);
+
+
+  mpf_set_ui (f, 1L);
+  mpf_mul_2exp (f, f, BITS_PER_ULONG + 1);
+  mpf_sub_ui (f, f, 1L);
+  expr = "2^(BITS_PER_ULONG+1) - 1";
+  EXPECT (mpf_fits_ulong_p, 0);
+  EXPECT (mpf_fits_uint_p, 0);
+  EXPECT (mpf_fits_ushort_p, 0);
+  EXPECT (mpf_fits_slong_p, 0);
+  EXPECT (mpf_fits_sint_p, 0);
+  EXPECT (mpf_fits_sshort_p, 0);
+
+  mpf_set_ui (f, 1L);
+  mpf_mul_2exp (f, f, BITS_PER_ULONG + 1);
+  mpf_sub_ui (f, f, 1L);
+  mpf_neg (f, f);
+  expr = "- (2^(BITS_PER_ULONG+1) - 1)";
+  EXPECT (mpf_fits_ulong_p, 0);
+  EXPECT (mpf_fits_uint_p, 0);
+  EXPECT (mpf_fits_ushort_p, 0);
+  EXPECT (mpf_fits_slong_p, 0);
+  EXPECT (mpf_fits_sint_p, 0);
+  EXPECT (mpf_fits_sshort_p, 0);
+
+  mpf_set_ui (f, 1L);
+  mpf_mul_2exp (f, f, BITS_PER_ULONG + 5);
+  mpf_sub_ui (f, f, 1L);
+  expr = "2^(BITS_PER_ULONG+5) - 1";
+  EXPECT (mpf_fits_ulong_p, 0);
+  EXPECT (mpf_fits_uint_p, 0);
+  EXPECT (mpf_fits_ushort_p, 0);
+  EXPECT (mpf_fits_slong_p, 0);
+  EXPECT (mpf_fits_sint_p, 0);
+  EXPECT (mpf_fits_sshort_p, 0);
+
+
+  if (error)  /* every mismatch has been printed by now; fail once at the end */
+    abort ();
+
+  mpf_clear (f);
+  mpf_clear (f0p5);
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpf/t-get_d.c b/tests/mpf/t-get_d.c

new file mode 100644 (file)

index 0000000..8f18f44
--- /dev/null
+++ b/tests/mpf/t-get_d.c
@@ -0,0 +1,106 @@
+/* Test mpf_get_d and mpf_set_d.
+
+   Copyright 1996, 1999, 2000, 2001, 2009 Free Software Foundation, Inc.
+
+   This file is part of the GNU MP Library.
+
+   The GNU MP Library is free software; you can redistribute it and/or modify
+   it under the terms of the GNU Lesser General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or (at your
+   option) any later version.
+
+   The GNU MP Library is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+   License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "tests.h"
+
+#if defined (__vax__)
+#define LOW_BOUND 1e-38
+#define HIGH_BOUND 8e37
+#endif
+
+#if defined (_CRAY) && ! defined (_CRAYIEEE)
+/* The range varies mysteriously between Cray versions.  On an SV1,
+   the range seems to be 1e-600..1e603, but a cfp (non-ieee) T90
+   has a much smaller range of 1e-240..1e240.  */
+#define LOW_BOUND 1e-240
+#define HIGH_BOUND 1e240
+#endif
+
+#if ! defined (LOW_BOUND)
+#define LOW_BOUND 1e-300
+#define HIGH_BOUND 1e300
+#endif
+
+void
+test_denorms (int prc)  /* check that mpf_set_d / mpf_get_d round-trip exactly at default precision prc */
+{
+#ifdef _GMP_IEEE_FLOATS  /* the whole body is compiled out otherwise, leaving a no-op */
+  double d1, d2;
+  mpf_t f;
+  int i;
+
+  mpf_set_default_prec (prc);
+
+  mpf_init (f);
+
+  d1 = 1.9;
+  for (i = 0; i < 820; i++)  /* d1 is multiplied by 0.4 on every pass, so it keeps shrinking */
+    {
+      mpf_set_d (f, d1);
+      d2 = mpf_get_d (f);
+      if (d1 != d2)  /* the double must come back bit-for-bit equal */
+        abort ();
+      d1 *= 0.4;
+    }
+
+  mpf_clear (f);
+#endif
+}
+
+int
+main (int argc, char **argv)  /* argc and argv are not used in this function */
+{
+  double d, e, r;
+  mpf_t u, v;
+
+  tests_start ();
+  mpf_init (u);
+  mpf_init (v);
+
+  mpf_set_d (u, LOW_BOUND);  /* u always holds the previously converted (smaller) value */
+  for (d = 2.0 * LOW_BOUND; d < HIGH_BOUND; d *= 1.01)  /* sweep the range in 1% steps */
+    {
+      mpf_set_d (v, d);
+      if (mpf_cmp (u, v) >= 0)  /* converted values must be strictly increasing */
+       abort ();
+      e = mpf_get_d (v);
+      r = e/d;  /* ratio of the round-tripped double to the original */
+      if (r < 0.99999999999999 || r > 1.00000000000001)
+       {
+         fprintf (stderr, "should be one ulp from 1: %.16f\n", r);
+         abort ();
+       }
+      mpf_set (u, v);
+    }
+
+  mpf_clear (u);
+  mpf_clear (v);
+
+  test_denorms (10);  /* repeat the round-trip check at several default precisions */
+  test_denorms (32);
+  test_denorms (64);
+  test_denorms (100);
+  test_denorms (200);
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpf/t-get_d_2exp.c b/tests/mpf/t-get_d_2exp.c

new file mode 100644 (file)

index 0000000..91ab97e
--- /dev/null
+++ b/tests/mpf/t-get_d_2exp.c
@@ -0,0 +1,121 @@
+/* Test mpf_get_d_2exp.
+
+Copyright 2002, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+static void
+check_onebit (void)  /* check mpf_get_d_2exp on exact powers of two, 2**data[i] */
+{
+  static const long data[] = {
+    -513, -512, -511, -65, -64, -63, -32, -1,
+    0, 1, 32, 53, 54, 64, 128, 256, 511, 512, 513
+  };
+  mpf_t   f;
+  double  got, want;
+  long    got_exp, want_exp;
+  int     i;
+
+  mpf_init2 (f, 1024L);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      mpf_set_ui (f, 1L);
+      if (data[i] >= 0)
+        mpf_mul_2exp (f, f, data[i]);
+      else
+        mpf_div_2exp (f, f, -data[i]);  /* negative exponents are built by dividing */
+      want = 0.5;  /* 2**n is expected back as 0.5 with exponent n+1 */
+      want_exp = data[i] + 1;
+
+      got = mpf_get_d_2exp (&got_exp, f);
+      if (got != want || got_exp != want_exp)
+        {
+          printf    ("mpf_get_d_2exp wrong on 2**%ld\n", data[i]);
+          mpf_trace ("   f    ", f);
+          d_trace   ("   want ", want);
+          d_trace   ("   got  ", got);
+          printf    ("   want exp %ld\n", want_exp);
+          printf    ("   got exp  %ld\n", got_exp);
+          abort();
+        }
+    }
+  mpf_clear (f);
+}
+
+/* Check that hardware rounding doesn't make mpf_get_d_2exp return a value
+   outside its defined range. */
+static void
+check_round (void)
+{
+  static const unsigned long data[] = { 1, 32, 53, 54, 64, 128, 256, 512 };
+  mpf_t   f;
+  double  got;
+  long    got_exp;
+  int     i, rnd_mode, old_rnd_mode;
+
+  mpf_init2 (f, 1024L);
+  old_rnd_mode = tests_hardware_getround ();  /* saved so it can be restored on exit */
+
+  for (rnd_mode = 0; rnd_mode < 4; rnd_mode++)  /* try each mode value 0..3 */
+    {
+      tests_hardware_setround (rnd_mode);
+
+      for (i = 0; i < numberof (data); i++)
+        {
+          mpf_set_ui (f, 1L);
+          mpf_mul_2exp (f, f, data[i]);
+          mpf_sub_ui (f, f, 1L);  /* f = 2**data[i] - 1 */
+
+          got = mpf_get_d_2exp (&got_exp, f);
+          if (got < 0.5 || got >= 1.0)  /* only the range is checked, not the exact value or exponent */
+            {
+              printf    ("mpf_get_d_2exp bad on 2**%lu-1\n", data[i]);
+              printf    ("result out of range, expect 0.5 <= got < 1.0\n");
+              printf    ("   rnd_mode = %d\n", rnd_mode);
+              printf    ("   data[i]  = %lu\n", data[i]);
+              mpf_trace ("   f    ", f);
+              d_trace   ("   got  ", got);
+              printf    ("   got exp  %ld\n", got_exp);
+              abort();
+            }
+        }
+    }
+
+  mpf_clear (f);
+  tests_hardware_setround (old_rnd_mode);  /* put back the mode saved on entry */
+}
+
+
+int
+main (void)  /* run both mpf_get_d_2exp checks between tests_start and tests_end */
+{
+  tests_start ();
+  mp_trace_base = 16;  /* presumably selects hex output for the *_trace helpers -- confirm in the tests library */
+
+  check_onebit ();
+  check_round ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpf/t-get_si.c b/tests/mpf/t-get_si.c

new file mode 100644 (file)

index 0000000..c508041
--- /dev/null
+++ b/tests/mpf/t-get_si.c
@@ -0,0 +1,223 @@
+/* Exercise mpf_get_si.
+
+Copyright 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+void
+check_data (void)  /* check mpf_get_si on values parsed from strings */
+{
+  static const struct {
+    int         base;  /* base passed to mpf_set_str_or_abort */
+    const char  *f;    /* input string */
+    long        want;  /* expected mpf_get_si result */
+  } data[] = {
+    { 10, "0",      0L },
+    { 10, "1",      1L },
+    { 10, "-1",     -1L },
+    { 10, "2",      2L },
+    { 10, "-2",     -2L },
+    { 10, "12345",  12345L },
+    { 10, "-12345", -12345L },
+
+    /* fraction bits ignored */
+    { 10, "0.5",    0L },
+    { 10, "-0.5",   0L },
+    { 10, "1.1",    1L },
+    { 10, "-1.1",   -1L },
+    { 10, "1.9",    1L },
+    { 10, "-1.9",   -1L },
+    { 16, "1.000000000000000000000000000000000000000000000000001", 1L },
+    { 16, "-1.000000000000000000000000000000000000000000000000001", -1L },
+
+    /* low bits extracted (this is undocumented) */
+    { 16, "1000000000000000000000000000000000000000000000000001", 1L },
+    { 16, "-1000000000000000000000000000000000000000000000000001", -1L },
+  };
+
+  int    i;
+  mpf_t  f;
+  long   got;
+
+  mpf_init2 (f, 2000L);
+  for (i = 0; i < numberof (data); i++)
+    {
+      mpf_set_str_or_abort (f, data[i].f, data[i].base);
+
+      got = mpf_get_si (f);
+      if (got != data[i].want)  /* on mismatch, dump the input and both values, then abort */
+       {
+         printf ("mpf_get_si wrong at data[%d]\n", i);
+         printf ("   f     \"%s\"\n", data[i].f);
+         printf ("     dec "); mpf_out_str (stdout, 10, 0, f); printf ("\n");
+         printf ("     hex "); mpf_out_str (stdout, 16, 0, f); printf ("\n");
+         printf ("     size %ld\n", (long) SIZ(f));
+         printf ("     exp  %ld\n", (long) EXP(f));
+         printf ("   got   %ld (0x%lX)\n", got, got);
+         printf ("   want  %ld (0x%lX)\n", data[i].want, data[i].want);
+         abort();
+       }
+    }
+  mpf_clear (f);
+}
+
+
+void
+check_max (void)  /* check that LONG_MAX and LONG_MIN survive mpf_set_si followed by mpf_get_si */
+{
+  mpf_t  f;
+  long   want;
+  long   got;
+
+  mpf_init2 (f, 200L);
+
+#define CHECK_MAX(name)                                         \
+  if (got != want)                                              \
+    {                                                           \
+      printf ("mpf_get_si wrong on %s\n", name);                \
+      printf ("   f    ");                                      \
+      mpf_out_str (stdout, 10, 0, f); printf (", hex ");        \
+      mpf_out_str (stdout, 16, 0, f); printf ("\n");            \
+      printf ("   got  %ld, hex %lX\n", got, got);              \
+      printf ("   want %ld, hex %lX\n", want, want);            \
+      abort();                                                  \
+    }
+
+  want = LONG_MAX;
+  mpf_set_si (f, want);
+  got = mpf_get_si (f);
+  CHECK_MAX ("LONG_MAX");  /* the macro compares the locals got and want and prints f on mismatch */
+
+  want = LONG_MIN;
+  mpf_set_si (f, want);
+  got = mpf_get_si (f);
+  CHECK_MAX ("LONG_MIN");
+
+  mpf_clear (f);
+}
+
+
+void
+check_limbdata (void)  /* check mpf_get_si on mpf values assembled directly from limb data */
+{
+#define M  GMP_NUMB_MAX
+
+  static const struct {
+    mp_exp_t       exp;
+    mp_size_t      size;
+    mp_limb_t      d[10];
+    long           want;  /* signed, matching the return type of mpf_get_si */
+
+  } data[] = {
+
+    /* in the comments here, a "_" indicates a digit (ie. limb) position not
+       included in the d data, and therefore zero */
+
+    { 0, 0, { 0 }, 0L },    /* 0 */
+
+    { 1,  1, { 1 }, 1L },   /* 1 */
+    { 1, -1, { 1 }, -1L },  /* -1 */
+
+    { 0,  1, { 1 }, 0L },   /* .1 */
+    { 0, -1, { 1 }, 0L },   /* -.1 */
+
+    { -1,  1, { 1 }, 0L },  /* ._1 */
+    { -1, -1, { 1 }, 0L },  /* -._1 */
+
+    { -999,          1, { 1 }, 0L },   /* .___1 small */
+    { MP_EXP_T_MIN,  1, { 1 }, 0L },   /* .____1 very small */
+
+    { 999,          1, { 1 }, 0L },    /* 1____. big */
+    { MP_EXP_T_MAX, 1, { 1 }, 0L },    /* 1_____. very big */
+
+    { 1, 2, { 999, 2 }, 2L },                  /* 2.9 */
+    { 5, 8, { 7, 8, 9, 3, 0, 0, 0, 1 }, 3L },  /* 10003.987 */
+
+    { 2, 2, { M, M },    LONG_MAX }, /* FF. */
+    { 2, 2, { M, M, M }, LONG_MAX }, /* FF.F */
+    { 3, 3, { M, M, M }, LONG_MAX }, /* FFF. */
+
+#if GMP_NUMB_BITS >= BITS_PER_ULONG
+    /* normal case, numb bigger than long */
+    { 2,  1, { 1 },    0L },      /* 1_. */
+    { 2,  2, { 0, 1 }, 0L },      /* 10. */
+    { 2,  2, { 999, 1 }, 999L },  /* 19. */
+    { 3,  2, { 999, 1 }, 0L },    /* 19_. */
+
+#else
+    /* nails case, numb smaller than long */
+    { 2,  1, { 1 }, 1L << GMP_NUMB_BITS },  /* 1_. */
+    { 3,  1, { 1 }, 0L },                   /* 1__. */
+
+    { 2,  2, { 99, 1 },    99L + (1L << GMP_NUMB_BITS) },  /* 19. */
+    { 3,  2, { 1, 99 },    1L << GMP_NUMB_BITS },          /* 91_. */
+    { 3,  3, { 0, 1, 99 }, 1L << GMP_NUMB_BITS },          /* 910. */
+
+#endif
+  };
+
+  mpf_t          f;
+  long           got;
+  int            i;
+  mp_limb_t      buf[20 + numberof(data[i].d)];  /* room for 10 guard limbs on each side of the data */
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      refmpn_fill (buf, 10, CNST_LIMB(0xDEADBEEF));  /* guard limbs below the data */
+      refmpn_copy (buf+10, data[i].d, ABS(data[i].size));
+      refmpn_fill (buf+10+ABS(data[i].size), 10, CNST_LIMB(0xDEADBEEF));  /* guard limbs above the data */
+
+      PTR(f) = buf+10;  /* f is assembled by hand on top of buf; it is never mpf_init'ed or cleared */
+      EXP(f) = data[i].exp;
+      SIZ(f) = data[i].size;
+      PREC(f) = numberof (data[i].d);
+      MPF_CHECK_FORMAT (f);
+
+      got = mpf_get_si (f);
+      if (got != data[i].want)
+       {
+         printf    ("mpf_get_si wrong at limb data[%d]\n", i);
+         mpf_trace ("  f", f);
+         mpn_trace ("  d", data[i].d, data[i].size);
+         printf    ("  size %ld\n", (long) data[i].size);
+         printf    ("  exp %ld\n", (long) data[i].exp);
+         printf    ("  got   %ld (0x%lX)\n", got, (unsigned long) got);
+         printf    ("  want  %ld (0x%lX)\n", data[i].want, (unsigned long) data[i].want);
+         abort();
+       }
+    }
+}
+
+
+int
+main (void)  /* run the three mpf_get_si checks between tests_start and tests_end */
+{
+  tests_start ();
+
+  check_data ();
+  check_max ();
+  check_limbdata ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpf/t-get_ui.c b/tests/mpf/t-get_ui.c

new file mode 100644 (file)

index 0000000..f579513
--- /dev/null
+++ b/tests/mpf/t-get_ui.c
@@ -0,0 +1,128 @@
+/* Exercise mpf_get_ui.
+
+Copyright 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+void
+check_limbdata (void)  /* check mpf_get_ui on mpf values assembled directly from limb data */
+{
+#define M  GMP_NUMB_MAX
+
+  static const struct {
+    mp_exp_t       exp;
+    mp_size_t      size;
+    mp_limb_t      d[10];
+    unsigned long  want;
+
+  } data[] = {
+
+    /* in the comments here, a "_" indicates a digit (ie. limb) position not
+       included in the d data, and therefore zero */
+
+    { 0, 0, { 0 }, 0L },    /* 0 */
+
+    { 1,  1, { 1 }, 1L },   /* 1 */
+    { 1, -1, { 1 }, 1L },   /* -1 */
+
+    { 0,  1, { 1 }, 0L },   /* .1 */
+    { 0, -1, { 1 }, 0L },   /* -.1 */
+
+    { -1,  1, { 1 }, 0L },  /* ._1 */
+    { -1, -1, { 1 }, 0L },  /* -._1 */
+
+    { -999,          1, { 1 }, 0L },   /* .___1 small */
+    { MP_EXP_T_MIN,  1, { 1 }, 0L },   /* .____1 very small */
+
+    { 999,          1, { 1 }, 0L },    /* 1____. big */
+    { MP_EXP_T_MAX, 1, { 1 }, 0L },    /* 1_____. very big */
+
+    { 1, 2, { 999, 2 }, 2L },                  /* 2.9 */
+    { 5, 8, { 7, 8, 9, 3, 0, 0, 0, 1 }, 3L },  /* 10003.987 */
+
+    { 2, 2, { M, M },    ULONG_MAX }, /* FF. */
+    { 2, 2, { M, M, M }, ULONG_MAX }, /* FF.F */
+    { 3, 3, { M, M, M }, ULONG_MAX }, /* FFF. */
+
+#if GMP_NUMB_BITS >= BITS_PER_ULONG
+    /* normal case, numb bigger than long */
+    { 2,  1, { 1 },    0L },      /* 1_. */
+    { 2,  2, { 0, 1 }, 0L },      /* 10. */
+    { 2,  2, { 999, 1 }, 999L },  /* 19. */
+    { 3,  2, { 999, 1 }, 0L },    /* 19_. */
+
+#else
+    /* nails case, numb smaller than long */
+    { 2,  1, { 1 }, 1L << GMP_NUMB_BITS },  /* 1_. */
+    { 3,  1, { 1 }, 0L },                   /* 1__. */
+
+    { 2,  2, { 99, 1 },    99L + (1L << GMP_NUMB_BITS) },  /* 19. */
+    { 3,  2, { 1, 99 },    1L << GMP_NUMB_BITS },          /* 91_. */
+    { 3,  3, { 0, 1, 99 }, 1L << GMP_NUMB_BITS },          /* 910. */
+
+#endif
+  };
+
+  mpf_t          f;
+  unsigned long  got;
+  int            i;
+  mp_limb_t      buf[20 + numberof(data[i].d)];  /* room for 10 guard limbs on each side of the data */
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      refmpn_fill (buf, 10, CNST_LIMB(0xDEADBEEF));  /* guard limbs below the data */
+      refmpn_copy (buf+10, data[i].d, ABS(data[i].size));
+      refmpn_fill (buf+10+ABS(data[i].size), 10, CNST_LIMB(0xDEADBEEF));  /* guard limbs above the data */
+
+      PTR(f) = buf+10;  /* f is assembled by hand on top of buf; it is never mpf_init'ed or cleared */
+      EXP(f) = data[i].exp;
+      SIZ(f) = data[i].size;
+      PREC(f) = numberof (data[i].d);
+      MPF_CHECK_FORMAT (f);
+
+      got = mpf_get_ui (f);
+      if (got != data[i].want)  /* on mismatch, dump the case and abort */
+       {
+         printf    ("mpf_get_ui wrong at limb data[%d]\n", i);
+         mpf_trace ("  f", f);
+         mpn_trace ("  d", data[i].d, data[i].size);
+         printf    ("  size %ld\n", (long) data[i].size);
+         printf    ("  exp %ld\n", (long) data[i].exp);
+         printf    ("  got   %lu (0x%lX)\n", got, got);
+         printf    ("  want  %lu (0x%lX)\n", data[i].want, data[i].want);
+         abort();
+       }
+    }
+}
+
+int
+main (void)  /* run the mpf_get_ui limb-data check between tests_start and tests_end */
+{
+  tests_start ();
+  mp_trace_base = 16;  /* presumably selects hex output for the *_trace helpers -- confirm in the tests library */
+
+  check_limbdata ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpf/t-gsprec.c b/tests/mpf/t-gsprec.c

new file mode 100644 (file)

index 0000000..d0d7aba
--- /dev/null
+++ b/tests/mpf/t-gsprec.c
@@ -0,0 +1,62 @@
+/* Test mpf_get_prec and mpf_set_prec.
+
+Copyright 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+void
+check_consistency (void)  /* check that feeding mpf_get_prec's result back into mpf_set_prec is stable */
+{
+  mpf_t  x;
+  unsigned long  i, a, b;
+
+  mpf_init (x);
+
+  for (i = 1; i < 2000; i++)
+    {
+      mpf_set_prec (x, i);
+      a = mpf_get_prec (x);  /* precision reported after requesting i */
+      mpf_set_prec (x, a);
+      b = mpf_get_prec (x);  /* precision reported after requesting a */
+      if (a != b)  /* a itself may differ from i; only a == b is required */
+        {
+          printf ("mpf_get_prec / mpf_set_prec inconsistent\n");
+          printf ("   set %lu gives %lu, but then set %lu gives %lu\n",
+                  i, a,
+                  a, b);
+          abort ();
+        }
+    }
+
+  mpf_clear (x);
+}
+
+int
+main (void)  /* run the precision consistency check between tests_start and tests_end */
+{
+  tests_start ();
+
+  check_consistency ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpf/t-inp_str.c b/tests/mpf/t-inp_str.c

new file mode 100644 (file)

index 0000000..8a50816
--- /dev/null
+++ b/tests/mpf/t-inp_str.c
@@ -0,0 +1,191 @@
+/* Test mpf_inp_str.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#if HAVE_UNISTD_H
+#include <unistd.h>            /* for unlink */
+#endif
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+#define FILENAME  "t-inp_str.tmp"
+
+/* Check mpf_inp_str results and read counts over a table of inputs.  */
+void
+check_data (void)
+{
+  static const struct {
+    const char  *inp;           /* text written to the file */
+    int         base;           /* base passed to mpf_inp_str */
+    const char  *want;          /* expected value, parsed in base 10 */
+    int         want_nread;     /* expected return, before "pre" is added */
+
+  } data[] = {
+
+    { "0",   10, "0", 1 },
+
+    { "abc", 10, "0", 0 },
+    { "ghi", 16, "0", 0 },
+
+    { "125",    10, "125",  3 },
+    { "125e1",  10, "1250", 5 },
+    { "125e-1", 10, "12.5", 6 },
+
+    {  "ff", 16,  "255", 2 },
+    { "-ff", 16, "-255", 3 },
+    {  "FF", 16,  "255", 2 },
+    { "-FF", 16, "-255", 3 },
+
+    { "100",     16, "256",  3 },
+    { "100@1",   16, "4096", 5 },
+    { "100@10",  16, "4722366482869645213696", 6 },
+    { "100@10", -16, "281474976710656",        6 },
+    { "100@-1",  16, "16",   6 },
+    { "10000000000000000@-10",  16, "1", 21 },
+    { "10000000000@-10",       -16, "1", 15 },
+
+    { "z", 36, "35", 1 },
+    { "Z", 36, "35", 1 },
+    { "z@1", 36, "1260", 3 },
+    { "Z@1", 36, "1260", 3 },
+
+    {  "0",      0,   "0", 1 },
+  };
+
+  mpf_t  got, want;
+  long   ftell_nread;
+  int    i, pre, post, j, got_nread, want_nread;
+  FILE   *fp;
+
+  mpf_init (got);
+  mpf_init (want);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      for (pre = 0; pre <= 3; pre++)            /* spaces written before inp */
+        {
+          for (post = 0; post <= 2; post++)     /* spaces written after inp */
+            {
+              mpf_set_str_or_abort (want, data[i].want, 10);
+              MPF_CHECK_FORMAT (want);
+
+              /* create the file new each time to ensure its length is what
+                 we want */
+              fp = fopen (FILENAME, "w+");
+              ASSERT_ALWAYS (fp != NULL);
+              for (j = 0; j < pre; j++)
+                putc (' ', fp);
+              fputs (data[i].inp, fp);
+              for (j = 0; j < post; j++)
+                putc (' ', fp);
+              fflush (fp);
+              ASSERT_ALWAYS (! ferror(fp));
+
+              rewind (fp);
+              got_nread = mpf_inp_str (got, fp, data[i].base);
+
+              if (got_nread != 0)       /* non-zero return must match ftell */
+                {
+                  ftell_nread = ftell (fp);
+                  if (got_nread != ftell_nread)
+                    {
+                      printf ("mpf_inp_str nread wrong\n");
+                      printf ("  inp          \"%s\"\n", data[i].inp);
+                      printf ("  base         %d\n", data[i].base);
+                      printf ("  pre          %d\n", pre);
+                      printf ("  post         %d\n", post);
+                      printf ("  got_nread    %d\n", got_nread);
+                      printf ("  ftell_nread  %ld\n", ftell_nread);
+                      abort ();
+                    }
+                }
+
+              /* if data[i].inp is a whole string to read and there's no post
+                 whitespace then expect to have EOF */
+              if (post == 0 && data[i].want_nread == strlen(data[i].inp))
+                {
+                  int  c = getc(fp);
+                  if (c != EOF)
+                    {
+                      printf ("mpf_inp_str didn't read to EOF\n");
+                      printf ("  inp   \"%s\"\n", data[i].inp);
+                      printf ("  base  %d\n", data[i].base);
+                      printf ("  pre   %d\n", pre);
+                      printf ("  post  %d\n", post);
+                      printf ("  c     '%c' %#x\n", c, c);
+                      abort ();
+                    }
+                }
+
+              /* only expect "pre" included in the count when non-zero */
+              want_nread = data[i].want_nread;
+              if (want_nread != 0)
+                want_nread += pre;
+
+              if (got_nread != want_nread)
+                {
+                  printf ("mpf_inp_str nread wrong\n");
+                  printf ("  inp         \"%s\"\n", data[i].inp);
+                  printf ("  base        %d\n", data[i].base);
+                  printf ("  pre         %d\n", pre);
+                  printf ("  post        %d\n", post);
+                  printf ("  got_nread   %d\n", got_nread);
+                  printf ("  want_nread  %d\n", want_nread);
+                  abort ();
+                }
+
+              MPF_CHECK_FORMAT (got);
+
+              if (mpf_cmp (got, want) != 0)
+                {
+                  printf ("mpf_inp_str wrong result\n");
+                  printf ("  inp   \"%s\"\n", data[i].inp);
+                  printf ("  base  %d\n", data[i].base);
+                  mpf_trace ("  got ",  got);
+                  mpf_trace ("  want", want);
+                  abort ();
+                }
+
+              ASSERT_ALWAYS (fclose (fp) == 0);
+            }
+        }
+    }
+
+  mpf_clear (got);
+  mpf_clear (want);
+}
+/* Entry point: run check_data, then remove the scratch file FILENAME.  */
+int
+main (void)
+{
+  tests_start ();
+
+  check_data ();
+
+  unlink (FILENAME);            /* scratch file that check_data fopen()s */
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpf/t-int_p.c b/tests/mpf/t-int_p.c

new file mode 100644 (file)

index 0000000..11bc90f
--- /dev/null
+++ b/tests/mpf/t-int_p.c
@@ -0,0 +1,84 @@
+/* Test mpf_integer_p.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+/* Abort with a diagnostic unless mpf_integer_p (f) returns exactly "want".  */
+void
+one (mpf_srcptr f, int want)
+{
+  const int  result = mpf_integer_p (f);
+
+  if (result == want)
+    return;
+
+  printf ("mpf_integer_p got %d want %d\n", result, want);
+  mpf_trace (" f", f);
+  abort ();
+}
+/* Apply one() to f and then to -f; f is left negated on return.  */
+void
+all (mpf_ptr f, int want)
+{
+  one (f, want);
+  mpf_neg (f, f);               /* negate in place for the second check */
+  one (f, want);
+}
+/* Entry point: check mpf_integer_p on zero, integers and non-integers.  */
+int
+main (void)
+{
+  mpf_t  f;
+
+  tests_start ();
+  mpf_init2 (f, 200L);
+
+  mpf_set_ui (f, 0L);
+  one (f, 1);                   /* zero: checked once, not negated */
+
+  mpf_set_ui (f, 1L);
+  all (f, 1);
+
+  mpf_set_ui (f, 1L);
+  mpf_div_2exp (f, f, 1L);      /* 1/2 */
+  all (f, 0);
+
+  mpf_set_ui (f, 1L);
+  mpf_div_2exp (f, f, 5000L);   /* 2^-5000 */
+  all (f, 0);
+
+  mpf_set_ui (f, 1L);
+  mpf_mul_2exp (f, f, 5000L);   /* 2^5000 */
+  all (f, 1);
+
+  mpf_set_str_or_abort (f, "0.5", 10);  /* a failed parse must not pass */
+  all (f, 0);
+
+  mpf_set_ui (f, 1L);
+  mpf_div_ui (f, f, 3L);        /* 1/3 */
+  all (f, 0);
+
+  mpf_clear (f);
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpf/t-mul_ui.c b/tests/mpf/t-mul_ui.c

new file mode 100644 (file)

index 0000000..a4fd75c
--- /dev/null
+++ b/tests/mpf/t-mul_ui.c
@@ -0,0 +1,165 @@
+/* Exercise mpf_mul_ui.
+
+Copyright 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+/* Abort unless got matches u*v as recomputed here with mpn_mul_1.  */
+void
+check_one (const char *desc, mpf_ptr got, mpf_srcptr u, unsigned long v)
+{
+  mp_size_t  usize, usign;
+  mp_ptr     wp;
+  mpf_t      want;              /* filled in by hand below, never mpf_init'ed */
+
+  MPF_CHECK_FORMAT (got);
+
+  /* this code not nailified yet */
+  ASSERT_ALWAYS (BITS_PER_ULONG <= GMP_NUMB_BITS);
+  usign = SIZ (u);
+  usize = ABS (usign);
+  wp = refmpn_malloc_limbs (usize + 1);
+  wp[usize] = mpn_mul_1 (wp, PTR(u), usize, (mp_limb_t) v);  /* carry limb */
+
+  PTR(want) = wp;
+  SIZ(want) = (usign >= 0 ? usize+1 : -(usize+1));
+  EXP(want) = EXP(u) + 1;       /* the product has one more limb than u */
+  refmpf_normalize (want);
+
+  if (! refmpf_validate ("mpf_mul_ui", got, want))
+    {
+      mp_trace_base = -16;
+      printf    ("  %s\n", desc);
+      mpf_trace ("  u", u);
+      printf    ("  v %lu  0x%lX\n", v, v);     /* v is unsigned: %lu */
+      abort ();
+    }
+
+  free (wp);
+}
+/* Check mpf_mul_ui on random u and v, with and without src/dst overlap.  */
+void
+check_rand (void)
+{
+  unsigned long  min_prec = __GMPF_BITS_TO_PREC (1);
+  gmp_randstate_ptr  rands = RANDS;
+  mpf_t              got, u;
+  unsigned long      prec, v;
+  int                i;
+
+  /* The nails code in mpf_mul_ui currently isn't exact, so suppress these
+     tests for now.  */
+  if (BITS_PER_ULONG > GMP_NUMB_BITS)
+    return;
+
+  mpf_init (got);
+  mpf_init (u);
+
+  for (i = 0; i < 200; i++)
+    {
+      /* got precision */
+      prec = min_prec + gmp_urandomm_ui (rands, 15L);
+      refmpf_set_prec_limbs (got, prec);
+
+      /* u precision */
+      prec = min_prec + gmp_urandomm_ui (rands, 15L);
+      refmpf_set_prec_limbs (u, prec);
+
+      /* u, possibly negative */
+      mpf_random2 (u, PREC(u), (mp_exp_t) 20);
+      if (gmp_urandomb_ui (rands, 1L))
+        mpf_neg (u, u);
+
+      /* v, 0 to BITS_PER_ULONG bits (inclusive) */
+      prec = gmp_urandomm_ui (rands, BITS_PER_ULONG+1);
+      v = gmp_urandomb_ui (rands, prec);
+
+      if ((i % 2) == 0)         /* even iterations: distinct dst and src */
+        {
+          /* separate */
+          mpf_mul_ui (got, u, v);
+          check_one ("separate", got, u, v);
+        }
+      else
+        {
+          /* overlap */
+          prec = refmpf_set_overlap (got, u);
+          mpf_mul_ui (got, got, v);
+          check_one ("overlap src==dst", got, u, v);
+
+          mpf_set_prec_raw (got, prec);  /* value refmpf_set_overlap returned */
+        }
+    }
+
+  mpf_clear (got);
+  mpf_clear (u);
+}
+/* Check mpf_mul_ui on the fixed cases 0*ULONG_MAX and 1*ULONG_MAX.  */
+void
+check_various (void)
+{
+  mpf_t  u, got, want;
+  char   *s;                    /* description of the case, for the report */
+
+  mpf_init2 (u,    2*8*sizeof(long));
+  mpf_init2 (got,  2*8*sizeof(long));
+  mpf_init2 (want, 2*8*sizeof(long));
+
+  s = "0 * ULONG_MAX";
+  mpf_set_ui (u, 0L);
+  mpf_mul_ui (got, u, ULONG_MAX);
+  MPF_CHECK_FORMAT (got);
+  mpf_set_ui (want, 0L);
+  if (mpf_cmp (got, want) != 0)
+    {
+    error:                      /* shared report; also reached by goto below */
+      printf ("Wrong result from %s\n", s);
+      mpf_trace ("u   ", u);
+      mpf_trace ("got ", got);
+      mpf_trace ("want", want);
+      abort ();
+    }
+
+  s = "1 * ULONG_MAX";
+  mpf_set_ui (u, 1L);
+  mpf_mul_ui (got, u, ULONG_MAX);
+  MPF_CHECK_FORMAT (got);
+  mpf_set_ui (want, ULONG_MAX);
+  if (mpf_cmp (got, want) != 0)
+    goto error;
+
+  mpf_clear (u);
+  mpf_clear (got);
+  mpf_clear (want);
+}
+/* Entry point: run the fixed cases first, then the random ones.  */
+int
+main (void)
+{
+  tests_start ();
+
+  check_various ();
+  check_rand ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpf/t-muldiv.c b/tests/mpf/t-muldiv.c

new file mode 100644 (file)

index 0000000..3ce1292
--- /dev/null
+++ b/tests/mpf/t-muldiv.c
@@ -0,0 +1,159 @@
+/* Test mpf_mul, mpf_div, mpf_ui_div, and mpf_div_ui.
+
+Copyright 1996, 2000, 2001, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+#ifndef SIZE
+#define SIZE 16
+#endif
+/* Entry point: cross-check mpf_div with mpf_mul, mpf_div_ui, mpf_ui_div.  */
+int
+main (int argc, char **argv)
+{
+  mp_size_t size;
+  mp_exp_t exp;
+  int reps = 10000;             /* iterations; argv[1] overrides */
+  int i;
+  mpf_t u, v, w, x;
+  mp_size_t bprec = SIZE * GMP_LIMB_BITS;       /* argv[2] overrides */
+  mpf_t rerr, limit_rerr;
+  unsigned long ulimb, vlimb;
+  int single_flag;              /* 0 none, 1 u is ulimb, 2 v is vlimb */
+
+  tests_start ();
+
+  if (argc > 1)
+    {
+      reps = strtol (argv[1], 0, 0);
+      if (argc > 2)
+       bprec = strtol (argv[2], 0, 0);
+    }
+
+  mpf_set_default_prec (bprec);
+
+  mpf_init (rerr);
+  mpf_init (limit_rerr);
+
+  mpf_init (u);
+  mpf_init (v);
+  mpf_init (w);
+  mpf_init (x);
+
+  for (i = 0; i < reps; i++)
+    {
+      mp_size_t res_prec;
+
+      res_prec = urandom () % bprec + 1;        /* 1 .. bprec */
+      mpf_set_prec (w, res_prec);
+      mpf_set_prec (x, res_prec);
+
+      mpf_set_ui (limit_rerr, 1);
+      mpf_div_2exp (limit_rerr, limit_rerr, res_prec - 1);
+
+      single_flag = 0;
+
+      if ((urandom () & 1) != 0)
+       {
+         size = urandom () % (2 * SIZE) - SIZE;
+         exp = urandom () % SIZE;
+         mpf_random2 (u, size, exp);
+       }
+      else
+       {
+         ulimb = urandom ();
+         mpf_set_ui (u, ulimb);
+         single_flag = 1;
+       }
+
+      if ((urandom () & 1) != 0)
+       {
+         size = urandom () % (2 * SIZE) - SIZE;
+         exp = urandom () % SIZE;
+         mpf_random2 (v, size, exp);
+       }
+      else
+       {
+         vlimb = urandom ();
+         mpf_set_ui (v, vlimb);
+         single_flag = 2;       /* replaces a 1 set for u above */
+       }
+
+      if (mpf_sgn (v) == 0)     /* skip zero divisors */
+       continue;
+
+      mpf_div (w, u, v);
+      mpf_mul (x, w, v);        /* x = (u/v)*v, compared with u below */
+      mpf_reldiff (rerr, u, x);
+      if (mpf_cmp (rerr, limit_rerr) > 0)
+       {
+         printf ("ERROR in mpf_mul or mpf_div after %d tests\n", i);
+         printf ("   u = "); mpf_dump (u);
+         printf ("   v = "); mpf_dump (v);
+         printf ("   x = "); mpf_dump (x);
+         printf ("   w = "); mpf_dump (w);
+         abort ();
+       }
+
+      if (single_flag == 2)     /* v came from vlimb: compare mpf_div_ui */
+       {
+         mpf_div_ui (x, u, vlimb);
+         mpf_reldiff (rerr, w, x);
+         if (mpf_cmp (rerr, limit_rerr) > 0)
+           {
+             printf ("ERROR in mpf_div or mpf_div_ui after %d tests\n", i);
+             printf ("   u = "); mpf_dump (u);
+             printf ("   v = "); mpf_dump (v);
+             printf ("   x = "); mpf_dump (x);
+             printf ("   w = "); mpf_dump (w);
+             abort ();
+           }
+       }
+
+      if (single_flag == 1)     /* only u came from ulimb: compare mpf_ui_div */
+       {
+         mpf_ui_div (x, ulimb, v);
+         mpf_reldiff (rerr, w, x);
+         if (mpf_cmp (rerr, limit_rerr) > 0)
+           {
+             printf ("ERROR in mpf_div or mpf_ui_div after %d tests\n", i);
+             printf ("   u = "); mpf_dump (u);
+             printf ("   v = "); mpf_dump (v);
+             printf ("   x = "); mpf_dump (x);
+             printf ("   w = "); mpf_dump (w);
+             abort ();
+           }
+       }
+    }
+
+  mpf_clear (rerr);
+  mpf_clear (limit_rerr);
+
+  mpf_clear (u);
+  mpf_clear (v);
+  mpf_clear (w);
+  mpf_clear (x);
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpf/t-set.c b/tests/mpf/t-set.c

new file mode 100644 (file)

index 0000000..48336a8
--- /dev/null
+++ b/tests/mpf/t-set.c
@@ -0,0 +1,52 @@
+/* Test mpf_set.
+
+Copyright 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+/* Check mpf_set (f, f) on an f holding more limbs than its precision.  */
+void
+check_reuse (void)
+{
+  /* Try mpf_set(f,f) when f is bigger than prec.  In the past this had
+     resulted in an MPN_COPY with invalid operand overlap. */
+  mpf_t  f;
+  mp_size_t      limbs = 20;
+  unsigned long  bits = limbs * GMP_NUMB_BITS;
+  mpf_init2 (f, bits);
+  refmpf_fill (f, limbs, GMP_NUMB_MAX);
+  mpf_set_prec_raw (f, bits / 2);       /* halve prec, data stays as filled */
+  mpf_set (f, f);
+  MPF_CHECK_FORMAT (f);
+  mpf_set_prec_raw (f, bits);           /* back to the mpf_init2 precision */
+  mpf_clear (f);
+}
+/* Entry point: run check_reuse between tests_start and tests_end.  */
+int
+main (void)
+{
+  tests_start ();
+
+  check_reuse ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpf/t-set_q.c b/tests/mpf/t-set_q.c

new file mode 100644 (file)

index 0000000..9dfa04f
--- /dev/null
+++ b/tests/mpf/t-set_q.c
@@ -0,0 +1,127 @@
+/* Test mpf_set_q.
+
+Copyright 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+/* Set got from q and validate it as the division of q's num by its den.  */
+void
+check_one (mpf_ptr got, mpq_srcptr q)
+{
+  mpf_t  n, d;                  /* filled in by hand below, never mpf_init'ed */
+
+  mpf_set_q (got, q);
+
+  PTR(n) = PTR(&q->_mp_num);    /* n reuses the numerator's limb pointer */
+  SIZ(n) = SIZ(&q->_mp_num);
+  EXP(n) = ABSIZ(&q->_mp_num);
+
+  PTR(d) = PTR(&q->_mp_den);    /* d reuses the denominator's limb pointer */
+  SIZ(d) = SIZ(&q->_mp_den);
+  EXP(d) = ABSIZ(&q->_mp_den);
+
+  if (! refmpf_validate_division ("mpf_set_q", got, n, d))
+    {
+      mp_trace_base = -16;
+      mpq_trace ("   q", q);
+      abort ();
+    }
+}
+/* Run check_one on 400 random rationals at random result precisions.  */
+void
+check_rand (void)
+{
+  unsigned long  min_prec = __GMPF_BITS_TO_PREC (1);
+  gmp_randstate_ptr  rands = RANDS;
+  unsigned long  prec;          /* reused: result limbs, then operand bits */
+  mpf_t  got;
+  mpq_t  q;
+  int    i;
+
+  mpf_init (got);
+  mpq_init (q);
+
+  for (i = 0; i < 400; i++)
+    {
+      /* result precision */
+      prec = min_prec + gmp_urandomm_ui (rands, 20L);
+      refmpf_set_prec_limbs (got, prec);
+
+      /* num */
+      prec = gmp_urandomm_ui (rands, 20L * GMP_NUMB_BITS);
+      mpz_rrandomb (mpq_numref(q), rands, prec);
+
+      /* possibly negative num */
+      if (gmp_urandomb_ui (rands, 1L))
+        mpz_neg (mpq_numref(q), mpq_numref(q));
+
+      /* den, non-zero */
+      do {
+        prec = gmp_urandomm_ui (rands, 20L * GMP_NUMB_BITS);
+        mpz_rrandomb (mpq_denref(q), rands, prec);
+      } while (mpz_sgn (mpq_denref(q)) <= 0);   /* retry until positive */
+
+      check_one (got, q);
+    }
+
+  mpf_clear (got);
+  mpq_clear (q);
+}
+/* Check mpf_set_q on two fixed rationals: 1/1 and 1/(2^800+1).  */
+void
+check_various (void)
+{
+  mpf_t got;
+  mpq_t q;
+
+  mpf_init (got);
+  mpq_init (q);
+
+  /* 1/1 == 1 */
+  mpf_set_prec (got, 20L);
+  mpq_set_ui (q, 1L, 1L);
+  mpf_set_q (got, q);
+  MPF_CHECK_FORMAT (got);
+  ASSERT_ALWAYS (mpf_cmp_ui (got, 1L) == 0);
+
+  /* 1/(2^n+1), a case where truncating the divisor would be wrong */
+  mpf_set_prec (got, 500L);
+  mpq_set_ui (q, 1L, 1L);
+  mpz_mul_2exp (mpq_denref(q), mpq_denref(q), 800L);    /* den = 2^800 */
+  mpz_add_ui (mpq_denref(q), mpq_denref(q), 1L);        /* den = 2^800 + 1 */
+  check_one (got, q);
+
+  mpf_clear (got);
+  mpq_clear (q);
+}
+/* Entry point: run the fixed cases first, then the random ones.  */
+int
+main (void)
+{
+  tests_start ();
+
+  check_various ();
+  check_rand ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpf/t-set_si.c b/tests/mpf/t-set_si.c

new file mode 100644 (file)

index 0000000..b9519ee
--- /dev/null
+++ b/tests/mpf/t-set_si.c
@@ -0,0 +1,91 @@
+/* Test mpf_set_si and mpf_init_set_si.
+
+Copyright 2000, 2001, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+/* Check the size, limbs and exponent from mpf_set_si and mpf_init_set_si.  */
+void
+check_data (void)
+{
+  static const struct {
+    long       x;               /* value passed to the function under test */
+    mp_size_t  want_size;       /* expected _mp_size; its ABS is the _mp_exp */
+    mp_limb_t  want_data[2];    /* expected limbs, compared from index 0 */
+  } data[] = {
+
+    {  0L,  0 },
+    {  1L,  1, { 1 } },
+    { -1L, -1, { 1 } },
+
+#if GMP_NUMB_BITS >= BITS_PER_ULONG
+    { LONG_MAX,  1, { LONG_MAX, 0 } },
+    { -LONG_MAX,  -1, { LONG_MAX, 0 } },
+    { LONG_HIGHBIT,  -1, { ULONG_HIGHBIT, 0 } },
+#else
+    { LONG_MAX,  2, { LONG_MAX & GMP_NUMB_MASK, LONG_MAX >> GMP_NUMB_BITS } },
+    { -LONG_MAX,  -2, { LONG_MAX & GMP_NUMB_MASK, LONG_MAX >> GMP_NUMB_BITS }},
+    { LONG_HIGHBIT,  -2, { 0, ULONG_HIGHBIT >> GMP_NUMB_BITS } },
+#endif
+  };
+
+  mpf_t  x;
+  int    i;
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      mpf_init (x);             /* first pass: mpf_init then mpf_set_si */
+      mpf_set_si (x, data[i].x);
+      MPF_CHECK_FORMAT (x);
+      if (x->_mp_size != data[i].want_size
+          || refmpn_cmp_allowzero (x->_mp_d, data[i].want_data,
+                                   ABS (data[i].want_size)) != 0
+          || x->_mp_exp != ABS (data[i].want_size))
+        {
+          printf ("mpf_set_si wrong on data[%d]\n", i);
+          abort();
+        }
+      mpf_clear (x);
+
+      mpf_init_set_si (x, data[i].x);   /* second pass: combined init+set */
+      MPF_CHECK_FORMAT (x);
+      if (x->_mp_size != data[i].want_size
+          || refmpn_cmp_allowzero (x->_mp_d, data[i].want_data,
+                                   ABS (data[i].want_size)) != 0
+          || x->_mp_exp != ABS (data[i].want_size))
+        {
+          printf ("mpf_init_set_si wrong on data[%d]\n", i);
+          abort();
+        }
+      mpf_clear (x);
+    }
+}
+/* Entry point: run check_data between tests_start and tests_end.  */
+int
+main (void)
+{
+  tests_start ();
+
+  check_data ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpf/t-set_ui.c b/tests/mpf/t-set_ui.c

new file mode 100644 (file)

index 0000000..fef529b
--- /dev/null
+++ b/tests/mpf/t-set_ui.c
@@ -0,0 +1,90 @@
+/* Test mpf_set_ui and mpf_init_set_ui.
+
+Copyright 2000, 2001, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+/* Check the size, limbs and exponent from mpf_set_ui and mpf_init_set_ui.  */
+void
+check_data (void)
+{
+  static const struct {
+    unsigned long  x;           /* value passed to the function under test */
+    mp_size_t      want_size;   /* expected _mp_size, and expected _mp_exp */
+    mp_limb_t      want_data[2];        /* expected limbs, from index 0 */
+  } data[] = {
+
+    {  0L,  0 },
+    {  1L,  1, { 1 } },
+
+#if GMP_NUMB_BITS >= BITS_PER_ULONG
+    { ULONG_MAX,     1, { ULONG_MAX, 0 } },
+    { ULONG_HIGHBIT, 1, { ULONG_HIGHBIT, 0 } },
+#else
+    { ULONG_MAX,     2, { ULONG_MAX & GMP_NUMB_MASK,
+                          ULONG_MAX >> GMP_NUMB_BITS } },
+    { ULONG_HIGHBIT, 2, { 0,    /* unsigned constant, as in the #if branch */
+                          ULONG_HIGHBIT >> GMP_NUMB_BITS } },
+#endif
+  };
+
+  mpf_t  x;
+  int    i;
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      mpf_init (x);             /* first pass: mpf_init then mpf_set_ui */
+      mpf_set_ui (x, data[i].x);
+      MPF_CHECK_FORMAT (x);
+      if (x->_mp_size != data[i].want_size
+          || refmpn_cmp_allowzero (x->_mp_d, data[i].want_data,
+                                   ABS (data[i].want_size)) != 0
+          || x->_mp_exp != ABS (data[i].want_size))
+        {
+          printf ("mpf_set_ui wrong on data[%d]\n", i);
+          abort();
+        }
+      mpf_clear (x);
+
+      mpf_init_set_ui (x, data[i].x);   /* second pass: combined init+set */
+      MPF_CHECK_FORMAT (x);
+      if (x->_mp_size != data[i].want_size
+          || refmpn_cmp_allowzero (x->_mp_d, data[i].want_data,
+                                   ABS (data[i].want_size)) != 0
+          || x->_mp_exp != ABS (data[i].want_size))
+        {
+          printf ("mpf_init_set_ui wrong on data[%d]\n", i);
+          abort();
+        }
+      mpf_clear (x);
+    }
+}
+/* Entry point: run check_data between tests_start and tests_end.  */
+int
+main (void)
+{
+  tests_start ();
+
+  check_data ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpf/t-sqrt.c b/tests/mpf/t-sqrt.c

new file mode 100644 (file)

index 0000000..2df7bb2
--- /dev/null
+++ b/tests/mpf/t-sqrt.c
@@ -0,0 +1,194 @@
+/* Test mpf_sqrt, mpf_mul.
+
+Copyright 1996, 2001, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+#ifndef SIZE
+#define SIZE 16
+#endif
+/* Check on random x that mpf_sqrt (x) squared is relatively close to x.  */
+void
+check_rand1 (int argc, char **argv)
+{
+  mp_size_t size;
+  mp_exp_t exp;
+  int reps = 20000;             /* iterations; argv[1] overrides */
+  int i;
+  mpf_t x, y, y2;
+  mp_size_t bprec = 100;        /* default precision; argv[2] overrides */
+  mpf_t rerr, max_rerr, limit_rerr;
+
+  if (argc > 1)
+    {
+      reps = strtol (argv[1], 0, 0);
+      if (argc > 2)
+       bprec = strtol (argv[2], 0, 0);
+    }
+
+  mpf_set_default_prec (bprec);
+
+  mpf_init_set_ui (limit_rerr, 1);
+  mpf_div_2exp (limit_rerr, limit_rerr, bprec);  /* limit_rerr = 2^-bprec */
+#if VERBOSE
+  mpf_dump (limit_rerr);
+#endif
+  mpf_init (rerr);
+  mpf_init_set_ui (max_rerr, 0);
+
+  mpf_init (x);
+  mpf_init (y);
+  mpf_init (y2);
+  for (i = 0; i < reps; i++)
+    {
+      size = urandom () % SIZE;
+      exp = urandom () % SIZE;
+      mpf_random2 (x, size, exp);
+
+      mpf_sqrt (y, x);
+      MPF_CHECK_FORMAT (y);
+      mpf_mul (y2, y, y);
+
+      mpf_reldiff (rerr, x, y2);
+      if (mpf_cmp (rerr, max_rerr) > 0) /* new worst case seen so far */
+       {
+         mpf_set (max_rerr, rerr);
+#if VERBOSE
+         mpf_dump (max_rerr);
+#endif
+         if (mpf_cmp (rerr, limit_rerr) > 0)
+           {
+             printf ("ERROR after %d tests\n", i);
+             printf ("   x = "); mpf_dump (x);
+             printf ("   y = "); mpf_dump (y);
+             printf ("  y2 = "); mpf_dump (y2);
+             printf ("   rerr       = "); mpf_dump (rerr);
+             printf ("   limit_rerr = "); mpf_dump (limit_rerr);
+              printf ("in hex:\n");
+              mp_trace_base = 16;
+             mpf_trace ("   x  ", x);
+             mpf_trace ("   y  ", y);
+             mpf_trace ("   y2 ", y2);
+             mpf_trace ("   rerr      ", rerr);
+             mpf_trace ("   limit_rerr", limit_rerr);
+             abort ();
+           }
+       }
+    }
+
+  mpf_clear (limit_rerr);
+  mpf_clear (rerr);
+  mpf_clear (max_rerr);
+
+  mpf_clear (x);
+  mpf_clear (y);
+  mpf_clear (y2);
+}
+/* Check mpf_sqrt gives r_prec limbs with r^2 <= x < (r+ulp)^2.  */
+void
+check_rand2 (void)
+{
+  unsigned long      max_prec = 20;
+  unsigned long      min_prec = __GMPF_BITS_TO_PREC (1);
+  gmp_randstate_ptr  rands = RANDS;
+  unsigned long      x_prec, r_prec;
+  mpf_t              x, r, s;   /* input, root, and square of the root */
+  int                i;
+
+  mpf_init (x);
+  mpf_init (r);
+  mpf_init (s);
+  refmpf_set_prec_limbs (s, 2*max_prec+10);
+
+  for (i = 0; i < 500; i++)
+    {
+      /* input precision */
+      x_prec = gmp_urandomm_ui (rands, max_prec-min_prec) + min_prec;
+      refmpf_set_prec_limbs (x, x_prec);
+
+      /* result precision */
+      r_prec = gmp_urandomm_ui (rands, max_prec-min_prec) + min_prec;
+      refmpf_set_prec_limbs (r, r_prec);
+
+      mpf_random2 (x, x_prec, 1000);
+
+      mpf_sqrt (r, x);
+      MPF_CHECK_FORMAT (r);
+
+      /* Expect to prec limbs of result.
+         In the current implementation there's no stripping of low zero
+         limbs in mpf_sqrt, so size should be exactly prec.  */
+      if (SIZ(r) != r_prec)
+        {
+          printf ("mpf_sqrt wrong number of result limbs\n");
+          mpf_trace ("  x", x);
+          mpf_trace ("  r", r);
+          printf    ("  r_prec=%lu\n", r_prec);
+          printf    ("  SIZ(r)  %ld\n", (long) SIZ(r));
+          printf    ("  PREC(r) %ld\n", (long) PREC(r));
+          abort ();
+        }
+
+      /* Must have r^2 <= x, since r has been truncated. */
+      mpf_mul (s, r, r);
+      if (! (mpf_cmp (s, x) <= 0))
+        {
+          printf    ("mpf_sqrt result too big\n");
+          mpf_trace ("  x", x);
+          printf    ("  r_prec=%lu\n", r_prec);
+          mpf_trace ("  r", r);
+          mpf_trace ("  s", s);
+          abort ();
+        }
+
+      /* Must have (r+ulp)^2 > x, or else r is too small. */
+      refmpf_add_ulp (r);       /* r is modified in place from here on */
+      mpf_mul (s, r, r);
+      if (! (mpf_cmp (s, x) > 0))
+        {
+          printf    ("mpf_sqrt result too small\n");
+          mpf_trace ("  x", x);
+          printf    ("  r_prec=%lu\n", r_prec);
+          mpf_trace ("  r+ulp", r);
+          mpf_trace ("  s", s);
+          abort ();
+        }
+    }
+
+  mpf_clear (x);
+  mpf_clear (r);
+  mpf_clear (s);
+}
+/* Entry point: run check_rand1 (which reads argv) and then check_rand2.  */
+int
+main (int argc, char **argv)
+{
+  tests_start ();
+  mp_trace_base = -16;          /* base used for trace output */
+
+  check_rand1 (argc, argv);
+  check_rand2 ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpf/t-sqrt_ui.c b/tests/mpf/t-sqrt_ui.c

new file mode 100644 (file)

index 0000000..7ae572b
--- /dev/null
+++ b/tests/mpf/t-sqrt_ui.c
@@ -0,0 +1,113 @@
+/* Test mpf_sqrt_ui.
+
+Copyright 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+/* Check mpf_sqrt_ui on 50 random non-zero ulongs at random precisions.  */
+void
+check_rand (void)
+{
+  unsigned long      max_prec = 15;
+  unsigned long      min_prec = __GMPF_BITS_TO_PREC (1);
+  gmp_randstate_ptr  rands = RANDS;
+  unsigned long      x, prec;
+  mpf_t              r, s;
+  int                i;
+
+  mpf_init (r);
+  mpf_init (s);
+  refmpf_set_prec_limbs (s, 2*max_prec+10);  /* s holds r*r below */
+
+  for (i = 0; i < 50; i++)
+    {
+      /* input, a random non-zero ulong, exponentially distributed:
+         a random bit length 1..BITS_PER_ULONG, then that many random bits */
+      do {
+        x = gmp_urandomb_ui (rands,
+                             gmp_urandomm_ui (rands, BITS_PER_ULONG) + 1);
+      } while (x == 0);
+
+      /* result precision, min_prec <= prec < max_prec */
+      prec = gmp_urandomm_ui (rands, max_prec-min_prec) + min_prec;
+      refmpf_set_prec_limbs (r, prec);
+
+      mpf_sqrt_ui (r, x);
+      MPF_CHECK_FORMAT (r);
+
+      /* Expect exactly prec limbs of result.
+         In the current implementation there's no stripping of low zero
+         limbs in mpf_sqrt_ui, not even on perfect squares, so size should
+         be exactly prec.  */
+      if (SIZ(r) != prec)
+        {
+          printf ("mpf_sqrt_ui result not enough result limbs\n");
+          printf    ("  x=%lu\n", x);
+          printf    ("  want prec=%lu\n", prec);
+          mpf_trace ("  r", r);
+          printf    ("  r size %ld\n", (long) SIZ(r));
+          printf    ("  r prec %ld\n", (long) PREC(r));
+          abort ();
+        }
+
+      /* Must have r^2 <= x, since r has been truncated. */
+      mpf_mul (s, r, r);
+      if (! (mpf_cmp_ui (s, x) <= 0))
+        {
+          printf    ("mpf_sqrt_ui result too big\n");
+          printf    ("  x=%lu\n", x);
+          printf    ("  want prec=%lu\n", prec);
+          mpf_trace ("  r", r);
+          mpf_trace ("  s", s);
+          abort ();
+        }
+
+      /* Must have (r+ulp)^2 > x, or else r is too small.
+         No overflow from refmpf_add_ulp since r is only prec limbs. */
+      refmpf_add_ulp (r);  /* r is modified in place from here on */
+      mpf_mul (s, r, r);
+      if (! (mpf_cmp_ui (s, x) > 0))
+        {
+          printf    ("mpf_sqrt_ui result too small\n");
+          printf    ("  x=%lu\n", x);
+          printf    ("  want prec=%lu\n", prec);
+          mpf_trace ("  r+ulp", r);
+          mpf_trace ("  s", s);
+          abort ();
+        }
+    }
+
+  mpf_clear (r);
+  mpf_clear (s);
+}
+
+int
+main (int argc, char **argv)
+{
+  tests_start ();
+  mp_trace_base = -16;  /* set before check_rand can call mpf_trace */
+
+  check_rand ();  /* argc and argv are not used by this test */
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpf/t-sub.c b/tests/mpf/t-sub.c

new file mode 100644 (file)

index 0000000..92d4f05
--- /dev/null
+++ b/tests/mpf/t-sub.c
@@ -0,0 +1,206 @@
+/* Test mpf_sub.
+
+Copyright 1996, 2001, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+#ifndef SIZE
+#define SIZE 16
+#endif
+
+void
+check_rand (int argc, char **argv)
+{
+  mp_size_t size;
+  mp_exp_t exp;
+  int reps = 20000;  /* iteration count, overridden by argv[1] */
+  int i;
+  mpf_t u, v, w, wref;
+  mp_size_t bprec = 100;  /* precision in bits, overridden by argv[2] */
+  mpf_t rerr, max_rerr, limit_rerr;
+
+  if (argc > 1)
+    {
+      reps = strtol (argv[1], 0, 0);
+      if (argc > 2)
+       bprec = strtol (argv[2], 0, 0);
+    }
+
+  mpf_set_default_prec (bprec);
+
+  mpf_init_set_ui (limit_rerr, 1);
+  mpf_div_2exp (limit_rerr, limit_rerr, bprec);  /* limit_rerr = 2^-bprec */
+#if VERBOSE
+  mpf_dump (limit_rerr);
+#endif
+  mpf_init (rerr);
+  mpf_init_set_ui (max_rerr, 0);  /* largest relative error seen so far */
+
+  mpf_init (u);
+  mpf_init (v);
+  mpf_init (w);
+  mpf_init (wref);
+  for (i = 0; i < reps; i++)
+    {
+      size = urandom () % (2 * SIZE) - SIZE;  /* intended range -SIZE..SIZE-1 */
+      exp = urandom () % SIZE;
+      mpf_random2 (u, size, exp);
+
+      size = urandom () % (2 * SIZE) - SIZE;
+      exp = urandom () % SIZE;
+      mpf_random2 (v, size, exp);
+
+      if ((urandom () & 1) != 0)
+       mpf_add_ui (u, v, 1);  /* u = v+1, so u-v cancels almost entirely */
+      else if ((urandom () & 1) != 0)
+       mpf_sub_ui (u, v, 1);  /* u = v-1, likewise; otherwise u stays random */
+
+      mpf_sub (w, u, v);  /* function under test */
+      refmpf_sub (wref, u, v);  /* reference result */
+
+      mpf_reldiff (rerr, w, wref);
+      if (mpf_cmp (rerr, max_rerr) > 0)
+       {
+         mpf_set (max_rerr, rerr);
+#if VERBOSE
+         mpf_dump (max_rerr);
+#endif
+         if (mpf_cmp (rerr, limit_rerr) > 0)
+           {
+             printf ("ERROR after %d tests\n", i);
+             printf ("   u = "); mpf_dump (u);
+             printf ("   v = "); mpf_dump (v);
+             printf ("wref = "); mpf_dump (wref);
+             printf ("   w = "); mpf_dump (w);
+             abort ();
+           }
+       }
+    }
+
+  mpf_clear (limit_rerr);
+  mpf_clear (rerr);
+  mpf_clear (max_rerr);
+
+  mpf_clear (u);
+  mpf_clear (v);
+  mpf_clear (w);
+  mpf_clear (wref);
+}
+
+/* Check mpf_sub against a table of x, y, want; each entry is also run swapped.  */
+void
+check_data (void)
+{
+  static const struct {
+    struct {
+      int        exp, size;
+      mp_limb_t  d[10];
+    } x, y, want;
+
+  } data[] = {
+    { { 123, 2, { 8, 9 } },             { 123, 1, { 9 } }, { 122, 1, { 8 } } },
+
+    /* f - f == 0, various sizes.
+       These exercise a past problem (gmp 4.1.3 and earlier) where the
+       result exponent was not zeroed on a zero result like this.  */
+    { { 0, 0 }, { 0, 0 }, { 0, 0 } },
+    { { 99, 1, { 1 } },             { 99, 1, { 1 } },             { 0, 0 } },
+    { { 99, 2, { 123, 456 } },      { 99, 2, { 123, 456 } },      { 0, 0 } },
+    { { 99, 3, { 123, 456, 789 } }, { 99, 3, { 123, 456, 789 } }, { 0, 0 } },
+
+    /* High limbs cancel, leaving just the low limbs of the longer operand.
+       This exercises a past problem (gmp 4.1.3 and earlier) where high zero
+       limbs on the remainder were not stripped before truncating to the
+       destination, causing loss of precision.  */
+    { { 123, 2, { 8, 9 } },             { 123, 1, { 9 } }, { 122, 1, { 8 } } },
+    { { 123, 3, { 8, 0, 9 } },          { 123, 1, { 9 } }, { 121, 1, { 8 } } },
+    { { 123, 4, { 8, 0, 0, 9 } },       { 123, 1, { 9 } }, { 120, 1, { 8 } } },
+    { { 123, 5, { 8, 0, 0, 0, 9 } },    { 123, 1, { 9 } }, { 119, 1, { 8 } } },
+    { { 123, 6, { 8, 0, 0, 0, 0, 9 } }, { 123, 1, { 9 } }, { 118, 1, { 8 } } },
+
+  };
+
+  mpf_t  x, y, got, want;  /* only got is mpf_init'ed; the rest borrow data[] */
+  int  i, swap;
+
+  mp_trace_base = 16;
+  mpf_init (got);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      for (swap = 0; swap <= 1; swap++)
+        {
+          PTR(x) = (mp_ptr) data[i].x.d;  /* const cast away; x is only read */
+          SIZ(x) = data[i].x.size;
+          EXP(x) = data[i].x.exp;
+          PREC(x) = numberof (data[i].x.d);
+          MPF_CHECK_FORMAT (x);
+
+          PTR(y) = (mp_ptr) data[i].y.d;
+          SIZ(y) = data[i].y.size;
+          EXP(y) = data[i].y.exp;
+          PREC(y) = numberof (data[i].y.d);
+          MPF_CHECK_FORMAT (y);
+
+          PTR(want) = (mp_ptr) data[i].want.d;
+          SIZ(want) = data[i].want.size;
+          EXP(want) = data[i].want.exp;
+          PREC(want) = numberof (data[i].want.d);
+          MPF_CHECK_FORMAT (want);
+
+          if (swap)
+            {
+              mpf_swap (x, y);
+              SIZ(want) = - SIZ(want);  /* y - x == -(x - y) */
+            }
+
+          mpf_sub (got, x, y);
+/*           MPF_CHECK_FORMAT (got); */
+
+          if (mpf_cmp (got, want) != 0)
+            {
+              printf ("check_data() wrong result at data[%d] (operands%s swapped)\n", i, swap ? "" : " not");
+              mpf_trace ("x   ", x);
+              mpf_trace ("y   ", y);
+              mpf_trace ("got ", got);
+              mpf_trace ("want", want);
+              abort ();
+            }
+        }
+    }
+
+  mpf_clear (got);
+}
+
+/* Run the fixed-table checks, then the randomized comparison with refmpf_sub.  */
+int
+main (int argc, char **argv)
+{
+  tests_start ();
+
+  check_data ();
+  check_rand (argc, argv);  /* optional argv[1] = reps, argv[2] = precision bits */
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpf/t-trunc.c b/tests/mpf/t-trunc.c

new file mode 100644 (file)

index 0000000..30e3703
--- /dev/null
+++ b/tests/mpf/t-trunc.c
@@ -0,0 +1,271 @@
+/* Test mpf_trunc, mpf_ceil, mpf_floor.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+/* Dump src, got and want for a failure report: mpf_trace, then raw fields.  */
+void
+check_print (mpf_srcptr src, mpf_srcptr got, mpf_srcptr want)
+{
+  mp_trace_base = 16;
+  mpf_trace ("src ", src);
+  mpf_trace ("got ", got);
+  mpf_trace ("want", want);
+
+  /* Cast to the types the formats name; mp_exp_t need not be a long.  */
+  printf ("got  size=%d exp=%ld\n", (int) SIZ(got), (long) EXP(got));
+  mpn_trace ("     limbs=", PTR(got), (mp_size_t) ABSIZ(got));
+  printf ("want size=%d exp=%ld\n", (int) SIZ(want), (long) EXP(want));
+  mpn_trace ("     limbs=", PTR(want), (mp_size_t) ABSIZ(want));
+}
+
+void
+check_one (mpf_srcptr src, mpf_srcptr trunc, mpf_srcptr ceil, mpf_srcptr floor)
+{
+  mpf_t  got;  /* scratch result, given the same precision as the expected values */
+
+  mpf_init2 (got, mpf_get_prec (trunc));
+  ASSERT_ALWAYS (PREC(got) == PREC(trunc));
+  ASSERT_ALWAYS (PREC(got) == PREC(ceil));
+  ASSERT_ALWAYS (PREC(got) == PREC(floor));
+  /* CHECK_SEP: apply fun with a destination separate from src; abort on mismatch.  */
+#define CHECK_SEP(name, fun, want)              \
+  mpf_set_ui (got, 54321L); /* initial junk */  \
+  fun (got, src);                               \
+  MPF_CHECK_FORMAT (got);                       \
+  if (mpf_cmp (got, want) != 0)                 \
+    {                                           \
+       printf ("%s wrong\n", name);            \
+       check_print (src, got, want);           \
+       abort ();                               \
+    }
+
+  CHECK_SEP ("mpf_trunc", mpf_trunc, trunc);
+  CHECK_SEP ("mpf_ceil",  mpf_ceil,  ceil);
+  CHECK_SEP ("mpf_floor", mpf_floor, floor);
+  /* CHECK_INPLACE: copy src into got, then apply fun with source == destination.  */
+#define CHECK_INPLACE(name, fun, want)  \
+  mpf_set (got, src);                   \
+  fun (got, got);                       \
+  MPF_CHECK_FORMAT (got);               \
+  if (mpf_cmp (got, want) != 0)         \
+    {                                   \
+       printf ("%s wrong\n", name);    \
+       check_print (src, got, want);   \
+       abort ();                       \
+    }
+
+  CHECK_INPLACE ("mpf_trunc", mpf_trunc, trunc);
+
+  /* Can't do these unconditionally in case truncation by mpf_set strips
+     some low non-zero limbs which would have rounded the result.  */
+  if (ABSIZ(src) <= PREC(trunc)+1)
+    {
+      CHECK_INPLACE ("mpf_ceil",  mpf_ceil,  ceil);
+      CHECK_INPLACE ("mpf_floor", mpf_floor, floor);
+    }
+
+  mpf_clear (got);
+}
+
+void
+check_all (mpf_ptr src, mpf_ptr trunc, mpf_ptr ceil, mpf_ptr floor)
+{
+  /* some of these values are generated with direct field assignments */
+  MPF_CHECK_FORMAT (src);
+  MPF_CHECK_FORMAT (trunc);
+  MPF_CHECK_FORMAT (ceil);
+  MPF_CHECK_FORMAT (floor);
+
+  check_one (src, trunc, ceil, floor);
+  /* Repeat negated, in place: the negated floor is the ceil of -src and vice versa.  */
+  mpf_neg (src,   src);
+  mpf_neg (trunc, trunc);
+  mpf_neg (ceil,  ceil);
+  mpf_neg (floor, floor);
+  check_one (src, trunc, floor, ceil);  /* arguments stay negated on return */
+}
+
+void
+check_various (void)
+{
+  mpf_t  src, trunc, ceil, floor;
+  int    n, i;
+
+  mpf_init2 (src, 512L);  /* src is requested with twice the bits of the results */
+  mpf_init2 (trunc, 256L);
+  mpf_init2 (ceil,  256L);
+  mpf_init2 (floor, 256L);
+
+  /* 0 */
+  mpf_set_ui (src, 0L);
+  mpf_set_ui (trunc, 0L);
+  mpf_set_ui (ceil, 0L);
+  mpf_set_ui (floor, 0L);
+  check_all (src, trunc, ceil, floor);
+
+  /* 1 */
+  mpf_set_ui (src, 1L);
+  mpf_set_ui (trunc, 1L);
+  mpf_set_ui (ceil, 1L);
+  mpf_set_ui (floor, 1L);
+  check_all (src, trunc, ceil, floor);
+
+  /* 2^1024 */
+  mpf_set_ui (src, 1L);
+  mpf_mul_2exp (src,   src,   1024L);
+  mpf_set (trunc, src);
+  mpf_set (ceil,  src);
+  mpf_set (floor, src);
+  check_all (src, trunc, ceil, floor);
+
+  /* 1/2^1024, fraction only */
+  mpf_set_ui (src, 1L);
+  mpf_div_2exp (src,  src, 1024L);
+  mpf_set_si (trunc, 0L);
+  mpf_set_si (ceil, 1L);
+  mpf_set_si (floor, 0L);
+  check_all (src, trunc, ceil, floor);
+
+  /* 1/2 */
+  mpf_set_ui (src, 1L);
+  mpf_div_2exp (src,  src, 1L);
+  mpf_set_si (trunc, 0L);
+  mpf_set_si (ceil, 1L);
+  mpf_set_si (floor, 0L);
+  check_all (src, trunc, ceil, floor);
+
+  /* 123+1/2^64 */
+  mpf_set_ui (src, 1L);
+  mpf_div_2exp (src,  src, 64L);
+  mpf_add_ui (src,  src, 123L);
+  mpf_set_si (trunc, 123L);
+  mpf_set_si (ceil, 124L);
+  mpf_set_si (floor, 123L);
+  check_all (src, trunc, ceil, floor);
+
+  /* integer of full prec+1 limbs, unchanged */
+  n = PREC(trunc)+1;
+  ASSERT_ALWAYS (n <= PREC(src)+1);
+  EXP(src) = n;
+  SIZ(src) = n;
+  for (i = 0; i < SIZ(src); i++)
+    PTR(src)[i] = i+100;
+  mpf_set (trunc, src);
+  mpf_set (ceil, src);
+  mpf_set (floor, src);
+  check_all (src, trunc, ceil, floor);
+
+  /* full prec+1 limbs, 1 trimmed for integer */
+  n = PREC(trunc)+1;
+  ASSERT_ALWAYS (n <= PREC(src)+1);
+  EXP(src) = n-1;
+  SIZ(src) = n;
+  for (i = 0; i < SIZ(src); i++)
+    PTR(src)[i] = i+200;
+  EXP(trunc) = n-1;
+  SIZ(trunc) = n-1;
+  for (i = 0; i < SIZ(trunc); i++)
+    PTR(trunc)[i] = i+201;
+  mpf_set (floor, trunc);
+  mpf_add_ui (ceil, trunc, 1L);  /* the dropped fraction limb is non-zero */
+  check_all (src, trunc, ceil, floor);
+
+  /* prec+3 limbs, 2 trimmed for size */
+  n = PREC(trunc)+3;
+  ASSERT_ALWAYS (n <= PREC(src)+1);
+  EXP(src) = n;
+  SIZ(src) = n;
+  for (i = 0; i < SIZ(src); i++)
+    PTR(src)[i] = i+300;
+  EXP(trunc) = n;
+  SIZ(trunc) = n-2;
+  for (i = 0; i < SIZ(trunc); i++)
+    PTR(trunc)[i] = i+302;
+  mpf_set (floor, trunc);
+  mpf_set (ceil, trunc);
+  PTR(ceil)[0]++;  /* the dropped low limbs are non-zero, so ceil is one higher */
+  check_all (src, trunc, ceil, floor);
+
+  /* prec+4 limbs, 2 trimmed for size, 1 trimmed for integer */
+  n = PREC(trunc)+4;
+  ASSERT_ALWAYS (n <= PREC(src)+1);
+  EXP(src) = n-1;
+  SIZ(src) = n;
+  for (i = 0; i < SIZ(src); i++)
+    PTR(src)[i] = i+400;
+  EXP(trunc) = n-1;
+  SIZ(trunc) = n-3;
+  for (i = 0; i < SIZ(trunc); i++)
+    PTR(trunc)[i] = i+403;
+  mpf_set (floor, trunc);
+  mpf_set (ceil, trunc);
+  PTR(ceil)[0]++;  /* the dropped low limbs are non-zero, so ceil is one higher */
+  check_all (src, trunc, ceil, floor);
+
+  /* F.F, carry out of ceil */
+  EXP(src) = 1;
+  SIZ(src) = 2;
+  PTR(src)[0] = GMP_NUMB_MAX;
+  PTR(src)[1] = GMP_NUMB_MAX;
+  EXP(trunc) = 1;
+  SIZ(trunc) = 1;
+  PTR(trunc)[0] = GMP_NUMB_MAX;
+  mpf_set (floor, trunc);
+  EXP(ceil) = 2;  /* rounding up carries into a new high limb */
+  SIZ(ceil) = 1;
+  PTR(ceil)[0] = 1;
+  check_all (src, trunc, ceil, floor);
+
+  /* FF.F, carry out of ceil */
+  EXP(src) = 2;
+  SIZ(src) = 3;
+  PTR(src)[0] = GMP_NUMB_MAX;
+  PTR(src)[1] = GMP_NUMB_MAX;
+  PTR(src)[2] = GMP_NUMB_MAX;
+  EXP(trunc) = 2;
+  SIZ(trunc) = 2;
+  PTR(trunc)[0] = GMP_NUMB_MAX;
+  PTR(trunc)[1] = GMP_NUMB_MAX;
+  mpf_set (floor, trunc);
+  EXP(ceil) = 3;  /* rounding up carries into a new high limb */
+  SIZ(ceil) = 1;
+  PTR(ceil)[0] = 1;
+  check_all (src, trunc, ceil, floor);
+
+  mpf_clear (src);
+  mpf_clear (trunc);
+  mpf_clear (ceil);
+  mpf_clear (floor);
+}
+
+int
+main (void)
+{
+  tests_start ();
+
+  check_various ();  /* a mismatch ends in abort() inside check_one */
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpf/t-ui_div.c b/tests/mpf/t-ui_div.c

new file mode 100644 (file)

index 0000000..542ecb1
--- /dev/null
+++ b/tests/mpf/t-ui_div.c
@@ -0,0 +1,152 @@
+/* Test mpf_ui_div.
+
+Copyright 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+/* Validate got as u/v with refmpf_validate_division; report and abort if wrong.  */
+void
+check_one (const char *desc, mpf_ptr got, unsigned long u, mpf_srcptr v)
+{
+  mpf_t      uf;         /* u as an mpf, built on ulimbs without allocating */
+  mp_limb_t  ulimbs[2];
+  mp_size_t  usize;
+
+  ulimbs[0] = u & GMP_NUMB_MASK;
+  usize = (u != 0);
+#if BITS_PER_ULONG > GMP_NUMB_BITS
+  /* High bits go in a second limb; u stays intact for the failure report.  */
+  ulimbs[1] = u >> GMP_NUMB_BITS;
+  usize += (ulimbs[1] != 0);
+#endif
+  PTR(uf) = ulimbs;
+  SIZ(uf) = usize;
+  EXP(uf) = usize;  /* integer value: exponent equals the limb count */
+
+  if (! refmpf_validate_division ("mpf_ui_div", got, uf, v))
+    {
+      mp_trace_base = -16;
+      printf    ("  u 0x%lX  (%lu)\n", u, u);
+      mpf_trace ("  v", v);
+      printf    ("  %s\n", desc);
+      abort ();
+    }
+}
+
+void
+check_rand (void)
+{
+  unsigned long  min_prec = __GMPF_BITS_TO_PREC (1);
+  gmp_randstate_ptr  rands = RANDS;
+  unsigned long  prec, u;
+  mpf_t  got, v;
+  int    i;
+
+  mpf_init (got);
+  mpf_init (v);
+
+  for (i = 0; i < 200; i++)
+    {
+      /* got precision, min_prec .. min_prec+14 */
+      prec = min_prec + gmp_urandomm_ui (rands, 15L);
+      refmpf_set_prec_limbs (got, prec);
+
+      /* u, random bits of a random length 0 .. BITS_PER_ULONG (so u may be 0) */
+      prec = gmp_urandomm_ui (rands, BITS_PER_ULONG+1);
+      u = gmp_urandomb_ui (rands, prec);
+
+      /* v precision, min_prec .. min_prec+14 */
+      prec = min_prec + gmp_urandomm_ui (rands, 15L);
+      refmpf_set_prec_limbs (v, prec);
+
+      /* v, non-zero */
+      do {
+        mpf_random2 (v, PREC(v), (mp_exp_t) 20);
+      } while (SIZ(v) == 0);
+
+      /* v possibly negative */
+      if (gmp_urandomb_ui (rands, 1L))
+        mpf_neg (v, v);
+
+      if ((i % 2) == 0)  /* alternate between the two operand layouts */
+        {
+          /* src != dst */
+          mpf_ui_div (got, u, v);
+          check_one ("separate", got, u, v);
+        }
+      else
+        {
+          /* src == dst */
+          prec = refmpf_set_overlap (got, v);
+          mpf_ui_div (got, u, got);
+          check_one ("overlap src==dst", got, u, v);
+
+          mpf_set_prec_raw (got, prec);  /* presumably restores got's own precision */
+        }
+    }
+
+  mpf_clear (got);
+  mpf_clear (v);
+}
+
+void
+check_various (void)
+{
+  mpf_t got, v;
+
+  mpf_init (got);
+  mpf_init (v);
+
+  /* 100/4 == 25, an exact quotient compared directly */
+  mpf_set_prec (got, 20L);
+  mpf_set_ui (v, 4L);
+  mpf_ui_div (got, 100L, v);
+  MPF_CHECK_FORMAT (got);
+  ASSERT_ALWAYS (mpf_cmp_ui (got, 25L) == 0);
+
+  {
+    /* 1/(2^n+1), a case where truncating the divisor would be wrong */
+    unsigned long  u = 1L;
+    mpf_set_prec (got, 500L);
+    mpf_set_prec (v, 900L);  /* more bits than the 801 needed for 2^800+1 */
+    mpf_set_ui (v, 1L);
+    mpf_mul_2exp (v, v, 800L);
+    mpf_add_ui (v, v, 1L);  /* v = 2^800 + 1 */
+    mpf_ui_div (got, u, v);
+    check_one ("1/2^n+1, separate", got, u, v);
+  }
+
+  mpf_clear (got);
+  mpf_clear (v);
+}
+
+int
+main (void)
+{
+  tests_start ();
+
+  check_various ();  /* fixed cases first */
+  check_rand ();  /* then 200 random cases */
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpn/Makefile.am b/tests/mpn/Makefile.am

new file mode 100644 (file)

index 0000000..e3a43ae
--- /dev/null
+++ b/tests/mpn/Makefile.am
@@ -0,0 +1,36 @@
+## Process this file with automake to generate Makefile.in
+
+# Copyright 2001, 2002, 2003, 2009, 2010, 2012 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/tests
+LDADD = $(top_builddir)/tests/libtests.la $(top_builddir)/libgmp.la
+## Programs built for "make check"; TESTS below runs every one of them.
+check_PROGRAMS = t-asmtype t-aors_1 t-divrem_1 t-mod_1 t-fat t-get_d   \
+  t-instrument t-iord_u t-mp_bases t-perfsqr t-scan logic              \
+  t-toom22 t-toom32 t-toom33 t-toom42 t-toom43 t-toom44                        \
+  t-toom52 t-toom53 t-toom62 t-toom63 t-toom6h t-toom8h                        \
+  t-mul t-mullo t-mulmod_bnm1 t-sqrmod_bnm1                            \
+  t-hgcd t-matrix22 t-invert t-div t-bdiv
+
+EXTRA_DIST = toom-shared.h
+
+TESTS = $(check_PROGRAMS)
+## Build the shared test library in its own directory when it is a prerequisite.
+$(top_builddir)/tests/libtests.la:
+       cd $(top_builddir)/tests; $(MAKE) $(AM_MAKEFLAGS) libtests.la
diff --git a/tests/mpn/Makefile.in b/tests/mpn/Makefile.in

new file mode 100644 (file)

index 0000000..dde45b4
--- /dev/null
+++ b/tests/mpn/Makefile.in
@@ -0,0 +1,986 @@
+# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation,
+# Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+# Copyright 2001, 2002, 2003, 2009, 2010, 2012 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+ANSI2KNR = $(top_builddir)/ansi2knr
+check_PROGRAMS = t-asmtype$(EXEEXT) t-aors_1$(EXEEXT) \
+       t-divrem_1$(EXEEXT) t-mod_1$(EXEEXT) t-fat$(EXEEXT) \
+       t-get_d$(EXEEXT) t-instrument$(EXEEXT) t-iord_u$(EXEEXT) \
+       t-mp_bases$(EXEEXT) t-perfsqr$(EXEEXT) t-scan$(EXEEXT) \
+       logic$(EXEEXT) t-toom22$(EXEEXT) t-toom32$(EXEEXT) \
+       t-toom33$(EXEEXT) t-toom42$(EXEEXT) t-toom43$(EXEEXT) \
+       t-toom44$(EXEEXT) t-toom52$(EXEEXT) t-toom53$(EXEEXT) \
+       t-toom62$(EXEEXT) t-toom63$(EXEEXT) t-toom6h$(EXEEXT) \
+       t-toom8h$(EXEEXT) t-mul$(EXEEXT) t-mullo$(EXEEXT) \
+       t-mulmod_bnm1$(EXEEXT) t-sqrmod_bnm1$(EXEEXT) t-hgcd$(EXEEXT) \
+       t-matrix22$(EXEEXT) t-invert$(EXEEXT) t-div$(EXEEXT) \
+       t-bdiv$(EXEEXT)
+subdir = tests/mpn
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
+       $(top_srcdir)/configure.in
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+       $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+logic_SOURCES = logic.c
+logic_OBJECTS = logic$U.$(OBJEXT)
+logic_LDADD = $(LDADD)
+logic_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_aors_1_SOURCES = t-aors_1.c
+t_aors_1_OBJECTS = t-aors_1$U.$(OBJEXT)
+t_aors_1_LDADD = $(LDADD)
+t_aors_1_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_asmtype_SOURCES = t-asmtype.c
+t_asmtype_OBJECTS = t-asmtype$U.$(OBJEXT)
+t_asmtype_LDADD = $(LDADD)
+t_asmtype_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_bdiv_SOURCES = t-bdiv.c
+t_bdiv_OBJECTS = t-bdiv$U.$(OBJEXT)
+t_bdiv_LDADD = $(LDADD)
+t_bdiv_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_div_SOURCES = t-div.c
+t_div_OBJECTS = t-div$U.$(OBJEXT)
+t_div_LDADD = $(LDADD)
+t_div_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_divrem_1_SOURCES = t-divrem_1.c
+t_divrem_1_OBJECTS = t-divrem_1$U.$(OBJEXT)
+t_divrem_1_LDADD = $(LDADD)
+t_divrem_1_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_fat_SOURCES = t-fat.c
+t_fat_OBJECTS = t-fat$U.$(OBJEXT)
+t_fat_LDADD = $(LDADD)
+t_fat_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_get_d_SOURCES = t-get_d.c
+t_get_d_OBJECTS = t-get_d$U.$(OBJEXT)
+t_get_d_LDADD = $(LDADD)
+t_get_d_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_hgcd_SOURCES = t-hgcd.c
+t_hgcd_OBJECTS = t-hgcd$U.$(OBJEXT)
+t_hgcd_LDADD = $(LDADD)
+t_hgcd_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_instrument_SOURCES = t-instrument.c
+t_instrument_OBJECTS = t-instrument$U.$(OBJEXT)
+t_instrument_LDADD = $(LDADD)
+t_instrument_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_invert_SOURCES = t-invert.c
+t_invert_OBJECTS = t-invert$U.$(OBJEXT)
+t_invert_LDADD = $(LDADD)
+t_invert_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_iord_u_SOURCES = t-iord_u.c
+t_iord_u_OBJECTS = t-iord_u$U.$(OBJEXT)
+t_iord_u_LDADD = $(LDADD)
+t_iord_u_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_matrix22_SOURCES = t-matrix22.c
+t_matrix22_OBJECTS = t-matrix22$U.$(OBJEXT)
+t_matrix22_LDADD = $(LDADD)
+t_matrix22_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_mod_1_SOURCES = t-mod_1.c
+t_mod_1_OBJECTS = t-mod_1$U.$(OBJEXT)
+t_mod_1_LDADD = $(LDADD)
+t_mod_1_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_mp_bases_SOURCES = t-mp_bases.c
+t_mp_bases_OBJECTS = t-mp_bases$U.$(OBJEXT)
+t_mp_bases_LDADD = $(LDADD)
+t_mp_bases_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_mul_SOURCES = t-mul.c
+t_mul_OBJECTS = t-mul$U.$(OBJEXT)
+t_mul_LDADD = $(LDADD)
+t_mul_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_mullo_SOURCES = t-mullo.c
+t_mullo_OBJECTS = t-mullo$U.$(OBJEXT)
+t_mullo_LDADD = $(LDADD)
+t_mullo_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_mulmod_bnm1_SOURCES = t-mulmod_bnm1.c
+t_mulmod_bnm1_OBJECTS = t-mulmod_bnm1$U.$(OBJEXT)
+t_mulmod_bnm1_LDADD = $(LDADD)
+t_mulmod_bnm1_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_perfsqr_SOURCES = t-perfsqr.c
+t_perfsqr_OBJECTS = t-perfsqr$U.$(OBJEXT)
+t_perfsqr_LDADD = $(LDADD)
+t_perfsqr_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_scan_SOURCES = t-scan.c
+t_scan_OBJECTS = t-scan$U.$(OBJEXT)
+t_scan_LDADD = $(LDADD)
+t_scan_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_sqrmod_bnm1_SOURCES = t-sqrmod_bnm1.c
+t_sqrmod_bnm1_OBJECTS = t-sqrmod_bnm1$U.$(OBJEXT)
+t_sqrmod_bnm1_LDADD = $(LDADD)
+t_sqrmod_bnm1_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_toom22_SOURCES = t-toom22.c
+t_toom22_OBJECTS = t-toom22$U.$(OBJEXT)
+t_toom22_LDADD = $(LDADD)
+t_toom22_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_toom32_SOURCES = t-toom32.c
+t_toom32_OBJECTS = t-toom32$U.$(OBJEXT)
+t_toom32_LDADD = $(LDADD)
+t_toom32_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_toom33_SOURCES = t-toom33.c
+t_toom33_OBJECTS = t-toom33$U.$(OBJEXT)
+t_toom33_LDADD = $(LDADD)
+t_toom33_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_toom42_SOURCES = t-toom42.c
+t_toom42_OBJECTS = t-toom42$U.$(OBJEXT)
+t_toom42_LDADD = $(LDADD)
+t_toom42_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_toom43_SOURCES = t-toom43.c
+t_toom43_OBJECTS = t-toom43$U.$(OBJEXT)
+t_toom43_LDADD = $(LDADD)
+t_toom43_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_toom44_SOURCES = t-toom44.c
+t_toom44_OBJECTS = t-toom44$U.$(OBJEXT)
+t_toom44_LDADD = $(LDADD)
+t_toom44_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_toom52_SOURCES = t-toom52.c
+t_toom52_OBJECTS = t-toom52$U.$(OBJEXT)
+t_toom52_LDADD = $(LDADD)
+t_toom52_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_toom53_SOURCES = t-toom53.c
+t_toom53_OBJECTS = t-toom53$U.$(OBJEXT)
+t_toom53_LDADD = $(LDADD)
+t_toom53_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_toom62_SOURCES = t-toom62.c
+t_toom62_OBJECTS = t-toom62$U.$(OBJEXT)
+t_toom62_LDADD = $(LDADD)
+t_toom62_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_toom63_SOURCES = t-toom63.c
+t_toom63_OBJECTS = t-toom63$U.$(OBJEXT)
+t_toom63_LDADD = $(LDADD)
+t_toom63_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_toom6h_SOURCES = t-toom6h.c
+t_toom6h_OBJECTS = t-toom6h$U.$(OBJEXT)
+t_toom6h_LDADD = $(LDADD)
+t_toom6h_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_toom8h_SOURCES = t-toom8h.c
+t_toom8h_OBJECTS = t-toom8h$U.$(OBJEXT)
+t_toom8h_LDADD = $(LDADD)
+t_toom8h_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
+depcomp =
+am__depfiles_maybe =
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+       $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+       $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
+       $(LDFLAGS) -o $@
+SOURCES = logic.c t-aors_1.c t-asmtype.c t-bdiv.c t-div.c t-divrem_1.c \
+       t-fat.c t-get_d.c t-hgcd.c t-instrument.c t-invert.c \
+       t-iord_u.c t-matrix22.c t-mod_1.c t-mp_bases.c t-mul.c \
+       t-mullo.c t-mulmod_bnm1.c t-perfsqr.c t-scan.c t-sqrmod_bnm1.c \
+       t-toom22.c t-toom32.c t-toom33.c t-toom42.c t-toom43.c \
+       t-toom44.c t-toom52.c t-toom53.c t-toom62.c t-toom63.c \
+       t-toom6h.c t-toom8h.c
+DIST_SOURCES = logic.c t-aors_1.c t-asmtype.c t-bdiv.c t-div.c \
+       t-divrem_1.c t-fat.c t-get_d.c t-hgcd.c t-instrument.c \
+       t-invert.c t-iord_u.c t-matrix22.c t-mod_1.c t-mp_bases.c \
+       t-mul.c t-mullo.c t-mulmod_bnm1.c t-perfsqr.c t-scan.c \
+       t-sqrmod_bnm1.c t-toom22.c t-toom32.c t-toom33.c t-toom42.c \
+       t-toom43.c t-toom44.c t-toom52.c t-toom53.c t-toom62.c \
+       t-toom63.c t-toom6h.c t-toom8h.c
+ETAGS = etags
+CTAGS = ctags
+am__tty_colors = \
+red=; grn=; lgn=; blu=; std=
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ABI = @ABI@
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AS = @AS@
+ASMFLAGS = @ASMFLAGS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CALLING_CONVENTIONS_OBJS = @CALLING_CONVENTIONS_OBJS@
+CC = @CC@
+CCAS = @CCAS@
+CC_FOR_BUILD = @CC_FOR_BUILD@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CPP_FOR_BUILD = @CPP_FOR_BUILD@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFN_LONG_LONG_LIMB = @DEFN_LONG_LONG_LIMB@
+DEFS = @DEFS@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+EXEEXT_FOR_BUILD = @EXEEXT_FOR_BUILD@
+FGREP = @FGREP@
+GMP_LDFLAGS = @GMP_LDFLAGS@
+GMP_LIMB_BITS = @GMP_LIMB_BITS@
+GMP_NAIL_BITS = @GMP_NAIL_BITS@
+GREP = @GREP@
+HAVE_CLOCK_01 = @HAVE_CLOCK_01@
+HAVE_CPUTIME_01 = @HAVE_CPUTIME_01@
+HAVE_GETRUSAGE_01 = @HAVE_GETRUSAGE_01@
+HAVE_GETTIMEOFDAY_01 = @HAVE_GETTIMEOFDAY_01@
+HAVE_HOST_CPU_FAMILY_power = @HAVE_HOST_CPU_FAMILY_power@
+HAVE_HOST_CPU_FAMILY_powerpc = @HAVE_HOST_CPU_FAMILY_powerpc@
+HAVE_SIGACTION_01 = @HAVE_SIGACTION_01@
+HAVE_SIGALTSTACK_01 = @HAVE_SIGALTSTACK_01@
+HAVE_SIGSTACK_01 = @HAVE_SIGSTACK_01@
+HAVE_STACK_T_01 = @HAVE_STACK_T_01@
+HAVE_SYS_RESOURCE_H_01 = @HAVE_SYS_RESOURCE_H_01@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LEX = @LEX@
+LEXLIB = @LEXLIB@
+LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
+LIBCURSES = @LIBCURSES@
+LIBGMPXX_LDFLAGS = @LIBGMPXX_LDFLAGS@
+LIBGMP_DLL = @LIBGMP_DLL@
+LIBGMP_LDFLAGS = @LIBGMP_LDFLAGS@
+LIBM = @LIBM@
+LIBM_FOR_BUILD = @LIBM_FOR_BUILD@
+LIBOBJS = @LIBOBJS@
+LIBREADLINE = @LIBREADLINE@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+M4 = @M4@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
+STRIP = @STRIP@
+TAL_OBJECT = @TAL_OBJECT@
+TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
+U = @U@
+U_FOR_BUILD = @U_FOR_BUILD@
+VERSION = @VERSION@
+WITH_READLINE_01 = @WITH_READLINE_01@
+YACC = @YACC@
+YFLAGS = @YFLAGS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__leading_dot = @am__leading_dot@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+gmp_srclinks = @gmp_srclinks@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+mpn_objects = @mpn_objects@
+mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
+mpn_objs_in_libmp = @mpn_objs_in_libmp@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/tests
+LDADD = $(top_builddir)/tests/libtests.la $(top_builddir)/libgmp.la
+EXTRA_DIST = toom-shared.h
+TESTS = $(check_PROGRAMS)
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps)
+       @for dep in $?; do \
+         case '$(am__configure_deps)' in \
+           *$$dep*) \
+             ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+               && { if test -f $@; then exit 0; else break; fi; }; \
+             exit 1;; \
+         esac; \
+       done; \
+       echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps tests/mpn/Makefile'; \
+       $(am__cd) $(top_srcdir) && \
+         $(AUTOMAKE) --gnu --ignore-deps tests/mpn/Makefile
+.PRECIOUS: Makefile
+# Regenerate this Makefile when Makefile.in or config.status is newer.
+# If config.status is among the out-of-date prerequisites ($?), the top-level
+# am--refresh target is run; otherwise config.status is re-run for just this
+# one file (the command is echoed first because the recipe itself is silent).
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+       @case '$?' in \
+         *config.status*) \
+           cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+         *) \
+           echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+           cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+       esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+# Delete the built test programs named in $(check_PROGRAMS); does nothing
+# when that list is empty.  When $(EXEEXT) is non-empty, a second pass also
+# deletes the same names with that suffix stripped.
+clean-checkPROGRAMS:
+       @list='$(check_PROGRAMS)'; test -n "$$list" || exit 0; \
+       echo " rm -f" $$list; \
+       rm -f $$list || exit $$?; \
+       test -n "$(EXEEXT)" || exit 0; \
+       list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
+       echo " rm -f" $$list; \
+       rm -f $$list
+logic$(EXEEXT): $(logic_OBJECTS) $(logic_DEPENDENCIES) 
+       @rm -f logic$(EXEEXT)
+       $(LINK) $(logic_OBJECTS) $(logic_LDADD) $(LIBS)
+t-aors_1$(EXEEXT): $(t_aors_1_OBJECTS) $(t_aors_1_DEPENDENCIES) 
+       @rm -f t-aors_1$(EXEEXT)
+       $(LINK) $(t_aors_1_OBJECTS) $(t_aors_1_LDADD) $(LIBS)
+t-asmtype$(EXEEXT): $(t_asmtype_OBJECTS) $(t_asmtype_DEPENDENCIES) 
+       @rm -f t-asmtype$(EXEEXT)
+       $(LINK) $(t_asmtype_OBJECTS) $(t_asmtype_LDADD) $(LIBS)
+t-bdiv$(EXEEXT): $(t_bdiv_OBJECTS) $(t_bdiv_DEPENDENCIES) 
+       @rm -f t-bdiv$(EXEEXT)
+       $(LINK) $(t_bdiv_OBJECTS) $(t_bdiv_LDADD) $(LIBS)
+t-div$(EXEEXT): $(t_div_OBJECTS) $(t_div_DEPENDENCIES) 
+       @rm -f t-div$(EXEEXT)
+       $(LINK) $(t_div_OBJECTS) $(t_div_LDADD) $(LIBS)
+t-divrem_1$(EXEEXT): $(t_divrem_1_OBJECTS) $(t_divrem_1_DEPENDENCIES) 
+       @rm -f t-divrem_1$(EXEEXT)
+       $(LINK) $(t_divrem_1_OBJECTS) $(t_divrem_1_LDADD) $(LIBS)
+t-fat$(EXEEXT): $(t_fat_OBJECTS) $(t_fat_DEPENDENCIES) 
+       @rm -f t-fat$(EXEEXT)
+       $(LINK) $(t_fat_OBJECTS) $(t_fat_LDADD) $(LIBS)
+t-get_d$(EXEEXT): $(t_get_d_OBJECTS) $(t_get_d_DEPENDENCIES) 
+       @rm -f t-get_d$(EXEEXT)
+       $(LINK) $(t_get_d_OBJECTS) $(t_get_d_LDADD) $(LIBS)
+t-hgcd$(EXEEXT): $(t_hgcd_OBJECTS) $(t_hgcd_DEPENDENCIES) 
+       @rm -f t-hgcd$(EXEEXT)
+       $(LINK) $(t_hgcd_OBJECTS) $(t_hgcd_LDADD) $(LIBS)
+t-instrument$(EXEEXT): $(t_instrument_OBJECTS) $(t_instrument_DEPENDENCIES) 
+       @rm -f t-instrument$(EXEEXT)
+       $(LINK) $(t_instrument_OBJECTS) $(t_instrument_LDADD) $(LIBS)
+t-invert$(EXEEXT): $(t_invert_OBJECTS) $(t_invert_DEPENDENCIES) 
+       @rm -f t-invert$(EXEEXT)
+       $(LINK) $(t_invert_OBJECTS) $(t_invert_LDADD) $(LIBS)
+t-iord_u$(EXEEXT): $(t_iord_u_OBJECTS) $(t_iord_u_DEPENDENCIES) 
+       @rm -f t-iord_u$(EXEEXT)
+       $(LINK) $(t_iord_u_OBJECTS) $(t_iord_u_LDADD) $(LIBS)
+t-matrix22$(EXEEXT): $(t_matrix22_OBJECTS) $(t_matrix22_DEPENDENCIES) 
+       @rm -f t-matrix22$(EXEEXT)
+       $(LINK) $(t_matrix22_OBJECTS) $(t_matrix22_LDADD) $(LIBS)
+t-mod_1$(EXEEXT): $(t_mod_1_OBJECTS) $(t_mod_1_DEPENDENCIES) 
+       @rm -f t-mod_1$(EXEEXT)
+       $(LINK) $(t_mod_1_OBJECTS) $(t_mod_1_LDADD) $(LIBS)
+t-mp_bases$(EXEEXT): $(t_mp_bases_OBJECTS) $(t_mp_bases_DEPENDENCIES) 
+       @rm -f t-mp_bases$(EXEEXT)
+       $(LINK) $(t_mp_bases_OBJECTS) $(t_mp_bases_LDADD) $(LIBS)
+t-mul$(EXEEXT): $(t_mul_OBJECTS) $(t_mul_DEPENDENCIES) 
+       @rm -f t-mul$(EXEEXT)
+       $(LINK) $(t_mul_OBJECTS) $(t_mul_LDADD) $(LIBS)
+t-mullo$(EXEEXT): $(t_mullo_OBJECTS) $(t_mullo_DEPENDENCIES) 
+       @rm -f t-mullo$(EXEEXT)
+       $(LINK) $(t_mullo_OBJECTS) $(t_mullo_LDADD) $(LIBS)
+t-mulmod_bnm1$(EXEEXT): $(t_mulmod_bnm1_OBJECTS) $(t_mulmod_bnm1_DEPENDENCIES) 
+       @rm -f t-mulmod_bnm1$(EXEEXT)
+       $(LINK) $(t_mulmod_bnm1_OBJECTS) $(t_mulmod_bnm1_LDADD) $(LIBS)
+t-perfsqr$(EXEEXT): $(t_perfsqr_OBJECTS) $(t_perfsqr_DEPENDENCIES) 
+       @rm -f t-perfsqr$(EXEEXT)
+       $(LINK) $(t_perfsqr_OBJECTS) $(t_perfsqr_LDADD) $(LIBS)
+t-scan$(EXEEXT): $(t_scan_OBJECTS) $(t_scan_DEPENDENCIES) 
+       @rm -f t-scan$(EXEEXT)
+       $(LINK) $(t_scan_OBJECTS) $(t_scan_LDADD) $(LIBS)
+t-sqrmod_bnm1$(EXEEXT): $(t_sqrmod_bnm1_OBJECTS) $(t_sqrmod_bnm1_DEPENDENCIES) 
+       @rm -f t-sqrmod_bnm1$(EXEEXT)
+       $(LINK) $(t_sqrmod_bnm1_OBJECTS) $(t_sqrmod_bnm1_LDADD) $(LIBS)
+t-toom22$(EXEEXT): $(t_toom22_OBJECTS) $(t_toom22_DEPENDENCIES) 
+       @rm -f t-toom22$(EXEEXT)
+       $(LINK) $(t_toom22_OBJECTS) $(t_toom22_LDADD) $(LIBS)
+t-toom32$(EXEEXT): $(t_toom32_OBJECTS) $(t_toom32_DEPENDENCIES) 
+       @rm -f t-toom32$(EXEEXT)
+       $(LINK) $(t_toom32_OBJECTS) $(t_toom32_LDADD) $(LIBS)
+t-toom33$(EXEEXT): $(t_toom33_OBJECTS) $(t_toom33_DEPENDENCIES) 
+       @rm -f t-toom33$(EXEEXT)
+       $(LINK) $(t_toom33_OBJECTS) $(t_toom33_LDADD) $(LIBS)
+t-toom42$(EXEEXT): $(t_toom42_OBJECTS) $(t_toom42_DEPENDENCIES) 
+       @rm -f t-toom42$(EXEEXT)
+       $(LINK) $(t_toom42_OBJECTS) $(t_toom42_LDADD) $(LIBS)
+t-toom43$(EXEEXT): $(t_toom43_OBJECTS) $(t_toom43_DEPENDENCIES) 
+       @rm -f t-toom43$(EXEEXT)
+       $(LINK) $(t_toom43_OBJECTS) $(t_toom43_LDADD) $(LIBS)
+t-toom44$(EXEEXT): $(t_toom44_OBJECTS) $(t_toom44_DEPENDENCIES) 
+       @rm -f t-toom44$(EXEEXT)
+       $(LINK) $(t_toom44_OBJECTS) $(t_toom44_LDADD) $(LIBS)
+t-toom52$(EXEEXT): $(t_toom52_OBJECTS) $(t_toom52_DEPENDENCIES) 
+       @rm -f t-toom52$(EXEEXT)
+       $(LINK) $(t_toom52_OBJECTS) $(t_toom52_LDADD) $(LIBS)
+t-toom53$(EXEEXT): $(t_toom53_OBJECTS) $(t_toom53_DEPENDENCIES) 
+       @rm -f t-toom53$(EXEEXT)
+       $(LINK) $(t_toom53_OBJECTS) $(t_toom53_LDADD) $(LIBS)
+t-toom62$(EXEEXT): $(t_toom62_OBJECTS) $(t_toom62_DEPENDENCIES) 
+       @rm -f t-toom62$(EXEEXT)
+       $(LINK) $(t_toom62_OBJECTS) $(t_toom62_LDADD) $(LIBS)
+t-toom63$(EXEEXT): $(t_toom63_OBJECTS) $(t_toom63_DEPENDENCIES) 
+       @rm -f t-toom63$(EXEEXT)
+       $(LINK) $(t_toom63_OBJECTS) $(t_toom63_LDADD) $(LIBS)
+t-toom6h$(EXEEXT): $(t_toom6h_OBJECTS) $(t_toom6h_DEPENDENCIES) 
+       @rm -f t-toom6h$(EXEEXT)
+       $(LINK) $(t_toom6h_OBJECTS) $(t_toom6h_LDADD) $(LIBS)
+t-toom8h$(EXEEXT): $(t_toom8h_OBJECTS) $(t_toom8h_DEPENDENCIES) 
+       @rm -f t-toom8h$(EXEEXT)
+       $(LINK) $(t_toom8h_OBJECTS) $(t_toom8h_LDADD) $(LIBS)
+
+mostlyclean-compile:
+       -rm -f *.$(OBJEXT)
+
+distclean-compile:
+       -rm -f *.tab.c
+$(top_builddir)/ansi2knr:
+       $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
+
+# Remove the "*_.c" files (the outputs of the "<name>_.c" ansi2knr rules in
+# this file); skipped when $U is empty.  The leading "-" ignores failures.
+mostlyclean-kr:
+       -test "$U" = "" || rm -f *_.c
+
+# Suffix rules for compiling one C source: to a plain object (.o), to an
+# object with the source path first passed through $(CYGPATH_W) (.obj), or
+# to a libtool object (.lo).
+.c.o:
+       $(COMPILE) -c $<
+
+.c.obj:
+       $(COMPILE) -c `$(CYGPATH_W) '$<'`
+
+.c.lo:
+       $(LTCOMPILE) -c -o $@ $<
+logic_.c: logic.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/logic.c; then echo $(srcdir)/logic.c; else echo logic.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-aors_1_.c: t-aors_1.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-aors_1.c; then echo $(srcdir)/t-aors_1.c; else echo t-aors_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-asmtype_.c: t-asmtype.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-asmtype.c; then echo $(srcdir)/t-asmtype.c; else echo t-asmtype.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-bdiv_.c: t-bdiv.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-bdiv.c; then echo $(srcdir)/t-bdiv.c; else echo t-bdiv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-div_.c: t-div.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-div.c; then echo $(srcdir)/t-div.c; else echo t-div.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-divrem_1_.c: t-divrem_1.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-divrem_1.c; then echo $(srcdir)/t-divrem_1.c; else echo t-divrem_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-fat_.c: t-fat.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-fat.c; then echo $(srcdir)/t-fat.c; else echo t-fat.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-get_d_.c: t-get_d.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-get_d.c; then echo $(srcdir)/t-get_d.c; else echo t-get_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-hgcd_.c: t-hgcd.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-hgcd.c; then echo $(srcdir)/t-hgcd.c; else echo t-hgcd.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-instrument_.c: t-instrument.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-instrument.c; then echo $(srcdir)/t-instrument.c; else echo t-instrument.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-invert_.c: t-invert.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-invert.c; then echo $(srcdir)/t-invert.c; else echo t-invert.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-iord_u_.c: t-iord_u.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-iord_u.c; then echo $(srcdir)/t-iord_u.c; else echo t-iord_u.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-matrix22_.c: t-matrix22.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-matrix22.c; then echo $(srcdir)/t-matrix22.c; else echo t-matrix22.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-mod_1_.c: t-mod_1.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-mod_1.c; then echo $(srcdir)/t-mod_1.c; else echo t-mod_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-mp_bases_.c: t-mp_bases.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-mp_bases.c; then echo $(srcdir)/t-mp_bases.c; else echo t-mp_bases.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-mul_.c: t-mul.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-mul.c; then echo $(srcdir)/t-mul.c; else echo t-mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-mullo_.c: t-mullo.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-mullo.c; then echo $(srcdir)/t-mullo.c; else echo t-mullo.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-mulmod_bnm1_.c: t-mulmod_bnm1.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-mulmod_bnm1.c; then echo $(srcdir)/t-mulmod_bnm1.c; else echo t-mulmod_bnm1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-perfsqr_.c: t-perfsqr.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-perfsqr.c; then echo $(srcdir)/t-perfsqr.c; else echo t-perfsqr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-scan_.c: t-scan.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-scan.c; then echo $(srcdir)/t-scan.c; else echo t-scan.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-sqrmod_bnm1_.c: t-sqrmod_bnm1.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-sqrmod_bnm1.c; then echo $(srcdir)/t-sqrmod_bnm1.c; else echo t-sqrmod_bnm1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-toom22_.c: t-toom22.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-toom22.c; then echo $(srcdir)/t-toom22.c; else echo t-toom22.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-toom32_.c: t-toom32.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-toom32.c; then echo $(srcdir)/t-toom32.c; else echo t-toom32.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-toom33_.c: t-toom33.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-toom33.c; then echo $(srcdir)/t-toom33.c; else echo t-toom33.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-toom42_.c: t-toom42.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-toom42.c; then echo $(srcdir)/t-toom42.c; else echo t-toom42.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-toom43_.c: t-toom43.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-toom43.c; then echo $(srcdir)/t-toom43.c; else echo t-toom43.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-toom44_.c: t-toom44.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-toom44.c; then echo $(srcdir)/t-toom44.c; else echo t-toom44.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-toom52_.c: t-toom52.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-toom52.c; then echo $(srcdir)/t-toom52.c; else echo t-toom52.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-toom53_.c: t-toom53.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-toom53.c; then echo $(srcdir)/t-toom53.c; else echo t-toom53.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-toom62_.c: t-toom62.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-toom62.c; then echo $(srcdir)/t-toom62.c; else echo t-toom62.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-toom63_.c: t-toom63.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-toom63.c; then echo $(srcdir)/t-toom63.c; else echo t-toom63.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-toom6h_.c: t-toom6h.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-toom6h.c; then echo $(srcdir)/t-toom6h.c; else echo t-toom6h.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-toom8h_.c: t-toom8h.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-toom8h.c; then echo $(srcdir)/t-toom8h.c; else echo t-toom8h.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+logic_.$(OBJEXT) logic_.lo t-aors_1_.$(OBJEXT) t-aors_1_.lo \
+t-asmtype_.$(OBJEXT) t-asmtype_.lo t-bdiv_.$(OBJEXT) t-bdiv_.lo \
+t-div_.$(OBJEXT) t-div_.lo t-divrem_1_.$(OBJEXT) t-divrem_1_.lo \
+t-fat_.$(OBJEXT) t-fat_.lo t-get_d_.$(OBJEXT) t-get_d_.lo \
+t-hgcd_.$(OBJEXT) t-hgcd_.lo t-instrument_.$(OBJEXT) t-instrument_.lo \
+t-invert_.$(OBJEXT) t-invert_.lo t-iord_u_.$(OBJEXT) t-iord_u_.lo \
+t-matrix22_.$(OBJEXT) t-matrix22_.lo t-mod_1_.$(OBJEXT) t-mod_1_.lo \
+t-mp_bases_.$(OBJEXT) t-mp_bases_.lo t-mul_.$(OBJEXT) t-mul_.lo \
+t-mullo_.$(OBJEXT) t-mullo_.lo t-mulmod_bnm1_.$(OBJEXT) \
+t-mulmod_bnm1_.lo t-perfsqr_.$(OBJEXT) t-perfsqr_.lo t-scan_.$(OBJEXT) \
+t-scan_.lo t-sqrmod_bnm1_.$(OBJEXT) t-sqrmod_bnm1_.lo \
+t-toom22_.$(OBJEXT) t-toom22_.lo t-toom32_.$(OBJEXT) t-toom32_.lo \
+t-toom33_.$(OBJEXT) t-toom33_.lo t-toom42_.$(OBJEXT) t-toom42_.lo \
+t-toom43_.$(OBJEXT) t-toom43_.lo t-toom44_.$(OBJEXT) t-toom44_.lo \
+t-toom52_.$(OBJEXT) t-toom52_.lo t-toom53_.$(OBJEXT) t-toom53_.lo \
+t-toom62_.$(OBJEXT) t-toom62_.lo t-toom63_.$(OBJEXT) t-toom63_.lo \
+t-toom6h_.$(OBJEXT) t-toom6h_.lo t-toom8h_.$(OBJEXT) t-toom8h_.lo : \
+$(ANSI2KNR)
+
+mostlyclean-libtool:
+       -rm -f *.lo
+
+clean-libtool:
+       -rm -rf .libs _libs
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+       list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       mkid -fID $$unique
+tags: TAGS
+
+TAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+               $(TAGS_FILES) $(LISP)
+       set x; \
+       here=`pwd`; \
+       list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       shift; \
+       if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+         test -n "$$unique" || unique=$$empty_fix; \
+         if test $$# -gt 0; then \
+           $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+             "$$@" $$unique; \
+         else \
+           $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+             $$unique; \
+         fi; \
+       fi
+ctags: CTAGS
+CTAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+               $(TAGS_FILES) $(LISP)
+       list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       test -z "$(CTAGS_ARGS)$$unique" \
+         || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+            $$unique
+
+# Run gtags from $(top_srcdir), handing it the absolute path of
+# $(top_builddir) (captured with pwd) as its final argument.
+GTAGS:
+       here=`$(am__cd) $(top_builddir) && pwd` \
+         && $(am__cd) $(top_srcdir) \
+         && gtags -i $(GTAGS_ARGS) "$$here"
+
+distclean-tags:
+       -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+# Run every program in $(TESTS) and print one PASS/FAIL/SKIP line per test.
+# Exit status 0 counts as a pass, 77 as a skip, anything else as a failure;
+# a test listed in $(XFAIL_TESTS) is reported as XFAIL when it fails and as
+# XPASS (counted as a failure) when it succeeds.  A summary banner follows,
+# and the recipe's exit status is non-zero when the failure count is non-zero.
+check-TESTS: $(TESTS)
+       @failed=0; all=0; xfail=0; xpass=0; skip=0; \
+       srcdir=$(srcdir); export srcdir; \
+       list=' $(TESTS) '; \
+       $(am__tty_colors); \
+       if test -n "$$list"; then \
+         for tst in $$list; do \
+           if test -f ./$$tst; then dir=./; \
+           elif test -f $$tst; then dir=; \
+           else dir="$(srcdir)/"; fi; \
+           if $(TESTS_ENVIRONMENT) $${dir}$$tst; then \
+             all=`expr $$all + 1`; \
+             case " $(XFAIL_TESTS) " in \
+             *[\ \     ]$$tst[\ \      ]*) \
+               xpass=`expr $$xpass + 1`; \
+               failed=`expr $$failed + 1`; \
+               col=$$red; res=XPASS; \
+             ;; \
+             *) \
+               col=$$grn; res=PASS; \
+             ;; \
+             esac; \
+           elif test $$? -ne 77; then \
+             all=`expr $$all + 1`; \
+             case " $(XFAIL_TESTS) " in \
+             *[\ \     ]$$tst[\ \      ]*) \
+               xfail=`expr $$xfail + 1`; \
+               col=$$lgn; res=XFAIL; \
+             ;; \
+             *) \
+               failed=`expr $$failed + 1`; \
+               col=$$red; res=FAIL; \
+             ;; \
+             esac; \
+           else \
+             skip=`expr $$skip + 1`; \
+             col=$$blu; res=SKIP; \
+           fi; \
+           echo "$${col}$$res$${std}: $$tst"; \
+         done; \
+         if test "$$all" -eq 1; then \
+           tests="test"; \
+           All=""; \
+         else \
+           tests="tests"; \
+           All="All "; \
+         fi; \
+         if test "$$failed" -eq 0; then \
+           if test "$$xfail" -eq 0; then \
+             banner="$$All$$all $$tests passed"; \
+           else \
+             if test "$$xfail" -eq 1; then failures=failure; else failures=failures; fi; \
+             banner="$$All$$all $$tests behaved as expected ($$xfail expected $$failures)"; \
+           fi; \
+         else \
+           if test "$$xpass" -eq 0; then \
+             banner="$$failed of $$all $$tests failed"; \
+           else \
+             if test "$$xpass" -eq 1; then passes=pass; else passes=passes; fi; \
+             banner="$$failed of $$all $$tests did not behave as expected ($$xpass unexpected $$passes)"; \
+           fi; \
+         fi; \
+         dashes="$$banner"; \
+         skipped=""; \
+         if test "$$skip" -ne 0; then \
+           if test "$$skip" -eq 1; then \
+             skipped="($$skip test was not run)"; \
+           else \
+             skipped="($$skip tests were not run)"; \
+           fi; \
+           test `echo "$$skipped" | wc -c` -le `echo "$$banner" | wc -c` || \
+             dashes="$$skipped"; \
+         fi; \
+         report=""; \
+         if test "$$failed" -ne 0 && test -n "$(PACKAGE_BUGREPORT)"; then \
+           report="Please report to $(PACKAGE_BUGREPORT)"; \
+           test `echo "$$report" | wc -c` -le `echo "$$banner" | wc -c` || \
+             dashes="$$report"; \
+         fi; \
+         dashes=`echo "$$dashes" | sed s/./=/g`; \
+         if test "$$failed" -eq 0; then \
+           echo "$$grn$$dashes"; \
+         else \
+           echo "$$red$$dashes"; \
+         fi; \
+         echo "$$banner"; \
+         test -z "$$skipped" || echo "$$skipped"; \
+         test -z "$$report" || echo "$$report"; \
+         echo "$$dashes$$std"; \
+         test "$$failed" -eq 0; \
+       else :; fi
+
+distdir: $(DISTFILES)
+       @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+       topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+       list='$(DISTFILES)'; \
+         dist_files=`for file in $$list; do echo $$file; done | \
+         sed -e "s|^$$srcdirstrip/||;t" \
+             -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+       case $$dist_files in \
+         */*) $(MKDIR_P) `echo "$$dist_files" | \
+                          sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+                          sort -u` ;; \
+       esac; \
+       for file in $$dist_files; do \
+         if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+         if test -d $$d/$$file; then \
+           dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+           if test -d "$(distdir)/$$file"; then \
+             find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+           fi; \
+           if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+             cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+             find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+           fi; \
+           cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+         else \
+           test -f "$(distdir)/$$file" \
+           || cp -p $$d/$$file "$(distdir)/$$file" \
+           || exit 1; \
+         fi; \
+       done
+# Build the test programs in one sub-make, then run them through the
+# check-TESTS target in a second sub-make.
+check-am: all-am
+       $(MAKE) $(AM_MAKEFLAGS) $(check_PROGRAMS)
+       $(MAKE) $(AM_MAKEFLAGS) check-TESTS
+check: check-am
+all-am: Makefile
+installdirs:
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+       @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+# Re-invoke the "install" target with INSTALL_PROGRAM and install_sh_PROGRAM
+# overridden to $(INSTALL_STRIP_PROGRAM) and INSTALL_STRIP_FLAG=-s.
+# INSTALL_PROGRAM_ENV=STRIPPROG=... is added only when $(STRIP) is non-empty.
+install-strip:
+       $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+         install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+         `test -z '$(STRIP)' || \
+           echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+# Remove $(CONFIG_CLEAN_FILES) and, unless $(srcdir) is ".", also
+# $(CONFIG_CLEAN_VPATH_FILES).  Empty lists are skipped, and the leading "-"
+# makes make ignore any failure.
+distclean-generic:
+       -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+       -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+       @echo "This command is intended for maintainers to use"
+       @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-checkPROGRAMS clean-generic clean-libtool \
+       mostlyclean-am
+
+distclean: distclean-am
+       -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+       distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+       -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+       mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: $(top_builddir)/ansi2knr check-am install-am install-strip
+
+.PHONY: CTAGS GTAGS all all-am check check-TESTS check-am clean \
+       clean-checkPROGRAMS clean-generic clean-libtool ctags \
+       distclean distclean-compile distclean-generic \
+       distclean-libtool distclean-tags distdir dvi dvi-am html \
+       html-am info info-am install install-am install-data \
+       install-data-am install-dvi install-dvi-am install-exec \
+       install-exec-am install-html install-html-am install-info \
+       install-info-am install-man install-pdf install-pdf-am \
+       install-ps install-ps-am install-strip installcheck \
+       installcheck-am installdirs maintainer-clean \
+       maintainer-clean-generic mostlyclean mostlyclean-compile \
+       mostlyclean-generic mostlyclean-kr mostlyclean-libtool pdf \
+       pdf-am ps ps-am tags uninstall uninstall-am
+
+
+# Build libtests.la (the helper library listed in LDADD for every test here)
+# on demand by running make in the parent tests directory.  "&&" rather than
+# ";" stops $(MAKE) from running in the wrong directory if the cd fails.
+$(top_builddir)/tests/libtests.la:
+       cd $(top_builddir)/tests && $(MAKE) $(AM_MAKEFLAGS) libtests.la
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/tests/mpn/logic.c b/tests/mpn/logic.c

new file mode 100644 (file)

index 0000000..f6bfe7f
--- /dev/null
+++ b/tests/mpn/logic.c
@@ -0,0 +1,109 @@
+/* Test mpn_and_n, mpn_ior_n, mpn_xor_n, mpn_andn_n, mpn_iorn_n, mpn_xnor_n,
+   mpn_nand_n, and mpn_nior_n against their refmpn_ reference versions.
+
+Copyright 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+/* Compare the n-limb result {rp,n} produced by an mpn logic routine against
+   the reference result {refp,n}.  On any difference, report the failing
+   operation together with the operands {ap,n} and {bp,n} and both results,
+   then abort.  funcname is the operation name ("and", "ior", ...) spliced
+   into the "mpn_%s_n" message; it is const-qualified because the callers
+   pass string literals.  */
+void
+check_one (mp_srcptr refp, mp_srcptr rp, mp_srcptr ap, mp_srcptr bp,
+          mp_size_t n, const char *funcname)
+{
+  /* Results agree: nothing to report.  */
+  if (mpn_cmp (refp, rp, n) == 0)
+    return;
+
+  printf ("ERROR in mpn_%s_n\n", funcname);
+  printf ("a: "); mpn_dump (ap, n);
+  printf ("b: "); mpn_dump (bp, n);
+  printf ("r:   "); mpn_dump (rp, n);
+  printf ("ref: "); mpn_dump (refp, n);
+  abort();
+}
+
+/* Apply one logic operation OP (and, ior, xor, ...) to {ap,n} and {bp,n} with
+   both the refmpn_ reference routine and the mpn_ routine, then hand the two
+   results to check_one, which aborts on a mismatch.  */
+#define CHECK_LOGIC_OP(op)                      \
+  do {                                          \
+    refmpn_##op##_n (refp, ap, bp, n);          \
+    mpn_##op##_n (rp, ap, bp, n);               \
+    check_one (refp, rp, ap, bp, n, #op);       \
+  } while (0)
+
+/* For every repetition and every operand size from 1 to max_n limbs, draw
+   two random operands and check all eight logic operations on them.  */
+int
+main (int argc, char **argv)
+{
+  mp_ptr ap, bp, rp, refp;
+  mp_size_t max_n, n;
+  gmp_randstate_ptr rands;
+  long test, reps = 1000;
+  TMP_SDECL;
+  TMP_SMARK;
+
+  tests_start ();
+  TESTS_REPS (reps, argv, argc);
+
+  rands = RANDS;
+
+  max_n = 32;
+
+  /* Two operands, the result under test, and the reference result; each
+     area holds max_n limbs.  */
+  ap = TMP_SALLOC_LIMBS (max_n);
+  bp = TMP_SALLOC_LIMBS (max_n);
+  rp = TMP_SALLOC_LIMBS (max_n);
+  refp = TMP_SALLOC_LIMBS (max_n);
+
+  for (test = 0; test < reps; test++)
+    for (n = 1; n <= max_n; n++)
+      {
+        mpn_random2 (ap, n);
+        mpn_random2 (bp, n);
+
+        CHECK_LOGIC_OP (and);
+        CHECK_LOGIC_OP (ior);
+        CHECK_LOGIC_OP (xor);
+        CHECK_LOGIC_OP (andn);
+        CHECK_LOGIC_OP (iorn);
+        CHECK_LOGIC_OP (nand);
+        CHECK_LOGIC_OP (nior);
+        CHECK_LOGIC_OP (xnor);
+      }
+
+  TMP_SFREE;
+  tests_end ();
+  return 0;
+}
+
+#undef CHECK_LOGIC_OP
diff --git a/tests/mpn/t-aors_1.c b/tests/mpn/t-aors_1.c

new file mode 100644 (file)

index 0000000..a1878bb
--- /dev/null
+++ b/tests/mpn/t-aors_1.c
@@ -0,0 +1,268 @@
+/* Test mpn_add_1 and mpn_sub_1.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+#define M      GMP_NUMB_MAX
+#define ASIZE  10
+#define MAGIC  0x1234
+
+#define SETUP()                         \
+  do {                                  \
+    refmpn_random (got, data[i].size);  \
+    got[data[i].size] = MAGIC;          \
+  } while (0)
+
+#define SETUP_INPLACE()                                 \
+  do {                                                  \
+    refmpn_copyi (got, data[i].src, data[i].size);      \
+    got[data[i].size] = MAGIC;                          \
+  } while (0)
+
+#define VERIFY(name)                            \
+  do {                                          \
+    verify (name, i, data[i].src, data[i].n,    \
+            got_c, data[i].want_c,              \
+            got, data[i].want, data[i].size);   \
+  } while (0)
+
+typedef mp_limb_t (*mpn_aors_1_t)
+     __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
+mpn_aors_1_t fudge __GMP_PROTO ((mpn_aors_1_t));
+
+
+/* Check one mpn_add_1/mpn_sub_1 outcome.
+
+   name and i identify the check and the data table entry for diagnostics.
+   {src,size} and n are the inputs that were used.  got_c/{got,size} are the
+   carry and limbs actually produced, want_c/{want,size} the expected ones.
+
+   The limb at got[size] must still hold MAGIC (it is planted there by the
+   SETUP macros); otherwise the routine wrote past its destination.  Any
+   failure prints a report and aborts.  */
+void
+verify (const char *name, int i,
+        mp_srcptr src, mp_limb_t n,
+        mp_limb_t got_c, mp_limb_t want_c,
+        mp_srcptr got, mp_srcptr want, mp_size_t size)
+{
+  if (got[size] != MAGIC)
+    {
+      printf ("Overwrite at %s i=%d\n", name, i);
+      abort ();
+    }
+
+  if (got_c != want_c || ! refmpn_equal_anynail (got, want, size))
+    {
+      /* mp_size_t is a typedef, so convert explicitly to the type that the
+         %ld conversion expects.  */
+      printf ("Wrong at %s i=%d size=%ld\n", name, i, (long) size);
+      mpn_trace ("   src", src,  size);
+      mpn_trace ("     n", &n,   (mp_size_t) 1);
+      mpn_trace ("   got", got,  size);
+      mpn_trace ("  want", want, size);
+      mpn_trace (" got c", &got_c,  (mp_size_t) 1);
+      mpn_trace ("want c", &want_c, (mp_size_t) 1);
+      abort ();
+    }
+}
+
+
+/* Table-driven check of mpn_add_1.  Each data[] entry gives an operand
+   {src,size}, the single limb n to add, the expected carry-out want_c and
+   the expected result {want,size}.  Every entry is run with a separate
+   destination and in-place, both through the plain mpn_add_1 call and
+   through a function pointer obtained from fudge.  Entries with n == 1 are
+   additionally run with the constant CNST_LIMB(1) as the addend.  */
+void
+check_add_1 (void)
+{
+  static const struct {
+    mp_size_t        size;
+    mp_limb_t        n;
+    const mp_limb_t  src[ASIZE];
+    mp_limb_t        want_c;
+    const mp_limb_t  want[ASIZE];
+  } data[] = {
+    /* One limb, no carry out.  */
+    { 1, 0, { 0 },  0, { 0 } },
+    { 1, 0, { 1 },  0, { 1 } },
+    { 1, 1, { 0 },  0, { 1 } },
+    { 1, 0, { M },  0, { M } },
+    { 1, M, { 0 },  0, { M } },
+    { 1, 1, { 123 }, 0, { 124 } },
+
+    /* One limb, carry out.  */
+    { 1, 1, { M },  1, { 0 } },
+    { 1, M, { 1 },  1, { 0 } },
+    { 1, M, { M },  1, { M-1 } },
+
+    /* Two limbs, no carry out; a carry from the low limb lands in the zero
+       high limb.  The last two entries are identical.  */
+    { 2, 0, { 0, 0 },  0, { 0, 0 } },
+    { 2, 0, { 1, 0 },  0, { 1, 0 } },
+    { 2, 1, { 0, 0 },  0, { 1, 0 } },
+    { 2, 0, { M, 0 },  0, { M, 0 } },
+    { 2, M, { 0, 0 },  0, { M, 0 } },
+    { 2, 1, { M, 0 },  0, { 0, 1 } },
+    { 2, M, { 1, 0 },  0, { 0, 1 } },
+    { 2, M, { M, 0 },  0, { M-1, 1 } },
+    { 2, M, { M, 0 },  0, { M-1, 1 } },
+
+    /* All-ones operands of 2, 3 and 4 limbs: the carry propagates through
+       every limb and out of the top.  Each group ends with a repeated
+       entry.  */
+    { 2, 1, { M, M },  1, { 0, 0 } },
+    { 2, M, { 1, M },  1, { 0, 0 } },
+    { 2, M, { M, M },  1, { M-1, 0 } },
+    { 2, M, { M, M },  1, { M-1, 0 } },
+
+    { 3, 1, { M, M, M },  1, { 0, 0, 0 } },
+    { 3, M, { 1, M, M },  1, { 0, 0, 0 } },
+    { 3, M, { M, M, M },  1, { M-1, 0, 0 } },
+    { 3, M, { M, M, M },  1, { M-1, 0, 0 } },
+
+    { 4, 1, { M, M, M, M },  1, { 0, 0, 0, 0 } },
+    { 4, M, { 1, M, M, M },  1, { 0, 0, 0, 0 } },
+    { 4, M, { M, M, M, M },  1, { M-1, 0, 0, 0 } },
+    { 4, M, { M, M, M, M },  1, { M-1, 0, 0, 0 } },
+
+    /* Carry stops at the second limb; the higher limbs are unchanged.  */
+    { 4, M, { M, 0,   M, M },  0, { M-1, 1, M, M } },
+    { 4, M, { M, M-1, M, M },  0, { M-1, M, M, M } },
+
+    /* Carry stops at the third limb.  */
+    { 4, M, { M, M, 0,   M },  0, { M-1, 0, 1, M } },
+    { 4, M, { M, M, M-1, M },  0, { M-1, 0, M, M } },
+  };
+
+  mp_limb_t  got[ASIZE];
+  mp_limb_t  got_c;
+  int        i;
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      /* SETUP fills got with random limbs, SETUP_INPLACE copies src into
+         got; both plant MAGIC just past the end for verify to check.  */
+      SETUP ();
+      got_c = mpn_add_1 (got, data[i].src, data[i].size, data[i].n);
+      VERIFY ("check_add_1 (separate)");
+
+      SETUP_INPLACE ();
+      got_c = mpn_add_1 (got, got, data[i].size, data[i].n);
+      VERIFY ("check_add_1 (in-place)");
+
+      /* Repeat with a compile-time constant addend of 1.  */
+      if (data[i].n == 1)
+        {
+          SETUP ();
+          got_c = mpn_add_1 (got, data[i].src, data[i].size, CNST_LIMB(1));
+          VERIFY ("check_add_1 (separate, const 1)");
+
+          SETUP_INPLACE ();
+          got_c = mpn_add_1 (got, got, data[i].size, CNST_LIMB(1));
+          VERIFY ("check_add_1 (in-place, const 1)");
+        }
+
+      /* Same again on functions, not inlines. */
+      SETUP ();
+      got_c = (*fudge(mpn_add_1)) (got, data[i].src, data[i].size, data[i].n);
+      VERIFY ("check_add_1 (function, separate)");
+
+      SETUP_INPLACE ();
+      got_c = (*fudge(mpn_add_1)) (got, got, data[i].size, data[i].n);
+      VERIFY ("check_add_1 (function, in-place)");
+    }
+}
+
+/* Table-driven check of mpn_sub_1.  Each data[] entry gives an operand
+   {src,size}, the single limb n to subtract, the expected borrow-out want_c
+   and the expected result {want,size}.  Every entry is run with a separate
+   destination and in-place, both through the plain mpn_sub_1 call and
+   through a function pointer obtained from fudge.  Entries with n == 1 are
+   additionally run with the constant CNST_LIMB(1) as the subtrahend.  */
+void
+check_sub_1 (void)
+{
+  static const struct {
+    mp_size_t        size;
+    mp_limb_t        n;
+    const mp_limb_t  src[ASIZE];
+    mp_limb_t        want_c;
+    const mp_limb_t  want[ASIZE];
+  } data[] = {
+    /* One limb, no borrow out.  */
+    { 1, 0, { 0 },  0, { 0 } },
+    { 1, 0, { 1 },  0, { 1 } },
+    { 1, 1, { 1 },  0, { 0 } },
+    { 1, 0, { M },  0, { M } },
+    { 1, 1, { M },  0, { M-1 } },
+    { 1, 1, { 123 }, 0, { 122 } },
+
+    /* One limb, borrow out.  */
+    { 1, 1, { 0 },  1, { M } },
+    { 1, M, { 0 },  1, { 1 } },
+
+    /* Two limbs, no borrow out.  */
+    { 2, 0, { 0, 0 },  0, { 0, 0 } },
+    { 2, 0, { 1, 0 },  0, { 1, 0 } },
+    { 2, 1, { 1, 0 },  0, { 0, 0 } },
+    { 2, 0, { M, 0 },  0, { M, 0 } },
+    { 2, 1, { M, 0 },  0, { M-1, 0 } },
+    { 2, 1, { 123, 0 }, 0, { 122, 0 } },
+
+    /* Two limbs, borrow propagates through the zero high limb and out.  */
+    { 2, 1, { 0, 0 },  1, { M, M } },
+    { 2, M, { 0, 0 },  1, { 1, M } },
+
+    /* Three limbs, subtracting zero.  */
+    { 3, 0, { 0,   0, 0 },  0, { 0,   0, 0 } },
+    { 3, 0, { 123, 0, 0 },  0, { 123, 0, 0 } },
+
+    /* All-zero operands of 3 and 4 limbs: the borrow propagates through
+       every limb and out of the top.  */
+    { 3, 1, { 0, 0, 0 },  1, { M, M, M } },
+    { 3, M, { 0, 0, 0 },  1, { 1, M, M } },
+
+    { 4, 1, { 0, 0, 0, 0 },  1, { M, M, M, M } },
+    { 4, M, { 0, 0, 0, 0 },  1, { 1, M, M, M } },
+
+    /* Borrow stops at the third limb; the top limb is unchanged.  */
+    { 4, 1, { 0, 0, 1,   42 },  0, { M, M, 0,   42 } },
+    { 4, M, { 0, 0, 123, 24 },  0, { 1, M, 122, 24 } },
+  };
+
+  mp_limb_t  got[ASIZE];
+  mp_limb_t  got_c;
+  int        i;
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      /* SETUP fills got with random limbs, SETUP_INPLACE copies src into
+         got; both plant MAGIC just past the end for verify to check.  */
+      SETUP ();
+      got_c = mpn_sub_1 (got, data[i].src, data[i].size, data[i].n);
+      VERIFY ("check_sub_1 (separate)");
+
+      SETUP_INPLACE ();
+      got_c = mpn_sub_1 (got, got, data[i].size, data[i].n);
+      VERIFY ("check_sub_1 (in-place)");
+
+      /* Repeat with a compile-time constant subtrahend of 1.  */
+      if (data[i].n == 1)
+        {
+          SETUP ();
+          got_c = mpn_sub_1 (got, data[i].src, data[i].size, CNST_LIMB(1));
+          VERIFY ("check_sub_1 (separate, const 1)");
+
+          SETUP_INPLACE ();
+          got_c = mpn_sub_1 (got, got, data[i].size, CNST_LIMB(1));
+          VERIFY ("check_sub_1 (in-place, const 1)");
+        }
+
+      /* Same again on functions, not inlines. */
+      SETUP ();
+      got_c = (*fudge(mpn_sub_1)) (got, data[i].src, data[i].size, data[i].n);
+      VERIFY ("check_sub_1 (function, separate)");
+
+      SETUP_INPLACE ();
+      got_c = (*fudge(mpn_sub_1)) (got, got, data[i].size, data[i].n);
+      VERIFY ("check_sub_1 (function, in-place)");
+    }
+}
+
+/* Try to prevent the optimizer inlining.
+
+   Returns its argument f unchanged.  The checks call mpn_add_1 and mpn_sub_1
+   through the returned pointer so that the out-of-line functions, not the
+   inline versions, get exercised.  */
+mpn_aors_1_t
+fudge (mpn_aors_1_t f)
+{
+  return f;
+}
+
+/* Run the table-driven mpn_add_1 and mpn_sub_1 checks between tests_start
+   and tests_end, then exit with status 0.  */
+int
+main (void)
+{
+  tests_start ();
+  /* NOTE(review): presumably selects the output base for the mpn_trace calls
+     made by verify; the meaning of the negative sign is not visible in this
+     file -- confirm against the tests support code.  */
+  mp_trace_base = -16;
+
+  check_add_1 ();
+  check_sub_1 ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpn/t-asmtype.c b/tests/mpn/t-asmtype.c

new file mode 100644 (file)

index 0000000..4ee5a7a
--- /dev/null
+++ b/tests/mpn/t-asmtype.c
@@ -0,0 +1,64 @@
+/* Test .type directives on assembler functions.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#include "tests.h"
+
+
+/* This apparently trivial test is designed to detect missing .type and
+   .size directives in asm code, per the problem described under
+   GMP_ASM_TYPE in acinclude.m4.
+
+   A failure can be provoked in a shared or shared+static build by making
+   TYPE and SIZE in config.m4 empty, either by editing it or by configuring
+   with
+
+       ./configure gmp_cv_asm_type= gmp_cv_asm_size=
+
+   mpn_add_n is used for the test because normally it's implemented in
+   assembler on a CPU that has any asm code.
+
+   Enhancement: As noted with GMP_ASM_TYPE, if .type is wrong but .size is
+   right then everything works, but uses code copied down to the mainline
+   data area.  Maybe we could detect that if we built a test library with an
+   object that had .size deliberately disabled.  */
+
+/* Add two fixed three-limb operands with mpn_add_n and compare the sum with
+   the known answer using refmpn_cmp.  A mismatch prints a message and
+   aborts; otherwise the program exits with status 0.  */
+int
+main (void)
+{
+  static const mp_limb_t addend1[3]  = { 1, 2, 3 };
+  static const mp_limb_t addend2[3]  = { 4, 5, 6 };
+  static const mp_limb_t expected[3] = { 5, 7, 9 };
+  mp_limb_t  sum[3];
+
+  mpn_add_n (sum, addend1, addend2, (mp_size_t) 3);
+
+  if (refmpn_cmp (sum, expected, (mp_size_t) 3) == 0)
+    exit (0);
+
+  printf ("Wrong result from mpn_add_n\n");
+  abort ();
+}
diff --git a/tests/mpn/t-bdiv.c b/tests/mpn/t-bdiv.c

new file mode 100644 (file)

index 0000000..4d56bfb
--- /dev/null
+++ b/tests/mpn/t-bdiv.c
@@ -0,0 +1,344 @@
+/* Copyright 2006, 2007, 2009, 2010 Free Software Foundation, Inc.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+PARTICULAR PURPOSE.  See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include <stdlib.h>            /* for strtol */
+#include <stdio.h>             /* for printf */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+#include "tests/tests.h"
+
+
+/* Print the limbs of {p,n} in hex, most significant first, separated by
+   single spaces and terminated by a newline.  Operands longer than 20 limbs
+   are abbreviated to the top four limbs, "... ", and the bottom four limbs.
+
+   NOTE(review): the %lx conversion assumes mp_limb_t is passed like an
+   unsigned long -- confirm for builds that use a different limb type.  */
+static void
+dumpy (mp_srcptr p, mp_size_t n)
+{
+  const int width = (int) (2 * sizeof (mp_limb_t));
+  mp_size_t k;
+
+  if (n <= 20)
+    {
+      /* Short operand: every limb, no separator after the last one.  */
+      for (k = n - 1; k >= 0; k--)
+        {
+          printf ("%0*lx", width, p[k]);
+          if (k != 0)
+            printf (" ");
+        }
+    }
+  else
+    {
+      /* Long operand: four high limbs, an ellipsis, four low limbs.  */
+      for (k = n - 1; k >= n - 4; k--)
+        {
+          printf ("%0*lx", width, p[k]);
+          printf (" ");
+        }
+      printf ("... ");
+      for (k = 3; k >= 0; k--)
+        {
+          printf ("%0*lx", width, p[k]);
+          if (k != 0)
+            printf (" ");
+        }
+    }
+  puts ("");
+}
+
+static unsigned long test;
+
+/* Verify one bdiv result and abort with a report on inconsistency.
+
+   {qp,nn-dn} is the quotient, {np,nn} the numerator, {dp,dn} the divisor and
+   fname the name of the routine being tested (used only in the report).
+
+   With a remainder (rp != NULL): T = Q*D is formed, {rp,dn} is added to T at
+   limb offset nn-dn, and the check requires the carry from that addition to
+   equal rh and T to equal N over all nn limbs.
+
+   Without a remainder (rp == NULL): only the low nn-dn limbs of Q*D are
+   compared with N; rh is not used.
+
+   Nothing is checked when nn == dn.  */
+void
+check_one (mp_ptr qp, mp_srcptr rp, mp_limb_t rh,
+          mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn,
+          const char *fname)
+{
+  mp_size_t qn;
+  int cmp;
+  mp_ptr tp;
+  mp_limb_t cy = 4711;         /* silence warnings */
+  TMP_DECL;
+
+  qn = nn - dn;
+
+  if (qn == 0)
+    return;
+
+  TMP_MARK;
+
+  tp = TMP_ALLOC_LIMBS (nn + 1);
+
+  /* The larger operand is passed first to mpn_mul.  */
+  if (dn >= qn)
+    mpn_mul (tp, dp, dn, qp, qn);
+  else
+    mpn_mul (tp, qp, qn, dp, dn);
+
+  if (rp != NULL)
+    {
+      cy = mpn_add_n (tp + qn, tp + qn, rp, dn);
+      cmp = cy != rh || mpn_cmp (tp, np, nn) != 0;
+    }
+  else
+    cmp = mpn_cmp (tp, np, nn - dn) != 0;
+
+  if (cmp != 0)
+    {
+      printf ("\r*******************************************************************************\n");
+      printf ("%s inconsistent in test %lu\n", fname, test);
+      printf ("N=   "); dumpy (np, nn);
+      printf ("D=   "); dumpy (dp, dn);
+      printf ("Q=   "); dumpy (qp, qn);
+      if (rp != NULL)
+       {
+         printf ("R=   "); dumpy (rp, dn);
+         /* NOTE(review): the recomputed carry cy is printed under "Rb" and
+            the routine's return value rh under "Cy"; confirm whether the
+            labels are meant that way round.  */
+         printf ("Rb=  %d, Cy=%d\n", (int) cy, (int) rh);
+       }
+      printf ("T=   "); dumpy (tp, nn);
+      /* mp_size_t is a typedef, so convert explicitly for %ld.  */
+      printf ("nn = %ld, dn = %ld, qn = %ld", (long) nn, (long) dn, (long) qn);
+      printf ("\n*******************************************************************************\n");
+      abort ();
+    }
+
+  TMP_FREE;
+}
+
+
+/* These are *bit* sizes. */
+#define SIZE_LOG 16
+#define MAX_DN (1L << SIZE_LOG)
+#define MAX_NN (1L << (SIZE_LOG + 1))
+
+#define COUNT 500
+
+/* Return a random word drawn from the generator rs: 32 random bits are
+   produced with mpz_urandomb in a temporary mpz and fetched with
+   mpz_get_ui.  */
+mp_limb_t
+random_word (gmp_randstate_ptr rs)
+{
+  mpz_t x;
+  mp_limb_t r;
+  TMP_DECL;
+  TMP_MARK;
+
+  /* Temporary mpz set up with a size argument of 2.  */
+  MPZ_TMP_INIT (x, 2);
+  mpz_urandomb (x, rs, 32);
+  r = mpz_get_ui (x);
+  TMP_FREE;
+  return r;
+}
+
+/* Randomised test driver for the bdiv routines: mpn_sbpi1_bdiv_qr/_q,
+   mpn_dcpi1_bdiv_qr/_q and mpn_mu_bdiv_qr/_q.
+
+   An optional first argument replaces the default number of tests (COUNT);
+   a non-numeric or non-positive value prints an error and returns 1.
+
+   For each test a random numerator n and a random odd divisor d are drawn,
+   each routine whose size condition holds is run, guard limbs placed around
+   the output areas are checked to be untouched, and the result is verified
+   with check_one.  */
+int
+main (int argc, char **argv)
+{
+  gmp_randstate_ptr rands;
+  unsigned long maxnbits, maxdbits, nbits, dbits;
+  mpz_t n, d, tz;
+  mp_size_t maxnn, maxdn, nn, dn, clearn, i;
+  mp_ptr np, dp, qp, rp;
+  mp_limb_t rh;
+  mp_limb_t t;
+  mp_limb_t dinv;
+  int count = COUNT;
+  mp_ptr scratch;
+  mp_limb_t ran;
+  mp_size_t alloc, itch;
+  mp_limb_t rran0, rran1, qran0, qran1;
+  TMP_DECL;
+
+  if (argc > 1)
+    {
+      char *end;
+      count = strtol (argv[1], &end, 0);
+      if (*end || count <= 0)
+       {
+         fprintf (stderr, "Invalid test count: %s.\n", argv[1]);
+         return 1;
+       }
+    }
+
+
+  maxdbits = MAX_DN;
+  maxnbits = MAX_NN;
+
+  tests_start ();
+  rands = RANDS;
+
+  mpz_init (n);
+  mpz_init (d);
+  mpz_init (tz);
+
+  maxnn = maxnbits / GMP_NUMB_BITS + 1;
+  maxdn = maxdbits / GMP_NUMB_BITS + 1;
+
+  TMP_MARK;
+
+  /* Each area gets two extra limbs and the pointer is advanced by one, so
+     that index -1 and the limb after the data can serve as guard limbs.  */
+  qp = TMP_ALLOC_LIMBS (maxnn + 2) + 1;
+  rp = TMP_ALLOC_LIMBS (maxnn + 2) + 1;
+
+  /* Scratch space starts at one limb and is grown on demand below.  */
+  alloc = 1;
+  scratch = __GMP_ALLOCATE_FUNC_LIMBS (alloc);
+
+  for (test = 0; test < count;)
+    {
+      /* Pick the operand bit sizes; dbits never exceeds nbits or maxdbits.  */
+      nbits = random_word (rands) % (maxnbits - GMP_NUMB_BITS) + 2 * GMP_NUMB_BITS;
+      if (maxdbits > nbits)
+       dbits = random_word (rands) % nbits + 1;
+      else
+       dbits = random_word (rands) % maxdbits + 1;
+
+#if RAND_UNIFORM
+#define RANDFUNC mpz_urandomb
+#else
+#define RANDFUNC mpz_rrandomb
+#endif
+
+      /* Redraw until d is nonzero and n has at least as many limbs as d.  */
+      do
+       {
+         RANDFUNC (n, rands, nbits);
+         do
+           {
+             RANDFUNC (d, rands, dbits);
+           }
+         while (mpz_sgn (d) == 0);
+
+         np = PTR (n);
+         dp = PTR (d);
+         nn = SIZ (n);
+         dn = SIZ (d);
+       }
+      while (nn < dn);
+
+      /* Force the divisor to be odd.  */
+      dp[0] |= 1;
+
+      mpz_urandomb (tz, rands, 32);
+      t = mpz_get_ui (tz);
+
+      /* Occasionally use an all-ones low divisor limb.  */
+      if (t % 17 == 0)
+       dp[0] = GMP_NUMB_MAX;
+
+      /* Occasionally perturb the numerator: zero its low limbs (case 0), or
+        overwrite its high dn limbs with the divisor minus (case 1) or plus
+        (case 2) a random word.  */
+      switch ((int) t % 16)
+       {
+       case 0:
+         clearn = random_word (rands) % nn;
+         for (i = 0; i <= clearn; i++)
+           np[i] = 0;
+         break;
+       case 1:
+         mpn_sub_1 (np + nn - dn, dp, dn, random_word (rands));
+         break;
+       case 2:
+         mpn_add_1 (np + nn - dn, dp, dn, random_word (rands));
+         break;
+       }
+
+      test++;
+
+      /* The routines below receive -dinv, where dinv comes from
+        binvert_limb applied to the low divisor limb.  */
+      binvert_limb (dinv, dp[0]);
+
+      rran0 = random_word (rands);
+      rran1 = random_word (rands);
+      qran0 = random_word (rands);
+      qran1 = random_word (rands);
+
+      /* Plant the guard limbs that the ASSERT_ALWAYS checks look at.  */
+      qp[-1] = qran0;
+      qp[nn - dn + 1] = qran1;
+      rp[-1] = rran0;
+
+      ran = random_word (rands);
+
+      /* The sbpi1 routines are only run when (nn-dn)*dn is below 1e5.  */
+      if ((double) (nn - dn) * dn < 1e5)
+       {
+         if (nn > dn)
+           {
+             /* Test mpn_sbpi1_bdiv_qr */
+             MPN_ZERO (qp, nn - dn);
+             MPN_ZERO (rp, dn);
+             MPN_COPY (rp, np, nn);
+             rh = mpn_sbpi1_bdiv_qr (qp, rp, nn, dp, dn, -dinv);
+             ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
+             ASSERT_ALWAYS (rp[-1] == rran0);
+             check_one (qp, rp + nn - dn, rh, np, nn, dp, dn, "mpn_sbpi1_bdiv_qr");
+           }
+
+         if (nn > dn)
+           {
+             /* Test mpn_sbpi1_bdiv_q */
+             MPN_COPY (rp, np, nn);
+             MPN_ZERO (qp, nn - dn);
+             mpn_sbpi1_bdiv_q (qp, rp, nn - dn, dp, MIN(dn,nn-dn), -dinv);
+             ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
+             ASSERT_ALWAYS (rp[-1] == rran0);
+             check_one (qp, NULL, 0, np, nn, dp, dn, "mpn_sbpi1_bdiv_q");
+           }
+       }
+
+      if (dn >= 4 && nn - dn >= 2)
+       {
+         /* Test mpn_dcpi1_bdiv_qr */
+         MPN_COPY (rp, np, nn);
+         MPN_ZERO (qp, nn - dn);
+         rh = mpn_dcpi1_bdiv_qr (qp, rp, nn, dp, dn, -dinv);
+         ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
+         ASSERT_ALWAYS (rp[-1] == rran0);
+         check_one (qp, rp + nn - dn, rh, np, nn, dp, dn, "mpn_dcpi1_bdiv_qr");
+       }
+
+      if (dn >= 4 && nn - dn >= 2)
+       {
+         /* Test mpn_dcpi1_bdiv_q */
+         MPN_COPY (rp, np, nn);
+         MPN_ZERO (qp, nn - dn);
+         mpn_dcpi1_bdiv_q (qp, rp, nn - dn, dp, MIN(dn,nn-dn), -dinv);
+         ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
+         ASSERT_ALWAYS (rp[-1] == rran0);
+         check_one (qp, NULL, 0, np, nn, dp, dn, "mpn_dcpi1_bdiv_q");
+       }
+
+      /* The mu routines are skipped for the smallest sizes.  */
+      if (nn - dn < 2 || dn < 2)
+       continue;
+
+      /* Test mpn_mu_bdiv_qr */
+      itch = mpn_mu_bdiv_qr_itch (nn, dn);
+      if (itch + 1 > alloc)
+       {
+         scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
+         alloc = itch + 1;
+       }
+      /* Guard limb just past the scratch space the routine asked for.  */
+      scratch[itch] = ran;
+      MPN_ZERO (qp, nn - dn);
+      MPN_ZERO (rp, dn);
+      rp[dn] = rran1;
+      rh = mpn_mu_bdiv_qr (qp, rp, np, nn, dp, dn, scratch);
+      ASSERT_ALWAYS (ran == scratch[itch]);
+      ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
+      ASSERT_ALWAYS (rp[-1] == rran0);  ASSERT_ALWAYS (rp[dn] == rran1);
+      check_one (qp, rp, rh, np, nn, dp, dn, "mpn_mu_bdiv_qr");
+
+      /* Test mpn_mu_bdiv_q */
+      itch = mpn_mu_bdiv_q_itch (nn, dn);
+      if (itch + 1 > alloc)
+       {
+         scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
+         alloc = itch + 1;
+       }
+      scratch[itch] = ran;
+      MPN_ZERO (qp, nn - dn + 1);
+      mpn_mu_bdiv_q (qp, np, nn - dn, dp, dn, scratch);
+      ASSERT_ALWAYS (ran == scratch[itch]);
+      ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
+      check_one (qp, NULL, 0, np, nn, dp, dn, "mpn_mu_bdiv_q");
+    }
+
+  __GMP_FREE_FUNC_LIMBS (scratch, alloc);
+
+  TMP_FREE;
+
+  mpz_clear (n);
+  mpz_clear (d);
+  mpz_clear (tz);
+
+  tests_end ();
+  return 0;
+}
diff --git a/tests/mpn/t-div.c b/tests/mpn/t-div.c

new file mode 100644 (file)

index 0000000..5ef0ec6
--- /dev/null
+++ b/tests/mpn/t-div.c
@@ -0,0 +1,443 @@
+/* Copyright 2006, 2007, 2009, 2010 Free Software Foundation, Inc.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+PARTICULAR PURPOSE.  See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include <stdlib.h>            /* for strtol */
+#include <stdio.h>             /* for printf */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+#include "tests/tests.h"
+
+
+/* Print the limbs of {p,n} in hex, most significant first, separated by
+   single spaces and terminated by a newline.  Operands longer than 20 limbs
+   are abbreviated to the top four limbs, "... ", and the bottom four limbs.
+
+   NOTE(review): the %lx conversion assumes mp_limb_t is passed like an
+   unsigned long -- confirm for builds that use a different limb type.  */
+static void
+dumpy (mp_srcptr p, mp_size_t n)
+{
+  const int digits = (int) (2 * sizeof (mp_limb_t));
+  mp_size_t idx;
+
+  if (n > 20)
+    {
+      /* Four high limbs, each followed by a space.  */
+      idx = n - 1;
+      while (idx >= n - 4)
+        {
+          printf ("%0*lx", digits, p[idx]);
+          printf (" ");
+          idx--;
+        }
+      printf ("... ");
+      /* Only the four low limbs remain to be shown.  */
+      idx = 3;
+    }
+  else
+    idx = n - 1;
+
+  /* Remaining limbs down to limb 0, with no separator after the last.  */
+  while (idx >= 0)
+    {
+      printf ("%0*lx", digits, p[idx]);
+      if (idx != 0)
+        printf (" ");
+      idx--;
+    }
+  puts ("");
+}
+
+static unsigned long test;
+
+/* Verify one division result and abort with a report on failure.
+
+   {qp,nn-dn+1} is the quotient, {np,nn} the numerator, {dp,dn} the divisor,
+   rp the dn-limb remainder or NULL when the routine produces none, and fname
+   the routine name used in the report.
+
+   The product Q*D is formed.  While it exceeds N, D is subtracted from it,
+   at most q_allowed_err times (the tolerance granted to approximate-quotient
+   routines).  The check then requires Q*D <= N ("q too large" otherwise),
+   N - Q*D < D ("q too small" otherwise) and, when rp is given, that
+   {rp,dn} equals N - Q*D ("r incorrect" otherwise).  */
+static void
+check_one (mp_ptr qp, mp_srcptr rp,
+          mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn,
+          const char *fname, mp_limb_t q_allowed_err)
+{
+  mp_size_t qn = nn - dn + 1;
+  mp_ptr tp;
+  const char *msg;
+  const char *tvalue;
+  mp_limb_t i;
+  TMP_DECL;
+  TMP_MARK;
+
+  tp = TMP_ALLOC_LIMBS (nn + 1);
+  /* The larger operand is passed first to refmpn_mul.  */
+  if (dn >= qn)
+    refmpn_mul (tp, dp, dn, qp, qn);
+  else
+    refmpn_mul (tp, qp, qn, dp, dn);
+
+  /* Absorb the permitted quotient error by stepping the product down.  */
+  for (i = 0; i < q_allowed_err && (tp[nn] > 0 || mpn_cmp (tp, np, nn) > 0); i++)
+    ASSERT_NOCARRY (refmpn_sub (tp, tp, nn+1, dp, dn));
+
+  if (tp[nn] > 0 || mpn_cmp (tp, np, nn) > 0)
+    {
+      msg = "q too large";
+      tvalue = "Q*D";
+      /* Shared failure report; the later checks jump here with msg and
+         tvalue already set.  */
+    error:
+      printf ("\r*******************************************************************************\n");
+      printf ("%s failed test %lu: %s\n", fname, test, msg);
+      printf ("N=    "); dumpy (np, nn);
+      printf ("D=    "); dumpy (dp, dn);
+      printf ("Q=    "); dumpy (qp, qn);
+      if (rp)
+       { printf ("R=    "); dumpy (rp, dn); }
+      printf ("%5s=", tvalue); dumpy (tp, nn+1);
+      /* mp_size_t is a typedef, so convert explicitly for %ld.  */
+      printf ("nn = %ld, dn = %ld, qn = %ld\n", (long) nn, (long) dn, (long) qn);
+      abort ();
+    }
+
+  /* From here on tp holds N - Q*D.  */
+  ASSERT_NOCARRY (refmpn_sub_n (tp, np, tp, nn));
+  tvalue = "N-Q*D";
+  if (!mpn_zero_p (tp + dn, nn - dn) || mpn_cmp (tp, dp, dn) >= 0)
+    {
+      msg = "q too small";
+      goto error;
+    }
+
+  if (rp && mpn_cmp (rp, tp, dn) != 0)
+    {
+      msg = "r incorrect";
+      goto error;
+    }
+
+  TMP_FREE;
+}
+
+
+/* These are *bit* sizes. */
+#ifndef SIZE_LOG
+#define SIZE_LOG 17
+#endif
+#define MAX_DN (1L << SIZE_LOG)
+#define MAX_NN (1L << (SIZE_LOG + 1))
+
+#define COUNT 200
+
+/* Return a random word drawn from the generator rs: 32 random bits are
+   produced with mpz_urandomb in a temporary mpz and fetched with
+   mpz_get_ui.  */
+mp_limb_t
+random_word (gmp_randstate_ptr rs)
+{
+  mpz_t x;
+  mp_limb_t r;
+  TMP_DECL;
+  TMP_MARK;
+
+  /* Temporary mpz set up with a size argument of 2.  */
+  MPZ_TMP_INIT (x, 2);
+  mpz_urandomb (x, rs, 32);
+  r = mpz_get_ui (x);
+  TMP_FREE;
+  return r;
+}
+
+/* Randomised test driver for the division routines: mpn_sbpi1_div_qr,
+   mpn_sbpi1_divappr_q, mpn_sbpi1_div_q, the mpn_dcpi1_ and mpn_mu_
+   counterparts, and mpn_div_q.
+
+   An optional first argument replaces the default number of tests (COUNT);
+   a non-numeric or non-positive value prints an error and returns 1.
+
+   For each test a random divisor d with its top bit set and a numerator n
+   are drawn, each routine whose size condition holds is run, guard limbs
+   around the output and scratch areas are checked where asserted, and the
+   result is verified with check_one.  mpn_div_q is finally run once more
+   with the divisor's top bit cleared.  */
+int
+main (int argc, char **argv)
+{
+  gmp_randstate_ptr rands;
+  unsigned long maxnbits, maxdbits, nbits, dbits;
+  mpz_t n, d, q, r, tz;
+  mp_size_t maxnn, maxdn, nn, dn, clearn, i;
+  mp_ptr np, dp, qp, rp;
+  mp_limb_t t;
+  gmp_pi1_t dinv;
+  int count = COUNT;
+  mp_ptr scratch;
+  mp_limb_t ran;
+  mp_size_t alloc, itch;
+  mp_limb_t rran0, rran1, qran0, qran1;
+  TMP_DECL;
+
+  if (argc > 1)
+    {
+      char *end;
+      count = strtol (argv[1], &end, 0);
+      if (*end || count <= 0)
+       {
+         fprintf (stderr, "Invalid test count: %s.\n", argv[1]);
+         return 1;
+       }
+    }
+
+
+  maxdbits = MAX_DN;
+  maxnbits = MAX_NN;
+
+  tests_start ();
+  rands = RANDS;
+
+  mpz_init (n);
+  mpz_init (d);
+  mpz_init (q);
+  mpz_init (r);
+  mpz_init (tz);
+
+  maxnn = maxnbits / GMP_NUMB_BITS + 1;
+  maxdn = maxdbits / GMP_NUMB_BITS + 1;
+
+  TMP_MARK;
+
+  /* Each area gets two extra limbs and the pointer is advanced by one, so
+     that index -1 and the limb after the data can serve as guard limbs.  */
+  qp = TMP_ALLOC_LIMBS (maxnn + 2) + 1;
+  rp = TMP_ALLOC_LIMBS (maxnn + 2) + 1;
+
+  /* Scratch space starts at one limb and is grown on demand below.  */
+  alloc = 1;
+  scratch = __GMP_ALLOCATE_FUNC_LIMBS (alloc);
+
+  for (test = 0; test < count;)
+    {
+      /* Pick the operand bit sizes, redrawing until nbits >= dbits.  */
+      do
+       {
+         nbits = random_word (rands) % (maxnbits - GMP_NUMB_BITS) + 2 * GMP_NUMB_BITS;
+         if (maxdbits > nbits)
+           dbits = random_word (rands) % nbits + 1;
+         else
+           dbits = random_word (rands) % maxdbits + 1;
+       }
+      while (nbits < dbits);
+
+#if RAND_UNIFORM
+#define RANDFUNC mpz_urandomb
+#else
+#define RANDFUNC mpz_rrandomb
+#endif
+
+      /* Nonzero divisor with the high bit of its top limb set.  */
+      do
+       RANDFUNC (d, rands, dbits);
+      while (mpz_sgn (d) == 0);
+      dn = SIZ (d);
+      dp = PTR (d);
+      dp[dn - 1] |= GMP_NUMB_HIGHBIT;
+
+      /* Even-numbered tests use a plain random numerator; odd-numbered ones
+        build it as q*d + r from a random q and r.  */
+      if (test % 2 == 0)
+       {
+         RANDFUNC (n, rands, nbits);
+         nn = SIZ (n);
+         ASSERT_ALWAYS (nn >= dn);
+       }
+      else
+       {
+         do
+           {
+             RANDFUNC (q, rands, random_word (rands) % (nbits - dbits + 1));
+             RANDFUNC (r, rands, random_word (rands) % mpz_sizeinbase (d, 2));
+             mpz_mul (n, q, d);
+             mpz_add (n, n, r);
+             nn = SIZ (n);
+           }
+         while (nn > maxnn || nn < dn);
+       }
+
+      ASSERT_ALWAYS (nn <= maxnn);
+      ASSERT_ALWAYS (dn <= maxdn);
+
+      np = PTR (n);
+
+      mpz_urandomb (tz, rands, 32);
+      t = mpz_get_ui (tz);
+
+      /* Occasionally use an all-ones top divisor limb.  */
+      if (t % 17 == 0)
+       dp[dn - 1] = GMP_NUMB_MAX;
+
+      /* Occasionally perturb the numerator: zero its high limbs from a
+        random position upwards (case 0), or overwrite its high dn limbs
+        with the divisor minus (case 1) or plus (case 2) a random word.  */
+      switch ((int) t % 16)
+       {
+       case 0:
+         clearn = random_word (rands) % nn;
+         for (i = clearn; i < nn; i++)
+           np[i] = 0;
+         break;
+       case 1:
+         mpn_sub_1 (np + nn - dn, dp, dn, random_word (rands));
+         break;
+       case 2:
+         mpn_add_1 (np + nn - dn, dp, dn, random_word (rands));
+         break;
+       }
+
+      test++;
+
+      /* invert_pi1 is handed the two most significant divisor limbs, so it
+        must not be invoked for a single-limb divisor: dp[dn - 2] would be
+        dp[-1], outside the divisor.  dinv is only consumed below by the
+        sbpi1 tests (dn > 2) and the dcpi1 tests (dn >= 6).  */
+      if (dn >= 2)
+       invert_pi1 (dinv, dp[dn - 1], dp[dn - 2]);
+
+      rran0 = random_word (rands);
+      rran1 = random_word (rands);
+      qran0 = random_word (rands);
+      qran1 = random_word (rands);
+
+      /* Plant the guard limbs that the ASSERT_ALWAYS checks look at.  */
+      qp[-1] = qran0;
+      qp[nn - dn + 1] = qran1;
+      rp[-1] = rran0;
+
+      ran = random_word (rands);
+
+      /* The sbpi1 routines are only run when (nn-dn)*dn is below 1e5.  */
+      if ((double) (nn - dn) * dn < 1e5)
+       {
+         /* Test mpn_sbpi1_div_qr */
+         if (dn > 2)
+           {
+             MPN_COPY (rp, np, nn);
+             if (nn > dn)
+               MPN_ZERO (qp, nn - dn);
+             qp[nn - dn] = mpn_sbpi1_div_qr (qp, rp, nn, dp, dn, dinv.inv32);
+             check_one (qp, rp, np, nn, dp, dn, "mpn_sbpi1_div_qr", 0);
+           }
+
+         /* Test mpn_sbpi1_divappr_q */
+         if (dn > 2)
+           {
+             MPN_COPY (rp, np, nn);
+             if (nn > dn)
+               MPN_ZERO (qp, nn - dn);
+             qp[nn - dn] = mpn_sbpi1_divappr_q (qp, rp, nn, dp, dn, dinv.inv32);
+             check_one (qp, NULL, np, nn, dp, dn, "mpn_sbpi1_divappr_q", 1);
+           }
+
+         /* Test mpn_sbpi1_div_q */
+         if (dn > 2)
+           {
+             MPN_COPY (rp, np, nn);
+             if (nn > dn)
+               MPN_ZERO (qp, nn - dn);
+             qp[nn - dn] = mpn_sbpi1_div_q (qp, rp, nn, dp, dn, dinv.inv32);
+             check_one (qp, NULL, np, nn, dp, dn, "mpn_sbpi1_div_q", 0);
+           }
+       }
+
+      /* Test mpn_dcpi1_div_qr */
+      if (dn >= 6 && nn - dn >= 3)
+       {
+         MPN_COPY (rp, np, nn);
+         if (nn > dn)
+           MPN_ZERO (qp, nn - dn);
+         qp[nn - dn] = mpn_dcpi1_div_qr (qp, rp, nn, dp, dn, &dinv);
+         ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
+         ASSERT_ALWAYS (rp[-1] == rran0);
+         check_one (qp, rp, np, nn, dp, dn, "mpn_dcpi1_div_qr", 0);
+       }
+
+      /* Test mpn_dcpi1_divappr_q */
+      if (dn >= 6 && nn - dn >= 3)
+       {
+         MPN_COPY (rp, np, nn);
+         if (nn > dn)
+           MPN_ZERO (qp, nn - dn);
+         qp[nn - dn] = mpn_dcpi1_divappr_q (qp, rp, nn, dp, dn, &dinv);
+         ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
+         ASSERT_ALWAYS (rp[-1] == rran0);
+         check_one (qp, NULL, np, nn, dp, dn, "mpn_dcpi1_divappr_q", 1);
+       }
+
+      /* Test mpn_dcpi1_div_q */
+      if (dn >= 6 && nn - dn >= 3)
+       {
+         MPN_COPY (rp, np, nn);
+         if (nn > dn)
+           MPN_ZERO (qp, nn - dn);
+         qp[nn - dn] = mpn_dcpi1_div_q (qp, rp, nn, dp, dn, &dinv);
+         ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
+         ASSERT_ALWAYS (rp[-1] == rran0);
+         check_one (qp, NULL, np, nn, dp, dn, "mpn_dcpi1_div_q", 0);
+       }
+
+      /* Test mpn_mu_div_qr */
+      if (nn - dn > 2 && dn >= 2)
+       {
+         itch = mpn_mu_div_qr_itch (nn, dn, 0);
+         if (itch + 1 > alloc)
+           {
+             scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
+             alloc = itch + 1;
+           }
+         /* Guard limb just past the scratch space the routine asked for.  */
+         scratch[itch] = ran;
+         MPN_ZERO (qp, nn - dn);
+         MPN_ZERO (rp, dn);
+         rp[dn] = rran1;
+         qp[nn - dn] = mpn_mu_div_qr (qp, rp, np, nn, dp, dn, scratch);
+         ASSERT_ALWAYS (ran == scratch[itch]);
+         ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
+         ASSERT_ALWAYS (rp[-1] == rran0);  ASSERT_ALWAYS (rp[dn] == rran1);
+         check_one (qp, rp, np, nn, dp, dn, "mpn_mu_div_qr", 0);
+       }
+
+      /* Test mpn_mu_divappr_q */
+      if (nn - dn > 2 && dn >= 2)
+       {
+         itch = mpn_mu_divappr_q_itch (nn, dn, 0);
+         if (itch + 1 > alloc)
+           {
+             scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
+             alloc = itch + 1;
+           }
+         scratch[itch] = ran;
+         MPN_ZERO (qp, nn - dn);
+         qp[nn - dn] = mpn_mu_divappr_q (qp, np, nn, dp, dn, scratch);
+         ASSERT_ALWAYS (ran == scratch[itch]);
+         ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
+         check_one (qp, NULL, np, nn, dp, dn, "mpn_mu_divappr_q", 4);
+       }
+
+      /* Test mpn_mu_div_q */
+      if (nn - dn > 2 && dn >= 2)
+       {
+         itch = mpn_mu_div_q_itch (nn, dn, 0);
+         if (itch + 1> alloc)
+           {
+             scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
+             alloc = itch + 1;
+           }
+         scratch[itch] = ran;
+         MPN_ZERO (qp, nn - dn);
+         qp[nn - dn] = mpn_mu_div_q (qp, np, nn, dp, dn, scratch);
+         ASSERT_ALWAYS (ran == scratch[itch]);
+         ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
+         check_one (qp, NULL, np, nn, dp, dn, "mpn_mu_div_q", 0);
+       }
+
+
+      /* Test mpn_div_q with the top divisor bit set; nn + 1 limbs of
+        scratch are provided, plus the guard limb.  */
+      if (1)
+       {
+         itch = nn + 1;
+         if (itch + 1> alloc)
+           {
+             scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
+             alloc = itch + 1;
+           }
+         scratch[itch] = ran;
+         mpn_div_q (qp, np, nn, dp, dn, scratch);
+         ASSERT_ALWAYS (ran == scratch[itch]);
+         ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
+         check_one (qp, NULL, np, nn, dp, dn, "mpn_div_q", 0);
+       }
+
+      /* Finally, test mpn_div_q without msb set.  */
+      dp[dn - 1] &= ~GMP_NUMB_HIGHBIT;
+      /* Skip when clearing the bit left a zero top limb.  */
+      if (dp[dn - 1] == 0)
+       continue;
+
+      itch = nn + 1;
+      if (itch + 1> alloc)
+       {
+         scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
+         alloc = itch + 1;
+       }
+      scratch[itch] = ran;
+      mpn_div_q (qp, np, nn, dp, dn, scratch);
+      ASSERT_ALWAYS (ran == scratch[itch]);
+      ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
+      check_one (qp, NULL, np, nn, dp, dn, "mpn_div_q", 0);
+    }
+
+  __GMP_FREE_FUNC_LIMBS (scratch, alloc);
+
+  TMP_FREE;
+
+  mpz_clear (n);
+  mpz_clear (d);
+  mpz_clear (q);
+  mpz_clear (r);
+  mpz_clear (tz);
+
+  tests_end ();
+  return 0;
+}
diff --git a/tests/mpn/t-divrem_1.c b/tests/mpn/t-divrem_1.c

new file mode 100644 (file)

index 0000000..27eba9c
--- /dev/null
+++ b/tests/mpn/t-divrem_1.c
@@ -0,0 +1,119 @@
+/* Test mpn_divrem_1 and mpn_preinv_divrem_1.
+
+Copyright 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+/* Check mpn_divrem_1 (and mpn_preinv_divrem_1, if built) on fixed data.  */
+void
+check_data (void)
+{
+  static const struct {
+    mp_limb_t  n[1];       /* dividend */
+    mp_size_t  nsize;      /* limbs of n[] in use */
+    mp_limb_t  d;          /* single-limb divisor */
+    mp_size_t  qxn;        /* extra (fraction) quotient limbs requested */
+    mp_limb_t  want_q[5];  /* expected quotient, nsize+qxn limbs compared */
+    mp_limb_t  want_r;     /* expected return value (remainder) */
+  } data[] = {
+    { { 0 }, 1, 1, 0,
+      { 0 }, 0},
+
+    { { 5 }, 1, 2, 0,
+      { 2 }, 1},
+
+#if GMP_NUMB_BITS == 32
+    { { 0x3C }, 1, 0xF2, 1,
+      { 0x3F789854, 0 }, 0x98 },
+#endif
+
+#if GMP_NUMB_BITS == 64
+    { { 0x3C }, 1, 0xF2, 1,
+      { CNST_LIMB(0x3F789854A0CB1B81), 0 }, 0x0E },
+
+    /* This case exposed some wrong code generated by SGI cc on mips64 irix
+       6.5 with -n32 -O2, in the fractional loop for normalized divisor
+       using udiv_qrnnd_preinv.  A test "x>al" in one of the sub_ddmmss
+       expansions came out wrong, leading to an incorrect quotient.  */
+    { { CNST_LIMB(0x3C00000000000000) }, 1, CNST_LIMB(0xF200000000000000), 1,
+      { CNST_LIMB(0x3F789854A0CB1B81), 0 }, CNST_LIMB(0x0E00000000000000) },
+#endif
+  };
+
+  mp_limb_t  dinv, got_r, got_q[numberof(data[0].want_q)];
+  mp_size_t  qsize;
+  int        i, shift;
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      qsize = data[i].nsize + data[i].qxn;
+      ASSERT_ALWAYS (qsize <= numberof (got_q));  /* result must fit got_q[] */
+
+      got_r = mpn_divrem_1 (got_q, data[i].qxn, data[i].n, data[i].nsize,
+                            data[i].d);
+      if (got_r != data[i].want_r
+          || refmpn_cmp (got_q, data[i].want_q, qsize) != 0)
+        {
+          printf        ("mpn_divrem_1 wrong at data[%d]\n", i);
+        bad:  /* common report; the preinv check below jumps here too */
+          mpn_trace     ("  n", data[i].n, data[i].nsize);
+          printf        ("  nsize=%ld\n", (long) data[i].nsize);
+          mp_limb_trace ("  d", data[i].d);
+          printf        ("  qxn=%ld\n", (long) data[i].qxn);
+          mpn_trace     ("  want q", data[i].want_q, qsize);
+          mpn_trace     ("  got  q", got_q, qsize);
+          mp_limb_trace ("  want r", data[i].want_r);
+          mp_limb_trace ("  got  r", got_r);
+          abort ();
+        }
+
+      /* Same data through mpn_preinv_divrem_1, when this build has it.  */
+#if USE_PREINV_DIVREM_1 || HAVE_NATIVE_mpn_preinv_divrem_1
+      shift = refmpn_count_leading_zeros (data[i].d);
+      dinv = refmpn_invert_limb (data[i].d << shift);  /* d shifted to high bit */
+      got_r = mpn_preinv_divrem_1 (got_q, data[i].qxn,
+                                   data[i].n, data[i].nsize,
+                                   data[i].d, dinv, shift);
+      if (got_r != data[i].want_r
+          || refmpn_cmp (got_q, data[i].want_q, qsize) != 0)
+        {
+          printf        ("mpn_preinv_divrem_1 wrong at data[%d]\n", i);
+          printf        ("  shift=%d\n", shift);
+          mp_limb_trace ("  dinv", dinv);
+          goto bad;
+        }
+#endif
+    }
+}
+
+int
+main (void)
+{
+  tests_start ();
+  mp_trace_base = -16;  /* NOTE(review): presumably hex for the trace output -- confirm */
+
+  check_data ();        /* calls abort() itself on any mismatch */
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpn/t-fat.c b/tests/mpn/t-fat.c

new file mode 100644 (file)

index 0000000..9e20824
--- /dev/null
+++ b/tests/mpn/t-fat.c
@@ -0,0 +1,301 @@
+/* Test fat binary setups.
+
+Copyright 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+#include "tests.h"
+
+
+/* In this program we're aiming to pick up certain subtle problems that
+   might creep into a fat binary.
+
+   1. We want to ensure the application entry point routines like
+      __gmpn_add_n dispatch to the correct field of __gmpn_cpuvec.
+
+      Note that these routines are not exercised as a side effect of other
+      tests (eg. the mpz routines).  Internally the fields of __gmpn_cpuvec
+      are used directly, so we need to write test code explicitly calling
+      the mpn functions, like an application will have.
+
+   2. We want to ensure the initial __gmpn_cpuvec data has the initializer
+      function pointers in the correct fields, and that those initializer
+      functions dispatch to their correct corresponding field once
+      initialization has been done.
+
+      Only one of the initializer routines executes in a normal program,
+      since that routine sets all the pointers to actual mpn functions.  We
+      forcibly reset __gmpn_cpuvec so we can run each.
+
+   In both cases for the above, the data put through the functions is
+   nothing special, just enough to verify that for instance an add_n is
+   really doing an add_n and has not for instance mistakenly gone to sub_n
+   or something.
+
+   The loop around each test will exercise the initializer routine on the
+   first iteration, and the dispatcher routine on the second.
+
+   If the dispatcher and/or initializer routines are generated mechanically
+   via macros (eg. mpn/x86/fat/fat_entry.asm) then there shouldn't be too
+   much risk of them going wrong, provided the structure layout is correctly
+   expressed.  But if they're in C then it's good to guard against typos in
+   what is rather repetitive code.  The initializer data for __gmpn_cpuvec
+   in fat.c is always done by hand and is likewise a bit repetitive.  */
+
+
+/* Stand-ins so this file still compiles when GMP is not a fat binary.  */
+#if ! WANT_FAT_BINARY
+struct cpuvec_t {
+  int  initialized;     /* named by the ITERATE macro in check_thresholds */
+};
+struct cpuvec_t __gmpn_cpuvec;
+#define ITERATE_FAT_THRESHOLDS()  do { } while (0)  /* nothing to iterate */
+#endif
+
+/* Copy of __gmpn_cpuvec taken at the top of main, before tests_start.  */
+struct cpuvec_t  initial_cpuvec;
+
+void
+check_functions (void)
+{
+  mp_limb_t  wp[2], xp[2], yp[2], r;
+  int  i;
+  /* Every stanza: restore the startup cpuvec, then call the entry point twice.  */
+  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
+  for (i = 0; i < 2; i++)  /* pass 0: initializer, pass 1: dispatcher */
+    {
+      xp[0] = 123;
+      yp[0] = 456;
+      mpn_add_n (wp, xp, yp, (mp_size_t) 1);
+      ASSERT_ALWAYS (wp[0] == 579);
+    }
+
+  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
+  for (i = 0; i < 2; i++)
+    {
+      xp[0] = 123;
+      wp[0] = 456;
+      r = mpn_addmul_1 (wp, xp, (mp_size_t) 1, CNST_LIMB(2));
+      ASSERT_ALWAYS (wp[0] == 702);
+      ASSERT_ALWAYS (r == 0);
+    }
+
+#if HAVE_NATIVE_mpn_copyd
+  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
+  for (i = 0; i < 2; i++)
+    {
+      xp[0] = 123;
+      xp[1] = 456;
+      mpn_copyd (xp+1, xp, (mp_size_t) 1);
+      ASSERT_ALWAYS (xp[1] == 123);
+    }
+#endif
+
+#if HAVE_NATIVE_mpn_copyi
+  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
+  for (i = 0; i < 2; i++)
+    {
+      xp[0] = 123;
+      xp[1] = 456;
+      mpn_copyi (xp, xp+1, (mp_size_t) 1);
+      ASSERT_ALWAYS (xp[0] == 456);
+    }
+#endif
+
+  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
+  for (i = 0; i < 2; i++)
+    {
+      xp[0] = 1605;
+      mpn_divexact_1 (wp, xp, (mp_size_t) 1, CNST_LIMB(5));
+      ASSERT_ALWAYS (wp[0] == 321);
+    }
+
+  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
+  for (i = 0; i < 2; i++)
+    {
+      xp[0] = 1296;
+      r = mpn_divexact_by3c (wp, xp, (mp_size_t) 1, CNST_LIMB(0));
+      ASSERT_ALWAYS (wp[0] == 432);
+      ASSERT_ALWAYS (r == 0);
+    }
+
+  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
+  for (i = 0; i < 2; i++)
+    {
+      xp[0] = 287;
+      r = mpn_divrem_1 (wp, (mp_size_t) 1, xp, (mp_size_t) 1, CNST_LIMB(7));
+      ASSERT_ALWAYS (wp[1] == 41);  /* integer limb: 287 / 7 */
+      ASSERT_ALWAYS (wp[0] == 0);   /* the one extra limb requested (qxn = 1) */
+      ASSERT_ALWAYS (r == 0);
+    }
+
+  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
+  for (i = 0; i < 2; i++)
+    {
+      xp[0] = 12;
+      r = mpn_gcd_1 (xp, (mp_size_t) 1, CNST_LIMB(9));
+      ASSERT_ALWAYS (r == 3);
+    }
+
+  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
+  for (i = 0; i < 2; i++)
+    {
+      xp[0] = 0x1001;
+      mpn_lshift (wp, xp, (mp_size_t) 1, 1);
+      ASSERT_ALWAYS (wp[0] == 0x2002);
+    }
+
+  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
+  for (i = 0; i < 2; i++)
+    {
+      xp[0] = 14;
+      r = mpn_mod_1 (xp, (mp_size_t) 1, CNST_LIMB(4));
+      ASSERT_ALWAYS (r == 2);
+    }
+
+#if (GMP_NUMB_BITS % 4) == 0
+  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
+  for (i = 0; i < 2; i++)
+    {
+      int  bits = (GMP_NUMB_BITS / 4) * 3;
+      mp_limb_t  mod = (CNST_LIMB(1) << bits) - 1;
+      mp_limb_t  want = GMP_NUMB_MAX % mod;
+      xp[0] = GMP_NUMB_MAX;
+      r = mpn_mod_34lsub1 (xp, (mp_size_t) 1);
+      ASSERT_ALWAYS (r % mod == want);  /* r is only checked modulo mod */
+    }
+#endif
+
+  /* NOTE(review): modexact_1c_odd (DECL_modexact_1c_odd) has no stanza here.  */
+
+  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
+  for (i = 0; i < 2; i++)
+    {
+      xp[0] = 14;
+      r = mpn_mul_1 (wp, xp, (mp_size_t) 1, CNST_LIMB(4));
+      ASSERT_ALWAYS (wp[0] == 56);
+      ASSERT_ALWAYS (r == 0);
+    }
+
+  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
+  for (i = 0; i < 2; i++)
+    {
+      xp[0] = 5;
+      yp[0] = 7;
+      mpn_mul_basecase (wp, xp, (mp_size_t) 1, yp, (mp_size_t) 1);
+      ASSERT_ALWAYS (wp[0] == 35);
+      ASSERT_ALWAYS (wp[1] == 0);
+    }
+
+#if HAVE_NATIVE_mpn_preinv_divrem_1 && GMP_NAIL_BITS == 0
+  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
+  for (i = 0; i < 2; i++)
+    {
+      xp[0] = 0x101;
+      r = mpn_preinv_divrem_1 (wp, (mp_size_t) 1, xp, (mp_size_t) 1,
+                               GMP_LIMB_HIGHBIT,
+                               refmpn_invert_limb (GMP_LIMB_HIGHBIT), 0);
+      ASSERT_ALWAYS (wp[0] == 0x202);  /* extra limb (qxn = 1): 0x101 * 2 */
+      ASSERT_ALWAYS (wp[1] == 0);      /* 0x101 is below the divisor */
+      ASSERT_ALWAYS (r == 0);
+    }
+#endif
+
+#if GMP_NAIL_BITS == 0
+  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
+  for (i = 0; i < 2; i++)
+    {
+      xp[0] = GMP_LIMB_HIGHBIT+123;
+      r = mpn_preinv_mod_1 (xp, (mp_size_t) 1, GMP_LIMB_HIGHBIT,
+                            refmpn_invert_limb (GMP_LIMB_HIGHBIT));
+      ASSERT_ALWAYS (r == 123);
+    }
+#endif
+
+  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
+  for (i = 0; i < 2; i++)
+    {
+      xp[0] = 0x8008;
+      mpn_rshift (wp, xp, (mp_size_t) 1, 1);
+      ASSERT_ALWAYS (wp[0] == 0x4004);
+    }
+
+  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
+  for (i = 0; i < 2; i++)
+    {
+      xp[0] = 5;
+      mpn_sqr_basecase (wp, xp, (mp_size_t) 1);
+      ASSERT_ALWAYS (wp[0] == 25);
+      ASSERT_ALWAYS (wp[1] == 0);
+    }
+
+  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
+  for (i = 0; i < 2; i++)
+    {
+      xp[0] = 999;
+      yp[0] = 666;
+      mpn_sub_n (wp, xp, yp, (mp_size_t) 1);
+      ASSERT_ALWAYS (wp[0] == 333);
+    }
+
+  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
+  for (i = 0; i < 2; i++)
+    {
+      xp[0] = 123;
+      wp[0] = 456;
+      r = mpn_submul_1 (wp, xp, (mp_size_t) 1, CNST_LIMB(2));
+      ASSERT_ALWAYS (wp[0] == 210);
+      ASSERT_ALWAYS (r == 0);
+    }
+}
+
+/* Expect the first use of each fat threshold to invoke the necessary
+   initialization: name must be nonzero and equal to its cpuvec field.  */
+void
+check_thresholds (void)
+{
+#define ITERATE(name,field)                                             \
+  do {                                                                  \
+    memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));   \
+    ASSERT_ALWAYS (name != 0);                                          \
+    ASSERT_ALWAYS (name == __gmpn_cpuvec.field);                        \
+    ASSERT_ALWAYS (__gmpn_cpuvec.initialized);                          \
+  } while (0)
+  /* NOTE(review): presumably expands ITERATE once per threshold; empty if ! WANT_FAT_BINARY.  */
+  ITERATE_FAT_THRESHOLDS ();
+}
+
+/* Snapshot __gmpn_cpuvec before anything else runs, then do both checks.  */
+int
+main (void)
+{
+  memcpy (&initial_cpuvec, &__gmpn_cpuvec, sizeof (__gmpn_cpuvec));
+
+  tests_start ();
+
+  check_functions ();
+  check_thresholds ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpn/t-get_d.c b/tests/mpn/t-get_d.c

new file mode 100644 (file)

index 0000000..c3999e2
--- /dev/null
+++ b/tests/mpn/t-get_d.c
@@ -0,0 +1,502 @@
+/* Test mpn_get_d.
+
+Copyright 2002, 2003, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+/* Note that we don't use <limits.h> for LONG_MIN, but instead our own
+   definition in gmp-impl.h.  In gcc 2.95.4 (debian 3.0) under
+   -mcpu=ultrasparc, limits.h sees __sparc_v9__ defined and assumes that
+   means long is 64-bit long, but it's only 32-bits, causing fatal compile
+   errors.  */
+
+#include "config.h"
+
+#include <setjmp.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+/* Default to 0, in which case check_inf and the check_ieee_* tests return early.  */
+#ifndef _GMP_IEEE_FLOATS
+#define _GMP_IEEE_FLOATS 0
+#endif
+
+
+/* Exercise mpn_get_d on single-bit operands 2^bit, with various exponents
+   and both signs, expecting exactly +/- 2^(bit+exp).  */
+void
+check_onebit (void)
+{
+  static const int bit_table[] = {
+    0, 1, 2, 3,
+    GMP_NUMB_BITS - 2, GMP_NUMB_BITS - 1,
+    GMP_NUMB_BITS,
+    GMP_NUMB_BITS + 1, GMP_NUMB_BITS + 2,
+    2 * GMP_NUMB_BITS - 2, 2 * GMP_NUMB_BITS - 1,
+    2 * GMP_NUMB_BITS,
+    2 * GMP_NUMB_BITS + 1, 2 * GMP_NUMB_BITS + 2,
+    3 * GMP_NUMB_BITS - 2, 3 * GMP_NUMB_BITS - 1,
+    3 * GMP_NUMB_BITS,
+    3 * GMP_NUMB_BITS + 1, 3 * GMP_NUMB_BITS + 2,
+    4 * GMP_NUMB_BITS - 2, 4 * GMP_NUMB_BITS - 1,
+    4 * GMP_NUMB_BITS,
+    4 * GMP_NUMB_BITS + 1, 4 * GMP_NUMB_BITS + 2,
+    5 * GMP_NUMB_BITS - 2, 5 * GMP_NUMB_BITS - 1,
+    5 * GMP_NUMB_BITS,
+    5 * GMP_NUMB_BITS + 1, 5 * GMP_NUMB_BITS + 2,
+    6 * GMP_NUMB_BITS - 2, 6 * GMP_NUMB_BITS - 1,
+    6 * GMP_NUMB_BITS,
+    6 * GMP_NUMB_BITS + 1, 6 * GMP_NUMB_BITS + 2,
+  };
+  static const int exp_table[] = {
+    0, -100, -10, -1, 1, 10, 100,
+  };
+
+  /* FIXME: It'd be better to base this on the float format. */
+#ifdef __vax
+  int     limit = 127;  /* vax fp numbers have limited range */
+#else
+  int     limit = 511;
+#endif
+
+  int        bit_i, exp_i, i;
+  double     got, want;
+  mp_size_t  nsize, sign;
+  long       bit, exp, want_bit;
+  mp_limb_t  np[20];
+
+  for (bit_i = 0; bit_i < numberof (bit_table); bit_i++)
+    {
+      bit = bit_table[bit_i];
+      /* np = 2^bit: nsize zero limbs with the single bit then set */
+      nsize = BITS_TO_LIMBS (bit+1);
+      refmpn_zero (np, nsize);
+      np[bit/GMP_NUMB_BITS] = CNST_LIMB(1) << (bit % GMP_NUMB_BITS);
+
+      for (exp_i = 0; exp_i < numberof (exp_table); exp_i++)
+        {
+          exp = exp_table[exp_i];
+
+          want_bit = bit + exp;
+          if (want_bit > limit || want_bit < -limit)
+            continue;  /* outside the exponent range allowed by limit */
+          /* want = 2^want_bit, built by repeated doubling or halving */
+          want = 1.0;
+          for (i = 0; i < want_bit; i++)
+            want *= 2.0;
+          for (i = 0; i > want_bit; i--)
+            want *= 0.5;
+
+          for (sign = 0; sign >= -1; sign--, want = -want)  /* +want, -want */
+            {
+              got = mpn_get_d (np, nsize, sign, exp);
+              if (got != want)
+                {
+                  printf    ("mpn_get_d wrong on 2^n\n");
+                  printf    ("   bit      %ld\n", bit);
+                  printf    ("   exp      %ld\n", exp);
+                  printf    ("   want_bit %ld\n", want_bit);
+                  printf    ("   sign     %ld\n", (long) sign);
+                  mpn_trace ("   n        ", np, nsize);
+                  printf    ("   nsize    %ld\n", (long) nsize);
+                  d_trace   ("   want     ", want);
+                  d_trace   ("   got      ", got);
+                  abort();
+                }
+            }
+        }
+    }
+}
+
+
+/* Convert 2^k+1 and -(2^k+1) with mpn_get_d, for every k from 1 up to (but
+   not including) the mantissa width reported by tests_dbl_mant_bits.  */
+void
+check_twobit (void)
+{
+  mp_ptr     limbs;
+  mp_size_t  nlimbs, neg;
+  double     result, expect;
+  int        k, nbits;
+
+  nbits = tests_dbl_mant_bits ();
+  if (nbits == 0)
+    return;
+
+  limbs = refmpn_malloc_limbs (BITS_TO_LIMBS (nbits));
+
+  /* expect tracks 2^k+1: it starts at 2^1+1 and is stepped in the loop header */
+  for (k = 1, expect = 3.0; k < nbits; k++, expect = 2.0 * expect - 1.0)
+    {
+      nlimbs = BITS_TO_LIMBS (k+1);
+      refmpn_zero (limbs, nlimbs);
+      limbs[k/GMP_NUMB_BITS] = CNST_LIMB(1) << (k % GMP_NUMB_BITS);
+      limbs[0] |= 1;
+
+      /* neg = 0 checks +expect, neg = -1 checks -expect; two flips restore it */
+      for (neg = 0; neg >= -1; neg--, expect = -expect)
+        {
+          result = mpn_get_d (limbs, nlimbs, neg, 0);
+          if (result == expect)
+            continue;
+
+          printf    ("mpn_get_d wrong on 2^%d + 1\n", k);
+          printf    ("   sign     %ld\n", (long) neg);
+          mpn_trace ("   n        ", limbs, nlimbs);
+          printf    ("   nsize    %ld\n", (long) nlimbs);
+          d_trace   ("   want     ", expect);
+          d_trace   ("   got      ", result);
+          abort();
+        }
+    }
+
+  free (limbs);
+}
+
+
+/* Expect large negative exponents to underflow to 0.0, for both signs.
+   Some systems might have hardware traps for such an underflow (though
+   usually it's not the default), so watch out for SIGFPE. */
+void
+check_underflow (void)
+{
+  static const long exp_table[] = {
+    -999999L, LONG_MIN,
+  };
+  static const mp_limb_t  np[1] = { 1 };
+
+  static long exp;  /* NOTE(review): presumably static so the SIGFPE branch can still print it -- confirm */
+  mp_size_t  nsize, sign;
+  double     got;
+  int        exp_i;
+
+  nsize = numberof (np);
+
+  if (tests_setjmp_sigfpe() == 0)  /* zero: run the checks; else branch reports SIGFPE */
+    {
+      for (exp_i = 0; exp_i < numberof (exp_table); exp_i++)
+        {
+          exp = exp_table[exp_i];
+
+          for (sign = 0; sign >= -1; sign--)
+            {
+              got = mpn_get_d (np, nsize, sign, exp);
+              if (got != 0.0)
+                {
+                  printf  ("mpn_get_d wrong, didn't get 0.0 on underflow\n");
+                  printf  ("  nsize    %ld\n", (long) nsize);
+                  printf  ("  exp      %ld\n", exp);
+                  printf  ("  sign     %ld\n", (long) sign);
+                  d_trace ("  got      ", got);
+                  abort ();
+                }
+            }
+        }
+    }
+  else
+    {
+      printf ("Warning, underflow to zero tests skipped due to SIGFPE (exp=%ld)\n", exp);
+    }
+  tests_sigfpe_done ();
+}
+
+
+/* With IEEE floats, an enormous exponent must make mpn_get_d return an
+   infinity carrying the requested sign.  */
+void
+check_inf (void)
+{
+  static const mp_limb_t  ones[4] = { 1, 1, 1, 1 };
+  static const long  huge_exp[] = {
+    999999L, LONG_MAX,
+  };
+  const char  *complaint;
+  mp_size_t   limbs, neg, res_neg;
+  double      res;
+  long        e;
+  int         t;
+
+  if (! _GMP_IEEE_FLOATS)
+    return;
+
+  /* every operand length, every exponent, both signs */
+  for (limbs = 1; limbs <= numberof (ones); limbs++)
+    {
+      for (t = 0; t < numberof (huge_exp); t++)
+        for (neg = 0; neg >= -1; neg--)
+          {
+            e = huge_exp[t];
+            res = mpn_get_d (ones, limbs, neg, e);
+            res_neg = (res >= 0 ? 0 : -1);
+
+            /* a non-infinite result is reported in preference to a wrong sign */
+            if (! tests_isinf (res))
+              complaint = "mpn_get_d wrong, didn't get infinity\n";
+            else if (res_neg != neg)
+              complaint = "mpn_get_d wrong sign on infinity\n";
+            else
+              continue;
+
+            fputs   (complaint, stdout);
+            printf  ("  nsize    %ld\n", (long) limbs);
+            printf  ("  exp      %ld\n", e);
+            printf  ("  sign     %ld\n", (long) neg);
+            d_trace ("  got      ", res);
+            printf  ("  got sign %ld\n", (long) res_neg);
+            abort ();
+          }
+    }
+}
+
+/* Check values 2^n approaching and into IEEE denorm range, for both signs.
+   Some systems might not support denorms, or might have traps setup, so
+   watch out for SIGFPE.  */
+void
+check_ieee_denorm (void)
+{
+  static long exp;  /* NOTE(review): presumably static so the SIGFPE branch can still print it -- confirm */
+  mp_limb_t  n = 1;
+  long       i;
+  mp_size_t  sign;
+  double     want, got;
+
+  if (! _GMP_IEEE_FLOATS)
+    return;
+
+  if (tests_setjmp_sigfpe() == 0)
+    {
+      exp = -1020;
+      want = 1.0;
+      for (i = 0; i > exp; i--)  /* want = 2^exp by repeated halving */
+        want *= 0.5;
+
+      for ( ; exp > -1500 && want != 0.0; exp--)  /* stop once want reaches 0.0 */
+        {
+          for (sign = 0; sign >= -1; sign--)
+            {
+              got = mpn_get_d (&n, (mp_size_t) 1, sign, exp);
+              if (got != want)
+                {
+                  printf  ("mpn_get_d wrong on denorm\n");
+                  printf  ("  n=1\n");
+                  printf  ("  exp   %ld\n", exp);
+                  printf  ("  sign  %ld\n", (long) sign);
+                  d_trace ("  got   ", got);
+                  d_trace ("  want  ", want);
+                  abort ();
+                }
+              want = -want;  /* flipped twice per exp, so positive again after */
+            }
+          want *= 0.5;
+          FORCE_DOUBLE (want);  /* NOTE(review): presumably rounds want to true double precision -- confirm */
+        }
+    }
+  else
+    {
+      printf ("Warning, IEEE denorm tests skipped due to SIGFPE (exp=%ld)\n", exp);
+    }
+  tests_sigfpe_done ();
+}
+
+
+/* Check values 2^n approaching exponent overflow (exp 1010 to 1049).
+   Some systems might trap on overflow, so watch out for SIGFPE.  */
+void
+check_ieee_overflow (void)
+{
+  static long exp;  /* NOTE(review): presumably static so the SIGFPE branch can still print it -- confirm */
+  mp_limb_t  n = 1;
+  long       i;
+  mp_size_t  sign;
+  double     want, got;
+
+  if (! _GMP_IEEE_FLOATS)
+    return;
+
+  if (tests_setjmp_sigfpe() == 0)
+    {
+      exp = 1010;
+      want = 1.0;
+      for (i = 0; i < exp; i++)  /* want = 2^exp by repeated doubling */
+        want *= 2.0;
+
+      for ( ; exp < 1050; exp++)
+        {
+          for (sign = 0; sign >= -1; sign--)
+            {
+              got = mpn_get_d (&n, (mp_size_t) 1, sign, exp);
+              if (got != want)
+                {
+                  printf  ("mpn_get_d wrong on overflow\n");
+                  printf  ("  n=1\n");
+                  printf  ("  exp   %ld\n", exp);
+                  printf  ("  sign  %ld\n", (long) sign);
+                  d_trace ("  got   ", got);
+                  d_trace ("  want  ", want);
+                  abort ();
+                }
+              want = -want;  /* flipped twice per exp, so positive again after */
+            }
+          want *= 2.0;
+          FORCE_DOUBLE (want);  /* NOTE(review): presumably rounds want to true double precision -- confirm */
+        }
+    }
+  else
+    {
+      printf ("Warning, IEEE overflow tests skipped due to SIGFPE (exp=%ld)\n", exp);
+    }
+  tests_sigfpe_done ();
+}
+
+
+/* Regression check for one specific value, 0x81c25113 (2176995603).  ARM
+   gcc 2.95.4 was seen miscompiling ulong->double conversions so that this
+   value came out wrong; make sure mpn_get_d is not affected, whichever of
+   the four limbs the value sits in (lower limbs zero, exp compensating).  */
+void
+check_0x81c25113 (void)
+{
+#if GMP_NUMB_BITS >= 32
+  double     expect = 2176995603.0;
+  double     result;
+  mp_limb_t  limbs[4];
+  mp_size_t  top;
+  long       scale;
+
+  if (tests_dbl_mant_bits() < 32)
+    return;
+
+  for (top = 0; top < numberof (limbs); top++)
+    {
+      refmpn_zero (limbs, top);
+      limbs[top] = CNST_LIMB(0x81c25113);
+      scale = - top * GMP_NUMB_BITS;    /* cancels the limb offset */
+      result = mpn_get_d (limbs, top + 1, (mp_size_t) 0, scale);
+      if (result == expect)
+        continue;
+
+      printf  ("mpn_get_d wrong on 2176995603 (0x81c25113)\n");
+      printf  ("  nsize  %ld\n", (long) (top + 1));
+      printf  ("  exp    %ld\n", scale);
+      d_trace ("  got    ", result);
+      d_trace ("  want   ", expect);
+      abort ();
+    }
+#endif
+}
+
+/* Compare mpn_get_d against a bit-at-a-time conversion on 200 random operands.  */
+void
+check_rand (void)
+{
+  gmp_randstate_ptr rands = RANDS;
+  int            rep, i;
+  unsigned long  mant_bits;
+  long           exp, exp_min, exp_max;
+  double         got, want, d;
+  mp_size_t      nalloc, nsize, sign;
+  mp_limb_t      nhigh_mask;
+  mp_ptr         np;
+
+  mant_bits = tests_dbl_mant_bits ();
+  if (mant_bits == 0)
+    return;
+
+  /* Allow for vax D format with exponent 127 to -128 only.
+     FIXME: Do something to probe for a valid exponent range.  */
+  exp_min = -100 - mant_bits;
+  exp_max =  100 - mant_bits;
+
+  /* space for mant_bits */
+  nalloc = BITS_TO_LIMBS (mant_bits);
+  np = refmpn_malloc_limbs (nalloc);
+  nhigh_mask = MP_LIMB_T_MAX
+    >> (GMP_NAIL_BITS + nalloc * GMP_NUMB_BITS - mant_bits);  /* mask for the top limb */
+
+  for (rep = 0; rep < 200; rep++)
+    {
+      /* random exp_min to exp_max, inclusive */
+      exp = exp_min + (long) gmp_urandomm_ui (rands, exp_max - exp_min + 1);
+
+      /* mant_bits worth of random at np */
+      if (rep & 1)
+        mpn_random (np, nalloc);
+      else
+        mpn_random2 (np, nalloc);
+      nsize = nalloc;
+      np[nsize-1] &= nhigh_mask;
+      MPN_NORMALIZE (np, nsize);
+      if (nsize == 0)
+        continue;  /* nothing left after masking and normalizing: skip */
+
+      sign = (mp_size_t) gmp_urandomb_ui (rands, 1L) - 1;  /* one random bit, minus 1 */
+
+      /* want = {np,nsize}, converting one bit at a time */
+      want = 0.0;
+      for (i = 0, d = 1.0; i < mant_bits; i++, d *= 2.0)
+        if (np[i/GMP_NUMB_BITS] & (CNST_LIMB(1) << (i%GMP_NUMB_BITS)))
+          want += d;
+      if (sign < 0)
+        want = -want;
+
+      /* want = want * 2^exp */
+      for (i = 0; i < exp; i++)
+        want *= 2.0;
+      for (i = 0; i > exp; i--)
+        want *= 0.5;
+
+      got = mpn_get_d (np, nsize, sign, exp);
+
+      if (got != want)
+        {
+          printf    ("mpn_get_d wrong on random data\n");
+          printf    ("   sign     %ld\n", (long) sign);
+          mpn_trace ("   n        ", np, nsize);
+          printf    ("   nsize    %ld\n", (long) nsize);
+          printf    ("   exp      %ld\n", exp);
+          d_trace   ("   want     ", want);
+          d_trace   ("   got      ", got);
+          abort();
+        }
+    }
+
+  free (np);
+}
+
+/* Run every mpn_get_d check in turn; each one calls abort() on a mismatch.  */
+int
+main (void)
+{
+  tests_start ();
+  mp_trace_base = -16;  /* NOTE(review): presumably hex for the trace output -- confirm */
+
+  check_onebit ();
+  check_twobit ();
+  check_inf ();
+  check_underflow ();
+  check_ieee_denorm ();
+  check_ieee_overflow ();
+  check_0x81c25113 ();
+  check_rand ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpn/t-hgcd.c b/tests/mpn/t-hgcd.c

new file mode 100644 (file)

index 0000000..60615ce
--- /dev/null
+++ b/tests/mpn/t-hgcd.c
@@ -0,0 +1,409 @@
+/* Test mpn_hgcd against a reference implementation.
+
+Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2003, 2004 Free
+Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+static mp_size_t one_test __GMP_PROTO ((mpz_t, mpz_t, int));
+static void debug_mp __GMP_PROTO ((mpz_t, int));
+
+#define MIN_OPERAND_SIZE 2
+
+/* Fixed values, for regression testing of mpn_hgcd.
+
+   A and B are the operands, given as strings that main parses with
+   mpz_set_str in base 0.  RES is the value one_test is expected to return
+   for that pair.  The table is terminated by an entry with a negative RES;
+   the real entries are only present when GMP_NUMB_BITS == 32.  */
+struct value { int res; const char *a; const char *b; };
+static const struct value hgcd_values[] = {
+#if GMP_NUMB_BITS == 32
+  { 5,
+    "0x1bddff867272a9296ac493c251d7f46f09a5591fe",
+    "0xb55930a2a68a916450a7de006031068c5ddb0e5c" },
+  { 4,
+    "0x2f0ece5b1ee9c15e132a01d55768dc13",
+    "0x1c6f4fd9873cdb24466e6d03e1cc66e7" },
+  { 3, "0x7FFFFC003FFFFFFFFFC5", "0x3FFFFE001FFFFFFFFFE3"},
+#endif
+  /* Sentinel: a negative res ends the list.  */
+  { -1, NULL, NULL }
+};
+
+/* Result of the reference implementation: the 2x2 matrix built up by
+   hgcd_ref, stored as mpz_t so that hgcd_ref_equal can compare it entry by
+   entry with the struct hgcd_matrix filled in by mpn_hgcd.  */
+struct hgcd_ref
+{
+  mpz_t m[2][2];
+};
+
+static void hgcd_ref_init __GMP_PROTO ((struct hgcd_ref *hgcd));
+static void hgcd_ref_clear __GMP_PROTO ((struct hgcd_ref *hgcd));
+static int hgcd_ref __GMP_PROTO ((struct hgcd_ref *hgcd, mpz_t a, mpz_t b));
+static int hgcd_ref_equal __GMP_PROTO ((const struct hgcd_matrix *hgcd, const struct hgcd_ref *ref));
+
+/* Test driver.  First runs the fixed regression cases from hgcd_values,
+   checking the value returned by one_test against the recorded one (these
+   are numbered -1, -2, ... in diagnostics).  Then runs 15 rounds of two
+   random tests each: one on plain random operands and one on operands
+   built backwards as a division chain.  argc and argv are not used.  */
+int
+main (int argc, char **argv)
+{
+  mpz_t op1, op2, temp1, temp2;
+  int i, j, chain_len;
+  gmp_randstate_ptr rands;
+  mpz_t bs;
+  unsigned long size_range;
+
+  tests_start ();
+  rands = RANDS;
+
+  mpz_init (bs);
+  mpz_init (op1);
+  mpz_init (op2);
+  mpz_init (temp1);
+  mpz_init (temp2);
+
+  /* Fixed regression values; the table ends at the first negative res.  */
+  for (i = 0; hgcd_values[i].res >= 0; i++)
+    {
+      mp_size_t res;
+
+      mpz_set_str (op1, hgcd_values[i].a, 0);
+      mpz_set_str (op2, hgcd_values[i].b, 0);
+
+      res = one_test (op1, op2, -1-i);
+      if (res != hgcd_values[i].res)
+       {
+         fprintf (stderr, "ERROR in test %d\n", -1-i);
+         fprintf (stderr, "Bad return code from hgcd\n");
+         fprintf (stderr, "op1=");                 debug_mp (op1, -16);
+         fprintf (stderr, "op2=");                 debug_mp (op2, -16);
+         fprintf (stderr, "expected: %d\n", hgcd_values[i].res);
+         fprintf (stderr, "hgcd:     %d\n", (int) res);
+         abort ();
+       }
+    }
+
+  for (i = 0; i < 15; i++)
+    {
+      /* Generate plain operands with unknown gcd.  These types of operands
+        have proven to trigger certain bugs in development versions of the
+        gcd code.  The "hgcd->row[3].rsize > M" ASSERT is not triggered by
+        the division chain code below, but that is most likely just a result
+        of that other ASSERTs are triggered before it.  */
+
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 13 + 2;
+
+      mpz_urandomb (bs, rands, size_range);
+      mpz_rrandomb (op1, rands, mpz_get_ui (bs) + MIN_OPERAND_SIZE);
+      mpz_urandomb (bs, rands, size_range);
+      mpz_rrandomb (op2, rands, mpz_get_ui (bs) + MIN_OPERAND_SIZE);
+
+      /* one_test asserts that its first operand is at least as large as
+        the second, so order them.  */
+      if (mpz_cmp (op1, op2) < 0)
+       mpz_swap (op1, op2);
+
+      if (mpz_size (op1) > 0)
+       one_test (op1, op2, i);
+
+      /* Generate a division chain backwards, allowing otherwise
+        unlikely huge quotients.  */
+
+      mpz_set_ui (op1, 0);
+      mpz_urandomb (bs, rands, 32);
+      mpz_urandomb (bs, rands, mpz_get_ui (bs) % 16 + 1);
+      mpz_rrandomb (op2, rands, mpz_get_ui (bs));
+      mpz_add_ui (op2, op2, 1);
+
+#if 0
+      chain_len = 1000000;
+#else
+      mpz_urandomb (bs, rands, 32);
+      chain_len = mpz_get_ui (bs) % (GMP_NUMB_BITS * GCD_DC_THRESHOLD / 256);
+#endif
+
+      /* Each iteration does op1 += op2 * t and then op2 += op1 * t', with
+        fresh random multipliers t, t' >= 1.  */
+      for (j = 0; j < chain_len; j++)
+       {
+         mpz_urandomb (bs, rands, 32);
+         mpz_urandomb (bs, rands, mpz_get_ui (bs) % 12 + 1);
+         mpz_rrandomb (temp2, rands, mpz_get_ui (bs) + 1);
+         mpz_add_ui (temp2, temp2, 1);
+         mpz_mul (temp1, op2, temp2);
+         mpz_add (op1, op1, temp1);
+
+         /* Don't generate overly huge operands.  */
+         if (SIZ (op1) > 3 * GCD_DC_THRESHOLD)
+           break;
+
+         mpz_urandomb (bs, rands, 32);
+         mpz_urandomb (bs, rands, mpz_get_ui (bs) % 12 + 1);
+         mpz_rrandomb (temp2, rands, mpz_get_ui (bs) + 1);
+         mpz_add_ui (temp2, temp2, 1);
+         mpz_mul (temp1, op1, temp2);
+         mpz_add (op2, op2, temp1);
+
+         /* Don't generate overly huge operands.  */
+         if (SIZ (op2) > 3 * GCD_DC_THRESHOLD)
+           break;
+       }
+      if (mpz_cmp (op1, op2) < 0)
+       mpz_swap (op1, op2);
+
+      if (mpz_size (op1) > 0)
+       one_test (op1, op2, i);
+    }
+
+  mpz_clear (bs);
+  mpz_clear (op1);
+  mpz_clear (op2);
+  mpz_clear (temp1);
+  mpz_clear (temp2);
+
+  tests_end ();
+  exit (0);
+}
+
+/* Print X on stderr in the given BASE, followed by a newline.  */
+static void
+debug_mp (mpz_t x, int base)
+{
+  mpz_out_str (stderr, base, x);
+  fputc ('\n', stderr);
+}
+
+static int
+mpz_mpn_equal (const mpz_t a, mp_srcptr bp, mp_size_t bsize);
+
+/* Run hgcd_ref and mpn_hgcd on private copies of A and B and compare them.
+
+   Aborts with a diagnostic if the two return values differ or, when the
+   return value is positive, if the matrices or the reduced operands
+   differ.  I is only used to label the diagnostics.  The size field of A
+   must be at least that of B (checked with ASSERT).
+
+   Returns the value returned by hgcd_ref.  */
+static mp_size_t
+one_test (mpz_t a, mpz_t b, int i)
+{
+  struct hgcd_matrix hgcd;
+  struct hgcd_ref ref;
+
+  mpz_t ref_r0;
+  mpz_t ref_r1;
+  mpz_t hgcd_r0;
+  mpz_t hgcd_r1;
+
+  /* res[0] is the reference result, res[1] the mpn_hgcd result.  */
+  mp_size_t res[2];
+  mp_size_t asize;
+  mp_size_t bsize;
+
+  mp_size_t hgcd_init_scratch;
+  mp_size_t hgcd_scratch;
+
+  mp_ptr hgcd_init_tp;
+  mp_ptr hgcd_tp;
+
+  asize = a->_mp_size;
+  bsize = b->_mp_size;
+
+  ASSERT (asize >= bsize);
+
+  /* Storage for the matrix and scratch space for mpn_hgcd, both sized from
+     asize.  */
+  hgcd_init_scratch = MPN_HGCD_MATRIX_INIT_ITCH (asize);
+  hgcd_init_tp = refmpn_malloc_limbs (hgcd_init_scratch);
+  mpn_hgcd_matrix_init (&hgcd, asize, hgcd_init_tp);
+
+  hgcd_scratch = mpn_hgcd_itch (asize);
+  hgcd_tp = refmpn_malloc_limbs (hgcd_scratch);
+
+#if 0
+  fprintf (stderr,
+          "one_test: i = %d asize = %d, bsize = %d\n",
+          i, a->_mp_size, b->_mp_size);
+
+  gmp_fprintf (stderr,
+              "one_test: i = %d\n"
+              "  a = %Zx\n"
+              "  b = %Zx\n",
+              i, a, b);
+#endif
+  hgcd_ref_init (&ref);
+
+  /* The reference implementation reduces ref_r0 / ref_r1 in place.  */
+  mpz_init_set (ref_r0, a);
+  mpz_init_set (ref_r1, b);
+  res[0] = hgcd_ref (&ref, ref_r0, ref_r1);
+
+  mpz_init_set (hgcd_r0, a);
+  mpz_init_set (hgcd_r1, b);
+  /* mpn_hgcd is handed asize limbs of both operands, so extend the copy of
+     b with zero limbs up to asize.  */
+  if (bsize < asize)
+    {
+      _mpz_realloc (hgcd_r1, asize);
+      MPN_ZERO (hgcd_r1->_mp_d + bsize, asize - bsize);
+    }
+  res[1] = mpn_hgcd (hgcd_r0->_mp_d,
+                    hgcd_r1->_mp_d,
+                    asize,
+                    &hgcd, hgcd_tp);
+
+  if (res[0] != res[1])
+    {
+      fprintf (stderr, "ERROR in test %d\n", i);
+      fprintf (stderr, "Different return value from hgcd and hgcd_ref\n");
+      fprintf (stderr, "op1=");                 debug_mp (a, -16);
+      fprintf (stderr, "op2=");                 debug_mp (b, -16);
+      fprintf (stderr, "hgcd_ref: %ld\n", (long) res[0]);
+      fprintf (stderr, "mpn_hgcd: %ld\n", (long) res[1]);
+      abort ();
+    }
+  /* The matrix and the reduced operands are only compared when a reduction
+     actually took place.  */
+  if (res[0] > 0)
+    {
+      if (!hgcd_ref_equal (&hgcd, &ref)
+         || !mpz_mpn_equal (ref_r0, hgcd_r0->_mp_d, res[1])
+         || !mpz_mpn_equal (ref_r1, hgcd_r1->_mp_d, res[1]))
+       {
+         fprintf (stderr, "ERROR in test %d\n", i);
+         fprintf (stderr, "mpn_hgcd and hgcd_ref returned different values\n");
+         fprintf (stderr, "op1=");                 debug_mp (a, -16);
+         fprintf (stderr, "op2=");                 debug_mp (b, -16);
+         abort ();
+       }
+    }
+
+  refmpn_free_limbs (hgcd_init_tp);
+  refmpn_free_limbs (hgcd_tp);
+  hgcd_ref_clear (&ref);
+  mpz_clear (ref_r0);
+  mpz_clear (ref_r1);
+  mpz_clear (hgcd_r0);
+  mpz_clear (hgcd_r1);
+
+  return res[0];
+}
+
+/* Initialize the four mpz entries of HGCD->m.  */
+static void
+hgcd_ref_init (struct hgcd_ref *hgcd)
+{
+  int row, col;
+
+  for (row = 0; row < 2; row++)
+    for (col = 0; col < 2; col++)
+      mpz_init (hgcd->m[row][col]);
+}
+
+/* Release the four mpz entries of HGCD->m.  */
+static void
+hgcd_ref_clear (struct hgcd_ref *hgcd)
+{
+  int row, col;
+
+  for (row = 0; row < 2; row++)
+    for (col = 0; col < 2; col++)
+      mpz_clear (hgcd->m[row][col]);
+}
+
+
+/* Division step for hgcd_ref.  Computes the floor quotient Q and remainder
+   R of A by B; if R then occupies S limbs or fewer, one multiple of B is
+   given back (R += B, Q -= 1).  Returns non-zero iff the final Q is
+   positive.  */
+static int
+sdiv_qr (mpz_t q, mpz_t r, mp_size_t s, const mpz_t a, const mpz_t b)
+{
+  mpz_fdiv_qr (q, r, a, b);
+
+  if (mpz_size (r) > s)
+    return mpz_sgn (q) > 0;
+
+  /* Remainder too small: back off by one multiple of b.  */
+  mpz_add (r, r, b);
+  mpz_sub_ui (q, q, 1);
+
+  return mpz_sgn (q) > 0;
+}
+
+/* Reference implementation of hgcd, using mpz arithmetic.
+
+   With n = MAX (size of A, size of B) and s = n/2 + 1, A and B are reduced
+   in place by one subtraction step followed by sdiv_qr division steps, for
+   as long as both stay larger than s limbs.  The steps are accumulated in
+   HGCD->m so that
+
+       (A_in, B_in)^T = M * (A_out, B_out)^T.
+
+   Returns 0 if not even the initial subtraction step is possible; HGCD->m
+   is not set in that case, and B or A may already have been modified by
+   the attempted subtraction.  Otherwise returns the larger of the limb
+   sizes of the reduced A and B.  */
+static int
+hgcd_ref (struct hgcd_ref *hgcd, mpz_t a, mpz_t b)
+{
+  mp_size_t n = MAX (mpz_size (a), mpz_size (b));
+  mp_size_t s = n/2 + 1;
+  mp_size_t asize;
+  mp_size_t bsize;
+  mpz_t q;
+  int res;
+
+  if (mpz_size (a) <= s || mpz_size (b) <= s)
+    return 0;
+
+  /* The first step is a plain subtraction of the smaller operand from the
+     larger; M starts out as the matching elementary matrix.  */
+  res = mpz_cmp (a, b);
+  if (res < 0)
+    {
+      mpz_sub (b, b, a);
+      if (mpz_size (b) <= s)
+       return 0;
+
+      mpz_set_ui (hgcd->m[0][0], 1); mpz_set_ui (hgcd->m[0][1], 0);
+      mpz_set_ui (hgcd->m[1][0], 1); mpz_set_ui (hgcd->m[1][1], 1);
+    }
+  else if (res > 0)
+    {
+      mpz_sub (a, a, b);
+      if (mpz_size (a) <= s)
+       return 0;
+
+      mpz_set_ui (hgcd->m[0][0], 1); mpz_set_ui (hgcd->m[0][1], 1);
+      mpz_set_ui (hgcd->m[1][0], 0); mpz_set_ui (hgcd->m[1][1], 1);
+    }
+  else
+    return 0;
+
+  mpz_init (q);
+
+  for (;;)
+    {
+      ASSERT (mpz_size (a) > s);
+      ASSERT (mpz_size (b) > s);
+
+      /* Reduce the larger operand by the smaller.  Stop as soon as sdiv_qr
+        reports that no positive quotient remains.  Each step adds q times
+        one column of M to the other column.  */
+      if (mpz_cmp (a, b) > 0)
+       {
+         if (!sdiv_qr (q, a, s, a, b))
+           break;
+         mpz_addmul (hgcd->m[0][1], q, hgcd->m[0][0]);
+         mpz_addmul (hgcd->m[1][1], q, hgcd->m[1][0]);
+       }
+      else
+       {
+         if (!sdiv_qr (q, b, s, b, a))
+           break;
+         mpz_addmul (hgcd->m[0][0], q, hgcd->m[0][1]);
+         mpz_addmul (hgcd->m[1][0], q, hgcd->m[1][1]);
+       }
+    }
+
+  mpz_clear (q);
+
+  asize = mpz_size (a);
+  bsize = mpz_size (b);
+  return MAX (asize, bsize);
+}
+
+/* Return non-zero iff the mpz A has the same value as the limb vector
+   {BP, BSIZE}.  High zero limbs of {BP, BSIZE} are ignored; A's size field
+   is used as is.  */
+static int
+mpz_mpn_equal (const mpz_t a, mp_srcptr bp, mp_size_t bsize)
+{
+  /* Strip high zero limbs so the sizes can be compared directly.  */
+  MPN_NORMALIZE (bp, bsize);
+
+  if (a->_mp_size != bsize)
+    return 0;
+
+  return mpn_cmp (a->_mp_d, bp, bsize) == 0;
+}
+
+/* Return 1 if every entry of the mpn_hgcd matrix HGCD (hgcd->n limbs each)
+   equals the corresponding mpz entry of REF, and 0 otherwise.  */
+static int
+hgcd_ref_equal (const struct hgcd_matrix *hgcd, const struct hgcd_ref *ref)
+{
+  unsigned k;
+
+  /* k walks the entries in row-major order: (0,0), (0,1), (1,0), (1,1).  */
+  for (k = 0; k < 4; k++)
+    {
+      unsigned row = k / 2;
+      unsigned col = k % 2;
+
+      if (!mpz_mpn_equal (ref->m[row][col], hgcd->p[row][col], hgcd->n))
+        return 0;
+    }
+
+  return 1;
+}
diff --git a/tests/mpn/t-instrument.c b/tests/mpn/t-instrument.c

new file mode 100644 (file)

index 0000000..a3d2969
--- /dev/null
+++ b/tests/mpn/t-instrument.c
@@ -0,0 +1,416 @@
+/* Test assembler support for --enable-profiling=instrument.
+
+Copyright 2002, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+#include "tests.h"
+
+
+#if WANT_PROFILING_INSTRUMENT
+
+/* This program exercises each mpn routine that might be implemented in
+   assembler.  It ensures the __cyg_profile_func_enter and exit calls have
+   come out right, and that in the x86 code "ret_internal" is correctly used
+   for PIC setups.  */
+
+
+/* Changes to enter_seen done by __cyg_profile_func_enter are essentially
+   unknown to the optimizer, so must use volatile.  */
+volatile int  enter_seen;
+
+/* Dummy used to stop various calls going dead. */
+unsigned long  notdead;
+
+const char     *name = "<none>";
+int  old_ncall;
+
+struct {
+  void  *this_fn;
+  void  *call_site;
+} call[100];
+int  ncall;
+
+
+/* Function-entry profiling hook.  Sets enter_seen and pushes the
+   (this_fn, call_site) pair onto the call[] stack, so that the matching
+   __cyg_profile_func_exit can check it.  Aborts if the stack is already
+   full.  Declared no_instrument_function so the hook itself is not
+   instrumented.  */
+void __cyg_profile_func_enter __GMP_PROTO ((void *this_fn, void *call_site))
+     __attribute__ ((no_instrument_function));
+
+void
+__cyg_profile_func_enter (void *this_fn, void *call_site)
+{
+#if 0
+  printf ("%24s %p %p\n", name, this_fn, call_site);
+#endif
+  /* ncall is the number of entries currently on the stack.  */
+  ASSERT_ALWAYS (ncall >= 0);
+  ASSERT_ALWAYS (ncall <= numberof (call));
+
+  if (ncall >= numberof (call))
+    {
+      printf ("__cyg_profile_func_enter: oops, call stack full, from %s\n", name);
+      abort ();
+    }
+
+  enter_seen = 1;
+  call[ncall].this_fn = this_fn;
+  call[ncall].call_site = call_site;
+  ncall++;
+}
+
+/* Function-exit profiling hook.  Pops the top entry of the call[] stack
+   and aborts if the stack was empty or if the popped (this_fn, call_site)
+   pair differs from the one passed in, i.e. if enter and exit calls are
+   not properly paired.  Declared no_instrument_function so the hook itself
+   is not instrumented.  */
+void __cyg_profile_func_exit __GMP_PROTO ((void *this_fn, void *call_site))
+     __attribute__ ((no_instrument_function));
+
+void
+__cyg_profile_func_exit  (void *this_fn, void *call_site)
+{
+  ASSERT_ALWAYS (ncall >= 0);
+  ASSERT_ALWAYS (ncall <= numberof (call));
+
+  if (ncall == 0)
+    {
+      printf ("__cyg_profile_func_exit: call stack empty, from %s\n", name);
+      abort ();
+    }
+
+  /* Pop, then compare against what the matching enter call recorded.  */
+  ncall--;
+  if (this_fn != call[ncall].this_fn || call_site != call[ncall].call_site)
+    {
+      printf ("__cyg_profile_func_exit: unbalanced this_fn/call_site from %s\n", name);
+      printf ("  this_fn got  %p\n", this_fn);
+      printf ("          want %p\n", call[ncall].this_fn);
+      printf ("  call_site got  %p\n", call_site);
+      printf ("            want %p\n", call[ncall].call_site);
+      abort ();
+    }
+}
+
+
+/* Prepare for one instrumented call: remember STR as the name used in
+   diagnostics, clear enter_seen and record the current stack depth so
+   that post can check both afterwards.  */
+void
+pre (const char *str)
+{
+  name = str;
+  enter_seen = 0;
+  old_ncall = ncall;
+}
+
+/* Check the call made since the last pre: __cyg_profile_func_enter must
+   have been reached (enter_seen set) and the call stack must be back at
+   the depth recorded by pre.  Aborts with a diagnostic otherwise.  */
+void
+post (void)
+{
+  if (! enter_seen)
+    {
+      printf ("did not reach __cyg_profile_func_enter from %s\n", name);
+      abort ();
+    }
+
+  if (ncall != old_ncall)
+    {
+      printf ("unbalance enter/exit calls from %s\n", name);
+      printf ("  ncall     %d\n", ncall);
+      printf ("  old_ncall %d\n", old_ncall);
+      abort ();
+    }
+}
+
+/* Call each mpn routine that might be implemented in assembler, each call
+   bracketed by pre / post so that a missing or unbalanced profiling hook
+   call is detected.
+
+   All operands start out as zero (xp[0] is later made odd for mpn_gcd_1);
+   only the profiling hooks are being tested here, not the results.
+   Routines with a return value have it added to notdead so the calls
+   cannot be optimized away.  */
+void
+check (void)
+{
+  mp_limb_t  wp[100], xp[100], yp[100];
+  mp_size_t  size = 100;
+
+  refmpn_zero (xp, size);
+  refmpn_zero (yp, size);
+  refmpn_zero (wp, size);
+
+  pre ("mpn_add_n");
+  mpn_add_n (wp, xp, yp, size);
+  post ();
+
+#if HAVE_NATIVE_mpn_add_nc
+  pre ("mpn_add_nc");
+  mpn_add_nc (wp, xp, yp, size, CNST_LIMB(0));
+  post ();
+#endif
+
+#if HAVE_NATIVE_mpn_addlsh1_n
+  pre ("mpn_addlsh1_n");
+  mpn_addlsh1_n (wp, xp, yp, size);
+  post ();
+#endif
+
+#if HAVE_NATIVE_mpn_and_n
+  pre ("mpn_and_n");
+  mpn_and_n (wp, xp, yp, size);
+  post ();
+#endif
+
+#if HAVE_NATIVE_mpn_andn_n
+  pre ("mpn_andn_n");
+  mpn_andn_n (wp, xp, yp, size);
+  post ();
+#endif
+
+  pre ("mpn_addmul_1");
+  mpn_addmul_1 (wp, xp, size, yp[0]);
+  post ();
+
+#if HAVE_NATIVE_mpn_addmul_1c
+  pre ("mpn_addmul_1c");
+  mpn_addmul_1c (wp, xp, size, yp[0], CNST_LIMB(0));
+  post ();
+#endif
+
+#if HAVE_NATIVE_mpn_com
+  pre ("mpn_com");
+  mpn_com (wp, xp, size);
+  post ();
+#endif
+
+#if HAVE_NATIVE_mpn_copyd
+  pre ("mpn_copyd");
+  mpn_copyd (wp, xp, size);
+  post ();
+#endif
+
+#if HAVE_NATIVE_mpn_copyi
+  pre ("mpn_copyi");
+  mpn_copyi (wp, xp, size);
+  post ();
+#endif
+
+  pre ("mpn_divexact_1");
+  mpn_divexact_1 (wp, xp, size, CNST_LIMB(123));
+  post ();
+
+  pre ("mpn_divexact_by3c");
+  mpn_divexact_by3c (wp, xp, size, CNST_LIMB(0));
+  post ();
+
+  pre ("mpn_divrem_1");
+  mpn_divrem_1 (wp, (mp_size_t) 0, xp, size, CNST_LIMB(123));
+  post ();
+
+#if HAVE_NATIVE_mpn_divrem_1c
+  pre ("mpn_divrem_1c");
+  mpn_divrem_1c (wp, (mp_size_t) 0, xp, size, CNST_LIMB(123), CNST_LIMB(122));
+  post ();
+#endif
+
+  pre ("mpn_gcd_1");
+  /* Make the operand odd (and hence non-zero) for mpn_gcd_1.  */
+  xp[0] |= 1;
+  notdead += (unsigned long) mpn_gcd_1 (xp, size, CNST_LIMB(123));
+  post ();
+
+  pre ("mpn_hamdist");
+  notdead += mpn_hamdist (xp, yp, size);
+  post ();
+
+#if HAVE_NATIVE_mpn_ior_n
+  pre ("mpn_ior_n");
+  mpn_ior_n (wp, xp, yp, size);
+  post ();
+#endif
+
+#if HAVE_NATIVE_mpn_iorn_n
+  pre ("mpn_iorn_n");
+  mpn_iorn_n (wp, xp, yp, size);
+  post ();
+#endif
+
+  pre ("mpn_lshift");
+  mpn_lshift (wp, xp, size, 1);
+  post ();
+
+  pre ("mpn_mod_1");
+  notdead += mpn_mod_1 (xp, size, CNST_LIMB(123));
+  post ();
+
+#if HAVE_NATIVE_mpn_mod_1c
+  pre ("mpn_mod_1c");
+  notdead += mpn_mod_1c (xp, size, CNST_LIMB(123), CNST_LIMB(122));
+  post ();
+#endif
+
+#if GMP_NUMB_BITS % 4 == 0
+  pre ("mpn_mod_34lsub1");
+  notdead += mpn_mod_34lsub1 (xp, size);
+  post ();
+#endif
+
+  pre ("mpn_modexact_1_odd");
+  notdead += mpn_modexact_1_odd (xp, size, CNST_LIMB(123));
+  post ();
+
+  pre ("mpn_modexact_1c_odd");
+  notdead += mpn_modexact_1c_odd (xp, size, CNST_LIMB(123), CNST_LIMB(456));
+  post ();
+
+  pre ("mpn_mul_1");
+  mpn_mul_1 (wp, xp, size, yp[0]);
+  post ();
+
+#if HAVE_NATIVE_mpn_mul_1c
+  pre ("mpn_mul_1c");
+  mpn_mul_1c (wp, xp, size, yp[0], CNST_LIMB(0));
+  post ();
+#endif
+
+#if HAVE_NATIVE_mpn_mul_2
+  pre ("mpn_mul_2");
+  mpn_mul_2 (wp, xp, size-1, yp);
+  post ();
+#endif
+
+  pre ("mpn_mul_basecase");
+  mpn_mul_basecase (wp, xp, (mp_size_t) 3, yp, (mp_size_t) 3);
+  post ();
+
+#if HAVE_NATIVE_mpn_nand_n
+  pre ("mpn_nand_n");
+  mpn_nand_n (wp, xp, yp, size);
+  post ();
+#endif
+
+#if HAVE_NATIVE_mpn_nior_n
+  pre ("mpn_nior_n");
+  mpn_nior_n (wp, xp, yp, size);
+  post ();
+#endif
+
+  pre ("mpn_popcount");
+  notdead += mpn_popcount (xp, size);
+  post ();
+
+  pre ("mpn_preinv_mod_1");
+  notdead += mpn_preinv_mod_1 (xp, size, GMP_NUMB_MAX,
+                               refmpn_invert_limb (GMP_NUMB_MAX));
+  post ();
+
+#if USE_PREINV_DIVREM_1 || HAVE_NATIVE_mpn_preinv_divrem_1
+  pre ("mpn_preinv_divrem_1");
+  mpn_preinv_divrem_1 (wp, (mp_size_t) 0, xp, size, GMP_NUMB_MAX,
+                       refmpn_invert_limb (GMP_NUMB_MAX), 0);
+  post ();
+#endif
+
+#if HAVE_NATIVE_mpn_rsh1add_n
+  pre ("mpn_rsh1add_n");
+  mpn_rsh1add_n (wp, xp, yp, size);
+  post ();
+#endif
+
+#if HAVE_NATIVE_mpn_rsh1sub_n
+  pre ("mpn_rsh1sub_n");
+  mpn_rsh1sub_n (wp, xp, yp, size);
+  post ();
+#endif
+
+  pre ("mpn_rshift");
+  mpn_rshift (wp, xp, size, 1);
+  post ();
+
+  pre ("mpn_sqr_basecase");
+  mpn_sqr_basecase (wp, xp, (mp_size_t) 3);
+  post ();
+
+  pre ("mpn_submul_1");
+  mpn_submul_1 (wp, xp, size, yp[0]);
+  post ();
+
+#if HAVE_NATIVE_mpn_submul_1c
+  pre ("mpn_submul_1c");
+  mpn_submul_1c (wp, xp, size, yp[0], CNST_LIMB(0));
+  post ();
+#endif
+
+  pre ("mpn_sub_n");
+  mpn_sub_n (wp, xp, yp, size);
+  post ();
+
+#if HAVE_NATIVE_mpn_sub_nc
+  pre ("mpn_sub_nc");
+  mpn_sub_nc (wp, xp, yp, size, CNST_LIMB(0));
+  post ();
+#endif
+
+#if HAVE_NATIVE_mpn_sublsh1_n
+  pre ("mpn_sublsh1_n");
+  mpn_sublsh1_n (wp, xp, yp, size);
+  post ();
+#endif
+
+#if HAVE_NATIVE_mpn_udiv_qrnnd
+  pre ("mpn_udiv_qrnnd");
+  mpn_udiv_qrnnd (&wp[0], CNST_LIMB(122), xp[0], CNST_LIMB(123));
+  post ();
+#endif
+
+#if HAVE_NATIVE_mpn_udiv_qrnnd_r
+  pre ("mpn_udiv_qrnnd_r");
+  /* The _r variant takes the remainder pointer as its last argument.  */
+  mpn_udiv_qrnnd_r (CNST_LIMB(122), xp[0], CNST_LIMB(123), &wp[0]);
+  post ();
+#endif
+
+#if HAVE_NATIVE_mpn_umul_ppmm
+  pre ("mpn_umul_ppmm");
+  mpn_umul_ppmm (&wp[0], xp[0], yp[0]);
+  post ();
+#endif
+
+#if HAVE_NATIVE_mpn_umul_ppmm_r
+  pre ("mpn_umul_ppmm_r");
+  /* The _r variant takes the low-limb pointer as its last argument.  */
+  mpn_umul_ppmm_r (xp[0], yp[0], &wp[0]);
+  post ();
+#endif
+
+#if HAVE_NATIVE_mpn_xor_n
+  pre ("mpn_xor_n");
+  mpn_xor_n (wp, xp, yp, size);
+  post ();
+#endif
+
+#if HAVE_NATIVE_mpn_xnor_n
+  pre ("mpn_xnor_n");
+  mpn_xnor_n (wp, xp, yp, size);
+  post ();
+#endif
+}
+
+
+/* Test driver for builds with WANT_PROFILING_INSTRUMENT: run check once
+   between tests_start and tests_end.  Any failure aborts inside check.  */
+int
+main (void)
+{
+  tests_start ();
+
+  check ();
+
+  tests_end ();
+  exit (0);
+}
+
+
+#else /* ! WANT_PROFILING_INSTRUMENT */
+
+/* Without WANT_PROFILING_INSTRUMENT there is nothing to test; succeed
+   immediately.  */
+int
+main (void)
+{
+  exit (0);
+}
+
+#endif
diff --git a/tests/mpn/t-invert.c b/tests/mpn/t-invert.c

new file mode 100644 (file)

index 0000000..7f74751
--- /dev/null
+++ b/tests/mpn/t-invert.c
@@ -0,0 +1,161 @@
+/* Test for mpn_invert function.
+
+   Contributed to the GNU project by Marco Bodrato.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+
+/* Sizes are up to 2^SIZE_LOG limbs */
+#ifndef SIZE_LOG
+#define SIZE_LOG 12
+#endif
+
+#ifndef COUNT
+#define COUNT 1000
+#endif
+
+#define MAX_N (1L << SIZE_LOG)
+#define MIN_N 1
+
+
+/* Return non-zero iff {ip,n} is an acceptable inverse of {dp,n}.
+
+   With B^n standing for one past the largest n-limb value, the test is
+
+       (B^n + I) * D  <  B^(2n)  <=  (B^n + I + 1) * D
+
+   which is checked by forming I*D, adding D*B^n (no carry allowed) and then
+   adding one more D (a carry is required).  */
+static int
+invert_valid (mp_srcptr ip, mp_srcptr dp, mp_size_t n)
+{
+  mp_ptr prod;
+  int carry_diff;
+  TMP_DECL;
+
+  TMP_MARK;
+  prod = TMP_ALLOC_LIMBS (2*n);
+
+  refmpn_mul (prod, ip, n, dp, n);
+
+  /* Adding D to the high half must not carry ...  */
+  carry_diff  = refmpn_add_n (prod + n, prod + n, dp, n);
+  /* ... while adding D once more at the low end must carry.  */
+  carry_diff -= refmpn_add (prod, prod, 2*n, dp, n);
+  TMP_FREE;
+
+  return (carry_diff == -1);
+}
+
+/*
+  Check the result of the mpn_invert function in the library.
+*/
+
+/* Test driver.  Runs COUNT random tests, or the count given as the first
+   command-line argument.  Each test inverts a random normalized n-limb
+   operand with mpn_invert and then checks (a) that the limbs immediately
+   before and after the result and scratch areas are untouched and (b) that
+   the result passes invert_valid.  */
+int
+main (int argc, char **argv)
+{
+  mp_ptr ip, dp, scratch;
+  int count = COUNT;
+  int test;
+  gmp_randstate_ptr rands;
+  TMP_DECL;
+  TMP_MARK;
+
+  if (argc > 1)
+    {
+      char *end;
+      count = strtol (argv[1], &end, 0);
+      if (*end || count <= 0)
+       {
+         fprintf (stderr, "Invalid test count: %s.\n", argv[1]);
+         return 1;
+       }
+    }
+
+  tests_start ();
+  rands = RANDS;
+
+  /* ip and scratch are allocated with two extra limbs and offset by one,
+     leaving a guard limb on each side of the area handed to mpn_invert.  */
+  dp = TMP_ALLOC_LIMBS (MAX_N);
+  ip = 1+TMP_ALLOC_LIMBS (MAX_N + 2);
+  scratch
+    = 1+TMP_ALLOC_LIMBS (mpn_invert_itch (MAX_N) + 2);
+
+  for (test = 0; test < count; test++)
+    {
+      unsigned size_min;
+      unsigned size_range;
+      mp_size_t n;
+      mp_size_t itch;
+      mp_limb_t i_before, i_after, s_before, s_after;
+
+      for (size_min = 1; (1L << size_min) < MIN_N; size_min++)
+       ;
+
+      /* We generate n in the range MIN_N <= n <= (1 << size_range). */
+      size_range = size_min
+       + gmp_urandomm_ui (rands, SIZE_LOG + 1 - size_min);
+
+      n = MIN_N
+       + gmp_urandomm_ui (rands, (1L << size_range) + 1 - MIN_N);
+
+      mpn_random2 (dp, n);
+
+      /* Fill the result area, guard limbs included, with random data and
+        remember the guards.  */
+      mpn_random2 (ip-1, n + 2);
+      i_before = ip[-1];
+      i_after = ip[n];
+
+      /* Same for the scratch area.  */
+      itch = mpn_invert_itch (n);
+      ASSERT_ALWAYS (itch <= mpn_invert_itch (MAX_N));
+      mpn_random2 (scratch-1, itch+2);
+      s_before = scratch[-1];
+      s_after = scratch[itch];
+
+      /* Set the high bit of the divisor before inverting it.  */
+      dp[n-1] |= GMP_NUMB_HIGHBIT;
+      mpn_invert (ip, dp, n, scratch);
+      if (ip[-1] != i_before || ip[n] != i_after
+         || scratch[-1] != s_before || scratch[itch] != s_after
+         || ! invert_valid(ip, dp, n))
+       {
+         printf ("ERROR in test %d, n = %d\n",
+                 test, (int) n);
+         if (ip[-1] != i_before)
+           {
+             printf ("before ip:"); mpn_dump (ip -1, 1);
+             printf ("keep:   "); mpn_dump (&i_before, 1);
+           }
+         if (ip[n] != i_after)
+           {
+             printf ("after ip:"); mpn_dump (ip + n, 1);
+             printf ("keep:   "); mpn_dump (&i_after, 1);
+           }
+         if (scratch[-1] != s_before)
+           {
+             printf ("before scratch:"); mpn_dump (scratch-1, 1);
+             printf ("keep:   "); mpn_dump (&s_before, 1);
+           }
+         if (scratch[itch] != s_after)
+           {
+             printf ("after scratch:"); mpn_dump (scratch + itch, 1);
+             printf ("keep:   "); mpn_dump (&s_after, 1);
+           }
+         mpn_dump (dp, n);
+         mpn_dump (ip, n);
+
+         abort();
+       }
+    }
+  TMP_FREE;
+  tests_end ();
+  return 0;
+}
diff --git a/tests/mpn/t-iord_u.c b/tests/mpn/t-iord_u.c

new file mode 100644 (file)

index 0000000..3b472fd
--- /dev/null
+++ b/tests/mpn/t-iord_u.c
@@ -0,0 +1,221 @@
+/* Test MPN_INCR_U and MPN_DECR_U.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* The i386 MPN_INCR_U and MPN_DECR_U have special cases for "n" being a
+   compile-time constant 1, so that's exercised explicitly.  */
+
+
+#define M     GMP_NUMB_MAX
+#define SIZE  ((mp_size_t) 10)
+
+
+/* Abort with a diagnostic unless GOT and WANT agree over SIZE limbs, as
+   judged by refmpn_equal_anynail.  NAME and I identify the failing case;
+   SRC and N are the inputs and are only shown in the diagnostic.  */
+void
+check_one (const char *name, int i,
+           mp_srcptr src, mp_limb_t n,
+           mp_srcptr got, mp_srcptr want, mp_size_t size)
+{
+  if (refmpn_equal_anynail (got, want, size))
+    return;
+
+  printf ("Wrong at %s i=%d\n", name, i);
+  mpn_trace ("  src", src,  size);
+  mpn_trace ("    n", &n,   (mp_size_t) 1);
+  mpn_trace ("  got", got,  size);
+  mpn_trace (" want", want, size);
+  abort ();
+}
+
+
+/* Check MPN_INCR_U against a table of cases.  Each entry gives the
+   increment n, the SIZE-limb source operand and the expected result.
+   Limbs not listed in an initializer are zero.  Entries with n == 1 are
+   additionally run with a literal CNST_LIMB(1), to exercise any
+   compile-time-constant special case of the macro.  */
+void
+check_incr_data (void)
+{
+  static const struct {
+    mp_limb_t        n;
+    const mp_limb_t  src[SIZE];
+    const mp_limb_t  want[SIZE];
+  } data[] = {
+    { 1, { 0 },   { 1 } },
+    { 1, { 123 }, { 124 } },
+    { 2, { 0 },   { 2 } },
+    { 2, { 123 }, { 125 } },
+    { M, { 0 },   { M } },
+
+    { 1, { M, 0 },   { 0,   1 } },
+    { 1, { M, 123 }, { 0,   124 } },
+    { 2, { M, 0 },   { 1,   1 } },
+    { 2, { M, 123 }, { 1,   124 } },
+    { M, { M, 0 },   { M-1, 1 } },
+    { M, { M, 123 }, { M-1, 124 } },
+
+    { 1, { M, M, 0 },   { 0,   0, 1 } },
+    { 1, { M, M, 123 }, { 0,   0, 124 } },
+    { 2, { M, M, 0 },   { 1,   0, 1 } },
+    { 2, { M, M, 123 }, { 1,   0, 124 } },
+    { M, { M, M, 0 },   { M-1, 0, 1 } },
+    { M, { M, M, 123 }, { M-1, 0, 124 } },
+
+    { 1, { M, M, M, 0 },   { 0,   0, 0, 1 } },
+    { 1, { M, M, M, 123 }, { 0,   0, 0, 124 } },
+    { 2, { M, M, M, 0 },   { 1,   0, 0, 1 } },
+    { 2, { M, M, M, 123 }, { 1,   0, 0, 124 } },
+    { M, { M, M, M, 0 },   { M-1, 0, 0, 1 } },
+    { M, { M, M, M, 123 }, { M-1, 0, 0, 124 } },
+
+    { 1, { M, M, M, M, 0 },   { 0,   0, 0, 0, 1 } },
+    { 1, { M, M, M, M, 123 }, { 0,   0, 0, 0, 124 } },
+    { 2, { M, M, M, M, 0 },   { 1,   0, 0, 0, 1 } },
+    { 2, { M, M, M, M, 123 }, { 1,   0, 0, 0, 124 } },
+    { M, { M, M, M, M, 0 },   { M-1, 0, 0, 0, 1 } },
+    { M, { M, M, M, M, 123 }, { M-1, 0, 0, 0, 124
+#if defined (__hpux) && ! defined (__GNUC__)
+    /* Some versions (at least HP92453-01 B.11.11.23709.GP) of the
+       HP C compilers fail to zero-fill aggregates as the ISO C standard
+       requires (cf 6.5.7 Initialization).  Compensate here:  */
+                               , 0, 0, 0, 0, 0
+#endif
+    } }
+  };
+
+  mp_limb_t  got[SIZE];
+  int   i;
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      /* General case: the increment is a run-time value.  */
+      refmpn_copyi (got, data[i].src, SIZE);
+      MPN_INCR_U (got, SIZE, data[i].n);
+      check_one ("check_incr (general)", i,
+                 data[i].src, data[i].n,
+                 got, data[i].want, SIZE);
+
+      /* Repeat with a literal 1 so that a constant-1 special case in the
+         macro is exercised too.  */
+      if (data[i].n == 1)
+        {
+          refmpn_copyi (got, data[i].src, SIZE);
+          MPN_INCR_U (got, SIZE, CNST_LIMB(1));
+          check_one ("check_incr (const 1)", i,
+                     data[i].src, data[i].n,
+                     got, data[i].want, SIZE);
+        }
+    }
+}
+
+/* Check MPN_DECR_U against a table of cases.  Each entry gives the
+   decrement n, the SIZE-limb source operand and the expected result.
+   Limbs not listed in an initializer are zero.  Entries with n == 1 are
+   additionally run with a literal CNST_LIMB(1), to exercise any
+   compile-time-constant special case of the macro.  */
+void
+check_decr_data (void)
+{
+  static const struct {
+    mp_limb_t        n;
+    const mp_limb_t  src[SIZE];
+    const mp_limb_t  want[SIZE];
+  } data[] = {
+    { 1,   { 1 },   { 0   } },
+    { 1,   { 123 }, { 122 } },
+    { 1,   { M },   { M-1 } },
+    { 2,   { 2 },   { 0   } },
+    { 2,   { 123 }, { 121 } },
+    { M,   { M },   { 0   } },
+    { M-1, { M },   { 1   } },
+
+    { 1,   { 0,   1   }, { M,   0   } },
+    { 1,   { 0,   123 }, { M,   122 } },
+    { 1,   { 0,   M   }, { M,   M-1 } },
+    { 2,   { 0,   123 }, { M-1, 122 } },
+    { 2,   { 1,   123 }, { M,   122 } },
+    { M,   { 0,   123 }, { 1,   122 } },
+    { M,   { M-1, M   }, { M,   M-1 } },
+
+    { 1,   { 0,   0, 1   }, { M,   M, 0   } },
+    { 1,   { 0,   0, 123 }, { M,   M, 122 } },
+    { 1,   { 0,   0, M   }, { M,   M, M-1 } },
+    { 2,   { 0,   0, 123 }, { M-1, M, 122 } },
+    { 2,   { 1,   0, 123 }, { M,   M, 122 } },
+    { M,   { 0,   0, 123 }, { 1,   M, 122 } },
+    { M,   { M-1, 0, M   }, { M,   M, M-1 } },
+
+    { 1,   { 0,   0, 0, 1   }, { M,   M, M, 0   } },
+    { 1,   { 0,   0, 0, 123 }, { M,   M, M, 122 } },
+    { 1,   { 0,   0, 0, M   }, { M,   M, M, M-1 } },
+    { 2,   { 0,   0, 0, 123 }, { M-1, M, M, 122 } },
+    { 2,   { 1,   0, 0, 123 }, { M,   M, M, 122 } },
+    { M,   { 0,   0, 0, 123 }, { 1,   M, M, 122 } },
+    { M,   { M-1, 0, 0, M   }, { M,   M, M, M-1 } },
+
+    { 1,   { 0,   0, 0, 0, 1   }, { M,   M, M, M, 0   } },
+    { 1,   { 0,   0, 0, 0, 123 }, { M,   M, M, M, 122 } },
+    { 1,   { 0,   0, 0, 0, M   }, { M,   M, M, M, M-1 } },
+    { 2,   { 0,   0, 0, 0, 123 }, { M-1, M, M, M, 122 } },
+    { 2,   { 1,   0, 0, 0, 123 }, { M,   M, M, M, 122 } },
+    { M,   { 0,   0, 0, 0, 123 }, { 1,   M, M, M, 122 } },
+    { M,   { M-1, 0, 0, 0, M   }, { M,   M, M, M, M-1 } },
+
+    { 1,   { 0,   0, 0, 0, 0, 1   }, { M,   M, M, M, M, 0   } },
+    { 1,   { 0,   0, 0, 0, 0, 123 }, { M,   M, M, M, M, 122 } },
+    { 1,   { 0,   0, 0, 0, 0, M   }, { M,   M, M, M, M, M-1 } },
+    { 2,   { 0,   0, 0, 0, 0, 123 }, { M-1, M, M, M, M, 122 } },
+    { 2,   { 1,   0, 0, 0, 0, 123 }, { M,   M, M, M, M, 122 } },
+    { M,   { 0,   0, 0, 0, 0, 123 }, { 1,   M, M, M, M, 122 } },
+    { M,   { M-1, 0, 0, 0, 0, M   }, { M,   M, M, M, M, M-1
+#if defined (__hpux) && ! defined (__GNUC__)
+    /* Explicit zero fill for HP C compilers that fail to zero-fill
+       aggregates; see the corresponding note in check_incr_data.  */
+                                      , 0, 0, 0, 0
+#endif
+    } }
+  };
+
+  mp_limb_t  got[SIZE];
+  int   i;
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      /* General case: the decrement is a run-time value.  */
+      refmpn_copyi (got, data[i].src, SIZE);
+      MPN_DECR_U (got, SIZE, data[i].n);
+      check_one ("check_decr_data", i,
+                 data[i].src, data[i].n,
+                 got, data[i].want, SIZE);
+
+      /* Repeat with a literal 1 so that a constant-1 special case in the
+         macro is exercised too.  */
+      if (data[i].n == 1)
+        {
+          refmpn_copyi (got, data[i].src, SIZE);
+          MPN_DECR_U (got, SIZE, CNST_LIMB(1));
+          check_one ("check_decr (const 1)", i,
+                     data[i].src, data[i].n,
+                     got, data[i].want, SIZE);
+        }
+    }
+}
+
+
+/* Test driver: run the MPN_INCR_U and MPN_DECR_U table checks.  A failing
+   case aborts inside check_one.  */
+int
+main (void)
+{
+  tests_start ();
+  /* NOTE(review): presumably selects hex output for mpn_trace -- confirm
+     against the tests support library.  */
+  mp_trace_base = -16;
+
+  check_incr_data ();
+  check_decr_data ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpn/t-matrix22.c b/tests/mpn/t-matrix22.c

new file mode 100644 (file)

index 0000000..7521df0
--- /dev/null
+++ b/tests/mpn/t-matrix22.c
@@ -0,0 +1,207 @@
+/* Tests matrix22_mul.
+
+Copyright 2008 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+struct matrix {
+  mp_size_t alloc;  /* limbs reserved per element (matrix_init stores n + 1) */
+  mp_size_t n;      /* limbs currently in use in each element */
+  mp_ptr e00, e01, e10, e11;  /* the four elements; e00 is also the base of the allocation */
+};
+
+static void
+matrix_init (struct matrix *M, mp_size_t n)
+{
+  mp_ptr base = refmpn_malloc_limbs (4*(n+1));  /* one block for all four */
+  M->alloc = n + 1;
+  M->n = 0;
+  M->e00 = base;
+  M->e01 = base + M->alloc;
+  M->e10 = base + 2 * M->alloc;
+  M->e11 = base + 3 * M->alloc;
+}
+
+static void
+matrix_clear (struct matrix *M)
+{
+  refmpn_free_limbs (M->e00);  /* e00 is the pointer matrix_init got from refmpn_malloc_limbs */
+}
+
+static void
+matrix_copy (struct matrix *R, const struct matrix *M)
+{
+  const mp_size_t limbs = R->n = M->n;  /* only the limbs in use are copied */
+  MPN_COPY (R->e00, M->e00, limbs);
+  MPN_COPY (R->e01, M->e01, limbs);
+  MPN_COPY (R->e10, M->e10, limbs);
+  MPN_COPY (R->e11, M->e11, limbs);
+}
+
+/* Limb-wise equality; callers use equal sizes, so nothing is normalized. */
+static int
+matrix_equal_p (const struct matrix *A, const struct matrix *B)
+{
+  if (A->n != B->n) return 0;
+
+  return (mpn_cmp (A->e00, B->e00, A->n) == 0
+         && mpn_cmp (A->e01, B->e01, A->n) == 0
+         && mpn_cmp (A->e10, B->e10, A->n) == 0
+         && mpn_cmp (A->e11, B->e11, A->n) == 0);
+}
+
+static void
+matrix_random(struct matrix *M, mp_size_t n, gmp_randstate_ptr rands)
+{
+  mpn_random (M->e00, n);  /* note: rands is accepted but not passed on */
+  mpn_random (M->e01, n);
+  mpn_random (M->e10, n);
+  mpn_random (M->e11, n);
+  M->n = n;
+}
+
+#define MUL(rp, ap, an, bp, bn) do { \
+    if (an > bn)                    \
+      mpn_mul (rp, ap, an, bp, bn);  \
+    else                            \
+      mpn_mul (rp, bp, bn, ap, an);  \
+  } while(0)
+
+static void
+ref_matrix22_mul (struct matrix *R,
+                 const struct matrix *A,
+                 const struct matrix *B, mp_ptr tp)
+{
+  mp_size_t an, bn, n;
+  mp_ptr r00, r01, r10, r11, a00, a01, a10, a11, b00, b01, b10, b11;
+  /* Reference R = A * B, built from eight mpn_mul calls; tp holds one an+bn limb product. */
+  if (A->n >= B->n)
+    {
+      r00 = R->e00; a00 = A->e00; b00 = B->e00;
+      r01 = R->e01; a01 = A->e01; b01 = B->e01;
+      r10 = R->e10; a10 = A->e10; b10 = B->e10;
+      r11 = R->e11; a11 = A->e11; b11 = B->e11;
+      an = A->n, bn = B->n;
+    }
+  else
+    {
+      /* B is longer: compute (A*B)^T = B^T * A^T so the longer operand still comes first */
+      r00 = R->e00; a00 = B->e00; b00 = A->e00;
+      r01 = R->e10; a01 = B->e10; b01 = A->e10;
+      r10 = R->e01; a10 = B->e01; b10 = A->e01;
+      r11 = R->e11; a11 = B->e11; b11 = A->e11;
+      an = B->n, bn = A->n;
+    }
+  n = an + bn;
+  R->n = n + 1;  /* each element gets one extra limb for the carry of the sum */
+  /* r00 = a00*b00 + a01*b10 */
+  mpn_mul (r00, a00, an, b00, bn);
+  mpn_mul (tp, a01, an, b10, bn);
+  r00[n] = mpn_add_n (r00, r00, tp, n);
+  /* r01 = a00*b01 + a01*b11 */
+  mpn_mul (r01, a00, an, b01, bn);
+  mpn_mul (tp, a01, an, b11, bn);
+  r01[n] = mpn_add_n (r01, r01, tp, n);
+  /* r10 = a10*b00 + a11*b10 */
+  mpn_mul (r10, a10, an, b00, bn);
+  mpn_mul (tp, a11, an, b10, bn);
+  r10[n] = mpn_add_n (r10, r10, tp, n);
+  /* r11 = a10*b01 + a11*b11 */
+  mpn_mul (r11, a10, an, b01, bn);
+  mpn_mul (tp, a11, an, b11, bn);
+  r11[n] = mpn_add_n (r11, r11, tp, n);
+}
+
+static void
+one_test (const struct matrix *A, const struct matrix *B, int i)
+{
+  const mp_size_t rn = A->n + B->n + 1;  /* limbs per product element */
+  struct matrix want, got;
+  mp_ptr scratch;
+
+  matrix_init (&want, rn);
+  matrix_init (&got, rn);
+  scratch = refmpn_malloc_limbs (mpn_matrix22_mul_itch (A->n, B->n));
+
+  /* Expected value first, then the library routine run on a copy of A.  */
+  ref_matrix22_mul (&want, A, B, scratch);
+  matrix_copy (&got, A);
+  mpn_matrix22_mul (got.e00, got.e01, got.e10, got.e11, A->n,
+                   B->e00, B->e01, B->e10, B->e11, B->n, scratch);
+  got.n = rn;
+  if (!matrix_equal_p (&want, &got))
+    {
+      fprintf (stderr, "ERROR in test %d\n", i);
+      gmp_fprintf (stderr, "A = (%Nx, %Nx\n      %Nx, %Nx)\n"
+                  "B = (%Nx, %Nx\n      %Nx, %Nx)\n"
+                  "R = (%Nx, %Nx (expected)\n      %Nx, %Nx)\n"
+                  "P = (%Nx, %Nx (incorrect)\n      %Nx, %Nx)\n",
+                  A->e00, A->n, A->e01, A->n, A->e10, A->n, A->e11, A->n,
+                  B->e00, B->n, B->e01, B->n, B->e10, B->n, B->e11, B->n,
+                  want.e00, want.n, want.e01, want.n, want.e10, want.n, want.e11, want.n,
+                  got.e00, got.n, got.e01, got.n, got.e10, got.n, got.e11, got.n);
+      abort();
+    }
+  refmpn_free_limbs (scratch);
+  matrix_clear (&want);
+  matrix_clear (&got);
+}
+
+#define MAX_SIZE (2+2*MATRIX22_STRASSEN_THRESHOLD)
+
+int
+main (int argc, char **argv)
+{
+  struct matrix lhs, rhs;
+  gmp_randstate_ptr rands;
+  mpz_t draw;
+  int iter;
+
+  tests_start ();
+  rands = RANDS;
+
+  /* Operands are allocated once at the largest size and reused.  */
+  matrix_init (&lhs, MAX_SIZE);
+  matrix_init (&rhs, MAX_SIZE);
+  mpz_init (draw);
+
+  iter = 0;
+  while (iter < 1000)
+    {
+      mp_size_t lhs_n, rhs_n;
+      /* Each size is drawn independently from 1..MAX_SIZE.  */
+      mpz_urandomb (draw, rands, 32);
+      lhs_n = 1 + mpz_get_ui (draw) % MAX_SIZE;
+      mpz_urandomb (draw, rands, 32);
+      rhs_n = 1 + mpz_get_ui (draw) % MAX_SIZE;
+
+      matrix_random (&lhs, lhs_n, rands);
+      matrix_random (&rhs, rhs_n, rands);
+      one_test (&lhs, &rhs, iter++);
+    }
+
+  mpz_clear (draw);
+  matrix_clear (&lhs);
+  matrix_clear (&rhs);
+  tests_end ();
+  return 0;
+}
diff --git a/tests/mpn/t-mod_1.c b/tests/mpn/t-mod_1.c

new file mode 100644 (file)

index 0000000..2f86ba2
--- /dev/null
+++ b/tests/mpn/t-mod_1.c
@@ -0,0 +1,118 @@
+/* Test mpn_mod_1 variants.
+
+Copyright 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+static void
+check_one (mp_srcptr ap, mp_size_t n, mp_limb_t b)
+{
+  mp_limb_t r_ref = refmpn_mod_1 (ap, n, b);  /* expected value, from the refmpn implementation */
+  mp_limb_t r;  /* result of the variant that ran last; printed at fail: */
+
+  if (n >= 2)  /* mpn_mod_1_1p is exercised only for n >= 2 */
+    {
+      mp_limb_t pre[4];
+      mpn_mod_1_1p_cps (pre, b);
+      r = mpn_mod_1_1p (ap, n, b << pre[1], pre);  /* pre[1]: presumably the normalizing shift -- confirm */
+      if (r != r_ref)
+       {
+         printf ("mpn_mod_1_1p failed\n");
+         goto fail;
+       }
+    }
+  if ((b & GMP_NUMB_HIGHBIT) == 0)  /* 2p variant runs only when b's high bit is clear */
+    {
+      mp_limb_t pre[5];
+      mpn_mod_1s_2p_cps (pre, b);
+      r = mpn_mod_1s_2p (ap, n, b << pre[1], pre);
+      if (r != r_ref)
+       {
+         printf ("mpn_mod_1s_2p failed\n");
+         goto fail;
+       }
+    }
+  if (b <= GMP_NUMB_MASK / 4)  /* 4p variant runs only for b <= GMP_NUMB_MASK / 4 */
+    {
+      mp_limb_t pre[7];
+      mpn_mod_1s_4p_cps (pre, b);
+      r = mpn_mod_1s_4p (ap, n, b << pre[1], pre);
+      if (r != r_ref)
+       {
+         printf ("mpn_mod_1s_4p failed\n");
+         goto fail;
+       }
+    }
+  r = mpn_mod_1 (ap, n, b);  /* the generic entry point is checked for every input */
+  if (r != r_ref)
+    {
+      printf ("mpn_mod_1 failed\n");
+    fail:  /* shared report: the variant checks above jump into this block */
+      printf ("an = %d, a: ", (int) n); mpn_dump (ap, n);
+      printf ("b           : "); mpn_dump (&b, 1);
+      printf ("r (expected): "); mpn_dump (&r_ref, 1);
+      printf ("r (bad)     : "); mpn_dump (&r, 1);
+      abort();
+    }
+}
+
+int
+main (int argc, char **argv)
+{
+  gmp_randstate_ptr rands;
+  mpz_t a, b;
+  int i;
+
+  tests_start ();
+  rands = RANDS;
+  mpz_init (a);
+  mpz_init (b);
+
+  /* 300 rounds: a random dividend a against a random one-limb divisor b.  */
+  for (i = 0; i < 300; i++)
+    {
+      unsigned a_bits, b_bits;
+      mp_size_t asize;
+
+      /* Dividend of 1..1000 bits, divisor of 1..GMP_NUMB_BITS bits.  */
+      a_bits = 1 + gmp_urandomm_ui (rands, 1000);
+      b_bits = 1 + gmp_urandomm_ui (rands, GMP_NUMB_BITS);
+      mpz_rrandomb (a, rands, a_bits);
+      mpz_rrandomb (b, rands, b_bits);
+
+      /* Never hand check_one a zero size or a zero divisor.  */
+      asize = SIZ(a) ? SIZ(a) : 1;
+      if (mpz_sgn (b) == 0)
+        mpz_set_ui (b, 1);
+
+      check_one (PTR(a), asize, PTR(b)[0]);
+    }
+
+  mpz_clear (a);
+  mpz_clear (b);
+
+  tests_end ();
+
+  return 0;
+}
+
diff --git a/tests/mpn/t-mp_bases.c b/tests/mpn/t-mp_bases.c

new file mode 100644 (file)

index 0000000..17950ec
--- /dev/null
+++ b/tests/mpn/t-mp_bases.c
@@ -0,0 +1,105 @@
+/* Check mp_bases values.
+
+Copyright 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+#include "tests.h"
+
+
+int
+main (int argc, char *argv[])
+{
+  mp_limb_t  want_bb, want_bb_inv;
+  int        base, want_chars_per_limb;
+
+  want_chars_per_limb = refmpn_chars_per_limb (10);
+  if (MP_BASES_CHARS_PER_LIMB_10 != want_chars_per_limb)
+    {
+      printf ("MP_BASES_CHARS_PER_LIMB_10 wrong\n");
+      abort ();
+    }
+
+  want_bb = refmpn_big_base (10);
+  if (MP_BASES_BIG_BASE_10 != want_bb)
+    {
+      printf ("MP_BASES_BIG_BASE_10 wrong\n");
+      abort ();
+    }
+
+  want_bb_inv = refmpn_invert_limb
+    (want_bb << refmpn_count_leading_zeros (want_bb));
+  if (MP_BASES_BIG_BASE_INVERTED_10 != want_bb_inv)
+    {
+      printf ("MP_BASES_BIG_BASE_INVERTED_10 wrong\n");
+      abort ();
+    }
+
+  if (MP_BASES_NORMALIZATION_STEPS_10
+      != refmpn_count_leading_zeros (MP_BASES_BIG_BASE_10))
+    {
+      printf ("MP_BASES_NORMALIZATION_STEPS_10 wrong\n");
+      abort ();
+    }
+
+  for (base = 2; base < numberof (mp_bases); base++)
+    {
+      want_chars_per_limb = refmpn_chars_per_limb (base);
+      if (mp_bases[base].chars_per_limb != want_chars_per_limb)
+        {
+          printf ("mp_bases[%d].chars_per_limb wrong\n", base);
+          printf ("  got  %d\n", mp_bases[base].chars_per_limb);
+          printf ("  want %d\n", want_chars_per_limb);
+          abort ();
+        }
+
+      if (POW2_P (base))
+        {
+          want_bb = refmpn_count_trailing_zeros ((mp_limb_t) base);
+          if (mp_bases[base].big_base != want_bb)
+            {
+              printf ("mp_bases[%d].big_base (log2 of base) wrong\n", base);
+              abort ();
+            }
+        }
+      else
+        {
+          want_bb = refmpn_big_base (base);
+          if (mp_bases[base].big_base != want_bb)
+            {
+              printf ("mp_bases[%d].big_base wrong\n", base);
+              abort ();
+            }
+
+#if USE_PREINV_DIVREM_1
+          want_bb_inv = refmpn_invert_limb
+            (want_bb << refmpn_count_leading_zeros (want_bb));
+          if (mp_bases[base].big_base_inverted != want_bb_inv)
+            {
+              printf ("mp_bases[%d].big_base_inverted wrong\n", base);
+              abort ();
+            }
+#endif
+        }
+    }
+
+  exit (0);
+}
diff --git a/tests/mpn/t-mul.c b/tests/mpn/t-mul.c

new file mode 100644 (file)

index 0000000..79d3a38
--- /dev/null
+++ b/tests/mpn/t-mul.c
@@ -0,0 +1,101 @@
+/* Test mpn_mul function for all sizes up to a selected limit.
+
+Copyright 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+static unsigned
+isqrt (unsigned t)
+{
+  unsigned root, quot, nbits;
+
+  /* nbits = bit length of t (1 when t is 0).  */
+  nbits = 1;
+  for (root = t >> 1; root != 0; root >>= 1)
+    nbits++;
+  /* Start at or above sqrt(t): 2^(nbits/2), times 1.5 when nbits is odd.  */
+  root = 1u << (nbits / 2);
+  if (nbits % 2 != 0)
+    root += root / 2;
+  for (;;)  /* averaging iteration; stop once the quotient reaches the estimate */
+    {
+      quot = t / root;
+      root = (root + quot) / 2;
+      if (quot >= root)
+        return root;
+    }
+}
+
+int
+main (int argc, char **argv)
+{
+  mp_ptr ap, bp, rp, refp;
+  mp_size_t max_n, an, bn, rn;
+  gmp_randstate_ptr rands;
+  int reps = 1;
+  TMP_DECL;
+  TMP_MARK;
+
+  tests_start ();
+  TESTS_REPS (reps, argv, argc);
+
+  rands = RANDS;
+
+  /* The reps argument is reinterpreted as a size: the largest operand
+     length is the integer square root of reps * 25000.  */
+  max_n = isqrt (reps * 25000);
+
+  /* Operands get max_n + 1 limbs because an + 1 and bn + 1 limbs are
+     randomized below.  */
+  ap = TMP_ALLOC_LIMBS (max_n + 1);
+  bp = TMP_ALLOC_LIMBS (max_n + 1);
+  rp = TMP_ALLOC_LIMBS (2 * max_n);
+  refp = TMP_ALLOC_LIMBS (2 * max_n);
+
+  /* Every size pair with 1 <= bn <= an <= max_n.  */
+  for (an = 1; an <= max_n; an++)
+    for (bn = 1; bn <= an; bn++)
+      {
+        rn = an + bn;
+        mpn_random2 (ap, an + 1);
+        mpn_random2 (bp, bn + 1);
+
+        refmpn_mul (refp, ap, an, bp, bn);
+        mpn_mul (rp, ap, an, bp, bn);
+        if (mpn_cmp (refp, rp, rn) == 0)
+          continue;
+
+        printf ("ERROR, an = %d, bn = %d, rn = %d\n",
+                (int) an, (int) bn, (int) rn);
+        printf ("a: "); mpn_dump (ap, an);
+        printf ("b: "); mpn_dump (bp, bn);
+        printf ("r:   "); mpn_dump (rp, rn);
+        printf ("ref: "); mpn_dump (refp, rn);
+        abort();
+      }
+
+  TMP_FREE;
+  tests_end ();
+  return 0;
+}
diff --git a/tests/mpn/t-mullo.c b/tests/mpn/t-mullo.c

new file mode 100644 (file)

index 0000000..b44e236
--- /dev/null
+++ b/tests/mpn/t-mullo.c
@@ -0,0 +1,142 @@
+/* Test for mullo function.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+
+/* Sizes are up to 2^SIZE_LOG limbs */
+#ifndef SIZE_LOG
+#define SIZE_LOG 10
+#endif
+
+#ifndef COUNT
+#define COUNT 10000
+#endif
+
+#define MAX_N (1L << SIZE_LOG)
+#define MIN_N (1)
+
+int
+main (int argc, char **argv)
+{
+  mp_ptr ap, bp, refp, pp, scratch;
+  int count = COUNT;
+  int test;
+  gmp_randstate_ptr rands;
+  TMP_DECL;
+  TMP_MARK;
+
+  if (argc > 1)
+    {
+      char *end;
+      count = strtol (argv[1], &end, 0);
+      if (*end || count <= 0)
+       {
+         fprintf (stderr, "Invalid test count: %s.\n", argv[1]);
+         return 1;
+       }
+    }
+
+  tests_start ();
+  rands = RANDS;
+
+#define mpn_mullo_itch(n) (0)
+
+  ap = TMP_ALLOC_LIMBS (MAX_N);
+  bp = TMP_ALLOC_LIMBS (MAX_N);
+  refp = TMP_ALLOC_LIMBS (MAX_N * 2);
+  pp = 1+TMP_ALLOC_LIMBS (MAX_N + 2);
+  scratch
+    = 1+TMP_ALLOC_LIMBS (mpn_mullo_itch (MAX_N) + 2);
+
+  for (test = 0; test < count; test++)
+    {
+      unsigned size_min;
+      unsigned size_range;
+      mp_size_t n;
+      mp_size_t itch;
+      mp_limb_t p_before, p_after, s_before, s_after;
+
+      for (size_min = 1; (1L << size_min) < MIN_N; size_min++)
+       ;
+
+      /* We generate an in the MIN_N <= n <= (1 << size_range). */
+      size_range = size_min
+       + gmp_urandomm_ui (rands, SIZE_LOG + 1 - size_min);
+
+      n = MIN_N
+       + gmp_urandomm_ui (rands, (1L << size_range) + 1 - MIN_N);
+
+      mpn_random2 (ap, n);
+      mpn_random2 (bp, n);
+      mpn_random2 (pp-1, n + 2);
+      p_before = pp[-1];
+      p_after = pp[n];
+
+      itch = mpn_mullo_itch (n);
+      ASSERT_ALWAYS (itch <= mpn_mullo_itch (MAX_N));
+      mpn_random2 (scratch-1, itch+2);
+      s_before = scratch[-1];
+      s_after = scratch[itch];
+
+      mpn_mullo_n (pp, ap, bp, n);
+      mpn_mul_n (refp, ap, bp, n);
+      if (pp[-1] != p_before || pp[n] != p_after
+         || scratch[-1] != s_before || scratch[itch] != s_after
+         || mpn_cmp (refp, pp, n) != 0)
+       {
+         printf ("ERROR in test %d, n = %d",
+                 test, (int) n);
+         if (pp[-1] != p_before)
+           {
+             printf ("before pp:"); mpn_dump (pp -1, 1);
+             printf ("keep:   "); mpn_dump (&p_before, 1);
+           }
+         if (pp[n] != p_after)
+           {
+             printf ("after pp:"); mpn_dump (pp + n, 1);
+             printf ("keep:   "); mpn_dump (&p_after, 1);
+           }
+         if (scratch[-1] != s_before)
+           {
+             printf ("before scratch:"); mpn_dump (scratch-1, 1);
+             printf ("keep:   "); mpn_dump (&s_before, 1);
+           }
+         if (scratch[itch] != s_after)
+           {
+             printf ("after scratch:"); mpn_dump (scratch + itch, 1);
+             printf ("keep:   "); mpn_dump (&s_after, 1);
+           }
+         mpn_dump (ap, n);
+         mpn_dump (bp, n);
+         mpn_dump (pp, n);
+         mpn_dump (refp, n);
+
+         abort();
+       }
+    }
+  TMP_FREE;
+  tests_end ();
+  return 0;
+}
diff --git a/tests/mpn/t-mulmod_bnm1.c b/tests/mpn/t-mulmod_bnm1.c

new file mode 100644 (file)

index 0000000..87a2013
--- /dev/null
+++ b/tests/mpn/t-mulmod_bnm1.c
@@ -0,0 +1,218 @@
+/* Test for mulmod_bnm1 function.
+
+   Contributed to the GNU project by Marco Bodrato.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+
+/* Sizes are up to 2^SIZE_LOG limbs */
+#ifndef SIZE_LOG
+#define SIZE_LOG 11
+#endif
+
+#ifndef COUNT
+#define COUNT 5000
+#endif
+
+#define MAX_N (1L << SIZE_LOG)
+#define MIN_N 1
+
+/*
+  Reference function for multiplication modulo B^rn-1.
+
+  The result is expected to be ZERO if and only if one of the operands
+  already is. Otherwise the class [0] Mod(B^rn-1) is represented by
+  B^rn-1. This should not be a problem if mulmod_bnm1 is used to
+  combine results and obtain a natural number when one knows in
+  advance that the final value is less than (B^rn-1).
+*/
+
+static void
+ref_mulmod_bnm1 (mp_ptr rp, mp_size_t rn, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn)
+{
+  mp_size_t pn = an + bn;   /* limbs in the full product */
+  mp_limb_t cy;
+
+  ASSERT (0 < an && an <= rn);
+  ASSERT (0 < bn && bn <= rn);
+
+  if (an < bn)   /* longer operand goes first */
+    refmpn_mul (rp, bp, bn, ap, an);
+  else
+    refmpn_mul (rp, ap, an, bp, bn);
+  if (pn <= rn)
+    return;   /* the product already fits in rn limbs */
+  /* Fold the high limbs onto the low rn limbs; after a carry out the low
+     part is at most B^rn - 2, so wrapping the carry cannot overflow.  */
+  cy = mpn_add (rp, rp, rn, rp + rn, pn - rn);
+  MPN_INCR_U (rp, rn, cy);
+}
+
+/*
+  Compare the result of the mpn_mulmod_bnm1 function in the library
+  with the reference function above.
+*/
+
+int
+main (int argc, char **argv)
+{
+  mp_ptr ap, bp, refp, pp, scratch;
+  int count = COUNT;
+  int test;
+  gmp_randstate_ptr rands;
+  TMP_DECL;
+  TMP_MARK;
+
+  if (argc > 1)  /* optional argument: number of test iterations */
+    {
+      char *end;
+      count = strtol (argv[1], &end, 0);
+      if (*end || count <= 0)
+       {
+         fprintf (stderr, "Invalid test count: %s.\n", argv[1]);
+         return 1;
+       }
+    }
+
+  tests_start ();
+  rands = RANDS;
+
+  ASSERT_ALWAYS (mpn_mulmod_bnm1_next_size (MAX_N) == MAX_N);
+
+  ap = TMP_ALLOC_LIMBS (MAX_N);
+  bp = TMP_ALLOC_LIMBS (MAX_N);
+  refp = TMP_ALLOC_LIMBS (MAX_N * 4);
+  pp = 1+TMP_ALLOC_LIMBS (MAX_N + 2);  /* one guard limb below and one above */
+  scratch
+    = 1+TMP_ALLOC_LIMBS (mpn_mulmod_bnm1_itch (MAX_N, MAX_N, MAX_N) + 2);
+
+  for (test = 0; test < count; test++)
+    {
+      unsigned size_min;
+      unsigned size_range;
+      mp_size_t an,bn,rn,n;
+      mp_size_t itch;
+      mp_limb_t p_before, p_after, s_before, s_after;
+
+      for (size_min = 1; (1L << size_min) < MIN_N; size_min++)
+       ;
+
+      /* We generate n in the range MIN_N <= n <= (1 << size_range). */
+      size_range = size_min
+       + gmp_urandomm_ui (rands, SIZE_LOG + 1 - size_min);
+
+      n = MIN_N
+       + gmp_urandomm_ui (rands, (1L << size_range) + 1 - MIN_N);
+      n = mpn_mulmod_bnm1_next_size (n);  /* replace n by the size next_size returns for it */
+
+      if ( (test & 1) || n == 1) {
+       /* Half of the tests are done with the main scenario in mind:
+          both an and bn >= rn/2 */
+       an = ((n+1) >> 1) + gmp_urandomm_ui (rands, (n+1) >> 1);
+       bn = ((n+1) >> 1) + gmp_urandomm_ui (rands, (n+1) >> 1);
+      } else {
+       /* Second half of the tests are done using mulmod to compute a
+          full product with n/2 < an+bn <= n. */
+       an = 1 + gmp_urandomm_ui (rands, n - 1);
+       if (an >= n/2)
+         bn = 1 + gmp_urandomm_ui (rands, n - an);
+       else
+         bn = n/2 + 1 - an + gmp_urandomm_ui (rands, (n+1)/2);
+      }
+
+      /* Make sure an >= bn */
+      if (an < bn)
+       MP_SIZE_T_SWAP (an, bn);
+
+      mpn_random2 (ap, an);
+      mpn_random2 (bp, bn);
+
+      /* Sometimes trigger the borderline conditions
+        A = -1,0,+1 or B = -1,0,+1 or A*B == -1,0,1 Mod(B^{n/2}+1).
+        This only makes sense if there is at least a split, i.e. n is even. */
+      if ((test & 0x1f) == 1 && (n & 1) == 0) {
+       mp_size_t x;
+       MPN_COPY (ap, ap + (n >> 1), an - (n >> 1));  /* low limbs := limbs from n/2 upwards */
+       MPN_ZERO (ap + an - (n >> 1) , n - an);
+       MPN_COPY (bp, bp + (n >> 1), bn - (n >> 1));
+       MPN_ZERO (bp + bn - (n >> 1) , n - bn);
+       x = (n == an) ? 0 : gmp_urandomm_ui (rands, n - an);
+       ap[x] += gmp_urandomm_ui (rands, 3) - 1;  /* adds -1, 0 or +1 to one limb */
+       x = (n >> 1) - x % (n >> 1);
+       bp[x] += gmp_urandomm_ui (rands, 3) - 1;
+       /* We don't propagate carry, this means that the desired condition
+          is not triggered all the times. A few times are enough anyway. */
+      }
+      rn = MIN(n, an + bn);  /* number of result limbs that are checked */
+      mpn_random2 (pp-1, rn + 2);
+      p_before = pp[-1];  /* guard limbs around the rn-limb result area */
+      p_after = pp[rn];
+
+      itch = mpn_mulmod_bnm1_itch (n, an, bn);
+      ASSERT_ALWAYS (itch <= mpn_mulmod_bnm1_itch (MAX_N, MAX_N, MAX_N));
+      mpn_random2 (scratch-1, itch+2);
+      s_before = scratch[-1];  /* guard limbs around the scratch area */
+      s_after = scratch[itch];
+
+      mpn_mulmod_bnm1 (  pp, n, ap, an, bp, bn, scratch);
+      ref_mulmod_bnm1 (refp, n, ap, an, bp, bn);
+      if (pp[-1] != p_before || pp[rn] != p_after
+         || scratch[-1] != s_before || scratch[itch] != s_after
+         || mpn_cmp (refp, pp, rn) != 0)
+       {
+         printf ("ERROR in test %d, an = %d, bn = %d, n = %d\n",
+                 test, (int) an, (int) bn, (int) n);
+         if (pp[-1] != p_before)
+           {
+             printf ("before pp:"); mpn_dump (pp -1, 1);
+             printf ("keep:   "); mpn_dump (&p_before, 1);
+           }
+         if (pp[rn] != p_after)
+           {
+             printf ("after pp:"); mpn_dump (pp + rn, 1);
+             printf ("keep:   "); mpn_dump (&p_after, 1);
+           }
+         if (scratch[-1] != s_before)
+           {
+             printf ("before scratch:"); mpn_dump (scratch-1, 1);
+             printf ("keep:   "); mpn_dump (&s_before, 1);
+           }
+         if (scratch[itch] != s_after)
+           {
+             printf ("after scratch:"); mpn_dump (scratch + itch, 1);
+             printf ("keep:   "); mpn_dump (&s_after, 1);
+           }
+         mpn_dump (ap, an);
+         mpn_dump (bp, bn);
+         mpn_dump (pp, rn);
+         mpn_dump (refp, rn);
+
+         abort();
+       }
+    }
+  TMP_FREE;
+  tests_end ();
+  return 0;
+}
diff --git a/tests/mpn/t-perfsqr.c b/tests/mpn/t-perfsqr.c

new file mode 100644 (file)

index 0000000..6afe708
--- /dev/null
+++ b/tests/mpn/t-perfsqr.c
@@ -0,0 +1,117 @@
+/* Test mpn_perfect_square_p data.
+
+Copyright 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+#include "mpn/perfsqr.h"
+
+
+#define PERFSQR_MOD_MASK   ((CNST_LIMB(1) << PERFSQR_MOD_BITS) - 1)
+
+void
+check_mod_2 (mp_limb_t d, mp_limb_t inv, mp_limb_t got_hi, mp_limb_t got_lo)
+{
+  int        is_square[2*GMP_LIMB_BITS], got;
+  unsigned   rem, bit;
+  mp_limb_t  prod;
+
+  /* d must fit the table, inv must invert d mod 2^PERFSQR_MOD_BITS, and
+     the products formed below must not overflow a limb.  */
+  ASSERT_ALWAYS (d <= numberof (is_square));
+  ASSERT_ALWAYS (((inv * d) & PERFSQR_MOD_MASK) == 1);
+  ASSERT_ALWAYS (MP_LIMB_T_MAX / d >= PERFSQR_MOD_MASK);
+
+  /* Mark the squares mod d.  */
+  for (rem = 0; rem < d; rem++)
+    is_square[rem] = 0;
+  for (rem = 0; rem < d; rem++)
+    is_square[(rem*rem)%d] = 1;
+
+  /* The mask bits must classify every remainder mod d the same way.  */
+  for (rem = 0; rem < d; rem++)
+    {
+      /* as per PERFSQR_MOD_IDX */
+      prod = (rem * inv) & PERFSQR_MOD_MASK;
+      bit = (prod * d) >> PERFSQR_MOD_BITS;
+
+      got = (bit >= GMP_LIMB_BITS
+             ? (got_hi >> (bit - GMP_LIMB_BITS)) & 1
+             : (got_lo >> bit) & 1);
+
+      if (got == is_square[rem])
+        continue;
+
+      printf ("Wrong generated data\n");
+      printf ("  d=%u\n", (unsigned) d);
+      printf ("  r=%u\n", rem);
+      printf ("  idx=%u\n", bit);
+      printf ("  got  %d\n", got);
+      printf ("  want %d\n", is_square[rem]);
+      abort ();
+    }
+}
+
+/* Check the generated data in perfsqr.h by routing each (d, inv, mask) entry to check_mod_2. */
+void
+check_mod (void)
+{
+#define PERFSQR_MOD_34(r, up, usize)       { r = 0; } /* so r isn't unused */
+#define PERFSQR_MOD_PP(r, up, usize)       { r = 0; }
+#define PERFSQR_MOD_1(r, d, inv, mask)     check_mod_2 (d, inv, CNST_LIMB(0), mask)
+#define PERFSQR_MOD_2(r, d, inv, mhi, mlo) check_mod_2 (d, inv, mhi, mlo)
+  /* PERFSQR_MOD_TEST is not defined in this file; presumably mpn/perfsqr.h expands it to the steps above -- confirm. */
+  PERFSQR_MOD_TEST (dummy, dummy);
+}
+
+/* Check PERFSQR_PP and its two derived constants; the body is empty when PERFSQR_PP is not defined. */
+void
+check_pp (void)
+{
+#ifdef PERFSQR_PP
+  ASSERT_ALWAYS_LIMB (PERFSQR_PP);
+  ASSERT_ALWAYS_LIMB (PERFSQR_PP_NORM);
+  ASSERT_ALWAYS_LIMB (PERFSQR_PP_INVERTED);
+
+  /* preinv stuff only for nails==0 */
+  if (GMP_NAIL_BITS == 0)
+    {
+      ASSERT_ALWAYS (PERFSQR_PP_NORM  /* NORM is PP shifted left by its leading-zero count */
+                     == PERFSQR_PP << refmpn_count_leading_zeros (PERFSQR_PP));
+      ASSERT_ALWAYS (PERFSQR_PP_INVERTED  /* INVERTED is refmpn_invert_limb of NORM */
+                     == refmpn_invert_limb (PERFSQR_PP_NORM));
+    }
+#endif
+}
+
+int
+main (void)
+{
+  tests_start ();
+  /* Table data first, then the PERFSQR_PP constants.  */
+  check_mod ();
+  check_pp ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpn/t-scan.c b/tests/mpn/t-scan.c

new file mode 100644 (file)

index 0000000..fc8d3cc
--- /dev/null
+++ b/tests/mpn/t-scan.c
@@ -0,0 +1,145 @@
+/* Test mpn_scan0 and mpn_scan1.
+
+Copyright 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#include "tests.h"
+
+
+#define SIZE  ((mp_size_t) 3)
+mp_limb_t  x[SIZE+1];
+
+void
+check (void)
+{
+  unsigned long  i, got, want;
+  /* Compare mpn_scan1 and mpn_scan0 with the refmpn versions from every start bit in x[0..SIZE-1]. */
+  x[SIZE] = 1;  /* extra limb above the data: a 1 bit, presumably so scan1 always finds one */
+  for (i = 0; i < SIZE*GMP_NUMB_BITS; i++)
+    {
+      got = mpn_scan1 (x, i);       /* function under test */
+      want = refmpn_scan1 (x, i);   /* reference implementation */
+      if (got != want)
+        {
+          printf ("mpn_scan1\n");
+          printf ("  i     %lu\n", i);
+          printf ("  got   %lu\n", got);
+          printf ("  want  %lu\n", want);
+          mpn_trace ("  x    ", x, SIZE);
+          abort ();
+        }
+    }
+
+  x[SIZE] = 0;  /* extra limb above the data: a 0 bit, presumably so scan0 always finds one */
+  for (i = 0; i < SIZE*GMP_NUMB_BITS; i++)
+    {
+      got = mpn_scan0 (x, i);       /* function under test */
+      want = refmpn_scan0 (x, i);   /* reference implementation */
+      if (got != want)
+        {
+          printf ("mpn_scan0\n");
+          printf ("  i     %lu\n", i);
+          printf ("  got   %lu\n", got);
+          printf ("  want  %lu\n", want);
+          mpn_trace ("  x    ", x, SIZE);
+          abort ();
+        }
+    }
+}
+
+void
+check_twobits (void)
+{
+#define TWOBITS(a, b) \
+  ((CNST_LIMB(1) << (a)) | (CNST_LIMB(1) << (b)))
+  /* Each case clears x[0..SIZE-1], sets two bits, and runs check ().  */
+  refmpn_zero (x, SIZE);
+  x[0] = TWOBITS (1, 0);  /* bits 0 and 1 of limb 0 */
+  check ();
+
+  refmpn_zero (x, SIZE);
+  x[0] = TWOBITS (GMP_NUMB_BITS-1, 1);  /* bit 1 and the top numb bit of limb 0 */
+  check ();
+
+  refmpn_zero (x, SIZE);
+  x[0] = CNST_LIMB(1);  /* bit 0 of limb 0 and bit 0 of limb 1 */
+  x[1] = CNST_LIMB(1);
+  check ();
+
+  refmpn_zero (x, SIZE);
+  x[0] = CNST_LIMB(1) << (GMP_NUMB_BITS-1);  /* top numb bit of limb 0, bit 0 of limb 1 */
+  x[1] = CNST_LIMB(1);
+  check ();
+
+  refmpn_zero (x, SIZE);
+  x[1] = TWOBITS (1, 0);  /* bits 0 and 1 of limb 1, limb 0 left zero */
+  check ();
+
+  refmpn_zero (x, SIZE);
+  x[1] = CNST_LIMB(1);  /* bit 0 of limb 1 and bit 0 of limb 2 */
+  x[2] = CNST_LIMB(1);
+  check ();
+}
+
+/* Runs check () for every ordered pair of bit positions in x[0..SIZE-1].
+   Unused: it takes too long, especially on 64-bit systems.  */
+void
+check_twobits_exhaustive (void)
+{
+  const unsigned long  nbits = GMP_NUMB_BITS * SIZE;
+  unsigned long  first, second;
+
+  for (first = 0; first < nbits; first++)
+    for (second = 0; second < nbits; second++)
+      {
+        refmpn_zero (x, SIZE);
+        refmpn_setbit (x, first);
+        refmpn_setbit (x, second);
+        check ();
+      }
+}
+
+void
+check_rand (void)
+{
+  int  remaining;
+  /* 100 rounds of check () on freshly randomized limbs.  */
+  for (remaining = 100; remaining > 0; remaining--)
+    {
+      refmpn_random2 (x, SIZE);
+      check ();
+    }
+}
+
+int
+main (void)
+{
+  mp_trace_base = -16;
+  tests_start ();
+  /* Fixed two-bit patterns, then random data; check_twobits_exhaustive is not run.  */
+  check_twobits ();
+  check_rand ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpn/t-sqrmod_bnm1.c b/tests/mpn/t-sqrmod_bnm1.c

new file mode 100644 (file)

index 0000000..8dfd532
--- /dev/null
+++ b/tests/mpn/t-sqrmod_bnm1.c
@@ -0,0 +1,191 @@
+/* Test for sqrmod_bnm1 function.
+
+   Contributed to the GNU project by Marco Bodrato.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+
+/* Sizes are up to 2^SIZE_LOG limbs */
+#ifndef SIZE_LOG
+#define SIZE_LOG 12
+#endif
+
+#ifndef COUNT
+#define COUNT 3000
+#endif
+
+#define MAX_N (1L << SIZE_LOG)
+#define MIN_N 1
+
+/*
+  Reference function for squaring modulo B^rn-1.
+
+  The result is expected to be ZERO if and only if the operand
+  already is. Otherwise the class [0] Mod(B^rn-1) is represented by
+  B^rn-1. This should not be a problem if sqrmod_bnm1 is used to
+  combine results and obtain a natural number when one knows in
+  advance that the final value is less than (B^rn-1).
+*/
+
+static void
+ref_sqrmod_bnm1 (mp_ptr rp, mp_size_t rn, mp_srcptr ap, mp_size_t an)
+{
+  mp_size_t sqn = 2 * an;      /* limbs written by the full square */
+  mp_limb_t cy;
+
+  ASSERT (0 < an && an <= rn);
+  refmpn_mul (rp, ap, an, ap, an);
+  if (sqn <= rn)
+    return;                    /* nothing above the low rn limbs to fold */
+
+  /* Add the limbs above rn onto the low rn limbs.  If cy == 1, then the
+     value of rp is at most B^rn - 2, so adding the carry cannot overflow. */
+  cy = mpn_add (rp, rp, rn, rp + rn, sqn - rn);
+  MPN_INCR_U (rp, rn, cy);
+}
+
+/*
+  Compare the result of the mpn_sqrmod_bnm1 function in the library
+  with the reference function above.
+*/
+
+int
+main (int argc, char **argv)
+{
+  mp_ptr ap, refp, pp, scratch;
+  int count = COUNT;
+  int test;
+  gmp_randstate_ptr rands;
+  TMP_DECL;
+  TMP_MARK;
+  /* An optional first argument overrides the number of test iterations.  */
+  if (argc > 1)
+    {
+      char *end;
+      count = strtol (argv[1], &end, 0);
+      if (*end || count <= 0)
+       {
+         fprintf (stderr, "Invalid test count: %s.\n", argv[1]);
+         return 1;
+       }
+    }
+
+  tests_start ();
+  rands = RANDS;
+
+  ASSERT_ALWAYS (mpn_sqrmod_bnm1_next_size (MAX_N) == MAX_N);
+  /* pp and scratch point one limb into their blocks: a guard limb on each side.  */
+  ap = TMP_ALLOC_LIMBS (MAX_N);
+  refp = TMP_ALLOC_LIMBS (MAX_N * 4);
+  pp = 1+TMP_ALLOC_LIMBS (MAX_N + 2);
+  scratch
+    = 1+TMP_ALLOC_LIMBS (mpn_sqrmod_bnm1_itch (MAX_N, MAX_N) + 2);
+
+  for (test = 0; test < count; test++)
+    {
+      unsigned size_min;
+      unsigned size_range;
+      mp_size_t an,rn,n;
+      mp_size_t itch;
+      mp_limb_t p_before, p_after, s_before, s_after;
+
+      for (size_min = 1; (1L << size_min) < MIN_N; size_min++)
+       ;
+
+      /* We generate n in the range MIN_N <= n <= (1 << size_range). */
+      size_range = size_min
+       + gmp_urandomm_ui (rands, SIZE_LOG + 1 - size_min);
+
+      n = MIN_N
+       + gmp_urandomm_ui (rands, (1L << size_range) + 1 - MIN_N);
+      n = mpn_sqrmod_bnm1_next_size (n);
+      /* Pick an: 1 when n == 1, otherwise at least (n+1)/2.  */
+      if (n == 1)
+       an = 1;
+      else
+       an = ((n+1) >> 1) + gmp_urandomm_ui (rands, (n+1) >> 1);
+
+      mpn_random2 (ap, an);
+
+      /* Sometimes trigger the borderline conditions
+        A = -1,0,+1 Mod(B^{n/2}+1).
+        This only makes sense if there is at least a split, i.e. n is even. */
+      if ((test & 0x1f) == 1 && (n & 1) == 0) {
+       mp_size_t x;
+       MPN_COPY (ap, ap + (n >> 1), an - (n >> 1));
+       MPN_ZERO (ap + an - (n >> 1) , n - an);
+       x = (n == an) ? 0 : gmp_urandomm_ui (rands, n - an);
+       ap[x] += gmp_urandomm_ui (rands, 3) - 1;
+      }
+      rn = MIN(n, 2*an);
+      mpn_random2 (pp-1, rn + 2);
+      p_before = pp[-1];
+      p_after = pp[rn];
+      /* Randomize the scratch area too, and remember its two guard limbs.  */
+      itch = mpn_sqrmod_bnm1_itch (n, an);
+      ASSERT_ALWAYS (itch <= mpn_sqrmod_bnm1_itch (MAX_N, MAX_N));
+      mpn_random2 (scratch-1, itch+2);
+      s_before = scratch[-1];
+      s_after = scratch[itch];
+      /* Run the library function and the reference; guards and rn limbs must agree.  */
+      mpn_sqrmod_bnm1 (  pp, n, ap, an, scratch);
+      ref_sqrmod_bnm1 (refp, n, ap, an);
+      if (pp[-1] != p_before || pp[rn] != p_after
+         || scratch[-1] != s_before || scratch[itch] != s_after
+         || mpn_cmp (refp, pp, rn) != 0)
+       {
+         printf ("ERROR in test %d, an = %d, n = %d\n",
+                 test, (int) an, (int) n);
+         if (pp[-1] != p_before)
+           {
+             printf ("before pp:"); mpn_dump (pp -1, 1);
+             printf ("keep:   "); mpn_dump (&p_before, 1);
+           }
+         if (pp[rn] != p_after)
+           {
+             printf ("after pp:"); mpn_dump (pp + rn, 1);
+             printf ("keep:   "); mpn_dump (&p_after, 1);
+           }
+         if (scratch[-1] != s_before)
+           {
+             printf ("before scratch:"); mpn_dump (scratch-1, 1);
+             printf ("keep:   "); mpn_dump (&s_before, 1);
+           }
+         if (scratch[itch] != s_after)
+           {
+             printf ("after scratch:"); mpn_dump (scratch + itch, 1);
+             printf ("keep:   "); mpn_dump (&s_after, 1);
+           }
+         mpn_dump (ap, an);
+         mpn_dump (pp, rn);
+         mpn_dump (refp, rn);
+
+         abort();
+       }
+    }
+  TMP_FREE;
+  tests_end ();
+  return 0;
+}
diff --git a/tests/mpn/t-toom22.c b/tests/mpn/t-toom22.c

new file mode 100644 (file)

index 0000000..939a88e
--- /dev/null
+++ b/tests/mpn/t-toom22.c
@@ -0,0 +1,10 @@
+#define mpn_toomMN_mul mpn_toom22_mul
+#define mpn_toomMN_mul_itch mpn_toom22_mul_itch
+#define MIN_AN 2
+
+#define MIN_BN(an)                             \
+  ((an) >= 2*MUL_TOOM22_THRESHOLD              \
+   ? (an) + 2 - MUL_TOOM22_THRESHOLD           \
+   : ((an)+1)/2 + 1)
+
+#include "toom-shared.h"
diff --git a/tests/mpn/t-toom32.c b/tests/mpn/t-toom32.c

new file mode 100644 (file)

index 0000000..e42745d
--- /dev/null
+++ b/tests/mpn/t-toom32.c
@@ -0,0 +1,8 @@
+#define mpn_toomMN_mul mpn_toom32_mul
+#define mpn_toomMN_mul_itch mpn_toom32_mul_itch
+
+#define MIN_AN 6
+#define MIN_BN(an) (((an) + 8) / (size_t) 3)
+#define MAX_BN(an) ((an) - 2)
+
+#include "toom-shared.h"
diff --git a/tests/mpn/t-toom33.c b/tests/mpn/t-toom33.c

new file mode 100644 (file)

index 0000000..7de82b2
--- /dev/null
+++ b/tests/mpn/t-toom33.c
@@ -0,0 +1,11 @@
+#define mpn_toomMN_mul mpn_toom33_mul
+#define mpn_toomMN_mul_itch mpn_toom33_mul_itch
+
+/* Smaller sizes not supported; may lead to recursive calls to
+   toom22_mul with invalid input size. */
+#define MIN_AN MUL_TOOM33_THRESHOLD
+#define MIN_BN(an) (1 + 2*(((an)+2)/(size_t) 3))
+
+#define COUNT 1000
+
+#include "toom-shared.h"
diff --git a/tests/mpn/t-toom42.c b/tests/mpn/t-toom42.c

new file mode 100644 (file)

index 0000000..09a4a0c
--- /dev/null
+++ b/tests/mpn/t-toom42.c
@@ -0,0 +1,8 @@
+#define mpn_toomMN_mul mpn_toom42_mul
+#define mpn_toomMN_mul_itch mpn_toom42_mul_itch
+
+#define MIN_AN 10
+#define MIN_BN(an) (((an) + 7) >> 2)
+#define MAX_BN(an) ((2*(an)-5) / (size_t) 3)
+
+#include "toom-shared.h"
diff --git a/tests/mpn/t-toom43.c b/tests/mpn/t-toom43.c

new file mode 100644 (file)

index 0000000..224a45b
--- /dev/null
+++ b/tests/mpn/t-toom43.c
@@ -0,0 +1,8 @@
+#define mpn_toomMN_mul mpn_toom43_mul
+#define mpn_toomMN_mul_itch mpn_toom43_mul_itch
+
+#define MIN_AN 25
+#define MIN_BN(an) (1 + 2*(((an)+3) >> 2))
+#define MAX_BN(an) ((an)-3)
+
+#include "toom-shared.h"
diff --git a/tests/mpn/t-toom44.c b/tests/mpn/t-toom44.c

new file mode 100644 (file)

index 0000000..6c627e3
--- /dev/null
+++ b/tests/mpn/t-toom44.c
@@ -0,0 +1,11 @@
+#define mpn_toomMN_mul mpn_toom44_mul
+#define mpn_toomMN_mul_itch mpn_toom44_mul_itch
+
+/* Smaller sizes not supported; may lead to recursive calls to
+   toom22_mul or toom33_mul with invalid input size. */
+#define MIN_AN MUL_TOOM44_THRESHOLD
+#define MIN_BN(an) (1 + 3*(((an)+3)>>2))
+
+#define COUNT 1000
+
+#include "toom-shared.h"
diff --git a/tests/mpn/t-toom52.c b/tests/mpn/t-toom52.c

new file mode 100644 (file)

index 0000000..d3fb134
--- /dev/null
+++ b/tests/mpn/t-toom52.c
@@ -0,0 +1,8 @@
+#define mpn_toomMN_mul mpn_toom52_mul
+#define mpn_toomMN_mul_itch mpn_toom52_mul_itch
+
+#define MIN_AN 32
+#define MIN_BN(an) (((an) + 9) / (size_t) 5)
+#define MAX_BN(an) (((an) - 3) >> 1)
+
+#include "toom-shared.h"
diff --git a/tests/mpn/t-toom53.c b/tests/mpn/t-toom53.c

new file mode 100644 (file)

index 0000000..ddbf177
--- /dev/null
+++ b/tests/mpn/t-toom53.c
@@ -0,0 +1,8 @@
+#define mpn_toomMN_mul mpn_toom53_mul
+#define mpn_toomMN_mul_itch mpn_toom53_mul_itch
+
+#define MIN_AN 17
+#define MIN_BN(an) (1 + 2*(((an) + 4) / (size_t) 5))
+#define MAX_BN(an) ((3*(an) - 11) >> 2)
+
+#include "toom-shared.h"
diff --git a/tests/mpn/t-toom62.c b/tests/mpn/t-toom62.c

new file mode 100644 (file)

index 0000000..1cb2aab
--- /dev/null
+++ b/tests/mpn/t-toom62.c
@@ -0,0 +1,8 @@
+#define mpn_toomMN_mul mpn_toom62_mul
+#define mpn_toomMN_mul_itch mpn_toom62_mul_itch
+
+#define MIN_AN 31
+#define MIN_BN(an) (((an) + 11) / (size_t) 6)
+#define MAX_BN(an) ((2*(an) - 7) / (size_t) 5)
+
+#include "toom-shared.h"
diff --git a/tests/mpn/t-toom63.c b/tests/mpn/t-toom63.c

new file mode 100644 (file)

index 0000000..d79165d
--- /dev/null
+++ b/tests/mpn/t-toom63.c
@@ -0,0 +1,8 @@
+#define mpn_toomMN_mul mpn_toom63_mul
+#define mpn_toomMN_mul_itch mpn_toom63_mul_itch
+
+#define MIN_AN 49
+#define MIN_BN(an) (2*(((an) + 23) / (size_t) 6))      /* 2/6 */
+#define MAX_BN(an) ((3*(an) - 23)  / (size_t) 5)       /* 3/5 */
+
+#include "toom-shared.h"
diff --git a/tests/mpn/t-toom6h.c b/tests/mpn/t-toom6h.c

new file mode 100644 (file)

index 0000000..fc5df5d
--- /dev/null
+++ b/tests/mpn/t-toom6h.c
@@ -0,0 +1,13 @@
+#define mpn_toomMN_mul mpn_toom6h_mul
+#define mpn_toomMN_mul_itch mpn_toom6h_mul_itch
+/* Operand sizes go up to 2^SIZE_LOG limbs (toom-shared.h defaults to 10).  */
+#define SIZE_LOG 11
+
+/* Smaller sizes not supported; may lead to recursive calls to
+   toom22_mul, toom33_mul, or toom44_mul with invalid input size. */
+#define MIN_AN MUL_TOOM6H_THRESHOLD
+#define MIN_BN(an) (MAX (((an)*3)>>3, 46))
+/* Test iterations per run (toom-shared.h defaults to 2000).  */
+#define COUNT 1000
+
+#include "toom-shared.h"
diff --git a/tests/mpn/t-toom8h.c b/tests/mpn/t-toom8h.c

new file mode 100644 (file)

index 0000000..3360460
--- /dev/null
+++ b/tests/mpn/t-toom8h.c
@@ -0,0 +1,26 @@
+#define mpn_toomMN_mul mpn_toom8h_mul
+#define mpn_toomMN_mul_itch mpn_toom8h_mul_itch
+/* Operand sizes go up to 2^SIZE_LOG limbs (toom-shared.h defaults to 10).  */
+#define SIZE_LOG 11
+
+/* Smaller sizes not supported; may lead to recursive calls to
+   toom{22,33,44,6h}_mul with invalid input size. */
+#define MIN_AN MUL_TOOM8H_THRESHOLD
+/* Smallest bn: a fraction of an selected by GMP_NUMB_BITS, never below 86.  */
+#if GMP_NUMB_BITS <= 10*3
+#define MIN_BN(an) (MAX (((an)*6)/10, 86) )
+#else
+#if GMP_NUMB_BITS <= 11*3
+#define MIN_BN(an) (MAX (((an)*5)/11, 86) )
+#else
+#if GMP_NUMB_BITS <= 12*3
+#define MIN_BN(an) (MAX (((an)*4)/12, 86) )
+#else
+#define MIN_BN(an) (MAX (((an)*4)/13, 86) )
+#endif
+#endif
+#endif
+/* Test iterations per run (toom-shared.h defaults to 2000).  */
+#define COUNT 1000
+
+#include "toom-shared.h"
diff --git a/tests/mpn/toom-shared.h b/tests/mpn/toom-shared.h

new file mode 100644 (file)

index 0000000..57b3181
--- /dev/null
+++ b/tests/mpn/toom-shared.h
@@ -0,0 +1,158 @@
+/* Test for various Toom functions.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+
+/* Main file is expected to define mpn_toomMN_mul,
+ * mpn_toomMN_mul_itch, MIN_AN, MIN_BN(an), MAX_BN(an) and then
+ * include this file. */
+
+/* Sizes are up to 2^SIZE_LOG limbs */
+#ifndef SIZE_LOG
+#define SIZE_LOG 10
+#endif
+
+#ifndef COUNT
+#define COUNT 2000
+#endif
+
+#define MAX_AN (1L << SIZE_LOG)
+
+#ifndef MAX_BN
+#define MAX_BN(an) (an)
+#endif
+
+/* For general toomMN_mul, we need
+ *
+ * MIN_BN(an) = N + floor(((N-1)*an + M - N)/M)
+ *
+ * MAX_BN(an) = floor(N*(an-1)/(M-1)) - N + 1
+ */
+
+int
+main (int argc, char **argv)
+{
+  mp_ptr ap, bp, refp, pp, scratch;
+  int count = COUNT;
+  int test;
+  gmp_randstate_ptr rands;
+  TMP_DECL;
+  TMP_MARK;
+  /* An optional first argument overrides the number of test iterations.  */
+  if (argc > 1)
+    {
+      char *end;
+      count = strtol (argv[1], &end, 0);
+      if (*end || count <= 0)
+       {
+         fprintf (stderr, "Invalid test count: %s.\n", argv[1]);
+         return 1;
+       }
+    }
+
+  tests_start ();
+  rands = RANDS;
+  /* pp and scratch point one limb into their blocks: a guard limb on each side.  */
+  ap = TMP_ALLOC_LIMBS (MAX_AN);
+  bp = TMP_ALLOC_LIMBS (MAX_BN(MAX_AN));
+  refp = TMP_ALLOC_LIMBS (MAX_AN + MAX_BN(MAX_AN));
+  pp = 1+TMP_ALLOC_LIMBS (MAX_AN + MAX_BN(MAX_AN)+2);
+  scratch
+    = 1+TMP_ALLOC_LIMBS (mpn_toomMN_mul_itch (MAX_AN, MAX_BN(MAX_AN))
+                        + 2);
+
+  for (test = 0; test < count; test++)
+    {
+      unsigned size_min;
+      unsigned size_range;
+      mp_size_t an, bn;
+      mp_size_t itch;
+      mp_limb_t p_before, p_after, s_before, s_after;
+
+      for (size_min = 1; (1L << size_min) < MIN_AN; size_min++)
+       ;
+
+      /* We generate an in the range MIN_AN <= an <= (1 << size_range). */
+      size_range = size_min
+       + gmp_urandomm_ui (rands, SIZE_LOG + 1 - size_min);
+
+      an = MIN_AN
+       + gmp_urandomm_ui (rands, (1L << size_range) + 1 - MIN_AN);
+      bn = MIN_BN(an)
+       + gmp_urandomm_ui (rands, MAX_BN(an) + 1 - MIN_BN(an));
+      /* Random operands; pp is pre-filled too, including its two guard limbs.  */
+      mpn_random2 (ap, an);
+      mpn_random2 (bp, bn);
+      mpn_random2 (pp-1, an + bn + 2);
+      p_before = pp[-1];
+      p_after = pp[an + bn];
+      /* Randomize the scratch area too, and remember its two guard limbs.  */
+      itch = mpn_toomMN_mul_itch (an, bn);
+      ASSERT_ALWAYS (itch <= mpn_toomMN_mul_itch (MAX_AN, MAX_BN(MAX_AN)));
+      mpn_random2 (scratch-1, itch+2);
+      s_before = scratch[-1];
+      s_after = scratch[itch];
+      /* Run the function under test and refmpn_mul; guards and product must agree.  */
+      mpn_toomMN_mul (pp, ap, an, bp, bn, scratch);
+      refmpn_mul (refp, ap, an, bp, bn);
+      if (pp[-1] != p_before || pp[an + bn] != p_after
+         || scratch[-1] != s_before || scratch[itch] != s_after
+         || mpn_cmp (refp, pp, an + bn) != 0)
+       {
+         printf ("ERROR in test %d, an = %d, bn = %d\n",
+                 test, (int) an, (int) bn);
+         if (pp[-1] != p_before)
+           {
+             printf ("before pp:"); mpn_dump (pp -1, 1);
+             printf ("keep:   "); mpn_dump (&p_before, 1);
+           }
+         if (pp[an + bn] != p_after)
+           {
+             printf ("after pp:"); mpn_dump (pp + an + bn, 1);
+             printf ("keep:   "); mpn_dump (&p_after, 1);
+           }
+         if (scratch[-1] != s_before)
+           {
+             printf ("before scratch:"); mpn_dump (scratch-1, 1);
+             printf ("keep:   "); mpn_dump (&s_before, 1);
+           }
+         if (scratch[itch] != s_after)
+           {
+             printf ("after scratch:"); mpn_dump (scratch + itch, 1);
+             printf ("keep:   "); mpn_dump (&s_after, 1);
+           }
+         mpn_dump (ap, an);
+         mpn_dump (bp, bn);
+         mpn_dump (pp, an + bn);
+         mpn_dump (refp, an + bn);
+
+         abort();
+       }
+    }
+  TMP_FREE;
+
+  tests_end ();
+  return 0;
+}
diff --git a/tests/mpq/Makefile.am b/tests/mpq/Makefile.am

new file mode 100644 (file)

index 0000000..57c6817
--- /dev/null
+++ b/tests/mpq/Makefile.am
@@ -0,0 +1,34 @@
+## Process this file with automake to generate Makefile.in
+
+# Copyright 1996, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/tests
+LDADD = $(top_builddir)/tests/libtests.la $(top_builddir)/libgmp.la
+
+check_PROGRAMS = t-aors t-cmp t-cmp_ui t-cmp_si t-equal t-get_d t-get_str \
+  t-inp_str t-md_2exp t-set_f t-set_str
+TESTS = $(check_PROGRAMS)
+
+# Temporary files used by the tests.  Removed automatically if the tests
+# pass, but ensure they're cleaned if they fail.
+#
+CLEANFILES = *.tmp
+
+$(top_builddir)/tests/libtests.la:
+       cd $(top_builddir)/tests; $(MAKE) $(AM_MAKEFLAGS) libtests.la
diff --git a/tests/mpq/Makefile.in b/tests/mpq/Makefile.in

new file mode 100644 (file)

index 0000000..05ba2ef
--- /dev/null
+++ b/tests/mpq/Makefile.in
@@ -0,0 +1,742 @@
+# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation,
+# Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+# Copyright 1996, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+ANSI2KNR = $(top_builddir)/ansi2knr
+check_PROGRAMS = t-aors$(EXEEXT) t-cmp$(EXEEXT) t-cmp_ui$(EXEEXT) \
+       t-cmp_si$(EXEEXT) t-equal$(EXEEXT) t-get_d$(EXEEXT) \
+       t-get_str$(EXEEXT) t-inp_str$(EXEEXT) t-md_2exp$(EXEEXT) \
+       t-set_f$(EXEEXT) t-set_str$(EXEEXT)
+subdir = tests/mpq
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
+       $(top_srcdir)/configure.in
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+       $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+t_aors_SOURCES = t-aors.c
+t_aors_OBJECTS = t-aors$U.$(OBJEXT)
+t_aors_LDADD = $(LDADD)
+t_aors_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_cmp_SOURCES = t-cmp.c
+t_cmp_OBJECTS = t-cmp$U.$(OBJEXT)
+t_cmp_LDADD = $(LDADD)
+t_cmp_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_cmp_si_SOURCES = t-cmp_si.c
+t_cmp_si_OBJECTS = t-cmp_si$U.$(OBJEXT)
+t_cmp_si_LDADD = $(LDADD)
+t_cmp_si_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_cmp_ui_SOURCES = t-cmp_ui.c
+t_cmp_ui_OBJECTS = t-cmp_ui$U.$(OBJEXT)
+t_cmp_ui_LDADD = $(LDADD)
+t_cmp_ui_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_equal_SOURCES = t-equal.c
+t_equal_OBJECTS = t-equal$U.$(OBJEXT)
+t_equal_LDADD = $(LDADD)
+t_equal_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_get_d_SOURCES = t-get_d.c
+t_get_d_OBJECTS = t-get_d$U.$(OBJEXT)
+t_get_d_LDADD = $(LDADD)
+t_get_d_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_get_str_SOURCES = t-get_str.c
+t_get_str_OBJECTS = t-get_str$U.$(OBJEXT)
+t_get_str_LDADD = $(LDADD)
+t_get_str_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_inp_str_SOURCES = t-inp_str.c
+t_inp_str_OBJECTS = t-inp_str$U.$(OBJEXT)
+t_inp_str_LDADD = $(LDADD)
+t_inp_str_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_md_2exp_SOURCES = t-md_2exp.c
+t_md_2exp_OBJECTS = t-md_2exp$U.$(OBJEXT)
+t_md_2exp_LDADD = $(LDADD)
+t_md_2exp_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_set_f_SOURCES = t-set_f.c
+t_set_f_OBJECTS = t-set_f$U.$(OBJEXT)
+t_set_f_LDADD = $(LDADD)
+t_set_f_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_set_str_SOURCES = t-set_str.c
+t_set_str_OBJECTS = t-set_str$U.$(OBJEXT)
+t_set_str_LDADD = $(LDADD)
+t_set_str_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
+depcomp =
+am__depfiles_maybe =
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+       $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+       $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
+       $(LDFLAGS) -o $@
+SOURCES = t-aors.c t-cmp.c t-cmp_si.c t-cmp_ui.c t-equal.c t-get_d.c \
+       t-get_str.c t-inp_str.c t-md_2exp.c t-set_f.c t-set_str.c
+DIST_SOURCES = t-aors.c t-cmp.c t-cmp_si.c t-cmp_ui.c t-equal.c \
+       t-get_d.c t-get_str.c t-inp_str.c t-md_2exp.c t-set_f.c \
+       t-set_str.c
+ETAGS = etags
+CTAGS = ctags
+am__tty_colors = \
+red=; grn=; lgn=; blu=; std=
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ABI = @ABI@
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AS = @AS@
+ASMFLAGS = @ASMFLAGS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CALLING_CONVENTIONS_OBJS = @CALLING_CONVENTIONS_OBJS@
+CC = @CC@
+CCAS = @CCAS@
+CC_FOR_BUILD = @CC_FOR_BUILD@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CPP_FOR_BUILD = @CPP_FOR_BUILD@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFN_LONG_LONG_LIMB = @DEFN_LONG_LONG_LIMB@
+DEFS = @DEFS@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+EXEEXT_FOR_BUILD = @EXEEXT_FOR_BUILD@
+FGREP = @FGREP@
+GMP_LDFLAGS = @GMP_LDFLAGS@
+GMP_LIMB_BITS = @GMP_LIMB_BITS@
+GMP_NAIL_BITS = @GMP_NAIL_BITS@
+GREP = @GREP@
+HAVE_CLOCK_01 = @HAVE_CLOCK_01@
+HAVE_CPUTIME_01 = @HAVE_CPUTIME_01@
+HAVE_GETRUSAGE_01 = @HAVE_GETRUSAGE_01@
+HAVE_GETTIMEOFDAY_01 = @HAVE_GETTIMEOFDAY_01@
+HAVE_HOST_CPU_FAMILY_power = @HAVE_HOST_CPU_FAMILY_power@
+HAVE_HOST_CPU_FAMILY_powerpc = @HAVE_HOST_CPU_FAMILY_powerpc@
+HAVE_SIGACTION_01 = @HAVE_SIGACTION_01@
+HAVE_SIGALTSTACK_01 = @HAVE_SIGALTSTACK_01@
+HAVE_SIGSTACK_01 = @HAVE_SIGSTACK_01@
+HAVE_STACK_T_01 = @HAVE_STACK_T_01@
+HAVE_SYS_RESOURCE_H_01 = @HAVE_SYS_RESOURCE_H_01@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LEX = @LEX@
+LEXLIB = @LEXLIB@
+LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
+LIBCURSES = @LIBCURSES@
+LIBGMPXX_LDFLAGS = @LIBGMPXX_LDFLAGS@
+LIBGMP_DLL = @LIBGMP_DLL@
+LIBGMP_LDFLAGS = @LIBGMP_LDFLAGS@
+LIBM = @LIBM@
+LIBM_FOR_BUILD = @LIBM_FOR_BUILD@
+LIBOBJS = @LIBOBJS@
+LIBREADLINE = @LIBREADLINE@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+M4 = @M4@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
+STRIP = @STRIP@
+TAL_OBJECT = @TAL_OBJECT@
+TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
+U = @U@
+U_FOR_BUILD = @U_FOR_BUILD@
+VERSION = @VERSION@
+WITH_READLINE_01 = @WITH_READLINE_01@
+YACC = @YACC@
+YFLAGS = @YFLAGS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__leading_dot = @am__leading_dot@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+gmp_srclinks = @gmp_srclinks@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+mpn_objects = @mpn_objects@
+mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
+mpn_objs_in_libmp = @mpn_objs_in_libmp@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/tests
+LDADD = $(top_builddir)/tests/libtests.la $(top_builddir)/libgmp.la
+TESTS = $(check_PROGRAMS)
+
+# Temporary files used by the tests.  Removed automatically if the tests
+# pass, but ensure they're cleaned if they fail.
+#
+CLEANFILES = *.tmp
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps)
+       @for dep in $?; do \
+         case '$(am__configure_deps)' in \
+           *$$dep*) \
+             ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+               && { if test -f $@; then exit 0; else break; fi; }; \
+             exit 1;; \
+         esac; \
+       done; \
+       echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps tests/mpq/Makefile'; \
+       $(am__cd) $(top_srcdir) && \
+         $(AUTOMAKE) --gnu --ignore-deps tests/mpq/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+       @case '$?' in \
+         *config.status*) \
+           cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+         *) \
+           echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+           cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+       esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+clean-checkPROGRAMS:
+       @list='$(check_PROGRAMS)'; test -n "$$list" || exit 0; \
+       echo " rm -f" $$list; \
+       rm -f $$list || exit $$?; \
+       test -n "$(EXEEXT)" || exit 0; \
+       list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
+       echo " rm -f" $$list; \
+       rm -f $$list
+t-aors$(EXEEXT): $(t_aors_OBJECTS) $(t_aors_DEPENDENCIES) 
+       @rm -f t-aors$(EXEEXT)
+       $(LINK) $(t_aors_OBJECTS) $(t_aors_LDADD) $(LIBS)
+t-cmp$(EXEEXT): $(t_cmp_OBJECTS) $(t_cmp_DEPENDENCIES) 
+       @rm -f t-cmp$(EXEEXT)
+       $(LINK) $(t_cmp_OBJECTS) $(t_cmp_LDADD) $(LIBS)
+t-cmp_si$(EXEEXT): $(t_cmp_si_OBJECTS) $(t_cmp_si_DEPENDENCIES) 
+       @rm -f t-cmp_si$(EXEEXT)
+       $(LINK) $(t_cmp_si_OBJECTS) $(t_cmp_si_LDADD) $(LIBS)
+t-cmp_ui$(EXEEXT): $(t_cmp_ui_OBJECTS) $(t_cmp_ui_DEPENDENCIES) 
+       @rm -f t-cmp_ui$(EXEEXT)
+       $(LINK) $(t_cmp_ui_OBJECTS) $(t_cmp_ui_LDADD) $(LIBS)
+t-equal$(EXEEXT): $(t_equal_OBJECTS) $(t_equal_DEPENDENCIES) 
+       @rm -f t-equal$(EXEEXT)
+       $(LINK) $(t_equal_OBJECTS) $(t_equal_LDADD) $(LIBS)
+t-get_d$(EXEEXT): $(t_get_d_OBJECTS) $(t_get_d_DEPENDENCIES) 
+       @rm -f t-get_d$(EXEEXT)
+       $(LINK) $(t_get_d_OBJECTS) $(t_get_d_LDADD) $(LIBS)
+t-get_str$(EXEEXT): $(t_get_str_OBJECTS) $(t_get_str_DEPENDENCIES) 
+       @rm -f t-get_str$(EXEEXT)
+       $(LINK) $(t_get_str_OBJECTS) $(t_get_str_LDADD) $(LIBS)
+t-inp_str$(EXEEXT): $(t_inp_str_OBJECTS) $(t_inp_str_DEPENDENCIES) 
+       @rm -f t-inp_str$(EXEEXT)
+       $(LINK) $(t_inp_str_OBJECTS) $(t_inp_str_LDADD) $(LIBS)
+t-md_2exp$(EXEEXT): $(t_md_2exp_OBJECTS) $(t_md_2exp_DEPENDENCIES) 
+       @rm -f t-md_2exp$(EXEEXT)
+       $(LINK) $(t_md_2exp_OBJECTS) $(t_md_2exp_LDADD) $(LIBS)
+t-set_f$(EXEEXT): $(t_set_f_OBJECTS) $(t_set_f_DEPENDENCIES) 
+       @rm -f t-set_f$(EXEEXT)
+       $(LINK) $(t_set_f_OBJECTS) $(t_set_f_LDADD) $(LIBS)
+t-set_str$(EXEEXT): $(t_set_str_OBJECTS) $(t_set_str_DEPENDENCIES) 
+       @rm -f t-set_str$(EXEEXT)
+       $(LINK) $(t_set_str_OBJECTS) $(t_set_str_LDADD) $(LIBS)
+
+mostlyclean-compile:
+       -rm -f *.$(OBJEXT)
+
+distclean-compile:
+       -rm -f *.tab.c
+$(top_builddir)/ansi2knr:
+       $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
+
+mostlyclean-kr:
+       -test "$U" = "" || rm -f *_.c
+
+.c.o:
+       $(COMPILE) -c $<
+
+.c.obj:
+       $(COMPILE) -c `$(CYGPATH_W) '$<'`
+
+.c.lo:
+       $(LTCOMPILE) -c -o $@ $<
+t-aors_.c: t-aors.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-aors.c; then echo $(srcdir)/t-aors.c; else echo t-aors.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-cmp_.c: t-cmp.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-cmp.c; then echo $(srcdir)/t-cmp.c; else echo t-cmp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-cmp_si_.c: t-cmp_si.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-cmp_si.c; then echo $(srcdir)/t-cmp_si.c; else echo t-cmp_si.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-cmp_ui_.c: t-cmp_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-cmp_ui.c; then echo $(srcdir)/t-cmp_ui.c; else echo t-cmp_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-equal_.c: t-equal.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-equal.c; then echo $(srcdir)/t-equal.c; else echo t-equal.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-get_d_.c: t-get_d.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-get_d.c; then echo $(srcdir)/t-get_d.c; else echo t-get_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-get_str_.c: t-get_str.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-get_str.c; then echo $(srcdir)/t-get_str.c; else echo t-get_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-inp_str_.c: t-inp_str.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-inp_str.c; then echo $(srcdir)/t-inp_str.c; else echo t-inp_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-md_2exp_.c: t-md_2exp.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-md_2exp.c; then echo $(srcdir)/t-md_2exp.c; else echo t-md_2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-set_f_.c: t-set_f.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-set_f.c; then echo $(srcdir)/t-set_f.c; else echo t-set_f.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-set_str_.c: t-set_str.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-set_str.c; then echo $(srcdir)/t-set_str.c; else echo t-set_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-aors_.$(OBJEXT) t-aors_.lo t-cmp_.$(OBJEXT) t-cmp_.lo \
+t-cmp_si_.$(OBJEXT) t-cmp_si_.lo t-cmp_ui_.$(OBJEXT) t-cmp_ui_.lo \
+t-equal_.$(OBJEXT) t-equal_.lo t-get_d_.$(OBJEXT) t-get_d_.lo \
+t-get_str_.$(OBJEXT) t-get_str_.lo t-inp_str_.$(OBJEXT) t-inp_str_.lo \
+t-md_2exp_.$(OBJEXT) t-md_2exp_.lo t-set_f_.$(OBJEXT) t-set_f_.lo \
+t-set_str_.$(OBJEXT) t-set_str_.lo : $(ANSI2KNR)
+
+mostlyclean-libtool:
+       -rm -f *.lo
+
+clean-libtool:
+       -rm -rf .libs _libs
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+       list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       mkid -fID $$unique
+tags: TAGS
+
+TAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+               $(TAGS_FILES) $(LISP)
+       set x; \
+       here=`pwd`; \
+       list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       shift; \
+       if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+         test -n "$$unique" || unique=$$empty_fix; \
+         if test $$# -gt 0; then \
+           $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+             "$$@" $$unique; \
+         else \
+           $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+             $$unique; \
+         fi; \
+       fi
+ctags: CTAGS
+CTAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+               $(TAGS_FILES) $(LISP)
+       list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       test -z "$(CTAGS_ARGS)$$unique" \
+         || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+            $$unique
+
+GTAGS:
+       here=`$(am__cd) $(top_builddir) && pwd` \
+         && $(am__cd) $(top_srcdir) \
+         && gtags -i $(GTAGS_ARGS) "$$here"
+
+distclean-tags:
+       -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+check-TESTS: $(TESTS)
+       @failed=0; all=0; xfail=0; xpass=0; skip=0; \
+       srcdir=$(srcdir); export srcdir; \
+       list=' $(TESTS) '; \
+       $(am__tty_colors); \
+       if test -n "$$list"; then \
+         for tst in $$list; do \
+           if test -f ./$$tst; then dir=./; \
+           elif test -f $$tst; then dir=; \
+           else dir="$(srcdir)/"; fi; \
+           if $(TESTS_ENVIRONMENT) $${dir}$$tst; then \
+             all=`expr $$all + 1`; \
+             case " $(XFAIL_TESTS) " in \
+             *[\ \     ]$$tst[\ \      ]*) \
+               xpass=`expr $$xpass + 1`; \
+               failed=`expr $$failed + 1`; \
+               col=$$red; res=XPASS; \
+             ;; \
+             *) \
+               col=$$grn; res=PASS; \
+             ;; \
+             esac; \
+           elif test $$? -ne 77; then \
+             all=`expr $$all + 1`; \
+             case " $(XFAIL_TESTS) " in \
+             *[\ \     ]$$tst[\ \      ]*) \
+               xfail=`expr $$xfail + 1`; \
+               col=$$lgn; res=XFAIL; \
+             ;; \
+             *) \
+               failed=`expr $$failed + 1`; \
+               col=$$red; res=FAIL; \
+             ;; \
+             esac; \
+           else \
+             skip=`expr $$skip + 1`; \
+             col=$$blu; res=SKIP; \
+           fi; \
+           echo "$${col}$$res$${std}: $$tst"; \
+         done; \
+         if test "$$all" -eq 1; then \
+           tests="test"; \
+           All=""; \
+         else \
+           tests="tests"; \
+           All="All "; \
+         fi; \
+         if test "$$failed" -eq 0; then \
+           if test "$$xfail" -eq 0; then \
+             banner="$$All$$all $$tests passed"; \
+           else \
+             if test "$$xfail" -eq 1; then failures=failure; else failures=failures; fi; \
+             banner="$$All$$all $$tests behaved as expected ($$xfail expected $$failures)"; \
+           fi; \
+         else \
+           if test "$$xpass" -eq 0; then \
+             banner="$$failed of $$all $$tests failed"; \
+           else \
+             if test "$$xpass" -eq 1; then passes=pass; else passes=passes; fi; \
+             banner="$$failed of $$all $$tests did not behave as expected ($$xpass unexpected $$passes)"; \
+           fi; \
+         fi; \
+         dashes="$$banner"; \
+         skipped=""; \
+         if test "$$skip" -ne 0; then \
+           if test "$$skip" -eq 1; then \
+             skipped="($$skip test was not run)"; \
+           else \
+             skipped="($$skip tests were not run)"; \
+           fi; \
+           test `echo "$$skipped" | wc -c` -le `echo "$$banner" | wc -c` || \
+             dashes="$$skipped"; \
+         fi; \
+         report=""; \
+         if test "$$failed" -ne 0 && test -n "$(PACKAGE_BUGREPORT)"; then \
+           report="Please report to $(PACKAGE_BUGREPORT)"; \
+           test `echo "$$report" | wc -c` -le `echo "$$banner" | wc -c` || \
+             dashes="$$report"; \
+         fi; \
+         dashes=`echo "$$dashes" | sed s/./=/g`; \
+         if test "$$failed" -eq 0; then \
+           echo "$$grn$$dashes"; \
+         else \
+           echo "$$red$$dashes"; \
+         fi; \
+         echo "$$banner"; \
+         test -z "$$skipped" || echo "$$skipped"; \
+         test -z "$$report" || echo "$$report"; \
+         echo "$$dashes$$std"; \
+         test "$$failed" -eq 0; \
+       else :; fi
+
+distdir: $(DISTFILES)
+       @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+       topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+       list='$(DISTFILES)'; \
+         dist_files=`for file in $$list; do echo $$file; done | \
+         sed -e "s|^$$srcdirstrip/||;t" \
+             -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+       case $$dist_files in \
+         */*) $(MKDIR_P) `echo "$$dist_files" | \
+                          sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+                          sort -u` ;; \
+       esac; \
+       for file in $$dist_files; do \
+         if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+         if test -d $$d/$$file; then \
+           dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+           if test -d "$(distdir)/$$file"; then \
+             find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+           fi; \
+           if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+             cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+             find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+           fi; \
+           cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+         else \
+           test -f "$(distdir)/$$file" \
+           || cp -p $$d/$$file "$(distdir)/$$file" \
+           || exit 1; \
+         fi; \
+       done
+check-am: all-am
+       $(MAKE) $(AM_MAKEFLAGS) $(check_PROGRAMS)
+       $(MAKE) $(AM_MAKEFLAGS) check-TESTS
+check: check-am
+all-am: Makefile
+installdirs:
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+       @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+       $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+         install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+         `test -z '$(STRIP)' || \
+           echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+       -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES)
+
+distclean-generic:
+       -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+       -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+       @echo "This command is intended for maintainers to use"
+       @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-checkPROGRAMS clean-generic clean-libtool \
+       mostlyclean-am
+
+distclean: distclean-am
+       -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+       distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+       -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+       mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: $(top_builddir)/ansi2knr check-am install-am install-strip
+
+.PHONY: CTAGS GTAGS all all-am check check-TESTS check-am clean \
+       clean-checkPROGRAMS clean-generic clean-libtool ctags \
+       distclean distclean-compile distclean-generic \
+       distclean-libtool distclean-tags distdir dvi dvi-am html \
+       html-am info info-am install install-am install-data \
+       install-data-am install-dvi install-dvi-am install-exec \
+       install-exec-am install-html install-html-am install-info \
+       install-info-am install-man install-pdf install-pdf-am \
+       install-ps install-ps-am install-strip installcheck \
+       installcheck-am installdirs maintainer-clean \
+       maintainer-clean-generic mostlyclean mostlyclean-compile \
+       mostlyclean-generic mostlyclean-kr mostlyclean-libtool pdf \
+       pdf-am ps ps-am tags uninstall uninstall-am
+
+
+# Build tests/libtests.la by recursing into $(top_builddir)/tests.  The rule
+# has no prerequisites, so it only fires when the library does not exist.
+# "&&" keeps $(MAKE) from running in the current directory if the cd fails.
+$(top_builddir)/tests/libtests.la:
+       cd $(top_builddir)/tests && $(MAKE) $(AM_MAKEFLAGS) libtests.la
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/tests/mpq/t-aors.c b/tests/mpq/t-aors.c

new file mode 100644 (file)

index 0000000..df2dbe2
--- /dev/null
+++ b/tests/mpq/t-aors.c
@@ -0,0 +1,183 @@
+/* Test mpq_add and mpq_sub.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* Check mpq_add and mpq_sub on x and y against want_add and want_sub, and
+   then on every combination of negating x, negating y and exchanging x
+   with y (8 combinations in total).
+
+   The operands and expected values are modified in place as the loops
+   advance, but each loop level undoes its own changes after its two
+   iterations, so on return x, y, want_add and want_sub hold the values
+   they were passed in with.
+
+   Aborts, after printing the operands, on the first wrong result.  */
+void
+check_all (mpq_ptr x, mpq_ptr y, mpq_ptr want_add, mpq_ptr want_sub)
+{
+  mpq_t  got;
+  int    neg_x, neg_y, swap;
+
+  mpq_init (got);
+
+  MPQ_CHECK_FORMAT (want_add);
+  MPQ_CHECK_FORMAT (want_sub);
+  MPQ_CHECK_FORMAT (x);
+  MPQ_CHECK_FORMAT (y);
+
+  for (swap = 0; swap <= 1; swap++)
+    {
+      for (neg_x = 0; neg_x <= 1; neg_x++)
+        {
+          for (neg_y = 0; neg_y <= 1; neg_y++)
+            {
+              mpq_add (got, x, y);
+              MPQ_CHECK_FORMAT (got);
+              if (! mpq_equal (got, want_add))
+                {
+                  printf ("mpq_add wrong\n");
+                  mpq_trace ("  x   ", x);
+                  mpq_trace ("  y   ", y);
+                  mpq_trace ("  got ", got);
+                  mpq_trace ("  want", want_add);
+                  abort ();
+                }
+
+              mpq_sub (got, x, y);
+              MPQ_CHECK_FORMAT (got);
+              if (! mpq_equal (got, want_sub))
+                {
+                  printf ("mpq_sub wrong\n");
+                  mpq_trace ("  x   ", x);
+                  mpq_trace ("  y   ", y);
+                  mpq_trace ("  got ", got);
+                  mpq_trace ("  want", want_sub);
+                  abort ();
+                }
+
+
+              /* y -> -y: x+(-y) is the old x-y and x-(-y) is the old x+y,
+                 so the two expected values simply trade places.  */
+              mpq_neg (y, y);
+              mpq_swap (want_add, want_sub);
+            }
+
+          /* x -> -x: (-x)+y = -(x-y) and (-x)-y = -(x+y), so the expected
+             values trade places and both change sign.  */
+          mpq_neg (x, x);
+          mpq_swap (want_add, want_sub);
+          mpq_neg (want_add, want_add);
+          mpq_neg (want_sub, want_sub);
+        }
+
+      /* Exchange the operands: the sum is unchanged, the difference
+         changes sign.  */
+      mpq_swap (x, y);
+      mpq_neg (want_sub, want_sub);
+    }
+
+  mpq_clear (got);
+}
+
+
+/* Run check_all over a small table of hand-written cases.  Each entry
+   gives the two operands followed by the expected sum and difference, all
+   as strings handed to mpq_set_str_or_abort with base 0.  */
+void
+check_data (void)
+{
+  static const struct {
+    const char  *x;
+    const char  *y;
+    const char  *want_add;
+    const char  *want_sub;
+
+  } data[] = {
+
+    { "0", "0", "0", "0" },
+    { "1", "0", "1", "1" },
+    { "1", "1", "2", "0" },
+
+    { "1/2", "1/2", "1", "0" },
+    { "5/6", "14/15", "53/30", "-1/10" },
+  };
+
+  mpq_t  x, y, want_add, want_sub;
+  int    n;
+
+  mpq_init (x);
+  mpq_init (y);
+  mpq_init (want_add);
+  mpq_init (want_sub);
+
+  n = 0;
+  while (n < numberof (data))
+    {
+      /* Load operands and expected results for this table entry.  */
+      mpq_set_str_or_abort (x, data[n].x, 0);
+      mpq_set_str_or_abort (y, data[n].y, 0);
+      mpq_set_str_or_abort (want_add, data[n].want_add, 0);
+      mpq_set_str_or_abort (want_sub, data[n].want_sub, 0);
+
+      check_all (x, y, want_add, want_sub);
+
+      n++;
+    }
+
+  mpq_clear (x);
+  mpq_clear (y);
+  mpq_clear (want_add);
+  mpq_clear (want_sub);
+}
+
+
+/* Run check_all on 500 random operand pairs.  Numerators and denominators
+   are drawn with a 512 bit size argument and the fractions canonicalized;
+   the expected sum and difference come from refmpq_add and refmpq_sub.  */
+void
+check_rand (void)
+{
+  gmp_randstate_ptr  rands = RANDS;
+  mpq_t  x, y, want_add, want_sub;
+  int    remaining;
+
+  mpq_init (x);
+  mpq_init (y);
+  mpq_init (want_add);
+  mpq_init (want_sub);
+
+  for (remaining = 500; remaining > 0; remaining--)
+    {
+      /* First operand.  The _nonzero variant is used for the denominator,
+         presumably so that it can never come out as zero.  */
+      mpz_errandomb (mpq_numref(x), rands, 512L);
+      mpz_errandomb_nonzero (mpq_denref(x), rands, 512L);
+      mpq_canonicalize (x);
+
+      /* Second operand, drawn the same way.  */
+      mpz_errandomb (mpq_numref(y), rands, 512L);
+      mpz_errandomb_nonzero (mpq_denref(y), rands, 512L);
+      mpq_canonicalize (y);
+
+      /* Expected results.  */
+      refmpq_add (want_add, x, y);
+      refmpq_sub (want_sub, x, y);
+
+      check_all (x, y, want_add, want_sub);
+    }
+
+  mpq_clear (want_sub);
+  mpq_clear (want_add);
+  mpq_clear (y);
+  mpq_clear (x);
+}
+
+
+/* Test driver: run the table-driven checks, then the random checks,
+   bracketed by tests_start and tests_end.  Exits with status 0 when no
+   check aborted.  */
+int
+main (void)
+{
+  tests_start ();
+
+  check_data ();   /* fixed cases */
+  check_rand ();   /* random cases */
+
+  tests_end ();
+
+  return 0;
+}
diff --git a/tests/mpq/t-cmp.c b/tests/mpq/t-cmp.c

new file mode 100644 (file)

index 0000000..ac0dc72
--- /dev/null
+++ b/tests/mpq/t-cmp.c
@@ -0,0 +1,105 @@
+/* Test mpq_cmp.
+
+Copyright 1996, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+#define NUM(x) (&((x)->_mp_num))
+#define DEN(x) (&((x)->_mp_den))
+
+#define SGN(x) ((x) < 0 ? -1 : (x) > 0 ? 1 : 0)
+
+/* Reference comparison of a and b by cross-multiplication: returns the
+   result of mpz_cmp on num(a)*den(b) and num(b)*den(a).  */
+int
+ref_mpq_cmp (mpq_t a, mpq_t b)
+{
+  mpz_t  lhs, rhs;
+  int    result;
+
+  mpz_init (lhs);
+  mpz_init (rhs);
+
+  mpz_mul (lhs, NUM (a), DEN (b));   /* num(a) * den(b) */
+  mpz_mul (rhs, NUM (b), DEN (a));   /* num(b) * den(a) */
+
+  result = mpz_cmp (lhs, rhs);
+
+  mpz_clear (lhs);
+  mpz_clear (rhs);
+
+  return result;
+}
+
+/* Bound on the operand sizes drawn in main, which computes each size as
+   urandom () % SIZE - SIZE/2.  May be overridden at compile time.  */
+#ifndef SIZE
+#define SIZE 8 /* increasing this lowers the probability of finding an error */
+#endif
+
+/* Compare mpq_cmp with ref_mpq_cmp on randomly generated operands.
+
+   An optional single command-line argument replaces the default count of
+   10000 iterations.  Only the signs of the two results are compared.  On a
+   mismatch the iteration number, both operands and both results are
+   printed before aborting, so that a failure leaves something to debug
+   with.  */
+int
+main (int argc, char **argv)
+{
+  mpq_t a, b;
+  mp_size_t size;
+  int reps = 10000;
+  int i;
+  int cc, ccref;
+
+  tests_start ();
+
+  if (argc == 2)
+     reps = atoi (argv[1]);
+
+  mpq_init (a);
+  mpq_init (b);
+
+  for (i = 0; i < reps; i++)
+    {
+      /* Random numerator for a; the size expression is meant to yield
+         values on both sides of zero.  */
+      size = urandom () % SIZE - SIZE/2;
+      mpz_random2 (NUM (a), size);
+
+      /* Random denominator for a, redrawn until it is nonzero.  */
+      do
+       {
+         size = urandom () % SIZE - SIZE/2;
+         mpz_random2 (DEN (a), size);
+       }
+      while (mpz_cmp_ui (DEN (a), 0) == 0);
+
+      /* Same again for b.  */
+      size = urandom () % SIZE - SIZE/2;
+      mpz_random2 (NUM (b), size);
+      do
+       {
+         size = urandom () % SIZE - SIZE/2;
+         mpz_random2 (DEN (b), size);
+       }
+      while (mpz_cmp_ui (DEN (b), 0) == 0);
+
+      mpq_canonicalize (a);
+      mpq_canonicalize (b);
+
+      ccref = ref_mpq_cmp (a, b);
+      cc = mpq_cmp (a, b);
+
+      if (SGN (ccref) != SGN (cc))
+       {
+         printf ("mpq_cmp wrong, iteration %d\n", i);
+         mpq_trace ("  a", a);
+         mpq_trace ("  b", b);
+         printf ("  got=%d\n", cc);
+         printf ("  want=%d\n", ccref);
+         abort ();
+       }
+    }
+
+  mpq_clear (a);
+  mpq_clear (b);
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpq/t-cmp_si.c b/tests/mpq/t-cmp_si.c

new file mode 100644 (file)

index 0000000..89b2239
--- /dev/null
+++ b/tests/mpq/t-cmp_si.c
@@ -0,0 +1,118 @@
+/* Test mpq_cmp_si.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <limits.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+#define SGN(x)   ((x)<0 ? -1 : (x) != 0)
+
+/* Check mpq_cmp_si against a table of cases.  Each entry gives q as a
+   string, the fraction n/d to compare it with, and the wanted sign of the
+   comparison.  Entries with n == 0 are checked a second time with a
+   literal 0L as the numerator argument.  Prints the failing case and
+   aborts on the first wrong result.  */
+void
+check_data (void)
+{
+  static const struct {
+    const char     *q;
+    long           n;
+    unsigned long  d;
+    int            want;
+  } data[] = {
+    { "0", 0, 1, 0 },
+    { "0", 0, 123, 0 },
+    { "0", 0, ULONG_MAX, 0 },
+    { "1", 0, 1, 1 },
+    { "1", 0, 123, 1 },
+    { "1", 0, ULONG_MAX, 1 },
+    { "-1", 0, 1, -1 },
+    { "-1", 0, 123, -1 },
+    { "-1", 0, ULONG_MAX, -1 },
+
+    { "123", 123, 1, 0 },
+    { "124", 123, 1, 1 },
+    { "122", 123, 1, -1 },
+
+    { "-123", 123, 1, -1 },
+    { "-124", 123, 1, -1 },
+    { "-122", 123, 1, -1 },
+
+    { "123", -123, 1, 1 },
+    { "124", -123, 1, 1 },
+    { "122", -123, 1, 1 },
+
+    { "-123", -123, 1, 0 },
+    { "-124", -123, 1, -1 },
+    { "-122", -123, 1, 1 },
+
+    { "5/7", 3,4, -1 },
+    { "5/7", -3,4, 1 },
+    { "-5/7", 3,4, -1 },
+    { "-5/7", -3,4, 1 },
+  };
+
+  mpq_t  q;
+  int    idx, got;
+
+  mpq_init (q);
+
+  for (idx = 0; idx < numberof (data); idx++)
+    {
+      int  bad;
+
+      mpq_set_str_or_abort (q, data[idx].q, 0);
+      MPQ_CHECK_FORMAT (q);
+
+      /* Comparison with the numerator taken from the table.  */
+      got = mpq_cmp_si (q, data[idx].n, data[idx].d);
+      bad = (SGN(got) != data[idx].want);
+
+      /* For a zero numerator, repeat with a literal constant argument.  */
+      if (! bad && data[idx].n == 0)
+        {
+          got = mpq_cmp_si (q, 0L, data[idx].d);
+          bad = (SGN(got) != data[idx].want);
+        }
+
+      if (bad)
+        {
+          printf ("mpq_cmp_si wrong\n");
+          mpq_trace ("  q", q);
+          printf ("  n=%ld\n", data[idx].n);
+          printf ("  d=%lu\n", data[idx].d);
+          printf ("  got=%d\n", got);
+          printf ("  want=%d\n", data[idx].want);
+          abort ();
+        }
+    }
+
+  mpq_clear (q);
+}
+
+/* Test driver: run check_data between tests_start and tests_end.  The
+   command-line arguments are accepted but not used.  */
+int
+main (int argc, char **argv)
+{
+  tests_start ();
+  check_data ();
+  tests_end ();
+
+  return 0;
+}
diff --git a/tests/mpq/t-cmp_ui.c b/tests/mpq/t-cmp_ui.c

new file mode 100644 (file)

index 0000000..3768a77
--- /dev/null
+++ b/tests/mpq/t-cmp_ui.c
@@ -0,0 +1,108 @@
+/* Test mpq_cmp_ui.
+
+Copyright 1996, 1997, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+#define NUM(x) (&((x)->_mp_num))
+#define DEN(x) (&((x)->_mp_den))
+
+#define SGN(x) ((x) < 0 ? -1 : (x) > 0 ? 1 : 0)
+
+/* Reference comparison of a with bn/bd by cross-multiplication: returns
+   the result of mpz_cmp on num(a)*bd and den(a)*bn.  */
+int
+ref_mpq_cmp_ui (mpq_t a, unsigned long int bn, unsigned long int bd)
+{
+  mpz_t  lhs, rhs;
+  int    result;
+
+  mpz_init (lhs);
+  mpz_init (rhs);
+
+  mpz_mul_ui (lhs, NUM (a), bd);   /* num(a) * bd */
+  mpz_mul_ui (rhs, DEN (a), bn);   /* den(a) * bn */
+
+  result = mpz_cmp (lhs, rhs);
+
+  mpz_clear (lhs);
+  mpz_clear (rhs);
+
+  return result;
+}
+
+/* Bound on the sizes drawn for a in main, which computes each size as
+   urandom () % SIZE - SIZE/2.  May be overridden at compile time.  */
+#ifndef SIZE
+#define SIZE 8 /* increasing this lowers the probability of finding an error */
+#endif
+
+/* Compare mpq_cmp_ui with ref_mpq_cmp_ui on randomly generated operands.
+
+   An optional single command-line argument replaces the default count of
+   10000 iterations.  Only the signs of the two results are compared.  On a
+   mismatch the iteration number, the operands and both results are printed
+   before aborting, so that a failure leaves something to debug with.  */
+int
+main (int argc, char **argv)
+{
+  mpq_t a, b;
+  mp_size_t size;
+  int reps = 10000;
+  int i;
+  int cc, ccref;
+  unsigned long int bn, bd;
+
+  tests_start ();
+
+  if (argc == 2)
+     reps = atoi (argv[1]);
+
+  mpq_init (a);
+  mpq_init (b);
+
+  for (i = 0; i < reps; i++)
+    {
+      /* Random numerator for a; the size expression is meant to yield
+         values on both sides of zero.  */
+      size = urandom () % SIZE - SIZE/2;
+      mpz_random2 (NUM (a), size);
+
+      /* Random denominator for a, redrawn until it is nonzero.  */
+      do
+       {
+         size = urandom () % SIZE - SIZE/2;
+         mpz_random2 (DEN (a), size);
+       }
+      while (mpz_cmp_ui (DEN (a), 0) == 0);
+
+      /* Numerator and denominator of b: reducing modulo the largest
+         unsigned long and adding 1 puts each in 1..ULONG_MAX, so both fit
+         an unsigned long and the denominator is never zero.  */
+      mpz_random2 (NUM (b), (mp_size_t) 1);
+      mpz_mod_ui (NUM (b), NUM (b), ~(unsigned long int) 0);
+      mpz_add_ui (NUM (b), NUM (b), 1);
+
+      mpz_random2 (DEN (b), (mp_size_t) 1);
+      mpz_mod_ui (DEN (b), DEN (b), ~(unsigned long int) 0);
+      mpz_add_ui (DEN (b), DEN (b), 1);
+
+      mpq_canonicalize (a);
+      mpq_canonicalize (b);
+
+      bn = mpz_get_ui (NUM (b));
+      bd = mpz_get_ui (DEN (b));
+
+      ccref = ref_mpq_cmp_ui (a, bn, bd);
+      cc = mpq_cmp_ui (a, bn, bd);
+
+      if (SGN (ccref) != SGN (cc))
+       {
+         printf ("mpq_cmp_ui wrong, iteration %d\n", i);
+         mpq_trace ("  a", a);
+         printf ("  bn=%lu\n", bn);
+         printf ("  bd=%lu\n", bd);
+         printf ("  got=%d\n", cc);
+         printf ("  want=%d\n", ccref);
+         abort ();
+       }
+    }
+
+  mpq_clear (a);
+  mpq_clear (b);
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpq/t-equal.c b/tests/mpq/t-equal.c

new file mode 100644 (file)

index 0000000..982b143
--- /dev/null
+++ b/tests/mpq/t-equal.c
@@ -0,0 +1,144 @@
+/* Test mpq_equal.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* Call mpq_equal (x, y) and check that its result agrees with want, where
+   both are treated purely as zero / non-zero.  Prints the operands and
+   aborts on disagreement.  */
+void
+check_one (mpq_srcptr x, mpq_srcptr y, int want)
+{
+  int  got;
+
+  MPQ_CHECK_FORMAT (x);
+  MPQ_CHECK_FORMAT (y);
+
+  got = mpq_equal (x, y);
+
+  /* Same truth value: nothing to report.  */
+  if (!got == !want)
+    return;
+
+  printf ("mpq_equal got %d want %d\n", got, want);
+  mpq_trace ("x", x);
+  mpq_trace ("y", y);
+  abort ();
+}
+
+
+/* Check mpq_equal on x and y in both argument orders, first as given and
+   then with both operands negated.  x and y are left negated on return.  */
+void
+check_all (mpq_ptr x, mpq_ptr y, int want)
+{
+  int  pass;
+
+  for (pass = 0; pass < 2; pass++)
+    {
+      check_one (x, y, want);
+      check_one (y, x, want);
+
+      /* Negate both operands once, between the two passes.  */
+      if (pass == 0)
+        {
+          mpq_neg (x, x);
+          mpq_neg (y, y);
+        }
+    }
+}
+
+
+#define SET4Z(z, size,l3,l2,l1,l0) \
+  SIZ(z) = size; PTR(z)[3] = l3; PTR(z)[2] = l2; PTR(z)[1] = l1; PTR(z)[0] = l0
+
+#define SET4(q, nsize,n3,n2,n1,n0, dsize,d3,d2,d1,d0)   \
+  SET4Z (mpq_numref(q), nsize,n3,n2,n1,n0);             \
+  SET4Z (mpq_denref(q), dsize,d3,d2,d1,d0)
+
+
+/* Exercise various combinations of same and slightly different values. */
+
+/* Build operands limb by limb with SET4 and check mpq_equal on them.
+
+   In each SET4 call the first number of each group of five is the size
+   field and the following four are limbs 3..0.  Limbs at or above the
+   absolute size are deliberately given different values in x and y; the
+   "equal" cases therefore also check that such limbs are ignored.  */
+void
+check_various (void)
+{
+  mpq_t  x, y;
+
+  mpq_init (x);
+  mpq_init (y);
+
+  /* SET4 writes limbs 0..3 directly, so make sure they are allocated.  */
+  mpz_realloc (mpq_numref(x), (mp_size_t) 20);
+  mpz_realloc (mpq_denref(x), (mp_size_t) 20);
+  mpz_realloc (mpq_numref(y), (mp_size_t) 20);
+  mpz_realloc (mpq_denref(y), (mp_size_t) 20);
+
+  /* 0 == 0 */
+  SET4 (x, 0,13,12,11,10, 1,23,22,21,1);
+  SET4 (y, 0,33,32,31,30, 1,43,42,41,1);
+  check_all (x, y, 1);
+
+  /* 83/99 == 83/99 */
+  SET4 (x, 1,13,12,11,83, 1,23,22,21,99);
+  SET4 (y, 1,33,32,31,83, 1,43,42,41,99);
+  check_all (x, y, 1);
+
+  /* 1:2:3:4/5:6:7 == 1:2:3:4/5:6:7 */
+  SET4 (x, 4,1,2,3,4, 3,88,5,6,7);
+  SET4 (y, 4,1,2,3,4, 3,99,5,6,7);
+  check_all (x, y, 1);
+
+  /* various individual changes making != */
+  /* numerator limb 0 differs */
+  SET4 (x, 4,1,2,3,667, 3,88,5,6,7);
+  SET4 (y, 4,1,2,3,4, 3,99,5,6,7);
+  check_all (x, y, 0);
+  /* numerator limb 1 differs */
+  SET4 (x, 4,1,2,666,4, 3,88,5,6,7);
+  SET4 (y, 4,1,2,3,4, 3,99,5,6,7);
+  check_all (x, y, 0);
+  /* numerator limb 2 differs */
+  SET4 (x, 4,1,666,3,4, 3,88,5,6,7);
+  SET4 (y, 4,1,2,3,4, 3,99,5,6,7);
+  check_all (x, y, 0);
+  /* numerator limb 3 differs.  NOTE(review): this case is compiled out
+     when GMP_NUMB_BITS is 62; the reason is not visible here.  */
+#if GMP_NUMB_BITS != 62
+  SET4 (x, 4,667,2,3,4, 3,88,5,6,7);
+  SET4 (y, 4,1,2,3,4, 3,99,5,6,7);
+  check_all (x, y, 0);
+#endif
+  /* denominator limb 0 differs */
+  SET4 (x, 4,1,2,3,4, 3,88,5,6,667);
+  SET4 (y, 4,1,2,3,4, 3,99,5,6,7);
+  check_all (x, y, 0);
+  /* denominator limb 1 differs */
+  SET4 (x, 4,1,2,3,4, 3,88,5,667,7);
+  SET4 (y, 4,1,2,3,4, 3,99,5,6,7);
+  check_all (x, y, 0);
+  /* denominator limb 2 differs */
+  SET4 (x, 4,1,2,3,4, 3,88,666,6,7);
+  SET4 (y, 4,1,2,3,4, 3,99,5,6,7);
+  check_all (x, y, 0);
+  /* numerator size has the opposite sign */
+  SET4 (x, -4,1,2,3,4, 3,88,5,6,7);
+  SET4 (y,  4,1,2,3,4, 3,99,5,6,7);
+  check_all (x, y, 0);
+  /* numerator size differs in magnitude (1 limb against 4) */
+  SET4 (x, 1,1,2,3,4, 3,88,5,6,7);
+  SET4 (y, 4,1,2,3,4, 3,99,5,6,7);
+  check_all (x, y, 0);
+
+  mpq_clear (x);
+  mpq_clear (y);
+}
+
+
+/* Test driver: set mp_trace_base to -16 for any trace output, then run
+   check_various between tests_start and tests_end.  */
+int
+main (void)
+{
+  tests_start ();
+
+  mp_trace_base = -16;
+  check_various ();
+
+  tests_end ();
+
+  return 0;
+}
diff --git a/tests/mpq/t-get_d.c b/tests/mpq/t-get_d.c

new file mode 100644 (file)

index 0000000..f116189
--- /dev/null
+++ b/tests/mpq/t-get_d.c
@@ -0,0 +1,272 @@
+/* Test mpq_get_d and mpq_set_d
+
+Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2002, 2003 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+#ifndef SIZE
+#define SIZE 8
+#endif
+
+/* VAX D floats only have an 8 bit signed exponent, so anything 2^128 or
+   bigger will overflow, that being 4 limbs. */
+#if defined (__vax__) && SIZE > 4
+#undef SIZE
+#define SIZE 4
+#define EPSIZE 3
+#else
+#define EPSIZE SIZE
+#endif
+
+void dump __GMP_PROTO ((mpq_t));
+
+void
+check_monotonic (int argc, char **argv)
+{
+  mpq_t a;
+  mp_size_t size;
+  int reps = 100;  /* default; a single command line argument overrides it */
+  int i, j;
+  double last_d, new_d;
+  mpq_t qlast_d, qnew_d;
+  mpq_t eps;
+
+  if (argc == 2)
+     reps = atoi (argv[1]);
+
+  /* The idea here is to test the monotonicity of mpq_get_d by repeatedly
+     adding a random eps to a and checking the result never decreases.  */
+
+  mpq_init (a);
+  mpq_init (eps);
+  mpq_init (qlast_d);
+  mpq_init (qnew_d);
+
+  for (i = 0; i < reps; i++)
+    {
+      size = urandom () % SIZE - SIZE/2;  /* may be negative or zero */
+      mpz_random2 (mpq_numref (a), size);
+      do
+       {
+         size = urandom () % SIZE - SIZE/2;
+         mpz_random2 (mpq_denref (a), size);
+       }
+      while (mpz_cmp_ui (mpq_denref (a), 0) == 0);  /* retry on a zero denominator */
+
+      mpq_canonicalize (a);
+
+      last_d = mpq_get_d (a);
+      mpq_set_d (qlast_d, last_d);
+      for (j = 0; j < 10; j++)
+       {
+         size = urandom () % EPSIZE + 1;  /* 1 .. EPSIZE, never zero */
+         mpz_random2 (mpq_numref (eps), size);
+         size = urandom () % EPSIZE + 1;
+         mpz_random2 (mpq_denref (eps), size);
+         mpq_canonicalize (eps);
+
+         mpq_add (a, a, eps);
+         mpq_canonicalize (a);
+         new_d = mpq_get_d (a);
+         if (last_d > new_d)  /* the double went down after adding eps */
+           {
+             printf ("\nERROR (test %d/%d): bad mpq_get_d results\n", i, j);
+             printf ("last: %.16g\n", last_d);
+             printf (" new: %.16g\n", new_d); dump (a);
+             abort ();
+           }
+         mpq_set_d (qnew_d, new_d);
+         MPQ_CHECK_FORMAT (qnew_d);
+         if (mpq_cmp (qlast_d, qnew_d) > 0)  /* same ordering check via mpq_set_d */
+           {
+             printf ("ERROR (test %d/%d): bad mpq_set_d results\n", i, j);
+             printf ("last: %.16g\n", last_d); dump (qlast_d);
+             printf (" new: %.16g\n", new_d); dump (qnew_d);
+             abort ();
+           }
+         last_d = new_d;
+         mpq_set (qlast_d, qnew_d);
+       }
+    }
+
+  mpq_clear (a);
+  mpq_clear (eps);
+  mpq_clear (qlast_d);
+  mpq_clear (qnew_d);
+}
+
+double
+my_ldexp (double d, int e)
+{
+  for (;;)  /* returns d scaled by 2^e: steps of 2^16 while |e| >= 16, then 2 */
+    {
+      if (e > 0)
+       {
+         if (e >= 16)
+           {
+             d *= 65536.0;  /* 2^16 */
+             e -= 16;
+           }
+         else
+           {
+             d *= 2.0;
+             e -= 1;
+           }
+       }
+      else if (e < 0)
+       {
+
+         if (e <= -16)
+           {
+             d /= 65536.0;  /* 2^16 */
+             e += 16;
+           }
+         else
+           {
+             d /= 2.0;
+             e += 1;
+           }
+       }
+      else
+       return d;  /* e == 0: all scaling applied */
+    }
+}
+
+void
+check_random (int argc, char **argv)
+{
+  double d, d2, nd, dd;
+  mpq_t q;
+  mp_limb_t rp[LIMBS_PER_DOUBLE + 1];  /* value limbs plus one for the exponent */
+  int test, reps = 100000;
+  int i;
+
+  if (argc == 2)
+     reps = 100 * atoi (argv[1]);
+
+  mpq_init (q);
+
+  for (test = 0; test < reps; test++)
+    {
+      mpn_random2 (rp, LIMBS_PER_DOUBLE + 1);
+      d = 0.0;
+      for (i = LIMBS_PER_DOUBLE - 1; i >= 0; i--)  /* fold limbs, high to low */
+       d = d * MP_BASE_AS_DOUBLE + rp[i];
+      d = my_ldexp (d, (int) (rp[LIMBS_PER_DOUBLE] % 1000) - 500);  /* e in -500..499 */
+      mpq_set_d (q, d);
+      nd = mpz_get_d (mpq_numref (q));
+      dd = mpz_get_d (mpq_denref (q));
+      d2 = nd / dd;  /* round trip through q; must give back d exactly */
+      if (d != d2)
+       {
+         printf ("ERROR (check_random test %d): bad mpq_set_d results\n", test);
+         printf ("%.16g\n", d);
+         printf ("%.16g\n", d2);
+         abort ();
+       }
+    }
+  mpq_clear (q);
+}
+
+void
+dump (mpq_t x)
+{
+  mpz_out_str (stdout, 10, mpq_numref (x));  /* prints "num/den\n" in decimal */
+  printf ("/");
+  mpz_out_str (stdout, 10, mpq_denref (x));
+  printf ("\n");
+}
+
+/* Check mpq_get_d on 2^n and 1/2^n, both signs, n near limb boundaries. */
+void
+check_onebit (void)
+{
+  static const long data[] = {  /* exponents n; negative means 1/2^-n */
+    -3*GMP_NUMB_BITS-1, -3*GMP_NUMB_BITS, -3*GMP_NUMB_BITS+1,
+    -2*GMP_NUMB_BITS-1, -2*GMP_NUMB_BITS, -2*GMP_NUMB_BITS+1,
+    -GMP_NUMB_BITS-1, -GMP_NUMB_BITS, -GMP_NUMB_BITS+1,
+    -5, -2, -1, 0, 1, 2, 5,
+    GMP_NUMB_BITS-1, GMP_NUMB_BITS, GMP_NUMB_BITS+1,
+    2*GMP_NUMB_BITS-1, 2*GMP_NUMB_BITS, 2*GMP_NUMB_BITS+1,
+    3*GMP_NUMB_BITS-1, 3*GMP_NUMB_BITS, 3*GMP_NUMB_BITS+1,
+  };
+
+  int     i, neg;
+  long    exp, l;
+  mpq_t   q;
+  double  got, want;
+
+  mpq_init (q);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      exp = data[i];
+
+      mpq_set_ui (q, 1L, 1L);
+      if (exp >= 0)
+       mpq_mul_2exp (q, q, exp);
+      else
+       mpq_div_2exp (q, q, -exp);
+
+      want = 1.0;  /* build the expected double; only one loop below runs */
+      for (l = 0; l < exp; l++)
+       want *= 2.0;
+      for (l = 0; l > exp; l--)
+       want /= 2.0;
+
+      for (neg = 0; neg <= 1; neg++)
+       {
+         if (neg)  /* second pass: same magnitude, negated */
+           {
+             mpq_neg (q, q);
+             want = -want;
+           }
+
+         got = mpq_get_d (q);
+
+         if (got != want)
+           {
+             printf    ("mpq_get_d wrong on %s2**%ld\n", neg ? "-" : "", exp);
+             mpq_trace ("   q    ", q);
+             d_trace   ("   want ", want);
+             d_trace   ("   got  ", got);
+             abort();
+           }
+       }
+    }
+  mpq_clear (q);
+}
+
+int
+main (int argc, char **argv)
+{
+  tests_start ();
+
+  check_onebit ();
+  check_monotonic (argc, argv);  /* argv[1], if given, sets the repetition count */
+  check_random (argc, argv);  /* argv[1], if given, is multiplied by 100 */
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpq/t-get_str.c b/tests/mpq/t-get_str.c

new file mode 100644 (file)

index 0000000..3a13ffa
--- /dev/null
+++ b/tests/mpq/t-get_str.c
@@ -0,0 +1,143 @@
+/* Test mpq_get_str.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+/* Abort unless mpq_get_str (.., base, q) yields WANT, for NULL and own buffer. */
+void
+check_one (mpq_srcptr q, int base, const char *want)
+{
+  char    *str, *ret;
+  size_t  str_alloc;
+
+  MPQ_CHECK_FORMAT (q);
+  mp_trace_base = base;  /* so mpq_trace below prints in the base under test */
+
+  str_alloc =  /* documented buffer size: digits plus sign, slash and '\0' */
+    mpz_sizeinbase (mpq_numref(q), ABS(base)) +
+    mpz_sizeinbase (mpq_denref(q), ABS(base)) + 3;
+
+  str = mpq_get_str (NULL, base, q);  /* case 1: library allocates the result */
+  if (strlen(str)+1 > str_alloc)
+    {
+      printf ("mpq_get_str size bigger than should be (passing NULL)\n");
+      printf ("  base %d\n", base);
+      printf ("  got  size %lu \"%s\"\n", (unsigned long)  strlen(str)+1, str);
+      printf ("  want size %lu\n", (unsigned long) str_alloc);
+      abort ();
+    }
+  if (strcmp (str, want) != 0)
+    {
+      printf ("mpq_get_str wrong (passing NULL)\n");
+      printf ("  base %d\n", base);
+      printf ("  got  \"%s\"\n", str);
+      printf ("  want \"%s\"\n", want);
+      mpq_trace ("  q", q);
+      abort ();
+    }
+  (*__gmp_free_func) (str, strlen (str) + 1);
+
+  str = (char *) (*__gmp_allocate_func) (str_alloc);
+
+  ret = mpq_get_str (str, base, q);  /* case 2: caller-supplied buffer */
+  if (str != ret)  /* must hand back the very buffer it was given */
+    {
+      printf ("mpq_get_str wrong return value (passing non-NULL)\n");
+      printf ("  base %d\n", base);
+      printf ("  got  %p\n", (void *) ret);
+      printf ("  want %p\n", (void *) str);  /* expected pointer is str, not want */
+      abort ();
+    }
+  if (strcmp (str, want) != 0)
+    {
+      printf ("mpq_get_str wrong (passing non-NULL)\n");
+      printf ("  base %d\n", base);
+      printf ("  got  \"%s\"\n", str);
+      printf ("  want \"%s\"\n", want);
+      abort ();
+    }
+  (*__gmp_free_func) (str, str_alloc);
+}
+
+
+void
+check_all (mpq_srcptr q, int base, const char *want)
+{
+  char  *s;
+
+  check_one (q, base, want);
+
+  s = __gmp_allocate_strdup (want);  /* private copy, upper-cased below */
+  strtoupper (s);
+  check_one (q, -base, s);  /* negated base is expected to give the upper-case form */
+  (*__gmp_free_func) (s, strlen(s)+1);
+}
+
+void
+check_data (void)
+{
+  static const struct {
+    int         base;   /* base for num, den and want alike */
+    const char  *num;   /* numerator digits */
+    const char  *den;   /* denominator digits */
+    const char  *want;  /* expected mpq_get_str output, lower case */
+  } data[] = {
+    { 10, "0", "1", "0" },
+    { 10, "1", "1", "1" },
+
+    { 16, "ffffffff", "1", "ffffffff" },
+    { 16, "ffffffffffffffff", "1", "ffffffffffffffff" },
+
+    { 16, "1", "ffffffff", "1/ffffffff" },
+    { 16, "1", "ffffffffffffffff", "1/ffffffffffffffff" },
+    { 16, "1", "10000000000000003", "1/10000000000000003" },
+
+    { 10, "12345678901234567890", "9876543210987654323",
+      "12345678901234567890/9876543210987654323" },
+  };
+
+  mpq_t  q;
+  int    i;
+
+  mpq_init (q);
+  for (i = 0; i < numberof (data); i++)
+    {
+      mpz_set_str_or_abort (mpq_numref(q), data[i].num, data[i].base);
+      mpz_set_str_or_abort (mpq_denref(q), data[i].den, data[i].base);
+      check_all (q, data[i].base, data[i].want);  /* checks base and -base */
+    }
+  mpq_clear (q);
+}
+
+
+int
+main (void)
+{
+  tests_start ();
+
+  check_data ();  /* the whole test is table driven */
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpq/t-inp_str.c b/tests/mpq/t-inp_str.c

new file mode 100644 (file)

index 0000000..24d9642
--- /dev/null
+++ b/tests/mpq/t-inp_str.c
@@ -0,0 +1,172 @@
+/* Test mpq_inp_str.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#if HAVE_UNISTD_H
+#include <unistd.h>   /* for unlink */
+#endif
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+#define FILENAME  "t-inp_str.tmp"
+
+
+void
+check_data (void)
+{
+  static const struct {
+    const char  *inp;        /* text written to the scratch file */
+    int         base;        /* base passed to mpq_inp_str */
+    const char  *want;       /* expected value, parsed with base 0 */
+    int         want_nread;  /* expected mpq_inp_str return value */
+
+  } data[] = {
+
+    { "0",   10, "0", 1 },
+    { "0/1", 10, "0", 3 },
+
+    { "0/",   10, "0", 0 },
+    { "/123", 10, "0", 0 },
+    { "blah", 10, "0", 0 },
+    { "123/blah", 10, "0", 0 },
+    { "5 /8", 10, "5", 1 },
+    { "5/ 8", 10, "0", 0 },
+
+    {  "ff", 16,  "255", 2 },
+    { "-ff", 16, "-255", 3 },
+    {  "FF", 16,  "255", 2 },
+    { "-FF", 16, "-255", 3 },
+
+    { "z", 36, "35", 1 },
+    { "Z", 36, "35", 1 },
+
+    {  "0x0",    0,   "0", 3 },
+    {  "0x10",   0,  "16", 4 },
+    { "-0x0",    0,   "0", 4 },
+    { "-0x10",   0, "-16", 5 },
+    { "-0x10/5", 0, "-16/5", 7 },
+
+    {  "00",   0,  "0", 2 },
+    {  "010",  0,  "8", 3 },
+    { "-00",   0,  "0", 3 },
+    { "-010",  0, "-8", 4 },
+  };
+
+  mpq_t  got, want;
+  long   ftell_nread;
+  int    i, post, j, got_nread;
+  FILE   *fp;
+
+  mpq_init (got);
+  mpq_init (want);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      for (post = 0; post <= 2; post++)  /* 0, 1 or 2 trailing spaces appended */
+       {
+         mpq_set_str_or_abort (want, data[i].want, 0);
+         MPQ_CHECK_FORMAT (want);
+
+         fp = fopen (FILENAME, "w+");  /* scratch file rewritten for each case */
+         ASSERT_ALWAYS (fp != NULL);
+         fputs (data[i].inp, fp);
+         for (j = 0; j < post; j++)
+           putc (' ', fp);
+         fflush (fp);
+         ASSERT_ALWAYS (! ferror(fp));
+
+         rewind (fp);
+         got_nread = mpq_inp_str (got, fp, data[i].base);
+
+         if (got_nread != 0)  /* a nonzero count must match the stream position */
+           {
+             ftell_nread = ftell (fp);
+             if (got_nread != ftell_nread)
+               {
+                 printf ("mpq_inp_str nread wrong\n");
+                 printf ("  inp          \"%s\"\n", data[i].inp);
+                 printf ("  base         %d\n", data[i].base);
+                 printf ("  got_nread    %d\n", got_nread);
+                 printf ("  ftell_nread  %ld\n", ftell_nread);
+                 abort ();
+               }
+           }
+
+         if (post == 0 && data[i].want_nread == strlen(data[i].inp))  /* all input used */
+           {
+             int  c = getc(fp);
+             if (c != EOF)
+               {
+                 printf ("mpq_inp_str didn't read to EOF\n");
+                 printf ("  inp         \"%s\"\n", data[i].inp);
+                 printf ("  base        %d\n", data[i].base);
+                 printf ("  c '%c' %#x\n", c, c);
+                 abort ();
+               }
+           }
+
+         if (got_nread != data[i].want_nread)
+           {
+             printf ("mpq_inp_str nread wrong\n");
+             printf ("  inp         \"%s\"\n", data[i].inp);
+             printf ("  base        %d\n", data[i].base);
+             printf ("  got_nread   %d\n", got_nread);
+             printf ("  want_nread  %d\n", data[i].want_nread);
+             abort ();
+           }
+
+         MPQ_CHECK_FORMAT (got);
+
+         if (! mpq_equal (got, want))
+           {
+             printf ("mpq_inp_str wrong result\n");
+             printf ("  inp   \"%s\"\n", data[i].inp);
+             printf ("  base  %d\n", data[i].base);
+             mpq_trace ("  got ",  got);
+             mpq_trace ("  want", want);
+             abort ();
+           }
+
+         ASSERT_ALWAYS (fclose (fp) == 0);
+       }
+    }
+
+  mpq_clear (got);
+  mpq_clear (want);
+}
+
+int
+main (void)
+{
+  tests_start ();
+
+  check_data ();
+
+  unlink (FILENAME);  /* remove the scratch file once check_data has passed */
+  tests_end ();
+
+  exit (0);
+}
diff --git a/tests/mpq/t-md_2exp.c b/tests/mpq/t-md_2exp.c

new file mode 100644 (file)

index 0000000..3c1f9f4
--- /dev/null
+++ b/tests/mpq/t-md_2exp.c
@@ -0,0 +1,182 @@
+/* Test mpq_mul_2exp and mpq_div_2exp.
+
+Copyright 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+struct pair_t {
+  const char     *num;  /* numerator string; main parses it with base 0 */
+  const char     *den;  /* denominator string; main parses it with base 0 */
+};
+
+int
+main (void)
+{
+  static const struct {
+    struct pair_t  left;   /* left * 2^n should equal right ... */
+    unsigned long  n;
+    struct pair_t  right;  /* ... and right / 2^n should equal left */
+
+  } data[] = {
+    { {"0","1"}, 0, {"0","1"} },
+    { {"0","1"}, 1, {"0","1"} },
+    { {"0","1"}, 2, {"0","1"} },
+
+    { {"1","1"}, 0, {"1","1"} },
+    { {"1","1"}, 1, {"2","1"} },
+    { {"1","1"}, 2, {"4","1"} },
+    { {"1","1"}, 3, {"8","1"} },
+
+    { {"1","1"}, 31, {"0x80000000","1"} },
+    { {"1","1"}, 32, {"0x100000000","1"} },
+    { {"1","1"}, 33, {"0x200000000","1"} },
+    { {"1","1"}, 63, {"0x8000000000000000","1"} },
+    { {"1","1"}, 64, {"0x10000000000000000","1"} },
+    { {"1","1"}, 65, {"0x20000000000000000","1"} },
+    { {"1","1"}, 95, {"0x800000000000000000000000","1"} },
+    { {"1","1"}, 96, {"0x1000000000000000000000000","1"} },
+    { {"1","1"}, 97, {"0x2000000000000000000000000","1"} },
+    { {"1","1"}, 127, {"0x80000000000000000000000000000000","1"} },
+    { {"1","1"}, 128, {"0x100000000000000000000000000000000","1"} },
+    { {"1","1"}, 129, {"0x200000000000000000000000000000000","1"} },
+
+    { {"1","2"}, 31, {"0x40000000","1"} },
+    { {"1","2"}, 32, {"0x80000000","1"} },
+    { {"1","2"}, 33, {"0x100000000","1"} },
+    { {"1","2"}, 63, {"0x4000000000000000","1"} },
+    { {"1","2"}, 64, {"0x8000000000000000","1"} },
+    { {"1","2"}, 65, {"0x10000000000000000","1"} },
+    { {"1","2"}, 95, {"0x400000000000000000000000","1"} },
+    { {"1","2"}, 96, {"0x800000000000000000000000","1"} },
+    { {"1","2"}, 97, {"0x1000000000000000000000000","1"} },
+    { {"1","2"}, 127, {"0x40000000000000000000000000000000","1"} },
+    { {"1","2"}, 128, {"0x80000000000000000000000000000000","1"} },
+    { {"1","2"}, 129, {"0x100000000000000000000000000000000","1"} },
+
+    { {"1","0x80000000"}, 30, {"1","2"} },
+    { {"1","0x80000000"}, 31, {"1","1"} },
+    { {"1","0x80000000"}, 32, {"2","1"} },
+    { {"1","0x80000000"}, 33, {"4","1"} },
+    { {"1","0x80000000"}, 62, {"0x80000000","1"} },
+    { {"1","0x80000000"}, 63, {"0x100000000","1"} },
+    { {"1","0x80000000"}, 64, {"0x200000000","1"} },
+    { {"1","0x80000000"}, 94, {"0x8000000000000000","1"} },
+    { {"1","0x80000000"}, 95, {"0x10000000000000000","1"} },
+    { {"1","0x80000000"}, 96, {"0x20000000000000000","1"} },
+    { {"1","0x80000000"}, 126, {"0x800000000000000000000000","1"} },
+    { {"1","0x80000000"}, 127, {"0x1000000000000000000000000","1"} },
+    { {"1","0x80000000"}, 128, {"0x2000000000000000000000000","1"} },
+
+    { {"1","0x100000000"}, 1, {"1","0x80000000"} },
+    { {"1","0x100000000"}, 2, {"1","0x40000000"} },
+    { {"1","0x100000000"}, 3, {"1","0x20000000"} },
+
+    { {"1","0x10000000000000000"}, 1, {"1","0x8000000000000000"} },
+    { {"1","0x10000000000000000"}, 2, {"1","0x4000000000000000"} },
+    { {"1","0x10000000000000000"}, 3, {"1","0x2000000000000000"} },
+  };
+
+  void (*fun) __GMP_PROTO ((mpq_ptr, mpq_srcptr, unsigned long));
+  const struct pair_t  *p_start, *p_want;
+  const char  *name;
+  mpq_t    sep, got, want;
+  mpq_ptr  q;
+  int      i, muldiv, sign, overlap;
+
+  tests_start ();
+
+  mpq_init (sep);
+  mpq_init (got);
+  mpq_init (want);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      for (muldiv = 0; muldiv < 2; muldiv++)  /* 0: multiply, 1: divide */
+        {
+          if (muldiv == 0)
+            {
+              fun = mpq_mul_2exp;
+              name = "mpq_mul_2exp";
+              p_start = &data[i].left;
+              p_want = &data[i].right;
+            }
+          else
+            {
+              fun = mpq_div_2exp;
+              name = "mpq_div_2exp";
+              p_start = &data[i].right;  /* same row, run in reverse */
+              p_want = &data[i].left;
+            }
+
+          for (sign = 0; sign <= 1; sign++)  /* 1: negate operand and expectation */
+            {
+              mpz_set_str_or_abort (mpq_numref(want), p_want->num, 0);
+              mpz_set_str_or_abort (mpq_denref(want), p_want->den, 0);
+              if (sign)
+                mpq_neg (want, want);
+
+              for (overlap = 0; overlap <= 1; overlap++)
+                {
+                  q = overlap ? got : sep;  /* overlap: source is the destination */
+
+                  /* initial garbage in "got" */
+                  mpq_set_ui (got, 123L, 456L);
+
+                  mpz_set_str_or_abort (mpq_numref(q), p_start->num, 0);
+                  mpz_set_str_or_abort (mpq_denref(q), p_start->den, 0);
+                  if (sign)
+                    mpq_neg (q, q);
+
+                  (*fun) (got, q, data[i].n);
+                  MPQ_CHECK_FORMAT (got);
+
+                  if (! mpq_equal (got, want))
+                    {
+                      printf ("%s wrong at data[%d], sign %d, overlap %d\n",
+                              name, i, sign, overlap);
+                      printf ("   num \"%s\"\n", p_start->num);
+                      printf ("   den \"%s\"\n", p_start->den);
+                      printf ("   n   %lu\n", data[i].n);
+
+                      printf ("   got  ");
+                      mpq_out_str (stdout, 16, got);
+                      printf (" (hex)\n");
+
+                      printf ("   want ");
+                      mpq_out_str (stdout, 16, want);
+                      printf (" (hex)\n");
+
+                      abort ();
+                    }
+                }
+            }
+        }
+    }
+
+  mpq_clear (sep);
+  mpq_clear (got);
+  mpq_clear (want);
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpq/t-set_f.c b/tests/mpq/t-set_f.c

new file mode 100644 (file)

index 0000000..2b30f5e
--- /dev/null
+++ b/tests/mpq/t-set_f.c
@@ -0,0 +1,170 @@
+/* Test mpq_set_f.
+
+Copyright 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+int
+main (int argc, char **argv)
+{
+#if GMP_NAIL_BITS == 0  /* with nails the body is compiled out; only exit (0) runs */
+  static const struct {
+    int         f_base;     /* base used to parse f */
+    const char  *f;         /* mpf input string */
+    int         z_base;     /* base used to parse want_num and want_den */
+    const char  *want_num;  /* expected numerator */
+    const char  *want_den;  /* expected denominator */
+
+  } data[] = {
+
+    { -2, "0",    16, "0", "1" },
+    { -2, "1",    16, "1", "1" },
+    { -2, "1@1",  16, "2", "1" },
+    { -2, "1@2",  16, "4", "1" },
+    { -2, "1@3",  16, "8", "1" },
+
+    { -2, "1@30", 16,  "40000000", "1" },
+    { -2, "1@31", 16,  "80000000", "1" },
+    { -2, "1@32", 16, "100000000", "1" },
+    { -2, "1@33", 16, "200000000", "1" },
+    { -2, "1@34", 16, "400000000", "1" },
+
+    { -2, "1@62", 16,  "4000000000000000", "1" },
+    { -2, "1@63", 16,  "8000000000000000", "1" },
+    { -2, "1@64", 16, "10000000000000000", "1" },
+    { -2, "1@65", 16, "20000000000000000", "1" },
+    { -2, "1@66", 16, "40000000000000000", "1" },
+
+    { -2, "1@126", 16,  "40000000000000000000000000000000", "1" },
+    { -2, "1@127", 16,  "80000000000000000000000000000000", "1" },
+    { -2, "1@128", 16, "100000000000000000000000000000000", "1" },
+    { -2, "1@129", 16, "200000000000000000000000000000000", "1" },
+    { -2, "1@130", 16, "400000000000000000000000000000000", "1" },
+
+    { -2, "1@-1",  16, "1", "2" },
+    { -2, "1@-2",  16, "1", "4" },
+    { -2, "1@-3",  16, "1", "8" },
+
+    { -2, "1@-30", 16, "1",  "40000000" },
+    { -2, "1@-31", 16, "1",  "80000000" },
+    { -2, "1@-32", 16, "1", "100000000" },
+    { -2, "1@-33", 16, "1", "200000000" },
+    { -2, "1@-34", 16, "1", "400000000" },
+
+    { -2, "1@-62", 16, "1",  "4000000000000000" },
+    { -2, "1@-63", 16, "1",  "8000000000000000" },
+    { -2, "1@-64", 16, "1", "10000000000000000" },
+    { -2, "1@-65", 16, "1", "20000000000000000" },
+    { -2, "1@-66", 16, "1", "40000000000000000" },
+
+    { -2, "1@-126", 16, "1",  "40000000000000000000000000000000" },
+    { -2, "1@-127", 16, "1",  "80000000000000000000000000000000" },
+    { -2, "1@-128", 16, "1", "100000000000000000000000000000000" },
+    { -2, "1@-129", 16, "1", "200000000000000000000000000000000" },
+    { -2, "1@-130", 16, "1", "400000000000000000000000000000000" },
+
+    { -2, "1@-30", 16, "1",  "40000000" },  /* NOTE(review): these five rows */
+    { -2, "1@-31", 16, "1",  "80000000" },  /* repeat the 1@-30 .. 1@-34 rows */
+    { -2, "1@-32", 16, "1", "100000000" },  /* above; perhaps "11@-3x" was */
+    { -2, "1@-33", 16, "1", "200000000" },  /* intended -- confirm upstream */
+    { -2, "1@-34", 16, "1", "400000000" },
+
+    { -2, "11@-62", 16, "3",  "4000000000000000" },
+    { -2, "11@-63", 16, "3",  "8000000000000000" },
+    { -2, "11@-64", 16, "3", "10000000000000000" },
+    { -2, "11@-65", 16, "3", "20000000000000000" },
+    { -2, "11@-66", 16, "3", "40000000000000000" },
+
+    { 16, "80000000.00000001", 16, "8000000000000001", "100000000" },
+    { 16, "80000000.00000008", 16, "1000000000000001",  "20000000" },
+    { 16, "80000000.8",        16, "100000001", "2" },
+
+  };
+
+  mpf_t  f;
+  mpq_t  got;
+  mpz_t  want_num, want_den;
+  int    i, neg;
+
+  tests_start ();
+
+  mpf_init2 (f, 1024L);
+  mpq_init (got);
+  mpz_init (want_num);
+  mpz_init (want_den);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      for (neg = 0; neg <= 1; neg++)  /* each row is run as given and negated */
+        {
+          mpf_set_str_or_abort (f, data[i].f, data[i].f_base);
+          mpz_set_str_or_abort (want_num, data[i].want_num, data[i].z_base);
+          mpz_set_str_or_abort (want_den, data[i].want_den, data[i].z_base);
+
+          if (neg)
+            {
+              mpf_neg (f, f);
+              mpz_neg (want_num, want_num);  /* only the numerator carries the sign */
+            }
+
+          mpq_set_f (got, f);
+          MPQ_CHECK_FORMAT (got);
+
+          if (mpz_cmp (mpq_numref(got), want_num) != 0
+              || mpz_cmp (mpq_denref(got), want_den) != 0)
+            {
+              printf ("wrong at data[%d]\n", i);
+              printf ("   f_base %d, z_base %d\n",
+                      data[i].f_base, data[i].z_base);
+
+              printf ("   f \"%s\" hex ", data[i].f);
+              mpf_out_str (stdout, 16, 0, f);
+              printf ("\n");
+
+              printf ("   want num 0x");
+              mpz_out_str (stdout, 16, want_num);
+              printf ("\n");
+              printf ("   want den 0x");
+              mpz_out_str (stdout, 16, want_den);
+              printf ("\n");
+
+              printf ("   got num 0x");
+              mpz_out_str (stdout, 16, mpq_numref(got));
+              printf ("\n");
+              printf ("   got den 0x");
+              mpz_out_str (stdout, 16, mpq_denref(got));
+              printf ("\n");
+
+              abort ();
+            }
+        }
+    }
+
+  mpf_clear (f);
+  mpq_clear (got);
+  mpz_clear (want_num);
+  mpz_clear (want_den);
+
+  tests_end ();
+#endif
+  exit (0);
+}
diff --git a/tests/mpq/t-set_str.c b/tests/mpq/t-set_str.c

new file mode 100644 (file)

index 0000000..9f95d26
--- /dev/null
+++ b/tests/mpq/t-set_str.c
@@ -0,0 +1,103 @@
+/* Test mpq_set_str.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+void
+check_one (mpq_srcptr want, int base, const char *str)
+{
+  mpq_t   got;
+
+  MPQ_CHECK_FORMAT (want);
+  mp_trace_base = base;  /* so mpq_trace below prints in the base under test */
+
+  mpq_init (got);
+
+  if (mpq_set_str (got, str, base) != 0)  /* every sample is expected to parse */
+    {
+      printf ("mpq_set_str unexpectedly failed\n");
+      printf ("  base %d\n", base);
+      printf ("  str  \"%s\"\n", str);
+      abort ();
+    }
+  MPQ_CHECK_FORMAT (got);
+
+  if (! mpq_equal (got, want))  /* parsed value must equal the expected one */
+    {
+      printf ("mpq_set_str wrong\n");
+      printf ("  base %d\n", base);
+      printf ("  str  \"%s\"\n", str);
+      mpq_trace ("got ", got);
+      mpq_trace ("want", want);
+      abort ();
+    }
+
+  mpq_clear (got);
+}
+
+void
+check_samples (void)
+{
+  mpq_t  q;
+
+  mpq_init (q);
+
+  mpq_set_ui (q, 0L, 1L);  /* expected value for the zero spellings below */
+  check_one (q, 10, "0");
+  check_one (q, 10, "0/1");
+  check_one (q, 10, "0  / 1");
+  check_one (q, 0, "0x0/ 1");
+  check_one (q, 0, "0x0/ 0x1");
+  check_one (q, 0, "0 / 0x1");
+
+  check_one (q, 10, "-0");  /* a leading minus must still give zero */
+  check_one (q, 10, "-0/1");
+  check_one (q, 10, "-0  / 1");
+  check_one (q, 0, "-0x0/ 1");
+  check_one (q, 0, "-0x0/ 0x1");
+  check_one (q, 0, "-0 / 0x1");
+
+  mpq_set_ui (q, 255L, 256L);  /* expected value for the next three strings */
+  check_one (q, 10, "255/256");
+  check_one (q, 0,  "0xFF/0x100");
+  check_one (q, 16, "FF/100");
+
+  mpq_neg (q, q);  /* now -255/256 */
+  check_one (q, 10, "-255/256");
+  check_one (q, 0,  "-0xFF/0x100");
+  check_one (q, 16, "-FF/100");
+
+  mpq_clear (q);
+}
+
+int
+main (void)
+{
+  tests_start ();
+
+  check_samples ();  /* fixed sample strings only */
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/Makefile.am b/tests/mpz/Makefile.am

new file mode 100644 (file)

index 0000000..f081bfb
--- /dev/null
+++ b/tests/mpz/Makefile.am
@@ -0,0 +1,43 @@
+## Process this file with automake to generate Makefile.in
+
+# Copyright 1996, 1997, 1999, 2000, 2001, 2002, 2003, 2009, 2012 Free Software
+# Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/tests
+LDADD = $(top_builddir)/tests/libtests.la $(top_builddir)/libgmp.la
+
+check_PROGRAMS = t-addsub t-cmp t-mul t-mul_i t-tdiv t-tdiv_ui t-fdiv   \
+  t-fdiv_ui t-cdiv_ui t-gcd t-gcd_ui t-lcm t-invert dive dive_ui t-sqrtrem \
+  convert io t-inp_str logic bit t-powm t-powm_ui t-pow t-div_2exp reuse   \
+  t-root t-perfsqr t-perfpow t-jac t-bin t-get_d t-get_d_2exp t-get_si \
+  t-set_d t-set_si                                                     \
+  t-fac_ui t-fib_ui t-lucnum_ui t-scan t-fits                           \
+  t-divis t-divis_2exp t-cong t-cong_2exp t-sizeinbase t-set_str        \
+  t-aorsmul t-cmp_d t-cmp_si t-hamdist t-oddeven t-popcount t-set_f     \
+  t-io_raw t-import t-export t-pprime_p t-nextprime
+
+TESTS = $(check_PROGRAMS)
+
+# Temporary files used by the tests.  Removed automatically if the tests
+# pass, but ensure they're cleaned if they fail.
+#
+CLEANFILES = *.tmp
+
+$(top_builddir)/tests/libtests.la:
+       cd $(top_builddir)/tests; $(MAKE) $(AM_MAKEFLAGS) libtests.la
diff --git a/tests/mpz/Makefile.in b/tests/mpz/Makefile.in

new file mode 100644 (file)

index 0000000..aa2349c
--- /dev/null
+++ b/tests/mpz/Makefile.in
@@ -0,0 +1,1280 @@
+# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation,
+# Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+# Copyright 1996, 1997, 1999, 2000, 2001, 2002, 2003, 2009, 2012 Free Software
+# Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+ANSI2KNR = $(top_builddir)/ansi2knr
+check_PROGRAMS = t-addsub$(EXEEXT) t-cmp$(EXEEXT) t-mul$(EXEEXT) \
+       t-mul_i$(EXEEXT) t-tdiv$(EXEEXT) t-tdiv_ui$(EXEEXT) \
+       t-fdiv$(EXEEXT) t-fdiv_ui$(EXEEXT) t-cdiv_ui$(EXEEXT) \
+       t-gcd$(EXEEXT) t-gcd_ui$(EXEEXT) t-lcm$(EXEEXT) \
+       t-invert$(EXEEXT) dive$(EXEEXT) dive_ui$(EXEEXT) \
+       t-sqrtrem$(EXEEXT) convert$(EXEEXT) io$(EXEEXT) \
+       t-inp_str$(EXEEXT) logic$(EXEEXT) bit$(EXEEXT) t-powm$(EXEEXT) \
+       t-powm_ui$(EXEEXT) t-pow$(EXEEXT) t-div_2exp$(EXEEXT) \
+       reuse$(EXEEXT) t-root$(EXEEXT) t-perfsqr$(EXEEXT) \
+       t-perfpow$(EXEEXT) t-jac$(EXEEXT) t-bin$(EXEEXT) \
+       t-get_d$(EXEEXT) t-get_d_2exp$(EXEEXT) t-get_si$(EXEEXT) \
+       t-set_d$(EXEEXT) t-set_si$(EXEEXT) t-fac_ui$(EXEEXT) \
+       t-fib_ui$(EXEEXT) t-lucnum_ui$(EXEEXT) t-scan$(EXEEXT) \
+       t-fits$(EXEEXT) t-divis$(EXEEXT) t-divis_2exp$(EXEEXT) \
+       t-cong$(EXEEXT) t-cong_2exp$(EXEEXT) t-sizeinbase$(EXEEXT) \
+       t-set_str$(EXEEXT) t-aorsmul$(EXEEXT) t-cmp_d$(EXEEXT) \
+       t-cmp_si$(EXEEXT) t-hamdist$(EXEEXT) t-oddeven$(EXEEXT) \
+       t-popcount$(EXEEXT) t-set_f$(EXEEXT) t-io_raw$(EXEEXT) \
+       t-import$(EXEEXT) t-export$(EXEEXT) t-pprime_p$(EXEEXT) \
+       t-nextprime$(EXEEXT)
+subdir = tests/mpz
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
+       $(top_srcdir)/configure.in
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+       $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+bit_SOURCES = bit.c
+bit_OBJECTS = bit$U.$(OBJEXT)
+bit_LDADD = $(LDADD)
+bit_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+convert_SOURCES = convert.c
+convert_OBJECTS = convert$U.$(OBJEXT)
+convert_LDADD = $(LDADD)
+convert_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+dive_SOURCES = dive.c
+dive_OBJECTS = dive$U.$(OBJEXT)
+dive_LDADD = $(LDADD)
+dive_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+dive_ui_SOURCES = dive_ui.c
+dive_ui_OBJECTS = dive_ui$U.$(OBJEXT)
+dive_ui_LDADD = $(LDADD)
+dive_ui_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+io_SOURCES = io.c
+io_OBJECTS = io$U.$(OBJEXT)
+io_LDADD = $(LDADD)
+io_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+logic_SOURCES = logic.c
+logic_OBJECTS = logic$U.$(OBJEXT)
+logic_LDADD = $(LDADD)
+logic_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+reuse_SOURCES = reuse.c
+reuse_OBJECTS = reuse$U.$(OBJEXT)
+reuse_LDADD = $(LDADD)
+reuse_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_addsub_SOURCES = t-addsub.c
+t_addsub_OBJECTS = t-addsub$U.$(OBJEXT)
+t_addsub_LDADD = $(LDADD)
+t_addsub_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_aorsmul_SOURCES = t-aorsmul.c
+t_aorsmul_OBJECTS = t-aorsmul$U.$(OBJEXT)
+t_aorsmul_LDADD = $(LDADD)
+t_aorsmul_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_bin_SOURCES = t-bin.c
+t_bin_OBJECTS = t-bin$U.$(OBJEXT)
+t_bin_LDADD = $(LDADD)
+t_bin_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_cdiv_ui_SOURCES = t-cdiv_ui.c
+t_cdiv_ui_OBJECTS = t-cdiv_ui$U.$(OBJEXT)
+t_cdiv_ui_LDADD = $(LDADD)
+t_cdiv_ui_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_cmp_SOURCES = t-cmp.c
+t_cmp_OBJECTS = t-cmp$U.$(OBJEXT)
+t_cmp_LDADD = $(LDADD)
+t_cmp_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_cmp_d_SOURCES = t-cmp_d.c
+t_cmp_d_OBJECTS = t-cmp_d$U.$(OBJEXT)
+t_cmp_d_LDADD = $(LDADD)
+t_cmp_d_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_cmp_si_SOURCES = t-cmp_si.c
+t_cmp_si_OBJECTS = t-cmp_si$U.$(OBJEXT)
+t_cmp_si_LDADD = $(LDADD)
+t_cmp_si_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_cong_SOURCES = t-cong.c
+t_cong_OBJECTS = t-cong$U.$(OBJEXT)
+t_cong_LDADD = $(LDADD)
+t_cong_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_cong_2exp_SOURCES = t-cong_2exp.c
+t_cong_2exp_OBJECTS = t-cong_2exp$U.$(OBJEXT)
+t_cong_2exp_LDADD = $(LDADD)
+t_cong_2exp_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_div_2exp_SOURCES = t-div_2exp.c
+t_div_2exp_OBJECTS = t-div_2exp$U.$(OBJEXT)
+t_div_2exp_LDADD = $(LDADD)
+t_div_2exp_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_divis_SOURCES = t-divis.c
+t_divis_OBJECTS = t-divis$U.$(OBJEXT)
+t_divis_LDADD = $(LDADD)
+t_divis_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_divis_2exp_SOURCES = t-divis_2exp.c
+t_divis_2exp_OBJECTS = t-divis_2exp$U.$(OBJEXT)
+t_divis_2exp_LDADD = $(LDADD)
+t_divis_2exp_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_export_SOURCES = t-export.c
+t_export_OBJECTS = t-export$U.$(OBJEXT)
+t_export_LDADD = $(LDADD)
+t_export_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_fac_ui_SOURCES = t-fac_ui.c
+t_fac_ui_OBJECTS = t-fac_ui$U.$(OBJEXT)
+t_fac_ui_LDADD = $(LDADD)
+t_fac_ui_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_fdiv_SOURCES = t-fdiv.c
+t_fdiv_OBJECTS = t-fdiv$U.$(OBJEXT)
+t_fdiv_LDADD = $(LDADD)
+t_fdiv_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_fdiv_ui_SOURCES = t-fdiv_ui.c
+t_fdiv_ui_OBJECTS = t-fdiv_ui$U.$(OBJEXT)
+t_fdiv_ui_LDADD = $(LDADD)
+t_fdiv_ui_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_fib_ui_SOURCES = t-fib_ui.c
+t_fib_ui_OBJECTS = t-fib_ui$U.$(OBJEXT)
+t_fib_ui_LDADD = $(LDADD)
+t_fib_ui_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_fits_SOURCES = t-fits.c
+t_fits_OBJECTS = t-fits$U.$(OBJEXT)
+t_fits_LDADD = $(LDADD)
+t_fits_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_gcd_SOURCES = t-gcd.c
+t_gcd_OBJECTS = t-gcd$U.$(OBJEXT)
+t_gcd_LDADD = $(LDADD)
+t_gcd_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_gcd_ui_SOURCES = t-gcd_ui.c
+t_gcd_ui_OBJECTS = t-gcd_ui$U.$(OBJEXT)
+t_gcd_ui_LDADD = $(LDADD)
+t_gcd_ui_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_get_d_SOURCES = t-get_d.c
+t_get_d_OBJECTS = t-get_d$U.$(OBJEXT)
+t_get_d_LDADD = $(LDADD)
+t_get_d_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_get_d_2exp_SOURCES = t-get_d_2exp.c
+t_get_d_2exp_OBJECTS = t-get_d_2exp$U.$(OBJEXT)
+t_get_d_2exp_LDADD = $(LDADD)
+t_get_d_2exp_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_get_si_SOURCES = t-get_si.c
+t_get_si_OBJECTS = t-get_si$U.$(OBJEXT)
+t_get_si_LDADD = $(LDADD)
+t_get_si_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_hamdist_SOURCES = t-hamdist.c
+t_hamdist_OBJECTS = t-hamdist$U.$(OBJEXT)
+t_hamdist_LDADD = $(LDADD)
+t_hamdist_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_import_SOURCES = t-import.c
+t_import_OBJECTS = t-import$U.$(OBJEXT)
+t_import_LDADD = $(LDADD)
+t_import_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_inp_str_SOURCES = t-inp_str.c
+t_inp_str_OBJECTS = t-inp_str$U.$(OBJEXT)
+t_inp_str_LDADD = $(LDADD)
+t_inp_str_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_invert_SOURCES = t-invert.c
+t_invert_OBJECTS = t-invert$U.$(OBJEXT)
+t_invert_LDADD = $(LDADD)
+t_invert_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_io_raw_SOURCES = t-io_raw.c
+t_io_raw_OBJECTS = t-io_raw$U.$(OBJEXT)
+t_io_raw_LDADD = $(LDADD)
+t_io_raw_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_jac_SOURCES = t-jac.c
+t_jac_OBJECTS = t-jac$U.$(OBJEXT)
+t_jac_LDADD = $(LDADD)
+t_jac_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_lcm_SOURCES = t-lcm.c
+t_lcm_OBJECTS = t-lcm$U.$(OBJEXT)
+t_lcm_LDADD = $(LDADD)
+t_lcm_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_lucnum_ui_SOURCES = t-lucnum_ui.c
+t_lucnum_ui_OBJECTS = t-lucnum_ui$U.$(OBJEXT)
+t_lucnum_ui_LDADD = $(LDADD)
+t_lucnum_ui_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_mul_SOURCES = t-mul.c
+t_mul_OBJECTS = t-mul$U.$(OBJEXT)
+t_mul_LDADD = $(LDADD)
+t_mul_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_mul_i_SOURCES = t-mul_i.c
+t_mul_i_OBJECTS = t-mul_i$U.$(OBJEXT)
+t_mul_i_LDADD = $(LDADD)
+t_mul_i_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_nextprime_SOURCES = t-nextprime.c
+t_nextprime_OBJECTS = t-nextprime$U.$(OBJEXT)
+t_nextprime_LDADD = $(LDADD)
+t_nextprime_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_oddeven_SOURCES = t-oddeven.c
+t_oddeven_OBJECTS = t-oddeven$U.$(OBJEXT)
+t_oddeven_LDADD = $(LDADD)
+t_oddeven_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_perfpow_SOURCES = t-perfpow.c
+t_perfpow_OBJECTS = t-perfpow$U.$(OBJEXT)
+t_perfpow_LDADD = $(LDADD)
+t_perfpow_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_perfsqr_SOURCES = t-perfsqr.c
+t_perfsqr_OBJECTS = t-perfsqr$U.$(OBJEXT)
+t_perfsqr_LDADD = $(LDADD)
+t_perfsqr_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_popcount_SOURCES = t-popcount.c
+t_popcount_OBJECTS = t-popcount$U.$(OBJEXT)
+t_popcount_LDADD = $(LDADD)
+t_popcount_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_pow_SOURCES = t-pow.c
+t_pow_OBJECTS = t-pow$U.$(OBJEXT)
+t_pow_LDADD = $(LDADD)
+t_pow_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_powm_SOURCES = t-powm.c
+t_powm_OBJECTS = t-powm$U.$(OBJEXT)
+t_powm_LDADD = $(LDADD)
+t_powm_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_powm_ui_SOURCES = t-powm_ui.c
+t_powm_ui_OBJECTS = t-powm_ui$U.$(OBJEXT)
+t_powm_ui_LDADD = $(LDADD)
+t_powm_ui_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_pprime_p_SOURCES = t-pprime_p.c
+t_pprime_p_OBJECTS = t-pprime_p$U.$(OBJEXT)
+t_pprime_p_LDADD = $(LDADD)
+t_pprime_p_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_root_SOURCES = t-root.c
+t_root_OBJECTS = t-root$U.$(OBJEXT)
+t_root_LDADD = $(LDADD)
+t_root_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_scan_SOURCES = t-scan.c
+t_scan_OBJECTS = t-scan$U.$(OBJEXT)
+t_scan_LDADD = $(LDADD)
+t_scan_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_set_d_SOURCES = t-set_d.c
+t_set_d_OBJECTS = t-set_d$U.$(OBJEXT)
+t_set_d_LDADD = $(LDADD)
+t_set_d_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_set_f_SOURCES = t-set_f.c
+t_set_f_OBJECTS = t-set_f$U.$(OBJEXT)
+t_set_f_LDADD = $(LDADD)
+t_set_f_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_set_si_SOURCES = t-set_si.c
+t_set_si_OBJECTS = t-set_si$U.$(OBJEXT)
+t_set_si_LDADD = $(LDADD)
+t_set_si_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_set_str_SOURCES = t-set_str.c
+t_set_str_OBJECTS = t-set_str$U.$(OBJEXT)
+t_set_str_LDADD = $(LDADD)
+t_set_str_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_sizeinbase_SOURCES = t-sizeinbase.c
+t_sizeinbase_OBJECTS = t-sizeinbase$U.$(OBJEXT)
+t_sizeinbase_LDADD = $(LDADD)
+t_sizeinbase_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_sqrtrem_SOURCES = t-sqrtrem.c
+t_sqrtrem_OBJECTS = t-sqrtrem$U.$(OBJEXT)
+t_sqrtrem_LDADD = $(LDADD)
+t_sqrtrem_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_tdiv_SOURCES = t-tdiv.c
+t_tdiv_OBJECTS = t-tdiv$U.$(OBJEXT)
+t_tdiv_LDADD = $(LDADD)
+t_tdiv_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_tdiv_ui_SOURCES = t-tdiv_ui.c
+t_tdiv_ui_OBJECTS = t-tdiv_ui$U.$(OBJEXT)
+t_tdiv_ui_LDADD = $(LDADD)
+t_tdiv_ui_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+# Include directories added to every compile in addition to $(INCLUDES).
+DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
+# Both left empty here; this matches the --ignore-deps option used in the
+# automake invocation recorded in the Makefile.in rebuild rule of this file.
+depcomp =
+am__depfiles_maybe =
+# Plain C compile command used by the .c.o and .c.obj suffix rules.
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+       $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+# Same compiler flags as COMPILE, but run through libtool in compile mode;
+# used by the .c.lo suffix rule.
+LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+       $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+# The C compiler also serves as the link driver.
+CCLD = $(CC)
+# Link command for the test programs: libtool in link mode, writing the
+# output to the rule's target ($@).
+LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
+       $(LDFLAGS) -o $@
+SOURCES = bit.c convert.c dive.c dive_ui.c io.c logic.c reuse.c \
+       t-addsub.c t-aorsmul.c t-bin.c t-cdiv_ui.c t-cmp.c t-cmp_d.c \
+       t-cmp_si.c t-cong.c t-cong_2exp.c t-div_2exp.c t-divis.c \
+       t-divis_2exp.c t-export.c t-fac_ui.c t-fdiv.c t-fdiv_ui.c \
+       t-fib_ui.c t-fits.c t-gcd.c t-gcd_ui.c t-get_d.c \
+       t-get_d_2exp.c t-get_si.c t-hamdist.c t-import.c t-inp_str.c \
+       t-invert.c t-io_raw.c t-jac.c t-lcm.c t-lucnum_ui.c t-mul.c \
+       t-mul_i.c t-nextprime.c t-oddeven.c t-perfpow.c t-perfsqr.c \
+       t-popcount.c t-pow.c t-powm.c t-powm_ui.c t-pprime_p.c \
+       t-root.c t-scan.c t-set_d.c t-set_f.c t-set_si.c t-set_str.c \
+       t-sizeinbase.c t-sqrtrem.c t-tdiv.c t-tdiv_ui.c
+DIST_SOURCES = bit.c convert.c dive.c dive_ui.c io.c logic.c reuse.c \
+       t-addsub.c t-aorsmul.c t-bin.c t-cdiv_ui.c t-cmp.c t-cmp_d.c \
+       t-cmp_si.c t-cong.c t-cong_2exp.c t-div_2exp.c t-divis.c \
+       t-divis_2exp.c t-export.c t-fac_ui.c t-fdiv.c t-fdiv_ui.c \
+       t-fib_ui.c t-fits.c t-gcd.c t-gcd_ui.c t-get_d.c \
+       t-get_d_2exp.c t-get_si.c t-hamdist.c t-import.c t-inp_str.c \
+       t-invert.c t-io_raw.c t-jac.c t-lcm.c t-lucnum_ui.c t-mul.c \
+       t-mul_i.c t-nextprime.c t-oddeven.c t-perfpow.c t-perfsqr.c \
+       t-popcount.c t-pow.c t-powm.c t-powm_ui.c t-pprime_p.c \
+       t-root.c t-scan.c t-set_d.c t-set_f.c t-set_si.c t-set_str.c \
+       t-sizeinbase.c t-sqrtrem.c t-tdiv.c t-tdiv_ui.c
+ETAGS = etags
+CTAGS = ctags
+am__tty_colors = \
+red=; grn=; lgn=; blu=; std=
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ABI = @ABI@
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AS = @AS@
+ASMFLAGS = @ASMFLAGS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CALLING_CONVENTIONS_OBJS = @CALLING_CONVENTIONS_OBJS@
+CC = @CC@
+CCAS = @CCAS@
+CC_FOR_BUILD = @CC_FOR_BUILD@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CPP_FOR_BUILD = @CPP_FOR_BUILD@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFN_LONG_LONG_LIMB = @DEFN_LONG_LONG_LIMB@
+DEFS = @DEFS@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+EXEEXT_FOR_BUILD = @EXEEXT_FOR_BUILD@
+FGREP = @FGREP@
+GMP_LDFLAGS = @GMP_LDFLAGS@
+GMP_LIMB_BITS = @GMP_LIMB_BITS@
+GMP_NAIL_BITS = @GMP_NAIL_BITS@
+GREP = @GREP@
+HAVE_CLOCK_01 = @HAVE_CLOCK_01@
+HAVE_CPUTIME_01 = @HAVE_CPUTIME_01@
+HAVE_GETRUSAGE_01 = @HAVE_GETRUSAGE_01@
+HAVE_GETTIMEOFDAY_01 = @HAVE_GETTIMEOFDAY_01@
+HAVE_HOST_CPU_FAMILY_power = @HAVE_HOST_CPU_FAMILY_power@
+HAVE_HOST_CPU_FAMILY_powerpc = @HAVE_HOST_CPU_FAMILY_powerpc@
+HAVE_SIGACTION_01 = @HAVE_SIGACTION_01@
+HAVE_SIGALTSTACK_01 = @HAVE_SIGALTSTACK_01@
+HAVE_SIGSTACK_01 = @HAVE_SIGSTACK_01@
+HAVE_STACK_T_01 = @HAVE_STACK_T_01@
+HAVE_SYS_RESOURCE_H_01 = @HAVE_SYS_RESOURCE_H_01@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LEX = @LEX@
+LEXLIB = @LEXLIB@
+LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
+LIBCURSES = @LIBCURSES@
+LIBGMPXX_LDFLAGS = @LIBGMPXX_LDFLAGS@
+LIBGMP_DLL = @LIBGMP_DLL@
+LIBGMP_LDFLAGS = @LIBGMP_LDFLAGS@
+LIBM = @LIBM@
+LIBM_FOR_BUILD = @LIBM_FOR_BUILD@
+LIBOBJS = @LIBOBJS@
+LIBREADLINE = @LIBREADLINE@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+M4 = @M4@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
+STRIP = @STRIP@
+TAL_OBJECT = @TAL_OBJECT@
+TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
+U = @U@
+U_FOR_BUILD = @U_FOR_BUILD@
+VERSION = @VERSION@
+WITH_READLINE_01 = @WITH_READLINE_01@
+YACC = @YACC@
+YFLAGS = @YFLAGS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__leading_dot = @am__leading_dot@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+gmp_srclinks = @gmp_srclinks@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+mpn_objects = @mpn_objects@
+mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
+mpn_objs_in_libmp = @mpn_objs_in_libmp@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/tests
+LDADD = $(top_builddir)/tests/libtests.la $(top_builddir)/libgmp.la
+TESTS = $(check_PROGRAMS)
+
+# Temporary files used by the tests.  Removed automatically if the tests
+# pass, but ensure they're cleaned if they fail.
+#
+CLEANFILES = *.tmp
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+# Regenerate this directory's Makefile.in.
+# For each prerequisite that is newer than the target ($?): if it is one of
+# the configure inputs in $(am__configure_deps), hand the job to the
+# top-level am--refresh target and stop (exit 0 if the target exists
+# afterwards, exit 1 if the refresh failed).  Otherwise fall through and
+# rerun automake for tests/mpz/Makefile only.
+# NOTE(review): the @MAINTAINER_MODE_TRUE@ substitution presumably disables
+# the prerequisites outside maintainer mode -- confirm against configure.in.
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps)
+       @for dep in $?; do \
+         case '$(am__configure_deps)' in \
+           *$$dep*) \
+             ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+               && { if test -f $@; then exit 0; else break; fi; }; \
+             exit 1;; \
+         esac; \
+       done; \
+       echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps tests/mpz/Makefile'; \
+       $(am__cd) $(top_srcdir) && \
+         $(AUTOMAKE) --gnu --ignore-deps tests/mpz/Makefile
+# Recreate Makefile from Makefile.in.  When config.status is among the
+# newer prerequisites, refresh the whole tree through the top-level
+# am--refresh target; otherwise rerun config.status for this one file,
+# echoing the command first because the recipe itself is silenced with `@'.
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+       @case '$?' in \
+         *config.status*) \
+           cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+         *) \
+           echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+           cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+       esac;
+
+# config.status, configure and aclocal.m4 are all rebuilt the same way: by
+# running the am--refresh target in the top-level build directory.
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+# Deliberately empty rule: no prerequisites and no recipe.
+$(am__aclocal_m4_deps):
+
+# Delete the built test programs.  Does nothing when the list is empty.
+# When $(EXEEXT) is non-empty, a second pass strips that suffix from each
+# name with sed and removes the extension-less files as well.  Each rm
+# command is echoed before it runs.
+clean-checkPROGRAMS:
+       @list='$(check_PROGRAMS)'; test -n "$$list" || exit 0; \
+       echo " rm -f" $$list; \
+       rm -f $$list || exit $$?; \
+       test -n "$(EXEEXT)" || exit 0; \
+       list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
+       echo " rm -f" $$list; \
+       rm -f $$list
+bit$(EXEEXT): $(bit_OBJECTS) $(bit_DEPENDENCIES) 
+       @rm -f bit$(EXEEXT)
+       $(LINK) $(bit_OBJECTS) $(bit_LDADD) $(LIBS)
+convert$(EXEEXT): $(convert_OBJECTS) $(convert_DEPENDENCIES) 
+       @rm -f convert$(EXEEXT)
+       $(LINK) $(convert_OBJECTS) $(convert_LDADD) $(LIBS)
+dive$(EXEEXT): $(dive_OBJECTS) $(dive_DEPENDENCIES) 
+       @rm -f dive$(EXEEXT)
+       $(LINK) $(dive_OBJECTS) $(dive_LDADD) $(LIBS)
+dive_ui$(EXEEXT): $(dive_ui_OBJECTS) $(dive_ui_DEPENDENCIES) 
+       @rm -f dive_ui$(EXEEXT)
+       $(LINK) $(dive_ui_OBJECTS) $(dive_ui_LDADD) $(LIBS)
+io$(EXEEXT): $(io_OBJECTS) $(io_DEPENDENCIES) 
+       @rm -f io$(EXEEXT)
+       $(LINK) $(io_OBJECTS) $(io_LDADD) $(LIBS)
+logic$(EXEEXT): $(logic_OBJECTS) $(logic_DEPENDENCIES) 
+       @rm -f logic$(EXEEXT)
+       $(LINK) $(logic_OBJECTS) $(logic_LDADD) $(LIBS)
+reuse$(EXEEXT): $(reuse_OBJECTS) $(reuse_DEPENDENCIES) 
+       @rm -f reuse$(EXEEXT)
+       $(LINK) $(reuse_OBJECTS) $(reuse_LDADD) $(LIBS)
+t-addsub$(EXEEXT): $(t_addsub_OBJECTS) $(t_addsub_DEPENDENCIES) 
+       @rm -f t-addsub$(EXEEXT)
+       $(LINK) $(t_addsub_OBJECTS) $(t_addsub_LDADD) $(LIBS)
+t-aorsmul$(EXEEXT): $(t_aorsmul_OBJECTS) $(t_aorsmul_DEPENDENCIES) 
+       @rm -f t-aorsmul$(EXEEXT)
+       $(LINK) $(t_aorsmul_OBJECTS) $(t_aorsmul_LDADD) $(LIBS)
+t-bin$(EXEEXT): $(t_bin_OBJECTS) $(t_bin_DEPENDENCIES) 
+       @rm -f t-bin$(EXEEXT)
+       $(LINK) $(t_bin_OBJECTS) $(t_bin_LDADD) $(LIBS)
+t-cdiv_ui$(EXEEXT): $(t_cdiv_ui_OBJECTS) $(t_cdiv_ui_DEPENDENCIES) 
+       @rm -f t-cdiv_ui$(EXEEXT)
+       $(LINK) $(t_cdiv_ui_OBJECTS) $(t_cdiv_ui_LDADD) $(LIBS)
+t-cmp$(EXEEXT): $(t_cmp_OBJECTS) $(t_cmp_DEPENDENCIES) 
+       @rm -f t-cmp$(EXEEXT)
+       $(LINK) $(t_cmp_OBJECTS) $(t_cmp_LDADD) $(LIBS)
+t-cmp_d$(EXEEXT): $(t_cmp_d_OBJECTS) $(t_cmp_d_DEPENDENCIES) 
+       @rm -f t-cmp_d$(EXEEXT)
+       $(LINK) $(t_cmp_d_OBJECTS) $(t_cmp_d_LDADD) $(LIBS)
+t-cmp_si$(EXEEXT): $(t_cmp_si_OBJECTS) $(t_cmp_si_DEPENDENCIES) 
+       @rm -f t-cmp_si$(EXEEXT)
+       $(LINK) $(t_cmp_si_OBJECTS) $(t_cmp_si_LDADD) $(LIBS)
+t-cong$(EXEEXT): $(t_cong_OBJECTS) $(t_cong_DEPENDENCIES) 
+       @rm -f t-cong$(EXEEXT)
+       $(LINK) $(t_cong_OBJECTS) $(t_cong_LDADD) $(LIBS)
+t-cong_2exp$(EXEEXT): $(t_cong_2exp_OBJECTS) $(t_cong_2exp_DEPENDENCIES) 
+       @rm -f t-cong_2exp$(EXEEXT)
+       $(LINK) $(t_cong_2exp_OBJECTS) $(t_cong_2exp_LDADD) $(LIBS)
+t-div_2exp$(EXEEXT): $(t_div_2exp_OBJECTS) $(t_div_2exp_DEPENDENCIES) 
+       @rm -f t-div_2exp$(EXEEXT)
+       $(LINK) $(t_div_2exp_OBJECTS) $(t_div_2exp_LDADD) $(LIBS)
+t-divis$(EXEEXT): $(t_divis_OBJECTS) $(t_divis_DEPENDENCIES) 
+       @rm -f t-divis$(EXEEXT)
+       $(LINK) $(t_divis_OBJECTS) $(t_divis_LDADD) $(LIBS)
+t-divis_2exp$(EXEEXT): $(t_divis_2exp_OBJECTS) $(t_divis_2exp_DEPENDENCIES) 
+       @rm -f t-divis_2exp$(EXEEXT)
+       $(LINK) $(t_divis_2exp_OBJECTS) $(t_divis_2exp_LDADD) $(LIBS)
+t-export$(EXEEXT): $(t_export_OBJECTS) $(t_export_DEPENDENCIES) 
+       @rm -f t-export$(EXEEXT)
+       $(LINK) $(t_export_OBJECTS) $(t_export_LDADD) $(LIBS)
+t-fac_ui$(EXEEXT): $(t_fac_ui_OBJECTS) $(t_fac_ui_DEPENDENCIES) 
+       @rm -f t-fac_ui$(EXEEXT)
+       $(LINK) $(t_fac_ui_OBJECTS) $(t_fac_ui_LDADD) $(LIBS)
+t-fdiv$(EXEEXT): $(t_fdiv_OBJECTS) $(t_fdiv_DEPENDENCIES) 
+       @rm -f t-fdiv$(EXEEXT)
+       $(LINK) $(t_fdiv_OBJECTS) $(t_fdiv_LDADD) $(LIBS)
+t-fdiv_ui$(EXEEXT): $(t_fdiv_ui_OBJECTS) $(t_fdiv_ui_DEPENDENCIES) 
+       @rm -f t-fdiv_ui$(EXEEXT)
+       $(LINK) $(t_fdiv_ui_OBJECTS) $(t_fdiv_ui_LDADD) $(LIBS)
+t-fib_ui$(EXEEXT): $(t_fib_ui_OBJECTS) $(t_fib_ui_DEPENDENCIES) 
+       @rm -f t-fib_ui$(EXEEXT)
+       $(LINK) $(t_fib_ui_OBJECTS) $(t_fib_ui_LDADD) $(LIBS)
+t-fits$(EXEEXT): $(t_fits_OBJECTS) $(t_fits_DEPENDENCIES) 
+       @rm -f t-fits$(EXEEXT)
+       $(LINK) $(t_fits_OBJECTS) $(t_fits_LDADD) $(LIBS)
+t-gcd$(EXEEXT): $(t_gcd_OBJECTS) $(t_gcd_DEPENDENCIES) 
+       @rm -f t-gcd$(EXEEXT)
+       $(LINK) $(t_gcd_OBJECTS) $(t_gcd_LDADD) $(LIBS)
+t-gcd_ui$(EXEEXT): $(t_gcd_ui_OBJECTS) $(t_gcd_ui_DEPENDENCIES) 
+       @rm -f t-gcd_ui$(EXEEXT)
+       $(LINK) $(t_gcd_ui_OBJECTS) $(t_gcd_ui_LDADD) $(LIBS)
+t-get_d$(EXEEXT): $(t_get_d_OBJECTS) $(t_get_d_DEPENDENCIES) 
+       @rm -f t-get_d$(EXEEXT)
+       $(LINK) $(t_get_d_OBJECTS) $(t_get_d_LDADD) $(LIBS)
+t-get_d_2exp$(EXEEXT): $(t_get_d_2exp_OBJECTS) $(t_get_d_2exp_DEPENDENCIES) 
+       @rm -f t-get_d_2exp$(EXEEXT)
+       $(LINK) $(t_get_d_2exp_OBJECTS) $(t_get_d_2exp_LDADD) $(LIBS)
+t-get_si$(EXEEXT): $(t_get_si_OBJECTS) $(t_get_si_DEPENDENCIES) 
+       @rm -f t-get_si$(EXEEXT)
+       $(LINK) $(t_get_si_OBJECTS) $(t_get_si_LDADD) $(LIBS)
+t-hamdist$(EXEEXT): $(t_hamdist_OBJECTS) $(t_hamdist_DEPENDENCIES) 
+       @rm -f t-hamdist$(EXEEXT)
+       $(LINK) $(t_hamdist_OBJECTS) $(t_hamdist_LDADD) $(LIBS)
+t-import$(EXEEXT): $(t_import_OBJECTS) $(t_import_DEPENDENCIES) 
+       @rm -f t-import$(EXEEXT)
+       $(LINK) $(t_import_OBJECTS) $(t_import_LDADD) $(LIBS)
+t-inp_str$(EXEEXT): $(t_inp_str_OBJECTS) $(t_inp_str_DEPENDENCIES) 
+       @rm -f t-inp_str$(EXEEXT)
+       $(LINK) $(t_inp_str_OBJECTS) $(t_inp_str_LDADD) $(LIBS)
+t-invert$(EXEEXT): $(t_invert_OBJECTS) $(t_invert_DEPENDENCIES) 
+       @rm -f t-invert$(EXEEXT)
+       $(LINK) $(t_invert_OBJECTS) $(t_invert_LDADD) $(LIBS)
+t-io_raw$(EXEEXT): $(t_io_raw_OBJECTS) $(t_io_raw_DEPENDENCIES) 
+       @rm -f t-io_raw$(EXEEXT)
+       $(LINK) $(t_io_raw_OBJECTS) $(t_io_raw_LDADD) $(LIBS)
+t-jac$(EXEEXT): $(t_jac_OBJECTS) $(t_jac_DEPENDENCIES) 
+       @rm -f t-jac$(EXEEXT)
+       $(LINK) $(t_jac_OBJECTS) $(t_jac_LDADD) $(LIBS)
+t-lcm$(EXEEXT): $(t_lcm_OBJECTS) $(t_lcm_DEPENDENCIES) 
+       @rm -f t-lcm$(EXEEXT)
+       $(LINK) $(t_lcm_OBJECTS) $(t_lcm_LDADD) $(LIBS)
+t-lucnum_ui$(EXEEXT): $(t_lucnum_ui_OBJECTS) $(t_lucnum_ui_DEPENDENCIES) 
+       @rm -f t-lucnum_ui$(EXEEXT)
+       $(LINK) $(t_lucnum_ui_OBJECTS) $(t_lucnum_ui_LDADD) $(LIBS)
+t-mul$(EXEEXT): $(t_mul_OBJECTS) $(t_mul_DEPENDENCIES) 
+       @rm -f t-mul$(EXEEXT)
+       $(LINK) $(t_mul_OBJECTS) $(t_mul_LDADD) $(LIBS)
+t-mul_i$(EXEEXT): $(t_mul_i_OBJECTS) $(t_mul_i_DEPENDENCIES) 
+       @rm -f t-mul_i$(EXEEXT)
+       $(LINK) $(t_mul_i_OBJECTS) $(t_mul_i_LDADD) $(LIBS)
+t-nextprime$(EXEEXT): $(t_nextprime_OBJECTS) $(t_nextprime_DEPENDENCIES) 
+       @rm -f t-nextprime$(EXEEXT)
+       $(LINK) $(t_nextprime_OBJECTS) $(t_nextprime_LDADD) $(LIBS)
+t-oddeven$(EXEEXT): $(t_oddeven_OBJECTS) $(t_oddeven_DEPENDENCIES) 
+       @rm -f t-oddeven$(EXEEXT)
+       $(LINK) $(t_oddeven_OBJECTS) $(t_oddeven_LDADD) $(LIBS)
+t-perfpow$(EXEEXT): $(t_perfpow_OBJECTS) $(t_perfpow_DEPENDENCIES) 
+       @rm -f t-perfpow$(EXEEXT)
+       $(LINK) $(t_perfpow_OBJECTS) $(t_perfpow_LDADD) $(LIBS)
+t-perfsqr$(EXEEXT): $(t_perfsqr_OBJECTS) $(t_perfsqr_DEPENDENCIES) 
+       @rm -f t-perfsqr$(EXEEXT)
+       $(LINK) $(t_perfsqr_OBJECTS) $(t_perfsqr_LDADD) $(LIBS)
+t-popcount$(EXEEXT): $(t_popcount_OBJECTS) $(t_popcount_DEPENDENCIES) 
+       @rm -f t-popcount$(EXEEXT)
+       $(LINK) $(t_popcount_OBJECTS) $(t_popcount_LDADD) $(LIBS)
+t-pow$(EXEEXT): $(t_pow_OBJECTS) $(t_pow_DEPENDENCIES) 
+       @rm -f t-pow$(EXEEXT)
+       $(LINK) $(t_pow_OBJECTS) $(t_pow_LDADD) $(LIBS)
+t-powm$(EXEEXT): $(t_powm_OBJECTS) $(t_powm_DEPENDENCIES) 
+       @rm -f t-powm$(EXEEXT)
+       $(LINK) $(t_powm_OBJECTS) $(t_powm_LDADD) $(LIBS)
+t-powm_ui$(EXEEXT): $(t_powm_ui_OBJECTS) $(t_powm_ui_DEPENDENCIES) 
+       @rm -f t-powm_ui$(EXEEXT)
+       $(LINK) $(t_powm_ui_OBJECTS) $(t_powm_ui_LDADD) $(LIBS)
+t-pprime_p$(EXEEXT): $(t_pprime_p_OBJECTS) $(t_pprime_p_DEPENDENCIES) 
+       @rm -f t-pprime_p$(EXEEXT)
+       $(LINK) $(t_pprime_p_OBJECTS) $(t_pprime_p_LDADD) $(LIBS)
+t-root$(EXEEXT): $(t_root_OBJECTS) $(t_root_DEPENDENCIES) 
+       @rm -f t-root$(EXEEXT)
+       $(LINK) $(t_root_OBJECTS) $(t_root_LDADD) $(LIBS)
+t-scan$(EXEEXT): $(t_scan_OBJECTS) $(t_scan_DEPENDENCIES) 
+       @rm -f t-scan$(EXEEXT)
+       $(LINK) $(t_scan_OBJECTS) $(t_scan_LDADD) $(LIBS)
+t-set_d$(EXEEXT): $(t_set_d_OBJECTS) $(t_set_d_DEPENDENCIES) 
+       @rm -f t-set_d$(EXEEXT)
+       $(LINK) $(t_set_d_OBJECTS) $(t_set_d_LDADD) $(LIBS)
+t-set_f$(EXEEXT): $(t_set_f_OBJECTS) $(t_set_f_DEPENDENCIES) 
+       @rm -f t-set_f$(EXEEXT)
+       $(LINK) $(t_set_f_OBJECTS) $(t_set_f_LDADD) $(LIBS)
+t-set_si$(EXEEXT): $(t_set_si_OBJECTS) $(t_set_si_DEPENDENCIES) 
+       @rm -f t-set_si$(EXEEXT)
+       $(LINK) $(t_set_si_OBJECTS) $(t_set_si_LDADD) $(LIBS)
+t-set_str$(EXEEXT): $(t_set_str_OBJECTS) $(t_set_str_DEPENDENCIES) 
+       @rm -f t-set_str$(EXEEXT)
+       $(LINK) $(t_set_str_OBJECTS) $(t_set_str_LDADD) $(LIBS)
+t-sizeinbase$(EXEEXT): $(t_sizeinbase_OBJECTS) $(t_sizeinbase_DEPENDENCIES) 
+       @rm -f t-sizeinbase$(EXEEXT)
+       $(LINK) $(t_sizeinbase_OBJECTS) $(t_sizeinbase_LDADD) $(LIBS)
+t-sqrtrem$(EXEEXT): $(t_sqrtrem_OBJECTS) $(t_sqrtrem_DEPENDENCIES) 
+       @rm -f t-sqrtrem$(EXEEXT)
+       $(LINK) $(t_sqrtrem_OBJECTS) $(t_sqrtrem_LDADD) $(LIBS)
+t-tdiv$(EXEEXT): $(t_tdiv_OBJECTS) $(t_tdiv_DEPENDENCIES) 
+       @rm -f t-tdiv$(EXEEXT)
+       $(LINK) $(t_tdiv_OBJECTS) $(t_tdiv_LDADD) $(LIBS)
+t-tdiv_ui$(EXEEXT): $(t_tdiv_ui_OBJECTS) $(t_tdiv_ui_DEPENDENCIES) 
+       @rm -f t-tdiv_ui$(EXEEXT)
+       $(LINK) $(t_tdiv_ui_OBJECTS) $(t_tdiv_ui_LDADD) $(LIBS)
+
+# Remove compiled object files from the build directory.
+mostlyclean-compile:
+       -rm -f *.$(OBJEXT)
+
+# Remove any *.tab.c files from the build directory.
+distclean-compile:
+       -rm -f *.tab.c
+# Build ansi2knr by running make for ./ansi2knr inside $(top_builddir).
+$(top_builddir)/ansi2knr:
+       $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
+
+# Unless $U is empty, remove the *_.c files, i.e. the sources produced by
+# the $(ANSI2KNR) conversion rules further down in this file.
+mostlyclean-kr:
+       -test "$U" = "" || rm -f *_.c
+
+# Suffix rule: compile a C source with $(COMPILE).
+.c.o:
+       $(COMPILE) -c $<
+
+# Same as .c.o, but the source path is first passed through $(CYGPATH_W).
+.c.obj:
+       $(COMPILE) -c `$(CYGPATH_W) '$<'`
+
+# Suffix rule: compile a C source into a .lo object with $(LTCOMPILE).
+.c.lo:
+       $(LTCOMPILE) -c -o $@ $<
+bit_.c: bit.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/bit.c; then echo $(srcdir)/bit.c; else echo bit.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+convert_.c: convert.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/convert.c; then echo $(srcdir)/convert.c; else echo convert.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+dive_.c: dive.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/dive.c; then echo $(srcdir)/dive.c; else echo dive.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+dive_ui_.c: dive_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/dive_ui.c; then echo $(srcdir)/dive_ui.c; else echo dive_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+io_.c: io.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/io.c; then echo $(srcdir)/io.c; else echo io.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+logic_.c: logic.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/logic.c; then echo $(srcdir)/logic.c; else echo logic.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+reuse_.c: reuse.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/reuse.c; then echo $(srcdir)/reuse.c; else echo reuse.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-addsub_.c: t-addsub.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-addsub.c; then echo $(srcdir)/t-addsub.c; else echo t-addsub.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-aorsmul_.c: t-aorsmul.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-aorsmul.c; then echo $(srcdir)/t-aorsmul.c; else echo t-aorsmul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-bin_.c: t-bin.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-bin.c; then echo $(srcdir)/t-bin.c; else echo t-bin.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-cdiv_ui_.c: t-cdiv_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-cdiv_ui.c; then echo $(srcdir)/t-cdiv_ui.c; else echo t-cdiv_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-cmp_.c: t-cmp.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-cmp.c; then echo $(srcdir)/t-cmp.c; else echo t-cmp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-cmp_d_.c: t-cmp_d.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-cmp_d.c; then echo $(srcdir)/t-cmp_d.c; else echo t-cmp_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-cmp_si_.c: t-cmp_si.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-cmp_si.c; then echo $(srcdir)/t-cmp_si.c; else echo t-cmp_si.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-cong_.c: t-cong.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-cong.c; then echo $(srcdir)/t-cong.c; else echo t-cong.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-cong_2exp_.c: t-cong_2exp.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-cong_2exp.c; then echo $(srcdir)/t-cong_2exp.c; else echo t-cong_2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-div_2exp_.c: t-div_2exp.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-div_2exp.c; then echo $(srcdir)/t-div_2exp.c; else echo t-div_2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-divis_.c: t-divis.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-divis.c; then echo $(srcdir)/t-divis.c; else echo t-divis.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-divis_2exp_.c: t-divis_2exp.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-divis_2exp.c; then echo $(srcdir)/t-divis_2exp.c; else echo t-divis_2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-export_.c: t-export.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-export.c; then echo $(srcdir)/t-export.c; else echo t-export.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-fac_ui_.c: t-fac_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-fac_ui.c; then echo $(srcdir)/t-fac_ui.c; else echo t-fac_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-fdiv_.c: t-fdiv.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-fdiv.c; then echo $(srcdir)/t-fdiv.c; else echo t-fdiv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-fdiv_ui_.c: t-fdiv_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-fdiv_ui.c; then echo $(srcdir)/t-fdiv_ui.c; else echo t-fdiv_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-fib_ui_.c: t-fib_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-fib_ui.c; then echo $(srcdir)/t-fib_ui.c; else echo t-fib_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-fits_.c: t-fits.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-fits.c; then echo $(srcdir)/t-fits.c; else echo t-fits.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-gcd_.c: t-gcd.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-gcd.c; then echo $(srcdir)/t-gcd.c; else echo t-gcd.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-gcd_ui_.c: t-gcd_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-gcd_ui.c; then echo $(srcdir)/t-gcd_ui.c; else echo t-gcd_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-get_d_.c: t-get_d.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-get_d.c; then echo $(srcdir)/t-get_d.c; else echo t-get_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-get_d_2exp_.c: t-get_d_2exp.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-get_d_2exp.c; then echo $(srcdir)/t-get_d_2exp.c; else echo t-get_d_2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-get_si_.c: t-get_si.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-get_si.c; then echo $(srcdir)/t-get_si.c; else echo t-get_si.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-hamdist_.c: t-hamdist.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-hamdist.c; then echo $(srcdir)/t-hamdist.c; else echo t-hamdist.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-import_.c: t-import.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-import.c; then echo $(srcdir)/t-import.c; else echo t-import.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-inp_str_.c: t-inp_str.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-inp_str.c; then echo $(srcdir)/t-inp_str.c; else echo t-inp_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-invert_.c: t-invert.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-invert.c; then echo $(srcdir)/t-invert.c; else echo t-invert.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-io_raw_.c: t-io_raw.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-io_raw.c; then echo $(srcdir)/t-io_raw.c; else echo t-io_raw.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-jac_.c: t-jac.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-jac.c; then echo $(srcdir)/t-jac.c; else echo t-jac.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-lcm_.c: t-lcm.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-lcm.c; then echo $(srcdir)/t-lcm.c; else echo t-lcm.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-lucnum_ui_.c: t-lucnum_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-lucnum_ui.c; then echo $(srcdir)/t-lucnum_ui.c; else echo t-lucnum_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-mul_.c: t-mul.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-mul.c; then echo $(srcdir)/t-mul.c; else echo t-mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-mul_i_.c: t-mul_i.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-mul_i.c; then echo $(srcdir)/t-mul_i.c; else echo t-mul_i.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-nextprime_.c: t-nextprime.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-nextprime.c; then echo $(srcdir)/t-nextprime.c; else echo t-nextprime.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-oddeven_.c: t-oddeven.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-oddeven.c; then echo $(srcdir)/t-oddeven.c; else echo t-oddeven.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-perfpow_.c: t-perfpow.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-perfpow.c; then echo $(srcdir)/t-perfpow.c; else echo t-perfpow.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-perfsqr_.c: t-perfsqr.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-perfsqr.c; then echo $(srcdir)/t-perfsqr.c; else echo t-perfsqr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-popcount_.c: t-popcount.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-popcount.c; then echo $(srcdir)/t-popcount.c; else echo t-popcount.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-pow_.c: t-pow.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-pow.c; then echo $(srcdir)/t-pow.c; else echo t-pow.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-powm_.c: t-powm.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-powm.c; then echo $(srcdir)/t-powm.c; else echo t-powm.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-powm_ui_.c: t-powm_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-powm_ui.c; then echo $(srcdir)/t-powm_ui.c; else echo t-powm_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-pprime_p_.c: t-pprime_p.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-pprime_p.c; then echo $(srcdir)/t-pprime_p.c; else echo t-pprime_p.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-root_.c: t-root.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-root.c; then echo $(srcdir)/t-root.c; else echo t-root.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-scan_.c: t-scan.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-scan.c; then echo $(srcdir)/t-scan.c; else echo t-scan.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-set_d_.c: t-set_d.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-set_d.c; then echo $(srcdir)/t-set_d.c; else echo t-set_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-set_f_.c: t-set_f.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-set_f.c; then echo $(srcdir)/t-set_f.c; else echo t-set_f.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-set_si_.c: t-set_si.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-set_si.c; then echo $(srcdir)/t-set_si.c; else echo t-set_si.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-set_str_.c: t-set_str.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-set_str.c; then echo $(srcdir)/t-set_str.c; else echo t-set_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-sizeinbase_.c: t-sizeinbase.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-sizeinbase.c; then echo $(srcdir)/t-sizeinbase.c; else echo t-sizeinbase.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-sqrtrem_.c: t-sqrtrem.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-sqrtrem.c; then echo $(srcdir)/t-sqrtrem.c; else echo t-sqrtrem.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-tdiv_.c: t-tdiv.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-tdiv.c; then echo $(srcdir)/t-tdiv.c; else echo t-tdiv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-tdiv_ui_.c: t-tdiv_ui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-tdiv_ui.c; then echo $(srcdir)/t-tdiv_ui.c; else echo t-tdiv_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+bit_.$(OBJEXT) bit_.lo convert_.$(OBJEXT) convert_.lo dive_.$(OBJEXT) \
+dive_.lo dive_ui_.$(OBJEXT) dive_ui_.lo io_.$(OBJEXT) io_.lo \
+logic_.$(OBJEXT) logic_.lo reuse_.$(OBJEXT) reuse_.lo \
+t-addsub_.$(OBJEXT) t-addsub_.lo t-aorsmul_.$(OBJEXT) t-aorsmul_.lo \
+t-bin_.$(OBJEXT) t-bin_.lo t-cdiv_ui_.$(OBJEXT) t-cdiv_ui_.lo \
+t-cmp_.$(OBJEXT) t-cmp_.lo t-cmp_d_.$(OBJEXT) t-cmp_d_.lo \
+t-cmp_si_.$(OBJEXT) t-cmp_si_.lo t-cong_.$(OBJEXT) t-cong_.lo \
+t-cong_2exp_.$(OBJEXT) t-cong_2exp_.lo t-div_2exp_.$(OBJEXT) \
+t-div_2exp_.lo t-divis_.$(OBJEXT) t-divis_.lo t-divis_2exp_.$(OBJEXT) \
+t-divis_2exp_.lo t-export_.$(OBJEXT) t-export_.lo t-fac_ui_.$(OBJEXT) \
+t-fac_ui_.lo t-fdiv_.$(OBJEXT) t-fdiv_.lo t-fdiv_ui_.$(OBJEXT) \
+t-fdiv_ui_.lo t-fib_ui_.$(OBJEXT) t-fib_ui_.lo t-fits_.$(OBJEXT) \
+t-fits_.lo t-gcd_.$(OBJEXT) t-gcd_.lo t-gcd_ui_.$(OBJEXT) t-gcd_ui_.lo \
+t-get_d_.$(OBJEXT) t-get_d_.lo t-get_d_2exp_.$(OBJEXT) \
+t-get_d_2exp_.lo t-get_si_.$(OBJEXT) t-get_si_.lo t-hamdist_.$(OBJEXT) \
+t-hamdist_.lo t-import_.$(OBJEXT) t-import_.lo t-inp_str_.$(OBJEXT) \
+t-inp_str_.lo t-invert_.$(OBJEXT) t-invert_.lo t-io_raw_.$(OBJEXT) \
+t-io_raw_.lo t-jac_.$(OBJEXT) t-jac_.lo t-lcm_.$(OBJEXT) t-lcm_.lo \
+t-lucnum_ui_.$(OBJEXT) t-lucnum_ui_.lo t-mul_.$(OBJEXT) t-mul_.lo \
+t-mul_i_.$(OBJEXT) t-mul_i_.lo t-nextprime_.$(OBJEXT) t-nextprime_.lo \
+t-oddeven_.$(OBJEXT) t-oddeven_.lo t-perfpow_.$(OBJEXT) t-perfpow_.lo \
+t-perfsqr_.$(OBJEXT) t-perfsqr_.lo t-popcount_.$(OBJEXT) \
+t-popcount_.lo t-pow_.$(OBJEXT) t-pow_.lo t-powm_.$(OBJEXT) t-powm_.lo \
+t-powm_ui_.$(OBJEXT) t-powm_ui_.lo t-pprime_p_.$(OBJEXT) \
+t-pprime_p_.lo t-root_.$(OBJEXT) t-root_.lo t-scan_.$(OBJEXT) \
+t-scan_.lo t-set_d_.$(OBJEXT) t-set_d_.lo t-set_f_.$(OBJEXT) \
+t-set_f_.lo t-set_si_.$(OBJEXT) t-set_si_.lo t-set_str_.$(OBJEXT) \
+t-set_str_.lo t-sizeinbase_.$(OBJEXT) t-sizeinbase_.lo \
+t-sqrtrem_.$(OBJEXT) t-sqrtrem_.lo t-tdiv_.$(OBJEXT) t-tdiv_.lo \
+t-tdiv_ui_.$(OBJEXT) t-tdiv_ui_.lo : $(ANSI2KNR)
+
+mostlyclean-libtool:
+       -rm -f *.lo
+
+clean-libtool:
+       -rm -rf .libs _libs
+
+# Build an ID database with mkid.  The file list is $(SOURCES), $(HEADERS),
+# $(LISP) and $(TAGS_FILES); each entry is used as-is when it exists in the
+# build directory, otherwise it is prefixed with $(srcdir).  $(AWK) removes
+# duplicate entries.
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+       list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       mkid -fID $$unique
+# Lower-case alias for TAGS.
+tags: TAGS
+
+# Run $(ETAGS) over the same de-duplicated file list as ID.  Nothing is run
+# when $(ETAGS_ARGS), the positional arguments and the file list are all
+# empty.
+TAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+               $(TAGS_FILES) $(LISP)
+       set x; \
+       here=`pwd`; \
+       list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       shift; \
+       if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+         test -n "$$unique" || unique=$$empty_fix; \
+         if test $$# -gt 0; then \
+           $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+             "$$@" $$unique; \
+         else \
+           $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+             $$unique; \
+         fi; \
+       fi
+# Lower-case alias for CTAGS.
+ctags: CTAGS
+# Run $(CTAGS) over the same de-duplicated file list; skipped when both
+# $(CTAGS_ARGS) and the file list are empty.
+CTAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+               $(TAGS_FILES) $(LISP)
+       list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       test -z "$(CTAGS_ARGS)$$unique" \
+         || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+            $$unique
+
+# Run gtags from $(top_srcdir), passing it the absolute path of
+# $(top_builddir) as its argument.
+GTAGS:
+       here=`$(am__cd) $(top_builddir) && pwd` \
+         && $(am__cd) $(top_srcdir) \
+         && gtags -i $(GTAGS_ARGS) "$$here"
+
+# Remove every tag database the rules above can create.
+distclean-tags:
+       -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+# Run every program listed in $(TESTS) and print a per-test result line
+# followed by a summary banner.
+#   * Each test is looked up as ./TEST, then TEST, then $(srcdir)/TEST, and
+#     is started through $(TESTS_ENVIRONMENT) with srcdir exported.
+#   * Exit status 0 counts as PASS, or as XPASS (counted as a failure) when
+#     the test is listed in $(XFAIL_TESTS).
+#   * Exit status 77 counts as SKIP and is left out of the test total.
+#   * Any other status counts as FAIL, or as XFAIL (not counted as a
+#     failure) when the test is listed in $(XFAIL_TESTS).
+#   * When something failed and $(PACKAGE_BUGREPORT) is non-empty, a
+#     "Please report to ..." line is added to the summary.
+# The recipe succeeds only when the failure count is zero.
+check-TESTS: $(TESTS)
+       @failed=0; all=0; xfail=0; xpass=0; skip=0; \
+       srcdir=$(srcdir); export srcdir; \
+       list=' $(TESTS) '; \
+       $(am__tty_colors); \
+       if test -n "$$list"; then \
+         for tst in $$list; do \
+           if test -f ./$$tst; then dir=./; \
+           elif test -f $$tst; then dir=; \
+           else dir="$(srcdir)/"; fi; \
+           if $(TESTS_ENVIRONMENT) $${dir}$$tst; then \
+             all=`expr $$all + 1`; \
+             case " $(XFAIL_TESTS) " in \
+             *[\ \     ]$$tst[\ \      ]*) \
+               xpass=`expr $$xpass + 1`; \
+               failed=`expr $$failed + 1`; \
+               col=$$red; res=XPASS; \
+             ;; \
+             *) \
+               col=$$grn; res=PASS; \
+             ;; \
+             esac; \
+           elif test $$? -ne 77; then \
+             all=`expr $$all + 1`; \
+             case " $(XFAIL_TESTS) " in \
+             *[\ \     ]$$tst[\ \      ]*) \
+               xfail=`expr $$xfail + 1`; \
+               col=$$lgn; res=XFAIL; \
+             ;; \
+             *) \
+               failed=`expr $$failed + 1`; \
+               col=$$red; res=FAIL; \
+             ;; \
+             esac; \
+           else \
+             skip=`expr $$skip + 1`; \
+             col=$$blu; res=SKIP; \
+           fi; \
+           echo "$${col}$$res$${std}: $$tst"; \
+         done; \
+         if test "$$all" -eq 1; then \
+           tests="test"; \
+           All=""; \
+         else \
+           tests="tests"; \
+           All="All "; \
+         fi; \
+         if test "$$failed" -eq 0; then \
+           if test "$$xfail" -eq 0; then \
+             banner="$$All$$all $$tests passed"; \
+           else \
+             if test "$$xfail" -eq 1; then failures=failure; else failures=failures; fi; \
+             banner="$$All$$all $$tests behaved as expected ($$xfail expected $$failures)"; \
+           fi; \
+         else \
+           if test "$$xpass" -eq 0; then \
+             banner="$$failed of $$all $$tests failed"; \
+           else \
+             if test "$$xpass" -eq 1; then passes=pass; else passes=passes; fi; \
+             banner="$$failed of $$all $$tests did not behave as expected ($$xpass unexpected $$passes)"; \
+           fi; \
+         fi; \
+         dashes="$$banner"; \
+         skipped=""; \
+         if test "$$skip" -ne 0; then \
+           if test "$$skip" -eq 1; then \
+             skipped="($$skip test was not run)"; \
+           else \
+             skipped="($$skip tests were not run)"; \
+           fi; \
+           test `echo "$$skipped" | wc -c` -le `echo "$$banner" | wc -c` || \
+             dashes="$$skipped"; \
+         fi; \
+         report=""; \
+         if test "$$failed" -ne 0 && test -n "$(PACKAGE_BUGREPORT)"; then \
+           report="Please report to $(PACKAGE_BUGREPORT)"; \
+           test `echo "$$report" | wc -c` -le `echo "$$banner" | wc -c` || \
+             dashes="$$report"; \
+         fi; \
+         dashes=`echo "$$dashes" | sed s/./=/g`; \
+         if test "$$failed" -eq 0; then \
+           echo "$$grn$$dashes"; \
+         else \
+           echo "$$red$$dashes"; \
+         fi; \
+         echo "$$banner"; \
+         test -z "$$skipped" || echo "$$skipped"; \
+         test -z "$$report" || echo "$$report"; \
+         echo "$$dashes$$std"; \
+         test "$$failed" -eq 0; \
+       else :; fi
+
+# Copy every entry of $(DISTFILES) into $(distdir).
+#   * A leading $(srcdir)/ is stripped from each name and a leading
+#     $(top_srcdir)/ is rewritten to $(top_builddir)/.
+#   * Subdirectories needed under $(distdir) are created with $(MKDIR_P).
+#   * Each entry is taken from the build directory when it exists there,
+#     otherwise from $(srcdir).
+#   * Directories are copied recursively; when one exists in both trees the
+#     $(srcdir) copy goes in first and the build-tree copy is laid on top.
+#   * Plain files are copied only when not already present in $(distdir).
+# Any failing copy aborts the recipe with exit status 1.
+distdir: $(DISTFILES)
+       @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+       topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+       list='$(DISTFILES)'; \
+         dist_files=`for file in $$list; do echo $$file; done | \
+         sed -e "s|^$$srcdirstrip/||;t" \
+             -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+       case $$dist_files in \
+         */*) $(MKDIR_P) `echo "$$dist_files" | \
+                          sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+                          sort -u` ;; \
+       esac; \
+       for file in $$dist_files; do \
+         if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+         if test -d $$d/$$file; then \
+           dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+           if test -d "$(distdir)/$$file"; then \
+             find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+           fi; \
+           if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+             cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+             find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+           fi; \
+           cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+         else \
+           test -f "$(distdir)/$$file" \
+           || cp -p $$d/$$file "$(distdir)/$$file" \
+           || exit 1; \
+         fi; \
+       done
+# Build all $(check_PROGRAMS) first, then run them through check-TESTS.
+# Two separate sub-makes are used, so the programs are fully built before
+# any test starts.
+check-am: all-am
+       $(MAKE) $(AM_MAKEFLAGS) $(check_PROGRAMS)
+       $(MAKE) $(AM_MAKEFLAGS) check-TESTS
+check: check-am
+all-am: Makefile
+installdirs:
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+# Run the exec and data install steps in a single sub-make.
+install-am: all-am
+       @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+# Re-run "install" with the install program replaced by
+# $(INSTALL_STRIP_PROGRAM) and INSTALL_STRIP_FLAG set to -s.  When $(STRIP)
+# is non-empty it is also handed over as STRIPPROG via INSTALL_PROGRAM_ENV.
+install-strip:
+       $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+         install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+         `test -z '$(STRIP)' || \
+           echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+       -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES)
+
+distclean-generic:
+       -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+       -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+       @echo "This command is intended for maintainers to use"
+       @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-checkPROGRAMS clean-generic clean-libtool \
+       mostlyclean-am
+
+distclean: distclean-am
+       -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+       distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+       -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+       mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: $(top_builddir)/ansi2knr check-am install-am install-strip
+
+.PHONY: CTAGS GTAGS all all-am check check-TESTS check-am clean \
+       clean-checkPROGRAMS clean-generic clean-libtool ctags \
+       distclean distclean-compile distclean-generic \
+       distclean-libtool distclean-tags distdir dvi dvi-am html \
+       html-am info info-am install install-am install-data \
+       install-data-am install-dvi install-dvi-am install-exec \
+       install-exec-am install-html install-html-am install-info \
+       install-info-am install-man install-pdf install-pdf-am \
+       install-ps install-ps-am install-strip installcheck \
+       installcheck-am installdirs maintainer-clean \
+       maintainer-clean-generic mostlyclean mostlyclean-compile \
+       mostlyclean-generic mostlyclean-kr mostlyclean-libtool pdf \
+       pdf-am ps ps-am tags uninstall uninstall-am
+
+
+# Build libtests.la by running make inside $(top_builddir)/tests.  The cd
+# and the sub-make are joined with && so that a failed cd aborts the recipe
+# instead of running "$(MAKE) libtests.la" in the current directory.
+$(top_builddir)/tests/libtests.la:
+       cd $(top_builddir)/tests && $(MAKE) $(AM_MAKEFLAGS) libtests.la
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/tests/mpz/bit.c b/tests/mpz/bit.c

new file mode 100644 (file)

index 0000000..9f63e03
--- /dev/null
+++ b/tests/mpz/bit.c
@@ -0,0 +1,373 @@
+/* Test mpz_setbit, mpz_clrbit, mpz_tstbit.
+
+Copyright 1997, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+#ifndef SIZE
+#define SIZE 4
+#endif
+
+
+void
+debug_mp (mpz_srcptr x, int base)
+{
+  mpz_out_str (stdout, base, x); fputc ('\n', stdout);
+}
+
+
+/* Exercise the case where mpz_clrbit or mpz_combit ends up extending a
+   value like -2^(k*GMP_NUMB_BITS-1) when clearing bit k*GMP_NUMB_BITS-1.  */
+void
+check_clr_extend (void)
+{
+  mpz_t          got, want;
+  unsigned long  i;
+  int            f;
+
+  mpz_init (got);
+  mpz_init (want);
+
+  for (i = 1; i < 5; i++)
+    {
+      for (f = 0; f <= 1; f++)  /* f==0 tests mpz_clrbit, f==1 mpz_combit */
+       {
+         /* got = 2^(10*GMP_NUMB_BITS)-1, lots of 1 bits in _mp_d */
+         mpz_set_ui (got, 1L);
+         mpz_mul_2exp (got, got, 10*GMP_NUMB_BITS);
+         mpz_sub_ui (got, got, 1L);
+
+         /* value -2^(n-1) representing ..11100..00, n = i*GMP_NUMB_BITS */
+         mpz_set_si (got, -1L);
+         mpz_mul_2exp (got, got, i*GMP_NUMB_BITS-1);
+
+         /* clear or complement bit n-1, giving ..11000..00 which is -2^n */
+         if (f == 0)
+           mpz_clrbit (got, i*GMP_NUMB_BITS-1);
+         else
+           mpz_combit (got, i*GMP_NUMB_BITS-1);
+         MPZ_CHECK_FORMAT (got);
+
+         mpz_set_si (want, -1L);
+         mpz_mul_2exp (want, want, i*GMP_NUMB_BITS);  /* want = -2^n */
+
+         if (mpz_cmp (got, want) != 0)
+           {
+             if (f == 0)
+               printf ("mpz_clrbit: ");
+             else
+               printf ("mpz_combit: ");
+             printf ("wrong after extension\n");
+             mpz_trace ("got ", got);
+             mpz_trace ("want", want);
+             abort ();
+           }
+       }
+    }
+
+  mpz_clear (got);
+  mpz_clear (want);
+}
+
+void
+check_com_negs (void)
+{
+  static const struct {
+    unsigned long  bit;
+    mp_size_t      inp_size;
+    mp_limb_t      inp_n[5];
+    mp_size_t      want_size;
+    mp_limb_t      want_n[5];
+  } data[] = {
+    { GMP_NUMB_BITS,   2, { 1, 1 },  1, { 1 } },
+    { GMP_NUMB_BITS+1, 2, { 1, 1 },  2, { 1, 3 } },
+
+    { GMP_NUMB_BITS,   2, { 0, 1 },  2, { 0, 2 } },
+    { GMP_NUMB_BITS+1, 2, { 0, 1 },  2, { 0, 3 } },
+  };
+  mpz_t  inp, got, want;
+  int    i;
+
+  mpz_init (got);
+  mpz_init (want);
+  mpz_init (inp);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      mpz_set_n (inp, data[i].inp_n, data[i].inp_size);
+      mpz_neg (inp, inp);
+
+      mpz_set_n (want, data[i].want_n, data[i].want_size);
+      mpz_neg (want, want);
+
+      mpz_set (got, inp);
+      mpz_combit (got, data[i].bit);
+
+      if (mpz_cmp (got, want) != 0)
+       {
+         printf ("mpz_combit: wrong on neg data[%d]\n", i);
+         mpz_trace ("inp ", inp);
+         printf    ("bit %lu\n", data[i].bit);
+         mpz_trace ("got ", got);
+         mpz_trace ("want", want);
+         abort ();
+       }
+    }
+
+  mpz_clear (inp);
+  mpz_clear (got);
+  mpz_clear (want);
+}
+
+/* See that mpz_tstbit matches a twos complement calculated explicitly, for
+   various low zeros.  */
+void
+check_tstbit (void)
+{
+#define MAX_ZEROS  3
+#define NUM_LIMBS  3
+
+  mp_limb_t      pos[1+NUM_LIMBS+MAX_ZEROS];
+  mp_limb_t      neg[1+NUM_LIMBS+MAX_ZEROS];
+  mpz_t          z;
+  unsigned long  i;
+  int            zeros, low1;
+  int            got, want;
+
+  mpz_init (z);
+  for (zeros = 0; zeros <= MAX_ZEROS; zeros++)
+    {
+      MPN_ZERO (pos, numberof(pos));
+      mpn_random2 (pos+zeros, (mp_size_t) NUM_LIMBS);
+
+      for (low1 = 0; low1 <= 1; low1++)
+       {
+         if (low1)
+           pos[0] |= 1;
+
+         refmpn_neg (neg, pos, (mp_size_t) numberof(neg));
+         mpz_set_n (z, neg, (mp_size_t) numberof(neg));
+         mpz_neg (z, z);
+
+         for (i = 0; i < numberof(pos)*GMP_NUMB_BITS; i++)
+           {
+             got = mpz_tstbit (z, i);
+             want = refmpn_tstbit (pos, i);
+             if (got != want)
+               {
+                 printf ("wrong at bit %lu, with %d zeros\n", i, zeros);
+                 printf ("z neg "); debug_mp (z, -16);
+                 mpz_set_n (z, pos, (mp_size_t) numberof(pos));
+                 printf ("pos   "); debug_mp (z, -16);
+                 mpz_set_n (z, neg, (mp_size_t) numberof(neg));
+                 printf ("neg   "); debug_mp (z, -16);
+                 exit (1);
+               }
+           }
+       }
+    }
+  mpz_clear (z);
+}
+
+
+void
+check_single (void)
+{
+  mpz_t  x;
+  int    limb, offset, initial;
+  unsigned long  bit;
+
+  mpz_init (x);
+
+  for (limb = 0; limb < 4; limb++)
+    {
+      for (offset = (limb==0 ? 0 : -2); offset <= 2; offset++)
+       {
+         for (initial = 0; initial >= -1; initial--)
+           {
+             mpz_set_si (x, (long) initial);
+
+             bit = (unsigned long) limb*GMP_LIMB_BITS + offset;
+
+             mpz_clrbit (x, bit);
+             MPZ_CHECK_FORMAT (x);
+             if (mpz_tstbit (x, bit) != 0)
+               {
+                 printf ("check_single(): expected 0\n");
+                 abort ();
+               }
+
+             mpz_setbit (x, bit);
+             MPZ_CHECK_FORMAT (x);
+             if (mpz_tstbit (x, bit) != 1)
+               {
+                 printf ("check_single(): expected 1\n");
+                 abort ();
+               }
+
+             mpz_clrbit (x, bit);
+             MPZ_CHECK_FORMAT (x);
+             if (mpz_tstbit (x, bit) != 0)
+               {
+                 printf ("check_single(): expected 0\n");
+                 abort ();
+               }
+
+             mpz_combit (x, bit);
+             MPZ_CHECK_FORMAT (x);
+             if (mpz_tstbit (x, bit) != 1)
+               {
+                 printf ("check_single(): expected 1\n");
+                 abort ();
+               }
+
+             mpz_combit (x, bit);
+             MPZ_CHECK_FORMAT (x);
+             if (mpz_tstbit (x, bit) != 0)
+               {
+                 printf ("check_single(): expected 0\n");
+                 abort ();
+               }
+           }
+       }
+    }
+
+  mpz_clear (x);
+}
+
+
+void
+check_random (int argc, char *argv[])
+{
+  mpz_t x, s0, s1, s2, s3, m;
+  mp_size_t xsize;
+  int i;
+  int reps = 100000;
+  int bit0, bit1, bit2, bit3;
+  unsigned long int bitindex;
+  const char  *s = "";
+
+  if (argc == 2)
+    reps = atoi (argv[1]);
+
+  mpz_init (x);
+  mpz_init (s0);
+  mpz_init (s1);
+  mpz_init (s2);
+  mpz_init (s3);
+  mpz_init (m);
+
+  for (i = 0; i < reps; i++)
+    {
+      xsize = urandom () % (2 * SIZE) - SIZE;
+      mpz_random2 (x, xsize);
+      bitindex = urandom () % SIZE;
+
+      mpz_set (s0, x);
+      bit0 = mpz_tstbit (x, bitindex);
+      mpz_setbit (x, bitindex);
+      MPZ_CHECK_FORMAT (x);
+
+      mpz_set (s1, x);
+      bit1 = mpz_tstbit (x, bitindex);
+      mpz_clrbit (x, bitindex);
+      MPZ_CHECK_FORMAT (x);
+
+      mpz_set (s2, x);
+      bit2 = mpz_tstbit (x, bitindex);
+      mpz_setbit (x, bitindex);
+      MPZ_CHECK_FORMAT (x);
+
+      mpz_set (s3, x);
+      bit3 = mpz_tstbit (x, bitindex);
+
+#define FAIL(str) do { s = str; goto fail; } while (0)
+
+      if (bit1 != 1)  FAIL ("bit1 != 1");
+      if (bit2 != 0)  FAIL ("bit2 != 0");
+      if (bit3 != 1)  FAIL ("bit3 != 1");
+
+      if (bit0 == 0)
+       {
+         if (mpz_cmp (s0, s1) == 0 || mpz_cmp (s0, s2) != 0 || mpz_cmp (s0, s3) == 0)
+           abort ();
+       }
+      else
+       {
+         if (mpz_cmp (s0, s1) != 0 || mpz_cmp (s0, s2) == 0 || mpz_cmp (s0, s3) != 0)
+           abort ();
+       }
+
+      if (mpz_cmp (s1, s2) == 0 || mpz_cmp (s1, s3) != 0)
+       abort ();
+      if (mpz_cmp (s2, s3) == 0)
+       abort ();
+
+      mpz_ui_pow_ui (m, 2L, bitindex);
+      MPZ_CHECK_FORMAT (m);
+      mpz_ior (x, s2, m);
+      MPZ_CHECK_FORMAT (x);
+      if (mpz_cmp (x, s3) != 0)
+       abort ();
+
+      mpz_com (m, m);
+      MPZ_CHECK_FORMAT (m);
+      mpz_and (x, s1, m);
+      MPZ_CHECK_FORMAT (x);
+      if (mpz_cmp (x, s2) != 0)
+       abort ();
+    }
+
+  mpz_clear (x);
+  mpz_clear (s0);
+  mpz_clear (s1);
+  mpz_clear (s2);
+  mpz_clear (s3);
+  mpz_clear (m);
+  return;
+
+
+ fail:
+  printf ("%s\n", s);
+  printf ("bitindex = %lu\n", bitindex);
+  printf ("x = "); mpz_out_str (stdout, -16, x); printf (" hex\n");
+  exit (1);
+}
+
+
+
+int
+main (int argc, char *argv[])
+{
+  tests_start ();
+  mp_trace_base = -16;
+
+  check_clr_extend ();
+  check_com_negs ();
+  check_tstbit ();
+  check_random (argc, argv);
+  check_single ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/convert.c b/tests/mpz/convert.c

new file mode 100644 (file)

index 0000000..2ab4205
--- /dev/null
+++ b/tests/mpz/convert.c
@@ -0,0 +1,170 @@
+/* Test conversion using mpz_get_str and mpz_set_str.
+
+Copyright 1993, 1994, 1996, 1999, 2000, 2001, 2002, 2006, 2007 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h> /* for strlen */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+void debug_mp __GMP_PROTO ((mpz_t, int));
+
+/* Store len random digit characters valid for base at bp, then a NUL.  */
+void
+string_urandomb (char *bp, size_t len, int base, gmp_randstate_ptr rands)
+{
+  mpz_t bs;
+  unsigned long bsi;
+  int d, l;
+  char *collseq = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
+
+  mpz_init (bs);
+
+  mpz_urandomb (bs, rands, 32);
+  bsi = mpz_get_ui (bs);
+  d = bsi % base;              /* digit value used for the first run */
+  while (len != 0)
+    {
+      l = (bsi >> 16) % 20;    /* run length 0..19 */
+      l = MIN (l, len);        /* never write more than len characters */
+
+      memset (bp, collseq[d], l);      /* a run of l copies of digit d */
+
+      len -= l;
+      bp += l;
+
+      mpz_urandomb (bs, rands, 32);
+      bsi = mpz_get_ui (bs);
+      d = bsi & 0xfff;         /* 0..4095 */
+      if (d >= base)
+       d = 0;                  /* out-of-range values become digit '0' */
+    }
+
+  bp[0] = '\0';                        /* written after the len characters */
+  mpz_clear (bs);
+}
+
+int
+main (int argc, char **argv)
+{
+  mpz_t op1, op2;
+  mp_size_t size;
+  int i;
+  int reps = 2000;
+  char *str, *buf, *bp;
+  int base;
+  gmp_randstate_ptr rands;
+  mpz_t bs;
+  unsigned long bsi, size_range;
+  size_t len;
+
+  tests_start ();
+  TESTS_REPS (reps, argv, argc);
+
+  rands = RANDS;
+
+  mpz_init (bs);
+
+  mpz_init (op1);
+  mpz_init (op2);
+
+  for (i = 0; i < reps; i++)
+    {
+      /* 1. Generate random mpz_t and convert to a string and back to mpz_t
+        again.  */
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 17 + 2;   /* 2..18 */
+      mpz_urandomb (bs, rands, size_range);    /* 0..262143 bits */
+      size = mpz_get_ui (bs);
+      mpz_rrandomb (op1, rands, size);
+
+      mpz_urandomb (bs, rands, 1);
+      bsi = mpz_get_ui (bs);
+      if ((bsi & 1) != 0)
+       mpz_neg (op1, op1);
+
+      mpz_urandomb (bs, rands, 32);
+      bsi = mpz_get_ui (bs);
+      base = bsi % 62 + 1;                     /* 1..62 */
+      if (base == 1)
+       base = 0;                               /* so base is 0 or 2..62 */
+
+      str = mpz_get_str ((char *) 0, base, op1);
+      mpz_set_str_or_abort (op2, str, base);
+
+      if (mpz_cmp (op1, op2))
+       {
+         fprintf (stderr, "ERROR, op1 and op2 different in test %d\n", i);
+         fprintf (stderr, "str  = %s\n", str);
+         fprintf (stderr, "base = %d\n", base);
+         fprintf (stderr, "op1  = "); debug_mp (op1, -16);
+         fprintf (stderr, "op2  = "); debug_mp (op2, -16);
+         abort ();
+       }
+
+      (*__gmp_free_func) (str, strlen (str) + 1);
+
+      /* 2. Generate random string and convert to mpz_t and back to a string
+        again.  */
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 16 + 1;   /* 1..16 */
+      mpz_urandomb (bs, rands, size_range);    /* 1..65536 digits */
+      len = mpz_get_ui (bs) + 1;
+      buf = (*__gmp_allocate_func) (len + 1);
+      if (base == 0)
+       base = 10;              /* string_urandomb computes bsi % base */
+      string_urandomb (buf, len, base, rands);
+
+      mpz_set_str_or_abort (op1, buf, base);
+      str = mpz_get_str ((char *) 0, base, op1);
+
+      /* Skip over leading zeros, but don't leave the string at zero length. */
+      for (bp = buf; bp[0] == '0' && bp[1] != '\0'; bp++)
+       ;
+
+      if (strcasecmp (str, bp) != 0)
+       {
+         fprintf (stderr, "ERROR, str and buf different in test %d\n", i);
+         fprintf (stderr, "str  = %s\n", str);
+         fprintf (stderr, "buf  = %s\n", buf);
+         fprintf (stderr, "base = %d\n", base);
+         fprintf (stderr, "op1  = "); debug_mp (op1, -16);
+         abort ();
+       }
+
+      (*__gmp_free_func) (buf, len + 1);
+      (*__gmp_free_func) (str, strlen (str) + 1);
+    }
+
+  mpz_clear (bs);
+  mpz_clear (op1);
+  mpz_clear (op2);
+
+  tests_end ();
+  exit (0);
+}
+
+void
+debug_mp (mpz_t x, int base)
+{
+  mpz_out_str (stderr, base, x); fputc ('\n', stderr);
+}
diff --git a/tests/mpz/dive.c b/tests/mpz/dive.c

new file mode 100644 (file)

index 0000000..9aaea57
--- /dev/null
+++ b/tests/mpz/dive.c
@@ -0,0 +1,101 @@
+/* Test mpz_mul, mpz_divexact.
+
+Copyright 1996, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+int
+main (int argc, char **argv)
+{
+  mpz_t op1, op2;
+  mpz_t prod, quot;
+  mp_size_t size;
+  int i;
+  int reps = 5000;
+  gmp_randstate_ptr rands;
+  mpz_t bs;
+  unsigned long bsi, size_range;
+
+  tests_start ();
+  TESTS_REPS (reps, argv, argc);
+
+  rands = RANDS;
+
+  mp_trace_base = -16;
+
+  mpz_init (bs);
+
+  mpz_init (op1);
+  mpz_init (op2);
+  mpz_init (prod);
+  mpz_init (quot);
+
+  for (i = 0; i < reps; i++)
+    {
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 17 + 2; /* 2..18 */
+
+      mpz_urandomb (bs, rands, size_range); /* 0..262143 bit operands */
+      size = mpz_get_ui (bs);
+      mpz_rrandomb (op1, rands, size);
+
+      do
+       {
+         mpz_urandomb (bs, rands, size_range);
+         size = mpz_get_ui (bs);
+         mpz_rrandomb (op2, rands, size);
+       }
+      while (mpz_sgn (op2) == 0); /* op2 is the divisor, must be nonzero */
+
+      mpz_urandomb (bs, rands, 2);
+      bsi = mpz_get_ui (bs);
+      if ((bsi & 1) != 0)
+       mpz_neg (op1, op1);
+      if ((bsi & 2) != 0)
+       mpz_neg (op2, op2);
+
+      mpz_mul (prod, op1, op2);
+
+      mpz_divexact (quot, prod, op2); /* op1*op2 / op2 must give op1 */
+      MPZ_CHECK_FORMAT (quot);
+
+      if (mpz_cmp (quot, op1) != 0)
+        {
+          printf ("Wrong results:\n");
+          mpz_trace ("  got     ", quot);
+          mpz_trace ("  want    ", op1);
+          mpz_trace ("  dividend", prod);
+          mpz_trace ("  divisor ", op2);
+          abort ();
+        }
+    }
+
+  mpz_clear (bs);
+  mpz_clear (op1);
+  mpz_clear (op2);
+  mpz_clear (prod);
+  mpz_clear (quot);
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/dive_ui.c b/tests/mpz/dive_ui.c

new file mode 100644 (file)

index 0000000..93f7a9b
--- /dev/null
+++ b/tests/mpz/dive_ui.c
@@ -0,0 +1,87 @@
+/* Test mpz_divexact_ui.
+
+Copyright 1996, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+void
+check_random (int argc, char *argv[])
+{
+  gmp_randstate_ptr rands = RANDS;
+  int    reps = 500000;
+  mpz_t  a, q, got;
+  int    i, qneg;
+  unsigned long  d;
+
+  if (argc == 2)
+    reps = atoi (argv[1]);
+
+  mpz_init (a);
+  mpz_init (q);
+  mpz_init (got);
+
+  for (i = 0; i < reps; i++)
+    {
+      do
+       d = (unsigned long) urandom();
+      while (d == 0);
+      mpz_erandomb (q, rands, 512);
+      mpz_mul_ui (a, q, d);
+
+      for (qneg = 0; qneg <= 1; qneg++)
+        {
+          mpz_divexact_ui (got, a, d);
+          MPZ_CHECK_FORMAT (got);
+          if (mpz_cmp (got, q) != 0)
+            {
+              printf    ("mpz_divexact_ui wrong\n");
+              mpz_trace ("    a", a);
+              printf    ("    d=%lu\n", d);
+              mpz_trace ("    q", q);
+              mpz_trace ("  got", got);
+              abort ();
+            }
+
+          mpz_neg (q, q);
+          mpz_neg (a, a);
+        }
+
+    }
+
+  mpz_clear (a);
+  mpz_clear (q);
+  mpz_clear (got);
+}
+
+
+int
+main (int argc, char **argv)
+{
+  tests_start ();
+
+  check_random (argc, argv);
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/io.c b/tests/mpz/io.c

new file mode 100644 (file)

index 0000000..d6bbbba
--- /dev/null
+++ b/tests/mpz/io.c
@@ -0,0 +1,134 @@
+/* Test conversion and I/O using mpz_out_str and mpz_inp_str.
+
+Copyright 1993, 1994, 1996, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#if HAVE_UNISTD_H
+#include <unistd.h>            /* for unlink */
+#endif
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+#define FILENAME  "io.tmp"
+
+void
+debug_mp (mpz_t x, int base)
+{
+  mpz_out_str (stdout, base, x); fputc ('\n', stdout);
+}
+
+/* Round-trip random values through mpz_out_str and mpz_inp_str.  */
+int
+main (int argc, char **argv)
+{
+  mpz_t  op1, op2, bs;
+  mp_size_t size;
+  int i, base, reps = 10000;
+  FILE *fp;
+  gmp_randstate_ptr rands;
+  unsigned long bsi, size_range;
+  size_t nread;
+
+  tests_start ();
+  rands = RANDS;
+  if (argc == 2)
+    reps = atoi (argv[1]);     /* optional repetition count */
+
+  mpz_init (bs);
+  mpz_init (op1);
+  mpz_init (op2);
+
+  fp = fopen (FILENAME, "w+"); /* rewound and reused by every iteration */
+  if (fp == NULL)
+    {
+      printf ("cannot open %s\n", FILENAME);
+      abort ();
+    }
+
+  for (i = 0; i < reps; i++)
+    {
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 10 + 2;   /* 2..11 */
+
+      mpz_urandomb (bs, rands, size_range);
+      size = mpz_get_ui (bs);
+      mpz_rrandomb (op1, rands, size);
+      mpz_urandomb (bs, rands, 1);
+      bsi = mpz_get_ui (bs);
+      if ((bsi & 1) != 0)
+       mpz_neg (op1, op1);
+
+      mpz_urandomb (bs, rands, 16);
+      bsi = mpz_get_ui (bs);
+      base = bsi % 36 + 1;                     /* 1..36 */
+      if (base == 1)
+       base = 0;                               /* so base is 0 or 2..36 */
+
+      rewind (fp);             /* overwrite from the start of the file */
+      if (mpz_out_str (fp, base, op1) == 0
+         || putc (' ', fp) == EOF
+         || fflush (fp) != 0)
+       {
+         printf ("mpz_out_str write error\n");
+         abort ();
+       }
+
+      rewind (fp);             /* read back what was just written */
+      nread = mpz_inp_str (op2, fp, base);
+      if (nread == 0)
+       {
+         if (ferror (fp))
+           printf ("mpz_inp_str stream read error\n");
+         else
+           printf ("mpz_inp_str data conversion error\n");
+         abort ();
+       }
+
+      if (nread != ftell(fp))  /* count read must match file position */
+       {
+         printf ("mpz_inp_str nread doesn't match ftell\n");
+         printf ("  nread  %lu\n", (unsigned long) nread);
+         printf ("  ftell  %ld\n", ftell(fp));
+         abort ();
+       }
+
+      if (mpz_cmp (op1, op2))
+       {
+         printf ("ERROR\n");
+         printf ("op1  = "); debug_mp (op1, -16);
+         printf ("op2  = "); debug_mp (op2, -16);
+         printf ("base = %d\n", base);
+         abort ();
+       }
+    }
+
+  fclose (fp);
+  unlink (FILENAME);
+
+  mpz_clear (bs);
+  mpz_clear (op1);
+  mpz_clear (op2);
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/logic.c b/tests/mpz/logic.c

new file mode 100644 (file)

index 0000000..e5d3930
--- /dev/null
+++ b/tests/mpz/logic.c
@@ -0,0 +1,156 @@
+/* Test mpz_com, mpz_and, mpz_ior, and mpz_xor.
+
+Copyright 1993, 1994, 1996, 1997, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+void dump_abort __GMP_PROTO (());
+void debug_mp __GMP_PROTO ((mpz_t, int));
+
+int
+main (int argc, char **argv)
+{
+  mpz_t x, y, r1, r2;
+  mpz_t t1, t2, t3;
+  mp_size_t xsize, ysize;
+  int i;
+  int reps = 100000;
+  gmp_randstate_ptr rands;
+  mpz_t bs;
+  unsigned long bsi, size_range;
+
+  tests_start ();
+  rands = RANDS;
+
+  mpz_init (bs);
+
+  if (argc == 2)
+     reps = atoi (argv[1]);
+
+  mpz_init (x);
+  mpz_init (y);
+  mpz_init (r1);
+  mpz_init (r2);
+  mpz_init (t1);
+  mpz_init (t2);
+  mpz_init (t3);
+
+  for (i = 0; i < reps; i++)
+    {
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 8 + 2;    /* 2..9 */
+
+      mpz_urandomb (bs, rands, size_range);
+      xsize = mpz_get_ui (bs);
+      mpz_rrandomb (x, rands, xsize);
+      mpz_urandomb (bs, rands, 1);
+      bsi = mpz_get_ui (bs);
+      if ((bsi & 1) != 0)
+       mpz_neg (x, x);
+
+      mpz_urandomb (bs, rands, size_range);
+      ysize = mpz_get_ui (bs);
+      mpz_rrandomb (y, rands, ysize);
+      mpz_urandomb (bs, rands, 1);
+      bsi = mpz_get_ui (bs);
+      if ((bsi & 1) != 0)
+       mpz_neg (y, y);
+
+      mpz_com (r1, x);
+      MPZ_CHECK_FORMAT (r1);
+      mpz_com (r1, r1);
+      MPZ_CHECK_FORMAT (r1);
+      if (mpz_cmp (r1, x) != 0)                /* ~~x == x, second com in place */
+       dump_abort ();
+
+      mpz_com (r1, y);
+      MPZ_CHECK_FORMAT (r1);
+      mpz_com (r2, r1);
+      MPZ_CHECK_FORMAT (r2);
+      if (mpz_cmp (r2, y) != 0)                /* ~~y == y */
+       dump_abort ();
+
+      mpz_com (t1, x);
+      MPZ_CHECK_FORMAT (t1);
+      mpz_com (t2, y);
+      MPZ_CHECK_FORMAT (t2);
+      mpz_and (t3, t1, t2);
+      MPZ_CHECK_FORMAT (t3);
+      mpz_com (r1, t3);
+      MPZ_CHECK_FORMAT (r1);
+      mpz_ior (r2, x, y);
+      MPZ_CHECK_FORMAT (r2);
+      if (mpz_cmp (r1, r2) != 0)       /* ~(~x & ~y) == x | y */
+       dump_abort ();
+
+      mpz_com (t1, x);
+      MPZ_CHECK_FORMAT (t1);
+      mpz_com (t2, y);
+      MPZ_CHECK_FORMAT (t2);
+      mpz_ior (t3, t1, t2);
+      MPZ_CHECK_FORMAT (t3);
+      mpz_com (r1, t3);
+      MPZ_CHECK_FORMAT (r1);
+      mpz_and (r2, x, y);
+      MPZ_CHECK_FORMAT (r2);
+      if (mpz_cmp (r1, r2) != 0)       /* ~(~x | ~y) == x & y */
+       dump_abort ();
+
+      mpz_ior (t1, x, y);
+      MPZ_CHECK_FORMAT (t1);
+      mpz_and (t2, x, y);
+      MPZ_CHECK_FORMAT (t2);
+      mpz_com (t3, t2);
+      MPZ_CHECK_FORMAT (t3);
+      mpz_and (r1, t1, t3);
+      MPZ_CHECK_FORMAT (r1);
+      mpz_xor (r2, x, y);
+      MPZ_CHECK_FORMAT (r2);
+      if (mpz_cmp (r1, r2) != 0)       /* (x | y) & ~(x & y) == x ^ y */
+       dump_abort ();
+    }
+
+  mpz_clear (bs);
+  mpz_clear (x);
+  mpz_clear (y);
+  mpz_clear (r1);
+  mpz_clear (r2);
+  mpz_clear (t1);
+  mpz_clear (t2);
+  mpz_clear (t3);
+
+  tests_end ();
+  exit (0);
+}
+
+void
+dump_abort ()
+{
+  abort();
+}
+
+void
+debug_mp (mpz_t x, int base)
+{
+  mpz_out_str (stderr, base, x); fputc ('\n', stderr);
+}
diff --git a/tests/mpz/reuse.c b/tests/mpz/reuse.c

new file mode 100644 (file)

index 0000000..ae13fe9
--- /dev/null
+++ b/tests/mpz/reuse.c
@@ -0,0 +1,691 @@
+/* Test that routines allow reusing a source variable as destination.
+
+   Test all relevant functions except:
+       mpz_bin_ui
+       mpz_nextprime
+       mpz_mul_si
+       mpz_addmul_ui (should this really allow a+=a*c?)
+
+Copyright 1996, 1999, 2000, 2001, 2002, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+#if __GMP_LIBGMP_DLL
+
+/* FIXME: When linking to a DLL libgmp, mpz_add etc can't be used as
+   initializers for global variables because they're effectively global
+   variables (function pointers) themselves.  Perhaps calling a test
+   function successively with mpz_add etc would be better.  */
+
+/* Stub used when __GMP_LIBGMP_DLL is non-zero: the real test is compiled
+   out, so just say so and exit with status 0.  */
+int
+main (void)
+{
+  printf ("Test suppressed for windows DLL\n");
+  exit (0);
+}
+
+
+#else /* ! DLL_EXPORT */
+
+void dump __GMP_PROTO ((char *, mpz_t, mpz_t, mpz_t));
+
+typedef void (*dss_func) __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+typedef void (*dsi_func) __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+typedef unsigned long int (*dsi_div_func) __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+typedef unsigned long int (*ddsi_div_func) __GMP_PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, unsigned long int));
+typedef void (*ddss_div_func) __GMP_PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr));
+typedef void (*ds_func) __GMP_PROTO ((mpz_ptr, mpz_srcptr));
+
+
+/* mpz_invert wrapped to fit the dss_func signature.  When mpz_invert
+   returns 0, R is forced to 0 so that the value left in R is the same
+   whether or not R aliases one of the sources.  */
+void
+mpz_xinvert (mpz_ptr r, mpz_srcptr a, mpz_srcptr b)
+{
+  if (mpz_invert (r, a, b) == 0)
+    mpz_set_ui (r, 0);
+}
+
+/* Functions taking (destination, source, source).  The three arrays below
+   are parallel: entry i of dss_func_names is the printable name of entry i
+   of dss_funcs, and entry i of dss_func_division is non-zero when main()
+   must skip that function for a zero second operand.  Keep them in step.  */
+dss_func dss_funcs[] =
+{
+  mpz_add, mpz_sub, mpz_mul,
+  mpz_cdiv_q, mpz_cdiv_r, mpz_fdiv_q, mpz_fdiv_r, mpz_tdiv_q, mpz_tdiv_r,
+  mpz_xinvert,
+  mpz_gcd, mpz_lcm, mpz_and, mpz_ior, mpz_xor
+};
+char *dss_func_names[] =
+{
+  "mpz_add", "mpz_sub", "mpz_mul",
+  "mpz_cdiv_q", "mpz_cdiv_r", "mpz_fdiv_q", "mpz_fdiv_r", "mpz_tdiv_q", "mpz_tdiv_r",
+  "mpz_xinvert",
+  "mpz_gcd", "mpz_lcm", "mpz_and", "mpz_ior", "mpz_xor"
+};
+char dss_func_division[] = {0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0};
+
+/* Functions taking (destination, source, unsigned long), with the parallel
+   table of printable names.  main() looks entries up by name to shrink the
+   unsigned long operand progressively, which is why the order matters.  */
+dsi_func dsi_funcs[] =
+{
+  /* Don't change order here without changing the code in main(). */
+  mpz_add_ui, mpz_mul_ui, mpz_sub_ui,
+  mpz_fdiv_q_2exp, mpz_fdiv_r_2exp,
+  mpz_cdiv_q_2exp, mpz_cdiv_r_2exp,
+  mpz_tdiv_q_2exp, mpz_tdiv_r_2exp,
+  mpz_mul_2exp,
+  mpz_pow_ui
+};
+char *dsi_func_names[] =
+{
+  "mpz_add_ui", "mpz_mul_ui", "mpz_sub_ui",
+  "mpz_fdiv_q_2exp", "mpz_fdiv_r_2exp",
+  "mpz_cdiv_q_2exp", "mpz_cdiv_r_2exp",
+  "mpz_tdiv_q_2exp", "mpz_tdiv_r_2exp",
+  "mpz_mul_2exp",
+  "mpz_pow_ui"
+};
+
+/* Division functions taking (destination, source, unsigned long) and
+   returning an unsigned long, with the parallel table of printable names.  */
+dsi_div_func dsi_div_funcs[] =
+{
+  mpz_cdiv_q_ui, mpz_cdiv_r_ui,
+  mpz_fdiv_q_ui, mpz_fdiv_r_ui,
+  mpz_tdiv_q_ui, mpz_tdiv_r_ui
+};
+char *dsi_div_func_names[] =
+{
+  "mpz_cdiv_q_ui", "mpz_cdiv_r_ui",
+  "mpz_fdiv_q_ui", "mpz_fdiv_r_ui",
+  "mpz_tdiv_q_ui", "mpz_tdiv_r_ui"
+};
+
+/* Division functions taking (destination, destination, source, unsigned
+   long) and returning an unsigned long, with the parallel table of
+   printable names.  */
+ddsi_div_func ddsi_div_funcs[] =
+{
+  mpz_cdiv_qr_ui,
+  mpz_fdiv_qr_ui,
+  mpz_tdiv_qr_ui
+};
+char *ddsi_div_func_names[] =
+{
+  "mpz_cdiv_qr_ui",
+  "mpz_fdiv_qr_ui",
+  "mpz_tdiv_qr_ui"
+};
+
+/* Division functions taking (destination, destination, source, source),
+   with the parallel table of printable names.  */
+ddss_div_func ddss_div_funcs[] =
+{
+  mpz_cdiv_qr,
+  mpz_fdiv_qr,
+  mpz_tdiv_qr
+};
+char *ddss_div_func_names[] =
+{
+  "mpz_cdiv_qr",
+  "mpz_fdiv_qr",
+  "mpz_tdiv_qr"
+};
+
+/* Functions taking (destination, source), with the parallel table of
+   printable names.  main() recognises "mpz_sqrt" by name in order to skip
+   it for negative input.  */
+ds_func ds_funcs[] =
+{
+  mpz_abs, mpz_com, mpz_neg, mpz_sqrt
+};
+char *ds_func_names[] =
+{
+  "mpz_abs", "mpz_com", "mpz_neg", "mpz_sqrt"
+};
+
+
+/* Failure-reporting helpers.  Both expect a variable named `failures' to
+   be in scope where they are used.
+
+   FAIL (class, indx, ...) zeroes class_funcs[indx], prints the operands
+   through dump() under the name class_func_names[indx], and counts the
+   failure.  Loops that test a table entry against 0 before calling it will
+   therefore not run that function again.
+
+   FAIL2 (fname, ...) is for functions called directly rather than through
+   a table: it only dumps and counts.  */
+/* Really use `defined (__STDC__)' here; we want it to be true for Sun C */
+#if defined (__STDC__) || defined (__cplusplus)
+#define FAIL(class,indx,op1,op2,op3) \
+  do {                                                                 \
+  class##_funcs[indx] = 0;                                             \
+  dump (class##_func_names[indx], op1, op2, op3);                      \
+  failures++;                                                          \
+  } while (0)
+#define FAIL2(fname,op1,op2,op3) \
+  do {                                                                 \
+  dump (#fname, op1, op2, op3);                                                \
+  failures++;                                                          \
+  } while (0)
+#else
+/* Fallback without the ## and # operators: an empty comment stands in for
+   token pasting and the parameter name is written inside the string
+   literal.  Presumably both rely on traditional (pre-ANSI) preprocessor
+   behaviour -- confirm before depending on this branch.  */
+#define FAIL(class,indx,op1,op2,op3) \
+  do {                                                                 \
+  class/**/_funcs[indx] = 0;                                           \
+  dump (class/**/_func_names[indx], op1, op2, op3);                    \
+  failures++;                                                          \
+  } while (0)
+#define FAIL2(fname,op1,op2,op3) \
+  do {                                                                 \
+  dump ("fname", op1, op2, op3);                                       \
+  failures++;                                                          \
+  } while (0)
+#endif
+
+
+
+/* Test driver.  For each of REPS passes, random operands in1, in2 and in3
+   are generated and every function under test is called twice or more:
+   once with destinations distinct from all sources (ref1..ref3), and then
+   with a destination that is also one of the sources (res1..res3).  The
+   results must agree.  Mismatches are reported through FAIL/FAIL2 and
+   counted; the process exits with status 1 if any were seen, 0 otherwise.  */
+int
+main (int argc, char **argv)
+{
+  int i;
+  int pass, reps = 100;
+  mpz_t in1, in2, in3;
+  unsigned long int in2i;
+  mp_size_t size;
+  mpz_t res1, res2, res3;
+  mpz_t ref1, ref2, ref3;
+  mpz_t t;
+  unsigned long int r1, r2;
+  long failures = 0;
+  gmp_randstate_ptr rands;
+  mpz_t bs;
+  unsigned long bsi, size_range;
+
+  tests_start ();
+  TESTS_REPS (reps, argv, argc);
+
+  rands = RANDS;
+
+  mpz_init (bs);
+
+  mpz_init (in1);
+  mpz_init (in2);
+  mpz_init (in3);
+  mpz_init (ref1);
+  mpz_init (ref2);
+  mpz_init (ref3);
+  mpz_init (res1);
+  mpz_init (res2);
+  mpz_init (res3);
+  mpz_init (t);
+
+  for (pass = 1; pass <= reps; pass++)
+    {
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 17 + 2;
+
+      mpz_urandomb (bs, rands, size_range);
+      size = mpz_get_ui (bs);
+      mpz_rrandomb (in1, rands, size);
+
+      mpz_urandomb (bs, rands, size_range);
+      size = mpz_get_ui (bs);
+      mpz_rrandomb (in2, rands, size);
+
+      mpz_urandomb (bs, rands, size_range);
+      size = mpz_get_ui (bs);
+      mpz_rrandomb (in3, rands, size);
+
+      /* Three random bits, one per operand, so that the signs of in1, in2
+         and in3 vary independently of each other.  */
+      mpz_urandomb (bs, rands, 3);
+      bsi = mpz_get_ui (bs);
+      if ((bsi & 1) != 0)
+       mpz_neg (in1, in1);
+      if ((bsi & 2) != 0)
+       mpz_neg (in2, in2);
+      if ((bsi & 4) != 0)
+       mpz_neg (in3, in3);
+
+      for (i = 0; i < sizeof (dss_funcs) / sizeof (dss_func); i++)
+       {
+         if (dss_funcs[i] == 0)
+           continue;
+         if (dss_func_division[i] && mpz_sgn (in2) == 0)
+           continue;
+
+         (dss_funcs[i]) (ref1, in1, in2);
+         MPZ_CHECK_FORMAT (ref1);
+
+         mpz_set (res1, in1);
+         (dss_funcs[i]) (res1, res1, in2);
+         MPZ_CHECK_FORMAT (res1);
+         if (mpz_cmp (ref1, res1) != 0)
+           FAIL (dss, i, in1, in2, NULL);
+
+         mpz_set (res1, in2);
+         (dss_funcs[i]) (res1, in1, res1);
+         MPZ_CHECK_FORMAT (res1);
+         if (mpz_cmp (ref1, res1) != 0)
+           FAIL (dss, i, in1, in2, NULL);
+       }
+
+      for (i = 0; i < sizeof (ddss_div_funcs) / sizeof (ddss_div_func); i++)
+       {
+         if (ddss_div_funcs[i] == 0)
+           continue;
+         if (mpz_sgn (in2) == 0)
+           continue;
+
+         (ddss_div_funcs[i]) (ref1, ref2, in1, in2);
+         MPZ_CHECK_FORMAT (ref1);
+         MPZ_CHECK_FORMAT (ref2);
+
+         mpz_set (res1, in1);
+         (ddss_div_funcs[i]) (res1, res2, res1, in2);
+         MPZ_CHECK_FORMAT (res1);
+         MPZ_CHECK_FORMAT (res2);
+         if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)
+           FAIL (ddss_div, i, in1, in2, NULL);
+
+         mpz_set (res2, in1);
+         (ddss_div_funcs[i]) (res1, res2, res2, in2);
+         MPZ_CHECK_FORMAT (res1);
+         MPZ_CHECK_FORMAT (res2);
+         if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)
+           FAIL (ddss_div, i, in1, in2, NULL);
+
+         mpz_set (res1, in2);
+         (ddss_div_funcs[i]) (res1, res2, in1, res1);
+         MPZ_CHECK_FORMAT (res1);
+         MPZ_CHECK_FORMAT (res2);
+         if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)
+           FAIL (ddss_div, i, in1, in2, NULL);
+
+         mpz_set (res2, in2);
+         (ddss_div_funcs[i]) (res1, res2, in1, res2);
+         MPZ_CHECK_FORMAT (res1);
+         MPZ_CHECK_FORMAT (res2);
+         if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)
+           FAIL (ddss_div, i, in1, in2, NULL);
+       }
+
+      for (i = 0; i < sizeof (ds_funcs) / sizeof (ds_func); i++)
+       {
+         if (ds_funcs[i] == 0)
+           continue;
+         if (strcmp (ds_func_names[i], "mpz_sqrt") == 0
+             && mpz_sgn (in1) < 0)
+           continue;
+
+         (ds_funcs[i]) (ref1, in1);
+         MPZ_CHECK_FORMAT (ref1);
+
+         mpz_set (res1, in1);
+         (ds_funcs[i]) (res1, res1);
+         MPZ_CHECK_FORMAT (res1);
+         if (mpz_cmp (ref1, res1) != 0)
+           FAIL (ds, i, in1, in2, NULL);
+       }
+
+      in2i = mpz_get_ui (in2);
+
+      /* in2i is narrowed in place as the table is walked, so each later
+         group of functions (and everything after this loop) sees the
+         smaller value.  This is why the table order must not change.  */
+      for (i = 0; i < sizeof (dsi_funcs) / sizeof (dsi_func); i++)
+       {
+         if (dsi_funcs[i] == 0)
+           continue;
+         if (strcmp (dsi_func_names[i], "mpz_fdiv_q_2exp") == 0)
+           /* Limit exponent to something reasonable for the division
+              functions.  Without this, we'd  normally shift things off
+              the end and just generate the trivial values 1, 0, -1.  */
+           in2i %= 0x1000;
+         if (strcmp (dsi_func_names[i], "mpz_mul_2exp") == 0)
+           /* Limit exponent more for mpz_mul_2exp to save time.  */
+           in2i %= 0x100;
+         if (strcmp (dsi_func_names[i], "mpz_pow_ui") == 0)
+           /* Limit exponent yet more for mpz_pow_ui to save time.  */
+           in2i %= 0x10;
+
+         (dsi_funcs[i]) (ref1, in1, in2i);
+         MPZ_CHECK_FORMAT (ref1);
+
+         mpz_set (res1, in1);
+         (dsi_funcs[i]) (res1, res1, in2i);
+         MPZ_CHECK_FORMAT (res1);
+         if (mpz_cmp (ref1, res1) != 0)
+           FAIL (dsi, i, in1, in2, NULL);
+       }
+
+      if (in2i != 0)     /* Don't divide by 0.  */
+       {
+         /* The element count divides by the element type; dividing the
+            array by itself would give 1 and test only the first entry.  */
+         for (i = 0; i < sizeof (dsi_div_funcs) / sizeof (dsi_div_func); i++)
+           {
+             /* FAIL zeroes the entry, so skip functions that already
+                failed instead of calling through a null pointer.  */
+             if (dsi_div_funcs[i] == 0)
+               continue;
+
+             r1 = (dsi_div_funcs[i]) (ref1, in1, in2i);
+             MPZ_CHECK_FORMAT (ref1);
+
+             mpz_set (res1, in1);
+             r2 = (dsi_div_funcs[i]) (res1, res1, in2i);
+             MPZ_CHECK_FORMAT (res1);
+             if (mpz_cmp (ref1, res1) != 0 || r1 != r2)
+               FAIL (dsi_div, i, in1, in2, NULL);
+           }
+
+         for (i = 0; i < sizeof (ddsi_div_funcs) / sizeof (ddsi_div_func); i++)
+           {
+             if (ddsi_div_funcs[i] == 0)
+               continue;
+
+             r1 = (ddsi_div_funcs[i]) (ref1, ref2, in1, in2i);
+             MPZ_CHECK_FORMAT (ref1);
+
+             mpz_set (res1, in1);
+             r2 = (ddsi_div_funcs[i]) (res1, res2, res1, in2i);
+             MPZ_CHECK_FORMAT (res1);
+             if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0 || r1 != r2)
+               FAIL (ddsi_div, i, in1, in2, NULL);
+
+             /* Capture the return value of this call too, so that the
+                r1 != r2 test below checks it rather than a stale r2.  */
+             mpz_set (res2, in1);
+             r2 = (ddsi_div_funcs[i]) (res1, res2, res2, in2i);
+             MPZ_CHECK_FORMAT (res1);
+             if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0 || r1 != r2)
+               FAIL (ddsi_div, i, in1, in2, NULL);
+           }
+       }
+
+      if (mpz_sgn (in1) >= 0)
+       {
+         mpz_sqrtrem (ref1, ref2, in1);
+         MPZ_CHECK_FORMAT (ref1);
+         MPZ_CHECK_FORMAT (ref2);
+
+         mpz_set (res1, in1);
+         mpz_sqrtrem (res1, res2, res1);
+         MPZ_CHECK_FORMAT (res1);
+         MPZ_CHECK_FORMAT (res2);
+         if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)
+           FAIL2 (mpz_sqrtrem, in1, NULL, NULL);
+
+         mpz_set (res2, in1);
+         mpz_sqrtrem (res1, res2, res2);
+         MPZ_CHECK_FORMAT (res1);
+         MPZ_CHECK_FORMAT (res2);
+         if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)
+           FAIL2 (mpz_sqrtrem, in1, NULL, NULL);
+       }
+
+      if (mpz_sgn (in1) >= 0)
+       {
+         mpz_root (ref1, in1, in2i % 0x1000 + 1);
+         MPZ_CHECK_FORMAT (ref1);
+
+         mpz_set (res1, in1);
+         mpz_root (res1, res1, in2i % 0x1000 + 1);
+         MPZ_CHECK_FORMAT (res1);
+         if (mpz_cmp (ref1, res1) != 0)
+           FAIL2 (mpz_root, in1, in2, NULL);
+       }
+
+      if (mpz_sgn (in1) >= 0)
+       {
+         mpz_rootrem (ref1, ref2, in1, in2i % 0x1000 + 1);
+         MPZ_CHECK_FORMAT (ref1);
+         MPZ_CHECK_FORMAT (ref2);
+
+         mpz_set (res1, in1);
+         mpz_rootrem (res1, res2, res1, in2i % 0x1000 + 1);
+         MPZ_CHECK_FORMAT (res1);
+         MPZ_CHECK_FORMAT (res2);
+         if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)
+           FAIL2 (mpz_rootrem, in1, in2, NULL);
+
+         mpz_set (res2, in1);
+         mpz_rootrem (res1, res2, res2, in2i % 0x1000 + 1);
+         MPZ_CHECK_FORMAT (res1);
+         MPZ_CHECK_FORMAT (res2);
+         if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)
+           FAIL2 (mpz_rootrem, in1, in2, NULL);
+       }
+
+      if (pass < reps / 2)     /* run fewer tests since gcdext takes a lot of time */
+       {
+         mpz_gcdext (ref1, ref2, ref3, in1, in2);
+         MPZ_CHECK_FORMAT (ref1);
+         MPZ_CHECK_FORMAT (ref2);
+         MPZ_CHECK_FORMAT (ref3);
+
+         mpz_set (res1, in1);
+         mpz_gcdext (res1, res2, res3, res1, in2);
+         MPZ_CHECK_FORMAT (res1);
+         MPZ_CHECK_FORMAT (res2);
+         MPZ_CHECK_FORMAT (res3);
+         if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0
+             || mpz_cmp (ref3, res3) != 0)
+           FAIL2 (mpz_gcdext, in1, in2, NULL);
+
+         mpz_set (res2, in1);
+         mpz_gcdext (res1, res2, res3, res2, in2);
+         MPZ_CHECK_FORMAT (res1);
+         MPZ_CHECK_FORMAT (res2);
+         MPZ_CHECK_FORMAT (res3);
+         if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0
+             || mpz_cmp (ref3, res3) != 0)
+           FAIL2 (mpz_gcdext, in1, in2, NULL);
+
+         mpz_set (res3, in1);
+         mpz_gcdext (res1, res2, res3, res3, in2);
+         MPZ_CHECK_FORMAT (res1);
+         MPZ_CHECK_FORMAT (res2);
+         MPZ_CHECK_FORMAT (res3);
+         if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0
+             || mpz_cmp (ref3, res3) != 0)
+           FAIL2 (mpz_gcdext, in1, in2, NULL);
+
+         mpz_set (res1, in2);
+         mpz_gcdext (res1, res2, res3, in1, res1);
+         MPZ_CHECK_FORMAT (res1);
+         MPZ_CHECK_FORMAT (res2);
+         MPZ_CHECK_FORMAT (res3);
+         if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0
+             || mpz_cmp (ref3, res3) != 0)
+           FAIL2 (mpz_gcdext, in1, in2, NULL);
+
+         mpz_set (res2, in2);
+         mpz_gcdext (res1, res2, res3, in1, res2);
+         MPZ_CHECK_FORMAT (res1);
+         MPZ_CHECK_FORMAT (res2);
+         MPZ_CHECK_FORMAT (res3);
+         if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0
+             || mpz_cmp (ref3, res3) != 0)
+           FAIL2 (mpz_gcdext, in1, in2, NULL);
+
+         mpz_set (res3, in2);
+         mpz_gcdext (res1, res2, res3, in1, res3);
+         MPZ_CHECK_FORMAT (res1);
+         MPZ_CHECK_FORMAT (res2);
+         MPZ_CHECK_FORMAT (res3);
+         if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0
+             || mpz_cmp (ref3, res3) != 0)
+           FAIL2 (mpz_gcdext, in1, in2, NULL);
+
+         /* The remaining calls pass NULL for the third output; res3 is
+            not written by them and still holds the value from the call
+            just above.  */
+         mpz_set (res1, in1);
+         mpz_gcdext (res1, res2, NULL, res1, in2);
+         MPZ_CHECK_FORMAT (res1);
+         MPZ_CHECK_FORMAT (res2);
+         if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0
+             || mpz_cmp (ref3, res3) != 0)
+           FAIL2 (mpz_gcdext, in1, in2, NULL);
+
+         mpz_set (res2, in1);
+         mpz_gcdext (res1, res2, NULL, res2, in2);
+         MPZ_CHECK_FORMAT (res1);
+         MPZ_CHECK_FORMAT (res2);
+         if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0
+             || mpz_cmp (ref3, res3) != 0)
+           FAIL2 (mpz_gcdext, in1, in2, NULL);
+
+         mpz_set (res1, in2);
+         mpz_gcdext (res1, res2, NULL, in1, res1);
+         MPZ_CHECK_FORMAT (res1);
+         MPZ_CHECK_FORMAT (res2);
+         if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0
+             || mpz_cmp (ref3, res3) != 0)
+           FAIL2 (mpz_gcdext, in1, in2, NULL);
+
+         mpz_set (res2, in2);
+         mpz_gcdext (res1, res2, NULL, in1, res2);
+         MPZ_CHECK_FORMAT (res1);
+         MPZ_CHECK_FORMAT (res2);
+         if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0
+             || mpz_cmp (ref3, res3) != 0)
+           FAIL2 (mpz_gcdext, in1, in2, NULL);
+       }
+
+      /* Don't run mpz_powm for huge exponents or when undefined.  */
+      if (mpz_sizeinbase (in2, 2) < 250 && mpz_sgn (in3) != 0
+         && (mpz_sgn (in2) >= 0 || mpz_invert (t, in1, in3)))
+       {
+         mpz_powm (ref1, in1, in2, in3);
+         MPZ_CHECK_FORMAT (ref1);
+
+         mpz_set (res1, in1);
+         mpz_powm (res1, res1, in2, in3);
+         MPZ_CHECK_FORMAT (res1);
+         if (mpz_cmp (ref1, res1) != 0)
+           FAIL2 (mpz_powm, in1, in2, in3);
+
+         mpz_set (res1, in2);
+         mpz_powm (res1, in1, res1, in3);
+         MPZ_CHECK_FORMAT (res1);
+         if (mpz_cmp (ref1, res1) != 0)
+           FAIL2 (mpz_powm, in1, in2, in3);
+
+         mpz_set (res1, in3);
+         mpz_powm (res1, in1, in2, res1);
+         MPZ_CHECK_FORMAT (res1);
+         if (mpz_cmp (ref1, res1) != 0)
+           FAIL2 (mpz_powm, in1, in2, in3);
+       }
+
+      /* Don't run mpz_powm_ui when undefined.  */
+      if (mpz_sgn (in3) != 0)
+       {
+         mpz_powm_ui (ref1, in1, in2i, in3);
+         MPZ_CHECK_FORMAT (ref1);
+
+         mpz_set (res1, in1);
+         mpz_powm_ui (res1, res1, in2i, in3);
+         MPZ_CHECK_FORMAT (res1);
+         if (mpz_cmp (ref1, res1) != 0)
+           FAIL2 (mpz_powm_ui, in1, in2, in3);
+
+         mpz_set (res1, in3);
+         mpz_powm_ui (res1, in1, in2i, res1);
+         MPZ_CHECK_FORMAT (res1);
+         if (mpz_cmp (ref1, res1) != 0)
+           FAIL2 (mpz_powm_ui, in1, in2, in3);
+       }
+
+      {
+       r1 = mpz_gcd_ui (ref1, in1, in2i);
+       MPZ_CHECK_FORMAT (ref1);
+
+       mpz_set (res1, in1);
+       r2 = mpz_gcd_ui (res1, res1, in2i);
+       MPZ_CHECK_FORMAT (res1);
+       /* Both calls have identical inputs, so the return values must
+          agree as well as the stored results.  */
+       if (mpz_cmp (ref1, res1) != 0 || r1 != r2)
+         FAIL2 (mpz_gcd_ui, in1, in2, NULL);
+      }
+
+      if (mpz_cmp_ui (in2, 1L) > 0 && mpz_sgn (in1) != 0)
+       {
+         /* Test mpz_remove */
+         mpz_remove (ref1, in1, in2);
+         MPZ_CHECK_FORMAT (ref1);
+
+         mpz_set (res1, in1);
+         mpz_remove (res1, res1, in2);
+         MPZ_CHECK_FORMAT (res1);
+         if (mpz_cmp (ref1, res1) != 0)
+           FAIL2 (mpz_remove, in1, in2, NULL);
+
+         mpz_set (res1, in2);
+         mpz_remove (res1, in1, res1);
+         MPZ_CHECK_FORMAT (res1);
+         if (mpz_cmp (ref1, res1) != 0)
+           FAIL2 (mpz_remove, in1, in2, NULL);
+       }
+
+      if (mpz_sgn (in2) != 0)
+       {
+         /* Test mpz_divexact */
+         mpz_mul (t, in1, in2);
+         mpz_divexact (ref1, t, in2);
+         MPZ_CHECK_FORMAT (ref1);
+
+         mpz_set (res1, t);
+         mpz_divexact (res1, res1, in2);
+         MPZ_CHECK_FORMAT (res1);
+         if (mpz_cmp (ref1, res1) != 0)
+           FAIL2 (mpz_divexact, t, in2, NULL);
+
+         mpz_set (res1, in2);
+         mpz_divexact (res1, t, res1);
+         MPZ_CHECK_FORMAT (res1);
+         if (mpz_cmp (ref1, res1) != 0)
+           FAIL2 (mpz_divexact, t, in2, NULL);
+       }
+
+      if (mpz_sgn (in2) > 0)
+       {
+         /* Test mpz_divexact_gcd, same as mpz_divexact */
+         mpz_mul (t, in1, in2);
+         mpz_divexact_gcd (ref1, t, in2);
+         MPZ_CHECK_FORMAT (ref1);
+
+         mpz_set (res1, t);
+         mpz_divexact_gcd (res1, res1, in2);
+         MPZ_CHECK_FORMAT (res1);
+         if (mpz_cmp (ref1, res1) != 0)
+           FAIL2 (mpz_divexact_gcd, t, in2, NULL);
+
+         mpz_set (res1, in2);
+         mpz_divexact_gcd (res1, t, res1);
+         MPZ_CHECK_FORMAT (res1);
+         if (mpz_cmp (ref1, res1) != 0)
+           FAIL2 (mpz_divexact_gcd, t, in2, NULL);
+       }
+    }
+
+  if (failures != 0)
+    {
+      fprintf (stderr, "mpz/reuse: %ld error%s\n", failures, failures == 1 ? "" : "s");
+      exit (1);
+    }
+
+  mpz_clear (bs);
+  mpz_clear (in1);
+  mpz_clear (in2);
+  mpz_clear (in3);
+  mpz_clear (ref1);
+  mpz_clear (ref2);
+  mpz_clear (ref3);
+  mpz_clear (res1);
+  mpz_clear (res2);
+  mpz_clear (res3);
+  mpz_clear (t);
+
+  tests_end ();
+  exit (0);
+}
+
+/* Print one failure line on stdout: the function NAME followed, in
+   parentheses, by IN1 and then by each of IN2 and IN3 that is not NULL.
+   Operands are separated by a single space and written by mpz_out_str
+   with a base argument of -16.  */
+void
+dump (char *name, mpz_t in1, mpz_t in2, mpz_t in3)
+{
+  mpz_ptr optional[2];
+  int k;
+
+  optional[0] = in2;
+  optional[1] = in3;
+
+  printf ("failure in %s (", name);
+  mpz_out_str (stdout, -16, in1);
+  for (k = 0; k < 2; k++)
+    {
+      if (optional[k] == NULL)
+        continue;
+      printf (" ");
+      mpz_out_str (stdout, -16, optional[k]);
+    }
+  printf (")\n");
+}
+
+#endif /* ! DLL_EXPORT */
diff --git a/tests/mpz/t-addsub.c b/tests/mpz/t-addsub.c

new file mode 100644 (file)

index 0000000..815a839
--- /dev/null
+++ b/tests/mpz/t-addsub.c
@@ -0,0 +1,122 @@
+/* Test mpz_add, mpz_sub, mpz_add_ui, mpz_sub_ui, and mpz_ui_sub.
+
+Copyright 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+#include "tests.h"
+
+void debug_mp __GMP_PROTO ((mpz_t, int));
+void dump_abort __GMP_PROTO ((int, char *, mpz_t, mpz_t));
+
+/* Test driver.  For each of REPS random operand pairs (100000 unless a
+   single command-line argument overrides it) check that:
+     - mpz_sub undoes mpz_add;
+     - when op2 fits an unsigned long, mpz_sub_ui undoes mpz_add_ui;
+     - in that same case, mpz_ui_sub (op2, op1) equals the negation of
+       mpz_sub_ui (op1, op2).
+   Any mismatch is reported through dump_abort.  */
+int
+main (int argc, char **argv)
+{
+  mpz_t op1, op2, r1, r2;
+  mp_size_t op1n, op2n;
+  unsigned long int op2long;
+  int i;
+  int reps = 100000;
+  gmp_randstate_ptr rands;
+  mpz_t bs;
+  unsigned long bsi, size_range;
+
+  tests_start ();
+  rands = RANDS;
+
+  mpz_init (bs);
+
+  if (argc == 2)
+     reps = atoi (argv[1]);
+
+  mpz_init (op1);
+  mpz_init (op2);
+  mpz_init (r1);
+  mpz_init (r2);
+
+  for (i = 0; i < reps; i++)
+    {
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 10 + 2;
+
+      mpz_urandomb (bs, rands, size_range);
+      op1n = mpz_get_ui (bs);
+      mpz_rrandomb (op1, rands, op1n);
+
+      mpz_urandomb (bs, rands, size_range);
+      op2n = mpz_get_ui (bs);
+      mpz_rrandomb (op2, rands, op2n);
+
+      /* Two random bits: one sign per operand.  */
+      mpz_urandomb (bs, rands, 2);
+      bsi = mpz_get_ui (bs);
+      if ((bsi & 1) != 0)
+       mpz_neg (op1, op1);
+      if ((bsi & 2) != 0)
+       mpz_neg (op2, op2);
+
+      /* printf ("%ld %ld\n", SIZ (op1), SIZ (op2)); */
+
+      mpz_add (r1, op1, op2);
+      mpz_sub (r2, r1, op2);
+      if (mpz_cmp (r2, op1) != 0)
+       dump_abort (i, "mpz_add or mpz_sub incorrect", op1, op2);
+
+      if (mpz_fits_ulong_p (op2))
+       {
+         op2long = mpz_get_ui (op2);
+         mpz_add_ui (r1, op1, op2long);
+         mpz_sub_ui (r2, r1, op2long);
+         if (mpz_cmp (r2, op1) != 0)
+           dump_abort (i, "mpz_add_ui or mpz_sub_ui incorrect", op1, op2);
+
+         /* op2long - op1 must equal -(op1 - op2long).  Only mpz_ui_sub
+            and mpz_sub_ui take part here, so the message names them.  */
+         mpz_ui_sub (r1, op2long, op1);
+         mpz_sub_ui (r2, op1, op2long);
+         mpz_neg (r2, r2);
+         if (mpz_cmp (r1, r2) != 0)
+           dump_abort (i, "mpz_sub_ui or mpz_ui_sub incorrect", op1, op2);
+       }
+    }
+
+  mpz_clear (bs);
+  mpz_clear (op1);
+  mpz_clear (op2);
+  mpz_clear (r1);
+  mpz_clear (r2);
+
+  tests_end ();
+  exit (0);
+}
+
+/* Report a failed check on stderr -- the message S, the iteration number I
+   and both operands (passed to debug_mp with base -16) -- then abort.  */
+void
+dump_abort (int i, char *s, mpz_t op1, mpz_t op2)
+{
+  fprintf (stderr, "ERROR: %s in test %d\n", s, i);
+
+  fprintf (stderr, "op1 = ");
+  debug_mp (op1, -16);
+
+  fprintf (stderr, "op2 = ");
+  debug_mp (op2, -16);
+
+  abort();
+}
+
+/* Write X to stderr in base BASE and terminate the line.  */
+void
+debug_mp (mpz_t x, int base)
+{
+  mpz_out_str (stderr, base, x);
+  fputc ('\n', stderr);
+}
diff --git a/tests/mpz/t-aorsmul.c b/tests/mpz/t-aorsmul.c

new file mode 100644 (file)

index 0000000..ecec5d5
--- /dev/null
+++ b/tests/mpz/t-aorsmul.c
@@ -0,0 +1,422 @@
+/* Test mpz_addmul, mpz_addmul_ui, mpz_submul, mpz_submul_ui.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+#define M GMP_NUMB_MAX
+
+
+/* Check mpz_addmul and mpz_submul when the destination is also the first
+   multiplicand.  Starting from got = w, mpz_addmul (got, got, y) must give
+   w + w*y and mpz_submul (got, got, y) must give w - w*y; the expected
+   values are built from mpz_mul, mpz_add and mpz_sub.  On a mismatch the
+   operands and both values are traced and the program aborts.  */
+void
+check_one_inplace (mpz_srcptr w, mpz_srcptr y)
+{
+  mpz_t  want, got;
+
+  mpz_init (want);
+  mpz_init (got);
+
+  mpz_mul (want, w, y);
+  mpz_add (want, w, want);
+  mpz_set (got, w);
+  mpz_addmul (got, got, y);
+  MPZ_CHECK_FORMAT (got);
+  if (mpz_cmp (want, got) != 0)
+    {
+      printf ("mpz_addmul inplace fail\n");
+      /* Shared diagnostic tail: the submul check below jumps here after
+         printing its own headline.  */
+    fail:
+      mpz_trace ("w", w);
+      mpz_trace ("y", y);
+      mpz_trace ("want", want);
+      mpz_trace ("got ", got);
+      abort ();
+    }
+
+  mpz_mul (want, w, y);
+  mpz_sub (want, w, want);
+  mpz_set (got, w);
+  mpz_submul (got, got, y);
+  MPZ_CHECK_FORMAT (got);
+  if (mpz_cmp (want, got) != 0)
+    {
+      printf ("mpz_submul inplace fail\n");
+      goto fail;
+    }
+
+  mpz_clear (want);
+  mpz_clear (got);
+}
+
+/* Same as check_one_inplace but for the unsigned long variants: starting
+   from got = w, mpz_addmul_ui (got, got, y) must give w + w*y and
+   mpz_submul_ui (got, got, y) must give w - w*y.  On a mismatch the
+   operands and both values are printed and the program aborts.  */
+void
+check_one_ui_inplace (mpz_ptr w, unsigned long y)
+{
+  mpz_t  want, got;
+
+  mpz_init (want);
+  mpz_init (got);
+
+  mpz_mul_ui (want, w, (unsigned long) y);
+  mpz_add (want, w, want);
+  mpz_set (got, w);
+  mpz_addmul_ui (got, got, (unsigned long) y);
+  MPZ_CHECK_FORMAT (got);
+  if (mpz_cmp (want, got) != 0)
+    {
+      printf ("mpz_addmul_ui fail\n");
+      /* Shared diagnostic tail: the submul_ui check below jumps here after
+         printing its own headline.  */
+    fail:
+      mpz_trace ("w", w);
+      printf    ("y=0x%lX   %lu\n", y, y);
+      mpz_trace ("want", want);
+      mpz_trace ("got ", got);
+      abort ();
+    }
+
+  mpz_mul_ui (want, w, y);
+  mpz_sub (want, w, want);
+  mpz_set (got, w);
+  mpz_submul_ui (got, got, y);
+  MPZ_CHECK_FORMAT (got);
+  if (mpz_cmp (want, got) != 0)
+    {
+      printf ("mpz_submul_ui fail\n");
+      goto fail;
+    }
+
+  mpz_clear (want);
+  mpz_clear (got);
+}
+
+/* Run the in-place checks for every sign combination of W and Y.  The
+   unsigned long variant is included whenever the current Y fits an
+   unsigned long.  Each loop runs exactly twice and negates its operand at
+   the end of every iteration, so W and Y hold their incoming values again
+   when the function returns.  */
+void
+check_all_inplace (mpz_ptr w, mpz_ptr y)
+{
+  int  wneg, yneg;
+
+  MPZ_CHECK_FORMAT (w);
+  MPZ_CHECK_FORMAT (y);
+
+  for (wneg = 0; wneg < 2; wneg++)
+    {
+      for (yneg = 0; yneg < 2; yneg++)
+        {
+          check_one_inplace (w, y);
+
+          if (mpz_fits_ulong_p (y))
+            check_one_ui_inplace (w, mpz_get_ui (y));
+
+          mpz_neg (y, y);
+        }
+      mpz_neg (w, w);
+    }
+}
+
+/* Check mpz_addmul and mpz_submul with a separate destination: starting
+   from got = w, mpz_addmul (got, x, y) must give w + x*y and
+   mpz_submul (got, x, y) must give w - x*y.  The expected values are built
+   from mpz_mul, mpz_add and mpz_sub.  On a mismatch the operands and both
+   values are traced and the program aborts.  */
+void
+check_one (mpz_srcptr w, mpz_srcptr x, mpz_srcptr y)
+{
+  mpz_t  want, got;
+
+  mpz_init (want);
+  mpz_init (got);
+
+  mpz_mul (want, x, y);
+  mpz_add (want, w, want);
+  mpz_set (got, w);
+  mpz_addmul (got, x, y);
+  MPZ_CHECK_FORMAT (got);
+  if (mpz_cmp (want, got) != 0)
+    {
+      printf ("mpz_addmul fail\n");
+      /* Shared diagnostic tail: the submul check below jumps here after
+         printing its own headline.  */
+    fail:
+      mpz_trace ("w", w);
+      mpz_trace ("x", x);
+      mpz_trace ("y", y);
+      mpz_trace ("want", want);
+      mpz_trace ("got ", got);
+      abort ();
+    }
+
+  mpz_mul (want, x, y);
+  mpz_sub (want, w, want);
+  mpz_set (got, w);
+  mpz_submul (got, x, y);
+  MPZ_CHECK_FORMAT (got);
+  if (mpz_cmp (want, got) != 0)
+    {
+      printf ("mpz_submul fail\n");
+      goto fail;
+    }
+
+  mpz_clear (want);
+  mpz_clear (got);
+}
+
+/* Same as check_one but for the unsigned long variants: starting from
+   got = w, mpz_addmul_ui (got, x, y) must give w + x*y and
+   mpz_submul_ui (got, x, y) must give w - x*y.  On a mismatch the operands
+   and both values are printed and the program aborts.  */
+void
+check_one_ui (mpz_ptr w, mpz_ptr x, unsigned long y)
+{
+  mpz_t  want, got;
+
+  mpz_init (want);
+  mpz_init (got);
+
+  mpz_mul_ui (want, x, (unsigned long) y);
+  mpz_add (want, w, want);
+  mpz_set (got, w);
+  mpz_addmul_ui (got, x, (unsigned long) y);
+  MPZ_CHECK_FORMAT (got);
+  if (mpz_cmp (want, got) != 0)
+    {
+      printf ("mpz_addmul_ui fail\n");
+      /* Shared diagnostic tail: the submul_ui check below jumps here after
+         printing its own headline.  */
+    fail:
+      mpz_trace ("w", w);
+      mpz_trace ("x", x);
+      printf    ("y=0x%lX   %lu\n", y, y);
+      mpz_trace ("want", want);
+      mpz_trace ("got ", got);
+      abort ();
+    }
+
+  mpz_mul_ui (want, x, y);
+  mpz_sub (want, w, want);
+  mpz_set (got, w);
+  mpz_submul_ui (got, x, y);
+  MPZ_CHECK_FORMAT (got);
+  if (mpz_cmp (want, got) != 0)
+    {
+      printf ("mpz_submul_ui fail\n");
+      goto fail;
+    }
+
+  mpz_clear (want);
+  mpz_clear (got);
+}
+
+
+/* Run check_one for every sign combination of W, X and Y, first with the
+   operands as given and then with X and Y exchanged.  check_one_ui is run
+   as well whenever the current Y fits an unsigned long.  Each loop runs
+   exactly twice and negates (or swaps) its operand at the end of every
+   iteration, so all three hold their incoming values again on return.  */
+void
+check_all (mpz_ptr w, mpz_ptr x, mpz_ptr y)
+{
+  int    swap, wneg, xneg, yneg;
+
+  MPZ_CHECK_FORMAT (w);
+  MPZ_CHECK_FORMAT (x);
+  MPZ_CHECK_FORMAT (y);
+
+  for (swap = 0; swap < 2; swap++)
+    {
+      for (wneg = 0; wneg < 2; wneg++)
+        {
+          for (xneg = 0; xneg < 2; xneg++)
+            {
+              for (yneg = 0; yneg < 2; yneg++)
+                {
+                  check_one (w, x, y);
+
+                  if (mpz_fits_ulong_p (y))
+                    check_one_ui (w, x, mpz_get_ui (y));
+
+                  mpz_neg (y, y);
+                }
+              mpz_neg (x, x);
+            }
+          mpz_neg (w, w);
+        }
+      mpz_swap (x, y);
+    }
+}
+
+/* Run check_all_inplace on a fixed table of (w, y) pairs.  w is given as
+   up to six limbs with unlisted limbs zero-initialised; M is
+   GMP_NUMB_MAX.  y is an unsigned long, either small or ULONG_MAX.  */
+void
+check_data_inplace_ui (void)
+{
+  static const struct {
+    mp_limb_t      w[6];
+    unsigned long  y;
+
+  } data[] = {
+
+    { { 0 }, 0 },
+    { { 0 }, 1 },
+    { { 1 }, 1 },
+    { { 2 }, 1 },
+
+    { { 123 }, 1 },
+    { { 123 }, ULONG_MAX },
+    { { M }, 1 },
+    { { M }, ULONG_MAX },
+
+    { { 123, 456 }, 1 },
+    { { M, M }, 1 },
+    { { 123, 456 }, ULONG_MAX },
+    { { M, M }, ULONG_MAX },
+
+    { { 123, 456, 789 }, 1 },
+    { { M, M, M }, 1 },
+    { { 123, 456, 789 }, ULONG_MAX },
+    { { M, M, M }, ULONG_MAX },
+  };
+
+  mpz_t  w, y;
+  int    i;
+
+  mpz_init (w);
+  mpz_init (y);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      /* All six limbs are handed over; presumably mpz_set_n strips the
+         high zero limbs -- confirm in the tests support code.  */
+      mpz_set_n (w, data[i].w, (mp_size_t) numberof(data[i].w));
+      mpz_set_ui (y, data[i].y);
+      check_all_inplace (w, y);
+    }
+
+  mpz_clear (w);
+  mpz_clear (y);
+}
+
+/* Run check_all on a fixed table of (w, x, y) triples chosen to provoke
+   the carry and borrow situations named in the table comments.  Each
+   operand is given as up to six limbs with unlisted limbs zero-initialised;
+   M is GMP_NUMB_MAX.  */
+void
+check_data (void)
+{
+  static const struct {
+    mp_limb_t  w[6];
+    mp_limb_t  x[6];
+    mp_limb_t  y[6];
+
+  } data[] = {
+
+    /* reducing to zero */
+    { { 1 }, { 1 }, { 1 } },
+    { { 2 }, { 1 }, { 2 } },
+    { { 0,1 }, { 0,1 }, { 1 } },
+
+    /* reducing to 1 */
+    { { 0,1 },       { M },       { 1 } },
+    { { 0,0,1 },     { M,M },     { 1 } },
+    { { 0,0,0,1 },   { M,M,M },   { 1 } },
+    { { 0,0,0,0,1 }, { M,M,M,M }, { 1 } },
+
+    /* reducing to -1 */
+    { { M },       { 0,1 },       { 1 } },
+    { { M,M },     { 0,0,1 },     { 1 } },
+    { { M,M,M },   { 0,0,0,1 },   { 1 } },
+    { { M,M,M,M }, { 0,0,0,0,1 }, { 1 } },
+
+    /* carry out of addmul */
+    { { M },     { 1 }, { 1 } },
+    { { M,M },   { 1 }, { 1 } },
+    { { M,M,M }, { 1 }, { 1 } },
+
+    /* borrow from submul */
+    { { 0,1 },     { 1 }, { 1 } },
+    { { 0,0,1 },   { 1 }, { 1 } },
+    { { 0,0,0,1 }, { 1 }, { 1 } },
+
+    /* borrow from submul */
+    { { 0,0,1 },     { 0,1 }, { 1 } },
+    { { 0,0,0,1 },   { 0,1 }, { 1 } },
+    { { 0,0,0,0,1 }, { 0,1 }, { 1 } },
+
+    /* more borrow from submul */
+    { { M }, { 0,1 },       { 1 } },
+    { { M }, { 0,0,1 },     { 1 } },
+    { { M }, { 0,0,0,1 },   { 1 } },
+    { { M }, { 0,0,0,0,1 }, { 1 } },
+
+    /* big borrow from submul */
+    { { 0,0,1 },     { M,M }, { M } },
+    { { 0,0,0,1 },   { M,M }, { M } },
+    { { 0,0,0,0,1 }, { M,M }, { M } },
+
+    /* small w */
+    { { 0,1 }, { M,M },       { M } },
+    { { 0,1 }, { M,M,M },     { M } },
+    { { 0,1 }, { M,M,M,M },   { M } },
+    { { 0,1 }, { M,M,M,M,M }, { M } },
+  };
+
+  mpz_t  w, x, y;
+  int    i;
+
+  mpz_init (w);
+  mpz_init (x);
+  mpz_init (y);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      /* All six limbs of each operand are handed over; presumably
+         mpz_set_n strips the high zero limbs -- confirm in the tests
+         support code.  */
+      mpz_set_n (w, data[i].w, (mp_size_t) numberof(data[i].w));
+      mpz_set_n (x, data[i].x, (mp_size_t) numberof(data[i].x));
+      mpz_set_n (y, data[i].y, (mp_size_t) numberof(data[i].y));
+      check_all (w, x, y);
+    }
+
+  mpz_clear (w);
+  mpz_clear (x);
+  mpz_clear (y);
+}
+
+
+/* Random testing.  Runs REPS rounds (2000 unless exactly one command-line
+   argument overrides it), each made of two trials.  Both trials draw w and
+   x from mpz_errandomb with a size argument of 5*GMP_LIMB_BITS.  y uses
+   that same size in the first trial and BITS_PER_ULONG in the second.
+   Every triple is passed to check_all and check_all_inplace.  */
+void
+check_random (int argc, char *argv[])
+{
+  gmp_randstate_ptr rands = RANDS;
+  mpz_t  w, x, y;
+  int    round, trial, reps = 2000;
+
+  if (argc == 2)
+    reps = atoi (argv[1]);
+
+  mpz_init (w);
+  mpz_init (x);
+  mpz_init (y);
+
+  for (round = 0; round < reps; round++)
+    {
+      for (trial = 0; trial < 2; trial++)
+        {
+          mpz_errandomb (w, rands, 5*GMP_LIMB_BITS);
+          mpz_errandomb (x, rands, 5*GMP_LIMB_BITS);
+          if (trial == 0)
+            mpz_errandomb (y, rands, 5*GMP_LIMB_BITS);
+          else
+            mpz_errandomb (y, rands, BITS_PER_ULONG);
+
+          check_all (w, x, y);
+          check_all_inplace (w, y);
+        }
+    }
+
+  mpz_clear (w);
+  mpz_clear (x);
+  mpz_clear (y);
+}
+
+
+/* Run the fixed-data checks and then the random checks.  mp_trace_base is
+   set to -16 beforehand; presumably it selects the base used by mpz_trace
+   in failure output -- confirm against the tests support library.  */
+int
+main (int argc, char *argv[])
+{
+  tests_start ();
+  mp_trace_base = -16;
+
+  check_data ();
+  check_data_inplace_ui ();
+  check_random (argc, argv);
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-bin.c b/tests/mpz/t-bin.c

new file mode 100644 (file)

index 0000000..6e9a34b
--- /dev/null
+++ b/tests/mpz/t-bin.c
@@ -0,0 +1,218 @@
+/* Exercise mpz_bin_ui and mpz_bin_uiui.
+
+Copyright 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+/* Abort with a dump of n, k, got and want unless mpz_bin_ui (n, k) == WANT.  */
+void
+try_mpz_bin_ui (mpz_srcptr want, mpz_srcptr n, unsigned long k)
+{
+  mpz_t  got;
+
+  mpz_init (got);
+  mpz_bin_ui (got, n, k);
+  MPZ_CHECK_FORMAT (got);  /* NOTE(review): presumably validates got's internal layout -- confirm */
+  if (mpz_cmp (got, want) != 0)
+    {
+      printf ("mpz_bin_ui wrong\n");
+      printf ("  n="); mpz_out_str (stdout, 10, n); printf ("\n");
+      printf ("  k=%lu\n", k);
+      printf ("  got="); mpz_out_str (stdout, 10, got); printf ("\n");
+      printf ("  want="); mpz_out_str (stdout, 10, want); printf ("\n");
+      abort();
+    }
+  mpz_clear (got);
+}
+
+/* Abort with a dump of n, k, got and want unless mpz_bin_uiui (n, k) == WANT.  */
+void
+try_mpz_bin_uiui (mpz_srcptr want, unsigned long n, unsigned long k)
+{
+  mpz_t  got;
+
+  mpz_init (got);
+  mpz_bin_uiui (got, n, k);
+  MPZ_CHECK_FORMAT (got);  /* NOTE(review): presumably validates got's internal layout -- confirm */
+  if (mpz_cmp (got, want) != 0)
+    {
+      printf ("mpz_bin_uiui wrong\n");
+      printf ("  n=%lu\n", n);
+      printf ("  k=%lu\n", k);
+      printf ("  got="); mpz_out_str (stdout, 10, got); printf ("\n");
+      printf ("  want="); mpz_out_str (stdout, 10, want); printf ("\n");
+      abort();
+    }
+  mpz_clear (got);
+}
+
+/* Table-driven check of mpz_bin_ui, and of mpz_bin_uiui when n fits a ulong.  */
+void
+samples (void)
+{
+  static const struct {
+    const char     *n;
+    unsigned long  k;
+    const char     *want;
+  } data[] = {
+
+    {   "0",  0, "1"   },
+    {   "0",  1, "0"   },
+    {   "0",  2, "0"   },
+    {   "0",  3, "0"   },
+    {   "0",  4, "0"   },
+    {   "0", 123456, "0" },
+
+    {   "1",  0, "1"   },
+    {   "1",  1, "1"   },
+    {   "1",  2, "0"   },
+    {   "1",  3, "0"   },
+    {   "1",  4, "0"   },
+    {   "1", 123456, "0" },
+
+    {   "2",  0, "1"   },
+    {   "2",  1, "2"   },
+    {   "2",  2, "1"   },
+    {   "2",  3, "0"   },
+    {   "2",  4, "0"   },
+    {   "2", 123456, "0" },
+
+    {   "3",  0, "1"   },
+    {   "3",  1, "3"   },
+    {   "3",  2, "3"   },
+    {   "3",  3, "1"   },
+    {   "3",  4, "0"   },
+    {   "3",  5, "0"   },
+    {   "3", 123456, "0" },
+
+    {   "4",  0, "1"   },
+    {   "4",  1, "4"   },
+    {   "4",  2, "6"   },
+    {   "4",  3, "4"   },
+    {   "4",  4, "1"   },
+    {   "4",  5, "0"   },
+    {   "4",  6, "0"   },
+    {   "4", 123456, "0" },
+
+    {   "10",  0, "1"   },
+    {   "10",  1, "10"  },
+    {   "10",  2, "45"  },
+    {   "10",  3, "120" },
+    {   "10",  4, "210" },
+    {   "10",  5, "252" },
+    {   "10",  6, "210" },
+    {   "10",  7, "120" },
+    {   "10",  8, "45"  },
+    {   "10",  9, "10"  },
+    {   "10", 10, "1"   },
+    {   "10", 11,     "0" },
+    {   "10", 12,     "0" },
+    {   "10", 123456, "0" },
+
+    /* negatives, using bin(-n,k) = (-1)^k * bin(n+k-1,k) */
+    {   "-1",  0,  "1"  },
+    {   "-1",  1, "-1"  },
+    {   "-1",  2,  "1"  },
+    {   "-1",  3, "-1"  },
+    {   "-1",  4,  "1"  },
+
+    {   "-2",  0,  "1"  },
+    {   "-2",  1, "-2"  },
+    {   "-2",  2,  "3"  },
+    {   "-2",  3, "-4"  },
+    {   "-2",  4,  "5"  },
+    {   "-2",  5, "-6"  },
+    {   "-2",  6,  "7"  },
+
+    {   "-3",  0,   "1"  },
+    {   "-3",  1,  "-3"  },
+    {   "-3",  2,   "6"  },
+    {   "-3",  3, "-10"  },
+    {   "-3",  4,  "15"  },
+    {   "-3",  5, "-21"  },
+    {   "-3",  6,  "28"  },
+
+    {   "40", 20,  "137846528820" },
+    {   "60", 30,  "118264581564861424" },
+  };
+
+  mpz_t  n, want;
+  int    i;
+
+  mpz_init (n);
+  mpz_init (want);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      mpz_set_str_or_abort (n, data[i].n, 0);
+      mpz_set_str_or_abort (want, data[i].want, 0);
+
+      try_mpz_bin_ui (want, n, data[i].k);
+
+      if (mpz_fits_ulong_p (n))  /* negative n can only go through mpz_bin_ui */
+       try_mpz_bin_uiui (want, mpz_get_ui (n), data[i].k);
+    }
+
+  mpz_clear (n);
+  mpz_clear (want);
+}
+
+
+/* Test bin(2k,k) for k = 1..199.  This produces some biggish numbers to
+   exercise the limb accumulating code; WANT is advanced by a recurrence.  */
+void
+twos (void)
+{
+  mpz_t          n, want;
+  unsigned long  k;
+
+  mpz_init (n);
+  mpz_init (want);
+
+  mpz_set_ui (want, (unsigned long) 2);  /* bin(2,1), the k=1 starting value */
+  for (k = 1; k < 200; k++)
+    {
+      mpz_set_ui (n, 2*k);
+      try_mpz_bin_ui (want, n, k);
+
+      try_mpz_bin_uiui (want, 2*k, k);
+
+      mpz_mul_ui (want, want, 2*(2*k+1));  /* bin(2k+2,k+1) = bin(2k,k) * 2*(2k+1) / (k+1) */
+      mpz_fdiv_q_ui (want, want, k+1);
+    }
+
+  mpz_clear (n);
+  mpz_clear (want);
+}
+
+/* Entry point: the sample table first, then the bin(2k,k) sequence.  */
+int
+main (void)
+{
+  tests_start ();
+
+  samples ();
+  twos ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-cdiv_ui.c b/tests/mpz/t-cdiv_ui.c

new file mode 100644 (file)

index 0000000..1f6be7f
--- /dev/null
+++ b/tests/mpz/t-cdiv_ui.c
@@ -0,0 +1,159 @@
+/* Test mpz_abs, mpz_add, mpz_cmp, mpz_cmp_ui, mpz_cdiv_qr_ui, mpz_cdiv_q_ui,
+   mpz_cdiv_r_ui, mpz_cdiv_ui, mpz_mul_ui.
+
+Copyright 1993, 1994, 1996, 2000, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+void dump_abort __GMP_PROTO ((char *, mpz_t, unsigned long));
+void debug_mp __GMP_PROTO ((mpz_t, int));
+
+int
+main (int argc, char **argv)
+{
+  mpz_t dividend;
+  mpz_t quotient, remainder;
+  mpz_t quotient2, remainder2;
+  mpz_t temp;
+  mp_size_t dividend_size;
+  unsigned long divisor;
+  int i;
+  int reps = 10000;  /* default iteration count, overridden by argv[1] */
+  gmp_randstate_ptr rands;
+  mpz_t bs;
+  unsigned long bsi, size_range;
+  unsigned long r_rq, r_q, r_r, r;  /* return values of the four cdiv variants */
+
+  tests_start ();
+  rands = RANDS;
+
+  mpz_init (bs);
+
+  if (argc == 2)
+     reps = atoi (argv[1]);
+
+  mpz_init (dividend);
+  mpz_init (quotient);
+  mpz_init (remainder);
+  mpz_init (quotient2);
+  mpz_init (remainder2);
+  mpz_init (temp);
+
+  for (i = 0; i < reps; i++)
+    {
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 10 + 2; /* 0..2047 bit operands */
+
+      do
+       {
+         mpz_rrandomb (bs, rands, 64);
+         divisor = mpz_get_ui (bs);
+       }
+      while (divisor == 0);  /* retry until the divisor is non-zero */
+
+      mpz_urandomb (bs, rands, size_range);
+      dividend_size = mpz_get_ui (bs);
+      mpz_rrandomb (dividend, rands, dividend_size);
+
+      mpz_urandomb (bs, rands, 2);
+      bsi = mpz_get_ui (bs);
+      if ((bsi & 1) != 0)  /* negate when the low random bit is set */
+       mpz_neg (dividend, dividend);
+
+      /* printf ("%ld\n", SIZ (dividend)); */
+
+      r_rq = mpz_cdiv_qr_ui (quotient, remainder, dividend, divisor);
+      r_q = mpz_cdiv_q_ui (quotient2, dividend, divisor);
+      r_r = mpz_cdiv_r_ui (remainder2, dividend, divisor);
+      r = mpz_cdiv_ui (dividend, divisor);
+
+      /* First determine that the quotients and remainders computed
+        with different functions are equal.  */
+      if (mpz_cmp (quotient, quotient2) != 0)
+       dump_abort ("quotients from mpz_cdiv_qr_ui and mpz_cdiv_q_ui differ",
+                   dividend, divisor);
+      if (mpz_cmp (remainder, remainder2) != 0)
+       dump_abort ("remainders from mpz_cdiv_qr_ui and mpz_cdiv_r_ui differ",
+                   dividend, divisor);
+
+      /* Check if the sign of the quotient is correct.  */
+      if (mpz_cmp_ui (quotient, 0) != 0)
+       if ((mpz_cmp_ui (quotient, 0) < 0)
+           != (mpz_cmp_ui (dividend, 0) < 0))
+       dump_abort ("quotient sign wrong", dividend, divisor);
+
+      /* Check if the remainder has the opposite sign as the (positive) divisor
+        (quotient rounded towards plus infinity).  */
+      if (mpz_cmp_ui (remainder, 0) != 0)
+       if (mpz_cmp_ui (remainder, 0) > 0)
+         dump_abort ("remainder sign wrong", dividend, divisor);
+
+      mpz_mul_ui (temp, quotient, divisor);  /* rebuild the dividend as q*d + r */
+      mpz_add (temp, temp, remainder);
+      if (mpz_cmp (temp, dividend) != 0)
+       dump_abort ("n mod d != n - [n/d]*d", dividend, divisor);
+
+      mpz_abs (remainder, remainder);  /* the ulong return values below are checked against |r| */
+      if (mpz_cmp_ui (remainder, divisor) >= 0)
+       dump_abort ("remainder greater than divisor", dividend, divisor);
+
+      if (mpz_cmp_ui (remainder, r_rq) != 0)
+       dump_abort ("remainder returned from mpz_cdiv_qr_ui is wrong",
+                   dividend, divisor);
+      if (mpz_cmp_ui (remainder, r_q) != 0)
+       dump_abort ("remainder returned from mpz_cdiv_q_ui is wrong",
+                   dividend, divisor);
+      if (mpz_cmp_ui (remainder, r_r) != 0)
+       dump_abort ("remainder returned from mpz_cdiv_r_ui is wrong",
+                   dividend, divisor);
+      if (mpz_cmp_ui (remainder, r) != 0)
+       dump_abort ("remainder returned from mpz_cdiv_ui is wrong",
+                   dividend, divisor);
+    }
+
+  mpz_clear (bs);
+  mpz_clear (dividend);
+  mpz_clear (quotient);
+  mpz_clear (remainder);
+  mpz_clear (quotient2);
+  mpz_clear (remainder2);
+  mpz_clear (temp);
+
+  tests_end ();
+  exit (0);
+}
+
+void
+dump_abort (char *str, mpz_t dividend, unsigned long divisor)
+{
+  fprintf (stderr, "ERROR: %s\n", str);  /* name the failed check, then dump both operands */
+  fprintf (stderr, "dividend = "); debug_mp (dividend, -16);  /* base -16: upper-case hex */
+  fprintf (stderr, "divisor  = %lX\n", divisor);
+  abort();  /* does not return */
+}
+
+void
+debug_mp (mpz_t x, int base)
+{
+  mpz_out_str (stderr, base, x); fputc ('\n', stderr);  /* x in BASE on stderr, then a newline */
+}
diff --git a/tests/mpz/t-cmp.c b/tests/mpz/t-cmp.c

new file mode 100644 (file)

index 0000000..40c54a1
--- /dev/null
+++ b/tests/mpz/t-cmp.c
@@ -0,0 +1,182 @@
+/* Test mpz_cmp and mpz_cmpabs.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* Nothing sophisticated here, just exercise some combinations of sizes and
+   signs.  */
+
+/* Abort unless mpz_cmp (x,y) and mpz_cmpabs (x,y) have the wanted signs.  */
+void
+check_one (mpz_ptr x, mpz_ptr y, int want_cmp, int want_cmpabs)
+{
+  int  res, res_sign, want_sign;
+
+  res = mpz_cmp (x, y);
+  res_sign = (res > 0) - (res < 0);
+  want_sign = (want_cmp > 0) - (want_cmp < 0);
+  if (res_sign != want_sign)
+    {
+      printf ("mpz_cmp got %d want %d\n", res, want_cmp);
+      mpz_trace ("x", x);
+      mpz_trace ("y", y);
+      abort ();
+    }
+
+  res = mpz_cmpabs (x, y);
+  res_sign = (res > 0) - (res < 0);
+  want_sign = (want_cmpabs > 0) - (want_cmpabs < 0);
+  if (res_sign != want_sign)
+    {
+      printf ("mpz_cmpabs got %d want %d\n", res, want_cmpabs);
+      mpz_trace ("x", x);
+      mpz_trace ("y", y);
+      abort ();
+    }
+}
+
+/* Check x,y in both argument orders, then again with both operands negated.  */
+void
+check_all (mpz_ptr x, mpz_ptr y, int want_cmp, int want_cmpabs)
+{
+  check_one (x, y,  want_cmp,  want_cmpabs);
+  check_one (y, x, -want_cmp, -want_cmpabs);
+
+  /* Negating both flips the mpz_cmp result and leaves mpz_cmpabs alone.  */
+  mpz_neg (y, y);
+  mpz_neg (x, x);
+
+  check_one (x, y, -want_cmp,  want_cmpabs);
+  check_one (y, x,  want_cmp, -want_cmpabs);
+}
+
+
+#define SET1(z,size, n) \
+  SIZ(z) = size; PTR(z)[0] = n
+
+#define SET2(z,size, n1,n0) \
+  SIZ(z) = size; PTR(z)[1] = n1; PTR(z)[0] = n0
+
+#define SET4(z,size, n3,n2,n1,n0) \
+  SIZ(z) = size; PTR(z)[3] = n3; PTR(z)[2] = n2; PTR(z)[1] = n1; PTR(z)[0] = n0
+
+void
+check_various (void)
+{
+  mpz_t  x, y;
+
+  mpz_init (x);
+  mpz_init (y);
+
+  mpz_realloc (x, (mp_size_t) 20);  /* room for the limbs the SETn macros store directly */
+  mpz_realloc (y, (mp_size_t) 20);
+
+  /* 0 cmp 0, junk in low limbs */
+  SET1 (x,0, 123);
+  SET1 (y,0, 456);
+  check_all (x, y, 0, 0);
+
+
+  /* 123 cmp 0 */
+  SET1 (x,1, 123);
+  SET1 (y,0, 456);
+  check_all (x, y, 1, 1);
+
+  /* 123:456 cmp 0 */
+  SET2 (x,2, 456,123);
+  SET1 (y,0, 9999);
+  check_all (x, y, 1, 1);
+
+
+  /* 123 cmp 123 */
+  SET1(x,1, 123);
+  SET1(y,1, 123);
+  check_all (x, y, 0, 0);
+
+  /* -123 cmp 123 */
+  SET1(x,-1, 123);
+  SET1(y,1,  123);
+  check_all (x, y, -1, 0);
+
+
+  /* 123 cmp 456 */
+  SET1(x,1, 123);
+  SET1(y,1, 456);
+  check_all (x, y, -1, -1);
+
+  /* -123 cmp 456 */
+  SET1(x,-1, 123);
+  SET1(y,1,  456);
+  check_all (x, y, -1, -1);
+
+  /* 123 cmp -456 */
+  SET1(x,1,  123);
+  SET1(y,-1, 456);
+  check_all (x, y, 1, -1);
+
+
+  /* 1:0 cmp 1:0 */
+  SET2 (x,2, 1,0);
+  SET2 (y,2, 1,0);
+  check_all (x, y, 0, 0);
+
+  /* -1:0 cmp 1:0 */
+  SET2 (x,-2, 1,0);
+  SET2 (y,2,  1,0);
+  check_all (x, y, -1, 0);
+
+
+  /* 2:0 cmp 1:0 */
+  SET2 (x,2, 2,0);
+  SET2 (y,2, 1,0);
+  check_all (x, y, 1, 1);
+
+
+  /* 4:3:2:1 cmp 2:1 */
+  SET4 (x,4, 4,3,2,1);
+  SET2 (y,2, 2,1);
+  check_all (x, y, 1, 1);
+
+  /* -4:3:2:1 cmp 2:1 */
+  SET4 (x,-4, 4,3,2,1);
+  SET2 (y,2,  2,1);
+  check_all (x, y, -1, 1);
+
+
+  mpz_clear (x);
+  mpz_clear (y);
+}
+
+/* Entry point: run the hand-built size/sign combinations.  */
+int
+main (void)
+{
+  tests_start ();
+  mp_trace_base = -16;  /* NOTE(review): presumably selects hex for mpz_trace dumps -- confirm */
+
+  check_various ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-cmp_d.c b/tests/mpz/t-cmp_d.c

new file mode 100644 (file)

index 0000000..9cefd20
--- /dev/null
+++ b/tests/mpz/t-cmp_d.c
@@ -0,0 +1,290 @@
+/* Test mpz_cmp_d and mpz_cmpabs_d.
+
+Copyright 2001, 2002, 2003, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* FIXME: Not sure if the tests here are exhaustive.  Ought to try to get
+   each possible exit from mpz_cmp_d (and mpz_cmpabs_d) exercised.  */
+
+
+#define SGN(n)  (((n) > 0) - ((n) < 0))  /* 1, -1 or 0 by the sign of n */
+
+/* Abort with a dump unless mpz_cmp_d/mpz_cmpabs_d (x,y) have signs CMP/CMPABS.  */
+void
+check_one (const char *name, mpz_srcptr x, double y, int cmp, int cmpabs)
+{
+  int   got;
+
+  got = mpz_cmp_d (x, y);
+  if (SGN(got) != cmp)
+    {
+      int i;
+      printf    ("mpz_cmp_d wrong (from %s)\n", name);
+      printf    ("  got  %d\n", got);
+      printf    ("  want %d\n", cmp);
+    fail:  /* shared dump; the mpz_cmpabs_d check below jumps here too */
+      mpz_trace ("  x", x);
+      printf    ("  y %g\n", y);
+      mp_trace_base=-16;  /* repeat the dump with the trace base switched */
+      mpz_trace ("  x", x);
+      printf    ("  y %g\n", y);
+      printf    ("  y");
+      for (i = 0; i < (int) sizeof(y); i++)  /* raw bytes of y, in memory order */
+        printf (" %02X", (unsigned) ((unsigned char *) &y)[i]);
+      printf ("\n");
+      abort ();
+    }
+
+  got = mpz_cmpabs_d (x, y);
+  if (SGN(got) != cmpabs)
+    {
+      printf    ("mpz_cmpabs_d wrong (from %s)\n", name);  /* name the caller here as well */
+      printf    ("  got  %d\n", got);
+      printf    ("  want %d\n", cmpabs);
+      goto fail;
+    }
+}
+
+/* Fixed cases: x as a string, a double y, and the wanted cmp/cmpabs signs.  */
+void
+check_data (void)
+{
+  static const struct {
+    const char  *x;
+    double      y;
+    int         cmp, cmpabs;
+
+  } data[] = {
+
+    {  "0",  0.0,  0,  0 },
+
+    {  "1",  0.0,  1,  1 },
+    { "-1",  0.0, -1,  1 },
+
+    {  "0",  1.0, -1, -1 },
+    {  "0", -1.0,  1, -1 },
+
+    {  "0x1000000000000000000000000000000000000000000000000", 0.0,  1, 1 },
+    { "-0x1000000000000000000000000000000000000000000000000", 0.0, -1, 1 },
+
+    {  "0",  1e100, -1, -1 },
+    {  "0", -1e100,  1, -1 },
+
+    {  "2",  1.5,   1,  1 },
+    {  "2", -1.5,   1,  1 },
+    { "-2",  1.5,  -1,  1 },
+    { "-2", -1.5,  -1,  1 },
+  };
+
+  mpz_t  x;
+  int    i;
+
+  mpz_init (x);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      mpz_set_str_or_abort (x, data[i].x, 0);  /* base 0, so the "0x" entries parse as hex */
+      check_one ("check_data", x, data[i].y, data[i].cmp, data[i].cmpabs);
+    }
+
+  mpz_clear (x);
+}
+
+
+/* Equality of all-ones integers 2^n-1 while a double holds them exactly */
+void
+check_onebits (void)
+{
+  mpz_t   x, x2;
+  double  y;
+  int     i;
+
+  mpz_init_set_ui (x, 0L);
+  mpz_init (x2);
+
+  for (i = 0; i < 512; i++)
+    {
+      mpz_mul_2exp (x, x, 1);  /* x = 2*x + 1, appending one more 1 bit */
+      mpz_add_ui (x, x, 1L);
+
+      y = mpz_get_d (x);
+      mpz_set_d (x2, y);  /* round trip through double to detect lost bits */
+
+      /* stop if any truncation is occurring */
+      if (mpz_cmp (x, x2) != 0)
+        break;
+
+      check_one ("check_onebits", x, y, 0, 0);
+      check_one ("check_onebits", x, -y, 1, 0);
+      mpz_neg (x, x);
+      check_one ("check_onebits", x, y, -1, 0);
+      check_one ("check_onebits", x, -y, 0, 0);
+      mpz_neg (x, x);  /* back to positive for the next iteration */
+    }
+
+  mpz_clear (x);
+  mpz_clear (x2);
+}
+
+
+/* 2^i and 2^i+-1 against the double 2^i; the +-1 may lie in a limb below the double */
+void
+check_low_z_one (void)
+{
+  mpz_t          x;
+  double         y;
+  unsigned long  i;
+
+  mpz_init (x);
+
+  /* FIXME: It'd be better to base this on the float format. */
+#ifdef __vax
+#define LIM 127                        /* vax fp numbers have limited range */
+#else
+#define LIM 512
+#endif
+
+  for (i = 1; i < LIM; i++)
+    {
+      mpz_set_ui (x, 1L);
+      mpz_mul_2exp (x, x, i);  /* x = 2^i */
+      y = mpz_get_d (x);
+
+      check_one ("check_low_z_one", x, y,   0, 0);
+      check_one ("check_low_z_one", x, -y,  1, 0);
+      mpz_neg (x, x);
+      check_one ("check_low_z_one", x, y,  -1, 0);
+      check_one ("check_low_z_one", x, -y,  0, 0);
+      mpz_neg (x, x);
+
+      mpz_sub_ui (x, x, 1);  /* x = 2^i - 1, just below y in magnitude */
+
+      check_one ("check_low_z_one", x, y,  -1, -1);
+      check_one ("check_low_z_one", x, -y,  1, -1);
+      mpz_neg (x, x);
+      check_one ("check_low_z_one", x, y,  -1, -1);
+      check_one ("check_low_z_one", x, -y,  1, -1);
+      mpz_neg (x, x);
+
+      mpz_add_ui (x, x, 2);  /* x = 2^i + 1, just above y in magnitude */
+
+      check_one ("check_low_z_one", x, y,   1, 1);
+      check_one ("check_low_z_one", x, -y,  1, 1);
+      mpz_neg (x, x);
+      check_one ("check_low_z_one", x, y,  -1, 1);
+      check_one ("check_low_z_one", x, -y, -1, 1);
+      mpz_neg (x, x);
+    }
+
+  mpz_clear (x);
+}
+
+/* Comparing 1 and 1+2^-n.  "y" is volatile to make gcc store and fetch it,
+   which forces it to a 64-bit double, whereas on x86 it would otherwise
+   remain on the float stack as an 80-bit long double.  */
+void
+check_one_2exp (void)
+{
+  double           e;
+  mpz_t            x;
+  volatile double  y;
+  int              i;
+
+  mpz_init (x);
+
+  e = 1.0;
+  for (i = 0; i < 128; i++)
+    {
+      e /= 2.0;  /* e = 2^-(i+1) */
+      y = 1.0 + e;
+      if (y == 1.0)  /* e no longer changes the stored double, nothing left to test */
+        break;
+
+      mpz_set_ui (x, 1L);
+      check_one ("check_one_2exp", x,  y, -1, -1);
+      check_one ("check_one_2exp", x, -y,  1, -1);
+
+      mpz_set_si (x, -1L);
+      check_one ("check_one_2exp", x,  y, -1, -1);
+      check_one ("check_one_2exp", x, -y,  1, -1);
+    }
+
+  mpz_clear (x);
+}
+
+void
+check_infinity (void)
+{
+  mpz_t   x;
+  double  y = tests_infinity_d ();
+  if (y == 0.0)  /* NOTE(review): presumably 0.0 means no infinity is available -- confirm */
+    return;
+
+  mpz_init (x);
+
+  /* 0 cmp inf */
+  mpz_set_ui (x, 0L);
+  check_one ("check_infinity", x,  y, -1, -1);
+  check_one ("check_infinity", x, -y,  1, -1);
+
+  /* 123 cmp inf */
+  mpz_set_ui (x, 123L);
+  check_one ("check_infinity", x,  y, -1, -1);
+  check_one ("check_infinity", x, -y,  1, -1);
+
+  /* -123 cmp inf */
+  mpz_set_si (x, -123L);
+  check_one ("check_infinity", x,  y, -1, -1);
+  check_one ("check_infinity", x, -y,  1, -1);
+
+  /* 2^5000 cmp inf */
+  mpz_set_ui (x, 1L);
+  mpz_mul_2exp (x, x, 5000L);
+  check_one ("check_infinity", x,  y, -1, -1);
+  check_one ("check_infinity", x, -y,  1, -1);
+
+  /* -2^5000 cmp inf */
+  mpz_neg (x, x);
+  check_one ("check_infinity", x,  y, -1, -1);
+  check_one ("check_infinity", x, -y,  1, -1);
+
+  mpz_clear (x);
+}
+
+int
+main (int argc, char *argv[])
+{
+  tests_start ();
+
+  check_data ();       /* fixed table */
+  check_onebits ();    /* all-ones values exactly representable as doubles */
+  check_low_z_one ();  /* 2^i and its neighbours */
+  check_one_2exp ();   /* 1 against 1+2^-n */
+  check_infinity ();   /* finite values against +-infinity */
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-cmp_si.c b/tests/mpz/t-cmp_si.c

new file mode 100644 (file)

index 0000000..25e8a3b
--- /dev/null
+++ b/tests/mpz/t-cmp_si.c
@@ -0,0 +1,102 @@
+/* Test mpz_cmp_si.
+
+Copyright 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+#define SGN(x)       (((x) > 0) - ((x) < 0))  /* -1, 0 or 1 by the sign of x */
+/* Table-driven check of the sign returned by mpz_cmp_si (a, b).  */
+void
+check_data (void)
+{
+  static const struct {
+    const char  *a, *b;
+    int         want;
+  } data[] = {
+    { "0",  "1", -1 },
+    { "0",  "0",  0 },
+    { "0", "-1",  1 },
+
+    { "1",  "1", 0 },
+    { "1",  "0", 1 },
+    { "1", "-1", 1 },
+
+    { "-1",  "1", -1 },
+    { "-1",  "0", -1 },
+    { "-1", "-1", 0 },
+
+    {           "0", "-0x80000000",  1 },
+    {  "0x80000000", "-0x80000000",  1 },
+    {  "0x80000001", "-0x80000000",  1 },
+    { "-0x80000000", "-0x80000000",  0 },
+    { "-0x80000001", "-0x80000000", -1 },
+
+    {                   "0", "-0x8000000000000000",  1 },
+    {  "0x8000000000000000", "-0x8000000000000000",  1 },
+    {  "0x8000000000000001", "-0x8000000000000000",  1 },
+    { "-0x8000000000000000", "-0x8000000000000000",  0 },
+    { "-0x8000000000000001", "-0x8000000000000000", -1 },
+  };
+
+  mpz_t  lhs, rhs_z;
+  long   rhs;
+  int    res;
+  int    idx;
+
+  mpz_init (lhs);
+  mpz_init (rhs_z);
+  for (idx = 0; idx < numberof (data); idx++)
+    {
+      mpz_set_str_or_abort (lhs, data[idx].a, 0);
+      mpz_set_str_or_abort (rhs_z, data[idx].b, 0);
+
+      if (! mpz_fits_slong_p (rhs_z))
+        continue;  /* this b cannot be passed as a long on this platform */
+
+      rhs = mpz_get_si (rhs_z);
+      res = mpz_cmp_si (lhs, rhs);
+      if (SGN (res) == data[idx].want)
+        continue;
+
+      printf ("mpz_cmp_si wrong on data[%d]\n", idx);
+      printf ("  a="); mpz_out_str (stdout, 10, lhs); printf ("\n");
+      printf ("  b=%ld\n", rhs);
+      printf ("  got=%d\n", res);
+      printf ("  want=%d\n", data[idx].want);
+      abort();
+    }
+
+  mpz_clear (lhs);
+  mpz_clear (rhs_z);
+}
+
+/* Entry point: run the mpz_cmp_si table.  */
+int
+main (void)
+{
+  tests_start ();
+
+  check_data ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-cong.c b/tests/mpz/t-cong.c

new file mode 100644 (file)

index 0000000..f263f24
--- /dev/null
+++ b/tests/mpz/t-cong.c
@@ -0,0 +1,197 @@
+/* test mpz_congruent_p and mpz_congruent_ui_p
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+/* Abort unless mpz_congruent_p (and _ui_p when c,d fit ulongs) returns WANT.  */
+void
+check_one (mpz_srcptr a, mpz_srcptr c, mpz_srcptr d, int want)
+{
+  int   got;
+  int   swap;
+
+  for (swap = 0; swap <= 1; swap++)  /* second pass runs with a and c exchanged */
+    {
+      got = (mpz_congruent_p (a, c, d) != 0);
+      if (want != got)
+        {
+          printf ("mpz_congruent_p wrong\n");
+          printf ("   expected %d got %d\n", want, got);
+          mpz_trace ("   a", a);
+          mpz_trace ("   c", c);
+          mpz_trace ("   d", d);
+          mp_trace_base = -16;  /* repeat the dump with the trace base switched */
+          mpz_trace ("   a", a);
+          mpz_trace ("   c", c);
+          mpz_trace ("   d", d);
+          abort ();
+        }
+
+      if (mpz_fits_ulong_p (c) && mpz_fits_ulong_p (d))
+        {
+          unsigned long  uc = mpz_get_ui (c);
+          unsigned long  ud = mpz_get_ui (d);
+          got = (mpz_congruent_ui_p (a, uc, ud) != 0);
+          if (want != got)
+            {
+              printf    ("mpz_congruent_ui_p wrong\n");
+              printf    ("   expected %d got %d\n", want, got);
+              mpz_trace ("   a", a);
+              printf    ("   c=%lu\n", uc);
+              printf    ("   d=%lu\n", ud);
+              mp_trace_base = -16;
+              mpz_trace ("   a", a);
+              printf    ("   c=0x%lX\n", uc);
+              printf    ("   d=0x%lX\n", ud);
+              abort ();
+            }
+        }
+
+      MPZ_SRCPTR_SWAP (a, c);  /* congruence is symmetric, so WANT is unchanged */
+    }
+}
+
+/* Fixed cases: a, c and modulus d as strings, plus the wanted result.  */
+void
+check_data (void)
+{
+  static const struct {
+    const char *a;
+    const char *c;
+    const char *d;
+    int        want;
+
+  } data[] = {
+
+    /* anything congruent mod 1 */
+    { "0", "0", "1", 1 },
+    { "1", "0", "1", 1 },
+    { "0", "1", "1", 1 },
+    { "123", "456", "1", 1 },
+    { "0x123456789123456789", "0x987654321987654321", "1", 1 },
+
+    /* csize==1, dsize==2 changing to 1 after stripping 2s */
+    { "0x3333333333333333",  "0x33333333",
+      "0x180000000", 1 },
+    { "0x33333333333333333333333333333333", "0x3333333333333333",
+      "0x18000000000000000", 1 },
+
+    /* another dsize==2 becoming 1, with opposite signs this time */
+    {  "0x444444441",
+      "-0x22222221F",
+       "0x333333330", 1 },
+    {  "0x44444444444444441",
+      "-0x2222222222222221F",
+       "0x33333333333333330", 1 },
+  };
+
+  mpz_t   a, c, d;
+  int     i;
+
+  mpz_init (a);
+  mpz_init (c);
+  mpz_init (d);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      mpz_set_str_or_abort (a, data[i].a, 0);  /* base 0, so the "0x" entries parse as hex */
+      mpz_set_str_or_abort (c, data[i].c, 0);
+      mpz_set_str_or_abort (d, data[i].d, 0);
+      check_one (a, c, d, data[i].want);
+    }
+
+  mpz_clear (a);
+  mpz_clear (c);
+  mpz_clear (d);
+}
+
+/* Random a, c, d checked against mpz_fdiv_r remainders; argv[1] sets reps.  */
+void
+check_random (int argc, char *argv[])
+{
+  gmp_randstate_ptr rands = RANDS;
+  mpz_t   a, c, d, ra, rc;
+  int     i;
+  int     want;
+  int     reps = 50000;  /* default iteration count */
+
+  if (argc >= 2)
+    reps = atoi (argv[1]);
+
+  mpz_init (a);
+  mpz_init (c);
+  mpz_init (d);
+  mpz_init (ra);
+  mpz_init (rc);
+
+  for (i = 0; i < reps; i++)
+    {
+      mpz_errandomb (a, rands, 8*GMP_LIMB_BITS);
+      MPZ_CHECK_FORMAT (a);
+      mpz_errandomb (c, rands, 8*GMP_LIMB_BITS);
+      MPZ_CHECK_FORMAT (c);
+      mpz_errandomb_nonzero (d, rands, 8*GMP_LIMB_BITS);
+
+      mpz_negrandom (a, rands);  /* NOTE(review): presumably negates at random -- confirm */
+      MPZ_CHECK_FORMAT (a);
+      mpz_negrandom (c, rands);
+      MPZ_CHECK_FORMAT (c);
+      mpz_negrandom (d, rands);
+
+      mpz_fdiv_r (ra, a, d);  /* reference answer: congruent iff the floor remainders agree */
+      mpz_fdiv_r (rc, c, d);
+
+      want = (mpz_cmp (ra, rc) == 0);
+      check_one (a, c, d, want);
+
+      mpz_sub (ra, ra, rc);  /* a -= (a mod d) - (c mod d), forcing a == c (mod d) */
+      mpz_sub (a, a, ra);
+      MPZ_CHECK_FORMAT (a);
+      check_one (a, c, d, 1);
+
+      if (! mpz_pow2abs_p (d))  /* a one-bit change of a is then never a multiple of d */
+        {
+          refmpz_combit (a, urandom() % (8*GMP_LIMB_BITS));
+          check_one (a, c, d, 0);
+        }
+    }
+
+  mpz_clear (a);
+  mpz_clear (c);
+  mpz_clear (d);
+  mpz_clear (ra);
+  mpz_clear (rc);
+}
+
+/* Entry point: fixed table first, then the random checks.  */
+int
+main (int argc, char *argv[])
+{
+  tests_start ();
+
+  check_data ();
+  check_random (argc, argv);  /* argv[1], if given, is the repetition count */
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-cong_2exp.c b/tests/mpz/t-cong_2exp.c

new file mode 100644 (file)

index 0000000..fedcdf9
--- /dev/null
+++ b/tests/mpz/t-cong_2exp.c
@@ -0,0 +1,165 @@
+/* test mpz_congruent_2exp_p */
+
+/*
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+/* Abort unless mpz_congruent_2exp_p (a, c, d) gives WANT, in both a/c orders.  */
+void
+check_one (mpz_srcptr a, mpz_srcptr c, unsigned long d, int want)
+{
+  mpz_t  diff, d2exp;  /* only initialised and used on the failure path */
+  int    got;
+  int    swap;
+
+  for (swap = 0; swap <= 1; swap++)  /* second pass runs with a and c exchanged */
+    {
+      got = (mpz_congruent_2exp_p (a, c, d) != 0);
+      if (want != got)
+        {
+          mpz_init (diff);
+          mpz_init (d2exp);
+
+          mpz_sub (diff, a, c);  /* a-c and 2^d are computed just for the dump */
+          mpz_set_ui (d2exp, 1L);
+          mpz_mul_2exp (d2exp, d2exp, d);
+
+          printf ("mpz_congruent_2exp_p wrong\n");
+          printf ("   expected %d got %d\n", want, got);
+          mpz_trace ("   a", a);
+          mpz_trace ("   c", c);
+          mpz_trace (" a-c", diff);
+          mpz_trace (" 2^d", d2exp);
+          printf    ("   d=%lu\n", d);
+
+          mp_trace_base = -16;  /* repeat the dump with the trace base switched */
+          mpz_trace ("   a", a);
+          mpz_trace ("   c", c);
+          mpz_trace (" a-c", diff);
+          mpz_trace (" 2^d", d2exp);
+          printf    ("   d=0x%lX\n", d);
+          abort ();
+        }
+
+      MPZ_SRCPTR_SWAP (a, c);  /* congruence is symmetric, so WANT is unchanged */
+    }
+}
+
+/* Fixed cases: a and c as strings, exponent d, and the wanted result.  */
+void
+check_data (void)
+{
+  static const struct {
+    const char     *a;
+    const char     *c;
+    unsigned long  d;
+    int            want;
+
+  } data[] = {
+
+    /* anything is congruent mod 1, that is with d == 0 since 2^0 == 1 */
+    { "0", "0", 0, 1 },
+    { "1", "0", 0, 1 },
+    { "0", "1", 0, 1 },
+    { "123", "456", 0, 1 },
+    { "0x123456789123456789", "0x987654321987654321", 0, 1 },
+
+  };
+
+  mpz_t   a, c;
+  int     i;
+
+  mpz_init (a);
+  mpz_init (c);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      mpz_set_str_or_abort (a, data[i].a, 0);  /* base 0, so the "0x" entries parse as hex */
+      mpz_set_str_or_abort (c, data[i].c, 0);
+      check_one (a, c, data[i].d, data[i].want);
+    }
+
+  mpz_clear (a);
+  mpz_clear (c);
+}
+
+/* Random a, c, d checked against mpz_fdiv_r_2exp; argv[1] sets reps.  */
+void
+check_random (int argc, char *argv[])
+{
+  gmp_randstate_ptr rands = RANDS;
+  unsigned long  d;
+  mpz_t  a, c, ra, rc;
+  int    i;
+  int    want;
+  int    reps = 5000;  /* default iteration count */
+
+  if (argc >= 2)
+    reps = atoi (argv[1]);
+
+  mpz_init (a);
+  mpz_init (c);
+  mpz_init (ra);
+  mpz_init (rc);
+
+  for (i = 0; i < reps; i++)
+    {
+      mpz_errandomb (a, rands, 8*GMP_LIMB_BITS);
+      mpz_errandomb (c, rands, 8*GMP_LIMB_BITS);
+      d = urandom() % (8*GMP_LIMB_BITS);
+
+      mpz_mul_2exp (a, a, urandom() % (2*GMP_LIMB_BITS));  /* shift in some low zero bits */
+      mpz_mul_2exp (c, c, urandom() % (2*GMP_LIMB_BITS));
+
+      mpz_negrandom (a, rands);  /* NOTE(review): presumably negates at random -- confirm */
+      mpz_negrandom (c, rands);
+
+      mpz_fdiv_r_2exp (ra, a, d);  /* reference: congruent iff the remainders mod 2^d agree */
+      mpz_fdiv_r_2exp (rc, c, d);
+
+      want = (mpz_cmp (ra, rc) == 0);
+      check_one (a, c, d, want);
+
+      mpz_sub (ra, ra, rc);  /* a -= (a mod 2^d) - (c mod 2^d), forcing congruence */
+      mpz_sub (a, a, ra);
+      check_one (a, c, d, 1);
+    }
+
+  mpz_clear (a);
+  mpz_clear (c);
+  mpz_clear (ra);
+  mpz_clear (rc);
+}
+
+/* Entry point: fixed table first, then the random checks.  */
+int
+main (int argc, char *argv[])
+{
+  tests_start ();
+
+  check_data ();
+  check_random (argc, argv);  /* argv[1], if given, is the repetition count */
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-div_2exp.c b/tests/mpz/t-div_2exp.c

new file mode 100644 (file)

index 0000000..934ef18
--- /dev/null
+++ b/tests/mpz/t-div_2exp.c
@@ -0,0 +1,224 @@
+/* Test mpz_[cft]div_[qr]_2exp.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* Check mpz_fdiv, mpz_cdiv and mpz_tdiv _q_2exp/_r_2exp on one dividend A
+   and one shift count D.
+
+   For each rounding mode the remainder is range (and sign) checked and
+   q*2^d + r is compared against a.  If the remainder is in the correct
+   range and q*2^d + r is correct, then q must have rounded correctly.
+
+   Everything runs twice: once writing from A into a separate destination,
+   and once copying A into the destination and operating on it in place.
+   On failure a message is printed, the operands are traced (a second time
+   after setting mp_trace_base to -16) and the program aborts.  */
+
+void
+check_one (mpz_srcptr a, unsigned long d)
+{
+  mpz_t  two_d, quot, rem, prod;
+  int    inplace;
+
+  mpz_init (two_d);
+  mpz_init (quot);
+  mpz_init (rem);
+  mpz_init (prod);
+
+  /* two_d = 2^d, the bound for the remainders */
+  mpz_set_ui (two_d, 1L);
+  mpz_mul_2exp (two_d, two_d, d);
+
+  /* Apply FUN straight from SRC into DST, or, on the "inplace" pass, copy
+     SRC into DST first and let FUN overwrite its own operand.  */
+#define INPLACE(fun,dst,src,cnt)  \
+  if (inplace)                    \
+    {                             \
+      mpz_set (dst, src);         \
+      fun (dst, dst, cnt);        \
+    }                             \
+  else                            \
+    fun (dst, src, cnt);
+
+  inplace = 0;
+  while (inplace <= 1)
+    {
+      /* floor: 0 <= r < 2^d */
+      INPLACE (mpz_fdiv_q_2exp, quot, a, d);
+      INPLACE (mpz_fdiv_r_2exp, rem, a, d);
+
+      mpz_mul_2exp (prod, quot, d);
+      mpz_add (prod, prod, rem);
+      if (mpz_sgn (rem) < 0 || mpz_cmp (rem, two_d) >= 0)
+        {
+          printf ("mpz_fdiv_r_2exp result out of range\n");
+          goto error;
+        }
+      if (mpz_cmp (prod, a) != 0)
+        {
+          printf ("mpz_fdiv_[qr]_2exp doesn't multiply back\n");
+          goto error;
+        }
+
+      /* ceiling: -2^d < r <= 0 */
+      INPLACE (mpz_cdiv_q_2exp, quot, a, d);
+      INPLACE (mpz_cdiv_r_2exp, rem, a, d);
+
+      mpz_mul_2exp (prod, quot, d);
+      mpz_add (prod, prod, rem);
+      if (mpz_sgn (rem) > 0 || mpz_cmpabs (rem, two_d) >= 0)
+        {
+          printf ("mpz_cdiv_r_2exp result out of range\n");
+          goto error;
+        }
+      if (mpz_cmp (prod, a) != 0)
+        {
+          printf ("mpz_cdiv_[qr]_2exp doesn't multiply back\n");
+          goto error;
+        }
+
+      /* truncate: r is zero or has the sign of a, and |r| < 2^d */
+      INPLACE (mpz_tdiv_q_2exp, quot, a, d);
+      INPLACE (mpz_tdiv_r_2exp, rem, a, d);
+
+      mpz_mul_2exp (prod, quot, d);
+      mpz_add (prod, prod, rem);
+      if (mpz_sgn (rem) != 0 && mpz_sgn (rem) != mpz_sgn (a))
+        {
+          printf ("mpz_tdiv_r_2exp result wrong sign\n");
+          goto error;
+        }
+      if (mpz_cmpabs (rem, two_d) >= 0)
+        {
+          printf ("mpz_tdiv_r_2exp result out of range\n");
+          goto error;
+        }
+      if (mpz_cmp (prod, a) != 0)
+        {
+          printf ("mpz_tdiv_[qr]_2exp doesn't multiply back\n");
+          goto error;
+        }
+
+      inplace++;
+    }
+
+  mpz_clear (two_d);
+  mpz_clear (quot);
+  mpz_clear (rem);
+  mpz_clear (prod);
+  return;
+
+
+ error:
+  /* first dump with the current trace base and d in decimal ... */
+  mpz_trace ("a", a);
+  printf    ("d=%lu\n", d);
+  mpz_trace ("q", quot);
+  mpz_trace ("r", rem);
+  mpz_trace ("p", prod);
+
+  /* ... then again with mp_trace_base at -16 and d in hex */
+  mp_trace_base = -16;
+  mpz_trace ("a", a);
+  printf    ("d=0x%lX\n", d);
+  mpz_trace ("q", quot);
+  mpz_trace ("r", rem);
+  mpz_trace ("p", prod);
+
+  abort ();
+}
+
+
+/* Run check_one on A and then on -A, both with shift count D.
+   Side effect: A is negated in place and is left negated on return, so
+   repeated calls on the same variable alternate its sign.  */
+void
+check_all (mpz_ptr a, unsigned long d)
+{
+  check_one (a, d);
+  mpz_neg (a, a);
+  check_one (a, d);
+}
+
+
+/* Deterministic cases built from a table of small values and values at
+   and on either side of 1 to 4 multiples of GMP_NUMB_BITS.  The table is
+   used both as the shift count d and as the exponent n for a = 2^n.  */
+void
+check_various (void)
+{
+  static const unsigned long  table[] = {
+    0, 1, 2, 3, 4, 5,
+    GMP_NUMB_BITS-1, GMP_NUMB_BITS, GMP_NUMB_BITS+1,
+    2*GMP_NUMB_BITS-1, 2*GMP_NUMB_BITS, 2*GMP_NUMB_BITS+1,
+    3*GMP_NUMB_BITS-1, 3*GMP_NUMB_BITS, 3*GMP_NUMB_BITS+1,
+    4*GMP_NUMB_BITS-1, 4*GMP_NUMB_BITS, 4*GMP_NUMB_BITS+1
+  };
+
+  int    ni, di;
+  mpz_t  a;
+
+  mpz_init (a);
+
+  /* a==0, and various d */
+  mpz_set_ui (a, 0L);
+  for (di = 0; di < numberof (table); di++)
+    check_one (a, table[di]);
+
+  /* a==2^n, and various d */
+  for (ni = 0; ni < numberof (table); ni++)
+    {
+      mpz_set_ui (a, 1L);
+      mpz_mul_2exp (a, a, table[ni]);
+
+      /* check_all negates a on every call, so successive d values see
+         alternately 2^n first and -2^n first */
+      for (di = 0; di < numberof (table); di++)
+        check_all (a, table[di]);
+    }
+
+  mpz_clear (a);
+}
+
+
+/* Random cases.  Runs 100 rounds by default, or atoi (argv[1]) rounds
+   when exactly one argument is given.  Each round draws a through
+   mpz_erandomb, draws d in 0..255, and hands both to check_all.  */
+void
+check_random (int argc, char *argv[])
+{
+  gmp_randstate_ptr  rands = RANDS;
+  unsigned long  nbits, d;
+  mpz_t          a;
+  int            reps, n;
+
+  reps = (argc == 2) ? atoi (argv[1]) : 100;
+
+  mpz_init (a);
+
+  for (n = 0; n < reps; n++)
+    {
+      /* exponentially within 2 to 257 bits */
+      /* NOTE(review): the size argument passed here is in 2..9; whether
+         that yields the "2 to 257 bits" stated above depends on how
+         mpz_erandomb interprets it -- confirm against its definition.  */
+      nbits = urandom () % 8 + 2;
+      mpz_erandomb (a, rands, nbits);
+
+      d = urandom () % 256;
+
+      check_all (a, d);
+    }
+
+  mpz_clear (a);
+}
+
+
+/* Test driver: run the deterministic table checks, then the random
+   checks (which read an optional repetition count from the command line),
+   bracketed by tests_start and tests_end.  */
+int
+main (int argc, char *argv[])
+{
+  tests_start ();
+
+  check_various ();
+  check_random (argc, argv);
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-divis.c b/tests/mpz/t-divis.c

new file mode 100644 (file)

index 0000000..ba99a48
--- /dev/null
+++ b/tests/mpz/t-divis.c
@@ -0,0 +1,166 @@
+/* test mpz_divisible_p and mpz_divisible_ui_p
+
+Copyright 2001, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* Verify that the divisibility predicates agree with WANT (0 or 1) for
+   dividend A and divisor D.  mpz_divisible_ui_p is exercised only when D
+   fits an unsigned long; mpz_divisible_p is always exercised.  On a
+   mismatch the operands are printed (a second time after mp_trace_base is
+   set to -16) and the program aborts.  */
+void
+check_one (mpz_srcptr a, mpz_srcptr d, int want)
+{
+  int  result;
+
+  if (mpz_fits_ulong_p (d))
+    {
+      unsigned long  d_ui = mpz_get_ui (d);
+
+      /* normalise the predicate's return to 0 or 1 before comparing */
+      result = (mpz_divisible_ui_p (a, d_ui) != 0);
+      if (result != want)
+        {
+          printf ("mpz_divisible_ui_p wrong\n");
+          printf ("   expected %d got %d\n", want, result);
+          mpz_trace ("   a", a);
+          printf ("   d=%lu\n", d_ui);
+          mp_trace_base = -16;
+          mpz_trace ("   a", a);
+          printf ("   d=0x%lX\n", d_ui);
+          abort ();
+        }
+    }
+
+  result = (mpz_divisible_p (a, d) != 0);
+  if (result != want)
+    {
+      printf ("mpz_divisible_p wrong\n");
+      printf ("   expected %d got %d\n", want, result);
+      mpz_trace ("   a", a);
+      mpz_trace ("   d", d);
+      mp_trace_base = -16;
+      mpz_trace ("   a", a);
+      mpz_trace ("   d", d);
+      abort ();
+    }
+}
+
+/* Feed a fixed table of (dividend, divisor, expected) cases to check_one.
+   The operands are given as strings so that values wider than one limb
+   can be written directly.  */
+void
+check_data (void)
+{
+  static const struct {
+    const char *num;
+    const char *den;
+    int        expect;
+
+  } cases[] = {
+
+    { "0",    "1", 1 },
+    { "123",  "1", 1 },
+    { "-123", "1", 1 },
+
+    { "0",  "2", 1 },
+    { "1",  "2", 0 },
+    { "2",  "2", 1 },
+    { "-2", "2", 1 },
+    { "0x100000000000000000000000000000000", "2", 1 },
+    { "0x100000000000000000000000000000001", "2", 0 },
+
+    { "0x3333333333333333", "3", 1 },
+    { "0x3333333333333332", "3", 0 },
+    { "0x33333333333333333333333333333333", "3", 1 },
+    { "0x33333333333333333333333333333332", "3", 0 },
+
+    /* divisor changes from 2 to 1 limb after stripping 2s */
+    {          "0x3333333300000000",         "0x180000000",         1 },
+    {  "0x33333333333333330000000000000000", "0x18000000000000000", 1 },
+    { "0x133333333333333330000000000000000", "0x18000000000000000", 0 },
+  };
+
+  mpz_t   a, d;
+  int     k;
+
+  mpz_init (a);
+  mpz_init (d);
+
+  for (k = 0; k < numberof (cases); k++)
+    {
+      mpz_set_str_or_abort (a, cases[k].num, 0);
+      mpz_set_str_or_abort (d, cases[k].den, 0);
+
+      check_one (a, d, cases[k].expect);
+    }
+
+  mpz_clear (a);
+  mpz_clear (d);
+}
+
+/* Run REPS rounds of random testing.  Each round draws a dividend a and
+   a nonzero divisor d, then makes up to three checks:
+     - a itself, expecting divisibility exactly when a mod d is zero;
+     - a minus its remainder, which must be divisible;
+     - that value plus 1, which must not be divisible (skipped when
+       |d| == 1, since then everything is divisible).  */
+void
+check_random (int reps)
+{
+  gmp_randstate_ptr rands = RANDS;
+  mpz_t   a, d, rem;
+  int     n;
+
+  mpz_init (a);
+  mpz_init (d);
+  mpz_init (rem);
+
+  for (n = 0; n < reps; n++)
+    {
+      mpz_erandomb (a, rands, 1 << 19);
+      mpz_erandomb_nonzero (d, rands, 1 << 18);
+
+      mpz_fdiv_r (rem, a, d);
+      check_one (a, d, mpz_sgn (rem) == 0);
+
+      /* remove the remainder to get an exact multiple of d */
+      mpz_sub (a, a, rem);
+      check_one (a, d, 1);
+
+      if (mpz_cmpabs_ui (d, 1L) != 0)
+        {
+          /* one past a multiple cannot be a multiple when |d| > 1 */
+          mpz_add_ui (a, a, 1L);
+          check_one (a, d, 0);
+        }
+    }
+
+  mpz_clear (a);
+  mpz_clear (d);
+  mpz_clear (rem);
+}
+
+
+/* Test driver.  Runs the fixed-table checks followed by the random
+   checks.  The random round count is 100 unless exactly one command line
+   argument is given, in which case atoi of that argument is used.  */
+int
+main (int argc, char *argv[])
+{
+  int  reps;
+
+  tests_start ();
+
+  reps = (argc == 2) ? atoi (argv[1]) : 100;
+
+  check_data ();
+  check_random (reps);
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-divis_2exp.c b/tests/mpz/t-divis_2exp.c

new file mode 100644 (file)

index 0000000..88588a3
--- /dev/null
+++ b/tests/mpz/t-divis_2exp.c
@@ -0,0 +1,133 @@
+/* test mpz_divisible_2exp_p */
+
+/*
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* Call mpz_divisible_2exp_p (a, d) and compare its result, normalised to
+   0 or 1, with WANT.  On a mismatch print the operands (a second time
+   after setting mp_trace_base to -16) and abort.  */
+void
+check_one (mpz_srcptr a, unsigned long d, int want)
+{
+  int  got = (mpz_divisible_2exp_p (a, d) != 0);
+
+  if (got == want)
+    return;
+
+  printf ("mpz_divisible_2exp_p wrong\n");
+  printf ("   expected %d got %d\n", want, got);
+  mpz_trace ("   a", a);
+  printf    ("   d=%lu\n", d);
+  mp_trace_base = -16;
+  mpz_trace ("   a", a);
+  printf    ("   d=0x%lX\n", d);
+  abort ();
+}
+
+/* Feed a fixed table of (a, d, expected) cases to check_one.  Each entry
+   is checked twice, for a and for -a, with the same expected result.
+   The table covers zero, small values, and single set bits at 31, 32, 63
+   and 64, alone and below a higher set bit.  */
+void
+check_data (void)
+{
+  static const struct {
+    const char    *a;
+    unsigned long d;
+    int           want;
+
+  } data[] = {
+
+    { "0", 0, 1 },
+    { "0", 1, 1 },
+    { "0", 2, 1 },
+    { "0", 3, 1 },
+
+    { "1", 0, 1 },
+    { "1", 1, 0 },
+    { "1", 2, 0 },
+    { "1", 3, 0 },
+    { "1", 10000, 0 },
+
+    { "4", 0, 1 },
+    { "4", 1, 1 },
+    { "4", 2, 1 },
+    { "4", 3, 0 },
+    { "4", 4, 0 },
+    { "4", 10000, 0 },
+
+    { "0x80000000", 31, 1 },
+    { "0x80000000", 32, 0 },
+    { "0x80000000", 64, 0 },
+
+    { "0x100000000", 32, 1 },
+    { "0x100000000", 33, 0 },
+    { "0x100000000", 64, 0 },
+
+    { "0x8000000000000000", 63, 1 },
+    { "0x8000000000000000", 64, 0 },
+    { "0x8000000000000000", 128, 0 },
+
+    { "0x10000000000000000", 64, 1 },
+    { "0x10000000000000000", 65, 0 },
+    { "0x10000000000000000", 128, 0 },
+    { "0x10000000000000000", 256, 0 },
+
+    { "0x10000000000000000100000000", 32, 1 },
+    { "0x10000000000000000100000000", 33, 0 },
+    { "0x10000000000000000100000000", 64, 0 },
+
+    { "0x1000000000000000010000000000000000", 64, 1 },
+    { "0x1000000000000000010000000000000000", 65, 0 },
+    { "0x1000000000000000010000000000000000", 128, 0 },
+    { "0x1000000000000000010000000000000000", 256, 0 },
+    { "0x1000000000000000010000000000000000", 1024, 0 },
+
+  };
+
+  /* Only the dividend is an mpz; the exponent d is passed as a plain
+     unsigned long straight from the table.  */
+  mpz_t   a;
+  int     i;
+
+  mpz_init (a);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      mpz_set_str_or_abort (a, data[i].a, 0);
+      check_one (a, data[i].d, data[i].want);
+
+      /* the same expectation is applied to the negated value */
+      mpz_neg (a, a);
+      check_one (a, data[i].d, data[i].want);
+    }
+
+  mpz_clear (a);
+}
+
+/* Test driver: run the fixed-table checks between tests_start and
+   tests_end.  The command line arguments are accepted but not used.  */
+int
+main (int argc, char *argv[])
+{
+  tests_start ();
+
+  check_data ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-export.c b/tests/mpz/t-export.c

new file mode 100644 (file)

index 0000000..f2fb3bd
--- /dev/null
+++ b/tests/mpz/t-export.c
@@ -0,0 +1,206 @@
+/* Test mpz_export.
+
+Copyright 2002, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* Run mpz_export over a fixed table and compare against the expected
+   bytes.
+
+   Each table entry gives the source value as a string, the expected word
+   count, the order/size/endian/nail arguments for mpz_export, and the
+   expected output bytes (unlisted trailing bytes are zero).  Every entry
+   is exported at each byte offset 0 .. sizeof(mp_limb_t)-1 into the
+   buffer, so differently aligned destinations are exercised.
+
+   The return pointer, the count and the data are all verified; on any
+   mismatch the entry and both byte strings are printed and the program
+   aborts.  */
+void
+check_data (void)
+{
+  static const struct {
+    const char  *src;
+    size_t      want_count;
+    int         order;
+    size_t      size;
+    int         endian;
+    int         nail;
+    char        want_data[64];
+
+  } data[] = {
+
+    { "0", 0,1, 1,1, 0 },
+    { "0", 0,1, 2,1, 0 },
+    { "0", 0,1, 3,1, 0 },
+
+    { "0x12345678", 4,1,  1,1, 0, { '\022', '\064', '\126', '\170' } },
+    { "0x12345678", 1,1,  4,1, 0, { '\022', '\064', '\126', '\170' } },
+    { "0x12345678", 1,-1, 4,1, 0, { '\022', '\064', '\126', '\170' } },
+
+    { "0x12345678", 4,-1, 1,-1, 0, { '\170', '\126', '\064', '\022' } },
+    { "0x12345678", 1,1,  4,-1, 0, { '\170', '\126', '\064', '\022' } },
+    { "0x12345678", 1,-1, 4,-1, 0, { '\170', '\126', '\064', '\022' } },
+
+    { "0x15", 5,1,  1,1, 7, { '\001', '\000', '\001', '\000', '\001' } },
+
+    { "0x1FFFFFFFFFFF", 3,1,  2,1,   1, {
+       '\177','\377', '\177','\377', '\177','\377' } },
+    { "0x1FFFFFFFFFFF", 3,1,  2,-1,  1, {
+       '\377','\177', '\377','\177', '\377','\177' } },
+    { "0x7",            3,1,  2,1,  15, {
+       '\000','\001', '\000','\001', '\000','\001' } },
+    { "0x7",            3,1,  2,-1, 15, {
+       '\001','\000', '\001','\000', '\001','\000' } },
+
+    { "0x24", 3,1,  2,1,  14, { '\000','\002', '\000','\001', '\000','\000' }},
+    { "0x24", 3,1,  2,-1, 14, { '\002','\000', '\001','\000', '\000','\000' }},
+    { "0x24", 3,-1, 2,-1, 14, { '\000','\000', '\001','\000', '\002','\000' }},
+    { "0x24", 3,-1, 2,1,  14, { '\000','\000', '\000','\001', '\000','\002' }},
+
+    { "0x123456789ABC", 3,1,  2,1,  0, {
+       '\022','\064', '\126','\170', '\232','\274' } },
+    { "0x123456789ABC", 3,-1, 2,1,  0, {
+       '\232','\274', '\126','\170', '\022','\064' } },
+    { "0x123456789ABC", 3,1,  2,-1, 0, {
+       '\064','\022', '\170','\126', '\274','\232' } },
+    { "0x123456789ABC", 3,-1, 2,-1, 0, {
+       '\274','\232', '\170','\126', '\064','\022' } },
+
+    { "0x112233445566778899AABBCC", 3,1,  4,1,  0,
+      { '\021','\042','\063','\104',
+       '\125','\146','\167','\210',
+       '\231','\252','\273','\314' } },
+    { "0x112233445566778899AABBCC", 3,-1, 4,1,  0,
+      { '\231','\252','\273','\314',
+       '\125','\146','\167','\210',
+       '\021','\042','\063','\104' } },
+    { "0x112233445566778899AABBCC", 3,1,  4,-1, 0,
+      { '\104','\063','\042','\021',
+       '\210','\167','\146','\125',
+       '\314','\273','\252','\231' } },
+    { "0x112233445566778899AABBCC", 3,-1, 4,-1, 0,
+      { '\314','\273','\252','\231',
+       '\210','\167','\146','\125',
+       '\104','\063','\042','\021' } },
+
+    { "0x100120023003400450056006700780089009A00AB00BC00C", 3,1,  8,1,  0,
+      { '\020','\001','\040','\002','\060','\003','\100','\004',
+       '\120','\005','\140','\006','\160','\007','\200','\010',
+       '\220','\011','\240','\012','\260','\013','\300','\014' } },
+    { "0x100120023003400450056006700780089009A00AB00BC00C", 3,-1, 8,1,  0,
+      { '\220','\011','\240','\012','\260','\013','\300','\014',
+       '\120','\005','\140','\006','\160','\007','\200','\010',
+       '\020','\001','\040','\002','\060','\003','\100','\004' } },
+    { "0x100120023003400450056006700780089009A00AB00BC00C", 3,1,  8,-1, 0,
+      { '\004','\100','\003','\060','\002','\040','\001','\020',
+       '\010','\200','\007','\160','\006','\140','\005','\120',
+       '\014','\300','\013','\260','\012','\240','\011','\220' } },
+    { "0x100120023003400450056006700780089009A00AB00BC00C", 3,-1, 8,-1, 0,
+      { '\014','\300','\013','\260','\012','\240','\011','\220',
+       '\010','\200','\007','\160','\006','\140','\005','\120',
+       '\004','\100','\003','\060','\002','\040','\001','\020' } },
+
+    { "0x155555555555555555555555", 3,1,  4,1,  1,
+      { '\125','\125','\125','\125',
+       '\052','\252','\252','\252',
+       '\125','\125','\125','\125' } },
+    { "0x155555555555555555555555", 3,-1,  4,1,  1,
+      { '\125','\125','\125','\125',
+       '\052','\252','\252','\252',
+       '\125','\125','\125','\125' } },
+    { "0x155555555555555555555555", 3,1,  4,-1,  1,
+      { '\125','\125','\125','\125',
+       '\252','\252','\252','\052',
+       '\125','\125','\125','\125' } },
+    { "0x155555555555555555555555", 3,-1,  4,-1,  1,
+      { '\125','\125','\125','\125',
+       '\252','\252','\252','\052',
+       '\125','\125','\125','\125' } },
+  };
+
+  /* Room for the largest expected output, the largest alignment offset,
+     and some slack.  The size is taken from want_data (the expected-bytes
+     array); sizeof the src member would only be the size of a pointer.  */
+  char    buf[sizeof (data[0].want_data) + sizeof (mp_limb_t) + 128];
+  char    *got_data;
+  void    *ret;
+  size_t  align, got_count, j;
+  int     i, error = 0;
+  mpz_t   src;
+
+  mpz_init (src);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      for (align = 0; align < sizeof (mp_limb_t); align++)
+        {
+          mpz_set_str_or_abort (src, data[i].src, 0);
+          MPZ_CHECK_FORMAT (src);
+          got_data = buf + align;
+
+          /* guard against a table entry that would not fit the buffer */
+          ASSERT_ALWAYS (data[i].want_count * data[i].size + align
+                         <= sizeof (buf));
+
+          memset (got_data, '\0', data[i].want_count * data[i].size);
+          ret = mpz_export (got_data, &got_count, data[i].order,
+                            data[i].size, data[i].endian, data[i].nail, src);
+
+          if (ret != got_data)
+            {
+              printf ("return doesn't equal given pointer\n");
+              error = 1;
+            }
+          if (got_count != data[i].want_count)
+            {
+              printf ("wrong count\n");
+              error = 1;
+            }
+          /* Only compare bytes once the count is known to be right, so
+             the comparison length cannot run past want_data.  */
+          else if (memcmp (got_data, data[i].want_data,
+                           got_count * data[i].size) != 0)
+            {
+              printf ("wrong result data\n");
+              error = 1;
+            }
+          if (error)
+            {
+              printf ("    at data[%d]  align=%d\n", i, (int) align);
+              printf ("    src \"%s\"\n", data[i].src);
+              mpz_trace ("    src", src);
+              /* nail is an int, so it is printed with %d */
+              printf ("    order=%d  size=%lu endian=%d nail=%d\n",
+                      data[i].order,
+                      (unsigned long) data[i].size, data[i].endian, data[i].nail);
+              printf ("    want count %lu\n", (unsigned long) data[i].want_count);
+              printf ("    got count  %lu\n", (unsigned long) got_count);
+              printf ("    want");
+              for (j = 0; j < data[i].want_count*data[i].size; j++)
+                printf (" 0x%02X,", (unsigned) (unsigned char) data[i].want_data[j]);
+              printf ("\n");
+              printf ("    got ");
+              for (j = 0; j < got_count*data[i].size; j++)
+                printf (" 0x%02X,", (unsigned) (unsigned char) got_data[j]);
+              printf ("\n");
+              abort ();
+            }
+        }
+    }
+  mpz_clear (src);
+}
+
+
+/* Test driver: set mp_trace_base to -16 up front so that any failure
+   trace from check_data uses that base, then run the table checks between
+   tests_start and tests_end.  */
+int
+main (void)
+{
+  tests_start ();
+
+  mp_trace_base = -16;
+  check_data ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-fac_ui.c b/tests/mpz/t-fac_ui.c

new file mode 100644 (file)

index 0000000..1b494d1
--- /dev/null
+++ b/tests/mpz/t-fac_ui.c
@@ -0,0 +1,77 @@
+/* Exercise mpz_fac_ui.
+
+Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* Usage: t-fac_ui [x|num]
+
+   With no arguments testing goes up to the initial value of "limit" below.
+   With a number argument tests are carried that far, or with a literal "x"
+   tests are continued without limit (this being meant only for development
+   purposes).  */
+
+
+/* Compare mpz_fac_ui (n) for n = 0, 1, ..., limit-1 against a running
+   product maintained with mpz_mul_ui.  The limit is 1500 by default; a
+   first argument starting with 'x' raises it to ULONG_MAX, any other
+   first argument is parsed with atoi.  The limit is then capped at
+   MP_LIMB_T_MAX.  */
+int
+main (int argc, char *argv[])
+{
+  unsigned long  limit = 1500;
+  unsigned long  n;
+  mpz_t          want, got;
+
+  tests_start ();
+
+  if (argc > 1)
+    {
+      if (argv[1][0] == 'x')
+        limit = ULONG_MAX;
+      else
+        limit = atoi (argv[1]);
+    }
+
+  /* for small limb testing */
+  limit = MIN (limit, MP_LIMB_T_MAX);
+
+  mpz_init_set_ui (want, 1);  /* 0! = 1 */
+  mpz_init (got);
+
+  for (n = 0; n < limit; n++)
+    {
+      mpz_fac_ui (got, n);
+      MPZ_CHECK_FORMAT (got);
+
+      if (mpz_cmp (want, got) != 0)
+        {
+          printf ("mpz_fac_ui(%lu) wrong\n", n);
+          printf ("  got  ");
+          mpz_out_str (stdout, 10, got);
+          printf("\n");
+          printf ("  want ");
+          mpz_out_str (stdout, 10, want);
+          printf("\n");
+          abort ();
+        }
+
+      /* step the reference value: (n+1)! = n! * (n+1) */
+      mpz_mul_ui (want, want, n+1);
+    }
+
+  mpz_clear (want);
+  mpz_clear (got);
+
+  tests_end ();
+
+  exit (0);
+}
diff --git a/tests/mpz/t-fdiv.c b/tests/mpz/t-fdiv.c

new file mode 100644 (file)

index 0000000..c4053fa
--- /dev/null
+++ b/tests/mpz/t-fdiv.c
@@ -0,0 +1,147 @@
+/* Test mpz_abs, mpz_add, mpz_cmp, mpz_cmp_ui, mpz_fdiv_qr, mpz_fdiv_q,
+   mpz_fdiv_r, mpz_mul.
+
+Copyright 1993, 1994, 1996, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+void dump_abort __GMP_PROTO ((mpz_t, mpz_t));
+void debug_mp __GMP_PROTO ((mpz_t, int));
+
+/* Random test of floor division.  Runs 1000 rounds by default, or
+   atoi (argv[1]) rounds when exactly one argument is given.  Each round
+   draws a nonzero divisor and a dividend with random signs, divides with
+   mpz_fdiv_qr, mpz_fdiv_q and mpz_fdiv_r, and checks the results for
+   mutual agreement, sign, q*d + r == n, and |r| < |d|.  Any failure goes
+   through dump_abort.  */
+int
+main (int argc, char **argv)
+{
+  mpz_t dividend, divisor;
+  mpz_t quotient, remainder;
+  mpz_t quotient2, remainder2;
+  mpz_t temp;
+  mp_size_t dividend_size, divisor_size;
+  int i;
+  int reps = 1000;
+  gmp_randstate_ptr rands;
+  mpz_t bs;
+  unsigned long bsi, size_range;
+
+  tests_start ();
+  rands = RANDS;
+
+  mpz_init (bs);
+
+  if (argc == 2)
+     reps = atoi (argv[1]);
+
+  mpz_init (dividend);
+  mpz_init (divisor);
+  mpz_init (quotient);
+  mpz_init (remainder);
+  mpz_init (quotient2);
+  mpz_init (remainder2);
+  mpz_init (temp);
+
+  for (i = 0; i < reps; i++)
+    {
+      /* size_range is in 2..17 and is the bit width of the size draws */
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 16 + 2; /* 0..131071 bit operands */
+
+      /* redraw size and value until the divisor is nonzero */
+      do
+       {
+         mpz_urandomb (bs, rands, size_range);
+         divisor_size = mpz_get_ui (bs);
+         mpz_rrandomb (divisor, rands, divisor_size);
+       }
+      while (mpz_sgn (divisor) == 0);
+
+      /* the dividend is requested at least as large as the divisor */
+      mpz_urandomb (bs, rands, size_range);
+      dividend_size = mpz_get_ui (bs) + divisor_size;
+      mpz_rrandomb (dividend, rands, dividend_size);
+
+      /* two random bits choose the signs: bit 0 negates the dividend,
+         bit 1 negates the divisor */
+      mpz_urandomb (bs, rands, 2);
+      bsi = mpz_get_ui (bs);
+      if ((bsi & 1) != 0)
+       mpz_neg (dividend, dividend);
+      if ((bsi & 2) != 0)
+       mpz_neg (divisor, divisor);
+
+      /* printf ("%ld %ld\n", SIZ (dividend), SIZ (divisor)); */
+
+      mpz_fdiv_qr (quotient, remainder, dividend, divisor);
+      mpz_fdiv_q (quotient2, dividend, divisor);
+      mpz_fdiv_r (remainder2, dividend, divisor);
+
+      /* First determine that the quotients and remainders computed
+        with different functions are equal.  */
+      if (mpz_cmp (quotient, quotient2) != 0)
+       dump_abort (dividend, divisor);
+      if (mpz_cmp (remainder, remainder2) != 0)
+       dump_abort (dividend, divisor);
+
+      /* Check if the sign of the quotient is correct.  */
+      /* A nonzero quotient must be negative exactly when the operand
+         signs differ; the XOR of the two comparison results is negative
+         in that case.  */
+      if (mpz_cmp_ui (quotient, 0) != 0)
+       if ((mpz_cmp_ui (quotient, 0) < 0)
+           != ((mpz_cmp_ui (dividend, 0) ^ mpz_cmp_ui (divisor, 0)) < 0))
+       dump_abort (dividend, divisor);
+
+      /* Check if the remainder has the same sign as the divisor
+        (quotient rounded towards minus infinity).  */
+      if (mpz_cmp_ui (remainder, 0) != 0)
+       if ((mpz_cmp_ui (remainder, 0) < 0) != (mpz_cmp_ui (divisor, 0) < 0))
+         dump_abort (dividend, divisor);
+
+      /* quotient * divisor + remainder must reproduce the dividend */
+      mpz_mul (temp, quotient, divisor);
+      mpz_add (temp, temp, remainder);
+      if (mpz_cmp (temp, dividend) != 0)
+       dump_abort (dividend, divisor);
+
+      /* |remainder| must be strictly below |divisor|; remainder is
+         overwritten with its absolute value here */
+      mpz_abs (temp, divisor);
+      mpz_abs (remainder, remainder);
+      if (mpz_cmp (remainder, temp) >= 0)
+       dump_abort (dividend, divisor);
+    }
+
+  mpz_clear (bs);
+  mpz_clear (dividend);
+  mpz_clear (divisor);
+  mpz_clear (quotient);
+  mpz_clear (remainder);
+  mpz_clear (quotient2);
+  mpz_clear (remainder2);
+  mpz_clear (temp);
+
+  tests_end ();
+  exit (0);
+}
+
+/* Report a failed check: print "ERROR" and both operands to stderr
+   (through debug_mp with base argument -16), then abort.  */
+void
+dump_abort (mpz_t dividend, mpz_t divisor)
+{
+  fputs ("ERROR\n", stderr);
+
+  fputs ("dividend = ", stderr);
+  debug_mp (dividend, -16);
+
+  fputs ("divisor  = ", stderr);
+  debug_mp (divisor, -16);
+
+  abort();
+}
+
+/* Write X to stderr with mpz_out_str using the given base argument,
+   followed by a newline.  */
+void
+debug_mp (mpz_t x, int base)
+{
+  mpz_out_str (stderr, base, x);
+  fputc ('\n', stderr);
+}
diff --git a/tests/mpz/t-fdiv_ui.c b/tests/mpz/t-fdiv_ui.c

new file mode 100644 (file)

index 0000000..3012d9b
--- /dev/null
+++ b/tests/mpz/t-fdiv_ui.c
@@ -0,0 +1,159 @@
+/* Test mpz_abs, mpz_add, mpz_cmp, mpz_cmp_ui, mpz_fdiv_qr_ui, mpz_fdiv_q_ui,
+   mpz_fdiv_r_ui, mpz_fdiv_ui, mpz_mul_ui.
+
+Copyright 1993, 1994, 1996, 2000, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+void dump_abort __GMP_PROTO ((char *, mpz_t, unsigned long));
+void debug_mp __GMP_PROTO ((mpz_t, int));
+
+/* Random test of floor division by an unsigned long.  Runs 10000 rounds
+   by default, or atoi (argv[1]) rounds when exactly one argument is
+   given.  Each round draws a nonzero divisor and a randomly signed
+   dividend, divides with mpz_fdiv_qr_ui, mpz_fdiv_q_ui, mpz_fdiv_r_ui and
+   mpz_fdiv_ui, and checks the mpz results and the four returned
+   remainders for consistency.  Any failure goes through dump_abort.  */
+int
+main (int argc, char **argv)
+{
+  mpz_t dividend;
+  mpz_t quotient, remainder;
+  mpz_t quotient2, remainder2;
+  mpz_t temp;
+  mp_size_t dividend_size;
+  unsigned long divisor;
+  int i;
+  int reps = 10000;
+  gmp_randstate_ptr rands;
+  mpz_t bs;
+  unsigned long bsi, size_range;
+  unsigned long r_rq, r_q, r_r, r;
+
+  tests_start ();
+  rands = RANDS;
+
+  mpz_init (bs);
+
+  if (argc == 2)
+     reps = atoi (argv[1]);
+
+  mpz_init (dividend);
+  mpz_init (quotient);
+  mpz_init (remainder);
+  mpz_init (quotient2);
+  mpz_init (remainder2);
+  mpz_init (temp);
+
+  for (i = 0; i < reps; i++)
+    {
+      /* size_range is in 2..11 and is the bit width of the size draw */
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 10 + 2; /* 0..2047 bit operands */
+
+      /* redraw until the divisor is nonzero */
+      do
+       {
+         mpz_rrandomb (bs, rands, 64);
+         divisor = mpz_get_ui (bs);
+       }
+      while (divisor == 0);
+
+      mpz_urandomb (bs, rands, size_range);
+      dividend_size = mpz_get_ui (bs);
+      mpz_rrandomb (dividend, rands, dividend_size);
+
+      /* two bits are drawn but only bit 0 is used, to negate the dividend */
+      mpz_urandomb (bs, rands, 2);
+      bsi = mpz_get_ui (bs);
+      if ((bsi & 1) != 0)
+       mpz_neg (dividend, dividend);
+
+      /* printf ("%ld\n", SIZ (dividend)); */
+
+      /* each call also returns a remainder as an unsigned long; those are
+         cross-checked at the end of the round */
+      r_rq = mpz_fdiv_qr_ui (quotient, remainder, dividend, divisor);
+      r_q = mpz_fdiv_q_ui (quotient2, dividend, divisor);
+      r_r = mpz_fdiv_r_ui (remainder2, dividend, divisor);
+      r = mpz_fdiv_ui (dividend, divisor);
+
+      /* First determine that the quotients and remainders computed
+        with different functions are equal.  */
+      if (mpz_cmp (quotient, quotient2) != 0)
+       dump_abort ("quotients from mpz_fdiv_qr_ui and mpz_fdiv_q_ui differ",
+                   dividend, divisor);
+      if (mpz_cmp (remainder, remainder2) != 0)
+       dump_abort ("remainders from mpz_fdiv_qr_ui and mpz_fdiv_r_ui differ",
+                   dividend, divisor);
+
+      /* Check if the sign of the quotient is correct.  */
+      /* the divisor is positive, so a nonzero quotient takes the sign of
+         the dividend */
+      if (mpz_cmp_ui (quotient, 0) != 0)
+       if ((mpz_cmp_ui (quotient, 0) < 0)
+           != (mpz_cmp_ui (dividend, 0) < 0))
+       dump_abort ("quotient sign wrong", dividend, divisor);
+
+      /* Check if the remainder has the same sign as the (positive) divisor
+        (quotient rounded towards minus infinity).  */
+      if (mpz_cmp_ui (remainder, 0) != 0)
+       if (mpz_cmp_ui (remainder, 0) < 0)
+         dump_abort ("remainder sign wrong", dividend, divisor);
+
+      /* quotient * divisor + remainder must reproduce the dividend */
+      mpz_mul_ui (temp, quotient, divisor);
+      mpz_add (temp, temp, remainder);
+      if (mpz_cmp (temp, dividend) != 0)
+       dump_abort ("n mod d != n - [n/d]*d", dividend, divisor);
+
+      /* remainder is overwritten with its absolute value and then
+         compared with the divisor and with each returned remainder */
+      mpz_abs (remainder, remainder);
+      if (mpz_cmp_ui (remainder, divisor) >= 0)
+       dump_abort ("remainder greater than divisor", dividend, divisor);
+
+      if (mpz_cmp_ui (remainder, r_rq) != 0)
+       dump_abort ("remainder returned from mpz_fdiv_qr_ui is wrong",
+                   dividend, divisor);
+      if (mpz_cmp_ui (remainder, r_q) != 0)
+       dump_abort ("remainder returned from mpz_fdiv_q_ui is wrong",
+                   dividend, divisor);
+      if (mpz_cmp_ui (remainder, r_r) != 0)
+       dump_abort ("remainder returned from mpz_fdiv_r_ui is wrong",
+                   dividend, divisor);
+      if (mpz_cmp_ui (remainder, r) != 0)
+       dump_abort ("remainder returned from mpz_fdiv_ui is wrong",
+                   dividend, divisor);
+    }
+
+  mpz_clear (bs);
+  mpz_clear (dividend);
+  mpz_clear (quotient);
+  mpz_clear (remainder);
+  mpz_clear (quotient2);
+  mpz_clear (remainder2);
+  mpz_clear (temp);
+
+  tests_end ();
+  exit (0);
+}
+
+/* Report a failed check: print the message STR, the dividend (through
+   debug_mp with base argument -16) and the divisor in hex to stderr, then
+   abort.  */
+void
+dump_abort (char *str, mpz_t dividend, unsigned long divisor)
+{
+  fprintf (stderr, "ERROR: %s\n", str);
+
+  fputs ("dividend = ", stderr);
+  debug_mp (dividend, -16);
+
+  fprintf (stderr, "divisor  = %lX\n", divisor);
+
+  abort();
+}
+
+/* Write X to stderr with mpz_out_str using the given base argument,
+   followed by a newline.  */
+void
+debug_mp (mpz_t x, int base)
+{
+  mpz_out_str (stderr, base, x);
+  fputc ('\n', stderr);
+}
diff --git a/tests/mpz/t-fib_ui.c b/tests/mpz/t-fib_ui.c

new file mode 100644 (file)

index 0000000..77e52f5
--- /dev/null
+++ b/tests/mpz/t-fib_ui.c
@@ -0,0 +1,156 @@
+/* Test mpz_fib_ui and mpz_fib2_ui.
+
+Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* Usage: t-fib_ui [x|num]
+
+   Run with no arguments, tests go up to the initial value of "limit"
+   below.  With a number argument tests are carried up that far, or with a
+   literal "x" tests are continued without limit (this being only meant for
+   development purposes).
+
+   The size tests performed are designed to partially replicate what will be
+   going on in mpz_fib_ui.  There's plenty of ASSERTs there, but of course
+   they're not normally enabled.
+
+   Misfeatures:
+
+   The tests on MPN_FIB2_SIZE are a bit useless, since that macro includes a
+   +2 for the internal purposes of mpn_fib2_ui.  It's probably better to
+   give mpn_fib2_ui a run with assertion checking enabled.  */
+
+
+#define MPZ_FIB_SIZE_FLOAT(n) \
+  ((mp_size_t) ((n) * 0.6942419 / GMP_NUMB_BITS + 1))  /* limb estimate for F[n]; 0.6942419 ~= log2 of the golden ratio */
+
+
+void
+check_fib_table (void)
+{
+  int        i;
+  mp_limb_t  want;
+  /* The table is indexed from -1; check its two seed entries first.  */
+  ASSERT_ALWAYS (FIB_TABLE(-1) == 1);
+  ASSERT_ALWAYS (FIB_TABLE(0) == 0);
+  /* Every further entry must be the sum of the two entries before it.  */
+  for (i = 1; i <= FIB_TABLE_LIMIT; i++)
+    {
+      want = FIB_TABLE(i-1) + FIB_TABLE(i-2);
+      if (FIB_TABLE(i) != want)
+        {
+          printf ("FIB_TABLE(%d) wrong\n", i);
+          gmp_printf ("  got  %#Nx\n", &FIB_TABLE(i), 1);   /* %N: limb pointer + limb count */
+          gmp_printf ("  want %#Nx\n", &want, 1);
+          abort ();
+        }
+    }
+}
+
+
+int
+main (int argc, char *argv[])
+{
+  unsigned long  n;
+  unsigned long  limit = 100 * GMP_LIMB_BITS;   /* default: test n below this */
+  mpz_t          want_fn, want_fn1, got_fn, got_fn1;
+
+  tests_start ();
+  mp_trace_base = -16;
+  if (argc > 1 && argv[1][0] == 'x')
+    limit = ULONG_MAX;            /* "x": keep going without a practical limit */
+  else if (argc > 1)
+    limit = atoi (argv[1]);       /* numeric argument: test n below this value */
+  /* Check the FIB_TABLE entries against the recurrence before using mpz.  */
+  check_fib_table ();
+
+  /* start at n==0 */
+  mpz_init_set_ui (want_fn1, 1);  /* F[-1] */
+  mpz_init_set_ui (want_fn,  0);  /* F[0]   */
+  mpz_init (got_fn);
+  mpz_init (got_fn1);
+
+  for (n = 0; n < limit; n++)
+    {
+      /* check our float formula seems right */
+      if (MPZ_FIB_SIZE_FLOAT (n) < SIZ(want_fn))
+        {
+          printf ("MPZ_FIB_SIZE_FLOAT wrong at n=%lu\n", n);
+          printf ("  MPZ_FIB_SIZE_FLOAT  %ld\n", MPZ_FIB_SIZE_FLOAT (n));
+          printf ("  SIZ(want_fn)        %d\n", SIZ(want_fn));
+          abort ();
+        }
+
+      /* check MPN_FIB2_SIZE seems right, compared to actual size and
+         compared to our float formula */
+      if (MPN_FIB2_SIZE (n) < MPZ_FIB_SIZE_FLOAT (n))
+        {
+          printf ("MPN_FIB2_SIZE wrong at n=%lu\n", n);
+          printf ("  MPN_FIB2_SIZE       %ld\n", MPN_FIB2_SIZE (n));
+          printf ("  MPZ_FIB_SIZE_FLOAT  %ld\n", MPZ_FIB_SIZE_FLOAT (n));
+          abort ();
+        }
+      if (MPN_FIB2_SIZE (n) < SIZ(want_fn))
+        {
+          printf ("MPN_FIB2_SIZE wrong at n=%lu\n", n);
+          printf ("  MPN_FIB2_SIZE  %ld\n", MPN_FIB2_SIZE (n));
+          printf ("  SIZ(want_fn)   %d\n", SIZ(want_fn));
+          abort ();
+        }
+      /* mpz_fib2_ui must produce the reference pair F[n], F[n-1].  */
+      mpz_fib2_ui (got_fn, got_fn1, n);
+      MPZ_CHECK_FORMAT (got_fn);
+      MPZ_CHECK_FORMAT (got_fn1);
+      if (mpz_cmp (got_fn, want_fn) != 0 || mpz_cmp (got_fn1, want_fn1) != 0)
+        {
+          printf ("mpz_fib2_ui(%lu) wrong\n", n);
+          mpz_trace ("want fn ", want_fn);
+          mpz_trace ("got  fn ",  got_fn);
+          mpz_trace ("want fn1", want_fn1);
+          mpz_trace ("got  fn1",  got_fn1);
+          abort ();
+        }
+      /* mpz_fib_ui must produce the reference F[n].  */
+      mpz_fib_ui (got_fn, n);
+      MPZ_CHECK_FORMAT (got_fn);
+      if (mpz_cmp (got_fn, want_fn) != 0)
+        {
+          printf ("mpz_fib_ui(%lu) wrong\n", n);
+          mpz_trace ("want fn", want_fn);
+          mpz_trace ("got  fn", got_fn);
+          abort ();
+        }
+      /* Step the reference pair on to n+1 by plain addition.  */
+      mpz_add (want_fn1, want_fn1, want_fn);  /* F[n+1] = F[n] + F[n-1] */
+      mpz_swap (want_fn1, want_fn);           /* want_fn = F[n+1], want_fn1 = F[n] */
+    }
+
+  mpz_clear (want_fn);
+  mpz_clear (want_fn1);
+  mpz_clear (got_fn);
+  mpz_clear (got_fn1);
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-fits.c b/tests/mpz/t-fits.c

new file mode 100644 (file)

index 0000000..3f08802
--- /dev/null
+++ b/tests/mpz/t-fits.c
@@ -0,0 +1,202 @@
+/* Test mpz_fits_*_p */
+
+/*
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* Nothing sophisticated here, just exercise mpz_fits_*_p on a small amount
+   of data. */
+
+#define EXPECT_S(fun,name,answer)                                       \
+  got = fun (z);                                                        \
+  if (got != answer)                                                    \
+    {                                                                   \
+      printf ("%s (%s) got %d want %d\n", name, expr, got, answer);     \
+      printf (" z size %d\n", SIZ(z));                                  \
+      printf (" z dec "); mpz_out_str (stdout, 10, z); printf ("\n");   \
+      printf (" z hex "); mpz_out_str (stdout, 16, z); printf ("\n");   \
+      error = 1;                                                        \
+    }
+/* EXPECT_S uses z, expr, got and error from the expansion site; it records a failure in error and carries on.  */
+#if HAVE_STRINGIZE
+#define EXPECT(fun,answer)  EXPECT_S(fun,#fun,answer)
+#else  /* no # operator: label is "fun" unless the preprocessor substitutes inside strings */
+#define EXPECT(fun,answer)  EXPECT_S(fun,"fun",answer)
+#endif
+
+int
+main (void)
+{
+  mpz_t       z;
+  int         got;
+  const char  *expr;              /* label for z, printed by EXPECT on failure */
+  int         error = 0;          /* set to 1 by EXPECT on any mismatch */
+
+  tests_start ();
+  mpz_init (z);
+  /* Small values 0, 1, -1, then one far too large for any C integer type.  */
+  mpz_set_ui (z, 0L);
+  expr = "0";
+  EXPECT (mpz_fits_ulong_p, 1);
+  EXPECT (mpz_fits_uint_p, 1);
+  EXPECT (mpz_fits_ushort_p, 1);
+  EXPECT (mpz_fits_slong_p, 1);
+  EXPECT (mpz_fits_sint_p, 1);
+  EXPECT (mpz_fits_sshort_p, 1);
+
+  mpz_set_ui (z, 1L);
+  expr = "1";
+  EXPECT (mpz_fits_ulong_p, 1);
+  EXPECT (mpz_fits_uint_p, 1);
+  EXPECT (mpz_fits_ushort_p, 1);
+  EXPECT (mpz_fits_slong_p, 1);
+  EXPECT (mpz_fits_sint_p, 1);
+  EXPECT (mpz_fits_sshort_p, 1);
+
+  mpz_set_si (z, -1L);
+  expr = "-1";
+  EXPECT (mpz_fits_ulong_p, 0);
+  EXPECT (mpz_fits_uint_p, 0);
+  EXPECT (mpz_fits_ushort_p, 0);
+  EXPECT (mpz_fits_slong_p, 1);
+  EXPECT (mpz_fits_sint_p, 1);
+  EXPECT (mpz_fits_sshort_p, 1);
+
+  mpz_set_ui (z, 1L);
+  mpz_mul_2exp (z, z, 5L*GMP_LIMB_BITS);
+  expr = "2^(5*BPML)";
+  EXPECT (mpz_fits_ulong_p, 0);
+  EXPECT (mpz_fits_uint_p, 0);
+  EXPECT (mpz_fits_ushort_p, 0);
+  EXPECT (mpz_fits_slong_p, 0);
+  EXPECT (mpz_fits_sint_p, 0);
+  EXPECT (mpz_fits_sshort_p, 0);
+
+  /* Upper boundaries of the unsigned types: MAX fits, MAX + 1 does not.  */
+  mpz_set_ui (z, (unsigned long) USHRT_MAX);
+  expr = "USHRT_MAX";
+  EXPECT (mpz_fits_ulong_p, 1);
+  EXPECT (mpz_fits_uint_p, 1);
+  EXPECT (mpz_fits_ushort_p, 1);
+
+  mpz_set_ui (z, (unsigned long) USHRT_MAX);
+  mpz_add_ui (z, z, 1L);
+  expr = "USHRT_MAX + 1";
+  EXPECT (mpz_fits_ushort_p, 0);
+
+
+  mpz_set_ui (z, (unsigned long) UINT_MAX);
+  expr = "UINT_MAX";
+  EXPECT (mpz_fits_ulong_p, 1);
+  EXPECT (mpz_fits_uint_p, 1);
+
+  mpz_set_ui (z, (unsigned long) UINT_MAX);
+  mpz_add_ui (z, z, 1L);
+  expr = "UINT_MAX + 1";
+  EXPECT (mpz_fits_uint_p, 0);
+
+
+  mpz_set_ui (z, ULONG_MAX);
+  expr = "ULONG_MAX";
+  EXPECT (mpz_fits_ulong_p, 1);
+
+  mpz_set_ui (z, ULONG_MAX);
+  mpz_add_ui (z, z, 1L);
+  expr = "ULONG_MAX + 1";
+  EXPECT (mpz_fits_ulong_p, 0);
+
+  /* Upper boundaries of the signed types: MAX fits, MAX + 1 does not.  */
+  mpz_set_si (z, (long) SHRT_MAX);
+  expr = "SHRT_MAX";
+  EXPECT (mpz_fits_slong_p, 1);
+  EXPECT (mpz_fits_sint_p, 1);
+  EXPECT (mpz_fits_sshort_p, 1);
+
+  mpz_set_si (z, (long) SHRT_MAX);
+  mpz_add_ui (z, z, 1L);
+  expr = "SHRT_MAX + 1";
+  EXPECT (mpz_fits_sshort_p, 0);
+
+
+  mpz_set_si (z, (long) INT_MAX);
+  expr = "INT_MAX";
+  EXPECT (mpz_fits_slong_p, 1);
+  EXPECT (mpz_fits_sint_p, 1);
+
+  mpz_set_si (z, (long) INT_MAX);
+  mpz_add_ui (z, z, 1L);
+  expr = "INT_MAX + 1";
+  EXPECT (mpz_fits_sint_p, 0);
+
+
+  mpz_set_si (z, LONG_MAX);
+  expr = "LONG_MAX";
+  EXPECT (mpz_fits_slong_p, 1);
+
+  mpz_set_si (z, LONG_MAX);
+  mpz_add_ui (z, z, 1L);
+  expr = "LONG_MAX + 1";
+  EXPECT (mpz_fits_slong_p, 0);
+
+  /* Lower boundaries of the signed types: MIN fits, MIN - 1 does not.  */
+  mpz_set_si (z, (long) SHRT_MIN);
+  expr = "SHRT_MIN";
+  EXPECT (mpz_fits_slong_p, 1);
+  EXPECT (mpz_fits_sint_p, 1);
+  EXPECT (mpz_fits_sshort_p, 1);
+
+  mpz_set_si (z, (long) SHRT_MIN);
+  mpz_sub_ui (z, z, 1L);
+  expr = "SHRT_MIN - 1";          /* label matches the subtraction above */
+  EXPECT (mpz_fits_sshort_p, 0);
+
+
+  mpz_set_si (z, (long) INT_MIN);
+  expr = "INT_MIN";
+  EXPECT (mpz_fits_slong_p, 1);
+  EXPECT (mpz_fits_sint_p, 1);
+
+  mpz_set_si (z, (long) INT_MIN);
+  mpz_sub_ui (z, z, 1L);
+  expr = "INT_MIN - 1";           /* label matches the subtraction above */
+  EXPECT (mpz_fits_sint_p, 0);
+
+
+  mpz_set_si (z, LONG_MIN);
+  expr = "LONG_MIN";
+  EXPECT (mpz_fits_slong_p, 1);
+
+  mpz_set_si (z, LONG_MIN);
+  mpz_sub_ui (z, z, 1L);
+  expr = "LONG_MIN - 1";          /* label matches the subtraction above */
+  EXPECT (mpz_fits_slong_p, 0);
+
+  /* All mismatches have been printed by now; fail once at the end.  */
+  if (error)
+    abort ();
+
+  mpz_clear (z);
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-gcd.c b/tests/mpz/t-gcd.c

new file mode 100644 (file)

index 0000000..687e055
--- /dev/null
+++ b/tests/mpz/t-gcd.c
@@ -0,0 +1,393 @@
+/* Test mpz_gcd, mpz_gcdext, and mpz_gcd_ui.
+
+Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2003, 2004, 2005,
+2008, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+void one_test __GMP_PROTO ((mpz_t, mpz_t, mpz_t, int));
+void debug_mp __GMP_PROTO ((mpz_t, int));
+
+static int gcdext_valid_p __GMP_PROTO ((const mpz_t a, const mpz_t b, const mpz_t g, const mpz_t s));
+
+void
+check_data (void)
+{
+  static const struct {
+    const char *a;
+    const char *b;
+    const char *want;             /* expected gcd of a and b */
+  } data[] = {
+    /* This tickled a bug in gmp 4.1.2 mpn/x86/k6/gcd_finda.asm. */
+    { "0x3FFC000007FFFFFFFFFF00000000003F83FFFFFFFFFFFFFFF80000000000000001",
+      "0x1FFE0007FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFC000000000000000000000001",
+      "5" }
+  };
+
+  mpz_t  a, b, got, want;
+  int    i;
+
+  mpz_init (a);
+  mpz_init (b);
+  mpz_init (got);
+  mpz_init (want);
+  /* Parse each fixed case and require mpz_gcd to reproduce its want value.  */
+  for (i = 0; i < numberof (data); i++)
+    {
+      mpz_set_str_or_abort (a, data[i].a, 0);   /* base 0: presumably radix taken from the prefix -- confirm in tests.h */
+      mpz_set_str_or_abort (b, data[i].b, 0);
+      mpz_set_str_or_abort (want, data[i].want, 0);
+      mpz_gcd (got, a, b);
+      MPZ_CHECK_FORMAT (got);
+      if (mpz_cmp (got, want) != 0)
+       {
+         printf    ("mpz_gcd wrong on data[%d]\n", i);
+         printf    (" a  %s\n", data[i].a);
+         printf    (" b  %s\n", data[i].b);
+         mpz_trace (" a", a);
+         mpz_trace (" b", b);
+         mpz_trace (" want", want);
+         mpz_trace (" got ", got);
+         abort ();
+       }
+    }
+
+  mpz_clear (a);
+  mpz_clear (b);
+  mpz_clear (got);
+  mpz_clear (want);
+}
+
+/* Keep one_test's variables global, so that we don't need
+   to reinitialize them for each test.  */
+mpz_t gcd1, gcd2, s, t, temp1, temp2, temp3;
+
+#if GCD_DC_THRESHOLD > GCDEXT_DC_THRESHOLD
+#define MAX_SCHOENHAGE_THRESHOLD GCD_DC_THRESHOLD
+#else
+#define MAX_SCHOENHAGE_THRESHOLD GCDEXT_DC_THRESHOLD
+#endif  /* i.e. MAX_SCHOENHAGE_THRESHOLD is the larger of the two thresholds */
+
+/* Define this to make all operands be large enough for Schoenhage gcd
+   to be used.  */
+#ifndef WHACK_SCHOENHAGE
+#define WHACK_SCHOENHAGE 0
+#endif
+/* MIN_OPERAND_BITSIZE is added to every random operand bit size chosen in main.  */
+#if WHACK_SCHOENHAGE
+#define MIN_OPERAND_BITSIZE (MAX_SCHOENHAGE_THRESHOLD * GMP_NUMB_BITS)
+#else
+#define MIN_OPERAND_BITSIZE 1
+#endif
+
+int
+main (int argc, char **argv)
+{
+  mpz_t op1, op2, ref;
+  int i, j, chain_len;
+  gmp_randstate_ptr rands;
+  mpz_t bs;
+  unsigned long bsi, size_range;
+  int reps = 200;               /* default repetitions; TESTS_REPS is handed argv/argc below */
+
+  tests_start ();
+  TESTS_REPS (reps, argv, argc);
+
+  rands = RANDS;
+  /* Fixed regression cases first, then the randomized tests.  */
+  check_data ();
+
+  mpz_init (bs);
+  mpz_init (op1);
+  mpz_init (op2);
+  mpz_init (ref);
+  mpz_init (gcd1);
+  mpz_init (gcd2);
+  mpz_init (temp1);
+  mpz_init (temp2);
+  mpz_init (temp3);
+  mpz_init (s);
+  mpz_init (t);
+
+  /* Testcase to exercise the u0 == u1 case in mpn_gcdext_lehmer_n. */
+  mpz_set_ui (op2, GMP_NUMB_MAX);
+  mpz_mul_2exp (op1, op2, 100);
+  mpz_add (op1, op1, op2);
+  mpz_mul_ui (op2, op2, 2);
+  one_test (op1, op2, NULL, -1);        /* NULL ref: gcd not known in advance */
+
+#if 0
+  mpz_set_str (op1, "4da8e405e0d2f70d6d679d3de08a5100a81ec2cff40f97b313ae75e1183f1df2b244e194ebb02a4ece50d943640a301f0f6cc7f539117b783c3f3a3f91649f8a00d2e1444d52722810562bce02fccdbbc8fe3276646e306e723dd3b", 16);
+  mpz_set_str (op2, "76429e12e4fdd8929d89c21657097fbac09d1dc08cf7f1323a34e78ca34226e1a7a29b86fee0fa7fe2cc2a183d46d50df1fe7029590974ad7da77605f35f902cb8b9b8d22dd881eaae5919675d49a337145a029c3b33fc2b0", 16);
+  one_test (op1, op2, NULL, -1);
+#endif
+
+  for (i = 0; i < reps; i++)
+    {
+      /* Generate plain operands with unknown gcd.  These types of operands
+        have proven to trigger certain bugs in development versions of the
+        gcd code.  The "hgcd->row[3].rsize > M" ASSERT is not triggered by
+        the division chain code below, but that is most likely just because
+        other ASSERTs are triggered before it.  */
+
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 17 + 2;    /* 2..18 */
+
+      mpz_urandomb (bs, rands, size_range);
+      mpz_rrandomb (op1, rands, mpz_get_ui (bs) + MIN_OPERAND_BITSIZE);
+      mpz_urandomb (bs, rands, size_range);
+      mpz_rrandomb (op2, rands, mpz_get_ui (bs) + MIN_OPERAND_BITSIZE);
+
+      mpz_urandomb (bs, rands, 8);
+      bsi = mpz_get_ui (bs);                    /* 8 random bits steering the variations below */
+
+      if ((bsi & 0x3c) == 4)
+       mpz_mul (op1, op1, op2);        /* make op1 a multiple of op2 */
+      else if ((bsi & 0x3c) == 8)
+       mpz_mul (op2, op1, op2);        /* make op2 a multiple of op1 */
+
+      if ((bsi & 1) != 0)
+       mpz_neg (op1, op1);             /* bit 0: negate op1 */
+      if ((bsi & 2) != 0)
+       mpz_neg (op2, op2);             /* bit 1: negate op2 */
+
+      one_test (op1, op2, NULL, i);
+
+      /* Generate a division chain backwards, allowing otherwise unlikely huge
+        quotients.  */
+
+      mpz_set_ui (op1, 0);
+      mpz_urandomb (bs, rands, 32);
+      mpz_urandomb (bs, rands, mpz_get_ui (bs) % 16 + 1);
+      mpz_rrandomb (op2, rands, mpz_get_ui (bs));
+      mpz_add_ui (op2, op2, 1);
+      mpz_set (ref, op2);               /* chain starts from (0, op2), so op2 is the expected gcd */
+
+#if WHACK_SCHOENHAGE
+      chain_len = 1000000;
+#else
+      mpz_urandomb (bs, rands, 32);
+      chain_len = mpz_get_ui (bs) % (GMP_NUMB_BITS * MAX_SCHOENHAGE_THRESHOLD / 256);
+#endif
+
+      for (j = 0; j < chain_len; j++)
+       {
+         mpz_urandomb (bs, rands, 32);
+         mpz_urandomb (bs, rands, mpz_get_ui (bs) % 12 + 1);
+         mpz_rrandomb (temp2, rands, mpz_get_ui (bs) + 1);
+         mpz_add_ui (temp2, temp2, 1);
+         mpz_mul (temp1, op2, temp2);
+         mpz_add (op1, op1, temp1);             /* op1 += op2 * random quotient */
+
+         /* Don't generate overly huge operands.  */
+         if (SIZ (op1) > 3 * MAX_SCHOENHAGE_THRESHOLD)
+           break;
+
+         mpz_urandomb (bs, rands, 32);
+         mpz_urandomb (bs, rands, mpz_get_ui (bs) % 12 + 1);
+         mpz_rrandomb (temp2, rands, mpz_get_ui (bs) + 1);
+         mpz_add_ui (temp2, temp2, 1);
+         mpz_mul (temp1, op1, temp2);
+         mpz_add (op2, op2, temp1);             /* op2 += op1 * random quotient */
+
+         /* Don't generate overly huge operands.  */
+         if (SIZ (op2) > 3 * MAX_SCHOENHAGE_THRESHOLD)
+           break;
+       }
+      one_test (op1, op2, ref, i);
+    }
+
+  mpz_clear (bs);
+  mpz_clear (op1);
+  mpz_clear (op2);
+  mpz_clear (ref);
+  mpz_clear (gcd1);
+  mpz_clear (gcd2);
+  mpz_clear (temp1);
+  mpz_clear (temp2);
+  mpz_clear (temp3);
+  mpz_clear (s);
+  mpz_clear (t);
+
+  tests_end ();
+  exit (0);
+}
+
+void
+debug_mp (mpz_t x, int base)
+{                               /* Print x in the given base on stderr.  */
+  mpz_out_str (stderr, base, x); fputc ('\n', stderr);   /* newline-terminated */
+}
+
+void
+one_test (mpz_t op1, mpz_t op2, mpz_t ref, int i)
+{
+  /*
+  printf ("%ld %ld %ld\n", SIZ (op1), SIZ (op2), SIZ (ref));
+  fflush (stdout);
+  */
+
+  /*
+  fprintf (stderr, "op1=");  debug_mp (op1, -16);
+  fprintf (stderr, "op2=");  debug_mp (op2, -16);
+  */
+  /* First call passes NULL for the second cofactor; only gcd1 and s come back.  */
+  mpz_gcdext (gcd1, s, NULL, op1, op2);
+  MPZ_CHECK_FORMAT (gcd1);
+  MPZ_CHECK_FORMAT (s);
+  /* ref may be NULL; when given, it is the gcd the caller expects.  */
+  if (ref && mpz_cmp (ref, gcd1) != 0)
+    {
+      fprintf (stderr, "ERROR in test %d\n", i);
+      fprintf (stderr, "mpz_gcdext returned incorrect result\n");
+      fprintf (stderr, "op1=");                 debug_mp (op1, -16);
+      fprintf (stderr, "op2=");                 debug_mp (op2, -16);
+      fprintf (stderr, "expected result:\n");   debug_mp (ref, -16);
+      fprintf (stderr, "mpz_gcdext returns:\n");debug_mp (gcd1, -16);
+      abort ();
+    }
+
+  if (!gcdext_valid_p(op1, op2, gcd1, s))       /* divisibility and cofactor-size checks */
+    {
+      fprintf (stderr, "ERROR in test %d\n", i);
+      fprintf (stderr, "mpz_gcdext returned invalid result\n");
+      fprintf (stderr, "op1=");                 debug_mp (op1, -16);
+      fprintf (stderr, "op2=");                 debug_mp (op2, -16);
+      fprintf (stderr, "mpz_gcdext returns:\n");debug_mp (gcd1, -16);
+      fprintf (stderr, "s=");                   debug_mp (s, -16);
+      abort ();
+    }
+  /* mpz_gcd must agree with the gcd from mpz_gcdext.  */
+  mpz_gcd (gcd2, op1, op2);
+  MPZ_CHECK_FORMAT (gcd2);
+
+  if (mpz_cmp (gcd2, gcd1) != 0)
+    {
+      fprintf (stderr, "ERROR in test %d\n", i);
+      fprintf (stderr, "mpz_gcd returned incorrect result\n");
+      fprintf (stderr, "op1=");                 debug_mp (op1, -16);
+      fprintf (stderr, "op2=");                 debug_mp (op2, -16);
+      fprintf (stderr, "expected result:\n");   debug_mp (gcd1, -16);
+      fprintf (stderr, "mpz_gcd returns:\n");   debug_mp (gcd2, -16);
+      abort ();
+    }
+
+  /* This should probably move to t-gcd_ui.c */
+  if (mpz_fits_ulong_p (op1) || mpz_fits_ulong_p (op2))
+    {
+      if (mpz_fits_ulong_p (op1))
+       mpz_gcd_ui (gcd2, op2, mpz_get_ui (op1));       /* op1 is the ulong operand */
+      else
+       mpz_gcd_ui (gcd2, op1, mpz_get_ui (op2));       /* op2 is the ulong operand */
+      if (mpz_cmp (gcd2, gcd1))
+       {
+         fprintf (stderr, "ERROR in test %d\n", i);
+         fprintf (stderr, "mpz_gcd_ui returned incorrect result\n");
+         fprintf (stderr, "op1=");                 debug_mp (op1, -16);
+         fprintf (stderr, "op2=");                 debug_mp (op2, -16);
+         fprintf (stderr, "expected result:\n");   debug_mp (gcd1, -16);
+         fprintf (stderr, "mpz_gcd_ui returns:\n");   debug_mp (gcd2, -16);
+         abort ();
+       }
+    }
+  /* Second mpz_gcdext call asks for both cofactors (in temp1 and temp2).  */
+  mpz_gcdext (gcd2, temp1, temp2, op1, op2);
+  MPZ_CHECK_FORMAT (gcd2);
+  MPZ_CHECK_FORMAT (temp1);
+  MPZ_CHECK_FORMAT (temp2);
+
+  mpz_mul (temp1, temp1, op1);
+  mpz_mul (temp2, temp2, op2);
+  mpz_add (temp1, temp1, temp2);        /* temp1 = s*op1 + t*op2, must equal the gcd */
+
+  if (mpz_cmp (gcd1, gcd2) != 0
+      || mpz_cmp (gcd2, temp1) != 0)
+    {
+      fprintf (stderr, "ERROR in test %d\n", i);
+      fprintf (stderr, "mpz_gcdext returned incorrect result\n");
+      fprintf (stderr, "op1=");                 debug_mp (op1, -16);
+      fprintf (stderr, "op2=");                 debug_mp (op2, -16);
+      fprintf (stderr, "expected result:\n");   debug_mp (gcd1, -16);
+      fprintf (stderr, "mpz_gcdext returns:\n");debug_mp (gcd2, -16);
+      abort ();
+    }
+}
+
+/* Called when g is supposed to be gcd(a,b), and g = s a + t b, for some t.
+   Uses temp1, temp2 and temp3. */
+static int
+gcdext_valid_p (const mpz_t a, const mpz_t b, const mpz_t g, const mpz_t s)
+{
+  /* It's not clear that gcd(0,0) is well defined, but we allow it and require that
+     gcd(0,0) = 0. */
+  if (mpz_sgn (g) < 0)
+    return 0;                   /* a negative g is never accepted */
+
+  if (mpz_sgn (a) == 0)
+    {
+      /* Must have g == abs (b). Any value for s is in some sense "correct",
+        but it makes sense to require that s == 0. */
+      return mpz_cmpabs (g, b) == 0 && mpz_sgn (s) == 0;
+    }
+  else if (mpz_sgn (b) == 0)
+    {
+      /* Must have g == abs (a), s == sign (a) */
+      return mpz_cmpabs (g, a) == 0 && mpz_cmp_si (s, mpz_sgn (a)) == 0;
+    }
+  /* From here on a and b are both nonzero, so g must be strictly positive.  */
+  if (mpz_sgn (g) <= 0)
+    return 0;
+
+  mpz_tdiv_qr (temp1, temp3, a, g);     /* temp1 = a/g; remainder must be 0 */
+  if (mpz_sgn (temp3) != 0)
+    return 0;
+
+  mpz_tdiv_qr (temp2, temp3, b, g);     /* temp2 = b/g; remainder must be 0 */
+  if (mpz_sgn (temp3) != 0)
+    return 0;
+
+  /* Require that 2 |s| <= |b/g|, or |s| <= 1. */
+  if (mpz_cmpabs_ui (s, 1) > 0)
+    {
+      mpz_mul_2exp (temp3, s, 1);
+      if (mpz_cmpabs (temp3, temp2) > 0)        /* rejects only 2 |s| > |b/g| */
+       return 0;
+    }
+
+  /* Compute the other cofactor, t = (g - s a) / b. */
+  mpz_mul(temp2, s, a);
+  mpz_sub(temp2, g, temp2);
+  mpz_tdiv_qr(temp2, temp3, temp2, b);
+
+  if (mpz_sgn (temp3) != 0)
+    return 0;                   /* g - s a is not a multiple of b */
+
+  /* Require that 2 |t| <= |a/g|, or |t| <= 1. */
+  if (mpz_cmpabs_ui (temp2, 1) > 0)
+    {
+      mpz_mul_2exp (temp2, temp2, 1);
+      if (mpz_cmpabs (temp2, temp1) > 0)        /* rejects only 2 |t| > |a/g| */
+       return 0;
+    }
+  return 1;
+}
diff --git a/tests/mpz/t-gcd_ui.c b/tests/mpz/t-gcd_ui.c

new file mode 100644 (file)

index 0000000..ac6431c
--- /dev/null
+++ b/tests/mpz/t-gcd_ui.c
@@ -0,0 +1,63 @@
+/* Test mpz_gcd_ui.
+
+Copyright 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+/* Check mpz_gcd_ui doesn't try to return a value out of range.
+   This was wrong in gmp 4.1.2 with a long long limb.  */
+static void
+check_ui_range (void)
+{
+  unsigned long  got;
+  mpz_t  x;
+  int  i;
+
+  mpz_init_set_ui (x, ULONG_MAX);
+  /* x is doubled before each check, so pass i tests ULONG_MAX * 2^(i+1).  */
+  for (i = 0; i < 20; i++)
+    {
+      mpz_mul_2exp (x, x, 1L);
+      got = mpz_gcd_ui (NULL, x, 0L);   /* NULL result operand: only the return value is examined */
+      if (got != 0)
+        {
+          printf ("mpz_gcd_ui (ULONG_MAX*2^%d, 0)\n", i + 1);   /* exponent actually in x */
+          printf ("   return %#lx\n", got);
+          printf ("   should be 0\n");
+          abort ();
+        }
+    }
+
+  mpz_clear (x);
+}
+
+int
+main (void)
+{
+  tests_start ();
+
+  check_ui_range ();            /* the only check in this program; aborts on failure */
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-get_d.c b/tests/mpz/t-get_d.c

new file mode 100644 (file)

index 0000000..41c0ba8
--- /dev/null
+++ b/tests/mpz/t-get_d.c
@@ -0,0 +1,74 @@
+/* Test mpz_get_d.
+
+Copyright 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+void
+check_onebit (void)
+{
+  int     i;
+  mpz_t   z;
+  double  got, want;
+  /* FIXME: It'd be better to base this on the float format. */
+#ifdef __vax
+  int     limit = 127;  /* vax fp numbers have limited range */
+#else
+  int     limit = 512;
+#endif
+
+  mpz_init (z);
+  /* z and want both start at 2**0 and are doubled in step below.  */
+  mpz_set_ui (z, 1L);
+  want = 1.0;
+
+  for (i = 0; i < limit; i++)
+    {
+      got = mpz_get_d (z);
+
+      if (got != want)          /* exact comparison, no tolerance */
+        {
+          printf    ("mpz_get_d wrong on 2**%d\n", i);
+          mpz_trace ("   z    ", z);
+          printf    ("   want  %.20g\n", want);
+          printf    ("   got   %.20g\n", got);
+          abort();
+        }
+
+      mpz_mul_2exp (z, z, 1L);  /* next power of two */
+      want *= 2.0;
+    }
+  mpz_clear (z);
+}
+
+
+int
+main (void)
+{
+  tests_start ();
+
+  check_onebit ();              /* the only check in this program; aborts on failure */
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-get_d_2exp.c b/tests/mpz/t-get_d_2exp.c

new file mode 100644 (file)

index 0000000..3b659f8
--- /dev/null
+++ b/tests/mpz/t-get_d_2exp.c
@@ -0,0 +1,196 @@
+/* Test mpz_get_d_2exp.
+
+Copyright 2002, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+static void
+check_onebit (void)
+{
+  static const unsigned long data[] = {
+    1, 32, 52, 53, 54, 63, 64, 65, 128, 256, 511, 512, 513
+  };
+  mpz_t   z;
+  double  got, want;
+  long    got_exp, want_exp;
+  int     i;
+
+  mpz_init (z);
+  /* For each k in data: 2**k must come back as 0.5 with exponent k+1.  */
+  for (i = 0; i < numberof (data); i++)
+    {
+      mpz_set_ui (z, 1L);
+      mpz_mul_2exp (z, z, data[i]);
+      want = 0.5;
+      want_exp = data[i] + 1;
+      got = mpz_get_d_2exp (&got_exp, z);
+      if (got != want || got_exp != want_exp)
+        {
+          printf    ("mpz_get_d_2exp wrong on 2**%lu\n", data[i]);  /* data[] is unsigned long */
+          mpz_trace ("   z    ", z);
+          d_trace   ("   want ", want);
+          d_trace   ("   got  ", got);
+          printf    ("   want exp %ld\n", want_exp);
+          printf    ("   got exp  %ld\n", got_exp);
+          abort();
+        }
+      /* Same again for -2**k: -0.5 with the same exponent.  */
+      mpz_set_si (z, -1L);
+      mpz_mul_2exp (z, z, data[i]);
+      want = -0.5;
+      want_exp = data[i] + 1;
+      got = mpz_get_d_2exp (&got_exp, z);
+      if (got != want || got_exp != want_exp)
+        {
+          printf    ("mpz_get_d_2exp wrong on -2**%lu\n", data[i]); /* data[] is unsigned long */
+          mpz_trace ("   z    ", z);
+          d_trace   ("   want ", want);
+          d_trace   ("   got  ", got);
+          printf    ("   want exp %ld\n", want_exp);
+          printf    ("   got exp  %ld\n", got_exp);
+          abort();
+        }
+    }
+  mpz_clear (z);
+}
+
+/* Check that hardware rounding doesn't make mpz_get_d_2exp return a value
+   outside its defined range. */
+static void
+check_round (void)
+{
+  static const unsigned long data[] = { 1, 32, 53, 54, 64, 128, 256, 512 };
+  mpz_t   z;
+  double  got;
+  long    got_exp;
+  int     i, rnd_mode, old_rnd_mode;
+
+  mpz_init (z);
+  old_rnd_mode = tests_hardware_getround ();    /* restored before returning */
+  /* Repeat the range checks under each of the rounding-mode codes 0..3.  */
+  for (rnd_mode = 0; rnd_mode < 4; rnd_mode++)
+    {
+      tests_hardware_setround (rnd_mode);
+
+      for (i = 0; i < numberof (data); i++)
+        {
+          mpz_set_ui (z, 1L);
+          mpz_mul_2exp (z, z, data[i]);
+          mpz_sub_ui (z, z, 1L);                /* z = 2**k - 1 */
+
+          got = mpz_get_d_2exp (&got_exp, z);
+          if (got < 0.5 || got >= 1.0)
+            {
+              printf    ("mpz_get_d_2exp wrong on 2**%lu-1\n", data[i]);
+              printf    ("result out of range, expect 0.5 <= got < 1.0\n");
+              printf    ("   rnd_mode = %d\n", rnd_mode);
+              printf    ("   data[i]  = %lu\n", data[i]);
+              mpz_trace ("   z    ", z);
+              d_trace   ("   got  ", got);
+              printf    ("   got exp  %ld\n", got_exp);
+              abort();
+            }
+          /* Same magnitude negated: z = -(2**k - 1).  */
+          mpz_neg (z, z);
+          got = mpz_get_d_2exp (&got_exp, z);
+          if (got <= -1.0 || got > -0.5)
+            {
+              printf    ("mpz_get_d_2exp wrong on -(2**%lu-1)\n", data[i]);
+              printf    ("result out of range, expect -1.0 < got <= -0.5\n");
+              printf    ("   rnd_mode = %d\n", rnd_mode);
+              printf    ("   data[i]  = %lu\n", data[i]);
+              mpz_trace ("   z    ", z);
+              d_trace   ("   got  ", got);
+              printf    ("   got exp  %ld\n", got_exp);
+              abort();
+            }
+        }
+    }
+
+  mpz_clear (z);
+  tests_hardware_setround (old_rnd_mode);
+}
+
+static void
+check_rand (void)
+{
+  gmp_randstate_ptr rands = RANDS;
+  int     i;
+  mpz_t   z;
+  double  got;
+  long    got_exp;
+  unsigned long  bits;
+
+  mpz_init (z);
+  /* 200 random non-negative operands; only the mantissa range is checked.  */
+  for (i = 0; i < 200; i++)
+    {
+      bits = gmp_urandomm_ui (rands, 512L);     /* random bit count below 512 */
+      mpz_urandomb (z, rands, bits);
+
+      got = mpz_get_d_2exp (&got_exp, z);
+      if (mpz_sgn (z) == 0)
+        continue;                               /* the range check below is skipped for zero */
+      bits = mpz_sizeinbase (z, 2);             /* only used by the disabled exponent check */
+
+      if (got < 0.5 || got >= 1.0)
+        {
+          printf    ("mpz_get_d_2exp out of range, expect 0.5 <= got < 1.0\n");
+          mpz_trace ("   z    ", z);
+          d_trace   ("   got  ", got);
+          printf    ("   got exp  %ld\n", got_exp);
+          abort();
+        }
+
+      /* FIXME: If mpz_get_d_2exp rounds upwards we might have got_exp ==
+         bits+1, so leave this test disabled until we decide if that's what
+         should happen, or not.  */
+#if 0
+      if (got_exp != bits)
+        {
+          printf    ("mpz_get_d_2exp wrong exponent\n", i);
+          mpz_trace ("   z    ", z);
+          d_trace   ("   bits ", bits);
+          d_trace   ("   got  ", got);
+          printf    ("   got exp  %ld\n", got_exp);
+          abort();
+        }
+#endif
+    }
+  mpz_clear (z);
+}
+
+
+int
+main (void)
+{
+  tests_start ();
+  mp_trace_base = -16;
+  /* Each check aborts on its first failure.  */
+  check_onebit ();              /* exact results for +/- 2**k */
+  check_round ();               /* mantissa range under each rounding mode */
+  check_rand ();                /* mantissa range on random operands */
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-get_si.c b/tests/mpz/t-get_si.c

new file mode 100644 (file)

index 0000000..0a9739b
--- /dev/null
+++ b/tests/mpz/t-get_si.c
@@ -0,0 +1,122 @@
+/* Exercise mpz_get_si.
+
+Copyright 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* Parse a few small literals and compare mpz_get_si against the table.  */
+void
+check_data (void)
+{
+  static const struct {
+    const char  *str;
+    long        expect;
+  } cases[] = {
+    { "0",      0L },
+    { "1",      1L },
+    { "-1",     -1L },
+    { "2",      2L },
+    { "-2",     -2L },
+    { "12345",  12345L },
+    { "-12345", -12345L },
+  };
+
+  int    idx;
+  mpz_t  z;
+  long   actual;
+
+  mpz_init (z);
+  for (idx = 0; idx < numberof (cases); idx++)
+    {
+      mpz_set_str_or_abort (z, cases[idx].str, 0);
+      actual = mpz_get_si (z);
+      if (actual == cases[idx].expect)
+        continue;
+
+      printf ("mpz_get_si wrong at data[%d]\n", idx);
+      printf ("   n     \"%s\" (", cases[idx].str);
+      mpz_out_str (stdout, 10, z); printf (", hex ");
+      mpz_out_str (stdout, 16, z); printf (")\n");
+      printf ("   got   %ld (0x%lX)\n", actual, actual);
+      printf ("   want  %ld (0x%lX)\n", cases[idx].expect, cases[idx].expect);
+      abort();
+    }
+  mpz_clear (z);
+}
+
+
+/* mpz_get_si at both ends of the long range, and one doubling past LONG_MIN.  */
+void
+check_max (void)
+{
+  mpz_t  n;
+  long   want;
+  long   got;
+
+  mpz_init (n);
+#define CHECK_MAX(name)                                 \
+  if (got != want)                                      \
+    {                                                   \
+      printf ("mpz_get_si wrong on %s\n", name);        \
+      printf ("   n    ");                              \
+      mpz_out_str (stdout, 10, n); printf (", hex ");   \
+      mpz_out_str (stdout, 16, n); printf ("\n");       \
+      printf ("   got  %ld, hex %lX\n", got, got);      \
+      printf ("   want %ld, hex %lX\n", want, want);    \
+      abort();                                          \
+    }
+
+  want = LONG_MAX;
+  mpz_set_si (n, want);
+  got = mpz_get_si (n);
+  CHECK_MAX ("LONG_MAX");
+
+  want = LONG_MIN;
+  mpz_set_si (n, want);
+  got = mpz_get_si (n);
+  CHECK_MAX ("LONG_MIN");
+
+  /* The following checks that -0x100000000 gives -0x80000000.  This doesn't
+     actually fit in a long and the result from mpz_get_si() is undefined,
+     but -0x80000000 is what comes out currently, and it should be that
+     value irrespective of the mp_limb_t size (long or long long).  */
+  want = LONG_MIN;
+  mpz_mul_2exp (n, n, 1);
+  got = mpz_get_si (n);  /* re-read: CHECK_MAX would otherwise see the stale got */
+  CHECK_MAX ("-0x100...00");
+
+  mpz_clear (n);
+}
+
+
+int
+main (void)
+{
+  tests_start ();
+  /* Both checks abort() on the first mismatch, so exit (0) means success.  */
+  check_data ();
+  check_max ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-hamdist.c b/tests/mpz/t-hamdist.c

new file mode 100644 (file)

index 0000000..8109365
--- /dev/null
+++ b/tests/mpz/t-hamdist.c
@@ -0,0 +1,124 @@
+/* Test mpz_hamdist.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* mpz_hamdist on all pairs of one-bit values 2^i, 2^j, and on their negatives.  */
+void
+check_twobits (void)
+{
+  unsigned long  i, j, got, want;
+  mpz_t  x, y;
+
+  mpz_init (x);
+  mpz_init (y);
+  for (i = 0; i < 5 * GMP_NUMB_BITS; i++)
+    {
+      for (j = 0; j < 5 * GMP_NUMB_BITS; j++)
+        {
+          mpz_set_ui (x, 0L);
+          mpz_setbit (x, i);
+          mpz_set_ui (y, 0L);
+          mpz_setbit (y, j);
+          /* x = 2^i and y = 2^j differ in bits i and j, or nowhere if i == j */
+          want = 2 * (i != j);
+          got = mpz_hamdist (x, y);
+          if (got != want)
+            {
+              printf    ("mpz_hamdist wrong on 2 bits pos/pos\n");
+            wrong:  /* shared report, also entered by goto from the neg/neg case */
+              printf    ("  i    %lu\n", i);
+              printf    ("  j    %lu\n", j);
+              printf    ("  got  %lu\n", got);
+              printf    ("  want %lu\n", want);
+              mpz_trace ("  x   ", x);
+              mpz_trace ("  y   ", y);
+              abort();
+            }
+          /* for -2^i and -2^j the expected distance is |i-j| */
+          mpz_neg (x, x);
+          mpz_neg (y, y);
+          want = ABS ((long) (i-j));
+          got = mpz_hamdist (x, y);
+          if (got != want)
+            {
+              printf    ("mpz_hamdist wrong on 2 bits neg/neg\n");
+              goto wrong;
+            }
+        }
+    }
+  mpz_clear (x);
+  mpz_clear (y);
+}
+
+
+/* Compare mpz_hamdist with refmpz_hamdist on 2000 random operand pairs.  */
+void
+check_rand (void)
+{
+  gmp_randstate_ptr  rands = RANDS;
+  unsigned long  expected, actual;
+  int    rep;
+  mpz_t  a, b;
+
+  mpz_init (a);
+  mpz_init (b);
+  for (rep = 2000; rep > 0; rep--)
+    {
+      mpz_erandomb (a, rands, 6 * GMP_NUMB_BITS);
+      mpz_negrandom (a, rands);
+      mpz_mul_2exp (a, a, urandom() % (4 * GMP_NUMB_BITS));
+
+      mpz_erandomb (b, rands, 6 * GMP_NUMB_BITS);
+      mpz_negrandom (b, rands);
+      mpz_mul_2exp (b, b, urandom() % (4 * GMP_NUMB_BITS));
+
+      expected = refmpz_hamdist (a, b);
+      actual = mpz_hamdist (a, b);
+      if (actual == expected)
+        continue;
+
+      printf    ("mpz_hamdist wrong on random\n");
+      printf    ("  got  %lu\n", actual);
+      printf    ("  want %lu\n", expected);
+      mpz_trace ("  x   ", a);
+      mpz_trace ("  y   ", b);
+      abort();
+    }
+  mpz_clear (a);
+  mpz_clear (b);
+}
+
+int
+main (void)
+{
+  tests_start ();
+  mp_trace_base = -16;  /* base for the mpz_trace dumps; presumably hex */
+  /* Both checks abort() on the first mismatch, so exit (0) means success.  */
+  check_twobits ();
+  check_rand ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-import.c b/tests/mpz/t-import.c

new file mode 100644 (file)

index 0000000..0c7b929
--- /dev/null
+++ b/tests/mpz/t-import.c
@@ -0,0 +1,176 @@
+/* Test mpz_import.
+
+Copyright 2002, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* Run mpz_import over a table of byte strings and compare with known values.  */
+void
+check_data (void)
+{
+  static const struct {
+    const char  *want;      /* expected value, parsed below with base 0 */
+    size_t      count;      /* count..nail are handed to mpz_import as is */
+    int         order;
+    size_t      size;
+    int         endian;
+    int         nail;
+    char        src[64];    /* input bytes; only count*size of them are used */
+  } data[] = {
+
+    { "0", 0,1, 1,1, 0 },
+    { "0", 1,1, 0,1, 0 },
+
+    { "0x12345678", 4,1,  1,1, 0, { '\22', '\64', '\126', '\170' } },
+    { "0x12345678", 1,1,  4,1, 0, { '\22', '\64', '\126', '\170' } },
+    { "0x12345678", 1,-1, 4,1, 0, { '\22', '\64', '\126', '\170' } },
+
+    { "0x12345678", 4,-1, 1,-1, 0, { '\170', '\126', '\064', '\22' } },
+    { "0x12345678", 1,1,  4,-1, 0, { '\170', '\126', '\064', '\22' } },
+    { "0x12345678", 1,-1, 4,-1, 0, { '\170', '\126', '\064', '\22' } },
+
+    { "0",    5,1,  1,1, 7, { '\376', '\376', '\376', '\376', '\376' } },
+    { "0",    5,-1, 1,1, 7, { '\376', '\376', '\376', '\376', '\376' } },
+    { "0x15", 5,1,  1,1, 7, { '\377', '\376', '\377', '\376', '\377' } },
+
+    { "0",    3,1,  2,1,   1, { '\200','\000', '\200','\000', '\200','\000' }},
+    { "0",    3,1,  2,-1,  1, { '\000','\200', '\000','\200', '\000','\200' }},
+    { "0",    3,1,  2,1,  15, { '\377','\376', '\377','\376', '\377','\376' }},
+
+    { "0x2A", 3,1,  2,1, 14, { '\377','\376', '\377','\376', '\377','\376' } },
+    { "0x06", 3,1,  2,1, 14, { '\377','\374', '\377','\375', '\377','\376' } },
+    { "0x24", 3,-1, 2,1, 14, { '\377','\374', '\377','\375', '\377','\376' } },
+
+    { "0x123456789ABC", 3,1,  2,1,  0, {
+        '\022','\064', '\126','\170', '\232','\274' } },
+    { "0x123456789ABC", 3,-1, 2,1,  0, {
+        '\232','\274', '\126','\170', '\022','\064' } },
+    { "0x123456789ABC", 3,1,  2,-1, 0, {
+        '\064','\022', '\170','\126', '\274','\232' } },
+    { "0x123456789ABC", 3,-1, 2,-1, 0, {
+        '\274','\232', '\170','\126', '\064','\022' } },
+
+    { "0x112233445566778899AABBCC", 3,1,  4,1,  0,
+      { '\021','\042','\063','\104',
+        '\125','\146','\167','\210',
+        '\231','\252','\273','\314' } },
+    { "0x112233445566778899AABBCC", 3,-1, 4,1,  0,
+      { '\231','\252','\273','\314',
+        '\125','\146','\167','\210',
+        '\021','\042','\063','\104' } },
+    { "0x112233445566778899AABBCC", 3,1,  4,-1, 0,
+      { '\104','\063','\042','\021',
+        '\210','\167','\146','\125',
+        '\314','\273','\252','\231' } },
+    { "0x112233445566778899AABBCC", 3,-1, 4,-1, 0,
+      { '\314','\273','\252','\231',
+        '\210','\167','\146','\125',
+        '\104','\063','\042','\021' } },
+
+    { "0x100120023003400450056006700780089009A00AB00BC00C", 3,1,  8,1,  0,
+      { '\020','\001','\040','\002','\060','\003','\100','\004',
+        '\120','\005','\140','\006','\160','\007','\200','\010',
+        '\220','\011','\240','\012','\260','\013','\300','\014' } },
+    { "0x100120023003400450056006700780089009A00AB00BC00C", 3,-1, 8,1,  0,
+      { '\220','\011','\240','\012','\260','\013','\300','\014',
+        '\120','\005','\140','\006','\160','\007','\200','\010',
+        '\020','\001','\040','\002','\060','\003','\100','\004' } },
+    { "0x100120023003400450056006700780089009A00AB00BC00C", 3,1,  8,-1, 0,
+      { '\004','\100','\003','\060','\002','\040','\001','\020',
+        '\010','\200','\007','\160','\006','\140','\005','\120',
+        '\014','\300','\013','\260','\012','\240','\011','\220' } },
+    { "0x100120023003400450056006700780089009A00AB00BC00C", 3,-1, 8,-1, 0,
+      { '\014','\300','\013','\260','\012','\240','\011','\220',
+        '\010','\200','\007','\160','\006','\140','\005','\120',
+        '\004','\100','\003','\060','\002','\040','\001','\020' } },
+
+    { "0x155555555555555555555555", 3,1,  4,1,  1,
+      { '\325','\125','\125','\125',
+        '\252','\252','\252','\252',
+        '\325','\125','\125','\125' } },
+    { "0x155555555555555555555555", 3,-1,  4,1,  1,
+      { '\325','\125','\125','\125',
+        '\252','\252','\252','\252',
+        '\325','\125','\125','\125' } },
+    { "0x155555555555555555555555", 3,1,  4,-1,  1,
+      { '\125','\125','\125','\325',
+        '\252','\252','\252','\252',
+        '\125','\125','\125','\325' } },
+    { "0x155555555555555555555555", 3,-1,  4,-1,  1,
+      { '\125','\125','\125','\325',
+        '\252','\252','\252','\252',
+        '\125','\125','\125','\325' } },
+  };
+
+  char    buf[sizeof(data[0].src) + sizeof (mp_limb_t)];
+  char    *src;
+  size_t  align;
+  int     i;
+  mpz_t   got, want;
+
+  mpz_init (got);
+  mpz_init (want);
+  /* Each case is imported once for every byte offset of src within a limb.  */
+  for (i = 0; i < numberof (data); i++)
+    {
+      for (align = 0; align < sizeof (mp_limb_t); align++)
+        {
+          mpz_set_str_or_abort (want, data[i].want, 0);
+          src = buf + align;
+          memcpy (src, data[i].src, data[i].count * data[i].size);
+
+          mpz_set_ui (got, 0L);
+          mpz_import (got, data[i].count, data[i].order,
+                      data[i].size, data[i].endian, data[i].nail, src);
+
+          MPZ_CHECK_FORMAT (got);
+          if (mpz_cmp (got, want) != 0)
+            {
+              printf ("wrong at data[%d]\n", i);
+              printf ("    count=%lu order=%d  size=%lu endian=%d nail=%u  align=%lu\n",
+                      (unsigned long) data[i].count, data[i].order,
+                      (unsigned long) data[i].size, data[i].endian, data[i].nail,
+                      (unsigned long) align);
+              mpz_trace ("    got ", got);
+              mpz_trace ("    want", want);
+              abort ();
+            }
+        }
+    }
+  mpz_clear (got);
+  mpz_clear (want);
+}
+
+
+int
+main (void)
+{
+  tests_start ();
+  /* check_data abort()s on the first mismatch, so exit (0) means success.  */
+  mp_trace_base = -16;  /* base for the mpz_trace dumps; presumably hex */
+  check_data ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-inp_str.c b/tests/mpz/t-inp_str.c

new file mode 100644 (file)

index 0000000..7c08937
--- /dev/null
+++ b/tests/mpz/t-inp_str.c
@@ -0,0 +1,188 @@
+/* Test mpz_inp_str.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#if HAVE_UNISTD_H
+#include <unistd.h>            /* for unlink */
+#endif
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+#define FILENAME  "t-inp_str.tmp"
+
+
+/* Feed mpz_inp_str from a scratch file and check the value and the read count.  */
+void
+check_data (void)
+{
+  static const struct {
+    const char  *inp;         /* text written to the file */
+    int         base;         /* base argument for mpz_inp_str */
+    const char  *want;        /* expected value, parsed below with base 0 */
+    int         want_nread;   /* expected return; "pre" is added if non-zero */
+  } data[] = {
+
+    { "0",   10, "0", 1 },
+
+    { "abc", 10, "0", 0 },
+    { "ghi", 16, "0", 0 },
+
+    {  "ff", 16,  "255", 2 },
+    { "-ff", 16, "-255", 3 },
+    {  "FF", 16,  "255", 2 },
+    { "-FF", 16, "-255", 3 },
+
+    { "z", 36, "35", 1 },
+    { "Z", 36, "35", 1 },
+
+    {  "0x0",    0,   "0", 3 },
+    {  "0x10",   0,  "16", 4 },
+    { "-0x0",    0,   "0", 4 },
+    { "-0x10",   0, "-16", 5 },
+
+    {  "00",   0,  "0", 2 },
+    {  "010",  0,  "8", 3 },
+    { "-00",   0,  "0", 3 },
+    { "-010",  0, "-8", 4 },
+
+    {  "0x",     0,   "0", 2 },
+    {  "0",      0,   "0", 1 },
+  };
+
+  mpz_t  got, want;
+  long   ftell_nread;
+  int    i, pre, post, j, got_nread, want_nread;
+  FILE   *fp;
+
+  mpz_init (got);
+  mpz_init (want);
+  /* Every input is tried with 0..3 leading and 0..2 trailing spaces.  */
+  for (i = 0; i < numberof (data); i++)
+    {
+      for (pre = 0; pre <= 3; pre++)
+       {
+         for (post = 0; post <= 2; post++)
+           {
+             mpz_set_str_or_abort (want, data[i].want, 0);
+             MPZ_CHECK_FORMAT (want);
+
+             /* create the file new each time to ensure its length is what
+                we want */
+             fp = fopen (FILENAME, "w+");
+             ASSERT_ALWAYS (fp != NULL);
+             for (j = 0; j < pre; j++)
+               putc (' ', fp);
+             fputs (data[i].inp, fp);
+             for (j = 0; j < post; j++)
+               putc (' ', fp);
+             fflush (fp);
+             ASSERT_ALWAYS (! ferror(fp));
+
+             rewind (fp);
+             got_nread = mpz_inp_str (got, fp, data[i].base);
+
+             if (got_nread != 0)
+               {
+                 ftell_nread = ftell (fp);
+                 if (got_nread != ftell_nread)
+                   {
+                     printf ("mpz_inp_str nread wrong\n");
+                     printf ("  inp          \"%s\"\n", data[i].inp);
+                     printf ("  base         %d\n", data[i].base);
+                     printf ("  pre          %d\n", pre);
+                     printf ("  post         %d\n", post);
+                     printf ("  got_nread    %d\n", got_nread);
+                     printf ("  ftell_nread  %ld\n", ftell_nread);
+                     abort ();
+                   }
+               }
+
+             /* if data[i].inp is a whole string to read and there's no post
+                whitespace then expect to have EOF */
+             if (post == 0 && data[i].want_nread == strlen(data[i].inp))
+               {
+                 int  c = getc(fp);
+                 if (c != EOF)
+                   {
+                     printf ("mpz_inp_str didn't read to EOF\n");
+                     printf ("  inp   \"%s\"\n", data[i].inp);
+                     printf ("  base  %d\n", data[i].base);
+                     printf ("  pre   %d\n", pre);
+                     printf ("  post  %d\n", post);
+                     printf ("  c     '%c' %#x\n", c, c);
+                     abort ();
+                   }
+               }
+
+             /* only expect "pre" included in the count when non-zero */
+             want_nread = data[i].want_nread;
+             if (want_nread != 0)
+               want_nread += pre;
+
+             if (got_nread != want_nread)
+               {
+                 printf ("mpz_inp_str nread wrong\n");
+                 printf ("  inp         \"%s\"\n", data[i].inp);
+                 printf ("  base        %d\n", data[i].base);
+                 printf ("  pre         %d\n", pre);
+                 printf ("  post        %d\n", post);
+                 printf ("  got_nread   %d\n", got_nread);
+                 printf ("  want_nread  %d\n", want_nread);
+                 abort ();
+               }
+
+             MPZ_CHECK_FORMAT (got);
+
+             if (mpz_cmp (got, want) != 0)
+               {
+                 printf ("mpz_inp_str wrong result\n");
+                 printf ("  inp   \"%s\"\n", data[i].inp);
+                 printf ("  base  %d\n", data[i].base);
+                 mpz_trace ("  got ",  got);
+                 mpz_trace ("  want", want);
+                 abort ();
+               }
+
+             ASSERT_ALWAYS (fclose (fp) == 0);
+           }
+       }
+    }
+
+  mpz_clear (got);
+  mpz_clear (want);
+}
+
+int
+main (void)
+{
+  tests_start ();
+
+  check_data ();
+  /* check_data leaves FILENAME on disk; remove it (result is not checked).  */
+  unlink (FILENAME);
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-invert.c b/tests/mpz/t-invert.c

new file mode 100644 (file)

index 0000000..842310d
--- /dev/null
+++ b/tests/mpz/t-invert.c
@@ -0,0 +1,120 @@
+/* Test mpz_invert.
+
+Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2003, 2004, 2005,
+2008, 2009, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+/* Check mpz_invert on random a, m: a reported inverse must be valid, and a
+   reported failure must be justified by |m| <= 1 or gcd(a,m) != 1.  */
+int
+main (int argc, char **argv)
+{
+  mpz_t a, m, ainv, t;
+  int test, r;
+  gmp_randstate_ptr rands;
+  mpz_t bs;
+  unsigned long bsi, size_range;
+  int reps = 1000;
+
+  tests_start ();
+  TESTS_REPS (reps, argv, argc);  /* presumably lets the command line set reps */
+  rands = RANDS;
+  mpz_init (bs);
+  mpz_init (a);
+  mpz_init (m);
+  mpz_init (ainv);
+  mpz_init (t);
+
+  for (test = 0; test < reps; test++)
+    {
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 16 + 2;
+      /* a and m each get a random bit count below 2^size_range; m is redrawn until non-zero */
+      mpz_urandomb (bs, rands, size_range);
+      mpz_rrandomb (a, rands, mpz_get_ui (bs));
+      do {
+       mpz_urandomb (bs, rands, size_range);
+       mpz_rrandomb (m, rands, mpz_get_ui (bs));
+      } while (mpz_sgn (m) == 0);
+      /* the two low bits of a random byte decide the signs of a and m */
+      mpz_urandomb (bs, rands, 8);
+      bsi = mpz_get_ui (bs);
+
+      if ((bsi & 1) != 0)
+       mpz_neg (a, a);
+      if ((bsi & 2) != 0)
+       mpz_neg (m, m);
+
+      r = mpz_invert (ainv, a, m);
+      if (r != 0)
+       {
+         MPZ_CHECK_FORMAT (ainv);
+         /* a reported inverse must satisfy 0 < ainv < |m| */
+         if (mpz_cmp_ui (ainv, 0) <= 0 || mpz_cmpabs (ainv, m) >= 0)
+           {
+             fprintf (stderr, "ERROR in test %d\n", test);
+             gmp_fprintf (stderr, "Inverse out of range.\n");
+             gmp_fprintf (stderr, "a = %Zx\n", a);
+             gmp_fprintf (stderr, "m = %Zx\n", m);
+             abort ();
+           }
+         /* and ainv*a reduced by mpz_mod must be exactly 1 */
+         mpz_mul (t, ainv, a);
+         mpz_mod (t, t, m);
+
+         if (mpz_cmp_ui (t, 1) != 0)
+           {
+             fprintf (stderr, "ERROR in test %d\n", test);
+             gmp_fprintf (stderr, "a^(-1)*a != 1 (mod m)\n");
+             gmp_fprintf (stderr, "a = %Zx\n", a);
+             gmp_fprintf (stderr, "m = %Zx\n", m);
+             abort ();
+           }
+       }
+      else /* Inverse does not exist */
+       {
+         if (mpz_cmpabs_ui (m, 1) <= 0)
+           continue; /* OK */
+         /* otherwise a failure is only acceptable when gcd(a,m) != 1 */
+         mpz_gcd (t, a, m);
+         if (mpz_cmp_ui (t, 1) == 0)
+           {
+             fprintf (stderr, "ERROR in test %d\n", test);
+             gmp_fprintf (stderr, "Inverse exists, but was not found.\n");
+             gmp_fprintf (stderr, "a = %Zx\n", a);
+             gmp_fprintf (stderr, "m = %Zx\n", m);
+             abort ();
+           }
+       }
+    }
+
+  mpz_clear (bs);
+  mpz_clear (a);
+  mpz_clear (m);
+  mpz_clear (ainv);
+  mpz_clear (t);
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-io_raw.c b/tests/mpz/t-io_raw.c

new file mode 100644 (file)

index 0000000..433d7d6
--- /dev/null
+++ b/tests/mpz/t-io_raw.c
@@ -0,0 +1,287 @@
+/* Test mpz_inp_raw and mpz_out_raw.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#if HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+#define FILENAME  "t-io_raw.tmp"
+
+
+/* In the fopen, "b" selects binary mode on DOS systems, meaning no
+   conversion of '\n' to and from CRLF.  It's believed systems without such
+   nonsense will simply ignore the "b", but in case that's not so a plain
+   "w+" is attempted if "w+b" fails.  */
+
+FILE *
+fopen_wplusb_or_die (const char *filename)
+{
+  /* Binary read/write mode first, then plain "w+" as the fallback.  */
+  FILE  *stream = fopen (filename, "w+b");
+
+  if (stream == NULL && (stream = fopen (filename, "w+")) == NULL)
+    {
+      /* Neither mode could create the file: report its name and die.  */
+      printf ("Cannot create file %s\n", filename);
+      abort ();
+    }
+  /* Never NULL here.  */
+  return stream;
+}
+
+/* use 0x80 to check nothing bad happens with sign extension etc */
+#define BYTEVAL(i)  (((i) + 1) | 0x80)
+
+/* Hand-build raw files (4-byte big-endian size, zero padding, data bytes,
+   trailing garbage) and check mpz_inp_raw's value and return count.  */
+void
+check_in (void)
+{
+  int        i, j, zeros, neg, error = 0;
+  mpz_t      want, got;
+  size_t     want_ret, got_ret;
+  mp_size_t  size;
+  FILE       *fp;
+
+  mpz_init (want);
+  mpz_init (got);
+
+  for (i = 0; i < 32; i++)
+    {
+      for (zeros = 0; zeros < 8; zeros++)
+       {
+         for (neg = 0; neg <= 1; neg++)
+           {
+             want_ret = i + zeros + 4;  /* data + padding + size header */
+             /* need this to get the twos complement right */
+             ASSERT_ALWAYS (sizeof (size) >= 4);
+
+             size = i + zeros;
+             if (neg)
+               size = -size;
+             /* size goes out most significant byte first, low 32 bits only */
+             fp = fopen_wplusb_or_die (FILENAME);
+             for (j = 3; j >= 0; j--)
+               ASSERT_ALWAYS (putc ((size >> (j*8)) & 0xFF, fp) != EOF);
+             for (j = 0; j < zeros; j++)
+               ASSERT_ALWAYS (putc ('\0', fp) != EOF);
+             for (j = 0; j < i; j++)
+               ASSERT_ALWAYS (putc (BYTEVAL (j), fp) != EOF);
+             /* and some trailing garbage */
+             ASSERT_ALWAYS (putc ('x', fp) != EOF);
+             ASSERT_ALWAYS (putc ('y', fp) != EOF);
+             ASSERT_ALWAYS (putc ('z', fp) != EOF);
+             ASSERT_ALWAYS (fflush (fp) == 0);
+             rewind (fp);
+
+             got_ret = mpz_inp_raw (got, fp);
+             ASSERT_ALWAYS (! ferror(fp));
+             ASSERT_ALWAYS (fclose (fp) == 0);
+
+             MPZ_CHECK_FORMAT (got);
+             if (got_ret != want_ret)
+               {
+                 printf ("check_in: return value wrong\n");
+                 error = 1;
+               }
+             if (mpz_cmp (got, want) != 0)
+               {
+                 printf ("check_in: result wrong\n");
+                 error = 1;
+               }
+             if (error)
+               {
+                 printf    ("  i=%d zeros=%d neg=%d\n", i, zeros, neg);
+                 printf    ("  got_ret  %lu\n", (unsigned long) got_ret);
+                 printf    ("  want_ret %lu\n", (unsigned long) want_ret);
+                 mpz_trace ("  got      ", got);
+                 mpz_trace ("  want     ", want);
+                 abort ();
+               }
+             /* flip the sign: the neg=1 pass checks -want, then it is restored */
+             mpz_neg (want, want);
+           }
+       }
+      mpz_mul_2exp (want, want, 8);  /* append data byte i for the next round */
+      mpz_add_ui (want, want, (unsigned long) BYTEVAL (i));
+    }
+
+  mpz_clear (want);
+  mpz_clear (got);
+}
+
+
+/* Check the bytes and return value of mpz_out_raw against a hand-built image.  */
+void
+check_out (void)
+{
+  int        i, j, neg, error = 0;
+  mpz_t      z;
+  char       want[256], got[256], *p;
+  size_t     want_len, got_ret, got_read;
+  mp_size_t  size;
+  FILE       *fp;
+
+  mpz_init (z);
+  for (i = 0; i < 32; i++)
+    {
+      for (neg = 0; neg <= 1; neg++)
+       {
+         want_len = i + 4;  /* data bytes + size header */
+         /* need this to get the twos complement right */
+         ASSERT_ALWAYS (sizeof (size) >= 4);
+
+         size = i;
+         if (neg)
+           size = -size;
+         /* expected image: size, most significant byte first, then i data bytes */
+         p = want;
+         for (j = 3; j >= 0; j--)
+           *p++ = size >> (j*8);
+         for (j = 0; j < i; j++)
+           *p++ = BYTEVAL (j);
+         ASSERT_ALWAYS (p <= want + sizeof (want));
+
+         fp = fopen_wplusb_or_die (FILENAME);
+         got_ret = mpz_out_raw (fp, z);
+         ASSERT_ALWAYS (fflush (fp) == 0);
+         rewind (fp);
+         got_read = fread (got, 1, sizeof(got), fp);
+         ASSERT_ALWAYS (! ferror(fp));
+         ASSERT_ALWAYS (fclose (fp) == 0);
+         /* NOTE(review): if got_read < want_len, the memcmp and the dump
+            below also read bytes of got that fread did not fill.  */
+         if (got_ret != want_len)
+           {
+             printf ("check_out: wrong return value\n");
+             error = 1;
+           }
+         if (got_read != want_len)
+           {
+             printf ("check_out: wrong number of bytes read back\n");
+             error = 1;
+           }
+         if (memcmp (want, got, want_len) != 0)
+           {
+             printf ("check_out: wrong data\n");
+             error = 1;
+           }
+         if (error)
+           {
+             printf    ("  i=%d neg=%d\n", i, neg);
+             mpz_trace ("  z", z);
+             printf    ("  got_ret  %lu\n", (unsigned long) got_ret);
+             printf    ("  got_read %lu\n", (unsigned long) got_read);
+             printf    ("  want_len %lu\n", (unsigned long) want_len);
+             printf    ("  want");
+             for (j = 0; j < want_len; j++)
+               printf (" %02X", (unsigned) (unsigned char) want[j]);
+             printf    ("\n");
+             printf    ("  got ");
+             for (j = 0; j < want_len; j++)
+               printf (" %02X", (unsigned) (unsigned char) got[j]);
+             printf    ("\n");
+             abort ();
+           }
+         /* flip the sign: the neg=1 pass writes -z, then it is restored */
+         mpz_neg (z, z);
+       }
+      mpz_mul_2exp (z, z, 8);  /* append data byte i for the next round */
+      mpz_add_ui (z, z, (unsigned long) BYTEVAL (i));
+    }
+
+  mpz_clear (z);
+}
+
+
+/* Round trip: write 500 random values with mpz_out_raw, read each back with
+   mpz_inp_raw, and require equal values and equal return counts.  */
+void
+check_rand (void)
+{
+  gmp_randstate_ptr  rands = RANDS;
+  int        rep, bad_ret, bad_val;
+  mpz_t      reread, orig;
+  size_t     nread, nwritten;
+  FILE       *stream;
+
+  mpz_init (orig);
+  mpz_init (reread);
+
+  for (rep = 500; rep > 0; rep--)
+    {
+      mpz_erandomb (orig, rands, 10*GMP_LIMB_BITS);
+      mpz_negrandom (orig, rands);
+
+      /* write, rewind, and read back through the same stream */
+      stream = fopen_wplusb_or_die (FILENAME);
+      nwritten = mpz_out_raw (stream, orig);
+      ASSERT_ALWAYS (fflush (stream) == 0);
+      rewind (stream);
+      nread = mpz_inp_raw (reread, stream);
+      ASSERT_ALWAYS (fclose (stream) == 0);
+
+      MPZ_CHECK_FORMAT (reread);
+
+      bad_ret = (nread != nwritten);
+      if (bad_ret)
+        printf ("check_rand: different inp/out return values\n");
+      bad_val = (mpz_cmp (reread, orig) != 0);
+      if (bad_val)
+        printf ("check_rand: wrong result\n");
+      if (! bad_ret && ! bad_val)
+        continue;
+
+      /* at least one complaint was printed above: dump the details and stop */
+      printf    ("  out_ret %lu\n", (unsigned long) nwritten);
+      printf    ("  inp_ret %lu\n", (unsigned long) nread);
+      mpz_trace ("  want", orig);
+      mpz_trace ("  got ", reread);
+      abort ();
+    }
+
+  mpz_clear (reread);
+  mpz_clear (orig);
+}
+
+
+int
+main (void)
+{
+  tests_start ();
+  mp_trace_base = -16;  /* base for the mpz_trace dumps; presumably hex */
+  /* The three checks abort() on the first mismatch they detect.  */
+  check_in ();
+  check_out ();
+  check_rand ();
+
+  unlink (FILENAME);  /* remove the scratch file; the result is not checked */
+  tests_end ();
+
+  exit (0);
+}
diff --git a/tests/mpz/t-jac.c b/tests/mpz/t-jac.c

new file mode 100644 (file)

index 0000000..1b3e092
--- /dev/null
+++ b/tests/mpz/t-jac.c
@@ -0,0 +1,747 @@
+/* Exercise mpz_*_kronecker_*() and mpz_jacobi() functions.
+
+Copyright 1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+/* With no arguments the various Kronecker/Jacobi symbol routines are
+   checked against some test data and a lot of derived data.
+
+   To check the test data against PARI-GP, run
+
+          t-jac -p | gp -q
+
+   It takes a while because the output from "t-jac -p" is big.
+
+
+   Enhancements:
+
+   More big test cases than those given by check_squares_zi would be good.  */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+#ifdef _LONG_LONG_LIMB
+#define LL(l,ll)  ll
+#else
+#define LL(l,ll)  l
+#endif
+
+
+int option_pari = 0;
+
+
+/* Return the remainder mpz_fdiv_r_2exp gives for z divided by 2^2,
+   converted to an unsigned long.  */
+unsigned long
+mpz_mod4 (mpz_srcptr z)
+{
+  unsigned long  low2;
+  mpz_t          rem;
+
+  mpz_init (rem);
+  mpz_fdiv_r_2exp (rem, z, 2);
+  low2 = mpz_get_ui (rem);
+  mpz_clear (rem);
+
+  return low2;
+}
+
+/* Non-zero when the size field of z is 0 or 1, that is when z is zero or
+   a single positive limb.  */
+int
+mpz_fits_ulimb_p (mpz_srcptr z)
+{
+  return (SIZ(z) >= 0 && SIZ(z) <= 1);
+}
+
+/* Return the low limb of z, or 0 when the size field of z is zero (in
+   which case the limb data is not read).  */
+mp_limb_t
+mpz_get_ulimb (mpz_srcptr z)
+{
+  return (SIZ(z) != 0 ? PTR(z)[0] : 0);
+}
+
+
+/* Check mpn_jacobi_base (a, b, 0) against answer, aborting with a
+   diagnostic on a mismatch.  The check is only made when b is odd, b != 1
+   and a <= b; any other operands are silently skipped.  */
+void
+try_base (mp_limb_t a, mp_limb_t b, int answer)
+{
+  int  result;
+
+  if ((b & 1) != 0 && b != 1 && a <= b)
+    {
+      result = mpn_jacobi_base (a, b, 0);
+      if (result != answer)
+        {
+          /* LL picks the format matching the limb type */
+          printf (LL("mpn_jacobi_base (%lu, %lu) is %d should be %d\n",
+                     "mpn_jacobi_base (%llu, %llu) is %d should be %d\n"),
+                  a, b, result, answer);
+          abort ();
+        }
+    }
+}
+
+
+/* Check mpz_kronecker_ui (a, b) against answer, aborting with a
+   diagnostic on a mismatch.  */
+void
+try_zi_ui (mpz_srcptr a, unsigned long b, int answer)
+{
+  int  result = mpz_kronecker_ui (a, b);
+
+  if (result == answer)
+    return;
+
+  printf ("mpz_kronecker_ui (");
+  mpz_out_str (stdout, 10, a);
+  printf (", %lu) is %d should be %d\n", b, result, answer);
+  abort ();
+}
+
+
+/* Check mpz_kronecker_si (a, b) against answer, aborting with a
+   diagnostic on a mismatch.  */
+void
+try_zi_si (mpz_srcptr a, long b, int answer)
+{
+  int  result = mpz_kronecker_si (a, b);
+
+  if (result == answer)
+    return;
+
+  printf ("mpz_kronecker_si (");
+  mpz_out_str (stdout, 10, a);
+  printf (", %ld) is %d should be %d\n", b, result, answer);
+  abort ();
+}
+
+
+/* Check mpz_ui_kronecker (a, b) against answer, aborting with a
+   diagnostic on a mismatch.  */
+void
+try_ui_zi (unsigned long a, mpz_srcptr b, int answer)
+{
+  int  result = mpz_ui_kronecker (a, b);
+
+  if (result == answer)
+    return;
+
+  printf ("mpz_ui_kronecker (%lu, ", a);
+  mpz_out_str (stdout, 10, b);
+  printf (") is %d should be %d\n", result, answer);
+  abort ();
+}
+
+
+/* Check mpz_si_kronecker (a, b) against answer, aborting with a
+   diagnostic on a mismatch.  */
+void
+try_si_zi (long a, mpz_srcptr b, int answer)
+{
+  int  result = mpz_si_kronecker (a, b);
+
+  if (result == answer)
+    return;
+
+  printf ("mpz_si_kronecker (%ld, ", a);
+  mpz_out_str (stdout, 10, b);
+  printf (") is %d should be %d\n", result, answer);
+  abort ();
+}
+
+
+/* Check mpz_kronecker (a, b) against answer, aborting with a diagnostic on
+   a mismatch.
+
+   mpz_jacobi is deliberately not checked here: it only differs for b even,
+   and there is no actual expected answer for it.  tests/devel/try.c does
+   some checks on it though.  */
+void
+try_zi_zi (mpz_srcptr a, mpz_srcptr b, int answer)
+{
+  int  result = mpz_kronecker (a, b);
+
+  if (result == answer)
+    return;
+
+  printf ("mpz_kronecker (");
+  mpz_out_str (stdout, 10, a);
+  printf (", ");
+  mpz_out_str (stdout, 10, b);
+  printf (") is %d should be %d\n", result, answer);
+  abort ();
+}
+
+
+/* Print one line of the form "try(a,b,answer)" on stdout, with a and b in
+   decimal.  Nothing is checked here.  */
+void
+try_pari (mpz_srcptr a, mpz_srcptr b, int answer)
+{
+  fputs ("try(", stdout);
+  mpz_out_str (stdout, 10, a);
+  fputs (",", stdout);
+  mpz_out_str (stdout, 10, b);
+  printf (",%d)\n", answer);
+}
+
+
+/* Check (a/b) == answer through every entry point whose argument types
+   can represent a and b exactly.
+
+   With option_pari set nothing is checked; the case is only printed by
+   try_pari() and the function returns.  */
+void
+try_each (mpz_srcptr a, mpz_srcptr b, int answer)
+{
+  if (option_pari)
+    {
+      try_pari (a, b, answer);
+      return;
+    }
+
+  /* single-limb a and b; try_base() applies its own further restrictions */
+  if (mpz_fits_ulimb_p (a) && mpz_fits_ulimb_p (b))
+    try_base (mpz_get_ulimb (a), mpz_get_ulimb (b), answer);
+
+  if (mpz_fits_ulong_p (b))
+    try_zi_ui (a, mpz_get_ui (b), answer);
+
+  if (mpz_fits_slong_p (b))
+    try_zi_si (a, mpz_get_si (b), answer);
+
+  if (mpz_fits_ulong_p (a))
+    try_ui_zi (mpz_get_ui (a), b, answer);
+
+  /* try_si_zi() takes a long, so gate on slong exactly like the
+     mpz_kronecker_si case above.  Gating on sint skipped every a outside
+     the int range on systems where long is wider than int.  */
+  if (mpz_fits_slong_p (a))
+    try_si_zi (mpz_get_si (a), b, answer);
+
+  try_zi_zi (a, b, answer);
+}
+
+
+/* Check (a/b) == answer, then (a/-b).  The expected value for (a/-b) is
+   answer negated when a is negative, and answer unchanged otherwise.  */
+void
+try_pn (mpz_srcptr a, mpz_srcptr b_orig, int answer)
+{
+  mpz_t  b;
+  int    answer_negb = (mpz_sgn (a) < 0 ? -answer : answer);
+
+  /* work on a private copy so the caller's b is left alone */
+  mpz_init_set (b, b_orig);
+  try_each (a, b, answer);
+
+  mpz_neg (b, b);
+  try_each (a, b, answer_negb);
+
+  mpz_clear (b);
+}
+
+
+/* Check (a+k*p/b) == answer for k = 1..6 and k = -1..-6, using the fact
+   (a/b) is periodic in a with period p.  For b>0, p=b if b!=2mod4 or p=4*b
+   if b==2mod4.  Nothing is done for b<=0.  */
+
+void
+try_periodic_num (mpz_srcptr a_orig, mpz_srcptr b, int answer)
+{
+  mpz_t  a, period;
+  int    step;
+
+  if (mpz_sgn (b) <= 0)
+    return;
+
+  mpz_init_set (a, a_orig);
+  mpz_init_set (period, b);
+  if (mpz_mod4 (b) == 2)
+    mpz_mul_ui (period, period, 4);
+
+  /* skip the derived cases when they would all be even/even */
+  if (! (mpz_even_p (a) && mpz_even_p (b) && mpz_even_p (period)))
+    {
+      /* a+p, a+2p, ..., a+6p */
+      for (step = 1; step <= 6; step++)
+        {
+          mpz_add (a, a, period);
+          try_pn (a, b, answer);
+        }
+
+      /* a-p, a-2p, ..., a-6p, restarting from the original a */
+      mpz_set (a, a_orig);
+      for (step = 1; step <= 6; step++)
+        {
+          mpz_sub (a, a, period);
+          try_pn (a, b, answer);
+        }
+    }
+
+  mpz_clear (a);
+  mpz_clear (period);
+}
+
+
+/* Check (a/b+k*p) == answer for k = 1..6 and k = -1..-6, using the fact
+   (a/b) is periodic in b with period p:
+
+                              period p
+          a==0,1mod4             a
+          a==2mod4              4*a
+          a==3mod4 and b odd    4*a
+          a==3mod4 and b even   8*a
+
+   Henri Cohen's book gives the period as 4*a for all a==2,3mod4, but
+   (3/2)=-1 together with (3/14)=+1 would seem to be a counterexample: the
+   period there is rather 8*a, with (3/26)=-1.  Maybe the plain 4*a is to be
+   read as applying to the Jacobi symbol with b odd, rather than to the
+   Kronecker extension to b even.
+
+   Nothing is done when a or b is zero.  */
+
+void
+try_periodic_den (mpz_srcptr a, mpz_srcptr b_orig, int answer)
+{
+  mpz_t          b, period;
+  int            step;
+  unsigned long  a_mod4;
+
+  if (mpz_sgn (a) == 0 || mpz_sgn (b_orig) == 0)
+    return;
+
+  mpz_init_set (b, b_orig);
+  mpz_init_set (period, a);
+
+  /* scale a up to the period according to the table above */
+  a_mod4 = mpz_mod4 (a);
+  if (a_mod4 == 3 && mpz_even_p (b))
+    mpz_mul_ui (period, period, 8L);
+  else if (a_mod4 >= 2)
+    mpz_mul_ui (period, period, 4L);
+
+  /* skip the derived cases when they would all be even/even */
+  if (! (mpz_even_p (a) && mpz_even_p (b) && mpz_even_p (period)))
+    {
+      /* b+p, b+2p, ..., b+6p */
+      for (step = 1; step <= 6; step++)
+        {
+          mpz_add (b, b, period);
+          try_pn (a, b, answer);
+        }
+
+      /* b-p, b-2p, ..., b-6p, restarting from the original b */
+      mpz_set (b, b_orig);
+      for (step = 1; step <= 6; step++)
+        {
+          mpz_sub (b, b, period);
+          try_pn (a, b, answer);
+        }
+    }
+
+  mpz_clear (b);
+  mpz_clear (period);
+}
+
+
+/* Shift counts k used by try_2den() and try_2num(): the small values 0 to
+   7, then one below, exactly at, and one above 1, 2 and 3 times
+   GMP_NUMB_BITS.  */
+static const unsigned long  ktable[] = {
+  0, 1, 2, 3, 4, 5, 6, 7,
+  GMP_NUMB_BITS-1, GMP_NUMB_BITS, GMP_NUMB_BITS+1,
+  2*GMP_NUMB_BITS-1, 2*GMP_NUMB_BITS, 2*GMP_NUMB_BITS+1,
+  3*GMP_NUMB_BITS-1, 3*GMP_NUMB_BITS, 3*GMP_NUMB_BITS+1
+};
+
+
+/* Check (a/b*2^k) for each k in ktable[].  The expected value is answer
+   multiplied by (a/2)^k.  Nothing is done when b is zero.  */
+void
+try_2den (mpz_srcptr a, mpz_srcptr b_orig, int answer)
+{
+  mpz_t          b;
+  int            idx, a_over_2, expected;
+  unsigned long  k;
+
+  /* b==0 is not worth shifting */
+  if (mpz_sgn (b_orig) == 0)
+    return;
+
+  mpz_init_set (b, b_orig);
+
+  /* (a/2) is 0 if a even, 1 if a==1 or 7 mod 8, -1 if a==3 or 5 mod 8 */
+  if (mpz_even_p (a))
+    a_over_2 = 0;
+  else
+    {
+      /* negating the low limb of |a| gives the low bits of a negative a */
+      mp_limb_t  low = (SIZ(a) >= 0 ? PTR(a)[0] : -PTR(a)[0]);
+      a_over_2 = (((low + 2) & 7) < 4 ? 1 : -1);
+    }
+
+  for (idx = 0; idx < numberof (ktable); idx++)
+    {
+      k = ktable[idx];
+
+      /* expected = answer*((a/2)^k) */
+      if (a_over_2 == 0 && k != 0)
+        expected = 0;
+      else if (a_over_2 == -1 && (k & 1) == 1)
+        expected = -answer;
+      else
+        expected = answer;
+
+      mpz_mul_2exp (b, b_orig, k);
+      try_pn (a, b, expected);
+    }
+
+  mpz_clear (b);
+}
+
+
+/* Check (a*2^k/b) for each k in ktable[].  The expected value is answer
+   multiplied by (2/b)^k, with (2/b) worked out here from the low bits of
+   b.  If it happens mpz_ui_kronecker() gets (2/b) wrong it will show up as
+   wrong answers demanded.  Nothing is done when a is zero.  */
+void
+try_2num (mpz_srcptr a_orig, mpz_srcptr b, int answer)
+{
+  mpz_t          a;
+  int            idx, two_over_b, expected;
+  unsigned long  k;
+
+  /* a==0 is not worth shifting */
+  if (mpz_sgn (a_orig) == 0)
+    return;
+
+  mpz_init (a);
+
+  /* (2/b) is 0 if b even, 1 if b==1 or 7 mod 8, -1 if b==3 or 5 mod 8 */
+  if (mpz_even_p (b))
+    two_over_b = 0;
+  else
+    {
+      /* negating the low limb of |b| gives the low bits of a negative b */
+      mp_limb_t  low = (SIZ(b) >= 0 ? PTR(b)[0] : -PTR(b)[0]);
+      two_over_b = (((low + 2) & 7) < 4 ? 1 : -1);
+    }
+
+  for (idx = 0; idx < numberof (ktable); idx++)
+    {
+      k = ktable[idx];
+
+      /* expected = answer*((2/b)^k) */
+      if (two_over_b == 0 && k != 0)
+        expected = 0;
+      else if (two_over_b == -1 && (k & 1) == 1)
+        expected = -answer;
+      else
+        expected = answer;
+
+      mpz_mul_2exp (a, a_orig, k);
+      try_pn (a, b, expected);
+    }
+
+  mpz_clear (a);
+}
+
+
+/* Check (a/b) == answer directly and through every family of derived
+   cases.
+
+   try_2num() and try_2den() do not in turn call try_periodic_num() and
+   try_periodic_den(), because that hugely increases the number of tests
+   performed without obviously increasing coverage.
+
+   Useful extra derived cases can be added here.  */
+
+void
+try_all (mpz_t a, mpz_t b, int answer)
+{
+  /* the case itself, with both signs of b */
+  try_pn (a, b, answer);
+
+  /* shifted by multiples of the period in a, then in b */
+  try_periodic_num (a, b, answer);
+  try_periodic_den (a, b, answer);
+
+  /* multiplied by powers of 2 in a, then in b */
+  try_2num (a, b, answer);
+  try_2den (a, b, answer);
+}
+
+
+/* Run try_all() on each entry of a fixed table of (a, b, answer) triples,
+   where answer is the expected value of the symbol (a/b).  a and b are
+   held as strings and converted with mpz_set_str_or_abort().  */
+void
+check_data (void)
+{
+  static const struct {
+    const char  *a;
+    const char  *b;
+    int         answer;
+
+  } data[] = {
+
+    /* Note that the various derived checks in try_all() reduce the cases
+       that need to be given here.  */
+
+    /* some zeros */
+    {  "0",  "0", 0 },
+    {  "0",  "2", 0 },
+    {  "0",  "6", 0 },
+    {  "5",  "0", 0 },
+    { "24", "60", 0 },
+
+    /* (a/1) = 1, any a
+       In particular note (0/1)=1 so that (a/b)=(a mod b/b). */
+    { "0", "1", 1 },
+    { "1", "1", 1 },
+    { "2", "1", 1 },
+    { "3", "1", 1 },
+    { "4", "1", 1 },
+    { "5", "1", 1 },
+
+    /* (0/b) = 0, b != 1 */
+    { "0",  "3", 0 },
+    { "0",  "5", 0 },
+    { "0",  "7", 0 },
+    { "0",  "9", 0 },
+    { "0", "11", 0 },
+    { "0", "13", 0 },
+    { "0", "15", 0 },
+
+    /* (1/b) = 1 */
+    { "1",  "1", 1 },
+    { "1",  "3", 1 },
+    { "1",  "5", 1 },
+    { "1",  "7", 1 },
+    { "1",  "9", 1 },
+    { "1", "11", 1 },
+
+    /* (-1/b) = (-1)^((b-1)/2) which is -1 for b==3 mod 4 */
+    { "-1",  "1",  1 },
+    { "-1",  "3", -1 },
+    { "-1",  "5",  1 },
+    { "-1",  "7", -1 },
+    { "-1",  "9",  1 },
+    { "-1", "11", -1 },
+    { "-1", "13",  1 },
+    { "-1", "15", -1 },
+    { "-1", "17",  1 },
+    { "-1", "19", -1 },
+
+    /* (2/b) = (-1)^((b^2-1)/8) which is -1 for b==3,5 mod 8.
+       try_2num() will exercise multiple powers of 2 in the numerator.  */
+    { "2",  "1",  1 },
+    { "2",  "3", -1 },
+    { "2",  "5", -1 },
+    { "2",  "7",  1 },
+    { "2",  "9",  1 },
+    { "2", "11", -1 },
+    { "2", "13", -1 },
+    { "2", "15",  1 },
+    { "2", "17",  1 },
+
+    /* (-2/b) = (-1)^((b^2-1)/8)*(-1)^((b-1)/2) which is -1 for b==5,7mod8.
+       try_2num() will exercise multiple powers of 2 in the numerator, which
+       will test that the shift in mpz_si_kronecker() uses unsigned not
+       signed.  */
+    { "-2",  "1",  1 },
+    { "-2",  "3",  1 },
+    { "-2",  "5", -1 },
+    { "-2",  "7", -1 },
+    { "-2",  "9",  1 },
+    { "-2", "11",  1 },
+    { "-2", "13", -1 },
+    { "-2", "15", -1 },
+    { "-2", "17",  1 },
+
+    /* (a/2)=(2/a).
+       try_2den() will exercise multiple powers of 2 in the denominator. */
+    {  "3",  "2", -1 },
+    {  "5",  "2", -1 },
+    {  "7",  "2",  1 },
+    {  "9",  "2",  1 },
+    {  "11", "2", -1 },
+
+    /* Harriet Griffin, "Elementary Theory of Numbers", page 155, various
+       examples.  */
+    {   "2", "135",  1 },
+    { "135",  "19", -1 },
+    {   "2",  "19", -1 },
+    {  "19", "135",  1 },
+    { "173", "135",  1 },
+    {  "38", "135",  1 },
+    { "135", "173",  1 },
+    { "173",   "5", -1 },
+    {   "3",   "5", -1 },
+    {   "5", "173", -1 },
+    { "173",   "3", -1 },
+    {   "2",   "3", -1 },
+    {   "3", "173", -1 },
+    { "253",  "21",  1 },
+    {   "1",  "21",  1 },
+    {  "21", "253",  1 },
+    {  "21",  "11", -1 },
+    {  "-1",  "11", -1 },
+
+    /* Griffin page 147 */
+    {  "-1",  "17",  1 },
+    {   "2",  "17",  1 },
+    {  "-2",  "17",  1 },
+    {  "-1",  "89",  1 },
+    {   "2",  "89",  1 },
+
+    /* Griffin page 148 */
+    {  "89",  "11",  1 },
+    {   "1",  "11",  1 },
+    {  "89",   "3", -1 },
+    {   "2",   "3", -1 },
+    {   "3",  "89", -1 },
+    {  "11",  "89",  1 },
+    {  "33",  "89", -1 },
+
+    /* H. Davenport, "The Higher Arithmetic", page 65, the quadratic
+       residues and non-residues mod 19.  */
+    {  "1", "19",  1 },
+    {  "4", "19",  1 },
+    {  "5", "19",  1 },
+    {  "6", "19",  1 },
+    {  "7", "19",  1 },
+    {  "9", "19",  1 },
+    { "11", "19",  1 },
+    { "16", "19",  1 },
+    { "17", "19",  1 },
+    {  "2", "19", -1 },
+    {  "3", "19", -1 },
+    {  "8", "19", -1 },
+    { "10", "19", -1 },
+    { "12", "19", -1 },
+    { "13", "19", -1 },
+    { "14", "19", -1 },
+    { "15", "19", -1 },
+    { "18", "19", -1 },
+
+    /* Residues and non-residues mod 13 */
+    {  "0",  "13",  0 },
+    {  "1",  "13",  1 },
+    {  "2",  "13", -1 },
+    {  "3",  "13",  1 },
+    {  "4",  "13",  1 },
+    {  "5",  "13", -1 },
+    {  "6",  "13", -1 },
+    {  "7",  "13", -1 },
+    {  "8",  "13", -1 },
+    {  "9",  "13",  1 },
+    { "10",  "13",  1 },
+    { "11",  "13", -1 },
+    { "12",  "13",  1 },
+
+    /* various */
+    {  "5",   "7", -1 },
+    { "15",  "17",  1 },
+    { "67",  "89",  1 },
+
+    /* special values inducing a==b==1 at the end of jac_or_kron() */
+    { "0x10000000000000000000000000000000000000000000000001",
+      "0x10000000000000000000000000000000000000000000000003", 1 },
+  };
+
+  int    i;
+  mpz_t  a, b;
+
+  mpz_init (a);
+  mpz_init (b);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      /* base argument 0: presumably so that both the plain decimal entries
+         and the 0x-prefixed entries above are accepted -- confirm against
+         mpz_set_str_or_abort() */
+      mpz_set_str_or_abort (a, data[i].a, 0);
+      mpz_set_str_or_abort (b, data[i].b, 0);
+      try_all (a, b, data[i].answer);
+    }
+
+  mpz_clear (a);
+  mpz_clear (b);
+}
+
+
+/* Check random squares: (a^2/b)=1 if gcd(a,b)=1, or (a^2/b)=0 if
+   gcd(a,b)!=1.  This includes when a=0 or b=0.  50 random pairs are
+   generated and each is passed through try_all().  */
+void
+check_squares_zi (void)
+{
+  gmp_randstate_ptr rands = RANDS;
+  mpz_t      a, b, common, bs;
+  mp_size_t  size_range, a_bits, b_bits;
+  int        round, expected;
+
+  mpz_init (bs);
+  mpz_init (a);
+  mpz_init (b);
+  mpz_init (common);
+
+  for (round = 0; round < 50; round++)
+    {
+      /* size_range is in 2..11 and bounds the width of the bit counts */
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 10 + 2;
+
+      mpz_urandomb (bs, rands, size_range);
+      a_bits = mpz_get_ui (bs);
+      mpz_rrandomb (a, rands, a_bits);
+
+      mpz_urandomb (bs, rands, size_range);
+      b_bits = mpz_get_ui (bs);
+      mpz_rrandomb (b, rands, b_bits);
+
+      /* the gcd must be taken before a is squared in place */
+      mpz_gcd (common, a, b);
+      expected = (mpz_cmp_ui (common, 1L) == 0);
+
+      mpz_mul (a, a, a);
+
+      try_all (a, b, expected);
+    }
+
+  mpz_clear (bs);
+  mpz_clear (a);
+  mpz_clear (b);
+  mpz_clear (common);
+}
+
+
+/* Check the handling of asize==0, make sure it isn't affected by the low
+   limb.  a is kept at zero while the limb at PTR(a)[0] is set to 0 and
+   then 1, for each of b = 1, -1, 0 and 2.  */
+void
+check_a_zero (void)
+{
+  static const struct {
+    long  b;
+    int   answer;
+  } cases[] = {
+    {  1, 1 },   /* (0/1)=1 */
+    { -1, 1 },   /* (0/-1)=1 */
+    {  0, 0 },   /* (0/0)=0 */
+    {  2, 0 },   /* (0/2)=0 */
+  };
+  mpz_t      a, b;
+  mp_limb_t  low;
+  int        c;
+
+  mpz_init_set_ui (a, 0);
+  mpz_init (b);
+
+  for (c = 0; c < numberof (cases); c++)
+    {
+      mpz_set_si (b, cases[c].b);
+
+      /* the size of a stays 0, only the stored limb changes */
+      for (low = 0; low <= 1; low++)
+        {
+          PTR(a)[0] = low;
+          try_all (a, b, cases[c].answer);
+        }
+    }
+
+  mpz_clear (a);
+  mpz_clear (b);
+}
+
+
+/* With "-p" as the first argument, set option_pari and print a PARI-GP
+   definition of try(), so that every case is then printed for gp rather
+   than checked.  Otherwise the checks run against the library.  */
+int
+main (int argc, char *argv[])
+{
+  int  want_pari = (argc >= 2 && strcmp (argv[1], "-p") == 0);
+
+  tests_start ();
+
+  if (want_pari)
+    {
+      option_pari = 1;
+
+      /* gp function that reports any case where kronecker() disagrees */
+      printf ("try(a,b,answer) =\n"
+              "{\n"
+              "  if (kronecker(a,b) != answer,\n"
+              "    print(\"wrong at \", a, \",\", b,\n"
+              "      \" expected \", answer,\n"
+              "      \" pari says \", kronecker(a,b)))\n"
+              "}\n");
+    }
+
+  check_data ();
+  check_squares_zi ();
+  check_a_zero ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-lcm.c b/tests/mpz/t-lcm.c

new file mode 100644 (file)

index 0000000..195f0ee
--- /dev/null
+++ b/tests/mpz/t-lcm.c
@@ -0,0 +1,178 @@
+/* Test mpz_lcm and mpz_lcm_ui.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* Check that the lcm of x_orig and y_orig is want, for both operand
+   orders, every combination of operand signs, and with the destination
+   either separate from or the same as the first source.  The same want is
+   used for all of these.  mpz_lcm_ui is checked too whenever the second
+   operand fits an unsigned long.  Aborts with a trace of the operands on
+   the first mismatch.  */
+void
+check_all (mpz_ptr want, mpz_srcptr x_orig, mpz_srcptr y_orig)
+{
+  mpz_t  got, x, y;
+  int    negx, negy, swap, inplace;
+
+  mpz_init (got);
+  mpz_init_set (x, x_orig);
+  mpz_init_set (y, y_orig);
+
+  /* Each swap or negation is applied at the top of its loop, so the two
+     passes of a loop visit both states and leave x and y as they were.  */
+  for (swap = 0; swap < 2; swap++)
+    {
+      mpz_swap (x, y);
+
+      for (negx = 0; negx < 2; negx++)
+        {
+          mpz_neg (x, x);
+
+          for (negy = 0; negy < 2; negy++)
+            {
+              mpz_neg (y, y);
+
+              for (inplace = 0; inplace <= 1; inplace++)
+                {
+                  if (inplace)
+                    { mpz_set (got, x); mpz_lcm (got, got, y); }
+                  else
+                    mpz_lcm (got, x, y);
+                  MPZ_CHECK_FORMAT (got);
+
+                  if (mpz_cmp (got, want) != 0)
+                    {
+                      printf ("mpz_lcm wrong, inplace=%d\n", inplace);
+                    fail:
+                      /* shared by the mpz_lcm_ui failure below */
+                      mpz_trace ("x", x);
+                      mpz_trace ("y", y);
+                      mpz_trace ("got", got);
+                      mpz_trace ("want", want);
+                      abort ();
+                    }
+
+                  if (mpz_fits_ulong_p (y))
+                    {
+                      unsigned long  yu = mpz_get_ui (y);
+                      if (inplace)
+                        { mpz_set (got, x); mpz_lcm_ui (got, got, yu); }
+                      else
+                        mpz_lcm_ui (got, x, yu);
+                      /* same format check as for the mpz_lcm result */
+                      MPZ_CHECK_FORMAT (got);
+
+                      if (mpz_cmp (got, want) != 0)
+                        {
+                          printf ("mpz_lcm_ui wrong, inplace=%d\n", inplace);
+                          printf    ("yu=%lu\n", yu);
+                          goto fail;
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+  mpz_clear (got);
+  mpz_clear (x);
+  mpz_clear (y);
+}
+
+
+/* Drive check_all() with operands built as products of small primes, so
+   that the expected lcm is known by construction.  */
+void
+check_primes (void)
+{
+  static unsigned long  prime[] = {
+    2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,
+    101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,
+    191,193,197,199,211,223,227,229,233,239,241,251,257,263,269,271,277,
+    281,283,293,307,311,313,317,331,337,347,349,353,359,367,373,379,383,
+    389,397,401,409,419,421,431,433,439,443,449,457,461,463,467,479,487,
+  };
+  mpz_t          want, x, y;
+  unsigned long  p, q, r;
+  int            idx;
+
+  mpz_init (want);
+  mpz_init (x);
+  mpz_init (y);
+
+  /* New prime each time: x is the product so far, y the next prime. */
+  mpz_set_ui (want, 1L);
+  for (idx = 0; idx < numberof (prime); idx++)
+    {
+      p = prime[idx];
+      mpz_set (x, want);
+      mpz_set_ui (y, p);
+      mpz_mul_ui (want, want, p);
+      check_all (want, x, y);
+    }
+
+  /* Old prime each time: x is the full product, y a prime already in it. */
+  mpz_set (x, want);
+  for (idx = 0; idx < numberof (prime); idx++)
+    {
+      mpz_set_ui (y, prime[idx]);
+      check_all (want, x, y);
+    }
+
+  /* One old, one new each time: y is the newest old prime times the next. */
+  mpz_set_ui (want, prime[0]);
+  for (idx = 1; idx < numberof (prime); idx++)
+    {
+      p = prime[idx];
+      mpz_set (x, want);
+      mpz_set_ui (y, p * prime[idx-1]);
+      mpz_mul_ui (want, want, p);
+      check_all (want, x, y);
+    }
+
+  /* Triplets with A,B in x and B,C in y. */
+  mpz_set_ui (want, 1L);
+  mpz_set_ui (x, 1L);
+  mpz_set_ui (y, 1L);
+  for (idx = 0; idx+2 < numberof (prime); idx += 3)
+    {
+      p = prime[idx];
+      q = prime[idx+1];
+      r = prime[idx+2];
+
+      mpz_mul_ui (want, want, p);
+      mpz_mul_ui (want, want, q);
+      mpz_mul_ui (want, want, r);
+
+      mpz_mul_ui (x, x, p);
+      mpz_mul_ui (x, x, q);
+
+      mpz_mul_ui (y, y, q);
+      mpz_mul_ui (y, y, r);
+
+      check_all (want, x, y);
+    }
+
+
+  mpz_clear (want);
+  mpz_clear (x);
+  mpz_clear (y);
+}
+
+
+
+/* Run the prime-product lcm checks.  The command line arguments are not
+   used.  */
+int
+main (int argc, char *argv[])
+{
+  tests_start ();
+
+  /* aborts by itself on failure */
+  check_primes ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-lucnum_ui.c b/tests/mpz/t-lucnum_ui.c

new file mode 100644 (file)

index 0000000..814034f
--- /dev/null
+++ b/tests/mpz/t-lucnum_ui.c
@@ -0,0 +1,97 @@
+/* Test mpz_lucnum_ui and mpz_lucnum2_ui.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* Usage: t-lucnum_ui [n]
+
+   Test up to L[n], or if n is omitted then the default limit below.  A
+   literal "x" for the limit means continue forever, this being meant only
+   for development.  */
+
+
+/* Check mpz_lucnum2_ui and mpz_lucnum_ui for n = 0, 1, ..., limit-1
+   against values built up with the recurrence L[n+1] = L[n] + L[n-1].
+
+   limit defaults to 100 * GMP_LIMB_BITS.  If argv[1] starts with 'x' the
+   limit is ULONG_MAX, otherwise argv[1] if present is parsed as a decimal
+   limit.  Aborts with a trace on the first mismatch.  */
+void
+check_sequence (int argc, char *argv[])
+{
+  unsigned long  n;
+  unsigned long  limit = 100 * GMP_LIMB_BITS;
+  mpz_t          want_ln, want_ln1, got_ln, got_ln1;
+
+  /* limit is an unsigned long, so parse it as one; atoi would go through
+     int and is undefined for values that do not fit */
+  if (argc > 1 && argv[1][0] == 'x')
+    limit = ULONG_MAX;
+  else if (argc > 1)
+    limit = strtoul (argv[1], NULL, 10);
+
+  /* start at n==0 */
+  mpz_init_set_si (want_ln1, -1); /* L[-1] */
+  mpz_init_set_ui (want_ln,  2);  /* L[0]   */
+  mpz_init (got_ln);
+  mpz_init (got_ln1);
+
+  for (n = 0; n < limit; n++)
+    {
+      /* the pair form is compared against L[n] and L[n-1] */
+      mpz_lucnum2_ui (got_ln, got_ln1, n);
+      MPZ_CHECK_FORMAT (got_ln);
+      MPZ_CHECK_FORMAT (got_ln1);
+      if (mpz_cmp (got_ln, want_ln) != 0 || mpz_cmp (got_ln1, want_ln1) != 0)
+        {
+          printf ("mpz_lucnum2_ui(%lu) wrong\n", n);
+          mpz_trace ("want ln ", want_ln);
+          mpz_trace ("got  ln ",  got_ln);
+          mpz_trace ("want ln1", want_ln1);
+          mpz_trace ("got  ln1",  got_ln1);
+          abort ();
+        }
+
+      /* the single form is compared against L[n] only */
+      mpz_lucnum_ui (got_ln, n);
+      MPZ_CHECK_FORMAT (got_ln);
+      if (mpz_cmp (got_ln, want_ln) != 0)
+        {
+          printf ("mpz_lucnum_ui(%lu) wrong\n", n);
+          mpz_trace ("want ln", want_ln);
+          mpz_trace ("got  ln", got_ln);
+          abort ();
+        }
+
+      /* step the reference pair: the sum goes into want_ln1, then the
+         swap leaves want_ln = L[n+1] and want_ln1 = L[n] */
+      mpz_add (want_ln1, want_ln1, want_ln);  /* L[n+1] = L[n] + L[n-1] */
+      mpz_swap (want_ln1, want_ln);
+    }
+
+  mpz_clear (want_ln);
+  mpz_clear (want_ln1);
+  mpz_clear (got_ln);
+  mpz_clear (got_ln1);
+}
+
+/* Run the Lucas number sequence check, handing the command line on to
+   check_sequence() for the optional limit.  */
+int
+main (int argc, char *argv[])
+{
+  tests_start ();
+  mp_trace_base = -16;
+
+  /* aborts by itself on failure */
+  check_sequence (argc, argv);
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-mul.c b/tests/mpz/t-mul.c

new file mode 100644 (file)

index 0000000..fc718bb
--- /dev/null
+++ b/tests/mpz/t-mul.c
@@ -0,0 +1,219 @@
+/* Test mpz_cmp, mpz_mul.
+
+Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2003, 2004 Free
+Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+#include "tests.h"
+
+void debug_mp __GMP_PROTO ((mpz_t));
+static void refmpz_mul __GMP_PROTO ((mpz_t, const mpz_t, const mpz_t));
+void dump_abort __GMP_PROTO ((int, char *, mpz_t, mpz_t, mpz_t, mpz_t));
+
+#define FFT_MIN_BITSIZE 100000
+
+char *extra_fft;
+
+/* Run two checks on one pair of operands, reporting through dump_abort()
+   with test number i on a mismatch:
+
+   1. mpz_mul (multiplier, multiplicand) against refmpz_mul.
+   2. mpz_mul (multiplier, multiplier) against mpz_mul of multiplier and a
+      separate copy of it.
+
+   The copy is stored in multiplicand, so the caller's multiplicand is
+   overwritten.  */
+void
+one (int i, mpz_t multiplicand, mpz_t multiplier)
+{
+  mpz_t got, ref;
+
+  mpz_init (got);
+  mpz_init (ref);
+
+  /* plain product against the reference code */
+  mpz_mul (got, multiplier, multiplicand);
+  refmpz_mul (ref, multiplier, multiplicand);
+  if (mpz_cmp (got, ref) != 0)
+    dump_abort (i, "incorrect plain product",
+                multiplier, multiplicand, got, ref);
+
+  /* same-operand product against a product of two distinct objects */
+  mpz_mul (got, multiplier, multiplier);
+  mpz_set (multiplicand, multiplier);
+  mpz_mul (ref, multiplier, multiplicand);
+  if (mpz_cmp (got, ref) != 0)
+    dump_abort (i, "incorrect square product",
+                multiplier, multiplier, got, ref);
+
+  mpz_clear (got);
+  mpz_clear (ref);
+}
+
+/* Multiply random operands of growing size, then 50 pairs of operands of
+   at least FFT_MIN_BITSIZE bits, checking each pair with one().
+
+   The environment variable GMP_CHECK_FFT, when it holds a value above 1,
+   replaces the default limit of 22 on the size exponent.  */
+int
+main (int argc, char **argv)
+{
+  gmp_randstate_ptr rands;
+  mpz_t op1, op2, bs;
+  unsigned long bsi, size_range, fsize_range;
+  int fft_max_2exp;
+  int i;
+
+  tests_start ();
+  rands = RANDS;
+
+  extra_fft = getenv ("GMP_CHECK_FFT");
+  fft_max_2exp = (extra_fft != NULL ? atoi (extra_fft) : 0);
+
+  if (fft_max_2exp <= 1)       /* compat with old use of GMP_CHECK_FFT */
+    fft_max_2exp = 22;         /* default limit, good for any machine */
+
+  mpz_init (bs);
+  mpz_init (op1);
+  mpz_init (op2);
+
+  /* fsize_range holds size_range scaled by 256 and grows by 33/32 per
+     pass, so size_range creeps up from 4 until it passes the limit */
+  fsize_range = 4 << 8;
+  for (i = 0; ; i++)
+    {
+      size_range = fsize_range >> 8;
+      if (size_range > fft_max_2exp)
+        break;
+      fsize_range = fsize_range * 33 / 32;
+
+      /* on even passes op2 reuses the bit count drawn for op1 */
+      mpz_urandomb (bs, rands, size_range);
+      mpz_rrandomb (op1, rands, mpz_get_ui (bs));
+      if ((i & 1) != 0)
+        mpz_urandomb (bs, rands, size_range);
+      mpz_rrandomb (op2, rands, mpz_get_ui (bs));
+
+      /* two random bits per operand; negate when both are clear */
+      mpz_urandomb (bs, rands, 4);
+      bsi = mpz_get_ui (bs);
+      if ((bsi & 0x3) == 0)
+        mpz_neg (op1, op1);
+      if ((bsi & 0xC) == 0)
+        mpz_neg (op2, op2);
+
+      /* printf ("%d %d\n", SIZ (op1), SIZ (op2)); */
+      one (i, op2, op1);
+    }
+
+  /* operands of at least FFT_MIN_BITSIZE bits, all reported as test -1 */
+  for (i = -50; i < 0; i++)
+    {
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % fft_max_2exp;
+
+      mpz_urandomb (bs, rands, size_range);
+      mpz_rrandomb (op1, rands, mpz_get_ui (bs) + FFT_MIN_BITSIZE);
+      mpz_urandomb (bs, rands, size_range);
+      mpz_rrandomb (op2, rands, mpz_get_ui (bs) + FFT_MIN_BITSIZE);
+
+      /* printf ("%d: %d %d\n", i, SIZ (op1), SIZ (op2)); */
+      fflush (stdout);
+      one (-1, op2, op1);
+    }
+
+  mpz_clear (bs);
+  mpz_clear (op1);
+  mpz_clear (op2);
+
+  tests_end ();
+  exit (0);
+}
+
+/* Reference multiplication: set w to u*v using refmpn_mul on the limb
+   data, going through a temporary limb buffer.  */
+static void
+refmpz_mul (mpz_t w, const mpz_t u, const mpz_t v)
+{
+  mp_size_t un = u->_mp_size;
+  mp_size_t vn = v->_mp_size;
+  mp_size_t wn, tmp_limbs;
+  mp_ptr tmp;
+  int negative;
+
+  /* a zero operand gives a zero product, with nothing to allocate */
+  if (un == 0 || vn == 0)
+    {
+      SIZ (w) = 0;
+      return;
+    }
+
+  /* both operands are non-zero here, so the signs are well defined */
+  negative = ((un < 0) != (vn < 0));
+  un = ABS (un);
+  vn = ABS (vn);
+
+  tmp_limbs = un + vn;
+  tmp = __GMP_ALLOCATE_FUNC_LIMBS (tmp_limbs);
+
+  /* refmpn_mul is called with the longer operand first */
+  if (un > vn)
+    refmpn_mul (tmp, u->_mp_d, un, v->_mp_d, vn);
+  else
+    refmpn_mul (tmp, v->_mp_d, vn, u->_mp_d, un);
+
+  /* the product has un+vn limbs, less one if the top limb is zero */
+  wn = un + vn;
+  if (tmp[wn - 1] == 0)
+    wn--;
+
+  MPZ_REALLOC (w, wn);
+  MPN_COPY (PTR(w), tmp, wn);
+  SIZ(w) = (negative ? -wn : wn);
+
+  __GMP_FREE_FUNC_LIMBS (tmp, tmp_limbs);
+}
+
+/* Report a failed product check and abort.  i is the test number and s a
+   description of the failure.  The operands and both products are printed
+   on stderr through debug_mp(), followed on stdout by the range of limb
+   indices, lowest to highest, at which product and ref_product differ.  */
+void
+dump_abort (int i, char *s,
+            mpz_t op1, mpz_t op2, mpz_t product, mpz_t ref_product)
+{
+  mp_size_t ref_n = ABSIZ (ref_product);
+  mp_size_t got_n = ABSIZ (product);
+  mp_size_t common = (ref_n < got_n ? ref_n : got_n);
+  mp_size_t b, e;
+
+  fprintf (stderr, "ERROR: %s in test %d\n", s, i);
+  fprintf (stderr, "op1          = "); debug_mp (op1);
+  fprintf (stderr, "op2          = "); debug_mp (op2);
+  fprintf (stderr, "    product  = "); debug_mp (product);
+  fprintf (stderr, "ref_product  = "); debug_mp (ref_product);
+
+  /* Lowest differing limb.  Only limbs present in both values are read, so
+     a product shorter than ref_product is never indexed past its size.  */
+  for (b = 0; b < common; b++)
+    if (PTR(ref_product)[b] != PTR(product)[b])
+      break;
+
+  /* Highest differing limb.  When the sizes differ, the top limb of the
+     longer value has no counterpart and is taken as the end of the range. */
+  if (ref_n != got_n)
+    e = (ref_n > got_n ? ref_n : got_n) - 1;
+  else
+    for (e = common - 1; e >= 0; e--)
+      if (PTR(ref_product)[e] != PTR(product)[e])
+        break;
+
+  /* mp_size_t is not necessarily long, so convert to match %ld */
+  printf ("ERRORS in %ld--%ld\n", (long) b, (long) e);
+  abort();
+}
+
+/* Print x in hex on stderr followed by a newline.  A value of more than 65
+   hex digits is abbreviated to its leading digits, "...", its low 25
+   digits, and the total digit count in brackets.  */
+void
+debug_mp (mpz_t x)
+{
+  size_t ndigits = mpz_sizeinbase (x, 16);
+  mpz_t part;
+
+  if (ndigits <= 65)
+    {
+      gmp_fprintf (stderr, "%ZX\n", x);
+      return;
+    }
+
+  mpz_init (part);
+
+  /* drop all but the top 25 hex digits */
+  mpz_tdiv_q_2exp (part, x, 4 * (ndigits - 25));
+  gmp_fprintf (stderr, "%ZX...", part);
+
+  /* keep only the low 25 hex digits */
+  mpz_tdiv_r_2exp (part, x, 4 * 25);
+  gmp_fprintf (stderr, "%025ZX [%d]\n", part, (int) ndigits);
+
+  mpz_clear (part);
+}
diff --git a/tests/mpz/t-mul_i.c b/tests/mpz/t-mul_i.c

new file mode 100644 (file)

index 0000000..bf95a15
--- /dev/null
+++ b/tests/mpz/t-mul_i.c
@@ -0,0 +1,135 @@
+/* Test mpz_mul_ui and mpz_mul_si.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+mpz_t got, want, x;
+
+/* Verify the file-scope result "got" against "want" after an mpz_mul_si
+   call with multiplier Y.  On a mismatch, print x, Y and both values, then
+   abort.  */
+void
+compare_si (long y)
+{
+  if (mpz_cmp (got, want) == 0)
+    return;
+
+  printf    ("mpz_mul_si wrong\n");
+  mpz_trace ("  x", x);
+  printf    ("  y=%ld (0x%lX)\n", y, y);
+  mpz_trace ("  got ", got);
+  mpz_trace ("  want", want);
+  abort ();
+}
+
+/* Verify the file-scope result "got" against "want" after an mpz_mul_ui
+   call with multiplier Y.  On a mismatch, print x, Y and both values, then
+   abort.  */
+void
+compare_ui (unsigned long y)
+{
+  if (mpz_cmp (got, want) == 0)
+    return;
+
+  printf    ("mpz_mul_ui wrong\n");
+  mpz_trace ("  x", x);
+  printf    ("  y=%lu (0x%lX)\n", y, y);
+  mpz_trace ("  got ", got);
+  mpz_trace ("  want", want);
+  abort ();
+}
+
+/* Multiply x = 1 by a few boundary multipliers, so that the expected
+   product is simply the multiplier itself: first the signed samples through
+   mpz_mul_si, then the unsigned ones through mpz_mul_ui.  */
+void
+check_samples (void)
+{
+  static const long           si_sample[] = { 0, 1, -1, LONG_MIN, LONG_MAX };
+  static const unsigned long  ui_sample[] = { 0, 1, ULONG_MAX };
+  size_t  k;
+
+  for (k = 0; k < sizeof (si_sample) / sizeof (si_sample[0]); k++)
+    {
+      long  y = si_sample[k];
+
+      mpz_set_ui (x, 1L);
+      mpz_mul_si (got, x, y);
+      mpz_set_si (want, y);
+      compare_si (y);
+    }
+
+  for (k = 0; k < sizeof (ui_sample) / sizeof (ui_sample[0]); k++)
+    {
+      unsigned long  y = ui_sample[k];
+
+      mpz_set_ui (x, 1L);
+      mpz_mul_ui (got, x, y);
+      mpz_set_ui (want, y);
+      compare_ui (y);
+    }
+}
+
+/* Test driver: set up the three file-scope operands, run the sample checks
+   and release everything again.  Command line arguments are not used.  */
+int
+main (int argc, char **argv)
+{
+  tests_start ();
+
+  mpz_init (want);
+  mpz_init (got);
+  mpz_init (x);
+
+  check_samples ();
+
+  mpz_clear (want);
+  mpz_clear (got);
+  mpz_clear (x);
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-nextprime.c b/tests/mpz/t-nextprime.c

new file mode 100644 (file)

index 0000000..1734f61
--- /dev/null
+++ b/tests/mpz/t-nextprime.c
@@ -0,0 +1,222 @@
+/* Test mpz_nextprime.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+/* Reference implementation: starting at T + 1, step P upwards by one until
+   mpz_probab_prime_p (with 10 repetitions) accepts it.  */
+void
+refmpz_nextprime (mpz_ptr p, mpz_srcptr t)
+{
+  for (mpz_add_ui (p, t, 1L);
+       mpz_probab_prime_p (p, 10) == 0;
+       mpz_add_ui (p, p, 1L))
+    ;
+}
+
+/* Apply mpz_nextprime REPS times in a row, beginning at the number given by
+   the string START, and abort unless the final value equals the number
+   given by the string END.  Both strings are parsed with base 0.
+
+   When DIFFS is not NULL, DIFFS[k] must additionally equal the distance
+   covered by step k; any deviation aborts the test.  */
+void
+run (char *start, int reps, char *end, short diffs[])
+{
+  mpz_t x, y;
+  int step = 0;
+
+  mpz_init_set_str (x, start, 0);
+  mpz_init (y);
+
+  while (step < reps)
+    {
+      mpz_nextprime (y, x);
+
+      /* x temporarily holds the gap between the old and the new value */
+      mpz_sub (x, y, x);
+      if (diffs != NULL)
+        {
+          unsigned long gap = mpz_get_ui (x);
+
+          if (diffs[step] != gap)
+            {
+              gmp_printf ("diff list discrepancy\n");
+              abort ();
+            }
+        }
+
+      mpz_set (x, y);
+      step++;
+    }
+
+  mpz_set_str (y, end, 0);
+
+  if (mpz_cmp (x, y) != 0)
+    {
+      gmp_printf ("got  %Zx\n", x);
+      gmp_printf ("want %Zx\n", y);
+      abort ();
+    }
+
+  mpz_clear (y);
+  mpz_clear (x);
+}
+
+extern short diff1[];
+extern short diff3[];
+extern short diff4[];
+extern short diff5[];
+
+/* Test driver for mpz_nextprime.
+
+   First walks a few fixed chains of successive primes with run(), then
+   compares mpz_nextprime with refmpz_nextprime on random operands.  The
+   number of random operands defaults to 20 and can be overridden by a
+   single command line argument.  */
+int
+main (int argc, char **argv)
+{
+  int i;
+  int reps = 20;
+  gmp_randstate_ptr rands;
+  mpz_t bs, x, nxtp, ref_nxtp;
+  unsigned long size_range;
+
+  tests_start();
+  rands = RANDS;
+
+  /* 1000 steps from 2, with every gap checked against diff1 */
+  run ("2", 1000, "0x1ef7", diff1);
+
+  /* same end point from 3 in one step less, without gap checking */
+  run ("3", 1000 - 1, "0x1ef7", NULL);
+
+  run ("0x8a43866f5776ccd5b02186e90d28946aeb0ed914", 50,
+       "0x8a43866f5776ccd5b02186e90d28946aeb0eeec5", diff3);
+
+  run ("0x10000000000000000000000000000000000000", 50,
+       "0x100000000000000000000000000000000010ab", diff4);
+
+  run ("0x1c2c26be55317530311facb648ea06b359b969715db83292ab8cf898d8b1b", 50,
+       "0x1c2c26be55317530311facb648ea06b359b969715db83292ab8cf898da957", diff5);
+
+  mpz_init (bs);
+  mpz_init (x);
+  mpz_init (nxtp);
+  mpz_init (ref_nxtp);
+
+  if (argc == 2)
+     reps = atoi (argv[1]);
+
+  for (i = 0; i < reps; i++)
+    {
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 8 + 2; /* 2..9, so 0..511 bit operands */
+
+      mpz_urandomb (bs, rands, size_range);
+      mpz_rrandomb (x, rands, mpz_get_ui (bs));
+
+/*      gmp_printf ("%ld: %Zd\n", mpz_sizeinbase (x, 2), x); */
+
+      /* the function under test must agree with the reference search */
+      mpz_nextprime (nxtp, x);
+      refmpz_nextprime (ref_nxtp, x);
+      if (mpz_cmp (nxtp, ref_nxtp) != 0)
+       abort ();
+    }
+
+  mpz_clear (bs);
+  mpz_clear (x);
+  mpz_clear (nxtp);
+  mpz_clear (ref_nxtp);
+
+  tests_end ();
+  return 0;
+}
+
+/* Expected gaps between successive mpz_nextprime results for the chain that
+   main() starts at 2: entry k is the distance covered by step k of run().  */
+short diff1[] =
+{
+  1,2,2,4,2,4,2,4,6,2,6,4,2,4,6,6,
+  2,6,4,2,6,4,6,8,4,2,4,2,4,14,4,6,
+  2,10,2,6,6,4,6,6,2,10,2,4,2,12,12,4,
+  2,4,6,2,10,6,6,6,2,6,4,2,10,14,4,2,
+  4,14,6,10,2,4,6,8,6,6,4,6,8,4,8,10,
+  2,10,2,6,4,6,8,4,2,4,12,8,4,8,4,6,
+  12,2,18,6,10,6,6,2,6,10,6,6,2,6,6,4,
+  2,12,10,2,4,6,6,2,12,4,6,8,10,8,10,8,
+  6,6,4,8,6,4,8,4,14,10,12,2,10,2,4,2,
+  10,14,4,2,4,14,4,2,4,20,4,8,10,8,4,6,
+  6,14,4,6,6,8,6,12,4,6,2,10,2,6,10,2,
+  10,2,6,18,4,2,4,6,6,8,6,6,22,2,10,8,
+  10,6,6,8,12,4,6,6,2,6,12,10,18,2,4,6,
+  2,6,4,2,4,12,2,6,34,6,6,8,18,10,14,4,
+  2,4,6,8,4,2,6,12,10,2,4,2,4,6,12,12,
+  8,12,6,4,6,8,4,8,4,14,4,6,2,4,6,2,
+  6,10,20,6,4,2,24,4,2,10,12,2,10,8,6,6,
+  6,18,6,4,2,12,10,12,8,16,14,6,4,2,4,2,
+  10,12,6,6,18,2,16,2,22,6,8,6,4,2,4,8,
+  6,10,2,10,14,10,6,12,2,4,2,10,12,2,16,2,
+  6,4,2,10,8,18,24,4,6,8,16,2,4,8,16,2,
+  4,8,6,6,4,12,2,22,6,2,6,4,6,14,6,4,
+  2,6,4,6,12,6,6,14,4,6,12,8,6,4,26,18,
+  10,8,4,6,2,6,22,12,2,16,8,4,12,14,10,2,
+  4,8,6,6,4,2,4,6,8,4,2,6,10,2,10,8,
+  4,14,10,12,2,6,4,2,16,14,4,6,8,6,4,18,
+  8,10,6,6,8,10,12,14,4,6,6,2,28,2,10,8,
+  4,14,4,8,12,6,12,4,6,20,10,2,16,26,4,2,
+  12,6,4,12,6,8,4,8,22,2,4,2,12,28,2,6,
+  6,6,4,6,2,12,4,12,2,10,2,16,2,16,6,20,
+  16,8,4,2,4,2,22,8,12,6,10,2,4,6,2,6,
+  10,2,12,10,2,10,14,6,4,6,8,6,6,16,12,2,
+  4,14,6,4,8,10,8,6,6,22,6,2,10,14,4,6,
+  18,2,10,14,4,2,10,14,4,8,18,4,6,2,4,6,
+  2,12,4,20,22,12,2,4,6,6,2,6,22,2,6,16,
+  6,12,2,6,12,16,2,4,6,14,4,2,18,24,10,6,
+  2,10,2,10,2,10,6,2,10,2,10,6,8,30,10,2,
+  10,8,6,10,18,6,12,12,2,18,6,4,6,6,18,2,
+  10,14,6,4,2,4,24,2,12,6,16,8,6,6,18,16,
+  2,4,6,2,6,6,10,6,12,12,18,2,6,4,18,8,
+  24,4,2,4,6,2,12,4,14,30,10,6,12,14,6,10,
+  12,2,4,6,8,6,10,2,4,14,6,6,4,6,2,10,
+  2,16,12,8,18,4,6,12,2,6,6,6,28,6,14,4,
+  8,10,8,12,18,4,2,4,24,12,6,2,16,6,6,14,
+  10,14,4,30,6,6,6,8,6,4,2,12,6,4,2,6,
+  22,6,2,4,18,2,4,12,2,6,4,26,6,6,4,8,
+  10,32,16,2,6,4,2,4,2,10,14,6,4,8,10,6,
+  20,4,2,6,30,4,8,10,6,6,8,6,12,4,6,2,
+  6,4,6,2,10,2,16,6,20,4,12,14,28,6,20,4,
+  18,8,6,4,6,14,6,6,10,2,10,12,8,10,2,10,
+  8,12,10,24,2,4,8,6,4,8,18,10,6,6,2,6,
+  10,12,2,10,6,6,6,8,6,10,6,2,6,6,6,10,
+  8,24,6,22,2,18,4,8,10,30,8,18,4,2,10,6,
+  2,6,4,18,8,12,18,16,6,2,12,6,10,2,10,2,
+  6,10,14,4,24,2,16,2,10,2,10,20,4,2,4,8,
+  16,6,6,2,12,16,8,4,6,30,2,10,2,6,4,6,
+  6,8,6,4,12,6,8,12,4,14,12,10,24,6,12,6,
+  2,22,8,18,10,6,14,4,2,6,10,8,6,4,6,30,
+  14,10,2,12,10,2,16,2,18,24,18,6,16,18,6,2,
+  18,4,6,2,10,8,10,6,6,8,4,6,2,10,2,12,
+  4,6,6,2,12,4,14,18,4,6,20,4,8,6,4,8,
+  4,14,6,4,14,12,4,2,30,4,24,6,6,12,12,14,
+  6,4,2,4,18,6,12,8
+};
+
+/* Expected gaps for the 50-step chain that main() starts at
+   0x8a43866f5776ccd5b02186e90d28946aeb0ed914.  */
+short diff3[] =
+{
+  33,32,136,116,24,22,104,114,76,278,238,162,36,44,388,134,
+  130,26,312,42,138,28,24,80,138,108,270,12,330,130,98,102,
+  162,34,36,170,90,34,14,6,24,66,154,218,70,132,188,88,
+  80,82
+};
+
+/* Expected gaps for the 50-step chain that main() starts at
+   0x10000000000000000000000000000000000000.  */
+short diff4[] =
+{
+  91,92,64,6,104,24,46,258,68,18,54,100,68,154,26,4,
+  38,142,168,42,18,26,286,104,136,116,40,2,28,110,52,78,
+  104,24,54,96,4,626,196,24,56,36,52,102,48,156,26,18,
+  42,40
+};
+
+/* Expected gaps for the 50-step chain that main() starts at
+   0x1c2c26be55317530311facb648ea06b359b969715db83292ab8cf898d8b1b.  */
+short diff5[] =
+{
+  268,120,320,184,396,2,94,108,20,318,274,14,64,122,220,108,
+  18,174,6,24,348,32,64,116,268,162,20,156,28,110,52,428,
+  196,14,262,30,194,120,300,66,268,12,428,370,212,198,192,130,
+  30,80
+};
diff --git a/tests/mpz/t-oddeven.c b/tests/mpz/t-oddeven.c

new file mode 100644 (file)

index 0000000..09e2c93
--- /dev/null
+++ b/tests/mpz/t-oddeven.c
@@ -0,0 +1,88 @@
+/* Test mpz_odd_p and mpz_even_p.
+
+Copyright 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+/* Check mpz_odd_p and mpz_even_p on a fixed table of small and multi-limb
+   values of both signs.  Each entry lists the expected truth value of both
+   predicates; the first disagreement aborts the test.  */
+void
+check_data (void)
+{
+  static const struct {
+    const char  *n;
+    int          odd, even;
+  } data[] = {
+    {   "0", 0, 1 },
+    {   "1", 1, 0 },
+    {   "2", 0, 1 },
+    {   "3", 1, 0 },
+    {   "4", 0, 1 },
+
+    {  "-4", 0, 1 },
+    {  "-3", 1, 0 },
+    {  "-2", 0, 1 },
+    {  "-1", 1, 0 },
+
+    {  "0x1000000000000000000000000000000000000000000000000000", 0, 1 },
+    {  "0x1000000000000000000000000000000000000000000000000001", 1, 0 },
+    {  "0x1000000000000000000000000000000000000000000000000002", 0, 1 },
+    {  "0x1000000000000000000000000000000000000000000000000003", 1, 0 },
+
+    { "-0x1000000000000000000000000000000000000000000000000004", 0, 1 },
+    { "-0x1000000000000000000000000000000000000000000000000003", 1, 0 },
+    { "-0x1000000000000000000000000000000000000000000000000002", 0, 1 },
+    { "-0x1000000000000000000000000000000000000000000000000001", 1, 0 },
+  };
+
+  mpz_t  n;
+  int    i;
+  int    is_odd, is_even;
+
+  mpz_init (n);
+
+  i = 0;
+  while (i < numberof (data))
+    {
+      mpz_set_str_or_abort (n, data[i].n, 0);
+
+      /* normalise the predicate to 0/1 before comparing with the table */
+      is_odd = (mpz_odd_p (n) != 0);
+      if (is_odd != data[i].odd)
+        {
+          printf ("mpz_odd_p wrong on data[%d]\n", i);
+          abort();
+        }
+
+      is_even = (mpz_even_p (n) != 0);
+      if (is_even != data[i].even)
+        {
+          printf ("mpz_even_p wrong on data[%d]\n", i);
+          abort();
+        }
+
+      i++;
+    }
+
+  mpz_clear (n);
+}
+
+/* Test driver: run the table-driven check between tests_start and
+   tests_end, then exit with status 0.  */
+int
+main (void)
+{
+  tests_start ();
+
+  check_data ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-perfpow.c b/tests/mpz/t-perfpow.c

new file mode 100644 (file)

index 0000000..f603089
--- /dev/null
+++ b/tests/mpz/t-perfpow.c
@@ -0,0 +1,243 @@
+/* Test mpz_perfect_power_p.
+
+   Contributed to the GNU project by Torbjorn Granlund and Martin Boij.
+
+Copyright 2008, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+/* Fixed test vectors for check_tests(): num_as_str is parsed with base 0
+   and want is the value mpz_perfect_power_p is expected to return for it.
+   The list is terminated by an entry whose num_as_str is NULL.  */
+struct
+{
+  char *num_as_str;
+  char want;
+} tests[] =
+  {
+    { "0", 1},
+    { "1", 1},
+    {"-1", 1},
+    { "2", 0},
+    {"-2", 0},
+    { "3", 0},
+    {"-3", 0},
+    { "4", 1},
+    {"-4", 0},
+    { "64", 1},
+    {"-64", 1},
+    { "128", 1},
+    {"-128", 1},
+    { "256", 1},
+    {"-256", 0},
+    { "512", 1},
+    {"-512", 1},
+    { "0x4000000", 1},
+    {"-0x4000000", 1},
+    { "0x3cab640", 1},
+    {"-0x3cab640", 0},
+    { "0x3e23840", 1},
+    {"-0x3e23840", 0},
+    { "0x3d3a7ed1", 1},
+    {"-0x3d3a7ed1", 1},
+    { "0x30a7a6000", 1},
+    {"-0x30a7a6000", 1},
+    { "0xf33e5a5a59", 1},
+    {"-0xf33e5a5a59", 0},
+    { "0xed1b1182118135d", 1},
+    {"-0xed1b1182118135d", 1},
+    { "0xe71f6eb7689cc276b2f1", 1},
+    {"-0xe71f6eb7689cc276b2f1", 0},
+    { "0x12644507fe78cf563a4b342c92e7da9fe5e99cb75a01", 1},
+    {"-0x12644507fe78cf563a4b342c92e7da9fe5e99cb75a01", 0},
+    { "0x1ff2e7c581bb0951df644885bd33f50e472b0b73a204e13cbe98fdb424d66561e4000000", 1},
+    {"-0x1ff2e7c581bb0951df644885bd33f50e472b0b73a204e13cbe98fdb424d66561e4000000", 1},
+    { "0x2b9b44db2d91a6f8165c8c7339ef73633228ea29e388592e80354e4380004aad84000000", 1},
+    {"-0x2b9b44db2d91a6f8165c8c7339ef73633228ea29e388592e80354e4380004aad84000000", 1},
+    { "0x28d5a2b8f330910a9d3cda06036ae0546442e5b1a83b26a436efea5b727bf1bcbe7e12b47d81", 1},
+    {"-0x28d5a2b8f330910a9d3cda06036ae0546442e5b1a83b26a436efea5b727bf1bcbe7e12b47d81", 1},
+    {NULL, 0}
+  };
+
+
+/* Run mpz_perfect_power_p over every entry of the NULL-terminated tests[]
+   table and abort, naming the offending operand, on the first result that
+   differs from the recorded expectation.  */
+void
+check_tests ()
+{
+  mpz_t x;
+  int idx = 0;
+
+  mpz_init (x);
+
+  while (tests[idx].num_as_str != NULL)
+    {
+      int got, want;
+
+      mpz_set_str (x, tests[idx].num_as_str, 0);
+      got = mpz_perfect_power_p (x);
+      want = tests[idx].want;
+
+      if (got != want)
+        {
+          fprintf (stderr, "mpz_perfect_power_p returns %d when %d was expected\n", got, want);
+          fprintf (stderr, "fault operand: %s\n", tests[idx].num_as_str);
+          abort ();
+        }
+
+      idx++;
+    }
+
+  mpz_clear (x);
+}
+
+#define NRP 15
+
+/* Run REPS randomized checks of mpz_perfect_power_p.
+
+   Each round builds n as a product of 1 to NRP distinct values obtained
+   from mpz_nextprime, each raised to an exponent that is a multiple of a
+   common factor g >= 2.
+
+   With a single prime, n is used as built and must be reported as a perfect
+   power.  With several primes, the leading "destroy" exponents are replaced
+   by values coprime to the gcd of the untouched ones; the gcd of all
+   exponents is then 1 and n must not be reported as a perfect power.
+
+   Aborts with a diagnostic on the first disagreement.  */
+void
+check_random (int reps)
+{
+  mpz_t n, np, temp, primes[NRP];
+  int i, j, k, unique, destroy, res;
+  unsigned long int nrprimes, primebits;
+  mp_limb_t g, exp[NRP], e;
+  gmp_randstate_ptr rands;
+
+  rands = RANDS;
+
+  mpz_init (n);
+  mpz_init (np);
+  mpz_init (temp);
+
+  for (i = 0; i < NRP; i++)
+    mpz_init (primes[i]);
+
+  for (i = 0; i < reps; i++)
+    {
+      mpz_urandomb (np, rands, 32);
+      nrprimes = mpz_get_ui (np) % NRP + 1; /* 1-NRP unique primes */
+
+      mpz_urandomb (np, rands, 32);
+      g = mpz_get_ui (np) % 32 + 2; /* gcd 2-33 */
+
+      /* j only advances once primes[j] differs from all earlier primes */
+      for (j = 0; j < nrprimes;)
+        {
+          mpz_urandomb (np, rands, 32);
+          primebits = mpz_get_ui (np) % 100 + 3; /* 3-102 bit primes */
+          mpz_urandomb (primes[j], rands, primebits);
+          mpz_nextprime (primes[j], primes[j]);
+          unique = 1;
+          for (k = 0; k < j; k++)
+            {
+              if (mpz_cmp (primes[j], primes[k]) == 0)
+                {
+                  unique = 0;
+                  break;
+                }
+            }
+          if (unique)
+            {
+              mpz_urandomb (np, rands, 32);
+              e = 371 / (10 * primebits) + mpz_get_ui (np) % 11 + 1; /* Magic constants */
+              exp[j++] = g * e;
+            }
+        }
+
+      if (nrprimes > 1)
+        {
+          /* Destroy the first d exponents: d is 1 when nrprimes == 2,
+             otherwise d is in [1, nrprimes - 2] */
+          if (nrprimes == 2)
+            {
+              destroy = 1;
+            }
+          else
+            {
+              mpz_urandomb (np, rands, 32);
+              destroy = mpz_get_ui (np) % (nrprimes - 2) + 1;
+            }
+
+          /* g becomes the gcd of the exponents that are left untouched */
+          g = exp[destroy];
+          for (k = destroy + 1; k < nrprimes; k++)
+            g = mpn_gcd_1 (&g, 1, exp[k]);
+
+          /* replace each destroyed exponent by one that is coprime to g */
+          for (j = 0; j < destroy; j++)
+            {
+              mpz_urandomb (np, rands, 32);
+              e = mpz_get_ui (np) % 50 + 1;
+              while (mpn_gcd_1 (&g, 1, e) > 1)
+                e++;
+
+              exp[j] = e;
+            }
+        }
+
+      /* Compute n */
+      mpz_pow_ui (n, primes[0], exp[0]);
+      for (j = 1; j < nrprimes; j++)
+        {
+          mpz_pow_ui (temp, primes[j], exp[j]);
+          mpz_mul (n, n, temp);
+        }
+
+      res = mpz_perfect_power_p (n);
+
+      if (nrprimes == 1)
+        {
+          if (res == 0 && exp[0] > 1)
+            {
+              printf("n is a perfect power, perfpow_p disagrees\n");
+              /* exp[0] is an mp_limb_t, which need not be unsigned long;
+                 cast it so that it matches the %lu conversion.  */
+              gmp_printf("n = %Zu\nprimes[0] = %Zu\nexp[0] = %lu\n", n, primes[0],
+                         (unsigned long) exp[0]);
+              abort ();
+            }
+          else if (res == 1 && exp[0] == 1)
+            {
+              gmp_printf("n = %Zu\n", n);
+              printf("n is now a prime number, but perfpow_p still believes n is a perfect power\n");
+              abort ();
+            }
+        }
+      else
+        {
+          if (res == 1)
+            {
+              gmp_printf("n = %Zu\nn was destroyed, but perfpow_p still believes n is a perfect power\n", n);
+              abort ();
+            }
+        }
+    }
+
+  mpz_clear (n);
+  mpz_clear (np);
+  mpz_clear (temp);
+  for (i = 0; i < NRP; i++)
+    mpz_clear (primes[i]);
+}
+
+/* Test driver: run the fixed vectors, then the randomized check.  The number
+   of random rounds is 1000 unless exactly one command line argument
+   supplies another count.  */
+int
+main (int argc, char **argv)
+{
+  int n_tests;
+
+  tests_start ();
+  mp_trace_base = -16;
+
+  check_tests ();
+
+  n_tests = (argc == 2) ? atoi (argv[1]) : 1000;
+  check_random (n_tests);
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-perfsqr.c b/tests/mpz/t-perfsqr.c

new file mode 100644 (file)

index 0000000..f5fa15b
--- /dev/null
+++ b/tests/mpz/t-perfsqr.c
@@ -0,0 +1,155 @@
+/* Test mpz_perfect_square_p.
+
+Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+#include "mpn/perfsqr.h"
+
+
+/* check_modulo() exercises mpz_perfect_square_p on squares which cover each
+   possible quadratic residue to each divisor used within
+   mpn_perfect_square_p, ensuring those residues aren't incorrectly claimed
+   to be non-residues.
+
+   Each divisor is taken separately.  It's arranged that n is congruent to 0
+   modulo the other divisors, 0 of course being a quadratic residue to any
+   modulus.
+
+   The values "(j*others)^2" cover all quadratic residues mod divisor[i],
+   but in no particular order.  j is run from 1<=j<=divisor[i] so that zero
+   is excluded.  A literal n==0 doesn't reach the residue tests.
+
+   The test aborts on the first square that is not recognised.  */
+
+void
+check_modulo (void)
+{
+  static const unsigned long  divisor[] = PERFSQR_DIVISORS;
+  unsigned long  i, j;
+
+  mpz_t  others, n;
+
+  mpz_init (others);
+  mpz_init (n);
+
+  for (i = 0; i < numberof (divisor); i++)
+    {
+      /* product of all divisors except i */
+      mpz_set_ui (others, 1L);
+      for (j = 0; j < numberof (divisor); j++)
+        if (i != j)
+          mpz_mul_ui (others, others, divisor[j]);
+
+      for (j = 1; j <= divisor[i]; j++)
+        {
+          /* square */
+          mpz_mul_ui (n, others, j);
+          mpz_mul (n, n, n);
+          if (! mpz_perfect_square_p (n))
+            {
+              printf ("mpz_perfect_square_p got 0, want 1\n");
+              mpz_trace ("  n", n);
+              abort ();
+            }
+        }
+    }
+
+  mpz_clear (others);
+  mpz_clear (n);
+}
+
+
+/* Cross-check mpz_perfect_square_p against mpz_sqrt on REPS random
+   operands: the predicate must be true exactly when squaring the result of
+   mpz_sqrt gives the operand back.  Aborts on the first disagreement.  */
+void
+check_sqrt (int reps)
+{
+  gmp_randstate_ptr rands = RANDS;
+  mpz_t bs, x2, x, x2t;
+  mp_size_t x2n;
+  int claimed, roundtrip;
+  int rep;
+
+  mpz_init (bs);
+
+  mpz_init (x2);
+  mpz_init (x);
+  mpz_init (x2t);
+
+  rep = 0;
+  while (rep < reps)
+    {
+      /* 9 random bits select the operand size passed to mpz_rrandomb */
+      mpz_urandomb (bs, rands, 9);
+      x2n = mpz_get_ui (bs);
+      mpz_rrandomb (x2, rands, x2n);
+
+      claimed = mpz_perfect_square_p (x2);
+
+      mpz_sqrt (x, x2);
+      mpz_mul (x2t, x, x);
+      roundtrip = (mpz_cmp (x2, x2t) == 0);
+
+      if (claimed != roundtrip)
+        {
+          printf    ("mpz_perfect_square_p and mpz_sqrt differ\n");
+          mpz_trace ("   x  ", x);
+          mpz_trace ("   x2 ", x2);
+          mpz_trace ("   x2t", x2t);
+          printf    ("   mpz_perfect_square_p %d\n", claimed);
+          printf    ("   mpz_sqrt             %d\n", roundtrip);
+          abort ();
+        }
+
+      rep++;
+    }
+
+  mpz_clear (bs);
+  mpz_clear (x2);
+  mpz_clear (x);
+  mpz_clear (x2t);
+}
+
+
+/* Test driver: run the residue coverage check, then the random comparison
+   against mpz_sqrt.  The latter runs 200000 rounds unless exactly one
+   command line argument supplies another count.  */
+int
+main (int argc, char **argv)
+{
+  int reps;
+
+  tests_start ();
+  mp_trace_base = -16;
+
+  reps = (argc == 2) ? atoi (argv[1]) : 200000;
+
+  check_modulo ();
+  check_sqrt (reps);
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-popcount.c b/tests/mpz/t-popcount.c

new file mode 100644 (file)

index 0000000..313bc07
--- /dev/null
+++ b/tests/mpz/t-popcount.c
@@ -0,0 +1,169 @@
+/* Test mpz_popcount.
+
+Copyright 2001, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+
+/* Set each single bit position below 5*GMP_LIMB_BITS in turn in an
+   otherwise zero number and require mpz_popcount to return exactly 1.  */
+void
+check_onebit (void)
+{
+  mpz_t          n;
+  unsigned long  bit, count;
+
+  mpz_init (n);
+
+  bit = 0;
+  while (bit < 5 * GMP_LIMB_BITS)
+    {
+      mpz_setbit (n, bit);
+      count = mpz_popcount (n);
+      if (count != 1)
+        {
+          printf ("mpz_popcount wrong on single bit at %lu\n", bit);
+          printf ("   got %lu, want 1\n", count);
+          abort();
+        }
+
+      /* back to zero for the next position */
+      mpz_clrbit (n, bit);
+      bit++;
+    }
+
+  mpz_clear (n);
+}
+
+
+/* Check mpz_popcount against a table of known answers.  The negative
+   entries expect the all-ones unsigned long value.  On a mismatch the
+   operand is shown in decimal and hex together with both counts, and the
+   test aborts.  */
+void
+check_data (void)
+{
+  static const struct {
+    const char     *n;
+    unsigned long  want;
+  } data[] = {
+    { "-1", ~ (unsigned long) 0 },
+    { "-12345678", ~ (unsigned long) 0 },
+    { "0", 0 },
+    { "1", 1 },
+    { "3", 2 },
+    { "5", 2 },
+    { "0xFFFF", 16 },
+    { "0xFFFFFFFF", 32 },
+    { "0xFFFFFFFFFFFFFFFF", 64 },
+    { "0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF", 128 },
+  };
+
+  mpz_t  n;
+  int    i;
+  unsigned long   got;
+
+  mpz_init (n);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      mpz_set_str_or_abort (n, data[i].n, 0);
+      got = mpz_popcount (n);
+      if (got == data[i].want)
+        continue;
+
+      printf ("mpz_popcount wrong at data[%d]\n", i);
+      printf ("   n     \"%s\"\n", data[i].n);
+      printf ("         ");   mpz_out_str (stdout, 10, n); printf ("\n");
+      printf ("         0x"); mpz_out_str (stdout, 16, n); printf ("\n");
+      printf ("   got   %lu\n", got);
+      printf ("   want  %lu\n", data[i].want);
+      abort();
+    }
+
+  mpz_clear (n);
+}
+
+/* Reference population count: add up the set bits of every limb of ARG, one
+   bit at a time.  A negative ARG yields the all-ones unsigned long
+   value.  */
+unsigned long
+refmpz_popcount (mpz_t arg)
+{
+  mp_size_t      nlimbs = SIZ(arg);
+  mp_size_t      idx;
+  unsigned long  total = 0;
+
+  if (nlimbs < 0)
+    return ~(unsigned long) 0;
+
+  for (idx = 0; idx < nlimbs; idx++)
+    {
+      mp_limb_t  limb;
+
+      /* shift the limb down until no set bits remain */
+      for (limb = PTR(arg)[idx]; limb != 0; limb >>= 1)
+        total += (limb & 1);
+    }
+
+  return total;
+}
+
+/* Compare mpz_popcount with refmpz_popcount on 10000 random operands of
+   random size.  On a mismatch the operand is shown in decimal and hex
+   together with both counts, and the test aborts.  */
+void
+check_random (void)
+{
+  gmp_randstate_ptr rands;
+  mpz_t bs;
+  mpz_t arg;
+  unsigned long arg_size, size_range;
+  unsigned long got, ref;
+  int i;
+
+  rands = RANDS;
+
+  mpz_init (bs);
+  mpz_init (arg);
+
+  for (i = 0; i < 10000; i++)
+    {
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 11 + 2; /* 2..12, so 0..4095 bit operands */
+
+      mpz_urandomb (bs, rands, size_range);
+      arg_size = mpz_get_ui (bs);
+      mpz_rrandomb (arg, rands, arg_size);
+
+      got = mpz_popcount (arg);
+      ref = refmpz_popcount (arg);
+      if (got != ref)
+        {
+          printf ("mpz_popcount wrong on random\n");
+          printf ("         ");   mpz_out_str (stdout, 10, arg); printf ("\n");
+          printf ("         0x"); mpz_out_str (stdout, 16, arg); printf ("\n");
+          printf ("   got   %lu\n", got);
+          printf ("   want  %lu\n", ref);
+          abort ();
+        }
+    }
+  mpz_clear (arg);
+  mpz_clear (bs);
+}
+
+/* Test driver: single-bit, table-driven and random popcount checks, in that
+   order, between tests_start and tests_end.  */
+int
+main (void)
+{
+  tests_start ();
+
+  check_onebit ();
+  check_data ();
+  check_random ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-pow.c b/tests/mpz/t-pow.c

new file mode 100644 (file)

index 0000000..d6373ea
--- /dev/null
+++ b/tests/mpz/t-pow.c
@@ -0,0 +1,218 @@
+/* Test mpz_pow_ui and mpz_ui_pow_ui.
+
+Copyright 1997, 1999, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* Print the diagnostics for a wrong mpz_pow_ui result and abort.  */
+static void
+pow_ui_mismatch (mpz_srcptr got, mpz_srcptr want, mpz_srcptr base,
+                 unsigned long exp)
+{
+  printf ("mpz_pow_ui wrong\n");
+  mpz_trace ("  base", base);
+  printf    ("  exp = %lu (0x%lX)\n", exp, exp);
+  mpz_trace ("  got ", got);
+  mpz_trace ("  want", want);
+  abort ();
+}
+
+/* Require BASE raised to EXP to equal WANT, computed three ways:
+   mpz_pow_ui into a separate destination, mpz_pow_ui with the destination
+   overlapping the base, and, when BASE fits an unsigned long,
+   mpz_ui_pow_ui.  Any mismatch is reported and aborts the test.  */
+void
+check_one (mpz_srcptr want, mpz_srcptr base, unsigned long exp)
+{
+  mpz_t  got;
+
+  mpz_init (got);
+
+  MPZ_CHECK_FORMAT (want);
+
+  /* separate destination */
+  mpz_pow_ui (got, base, exp);
+  if (mpz_cmp (got, want) != 0)
+    pow_ui_mismatch (got, want, base, exp);
+
+  /* destination doubles as the base operand */
+  mpz_set (got, base);
+  mpz_pow_ui (got, got, exp);
+  if (mpz_cmp (got, want) != 0)
+    pow_ui_mismatch (got, want, base, exp);
+
+  if (mpz_fits_ulong_p (base))
+    {
+      unsigned long  base_u = mpz_get_ui (base);
+
+      mpz_ui_pow_ui (got, base_u, exp);
+      if (mpz_cmp (got, want) != 0)
+        {
+          printf    ("mpz_ui_pow_ui wrong\n");
+          printf    ("  base=%lu (0x%lX)\n", base_u, base_u);
+          printf    ("  exp = %lu (0x%lX)\n", exp, exp);
+          mpz_trace ("  got ", got);
+          mpz_trace ("  want", want);
+          abort ();
+        }
+    }
+
+  mpz_clear (got);
+}
+
+/* Check BASE raised to each exponent 0..19.  The expected power starts at 1
+   and is kept up to date by one multiplication by BASE per step.  */
+void
+check_base (mpz_srcptr base)
+{
+  mpz_t          want;
+  unsigned long  exp = 0;
+
+  mpz_init (want);
+  mpz_set_ui (want, 1L);
+
+  while (exp < 20)
+    {
+      check_one (want, base, exp);
+      mpz_mul (want, want, base);
+      exp++;
+    }
+
+  mpz_clear (want);
+}
+
+/* Run check_base on a fixed list of bases.  Each string is parsed with
+   base 0 by mpz_set_str_or_abort; the comments inside the table say what
+   the individual groups are meant to exercise.  */
+void
+check_various (void)
+{
+  static const struct {
+    const char *base;
+  } data[] = {
+    { "0" },
+    { "1" },
+    { "2" },
+    { "3" },
+    { "4" },
+    { "5" },
+    { "6" },
+    { "10" },
+    { "15" },
+    { "16" },
+
+    { "0x1F" },
+    { "0xFF" },
+    { "0x1001" },
+    { "0xFFFF" },
+    { "0x10000001" },
+    { "0x1000000000000001" },
+
+    /* actual size closest to estimate */
+    { "0xFFFFFFFF" },
+    { "0xFFFFFFFFFFFFFFFF" },
+
+    /* same after rshift */
+    { "0xFFFFFFFF0" },
+    { "0xFFFFFFFF00" },
+    { "0xFFFFFFFFFFFFFFFF0" },
+    { "0xFFFFFFFFFFFFFFFF00" },
+
+    /* change from 2 limbs to 1 after rshift */
+    { "0x180000000" },
+    { "0x18000000000000000" },
+
+    /* change from 3 limbs to 2 after rshift */
+    { "0x18000000100000000" },
+    { "0x180000000000000010000000000000000" },
+
+    /* handling of absolute value */
+    { "-0x80000000" },
+    { "-0x8000000000000000" },
+
+    /* low zero limb, and size>2, checking argument overlap detection */
+    { "0x3000000000000000300000000000000030000000000000000" },
+  };
+
+  mpz_t  base;
+  int    i;
+
+  mpz_init (base);
+
+  /* every table entry is checked for all exponents check_base covers */
+  for (i = 0; i < numberof (data); i++)
+    {
+      mpz_set_str_or_abort (base, data[i].base, 0);
+      check_base (base);
+    }
+
+  mpz_clear (base);
+}
+
+/* Run REPS random checks: a randomly sized, randomly signed base and a
+   5-bit exponent are passed to check_one, with the expected power supplied
+   by refmpz_pow_ui.  "want" doubles as scratch space for the random draws
+   before it receives the reference result.  */
+void
+check_random (int reps)
+{
+  gmp_randstate_ptr  rands = RANDS;
+  mpz_t              base, want;
+  mp_size_t          base_size;
+  unsigned long      size_range, exp;
+  int                round;
+
+  mpz_init (base);
+  mpz_init (want);
+
+  round = 0;
+  while (round < reps)
+    {
+      /* exponentially random 0 to 2^13 bits for base */
+      mpz_urandomb (want, rands, 32);
+      size_range = mpz_get_ui (want) % 12 + 2;
+      mpz_urandomb (want, rands, size_range);
+      base_size = mpz_get_ui (want);
+      mpz_rrandomb (base, rands, base_size);
+
+      /* low bit of a 2-bit draw decides whether the base is negated */
+      mpz_urandomb (want, rands, 2);
+      if (mpz_get_ui (want) & 1)
+        mpz_neg (base, base);
+
+      /* random 5 bits for exponent */
+      mpz_urandomb (want, rands, 5L);
+      exp = mpz_get_ui (want);
+
+      refmpz_pow_ui (want, base, exp);
+      check_one (want, base, exp);
+
+      round++;
+    }
+
+  mpz_clear (base);
+  mpz_clear (want);
+}
+
+/* Test driver: run the fixed-base checks, then the random ones.  The random
+   part runs 5000 rounds unless exactly one command line argument supplies
+   another count.  */
+int
+main (int argc, char **argv)
+{
+  int reps;
+
+  /* dummy call to drag in refmpn.o for testing mpz/n_pow_ui.c with
+     refmpn_mul_2 */
+  refmpn_zero_p (NULL, (mp_size_t) 0);
+
+  tests_start ();
+  mp_trace_base = -16;
+
+  reps = (argc == 2) ? atoi (argv[1]) : 5000;
+
+  check_various ();
+  check_random (reps);
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-powm.c b/tests/mpz/t-powm.c

new file mode 100644 (file)

index 0000000..b14d098
--- /dev/null
+++ b/tests/mpz/t-powm.c
@@ -0,0 +1,184 @@
+/* Test mpz_powm, mpz_mul, mpz_mod, mpz_mod_ui, mpz_div_ui.
+
+Copyright 1991, 1993, 1994, 1996, 1999, 2000, 2001, 2009, 2012 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+void debug_mp __GMP_PROTO ((mpz_t, int));
+
+#define SIZEM 13
+
+/* Return 1 once every mod size 1 .. MUL_TOOM22_THRESHOLD+3 limbs has been hit
+   at least 4 times, else 0.  FIXME: too small a SIZEM makes this never true.  */
+int
+allsizes_seen (unsigned int *allsizes)   /* matches the caller's counter array */
+{
+  mp_size_t i;
+
+  for (i = 1; i < MUL_TOOM22_THRESHOLD + 4; i++)
+    if (allsizes[i] < 4)
+      return 0;
+  return 1;
+}
+
+int
+main (int argc, char **argv)
+{
+  mpz_t base, exp, mod;
+  mpz_t r1, r2, t1, exp2, base2;   /* t1 is only initialised and cleared */
+  mp_size_t base_size, exp_size, mod_size;
+  int i;
+  int reps = 1000;
+  gmp_randstate_ptr rands;
+  mpz_t bs;
+  unsigned long bsi, size_range;
+  unsigned int allsizes[1 << (SIZEM + 2 - 1)];   /* hits per SIZ(mod) */
+
+  tests_start ();
+  TESTS_REPS (reps, argv, argc);
+
+  rands = RANDS;
+
+  mpz_init (bs);
+
+  mpz_init (base);
+  mpz_init (exp);
+  mpz_init (mod);
+  mpz_init (r1);
+  mpz_init (r2);
+  mpz_init (t1);
+  mpz_init (exp2);
+  mpz_init (base2);
+
+  memset (allsizes, 0, (1 << (SIZEM + 2 - 1)) * sizeof (int));
+
+  for (i = 0; i < reps || ! allsizes_seen (allsizes); i++)   /* may exceed reps */
+    {
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % SIZEM + 2;   /* 2 .. SIZEM+1 */
+
+      do  /* Loop until mathematically well-defined.  */
+       {
+         mpz_urandomb (bs, rands, size_range);
+         base_size = mpz_get_ui (bs);
+         mpz_rrandomb (base, rands, base_size);
+
+         mpz_urandomb (bs, rands, 7L);
+         exp_size = mpz_get_ui (bs);
+         mpz_rrandomb (exp, rands, exp_size);
+       }
+      while (mpz_cmp_ui (base, 0) == 0 && mpz_cmp_ui (exp, 0) == 0);   /* no 0^0 */
+
+      do
+        {
+         mpz_urandomb (bs, rands, size_range);
+         mod_size = mpz_get_ui (bs);
+         mpz_rrandomb (mod, rands, mod_size);
+       }
+      while (mpz_cmp_ui (mod, 0) == 0);   /* modulus must be nonzero */
+
+      allsizes[SIZ(mod)] += 1;
+
+      mpz_urandomb (bs, rands, 2);
+      bsi = mpz_get_ui (bs);
+      if ((bsi & 1) != 0)   /* negate base when the low random bit is set */
+       mpz_neg (base, base);
+
+      /* printf ("%ld %ld %ld\n", SIZ (base), SIZ (exp), SIZ (mod)); */
+
+      mpz_set_ui (r2, 1);
+      mpz_mod (base2, base, mod);
+      mpz_set (exp2, exp);
+      mpz_mod (r2, r2, mod);
+
+      for (;;)   /* reference result r2 by binary square-and-multiply */
+       {
+         if (mpz_tstbit (exp2, 0))
+           {
+             mpz_mul (r2, r2, base2);
+             mpz_mod (r2, r2, mod);
+           }
+         if  (mpz_cmp_ui (exp2, 1) <= 0)   /* last exponent bit handled */
+           break;
+         mpz_mul (base2, base2, base2);
+         mpz_mod (base2, base2, mod);
+         mpz_tdiv_q_2exp (exp2, exp2, 1);
+       }
+
+      mpz_powm (r1, base, exp, mod);
+      MPZ_CHECK_FORMAT (r1);
+
+      if (mpz_cmp (r1, r2) != 0)
+       {
+         fprintf (stderr, "\nIncorrect results in test %d for operands:\n", i);
+         debug_mp (base, -16);
+         debug_mp (exp, -16);
+         debug_mp (mod, -16);
+         fprintf (stderr, "mpz_powm result:\n");
+         debug_mp (r1, -16);
+         fprintf (stderr, "reference result:\n");
+         debug_mp (r2, -16);
+         abort ();
+       }
+
+      if (mpz_tdiv_ui (mod, 2) == 0)   /* mpz_powm_sec is tried for odd mod only */
+       continue;
+
+      mpz_powm_sec (r1, base, exp, mod);
+      MPZ_CHECK_FORMAT (r1);
+
+      if (mpz_cmp (r1, r2) != 0)
+       {
+         fprintf (stderr, "\nIncorrect results in test %d for operands:\n", i);
+         debug_mp (base, -16);
+         debug_mp (exp, -16);
+         debug_mp (mod, -16);
+         fprintf (stderr, "mpz_powm_sec result:\n");
+         debug_mp (r1, -16);
+         fprintf (stderr, "reference result:\n");
+         debug_mp (r2, -16);
+         abort ();
+       }
+    }
+
+  mpz_clear (bs);
+  mpz_clear (base);
+  mpz_clear (exp);
+  mpz_clear (mod);
+  mpz_clear (r1);
+  mpz_clear (r2);
+  mpz_clear (t1);
+  mpz_clear (exp2);
+  mpz_clear (base2);
+
+  tests_end ();
+  exit (0);
+}
+
+void
+debug_mp (mpz_t x, int base)   /* print x on stderr in BASE, then a newline */
+{
+  mpz_out_str (stderr, base, x); fputc ('\n', stderr);
+}
diff --git a/tests/mpz/t-powm_ui.c b/tests/mpz/t-powm_ui.c

new file mode 100644 (file)

index 0000000..224e604
--- /dev/null
+++ b/tests/mpz/t-powm_ui.c
@@ -0,0 +1,161 @@
+/* Test mpz_powm_ui, mpz_mul, mpz_mod.
+
+Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+void dump_abort __GMP_PROTO ((mpz_t, mpz_t));
+void debug_mp __GMP_PROTO ((mpz_t, int));
+
+int
+main (int argc, char **argv)
+{
+  mpz_t base, exp, mod;
+  mpz_t r1, r2, base2;
+  mp_size_t base_size, exp_size, mod_size;
+  unsigned long int exp2;   /* low limb of exp: the exponent actually tested */
+  int i;
+  int reps = 1000;
+  gmp_randstate_ptr rands;
+  mpz_t bs;
+  unsigned long bsi, size_range;
+
+  tests_start ();
+  rands = RANDS;
+
+  mpz_init (bs);
+
+  if (argc == 2)   /* exactly one argument: the repetition count */
+     reps = atoi (argv[1]);
+
+  mpz_init (base);
+  mpz_init (exp);
+  mpz_init (mod);
+  mpz_init (r1);
+  mpz_init (r2);
+  mpz_init (base2);
+
+  for (i = 0; i < reps; i++)
+    {
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 13 + 2;   /* 2 .. 14 */
+
+      do  /* Loop until mathematically well-defined.  */
+       {
+         mpz_urandomb (bs, rands, size_range);
+         base_size = mpz_get_ui (bs);
+         mpz_rrandomb (base, rands, base_size);
+
+         mpz_urandomb (bs, rands, 6L);
+         exp_size = mpz_get_ui (bs);
+         mpz_rrandomb (exp, rands, exp_size);
+         exp2 = mpz_getlimbn (exp, (mp_size_t) 0);
+       }
+      while (mpz_cmp_ui (base, 0) == 0 && exp2 == 0);   /* no 0^0 */
+
+      do
+        {
+         mpz_urandomb (bs, rands, size_range);
+         mod_size = mpz_get_ui (bs);
+         mpz_rrandomb (mod, rands, mod_size);
+       }
+      while (mpz_cmp_ui (mod, 0) == 0);   /* modulus must be nonzero */
+
+      mpz_urandomb (bs, rands, 2);
+      bsi = mpz_get_ui (bs);
+      if ((bsi & 1) != 0)   /* negate base when the low random bit is set */
+       mpz_neg (base, base);
+
+      /* printf ("%ld %ld\n", SIZ (base), SIZ (mod)); */
+
+#if 0
+      putc ('\n', stderr);
+      debug_mp (base, -16);
+      debug_mp (mod, -16);
+#endif
+
+      mpz_powm_ui (r1, base, exp2, mod);
+      MPZ_CHECK_FORMAT (r1);
+
+      mpz_set_ui (r2, 1);
+      mpz_set (base2, base);
+
+      mpz_mod (r2, r2, mod);   /* needed when exp==0 and mod==1 */
+      while (exp2 != 0)   /* reference square-and-multiply; leaves exp2 == 0 */
+       {
+         if (exp2 % 2 != 0)
+           {
+             mpz_mul (r2, r2, base2);
+             mpz_mod (r2, r2, mod);
+           }
+         mpz_mul (base2, base2, base2);
+         mpz_mod (base2, base2, mod);
+         exp2 = exp2 / 2;
+       }
+
+#if 0
+      debug_mp (r1, -16);
+      debug_mp (r2, -16);
+#endif
+
+      if (mpz_cmp (r1, r2) != 0)
+       {
+         fprintf (stderr, "\ntest %d: Incorrect results for operands:\n", i);
+         debug_mp (base, -16);
+         debug_mp (exp, -16);   /* whole exp; only its low limb was the exponent */
+         debug_mp (mod, -16);
+         fprintf (stderr, "mpz_powm_ui result:\n");
+         debug_mp (r1, -16);
+         fprintf (stderr, "reference result:\n");
+         debug_mp (r2, -16);
+         abort ();
+       }
+    }
+
+  mpz_clear (bs);
+  mpz_clear (base);
+  mpz_clear (exp);
+  mpz_clear (mod);
+  mpz_clear (r1);
+  mpz_clear (r2);
+  mpz_clear (base2);
+
+  tests_end ();
+  exit (0);
+}
+
+void
+dump_abort (mpz_t dividend, mpz_t divisor)   /* print both operands, abort */
+{
+  fprintf (stderr, "ERROR\n");
+  fprintf (stderr, "dividend = "); debug_mp (dividend, -16);
+  fprintf (stderr, "divisor  = "); debug_mp (divisor, -16);
+  abort();   /* never returns */
+}
+
+void
+debug_mp (mpz_t x, int base)   /* print x on stderr in BASE, then a newline */
+{
+  mpz_out_str (stderr, base, x); fputc ('\n', stderr);
+}
diff --git a/tests/mpz/t-pprime_p.c b/tests/mpz/t-pprime_p.c

new file mode 100644 (file)

index 0000000..0907379
--- /dev/null
+++ b/tests/mpz/t-pprime_p.c
@@ -0,0 +1,113 @@
+/* Exercise mpz_probab_prime_p.
+
+Copyright 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* Enhancements:
+
+   - Test some big primes don't come back claimed to be composite.
+   - Test some big composites don't come back claimed to be certainly prime.
+   - Test some big composites with small factors are identified as certainly
+     composite.  */
+
+
+/* Trial-division primality of |n|: 1 if prime, 0 otherwise (incl. 0 and 1) */
+int
+isprime (long n)
+{
+  long  divisor;
+
+  if (n < 0)
+    n = -n;
+
+  if (n == 2)
+    return 1;
+  if (n < 2 || (n % 2) == 0)
+    return 0;
+
+  /* n is odd and >= 3 here; try every candidate below it */
+  for (divisor = 3; divisor < n; divisor++)
+    if ((n % divisor) == 0)
+      return 0;
+
+  return 1;
+}
+
+/* Abort with a report of n, got and want unless mpz_probab_prime_p (n, 25)
+   returns want (2 is also accepted when want is 1); lines end in newlines.  */
+void
+check_one (mpz_srcptr n, int want)
+{
+  int  got = mpz_probab_prime_p (n, 25);
+
+  /* "definitely prime" is fine if we only wanted "probably prime" */
+  if (got == 2 && want == 1)
+    want = 2;
+
+  if (got != want)
+    {
+      printf ("mpz_probab_prime_p\n");
+      mpz_trace ("  n    ", n);
+      printf    ("  got =%d\n", got);
+      printf    ("  want=%d\n", want);
+      abort ();
+    }
+}
+
+void
+check_pn (mpz_ptr n, int want)   /* check_one on n and then on -n */
+{
+  check_one (n, want);
+  mpz_neg (n, n);   /* n is left negated for the caller */
+  check_one (n, want);
+}
+
+/* Compare mpz_probab_prime_p with isprime for every n in 0..299 and -n */
+void
+check_small (void)
+{
+  mpz_t  n;
+  long   i;
+
+  mpz_init (n);
+
+  for (i = 0; i < 300; i++)
+    {
+      mpz_set_si (n, i);
+      check_pn (n, isprime (i));   /* isprime gives 0 or 1 as the want value */
+    }
+
+  mpz_clear (n);
+}
+
+int
+main (void)
+{
+  tests_start ();
+
+  check_small ();   /* the only check this program runs */
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-root.c b/tests/mpz/t-root.c

new file mode 100644 (file)

index 0000000..c1a2b64
--- /dev/null
+++ b/tests/mpz/t-root.c
@@ -0,0 +1,167 @@
+/* Test mpz_root, mpz_rootrem, and mpz_perfect_power_p.
+
+Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+void debug_mp __GMP_PROTO ((mpz_t, int));
+
+void
+check_one (mpz_t root1, mpz_t x2, unsigned long nth, int i)   /* i: test number */
+{
+  mpz_t temp, temp2;
+  mpz_t root2, rem2;
+
+  mpz_init (root2);
+  mpz_init (rem2);
+  mpz_init (temp);
+  mpz_init (temp2);
+
+  MPZ_CHECK_FORMAT (root1);
+
+  mpz_rootrem (root2, rem2, x2, nth);
+  MPZ_CHECK_FORMAT (root2);
+  MPZ_CHECK_FORMAT (rem2);
+
+  mpz_pow_ui (temp, root1, nth);   /* temp = root1^nth */
+  MPZ_CHECK_FORMAT (temp);
+
+  mpz_add (temp2, temp, rem2);   /* temp2 = root1^nth + rem2 */
+
+  /* Fail if roots differ, root1^nth + rem2 != x2, or root1^nth > x2.  */
+  if (mpz_cmp (root1, root2) != 0 || mpz_cmp (x2, temp2) != 0 || mpz_cmp (temp, x2) > 0)
+    {
+      fprintf (stderr, "ERROR after test %d\n", i);
+      debug_mp (x2, 10);
+      debug_mp (root1, 10);
+      debug_mp (root2, 10);
+      fprintf (stderr, "nth: %lu\n", nth);
+      abort ();
+    }
+
+  if (nth > 1 && mpz_cmp_ui (temp, 1L) > 0 && ! mpz_perfect_power_p (temp))
+    {
+      fprintf (stderr, "ERROR in mpz_perfect_power_p after test %d\n", i);
+      debug_mp (temp, 10);
+      debug_mp (root1, 10);
+      fprintf (stderr, "nth: %lu\n", nth);
+      abort ();
+    }
+
+  if (nth <= 10000)            /* skip too expensive test */
+    {
+      mpz_add_ui (temp2, root1, 1L);
+      mpz_pow_ui (temp2, temp2, nth);
+      MPZ_CHECK_FORMAT (temp2);
+
+      /* Is the nth power of (result + 1) <= argument?  */
+      if (mpz_cmp (temp2, x2) <= 0)
+       {
+         fprintf (stderr, "ERROR after test %d\n", i);
+         debug_mp (x2, 10);
+         debug_mp (root1, 10);
+         fprintf (stderr, "nth: %lu\n", nth);
+         abort ();
+       }
+    }
+
+  mpz_clear (root2);
+  mpz_clear (rem2);
+  mpz_clear (temp);
+  mpz_clear (temp2);
+}
+
+int
+main (int argc, char **argv)
+{
+  mpz_t x2;
+  mpz_t root1;
+  mp_size_t x2_size;
+  int i;
+  int reps = 500;
+  unsigned long nth;
+  gmp_randstate_ptr rands;
+  mpz_t bs;
+  unsigned long bsi, size_range;
+
+  tests_start ();
+  TESTS_REPS (reps, argv, argc);
+
+  rands = RANDS;
+
+  mpz_init (bs);
+
+  mpz_init (x2);
+  mpz_init (root1);
+
+  /* This triggers a gcc 4.3.2 bug */
+  mpz_set_str (x2, "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff80000000000000000000000000000000000000000000000000000000000000002", 16);
+  mpz_root (root1, x2, 2);
+  check_one (root1, x2, 2, -1);   /* test number -1 marks this fixed case */
+
+  for (i = 0; i < reps; i++)
+    {
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 17 + 2;   /* 2 .. 18 */
+
+      mpz_urandomb (bs, rands, size_range);
+      x2_size = mpz_get_ui (bs) + 10;   /* request at least 10 bits */
+      mpz_rrandomb (x2, rands, x2_size);
+
+      mpz_urandomb (bs, rands, 15);
+      nth = mpz_getlimbn (bs, 0) % mpz_sizeinbase (x2, 2) + 2;   /* nth >= 2 */
+
+      mpz_root (root1, x2, nth);
+
+      mpz_urandomb (bs, rands, 4);
+      bsi = mpz_get_ui (bs);   /* bit 0: perturb? bit 1: sub/add, bits 2-3: by */
+      if ((bsi & 1) != 0)
+       {
+         /* With 50% probability, set x2 near a perfect power.  */
+         mpz_pow_ui (x2, root1, nth);
+         if ((bsi & 2) != 0)
+           {
+             mpz_sub_ui (x2, x2, bsi >> 2);   /* offset is 0..3 */
+             mpz_abs (x2, x2);   /* keep x2 non-negative after subtracting */
+           }
+         else
+           mpz_add_ui (x2, x2, bsi >> 2);
+         mpz_root (root1, x2, nth);   /* recompute the root for the new x2 */
+       }
+
+      check_one (root1, x2, nth, i);
+    }
+
+  mpz_clear (bs);
+  mpz_clear (x2);
+  mpz_clear (root1);
+
+  tests_end ();
+  exit (0);
+}
+
+void
+debug_mp (mpz_t x, int base)   /* print x on stderr in BASE, then a newline */
+{
+  mpz_out_str (stderr, base, x); fputc ('\n', stderr);
+}
diff --git a/tests/mpz/t-scan.c b/tests/mpz/t-scan.c

new file mode 100644 (file)

index 0000000..8ae97cb
--- /dev/null
+++ b/tests/mpz/t-scan.c
@@ -0,0 +1,132 @@
+/* Tests of mpz_scan0 and mpz_scan1.
+
+Copyright 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+unsigned long
+refmpz_scan (mpz_srcptr z, unsigned long i, int sought)
+{
+  /* one position past the last bit held in the limbs of z */
+  const unsigned long  limit = (unsigned long) ABSIZ(z) * GMP_NUMB_BITS;
+
+  /* Bit i itself is always examined, even when it lies above limit.  */
+  for (;;)
+    {
+      if (mpz_tstbit (z, i) == sought)
+        return i;
+      if (++i > limit)
+        return ULONG_MAX;
+    }
+}
+
+unsigned long
+refmpz_scan0 (mpz_srcptr z, unsigned long starting_bit)
+{
+  return refmpz_scan (z, starting_bit, 0);   /* look for a 0 bit */
+}
+
+unsigned long
+refmpz_scan1 (mpz_srcptr z, unsigned long starting_bit)
+{
+  return refmpz_scan (z, starting_bit, 1);   /* look for a 1 bit */
+}
+
+
+void
+check_ref (void)   /* compare mpz_scan0/mpz_scan1 with refmpz_scan0/1 */
+{
+  static const int offset[] = {   /* added to each limb-boundary bit index */
+    -2, -1, 0, 1, 2, 3
+  };
+
+  mpz_t          z;
+  int            test, neg, sought, oindex, o;
+  mp_size_t      size, isize;
+  unsigned long  start, got, want;
+
+  mpz_init (z);
+  for (test = 0; test < 5; test++)
+    {
+      for (size = 0; size < 5; size++)
+        {
+          mpz_random2 (z, size);
+
+          for (neg = 0; neg <= 1; neg++)
+            {
+              if (neg)
+                mpz_neg (z, z);
+
+              for (isize = 0; isize <= size; isize++)
+                {
+                  for (oindex = 0; oindex < numberof (offset); oindex++)
+                    {
+                      o = offset[oindex];
+                      if ((int) isize*GMP_NUMB_BITS < -o)
+                        continue;  /* start would be negative */
+
+                      start = isize*GMP_NUMB_BITS + o;
+
+                      for (sought = 0; sought <= 1; sought++)
+                        {
+                          if (sought == 0)
+                            {
+                              got = mpz_scan0 (z, start);
+                              want = refmpz_scan0 (z, start);
+                            }
+                          else
+                            {
+                              got = mpz_scan1 (z, start);
+                              want = refmpz_scan1 (z, start);
+                            }
+
+                          if (got != want)
+                            {
+                              printf ("wrong at test=%d, size=%ld, neg=%d, start=%lu, sought=%d\n",
+                                      test, (long) size, neg, start, sought);  /* %ld needs long */
+                              printf ("   z 0x");
+                              mpz_out_str (stdout, -16, z);
+                              printf ("\n");
+                              printf ("   got=%lu, want=%lu\n", got, want);
+                              exit (1);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+  mpz_clear (z);
+}
+
+
+int
+main (int argc, char *argv[])   /* argc and argv are not used */
+{
+  tests_start ();
+
+  check_ref ();   /* the only check this program runs */
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-set_d.c b/tests/mpz/t-set_d.c

new file mode 100644 (file)

index 0000000..c4d646d
--- /dev/null
+++ b/tests/mpz/t-set_d.c
@@ -0,0 +1,140 @@
+/* Test mpz_set_d and mpz_init_set_d.
+
+Copyright 2000, 2001, 2002, 2003, 2006 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+void
+check_data (void)   /* table-driven check of mpz_set_d and mpz_init_set_d */
+{
+  static const struct {
+    double     d;              /* input value */
+    mp_size_t  want_size;      /* expected _mp_size, sign included */
+    mp_limb_t  want_data[2];   /* expected limbs */
+  } data[] = {
+
+    {  0.0,  0 },
+    {  1.0,  1, { 1 } },
+    { -1.0, -1, { 1 } },
+
+    {  123.0,  1, { 123 } },
+    { -123.0, -1, { 123 } },
+
+    { 1e-1, 0, { 0 } },   /* this and all entries below: |d| < 1 gives 0 */
+    { -1e-1, 0, { 0 } },
+    { 2.328306436538696e-10, 0, { 0 } },
+    { -2.328306436538696e-10, 0, { 0 } },
+    { 5.421010862427522e-20, 0, { 0 } },
+    { -5.421010862427522e-20, 0, { 0 } },
+    { 2.938735877055719e-39, 0, { 0 } },
+    { -2.938735877055719e-39, 0, { 0 } },
+  };
+
+  mpz_t  z;
+  int    i;
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      mpz_init (z);
+      mpz_set_d (z, data[i].d);
+      MPZ_CHECK_FORMAT (z);
+      if (z->_mp_size != data[i].want_size
+          || refmpn_cmp_allowzero (z->_mp_d, data[i].want_data,
+                                   ABS (data[i].want_size)) != 0)
+        {
+          printf ("mpz_set_d wrong on data[%d]\n", i);
+        bad:   /* also entered by goto from the mpz_init_set_d check below */
+          d_trace   ("  d  ", data[i].d);
+          printf    ("  got  size %ld\n", (long) z->_mp_size);
+          printf    ("  want size %ld\n", (long) data[i].want_size);
+          mpn_trace ("  got  z", z->_mp_d, z->_mp_size);
+          mpn_trace ("  want z", data[i].want_data, data[i].want_size);
+          abort();
+        }
+      mpz_clear (z);
+
+      mpz_init_set_d (z, data[i].d);   /* same expectations for the init form */
+      MPZ_CHECK_FORMAT (z);
+      if (z->_mp_size != data[i].want_size
+          || refmpn_cmp_allowzero (z->_mp_d, data[i].want_data,
+                                   ABS (data[i].want_size)) != 0)
+        {
+          printf ("mpz_init_set_d wrong on data[%d]\n", i);
+          goto bad;   /* share the diagnostic dump and abort above */
+        }
+      mpz_clear (z);
+    }
+}
+
+/* Try mpz_set_d on values 2^i+1, while such a value fits a double. */
+void
+check_2n_plus_1 (void)
+{
+  volatile double  p, d, diff;   /* NOTE(review): volatile presumably defeats excess precision -- confirm */
+  mpz_t  want, got;
+  int    i;
+
+  mpz_init (want);
+  mpz_init (got);
+
+  p = 1.0;
+  mpz_set_ui (want, 2L);  /* gives 3 on first step */
+
+  for (i = 1; i < 500; i++)
+    {
+      mpz_mul_2exp (want, want, 1L);
+      mpz_sub_ui (want, want, 1L);   /* want = 2^i+1 */
+
+      p *= 2.0;  /* p = 2^i */
+      d = p + 1.0;
+      diff = d - p;   /* exactly 1.0 only while 2^i+1 is representable */
+      if (diff != 1.0)
+        break;   /* rounding occurred, stop now */
+
+      mpz_set_d (got, d);
+      MPZ_CHECK_FORMAT (got);
+      if (mpz_cmp (got, want) != 0)
+        {
+          printf ("mpz_set_d wrong on 2^%d+1\n", i);
+          d_trace   ("  d ", d);
+          mpz_trace ("  got  ", got);
+          mpz_trace ("  want ", want);
+          abort ();
+        }
+    }
+
+  mpz_clear (want);
+  mpz_clear (got);
+}
+
+int
+main (void)
+{
+  tests_start ();
+
+  check_data ();        /* fixed table of inputs */
+  check_2n_plus_1 ();   /* values 2^i+1 */
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-set_f.c b/tests/mpz/t-set_f.c

new file mode 100644 (file)

index 0000000..35cb61c
--- /dev/null
+++ b/tests/mpz/t-set_f.c
@@ -0,0 +1,126 @@
+/* Test mpz_set_f.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+void
+check_one (mpz_srcptr z)   /* mpz_set_f on +-z shifted left/right vs mpz ops */
+{
+  static const int shift[] = {   /* shift amounts in bits */
+    0, 1, GMP_LIMB_BITS, 2*GMP_LIMB_BITS, 5*GMP_LIMB_BITS
+  };
+
+  int    sh, shneg, neg;
+  mpf_t  f;
+  mpz_t  got, want;
+
+  mpf_init2 (f, mpz_sizeinbase(z,2));   /* precision: bit length of z */
+  mpz_init (got);
+  mpz_init (want);
+
+  for (sh = 0; sh < numberof(shift); sh++)
+    {
+      for (shneg = 0; shneg <= 1; shneg++)   /* 0: multiply, 1: divide */
+       {
+         for (neg = 0; neg <= 1; neg++)   /* 0: z, 1: -z */
+           {
+             mpf_set_z (f, z);
+             mpz_set (want, z);
+
+             if (neg)
+               {
+                 mpf_neg (f, f);
+                 mpz_neg (want, want);
+               }
+
+             if (shneg)
+               {
+                 mpz_tdiv_q_2exp (want, want, shift[sh]);   /* truncating */
+                 mpf_div_2exp (f, f, shift[sh]);
+               }
+             else
+               {
+                 mpz_mul_2exp (want, want, shift[sh]);
+                 mpf_mul_2exp (f, f, shift[sh]);
+               }
+
+             mpz_set_f (got, f);   /* the conversion under test */
+             MPZ_CHECK_FORMAT (got);
+
+             if (mpz_cmp (got, want) != 0)
+               {
+                 printf ("wrong result\n");
+                 printf ("  shift  %d\n", shneg ? -shift[sh] : shift[sh]);
+                 printf ("  neg    %d\n", neg);
+                 mpf_trace ("     f", f);
+                 mpz_trace ("   got", got);
+                 mpz_trace ("  want", want);
+                 abort ();
+               }
+           }
+       }
+    }
+
+  mpf_clear (f);
+  mpz_clear (got);
+  mpz_clear (want);
+}
+
+
+void
+check_various (void)   /* check_one on 0, 123 and two random values */
+{
+  mpz_t  z;
+
+  mpz_init (z);
+
+  mpz_set_ui (z, 0L);
+  check_one (z);
+
+  mpz_set_si (z, 123L);
+  check_one (z);
+
+  mpz_rrandomb (z, RANDS, 2*GMP_LIMB_BITS);   /* two-limb bit count */
+  check_one (z);
+
+  mpz_rrandomb (z, RANDS, 5*GMP_LIMB_BITS);   /* five-limb bit count */
+  check_one (z);
+
+  mpz_clear (z);
+}
+
+
+int
+main (int argc, char *argv[])   /* argc and argv are not used */
+{
+#if GMP_NAIL_BITS == 0   /* with nails the program just exits successfully */
+  tests_start ();
+  mp_trace_base = 16;
+
+  check_various ();
+
+  tests_end ();
+#endif
+  exit (0);
+}
diff --git a/tests/mpz/t-set_si.c b/tests/mpz/t-set_si.c

new file mode 100644 (file)

index 0000000..7af40e1
--- /dev/null
+++ b/tests/mpz/t-set_si.c
@@ -0,0 +1,97 @@
+/* Test mpz_set_si and mpz_init_set_si.
+
+Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+void
+check_data (void)   /* table-driven check of mpz_set_si and mpz_init_set_si */
+{
+#if GMP_NUMB_BITS <= BITS_PER_ULONG   /* NOTE(review): ENTRY is not used below */
+#define ENTRY(n)   { n, { n, 0 } }
+#else
+#define ENTRY(n)   { n, { (n) & GMP_NUMB_MASK, (n) >> GMP_NUMB_BITS } }
+#endif
+
+  static const struct {
+    long       n;              /* input value */
+    mp_size_t  want_size;      /* expected _mp_size, sign included */
+    mp_limb_t  want_data[2];   /* expected limbs */
+  } data[] = {
+
+    {  0L,  0 },
+    {  1L,  1, { 1 } },
+    { -1L, -1, { 1 } },
+
+#if GMP_NUMB_BITS >= BITS_PER_ULONG   /* expected size 1 for every entry */
+    { LONG_MAX,  1, { LONG_MAX, 0 } },
+    { -LONG_MAX,  -1, { LONG_MAX, 0 } },
+    { LONG_HIGHBIT,  -1, { ULONG_HIGHBIT, 0 } },
+#else   /* expected size 2: value split at GMP_NUMB_BITS */
+    { LONG_MAX,  2, { LONG_MAX & GMP_NUMB_MASK, LONG_MAX >> GMP_NUMB_BITS } },
+    { -LONG_MAX,  -2, { LONG_MAX & GMP_NUMB_MASK, LONG_MAX >> GMP_NUMB_BITS }},
+    { LONG_HIGHBIT,  -2, { 0, ULONG_HIGHBIT >> GMP_NUMB_BITS } },
+#endif
+  };
+
+  mpz_t  n;
+  int    i;
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      mpz_init (n);
+      mpz_set_si (n, data[i].n);
+      MPZ_CHECK_FORMAT (n);
+      if (n->_mp_size != data[i].want_size
+          || refmpn_cmp_allowzero (n->_mp_d, data[i].want_data,
+                                   ABS (data[i].want_size)) != 0)
+        {
+          printf ("mpz_set_si wrong on data[%d]\n", i);
+          abort();
+        }
+      mpz_clear (n);
+
+      mpz_init_set_si (n, data[i].n);   /* same expectations for the init form */
+      MPZ_CHECK_FORMAT (n);
+      if (n->_mp_size != data[i].want_size
+          || refmpn_cmp_allowzero (n->_mp_d, data[i].want_data,
+                                   ABS (data[i].want_size)) != 0)
+        {
+          printf ("mpz_init_set_si wrong on data[%d]\n", i);
+          abort();
+        }
+      mpz_clear (n);
+    }
+}
+
+
+int
+main (void)
+{
+  tests_start ();
+
+  check_data ();   /* the only check this program runs */
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-set_str.c b/tests/mpz/t-set_str.c

new file mode 100644 (file)

index 0000000..cb589ca
--- /dev/null
+++ b/tests/mpz/t-set_str.c
@@ -0,0 +1,95 @@
+/* Test mpz_set_str.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+void
+check_one (mpz_srcptr want, int base, const char *str)   /* str must parse to want */
+{
+  mpz_t   got;
+
+  MPZ_CHECK_FORMAT (want);
+  mp_trace_base = (base == 0 ? 16 : base);   /* trace in hex when base is 0 */
+
+  mpz_init (got);
+
+  if (mpz_set_str (got, str, base) != 0)   /* nonzero return: rejected */
+    {
+      printf ("mpz_set_str unexpectedly failed\n");
+      printf ("  base %d\n", base);
+      printf ("  str  \"%s\"\n", str);
+      abort ();
+    }
+  MPZ_CHECK_FORMAT (got);
+
+  if (mpz_cmp (got, want) != 0)   /* parsed, but to the wrong value */
+    {
+      printf ("mpz_set_str wrong\n");
+      printf ("  base %d\n", base);
+      printf ("  str  \"%s\"\n", str);
+      mpz_trace ("got ", got);
+      mpz_trace ("want", want);
+      abort ();
+    }
+
+  mpz_clear (got);
+}
+
+void
+check_samples (void)   /* strings padded with blanks must still parse */
+{
+  mpz_t  z;
+
+  mpz_init (z);
+
+  mpz_set_ui (z, 0L);   /* zero with trailing blanks, base 0 and base 10 */
+  check_one (z, 0, "0 ");
+  check_one (z, 0, "0    ");
+  check_one (z, 10, "0 ");
+  check_one (z, 10, "0    ");
+  check_one (z, 10, "0000000    ");
+
+  mpz_set_ui (z, 123L);   /* 123 with leading/trailing blanks and zeros */
+  check_one (z, 0, "123 ");
+  check_one (z, 0, "123    ");
+  check_one (z, 10, "123 ");
+  check_one (z, 10, "123    ");
+  check_one (z, 0, " 123 ");
+  check_one (z, 0, "  123    ");
+  check_one (z, 10, "  0000123 ");
+  check_one (z, 10, "  123    ");
+
+  mpz_clear (z);
+}
+
+int
+main (void)
+{
+  tests_start ();
+
+  check_samples ();   /* the only check this program runs */
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-sizeinbase.c b/tests/mpz/t-sizeinbase.c

new file mode 100644 (file)

index 0000000..c9d0f6c
--- /dev/null
+++ b/tests/mpz/t-sizeinbase.c
@@ -0,0 +1,90 @@
+/* Test mpz_sizeinbase.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+#if 0
+  /* Disabled due to the bogosity of trying to fake an _mp_d pointer to
+     below an object.  Has been seen to fail on a hppa system and on ia64.  */
+
+
+/* Create a fake mpz consisting of just a single 1 bit, with totbits being
+   the total number of bits, inclusive of that 1 bit.  */
+void
+mpz_fake_bits (mpz_ptr z, unsigned long totbits)
+{
+  static mp_limb_t  n;
+  unsigned long     zero_bits, zero_limbs;
+  /* Split the totbits-1 low positions into whole limbs plus a bit offset.  */
+  zero_bits = totbits - 1;
+  zero_limbs = zero_bits / GMP_NUMB_BITS;
+  zero_bits %= GMP_NUMB_BITS;
+  /* Aim PTR(z) so that its top limb, index SIZ(z)-1, is the static n.  */
+  SIZ(z) = zero_limbs + 1;
+  PTR(z) = (&n) - (SIZ(z) - 1);
+  n = CNST_LIMB(1) << zero_bits;
+  /* Sanity check: the fake must report totbits digits in base 2.  */
+  ASSERT_ALWAYS (mpz_sizeinbase (z, 2) == totbits);
+}
+
+
+/* This was seen to fail on a GNU/Linux powerpc32 with gcc 2.95.2,
+   apparently due to a doubtful value of mp_bases[10].chars_per_bit_exactly
+   (0X1.34413509F79FDP-2 whereas 0X1.34413509F79FFP-2 is believed correct).
+   Presumably this is a glibc problem when gcc converts the decimal string
+   in mp_bases.c, or maybe it's only a function of the rounding mode during
+   compilation.  */
+void
+check_sample (void)
+{
+  unsigned long  totbits = 198096465;
+  int        base = 10;
+  size_t     want = 59632979;   /* size_t's are printed as unsigned long */
+  size_t     got;
+  mpz_t      z;
+  /* z is never mpz_init'ed; mpz_fake_bits fills in its size and pointer.  */
+  mpz_fake_bits (z, totbits);
+  got = mpz_sizeinbase (z, base);
+  if (got != want)
+    {
+      printf ("mpz_sizeinbase\n");
+      printf ("  base    %d\n",  base);
+      printf ("  totbits %lu\n", totbits);
+      printf ("  got     %lu\n", (unsigned long) got);
+      printf ("  want    %lu\n", (unsigned long) want);
+      abort ();
+    }
+}
+#endif
+
+int
+main (void)
+{
+  tests_start ();
+  /* check_sample is inside #if 0, so no check is currently run.  */
+  /* check_sample (); */
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpz/t-sqrtrem.c b/tests/mpz/t-sqrtrem.c

new file mode 100644 (file)

index 0000000..ce97532
--- /dev/null
+++ b/tests/mpz/t-sqrtrem.c
@@ -0,0 +1,117 @@
+/* Test mpz_add, mpz_add_ui, mpz_cmp, mpz_mul, mpz_sqrtrem.
+
+Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2002 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+void dump_abort __GMP_PROTO ((mpz_t, mpz_t, mpz_t));
+void debug_mp __GMP_PROTO ((mpz_t, int));
+
+int
+main (int argc, char **argv)
+{
+  mpz_t x2;
+  mpz_t x, rem;
+  mpz_t temp, temp2;
+  mp_size_t x2_size;
+  int i;
+  int reps = 1000;
+  gmp_randstate_ptr rands;
+  mpz_t bs;
+  unsigned long size_range;
+
+  tests_start ();
+  TESTS_REPS (reps, argv, argc);
+
+  rands = RANDS;
+
+  mpz_init (bs);
+
+  mpz_init (x2);
+  mpz_init (x);
+  mpz_init (rem);
+  mpz_init (temp);
+  mpz_init (temp2);
+  /* For random x2, check x^2 <= x2 < (x+1)^2 and x2 == x^2 + rem.  */
+  for (i = 0; i < reps; i++)
+    {
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 17 + 2; /* 0..262143 bit operands */
+      /* size_range is 2..18; x2_size is then drawn with that many bits.  */
+      mpz_urandomb (bs, rands, size_range);
+      x2_size = mpz_get_ui (bs);
+      mpz_rrandomb (x2, rands, x2_size);
+
+      /* printf ("%ld\n", SIZ (x2)); */
+
+      mpz_sqrtrem (x, rem, x2);
+      MPZ_CHECK_FORMAT (x);
+      MPZ_CHECK_FORMAT (rem);
+
+      mpz_mul (temp, x, x);
+
+      /* Is square of result > argument?  */
+      if (mpz_cmp (temp, x2) > 0)
+       dump_abort (x2, x, rem);
+
+      mpz_add_ui (temp2, x, 1);
+      mpz_mul (temp2, temp2, temp2);
+
+      /* Is square of (result + 1) <= argument?  */
+      if (mpz_cmp (temp2, x2) <= 0)
+       dump_abort (x2, x, rem);
+
+      mpz_add (temp2, temp, rem);
+
+      /* Is the remainder wrong?  */
+      if (mpz_cmp (x2, temp2) != 0)
+       dump_abort (x2, x, rem);
+    }
+
+  mpz_clear (bs);
+  mpz_clear (x2);
+  mpz_clear (x);
+  mpz_clear (rem);
+  mpz_clear (temp);
+  mpz_clear (temp2);
+
+  tests_end ();
+  exit (0);
+}
+
+void
+dump_abort (mpz_t x2, mpz_t x, mpz_t rem)
+{ /* Print the three values on stderr via debug_mp (base -16), then abort.  */
+  fputs ("ERROR\n", stderr);
+  fputs ("x2        = ", stderr); debug_mp (x2, -16);
+  fputs ("x         = ", stderr); debug_mp (x, -16);
+  fputs ("remainder = ", stderr); debug_mp (rem, -16);
+  abort ();
+}
+
+void
+debug_mp (mpz_t x, int base)
+{ /* Write X in BASE to stderr, followed by a newline.  */
+  (void) mpz_out_str (stderr, base, x); fputs ("\n", stderr);
+}
diff --git a/tests/mpz/t-tdiv.c b/tests/mpz/t-tdiv.c

new file mode 100644 (file)

index 0000000..c94eff8
--- /dev/null
+++ b/tests/mpz/t-tdiv.c
@@ -0,0 +1,146 @@
+/* Test mpz_abs, mpz_add, mpz_cmp, mpz_cmp_ui, mpz_tdiv_qr, mpz_tdiv_q,
+   mpz_tdiv_r, mpz_mul.
+
+Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+void dump_abort __GMP_PROTO ((mpz_t, mpz_t));
+void debug_mp __GMP_PROTO ((mpz_t, int));
+
+int
+main (int argc, char **argv)
+{
+  mpz_t dividend, divisor;
+  mpz_t quotient, remainder;
+  mpz_t quotient2, remainder2;
+  mpz_t temp;
+  mp_size_t dividend_size, divisor_size;
+  int i;
+  int reps = 1000;
+  gmp_randstate_ptr rands;
+  mpz_t bs;
+  unsigned long bsi, size_range;
+
+  tests_start ();
+  TESTS_REPS (reps, argv, argc);
+
+  rands = RANDS;
+
+  mpz_init (bs);
+
+  mpz_init (dividend);
+  mpz_init (divisor);
+  mpz_init (quotient);
+  mpz_init (remainder);
+  mpz_init (quotient2);
+  mpz_init (remainder2);
+  mpz_init (temp);
+  /* Cross-check mpz_tdiv_qr, mpz_tdiv_q and mpz_tdiv_r on random operands.  */
+  for (i = 0; i < reps; i++)
+    {
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 18 + 2; /* 0..524287 bit operands */
+      /* Redraw until the divisor is non-zero.  */
+      do
+       {
+         mpz_urandomb (bs, rands, size_range);
+         divisor_size = mpz_get_ui (bs);
+         mpz_rrandomb (divisor, rands, divisor_size);
+       }
+      while (mpz_sgn (divisor) == 0);
+      /* dividend_size is a fresh draw plus divisor_size.  */
+      mpz_urandomb (bs, rands, size_range);
+      dividend_size = mpz_get_ui (bs) + divisor_size;
+      mpz_rrandomb (dividend, rands, dividend_size);
+      /* Two random bits decide whether each operand is negated.  */
+      mpz_urandomb (bs, rands, 2);
+      bsi = mpz_get_ui (bs);
+      if ((bsi & 1) != 0)
+       mpz_neg (dividend, dividend);
+      if ((bsi & 2) != 0)
+       mpz_neg (divisor, divisor);
+
+      /* printf ("%ld %ld\n", SIZ (dividend), SIZ (divisor)); */
+
+      mpz_tdiv_qr (quotient, remainder, dividend, divisor);
+      mpz_tdiv_q (quotient2, dividend, divisor);
+      mpz_tdiv_r (remainder2, dividend, divisor);
+
+      /* First determine that the quotients and remainders computed
+        with different functions are equal.  */
+      if (mpz_cmp (quotient, quotient2) != 0)
+       dump_abort (dividend, divisor);
+      if (mpz_cmp (remainder, remainder2) != 0)
+       dump_abort (dividend, divisor);
+
+      /* Check if the sign of the quotient is correct.  */
+      if (mpz_cmp_ui (quotient, 0) != 0)
+       if ((mpz_cmp_ui (quotient, 0) < 0)
+           != ((mpz_cmp_ui (dividend, 0) ^ mpz_cmp_ui (divisor, 0)) < 0))
+       dump_abort (dividend, divisor);
+
+      /* Check if the remainder has the same sign as the dividend
+        (quotient rounded towards 0).  */
+      if (mpz_cmp_ui (remainder, 0) != 0)
+       if ((mpz_cmp_ui (remainder, 0) < 0) != (mpz_cmp_ui (dividend, 0) < 0))
+         dump_abort (dividend, divisor);
+      /* quotient * divisor + remainder must reproduce the dividend.  */
+      mpz_mul (temp, quotient, divisor);
+      mpz_add (temp, temp, remainder);
+      if (mpz_cmp (temp, dividend) != 0)
+       dump_abort (dividend, divisor);
+      /* Finally |remainder| must be below |divisor|.  */
+      mpz_abs (temp, divisor);
+      mpz_abs (remainder, remainder);
+      if (mpz_cmp (remainder, temp) >= 0)
+       dump_abort (dividend, divisor);
+    }
+
+  mpz_clear (bs);
+  mpz_clear (dividend);
+  mpz_clear (divisor);
+  mpz_clear (quotient);
+  mpz_clear (remainder);
+  mpz_clear (quotient2);
+  mpz_clear (remainder2);
+  mpz_clear (temp);
+
+  tests_end ();
+  exit (0);
+}
+
+void
+dump_abort (mpz_t dividend, mpz_t divisor)
+{ /* Print both operands on stderr via debug_mp (base -16), then abort.  */
+  fputs ("ERROR\n", stderr);
+  fputs ("dividend = ", stderr); debug_mp (dividend, -16);
+  fputs ("divisor  = ", stderr); debug_mp (divisor, -16);
+  abort ();
+}
+
+void
+debug_mp (mpz_t x, int base)
+{ /* Write X in BASE to stderr, followed by a newline.  */
+  (void) mpz_out_str (stderr, base, x); fputs ("\n", stderr);
+}
diff --git a/tests/mpz/t-tdiv_ui.c b/tests/mpz/t-tdiv_ui.c

new file mode 100644 (file)

index 0000000..8ceaac1
--- /dev/null
+++ b/tests/mpz/t-tdiv_ui.c
@@ -0,0 +1,159 @@
+/* Test mpz_abs, mpz_add, mpz_cmp, mpz_cmp_ui, mpz_tdiv_qr_ui, mpz_tdiv_q_ui,
+   mpz_tdiv_r_ui, mpz_tdiv_ui, mpz_mul_ui.
+
+Copyright 1993, 1994, 1996, 2000, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+void dump_abort __GMP_PROTO ((char *, mpz_t, unsigned long));
+void debug_mp __GMP_PROTO ((mpz_t, int));
+
+int
+main (int argc, char **argv)
+{
+  mpz_t dividend;
+  mpz_t quotient, remainder;
+  mpz_t quotient2, remainder2;
+  mpz_t temp;
+  mp_size_t dividend_size;
+  unsigned long divisor;
+  int i;
+  int reps = 200000;
+  gmp_randstate_ptr rands;
+  mpz_t bs;
+  unsigned long bsi, size_range;
+  unsigned long r_rq, r_q, r_r, r;
+
+  tests_start ();
+  rands = RANDS;
+
+  mpz_init (bs);
+  /* NOTE(review): argv[1] goes through atoi unvalidated -- confirm intended.  */
+  if (argc == 2)
+     reps = atoi (argv[1]);
+
+  mpz_init (dividend);
+  mpz_init (quotient);
+  mpz_init (remainder);
+  mpz_init (quotient2);
+  mpz_init (remainder2);
+  mpz_init (temp);
+  /* Cross-check the four mpz_tdiv_*ui variants on random dividends.  */
+  for (i = 0; i < reps; i++)
+    {
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 10 + 2; /* 0..2047 bit operands */
+      /* Redraw until the unsigned long divisor is non-zero.  */
+      do
+       {
+         mpz_rrandomb (bs, rands, 64);
+         divisor = mpz_get_ui (bs);
+       }
+      while (divisor == 0);
+
+      mpz_urandomb (bs, rands, size_range);
+      dividend_size = mpz_get_ui (bs);
+      mpz_rrandomb (dividend, rands, dividend_size);
+      /* Two bits are drawn; only bit 0 is used, to negate the dividend.  */
+      mpz_urandomb (bs, rands, 2);
+      bsi = mpz_get_ui (bs);
+      if ((bsi & 1) != 0)
+       mpz_neg (dividend, dividend);
+
+      /* printf ("%ld\n", SIZ (dividend)); */
+
+      r_rq = mpz_tdiv_qr_ui (quotient, remainder, dividend, divisor);
+      r_q = mpz_tdiv_q_ui (quotient2, dividend, divisor);
+      r_r = mpz_tdiv_r_ui (remainder2, dividend, divisor);
+      r = mpz_tdiv_ui (dividend, divisor);
+
+      /* First determine that the quotients and remainders computed
+        with different functions are equal.  */
+      if (mpz_cmp (quotient, quotient2) != 0)
+       dump_abort ("quotients from mpz_tdiv_qr_ui and mpz_tdiv_q_ui differ",
+                   dividend, divisor);
+      if (mpz_cmp (remainder, remainder2) != 0)
+       dump_abort ("remainders from mpz_tdiv_qr_ui and mpz_tdiv_r_ui differ",
+                   dividend, divisor);
+
+      /* Check if the sign of the quotient is correct.  */
+      if (mpz_cmp_ui (quotient, 0) != 0)
+       if ((mpz_cmp_ui (quotient, 0) < 0)
+           != (mpz_cmp_ui (dividend, 0) < 0))
+       dump_abort ("quotient sign wrong", dividend, divisor);
+
+      /* Check if the remainder has the same sign as the dividend
+        (quotient rounded towards 0).  */
+      if (mpz_cmp_ui (remainder, 0) != 0)
+       if ((mpz_cmp_ui (remainder, 0) < 0) != (mpz_cmp_ui (dividend, 0) < 0))
+         dump_abort ("remainder sign wrong", dividend, divisor);
+      /* quotient * divisor + remainder must reproduce the dividend.  */
+      mpz_mul_ui (temp, quotient, divisor);
+      mpz_add (temp, temp, remainder);
+      if (mpz_cmp (temp, dividend) != 0)
+       dump_abort ("n mod d != n - [n/d]*d", dividend, divisor);
+      /* From here on remainder holds its own absolute value.  */
+      mpz_abs (remainder, remainder);
+      if (mpz_cmp_ui (remainder, divisor) >= 0)
+       dump_abort ("remainder greater than divisor", dividend, divisor);
+
+      if (mpz_cmp_ui (remainder, r_rq) != 0)
+       dump_abort ("remainder returned from mpz_tdiv_qr_ui is wrong",
+                   dividend, divisor);
+      if (mpz_cmp_ui (remainder, r_q) != 0)
+       dump_abort ("remainder returned from mpz_tdiv_q_ui is wrong",
+                   dividend, divisor);
+      if (mpz_cmp_ui (remainder, r_r) != 0)
+       dump_abort ("remainder returned from mpz_tdiv_r_ui is wrong",
+                   dividend, divisor);
+      if (mpz_cmp_ui (remainder, r) != 0)
+       dump_abort ("remainder returned from mpz_tdiv_ui is wrong",
+                   dividend, divisor);
+    }
+
+  mpz_clear (bs);
+  mpz_clear (dividend);
+  mpz_clear (quotient);
+  mpz_clear (remainder);
+  mpz_clear (quotient2);
+  mpz_clear (remainder2);
+  mpz_clear (temp);
+
+  tests_end ();
+  exit (0);
+}
+
+void
+dump_abort (char *str, mpz_t dividend, unsigned long divisor)
+{ /* Print STR and both operands on stderr, then abort.  */
+  fprintf (stderr, "ERROR: %s\n", str);
+  fputs ("dividend = ", stderr); debug_mp (dividend, -16);
+  fprintf (stderr, "divisor  = %lX\n", divisor);
+  abort ();
+}
+
+void
+debug_mp (mpz_t x, int base)
+{ /* Write X in BASE to stderr, followed by a newline.  */
+  (void) mpz_out_str (stderr, base, x); fputs ("\n", stderr);
+}
diff --git a/tests/rand/Makefile.am b/tests/rand/Makefile.am

new file mode 100644 (file)

index 0000000..dd995db
--- /dev/null
+++ b/tests/rand/Makefile.am
@@ -0,0 +1,89 @@
+## Process this file with automake to generate Makefile.in
+
+# Copyright 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/tests
+LDADD = $(top_builddir)/tests/libtests.la $(top_builddir)/libgmp.la
+## Every check program is also listed in TESTS.
+check_PROGRAMS = t-iset t-lc2exp t-mt t-rand t-urbui t-urmui t-urndmm
+TESTS = $(check_PROGRAMS)
+## Extra tools; the "allprogs" target below depends on all of them.
+EXTRA_PROGRAMS = findlc gen gen.static spect stat
+gen_static_SOURCES = gen.c
+gen_static_LDFLAGS = -static
+findlc_LDADD = libstat.la
+spect_LDADD = libstat.la
+stat_LDADD = libstat.la
+## Helper library that findlc, spect and stat link against.
+EXTRA_LTLIBRARIES = libstat.la
+libstat_la_SOURCES = gmpstat.h statlib.c zdiv_round.c
+libstat_la_LIBADD = $(top_builddir)/libgmp.la $(LIBM)
+
+CLEANFILES = $(EXTRA_PROGRAMS) $(EXTRA_LTLIBRARIES)
+
+allprogs: $(EXTRA_PROGRAMS)
+
+$(top_builddir)/tests/libtests.la:
+       cd $(top_builddir)/tests; $(MAKE) $(AM_MAKEFLAGS) libtests.la
+## Manual checks: pipe ./gen output through ./stat for several -z values.
+manual-test: gen$(EXEEXT) stat$(EXEEXT)
+       @(echo -n '16i: '; ./gen -f mpz_urandomb -z 16 1000 \
+               | ./stat -i 0xffff | grep '^[0-9]')
+       @(echo -n '32i: '; ./gen -f mpz_urandomb -z 32 1000 \
+               | ./stat -i 0xffffffff  | grep '^[0-9]')
+       @(echo -n '33i: '; ./gen -f mpz_urandomb -z 33 1000 \
+               | ./stat -i 0x1ffffffff  | grep '^[0-9]')
+       @(echo -n '64i: '; ./gen -f mpz_urandomb -z 64 1000 \
+               | ./stat -i 0xffffffffffffffff  | grep '^[0-9]')
+       @(echo -n '128i: '; ./gen -f mpz_urandomb -z 128 1000 \
+               | ./stat -i 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF | grep '^[0-9]')
+
+       @(echo -n '16f: '; ./gen -f mpf_urandomb -z 16 1000 \
+               | ./stat | grep '^[0-9]')
+       @(echo -n '32f: '; ./gen -f mpf_urandomb -z 32 1000 \
+               | ./stat | grep '^[0-9]')
+       @(echo -n '33f: '; ./gen -f mpf_urandomb -z 33 1000 \
+               | ./stat | grep '^[0-9]')
+       @(echo -n '64f: '; ./gen -f mpf_urandomb -z 64 1000 \
+               | ./stat | grep '^[0-9]')
+       @(echo -n '128f: '; ./gen -f mpf_urandomb -z 128 1000 \
+               | ./stat | grep '^[0-9]')
+## Same pipelines with a count argument of 50000 and "stat -2 1000".
+manual-bigtest: gen$(EXEEXT) stat$(EXEEXT)
+       @(echo '16i: '; ./gen -f mpz_urandomb -z 16 50000 \
+               | ./stat -2 1000 -i 0xffff | grep '^K[mp]')
+       @(echo '32i: '; ./gen -f mpz_urandomb -z 32 50000 \
+               | ./stat -2 1000 -i 0xffffffff | grep '^K[mp]')
+       @(echo '33i: '; ./gen -f mpz_urandomb -z 33 50000 \
+               | ./stat -2 1000 -i 0x1ffffffff | grep '^K[mp]')
+       @(echo '64i: '; ./gen -f mpz_urandomb -z 64 50000 \
+               | ./stat -2 1000 -i 0xffffffffffffffff  | grep '^K[mp]')
+       @(echo '128i: '; ./gen -f mpz_urandomb -z 128 50000 \
+               | ./stat -2 1000 -i 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF | grep '^K[mp]')
+
+       @(echo '16f: '; ./gen -f mpf_urandomb -z 16 50000 \
+               | ./stat -2 1000 | grep '^K[mp]')
+       @(echo '32f: '; ./gen -f mpf_urandomb -z 32 50000 \
+               | ./stat -2 1000 | grep '^K[mp]')
+       @(echo '33f: '; ./gen -f mpf_urandomb -z 33 50000 \
+               | ./stat -2 1000 | grep '^K[mp]')
+       @(echo '64f: '; ./gen -f mpf_urandomb -z 64 50000 \
+               | ./stat -2 1000 | grep '^K[mp]')
+       @(echo '128f: '; ./gen -f mpf_urandomb -z 128 50000 \
+               | ./stat -2 1000 | grep '^K[mp]')
diff --git a/tests/rand/Makefile.in b/tests/rand/Makefile.in

new file mode 100644 (file)

index 0000000..5fb3235
--- /dev/null
+++ b/tests/rand/Makefile.in
@@ -0,0 +1,813 @@
+# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation,
+# Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+# Copyright 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+ANSI2KNR = $(top_builddir)/ansi2knr
+check_PROGRAMS = t-iset$(EXEEXT) t-lc2exp$(EXEEXT) t-mt$(EXEEXT) \
+       t-rand$(EXEEXT) t-urbui$(EXEEXT) t-urmui$(EXEEXT) \
+       t-urndmm$(EXEEXT)
+EXTRA_PROGRAMS = findlc$(EXEEXT) gen$(EXEEXT) gen.static$(EXEEXT) \
+       spect$(EXEEXT) stat$(EXEEXT)
+subdir = tests/rand
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
+       $(top_srcdir)/configure.in
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+       $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+am__DEPENDENCIES_1 =
+libstat_la_DEPENDENCIES = $(top_builddir)/libgmp.la \
+       $(am__DEPENDENCIES_1)
+am_libstat_la_OBJECTS = statlib$U.lo zdiv_round$U.lo
+libstat_la_OBJECTS = $(am_libstat_la_OBJECTS)
+findlc_SOURCES = findlc.c
+findlc_OBJECTS = findlc$U.$(OBJEXT)
+findlc_DEPENDENCIES = libstat.la
+gen_SOURCES = gen.c
+gen_OBJECTS = gen$U.$(OBJEXT)
+gen_LDADD = $(LDADD)
+gen_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+am_gen_static_OBJECTS = gen$U.$(OBJEXT)
+gen_static_OBJECTS = $(am_gen_static_OBJECTS)
+gen_static_LDADD = $(LDADD)
+gen_static_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+gen_static_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
+       $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+       $(gen_static_LDFLAGS) $(LDFLAGS) -o $@
+spect_SOURCES = spect.c
+spect_OBJECTS = spect$U.$(OBJEXT)
+spect_DEPENDENCIES = libstat.la
+stat_SOURCES = stat.c
+stat_OBJECTS = stat$U.$(OBJEXT)
+stat_DEPENDENCIES = libstat.la
+t_iset_SOURCES = t-iset.c
+t_iset_OBJECTS = t-iset$U.$(OBJEXT)
+t_iset_LDADD = $(LDADD)
+t_iset_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_lc2exp_SOURCES = t-lc2exp.c
+t_lc2exp_OBJECTS = t-lc2exp$U.$(OBJEXT)
+t_lc2exp_LDADD = $(LDADD)
+t_lc2exp_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_mt_SOURCES = t-mt.c
+t_mt_OBJECTS = t-mt$U.$(OBJEXT)
+t_mt_LDADD = $(LDADD)
+t_mt_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_rand_SOURCES = t-rand.c
+t_rand_OBJECTS = t-rand$U.$(OBJEXT)
+t_rand_LDADD = $(LDADD)
+t_rand_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_urbui_SOURCES = t-urbui.c
+t_urbui_OBJECTS = t-urbui$U.$(OBJEXT)
+t_urbui_LDADD = $(LDADD)
+t_urbui_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_urmui_SOURCES = t-urmui.c
+t_urmui_OBJECTS = t-urmui$U.$(OBJEXT)
+t_urmui_LDADD = $(LDADD)
+t_urmui_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_urndmm_SOURCES = t-urndmm.c
+t_urndmm_OBJECTS = t-urndmm$U.$(OBJEXT)
+t_urndmm_LDADD = $(LDADD)
+t_urndmm_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
+depcomp =
+am__depfiles_maybe =
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+       $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+       $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
+       $(LDFLAGS) -o $@
+SOURCES = $(libstat_la_SOURCES) findlc.c gen.c $(gen_static_SOURCES) \
+       spect.c stat.c t-iset.c t-lc2exp.c t-mt.c t-rand.c t-urbui.c \
+       t-urmui.c t-urndmm.c
+DIST_SOURCES = $(libstat_la_SOURCES) findlc.c gen.c \
+       $(gen_static_SOURCES) spect.c stat.c t-iset.c t-lc2exp.c \
+       t-mt.c t-rand.c t-urbui.c t-urmui.c t-urndmm.c
+ETAGS = etags
+CTAGS = ctags
+am__tty_colors = \
+red=; grn=; lgn=; blu=; std=
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ABI = @ABI@
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AS = @AS@
+ASMFLAGS = @ASMFLAGS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CALLING_CONVENTIONS_OBJS = @CALLING_CONVENTIONS_OBJS@
+CC = @CC@
+CCAS = @CCAS@
+CC_FOR_BUILD = @CC_FOR_BUILD@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CPP_FOR_BUILD = @CPP_FOR_BUILD@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFN_LONG_LONG_LIMB = @DEFN_LONG_LONG_LIMB@
+DEFS = @DEFS@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+EXEEXT_FOR_BUILD = @EXEEXT_FOR_BUILD@
+FGREP = @FGREP@
+GMP_LDFLAGS = @GMP_LDFLAGS@
+GMP_LIMB_BITS = @GMP_LIMB_BITS@
+GMP_NAIL_BITS = @GMP_NAIL_BITS@
+GREP = @GREP@
+HAVE_CLOCK_01 = @HAVE_CLOCK_01@
+HAVE_CPUTIME_01 = @HAVE_CPUTIME_01@
+HAVE_GETRUSAGE_01 = @HAVE_GETRUSAGE_01@
+HAVE_GETTIMEOFDAY_01 = @HAVE_GETTIMEOFDAY_01@
+HAVE_HOST_CPU_FAMILY_power = @HAVE_HOST_CPU_FAMILY_power@
+HAVE_HOST_CPU_FAMILY_powerpc = @HAVE_HOST_CPU_FAMILY_powerpc@
+HAVE_SIGACTION_01 = @HAVE_SIGACTION_01@
+HAVE_SIGALTSTACK_01 = @HAVE_SIGALTSTACK_01@
+HAVE_SIGSTACK_01 = @HAVE_SIGSTACK_01@
+HAVE_STACK_T_01 = @HAVE_STACK_T_01@
+HAVE_SYS_RESOURCE_H_01 = @HAVE_SYS_RESOURCE_H_01@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LEX = @LEX@
+LEXLIB = @LEXLIB@
+LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
+LIBCURSES = @LIBCURSES@
+LIBGMPXX_LDFLAGS = @LIBGMPXX_LDFLAGS@
+LIBGMP_DLL = @LIBGMP_DLL@
+LIBGMP_LDFLAGS = @LIBGMP_LDFLAGS@
+LIBM = @LIBM@
+LIBM_FOR_BUILD = @LIBM_FOR_BUILD@
+LIBOBJS = @LIBOBJS@
+LIBREADLINE = @LIBREADLINE@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+M4 = @M4@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
+STRIP = @STRIP@
+TAL_OBJECT = @TAL_OBJECT@
+TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
+U = @U@
+U_FOR_BUILD = @U_FOR_BUILD@
+VERSION = @VERSION@
+WITH_READLINE_01 = @WITH_READLINE_01@
+YACC = @YACC@
+YFLAGS = @YFLAGS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__leading_dot = @am__leading_dot@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+gmp_srclinks = @gmp_srclinks@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+mpn_objects = @mpn_objects@
+mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
+mpn_objs_in_libmp = @mpn_objs_in_libmp@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/tests
+LDADD = $(top_builddir)/tests/libtests.la $(top_builddir)/libgmp.la
+TESTS = $(check_PROGRAMS)
+gen_static_SOURCES = gen.c
+gen_static_LDFLAGS = -static
+findlc_LDADD = libstat.la
+spect_LDADD = libstat.la
+stat_LDADD = libstat.la
+EXTRA_LTLIBRARIES = libstat.la
+libstat_la_SOURCES = gmpstat.h statlib.c zdiv_round.c
+libstat_la_LIBADD = $(top_builddir)/libgmp.la $(LIBM)
+CLEANFILES = $(EXTRA_PROGRAMS) $(EXTRA_LTLIBRARIES)
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps)
+       @for dep in $?; do \
+         case '$(am__configure_deps)' in \
+           *$$dep*) \
+             ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+               && { if test -f $@; then exit 0; else break; fi; }; \
+             exit 1;; \
+         esac; \
+       done; \
+       echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps tests/rand/Makefile'; \
+       $(am__cd) $(top_srcdir) && \
+         $(AUTOMAKE) --gnu --ignore-deps tests/rand/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+       @case '$?' in \
+         *config.status*) \
+           cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+         *) \
+           echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+           cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+       esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+libstat.la: $(libstat_la_OBJECTS) $(libstat_la_DEPENDENCIES) 
+       $(LINK)  $(libstat_la_OBJECTS) $(libstat_la_LIBADD) $(LIBS)
+
+clean-checkPROGRAMS:
+       @list='$(check_PROGRAMS)'; test -n "$$list" || exit 0; \
+       echo " rm -f" $$list; \
+       rm -f $$list || exit $$?; \
+       test -n "$(EXEEXT)" || exit 0; \
+       list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
+       echo " rm -f" $$list; \
+       rm -f $$list
+findlc$(EXEEXT): $(findlc_OBJECTS) $(findlc_DEPENDENCIES) 
+       @rm -f findlc$(EXEEXT)
+       $(LINK) $(findlc_OBJECTS) $(findlc_LDADD) $(LIBS)
+gen$(EXEEXT): $(gen_OBJECTS) $(gen_DEPENDENCIES) 
+       @rm -f gen$(EXEEXT)
+       $(LINK) $(gen_OBJECTS) $(gen_LDADD) $(LIBS)
+gen.static$(EXEEXT): $(gen_static_OBJECTS) $(gen_static_DEPENDENCIES) 
+       @rm -f gen.static$(EXEEXT)
+       $(gen_static_LINK) $(gen_static_OBJECTS) $(gen_static_LDADD) $(LIBS)
+spect$(EXEEXT): $(spect_OBJECTS) $(spect_DEPENDENCIES) 
+       @rm -f spect$(EXEEXT)
+       $(LINK) $(spect_OBJECTS) $(spect_LDADD) $(LIBS)
+stat$(EXEEXT): $(stat_OBJECTS) $(stat_DEPENDENCIES) 
+       @rm -f stat$(EXEEXT)
+       $(LINK) $(stat_OBJECTS) $(stat_LDADD) $(LIBS)
+t-iset$(EXEEXT): $(t_iset_OBJECTS) $(t_iset_DEPENDENCIES) 
+       @rm -f t-iset$(EXEEXT)
+       $(LINK) $(t_iset_OBJECTS) $(t_iset_LDADD) $(LIBS)
+t-lc2exp$(EXEEXT): $(t_lc2exp_OBJECTS) $(t_lc2exp_DEPENDENCIES) 
+       @rm -f t-lc2exp$(EXEEXT)
+       $(LINK) $(t_lc2exp_OBJECTS) $(t_lc2exp_LDADD) $(LIBS)
+t-mt$(EXEEXT): $(t_mt_OBJECTS) $(t_mt_DEPENDENCIES) 
+       @rm -f t-mt$(EXEEXT)
+       $(LINK) $(t_mt_OBJECTS) $(t_mt_LDADD) $(LIBS)
+t-rand$(EXEEXT): $(t_rand_OBJECTS) $(t_rand_DEPENDENCIES) 
+       @rm -f t-rand$(EXEEXT)
+       $(LINK) $(t_rand_OBJECTS) $(t_rand_LDADD) $(LIBS)
+t-urbui$(EXEEXT): $(t_urbui_OBJECTS) $(t_urbui_DEPENDENCIES) 
+       @rm -f t-urbui$(EXEEXT)
+       $(LINK) $(t_urbui_OBJECTS) $(t_urbui_LDADD) $(LIBS)
+t-urmui$(EXEEXT): $(t_urmui_OBJECTS) $(t_urmui_DEPENDENCIES) 
+       @rm -f t-urmui$(EXEEXT)
+       $(LINK) $(t_urmui_OBJECTS) $(t_urmui_LDADD) $(LIBS)
+t-urndmm$(EXEEXT): $(t_urndmm_OBJECTS) $(t_urndmm_DEPENDENCIES) 
+       @rm -f t-urndmm$(EXEEXT)
+       $(LINK) $(t_urndmm_OBJECTS) $(t_urndmm_LDADD) $(LIBS)
+
+mostlyclean-compile:
+       -rm -f *.$(OBJEXT)
+
+distclean-compile:
+       -rm -f *.tab.c
+$(top_builddir)/ansi2knr:
+       $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
+
+mostlyclean-kr:
+       -test "$U" = "" || rm -f *_.c
+
+.c.o:
+       $(COMPILE) -c $<
+
+.c.obj:
+       $(COMPILE) -c `$(CYGPATH_W) '$<'`
+
+.c.lo:
+       $(LTCOMPILE) -c -o $@ $<
+findlc_.c: findlc.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/findlc.c; then echo $(srcdir)/findlc.c; else echo findlc.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+gen_.c: gen.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gen.c; then echo $(srcdir)/gen.c; else echo gen.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+spect_.c: spect.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/spect.c; then echo $(srcdir)/spect.c; else echo spect.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+stat_.c: stat.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/stat.c; then echo $(srcdir)/stat.c; else echo stat.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+statlib_.c: statlib.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/statlib.c; then echo $(srcdir)/statlib.c; else echo statlib.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-iset_.c: t-iset.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-iset.c; then echo $(srcdir)/t-iset.c; else echo t-iset.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-lc2exp_.c: t-lc2exp.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-lc2exp.c; then echo $(srcdir)/t-lc2exp.c; else echo t-lc2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-mt_.c: t-mt.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-mt.c; then echo $(srcdir)/t-mt.c; else echo t-mt.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-rand_.c: t-rand.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-rand.c; then echo $(srcdir)/t-rand.c; else echo t-rand.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-urbui_.c: t-urbui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-urbui.c; then echo $(srcdir)/t-urbui.c; else echo t-urbui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-urmui_.c: t-urmui.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-urmui.c; then echo $(srcdir)/t-urmui.c; else echo t-urmui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+t-urndmm_.c: t-urndmm.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-urndmm.c; then echo $(srcdir)/t-urndmm.c; else echo t-urndmm.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+zdiv_round_.c: zdiv_round.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/zdiv_round.c; then echo $(srcdir)/zdiv_round.c; else echo zdiv_round.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+findlc_.$(OBJEXT) findlc_.lo gen_.$(OBJEXT) gen_.lo spect_.$(OBJEXT) \
+spect_.lo stat_.$(OBJEXT) stat_.lo statlib_.$(OBJEXT) statlib_.lo \
+t-iset_.$(OBJEXT) t-iset_.lo t-lc2exp_.$(OBJEXT) t-lc2exp_.lo \
+t-mt_.$(OBJEXT) t-mt_.lo t-rand_.$(OBJEXT) t-rand_.lo \
+t-urbui_.$(OBJEXT) t-urbui_.lo t-urmui_.$(OBJEXT) t-urmui_.lo \
+t-urndmm_.$(OBJEXT) t-urndmm_.lo zdiv_round_.$(OBJEXT) zdiv_round_.lo \
+: $(ANSI2KNR)
+
+mostlyclean-libtool:
+       -rm -f *.lo
+
+clean-libtool:
+       -rm -rf .libs _libs
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+       list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       mkid -fID $$unique
+tags: TAGS
+
+TAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+               $(TAGS_FILES) $(LISP)
+       set x; \
+       here=`pwd`; \
+       list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       shift; \
+       if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+         test -n "$$unique" || unique=$$empty_fix; \
+         if test $$# -gt 0; then \
+           $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+             "$$@" $$unique; \
+         else \
+           $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+             $$unique; \
+         fi; \
+       fi
+ctags: CTAGS
+CTAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+               $(TAGS_FILES) $(LISP)
+       list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       test -z "$(CTAGS_ARGS)$$unique" \
+         || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+            $$unique
+
+GTAGS:
+       here=`$(am__cd) $(top_builddir) && pwd` \
+         && $(am__cd) $(top_srcdir) \
+         && gtags -i $(GTAGS_ARGS) "$$here"
+
+distclean-tags:
+       -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+check-TESTS: $(TESTS)
+       @failed=0; all=0; xfail=0; xpass=0; skip=0; \
+       srcdir=$(srcdir); export srcdir; \
+       list=' $(TESTS) '; \
+       $(am__tty_colors); \
+       if test -n "$$list"; then \
+         for tst in $$list; do \
+           if test -f ./$$tst; then dir=./; \
+           elif test -f $$tst; then dir=; \
+           else dir="$(srcdir)/"; fi; \
+           if $(TESTS_ENVIRONMENT) $${dir}$$tst; then \
+             all=`expr $$all + 1`; \
+             case " $(XFAIL_TESTS) " in \
+             *[\ \     ]$$tst[\ \      ]*) \
+               xpass=`expr $$xpass + 1`; \
+               failed=`expr $$failed + 1`; \
+               col=$$red; res=XPASS; \
+             ;; \
+             *) \
+               col=$$grn; res=PASS; \
+             ;; \
+             esac; \
+           elif test $$? -ne 77; then \
+             all=`expr $$all + 1`; \
+             case " $(XFAIL_TESTS) " in \
+             *[\ \     ]$$tst[\ \      ]*) \
+               xfail=`expr $$xfail + 1`; \
+               col=$$lgn; res=XFAIL; \
+             ;; \
+             *) \
+               failed=`expr $$failed + 1`; \
+               col=$$red; res=FAIL; \
+             ;; \
+             esac; \
+           else \
+             skip=`expr $$skip + 1`; \
+             col=$$blu; res=SKIP; \
+           fi; \
+           echo "$${col}$$res$${std}: $$tst"; \
+         done; \
+         if test "$$all" -eq 1; then \
+           tests="test"; \
+           All=""; \
+         else \
+           tests="tests"; \
+           All="All "; \
+         fi; \
+         if test "$$failed" -eq 0; then \
+           if test "$$xfail" -eq 0; then \
+             banner="$$All$$all $$tests passed"; \
+           else \
+             if test "$$xfail" -eq 1; then failures=failure; else failures=failures; fi; \
+             banner="$$All$$all $$tests behaved as expected ($$xfail expected $$failures)"; \
+           fi; \
+         else \
+           if test "$$xpass" -eq 0; then \
+             banner="$$failed of $$all $$tests failed"; \
+           else \
+             if test "$$xpass" -eq 1; then passes=pass; else passes=passes; fi; \
+             banner="$$failed of $$all $$tests did not behave as expected ($$xpass unexpected $$passes)"; \
+           fi; \
+         fi; \
+         dashes="$$banner"; \
+         skipped=""; \
+         if test "$$skip" -ne 0; then \
+           if test "$$skip" -eq 1; then \
+             skipped="($$skip test was not run)"; \
+           else \
+             skipped="($$skip tests were not run)"; \
+           fi; \
+           test `echo "$$skipped" | wc -c` -le `echo "$$banner" | wc -c` || \
+             dashes="$$skipped"; \
+         fi; \
+         report=""; \
+         if test "$$failed" -ne 0 && test -n "$(PACKAGE_BUGREPORT)"; then \
+           report="Please report to $(PACKAGE_BUGREPORT)"; \
+           test `echo "$$report" | wc -c` -le `echo "$$banner" | wc -c` || \
+             dashes="$$report"; \
+         fi; \
+         dashes=`echo "$$dashes" | sed s/./=/g`; \
+         if test "$$failed" -eq 0; then \
+           echo "$$grn$$dashes"; \
+         else \
+           echo "$$red$$dashes"; \
+         fi; \
+         echo "$$banner"; \
+         test -z "$$skipped" || echo "$$skipped"; \
+         test -z "$$report" || echo "$$report"; \
+         echo "$$dashes$$std"; \
+         test "$$failed" -eq 0; \
+       else :; fi
+
+distdir: $(DISTFILES)
+       @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+       topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+       list='$(DISTFILES)'; \
+         dist_files=`for file in $$list; do echo $$file; done | \
+         sed -e "s|^$$srcdirstrip/||;t" \
+             -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+       case $$dist_files in \
+         */*) $(MKDIR_P) `echo "$$dist_files" | \
+                          sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+                          sort -u` ;; \
+       esac; \
+       for file in $$dist_files; do \
+         if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+         if test -d $$d/$$file; then \
+           dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+           if test -d "$(distdir)/$$file"; then \
+             find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+           fi; \
+           if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+             cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+             find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+           fi; \
+           cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+         else \
+           test -f "$(distdir)/$$file" \
+           || cp -p $$d/$$file "$(distdir)/$$file" \
+           || exit 1; \
+         fi; \
+       done
+check-am: all-am
+       $(MAKE) $(AM_MAKEFLAGS) $(check_PROGRAMS)
+       $(MAKE) $(AM_MAKEFLAGS) check-TESTS
+check: check-am
+all-am: Makefile
+installdirs:
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+       @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+       $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+         install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+         `test -z '$(STRIP)' || \
+           echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+       -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES)
+
+distclean-generic:
+       -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+       -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+       @echo "This command is intended for maintainers to use"
+       @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-checkPROGRAMS clean-generic clean-libtool \
+       mostlyclean-am
+
+distclean: distclean-am
+       -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+       distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+       -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+       mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: $(top_builddir)/ansi2knr check-am install-am install-strip
+
+.PHONY: CTAGS GTAGS all all-am check check-TESTS check-am clean \
+       clean-checkPROGRAMS clean-generic clean-libtool ctags \
+       distclean distclean-compile distclean-generic \
+       distclean-libtool distclean-tags distdir dvi dvi-am html \
+       html-am info info-am install install-am install-data \
+       install-data-am install-dvi install-dvi-am install-exec \
+       install-exec-am install-html install-html-am install-info \
+       install-info-am install-man install-pdf install-pdf-am \
+       install-ps install-ps-am install-strip installcheck \
+       installcheck-am installdirs maintainer-clean \
+       maintainer-clean-generic mostlyclean mostlyclean-compile \
+       mostlyclean-generic mostlyclean-kr mostlyclean-libtool pdf \
+       pdf-am ps ps-am tags uninstall uninstall-am
+
+
+# Convenience target: build every program listed in $(EXTRA_PROGRAMS).
+allprogs: $(EXTRA_PROGRAMS)
+
+# Build libtests.la from within $(top_builddir)/tests.  `&&' makes a failed
+# cd abort the rule instead of running $(MAKE) in the current directory.
+$(top_builddir)/tests/libtests.la:
+       cd $(top_builddir)/tests && $(MAKE) $(AM_MAKEFLAGS) libtests.la
+
+# Manual check: for each bit size (-z), pipe 1000 numbers from ./gen into
+# ./stat and print the output lines that start with a digit, prefixed by a
+# size label on the same line.  The first group uses mpz_urandomb (with an
+# explicit -i argument to ./stat), the second group uses mpf_urandomb.
+# printf emits the label because `echo -n' is not portable across shells.
+manual-test: gen$(EXEEXT) stat$(EXEEXT)
+       @(printf '16i: '; ./gen -f mpz_urandomb -z 16 1000 \
+               | ./stat -i 0xffff | grep '^[0-9]')
+       @(printf '32i: '; ./gen -f mpz_urandomb -z 32 1000 \
+               | ./stat -i 0xffffffff  | grep '^[0-9]')
+       @(printf '33i: '; ./gen -f mpz_urandomb -z 33 1000 \
+               | ./stat -i 0x1ffffffff  | grep '^[0-9]')
+       @(printf '64i: '; ./gen -f mpz_urandomb -z 64 1000 \
+               | ./stat -i 0xffffffffffffffff  | grep '^[0-9]')
+       @(printf '128i: '; ./gen -f mpz_urandomb -z 128 1000 \
+               | ./stat -i 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF | grep '^[0-9]')
+
+       @(printf '16f: '; ./gen -f mpf_urandomb -z 16 1000 \
+               | ./stat | grep '^[0-9]')
+       @(printf '32f: '; ./gen -f mpf_urandomb -z 32 1000 \
+               | ./stat | grep '^[0-9]')
+       @(printf '33f: '; ./gen -f mpf_urandomb -z 33 1000 \
+               | ./stat | grep '^[0-9]')
+       @(printf '64f: '; ./gen -f mpf_urandomb -z 64 1000 \
+               | ./stat | grep '^[0-9]')
+       @(printf '128f: '; ./gen -f mpf_urandomb -z 128 1000 \
+               | ./stat | grep '^[0-9]')
+
+# Larger manual run: for each bit size (-z), pipe 50000 numbers from ./gen
+# into `./stat -2 1000' and print the output lines beginning with "Km" or
+# "Kp".  The size label is echoed on its own line before each result group.
+# The first group uses mpz_urandomb (with an explicit -i argument to
+# ./stat), the second group uses mpf_urandomb.
+manual-bigtest: gen$(EXEEXT) stat$(EXEEXT)
+       @(echo '16i: '; ./gen -f mpz_urandomb -z 16 50000 \
+               | ./stat -2 1000 -i 0xffff | grep '^K[mp]')
+       @(echo '32i: '; ./gen -f mpz_urandomb -z 32 50000 \
+               | ./stat -2 1000 -i 0xffffffff | grep '^K[mp]')
+       @(echo '33i: '; ./gen -f mpz_urandomb -z 33 50000 \
+               | ./stat -2 1000 -i 0x1ffffffff | grep '^K[mp]')
+       @(echo '64i: '; ./gen -f mpz_urandomb -z 64 50000 \
+               | ./stat -2 1000 -i 0xffffffffffffffff  | grep '^K[mp]')
+       @(echo '128i: '; ./gen -f mpz_urandomb -z 128 50000 \
+               | ./stat -2 1000 -i 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF | grep '^K[mp]')
+
+       @(echo '16f: '; ./gen -f mpf_urandomb -z 16 50000 \
+               | ./stat -2 1000 | grep '^K[mp]')
+       @(echo '32f: '; ./gen -f mpf_urandomb -z 32 50000 \
+               | ./stat -2 1000 | grep '^K[mp]')
+       @(echo '33f: '; ./gen -f mpf_urandomb -z 33 50000 \
+               | ./stat -2 1000 | grep '^K[mp]')
+       @(echo '64f: '; ./gen -f mpf_urandomb -z 64 50000 \
+               | ./stat -2 1000 | grep '^K[mp]')
+       @(echo '128f: '; ./gen -f mpf_urandomb -z 128 50000 \
+               | ./stat -2 1000 | grep '^K[mp]')
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/tests/rand/findlc.c b/tests/rand/findlc.c

new file mode 100644 (file)

index 0000000..c027970
--- /dev/null
+++ b/tests/rand/findlc.c
@@ -0,0 +1,252 @@
+/*
+Copyright 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <signal.h>
+#include <math.h>
+#include "gmp.h"
+#include "gmpstat.h"
+
+/* Define a static, constant string table named `rcsid'.  Element 1 is MSG
+   prefixed with "@(#)" (\100 is the octal escape for '@'); main prints it
+   as the program version.  Element 0 is the table's own address; the
+   reason for that self-reference is not evident from this file.  */
+#define RCSID(msg) \
+static /**/const char *const rcsid[] = { (char *)rcsid, "\100(#)" msg }
+
+RCSID("$Id$");
+
+int g_debug = 0;
+
+static mpz_t a;
+
+/* Signal handler (main installs it for SIGSEGV and SIGHUP): report the
+   signal number SIG and the current value of the global `a' on stdout.  */
+static void
+sh_status (int sig)
+{
+  /* Banner line plus the label for the value, written in one call.  */
+  printf ("sh_status: signal %d caught. dumping status.\n  a = ", sig);
+  mpz_out_str (stdout, 10, a);
+  putchar ('\n');
+  fflush (stdout);
+
+  /* Only a SEGV needs further action: put its default disposition back.  */
+  if (sig != SIGSEGV)
+    return;
+  signal (SIGSEGV, SIG_DFL);
+}
+
+/* Input is a modulus (m).  We shall find multiplier (a) and adder (c)
+   conforming to the rules found in the first comment block in file
+   mpz/urandom.c.
+
+   Then run a spectral test on the generator and discard any
+   multipliers not passing.  */
+
+/* TODO:
+
+   . find a better algorithm than a+=8; bigger jumps perhaps?
+
+*/
+
+/* Set S to an NBITS-bit value read from /dev/random.
+
+   Only implemented when __FreeBSD__ is defined; on every other system the
+   body is compiled out and S is left exactly as the caller passed it.
+
+   One byte is read per iteration (the device is reopened each time).  A
+   read that returns nothing is retried after a one second sleep.  Progress
+   is reported on stdout after every byte.  If /dev/random cannot be opened
+   the program prints an error and exits with status 1.  */
+void
+mpz_true_random (mpz_t s, unsigned long int nbits)
+{
+#if __FreeBSD__
+  FILE *fs;
+  char c[1];
+  unsigned long int i;         /* same type as nbits: no mixed-sign compare */
+
+  mpz_set_ui (s, 0);
+  for (i = 0; i < nbits; i += 8)
+    {
+      for (;;)
+       {
+         size_t nread;
+         fs = fopen ("/dev/random", "r");
+         if (NULL == fs)       /* fread/fclose on NULL would crash */
+           {
+             perror ("fopen (/dev/random)");
+             exit (1);
+           }
+         nread = fread (c, 1, 1, fs);
+         fclose (fs);
+         if (nread != 0)
+           break;
+         sleep (1);
+       }
+      /* Shift the accumulated value up and append the new byte.  */
+      mpz_mul_2exp (s, s, 8);
+      mpz_add_ui (s, s, ((unsigned long int) c[0]) & 0xff);
+      printf ("%lu random bits\n", i + 8);
+    }
+  /* Whole bytes were collected; trim down to exactly nbits.  */
+  if (nbits % 8 != 0)
+    mpz_mod_2exp (s, s, nbits);
+#endif
+}
+
+/* findlc: search for a multiplier `a' for a linear congruential generator
+   with modulus m = 2^m2exp.
+
+   Usage: findlc [-dv] m2exp [low_merit [high_merit]]
+
+   Candidates of the form 8k+5 lying strictly between 0.01*m and 0.99*m are
+   passed to spectral_test() and merit() for dimensions 2..DIMS.  Every
+   candidate that passes the v[] threshold and reaches low_merit in all
+   dimensions is printed; the search ends at the first such candidate that
+   also reaches high_merit in all dimensions.  New best loss counts are
+   reported along the way.
+
+   Returns 0 when a multiplier has been found; exits with status 1 on a
+   usage error (including a non-positive m2exp) or if a signal handler
+   cannot be installed.  */
+int
+main (int argc, char *argv[])
+{
+  const char usage[] = "usage: findlc [-dv] m2exp [low_merit [high_merit]]\n";
+  int f;
+  int v_lose, m_lose, v_best, m_best;
+  int c;
+  int debug = 1;
+  int cnt_high_merit;
+  long int m2exp_arg;          /* m2exp as parsed, before the range check */
+  mpz_t m;
+  unsigned long int m2exp;
+#define DIMS 6                 /* dimensions run in spectral test */
+  mpf_t v[DIMS-1];             /* spectral test result (there's no v
+                                  for 1st dimension) */
+  mpf_t f_merit, low_merit, high_merit;
+  mpz_t acc, minus8;
+  mpz_t min, max;
+  mpz_t s;
+
+
+  mpz_init (m);
+  mpz_init (a);
+  for (f = 0; f < DIMS-1; f++)
+    mpf_init (v[f]);
+  mpf_init (f_merit);
+  mpf_init_set_d (low_merit, .1);
+  mpf_init_set_d (high_merit, .1);
+
+  while ((c = getopt (argc, argv, "a:di:hv")) != -1)
+    switch (c)
+      {
+      case 'd':                        /* debug */
+       g_debug++;
+       break;
+
+      case 'v':                        /* print version */
+       puts (rcsid[1]);
+       exit (0);
+
+      case 'h':
+      case '?':
+      default:
+       fputs (usage, stderr);
+       exit (1);
+      }
+
+  argc -= optind;
+  argv += optind;
+
+  if (argc < 1)
+    {
+      fputs (usage, stderr);
+      exit (1);
+    }
+
+  /* Reject a zero or negative exponent.  With m = 2^0 no candidate can lie
+     between min and max, so the search below would never terminate; a
+     negative value would wrap to a huge unsigned shift count.  */
+  m2exp_arg = atol (argv[0]);
+  if (m2exp_arg <= 0)
+    {
+      fputs (usage, stderr);
+      exit (1);
+    }
+  m2exp = (unsigned long int) m2exp_arg;
+
+  /* Install signal handler. */
+  if (SIG_ERR == signal (SIGSEGV, sh_status))
+    {
+      perror ("signal (SIGSEGV)");
+      exit (1);
+    }
+  if (SIG_ERR == signal (SIGHUP, sh_status))
+    {
+      perror ("signal (SIGHUP)");
+      exit (1);
+    }
+
+  printf ("findlc: version: %s\n", rcsid[1]);
+  /* m was already initialised above; assign rather than initialise a second
+     time, which would leak the first allocation.  */
+  mpz_set_ui (m, 1);
+  mpz_mul_2exp (m, m, m2exp);
+  printf ("m = 0x");
+  mpz_out_str (stdout, 16, m);
+  puts ("");
+
+  if (argc > 1)                        /* have low_merit */
+    mpf_set_str (low_merit, argv[1], 0);
+  if (argc > 2)                        /* have high_merit */
+    mpf_set_str (high_merit, argv[2], 0);
+
+  if (debug)
+    {
+      fprintf (stderr, "low_merit = ");
+      mpf_out_str (stderr, 10, 2, low_merit);
+      fprintf (stderr, "; high_merit = ");
+      mpf_out_str (stderr, 10, 2, high_merit);
+      fputs ("\n", stderr);
+    }
+
+  mpz_init (minus8);
+  mpz_set_si (minus8, -8L);
+  mpz_init_set_ui (acc, 0);
+  mpz_init (s);
+  mpz_init_set_d (min, 0.01 * pow (2.0, (double) m2exp));
+  mpz_init_set_d (max, 0.99 * pow (2.0, (double) m2exp));
+
+  mpz_true_random (s, m2exp);  /* Start.  */
+  mpz_setbit (s, 0);           /* Make it odd.  */
+
+  v_best = m_best = 2*(DIMS-1);
+  for (;;)
+    {
+      /* Step the accumulator by s modulo 2^m2exp, then derive the next
+        candidate a = (acc with its low three bits cleared) + 5.  */
+      mpz_add (acc, acc, s);
+      mpz_mod_2exp (acc, acc, m2exp);
+#if later
+      mpz_and_si (a, acc, -8L);
+#else
+      mpz_and (a, acc, minus8);
+#endif
+      mpz_add_ui (a, a, 5);
+      if (mpz_cmp (a, min) <= 0 || mpz_cmp (a, max) >= 0)
+       continue;
+
+      spectral_test (v, DIMS, a, m);
+      for (f = 0, v_lose = m_lose = 0, cnt_high_merit = DIMS-1;
+          f < DIMS-1; f++)
+       {
+         merit (f_merit, f + 2, v[f], m);
+
+         if (mpf_cmp_ui (v[f], 1 << (30 / (f + 2) + (f == 2))) < 0)
+           v_lose++;
+
+         if (mpf_cmp (f_merit, low_merit) < 0)
+           m_lose++;
+
+         if (mpf_cmp (f_merit, high_merit) >= 0)
+           cnt_high_merit--;
+       }
+
+      if (0 == v_lose && 0 == m_lose)
+       {
+         mpz_out_str (stdout, 10, a); puts (""); fflush (stdout);
+         if (0 == cnt_high_merit)
+           break;              /* leave loop */
+       }
+      if (v_lose < v_best)
+       {
+         v_best = v_lose;
+         printf ("best (v_lose=%d; m_lose=%d): ", v_lose, m_lose);
+         mpz_out_str (stdout, 10, a); puts (""); fflush (stdout);
+       }
+      if (m_lose < m_best)
+       {
+         m_best = m_lose;
+         printf ("best (v_lose=%d; m_lose=%d): ", v_lose, m_lose);
+         mpz_out_str (stdout, 10, a); puts (""); fflush (stdout);
+       }
+    }
+
+  /* Release everything that was initialised above.  */
+  mpz_clear (m);
+  mpz_clear (a);
+  mpz_clear (acc);
+  mpz_clear (minus8);
+  mpz_clear (min);
+  mpz_clear (max);
+  mpz_clear (s);
+  for (f = 0; f < DIMS-1; f++)
+    mpf_clear (v[f]);
+  mpf_clear (f_merit);
+  mpf_clear (low_merit);
+  mpf_clear (high_merit);
+
+  printf ("done.\n");
+  return 0;
+}
diff --git a/tests/rand/gen.c b/tests/rand/gen.c

new file mode 100644 (file)

index 0000000..511a3e2
--- /dev/null
+++ b/tests/rand/gen.c
@@ -0,0 +1,481 @@
+/* gen.c -- Generate pseudorandom numbers.
+
+Copyright 1999, 2000, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+/* Examples:
+
+  $ gen 10
+10 integers 0 <= X < 2^32 generated by mpz_urandomb()
+
+  $ gen -f mpf_urandomb 10
+10 real numbers 0 <= X < 1
+
+  $ gen -z 127 10
+10 integers 0 <= X < 2^127
+
+  $ gen -f mpf_urandomb -x .9,1 10
+10 real numbers 0 <= X < .9
+
+  $ gen -s 1 10
+10 integers, sequence seeded with 1
+
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <limits.h>
+#include <errno.h>
+#include <time.h>
+#include <string.h>
+
+#if !HAVE_DECL_OPTARG
+extern char *optarg;
+extern int optind, opterr;
+#endif
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+int main (argc, argv)
+     int argc;
+     char *argv[];
+{
+  const char usage[] =
+    "usage: gen [-bhpq] [-a n] [-c a,c,m2exp] [-C a,c,m] [-f func] [-g alg] [-m n] [-s n] " \
+    "[-x f,t] [-z n] [n]\n" \
+    "  n        number of random numbers to generate\n" \
+    "  -a n     ASCII output in radix n (default, with n=10)\n" \
+    "  -b       binary output\n" \
+    "  -c a,c,m2exp use supplied LC scheme\n" \
+    "  -f func  random function, one of\n" \
+    "           mpz_urandomb (default), mpz_urandomm, mpf_urandomb, rand, random\n" \
+    "  -g alg   algorithm, one of mt (default), lc\n" \
+    "  -h       print this text and exit\n" \
+    "  -m n     maximum size of generated number plus 1 (0<= X < n) for mpz_urandomm\n" \
+    "  -p       print used seed on stderr\n" \
+    "  -q       quiet, no output\n" \
+    "  -s n     initial seed (default: output from time(3))\n" \
+    "  -x f,t   exclude all numbers f <= x <= t\n" \
+    "  -z n     size in bits of generated numbers (0<= X <2^n) (default 32)\n" \
+    "";
+
+  unsigned long int f;
+  unsigned long int n = 0;
+  unsigned long int seed;
+  unsigned long int m2exp = 0;
+  unsigned int size = 32;
+  int seed_from_user = 0;
+  int ascout = 1, binout = 0, printseed = 0;
+  int output_radix = 10;
+  int lc_scheme_from_user = 0;
+  int quiet_flag = 0;
+  mpz_t z_seed;
+  mpz_t z1;
+  mpf_t f1;
+  gmp_randstate_t rstate;
+  int c, i;
+  double drand;
+  long lrand;
+  int do_exclude = 0;
+  mpf_t f_xf, f_xt;            /* numbers to exclude from sequence */
+  char *str_xf, *str_xt;       /* numbers to exclude from sequence */
+  char *str_a, *str_adder, *str_m;
+  mpz_t z_a, z_m, z_mmax;
+  unsigned long int ul_adder;
+
+  enum
+  {
+    RFUNC_mpz_urandomb = 0,
+    RFUNC_mpz_urandomm,
+    RFUNC_mpf_urandomb,
+    RFUNC_rand,
+    RFUNC_random,
+  } rfunc = RFUNC_mpz_urandomb;
+  char *rfunc_str[] =  { "mpz_urandomb", "mpz_urandomm", "mpf_urandomb",
+                        "rand", "random" };
+  enum
+  {
+    RNG_MT = 0,
+    RNG_LC
+  };
+  gmp_randalg_t ralg = RNG_MT;
+  /* Texts for the algorithms.  The index of each must match the
+     corresponding algorithm in the enum above.  */
+  char *ralg_str[] = { "mt", "lc" };
+
+  mpf_init (f_xf);
+  mpf_init (f_xt);
+  mpf_init (f1);
+  mpz_init (z1);
+  mpz_init (z_seed);
+  mpz_init_set_ui (z_mmax, 0);
+
+
+  while ((c = getopt (argc, argv, "a:bc:f:g:hm:n:pqs:z:x:")) != -1)
+    switch (c)
+      {
+      case 'a':
+       ascout = 1;
+       binout = 0;
+       output_radix = atoi (optarg);
+       break;
+
+      case 'b':
+       ascout = 0;
+       binout = 1;
+       break;
+
+      case 'c':                        /* User supplied LC scheme: a,c,m2exp */
+       if (NULL == (str_a = strtok (optarg, ","))
+           || NULL == (str_adder = strtok (NULL, ","))
+           || NULL == (str_m = strtok (NULL, ",")))
+         {
+           fprintf (stderr, "gen: bad LC scheme parameters: %s\n", optarg);
+           exit (1);
+         }
+#ifdef HAVE_STRTOUL
+       ul_adder = strtoul (str_adder, NULL, 0);
+#elif HAVE_STRTOL
+       ul_adder = (unsigned long int) strtol (str_adder, NULL, 0);
+#else
+       ul_adder = (unsigned long int) atoi (str_adder);
+#endif
+
+       if (mpz_init_set_str (z_a, str_a, 0))
+         {
+           fprintf (stderr, "gen: bad LC scheme parameter `a': %s\n", str_a);
+           exit (1);
+         }
+       if (ULONG_MAX == ul_adder)
+         {
+           fprintf (stderr, "gen: bad LC scheme parameter `c': %s\n",
+                    str_adder);
+           exit (1);
+         }
+       m2exp = atol (str_m);
+
+       lc_scheme_from_user = 1;
+       break;
+
+
+      case 'f':
+       rfunc = -1;
+       for (f = 0; f < sizeof (rfunc_str) / sizeof (*rfunc_str); f++)
+           if (!strcmp (optarg, rfunc_str[f]))
+             {
+               rfunc = f;
+               break;
+             }
+       if (rfunc == -1)
+         {
+           fputs (usage, stderr);
+           exit (1);
+         }
+       break;
+
+      case 'g':                        /* algorithm */
+       ralg = -1;
+       for (f = 0; f < sizeof (ralg_str) / sizeof (*ralg_str); f++)
+           if (!strcmp (optarg, ralg_str[f]))
+             {
+               ralg = f;
+               break;
+             }
+       if (ralg == -1)
+         {
+           fputs (usage, stderr);
+           exit (1);
+         }
+       break;
+
+      case 'm':                        /* max for mpz_urandomm() */
+       if (mpz_set_str (z_mmax, optarg, 0))
+         {
+           fprintf (stderr, "gen: bad max value: %s\n", optarg);
+           exit (1);
+         }
+       break;
+
+      case 'p':                        /* print seed on stderr */
+       printseed = 1;
+       break;
+
+      case 'q':                        /* quiet */
+       quiet_flag = 1;
+       break;
+
+      case 's':                        /* user provided seed */
+       if (mpz_set_str (z_seed, optarg, 0))
+         {
+           fprintf (stderr, "gen: bad seed argument %s\n", optarg);
+           exit (1);
+         }
+       seed_from_user = 1;
+       break;
+
+      case 'z':
+       size = atoi (optarg);
+       if (size < 1)
+         {
+           fprintf (stderr, "gen: bad size argument (-z %u)\n", size);
+           exit (1);
+         }
+       break;
+
+      case 'x':                        /* Exclude. from,to */
+       str_xf = optarg;
+       str_xt = strchr (optarg, ',');
+       if (NULL == str_xt)
+         {
+           fprintf (stderr, "gen: bad exclusion parameters: %s\n", optarg);
+           exit (1);
+         }
+       *str_xt++ = '\0';
+       do_exclude = 1;
+       break;
+
+      case 'h':
+      case '?':
+      default:
+       fputs (usage, stderr);
+       exit (1);
+      }
+  argc -= optind;
+  argv += optind;
+
+  if (! seed_from_user)
+    mpz_set_ui (z_seed, (unsigned long int) time (NULL));
+  seed = mpz_get_ui (z_seed);
+  if (printseed)
+    {
+      fprintf (stderr, "gen: seed used: ");
+      mpz_out_str (stderr, output_radix, z_seed);
+      fprintf (stderr, "\n");
+    }
+
+  mpf_set_prec (f1, size);
+
+  /* init random state and plant seed */
+  switch (rfunc)
+    {
+    case RFUNC_mpf_urandomb:
+#if 0
+      /* Don't init a too small generator.  */
+      size = PREC (f1) * GMP_LIMB_BITS;
+      /* Fall through.  */
+#endif
+    case RFUNC_mpz_urandomb:
+    case RFUNC_mpz_urandomm:
+      switch (ralg)
+       {
+       case RNG_MT:
+         gmp_randinit_mt (rstate);
+         break;
+
+       case RNG_LC:
+         if (! lc_scheme_from_user)
+           gmp_randinit_lc_2exp_size (rstate, MIN (128, size));
+         else
+           gmp_randinit_lc_2exp (rstate, z_a, ul_adder, m2exp);
+         break;
+
+       default:
+         fprintf (stderr, "gen: unsupported algorithm\n");
+         exit (1);
+       }
+
+      gmp_randseed (rstate, z_seed);
+      break;
+
+    case RFUNC_rand:
+      srand (seed);
+      break;
+
+    case RFUNC_random:
+#ifdef __FreeBSD__             /* FIXME */
+      if (seed_from_user)
+       srandom (seed);
+      else
+       srandomdev ();
+#else
+      fprintf (stderr, "gen: unsupported algorithm\n");
+#endif
+      break;
+
+    default:
+      fprintf (stderr, "gen: random function not implemented\n");
+      exit (1);
+    }
+
+  /* set up excludes */
+  if (do_exclude)
+    switch (rfunc)
+      {
+      case RFUNC_mpf_urandomb:
+
+       if (mpf_set_str (f_xf, str_xf, 10) ||
+           mpf_set_str (f_xt, str_xt, 10))
+         {
+           fprintf (stderr, "gen: bad exclusion-from (\"%s\") " \
+                    "or exclusion-to (\"%s\") string.  no exclusion done.\n",
+                    str_xf, str_xt);
+           do_exclude = 0;
+         }
+       break;
+
+      default:
+       fprintf (stderr, "gen: exclusion not implemented for chosen " \
+                "randomization function.  all numbers included in sequence.\n");
+      }
+
+  /* generate and print */
+  if (argc > 0)
+    {
+#if HAVE_STRTOUL
+      n = strtoul (argv[0], (char **) NULL, 10);
+#elif HAVE_STRTOL
+      n = (unsigned long int) strtol (argv[0], (char **) NULL, 10);
+#else
+      n = (unsigned long int) atoi (argv[0]);
+#endif
+    }
+
+  for (f = 0; n == 0 || f < n; f++)
+    {
+      switch (rfunc)
+       {
+       case RFUNC_mpz_urandomb:
+         mpz_urandomb (z1, rstate, size);
+         if (quiet_flag)
+           break;
+         if (binout)
+           {
+             /*fwrite ((unsigned int *) z1->_mp_d, 4, 1, stdout);*/
+             fprintf (stderr, "gen: binary output for mpz_urandom* is broken\n");
+             exit (1);
+           }
+         else
+           {
+             mpz_out_str (stdout, output_radix, z1);
+             puts ("");
+           }
+         break;
+
+       case RFUNC_mpz_urandomm:
+         mpz_urandomm (z1, rstate, z_mmax);
+         if (quiet_flag)
+           break;
+         if (binout)
+           {
+             /*fwrite ((unsigned int *) z1->_mp_d, 4, 1, stdout);*/
+             fprintf (stderr, "gen: binary output for mpz_urandom* is broken\n");
+             exit (1);
+           }
+         else
+           {
+             mpz_out_str (stdout, output_radix, z1);
+             puts ("");
+           }
+         break;
+
+       case RFUNC_mpf_urandomb:
+         mpf_urandomb (f1, rstate, size);
+         if (do_exclude)
+           if (mpf_cmp (f1, f_xf) >= 0 && mpf_cmp (f1, f_xt) <= 0)
+               break;
+         if (quiet_flag)
+           break;
+         if (binout)
+           {
+             fprintf (stderr, "gen: binary output for floating point numbers "\
+                      "not implemented\n");
+             exit (1);
+           }
+         else
+           {
+             mpf_out_str (stdout, output_radix, 0, f1);
+             puts ("");
+           }
+         break;
+
+       case RFUNC_rand:
+         i = rand ();
+#ifdef FLOAT_OUTPUT
+         if (i)
+           drand = (double) i / (double) RAND_MAX;
+         else
+           drand = 0.0;
+         if (quiet_flag)
+           break;
+         if (binout)
+           fwrite (&drand, sizeof (drand), 1, stdout);
+         else
+           printf ("%e\n", drand);
+#else
+         if (quiet_flag)
+           break;
+         if (binout)
+           fwrite (&i, sizeof (i), 1, stdout);
+         else
+           printf ("%d\n", i);
+#endif
+         break;
+
+       case RFUNC_random:
+         lrand = random ();
+         if (lrand)
+           drand = (double) lrand / (double) 0x7fffffff;
+         else
+           drand = 0;
+         if (quiet_flag)
+           break;
+         if (binout)
+           fwrite (&drand, sizeof (drand), 1, stdout);
+         else
+           printf ("%e\n", drand);
+         break;
+
+       default:
+         fprintf (stderr, "gen: random function not implemented\n");
+         exit (1);
+       }
+
+    }
+
+  /* clean up */
+  switch (rfunc)
+    {
+    case RFUNC_mpz_urandomb:
+    case RFUNC_mpf_urandomb:
+      gmp_randclear (rstate);
+      break;
+    default:
+      break;
+    }
+  mpf_clear (f1);
+  mpf_clear (f_xf);
+  mpf_clear (f_xt);
+  mpz_clear (z1);
+  mpz_clear (z_seed);
+
+  return 0;
+}
+
+/* Take the addresses of the GMP dump routines.  Neither pointer is read
+   anywhere in the visible code.  NOTE(review): presumably this only
+   forces mpz_dump/mpf_dump to be linked in so that they can be called
+   from a debugger -- confirm.  */
+static void *debug_dummyz = mpz_dump;
+static void *debug_dummyf = mpf_dump;
diff --git a/tests/rand/gmpstat.h b/tests/rand/gmpstat.h

new file mode 100644 (file)

index 0000000..1e4d6af
--- /dev/null
+++ b/tests/rand/gmpstat.h
@@ -0,0 +1,75 @@
+/* gmpstat.h */
+
+/*
+Copyright 1999 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+/* This file requires the following header files: gmp.h */
+
+#ifndef        __GMPSTAT_H__
+#define        __GMPSTAT_H__
+
+/* Global debug flag.  FIXME: Remove. */
+extern int g_debug;
+/* Thresholds that g_debug is compared against; e.g. ks() and x2() print
+   a per-sample trace when g_debug > DEBUG_2.  */
+#define DEBUG_1 0
+#define DEBUG_2 1
+
+/* Max number of dimensions in spectral test.  FIXME: Make dynamic. */
+#define GMP_SPECT_MAXT 10
+
+/* Frequency test using the Kolmogorov-Smirnov test on the N real numbers
+   in X[]; the results are stored in Kp and Km.  */
+void
+mpf_freqt (mpf_t Kp,
+          mpf_t Km,
+          mpf_t X[],
+          const unsigned long int n);
+/* Frequency test on the N integers in X[], expected to lie in
+   0 <= X <= IMAX.  The statistic is stored in V; the return value is
+   documented in statlib.c as the number of observations actually used.  */
+unsigned long int
+mpz_freqt (mpf_t V,
+          mpz_t X[],
+          unsigned int imax,
+          const unsigned long int n);
+
+/* Low level functions. */
+/* Kolmogorov-Smirnov test on the N numbers in X[] (sorted in place),
+   using P as the distribution function; results go to Kp and Km.  */
+void
+ks (mpf_t Kp,
+    mpf_t Km,
+    mpf_t X[],
+    void (P) (mpf_t, mpf_t),
+    const unsigned long int n);
+
+/* Store in P the probability of Kp/Km being less than or equal to VAL
+   with N observations.  */
+void
+ks_table (mpf_t p, mpf_t val, const unsigned int n);
+
+/* Fill t[0..6] with the chi-square table row for V.  */
+void
+x2_table (double t[],
+         unsigned int v);
+
+/* Spectral test.  Per the notes in spect.c: T is the upper dimension, A
+   the LC multiplier and M the LC modulus; v[t] is stored in rop[t-2].  */
+void
+spectral_test (mpf_t rop[], unsigned int T, mpz_t a, mpz_t m);
+/* NOTE(review): definitions of vz_dot() and f_floor() are not visible
+   here; going by the names they are a vector dot product and a floor
+   operation -- confirm against statlib.c.  */
+void
+vz_dot (mpz_t rop, mpz_t V1[], mpz_t V2[], unsigned int n);
+void
+f_floor (mpf_t rop, mpf_t op);
+
+/* spect.c calls merit (rop, t, v, m) with dimension T, the spectral test
+   result V and modulus M, and prints ROP as the merit figure.
+   NOTE(review): merit_u() presumably returns the same figure as a
+   double -- confirm.  */
+void
+merit (mpf_t rop, unsigned int t, mpf_t v, mpz_t m);
+double
+merit_u (unsigned int t, mpf_t v, mpz_t m);
+
+/* From separate source files: */
+void zdiv_round (mpz_t rop, mpz_t n, mpz_t d);
+
+#endif /* !__GMPSTAT_H__ */
diff --git a/tests/rand/spect.c b/tests/rand/spect.c

new file mode 100644 (file)

index 0000000..f6f872b
--- /dev/null
+++ b/tests/rand/spect.c
@@ -0,0 +1,137 @@
+/* spect.c -- the spectral test */
+
+/*
+Copyright 1999 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+/* T is upper dimension.  Z_A is the LC multiplier, which is
+   relatively prime to Z_M, the LC modulus.  The result is put in
+   rop[] with v[t] in rop[t-2]. */
+
+/* BUGS: Due to lazy allocation scheme, maximum T is hard coded to MAXT. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <math.h>
+#include "gmp.h"
+
+#include "gmpstat.h"
+
+int g_debug = 0;
+
+/* spect -- command-line driver for the spectral test.
+
+   usage: spect [-d] a m n
+     -d  increase the debug level (g_debug)
+     a   LC multiplier, parsed by mpz_set_str() with base 0
+     m   LC modulus, parsed the same way
+     n   highest dimension; results are printed for dimensions 2..n
+
+   N must be at least 2 and is limited to GMP_SPECT_MAXT, the documented
+   maximum number of dimensions (res[] only has that many slots).  For
+   each dimension the program prints v and its merit and flags values
+   that fall below res_min[] / 0.1.  Exits with status 1 on bad usage or
+   on arguments that cannot be parsed.  */
+int
+main (int argc, char *argv[])
+{
+  const char usage[] = "usage: spect [-d] a m n\n";
+  int c;
+  int n_arg;                   /* N as parsed, before range checking */
+  unsigned int n;
+  mpz_t a, m;
+  mpf_t res[GMP_SPECT_MAXT], res_min[GMP_SPECT_MAXT], f_tmp;
+  unsigned int f;
+
+
+  mpz_init (a);
+  mpz_init (m);
+  for (f = 0; f < GMP_SPECT_MAXT; f++)
+    {
+      mpf_init (res[f]);
+      mpf_init (res_min[f]);
+    }
+  mpf_init (f_tmp);
+
+  /* Lower bounds for v in dimensions 2..6.  The remaining entries keep
+     the value they got from mpf_init().  */
+  mpf_set_ui (res_min[0], 32768); /* 2^15 */
+  mpf_set_ui (res_min[1], 1024); /* 2^10 */
+  mpf_set_ui (res_min[2], 256); /* 2^8 */
+  mpf_set_ui (res_min[3], 64); /* 2^6 */
+  mpf_set_ui (res_min[4], 32); /* 2^5 */
+
+  while ((c = getopt (argc, argv, "dh")) != -1)
+    switch (c)
+      {
+      case 'd':                        /* debug */
+       g_debug++;
+       break;
+      case 'h':
+      default:
+       fputs (usage, stderr);
+       exit (1);
+      }
+  argc -= optind;
+  argv += optind;
+
+  if (argc < 3)
+    {
+      fputs (usage, stderr);
+      exit (1);
+    }
+
+  /* mpz_set_str() returns non-zero when the string is not a number.  */
+  if (mpz_set_str (a, argv[0], 0) != 0)
+    {
+      fprintf (stderr, "spect: bad multiplier: %s\n", argv[0]);
+      exit (1);
+    }
+  if (mpz_set_str (m, argv[1], 0) != 0)
+    {
+      fprintf (stderr, "spect: bad modulus: %s\n", argv[1]);
+      exit (1);
+    }
+
+  /* Range-check N while it is still signed: converting a negative or
+     zero value to unsigned would defeat the upper clamp and make the
+     `n - 1' loop bound below wrap around.  */
+  n_arg = atoi (argv[2]);
+  if (n_arg < 2)
+    {
+      fprintf (stderr, "spect: bad dimension (need n >= 2): %s\n", argv[2]);
+      exit (1);
+    }
+  n = (unsigned int) n_arg;
+  if (n > GMP_SPECT_MAXT)
+    n = GMP_SPECT_MAXT;
+
+  spectral_test (res, n, a, m);
+
+  /* res[f] holds v for dimension f + 2.  */
+  for (f = 0; f < n - 1; f++)
+    {
+      /* print v */
+      printf ("%u: v = ", f + 2);
+      mpf_out_str (stdout, 10, 4, res[f]);
+
+#ifdef PRINT_RAISED_BY_TWO_AS_WELL
+      printf (" (^2 = ");
+      mpf_mul (f_tmp, res[f], res[f]);
+      mpf_out_str (stdout, 10, 4, f_tmp);
+      printf (")");
+#endif /* PRINT_RAISED_BY_TWO_AS_WELL */
+
+      /* print merit */
+      printf (" m = ");
+      merit (f_tmp, f + 2, res[f], m);
+      mpf_out_str (stdout, 10, 4, f_tmp);
+
+      if (mpf_cmp (res[f], res_min[f]) < 0)
+       printf ("\t*** v too low ***");
+      if (mpf_get_d (f_tmp) < .1)
+       printf ("\t*** merit too low ***");
+
+      puts ("");
+    }
+
+  mpz_clear (a);
+  mpz_clear (m);
+  for (f = 0; f < GMP_SPECT_MAXT; f++)
+    {
+      mpf_clear (res[f]);
+      mpf_clear (res_min[f]);
+    }
+  mpf_clear (f_tmp);
+
+  return 0;
+}
+
+
+/* debug_foo () -- not called anywhere in the visible code, and its body
+   is guarded by `if (0)', so none of it ever executes.
+   NOTE(review): presumably it exists only to reference mpz_dump() and
+   mpf_dump() so that they are linked in for use from a debugger --
+   confirm.  */
+void
+debug_foo()
+{
+  if (0)
+    {
+      mpz_dump (0);
+      mpf_dump (0);
+    }
+}
diff --git a/tests/rand/stat.c b/tests/rand/stat.c

new file mode 100644 (file)

index 0000000..e6917d9
--- /dev/null
+++ b/tests/rand/stat.c
@@ -0,0 +1,407 @@
+/* stat.c -- statistical tests of random number sequences. */
+
+/*
+Copyright 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+/* Examples:
+
+  $ gen 1000 | stat
+Test 1000 real numbers.
+
+  $ gen 30000 | stat -2 1000
+Test 1000 real numbers 30 times and then test the 30 results in a
+``second level''.
+
+  $ gen -f mpz_urandomb 1000 | stat -i 0xffffffff
+Test 1000 integers 0 <= X <= 2^32-1.
+
+  $ gen -f mpz_urandomb -z 34 1000 | stat -i 0x3ffffffff
+Test 1000 integers 0 <= X <= 2^34-1.
+
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <math.h>
+#include "gmp.h"
+#include "gmpstat.h"
+
+#if !HAVE_DECL_OPTARG
+extern char *optarg;
+extern int optind, opterr;
+#endif
+
+#define FVECSIZ (100000L)
+
+int g_debug = 0;
+
+/* print_ks_results (f_p, f_p_prob, f_m, f_m_prob, fp) -- write one line
+   of KS results to FP: F_P with F_P_PROB as a percentage, a tab, then
+   F_M with F_M_PROB as a percentage.  All four values are converted to
+   double for printing.  */
+static void
+print_ks_results (mpf_t f_p, mpf_t f_p_prob,
+                 mpf_t f_m, mpf_t f_m_prob,
+                 FILE *fp)
+{
+  const double kp = mpf_get_d (f_p);
+  const double km = mpf_get_d (f_m);
+  const double kp_pct = mpf_get_d (f_p_prob) * 100.0;
+  const double km_pct = mpf_get_d (f_m_prob) * 100.0;
+
+  fprintf (fp, "%.4f (%.0f%%)\t", kp, kp_pct);
+  fprintf (fp, "%.4f (%.0f%%)\n", km, km_pct);
+}
+
+/* print_x2_table (v, fp) -- write to FP a heading, the seven percentage
+   column labels, and the chi-square table row that x2_table() computes
+   for V.  */
+static void
+print_x2_table (unsigned int v, FILE *fp)
+{
+  double row[7];
+  int col = 0;
+
+  fprintf (fp, "Chi-square table for v=%u\n", v);
+  fprintf (fp, "1%%\t5%%\t25%%\t50%%\t75%%\t95%%\t99%%\n");
+
+  x2_table (row, v);
+  while (col < 7)
+    {
+      fprintf (fp, "%.2f\t", row[col]);
+      col++;
+    }
+  fputs ("\n", fp);
+}
+
+
+
+/* Pks (p, x) -- distribution function for KS results with a big n (like
+   1000 or so):
+
+       F(x) = 1 - pow(e, -2*x^2)       [Knuth, vol 2, p.51]
+
+   The exponent is built with mpf arithmetic in P; the power itself is
+   evaluated in double precision.  */
+/* gnuplot: plot [0:1] Pks(x), Pks(x) = 1-exp(-2*x**2)  */
+
+static void
+Pks (mpf_t p, mpf_t x)
+{
+  double e_term;               /* pow(e, -2*x^2) as a double */
+
+  /* p = -2*x^2 */
+  mpf_set (p, x);
+  mpf_mul (p, p, p);
+  mpf_mul_ui (p, p, 2);
+  mpf_neg (p, p);
+
+  /* No pow() in gmp.  Use doubles. */
+  /* FIXME: Use exp()? */
+  e_term = pow (M_E, mpf_get_d (p));
+
+  /* p = 1 - pow(e, -2*x^2) */
+  mpf_set_d (p, e_term);
+  mpf_ui_sub (p, 1, p);
+}
+
+/* f_freq (l1runs, l2runs, fvec, n) -- frequency test on real numbers
+   0<=f<1.
+
+   FVEC holds L2RUNS consecutive groups of N numbers.  Each group is
+   passed to mpf_freqt() (which sorts that part of FVEC in place) and the
+   probabilities of the resulting Kp and Km are looked up with
+   ks_table().
+
+   If L1RUNS is zero the result of every group is printed directly.
+   Otherwise the results are saved, and a second-level KS test against
+   the distribution Pks() is applied first to the saved Kp values and
+   then to the saved Km values.
+
+   Exits with status 1 if the result array cannot be allocated.  */
+static void
+f_freq (const unsigned l1runs, const unsigned l2runs,
+       mpf_t fvec[], const unsigned long n)
+{
+  unsigned f;
+  mpf_t f_p, f_p_prob;
+  mpf_t f_m, f_m_prob;
+  mpf_t *l1res;                        /* level 1 result array: Kp values in
+                                  [0, l2runs), Km values in
+                                  [l2runs, 2*l2runs) */
+
+  mpf_init (f_p);  mpf_init (f_m);
+  mpf_init (f_p_prob);  mpf_init (f_m_prob);
+
+
+  /* Allocate space for 1st level results. */
+  l1res = (mpf_t *) malloc (l2runs * 2 * sizeof (mpf_t));
+  if (NULL == l1res)
+    {
+      fprintf (stderr, "stat: malloc failure\n");
+      exit (1);
+    }
+
+  printf ("\nEquidistribution/Frequency test on real numbers (0<=X<1):\n");
+  printf ("\tKp\t\tKm\n");
+
+  for (f = 0; f < l2runs; f++)
+    {
+      /*  f_printvec (fvec, n); */
+      mpf_freqt (f_p, f_m, fvec + f * n, n);
+
+      /* what's the probability of getting these results? */
+      ks_table (f_p_prob, f_p, n);
+      ks_table (f_m_prob, f_m, n);
+
+      if (l1runs == 0)
+       {
+         /*printf ("%u:\t", f + 1);*/
+         print_ks_results (f_p, f_p_prob, f_m, f_m_prob, stdout);
+       }
+      else
+       {
+         /* save result */
+         mpf_init_set (l1res[f], f_p);
+         mpf_init_set (l1res[f + l2runs], f_m);
+       }
+    }
+
+  /* Now, apply the KS test on the results from the 1st level rounds
+     with the distribution
+     F(x) = 1 - pow(e, -2*x^2) [Knuth, vol 2, p.51] */
+
+  if (l1runs != 0)
+    {
+      /*printf ("-------------------------------------\n");*/
+
+      /* The Kp's. */
+      ks (f_p, f_m, l1res, Pks, l2runs);
+      ks_table (f_p_prob, f_p, l2runs);
+      ks_table (f_m_prob, f_m, l2runs);
+      printf ("Kp:\t");
+      print_ks_results (f_p, f_p_prob, f_m, f_m_prob, stdout);
+
+      /* The Km's. */
+      ks (f_p, f_m, l1res + l2runs, Pks, l2runs);
+      ks_table (f_p_prob, f_p, l2runs);
+      ks_table (f_m_prob, f_m, l2runs);
+      printf ("Km:\t");
+      print_ks_results (f_p, f_p_prob, f_m, f_m_prob, stdout);
+
+      /* Every one of the 2*l2runs slots was initialised by
+        mpf_init_set() in the loop above; release them before the array
+        itself is freed.  */
+      for (f = 0; f < 2 * l2runs; f++)
+       mpf_clear (l1res[f]);
+    }
+
+  mpf_clear (f_p);  mpf_clear (f_m);
+  mpf_clear (f_p_prob);  mpf_clear (f_m_prob);
+  free (l1res);
+}
+
+/* z_freq (l1runs, l2runs, zvec, n, max) -- frequency test on the N
+   integers in ZVEC, 0<=z<=MAX.  Prints the chi-square table row for
+   v = MAX and then the statistic V computed by mpz_freqt().  L1RUNS and
+   L2RUNS are accepted but not used.  */
+static void
+z_freq (const unsigned l1runs,
+       const unsigned l2runs,
+       mpz_t zvec[],
+       const unsigned long n,
+       unsigned int max)
+{
+  mpf_t chi2;                  /* statistic V from mpz_freqt() */
+
+  mpf_init (chi2);
+
+  printf ("\nEquidistribution/Frequency test on integers (0<=X<=%u):\n", max);
+  print_x2_table (max, stdout);
+
+  mpz_freqt (chi2, zvec, max, n);
+  printf ("V = %.2f (n = %lu)\n", mpf_get_d (chi2), n);
+
+  mpf_clear (chi2);
+}
+
+unsigned int stat_debug = 0;
+
+/* stat -- read a sequence of numbers and run the frequency test(s) on
+   it.
+
+   Numbers are read from FILE, or from stdin when no file is given, as
+   reals 0 <= R < 1 (default, or -r max) or as integers 0 <= Z <= MAX
+   (-i max).  Whichever vector was not read is derived from the other
+   using MAX + 1 as the scale factor.  With `-2 runs' the input is split
+   into groups of RUNS numbers for a two-level test.
+
+   At most FVECSIZ numbers are read.  Exits with status 1 on bad
+   options, an unreadable file, empty input, or too little input for the
+   requested two-level test.  */
+int
+main (int argc, char *argv[])
+{
+  const char usage[] =
+    "usage: stat [-d] [-2 runs] [-i max | -r max] [file]\n" \
+    "       file     filename\n" \
+    "       -2 runs  perform 2-level test with RUNS runs on 1st level\n" \
+    "       -d       increase debugging level\n" \
+    "       -i max   input is integers 0 <= Z <= MAX\n" \
+    "       -r max   input is real numbers 0 <= R < 1 and use MAX as\n" \
+    "                maximum value when converting real numbers to integers\n" \
+    "";
+
+  /* Static storage: with FVECSIZ entries each, these arrays are too
+     large to put on the stack safely.  */
+  static mpf_t fvec[FVECSIZ];
+  static mpz_t zvec[FVECSIZ];
+  unsigned long int f, n, vecentries;
+  char *filen = NULL;
+  FILE *fp = NULL;
+  int c;
+  int omitoutput = 0;
+  int realinput = -1;          /* 1: input is real numbers 0<=R<1;
+                                  0: input is integers 0 <= Z <= MAX. */
+  long l1runs = 0,             /* 1st level runs */
+    l2runs = 1;                        /* 2nd level runs */
+  mpf_t f_temp;
+  mpz_t z_imax;                        /* max value when converting between
+                                  real number and integer. */
+  mpf_t f_imax_plus1;          /* f_imax + 1 stored in an mpf_t for
+                                  convenience */
+  mpf_t f_imax_minus1;         /* f_imax - 1 stored in an mpf_t for
+                                  convenience */
+
+
+  mpf_init (f_temp);
+  mpz_init_set_ui (z_imax, 0x7fffffff);
+  mpf_init (f_imax_plus1);
+  mpf_init (f_imax_minus1);
+
+  while ((c = getopt (argc, argv, "d2:i:r:")) != -1)
+    switch (c)
+      {
+      case '2':
+       l1runs = atol (optarg);
+       /* l1runs is used as a divisor further down.  */
+       if (l1runs < 1)
+         {
+           fprintf (stderr, "stat: bad number of runs %s\n", optarg);
+           exit (1);
+         }
+       l2runs = -1;            /* set later on */
+       break;
+      case 'd':                        /* increase debug level */
+       stat_debug++;
+       break;
+      case 'i':
+       if (1 == realinput)
+         {
+           fputs ("stat: options -i and -r are mutually exclusive\n", stderr);
+           exit (1);
+         }
+       if (mpz_set_str (z_imax, optarg, 0))
+         {
+           fprintf (stderr, "stat: bad max value %s\n", optarg);
+           exit (1);
+         }
+       realinput = 0;
+       break;
+      case 'r':
+       if (0 == realinput)
+         {
+           fputs ("stat: options -i and -r are mutually exclusive\n", stderr);
+           exit (1);
+         }
+       if (mpz_set_str (z_imax, optarg, 0))
+         {
+           fprintf (stderr, "stat: bad max value %s\n", optarg);
+           exit (1);
+         }
+       realinput = 1;
+       break;
+      case 'o':
+       /* NOTE(review): 'o' is not in the getopt() option string above,
+          so this case cannot be reached, and omitoutput is not read
+          anywhere in this function.  */
+       omitoutput = atoi (optarg);
+       break;
+      case '?':
+      default:
+       fputs (usage, stderr);
+       exit (1);
+      }
+  argc -= optind;
+  argv += optind;
+
+  /* Pick the input stream: stdin unless a file name was given.  */
+  if (argc < 1)
+    fp = stdin;
+  else
+    {
+      filen = argv[0];
+      fp = fopen (filen, "r");
+      if (NULL == fp)
+       {
+         perror (filen);
+         exit (1);
+       }
+    }
+
+  if (-1 == realinput)
+    realinput = 1;             /* default is real numbers */
+
+  /* read file and fill appropriate vec */
+  if (1 == realinput)          /* real input */
+    {
+      for (f = 0; f < FVECSIZ ; f++)
+       {
+         mpf_init (fvec[f]);
+         if (!mpf_inp_str (fvec[f], fp, 10))
+           {
+             /* This slot is not counted in n; release it right away.  */
+             mpf_clear (fvec[f]);
+             break;
+           }
+       }
+    }
+  else                         /* integer input */
+    {
+      for (f = 0; f < FVECSIZ ; f++)
+       {
+         mpz_init (zvec[f]);
+         if (!mpz_inp_str (zvec[f], fp, 10))
+           {
+             /* This slot is not counted in n; release it right away.  */
+             mpz_clear (zvec[f]);
+             break;
+           }
+       }
+    }
+  vecentries = n = f;          /* number of entries read */
+  fclose (fp);
+
+  if (FVECSIZ == f)
+    fprintf (stderr, "stat: warning: discarding input due to lazy allocation "\
+            "of only %ld entries.  sorry.\n", FVECSIZ);
+
+  printf ("Got %lu numbers.\n", n);
+
+  /* With no observations ks_table() would divide by sqrt(0).  */
+  if (0 == n)
+    {
+      fputs ("stat: no input numbers\n", stderr);
+      exit (1);
+    }
+
+  /* convert and fill the other vec */
+  /* since fvec[] contains 0<=f<1 and we want ivec[] to contain
+     0<=z<=imax and we are truncating all fractions when
+     converting float to int, we have to add 1 to imax.*/
+  mpf_set_z (f_imax_plus1, z_imax);
+  mpf_add_ui (f_imax_plus1, f_imax_plus1, 1);
+  if (1 == realinput)          /* fill zvec[] */
+    {
+      for (f = 0; f < n; f++)
+       {
+         mpf_mul (f_temp, fvec[f], f_imax_plus1);
+         mpz_init (zvec[f]);
+         mpz_set_f (zvec[f], f_temp); /* truncating fraction */
+         if (stat_debug > 1)
+           {
+             mpz_out_str (stderr, 10, zvec[f]);
+             fputs ("\n", stderr);
+           }
+       }
+    }
+  else                         /* integer input; fill fvec[] */
+    {
+      /*    mpf_set_z (f_imax_minus1, z_imax);
+           mpf_sub_ui (f_imax_minus1, f_imax_minus1, 1);*/
+      for (f = 0; f < n; f++)
+       {
+         mpf_init (fvec[f]);
+         mpf_set_z (fvec[f], zvec[f]);
+         mpf_div (fvec[f], fvec[f], f_imax_plus1);
+         if (stat_debug > 1)
+           {
+             mpf_out_str (stderr, 10, 0, fvec[f]);
+             fputs ("\n", stderr);
+           }
+       }
+    }
+
+  /* 2 levels? */
+  if (1 != l2runs)
+    {
+      l2runs = n / l1runs;
+      /* Zero complete rounds would leave nothing for the second level
+        to test.  */
+      if (l2runs < 1)
+       {
+         fprintf (stderr, "stat: need at least %ld numbers for a 2-level "\
+                  "test, got %lu\n", l1runs, n);
+         exit (1);
+       }
+      printf ("Doing %ld second level rounds "\
+             "with %ld entries in each round", l2runs, l1runs);
+      if (n % l1runs)
+       printf (" (discarding %lu entr%s)", n % l1runs,
+               n % l1runs == 1 ? "y" : "ies");
+      puts (".");
+      n = l1runs;
+    }
+
+#ifndef DONT_FFREQ
+  f_freq (l1runs, l2runs, fvec, n);
+#endif
+#ifdef DO_ZFREQ
+  z_freq (l1runs, l2runs, zvec, n, mpz_get_ui (z_imax));
+#endif
+
+  mpf_clear (f_temp); mpz_clear (z_imax);
+  mpf_clear (f_imax_plus1);
+  mpf_clear (f_imax_minus1);
+  /* Both vectors hold exactly vecentries initialised elements here.  */
+  for (f = 0; f < vecentries; f++)
+    {
+      mpf_clear (fvec[f]);
+      mpz_clear (zvec[f]);
+    }
+
+  return 0;
+}
diff --git a/tests/rand/statlib.c b/tests/rand/statlib.c

new file mode 100644 (file)

index 0000000..0bf22e3
--- /dev/null
+++ b/tests/rand/statlib.c
@@ -0,0 +1,837 @@
+/* statlib.c -- Statistical functions for testing the randomness of
+   number sequences. */
+
+/*
+Copyright 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+/* The theories for these functions are taken from D. Knuth's "The Art
+of Computer Programming: Volume 2, Seminumerical Algorithms", Third
+Edition, Addison Wesley, 1998. */
+
+/* Implementation notes.
+
+The Kolmogorov-Smirnov test.
+
+Eq. (13) in Knuth, p. 50, says that if X1, X2, ..., Xn are independent
+observations arranged into ascending order
+
+       Kp = sqrt(n) * max(j/n - F(Xj))         for all 1<=j<=n
+       Km = sqrt(n) * max(F(Xj) - (j-1)/n)     for all 1<=j<=n
+
+where F(x) = Pr(X <= x) = probability that (X <= x), which for a
+uniformly distributed random real number between zero and one is
+exactly the number itself (x).
+
+
+The answer to exercise 23 gives the following implementation, which
+doesn't need the observations to be sorted in ascending order:
+
+for (k = 0; k < m; k++)
+       a[k] = 1.0
+       b[k] = 0.0
+       c[k] = 0
+
+for (each observation Xj)
+       Y = F(Xj)
+       k = floor (m * Y)
+       a[k] = min (a[k], Y)
+       b[k] = max (b[k], Y)
+       c[k] += 1
+
+j = 0
+rp = rm = 0
+for (k = 0; k < m; k++)
+       if (c[k] > 0)
+               rm = max (rm, a[k] - j/n)
+               j += c[k]
+               rp = max (rp, j/n - b[k])
+
+Kp = sqrt (n) * rp
+Km = sqrt (n) * rm
+
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+#include "gmp.h"
+#include "gmpstat.h"
+
+/* qsort() comparison callback for ks(): orders two mpf_t array elements
+   in ascending order.  qsort() hands over pointers to the elements as
+   `const void *', so mpf_cmp() cannot be passed to it directly.  */
+static int
+ks_cmp_mpf (const void *lhs, const void *rhs)
+{
+  return mpf_cmp ((const __mpf_struct *) lhs, (const __mpf_struct *) rhs);
+}
+
+/* ks (Kp, Km, X, P, n) -- Perform a Kolmogorov-Smirnov test on the N
+   real numbers between zero and one in vector X.  P is the
+   distribution function, called for each entry in X, which should
+   calculate the probability of X being greater than or equal to any
+   number in the sequence.  (For a uniformly distributed sequence of
+   real numbers between zero and one, this is simply equal to X.)  The
+   result is put in Kp and Km.
+
+   X is sorted in place.  When g_debug > DEBUG_2 a trace of every step
+   is printed on stdout.  */
+
+void
+ks (mpf_t Kp,
+    mpf_t Km,
+    mpf_t X[],
+    void (P) (mpf_t, mpf_t),
+    unsigned long int n)
+{
+  mpf_t Kt;                    /* temp */
+  mpf_t f_x;
+  mpf_t f_j;                   /* j */
+  mpf_t f_jnq;                 /* j/n or (j-1)/n */
+  unsigned long int j;
+
+  /* Sort the vector in ascending order. */
+  qsort (X, n, sizeof (X[0]), ks_cmp_mpf);
+
+  /* K-S test. */
+  /*   Kp = sqrt(n) * max(j/n - F(Xj))         for all 1<=j<=n
+       Km = sqrt(n) * max(F(Xj) - (j-1)/n)     for all 1<=j<=n
+  */
+
+  mpf_init (Kt); mpf_init (f_x); mpf_init (f_j); mpf_init (f_jnq);
+  mpf_set_ui (Kp, 0);  mpf_set_ui (Km, 0);
+  for (j = 1; j <= n; j++)
+    {
+      P (f_x, X[j-1]);
+      mpf_set_ui (f_j, j);
+
+      /* Kp candidate: j/n - F(Xj) */
+      mpf_div_ui (f_jnq, f_j, n);
+      mpf_sub (Kt, f_jnq, f_x);
+      if (mpf_cmp (Kt, Kp) > 0)
+       mpf_set (Kp, Kt);
+      if (g_debug > DEBUG_2)
+       {
+         printf ("j=%lu ", j);
+         printf ("P()="); mpf_out_str (stdout, 10, 2, f_x); printf ("\t");
+
+         printf ("jnq="); mpf_out_str (stdout, 10, 2, f_jnq); printf (" ");
+         printf ("diff="); mpf_out_str (stdout, 10, 2, Kt); printf (" ");
+         printf ("Kp="); mpf_out_str (stdout, 10, 2, Kp); printf ("\t");
+       }
+
+      /* Km candidate: F(Xj) - (j-1)/n */
+      mpf_sub_ui (f_j, f_j, 1);
+      mpf_div_ui (f_jnq, f_j, n);
+      mpf_sub (Kt, f_x, f_jnq);
+      if (mpf_cmp (Kt, Km) > 0)
+       mpf_set (Km, Kt);
+
+      if (g_debug > DEBUG_2)
+       {
+         printf ("jnq="); mpf_out_str (stdout, 10, 2, f_jnq); printf (" ");
+         printf ("diff="); mpf_out_str (stdout, 10, 2, Kt); printf (" ");
+         printf ("Km="); mpf_out_str (stdout, 10, 2, Km); printf (" ");
+         printf ("\n");
+       }
+    }
+
+  /* Scale both maxima by sqrt(n). */
+  mpf_sqrt_ui (Kt, n);
+  mpf_mul (Kp, Kp, Kt);
+  mpf_mul (Km, Km, Kt);
+
+  mpf_clear (Kt); mpf_clear (f_x); mpf_clear (f_j); mpf_clear (f_jnq);
+}
+
+/* ks_table (p, val, n) -- store in P the probability for Kp/Km being
+   less than or equal to VAL with N observations.  See [Knuth section
+   3.3.1].
+
+   Eq. (27), Knuth p.58, is used with the O(1/n) term left out for
+   simplicity:
+
+     Pr(Kp(n) <= s) = 1 - pow(e, -2*s^2) * (1 - 2/3*s/sqrt(n) + O(1/n))
+
+   where 's' is VAL.  Dropping the term results in too high
+   probabilities, especially when n is small.  */
+
+void
+ks_table (mpf_t p, mpf_t val, const unsigned int n)
+{
+  mpf_t corr;                  /* 1 - 2/3 * s/sqrt(n) */
+  mpf_t etail;                 /* pow(e, -2*s^2) */
+
+  mpf_init (corr);
+  mpf_init (etail);
+
+  /* etail = pow(e, -2*s^2) */
+#ifndef OLDGMP
+  mpf_pow_ui (etail, val, 2);  /* etail = s^2 */
+  mpf_set_d (etail, exp (-(2.0 * mpf_get_d (etail))));
+#else
+  /* hmmm, gmp doesn't have pow() for floats.  use doubles. */
+  mpf_set_d (etail, pow (M_E, -(2 * pow (mpf_get_d (val), 2))));
+#endif
+
+  /* corr = 1 - 2/3 * s/sqrt(n) */
+  mpf_sqrt_ui (corr, n);
+  mpf_div (corr, val, corr);
+  mpf_mul_ui (corr, corr, 2);
+  mpf_div_ui (corr, corr, 3);
+  mpf_ui_sub (corr, 1, corr);
+
+  /* p = 1 - corr * etail */
+  mpf_mul (corr, corr, etail);
+  mpf_ui_sub (p, 1, corr);
+
+  mpf_clear (corr);
+  mpf_clear (etail);
+}
+
+/* Row 0 holds the values X[p] used by x2_table() for its seven columns
+   (printed by stat.c under the labels 1%, 5%, 25%, 50%, 75%, 95%, 99%);
+   row 1 holds their squares.  */
+static const double x2_table_X[][7] = {
+  { -2.33, -1.64, -.674, 0.0, 0.674, 1.64, 2.33 }, /* x */
+  { 5.4289, 2.6896, .454276, 0.0, .454276, 2.6896, 5.4289} /* x^2 */
+};
+
+#define _2D3 ((double) .6666666666)
+
+/* x2_table (t, v) -- return chi-square table row for V in T[0..6].  */
+void
+x2_table (double t[],
+         unsigned int v)
+{
+  /* Do all arithmetic in double: `2 * v' evaluated as unsigned int wraps
+     around for v >= 2^31 (e.g. v = 0xffffffff).  */
+  const double dv = (double) v;
+  const double spread = sqrt (2.0 * dv);
+  int f;
+
+
+  /* FIXME: Do a table lookup for v <= 30 since the following formula
+     [Knuth, vol 2, 3.3.1] is only good for v > 30. */
+
+  /* value = v + sqrt(2*v) * X[p] + (2/3) * X[p]^2 - 2/3 + O(1/sqrt(t) */
+  /* NOTE: The O() term is ignored for simplicity. */
+
+  for (f = 0; f < 7; f++)
+      t[f] =
+       dv +
+       spread * x2_table_X[0][f] +
+       _2D3 * x2_table_X[1][f] - _2D3;
+}
+
+
+/* P(p, x) -- Distribution function.  Calculate the probability of X
+being greater than or equal to any number in the sequence.  For a
+random real number between zero and one given by a uniformly
+distributed random number generator, this is simply equal to X, so
+the function just copies X into P.  It is the distribution function
+that mpf_freqt() hands to ks(). */
+
+static void
+P (mpf_t p, mpf_t x)
+{
+  mpf_set (p, x);
+}
+
+/* mpf_freqt() -- Frequency test using KS on N real numbers between zero
+   and one.  See [Knuth vol 2, p.61].
+
+   Simply calls ks() on X with the distribution function P(); the
+   results are put in Kp and Km.  ks() sorts X in place.  */
+void
+mpf_freqt (mpf_t Kp,
+          mpf_t Km,
+          mpf_t X[],
+          const unsigned long int n)
+{
+  ks (Kp, Km, X, P, n);
+}
+
+
+/* x2 (V, X, k, P, x, n) -- the Chi-square test, after Eq. (8) in Knuth
+   vol. 2.  X[] holds the data for the K categories, and P (prob, s, x)
+   stores the probability Pr(s) of category s in PROB; the pointer X is
+   handed through to P() untouched.  N is the number of samples.  The
+   value stored in V is
+
+   V = 1/n * sum((s=0 to k-1) X[s]^2 / Pr(s)) - n
+
+   When g_debug > DEBUG_2 every term is traced on stderr.  */
+
+void
+x2 (mpf_t V,                   /* result */
+    unsigned long int X[],     /* data */
+    unsigned int k,            /* #of categories */
+    void (P) (mpf_t, unsigned long int, void *), /* probability func */
+    void *x,                   /* extra user data passed to P() */
+    unsigned long int n)       /* #of samples */
+{
+  unsigned int cat = 0;                /* current category */
+  mpf_t term;                  /* X[cat]^2, then X[cat]^2 / Pr(cat) */
+  mpf_t prob;                  /* Pr(cat) */
+
+  mpf_init (term);
+  mpf_init (prob);
+
+  mpf_set_ui (V, 0);
+  while (cat < k)
+    {
+      if (g_debug > DEBUG_2)
+       fprintf (stderr, "%u: P()=", cat);
+
+      mpf_set_ui (term, X[cat]);
+      mpf_mul (term, term, term);
+      P (prob, cat, x);
+      if (g_debug > DEBUG_2)
+       mpf_out_str (stderr, 10, 2, prob);
+
+      mpf_div (term, term, prob);
+      mpf_add (V, V, term);
+      if (g_debug > DEBUG_2)
+       {
+         fprintf (stderr, "\tV=");
+         mpf_out_str (stderr, 10, 2, V);
+         fprintf (stderr, "\t");
+       }
+
+      cat++;
+    }
+  if (g_debug > DEBUG_2)
+    fprintf (stderr, "\n");
+
+  /* V = sum / n - n */
+  mpf_div_ui (V, V, n);
+  mpf_sub_ui (V, V, n);
+
+  mpf_clear (term);
+  mpf_clear (prob);
+}
+
+/* Pzf(p, s, x) -- Probability for category S in mpz_freqt().  It's
+   1/d for all S (S is unused).  X points to an unsigned int holding 'd'. */
+static void
+Pzf (mpf_t p, unsigned long int s, void *x)
+{
+  mpf_set_ui (p, 1);
+  mpf_div_ui (p, p, *((unsigned int *) x));
+}
+
+/* mpz_freqt(V, X, imax, n) -- Frequency test on integers.  [Knuth,
+   vol 2, 3.3.2].  Keep IMAX low on this one, since we loop from 0 to
+   IMAX.  128 or 256 could be nice.
+
+   X[] should only hold numbers in the range 0 <= X <= IMAX; entries
+   outside it (negative or too large) are discarded.  Return value is
+   the number of observations actually used; V is 0 if there are none.
+
+   Since X[] contains integers between zero and IMAX, inclusive, we
+   have IMAX+1 categories.  Note that N should be at least 5*IMAX.
+   Result is put in V and can be compared to output from x2_table (v=IMAX). */
+
+unsigned long int
+mpz_freqt (mpf_t V,
+          mpz_t X[],
+          unsigned int imax,
+          const unsigned long int n)
+{
+  unsigned long int *v;                /* per-category counts */
+  unsigned int f;
+  unsigned int d;              /* number of categories = imax+1 */
+  unsigned long int i;         /* sample index, as wide as n */
+  unsigned long int usedn;
+
+  d = imax + 1;
+  v = (unsigned long int *) calloc (imax + 1, sizeof (unsigned long int));
+  if (NULL == v)
+    {
+      fprintf (stderr, "mpz_freqt(): out of memory\n");
+      exit (1);
+    }
+
+  /* count; test the whole mpz, mpz_get_ui() alone drops sign and high bits */
+  usedn = n;                   /* actual number of observations */
+  for (i = 0; i < n; i++)
+    {
+      if (mpz_sgn (X[i]) < 0 || mpz_cmp_ui (X[i], imax) > 0) /* sanity check */
+       {
+         if (g_debug)
+           {
+             fprintf (stderr, "mpz_freqt(): warning: input insanity: ");
+             mpz_out_str (stderr, 10, X[i]);
+             fprintf (stderr, ", ignored.\n");
+           }
+         usedn--;
+         continue;
+       }
+      v[mpz_get_ui (X[i])]++;
+    }
+
+  if (g_debug > DEBUG_2)
+    {
+      fprintf (stderr, "counts:\n");
+      for (f = 0; f <= imax; f++)
+       fprintf (stderr, "%u:\t%lu\n", f, v[f]);
+    }
+
+  /* chi-square with k=imax+1 and P(x)=1/(imax+1) for all x.*/
+  if (usedn > 0)
+    x2 (V, v, d, Pzf, (void *) &d, usedn);
+  else
+    mpf_set_ui (V, 0);         /* nothing usable: avoid dividing by zero in x2() */
+
+  free (v);
+  return (usedn);
+}
+
+/* debug dummy to drag in dump funcs; the calls sit under if (0) and never run */
+void
+foo_debug ()
+{
+  if (0)
+    {
+      mpf_dump (0);
+#ifndef OLDGMP
+      mpz_dump (0);
+#endif
+    }
+}
+
+/* merit (rop, t, v, m) -- calculate merit for spectral test result in
+   dimension T, see Knuth p. 105: rop = C(t) * v^t / m.  BUGS: Only valid
+   for 2 <= T <= 6; any other T prints a warning and uses C(t) = 0. */
+void
+merit (mpf_t rop, unsigned int t, mpf_t v, mpz_t m)
+{
+  int f;
+  mpf_t f_m, f_const, f_pi;
+
+  mpf_init (f_m);
+  mpf_set_z (f_m, m);
+  mpf_init_set_d (f_const, M_PI);      /* C(t), starts as pi (double precision) */
+  mpf_init_set_d (f_pi, M_PI);         /* pi, used to build higher powers */
+
+  switch (t)
+    {
+    case 2:                    /* PI */
+      break;
+    case 3:                    /* PI * 4/3 */
+      mpf_mul_ui (f_const, f_const, 4);
+      mpf_div_ui (f_const, f_const, 3);
+      break;
+    case 4:                    /* PI^2 * 1/2 */
+      mpf_mul (f_const, f_const, f_pi);
+      mpf_div_ui (f_const, f_const, 2);
+      break;
+    case 5:                    /* PI^2 * 8/15 */
+      mpf_mul (f_const, f_const, f_pi);
+      mpf_mul_ui (f_const, f_const, 8);
+      mpf_div_ui (f_const, f_const, 15);
+      break;
+    case 6:                    /* PI^3 * 1/6 */
+      mpf_mul (f_const, f_const, f_pi);
+      mpf_mul (f_const, f_const, f_pi);
+      mpf_div_ui (f_const, f_const, 6);
+      break;
+    default:                   /* also reached for t < 2 */
+      fprintf (stderr,
+              "spect (merit): can't calculate merit for dimensions > 6\n");
+      mpf_set_ui (f_const, 0);
+      break;
+    }
+
+  /* rop = v^t */
+  mpf_set (rop, v);
+  for (f = 1; f < t; f++)
+    mpf_mul (rop, rop, v);
+  mpf_mul (rop, rop, f_const); /* rop = C(t) * v^t */
+  mpf_div (rop, rop, f_m);     /* rop = C(t) * v^t / m; m is a divisor */
+
+  mpf_clear (f_m);
+  mpf_clear (f_const);
+  mpf_clear (f_pi);
+}
+
+double
+merit_u (unsigned int t, mpf_t v, mpz_t m)     /* merit() returned as a double */
+{
+  mpf_t rop;
+  double res;
+
+  mpf_init (rop);
+  merit (rop, t, v, m);
+  res = mpf_get_d (rop);       /* convert before the temporary is cleared */
+  mpf_clear (rop);
+  return res;
+}
+
+/* f_floor (rop, op) -- Set rop = floor (op), rounding toward minus
+   infinity. */
+void
+f_floor (mpf_t rop, mpf_t op)
+{
+  /* This used to convert to mpz and back, but mpz_set_f() truncates
+     toward zero, so a negative non-integral OP came out as ceil (op).
+     mpf_floor() rounds toward minus infinity for either sign.
+
+     The existing caller passes the same variable as ROP and OP; GMP
+     functions generally accept the same variable for input and
+     output. */
+  mpf_floor (rop, op);
+}
+
+
+/* vz_dot (rop, V1, V2, n) -- store in ROP the dot product of the
+   z-vectors V1 and V2, each of which has N elements. */
+
+void
+vz_dot (mpz_t rop, mpz_t V1[], mpz_t V2[], unsigned int n)
+{
+  mpz_t prod;                  /* holds one element-wise product */
+
+  mpz_init (prod);
+  mpz_set_ui (rop, 0);
+  for (; n > 0; n--)           /* walk from the last element down to the first */
+    {
+      mpz_mul (prod, V1[n - 1], V2[n - 1]);
+      mpz_add (rop, rop, prod);
+    }
+
+  mpz_clear (prod);
+}
+
+void
+spectral_test (mpf_t rop[], unsigned int T, mpz_t a, mpz_t m)
+{
+  /* Knuth "Seminumerical Algorithms, Third Edition", section 3.3.4
+     (pp. 101-103). */
+
+  /* v[t] = min { sqrt (x[1]^2 + ... + x[t]^2) |
+     x[1] + a*x[2] + ... + pow (a, t-1) * x[t] is congruent to 0 (mod m) } */
+
+
+  /* Variables. */
+  unsigned int ui_t;
+  unsigned int ui_i, ui_j, ui_k, ui_l;
+  mpf_t f_tmp1, f_tmp2;
+  mpz_t tmp1, tmp2, tmp3;
+  mpz_t U[GMP_SPECT_MAXT][GMP_SPECT_MAXT],
+    V[GMP_SPECT_MAXT][GMP_SPECT_MAXT],
+    X[GMP_SPECT_MAXT],
+    Y[GMP_SPECT_MAXT],
+    Z[GMP_SPECT_MAXT];
+  mpz_t h, hp, r, s, p, pp, q, u, v;
+
+  /* GMP inits. */
+  mpf_init (f_tmp1);
+  mpf_init (f_tmp2);
+  for (ui_i = 0; ui_i < GMP_SPECT_MAXT; ui_i++)
+    {
+      for (ui_j = 0; ui_j < GMP_SPECT_MAXT; ui_j++)
+       {
+         mpz_init_set_ui (U[ui_i][ui_j], 0);
+         mpz_init_set_ui (V[ui_i][ui_j], 0);
+       }
+      mpz_init_set_ui (X[ui_i], 0);
+      mpz_init_set_ui (Y[ui_i], 0);
+      mpz_init (Z[ui_i]);
+    }
+  mpz_init (tmp1);
+  mpz_init (tmp2);
+  mpz_init (tmp3);
+  mpz_init (h);
+  mpz_init (hp);
+  mpz_init (r);
+  mpz_init (s);
+  mpz_init (p);
+  mpz_init (pp);
+  mpz_init (q);
+  mpz_init (u);
+  mpz_init (v);
+
+  /* Implementation inits. */
+  if (T > GMP_SPECT_MAXT)
+    T = GMP_SPECT_MAXT;                        /* FIXME: Lazy. */
+
+  /* S1 [Initialize.] */
+  ui_t = 2 - 1;                        /* NOTE: `t' in description == ui_t + 1
+                                  for easy indexing */
+  mpz_set (h, a);
+  mpz_set (hp, m);
+  mpz_set_ui (p, 1);
+  mpz_set_ui (pp, 0);
+  mpz_set (r, a);
+  mpz_pow_ui (s, a, 2);
+  mpz_add_ui (s, s, 1);                /* s = 1 + a^2 */
+
+  /* S2 [Euclidean step.] */
+  while (1)
+    {
+      if (g_debug > DEBUG_1)
+       {
+         mpz_mul (tmp1, h, pp);
+         mpz_mul (tmp2, hp, p);
+         mpz_sub (tmp1, tmp1, tmp2);
+         if (mpz_cmpabs (m, tmp1))
+           {
+             printf ("***BUG***: h*pp - hp*p = ");
+             mpz_out_str (stdout, 10, tmp1);
+             printf ("\n");
+           }
+       }
+      if (g_debug > DEBUG_2)
+       {
+         printf ("hp = ");
+         mpz_out_str (stdout, 10, hp);
+         printf ("\nh = ");
+         mpz_out_str (stdout, 10, h);
+         printf ("\n");
+         fflush (stdout);
+       }
+
+      if (mpz_sgn (h))
+       mpz_tdiv_q (q, hp, h);  /* q = floor(hp/h) */
+      else
+       mpz_set_ui (q, 1);
+
+      if (g_debug > DEBUG_2)
+       {
+         printf ("q = ");
+         mpz_out_str (stdout, 10, q);
+         printf ("\n");
+         fflush (stdout);
+       }
+
+      mpz_mul (tmp1, q, h);
+      mpz_sub (u, hp, tmp1);   /* u = hp - q*h */
+
+      mpz_mul (tmp1, q, p);
+      mpz_sub (v, pp, tmp1);   /* v = pp - q*p */
+
+      mpz_pow_ui (tmp1, u, 2);
+      mpz_pow_ui (tmp2, v, 2);
+      mpz_add (tmp1, tmp1, tmp2);
+      if (mpz_cmp (tmp1, s) < 0)
+       {
+         mpz_set (s, tmp1);    /* s = u^2 + v^2 */
+         mpz_set (hp, h);      /* hp = h */
+         mpz_set (h, u);       /* h = u */
+         mpz_set (pp, p);      /* pp = p */
+         mpz_set (p, v);       /* p = v */
+       }
+      else
+       break;
+    }
+
+  /* S3 [Compute v2.] */
+  mpz_sub (u, u, h);
+  mpz_sub (v, v, p);
+
+  mpz_pow_ui (tmp1, u, 2);
+  mpz_pow_ui (tmp2, v, 2);
+  mpz_add (tmp1, tmp1, tmp2);
+  if (mpz_cmp (tmp1, s) < 0)
+    {
+      mpz_set (s, tmp1);       /* s = u^2 + v^2 */
+      mpz_set (hp, u);
+      mpz_set (pp, v);
+    }
+  mpf_set_z (f_tmp1, s);
+  mpf_sqrt (rop[ui_t - 1], f_tmp1);
+
+  /* S4 [Advance t.] */
+  mpz_neg (U[0][0], h);
+  mpz_set (U[0][1], p);
+  mpz_neg (U[1][0], hp);
+  mpz_set (U[1][1], pp);
+
+  mpz_set (V[0][0], pp);
+  mpz_set (V[0][1], hp);
+  mpz_neg (V[1][0], p);
+  mpz_neg (V[1][1], h);
+  if (mpz_cmp_ui (pp, 0) > 0)
+    {
+      mpz_neg (V[0][0], V[0][0]);
+      mpz_neg (V[0][1], V[0][1]);
+      mpz_neg (V[1][0], V[1][0]);
+      mpz_neg (V[1][1], V[1][1]);
+    }
+
+  while (ui_t + 1 != T)                /* S4 loop */
+    {
+      ui_t++;
+      mpz_mul (r, a, r);
+      mpz_mod (r, r, m);
+
+      /* Add new row and column to U and V.  They are initialized with
+        all elements set to zero, so clearing is not necessary. */
+
+      mpz_neg (U[ui_t][0], r); /* U: First col in new row. */
+      mpz_set_ui (U[ui_t][ui_t], 1); /* U: Last col in new row. */
+
+      mpz_set (V[ui_t][ui_t], m); /* V: Last col in new row. */
+
+      /* "Finally, for 1 <= i < t,
+          set q = round (vi1 * r / m),
+          vit = vi1*r - q*m,
+          and Ut=Ut+q*Ui */
+
+      for (ui_i = 0; ui_i < ui_t; ui_i++)
+       {
+         mpz_mul (tmp1, V[ui_i][0], r); /* tmp1=vi1*r */
+         zdiv_round (q, tmp1, m); /* q=round(vi1*r/m) */
+         mpz_mul (tmp2, q, m); /* tmp2=q*m */
+         mpz_sub (V[ui_i][ui_t], tmp1, tmp2);
+
+         for (ui_j = 0; ui_j <= ui_t; ui_j++) /* U[t] = U[t] + q*U[i] */
+           {
+             mpz_mul (tmp1, q, U[ui_i][ui_j]); /* tmp=q*uij */
+             mpz_add (U[ui_t][ui_j], U[ui_t][ui_j], tmp1); /* utj = utj + q*uij */
+           }
+       }
+
+      /* s = min (s, zdot (U[t], U[t]) */
+      vz_dot (tmp1, U[ui_t], U[ui_t], ui_t + 1);
+      if (mpz_cmp (tmp1, s) < 0)
+       mpz_set (s, tmp1);
+
+      ui_k = ui_t;
+      ui_j = 0;                        /* WARNING: ui_j no longer a temp. */
+
+      /* S5 [Transform.] */
+      if (g_debug > DEBUG_2)
+       printf ("(t, k, j, q1, q2, ...)\n");
+      do
+       {
+         if (g_debug > DEBUG_2)
+           printf ("(%u, %u, %u", ui_t + 1, ui_k + 1, ui_j + 1);
+
+         for (ui_i = 0; ui_i <= ui_t; ui_i++)
+           {
+             if (ui_i != ui_j)
+               {
+                 vz_dot (tmp1, V[ui_i], V[ui_j], ui_t + 1); /* tmp1=dot(Vi,Vj). */
+                 mpz_abs (tmp2, tmp1);
+                 mpz_mul_ui (tmp2, tmp2, 2); /* tmp2 = 2*abs(dot(Vi,Vj) */
+                 vz_dot (tmp3, V[ui_j], V[ui_j], ui_t + 1); /* tmp3=dot(Vj,Vj). */
+
+                 if (mpz_cmp (tmp2, tmp3) > 0)
+                   {
+                     zdiv_round (q, tmp1, tmp3); /* q=round(Vi.Vj/Vj.Vj) */
+                     if (g_debug > DEBUG_2)
+                       {
+                         printf (", ");
+                         mpz_out_str (stdout, 10, q);
+                       }
+
+                     for (ui_l = 0; ui_l <= ui_t; ui_l++)
+                       {
+                         mpz_mul (tmp1, q, V[ui_j][ui_l]);
+                         mpz_sub (V[ui_i][ui_l], V[ui_i][ui_l], tmp1); /* Vi=Vi-q*Vj */
+                         mpz_mul (tmp1, q, U[ui_i][ui_l]);
+                         mpz_add (U[ui_j][ui_l], U[ui_j][ui_l], tmp1); /* Uj=Uj+q*Ui */
+                       }
+
+                     vz_dot (tmp1, U[ui_j], U[ui_j], ui_t + 1); /* tmp1=dot(Uj,Uj) */
+                     if (mpz_cmp (tmp1, s) < 0) /* s = min(s,dot(Uj,Uj)) */
+                       mpz_set (s, tmp1);
+                     ui_k = ui_j;
+                   }
+                 else if (g_debug > DEBUG_2)
+                   printf (", #"); /* 2|Vi.Vj| <= Vj.Vj */
+               }
+             else if (g_debug > DEBUG_2)
+               printf (", *"); /* i == j */
+           }
+
+         if (g_debug > DEBUG_2)
+           printf (")\n");
+
+         /* S6 [Advance j.] */
+         if (ui_j == ui_t)
+           ui_j = 0;
+         else
+           ui_j++;
+       }
+      while (ui_j != ui_k);    /* S5 */
+
+      /* From Knuth p. 104: "The exhaustive search in steps S8-S10
+        reduces the value of s only rarely." */
+#ifdef DO_SEARCH
+      /* S7 [Prepare for search.] */
+      /* Find minimum in (x[1], ..., x[t]) satisfying condition
+        x[k]^2 <= f(y[1], ...,y[t]) * dot(V[k],V[k]) */
+
+      ui_k = ui_t;
+      if (g_debug > DEBUG_2)
+       {
+         printf ("searching...");
+         /*for (f = 0; f < ui_t*/
+         fflush (stdout);
+       }
+
+      /* Z[i] = floor (sqrt (floor (dot(V[i],V[i]) * s / m^2))); */
+      mpz_pow_ui (tmp1, m, 2);
+      mpf_set_z (f_tmp1, tmp1);
+      mpf_set_z (f_tmp2, s);
+      mpf_div (f_tmp1, f_tmp2, f_tmp1);        /* f_tmp1 = s/m^2 */
+      for (ui_i = 0; ui_i <= ui_t; ui_i++)
+       {
+         vz_dot (tmp1, V[ui_i], V[ui_i], ui_t + 1);
+         mpf_set_z (f_tmp2, tmp1);
+         mpf_mul (f_tmp2, f_tmp2, f_tmp1);
+         f_floor (f_tmp2, f_tmp2);
+         mpf_sqrt (f_tmp2, f_tmp2);
+         mpz_set_f (Z[ui_i], f_tmp2);
+       }
+
+      /* S8 [Advance X[k].] */
+      do
+       {
+         if (g_debug > DEBUG_2)
+           {
+             printf ("X[%u] = ", ui_k);
+             mpz_out_str (stdout, 10, X[ui_k]);
+             printf ("\tZ[%u] = ", ui_k);
+             mpz_out_str (stdout, 10, Z[ui_k]);
+             printf ("\n");
+             fflush (stdout);
+           }
+
+         if (mpz_cmp (X[ui_k], Z[ui_k]))
+           {
+             mpz_add_ui (X[ui_k], X[ui_k], 1);
+             for (ui_i = 0; ui_i <= ui_t; ui_i++)
+               mpz_add (Y[ui_i], Y[ui_i], U[ui_k][ui_i]);
+
+             /* S9 [Advance k.] */
+             while (++ui_k <= ui_t)
+               {
+                 mpz_neg (X[ui_k], Z[ui_k]);
+                 mpz_mul_ui (tmp1, Z[ui_k], 2);
+                 for (ui_i = 0; ui_i <= ui_t; ui_i++)
+                   {
+                     mpz_mul (tmp2, tmp1, U[ui_k][ui_i]);
+                     mpz_sub (Y[ui_i], Y[ui_i], tmp2);
+                   }
+               }
+             vz_dot (tmp1, Y, Y, ui_t + 1);
+             if (mpz_cmp (tmp1, s) < 0)
+               mpz_set (s, tmp1);
+           }
+       }
+      while (--ui_k);
+#endif /* DO_SEARCH */
+      mpf_set_z (f_tmp1, s);
+      mpf_sqrt (rop[ui_t - 1], f_tmp1);
+#ifdef DO_SEARCH
+      if (g_debug > DEBUG_2)
+       printf ("done.\n");
+#endif /* DO_SEARCH */
+    } /* S4 loop */
+
+  /* Clear GMP variables. */
+
+  mpf_clear (f_tmp1);
+  mpf_clear (f_tmp2);
+  for (ui_i = 0; ui_i < GMP_SPECT_MAXT; ui_i++)
+    {
+      for (ui_j = 0; ui_j < GMP_SPECT_MAXT; ui_j++)
+       {
+         mpz_clear (U[ui_i][ui_j]);
+         mpz_clear (V[ui_i][ui_j]);
+       }
+      mpz_clear (X[ui_i]);
+      mpz_clear (Y[ui_i]);
+      mpz_clear (Z[ui_i]);
+    }
+  mpz_clear (tmp1);
+  mpz_clear (tmp2);
+  mpz_clear (tmp3);
+  mpz_clear (h);
+  mpz_clear (hp);
+  mpz_clear (r);
+  mpz_clear (s);
+  mpz_clear (p);
+  mpz_clear (pp);
+  mpz_clear (q);
+  mpz_clear (u);
+  mpz_clear (v);
+
+  return;
+}
diff --git a/tests/rand/t-iset.c b/tests/rand/t-iset.c

new file mode 100644 (file)

index 0000000..b1e51e4
--- /dev/null
+++ b/tests/rand/t-iset.c
@@ -0,0 +1,68 @@
+/* Test gmp_randinit_set.
+
+Copyright 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+/* expect after a gmp_randinit_set that the new and old generators will
+   produce the same sequence of numbers (20 draws of 123 bits are compared) */
+void
+check_one (const char *name, gmp_randstate_ptr src)
+{
+  gmp_randstate_t dst;
+  mpz_t  sz, dz;
+  int    i;
+
+  gmp_randinit_set (dst, src);         /* dst starts as a copy of src */
+  mpz_init (sz);
+  mpz_init (dz);
+
+  for (i = 0; i < 20; i++)
+    {
+      mpz_urandomb (sz, src, 123);
+      mpz_urandomb (dz, dst, 123);
+
+      if (mpz_cmp (sz, dz) != 0)       /* any divergence is fatal */
+        {
+          printf     ("gmp_randinit_set didn't duplicate randstate\n");
+          printf     ("  algorithm: %s\n", name);
+          gmp_printf ("  from src:  %#Zx\n", sz);
+          gmp_printf ("  from dst:  %#Zx\n", dz);
+          abort ();
+        }
+    }
+
+  mpz_clear (sz);
+  mpz_clear (dz);
+  gmp_randclear (dst);                 /* src is left for the caller to clear */
+}
+
+int
+main (int argc, char *argv[])
+{
+  tests_start ();
+
+  call_rand_algs (check_one);  /* presumably runs check_one per algorithm -- confirm in tests.h */
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/rand/t-lc2exp.c b/tests/rand/t-lc2exp.c

new file mode 100644 (file)

index 0000000..ce7d4c4
--- /dev/null
+++ b/tests/rand/t-lc2exp.c
@@ -0,0 +1,217 @@
+/* Exercise the lc2exp random functions.
+
+Copyright 2002, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* a=0 and c=0 produces zero results always; 5 draws are checked per M2EXP. */
+void
+check_zero (unsigned long m2exp)
+{
+  gmp_randstate_t  r;
+  mpz_t            a;
+  unsigned long    c;
+  int              i;
+
+  mpz_init_set_ui (a, 0L);
+  c = 0L;
+
+  gmp_randinit_lc_2exp (r, a, c, m2exp);
+  gmp_randseed_ui (r, 0L);
+
+  for (i = 0; i < 5; i++)
+    {
+      mpz_urandomb (a, r, 123L);       /* a is reused to hold the result */
+      if (mpz_sgn (a) != 0)
+        {
+          printf ("check_zero m2exp=%lu: didn't get zero\n", m2exp);
+          gmp_printf ("  rand=%#Zx\n", a);
+          abort ();
+        }
+    }
+
+  mpz_clear (a);
+  gmp_randclear (r);
+}
+
+/* negative a (a = -2^1000, c = 0, seed 0): every draw is expected to be zero */
+void
+check_nega (void)
+{
+  gmp_randstate_t  r;
+  mpz_t            a;
+  unsigned long    c = 0L, m2exp = 45L;
+  int              i;
+
+  mpz_init (a);
+  mpz_setbit (a, 1000L);
+  mpz_neg (a, a);
+  gmp_randinit_lc_2exp (r, a, c, m2exp);
+  gmp_randseed_ui (r, 0L);
+
+  for (i = 0; i < 5; i++)
+    {
+      mpz_urandomb (a, r, 123L);
+      if (mpz_sgn (a) != 0)
+        {
+          printf ("check_nega m2exp=%lu: didn't get zero\n", m2exp);
+          abort ();            /* fail the test, as check_zero does */
+        }
+    }
+
+  mpz_clear (a);
+  gmp_randclear (r);
+}
+
+void
+check_bigc (void)
+{
+  gmp_randstate_t  r;
+  mpz_t            a;
+  unsigned long    c, m2exp, bits;
+  int              i;
+  /* mpz_sizeinbase() returns size_t, hence the cast for %lu below.  */
+  mpz_init_set_ui (a, 0L);
+  c = ULONG_MAX;               /* big c: draws must still fit the requested bits */
+  m2exp = 8;
+
+  gmp_randinit_lc_2exp (r, a, c, m2exp);
+  gmp_randseed_ui (r, 0L);
+
+  for (i = 0; i < 20; i++)
+    {
+      bits = 123L;
+      mpz_urandomb (a, r, bits);
+      if (mpz_sgn (a) < 0 || mpz_sizeinbase (a, 2) > bits)
+        {
+          printf     ("check_bigc: mpz_urandomb out of range\n");
+          printf     ("   m2exp=%lu\n", m2exp);
+          gmp_printf ("   rand=%#ZX\n", a);
+          gmp_printf ("   sizeinbase2=%lu\n", (unsigned long) mpz_sizeinbase (a, 2));
+         abort ();
+        }
+    }
+
+  mpz_clear (a);
+  gmp_randclear (r);
+}
+
+void
+check_bigc1 (void)
+{
+  gmp_randstate_t  r;
+  mpz_t            a;
+  unsigned long    c, m2exp;
+  int              i;
+
+  mpz_init_set_ui (a, 0L);     /* multiplier a = 0 */
+  c = ULONG_MAX;
+  m2exp = 2;
+
+  gmp_randinit_lc_2exp (r, a, c, m2exp);
+  gmp_randseed_ui (r, 0L);
+
+  for (i = 0; i < 20; i++)
+    {
+      mpz_urandomb (a, r, 1L); /* one random bit per draw */
+      if (mpz_cmp_ui (a, 1L) != 0)     /* that bit is expected to be 1 every time */
+        {
+          printf     ("check_bigc1: mpz_urandomb didn't give 1\n");
+          printf     ("   m2exp=%lu\n", m2exp);
+          gmp_printf ("   got rand=%#ZX\n", a);
+          abort ();
+        }
+    }
+
+  mpz_clear (a);
+  gmp_randclear (r);
+}
+
+/* Checks parameters which triggered an assertion failure in the past.
+   Happened when limbs(a)+limbs(c) < bits_to_limbs(m2exp).  No result check.  */
+void
+check_bigm (void)
+{
+  gmp_randstate_t rstate;
+  mpz_t a;
+
+  mpz_init_set_ui (a, 5L);
+  gmp_randinit_lc_2exp (rstate, a, 1L, 384L);  /* a=5, c=1, m2exp=384 */
+
+  mpz_urandomb (a, rstate, 20L);       /* value unused; only has to complete */
+
+  gmp_randclear (rstate);
+  mpz_clear (a);
+}
+
+/* Checks for seeds bigger than the modulus.  No result check is made.  */
+void
+check_bigs (void)
+{
+  gmp_randstate_t rstate;
+  mpz_t sd, a;
+  int i;
+
+  mpz_init (sd);
+  mpz_setbit (sd, 300L);       /* sd = 2^300 */
+  mpz_sub_ui (sd, sd, 1L);     /* sd = 2^300 - 1 */
+  mpz_clrbit (sd, 13L);                /* clear bit 13 */
+  mpz_init_set_ui (a, 123456789L);
+
+  gmp_randinit_lc_2exp (rstate, a, 5L, 64L);   /* m2exp=64, far below sd's size */
+
+  for (i = 0; i < 20; i++)
+    {
+      mpz_neg (sd, sd);                /* alternate the seed's sign each round */
+      gmp_randseed (rstate, sd);
+      mpz_mul_ui (sd, sd, 7L); /* and keep growing it */
+
+      mpz_urandomb (a, rstate, 80L);
+    }
+
+  gmp_randclear (rstate);
+  mpz_clear (a);
+  mpz_clear (sd);
+}
+
+int
+main (void)
+{
+  tests_start ();
+
+  check_zero (2L);             /* a=0, c=0 generator at several m2exp sizes */
+  check_zero (7L);
+  check_zero (32L);
+  check_zero (64L);
+  check_zero (1000L);
+
+  check_nega ();
+  check_bigc ();
+  check_bigc1 ();
+
+  check_bigm ();               /* parameters that once tripped an assertion */
+  check_bigs ();               /* seeds bigger than the modulus */
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/rand/t-mt.c b/tests/rand/t-mt.c

new file mode 100644 (file)

index 0000000..71f1e11
--- /dev/null
+++ b/tests/rand/t-mt.c
@@ -0,0 +1,83 @@
+/* Test the Mersenne Twister random number generator.
+
+Copyright 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+#ifndef TRUE
+#define TRUE (1)
+#endif
+#ifndef FALSE
+#define FALSE (0)
+#endif
+
+/* Test that the sequence without seeding equals the sequence with the
+   default seed.  Returns TRUE on success, FALSE on the first mismatch.  */
+int
+chk_default_seed (void)
+{
+  gmp_randstate_t r1, r2;
+  mpz_t a, b;
+  int i;
+  int ok = TRUE;
+
+  mpz_init2 (a, 19936L);
+  mpz_init2 (b, 19936L);
+
+  gmp_randinit_mt (r1);                /* r1 is never seeded explicitly */
+  gmp_randinit_mt (r2);
+  gmp_randseed_ui (r2, 5489L); /* Must match DEFAULT_SEED in randmt.c */
+  for (i = 0; i < 3; i++)
+    {
+      /* Extract one whole buffer per iteration.  */
+      mpz_urandomb (a, r1, 19936L);
+      mpz_urandomb (b, r2, 19936L);
+      if (mpz_cmp (a, b) != 0)
+       {
+         ok = FALSE;
+         printf ("Default seed fails in iteration %d\n", i);
+         break;
+       }
+    }
+  gmp_randclear (r1);
+  gmp_randclear (r2);
+
+  mpz_clear (a);
+  mpz_clear (b);
+  return ok;
+}
+
+int
+main (int argc, char *argv[])
+{
+  int passed;                  /* result of the default-seed check */
+
+  tests_start ();
+
+  passed = chk_default_seed ();
+
+  tests_end ();
+
+  if (! passed)
+    return 1; /* fail */
+
+  return 0; /* pass */
+}
diff --git a/tests/rand/t-rand.c b/tests/rand/t-rand.c

new file mode 100644 (file)

index 0000000..d3e3c25
--- /dev/null
+++ b/tests/rand/t-rand.c
@@ -0,0 +1,296 @@
+/* t-rand -- Test random number generators.  */
+
+/*
+Copyright 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include "gmp.h"
+
+#define SEED 1
+#define BASE 16
+#define ENTS 10                        /* Number of entries in array when
+                                  printing.  */
+
+/* These were generated by this very program.  Do not edit!  */
+/* Integers.  */
+char *z1[ENTS] = {"0", "1", "1", "1", "1", "0", "1", "1", "1", "1"};
+char *z2[ENTS] = {"0", "3", "1", "3", "3", "0", "3", "3", "3", "1"};
+char *z3[ENTS] = {"4", "3", "1", "7", "3", "0", "3", "3", "3", "1"};
+char *z4[ENTS] = {"c", "3", "1", "f", "b", "8", "3", "3", "3", "1"};
+char *z5[ENTS] = {"1c", "13", "11", "1f", "b", "18", "3", "13", "3", "1"};
+
+char *z10[ENTS] = {"29c", "213", "f1", "17f", "12b", "178", "383", "d3", "3a3", "281"};
+
+char *z15[ENTS] = {"29c", "1a13", "74f1", "257f", "592b", "4978", "4783", "7cd3", "5ba3", "4681"};
+char *z16[ENTS] = {"29c", "9a13", "74f1", "a57f", "d92b", "4978", "c783", "fcd3", "5ba3", "c681"};
+char *z17[ENTS] = {"51e", "f17a", "54ff", "1a335", "cf65", "5d6f", "583f", "618f", "1bc6", "98ff"};
+
+char *z31[ENTS] = {"3aecd515", "13ae8ec6", "518c8090", "81ca077", "70b7134", "7ee78d71", "323a7636", "2122cb1a", "19811941", "41fd605"};
+char *z32[ENTS] = {"baecd515", "13ae8ec6", "518c8090", "881ca077", "870b7134", "7ee78d71", "323a7636", "a122cb1a", "99811941", "841fd605"};
+char *z33[ENTS] = {"1faf4cca", "15d6ef83b", "9095fe72", "1b6a3dff6", "b17cbddd", "16e5209d4", "6f65b12c", "493bbbc6", "abf2a5d5", "6d491a3c"};
+
+char *z63[ENTS] = {"48a74f367fa7b5c8", "3ba9e9dc1b263076", "1e0ac84e7678e0fb", "11416581728b3e35", "36ab610523f0f1f7", "3e540e8e95c0eb4b", "439ae16057dbc9d3", "734fb260db243950", "7d3a317effc289bf", "1d80301fb3d1a0d1"};
+char *z64[ENTS] = {"48a74f367fa7b5c8", "bba9e9dc1b263076", "9e0ac84e7678e0fb", "11416581728b3e35", "b6ab610523f0f1f7", "be540e8e95c0eb4b", "439ae16057dbc9d3", "f34fb260db243950", "fd3a317effc289bf", "1d80301fb3d1a0d1"};
+char *z65[ENTS] = {"1ff77710d846d49f0", "1b1411701d709ee10", "31ffa81a208b6af4", "446638d431d3c681", "df5c569d5baa8b55", "197d99ea9bf28e5a0", "191ade09edd94cfae", "194acefa6dde5e18d", "1afc1167c56272d92", "d092994da72f206f"};
+
+char *z127[ENTS] = {"2f66ba932aaf58a071fd8f0742a99a0c", "73cfa3c664c9c1753507ca60ec6b8425", "53ea074ca131dec12cd68b8aa8e20278", "3cf5ac8c343532f8a53cc0eb47581f73", "50c11d5869e208aa1b9aa317b8c2d0a9", "b23163c892876472b1ef19642eace09", "489f4c03d41f87509c8d6c90ce674f95", "2ab8748c96aa6762ea1932b44c9d7164", "98cb5591fc05ad31afbbc1d67b90edd", "77848bb991fd0be331adcf1457fbc672"};
+char *z128[ENTS] = {"af66ba932aaf58a071fd8f0742a99a0c", "73cfa3c664c9c1753507ca60ec6b8425", "53ea074ca131dec12cd68b8aa8e20278", "3cf5ac8c343532f8a53cc0eb47581f73", "50c11d5869e208aa1b9aa317b8c2d0a9", "8b23163c892876472b1ef19642eace09", "489f4c03d41f87509c8d6c90ce674f95", "aab8748c96aa6762ea1932b44c9d7164", "98cb5591fc05ad31afbbc1d67b90edd", "f7848bb991fd0be331adcf1457fbc672"};
+
+/* Floats.  */
+char *f1[ENTS] = {"0.@0", "0.8@0", "0.8@0", "0.8@0", "0.8@0", "0.@0", "0.8@0", "0.8@0", "0.8@0", "0.8@0"};
+char *f2[ENTS] = {"0.@0", "0.c@0", "0.4@0", "0.c@0", "0.c@0", "0.@0", "0.c@0", "0.c@0", "0.c@0", "0.4@0"};
+char *f3[ENTS] = {"0.8@0", "0.6@0", "0.2@0", "0.e@0", "0.6@0", "0.@0", "0.6@0", "0.6@0", "0.6@0", "0.2@0"};
+char *f4[ENTS] = {"0.c@0", "0.3@0", "0.1@0", "0.f@0", "0.b@0", "0.8@0", "0.3@0", "0.3@0", "0.3@0", "0.1@0"};
+char *f5[ENTS] = {"0.e@0", "0.98@0", "0.88@0", "0.f8@0", "0.58@0", "0.c@0", "0.18@0", "0.98@0", "0.18@0", "0.8@-1"};
+
+char *f10[ENTS] = {"0.a7@0", "0.84c@0", "0.3c4@0", "0.5fc@0", "0.4ac@0", "0.5e@0", "0.e0c@0", "0.34c@0", "0.e8c@0", "0.a04@0"};
+
+char *f15[ENTS] = {"0.538@-1", "0.3426@0", "0.e9e2@0", "0.4afe@0", "0.b256@0", "0.92f@0", "0.8f06@0", "0.f9a6@0", "0.b746@0", "0.8d02@0"};
+char *f16[ENTS] = {"0.29c@-1", "0.9a13@0", "0.74f1@0", "0.a57f@0", "0.d92b@0", "0.4978@0", "0.c783@0", "0.fcd3@0", "0.5ba3@0", "0.c681@0"};
+char *f17[ENTS] = {"0.28f@-1", "0.78bd@0", "0.2a7f8@0", "0.d19a8@0", "0.67b28@0", "0.2eb78@0", "0.2c1f8@0", "0.30c78@0", "0.de3@-1", "0.4c7f8@0"};
+
+char *f31[ENTS] = {"0.75d9aa2a@0", "0.275d1d8c@0", "0.a319012@0", "0.103940ee@0", "0.e16e268@-1", "0.fdcf1ae2@0", "0.6474ec6c@0", "0.42459634@0", "0.33023282@0", "0.83fac0a@-1"};
+char *f32[ENTS] = {"0.baecd515@0", "0.13ae8ec6@0", "0.518c809@0", "0.881ca077@0", "0.870b7134@0", "0.7ee78d71@0", "0.323a7636@0", "0.a122cb1a@0", "0.99811941@0", "0.841fd605@0"};
+char *f33[ENTS] = {"0.fd7a665@-1", "0.aeb77c1d8@0", "0.484aff39@0", "0.db51effb@0", "0.58be5eee8@0", "0.b72904ea@0", "0.37b2d896@0", "0.249ddde3@0", "0.55f952ea8@0", "0.36a48d1e@0"};
+
+char *f63[ENTS] = {"0.914e9e6cff4f6b9@0", "0.7753d3b8364c60ec@0", "0.3c15909cecf1c1f6@0", "0.2282cb02e5167c6a@0", "0.6d56c20a47e1e3ee@0", "0.7ca81d1d2b81d696@0", "0.8735c2c0afb793a6@0", "0.e69f64c1b64872a@0", "0.fa7462fdff85137e@0", "0.3b00603f67a341a2@0"};
+char *f64[ENTS] = {"0.48a74f367fa7b5c8@0", "0.bba9e9dc1b263076@0", "0.9e0ac84e7678e0fb@0", "0.11416581728b3e35@0", "0.b6ab610523f0f1f7@0", "0.be540e8e95c0eb4b@0", "0.439ae16057dbc9d3@0", "0.f34fb260db24395@0", "0.fd3a317effc289bf@0", "0.1d80301fb3d1a0d1@0"};
+char *f65[ENTS] = {"0.ffbbb886c236a4f8@0", "0.d8a08b80eb84f708@0", "0.18ffd40d1045b57a@0", "0.22331c6a18e9e3408@0", "0.6fae2b4eadd545aa8@0", "0.cbeccf54df9472d@0", "0.c8d6f04f6eca67d7@0", "0.ca5677d36ef2f0c68@0", "0.d7e08b3e2b1396c9@0", "0.68494ca6d39790378@0"};
+
+char *f127[ENTS] = {"0.5ecd7526555eb140e3fb1e0e85533418@0", "0.e79f478cc99382ea6a0f94c1d8d7084a@0", "0.a7d40e994263bd8259ad171551c404f@0", "0.79eb5918686a65f14a7981d68eb03ee6@0", "0.a1823ab0d3c411543735462f7185a152@0", "0.16462c791250ec8e563de32c85d59c12@0", "0.913e9807a83f0ea1391ad9219cce9f2a@0", "0.5570e9192d54cec5d4326568993ae2c8@0", "0.13196ab23f80b5a635f7783acf721dba@0", "0.ef09177323fa17c6635b9e28aff78ce4@0"};
+char *f128[ENTS] = {"0.af66ba932aaf58a071fd8f0742a99a0c@0", "0.73cfa3c664c9c1753507ca60ec6b8425@0", "0.53ea074ca131dec12cd68b8aa8e20278@0", "0.3cf5ac8c343532f8a53cc0eb47581f73@0", "0.50c11d5869e208aa1b9aa317b8c2d0a9@0", "0.8b23163c892876472b1ef19642eace09@0", "0.489f4c03d41f87509c8d6c90ce674f95@0", "0.aab8748c96aa6762ea1932b44c9d7164@0", "0.98cb5591fc05ad31afbbc1d67b90edd@-1", "0.f7848bb991fd0be331adcf1457fbc672@0"};
+
+
+/* One row of an expected-value table: the strings the generator should
+   produce and the bit count they were produced with.  */
+struct rt
+{
+  char **s;                    /* expected values, as strings in base BASE */
+  int nbits;                   /* bit count passed to the generator calls */
+};
+
+/* Expected mpz_urandomb outputs, one row per tested bit count.  The row
+   with a NULL string array terminates the table.  */
+static struct rt zarr[] =
+{
+  {z1, 1},
+  {z2, 2},
+  {z3, 3},
+  {z4, 4},
+  {z5, 5},
+  {z10, 10},
+  {z15, 15},
+  {z16, 16},
+  {z17, 17},
+  {z31, 31},
+  {z32, 32},
+  {z33, 33},
+  {z63, 63},
+  {z64, 64},
+  {z65, 65},
+  {z127, 127},
+  {z128, 128},
+  {NULL, 0}
+};
+
+/* Expected mpf_urandomb outputs, one row per tested bit count.  The row
+   with a NULL string array terminates the table.  */
+static struct rt farr[] =
+{
+  {f1, 1},
+  {f2, 2},
+  {f3, 3},
+  {f4, 4},
+  {f5, 5},
+  {f10, 10},
+  {f15, 15},
+  {f16, 16},
+  {f17, 17},
+  {f31, 31},
+  {f32, 32},
+  {f33, 33},
+  {f63, 63},
+  {f64, 64},
+  {f65, 65},
+  {f127, 127},
+  {f128, 128},
+  {NULL, 0}
+};
+
+
+/* Entry point, with two modes.
+
+   Verify mode (no arguments): for every row of zarr and farr, create a
+   GMP_RAND_ALG_LC generator sized by the row's nbits, seed it with SEED,
+   draw ENTS values and compare each with the table string.  The first
+   mismatch is printed and the program exits with status 1.
+
+   Print mode ("function nbits" given): draw ENTS values of the requested
+   kind and print them as a C array definition followed by the matching
+   table row, ready to be pasted into this file.
+
+   Returns 0 on success.  */
+int
+#if __STDC__
+main (int argc, char *argv[])
+#else
+main (argc, argv)
+     int argc;
+     char *argv[];
+#endif
+{
+  static char usage[] = "\
+usage: t-rand [function nbits]\n\
+  function is one of z, f\n\
+  nbits is number of bits\n\
+";
+  gmp_randstate_t rstate;
+  mpz_t z, rz;
+  mpf_t f, rf;
+  enum { Z, F } func = Z;
+  int nbits = 1;
+  int verify_mode_flag = 1;
+  register int i;
+  struct rt *a;
+
+
+  /* Any argument at all selects print mode, which needs both operands.  */
+  if (argc > 1)
+    {
+      if (argc < 3)
+       {
+         fputs (usage, stderr);
+         exit (1);
+       }
+      verify_mode_flag = 0;
+      /* Only the first character is examined; anything other than 'f'
+         leaves func at Z.  */
+      if (*argv[1] == 'z')
+       func = Z;
+      if (*argv[1] == 'f')
+       func = F;
+      nbits = atoi (argv[2]);
+    }
+
+  mpz_init (rz);
+
+  if (verify_mode_flag)
+    {
+#ifdef VERBOSE
+      printf ("%s: verifying random numbers: ", argv[0]);
+#endif
+
+      /* Test z.  */
+      mpz_init (z);
+      for (a = zarr; a->s != NULL; a++)
+       {
+         /* Fresh generator per row so every row starts from SEED.  */
+         gmp_randinit (rstate, GMP_RAND_ALG_LC, a->nbits);
+         if (gmp_errno != GMP_ERROR_NONE)
+           exit (1);
+         gmp_randseed_ui (rstate, SEED);
+
+         for (i = 0; i < ENTS; i++)
+           {
+             /* rz is the generated value, z the expected one.  */
+             mpz_urandomb (rz, rstate, a->nbits);
+             mpz_set_str (z, a->s[i], BASE);
+             if (mpz_cmp (z, rz) != 0)
+               {
+                 printf ("z%d: ", a->nbits);
+                 mpz_out_str (stdout, BASE, rz);
+                 printf (" should be ");
+                 mpz_out_str (stdout, BASE, z);
+                 puts ("");
+                 exit (1);
+               }
+           }
+#ifdef VERBOSE
+         printf ("z%d ", a->nbits);
+#endif
+         gmp_randclear (rstate);
+       }
+      mpz_clear (z);
+
+
+      /* Test f.  */
+      for (a = farr; a->s != NULL; a++)
+       {
+         gmp_randinit (rstate, GMP_RAND_ALG_LC, a->nbits);
+         if (gmp_errno != GMP_ERROR_NONE)
+           exit (1);
+         gmp_randseed_ui (rstate, SEED);
+
+         /* Both floats get the row's bit count as their precision.  */
+         mpf_init2 (f, a->nbits);
+         mpf_init2 (rf, a->nbits);
+         for (i = 0; i < ENTS; i++)
+           {
+             /* rf is the generated value, f the expected one.  */
+             mpf_urandomb (rf, rstate, a->nbits);
+             mpf_set_str (f, a->s[i], BASE);
+             if (mpf_cmp (f, rf) != 0)
+               {
+                 printf ("f%d: ", a->nbits);
+                 mpf_out_str (stdout, BASE, a->nbits, rf);
+                 printf (" should be ");
+                 mpf_out_str (stdout, BASE, a->nbits, f);
+                 puts ("");
+                 exit (1);
+               }
+           }
+#ifdef VERBOSE
+         printf ("f%d ", a->nbits);
+#endif
+         gmp_randclear (rstate);
+         mpf_clear (f);
+         mpf_clear (rf);
+       }
+
+#ifdef VERBOSE
+      puts ("");
+#endif
+    }
+  else                         /* Print mode.  */
+    {
+      gmp_randinit (rstate, GMP_RAND_ALG_LC, nbits);
+      if (gmp_errno != GMP_ERROR_NONE)
+       exit (1);
+      gmp_randseed_ui (rstate, SEED);
+
+      switch (func)
+       {
+       case Z:
+         /* Emit "char *zN[ENTS] = {...};" and then the zarr row.  */
+         printf ("char *z%d[ENTS] = {", nbits);
+         for (i = 0; i < ENTS; i++)
+           {
+             mpz_urandomb (rz, rstate, nbits);
+             printf ("\"");
+             mpz_out_str (stdout, BASE, rz);
+             printf ("\"");
+             if (i != ENTS - 1)
+               printf (", ");
+           }
+         printf ("};\n");
+         printf ("  {z%d, %d},\n", nbits, nbits);
+         break;
+
+       case F:
+         /* Emit "char *fN[ENTS] = {...};" and then the farr row.  */
+         printf ("char *f%d[ENTS] = {", nbits);
+         mpf_init2 (rf, nbits);
+         for (i = 0; i < ENTS; i++)
+           {
+             mpf_urandomb (rf, rstate, nbits);
+             printf ("\"");
+             mpf_out_str (stdout, BASE, nbits, rf);
+             printf ("\"");
+             if (i != ENTS - 1)
+               printf (", ");
+           }
+         printf ("};\n");
+         printf ("  {f%d, %d},\n", nbits, nbits);
+         mpf_clear (rf);
+         break;
+
+       default:
+         exit (1);
+       }
+
+      gmp_randclear (rstate);
+    }
+
+  mpz_clear (rz);
+
+  return 0;
+}
diff --git a/tests/rand/t-urbui.c b/tests/rand/t-urbui.c

new file mode 100644 (file)

index 0000000..0251f4c
--- /dev/null
+++ b/tests/rand/t-urbui.c
@@ -0,0 +1,65 @@
+/* Test gmp_urandomb_ui.
+
+Copyright 2003, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+/* Expect numbers generated by rstate to obey the number of bits requested.
+   No point testing bits==BITS_PER_ULONG, since any return is acceptable in
+   that case.
+
+   name is the algorithm label printed in the failure report, rstate the
+   generator under test.  Five values are drawn for every bit count; the
+   first one that is out of range is reported and the program aborts.  */
+void
+check_one (const char *name, gmp_randstate_ptr rstate)
+{
+  unsigned long  bits, limit, got;
+  int    i;
+
+  for (bits = 0; bits < BITS_PER_ULONG; bits++)
+    {
+      /* will demand got < limit.  The shift is done on an unsigned long
+         constant: a signed 1L shifted by BITS_PER_ULONG-1 would move a bit
+         into the sign position, which is undefined behaviour.  */
+      limit = (1UL << bits);
+
+      for (i = 0; i < 5; i++)
+        {
+          got = gmp_urandomb_ui (rstate, bits);
+          if (got >= limit)
+            {
+              printf ("Return value out of range:\n");
+              printf ("  algorithm: %s\n", name);
+              printf ("  bits:  %lu\n", bits);
+              printf ("  limit: %#lx\n", limit);
+              printf ("  got:   %#lx\n", got);
+              abort ();
+            }
+        }
+    }
+}
+
+/* Hand check_one to call_rand_algs between tests_start and tests_end, then
+   exit with status 0; check_one itself aborts on a failure.  */
+int
+main (int argc, char *argv[])
+{
+  tests_start ();
+
+  call_rand_algs (check_one);
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/rand/t-urmui.c b/tests/rand/t-urmui.c

new file mode 100644 (file)

index 0000000..798286b
--- /dev/null
+++ b/tests/rand/t-urmui.c
@@ -0,0 +1,75 @@
+/* Test gmp_urandomm_ui.
+
+Copyright 2003, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+/* Every value drawn with gmp_urandomm_ui (rstate, n) must be strictly below
+   n.  Each n in the table is tried five times; the first violation is
+   reported under the algorithm label "name" and the program aborts.  */
+void
+check_one (const char *name, gmp_randstate_ptr rstate)
+{
+  static const unsigned long  bounds[] = {
+    1, 2, 3, 4, 5, 6, 7, 8,
+    123, 456, 789,
+
+    255, 256, 257,
+    1023, 1024, 1025,
+    32767, 32768, 32769,
+
+    ULONG_MAX/2-2, ULONG_MAX/2-1, ULONG_MAX/2, ULONG_MAX/2+1, ULONG_MAX/2+2,
+
+    ULONG_MAX-2, ULONG_MAX-1, ULONG_MAX,
+  };
+
+  unsigned long  bound, value;
+  int    idx, attempt;
+
+  for (idx = 0; idx < numberof (bounds); idx++)
+    {
+      bound = bounds[idx];
+
+      for (attempt = 0; attempt < 5; attempt++)
+        {
+          value = gmp_urandomm_ui (rstate, bound);
+          if (value < bound)
+            continue;           /* in range, try the next draw */
+
+          printf ("Return value out of range:\n");
+          printf ("  algorithm: %s\n", name);
+          printf ("  n:     %#lx\n", bound);
+          printf ("  got:   %#lx\n", value);
+          abort ();
+        }
+    }
+}
+
+
+/* Hand check_one to call_rand_algs between tests_start and tests_end, then
+   exit with status 0; check_one itself aborts on a failure.  */
+int
+main (int argc, char *argv[])
+{
+  tests_start ();
+
+  call_rand_algs (check_one);
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/rand/t-urndmm.c b/tests/rand/t-urndmm.c

new file mode 100644 (file)

index 0000000..11eeef2
--- /dev/null
+++ b/tests/rand/t-urndmm.c
@@ -0,0 +1,159 @@
+/* Test mpz_urandomm.
+
+Copyright 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+#ifndef TRUE
+#define TRUE (1)
+#endif
+#ifndef FALSE
+#define FALSE (0)
+#endif
+
+/* Exercise mpz_urandomm with a series of checks, stopping at the first one
+   that fails:
+
+     1. modulus 2^80 gives the same stream as mpz_urandomb with 80 bits;
+     2. a degenerate linear congruential generator still terminates and
+        gives the expected constant;
+     3. results lie in [0, 5) for modulus 5 and for modulus -5;
+     4. modulus 1 always gives 0.
+
+   Returns TRUE if all checks pass, FALSE (after printing a diagnostic)
+   otherwise.  */
+int
+check_params (void)
+{
+  gmp_randstate_t r1, r2;
+  mpz_t a, b, m;
+  int i;
+  int result;
+
+  result = TRUE;
+
+  mpz_init (a);
+  mpz_init (b);
+  mpz_init (m);
+
+  if (result)
+    {
+      /* Test the consistency between urandomm and urandomb. */
+      gmp_randinit_default (r1);
+      gmp_randinit_default (r2);
+      gmp_randseed_ui (r1, 85L);
+      gmp_randseed_ui (r2, 85L);
+      /* m = 2^80 */
+      mpz_set_ui (m, 0L);
+      mpz_setbit (m, 80L);
+      for (i = 0; i < 100; i++)
+        {
+          mpz_urandomm (a, r1, m);
+          mpz_urandomb (b, r2, 80L);
+          if (mpz_cmp (a, b) != 0)
+            {
+              result = FALSE;
+              printf ("mpz_urandomm != mpz_urandomb\n");
+              break;
+            }
+        }
+      gmp_randclear (r1);
+      gmp_randclear (r2);
+    }
+
+  if (result)
+    {
+      /* Test that mpz_urandomm returns the correct result with a
+         broken LC.  */
+      mpz_set_ui (a, 0L);
+      gmp_randinit_lc_2exp (r1, a, 0xffL, 8L);
+      mpz_set_ui (m, 5L);
+      /* Warning: This code hangs in gmp 4.1 and below */
+      for (i = 0; i < 100; i++)
+        {
+          mpz_urandomm (a, r1, m);
+          if (mpz_cmp_ui (a, 2L) != 0)
+            {
+              result = FALSE;
+              gmp_printf ("mpz_urandomm returns %Zd instead of 2\n", a);
+              break;
+            }
+        }
+      gmp_randclear (r1);
+    }
+
+  if (result)
+    {
+      /* Test that the results are always in range for either
+         positive or negative values of m.  Zero is a legitimate result,
+         so only negative values and values >= 5 are rejected.  */
+      gmp_randinit_default (r1);
+      mpz_set_ui (m, 5L);
+      mpz_set_si (b, -5L);
+      for (i = 0; i < 100; i++)
+        {
+          mpz_urandomm (a, r1, m);
+          if (mpz_cmp_ui (a, 5L) >= 0 || mpz_sgn (a) < 0)
+            {
+              result = FALSE;
+              gmp_printf ("Out-of-range or negative value: %Zd\n", a);
+              break;
+            }
+          mpz_urandomm (a, r1, b);
+          if (mpz_cmp_ui (a, 5L) >= 0 || mpz_sgn (a) < 0)
+            {
+              result = FALSE;
+              gmp_printf ("Out-of-range or negative value (from negative modulus): %Zd\n", a);
+              break;
+            }
+        }
+      gmp_randclear (r1);
+    }
+
+  if (result)
+    {
+      /* Test that m=1 forces always result=0.  */
+      gmp_randinit_default (r1);
+      mpz_set_ui (m, 1L);
+      for (i = 0; i < 100; i++)
+        {
+          mpz_urandomm (a, r1, m);
+          if (mpz_sgn (a) != 0)
+            {
+              result = FALSE;
+              gmp_printf ("mpz_urandomm fails with m=1 (result=%Zd)\n", a);
+              break;
+            }
+        }
+      gmp_randclear (r1);
+    }
+
+  mpz_clear (a);
+  mpz_clear (b);
+  mpz_clear (m);
+  return result;
+}
+
+/* Run check_params between tests_start and tests_end.  The exit status is
+   0 when it passes and 1 when it fails.  */
+int
+main (int argc, char *argv[])
+{
+  int passed;
+
+  tests_start ();
+  passed = check_params () ? TRUE : FALSE;
+  tests_end ();
+
+  return passed ? 0 : 1;
+}
diff --git a/tests/rand/zdiv_round.c b/tests/rand/zdiv_round.c

new file mode 100644 (file)

index 0000000..362d19c
--- /dev/null
+++ b/tests/rand/zdiv_round.c
@@ -0,0 +1,44 @@
+/* zdiv_round() -- divide integers, round to nearest */
+
+/*
+Copyright 1999 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+
+/* Set rop to n/d rounded to the nearest integer, with exact halves rounded
+   away from zero.  d must be nonzero.  rop may be the same variable as n
+   or d, since the result is built in temporaries.
+
+   The quotient is formed with exact integer arithmetic.  Going through mpf
+   at its default precision would truncate operands wider than that
+   precision, so the rounded result could be wrong for large n or d.  */
+void
+zdiv_round (mpz_t rop, mpz_t n, mpz_t d)
+{
+  mpz_t q, r;
+  int away;
+
+  /* Direction to step when rounding up in magnitude: the sign of n/d.  */
+  away = mpz_sgn (n) * mpz_sgn (d);
+
+  mpz_init (q);
+  mpz_init (r);
+
+  /* n = q*d + r with q truncated towards zero and |r| < |d|.  */
+  mpz_tdiv_qr (q, r, n, d);
+
+  /* The discarded fraction is |r|/|d|; it is at least one half exactly
+     when 2*|r| >= |d|.  */
+  mpz_mul_2exp (r, r, 1L);
+  if (mpz_cmpabs (r, d) >= 0)
+    {
+      if (away < 0)
+        mpz_sub_ui (q, q, 1L);
+      else
+        mpz_add_ui (q, q, 1L);
+    }
+
+  mpz_set (rop, q);
+
+  mpz_clear (q);
+  mpz_clear (r);
+  return;
+}
diff --git a/tests/refmpf.c b/tests/refmpf.c

new file mode 100644 (file)

index 0000000..3dbbd8a
--- /dev/null
+++ b/tests/refmpf.c
@@ -0,0 +1,428 @@
+/* Reference floating point routines.
+
+Copyright 1996, 2001, 2004, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* Reference addition: w = u + v.
+
+   A zero operand makes the result a copy of the other operand.  Operands
+   of opposite sign are handed to refmpf_sub with v negated.  Otherwise
+   both operands are aligned in zero-padded buffers spanning from the
+   highest to the lowest limb position of either, and added limb by limb.
+   Only the most significant PREC(w) limbs of the result are stored.  */
+void
+refmpf_add (mpf_ptr w, mpf_srcptr u, mpf_srcptr v)
+{
+  mp_size_t hi, lo, size;
+  mp_ptr ut, vt, wt;
+  int neg;
+  mp_exp_t exp;
+  mp_limb_t cy;
+  TMP_DECL;
+
+  TMP_MARK;
+
+  if (SIZ (u) == 0)
+    {
+      /* 0 + v = v */
+      size = ABSIZ (v);
+      wt = TMP_ALLOC_LIMBS (size + 1);
+      MPN_COPY (wt, PTR (v), size);
+      exp = EXP (v);
+      neg = SIZ (v) < 0;
+      goto done;
+    }
+  if (SIZ (v) == 0)
+    {
+      /* u + 0 = u */
+      size = ABSIZ (u);
+      wt = TMP_ALLOC_LIMBS (size + 1);
+      MPN_COPY (wt, PTR (u), size);
+      exp = EXP (u);
+      neg = SIZ (u) < 0;
+      goto done;
+    }
+  if ((SIZ (u) ^ SIZ (v)) < 0)
+    {
+      /* Opposite signs: u + v = u - (-v).  tmp is a negated view of v
+         that shares v's limbs.  */
+      mpf_t tmp;
+      SIZ (tmp) = -SIZ (v);
+      EXP (tmp) = EXP (v);
+      PTR (tmp) = PTR (v);
+      refmpf_sub (w, u, tmp);
+      TMP_FREE;                /* balance the TMP_MARK above */
+      return;
+    }
+  neg = SIZ (u) < 0;
+
+  /* Compute the significance of the hi and lo end of the result.  */
+  hi = MAX (EXP (u), EXP (v));
+  lo = MIN (EXP (u) - ABSIZ (u), EXP (v) - ABSIZ (v));
+  size = hi - lo;
+  ut = TMP_ALLOC_LIMBS (size + 1);
+  vt = TMP_ALLOC_LIMBS (size + 1);
+  wt = TMP_ALLOC_LIMBS (size + 1);
+  MPN_ZERO (ut, size);
+  MPN_ZERO (vt, size);
+  {
+    /* Place each operand so its top limb sits at its exponent relative to
+       hi.  The offset is a limb count, so it is kept in mp_size_t.  */
+    mp_size_t off;
+    off = size + (EXP (u) - hi) - ABSIZ (u);
+    MPN_COPY (ut + off, PTR (u), ABSIZ (u));
+    off = size + (EXP (v) - hi) - ABSIZ (v);
+    MPN_COPY (vt + off, PTR (v), ABSIZ (v));
+  }
+
+  /* A carry out becomes a new top limb and raises the exponent by one.  */
+  cy = mpn_add_n (wt, ut, vt, size);
+  wt[size] = cy;
+  size += cy;
+  exp = hi + cy;
+
+done:
+  /* Keep only the most significant PREC(w) limbs.  */
+  if (size > PREC (w))
+    {
+      wt += size - PREC (w);
+      size = PREC (w);
+    }
+  MPN_COPY (PTR (w), wt, size);
+  SIZ (w) = neg == 0 ? size : -size;
+  EXP (w) = exp;
+  TMP_FREE;
+}
+
+
+/* Add 1 "unit in last place" (ie. in the least significant limb) to f.
+   f cannot be zero, since that has no well-defined "last place".
+
+   This routine is designed for use in cases where we pay close attention to
+   the size of the data value and are using that (and the exponent) to
+   indicate the accurate part of a result, or similar.  For this reason, if
+   there's a carry out we don't store 1 and adjust the exponent, we just
+   leave 100..00.  We don't even adjust if there's a carry out of prec+1
+   limbs, but instead give up in that case (which we intend shouldn't arise
+   in normal circumstances).  */
+
+void
+refmpf_add_ulp (mpf_ptr f)
+{
+  mp_ptr     limbs = PTR(f);
+  mp_size_t  signed_size = SIZ(f);
+  mp_size_t  nlimbs = ABSIZ(f);
+  mp_limb_t  carry;
+
+  /* zero has no last place to add to */
+  if (signed_size == 0)
+    {
+      printf ("Oops, refmpf_add_ulp called with f==0\n");
+      abort ();
+    }
+
+  carry = refmpn_add_1 (limbs, limbs, nlimbs, CNST_LIMB(1));
+  if (carry == 0)
+    return;
+
+  /* The increment carried out of the top limb, so one more limb is needed;
+     give up if f already occupies prec+1 limbs.  */
+  if (nlimbs >= PREC(f) + 1)
+    {
+      printf ("Oops, refmpf_add_ulp carried out of prec+1 limbs\n");
+      abort ();
+    }
+
+  limbs[nlimbs] = carry;
+  nlimbs++;
+  SIZ(f) = (signed_size > 0 ? nlimbs : - nlimbs);
+  EXP(f)++;
+}
+
+/* Make f an integer of "size" limbs, each equal to "value".  The size is
+   capped at PREC(f)+1 limbs.  */
+void
+refmpf_fill (mpf_ptr f, mp_size_t size, mp_limb_t value)
+{
+  mp_size_t  limit = PREC(f) + 1;
+
+  ASSERT (size >= 0);
+  if (size > limit)
+    size = limit;
+
+  refmpn_fill (PTR(f), size, value);
+  EXP(f) = size;
+  SIZ(f) = size;
+}
+
+/* Drop zero limbs from the most significant end of f, lowering the exponent
+   by one for each limb dropped.  A value that ends up with size zero gets
+   exponent 0. */
+void
+refmpf_normalize (mpf_ptr f)
+{
+  for (;;)
+    {
+      if (SIZ(f) == 0)
+        {
+          EXP(f) = 0;
+          return;
+        }
+      if (PTR(f)[ABSIZ(f)-1] != 0)
+        return;
+
+      /* shrink the magnitude of the size by one, keeping its sign */
+      if (SIZ(f) >= 0)
+        SIZ(f) = SIZ(f)-1;
+      else
+        SIZ(f) = SIZ(f)+1;
+      EXP(f) --;
+    }
+}
+
+/* refmpf_set_overlap sets up dst as a copy of src, but with PREC(dst)
+   unchanged, in preparation for an overlap test.
+
+   The full value of src is copied, and the space at PTR(dst) is extended as
+   necessary.  The way PREC(dst) is unchanged is as per an mpf_set_prec_raw.
+   The return value is the new PTR(dst) space precision, in bits, ready for
+   a restoring mpf_set_prec_raw before mpf_clear.  */
+
+unsigned long
+refmpf_set_overlap (mpf_ptr dst, mpf_srcptr src)
+{
+  mp_size_t  saved_prec = PREC(dst);
+  mp_size_t  src_limbs = ABSIZ(src);
+  mp_size_t  need = (saved_prec > src_limbs ? saved_prec : src_limbs);
+  unsigned long  space_bits;
+
+  /* make the precision at least the size of src before copying */
+  refmpf_set_prec_limbs (dst, (unsigned long) need);
+  mpf_set (dst, src);
+
+  /* note the enlarged precision for the caller, then put PREC back */
+  space_bits = mpf_get_prec (dst);
+  PREC(dst) = saved_prec;
+  return space_bits;
+}
+
+/* Set the precision of f from a limb count rather than a bit count: "prec"
+   is converted with __GMPF_PREC_TO_BITS and passed to mpf_set_prec, so that
+   PREC(f) ends up as the given "prec" value.  */
+void
+refmpf_set_prec_limbs (mpf_ptr f, unsigned long prec)
+{
+  unsigned long  prec_bits = __GMPF_PREC_TO_BITS (prec);
+
+  mpf_set_prec (f, prec_bits);
+}
+
+
+/* Reference subtraction: w = u - v.
+
+   A zero u makes the result -v, a zero v makes it u.  Operands of opposite
+   sign are handed to refmpf_add with v negated.  Otherwise both operands
+   are aligned in zero-padded buffers, the smaller magnitude is subtracted
+   from the larger, and high zero limbs are stripped.  Only the most
+   significant PREC(w) limbs of the result are stored.  */
+void
+refmpf_sub (mpf_ptr w, mpf_srcptr u, mpf_srcptr v)
+{
+  mp_size_t hi, lo, size;
+  mp_ptr ut, vt, wt;
+  int neg;
+  mp_exp_t exp;
+  TMP_DECL;
+
+  TMP_MARK;
+
+  if (SIZ (u) == 0)
+    {
+      /* 0 - v = -v, so the result is negative when v is positive */
+      size = ABSIZ (v);
+      wt = TMP_ALLOC_LIMBS (size + 1);
+      MPN_COPY (wt, PTR (v), size);
+      exp = EXP (v);
+      neg = SIZ (v) > 0;
+      goto done;
+    }
+  if (SIZ (v) == 0)
+    {
+      /* u - 0 = u */
+      size = ABSIZ (u);
+      wt = TMP_ALLOC_LIMBS (size + 1);
+      MPN_COPY (wt, PTR (u), size);
+      exp = EXP (u);
+      neg = SIZ (u) < 0;
+      goto done;
+    }
+  if ((SIZ (u) ^ SIZ (v)) < 0)
+    {
+      /* Opposite signs: u - v = u + (-v).  tmp is a negated view of v
+         that shares v's limbs, so u and tmp have the same sign and
+         refmpf_add gives the result the sign of u.  That is already the
+         sign of u - v, so no further negation is applied (negating for
+         u < 0 would turn e.g. -3 - 2 into +5).  */
+      mpf_t tmp;
+      SIZ (tmp) = -SIZ (v);
+      EXP (tmp) = EXP (v);
+      PTR (tmp) = PTR (v);
+      refmpf_add (w, u, tmp);
+      TMP_FREE;                /* balance the TMP_MARK above */
+      return;
+    }
+  neg = SIZ (u) < 0;
+
+  /* Compute the significance of the hi and lo end of the result.  */
+  hi = MAX (EXP (u), EXP (v));
+  lo = MIN (EXP (u) - ABSIZ (u), EXP (v) - ABSIZ (v));
+  size = hi - lo;
+  ut = TMP_ALLOC_LIMBS (size + 1);
+  vt = TMP_ALLOC_LIMBS (size + 1);
+  wt = TMP_ALLOC_LIMBS (size + 1);
+  MPN_ZERO (ut, size);
+  MPN_ZERO (vt, size);
+  {
+    /* Place each operand so its top limb sits at its exponent relative to
+       hi.  The offset is a limb count, so it is kept in mp_size_t.  */
+    mp_size_t off;
+    off = size + (EXP (u) - hi) - ABSIZ (u);
+    MPN_COPY (ut + off, PTR (u), ABSIZ (u));
+    off = size + (EXP (v) - hi) - ABSIZ (v);
+    MPN_COPY (vt + off, PTR (v), ABSIZ (v));
+  }
+
+  /* Subtract the smaller magnitude from the larger; if that meant swapping
+     the operands the sign of the result flips.  */
+  if (mpn_cmp (ut, vt, size) >= 0)
+    mpn_sub_n (wt, ut, vt, size);
+  else
+    {
+      mpn_sub_n (wt, vt, ut, size);
+      neg ^= 1;
+    }
+  exp = hi;
+  /* Cancellation may leave zero limbs at the top; strip them.  */
+  while (size != 0 && wt[size - 1] == 0)
+    {
+      size--;
+      exp--;
+    }
+
+done:
+  /* Keep only the most significant PREC(w) limbs.  */
+  if (size > PREC (w))
+    {
+      wt += size - PREC (w);
+      size = PREC (w);
+    }
+  MPN_COPY (PTR (w), wt, size);
+  SIZ (w) = neg == 0 ? size : -size;
+  EXP (w) = exp;
+  TMP_FREE;
+}
+
+
+/* Validate got by comparing to want.  Return 1 if good, 0 if bad.
+
+   The data in got is compared to that in want, up to either PREC(got) limbs
+   or the size of got, whichever is bigger.  Clearly we always demand
+   PREC(got) of accuracy, but we go further and say that if got is bigger
+   then any extra must be correct too.
+
+   want needs to have enough data to allow this comparison.  The size in
+   want doesn't have to be that big though, if it's smaller then further low
+   limbs are taken to be zero.
+
+   This validation approach is designed to allow some flexibility in exactly
+   how much data is generated by an mpf function, ie. either prec or prec+1
+   limbs.  We don't try to make a reference function that emulates that same
+   size decision, instead the idea is for a validation function to generate
+   at least as much data as the real function, then compare.  */
+
+/* name is the label used as a prefix in diagnostics; got is the value being
+   checked and want the reference value.  Exponent, sign and limb data are
+   each compared; any difference prints a report of both operands.  */
+int
+refmpf_validate (const char *name, mpf_srcptr got, mpf_srcptr want)
+{
+  int  bad = 0;
+  mp_size_t  gsize, wsize, cmpsize, i;
+  mp_srcptr  gp, wp;
+  mp_limb_t  glimb, wlimb;
+
+  MPF_CHECK_FORMAT (got);
+
+  if (EXP (got) != EXP (want))
+    {
+      printf ("%s: wrong exponent\n", name);
+      bad = 1;
+    }
+
+  /* Signs differ only when one size is strictly negative and the other
+     strictly positive; a zero size matches either sign.  */
+  gsize = SIZ (got);
+  wsize = SIZ (want);
+  if ((gsize < 0 && wsize > 0) || (gsize > 0 && wsize < 0))
+    {
+      printf ("%s: wrong sign\n", name);
+      bad = 1;
+    }
+
+  gsize = ABS (gsize);
+  wsize = ABS (wsize);
+
+  /* most significant limb of respective data */
+  gp = PTR (got) + gsize - 1;
+  wp = PTR (want) + wsize - 1;
+
+  /* compare limb data */
+  /* Limbs are indexed downwards from the top; positions below the stored
+     size of either operand count as zero.  */
+  cmpsize = MAX (PREC (got), gsize);
+  for (i = 0; i < cmpsize; i++)
+    {
+      glimb = (i < gsize ? gp[-i] : 0);
+      wlimb = (i < wsize ? wp[-i] : 0);
+
+      if (glimb != wlimb)
+        {
+          printf ("%s: wrong data starting at index %ld from top\n",
+                  name, (long) i);
+          bad = 1;
+          break;
+        }
+    }
+
+  /* On any mismatch, dump both operands in full.  */
+  if (bad)
+    {
+      printf ("  prec       %d\n", PREC(got));
+      printf ("  exp got    %ld\n", (long) EXP(got));
+      printf ("  exp want   %ld\n", (long) EXP(want));
+      printf ("  size got   %d\n", SIZ(got));
+      printf ("  size want  %d\n", SIZ(want));
+      printf ("  limbs (high to low)\n");
+      printf ("   got  ");
+      for (i = ABSIZ(got)-1; i >= 0; i--)
+        {
+          gmp_printf ("%MX", PTR(got)[i]);
+          if (i != 0)
+            printf (",");
+        }
+      printf ("\n");
+      printf ("   want ");
+      for (i = ABSIZ(want)-1; i >= 0; i--)
+        {
+          gmp_printf ("%MX", PTR(want)[i]);
+          if (i != 0)
+            printf (",");
+        }
+      printf ("\n");
+      return 0;
+    }
+
+  return 1;
+}
+
+
+/* Check "got" against a reference quotient of n by d.  The reference is
+   computed with refmpn_tdiv_qr to PREC(got)+2 limbs, normalized, and handed
+   to refmpf_validate together with got; that function's result (1 good,
+   0 bad) is returned.  d must be nonzero.  */
+int
+refmpf_validate_division (const char *name, mpf_srcptr got,
+                          mpf_srcptr n, mpf_srcptr d)
+{
+  mp_size_t  signs_xor, n_limbs, dsize, qsize, tsize;
+  mp_ptr     dividend, quot, rem;
+  mpf_t      want;
+  int        ok;
+
+  dsize = SIZ (d);
+  ASSERT_ALWAYS (dsize != 0);
+
+  /* negative exactly when n and d have opposite signs */
+  signs_xor = SIZ (n) ^ SIZ (d);
+  n_limbs = ABSIZ (n);
+  dsize = ABS (dsize);
+
+  qsize = PREC (got) + 2;      /* at least prec+1 limbs, after high zero */
+  tsize = qsize + dsize - 1;   /* dividend size to give desired qsize */
+
+  /* dividend n, extended or truncated */
+  dividend = refmpn_malloc_limbs (tsize);
+  refmpn_copy_extend (dividend, tsize, PTR (n), n_limbs);
+
+  quot = refmpn_malloc_limbs (qsize);
+  rem = refmpn_malloc_limbs (dsize);  /* remainder, unused */
+
+  ASSERT_ALWAYS (qsize == tsize - dsize + 1);
+  refmpn_tdiv_qr (quot, rem, (mp_size_t) 0, dividend, tsize, PTR (d), dsize);
+
+  /* wrap the quotient limbs as an mpf and strip any high zero limb */
+  PTR (want) = quot;
+  SIZ (want) = (signs_xor < 0 ? -qsize : qsize);
+  EXP (want) = EXP (n) - EXP (d) + 1;
+  refmpf_normalize (want);
+
+  ok = refmpf_validate (name, got, want);
+
+  free (dividend);
+  free (quot);
+  free (rem);
+
+  return ok;
+}
diff --git a/tests/refmpn.c b/tests/refmpn.c

new file mode 100644 (file)

index 0000000..2f245e5
--- /dev/null
+++ b/tests/refmpn.c
@@ -0,0 +1,2181 @@
+/* Reference mpn functions, designed to be simple, portable and independent
+   of the normal gmp code.  Speed isn't a consideration.
+
+Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
+2007, 2008, 2009, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+/* Most routines have assertions representing what the mpn routines are
+   supposed to accept.  Many of these reference routines do sensible things
+   outside these ranges (eg. for size==0), but the assertions are present to
+   pick up bad parameters passed here that are about to be passed the same
+   to a real mpn routine being compared.  */
+
+/* always do assertion checking */
+#define WANT_ASSERT  1
+
+#include <stdio.h>  /* for NULL */
+#include <stdlib.h> /* for malloc */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#include "tests.h"
+
+
+
+/* Test whether byte regions {v_xp,xsize} and {v_yp,ysize} share any byte.
+   Sizes are counted in bytes.  Returns 1 on overlap, 0 otherwise. */
+int
+byte_overlap_p (const void *v_xp, mp_size_t xsize,
+                const void *v_yp, mp_size_t ysize)
+{
+  const char *x_start = v_xp;
+  const char *y_start = v_yp;
+  const char *x_end, *y_end;
+
+  ASSERT (xsize >= 0);
+  ASSERT (ysize >= 0);
+
+  x_end = x_start + xsize;
+  y_end = y_start + ysize;
+
+  /* the end pointers must not have wrapped around */
+  ASSERT (x_end >= x_start);
+  ASSERT (y_end >= y_start);
+
+  /* disjoint when one region finishes at or before the other begins */
+  return ! (x_end <= y_start || y_end <= x_start);
+}
+
+/* Limb-counted front end to byte_overlap_p: non-zero if the limb regions
+   {xp,xsize} and {yp,ysize} overlap. */
+int
+refmpn_overlap_p (mp_srcptr xp, mp_size_t xsize, mp_srcptr yp, mp_size_t ysize)
+{
+  mp_size_t  xbytes = xsize * BYTES_PER_MP_LIMB;
+  mp_size_t  ybytes = ysize * BYTES_PER_MP_LIMB;
+
+  return byte_overlap_p (xp, xbytes, yp, ybytes);
+}
+
+/* Operand check for a routine working from low limbs to high: acceptable
+   when dst is at or below src, or when the two regions are disjoint. */
+int
+refmpn_overlap_low_to_high_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
+{
+  if (dst <= src)
+    return 1;
+
+  return ! refmpn_overlap_p (dst, size, src, size);
+}
+
+/* Operand check for a routine working from high limbs to low: acceptable
+   when dst is at or above src, or when the two regions are disjoint. */
+int
+refmpn_overlap_high_to_low_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
+{
+  if (dst >= src)
+    return 1;
+
+  return ! refmpn_overlap_p (dst, size, src, size);
+}
+
+/* Operand check for a standard routine: dst and src must either be the
+   very same region or not overlap at all. */
+int
+refmpn_overlap_fullonly_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
+{
+  if (dst == src)
+    return 1;
+
+  return ! refmpn_overlap_p (dst, size, src, size);
+}
+/* As refmpn_overlap_fullonly_p, applied to dst against each of two
+   sources; both checks must pass. */
+int
+refmpn_overlap_fullonly_two_p (mp_srcptr dst, mp_srcptr src1, mp_srcptr src2,
+                               mp_size_t size)
+{
+  if (! refmpn_overlap_fullonly_p (dst, src1, size))
+    return 0;
+
+  return refmpn_overlap_fullonly_p (dst, src2, size);
+}
+
+
+/* Allocate space for "size" limbs with malloc.  A request for zero limbs
+   is rounded up to one.  The result is asserted to be non-NULL. */
+mp_ptr
+refmpn_malloc_limbs (mp_size_t size)
+{
+  mp_size_t  limbs;
+  mp_ptr     block;
+
+  ASSERT (size >= 0);
+  limbs = (size == 0 ? 1 : size);
+  block = (mp_ptr) malloc ((size_t) (limbs * BYTES_PER_MP_LIMB));
+  ASSERT (block != NULL);
+  return block;
+}
+
+/* Release limbs obtained from refmpn_malloc_limbs.  NOTE: this can't be
+ * used for memory allocated by refmpn_malloc_limbs_aligned. */
+void
+refmpn_free_limbs (mp_ptr p)
+{
+  free (p);
+}
+
+/* Return a newly allocated copy of {ptr,size}, obtained from
+   refmpn_malloc_limbs. */
+mp_ptr
+refmpn_memdup_limbs (mp_srcptr ptr, mp_size_t size)
+{
+  mp_ptr  dup = refmpn_malloc_limbs (size);
+
+  refmpn_copyi (dup, ptr, size);
+  return dup;
+}
+
+/* malloc n limbs on a multiple of m bytes boundary.  n + m-1 limbs are
+   requested from refmpn_malloc_limbs and the value returned is whatever
+   align_pointer makes of that block. */
+mp_ptr
+refmpn_malloc_limbs_aligned (mp_size_t n, size_t m)
+{
+  mp_ptr  raw = refmpn_malloc_limbs (n + m-1);
+
+  return (mp_ptr) align_pointer (raw, m);
+}
+
+
+/* Store "value" into each of the "size" limbs at ptr. */
+void
+refmpn_fill (mp_ptr ptr, mp_size_t size, mp_limb_t value)
+{
+  mp_size_t  done = 0;
+
+  ASSERT (size >= 0);
+  while (done < size)
+    {
+      ptr[done] = value;
+      done++;
+    }
+}
+
+/* Set {ptr,size} to all zero limbs. */
+void
+refmpn_zero (mp_ptr ptr, mp_size_t size)
+{
+  refmpn_fill (ptr, size, CNST_LIMB(0));
+}
+
+/* Grow {ptr,oldsize} to newsize limbs by zeroing the limbs from index
+   oldsize upwards.  newsize must not be smaller than oldsize. */
+void
+refmpn_zero_extend (mp_ptr ptr, mp_size_t oldsize, mp_size_t newsize)
+{
+  mp_size_t  extra;
+
+  ASSERT (newsize >= oldsize);
+  extra = newsize - oldsize;
+  refmpn_zero (ptr + oldsize, extra);
+}
+
+/* Return 1 if every limb of {ptr,size} is zero (including size==0),
+   otherwise 0. */
+int
+refmpn_zero_p (mp_srcptr ptr, mp_size_t size)
+{
+  mp_size_t  idx = 0;
+
+  while (idx < size)
+    {
+      if (ptr[idx] != 0)
+        return 0;
+      idx++;
+    }
+  return 1;
+}
+
+/* Return size reduced past any high zero limbs of {ptr,size}; the result
+   is 0 when all limbs are zero. */
+mp_size_t
+refmpn_normalize (mp_srcptr ptr, mp_size_t size)
+{
+  ASSERT (size >= 0);
+  for ( ; size > 0; size--)
+    {
+      if (ptr[size-1] != 0)
+        break;
+    }
+  return size;
+}
+
+/* Return a limb holding only the highest one bit of x, or 0 if x is 0. */
+mp_limb_t
+refmpn_msbone (mp_limb_t x)
+{
+  mp_limb_t  probe;
+
+  /* walk a single bit down from the top of the limb until it hits x */
+  for (probe = (mp_limb_t) 1 << (GMP_LIMB_BITS-1); probe != 0; probe >>= 1)
+    {
+      if (x & probe)
+        return probe;
+    }
+  return 0;
+}
+
+/* a mask of the highest one bit of x plus all bits below it; 0 for x==0 */
+mp_limb_t
+refmpn_msbone_mask (mp_limb_t x)
+{
+  mp_limb_t  top;
+
+  if (x == 0)
+    return 0;
+
+  top = refmpn_msbone (x);
+  return (top << 1) - 1;
+}
+
+/* Number of digits in the given base which fit in a limb.
+   The running power is allowed to be equal to the limit 2^GMP_NUMB_BITS,
+   which ensures the result for base==2 is GMP_NUMB_BITS (and similarly
+   for other powers of 2).  */
+int
+refmpn_chars_per_limb (int base)
+{
+  mp_limb_t  limit[2], power[2];
+  int        digits;
+
+  ASSERT (base >= 2);
+
+  /* limit = 2^GMP_NUMB_BITS, as a two limb value */
+  limit[1] = 1;
+  limit[0] = 0;
+
+  /* power = 1, multiplied by base on each pass */
+  power[1] = 0;
+  power[0] = 1;
+
+  for (digits = 0; ; digits++)
+    {
+      /* stop on a carry out of the two limbs */
+      if (refmpn_mul_1 (power, power, (mp_size_t) 2, (mp_limb_t) base) != 0)
+        break;
+      /* stop once the power exceeds the limit */
+      if (refmpn_cmp (power, limit, (mp_size_t) 2) > 0)
+        break;
+    }
+  return digits;
+}
+
+/* The biggest value base**n which fits in GMP_NUMB_BITS, n being the
+   count from refmpn_chars_per_limb. */
+mp_limb_t
+refmpn_big_base (int base)
+{
+  int        remaining = refmpn_chars_per_limb (base);
+  mp_limb_t  power = 1;
+
+  ASSERT (base >= 2);
+  while (remaining > 0)
+    {
+      power *= base;
+      remaining--;
+    }
+  return power;
+}
+
+
+/* Set bit number "bit" (counting GMP_NUMB_BITS per limb) in the limbs at
+   ptr. */
+void
+refmpn_setbit (mp_ptr ptr, unsigned long bit)
+{
+  unsigned long  limb_idx = bit / GMP_NUMB_BITS;
+  mp_limb_t      mask = CNST_LIMB(1) << (bit % GMP_NUMB_BITS);
+
+  ptr[limb_idx] |= mask;
+}
+
+/* Clear bit number "bit" (counting GMP_NUMB_BITS per limb) in the limbs at
+   ptr. */
+void
+refmpn_clrbit (mp_ptr ptr, unsigned long bit)
+{
+  unsigned long  limb_idx = bit / GMP_NUMB_BITS;
+  mp_limb_t      mask = CNST_LIMB(1) << (bit % GMP_NUMB_BITS);
+
+  ptr[limb_idx] &= ~ mask;
+}
+
+/* Evaluates to 1 if bit number "bit" of the limbs at ptr is set, else 0.
+   Bits are counted GMP_NUMB_BITS per limb.  Both arguments are evaluated
+   more than once. */
+#define REFMPN_TSTBIT(ptr,bit) \
+  (((ptr)[(bit)/GMP_NUMB_BITS] & (CNST_LIMB(1) << ((bit)%GMP_NUMB_BITS))) != 0)
+
+/* Function form of REFMPN_TSTBIT: return 1 if the given bit is set in the
+   limbs at ptr, else 0. */
+int
+refmpn_tstbit (mp_srcptr ptr, unsigned long bit)
+{
+  return REFMPN_TSTBIT (ptr, bit);
+}
+
+/* Return the index of the first 0 bit at or above "bit".  There is no
+   size argument, so the search itself is unbounded. */
+unsigned long
+refmpn_scan0 (mp_srcptr ptr, unsigned long bit)
+{
+  for (;;)
+    {
+      if (! REFMPN_TSTBIT (ptr, bit))
+        return bit;
+      bit++;
+    }
+}
+
+/* Return the index of the first 1 bit at or above "bit".  There is no
+   size argument, so the search itself is unbounded. */
+unsigned long
+refmpn_scan1 (mp_srcptr ptr, unsigned long bit)
+{
+  for (;;)
+    {
+      if (REFMPN_TSTBIT (ptr, bit))
+        return bit;
+      bit++;
+    }
+}
+
+/* Copy {sp,size} to {rp,size}; the regions must be identical or
+   disjoint. */
+void
+refmpn_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size)
+{
+  ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
+  refmpn_copyi (rp, sp, size);
+}
+
+/* Copy {sp,size} to {rp,size} working from the low limb upwards, so an
+   overlap is permitted when rp is at or below sp. */
+void
+refmpn_copyi (mp_ptr rp, mp_srcptr sp, mp_size_t size)
+{
+  mp_size_t  k;
+
+  ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));
+  ASSERT (size >= 0);
+
+  k = 0;
+  while (k < size)
+    {
+      rp[k] = sp[k];
+      k++;
+    }
+}
+
+/* Copy {sp,size} to {rp,size} working from the high limb downwards, so an
+   overlap is permitted when rp is at or above sp. */
+void
+refmpn_copyd (mp_ptr rp, mp_srcptr sp, mp_size_t size)
+{
+  mp_size_t  k;
+
+  ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size));
+  ASSERT (size >= 0);
+
+  k = size;
+  while (k > 0)
+    {
+      k--;
+      rp[k] = sp[k];
+    }
+}
+
+/* Copy {xp,xsize} to {wp,wsize}.  A shorter x goes to the high end of w
+   with zeros padding the low limbs.  A longer x contributes only its high
+   wsize limbs.  */
+void
+refmpn_copy_extend (mp_ptr wp, mp_size_t wsize, mp_srcptr xp, mp_size_t xsize)
+{
+  mp_size_t  pad;
+
+  ASSERT (wsize >= 0);
+  ASSERT (xsize >= 0);
+
+  /* drop the low limbs of x when it is bigger than w */
+  if (xsize > wsize)
+    {
+      xp += xsize - wsize;
+      xsize = wsize;
+    }
+
+  /* number of low zero limbs needed in w */
+  pad = wsize - xsize;
+
+  refmpn_copy (wp + pad, xp, xsize);
+  refmpn_zero (wp, pad);
+}
+
+/* Compare {xp,size} and {yp,size} from the high limb down.  Returns 1,
+   -1 or 0 for x greater than, less than, or equal to y.  size must be at
+   least 1. */
+int
+refmpn_cmp (mp_srcptr xp, mp_srcptr yp, mp_size_t size)
+{
+  mp_size_t  pos;
+
+  ASSERT (size >= 1);
+  ASSERT_MPN (xp, size);
+  ASSERT_MPN (yp, size);
+
+  pos = size;
+  while (pos > 0)
+    {
+      pos--;
+      if (xp[pos] != yp[pos])
+        return (xp[pos] > yp[pos] ? 1 : -1);
+    }
+  return 0;
+}
+
+/* As refmpn_cmp, but also accepting size==0, for which the result is 0. */
+int
+refmpn_cmp_allowzero (mp_srcptr xp, mp_srcptr yp, mp_size_t size)
+{
+  return (size == 0 ? 0 : refmpn_cmp (xp, yp, size));
+}
+
+/* Compare {xp,xsize} with {yp,ysize}, the sizes possibly differing.
+   Returns a positive, negative or zero value like refmpn_cmp. */
+int
+refmpn_cmp_twosizes (mp_srcptr xp, mp_size_t xsize,
+                     mp_srcptr yp, mp_size_t ysize)
+{
+  int  swapped, result;
+
+  ASSERT_MPN (xp, xsize);
+  ASSERT_MPN (yp, ysize);
+
+  /* arrange for x to be the operand with at least as many limbs */
+  swapped = (xsize < ysize);
+  if (swapped)
+    MPN_SRCPTR_SWAP (xp,xsize, yp,ysize);
+
+  /* any non-zero limb of x above the size of y makes x the bigger */
+  if (refmpn_zero_p (xp+ysize, xsize-ysize))
+    result = refmpn_cmp (xp, yp, ysize);
+  else
+    result = 1;
+
+  /* undo the effect of the swap on the sign of the answer */
+  return (swapped ? -result : result);
+}
+
+/* Return 1 if {xp,size} and {yp,size} hold identical limbs, comparing
+   each limb in full, otherwise 0. */
+int
+refmpn_equal_anynail (mp_srcptr xp, mp_srcptr yp, mp_size_t size)
+{
+  mp_size_t  k;
+
+  ASSERT (size >= 0);
+
+  for (k = 0; k < size; k++)
+    {
+      if (xp[k] == yp[k])
+        continue;
+      return 0;
+    }
+  return 1;
+}
+
+
+/* Body shared by the logical operations below.  It expands inside a
+   function having parameters rp, s1p, s2p and size, and stores
+   "operation" (an expression in terms of the index i) into each rp[i].  */
+#define LOGOPS(operation)                                               \
+  {                                                                     \
+    mp_size_t  i;                                                       \
+                                                                        \
+    ASSERT (refmpn_overlap_fullonly_two_p (rp, s1p, s2p, size));        \
+    ASSERT (size >= 1);                                                 \
+    ASSERT_MPN (s1p, size);                                             \
+    ASSERT_MPN (s2p, size);                                             \
+                                                                        \
+    i = 0;                                                              \
+    while (i < size)                                                    \
+      {                                                                 \
+        rp[i] = operation;                                              \
+        i++;                                                            \
+      }                                                                 \
+  }
+
+/* rp = s1 AND s2 */
+void
+refmpn_and_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
+{
+  LOGOPS (s1p[i] & s2p[i]);
+}
+/* rp = s1 AND (NOT s2) */
+void
+refmpn_andn_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
+{
+  LOGOPS ((~s2p[i]) & s1p[i]);
+}
+/* rp = NOT (s1 AND s2), within the GMP_NUMB_MASK bits */
+void
+refmpn_nand_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
+{
+  LOGOPS (GMP_NUMB_MASK ^ (s1p[i] & s2p[i]));
+}
+/* rp = s1 OR s2 */
+void
+refmpn_ior_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
+{
+  LOGOPS (s1p[i] | s2p[i]);
+}
+/* rp = s1 OR (NOT s2), the complement taken within GMP_NUMB_MASK */
+void
+refmpn_iorn_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
+{
+  LOGOPS ((GMP_NUMB_MASK ^ s2p[i]) | s1p[i]);
+}
+/* rp = NOT (s1 OR s2), within the GMP_NUMB_MASK bits */
+void
+refmpn_nior_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
+{
+  LOGOPS (GMP_NUMB_MASK ^ (s1p[i] | s2p[i]));
+}
+/* rp = s1 XOR s2 */
+void
+refmpn_xor_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
+{
+  LOGOPS (s1p[i] ^ s2p[i]);
+}
+/* rp = NOT (s1 XOR s2), within the GMP_NUMB_MASK bits */
+void
+refmpn_xnor_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
+{
+  LOGOPS (GMP_NUMB_MASK ^ (s1p[i] ^ s2p[i]));
+}
+
+
+/* Two-limb subtraction in full limbs: set *dh,*dl to mh:ml - sh:sl. */
+void
+refmpn_sub_ddmmss (mp_limb_t *dh, mp_limb_t *dl,
+                   mp_limb_t mh, mp_limb_t ml, mp_limb_t sh, mp_limb_t sl)
+{
+  /* the low subtraction borrows when ml is the smaller */
+  mp_limb_t  borrow = (ml < sl);
+
+  *dl = ml - sl;
+  *dh = mh - sh - borrow;
+}
+
+
+/* Store x+y to *w and return the carry out, 0 or 1. */
+mp_limb_t
+ref_addc_limb (mp_limb_t *w, mp_limb_t x, mp_limb_t y)
+{
+  mp_limb_t  total;
+
+  ASSERT_LIMB (x);
+  ASSERT_LIMB (y);
+
+  total = x + y;
+#if GMP_NAIL_BITS != 0
+  /* with nails the carry shows up just above the numb bits */
+  *w = total & GMP_NUMB_MASK;
+  return (total >> GMP_NUMB_BITS);
+#else
+  /* a full limb sum wrapped around if it came out below an operand */
+  *w = total;
+  return (total < x);
+#endif
+}
+
+/* Store x-y to *w and return the borrow out, 0 or 1. */
+mp_limb_t
+ref_subc_limb (mp_limb_t *w, mp_limb_t x, mp_limb_t y)
+{
+  mp_limb_t  delta;
+
+  ASSERT_LIMB (x);
+  ASSERT_LIMB (y);
+
+  delta = x - y;
+#if GMP_NAIL_BITS != 0
+  /* with nails the borrow shows up in the bit just above the numb bits */
+  *w = delta & GMP_NUMB_MASK;
+  return (delta >> GMP_NUMB_BITS) & 1;
+#else
+  /* a full limb difference wrapped around if it came out above x */
+  *w = delta;
+  return (delta > x);
+#endif
+}
+
+/* Store x+y+c to *w, where c is 0 or 1, and return the carry out. */
+mp_limb_t
+adc (mp_limb_t *w, mp_limb_t x, mp_limb_t y, mp_limb_t c)
+{
+  mp_limb_t  cy;
+
+  ASSERT_LIMB (x);
+  ASSERT_LIMB (y);
+  ASSERT (c == 0 || c == 1);
+
+  /* add the operands, then the carry-in, summing the two carries out */
+  cy = ref_addc_limb (w, x, y);
+  cy += ref_addc_limb (w, *w, c);
+  return cy;
+}
+
+/* Store x-y-c to *w, where c is 0 or 1, and return the borrow out. */
+mp_limb_t
+sbb (mp_limb_t *w, mp_limb_t x, mp_limb_t y, mp_limb_t c)
+{
+  mp_limb_t  bw;
+
+  ASSERT_LIMB (x);
+  ASSERT_LIMB (y);
+  ASSERT (c == 0 || c == 1);
+
+  /* subtract y, then the borrow-in, summing the two borrows out */
+  bw = ref_subc_limb (w, x, y);
+  bw += ref_subc_limb (w, *w, c);
+  return bw;
+}
+
+
+/* Body shared by refmpn_add_1 and refmpn_sub_1.  It expands inside a
+   function having parameters rp, sp, size and n.  "operation" is applied
+   to each limb of {sp,size} in turn, low to high, with n as the incoming
+   carry or borrow; its return value becomes the n for the next limb.
+   The final n is returned.
+
+   The index is an mp_size_t, matching the signed type of size, so the
+   loop comparison is not a mixed signed/unsigned one.  */
+#define AORS_1(operation)                               \
+  {                                                     \
+    mp_size_t  i;                                       \
+                                                        \
+    ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));  \
+    ASSERT (size >= 1);                                 \
+    ASSERT_MPN (sp, size);                              \
+    ASSERT_LIMB (n);                                    \
+                                                        \
+    for (i = 0; i < size; i++)                          \
+      n = operation (&rp[i], sp[i], n);                 \
+    return n;                                           \
+  }
+
+/* Add the single limb n to {sp,size}, result to {rp,size}, and return the
+   carry out. */
+mp_limb_t
+refmpn_add_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t n)
+{
+  AORS_1 (ref_addc_limb);
+}
+/* Subtract the single limb n from {sp,size}, result to {rp,size}, and
+   return the borrow out. */
+mp_limb_t
+refmpn_sub_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t n)
+{
+  AORS_1 (ref_subc_limb);
+}
+
+/* Body shared by refmpn_add_nc and refmpn_sub_nc.  It expands inside a
+   function having parameters rp, s1p, s2p, size and carry, applying
+   "operation" limb by limb from the low end and threading the carry (or
+   borrow) through.  The final carry is returned.  */
+#define AORS_NC(operation)                                              \
+  {                                                                     \
+    mp_size_t  i;                                                       \
+                                                                        \
+    ASSERT (refmpn_overlap_fullonly_two_p (rp, s1p, s2p, size));        \
+    ASSERT (carry == 0 || carry == 1);                                  \
+    ASSERT (size >= 1);                                                 \
+    ASSERT_MPN (s1p, size);                                             \
+    ASSERT_MPN (s2p, size);                                             \
+                                                                        \
+    i = 0;                                                              \
+    while (i < size)                                                    \
+      {                                                                 \
+        carry = operation (&rp[i], s1p[i], s2p[i], carry);              \
+        i++;                                                            \
+      }                                                                 \
+    return carry;                                                       \
+  }
+
+/* rp = s1 + s2 + carry, over size limbs; returns the carry out */
+mp_limb_t
+refmpn_add_nc (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
+               mp_limb_t carry)
+{
+  AORS_NC (adc);
+}
+/* rp = s1 - s2 - carry, over size limbs; returns the borrow out */
+mp_limb_t
+refmpn_sub_nc (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
+               mp_limb_t carry)
+{
+  AORS_NC (sbb);
+}
+
+
+/* rp = s1 + s2 with no carry-in; returns the carry out */
+mp_limb_t
+refmpn_add_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
+{
+  mp_limb_t  no_carry = CNST_LIMB(0);
+
+  return refmpn_add_nc (rp, s1p, s2p, size, no_carry);
+}
+/* rp = s1 - s2 with no borrow-in; returns the borrow out */
+mp_limb_t
+refmpn_sub_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
+{
+  mp_limb_t  no_borrow = CNST_LIMB(0);
+
+  return refmpn_sub_nc (rp, s1p, s2p, size, no_borrow);
+}
+
+/* rp = up + (vp << s), over n limbs.  The return value is the sum of the
+   bits shifted out of vp and the carry from the addition. */
+mp_limb_t
+refmpn_addlsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
+                 mp_size_t n, unsigned int s)
+{
+  mp_ptr     shifted;
+  mp_limb_t  shift_out, add_cy;
+
+  ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
+  ASSERT (n >= 1);
+  ASSERT (0 < s && s < GMP_NUMB_BITS);
+  ASSERT_MPN (up, n);
+  ASSERT_MPN (vp, n);
+
+  /* the shifted v goes to temporary space, leaving vp untouched */
+  shifted = refmpn_malloc_limbs (n);
+  shift_out = refmpn_lshift (shifted, vp, n, s);
+  add_cy = refmpn_add_n (rp, up, shifted, n);
+  free (shifted);
+  return shift_out + add_cy;
+}
+/* refmpn_addlsh_n with a shift of 1 */
+mp_limb_t
+refmpn_addlsh1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+  return refmpn_addlsh_n (rp, up, vp, n, 1);
+}
+/* refmpn_addlsh_n with a shift of 2 */
+mp_limb_t
+refmpn_addlsh2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+  return refmpn_addlsh_n (rp, up, vp, n, 2);
+}
+
+/* rp = up - (vp << s), over n limbs.  The return value is the sum of the
+   bits shifted out of vp and the borrow from the subtraction.
+
+   Only refmpn routines are used, in the same way as refmpn_addlsh_n, so
+   this reference doesn't depend on the library mpn_lshift and mpn_sub_n
+   being correct.  */
+mp_limb_t
+refmpn_sublsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
+                 mp_size_t n, unsigned int s)
+{
+  mp_limb_t cy;
+  mp_ptr tp;
+
+  ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
+  ASSERT (n >= 1);
+  ASSERT (0 < s && s < GMP_NUMB_BITS);
+  ASSERT_MPN (up, n);
+  ASSERT_MPN (vp, n);
+
+  /* the shifted v goes to temporary space, leaving vp untouched */
+  tp = refmpn_malloc_limbs (n);
+  cy  = refmpn_lshift (tp, vp, n, s);
+  cy += refmpn_sub_n (rp, up, tp, n);
+  free (tp);
+  return cy;
+}
+/* refmpn_sublsh_n with a shift of 1 */
+mp_limb_t
+refmpn_sublsh1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+  return refmpn_sublsh_n (rp, up, vp, n, 1);
+}
+
+/* rp = (vp << s) - up, over n limbs.  The return value is the bits
+   shifted out of vp less the borrow from the subtraction, as a signed
+   limb.
+
+   Only refmpn routines are used, in the same way as refmpn_addlsh_n, so
+   this reference doesn't depend on the library mpn_lshift and mpn_sub_n
+   being correct.  */
+mp_limb_signed_t
+refmpn_rsblsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
+                 mp_size_t n, unsigned int s)
+{
+  mp_limb_signed_t cy;
+  mp_ptr tp;
+
+  ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
+  ASSERT (n >= 1);
+  ASSERT (0 < s && s < GMP_NUMB_BITS);
+  ASSERT_MPN (up, n);
+  ASSERT_MPN (vp, n);
+
+  /* the shifted v goes to temporary space, leaving vp untouched */
+  tp = refmpn_malloc_limbs (n);
+  cy  = refmpn_lshift (tp, vp, n, s);
+  cy -= refmpn_sub_n (rp, tp, up, n);
+  free (tp);
+  return cy;
+}
+/* refmpn_rsblsh_n with a shift of 1 */
+mp_limb_signed_t
+refmpn_rsblsh1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+  return refmpn_rsblsh_n (rp, up, vp, n, 1);
+}
+/* refmpn_rsblsh_n with a shift of 2 */
+mp_limb_signed_t
+refmpn_rsblsh2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+  return refmpn_rsblsh_n (rp, up, vp, n, 2);
+}
+
+/* rp = (up + vp) >> 1, over n limbs, with the carry from the addition
+   becoming the top bit of the result.  Returns the bit shifted out at the
+   low end.
+
+   Only refmpn routines are used, so this reference doesn't depend on the
+   library mpn_add_n and mpn_rshift being correct.  */
+mp_limb_t
+refmpn_rsh1add_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+  mp_limb_t cya, cys;
+
+  ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
+  ASSERT (n >= 1);
+  ASSERT_MPN (up, n);
+  ASSERT_MPN (vp, n);
+
+  cya = refmpn_add_n (rp, up, vp, n);
+  /* the shift returns its out-bit at the top of the numb field */
+  cys = refmpn_rshift (rp, rp, n, 1) >> (GMP_NUMB_BITS - 1);
+  rp[n - 1] |= cya << (GMP_NUMB_BITS - 1);
+  return cys;
+}
+/* rp = (up - vp) >> 1, over n limbs, with the borrow from the subtraction
+   becoming the top bit of the result.  Returns the bit shifted out at the
+   low end.
+
+   Only refmpn routines are used, so this reference doesn't depend on the
+   library mpn_sub_n and mpn_rshift being correct.  */
+mp_limb_t
+refmpn_rsh1sub_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+  mp_limb_t cya, cys;
+
+  ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
+  ASSERT (n >= 1);
+  ASSERT_MPN (up, n);
+  ASSERT_MPN (vp, n);
+
+  cya = refmpn_sub_n (rp, up, vp, n);
+  /* the shift returns its out-bit at the top of the numb field */
+  cys = refmpn_rshift (rp, rp, n, 1) >> (GMP_NUMB_BITS - 1);
+  rp[n - 1] |= cya << (GMP_NUMB_BITS - 1);
+  return cys;
+}
+
+/* Twos complement negation, done as 0 - src into {dst,size}.  Returns the
+   borrow from that subtraction. */
+mp_limb_t
+refmpn_neg (mp_ptr dst, mp_srcptr src, mp_size_t size)
+{
+  mp_ptr     minuend;
+  mp_limb_t  borrow;
+
+  ASSERT (size >= 1);
+
+  /* an all-zeros operand to subtract src from */
+  minuend = refmpn_malloc_limbs (size);
+  refmpn_zero (minuend, size);
+  borrow = refmpn_sub_n (dst, minuend, src, size);
+  free (minuend);
+  return borrow;
+}
+
+
+/* Body shared by refmpn_add and refmpn_sub, for operands of sizes
+   s1size >= s2size >= 1.  aors_n handles the low s2size limbs, then
+   aors_1 propagates the carry or borrow through any remaining high limbs
+   of s1.  The final carry or borrow is returned.  */
+#define AORS(aors_n, aors_1)                                    \
+  {                                                             \
+    mp_size_t  extra;                                           \
+    mp_limb_t  cy;                                              \
+    ASSERT (s1size >= s2size);                                  \
+    ASSERT (s2size >= 1);                                       \
+    extra = s1size - s2size;                                    \
+    cy = aors_n (rp, s1p, s2p, s2size);                         \
+    if (extra == 0)                                             \
+      return cy;                                                \
+    return aors_1 (rp+s2size, s1p+s2size, extra, cy);           \
+  }
+/* rp = {s1p,s1size} + {s2p,s2size}; returns the carry out */
+mp_limb_t
+refmpn_add (mp_ptr rp,
+            mp_srcptr s1p, mp_size_t s1size,
+            mp_srcptr s2p, mp_size_t s2size)
+{
+  AORS (refmpn_add_n, refmpn_add_1);
+}
+/* rp = {s1p,s1size} - {s2p,s2size}; returns the borrow out */
+mp_limb_t
+refmpn_sub (mp_ptr rp,
+            mp_srcptr s1p, mp_size_t s1size,
+            mp_srcptr s2p, mp_size_t s2size)
+{
+  AORS (refmpn_sub_n, refmpn_sub_1);
+}
+
+
+/* Helpers for splitting a limb into halves of GMP_LIMB_BITS/2 bits. */
+#define SHIFTHIGH(x) ((x) << GMP_LIMB_BITS/2)
+#define SHIFTLOW(x)  ((x) >> GMP_LIMB_BITS/2)
+
+#define LOWMASK   (((mp_limb_t) 1 << GMP_LIMB_BITS/2)-1)
+#define HIGHMASK  SHIFTHIGH(LOWMASK)
+
+#define LOWPART(x)   ((x) & LOWMASK)
+#define HIGHPART(x)  SHIFTLOW((x) & HIGHMASK)
+
+/* Full limb multiply x*y, using full limbs not nails.  The low limb of
+   the product is stored to *lo and the high limb is returned. */
+mp_limb_t
+refmpn_umul_ppmm (mp_limb_t *lo, mp_limb_t x, mp_limb_t y)
+{
+  mp_limb_t  xl = LOWPART(x), xh = HIGHPART(x);
+  mp_limb_t  yl = LOWPART(y), yh = HIGHPART(y);
+  mp_limb_t  low, high, cross;
+
+  /* the two products which line up with the limb boundaries */
+  low = xl * yl;
+  high = xh * yh;
+
+  /* first cross product: its high half adds into "high", its low half
+     adds into the upper half of "low", with carry */
+  cross = xl * yh;
+  high += HIGHPART(cross);
+  cross = SHIFTHIGH(LOWPART(cross));
+  low += cross;
+  high += (low < cross);
+
+  /* second cross product, handled the same way */
+  cross = xh * yl;
+  high += HIGHPART(cross);
+  cross = SHIFTHIGH(LOWPART(cross));
+  low += cross;
+  high += (low < cross);
+
+  *lo = low;
+  return high;
+}
+
+/* refmpn_umul_ppmm with the low limb pointer as the last parameter */
+mp_limb_t
+refmpn_umul_ppmm_r (mp_limb_t x, mp_limb_t y, mp_limb_t *lo)
+{
+  return refmpn_umul_ppmm (lo, x, y);
+}
+
+/* rp = {sp,size} * multiplier + carry, where carry is a full limb of
+   carry-in.  Returns the high limb carried out of the top. */
+mp_limb_t
+refmpn_mul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier,
+               mp_limb_t carry)
+{
+  mp_size_t  k;
+  mp_limb_t  high, low, low_cy;
+
+  ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));
+  ASSERT (size >= 1);
+  ASSERT_MPN (sp, size);
+  ASSERT_LIMB (multiplier);
+  ASSERT_LIMB (carry);
+
+  /* pre-shift the multiplier by the nail bits; the low product limb is
+     shifted back down by the same amount in the loop */
+  multiplier <<= GMP_NAIL_BITS;
+  for (k = 0; k < size; k++)
+    {
+      high = refmpn_umul_ppmm (&low, sp[k], multiplier);
+      low >>= GMP_NAIL_BITS;
+
+      /* add the carry-in to the low limb, and what that carries to the
+         high limb, which must not itself carry */
+      low_cy = ref_addc_limb (&low, low, carry);
+      ASSERT_NOCARRY (ref_addc_limb (&high, high, low_cy));
+
+      rp[k] = low;
+      carry = high;
+    }
+  return carry;
+}
+
+/* rp = {sp,size} * multiplier; returns the high limb carried out */
+mp_limb_t
+refmpn_mul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
+{
+  mp_limb_t  no_carry = CNST_LIMB(0);
+
+  return refmpn_mul_1c (rp, sp, size, multiplier, no_carry);
+}
+
+
+/* Multiply {src,size} by {mult,msize}.  The low size+msize-1 limbs of the
+   product are stored at dst and the most significant limb is returned.
+   dst may be the same as src (a copy of src is taken first), but must not
+   overlap mult.
+
+   Any msize >= 1 is handled.  For msize==1 this is just refmpn_mul_1:
+   nothing is stored at dst[size] and mult[1] is never read.  */
+mp_limb_t
+refmpn_mul_N (mp_ptr dst, mp_srcptr src, mp_size_t size,
+              mp_srcptr mult, mp_size_t msize)
+{
+  mp_ptr     src_copy;
+  mp_limb_t  ret;
+  mp_size_t  i;
+
+  ASSERT (msize >= 1);
+  ASSERT (refmpn_overlap_fullonly_p (dst, src, size));
+  ASSERT (! refmpn_overlap_p (dst, size+msize-1, mult, msize));
+  ASSERT (size >= msize);
+  ASSERT_MPN (mult, msize);
+
+  /* in case dst==src */
+  src_copy = refmpn_malloc_limbs (size);
+  refmpn_copyi (src_copy, src, size);
+  src = src_copy;
+
+  ret = refmpn_mul_1 (dst, src, size, mult[0]);
+  for (i = 1; i < msize; i++)
+    {
+      /* the carry limb from the previous row becomes the top limb that
+         the next row accumulates into */
+      dst[size+i-1] = ret;
+      ret = refmpn_addmul_1 (dst+i, src, size, mult[i]);
+    }
+
+  free (src_copy);
+  return ret;
+}
+
+/* refmpn_mul_N with a 2 limb multiplier */
+mp_limb_t
+refmpn_mul_2 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
+{
+  return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 2);
+}
+/* refmpn_mul_N with a 3 limb multiplier */
+mp_limb_t
+refmpn_mul_3 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
+{
+  return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 3);
+}
+/* refmpn_mul_N with a 4 limb multiplier */
+mp_limb_t
+refmpn_mul_4 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
+{
+  return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 4);
+}
+
+/* Body shared by refmpn_addmul_1c and refmpn_submul_1c.  It expands
+   inside a function having parameters rp, sp, size, multiplier and carry.
+   The product sp*multiplier+carry is formed in temporary space, then
+   operation_n combines it into rp.  The return value is the high limb of
+   the product plus the carry or borrow from operation_n.  */
+#define AORSMUL_1C(operation_n)                                 \
+  {                                                             \
+    mp_ptr     prod;                                            \
+    mp_limb_t  high, cy;                                        \
+                                                                \
+    ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));          \
+                                                                \
+    prod = refmpn_malloc_limbs (size);                          \
+    high = refmpn_mul_1c (prod, sp, size, multiplier, carry);   \
+    cy = operation_n (rp, rp, prod, size);                      \
+                                                                \
+    free (prod);                                                \
+    return high + cy;                                           \
+  }
+
+/* rp += {sp,size} * multiplier + carry; returns the carry limb out */
+mp_limb_t
+refmpn_addmul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,
+                  mp_limb_t multiplier, mp_limb_t carry)
+{
+  AORSMUL_1C (refmpn_add_n);
+}
+/* rp -= {sp,size} * multiplier + carry; returns the borrow limb out */
+mp_limb_t
+refmpn_submul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,
+                  mp_limb_t multiplier, mp_limb_t carry)
+{
+  AORSMUL_1C (refmpn_sub_n);
+}
+
+
+/* refmpn_addmul_1c with no carry-in */
+mp_limb_t
+refmpn_addmul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
+{
+  mp_limb_t  no_carry = CNST_LIMB(0);
+
+  return refmpn_addmul_1c (rp, sp, size, multiplier, no_carry);
+}
+/* refmpn_submul_1c with no carry-in */
+mp_limb_t
+refmpn_submul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
+{
+  mp_limb_t  no_carry = CNST_LIMB(0);
+
+  return refmpn_submul_1c (rp, sp, size, multiplier, no_carry);
+}
+
+
+/* Add {src,size} * {mult,msize} into dst, one multiplier limb at a time.
+   For each limb but the last, the carry from refmpn_addmul_1 is stored
+   (not added) at dst[size+i]; the carry from the last limb is returned.
+   dst may be the same as src, a copy of src being taken first. */
+mp_limb_t
+refmpn_addmul_N (mp_ptr dst, mp_srcptr src, mp_size_t size,
+                 mp_srcptr mult, mp_size_t msize)
+{
+  mp_ptr     saved_src;
+  mp_limb_t  top;
+  mp_size_t  row;
+
+  ASSERT (dst == src || ! refmpn_overlap_p (dst, size+msize-1, src, size));
+  ASSERT (! refmpn_overlap_p (dst, size+msize-1, mult, msize));
+  ASSERT (size >= msize);
+  ASSERT_MPN (mult, msize);
+
+  /* work from a copy, in case dst==src */
+  saved_src = refmpn_malloc_limbs (size);
+  refmpn_copyi (saved_src, src, size);
+  src = saved_src;
+
+  /* all rows but the last store their carry as a new top limb */
+  row = 0;
+  while (row < msize-1)
+    {
+      dst[size+row] = refmpn_addmul_1 (dst+row, src, size, mult[row]);
+      row++;
+    }
+
+  /* the last row's carry is the return value */
+  top = refmpn_addmul_1 (dst+row, src, size, mult[row]);
+
+  free (saved_src);
+  return top;
+}
+
+/* refmpn_addmul_N with a 2 limb multiplier */
+mp_limb_t
+refmpn_addmul_2 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
+{
+  return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 2);
+}
+/* refmpn_addmul_N with a 3 limb multiplier */
+mp_limb_t
+refmpn_addmul_3 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
+{
+  return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 3);
+}
+/* refmpn_addmul_N with a 4 limb multiplier */
+mp_limb_t
+refmpn_addmul_4 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
+{
+  return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 4);
+}
+/* refmpn_addmul_N with a 5 limb multiplier */
+mp_limb_t
+refmpn_addmul_5 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
+{
+  return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 5);
+}
+/* refmpn_addmul_N with a 6 limb multiplier */
+mp_limb_t
+refmpn_addmul_6 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
+{
+  return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 6);
+}
+/* refmpn_addmul_N with a 7 limb multiplier */
+mp_limb_t
+refmpn_addmul_7 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
+{
+  return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 7);
+}
+/* refmpn_addmul_N with an 8 limb multiplier */
+mp_limb_t
+refmpn_addmul_8 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
+{
+  return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 8);
+}
+
+/* r1p = s1p + s2p and r2p = s1p - s2p, over size limbs.  The incoming
+   "carry" holds the add carry-in in bit 1 and the subtract borrow-in in
+   bit 0; the return value is laid out the same way, 2*carry + borrow. */
+mp_limb_t
+refmpn_add_n_sub_nc (mp_ptr r1p, mp_ptr r2p,
+                     mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
+                     mp_limb_t carry)
+{
+  mp_ptr     sum;
+  mp_limb_t  add_cy, sub_bw;
+
+  /* Destinations can't overlap. */
+  ASSERT (! refmpn_overlap_p (r1p, size, r2p, size));
+  ASSERT (refmpn_overlap_fullonly_two_p (r1p, s1p, s2p, size));
+  ASSERT (refmpn_overlap_fullonly_two_p (r2p, s1p, s2p, size));
+  ASSERT (size >= 1);
+
+  /* the sum goes to temporary space first, in case r1p==s1p or r1p==s2p,
+     so the subtraction still sees the original sources */
+  sum = refmpn_malloc_limbs (size);
+
+  add_cy = refmpn_add_nc (sum, s1p, s2p, size, carry >> 1);
+  sub_bw = refmpn_sub_nc (r2p, s1p, s2p, size, carry & 1);
+  refmpn_copyi (r1p, sum, size);
+
+  free (sum);
+  return 2 * add_cy + sub_bw;
+}
+
+/* refmpn_add_n_sub_nc with no carry-in or borrow-in */
+mp_limb_t
+refmpn_add_n_sub_n (mp_ptr r1p, mp_ptr r2p,
+                    mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
+{
+  mp_limb_t  no_carry = CNST_LIMB(0);
+
+  return refmpn_add_n_sub_nc (r1p, r2p, s1p, s2p, size, no_carry);
+}
+
+
+/* Right shift the limb pair hi,lo and return the low limb of the result.
+   shift==0 is treated specially since a shift by GMP_LIMB_BITS isn't
+   assumed to work (doesn't on x86). */
+mp_limb_t
+rshift_make (mp_limb_t hi, mp_limb_t lo, unsigned shift)
+{
+  mp_limb_t  from_hi, from_lo;
+
+  ASSERT (shift < GMP_NUMB_BITS);
+  if (shift == 0)
+    return lo;
+
+  from_hi = hi << (GMP_NUMB_BITS-shift);
+  from_lo = lo >> shift;
+  return (from_hi | from_lo) & GMP_NUMB_MASK;
+}
+
+/* Left shift the limb pair hi,lo and return the high limb of the result.
+   shift==0 is treated specially since a shift by GMP_LIMB_BITS isn't
+   assumed to work (doesn't on x86). */
+mp_limb_t
+lshift_make (mp_limb_t hi, mp_limb_t lo, unsigned shift)
+{
+  mp_limb_t  from_hi, from_lo;
+
+  ASSERT (shift < GMP_NUMB_BITS);
+  if (shift == 0)
+    return hi;
+
+  from_hi = hi << shift;
+  from_lo = lo >> (GMP_NUMB_BITS-shift);
+  return (from_hi | from_lo) & GMP_NUMB_MASK;
+}
+
+
+/* rp = {sp,size} >> shift, for 1 <= shift < GMP_NUMB_BITS.  The bits
+   shifted out of the low limb are returned, positioned at the high end of
+   the numb field. */
+mp_limb_t
+refmpn_rshift (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
+{
+  mp_limb_t  out;
+  mp_size_t  k, top;
+
+  ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));
+  ASSERT (size >= 1);
+  ASSERT (shift >= 1 && shift < GMP_NUMB_BITS);
+  ASSERT_MPN (sp, size);
+
+  /* grab the out-bits before sp[0] can be overwritten */
+  out = rshift_make (sp[0], CNST_LIMB(0), shift);
+
+  top = size - 1;
+  for (k = 0; k < top; k++)
+    rp[k] = rshift_make (sp[k+1], sp[k], shift);
+
+  /* zeros come in at the high end */
+  rp[top] = rshift_make (CNST_LIMB(0), sp[top], shift);
+  return out;
+}
+
+/* rp = {sp,size} << shift, for 1 <= shift < GMP_NUMB_BITS.  The bits
+   shifted out of the high limb are returned, positioned at the low end of
+   the limb. */
+mp_limb_t
+refmpn_lshift (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
+{
+  mp_limb_t  out;
+  mp_size_t  k;
+
+  ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size));
+  ASSERT (size >= 1);
+  ASSERT (shift >= 1 && shift < GMP_NUMB_BITS);
+  ASSERT_MPN (sp, size);
+
+  /* grab the out-bits before sp[size-1] can be overwritten */
+  out = lshift_make (CNST_LIMB(0), sp[size-1], shift);
+
+  for (k = size-1; k >= 1; k--)
+    rp[k] = lshift_make (sp[k], sp[k-1], shift);
+
+  /* zeros come in at the low end */
+  rp[0] = lshift_make (sp[0], CNST_LIMB(0), shift);
+  return out;
+}
+
+/* rp = ones complement of {sp,size}, within the GMP_NUMB_MASK bits of
+   each limb. */
+void
+refmpn_com (mp_ptr rp, mp_srcptr sp, mp_size_t size)
+{
+  mp_size_t  k = size;
+
+  /* We work downwards since mpn_lshiftc needs that. */
+  ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size));
+
+  while (k > 0)
+    {
+      k--;
+      rp[k] = (~sp[k]) & GMP_NUMB_MASK;
+    }
+}
+
+/* rp = complement of ({sp,size} << shift).  Returns the bits shifted out,
+   as given by refmpn_lshift. */
+mp_limb_t
+refmpn_lshiftc (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
+{
+  mp_limb_t  out;
+
+  /* No asserts here, refmpn_lshift will assert what we need. */
+
+  out = refmpn_lshift (rp, sp, size, shift);
+  refmpn_com (rp, rp, size);
+  return out;
+}
+
+/* As refmpn_rshift, but also accepting shift==0, which does a plain
+   refmpn_copyi and returns 0. */
+mp_limb_t
+refmpn_rshift_or_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
+{
+  if (shift != 0)
+    return refmpn_rshift (rp, sp, size, shift);
+
+  refmpn_copyi (rp, sp, size);
+  return 0;
+}
+/* As refmpn_lshift, but also accepting shift==0, which does a plain
+   refmpn_copyd and returns 0. */
+mp_limb_t
+refmpn_lshift_or_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
+{
+  if (shift != 0)
+    return refmpn_lshift (rp, sp, size, shift);
+
+  refmpn_copyd (rp, sp, size);
+  return 0;
+}
+
+/* As refmpn_rshift_or_copy, but accepting size==0 too, in which case
+   nothing is done and 0 is returned. */
+mp_limb_t
+refmpn_rshift_or_copy_any (mp_ptr rp, mp_srcptr sp, mp_size_t size,
+                           unsigned shift)
+{
+  if (size == 0)
+    return 0;
+
+  return refmpn_rshift_or_copy (rp, sp, size, shift);
+}
+/* As refmpn_lshift_or_copy, but accepting size==0 too, in which case
+   nothing is done and 0 is returned. */
+mp_limb_t
+refmpn_lshift_or_copy_any (mp_ptr rp, mp_srcptr sp, mp_size_t size,
+                           unsigned shift)
+{
+  if (size == 0)
+    return 0;
+
+  return refmpn_lshift_or_copy (rp, sp, size, shift);
+}
+
+/* Divide h,l by d, return quotient, store remainder to *rp.
+   Operates on full limbs, not nails.
+   Must have h < d.
+   __udiv_qrnnd_c isn't simple, and it's a bit slow, but it works.
+
+   The divisor is shifted left by its count of leading zeros before the
+   division, with the dividend shifted to match, and the remainder is
+   shifted back down afterwards. */
+mp_limb_t
+refmpn_udiv_qrnnd (mp_limb_t *rp, mp_limb_t h, mp_limb_t l, mp_limb_t d)
+{
+  mp_limb_t  q, r;
+  int  n;
+
+  ASSERT (d != 0);
+  ASSERT (h < d);
+
+  /* disabled alternative using the udiv_qrnnd macro directly */
+#if 0
+  udiv_qrnnd (q, r, h, l, d);
+  *rp = r;
+  return q;
+#endif
+
+  /* n = number of leading zero bits in d; shift them out */
+  n = refmpn_count_leading_zeros (d);
+  d <<= n;
+
+  /* shift the dividend by the same amount; skipped for n==0, which would
+     otherwise shift l right by a whole GMP_LIMB_BITS */
+  if (n != 0)
+    {
+      h = (h << n) | (l >> (GMP_LIMB_BITS - n));
+      l <<= n;
+    }
+
+  __udiv_qrnnd_c (q, r, h, l, d);
+  /* the remainder was scaled up along with the operands, undo that */
+  r >>= n;
+  *rp = r;
+  return q;
+}
+
+/* refmpn_udiv_qrnnd with the remainder pointer as the last parameter */
+mp_limb_t
+refmpn_udiv_qrnnd_r (mp_limb_t h, mp_limb_t l, mp_limb_t d, mp_limb_t *rp)
+{
+  return refmpn_udiv_qrnnd (rp, h, l, d);
+}
+
+/* This little subroutine avoids some bad code generation from i386 gcc 3.0
+   -fPIC -O2 -fomit-frame-pointer (%ebp being used uninitialized).
+
+   It is the division loop of refmpn_divmod_1c: {sp,size} is divided by
+   divisor from the high limb down, with carry as the incoming high part.
+   Quotient limbs go to {rp,size} and the final remainder is returned.  */
+static mp_limb_t
+refmpn_divmod_1c_workaround (mp_ptr rp, mp_srcptr sp, mp_size_t size,
+                            mp_limb_t divisor, mp_limb_t carry)
+{
+  mp_size_t  i;
+  /* one-element array receiving the remainder from each division step */
+  mp_limb_t rem[1];
+  for (i = size-1; i >= 0; i--)
+    {
+      /* the low dividend limb and the divisor are shifted up by the nail
+         bits for the full limb division */
+      rp[i] = refmpn_udiv_qrnnd (rem, carry,
+                                sp[i] << GMP_NAIL_BITS,
+                                divisor << GMP_NAIL_BITS);
+      /* shift the remainder back down to become the next high part */
+      carry = *rem >> GMP_NAIL_BITS;
+    }
+  return carry;
+}
+
+mp_limb_t
+refmpn_divmod_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,
+                 mp_limb_t divisor, mp_limb_t carry)
+{
+  mp_ptr     sp_orig;
+  mp_ptr     prod;
+  mp_limb_t  carry_orig;
+
+  ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
+  ASSERT (size >= 0);
+  ASSERT (carry < divisor);
+  ASSERT_MPN (sp, size);
+  ASSERT_LIMB (divisor);
+  ASSERT_LIMB (carry);
+
+  if (size == 0)
+    return carry;
+
+  sp_orig = refmpn_memdup_limbs (sp, size);
+  prod = refmpn_malloc_limbs (size);
+  carry_orig = carry;
+
+  carry = refmpn_divmod_1c_workaround (rp, sp, size, divisor, carry);
+
+  /* check by multiplying back */
+#if 0
+  printf ("size=%ld divisor=0x%lX carry=0x%lX remainder=0x%lX\n",
+         size, divisor, carry_orig, carry);
+  mpn_trace("s",sp_copy,size);
+  mpn_trace("r",rp,size);
+  printf ("mul_1c %lX\n", refmpn_mul_1c (prod, rp, size, divisor, carry));
+  mpn_trace("p",prod,size);
+#endif
+  ASSERT (refmpn_mul_1c (prod, rp, size, divisor, carry) == carry_orig);
+  ASSERT (refmpn_cmp (prod, sp_orig, size) == 0);
+  free (sp_orig);
+  free (prod);
+
+  return carry;
+}
+
+/* refmpn_divmod_1c with a zero incoming carry: quotient to {rp,size},
+   remainder returned.  */
+mp_limb_t
+refmpn_divmod_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
+{
+  mp_limb_t  rem;
+
+  rem = refmpn_divmod_1c (rp, sp, size, divisor, CNST_LIMB(0));
+  return rem;
+}
+
+
+/* Remainder of {sp,size} divided by divisor, with "carry" as the incoming
+   high remainder.  Implemented as a full refmpn_divmod_1c into a temporary
+   quotient area which is then thrown away.  */
+mp_limb_t
+refmpn_mod_1c (mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
+              mp_limb_t carry)
+{
+  mp_ptr     quot;
+  mp_limb_t  rem;
+
+  quot = refmpn_malloc_limbs (size);
+  rem = refmpn_divmod_1c (quot, sp, size, divisor, carry);
+  free (quot);
+
+  return rem;
+}
+
+/* refmpn_mod_1c with a zero incoming carry.  */
+mp_limb_t
+refmpn_mod_1 (mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
+{
+  mp_limb_t  rem;
+
+  rem = refmpn_mod_1c (sp, size, divisor, CNST_LIMB(0));
+  return rem;
+}
+
+/* Remainder of {sp,size} divided by divisor, for a divisor with
+   GMP_NUMB_HIGHBIT set and a caller-supplied inverse.  The inverse is only
+   checked against refmpn_invert_limb; the remainder itself comes from
+   refmpn_mod_1.  */
+mp_limb_t
+refmpn_preinv_mod_1 (mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
+                    mp_limb_t inverse)
+{
+  mp_limb_t  rem;
+
+  ASSERT (divisor & GMP_NUMB_HIGHBIT);
+  ASSERT (inverse == refmpn_invert_limb (divisor));
+
+  rem = refmpn_mod_1 (sp, size, divisor);
+  return rem;
+}
+
+/* Reduce {p,n} modulo 2^(3*GMP_NUMB_BITS/4) - 1 with a plain mpn_mod_1.
+   This is rather slow, but has the advantage of being in a different style
+   than the libgmp versions.  GMP_NUMB_BITS must be a multiple of 4.  */
+mp_limb_t
+refmpn_mod_34lsub1 (mp_srcptr p, mp_size_t n)
+{
+  mp_limb_t  modulus;
+
+  ASSERT ((GMP_NUMB_BITS % 4) == 0);
+
+  modulus = (CNST_LIMB(1) << (3 * GMP_NUMB_BITS / 4)) - 1;
+  return mpn_mod_1 (p, n, modulus);
+}
+
+
+/* Divide {sp,size} by divisor with "carry" as the incoming high remainder,
+   then carry the division on through xsize further zero limbs below the
+   source.  The integer quotient limbs go to {rp+xsize,size} and the extra
+   xsize quotient limbs to {rp,xsize}.  The final remainder is returned.  */
+mp_limb_t
+refmpn_divrem_1c (mp_ptr rp, mp_size_t xsize,
+                 mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
+                 mp_limb_t carry)
+{
+  mp_ptr     zeros;
+  mp_limb_t  rem;
+
+  zeros = refmpn_malloc_limbs (xsize);
+  refmpn_fill (zeros, xsize, CNST_LIMB(0));
+
+  /* high part from the real source, then low part from the zeros */
+  rem = refmpn_divmod_1c (rp+xsize, sp, size, divisor, carry);
+  rem = refmpn_divmod_1c (rp, zeros, xsize, divisor, rem);
+
+  free (zeros);
+  return rem;
+}
+
+/* refmpn_divrem_1c with a zero incoming carry.  */
+mp_limb_t
+refmpn_divrem_1 (mp_ptr rp, mp_size_t xsize,
+                mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
+{
+  mp_limb_t  rem;
+
+  rem = refmpn_divrem_1c (rp, xsize, sp, size, divisor, CNST_LIMB(0));
+  return rem;
+}
+
+/* As refmpn_divrem_1, for callers that supply a pre-computed inverse and
+   normalization shift.  Both are only checked for consistency with the
+   divisor; the division itself is done by refmpn_divrem_1.  */
+mp_limb_t
+refmpn_preinv_divrem_1 (mp_ptr rp, mp_size_t xsize,
+                       mp_srcptr sp, mp_size_t size,
+                       mp_limb_t divisor, mp_limb_t inverse, unsigned shift)
+{
+  mp_limb_t  rem;
+
+  ASSERT (size >= 0);
+  ASSERT (shift == refmpn_count_leading_zeros (divisor));
+  ASSERT (inverse == refmpn_invert_limb (divisor << shift));
+
+  rem = refmpn_divrem_1 (rp, xsize, sp, size, divisor);
+  return rem;
+}
+
+/* Divide {np,nn}, extended with qxn zero limbs at the low end, by the two
+   limb divisor {dp,2} using refmpn_sb_div_qr on a temporary copy.  The
+   quotient is stored at qp, the two limb remainder is written back to
+   {np,2}, and the value returned by refmpn_sb_div_qr is passed on as the
+   return value.  */
+mp_limb_t
+refmpn_divrem_2 (mp_ptr qp, mp_size_t qxn,
+                mp_ptr np, mp_size_t nn,
+                mp_srcptr dp)
+{
+  mp_size_t  total = nn + qxn;
+  mp_ptr     work;
+  mp_limb_t  high;
+
+  work = refmpn_malloc_limbs (total);
+  refmpn_zero (work, qxn);
+  refmpn_copyi (work + qxn, np, nn);
+
+  high = refmpn_sb_div_qr (qp, work, total, dp, 2);
+
+  refmpn_copyi (np, work, 2);
+  free (work);
+
+  return high;
+}
+
+/* The inverse is floor((b*(b-d)-1) / d), per the division by invariant
+   integers paper, figure 8.1 m', where b=2^GMP_LIMB_BITS.  The dividend
+   has high limb -d-1 and low limb all ones.  Note that -d-1 < d since d
+   has the high bit set, as the division requires.  */
+
+mp_limb_t
+refmpn_invert_limb (mp_limb_t d)
+{
+  mp_limb_t  rem, inv;
+
+  ASSERT (d & GMP_LIMB_HIGHBIT);
+
+  inv = refmpn_udiv_qrnnd (&rem, -d-1, MP_LIMB_T_MAX, d);
+  return inv;
+}
+
+/* Divide a 2*n limb value with every limb at its maximum (formed as zero
+   minus one) by {up,n}, and store the low n limbs of the quotient at
+   {rp,n}.  rp is also used as the remainder destination of the division,
+   then overwritten.  The scratch parameter is not used.  */
+void
+refmpn_invert (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr scratch)
+{
+  mp_ptr     quot, ones;
+  mp_size_t  nn = 2 * n;
+  TMP_DECL;
+  TMP_MARK;
+
+  ones = TMP_ALLOC_LIMBS (nn);
+  quot = TMP_ALLOC_LIMBS (n + 1);
+
+  /* 0 - 1 wraps around to all ones */
+  MPN_ZERO (ones, nn);
+  mpn_sub_1 (ones, ones, nn, 1);
+
+  refmpn_tdiv_qr (quot, rp, 0, ones, nn, up, n);
+  refmpn_copyi (rp, quot, n);
+
+  TMP_FREE;
+}
+
+/* Store at {rp,n} the quotient produced by mpn_sbpi1_bdiv_q for the n limb
+   value 1 divided by {up,n}, then verify that the low n limbs of
+   {rp,n} * {up,n} are exactly 1.  The scratch parameter is not used.  */
+void
+refmpn_binvert (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr scratch)
+{
+  mp_ptr     tp;
+  mp_limb_t  low_inv, neg_inv;
+  TMP_DECL;
+  TMP_MARK;
+
+  /* Calling the library's mpn_sbpi1_bdiv_q isn't kosher in testing code.
+     To make up for it, the inverse is checked with a multiply below.  */
+
+  tp = TMP_ALLOC_LIMBS (2 * n);
+
+  /* the value 1 in n limbs */
+  MPN_ZERO (tp, n);
+  tp[0] = 1;
+
+  binvert_limb (low_inv, up[0]);
+  neg_inv = -low_inv;
+  mpn_sbpi1_bdiv_q (rp, tp, n, up, n, neg_inv);
+
+  /* tp is reused for the full 2*n limb product */
+  refmpn_mul_n (tp, rp, up, n);
+  ASSERT_ALWAYS (tp[0] == 1 && mpn_zero_p (tp + 1, n - 1));
+
+  TMP_FREE;
+}
+
+/* Produce a quotient at dst (rp) and return a remainder c satisfying
+   c*b^n + src-i == 3*dst, where src is {sp,size}, n is size and i is the
+   incoming carry.
+
+   One of c==0, c==1 or c==2 will satisfy this, so each is tried in turn as
+   the high carry-in of a division by 3 until the remainder comes out zero.
+
+   If GMP_NUMB_BITS is even then 2^GMP_NUMB_BITS==1mod3 and a non-zero
+   remainder from the first attempt would determine the right c directly
+   (3-c), but that isn't used since nothing can be guaranteed about
+   GMP_NUMB_BITS when using nails.
+
+   If the initial src-i borrows then refmpn_sub_1 leaves a twos complement
+   negative, ie. b^n+src-i, and the search yields c1 satisfying
+   c1*b^n + b^n+src-i == 3*dst, from which c=c1+1.  So it's enough to add
+   any borrow back in at the end.
+
+   A borrow only occurs when src==0 or src==1, and, by the same reasoning
+   as in mpn/generic/diveby3.c, the c1 resulting in those cases is only 0
+   or 1 respectively, so with 1 added the return value is still in the
+   prescribed range 0 to 2. */
+
+mp_limb_t
+refmpn_divexact_by3c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t carry)
+{
+  mp_ptr     work;
+  mp_limb_t  c, borrow;
+
+  ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
+  ASSERT (size >= 1);
+  ASSERT (carry <= 2);
+  ASSERT_MPN (sp, size);
+
+  /* work = src - i, remembering any borrow */
+  work = refmpn_malloc_limbs (size);
+  borrow = refmpn_sub_1 (work, sp, size, carry);
+
+  c = 0;
+  while (c <= 2
+         && refmpn_divmod_1c (rp, work, size, CNST_LIMB(3), c) != 0)
+    c++;
+
+  if (c > 2)
+    {
+      ASSERT_FAIL (no value of c satisfies);
+    }
+
+  c += borrow;
+  ASSERT (c <= 2);
+
+  free (work);
+  return c;
+}
+
+/* refmpn_divexact_by3c with a zero incoming carry.  */
+mp_limb_t
+refmpn_divexact_by3 (mp_ptr rp, mp_srcptr sp, mp_size_t size)
+{
+  mp_limb_t  c;
+
+  c = refmpn_divexact_by3c (rp, sp, size, CNST_LIMB(0));
+  return c;
+}
+
+
+/* Schoolbook multiply in the manner of mpn/generic/mul_basecase.c, but
+   built from refmpn functions.  {up,usize} times {vp,vsize} is stored as
+   usize+vsize limbs at prodp.  Requires usize >= vsize >= 1 and a
+   destination not overlapping either source.  */
+void
+refmpn_mul_basecase (mp_ptr prodp,
+                    mp_srcptr up, mp_size_t usize,
+                    mp_srcptr vp, mp_size_t vsize)
+{
+  mp_size_t  row;
+  mp_limb_t  high;
+
+  ASSERT (! refmpn_overlap_p (prodp, usize+vsize, up, usize));
+  ASSERT (! refmpn_overlap_p (prodp, usize+vsize, vp, vsize));
+  ASSERT (usize >= vsize);
+  ASSERT (vsize >= 1);
+  ASSERT_MPN (up, usize);
+  ASSERT_MPN (vp, vsize);
+
+  /* the first row initializes the product, later rows accumulate */
+  prodp[usize] = refmpn_mul_1 (prodp, up, usize, vp[0]);
+
+  row = 1;
+  while (row < vsize)
+    {
+      high = refmpn_addmul_1 (prodp+row, up, usize, vp[row]);
+      prodp[usize+row] = high;
+      row++;
+    }
+}
+
+/* Operand size limits (on vn) selecting the algorithm in refmpn_mul.  Each
+   is the larger of the library's multiply and square thresholds.  Without
+   WANT_FFT the last limit is never reached, so the toom44 case is never
+   selected.  */
+#define TOOM3_THRESHOLD (MAX (MUL_TOOM33_THRESHOLD, SQR_TOOM3_THRESHOLD))
+#define TOOM4_THRESHOLD (MAX (MUL_TOOM44_THRESHOLD, SQR_TOOM4_THRESHOLD))
+#if WANT_FFT
+#define FFT_THRESHOLD (MAX (MUL_FFT_THRESHOLD, SQR_FFT_THRESHOLD))
+#else
+#define FFT_THRESHOLD MP_SIZE_T_MAX /* don't use toom44 here */
+#endif
+
+/* Multiply {up,un} by {vp,vn}, storing the un+vn limb product at wp.
+   For vn==0 just un zero limbs are stored.
+
+   Small vn goes to refmpn_mul_basecase.  For bigger vn the low vn limbs
+   of u are multiplied by v with a library toom function, deliberately
+   picking one a step below what the library itself would select at that
+   size.  When un != vn the remaining high un-vn limbs of u are handled by
+   a recursive call and the two partial products are added together.  */
+void
+refmpn_mul (mp_ptr wp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn)
+{
+  mp_ptr tp;
+  mp_size_t tn;
+
+  if (vn < TOOM3_THRESHOLD)
+    {
+      /* In the mpn_mul_basecase and mpn_kara_mul_n range, use our own
+        mul_basecase.  */
+      if (vn != 0)
+       refmpn_mul_basecase (wp, up, un, vp, vn);
+      else
+       MPN_ZERO (wp, un);
+      return;
+    }
+
+  /* In each case below tp receives the 2*vn limb product of the low vn
+     limbs of u with v, the rest of tp being scratch space.  */
+  if (vn < TOOM4_THRESHOLD)
+    {
+      /* In the mpn_toom33_mul range, use mpn_toom22_mul.  */
+      tn = 2 * vn + mpn_toom22_mul_itch (vn, vn);
+      tp = refmpn_malloc_limbs (tn);
+      mpn_toom22_mul (tp, up, vn, vp, vn, tp + 2 * vn);
+    }
+  else if (vn < FFT_THRESHOLD)
+    {
+      /* In the mpn_toom44_mul range, use mpn_toom33_mul.  */
+      tn = 2 * vn + mpn_toom33_mul_itch (vn, vn);
+      tp = refmpn_malloc_limbs (tn);
+      mpn_toom33_mul (tp, up, vn, vp, vn, tp + 2 * vn);
+    }
+  else
+    {
+      /* Finally, for the largest operands, use mpn_toom44_mul.  */
+      tn = 2 * vn + mpn_toom44_mul_itch (vn, vn);
+      tp = refmpn_malloc_limbs (tn);
+      mpn_toom44_mul (tp, up, vn, vp, vn, tp + 2 * vn);
+    }
+
+  if (un != vn)
+    {
+      /* product of the high un-vn limbs of u with v, larger operand first */
+      if (un - vn < vn)
+       refmpn_mul (wp + vn, vp, vn, up + vn, un - vn);
+      else
+       refmpn_mul (wp + vn, up + vn, un - vn, vp, vn);
+
+      MPN_COPY (wp, tp, vn);
+
+      /* The sum is floor (u*v / b^vn), which fits in un limbs, so this
+        addition cannot carry out.  */
+      ASSERT_NOCARRY (refmpn_add (wp + vn, wp + vn, un, tp + vn, vn));
+    }
+  else
+    {
+      MPN_COPY (wp, tp, 2 * vn);
+    }
+
+  free (tp);
+}
+
+/* refmpn_mul for two operands of the same size: 2*size limbs are stored at
+   prodp.  */
+void
+refmpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
+{
+  mp_size_t  usize = size;
+  mp_size_t  vsize = size;
+
+  refmpn_mul (prodp, up, usize, vp, vsize);
+}
+
+/* Store the low "size" limbs of {up,size} * {vp,size} at prodp.  The full
+   product is formed in a temporary and the high half discarded.  */
+void
+refmpn_mullo_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
+{
+  mp_ptr  full;
+
+  full = refmpn_malloc_limbs (2*size);
+  refmpn_mul (full, up, size, vp, size);
+
+  refmpn_copyi (prodp, full, size);
+  free (full);
+}
+
+/* Square {src,size} into dst by multiplying it by itself with
+   refmpn_mul.  */
+void
+refmpn_sqr (mp_ptr dst, mp_srcptr src, mp_size_t size)
+{
+  mp_srcptr  up = src;
+  mp_srcptr  vp = src;
+
+  refmpn_mul (dst, up, size, vp, size);
+}
+
+/* Multiply with no restriction on the relative sizes: usize<vsize,
+   usize==0 and vsize==0 are all accepted.  When either size is zero the
+   destination gets as many zero limbs as the other operand has.  Otherwise
+   the operands are handed to refmpn_mul with the larger one first.  */
+void
+refmpn_mul_any (mp_ptr prodp,
+                    mp_srcptr up, mp_size_t usize,
+                    mp_srcptr vp, mp_size_t vsize)
+{
+  ASSERT (! refmpn_overlap_p (prodp, usize+vsize, up, usize));
+  ASSERT (! refmpn_overlap_p (prodp, usize+vsize, vp, vsize));
+  ASSERT (usize >= 0);
+  ASSERT (vsize >= 0);
+  ASSERT_MPN (up, usize);
+  ASSERT_MPN (vp, vsize);
+
+  if (usize == 0 || vsize == 0)
+    {
+      /* one of the sizes is zero, so the sum is the size of the other */
+      refmpn_fill (prodp, usize + vsize, CNST_LIMB(0));
+    }
+  else if (usize < vsize)
+    {
+      refmpn_mul (prodp, vp, vsize, up, usize);
+    }
+  else
+    {
+      refmpn_mul (prodp, up, usize, vp, vsize);
+    }
+}
+
+
+/* GCD of {xp,xsize} and the single limb y, both non-zero.  x is first
+   reduced modulo y, giving y itself as the answer if that is zero.
+   Otherwise a binary GCD is run on the two limbs: common factors of two
+   are counted and removed, then the smaller odd value is repeatedly
+   subtracted from the larger until the difference is zero.  */
+mp_limb_t
+refmpn_gcd_1 (mp_srcptr xp, mp_size_t xsize, mp_limb_t y)
+{
+  mp_limb_t  x, swap;
+  int  twos;
+
+  ASSERT (y != 0);
+  ASSERT (! refmpn_zero_p (xp, xsize));
+  ASSERT_MPN (xp, xsize);
+  ASSERT_LIMB (y);
+
+  x = refmpn_mod_1 (xp, xsize, y);
+  if (x == 0)
+    return y;
+
+  /* factors of two common to both */
+  for (twos = 0; ((x | y) & 1) == 0; twos++)
+    {
+      x >>= 1;
+      y >>= 1;
+    }
+
+  do
+    {
+      while ((x & 1) == 0)
+        x >>= 1;
+      while ((y & 1) == 0)
+        y >>= 1;
+
+      /* keep the larger value in x */
+      if (x < y)
+        {
+          swap = x;
+          x = y;
+          y = swap;
+        }
+
+      x -= y;
+    }
+  while (x != 0);
+
+  return y << twos;
+}
+
+
+/* Number of zero bits above the highest set bit of x, measured against
+   GMP_LIMB_HIGHBIT, ie. the full limb and not just the non-nail part.
+   x must be non-zero.  */
+unsigned
+refmpn_count_leading_zeros (mp_limb_t x)
+{
+  unsigned  count;
+
+  ASSERT (x != 0);
+
+  for (count = 0; (x & GMP_LIMB_HIGHBIT) == 0; count++)
+    x <<= 1;
+
+  return count;
+}
+
+/* Number of zero bits below the lowest set bit of x, which must be
+   non-zero.
+   NOTE(review): this comment used to say "Full limbs allowed, not limited
+   to nails", yet ASSERT_LIMB is applied to x -- confirm which is
+   intended.  */
+unsigned
+refmpn_count_trailing_zeros (mp_limb_t x)
+{
+  unsigned  count;
+
+  ASSERT (x != 0);
+  ASSERT_LIMB (x);
+
+  for (count = 0; (x & 1) == 0; count++)
+    x >>= 1;
+
+  return count;
+}
+
+/* Remove factors of two (low zero bits) from the non-zero {p,size} by
+   shifting it right in place.  Whole zero limbs are dropped one at a time
+   first, then the remaining bits are shifted out.  The return value is
+   the number of twos stripped.  */
+mp_size_t
+refmpn_strip_twos (mp_ptr p, mp_size_t size)
+{
+  mp_size_t  limbs;
+  unsigned   shift;
+
+  ASSERT (size >= 1);
+  ASSERT (! refmpn_zero_p (p, size));
+  ASSERT_MPN (p, size);
+
+  limbs = 0;
+  while (p[0] == 0)
+    {
+      /* move everything down one limb, zero filling at the top */
+      refmpn_copyi (p, p+1, size-1);
+      p[size-1] = 0;
+      limbs++;
+    }
+
+  shift = refmpn_count_trailing_zeros (p[0]);
+  if (shift != 0)
+    refmpn_rshift (p, p, size, shift);
+
+  return limbs*GMP_NUMB_BITS + shift;
+}
+
+/* GCD of {xp,xsize} and {yp,ysize} by repeated subtraction: the smaller
+   operand is subtracted from the larger, factors of two are stripped from
+   the difference, and this is repeated until the two are equal.  Both
+   operands are used as working space and so are destroyed.  The result is
+   copied to gp and its size in limbs is returned.  */
+mp_limb_t
+refmpn_gcd (mp_ptr gp, mp_ptr xp, mp_size_t xsize, mp_ptr yp, mp_size_t ysize)
+{
+  int       order;
+
+  ASSERT (ysize >= 1);
+  ASSERT (xsize >= ysize);
+  ASSERT ((xp[0] & 1) != 0);
+  ASSERT ((yp[0] & 1) != 0);
+  /* the high limb of x is not asserted non-zero, x is normalized below */
+  ASSERT (yp[ysize-1] != 0);
+  ASSERT (refmpn_overlap_fullonly_p (gp, xp, xsize));
+  ASSERT (refmpn_overlap_fullonly_p (gp, yp, ysize));
+  ASSERT (! refmpn_overlap_p (xp, xsize, yp, ysize));
+  if (xsize == ysize)
+    ASSERT (refmpn_msbone (xp[xsize-1]) >= refmpn_msbone (yp[ysize-1]));
+  ASSERT_MPN (xp, xsize);
+  ASSERT_MPN (yp, ysize);
+
+  refmpn_strip_twos (xp, xsize);
+  MPN_NORMALIZE (xp, xsize);
+  MPN_NORMALIZE (yp, ysize);
+
+  while ((order = refmpn_cmp_twosizes (xp, xsize, yp, ysize)) != 0)
+    {
+      /* arrange for x to be the larger */
+      if (order < 0)
+        {
+          MPN_PTR_SWAP (xp,xsize, yp,ysize);
+        }
+
+      ASSERT_NOCARRY (refmpn_sub (xp, xp, xsize, yp, ysize));
+
+      refmpn_strip_twos (xp, xsize);
+      MPN_NORMALIZE (xp, xsize);
+    }
+
+  refmpn_copyi (gp, xp, xsize);
+  return xsize;
+}
+
+/* Count the 1 bits in src, one bit at a time from the low end.  */
+unsigned long
+ref_popc_limb (mp_limb_t src)
+{
+  unsigned long  count = 0;
+
+  /* once src has been shifted down to zero the remaining bits of the limb
+     add nothing */
+  while (src != 0)
+    {
+      if ((src & 1) != 0)
+        count++;
+      src >>= 1;
+    }
+
+  return count;
+}
+
+/* Total number of 1 bits in {sp,size}, being the sum of ref_popc_limb over
+   each limb.  Zero for size==0.  */
+unsigned long
+refmpn_popcount (mp_srcptr sp, mp_size_t size)
+{
+  unsigned long  total;
+  mp_size_t  left;
+
+  ASSERT (size >= 0);
+  ASSERT_MPN (sp, size);
+
+  total = 0;
+  for (left = size; left > 0; left--)
+    total += ref_popc_limb (*sp++);
+
+  return total;
+}
+
+/* Hamming distance between {s1p,size} and {s2p,size}, calculated as the
+   population count of their exclusive-or.  Zero for size==0.  */
+unsigned long
+refmpn_hamdist (mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
+{
+  mp_ptr  diff;
+  unsigned long  dist;
+
+  ASSERT (size >= 0);
+  ASSERT_MPN (s1p, size);
+  ASSERT_MPN (s2p, size);
+
+  dist = 0;
+  if (size != 0)
+    {
+      diff = refmpn_malloc_limbs (size);
+      refmpn_xor_n (diff, s1p, s2p, size);
+      dist = refmpn_popcount (diff, size);
+      free (diff);
+    }
+
+  return dist;
+}
+
+
+/* Set r to a%d, all three being two limb values.  A copy D of the divisor
+   is doubled until it either has its high bit set or is no longer below
+   r, counting the doublings in n.  Then D is subtracted from r whenever
+   it fits, halving D after each step, for n+1 steps in all.  r must not
+   overlap d.  */
+void
+refmpn_mod2 (mp_limb_t r[2], const mp_limb_t a[2], const mp_limb_t d[2])
+{
+  mp_limb_t  D[2];
+  int        n;
+
+  ASSERT (! refmpn_overlap_p (r, (mp_size_t) 2, d, (mp_size_t) 2));
+  ASSERT_MPN (a, 2);
+  ASSERT_MPN (d, 2);
+
+  D[1] = d[1];
+  D[0] = d[0];
+  r[1] = a[1];
+  r[0] = a[0];
+
+  /* scale D up */
+  n = 0;
+  while ((D[1] & GMP_NUMB_HIGHBIT) == 0
+         && refmpn_cmp (r, D, (mp_size_t) 2) > 0)
+    {
+      refmpn_lshift (D, D, (mp_size_t) 2, 1);
+      n++;
+      ASSERT (n <= GMP_NUMB_BITS);
+    }
+
+  /* and back down again, reducing r on the way */
+  for ( ; n >= 0; n--)
+    {
+      if (refmpn_cmp (r, D, (mp_size_t) 2) >= 0)
+        ASSERT_NOCARRY (refmpn_sub_n (r, r, D, (mp_size_t) 2));
+      refmpn_rshift (D, D, (mp_size_t) 2, 1);
+    }
+
+  ASSERT (refmpn_cmp (r, d, (mp_size_t) 2) < 0);
+}
+
+
+
+/* Similar to the old mpn/generic/sb_divrem_mn.c, but somewhat simplified, in
+   particular the trial quotient is allowed to be 2 too big.
+
+   Divides {np,nsize} by {dp,dsize}.  The low nsize-dsize quotient limbs
+   are stored at qp and the high quotient limb, 0 or 1, is the return
+   value.  np is overwritten: on return its low dsize limbs are the
+   remainder (they're what the multiply-back check below uses) and every
+   limb above that which the loop visited has been set to zero.  The
+   divisor must be at least 2 limbs and have GMP_NUMB_HIGHBIT set in its
+   high limb.
+
+   Before returning, q*d + r is recomputed and compared against a copy of
+   the original dividend.  */
+mp_limb_t
+refmpn_sb_div_qr (mp_ptr qp,
+                 mp_ptr np, mp_size_t nsize,
+                 mp_srcptr dp, mp_size_t dsize)
+{
+  mp_limb_t  retval = 0;
+  mp_size_t  i;
+  mp_limb_t  d1 = dp[dsize-1];
+  mp_ptr     np_orig = refmpn_memdup_limbs (np, nsize);  /* for final check */
+
+  ASSERT (nsize >= dsize);
+  /* ASSERT (dsize > 2); */
+  ASSERT (dsize >= 2);
+  ASSERT (dp[dsize-1] & GMP_NUMB_HIGHBIT);
+  ASSERT (! refmpn_overlap_p (qp, nsize-dsize, np, nsize) || qp+dsize >= np);
+  ASSERT_MPN (np, nsize);
+  ASSERT_MPN (dp, dsize);
+
+  i = nsize-dsize;
+  if (refmpn_cmp (np+i, dp, dsize) >= 0)  /* top dsize limbs of n >= d */
+    {
+      ASSERT_NOCARRY (refmpn_sub_n (np+i, np+i, dp, dsize));
+      retval = 1;
+    }
+
+  for (i--; i >= 0; i--)
+    {
+      mp_limb_t  n0 = np[i+dsize];
+      mp_limb_t  n1 = np[i+dsize-1];
+      mp_limb_t  q, dummy_r;
+
+      ASSERT (n0 <= d1);
+      if (n0 == d1)
+       q = GMP_NUMB_MAX;  /* the udiv below needs n0 < d1, use the max */
+      else
+       q = refmpn_udiv_qrnnd (&dummy_r, n0, n1 << GMP_NAIL_BITS,
+                              d1 << GMP_NAIL_BITS);
+
+      n0 -= refmpn_submul_1 (np+i, dp, dsize, q);  /* n -= q*d at limb i */
+      ASSERT (n0 == 0 || n0 == MP_LIMB_T_MAX);
+      if (n0)  /* went negative, q too big: add d back once or twice */
+       {
+         q--;
+         if (! refmpn_add_n (np+i, np+i, dp, dsize))
+           {
+             q--;
+             ASSERT_CARRY (refmpn_add_n (np+i, np+i, dp, dsize));
+           }
+       }
+      np[i+dsize] = 0;
+
+      qp[i] = q;
+    }
+
+  /* remainder < divisor */
+#if 0          /* ASSERT triggers gcc 4.2.1 bug */
+  ASSERT (refmpn_cmp (np, dp, dsize) < 0);
+#endif
+
+  /* multiply back to original */
+  {
+    mp_ptr  mp = refmpn_malloc_limbs (nsize);
+
+    refmpn_mul_any (mp, qp, nsize-dsize, dp, dsize);
+    if (retval)  /* account for the high quotient limb */
+      ASSERT_NOCARRY (refmpn_add_n (mp+nsize-dsize,mp+nsize-dsize, dp, dsize));
+    ASSERT_NOCARRY (refmpn_add (mp, mp, nsize, np, dsize));
+    ASSERT (refmpn_cmp (mp, np_orig, nsize) == 0);
+
+    free (mp);
+  }
+
+  free (np_orig);
+  return retval;
+}
+
+/* Divide {np,nsize} by {dp,dsize}, storing the quotient at qp and the
+   remainder at rp.  qxn must be 0 and the high limb of d must be non-zero.
+
+   A single limb divisor is handled with refmpn_divmod_1.  Otherwise
+   copies of n and d are shifted left by "norm" bits, which is intended to
+   put the GMP_NUMB_HIGHBIT that refmpn_sb_div_qr asserts into the high
+   limb of d, with the bits shifted out of n kept in an extra high limb.
+   The division is done on those copies and the low dsize limbs of the
+   result are shifted back down by "norm" into rp.  The value returned by
+   refmpn_sb_div_qr is not stored anywhere.  */
+void
+refmpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
+               mp_ptr np, mp_size_t nsize,
+               mp_srcptr dp, mp_size_t dsize)
+{
+  ASSERT (qxn == 0);
+  ASSERT_MPN (np, nsize);
+  ASSERT_MPN (dp, dsize);
+  ASSERT (dsize > 0);
+  ASSERT (dp[dsize-1] != 0);
+
+  if (dsize == 1)
+    {
+      rp[0] = refmpn_divmod_1 (qp, np, nsize, dp[0]);
+      return;
+    }
+  else
+    {
+      mp_ptr  n2p = refmpn_malloc_limbs (nsize+1);
+      mp_ptr  d2p = refmpn_malloc_limbs (dsize);
+      int     norm = refmpn_count_leading_zeros (dp[dsize-1]) - GMP_NAIL_BITS;
+
+      n2p[nsize] = refmpn_lshift_or_copy (n2p, np, nsize, norm);
+      ASSERT_NOCARRY (refmpn_lshift_or_copy (d2p, dp, dsize, norm));
+
+      refmpn_sb_div_qr (qp, n2p, nsize+1, d2p, dsize);
+      refmpn_rshift_or_copy (rp, n2p, dsize, norm);
+
+      /* ASSERT (refmpn_zero_p (tp+dsize, nsize-dsize)); */
+      free (n2p);
+      free (d2p);
+    }
+}
+
+/* Presumably a Montgomery style reduction, going by the name.  In each of
+   n steps a multiple of {mp,n} is added to the low n limbs at up, the
+   multiplier being up[0]*invm masked to GMP_NUMB_MASK.  The carry limb
+   from that is stored in up[0] and up then moves on by one limb.
+   Afterwards the n stored carry limbs are added to the n limbs above them,
+   the sum going to {rp,n}, and if that addition carries out then {mp,n}
+   is subtracted from {rp,n}.  {up,2*n} is overwritten.  */
+void
+refmpn_redc_1 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_limb_t invm)
+{
+  mp_size_t  step;
+  mp_limb_t  q, carry;
+
+  ASSERT_MPN (up, 2*n);
+  /* ASSERT about directed overlap rp, up */
+  /* ASSERT about overlap rp, mp */
+  /* ASSERT about overlap up, mp */
+
+  for (step = 0; step < n; step++)
+    {
+      q = (up[0] * invm) & GMP_NUMB_MASK;
+      up[0] = mpn_addmul_1 (up, mp, n, q);
+      up++;
+    }
+
+  /* up has advanced n limbs, the saved carry limbs are just below it */
+  carry = mpn_add_n (rp, up, up - n, n);
+  if (carry != 0)
+    mpn_sub_n (rp, rp, mp, n);
+}
+
+/* Convert {src,size} to digits in the given base, most significant digit
+   first, each stored as a raw value 0 to base-1 in dst.  The digits are
+   produced from the low end by repeated division by the base, filling dst
+   backwards from the MPN_SIZEINBASE estimate, then moved down to the
+   start of dst if the estimate was too big.  The number of digits stored
+   is returned; for size==0 that is a single 0 digit.
+
+   The division is done in place in src, except for a power of 2 base
+   where a copy is used so the caller's data is left alone.  */
+size_t
+refmpn_get_str (unsigned char *dst, int base, mp_ptr src, mp_size_t size)
+{
+  unsigned char  *d;
+  size_t  dsize;
+
+  ASSERT (size >= 0);
+  ASSERT (base >= 2);
+  ASSERT (base < numberof (mp_bases));
+  ASSERT (size == 0 || src[size-1] != 0);
+  ASSERT_MPN (src, size);
+
+  MPN_SIZEINBASE (dsize, src, size, base);
+  ASSERT (dsize >= 1);
+  ASSERT (! byte_overlap_p (dst, (mp_size_t) dsize, src, size * BYTES_PER_MP_LIMB));
+
+  if (size == 0)
+    {
+      dst[0] = 0;
+      return 1;
+    }
+
+  /* don't clobber input for power of 2 bases */
+  if (POW2_P (base))
+    src = refmpn_memdup_limbs (src, size);
+
+  /* size is non-zero here, so at least one digit is generated */
+  for (d = dst + dsize; size != 0; )
+    {
+      d--;
+      ASSERT (d >= dst);
+      *d = refmpn_divrem_1 (src, (mp_size_t) 0, src, size, (mp_limb_t) base);
+      if (src[size-1] == 0)
+        size--;
+    }
+
+  /* Move the result back and reduce dsize if the maximum possible number
+     of digits wasn't generated.  */
+  if (d != dst)
+    {
+      unsigned char  *out = dst;
+      size_t  left;
+
+      dsize -= d - dst;
+      for (left = dsize; left != 0; left--)
+        *out++ = *d++;
+    }
+
+  if (POW2_P (base))
+    free (src);
+
+  return dsize;
+}
+
+
+/* Reverse the order of the BYTES_PER_MP_LIMB bytes in src.  */
+mp_limb_t
+ref_bswap_limb (mp_limb_t src)
+{
+  mp_limb_t  swapped = 0;
+  int        left = BYTES_PER_MP_LIMB;
+
+  /* take bytes off the low end of src and push them into the low end of
+     the result, so the first one taken ends up highest */
+  while (left > 0)
+    {
+      swapped = (swapped << 8) | (src & 0xFF);
+      src >>= 8;
+      left--;
+    }
+
+  return swapped;
+}
+
+
+/* These random functions are mostly for transitional purposes while adding
+   nail support, since they're independent of the normal mpn routines.  They
+   can probably be removed when those normal routines are reliable, though
+   perhaps something independent would still be useful at times.  */
+
+/* multiplier for the seed update in refmpn_random_half */
+#if GMP_LIMB_BITS == 32
+#define RAND_A  CNST_LIMB(0x29CF535)
+#elif GMP_LIMB_BITS == 64
+#define RAND_A  CNST_LIMB(0xBAECD515DAF0B49D)
+#endif
+
+mp_limb_t  refmpn_random_seed;
+
+/* Step the seed as seed*RAND_A+1 and return the high half of the new
+   value, ie. GMP_LIMB_BITS/2 bits.  */
+mp_limb_t
+refmpn_random_half (void)
+{
+  mp_limb_t  next;
+
+  next = refmpn_random_seed * RAND_A + 1;
+  refmpn_random_seed = next;
+
+  return next >> (GMP_LIMB_BITS/2);
+}
+
+/* Return a random limb made from two refmpn_random_half values, masked to
+   GMP_NUMB_MASK.
+
+   The two halves are generated in separate statements, the high half
+   first.  With both calls in a single expression the order they're made
+   in is unspecified in C, and since each call steps the seed the value
+   produced could differ from one compiler to another.  */
+mp_limb_t
+refmpn_random_limb (void)
+{
+  mp_limb_t  high, low;
+
+  high = refmpn_random_half ();
+  low = refmpn_random_half ();
+
+  return ((high << (GMP_LIMB_BITS/2)) | low) & GMP_NUMB_MASK;
+}
+
+/* Fill {ptr,size} with random limbs.  Without nails this is simply the
+   library mpn_random, with nails each limb comes from
+   refmpn_random_limb.  */
+void
+refmpn_random (mp_ptr ptr, mp_size_t size)
+{
+  mp_size_t  i;
+
+  if (GMP_NAIL_BITS != 0)
+    {
+      for (i = 0; i < size; i++)
+        ptr[i] = refmpn_random_limb ();
+      return;
+    }
+
+  mpn_random (ptr, size);
+}
+
+/* Fill {ptr,size} with random data made of runs of ones and zeros.
+   Without nails this is simply the library mpn_random2.  With nails the
+   limbs are generated from the high end, a bit at a time, flipping between
+   ones and zeros after each run of 1 to RUN_MODULUS bits.  The starting
+   bit position in the high limb is random and the first run is ones.  */
+void
+refmpn_random2 (mp_ptr ptr, mp_size_t size)
+{
+  mp_size_t  pos;
+  mp_limb_t  bit, mask, limb;
+  int        run;
+
+  if (GMP_NAIL_BITS == 0)
+    {
+      mpn_random2 (ptr, size);
+      return;
+    }
+
+#define RUN_MODULUS  32
+
+  /* start with ones at a random pos in the high limb */
+  bit = CNST_LIMB(1) << (refmpn_random_half () % GMP_NUMB_BITS);
+  mask = 0;
+  run = 0;
+
+  pos = size;
+  while (--pos >= 0)
+    {
+      /* bit is non-zero on entry, so there's always at least one pass */
+      for (limb = 0; bit != 0; bit >>= 1, run--)
+        {
+          if (run == 0)
+            {
+              /* new run, of the opposite kind */
+              run = (refmpn_random_half () % RUN_MODULUS) + 1;
+              mask = ~mask;
+            }
+
+          limb |= (bit & mask);
+        }
+
+      ptr[pos] = limb;
+      bit = GMP_NUMB_HIGHBIT;
+    }
+}
+
+/* This is a simple bitwise algorithm working high to low across "s" and
+   testing each time whether setting the bit would make s^2 exceed n.
+
+   The root is built at {sp,(nsize+1)/2}.  If rp is NULL the return value
+   is non-zero if and only if the remainder is non-zero.  Otherwise the
+   remainder is copied to rp and its size in limbs is the return value.
+   For nsize==0 the return is 0 and nothing is stored.  */
+mp_size_t
+refmpn_sqrtrem (mp_ptr sp, mp_ptr rp, mp_srcptr np, mp_size_t nsize)
+{
+  mp_ptr     tp, dp;
+  mp_size_t  ssize, talloc, tsize, dsize, ret, ilimbs;
+  unsigned   ibit;
+  long       i;
+  mp_limb_t  c;
+
+  ASSERT (nsize >= 0);
+
+  /* If n==0, then s=0 and r=0.  */
+  if (nsize == 0)
+    return 0;
+
+  ASSERT (np[nsize - 1] != 0);
+  ASSERT (rp == NULL || MPN_SAME_OR_SEPARATE_P (np, rp, nsize));
+  ASSERT (rp == NULL || ! MPN_OVERLAP_P (sp, (nsize + 1) / 2, rp, nsize));
+  ASSERT (! MPN_OVERLAP_P (sp, (nsize + 1) / 2, np, nsize));
+
+  /* root, starting from zero and having bits set in the loop below */
+  ssize = (nsize+1)/2;
+  refmpn_zero (sp, ssize);
+
+  /* the remainder so far, initially a copy of n */
+  dp = refmpn_memdup_limbs (np, nsize);
+  dsize = nsize;
+
+  /* temporary, holding the "t" of the loop below */
+  talloc = 2*ssize + 1;
+  tp = refmpn_malloc_limbs (talloc);
+
+  for (i = GMP_NUMB_BITS * ssize - 1; i >= 0; i--)
+    {
+      /* t = 2*s*2^i + 2^(2*i), being the amount s^2 will increase by if 2^i
+        is added to it */
+
+      ilimbs = (i+1) / GMP_NUMB_BITS;  /* first 2*s*2^i, ie. s << (i+1) */
+      ibit = (i+1) % GMP_NUMB_BITS;
+      refmpn_zero (tp, ilimbs);
+      c = refmpn_lshift_or_copy (tp+ilimbs, sp, ssize, ibit);
+      tsize = ilimbs + ssize;
+      tp[tsize] = c;
+      tsize += (c != 0);
+
+      ilimbs = (2*i) / GMP_NUMB_BITS;  /* then add in the 2^(2*i) */
+      ibit = (2*i) % GMP_NUMB_BITS;
+      if (ilimbs + 1 > tsize)
+       {
+         refmpn_zero_extend (tp, tsize, ilimbs + 1);
+         tsize = ilimbs + 1;
+       }
+      c = refmpn_add_1 (tp+ilimbs, tp+ilimbs, tsize-ilimbs,
+                       CNST_LIMB(1) << ibit);
+      ASSERT (tsize < talloc);
+      tp[tsize] = c;
+      tsize += (c != 0);
+
+      if (refmpn_cmp_twosizes (dp, dsize, tp, tsize) >= 0)
+       {
+         /* set this bit in s and subtract from the remainder */
+         refmpn_setbit (sp, i);
+
+         ASSERT_NOCARRY (refmpn_sub_n (dp, dp, tp, dsize));
+         dsize = refmpn_normalize (dp, dsize);
+       }
+    }
+
+  if (rp == NULL)
+    {
+      ret = ! refmpn_zero_p (dp, dsize);  /* only zero or non-zero wanted */
+    }
+  else
+    {
+      ASSERT (dsize == 0 || dp[dsize-1] != 0);
+      refmpn_copy (rp, dp, dsize);
+      ret = dsize;
+    }
+
+  free (dp);
+  free (tp);
+  return ret;
+}
diff --git a/tests/refmpq.c b/tests/refmpq.c

new file mode 100644 (file)

index 0000000..460a480
--- /dev/null
+++ b/tests/refmpq.c
@@ -0,0 +1,41 @@
+/* Reference rational routines.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* Set w to x+y, as (num(x)*den(y) + den(x)*num(y)) / (den(x)*den(y)) put
+   into canonical form.
+
+   The new numerator is built in a temporary so that w may be the same
+   object as x or y.  Writing it straight into num(w) would destroy num(y)
+   before it was read when w and y are the same.  */
+void
+refmpq_add (mpq_ptr w, mpq_srcptr x, mpq_srcptr y)
+{
+  mpz_t  num;
+
+  mpz_init (num);
+  mpz_mul    (num, mpq_numref(x), mpq_denref(y));
+  mpz_addmul (num, mpq_denref(x), mpq_numref(y));
+
+  /* both numerators have been read, w can be written now */
+  mpz_mul    (mpq_denref(w), mpq_denref(x), mpq_denref(y));
+  mpz_swap   (mpq_numref(w), num);
+  mpz_clear (num);
+
+  mpq_canonicalize (w);
+}
+
+/* Set w to x-y, as (num(x)*den(y) - den(x)*num(y)) / (den(x)*den(y)) put
+   into canonical form.
+
+   The new numerator is built in a temporary so that w may be the same
+   object as x or y.  Writing it straight into num(w) would destroy num(y)
+   before it was read when w and y are the same.  */
+void
+refmpq_sub (mpq_ptr w, mpq_srcptr x, mpq_srcptr y)
+{
+  mpz_t  num;
+
+  mpz_init (num);
+  mpz_mul    (num, mpq_numref(x), mpq_denref(y));
+  mpz_submul (num, mpq_denref(x), mpq_numref(y));
+
+  /* both numerators have been read, w can be written now */
+  mpz_mul    (mpq_denref(w), mpq_denref(x), mpq_denref(y));
+  mpz_swap   (mpq_numref(w), num);
+  mpz_clear (num);
+
+  mpq_canonicalize (w);
+}
diff --git a/tests/refmpz.c b/tests/refmpz.c

new file mode 100644 (file)

index 0000000..29e0a16
--- /dev/null
+++ b/tests/refmpz.c
@@ -0,0 +1,269 @@
+/* Reference mpz functions.
+
+Copyright 1997, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+/* always do assertion checking */
+#define WANT_ASSERT  1
+
+#include <stdio.h>
+#include <stdlib.h> /* for free */
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+#include "tests.h"
+
+
+/* Change this to "#define TRACE(x) x" for some traces. */
+#define TRACE(x)
+
+
+/* Complement the given bit of r: clear it if set, set it if clear.
+   FIXME: Shouldn't use plain mpz functions in a reference routine. */
+void
+refmpz_combit (mpz_ptr r, unsigned long bit)
+{
+  int  was_set = (mpz_tstbit (r, bit) != 0);
+
+  if (! was_set)
+    mpz_setbit (r, bit);
+  else
+    mpz_clrbit (r, bit);
+}
+
+
+/* Hamming distance between x and y.  If exactly one of them is negative
+   (zero counting as non-negative) the return is ULONG_MAX.  Otherwise
+   both magnitudes are zero extended to the size of the bigger one, each
+   negative operand is negated across that size with refmpn_neg, and the
+   result is the refmpn_hamdist of the two.  */
+unsigned long
+refmpz_hamdist (mpz_srcptr x, mpz_srcptr y)
+{
+  mp_size_t      xsize, ysize, tsize;
+  mp_ptr         xp, yp;
+  unsigned long  ret;
+
+  if ((SIZ(x) < 0 && SIZ(y) >= 0)
+      || (SIZ(y) < 0 && SIZ(x) >= 0))
+    return ULONG_MAX;
+
+  xsize = ABSIZ(x);
+  ysize = ABSIZ(y);
+  tsize = MAX (xsize, ysize);
+
+  xp = refmpn_malloc_limbs (tsize);
+  refmpn_zero (xp, tsize);
+  refmpn_copy (xp, PTR(x), xsize);
+
+  yp = refmpn_malloc_limbs (tsize);
+  refmpn_zero (yp, tsize);
+  refmpn_copy (yp, PTR(y), ysize);
+
+  if (SIZ(x) < 0)
+    refmpn_neg (xp, xp, tsize);
+
+  /* Each operand is tested on its own sign.  The signs are known to agree
+     at this point, so this is the same as testing x again.  */
+  if (SIZ(y) < 0)
+    refmpn_neg (yp, yp, tsize);
+
+  ret = refmpn_hamdist (xp, yp, tsize);
+
+  free (xp);
+  free (yp);
+  return ret;
+}
+
+
+/* (0/b), with mpz b; is 1 if b=+/-1, 0 otherwise */
+#define JACOBI_0Z(b)  JACOBI_0LS (PTR(b)[0], SIZ(b))
+
+/* (a/b) effect due to sign of b: mpz/mpz */
+#define JACOBI_BSGN_ZZ_BIT1(a, b)   JACOBI_BSGN_SS_BIT1 (SIZ(a), SIZ(b))
+
+/* (a/b) effect due to sign of a: mpz/unsigned-mpz, b odd;
+   is (-1/b) if a<0, or +1 if a>=0 */
+#define JACOBI_ASGN_ZZU_BIT1(a, b)  JACOBI_ASGN_SU_BIT1 (SIZ(a), PTR(b)[0])
+
+/* Kronecker symbol (a/b), by a subtract and shift method.
+
+   The cases b==0, a==0, a and b both even, and b==1 are answered
+   directly.  Otherwise copies of a and b are made positive and odd, with
+   the effect of each sign change and each removed power of two folded
+   into result_bit1.  The loop then subtracts the smaller from the larger
+   and strips twos from the difference, again tracking the effect in
+   result_bit1, until b reaches 1 (the answer is taken from result_bit1)
+   or the difference reaches 0 (the answer is 0).  */
+int
+refmpz_kronecker (mpz_srcptr a_orig, mpz_srcptr b_orig)
+{
+  unsigned long  twos;
+  mpz_t  a, b;
+  int    result_bit1 = 0;
+  int    result;
+
+  if (mpz_sgn (b_orig) == 0)
+    return JACOBI_Z0 (a_orig);  /* (a/0) */
+
+  if (mpz_sgn (a_orig) == 0)
+    return JACOBI_0Z (b_orig);  /* (0/b) */
+
+  if (mpz_even_p (a_orig) && mpz_even_p (b_orig))
+    return 0;
+
+  if (mpz_cmp_ui (b_orig, 1) == 0)
+    return 1;
+
+  mpz_init_set (a, a_orig);
+  mpz_init_set (b, b_orig);
+
+  /* b to positive and odd */
+  if (mpz_sgn (b) < 0)
+    {
+      result_bit1 ^= JACOBI_BSGN_ZZ_BIT1 (a, b);
+      mpz_neg (b, b);
+    }
+  if (mpz_even_p (b))
+    {
+      twos = mpz_scan1 (b, 0L);
+      mpz_tdiv_q_2exp (b, b, twos);
+      result_bit1 ^= JACOBI_TWOS_U_BIT1 (twos, PTR(a)[0]);
+    }
+
+  /* a to positive and odd */
+  if (mpz_sgn (a) < 0)
+    {
+      result_bit1 ^= JACOBI_N1B_BIT1 (PTR(b)[0]);
+      mpz_neg (a, a);
+    }
+  if (mpz_even_p (a))
+    {
+      twos = mpz_scan1 (a, 0L);
+      mpz_tdiv_q_2exp (a, a, twos);
+      result_bit1 ^= JACOBI_TWOS_U_BIT1 (twos, PTR(b)[0]);
+    }
+
+  for (;;)
+    {
+      ASSERT (mpz_odd_p (a));
+      ASSERT (mpz_odd_p (b));
+      ASSERT (mpz_sgn (a) > 0);
+      ASSERT (mpz_sgn (b) > 0);
+
+      TRACE (printf ("top\n");
+            mpz_trace (" a", a);
+            mpz_trace (" b", b));
+
+      /* keep the larger in a */
+      if (mpz_cmp (a, b) < 0)
+        {
+          TRACE (printf ("swap\n"));
+          mpz_swap (a, b);
+          result_bit1 ^= JACOBI_RECIP_UU_BIT1 (PTR(a)[0], PTR(b)[0]);
+        }
+
+      if (mpz_cmp_ui (b, 1) == 0)
+        {
+          result = JACOBI_BIT1_TO_PN (result_bit1);
+          break;
+        }
+
+      mpz_sub (a, a, b);
+      TRACE (printf ("sub\n");
+            mpz_trace (" a", a));
+      if (mpz_sgn (a) == 0)
+        {
+          /* a and b had a common factor */
+          result = 0;
+          break;
+        }
+
+      twos = mpz_scan1 (a, 0L);
+      mpz_fdiv_q_2exp (a, a, twos);
+      TRACE (printf ("twos %lu\n", twos);
+            mpz_trace (" a", a));
+      result_bit1 ^= JACOBI_TWOS_U_BIT1 (twos, PTR(b)[0]);
+    }
+
+  mpz_clear (a);
+  mpz_clear (b);
+  return result;
+}
+
+/* Same as mpz_kronecker, but ignoring factors of 2 on b.  A copy of b has
+   its low zero bits shifted out (unless b is zero) and that copy is given
+   to refmpz_kronecker.  The copy is released before returning.  */
+int
+refmpz_jacobi (mpz_srcptr a, mpz_srcptr b)
+{
+  mpz_t  b_odd;
+  int    ret;
+
+  mpz_init_set (b_odd, b);
+  if (mpz_sgn (b_odd) != 0)
+    mpz_fdiv_q_2exp (b_odd, b_odd, mpz_scan1 (b_odd, 0L));
+
+  ret = refmpz_kronecker (a, b_odd);
+  mpz_clear (b_odd);
+  return ret;
+}
+
+/* Computed in exactly the same way as refmpz_jacobi.  */
+int
+refmpz_legendre (mpz_srcptr a, mpz_srcptr b)
+{
+  int  result;
+
+  result = refmpz_jacobi (a, b);
+  return result;
+}
+
+
+/* refmpz_kronecker (a/b) with an unsigned long b, converted to a temporary
+   mpz.  */
+int
+refmpz_kronecker_ui (mpz_srcptr a, unsigned long b)
+{
+  mpz_t  b_big;
+  int    result;
+
+  mpz_init_set_ui (b_big, b);
+  result = refmpz_kronecker (a, b_big);
+  mpz_clear (b_big);
+
+  return result;
+}
+
+/* refmpz_kronecker (a/b) with a long b, converted to a temporary mpz.  */
+int
+refmpz_kronecker_si (mpz_srcptr a, long b)
+{
+  mpz_t  b_big;
+  int    result;
+
+  mpz_init_set_si (b_big, b);
+  result = refmpz_kronecker (a, b_big);
+  mpz_clear (b_big);
+
+  return result;
+}
+
+/* refmpz_kronecker (a/b) with an unsigned long a, converted to a temporary
+   mpz.  */
+int
+refmpz_ui_kronecker (unsigned long a, mpz_srcptr b)
+{
+  mpz_t  a_big;
+  int    result;
+
+  mpz_init_set_ui (a_big, a);
+  result = refmpz_kronecker (a_big, b);
+  mpz_clear (a_big);
+
+  return result;
+}
+
+/* refmpz_kronecker (a/b) with a long a, converted to a temporary mpz.  */
+int
+refmpz_si_kronecker (long a, mpz_srcptr b)
+{
+  mpz_t  a_big;
+  int    result;
+
+  mpz_init_set_si (a_big, a);
+  result = refmpz_kronecker (a_big, b);
+  mpz_clear (a_big);
+
+  return result;
+}
+
+
+/* Set w to b^e by binary exponentiation, working up from the low bit of
+   e: s runs through b, b^2, b^4, ... and is multiplied into t for each 1
+   bit.  The result is 1 when e==0.  w may be the same object as b since
+   all the work is done in temporaries.
+
+   The loop shifts e down rather than shifting a mask up.  A mask wraps
+   around to zero when e has its top bit set, and a loop testing
+   mask <= e would then never terminate.  */
+void
+refmpz_pow_ui (mpz_ptr w, mpz_srcptr b, unsigned long e)
+{
+  mpz_t          s, t;
+
+  mpz_init_set_ui (t, 1L);
+  mpz_init_set (s, b);
+
+  if ((e & 1) != 0)
+    mpz_mul (t, t, s);
+
+  /* one squaring for each remaining bit up to the highest 1 bit */
+  for (e >>= 1; e != 0; e >>= 1)
+    {
+      mpz_mul (s, s, s);
+      if ((e & 1) != 0)
+        mpz_mul (t, t, s);
+    }
+
+  mpz_set (w, t);
+
+  mpz_clear (s);
+  mpz_clear (t);
+}
diff --git a/tests/spinner.c b/tests/spinner.c

new file mode 100644 (file)

index 0000000..343ecb5
--- /dev/null
+++ b/tests/spinner.c
@@ -0,0 +1,129 @@
+/* A stupid little spinning wheel designed to make it look like useful work
+   is being done.
+
+Copyright 1999, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#if HAVE_UNISTD_H
+#include <unistd.h>     /* for isatty */
+#endif
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#include "tests.h"
+
+
+/* "alarm" is not available on mingw32, and the SIGALRM constant is not
+   defined.  Don't bother with a spinner in this case.  */
+#if ! HAVE_ALARM || ! defined (SIGALRM)
+#define alarm(n)          abort()
+#define signal(sig,func)  SIG_ERR
+#endif
+
+
+/* An application can update this to get a count printed with the spinner.
+   If left at 0, no count is printed. */
+
+unsigned long  spinner_count = 0;
+
+
+int  spinner_wanted = -1;  /* -1 uninitialized, 1 wanted, 0 not */
+int  spinner_tick = 1;     /* 1 ready to print, 0 not */
+
+
+/* SIGALRM handler: flags that a spinner frame is due (spinner_tick = 1),
+   installs itself again as the SIGALRM handler and requests the next alarm
+   via alarm (1).  If the handler cannot be installed a message is printed
+   and the program aborts.  signum is not used.  */
+/*ARGSUSED*/
+RETSIGTYPE
+spinner_signal (int signum)
+{
+  spinner_tick = 1;
+
+  /* the handler is installed afresh on every delivery */
+  if (signal (SIGALRM, spinner_signal) == SIG_ERR)
+    {
+      printf ("spinner_signal(): Oops, cannot reinstall SIGALRM\n");
+      abort ();
+    }
+  alarm (1);
+}
+
+
+/* Initialize the spinner.
+
+   This is done the first time spinner() is called, so an application
+   doesn't need to call this directly.
+
+   The spinner is only wanted if the output is a tty.  */
+
+#define SPINNER_WANTED_INIT() \
+  if (spinner_wanted < 0) spinner_init ()
+
+/* Decide whether the spinner is wanted (stdout is a tty) and, if so, install
+   the SIGALRM handler, start the first alarm and make stdout unbuffered.
+   When the handler cannot be installed "(no spinner)" is printed and the
+   pending tick is cancelled.  */
+void
+spinner_init (void)
+{
+  spinner_wanted = isatty (fileno (stdout));
+
+  if (spinner_wanted == -1)
+    abort ();
+
+  if (spinner_wanted)
+    {
+      if (signal (SIGALRM, spinner_signal) == SIG_ERR)
+        {
+          printf ("(no spinner)\r");
+          spinner_tick = 0;
+        }
+      else
+        {
+          alarm (1);
+
+          /* unbuffered output so the spinner will show up */
+          setbuf (stdout, NULL);
+        }
+    }
+}
+
+
+/* Print the next spinner frame, followed by spinner_count when that is
+   non-zero, and then a carriage return so the next frame overwrites it.
+
+   Nothing is printed unless the spinner is wanted and a tick is pending.
+   The spinner_wanted test matters on the first call when stdout is not a
+   tty: spinner_tick starts out as 1, so without the test one stray frame
+   would be written to the (non-tty) output.  */
+void
+spinner (void)
+{
+  static const char  data[] = { '|', '/', '-', '\\' };
+  static int         pos = 0;
+
+  /* room for the frame char, " ", a decimal unsigned long, "\r" and NUL */
+  char  buf[128];
+
+  SPINNER_WANTED_INIT ();
+
+  if (! spinner_wanted || ! spinner_tick)
+    return;
+
+  buf[0] = data[pos];
+  pos = (pos + 1) % numberof (data);
+  spinner_tick = 0;
+
+  if (spinner_count != 0)
+    {
+      sprintf (buf+1, " %lu\r", spinner_count);
+    }
+  else
+    {
+      buf[1] = '\r';
+      buf[2] = '\0';
+    }
+  fputs (buf, stdout);
+}
diff --git a/tests/t-bswap.c b/tests/t-bswap.c

new file mode 100644 (file)

index 0000000..814ddfc
--- /dev/null
+++ b/tests/t-bswap.c
@@ -0,0 +1,71 @@
+/* Test BSWAP_LIMB.
+
+Copyright 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* Compare BSWAP_LIMB, BSWAP_LIMB_FETCH and BSWAP_LIMB_STORE against
+   ref_bswap_limb on 1000 limbs obtained from mpn_random.  On a mismatch
+   the name of the failing macro is printed, then src, want and got are
+   traced and the program aborts.  */
+int
+main (void)
+{
+  mp_limb_t  src, want, got;
+  int        i;
+
+  tests_start ();
+  mp_trace_base = -16;
+
+  for (i = 0; i < 1000; i++)
+    {
+      mpn_random (&src, (mp_size_t) 1);
+
+      want = ref_bswap_limb (src);
+
+      BSWAP_LIMB (got, src);
+      if (got != want)
+        {
+          printf ("BSWAP_LIMB wrong result\n");
+        error:
+          /* shared failure report, also reached by goto from the checks
+             further down */
+          mpn_trace ("  src ", &src,  (mp_size_t) 1);
+          mpn_trace ("  want", &want, (mp_size_t) 1);
+          mpn_trace ("  got ", &got,  (mp_size_t) 1);
+          abort ();
+        }
+
+      BSWAP_LIMB_FETCH (got, &src);
+      if (got != want)
+        {
+          printf ("BSWAP_LIMB_FETCH wrong result\n");
+          goto error;
+        }
+
+      BSWAP_LIMB_STORE (&got, src);
+      if (got != want)
+        {
+          printf ("BSWAP_LIMB_STORE wrong result\n");
+          goto error;
+        }
+    }
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/t-constants.c b/tests/t-constants.c

new file mode 100644 (file)

index 0000000..1c267e6
--- /dev/null
+++ b/tests/t-constants.c
@@ -0,0 +1,352 @@
+/* Check the values of some constants.
+
+Copyright 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "tests.h"
+
+
+/* Record, as printable strings, whether each <limits.h> style macro is
+   already defined at this point, ie. after including only stdio.h,
+   stdlib.h, gmp.h and tests.h and before gmp-impl.h is pulled in.  The
+   strings are printed by main() under the heading "After gmp.h".  */
+#ifdef ULONG_MAX
+char *ulong_max_def = "defined";
+#else
+char *ulong_max_def = "not defined";
+#endif
+#ifdef LONG_MAX
+char *long_max_def = "defined";
+#else
+char *long_max_def = "not defined";
+#endif
+
+#ifdef UINT_MAX
+char *uint_max_def = "defined";
+#else
+char *uint_max_def = "not defined";
+#endif
+#ifdef INT_MAX
+char *int_max_def = "defined";
+#else
+char *int_max_def = "not defined";
+#endif
+
+#ifdef USHRT_MAX
+char *ushrt_max_def = "defined";
+#else
+char *ushrt_max_def = "not defined";
+#endif
+#ifdef SHRT_MAX
+char *shrt_max_def = "defined";
+#else
+char *shrt_max_def = "not defined";
+#endif
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* LL(l,ll) expands to its second argument when _LONG_LONG_LIMB is defined
+   and to its first otherwise.  It is used below to pick between "l" and
+   "ll" printf formats for limb values.  */
+#ifdef _LONG_LONG_LIMB
+#define LL(l,ll)  ll
+#else
+#define LL(l,ll)  l
+#endif
+
+/* SS(i,l) expands to its first argument when __GMP_MP_SIZE_T_INT is
+   non-zero and to its second otherwise.  It is used below to pick a printf
+   format for mp_size_t values.  */
+#if __GMP_MP_SIZE_T_INT
+#define SS(i,l)   i
+#else
+#define SS(i,l)   l
+#endif
+
+
+/* Compare x and y; if they differ, print both in hex under the names xname
+   and yname and set the variable "error" in the enclosing scope to 1.  The
+   printf format is selected with LL().  */
+#define CHECK_LIMB_S(x, xname, y, yname)                \
+  do {                                                  \
+    if ((x) != (y))                                     \
+      {                                                 \
+        printf (LL("%s == %lx, but %s == %lx\n",        \
+                   "%s == %llx, but %s == %llx\n"),     \
+                xname, x, yname, y);                    \
+        error = 1;                                      \
+      }                                                 \
+  } while (0)
+
+/* Compare x and y; if they differ, print both with "%d" under the names
+   xname and yname and set the variable "error" in the enclosing scope
+   to 1.  */
+#define CHECK_INT_S(x, xname, y, yname)                                 \
+  do {                                                                  \
+    if ((x) != (y))                                                     \
+      {                                                                 \
+        printf ("%s == %d, but %s == %d\n", xname, x, yname, y);        \
+        error = 1;                                                      \
+      }                                                                 \
+  } while (0)
+
+
+
+/* If condition x is false, print "<xname> is false" and set the variable
+   "error" in the enclosing scope to 1.  */
+#define CHECK_CONDITION_S(x, xname)             \
+  do {                                          \
+    if (!(x))                                   \
+      {                                         \
+        printf ("%s is false\n", xname);        \
+        error = 1;                              \
+      }                                         \
+  } while (0)
+
+
+/* How many bits seem to work in the given type.
+
+   A variable of the type starts at 1 and is shifted left one place at a
+   time; result is set to the number of shifts made before the value
+   becomes zero.  */
+#define CALC_BITS(result, type) \
+  do {                          \
+    type  n = 1;                \
+    result = 0;                 \
+    while (n != 0)              \
+      {                         \
+        n <<= 1;                \
+        result++;               \
+      }                         \
+  } while (0)
+
+/* Check that constant equals the bit count CALC_BITS finds for type.  On a
+   mismatch both values are printed under constant_name and the variable
+   "error" in the enclosing scope is set to 1.  */
+#define CHECK_BITS_S(constant, constant_name, type)     \
+  do {                                                  \
+    int   calculated;                                   \
+    CALC_BITS (calculated, type);                       \
+    if (calculated != constant)                         \
+      {                                                 \
+        printf ("%s == %d, but calculated %d\n",        \
+                constant_name, constant, calculated);   \
+        error = 1;                                      \
+      }                                                 \
+  } while (0)
+
+
+/* Check that value, held in a variable of the given type, is non-zero and
+   becomes zero when shifted left by one bit.  Either failure prints a
+   message (the shifted value using the given printf format) and sets the
+   variable "error" in the enclosing scope to 1.  */
+#define CHECK_HIGHBIT_S(value, value_name, type, format)        \
+  do {                                                          \
+    type  n = value;                                            \
+    if (n == 0)                                                 \
+      {                                                         \
+        printf ("%s == 0\n", value_name);                       \
+        error = 1;                                              \
+      }                                                         \
+    n <<= 1;                                                    \
+    if (n != 0)                                                 \
+      {                                                         \
+        printf ("%s << 1 = ", value_name);                      \
+        printf (format, n);                                     \
+        printf (" != 0\n");                                     \
+        error = 1;                                              \
+      }                                                         \
+  } while (0)
+
+
+/* Two checks on a max/min pair, both evaluated in the given type: max_val
+   incremented by one must equal min_val, and max_val must be greater than
+   min_val.  Each failure prints the values with the given printf format
+   and sets the variable "error" in the enclosing scope to 1.  */
+#define CHECK_MAX_S(max_val, max_name, min_val, min_name, type, format) \
+  do {                                                                  \
+    type  maxval = max_val;                                             \
+    type  minval = min_val;                                             \
+    type  n = maxval;                                                   \
+    n++;                                                                \
+    if (n != minval)                                                    \
+      {                                                                 \
+        printf ("%s + 1 = ", max_name);                                 \
+        printf (format, n);                                             \
+        printf (" != %s = ", min_name);                                 \
+        printf (format, minval);                                        \
+        printf ("\n");                                                  \
+        error = 1;                                                      \
+      }                                                                 \
+    if (maxval <= minval)                                               \
+      {                                                                 \
+        printf ("%s = ", max_name);                                     \
+        printf (format, maxval);                                        \
+        printf (" <= %s = ", min_name);                                 \
+        printf (format, minval);                                        \
+        printf ("\n");                                                  \
+        error = 1;                                                      \
+      }                                                                 \
+  } while (0)
+
+
+/* Convenience forms of the CHECK_*_S macros which supply the name strings.
+   With HAVE_STRINGIZE the text of each argument is used as its name (via
+   the # operator); otherwise the names are just the fixed strings written
+   in the definitions below.  */
+#if HAVE_STRINGIZE
+#define CHECK_LIMB(x,y)      CHECK_LIMB_S (x, #x, y, #y)
+#define CHECK_INT(x,y)       CHECK_INT_S (x, #x, y, #y)
+#define CHECK_CONDITION(x)   CHECK_CONDITION_S (x, #x)
+#define CHECK_BITS(c,t)      CHECK_BITS_S (c, #c, t)
+#define CHECK_MAX(m,n,t,f)   CHECK_MAX_S (m, #m, n, #n, t, f)
+#define CHECK_HIGHBIT(n,t,f) CHECK_HIGHBIT_S (n, #n, t, f)
+#else
+#define CHECK_LIMB(x,y)      CHECK_LIMB_S (x, "x", y, "y")
+#define CHECK_INT(x,y)       CHECK_INT_S (x, "x", y, "y")
+#define CHECK_CONDITION(x)   CHECK_CONDITION_S (x, "x")
+#define CHECK_BITS(c,t)      CHECK_BITS_S (c, "c", t)
+#define CHECK_MAX(m,n,t,f)   CHECK_MAX_S (m, "m", n, "n", t, f)
+#define CHECK_HIGHBIT(n,t,f) CHECK_HIGHBIT_S (n, "n", t, f)
+#endif
+
+
+/* The tests below marked "Bad!" fail on Cray T90 systems, where int, short
+   and mp_size_t are 48 bits or some such but don't wrap around in a plain
+   two's complement fashion.  In particular,
+
+       INT_HIGHBIT << 1 = 0xFFFFC00000000000 != 0
+       INT_MAX + 1 = 35184372088832 != INT_MIN = -35184372088832
+
+   This is a bit bizarre, but doesn't matter because GMP doesn't rely on any
+   particular overflow behaviour for int or short, only for mp_limb_t.  */
+
+/* Run the constant checks.  Each failing check prints a message and sets
+   error.  If a command line argument is given, or some check failed, a
+   report is also printed showing which limit macros were defined, their
+   values, and the bit counts CALC_BITS finds for the basic types.  The
+   program aborts if error was set and otherwise exits with status 0.  */
+int
+main (int argc, char *argv[])
+{
+  int  error = 0;
+
+  CHECK_INT (BYTES_PER_MP_LIMB, (int) sizeof(mp_limb_t));
+  CHECK_INT (mp_bits_per_limb, GMP_LIMB_BITS);
+
+  CHECK_BITS (GMP_LIMB_BITS, mp_limb_t);
+  CHECK_BITS (BITS_PER_ULONG, unsigned long);
+
+  CHECK_HIGHBIT (GMP_LIMB_HIGHBIT, mp_limb_t,      LL("0x%lX","0x%llX"));
+  CHECK_HIGHBIT (ULONG_HIGHBIT,     unsigned long,  "0x%lX");
+  CHECK_HIGHBIT (UINT_HIGHBIT,      unsigned int,   "0x%X");
+  CHECK_HIGHBIT (USHRT_HIGHBIT,     unsigned short, "0x%hX");
+  CHECK_HIGHBIT (LONG_HIGHBIT,      long,           "0x%lX");
+#if 0 /* Bad! */
+  CHECK_HIGHBIT (INT_HIGHBIT,       int,            "0x%X");
+  CHECK_HIGHBIT (SHRT_HIGHBIT,      short,          "0x%hX");
+#endif
+
+#if 0 /* Bad! */
+  CHECK_MAX (LONG_MAX,      LONG_MIN,      long,           "%ld");
+  CHECK_MAX (INT_MAX,       INT_MIN,       int,            "%d");
+  CHECK_MAX (SHRT_MAX,      SHRT_MIN,      short,          "%hd");
+#endif
+  CHECK_MAX (ULONG_MAX,     0,             unsigned long,  "%lu");
+  CHECK_MAX (UINT_MAX,      0,             unsigned int,   "%u");
+  CHECK_MAX (USHRT_MAX,     0,             unsigned short, "%hu");
+#if 0 /* Bad! */
+  CHECK_MAX (MP_SIZE_T_MAX, MP_SIZE_T_MIN, mp_size_t,      SS("%d","%ld"));
+#endif
+
+  /* UHWtype should have at least enough bits for half a UWtype */
+  {
+    int  bits_per_UWtype, bits_per_UHWtype;
+    CALC_BITS (bits_per_UWtype,  UWtype);
+    CALC_BITS (bits_per_UHWtype, UHWtype);
+    CHECK_CONDITION (2*bits_per_UHWtype >= bits_per_UWtype);
+  }
+
+  /* MODLIMB_INVERSE_3 must match what binvert_limb computes for 3 ... */
+  ASSERT_ALWAYS_LIMB (MODLIMB_INVERSE_3);
+  {
+    mp_limb_t  modlimb_inverse_3_calc;
+    binvert_limb (modlimb_inverse_3_calc, CNST_LIMB(3));
+    ASSERT_ALWAYS_LIMB (modlimb_inverse_3_calc);
+    CHECK_LIMB (MODLIMB_INVERSE_3, modlimb_inverse_3_calc);
+  }
+  /* ... and multiplied by 3 must give 1 under GMP_NUMB_MASK */
+  {
+    mp_limb_t  MODLIMB_INVERSE_3_times_3
+      = (MODLIMB_INVERSE_3 * CNST_LIMB(3)) & GMP_NUMB_MASK;
+    CHECK_LIMB (MODLIMB_INVERSE_3_times_3, CNST_LIMB(1));
+  }
+
+  /* GMP_NUMB_CEIL_MAX_DIV3 must be the smallest value for which the high
+     limb of the product with 3 << GMP_NAIL_BITS reaches 1 */
+  {
+    mp_limb_t  hi, lo;
+    hi = refmpn_umul_ppmm (&lo, GMP_NUMB_CEIL_MAX_DIV3-1,
+                           CNST_LIMB(3) << GMP_NAIL_BITS);
+    if (! (hi < 1))
+      {
+        printf ("GMP_NUMB_CEIL_MAX_DIV3 too big\n");
+        error = 1;
+      }
+    hi = refmpn_umul_ppmm (&lo, GMP_NUMB_CEIL_MAX_DIV3,
+                           CNST_LIMB(3) << GMP_NAIL_BITS);
+    if (! (hi >= 1))
+      {
+        printf ("GMP_NUMB_CEIL_MAX_DIV3 too small\n");
+        error = 1;
+      }
+  }
+
+  /* likewise GMP_NUMB_CEIL_2MAX_DIV3, with a high limb threshold of 2 */
+  {
+    mp_limb_t  hi, lo;
+    hi = refmpn_umul_ppmm (&lo, GMP_NUMB_CEIL_2MAX_DIV3-1,
+                           CNST_LIMB(3) << GMP_NAIL_BITS);
+    if (! (hi < 2))
+      {
+        printf ("GMP_NUMB_CEIL_2MAX_DIV3 too big\n");
+        error = 1;
+      }
+    hi = refmpn_umul_ppmm (&lo, GMP_NUMB_CEIL_2MAX_DIV3,
+                           CNST_LIMB(3) << GMP_NAIL_BITS);
+    if (! (hi >= 2))
+      {
+        printf ("GMP_NUMB_CEIL_2MAX_DIV3 too small\n");
+        error = 1;
+      }
+  }
+
+#ifdef PP_INVERTED
+  {
+    mp_limb_t  pp_inverted_calc;
+    invert_limb (pp_inverted_calc, PP);
+    CHECK_LIMB (PP_INVERTED, pp_inverted_calc);
+  }
+#endif
+
+  /* diagnostic report, on request (any argument) or after a failure */
+  if (argc >= 2 || error)
+    {
+      int  bits;
+
+      printf ("\n");
+      printf ("After gmp.h,\n");
+      printf ("  ULONG_MAX  %s\n", ulong_max_def);
+      printf ("  LONG_MAX   %s\n", long_max_def);
+      printf ("  UINT_MAX   %s\n", uint_max_def);
+      printf ("  INT_MAX    %s\n", int_max_def);
+      printf ("  USHRT_MAX  %s\n", ushrt_max_def);
+      printf ("  SHRT_MAX   %s\n", shrt_max_def);
+      printf ("\n");
+
+#ifdef _CRAY
+      printf ("_CRAY is defined, so limits.h is being used\n");
+#endif
+
+      printf ("ULONG_MAX      %lX\n", ULONG_MAX);
+      printf ("ULONG_HIGHBIT  %lX\n", ULONG_HIGHBIT);
+      printf ("LONG_MAX       %lX\n", LONG_MAX);
+      printf ("LONG_MIN       %lX\n", LONG_MIN);
+
+      printf ("UINT_MAX       %X\n", UINT_MAX);
+      printf ("UINT_HIGHBIT   %X\n", UINT_HIGHBIT);
+      printf ("INT_MAX        %X\n", INT_MAX);
+      printf ("INT_MIN        %X\n", INT_MIN);
+
+      printf ("USHRT_MAX      %hX\n", USHRT_MAX);
+      printf ("USHRT_HIGHBIT  %hX\n", USHRT_HIGHBIT);
+      printf ("SHRT_MAX       %hX\n", SHRT_MAX);
+      printf ("SHRT_MIN       %hX\n", SHRT_MIN);
+
+      printf ("\n");
+      printf ("Bits\n");
+      CALC_BITS (bits, long);           printf ("  long           %d\n", bits);
+      CALC_BITS (bits, int);            printf ("  int            %d\n", bits);
+      CALC_BITS (bits, short);          printf ("  short          %d\n", bits);
+      CALC_BITS (bits, unsigned long);  printf ("  unsigned long  %d\n", bits);
+      CALC_BITS (bits, unsigned int);   printf ("  unsigned int   %d\n", bits);
+      CALC_BITS (bits, unsigned short); printf ("  unsigned short %d\n", bits);
+      CALC_BITS (bits, mp_size_t);      printf ("  mp_size_t      %d\n", bits);
+    }
+
+  if (error)
+    abort ();
+
+  exit (0);
+}
diff --git a/tests/t-count_zeros.c b/tests/t-count_zeros.c

new file mode 100644 (file)

index 0000000..8dc34eb
--- /dev/null
+++ b/tests/t-count_zeros.c
@@ -0,0 +1,87 @@
+/* Test count_leading_zeros and count_trailing_zeros.
+
+Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+#include "tests.h"
+
+/* Abort, after printing n and the wanted and obtained counts, unless
+   count_leading_zeros gives "want" for n.  */
+void
+check_clz (int want, mp_limb_t n)
+{
+  int  found;
+
+  count_leading_zeros (found, n);
+  if (found == want)
+    return;
+
+  printf        ("count_leading_zeros wrong\n");
+  mp_limb_trace ("  n    ", n);
+  printf        ("  want %d\n", want);
+  printf        ("  got  %d\n", found);
+  abort ();
+}
+
+/* Abort, after printing n and the wanted and obtained counts, unless
+   count_trailing_zeros gives "want" for n.  */
+void
+check_ctz (int want, mp_limb_t n)
+{
+  int  found;
+
+  count_trailing_zeros (found, n);
+  if (found == want)
+    return;
+
+  printf ("count_trailing_zeros wrong\n");
+  mpn_trace ("  n    ", &n, (mp_size_t) 1);
+  printf    ("  want %d\n", want);
+  printf    ("  got  %d\n", found);
+  abort ();
+}
+
+/* For every bit position i in a limb, check both macros on a limb with
+   only one bit set and on an all-ones limb shifted by i places.  When
+   COUNT_LEADING_ZEROS_0 is defined, count_leading_zeros of 0 is checked
+   against it too.  */
+void
+check_various (void)
+{
+  int        i;
+
+#ifdef COUNT_LEADING_ZEROS_0
+  check_clz (COUNT_LEADING_ZEROS_0, CNST_LIMB(0));
+#endif
+
+  for (i=0; i < GMP_LIMB_BITS; i++)
+    {
+      /* single bit with i zeros above it, respectively below it */
+      check_clz (i, CNST_LIMB(1) << (GMP_LIMB_BITS-1-i));
+      check_ctz (i, CNST_LIMB(1) << i);
+
+      /* all ones with i zeros shifted in at the bottom, respectively top */
+      check_ctz (i, MP_LIMB_T_MAX << i);
+      check_clz (i, MP_LIMB_T_MAX >> i);
+    }
+}
+
+
+/* Test driver: runs check_various between tests_start and tests_end, then
+   exits with status 0.  The command line arguments are not used.  */
+int
+main (int argc, char *argv[])
+{
+  tests_start ();
+  mp_trace_base = 16;
+
+  check_various ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/t-gmpmax.c b/tests/t-gmpmax.c

new file mode 100644 (file)

index 0000000..efe166f
--- /dev/null
+++ b/tests/t-gmpmax.c
@@ -0,0 +1,73 @@
+/* Check the values of __GMP_UINT_MAX etc.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <limits.h>
+#include "gmp.h"
+
+
+/* __GMP_UINT_MAX etc are generated with expressions in gmp.h since we don't
+   want to demand <limits.h> or forcibly include it.  Check the expressions
+   come out the same as <limits.h>.  */
+
+/* Compare __GMP_UINT_MAX, __GMP_ULONG_MAX and __GMP_USHRT_MAX with the
+   corresponding <limits.h> macros, for each of those that is defined.  A
+   mismatch prints both values and sets error; the program aborts after all
+   comparisons if error was set, and otherwise exits with status 0.  The
+   command line arguments are not used.  */
+int
+main (int argc, char *argv[])
+{
+  int  error = 0;
+
+#ifdef UINT_MAX
+  if (__GMP_UINT_MAX != UINT_MAX)
+    {
+      printf ("__GMP_UINT_MAX incorrect\n");
+      printf ("  __GMP_UINT_MAX  %u  0x%X\n", __GMP_UINT_MAX, __GMP_UINT_MAX);
+      printf ("  UINT_MAX        %u  0x%X\n", UINT_MAX, UINT_MAX);
+      error = 1;
+    }
+#endif
+
+  /* gcc 2.95.2 limits.h on solaris 2.5.1 incorrectly selects a 64-bit
+     LONG_MAX, leading to some integer overflow in ULONG_MAX and a spurious
+     __GMP_ULONG_MAX != ULONG_MAX.  Casting ULONG_MAX to unsigned long is a
+     workaround.  */
+#ifdef ULONG_MAX
+  if (__GMP_ULONG_MAX != (unsigned long) ULONG_MAX)
+    {
+      printf ("__GMP_ULONG_MAX incorrect\n");
+      printf ("  __GMP_ULONG_MAX  %lu  0x%lX\n", __GMP_ULONG_MAX, __GMP_ULONG_MAX);
+      printf ("  ULONG_MAX        %lu  0x%lX\n", ULONG_MAX, ULONG_MAX);
+      error = 1;
+    }
+#endif
+
+#ifdef USHRT_MAX
+  if (__GMP_USHRT_MAX != USHRT_MAX)
+    {
+      printf ("__GMP_USHRT_MAX incorrect\n");
+      printf ("  __GMP_USHRT_MAX  %hu  0x%hX\n", __GMP_USHRT_MAX, __GMP_USHRT_MAX);
+      printf ("  USHRT_MAX        %hu  0x%hX\n", USHRT_MAX, USHRT_MAX);
+      error = 1;
+    }
+#endif
+
+  if (error)
+    abort ();
+
+  exit (0);
+}
diff --git a/tests/t-hightomask.c b/tests/t-hightomask.c

new file mode 100644 (file)

index 0000000..9e6c109
--- /dev/null
+++ b/tests/t-hightomask.c
@@ -0,0 +1,43 @@
+/* Test LIMB_HIGHBIT_TO_MASK.
+
+Copyright 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* There's very little to these tests, but it's nice to have them since if
+   something has gone wrong with the arithmetic right shift business in
+   LIMB_HIGHBIT_TO_MASK then the only symptom is likely to be failures in
+   udiv_qrnnd_preinv, which would not be easy to diagnose.  */
+
+/* Assert LIMB_HIGHBIT_TO_MASK on five fixed inputs: the two with the top
+   limb bit set (GMP_LIMB_HIGHBIT, MP_LIMB_T_MAX) must give MP_LIMB_T_MAX,
+   the three with it clear must give 0.  */
+int
+main (void)
+{
+  ASSERT_ALWAYS (LIMB_HIGHBIT_TO_MASK (0) == 0);
+  ASSERT_ALWAYS (LIMB_HIGHBIT_TO_MASK (GMP_LIMB_HIGHBIT) == MP_LIMB_T_MAX);
+  ASSERT_ALWAYS (LIMB_HIGHBIT_TO_MASK (MP_LIMB_T_MAX) == MP_LIMB_T_MAX);
+  ASSERT_ALWAYS (LIMB_HIGHBIT_TO_MASK (GMP_LIMB_HIGHBIT >> 1) == 0);
+  ASSERT_ALWAYS (LIMB_HIGHBIT_TO_MASK (MP_LIMB_T_MAX >> 1) == 0);
+
+  exit (0);
+}
diff --git a/tests/t-modlinv.c b/tests/t-modlinv.c

new file mode 100644 (file)

index 0000000..2baf6c7
--- /dev/null
+++ b/tests/t-modlinv.c
@@ -0,0 +1,84 @@
+/* Test binvert_limb.
+
+Copyright 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+#include "tests.h"
+
+
+/* Check binvert_limb on n: the inverse times n, masked with GMP_NUMB_MASK,
+   must be 1.  Otherwise n, the inverse and the product are traced and the
+   program aborts.  */
+void
+one (mp_limb_t n)
+{
+  mp_limb_t  inverse, check;
+
+  binvert_limb (inverse, n);
+  check = (inverse * n) & GMP_NUMB_MASK;
+  if (check == 1)
+    return;
+
+  printf ("binvert_limb wrong\n");
+  mp_limb_trace ("  n       ", n);
+  mp_limb_trace ("  got     ", inverse);
+  mp_limb_trace ("  product ", check);
+  abort ();
+}
+
+/* Check 10000 limbs from refmpn_random_limb, each forced odd.  */
+void
+some (void)
+{
+  int  remaining;
+
+  for (remaining = 10000; remaining > 0; remaining--)
+    one (refmpn_random_limb () | 1);
+}
+
+/* Check every odd limb value: start at 1 and step by 2 until the value
+   wraps around to 1 again.  */
+void
+all (void)
+{
+  mp_limb_t  n = 1;
+
+  for (;;)
+    {
+      one (n);
+      n += 2;
+      if (n == 1)
+        break;
+    }
+}
+
+
+/* Test driver: with "-a" as the first argument every odd limb is checked,
+   otherwise a random selection.  */
+int
+main (int argc, char *argv[])
+{
+  /* it's feasible to run all values on a 32-bit limb, but not a 64-bit */
+  int  want_all = (argc >= 2 && strcmp (argv[1], "-a") == 0);
+
+  tests_start ();
+
+  if (want_all)
+    all ();
+  else
+    some ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/t-parity.c b/tests/t-parity.c

new file mode 100644 (file)

index 0000000..922dd51
--- /dev/null
+++ b/tests/t-parity.c
@@ -0,0 +1,67 @@
+/* Test ULONG_PARITY.
+
+Copyright 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+/* Abort, after printing n and the wanted and obtained values, unless
+   ULONG_PARITY gives "want" for n.  */
+void
+check_one (int want, unsigned long n)
+{
+  int  parity;
+
+  ULONG_PARITY (parity, n);
+  if (parity == want)
+    return;
+
+  printf ("ULONG_PARITY wrong\n");
+  printf ("  n    %lX\n", n);
+  printf ("  want %d\n", want);
+  printf ("  got  %d\n", parity);
+  abort ();
+}
+
+/* Check ULONG_PARITY on a handful of fixed values and on every unsigned
+   long having exactly one bit set.  */
+void
+check_various (void)
+{
+  int  i;
+
+  check_one (0, 0L);
+  check_one (BITS_PER_ULONG & 1, ULONG_MAX);
+  check_one (0, 0x11L);
+  check_one (1, 0x111L);
+  check_one (1, 0x3111L);
+
+  /* 1UL rather than 1L: shifting a signed 1 into the sign bit, as happens
+     at i == BITS_PER_ULONG-1, is undefined behaviour */
+  for (i = 0; i < BITS_PER_ULONG; i++)
+    check_one (1, 1UL << i);
+}
+
+
+/* Test driver: runs check_various between tests_start and tests_end, then
+   exits with status 0.  The command line arguments are not used.  */
+int
+main (int argc, char *argv[])
+{
+  tests_start ();
+  mp_trace_base = 16;
+
+  check_various ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/t-popc.c b/tests/t-popc.c

new file mode 100644 (file)

index 0000000..7d0f4e0
--- /dev/null
+++ b/tests/t-popc.c
@@ -0,0 +1,66 @@
+/* Test popc_limb.
+
+Copyright 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* Check popc_limb.  First every limb with exactly one bit set must give a
+   count of 1; then 100 limbs from mpn_random2 are compared against
+   ref_popc_limb.  A mismatch traces src, want and got and aborts.  */
+int
+main (void)
+{
+  mp_limb_t  src, want, got;
+  int        i;
+
+  tests_start ();
+  mp_trace_base = -16;
+
+  for (i = 0; i < GMP_LIMB_BITS; i++)
+    {
+      src = CNST_LIMB(1) << i;
+      want = 1;
+
+      popc_limb (got, src);
+      if (got != want)
+        {
+        error:
+          /* shared failure report, also reached by goto from the second
+             loop */
+          printf ("popc_limb wrong result\n");
+          mpn_trace ("  src ", &src,  (mp_size_t) 1);
+          mpn_trace ("  want", &want, (mp_size_t) 1);
+          mpn_trace ("  got ", &got,  (mp_size_t) 1);
+          abort ();
+        }
+    }
+
+  for (i = 0; i < 100; i++)
+    {
+      mpn_random2 (&src, (mp_size_t) 1);
+      want = ref_popc_limb (src);
+
+      popc_limb (got, src);
+      if (got != want)
+        goto error;
+    }
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/t-sub.c b/tests/t-sub.c

new file mode 100644 (file)

index 0000000..5eb78c9
--- /dev/null
+++ b/tests/t-sub.c
@@ -0,0 +1,115 @@
+/* Test sub_ddmmss.
+
+Copyright 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+#include "tests.h"
+
+
+void
+check_data (void)
+{
+#define M  MP_LIMB_T_MAX
+  /* want_dh,want_dl is the expected two-limb value of mh,ml minus sh,sl.  */
+  static const struct {
+    mp_limb_t  want_dh,want_dl, mh,ml, sh,sl;
+  } data[] = {
+    { 0,0,  0,0,  0,0 },
+    { 0,0,  0,1,  0,1 },
+    { 0,0,  1,2,  1,2 },
+    /* plain difference, borrow from the high limb, wrap past zero */
+    { 0,1,  0,2,  0,1 },
+    { 0,M,  1,0,  0,1 },
+    { M,M,  0,0,  0,1 },
+    /* low limbs near MP_LIMB_T_MAX */
+    { M,M,  0,M-1,  0,M },
+    { 0,0,  0,M-1,  0,M-1 },
+    { 0,1,  0,M-1,  0,M-2 },
+  };
+  mp_limb_t  dh, dl;
+  int  idx;
+
+  for (idx = 0; idx < numberof (data); idx++)
+    {
+      sub_ddmmss (dh,dl, data[idx].mh,data[idx].ml, data[idx].sh,data[idx].sl);
+      if (dh == data[idx].want_dh && dl == data[idx].want_dl)
+        continue;
+
+      printf ("check_data wrong at data[%d]\n", idx);
+      mp_limb_trace ("  mh", data[idx].mh);
+      mp_limb_trace ("  ml", data[idx].ml);
+      mp_limb_trace ("  sh", data[idx].sh);
+      mp_limb_trace ("  sl", data[idx].sl);
+      mp_limb_trace ("  want dh", data[idx].want_dh);
+      mp_limb_trace ("  want dl", data[idx].want_dl);
+      mp_limb_trace ("  got dh ", dh);
+      mp_limb_trace ("  got dl ", dl);
+      abort ();
+    }
+}
+
+/* Compare sub_ddmmss with refmpn_sub_ddmmss on 20 sets of urandom limbs.  */
+void
+check_random (void)
+{
+  mp_limb_t  want_dh,want_dl, got_dh,got_dl, mh,ml, sh,sl;
+  int  i;
+
+  for (i = 0; i < 20; i++)
+    {
+      mh = urandom ();
+      ml = urandom ();
+      sh = urandom ();
+      sl = urandom ();
+
+      refmpn_sub_ddmmss (&want_dh,&want_dl, mh,ml, sh,sl);
+      sub_ddmmss (got_dh,got_dl, mh,ml, sh,sl);
+
+      if (got_dh != want_dh || got_dl != want_dl)
+        {
+          printf ("check_random wrong at i=%d\n", i);
+          mp_limb_trace ("  mh", mh);
+          mp_limb_trace ("  ml", ml);
+          mp_limb_trace ("  sh", sh);
+          mp_limb_trace ("  sl", sl);
+          mp_limb_trace ("  want dh", want_dh);
+          mp_limb_trace ("  want dl", want_dl);
+          mp_limb_trace ("  got dh ", got_dh);
+          mp_limb_trace ("  got dl ", got_dl);
+          abort ();
+        }
+    }
+}
+
+int
+main (void)
+{
+  tests_start ();
+  mp_trace_base = -16;  /* hex traces; negative for upper case, per trace.c */
+  /* Fixed table first, then random operands against refmpn_sub_ddmmss.  */
+  check_data ();
+  check_random ();
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/tests.h b/tests/tests.h

new file mode 100644 (file)

index 0000000..b12b3d4
--- /dev/null
+++ b/tests/tests.h
@@ -0,0 +1,493 @@
+/* Tests support prototypes etc.
+
+Copyright 2000, 2001, 2002, 2003, 2004, 2008, 2009 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#ifndef __TESTS_H__
+#define __TESTS_H__
+
+#include "config.h"
+
+#include <setjmp.h>  /* for jmp_buf */
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* ANYARGS is an unspecified parameter list: "..." in C++, empty in C.  */
+#ifdef __cplusplus
+#define ANYARGS  ...
+#else
+#define ANYARGS
+#endif
+
+
+void tests_start __GMP_PROTO ((void));
+void tests_end __GMP_PROTO ((void));
+
+void tests_memory_start __GMP_PROTO ((void));
+void tests_memory_end __GMP_PROTO ((void));
+void *tests_allocate __GMP_PROTO ((size_t size));
+void *tests_reallocate __GMP_PROTO ((void *ptr, size_t old_size, size_t new_size));
+void tests_free __GMP_PROTO ((void *ptr, size_t size));
+void tests_free_nosize __GMP_PROTO ((void *ptr));
+int tests_memory_valid __GMP_PROTO ((void *ptr));
+
+void tests_rand_start __GMP_PROTO ((void));
+void tests_rand_end __GMP_PROTO ((void));
+
+double tests_infinity_d __GMP_PROTO ((void));
+int tests_hardware_getround __GMP_PROTO ((void));
+int tests_hardware_setround __GMP_PROTO ((int));
+int tests_isinf __GMP_PROTO ((double));
+int tests_dbl_mant_bits __GMP_PROTO ((void));
+
+void x86_fldcw __GMP_PROTO ((unsigned short));
+unsigned short x86_fstcw __GMP_PROTO ((void));
+
+
+/* tests_setjmp_sigfpe is like a setjmp, establishing a trap for SIGFPE.
+   The initial return is 0, if SIGFPE is trapped execution goes back there
+   with return value 1.  The macro expands to signal() and SIGFPE, and this
+   header only pulls in <setjmp.h> itself, so users presumably need
+   <signal.h> too.  tests_sigfpe_done puts SIGFPE back to SIG_DFL, which
+   should be used once the setjmp point is out of scope, so a later SIGFPE
+   won't try to go back there.  */
+
+#define tests_setjmp_sigfpe()                   \
+  (signal (SIGFPE, tests_sigfpe_handler),       \
+   setjmp (tests_sigfpe_target))
+
+RETSIGTYPE tests_sigfpe_handler __GMP_PROTO ((int));
+void tests_sigfpe_done __GMP_PROTO ((void));
+extern jmp_buf  tests_sigfpe_target;
+
+/* CALLING_CONVENTIONS(f) routes f via calling_conventions() if available.  */
+#if HAVE_CALLING_CONVENTIONS
+extern mp_limb_t (*calling_conventions_function) __GMP_PROTO ((ANYARGS));
+mp_limb_t calling_conventions __GMP_PROTO ((ANYARGS));
+int calling_conventions_check __GMP_PROTO ((void));
+#define CALLING_CONVENTIONS(function) \
+  (calling_conventions_function = (function), calling_conventions)
+#define CALLING_CONVENTIONS_CHECK()    (calling_conventions_check())
+#else
+#define CALLING_CONVENTIONS(function)  (function)
+#define CALLING_CONVENTIONS_CHECK()    1 /* always ok */
+#endif
+
+
+extern int mp_trace_base;
+void mp_limb_trace __GMP_PROTO ((const char *, mp_limb_t));
+void mpn_trace __GMP_PROTO ((const char *name, mp_srcptr ptr, mp_size_t size));
+void mpn_tracea __GMP_PROTO ((const char *name, const mp_ptr *a, int count,
+                 mp_size_t size));
+void mpn_tracen __GMP_PROTO ((const char *name, int num, mp_srcptr ptr,
+                 mp_size_t size));
+void mpn_trace_file __GMP_PROTO ((const char *filename,
+                             mp_srcptr ptr, mp_size_t size));
+void mpn_tracea_file __GMP_PROTO ((const char *filename,
+                              const mp_ptr *a, int count, mp_size_t size));
+void mpf_trace __GMP_PROTO ((const char *name, mpf_srcptr z));
+void mpq_trace __GMP_PROTO ((const char *name, mpq_srcptr q));
+void mpz_trace __GMP_PROTO ((const char *name, mpz_srcptr z));
+void mpz_tracen __GMP_PROTO ((const char *name, int num, mpz_srcptr z));
+void byte_trace __GMP_PROTO ((const char *, const void *, mp_size_t));
+void byte_tracen __GMP_PROTO ((const char *, int, const void *, mp_size_t));
+void d_trace __GMP_PROTO ((const char *, double));
+
+
+void spinner __GMP_PROTO ((void));
+extern unsigned long  spinner_count;
+extern int  spinner_wanted;
+extern int  spinner_tick;
+
+
+void *align_pointer __GMP_PROTO ((void *p, size_t align));
+void *__gmp_allocate_func_aligned __GMP_PROTO ((size_t bytes, size_t align));
+void *__gmp_allocate_or_reallocate __GMP_PROTO ((void *ptr,
+                                          size_t oldsize, size_t newsize));
+char *__gmp_allocate_strdup __GMP_PROTO ((const char *s));
+char *strtoupper __GMP_PROTO ((char *s_orig));
+mp_limb_t urandom __GMP_PROTO ((void));
+void call_rand_algs __GMP_PROTO ((void (*func) (const char *, gmp_randstate_t)));
+
+
+void mpf_set_str_or_abort __GMP_PROTO ((mpf_ptr f, const char *str, int base));
+
+
+void mpq_set_str_or_abort __GMP_PROTO ((mpq_ptr q, const char *str, int base));
+
+
+void mpz_erandomb __GMP_PROTO ((mpz_ptr rop, gmp_randstate_t rstate,
+                           unsigned long nbits));
+void mpz_erandomb_nonzero __GMP_PROTO ((mpz_ptr rop, gmp_randstate_t rstate,
+                                   unsigned long nbits));
+void mpz_errandomb __GMP_PROTO ((mpz_ptr rop, gmp_randstate_t rstate,
+                            unsigned long nbits));
+void mpz_errandomb_nonzero __GMP_PROTO ((mpz_ptr rop, gmp_randstate_t rstate,
+                                    unsigned long nbits));
+void mpz_init_set_n __GMP_PROTO ((mpz_ptr z, mp_srcptr p, mp_size_t size));
+void mpz_negrandom __GMP_PROTO ((mpz_ptr rop, gmp_randstate_t rstate));
+int mpz_pow2abs_p __GMP_PROTO ((mpz_srcptr z)) __GMP_ATTRIBUTE_PURE;
+void mpz_set_n __GMP_PROTO ((mpz_ptr z, mp_srcptr p, mp_size_t size));
+void mpz_set_str_or_abort __GMP_PROTO ((mpz_ptr z, const char *str, int base));
+
+mp_size_t mpn_diff_highest __GMP_PROTO ((mp_srcptr p1, mp_srcptr p2, mp_size_t n)) __GMP_ATTRIBUTE_PURE;
+mp_size_t mpn_diff_lowest __GMP_PROTO ((mp_srcptr p1, mp_srcptr p2, mp_size_t n)) __GMP_ATTRIBUTE_PURE;
+mp_size_t byte_diff_highest __GMP_PROTO ((const void *p1, const void *p2, mp_size_t size)) __GMP_ATTRIBUTE_PURE;
+mp_size_t byte_diff_lowest __GMP_PROTO ((const void *p1, const void *p2, mp_size_t size)) __GMP_ATTRIBUTE_PURE;
+
+
+mp_limb_t ref_addc_limb __GMP_PROTO ((mp_limb_t *, mp_limb_t, mp_limb_t));
+mp_limb_t ref_bswap_limb __GMP_PROTO ((mp_limb_t src));
+unsigned long ref_popc_limb __GMP_PROTO ((mp_limb_t src));
+mp_limb_t ref_subc_limb __GMP_PROTO ((mp_limb_t *, mp_limb_t, mp_limb_t));
+
+
+void refmpf_add __GMP_PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr));
+void refmpf_add_ulp __GMP_PROTO ((mpf_ptr f));
+void refmpf_fill __GMP_PROTO ((mpf_ptr f, mp_size_t size, mp_limb_t value));
+void refmpf_normalize __GMP_PROTO ((mpf_ptr f));
+void refmpf_set_prec_limbs __GMP_PROTO ((mpf_ptr f, unsigned long prec));
+unsigned long refmpf_set_overlap __GMP_PROTO ((mpf_ptr dst, mpf_srcptr src));
+void refmpf_sub __GMP_PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr));
+int refmpf_validate __GMP_PROTO ((const char *name, mpf_srcptr got, mpf_srcptr want));
+int refmpf_validate_division __GMP_PROTO ((const char *name, mpf_srcptr got,
+                                           mpf_srcptr n, mpf_srcptr d));
+
+
+mp_limb_t refmpn_add __GMP_PROTO ((mp_ptr rp,
+                              mp_srcptr s1p, mp_size_t s1size,
+                              mp_srcptr s2p, mp_size_t s2size));
+mp_limb_t refmpn_add_1 __GMP_PROTO ((mp_ptr rp, mp_srcptr sp, mp_size_t size,
+                                mp_limb_t n));
+mp_limb_t refmpn_add_n __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
+                                mp_size_t size));
+mp_limb_t refmpn_add_nc __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
+                                 mp_size_t size, mp_limb_t carry));
+mp_limb_t refmpn_addlsh1_n __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
+                                mp_size_t size));
+mp_limb_t refmpn_addlsh2_n __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
+                                mp_size_t size));
+mp_limb_t refmpn_addlsh_n __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
+                                mp_size_t size, unsigned int));
+mp_limb_t refmpn_addmul_1 __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_size_t size,
+                                   mp_limb_t multiplier));
+mp_limb_t refmpn_addmul_1c __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_size_t size,
+                                    mp_limb_t multiplier, mp_limb_t carry));
+mp_limb_t refmpn_addmul_2 __GMP_PROTO ((mp_ptr dst, mp_srcptr src,
+                                        mp_size_t size, mp_srcptr mult));
+mp_limb_t refmpn_addmul_3 __GMP_PROTO ((mp_ptr dst, mp_srcptr src,
+                                        mp_size_t size, mp_srcptr mult));
+mp_limb_t refmpn_addmul_4 __GMP_PROTO ((mp_ptr dst, mp_srcptr src,
+                                        mp_size_t size, mp_srcptr mult));
+mp_limb_t refmpn_addmul_5 __GMP_PROTO ((mp_ptr dst, mp_srcptr src,
+                                        mp_size_t size, mp_srcptr mult));
+mp_limb_t refmpn_addmul_6 __GMP_PROTO ((mp_ptr dst, mp_srcptr src,
+                                        mp_size_t size, mp_srcptr mult));
+mp_limb_t refmpn_addmul_7 __GMP_PROTO ((mp_ptr dst, mp_srcptr src,
+                                        mp_size_t size, mp_srcptr mult));
+mp_limb_t refmpn_addmul_8 __GMP_PROTO ((mp_ptr dst, mp_srcptr src,
+                                        mp_size_t size, mp_srcptr mult));
+
+mp_limb_t refmpn_add_n_sub_n __GMP_PROTO ((mp_ptr r1p, mp_ptr r2p,
+                                   mp_srcptr s1p, mp_srcptr s2p,
+                                   mp_size_t size));
+mp_limb_t refmpn_add_n_sub_nc __GMP_PROTO ((mp_ptr r1p, mp_ptr r2p,
+                                    mp_srcptr s1p, mp_srcptr s2p,
+                                    mp_size_t size, mp_limb_t carry));
+
+void refmpn_and_n  __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
+                            mp_size_t size));
+void refmpn_andn_n __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
+                            mp_size_t size));
+
+mp_limb_t refmpn_big_base __GMP_PROTO ((int));
+
+int refmpn_chars_per_limb __GMP_PROTO ((int));
+void refmpn_clrbit __GMP_PROTO ((mp_ptr, unsigned long));
+int refmpn_cmp __GMP_PROTO ((mp_srcptr s1p, mp_srcptr s2p, mp_size_t size));
+int refmpn_cmp_allowzero __GMP_PROTO ((mp_srcptr, mp_srcptr, mp_size_t));
+int refmpn_cmp_twosizes __GMP_PROTO ((mp_srcptr xp, mp_size_t xsize,
+                                 mp_srcptr yp, mp_size_t ysize));
+
+void refmpn_com __GMP_PROTO ((mp_ptr rp, mp_srcptr sp, mp_size_t size));
+void refmpn_copy  __GMP_PROTO ((mp_ptr rp, mp_srcptr sp, mp_size_t size));
+void refmpn_copyi __GMP_PROTO ((mp_ptr rp, mp_srcptr sp, mp_size_t size));
+void refmpn_copyd __GMP_PROTO ((mp_ptr rp, mp_srcptr sp, mp_size_t size));
+void refmpn_copy_extend __GMP_PROTO ((mp_ptr wp, mp_size_t wsize, mp_srcptr xp, mp_size_t xsize));
+
+unsigned refmpn_count_leading_zeros __GMP_PROTO ((mp_limb_t x));
+unsigned refmpn_count_trailing_zeros __GMP_PROTO ((mp_limb_t x));
+
+mp_limb_t refmpn_divexact_by3 __GMP_PROTO ((mp_ptr rp, mp_srcptr sp,
+                                       mp_size_t size));
+mp_limb_t refmpn_divexact_by3c __GMP_PROTO ((mp_ptr rp, mp_srcptr sp,
+                                       mp_size_t size, mp_limb_t carry));
+
+mp_limb_t refmpn_divmod_1 __GMP_PROTO ((mp_ptr rp, mp_srcptr sp, mp_size_t size,
+                                   mp_limb_t divisor));
+mp_limb_t refmpn_divmod_1c __GMP_PROTO ((mp_ptr rp, mp_srcptr sp, mp_size_t size,
+                                    mp_limb_t divisor, mp_limb_t carry));
+mp_limb_t refmpn_divrem_1 __GMP_PROTO ((mp_ptr rp, mp_size_t xsize,
+                                   mp_srcptr sp, mp_size_t size,
+                                   mp_limb_t divisor));
+mp_limb_t refmpn_divrem_1c __GMP_PROTO ((mp_ptr rp, mp_size_t xsize,
+                                    mp_srcptr sp, mp_size_t size,
+                                    mp_limb_t divisor, mp_limb_t carry));
+mp_limb_t refmpn_divrem_2 __GMP_PROTO ((mp_ptr, mp_size_t, mp_ptr, mp_size_t,
+                                       mp_srcptr));
+
+int refmpn_equal_anynail __GMP_PROTO ((mp_srcptr, mp_srcptr, mp_size_t));
+
+void refmpn_fill __GMP_PROTO ((mp_ptr p, mp_size_t s, mp_limb_t v));
+
+mp_limb_t refmpn_gcd_1 __GMP_PROTO ((mp_srcptr xp, mp_size_t xsize, mp_limb_t y));
+mp_limb_t refmpn_gcd __GMP_PROTO ((mp_ptr gp, mp_ptr xp, mp_size_t xsize,
+                              mp_ptr yp, mp_size_t ysize));
+
+size_t refmpn_get_str __GMP_PROTO ((unsigned char *, int, mp_ptr, mp_size_t));
+
+unsigned long refmpn_hamdist __GMP_PROTO ((mp_srcptr s1p, mp_srcptr s2p,
+                                      mp_size_t size));
+
+mp_limb_t refmpn_invert_limb __GMP_PROTO ((mp_limb_t d));
+void refmpn_ior_n  __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
+                            mp_size_t size));
+void refmpn_iorn_n __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
+                            mp_size_t size));
+
+mp_limb_t refmpn_lshift __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned));
+mp_limb_t refmpn_lshift_or_copy __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned));
+mp_limb_t refmpn_lshift_or_copy_any __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned));
+mp_limb_t refmpn_lshiftc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned));
+void refmpn_com __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
+
+mp_ptr refmpn_malloc_limbs __GMP_PROTO ((mp_size_t size));
+mp_ptr refmpn_malloc_limbs_aligned __GMP_PROTO ((mp_size_t n, size_t m));
+void refmpn_free_limbs __GMP_PROTO ((mp_ptr p));
+mp_limb_t refmpn_msbone __GMP_PROTO ((mp_limb_t x));
+mp_limb_t refmpn_msbone_mask __GMP_PROTO ((mp_limb_t x));
+mp_ptr refmpn_memdup_limbs __GMP_PROTO ((mp_srcptr ptr, mp_size_t size));
+
+mp_limb_t refmpn_mod_1 __GMP_PROTO ((mp_srcptr sp, mp_size_t size,
+                                mp_limb_t divisor));
+mp_limb_t refmpn_mod_1c __GMP_PROTO ((mp_srcptr sp, mp_size_t size,
+                                 mp_limb_t divisor, mp_limb_t carry));
+mp_limb_t refmpn_mod_34lsub1 __GMP_PROTO ((mp_srcptr p, mp_size_t n));
+
+mp_limb_t refmpn_mul_1 __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_size_t size,
+                                mp_limb_t multiplier));
+mp_limb_t refmpn_mul_1c __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_size_t size,
+                                 mp_limb_t multiplier, mp_limb_t carry));
+mp_limb_t refmpn_mul_2 __GMP_PROTO ((mp_ptr dst, mp_srcptr src, mp_size_t size,
+                                    mp_srcptr mult));
+mp_limb_t refmpn_mul_3 __GMP_PROTO ((mp_ptr dst, mp_srcptr src, mp_size_t size,
+                                    mp_srcptr mult));
+mp_limb_t refmpn_mul_4 __GMP_PROTO ((mp_ptr dst, mp_srcptr src, mp_size_t size,
+                                    mp_srcptr mult));
+
+void refmpn_mul_basecase __GMP_PROTO ((mp_ptr prodp,
+                                  mp_srcptr up, mp_size_t usize,
+                                  mp_srcptr vp, mp_size_t vsize));
+void refmpn_mullo_n __GMP_PROTO ((mp_ptr prodp,
+                                 mp_srcptr up, mp_srcptr vp, mp_size_t vsize));
+void refmpn_mul_any __GMP_PROTO ((mp_ptr prodp,
+                             mp_srcptr up, mp_size_t usize,
+                             mp_srcptr vp, mp_size_t vsize));
+void refmpn_mul_n __GMP_PROTO ((mp_ptr prodp, mp_srcptr up, mp_srcptr vp,
+                           mp_size_t size));
+void refmpn_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));
+
+void refmpn_nand_n __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
+                            mp_size_t size));
+void refmpn_nior_n __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
+                            mp_size_t size));
+mp_limb_t refmpn_neg __GMP_PROTO ((mp_ptr dst, mp_srcptr src, mp_size_t size));
+mp_size_t refmpn_normalize __GMP_PROTO ((mp_srcptr, mp_size_t));
+
+unsigned long refmpn_popcount __GMP_PROTO ((mp_srcptr sp, mp_size_t size));
+mp_limb_t refmpn_preinv_divrem_1 __GMP_PROTO ((mp_ptr rp, mp_size_t xsize,
+                                          mp_srcptr sp, mp_size_t size,
+                                          mp_limb_t divisor,
+                                          mp_limb_t inverse, unsigned shift));
+mp_limb_t refmpn_preinv_mod_1 __GMP_PROTO ((mp_srcptr sp, mp_size_t size,
+                                       mp_limb_t divisor,
+                                       mp_limb_t divisor_inverse));
+
+void refmpn_random __GMP_PROTO ((mp_ptr, mp_size_t));
+void refmpn_random2 __GMP_PROTO ((mp_ptr, mp_size_t));
+mp_limb_t refmpn_random_limb __GMP_PROTO ((void));
+
+mp_limb_t refmpn_rsh1add_n __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
+                                mp_size_t size));
+mp_limb_t refmpn_rsh1sub_n __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
+                                mp_size_t size));
+mp_limb_t refmpn_rshift __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_size_t size,
+                                 unsigned shift));
+mp_limb_t refmpn_rshift_or_copy __GMP_PROTO ((mp_ptr wp,
+                                         mp_srcptr xp, mp_size_t size,
+                                         unsigned shift));
+mp_limb_t refmpn_rshift_or_copy_any __GMP_PROTO ((mp_ptr wp,
+                                                  mp_srcptr xp, mp_size_t size,
+                                                  unsigned shift));
+
+mp_limb_t refmpn_sb_div_qr __GMP_PROTO ((mp_ptr,
+                                        mp_ptr, mp_size_t,
+                                        mp_srcptr, mp_size_t));
+unsigned long refmpn_scan0 __GMP_PROTO ((mp_srcptr, unsigned long));
+unsigned long refmpn_scan1 __GMP_PROTO ((mp_srcptr, unsigned long));
+void refmpn_setbit __GMP_PROTO ((mp_ptr, unsigned long));
+void refmpn_sqr __GMP_PROTO ((mp_ptr dst, mp_srcptr src, mp_size_t size));
+mp_size_t refmpn_sqrtrem __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t));
+
+void refmpn_sub_ddmmss __GMP_PROTO ((mp_limb_t *, mp_limb_t *,
+                                     mp_limb_t, mp_limb_t,
+                                     mp_limb_t, mp_limb_t));
+mp_limb_t refmpn_sub __GMP_PROTO ((mp_ptr rp,
+                              mp_srcptr s1p, mp_size_t s1size,
+                              mp_srcptr s2p, mp_size_t s2size));
+mp_limb_t refmpn_sub_1 __GMP_PROTO ((mp_ptr rp, mp_srcptr sp, mp_size_t size,
+                                mp_limb_t n));
+mp_limb_t refmpn_sub_n __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
+                                mp_size_t size));
+mp_limb_t refmpn_sub_nc __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
+                                 mp_size_t size, mp_limb_t carry));
+mp_limb_t refmpn_sublsh1_n __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
+                                mp_size_t size));
+mp_limb_t refmpn_sublsh_n __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
+                                mp_size_t size, unsigned int));
+mp_limb_t refmpn_submul_1 __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_size_t size,
+                                   mp_limb_t multiplier));
+mp_limb_t refmpn_submul_1c __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_size_t size,
+                                    mp_limb_t multiplier, mp_limb_t carry));
+
+mp_limb_signed_t refmpn_rsblsh1_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+mp_limb_signed_t refmpn_rsblsh2_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+mp_limb_signed_t refmpn_rsblsh_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int));
+
+void refmpn_tdiv_qr __GMP_PROTO ((mp_ptr qp, mp_ptr rp, mp_size_t qxn,
+                             mp_ptr np, mp_size_t nsize,
+                             mp_srcptr dp, mp_size_t dsize));
+int refmpn_tstbit __GMP_PROTO ((mp_srcptr, unsigned long));
+
+mp_limb_t refmpn_udiv_qrnnd __GMP_PROTO ((mp_limb_t *, mp_limb_t, mp_limb_t, mp_limb_t));
+mp_limb_t refmpn_udiv_qrnnd_r __GMP_PROTO ((mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t *));
+mp_limb_t refmpn_umul_ppmm __GMP_PROTO ((mp_limb_t *, mp_limb_t, mp_limb_t));
+mp_limb_t refmpn_umul_ppmm_r __GMP_PROTO ((mp_limb_t, mp_limb_t, mp_limb_t *));
+
+void refmpn_xnor_n __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
+                            mp_size_t size));
+void refmpn_xor_n  __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
+                            mp_size_t size));
+
+void refmpn_zero __GMP_PROTO ((mp_ptr p, mp_size_t s));
+void refmpn_zero_extend __GMP_PROTO ((mp_ptr, mp_size_t, mp_size_t));
+int refmpn_zero_p __GMP_PROTO ((mp_srcptr ptr, mp_size_t size));
+
+void refmpn_binvert __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
+void refmpn_invert __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
+
+
+void refmpq_add __GMP_PROTO ((mpq_ptr w, mpq_srcptr x, mpq_srcptr y));
+void refmpq_sub __GMP_PROTO ((mpq_ptr w, mpq_srcptr x, mpq_srcptr y));
+
+
+void refmpz_combit __GMP_PROTO ((mpz_ptr r, unsigned long bit));
+unsigned long refmpz_hamdist __GMP_PROTO ((mpz_srcptr x, mpz_srcptr y));
+int refmpz_kronecker __GMP_PROTO ((mpz_srcptr a_orig, mpz_srcptr b_orig));
+int refmpz_jacobi __GMP_PROTO ((mpz_srcptr a_orig, mpz_srcptr b_orig));
+int refmpz_legendre __GMP_PROTO ((mpz_srcptr a_orig, mpz_srcptr b_orig));
+int refmpz_kronecker_si __GMP_PROTO ((mpz_srcptr, long));
+int refmpz_kronecker_ui __GMP_PROTO ((mpz_srcptr, unsigned long));
+int refmpz_si_kronecker __GMP_PROTO ((long, mpz_srcptr));
+int refmpz_ui_kronecker __GMP_PROTO ((unsigned long, mpz_srcptr));
+
+void refmpz_pow_ui __GMP_PROTO ((mpz_ptr w, mpz_srcptr b, unsigned long e));
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+
+/* Establish ostringstream and istringstream.  Do this here so as to hide
+   the conditionals, rather than putting stuff in each test program.
+
+   Oldish versions of g++, like 2.95.2, don't have <sstream>, only
+   <strstream>.  Fake up ostringstream and istringstream classes, but not a
+   full implementation, just enough for our purposes.  */
+
+#ifdef __cplusplus
+#if HAVE_SSTREAM
+#include <sstream>
+#else /* ! HAVE_SSTREAM */
+#include <string>
+#include <strstream>
+class
+ostringstream : public std::ostrstream {
+ public:
+  string str() {
+    int  pcount = ostrstream::pcount ();
+    char *s = (char *) (*__gmp_allocate_func) (pcount + 1);
+    memcpy (s, ostrstream::str(), pcount);
+    s[pcount] = '\0';
+    string ret = string(s);
+    (*__gmp_free_func) (s, pcount + 1);
+    return ret; }
+};
+class
+istringstream : public std::istrstream {
+ public:
+  istringstream (const char *s) : istrstream (s) { };
+};
+#endif /* ! HAVE_SSTREAM */
+#endif /* __cplusplus */
+
+
+#define TESTS_REPS(count, argv, argc)                                  \
+  do {                                                                 \
+  char *envval, *end;                                                  \
+  long repfactor;                                                      \
+  if (argc > 1)                                                                \
+    {                                                                  \
+      count = strtol (argv[1], &end, 0);                               \
+      if (*end || count <= 0)                                          \
+       {                                                               \
+         fprintf (stderr, "Invalid test count: %s.\n", argv[1]);       \
+         exit (1);                                                     \
+       }                                                               \
+      argv++;                                                          \
+      argc--;                                                          \
+    }                                                                  \
+  envval = getenv ("GMP_CHECK_REPFACTOR");                             \
+  if (envval != NULL)                                                  \
+    {                                                                  \
+      repfactor = strtol (envval, &end, 0);                            \
+      if (*end || repfactor <= 0)                                      \
+       {                                                               \
+         fprintf (stderr, "Invalid repfactor: %ld.\n", repfactor);     \
+         exit (1);                                                     \
+       }                                                               \
+      count *= repfactor;                                              \
+    }                                                                  \
+  } while (0)
+
+
+#endif /* __TESTS_H__ */
diff --git a/tests/trace.c b/tests/trace.c

new file mode 100644 (file)

index 0000000..93ee735
--- /dev/null
+++ b/tests/trace.c
@@ -0,0 +1,319 @@
+/* Support for diagnostic traces.
+
+Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+/* Future: Would like commas printed between limbs in hex or binary, but
+   perhaps not always since it might upset cutting and pasting into bc or
+   whatever.  */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h> /* for strlen */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#include "tests.h"
+
+
+/* Number base for the various trace printing routines.
+   Set this in main() or with the debugger.
+   Within this file mp_trace_start and mpf_trace use only the absolute
+   value, while byte_trace prints lower case hex for 16 and upper case hex
+   for -16.
+   If hexadecimal is going to be fed into GNU bc, remember to use -16
+   because bc requires upper case.  */
+
+int  mp_trace_base = 10;
+
+
+/* Print the start of a trace line to stdout: an optional "name=" label
+   followed by a marker for the absolute value of mp_trace_base, namely
+   "bin:", "oct:", "0x", nothing for decimal, or "baseN:" for any other
+   base.  A NULL or empty name prints no label.  */
+void
+mp_trace_start (const char *name)
+{
+  int  base = ABS (mp_trace_base);
+
+  if (name != NULL && name[0] != '\0')
+    printf ("%s=", name);
+
+  if (base == 2)
+    printf ("bin:");
+  else if (base == 8)
+    printf ("oct:");
+  else if (base == 16)
+    printf ("0x");
+  else if (base != 10)
+    printf ("base%d:", base);
+}
+
+/* Trace an mpq_t: writes "name=value\n" to stdout in base mp_trace_base,
+   or "name=NULL\n" when q is a NULL pointer.  */
+void
+mpq_trace (const char *name, mpq_srcptr q)
+{
+  mp_trace_start (name);
+
+  if (q != NULL)
+    {
+      mpq_out_str (stdout, mp_trace_base, q);
+      printf ("\n");
+    }
+  else
+    printf ("NULL\n");
+}
+
+
+/* Trace an mpz_t: writes "name=value\n" to stdout, or "name=NULL\n" for a
+   NULL pointer.
+
+   The value is printed through mpq_trace by dressing z up as a temporary
+   rational whose numerator shares z's limbs and whose denominator is a
+   single limb holding 1.  Nothing is allocated or copied.  */
+void
+mpz_trace (const char *name, mpz_srcptr z)
+{
+  mp_limb_t  unit = 1;
+  mpq_t      frac;
+
+  if (z == NULL)
+    {
+      mpq_trace (name, NULL);
+      return;
+    }
+
+  /* numerator is an alias of z */
+  PTR (mpq_numref (frac)) = PTR(z);
+  SIZ (mpq_numref (frac)) = SIZ(z);
+  ALLOC (mpq_numref (frac)) = ALLOC(z);
+
+  /* denominator is the constant 1 */
+  PTR (mpq_denref (frac)) = &unit;
+  SIZ (mpq_denref (frac)) = 1;
+  ALLOC (mpq_denref (frac)) = 1;
+
+  mpq_trace (name, frac);
+}
+
+
+/* Trace an mpf_t: writes "name=value\n" to stdout, or "name=NULL\n" for a
+   NULL pointer.  The absolute value of mp_trace_base is used as the base,
+   and 0 is passed as the digit count to mpf_out_str.  */
+void
+mpf_trace (const char *name, mpf_srcptr f)
+{
+  mp_trace_start (name);
+
+  if (f != NULL)
+    {
+      mpf_out_str (stdout, ABS (mp_trace_base), 0, f);
+      printf ("\n");
+    }
+  else
+    printf ("NULL\n");
+}
+
+
+/* Numbered variant of mpz_trace: writes "namenum=value\n" to stdout.
+   "name" is used as a printf format with "num" as its only argument, so it
+   should contain a "%d".  A NULL or empty name prints the value alone.  */
+void
+mpz_tracen (const char *name, int num, mpz_srcptr z)
+{
+  int  labelled = (name != NULL && name[0] != '\0');
+
+  if (labelled)
+    {
+      printf (name, num);
+      fputc ('=', stdout);
+    }
+
+  /* label already printed, so none is passed on */
+  mpz_trace (NULL, z);
+}
+
+
+/* Trace an mpn style ptr,size operand: writes "name=value\n" to stdout, or
+   "name=NULL\n" when ptr is NULL.  High zero limbs are stripped with
+   MPN_NORMALIZE and the limbs are then viewed through a temporary mpz_t
+   that points at the caller's data.  */
+void
+mpn_trace (const char *name, mp_srcptr ptr, mp_size_t size)
+{
+  mpz_t  view;
+
+  if (ptr != NULL)
+    {
+      MPN_NORMALIZE (ptr, size);
+      ALLOC(view) = size;
+      SIZ(view) = size;
+      PTR(view) = (mp_ptr) ptr;
+      mpz_trace (name, view);
+    }
+  else
+    mpz_trace (name, NULL);
+}
+
+/* Trace a single limb as "name=value\n" on stdout.  The nail part of the
+   limb need not be zero: in a nails build the limb is split into its numb
+   part and whatever lies above it, and printed as a two limb number.  */
+void
+mp_limb_trace (const char *name, mp_limb_t n)
+{
+#if GMP_NAIL_BITS == 0
+  mpn_trace (name, &n, (mp_size_t) 1);
+#else
+  mp_limb_t  parts[2];
+  parts[0] = n & GMP_NUMB_MASK;
+  parts[1] = n >> GMP_NUMB_BITS;
+  mpn_trace (name, parts, (mp_size_t) 2);
+#endif
+}
+
+
+/* Numbered variant of mpn_trace: writes "namenum=value\n" to stdout for an
+   mpn style ptr,size.  "name" is used as a printf format with "num" as its
+   argument, so it should contain a "%d".  */
+void
+mpn_tracen (const char *name, int num, mp_srcptr ptr, mp_size_t size)
+{
+  if (name != NULL && *name != '\0')
+    {
+      printf (name, num);
+      fputc ('=', stdout);
+    }
+
+  /* the label is done, only the value remains */
+  mpn_trace (NULL, ptr, size);
+}
+
+
+/* Trace an array of mpn operands, one "namenum=value\n" line each.
+
+   "a" holds "count" pointers and every a[i] points to "size" limbs.  The
+   parameter is not declared as mp_srcptr because that provokes compiler
+   warnings, but the limbs are not modified.
+
+   "name" should have a printf style "%d", which receives the array index.  */
+
+void
+mpn_tracea (const char *name, const mp_ptr *a, int count, mp_size_t size)
+{
+  int  idx = 0;
+
+  while (idx < count)
+    {
+      mpn_tracen (name, idx, a[idx], size);
+      idx++;
+    }
+}
+
+
+/* Print "value\n" to a file for an mpn style ptr,size.  Any previous
+   contents of the file are overwritten, so you need different file names
+   each time this is called.
+
+   Overwriting the file is a feature, it means you get old data replaced
+   when you run a test program repeatedly.
+
+   The value is written in base mp_trace_base after stripping high zero
+   limbs.  Failure to open or write the file is fatal: a message is printed
+   and abort() is called.  */
+
+void
+mpn_trace_file (const char *filename, mp_srcptr ptr, mp_size_t size)
+{
+  FILE   *fp;
+  mpz_t  z;
+
+  fp = fopen (filename, "w");
+  if (fp == NULL)
+    {
+      perror ("fopen");
+      abort();
+    }
+
+  /* View the limbs through a temporary mpz_t.  All three fields are set,
+     as in mpn_trace, so no indeterminate field is handed to the library.  */
+  MPN_NORMALIZE (ptr, size);
+  PTR(z) = (mp_ptr) ptr;
+  SIZ(z) = (int) size;
+  ALLOC(z) = (int) size;
+
+  mpz_out_str (fp, mp_trace_base, z);
+  fprintf (fp, "\n");
+
+  if (ferror (fp) || fclose (fp) != 0)
+    {
+      printf ("error writing %s\n", filename);
+      abort();
+    }
+}
+
+
+/* Write each element of an array of mpn operands to its own file as
+   "value\n".  The files are named "filenameN" for N from 0 to count-1, and
+   existing contents are overwritten, so use different names on each call.
+
+   "a" holds "count" pointers and every a[i] points to "size" limbs.  The
+   parameter is not declared as mp_srcptr because that provokes compiler
+   warnings, but the limbs are not modified.
+
+   Overwriting is deliberate: rerunning a test program replaces stale data.
+   The output isn't pretty, but it gets something out which can be fed to
+   bc or similar. */
+
+void
+mpn_tracea_file (const char *filename,
+                 const mp_ptr *a, int count, mp_size_t size)
+{
+  size_t  buflen;
+  char    *path;
+  int     n;
+  TMP_DECL;
+
+  TMP_MARK;
+
+  /* 50 spare bytes leave room for the decimal index and the terminator */
+  buflen = strlen (filename) + 50;
+  path = (char *) TMP_ALLOC (buflen);
+
+  n = 0;
+  while (n < count)
+    {
+      sprintf (path, "%s%d", filename, n);
+      mpn_trace_file (path, a[n], size);
+      n++;
+    }
+
+  TMP_FREE;
+}
+
+
+/* Print "name=" and the base marker, then the "size" bytes at "ptr" as
+   space separated numbers, then a newline, all to stdout.
+
+   Only mp_trace_base values 8, 10, 16 (lower case hex) and -16 (upper case
+   hex) are supported; anything else prints a message and aborts.  Note the
+   label and base marker have already been printed by then.  */
+void
+byte_trace (const char *name, const void *ptr, mp_size_t size)
+{
+  /* keep the const qualifier of ptr, the bytes are only read */
+  const unsigned char  *bytes = (const unsigned char *) ptr;
+  const char           *fmt;
+  mp_size_t            i;
+
+  mp_trace_start (name);
+
+  switch (mp_trace_base) {
+  case   8: fmt = " %o"; break;
+  case  10: fmt = " %d"; break;
+  case  16: fmt = " %x"; break;
+  case -16: fmt = " %X"; break;
+  default: printf ("Oops, unsupported base in byte_trace\n"); abort (); break;
+  }
+
+  for (i = 0; i < size; i++)
+    printf (fmt, (int) bytes[i]);
+  printf ("\n");
+}
+
+/* Numbered variant of byte_trace.  "name" is used as a printf format with
+   "num" as its argument, so it should contain a "%d".  A NULL or empty
+   name prints no label.  */
+void
+byte_tracen (const char *name, int num, const void *ptr, mp_size_t size)
+{
+  int  labelled = (name != NULL && *name != '\0');
+
+  if (labelled)
+    {
+      printf (name, num);
+      fputc ('=', stdout);
+    }
+
+  /* label already printed, so none is passed on */
+  byte_trace (NULL, ptr, size);
+}
+
+
+/* Print a double to stdout as "name=[XX XX ...] value\n".  The bracketed
+   part is the object representation of d, one two-digit upper case hex
+   number per byte in memory order, and value is d printed with "%.20g".
+   A NULL or empty name prints no label.  */
+void
+d_trace (const char *name, double d)
+{
+  union {
+    double         d;
+    unsigned char  b[sizeof(double)];
+  } u;
+  size_t  i;   /* same type as sizeof, avoids a signed/unsigned compare */
+
+  if (name != NULL && name[0] != '\0')
+    printf ("%s=", name);
+
+  u.d = d;
+  printf ("[");
+  for (i = 0; i < sizeof (u.b); i++)
+    {
+      if (i != 0)
+        printf (" ");
+      printf ("%02X", (int) u.b[i]);
+    }
+  printf ("] %.20g\n", d);
+}
diff --git a/tests/x86call.asm b/tests/x86call.asm

new file mode 100644 (file)

index 0000000..47af42b
--- /dev/null
+++ b/tests/x86call.asm
@@ -0,0 +1,115 @@
+dnl  x86 calling conventions checking.
+
+dnl  Copyright 2000, 2003 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+
+C void x86_fldcw (unsigned short cw);
+C
+C Execute an fldcw, setting the x87 control word to cw.
+C
+C cw is loaded straight from its stack slot at 4(%esp), the slot just above
+C the return address.
+
+PROLOGUE(x86_fldcw)
+        fldcw   4(%esp)
+        ret
+EPILOGUE()
+
+
+C unsigned short x86_fstcw (void);
+C
+C Execute an fstcw, returning the current x87 control word.
+C
+C The control word is stored into a zeroed stack slot which is then popped
+C into %eax, so the bits of %eax above the 16-bit control word are zero.
+
+PROLOGUE(x86_fstcw)
+        C push a zero dword to serve as the temporary
+        xorl    %eax, %eax
+        pushl   %eax
+        C store the control word into the low 16 bits of that slot
+        fstcw   (%esp)
+        C fetch the result, leaving the stack pointer as it was on entry
+        popl    %eax
+        ret
+EPILOGUE()
+
+
+dnl  Instrumented profiling doesn't come out quite right below, since we
+dnl  don't do an actual "ret".  There's only a few instructions here, so
+dnl  there's no great need to get them separately accounted, just let them
+dnl  get attributed to the caller.
+dnl
+dnl  Hence WANT_PROFILING is redefined to "no" from here on when it was set
+dnl  to "instrument".  Any other setting is left unchanged.
+
+ifelse(WANT_PROFILING,instrument,
+`define(`WANT_PROFILING',no)')
+
+
+C int calling_conventions (...);
+C
+C The global variable "calling_conventions_function" is the function to
+C call, with the arguments as passed here.
+C
+C Perhaps the finit should be done only if the tags word isn't clear, but
+C nothing uses the rounding mode or anything at the moment.
+
+dnl  Usage: G(name)
+dnl
+dnl  Expands to name with GSYM_PREFIX prepended.  It is used below for every
+dnl  reference to a calling_conventions_* global symbol.  Exactly one
+dnl  argument is required, as enforced by m4_assert_numargs.
+
+define(G,
+m4_assert_numargs(1)
+`GSYM_PREFIX`'$1')
+
+       .text
+       ALIGN(8)
+PROLOGUE(calling_conventions)
+       C Remember the caller's return address in a global, then overwrite the
+       C stack slot so that the function under test returns to L(return)
+       C below.  The stack is otherwise untouched, so that function sees the
+       C arguments exactly as they were passed here.
+       movl    (%esp), %eax
+       movl    %eax, G(calling_conventions_retaddr)
+
+       movl    $L(return), (%esp)
+
+       C Save the caller's %ebx, %esi, %edi and %ebp in globals, since they
+       C are about to be replaced with known values.
+       movl    %ebx, G(calling_conventions_save_ebx)
+       movl    %esi, G(calling_conventions_save_esi)
+       movl    %edi, G(calling_conventions_save_edi)
+       movl    %ebp, G(calling_conventions_save_ebp)
+
+       C Known values, recorded again at L(return) so it can be seen whether
+       C the function under test left them alone.
+       movl    $0x01234567, %ebx
+       movl    $0x89ABCDEF, %esi
+       movl    $0xFEDCBA98, %edi
+       movl    $0x76543210, %ebp
+
+       C try to provoke a problem by starting with junk in the registers,
+       C especially in %eax and %edx which will be return values
+       movl    $0x70246135, %eax
+       movl    $0x8ACE9BDF, %ecx
+       movl    $0xFDB97531, %edx
+
+       C Enter the function under test with a jump rather than a call, so
+       C no extra return address is pushed.
+       jmp     *G(calling_conventions_function)
+
+L(return):
+       C The function under test has returned here.  Record the registers
+       C it was supposed to preserve.
+       movl    %ebx, G(calling_conventions_ebx)
+       movl    %esi, G(calling_conventions_esi)
+       movl    %edi, G(calling_conventions_edi)
+       movl    %ebp, G(calling_conventions_ebp)
+
+       C Record the flags register.  %ebx is free to use as a temporary
+       C since its value has already been stored above.
+       pushf
+       popl    %ebx
+       movl    %ebx, G(calling_conventions_eflags)
+
+       C Record the x87 environment, then reinitialize the FPU.
+       fstenv  G(calling_conventions_fenv)
+       finit
+
+       C Put back the caller's own register values.
+       movl    G(calling_conventions_save_ebx), %ebx
+       movl    G(calling_conventions_save_esi), %esi
+       movl    G(calling_conventions_save_edi), %edi
+       movl    G(calling_conventions_save_ebp), %ebp
+
+       C Return to the original caller.  Nothing since L(return) has written
+       C %eax or %edx, so the return value of the function under test is
+       C passed through.
+       jmp     *G(calling_conventions_retaddr)
+
+EPILOGUE()
+
diff --git a/tests/x86check.c b/tests/x86check.c

new file mode 100644 (file)

index 0000000..dfebd7a
--- /dev/null
+++ b/tests/x86check.c
@@ -0,0 +1,96 @@
+/* x86 calling conventions checking. */
+
+/*
+Copyright 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* temporaries */
+/* Written by calling_conventions in x86call.asm on entry and read back
+   just before it returns: the caller's own register values and return
+   address.  calling_conventions_retval is not referenced by x86call.asm.  */
+int  calling_conventions_save_ebx;
+int  calling_conventions_save_esi;
+int  calling_conventions_save_edi;
+int  calling_conventions_save_ebp;
+int  calling_conventions_retaddr;
+int  calling_conventions_retval;
+
+/* values to check */
+/* Filled in by calling_conventions after the function under test returns.
+   The struct is the destination of its fstenv instruction.
+   NOTE(review): presumably the 32-bit protected mode fstenv layout of
+   seven dwords, with control, status and tag words in the first three;
+   only the low 16 bits of tag are examined below.  */
+struct {
+  unsigned  control;
+  unsigned  status;
+  unsigned  tag;
+  unsigned  other[4];
+} calling_conventions_fenv;
+int  calling_conventions_ebx;
+int  calling_conventions_esi;
+int  calling_conventions_edi;
+int  calling_conventions_ebp;
+int  calling_conventions_eflags;
+
+/* expected values, as per x86call.asm */
+#define VALUE_EBX   0x01234567
+#define VALUE_ESI   0x89ABCDEF
+#define VALUE_EDI   0xFEDCBA98
+#define VALUE_EBP   0x76543210
+
+/* 1 if bit 10 of an eflags value (the direction flag) is set, else 0 */
+#define DIR_BIT(eflags)   (((eflags) & (1<<10)) != 0)
+
+
+/* Check the state recorded by calling_conventions in x86call.asm after the
+   function under test returned.  Return 1 if ok, 0 if not.
+
+   Verified are: %ebx, %esi, %edi and %ebp still hold the VALUE_* patterns,
+   the direction flag is clear, and the low 16 bits of the FPU tag word are
+   0xFFFF.  Every violation is reported on stdout; the "Violated calling
+   conventions:" heading is printed once, before the first of them.  */
+
+int
+calling_conventions_check (void)
+{
+  const char  *header = "Violated calling conventions:\n";
+  int  ret = 1;
+
+  /* Wrapped in do/while so each use is a single statement, and converted
+     to unsigned so the arguments match the %X conversions.  */
+#define CHECK(callreg, regstr, value)                           \
+  do {                                                          \
+    if ((unsigned) (callreg) != (unsigned) (value))             \
+      {                                                         \
+        printf ("%s   %s  got 0x%08X want 0x%08X\n",            \
+                header, regstr,                                 \
+                (unsigned) (callreg), (unsigned) (value));      \
+        header = "";                                            \
+        ret = 0;                                                \
+      }                                                         \
+  } while (0)
+
+  CHECK (calling_conventions_ebx, "ebx", VALUE_EBX);
+  CHECK (calling_conventions_esi, "esi", VALUE_ESI);
+  CHECK (calling_conventions_edi, "edi", VALUE_EDI);
+  CHECK (calling_conventions_ebp, "ebp", VALUE_EBP);
+
+  if (DIR_BIT (calling_conventions_eflags) != 0)
+    {
+      printf ("%s   eflags dir bit  got %d want 0\n",
+              header, DIR_BIT (calling_conventions_eflags));
+      header = "";
+      ret = 0;
+    }
+
+  if ((calling_conventions_fenv.tag & 0xFFFF) != 0xFFFF)
+    {
+      printf ("%s   fpu tags  got 0x%X want 0xFFFF\n",
+              header, calling_conventions_fenv.tag & 0xFFFF);
+      header = "";
+      ret = 0;
+    }
+
+  return ret;
+}
diff --git a/tune/Makefile.am b/tune/Makefile.am

new file mode 100644 (file)

index 0000000..022aa7c
--- /dev/null
+++ b/tune/Makefile.am
@@ -0,0 +1,154 @@
+## Process this file with automake to generate Makefile.in
+
+# Copyright 2000, 2001, 2002, 2003, 2005 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/tests
+
+EXTRA_DIST = alpha.asm pentium.asm sparcv9.asm hppa.asm hppa2.asm hppa2w.asm \
+  ia64.asm powerpc.asm powerpc64.asm x86_64.asm many.pl
+noinst_HEADERS = speed.h
+
+# Prefer -static on the speed and tune programs, since that can avoid
+# overheads of shared library linkages on some systems.  Libtool tends to
+# botch -static if configured with --disable-static, perhaps reasonably
+# enough.  In any event under --disable-static the only choice is a dynamic
+# link so there's no point in -static.
+#
+if ENABLE_STATIC
+STATIC = -static
+else
+STATIC =
+endif
+
+
+EXTRA_LTLIBRARIES = libspeed.la
+
+libspeed_la_SOURCES =                                                  \
+  common.c divrem1div.c divrem1inv.c divrem2div.c divrem2inv.c         \
+  freq.c                                                               \
+  gcdext_single.c gcdext_double.c gcdextod.c gcdextos.c                        \
+  jacbase1.c jacbase2.c jacbase3.c                                     \
+  mod_1_div.c mod_1_inv.c modlinv.c                                    \
+  noop.c powm_mod.c powm_redc.c pre_divrem_1.c                         \
+  set_strb.c set_strs.c set_strp.c time.c
+
+libspeed_la_DEPENDENCIES = $(SPEED_CYCLECOUNTER_OBJ) \
+  $(top_builddir)/tests/libtests.la $(top_builddir)/libgmp.la
+libspeed_la_LIBADD = $(libspeed_la_DEPENDENCIES) $(LIBM)
+libspeed_la_LDFLAGS = $(STATIC)
+
+# Build libtests.la in its own directory.  "&&" is used so that $(MAKE) is
+# not run in the current directory if the cd fails.
+$(top_builddir)/tests/libtests.la:
+       cd $(top_builddir)/tests && $(MAKE) $(AM_MAKEFLAGS) libtests.la
+
+
+# The library code is faster static than shared on some systems, so do
+# tuning and measuring with static, since users who care about maximizing
+# speed will be using that.  speed-dynamic exists to show the difference.
+#
+# On Solaris 8, gcc 2.95.2 -static is somehow broken (it creates executables
+# that immediately seg fault), so -all-static is not used.  The only thing
+# -all-static does is make libc static linked as well as libgmp, and that
+# makes a difference only when measuring malloc and friends in the speed
+# program.  This can always be forced with "make speed_LDFLAGS=-all-static
+# ..." if desired, see tune/README.
+
+EXTRA_PROGRAMS = speed speed-dynamic speed-ext tuneup
+
+DEPENDENCIES = libspeed.la
+LDADD = $(DEPENDENCIES)
+
+speed_SOURCES = speed.c
+speed_LDFLAGS = $(STATIC)
+
+speed_dynamic_SOURCES = speed.c
+
+speed_ext_SOURCES = speed-ext.c
+speed_ext_LDFLAGS = $(STATIC)
+
+tuneup_SOURCES = tuneup.c
+nodist_tuneup_SOURCES = sqr_basecase.c $(TUNE_MPN_SRCS)
+tuneup_DEPENDENCIES = $(TUNE_SQR_OBJ) libspeed.la
+tuneup_LDADD = $(tuneup_DEPENDENCIES)
+tuneup_LDFLAGS = $(STATIC)
+
+
+# Build the tuneup program and run it.  The program is run under the same
+# name it is built as, including any $(EXEEXT) suffix.
+tune:
+       $(MAKE) $(AM_MAKEFLAGS) tuneup$(EXEEXT)
+       ./tuneup$(EXEEXT)
+
+allprogs: $(EXTRA_PROGRAMS)
+
+# $(MANY_CLEAN) and $(MANY_DISTCLEAN) are hooks for many.pl
+CLEANFILES = $(EXTRA_PROGRAMS) $(EXTRA_LTLIBRARIES) \
+       $(TUNE_MPN_SRCS) sqr_asm.asm \
+       stg.gnuplot stg.data \
+       mtg.gnuplot mtg.data \
+       fibg.gnuplot fibg.data \
+       graph.gnuplot graph.data \
+       $(MANY_CLEAN)
+DISTCLEANFILES = sqr_basecase.c  $(MANY_DISTCLEAN)
+
+
+# Generating these little files at build time seems better than including
+# them in the distribution, since the list can be changed more easily.
+#
+# mpn/generic/tdiv_qr.c uses mpn_divrem_1 and mpn_divrem_2, but only for 1
+# and 2 limb divisors, which are never used during tuning, so it doesn't
+# matter whether it picks up a tuned or untuned version of those.
+#
+# divrem_1 and mod_1 are recompiled renamed to "_tune" to avoid a linking
+# problem.  If a native divrem_1 provides an mpn_divrem_1c entrypoint then
+# common.c will want that, but the generic divrem_1 doesn't provide it,
+# likewise for mod_1.  The simplest way around this is to have the tune
+# build versions renamed suitably.
+#
+# FIXME: Would like say mul_n.c to depend on $(top_builddir)/mul_n.c so the
+# recompiled object will be rebuilt if that file changes.
+
+TUNE_MPN_SRCS = $(TUNE_MPN_SRCS_BASIC) divrem_1.c mod_1.c
+TUNE_MPN_SRCS_BASIC = bdiv_q.c bdiv_qr.c                               \
+  dcpi1_div_qr.c dcpi1_divappr_q.c dcpi1_bdiv_qr.c dcpi1_bdiv_q.c      \
+  invertappr.c invert.c binvert.c divrem_2.c gcd.c gcdext.c            \
+  get_str.c set_str.c matrix22_mul.c hgcd.c mul_n.c sqr.c              \
+  mullo_n.c mul_fft.c mul.c tdiv_qr.c mulmod_bnm1.c sqrmod_bnm1.c      \
+  nussbaumer_mul.c toom6h_mul.c toom8h_mul.c toom6_sqr.c toom8_sqr.c   \
+  toom22_mul.c toom2_sqr.c toom33_mul.c toom3_sqr.c toom44_mul.c toom4_sqr.c
+
+# Each stub defines TUNE_PROGRAM_BUILD and then includes the generic source
+# of the same name.  Only the requested target is written, so two targets
+# being made at once under a parallel make never write the same file.
+$(TUNE_MPN_SRCS_BASIC):
+       echo "#define TUNE_PROGRAM_BUILD 1" >$@
+       echo "#include \"mpn/generic/$@\"" >>$@
+
+# Stub which compiles the generic divrem_1 with its entry point renamed to
+# mpn_divrem_1_tune.
+divrem_1.c:
+       echo "#define TUNE_PROGRAM_BUILD 1" >$@
+       echo "#define __gmpn_divrem_1  mpn_divrem_1_tune" >>$@
+       echo "#include \"mpn/generic/divrem_1.c\"" >>$@
+
+# Stub which compiles the generic mod_1 with its entry point renamed to
+# mpn_mod_1_tune.
+mod_1.c:
+       echo "#define TUNE_PROGRAM_BUILD 1" >$@
+       echo "#define __gmpn_mod_1  mpn_mod_1_tune" >>$@
+       echo "#include \"mpn/generic/mod_1.c\"" >>$@
+
+# Wrapper which sets SQR_TOOM2_THRESHOLD_OVERRIDE to
+# SQR_TOOM2_THRESHOLD_MAX and then includes the selected sqr_basecase.asm.
+sqr_asm.asm: $(top_builddir)/mpn/sqr_basecase.asm
+       echo 'define(SQR_TOOM2_THRESHOLD_OVERRIDE,SQR_TOOM2_THRESHOLD_MAX)' >$@
+       echo 'include(../mpn/sqr_basecase.asm)' >>$@
+
+
+include ../mpn/Makeasm.am
diff --git a/tune/Makefile.in b/tune/Makefile.in

new file mode 100644 (file)

index 0000000..64f177e
--- /dev/null
+++ b/tune/Makefile.in
@@ -0,0 +1,956 @@
+# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation,
+# Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+# Copyright 2000, 2001, 2002, 2003, 2005 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+# Copyright 1996, 1998, 1999, 2000, 2001, 2002 Free Software Foundation,
+# Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+ANSI2KNR = $(top_builddir)/ansi2knr
+EXTRA_PROGRAMS = speed$(EXEEXT) speed-dynamic$(EXEEXT) \
+       speed-ext$(EXEEXT) tuneup$(EXEEXT)
+DIST_COMMON = README $(noinst_HEADERS) $(srcdir)/../mpn/Makeasm.am \
+       $(srcdir)/Makefile.am $(srcdir)/Makefile.in
+subdir = tune
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
+       $(top_srcdir)/configure.in
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+       $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+am__DEPENDENCIES_1 =
+am__DEPENDENCIES_2 = $(am__DEPENDENCIES_1) \
+       $(top_builddir)/tests/libtests.la $(top_builddir)/libgmp.la
+am_libspeed_la_OBJECTS = common$U.lo divrem1div$U.lo divrem1inv$U.lo \
+       divrem2div$U.lo divrem2inv$U.lo freq$U.lo gcdext_single$U.lo \
+       gcdext_double$U.lo gcdextod$U.lo gcdextos$U.lo jacbase1$U.lo \
+       jacbase2$U.lo jacbase3$U.lo mod_1_div$U.lo mod_1_inv$U.lo \
+       modlinv$U.lo noop$U.lo powm_mod$U.lo powm_redc$U.lo \
+       pre_divrem_1$U.lo set_strb$U.lo set_strs$U.lo set_strp$U.lo \
+       time$U.lo
+libspeed_la_OBJECTS = $(am_libspeed_la_OBJECTS)
+libspeed_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
+       $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+       $(libspeed_la_LDFLAGS) $(LDFLAGS) -o $@
+am_speed_OBJECTS = speed$U.$(OBJEXT)
+speed_OBJECTS = $(am_speed_OBJECTS)
+speed_LDADD = $(LDADD)
+speed_DEPENDENCIES = $(DEPENDENCIES)
+speed_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(speed_LDFLAGS) \
+       $(LDFLAGS) -o $@
+am_speed_dynamic_OBJECTS = speed$U.$(OBJEXT)
+speed_dynamic_OBJECTS = $(am_speed_dynamic_OBJECTS)
+speed_dynamic_LDADD = $(LDADD)
+speed_dynamic_DEPENDENCIES = $(DEPENDENCIES)
+am_speed_ext_OBJECTS = speed-ext$U.$(OBJEXT)
+speed_ext_OBJECTS = $(am_speed_ext_OBJECTS)
+speed_ext_LDADD = $(LDADD)
+speed_ext_DEPENDENCIES = $(DEPENDENCIES)
+speed_ext_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
+       $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+       $(speed_ext_LDFLAGS) $(LDFLAGS) -o $@
+am_tuneup_OBJECTS = tuneup$U.$(OBJEXT)
+am__objects_1 = bdiv_q$U.$(OBJEXT) bdiv_qr$U.$(OBJEXT) \
+       dcpi1_div_qr$U.$(OBJEXT) dcpi1_divappr_q$U.$(OBJEXT) \
+       dcpi1_bdiv_qr$U.$(OBJEXT) dcpi1_bdiv_q$U.$(OBJEXT) \
+       invertappr$U.$(OBJEXT) invert$U.$(OBJEXT) binvert$U.$(OBJEXT) \
+       divrem_2$U.$(OBJEXT) gcd$U.$(OBJEXT) gcdext$U.$(OBJEXT) \
+       get_str$U.$(OBJEXT) set_str$U.$(OBJEXT) \
+       matrix22_mul$U.$(OBJEXT) hgcd$U.$(OBJEXT) mul_n$U.$(OBJEXT) \
+       sqr$U.$(OBJEXT) mullo_n$U.$(OBJEXT) mul_fft$U.$(OBJEXT) \
+       mul$U.$(OBJEXT) tdiv_qr$U.$(OBJEXT) mulmod_bnm1$U.$(OBJEXT) \
+       sqrmod_bnm1$U.$(OBJEXT) nussbaumer_mul$U.$(OBJEXT) \
+       toom6h_mul$U.$(OBJEXT) toom8h_mul$U.$(OBJEXT) \
+       toom6_sqr$U.$(OBJEXT) toom8_sqr$U.$(OBJEXT) \
+       toom22_mul$U.$(OBJEXT) toom2_sqr$U.$(OBJEXT) \
+       toom33_mul$U.$(OBJEXT) toom3_sqr$U.$(OBJEXT) \
+       toom44_mul$U.$(OBJEXT) toom4_sqr$U.$(OBJEXT)
+am__objects_2 = $(am__objects_1) divrem_1$U.$(OBJEXT) \
+       mod_1$U.$(OBJEXT)
+nodist_tuneup_OBJECTS = sqr_basecase$U.$(OBJEXT) $(am__objects_2)
+tuneup_OBJECTS = $(am_tuneup_OBJECTS) $(nodist_tuneup_OBJECTS)
+am__DEPENDENCIES_3 = $(am__DEPENDENCIES_1) libspeed.la
+tuneup_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(tuneup_LDFLAGS) \
+       $(LDFLAGS) -o $@
+DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
+depcomp =
+am__depfiles_maybe =
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+       $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+       $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
+       $(LDFLAGS) -o $@
+SOURCES = $(libspeed_la_SOURCES) $(speed_SOURCES) \
+       $(speed_dynamic_SOURCES) $(speed_ext_SOURCES) \
+       $(tuneup_SOURCES) $(nodist_tuneup_SOURCES)
+DIST_SOURCES = $(libspeed_la_SOURCES) $(speed_SOURCES) \
+       $(speed_dynamic_SOURCES) $(speed_ext_SOURCES) \
+       $(tuneup_SOURCES)
+HEADERS = $(noinst_HEADERS)
+ETAGS = etags
+CTAGS = ctags
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ABI = @ABI@
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AS = @AS@
+ASMFLAGS = @ASMFLAGS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CALLING_CONVENTIONS_OBJS = @CALLING_CONVENTIONS_OBJS@
+CC = @CC@
+CCAS = @CCAS@
+CC_FOR_BUILD = @CC_FOR_BUILD@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CPP_FOR_BUILD = @CPP_FOR_BUILD@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFN_LONG_LONG_LIMB = @DEFN_LONG_LONG_LIMB@
+DEFS = @DEFS@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+EXEEXT_FOR_BUILD = @EXEEXT_FOR_BUILD@
+FGREP = @FGREP@
+GMP_LDFLAGS = @GMP_LDFLAGS@
+GMP_LIMB_BITS = @GMP_LIMB_BITS@
+GMP_NAIL_BITS = @GMP_NAIL_BITS@
+GREP = @GREP@
+HAVE_CLOCK_01 = @HAVE_CLOCK_01@
+HAVE_CPUTIME_01 = @HAVE_CPUTIME_01@
+HAVE_GETRUSAGE_01 = @HAVE_GETRUSAGE_01@
+HAVE_GETTIMEOFDAY_01 = @HAVE_GETTIMEOFDAY_01@
+HAVE_HOST_CPU_FAMILY_power = @HAVE_HOST_CPU_FAMILY_power@
+HAVE_HOST_CPU_FAMILY_powerpc = @HAVE_HOST_CPU_FAMILY_powerpc@
+HAVE_SIGACTION_01 = @HAVE_SIGACTION_01@
+HAVE_SIGALTSTACK_01 = @HAVE_SIGALTSTACK_01@
+HAVE_SIGSTACK_01 = @HAVE_SIGSTACK_01@
+HAVE_STACK_T_01 = @HAVE_STACK_T_01@
+HAVE_SYS_RESOURCE_H_01 = @HAVE_SYS_RESOURCE_H_01@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LEX = @LEX@
+LEXLIB = @LEXLIB@
+LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
+LIBCURSES = @LIBCURSES@
+LIBGMPXX_LDFLAGS = @LIBGMPXX_LDFLAGS@
+LIBGMP_DLL = @LIBGMP_DLL@
+LIBGMP_LDFLAGS = @LIBGMP_LDFLAGS@
+LIBM = @LIBM@
+LIBM_FOR_BUILD = @LIBM_FOR_BUILD@
+LIBOBJS = @LIBOBJS@
+LIBREADLINE = @LIBREADLINE@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+M4 = @M4@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
+STRIP = @STRIP@
+TAL_OBJECT = @TAL_OBJECT@
+TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
+U = @U@
+U_FOR_BUILD = @U_FOR_BUILD@
+VERSION = @VERSION@
+WITH_READLINE_01 = @WITH_READLINE_01@
+YACC = @YACC@
+YFLAGS = @YFLAGS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__leading_dot = @am__leading_dot@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+gmp_srclinks = @gmp_srclinks@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+mpn_objects = @mpn_objects@
+mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
+mpn_objs_in_libmp = @mpn_objs_in_libmp@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/tests
+EXTRA_DIST = alpha.asm pentium.asm sparcv9.asm hppa.asm hppa2.asm hppa2w.asm \
+  ia64.asm powerpc.asm powerpc64.asm x86_64.asm many.pl
+
+noinst_HEADERS = speed.h
+@ENABLE_STATIC_FALSE@STATIC = 
+
+# Prefer -static on the speed and tune programs, since that can avoid
+# overheads of shared library linkages on some systems.  Libtool tends to
+# botch -static if configured with --disable-static, perhaps reasonably
+# enough.  In any event under --disable-static the only choice is a dynamic
+# link so there's no point in -static.
+#
+@ENABLE_STATIC_TRUE@STATIC = -static
+EXTRA_LTLIBRARIES = libspeed.la
+libspeed_la_SOURCES = \
+  common.c divrem1div.c divrem1inv.c divrem2div.c divrem2inv.c         \
+  freq.c                                                               \
+  gcdext_single.c gcdext_double.c gcdextod.c gcdextos.c                        \
+  jacbase1.c jacbase2.c jacbase3.c                                     \
+  mod_1_div.c mod_1_inv.c modlinv.c                                    \
+  noop.c powm_mod.c powm_redc.c pre_divrem_1.c                         \
+  set_strb.c set_strs.c set_strp.c time.c
+
+# libspeed.la depends on, and is linked against, the cycle counter object,
+# tests/libtests.la and libgmp.la; $(LIBM) is added for the link only.
+libspeed_la_DEPENDENCIES = $(SPEED_CYCLECOUNTER_OBJ) \
+  $(top_builddir)/tests/libtests.la $(top_builddir)/libgmp.la
+
+libspeed_la_LIBADD = $(libspeed_la_DEPENDENCIES) $(LIBM)
+libspeed_la_LDFLAGS = $(STATIC)
+# Programs in this directory link against libspeed.la unless they set their
+# own per-program LDADD.
+DEPENDENCIES = libspeed.la
+LDADD = $(DEPENDENCIES)
+# speed and speed-dynamic are both built from speed.c; only speed is given
+# the $(STATIC) link flag.
+speed_SOURCES = speed.c
+speed_LDFLAGS = $(STATIC)
+speed_dynamic_SOURCES = speed.c
+speed_ext_SOURCES = speed-ext.c
+speed_ext_LDFLAGS = $(STATIC)
+# tuneup also compiles sources that are not distributed (sqr_basecase.c and
+# $(TUNE_MPN_SRCS)), and links $(TUNE_SQR_OBJ) ahead of libspeed.la.
+tuneup_SOURCES = tuneup.c
+nodist_tuneup_SOURCES = sqr_basecase.c $(TUNE_MPN_SRCS)
+tuneup_DEPENDENCIES = $(TUNE_SQR_OBJ) libspeed.la
+tuneup_LDADD = $(tuneup_DEPENDENCIES)
+tuneup_LDFLAGS = $(STATIC)
+
+# $(MANY_CLEAN) and $(MANY_DISTCLEAN) are hooks for many.pl
+CLEANFILES = $(EXTRA_PROGRAMS) $(EXTRA_LTLIBRARIES) \
+       $(TUNE_MPN_SRCS) sqr_asm.asm \
+       stg.gnuplot stg.data \
+       mtg.gnuplot mtg.data \
+       fibg.gnuplot fibg.data \
+       graph.gnuplot graph.data \
+       $(MANY_CLEAN)
+
+DISTCLEANFILES = sqr_basecase.c  $(MANY_DISTCLEAN)
+
+# Generating these little files at build time seems better than including
+# them in the distribution, since the list can be changed more easily.
+#
+# mpn/generic/tdiv_qr.c uses mpn_divrem_1 and mpn_divrem_2, but only for 1
+# and 2 limb divisors, which are never used during tuning, so it doesn't
+# matter whether it picks up a tuned or untuned version of those.
+#
+# divrem_1 and mod_1 are recompiled renamed to "_tune" to avoid a linking
+# problem.  If a native divrem_1 provides an mpn_divrem_1c entrypoint then
+# common.c will want that, but the generic divrem_1 doesn't provide it,
+# likewise for mod_1.  The simplest way around this is to have the tune
+# build versions renamed suitably.
+#
+# FIXME: It would be better if, say, mul_n.c depended on
+# $(top_builddir)/mul_n.c, so that the recompiled object gets rebuilt
+# whenever that file changes.
+TUNE_MPN_SRCS = $(TUNE_MPN_SRCS_BASIC) divrem_1.c mod_1.c
+TUNE_MPN_SRCS_BASIC = bdiv_q.c bdiv_qr.c                               \
+  dcpi1_div_qr.c dcpi1_divappr_q.c dcpi1_bdiv_qr.c dcpi1_bdiv_q.c      \
+  invertappr.c invert.c binvert.c divrem_2.c gcd.c gcdext.c            \
+  get_str.c set_str.c matrix22_mul.c hgcd.c mul_n.c sqr.c              \
+  mullo_n.c mul_fft.c mul.c tdiv_qr.c mulmod_bnm1.c sqrmod_bnm1.c      \
+  nussbaumer_mul.c toom6h_mul.c toom8h_mul.c toom6_sqr.c toom8_sqr.c   \
+  toom22_mul.c toom2_sqr.c toom33_mul.c toom3_sqr.c toom44_mul.c toom4_sqr.c
+
+
+# The flags of $(COMPILE) without the leading $(CC), followed by $(ASMFLAGS).
+#
+COMPILE_FLAGS = $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+       $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) $(ASMFLAGS)
+
+
+# Flags used for preprocessing (in ansi2knr rules).
+#
+PREPROCESS_FLAGS = $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+       $(CPPFLAGS)
+
+
+# Recent versions of automake (1.5 and up for instance) append automake
+# generated suffixes to this $(SUFFIXES) list.  This is essential for us,
+# since .c must come after .s, .S and .asm.  If .c is before .s, for
+# instance, then in the mpn directory "make" will see add_n.c mentioned in
+# an explicit rule (the ansi2knr stuff) and decide it must have add_n.c,
+# even if add_n.c doesn't exist but add_n.s does.  See GNU make
+# documentation "(make)Implicit Rule Search", part 5c.
+#
+# On IRIX 6 native make this doesn't work properly though.  Somehow .c
+# remains ahead of .s, perhaps because .c.s is a builtin rule.  .asm works
+# fine though, and mpn/mips3 uses this.
+#
+SUFFIXES = .s .S .asm
+
+# can be overridden during development, eg. "make RM_TMP=: mul_1.lo"
+RM_TMP = rm -f
+all: all-am
+
+# Clear the suffix list, then set it again with .s, .S and .asm ahead of .c.
+.SUFFIXES:
+.SUFFIXES: .s .S .asm .c .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(srcdir)/../mpn/Makeasm.am $(am__configure_deps)
+       @for dep in $?; do \
+         case '$(am__configure_deps)' in \
+           *$$dep*) \
+             ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+               && { if test -f $@; then exit 0; else break; fi; }; \
+             exit 1;; \
+         esac; \
+       done; \
+       echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps tune/Makefile'; \
+       $(am__cd) $(top_srcdir) && \
+         $(AUTOMAKE) --gnu --ignore-deps tune/Makefile
+# Regenerate this Makefile.  If config.status is among the prerequisites
+# that are newer than the target, refresh from the top-level build
+# directory; otherwise rerun config.status for this one file only.
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+       @case '$?' in \
+         *config.status*) \
+           cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+         *) \
+           echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+           cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+       esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+# Link rules.  Each program rule first deletes any existing executable and
+# then relinks it from its objects, its LDADD libraries and $(LIBS).
+# speed-dynamic is linked with the plain $(LINK) command rather than a
+# per-target link command.
+libspeed.la: $(libspeed_la_OBJECTS) $(libspeed_la_DEPENDENCIES) 
+       $(libspeed_la_LINK)  $(libspeed_la_OBJECTS) $(libspeed_la_LIBADD) $(LIBS)
+speed$(EXEEXT): $(speed_OBJECTS) $(speed_DEPENDENCIES) 
+       @rm -f speed$(EXEEXT)
+       $(speed_LINK) $(speed_OBJECTS) $(speed_LDADD) $(LIBS)
+speed-dynamic$(EXEEXT): $(speed_dynamic_OBJECTS) $(speed_dynamic_DEPENDENCIES) 
+       @rm -f speed-dynamic$(EXEEXT)
+       $(LINK) $(speed_dynamic_OBJECTS) $(speed_dynamic_LDADD) $(LIBS)
+speed-ext$(EXEEXT): $(speed_ext_OBJECTS) $(speed_ext_DEPENDENCIES) 
+       @rm -f speed-ext$(EXEEXT)
+       $(speed_ext_LINK) $(speed_ext_OBJECTS) $(speed_ext_LDADD) $(LIBS)
+tuneup$(EXEEXT): $(tuneup_OBJECTS) $(tuneup_DEPENDENCIES) 
+       @rm -f tuneup$(EXEEXT)
+       $(tuneup_LINK) $(tuneup_OBJECTS) $(tuneup_LDADD) $(LIBS)
+
+# Remove compiled object files from this directory.
+mostlyclean-compile:
+       -rm -f *.$(OBJEXT)
+
+# Remove any *.tab.c files.
+distclean-compile:
+       -rm -f *.tab.c
+# Build ansi2knr by invoking make in the top-level build directory.
+$(top_builddir)/ansi2knr:
+       $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
+
+# Remove the *_.c files, but only when the make variable U is non-empty.
+mostlyclean-kr:
+       -test "$U" = "" || rm -f *_.c
+
+# Suffix rules for compiling C sources: plain objects (.o, and .obj with the
+# source path passed through $(CYGPATH_W)) and libtool objects (.lo).
+.c.o:
+       $(COMPILE) -c $<
+
+.c.obj:
+       $(COMPILE) -c `$(CYGPATH_W) '$<'`
+
+.c.lo:
+       $(LTCOMPILE) -c -o $@ $<
+# ansi2knr rules, one per source file and all of the same shape: preprocess
+# the source (taken from $(srcdir) if it exists there, otherwise from the
+# current directory), turn "# N" line markers into "#line N", and pass the
+# result through $(ANSI2KNR) to produce the corresponding *_.c file.  The
+# output file is removed if the pipeline reports failure.
+bdiv_q_.c: bdiv_q.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/bdiv_q.c; then echo $(srcdir)/bdiv_q.c; else echo bdiv_q.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+bdiv_qr_.c: bdiv_qr.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/bdiv_qr.c; then echo $(srcdir)/bdiv_qr.c; else echo bdiv_qr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+binvert_.c: binvert.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/binvert.c; then echo $(srcdir)/binvert.c; else echo binvert.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+common_.c: common.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/common.c; then echo $(srcdir)/common.c; else echo common.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+dcpi1_bdiv_q_.c: dcpi1_bdiv_q.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/dcpi1_bdiv_q.c; then echo $(srcdir)/dcpi1_bdiv_q.c; else echo dcpi1_bdiv_q.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+dcpi1_bdiv_qr_.c: dcpi1_bdiv_qr.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/dcpi1_bdiv_qr.c; then echo $(srcdir)/dcpi1_bdiv_qr.c; else echo dcpi1_bdiv_qr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+dcpi1_div_qr_.c: dcpi1_div_qr.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/dcpi1_div_qr.c; then echo $(srcdir)/dcpi1_div_qr.c; else echo dcpi1_div_qr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+dcpi1_divappr_q_.c: dcpi1_divappr_q.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/dcpi1_divappr_q.c; then echo $(srcdir)/dcpi1_divappr_q.c; else echo dcpi1_divappr_q.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+divrem1div_.c: divrem1div.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divrem1div.c; then echo $(srcdir)/divrem1div.c; else echo divrem1div.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+divrem1inv_.c: divrem1inv.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divrem1inv.c; then echo $(srcdir)/divrem1inv.c; else echo divrem1inv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+divrem2div_.c: divrem2div.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divrem2div.c; then echo $(srcdir)/divrem2div.c; else echo divrem2div.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+divrem2inv_.c: divrem2inv.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divrem2inv.c; then echo $(srcdir)/divrem2inv.c; else echo divrem2inv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+divrem_1_.c: divrem_1.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divrem_1.c; then echo $(srcdir)/divrem_1.c; else echo divrem_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+divrem_2_.c: divrem_2.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divrem_2.c; then echo $(srcdir)/divrem_2.c; else echo divrem_2.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+freq_.c: freq.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/freq.c; then echo $(srcdir)/freq.c; else echo freq.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+gcd_.c: gcd.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcd.c; then echo $(srcdir)/gcd.c; else echo gcd.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+gcdext_.c: gcdext.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcdext.c; then echo $(srcdir)/gcdext.c; else echo gcdext.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+gcdext_double_.c: gcdext_double.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcdext_double.c; then echo $(srcdir)/gcdext_double.c; else echo gcdext_double.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+gcdext_single_.c: gcdext_single.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcdext_single.c; then echo $(srcdir)/gcdext_single.c; else echo gcdext_single.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+gcdextod_.c: gcdextod.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcdextod.c; then echo $(srcdir)/gcdextod.c; else echo gcdextod.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+gcdextos_.c: gcdextos.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcdextos.c; then echo $(srcdir)/gcdextos.c; else echo gcdextos.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+get_str_.c: get_str.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_str.c; then echo $(srcdir)/get_str.c; else echo get_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+hgcd_.c: hgcd.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/hgcd.c; then echo $(srcdir)/hgcd.c; else echo hgcd.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+invert_.c: invert.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/invert.c; then echo $(srcdir)/invert.c; else echo invert.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+invertappr_.c: invertappr.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/invertappr.c; then echo $(srcdir)/invertappr.c; else echo invertappr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+jacbase1_.c: jacbase1.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/jacbase1.c; then echo $(srcdir)/jacbase1.c; else echo jacbase1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+jacbase2_.c: jacbase2.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/jacbase2.c; then echo $(srcdir)/jacbase2.c; else echo jacbase2.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+jacbase3_.c: jacbase3.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/jacbase3.c; then echo $(srcdir)/jacbase3.c; else echo jacbase3.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+matrix22_mul_.c: matrix22_mul.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/matrix22_mul.c; then echo $(srcdir)/matrix22_mul.c; else echo matrix22_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mod_1_.c: mod_1.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mod_1.c; then echo $(srcdir)/mod_1.c; else echo mod_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mod_1_div_.c: mod_1_div.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mod_1_div.c; then echo $(srcdir)/mod_1_div.c; else echo mod_1_div.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mod_1_inv_.c: mod_1_inv.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mod_1_inv.c; then echo $(srcdir)/mod_1_inv.c; else echo mod_1_inv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+modlinv_.c: modlinv.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/modlinv.c; then echo $(srcdir)/modlinv.c; else echo modlinv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mul_.c: mul.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul.c; then echo $(srcdir)/mul.c; else echo mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mul_fft_.c: mul_fft.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_fft.c; then echo $(srcdir)/mul_fft.c; else echo mul_fft.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mul_n_.c: mul_n.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_n.c; then echo $(srcdir)/mul_n.c; else echo mul_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mullo_n_.c: mullo_n.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mullo_n.c; then echo $(srcdir)/mullo_n.c; else echo mullo_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mulmod_bnm1_.c: mulmod_bnm1.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mulmod_bnm1.c; then echo $(srcdir)/mulmod_bnm1.c; else echo mulmod_bnm1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+noop_.c: noop.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/noop.c; then echo $(srcdir)/noop.c; else echo noop.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+nussbaumer_mul_.c: nussbaumer_mul.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/nussbaumer_mul.c; then echo $(srcdir)/nussbaumer_mul.c; else echo nussbaumer_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+powm_mod_.c: powm_mod.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/powm_mod.c; then echo $(srcdir)/powm_mod.c; else echo powm_mod.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+powm_redc_.c: powm_redc.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/powm_redc.c; then echo $(srcdir)/powm_redc.c; else echo powm_redc.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+pre_divrem_1_.c: pre_divrem_1.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/pre_divrem_1.c; then echo $(srcdir)/pre_divrem_1.c; else echo pre_divrem_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+set_str_.c: set_str.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_str.c; then echo $(srcdir)/set_str.c; else echo set_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+set_strb_.c: set_strb.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_strb.c; then echo $(srcdir)/set_strb.c; else echo set_strb.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+set_strp_.c: set_strp.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_strp.c; then echo $(srcdir)/set_strp.c; else echo set_strp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+set_strs_.c: set_strs.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_strs.c; then echo $(srcdir)/set_strs.c; else echo set_strs.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+speed_.c: speed.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/speed.c; then echo $(srcdir)/speed.c; else echo speed.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+speed-ext_.c: speed-ext.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/speed-ext.c; then echo $(srcdir)/speed-ext.c; else echo speed-ext.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+sqr_.c: sqr.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sqr.c; then echo $(srcdir)/sqr.c; else echo sqr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+sqr_basecase_.c: sqr_basecase.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sqr_basecase.c; then echo $(srcdir)/sqr_basecase.c; else echo sqr_basecase.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+sqrmod_bnm1_.c: sqrmod_bnm1.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sqrmod_bnm1.c; then echo $(srcdir)/sqrmod_bnm1.c; else echo sqrmod_bnm1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+tdiv_qr_.c: tdiv_qr.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tdiv_qr.c; then echo $(srcdir)/tdiv_qr.c; else echo tdiv_qr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+time_.c: time.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/time.c; then echo $(srcdir)/time.c; else echo time.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+toom22_mul_.c: toom22_mul.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom22_mul.c; then echo $(srcdir)/toom22_mul.c; else echo toom22_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+toom2_sqr_.c: toom2_sqr.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom2_sqr.c; then echo $(srcdir)/toom2_sqr.c; else echo toom2_sqr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+toom33_mul_.c: toom33_mul.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom33_mul.c; then echo $(srcdir)/toom33_mul.c; else echo toom33_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+toom3_sqr_.c: toom3_sqr.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom3_sqr.c; then echo $(srcdir)/toom3_sqr.c; else echo toom3_sqr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+toom44_mul_.c: toom44_mul.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom44_mul.c; then echo $(srcdir)/toom44_mul.c; else echo toom44_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+toom4_sqr_.c: toom4_sqr.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom4_sqr.c; then echo $(srcdir)/toom4_sqr.c; else echo toom4_sqr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+toom6_sqr_.c: toom6_sqr.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom6_sqr.c; then echo $(srcdir)/toom6_sqr.c; else echo toom6_sqr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+toom6h_mul_.c: toom6h_mul.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom6h_mul.c; then echo $(srcdir)/toom6h_mul.c; else echo toom6h_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+toom8_sqr_.c: toom8_sqr.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom8_sqr.c; then echo $(srcdir)/toom8_sqr.c; else echo toom8_sqr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+toom8h_mul_.c: toom8h_mul.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom8h_mul.c; then echo $(srcdir)/toom8h_mul.c; else echo toom8h_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+tuneup_.c: tuneup.c $(ANSI2KNR)
+       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tuneup.c; then echo $(srcdir)/tuneup.c; else echo tuneup.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+bdiv_q_.$(OBJEXT) bdiv_q_.lo bdiv_qr_.$(OBJEXT) bdiv_qr_.lo \
+binvert_.$(OBJEXT) binvert_.lo common_.$(OBJEXT) common_.lo \
+dcpi1_bdiv_q_.$(OBJEXT) dcpi1_bdiv_q_.lo dcpi1_bdiv_qr_.$(OBJEXT) \
+dcpi1_bdiv_qr_.lo dcpi1_div_qr_.$(OBJEXT) dcpi1_div_qr_.lo \
+dcpi1_divappr_q_.$(OBJEXT) dcpi1_divappr_q_.lo divrem1div_.$(OBJEXT) \
+divrem1div_.lo divrem1inv_.$(OBJEXT) divrem1inv_.lo \
+divrem2div_.$(OBJEXT) divrem2div_.lo divrem2inv_.$(OBJEXT) \
+divrem2inv_.lo divrem_1_.$(OBJEXT) divrem_1_.lo divrem_2_.$(OBJEXT) \
+divrem_2_.lo freq_.$(OBJEXT) freq_.lo gcd_.$(OBJEXT) gcd_.lo \
+gcdext_.$(OBJEXT) gcdext_.lo gcdext_double_.$(OBJEXT) \
+gcdext_double_.lo gcdext_single_.$(OBJEXT) gcdext_single_.lo \
+gcdextod_.$(OBJEXT) gcdextod_.lo gcdextos_.$(OBJEXT) gcdextos_.lo \
+get_str_.$(OBJEXT) get_str_.lo hgcd_.$(OBJEXT) hgcd_.lo \
+invert_.$(OBJEXT) invert_.lo invertappr_.$(OBJEXT) invertappr_.lo \
+jacbase1_.$(OBJEXT) jacbase1_.lo jacbase2_.$(OBJEXT) jacbase2_.lo \
+jacbase3_.$(OBJEXT) jacbase3_.lo matrix22_mul_.$(OBJEXT) \
+matrix22_mul_.lo mod_1_.$(OBJEXT) mod_1_.lo mod_1_div_.$(OBJEXT) \
+mod_1_div_.lo mod_1_inv_.$(OBJEXT) mod_1_inv_.lo modlinv_.$(OBJEXT) \
+modlinv_.lo mul_.$(OBJEXT) mul_.lo mul_fft_.$(OBJEXT) mul_fft_.lo \
+mul_n_.$(OBJEXT) mul_n_.lo mullo_n_.$(OBJEXT) mullo_n_.lo \
+mulmod_bnm1_.$(OBJEXT) mulmod_bnm1_.lo noop_.$(OBJEXT) noop_.lo \
+nussbaumer_mul_.$(OBJEXT) nussbaumer_mul_.lo powm_mod_.$(OBJEXT) \
+powm_mod_.lo powm_redc_.$(OBJEXT) powm_redc_.lo \
+pre_divrem_1_.$(OBJEXT) pre_divrem_1_.lo set_str_.$(OBJEXT) \
+set_str_.lo set_strb_.$(OBJEXT) set_strb_.lo set_strp_.$(OBJEXT) \
+set_strp_.lo set_strs_.$(OBJEXT) set_strs_.lo speed_.$(OBJEXT) \
+speed_.lo speed-ext_.$(OBJEXT) speed-ext_.lo sqr_.$(OBJEXT) sqr_.lo \
+sqr_basecase_.$(OBJEXT) sqr_basecase_.lo sqrmod_bnm1_.$(OBJEXT) \
+sqrmod_bnm1_.lo tdiv_qr_.$(OBJEXT) tdiv_qr_.lo time_.$(OBJEXT) \
+time_.lo toom22_mul_.$(OBJEXT) toom22_mul_.lo toom2_sqr_.$(OBJEXT) \
+toom2_sqr_.lo toom33_mul_.$(OBJEXT) toom33_mul_.lo \
+toom3_sqr_.$(OBJEXT) toom3_sqr_.lo toom44_mul_.$(OBJEXT) \
+toom44_mul_.lo toom4_sqr_.$(OBJEXT) toom4_sqr_.lo toom6_sqr_.$(OBJEXT) \
+toom6_sqr_.lo toom6h_mul_.$(OBJEXT) toom6h_mul_.lo \
+toom8_sqr_.$(OBJEXT) toom8_sqr_.lo toom8h_mul_.$(OBJEXT) \
+toom8h_mul_.lo tuneup_.$(OBJEXT) tuneup_.lo : $(ANSI2KNR)
+
+# Remove libtool object files (*.lo).
+mostlyclean-libtool:
+       -rm -f *.lo
+
+# Remove the .libs and _libs directories.
+clean-libtool:
+       -rm -rf .libs _libs
+
+# Run "mkid -fID" over the de-duplicated list of sources, headers, lisp and
+# tags files.  A name that is not a file in the current directory is
+# prefixed with $(srcdir)/.
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+       list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       mkid -fID $$unique
+# "tags" is an alias for TAGS.
+tags: TAGS
+
+# Run $(ETAGS) over the de-duplicated list of sources, headers, lisp and
+# tags files (names not found in the current directory are prefixed with
+# $(srcdir)/).  Nothing is run when $(ETAGS_ARGS), the positional
+# parameters and the file list are all empty.
+TAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+               $(TAGS_FILES) $(LISP)
+       set x; \
+       here=`pwd`; \
+       list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       shift; \
+       if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+         test -n "$$unique" || unique=$$empty_fix; \
+         if test $$# -gt 0; then \
+           $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+             "$$@" $$unique; \
+         else \
+           $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+             $$unique; \
+         fi; \
+       fi
+# "ctags" is an alias for CTAGS, which runs $(CTAGS) over the de-duplicated
+# list of sources, headers, lisp and tags files.  Nothing is run when both
+# $(CTAGS_ARGS) and the file list are empty.
+ctags: CTAGS
+CTAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+               $(TAGS_FILES) $(LISP)
+       list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       test -z "$(CTAGS_ARGS)$$unique" \
+         || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+            $$unique
+
+# Run gtags from $(top_srcdir), passing it the path that pwd reports for
+# $(top_builddir).
+GTAGS:
+       here=`$(am__cd) $(top_builddir) && pwd` \
+         && $(am__cd) $(top_srcdir) \
+         && gtags -i $(GTAGS_ARGS) "$$here"
+
+# Remove the tag and ID files listed in the recipe.
+distclean-tags:
+       -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+# Copy every entry of $(DISTFILES) into $(distdir).  A leading $(srcdir)/ is
+# stripped from each name and a leading $(top_srcdir)/ is replaced by
+# $(top_builddir)/; any subdirectories the names require are created first.
+# Each entry is taken from the current directory if it exists there,
+# otherwise from $(srcdir).  Directories are copied recursively (after
+# making existing target directories writable); plain files are copied only
+# if not already present in $(distdir).
+distdir: $(DISTFILES)
+       @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+       topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+       list='$(DISTFILES)'; \
+         dist_files=`for file in $$list; do echo $$file; done | \
+         sed -e "s|^$$srcdirstrip/||;t" \
+             -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+       case $$dist_files in \
+         */*) $(MKDIR_P) `echo "$$dist_files" | \
+                          sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+                          sort -u` ;; \
+       esac; \
+       for file in $$dist_files; do \
+         if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+         if test -d $$d/$$file; then \
+           dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+           if test -d "$(distdir)/$$file"; then \
+             find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+           fi; \
+           if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+             cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+             find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+           fi; \
+           cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+         else \
+           test -f "$(distdir)/$$file" \
+           || cp -p $$d/$$file "$(distdir)/$$file" \
+           || exit 1; \
+         fi; \
+       done
+check-am: all-am
+check: check-am
+all-am: Makefile $(HEADERS)
+installdirs:
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+       @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+       $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+         install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+         `test -z '$(STRIP)' || \
+           echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+       -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES)
+
+distclean-generic:
+       -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+       -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+       -test -z "$(DISTCLEANFILES)" || rm -f $(DISTCLEANFILES)
+
+maintainer-clean-generic:
+       @echo "This command is intended for maintainers to use"
+       @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-am
+       -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+       distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+       -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+       mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: $(top_builddir)/ansi2knr install-am install-strip
+
+.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
+       clean-libtool ctags distclean distclean-compile \
+       distclean-generic distclean-libtool distclean-tags distdir dvi \
+       dvi-am html html-am info info-am install install-am \
+       install-data install-data-am install-dvi install-dvi-am \
+       install-exec install-exec-am install-html install-html-am \
+       install-info install-info-am install-man install-pdf \
+       install-pdf-am install-ps install-ps-am install-strip \
+       installcheck installcheck-am installdirs maintainer-clean \
+       maintainer-clean-generic mostlyclean mostlyclean-compile \
+       mostlyclean-generic mostlyclean-kr mostlyclean-libtool pdf \
+       pdf-am ps ps-am tags uninstall uninstall-am
+
+
+# Build libtests.la by running make in $(top_builddir)/tests.  The commands
+# are chained with && so that a failed cd stops the rule instead of running
+# the sub-make in the current directory.
+$(top_builddir)/tests/libtests.la:
+       cd $(top_builddir)/tests && $(MAKE) $(AM_MAKEFLAGS) libtests.la
+
+# Build the tuneup program, then run it.  The run step uses the same
+# $(EXEEXT)-suffixed file name that the build step just produced.
+tune:
+       $(MAKE) $(AM_MAKEFLAGS) tuneup$(EXEEXT)
+       ./tuneup$(EXEEXT)
+
+allprogs: $(EXTRA_PROGRAMS)
+
+# Each file named in TUNE_MPN_SRCS_BASIC is a two-line wrapper that defines
+# TUNE_PROGRAM_BUILD and includes the mpn/generic source of the same name.
+# Only the requested target ($@) is written: make runs this recipe once per
+# target, possibly in parallel, so no run may rewrite a sibling's file.
+$(TUNE_MPN_SRCS_BASIC):
+       echo "#define TUNE_PROGRAM_BUILD 1" >$@
+       echo "#include \"mpn/generic/$@\"" >>$@
+
+# Generate divrem_1.c: a wrapper that defines TUNE_PROGRAM_BUILD, maps
+# __gmpn_divrem_1 to mpn_divrem_1_tune, and includes mpn/generic/divrem_1.c.
+divrem_1.c:
+       { echo "#define TUNE_PROGRAM_BUILD 1"; \
+         echo "#define __gmpn_divrem_1  mpn_divrem_1_tune"; \
+         echo "#include \"mpn/generic/divrem_1.c\""; \
+       } >$@
+
+# Generate mod_1.c: a wrapper that defines TUNE_PROGRAM_BUILD, maps
+# __gmpn_mod_1 to mpn_mod_1_tune, and includes mpn/generic/mod_1.c.
+mod_1.c:
+       { echo "#define TUNE_PROGRAM_BUILD 1"; \
+         echo "#define __gmpn_mod_1  mpn_mod_1_tune"; \
+         echo "#include \"mpn/generic/mod_1.c\""; \
+       } >$@
+
+# Generate sqr_asm.asm: an m4 wrapper that defines
+# SQR_TOOM2_THRESHOLD_OVERRIDE as SQR_TOOM2_THRESHOLD_MAX and then includes
+# ../mpn/sqr_basecase.asm.
+sqr_asm.asm: $(top_builddir)/mpn/sqr_basecase.asm
+       { echo 'define(SQR_TOOM2_THRESHOLD_OVERRIDE,SQR_TOOM2_THRESHOLD_MAX)'; \
+         echo 'include(../mpn/sqr_basecase.asm)'; \
+       } >$@
+
+# .s assembler, no preprocessing.
+#
+.s.o:
+       $(CCAS) $(COMPILE_FLAGS) `test -f '$<' || echo '$(srcdir)/'`$<
+.s.obj:
+       $(CCAS) $(COMPILE_FLAGS) `if test -f '$<'; then $(CYGPATH_W) '$<'; else $(CYGPATH_W) '$(srcdir)/$<'; fi`
+.s.lo:
+       $(LIBTOOL) --mode=compile --tag=CC $(CCAS) $(COMPILE_FLAGS) `test -f '$<' || echo '$(srcdir)/'`$<
+
+# .S assembler, preprocessed with cpp.
+#
+# It's necessary to run $(CPP) separately, since it seems not all compilers
+# recognise .S files, in particular "cc" on HP-UX 10 and 11 doesn't (and
+# will silently do nothing if given a .S).
+#
+# For .lo we need a helper script, as described below for .asm.lo.
+#
+.S.o:
+       $(CPP) $(PREPROCESS_FLAGS) `test -f '$<' || echo '$(srcdir)/'`$< | grep -v '^#' >tmp-$*.s
+       $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@
+       $(RM_TMP) tmp-$*.s
+.S.obj:
+       $(CPP) $(PREPROCESS_FLAGS) `if test -f '$<'; then $(CYGPATH_W) '$<'; else $(CYGPATH_W) '$(srcdir)/$<'; fi` | grep -v '^#' >tmp-$*.s
+       $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@
+       $(RM_TMP) tmp-$*.s
+.S.lo:
+       $(LIBTOOL) --mode=compile --tag=CC $(top_srcdir)/mpn/cpp-ccas --cpp="$(CPP) $(PREPROCESS_FLAGS)" $(CCAS) $(COMPILE_FLAGS) `test -f '$<' || echo '$(srcdir)/'`$<
+
+# .asm assembler, preprocessed with m4.
+#
+# .o and .obj are non-PIC and just need m4 followed by a compile.
+#
+# .lo is a bit tricky.  Libtool (as of version 1.5) has foo.lo as a little
+# text file, and .libs/foo.o and foo.o as the PIC and non-PIC objects,
+# respectively.  It'd be asking for lots of trouble to try to create foo.lo
+# ourselves, so instead arrange to invoke libtool like a --mode=compile, but
+# with a special m4-ccas script which first m4 preprocesses, then compiles.
+# --tag=CC is necessary since foo.asm is otherwise unknown to libtool.
+#
+# Libtool adds -DPIC when building a shared object and the .asm files look
+# for that.  But it should be noted that the other PIC flags are on occasion
+# important too, in particular FreeBSD 2.2.8 gas 1.92.3 requires -k before
+# it accepts PIC constructs like @GOT, and gcc adds that flag only under
+# -fPIC.  (Later versions of gas are happy to accept PIC stuff any time.)
+#
+.asm.o:
+       $(M4) -DOPERATION_$* `test -f '$<' || echo '$(srcdir)/'`$< >tmp-$*.s
+       $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@
+       $(RM_TMP) tmp-$*.s
+.asm.obj:
+       $(M4) -DOPERATION_$* `if test -f '$<'; then $(CYGPATH_W) '$<'; else $(CYGPATH_W) '$(srcdir)/$<'; fi` >tmp-$*.s
+       $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@
+       $(RM_TMP) tmp-$*.s
+.asm.lo:
+       $(LIBTOOL) --mode=compile --tag=CC $(top_srcdir)/mpn/m4-ccas --m4="$(M4)" $(CCAS) $(COMPILE_FLAGS) `test -f '$<' || echo '$(srcdir)/'`$<
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/tune/README b/tune/README

new file mode 100644 (file)

index 0000000..b6e41ed
--- /dev/null
+++ b/tune/README
@@ -0,0 +1,484 @@
+Copyright 2000, 2001, 2002, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+
+
+
+               GMP SPEED MEASURING AND PARAMETER TUNING
+
+
+The programs in this directory are for knowledgeable users who want to
+measure GMP routines on their machine, and perhaps tweak some settings or
+identify things that can be improved.
+
+The programs here are tools, not ready-to-run solutions.  Nothing is built
+in a normal "make all", but various Makefile targets described below exist.
+
+Relatively few systems and CPUs have been tested, so be sure to verify that
+results are sensible before relying on them.
+
+
+
+
+MISCELLANEOUS NOTES
+
+--enable-assert
+
+    Don't configure with --enable-assert, since the extra code added by
+    assertion checking may influence measurements.
+
+Direct mapped caches
+
+    Some effort has been made to accommodate CPUs with direct mapped caches,
+    by putting data blocks more or less contiguously on the stack.  But this
+    will depend on TMP_ALLOC using alloca, and even then it may or may not
+    be enough.
+
+FreeBSD 4.2 i486 getrusage
+
+    This getrusage seems to be a bit doubtful, it looks like it's
+    microsecond accurate, but sometimes ru_utime remains unchanged after a
+    time of many microseconds has elapsed.  It'd be good to detect this in
+    the time.c initializations, but for now the suggestion is to pretend it
+    doesn't exist.
+
+        ./configure ac_cv_func_getrusage=no
+
+NetBSD 1.4.1 m68k macintosh time base
+
+    On this system it's been found getrusage often goes backwards, making it
+    unusable (time.c getrusage_backwards_p detects this).  gettimeofday
+    sometimes doesn't update atomically when it crosses a 1 second boundary.
+    Not sure what to do about this.  Expect possible intermittent failures.
+
+SCO OpenUNIX 8 /etc/hw
+
+    /etc/hw takes about a second to return the cpu frequency, which suggests
+    perhaps it's measuring each time it runs.  If this is annoying when
+    running the speed program repeatedly then set a GMP_CPU_FREQUENCY
+    environment variable (see TIME BASE section below).
+
+Timing on GNU/Linux
+
+    On Linux, timing currently uses the cycle counter. This is unreliable,
+    since the counter is not saved and restored at context switches (unlike
+    FreeBSD and Solaris where the cycle counter is "virtualized").
+
+    Using the clock_gettime method with CLOCK_PROCESS_CPUTIME_ID (posix) or
+    CLOCK_VIRTUAL (BSD) should be more reliable. To get clock_gettime
+    with glibc, one has to link with -lrt (which also drags in the pthreads
+    threading library). configure.in must be hacked to detect this and
+    arrange proper linking. Something like
+
+      old_LIBS="$LIBS"
+      AC_SEARCH_LIBS(clock_gettime, rt, [AC_DEFINE(HAVE_CLOCK_GETTIME)])
+      TUNE_LIBS="$LIBS"
+      LIBS="$old_LIBS"
+
+      AC_SUBST(TUNE_LIBS)
+
+    might work.
+
+Low resolution timebase
+
+    Parameter tuning can be very time consuming if the only timebase
+    available is a 10 millisecond clock tick, to the point of being
+    unusable.  This is currently the case on VAX and ARM systems.
+
+
+
+
+PARAMETER TUNING
+
+The "tuneup" program runs some tests designed to find the best settings for
+various thresholds, like MUL_TOOM22_THRESHOLD.  Its output can be put
+into gmp-mparam.h.  The program is built and run with
+
+        make tune
+
+If the thresholds indicated are grossly different from the values in the
+selected gmp-mparam.h then there may be a performance boost in applicable
+size ranges by changing gmp-mparam.h accordingly.
+
+Be sure to do a full reconfigure and rebuild to get any newly set thresholds
+to take effect.  A partial rebuild is enough sometimes, but a fresh
+configure and make is certain to be correct.
+
+If a CPU has specific tuned parameters coming from a gmp-mparam.h in one of
+the mpn subdirectories then the values from "make tune" should be similar.
+But check that the configured CPU is right and there are no machine specific
+effects causing a difference.
+
+It's hoped the compiler and options used won't have too much effect on
+thresholds, since for most CPUs they ultimately come down to comparisons
+between assembler subroutines.  Missing out on the longlong.h macros by not
+using gcc will probably have an effect.
+
+Some thresholds produced by the tune program are merely single values chosen
+from what's a range of sizes where two algorithms are pretty much the same
+speed.  When this happens the program is likely to give somewhat different
+values on successive runs.  This is noticeable on the toom3 thresholds for
+instance.
+
+
+
+
+SPEED PROGRAM
+
+The "speed" program can be used for measuring and comparing various
+routines, and producing tables of data or gnuplot graphs.  Compile it with
+
+       make speed
+
+(Or on DOS systems "make speed.exe".)
+
+Here are some examples of how to use it.  Check the code for all the
+options.
+
+Draw a graph of mpn_mul_n, stepping through sizes by 10 or a factor of 1.05
+(whichever is greater).
+
+        ./speed -s 10-5000 -t 10 -f 1.05 -P foo mpn_mul_n
+       gnuplot foo.gnuplot
+
+Compare mpn_add_n and an mpn_lshift by 1, showing times in cycles and
+showing under mpn_lshift the difference between it and mpn_add_n.
+
+       ./speed -s 1-40 -c -d mpn_add_n mpn_lshift.1
+
+Using option -c for times in cycles is interesting but normally only
+necessary when looking carefully at assembler subroutines.  You might think
+it would always give an integer value, but this doesn't happen in practice,
+probably due to overheads in the time measurements.
+
+In the free-form output the "#" symbol against a measurement means the
+corresponding routine is fastest at that size.  This is a convenient visual
+cue when comparing different routines.  The graph data files <name>.data
+don't get this since it would upset gnuplot or other data viewers.
+
+
+
+
+TIME BASE
+
+The time measuring method is determined in time.c, based on what the
+configured host has available.  A cycle counter is preferred, possibly
+supplemented by another method if the counter has a limited range.  A
+microsecond accurate getrusage() or gettimeofday() will work quite well too.
+
+The cycle counters (except possibly on alpha) and gettimeofday() will depend
+on the machine being otherwise idle, or rather on other jobs not stealing
+CPU time from the measuring program.  Short routines (those that complete
+within a timeslice) should work even on a busy machine.
+
+Some trouble is taken by speed_measure() in common.c to avoid ill effects
+from sporadic interrupts, or other intermittent things (like cron waking up
+every minute).  But generally an idle machine will be necessary to be
+certain of consistent results.
+
+The CPU frequency is needed to convert between cycles and seconds, or for
+when a cycle counter is supplemented by getrusage() etc.  The speed program
+will convert as necessary according to the output format requested.  The
+tune program will work with either cycles or seconds.
+
+freq.c knows how to get the frequency on some systems, or can measure a
+cycle counter against gettimeofday() or getrusage(), but when that fails, or
+needs to be overridden, an environment variable GMP_CPU_FREQUENCY can be
+used (in Hertz).  For example in "bash" on a 650 MHz machine,
+
+       export GMP_CPU_FREQUENCY=650e6
+
+A high precision time base makes it possible to get accurate measurements in
+a shorter time.
+
+
+
+
+EXAMPLE COMPARISONS - VARIOUS
+
+Here are some ideas for things that can be done with the speed program.
+
+There's always going to be a certain amount of overhead in the time
+measurements, due to reading the time base, and in the loop that runs a
+routine enough times to get a reading of the desired precision.  Noop
+functions taking various arguments are available to measure this.  The
+"overhead" printed by the speed program each time in its intro is the "noop"
+routine, but note that this is just for information, it isn't deducted from
+the times printed or anything.
+
+       ./speed -s 1 noop noop_wxs noop_wxys
+
+To see how many cycles per limb a routine is taking, look at the time
+increase when the size increments, using option -D.  This avoids fixed
+overheads in the measuring.  Also, remember many of the assembler routines
+have unrolled loops, so it might be necessary to compare times at, say, 16,
+32, 48, 64 etc to see what the unrolled part is taking, as opposed to any
+finishing off.
+
+        ./speed -s 16-64 -t 16 -C -D mpn_add_n
+
+The -C option on its own gives cycles per limb, but is really only useful at
+big sizes where fixed overheads are small compared to the code doing the
+real work.  Remember of course memory caching and/or page swapping will
+affect results at large sizes.
+
+        ./speed -s 500000 -C mpn_add_n
+
+Once a calculation stops fitting in the CPU data cache, it's going to start
+taking longer.  Exactly where this happens depends on the cache priming in
+the measuring routines, and on what sort of "least recently used" the
+hardware does.  Here's an example for a CPU with a 16kbyte L1 data cache and
+32-bit limb, showing a suddenly steeper curve for mpn_add_n at about 2000
+limbs.
+
+        ./speed -s 1-4000 -t 5 -f 1.02 -P foo mpn_add_n
+       gnuplot foo.gnuplot
+
+When a routine has an unrolled loop for, say, multiples of 8 limbs and then
+an ordinary loop for the remainder, it can happen that it's actually faster
+to do an operation on, say, 8 limbs than it is on 7 limbs.  The following
+draws a graph of mpn_sub_n, to see whether times smoothly increase with
+size.
+
+        ./speed -s 1-100 -c -P foo mpn_sub_n
+       gnuplot foo.gnuplot
+
+If mpn_lshift and mpn_rshift have special case code for shifts by 1, it
+ought to be faster (or at least not slower) than shifting by, say, 2 bits.
+
+        ./speed -s 1-200 -c mpn_rshift.1 mpn_rshift.2
+
+An mpn_lshift by 1 can be done by mpn_add_n adding a number to itself, and
+if the lshift isn't faster there's an obvious improvement that's possible.
+
+        ./speed -s 1-200 -c mpn_lshift.1 mpn_add_n_self
+
+On some CPUs (AMD K6 for example) an "in-place" mpn_add_n where the
+destination is one of the sources is faster than a separate destination.
+Here's an example to see this.  ".1" selects dst==src1 for mpn_add_n (and
+mpn_sub_n), for other values see speed.h SPEED_ROUTINE_MPN_BINARY_N_CALL.
+
+        ./speed -s 1-200 -c mpn_add_n mpn_add_n.1
+
+The gmp manual points out that divisions by powers of two should be done
+using a right shift because it'll be significantly faster than an actual
+division.  The following shows by what factor mpn_rshift is faster than
+mpn_divrem_1, using division by 32 as an example.
+
+        ./speed -s 10-20 -r mpn_rshift.5 mpn_divrem_1.32
+
+
+
+
+EXAMPLE COMPARISONS - MULTIPLICATION
+
+mul_basecase takes a ".<r>" parameter which is the first (larger) size
+parameter.  For example to show speeds for 20x1 up to 20x15 in cycles,
+
+        ./speed -s 1-15 -c mpn_mul_basecase.20
+
+mul_basecase with no parameter does an NxN multiply, so for example to show
+speeds in cycles for 1x1, 2x2, 3x3, etc, up to 20x20,
+
+        ./speed -s 1-20 -c mpn_mul_basecase
+
+sqr_basecase is implemented by a "triangular" method on most CPUs, making it
+up to twice as fast as mul_basecase.  In practice loop overheads and the
+products on the diagonal mean it falls short of this.  Here's an example
+running the two and showing by what factor an NxN mul_basecase is slower
+than an NxN sqr_basecase.  (Some versions of sqr_basecase only allow sizes
+below SQR_TOOM2_THRESHOLD, so if it crashes at that point don't worry.)
+
+        ./speed -s 1-20 -r mpn_sqr_basecase mpn_mul_basecase
+
+The technique described above with -CD for showing the time difference in
+cycles per limb between two size operations can be done on an NxN
+mul_basecase using -E to change the basis for the size increment to N*N.
+For instance a 20x20 operation is taken to be doing 400 limbs, and a 16x16
+doing 256 limbs.  The following therefore shows the per crossproduct speed
+of mul_basecase and sqr_basecase at around 20x20 limbs.
+
+        ./speed -s 16-20 -t 4 -CDE mpn_mul_basecase mpn_sqr_basecase
+
+Of course sqr_basecase isn't really doing NxN crossproducts, but it can be
+interesting to compare it to mul_basecase as if it was.  For sqr_basecase
+the -F option can be used to base the deltas on N*(N+1)/2 operations, which
+is the triangular products sqr_basecase does.  For example,
+
+        ./speed -s 16-20 -t 4 -CDF mpn_sqr_basecase
+
+Both -E and -F are preliminary and might change.  A consistent approach to
+using them when claiming certain per crossproduct or per triangularproduct
+speeds hasn't really been established, but the increment between speeds in
+the range karatsuba will call seems sensible, that being k to k/2.  For
+instance, if the karatsuba threshold was 20 for the multiply and 30 for the
+square,
+
+        ./speed -s 10-20 -t 10 -CDE mpn_mul_basecase
+        ./speed -s 15-30 -t 15 -CDF mpn_sqr_basecase
+
+
+
+EXAMPLE COMPARISONS - MALLOC
+
+The gmp manual recommends application programs avoid excessive initializing
+and clearing of mpz_t variables (and mpq_t and mpf_t too).  Every new
+variable will at a minimum go through an init, a realloc for its first
+store, and finally a clear.  Quite how long that takes depends on the C
+library.  The following compares an mpz_init/realloc/clear to a 10 limb
+mpz_add.  Don't be surprised if the mallocing is quite slow.
+
+        ./speed -s 10 -c mpz_init_realloc_clear mpz_add
+
+On some systems malloc and free are much slower when dynamic linked.  The
+speed-dynamic program can be used to see this.  For example the following
+measures malloc/free, first static then dynamic.
+
+        ./speed -s 10 -c malloc_free
+        ./speed-dynamic -s 10 -c malloc_free
+
+Of course a real world program has big problems if it's doing so many
+mallocs and frees that it gets slowed down by a dynamic linked malloc.
+
+
+
+
+
+EXAMPLE COMPARISONS - STRING CONVERSIONS
+
+mpn_get_str does a binary to string conversion.  The base is specified with
+a ".<r>" parameter, or decimal by default.  Power of 2 bases are much faster
+than general bases.  The following compares decimal and hex for instance.
+
+        ./speed -s 1-20 -c mpn_get_str mpn_get_str.16
+
+Smaller bases need more divisions to split a given size number, and so are
+slower.  The following compares base 3 and base 9.  On small operands 9 will
+be nearly twice as fast, though at bigger sizes this reduces since in the
+current implementation both divide repeatedly by 3^20 (or 3^40 for 64 bit
+limbs) and those divisions come to dominate.
+
+        ./speed -s 1-20 -cr mpn_get_str.3 mpn_get_str.9
+
+mpn_set_str does a string to binary conversion.  The base is specified with
+a ".<r>" parameter, or decimal by default.  Power of 2 bases are faster than
+general bases on large conversions.
+
+       ./speed -s 1-512 -f 2 -c mpn_set_str.8 mpn_set_str.10
+
+mpn_set_str also has some special case code for decimal which is a bit
+faster than the general case, basically by giving the compiler a chance to
+optimize some multiplications by 10.
+
+       ./speed -s 20-40 -c mpn_set_str.9 mpn_set_str.10 mpn_set_str.11
+
+
+
+
+EXAMPLE COMPARISONS - GCDs
+
+mpn_gcd_1 has a threshold for when to reduce using an initial x%y when both
+x and y are single limbs.  This isn't tuned currently, but a value can be
+established by a measurement like
+
+       ./speed -s 10-32 mpn_gcd_1.10
+
+This runs src[0] from 10 to 32 bits, and y fixed at 10 bits.  If the div
+threshold is high, say 31 so it's effectively disabled, then a 32x10 bit gcd
+is done by nibbling away at the 32-bit operands bit-by-bit.  When the
+threshold is small, say 1 bit, then an initial x%y is done to reduce it to a
+10x10 bit operation.
+
+The threshold in mpn/generic/gcd_1.c or the various assembler
+implementations can be tweaked up or down until there are no more speedups
+on interesting combinations of sizes.  Note that this affects only a 1x1
+limb operation and so isn't very important.  (An Nx1 limb operation always
+does an initial modular reduction, using mpn_mod_1 or mpn_modexact_1_odd.)
+
+
+
+
+SPEED PROGRAM EXTENSIONS
+
+Potentially lots of things could be made available in the program, but it's
+been left at only the things that have actually been wanted and are likely
+to be reasonably useful in the future.
+
+Extensions should be fairly easy to make though.  speed-ext.c is an example,
+in a style that should suit one-off tests, or new code fragments under
+development.
+
+many.pl is a script for generating a new speed program supplemented with
+alternate versions of the standard routines.  It can be used for measuring
+experimental code, or for comparing different implementations that exist
+within a CPU family.
+
+
+
+
+THRESHOLD EXAMINING
+
+The speed program can be used to examine the speeds of different algorithms
+to check the tune program has done the right thing.  For example to examine
+the karatsuba multiply threshold,
+
+       ./speed -s 5-40 mpn_mul_basecase mpn_kara_mul_n
+
+When examining the toom3 threshold, remember it depends on the karatsuba
+threshold, so the right karatsuba threshold needs to be compiled into the
+library first.  The tune program uses specially recompiled versions of
+mpn/mul_n.c etc for this reason, but the speed program simply uses the
+normal libgmp.la.
+
+Note further that the various routines may recurse into themselves on sizes
+far enough above applicable thresholds.  For example, mpn_kara_mul_n will
+recurse into itself on sizes greater than twice the compiled-in
+MUL_TOOM22_THRESHOLD.
+
+When doing the above comparison between mul_basecase and kara_mul_n what's
+probably of interest is mul_basecase versus a kara_mul_n that does one level
+of Karatsuba then calls to mul_basecase, but this only happens on sizes less
+than twice the compiled MUL_TOOM22_THRESHOLD.  A larger value for that
+setting can be compiled-in to avoid the problem if necessary.  The same
+applies to toom3 and DC, though in a trickier fashion.
+
+There are some upper limits on some of the thresholds, arising from arrays
+dimensioned according to a threshold (mpn_mul_n), or asm code with certain
+sized displacements (some x86 versions of sqr_basecase).  So putting huge
+values for the thresholds, even just for testing, may fail.
+
+
+
+
+FUTURE
+
+Make a program to check the time base is working properly, for small and
+large measurements.  Make it able to test each available method, including
+perhaps the apparent resolution of each.
+
+Make a general mechanism for specifying operand overlap, and a syntax like
+maybe "mpn_add_n.dst=src2" to select it.  Some measuring routines do this
+sort of thing with the "r" parameter currently.
+
+
+
+----------------
+Local variables:
+mode: text
+fill-column: 76
+End:
diff --git a/tune/alpha.asm b/tune/alpha.asm

new file mode 100644 (file)

index 0000000..b447462
--- /dev/null
+++ b/tune/alpha.asm
@@ -0,0 +1,48 @@
+dnl  Alpha time stamp counter access routine.
+
+dnl  Copyright 2000, 2005 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C void speed_cyclecounter (unsigned int p[2]);
+C
+
+C The rpcc instruction returns a 64-bit value split into two 32-bit fields.
+C The lower 32 bits are set by the hardware, and the upper 32 bits are set
+C by the operating system.  The real per-process cycle count is the sum of
+C these halves.
+
+C Unfortunately, some operating systems don't get this right.  NetBSD 1.3 is
+C known to sometimes put garbage in the upper half.  Whether newer NetBSD
+C versions get it right is unknown to us.
+
+C rpcc measures cycles elapsed in the user program and hence should be very
+C accurate even on a busy system.  Losing cache contents due to task
+C switching may have an effect though.
+
+ASM_START()
+PROLOGUE(speed_cyclecounter)
+       rpcc    r0                      C r0 = 64-bit rpcc value, two 32-bit fields
+       srl     r0,32,r1                C r1 = upper 32-bit field
+       addq    r1,r0,r0                C low 32 bits of r0 = sum of the two fields
+       stl     r0,0(r16)               C store low 32 bits of the sum as lower return word
+       stl     r31,4(r16)              C zero upper return word
+       ret     r31,(r26),1             C return through the address in r26
+EPILOGUE(speed_cyclecounter)
+ASM_END()
diff --git a/tune/common.c b/tune/common.c

new file mode 100644 (file)

index 0000000..4c66291
--- /dev/null
+++ b/tune/common.c
@@ -0,0 +1,2433 @@
+/* Shared speed subroutines.
+
+Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2008, 2009, 2010
+Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define __GMP_NO_ATTRIBUTE_CONST_PURE
+
+#include <errno.h>
+#include <fcntl.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h> /* for qsort */
+#include <string.h>
+#include <unistd.h>
+#if 0
+#include <sys/ioctl.h>
+#endif
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#include "tests.h"
+#include "speed.h"
+
+
+int   speed_option_addrs = 0;
+int   speed_option_verbose = 0;
+
+
+/* Provide __clz_tab even if it's not required, for the benefit of new code
+   being tested with many.pl. */
+#ifndef COUNT_LEADING_ZEROS_NEED_CLZ_TAB
+#define COUNT_LEADING_ZEROS_NEED_CLZ_TAB
+#include "mp_clz_tab.c"
+#undef COUNT_LEADING_ZEROS_NEED_CLZ_TAB
+#endif
+
+
+/* Called by speed_cache_fill() when s->cache == 1.  Both candidate
+   implementations below are compiled out with "#if 0", so as built this
+   function does nothing.  (Presumably meant to flush or evict the caches,
+   going by the name -- confirm before relying on it.)  */
+void
+pentium_wbinvd(void)
+{
+#if 0
+  /* Disabled variant 1: open /dev/wbinvd once (fd == -2 means "not tried
+     yet", -1 means "open failed") and issue ioctl 0 on it.  */
+  {
+    static int  fd = -2;
+
+    if (fd == -2)
+      {
+       fd = open ("/dev/wbinvd", O_RDWR);
+       if (fd == -1)
+         perror ("open /dev/wbinvd");
+      }
+
+    if (fd != -1)
+      ioctl (fd, 0, 0);
+  }
+#endif
+
+#if 0
+  /* Disabled variant 2: read through a WBINVDSIZE byte buffer, summing it,
+     and hand the sum to mpn_cache_fill_dummy().
+     NOTE(review): the malloc result is not checked.  */
+#define WBINVDSIZE  1024*1024*2
+  {
+    static char  *p = NULL;
+    int   i, sum;
+
+    if (p == NULL)
+      p = malloc (WBINVDSIZE);
+
+#if 0
+    for (i = 0; i < WBINVDSIZE; i++)
+      p[i] = i & 0xFF;
+#endif
+
+    sum = 0;
+    for (i = 0; i < WBINVDSIZE; i++)
+      sum += p[i];
+
+    mpn_cache_fill_dummy (sum);
+  }
+#endif
+}
+
+
+/* Three-way comparison of the doubles at p and q, in the form qsort()
+   wants: 1 if *p is greater, -1 if it is smaller, 0 otherwise.  */
+int
+double_cmp_ptr (const double *p, const double *q)
+{
+  const double  lhs = *p;
+  const double  rhs = *q;
+
+  /* Each comparison yields 0 or 1, so the difference is -1, 0 or 1.  */
+  return (lhs > rhs) - (lhs < rhs);
+}
+
+
+/* Measure the speed of a given routine.
+
+   The routine is run with enough repetitions to make it take at least
+   speed_precision * speed_unittime.  This aims to minimize the effects of a
+   limited accuracy time base and the overhead of the measuring itself.
+
+   Measurements are made looking for 4 results within TOLERANCE of each
+   other (or 3 for routines taking longer than 2 seconds).  This aims to get
+   an accurate reading even if some runs are bloated by interrupts or task
+   switches or whatever.
+
+   The given (*fun)() is expected to run its function "s->reps" many times
+   and return the total elapsed time measured using speed_starttime() and
+   speed_endtime().  If the function doesn't support the given s->size or
+   s->r, -1.0 should be returned.  See the various base routines below.  */
+
+double
+speed_measure (double (*fun) __GMP_PROTO ((struct speed_params *s)),
+              struct speed_params *s)
+{
+#define TOLERANCE    1.005  /* 0.5% */
+  /* More than this many 0.0 readings from fun is treated as fatal.  */
+  const int max_zeros = 10;
+
+  struct speed_params  s_dummy;
+  int     i, j, e;
+  /* t[] holds per-call times and is sorted in place once enough samples
+     exist; t_unsorted[] keeps them in attempt order for the diagnostic
+     printed on failure.  */
+  double  t[30];
+  double  t_unsorted[30];
+  double  reps_d;
+  int     zeros = 0;
+
+  /* Use dummy parameters if caller doesn't provide any.  Only a few special
+     "fun"s will cope with this, speed_noop() is one.  */
+  if (s == NULL)
+    {
+      memset (&s_dummy, '\0', sizeof (s_dummy));
+      s = &s_dummy;
+    }
+
+  /* Start from a single repetition; s->reps is only ever raised below and
+     is carried over from one sample to the next.  */
+  s->reps = 1;
+  s->time_divisor = 1.0;
+  for (i = 0; i < numberof (t); i++)
+    {
+      /* Repeat the run, growing s->reps, until it lasts at least
+        speed_unittime * speed_precision.  */
+      for (;;)
+       {
+         /* Empty the operand lists so fun can register its operands again
+            (speed_operand_src/dst append and abort when full).  */
+         s->src_num = 0;
+         s->dst_num = 0;
+
+         t[i] = (*fun) (s);
+
+         if (speed_option_verbose >= 3)
+           gmp_printf("size=%ld reps=%u r=%Md attempt=%d  %.9f\n",
+                      (long) s->size, s->reps, s->r, i, t[i]);
+
+         /* A 0.0 reading is retried with the same reps, up to max_zeros
+            times in total across the whole measurement.  */
+         if (t[i] == 0.0)
+           {
+             zeros++;
+             if (zeros > max_zeros)
+               {
+                 fprintf (stderr, "Fatal error: too many (%d) failed measurements (0.0)\n", zeros);
+                 abort ();
+               }
+             continue;
+           }
+
+         /* fun reports an unsupported s->size or s->r with -1.0; pass that
+            straight back to the caller.  */
+         if (t[i] == -1.0)
+           return -1.0;
+
+         if (t[i] >= speed_unittime * speed_precision)
+           break;
+
+         /* go to a value of reps to make t[i] >= precision */
+         /* Scale reps by target/measured time with a 10% margin; readings
+            below speed_unittime are clamped up to it by MAX.  */
+         reps_d = ceil (1.1 * s->reps
+                        * speed_unittime * speed_precision
+                        / MAX (t[i], speed_unittime));
+         if (reps_d > 2e9 || reps_d < 1.0)
+           {
+             fprintf (stderr, "Fatal error: new reps bad: %.2f\n", reps_d);
+             fprintf (stderr, "  (old reps %u, unittime %.4g, precision %d, t[i] %.4g)\n",
+                      s->reps, speed_unittime, speed_precision, t[i]);
+             abort ();
+           }
+         s->reps = (unsigned) reps_d;
+       }
+      /* Convert the total for the run into a time per repetition.  */
+      t[i] /= s->reps;
+      t_unsorted[i] = t[i];
+
+      /* With precision 0 the first usable reading is returned as is.
+        NOTE(review): unlike the return further down, this one is not
+        divided by s->time_divisor -- confirm that is intended.  */
+      if (speed_precision == 0)
+       return t[i];
+
+      /* require 3 values within TOLERANCE when >= 2 secs, 4 when below */
+      if (t[0] >= 2.0)
+       e = 3;
+      else
+       e = 4;
+
+      /* Look for e many t[]'s within TOLERANCE of each other to consider a
+        valid measurement.  Return smallest among them.  */
+      /* t[0..i] is sorted ascending, then each run of e consecutive values
+        t[j-e+1..j] is tested.
+        NOTE(review): j stops at i-1, so the largest sample t[i] never
+        ends a window and e+1 samples are needed before any test happens;
+        confirm whether "j <= i" was meant.  */
+      if (i >= e)
+       {
+         qsort (t, i+1, sizeof(t[0]), (qsort_function_t) double_cmp_ptr);
+         for (j = e-1; j < i; j++)
+           if (t[j] <= t[j-e+1] * TOLERANCE)
+             return t[j-e+1] / s->time_divisor;
+       }
+    }
+
+  /* All numberof (t) samples were used without finding e close values:
+     dump both arrays and report failure with -1.0.  */
+  fprintf (stderr, "speed_measure() could not get %d results within %.1f%%\n",
+          e, (TOLERANCE-1.0)*100.0);
+  fprintf (stderr, "    unsorted         sorted\n");
+  fprintf (stderr, "  %.12f    %.12f    is about 0.5%%\n",
+          t_unsorted[0]*(TOLERANCE-1.0), t[0]*(TOLERANCE-1.0));
+  for (i = 0; i < numberof (t); i++)
+    fprintf (stderr, "  %.09f       %.09f\n", t_unsorted[i], t[i]);
+
+  return -1.0;
+}
+
+
+/* Read all of ptr,size to get it into the CPU memory cache.
+
+   A call to mpn_cache_fill_dummy() is used to make sure the compiler
+   doesn't optimize away the whole loop.  Using "volatile mp_limb_t sum"
+   would work too, but the function call means we don't rely on every
+   compiler actually implementing volatile properly.
+
+   mpn_cache_fill_dummy() is in a separate source file to stop gcc thinking
+   it can inline it.  */
+
+void
+mpn_cache_fill (mp_srcptr ptr, mp_size_t size)
+{
+  mp_limb_t  total = 0;
+  mp_size_t  left = size;
+
+  /* Touch every limb once; the running total exists only so the reads
+     have a visible use.  */
+  while (left > 0)
+    {
+      total += *ptr++;
+      left--;
+    }
+
+  /* Hand the total to an out-of-line function so the loop is kept.  */
+  mpn_cache_fill_dummy (total);
+}
+
+
+/* Cache priming used by speed_cache_fill() for each destination operand.
+   As built it only reads ptr,size through mpn_cache_fill(); the two
+   variants that would actually write to the block are under "#if 0".  */
+void
+mpn_cache_fill_write (mp_ptr ptr, mp_size_t size)
+{
+  mpn_cache_fill (ptr, size);
+
+#if 0
+  /* Disabled: overwrite the block with random limbs.  */
+  mpn_random (ptr, size);
+#endif
+
+#if 0
+  /* Disabled: overwrite each limb with its own index.  */
+  mp_size_t  i;
+
+  for (i = 0; i < size; i++)
+    ptr[i] = i;
+#endif
+}
+
+
+/* Record ptr,size as the next source operand in s->src[], for later use
+   by speed_cache_fill().  Aborts the program if s->src[] is full.  */
+void
+speed_operand_src (struct speed_params *s, mp_ptr ptr, mp_size_t size)
+{
+  if (s->src_num < numberof (s->src))
+    {
+      /* Free slot available: fill it and advance the count.  */
+      s->src[s->src_num].ptr = ptr;
+      s->src[s->src_num].size = size;
+      s->src_num++;
+      return;
+    }
+
+  fprintf (stderr, "speed_operand_src: no room left in s->src[]\n");
+  abort ();
+}
+
+
+/* Record ptr,size as the next destination operand in s->dst[], for later
+   use by speed_cache_fill().  Aborts the program if s->dst[] is full.  */
+void
+speed_operand_dst (struct speed_params *s, mp_ptr ptr, mp_size_t size)
+{
+  if (s->dst_num < numberof (s->dst))
+    {
+      /* Free slot available: fill it and advance the count.  */
+      s->dst[s->dst_num].ptr = ptr;
+      s->dst[s->dst_num].size = size;
+      s->dst_num++;
+      return;
+    }
+
+  fprintf (stderr, "speed_operand_dst: no room left in s->dst[]\n");
+  abort ();
+}
+
+
+/* Prime the cache for the operands registered in s, according to
+   s->cache: 0 reads every dst and src operand, 1 calls pentium_wbinvd(),
+   anything else does nothing.  With the "addrs" option set, the operand
+   addresses are printed whenever they differ from the previous call.  */
+void
+speed_cache_fill (struct speed_params *s)
+{
+  /* Parameters seen on the previous call, for change detection.  */
+  static struct speed_params  prev;
+  int  i;
+
+  /* FIXME: need a better way to get the format string for a pointer */
+
+  if (speed_option_addrs)
+    {
+      int  changed;
+
+      /* A different operand count or any different pointer counts as a
+         change.  */
+      changed = ! (s->dst_num == prev.dst_num && s->src_num == prev.src_num);
+      for (i = 0; i < s->dst_num; i++)
+        if (s->dst[i].ptr != prev.dst[i].ptr)
+          changed = 1;
+      for (i = 0; i < s->src_num; i++)
+        if (s->src[i].ptr != prev.src[i].ptr)
+          changed = 1;
+
+      if (changed)
+        {
+          if (s->dst_num != 0)
+            {
+              printf ("dst");
+              for (i = 0; i < s->dst_num; i++)
+                printf (" %08lX", (unsigned long) s->dst[i].ptr);
+              printf (" ");
+            }
+
+          if (s->src_num != 0)
+            {
+              printf ("src");
+              for (i = 0; i < s->src_num; i++)
+                printf (" %08lX", (unsigned long) s->src[i].ptr);
+              printf (" ");
+            }
+
+          /* The address of a local gives a rough idea of the stack
+             pointer for comparison.  */
+          printf ("  (cf sp approx %08lX)\n", (unsigned long) &changed);
+        }
+
+      prev = *s;
+    }
+
+  if (s->cache == 0)
+    {
+      for (i = 0; i < s->dst_num; i++)
+        mpn_cache_fill_write (s->dst[i].ptr, s->dst[i].size);
+      for (i = 0; i < s->src_num; i++)
+        mpn_cache_fill (s->src[i].ptr, s->src[i].size);
+    }
+  else if (s->cache == 1)
+    pentium_wbinvd ();
+}
+
+
+/* Miscellaneous options accepted by tune and speed programs under -o. */
+
+/* Apply one "-o" option string s:
+
+     "addrs"       set speed_option_addrs
+     "verbose"     increment speed_option_verbose
+     "verbose=N"   set speed_option_verbose to N
+
+   Anything else is reported on stderr, like the other diagnostics in this
+   file, and the program exits with status 1.  */
+void
+speed_option_set (const char *s)
+{
+  int  n;
+
+  if (strcmp (s, "addrs") == 0)
+    {
+      speed_option_addrs = 1;
+    }
+  else if (strcmp (s, "verbose") == 0)
+    {
+      speed_option_verbose++;
+    }
+  else if (sscanf (s, "verbose=%d", &n) == 1)
+    {
+      speed_option_verbose = n;
+    }
+  else
+    {
+      /* An error message belongs on stderr so it is not mixed into (or
+         lost with) redirected measurement output on stdout.  */
+      fprintf (stderr, "Unrecognised -o option: %s\n", s);
+      exit (1);
+    }
+}
+
+
+/* The following are basic speed running routines for various gmp functions.
+   Many are very similar and use speed.h macros.
+
+   Each routine allocates its own destination space for the result of the
+   function, because only it can know what the function needs.
+
+   speed_starttime() and speed_endtime() are put tight around the code to be
+   measured.  Any setups are done outside the timed portion.
+
+   Each routine is responsible for its own cache priming.
+   speed_cache_fill() is a good way to do this, see examples in speed.h.
+   One cache priming possibility, for CPUs with write-allocate cache, and
+   functions that don't take too long, is to do one dummy call before timing
+   so as to cache everything that gets used.  But speed_measure() runs a
+   routine at least twice and will take the smaller time, so this might not
+   be necessary.
+
+   Data alignment will be important, for source, destination and temporary
+   workspace.  A routine can align its destination and workspace.  Programs
+   using the routines will ensure s->xp and s->yp are aligned.  Aligning
+   onto a CACHE_LINE_SIZE boundary is suggested.  s->align_wp and
+   s->align_wp2 should be respected where it makes sense to do so.
+   SPEED_TMP_ALLOC_LIMBS is a good way to do this.
+
+   A loop of the following form can be expected to turn into good assembler
+   code on most CPUs, thereby minimizing overhead in the measurement.  It
+   can always be assumed s->reps >= 1.
+
+         i = s->reps
+         do
+           foo();
+         while (--i != 0);
+
+   Additional parameters might be added to "struct speed_params" in the
+   future.  Routines should ignore anything they don't use.
+
+   s->size can be used creatively, and s->xp and s->yp can be ignored.  For
+   example, speed_mpz_fac_ui() uses s->size as n for the factorial.  s->r is
+   just a user-supplied parameter.  speed_mpn_lshift() uses it as a shift,
+   speed_mpn_mul_1() uses it as a multiplier.  */
+
+
+/* MPN_COPY etc can be macros, so the _CALL forms are necessary */
+/* Time MPN_COPY; the body is the SPEED_ROUTINE_MPN_COPY expansion.  */
+double
+speed_MPN_COPY (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_COPY (MPN_COPY);
+}
+/* Time MPN_COPY_INCR; the body is the SPEED_ROUTINE_MPN_COPY expansion.  */
+double
+speed_MPN_COPY_INCR (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_COPY (MPN_COPY_INCR);
+}
+/* Time MPN_COPY_DECR; the body is the SPEED_ROUTINE_MPN_COPY expansion.  */
+double
+speed_MPN_COPY_DECR (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_COPY (MPN_COPY_DECR);
+}
+#if HAVE_NATIVE_mpn_copyi
+/* Time mpn_copyi (only built when HAVE_NATIVE_mpn_copyi is set); the body
+   is the SPEED_ROUTINE_MPN_COPY expansion.  */
+double
+speed_mpn_copyi (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_COPY (mpn_copyi);
+}
+#endif
+#if HAVE_NATIVE_mpn_copyd
+/* Time mpn_copyd (only built when HAVE_NATIVE_mpn_copyd is set); the body
+   is the SPEED_ROUTINE_MPN_COPY expansion.  */
+double
+speed_mpn_copyd (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_COPY (mpn_copyd);
+}
+#endif
+/* Time the C library memcpy through SPEED_ROUTINE_MPN_COPY_BYTES.  */
+double
+speed_memcpy (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_COPY_BYTES (memcpy);
+}
+/* Time mpn_com, reusing the SPEED_ROUTINE_MPN_COPY harness.  */
+double
+speed_mpn_com (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_COPY (mpn_com);
+}
+
+
+/* Time mpn_addmul_1; the body is the SPEED_ROUTINE_MPN_UNARY_1 expansion.  */
+double
+speed_mpn_addmul_1 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_UNARY_1 (mpn_addmul_1);
+}
+/* Time mpn_submul_1; the body is the SPEED_ROUTINE_MPN_UNARY_1 expansion.  */
+double
+speed_mpn_submul_1 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_UNARY_1 (mpn_submul_1);
+}
+
+#if HAVE_NATIVE_mpn_addmul_2
+/* Time the native mpn_addmul_2 through SPEED_ROUTINE_MPN_UNARY_2.  */
+double
+speed_mpn_addmul_2 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_UNARY_2 (mpn_addmul_2);
+}
+#endif
+#if HAVE_NATIVE_mpn_addmul_3
+/* Time the native mpn_addmul_3 through SPEED_ROUTINE_MPN_UNARY_3.  */
+double
+speed_mpn_addmul_3 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_UNARY_3 (mpn_addmul_3);
+}
+#endif
+#if HAVE_NATIVE_mpn_addmul_4
+/* Time the native mpn_addmul_4 through SPEED_ROUTINE_MPN_UNARY_4.  */
+double
+speed_mpn_addmul_4 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_UNARY_4 (mpn_addmul_4);
+}
+#endif
+#if HAVE_NATIVE_mpn_addmul_5
+/* Time the native mpn_addmul_5 through SPEED_ROUTINE_MPN_UNARY_5.  */
+double
+speed_mpn_addmul_5 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_UNARY_5 (mpn_addmul_5);
+}
+#endif
+#if HAVE_NATIVE_mpn_addmul_6
+/* Time the native mpn_addmul_6 through SPEED_ROUTINE_MPN_UNARY_6.  */
+double
+speed_mpn_addmul_6 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_UNARY_6 (mpn_addmul_6);
+}
+#endif
+#if HAVE_NATIVE_mpn_addmul_7
+/* Time the native mpn_addmul_7 through SPEED_ROUTINE_MPN_UNARY_7.  */
+double
+speed_mpn_addmul_7 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_UNARY_7 (mpn_addmul_7);
+}
+#endif
+#if HAVE_NATIVE_mpn_addmul_8
+/* Time the native mpn_addmul_8 through SPEED_ROUTINE_MPN_UNARY_8.  */
+double
+speed_mpn_addmul_8 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_UNARY_8 (mpn_addmul_8);
+}
+#endif
+
+/* Time mpn_mul_1; the body is the SPEED_ROUTINE_MPN_UNARY_1 expansion.  */
+double
+speed_mpn_mul_1 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_UNARY_1 (mpn_mul_1);
+}
+/* Time mpn_mul_1 through the _INPLACE form of the unary harness.  */
+double
+speed_mpn_mul_1_inplace (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_UNARY_1_INPLACE (mpn_mul_1);
+}
+
+#if HAVE_NATIVE_mpn_mul_2
+/* Time the native mpn_mul_2 through SPEED_ROUTINE_MPN_UNARY_2.  */
+double
+speed_mpn_mul_2 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_UNARY_2 (mpn_mul_2);
+}
+#endif
+#if HAVE_NATIVE_mpn_mul_3
+/* Time the native mpn_mul_3 through SPEED_ROUTINE_MPN_UNARY_3.  */
+double
+speed_mpn_mul_3 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_UNARY_3 (mpn_mul_3);
+}
+#endif
+#if HAVE_NATIVE_mpn_mul_4
+/* Time the native mpn_mul_4 through SPEED_ROUTINE_MPN_UNARY_4.  */
+double
+speed_mpn_mul_4 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_UNARY_4 (mpn_mul_4);
+}
+#endif
+
+
+/* Time mpn_lshift; the body is the SPEED_ROUTINE_MPN_UNARY_1 expansion.  */
+double
+speed_mpn_lshift (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_UNARY_1 (mpn_lshift);
+}
+/* Time mpn_lshiftc; the body is the SPEED_ROUTINE_MPN_UNARY_1 expansion.  */
+double
+speed_mpn_lshiftc (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_UNARY_1 (mpn_lshiftc);
+}
+/* Time mpn_rshift; the body is the SPEED_ROUTINE_MPN_UNARY_1 expansion.  */
+double
+speed_mpn_rshift (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_UNARY_1 (mpn_rshift);
+}
+
+
+/* The carry-in variants (if available) are good for measuring because they
+   won't skip a division if high<divisor.  Alternately, use -1 as a divisor
+   with the plain _1 forms. */
+/* Time mpn_divrem_1 through SPEED_ROUTINE_MPN_DIVREM_1.  */
+double
+speed_mpn_divrem_1 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_DIVREM_1 (mpn_divrem_1);
+}
+/* Time mpn_divrem_1 through the "1F" harness (presumably the fraction
+   limbs case -- confirm against the macro definition).  */
+double
+speed_mpn_divrem_1f (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_DIVREM_1F (mpn_divrem_1);
+}
+#if HAVE_NATIVE_mpn_divrem_1c
+/* Time the native carry-in mpn_divrem_1c through
+   SPEED_ROUTINE_MPN_DIVREM_1C.  */
+double
+speed_mpn_divrem_1c (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_DIVREM_1C (mpn_divrem_1c);
+}
+/* Time the native carry-in mpn_divrem_1c through the "1CF" harness.  */
+double
+speed_mpn_divrem_1cf (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_DIVREM_1CF (mpn_divrem_1c);
+}
+#endif
+
+/* Time the mpn_divrem_1_div variant through SPEED_ROUTINE_MPN_DIVREM_1.  */
+double
+speed_mpn_divrem_1_div (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_DIVREM_1 (mpn_divrem_1_div);
+}
+/* Time the mpn_divrem_1_div variant through the "1F" harness.  */
+double
+speed_mpn_divrem_1f_div (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_DIVREM_1F (mpn_divrem_1_div);
+}
+/* Time the mpn_divrem_1_inv variant through SPEED_ROUTINE_MPN_DIVREM_1.  */
+double
+speed_mpn_divrem_1_inv (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_DIVREM_1 (mpn_divrem_1_inv);
+}
+/* Time the mpn_divrem_1_inv variant through the "1F" harness.  */
+double
+speed_mpn_divrem_1f_inv (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_DIVREM_1F (mpn_divrem_1_inv);
+}
+/* Time the mpn_mod_1_div variant through SPEED_ROUTINE_MPN_MOD_1.  */
+double
+speed_mpn_mod_1_div (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_MOD_1 (mpn_mod_1_div);
+}
+/* Time the mpn_mod_1_inv variant through SPEED_ROUTINE_MPN_MOD_1.  */
+double
+speed_mpn_mod_1_inv (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_MOD_1 (mpn_mod_1_inv);
+}
+
+/* Time mpn_preinv_divrem_1 through SPEED_ROUTINE_MPN_PREINV_DIVREM_1.  */
+double
+speed_mpn_preinv_divrem_1 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_PREINV_DIVREM_1 (mpn_preinv_divrem_1);
+}
+/* Time mpn_preinv_divrem_1 through the "1F" form of its harness.  */
+double
+speed_mpn_preinv_divrem_1f (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_PREINV_DIVREM_1F (mpn_preinv_divrem_1);
+}
+
+#if GMP_NUMB_BITS % 4 == 0
+/* Time mpn_mod_34lsub1 (only built when GMP_NUMB_BITS is a multiple of 4)
+   through SPEED_ROUTINE_MPN_MOD_34LSUB1.  */
+double
+speed_mpn_mod_34lsub1 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_MOD_34LSUB1 (mpn_mod_34lsub1);
+}
+#endif
+
+/* Time mpn_divrem_2 through SPEED_ROUTINE_MPN_DIVREM_2.  */
+double
+speed_mpn_divrem_2 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_DIVREM_2 (mpn_divrem_2);
+}
+/* Time the mpn_divrem_2_div variant through SPEED_ROUTINE_MPN_DIVREM_2.  */
+double
+speed_mpn_divrem_2_div (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_DIVREM_2 (mpn_divrem_2_div);
+}
+/* Time the mpn_divrem_2_inv variant through SPEED_ROUTINE_MPN_DIVREM_2.  */
+double
+speed_mpn_divrem_2_inv (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_DIVREM_2 (mpn_divrem_2_inv);
+}
+
+/* Time mpn_mod_1 through SPEED_ROUTINE_MPN_MOD_1.  */
+double
+speed_mpn_mod_1 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_MOD_1 (mpn_mod_1);
+}
+#if HAVE_NATIVE_mpn_mod_1c
+/* Time the native carry-in mpn_mod_1c through SPEED_ROUTINE_MPN_MOD_1C.  */
+double
+speed_mpn_mod_1c (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_MOD_1C (mpn_mod_1c);
+}
+#endif
+/* Time mpn_preinv_mod_1 through SPEED_ROUTINE_MPN_PREINV_MOD_1.  */
+double
+speed_mpn_preinv_mod_1 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_PREINV_MOD_1 (mpn_preinv_mod_1);
+}
+/* Time mpn_mod_1_1p; its companion mpn_mod_1_1p_cps is passed to the
+   harness as the second macro argument.  */
+double
+speed_mpn_mod_1_1 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_MOD_1_1 (mpn_mod_1_1p,mpn_mod_1_1p_cps);
+}
+/* Time mpn_mod_1s_2p; the harness also receives mpn_mod_1s_2p_cps and the
+   constant 2.  */
+double
+speed_mpn_mod_1_2 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_MOD_1_N (mpn_mod_1s_2p,mpn_mod_1s_2p_cps,2);
+}
+/* Time mpn_mod_1s_3p; the harness also receives mpn_mod_1s_3p_cps and the
+   constant 3.  */
+double
+speed_mpn_mod_1_3 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_MOD_1_N (mpn_mod_1s_3p,mpn_mod_1s_3p_cps,3);
+}
+/* Time mpn_mod_1s_4p; the harness also receives mpn_mod_1s_4p_cps and the
+   constant 4.  */
+double
+speed_mpn_mod_1_4 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_MOD_1_N (mpn_mod_1s_4p,mpn_mod_1s_4p_cps,4);
+}
+
+/* Time mpn_divexact_1 through SPEED_ROUTINE_MPN_DIVEXACT_1.  */
+double
+speed_mpn_divexact_1 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_DIVEXACT_1 (mpn_divexact_1);
+}
+
+/* Time mpn_divexact_by3, reusing the SPEED_ROUTINE_MPN_COPY harness.  */
+double
+speed_mpn_divexact_by3 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_COPY (mpn_divexact_by3);
+}
+
+/* Time mpn_bdiv_dbm1c through SPEED_ROUTINE_MPN_BDIV_DBM1C.  */
+double
+speed_mpn_bdiv_dbm1c (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_BDIV_DBM1C (mpn_bdiv_dbm1c);
+}
+
+/* Time mpn_bdiv_q_1 through SPEED_ROUTINE_MPN_BDIV_Q_1.  */
+double
+speed_mpn_bdiv_q_1 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_BDIV_Q_1 (mpn_bdiv_q_1);
+}
+
+/* Time mpn_pi1_bdiv_q_1 through SPEED_ROUTINE_MPN_PI1_BDIV_Q_1.  */
+double
+speed_mpn_pi1_bdiv_q_1 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_PI1_BDIV_Q_1 (mpn_pi1_bdiv_q_1);
+}
+
+#if HAVE_NATIVE_mpn_modexact_1_odd
+/* Time the native mpn_modexact_1_odd through
+   SPEED_ROUTINE_MPN_MODEXACT_1_ODD.  */
+double
+speed_mpn_modexact_1_odd (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_MODEXACT_1_ODD (mpn_modexact_1_odd);
+}
+#endif
+
+/* Time mpn_modexact_1c_odd through SPEED_ROUTINE_MPN_MODEXACT_1C_ODD.  */
+double
+speed_mpn_modexact_1c_odd (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_MODEXACT_1C_ODD (mpn_modexact_1c_odd);
+}
+
+/* Time mpz_mod through SPEED_ROUTINE_MPZ_MOD.  */
+double
+speed_mpz_mod (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPZ_MOD (mpz_mod);
+}
+
+/* Time mpn_sbpi1_div_qr through SPEED_ROUTINE_MPN_PI1_DIV.  "inv" is not
+   declared here, so it has to come from the macro; 2 and 0 are passed on
+   as further macro arguments.  */
+double
+speed_mpn_sbpi1_div_qr (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_PI1_DIV (mpn_sbpi1_div_qr, inv.inv32, 2,0);
+}
+/* Time mpn_dcpi1_div_qr through SPEED_ROUTINE_MPN_PI1_DIV, handing the
+   function the address of the macro's "inv"; 6 and 3 are passed on as
+   further macro arguments.  */
+double
+speed_mpn_dcpi1_div_qr (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_PI1_DIV (mpn_dcpi1_div_qr, &inv, 6,3);
+}
+/* Time mpn_sbpi1_divappr_q through SPEED_ROUTINE_MPN_PI1_DIV, with the
+   same extra arguments as speed_mpn_sbpi1_div_qr.  */
+double
+speed_mpn_sbpi1_divappr_q (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_PI1_DIV (mpn_sbpi1_divappr_q, inv.inv32, 2,0);
+}
+/* Time mpn_dcpi1_divappr_q through SPEED_ROUTINE_MPN_PI1_DIV, with the
+   same extra arguments as speed_mpn_dcpi1_div_qr.  */
+double
+speed_mpn_dcpi1_divappr_q (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_PI1_DIV (mpn_dcpi1_divappr_q, &inv, 6,3);
+}
+/* Time mpn_mu_div_qr; mpn_mu_div_qr_itch is given to the harness as the
+   second argument (presumably to size the scratch space).  */
+double
+speed_mpn_mu_div_qr (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_MU_DIV_QR (mpn_mu_div_qr, mpn_mu_div_qr_itch);
+}
+/* Time mpn_mu_divappr_q; mpn_mu_divappr_q_itch is given to the harness as
+   the second argument.  */
+double
+speed_mpn_mu_divappr_q (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_MU_DIV_Q (mpn_mu_divappr_q, mpn_mu_divappr_q_itch);
+}
+/* Time mpn_mu_div_q; mpn_mu_div_q_itch is given to the harness as the
+   second argument.  */
+double
+speed_mpn_mu_div_q (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_MU_DIV_Q (mpn_mu_div_q, mpn_mu_div_q_itch);
+}
+/* Time mpn_preinv_mu_div_qr; mpn_preinv_mu_div_qr_itch is given to the
+   harness as the second argument.  */
+double
+speed_mpn_mupi_div_qr (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_MUPI_DIV_QR (mpn_preinv_mu_div_qr, mpn_preinv_mu_div_qr_itch);
+}
+
+/* Time mpn_sbpi1_bdiv_qr through SPEED_ROUTINE_MPN_PI1_BDIV_QR.  */
+double
+speed_mpn_sbpi1_bdiv_qr (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_PI1_BDIV_QR (mpn_sbpi1_bdiv_qr);
+}
+/* Time mpn_dcpi1_bdiv_qr through SPEED_ROUTINE_MPN_PI1_BDIV_QR.  */
+double
+speed_mpn_dcpi1_bdiv_qr (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_PI1_BDIV_QR (mpn_dcpi1_bdiv_qr);
+}
+/* Time mpn_sbpi1_bdiv_q through SPEED_ROUTINE_MPN_PI1_BDIV_Q.  */
+double
+speed_mpn_sbpi1_bdiv_q (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_PI1_BDIV_Q (mpn_sbpi1_bdiv_q);
+}
+/* Time mpn_dcpi1_bdiv_q through SPEED_ROUTINE_MPN_PI1_BDIV_Q.  */
+double
+speed_mpn_dcpi1_bdiv_q (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_PI1_BDIV_Q (mpn_dcpi1_bdiv_q);
+}
+/* Time mpn_mu_bdiv_q; mpn_mu_bdiv_q_itch is given to the harness as the
+   second argument.  */
+double
+speed_mpn_mu_bdiv_q (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_MU_BDIV_Q (mpn_mu_bdiv_q, mpn_mu_bdiv_q_itch);
+}
+/* Time mpn_mu_bdiv_qr; mpn_mu_bdiv_qr_itch is given to the harness as the
+   second argument.  */
+double
+speed_mpn_mu_bdiv_qr (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_MU_BDIV_QR (mpn_mu_bdiv_qr, mpn_mu_bdiv_qr_itch);
+}
+
+/* Time mpn_binvert; mpn_binvert_itch is given to the harness as the second
+   argument.  */
+double
+speed_mpn_binvert (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_BINVERT (mpn_binvert, mpn_binvert_itch);
+}
+
+/* Time mpn_invert; mpn_invert_itch is given to the harness as the second
+   argument.  */
+double
+speed_mpn_invert (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_INVERT (mpn_invert, mpn_invert_itch);
+}
+
+/* Time mpn_invertappr; mpn_invertappr_itch is given to the harness as the
+   second argument.  */
+double
+speed_mpn_invertappr (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_INVERTAPPR (mpn_invertappr, mpn_invertappr_itch);
+}
+
+/* Time mpn_ni_invertappr.  Note the second harness argument is
+   mpn_invertappr_itch, the same one speed_mpn_invertappr uses.  */
+double
+speed_mpn_ni_invertappr (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_INVERTAPPR (mpn_ni_invertappr, mpn_invertappr_itch);
+}
+
+/* Time mpn_redc_1 through SPEED_ROUTINE_REDC_1.  */
+double
+speed_mpn_redc_1 (struct speed_params *s)
+{
+  SPEED_ROUTINE_REDC_1 (mpn_redc_1);
+}
+/* Time mpn_redc_2 through SPEED_ROUTINE_REDC_2.  */
+double
+speed_mpn_redc_2 (struct speed_params *s)
+{
+  SPEED_ROUTINE_REDC_2 (mpn_redc_2);
+}
+/* Time mpn_redc_n through SPEED_ROUTINE_REDC_N.  */
+double
+speed_mpn_redc_n (struct speed_params *s)
+{
+  SPEED_ROUTINE_REDC_N (mpn_redc_n);
+}
+
+
+/* Time mpn_popcount through SPEED_ROUTINE_MPN_POPCOUNT.  */
+double
+speed_mpn_popcount (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_POPCOUNT (mpn_popcount);
+}
+/* Time mpn_hamdist through SPEED_ROUTINE_MPN_HAMDIST.  */
+double
+speed_mpn_hamdist (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_HAMDIST (mpn_hamdist);
+}
+
+
+/* Time mpn_add_n through SPEED_ROUTINE_MPN_BINARY_N.  */
+double
+speed_mpn_add_n (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_BINARY_N (mpn_add_n);
+}
+/* Time mpn_sub_n through SPEED_ROUTINE_MPN_BINARY_N.  */
+double
+speed_mpn_sub_n (struct speed_params *s)
+{
+SPEED_ROUTINE_MPN_BINARY_N (mpn_sub_n);
+}
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+/* Time the native mpn_add_n_sub_n.  The call is spelled out for the _CALL
+   harness; "ap" and "sp" are not declared here, so the macro must provide
+   them.  */
+double
+speed_mpn_add_n_sub_n (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_ADDSUB_N_CALL (mpn_add_n_sub_n (ap, sp, s->xp, s->yp, s->size));
+}
+#endif
+
+#if HAVE_NATIVE_mpn_addlsh1_n
+/* Time the native mpn_addlsh1_n through SPEED_ROUTINE_MPN_BINARY_N.  */
+double
+speed_mpn_addlsh1_n (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_BINARY_N (mpn_addlsh1_n);
+}
+#endif
+#if HAVE_NATIVE_mpn_sublsh1_n
+/* Time the native mpn_sublsh1_n through SPEED_ROUTINE_MPN_BINARY_N.  */
+double
+speed_mpn_sublsh1_n (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_BINARY_N (mpn_sublsh1_n);
+}
+#endif
+#if HAVE_NATIVE_mpn_rsblsh1_n
+/* Time the native mpn_rsblsh1_n through SPEED_ROUTINE_MPN_BINARY_N.  */
+double
+speed_mpn_rsblsh1_n (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_BINARY_N (mpn_rsblsh1_n);
+}
+#endif
+#if HAVE_NATIVE_mpn_addlsh2_n
+/* Time the native mpn_addlsh2_n through SPEED_ROUTINE_MPN_BINARY_N.  */
+double
+speed_mpn_addlsh2_n (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_BINARY_N (mpn_addlsh2_n);
+}
+#endif
+#if HAVE_NATIVE_mpn_sublsh2_n
+/* Time the native mpn_sublsh2_n through SPEED_ROUTINE_MPN_BINARY_N.  */
+double
+speed_mpn_sublsh2_n (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_BINARY_N (mpn_sublsh2_n);
+}
+#endif
+#if HAVE_NATIVE_mpn_rsblsh2_n
+/* Time the native mpn_rsblsh2_n through SPEED_ROUTINE_MPN_BINARY_N.  */
+double
+speed_mpn_rsblsh2_n (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_BINARY_N (mpn_rsblsh2_n);
+}
+#endif
+#if HAVE_NATIVE_mpn_rsh1add_n
+/* Time the native mpn_rsh1add_n through SPEED_ROUTINE_MPN_BINARY_N.  */
+double
+speed_mpn_rsh1add_n (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_BINARY_N (mpn_rsh1add_n);
+}
+#endif
+#if HAVE_NATIVE_mpn_rsh1sub_n
+/* Time the native mpn_rsh1sub_n through SPEED_ROUTINE_MPN_BINARY_N.  */
+double
+speed_mpn_rsh1sub_n (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_BINARY_N (mpn_rsh1sub_n);
+}
+#endif
+
+/* mpn_and_n etc can be macros and so have to be handled with
+   SPEED_ROUTINE_MPN_BINARY_N_CALL forms */
+/* Time mpn_and_n on s->xp,s->yp; "wp" is supplied by the _CALL macro.  */
+double
+speed_mpn_and_n (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_and_n (wp, s->xp, s->yp, s->size));
+}
+/* Time mpn_andn_n on s->xp,s->yp; "wp" is supplied by the _CALL macro.  */
+double
+speed_mpn_andn_n (struct speed_params *s)
+{
+SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_andn_n (wp, s->xp, s->yp, s->size));
+}
+/* Time mpn_nand_n on s->xp,s->yp; "wp" is supplied by the _CALL macro.  */
+double
+speed_mpn_nand_n (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_nand_n (wp, s->xp, s->yp, s->size));
+}
+/* Time mpn_ior_n on s->xp,s->yp; "wp" is supplied by the _CALL macro.  */
+double
+speed_mpn_ior_n (struct speed_params *s)
+{
+SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_ior_n (wp, s->xp, s->yp, s->size));
+}
+/* Time mpn_iorn_n on s->xp,s->yp; "wp" is supplied by the _CALL macro.  */
+double
+speed_mpn_iorn_n (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_iorn_n (wp, s->xp, s->yp, s->size));
+}
+/* Time mpn_nior_n on s->xp,s->yp; "wp" is supplied by the _CALL macro.  */
+double
+speed_mpn_nior_n (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_nior_n (wp, s->xp, s->yp, s->size));
+}
+/* Time mpn_xor_n on s->xp,s->yp; "wp" is supplied by the _CALL macro.  */
+double
+speed_mpn_xor_n (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_xor_n (wp, s->xp, s->yp, s->size));
+}
+/* Time mpn_xnor_n on s->xp,s->yp; "wp" is supplied by the _CALL macro.  */
+double
+speed_mpn_xnor_n (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_xnor_n (wp, s->xp, s->yp, s->size));
+}
+
+
+/* Time mpn_mul_n through SPEED_ROUTINE_MPN_MUL_N.  */
+double
+speed_mpn_mul_n (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_MUL_N (mpn_mul_n);
+}
+/* Time mpn_sqr through SPEED_ROUTINE_MPN_SQR.  */
+double
+speed_mpn_sqr (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_SQR (mpn_sqr);
+}
+/* Time mpn_mul_n used as a squaring: s->xp is passed as both inputs.  */
+double
+speed_mpn_mul_n_sqr (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_SQR_CALL (mpn_mul_n (wp, s->xp, s->xp, s->size));
+}
+
+/* Time mpn_mul_basecase through SPEED_ROUTINE_MPN_MUL.  */
+double
+speed_mpn_mul_basecase (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_MUL(mpn_mul_basecase);
+}
+/* Time mpn_mul through SPEED_ROUTINE_MPN_MUL.  */
+double
+speed_mpn_mul (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_MUL(mpn_mul);
+}
+/* Time mpn_sqr_basecase through SPEED_ROUTINE_MPN_SQR.  No size limit is
+   applied here (see the FIXME).  */
+double
+speed_mpn_sqr_basecase (struct speed_params *s)
+{
+  /* FIXME: size restrictions on some versions of sqr_basecase */
+  SPEED_ROUTINE_MPN_SQR (mpn_sqr_basecase);
+}
+
+#if HAVE_NATIVE_mpn_sqr_diagonal
+/* Time the native mpn_sqr_diagonal through SPEED_ROUTINE_MPN_SQR.  */
+double
+speed_mpn_sqr_diagonal (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_SQR (mpn_sqr_diagonal);
+}
+#endif
+
+/* Time mpn_toom2_sqr through SPEED_ROUTINE_MPN_TOOM2_SQR.  */
+double
+speed_mpn_toom2_sqr (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_TOOM2_SQR (mpn_toom2_sqr);
+}
+/* Time mpn_toom3_sqr through SPEED_ROUTINE_MPN_TOOM3_SQR.  */
+double
+speed_mpn_toom3_sqr (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_TOOM3_SQR (mpn_toom3_sqr);
+}
+/* Time mpn_toom4_sqr through SPEED_ROUTINE_MPN_TOOM4_SQR.  */
+double
+speed_mpn_toom4_sqr (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_TOOM4_SQR (mpn_toom4_sqr);
+}
+/* Time mpn_toom6_sqr through SPEED_ROUTINE_MPN_TOOM6_SQR.  */
+double
+speed_mpn_toom6_sqr (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_TOOM6_SQR (mpn_toom6_sqr);
+}
+/* Time mpn_toom8_sqr through SPEED_ROUTINE_MPN_TOOM8_SQR.  */
+double
+speed_mpn_toom8_sqr (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_TOOM8_SQR (mpn_toom8_sqr);
+}
+/* Time mpn_toom22_mul through SPEED_ROUTINE_MPN_TOOM22_MUL_N.  */
+double
+speed_mpn_toom22_mul (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_TOOM22_MUL_N (mpn_toom22_mul);
+}
+/* Time mpn_toom33_mul through SPEED_ROUTINE_MPN_TOOM33_MUL_N.  */
+double
+speed_mpn_toom33_mul (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_TOOM33_MUL_N (mpn_toom33_mul);
+}
+/* Time mpn_toom44_mul through SPEED_ROUTINE_MPN_TOOM44_MUL_N.  */
+double
+speed_mpn_toom44_mul (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_TOOM44_MUL_N (mpn_toom44_mul);
+}
+/* Time mpn_toom6h_mul through SPEED_ROUTINE_MPN_TOOM6H_MUL_N.  */
+double
+speed_mpn_toom6h_mul (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_TOOM6H_MUL_N (mpn_toom6h_mul);
+}
+/* Time mpn_toom8h_mul through SPEED_ROUTINE_MPN_TOOM8H_MUL_N.  */
+double
+speed_mpn_toom8h_mul (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_TOOM8H_MUL_N (mpn_toom8h_mul);
+}
+
+/* Time mpn_toom32_mul through SPEED_ROUTINE_MPN_TOOM32_MUL.  */
+double
+speed_mpn_toom32_mul (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_TOOM32_MUL (mpn_toom32_mul);
+}
+/* Time mpn_toom42_mul through SPEED_ROUTINE_MPN_TOOM42_MUL.  */
+double
+speed_mpn_toom42_mul (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_TOOM42_MUL (mpn_toom42_mul);
+}
+/* Time mpn_toom43_mul through SPEED_ROUTINE_MPN_TOOM43_MUL.  */
+double
+speed_mpn_toom43_mul (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_TOOM43_MUL (mpn_toom43_mul);
+}
+/* Time mpn_toom63_mul through SPEED_ROUTINE_MPN_TOOM63_MUL.  */
+double
+speed_mpn_toom63_mul (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_TOOM63_MUL (mpn_toom63_mul);
+}
+/* Time mpn_toom32_mul under the TOOM32_FOR_TOOM43 harness (presumably with
+   operand shapes chosen for comparison against toom43 -- confirm against
+   the macro definition).  */
+double
+speed_mpn_toom32_for_toom43_mul (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM43_MUL (mpn_toom32_mul);
+}
+/* Time mpn_toom43_mul under the TOOM43_FOR_TOOM32 harness (presumably the
+   counterpart of speed_mpn_toom32_for_toom43_mul -- confirm).  */
+double
+speed_mpn_toom43_for_toom32_mul (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_TOOM43_FOR_TOOM32_MUL (mpn_toom43_mul);
+}
+/* Time mpn_toom32_mul under the TOOM32_FOR_TOOM53 harness (presumably with
+   operand shapes chosen for comparison against toom53 -- confirm).  */
+double
+speed_mpn_toom32_for_toom53_mul (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM53_MUL (mpn_toom32_mul);
+}
+/* Time mpn_toom53_mul under the TOOM53_FOR_TOOM32 harness (presumably the
+   counterpart of speed_mpn_toom32_for_toom53_mul -- confirm).  */
+double
+speed_mpn_toom53_for_toom32_mul (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_TOOM53_FOR_TOOM32_MUL (mpn_toom53_mul);
+}
+/* Time mpn_toom42_mul under the TOOM42_FOR_TOOM53 harness (presumably with
+   operand shapes chosen for comparison against toom53 -- confirm).  */
+double
+speed_mpn_toom42_for_toom53_mul (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_TOOM42_FOR_TOOM53_MUL (mpn_toom42_mul);
+}
+/* Time mpn_toom53_mul under the TOOM53_FOR_TOOM42 harness (presumably the
+   counterpart of speed_mpn_toom42_for_toom53_mul -- confirm).  */
+double
+speed_mpn_toom53_for_toom42_mul (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_TOOM53_FOR_TOOM42_MUL (mpn_toom53_mul);
+}
+
+/* Time mpn_nussbaumer_mul multiplying s->xp by s->yp, both s->size limbs;
+   "wp" is supplied by the _CALL macro.  */
+double
+speed_mpn_nussbaumer_mul (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_MUL_N_CALL
+    (mpn_nussbaumer_mul (wp, s->xp, s->size, s->yp, s->size));
+}
+/* Time mpn_nussbaumer_mul used as a squaring: s->xp is passed as both
+   inputs.  */
+double
+speed_mpn_nussbaumer_mul_sqr (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_SQR_CALL
+    (mpn_nussbaumer_mul (wp, s->xp, s->size, s->xp, s->size));
+}
+
+#if WANT_OLD_FFT_FULL
+/* Time mpn_mul_fft_full multiplying s->xp by s->yp (only built when
+   WANT_OLD_FFT_FULL is set).  */
+double
+speed_mpn_mul_fft_full (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_MUL_N_CALL
+    (mpn_mul_fft_full (wp, s->xp, s->size, s->yp, s->size));
+}
+/* Time mpn_mul_fft_full used as a squaring: s->xp is passed as both inputs
+   (only built when WANT_OLD_FFT_FULL is set).  */
+double
+speed_mpn_mul_fft_full_sqr (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_SQR_CALL
+    (mpn_mul_fft_full (wp, s->xp, s->size, s->xp, s->size));
+}
+#endif
+
+/* These are mod 2^N+1 multiplies and squares.  If s->r is supplied it's
+   used as k, otherwise the best k for the size is used.  If s->size isn't a
+   multiple of 2^k it's rounded up to make the effective operation size.  */
+
+/* Function body for the mpn_mul_fft speed routines.
+
+   call  statement executed s->reps times between speed_starttime() and
+         speed_endtime().  It may use the locals wp (destination, pl+1
+         limbs), pl (size from mpn_fft_next_size) and k.
+   sqr   passed to mpn_fft_best_k() when s->r is zero; when it is zero
+         s->yp is also registered as a source operand for cache priming.
+
+   Sizes below 1 are rejected through SPEED_RESTRICT_COND.  The expansion
+   ends with "return t", so it must be the whole body of a function that
+   has "s" in scope and returns double.  */
+#define SPEED_ROUTINE_MPN_MUL_FFT_CALL(call, sqr)       \
+  {                                                     \
+    mp_ptr     wp;                                      \
+    mp_size_t  pl;                                      \
+    int        k;                                       \
+    unsigned   i;                                       \
+    double     t;                                       \
+    TMP_DECL;                                           \
+                                                       \
+    SPEED_RESTRICT_COND (s->size >= 1);                 \
+                                                       \
+    if (s->r != 0)                                      \
+      k = s->r;                                         \
+    else                                                \
+      k = mpn_fft_best_k (s->size, sqr);                \
+                                                       \
+    TMP_MARK;                                           \
+    pl = mpn_fft_next_size (s->size, k);                \
+    SPEED_TMP_ALLOC_LIMBS (wp, pl+1, s->align_wp);      \
+                                                       \
+    speed_operand_src (s, s->xp, s->size);              \
+    if (!sqr)                                           \
+      speed_operand_src (s, s->yp, s->size);            \
+    speed_operand_dst (s, wp, pl+1);                    \
+    speed_cache_fill (s);                               \
+                                                       \
+    speed_starttime ();                                 \
+    i = s->reps;                                        \
+    do                                                  \
+      call;                                             \
+    while (--i != 0);                                   \
+    t = speed_endtime ();                               \
+                                                       \
+    TMP_FREE;                                           \
+    return t;                                           \
+  }
+
+/* Time mpn_mul_fft multiplying s->xp by s->yp; wp, pl and k are the locals
+   set up by SPEED_ROUTINE_MPN_MUL_FFT_CALL, and sqr is 0.  */
+double
+speed_mpn_mul_fft (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_MUL_FFT_CALL
+    (mpn_mul_fft (wp, pl, s->xp, s->size, s->yp, s->size, k), 0);
+}
+
+/* Time mpn_mul_fft used as a squaring: s->xp is passed as both inputs and
+   sqr is 1.  */
+double
+speed_mpn_mul_fft_sqr (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_MUL_FFT_CALL
+    (mpn_mul_fft (wp, pl, s->xp, s->size, s->xp, s->size, k), 1);
+}
+
+/* Time mpn_fft_mul multiplying s->xp by s->yp, both s->size limbs.  */
+double
+speed_mpn_fft_mul (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_MUL_N_CALL (mpn_fft_mul (wp, s->xp, s->size, s->yp, s->size));
+}
+
+/* Time mpn_fft_mul used as a squaring: s->xp is passed as both inputs.  */
+double
+speed_mpn_fft_sqr (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_SQR_CALL (mpn_fft_mul (wp, s->xp, s->size, s->xp, s->size));
+}
+
+/* Time mpn_mullo_n through SPEED_ROUTINE_MPN_MULLO_N.  */
+double
+speed_mpn_mullo_n (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_MULLO_N (mpn_mullo_n);
+}
+/* Time mpn_mullo_basecase through SPEED_ROUTINE_MPN_MULLO_BASECASE.  */
+double
+speed_mpn_mullo_basecase (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_MULLO_BASECASE (mpn_mullo_basecase);
+}
+
+/* Time mpn_mulmod_bnm1 with s->size used for the result size and for both
+   operand sizes; "wp" and "tp" are supplied by the _CALL macro.  */
+double
+speed_mpn_mulmod_bnm1 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_MULMOD_BNM1_CALL (mpn_mulmod_bnm1 (wp, s->size, s->xp, s->size, s->yp, s->size, tp));
+}
+
+/* Time mpn_bc_mulmod_bnm1 on s->xp,s->yp; "wp" and "tp" are supplied by
+   the _CALL macro.  */
+double
+speed_mpn_bc_mulmod_bnm1 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_MULMOD_BNM1_CALL (mpn_bc_mulmod_bnm1 (wp, s->xp, s->yp, s->size, tp));
+}
+
+/* Time mpn_mulmod_bnm1 through the _ROUNDED harness (presumably with the
+   size rounded to one the function prefers -- confirm against the macro).  */
+double
+speed_mpn_mulmod_bnm1_rounded (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_MULMOD_BNM1_ROUNDED (mpn_mulmod_bnm1);
+}
+
+/* Time mpn_sqrmod_bnm1 on s->xp, reusing the MULMOD_BNM1 _CALL harness.  */
+double
+speed_mpn_sqrmod_bnm1 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_MULMOD_BNM1_CALL (mpn_sqrmod_bnm1 (wp, s->size, s->xp, s->size, tp));
+}
+
+/* Time mpn_matrix22_mul with all eight matrix elements s->size limbs long.
+   Returns the elapsed time for s->reps repetitions; each repetition also
+   includes four MPN_COPYs that reload the first matrix.  */
+double
+speed_mpn_matrix22_mul (struct speed_params *s)
+{
+  /* Speed params only includes 2 inputs, so we have to invent the
+     other 6. */
+
+  mp_ptr a;
+  mp_ptr r;
+  mp_ptr b;
+  mp_ptr tp;
+  mp_size_t itch;
+  unsigned i;
+  double t;
+  TMP_DECL;
+
+  TMP_MARK;
+  /* a and b each hold four s->size limb elements back to back.  r holds
+     four slots of 2*s->size+1 limbs, hence 8*s->size+4.  */
+  SPEED_TMP_ALLOC_LIMBS (a, 4 * s->size, s->align_xp);
+  SPEED_TMP_ALLOC_LIMBS (b, 4 * s->size, s->align_yp);
+  SPEED_TMP_ALLOC_LIMBS (r, 8 * s->size + 4, s->align_wp);
+
+  /* First element of each matrix comes from the caller's operand, the
+     other three are random.  */
+  MPN_COPY (a, s->xp, s->size);
+  mpn_random (a + s->size, 3 * s->size);
+  MPN_COPY (b, s->yp, s->size);
+  mpn_random (b + s->size, 3 * s->size);
+
+  itch = mpn_matrix22_mul_itch (s->size, s->size);
+  SPEED_TMP_ALLOC_LIMBS (tp, itch, s->align_wp2);
+
+  speed_operand_src (s, a, 4 * s->size);
+  speed_operand_src (s, b, 4 * s->size);
+  speed_operand_dst (s, r, 8 * s->size + 4);
+  speed_operand_dst (s, tp, itch);
+  speed_cache_fill (s);
+
+  speed_starttime ();
+  i = s->reps;
+  do
+    {
+      mp_size_t sz = s->size;
+      /* Reload the four elements of a into their slots in r before every
+        call (presumably because mpn_matrix22_mul overwrites its first
+        matrix with the product -- confirm).  These copies are inside the
+        timed region.  */
+      MPN_COPY (r + 0 * sz + 0, a + 0 * sz, sz);
+      MPN_COPY (r + 2 * sz + 1, a + 1 * sz, sz);
+      MPN_COPY (r + 4 * sz + 2, a + 2 * sz, sz);
+      MPN_COPY (r + 6 * sz + 3, a + 3 * sz, sz);
+      mpn_matrix22_mul (r, r + 2 * sz + 1, r + 4 * sz + 2, r + 6 * sz + 3, sz,
+                       b, b + 1 * sz,     b + 2 * sz,     b + 3 * sz,     sz,
+                       tp);
+    }
+  while (--i != 0);
+  t = speed_endtime();
+  TMP_FREE;
+  return t;
+}
+
+double
+speed_mpn_hgcd (struct speed_params *s)
+{
+  mp_ptr wp;                    /* scratch for mpn_hgcd */
+  mp_size_t hgcd_init_scratch = MPN_HGCD_MATRIX_INIT_ITCH (s->size);
+  mp_size_t hgcd_scratch = mpn_hgcd_itch (s->size);
+  mp_ptr ap;                    /* per-rep copy of s->xp */
+  mp_ptr bp;                    /* per-rep copy of s->yp */
+  mp_ptr tmp1;                  /* storage for mpn_hgcd_matrix_init */
+
+  struct hgcd_matrix hgcd;
+  int res;                      /* assigned below, never read */
+  unsigned i;
+  double t;
+  TMP_DECL;
+  /* Time mpn_hgcd_matrix_init + mpn_hgcd on s->size limbs; -1 if size < 2.  */
+  if (s->size < 2)
+    return -1;
+
+  TMP_MARK;
+
+  SPEED_TMP_ALLOC_LIMBS (ap, s->size + 1, s->align_xp);
+  SPEED_TMP_ALLOC_LIMBS (bp, s->size + 1, s->align_yp);
+  /* Make both high limbs non-zero.  Note this writes to s->xp and s->yp.  */
+  s->xp[s->size - 1] |= 1;
+  s->yp[s->size - 1] |= 1;
+
+  SPEED_TMP_ALLOC_LIMBS (tmp1, hgcd_init_scratch, s->align_wp);
+  SPEED_TMP_ALLOC_LIMBS (wp, hgcd_scratch, s->align_wp);
+
+  speed_starttime ();
+  i = s->reps;
+  do
+    {
+      MPN_COPY (ap, s->xp, s->size);    /* copies and init are timed too */
+      MPN_COPY (bp, s->yp, s->size);
+      mpn_hgcd_matrix_init (&hgcd, s->size, tmp1);
+      res = mpn_hgcd (ap, bp, s->size, &hgcd, wp);
+    }
+  while (--i != 0);
+  t = speed_endtime ();
+  TMP_FREE;
+  return t;
+}
+
+double
+speed_mpn_hgcd_lehmer (struct speed_params *s)
+{
+  mp_ptr wp;                    /* scratch for mpn_hgcd_lehmer */
+  mp_size_t hgcd_init_scratch = MPN_HGCD_MATRIX_INIT_ITCH (s->size);
+  mp_size_t hgcd_scratch = MPN_HGCD_LEHMER_ITCH (s->size);
+  mp_ptr ap;                    /* per-rep copy of s->xp */
+  mp_ptr bp;                    /* per-rep copy of s->yp */
+  mp_ptr tmp1;                  /* storage for mpn_hgcd_matrix_init */
+
+  struct hgcd_matrix hgcd;
+  int res;                      /* assigned below, never read */
+  unsigned i;
+  double t;
+  TMP_DECL;
+  /* Time mpn_hgcd_matrix_init + mpn_hgcd_lehmer; returns -1 if size < 2.  */
+  if (s->size < 2)
+    return -1;
+
+  TMP_MARK;
+
+  SPEED_TMP_ALLOC_LIMBS (ap, s->size + 1, s->align_xp);
+  SPEED_TMP_ALLOC_LIMBS (bp, s->size + 1, s->align_yp);
+  /* Make both high limbs non-zero.  Note this writes to s->xp and s->yp.  */
+  s->xp[s->size - 1] |= 1;
+  s->yp[s->size - 1] |= 1;
+
+  SPEED_TMP_ALLOC_LIMBS (tmp1, hgcd_init_scratch, s->align_wp);
+  SPEED_TMP_ALLOC_LIMBS (wp, hgcd_scratch, s->align_wp);
+
+  speed_starttime ();
+  i = s->reps;
+  do
+    {
+      MPN_COPY (ap, s->xp, s->size);    /* copies and init are timed too */
+      MPN_COPY (bp, s->yp, s->size);
+      mpn_hgcd_matrix_init (&hgcd, s->size, tmp1);
+      res = mpn_hgcd_lehmer (ap, bp, s->size, &hgcd, wp);
+    }
+  while (--i != 0);
+  t = speed_endtime ();
+  TMP_FREE;
+  return t;
+}
+
+double
+speed_mpn_gcd (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_GCD (mpn_gcd);
+}
+
+double
+speed_mpn_gcdext (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_GCDEXT (mpn_gcdext);
+}
+#if 0
+double
+speed_mpn_gcdext_lehmer (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_GCDEXT (__gmpn_gcdext_lehmer);
+}
+#endif
+double
+speed_mpn_gcdext_single (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_GCDEXT (mpn_gcdext_single);
+}
+double
+speed_mpn_gcdext_double (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_GCDEXT (mpn_gcdext_double);
+}
+double
+speed_mpn_gcdext_one_single (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_GCDEXT_ONE (mpn_gcdext_one_single);
+}
+double
+speed_mpn_gcdext_one_double (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_GCDEXT_ONE (mpn_gcdext_one_double);
+}
+double
+speed_mpn_gcd_1 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_GCD_1 (mpn_gcd_1);
+}
+double
+speed_mpn_gcd_1N (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_GCD_1N (mpn_gcd_1);
+}
+
+
+double
+speed_mpz_jacobi (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPZ_JACOBI (mpz_jacobi);
+}
+double
+speed_mpn_jacobi_base (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_JACBASE (mpn_jacobi_base);
+}
+double
+speed_mpn_jacobi_base_1 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_JACBASE (mpn_jacobi_base_1);
+}
+double
+speed_mpn_jacobi_base_2 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_JACBASE (mpn_jacobi_base_2);
+}
+double
+speed_mpn_jacobi_base_3 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_JACBASE (mpn_jacobi_base_3);
+}
+
+
+double
+speed_mpn_sqrtrem (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_SQRTREM (mpn_sqrtrem);
+}
+
+double
+speed_mpn_rootrem (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_ROOTREM (mpn_rootrem);
+}
+
+
+double
+speed_mpz_fac_ui (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPZ_FAC_UI (mpz_fac_ui);
+}
+
+
+double
+speed_mpn_fib2_ui (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_FIB2_UI (mpn_fib2_ui);
+}
+double
+speed_mpz_fib_ui (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPZ_FIB_UI (mpz_fib_ui);
+}
+double
+speed_mpz_fib2_ui (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPZ_FIB2_UI (mpz_fib2_ui);
+}
+double
+speed_mpz_lucnum_ui (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPZ_LUCNUM_UI (mpz_lucnum_ui);
+}
+double
+speed_mpz_lucnum2_ui (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPZ_LUCNUM2_UI (mpz_lucnum2_ui);
+}
+
+
+double
+speed_mpz_powm (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPZ_POWM (mpz_powm);
+}
+double
+speed_mpz_powm_mod (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPZ_POWM (mpz_powm_mod);
+}
+double
+speed_mpz_powm_redc (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPZ_POWM (mpz_powm_redc);
+}
+double
+speed_mpz_powm_ui (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPZ_POWM_UI (mpz_powm_ui);
+}
+
+
+double
+speed_binvert_limb (struct speed_params *s)
+{
+  SPEED_ROUTINE_MODLIMB_INVERT (binvert_limb);
+}
+
+
+double
+speed_noop (struct speed_params *s)
+{
+  unsigned  i;
+
+  speed_starttime ();
+  i = s->reps;
+  do
+    noop ();
+  while (--i != 0);
+  return speed_endtime ();
+}
+
+double
+speed_noop_wxs (struct speed_params *s)
+{
+  mp_ptr   wp;
+  unsigned i;
+  double   t;
+  TMP_DECL;
+
+  TMP_MARK;
+  wp = TMP_ALLOC_LIMBS (1);
+
+  speed_starttime ();
+  i = s->reps;
+  do
+    noop_wxs (wp, s->xp, s->size);
+  while (--i != 0);
+  t = speed_endtime ();
+
+  TMP_FREE;
+  return t;
+}
+
+double
+speed_noop_wxys (struct speed_params *s)
+{
+  mp_ptr   wp;
+  unsigned i;
+  double   t;
+  TMP_DECL;
+
+  TMP_MARK;
+  wp = TMP_ALLOC_LIMBS (1);
+
+  speed_starttime ();
+  i = s->reps;
+  do
+    noop_wxys (wp, s->xp, s->yp, s->size);
+  while (--i != 0);
+  t = speed_endtime ();
+
+  TMP_FREE;
+  return t;
+}
+
+
+#define SPEED_ROUTINE_ALLOC_FREE(variables, calls)      \
+  {                                                     \
+    unsigned  i;                                        \
+    variables;                                          \
+                                                       \
+    speed_starttime ();                                 \
+    i = s->reps;                                        \
+    do                                                  \
+      {                                                 \
+       calls;                                          \
+      }                                                 \
+    while (--i != 0);                                   \
+    return speed_endtime ();                            \
+  }
+
+
+/* Compare these to see how much malloc/free costs and then how much
+   __gmp_default_allocate/free and mpz_init/clear add.  mpz_init/clear or
+   mpq_init/clear will be doing a 1 limb allocate, so use that as the size
+   when including them in comparisons.  */
+
+double
+speed_malloc_free (struct speed_params *s)
+{
+  size_t  bytes = s->size * BYTES_PER_MP_LIMB;
+  SPEED_ROUTINE_ALLOC_FREE (void *p,
+                           p = malloc (bytes);
+                           free (p));
+}
+
+double
+speed_malloc_realloc_free (struct speed_params *s)
+{
+  size_t  bytes = s->size * BYTES_PER_MP_LIMB;
+  SPEED_ROUTINE_ALLOC_FREE (void *p,
+                           p = malloc (BYTES_PER_MP_LIMB);
+                           p = realloc (p, bytes);
+                           free (p));
+}
+
+double
+speed_gmp_allocate_free (struct speed_params *s)
+{
+  size_t  bytes = s->size * BYTES_PER_MP_LIMB;
+  SPEED_ROUTINE_ALLOC_FREE (void *p,
+                           p = (*__gmp_allocate_func) (bytes);
+                           (*__gmp_free_func) (p, bytes));
+}
+
+double
+speed_gmp_allocate_reallocate_free (struct speed_params *s)
+{
+  size_t  bytes = s->size * BYTES_PER_MP_LIMB;  /* size the block grows to */
+  SPEED_RESTRICT_COND (s->size >= 1);   /* never reallocate down to 0 bytes */
+  SPEED_ROUTINE_ALLOC_FREE (void *p,    /* allocate 1 limb, grow, free */
+     p = (*__gmp_allocate_func) (BYTES_PER_MP_LIMB);
+     p = (*__gmp_reallocate_func) (p, BYTES_PER_MP_LIMB, bytes); /* old, new */
+     (*__gmp_free_func) (p, bytes));
+}
+
+double
+speed_mpz_init_clear (struct speed_params *s)
+{
+  SPEED_ROUTINE_ALLOC_FREE (mpz_t z,
+                           mpz_init (z);
+                           mpz_clear (z));
+}
+
+double
+speed_mpz_init_realloc_clear (struct speed_params *s)
+{
+  SPEED_ROUTINE_ALLOC_FREE (mpz_t z,
+                           mpz_init (z);
+                           _mpz_realloc (z, s->size);
+                           mpz_clear (z));
+}
+
+double
+speed_mpq_init_clear (struct speed_params *s)
+{
+  SPEED_ROUTINE_ALLOC_FREE (mpq_t q,
+                           mpq_init (q);
+                           mpq_clear (q));
+}
+
+double
+speed_mpf_init_clear (struct speed_params *s)
+{
+  SPEED_ROUTINE_ALLOC_FREE (mpf_t f,
+                           mpf_init (f);
+                           mpf_clear (f));
+}
+
+
+/* Compare this to mpn_add_n to see how much overhead mpz_add adds.  Note
+   that repeatedly calling mpz_add with the same data gives branch prediction
+   in it an advantage.  */
+
+double
+speed_mpz_add (struct speed_params *s)
+{
+  mpz_t     sum, op1, op2;
+  unsigned  left;
+  double    elapsed;
+
+  /* Operands are built from the s->size limbs at s->xp and s->yp.  */
+  mpz_init (sum);
+  mpz_init (op1);
+  mpz_init (op2);
+  mpz_set_n (op1, s->xp, s->size);
+  mpz_set_n (op2, s->yp, s->size);
+
+  /* One untimed add first, so sum has already held the result.  */
+  mpz_add (sum, op1, op2);
+
+  speed_starttime ();
+  left = s->reps;
+  do
+    mpz_add (sum, op1, op2);
+  while (--left != 0);
+  elapsed = speed_endtime ();
+
+  mpz_clear (sum);
+  mpz_clear (op1);
+  mpz_clear (op2);
+  return elapsed;
+}
+
+
+/* If r==0, calculate (size,size/2),
+   otherwise calculate (size,r). */
+
+double
+speed_mpz_bin_uiui (struct speed_params *s)
+{
+  /* Times mpz_bin_uiui (bin, size, k), with k taken from s->r when that is
+     non-zero and size/2 otherwise.  */
+
+  mpz_t          bin;
+  unsigned long  k;
+  unsigned       left;
+  double         elapsed;
+
+  k = (s->r != 0) ? (unsigned long) s->r : (unsigned long) (s->size/2);
+  mpz_init (bin);
+
+  /* Only the repeated calls sit between starttime and endtime.  */
+  speed_starttime ();
+  left = s->reps;
+  do
+    mpz_bin_uiui (bin, s->size, k);
+  while (--left != 0);
+  elapsed = speed_endtime ();
+
+  /* Release the result after the clock has been stopped.  */
+  mpz_clear (bin);
+  return elapsed;
+}
+
+
+/* The multiplies are successively dependent so the latency is measured, not
+   the issue rate.  There's only 10 per loop so the code doesn't get too big
+   since umul_ppmm is several instructions on some cpus.
+
+   Putting the arguments as "h,l,l,h" gets slightly better code from gcc
+   2.95.2 on x86: it puts only one mov between each mul, not two.  That mov
+   will probably show up as a bogus extra cycle though.
+
+   The measuring function macros are split into three parts to avoid
+   overflowing preprocessor expansion space if umul_ppmm is big.
+
+   Limitations:
+
+   Don't blindly use this to set UMUL_TIME in gmp-mparam.h, check the code
+   generated first, especially on CPUs with low latency multipliers.
+
+   The default umul_ppmm doing h*l will be getting increasing numbers of
+   high zero bits in the calculation.  CPUs with data-dependent multipliers
+   will want to use umul_ppmm.1 to get some randomization into the
+   calculation.  The extra xors and fetches will be a slowdown of course.  */
+
+#define SPEED_MACRO_UMUL_PPMM_A \
+  {                             \
+    mp_limb_t  h, l;            \
+    unsigned   i;               \
+    double     t;               \
+                               \
+    s->time_divisor = 10;       \
+                               \
+    h = s->xp[0];               \
+    l = s->yp[0];               \
+                               \
+    if (s->r == 1)              \
+      {                         \
+       speed_starttime ();     \
+       i = s->reps;            \
+       do                      \
+         {
+
+#define SPEED_MACRO_UMUL_PPMM_B \
+         }                     \
+       while (--i != 0);       \
+       t = speed_endtime ();   \
+      }                         \
+    else                        \
+      {                         \
+       speed_starttime ();     \
+       i = s->reps;            \
+       do                      \
+         {
+
+#define SPEED_MACRO_UMUL_PPMM_C                                         \
+         }                                                             \
+       while (--i != 0);                                               \
+       t = speed_endtime ();                                           \
+      }                                                                 \
+                                                                       \
+    /* stop the compiler optimizing away the whole calculation! */      \
+    noop_1 (h);                                                         \
+    noop_1 (l);                                                         \
+                                                                       \
+    return t;                                                           \
+  }
+
+
+double
+speed_umul_ppmm (struct speed_params *s)
+{
+  SPEED_MACRO_UMUL_PPMM_A;
+  {
+    umul_ppmm (h, l, l, h);  h ^= s->xp_block[0]; l ^= s->yp_block[0];
+     umul_ppmm (h, l, l, h); h ^= s->xp_block[1]; l ^= s->yp_block[1];
+     umul_ppmm (h, l, l, h); h ^= s->xp_block[2]; l ^= s->yp_block[2];
+    umul_ppmm (h, l, l, h);  h ^= s->xp_block[3]; l ^= s->yp_block[3];
+     umul_ppmm (h, l, l, h); h ^= s->xp_block[4]; l ^= s->yp_block[4];
+     umul_ppmm (h, l, l, h); h ^= s->xp_block[5]; l ^= s->yp_block[5];
+    umul_ppmm (h, l, l, h);  h ^= s->xp_block[6]; l ^= s->yp_block[6];
+     umul_ppmm (h, l, l, h); h ^= s->xp_block[7]; l ^= s->yp_block[7];
+     umul_ppmm (h, l, l, h); h ^= s->xp_block[8]; l ^= s->yp_block[8];
+    umul_ppmm (h, l, l, h);  h ^= s->xp_block[9]; l ^= s->yp_block[9];
+  }
+  SPEED_MACRO_UMUL_PPMM_B;
+  {
+    umul_ppmm (h, l, l, h);
+     umul_ppmm (h, l, l, h);
+     umul_ppmm (h, l, l, h);
+    umul_ppmm (h, l, l, h);
+     umul_ppmm (h, l, l, h);
+     umul_ppmm (h, l, l, h);
+    umul_ppmm (h, l, l, h);
+     umul_ppmm (h, l, l, h);
+     umul_ppmm (h, l, l, h);
+    umul_ppmm (h, l, l, h);
+  }
+  SPEED_MACRO_UMUL_PPMM_C;
+}
+
+
+#if HAVE_NATIVE_mpn_umul_ppmm
+double
+speed_mpn_umul_ppmm (struct speed_params *s)
+{
+  SPEED_MACRO_UMUL_PPMM_A;
+  {
+    h = mpn_umul_ppmm (&l, h, l);  h ^= s->xp_block[0]; l ^= s->yp_block[0];
+     h = mpn_umul_ppmm (&l, h, l); h ^= s->xp_block[1]; l ^= s->yp_block[1];
+     h = mpn_umul_ppmm (&l, h, l); h ^= s->xp_block[2]; l ^= s->yp_block[2];
+    h = mpn_umul_ppmm (&l, h, l);  h ^= s->xp_block[3]; l ^= s->yp_block[3];
+     h = mpn_umul_ppmm (&l, h, l); h ^= s->xp_block[4]; l ^= s->yp_block[4];
+     h = mpn_umul_ppmm (&l, h, l); h ^= s->xp_block[5]; l ^= s->yp_block[5];
+    h = mpn_umul_ppmm (&l, h, l);  h ^= s->xp_block[6]; l ^= s->yp_block[6];
+     h = mpn_umul_ppmm (&l, h, l); h ^= s->xp_block[7]; l ^= s->yp_block[7];
+     h = mpn_umul_ppmm (&l, h, l); h ^= s->xp_block[8]; l ^= s->yp_block[8];
+    h = mpn_umul_ppmm (&l, h, l);  h ^= s->xp_block[9]; l ^= s->yp_block[9];
+  }
+  SPEED_MACRO_UMUL_PPMM_B;
+  {
+    h = mpn_umul_ppmm (&l, h, l);
+     h = mpn_umul_ppmm (&l, h, l);
+     h = mpn_umul_ppmm (&l, h, l);
+    h = mpn_umul_ppmm (&l, h, l);
+     h = mpn_umul_ppmm (&l, h, l);
+     h = mpn_umul_ppmm (&l, h, l);
+    h = mpn_umul_ppmm (&l, h, l);
+     h = mpn_umul_ppmm (&l, h, l);
+     h = mpn_umul_ppmm (&l, h, l);
+    h = mpn_umul_ppmm (&l, h, l);
+  }
+  SPEED_MACRO_UMUL_PPMM_C;
+}
+#endif
+
+#if HAVE_NATIVE_mpn_umul_ppmm_r
+double
+speed_mpn_umul_ppmm_r (struct speed_params *s)
+{
+  SPEED_MACRO_UMUL_PPMM_A;
+  {
+    h = mpn_umul_ppmm_r (h, l, &l);  h ^= s->xp_block[0]; l ^= s->yp_block[0];
+     h = mpn_umul_ppmm_r (h, l, &l); h ^= s->xp_block[1]; l ^= s->yp_block[1];
+     h = mpn_umul_ppmm_r (h, l, &l); h ^= s->xp_block[2]; l ^= s->yp_block[2];
+    h = mpn_umul_ppmm_r (h, l, &l);  h ^= s->xp_block[3]; l ^= s->yp_block[3];
+     h = mpn_umul_ppmm_r (h, l, &l); h ^= s->xp_block[4]; l ^= s->yp_block[4];
+     h = mpn_umul_ppmm_r (h, l, &l); h ^= s->xp_block[5]; l ^= s->yp_block[5];
+    h = mpn_umul_ppmm_r (h, l, &l);  h ^= s->xp_block[6]; l ^= s->yp_block[6];
+     h = mpn_umul_ppmm_r (h, l, &l); h ^= s->xp_block[7]; l ^= s->yp_block[7];
+     h = mpn_umul_ppmm_r (h, l, &l); h ^= s->xp_block[8]; l ^= s->yp_block[8];
+    h = mpn_umul_ppmm_r (h, l, &l);  h ^= s->xp_block[9]; l ^= s->yp_block[9];
+  }
+  SPEED_MACRO_UMUL_PPMM_B;
+  {
+    h = mpn_umul_ppmm_r (h, l, &l);
+     h = mpn_umul_ppmm_r (h, l, &l);
+     h = mpn_umul_ppmm_r (h, l, &l);
+    h = mpn_umul_ppmm_r (h, l, &l);
+     h = mpn_umul_ppmm_r (h, l, &l);
+     h = mpn_umul_ppmm_r (h, l, &l);
+    h = mpn_umul_ppmm_r (h, l, &l);
+     h = mpn_umul_ppmm_r (h, l, &l);
+     h = mpn_umul_ppmm_r (h, l, &l);
+    h = mpn_umul_ppmm_r (h, l, &l);
+  }
+  SPEED_MACRO_UMUL_PPMM_C;
+}
+#endif
+
+
+/* The divisions are successively dependent so latency is measured, not
+   issue rate.  There's only 10 per loop so the code doesn't get too big,
+   especially for udiv_qrnnd_preinv and preinv2norm, which are several
+   instructions each.
+
+   Note that it's only the division which is measured here, there's no data
+   fetching and no shifting if the divisor gets normalized.
+
+   In speed_udiv_qrnnd with gcc 2.95.2 on x86 the parameters "q,r,r,q,d"
+   generate x86 div instructions with nothing in between.
+
+   The measuring function macros are in two parts to avoid overflowing
+   preprocessor expansion space if udiv_qrnnd etc are big.
+
+   Limitations:
+
+   Don't blindly use this to set UDIV_TIME in gmp-mparam.h, check the code
+   generated first.
+
+   CPUs with data-dependent divisions may want more attention paid to the
+   randomness of the data used.  Probably the measurement wanted is over
+   uniformly distributed numbers, but what's here might not be giving that.  */
+
+#define SPEED_ROUTINE_UDIV_QRNND_A(normalize)           \
+  {                                                     \
+    double     t;                                       \
+    unsigned   i;                                       \
+    mp_limb_t  q, r, d;                                 \
+    mp_limb_t  dinv;                                    \
+                                                       \
+    s->time_divisor = 10;                               \
+                                                       \
+    /* divisor from "r" parameter, or a default */      \
+    d = s->r;                                           \
+    if (d == 0)                                         \
+      d = mp_bases[10].big_base;                        \
+                                                       \
+    if (normalize)                                      \
+      {                                                 \
+       unsigned  norm;                                 \
+       count_leading_zeros (norm, d);                  \
+       d <<= norm;                                     \
+       invert_limb (dinv, d);                          \
+      }                                                 \
+                                                       \
+    q = s->xp[0];                                       \
+    r = s->yp[0] % d;                                   \
+                                                       \
+    speed_starttime ();                                 \
+    i = s->reps;                                        \
+    do                                                  \
+      {
+
+#define SPEED_ROUTINE_UDIV_QRNND_B                                      \
+      }                                                                 \
+    while (--i != 0);                                                   \
+    t = speed_endtime ();                                               \
+                                                                       \
+    /* stop the compiler optimizing away the whole calculation! */      \
+    noop_1 (q);                                                         \
+    noop_1 (r);                                                         \
+                                                                       \
+    return t;                                                           \
+  }
+
+double
+speed_udiv_qrnnd (struct speed_params *s)
+{
+  SPEED_ROUTINE_UDIV_QRNND_A (UDIV_NEEDS_NORMALIZATION);
+  {
+    udiv_qrnnd (q, r, r, q, d);
+     udiv_qrnnd (q, r, r, q, d);
+     udiv_qrnnd (q, r, r, q, d);
+    udiv_qrnnd (q, r, r, q, d);
+     udiv_qrnnd (q, r, r, q, d);
+     udiv_qrnnd (q, r, r, q, d);
+    udiv_qrnnd (q, r, r, q, d);
+     udiv_qrnnd (q, r, r, q, d);
+     udiv_qrnnd (q, r, r, q, d);
+    udiv_qrnnd (q, r, r, q, d);
+  }
+  SPEED_ROUTINE_UDIV_QRNND_B;
+}
+
+double
+speed_udiv_qrnnd_preinv1 (struct speed_params *s)
+{
+  SPEED_ROUTINE_UDIV_QRNND_A (1);
+  {
+    udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);
+     udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);
+     udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);
+    udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);
+     udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);
+     udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);
+    udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);
+     udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);
+     udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);
+    udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);
+  }
+  SPEED_ROUTINE_UDIV_QRNND_B;
+}
+
+double
+speed_udiv_qrnnd_preinv2 (struct speed_params *s)
+{
+  SPEED_ROUTINE_UDIV_QRNND_A (1);
+  {
+    udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);
+     udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);
+     udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);
+    udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);
+     udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);
+     udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);
+    udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);
+     udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);
+     udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);
+    udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);
+  }
+  SPEED_ROUTINE_UDIV_QRNND_B;
+}
+
+double
+speed_udiv_qrnnd_c (struct speed_params *s)
+{
+  SPEED_ROUTINE_UDIV_QRNND_A (1);
+  {
+    __udiv_qrnnd_c (q, r, r, q, d);
+     __udiv_qrnnd_c (q, r, r, q, d);
+     __udiv_qrnnd_c (q, r, r, q, d);
+    __udiv_qrnnd_c (q, r, r, q, d);
+     __udiv_qrnnd_c (q, r, r, q, d);
+     __udiv_qrnnd_c (q, r, r, q, d);
+    __udiv_qrnnd_c (q, r, r, q, d);
+     __udiv_qrnnd_c (q, r, r, q, d);
+     __udiv_qrnnd_c (q, r, r, q, d);
+    __udiv_qrnnd_c (q, r, r, q, d);
+  }
+  SPEED_ROUTINE_UDIV_QRNND_B;
+}
+
+#if HAVE_NATIVE_mpn_udiv_qrnnd
+double
+speed_mpn_udiv_qrnnd (struct speed_params *s)
+{
+  SPEED_ROUTINE_UDIV_QRNND_A (1);
+  {
+    q = mpn_udiv_qrnnd (&r, r, q, d);
+     q = mpn_udiv_qrnnd (&r, r, q, d);
+     q = mpn_udiv_qrnnd (&r, r, q, d);
+    q = mpn_udiv_qrnnd (&r, r, q, d);
+     q = mpn_udiv_qrnnd (&r, r, q, d);
+     q = mpn_udiv_qrnnd (&r, r, q, d);
+    q = mpn_udiv_qrnnd (&r, r, q, d);
+     q = mpn_udiv_qrnnd (&r, r, q, d);
+     q = mpn_udiv_qrnnd (&r, r, q, d);
+    q = mpn_udiv_qrnnd (&r, r, q, d);
+  }
+  SPEED_ROUTINE_UDIV_QRNND_B;
+}
+#endif
+
+#if HAVE_NATIVE_mpn_udiv_qrnnd_r
+double
+speed_mpn_udiv_qrnnd_r (struct speed_params *s)
+{
+  SPEED_ROUTINE_UDIV_QRNND_A (1);
+  {
+    q = mpn_udiv_qrnnd_r (r, q, d, &r);
+     q = mpn_udiv_qrnnd_r (r, q, d, &r);
+     q = mpn_udiv_qrnnd_r (r, q, d, &r);
+    q = mpn_udiv_qrnnd_r (r, q, d, &r);
+     q = mpn_udiv_qrnnd_r (r, q, d, &r);
+     q = mpn_udiv_qrnnd_r (r, q, d, &r);
+    q = mpn_udiv_qrnnd_r (r, q, d, &r);
+     q = mpn_udiv_qrnnd_r (r, q, d, &r);
+     q = mpn_udiv_qrnnd_r (r, q, d, &r);
+    q = mpn_udiv_qrnnd_r (r, q, d, &r);
+  }
+  SPEED_ROUTINE_UDIV_QRNND_B;
+}
+#endif
+
+
+double
+speed_invert_limb (struct speed_params *s)
+{
+  SPEED_ROUTINE_INVERT_LIMB_CALL (invert_limb (dinv, d));
+}
+
+
+/* xp[0] might not be particularly random, but should give an indication how
+   "/" runs.  Same for speed_operator_mod below.  */
+double
+speed_operator_div (struct speed_params *s)
+{
+  double     t;
+  unsigned   i;
+  mp_limb_t  x, q, d;
+
+  s->time_divisor = 10;
+
+  /* divisor from "r" parameter, or a default */
+  d = s->r;
+  if (d == 0)
+    d = mp_bases[10].big_base;
+
+  x = s->xp[0];
+  q = 0;
+
+  speed_starttime ();
+  i = s->reps;
+  do
+    {
+      q ^= x; q /= d;
+       q ^= x; q /= d;
+       q ^= x; q /= d;
+      q ^= x; q /= d;
+       q ^= x; q /= d;
+       q ^= x; q /= d;
+      q ^= x; q /= d;
+       q ^= x; q /= d;
+       q ^= x; q /= d;
+      q ^= x; q /= d;
+    }
+  while (--i != 0);
+  t = speed_endtime ();
+
+  /* stop the compiler optimizing away the whole calculation! */
+  noop_1 (q);
+
+  return t;
+}
+
+double
+speed_operator_mod (struct speed_params *s)
+{
+  double     t;
+  unsigned   i;
+  mp_limb_t  x, r, d;
+
+  s->time_divisor = 10;
+
+  /* divisor from "r" parameter, or a default */
+  d = s->r;
+  if (d == 0)
+    d = mp_bases[10].big_base;
+
+  x = s->xp[0];
+  r = 0;
+
+  speed_starttime ();
+  i = s->reps;
+  do
+    {
+      r ^= x; r %= d;
+       r ^= x; r %= d;
+       r ^= x; r %= d;
+      r ^= x; r %= d;
+       r ^= x; r %= d;
+       r ^= x; r %= d;
+      r ^= x; r %= d;
+       r ^= x; r %= d;
+       r ^= x; r %= d;
+      r ^= x; r %= d;
+    }
+  while (--i != 0);
+  t = speed_endtime ();
+
+  /* stop the compiler optimizing away the whole calculation! */
+  noop_1 (r);
+
+  return t;
+}
+
+
+/* r==0 measures on data with the values uniformly distributed.  This will
+   be typical for count_trailing_zeros in a GCD etc.
+
+   r==1 measures on data with the resultant count uniformly distributed
+   between 0 and GMP_LIMB_BITS-1.  This is probably sensible for
+   count_leading_zeros on the high limbs of divisors.  */
+
+int
+speed_routine_count_zeros_setup (struct speed_params *s,
+                                mp_ptr xp, int leading, int zero)
+{
+  int        i, c;
+  mp_limb_t  n;
+  /* Fill SPEED_BLOCK_SIZE limbs at xp; return 1, or 0 for an unknown s->r.  */
+  if (s->r == 0)
+    {
+      /* Make uniformly distributed data.  If zero isn't allowed then change
+        it to 1 for leading, or 0x800..00 for trailing.  */
+      MPN_COPY (xp, s->xp_block, SPEED_BLOCK_SIZE);
+      if (! zero)
+       for (i = 0; i < SPEED_BLOCK_SIZE; i++)
+         if (xp[i] == 0)
+           xp[i] = leading ? 1 : GMP_LIMB_HIGHBIT;
+    }
+  else if (s->r == 1)
+    {
+      /* Make counts uniformly distributed.  A randomly chosen bit is set, and
+        for leading the rest above it are cleared, or for trailing then the
+        rest below.  */
+      for (i = 0; i < SPEED_BLOCK_SIZE; i++)
+       {
+         mp_limb_t  set = CNST_LIMB(1) << (s->yp_block[i] % GMP_LIMB_BITS);
+         mp_limb_t  keep_below = set-1;
+         mp_limb_t  keep_above = MP_LIMB_T_MAX ^ keep_below;
+         mp_limb_t  keep = (leading ? keep_below : keep_above);
+         xp[i] = (s->xp_block[i] & keep) | set;
+       }
+    }
+  else
+    {
+      return 0;                 /* only r==0 and r==1 are handled */
+    }
+
+  /* Account for the effect of n^=c. */
+  c = 0;
+  for (i = 0; i < SPEED_BLOCK_SIZE; i++)
+    {
+      n = xp[i];
+      xp[i] ^= c;
+      /* c becomes the count for n, to be xored into the next limb.  */
+      if (leading)
+       count_leading_zeros (c, n);
+      else
+       count_trailing_zeros (c, n);
+    }
+
+  return 1;
+}
+
+double
+speed_count_leading_zeros (struct speed_params *s)
+{
+#ifdef COUNT_LEADING_ZEROS_0
+#define COUNT_LEADING_ZEROS_0_ALLOWED   1
+#else
+#define COUNT_LEADING_ZEROS_0_ALLOWED   0
+#endif
+
+  SPEED_ROUTINE_COUNT_ZEROS_A (1, COUNT_LEADING_ZEROS_0_ALLOWED);
+  count_leading_zeros (c, n);
+  SPEED_ROUTINE_COUNT_ZEROS_B ();
+}
+double
+speed_count_trailing_zeros (struct speed_params *s)
+{
+  SPEED_ROUTINE_COUNT_ZEROS_A (0, 0);
+  count_trailing_zeros (c, n);
+  SPEED_ROUTINE_COUNT_ZEROS_B ();
+}
+
+
+double
+speed_mpn_get_str (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_GET_STR (mpn_get_str);
+}
+
+double
+speed_mpn_set_str (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_SET_STR_CALL (mpn_set_str (wp, xp, s->size, base));
+}
+double
+speed_mpn_bc_set_str (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_SET_STR_CALL (mpn_bc_set_str (wp, xp, s->size, base));
+}
+
+double
+speed_MPN_ZERO (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_ZERO_CALL (MPN_ZERO (wp, s->size));
+}
+
+
+int
+speed_randinit (struct speed_params *s, gmp_randstate_ptr rstate)
+{
+  /* s->r picks the generator: 0 default, 1 mt, otherwise lc_2exp_size (r).
+     Returns 1, or for the lc case whatever gmp_randinit_lc_2exp_size gives.  */
+  if (s->r != 0 && s->r != 1)
+    return gmp_randinit_lc_2exp_size (rstate, s->r);
+  if (s->r == 0)
+    gmp_randinit_default (rstate);
+  else
+    gmp_randinit_mt (rstate);
+  return 1;
+}
+
+double
+speed_gmp_randseed (struct speed_params *s)
+{
+  gmp_randstate_t  rstate;
+  unsigned  i;
+  double    t;
+  mpz_t     x;
+
+  SPEED_RESTRICT_COND (s->size >= 1);
+  SPEED_RESTRICT_COND (speed_randinit (s, rstate));
+
+  /* s->size bits of seed */
+  mpz_init_set_n (x, s->xp, s->size);
+  mpz_fdiv_r_2exp (x, x, (unsigned long) s->size);
+
+  /* cache priming */
+  gmp_randseed (rstate, x);
+
+  speed_starttime ();
+  i = s->reps;
+  do
+    gmp_randseed (rstate, x);
+  while (--i != 0);
+  t = speed_endtime ();
+
+  gmp_randclear (rstate);
+  mpz_clear (x);
+  return t;
+}
+
+double
+speed_gmp_randseed_ui (struct speed_params *s)
+{
+  gmp_randstate_t  rstate;
+  unsigned  i, j;
+  double    t;
+
+  SPEED_RESTRICT_COND (speed_randinit (s, rstate));
+
+  /* cache priming */
+  gmp_randseed_ui (rstate, 123L);
+
+  speed_starttime ();
+  i = s->reps;
+  j = 0;
+  do
+    {
+      gmp_randseed_ui (rstate, (unsigned long) s->xp_block[j]);
+      j++;
+      if (j >= SPEED_BLOCK_SIZE)
+       j = 0;
+    }
+  while (--i != 0);
+  t = speed_endtime ();
+
+  gmp_randclear (rstate);
+  return t;
+}
+
+double
+speed_mpz_urandomb (struct speed_params *s)
+{
+  gmp_randstate_t  rstate;
+  mpz_t     z;
+  unsigned  i;
+  double    t;
+
+  SPEED_RESTRICT_COND (s->size >= 0);
+  SPEED_RESTRICT_COND (speed_randinit (s, rstate));
+
+  mpz_init (z);
+
+  /* cache priming */
+  mpz_urandomb (z, rstate, (unsigned long) s->size);
+  mpz_urandomb (z, rstate, (unsigned long) s->size);
+
+  speed_starttime ();
+  i = s->reps;
+  do
+    mpz_urandomb (z, rstate, (unsigned long) s->size);
+  while (--i != 0);
+  t = speed_endtime ();
+
+  mpz_clear (z);
+  gmp_randclear (rstate);
+  return t;
+}
diff --git a/tune/divrem1div.c b/tune/divrem1div.c

new file mode 100644 (file)

index 0000000..5580f80
--- /dev/null
+++ b/tune/divrem1div.c
@@ -0,0 +1,31 @@
+/* mpn/generic/divrem_1.c forced to use plain udiv_qrnnd.
+
+Copyright 2000, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define OPERATION_divrem_1
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#undef DIVREM_1_NORM_THRESHOLD
+#undef DIVREM_1_UNNORM_THRESHOLD
+#define DIVREM_1_NORM_THRESHOLD    MP_SIZE_T_MAX
+#define DIVREM_1_UNNORM_THRESHOLD  MP_SIZE_T_MAX
+#define __gmpn_divrem_1  mpn_divrem_1_div
+
+#include "mpn/generic/divrem_1.c"
diff --git a/tune/divrem1inv.c b/tune/divrem1inv.c

new file mode 100644 (file)

index 0000000..73ed57f
--- /dev/null
+++ b/tune/divrem1inv.c
@@ -0,0 +1,31 @@
+/* mpn/generic/divrem_1.c forced to use mul-by-inverse udiv_qrnnd_preinv.
+
+Copyright 2000, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define OPERATION_divrem_1
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Drop whatever thresholds are in effect after the headers above and pin
+   both to 0.  Presumably every operand size is then at or above the
+   threshold, so the code included below always uses the mul-by-inverse
+   udiv_qrnnd_preinv -- TODO confirm against mpn/generic/divrem_1.c.  */
+#undef DIVREM_1_NORM_THRESHOLD
+#undef DIVREM_1_UNNORM_THRESHOLD
+#define DIVREM_1_NORM_THRESHOLD    0
+#define DIVREM_1_UNNORM_THRESHOLD  0
+/* Build the included code under its own name, mpn_divrem_1_inv.  */
+#define __gmpn_divrem_1  mpn_divrem_1_inv
+
+#include "mpn/generic/divrem_1.c"
diff --git a/tune/divrem2div.c b/tune/divrem2div.c

new file mode 100644 (file)

index 0000000..10b50e2
--- /dev/null
+++ b/tune/divrem2div.c
@@ -0,0 +1,30 @@
+/* mpn/generic/divrem_2.c forced to use plain udiv_qrnnd. */
+
+/*
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Replace any DIVREM_2_THRESHOLD from the headers above with MP_SIZE_T_MAX.
+   Presumably no operand size can reach that, so the code included below
+   always uses plain udiv_qrnnd -- TODO confirm against
+   mpn/generic/divrem_2.c.  */
+#ifdef DIVREM_2_THRESHOLD
+#undef DIVREM_2_THRESHOLD
+#endif
+#define DIVREM_2_THRESHOLD  MP_SIZE_T_MAX
+/* Build the included code under its own name, mpn_divrem_2_div.  */
+#define __gmpn_divrem_2     mpn_divrem_2_div
+
+#include "mpn/generic/divrem_2.c"
diff --git a/tune/divrem2inv.c b/tune/divrem2inv.c

new file mode 100644 (file)

index 0000000..05644b2
--- /dev/null
+++ b/tune/divrem2inv.c
@@ -0,0 +1,30 @@
+/* mpn/generic/divrem_2.c forced to use udiv_qrnnd_preinv. */
+
+/*
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Replace any DIVREM_2_THRESHOLD from the headers above with 0.  Presumably
+   every operand size is then at or above the threshold, so the code
+   included below always uses udiv_qrnnd_preinv -- TODO confirm against
+   mpn/generic/divrem_2.c.  */
+#ifdef DIVREM_2_THRESHOLD
+#undef DIVREM_2_THRESHOLD
+#endif
+#define DIVREM_2_THRESHOLD  0
+/* Build the included code under its own name, mpn_divrem_2_inv.  */
+#define __gmpn_divrem_2     mpn_divrem_2_inv
+
+#include "mpn/generic/divrem_2.c"
diff --git a/tune/freq.c b/tune/freq.c

new file mode 100644 (file)

index 0000000..f1092e2
--- /dev/null
+++ b/tune/freq.c
@@ -0,0 +1,883 @@
+/* CPU frequency determination.
+
+Copyright 1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+/* Currently we don't get a CPU frequency on the following systems,
+
+   alphaev5-cray-unicosmk2.0.6.X
+       times() has been seen at 13.33 ns (75 MHz), which is probably not the
+       cpu frequency.  Measuring the cycle counter against that would be
+       possible though.  But currently we don't use the cycle counter due to
+       unicos having int==8bytes where tune/alpha.asm assumes int==4bytes.
+
+   m68040-unknown-netbsd1.4.1
+       Not sure if the system even knows the cpu frequency.  There's no
+       cycle counter to measure, though we could perhaps make a loop taking
+       a known number of cycles and measure that.
+
+   power-ibm-aix4.2.1.0
+   power2-ibm-aix4.3.1.0
+   powerpc604-ibm-aix4.3.1.0
+   powerpc604-ibm-aix4.3.3.0
+   powerpc630-ibm-aix4.3.3.0
+   powerpc-unknown-netbsd1.6
+       Don't know where any info hides on these.  mftb is not related to the
+       cpu frequency so doesn't help.
+
+   sparc-unknown-linux-gnu [maybe]
+       Don't know where any info hides on this.
+
+   t90-cray-unicos10.0.X
+       The times() call seems to be for instance 2.22 nanoseconds, which
+       might be the cpu frequency (450 mhz), but need to confirm that.
+
+*/
+
+#include "config.h"
+
+#if HAVE_INVENT_H
+#include <invent.h> /* for IRIX invent_cpuinfo_t */
+#endif
+
+#include <stdio.h>
+#include <stdlib.h> /* for getenv, qsort */
+#include <string.h> /* for memcmp */
+
+#if HAVE_UNISTD_H
+#include <unistd.h> /* for sysconf */
+#endif
+
+#include <sys/types.h>
+
+#if HAVE_SYS_ATTRIBUTES_H
+#include <sys/attributes.h>   /* for IRIX attr_get(), needs sys/types.h */
+#endif
+
+#if HAVE_SYS_IOGRAPH_H
+#include <sys/iograph.h>      /* for IRIX INFO_LBL_DETAIL_INVENT */
+#endif
+
+#if HAVE_SYS_PARAM_H     /* for constants needed by NetBSD <sys/sysctl.h> */
+#include <sys/param.h>   /* and needed by HPUX <sys/pstat.h> */
+#endif
+
+#if HAVE_SYS_PSTAT_H
+#include <sys/pstat.h>   /* for HPUX pstat_getprocessor() */
+#endif
+
+#if HAVE_SYS_SYSCTL_H
+#include <sys/sysctl.h>  /* for sysctlbyname() */
+#endif
+
+#if TIME_WITH_SYS_TIME
+# include <sys/time.h>  /* for struct timeval */
+# include <time.h>
+#else
+# if HAVE_SYS_TIME_H
+#  include <sys/time.h>
+# else
+#  include <time.h>
+# endif
+#endif
+
+#if HAVE_SYS_RESOURCE_H
+#include <sys/resource.h>  /* for struct rusage */
+#endif
+
+#if HAVE_SYS_PROCESSOR_H
+#include <sys/processor.h>  /* for solaris processor_info_t */
+#endif
+
+/* On AIX 5.1 with gcc 2.9-aix51-020209 in -maix64 mode, <sys/sysinfo.h>
+   gets an error about "fill" in "struct cpuinfo" having a negative size,
+   apparently due to __64BIT_KERNEL not being defined because _KERNEL is not
+   defined.  Avoid this file if we don't actually need it, which we don't on
+   AIX since there's no getsysinfo there.  */
+#if HAVE_SYS_SYSINFO_H && HAVE_GETSYSINFO
+#include <sys/sysinfo.h>  /* for OSF getsysinfo */
+#endif
+
+#if HAVE_MACHINE_HAL_SYSINFO_H
+#include <machine/hal_sysinfo.h>  /* for OSF GSI_CPU_INFO, struct cpu_info */
+#endif
+
+/* Remove definitions from NetBSD <sys/param.h>, to avoid conflicts with
+   gmp-impl.h. */
+#ifdef MIN
+#undef MIN
+#endif
+#ifdef MAX
+#undef MAX
+#endif
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#include "speed.h"
+
+
+/* Help-mode hook used at the top of each freq_xxx probe.  When the enclosing
+   function's "help" argument is non-zero, print STR as one entry in the
+   list of methods and return 0 from that function, so the probe itself is
+   not run.  Wrapped in do/while(0) so that "HELP (...);" is exactly one
+   statement and stays safe next to an unbraced if/else.  */
+#define HELP(str)                       \
+  do {                                  \
+    if (help)                           \
+      {                                 \
+        printf ("    - %s\n", str);     \
+        return 0;                       \
+      }                                 \
+  } while (0)
+
+
+/* GMP_CPU_FREQUENCY environment variable.  Should be in Hertz and can be
+   floating point, for example "450e6".
+
+   Returns 1 if speed_cycletime was set from the variable.  Returns 0 if the
+   variable is unset, or if it doesn't parse to a positive frequency (a
+   warning is printed in that case).  In help mode it only prints a
+   description of the method and returns 0.  */
+static int
+freq_environment (int help)
+{
+  char    *e;
+  double  freq;
+
+  HELP ("environment variable GMP_CPU_FREQUENCY (in Hertz)");
+
+  e = getenv ("GMP_CPU_FREQUENCY");
+  if (e == NULL)
+    return 0;
+
+  /* atof gives 0.0 for an empty or non-numeric string, and dividing by that
+     would leave speed_cycletime infinite while claiming success.  The
+     negated test also rejects negatives and NaN.  */
+  freq = atof (e);
+  if (! (freq > 0.0))
+    {
+      fprintf (stderr, "Ignoring invalid GMP_CPU_FREQUENCY \"%s\"\n", e);
+      return 0;
+    }
+
+  speed_cycletime = 1.0 / freq;
+
+  if (speed_option_verbose)
+    printf ("Using GMP_CPU_FREQUENCY %.2f for cycle time %.3g\n",
+            freq, speed_cycletime);
+
+  return 1;
+}
+
+
+/* OSF getsysinfo(), present in 4.0 and up at least.
+   The 4.0 man page suggests a 0 return means the information isn't
+   available, but 0 seems to be the normal return for GSI_CPU_INFO, so only
+   -1 is treated as failure.  The mhz field is taken as megahertz.  */
+static int
+freq_getsysinfo (int help)
+{
+#if HAVE_GETSYSINFO
+  struct cpu_info  info;
+  int              first = 0;
+
+  HELP ("getsysinfo() GSI_CPU_INFO");
+
+  if (getsysinfo (GSI_CPU_INFO, (caddr_t) &info, sizeof (info),
+                  &first, NULL, NULL) == -1)
+    return 0;
+
+  speed_cycletime = 1e-6 / (double) info.mhz;
+  if (speed_option_verbose)
+    printf ("Using getsysinfo() GSI_CPU_INFO %u for cycle time %.3g\n",
+            info.mhz, speed_cycletime);
+  return 1;
+#else
+  return 0;
+#endif
+}
+
+
+/* HPUX 10 and up: pstat_getprocessor() gives psp_iticksperclktick, the
+   number of CPU cycles (ie. the CR16 register) per CLK_TCK, so the cycle
+   time is 1 / (clk_tck * psp_iticksperclktick).  HPUX 9 has no such field
+   in pst_processor, and no apparent equivalent.  */
+
+static int
+freq_pstat_getprocessor (int help)
+{
+#if HAVE_PSTAT_GETPROCESSOR && HAVE_PSP_ITICKSPERCLKTICK
+  struct pst_processor  proc;
+  long                  ticks;
+
+  HELP ("pstat_getprocessor() psp_iticksperclktick");
+
+  if (pstat_getprocessor (&proc, sizeof(proc), 1, 0) == -1)
+    return 0;
+
+  ticks = clk_tck();
+  speed_cycletime = 1.0 / (ticks * proc.psp_iticksperclktick);
+  if (speed_option_verbose)
+    printf ("Using pstat_getprocessor() psp_iticksperclktick %lu and clk_tck %ld for cycle time %.3g\n",
+            (unsigned long) proc.psp_iticksperclktick, ticks,
+            speed_cycletime);
+  return 1;
+#else
+  return 0;
+#endif
+}
+
+
+/* i386 FreeBSD 2.2.8: sysctlbyname machdep.i586_freq, in Hertz.
+   No obvious defines exist to fetch this through plain sysctl.  The value
+   is only used if the reply is exactly sizeof(unsigned) bytes.  */
+static int
+freq_sysctlbyname_i586_freq (int help)
+{
+#if HAVE_SYSCTLBYNAME
+  unsigned  hz;
+  size_t    len = sizeof(hz);
+
+  HELP ("sysctlbyname() machdep.i586_freq");
+
+  if (sysctlbyname ("machdep.i586_freq", &hz, &len, NULL, 0) != 0
+      || len != sizeof(hz))
+    return 0;
+
+  speed_cycletime = 1.0 / (double) hz;
+  if (speed_option_verbose)
+    printf ("Using sysctlbyname() machdep.i586_freq %u for cycle time %.3g\n",
+            hz, speed_cycletime);
+  return 1;
+#else
+  return 0;
+#endif
+}
+
+
+/* i386 FreeBSD 3.3: sysctlbyname machdep.tsc_freq, in Hertz.
+   No obvious defines exist to fetch this through plain sysctl.  The value
+   is only used if the reply is exactly sizeof(unsigned) bytes.  */
+
+static int
+freq_sysctlbyname_tsc_freq (int help)
+{
+#if HAVE_SYSCTLBYNAME
+  unsigned  hz;
+  size_t    len = sizeof(hz);
+
+  HELP ("sysctlbyname() machdep.tsc_freq");
+
+  if (sysctlbyname ("machdep.tsc_freq", &hz, &len, NULL, 0) != 0
+      || len != sizeof(hz))
+    return 0;
+
+  speed_cycletime = 1.0 / (double) hz;
+  if (speed_option_verbose)
+    printf ("Using sysctlbyname() machdep.tsc_freq %u for cycle time %.3g\n",
+            hz, speed_cycletime);
+  return 1;
+#else
+  return 0;
+#endif
+}
+
+
+/* Apple powerpc Darwin 1.3 sysctl hw.cpufrequency is in hertz.  For some
+   reason only seems to be available from sysctl(), not sysctlbyname().
+
+   Returns 1 with speed_cycletime set on success, 0 otherwise.  As in the
+   sysctlbyname probes, the reply must be exactly sizeof(unsigned) bytes so
+   a differently sized value isn't misread as a frequency; a zero reading is
+   also rejected, since it would give an infinite cycle time.  */
+
+static int
+freq_sysctl_hw_cpufrequency (int help)
+{
+#if HAVE_SYSCTL && defined (CTL_HW) && defined (HW_CPU_FREQ)
+  int       mib[2];
+  unsigned  val;
+  size_t    size;
+
+  HELP ("sysctl() hw.cpufrequency");
+
+  mib[0] = CTL_HW;
+  mib[1] = HW_CPU_FREQ;
+  size = sizeof(val);
+  if (sysctl (mib, 2, &val, &size, NULL, 0) == 0
+      && size == sizeof(val)
+      && val != 0)
+    {
+      speed_cycletime = 1.0 / (double) val;
+      if (speed_option_verbose)
+        printf ("Using sysctl() hw.cpufrequency %u for cycle time %.3g\n",
+                val, speed_cycletime);
+      return 1;
+    }
+#endif
+  return 0;
+}
+
+
+/* The following sysctl hw.model strings have been observed,
+
+       Alpha FreeBSD 4.1:   Digital AlphaPC 164LX 599 MHz
+       NetBSD 1.4:          Digital AlphaPC 164LX 599 MHz
+       NetBSD 1.6.1:        CY7C601 @ 40 MHz, TMS390C602A FPU
+
+   The string is searched, one starting position at a time, for the first
+   place where an unsigned number followed by "MHz" can be read.
+
+   NetBSD 1.4 doesn't seem to have sysctlbyname, so sysctl() is used.  */
+
+static int
+freq_sysctl_hw_model (int help)
+{
+#if HAVE_SYSCTL && defined (CTL_HW) && defined (HW_MODEL)
+  int       mib[2];
+  char      model[128];
+  unsigned  mhz;
+  size_t    len;
+  char      *s;
+  int       end;
+
+  HELP ("sysctl() hw.model");
+
+  len = sizeof(model);
+  mib[0] = CTL_HW;
+  mib[1] = HW_MODEL;
+  if (sysctl (mib, 2, model, &len, NULL, 0) != 0)
+    return 0;
+
+  s = model;
+  while (*s != '\0')
+    {
+      /* end stays 0 unless the whole pattern, "MHz" included, matched */
+      end = 0;
+      if (sscanf (s, "%u MHz%n", &mhz, &end) == 1 && end != 0)
+        {
+          speed_cycletime = 1e-6 / (double) mhz;
+          if (speed_option_verbose)
+            printf ("Using sysctl() hw.model %u for cycle time %.3g\n",
+                    mhz, speed_cycletime);
+          return 1;
+        }
+      s++;
+    }
+#endif
+  return 0;
+}
+
+
+/* /proc/cpuinfo for linux kernel.
+
+   Linux doesn't seem to have any system call to get the CPU frequency, at
+   least not in 2.0.x or 2.2.x, so it's necessary to read /proc/cpuinfo.
+
+   i386 2.0.36 - "bogomips" is the CPU frequency.
+
+   i386 2.2.13 - has both "cpu MHz" and "bogomips", and it's "cpu MHz" which
+                 is the frequency.
+
+   alpha 2.2.5 - "cycle frequency [Hz]" seems to be right, "BogoMIPS" is
+                 very slightly different.
+
+   alpha 2.2.18pre21 - "cycle frequency [Hz]" is 0 on at least one system,
+                 "BogoMIPS" seems near enough.
+
+   powerpc 2.2.19 - "clock" is the frequency, bogomips is something weird
+
+   The first line giving a usable value wins.  Every field must be positive
+   to be used: a zero (as seen for "cycle frequency" above) would make the
+   cycle time infinite, so such a line is skipped and later lines get their
+   chance.  Returns 1 if speed_cycletime was set, 0 if not.  */
+
+static int
+freq_proc_cpuinfo (int help)
+{
+  FILE    *fp;
+  char    buf[128];
+  double  val;
+  int     ret = 0;
+  int     end;
+
+  HELP ("linux kernel /proc/cpuinfo file, cpu MHz or bogomips");
+
+  if ((fp = fopen ("/proc/cpuinfo", "r")) != NULL)
+    {
+      while (fgets (buf, sizeof (buf), fp) != NULL)
+        {
+          /* in Hertz */
+          if (sscanf (buf, "cycle frequency [Hz]    : %lf", &val) == 1
+              && val > 0.0)
+            {
+              speed_cycletime = 1.0 / val;
+              if (speed_option_verbose)
+                printf ("Using /proc/cpuinfo \"cycle frequency\" %.2f for cycle time %.3g\n", val, speed_cycletime);
+              ret = 1;
+              break;
+            }
+          /* the remaining fields are in MHz */
+          if (sscanf (buf, "cpu MHz : %lf\n", &val) == 1
+              && val > 0.0)
+            {
+              speed_cycletime = 1e-6 / val;
+              if (speed_option_verbose)
+                printf ("Using /proc/cpuinfo \"cpu MHz\" %.2f for cycle time %.3g\n", val, speed_cycletime);
+              ret = 1;
+              break;
+            }
+          /* end stays 0 unless the "MHz" suffix was matched too */
+          end = 0;
+          if (sscanf (buf, "clock : %lfMHz\n%n", &val, &end) == 1 && end != 0
+              && val > 0.0)
+            {
+              speed_cycletime = 1e-6 / val;
+              if (speed_option_verbose)
+                printf ("Using /proc/cpuinfo \"clock\" %.2f for cycle time %.3g\n", val, speed_cycletime);
+              ret = 1;
+              break;
+            }
+          if ((sscanf (buf, "bogomips : %lf\n", &val) == 1
+               || sscanf (buf, "BogoMIPS : %lf\n", &val) == 1)
+              && val > 0.0)
+            {
+              speed_cycletime = 1e-6 / val;
+              if (speed_option_verbose)
+                printf ("Using /proc/cpuinfo \"bogomips\" %.2f for cycle time %.3g\n", val, speed_cycletime);
+              ret = 1;
+              break;
+            }
+        }
+      fclose (fp);
+    }
+  return ret;
+}
+
+
+/* SunOS 4 /bin/sysinfo, which prints a line like
+       cpu0 is a "75 MHz TI,TMS390Z55" CPU
+   The first line matching that form supplies the frequency in MHz.  */
+static int
+freq_sunos_sysinfo (int help)
+{
+  int     found = 0;
+#if HAVE_POPEN
+  FILE    *cmd;
+  char    line[128];
+  double  mhz;
+  int     end;
+
+  HELP ("SunOS /bin/sysinfo program output, cpu0");
+
+  /* stderr goes to /dev/null in case /bin/sysinfo doesn't exist.  The
+     brackets are necessary for some shells. */
+  cmd = popen ("(/bin/sysinfo) 2>/dev/null", "r");
+  if (cmd == NULL)
+    return found;
+
+  while (! found && fgets (line, sizeof (line), cmd) != NULL)
+    {
+      /* end stays 0 unless "MHz" was reached */
+      end = 0;
+      if (sscanf (line, " cpu0 is a \"%lf MHz%n", &mhz, &end) != 1
+          || end == 0)
+        continue;
+
+      speed_cycletime = 1e-6 / mhz;
+      if (speed_option_verbose)
+        printf ("Using /bin/sysinfo \"cpu0 MHz\" %.2f for cycle time %.3g\n", mhz, speed_cycletime);
+      found = 1;
+    }
+  pclose (cmd);
+#endif
+  return found;
+}
+
+
+/* SCO OpenUnix 8 "/etc/hw -r cpu", which prints a line like
+       The speed of the CPU is approximately 450Mhz
+   The first line matching that form supplies the frequency in MHz.
+ */
+static int
+freq_sco_etchw (int help)
+{
+  int     found = 0;
+#if HAVE_POPEN
+  FILE    *cmd;
+  char    line[128];
+  double  mhz;
+  int     end;
+
+  HELP ("SCO /etc/hw program output");
+
+  /* stderr goes to /dev/null in case /etc/hw doesn't exist.  The brackets
+     are necessary for some shells. */
+  cmd = popen ("(/etc/hw -r cpu) 2>/dev/null", "r");
+  if (cmd == NULL)
+    return found;
+
+  while (! found && fgets (line, sizeof (line), cmd) != NULL)
+    {
+      /* end stays 0 unless the "Mhz" suffix was reached */
+      end = 0;
+      if (sscanf (line, " The speed of the CPU is approximately %lfMhz%n",
+                  &mhz, &end) != 1 || end == 0)
+        continue;
+
+      speed_cycletime = 1e-6 / mhz;
+      if (speed_option_verbose)
+        printf ("Using /etc/hw %.2f MHz, for cycle time %.3g\n",
+                mhz, speed_cycletime);
+      found = 1;
+    }
+  pclose (cmd);
+#endif
+  return found;
+}
+
+
+/* IRIX 6.5: attr_get("/hw/cpunum/0",INFO_LBL_DETAIL_INVENT) and its
+   ic_cpu_info.cpufq field, taken as MHz.  Earlier versions lack
+   INFO_LBL_DETAIL_INVENT, invent_cpuinfo_t, and /hw/cpunum/0.
+
+   "hinv -c processor" reports the same thing, but a system call seems
+   preferable where one is available.  The reply is used only if it's
+   exactly an invent_cpuinfo_t and is of class INV_PROCESSOR.  */
+
+static int
+freq_attr_get_invent (int help)
+{
+#if HAVE_ATTR_GET && HAVE_INVENT_H && defined (INFO_LBL_DETAIL_INVENT)
+  invent_cpuinfo_t  cpu;
+  int               size, mhz;
+
+  HELP ("attr_get(\"/hw/cpunum/0\") ic_cpu_info.cpufq");
+
+  size = sizeof (cpu);
+  if (attr_get ("/hw/cpunum/0", INFO_LBL_DETAIL_INVENT,
+                (char *) &cpu, &size, 0) != 0
+      || size != sizeof (cpu)
+      || cpu.ic_gen.ig_invclass != INV_PROCESSOR)
+    return 0;
+
+  mhz = cpu.ic_cpu_info.cpufq;
+  speed_cycletime = 1e-6 / mhz;
+  if (speed_option_verbose)
+    printf ("Using attr_get(\"/hw/cpunum/0\") ic_cpu_info.cpufq %d MHz for cycle time %.3g\n", mhz, speed_cycletime);
+  return 1;
+#else
+  return 0;
+#endif
+}
+
+
+/* FreeBSD on i386 gives a line like the following at bootup, and which can
+   be read back from /var/run/dmesg.boot.
+
+       CPU: AMD Athlon(tm) Processor (755.29-MHz 686-class CPU)
+       CPU: Pentium 4 (1707.56-MHz 686-class CPU)
+       CPU: i486 DX4 (486-class CPU)
+
+   This is useful on FreeBSD 4.x, where there's no sysctl machdep.tsc_freq
+   or machdep.i586_freq.
+
+   It's better to use /var/run/dmesg.boot than to run /sbin/dmesg, since the
+   latter prints the current system message buffer, which is a limited size
+   and can wrap around if the system is up for a long time.
+
+   The first "CPU:" line containing "(<number>-MHz" is used and reading
+   stops there.  Returns 1 if speed_cycletime was set, 0 if not.  */
+
+static int
+freq_bsd_dmesg (int help)
+{
+  FILE    *fp;
+  char    buf[256], *p;
+  double  val;
+  int     ret = 0;
+  int     end;
+
+  HELP ("BSD /var/run/dmesg.boot file");
+
+  if ((fp = fopen ("/var/run/dmesg.boot", "r")) != NULL)
+    {
+      /* "! ret" ends the scan once a frequency has been found; the break
+         below only leaves the inner for loop.  */
+      while (! ret && fgets (buf, sizeof (buf), fp) != NULL)
+        {
+          /* strncmp rather than memcmp: for a line shorter than 4
+             characters the bytes after its terminator aren't set by fgets,
+             and strncmp doesn't look past the terminator.  */
+          if (strncmp (buf, "CPU:", 4) != 0)
+            continue;
+
+          /* try each position in the line for "(nnn.nn-MHz" */
+          for (p = buf; *p != '\0'; p++)
+            {
+              end = 0;
+              if (sscanf (p, "(%lf-MHz%n", &val, &end) == 1 && end != 0)
+                {
+                  speed_cycletime = 1e-6 / val;
+                  if (speed_option_verbose)
+                    printf ("Using /var/run/dmesg.boot CPU: %.2f MHz for cycle time %.3g\n", val, speed_cycletime);
+                  ret = 1;
+                  break;
+                }
+            }
+        }
+      fclose (fp);
+    }
+  return ret;
+}
+
+
+/* IRIX "hinv -c processor".  Lines of the following forms have been seen,
+
+              1 150 MHZ IP20 Processor
+              2 195 MHZ IP27 Processors
+              Processor 0: 500 MHZ IP35
+
+   On IRIX 6.5 attr_get() gives this information directly, but on IRIX 6.2
+   it's not clear where to look, hence this fallback of parsing program
+   output.  The first line matching either form is used.  */
+
+static int
+freq_irix_hinv (int help)
+{
+  int     found = 0;
+#if HAVE_POPEN
+  FILE    *cmd;
+  char    line[128];
+  double  mhz;
+  int     nproc, end, matched;
+
+  HELP ("IRIX \"hinv -c processor\" output");
+
+  /* stderr goes to /dev/null in case hinv doesn't exist.  The brackets are
+     necessary for some shells. */
+  cmd = popen ("(hinv -c processor) 2>/dev/null", "r");
+  if (cmd == NULL)
+    return found;
+
+  while (fgets (line, sizeof (line), cmd) != NULL)
+    {
+      /* "Processor 0: 500 MHZ IP35" style */
+      end = 0;
+      matched = (sscanf (line, "Processor 0: %lf MHZ%n", &mhz, &end) == 1
+                 && end != 0);
+
+      if (! matched)
+        {
+          /* "2 195 MHZ IP27 Processors" style */
+          end = 0;
+          matched = (sscanf (line, "%d %lf MHZ%n", &nproc, &mhz, &end) == 2
+                     && end != 0);
+        }
+
+      if (! matched)
+        continue;
+
+      speed_cycletime = 1e-6 / mhz;
+      if (speed_option_verbose)
+        printf ("Using hinv -c processor \"%.2f MHZ\" for cycle time %.3g\n", mhz, speed_cycletime);
+      found = 1;
+      break;
+    }
+  pclose (cmd);
+#endif
+  return found;
+}
+
+
+/* processor_info() for Solaris.  "psrinfo" is the command-line interface to
+   this.  "prtconf -vp" gives similar information.
+
+   Apple Darwin has a processor_info, but in an incompatible style.  It
+   doesn't have <sys/processor.h>, so test for that.
+
+   Every configured processor that is P_ONLINE is examined and all must
+   report the same pi_clock (taken as MHz).  Returns 1 with speed_cycletime
+   set, or 0 if the clocks differ or no online processor gave a non-zero
+   clock.  */
+
+static int
+freq_processor_info (int help)
+{
+#if HAVE_PROCESSOR_INFO && HAVE_SYS_PROCESSOR_H
+  processor_info_t  p;
+  int  i, n, mhz = 0;
+
+  HELP ("processor_info() pi_clock");
+
+  n = sysconf (_SC_NPROCESSORS_CONF);
+  for (i = 0; i < n; i++)
+    {
+      if (processor_info (i, &p) != 0)
+        continue;
+      if (p.pi_state != P_ONLINE)
+        continue;
+
+      if (mhz != 0 && p.pi_clock != mhz)
+        {
+          fprintf (stderr,
+                   "freq_processor_info(): There's more than one CPU and they have different clock speeds\n");
+          return 0;
+        }
+
+      mhz = p.pi_clock;
+    }
+
+  /* Nothing usable was found (sysconf failed, no processor was online, or
+     the clock was reported as 0).  Dividing by that would give an infinite
+     cycle time while claiming success, so leave it to other methods.  */
+  if (mhz <= 0)
+    return 0;
+
+  speed_cycletime = 1.0e-6 / (double) mhz;
+
+  if (speed_option_verbose)
+    printf ("Using processor_info() %d mhz for cycle time %.3g\n",
+            mhz, speed_cycletime);
+  return 1;
+
+#else
+  return 0;
+#endif
+}
+
+
+#if HAVE_SPEED_CYCLECOUNTER && HAVE_GETTIMEOFDAY
+/* One reading for freq_measure(), which treats the returned value as a
+   cycle time.  The whole body comes from FREQ_MEASURE_ONE, which is not
+   defined in this file (presumably speed.h -- TODO confirm).  It is handed
+   speed_cyclecounter plus three helper macros saying how to read the clock
+   with gettimeofday() and how to pick the seconds and microseconds fields
+   out of a struct timeval.  */
+static double
+freq_measure_gettimeofday_one (void)
+{
+#define call_gettimeofday(t)   gettimeofday (&(t), NULL)
+#define timeval_tv_sec(t)      ((t).tv_sec)
+#define timeval_tv_usec(t)     ((t).tv_usec)
+  FREQ_MEASURE_ONE ("gettimeofday", struct timeval,
+                    call_gettimeofday, speed_cyclecounter,
+                    timeval_tv_sec, timeval_tv_usec);
+}
+#endif
+
+#if HAVE_SPEED_CYCLECOUNTER && HAVE_GETRUSAGE
+/* One reading for freq_measure(), which treats the returned value as a
+   cycle time.  The whole body comes from FREQ_MEASURE_ONE, which is not
+   defined in this file (presumably speed.h -- TODO confirm).  It is handed
+   speed_cyclecounter plus three helper macros saying how to read the clock
+   with getrusage (0, ...) and how to pick the ru_utime seconds and
+   microseconds out of a struct rusage.  */
+static double
+freq_measure_getrusage_one (void)
+{
+#define call_getrusage(t)   getrusage (0, &(t))
+#define rusage_tv_sec(t)    ((t).ru_utime.tv_sec)
+#define rusage_tv_usec(t)   ((t).ru_utime.tv_usec)
+  FREQ_MEASURE_ONE ("getrusage", struct rusage,
+                    call_getrusage, speed_cyclecounter,
+                    rusage_tv_sec, rusage_tv_usec);
+}
+#endif
+
+
+/* MEASURE_MATCH is how many readings within MEASURE_TOLERANCE of each other
+   are required.  This must be at least 2.  */
+#define MEASURE_MAX_ATTEMPTS   20
+#define MEASURE_TOLERANCE      1.005  /* 0.5% */
+#define MEASURE_MATCH          3
+
+/* Call ONE repeatedly, up to MEASURE_MAX_ATTEMPTS times, keeping the
+   readings sorted.  After each new reading, look for MEASURE_MATCH
+   neighbouring readings whose largest is within MEASURE_TOLERANCE of the
+   smallest; if there is such a run, return the mean of its smallest and
+   largest members.  Return -1.0 if no run ever appears.  NAME is not used
+   here.  */
+double
+freq_measure (const char *name, double (*one) (void))
+{
+  double  sample[MEASURE_MAX_ATTEMPTS];
+  int     count, lo, hi, k;
+
+  for (count = 1; count <= numberof (sample); count++)
+    {
+      sample[count-1] = (*one) ();
+
+      qsort (sample, count, sizeof(sample[0]),
+             (qsort_function_t) double_cmp_ptr);
+      if (speed_option_verbose >= 3)
+        for (k = 0; k < count; k++)
+          printf ("   t[%d] is %.6g\n", k, sample[k]);
+
+      /* slide a window of MEASURE_MATCH sorted readings, [lo,hi] */
+      for (lo = 0, hi = MEASURE_MATCH-1; hi < count; lo++, hi++)
+        if (sample[hi] <= sample[lo] * MEASURE_TOLERANCE)
+          return (sample[hi] + sample[lo]) / 2.0;
+    }
+  return -1.0;
+}
+
+/* Measure the cycle counter against getrusage().  Only attempted when
+   getrusage_microseconds_p() and cycles_works_p() both say yes; note those
+   tests come before HELP, so the method is only listed in help mode when
+   they pass.  Returns 1 with speed_cycletime set, or 0 if unavailable or
+   freq_measure() failed (-1.0).  */
+static int
+freq_measure_getrusage (int help)
+{
+#if HAVE_SPEED_CYCLECOUNTER && HAVE_GETRUSAGE
+  double  measured;
+
+  if (! getrusage_microseconds_p () || ! cycles_works_p ())
+    return 0;
+
+  HELP ("cycle counter measured with microsecond getrusage()");
+
+  measured = freq_measure ("getrusage", freq_measure_getrusage_one);
+  if (measured == -1.0)
+    return 0;
+
+  speed_cycletime = measured;
+  if (speed_option_verbose)
+    printf ("Using getrusage() measured cycle counter %.4g (%.2f MHz)\n",
+            speed_cycletime, 1e-6/speed_cycletime);
+  return 1;
+
+#else
+  return 0;
+#endif
+}
+
+/* Measure the cycle counter against gettimeofday().  Only attempted when
+   gettimeofday_microseconds_p() and cycles_works_p() both say yes; note
+   those tests come before HELP, so the method is only listed in help mode
+   when they pass.  Returns 1 with speed_cycletime set, or 0 if unavailable
+   or freq_measure() failed (-1.0).  */
+static int
+freq_measure_gettimeofday (int help)
+{
+#if HAVE_SPEED_CYCLECOUNTER && HAVE_GETTIMEOFDAY
+  double  measured;
+
+  if (! gettimeofday_microseconds_p () || ! cycles_works_p ())
+    return 0;
+
+  HELP ("cycle counter measured with microsecond gettimeofday()");
+
+  measured = freq_measure ("gettimeofday", freq_measure_gettimeofday_one);
+  if (measured == -1.0)
+    return 0;
+
+  speed_cycletime = measured;
+  if (speed_option_verbose)
+    printf ("Using gettimeofday() measured cycle counter %.4g (%.2f MHz)\n",
+            speed_cycletime, 1e-6/speed_cycletime);
+  return 1;
+#else
+  return 0;
+#endif
+}
+
+
+/* Try each method in turn, stopping at the first that succeeds.  Each
+   function returns 1 if it succeeds in setting speed_cycletime, or 0 if
+   not; the return here is 1 if any did, 0 if none.
+
+   In general system call tests are first since they're fast, then file
+   tests, then tests running programs.  Necessary exceptions to this rule
+   are noted.  The measuring is last since it's time consuming, and rather
+   wasteful of cpu.  */
+
+static int
+freq_all (int help)
+{
+  static int (* const method[]) (int) = {
+    /* This should be first, so an environment variable can override
+       anything the system gives. */
+    freq_environment,
+
+    freq_attr_get_invent,
+    freq_getsysinfo,
+    freq_pstat_getprocessor,
+    freq_sysctl_hw_model,
+    freq_sysctl_hw_cpufrequency,
+    freq_sysctlbyname_i586_freq,
+    freq_sysctlbyname_tsc_freq,
+
+    /* SCO openunix 8 puts a dummy pi_clock==16 in processor_info, so be
+       sure to check /etc/hw before that function. */
+    freq_sco_etchw,
+
+    freq_processor_info,
+    freq_proc_cpuinfo,
+    freq_bsd_dmesg,
+    freq_irix_hinv,
+    freq_sunos_sysinfo,
+    freq_measure_getrusage,
+    freq_measure_gettimeofday
+  };
+  size_t  i;
+
+  for (i = 0; i < sizeof (method) / sizeof (method[0]); i++)
+    if ((*method[i]) (help))
+      return 1;
+
+  return 0;
+}
+
+
+/* Run the frequency probes, once only; later calls do nothing.  If no
+   probe succeeds, say so when in verbose mode.  */
+void
+speed_cycletime_init (void)
+{
+  static int  attempted = 0;
+
+  if (! attempted)
+    {
+      attempted = 1;
+
+      if (! freq_all (0) && speed_option_verbose)
+        printf ("CPU frequency couldn't be determined\n");
+    }
+}
+
+
+/* Report that the CPU frequency is needed but unknown, list the methods
+   that could have supplied it (freq_all in help mode), and abort.  STR is
+   the caller's explanation of why the frequency is needed.  */
+void
+speed_cycletime_fail (const char *str)
+{
+  fprintf (stderr, "Measuring with: %s\n", speed_time_string);
+  fprintf (stderr, "%s,\n", str);
+  fprintf (stderr, "but none of the following are available,\n");
+  freq_all (1);
+
+  /* The method list is written with printf, ie. to stdout, and abort()
+     isn't required to flush buffered streams.  Flush explicitly, or the
+     list can be lost when stdout is a pipe or a file.  */
+  fflush (stdout);
+  abort ();
+}
+
+/* speed_time_init leaves speed_cycletime set to either 0.0 or 1.0 when the
+   CPU frequency is unknown.  0.0 is when the time base is in seconds, so
+   that's no good if cycles are wanted.  1.0 is when the time base is in
+   cycles, which conversely is no good if seconds are wanted.
+
+   This one is for callers wanting cycles: fail if speed_cycletime is 0.0
+   after initializing.  */
+void
+speed_cycletime_need_cycles (void)
+{
+  speed_time_init ();
+
+  if (speed_cycletime != 0.0)
+    return;
+
+  speed_cycletime_fail ("Need to know CPU frequency to give times in cycles");
+}
+/* For callers wanting seconds: after speed_time_init, a speed_cycletime of
+   exactly 1.0 is taken to mean the time base is in cycles with the CPU
+   frequency unknown, so fail in that case.  */
+void
+speed_cycletime_need_seconds (void)
+{
+  speed_time_init ();
+
+  if (speed_cycletime != 1.0)
+    return;
+
+  speed_cycletime_fail
+    ("Need to know CPU frequency to convert cycles to seconds");
+}
diff --git a/tune/gcdext_double.c b/tune/gcdext_double.c

new file mode 100644 (file)

index 0000000..5470f1a
--- /dev/null
+++ b/tune/gcdext_double.c
@@ -0,0 +1,28 @@
+/* mpn/generic/gcdext.c forced to use double limb calculations. */
+
+/*
+Copyright 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Replace any GCDEXT_THRESHOLD from the headers above with 0.  Presumably
+   every operand size is then at or above the threshold, which per this
+   file's header selects the double limb calculations -- TODO confirm
+   against mpn/generic/gcdext.c.  */
+#undef GCDEXT_THRESHOLD
+#define GCDEXT_THRESHOLD  0
+/* Build the included code under its own name, mpn_gcdext_double.  */
+#define __gmpn_gcdext  mpn_gcdext_double
+
+#include "../mpn/generic/gcdext.c"
diff --git a/tune/gcdext_single.c b/tune/gcdext_single.c

new file mode 100644 (file)

index 0000000..1bc47e7
--- /dev/null
+++ b/tune/gcdext_single.c
@@ -0,0 +1,28 @@
+/* mpn/generic/gcdext.c forced to use single limb calculations. */
+
+/*
+Copyright 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Replace any GCDEXT_THRESHOLD from the headers above with MP_SIZE_T_MAX.
+   Presumably no operand size can reach that, which per this file's header
+   selects the single limb calculations -- TODO confirm against
+   mpn/generic/gcdext.c.  */
+#undef GCDEXT_THRESHOLD
+#define GCDEXT_THRESHOLD  MP_SIZE_T_MAX
+/* Build the included code under its own name, mpn_gcdext_single.  */
+#define __gmpn_gcdext  mpn_gcdext_single
+
+#include "../mpn/generic/gcdext.c"
diff --git a/tune/gcdextod.c b/tune/gcdextod.c

new file mode 100644 (file)

index 0000000..957864c
--- /dev/null
+++ b/tune/gcdextod.c
@@ -0,0 +1,29 @@
+/* mpn/generic/gcdext.c forced to one double limb step. */
+
+/*
+Copyright 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Threshold 0 as for the double limb variant, plus WANT_GCDEXT_ONE_STEP,
+   which per this file's header restricts the included code to one step --
+   TODO confirm how mpn/generic/gcdext.c uses it.  */
+#undef GCDEXT_THRESHOLD
+#define GCDEXT_THRESHOLD  0
+#define WANT_GCDEXT_ONE_STEP 1
+/* Build the included code under its own name, mpn_gcdext_one_double.  */
+#define __gmpn_gcdext  mpn_gcdext_one_double
+
+#include "../mpn/generic/gcdext.c"
diff --git a/tune/gcdextos.c b/tune/gcdextos.c

new file mode 100644 (file)

index 0000000..afde776
--- /dev/null
+++ b/tune/gcdextos.c
@@ -0,0 +1,29 @@
+/* mpn/generic/gcdext.c forced to one single limb step. */
+
+/*
+Copyright 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Discard any GCDEXT_THRESHOLD the headers above defined and force it to
+   MP_SIZE_T_MAX for this compilation.  The double limb counterpart,
+   tune/gcdextod.c, uses 0 instead, so the maximum value is what selects the
+   single limb step here (per this file's header comment).  */
+#undef GCDEXT_THRESHOLD
+#define GCDEXT_THRESHOLD  MP_SIZE_T_MAX
+/* Presumably tested by the included source to stop after one step, as the
+   header comment describes -- TODO confirm the exact effect in
+   mpn/generic/gcdext.c.  */
+#define WANT_GCDEXT_ONE_STEP 1
+/* Rename the __gmpn_gcdext symbol so that this build of the routine is
+   emitted as mpn_gcdext_one_single.  */
+#define __gmpn_gcdext  mpn_gcdext_one_single
+
+/* Textually include the generic implementation so that the overrides above
+   are in force while it is compiled.  */
+#include "../mpn/generic/gcdext.c"
diff --git a/tune/hppa.asm b/tune/hppa.asm

new file mode 100644 (file)

index 0000000..e99a399
--- /dev/null
+++ b/tune/hppa.asm
@@ -0,0 +1,31 @@
+dnl  HPPA 32-bit time stamp counter access routine.
+
+dnl  Copyright 2000, 2002, 2005 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+dnl void speed_cyclecounter (unsigned p[2]);
+dnl
+dnl Get the HPPA interval timer.
+
+dnl  Stores the interval timer reading to p[0] and zero to p[1].
+dnl  p is read from %r26 and the return branches through %r2 (presumably the
+dnl  usual 32-bit HPPA first-argument and return-pointer registers -- confirm
+dnl  against the ABI).  %r28 is used as scratch.
+PROLOGUE(speed_cyclecounter)
+dnl  Read control register 16, the interval timer named in the comment above.
+       mfctl   %cr16,%r28
+dnl  p[0] = timer value.
+       stw     %r28,0(0,%r26)
+dnl  Return to the caller.  The store that follows occupies the delay slot of
+dnl  this branch, so it still executes; do not reorder these two lines.
+       bv      0(%r2)
+dnl  p[1] = 0, using %r0 as the zero source.
+       stw     %r0,4(0,%r26)
+EPILOGUE(speed_cyclecounter)
diff --git a/tune/hppa2.asm b/tune/hppa2.asm

new file mode 100644 (file)

index 0000000..9755c90
--- /dev/null
+++ b/tune/hppa2.asm
@@ -0,0 +1,33 @@
+dnl  HPPA 64-bit time stamp counter access routine.
+
+dnl  Copyright 2000, 2002, 2005 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+dnl void speed_cyclecounter (unsigned p[2]);
+dnl
+dnl Get the HPPA interval timer.
+
+dnl  Stores the low 32 bits of the interval timer to p[0] and the high 32
+dnl  bits to p[1].  p is read from %r26 and the return branches through %r2
+dnl  (presumably the usual HPPA first-argument and return-pointer registers
+dnl  -- confirm against the ABI).  %r28 is used as scratch.
+dnl
+dnl  The .level directive asks the assembler for PA 2.0 instructions, which
+dnl  extrd and bve below need.
+       .level 2.0
+PROLOGUE(speed_cyclecounter)
+dnl  Read control register 16, the interval timer named in the comment above.
+       mfctl   %cr16,%r28
+       stw     %r28,0(0,%r26)          ; low word
+dnl  Unsigned extract of the 32-bit field whose last bit is bit 31.  With HPPA
+dnl  bit numbering (bit 0 is the most significant) that is the upper half of
+dnl  the 64-bit register, which now replaces the full value in %r28.
+       extrd,u %r28,31,32,%r28
+dnl  Return to the caller.  The store that follows occupies the delay slot of
+dnl  this branch, so it still executes; do not reorder these two lines.
+       bve     (%r2)
+       stw     %r28,4(0,%r26)          ; high word
+EPILOGUE(speed_cyclecounter)
diff --git a/tune/hppa2w.asm b/tune/hppa2w.asm

new file mode 100644 (file)

index 0000000..ddf0ea9
--- /dev/null
+++ b/tune/hppa2w.asm
@@ -0,0 +1,33 @@
+dnl  HPPA 64-bit time stamp counter access routine.
+
+dnl  Copyright 2000, 2002, 2005 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+dnl void speed_cyclecounter (unsigned p[2]);
+dnl
+dnl Get the HPPA interval timer.
+
+dnl  Stores the low 32 bits of the interval timer to p[0] and the high 32
+dnl  bits to p[1].  p is read from %r26 and the return branches through %r2
+dnl  (presumably the usual HPPA first-argument and return-pointer registers
+dnl  -- confirm against the ABI).  %r28 is used as scratch.
+dnl
+dnl  Apart from the .level operand this routine is line for line the same as
+dnl  tune/hppa2.asm; 2.0w presumably selects the wide (64-bit pointer) form
+dnl  of PA 2.0 -- confirm against the assembler documentation.
+       .level 2.0w
+PROLOGUE(speed_cyclecounter)
+dnl  Read control register 16, the interval timer named in the comment above.
+       mfctl   %cr16,%r28
+       stw     %r28,0(0,%r26)          ; low word
+dnl  Unsigned extract of the 32-bit field whose last bit is bit 31.  With HPPA
+dnl  bit numbering (bit 0 is the most significant) that is the upper half of
+dnl  the 64-bit register, which now replaces the full value in %r28.
+       extrd,u %r28,31,32,%r28
+dnl  Return to the caller.  The store that follows occupies the delay slot of
+dnl  this branch, so it still executes; do not reorder these two lines.
+       bve     (%r2)
+       stw     %r28,4(0,%r26)          ; high word
+EPILOGUE(speed_cyclecounter)
diff --git a/tune/ia64.asm b/tune/ia64.asm

new file mode 100644 (file)

index 0000000..ef487db
--- /dev/null
+++ b/tune/ia64.asm
@@ -0,0 +1,36 @@
+dnl  IA-64 time stamp counter access routine.
+
+dnl  Copyright 2000, 2005 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C void speed_cyclecounter (unsigned int p[2]);
+C
+
+C Stores the 64-bit ar.itc reading as two 32-bit words: the low half to p[0]
+C and the high half to p[1].  p is read from r32 (presumably the first
+C argument register under the IA-64 calling convention -- confirm).  r14 is
+C used as scratch.
+ASM_START()
+PROLOGUE(speed_cyclecounter)
+C r14 = ar.itc, the counter this routine reports.
+       mov     r14 = ar.itc
+       ;;
+C p[0] = low 32 bits of r14; the trailing 4 post-increments r32 so that it
+C points at p[1] for the second store.
+       st4     [r32] = r14, 4
+C Unsigned right shift by 32 brings the high word down into the low 32 bits.
+       shr.u   r14 = r14, 32
+       ;;
+C p[1] = high 32 bits of the counter, then return through b0.
+       st4     [r32] = r14
+       br.ret.sptk.many b0
+EPILOGUE(speed_cyclecounter)
+ASM_END()
diff --git a/tune/jacbase1.c b/tune/jacbase1.c

new file mode 100644 (file)

index 0000000..2a0b859
--- /dev/null
+++ b/tune/jacbase1.c
@@ -0,0 +1,27 @@
+/* mpn/generic/jacbase.c method 1.
+
+Copyright 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Discard any JACOBI_BASE_METHOD the headers above defined and force method
+   1 for this compilation only.  */
+#undef JACOBI_BASE_METHOD
+#define JACOBI_BASE_METHOD 1
+/* Rename the __gmpn_jacobi_base symbol so that this build of the routine is
+   emitted as mpn_jacobi_base_1.  */
+#define __gmpn_jacobi_base mpn_jacobi_base_1
+
+/* Textually include the generic implementation so that the overrides above
+   are in force while it is compiled.
+   NOTE(review): unlike the tune/gcdext*.c wrappers this path has no "../"
+   prefix, so it presumably relies on the top source directory being on the
+   include search path -- confirm against the tune Makefile.  */
+#include "mpn/generic/jacbase.c"
diff --git a/tune/jacbase2.c b/tune/jacbase2.c

new file mode 100644 (file)

index 0000000..6bbe7e9
--- /dev/null
+++ b/tune/jacbase2.c
@@ -0,0 +1,27 @@
+/* mpn/generic/jacbase.c method 2.
+
+Copyright 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Discard any JACOBI_BASE_METHOD the headers above defined and force method
+   2 for this compilation only.  */
+#undef JACOBI_BASE_METHOD
+#define JACOBI_BASE_METHOD 2
+/* Rename the __gmpn_jacobi_base symbol so that this build of the routine is
+   emitted as mpn_jacobi_base_2.  */
+#define __gmpn_jacobi_base mpn_jacobi_base_2
+
+/* Textually include the generic implementation so that the overrides above
+   are in force while it is compiled.
+   NOTE(review): unlike the tune/gcdext*.c wrappers this path has no "../"
+   prefix, so it presumably relies on the top source directory being on the
+   include search path -- confirm against the tune Makefile.  */
+#include "mpn/generic/jacbase.c"
diff --git a/tune/jacbase3.c b/tune/jacbase3.c

new file mode 100644 (file)

index 0000000..f8f89d4
--- /dev/null
+++ b/tune/jacbase3.c
@@ -0,0 +1,27 @@
+/* mpn/generic/jacbase.c method 3.
+
+Copyright 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Discard any JACOBI_BASE_METHOD the headers above defined and force method
+   3 for this compilation only.  */
+#undef JACOBI_BASE_METHOD
+#define JACOBI_BASE_METHOD 3
+/* Rename the __gmpn_jacobi_base symbol so that this build of the routine is
+   emitted as mpn_jacobi_base_3.  */
+#define __gmpn_jacobi_base mpn_jacobi_base_3
+
+/* Textually include the generic implementation so that the overrides above
+   are in force while it is compiled.
+   NOTE(review): unlike the tune/gcdext*.c wrappers this path has no "../"
+   prefix, so it presumably relies on the top source directory being on the
+   include search path -- confirm against the tune Makefile.  */
+#include "mpn/generic/jacbase.c"
diff --git a/tune/many.pl b/tune/many.pl

new file mode 100644 (file)

index 0000000..5c5648a
--- /dev/null
+++ b/tune/many.pl
@@ -0,0 +1,1323 @@
+#! /usr/bin/perl -w
+
+# Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+# Usage:  cd $builddir/tune
+#        perl $srcdir/tune/many.pl [-t] <files/dirs>...
+#
+# Output: speed-many.c
+#         try-many.c
+#         Makefile.many
+#
+# Make alternate versions of various mpn routines available for measuring
+# and testing.
+#
+# The $srcdir and $builddir in the invocation above just mean the script
+# lives in the tune source directory, but should be run in the tune build
+# directory.  When not using a separate object directory this just becomes
+#
+#      cd tune
+#      perl many.pl [-t] <files/dirs>...
+#
+#
+# SINGLE FILES
+#
+# Suppose $HOME/newcode/mul_1_experiment.asm is a new implementation of
+# mpn_mul_1, then
+#
+#      cd $builddir/tune
+#      perl $srcdir/tune/many.pl $HOME/newcode/mul_1_experiment.asm
+#
+# will produce rules and renaming so that a speed program incorporating it
+# can be built,
+#
+#      make -f Makefile.many speed-many
+#
+# then for example it can be compared to the standard mul_1,
+#
+#      ./speed-many -s 1-30 mpn_mul_1 mpn_mul_1_experiment
+#
+# An expanded try program can be used to check correctness,
+#
+#      make -f Makefile.many try-many
+#
+# and run
+#
+#      ./try-many mpn_mul_1_experiment
+#
+# Files can be ".c", ".S" or ".asm".  ".s" files can't be used because they
+# don't get any preprocessing so there's no way to do renaming of their
+# functions.
+#
+#
+# WHOLE DIRECTORIES
+#
+# If a directory is given, then all files in it will be made available.
+# For example,
+#
+#      cd $builddir/tune
+#      perl $srcdir/tune/many.pl $HOME/newcode
+#
+# Each file should have a suffix, like "_experiment" above.
+#
+#
+# MPN DIRECTORIES
+#
+# mpn directories from the GMP source tree can be included, and this is a
+# convenient way to compare multiple implementations suiting different chips
+# in a CPU family.  For example the following would make all x86 routines
+# available,
+#
+#      cd $builddir/tune
+#      perl $srcdir/tune/many.pl `find $srcdir/mpn/x86 -type d`
+#
+# On a new x86 chip a comparison could then be made to see how existing code
+# runs.  For example,
+#
+#      make -f Makefile.many speed-many
+#      ./speed-many -s 1-30 -c \
+#              mpn_add_n_x86 mpn_add_n_pentium mpn_add_n_k6 mpn_add_n_k7
+#
+# Files in "mpn" subdirectories don't need the "_experiment" style suffix
+# described above, instead a suffix is constructed from the subdirectory.
+# For example "mpn/x86/k7/mmx/mod_1.asm" will generate a function
+# mpn_mod_1_k7_mmx.  The rule is to take the last directory name after the
+# "mpn", or the last two if there's three or more.  (Check the generated
+# speed-many.c if in doubt.)
+#
+#
+# GENERIC C
+#
+# The mpn/generic directory can be included too, just like any processor
+# specific directory.  This is a good way to compare assembler and generic C
+# implementations.  For example,
+#
+#      cd $builddir/tune
+#      perl $srcdir/tune/many.pl $srcdir/mpn/generic
+#
+# or if just a few routines are of interest, then for example
+#
+#      cd $builddir/tune
+#      perl $srcdir/tune/many.pl \
+#              $srcdir/mpn/generic/lshift.c \
+#              $srcdir/mpn/generic/mod_1.c \
+#              $srcdir/mpn/generic/aorsmul_1.c
+#
+# giving mpn_lshift_generic etc.
+#
+#
+# TESTS/DEVEL PROGRAMS
+#
+# Makefile.many also has rules to build the tests/devel programs with suitable
+# renaming, and with some parameters for correctness or speed.  This is less
+# convenient than the speed and try programs, but provides an independent
+# check.  For example,
+#
+#      make -f Makefile.many tests_mul_1_experimental
+#      ./tests_mul_1_experimental
+#
+# and for speed
+#
+#      make -f Makefile.many tests_mul_1_experimental_sp
+#      ./tests_mul_1_experimental_sp
+#
+# Not all the programs support speed measuring, in which case only the
+# correctness test will be useful.
+#
+# The parameters for repetitions and host clock speed are -D defines.  Some
+# defaults are provided at the end of Makefile.many, but probably these will
+# want to be overridden.  For example,
+#
+#      rm tests_mul_1_experimental.o
+#      make -f Makefile.many \
+#         CFLAGS_TESTS="-DSIZE=50 -DTIMES=1000 -DRANDOM -DCLOCK=175000000" \
+#         tests_mul_1_experimental
+#      ./tests_mul_1_experimental
+#
+#
+# OTHER NOTES
+#
+# The mappings of file names to functions, and the macros to then use for
+# speed measuring etc are driven by @table below.  The scheme isn't
+# completely general, it's only got as many variations as have been needed
+# so far.
+#
+# Some functions are only made available in speed-many, or others only in
+# try-many.  An @table entry speed=>none means no speed measuring is
+# available, or try=>none no try program testing.  These can be removed
+# if/when the respective programs get the necessary support.
+#
+# If a file has "1c" or "nc" carry-in entrypoints, they're renamed and made
+# available too.  These are recognised from PROLOGUE or MULFUNC_PROLOGUE in
+# .S and .asm files, or from a line starting with "mpn_foo_1c" in a .c file
+# (possibly via a #define), and on that basis are entirely optional.  This
+# entrypoint matching is done for the standard entrypoints too, but it would
+# be very unusual to have for instance a mul_1c without a mul_1.
+#
+# Some mpz files are recognized.  For example an experimental copy of
+# mpz/powm.c could be included as powm_new.c and would be called
+# mpz_powm_new.  So far only speed measuring is available for these.
+#
+# For the ".S" and ".asm" files, both PIC and non-PIC objects are built.
+# The PIC functions have a "_pic" suffix, for example "mpn_mod_1_k7_mmx_pic".
+# This can be ignored for routines that don't differ for PIC, or for CPUs
+# where everything is PIC anyway.
+#
+# K&R compilers are supported via the same ansi2knr mechanism used by
+# automake, though it's hard to believe anyone will have much interest in
+# measuring a compiler so old that it doesn't even have an ANSI mode.
+#
+# The "-t" option can be used to print a trace of the files found and what's
+# done with them.  A great deal of obscure output is produced, but it can
+# indicate where or why some files aren't being recognised etc.  For
+# example,
+#
+#      cd $builddir/tune
+#      perl $srcdir/tune/many.pl -t $HOME/newcode/add_n_weird.asm
+#
+# In general, when including new code, all that's really necessary is that
+# it will compile or assemble under the current configuration.  It's fine if
+# some code doesn't actually run due to bugs, or to needing a newer CPU or
+# whatever, simply don't ask for the offending routines when invoking
+# speed-many or try-many, or don't try to run them on sizes they don't yet
+# support, or whatever.
+#
+#
+# CPU SPECIFICS
+#
+# x86 - All the x86 code will assemble on any system, but code for newer
+#       chips might not run on older chips.  Expect SIGILLs from new
+#       instructions on old chips.
+#
+#       A few "new" instructions, like cmov for instance, are done as macros
+#       and will generate some equivalent plain i386 code when HAVE_HOST_CPU
+#       in config.m4 indicates an old CPU.  It won't run fast, but it does
+#       make it possible to test correctness.
+#
+#
+# INTERNALS
+#
+# The nonsense involving $ENV is some hooks used during development to add
+# additional functions temporarily.
+#
+#
+# FUTURE
+#
+# Maybe the C files should be compiled pic and non-pic too.  Wait until
+# there's a difference that might be of interest.
+#
+# Warn if a file provides no functions.
+#
+# Allow mpz and mpn files of the same name.  Currently the mpn fib2_ui
+# matching hides the mpz version of that.  Will need to check the file
+# contents to see which it is.  Would be worth allowing an "mpz_" or "mpn_"
+# prefix on the filenames to have working versions of both in one directory.
+#
+#
+# LIMITATIONS
+#
+# Some of the command lines can become very long when a lot of files are
+# included.  If this is a problem on a given system the only suggestion is
+# to run many.pl for just those that are actually wanted at a particular
+# time.
+#
+# DOS 8.3 or SysV 14 char filesystems won't work, since the long filenames
+# generated will almost certainly fail to be unique.
+
+
+use strict;
+use File::Basename;
+use Getopt::Std;
+
+# Command line handling.  The only switch accepted is -t (trace output);
+# getopts() removes it from @ARGV, so whatever remains is the list of files
+# and directories to process.
+my %opt;
+getopts('t', \%opt);
+
+my @DIRECTORIES = @ARGV;
+
+# Development hook: extra files/directories supplied through
+# $ENV{directories}.  An array reference planted there by an in-process
+# wrapper is expanded exactly as before.  A plain string, which is what a
+# value coming from the real environment always is, is split on whitespace
+# rather than dereferenced, since dereferencing a string is fatal under
+# "use strict".
+if (defined $ENV{directories}) {
+  push @DIRECTORIES, (ref($ENV{directories})
+                      ? @{$ENV{directories}}
+                      : split(' ', $ENV{directories}));
+}
+
+
+# regexp - matched against the start of the filename.  If a grouping "(...)"
+#          is present then only the first such part is used.
+#
+# mulfunc - filenames to be generated from a multi-function file.
+#
+# funs - functions provided by the file, defaulting to the filename with mpn
+#          (or mpX).
+#
+# mpX - prefix like "mpz", defaulting to "mpn".
+#
+# ret - return value type.
+#
+# args, args_<fun> - arguments for the given function.  If an args_<fun> is
+#          set then it's used, otherwise plain args is used.  "mp_limb_t
+#          carry" is appended for carry-in variants.
+#
+# try - try.c TYPE_ to use, defaulting to TYPE_fun with the function name
+#          in upper case.  "C" is appended for carry-in variants.  Can be
+#          'none' for no try program entry.
+#
+# speed - SPEED_ROUTINE_ to use, handled like "try".
+#
+# speed_flags - FLAG_ value(s) for the speed program entry, for example
+#          "FLAG_R" or "FLAG_R_OPTIONAL | FLAG_RSIZE".
+
+
+my @table =
+    (
+     {
+       'regexp'=> 'add_n|sub_n|addlsh1_n|sublsh1_n|rsh1add_n|rsh1sub_n',
+       'ret'   => 'mp_limb_t',
+       'args'  => 'mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size',
+       'speed' => 'SPEED_ROUTINE_MPN_BINARY_N',
+       'speed_flags'=> 'FLAG_R_OPTIONAL',
+     },
+     {
+       'regexp'=> 'aors_n',
+       'mulfunc'=> ['add_n','sub_n'],
+       'ret'   => 'mp_limb_t',
+       'args'  => 'mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size',
+       'speed' => 'SPEED_ROUTINE_MPN_BINARY_N',
+       'speed_flags'=> 'FLAG_R_OPTIONAL',
+     },
+
+     {
+       'regexp'=> 'addmul_1|submul_1',
+       'ret'   => 'mp_limb_t',
+       'args'  => 'mp_ptr wp, mp_srcptr xp, mp_size_t size, mp_limb_t mult',
+       'speed' => 'SPEED_ROUTINE_MPN_UNARY_1',
+       'speed_flags'=> 'FLAG_R',
+     },
+     {
+       'regexp'=> 'aorsmul_1',
+       'mulfunc'=> ['addmul_1','submul_1'],
+       'ret'   => 'mp_limb_t',
+       'args'  => 'mp_ptr wp, mp_srcptr xp, mp_size_t size, mp_limb_t mult',
+       'speed' => 'SPEED_ROUTINE_MPN_UNARY_1',
+       'speed_flags'=> 'FLAG_R',
+     },
+
+     {
+       'regexp'=> 'addmul_2|submul_2',
+       'ret'   => 'mp_limb_t',
+       'args'  => 'mp_ptr wp, mp_srcptr xp, mp_size_t size, mp_srcptr yp',
+       'speed' => 'SPEED_ROUTINE_MPN_UNARY_2',
+       'speed_flags'=> 'FLAG_R_OPTIONAL',
+       'try-minsize' => 2,
+     },
+     {
+       'regexp'=> 'addmul_3|submul_3',
+       'ret'   => 'mp_limb_t',
+       'args'  => 'mp_ptr wp, mp_srcptr xp, mp_size_t size, mp_srcptr yp',
+       'speed' => 'SPEED_ROUTINE_MPN_UNARY_3',
+       'speed_flags'=> 'FLAG_R_OPTIONAL',
+       'try-minsize' => 3,
+     },
+     {
+       'regexp'=> 'addmul_4|submul_4',
+       'ret'   => 'mp_limb_t',
+       'args'  => 'mp_ptr wp, mp_srcptr xp, mp_size_t size, mp_srcptr yp',
+       'speed' => 'SPEED_ROUTINE_MPN_UNARY_4',
+       'speed_flags'=> 'FLAG_R_OPTIONAL',
+       'try-minsize' => 4,
+     },
+     {
+       'regexp'=> 'addmul_5|submul_5',
+       'ret'   => 'mp_limb_t',
+       'args'  => 'mp_ptr wp, mp_srcptr xp, mp_size_t size, mp_srcptr yp',
+       'speed' => 'SPEED_ROUTINE_MPN_UNARY_5',
+       'speed_flags'=> 'FLAG_R_OPTIONAL',
+       'try-minsize' => 5,
+     },
+     {
+       'regexp'=> 'addmul_6|submul_6',
+       'ret'   => 'mp_limb_t',
+       'args'  => 'mp_ptr wp, mp_srcptr xp, mp_size_t size, mp_srcptr yp',
+       'speed' => 'SPEED_ROUTINE_MPN_UNARY_6',
+       'speed_flags'=> 'FLAG_R_OPTIONAL',
+       'try-minsize' => 6,
+     },
+     {
+       'regexp'=> 'addmul_7|submul_7',
+       'ret'   => 'mp_limb_t',
+       'args'  => 'mp_ptr wp, mp_srcptr xp, mp_size_t size, mp_srcptr yp',
+       'speed' => 'SPEED_ROUTINE_MPN_UNARY_7',
+       'speed_flags'=> 'FLAG_R_OPTIONAL',
+       'try-minsize' => 7,
+     },
+     {
+       'regexp'=> 'addmul_8|submul_8',
+       'ret'   => 'mp_limb_t',
+       'args'  => 'mp_ptr wp, mp_srcptr xp, mp_size_t size, mp_srcptr yp',
+       'speed' => 'SPEED_ROUTINE_MPN_UNARY_8',
+       'speed_flags'=> 'FLAG_R_OPTIONAL',
+       'try-minsize' => 8,
+     },
+
+     {
+       'regexp'=> 'add_n_sub_n',
+       'ret'   => 'mp_limb_t',
+       'args'  => 'mp_ptr sum, mp_ptr diff, mp_srcptr xp, mp_srcptr yp, mp_size_t size',
+       'speed_flags'=> 'FLAG_R_OPTIONAL',
+     },
+
+     {
+       'regexp'=> 'com|copyi|copyd',
+       'ret'   => 'void',
+       'args'  => 'mp_ptr wp, mp_srcptr xp, mp_size_t size',
+       'speed' => 'SPEED_ROUTINE_MPN_COPY',
+     },
+
+     {
+       'regexp'=> 'dive_1',
+       'funs'  => ['divexact_1'],
+       'ret'   => 'void',
+       'args'  => 'mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor',
+       'speed_flags'=> 'FLAG_R',
+     },
+     {
+       'regexp'=> 'diveby3',
+       'funs'  => ['divexact_by3c'],
+       'ret'   => 'mp_limb_t',
+       'args'  => 'mp_ptr dst, mp_srcptr src, mp_size_t size',
+       'carrys'=> [''],
+       'speed' => 'SPEED_ROUTINE_MPN_COPY',
+     },
+
+     # mpn_preinv_divrem_1 is an optional extra entrypoint
+     {
+       'regexp'=> 'divrem_1',
+       'funs'  => ['divrem_1', 'preinv_divrem_1'],
+       'ret'   => 'mp_limb_t',
+       'args_divrem_1' => 'mp_ptr rp, mp_size_t xsize, mp_srcptr sp, mp_size_t size, mp_limb_t divisor',
+       'args_preinv_divrem_1' => 'mp_ptr rp, mp_size_t xsize, mp_srcptr sp, mp_size_t size, mp_limb_t divisor, mp_limb_t inverse, unsigned shift',
+       'speed_flags'=> 'FLAG_R',
+       'speed_suffixes' => ['f'],
+     },
+     {
+       'regexp'=> 'pre_divrem_1',
+       'funs'  => ['preinv_divrem_1'],
+       'ret'   => 'mp_limb_t',
+       'args'  => 'mp_ptr qp, mp_size_t qxn, mp_srcptr ap, mp_size_t asize, mp_limb_t divisor, mp_limb_t inverse, int shift',
+       'speed_flags' => 'FLAG_R',
+     },
+
+     {
+       'regexp'=> 'divrem_2',
+       'ret'   => 'mp_limb_t',
+       'args'  => 'mp_ptr qp, mp_size_t qxn, mp_srcptr np, mp_size_t nsize, mp_srcptr dp',
+       'try'   => 'none',
+     },
+
+     {
+       'regexp'=> 'sb_divrem_mn',
+       'ret'   => 'mp_limb_t',
+       'args'  => 'mp_ptr qp, mp_ptr np, mp_size_t nsize, mp_srcptr dp, mp_size_t dsize',
+       'speed' => 'SPEED_ROUTINE_MPN_DC_DIVREM_SB',
+       'try-minsize' => 3,
+     },
+     {
+       'regexp'=> 'tdiv_qr',
+       'ret'   => 'void',
+       'args'  => 'mp_ptr qp, mp_size_t qxn, mp_ptr np, mp_size_t nsize, mp_srcptr dp, mp_size_t dsize',
+       'speed' => 'none',
+     },
+
+     {
+       'regexp'=> 'get_str',
+       'ret'   => 'size_t',
+       'args'  => 'unsigned char *str, int base, mp_ptr mptr, mp_size_t msize',
+       'speed_flags' => 'FLAG_R_OPTIONAL',
+       'try'   => 'none',
+     },
+     {
+       'regexp'=> 'set_str',
+       'ret'   => 'mp_size_t',
+       'args'  => 'mp_ptr xp, const unsigned char *str, size_t str_len, int base',
+       'speed_flags' => 'FLAG_R_OPTIONAL',
+       'try'   => 'none',
+     },
+
+     {
+       'regexp'=> 'fac_ui',
+       'mpX'   => 'mpz',
+       'ret'   => 'void',
+       'args'  => 'mpz_ptr r, unsigned long n',
+       'speed_flags' => 'FLAG_NODATA',
+       'try'   => 'none',
+     },
+
+     {
+       'regexp'=> 'fib2_ui',
+       'ret'   => 'void',
+       'args'  => 'mp_ptr fp, mp_ptr f1p, unsigned long n',
+       'rename'=> ['__gmp_fib_table'],
+       'speed_flags' => 'FLAG_NODATA',
+       'try'   => 'none',
+     },
+     {
+       'regexp'=> 'fib_ui',
+       'mpX'   => 'mpz',
+       'ret'   => 'void',
+       'args'  => 'mpz_ptr fn, unsigned long n',
+       'speed_flags' => 'FLAG_NODATA',
+       'try'   => 'none',
+     },
+     {
+       'regexp'=> 'fib2_ui',
+       'mpX'   => 'mpz',
+       'ret'   => 'void',
+       'args'  => 'mpz_ptr fn, mpz_ptr fnsub1, unsigned long n',
+       'speed_flags' => 'FLAG_NODATA',
+       'try'   => 'none',
+     },
+
+     {
+       'regexp'=> 'lucnum_ui',
+       'mpX'   => 'mpz',
+       'ret'   => 'void',
+       'args'  => 'mpz_ptr ln, unsigned long n',
+       'speed_flags' => 'FLAG_NODATA',
+       'try'   => 'none',
+     },
+     {
+       'regexp'=> 'lucnum2_ui',
+       'mpX'   => 'mpz',
+       'ret'   => 'void',
+       'args'  => 'mpz_ptr ln, mpz_ptr lnsub1, unsigned long n',
+       'speed_flags' => 'FLAG_NODATA',
+       'try'   => 'none',
+     },
+
+     {
+       'regexp'=> 'gcd_1',
+       'ret'   => 'mp_limb_t',
+       'args'  => 'mp_ptr xp, mp_size_t xsize, mp_limb_t y',
+       'speed_flags'=> 'FLAG_R_OPTIONAL',
+       'speed_suffixes' => ['N'],
+     },
+     {
+       'regexp'=> '(gcd)(?!(_1|ext|_finda))',
+       'ret'   => 'mp_size_t',
+       'args'  => 'mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t vsize',
+     },
+     {
+       'regexp'=> 'gcd_finda',
+       'ret'   => 'mp_limb_t',
+       'args'  => 'mp_srcptr cp',
+     },
+
+
+     {
+       'regexp'=> 'jacobi',
+       'funs'  => ['jacobi', 'legendre', 'kronecker'],
+       'mpX'   => 'mpz',
+       'ret'   => 'int',
+       'args'  => 'mpz_srcptr a, mpz_srcptr b',
+       'try-legendre' => 'TYPE_MPZ_JACOBI',
+     },
+     {
+       'regexp'=> 'jacbase',
+       'funs'  => ['jacobi_base'],
+       'ret'   => 'mp_limb_t',
+       'args'  => 'mp_limb_t a, mp_limb_t b, int bit1',
+       'speed' => 'SPEED_ROUTINE_MPN_JACBASE',
+       'try'   => 'none',
+     },
+
+     {
+       'regexp'=> 'logops_n',
+       'mulfunc'=> ['and_n','andn_n','nand_n','ior_n','iorn_n','nior_n','xor_n','xnor_n'],
+       'ret'   => 'void',
+       'args'  => 'mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size',
+       'speed' => 'SPEED_ROUTINE_MPN_BINARY_N',
+     },
+
+     {
+       'regexp'=> '[lr]shift',
+       'ret'   => 'mp_limb_t',
+       'args'  => 'mp_ptr wp, mp_srcptr xp, mp_size_t size, unsigned shift',
+       'speed' => 'SPEED_ROUTINE_MPN_UNARY_1',
+       'speed_flags'=> 'FLAG_R',
+     },
+
+     # mpn_preinv_mod_1 is an optional extra entrypoint
+     {
+       'regexp'=> '(mod_1)(?!_rs)',
+       'funs'  => ['mod_1','preinv_mod_1'],
+       'ret'   => 'mp_limb_t',
+       'args_mod_1'       => 'mp_srcptr xp, mp_size_t size, mp_limb_t divisor',
+       'args_preinv_mod_1'=> 'mp_srcptr xp, mp_size_t size, mp_limb_t divisor, mp_limb_t inverse',
+       'speed_flags'=> 'FLAG_R',
+     },
+     {
+       'regexp'=> 'pre_mod_1',
+       'funs'  => ['preinv_mod_1'],
+       'ret'   => 'mp_limb_t',
+       'args'  => 'mp_srcptr xp, mp_size_t size, mp_limb_t divisor, mp_limb_t inverse',
+       'speed_flags'=> 'FLAG_R',
+     },
+     {
+       'regexp'=> 'mod_34lsub1',
+       'ret'   => 'mp_limb_t',
+       'args'  => 'mp_srcptr src, mp_size_t len',
+     },
+     {
+       'regexp'=> 'invert_limb',
+       'ret'   => 'mp_limb_t',
+       'args'  => 'mp_limb_t divisor',
+       'speed_flags'=> 'FLAG_R_OPTIONAL',
+       'try'   => 'none',
+     },
+
+     {
+       # not for use with hppa reversed argument versions of mpn_umul_ppmm
+       'regexp'=> 'udiv',
+       'funs'  => ['udiv_qrnnd','udiv_qrnnd_r'],
+       'ret'   => 'mp_limb_t',
+       'args_udiv_qrnnd'   => 'mp_limb_t *, mp_limb_t, mp_limb_t, mp_limb_t',
+       'args_udiv_qrnnd_r' => 'mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t *',
+       'speed' => 'none',
+       'try-minsize' => 2,
+     },
+
+     {
+       'regexp'=> 'mode1o',
+       'funs'  => ['modexact_1_odd'],
+       'ret'   => 'mp_limb_t',
+       'args'  => 'mp_srcptr src, mp_size_t size, mp_limb_t divisor',
+       'speed_flags'=> 'FLAG_R',
+     },
+     {
+       'regexp'=> 'modlinv',
+       'funs'  => ['modlimb_invert'],
+       'ret'   => 'mp_limb_t',
+       'args'  => 'mp_limb_t v',
+       'carrys'=> [''],
+       'try'   => 'none',
+     },
+
+     {
+       'regexp'=> 'mul_1',
+       'ret'   => 'mp_limb_t',
+       'args'  => 'mp_ptr wp, mp_srcptr xp, mp_size_t size, mp_limb_t mult',
+       'speed' => 'SPEED_ROUTINE_MPN_UNARY_1',
+       'speed_flags'=> 'FLAG_R',
+     },
+     {
+       'regexp'=> 'mul_2',
+       'ret'   => 'mp_limb_t',
+       'args'  => 'mp_ptr wp, mp_srcptr xp, mp_size_t size, mp_srcptr mult',
+       'speed' => 'SPEED_ROUTINE_MPN_UNARY_2',
+       'speed_flags'=> 'FLAG_R',
+     },
+
+     {
+       'regexp'=> 'mul_basecase',
+       'ret'   => 'void',
+       'args'  => 'mp_ptr wp, mp_srcptr xp, mp_size_t xsize, mp_srcptr yp, mp_size_t ysize',
+       'speed_flags' => 'FLAG_R_OPTIONAL | FLAG_RSIZE',
+     },
+     {
+       'regexp'=> '(mul_n)[_.]',
+       'ret'   => 'void',
+       'args'  => 'mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size',
+       'rename'=> ['kara_mul_n','kara_sqr_n','toom3_mul_n','toom3_sqr_n'],
+     },
+     {
+       'regexp'=> 'umul',
+       'funs'  => ['umul_ppmm','umul_ppmm_r'],
+       'ret'   => 'mp_limb_t',
+       'args_umul_ppmm'   => 'mp_limb_t *lowptr, mp_limb_t m1, mp_limb_t m2',
+       'args_umul_ppmm_r' => 'mp_limb_t m1, mp_limb_t m2, mp_limb_t *lowptr',
+       'speed' => 'none',
+       'try-minsize' => 3,
+     },
+
+
+     {
+       'regexp'=> 'popham',
+       'mulfunc'=> ['popcount','hamdist'],
+       'ret'   => 'unsigned long',
+       'args_popcount'=> 'mp_srcptr xp, mp_size_t size',
+       'args_hamdist' => 'mp_srcptr xp, mp_srcptr yp, mp_size_t size',
+     },
+     {
+       'regexp'=> 'popcount',
+       'ret'   => 'unsigned long',
+       'args'  => 'mp_srcptr xp, mp_size_t size',
+     },
+     {
+       'regexp'=> 'hamdist',
+       'ret'   => 'unsigned long',
+       'args'  => 'mp_srcptr xp, mp_srcptr yp, mp_size_t size',
+       # extra renaming to support sharing a data table with mpn_popcount
+       'rename'=> ['popcount'],
+     },
+
+     {
+       'regexp'=> 'sqr_basecase',
+       'ret'   => 'void',
+       'args'  => 'mp_ptr wp, mp_srcptr xp, mp_size_t size',
+       'speed' => 'SPEED_ROUTINE_MPN_SQR',
+       'try'   => 'TYPE_SQR',
+     },
+     {
+       'regexp'=> 'sqr_diagonal',
+       'ret'   => 'void',
+       'args'  => 'mp_ptr wp, mp_srcptr xp, mp_size_t size',
+       'try'   => 'none',
+     },
+
+     {
+       'regexp'=> 'sqrtrem',
+       'ret'   => 'mp_size_t',
+       'args'  => 'mp_ptr root, mp_ptr rem, mp_srcptr src, mp_size_t size',
+       'try'   => 'none',
+     },
+
+     {
+       'regexp'=> 'cntlz',
+       'funs'  => ['count_leading_zeros'],
+       'ret'   => 'unsigned',
+       'args'  => 'mp_limb_t',
+       'macro-before' => "#undef COUNT_LEADING_ZEROS_0",
+       'macro-speed'  =>
+'#ifdef COUNT_LEADING_ZEROS_0
+#define COUNT_LEADING_ZEROS_0_ALLOWED   1
+#else
+#define COUNT_LEADING_ZEROS_0_ALLOWED   0
+#endif
+  SPEED_ROUTINE_COUNT_ZEROS_A (1, COUNT_LEADING_ZEROS_0_ALLOWED);
+  $fun (c, n);
+  SPEED_ROUTINE_COUNT_ZEROS_B ()',
+       'speed_flags'=> 'FLAG_R_OPTIONAL',
+       'try'   => 'none',
+     },
+     {
+       'regexp'=> 'cnttz',
+       'funs'  => ['count_trailing_zeros'],
+       'ret'   => 'unsigned',
+       'args'  => 'mp_limb_t',
+       'macro-speed' => '
+  SPEED_ROUTINE_COUNT_ZEROS_A (0, 0);
+  $fun (c, n);
+  SPEED_ROUTINE_COUNT_ZEROS_B ()',
+       'speed_flags' => 'FLAG_R_OPTIONAL',
+       'try'   => 'none',
+     },
+
+     {
+       'regexp'=> 'zero',
+       'ret'   => 'void',
+       'args'  => 'mp_ptr ptr, mp_size_t size',
+     },
+
+     {
+       'regexp'=> '(powm)(?!_ui)',
+       'mpX'   => 'mpz',
+       'ret'   => 'void',
+       'args'  => 'mpz_ptr r, mpz_srcptr b, mpz_srcptr e, mpz_srcptr m',
+       'try'   => 'none',
+     },
+     {
+       'regexp'=> 'powm_ui',
+       'mpX'   => 'mpz',
+       'ret'   => 'void',
+       'args'  => 'mpz_ptr r, mpz_srcptr b, unsigned long e, mpz_srcptr m',
+       'try'   => 'none',
+     },
+
+     # special for use during development
+     {
+       'regexp'=> 'back',
+       'funs'  => ['back_to_back'],
+       'ret'   => 'void',
+       'args'  => 'void',
+       'pic'   => 'no',
+       'try'   => 'none',
+       'speed_flags'=> 'FLAG_NODATA',
+     },
+     );
+
+if (defined $ENV{table2}) {
+  my @newtable = @{$ENV{table2}};
+  push @newtable, @table;
+  @table = @newtable;
+}
+
+
+my %pictable =
+    (
+     'yes' => {
+       'suffix' =>  '_pic',
+       'asmflags'=> '$(ASMFLAGS_PIC)',
+       'cflags' =>  '$(CFLAGS_PIC)',
+     },
+     'no' => {
+       'suffix' =>  '',
+       'asmflags'=> '',
+       'cflags' =>  '',
+     },
+     );
+
+
+my $builddir = $ENV{builddir};
+$builddir = "." if (! defined $builddir);
+
+my $top_builddir = "${builddir}/..";
+
+
+open(MAKEFILE, "<${builddir}/Makefile")
+  or die "Cannot open ${builddir}/Makefile: $!\n"
+       . "Is this a tune build directory?";
+my ($srcdir, $top_srcdir);
+while (<MAKEFILE>) {
+  if (/^srcdir = (.*)/) {     $srcdir = $1;     }
+  if (/^top_srcdir = (.*)/) { $top_srcdir = $1; }
+}
+die "Cannot find \$srcdir in Makefile\n" if (! defined $srcdir);
+die "Cannot find \$top_srcdir in Makefile\n" if (! defined $top_srcdir);
+print "srcdir $srcdir\n" if $opt{'t'};
+print "top_srcdir $top_srcdir\n" if $opt{'t'};
+close(MAKEFILE);
+
+
+open(SPEED, ">speed-many.c") or die "Cannot create speed-many.c: $!\n";
+print SPEED
+"/* speed-many.c generated by many.pl - DO NOT EDIT, CHANGES WILL BE LOST */
+
+";
+my $SPEED_EXTRA_ROUTINES = "#define SPEED_EXTRA_ROUTINES \\\n";
+my $SPEED_EXTRA_PROTOS = "#define SPEED_EXTRA_PROTOS \\\n";
+my $SPEED_CODE = "";
+
+open(TRY, ">try-many.c") or die "Cannot create try-many.c: $!\n";
+print TRY
+    "/* try-many.c generated by many.pl - DO NOT EDIT, CHANGES WILL BE LOST */\n" .
+    "\n";
+my $TRY_EXTRA_ROUTINES = "#define EXTRA_ROUTINES \\\n";
+my $TRY_EXTRA_PROTOS = "#define EXTRA_PROTOS \\\n";
+
+open(FD,"<${top_builddir}/libtool") or die "Cannot open \"${top_builddir}/libtool\": $!\n";
+my $pic_flag;
+while (<FD>) {
+  if (/^pic_flag="?([^"]*)"?$/) {
+    $pic_flag=$1;
+    last;
+  }
+}
+close FD;
+if (! defined $pic_flag) {
+  die "Cannot find pic_flag in ${top_builddir}/libtool";
+}
+
+my $CFLAGS_PIC = $pic_flag;
+
+my $ASMFLAGS_PIC = "";
+foreach (split /[ \t]/, $pic_flag) {
+  if (/^-D/) {
+    $ASMFLAGS_PIC .= " " . $_;
+  }
+}
+
+open(MAKEFILE, ">Makefile.many") or die "Cannot create Makefile.many: $!\n";
+print MAKEFILE
+    "# Makefile.many generated by many.pl - DO NOT EDIT, CHANGES WILL BE LOST\n" .
+    "\n" .
+    "all: speed-many try-many\n" .
+    "\n" .
+    "#--------- begin included copy of basic Makefile ----------\n" .
+    "\n";
+open(FD,"<${builddir}/Makefile") or die "Cannot open \"${builddir}/Makefile\": $!\n";
+print MAKEFILE <FD>;
+close FD;
+print MAKEFILE
+    "\n" .
+    "#--------- end included copy of basic Makefile ----------\n" .
+    "\n" .
+    "CFLAGS_PIC = $CFLAGS_PIC\n" .
+    "ASMFLAGS_PIC = $ASMFLAGS_PIC\n" .
+    "\n";
+
+my $CLEAN="";
+my $MANY_OBJS="";
+
+
+sub print_ansi2knr {
+  my ($base,$file,$includes) = @_;
+  if (! defined $file)     { $file = "$base.c"; }
+  if (! defined $includes) { $includes = ""; }
+
+  print MAKEFILE <<EOF;
+${base}_.c: $file \$(ANSI2KNR)
+       \$(CPP) \$(DEFS) \$(INCLUDES) $includes \$(AM_CPPFLAGS) \$(CPPFLAGS) $file | sed 's/^# \([0-9]\)/#line \\1/' | \$(ANSI2KNR) >${base}_.c
+
+EOF
+}
+
+
+# Spawning a glob is a touch slow when there's lots of files.
+my @files = ();
+foreach my $dir (@DIRECTORIES) {
+  print "dir $dir\n" if $opt{'t'};
+  if (-f $dir) {
+    push @files,$dir;
+  } else {
+    if (! opendir DD,$dir) {
+      print "Cannot open $dir: $!\n";
+    } else {
+      push @files, map {$_="$dir/$_"} grep /\.(c|asm|S|h)$/, readdir DD;
+      closedir DD;
+    }
+  }
+}
+@files = sort @files;
+print "\@files ",join(" ",@files),"\n" if $opt{'t'};  # literal label; list printed once
+
+my $count_files = 0;
+my $count_functions = 0;
+my %seen_obj;
+my %seen_file;
+
+foreach my $file_full (@files) {
+  if (! -f $file_full) {
+    print "Not a file: $file_full\n";
+    next;
+  }
+  if (defined $seen_file{$file_full}) {
+    print "Skipping duplicate file: $file_full\n";
+    next;
+  }
+  $seen_file{$file_full} = 1;
+
+  my ($FILE,$path,$lang) = fileparse($file_full,"\.[a-zA-Z]+");
+  $path =~ s/\/$//;
+  print "file $FILE path $path lang $lang\n" if $opt{'t'};
+
+  my @pic_choices;
+  if ($lang eq '.asm')  { @pic_choices=('no','yes'); }
+  elsif ($lang eq '.c') { @pic_choices=('no'); }
+  elsif ($lang eq '.S') { @pic_choices=('no','yes'); }
+  elsif ($lang eq '.h') { @pic_choices=('no'); }
+  else { next };
+
+  my ($t, $file_match);
+  foreach my $p (@table) {
+    # print " ",$p->{'regexp'},"\n" if $opt{'t'};
+    if ($FILE =~ "^($p->{'regexp'})") {
+      $t = $p;
+      $file_match = $1;
+      $file_match = $2 if defined $2;
+      last;
+    }
+  }
+  next if ! defined $t;
+  print "match $t->{'regexp'} $FILE ($file_full)\n" if $opt{'t'};
+
+  if (! open FD,"<$file_full") { print "Can't open $file_full: $!\n"; next }
+  my @file_contents = <FD>;
+  close FD;
+
+  my $objs;
+  if (defined $t->{'mulfunc'}) { $objs = $t->{'mulfunc'}; }
+  else                         { $objs = [$file_match]; }
+  print "objs @$objs\n" if $opt{'t'};
+
+  my $ret = $t->{'ret'};
+  if (! defined $ret && $lang eq '.h') { $ret = ''; }
+  if (! defined $ret) { die "$FILE return type not defined\n" };
+  print "ret $ret\n" if $opt{'t'};
+
+  my $mpX = $t->{'mpX'};
+  if (! defined $mpX) { $mpX = ($lang eq '.h' ? '' : 'mpn'); }
+  $mpX = "${mpX}_" if $mpX ne '';
+  print "mpX $mpX\n" if $opt{'t'};
+
+  my $carrys;
+  if (defined $t->{'carrys'}) { $carrys = $t->{'carrys'}; }
+  else                        { $carrys = ['','c'];       }
+  print "carrys $carrys @$carrys\n" if $opt{'t'};
+
+  # some restriction functions are implemented, but they're not very useful
+  my $restriction='';
+
+  my $suffix;
+  if ($FILE =~ ("${file_match}_(.+)")) {
+    $suffix = $1;
+  } elsif ($path =~ /\/mp[zn]\/(.*)$/) {
+    # derive the suffix from the path
+    $suffix = $1;
+    $suffix =~ s/\//_/g;
+    # use last directory name, or if there's 3 or more then the last two
+    if ($suffix =~ /([^_]*_)+([^_]+_[^_]+)$/) {
+      $suffix = $2;
+    } elsif ($suffix =~ /([^_]*_)*([^_]+)$/) {
+      $suffix = $2;
+    }
+  } else {
+    die "Can't determine suffix for: $file_full (path $path)\n";
+  }
+  print "suffix $suffix\n" if $opt{'t'};
+
+  $count_files++;
+
+  foreach my $obj (@{$objs}) {
+    print "obj $obj\n" if $opt{'t'};
+
+    my $obj_with_suffix = "${obj}_$suffix";
+    if (defined $seen_obj{$obj_with_suffix}) {
+      print "Skipping duplicate object: $obj_with_suffix\n";
+      print "   first from: $seen_obj{$obj_with_suffix}\n";
+      print "   now from:   $file_full\n";
+      next;
+    }
+    $seen_obj{$obj_with_suffix} = $file_full;
+
+    my $funs = $t->{'funs'};
+    $funs = [$obj] if ! defined $funs;
+    print "funs @$funs\n" if $opt{'t'};
+
+    if (defined $t->{'pic'}) { @pic_choices = ('no'); }
+
+    foreach my $pic (map {$pictable{$_}} @pic_choices) {
+      print "pic $pic->{'suffix'}\n" if $opt{'t'};
+
+      my $objbase = "${obj}_$suffix$pic->{'suffix'}";
+      print "objbase $objbase\n" if $opt{'t'};
+
+      if ($path ne "." && -f "${objbase}.c") {  # string compare; "." as a regexp matches any char
+       die "Already have ${objbase}.c";
+      }
+
+      my $tmp_file = "tmp-$objbase.c";
+
+      my $renaming;
+      foreach my $fun (@{$funs}) {
+        if ($mpX eq 'mpn_' && $lang eq '.c') {
+          $renaming .= "\t\t-DHAVE_NATIVE_mpn_$fun=1 \\\n";
+        }
+
+        # The carry-in variant is with a "c" appended, unless there's a "_1"
+        # somewhere, eg. "modexact_1_odd", in which case that becomes "_1c".
+       my $fun_carry = $fun;
+       if (! ($fun_carry =~ s/_1/_1c/)) { $fun_carry = "${fun}c"; }
+
+       $renaming .=
+           "\t\t-D__g$mpX$fun=$mpX${fun}_$suffix$pic->{'suffix'} \\\n" .
+           "\t\t-D__g$mpX$fun_carry=$mpX${fun_carry}_$suffix$pic->{'suffix'} \\\n";
+      }
+      foreach my $r (@{$t->{'rename'}}) {
+       if ($r =~ /^__gmp/) {
+         $renaming .= "\\\n" .
+             "\t\t-D$r=${r}_$suffix$pic->{'suffix'}";
+       } else {
+         $renaming .= "\\\n" .
+             "\t\t-D__g$mpX$r=$mpX${r}_$suffix$pic->{'suffix'}";
+       }
+      }
+      print "renaming $renaming\n" if $opt{'t'};
+
+      print MAKEFILE "\n";
+      if ($lang eq '.asm') {
+       print MAKEFILE
+           "$objbase.o: $file_full \$(ASM_HEADERS)\n" .
+           "   \$(M4) \$(M4FLAGS) -DOPERATION_$obj $pic->{'asmflags'} \\\n" .
+           "$renaming" .
+           "           $file_full >tmp-$objbase.s\n" .
+            "  \$(CCAS) \$(COMPILE_FLAGS) $pic->{'cflags'} tmp-$objbase.s -o $objbase.o\n" .
+            "  \$(RM_TMP) tmp-$objbase.s\n";
+       $MANY_OBJS .= " $objbase.o";
+
+      } elsif ($lang eq '.c') {
+       print MAKEFILE
+           "$objbase.o: $file_full\n" .
+           "   \$(COMPILE) -DOPERATION_$obj $pic->{'cflags'} \\\n" .
+           "$renaming" .
+           "           -c $file_full -o $objbase.o\n";
+       print_ansi2knr($objbase,
+                      $file_full,
+                      " -DOPERATION_$obj\\\n$renaming\t\t");
+       $MANY_OBJS .= " $objbase\$U.o";
+
+      } elsif ($lang eq '.S') {
+       print MAKEFILE
+           "$objbase.o: $file_full\n" .
+            "  \$(COMPILE) -g $pic->{'asmflags'} \\\n" .
+           "$renaming" .
+            "  -c $file_full -o $objbase.o\n";
+       $MANY_OBJS .= " $objbase.o";
+
+      } elsif ($lang eq '.h') {
+       print MAKEFILE
+           "$objbase.o: tmp-$objbase.c $file_full\n" .
+           "   \$(COMPILE) -DOPERATION_$obj $pic->{'cflags'} \\\n" .
+           "$renaming" .
+           "           -c tmp-$objbase.c -o $objbase.o\n";
+       print_ansi2knr($objbase,
+                      "tmp-$objbase.c",
+                      " -DOPERATION_$obj\\\n$renaming\t\t");
+       $MANY_OBJS .= " $objbase\$U.o";
+
+        $CLEAN .= " tmp-$objbase.c";
+       open(TMP_C,">tmp-$objbase.c")
+           or die "Can't create tmp-$objbase.c: $!\n";
+       print TMP_C
+"/* tmp-$objbase.c generated by many.pl - DO NOT EDIT, CHANGES WILL BE LOST */
+
+#include \"gmp.h\"
+#include \"gmp-impl.h\"
+#include \"longlong.h\"
+#include \"speed.h\"
+
+";
+      }
+
+      my $tests_program = "$top_srcdir/tests/devel/$obj.c";
+      if (-f $tests_program) {
+       $tests_program = "\$(top_srcdir)/tests/devel/$obj.c";
+       print_ansi2knr("tests_${objbase}",
+                      $tests_program,  # same flags as the tests_$objbase.o rule below
+                      "\\\n$renaming\t\t\$(CFLAGS_TESTS)");
+       print_ansi2knr("tests_${objbase}_sp",
+                      $tests_program,
+                      "\\\n$renaming\t\t\$(CFLAGS_TESTS_SP)");
+
+       print MAKEFILE <<EOF;
+tests_$objbase.o: $tests_program
+       \$(COMPILE) \$(CFLAGS_TESTS) \\
+$renaming              -c $tests_program -o tests_$objbase.o
+
+tests_$objbase: $objbase\$U.o tests_$objbase\$U.o ../libgmp.la
+       \$(LINK) tests_$objbase\$U.o $objbase\$U.o ../libgmp.la -o tests_$objbase
+
+tests_${objbase}_sp.o: $tests_program
+       \$(COMPILE) \$(CFLAGS_TESTS_SP) \\
+$renaming              -c $tests_program -o tests_${objbase}_sp.o
+
+tests_${objbase}_sp: $objbase\$U.o tests_${objbase}_sp\$U.o ../libgmp.la
+       \$(LINK) tests_${objbase}_sp\$U.o $objbase\$U.o ../libgmp.la -o tests_${objbase}_sp
+
+EOF
+        $CLEAN .= " tests_$objbase tests_${objbase}_sp";
+      }
+
+      foreach my $fun (@{$funs}) {
+       print "fun $fun\n" if $opt{'t'};
+
+       if ($lang eq '.h') {
+          my $macro_before = $t->{'macro-before'};  # table key is hyphenated, like 'macro-speed'
+          $macro_before = "" if ! defined $macro_before;
+         print TMP_C
+"$macro_before
+#undef $fun
+#include \"$file_full\"
+
+";
+       }
+
+       my $args = $t->{"args_$fun"};
+       if (! defined $args) { $args = $t->{'args'}; }
+       if (! defined $args) { die "Need args for $fun\n"; }
+       print "args $args\n" if $opt{'t'};
+
+       foreach my $carry (@$carrys) {
+         print "carry $carry\n" if $opt{'t'};
+
+         my $fun_carry = $fun;
+         if (! ($fun_carry =~ s/_1/_1$carry/)) { $fun_carry = "$fun$carry"; }
+          print "fun_carry $fun_carry\n" if $opt{'t'};
+
+         if ($lang =~ /\.(asm|S)/
+             && ! grep(m"PROLOGUE\((.* )?$mpX$fun_carry[ ,)]",@file_contents)) {
+           print "no PROLOGUE $mpX$fun_carry\n" if $opt{'t'};
+           next;
+         }
+         if ($lang eq '.c'
+             && ! grep(m"^(#define FUNCTION\s+)?$mpX$fun_carry\W", @file_contents)) {
+           print "no mention of $mpX$fun_carry\n" if $opt{'t'};
+           next;
+         }
+         if ($lang eq '.h'
+             && ! grep(m"^#define $fun_carry\W", @file_contents)) {
+           print "no mention of #define $fun_carry\n" if $opt{'t'};
+           next;
+         }
+
+         $count_functions++;
+
+         my $carryarg;   # extra prototype text appended for the carry-in variant
+         if ($carry eq '')                { $carryarg = ''; }
+         elsif (defined $t->{'carryarg'}) { $carryarg = $t->{'carryarg'}; }
+         else                             { $carryarg = ', mp_limb_t carry'; }
+         print "carryarg $carryarg\n" if $opt{'t'};
+
+         my $funfull="$mpX${fun_carry}_$suffix$pic->{'suffix'}";
+         print "funfull $funfull\n" if $opt{'t'};
+
+         if ($lang ne '.h') {
+           my $proto = "$t->{'ret'} $funfull _PROTO (($args$carryarg)); \\\n";
+           $SPEED_EXTRA_PROTOS .= $proto;
+           $TRY_EXTRA_PROTOS .= $proto;
+         }
+
+         my $try_type = $t->{"try-$fun"};
+         $try_type = $t->{'try'} if ! defined $try_type;
+         if (! defined $try_type) {
+           if ($mpX eq 'mpn_') {
+             $try_type = "TYPE_\U$fun_carry";
+           } else {
+             $try_type = "TYPE_\U$mpX\U$fun_carry";
+           }
+         }
+         print "try_type $try_type\n" if $opt{'t'};
+
+         my $try_minsize = $t->{'try-minsize'};
+         if (defined $try_minsize) {
+           $try_minsize = ", " . $try_minsize;
+         } else {
+           $try_minsize = "";
+         }
+         print "try_minsize $try_minsize\n" if $opt{'t'};
+
+         if ($try_type ne 'none') {
+           $TRY_EXTRA_ROUTINES .=
+               "  { TRY($mpX${fun_carry}_$suffix$pic->{'suffix'}), $try_type$try_minsize }, \\\n";
+         }
+
+         my $speed_flags = $t->{'speed_flags'};
+         $speed_flags = '0' if ! defined $speed_flags;
+         print "speed_flags $speed_flags\n" if $opt{'t'};
+
+         my $speed_routine = $t->{'speed'};
+         $speed_routine = "SPEED_ROUTINE_\U$mpX\U$fun"
+             if !defined $speed_routine;
+         if (! ($speed_routine =~ s/_1/_1\U$carry/)) {
+           $speed_routine = "$speed_routine\U$carry";
+         }
+         print "speed_routine $speed_routine\n" if $opt{'t'};
+
+         my @speed_suffixes = ();
+         push (@speed_suffixes, '') if $speed_routine ne 'none';
+         push (@speed_suffixes, @{$t->{'speed_suffixes'}})
+             if defined $t->{'speed_suffixes'};
+
+          my $macro_speed = $t->{'macro-speed'};
+          $macro_speed = "$speed_routine ($fun_carry)" if ! defined $macro_speed;
+          $macro_speed =~ s/\$fun/$fun_carry/g;
+
+         foreach my $S (@speed_suffixes) {
+           my $Sfunfull="$mpX${fun_carry}${S}_$suffix$pic->{'suffix'}";
+
+           $SPEED_EXTRA_PROTOS .=
+             "double speed_$Sfunfull _PROTO ((struct speed_params *s)); \\\n";
+           $SPEED_EXTRA_ROUTINES .=
+             "  { \"$Sfunfull\", speed_$Sfunfull, $speed_flags }, \\\n";
+           if ($lang eq '.h') {
+              print TMP_C
+"double
+speed_$Sfunfull (struct speed_params *s)
+{
+$macro_speed
+}
+
+";
+            } else {
+             $SPEED_CODE .=
+               "double\n" .
+               "speed_$Sfunfull (struct speed_params *s)\n" .
+                "{\n" .
+                "$restriction" .
+               "  $speed_routine\U$S\E ($funfull)\n" .
+                "}\n";
+            }
+         }
+       }
+      }
+    }
+  }
+}
+
+
+print SPEED $SPEED_EXTRA_PROTOS . "\n";
+print SPEED $SPEED_EXTRA_ROUTINES . "\n";
+if (defined $ENV{speedinc}) { print SPEED $ENV{speedinc} . "\n"; }
+print SPEED
+    "#include \"speed.c\"\n" .
+    "\n";
+print SPEED $SPEED_CODE;
+
+print TRY $TRY_EXTRA_ROUTINES . "\n";
+print TRY $TRY_EXTRA_PROTOS . "\n";
+my $tryinc = "";
+if (defined $ENV{tryinc}) {
+  $tryinc = $ENV{tryinc};
+  print TRY "#include \"$tryinc\"\n";
+}
+print "tryinc $tryinc\n" if $opt{'t'};
+print TRY
+    "#include \"try.c\"\n" .
+    "\n";
+
+my $extra_libraries = "";
+if (defined $ENV{extra_libraries}) { $extra_libraries = $ENV{extra_libraries};}
+
+my $trydeps = "";
+if (defined $ENV{trydeps}) { $trydeps = $ENV{trydeps}; }
+$trydeps .= " $tryinc";
+print "trydeps $trydeps\n" if $opt{'t'};
+
+print MAKEFILE <<EOF;
+
+MANY_OBJS = $MANY_OBJS
+MANY_CLEAN = \$(MANY_OBJS) \\
+       speed-many.c speed-many\$U.o speed-many\$(EXEEXT) \\
+       try-many.c try-many\$U.o try-many \\
+       $CLEAN
+MANY_DISTCLEAN = Makefile.many
+
+speed-many: \$(MANY_OBJS) speed-many\$U.o libspeed.la $extra_libraries
+       \$(LINK) \$(LDFLAGS) speed-many\$U.o \$(MANY_OBJS) \$(LDADD) \$(LIBS) $extra_libraries
+
+try-many: \$(MANY_OBJS) try-many\$U.o libspeed.la $extra_libraries
+       \$(LINK) \$(LDFLAGS) try-many\$U.o \$(MANY_OBJS)  \$(LDADD) \$(LIBS) $extra_libraries
+
+try-many.o: try-many.c \$(top_srcdir)/tests/devel/try.c $trydeps
+       \$(COMPILE) -I\$(top_srcdir)/tests/devel -c try-many.c
+
+EOF
+
+print_ansi2knr("speed-many");
+print_ansi2knr("try-many",
+              "\$(top_srcdir)/tests/devel/try.c",
+              "-I\$(top_srcdir)/tests/devel");
+
+print MAKEFILE <<EOF;
+RM_TMP = rm -f
+CFLAGS_TESTS = -DSIZE=50 -DTIMES=1 -DRANDOM -DCLOCK=333000000
+CFLAGS_TESTS_SP = -DSIZE=1024 -DNOCHECK -DOPS=200000000 -DCLOCK=333000000
+EOF
+
+close MAKEFILE or die;
+
+print "Total $count_files files, $count_functions functions\n";
+
+
+
+# Local variables:
+# perl-indent-level: 2
+# End:
diff --git a/tune/mod_1_div.c b/tune/mod_1_div.c

new file mode 100644 (file)

index 0000000..e29921f
--- /dev/null
+++ b/tune/mod_1_div.c
@@ -0,0 +1,35 @@
+/* mpn/generic/mod_1.c forced to use plain udiv_qrnnd.
+
+Copyright 2000, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define OPERATION_mod_1
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#undef MOD_1_NORM_THRESHOLD
+#undef MOD_1_UNNORM_THRESHOLD
+#undef MOD_1N_TO_MOD_1_1_THRESHOLD
+#undef MOD_1U_TO_MOD_1_1_THRESHOLD
+#define MOD_1_NORM_THRESHOLD    MP_SIZE_T_MAX
+#define MOD_1_UNNORM_THRESHOLD  MP_SIZE_T_MAX
+#define MOD_1N_TO_MOD_1_1_THRESHOLD MP_SIZE_T_MAX
+#define MOD_1U_TO_MOD_1_1_THRESHOLD MP_SIZE_T_MAX
+#define __gmpn_mod_1  mpn_mod_1_div
+
+#include "mpn/generic/mod_1.c"
diff --git a/tune/mod_1_inv.c b/tune/mod_1_inv.c

new file mode 100644 (file)

index 0000000..5c09fb5
--- /dev/null
+++ b/tune/mod_1_inv.c
@@ -0,0 +1,35 @@
+/* mpn/generic/mod_1.c forced to use mul-by-inverse udiv_qrnnd_preinv.
+
+Copyright 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define OPERATION_mod_1
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#undef MOD_1_NORM_THRESHOLD
+#undef MOD_1_UNNORM_THRESHOLD
+#undef MOD_1N_TO_MOD_1_1_THRESHOLD
+#undef MOD_1U_TO_MOD_1_1_THRESHOLD
+#define MOD_1_NORM_THRESHOLD    0
+#define MOD_1_UNNORM_THRESHOLD  0
+#define MOD_1N_TO_MOD_1_1_THRESHOLD MP_SIZE_T_MAX
+#define MOD_1U_TO_MOD_1_1_THRESHOLD MP_SIZE_T_MAX
+#define __gmpn_mod_1  mpn_mod_1_inv
+
+#include "mpn/generic/mod_1.c"
diff --git a/tune/modlinv.c b/tune/modlinv.c

new file mode 100644 (file)

index 0000000..77c7013
--- /dev/null
+++ b/tune/modlinv.c
@@ -0,0 +1,167 @@
+/* Alternate implementations of binvert_limb to compare speeds. */
+
+/*
+Copyright 2000, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+#include "speed.h"
+
+
+/* Like the standard version in gmp-impl.h, but with the expressions using a
+   "1-" form.  This has the same number of steps, but "1-" is on the
+   dependent chain, whereas the "2*" in the standard version isn't.
+   Depending on the CPU this should be the same or a touch slower.  */
+
+#if GMP_LIMB_BITS <= 32
+#define binvert_limb_mul1(inv,n)                                \
+  do {                                                          \
+    mp_limb_t  __n = (n);                                       \
+    mp_limb_t  __inv;                                           \
+    ASSERT ((__n & 1) == 1);                                    \
+    __inv = binvert_limb_table[(__n&0xFF)/2]; /*  8 */          \
+    __inv = (1 - __n * __inv) * __inv + __inv;  /* 16 */        \
+    __inv = (1 - __n * __inv) * __inv + __inv;  /* 32 */        \
+    ASSERT (__inv * __n == 1);                                  \
+    (inv) = __inv;                                              \
+  } while (0)
+#endif
+
+#if GMP_LIMB_BITS > 32 && GMP_LIMB_BITS <= 64
+#define binvert_limb_mul1(inv,n)                                \
+  do {                                                          \
+    mp_limb_t  __n = (n);                                       \
+    mp_limb_t  __inv;                                           \
+    ASSERT ((__n & 1) == 1);                                    \
+    __inv = binvert_limb_table[(__n&0xFF)/2]; /*  8 */          \
+    __inv = (1 - __n * __inv) * __inv + __inv;  /* 16 */        \
+    __inv = (1 - __n * __inv) * __inv + __inv;  /* 32 */        \
+    __inv = (1 - __n * __inv) * __inv + __inv;  /* 64 */        \
+    ASSERT (__inv * __n == 1);                                  \
+    (inv) = __inv;                                              \
+  } while (0)
+#endif
+
+
+/* The loop based version used in GMP 3.0 and earlier.  Usually slower than
+   multiplying, due to the number of steps that must be performed.  Much
+   slower when the processor has a good multiply.  */
+
+#define binvert_limb_loop(inv,n)                \
+  do {                                          \
+    mp_limb_t  __v = (n);                       \
+    mp_limb_t  __v_orig = __v;                  \
+    mp_limb_t  __make_zero = 1;                 \
+    mp_limb_t  __two_i = 1;                     \
+    mp_limb_t  __v_inv = 0;                     \
+                                                \
+    ASSERT ((__v & 1) == 1);                    \
+                                                \
+    do                                          \
+      {                                         \
+        while ((__two_i & __make_zero) == 0)    \
+          __two_i <<= 1, __v <<= 1;             \
+        __v_inv += __two_i;                     \
+        __make_zero -= __v;                     \
+      }                                         \
+    while (__make_zero);                        \
+                                                \
+    ASSERT (__v_orig * __v_inv == 1);           \
+    (inv) = __v_inv;                            \
+  } while (0)
+
+
+/* Another loop based version with conditionals, but doing a fixed number of
+   steps. */
+
+#define binvert_limb_cond(inv,n)                \
+  do {                                          \
+    mp_limb_t  __n = (n);                       \
+    mp_limb_t  __rem = (1 - __n) >> 1;          \
+    mp_limb_t  __inv = GMP_LIMB_HIGHBIT;        \
+    int        __count;                         \
+                                                \
+    ASSERT ((__n & 1) == 1);                    \
+                                                \
+    __count = GMP_LIMB_BITS-1;               \
+    do                                          \
+      {                                         \
+        __inv >>= 1;                            \
+        if (__rem & 1)                          \
+          {                                     \
+            __inv |= GMP_LIMB_HIGHBIT;          \
+            __rem -= __n;                       \
+          }                                     \
+        __rem >>= 1;                            \
+      }                                         \
+    while (-- __count);                         \
+                                                \
+    ASSERT (__inv * __n == 1);                  \
+    (inv) = __inv;                              \
+  } while (0)
+
+
+/* Another loop based bitwise version, but purely arithmetic, no
+   conditionals. */
+
+#define binvert_limb_arith(inv,n)                                       \
+  do {                                                                  \
+    mp_limb_t  __n = (n);                                               \
+    mp_limb_t  __rem = (1 - __n) >> 1;                                  \
+    mp_limb_t  __inv = GMP_LIMB_HIGHBIT;                                \
+    mp_limb_t  __lowbit;                                                \
+    int        __count;                                                 \
+                                                                        \
+    ASSERT ((__n & 1) == 1);                                            \
+                                                                        \
+    __count = GMP_LIMB_BITS-1;                                       \
+    do                                                                  \
+      {                                                                 \
+        __lowbit = __rem & 1;                                           \
+        __inv = (__inv >> 1) | (__lowbit << (GMP_LIMB_BITS-1));      \
+        __rem = (__rem - (__n & -__lowbit)) >> 1;                       \
+      }                                                                 \
+    while (-- __count);                                                 \
+                                                                        \
+    ASSERT (__inv * __n == 1);                                          \
+    (inv) = __inv;                                                      \
+  } while (0)
+
+
+double
+speed_binvert_limb_mul1 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MODLIMB_INVERT (binvert_limb_mul1);
+}
+double
+speed_binvert_limb_loop (struct speed_params *s)
+{
+  SPEED_ROUTINE_MODLIMB_INVERT (binvert_limb_loop);
+}
+double
+speed_binvert_limb_cond (struct speed_params *s)
+{
+  SPEED_ROUTINE_MODLIMB_INVERT (binvert_limb_cond);
+}
+double
+speed_binvert_limb_arith (struct speed_params *s)
+{
+  SPEED_ROUTINE_MODLIMB_INVERT (binvert_limb_arith);
+}
diff --git a/tune/noop.c b/tune/noop.c

new file mode 100644 (file)

index 0000000..7c7f1b5
--- /dev/null
+++ b/tune/noop.c
@@ -0,0 +1,57 @@
+/* Noop routines.
+
+   These are in a separate file to stop gcc recognising do-nothing functions
+   and optimizing away calls to them.  */
+
+/*
+Copyright 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#include "speed.h"
+
+
+void
+noop (void)
+{
+}
+
+/*ARGSUSED*/
+void
+noop_1 (mp_limb_t n)
+{
+}
+
+/*ARGSUSED*/
+void
+noop_wxs (mp_ptr wp, mp_srcptr xp, mp_size_t size)
+{
+}
+
+/*ARGSUSED*/
+void
+noop_wxys (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)
+{
+}
+
+/*ARGSUSED*/
+void
+mpn_cache_fill_dummy (mp_limb_t n)
+{
+}
diff --git a/tune/pentium.asm b/tune/pentium.asm

new file mode 100644 (file)

index 0000000..369a8ea
--- /dev/null
+++ b/tune/pentium.asm
@@ -0,0 +1,49 @@
+dnl  x86 pentium time stamp counter access routine.
+
+dnl  Copyright 1999, 2000, 2005 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+
+C void speed_cyclecounter (unsigned p[2]);
+C
+C Get the pentium rdtsc cycle counter, storing the least significant word in
+C p[0] and the most significant in p[1].
+C
+C cpuid is used to serialize execution.  On big measurements this won't be
+C significant but it may help make small single measurements more accurate.
+
+       .text
+       ALIGN(8)
+
+defframe(PARAM_P,4)
+
+PROLOGUE(speed_cyclecounter)
+deflit(`FRAME',0)
+       pushl   %ebx
+FRAME_pushl()
+       xorl    %eax, %eax
+       cpuid
+       rdtsc
+       movl    PARAM_P, %ebx
+       movl    %eax, (%ebx)
+       movl    %edx, 4(%ebx)
+       popl    %ebx
+       ret
+EPILOGUE()
diff --git a/tune/powerpc.asm b/tune/powerpc.asm

new file mode 100644 (file)

index 0000000..4391ec1
--- /dev/null
+++ b/tune/powerpc.asm
@@ -0,0 +1,42 @@
+dnl  PowerPC mftb_function -- read time base registers.
+
+dnl  Copyright 2002 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C void mftb_function (unsigned a[2]);
+C
+
+ASM_START()
+PROLOGUE(mftb_function)
+
+       C r3    a
+
+L(again):
+       mftbu   r4
+       mftb    r5
+       mftbu   r6
+       cmpw    cr0, r4, r6
+       bne     L(again)
+
+       stw     r5, 0(r3)
+       stw     r4, 4(r3)
+       blr
+
+EPILOGUE()
diff --git a/tune/powerpc64.asm b/tune/powerpc64.asm

new file mode 100644 (file)

index 0000000..eb70546
--- /dev/null
+++ b/tune/powerpc64.asm
@@ -0,0 +1,38 @@
+dnl  PowerPC mftb_function -- read time base registers, 64-bit integer.
+
+dnl  Copyright 2002, 2003, 2004 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C void mftb_function (unsigned a[2]);
+C
+
+ASM_START()
+PROLOGUE(mftb_function)
+
+       C r3    a
+
+       mftb    r5
+
+       srdi    r4, r5, 32
+       stw     r5, 0(r3)
+       stw     r4, 4(r3)
+       blr
+
+EPILOGUE()
diff --git a/tune/powm_mod.c b/tune/powm_mod.c

new file mode 100644 (file)

index 0000000..e65f512
--- /dev/null
+++ b/tune/powm_mod.c
@@ -0,0 +1,28 @@
+/* mpz/powm.c forced to use division. */
+
+/*
+Copyright 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#undef POWM_THRESHOLD
+#define POWM_THRESHOLD  1
+#define __gmpz_powm  mpz_powm_mod
+
+#include "../mpz/powm.c"
diff --git a/tune/powm_redc.c b/tune/powm_redc.c

new file mode 100644 (file)

index 0000000..a9e4bb5
--- /dev/null
+++ b/tune/powm_redc.c
@@ -0,0 +1,30 @@
+/* mpz/powm.c forced to use REDC. */
+
+/*
+Copyright 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* WANT_REDC_GLOBAL makes redc() available for speed and tune program use. */
+#undef POWM_THRESHOLD
+#define POWM_THRESHOLD    MP_SIZE_T_MAX
+#define WANT_REDC_GLOBAL  1
+#define __gmpz_powm  mpz_powm_redc
+
+#include "../mpz/powm.c"
diff --git a/tune/pre_divrem_1.c b/tune/pre_divrem_1.c

new file mode 100644 (file)

index 0000000..2b3fb79
--- /dev/null
+++ b/tune/pre_divrem_1.c
@@ -0,0 +1,30 @@
+/* mpn_preinv_divrem_1 -- if not already in libgmp.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#if ! USE_PREINV_DIVREM_1
+
+#undef USE_PREINV_DIVREM_1
+#define USE_PREINV_DIVREM_1 1
+
+#include "mpn/generic/pre_divrem_1.c"
+
+#endif
diff --git a/tune/set_strb.c b/tune/set_strb.c

new file mode 100644 (file)

index 0000000..c67b09c
--- /dev/null
+++ b/tune/set_strb.c
@@ -0,0 +1,37 @@
+/* mpn_set_str_basecase -- mpn_set_str forced to its basecase.
+
+Copyright 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define __gmpn_set_str mpn_set_str_basecase
+#define __gmpn_bc_set_str mpn_bc_set_str_basecase
+#define __gmpn_dc_set_str mpn_dc_set_str_basecase
+#define __gmpn_set_str_compute_powtab mpn_set_str_compute_powtab_basecase
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#ifndef SIZE_T_MAX
+#define SIZE_T_MAX  ((size_t) ULONG_MAX)
+#endif
+
+#undef SET_STR_DC_THRESHOLD
+#define SET_STR_DC_THRESHOLD           SIZE_T_MAX /* always */
+#undef SET_STR_PRECOMPUTE_THRESHOLD
+#define SET_STR_PRECOMPUTE_THRESHOLD   SIZE_T_MAX /* always */
+
+#include "mpn/generic/set_str.c"
diff --git a/tune/set_strp.c b/tune/set_strp.c

new file mode 100644 (file)

index 0000000..701ab2b
--- /dev/null
+++ b/tune/set_strp.c
@@ -0,0 +1,32 @@
+/* mpn_pre_set_str -- set_str conversion using a caller-supplied powtab.
+
+Copyright 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define TUNE_PROGRAM_BUILD  1   /* for gmp-impl.h */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpn_pre_set_str (mp_ptr wp, unsigned char *str, size_t str_len, powers_t *powtab, mp_ptr tp)
+{
+  if (BELOW_THRESHOLD (str_len, set_str_dc_threshold))
+    mpn_bc_set_str (wp, str, str_len, powtab->base);
+  else
+    mpn_dc_set_str (wp, str, str_len, powtab, tp);
+}
diff --git a/tune/set_strs.c b/tune/set_strs.c

new file mode 100644 (file)

index 0000000..d8edc7d
--- /dev/null
+++ b/tune/set_strs.c
@@ -0,0 +1,33 @@
+/* mpn_set_str_subquad -- mpn_set_str forced to the sub-quadratic case.
+
+Copyright 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define __gmpn_set_str mpn_set_str_subquad
+#define __gmpn_bc_set_str mpn_bc_set_str_subquad
+#define __gmpn_dc_set_str mpn_dc_set_str_subquad
+#define __gmpn_set_str_compute_powtab mpn_set_str_compute_powtab_subquad
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#undef SET_STR_DC_THRESHOLD
+#define SET_STR_DC_THRESHOLD  2 /* never */
+#undef SET_STR_PRECOMPUTE_THRESHOLD
+#define SET_STR_PRECOMPUTE_THRESHOLD  2 /* never */
+
+#include "mpn/generic/set_str.c"
diff --git a/tune/sparcv9.asm b/tune/sparcv9.asm

new file mode 100644 (file)

index 0000000..b951ff3
--- /dev/null
+++ b/tune/sparcv9.asm
@@ -0,0 +1,34 @@
+dnl  Sparc v9 32-bit time stamp counter access routine.
+
+dnl  Copyright 2000, 2005 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C void speed_cyclecounter (unsigned p[2]);
+C
+C Get the sparc v9 tick counter.
+
+ASM_START()
+PROLOGUE(speed_cyclecounter)
+       rd      %tick,%g1
+       st      %g1,[%o0]               C low 32 bits
+       srlx    %g1,32,%g4
+       retl
+       st      %g4,[%o0+4]             C high 32 bits
+EPILOGUE(speed_cyclecounter)
diff --git a/tune/speed-ext.c b/tune/speed-ext.c

new file mode 100644 (file)

index 0000000..2035fa8
--- /dev/null
+++ b/tune/speed-ext.c
@@ -0,0 +1,222 @@
+/* An example of extending the speed program to measure routines not in GMP.
+
+Copyright 1999, 2000, 2002, 2003, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+/* The extension here is three versions of an mpn arithmetic mean.  These
+   aren't meant to be particularly useful, just examples.
+
+   You can run something like the following to compare their speeds.
+
+           ./speed-ext -s 1-20 -c mean_calls mean_open mean_open2
+
+   On RISC chips, mean_open() might be fastest if the compiler is doing a
+   good job.  On the register starved x86s, mean_calls will be fastest.
+
+
+   Notes:
+
+   SPEED_EXTRA_PROTOS and SPEED_EXTRA_ROUTINES are macros that get expanded
+   by speed.c in useful places.  SPEED_EXTRA_PROTOS goes after the header
+   files, and SPEED_EXTRA_ROUTINES goes in the array of available routines.
+
+   The advantage of this #include "speed.c" scheme is that there's no
+   editing of a copy of that file, and new features in new versions of it
+   will be immediately available.
+
+   In a real program the routines mean_calls() etc would probably be in
+   separate C or assembler source files, and just the measuring
+   speed_mean_calls() etc would be here.  Linking against other libraries
+   for things to measure is perfectly possible too.
+
+   When attempting to compare two versions of the same named routine, say
+   like the generic and assembler versions of mpn_add_n(), creative use of
+   cc -D or #define is suggested, so one or both can be renamed and linked
+   into the same program.  It'll be much easier to compare them side by side
+   than with separate programs for each.
+
+   common.c has notes on writing speed measuring routines.
+
+   Remember to link against tune/libspeed.la (or tune/.libs/libspeed.a if
+   not using libtool) to get common.o and other objects needed by speed.c.  */
+
+
+#define SPEED_EXTRA_PROTOS                                              \
+  double speed_mean_calls __GMP_PROTO ((struct speed_params *s));       \
+  double speed_mean_open  __GMP_PROTO ((struct speed_params *s));       \
+  double speed_mean_open2 __GMP_PROTO ((struct speed_params *s));
+
+#define SPEED_EXTRA_ROUTINES            \
+  { "mean_calls",  speed_mean_calls  }, \
+  { "mean_open",   speed_mean_open   }, \
+  { "mean_open2",  speed_mean_open2  },
+
+#include "speed.c"
+
+
+/* A straightforward implementation calling mpn subroutines.
+
+   wp,size is set to (xp,size + yp,size) / 2.  The return value is the
+   remainder from the division.  The other versions are the same.  */
+
+mp_limb_t
+mean_calls (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)
+{
+  mp_limb_t  c, ret;
+
+  ASSERT (size >= 1);
+
+  c = mpn_add_n (wp, xp, yp, size);
+  ret = mpn_rshift (wp, wp, size, 1) >> (GMP_LIMB_BITS-1);
+  wp[size-1] |= (c << (GMP_LIMB_BITS-1));
+  return ret;
+}
+
+
+/* An open-coded version, making one pass over the data.  The right shift is
+   done as the added limbs are produced.  The addition code follows
+   mpn/generic/add_n.c. */
+
+mp_limb_t
+mean_open (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)
+{
+  mp_limb_t  w, wprev, x, y, c, ret;
+  mp_size_t  i;
+
+  ASSERT (size >= 1);
+
+  x = xp[0];
+  y = yp[0];
+
+  wprev = x + y;
+  c = (wprev < x);
+  ret = (wprev & 1);
+
+#define RSHIFT(hi,lo)   (((lo) >> 1) | ((hi) << (GMP_LIMB_BITS-1)))
+
+  for (i = 1; i < size; i++)
+    {
+      x = xp[i];
+      y = yp[i];
+
+      w = x + c;
+      c = (w < x);
+      w += y;
+      c += (w < y);
+
+      wp[i-1] = RSHIFT (w, wprev);
+      wprev = w;
+    }
+
+  wp[i-1] = RSHIFT (c, wprev);
+
+  return ret;
+}
+
+
+/* Another one-pass version, but right shifting the source limbs rather than
+   the result limbs.  There's not much chance of this being better than the
+   above, but it's an alternative at least. */
+
+mp_limb_t
+mean_open2 (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)
+{
+  mp_limb_t  w, x, y, xnext, ynext, c, ret;
+  mp_size_t  i;
+
+  ASSERT (size >= 1);
+
+  x = xp[0];
+  y = yp[0];
+
+  /* ret is the low bit of x+y, c is the carry out of that low bit add */
+  ret = (x ^ y) & 1;
+  c   = (x & y) & 1;
+
+  for (i = 0; i < size-1; i++)
+    {
+      xnext = xp[i+1];
+      ynext = yp[i+1];
+      x = RSHIFT (xnext, x);
+      y = RSHIFT (ynext, y);
+
+      w = x + c;
+      c = (w < x);
+      w += y;
+      c += (w < y);
+      wp[i] = w;
+
+      x = xnext;
+      y = ynext;
+    }
+
+  wp[i] = (x >> 1) + (y >> 1) + c;
+
+  return ret;
+}
+
+
+/* The speed measuring routines are the same apart from which function they
+   run, so a macro is used.  Actually this macro is the same as
+   SPEED_ROUTINE_MPN_BINARY_N.  */
+
+#define SPEED_ROUTINE_MEAN(mean_fun)                    \
+  {                                                     \
+    unsigned  i;                                        \
+    mp_ptr    wp;                                       \
+    double    t;                                        \
+    TMP_DECL;                                  \
+                                                        \
+    SPEED_RESTRICT_COND (s->size >= 1);                 \
+                                                        \
+    TMP_MARK;                                  \
+    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);   \
+                                                        \
+    speed_operand_src (s, s->xp, s->size);              \
+    speed_operand_src (s, s->yp, s->size);              \
+    speed_operand_dst (s, wp, s->size);                 \
+    speed_cache_fill (s);                               \
+                                                        \
+    speed_starttime ();                                 \
+    i = s->reps;                                        \
+    do                                                  \
+      mean_fun (wp, s->xp, s->yp, s->size);             \
+    while (--i != 0);                                   \
+    t = speed_endtime ();                               \
+                                                        \
+    TMP_FREE;                                  \
+    return t;                                           \
+  }
+
+double
+speed_mean_calls (struct speed_params *s)
+{
+  SPEED_ROUTINE_MEAN (mean_calls);
+}
+
+double
+speed_mean_open (struct speed_params *s)
+{
+  SPEED_ROUTINE_MEAN (mean_open);
+}
+
+double
+speed_mean_open2 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MEAN (mean_open2);
+}
diff --git a/tune/speed.c b/tune/speed.c

new file mode 100644 (file)

index 0000000..2ead1be
--- /dev/null
+++ b/tune/speed.c
@@ -0,0 +1,1296 @@
+/* Speed measuring program.
+
+Copyright 1999, 2000, 2001, 2002, 2003, 2005, 2006, 2008, 2009, 2010 Free
+Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+/* Usage message is in the code below, run with no arguments to print it.
+   See README for interesting applications.
+
+   To add a new routine foo(), create a speed_foo() function in the style of
+   the existing ones and add an entry in the routine[] array.  Put FLAG_R if
+   speed_foo() wants an "r" parameter.
+
+   The routines don't have help messages or descriptions, but most have
+   suggestive names.  See the source code for full details.
+
+*/
+
+#include "config.h"
+
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if HAVE_UNISTD_H
+#include <unistd.h>  /* for getpid, R_OK */
+#endif
+
+#if TIME_WITH_SYS_TIME
+# include <sys/time.h>  /* for struct timeval */
+# include <time.h>
+#else
+# if HAVE_SYS_TIME_H
+#  include <sys/time.h>
+# else
+#  include <time.h>
+# endif
+#endif
+
+#if HAVE_SYS_RESOURCE_H
+#include <sys/resource.h>  /* for getrusage() */
+#endif
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"  /* for the benefit of speed-many.c */
+#include "tests.h"
+#include "speed.h"
+
+
+#if !HAVE_DECL_OPTARG
+extern char *optarg;
+extern int optind, opterr;
+#endif
+
+#if !HAVE_STRTOUL
+#define strtoul(p,e,b)  (unsigned long) strtol(p,e,b)
+#endif
+
+#ifdef SPEED_EXTRA_PROTOS
+SPEED_EXTRA_PROTOS
+#endif
+#ifdef SPEED_EXTRA_PROTOS2
+SPEED_EXTRA_PROTOS2
+#endif
+
+
+#define MPN_FILL(ptr, size, n)          \
+  do {                                  \
+    mp_size_t __i;                      \
+    ASSERT ((size) >= 0);               \
+    for (__i = 0; __i < (size); __i++)  \
+      (ptr)[__i] = (n);                 \
+  } while (0)
+
+
+#if GMP_LIMB_BITS == 32
+#define GMP_NUMB_0xAA  (CNST_LIMB(0xAAAAAAAA) & GMP_NUMB_MASK)
+#endif
+#if GMP_LIMB_BITS == 64
+#define GMP_NUMB_0xAA  (CNST_LIMB(0xAAAAAAAAAAAAAAAA) & GMP_NUMB_MASK)
+#endif
+
+
+#define CMP_ABSOLUTE     1
+#define CMP_RATIO        2
+#define CMP_DIFFERENCE   3
+#define CMP_DIFFPREV     4
+int  option_cmp = CMP_ABSOLUTE;
+
+#define UNIT_SECONDS        1
+#define UNIT_CYCLES         2
+#define UNIT_CYCLESPERLIMB  3
+int  option_unit = UNIT_SECONDS;
+
+#define DATA_RANDOM   1
+#define DATA_RANDOM2  2
+#define DATA_ZEROS    3
+#define DATA_AAS      4
+#define DATA_FFS      5
+#define DATA_2FD      6
+int  option_data = DATA_RANDOM;
+
+int        option_square = 0;
+double     option_factor = 0.0;
+mp_size_t  option_step = 1;
+int        option_gnuplot = 0;
+char      *option_gnuplot_basename;
+struct size_array_t {
+  mp_size_t start, end;
+} *size_array = NULL;
+mp_size_t  size_num = 0;
+mp_size_t  size_allocnum = 0;
+int        option_resource_usage = 0;
+long       option_seed = 123456789;
+
+struct speed_params  sp;
+
+#define COLUMN_WIDTH  13  /* for the free-form output */
+
+#define FLAG_R            (1<<0)  /* require ".r" */
+#define FLAG_R_OPTIONAL   (1<<1)  /* optional ".r" */
+#define FLAG_RSIZE        (1<<2)
+#define FLAG_NODATA       (1<<3)  /* don't alloc xp, yp */
+
+const struct routine_t {
+  /* constants */
+  const char        *name;
+  speed_function_t  fun;
+  int               flag;
+} routine[] = {
+
+  { "noop",              speed_noop                 },
+  { "noop_wxs",          speed_noop_wxs             },
+  { "noop_wxys",         speed_noop_wxys            },
+
+  { "mpn_add_n",         speed_mpn_add_n,     FLAG_R_OPTIONAL },
+  { "mpn_sub_n",         speed_mpn_sub_n,     FLAG_R_OPTIONAL },
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  { "mpn_add_n_sub_n",      speed_mpn_add_n_sub_n,     FLAG_R_OPTIONAL },
+#endif
+
+  { "mpn_addmul_1",      speed_mpn_addmul_1,  FLAG_R },
+  { "mpn_submul_1",      speed_mpn_submul_1,  FLAG_R },
+#if HAVE_NATIVE_mpn_addmul_2
+  { "mpn_addmul_2",      speed_mpn_addmul_2,  FLAG_R_OPTIONAL },
+#endif
+#if HAVE_NATIVE_mpn_addmul_3
+  { "mpn_addmul_3",      speed_mpn_addmul_3,  FLAG_R_OPTIONAL },
+#endif
+#if HAVE_NATIVE_mpn_addmul_4
+  { "mpn_addmul_4",      speed_mpn_addmul_4,  FLAG_R_OPTIONAL },
+#endif
+#if HAVE_NATIVE_mpn_addmul_5
+  { "mpn_addmul_5",      speed_mpn_addmul_5,  FLAG_R_OPTIONAL },
+#endif
+#if HAVE_NATIVE_mpn_addmul_6
+  { "mpn_addmul_6",      speed_mpn_addmul_6,  FLAG_R_OPTIONAL },
+#endif
+#if HAVE_NATIVE_mpn_addmul_7
+  { "mpn_addmul_7",      speed_mpn_addmul_7,  FLAG_R_OPTIONAL },
+#endif
+#if HAVE_NATIVE_mpn_addmul_8
+  { "mpn_addmul_8",      speed_mpn_addmul_8,  FLAG_R_OPTIONAL },
+#endif
+  { "mpn_mul_1",         speed_mpn_mul_1,     FLAG_R },
+  { "mpn_mul_1_inplace", speed_mpn_mul_1_inplace, FLAG_R },
+#if HAVE_NATIVE_mpn_mul_2
+  { "mpn_mul_2",         speed_mpn_mul_2,     FLAG_R_OPTIONAL },
+#endif
+#if HAVE_NATIVE_mpn_mul_3
+  { "mpn_mul_3",         speed_mpn_mul_3,     FLAG_R_OPTIONAL },
+#endif
+#if HAVE_NATIVE_mpn_mul_4
+  { "mpn_mul_4",         speed_mpn_mul_4,     FLAG_R_OPTIONAL },
+#endif
+
+  { "mpn_divrem_1",      speed_mpn_divrem_1,  FLAG_R },
+  { "mpn_divrem_1f",     speed_mpn_divrem_1f, FLAG_R },
+#if HAVE_NATIVE_mpn_divrem_1c
+  { "mpn_divrem_1c",     speed_mpn_divrem_1c, FLAG_R },
+  { "mpn_divrem_1cf",    speed_mpn_divrem_1cf,FLAG_R },
+#endif
+  { "mpn_mod_1",         speed_mpn_mod_1,     FLAG_R_OPTIONAL },
+#if HAVE_NATIVE_mpn_mod_1c
+  { "mpn_mod_1c",        speed_mpn_mod_1c,    FLAG_R_OPTIONAL },
+#endif
+  { "mpn_preinv_divrem_1",  speed_mpn_preinv_divrem_1,  FLAG_R },
+  { "mpn_preinv_divrem_1f", speed_mpn_preinv_divrem_1f, FLAG_R },
+  { "mpn_preinv_mod_1",  speed_mpn_preinv_mod_1, FLAG_R },
+
+  { "mpn_mod_1_1",       speed_mpn_mod_1_1,       FLAG_R_OPTIONAL },
+  { "mpn_mod_1s_2",      speed_mpn_mod_1_2,       FLAG_R_OPTIONAL },
+  { "mpn_mod_1s_3",      speed_mpn_mod_1_3,       FLAG_R_OPTIONAL },
+  { "mpn_mod_1s_4",      speed_mpn_mod_1_4,       FLAG_R_OPTIONAL },
+
+  { "mpn_divrem_1_div",  speed_mpn_divrem_1_div,  FLAG_R },
+  { "mpn_divrem_1_inv",  speed_mpn_divrem_1_inv,  FLAG_R },
+  { "mpn_divrem_1f_div", speed_mpn_divrem_1f_div, FLAG_R },
+  { "mpn_divrem_1f_inv", speed_mpn_divrem_1f_inv, FLAG_R },
+  { "mpn_mod_1_div",     speed_mpn_mod_1_div,     FLAG_R },
+  { "mpn_mod_1_inv",     speed_mpn_mod_1_inv,     FLAG_R },
+
+  { "mpn_divrem_2",      speed_mpn_divrem_2,        },
+  { "mpn_divrem_2_div",  speed_mpn_divrem_2_div,    },
+  { "mpn_divrem_2_inv",  speed_mpn_divrem_2_inv,    },
+
+  { "mpn_divexact_1",    speed_mpn_divexact_1,    FLAG_R },
+  { "mpn_divexact_by3",  speed_mpn_divexact_by3          },
+
+  { "mpn_bdiv_q_1",      speed_mpn_bdiv_q_1,      FLAG_R_OPTIONAL },
+  { "mpn_pi1_bdiv_q_1",  speed_mpn_pi1_bdiv_q_1,  FLAG_R_OPTIONAL },
+  { "mpn_bdiv_dbm1c",    speed_mpn_bdiv_dbm1c,    FLAG_R_OPTIONAL },
+
+#if HAVE_NATIVE_mpn_modexact_1_odd
+  { "mpn_modexact_1_odd",  speed_mpn_modexact_1_odd,  FLAG_R },
+#endif
+  { "mpn_modexact_1c_odd", speed_mpn_modexact_1c_odd, FLAG_R },
+
+#if GMP_NUMB_BITS % 4 == 0
+  { "mpn_mod_34lsub1",   speed_mpn_mod_34lsub1 },
+#endif
+
+  { "mpn_lshift",        speed_mpn_lshift, FLAG_R   },
+  { "mpn_lshiftc",       speed_mpn_lshiftc, FLAG_R   },
+  { "mpn_rshift",        speed_mpn_rshift, FLAG_R   },
+
+  { "mpn_and_n",         speed_mpn_and_n,  FLAG_R_OPTIONAL },
+  { "mpn_andn_n",        speed_mpn_andn_n, FLAG_R_OPTIONAL },
+  { "mpn_nand_n",        speed_mpn_nand_n, FLAG_R_OPTIONAL },
+  { "mpn_ior_n",         speed_mpn_ior_n,  FLAG_R_OPTIONAL },
+  { "mpn_iorn_n",        speed_mpn_iorn_n, FLAG_R_OPTIONAL },
+  { "mpn_nior_n",        speed_mpn_nior_n, FLAG_R_OPTIONAL },
+  { "mpn_xor_n",         speed_mpn_xor_n,  FLAG_R_OPTIONAL },
+  { "mpn_xnor_n",        speed_mpn_xnor_n, FLAG_R_OPTIONAL },
+  { "mpn_com",           speed_mpn_com              },
+
+  { "mpn_popcount",      speed_mpn_popcount         },
+  { "mpn_hamdist",       speed_mpn_hamdist          },
+
+  { "mpn_matrix22_mul",  speed_mpn_matrix22_mul     },
+
+  { "mpn_hgcd",          speed_mpn_hgcd             },
+  { "mpn_hgcd_lehmer",   speed_mpn_hgcd_lehmer      },
+
+  { "mpn_gcd_1",         speed_mpn_gcd_1,  FLAG_R_OPTIONAL },
+  { "mpn_gcd_1N",        speed_mpn_gcd_1N, FLAG_R_OPTIONAL },
+
+  { "mpn_gcd",           speed_mpn_gcd                    },
+#if 0
+  { "mpn_gcd_binary",    speed_mpn_gcd_binary             },
+  { "mpn_gcd_accel",     speed_mpn_gcd_accel              },
+  { "find_a",            speed_find_a,        FLAG_NODATA },
+#endif
+
+  { "mpn_gcdext",            speed_mpn_gcdext            },
+  { "mpn_gcdext_single",     speed_mpn_gcdext_single     },
+  { "mpn_gcdext_double",     speed_mpn_gcdext_double     },
+  { "mpn_gcdext_one_single", speed_mpn_gcdext_one_single },
+  { "mpn_gcdext_one_double", speed_mpn_gcdext_one_double },
+#if 0
+  { "mpn_gcdext_lehmer",     speed_mpn_gcdext_lehmer     },
+#endif
+  { "mpz_jacobi",        speed_mpz_jacobi           },
+  { "mpn_jacobi_base",   speed_mpn_jacobi_base      },
+  { "mpn_jacobi_base_1", speed_mpn_jacobi_base_1    },
+  { "mpn_jacobi_base_2", speed_mpn_jacobi_base_2    },
+  { "mpn_jacobi_base_3", speed_mpn_jacobi_base_3    },
+
+  { "mpn_mul",           speed_mpn_mul,         FLAG_R_OPTIONAL },
+  { "mpn_mul_basecase",  speed_mpn_mul_basecase,FLAG_R_OPTIONAL },
+  { "mpn_sqr_basecase",  speed_mpn_sqr_basecase     },
+#if HAVE_NATIVE_mpn_sqr_diagonal
+  { "mpn_sqr_diagonal",  speed_mpn_sqr_diagonal     },
+#endif
+
+  { "mpn_mul_n",         speed_mpn_mul_n            },
+  { "mpn_sqr",           speed_mpn_sqr              },
+
+  { "mpn_toom2_sqr",     speed_mpn_toom2_sqr        },
+  { "mpn_toom3_sqr",     speed_mpn_toom3_sqr        },
+  { "mpn_toom4_sqr",     speed_mpn_toom4_sqr        },
+  { "mpn_toom6_sqr",     speed_mpn_toom6_sqr        },
+  { "mpn_toom8_sqr",     speed_mpn_toom8_sqr        },
+  { "mpn_toom22_mul",    speed_mpn_toom22_mul       },
+  { "mpn_toom33_mul",    speed_mpn_toom33_mul       },
+  { "mpn_toom44_mul",    speed_mpn_toom44_mul       },
+  { "mpn_toom6h_mul",    speed_mpn_toom6h_mul       },
+  { "mpn_toom8h_mul",    speed_mpn_toom8h_mul       },
+  { "mpn_toom32_mul",    speed_mpn_toom32_mul       },
+  { "mpn_toom42_mul",    speed_mpn_toom42_mul       },
+  { "mpn_toom43_mul",    speed_mpn_toom43_mul       },
+  { "mpn_toom63_mul",    speed_mpn_toom63_mul       },
+  { "mpn_nussbaumer_mul",    speed_mpn_nussbaumer_mul    },
+  { "mpn_nussbaumer_mul_sqr",speed_mpn_nussbaumer_mul_sqr},
+#if WANT_OLD_FFT_FULL
+  { "mpn_mul_fft_full",      speed_mpn_mul_fft_full      },
+  { "mpn_mul_fft_full_sqr",  speed_mpn_mul_fft_full_sqr  },
+#endif
+  { "mpn_mul_fft",       speed_mpn_mul_fft,     FLAG_R_OPTIONAL },
+  { "mpn_mul_fft_sqr",   speed_mpn_mul_fft_sqr, FLAG_R_OPTIONAL },
+
+  { "mpn_mullo_n",        speed_mpn_mullo_n         },
+  { "mpn_mullo_basecase", speed_mpn_mullo_basecase  },
+
+  { "mpn_bc_mulmod_bnm1",      speed_mpn_bc_mulmod_bnm1      },
+  { "mpn_mulmod_bnm1",         speed_mpn_mulmod_bnm1         },
+  { "mpn_mulmod_bnm1_rounded", speed_mpn_mulmod_bnm1_rounded },
+  { "mpn_sqrmod_bnm1",         speed_mpn_sqrmod_bnm1         },
+
+  { "mpn_invert",              speed_mpn_invert              },
+  { "mpn_invertappr",          speed_mpn_invertappr          },
+  { "mpn_ni_invertappr",       speed_mpn_ni_invertappr       },
+  { "mpn_binvert",             speed_mpn_binvert             },
+
+  { "mpn_sbpi1_div_qr",        speed_mpn_sbpi1_div_qr,    FLAG_R_OPTIONAL},
+  { "mpn_dcpi1_div_qr",        speed_mpn_dcpi1_div_qr,    FLAG_R_OPTIONAL},
+  { "mpn_mu_div_qr",           speed_mpn_mu_div_qr,       FLAG_R_OPTIONAL},
+  { "mpn_mupi_div_qr",         speed_mpn_mupi_div_qr,     FLAG_R_OPTIONAL},
+  { "mpn_sbpi1_divappr_q",     speed_mpn_sbpi1_divappr_q, FLAG_R_OPTIONAL},
+  { "mpn_dcpi1_divappr_q",     speed_mpn_dcpi1_divappr_q, FLAG_R_OPTIONAL},
+
+  { "mpn_sbpi1_bdiv_qr",       speed_mpn_sbpi1_bdiv_qr       },
+  { "mpn_dcpi1_bdiv_qr",       speed_mpn_dcpi1_bdiv_qr       },
+  { "mpn_sbpi1_bdiv_q",        speed_mpn_sbpi1_bdiv_q        },
+  { "mpn_dcpi1_bdiv_q",        speed_mpn_dcpi1_bdiv_q        },
+
+  { "mpn_get_str",          speed_mpn_get_str,     FLAG_R_OPTIONAL },
+  { "mpn_set_str",          speed_mpn_set_str,     FLAG_R_OPTIONAL },
+  { "mpn_set_str_basecase", speed_mpn_bc_set_str,  FLAG_R_OPTIONAL },
+
+  { "mpn_sqrtrem",       speed_mpn_sqrtrem          },
+  { "mpn_rootrem",       speed_mpn_rootrem, FLAG_R  },
+
+  { "mpn_fib2_ui",       speed_mpn_fib2_ui,    FLAG_NODATA },
+  { "mpz_fib_ui",        speed_mpz_fib_ui,     FLAG_NODATA },
+  { "mpz_fib2_ui",       speed_mpz_fib2_ui,    FLAG_NODATA },
+  { "mpz_lucnum_ui",     speed_mpz_lucnum_ui,  FLAG_NODATA },
+  { "mpz_lucnum2_ui",    speed_mpz_lucnum2_ui, FLAG_NODATA },
+
+  { "mpz_add",           speed_mpz_add              },
+  { "mpz_bin_uiui",      speed_mpz_bin_uiui, FLAG_NODATA | FLAG_R_OPTIONAL },
+  { "mpz_fac_ui",        speed_mpz_fac_ui,   FLAG_NODATA   },
+  { "mpz_powm",          speed_mpz_powm             },
+  { "mpz_powm_mod",      speed_mpz_powm_mod         },
+  { "mpz_powm_redc",     speed_mpz_powm_redc        },
+  { "mpz_powm_ui",       speed_mpz_powm_ui,  FLAG_R_OPTIONAL },
+
+  { "mpz_mod",           speed_mpz_mod              },
+  { "mpn_redc_1",        speed_mpn_redc_1           },
+  { "mpn_redc_2",        speed_mpn_redc_2           },
+  { "mpn_redc_n",        speed_mpn_redc_n           },
+
+  { "MPN_COPY",          speed_MPN_COPY             },
+  { "MPN_COPY_INCR",     speed_MPN_COPY_INCR        },
+  { "MPN_COPY_DECR",     speed_MPN_COPY_DECR        },
+  { "memcpy",            speed_memcpy               },
+#if HAVE_NATIVE_mpn_copyi
+  { "mpn_copyi",         speed_mpn_copyi            },
+#endif
+#if HAVE_NATIVE_mpn_copyd
+  { "mpn_copyd",         speed_mpn_copyd            },
+#endif
+#if HAVE_NATIVE_mpn_addlsh1_n
+  { "mpn_addlsh1_n",     speed_mpn_addlsh1_n        },
+#endif
+#if HAVE_NATIVE_mpn_sublsh1_n
+  { "mpn_sublsh1_n",     speed_mpn_sublsh1_n        },
+#endif
+#if HAVE_NATIVE_mpn_rsblsh1_n
+  { "mpn_rsblsh1_n",     speed_mpn_rsblsh1_n        },
+#endif
+#if HAVE_NATIVE_mpn_addlsh2_n
+  { "mpn_addlsh2_n",     speed_mpn_addlsh2_n        },
+#endif
+#if HAVE_NATIVE_mpn_sublsh2_n
+  { "mpn_sublsh2_n",     speed_mpn_sublsh2_n        },
+#endif
+#if HAVE_NATIVE_mpn_rsblsh2_n
+  { "mpn_rsblsh2_n",     speed_mpn_rsblsh2_n        },
+#endif
+#if HAVE_NATIVE_mpn_rsh1add_n
+  { "mpn_rsh1add_n",     speed_mpn_rsh1add_n        },
+#endif
+#if HAVE_NATIVE_mpn_rsh1sub_n
+  { "mpn_rsh1sub_n",     speed_mpn_rsh1sub_n        },
+#endif
+
+  { "MPN_ZERO",          speed_MPN_ZERO             },
+
+  { "binvert_limb",       speed_binvert_limb,       FLAG_NODATA },
+  { "binvert_limb_mul1",  speed_binvert_limb_mul1,  FLAG_NODATA },
+  { "binvert_limb_loop",  speed_binvert_limb_loop,  FLAG_NODATA },
+  { "binvert_limb_cond",  speed_binvert_limb_cond,  FLAG_NODATA },
+  { "binvert_limb_arith", speed_binvert_limb_arith, FLAG_NODATA },
+
+  { "malloc_free",                  speed_malloc_free                  },
+  { "malloc_realloc_free",          speed_malloc_realloc_free          },
+  { "gmp_allocate_free",            speed_gmp_allocate_free            },
+  { "gmp_allocate_reallocate_free", speed_gmp_allocate_reallocate_free },
+  { "mpz_init_clear",               speed_mpz_init_clear               },
+  { "mpq_init_clear",               speed_mpq_init_clear               },
+  { "mpf_init_clear",               speed_mpf_init_clear               },
+  { "mpz_init_realloc_clear",       speed_mpz_init_realloc_clear       },
+
+  { "umul_ppmm",         speed_umul_ppmm,     FLAG_R_OPTIONAL },
+#if HAVE_NATIVE_mpn_umul_ppmm
+  { "mpn_umul_ppmm",     speed_mpn_umul_ppmm, FLAG_R_OPTIONAL },
+#endif
+#if HAVE_NATIVE_mpn_umul_ppmm_r
+  { "mpn_umul_ppmm_r",   speed_mpn_umul_ppmm_r, FLAG_R_OPTIONAL },
+#endif
+
+  { "count_leading_zeros",  speed_count_leading_zeros,  FLAG_NODATA | FLAG_R_OPTIONAL },
+  { "count_trailing_zeros", speed_count_trailing_zeros, FLAG_NODATA | FLAG_R_OPTIONAL },
+
+  { "udiv_qrnnd",             speed_udiv_qrnnd,             FLAG_R_OPTIONAL },
+  { "udiv_qrnnd_preinv1",     speed_udiv_qrnnd_preinv1,     FLAG_R_OPTIONAL },
+  { "udiv_qrnnd_preinv2",     speed_udiv_qrnnd_preinv2,     FLAG_R_OPTIONAL },
+  { "udiv_qrnnd_c",           speed_udiv_qrnnd_c,           FLAG_R_OPTIONAL },
+#if HAVE_NATIVE_mpn_udiv_qrnnd
+  { "mpn_udiv_qrnnd",         speed_mpn_udiv_qrnnd,         FLAG_R_OPTIONAL },
+#endif
+#if HAVE_NATIVE_mpn_udiv_qrnnd_r
+  { "mpn_udiv_qrnnd_r",       speed_mpn_udiv_qrnnd_r,       FLAG_R_OPTIONAL },
+#endif
+  { "invert_limb",            speed_invert_limb,            FLAG_R_OPTIONAL },
+
+  { "operator_div",           speed_operator_div,           FLAG_R_OPTIONAL },
+  { "operator_mod",           speed_operator_mod,           FLAG_R_OPTIONAL },
+
+  { "gmp_randseed",    speed_gmp_randseed,    FLAG_R_OPTIONAL               },
+  { "gmp_randseed_ui", speed_gmp_randseed_ui, FLAG_R_OPTIONAL | FLAG_NODATA },
+  { "mpz_urandomb",    speed_mpz_urandomb,    FLAG_R_OPTIONAL | FLAG_NODATA },
+
+#ifdef SPEED_EXTRA_ROUTINES
+  SPEED_EXTRA_ROUTINES
+#endif
+#ifdef SPEED_EXTRA_ROUTINES2
+  SPEED_EXTRA_ROUTINES2
+#endif
+};
+
+
+/* One routine selected on the command line, together with its most recent
+   measurement.  Filled in by routine_find() and updated by run_one().  */
+struct choice_t {
+  const struct routine_t  *p;          /* matching entry in routine[] */
+  mp_limb_t               r;           /* ".r" parameter, 0 when none given */
+  double                  scale;       /* factor from a "scale*name" prefix, else 1.0 */
+  double                  time;        /* latest time, adjusted per the output options */
+  int                     no_time;     /* non-zero when there's no time to show */
+  double                  prev_time;   /* scaled time from the previous size (for CMP_DIFFPREV) */
+  const char              *name;       /* the command line argument as given */
+};
+struct choice_t  *choice;              /* array of selected routines, allocated in main() */
+int  num_choices = 0;                  /* number of entries in use in choice[] */
+
+
+/* Fill the size limbs at ptr with the operand data pattern selected by
+   option_data.  An option_data value not handled here is a program error
+   and aborts.  */
+void
+data_fill (mp_ptr ptr, mp_size_t size)
+{
+  if (option_data == DATA_RANDOM)
+    {
+      mpn_random (ptr, size);
+    }
+  else if (option_data == DATA_RANDOM2)
+    {
+      mpn_random2 (ptr, size);
+    }
+  else if (option_data == DATA_ZEROS)
+    {
+      MPN_ZERO (ptr, size);
+    }
+  else if (option_data == DATA_AAS)
+    {
+      MPN_FILL (ptr, size, GMP_NUMB_0xAA);
+    }
+  else if (option_data == DATA_FFS || option_data == DATA_2FD)
+    {
+      /* Both start as all ones, DATA_2FD then lowers the least significant
+         limb by 2.  */
+      MPN_FILL (ptr, size, GMP_NUMB_MAX);
+      if (option_data == DATA_2FD)
+        ptr[0] -= 2;
+    }
+  else
+    {
+      abort();
+      /*NOTREACHED*/
+    }
+}
+
+/* The code here handling the various combinations of output options isn't
+   too attractive, but it works and is fairly clean.  */
+
+/* The divisor applied to a time for UNIT_CYCLESPERLIMB at size n: n*n when
+   option_square is 1, n*(n+1)/2 when option_square is 2, and plain n
+   otherwise.  */
+#define SIZE_TO_DIVISOR(n)              \
+  (option_square == 1 ? (n)*(n)         \
+  : option_square == 2 ? (n)*((n)+1)/2  \
+  : (n))
+
+/* Measure every chosen routine at size s->size and write one line of
+   results to fp.
+
+   prev_size is the size measured by the previous call within the current
+   -s range, or -1 if this is the first size of the range.  It only matters
+   for CMP_DIFFPREV.
+
+   Operands s->xp and s->yp are allocated and filled here for each size
+   (unless every routine is FLAG_NODATA) and are released again before
+   returning, so they must not be used by the caller afterwards.  */
+void
+run_one (FILE *fp, struct speed_params *s, mp_size_t prev_size)
+{
+  const char  *first_open_fastest, *first_open_notfastest, *first_close;
+  int         i, fastest, want_data;
+  double      fastest_time;
+  TMP_DECL;
+
+  TMP_MARK;
+
+  /* allocate data, unless all routines are NODATA */
+  want_data = 0;
+  for (i = 0; i < num_choices; i++)
+    want_data |= ((choice[i].p->flag & FLAG_NODATA) == 0);
+
+  if (want_data)
+    {
+      SPEED_TMP_ALLOC_LIMBS (s->xp, s->size, s->align_xp);
+      SPEED_TMP_ALLOC_LIMBS (s->yp, s->size, s->align_yp);
+
+      data_fill (s->xp, s->size);
+      data_fill (s->yp, s->size);
+
+      /* DATA_2FD wants a high limb of 2 on xp.  This must be done here,
+         once the operand for this size has been allocated and filled;
+         doing it in the caller would store through a pointer that's not
+         yet (or no longer) valid.  */
+      if (option_data == DATA_2FD && s->size >= 2)
+        s->xp[s->size-1] = 2;
+    }
+  else
+    {
+      s->xp = NULL;
+      s->yp = NULL;
+    }
+
+  /* In CMP_DIFFPREV the first size has no previous size to difference
+     against, so its absolute times are shown in parentheses.  */
+  if (prev_size == -1 && option_cmp == CMP_DIFFPREV)
+    {
+      first_open_fastest = "(#";
+      first_open_notfastest = " (";
+      first_close = ")";
+    }
+  else
+    {
+      first_open_fastest = "#";
+      first_open_notfastest = " ";
+      first_close = "";
+    }
+
+  fastest = -1;
+  fastest_time = -1.0;
+  for (i = 0; i < num_choices; i++)
+    {
+      s->r = choice[i].r;
+      choice[i].time = speed_measure (choice[i].p->fun, s);
+
+      /* speed_measure gives -1.0 when there's no time available */
+      choice[i].no_time = (choice[i].time == -1.0);
+      if (! choice[i].no_time)
+        choice[i].time *= choice[i].scale;
+
+      /* Apply the effect of CMP_DIFFPREV, but the new choice[i].prev_time
+         is before any differences.  */
+      {
+        double     t;
+        t = choice[i].time;
+        if (t != -1.0 && option_cmp == CMP_DIFFPREV && prev_size != -1)
+          {
+            if (choice[i].prev_time == -1.0)
+              choice[i].no_time = 1;
+            else
+              choice[i].time = choice[i].time - choice[i].prev_time;
+          }
+        choice[i].prev_time = t;
+      }
+
+      if (choice[i].no_time)
+        continue;
+
+      /* Look for the fastest after CMP_DIFFPREV has been applied, but
+         before CMP_RATIO or CMP_DIFFERENCE.  There's only a fastest shown
+         if there's more than one routine.  */
+      if (num_choices > 1 && (fastest == -1 || choice[i].time < fastest_time))
+        {
+          fastest = i;
+          fastest_time = choice[i].time;
+        }
+
+      if (option_cmp == CMP_DIFFPREV)
+        {
+          /* Conversion for UNIT_CYCLESPERLIMB differs in CMP_DIFFPREV. */
+          if (option_unit == UNIT_CYCLES)
+            choice[i].time /= speed_cycletime;
+          else if (option_unit == UNIT_CYCLESPERLIMB)
+            {
+              if (prev_size == -1)
+                choice[i].time /= speed_cycletime;
+              else
+                choice[i].time /=  (speed_cycletime
+                                    * (SIZE_TO_DIVISOR(s->size)
+                                       - SIZE_TO_DIVISOR(prev_size)));
+            }
+        }
+      else
+        {
+          if (option_unit == UNIT_CYCLES)
+            choice[i].time /= speed_cycletime;
+          else if (option_unit == UNIT_CYCLESPERLIMB)
+            choice[i].time /= (speed_cycletime * SIZE_TO_DIVISOR(s->size));
+
+          if (option_cmp == CMP_RATIO && i > 0)
+            {
+              /* A ratio isn't affected by the units chosen. */
+              if (choice[0].no_time || choice[0].time == 0.0)
+                choice[i].no_time = 1;
+              else
+                choice[i].time /= choice[0].time;
+            }
+          else if (option_cmp == CMP_DIFFERENCE && i > 0)
+            {
+              if (choice[0].no_time)
+                {
+                  choice[i].no_time = 1;
+                  continue;
+                }
+              choice[i].time -= choice[0].time;
+            }
+        }
+    }
+
+  if (option_gnuplot)
+    {
+      /* In CMP_DIFFPREV, don't print anything for the first size, start
+         with the second where an actual difference is available.
+
+         In CMP_RATIO, print the first column as 1.0.
+
+         The 9 decimals printed is much more than the expected precision of
+         the measurements actually. */
+
+      if (! (option_cmp == CMP_DIFFPREV && prev_size == -1))
+        {
+          /* mp_size_t isn't necessarily a long, hence the cast for %ld */
+          fprintf (fp, "%-6ld ", (long) s->size);
+          for (i = 0; i < num_choices; i++)
+            fprintf (fp, "  %.9e",
+                     choice[i].no_time ? 0.0
+                     : (option_cmp == CMP_RATIO && i == 0) ? 1.0
+                     : choice[i].time);
+          fprintf (fp, "\n");
+        }
+    }
+  else
+    {
+      fprintf (fp, "%-6ld ", (long) s->size);
+      for (i = 0; i < num_choices; i++)
+        {
+          char  buf[128];
+          int   decimals;
+
+          if (choice[i].no_time)
+            {
+              fprintf (fp, " %*s", COLUMN_WIDTH, "n/a");
+            }
+          else
+            {
+              if (option_unit == UNIT_CYCLESPERLIMB
+                  || (option_cmp == CMP_RATIO && i > 0))
+                decimals = 4;
+              else if (option_unit == UNIT_CYCLES)
+                decimals = 2;
+              else
+                decimals = 9;
+
+              sprintf (buf, "%s%.*f%s",
+                       i == fastest ? first_open_fastest : first_open_notfastest,
+                       decimals, choice[i].time, first_close);
+              fprintf (fp, " %*s", COLUMN_WIDTH, buf);
+            }
+        }
+      fprintf (fp, "\n");
+    }
+
+  TMP_FREE;
+}
+
+/* Run the chosen routines over every size range given with -s, writing the
+   results to fp.  Within a range the size advances by the larger of the -f
+   factor increment and the -t step, and always by at least 1.  */
+void
+run_all (FILE *fp)
+{
+  mp_size_t  prev_size;
+  int        i;
+  TMP_DECL;
+
+  TMP_MARK;
+  SPEED_TMP_ALLOC_LIMBS (sp.xp_block, SPEED_BLOCK_SIZE, sp.align_xp);
+  SPEED_TMP_ALLOC_LIMBS (sp.yp_block, SPEED_BLOCK_SIZE, sp.align_yp);
+
+  data_fill (sp.xp_block, SPEED_BLOCK_SIZE);
+  data_fill (sp.yp_block, SPEED_BLOCK_SIZE);
+
+  for (i = 0; i < size_num; i++)
+    {
+      sp.size = size_array[i].start;
+      prev_size = -1;
+      for (;;)
+        {
+          mp_size_t  step;
+
+          /* sp.xp and sp.yp are allocated, filled (including any DATA_2FD
+             adjustment) and released inside run_one, so there's nothing to
+             set up or restore for them here.  */
+          run_one (fp, &sp, prev_size);
+          prev_size = sp.size;
+
+          if (option_factor != 0.0)
+            {
+              step = (mp_size_t) (sp.size * option_factor - sp.size);
+              if (step < 1)
+                step = 1;
+            }
+          else
+            step = 1;
+          if (step < option_step)
+            step = option_step;
+
+          sp.size += step;
+          if (sp.size > size_array[i].end)
+            break;
+        }
+    }
+
+  TMP_FREE;
+}
+
+
+/* Open filename for writing and return the stream.  If it can't be created
+   then print a message to stderr and exit with status 1, so the return is
+   never NULL.  */
+FILE *
+fopen_for_write (const char *filename)
+{
+  FILE  *out = fopen (filename, "w");
+
+  if (out == NULL)
+    {
+      fprintf (stderr, "Cannot create %s\n", filename);
+      exit(1);
+    }
+
+  return out;
+}
+
+/* Close fp, which has been written to as filename.  If the stream has its
+   error indicator set, or the close itself fails, then print a message to
+   stderr and exit with status 1.  */
+void
+fclose_written (FILE *fp, const char *filename)
+{
+  /* The error indicator must be read before the stream is closed. */
+  int  write_failed = (ferror (fp) != 0);
+  int  close_failed = (fclose (fp) != 0);
+
+  if (write_failed || close_failed)
+    {
+      fprintf (stderr, "Error writing %s\n", filename);
+      exit(1);
+    }
+}
+
+
+/* Produce the -P output: write "<basename>.gnuplot" holding gnuplot commands
+   to plot one line per chosen routine, then run the measurements with the
+   results going to "<basename>.data".
+
+   argc and argv are the program's command line, recorded as a comment at
+   the top of the .gnuplot file.  */
+void
+run_gnuplot (int argc, char *argv[])
+{
+  char    *plot_filename;
+  char    *data_filename;
+  size_t  filename_alloc;
+  FILE    *fp;
+  int     i;
+
+  /* 20 extra bytes is ample for either suffix plus the terminating null */
+  filename_alloc = strlen (option_gnuplot_basename) + 20;
+  plot_filename = (char *) (*__gmp_allocate_func) (filename_alloc);
+  data_filename = (char *) (*__gmp_allocate_func) (filename_alloc);
+
+  sprintf (plot_filename, "%s.gnuplot", option_gnuplot_basename);
+  sprintf (data_filename, "%s.data",    option_gnuplot_basename);
+
+  fp = fopen_for_write (plot_filename);
+
+  fprintf (fp, "# Generated with:\n");
+  fprintf (fp, "#");
+  for (i = 0; i < argc; i++)
+    fprintf (fp, " %s", argv[i]);
+  fprintf (fp, "\n");
+  fprintf (fp, "\n");
+
+  fprintf (fp, "reset\n");
+
+  /* Putting the key at the top left is usually good, and you can change it
+     interactively if it's not. */
+  fprintf (fp, "set key left\n");
+
+  /* designed to make it possible to see crossovers easily
+
+     This is spelt "set style data", the older "set data style" form isn't
+     accepted by current gnuplot.  */
+  fprintf (fp, "set style data lines\n");
+
+  /* Column 1 of the data file is the size, column i+2 is routine i.  Every
+     plot element but the last ends with a backslash continuation.  */
+  fprintf (fp, "plot ");
+  for (i = 0; i < num_choices; i++)
+    {
+      fprintf (fp, " \"%s\" using 1:%d", data_filename, i+2);
+      fprintf (fp, " title \"%s\"", choice[i].name);
+
+      if (i != num_choices-1)
+        fprintf (fp, ", \\");
+      fprintf (fp, "\n");
+    }
+
+  fprintf (fp, "load \"-\"\n");
+  fclose_written (fp, plot_filename);
+
+  fp = fopen_for_write (data_filename);
+
+  /* Unbuffered so you can see where the program was up to if it crashes or
+     you kill it. */
+  setbuf (fp, NULL);
+
+  run_all (fp);
+  fclose_written (fp, data_filename);
+
+  (*__gmp_free_func) (plot_filename, filename_alloc);
+  (*__gmp_free_func) (data_filename, filename_alloc);
+}
+
+
+/* Return a limb with n many one bits (starting from the least significant).
+   n == GMP_LIMB_BITS is handled as a special case rather than by shifting,
+   and n == 0 gives zero.  Other values of n are used directly as a shift
+   count, so callers need to keep n within 0 to GMP_LIMB_BITS.  */
+
+#define LIMB_ONES(n) \
+  ((n) == GMP_LIMB_BITS ? MP_LIMB_T_MAX      \
+    : (n) == 0 ? CNST_LIMB(0)                   \
+    : (CNST_LIMB(1) << (n)) - 1)
+
+/* Convert the ".r" parameter string s to a limb.  The forms accepted are
+
+       aas       the GMP_NUMB_0xAA pattern
+       <num>     anything mpz_set_str takes with base 0; only the low limb
+                 is kept, with a warning if the value is more than one limb
+       <N>bits   a random number of exactly N bits, 1 <= N <= GMP_LIMB_BITS
+       <N>ones   N one bits, 0 <= N <= GMP_LIMB_BITS
+
+   where N is decimal, or hex with an 0x prefix.  Anything else, or an N out
+   of range, prints a message to stderr and exits with status 1.  */
+mp_limb_t
+r_string (const char *s)
+{
+  const char  *s_orig = s;
+  long        n;
+
+  if (strcmp (s, "aas") == 0)
+    return GMP_NUMB_0xAA;
+
+  /* First see if the whole string is a plain number. */
+  {
+    mpz_t      z;
+    mp_limb_t  l;
+    int        set, siz;
+
+    mpz_init (z);
+    set = mpz_set_str (z, s, 0);
+    siz = SIZ(z);
+    l = (siz == 0 ? 0 : siz > 0 ? PTR(z)[0] : -PTR(z)[0]);
+    mpz_clear (z);
+    if (set == 0)
+      {
+        if (siz > 1 || siz < -1)
+          printf ("Warning, r parameter %s truncated to %d bits\n",
+                  s_orig, GMP_LIMB_BITS);
+        return l;
+      }
+  }
+
+  /* Otherwise a number followed by a suffix, s is left at the suffix. */
+  if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X'))
+    n = strtoul (s+2, (char **) &s, 16);
+  else
+    n = strtol (s, (char **) &s, 10);
+
+  if (strcmp (s, "bits") == 0)
+    {
+      mp_limb_t  l;
+
+      /* n must be at least 1 since the top bit is forced on below, and a
+         shift by n-1 isn't defined for n < 1.  */
+      if (n < 1 || n > GMP_LIMB_BITS)
+        {
+          fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n",
+                   n, GMP_LIMB_BITS);
+          exit (1);
+        }
+      mpn_random (&l, 1);
+      return (l | (CNST_LIMB(1) << (n-1))) & LIMB_ONES(n);
+    }
+  else  if (strcmp (s, "ones") == 0)
+    {
+      /* a negative n would be a negative shift count in LIMB_ONES */
+      if (n < 0 || n > GMP_LIMB_BITS)
+        {
+          fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n",
+                   n, GMP_LIMB_BITS);
+          exit (1);
+        }
+      return LIMB_ONES (n);
+    }
+  else if (*s != '\0')
+    {
+      fprintf (stderr, "invalid r parameter: %s\n", s_orig);
+      exit (1);
+    }
+
+  return n;
+}
+
+
+/* Look up the command line argument s_orig in routine[] and fill in *c
+   accordingly.  The argument has the form
+
+       [scale*]name[.r]
+
+   c->name is set to s_orig itself, c->scale to the leading multiplier (1.0
+   if there's none), c->p to the matching table entry, and c->r to the
+   converted ".r" parameter (0 if there's none).
+
+   An unknown name, a ".r" on a routine which doesn't take one, or a missing
+   ".r" on a routine which needs one, prints a message to stderr and exits
+   with status 1.  */
+void
+routine_find (struct choice_t *c, const char *s_orig)
+{
+  const char  *s;
+  int     i;
+  size_t  nlen;
+
+  c->name = s_orig;
+  s = strchr (s_orig, '*');
+  if (s != NULL)
+    {
+      c->scale = atof(s_orig);
+      s++;
+    }
+  else
+    {
+      c->scale = 1.0;
+      s = s_orig;
+    }
+
+  for (i = 0; i < numberof (routine); i++)
+    {
+      /* strncmp rather than memcmp, since s might be shorter than the
+         routine name and mustn't be read beyond its terminating null.  */
+      nlen = strlen (routine[i].name);
+      if (strncmp (s, routine[i].name, nlen) != 0)
+        continue;
+
+      /* s starts with this name, but it's only a match if the name is
+         followed by a ".r" or is the whole of s.  Otherwise keep looking,
+         since this name can be a prefix of a longer one.  */
+
+      if (s[nlen] == '.')
+        {
+          /* match, with a .r parameter */
+
+          if (! (routine[i].flag & (FLAG_R|FLAG_R_OPTIONAL)))
+            {
+              fprintf (stderr,
+                       "Choice %s bad: doesn't take a \".<r>\" parameter\n",
+                       s_orig);
+              exit (1);
+            }
+
+          c->p = &routine[i];
+          c->r = r_string (s + nlen + 1);
+          return;
+        }
+
+      if (s[nlen] == '\0')
+        {
+          /* match, with no parameter */
+
+          if (routine[i].flag & FLAG_R)
+            {
+              fprintf (stderr,
+                       "Choice %s bad: needs a \".<r>\" parameter\n",
+                       s_orig);
+              exit (1);
+            }
+
+          c->p = &routine[i];
+          c->r = 0;
+          return;
+        }
+    }
+
+  fprintf (stderr, "Choice %s unrecognised\n", s_orig);
+  exit (1);
+}
+
+
+/* Print the help text to stdout: the options, the list of available
+   routines from routine[] (showing which take a ".r" parameter), notes on
+   the forms of r, and the timing method description in speed_time_string.  */
+void
+usage (void)
+{
+  /* text shown ahead of the routine list */
+  static const char *const  before_list[] = {
+    "Usage: speed [-options] -s size <routine>...\n",
+    "Measure the speed of some routines.\n",
+    "Times are in seconds, accuracy is shown.\n",
+    "\n",
+    "   -p num     set precision as number of time units each routine must run\n",
+    "   -s size[-end][,size[-end]]...   sizes to measure\n",
+    "              single sizes or ranges, sep with comma or use multiple -s\n",
+    "   -t step    step through sizes by given amount\n",
+    "   -f factor  step through sizes by given factor (eg. 1.05)\n",
+    "   -r         show times as ratios of the first routine\n",
+    "   -d         show times as difference from the first routine\n",
+    "   -D         show times as difference from previous size shown\n",
+    "   -c         show times in CPU cycles\n",
+    "   -C         show times in cycles per limb\n",
+    "   -u         print resource usage (memory) at end\n",
+    "   -P name    output plot files \"name.gnuplot\" and \"name.data\"\n",
+    "   -a <type>  use given data: random(default), random2, zeros, aas, ffs, 2fd\n",
+    "   -x, -y, -w, -W <align>  specify data alignments, sources and dests\n",
+    "   -o addrs   print addresses of data blocks\n",
+    "\n",
+    "If both -t and -f are used, it means step by the factor or the step, whichever\n",
+    "is greater.\n",
+    "If both -C and -D are used, it means cycles per however many limbs between a\n",
+    "size and the previous size.\n",
+    "\n",
+    "After running with -P, plots can be viewed with Gnuplot or Quickplot.\n",
+    "\"gnuplot name.gnuplot\" (use \"set logscale xy; replot\" at the prompt for\n",
+    "a log/log plot).\n",
+    "\"quickplot -s name.data\" (has interactive zooming, and note -s is important\n",
+    "when viewing more than one routine, it means same axis scales for all data).\n",
+    "\n",
+    "The available routines are as follows.\n",
+    "\n"
+  };
+
+  /* text shown after the routine list, up to the timing description */
+  static const char *const  after_list[] = {
+    "\n",
+    "Routines with a \".r\" need an extra parameter, for example mpn_lshift.6\n",
+    "r should be in decimal, or use 0xN for hexadecimal.\n",
+    "\n",
+    "Special forms for r are \"<N>bits\" for a random N bit number, \"<N>ones\" for\n",
+    "N one bits, or \"aas\" for 0xAA..AA.\n",
+    "\n",
+    "Times for sizes out of the range accepted by a routine are shown as 0.\n",
+    "The fastest routine at each size is marked with a # (free form output only).\n",
+    "\n"
+  };
+
+  int  k;
+
+  /* called ahead of printing speed_time_string below */
+  speed_time_init ();
+
+  for (k = 0; k < numberof (before_list); k++)
+    printf ("%s", before_list[k]);
+
+  for (k = 0; k < numberof (routine); k++)
+    {
+      const char  *suffix;
+
+      if (routine[k].flag & FLAG_R)
+        suffix = ".r";
+      else if (routine[k].flag & FLAG_R_OPTIONAL)
+        suffix = " (optional .r)";
+      else
+        suffix = "";
+
+      printf ("\t%s%s\n", routine[k].name, suffix);
+    }
+
+  for (k = 0; k < numberof (after_list); k++)
+    printf ("%s", after_list[k]);
+
+  printf ("%s", speed_time_string);
+  printf ("\n");
+  printf ("Gnuplot home page http://www.gnuplot.info/\n");
+  printf ("Quickplot home page http://quickplot.sourceforge.net/\n");
+}
+
+/* Check an alignment given with the option called name.  align must be in
+   the range 0 to SPEED_TMP_ALLOC_ADJUST_MASK inclusive, if not then print a
+   message to stderr and exit with status 1.  */
+void
+check_align_option (const char *name, mp_size_t align)
+{
+  if (0 <= align && align <= SPEED_TMP_ALLOC_ADJUST_MASK)
+    return;
+
+  fprintf (stderr, "Alignment request out of range: %s %ld\n",
+           name, (long) align);
+  fprintf (stderr, "  should be 0 to %d (limbs), inclusive\n",
+           SPEED_TMP_ALLOC_ADJUST_MASK);
+  exit (1);
+}
+
+int
+main (int argc, char *argv[])
+{
+  int  i;
+  int  opt;
+
+  /* Unbuffered so output goes straight out when directed to a pipe or file
+     and isn't lost on killing the program half way.  */
+  setbuf (stdout, NULL);
+
+  for (;;)
+    {
+      opt = getopt(argc, argv, "a:CcDdEFf:o:p:P:rRs:t:ux:y:w:W:z");
+      if (opt == EOF)
+        break;
+
+      switch (opt) {
+      case 'a':
+        if (strcmp (optarg, "random") == 0)       option_data = DATA_RANDOM;
+        else if (strcmp (optarg, "random2") == 0) option_data = DATA_RANDOM2;
+        else if (strcmp (optarg, "zeros") == 0)   option_data = DATA_ZEROS;
+        else if (strcmp (optarg, "aas") == 0)     option_data = DATA_AAS;
+        else if (strcmp (optarg, "ffs") == 0)     option_data = DATA_FFS;
+        else if (strcmp (optarg, "2fd") == 0)     option_data = DATA_2FD;
+        else
+          {
+            fprintf (stderr, "unrecognised data option: %s\n", optarg);
+            exit (1);
+          }
+        break;
+      case 'C':
+        if (option_unit  != UNIT_SECONDS) goto bad_unit;
+        option_unit = UNIT_CYCLESPERLIMB;
+        break;
+      case 'c':
+        if (option_unit != UNIT_SECONDS)
+          {
+          bad_unit:
+            fprintf (stderr, "cannot use more than one of -c, -C\n");
+            exit (1);
+          }
+        option_unit = UNIT_CYCLES;
+        break;
+      case 'D':
+        if (option_cmp != CMP_ABSOLUTE) goto bad_cmp;
+        option_cmp = CMP_DIFFPREV;
+        break;
+      case 'd':
+        if (option_cmp != CMP_ABSOLUTE)
+          {
+          bad_cmp:
+            fprintf (stderr, "cannot use more than one of -d, -D, -r\n");
+            exit (1);
+          }
+        option_cmp = CMP_DIFFERENCE;
+        break;
+      case 'E':
+        option_square = 1;
+        break;
+      case 'F':
+        option_square = 2;
+        break;
+      case 'f':
+        option_factor = atof (optarg);
+        if (option_factor <= 1.0)
+          {
+            fprintf (stderr, "-f factor must be > 1.0\n");
+            exit (1);
+          }
+        break;
+      case 'o':
+        speed_option_set (optarg);
+        break;
+      case 'P':
+        option_gnuplot = 1;
+        option_gnuplot_basename = optarg;
+        break;
+      case 'p':
+        speed_precision = atoi (optarg);
+        break;
+      case 'R':
+        option_seed = time (NULL);
+        break;
+      case 'r':
+        if (option_cmp != CMP_ABSOLUTE)
+          goto bad_cmp;
+        option_cmp = CMP_RATIO;
+        break;
+      case 's':
+        {
+          char  *s;
+          for (s = strtok (optarg, ","); s != NULL; s = strtok (NULL, ","))
+            {
+              if (size_num == size_allocnum)
+                {
+                  size_array = (struct size_array_t *)
+                    __gmp_allocate_or_reallocate
+                    (size_array,
+                     size_allocnum * sizeof(size_array[0]),
+                     (size_allocnum+10) * sizeof(size_array[0]));
+                  size_allocnum += 10;
+                }
+              if (sscanf (s, "%ld-%ld",
+                          &size_array[size_num].start,
+                          &size_array[size_num].end) != 2)
+                {
+                  size_array[size_num].start = size_array[size_num].end
+                    = atol (s);
+                }
+
+              if (size_array[size_num].start < 0
+                  || size_array[size_num].end < 0
+                  || size_array[size_num].start > size_array[size_num].end)
+                {
+                  fprintf (stderr, "invalid size parameter: %s\n", s);
+                  exit (1);
+                }
+
+              size_num++;
+            }
+        }
+        break;
+      case 't':
+        option_step = atol (optarg);
+        if (option_step < 1)
+          {
+            fprintf (stderr, "-t step must be >= 1\n");
+            exit (1);
+          }
+        break;
+      case 'u':
+        option_resource_usage = 1;
+        break;
+      case 'z':
+        sp.cache = 1;
+        break;
+      case 'x':
+        sp.align_xp = atol (optarg);
+        check_align_option ("-x", sp.align_xp);
+        break;
+      case 'y':
+        sp.align_yp = atol (optarg);
+        check_align_option ("-y", sp.align_yp);
+        break;
+      case 'w':
+        sp.align_wp = atol (optarg);
+        check_align_option ("-w", sp.align_wp);
+        break;
+      case 'W':
+        sp.align_wp2 = atol (optarg);
+        check_align_option ("-W", sp.align_wp2);
+        break;
+      case '?':
+        exit(1);
+      }
+    }
+
+  if (optind >= argc)
+    {
+      usage ();
+      exit (1);
+    }
+
+  if (size_num == 0)
+    {
+      fprintf (stderr, "-s <size> must be specified\n");
+      exit (1);
+    }
+
+  gmp_randinit_default (__gmp_rands);
+  __gmp_rands_initialized = 1;
+  gmp_randseed_ui (__gmp_rands, option_seed);
+
+  choice = (struct choice_t *) (*__gmp_allocate_func)
+    ((argc - optind) * sizeof(choice[0]));
+  for ( ; optind < argc; optind++)
+    {
+      struct choice_t  c;
+      routine_find (&c, argv[optind]);
+      choice[num_choices] = c;
+      num_choices++;
+    }
+
+  if ((option_cmp == CMP_RATIO || option_cmp == CMP_DIFFERENCE) &&
+      num_choices < 2)
+    {
+      fprintf (stderr, "WARNING, -d or -r does nothing when only one routine requested\n");
+    }
+
+  speed_time_init ();
+  if (option_unit == UNIT_CYCLES || option_unit == UNIT_CYCLESPERLIMB)
+    speed_cycletime_need_cycles ();
+  else
+    speed_cycletime_need_seconds ();
+
+  if (option_gnuplot)
+    {
+      run_gnuplot (argc, argv);
+    }
+  else
+    {
+      if (option_unit == UNIT_SECONDS)
+        printf ("overhead %.9f secs", speed_measure (speed_noop, NULL));
+      else
+        printf ("overhead %.2f cycles",
+                speed_measure (speed_noop, NULL) / speed_cycletime);
+      printf (", precision %d units of %.2e secs",
+              speed_precision, speed_unittime);
+
+      if (speed_cycletime == 1.0 || speed_cycletime == 0.0)
+        printf (", CPU freq unknown\n");
+      else
+        printf (", CPU freq %.2f MHz\n", 1e-6/speed_cycletime);
+
+      printf ("       ");
+      for (i = 0; i < num_choices; i++)
+        printf (" %*s", COLUMN_WIDTH, choice[i].name);
+      printf ("\n");
+
+      run_all (stdout);
+    }
+
+  if (option_resource_usage)
+    {
+#if HAVE_GETRUSAGE
+      {
+        /* This doesn't give data sizes on linux 2.0.x, only utime. */
+        struct rusage  r;
+        if (getrusage (RUSAGE_SELF, &r) != 0)
+          perror ("getrusage");
+        else
+          printf ("getrusage(): utime %ld.%06ld data %ld stack %ld maxresident %ld\n",
+                  r.ru_utime.tv_sec, r.ru_utime.tv_usec,
+                  r.ru_idrss, r.ru_isrss, r.ru_ixrss);
+      }
+#else
+      printf ("getrusage() not available\n");
+#endif
+
+      /* Linux kernel. */
+      {
+        char  buf[128];
+        sprintf (buf, "/proc/%d/status", getpid());
+        if (access (buf, R_OK) == 0)
+          {
+            sprintf (buf, "cat /proc/%d/status", getpid());
+            system (buf);
+          }
+
+      }
+    }
+
+  return 0;
+}
diff --git a/tune/speed.h b/tune/speed.h

new file mode 100644 (file)

index 0000000..ac004ce
--- /dev/null
+++ b/tune/speed.h
@@ -0,0 +1,3057 @@
+/* Header for speed and threshold things.
+
+Copyright 1999, 2000, 2001, 2002, 2003, 2005, 2006, 2008, 2009, 2010 Free
+Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#ifndef __SPEED_H__
+#define __SPEED_H__
+
+
+/* Pad ptr,oldsize with zero limbs (at the most significant end) to make it
+   newsize long.  oldsize and newsize are each expanded more than once. */
+#define MPN_ZERO_EXTEND(ptr, oldsize, newsize)         \
+  do {                                                 \
+    ASSERT ((newsize) >= (oldsize));                   \
+    MPN_ZERO ((ptr)+(oldsize), (newsize)-(oldsize));   \
+  } while (0)
+
+/* A mask of the least significant n bits; n is expanded twice.  1<<32 doesn't
+   give zero on x86 family CPUs, hence the separate case for GMP_LIMB_BITS. */
+#define MP_LIMB_T_LOWBITMASK(n)        \
+  ((n) == GMP_LIMB_BITS ? MP_LIMB_T_MAX : ((mp_limb_t) 1 << (n)) - 1)
+
+
+/* align must be a power of 2 here, usually CACHE_LINE_SIZE is a good choice */
+
+#define TMP_ALLOC_ALIGNED(bytes, align)        \
+  align_pointer (TMP_ALLOC ((bytes) + (align)-1), (align))
+#define TMP_ALLOC_LIMBS_ALIGNED(limbs, align)  \
+  ((mp_ptr) TMP_ALLOC_ALIGNED ((limbs)*sizeof(mp_limb_t), align))
+
+/* CACHE_LINE_SIZE is our default alignment for speed operands, and the
+   limit on what s->align_xp etc can then request for off-alignment.  Maybe
+   this should be an option of some sort, but in any case here are some line
+   sizes,
+
+       bytes
+        32   pentium
+        64   athlon
+        64   itanium-2 L1
+       128   itanium-2 L2
+*/
+#define CACHE_LINE_SIZE   64 /* bytes */
+
+#define SPEED_TMP_ALLOC_ADJUST_MASK  (CACHE_LINE_SIZE/BYTES_PER_MP_LIMB - 1)
+
+/* Set ptr to a TMP_ALLOC block of the given limbs, with the given limb
+   alignment.  */
+#define SPEED_TMP_ALLOC_LIMBS(ptr, limbs, align)                       \
+  do {                                                                 \
+    mp_ptr     __ptr;                                                  \
+    mp_size_t  __ptr_align, __ptr_add;                                 \
+                                                                       \
+    ASSERT ((CACHE_LINE_SIZE % BYTES_PER_MP_LIMB) == 0);               \
+    __ptr = TMP_ALLOC_LIMBS ((limbs) + SPEED_TMP_ALLOC_ADJUST_MASK);   \
+    __ptr_align = (__ptr - (mp_ptr) NULL);                             \
+    __ptr_add = ((align) - __ptr_align) & SPEED_TMP_ALLOC_ADJUST_MASK; \
+    (ptr) = __ptr + __ptr_add;                                         \
+  } while (0)
+
+
+/* This is the size for s->xp_block and s->yp_block, used in certain
+   routines that want to run across many different data values and use
+   s->size for a different purpose, eg. SPEED_ROUTINE_MPN_GCD_1.
+
+   512 means 2kbytes of data for each of xp_block and yp_block, making 4k
+   total, which should fit easily in any L1 data cache. */
+
+#define SPEED_BLOCK_SIZE   512 /* limbs */
+
+
+extern double  speed_unittime;
+extern double  speed_cycletime;
+extern int     speed_precision;
+extern char    speed_time_string[];
+void speed_time_init __GMP_PROTO ((void));
+void speed_cycletime_fail __GMP_PROTO ((const char *str));
+void speed_cycletime_init __GMP_PROTO ((void));
+void speed_cycletime_need_cycles __GMP_PROTO ((void));
+void speed_cycletime_need_seconds __GMP_PROTO ((void));
+void speed_starttime __GMP_PROTO ((void));
+double speed_endtime __GMP_PROTO ((void));
+
+
+struct speed_params {  /* passed by pointer to every speed_function_t */
+  unsigned   reps;     /* how many times to run the routine */
+  mp_ptr     xp;       /* first argument */
+  mp_ptr     yp;       /* second argument */
+  mp_size_t  size;     /* size of both arguments (usually in limbs) */
+  mp_limb_t  r;                /* user supplied parameter */
+  mp_size_t  align_xp; /* alignment of xp */
+  mp_size_t  align_yp; /* alignment of yp */
+  mp_size_t  align_wp; /* intended alignment of wp, in limbs */
+  mp_size_t  align_wp2; /* intended alignment of wp2 */
+  mp_ptr     xp_block; /* first special SPEED_BLOCK_SIZE block */
+  mp_ptr     yp_block; /* second special SPEED_BLOCK_SIZE block */
+
+  double     time_divisor; /* optionally set by the speed routine */
+
+  /* used by the cache priming things */
+  int       cache;
+  unsigned   src_num, dst_num; /* presumably entries in use below - confirm */
+  struct {
+    mp_ptr    ptr;
+    mp_size_t size;
+  } src[3], dst[3]; /* presumably set via speed_operand_src/dst - confirm */
+};
+
+typedef double (*speed_function_t) __GMP_PROTO ((struct speed_params *s));
+
+double speed_measure __GMP_PROTO ((speed_function_t fun, struct speed_params *s));
+
+/* Prototypes for speed measuring routines */
+
+double speed_back_to_back __GMP_PROTO ((struct speed_params *s));
+double speed_count_leading_zeros __GMP_PROTO ((struct speed_params *s));
+double speed_count_trailing_zeros __GMP_PROTO ((struct speed_params *s));
+double speed_find_a __GMP_PROTO ((struct speed_params *s));
+double speed_gmp_allocate_free __GMP_PROTO ((struct speed_params *s));
+double speed_gmp_allocate_reallocate_free __GMP_PROTO ((struct speed_params *s));
+double speed_invert_limb __GMP_PROTO ((struct speed_params *s));
+double speed_malloc_free __GMP_PROTO ((struct speed_params *s));
+double speed_malloc_realloc_free __GMP_PROTO ((struct speed_params *s));
+double speed_memcpy __GMP_PROTO ((struct speed_params *s));
+double speed_binvert_limb __GMP_PROTO ((struct speed_params *s));
+double speed_binvert_limb_mul1 __GMP_PROTO ((struct speed_params *s));
+double speed_binvert_limb_loop __GMP_PROTO ((struct speed_params *s));
+double speed_binvert_limb_cond __GMP_PROTO ((struct speed_params *s));
+double speed_binvert_limb_arith __GMP_PROTO ((struct speed_params *s));
+
+double speed_mpf_init_clear __GMP_PROTO ((struct speed_params *s));
+
+double speed_mpn_add_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_addlsh1_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_addlsh2_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_add_n_sub_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_and_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_andn_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_addmul_1 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_addmul_2 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_addmul_3 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_addmul_4 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_addmul_5 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_addmul_6 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_addmul_7 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_addmul_8 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_com __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_copyd __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_copyi __GMP_PROTO ((struct speed_params *s));
+double speed_MPN_COPY __GMP_PROTO ((struct speed_params *s));
+double speed_MPN_COPY_DECR __GMP_PROTO ((struct speed_params *s));
+double speed_MPN_COPY_INCR __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_divexact_1 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_divexact_by3 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_bdiv_q_1 __GMP_PROTO ((struct speed_params *));
+double speed_mpn_pi1_bdiv_q_1 __GMP_PROTO ((struct speed_params *));
+double speed_mpn_bdiv_dbm1c __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_divrem_1 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_divrem_1f __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_divrem_1c __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_divrem_1cf __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_divrem_1_div __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_divrem_1f_div __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_divrem_1_inv __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_divrem_1f_inv __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_divrem_2 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_divrem_2_div __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_divrem_2_inv __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_fib2_ui __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_matrix22_mul __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_hgcd __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_hgcd_lehmer __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_gcd __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_gcd_1 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_gcd_1N __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_gcdext __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_gcdext_double __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_gcdext_one_double __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_gcdext_one_single __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_gcdext_single __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_get_str __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_hamdist __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_ior_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_iorn_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_jacobi_base __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_jacobi_base_1 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_jacobi_base_2 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_jacobi_base_3 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_lshift __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_lshiftc __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mod_1 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mod_1c __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mod_1_div __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mod_1_inv __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mod_1_1 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mod_1_2 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mod_1_3 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mod_1_4 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mod_34lsub1 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_modexact_1_odd __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_modexact_1c_odd __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mul_1 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mul_1_inplace __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mul_2 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mul_3 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mul_4 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mul __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mul_basecase __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mul_fft __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mul_fft_sqr __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_fft_mul __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_fft_sqr __GMP_PROTO ((struct speed_params *s));
+#if WANT_OLD_FFT_FULL
+double speed_mpn_mul_fft_full __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mul_fft_full_sqr __GMP_PROTO ((struct speed_params *s));
+#endif
+double speed_mpn_nussbaumer_mul __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_nussbaumer_mul_sqr __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mul_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mul_n_sqr __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mullo_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mullo_basecase __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_nand_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_nior_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_popcount __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_preinv_divrem_1 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_preinv_divrem_1f __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_preinv_mod_1 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_sbpi1_div_qr __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_dcpi1_div_qr __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_sbpi1_divappr_q __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_dcpi1_divappr_q __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mu_div_qr __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mu_divappr_q __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mupi_div_qr __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mu_div_q __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_sbpi1_bdiv_qr __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_dcpi1_bdiv_qr __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_sbpi1_bdiv_q __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_dcpi1_bdiv_q __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mu_bdiv_q __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mu_bdiv_qr __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_invert __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_invertappr __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_ni_invertappr __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_binvert __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_redc_1 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_redc_2 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_redc_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_rsblsh1_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_rsblsh2_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_rsh1add_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_rsh1sub_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_rshift __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_sb_divrem_m3 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_sb_divrem_m3_div __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_sb_divrem_m3_inv __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_set_str __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_bc_set_str __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_dc_set_str __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_set_str_pre __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_sqr_basecase __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_sqr_diagonal __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_sqr __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_sqrtrem __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_rootrem __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_sub_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_sublsh1_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_sublsh2_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_submul_1 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_toom2_sqr __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_toom3_sqr __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_toom4_sqr __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_toom6_sqr __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_toom8_sqr __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_toom22_mul __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_toom33_mul __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_toom44_mul __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_toom6h_mul __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_toom8h_mul __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_toom32_mul __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_toom42_mul __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_toom43_mul __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_toom63_mul __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_toom32_for_toom43_mul __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_toom43_for_toom32_mul __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_toom32_for_toom53_mul __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_toom53_for_toom32_mul __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_toom42_for_toom53_mul __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_toom53_for_toom42_mul __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mulmod_bnm1 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_bc_mulmod_bnm1 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mulmod_bnm1_rounded __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_sqrmod_bnm1 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_udiv_qrnnd __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_udiv_qrnnd_r __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_umul_ppmm __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_umul_ppmm_r __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_xnor_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_xor_n __GMP_PROTO ((struct speed_params *s));
+double speed_MPN_ZERO __GMP_PROTO ((struct speed_params *s));
+
+double speed_mpq_init_clear __GMP_PROTO ((struct speed_params *s));
+
+double speed_mpz_add __GMP_PROTO ((struct speed_params *s));
+double speed_mpz_bin_uiui __GMP_PROTO ((struct speed_params *s));
+double speed_mpz_fac_ui __GMP_PROTO ((struct speed_params *s));
+double speed_mpz_fib_ui __GMP_PROTO ((struct speed_params *s));
+double speed_mpz_fib2_ui __GMP_PROTO ((struct speed_params *s));
+double speed_mpz_init_clear __GMP_PROTO ((struct speed_params *s));
+double speed_mpz_init_realloc_clear __GMP_PROTO ((struct speed_params *s));
+double speed_mpz_jacobi __GMP_PROTO ((struct speed_params *s));
+double speed_mpz_lucnum_ui __GMP_PROTO ((struct speed_params *s));
+double speed_mpz_lucnum2_ui __GMP_PROTO ((struct speed_params *s));
+double speed_mpz_mod __GMP_PROTO ((struct speed_params *s));
+double speed_mpz_powm __GMP_PROTO ((struct speed_params *s));
+double speed_mpz_powm_mod __GMP_PROTO ((struct speed_params *s));
+double speed_mpz_powm_redc __GMP_PROTO ((struct speed_params *s));
+double speed_mpz_powm_ui __GMP_PROTO ((struct speed_params *s));
+double speed_mpz_urandomb __GMP_PROTO ((struct speed_params *s));
+
+double speed_gmp_randseed __GMP_PROTO ((struct speed_params *s));
+double speed_gmp_randseed_ui __GMP_PROTO ((struct speed_params *s));
+
+double speed_noop __GMP_PROTO ((struct speed_params *s));
+double speed_noop_wxs __GMP_PROTO ((struct speed_params *s));
+double speed_noop_wxys __GMP_PROTO ((struct speed_params *s));
+
+double speed_operator_div __GMP_PROTO ((struct speed_params *s));
+double speed_operator_mod __GMP_PROTO ((struct speed_params *s));
+
+double speed_udiv_qrnnd __GMP_PROTO ((struct speed_params *s));
+double speed_udiv_qrnnd_preinv1 __GMP_PROTO ((struct speed_params *s));
+double speed_udiv_qrnnd_preinv2 __GMP_PROTO ((struct speed_params *s));
+double speed_udiv_qrnnd_c __GMP_PROTO ((struct speed_params *s));
+double speed_umul_ppmm __GMP_PROTO ((struct speed_params *s));
+
+/* Prototypes for other routines */
+
+/* low 32-bits in p[0], high 32-bits in p[1] */
+void speed_cyclecounter __GMP_PROTO ((unsigned p[2]));
+
+void mftb_function __GMP_PROTO ((unsigned p[2]));
+
+/* Inline cpuid+rdtsc for gcc on x86: eax goes to p[0] and edx to p[1].  In
+   i386 gcc -fPIC, ebx is a fixed register and can't be declared a dummy
+   output or a clobber for the cpuid, hence an explicit save and restore.  A
+   clobber doesn't provoke an error (gcc 3.0), so non-PIC uses the dummy
+   output style, giving an error if -fPIC is used without a -DPIC.  */
+#if defined(__GNUC__) && ! defined (NO_ASM)    \
+  && (defined (__i386__) || defined (__i486__))
+#if defined (PIC) || defined (__APPLE_CC__)
+#define speed_cyclecounter(p)                                          \
+  do {                                                                 \
+    int         __speed_cyclecounter__save_ebx;                                \
+    int         __speed_cyclecounter__dummy;                                   \
+    __asm__ __volatile__ ("movl %%ebx, %1\n"                           \
+                         "cpuid\n"                                     \
+                         "movl %1, %%ebx\n"                            \
+                         "rdtsc"                                       \
+                         : "=a"   ((p)[0]),                            \
+                           "=&rm" (__speed_cyclecounter__save_ebx),    \
+                           "=c"   (__speed_cyclecounter__dummy),       \
+                           "=d"   ((p)[1]));                           \
+  } while (0)
+#else /* not PIC: ebx is declared as a dummy output */
+#define speed_cyclecounter(p)                                          \
+  do {                                                                 \
+    int         __speed_cyclecounter__dummy1;                                  \
+    int         __speed_cyclecounter__dummy2;                                  \
+    __asm__ __volatile__ ("cpuid\n"                                    \
+                         "rdtsc"                                       \
+                         : "=a" ((p)[0]),                              \
+                           "=b" (__speed_cyclecounter__dummy1),        \
+                           "=c" (__speed_cyclecounter__dummy2),        \
+                           "=d" ((p)[1]));                             \
+  } while (0)
+#endif /* PIC or __APPLE_CC__ */
+#endif /* gcc on x86 with asm allowed */
+
+double speed_cyclecounter_diff __GMP_PROTO ((const unsigned [2], const unsigned [2]));
+int gettimeofday_microseconds_p __GMP_PROTO ((void));
+int getrusage_microseconds_p __GMP_PROTO ((void));
+int cycles_works_p __GMP_PROTO ((void));
+long clk_tck __GMP_PROTO ((void));
+double freq_measure __GMP_PROTO ((const char *, double (*)(void)));
+
+int double_cmp_ptr __GMP_PROTO ((const double *, const double *));
+void pentium_wbinvd __GMP_PROTO ((void));
+typedef int (*qsort_function_t) __GMP_PROTO ((const void *, const void *));
+
+void noop __GMP_PROTO ((void));
+void noop_1 __GMP_PROTO ((mp_limb_t));
+void noop_wxs __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
+void noop_wxys __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+void mpn_cache_fill __GMP_PROTO ((mp_srcptr, mp_size_t));
+void mpn_cache_fill_dummy __GMP_PROTO ((mp_limb_t));
+void speed_cache_fill __GMP_PROTO ((struct speed_params *));
+void speed_operand_src __GMP_PROTO ((struct speed_params *, mp_ptr, mp_size_t));
+void speed_operand_dst __GMP_PROTO ((struct speed_params *, mp_ptr, mp_size_t));
+
+extern int  speed_option_addrs;
+extern int  speed_option_verbose;
+void speed_option_set __GMP_PROTO((const char *));
+
+mp_limb_t mpn_divrem_1_div __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
+mp_limb_t mpn_divrem_1_inv __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
+mp_limb_t mpn_divrem_2_div __GMP_PROTO ((mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr));
+mp_limb_t mpn_divrem_2_inv __GMP_PROTO ((mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr));
+
+int mpn_jacobi_base_1 __GMP_PROTO ((mp_limb_t, mp_limb_t, int));
+int mpn_jacobi_base_2 __GMP_PROTO ((mp_limb_t, mp_limb_t, int));
+int mpn_jacobi_base_3 __GMP_PROTO ((mp_limb_t, mp_limb_t, int));
+
+mp_limb_t mpn_mod_1_div __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t));
+mp_limb_t mpn_mod_1_inv __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t));
+
+mp_size_t mpn_gcd_binary
+  __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t));
+mp_size_t mpn_gcd_accel
+  __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t));
+mp_size_t mpn_gcdext_one_double
+  __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t));
+mp_size_t mpn_gcdext_one_single
+  __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t));
+mp_size_t mpn_gcdext_single
+  __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t));
+mp_size_t mpn_gcdext_double
+  __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t));
+
+mp_limb_t mpn_sb_divrem_mn_div __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t));
+mp_limb_t mpn_sb_divrem_mn_inv __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t));
+
+mp_size_t mpn_set_str_basecase __GMP_PROTO ((mp_ptr, const unsigned char *, size_t, int));
+void mpn_pre_set_str __GMP_PROTO ((mp_ptr, unsigned char *, size_t, powers_t *, mp_ptr));
+
+void mpz_powm_mod __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr, mpz_srcptr));
+void mpz_powm_redc __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr, mpz_srcptr));
+
+int speed_routine_count_zeros_setup
+  __GMP_PROTO ((struct speed_params *, mp_ptr, int, int));
+
+
+/* "get" is called repeatedly until it ticks over, just in case on a fast
+   processor it takes less than a microsecond, though this is probably
+   unlikely if it's a system call.
+
+   speed_cyclecounter is called on the same side of the "get" for the start
+   and end measurements.  It doesn't matter how long it takes from the "get"
+   sample to the cycles sample, since that period will cancel out in the
+   difference calculation (assuming it's the same each time).
+
+   Letting the test run for more than a process time slice is probably only
+   going to reduce accuracy, especially for getrusage when the cycle counter
+   is real time, or for gettimeofday if the cycle counter is in fact process
+   time.  Use CLK_TCK/2 as a reasonable stop.
+
+   It'd be desirable to be quite accurate here.  The default speed_precision
+   for a cycle counter is 10000 cycles, so to mix that with getrusage or
+   gettimeofday the frequency should be at least that accurate.  But running
+   measurements for 10000 microseconds (or more) is too long.  Be satisfied
+   with just a half clock tick (5000 microseconds usually).  */
+
+#define FREQ_MEASURE_ONE(name, type, get, getc, sec, usec)             \
+  do {                                                                 \
+    type      st1, st, et1, et;                                                \
+    unsigned  sc[2], ec[2];                                            \
+    long      dt, half_tick;                                           \
+    double    dc, cyc;                                                 \
+    /* half a clock tick, in microseconds */                           \
+    half_tick = (1000000L / clk_tck()) / 2;                            \
+    /* "get" fills in a "type" time value; spin until it ticks over */ \
+    get (st1);                                                         \
+    do {                                                               \
+      get (st);                                                                \
+    } while (usec(st) == usec(st1) && sec(st) == sec(st1));            \
+    /* "getc" fills in a two-word cycle count; this is the start */    \
+    getc (sc);                                                         \
+                                                                       \
+    for (;;)                                                           \
+      {                                                                        \
+       get (et1);                                                      \
+       do {                                                            \
+         get (et);                                                     \
+       } while (usec(et) == usec(et1) && sec(et) == sec(et1));         \
+                                                                       \
+       getc (ec);                                                      \
+                                                                       \
+       dc = speed_cyclecounter_diff (ec, sc);                          \
+                                                                       \
+       /* allow secs to cancel before multiplying */                   \
+       dt = sec(et) - sec(st);                                         \
+       dt = dt * 1000000L + (usec(et) - usec(st));                     \
+       /* dt is microseconds since st; stop after half a tick */       \
+       if (dt >= half_tick)                                            \
+         break;                                                        \
+      }                                                                        \
+    /* seconds per cycle */                                            \
+    cyc = dt * 1e-6 / dc;                                              \
+                                                                       \
+    if (speed_option_verbose >= 2)                                     \
+      printf ("freq_measure_%s_one() dc=%.6g dt=%ld cyc=%.6g\n",       \
+             name, dc, dt, cyc);                                       \
+    /* NB: returns from the enclosing function, same value as cyc */   \
+    return dt * 1e-6 / dc;                                             \
+                                                                       \
+  } while (0)
+
+
+
+
+/* The measuring routines use these big macros to save duplication for
+   similar forms.  They also get used for some automatically generated
+   measuring of new implementations of functions.
+
+   Having something like SPEED_ROUTINE_BINARY_N as a subroutine accepting a
+   function pointer is considered undesirable since it's not the way a
+   normal application will be calling, and some processors might do
+   different things with an indirect call, like not branch predicting, or
+   doing a full pipe flush.  At least some of the "functions" measured are
+   actually macros too.
+
+   The net effect is to bloat the object code, possibly in a big way, but
+   only what's being measured is being run, so that doesn't matter.
+
+   The loop forms don't try to cope with __GMP_ATTRIBUTE_PURE or
+   ATTRIBUTE_CONST on the called functions.  Adding a cast to a non-pure
+   function pointer doesn't work in gcc 3.2.  Using an actual non-pure
+   function pointer variable works, but stands a real risk of a
+   non-optimizing compiler generating unnecessary overheads in the call.
+   Currently the best idea is not to use those attributes for a timing
+   program build.  __GMP_NO_ATTRIBUTE_CONST_PURE will tell gmp.h and
+   gmp-impl.h to omit them from routines there.  */
+
+#define SPEED_RESTRICT_COND(cond)   if (!(cond)) return -1.0;
+
+/* Function body timing s->reps calls of function (dest, s->xp, s->size).  */
+#define SPEED_ROUTINE_MPN_COPY(function)                               \
+  {                                                                    \
+    double    elapsed;                                                 \
+    unsigned  remaining;                                               \
+    mp_ptr    dest;                                                    \
+    TMP_DECL;                                                          \
+                                                                       \
+    /* returns -1.0 for a negative size */                             \
+    SPEED_RESTRICT_COND (s->size >= 0);                                \
+    TMP_MARK;                                                          \
+    SPEED_TMP_ALLOC_LIMBS (dest, s->size, s->align_wp);                \
+                                                                       \
+    /* operand registration and cache fill, before the clock starts */ \
+    speed_operand_src (s, s->xp, s->size);                             \
+    speed_operand_dst (s, dest, s->size);                              \
+    speed_cache_fill (s);                                              \
+                                                                       \
+    speed_starttime ();                                                \
+    remaining = s->reps;                                               \
+    do {                                                               \
+      function (dest, s->xp, s->size);                                 \
+    } while (--remaining != 0);                                        \
+    elapsed = speed_endtime ();                                        \
+    TMP_FREE;                                                          \
+    return elapsed;                                                    \
+  }
+
+/* Same shape as the plain copy measurement, but the timed call carries
+   a fourth argument fixed at 0: function (wp, s->xp, s->size, 0).
+   Returns -1.0 for a negative s->size.  */
+#define SPEED_ROUTINE_MPN_COPYC(function)                              \
+  {                                                                    \
+    double    elapsed;                                                 \
+    unsigned  remaining;                                               \
+    mp_ptr    wp;                                                      \
+    TMP_DECL;                                                          \
+                                                                       \
+    SPEED_RESTRICT_COND (s->size >= 0);                                \
+                                                                       \
+    TMP_MARK;                                                          \
+    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);                  \
+                                                                       \
+    /* operands are declared ahead of speed_cache_fill */              \
+    speed_operand_src (s, s->xp, s->size);                             \
+    speed_operand_dst (s, wp, s->size);                                \
+    speed_cache_fill (s);                                              \
+                                                                       \
+    speed_starttime ();                                                \
+    remaining = s->reps;                                               \
+    do                                                                 \
+      {                                                                \
+        function (wp, s->xp, s->size, 0);                              \
+      }                                                                \
+    while (--remaining != 0);                                          \
+    elapsed = speed_endtime ();                                        \
+                                                                       \
+    TMP_FREE;                                                          \
+    return elapsed;                                                    \
+  }
+
+/* s->size is still in limbs, and it's limbs which are allocated and
+   declared as operands, but "function" takes a size in bytes not limbs,
+   so it is handed s->size * BYTES_PER_MP_LIMB.  */
+#define SPEED_ROUTINE_MPN_COPY_BYTES(function)                         \
+  {                                                                    \
+    double    elapsed;                                                 \
+    unsigned  remaining;                                               \
+    mp_ptr    wp;                                                      \
+    TMP_DECL;                                                          \
+                                                                       \
+    SPEED_RESTRICT_COND (s->size >= 0);                                \
+                                                                       \
+    TMP_MARK;                                                          \
+    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);                  \
+                                                                       \
+    /* operand sizes stay in limbs */                                  \
+    speed_operand_src (s, s->xp, s->size);                             \
+    speed_operand_dst (s, wp, s->size);                                \
+    speed_cache_fill (s);                                              \
+                                                                       \
+    speed_starttime ();                                                \
+    remaining = s->reps;                                               \
+    do                                                                 \
+      {                                                                \
+        function (wp, s->xp, s->size * BYTES_PER_MP_LIMB);             \
+      }                                                                \
+    while (--remaining != 0);                                          \
+    elapsed = speed_endtime ();                                        \
+                                                                       \
+    TMP_FREE;                                                          \
+    return elapsed;                                                    \
+  }
+
+
+/* For mpn_add_n, mpn_sub_n, or similar.  CALL is the expression to time;
+   it may refer to wp, xp, yp and s.  wp is a fresh s->size limb block
+   and the sources are picked by s->r:
+     0  xp = s->xp, yp = s->yp (no overlap)
+     1  xp = wp
+     2  yp = wp
+     3  xp = wp and yp = wp
+     4  yp = xp
+   Any other s->r, or s->size < 1, gives -1.0.  */
+#define SPEED_ROUTINE_MPN_BINARY_N_CALL(call)                          \
+  {                                                                    \
+    mp_ptr     wp;                                                     \
+    mp_ptr     xp, yp;                                                 \
+    unsigned   i;                                                      \
+    double     t;                                                      \
+    TMP_DECL;                                                          \
+                                                                       \
+    SPEED_RESTRICT_COND (s->size >= 1);                                \
+                                                                       \
+    TMP_MARK;                                                          \
+    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);                  \
+                                                                       \
+    xp = s->xp;                                                        \
+    yp = s->yp;                                                        \
+                                                                       \
+    switch (s->r)                                                      \
+      {                                                                \
+      case 0:                       break;                             \
+      case 1:  xp = wp;             break;                             \
+      case 2:  yp = wp;             break;                             \
+      case 3:  xp = wp;  yp = wp;   break;                             \
+      case 4:  yp = xp;             break;                             \
+      default:                                                         \
+        TMP_FREE;                                                      \
+        return -1.0;                                                   \
+      }                                                                \
+                                                                       \
+    /* wp is read as well as written when it overlaps a source */      \
+    if (xp == wp || yp == wp)                                          \
+      MPN_COPY (wp, s->xp, s->size);                                   \
+                                                                       \
+    speed_operand_src (s, xp, s->size);                                \
+    speed_operand_src (s, yp, s->size);                                \
+    speed_operand_dst (s, wp, s->size);                                \
+    speed_cache_fill (s);                                              \
+                                                                       \
+    speed_starttime ();                                                \
+    i = s->reps;                                                       \
+    do                                                                 \
+      {                                                                \
+        call;                                                          \
+      }                                                                \
+    while (--i != 0);                                                  \
+    t = speed_endtime ();                                              \
+                                                                       \
+    TMP_FREE;                                                          \
+    return t;                                                          \
+  }
+
+/* For a routine with two s->size limb destinations, ap and sp, and two
+   sources (presumably a combined add-and-subtract, going by the name;
+   CALL itself is opaque here).  CALL may refer to ap, sp, xp, yp and s.
+   Bits of s->r select operand overlap:
+     1  xp = ap      2  yp = ap      4  xp = sp      8  yp = sp
+   with the sp choices applied after the ap ones.  Both sources in ap
+   (1+2) or both in sp (4+8) is rejected with -1.0, as is s->size < 1.  */
+#define SPEED_ROUTINE_MPN_ADDSUB_N_CALL(call)                          \
+  {                                                                    \
+    mp_ptr     ap, sp;                                                 \
+    mp_ptr     xp, yp;                                                 \
+    unsigned   i;                                                      \
+    double     t;                                                      \
+    TMP_DECL;                                                          \
+                                                                       \
+    SPEED_RESTRICT_COND (s->size >= 1);                                \
+    SPEED_RESTRICT_COND ((s->r & 3) != 3  &&  (s->r & 12) != 12);      \
+                                                                       \
+    TMP_MARK;                                                          \
+    SPEED_TMP_ALLOC_LIMBS (ap, s->size, s->align_wp);                  \
+    SPEED_TMP_ALLOC_LIMBS (sp, s->size, s->align_wp);                  \
+                                                                       \
+    xp = s->xp;                                                        \
+    yp = s->yp;                                                        \
+    if ((s->r & 1) != 0)                                               \
+      xp = ap;                                                         \
+    if ((s->r & 2) != 0)                                               \
+      yp = ap;                                                         \
+    if ((s->r & 4) != 0)                                               \
+      xp = sp;                                                         \
+    if ((s->r & 8) != 0)                                               \
+      yp = sp;                                                         \
+                                                                       \
+    /* a destination that doubles as a source needs contents */        \
+    if (xp == ap || yp == ap)                                          \
+      MPN_COPY (ap, s->xp, s->size);                                   \
+    if (xp == sp || yp == sp)                                          \
+      MPN_COPY (sp, s->xp, s->size);                                   \
+                                                                       \
+    speed_operand_src (s, xp, s->size);                                \
+    speed_operand_src (s, yp, s->size);                                \
+    speed_operand_dst (s, ap, s->size);                                \
+    speed_operand_dst (s, sp, s->size);                                \
+    speed_cache_fill (s);                                              \
+                                                                       \
+    speed_starttime ();                                                \
+    i = s->reps;                                                       \
+    do                                                                 \
+      {                                                                \
+        call;                                                          \
+      }                                                                \
+    while (--i != 0);                                                  \
+    t = speed_endtime ();                                              \
+                                                                       \
+    TMP_FREE;                                                          \
+    return t;                                                          \
+  }
+
+/* Times (*function) (wp, xp, yp, s->size) with the operand overlap
+   handling of SPEED_ROUTINE_MPN_BINARY_N_CALL.  */
+#define SPEED_ROUTINE_MPN_BINARY_N(function)                           \
+  SPEED_ROUTINE_MPN_BINARY_N_CALL                                      \
+    ((*function) (wp, xp, yp, s->size))
+
+/* The same with one more argument, fixed at 0 (presumably a carry-in).  */
+#define SPEED_ROUTINE_MPN_BINARY_NC(function)                          \
+  SPEED_ROUTINE_MPN_BINARY_N_CALL                                      \
+    ((*function) (wp, xp, yp, s->size, 0))
+
+
+/* For mpn_lshift, mpn_rshift, mpn_mul_1, with r, or similar.  CALL is
+   the expression to time; it may refer to s and to wp, a fresh s->size
+   limb block.  s->xp is declared as the source operand.  Returns -1.0
+   when s->size < 1.  */
+#define SPEED_ROUTINE_MPN_UNARY_1_CALL(call)                           \
+  {                                                                    \
+    mp_ptr    wp;                                                      \
+    unsigned  i;                                                       \
+    double    t;                                                       \
+    TMP_DECL;                                                          \
+                                                                       \
+    SPEED_RESTRICT_COND (s->size >= 1);                                \
+                                                                       \
+    TMP_MARK;                                                          \
+    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);                  \
+                                                                       \
+    /* operands are declared ahead of speed_cache_fill */              \
+    speed_operand_src (s, s->xp, s->size);                             \
+    speed_operand_dst (s, wp, s->size);                                \
+    speed_cache_fill (s);                                              \
+                                                                       \
+    speed_starttime ();                                                \
+    i = s->reps;                                                       \
+    do                                                                 \
+      {                                                                \
+        call;                                                          \
+      }                                                                \
+    while (--i != 0);                                                  \
+    t = speed_endtime ();                                              \
+                                                                       \
+    TMP_FREE;                                                          \
+    return t;                                                          \
+  }
+
+/* The timed call is (*function) (wp, s->xp, s->size, s->r).  */
+#define SPEED_ROUTINE_MPN_UNARY_1(function)                            \
+  SPEED_ROUTINE_MPN_UNARY_1_CALL                                       \
+    ((*function) (wp, s->xp, s->size, s->r))
+
+/* The same with one more argument, fixed at 0.  */
+#define SPEED_ROUTINE_MPN_UNARY_1C(function)                           \
+  SPEED_ROUTINE_MPN_UNARY_1_CALL                                       \
+    ((*function) (wp, s->xp, s->size, s->r, 0))
+
+/* In-place form: the timed call is (*function) (wp, wp, s->size, s->r),
+   so wp is read as well as written.  wp is started off as a copy of
+   s->xp before the clock starts, which means the first call works on
+   defined data; each later call then works on the previous call's
+   output.  Returns -1.0 when s->size < 1.  */
+#define SPEED_ROUTINE_MPN_UNARY_1_INPLACE(function)                    \
+  {                                                                    \
+    mp_ptr    wp;                                                      \
+    unsigned  i;                                                       \
+    double    t;                                                       \
+    TMP_DECL;                                                          \
+                                                                       \
+    SPEED_RESTRICT_COND (s->size >= 1);                                \
+                                                                       \
+    TMP_MARK;                                                          \
+    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);                  \
+                                                                       \
+    /* wp is a source here, it must not be left uninitialized */       \
+    MPN_COPY (wp, s->xp, s->size);                                     \
+                                                                       \
+    speed_operand_src (s, s->xp, s->size);                             \
+    speed_operand_dst (s, wp, s->size);                                \
+    speed_cache_fill (s);                                              \
+                                                                       \
+    speed_starttime ();                                                \
+    i = s->reps;                                                       \
+    do                                                                 \
+      (*function) (wp, wp, s->size, s->r);                             \
+    while (--i != 0);                                                  \
+    t = speed_endtime ();                                              \
+                                                                       \
+    TMP_FREE;                                                          \
+    return t;                                                          \
+  }
+
+/* Both of these time (*function) (wp, s->xp, s->size, s->r) through
+   SPEED_ROUTINE_MPN_UNARY_1_CALL; only the macro names differ.  */
+#define SPEED_ROUTINE_MPN_DIVEXACT_1(function)                         \
+  SPEED_ROUTINE_MPN_UNARY_1_CALL                                       \
+    ((*function) (wp, s->xp, s->size, s->r))
+
+#define SPEED_ROUTINE_MPN_BDIV_Q_1(function)                           \
+  SPEED_ROUTINE_MPN_UNARY_1_CALL                                       \
+    ((*function) (wp, s->xp, s->size, s->r))
+
+/* Wraps SPEED_ROUTINE_MPN_UNARY_1_CALL, first setting up two values
+   that CALL may use outside the timed loop: shift, obtained from
+   count_trailing_zeros on s->r, and dinv, obtained from binvert_limb
+   on s->r >> shift.  Returns -1.0 unless s->size > 0 and s->r != 0.  */
+#define SPEED_ROUTINE_MPN_PI1_BDIV_Q_1_CALL(call)                      \
+  {                                                                    \
+    mp_limb_t  dinv;                                                   \
+    unsigned   shift;                                                  \
+                                                                       \
+    SPEED_RESTRICT_COND (s->size > 0 && s->r != 0);                    \
+                                                                       \
+    /* s->r is nonzero from here on */                                 \
+    count_trailing_zeros (shift, s->r);                                \
+    binvert_limb (dinv, s->r >> shift);                                \
+                                                                       \
+    SPEED_ROUTINE_MPN_UNARY_1_CALL (call);                             \
+  }
+/* The timed call is (*function) (wp, s->xp, s->size, s->r, dinv, shift),
+   with dinv and shift supplied by the _CALL macro.  */
+#define SPEED_ROUTINE_MPN_PI1_BDIV_Q_1(function)                       \
+  SPEED_ROUTINE_MPN_PI1_BDIV_Q_1_CALL                                  \
+    ((*function) (wp, s->xp, s->size, s->r, dinv, shift))
+
+/* Thin wrappers over SPEED_ROUTINE_MPN_UNARY_1_CALL; each one only fixes
+   the argument list given to (*function).  */
+
+/* (wp, s->xp, s->size, s->r, 0) */
+#define SPEED_ROUTINE_MPN_BDIV_DBM1C(function)                         \
+  SPEED_ROUTINE_MPN_UNARY_1_CALL                                       \
+    ((*function) (wp, s->xp, s->size, s->r, 0))
+
+/* (wp, 0, s->xp, s->size, s->r) */
+#define SPEED_ROUTINE_MPN_DIVREM_1(function)                           \
+  SPEED_ROUTINE_MPN_UNARY_1_CALL                                       \
+    ((*function) (wp, 0, s->xp, s->size, s->r))
+
+/* (wp, 0, s->xp, s->size, s->r, 0) */
+#define SPEED_ROUTINE_MPN_DIVREM_1C(function)                          \
+  SPEED_ROUTINE_MPN_UNARY_1_CALL                                       \
+    ((*function) (wp, 0, s->xp, s->size, s->r, 0))
+
+/* (wp, s->size, s->xp, 0, s->r): s->size moves to the second argument
+   and the one after s->xp is 0 (presumably s->size fraction limbs).  */
+#define SPEED_ROUTINE_MPN_DIVREM_1F(function)                          \
+  SPEED_ROUTINE_MPN_UNARY_1_CALL                                       \
+    ((*function) (wp, s->size, s->xp, 0, s->r))
+
+/* (wp, s->size, s->xp, 0, s->r, 0) */
+#define SPEED_ROUTINE_MPN_DIVREM_1CF(function)                         \
+  SPEED_ROUTINE_MPN_UNARY_1_CALL                                       \
+    ((*function) (wp, s->size, s->xp, 0, s->r, 0))
+
+
+/* Wraps SPEED_ROUTINE_MPN_UNARY_1_CALL, first setting up two values
+   that CALL may use outside the timed loop: shift, obtained from
+   count_leading_zeros on s->r, and dinv, obtained from invert_limb on
+   s->r << shift.  Returns -1.0 when s->size is negative or s->r is
+   zero; the nested UNARY_1_CALL additionally requires s->size >= 1.
+
+   The closing brace must not carry a line continuation, otherwise the
+   definition silently extends onto whatever line follows it.  */
+#define SPEED_ROUTINE_MPN_PREINV_DIVREM_1_CALL(call)                   \
+  {                                                                    \
+    unsigned   shift;                                                  \
+    mp_limb_t  dinv;                                                   \
+                                                                       \
+    SPEED_RESTRICT_COND (s->size >= 0);                                \
+    SPEED_RESTRICT_COND (s->r != 0);                                   \
+                                                                       \
+    count_leading_zeros (shift, s->r);                                 \
+    invert_limb (dinv, s->r << shift);                                 \
+                                                                       \
+    SPEED_ROUTINE_MPN_UNARY_1_CALL (call);                             \
+  }
+
+/* The timed call is (*function) (wp, 0, s->xp, s->size, s->r, dinv, shift),
+   with dinv and shift supplied by the _CALL macro.  */
+#define SPEED_ROUTINE_MPN_PREINV_DIVREM_1(function)                    \
+  SPEED_ROUTINE_MPN_PREINV_DIVREM_1_CALL                               \
+    ((*function) (wp, 0, s->xp, s->size, s->r, dinv, shift))
+
+/* s->size limbs worth of fraction part: s->size is passed second and
+   the argument after s->xp is 0.  */
+#define SPEED_ROUTINE_MPN_PREINV_DIVREM_1F(function)                   \
+  SPEED_ROUTINE_MPN_PREINV_DIVREM_1_CALL                               \
+    ((*function) (wp, s->size, s->xp, 0, s->r, dinv, shift))
+
+
+/* Times function (wp, s->xp, s->size, yp) where yp is an N limb
+   multiplier and wp has s->size + N-1 limbs.  s->r is duplicated to form
+   the multiplier, defaulting to MP_BASES_BIG_BASE_10 when s->r is 0.
+   Not sure if that's particularly useful, but at least it provides some
+   control.  Returns -1.0 when s->size < N.  */
+#define SPEED_ROUTINE_MPN_UNARY_N(function,N)                          \
+  {                                                                    \
+    mp_limb_t  yp[N];                                                  \
+    mp_ptr     wp;                                                     \
+    mp_size_t  wsize;                                                  \
+    unsigned   k;                                                      \
+    double     elapsed;                                                \
+    TMP_DECL;                                                          \
+                                                                       \
+    SPEED_RESTRICT_COND (s->size >= N);                                \
+                                                                       \
+    /* every limb of the multiplier gets the same value */             \
+    for (k = 0; k < N; k++)                                            \
+      yp[k] = (s->r != 0 ? s->r : MP_BASES_BIG_BASE_10);               \
+                                                                       \
+    TMP_MARK;                                                          \
+    wsize = s->size + N-1;                                             \
+    SPEED_TMP_ALLOC_LIMBS (wp, wsize, s->align_wp);                    \
+                                                                       \
+    speed_operand_src (s, s->xp, s->size);                             \
+    speed_operand_src (s, yp, (mp_size_t) N);                          \
+    speed_operand_dst (s, wp, wsize);                                  \
+    speed_cache_fill (s);                                              \
+                                                                       \
+    speed_starttime ();                                                \
+    k = s->reps;                                                       \
+    do                                                                 \
+      {                                                                \
+        function (wp, s->xp, s->size, yp);                             \
+      }                                                                \
+    while (--k != 0);                                                  \
+    elapsed = speed_endtime ();                                        \
+                                                                       \
+    TMP_FREE;                                                          \
+    return elapsed;                                                    \
+  }
+
+/* SPEED_ROUTINE_MPN_UNARY_N instantiated for N = 2 through 8.  */
+#define SPEED_ROUTINE_MPN_UNARY_2(function)  SPEED_ROUTINE_MPN_UNARY_N (function, 2)
+#define SPEED_ROUTINE_MPN_UNARY_3(function)  SPEED_ROUTINE_MPN_UNARY_N (function, 3)
+#define SPEED_ROUTINE_MPN_UNARY_4(function)  SPEED_ROUTINE_MPN_UNARY_N (function, 4)
+#define SPEED_ROUTINE_MPN_UNARY_5(function)  SPEED_ROUTINE_MPN_UNARY_N (function, 5)
+#define SPEED_ROUTINE_MPN_UNARY_6(function)  SPEED_ROUTINE_MPN_UNARY_N (function, 6)
+#define SPEED_ROUTINE_MPN_UNARY_7(function)  SPEED_ROUTINE_MPN_UNARY_N (function, 7)
+#define SPEED_ROUTINE_MPN_UNARY_8(function)  SPEED_ROUTINE_MPN_UNARY_N (function, 8)
+
+
+/* For mpn_mul, mpn_mul_basecase.  The timed call is
+   function (wp, xp, size1, s->yp, s->size), where size1 is s->r, or
+   s->size when s->r is 0.  Returns -1.0 unless s->size >= 1 and
+   size1 >= s->size.
+
+   xp is a separately allocated size1 limb source (size1 can exceed the
+   s->size limbs available at s->xp).  It is filled by repeating the
+   limbs of s->xp, so the routine being measured never reads
+   uninitialized memory.  */
+#define SPEED_ROUTINE_MPN_MUL(function)                                \
+  {                                                                    \
+    mp_ptr    wp, xp;                                                  \
+    mp_size_t size1, j;                                                \
+    unsigned  i;                                                       \
+    double    t;                                                       \
+    TMP_DECL;                                                          \
+                                                                       \
+    size1 = (s->r == 0 ? s->size : s->r);                              \
+                                                                       \
+    SPEED_RESTRICT_COND (s->size >= 1);                                \
+    SPEED_RESTRICT_COND (size1 >= s->size);                            \
+                                                                       \
+    TMP_MARK;                                                          \
+    SPEED_TMP_ALLOC_LIMBS (wp, size1 + s->size, s->align_wp);          \
+    SPEED_TMP_ALLOC_LIMBS (xp, size1, s->align_xp);                    \
+                                                                       \
+    /* s->size >= 1 here, so the modulus is never zero */              \
+    for (j = 0; j < size1; j++)                                        \
+      xp[j] = s->xp[j % s->size];                                      \
+                                                                       \
+    speed_operand_src (s, xp, size1);                                  \
+    speed_operand_src (s, s->yp, s->size);                             \
+    speed_operand_dst (s, wp, size1 + s->size);                        \
+    speed_cache_fill (s);                                              \
+                                                                       \
+    speed_starttime ();                                                \
+    i = s->reps;                                                       \
+    do                                                                 \
+      function (wp, xp, size1, s->yp, s->size);                        \
+    while (--i != 0);                                                  \
+    t = speed_endtime ();                                              \
+                                                                       \
+    TMP_FREE;                                                          \
+    return t;                                                          \
+  }
+
+
+/* Times CALL with s->xp and s->yp declared as s->size limb sources and
+   wp a fresh 2*s->size limb destination.  CALL may refer to wp and s.
+   Returns -1.0 when s->size < 1.  */
+#define SPEED_ROUTINE_MPN_MUL_N_CALL(call)                             \
+  {                                                                    \
+    mp_ptr    wp;                                                      \
+    unsigned  i;                                                       \
+    double    t;                                                       \
+    TMP_DECL;                                                          \
+                                                                       \
+    SPEED_RESTRICT_COND (s->size >= 1);                                \
+                                                                       \
+    TMP_MARK;                                                          \
+    SPEED_TMP_ALLOC_LIMBS (wp, 2*s->size, s->align_wp);                \
+                                                                       \
+    /* two sources of s->size limbs, one destination of twice that */  \
+    speed_operand_src (s, s->xp, s->size);                             \
+    speed_operand_src (s, s->yp, s->size);                             \
+    speed_operand_dst (s, wp, 2*s->size);                              \
+    speed_cache_fill (s);                                              \
+                                                                       \
+    speed_starttime ();                                                \
+    i = s->reps;                                                       \
+    do                                                                 \
+      {                                                                \
+        call;                                                          \
+      }                                                                \
+    while (--i != 0);                                                  \
+    t = speed_endtime ();                                              \
+                                                                       \
+    TMP_FREE;                                                          \
+    return t;                                                          \
+  }
+
+/* The timed call is function (wp, s->xp, s->yp, s->size).  Note the
+   expansion ends with a semicolon of its own.  */
+#define SPEED_ROUTINE_MPN_MUL_N(function)                              \
+  SPEED_ROUTINE_MPN_MUL_N_CALL                                         \
+    (function (wp, s->xp, s->yp, s->size));
+
+/* Times CALL with s->xp and s->yp declared as s->size limb sources and
+   wp a fresh destination of only s->size limbs.  CALL may refer to wp
+   and s.  Returns -1.0 when s->size < 1.  */
+#define SPEED_ROUTINE_MPN_MULLO_N_CALL(call)                           \
+  {                                                                    \
+    mp_ptr    wp;                                                      \
+    unsigned  i;                                                       \
+    double    t;                                                       \
+    TMP_DECL;                                                          \
+                                                                       \
+    SPEED_RESTRICT_COND (s->size >= 1);                                \
+                                                                       \
+    TMP_MARK;                                                          \
+    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);                  \
+                                                                       \
+    /* sources and destination are all s->size limbs */                \
+    speed_operand_src (s, s->xp, s->size);                             \
+    speed_operand_src (s, s->yp, s->size);                             \
+    speed_operand_dst (s, wp, s->size);                                \
+    speed_cache_fill (s);                                              \
+                                                                       \
+    speed_starttime ();                                                \
+    i = s->reps;                                                       \
+    do                                                                 \
+      {                                                                \
+        call;                                                          \
+      }                                                                \
+    while (--i != 0);                                                  \
+    t = speed_endtime ();                                              \
+                                                                       \
+    TMP_FREE;                                                          \
+    return t;                                                          \
+  }
+
+/* The timed call is function (wp, s->xp, s->yp, s->size).  Note the
+   expansion ends with a semicolon of its own.  */
+#define SPEED_ROUTINE_MPN_MULLO_N(function)                            \
+  SPEED_ROUTINE_MPN_MULLO_N_CALL                                       \
+    (function (wp, s->xp, s->yp, s->size));
+
+/* Times function (wp, s->xp, s->yp, s->size) with both sources and the
+   destination wp all s->size limbs.  s->r is not consulted.  Returns
+   -1.0 when s->size < 1.  */
+#define SPEED_ROUTINE_MPN_MULLO_BASECASE(function)                     \
+  {                                                                    \
+    double    elapsed;                                                 \
+    unsigned  remaining;                                               \
+    mp_ptr    wp;                                                      \
+    TMP_DECL;                                                          \
+                                                                       \
+    SPEED_RESTRICT_COND (s->size >= 1);                                \
+                                                                       \
+    TMP_MARK;                                                          \
+    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);                  \
+                                                                       \
+    /* sources and destination are all s->size limbs */                \
+    speed_operand_src (s, s->xp, s->size);                             \
+    speed_operand_src (s, s->yp, s->size);                             \
+    speed_operand_dst (s, wp, s->size);                                \
+    speed_cache_fill (s);                                              \
+                                                                       \
+    speed_starttime ();                                                \
+    remaining = s->reps;                                               \
+    do                                                                 \
+      {                                                                \
+        function (wp, s->xp, s->yp, s->size);                          \
+      }                                                                \
+    while (--remaining != 0);                                          \
+    elapsed = speed_endtime ();                                        \
+                                                                       \
+    TMP_FREE;                                                          \
+    return elapsed;                                                    \
+  }
+
+/* Times CALL with a 2*s->size limb destination wp and a scratch block
+   tp of itch limbs, itch being what mpn_mulmod_bnm1_itch returns for
+   (s->size, s->size, s->size).  tp is declared as a destination operand
+   too.  CALL may refer to wp, tp, itch and s.  Returns -1.0 when
+   s->size < 1.  */
+#define SPEED_ROUTINE_MPN_MULMOD_BNM1_CALL(call)                       \
+  {                                                                    \
+    mp_ptr    wp, tp;                                                  \
+    unsigned  i;                                                       \
+    double    t;                                                       \
+    mp_size_t itch;                                                    \
+    TMP_DECL;                                                          \
+                                                                       \
+    SPEED_RESTRICT_COND (s->size >= 1);                                \
+                                                                       \
+    /* scratch requirement for equal-sized operands */                 \
+    itch = mpn_mulmod_bnm1_itch (s->size, s->size, s->size);           \
+                                                                       \
+    TMP_MARK;                                                          \
+    SPEED_TMP_ALLOC_LIMBS (wp, 2 * s->size, s->align_wp);              \
+    SPEED_TMP_ALLOC_LIMBS (tp, itch, s->align_wp2);                    \
+                                                                       \
+    speed_operand_src (s, s->xp, s->size);                             \
+    speed_operand_src (s, s->yp, s->size);                             \
+    speed_operand_dst (s, wp, 2 * s->size);                            \
+    speed_operand_dst (s, tp, itch);                                   \
+    speed_cache_fill (s);                                              \
+                                                                       \
+    speed_starttime ();                                                \
+    i = s->reps;                                                       \
+    do                                                                 \
+      {                                                                \
+        call;                                                          \
+      }                                                                \
+    while (--i != 0);                                                  \
+    t = speed_endtime ();                                              \
+                                                                       \
+    TMP_FREE;                                                          \
+    return t;                                                          \
+  }
+/* Body of a speed measuring function for FUNCTION, invoked as
+   function (rp, rn, s->xp, s->size, s->yp, s->size, scratch) where
+   rn = mpn_mulmod_bnm1_next_size (s->size), rp has rn limbs and scratch
+   has mpn_mulmod_bnm1_itch (rn, rn, rn) limbs.  FUNCTION is called s->reps
+   times and the value of speed_endtime is returned.  Guarded by
+   SPEED_RESTRICT_COND (s->size >= 1).  */
+#define SPEED_ROUTINE_MPN_MULMOD_BNM1_ROUNDED(function) \
+  { \
+    mp_size_t  rn, scratch_limbs; \
+    mp_ptr     rp, scratch; \
+    unsigned   reps_left; \
+    double     elapsed; \
+    TMP_DECL; \
+    \
+    SPEED_RESTRICT_COND (s->size >= 1); \
+    \
+    /* result size comes from next_size, scratch size from itch */ \
+    rn = mpn_mulmod_bnm1_next_size (s->size); \
+    scratch_limbs = mpn_mulmod_bnm1_itch (rn, rn, rn); \
+    \
+    TMP_MARK; \
+    SPEED_TMP_ALLOC_LIMBS (rp, rn, s->align_wp); \
+    SPEED_TMP_ALLOC_LIMBS (scratch, scratch_limbs, s->align_wp2); \
+    \
+    speed_operand_src (s, s->xp, s->size); \
+    speed_operand_src (s, s->yp, s->size); \
+    speed_operand_dst (s, rp, rn); \
+    speed_operand_dst (s, scratch, scratch_limbs); \
+    speed_cache_fill (s); \
+    \
+    speed_starttime (); \
+    reps_left = s->reps; \
+    do \
+      { \
+        function (rp, rn, s->xp, s->size, s->yp, s->size, scratch); \
+        reps_left--; \
+      } \
+    while (reps_left != 0); \
+    elapsed = speed_endtime (); \
+    \
+    TMP_FREE; \
+    return elapsed; \
+  }
+
+/* Body of a speed measuring function that times CALL, a multiplication
+   needing caller-supplied scratch.  CALL may use the locals "wp"
+   (2*s->size limbs of destination) and "tspace" (TSIZE limbs of scratch).
+   s->xp and s->yp are registered as s->size-limb sources.  CALL is
+   evaluated s->reps times and the value of speed_endtime is returned.
+   Guarded by SPEED_RESTRICT_COND (s->size >= MINSIZE).  Note TSIZE appears
+   more than once in the expansion.  */
+#define SPEED_ROUTINE_MPN_MUL_N_TSPACE(call, tsize, minsize) \
+  { \
+    double    t; \
+    unsigned  i; \
+    mp_ptr    wp, tspace; \
+    TMP_DECL; \
+    \
+    SPEED_RESTRICT_COND (s->size >= minsize); \
+    \
+    TMP_MARK; \
+    SPEED_TMP_ALLOC_LIMBS (wp, 2*s->size, s->align_wp); \
+    SPEED_TMP_ALLOC_LIMBS (tspace, tsize, s->align_wp2); \
+    \
+    /* sources first, then product area and scratch */ \
+    speed_operand_src (s, s->xp, s->size); \
+    speed_operand_src (s, s->yp, s->size); \
+    speed_operand_dst (s, wp, 2*s->size); \
+    speed_operand_dst (s, tspace, tsize); \
+    speed_cache_fill (s); \
+    \
+    speed_starttime (); \
+    i = s->reps; \
+    do \
+      { \
+        call; \
+      } \
+    while (--i != 0); \
+    t = speed_endtime (); \
+    \
+    TMP_FREE; \
+    return t; \
+  }
+
+/* Time FUNCTION on two s->size-limb operands via
+   SPEED_ROUTINE_MPN_MUL_N_TSPACE; scratch size is
+   mpn_toom22_mul_itch (s->size, s->size) and the size guard is
+   MPN_TOOM22_MUL_MINSIZE.  */
+#define SPEED_ROUTINE_MPN_TOOM22_MUL_N(function) \
+  SPEED_ROUTINE_MPN_MUL_N_TSPACE ( \
+    function (wp, s->xp, s->size, s->yp, s->size, tspace), \
+    mpn_toom22_mul_itch (s->size, s->size), \
+    MPN_TOOM22_MUL_MINSIZE)
+
+/* Time FUNCTION on two s->size-limb operands via
+   SPEED_ROUTINE_MPN_MUL_N_TSPACE; scratch size is
+   mpn_toom33_mul_itch (s->size, s->size) and the size guard is
+   MPN_TOOM33_MUL_MINSIZE.  */
+#define SPEED_ROUTINE_MPN_TOOM33_MUL_N(function) \
+  SPEED_ROUTINE_MPN_MUL_N_TSPACE ( \
+    function (wp, s->xp, s->size, s->yp, s->size, tspace), \
+    mpn_toom33_mul_itch (s->size, s->size), \
+    MPN_TOOM33_MUL_MINSIZE)
+
+/* Time FUNCTION on two s->size-limb operands via
+   SPEED_ROUTINE_MPN_MUL_N_TSPACE; scratch size is
+   mpn_toom44_mul_itch (s->size, s->size) and the size guard is
+   MPN_TOOM44_MUL_MINSIZE.  */
+#define SPEED_ROUTINE_MPN_TOOM44_MUL_N(function) \
+  SPEED_ROUTINE_MPN_MUL_N_TSPACE ( \
+    function (wp, s->xp, s->size, s->yp, s->size, tspace), \
+    mpn_toom44_mul_itch (s->size, s->size), \
+    MPN_TOOM44_MUL_MINSIZE)
+
+/* Time FUNCTION on two s->size-limb operands via
+   SPEED_ROUTINE_MPN_MUL_N_TSPACE; scratch size is
+   mpn_toom6h_mul_itch (s->size, s->size) and the size guard is
+   MPN_TOOM6H_MUL_MINSIZE.  */
+#define SPEED_ROUTINE_MPN_TOOM6H_MUL_N(function) \
+  SPEED_ROUTINE_MPN_MUL_N_TSPACE ( \
+    function (wp, s->xp, s->size, s->yp, s->size, tspace), \
+    mpn_toom6h_mul_itch (s->size, s->size), \
+    MPN_TOOM6H_MUL_MINSIZE)
+
+/* Time FUNCTION on two s->size-limb operands via
+   SPEED_ROUTINE_MPN_MUL_N_TSPACE; scratch size is
+   mpn_toom8h_mul_itch (s->size, s->size) and the size guard is
+   MPN_TOOM8H_MUL_MINSIZE.  */
+#define SPEED_ROUTINE_MPN_TOOM8H_MUL_N(function) \
+  SPEED_ROUTINE_MPN_MUL_N_TSPACE ( \
+    function (wp, s->xp, s->size, s->yp, s->size, tspace), \
+    mpn_toom8h_mul_itch (s->size, s->size), \
+    MPN_TOOM8H_MUL_MINSIZE)
+
+/* Time FUNCTION on an unbalanced product: first operand s->size limbs,
+   second operand 2*s->size/3 limbs.  Scratch size is mpn_toom32_mul_itch
+   of those two sizes; the size guard is MPN_TOOM32_MUL_MINSIZE.  */
+#define SPEED_ROUTINE_MPN_TOOM32_MUL(function) \
+  SPEED_ROUTINE_MPN_MUL_N_TSPACE ( \
+    function (wp, s->xp, s->size, s->yp, 2*s->size/3, tspace), \
+    mpn_toom32_mul_itch (s->size, 2*s->size/3), \
+    MPN_TOOM32_MUL_MINSIZE)
+
+/* Time FUNCTION on an unbalanced product: first operand s->size limbs,
+   second operand s->size/2 limbs.  Scratch size is mpn_toom42_mul_itch of
+   those two sizes; the size guard is MPN_TOOM42_MUL_MINSIZE.  */
+#define SPEED_ROUTINE_MPN_TOOM42_MUL(function) \
+  SPEED_ROUTINE_MPN_MUL_N_TSPACE ( \
+    function (wp, s->xp, s->size, s->yp, s->size/2, tspace), \
+    mpn_toom42_mul_itch (s->size, s->size/2), \
+    MPN_TOOM42_MUL_MINSIZE)
+
+/* Time FUNCTION on an unbalanced product: first operand s->size limbs,
+   second operand s->size*3/4 limbs.  Scratch size is mpn_toom43_mul_itch
+   of those two sizes; the size guard is MPN_TOOM43_MUL_MINSIZE.  */
+#define SPEED_ROUTINE_MPN_TOOM43_MUL(function) \
+  SPEED_ROUTINE_MPN_MUL_N_TSPACE ( \
+    function (wp, s->xp, s->size, s->yp, s->size*3/4, tspace), \
+    mpn_toom43_mul_itch (s->size, s->size*3/4), \
+    MPN_TOOM43_MUL_MINSIZE)
+
+/* Time FUNCTION on an unbalanced product: first operand s->size limbs,
+   second operand s->size/2 limbs.  Scratch size is mpn_toom63_mul_itch of
+   those two sizes; the size guard is MPN_TOOM63_MUL_MINSIZE.  */
+#define SPEED_ROUTINE_MPN_TOOM63_MUL(function) \
+  SPEED_ROUTINE_MPN_MUL_N_TSPACE ( \
+    function (wp, s->xp, s->size, s->yp, s->size/2, tspace), \
+    mpn_toom63_mul_itch (s->size, s->size/2), \
+    MPN_TOOM63_MUL_MINSIZE)
+
+/* Time FUNCTION with operands of s->size and 17*s->size/24 limbs, using
+   the toom32 scratch size (mpn_toom32_mul_itch) and size guard
+   (MPN_TOOM32_MUL_MINSIZE).  The same operand shape is used by
+   SPEED_ROUTINE_MPN_TOOM43_FOR_TOOM32_MUL.  */
+#define SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM43_MUL(function) \
+  SPEED_ROUTINE_MPN_MUL_N_TSPACE ( \
+    function (wp, s->xp, s->size, s->yp, 17*s->size/24, tspace), \
+    mpn_toom32_mul_itch (s->size, 17*s->size/24), \
+    MPN_TOOM32_MUL_MINSIZE)
+/* Time FUNCTION with operands of s->size and 17*s->size/24 limbs, using
+   the toom43 scratch size (mpn_toom43_mul_itch) and size guard
+   (MPN_TOOM43_MUL_MINSIZE).  The same operand shape is used by
+   SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM43_MUL.  */
+#define SPEED_ROUTINE_MPN_TOOM43_FOR_TOOM32_MUL(function) \
+  SPEED_ROUTINE_MPN_MUL_N_TSPACE ( \
+    function (wp, s->xp, s->size, s->yp, 17*s->size/24, tspace), \
+    mpn_toom43_mul_itch (s->size, 17*s->size/24), \
+    MPN_TOOM43_MUL_MINSIZE)
+
+/* Time FUNCTION with operands of s->size and 19*s->size/30 limbs, using
+   the toom32 scratch size (mpn_toom32_mul_itch) and size guard
+   (MPN_TOOM32_MUL_MINSIZE).  The same operand shape is used by
+   SPEED_ROUTINE_MPN_TOOM53_FOR_TOOM32_MUL.  */
+#define SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM53_MUL(function) \
+  SPEED_ROUTINE_MPN_MUL_N_TSPACE ( \
+    function (wp, s->xp, s->size, s->yp, 19*s->size/30, tspace), \
+    mpn_toom32_mul_itch (s->size, 19*s->size/30), \
+    MPN_TOOM32_MUL_MINSIZE)
+/* Time FUNCTION with operands of s->size and 19*s->size/30 limbs, using
+   the toom53 scratch size (mpn_toom53_mul_itch) and size guard
+   (MPN_TOOM53_MUL_MINSIZE).  The same operand shape is used by
+   SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM53_MUL.  */
+#define SPEED_ROUTINE_MPN_TOOM53_FOR_TOOM32_MUL(function) \
+  SPEED_ROUTINE_MPN_MUL_N_TSPACE ( \
+    function (wp, s->xp, s->size, s->yp, 19*s->size/30, tspace), \
+    mpn_toom53_mul_itch (s->size, 19*s->size/30), \
+    MPN_TOOM53_MUL_MINSIZE)
+
+/* Time FUNCTION with operands of s->size and 11*s->size/20 limbs, using
+   the toom42 scratch size (mpn_toom42_mul_itch) and size guard
+   (MPN_TOOM42_MUL_MINSIZE).  The same operand shape is used by
+   SPEED_ROUTINE_MPN_TOOM53_FOR_TOOM42_MUL.  */
+#define SPEED_ROUTINE_MPN_TOOM42_FOR_TOOM53_MUL(function) \
+  SPEED_ROUTINE_MPN_MUL_N_TSPACE ( \
+    function (wp, s->xp, s->size, s->yp, 11*s->size/20, tspace), \
+    mpn_toom42_mul_itch (s->size, 11*s->size/20), \
+    MPN_TOOM42_MUL_MINSIZE)
+/* Time FUNCTION with operands of s->size and 11*s->size/20 limbs, using
+   the toom53 scratch size (mpn_toom53_mul_itch) and size guard
+   (MPN_TOOM53_MUL_MINSIZE).  The same operand shape is used by
+   SPEED_ROUTINE_MPN_TOOM42_FOR_TOOM53_MUL.  */
+#define SPEED_ROUTINE_MPN_TOOM53_FOR_TOOM42_MUL(function) \
+  SPEED_ROUTINE_MPN_MUL_N_TSPACE ( \
+    function (wp, s->xp, s->size, s->yp, 11*s->size/20, tspace), \
+    mpn_toom53_mul_itch (s->size, 11*s->size/20), \
+    MPN_TOOM53_MUL_MINSIZE)
+
+
+
+/* Body of a speed measuring function that times CALL, a squaring of the
+   s->size-limb source s->xp.  CALL may use the local "wp", a destination
+   of 2*s->size limbs.  CALL is evaluated s->reps times and the value of
+   speed_endtime is returned.  Guarded by
+   SPEED_RESTRICT_COND (s->size >= 1).  */
+#define SPEED_ROUTINE_MPN_SQR_CALL(call) \
+  { \
+    double    t; \
+    unsigned  i; \
+    mp_ptr    wp; \
+    TMP_DECL; \
+    \
+    SPEED_RESTRICT_COND (s->size >= 1); \
+    \
+    TMP_MARK; \
+    SPEED_TMP_ALLOC_LIMBS (wp, 2*s->size, s->align_wp); \
+    \
+    speed_operand_src (s, s->xp, s->size); \
+    speed_operand_dst (s, wp, 2*s->size); \
+    speed_cache_fill (s); \
+    \
+    speed_starttime (); \
+    i = s->reps; \
+    do \
+      { \
+        call; \
+      } \
+    while (--i != 0); \
+    t = speed_endtime (); \
+    \
+    TMP_FREE; \
+    return t; \
+  }
+
+/* Time FUNCTION called as function (wp, s->xp, s->size), using the
+   buffers set up by SPEED_ROUTINE_MPN_SQR_CALL.  */
+#define SPEED_ROUTINE_MPN_SQR(function) \
+  SPEED_ROUTINE_MPN_SQR_CALL ( \
+    function (wp, s->xp, s->size))
+
+/* Alias: measured exactly as SPEED_ROUTINE_MPN_SQR measures FUNCTION.  */
+#define SPEED_ROUTINE_MPN_SQR_DIAGONAL(function) \
+  SPEED_ROUTINE_MPN_SQR ( \
+    function)
+
+
+/* Body of a speed measuring function that times CALL, a squaring needing
+   caller-supplied scratch.  CALL may use the locals "wp" (2*s->size limbs
+   of destination) and "tspace" (TSIZE limbs of scratch).  s->xp is
+   registered as the s->size-limb source.  CALL is evaluated s->reps times
+   and the value of speed_endtime is returned.  Guarded by
+   SPEED_RESTRICT_COND (s->size >= MINSIZE).  Note TSIZE appears more than
+   once in the expansion.  */
+#define SPEED_ROUTINE_MPN_SQR_TSPACE(call, tsize, minsize) \
+  { \
+    double    t; \
+    unsigned  i; \
+    mp_ptr    wp, tspace; \
+    TMP_DECL; \
+    \
+    SPEED_RESTRICT_COND (s->size >= minsize); \
+    \
+    TMP_MARK; \
+    SPEED_TMP_ALLOC_LIMBS (wp, 2*s->size, s->align_wp); \
+    SPEED_TMP_ALLOC_LIMBS (tspace, tsize, s->align_wp2); \
+    \
+    /* one source, then result area and scratch */ \
+    speed_operand_src (s, s->xp, s->size); \
+    speed_operand_dst (s, wp, 2*s->size); \
+    speed_operand_dst (s, tspace, tsize); \
+    speed_cache_fill (s); \
+    \
+    speed_starttime (); \
+    i = s->reps; \
+    do \
+      { \
+        call; \
+      } \
+    while (--i != 0); \
+    t = speed_endtime (); \
+    \
+    TMP_FREE; \
+    return t; \
+  }
+
+/* Time FUNCTION called as function (wp, s->xp, s->size, tspace); scratch
+   size is mpn_toom2_sqr_itch (s->size) and the size guard is
+   MPN_TOOM2_SQR_MINSIZE.  */
+#define SPEED_ROUTINE_MPN_TOOM2_SQR(function) \
+  SPEED_ROUTINE_MPN_SQR_TSPACE ( \
+    function (wp, s->xp, s->size, tspace), \
+    mpn_toom2_sqr_itch (s->size), \
+    MPN_TOOM2_SQR_MINSIZE)
+
+/* Time FUNCTION called as function (wp, s->xp, s->size, tspace); scratch
+   size is mpn_toom3_sqr_itch (s->size) and the size guard is
+   MPN_TOOM3_SQR_MINSIZE.  */
+#define SPEED_ROUTINE_MPN_TOOM3_SQR(function) \
+  SPEED_ROUTINE_MPN_SQR_TSPACE ( \
+    function (wp, s->xp, s->size, tspace), \
+    mpn_toom3_sqr_itch (s->size), \
+    MPN_TOOM3_SQR_MINSIZE)
+
+
+/* Time FUNCTION called as function (wp, s->xp, s->size, tspace); scratch
+   size is mpn_toom4_sqr_itch (s->size) and the size guard is
+   MPN_TOOM4_SQR_MINSIZE.  */
+#define SPEED_ROUTINE_MPN_TOOM4_SQR(function) \
+  SPEED_ROUTINE_MPN_SQR_TSPACE ( \
+    function (wp, s->xp, s->size, tspace), \
+    mpn_toom4_sqr_itch (s->size), \
+    MPN_TOOM4_SQR_MINSIZE)
+
+/* Time FUNCTION called as function (wp, s->xp, s->size, tspace); scratch
+   size is mpn_toom6_sqr_itch (s->size) and the size guard is
+   MPN_TOOM6_SQR_MINSIZE.  */
+#define SPEED_ROUTINE_MPN_TOOM6_SQR(function) \
+  SPEED_ROUTINE_MPN_SQR_TSPACE ( \
+    function (wp, s->xp, s->size, tspace), \
+    mpn_toom6_sqr_itch (s->size), \
+    MPN_TOOM6_SQR_MINSIZE)
+
+/* Time FUNCTION called as function (wp, s->xp, s->size, tspace); scratch
+   size is mpn_toom8_sqr_itch (s->size) and the size guard is
+   MPN_TOOM8_SQR_MINSIZE.  */
+#define SPEED_ROUTINE_MPN_TOOM8_SQR(function) \
+  SPEED_ROUTINE_MPN_SQR_TSPACE ( \
+    function (wp, s->xp, s->size, tspace), \
+    mpn_toom8_sqr_itch (s->size), \
+    MPN_TOOM8_SQR_MINSIZE)
+
+/* Body of a speed measuring function that times CALL on the s->size-limb
+   source s->xp.  No buffers are allocated here.  CALL is evaluated
+   s->reps times, its value is discarded, and the value of speed_endtime
+   is returned.  Guarded by SPEED_RESTRICT_COND (s->size >= 0).  */
+#define SPEED_ROUTINE_MPN_MOD_CALL(call) \
+  { \
+    unsigned  i; \
+    \
+    SPEED_RESTRICT_COND (s->size >= 0); \
+    \
+    speed_operand_src (s, s->xp, s->size); \
+    speed_cache_fill (s); \
+    \
+    speed_starttime (); \
+    i = s->reps; \
+    do \
+      { \
+        call; \
+      } \
+    while (--i != 0); \
+    \
+    return speed_endtime (); \
+  }
+
+/* Time FUNCTION called as (*function) (s->xp, s->size, s->r) through
+   SPEED_ROUTINE_MPN_MOD_CALL.  */
+#define SPEED_ROUTINE_MPN_MOD_1(function) \
+  SPEED_ROUTINE_MPN_MOD_CALL ( \
+    (*function) (s->xp, s->size, s->r))
+
+/* Time FUNCTION called as (*function) (s->xp, s->size, s->r, CNST_LIMB(0))
+   through SPEED_ROUTINE_MPN_MOD_CALL; the fourth argument is always a
+   zero limb.  */
+#define SPEED_ROUTINE_MPN_MOD_1C(function) \
+  SPEED_ROUTINE_MPN_MOD_CALL ( \
+    (*function)(s->xp, s->size, s->r, CNST_LIMB(0)))
+
+/* Time FUNCTION called as function (s->xp, s->size, s->r) through
+   SPEED_ROUTINE_MPN_MOD_CALL.  The definition carries no trailing
+   semicolon, matching the other SPEED_ROUTINE_MPN_MOD_* wrappers; the
+   expansion is a complete braced body, and any semicolon belongs to the
+   place where the macro is used.  */
+#define SPEED_ROUTINE_MPN_MODEXACT_1_ODD(function) \
+  SPEED_ROUTINE_MPN_MOD_CALL (function (s->xp, s->size, s->r))
+
+/* Time FUNCTION called as function (s->xp, s->size, s->r, CNST_LIMB(0))
+   through SPEED_ROUTINE_MPN_MOD_CALL; the fourth argument is always a
+   zero limb.  The definition carries no trailing semicolon, matching the
+   other SPEED_ROUTINE_MPN_MOD_* wrappers; the expansion is a complete
+   braced body, and any semicolon belongs to the place where the macro is
+   used.  */
+#define SPEED_ROUTINE_MPN_MODEXACT_1C_ODD(function) \
+  SPEED_ROUTINE_MPN_MOD_CALL (function (s->xp, s->size, s->r, CNST_LIMB(0)))
+
+/* Time FUNCTION called as (*function) (s->xp, s->size) through
+   SPEED_ROUTINE_MPN_MOD_CALL; s->r is not passed.  */
+#define SPEED_ROUTINE_MPN_MOD_34LSUB1(function) \
+  SPEED_ROUTINE_MPN_MOD_CALL ( \
+    (*function) (s->xp, s->size))
+
+/* Body of a speed measuring function for FUNCTION, called as
+   (*function) (s->xp, s->size, s->r, r_inv) where r_inv is produced once,
+   outside the timed region, by invert_limb (r_inv, s->r).  Guarded by
+   SPEED_RESTRICT_COND on s->size >= 0 and on s->r having
+   GMP_LIMB_HIGHBIT set.  FUNCTION is called s->reps times and the value
+   of speed_endtime is returned.  */
+#define SPEED_ROUTINE_MPN_PREINV_MOD_1(function) \
+  { \
+    mp_limb_t  r_inv; \
+    unsigned   count; \
+    \
+    SPEED_RESTRICT_COND (s->size >= 0); \
+    SPEED_RESTRICT_COND (s->r & GMP_LIMB_HIGHBIT); \
+    \
+    /* inverse is computed before the clock starts */ \
+    invert_limb (r_inv, s->r); \
+    speed_operand_src (s, s->xp, s->size); \
+    speed_cache_fill (s); \
+    \
+    speed_starttime (); \
+    count = s->reps; \
+    do \
+      { \
+        (*function) (s->xp, s->size, s->r, r_inv); \
+        count--; \
+      } \
+    while (count != 0); \
+    \
+    return speed_endtime (); \
+  }
+
+/* Body of a speed measuring function for the pair PFUNC/FUNCTION.  Each
+   timed iteration calls pfunc (pre, s->r) and then
+   function (s->xp, s->size, s->r, pre), so the precomputation is part of
+   the measured time.  "pre" is a 4-limb array; it is also filled once by
+   mpn_mod_1_1p_cps before the clock starts.  Guarded by
+   SPEED_RESTRICT_COND (s->size >= 2).  Returns the value of
+   speed_endtime.  */
+#define SPEED_ROUTINE_MPN_MOD_1_1(function,pfunc) \
+  { \
+    mp_limb_t  pre[4]; \
+    unsigned   count; \
+    \
+    SPEED_RESTRICT_COND (s->size >= 2); \
+    \
+    mpn_mod_1_1p_cps (pre, s->r); \
+    speed_operand_src (s, s->xp, s->size); \
+    speed_cache_fill (s); \
+    \
+    speed_starttime (); \
+    count = s->reps; \
+    do \
+      { \
+        pfunc (pre, s->r); \
+        function (s->xp, s->size, s->r, pre); \
+        count--; \
+      } \
+    while (count != 0); \
+    \
+    return speed_endtime (); \
+  }
+/* Body of a speed measuring function for the pair PFUNC/FUNCTION.  Each
+   timed iteration calls pfunc (inv, s->r) and then
+   function (s->xp, s->size, s->r, inv), so the precomputation is part of
+   the measured time.  "inv" is an array of N+3 limbs.  Guarded by
+   SPEED_RESTRICT_COND on s->size >= 1 and on s->r <= (all-ones limb) / N.
+   Returns the value of speed_endtime.
+
+   N is parenthesized wherever it is used, so an argument such as "1+1"
+   yields the intended array size and divisor rather than being torn apart
+   by operator precedence.  */
+#define SPEED_ROUTINE_MPN_MOD_1_N(function,pfunc,N) \
+  { \
+    unsigned   i; \
+    mp_limb_t  inv[(N)+3]; \
+    \
+    SPEED_RESTRICT_COND (s->size >= 1); \
+    SPEED_RESTRICT_COND (s->r <= ~(mp_limb_t)0 / (N)); \
+    \
+    speed_operand_src (s, s->xp, s->size); \
+    speed_cache_fill (s); \
+    \
+    speed_starttime (); \
+    i = s->reps; \
+    do { \
+      pfunc (inv, s->r); \
+      function (s->xp, s->size, s->r, inv); \
+    } while (--i != 0); \
+    \
+    return speed_endtime (); \
+  }
+
+
+/* A division of 2*s->size by s->size limbs.
+
+   Body of a speed measuring function that times CALL.  CALL may use the
+   locals declared here: "a" (2*s->size limbs, both halves copied from
+   s->xp), "d" (s->size limbs copied from s->yp, with GMP_NUMB_HIGHBIT
+   forced on in the top limb), "q" (s->size+1 limbs), "r" (s->size limbs)
+   and "dinv", set by invert_pi1 from the two most significant limbs of d.
+   The top limb of a is set to one less than the top limb of d.  CALL is
+   evaluated s->reps times and the value of speed_endtime is returned.
+
+   At least 2 limbs are required, because invert_pi1 is handed
+   d[s->size-2]; with s->size == 1 that would be d[-1], outside the
+   allocated divisor.  */
+
+#define SPEED_ROUTINE_MPN_DC_DIVREM_CALL(call) \
+  { \
+    unsigned  i; \
+    mp_ptr    a, d, q, r; \
+    double    t; \
+    gmp_pi1_t dinv; \
+    TMP_DECL; \
+    \
+    /* d[s->size-2] is read below, so a single limb is not enough */ \
+    SPEED_RESTRICT_COND (s->size >= 2); \
+    \
+    TMP_MARK; \
+    SPEED_TMP_ALLOC_LIMBS (a, 2*s->size, s->align_xp); \
+    SPEED_TMP_ALLOC_LIMBS (d, s->size,   s->align_yp); \
+    SPEED_TMP_ALLOC_LIMBS (q, s->size+1, s->align_wp); \
+    SPEED_TMP_ALLOC_LIMBS (r, s->size,   s->align_wp2); \
+    \
+    MPN_COPY (a, s->xp, s->size); \
+    MPN_COPY (a+s->size, s->xp, s->size); \
+    \
+    MPN_COPY (d, s->yp, s->size); \
+    \
+    /* normalize the data */ \
+    d[s->size-1] |= GMP_NUMB_HIGHBIT; \
+    a[2*s->size-1] = d[s->size-1] - 1; \
+    \
+    invert_pi1 (dinv, d[s->size-1], d[s->size-2]); \
+    \
+    speed_operand_src (s, a, 2*s->size); \
+    speed_operand_src (s, d, s->size); \
+    speed_operand_dst (s, q, s->size+1); \
+    speed_operand_dst (s, r, s->size); \
+    speed_cache_fill (s); \
+    \
+    speed_starttime (); \
+    i = s->reps; \
+    do \
+      call; \
+    while (--i != 0); \
+    t = speed_endtime (); \
+    \
+    TMP_FREE; \
+    return t; \
+  }
+
+
+/* A remainder of 2*s->size by s->size limbs.
+
+   Body of a speed measuring function for FUNCTION, called as
+   function (r, a, d).  d is built from s->yp (s->size limbs).  a is built
+   from s->xp reduced mod d, shifted up by GMP_LIMB_BITS * s->size bits,
+   with s->xp then copied into its low s->size limbs.  FUNCTION is called
+   s->reps times and the value of speed_endtime is returned.  Guarded by
+   SPEED_RESTRICT_COND (s->size >= 1).
+
+   The three mpz_t temporaries are cleared before returning, so repeated
+   measurements do not accumulate allocated memory.  */
+
+#define SPEED_ROUTINE_MPZ_MOD(function) \
+  { \
+    unsigned   i; \
+    double     t; \
+    mpz_t      a, d, r; \
+    \
+    SPEED_RESTRICT_COND (s->size >= 1); \
+    \
+    mpz_init_set_n (d, s->yp, s->size); \
+    \
+    /* high part less than d, low part a duplicate copied in */ \
+    mpz_init_set_n (a, s->xp, s->size); \
+    mpz_mod (a, a, d); \
+    mpz_mul_2exp (a, a, GMP_LIMB_BITS * s->size); \
+    MPN_COPY (PTR(a), s->xp, s->size); \
+    \
+    mpz_init (r); \
+    \
+    speed_operand_src (s, PTR(a), SIZ(a)); \
+    speed_operand_src (s, PTR(d), SIZ(d)); \
+    speed_cache_fill (s); \
+    \
+    speed_starttime (); \
+    i = s->reps; \
+    do \
+      function (r, a, d); \
+    while (--i != 0); \
+    t = speed_endtime (); \
+    \
+    /* clearing happens after the clock has stopped */ \
+    mpz_clear (r); \
+    mpz_clear (a); \
+    mpz_clear (d); \
+    return t; \
+  }
+
+/* Body of a speed measuring function for FUNCTION, called as
+   function (qp, tp, size1, dp, s->size, INV).  size1 is s->r, or
+   2*s->size when s->r is zero.  INV is an expression that may refer to the
+   local "inv", which invert_pi1 sets from the two most significant limbs
+   of dp.  dp is a copy of s->yp with GMP_NUMB_HIGHBIT forced on in its top
+   limb; ap holds s->xp at both its low and high ends, with the top limb
+   set to one less than the top limb of dp.  Each timed iteration first
+   copies ap to tp and then calls FUNCTION, so the copy is part of the
+   measured time.  Guarded by SPEED_RESTRICT_COND on s->size >= DMIN and
+   on size1 - s->size >= QMIN.  Returns the value of speed_endtime.  */
+#define SPEED_ROUTINE_MPN_PI1_DIV(function, INV, DMIN, QMIN) \
+  { \
+    double     t; \
+    unsigned   i; \
+    mp_size_t  size1; \
+    gmp_pi1_t  inv; \
+    mp_ptr     dp, tp, ap, qp; \
+    TMP_DECL; \
+    \
+    /* dividend length: explicit s->r, else twice the divisor length */ \
+    if (s->r == 0) \
+      size1 = 2 * s->size; \
+    else \
+      size1 = s->r; \
+    \
+    SPEED_RESTRICT_COND (s->size >= DMIN); \
+    SPEED_RESTRICT_COND (size1 - s->size >= QMIN); \
+    \
+    TMP_MARK; \
+    SPEED_TMP_ALLOC_LIMBS (ap, size1, s->align_xp); \
+    SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp); \
+    SPEED_TMP_ALLOC_LIMBS (qp, size1 - s->size, s->align_wp); \
+    SPEED_TMP_ALLOC_LIMBS (tp, size1, s->align_wp2); \
+    \
+    /* we don't fill in dividend completely when size1 > s->size */ \
+    MPN_COPY (ap, s->xp, s->size); \
+    MPN_COPY (ap + size1 - s->size, s->xp, s->size); \
+    \
+    MPN_COPY (dp, s->yp, s->size); \
+    \
+    /* normalize the data */ \
+    dp[s->size-1] |= GMP_NUMB_HIGHBIT; \
+    ap[size1 - 1] = dp[s->size - 1] - 1; \
+    \
+    invert_pi1 (inv, dp[s->size-1], dp[s->size-2]); \
+    \
+    speed_operand_src (s, ap, size1); \
+    speed_operand_dst (s, tp, size1); \
+    speed_operand_src (s, dp, s->size); \
+    speed_operand_dst (s, qp, size1 - s->size); \
+    speed_cache_fill (s); \
+    \
+    speed_starttime (); \
+    i = s->reps; \
+    do \
+      { \
+        /* work on a fresh copy of the dividend each time round */ \
+        MPN_COPY (tp, ap, size1); \
+        function (qp, tp, size1, dp, s->size, INV); \
+        i--; \
+      } \
+    while (i != 0); \
+    t = speed_endtime (); \
+    \
+    TMP_FREE; \
+    return t; \
+  }
+/* Body of a speed measuring function for FUNCTION, called as
+   function (qp, tp, 2 * s->size, dp, s->size, scratch).  scratch has
+   itchfn (2 * s->size, s->size, 0) limbs.  tp (2*s->size limbs) holds
+   s->xp in both halves; dp (s->size limbs) is a copy of s->yp with
+   GMP_NUMB_HIGHBIT forced on in its top limb, and the top limb of tp is
+   set to one less than the top limb of dp.  FUNCTION is called s->reps
+   times and the value of speed_endtime is returned.  Guarded by
+   SPEED_RESTRICT_COND (s->size >= 2).
+
+   dp is filled from s->yp before it is normalized; without that copy the
+   divisor would be whatever the temporary allocation happened to contain,
+   making the measured operands unrepeatable.  */
+#define SPEED_ROUTINE_MPN_MU_DIV_Q(function,itchfn) \
+  { \
+    unsigned   i; \
+    mp_ptr     dp, tp, qp, scratch; \
+    double     t; \
+    mp_size_t itch; \
+    TMP_DECL; \
+    \
+    SPEED_RESTRICT_COND (s->size >= 2); \
+    \
+    itch = itchfn (2 * s->size, s->size, 0); \
+    TMP_MARK; \
+    SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp); \
+    SPEED_TMP_ALLOC_LIMBS (qp, s->size, s->align_wp); \
+    SPEED_TMP_ALLOC_LIMBS (tp, 2 * s->size, s->align_xp); \
+    SPEED_TMP_ALLOC_LIMBS (scratch, itch, s->align_wp2); \
+    \
+    MPN_COPY (tp,         s->xp, s->size); \
+    MPN_COPY (tp+s->size, s->xp, s->size); \
+    \
+    /* the divisor must hold defined data before it is normalized */ \
+    MPN_COPY (dp,         s->yp, s->size); \
+    \
+    /* normalize the data */ \
+    dp[s->size-1] |= GMP_NUMB_HIGHBIT; \
+    tp[2*s->size-1] = dp[s->size-1] - 1; \
+    \
+    speed_operand_dst (s, qp, s->size); \
+    speed_operand_src (s, tp, 2 * s->size); \
+    speed_operand_src (s, dp, s->size); \
+    speed_operand_dst (s, scratch, itch); \
+    speed_cache_fill (s); \
+    \
+    speed_starttime (); \
+    i = s->reps; \
+    do { \
+      function (qp, tp, 2 * s->size, dp, s->size, scratch); \
+    } while (--i != 0); \
+    t = speed_endtime (); \
+    \
+    TMP_FREE; \
+    return t; \
+  }
+#define SPEED_ROUTINE_MPN_MU_DIV_QR(function,itchfn)                   \
+  { /* Run function s->reps times; return speed_endtime () value. */   \
+    unsigned   i;                                                      \
+    mp_ptr     dp, tp, qp, rp, scratch;                                        \
+    double     t;                                                      \
+    mp_size_t size1, itch;                                             \
+    TMP_DECL;                                                          \
+    /* Dividend length: s->r if nonzero, otherwise 2 * s->size. */     \
+    size1 = (s->r == 0 ? 2 * s->size : s->r);                          \
+                                                                       \
+    SPEED_RESTRICT_COND (s->size >= 2);                                        \
+    SPEED_RESTRICT_COND (size1 >= s->size);                            \
+                                                                       \
+    itch = itchfn (size1, s->size, 0);                                 \
+    TMP_MARK;                                                          \
+    SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp);                  \
+    SPEED_TMP_ALLOC_LIMBS (qp, size1 - s->size, s->align_wp);          \
+    SPEED_TMP_ALLOC_LIMBS (tp, size1, s->align_xp);                    \
+    SPEED_TMP_ALLOC_LIMBS (scratch, itch, s->align_wp2);               \
+    SPEED_TMP_ALLOC_LIMBS (rp, s->size, s->align_wp2); /* alignment? */        \
+                                                                       \
+    /* dividend middle limbs stay unset when size1 > 2 * s->size */    \
+    MPN_COPY (tp,         s->xp, s->size);                             \
+    MPN_COPY (tp + size1 - s->size, s->xp, s->size);                   \
+                                                                       \
+    MPN_COPY (dp,         s->yp, s->size);                             \
+                                                                       \
+    /* normalize: OR in GMP_NUMB_HIGHBIT, top of tp = top of dp - 1 */ \
+    dp[s->size-1] |= GMP_NUMB_HIGHBIT;                                 \
+    tp[size1 - 1] = dp[s->size - 1] - 1;                               \
+                                                                       \
+    speed_operand_dst (s, qp, size1 - s->size);                                \
+    speed_operand_dst (s, rp, s->size);                                        \
+    speed_operand_src (s, tp, size1);                                  \
+    speed_operand_src (s, dp, s->size);                                        \
+    speed_operand_dst (s, scratch, itch);                              \
+    speed_cache_fill (s);                                              \
+    /* timed region: s->reps consecutive calls of function */          \
+    speed_starttime ();                                                        \
+    i = s->reps;                                                       \
+    do {                                                               \
+      function (qp, rp, tp, size1, dp, s->size, scratch);              \
+    } while (--i != 0);                                                        \
+    t = speed_endtime ();                                              \
+                                                                       \
+    TMP_FREE;                                                          \
+    return t;                                                          \
+  }
+#define SPEED_ROUTINE_MPN_MUPI_DIV_QR(function,itchfn)                 \
+  { /* Run function s->reps times; return speed_endtime () value. */   \
+    unsigned   i;                                                      \
+    mp_ptr     dp, tp, qp, rp, ip, scratch;                            \
+    double     t;                                                      \
+    mp_size_t size1, itch;                                             \
+    TMP_DECL;                                                          \
+    /* Dividend length: s->r if nonzero, otherwise 2 * s->size. */     \
+    size1 = (s->r == 0 ? 2 * s->size : s->r);                          \
+                                                                       \
+    SPEED_RESTRICT_COND (s->size >= 2);                                        \
+    SPEED_RESTRICT_COND (size1 >= s->size);                            \
+                                                                       \
+    itch = itchfn (size1, s->size, s->size);                           \
+    TMP_MARK;                                                          \
+    SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp);                  \
+    SPEED_TMP_ALLOC_LIMBS (qp, size1 - s->size, s->align_wp);          \
+    SPEED_TMP_ALLOC_LIMBS (tp, size1, s->align_xp);                    \
+    SPEED_TMP_ALLOC_LIMBS (scratch, itch, s->align_wp2);               \
+    SPEED_TMP_ALLOC_LIMBS (rp, s->size, s->align_wp2); /* alignment? */        \
+    SPEED_TMP_ALLOC_LIMBS (ip, s->size, s->align_wp2); /* alignment? */        \
+                                                                       \
+    /* dividend middle limbs stay unset when size1 > 2 * s->size */    \
+    MPN_COPY (tp,         s->xp, s->size);                             \
+    MPN_COPY (tp + size1 - s->size, s->xp, s->size);                   \
+                                                                       \
+    MPN_COPY (dp,         s->yp, s->size);                             \
+                                                                       \
+    /* normalize: OR in GMP_NUMB_HIGHBIT, top of tp = top of dp - 1 */ \
+    dp[s->size-1] |= GMP_NUMB_HIGHBIT;                                 \
+    tp[size1 - 1] = dp[s->size-1] - 1;                                 \
+    /* ip = mpn_invert of dp, computed once before timing starts */    \
+    mpn_invert (ip, dp, s->size, NULL);                                        \
+                                                                       \
+    speed_operand_dst (s, qp, size1 - s->size);                                \
+    speed_operand_dst (s, rp, s->size);                                        \
+    speed_operand_src (s, tp, size1);                                  \
+    speed_operand_src (s, dp, s->size);                                        \
+    speed_operand_src (s, ip, s->size);                                        \
+    speed_operand_dst (s, scratch, itch);                              \
+    speed_cache_fill (s);                                              \
+    /* timed region: s->reps consecutive calls of function */          \
+    speed_starttime ();                                                        \
+    i = s->reps;                                                       \
+    do {                                                               \
+      function (qp, rp, tp, size1, dp, s->size, ip, s->size, scratch); \
+    } while (--i != 0);                                                        \
+    t = speed_endtime ();                                              \
+                                                                       \
+    TMP_FREE;                                                          \
+    return t;                                                          \
+  }
+
+#define SPEED_ROUTINE_MPN_PI1_BDIV_QR(function)                                \
+  { /* Run function s->reps times; return speed_endtime () value. */   \
+    unsigned   i;                                                      \
+    mp_ptr     dp, tp, ap, qp;                                         \
+    mp_limb_t  inv;                                                    \
+    double     t;                                                      \
+    TMP_DECL;                                                          \
+    /* Body of a speed routine: uses s from the enclosing scope. */    \
+    SPEED_RESTRICT_COND (s->size >= 1);                                        \
+                                                                       \
+    TMP_MARK;                                                          \
+    SPEED_TMP_ALLOC_LIMBS (ap, 2*s->size, s->align_xp);                        \
+    SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp);                  \
+    SPEED_TMP_ALLOC_LIMBS (qp, s->size, s->align_wp);                  \
+    SPEED_TMP_ALLOC_LIMBS (tp, 2*s->size, s->align_wp2);               \
+                                                                       \
+    MPN_COPY (ap,         s->xp, s->size);                             \
+    MPN_COPY (ap+s->size, s->xp, s->size);                             \
+                                                                       \
+    /* divisor must be odd */                                          \
+    MPN_COPY (dp, s->yp, s->size);                                     \
+    dp[0] |= 1;                                                                \
+    binvert_limb (inv, dp[0]);                                         \
+    inv = -inv;                                                                \
+    /* inv is the negated binvert_limb result for dp[0] */             \
+    speed_operand_src (s, ap, 2*s->size);                              \
+    speed_operand_dst (s, tp, 2*s->size);                              \
+    speed_operand_src (s, dp, s->size);                                        \
+    speed_operand_dst (s, qp, s->size);                                        \
+    speed_cache_fill (s);                                              \
+    /* timed: copy ap to tp, then call function (copy is timed too) */ \
+    speed_starttime ();                                                        \
+    i = s->reps;                                                       \
+    do {                                                               \
+      MPN_COPY (tp, ap, 2*s->size);                                    \
+      function (qp, tp, 2*s->size, dp, s->size, inv);                  \
+    } while (--i != 0);                                                        \
+    t = speed_endtime ();                                              \
+                                                                       \
+    TMP_FREE;                                                          \
+    return t;                                                          \
+  }
+#define SPEED_ROUTINE_MPN_PI1_BDIV_Q(function)                         \
+  { /* Run function s->reps times; return speed_endtime () value. */   \
+    unsigned   i;                                                      \
+    mp_ptr     dp, tp, qp;                                             \
+    mp_limb_t  inv;                                                    \
+    double     t;                                                      \
+    TMP_DECL;                                                          \
+    /* Body of a speed routine: uses s from the enclosing scope. */    \
+    SPEED_RESTRICT_COND (s->size >= 1);                                        \
+                                                                       \
+    TMP_MARK;                                                          \
+    SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp);                  \
+    SPEED_TMP_ALLOC_LIMBS (qp, s->size, s->align_wp);                  \
+    SPEED_TMP_ALLOC_LIMBS (tp, s->size, s->align_wp2);                 \
+                                                                       \
+    /* divisor must be odd */                                          \
+    MPN_COPY (dp, s->yp, s->size);                                     \
+    dp[0] |= 1;                                                                \
+    binvert_limb (inv, dp[0]);                                         \
+    inv = -inv;                                                                \
+    /* inv is the negated binvert_limb result for dp[0] */             \
+    speed_operand_src (s, s->xp, s->size);                             \
+    speed_operand_dst (s, tp, s->size);                                        \
+    speed_operand_src (s, dp, s->size);                                        \
+    speed_operand_dst (s, qp, s->size);                                        \
+    speed_cache_fill (s);                                              \
+    /* timed: copy xp to tp, then call function (copy is timed too) */ \
+    speed_starttime ();                                                        \
+    i = s->reps;                                                       \
+    do {                                                               \
+      MPN_COPY (tp, s->xp, s->size);                                   \
+      function (qp, tp, s->size, dp, s->size, inv);                    \
+    } while (--i != 0);                                                        \
+    t = speed_endtime ();                                              \
+                                                                       \
+    TMP_FREE;                                                          \
+    return t;                                                          \
+  }
+#define SPEED_ROUTINE_MPN_MU_BDIV_Q(function,itchfn)                   \
+  { /* Run function s->reps times; return speed_endtime () value. */   \
+    unsigned   i;                                                      \
+    mp_ptr     dp, qp, scratch;                                                \
+    double     t;                                                      \
+    mp_size_t itch;                                                    \
+    TMP_DECL;                                                          \
+    /* Body of a speed routine: uses s from the enclosing scope. */    \
+    SPEED_RESTRICT_COND (s->size >= 2);                                        \
+                                                                       \
+    itch = itchfn (s->size, s->size);                                  \
+    TMP_MARK;                                                          \
+    SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp);                  \
+    SPEED_TMP_ALLOC_LIMBS (qp, s->size, s->align_wp);                  \
+    SPEED_TMP_ALLOC_LIMBS (scratch, itch, s->align_wp2);               \
+                                                                       \
+    /* divisor must be odd */                                          \
+    MPN_COPY (dp, s->yp, s->size);                                     \
+    dp[0] |= 1;                                                                \
+    /* s->xp is passed to function directly; only yp is copied */      \
+    speed_operand_dst (s, qp, s->size);                                        \
+    speed_operand_src (s, s->xp, s->size);                             \
+    speed_operand_src (s, dp, s->size);                                        \
+    speed_operand_dst (s, scratch, itch);                              \
+    speed_cache_fill (s);                                              \
+    /* timed region: s->reps consecutive calls of function */          \
+    speed_starttime ();                                                        \
+    i = s->reps;                                                       \
+    do {                                                               \
+      function (qp, s->xp, s->size, dp, s->size, scratch);             \
+    } while (--i != 0);                                                        \
+    t = speed_endtime ();                                              \
+                                                                       \
+    TMP_FREE;                                                          \
+    return t;                                                          \
+  }
+#define SPEED_ROUTINE_MPN_MU_BDIV_QR(function,itchfn)                  \
+  { /* Run function s->reps times; return speed_endtime () value. */   \
+    unsigned   i;                                                      \
+    mp_ptr     dp, tp, qp, rp, scratch;                                        \
+    double     t;                                                      \
+    mp_size_t itch;                                                    \
+    TMP_DECL;                                                          \
+    /* Body of a speed routine: uses s from the enclosing scope. */    \
+    SPEED_RESTRICT_COND (s->size >= 2);                                        \
+                                                                       \
+    itch = itchfn (2 * s->size, s->size);                              \
+    TMP_MARK;                                                          \
+    SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp);                  \
+    SPEED_TMP_ALLOC_LIMBS (qp, s->size, s->align_wp);                  \
+    SPEED_TMP_ALLOC_LIMBS (tp, 2 * s->size, s->align_xp);              \
+    SPEED_TMP_ALLOC_LIMBS (scratch, itch, s->align_wp2);               \
+    SPEED_TMP_ALLOC_LIMBS (rp, s->size, s->align_wp2); /* alignment? */        \
+                                                                       \
+    MPN_COPY (tp,         s->xp, s->size);                             \
+    MPN_COPY (tp+s->size, s->xp, s->size);                             \
+                                                                       \
+    /* divisor must be odd */                                          \
+    MPN_COPY (dp, s->yp, s->size);                                     \
+    dp[0] |= 1;                                                                \
+                                                                       \
+    speed_operand_dst (s, qp, s->size);                                        \
+    speed_operand_dst (s, rp, s->size);                                        \
+    speed_operand_src (s, tp, 2 * s->size);                            \
+    speed_operand_src (s, dp, s->size);                                        \
+    speed_operand_dst (s, scratch, itch);                              \
+    speed_cache_fill (s);                                              \
+    /* timed region: s->reps consecutive calls of function */          \
+    speed_starttime ();                                                        \
+    i = s->reps;                                                       \
+    do {                                                               \
+      function (qp, rp, tp, 2 * s->size, dp, s->size, scratch);                \
+    } while (--i != 0);                                                        \
+    t = speed_endtime ();                                              \
+                                                                       \
+    TMP_FREE;                                                          \
+    return t;                                                          \
+  }
+
+#define SPEED_ROUTINE_MPN_INVERT(function,itchfn)                      \
+  { /* Run function s->reps times; return speed_endtime () value. */   \
+    long  i;                                                           \
+    mp_ptr    up, tp, ip;                                              \
+    double    t;                                                       \
+    TMP_DECL;                                                          \
+    /* Body of a speed routine: uses s from the enclosing scope. */    \
+    SPEED_RESTRICT_COND (s->size >= 1);                                        \
+                                                                       \
+    TMP_MARK;                                                          \
+    SPEED_TMP_ALLOC_LIMBS (ip, s->size, s->align_xp);                  \
+    SPEED_TMP_ALLOC_LIMBS (up, s->size,   s->align_yp);                        \
+    SPEED_TMP_ALLOC_LIMBS (tp, itchfn (s->size), s->align_wp);         \
+                                                                       \
+    MPN_COPY (up, s->xp, s->size);                                     \
+                                                                       \
+    /* normalize the data: OR GMP_NUMB_HIGHBIT into the top limb */    \
+    up[s->size-1] |= GMP_NUMB_HIGHBIT;                                 \
+    /* tp has itchfn (s->size) limbs; only s->size are registered */   \
+    speed_operand_src (s, up, s->size);                                        \
+    speed_operand_dst (s, tp, s->size);                                        \
+    speed_operand_dst (s, ip, s->size);                                        \
+    speed_cache_fill (s);                                              \
+    /* timed region: s->reps consecutive calls of function */          \
+    speed_starttime ();                                                        \
+    i = s->reps;                                                       \
+    do                                                                 \
+      function (ip, up, s->size, tp);                                  \
+    while (--i != 0);                                                  \
+    t = speed_endtime ();                                              \
+                                                                       \
+    TMP_FREE;                                                          \
+    return t;                                                          \
+  }
+
+#define SPEED_ROUTINE_MPN_INVERTAPPR(function,itchfn)                  \
+  { /* Run function s->reps times; return speed_endtime () value. */   \
+    long  i;                                                           \
+    mp_ptr    up, tp, ip;                                              \
+    double    t;                                                       \
+    TMP_DECL;                                                          \
+    /* Body of a speed routine: uses s from the enclosing scope. */    \
+    SPEED_RESTRICT_COND (s->size >= 1);                                        \
+                                                                       \
+    TMP_MARK;                                                          \
+    SPEED_TMP_ALLOC_LIMBS (ip, s->size, s->align_xp);                  \
+    SPEED_TMP_ALLOC_LIMBS (up, s->size, s->align_yp);                  \
+    SPEED_TMP_ALLOC_LIMBS (tp, itchfn (s->size), s->align_wp);         \
+                                                                       \
+    MPN_COPY (up, s->xp, s->size);                                     \
+                                                                       \
+    /* normalize the data: OR GMP_NUMB_HIGHBIT into the top limb */    \
+    up[s->size-1] |= GMP_NUMB_HIGHBIT;                                 \
+    /* tp has itchfn (s->size) limbs; only s->size are registered */   \
+    speed_operand_src (s, up, s->size);                                        \
+    speed_operand_dst (s, tp, s->size);                                        \
+    speed_operand_dst (s, ip, s->size);                                        \
+    speed_cache_fill (s);                                              \
+    /* timed region: s->reps consecutive calls of function */          \
+    speed_starttime ();                                                        \
+    i = s->reps;                                                       \
+    do                                                                 \
+      function (ip, up, s->size, tp);                                  \
+    while (--i != 0);                                                  \
+    t = speed_endtime ();                                              \
+                                                                       \
+    TMP_FREE;                                                          \
+    return t;                                                          \
+  }
+
+#define SPEED_ROUTINE_MPN_NI_INVERTAPPR(function,itchfn)               \
+  { /* Run function s->reps times; return speed_endtime () value. */   \
+    long  i;                                                           \
+    mp_ptr    up, tp, ip;                                              \
+    double    t;                                                       \
+    TMP_DECL;                                                          \
+    /* Body of a speed routine: uses s from the enclosing scope. */    \
+    SPEED_RESTRICT_COND (s->size >= 3);                                        \
+                                                                       \
+    TMP_MARK;                                                          \
+    SPEED_TMP_ALLOC_LIMBS (ip, s->size, s->align_xp);                  \
+    SPEED_TMP_ALLOC_LIMBS (up, s->size, s->align_yp);                  \
+    SPEED_TMP_ALLOC_LIMBS (tp, itchfn (s->size), s->align_wp);         \
+                                                                       \
+    MPN_COPY (up, s->xp, s->size);                                     \
+                                                                       \
+    /* normalize the data: OR GMP_NUMB_HIGHBIT into the top limb */    \
+    up[s->size-1] |= GMP_NUMB_HIGHBIT;                                 \
+    /* tp has itchfn (s->size) limbs; only s->size are registered */   \
+    speed_operand_src (s, up, s->size);                                        \
+    speed_operand_dst (s, tp, s->size);                                        \
+    speed_operand_dst (s, ip, s->size);                                        \
+    speed_cache_fill (s);                                              \
+    /* timed region: s->reps consecutive calls of function */          \
+    speed_starttime ();                                                        \
+    i = s->reps;                                                       \
+    do                                                                 \
+      function (ip, up, s->size, tp);                                  \
+    while (--i != 0);                                                  \
+    t = speed_endtime ();                                              \
+                                                                       \
+    TMP_FREE;                                                          \
+    return t;                                                          \
+  }
+
+#define SPEED_ROUTINE_MPN_BINVERT(function,itchfn)                     \
+  { /* Run function s->reps times; return speed_endtime () value. */   \
+    long  i;                                                           \
+    mp_ptr    up, tp, ip;                                              \
+    double    t;                                                       \
+    TMP_DECL;                                                          \
+    /* Body of a speed routine: uses s from the enclosing scope. */    \
+    SPEED_RESTRICT_COND (s->size >= 1);                                        \
+                                                                       \
+    TMP_MARK;                                                          \
+    SPEED_TMP_ALLOC_LIMBS (ip, s->size, s->align_xp);                  \
+    SPEED_TMP_ALLOC_LIMBS (up, s->size,   s->align_yp);                        \
+    SPEED_TMP_ALLOC_LIMBS (tp, itchfn (s->size), s->align_wp);         \
+                                                                       \
+    MPN_COPY (up, s->xp, s->size);                                     \
+                                                                       \
+    /* make the operand odd: set bit 0 of its low limb */              \
+    up[0] |= 1;                                                                \
+    /* tp has itchfn (s->size) limbs; only s->size are registered */   \
+    speed_operand_src (s, up, s->size);                                        \
+    speed_operand_dst (s, tp, s->size);                                        \
+    speed_operand_dst (s, ip, s->size);                                        \
+    speed_cache_fill (s);                                              \
+    /* timed region: s->reps consecutive calls of function */          \
+    speed_starttime ();                                                        \
+    i = s->reps;                                                       \
+    do                                                                 \
+      function (ip, up, s->size, tp);                                  \
+    while (--i != 0);                                                  \
+    t = speed_endtime ();                                              \
+                                                                       \
+    TMP_FREE;                                                          \
+    return t;                                                          \
+  }
+
+#define SPEED_ROUTINE_REDC_1(function)                                 \
+  { /* Run function s->reps times; return speed_endtime () value. */   \
+    unsigned   i;                                                      \
+    mp_ptr     cp, mp, tp, ap;                                         \
+    mp_limb_t  inv;                                                    \
+    double     t;                                                      \
+    TMP_DECL;                                                          \
+    /* Body of a speed routine: uses s from the enclosing scope. */    \
+    SPEED_RESTRICT_COND (s->size >= 1);                                        \
+                                                                       \
+    TMP_MARK;                                                          \
+    SPEED_TMP_ALLOC_LIMBS (ap, 2*s->size+1, s->align_xp);              \
+    SPEED_TMP_ALLOC_LIMBS (mp, s->size,     s->align_yp);              \
+    SPEED_TMP_ALLOC_LIMBS (cp, s->size,     s->align_wp);              \
+    SPEED_TMP_ALLOC_LIMBS (tp, 2*s->size+1, s->align_wp2);             \
+    /* ap and tp hold 2*s->size+1 limbs; only 2*s->size get filled */  \
+    MPN_COPY (ap,         s->xp, s->size);                             \
+    MPN_COPY (ap+s->size, s->xp, s->size);                             \
+                                                                       \
+    /* modulus must be odd */                                          \
+    MPN_COPY (mp, s->yp, s->size);                                     \
+    mp[0] |= 1;                                                                \
+    binvert_limb (inv, mp[0]);                                         \
+    inv = -inv;                                                                \
+    /* inv is the negated binvert_limb result for mp[0] */             \
+    speed_operand_src (s, ap, 2*s->size+1);                            \
+    speed_operand_dst (s, tp, 2*s->size+1);                            \
+    speed_operand_src (s, mp, s->size);                                        \
+    speed_operand_dst (s, cp, s->size);                                        \
+    speed_cache_fill (s);                                              \
+    /* timed: copy ap to tp, then call function (copy is timed too) */ \
+    speed_starttime ();                                                        \
+    i = s->reps;                                                       \
+    do {                                                               \
+      MPN_COPY (tp, ap, 2*s->size);                                    \
+      function (cp, tp, mp, s->size, inv);                             \
+    } while (--i != 0);                                                        \
+    t = speed_endtime ();                                              \
+                                                                       \
+    TMP_FREE;                                                          \
+    return t;                                                          \
+  }
+#define SPEED_ROUTINE_REDC_2(function)                                 \
+  { /* Time function (cp, tp, mp, s->size, invp), invp being 2 limbs. */ \
+    unsigned   i;                                                      \
+    mp_ptr     cp, mp, tp, ap;                                         \
+    mp_limb_t  invp[2], mlow[2];                                       \
+    double     t;                                                      \
+    TMP_DECL;                                                          \
+                                                                       \
+    SPEED_RESTRICT_COND (s->size >= 1);                                        \
+    TMP_MARK;                                                          \
+    SPEED_TMP_ALLOC_LIMBS (ap, 2*s->size+1, s->align_xp);              \
+    SPEED_TMP_ALLOC_LIMBS (mp, s->size,     s->align_yp);              \
+    SPEED_TMP_ALLOC_LIMBS (cp, s->size,     s->align_wp);              \
+    SPEED_TMP_ALLOC_LIMBS (tp, 2*s->size+1, s->align_wp2);             \
+                                                                       \
+    MPN_COPY (ap,         s->xp, s->size);                             \
+    MPN_COPY (ap+s->size, s->xp, s->size);                             \
+                                                                       \
+    /* modulus must be odd */                                          \
+    MPN_COPY (mp, s->yp, s->size);                                     \
+    mp[0] |= 1;                                                                \
+    /* 2-limb inverse from a zero-padded copy: mp[1] is absent at size 1 */ \
+    mlow[0] = mp[0];  mlow[1] = (s->size >= 2 ? mp[1] : 0);            \
+    mpn_binvert (invp, mlow, 2, tp);                                   \
+    invp[0] = -invp[0]; invp[1] = ~invp[1];                            \
+                                                                       \
+    speed_operand_src (s, ap, 2*s->size+1);                            \
+    speed_operand_dst (s, tp, 2*s->size+1);                            \
+    speed_operand_src (s, mp, s->size);                                        \
+    speed_operand_dst (s, cp, s->size);                                        \
+    speed_cache_fill (s);                                              \
+                                                                       \
+    speed_starttime ();                                                        \
+    i = s->reps;                                                       \
+    do {                                                               \
+      MPN_COPY (tp, ap, 2*s->size);                                    \
+      function (cp, tp, mp, s->size, invp);                            \
+    } while (--i != 0);                                                        \
+    t = speed_endtime ();                                              \
+    TMP_FREE;                                                          \
+    return t;                                                          \
+  }
+#define SPEED_ROUTINE_REDC_N(function)                                 \
+  { /* Time function (cp, tp, mp, s->size, invp), invp being s->size limbs. */ \
+    unsigned   i;                                                      \
+    mp_ptr     cp, mp, tp, ap, invp;                                   \
+    double     t;                                                      \
+    TMP_DECL;                                                          \
+                                                                       \
+    SPEED_RESTRICT_COND (s->size > 8);                                 \
+                                                                       \
+    TMP_MARK;                                                          \
+    SPEED_TMP_ALLOC_LIMBS (ap, 2*s->size+1, s->align_xp);              \
+    SPEED_TMP_ALLOC_LIMBS (mp, s->size,     s->align_yp);              \
+    SPEED_TMP_ALLOC_LIMBS (cp, s->size,     s->align_wp);              \
+    SPEED_TMP_ALLOC_LIMBS (tp, 2*s->size+1, s->align_wp2);             \
+    SPEED_TMP_ALLOC_LIMBS (invp, s->size,   s->align_wp2); /* align? */        \
+    /* ap = s->xp twice, back to back; its extra top limb is never set */ \
+    MPN_COPY (ap,         s->xp, s->size);                             \
+    MPN_COPY (ap+s->size, s->xp, s->size);                             \
+                                                                       \
+    /* modulus must be odd */                                          \
+    MPN_COPY (mp, s->yp, s->size);                                     \
+    mp[0] |= 1;                                                                \
+    mpn_binvert (invp, mp, s->size, tp);                               \
+    /* invp is passed as computed (REDC_1 and REDC_2 negate theirs) and        \
+       is not registered as an operand below */                                \
+    speed_operand_dst (s, tp, 2*s->size+1);                            \
+    speed_operand_src (s, mp, s->size);                                        \
+    speed_operand_dst (s, cp, s->size);                                        \
+    speed_cache_fill (s);                                              \
+    /* tp is reloaded from ap on every rep, so that copy is timed too */ \
+    speed_starttime ();                                                        \
+    i = s->reps;                                                       \
+    do {                                                               \
+      MPN_COPY (tp, ap, 2*s->size);                                    \
+      function (cp, tp, mp, s->size, invp);                            \
+    } while (--i != 0);                                                        \
+    t = speed_endtime ();                                              \
+                                                                       \
+    TMP_FREE;                                                          \
+    return t;                                                          \
+  }
+
+
+#define SPEED_ROUTINE_MPN_POPCOUNT(function)                           \
+  { /* Time function (s->xp, s->size); its result is discarded. */     \
+    unsigned i;                                                                \
+                                                                       \
+    SPEED_RESTRICT_COND (s->size >= 1);                                        \
+                                                                       \
+    speed_operand_src (s, s->xp, s->size);                             \
+    speed_cache_fill (s);                                              \
+    /* the do-while assumes s->reps >= 1 (a zero count would wrap) */  \
+    speed_starttime ();                                                        \
+    i = s->reps;                                                       \
+    do                                                                 \
+      function (s->xp, s->size);                                       \
+    while (--i != 0);                                                  \
+                                                                       \
+    return speed_endtime ();                                           \
+  }
+
+#define SPEED_ROUTINE_MPN_HAMDIST(function)                            \
+  { /* Time function (s->xp, s->yp, s->size); its result is discarded. */ \
+    unsigned i;                                                                \
+                                                                       \
+    SPEED_RESTRICT_COND (s->size >= 1);                                        \
+                                                                       \
+    speed_operand_src (s, s->xp, s->size);                             \
+    speed_operand_src (s, s->yp, s->size);                             \
+    speed_cache_fill (s);                                              \
+    /* the do-while assumes s->reps >= 1 (a zero count would wrap) */  \
+    speed_starttime ();                                                        \
+    i = s->reps;                                                       \
+    do                                                                 \
+      function (s->xp, s->yp, s->size);                                        \
+    while (--i != 0);                                                  \
+                                                                       \
+    return speed_endtime ();                                           \
+  }
+
+
+#define SPEED_ROUTINE_MPZ_UI(function)                                 \
+  { /* Time function (z, s->size): s->size is the integer argument. */ \
+    mpz_t     z;                                                       \
+    unsigned  i;                                                       \
+    double    t;                                                       \
+                                                                       \
+    SPEED_RESTRICT_COND (s->size >= 0);                                        \
+                                                                       \
+    mpz_init (z);                                                      \
+    /* one z is reused for every rep; no operands are registered */    \
+    speed_starttime ();                                                        \
+    i = s->reps;                                                       \
+    do                                                                 \
+      function (z, s->size);                                           \
+    while (--i != 0);                                                  \
+    t = speed_endtime ();                                              \
+                                                                       \
+    mpz_clear (z);                                                     \
+    return t;                                                          \
+  }
+/* The fac_ui, fib_ui and lucnum_ui variants reuse that body unchanged. */
+#define SPEED_ROUTINE_MPZ_FAC_UI(function)    SPEED_ROUTINE_MPZ_UI(function)
+#define SPEED_ROUTINE_MPZ_FIB_UI(function)    SPEED_ROUTINE_MPZ_UI(function)
+#define SPEED_ROUTINE_MPZ_LUCNUM_UI(function) SPEED_ROUTINE_MPZ_UI(function)
+
+
+#define SPEED_ROUTINE_MPZ_2_UI(function)                               \
+  { /* Time function (z, z2, s->size): s->size is the integer argument. */ \
+    mpz_t     z, z2;                                                   \
+    unsigned  i;                                                       \
+    double    t;                                                       \
+                                                                       \
+    SPEED_RESTRICT_COND (s->size >= 0);                                        \
+                                                                       \
+    mpz_init (z);                                                      \
+    mpz_init (z2);                                                     \
+    /* z and z2 are reused for every rep; no operands are registered */        \
+    speed_starttime ();                                                        \
+    i = s->reps;                                                       \
+    do                                                                 \
+      function (z, z2, s->size);                                       \
+    while (--i != 0);                                                  \
+    t = speed_endtime ();                                              \
+                                                                       \
+    mpz_clear (z);                                                     \
+    mpz_clear (z2);                                                    \
+    return t;                                                          \
+  }
+/* The fib2_ui and lucnum2_ui variants reuse that body unchanged. */
+#define SPEED_ROUTINE_MPZ_FIB2_UI(function)    SPEED_ROUTINE_MPZ_2_UI(function)
+#define SPEED_ROUTINE_MPZ_LUCNUM2_UI(function) SPEED_ROUTINE_MPZ_2_UI(function)
+
+
+#define SPEED_ROUTINE_MPN_FIB2_UI(function)                            \
+  { /* Time function (fp, f1p, s->size): s->size is the integer argument. */ \
+    mp_ptr     fp, f1p;                                                        \
+    mp_size_t  alloc;                                                  \
+    unsigned   i;                                                      \
+    double     t;                                                      \
+    TMP_DECL;                                                          \
+                                                                       \
+    SPEED_RESTRICT_COND (s->size >= 0);                                        \
+                                                                       \
+    TMP_MARK;                                                          \
+    alloc = MPN_FIB2_SIZE (s->size);                                   \
+    SPEED_TMP_ALLOC_LIMBS (fp, alloc, s->align_xp);                    \
+    SPEED_TMP_ALLOC_LIMBS (f1p, alloc, s->align_yp);                   \
+    /* both outputs get MPN_FIB2_SIZE limbs; no operands are registered */ \
+    speed_starttime ();                                                        \
+    i = s->reps;                                                       \
+    do                                                                 \
+      function (fp, f1p, s->size);                                     \
+    while (--i != 0);                                                  \
+    t = speed_endtime ();                                              \
+                                                                       \
+    TMP_FREE;                                                          \
+    return t;                                                          \
+  }
+
+
+
+/* Calculate b^e mod m: b and m are the s->size limbs at s->xp and s->yp,
+   e is the first 6 limbs of s->xp_block.  m is forced to odd so that redc
+   can be used; e is kept small so the calculation doesn't take too long.  */
+#define SPEED_ROUTINE_MPZ_POWM(function)                               \
+  {                                                                    \
+    mpz_t     r, b, e, m;                                              \
+    unsigned  i;                                                       \
+    double    t;                                                       \
+                                                                       \
+    SPEED_RESTRICT_COND (s->size >= 1);                                        \
+                                                                       \
+    mpz_init (r);                                                      \
+    mpz_init_set_n (b, s->xp, s->size);                                        \
+    mpz_init_set_n (m, s->yp, s->size);                                        \
+    mpz_setbit (m, 0); /* force m to odd */                            \
+    mpz_init_set_n (e, s->xp_block, 6);                                        \
+    /* the same r, b, e, m are used on every rep; no cache priming */  \
+    speed_starttime ();                                                        \
+    i = s->reps;                                                       \
+    do                                                                 \
+      function (r, b, e, m);                                           \
+    while (--i != 0);                                                  \
+    t = speed_endtime ();                                              \
+                                                                       \
+    mpz_clear (r);                                                     \
+    mpz_clear (b);                                                     \
+    mpz_clear (e);                                                     \
+    mpz_clear (m);                                                     \
+    return t;                                                          \
+  }
+
+/* (m-2)^e mod m, where e is s->r if non-zero, else ULONG_MAX/3 (0x55..55) */
+#define SPEED_ROUTINE_MPZ_POWM_UI(function)                            \
+  {                                                                    \
+    mpz_t     r, b, m;                                                 \
+    unsigned  long  e;                                                 \
+    unsigned  i;                                                       \
+    double    t;                                                       \
+                                                                       \
+    SPEED_RESTRICT_COND (s->size >= 1);                                        \
+                                                                       \
+    mpz_init (r);                                                      \
+                                                                       \
+    /* force m to odd, the same way SPEED_ROUTINE_MPZ_POWM does */     \
+    mpz_init (m);                                                      \
+    mpz_set_n (m, s->xp, s->size);                                     \
+    mpz_setbit (m, 0);                                                 \
+    /* default exponent has alternating bits; a non-zero s->r wins */  \
+    e = (~ (unsigned long) 0) / 3;                                     \
+    if (s->r != 0)                                                     \
+      e = s->r;                                                                \
+                                                                       \
+    mpz_init_set (b, m);                                               \
+    mpz_sub_ui (b, b, 2);                                              \
+                                                                       \
+    i = s->reps;                                                       \
+    speed_starttime ();                                                        \
+    do                                                                 \
+      function (r, b, e, m);                                           \
+    while (--i != 0);                                                  \
+    t = speed_endtime ();                                              \
+                                                                       \
+    mpz_clear (r);                                                     \
+    mpz_clear (b);                                                     \
+    mpz_clear (m);                                                     \
+    return t;                                                          \
+  }
+
+
+#define SPEED_ROUTINE_MPN_ADDSUB_CALL(call)                            \
+  { /* Time `call', which may refer to the locals wp, wp2, xp, yp. */  \
+    mp_ptr    wp, wp2, xp, yp;                                         \
+    unsigned  i;                                                       \
+    double    t;                                                       \
+    TMP_DECL;                                                          \
+                                                                       \
+    SPEED_RESTRICT_COND (s->size >= 0);                                        \
+                                                                       \
+    TMP_MARK;                                                          \
+    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);                  \
+    SPEED_TMP_ALLOC_LIMBS (wp2, s->size, s->align_wp2);                        \
+    xp = s->xp;                                                                \
+    yp = s->yp;                                                                \
+    /* s->r: 0 = sources separate, 1-4 = aliased onto wp/wp2 as below */ \
+    if (s->r == 0)     ;                                               \
+    else if (s->r == 1) { xp = wp;           }                         \
+    else if (s->r == 2) {          yp = wp2; }                         \
+    else if (s->r == 3) { xp = wp;  yp = wp2; }                                \
+    else if (s->r == 4) { xp = wp2; yp = wp;  }                                \
+    else {                        /* any other s->r is rejected */     \
+      TMP_FREE;                                                                \
+      return -1.0;                                                     \
+    }                                                                  \
+    if (xp != s->xp) MPN_COPY (xp, s->xp, s->size);                    \
+    if (yp != s->yp) MPN_COPY (yp, s->yp, s->size);                    \
+    /* an aliased source was just seeded with the original operand data */ \
+    speed_operand_src (s, xp, s->size);                                        \
+    speed_operand_src (s, yp, s->size);                                        \
+    speed_operand_dst (s, wp, s->size);                                        \
+    speed_operand_dst (s, wp2, s->size);                               \
+    speed_cache_fill (s);                                              \
+                                                                       \
+    speed_starttime ();                                                        \
+    i = s->reps;                                                       \
+    do                                                                 \
+      call;                                                            \
+    while (--i != 0);                                                  \
+    t = speed_endtime ();                                              \
+                                                                       \
+    TMP_FREE;                                                          \
+    return t;                                                          \
+  }
+
+#define SPEED_ROUTINE_MPN_ADDSUB_N(function)   /* two dsts, two srcs */        \
+  SPEED_ROUTINE_MPN_ADDSUB_CALL                                                \
+    (function (wp, wp2, xp, yp, s->size));
+/* As SPEED_ROUTINE_MPN_ADDSUB_N, plus a constant 0 as the last argument. */
+#define SPEED_ROUTINE_MPN_ADDSUB_NC(function)                          \
+  SPEED_ROUTINE_MPN_ADDSUB_CALL                                                \
+    (function (wp, wp2, xp, yp, s->size, 0));
+
+
+/* An Nx1 gcd: s->size limbs copied from s->xp against the one limb s->r. */
+#define SPEED_ROUTINE_MPN_GCD_1N(function)                             \
+  {                                                                    \
+    mp_ptr    xp;                                                      \
+    unsigned  i;                                                       \
+    double    t;                                                       \
+    TMP_DECL;                                                          \
+                                                                       \
+    SPEED_RESTRICT_COND (s->size >= 1);                                        \
+    SPEED_RESTRICT_COND (s->r != 0);                                   \
+                                                                       \
+    TMP_MARK;                                                          \
+    SPEED_TMP_ALLOC_LIMBS (xp, s->size, s->align_xp);                  \
+    MPN_COPY (xp, s->xp, s->size);                                     \
+    xp[0] |= refmpn_zero_p (xp, s->size); /* presumably keeps xp != 0 */ \
+    /* register the private copy, since that is what function reads */ \
+    speed_operand_src (s, xp, s->size);                                        \
+    speed_cache_fill (s);                                              \
+                                                                       \
+    speed_starttime ();                                                        \
+    i = s->reps;                                                       \
+    do                                                                 \
+      function (xp, s->size, s->r);                                    \
+    while (--i != 0);                                                  \
+    t = speed_endtime ();                                              \
+                                                                       \
+    TMP_FREE;                                                          \
+    return t;                                                          \
+  }
+
+
+/* SPEED_BLOCK_SIZE many one-limb GCDs: x has s->size bits, y has s->r bits
+   (s->size bits if s->r is 0).  `setup' and `call' may use px, py, i, j.  */
+#define SPEED_ROUTINE_MPN_GCD_1_CALL(setup, call)                      \
+  {                                                                    \
+    unsigned  i, j;                                                    \
+    mp_ptr    px, py;                                                  \
+    mp_limb_t x_mask, y_mask;                                          \
+    double    t;                                                       \
+    TMP_DECL;                                                          \
+                                                                       \
+    SPEED_RESTRICT_COND (s->size >= 1);                                        \
+    SPEED_RESTRICT_COND (s->size <= mp_bits_per_limb);                 \
+    /* NOTE(review): s->r, y's bit count, gets no such range check */  \
+    TMP_MARK;                                                          \
+    SPEED_TMP_ALLOC_LIMBS (px, SPEED_BLOCK_SIZE, s->align_xp);         \
+    SPEED_TMP_ALLOC_LIMBS (py, SPEED_BLOCK_SIZE, s->align_yp);         \
+    MPN_COPY (px, s->xp_block, SPEED_BLOCK_SIZE);                      \
+    MPN_COPY (py, s->yp_block, SPEED_BLOCK_SIZE);                      \
+    /* mask each limb to its bit count, bump a 0 to 1, then run setup */ \
+    x_mask = MP_LIMB_T_LOWBITMASK (s->size);                           \
+    y_mask = MP_LIMB_T_LOWBITMASK (s->r != 0 ? s->r : s->size);                \
+    for (i = 0; i < SPEED_BLOCK_SIZE; i++)                             \
+      {                                                                        \
+       px[i] &= x_mask; px[i] += (px[i] == 0);                         \
+       py[i] &= y_mask; py[i] += (py[i] == 0);                         \
+       setup;                                                          \
+      }                                                                        \
+                                                                       \
+    speed_operand_src (s, px, SPEED_BLOCK_SIZE);                       \
+    speed_operand_src (s, py, SPEED_BLOCK_SIZE);                       \
+    speed_cache_fill (s);                                              \
+    /* call runs with j = SPEED_BLOCK_SIZE down to 1, once per element */ \
+    speed_starttime ();                                                        \
+    i = s->reps;                                                       \
+    do                                                                 \
+      {                                                                        \
+       j = SPEED_BLOCK_SIZE;                                           \
+       do                                                              \
+         {                                                             \
+           call;                                                       \
+         }                                                             \
+       while (--j != 0);                                               \
+      }                                                                        \
+    while (--i != 0);                                                  \
+    t = speed_endtime ();                                              \
+                                                                       \
+    TMP_FREE;                                                          \
+    /* each rep made SPEED_BLOCK_SIZE calls; record that for the caller */ \
+    s->time_divisor = SPEED_BLOCK_SIZE;                                        \
+    return t;                                                          \
+  }
+
+#define SPEED_ROUTINE_MPN_GCD_1(function)   /* empty setup argument */ \
+  SPEED_ROUTINE_MPN_GCD_1_CALL( , function (&px[j-1], 1, py[j-1]))
+
+#define SPEED_ROUTINE_MPN_JACBASE(function) /* x, y by value, then 0 */        \
+  SPEED_ROUTINE_MPN_GCD_1_CALL                                         \
+    ({                                                                 \
+       /* require x<y, y odd, y!=1 */                                  \
+       px[i] %= py[i];                                                 \
+       px[i] |= 1;    /* NOTE(review): can give x==y when y is odd */  \
+       py[i] |= 1;                                                     \
+       if (py[i]==1) py[i]=3;                                          \
+     },                                                                        \
+     function (px[j-1], py[j-1], 0))
+
+
+/* Run some GCDs of s->size limbs each.  The number of different data values
+   is decreased as s->size**2, since GCD is a quadratic algorithm.
+   SPEED_ROUTINE_MPN_GCD runs more times than SPEED_ROUTINE_MPN_GCDEXT
+   though, because the plain gcd is about twice as fast as gcdext.
+
+   DATAFACTOR scales the number of operand pairs ("pieces"), which is
+   SPEED_BLOCK_SIZE * DATAFACTOR / s->size**2, limited to at most
+   SPEED_BLOCK_SIZE / s->size and at least 1.  DATAFACTOR is parenthesized
+   in the expansion so that an expression argument keeps its meaning.
+
+   CALL is expanded inside the timed loop and may use the locals wp, wp2,
+   xtmp and ytmp (each allocated with s->size+1 limbs) as well as s.  xtmp
+   and ytmp are reloaded from the prepared operands before every CALL.
+
+   s->size >= 1 is required through SPEED_RESTRICT_COND.  On completion
+   s->time_divisor is set to the number of pieces and the value obtained
+   from speed_endtime () is returned.  */
+
+#define SPEED_ROUTINE_MPN_GCD_CALL(datafactor, call)                   \
+  {                                                                    \
+    unsigned  i;                                                       \
+    mp_size_t j, pieces, psize;                                        \
+    mp_ptr    wp, wp2, xtmp, ytmp, px, py;                             \
+    double    t;                                                       \
+    TMP_DECL;                                                          \
+                                                                       \
+    SPEED_RESTRICT_COND (s->size >= 1);                                \
+                                                                       \
+    TMP_MARK;                                                          \
+    SPEED_TMP_ALLOC_LIMBS (xtmp, s->size+1, s->align_xp);              \
+    SPEED_TMP_ALLOC_LIMBS (ytmp, s->size+1, s->align_yp);              \
+    SPEED_TMP_ALLOC_LIMBS (wp,   s->size+1, s->align_wp);              \
+    SPEED_TMP_ALLOC_LIMBS (wp2,  s->size+1, s->align_wp2);             \
+                                                                       \
+    /* Fewer pieces as the operands grow, never less than one.  */     \
+    pieces = SPEED_BLOCK_SIZE * (datafactor) / s->size / s->size;      \
+    pieces = MIN (pieces, SPEED_BLOCK_SIZE / s->size);                 \
+    pieces = MAX (pieces, 1);                                          \
+                                                                       \
+    /* A single piece is taken from s->xp and s->yp, several pieces    \
+       from the block data (presumably because one piece of s->size    \
+       limbs may not fit in the block data - confirm).  */             \
+    psize = pieces * s->size;                                          \
+    px = TMP_ALLOC_LIMBS (psize);                                      \
+    py = TMP_ALLOC_LIMBS (psize);                                      \
+    MPN_COPY (px, pieces==1 ? s->xp : s->xp_block, psize);             \
+    MPN_COPY (py, pieces==1 ? s->yp : s->yp_block, psize);             \
+                                                                       \
+    /* Requirements: x >= y, y must be odd, high limbs != 0.           \
+       No need to ensure random numbers are really great.              \
+       Only the high limbs are compared and exchanged: after this      \
+       x's high limb is strictly greater than y's, then y's low        \
+       limb is made odd.  */                                           \
+    for (j = 0; j < pieces; j++)                                       \
+      {                                                                \
+       mp_ptr  x = px + j * s->size;                                   \
+       mp_ptr  y = py + j * s->size;                                   \
+       if (x[s->size - 1] == 0) x[s->size - 1] = 1;                    \
+       if (y[s->size - 1] == 0) y[s->size - 1] = 1;                    \
+                                                                       \
+       if (x[s->size - 1] < y[s->size - 1])                            \
+         MP_LIMB_T_SWAP (x[s->size - 1], y[s->size - 1]);              \
+       else if (x[s->size - 1] == y[s->size - 1])                      \
+         {                                                             \
+           x[s->size - 1] = 2;                                         \
+           y[s->size - 1] = 1;                                         \
+         }                                                             \
+       y[0] |= 1;                                                      \
+      }                                                                \
+                                                                       \
+    speed_operand_src (s, px, psize);                                  \
+    speed_operand_src (s, py, psize);                                  \
+    speed_operand_dst (s, xtmp, s->size);                              \
+    speed_operand_dst (s, ytmp, s->size);                              \
+    speed_operand_dst (s, wp, s->size);                                \
+    speed_cache_fill (s);                                              \
+                                                                       \
+    speed_starttime ();                                                \
+    i = s->reps;                                                       \
+    do                                                                 \
+      {                                                                \
+       /* walk the pieces from last to first */                        \
+       j = pieces;                                                     \
+       do                                                              \
+         {                                                             \
+           MPN_COPY (xtmp, px+(j - 1)*s->size, s->size);               \
+           MPN_COPY (ytmp, py+(j - 1)*s->size, s->size);               \
+           call;                                                       \
+         }                                                             \
+       while (--j != 0);                                               \
+      }                                                                \
+    while (--i != 0);                                                  \
+    t = speed_endtime ();                                              \
+                                                                       \
+    TMP_FREE;                                                          \
+                                                                       \
+    s->time_divisor = pieces;                                          \
+    return t;                                                          \
+  }
+
+/* Speed routine body for a plain gcd: SPEED_ROUTINE_MPN_GCD_CALL with a
+   data factor of 8, calling FUNCTION (wp, xtmp, s->size, ytmp, s->size).  */
+#define SPEED_ROUTINE_MPN_GCD(function)                                \
+  SPEED_ROUTINE_MPN_GCD_CALL                                           \
+    (8, function (wp, xtmp, s->size, ytmp, s->size))
+
+#define SPEED_ROUTINE_MPN_GCDEXT(function)                             \
+  SPEED_ROUTINE_MPN_GCD_CALL                                           \
+    (4, { mp_size_t  wp2size;                                          \
+         function (wp, wp2, &wp2size, xtmp, s->size, ytmp, s->size); })
+
+
+/* Speed routine body timing
+   FUNCTION (wp, wp2, &wp2size, xtmp, s->size, ytmp, s->size)
+   over operands that are varied only in their three high limbs.
+
+   xtmp and ytmp start as copies of s->xp and s->yp.  Before each call
+   limbs s->size-3 .. s->size-1 of both are overwritten with the next
+   prepared triple (taken from s->xp_block and s->yp_block) and the low
+   limb of ytmp is forced odd.
+
+   s->size >= 3 is required through SPEED_RESTRICT_COND, since the three
+   high limbs must lie inside xtmp and ytmp.  s->time_divisor is set to the
+   number of triples and the value from speed_endtime () is returned.  */
+#define SPEED_ROUTINE_MPN_GCDEXT_ONE(function)                         \
+  {                                                                    \
+    unsigned  i;                                                       \
+    mp_size_t j, pieces, psize, wp2size;                               \
+    mp_ptr    wp, wp2, xtmp, ytmp, px, py;                             \
+    double    t;                                                       \
+    TMP_DECL;                                                          \
+                                                                       \
+    /* The timed loop stores into limbs s->size-3 .. s->size-1 of      \
+       xtmp and ytmp; with fewer than 3 limbs those stores would       \
+       land before the start of the buffers.  */                       \
+    SPEED_RESTRICT_COND (s->size >= 3);                                \
+                                                                       \
+    TMP_MARK;                                                          \
+                                                                       \
+    SPEED_TMP_ALLOC_LIMBS (xtmp, s->size+1, s->align_xp);              \
+    SPEED_TMP_ALLOC_LIMBS (ytmp, s->size+1, s->align_yp);              \
+    MPN_COPY (xtmp, s->xp, s->size);                                   \
+    MPN_COPY (ytmp, s->yp, s->size);                                   \
+                                                                       \
+    SPEED_TMP_ALLOC_LIMBS (wp, s->size+1, s->align_wp);                \
+    SPEED_TMP_ALLOC_LIMBS (wp2, s->size+1, s->align_wp2);              \
+                                                                       \
+    /* as many 3-limb triples as fit in the block data */              \
+    pieces = SPEED_BLOCK_SIZE / 3;                                     \
+    psize = 3 * pieces;                                                \
+    px = TMP_ALLOC_LIMBS (psize);                                      \
+    py = TMP_ALLOC_LIMBS (psize);                                      \
+    MPN_COPY (px, s->xp_block, psize);                                 \
+    MPN_COPY (py, s->yp_block, psize);                                 \
+                                                                       \
+    /* x must have at least as many bits as y,                         \
+       high limbs must be non-zero */                                  \
+    for (j = 0; j < pieces; j++)                                       \
+      {                                                                \
+       mp_ptr  x = px+3*j;                                             \
+       mp_ptr  y = py+3*j;                                             \
+       x[2] += (x[2] == 0);                                            \
+       y[2] += (y[2] == 0);                                            \
+       if (x[2] < y[2])                                                \
+         MP_LIMB_T_SWAP (x[2], y[2]);                                  \
+      }                                                                \
+                                                                       \
+    speed_operand_src (s, px, psize);                                  \
+    speed_operand_src (s, py, psize);                                  \
+    speed_operand_dst (s, xtmp, s->size);                              \
+    speed_operand_dst (s, ytmp, s->size);                              \
+    speed_operand_dst (s, wp, s->size);                                \
+    speed_cache_fill (s);                                              \
+                                                                       \
+    speed_starttime ();                                                \
+    i = s->reps;                                                       \
+    do                                                                 \
+      {                                                                \
+       mp_ptr  x = px;                                                 \
+       mp_ptr  y = py;                                                 \
+       /* the three most significant limbs of each operand */          \
+       mp_ptr  xth = &xtmp[s->size-3];                                 \
+       mp_ptr  yth = &ytmp[s->size-3];                                 \
+       j = pieces;                                                     \
+       do                                                              \
+         {                                                             \
+           xth[0] = x[0], xth[1] = x[1], xth[2] = x[2];                \
+           yth[0] = y[0], yth[1] = y[1], yth[2] = y[2];                \
+                                                                       \
+           ytmp[0] |= 1; /* y must be odd, */                          \
+                                                                       \
+           function (wp, wp2, &wp2size, xtmp, s->size, ytmp, s->size); \
+                                                                       \
+           x += 3;                                                     \
+           y += 3;                                                     \
+         }                                                             \
+       while (--j != 0);                                               \
+      }                                                                \
+    while (--i != 0);                                                  \
+    t = speed_endtime ();                                              \
+                                                                       \
+    TMP_FREE;                                                          \
+                                                                       \
+    s->time_divisor = pieces;                                          \
+    return t;                                                          \
+  }
+
+/* Speed routine body timing FUNCTION (a, b) on mpz_t operands of s->size
+   limbs each.
+
+   SPEED_BLOCK_SIZE / s->size operand pairs (at least one) are prepared
+   with b odd and both high limbs non-zero.  a and b are not initialized
+   as mpz_t objects: only their size (SIZ) and limb pointer (PTR) fields
+   are set, the pointers being aimed into the prepared data before each
+   call.  NOTE(review): this presumably relies on FUNCTION never
+   reallocating or writing its operands - confirm.
+
+   s->size >= 1 is required through SPEED_RESTRICT_COND, since the
+   preparation touches limb 0 and limb s->size-1 of every operand.
+   s->time_divisor is set to the number of pairs.  */
+#define SPEED_ROUTINE_MPZ_JACOBI(function)                             \
+  {                                                                    \
+    mpz_t     a, b;                                                    \
+    unsigned  i;                                                       \
+    mp_size_t j, pieces, psize;                                        \
+    mp_ptr    px, py;                                                  \
+    double    t;                                                       \
+    TMP_DECL;                                                          \
+                                                                       \
+    /* With no limbs the setup below would write y[0] and              \
+       x[-1], y[-1] in zero-limb allocations.  */                      \
+    SPEED_RESTRICT_COND (s->size >= 1);                                \
+                                                                       \
+    TMP_MARK;                                                          \
+    pieces = SPEED_BLOCK_SIZE / MAX (s->size, 1);                      \
+    pieces = MAX (pieces, 1);                                          \
+    s->time_divisor = pieces;                                          \
+                                                                       \
+    psize = pieces * s->size;                                          \
+    px = TMP_ALLOC_LIMBS (psize);                                      \
+    py = TMP_ALLOC_LIMBS (psize);                                      \
+    MPN_COPY (px, pieces==1 ? s->xp : s->xp_block, psize);             \
+    MPN_COPY (py, pieces==1 ? s->yp : s->yp_block, psize);             \
+                                                                       \
+    for (j = 0; j < pieces; j++)                                       \
+      {                                                                \
+       mp_ptr  x = px+j*s->size;                                       \
+       mp_ptr  y = py+j*s->size;                                       \
+                                                                       \
+       /* y odd */                                                     \
+       y[0] |= 1;                                                      \
+                                                                       \
+       /* high limbs non-zero */                                       \
+       if (x[s->size-1] == 0) x[s->size-1] = 1;                        \
+       if (y[s->size-1] == 0) y[s->size-1] = 1;                        \
+      }                                                                \
+                                                                       \
+    /* both operands are positive and exactly s->size limbs long */    \
+    SIZ(a) = s->size;                                                  \
+    SIZ(b) = s->size;                                                  \
+                                                                       \
+    speed_operand_src (s, px, psize);                                  \
+    speed_operand_src (s, py, psize);                                  \
+    speed_cache_fill (s);                                              \
+                                                                       \
+    speed_starttime ();                                                \
+    i = s->reps;                                                       \
+    do                                                                 \
+      {                                                                \
+       j = pieces;                                                     \
+       do                                                              \
+         {                                                             \
+           /* aim a and b at pair number j-1 */                        \
+           PTR(a) = px+(j-1)*s->size;                                  \
+           PTR(b) = py+(j-1)*s->size;                                  \
+           function (a, b);                                            \
+         }                                                             \
+       while (--j != 0);                                               \
+      }                                                                \
+    while (--i != 0);                                                  \
+    t = speed_endtime ();                                              \
+                                                                       \
+    TMP_FREE;                                                          \
+    return t;                                                          \
+  }
+
+/* Speed routine body timing FUNCTION (qp, 0, np, s->size, dp), where np is
+   a private s->size limb copy of s->xp and dp is a 2-limb divisor taken
+   from s->yp_block with its high bit forced on.  Requires s->size >= 2.  */
+#define SPEED_ROUTINE_MPN_DIVREM_2(function)                           \
+  {                                                                    \
+    mp_limb_t dp[2];                                                   \
+    mp_ptr    qp, np;                                                  \
+    double    t;                                                       \
+    unsigned  i;                                                       \
+    TMP_DECL;                                                          \
+                                                                       \
+    SPEED_RESTRICT_COND (s->size >= 2);                                \
+                                                                       \
+    TMP_MARK;                                                          \
+    SPEED_TMP_ALLOC_LIMBS (np, s->size, s->align_xp);                  \
+    SPEED_TMP_ALLOC_LIMBS (qp, s->size, s->align_wp);                  \
+                                                                       \
+    /* the divisor has to be normalized: top bit of dp[1] set */       \
+    MPN_COPY (dp, s->yp_block, 2);                                     \
+    dp[1] |= GMP_NUMB_HIGHBIT;                                         \
+                                                                       \
+    /* the dividend gets destroyed, so work on a copy */               \
+    MPN_COPY (np, s->xp, s->size);                                     \
+                                                                       \
+    speed_operand_src (s, np, s->size);                                \
+    speed_operand_src (s, dp, 2);                                      \
+    speed_operand_dst (s, qp, s->size);                                \
+    speed_cache_fill (s);                                              \
+                                                                       \
+    speed_starttime ();                                                \
+    i = s->reps;                                                       \
+    do                                                                 \
+      function (qp, 0, np, s->size, dp);                               \
+    while (--i != 0);                                                  \
+    t = speed_endtime ();                                              \
+                                                                       \
+    TMP_FREE;                                                          \
+    return t;                                                          \
+  }
+
+
+/* Speed routine body timing FUNCTION (n, n) on a single limb n.
+   SPEED_BLOCK_SIZE calls are made per repetition and s->time_divisor is
+   set to SPEED_BLOCK_SIZE.  Before every call twice a limb from
+   s->xp_block is added to n, so each argument depends on the data and on
+   the value n held after the previous call.
+   NOTE(review): FUNCTION (n, n) presumably stores its result back into n
+   and needs n odd - confirm against the function or macro being timed.  */
+#define SPEED_ROUTINE_MODLIMB_INVERT(function)                         \
+  {                                                                    \
+    unsigned   i, j;                                                   \
+    mp_ptr     xp;                                                     \
+    mp_limb_t  n = 1;                                                  \
+    double     t;                                                      \
+                                                                       \
+    /* biased by one limb so that xp[j], for j running from            \
+       SPEED_BLOCK_SIZE down to 1, reads s->xp_block[j-1] */           \
+    xp = s->xp_block-1;                                                        \
+                                                                       \
+    speed_operand_src (s, s->xp_block, SPEED_BLOCK_SIZE);              \
+    speed_cache_fill (s);                                              \
+                                                                       \
+    speed_starttime ();                                                        \
+    i = s->reps;                                                       \
+    do                                                                 \
+      {                                                                        \
+       j = SPEED_BLOCK_SIZE;                                           \
+       do                                                              \
+         {                                                             \
+           /* randomized but successively dependent; the added         \
+              value is even, so this step keeps the parity of n */     \
+           n += (xp[j] << 1);                                          \
+                                                                       \
+           function (n, n);                                            \
+         }                                                             \
+       while (--j != 0);                                               \
+      }                                                                        \
+    while (--i != 0);                                                  \
+    t = speed_endtime ();                                              \
+                                                                       \
+    /* make sure the compiler won't optimize away n */                 \
+    noop_1 (n);                                                                \
+                                                                       \
+    s->time_divisor = SPEED_BLOCK_SIZE;                                        \
+    return t;                                                          \
+  }
+
+
+/* Speed routine body timing FUNCTION (r1p, r2p, s->xp, s->size), with r1p
+   and r2p two separate s->size limb destinations.  The source s->xp is
+   passed directly on every repetition.  Requires s->size >= 1.  */
+#define SPEED_ROUTINE_MPN_SQRTREM(function)                            \
+  {                                                                    \
+    mp_ptr    r1p, r2p;                                                \
+    double    t;                                                       \
+    unsigned  i;                                                       \
+    TMP_DECL;                                                          \
+                                                                       \
+    SPEED_RESTRICT_COND (s->size >= 1);                                \
+                                                                       \
+    TMP_MARK;                                                          \
+    SPEED_TMP_ALLOC_LIMBS (r1p, s->size, s->align_wp);                 \
+    SPEED_TMP_ALLOC_LIMBS (r2p, s->size, s->align_wp2);                \
+                                                                       \
+    speed_operand_src (s, s->xp, s->size);                             \
+    speed_operand_dst (s, r1p, s->size);                               \
+    speed_operand_dst (s, r2p, s->size);                               \
+    speed_cache_fill (s);                                              \
+                                                                       \
+    speed_starttime ();                                                \
+    i = s->reps;                                                       \
+    do                                                                 \
+      function (r1p, r2p, s->xp, s->size);                             \
+    while (--i != 0);                                                  \
+    t = speed_endtime ();                                              \
+                                                                       \
+    TMP_FREE;                                                          \
+    return t;                                                          \
+  }
+
+/* Speed routine body timing FUNCTION (r1p, r2p, s->xp, s->size, s->r),
+   with r1p and r2p two separate s->size limb destinations and s->r passed
+   through unchanged as the last argument.  Requires s->size >= 1.  */
+#define SPEED_ROUTINE_MPN_ROOTREM(function)                            \
+  {                                                                    \
+    mp_ptr    r1p, r2p;                                                \
+    double    t;                                                       \
+    unsigned  i;                                                       \
+    TMP_DECL;                                                          \
+                                                                       \
+    SPEED_RESTRICT_COND (s->size >= 1);                                \
+                                                                       \
+    TMP_MARK;                                                          \
+    SPEED_TMP_ALLOC_LIMBS (r1p, s->size, s->align_wp);                 \
+    SPEED_TMP_ALLOC_LIMBS (r2p, s->size, s->align_wp2);                \
+                                                                       \
+    speed_operand_src (s, s->xp, s->size);                             \
+    speed_operand_dst (s, r1p, s->size);                               \
+    speed_operand_dst (s, r2p, s->size);                               \
+    speed_cache_fill (s);                                              \
+                                                                       \
+    speed_starttime ();                                                \
+    i = s->reps;                                                       \
+    do                                                                 \
+      function (r1p, r2p, s->xp, s->size, s->r);                       \
+    while (--i != 0);                                                  \
+    t = speed_endtime ();                                              \
+                                                                       \
+    TMP_FREE;                                                          \
+    return t;                                                          \
+  }
+
+
+/* Speed routine body timing FUNCTION (str, base, limbs, s->size).
+   s->size is the number of limbs in the input and s->r is the base, with
+   s->r == 0 selecting decimal.  Sizes below 1 and bases outside 2..256 are
+   refused through SPEED_RESTRICT_COND.  The input limbs are copied afresh
+   from s->xp before every call.  */
+#define SPEED_ROUTINE_MPN_GET_STR(function)                            \
+  {                                                                    \
+    unsigned char *str;                                                \
+    mp_size_t      str_size;                                           \
+    mp_ptr         limbs;                                              \
+    int            base;                                               \
+    unsigned       i;                                                  \
+    double         t;                                                  \
+    TMP_DECL;                                                          \
+                                                                       \
+    SPEED_RESTRICT_COND (s->size >= 1);                                \
+                                                                       \
+    base = s->r != 0 ? s->r : 10;                                      \
+    SPEED_RESTRICT_COND (2 <= base && base <= 256);                    \
+                                                                       \
+    TMP_MARK;                                                          \
+    SPEED_TMP_ALLOC_LIMBS (limbs, s->size + 1, s->align_xp);           \
+                                                                       \
+    /* output buffer sized by MPN_SIZEINBASE for this input */         \
+    MPN_SIZEINBASE (str_size, s->xp, s->size, base);                   \
+    str = TMP_ALLOC (str_size);                                        \
+                                                                       \
+    /* enable while developing to catch an overflow of str */          \
+    /*                                                                 \
+    MPN_COPY (limbs, s->xp, s->size);                                  \
+    ASSERT_ALWAYS (mpn_get_str (str, base, limbs, s->size)             \
+                   <= str_size);                                       \
+    */                                                                 \
+                                                                       \
+    speed_operand_src (s, s->xp, s->size);                             \
+    speed_operand_dst (s, limbs, s->size);                             \
+    speed_operand_dst (s, (mp_ptr) str, str_size/BYTES_PER_MP_LIMB);   \
+    speed_cache_fill (s);                                              \
+                                                                       \
+    speed_starttime ();                                                \
+    i = s->reps;                                                       \
+    do                                                                 \
+      {                                                                \
+       MPN_COPY (limbs, s->xp, s->size);                               \
+       function (str, base, limbs, s->size);                           \
+      }                                                                \
+    while (--i != 0);                                                  \
+    t = speed_endtime ();                                              \
+                                                                       \
+    TMP_FREE;                                                          \
+    return t;                                                          \
+  }
+
+/* s->size controls the number of digits in the input, s->r is the base, or
+   decimal by default.
+
+   CALL is expanded inside the timed loop, once per repetition.  The locals
+   in scope for it are xp (s->size digit bytes, each below base), wp (a
+   destination of wn limbs), base and s.  Sizes below 1 and bases outside
+   2..256 are refused through SPEED_RESTRICT_COND.  */
+#define SPEED_ROUTINE_MPN_SET_STR_CALL(call)                           \
+  {                                                                    \
+    unsigned char *xp;                                                 \
+    mp_ptr     wp;                                                     \
+    mp_size_t  wn;                                                     \
+    unsigned   i;                                                      \
+    int        base;                                                   \
+    double     t;                                                      \
+    TMP_DECL;                                                          \
+                                                                       \
+    SPEED_RESTRICT_COND (s->size >= 1);                                        \
+                                                                       \
+    base = s->r == 0 ? 10 : s->r;                                      \
+    SPEED_RESTRICT_COND (base >= 2 && base <= 256);                    \
+                                                                       \
+    TMP_MARK;                                                          \
+                                                                       \
+    /* one digit per byte, each derived from a limb of s->xp */        \
+    xp = TMP_ALLOC (s->size);                                          \
+    for (i = 0; i < s->size; i++)                                      \
+      xp[i] = s->xp[i] % base;                                         \
+                                                                       \
+    /* destination size in limbs: the digit count divided by           \
+       chars_per_bit_exactly (presumably giving bits - confirm),       \
+       converted to limbs, plus 2 limbs of slack */                    \
+    wn = ((mp_size_t) (s->size / mp_bases[base].chars_per_bit_exactly)) \
+      / GMP_LIMB_BITS + 2;                                             \
+    SPEED_TMP_ALLOC_LIMBS (wp, wn, s->align_wp);                       \
+                                                                       \
+    /* use this during development to check wn is big enough */                \
+    /*                                                                 \
+    ASSERT_ALWAYS (mpn_set_str (wp, xp, s->size, base) <= wn);         \
+    */                                                                 \
+                                                                       \
+    /* the byte string is registered in units of limbs */              \
+    speed_operand_src (s, (mp_ptr) xp, s->size/BYTES_PER_MP_LIMB);     \
+    speed_operand_dst (s, wp, wn);                                     \
+    speed_cache_fill (s);                                              \
+                                                                       \
+    speed_starttime ();                                                        \
+    i = s->reps;                                                       \
+    do                                                                 \
+      call;                                                            \
+    while (--i != 0);                                                  \
+    t = speed_endtime ();                                              \
+                                                                       \
+    TMP_FREE;                                                          \
+    return t;                                                          \
+  }
+
+
+/* Run an accel gcd find_a() function over various data values.  A set of
+   values is used in case some run particularly fast or slow.  The size
+   parameter is ignored, the amount of data tested is fixed: FUNCTION is
+   called once on each of SPEED_BLOCK_SIZE two-limb operands per
+   repetition, and s->time_divisor is set to SPEED_BLOCK_SIZE.  */
+
+#define SPEED_ROUTINE_MPN_GCD_FINDA(function)                          \
+  {                                                                    \
+    mp_limb_t pair[SPEED_BLOCK_SIZE][2];                               \
+    unsigned  i, j;                                                    \
+    double    t;                                                       \
+    TMP_DECL;                                                          \
+                                                                       \
+    TMP_MARK;                                                          \
+                                                                       \
+    /* pair[i][0] is the low limb and must be odd,                     \
+       pair[i][1] is the high limb and must be non-zero */             \
+    for (i = 0; i < SPEED_BLOCK_SIZE; i++)                             \
+      {                                                                \
+       pair[i][0] = s->xp_block[i] | 1;                                \
+       pair[i][1] = s->yp_block[i] != 0 ? s->yp_block[i] : 1;          \
+      }                                                                \
+                                                                       \
+    speed_operand_src (s, &pair[0][0], 2*SPEED_BLOCK_SIZE);            \
+    speed_cache_fill (s);                                              \
+                                                                       \
+    speed_starttime ();                                                \
+    i = s->reps;                                                       \
+    do                                                                 \
+      {                                                                \
+       /* operands are visited from last to first */                   \
+       j = SPEED_BLOCK_SIZE;                                           \
+       do                                                              \
+         {                                                             \
+           function (pair[j-1]);                                       \
+         }                                                             \
+       while (--j != 0);                                               \
+      }                                                                \
+    while (--i != 0);                                                  \
+    t = speed_endtime ();                                              \
+                                                                       \
+    TMP_FREE;                                                          \
+                                                                       \
+    s->time_divisor = SPEED_BLOCK_SIZE;                                \
+    return t;                                                          \
+  }
+
+
+/* "call" should do "count_foo_zeros(c,n)".
+   Give leading=1 if foo is leading zeros, leading=0 for trailing.
+   Give zero=1 if n=0 is allowed in the call, zero=0 if not.
+
+   SPEED_ROUTINE_COUNT_ZEROS_A is only the opening half of a function body:
+   it stops inside the inner for loop, just after n has been loaded from
+   xp[i] and xored with c (presumably so each call depends on the result of
+   the previous one).  The caller's "call" goes next, and then
+   SPEED_ROUTINE_COUNT_ZEROS_B closes the loops and returns the time.
+
+   If speed_routine_count_zeros_setup() fails the body returns -1.0, after
+   a TMP_FREE to release the xp allocation made under TMP_MARK.  */
+
+#define SPEED_ROUTINE_COUNT_ZEROS_A(leading, zero)                     \
+  {                                                                    \
+    mp_ptr     xp;                                                     \
+    int        i, c;                                                   \
+    unsigned   j;                                                      \
+    mp_limb_t  n;                                                      \
+    double     t;                                                      \
+    TMP_DECL;                                                          \
+                                                                       \
+    TMP_MARK;                                                          \
+    SPEED_TMP_ALLOC_LIMBS (xp, SPEED_BLOCK_SIZE, s->align_xp);         \
+                                                                       \
+    if (! speed_routine_count_zeros_setup (s, xp, leading, zero))      \
+      {                                                                \
+        TMP_FREE;                                                      \
+        return -1.0;                                                   \
+      }                                                                \
+    speed_operand_src (s, xp, SPEED_BLOCK_SIZE);                       \
+    speed_cache_fill (s);                                              \
+                                                                       \
+    c = 0;                                                             \
+    speed_starttime ();                                                \
+    j = s->reps;                                                       \
+    do {                                                               \
+      for (i = 0; i < SPEED_BLOCK_SIZE; i++)                           \
+       {                                                               \
+         n = xp[i];                                                    \
+         n ^= c;                                                       \
+
+/* Closing half of the function body opened by SPEED_ROUTINE_COUNT_ZEROS_A:
+   ends the inner for loop and the reps loop, stops the timer, passes c to
+   noop_1() so it doesn't go dead, sets s->time_divisor to
+   SPEED_BLOCK_SIZE, and returns the measured time.  */
+#define SPEED_ROUTINE_COUNT_ZEROS_B()                                  \
+       }                                                               \
+    } while (--j != 0);                                                        \
+    t = speed_endtime ();                                              \
+                                                                       \
+    /* don't let c go dead */                                          \
+    noop_1 (c);                                                                \
+                                                                       \
+    s->time_divisor = SPEED_BLOCK_SIZE;                                        \
+                                                                       \
+    TMP_FREE;                                                          \
+    return t;                                                          \
+  }                                                                    \
+
+/* Complete measuring function body: the _A opening, then "call", then the
+   _B closing.  "call" can use c (an int) and n (the mp_limb_t operand),
+   both declared by SPEED_ROUTINE_COUNT_ZEROS_A.  */
+#define SPEED_ROUTINE_COUNT_ZEROS_C(call, leading, zero)               \
+  do {                                                                 \
+    SPEED_ROUTINE_COUNT_ZEROS_A (leading, zero);                       \
+    call;                                                              \
+    SPEED_ROUTINE_COUNT_ZEROS_B ();                                    \
+  } while (0)                                                          \
+
+/* Leading zeros variants (leading=1).  The _C form takes an arbitrary
+   "call" and the zero flag; the plain form measures "fun (c, n)" with
+   zero=0.  */
+#define SPEED_ROUTINE_COUNT_LEADING_ZEROS_C(call,zero)                 \
+  SPEED_ROUTINE_COUNT_ZEROS_C (call, 1, zero)
+#define SPEED_ROUTINE_COUNT_LEADING_ZEROS(fun)                         \
+  SPEED_ROUTINE_COUNT_ZEROS_C (fun (c, n), 1, 0)
+
+/* Trailing zeros variants (leading=0).  The _C form takes an arbitrary
+   "call" and the zero flag; the plain form measures "fun (c, n)" with
+   zero=0, "fun" being the macro argument (the parameter used to be
+   misnamed "call", leaving "fun" in the expansion unresolved).  */
+#define SPEED_ROUTINE_COUNT_TRAILING_ZEROS_C(call,zero)                        \
+  SPEED_ROUTINE_COUNT_ZEROS_C (call, 0, zero)
+#define SPEED_ROUTINE_COUNT_TRAILING_ZEROS(fun)                        \
+  SPEED_ROUTINE_COUNT_ZEROS_C (fun (c, n), 0, 0)
+
+
+/* Time "call" over the SPEED_BLOCK_SIZE limbs of s->xp_block, taken from
+   last to first.  Before each call d is set to dinv ^ xp[j] with the high
+   bit forced on; "call" is presumably an invert_limb style operation that
+   stores its result in dinv, which would make each d depend on the
+   previous result.  xp points one limb before s->xp_block so that j can
+   count down from SPEED_BLOCK_SIZE to 1.  Note the timer is stopped only
+   after the final noop_1() call.  */
+#define SPEED_ROUTINE_INVERT_LIMB_CALL(call)                           \
+  {                                                                    \
+    unsigned   i, j;                                                   \
+    mp_limb_t  d, dinv=0;                                              \
+    mp_ptr     xp = s->xp_block - 1;                                   \
+                                                                       \
+    s->time_divisor = SPEED_BLOCK_SIZE;                                        \
+                                                                       \
+    speed_starttime ();                                                        \
+    i = s->reps;                                                       \
+    do                                                                 \
+      {                                                                        \
+       j = SPEED_BLOCK_SIZE;                                           \
+       do                                                              \
+         {                                                             \
+           d = dinv ^ xp[j];                                           \
+           d |= GMP_LIMB_HIGHBIT;                                      \
+           do { call; } while (0);                                     \
+         }                                                             \
+       while (--j != 0);                                               \
+      }                                                                        \
+    while (--i != 0);                                                  \
+                                                                       \
+    /* don't let the compiler optimize everything away */              \
+    noop_1 (dinv);                                                     \
+                                                                       \
+    return speed_endtime();                                            \
+  }
+
+
+#endif
+
+
+/* Time s->reps consecutive calls to "function", which is called with no
+   arguments.  No operands are registered and speed_cache_fill is not
+   used.  */
+#define SPEED_ROUTINE_MPN_BACK_TO_BACK(function)                       \
+  {                                                                    \
+    unsigned  i;                                                       \
+    speed_starttime ();                                                        \
+    i = s->reps;                                                       \
+    do                                                                 \
+      function ();                                                     \
+    while (--i != 0);                                                  \
+    return speed_endtime ();                                           \
+  }
+
+
+/* Time "call", which can refer to wp, a block of s->size limbs obtained
+   from SPEED_TMP_ALLOC_LIMBS with s->align_wp and registered as a
+   destination operand before speed_cache_fill.  s->size must be >= 0.  */
+#define SPEED_ROUTINE_MPN_ZERO_CALL(call)                              \
+  {                                                                    \
+    mp_ptr    wp;                                                      \
+    unsigned  i;                                                       \
+    double    t;                                                       \
+    TMP_DECL;                                                          \
+                                                                       \
+    SPEED_RESTRICT_COND (s->size >= 0);                                        \
+                                                                       \
+    TMP_MARK;                                                          \
+    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);                  \
+    speed_operand_dst (s, wp, s->size);                                        \
+    speed_cache_fill (s);                                              \
+                                                                       \
+    speed_starttime ();                                                        \
+    i = s->reps;                                                       \
+    do                                                                 \
+      call;                                                            \
+    while (--i != 0);                                                  \
+    t = speed_endtime ();                                              \
+                                                                       \
+    TMP_FREE;                                                          \
+    return t;                                                          \
+  }
+
+/* As SPEED_ROUTINE_MPN_ZERO_CALL, with the call being
+   "function (wp, s->size)".  */
+#define SPEED_ROUTINE_MPN_ZERO(function)                               \
+  SPEED_ROUTINE_MPN_ZERO_CALL (function (wp, s->size))
diff --git a/tune/time.c b/tune/time.c

new file mode 100644 (file)

index 0000000..613f2ae
--- /dev/null
+++ b/tune/time.c
@@ -0,0 +1,1533 @@
+/* Time routines for speed measurements.
+
+Copyright 1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+/* Usage:
+
+   The code in this file implements the lowest level of time measuring,
+   simple one-time measuring of time between two points.
+
+   void speed_starttime (void)
+   double speed_endtime (void)
+       Call speed_starttime to start measuring, and then call speed_endtime
+       when done.
+
+       speed_endtime returns the time taken, in seconds.  Or if the timebase
+       is in CPU cycles and the CPU frequency is unknown then speed_endtime
+       returns cycles.  Applications can identify the cycles return by
+       checking for speed_cycletime (described below) equal to 1.0.
+
+       If some sort of temporary glitch occurs then speed_endtime returns
+       0.0.  Currently this is for various cases where a negative time has
+       occurred.  This unfortunately occurs with getrusage on some systems,
+       and with the hppa cycle counter on hpux.
+
+   double speed_cycletime
+       The time in seconds for each CPU cycle.  For example on a 100 MHz CPU
+       this would be 1.0e-8.
+
+       If the CPU frequency is unknown, then speed_cycletime is either 0.0
+       or 1.0.  It's 0.0 when speed_endtime is returning seconds, or it's
+       1.0 when speed_endtime is returning cycles.
+
+       It may be noted that "speed_endtime() / speed_cycletime" gives a
+       measured time in cycles, irrespective of whether speed_endtime is
+       returning cycles or seconds.  (Assuming cycles can be had, ie. it's
+       either cycles already or the cpu frequency is known.  See also
+       speed_cycletime_need_cycles below.)
+
+   double speed_unittime
+       The unit of time measurement accuracy for the timing method in use.
+       This is in seconds or cycles, as per speed_endtime.
+
+   char speed_time_string[]
+       A null-terminated string describing the time method in use.
+
+   void speed_time_init (void)
+       Initialize time measuring.  speed_starttime() does this
+       automatically, so it's only needed if an application wants to inspect
+       the above global variables before making a measurement.
+
+   int speed_precision
+       The intended accuracy of time measurements.  speed_measure() in
+       common.c for instance runs target routines with enough repetitions so
+       it takes at least "speed_unittime * speed_precision" (this expression
+       works for both cycles or seconds from speed_endtime).
+
+       A program can provide an option so the user can set speed_precision.
+       If speed_precision is zero when speed_time_init or speed_starttime
+       first run then it gets a default based on the measuring method
+       chosen.  (More precision for higher accuracy methods.)
+
+   void speed_cycletime_need_seconds (void)
+       Call this to demand that speed_endtime will return seconds, and not
+       cycles.  If only cycles are available then an error is printed and
+       the program exits.
+
+   void speed_cycletime_need_cycles (void)
+       Call this to demand that speed_cycletime is non-zero, so that
+       "speed_endtime() / speed_cycletime" will give times in cycles.
+
+
+
+   Notes:
+
+   Various combinations of cycle counter, read_real_time(), getrusage(),
+   gettimeofday() and times() can arise, according to which are available
+   and their precision.
+
+
+   Allowing speed_endtime() to return either seconds or cycles is only a
+   slight complication and makes it possible for the speed program to do
+   some sensible things without demanding the CPU frequency.  If seconds are
+   being measured then it can always print seconds, and if cycles are being
+   measured then it can always print them without needing to know how long
+   they are.  Also the tune program doesn't care at all what the units are.
+
+   GMP_CPU_FREQUENCY can always be set when the automated methods in freq.c
+   fail.  This will be needed if times in seconds are wanted but a cycle
+   counter is being used, or if times in cycles are wanted but getrusage or
+   another seconds based timer is in use.
+
+   If the measuring method uses a cycle counter but supplements it with
+   getrusage or the like, then knowing the CPU frequency is mandatory since
+   the code compares values from the two.
+
+
+   Not done:
+
+   Solaris gethrtime() seems no more than a slow way to access the Sparc V9
+   cycle counter.  gethrvtime() seems to be relevant only to light weight
+   processes, it doesn't for instance give nanosecond virtual time.  So
+   neither of these are used.
+
+
+   Bugs:
+
+   getrusage_microseconds_p is fundamentally flawed, getrusage and
+   gettimeofday can have resolutions other than clock ticks or microseconds,
+   for instance IRIX 5 has a tick of 10 ms but a getrusage of 1 ms.
+
+
+   Enhancements:
+
+   The SGI hardware counter has 64 bits on some machines, which could be
+   used when available.  But perhaps 32 bits is enough range, and then rely
+   on the getrusage supplement.
+
+   Maybe getrusage (or times) should be used as a supplement for any
+   wall-clock measuring method.  Currently a wall clock with a good range
+   (eg. a 64-bit cycle counter) is used without a supplement.
+
+   On PowerPC the timebase registers could be used, but would have to do
+   something to find out the speed.  On 6xx chips it's normally 1/4 bus
+   speed, on 4xx chips it's either that or an external clock.  Measuring
+   against gettimeofday might be ok.  */
+
+
+#include "config.h"
+
+#include <errno.h>
+#include <setjmp.h>
+#include <signal.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h> /* for getenv() */
+
+#if HAVE_FCNTL_H
+#include <fcntl.h>  /* for open() */
+#endif
+
+#if HAVE_STDINT_H
+#include <stdint.h> /* for uint64_t */
+#endif
+
+#if HAVE_UNISTD_H
+#include <unistd.h> /* for sysconf() */
+#endif
+
+#include <sys/types.h>
+
+#if TIME_WITH_SYS_TIME
+# include <sys/time.h>  /* for struct timeval */
+# include <time.h>
+#else
+# if HAVE_SYS_TIME_H
+#  include <sys/time.h>
+# else
+#  include <time.h>
+# endif
+#endif
+
+#if HAVE_SYS_MMAN_H
+#include <sys/mman.h>      /* for mmap() */
+#endif
+
+#if HAVE_SYS_RESOURCE_H
+#include <sys/resource.h>  /* for struct rusage */
+#endif
+
+#if HAVE_SYS_SYSSGI_H
+#include <sys/syssgi.h>    /* for syssgi() */
+#endif
+
+#if HAVE_SYS_SYSTEMCFG_H
+#include <sys/systemcfg.h> /* for RTC_POWER on AIX */
+#endif
+
+#if HAVE_SYS_TIMES_H
+#include <sys/times.h>  /* for times() and struct tms */
+#endif
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#include "speed.h"
+
+
+/* strerror is only used for some stuff on newish systems, no need to have a
+   proper replacement */
+#if ! HAVE_STRERROR
+#define strerror(n)  "<strerror not available>"
+#endif
+
+
+/* The globals described under "Usage" at the top of this file.
+   speed_precision and speed_cycletime start out as zero.  */
+char    speed_time_string[256];
+int     speed_precision = 0;
+double  speed_unittime;
+double  speed_cycletime = 0.0;
+
+
+/* don't rely on "unsigned" to "double" conversion, it's broken in SunOS 4
+   native cc */
+#define M_2POWU   (((double) INT_MAX + 1.0) * 2.0)
+
+#define M_2POW32  4294967296.0
+#define M_2POW64  (M_2POW32 * M_2POW32)
+
+
+/* Conditionals for the time functions available are done with normal C
+   code, which is a lot easier than wildly nested preprocessor directives.
+
+   The choice of what to use is partly made at run-time, according to
+   whether the cycle counter works and the measured accuracy of getrusage
+   and gettimeofday.
+
+   A routine that's not available won't be getting called, but is an abort()
+   to be sure it isn't called mistakenly.
+
+   It can be assumed that if a function exists then its data type will, but
+   if the function doesn't then the data type might or might not exist, so
+   the type can't be used unconditionally.  The "struct_rusage" etc macros
+   provide dummies when the respective function doesn't exist. */
+
+
+/* Cycle counter availability.  have_cycles takes the value of
+   HAVE_SPEED_CYCLECOUNTER when that's non-zero.  Otherwise it's 0 and
+   speed_cyclecounter() becomes an ASSERT_FAIL, so code mentioning it still
+   compiles but a mistaken call is caught.  */
+#if HAVE_SPEED_CYCLECOUNTER
+static const int have_cycles = HAVE_SPEED_CYCLECOUNTER;
+#else
+static const int have_cycles = 0;
+#define speed_cyclecounter(p)  ASSERT_FAIL (speed_cyclecounter not available)
+#endif
+
+/* "stck" returns ticks since 1 Jan 1900 00:00 GMT, where each tick is 2^-12
+   microseconds.  Same #ifdefs here as in longlong.h.  */
+#if defined (__GNUC__) && ! defined (NO_ASM)                            \
+  && (defined (__i370__) || defined (__s390__) || defined (__mvs__))
+static const int  have_stck = 1;
+static const int  use_stck = 1;  /* always use when available */
+typedef uint64_t  stck_t; /* gcc for s390 is quite new, always has uint64_t */
+#define STCK(timestamp)                 \
+  do {                                  \
+    asm ("stck %0" : "=m" (timestamp)); \
+  } while (0)
+#else
+static const int  have_stck = 0;
+static const int  use_stck = 0;
+typedef unsigned long  stck_t;   /* dummy */
+#define STCK(timestamp)  ASSERT_FAIL (stck instruction not available)
+#endif
+#define STCK_PERIOD      (1.0 / 4096e6)   /* 2^-12 microseconds */
+
+/* mftb
+   Enhancement: On 64-bit chips mftb gives a 64-bit value, no need for mftbu
+   and a loop (see powerpc64.asm).
+
+   MFTB(a) stores the low word in a[0] and the high word in a[1].  The gcc
+   version reads high, low, high and repeats until both high reads agree,
+   so the two halves are consistent if the high word changes between the
+   reads.  Without gcc asm, mftb_function() is used instead.  On
+   non-powerpc MFTB zeros a[] and is an ASSERT_FAIL.  */
+#if HAVE_HOST_CPU_FAMILY_powerpc
+static const int  have_mftb = 1;
+#if defined (__GNUC__) && ! defined (NO_ASM)
+#define MFTB(a)                         \
+  do {                                  \
+    unsigned  __h1, __l, __h2;          \
+    do {                                \
+      asm volatile ("mftbu %0\n"        \
+                   "mftb  %1\n"        \
+                   "mftbu %2"          \
+                   : "=r" (__h1),      \
+                     "=r" (__l),       \
+                     "=r" (__h2));     \
+    } while (__h1 != __h2);             \
+    a[0] = __l;                         \
+    a[1] = __h1;                        \
+  } while (0)
+#else
+#define MFTB(a)   mftb_function (a)
+#endif
+#else /* ! powerpc */
+static const int  have_mftb = 0;
+#define MFTB(a)                         \
+  do {                                  \
+    a[0] = 0;                           \
+    a[1] = 0;                           \
+    ASSERT_FAIL (mftb not available);   \
+  } while (0)
+#endif
+
+/* Unicos 10.X has syssgi(), but not mmap(). */
+#if HAVE_SYSSGI && HAVE_MMAP
+static const int  have_sgi = 1;
+#else
+static const int  have_sgi = 0;
+#endif
+
+/* read_real_time() availability.  When it's missing, read_real_time and
+   time_base_to_time become ASSERT_FAILs, and stand-ins are supplied for
+   RTC_POWER, RTC_POWER_PC and timebasestruct_t so that code using them
+   still compiles.  */
+#if HAVE_READ_REAL_TIME
+static const int have_rrt = 1;
+#else
+static const int have_rrt = 0;
+#define read_real_time(t,s)     ASSERT_FAIL (read_real_time not available)
+#define time_base_to_time(t,s)  ASSERT_FAIL (time_base_to_time not available)
+#define RTC_POWER     1
+#define RTC_POWER_PC  2
+#define timebasestruct_t   struct timebasestruct_dummy
+struct timebasestruct_dummy {
+  int             flag;
+  unsigned int    tb_high;
+  unsigned int    tb_low;
+};
+#endif
+
+/* clock_gettime, getrusage, gettimeofday and times.  Each gets a have_foo
+   flag and a struct_foo name for its data type, which is the real type
+   when the function exists or otherwise one of the dummy structs below.
+   A missing function is replaced by a macro which is an ASSERT_FAIL (the
+   clock_gettime and clock_getres replacements also yield -1 as a
+   value).  */
+#if HAVE_CLOCK_GETTIME
+static const int have_cgt = 1;
+#define struct_timespec  struct timespec
+#else
+static const int have_cgt = 0;
+#define struct_timespec       struct timespec_dummy
+#define clock_gettime(id,ts)  (ASSERT_FAIL (clock_gettime not available), -1)
+#define clock_getres(id,ts)   (ASSERT_FAIL (clock_getres not available), -1)
+#endif
+
+#if HAVE_GETRUSAGE
+static const int have_grus = 1;
+#define struct_rusage   struct rusage
+#else
+static const int have_grus = 0;
+#define getrusage(n,ru)  ASSERT_FAIL (getrusage not available)
+#define struct_rusage    struct rusage_dummy
+#endif
+
+#if HAVE_GETTIMEOFDAY
+static const int have_gtod = 1;
+#define struct_timeval   struct timeval
+#else
+static const int have_gtod = 0;
+#define gettimeofday(tv,tz)  ASSERT_FAIL (gettimeofday not available)
+#define struct_timeval   struct timeval_dummy
+#endif
+
+#if HAVE_TIMES
+static const int have_times = 1;
+#define struct_tms   struct tms
+#else
+static const int have_times = 0;
+#define times(tms)   ASSERT_FAIL (times not available)
+#define struct_tms   struct tms_dummy
+#endif
+
+/* Dummy types, selected by the struct_tms, struct_timeval, struct_rusage
+   and struct_timespec macros above when the corresponding function is not
+   available.  They're declared unconditionally and carry only the fields
+   named here.  */
+struct tms_dummy {
+  long  tms_utime;
+};
+struct timeval_dummy {
+  long  tv_sec;
+  long  tv_usec;
+};
+struct rusage_dummy {
+  struct_timeval ru_utime;
+};
+struct timespec_dummy {
+  long  tv_sec;
+  long  tv_nsec;
+};
+
+/* Method selection flags, one per timing method.  These are not const,
+   unlike the have_foo flags; presumably they're set by the initialization
+   code later in this file, which is not shown here.  */
+static int  use_cycles;
+static int  use_mftb;
+static int  use_sgi;
+static int  use_rrt;
+static int  use_cgt;
+static int  use_gtod;
+static int  use_grus;
+static int  use_times;
+static int  use_tick_boundary;
+
+/* One starting timestamp per method, each in that method's own data
+   type (or its dummy stand-in).  */
+static unsigned         start_cycles[2];
+static stck_t           start_stck;
+static unsigned         start_mftb[2];
+static unsigned         start_sgi;
+static timebasestruct_t start_rrt;
+static struct_timespec  start_cgt;
+static struct_rusage    start_grus;
+static struct_timeval   start_gtod;
+static struct_tms       start_times;
+
+/* Per-method limit and unit time values.  cycles_limit starts at 1e100;
+   the rest have no initializer.  */
+static double  cycles_limit = 1e100;
+static double  mftb_unittime;
+static double  sgi_unittime;
+static double  cgt_unittime;
+static double  grus_unittime;
+static double  gtod_unittime;
+static double  times_unittime;
+
+/* for RTC_POWER format, ie. seconds and nanoseconds */
+#define TIMEBASESTRUCT_SECS(t)  ((t)->tb_high + (t)->tb_low * 1e-9)
+
+
+/* Format a time given in seconds for display, eg. "10.25ms".
+
+   The value is scaled to ns, us or ms when it's below 1e-6, 1e-3 or 1.0
+   respectively, otherwise it's left in seconds.  The number of decimals
+   is then chosen to give about 4 significant figures.
+
+   The return value points to a static buffer, which is overwritten by the
+   next call.  */
+char *
+unittime_string (double t)
+{
+  static char  text[128];
+
+  const char  *suffix = "s";
+  double      scaled = t;
+  int         decimals = 1;
+
+  /* scale into the largest unit that keeps the value below 1000 */
+  if (t < 1e-6)
+    {
+      scaled = t * 1e9;
+      suffix = "ns";
+    }
+  else if (t < 1e-3)
+    {
+      scaled = t * 1e6;
+      suffix = "us";
+    }
+  else if (t < 1.0)
+    {
+      scaled = t * 1e3;
+      suffix = "ms";
+    }
+
+  /* 4 significant figures; anything 100 or more keeps the default of 1 */
+  if (scaled < 1.0)
+    decimals = 4;
+  else if (scaled < 10.0)
+    decimals = 3;
+  else if (scaled < 100.0)
+    decimals = 2;
+
+  sprintf (text, "%.*f%s", decimals, scaled, suffix);
+  return text;
+}
+
+
+/* Jump target set up by cycles_works_p() before it tries
+   speed_cyclecounter().  */
+static jmp_buf  cycles_works_buf;
+
+/* SIGILL handler installed by cycles_works_p().  Jumps back to the setjmp
+   on cycles_works_buf, making it return 1.  "sig" is not used.  */
+static RETSIGTYPE
+cycles_works_handler (int sig)
+{
+  longjmp (cycles_works_buf, 1);
+}
+
+/* Return 1 if speed_cyclecounter() can be run, or 0 if running it raises
+   SIGILL.  The test is made on the first call only and the answer is
+   remembered in a static.
+
+   If SIGILL isn't defined, or a handler for it can't be installed, then
+   the cycle counter is assumed to work.
+
+   The previous SIGILL handler is put back on both outcomes, including the
+   path where SIGILL was caught and longjmp brought control back here.  */
+int
+cycles_works_p (void)
+{
+  static int  result = -1;
+
+  if (result != -1)
+    goto done;
+
+#ifdef SIGILL
+  {
+    RETSIGTYPE (*old_handler) __GMP_PROTO ((int));
+    unsigned  cycles[2];
+
+    old_handler = signal (SIGILL, cycles_works_handler);
+    if (old_handler == SIG_ERR)
+      {
+       if (speed_option_verbose)
+         printf ("cycles_works_p(): SIGILL not supported, assuming speed_cyclecounter() works\n");
+       goto yes;
+      }
+    if (setjmp (cycles_works_buf))
+      {
+       /* Arrived here from cycles_works_handler.  old_handler was set
+          before the setjmp and not changed since, so it's still valid
+          after the longjmp.  */
+       signal (SIGILL, old_handler);
+       if (speed_option_verbose)
+         printf ("cycles_works_p(): SIGILL during speed_cyclecounter(), so doesn't work\n");
+       result = 0;
+       goto done;
+      }
+    speed_cyclecounter (cycles);
+    signal (SIGILL, old_handler);
+    if (speed_option_verbose)
+      printf ("cycles_works_p(): speed_cyclecounter() works\n");
+  }
+#else
+
+  if (speed_option_verbose)
+    printf ("cycles_works_p(): SIGILL not defined, assuming speed_cyclecounter() works\n");
+  goto yes;
+#endif
+
+ yes:
+  result = 1;
+
+ done:
+  return result;
+}
+
+
+/* Return the number of clock ticks per second.  sysconf(_SC_CLK_TCK) is
+   used where possible, with the CLK_TCK macro as the fallback.  A value
+   other than -1 is cached in a static and returned directly by later
+   calls.  If sysconf gives no answer and CLK_TCK isn't defined, the
+   program aborts.  */
+long
+clk_tck (void)
+{
+  static long  ticks = -1L;
+
+  if (ticks == -1L)
+    {
+#if HAVE_SYSCONF
+      ticks = sysconf (_SC_CLK_TCK);
+      if (ticks != -1L)
+        {
+          if (speed_option_verbose)
+            printf ("sysconf(_SC_CLK_TCK) is %ld per second\n", ticks);
+          return ticks;
+        }
+
+      fprintf (stderr,
+               "sysconf(_SC_CLK_TCK) not working, using CLK_TCK instead\n");
+#endif
+
+#ifdef CLK_TCK
+      ticks = CLK_TCK;
+      if (speed_option_verbose)
+        printf ("CLK_TCK is %ld per second\n", ticks);
+#else
+      fprintf (stderr, "CLK_TCK not defined, cannot continue\n");
+      abort ();
+#endif
+    }
+
+  return ticks;
+}
+
+
+/* If two times can be observed less than half a clock tick apart, then
+   assume "get" is microsecond accurate.
+
+   Two times only 1 microsecond apart are not believed, since some kernels
+   take it upon themselves to ensure gettimeofday doesn't return the same
+   value twice, for the benefit of applications using it for a timestamp.
+   This is obviously very stupid given the speed of CPUs these days.
+
+   Making "reps" many calls to noop_1() is designed to waste some CPU, with
+   a view to getting measurements 2 microseconds (or more) apart.  "reps" is
+   increased progressively until such a period is seen.
+
+   The outer loop "attempts" are just to allow for any random nonsense or
+   system load upsetting the measurements (ie. making two successive calls
+   to "get" come out as a longer interval than normal).
+
+   Bugs:
+
+   The assumption that any interval less than a half tick implies
+   microsecond resolution is obviously fairly rash, the true resolution
+   could be anything between a microsecond and that half tick.  Perhaps
+   something special would have to be done on a system where this is the
+   case, since there's no obvious reliable way to detect it
+   automatically.  */
+
+/* Function body for a "foo_microseconds_p" test.  It returns 1 if "get"
+   looks microsecond accurate or 0 if not, remembering the answer in a
+   static so the measuring is done only once.
+
+   name   string used in the verbose messages
+   type   type of the timestamp objects handed to "get"
+   get    get(t) stores the current time in t
+   sec    sec(t) is the seconds part of t
+   usec   usec(t) is the microseconds part of t
+
+   Up to 5 attempts are made.  In each, the amount of work between the two
+   "get" calls is doubled until they differ by at least 2 microseconds, and
+   the attempt succeeds if that difference is under half a clock tick.  */
+#define MICROSECONDS_P(name, type, get, sec, usec)                      \
+  {                                                                     \
+    static int  result = -1;                                            \
+    type      st, et;                                                   \
+    long      dt, half_tick;                                            \
+    unsigned  attempt, reps, i, j;                                      \
+                                                                       \
+    if (result != -1)                                                   \
+      return result;                                                    \
+                                                                       \
+    result = 0;                                                         \
+    half_tick = (1000000L / clk_tck ()) / 2;                            \
+                                                                       \
+    for (attempt = 0; attempt < 5; attempt++)                           \
+      {                                                                 \
+       reps = 0;                                                       \
+       for (;;)                                                        \
+         {                                                             \
+           get (st);                                                   \
+           for (i = 0; i < reps; i++)                                  \
+             for (j = 0; j < 100; j++)                                 \
+               noop_1 (CNST_LIMB(0));                                  \
+           get (et);                                                   \
+                                                                       \
+           dt = (sec(et)-sec(st))*1000000L + usec(et)-usec(st);        \
+                                                                       \
+           if (speed_option_verbose >= 2)                              \
+             printf ("%s attempt=%u, reps=%u, dt=%ld\n",               \
+                     name, attempt, reps, dt);                         \
+                                                                       \
+           if (dt >= 2)                                                \
+             break;                                                    \
+                                                                       \
+           reps = (reps == 0 ? 1 : 2*reps);                            \
+           if (reps == 0)                                              \
+             break;  /* uint overflow, not normal */                   \
+         }                                                             \
+                                                                       \
+       if (dt < half_tick)                                             \
+         {                                                             \
+           result = 1;                                                 \
+           break;                                                      \
+         }                                                             \
+      }                                                                 \
+                                                                       \
+    if (speed_option_verbose)                                           \
+      {                                                                 \
+       if (result)                                                     \
+         printf ("%s is microsecond accurate\n", name);                \
+       else                                                            \
+         printf ("%s is only %s clock tick accurate\n",                \
+                 name, unittime_string (1.0/clk_tck()));               \
+      }                                                                 \
+    return result;                                                      \
+  }
+
+
+/* Return the verdict of the MICROSECONDS_P test applied to gettimeofday():
+   non-zero if it was found to be microsecond accurate, zero if only clock
+   tick accurate.  The three macros defined here are handed to
+   MICROSECONDS_P as the sampling call and the seconds and microseconds
+   field accessors for a struct_timeval.  */
+int
+gettimeofday_microseconds_p (void)
+{
+#define call_gettimeofday(t)   gettimeofday (&(t), NULL)
+#define timeval_tv_sec(t)      ((t).tv_sec)
+#define timeval_tv_usec(t)     ((t).tv_usec)
+  MICROSECONDS_P ("gettimeofday", struct_timeval,
+                 call_gettimeofday, timeval_tv_sec, timeval_tv_usec);
+}
+
+/* Return the verdict of the MICROSECONDS_P test applied to getrusage():
+   non-zero if it was found to be microsecond accurate, zero if only clock
+   tick accurate.  Only the ru_utime field is examined.  The three macros
+   defined here are handed to MICROSECONDS_P as the sampling call and the
+   seconds and microseconds field accessors for a struct_rusage.  */
+int
+getrusage_microseconds_p (void)
+{
+#define call_getrusage(t)   getrusage (0, &(t))
+#define rusage_tv_sec(t)    ((t).ru_utime.tv_sec)
+#define rusage_tv_usec(t)   ((t).ru_utime.tv_usec)
+  MICROSECONDS_P ("getrusage", struct_rusage,
+                 call_getrusage, rusage_tv_sec, rusage_tv_usec);
+}
+
+/* Test whether getrusage goes backwards, return non-zero if it does
+   (suggesting it's flawed).
+
+   On a macintosh m68040-unknown-netbsd1.4.1 getrusage looks like it's
+   microsecond accurate, but has been seen remaining unchanged after many
+   microseconds have elapsed.  It also regularly goes backwards by 1000 to
+   5000 usecs, this has been seen after between 500 and 4000 attempts taking
+   perhaps 0.03 seconds.  We consider this too broken for good measuring.
+   We used to have configure pretend getrusage didn't exist on this system,
+   but a runtime test should be more reliable, since we imagine the problem
+   is not confined to just this exact system tuple.  */
+
+int
+getrusage_backwards_p (void)
+{
+  /* -1 until the test has been run once, then the cached 0 or 1 answer */
+  static int result = -1;
+  struct rusage  start, prev, next;
+  long  d;
+  int   i;
+
+  if (result != -1)
+    return result;
+
+  getrusage (0, &start);
+  memcpy (&next, &start, sizeof (next));
+
+  result = 0;
+  i = 0;
+  for (;;)
+    {
+      /* compare each new sample against the one immediately before it */
+      memcpy (&prev, &next, sizeof (prev));
+      getrusage (0, &next);
+
+      if (next.ru_utime.tv_sec < prev.ru_utime.tv_sec
+          || (next.ru_utime.tv_sec == prev.ru_utime.tv_sec
+              && next.ru_utime.tv_usec < prev.ru_utime.tv_usec))
+        {
+          /* tv_sec and tv_usec are time_t and suseconds_t, which need not
+             be "long", so cast explicitly to match the %ld conversions */
+          if (speed_option_verbose)
+            printf ("getrusage went backwards (attempt %d: %ld.%06ld -> %ld.%06ld)\n",
+                    i,
+                    (long) prev.ru_utime.tv_sec, (long) prev.ru_utime.tv_usec,
+                    (long) next.ru_utime.tv_sec, (long) next.ru_utime.tv_usec);
+          result = 1;
+          break;
+        }
+
+      /* minimum 1000 attempts, then stop after either 0.1 seconds or 50000
+         attempts, whichever comes first.  d is the user time consumed
+         since the first sample, in microseconds. */
+      d = 1000000 * (next.ru_utime.tv_sec - start.ru_utime.tv_sec)
+        + (next.ru_utime.tv_usec - start.ru_utime.tv_usec);
+      i++;
+      if (i > 50000 || (i > 1000 && d > 100000))
+        break;
+    }
+
+  return result;
+}
+
+/* CLOCK_PROCESS_CPUTIME_ID looks like it's going to be in a future version
+   of glibc (some time post 2.2).
+
+   CLOCK_VIRTUAL is process time, available in BSD systems (though sometimes
+   defined, but returning -1 for an error).  */
+
+/* Pick the clock id to give to clock_gettime and clock_getres, preferring
+   CLOCK_PROCESS_CPUTIME_ID over CLOCK_VIRTUAL.  */
+#ifdef CLOCK_PROCESS_CPUTIME_ID
+# define CGT_ID        CLOCK_PROCESS_CPUTIME_ID
+#else
+# ifdef CLOCK_VIRTUAL
+#  define CGT_ID       CLOCK_VIRTUAL
+# endif
+#endif
+/* have_cgt_id records whether an id was found above.  When none was,
+   CGT_ID is still defined so that code mentioning it compiles, but as an
+   expression which invokes ASSERT_FAIL and then yields -1.  */
+#ifdef CGT_ID
+const int  have_cgt_id = 1;
+#else
+const int  have_cgt_id = 0;
+# define CGT_ID       (ASSERT_FAIL (CGT_ID not determined), -1)
+#endif
+
+/* Return non-zero if clock_gettime can be used with CGT_ID, and in that
+   case set cgt_unittime to the resolution reported by clock_getres, in
+   seconds.  Return zero if clock_gettime is not available, no clock id is
+   known, or either call fails.  The outcome of the trial calls is cached in
+   "result" so they are made only once.  All diagnostics are printed only
+   under speed_option_verbose.  */
+int
+cgt_works_p (void)
+{
+  static int  result = -1;
+  struct_timespec  unit;
+
+  if (! have_cgt)
+    return 0;
+
+  if (! have_cgt_id)
+    {
+      if (speed_option_verbose)
+        printf ("clock_gettime don't know what ID to use\n");
+      result = 0;
+      return result;
+    }
+
+  if (result != -1)
+    return result;
+
+  /* trial run to see if it works */
+  if (clock_gettime (CGT_ID, &unit) != 0)
+    {
+      if (speed_option_verbose)
+        printf ("clock_gettime id=%d error: %s\n", CGT_ID, strerror (errno));
+      result = 0;
+      return result;
+    }
+
+  /* get the resolution */
+  if (clock_getres (CGT_ID, &unit) != 0)
+    {
+      if (speed_option_verbose)
+        printf ("clock_getres id=%d error: %s\n", CGT_ID, strerror (errno));
+      result = 0;
+      return result;
+    }
+
+  cgt_unittime = unit.tv_sec + unit.tv_nsec * 1e-9;
+  /* informational only, so keep quiet unless asked, like every other
+     message in this function */
+  if (speed_option_verbose)
+    printf ("clock_gettime is %s accurate\n",
+            unittime_string (cgt_unittime));
+  result = 1;
+  return result;
+}
+
+
+/* A single measurement routine for freq_measure(), as used by
+   mftb_works_p.  The body is an expansion of FREQ_MEASURE_ONE (defined
+   elsewhere in this file), given gettimeofday() as the clock to measure
+   against and MFTB as the counter being measured.
+   NOTE(review): presumably returns the period of one mftb tick in seconds;
+   confirm against the FREQ_MEASURE_ONE definition.  */
+static double
+freq_measure_mftb_one (void)
+{
+#define call_gettimeofday(t)   gettimeofday (&(t), NULL)
+#define timeval_tv_sec(t)      ((t).tv_sec)
+#define timeval_tv_usec(t)     ((t).tv_usec)
+  FREQ_MEASURE_ONE ("mftb", struct_timeval,
+                   call_gettimeofday, MFTB,
+                   timeval_tv_sec, timeval_tv_usec);
+}
+
+
+/* Jump target used by mftb_works_handler to get back into mftb_works_p.  */
+static jmp_buf  mftb_works_buf;
+
+/* SIGILL handler installed by mftb_works_p around its trial MFTB.  It
+   jumps back to the setjmp there, making that setjmp return 1.  The "sig"
+   argument is not used.  */
+static RETSIGTYPE
+mftb_works_handler (int sig)
+{
+  longjmp (mftb_works_buf, 1);
+}
+
+/* Return non-zero if the MFTB counter can be used for timing, in which case
+   mftb_unittime is set to its measured period.
+
+   Where SIGILL exists, MFTB is first tried once with a SIGILL handler
+   installed, and an illegal instruction trap means it doesn't work.  The
+   previous SIGILL handler is put back whichever way that trial goes.  The
+   period is then measured with freq_measure, which needs gettimeofday;
+   zero is returned if that is unavailable or the measurement fails.
+
+   NOTE(review): when signal() itself fails the function returns 1 without
+   measuring, so mftb_unittime is left as it was.  */
+int
+mftb_works_p (void)
+{
+  unsigned   a[2];
+  RETSIGTYPE (*old_handler) __GMP_PROTO ((int));
+  double     cycletime;
+
+  /* suppress a warning about a[] unused */
+  a[0] = 0;
+
+  if (! have_mftb)
+    return 0;
+
+#ifdef SIGILL
+  old_handler = signal (SIGILL, mftb_works_handler);
+  if (old_handler == SIG_ERR)
+    {
+      if (speed_option_verbose)
+        printf ("mftb_works_p(): SIGILL not supported, assuming mftb works\n");
+      return 1;
+    }
+  if (setjmp (mftb_works_buf))
+    {
+      /* Got here by longjmp from mftb_works_handler.  Don't leave that
+         handler installed, since mftb_works_buf is of no use to it once
+         this function has returned.  old_handler is not modified after the
+         setjmp, so its value is still good here.  */
+      signal (SIGILL, old_handler);
+      if (speed_option_verbose)
+        printf ("mftb_works_p(): SIGILL during mftb, so doesn't work\n");
+      return 0;
+    }
+  MFTB (a);
+  signal (SIGILL, old_handler);
+  if (speed_option_verbose)
+    printf ("mftb_works_p(): mftb works\n");
+#else
+
+  if (speed_option_verbose)
+    printf ("mftb_works_p(): SIGILL not defined, assuming mftb works\n");
+#endif
+
+#if ! HAVE_GETTIMEOFDAY
+  if (speed_option_verbose)
+    printf ("mftb_works_p(): no gettimeofday available to measure mftb\n");
+  return 0;
+#endif
+
+  /* The time base is normally 1/4 of the bus speed on 6xx and 7xx chips, on
+     other chips it can be driven from an external clock. */
+  cycletime = freq_measure ("mftb", freq_measure_mftb_one);
+  if (cycletime == -1.0)
+    {
+      if (speed_option_verbose)
+        printf ("mftb_works_p(): cannot measure mftb period\n");
+      return 0;
+    }
+
+  mftb_unittime = cycletime;
+  return 1;
+}
+
+
+/* Address of the memory mapped SGI cycle counter word, set up by
+   sgi_works_p and read by speed_starttime and speed_endtime.  */
+volatile unsigned  *sgi_addr;
+
+/* Return non-zero if the syssgi() hardware cycle counter is usable, having
+   mapped it and set sgi_addr and sgi_unittime.  Return zero if syssgi or
+   mmap are not available at build time or any step fails.  The answer is
+   cached in "result".  */
+int
+sgi_works_p (void)
+{
+#if HAVE_SYSSGI && HAVE_MMAP
+  static int  result = -1;
+
+  size_t          pagesize, offset;
+  __psunsigned_t  phys, physpage;
+  void            *virtpage;
+  unsigned        period_picoseconds;
+  int             size, fd;
+
+  if (result != -1)
+    return result;
+
+  /* the return value is used below as the physical address of the counter,
+     the period comes back through the pointer argument */
+  phys = syssgi (SGI_QUERY_CYCLECNTR, &period_picoseconds);
+  if (phys == (__psunsigned_t) -1)
+    {
+      /* ENODEV is the error when a counter is not available */
+      if (speed_option_verbose)
+       printf ("syssgi SGI_QUERY_CYCLECNTR error: %s\n", strerror (errno));
+      result = 0;
+      return result;
+    }
+  sgi_unittime = period_picoseconds * 1e-12;
+
+  /* IRIX 5 doesn't have SGI_CYCLECNTR_SIZE, assume 32 bits in that case.
+     Challenge/ONYX hardware has a 64 bit byte counter, but there seems no
+     obvious way to identify that without SGI_CYCLECNTR_SIZE.  */
+#ifdef SGI_CYCLECNTR_SIZE
+  size = syssgi (SGI_CYCLECNTR_SIZE);
+  if (size == -1)
+    {
+      /* NOTE(review): the message says size==4 but the fallback assigned
+         below is 32; size is treated as a bit count here (see the size/8
+         uses further down).  */
+      if (speed_option_verbose)
+       {
+         printf ("syssgi SGI_CYCLECNTR_SIZE error: %s\n", strerror (errno));
+         printf ("    will assume size==4\n");
+       }
+      size = 32;
+    }
+#else
+  size = 32;
+#endif
+
+  if (size < 32)
+    {
+      printf ("syssgi SGI_CYCLECNTR_SIZE gives %d, expected 32 or 64\n", size);
+      result = 0;
+      return result;
+    }
+
+  /* split the physical address into its page and the offset within it */
+  pagesize = getpagesize();
+  offset = (size_t) phys & (pagesize-1);
+  physpage = phys - offset;
+
+  /* shouldn't cross over a page boundary */
+  ASSERT_ALWAYS (offset + size/8 <= pagesize);
+
+  /* NOTE(review): fd is not closed on any path after this point, neither
+     when mmap fails nor once the mapping is established.  */
+  fd = open("/dev/mmem", O_RDONLY);
+  if (fd == -1)
+    {
+      if (speed_option_verbose)
+       printf ("open /dev/mmem: %s\n", strerror (errno));
+      result = 0;
+      return result;
+    }
+
+  virtpage = mmap (0, pagesize, PROT_READ, MAP_PRIVATE, fd, (off_t) physpage);
+  if (virtpage == (void *) -1)
+    {
+      if (speed_option_verbose)
+       printf ("mmap /dev/mmem: %s\n", strerror (errno));
+      result = 0;
+      return result;
+    }
+
+  /* address of least significant 4 bytes, knowing mips is big endian */
+  sgi_addr = (unsigned *) ((char *) virtpage + offset
+                          + size/8 - sizeof(unsigned));
+  result = 1;
+  return result;
+
+#else /* ! (HAVE_SYSSGI && HAVE_MMAP) */
+  return 0;
+#endif
+}
+
+
+/* Assign N to VAR only if VAR is currently zero, i.e. supply a default for
+   a setting which has not already been given a value.  VAR is evaluated
+   more than once.  Wrapped in do/while(0) so it acts as one statement.  */
+#define DEFAULT(var,n)  \
+  do {                  \
+    if (! (var))        \
+      (var) = (n);      \
+  } while (0)
+
+/* Choose the time measuring method(s) to be used by speed_starttime and
+   speed_endtime, and set speed_unittime, speed_precision (unless already
+   set) and the descriptive speed_time_string to suit.  Only the first call
+   does anything, later calls return immediately.
+
+   The choice is the first of the following that is available and works:
+   the CPU cycle counter (possibly with a supplementary method to cover
+   its limited range), STCK, mftb, the syssgi counter, read_real_time,
+   clock_gettime with microsecond or better resolution, times() with
+   better than microsecond ticks, microsecond getrusage, microsecond
+   gettimeofday, clock_gettime with clock tick or better resolution, and
+   lastly clock tick times, getrusage or gettimeofday.  If nothing at all
+   is available a message is printed and the program aborts.  */
+void
+speed_time_init (void)
+{
+  double supplement_unittime = 0.0;
+
+  static int  speed_time_initialized = 0;
+  if (speed_time_initialized)
+    return;
+  speed_time_initialized = 1;
+
+  speed_cycletime_init ();
+
+  if (have_cycles && cycles_works_p ())
+    {
+      use_cycles = 1;
+      DEFAULT (speed_cycletime, 1.0);
+      speed_unittime = speed_cycletime;
+      DEFAULT (speed_precision, 10000);
+      strcpy (speed_time_string, "CPU cycle counter");
+
+      /* only used if a supplementary method is chosen below */
+      cycles_limit = (have_cycles == 1 ? M_2POW32 : M_2POW64) / 2.0
+        * speed_cycletime;
+
+      if (have_grus && getrusage_microseconds_p() && ! getrusage_backwards_p())
+        {
+          /* this is a good combination */
+          use_grus = 1;
+          supplement_unittime = grus_unittime = 1.0e-6;
+          strcpy (speed_time_string, "CPU cycle counter, supplemented by microsecond getrusage()");
+        }
+      else if (have_cycles == 1)
+        {
+          /* When speed_cyclecounter has a limited range, look for something
+             to supplement it. */
+          if (have_gtod && gettimeofday_microseconds_p())
+            {
+              use_gtod = 1;
+              supplement_unittime = gtod_unittime = 1.0e-6;
+              strcpy (speed_time_string, "CPU cycle counter, supplemented by microsecond gettimeofday()");
+            }
+          else if (have_grus)
+            {
+              use_grus = 1;
+              supplement_unittime = grus_unittime = 1.0 / (double) clk_tck ();
+              sprintf (speed_time_string, "CPU cycle counter, supplemented by %s clock tick getrusage()", unittime_string (supplement_unittime));
+            }
+          else if (have_times)
+            {
+              use_times = 1;
+              supplement_unittime = times_unittime = 1.0 / (double) clk_tck ();
+              sprintf (speed_time_string, "CPU cycle counter, supplemented by %s clock tick times()", unittime_string (supplement_unittime));
+            }
+          else if (have_gtod)
+            {
+              use_gtod = 1;
+              supplement_unittime = gtod_unittime = 1.0 / (double) clk_tck ();
+              sprintf (speed_time_string, "CPU cycle counter, supplemented by %s clock tick gettimeofday()", unittime_string (supplement_unittime));
+            }
+          else
+            {
+              fprintf (stderr, "WARNING: cycle counter is 32 bits and there's no other functions.\n");
+              fprintf (stderr, "    Wraparounds may produce bad results on long measurements.\n");
+            }
+        }
+
+      if (use_grus || use_times || use_gtod)
+        {
+          /* must know cycle period to compare cycles to other measuring
+             (via cycles_limit) */
+          speed_cycletime_need_seconds ();
+
+          if (speed_precision * supplement_unittime > cycles_limit)
+            {
+              fprintf (stderr, "WARNING: requested precision can't always be achieved due to limited range\n");
+              fprintf (stderr, "    cycle counter and limited precision supplemental method\n");
+              fprintf (stderr, "    (%s)\n", speed_time_string);
+            }
+        }
+    }
+  else if (have_stck)
+    {
+      strcpy (speed_time_string, "STCK timestamp");
+      /* stck is in units of 2^-12 microseconds, which is very likely higher
+         resolution than a cpu cycle */
+      if (speed_cycletime == 0.0)
+        speed_cycletime_fail
+          ("Need to know CPU frequency for effective stck unit");
+      speed_unittime = MAX (speed_cycletime, STCK_PERIOD);
+      DEFAULT (speed_precision, 10000);
+    }
+  else if (have_mftb && mftb_works_p ())
+    {
+      use_mftb = 1;
+      DEFAULT (speed_precision, 10000);
+      speed_unittime = mftb_unittime;
+      sprintf (speed_time_string, "mftb counter (%s)",
+               unittime_string (speed_unittime));
+    }
+  else if (have_sgi && sgi_works_p ())
+    {
+      use_sgi = 1;
+      DEFAULT (speed_precision, 10000);
+      speed_unittime = sgi_unittime;
+      sprintf (speed_time_string, "syssgi() mmap counter (%s), supplemented by millisecond getrusage()",
+               unittime_string (speed_unittime));
+      /* supplemented with getrusage, which we assume to have 1ms resolution */
+      use_grus = 1;
+      supplement_unittime = 1e-3;
+    }
+  else if (have_rrt)
+    {
+      timebasestruct_t  t;
+      use_rrt = 1;
+      DEFAULT (speed_precision, 10000);
+      read_real_time (&t, sizeof(t));
+      switch (t.flag) {
+      case RTC_POWER:
+        /* FIXME: What's the actual RTC resolution? */
+        speed_unittime = 1e-7;
+        strcpy (speed_time_string, "read_real_time() power nanoseconds");
+        break;
+      case RTC_POWER_PC:
+        /* convert a time base value of 2^32 ticks to get the tick period */
+        t.tb_high = 1;
+        t.tb_low = 0;
+        time_base_to_time (&t, sizeof(t));
+        speed_unittime = TIMEBASESTRUCT_SECS(&t) / M_2POW32;
+        sprintf (speed_time_string, "%s read_real_time() powerpc ticks",
+                 unittime_string (speed_unittime));
+        break;
+      default:
+        fprintf (stderr, "ERROR: Unrecognised timebasestruct_t flag=%d\n",
+                 t.flag);
+        abort ();
+      }
+    }
+  else if (have_cgt && cgt_works_p() && cgt_unittime < 1.5e-6)
+    {
+      /* use clock_gettime if microsecond or better resolution */
+    choose_cgt:
+      use_cgt = 1;
+      speed_unittime = cgt_unittime;
+      DEFAULT (speed_precision, (cgt_unittime <= 0.1e-6 ? 10000 : 1000));
+      /* Describe the method actually chosen.  This point is also reached
+         from the clock tick resolution case below, so give the real
+         resolution rather than claiming microseconds.  */
+      sprintf (speed_time_string, "%s accurate clock_gettime()",
+               unittime_string (speed_unittime));
+    }
+  else if (have_times && clk_tck() > 1000000)
+    {
+      /* Cray vector systems have times() which is clock cycle resolution
+         (eg. 450 MHz).  */
+      DEFAULT (speed_precision, 10000);
+      goto choose_times;
+    }
+  else if (have_grus && getrusage_microseconds_p() && ! getrusage_backwards_p())
+    {
+      use_grus = 1;
+      speed_unittime = grus_unittime = 1.0e-6;
+      DEFAULT (speed_precision, 1000);
+      strcpy (speed_time_string, "microsecond accurate getrusage()");
+    }
+  else if (have_gtod && gettimeofday_microseconds_p())
+    {
+      use_gtod = 1;
+      speed_unittime = gtod_unittime = 1.0e-6;
+      DEFAULT (speed_precision, 1000);
+      strcpy (speed_time_string, "microsecond accurate gettimeofday()");
+    }
+  else if (have_cgt && cgt_works_p() && cgt_unittime < 1.5/clk_tck())
+    {
+      /* use clock_gettime if 1 tick or better resolution */
+      goto choose_cgt;
+    }
+  else if (have_times)
+    {
+      use_tick_boundary = 1;
+      DEFAULT (speed_precision, 200);
+    choose_times:
+      use_times = 1;
+      speed_unittime = times_unittime = 1.0 / (double) clk_tck ();
+      sprintf (speed_time_string, "%s clock tick times()",
+               unittime_string (speed_unittime));
+    }
+  else if (have_grus)
+    {
+      use_grus = 1;
+      use_tick_boundary = 1;
+      speed_unittime = grus_unittime = 1.0 / (double) clk_tck ();
+      DEFAULT (speed_precision, 200);
+      /* no newline in the description, whoever prints it supplies that */
+      sprintf (speed_time_string, "%s clock tick getrusage()",
+               unittime_string (speed_unittime));
+    }
+  else if (have_gtod)
+    {
+      use_gtod = 1;
+      use_tick_boundary = 1;
+      speed_unittime = gtod_unittime = 1.0 / (double) clk_tck ();
+      DEFAULT (speed_precision, 200);
+      sprintf (speed_time_string, "%s clock tick gettimeofday()",
+               unittime_string (speed_unittime));
+    }
+  else
+    {
+      fprintf (stderr, "No time measuring method available\n");
+      fprintf (stderr, "None of: speed_cyclecounter(), STCK(), getrusage(), gettimeofday(), times()\n");
+      abort ();
+    }
+
+  if (speed_option_verbose)
+    {
+      printf ("speed_time_init: %s\n", speed_time_string);
+      printf ("    speed_precision     %d\n", speed_precision);
+      printf ("    speed_unittime      %.2g\n", speed_unittime);
+      if (supplement_unittime)
+        printf ("    supplement_unittime %.2g\n", supplement_unittime);
+      printf ("    use_tick_boundary   %d\n", use_tick_boundary);
+      if (have_cycles)
+        printf ("    cycles_limit        %.2g seconds\n", cycles_limit);
+    }
+}
+
+
+
+/* Burn up CPU until a clock tick boundary, for greater accuracy.  Set the
+   corresponding "start_foo" appropriately too. */
+
+void
+grus_tick_boundary (void)
+{
+  struct_rusage  before;
+
+  /* Take a reference reading, then keep re-sampling into start_grus until
+     the microseconds of the user time differ from that reference.  */
+  getrusage (0, &before);
+  for (;;)
+    {
+      getrusage (0, &start_grus);
+      if (start_grus.ru_utime.tv_usec != before.ru_utime.tv_usec)
+        break;
+    }
+}
+
+void
+gtod_tick_boundary (void)
+{
+  struct_timeval  before;
+
+  /* Take a reference reading, then keep re-sampling into start_gtod until
+     its microseconds differ from that reference.  */
+  gettimeofday (&before, NULL);
+  for (;;)
+    {
+      gettimeofday (&start_gtod, NULL);
+      if (start_gtod.tv_usec != before.tv_usec)
+        break;
+    }
+}
+
+void
+times_tick_boundary (void)
+{
+  struct_tms  before;
+
+  /* Take a reference reading, then keep re-sampling into start_times until
+     the user time differs from that reference.  */
+  times (&before);
+  for (;;)
+    {
+      times (&start_times);
+      if (start_times.tms_utime != before.tms_utime)
+        break;
+    }
+}
+
+
+/* "have_" values are tested to let unused code go dead.  */
+
+/* Record the starting sample for every measuring method in use, into the
+   corresponding "start_foo" variable.  The methods which can wait for a
+   clock tick boundary do so when use_tick_boundary is set.  */
+void
+speed_starttime (void)
+{
+  speed_time_init ();
+
+  if (have_grus && use_grus)
+    {
+      if (! use_tick_boundary)
+        getrusage (0, &start_grus);
+      else
+        grus_tick_boundary ();
+    }
+
+  if (have_gtod && use_gtod)
+    {
+      if (! use_tick_boundary)
+        gettimeofday (&start_gtod, NULL);
+      else
+        gtod_tick_boundary ();
+    }
+
+  if (have_times && use_times)
+    {
+      if (! use_tick_boundary)
+        times (&start_times);
+      else
+        times_tick_boundary ();
+    }
+
+  if (have_cgt && use_cgt)
+    {
+      clock_gettime (CGT_ID, &start_cgt);
+    }
+
+  if (have_rrt && use_rrt)
+    {
+      read_real_time (&start_rrt, sizeof(start_rrt));
+    }
+
+  if (have_sgi && use_sgi)
+    {
+      start_sgi = *sgi_addr;
+    }
+
+  if (have_mftb && use_mftb)
+    {
+      MFTB (start_mftb);
+    }
+
+  if (have_stck && use_stck)
+    {
+      STCK (start_stck);
+    }
+
+  /* The cycle counter is deliberately the very last thing read, so that
+     none of the sampling above is included in what it measures.  */
+  if (have_cycles && use_cycles)
+    {
+      speed_cyclecounter (start_cycles);
+    }
+}
+
+
+/* Calculate the difference between two cycle counter samples, as a "double"
+   counter of cycles.
+
+   The start and end values are allowed to cancel in integers in case the
+   counter values are bigger than the 53 bits that normally fit in a double.
+
+   This works even if speed_cyclecounter() puts a value bigger than 32-bits
+   in the low word (the high word always gets a 2**32 multiplier though). */
+
+double
+speed_cyclecounter_diff (const unsigned end[2], const unsigned start[2])
+{
+  /* low words subtracted as unsigned integers, so they cancel exactly */
+  unsigned  low = end[0] - start[0];
+  double    cycles;
+
+  /* a one word counter has nothing more to it than the low difference */
+  if (have_cycles == 1)
+    return low;
+
+  /* low > end[0] means the low subtraction borrowed */
+  cycles = low;
+  if (low > end[0])
+    cycles -= M_2POWU;
+
+  /* high words likewise cancel in integers before being scaled */
+  cycles += (end[1] - start[1]) * M_2POW32;
+  return cycles;
+}
+
+
+/* Difference between two mftb samples, as a "double" count of ticks.  Each
+   sample is a pair of words, [0] low and [1] high, and the words are
+   subtracted as integers before being combined.  */
+double
+speed_mftb_diff (const unsigned end[2], const unsigned start[2])
+{
+  unsigned  low = end[0] - start[0];
+  double    ticks = (double) low;
+
+  /* low > end[0] means the low subtraction borrowed */
+  if (low > end[0])
+    ticks -= M_2POW32;
+
+  ticks += (end[1] - start[1]) * M_2POW32;
+  return ticks;
+}
+
+
+/* Calculate the difference between "start" and "end" using fields "sec" and
+   "psec", where each "psec" is a "punit" of a second.
+
+   The seconds parts are allowed to cancel before being combined with the
+   psec parts, in case a simple "sec+psec*punit" exceeds the precision of a
+   double.
+
+   Total time is only calculated in a "double" since an integer count of
+   psecs might overflow.  2^32 microseconds is only a bit over an hour, or
+   2^32 nanoseconds only about 4 seconds.
+
+   The casts to "long" are for the benefit of timebasestruct_t, where the
+   fields are only "unsigned int", but we want a signed difference.  */
+
+#define DIFF_SECS_ROUTINE(sec, psec, punit)                     \
+  {                                                             \
+    long  whole_part = (long) end->sec - (long) start->sec;     \
+    long  frac_part = (long) end->psec - (long) start->psec;    \
+    return (double) whole_part + punit * (double) frac_part;    \
+  }
+
+/* Return end minus start in seconds, as a double, for two struct_timeval
+   values (tv_sec seconds and tv_usec microseconds).  */
+double
+timeval_diff_secs (const struct_timeval *end, const struct_timeval *start)
+{
+  DIFF_SECS_ROUTINE (tv_sec, tv_usec, 1e-6);
+}
+
+/* Return end minus start in seconds, as a double, using only the ru_utime
+   field (seconds and microseconds) of two struct_rusage values.  */
+double
+rusage_diff_secs (const struct_rusage *end, const struct_rusage *start)
+{
+  DIFF_SECS_ROUTINE (ru_utime.tv_sec, ru_utime.tv_usec, 1e-6);
+}
+
+/* Return end minus start in seconds, as a double, for two struct_timespec
+   values (tv_sec seconds and tv_nsec nanoseconds).  */
+double
+timespec_diff_secs (const struct_timespec *end, const struct_timespec *start)
+{
+  DIFF_SECS_ROUTINE (tv_sec, tv_nsec, 1e-9);
+}
+
+/* Return end minus start in seconds, as a double, for two timebasestruct_t
+   values.  This is for use after time_base_to_time, ie. for seconds and
+   nanoseconds: tb_high is taken as seconds and tb_low as nanoseconds.  */
+double
+timebasestruct_diff_secs (const timebasestruct_t *end,
+                         const timebasestruct_t *start)
+{
+  DIFF_SECS_ROUTINE (tb_high, tb_low, 1e-9);
+}
+
+
+/* Take the ending sample for every measuring method in use and return the
+   time elapsed since the samples recorded by speed_starttime, as measured
+   by whichever method is judged applicable below.  A negative result is
+   returned as zero, with a warning on stderr when speed_option_verbose is
+   2 or more.  Aborts if no method at all is in use.  */
+double
+speed_endtime (void)
+{
+  /* The three END_ macros each store "value" in result and jump to the
+     "done" label.  They differ only in the trace message printed when
+     speed_option_verbose is 3 or more.  */
+#define END_USE(name,value)                             \
+  do {                                                  \
+    if (speed_option_verbose >= 3)                      \
+      printf ("speed_endtime(): used %s\n", name);      \
+    result = value;                                     \
+    goto done;                                          \
+  } while (0)
+
+#define END_ENOUGH(name,value)                                          \
+  do {                                                                  \
+    if (speed_option_verbose >= 3)                                      \
+      printf ("speed_endtime(): %s gives enough precision\n", name);    \
+    result = value;                                                     \
+    goto done;                                                          \
+  } while (0)
+
+#define END_EXCEED(name,value)                                            \
+  do {                                                                    \
+    if (speed_option_verbose >= 3)                                        \
+      printf ("speed_endtime(): cycle counter limit exceeded, used %s\n", \
+             name);                                                      \
+    result = value;                                                       \
+    goto done;                                                            \
+  } while (0)
+
+  unsigned          end_cycles[2];
+  stck_t            end_stck;
+  unsigned          end_mftb[2];
+  unsigned          end_sgi;
+  timebasestruct_t  end_rrt;
+  struct_timespec   end_cgt;
+  struct_timeval    end_gtod;
+  struct_rusage     end_grus;
+  struct_tms        end_times;
+  double            t_gtod, t_grus, t_times, t_cgt;
+  double            t_rrt, t_sgi, t_mftb, t_stck, t_cycles;
+  double            result;
+
+  /* Cycles sampled first for maximum accuracy.
+     "have_" values tested to let unused code go dead.  */
+
+  if (have_cycles && use_cycles)  speed_cyclecounter (end_cycles);
+  if (have_stck   && use_stck)    STCK (end_stck);
+  if (have_mftb   && use_mftb)    MFTB (end_mftb);
+  if (have_sgi    && use_sgi)     end_sgi = *sgi_addr;
+  if (have_rrt    && use_rrt)     read_real_time (&end_rrt, sizeof(end_rrt));
+  if (have_cgt    && use_cgt)     clock_gettime (CGT_ID, &end_cgt);
+  if (have_gtod   && use_gtod)    gettimeofday (&end_gtod, NULL);
+  if (have_grus   && use_grus)    getrusage (0, &end_grus);
+  if (have_times  && use_times)   times (&end_times);
+
+  result = -1.0;
+
+  /* at verbosity 4 and up, dump the raw start and end samples */
+  if (speed_option_verbose >= 4)
+    {
+      printf ("speed_endtime():\n");
+      if (use_cycles)
+       printf ("   cycles  0x%X,0x%X -> 0x%X,0x%X\n",
+               start_cycles[1], start_cycles[0],
+               end_cycles[1], end_cycles[0]);
+
+      if (use_stck)
+       printf ("   stck  0x%lX -> 0x%lX\n", start_stck, end_stck);
+
+      if (use_mftb)
+       printf ("   mftb  0x%X,%08X -> 0x%X,%08X\n",
+               start_mftb[1], start_mftb[0],
+               end_mftb[1], end_mftb[0]);
+
+      if (use_sgi)
+       printf ("   sgi  0x%X -> 0x%X\n", start_sgi, end_sgi);
+
+      if (use_rrt)
+       printf ("   read_real_time  (%d)%u,%u -> (%d)%u,%u\n",
+               start_rrt.flag, start_rrt.tb_high, start_rrt.tb_low,
+               end_rrt.flag, end_rrt.tb_high, end_rrt.tb_low);
+
+      if (use_cgt)
+       printf ("   clock_gettime  %ld.%09ld -> %ld.%09ld\n",
+               start_cgt.tv_sec, start_cgt.tv_nsec,
+               end_cgt.tv_sec, end_cgt.tv_nsec);
+
+      if (use_gtod)
+       printf ("   gettimeofday  %ld.%06ld -> %ld.%06ld\n",
+               start_gtod.tv_sec, start_gtod.tv_usec,
+               end_gtod.tv_sec, end_gtod.tv_usec);
+
+      if (use_grus)
+       printf ("   getrusage  %ld.%06ld -> %ld.%06ld\n",
+               start_grus.ru_utime.tv_sec, start_grus.ru_utime.tv_usec,
+               end_grus.ru_utime.tv_sec, end_grus.ru_utime.tv_usec);
+
+      if (use_times)
+       printf ("   times  %ld -> %ld\n",
+               start_times.tms_utime, end_times.tms_utime);
+    }
+
+  /* read_real_time and clock_gettime are used outright when selected.
+     Note start_rrt is converted in place by time_base_to_time here.  */
+  if (use_rrt)
+    {
+      time_base_to_time (&start_rrt, sizeof(start_rrt));
+      time_base_to_time (&end_rrt, sizeof(end_rrt));
+      t_rrt = timebasestruct_diff_secs (&end_rrt, &start_rrt);
+      END_USE ("read_real_time()", t_rrt);
+    }
+
+  if (use_cgt)
+    {
+      t_cgt = timespec_diff_secs (&end_cgt, &start_cgt);
+      END_USE ("clock_gettime()", t_cgt);
+    }
+
+  /* The next three compute their time always, but only return it at this
+     stage when acting as a supplement to the cycle counter.  Otherwise the
+     t_ value is kept for the fallbacks at the end.  */
+  if (use_grus)
+    {
+      t_grus = rusage_diff_secs (&end_grus, &start_grus);
+
+      /* Use getrusage() if the cycle counter limit would be exceeded, or if
+        it provides enough accuracy already. */
+      if (use_cycles)
+       {
+         if (t_grus >= speed_precision*grus_unittime)
+           END_ENOUGH ("getrusage()", t_grus);
+         if (t_grus >= cycles_limit)
+           END_EXCEED ("getrusage()", t_grus);
+       }
+    }
+
+  if (use_times)
+    {
+      t_times = (end_times.tms_utime - start_times.tms_utime) * times_unittime;
+
+      /* Use times() if the cycle counter limit would be exceeded, or if
+        it provides enough accuracy already. */
+      if (use_cycles)
+       {
+         if (t_times >= speed_precision*times_unittime)
+           END_ENOUGH ("times()", t_times);
+         if (t_times >= cycles_limit)
+           END_EXCEED ("times()", t_times);
+       }
+    }
+
+  if (use_gtod)
+    {
+      t_gtod = timeval_diff_secs (&end_gtod, &start_gtod);
+
+      /* Use gettimeofday() if it measured a value bigger than the cycle
+        counter can handle.  */
+      if (use_cycles)
+       {
+         if (t_gtod >= cycles_limit)
+           END_EXCEED ("gettimeofday()", t_gtod);
+       }
+    }
+
+  /* the counter based methods, each used outright when selected */
+  if (use_mftb)
+    {
+      t_mftb = speed_mftb_diff (end_mftb, start_mftb) * mftb_unittime;
+      END_USE ("mftb", t_mftb);
+    }
+
+  if (use_stck)
+    {
+      t_stck = (end_stck - start_stck) * STCK_PERIOD;
+      END_USE ("stck", t_stck);
+    }
+
+  if (use_sgi)
+    {
+      t_sgi = (end_sgi - start_sgi) * sgi_unittime;
+      END_USE ("SGI hardware counter", t_sgi);
+    }
+
+  if (use_cycles)
+    {
+      t_cycles = speed_cyclecounter_diff (end_cycles, start_cycles)
+       * speed_cycletime;
+      END_USE ("cycle counter", t_cycles);
+    }
+
+  /* No counter in use, so fall back on the times computed above:
+     microsecond accurate methods first, then times(), getrusage() and
+     gettimeofday() in that order.  */
+  if (use_grus && getrusage_microseconds_p())
+    END_USE ("getrusage()", t_grus);
+
+  if (use_gtod && gettimeofday_microseconds_p())
+    END_USE ("gettimeofday()", t_gtod);
+
+  if (use_times)  END_USE ("times()",        t_times);
+  if (use_grus)   END_USE ("getrusage()",    t_grus);
+  if (use_gtod)   END_USE ("gettimeofday()", t_gtod);
+
+  fprintf (stderr, "speed_endtime(): oops, no time method available\n");
+  abort ();
+
+ done:
+  /* a negative elapsed time is returned as zero */
+  if (result < 0.0)
+    {
+      if (speed_option_verbose >= 2)
+       fprintf (stderr, "speed_endtime(): warning, treating negative time as zero: %.9f\n", result);
+      result = 0.0;
+    }
+  return result;
+}
diff --git a/tune/tuneup.c b/tune/tuneup.c

new file mode 100644 (file)

index 0000000..54827c1
--- /dev/null
+++ b/tune/tuneup.c
@@ -0,0 +1,2456 @@
+/* Create tuned thresholds for various algorithms.
+
+Copyright 1999, 2000, 2001, 2002, 2003, 2005, 2006, 2008, 2009, 2010, 2011
+Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+/* Usage: tuneup [-t] [-t] [-p precision]
+
+   -t turns on some diagnostic traces, a second -t turns on more traces.
+
+   Notes:
+
+   The code here isn't a vision of loveliness, mainly because it's subject
+   to ongoing changes according to new things wanting to be tuned, and
+   practical requirements of systems tested.
+
+   Sometimes running the program twice produces slightly different results.
+   This is probably because there's so little separating algorithms near
+   their crossover, and on that basis it should make little or no difference
+   to the final speed of the relevant routines, but nothing has been done to
+   check that carefully.
+
+   Algorithm:
+
+   The thresholds are determined as follows.  A crossover may not be a
+   single size but rather a range where it oscillates between method A or
+   method B faster.  If the threshold is set making B used where A is faster
+   (or vice versa) that's bad.  Badness is the percentage time lost and
+   total badness is the sum of this over all sizes measured.  The threshold
+   is set to minimize total badness.
+
+   Suppose, as sizes increase, method B becomes faster than method A.  The
+   effect of the rule is that, as you look at increasing sizes, isolated
+   points where B is faster are ignored, but when it's consistently faster,
+   or faster on balance, then the threshold is set there.  The same result
+   is obtained thinking in the other direction of A becoming faster at
+   smaller sizes.
+
+   In practice the thresholds tend to be chosen to bring on the next
+   algorithm fairly quickly.
+
+   This rule is attractive because it's got a basis in reason and is fairly
+   easy to implement, but no work has been done to actually compare it in
+   absolute terms to other possibilities.
+
+   Implementation:
+
+   In a normal library build the thresholds are constants.  To tune them
+   selected objects are recompiled with the thresholds as global variables
+   instead.  #define TUNE_PROGRAM_BUILD does this, with help from code at
+   the end of gmp-impl.h, and rules in tune/Makefile.am.
+
+   MUL_TOOM22_THRESHOLD for example uses a recompiled mpn_mul_n.  The
+   threshold is set to "size+1" to avoid karatsuba, or to "size" to use one
+   level, but recurse into the basecase.
+
+   MUL_TOOM33_THRESHOLD makes use of the tuned MUL_TOOM22_THRESHOLD value.
+   Other routines in turn will make use of both of those.  Naturally the
+   thresholds being depended upon must be tuned first.
+
+   In a couple of cases, like DIVEXACT_1_THRESHOLD, there's no recompiling,
+   just a threshold based on comparing two routines (mpn_divrem_1 and
+   mpn_divexact_1), and no further use of the value determined.
+
+   Flags like USE_PREINV_MOD_1 or JACOBI_BASE_METHOD are even simpler, being
+   just comparisons between certain routines on representative data.
+
+   Shortcuts are applied when native (assembler) versions of routines exist.
+   For instance a native mpn_sqr_basecase is assumed to be always faster
+   than mpn_mul_basecase, with no measuring.
+
+   No attempt is made to tune within assembler routines, for instance
+   DIVREM_1_NORM_THRESHOLD.  An assembler mpn_divrem_1 is expected to be
+   written and tuned all by hand.  Assembler routines that might have hard
+   limits are recompiled though, to make them accept a bigger range of sizes
+   than normal, eg. mpn_sqr_basecase to compare against mpn_toom2_sqr.
+
+   Limitations:
+
+   The FFTs aren't subject to the same badness rule as the other thresholds,
+   so each k is probably being brought on a touch early.  This isn't likely
+   to make a difference, and the simpler probing means fewer tests.
+
+*/
+
+#define TUNE_PROGRAM_BUILD  1   /* for gmp-impl.h */
+
+#include "config.h"
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#if HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#include "tests.h"
+#include "speed.h"
+
+#if !HAVE_DECL_OPTARG
+extern char *optarg;
+extern int optind, opterr;
+#endif
+
+
+#define DEFAULT_MAX_SIZE   1000  /* limbs */
+
+/* Program-wide settings and the shared measurement state.  */
+#if WANT_FFT
+mp_size_t  option_fft_max_size = 50000;  /* limbs */
+#else
+mp_size_t  option_fft_max_size = 0;
+#endif
+/* Trace level: code below tests this for >= 1 and >= 2 (see the -t option
+   in the usage comment above).  */
+int        option_trace = 0;
+/* NOTE(review): not referenced in this part of the file; presumably a
+   separate trace switch for the FFT tuning -- confirm.  */
+int        option_fft_trace = 0;
+/* The speed_params block that one() and fft() fill in (size, r) and pass
+   to tuneup_measure().  */
+struct speed_params  s;
+
+/* Measurements collected for the threshold currently being tuned.  dat[]
+   has ndat entries in use out of allocdat allocated; add_dat() appends to
+   it and analyze_dat() reads it.  */
+struct dat_t {
+  mp_size_t  size;   /* operand size that was measured */
+  double     d;      /* signed relative time difference computed in one():
+                        positive when function2 was the slower routine */
+} *dat = NULL;
+int  ndat = 0;       /* number of dat[] entries in use */
+int  allocdat = 0;   /* number of dat[] entries allocated */
+
+/* This is not defined if mpn_sqr_basecase doesn't declare a limit.  In that
+   case use zero here, which for params.max_size means no limit.  */
+#ifndef TUNE_SQR_TOOM2_MAX
+#define TUNE_SQR_TOOM2_MAX  0
+#endif
+
+/* Threshold variables for this TUNE_PROGRAM_BUILD (see "Implementation" in
+   the comment at the top of the file: objects under test are recompiled to
+   read thresholds from global variables).  Each starts at MP_SIZE_T_MAX or
+   at a *_THRESHOLD_LIMIT value; the tuning routines pass the address of
+   the relevant variable to one(), which overwrites it.  */
+mp_size_t  mul_toom22_threshold         = MP_SIZE_T_MAX;
+mp_size_t  mul_toom33_threshold         = MUL_TOOM33_THRESHOLD_LIMIT;
+mp_size_t  mul_toom44_threshold         = MUL_TOOM44_THRESHOLD_LIMIT;
+mp_size_t  mul_toom6h_threshold         = MUL_TOOM6H_THRESHOLD_LIMIT;
+mp_size_t  mul_toom8h_threshold         = MUL_TOOM8H_THRESHOLD_LIMIT;
+mp_size_t  mul_toom32_to_toom43_threshold = MP_SIZE_T_MAX;
+mp_size_t  mul_toom32_to_toom53_threshold = MP_SIZE_T_MAX;
+mp_size_t  mul_toom42_to_toom53_threshold = MP_SIZE_T_MAX;
+mp_size_t  mul_toom42_to_toom63_threshold = MP_SIZE_T_MAX;
+mp_size_t  mul_fft_threshold            = MP_SIZE_T_MAX;
+mp_size_t  mul_fft_modf_threshold       = MP_SIZE_T_MAX;
+mp_size_t  sqr_basecase_threshold       = MP_SIZE_T_MAX;
+/* TUNE_SQR_TOOM2_MAX == 0 means mpn_sqr_basecase declares no size limit.  */
+mp_size_t  sqr_toom2_threshold
+  = (TUNE_SQR_TOOM2_MAX == 0 ? MP_SIZE_T_MAX : TUNE_SQR_TOOM2_MAX);
+mp_size_t  sqr_toom3_threshold          = SQR_TOOM3_THRESHOLD_LIMIT;
+mp_size_t  sqr_toom4_threshold          = SQR_TOOM4_THRESHOLD_LIMIT;
+mp_size_t  sqr_toom6_threshold          = SQR_TOOM6_THRESHOLD_LIMIT;
+mp_size_t  sqr_toom8_threshold          = SQR_TOOM8_THRESHOLD_LIMIT;
+mp_size_t  sqr_fft_threshold            = MP_SIZE_T_MAX;
+mp_size_t  sqr_fft_modf_threshold       = MP_SIZE_T_MAX;
+mp_size_t  mullo_basecase_threshold     = MP_SIZE_T_MAX;
+mp_size_t  mullo_dc_threshold           = MP_SIZE_T_MAX;
+mp_size_t  mullo_mul_n_threshold        = MP_SIZE_T_MAX;
+mp_size_t  mulmod_bnm1_threshold        = MP_SIZE_T_MAX;
+mp_size_t  sqrmod_bnm1_threshold        = MP_SIZE_T_MAX;
+mp_size_t  dc_div_qr_threshold          = MP_SIZE_T_MAX;
+mp_size_t  dc_divappr_q_threshold       = MP_SIZE_T_MAX;
+mp_size_t  mu_div_qr_threshold          = MP_SIZE_T_MAX;
+mp_size_t  mu_divappr_q_threshold       = MP_SIZE_T_MAX;
+mp_size_t  mupi_div_qr_threshold        = MP_SIZE_T_MAX;
+mp_size_t  mu_div_q_threshold           = MP_SIZE_T_MAX;
+mp_size_t  dc_bdiv_qr_threshold         = MP_SIZE_T_MAX;
+mp_size_t  dc_bdiv_q_threshold          = MP_SIZE_T_MAX;
+mp_size_t  mu_bdiv_qr_threshold         = MP_SIZE_T_MAX;
+mp_size_t  mu_bdiv_q_threshold          = MP_SIZE_T_MAX;
+mp_size_t  inv_mulmod_bnm1_threshold    = MP_SIZE_T_MAX;
+mp_size_t  inv_newton_threshold         = MP_SIZE_T_MAX;
+mp_size_t  inv_appr_threshold           = MP_SIZE_T_MAX;
+mp_size_t  binv_newton_threshold        = MP_SIZE_T_MAX;
+mp_size_t  redc_1_to_redc_2_threshold   = MP_SIZE_T_MAX;
+mp_size_t  redc_1_to_redc_n_threshold   = MP_SIZE_T_MAX;
+mp_size_t  redc_2_to_redc_n_threshold   = MP_SIZE_T_MAX;
+mp_size_t  powm_threshold               = MP_SIZE_T_MAX;
+mp_size_t  matrix22_strassen_threshold  = MP_SIZE_T_MAX;
+mp_size_t  hgcd_threshold               = MP_SIZE_T_MAX;
+mp_size_t  gcd_dc_threshold             = MP_SIZE_T_MAX;
+mp_size_t  gcdext_dc_threshold          = MP_SIZE_T_MAX;
+mp_size_t  divrem_1_norm_threshold      = MP_SIZE_T_MAX;
+mp_size_t  divrem_1_unnorm_threshold    = MP_SIZE_T_MAX;
+mp_size_t  mod_1_norm_threshold         = MP_SIZE_T_MAX;
+mp_size_t  mod_1_unnorm_threshold       = MP_SIZE_T_MAX;
+mp_size_t  mod_1n_to_mod_1_1_threshold  = MP_SIZE_T_MAX;
+mp_size_t  mod_1u_to_mod_1_1_threshold  = MP_SIZE_T_MAX;
+mp_size_t  mod_1_1_to_mod_1_2_threshold = MP_SIZE_T_MAX;
+mp_size_t  mod_1_2_to_mod_1_4_threshold = MP_SIZE_T_MAX;
+mp_size_t  preinv_mod_1_to_mod_1_threshold = MP_SIZE_T_MAX;
+mp_size_t  divrem_2_threshold           = MP_SIZE_T_MAX;
+mp_size_t  get_str_dc_threshold         = MP_SIZE_T_MAX;
+mp_size_t  get_str_precompute_threshold = MP_SIZE_T_MAX;
+mp_size_t  set_str_dc_threshold         = MP_SIZE_T_MAX;
+mp_size_t  set_str_precompute_threshold = MP_SIZE_T_MAX;
+
+mp_size_t  fft_modf_sqr_threshold = MP_SIZE_T_MAX;
+mp_size_t  fft_modf_mul_threshold = MP_SIZE_T_MAX;
+
+/* Parameters controlling how one() searches for a threshold.  Fields left
+   at zero are replaced by the defaults noted below when one() runs.  */
+struct param_t {
+  const char        *name;          /* name printed in the "#define" line */
+  speed_function_t  function;       /* timed with the threshold at size+1 */
+  speed_function_t  function2;      /* timed with the threshold at size;
+                                       defaults to "function" */
+  double            step_factor;    /* how much to step relatively */
+  int               step;           /* how much to step absolutely */
+  double            function_fudge; /* multiplier for "function" speeds */
+  /* Stop after more than this many consecutive measurements that leave the
+     implied threshold unchanged (default 80).  */
+  int               stop_since_change;
+  /* Stop once the "function" time reaches stop_factor times the
+     "function2" time (default 1.2).  */
+  double            stop_factor;
+  /* First size measured (default 10).  */
+  mp_size_t         min_size;
+  /* If non-zero, a resulting threshold equal to min_size is reported
+     as 0.  */
+  int               min_is_always;
+  /* Measuring runs only while the size is below this (default
+     DEFAULT_MAX_SIZE).  */
+  mp_size_t         max_size;
+  /* If non-zero, one() first compares the two functions at this size and
+     sets the threshold to MP_SIZE_T_MAX unless function2 is at least 4%
+     faster.  */
+  mp_size_t         check_size;
+  /* Added to s->size for the duration of each tuneup_measure() call.  */
+  mp_size_t         size_extra;
+
+  /* How tuneup_measure() adjusts the high limb of each operand: reduced
+     modulo s->r for DATA_HIGH_LT_R, OR-ed with s->r for DATA_HIGH_GE_R,
+     left alone for any other value.  */
+#define DATA_HIGH_LT_R  1
+#define DATA_HIGH_GE_R  2
+  int               data_high;
+
+  /* If non-zero, one() prints its "#define" only when option_trace is
+     set.  */
+  int               noprint;
+};
+
+
+/* These are normally undefined when false, which suits "#if" fine.
+   But give them zero values so they can be used in plain C "if"s.  */
+#ifndef UDIV_PREINV_ALWAYS
+#define UDIV_PREINV_ALWAYS 0
+#endif
+#ifndef HAVE_NATIVE_mpn_divexact_1
+#define HAVE_NATIVE_mpn_divexact_1 0
+#endif
+#ifndef HAVE_NATIVE_mpn_divrem_1
+#define HAVE_NATIVE_mpn_divrem_1 0
+#endif
+#ifndef HAVE_NATIVE_mpn_divrem_2
+#define HAVE_NATIVE_mpn_divrem_2 0
+#endif
+#ifndef HAVE_NATIVE_mpn_mod_1
+#define HAVE_NATIVE_mpn_mod_1 0
+#endif
+#ifndef HAVE_NATIVE_mpn_modexact_1_odd
+#define HAVE_NATIVE_mpn_modexact_1_odd 0
+#endif
+#ifndef HAVE_NATIVE_mpn_preinv_divrem_1
+#define HAVE_NATIVE_mpn_preinv_divrem_1 0
+#endif
+#ifndef HAVE_NATIVE_mpn_preinv_mod_1
+#define HAVE_NATIVE_mpn_preinv_mod_1 0
+#endif
+#ifndef HAVE_NATIVE_mpn_sqr_basecase
+#define HAVE_NATIVE_mpn_sqr_basecase 0
+#endif
+
+
+#define MAX3(a,b,c)  MAX (MAX (a, b), c)
+
+/* Return a random limb with the GMP_NUMB_HIGHBIT bit forced on.  */
+mp_limb_t
+randlimb_norm (void)
+{
+  mp_limb_t  limb;
+
+  mpn_random (&limb, 1);
+  return limb | GMP_NUMB_HIGHBIT;
+}
+
+#define GMP_NUMB_HALFMASK  ((CNST_LIMB(1) << (GMP_NUMB_BITS/2)) - 1)
+
+/* Return a random non-zero limb restricted to the bits of
+   GMP_NUMB_HALFMASK; a zero draw is replaced by 1.  */
+mp_limb_t
+randlimb_half (void)
+{
+  mp_limb_t  limb;
+
+  mpn_random (&limb, 1);
+  limb &= GMP_NUMB_HALFMASK;
+  if (limb == 0)
+    limb = 1;
+  return limb;
+}
+
+
+/* Append the pair (size, d) to dat[].  When the array is full it is first
+   enlarged by ALLOCDAT_STEP entries.  */
+void
+add_dat (mp_size_t size, double d)
+{
+#define ALLOCDAT_STEP  500
+  struct dat_t  *slot;
+
+  ASSERT_ALWAYS (ndat <= allocdat);
+
+  if (ndat == allocdat)
+    {
+      int  grown = allocdat + ALLOCDAT_STEP;
+
+      dat = (struct dat_t *) __gmp_allocate_or_reallocate
+        (dat, allocdat * sizeof(dat[0]), grown * sizeof(dat[0]));
+      allocdat = grown;
+    }
+
+  slot = &dat[ndat];
+  slot->size = size;
+  slot->d = d;
+  ndat++;
+}
+
+
+/* Return the index into dat[] of the entry whose size gives the least
+   total badness when used as the threshold; callers read dat[idx].size to
+   get the threshold itself.  Returns 0 when dat[] is empty.
+
+   If "final" is non-zero and option_trace >= 2, the running badness is
+   printed for every candidate size.  */
+mp_size_t
+analyze_dat (int final)
+{
+  double  x, min_x;
+  int     j, min_j;
+  int     trace = (option_trace >= 2 && final);
+
+  /* If the threshold is set at dat[0].size, any positive values are bad. */
+  x = 0.0;
+  for (j = 0; j < ndat; j++)
+    if (dat[j].d > 0.0)
+      x += dat[j].d;
+
+  if (trace)
+    {
+      printf ("\n");
+      printf ("x is the sum of the badness from setting thresh at given size\n");
+      printf ("  (minimum x is sought)\n");
+      /* The loop above leaves j == ndat, so dat[j] is one past the last
+         entry (and past the allocation when ndat == allocdat).  The sum
+         just computed belongs to the first size, dat[0].  */
+      if (ndat > 0)
+        printf ("size=%ld  first x=%.4f\n", (long) dat[0].size, x);
+    }
+
+  min_x = x;
+  min_j = 0;
+
+
+  /* When stepping to the next dat[j].size, positive values are no longer
+     bad (so subtracted), negative values become bad (so add the absolute
+     value, meaning subtract). */
+  for (j = 0; j < ndat; x -= dat[j].d, j++)
+    {
+      if (trace)
+        printf ("size=%ld  x=%.4f\n", (long) dat[j].size, x);
+
+      /* Strict comparison: on a tie the earliest (smallest) size wins.  */
+      if (x < min_x)
+        {
+          min_x = x;
+          min_j = j;
+        }
+    }
+
+  return min_j;
+}
+
+
+/* Measuring for recompiled mpn/generic/divrem_1.c and mpn/generic/mod_1.c */
+
+mp_limb_t mpn_divrem_1_tune
+  __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
+mp_limb_t mpn_mod_1_tune
+   __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t));
+
+/* Speed routine for mpn_mod_1_tune (prototype above).  The whole body is
+   supplied by the SPEED_ROUTINE_MPN_MOD_1 macro, which is not defined in
+   this file -- presumably in speed.h.  */
+double
+speed_mpn_mod_1_tune (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_MOD_1 (mpn_mod_1_tune);
+}
+/* Speed routine for mpn_divrem_1_tune (prototype above).  The whole body
+   is supplied by the SPEED_ROUTINE_MPN_DIVREM_1 macro, which is not
+   defined in this file -- presumably in speed.h.  */
+double
+speed_mpn_divrem_1_tune (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_DIVREM_1 (mpn_divrem_1_tune);
+}
+
+
+/* Time "fun" through speed_measure() on fresh random operands.
+
+   s->size is temporarily increased by param->size_extra, s->xp and s->yp
+   are allocated at that size and filled with random limbs, and the high
+   limb of each is adjusted as selected by param->data_high.  s->size is
+   put back before returning.  A NULL param stands for an all-zero
+   param_t.  The return value is whatever speed_measure() returned.  */
+double
+tuneup_measure (speed_function_t fun,
+                const struct param_t *param,
+                struct speed_params *s)
+{
+  static struct param_t  no_params;   /* static, hence all zero */
+  double     elapsed;
+  mp_size_t  top;
+  TMP_DECL;
+
+  if (param == NULL)
+    param = &no_params;
+
+  s->size += param->size_extra;
+  top = s->size - 1;                  /* index of the high limb */
+
+  TMP_MARK;
+  SPEED_TMP_ALLOC_LIMBS (s->xp, s->size, 0);
+  SPEED_TMP_ALLOC_LIMBS (s->yp, s->size, 0);
+
+  mpn_random (s->xp, s->size);
+  mpn_random (s->yp, s->size);
+
+  if (param->data_high == DATA_HIGH_LT_R)
+    {
+      s->xp[top] %= s->r;
+      s->yp[top] %= s->r;
+    }
+  else if (param->data_high == DATA_HIGH_GE_R)
+    {
+      s->xp[top] |= s->r;
+      s->yp[top] |= s->r;
+    }
+
+  elapsed = speed_measure (fun, s);
+
+  s->size -= param->size_extra;
+
+  TMP_FREE;
+  return elapsed;
+}
+
+
+#define PRINT_WIDTH  31
+
+/* Print the opening of a "#define" line: the name left-justified in a
+   PRINT_WIDTH column followed by two spaces.  When option_trace is set the
+   line is finished with "..." so trace output can follow.  */
+void
+print_define_start (const char *name)
+{
+  printf ("#define %-*s  ", PRINT_WIDTH, name);
+  if (option_trace != 0)
+    puts ("...");
+}
+
+/* Print the value part of a "#define" line and flush stdout.  When
+   option_trace is set the "#define NAME" part is printed again first.
+   MP_SIZE_T_MAX is printed symbolically, any other value as a number.  A
+   non-NULL remark is appended as a C comment.  */
+void
+print_define_end_remark (const char *name, mp_size_t value, const char *remark)
+{
+  if (option_trace)
+    printf ("#define %-*s  ", PRINT_WIDTH, name);
+
+  if (value != MP_SIZE_T_MAX)
+    printf ("%5ld", (long) value);
+  else
+    printf ("MP_SIZE_T_MAX");
+
+  if (remark)
+    printf ("  /* %s */", remark);
+
+  putchar ('\n');
+  fflush (stdout);
+}
+
+/* Finish a "#define" line, with the remark "never" for MP_SIZE_T_MAX,
+   "always" for 0, and no remark otherwise.  */
+void
+print_define_end (const char *name, mp_size_t value)
+{
+  print_define_end_remark (name, value,
+                           value == MP_SIZE_T_MAX ? "never"
+                           : value == 0 ? "always"
+                           : NULL);
+}
+
+/* Print one complete "#define NAME VALUE" line using the automatic
+   "never"/"always" remark of print_define_end().  */
+void
+print_define (const char *name, mp_size_t value)
+{
+  print_define_start (name);
+
+  print_define_end (name, value);
+}
+
+/* Print one complete "#define NAME VALUE" line with the caller's remark
+   (which may be NULL for none).  */
+void
+print_define_remark (const char *name, mp_size_t value, const char *remark)
+{
+  print_define_start (name);
+
+  print_define_end_remark (name, value, remark);
+}
+
+
+/* Tune one threshold.
+
+   For each size from param->min_size up to, but not including,
+   param->max_size, param->function is timed with *threshold set to size+1
+   and param->function2 with *threshold set to size.  The relative
+   difference is recorded with add_dat() and analyze_dat() gives the
+   threshold implied so far.  The loop ends early on any of the stop
+   conditions below.
+
+   On return *threshold holds the size chosen by the final analyze_dat(),
+   or 0 if param->min_is_always applies, or MP_SIZE_T_MAX if the
+   param->check_size test failed.  The "#define" line is printed unless
+   param->noprint suppresses it.
+
+   Zero fields of *param are overwritten with defaults, so *param is
+   modified.  The global "s" is used for the measurements.  */
+void
+one (mp_size_t *threshold, struct param_t *param)
+{
+  int  since_positive, since_thresh_change;
+  int  thresh_idx, new_thresh_idx;
+
+  /* Assign n to x only if x is currently zero.  Note this makes an
+     explicit 0 or 0.0 indistinguishable from "not set".  */
+#define DEFAULT(x,n)  do { if (! (x))  (x) = (n); } while (0)
+
+  DEFAULT (param->function_fudge, 1.0);
+  DEFAULT (param->function2, param->function);
+  DEFAULT (param->step_factor, 0.01);  /* small steps by default */
+  DEFAULT (param->step, 1);            /* small steps by default */
+  DEFAULT (param->stop_since_change, 80);
+  DEFAULT (param->stop_factor, 1.2);
+  DEFAULT (param->min_size, 10);
+  DEFAULT (param->max_size, DEFAULT_MAX_SIZE);
+
+  /* Optional preliminary test at a single size: give up straight away,
+     with the threshold at MP_SIZE_T_MAX, unless function2 is clearly
+     faster there.  */
+  if (param->check_size != 0)
+    {
+      double   t1, t2;
+      s.size = param->check_size;
+
+      *threshold = s.size+1;
+      t1 = tuneup_measure (param->function, param, &s);
+
+      *threshold = s.size;
+      t2 = tuneup_measure (param->function2, param, &s);
+      if (t1 == -1.0 || t2 == -1.0)
+        {
+          printf ("Oops, can't run both functions at size %ld\n",
+                  (long) s.size);
+          abort ();
+        }
+      t1 *= param->function_fudge;
+
+      /* ask that t2 is at least 4% below t1 */
+      if (t1 < t2*1.04)
+        {
+          if (option_trace)
+            printf ("function2 never enough faster: t1=%.9f t2=%.9f\n", t1, t2);
+          *threshold = MP_SIZE_T_MAX;
+          if (! param->noprint)
+            print_define (param->name, *threshold);
+          return;
+        }
+
+      if (option_trace >= 2)
+        printf ("function2 enough faster at size=%ld: t1=%.9f t2=%.9f\n",
+                (long) s.size, t1, t2);
+    }
+
+  if (! param->noprint || option_trace)
+    print_define_start (param->name);
+
+  /* Start a fresh set of measurements; dat[] itself is reused.  */
+  ndat = 0;
+  since_positive = 0;
+  since_thresh_change = 0;
+  thresh_idx = 0;
+
+  if (option_trace >= 2)
+    {
+      printf ("             algorithm-A  algorithm-B   ratio  possible\n");
+      printf ("              (seconds)    (seconds)    diff    thresh\n");
+    }
+
+  /* The size advances by the larger of the relative step
+     floor(size*step_factor) and the absolute step.  */
+  for (s.size = param->min_size;
+       s.size < param->max_size;
+       s.size += MAX ((mp_size_t) floor (s.size * param->step_factor), param->step))
+    {
+      double   ti, tiplus1, d;
+
+      /*
+        FIXME: check minimum size requirements are met, possibly by just
+        checking for the -1 returns from the speed functions.
+      */
+
+      /* using method A at this size */
+      *threshold = s.size+1;
+      ti = tuneup_measure (param->function, param, &s);
+      if (ti == -1.0)
+        abort ();
+      ti *= param->function_fudge;
+
+      /* using method B at this size */
+      *threshold = s.size;
+      tiplus1 = tuneup_measure (param->function2, param, &s);
+      if (tiplus1 == -1.0)
+        abort ();
+
+      /* Calculate the fraction by which the one or the other routine is
+         slower.  The difference is always divided by the larger of the
+         two times.  */
+      if (tiplus1 >= ti)
+        d = (tiplus1 - ti) / tiplus1;  /* >= 0: method B is not faster */
+      else
+        d = (tiplus1 - ti) / ti;       /* < 0: method B is faster */
+
+      add_dat (s.size, d);
+
+      new_thresh_idx = analyze_dat (0);
+
+      if (option_trace >= 2)
+        printf ("size=%ld  %.9f  %.9f  % .4f %c  %ld\n",
+                (long) s.size, ti, tiplus1, d,
+                ti > tiplus1 ? '#' : ' ',
+                (long) dat[new_thresh_idx].size);
+
+      /* Stop if the last time method i was faster was more than a
+         certain number of measurements ago.  */
+#define STOP_SINCE_POSITIVE  200
+      if (d >= 0)
+        since_positive = 0;
+      else
+        if (++since_positive > STOP_SINCE_POSITIVE)
+          {
+            if (option_trace >= 1)
+              printf ("stopped due to since_positive (%d)\n",
+                      STOP_SINCE_POSITIVE);
+            break;
+          }
+
+      /* Stop if method A has become slower by a certain factor. */
+      if (ti >= tiplus1 * param->stop_factor)
+        {
+          if (option_trace >= 1)
+            printf ("stopped due to ti >= tiplus1 * factor (%.1f)\n",
+                    param->stop_factor);
+          break;
+        }
+
+      /* Stop if the threshold implied hasn't changed in a certain
+         number of measurements.  (It's this condition that usually
+         stops the loop.) */
+      if (thresh_idx != new_thresh_idx)
+        since_thresh_change = 0, thresh_idx = new_thresh_idx;
+      else
+        if (++since_thresh_change > param->stop_since_change)
+          {
+            if (option_trace >= 1)
+              printf ("stopped due to since_thresh_change (%d)\n",
+                      param->stop_since_change);
+            break;
+          }
+
+      /* Stop if the threshold implied is more than a certain number of
+         measurements ago.  */
+#define STOP_SINCE_AFTER   500
+      if (ndat - thresh_idx > STOP_SINCE_AFTER)
+        {
+          if (option_trace >= 1)
+            printf ("stopped due to ndat - thresh_idx > amount (%d)\n",
+                    STOP_SINCE_AFTER);
+          break;
+        }
+
+      /* Stop when the size limit is reached before the end of the
+         crossover, but only show this as an error for >= the default max
+         size.  FIXME: Maybe should make it a param choice whether this is
+         an error.
+
+         NOTE(review): the enclosing loop only runs its body while
+         s.size < param->max_size and s.size is not changed inside the
+         body, so this test looks like it can never be true here.  Confirm
+         whether the message was meant to be issued after the loop.  */
+      if (s.size >= param->max_size && param->max_size >= DEFAULT_MAX_SIZE)
+        {
+          fprintf (stderr, "%s\n", param->name);
+          fprintf (stderr, "sizes %ld to %ld total %d measurements\n",
+                   (long) dat[0].size, (long) dat[ndat-1].size, ndat);
+          fprintf (stderr, "    max size reached before end of crossover\n");
+          break;
+        }
+    }
+
+  /* NOTE(review): if param->min_size >= param->max_size the loop body
+     never runs and ndat is still 0, so dat[0] and dat[ndat-1] below are
+     not measurements made by this call.  Confirm callers never pass such
+     a range.  */
+  if (option_trace >= 1)
+    printf ("sizes %ld to %ld total %d measurements\n",
+            (long) dat[0].size, (long) dat[ndat-1].size, ndat);
+
+  *threshold = dat[analyze_dat (1)].size;
+
+  /* A crossover at the very first size measured is reported as 0 when the
+     caller asked for that.  */
+  if (param->min_is_always)
+    {
+      if (*threshold == param->min_size)
+        *threshold = 0;
+    }
+
+  if (! param->noprint || option_trace)
+    print_define_end (param->name, *threshold);
+}
+
+
+/* Special probing for the fft thresholds.  The size restrictions on the
+   FFTs mean the graph of time vs size has a step effect.  See this for
+   example using
+
+       ./speed -s 4096-16384 -t 128 -P foo mpn_mul_fft.8 mpn_mul_fft.9
+       gnuplot foo.gnuplot
+
+   The current approach is to compare routines at the midpoint of relevant
+   steps.  Arguably a more sophisticated system of threshold data is wanted
+   if this step effect remains. */
+
+/* Description of one FFT tuning job, as consumed by fft() and fftmes().  */
+struct fft_param_t {
+  /* Name used for the table "#define" and its "_SIZE" companion.  */
+  const char        *table_name;
+  /* Name printed for the value stored through p_threshold.  */
+  const char        *threshold_name;
+  /* Name printed for the value stored through p_modf_threshold.  */
+  const char        *modf_threshold_name;
+  /* Where fft() stores the two thresholds it determines.  */
+  mp_size_t         *p_threshold;
+  mp_size_t         *p_modf_threshold;
+  /* Only referenced by the disabled "#else" code in fft().  */
+  mp_size_t         first_size;
+  /* Sizes at or above this are not measured.  */
+  mp_size_t         max_size;
+  /* Timed against mul_function when finding the FFT threshold.  */
+  speed_function_t  function;
+  /* Passed to one() as function2, against mul_function, when finding the
+     mod-F threshold.  */
+  speed_function_t  mul_modf_function;
+  speed_function_t  mul_function;
+  /* Indexes mpn_fft_table3[] and, when non-zero, selects the SQR_ rather
+     than the MUL_ mod-F threshold in fftmes().  */
+  mp_size_t         sqr;
+};
+
+
+/* mpn_mul_fft requires pl a multiple of 2^k limbs, but with
+   N=pl*BIT_PER_MP_LIMB it internally also pads out so N/2^k is a multiple
+   of 2^(k-1) bits.
+
+   Return the resulting step between usable sizes, in limbs, for the given
+   k.  Aborts with a message if the computation does not give a positive
+   value.  */
+
+mp_size_t
+fft_step_size (int k)
+{
+  mp_size_t  per_piece, step;
+
+  /* 2^(k-1) bits expressed in limbs, but never less than one limb */
+  per_piece = MAX ((mp_size_t) 1 << (k-1), GMP_LIMB_BITS) / GMP_LIMB_BITS;
+  step = per_piece * ((mp_size_t) 1 << k);
+
+  if (step <= 0)
+    {
+      printf ("Can't handle k=%d\n", k);
+      abort ();
+    }
+
+  return step;
+}
+
+/* Round pl up to a multiple of fft_step_size (k).  A pl that is already a
+   non-zero multiple is returned unchanged; zero is rounded up to one full
+   step.  */
+mp_size_t
+fft_next_size (mp_size_t pl, int k)
+{
+  mp_size_t  mask = fft_step_size (k) - 1;
+
+  if (pl != 0 && (pl & mask) == 0)
+    return pl;
+
+  return (pl | mask) + 1;
+}
+
+#define NMAX_DEFAULT 1000000
+#define MAX_REPS 25
+#define MIN_REPS 5
+
+/* Return the least common multiple of a and 2^k: factors of two are
+   removed from a, at most k of them, and the result is shifted up by the
+   full k.  */
+static inline size_t
+mpn_mul_fft_lcm (size_t a, unsigned int k)
+{
+  unsigned int  left;
+
+  for (left = k; left > 0 && a % 2 == 0; left--)
+    a >>= 1;
+
+  return a << k;
+}
+
+/* Return Nprime, a size in bits, computed from pl limbs and k.
+
+   Nprime starts as the smallest multiple of lcm (GMP_NUMB_BITS, 2^k) above
+   2*M+k+2, where M is pl*GMP_NUMB_BITS >> k.  If the corresponding limb
+   count is at or above the mod-F threshold (SQR_ or MUL_ according to
+   "sqr"), it is rounded up until it is a multiple of 2^K for the K given
+   by mpn_fft_best_k.  NOTE(review): this presumably mirrors the sizing
+   done inside mpn_mul_fft -- confirm against that routine.
+
+   Asserts that the resulting limb count is below pl.  */
+mp_size_t
+fftfill (mp_size_t pl, int k, int sqr)
+{
+  mp_size_t    lcm_bits;
+  mp_bitcnt_t  total_bits, piece_bits, piece_limbs, bits_per_piece;
+
+  total_bits = pl * GMP_NUMB_BITS;
+  bits_per_piece = total_bits >> k;
+
+  lcm_bits = mpn_mul_fft_lcm ((unsigned long) GMP_NUMB_BITS, k);
+
+  piece_bits = (1 + (2 * bits_per_piece + k + 2) / lcm_bits) * lcm_bits;
+  piece_limbs = piece_bits / GMP_NUMB_BITS;
+
+  if (piece_limbs >= (sqr ? SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD))
+    {
+      size_t  K2 = 1L << mpn_fft_best_k (piece_limbs, sqr);
+
+      /* Rounding up can change the best k, so repeat until the size is a
+         multiple of the 2^k chosen for it.  */
+      while ((piece_limbs & (K2 - 1)) != 0)
+        {
+          piece_limbs = (piece_limbs + K2 - 1) & -K2;
+          piece_bits = piece_limbs * GMP_LIMB_BITS;
+          K2 = 1L << mpn_fft_best_k (piece_limbs, sqr);
+        }
+    }
+  ASSERT_ALWAYS (piece_limbs < pl);
+
+  return piece_bits;
+}
+
+/* qsort() comparison function for doubles: -1, 0 or 1 according to whether
+   *ap is less than, equal to, or greater than *bp.  */
+static int
+compare_double (const void *ap, const void *bp)
+{
+  const double  *lhs = (const double *) ap;
+  const double  *rhs = (const double *) bp;
+
+  return (*lhs > *rhs) - (*lhs < *rhs);
+}
+
+/* Sort times[0..n-1] into ascending order, in place, and return the
+   element at index n/2.  */
+double
+median (double *times, int n)
+{
+  int  mid = n / 2;
+
+  qsort (times, n, sizeof (times[0]), compare_double);
+  return times[mid];
+}
+
+/* One-entry-per-k cache of FFT timings, indexed by k.  cached_measure()
+   returns the stored time when asked again for the same n at the same k,
+   and fftmes() invalidates every slot by setting n to 0 before it
+   starts.  */
+#define FFT_CACHE_SIZE 25
+typedef struct fft_cache
+{
+  mp_size_t n;    /* size the time below was measured at */
+  double time;    /* median time recorded by cached_measure() */
+} fft_cache_t;
+
+fft_cache_t fft_cache[FFT_CACHE_SIZE];
+
+/* Return the median time of n_measurements runs of mpn_mul_fft on n-limb
+   operands with the given k.  The result is remembered in fft_cache[k],
+   and a repeated request for the same n and k is answered from there
+   without measuring.  n_measurements must not exceed MAX_REPS.  */
+double
+cached_measure (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n, int k,
+               int n_measurements)
+{
+  fft_cache_t  *slot = &fft_cache[k];
+  double       samples[MAX_REPS];
+  int          rep;
+
+  if (slot->n == n)
+    return slot->time;
+
+  for (rep = 0; rep < n_measurements; rep++)
+    {
+      speed_starttime ();
+      mpn_mul_fft (rp, n, ap, n, bp, n, k);
+      samples[rep] = speed_endtime ();
+    }
+
+  slot->n = n;
+  slot->time = median (samples, n_measurements);
+  return slot->time;
+}
+
+/* Store {nval, kval} in fft_tab[idx] and mark fft_tab[idx+1] as the end of
+   the table.  Expects a variable named fft_tab in scope at the point of
+   use, and idx+1 to be a valid index.  */
+#define INSERT_FFTTAB(idx, nval, kval)                                 \
+  do {                                                                 \
+    fft_tab[idx].n = nval;                                             \
+    fft_tab[idx].k = kval;                                             \
+    fft_tab[idx+1].n = -1;     /* sentinel */                          \
+    fft_tab[idx+1].k = -1;                                             \
+  } while (0)
+
+/* Measure mpn_mul_fft over sizes from nmin up to nmax and fill in the FFT
+   table mpn_fft_table3[p->sqr], starting at entry idx.  With idx == 0 the
+   initial entry {nmin, initial_k} is stored first.
+
+   At each step the k values within 4 of the current best, clamped to
+   4..24, are timed through cached_measure(), skipping those whose "eff"
+   figure is below 70.  A table entry is added whenever the fastest k
+   changes.  After the measured range, entries for each remaining k up to
+   kmax are appended from a fixed formula.
+
+   nmin is raised to the mod-F threshold if it is below it.  When "print"
+   is non-zero the table is also printed as a "#define".  Returns the
+   number of table entries in use.  Aborts if the table would overflow or
+   if memory cannot be allocated.  */
+int
+fftmes (mp_size_t nmin, mp_size_t nmax, int initial_k, struct fft_param_t *p, int idx, int print)
+{
+  mp_size_t n, n1, prev_n1;
+  int k, best_k, last_best_k, kmax;
+  int eff, prev_eff;
+  double t0, t1;
+  int n_measurements;
+  mp_limb_t *ap, *bp, *rp;
+  mp_size_t alloc;
+  struct fft_table_nk *fft_tab;
+
+  fft_tab = mpn_fft_table3[p->sqr];
+
+  /* Forget timings left over from a previous call.  */
+  for (k = 0; k < FFT_CACHE_SIZE; k++)
+    fft_cache[k].n = 0;
+
+  if (nmin < (p->sqr ? SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD))
+    {
+      nmin = (p->sqr ? SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD);
+    }
+
+  if (print)
+    printf ("#define %s%*s", p->table_name, 38, "");
+
+  if (idx == 0)
+    {
+      INSERT_FFTTAB (0, nmin, initial_k);
+
+      if (print)
+        {
+          printf ("\\\n  { ");
+          printf ("{%7u,%2u}", fft_tab[0].n, fft_tab[0].k);
+        }
+
+      idx = 1;
+    }
+
+  /* Operand buffers start at one limb and grow on demand below.  For
+     squaring bp is just an alias of ap.  */
+  ap = malloc (sizeof (mp_limb_t));
+  if (p->sqr)
+    bp = ap;
+  else
+    bp = malloc (sizeof (mp_limb_t));
+  rp = malloc (sizeof (mp_limb_t));
+  if (ap == NULL || bp == NULL || rp == NULL)
+    {
+      fprintf (stderr, "fftmes(): out of memory\n");
+      abort ();
+    }
+  alloc = 1;
+
+  /* Round n to comply to initial k value */
+  n = (nmin + ((1ul << initial_k) - 1)) & (MP_SIZE_T_MAX << initial_k);
+
+  /* An odd repetition count between MIN_REPS and MAX_REPS, fewer for
+     larger initial k.  */
+  n_measurements = (18 - initial_k) | 1;
+  n_measurements = MAX (n_measurements, MIN_REPS);
+  n_measurements = MIN (n_measurements, MAX_REPS);
+
+  last_best_k = initial_k;
+  best_k = initial_k;
+
+  while (n < nmax)
+    {
+      int start_k, end_k;
+
+      /* Assume the current best k is best until we hit its next FFT step.  */
+      t0 = 99999;
+
+      prev_n1 = n + 1;
+
+      start_k = MAX (4, best_k - 4);
+      end_k = MIN (24, best_k + 4);
+      for (k = start_k; k <= end_k; k++)
+        {
+          n1 = mpn_fft_next_size (prev_n1, k);
+
+          eff = 200 * (n1 * GMP_NUMB_BITS >> k) / fftfill (n1, k, p->sqr);
+
+          if (eff < 70)         /* avoid measuring too slow fft:s */
+            continue;
+
+          if (n1 > alloc)
+            {
+              alloc = n1;
+
+              /* Shrink to one limb before enlarging, as the original code
+                 did; presumably so realloc need not copy the old operand
+                 data, which is regenerated anyway.  */
+              ap = realloc (ap, sizeof (mp_limb_t));
+              if (! p->sqr)
+                bp = realloc (bp, sizeof (mp_limb_t));
+              rp = realloc (rp, sizeof (mp_limb_t));
+
+              ap = realloc (ap, alloc * sizeof (mp_limb_t));
+              if (p->sqr)
+                bp = ap;
+              else
+                bp = realloc (bp, alloc * sizeof (mp_limb_t));
+              rp = realloc (rp, alloc * sizeof (mp_limb_t));
+              if (ap == NULL || bp == NULL || rp == NULL)
+                {
+                  fprintf (stderr, "fftmes(): out of memory\n");
+                  abort ();
+                }
+
+              mpn_random (ap, alloc);
+              if (! p->sqr)
+                mpn_random (bp, alloc);
+            }
+
+          t1 = cached_measure (rp, ap, bp, n1, k, n_measurements);
+
+          /* Cut the repetitions when a whole set takes over 0.3 seconds.  */
+          if (t1 * n_measurements > 0.3)
+            n_measurements -= 2;
+          n_measurements = MAX (n_measurements, MIN_REPS);
+
+          if (t1 < t0)
+            {
+              best_k = k;
+              t0 = t1;
+            }
+        }
+
+      n1 = mpn_fft_next_size (prev_n1, best_k);
+
+      if (last_best_k != best_k)
+        {
+          ASSERT_ALWAYS ((prev_n1 & ((1ul << last_best_k) - 1)) == 1);
+
+          if (idx >= FFT_TABLE3_SIZE)
+            {
+              printf ("FFT table exhausted, increase FFT_TABLE3_SIZE in gmp-impl.h\n");
+              abort ();
+            }
+          INSERT_FFTTAB (idx, prev_n1 >> last_best_k, best_k);
+
+          if (print)
+            {
+              printf (", ");
+              if (idx % 4 == 0)
+                printf ("\\\n    ");
+              printf ("{%7u,%2u}", fft_tab[idx].n, fft_tab[idx].k);
+            }
+
+          if (option_trace >= 2)
+            {
+              /* Cast to the types the conversions expect: mp_size_t is
+                 not unsigned long on every platform.  */
+              printf ("{%lu,%u}\n", (unsigned long) prev_n1,
+                      (unsigned) best_k);
+              fflush (stdout);
+            }
+
+          last_best_k = best_k;
+          idx++;
+        }
+
+      /* Advance through the sizes for best_k until fftfill() gives a
+         different value, and continue measuring from the last size that
+         still gave the old one.  */
+      for (;;)
+        {
+          prev_n1 = n1;
+          prev_eff = fftfill (prev_n1, best_k, p->sqr);
+          n1 = mpn_fft_next_size (prev_n1 + 1, best_k);
+          eff = fftfill (n1, best_k, p->sqr);
+
+          if (eff != prev_eff)
+            break;
+        }
+
+      n = prev_n1;
+    }
+
+  /* Beyond the measured range, add one entry for every larger k.  */
+  kmax = sizeof (mp_size_t) * 4;       /* GMP_MP_SIZE_T_BITS / 2 */
+  kmax = MIN (kmax, 25-1);
+  for (k = last_best_k + 1; k <= kmax; k++)
+    {
+      if (idx >= FFT_TABLE3_SIZE)
+        {
+          printf ("FFT table exhausted, increase FFT_TABLE3_SIZE in gmp-impl.h\n");
+          abort ();
+        }
+      INSERT_FFTTAB (idx, ((1ul << (2*k-2)) + 1) >> (k-1), k);
+
+      if (print)
+        {
+          printf (", ");
+          if (idx % 4 == 0)
+            printf ("\\\n    ");
+          printf ("{%7u,%2u}", fft_tab[idx].n, fft_tab[idx].k);
+        }
+
+      idx++;
+    }
+
+  if (print)
+    printf (" }\n");
+
+  free (ap);
+  if (! p->sqr)
+    free (bp);
+  free (rp);
+
+  return idx;
+}
+
+/* Tune the FFT parameters described by *p, printing each result as it is
+   found:
+
+   1. the mod-F threshold, stored through p->p_modf_threshold, taken as the
+      smallest threshold one() finds over k = 5, 6, 7, with the winning k
+      printed as a remark;
+   2. the FFT table, built by fftmes() starting from that threshold and k;
+   3. the FFT threshold, stored through p->p_threshold, found by stepping
+      through sizes from twice the mod-F threshold until p->function is
+      faster than p->mul_function.  Nothing is stored or printed for this
+      step if p->max_size is reached first.  */
+void
+fft (struct fft_param_t *p)
+{
+  mp_size_t  size;
+  int        k, idx, initial_k;
+
+  /*** Generate MUL_FFT_MODF_THRESHOLD / SQR_FFT_MODF_THRESHOLD ***/
+
+#if 1
+  {
+    /* Use plain one() mechanism, for some reasonable initial values of k.  The
+       advantage is that we don't depend on mpn_fft_table3, which we can
+       therefore leave completely uninitialized.  */
+
+    static struct param_t param;
+    mp_size_t thres, best_thres;
+    int best_k;
+    char buf[20];
+
+    best_thres = MP_SIZE_T_MAX;
+    best_k = -1;
+
+    for (k = 5; k <= 7; k++)
+      {
+       param.name = p->modf_threshold_name;
+       param.min_size = 100;
+       param.max_size = 2000;
+       param.function  = p->mul_function;
+       /* NOTE(review): one() treats a zero step_factor as "not set" and
+          replaces it with its default of 0.01, so the step is not purely
+          the absolute step of 4 -- confirm this is intended.  */
+       param.step_factor = 0.0;
+       param.step = 4;
+       param.function2 = p->mul_modf_function;
+       param.noprint = 1;
+       /* k is handed to the measured routines through s.r */
+       s.r = k;
+       one (&thres, &param);
+       /* strict comparison: on a tie the smaller k is kept */
+       if (thres < best_thres)
+         {
+           best_thres = thres;
+           best_k = k;
+         }
+      }
+
+    *(p->p_modf_threshold) = best_thres;
+    sprintf (buf, "k = %d", best_k);
+    print_define_remark (p->modf_threshold_name, best_thres, buf);
+    initial_k = best_k;
+  }
+#else
+  /* Disabled older method.  It is not compilable as it stands: see the
+     "initial_k = ?" placeholder at the end.  */
+  size = p->first_size;
+  for (;;)
+    {
+      double  tk, tm;
+
+      size = mpn_fft_next_size (size+1, mpn_fft_best_k (size+1, p->sqr));
+      k = mpn_fft_best_k (size, p->sqr);
+
+      if (size >= p->max_size)
+        break;
+
+      s.size = size + fft_step_size (k) / 2;
+      s.r = k;
+      tk = tuneup_measure (p->mul_modf_function, NULL, &s);
+      if (tk == -1.0)
+        abort ();
+
+      tm = tuneup_measure (p->mul_function, NULL, &s);
+      if (tm == -1.0)
+        abort ();
+
+      if (option_trace >= 2)
+        printf ("at %ld   size=%ld  k=%d  %.9f   size=%ld modf %.9f\n",
+                (long) size,
+                (long) size + fft_step_size (k) / 2, k, tk,
+                (long) s.size, tm);
+
+      if (tk < tm)
+        {
+         *p->p_modf_threshold = s.size;
+         print_define (p->modf_threshold_name, *p->p_modf_threshold);
+         break;
+        }
+    }
+  initial_k = ?;
+#endif
+
+  /*** Generate MUL_FFT_TABLE3 / SQR_FFT_TABLE3 ***/
+
+  idx = fftmes (*p->p_modf_threshold, p->max_size, initial_k, p, 0, 1);
+  printf ("#define %s_SIZE %d\n", p->table_name, idx);
+
+  /*** Generate MUL_FFT_THRESHOLD / SQR_FFT_THRESHOLD ***/
+
+  size = 2 * *p->p_modf_threshold;     /* OK? */
+  for (;;)
+    {
+      double  tk, tm;
+      mp_size_t mulmod_size, mul_size;;
+
+      if (size >= p->max_size)
+        break;
+
+      /* p->function is timed at the next mulmod_bnm1 size; p->mul_function
+         half way between the current size and that one.  */
+      mulmod_size = mpn_mulmod_bnm1_next_size (2 * (size + 1)) / 2;
+      mul_size = (size + mulmod_size) / 2;     /* middle of step */
+
+      s.size = mulmod_size;
+      tk = tuneup_measure (p->function, NULL, &s);
+      if (tk == -1.0)
+        abort ();
+
+      s.size = mul_size;
+      tm = tuneup_measure (p->mul_function, NULL, &s);
+      if (tm == -1.0)
+        abort ();
+
+      if (option_trace >= 2)
+        printf ("at %ld   size=%ld  %.9f   size=%ld mul %.9f\n",
+                (long) size,
+                (long) mulmod_size, tk,
+                (long) mul_size, tm);
+
+      size = mulmod_size;
+
+      if (tk < tm)
+        {
+         /* s.size is mul_size at this point, not mulmod_size */
+         *p->p_threshold = s.size;
+         print_define (p->threshold_name, *p->p_threshold);
+         break;
+        }
+    }
+}
+
+
+
+/* Tune the thresholds of the balanced multiplication algorithms measured
+   through speed_mpn_mul_n, from toom22 up to toom8h.  The min_size of each
+   step is the larger of the previous step's result and the algorithm's own
+   MINSIZE constant.
+
+   Karatsuba (toom22) starts from at least 4, since the Cray t90 ieee code
+   is much faster at 2, giving wrong results.  */
+void
+tune_mul_n (void)
+{
+  static struct param_t  cfg;
+
+  /* The same speed function is used for every step; only the name and the
+     size range change.  */
+  cfg.function = speed_mpn_mul_n;
+
+  cfg.max_size = MUL_TOOM22_THRESHOLD_LIMIT-1;
+  cfg.min_size = MAX (4, MPN_TOOM22_MUL_MINSIZE);
+  cfg.name = "MUL_TOOM22_THRESHOLD";
+  one (&mul_toom22_threshold, &cfg);
+
+  cfg.max_size = MUL_TOOM33_THRESHOLD_LIMIT-1;
+  cfg.min_size = MAX (mul_toom22_threshold, MPN_TOOM33_MUL_MINSIZE);
+  cfg.name = "MUL_TOOM33_THRESHOLD";
+  one (&mul_toom33_threshold, &cfg);
+
+  cfg.max_size = MUL_TOOM44_THRESHOLD_LIMIT-1;
+  cfg.min_size = MAX (mul_toom33_threshold, MPN_TOOM44_MUL_MINSIZE);
+  cfg.name = "MUL_TOOM44_THRESHOLD";
+  one (&mul_toom44_threshold, &cfg);
+
+  cfg.max_size = MUL_TOOM6H_THRESHOLD_LIMIT-1;
+  cfg.min_size = MAX (mul_toom44_threshold, MPN_TOOM6H_MUL_MINSIZE);
+  cfg.name = "MUL_TOOM6H_THRESHOLD";
+  one (&mul_toom6h_threshold, &cfg);
+
+  cfg.max_size = MUL_TOOM8H_THRESHOLD_LIMIT-1;
+  cfg.min_size = MAX (mul_toom6h_threshold, MPN_TOOM8H_MUL_MINSIZE);
+  cfg.name = "MUL_TOOM8H_THRESHOLD";
+  one (&mul_toom8h_threshold, &cfg);
+
+  /* disabled until tuned */
+  MUL_FFT_THRESHOLD = MP_SIZE_T_MAX;
+}
+
+/* Tune the thresholds for switching between Toom variants (toom32, toom42,
+   toom43, toom53, toom63).  For each pair, one() compares two speed
+   functions and reports a raw crossover; that value is then scaled by a
+   fixed ratio (17/24, 19/30, 11/20, 1/2) before being stored and printed.
+   noprint is set once up front, presumably so that one() does not print
+   the unscaled value itself.  */
+void
+tune_mul (void)
+{
+  static struct param_t  cfg;
+  mp_size_t crossover;
+
+  cfg.noprint = 1;
+
+  /* toom32 versus toom43 */
+  cfg.name = "MUL_TOOM32_TO_TOOM43_THRESHOLD";
+  cfg.min_size = MPN_TOOM43_MUL_MINSIZE;
+  cfg.function = speed_mpn_toom32_for_toom43_mul;
+  cfg.function2 = speed_mpn_toom43_for_toom32_mul;
+  one (&crossover, &cfg);
+  mul_toom32_to_toom43_threshold = 17*crossover/24;
+  print_define ("MUL_TOOM32_TO_TOOM43_THRESHOLD", mul_toom32_to_toom43_threshold);
+
+  /* toom32 versus toom53 */
+  cfg.name = "MUL_TOOM32_TO_TOOM53_THRESHOLD";
+  cfg.min_size = MPN_TOOM53_MUL_MINSIZE;
+  cfg.function = speed_mpn_toom32_for_toom53_mul;
+  cfg.function2 = speed_mpn_toom53_for_toom32_mul;
+  one (&crossover, &cfg);
+  mul_toom32_to_toom53_threshold = 19*crossover/30;
+  print_define ("MUL_TOOM32_TO_TOOM53_THRESHOLD", mul_toom32_to_toom53_threshold);
+
+  /* toom42 versus toom53 */
+  cfg.name = "MUL_TOOM42_TO_TOOM53_THRESHOLD";
+  cfg.min_size = MPN_TOOM53_MUL_MINSIZE;
+  cfg.function = speed_mpn_toom42_for_toom53_mul;
+  cfg.function2 = speed_mpn_toom53_for_toom42_mul;
+  one (&crossover, &cfg);
+  mul_toom42_to_toom53_threshold = 11*crossover/20;
+  print_define ("MUL_TOOM42_TO_TOOM53_THRESHOLD", mul_toom42_to_toom53_threshold);
+
+  /* toom42 versus toom63 */
+  cfg.name = "MUL_TOOM42_TO_TOOM63_THRESHOLD";
+  cfg.min_size = MPN_TOOM63_MUL_MINSIZE;
+  cfg.function = speed_mpn_toom42_mul;
+  cfg.function2 = speed_mpn_toom63_mul;
+  one (&crossover, &cfg);
+  mul_toom42_to_toom63_threshold = crossover/2;
+  print_define ("MUL_TOOM42_TO_TOOM63_THRESHOLD", mul_toom42_to_toom63_threshold);
+}
+
+
+void
+tune_mullo (void)
+{
+  static struct param_t  param;
+
+  param.function = speed_mpn_mullo_n;
+
+  param.name = "MULLO_BASECASE_THRESHOLD";
+  param.min_size = 1;
+  param.min_is_always = 1;
+  param.max_size = MULLO_BASECASE_THRESHOLD_LIMIT-1;
+  param.stop_factor = 1.5;
+  param.noprint = 1;
+  one (&mullo_basecase_threshold, &param);
+
+  param.name = "MULLO_DC_THRESHOLD";
+  param.min_size = 8;
+  param.min_is_always = 0;
+  param.max_size = 1000;
+  one (&mullo_dc_threshold, &param);
+
+  if (mullo_basecase_threshold >= mullo_dc_threshold)
+    {
+      print_define ("MULLO_BASECASE_THRESHOLD", mullo_dc_threshold);
+      print_define_remark ("MULLO_DC_THRESHOLD", 0, "never mpn_mullo_basecase");
+    }
+  else
+    {
+      print_define ("MULLO_BASECASE_THRESHOLD", mullo_basecase_threshold);
+      print_define ("MULLO_DC_THRESHOLD", mullo_dc_threshold);
+    }
+
+#if WANT_FFT
+  param.name = "MULLO_MUL_N_THRESHOLD";
+  param.min_size = mullo_dc_threshold;
+  param.max_size = 2 * mul_fft_threshold;
+  param.noprint = 0;
+  param.step_factor = 0.03;
+  one (&mullo_mul_n_threshold, &param);
+#else
+  print_define_remark ("MULLO_MUL_N_THRESHOLD", MP_SIZE_T_MAX,
+                           "without FFT use mullo forever");
+#endif
+}
+
+/* Tune MULMOD_BNM1_THRESHOLD using speed_mpn_mulmod_bnm1, with the size
+   range set to 4..100.  The result goes to mulmod_bnm1_threshold.  */
+void
+tune_mulmod_bnm1 (void)
+{
+  static struct param_t  cfg;
+
+  cfg.max_size = 100;
+  cfg.min_size = 4;
+  cfg.function = speed_mpn_mulmod_bnm1;
+  cfg.name = "MULMOD_BNM1_THRESHOLD";
+  one (&mulmod_bnm1_threshold, &cfg);
+}
+
+/* Tune SQRMOD_BNM1_THRESHOLD using speed_mpn_sqrmod_bnm1, with the size
+   range set to 4..100.  The result goes to sqrmod_bnm1_threshold.  */
+void
+tune_sqrmod_bnm1 (void)
+{
+  static struct param_t  cfg;
+
+  cfg.max_size = 100;
+  cfg.min_size = 4;
+  cfg.function = speed_mpn_sqrmod_bnm1;
+  cfg.name = "SQRMOD_BNM1_THRESHOLD";
+  one (&sqrmod_bnm1_threshold, &cfg);
+}
+
+
+/* Start the basecase from 3, since 1 is a special case, and if mul_basecase
+   is faster only at size==2 then we don't want to bother with extra code
+   just for that.  Start karatsuba from 4 same as MUL above.  */
+
+/* Tune the squaring thresholds: SQR_BASECASE_THRESHOLD, SQR_TOOM2_THRESHOLD
+   and then SQR_TOOM3/4/6/8_THRESHOLD, all measured through speed_mpn_sqr.
+
+   The first two are measured with noprint set and printed here, since the
+   values printed depend on how the two results compare.  */
+void
+tune_sqr (void)
+{
+  /* disabled until tuned */
+  SQR_FFT_THRESHOLD = MP_SIZE_T_MAX;
+
+  if (HAVE_NATIVE_mpn_sqr_basecase)
+    {
+      /* With a native sqr_basecase nothing is measured for this
+         threshold, it is simply set to 0.  */
+      print_define_remark ("SQR_BASECASE_THRESHOLD", 0, "always (native)");
+      sqr_basecase_threshold = 0;
+    }
+  else
+    {
+      static struct param_t  param;
+      param.name = "SQR_BASECASE_THRESHOLD";
+      param.function = speed_mpn_sqr;
+      param.min_size = 3;
+      param.min_is_always = 1;
+      param.max_size = TUNE_SQR_TOOM2_MAX;
+      param.noprint = 1;
+      one (&sqr_basecase_threshold, &param);
+    }
+
+  {
+    static struct param_t  param;
+    param.name = "SQR_TOOM2_THRESHOLD";
+    param.function = speed_mpn_sqr;
+    param.min_size = MAX (4, MPN_TOOM2_SQR_MINSIZE);
+    param.max_size = TUNE_SQR_TOOM2_MAX;
+    param.noprint = 1;
+    one (&sqr_toom2_threshold, &param);
+
+    /* NOTE(review): SQR_TOOM2_THRESHOLD is assigned to below, so it must
+       expand to an lvalue; presumably it is the sqr_toom2_threshold
+       variable just tuned - confirm against the tune-build definitions.  */
+    if (! HAVE_NATIVE_mpn_sqr_basecase
+        && sqr_toom2_threshold < sqr_basecase_threshold)
+      {
+        /* Karatsuba becomes faster than mul_basecase before
+           sqr_basecase does.  Arrange for the expression
+           "BELOW_THRESHOLD (un, SQR_TOOM2_THRESHOLD)" which
+           selects mpn_sqr_basecase in mpn_sqr to be false, by setting
+           SQR_TOOM2_THRESHOLD to zero, making
+           SQR_BASECASE_THRESHOLD the toom2 threshold.  */
+
+        sqr_basecase_threshold = SQR_TOOM2_THRESHOLD;
+        SQR_TOOM2_THRESHOLD = 0;
+
+        print_define_remark ("SQR_BASECASE_THRESHOLD", sqr_basecase_threshold,
+                             "toom2");
+        print_define_remark ("SQR_TOOM2_THRESHOLD",SQR_TOOM2_THRESHOLD,
+                             "never sqr_basecase");
+      }
+    else
+      {
+        /* In the native case SQR_BASECASE_THRESHOLD was already printed
+           above, so only print it here when it was measured.  */
+        if (! HAVE_NATIVE_mpn_sqr_basecase)
+          print_define ("SQR_BASECASE_THRESHOLD", sqr_basecase_threshold);
+        print_define ("SQR_TOOM2_THRESHOLD", SQR_TOOM2_THRESHOLD);
+      }
+  }
+
+  {
+    static struct param_t  param;
+    /* One of the two may have been zeroed above, so start toom3 from the
+       larger of them.  */
+    mp_size_t toom3_start = MAX (sqr_toom2_threshold, sqr_basecase_threshold);
+
+    param.function = speed_mpn_sqr;
+
+    /* The min_size of each step below is the larger of the previous result
+       and the algorithm's own MINSIZE constant.  */
+    param.name = "SQR_TOOM3_THRESHOLD";
+    param.min_size = MAX (toom3_start, MPN_TOOM3_SQR_MINSIZE);
+    param.max_size = SQR_TOOM3_THRESHOLD_LIMIT-1;
+    one (&sqr_toom3_threshold, &param);
+
+    param.name = "SQR_TOOM4_THRESHOLD";
+    param.min_size = MAX (sqr_toom3_threshold, MPN_TOOM4_SQR_MINSIZE);
+    param.max_size = SQR_TOOM4_THRESHOLD_LIMIT-1;
+    one (&sqr_toom4_threshold, &param);
+
+    param.name = "SQR_TOOM6_THRESHOLD";
+    param.min_size = MAX (sqr_toom4_threshold, MPN_TOOM6_SQR_MINSIZE);
+    param.max_size = SQR_TOOM6_THRESHOLD_LIMIT-1;
+    one (&sqr_toom6_threshold, &param);
+
+    param.name = "SQR_TOOM8_THRESHOLD";
+    param.min_size = MAX (sqr_toom6_threshold, MPN_TOOM8_SQR_MINSIZE);
+    param.max_size = SQR_TOOM8_THRESHOLD_LIMIT-1;
+    one (&sqr_toom8_threshold, &param);
+  }
+}
+
+
+/* Tune DC_DIV_QR_THRESHOLD and DC_DIVAPPR_Q_THRESHOLD, in each case by
+   comparing the sbpi1 speed function against the dcpi1 one.  Each search
+   uses its own zero-initialized static parameter block.  */
+void
+tune_dc_div (void)
+{
+  static struct param_t  qr_cfg;
+  static struct param_t  appr_cfg;
+
+  s.r = 0;             /* clear to make speed function do 2n/n */
+
+  qr_cfg.min_size = 6;
+  qr_cfg.function2 = speed_mpn_dcpi1_div_qr;
+  qr_cfg.function = speed_mpn_sbpi1_div_qr;
+  qr_cfg.name = "DC_DIV_QR_THRESHOLD";
+  one (&dc_div_qr_threshold, &qr_cfg);
+
+  appr_cfg.min_size = 6;
+  appr_cfg.function2 = speed_mpn_dcpi1_divappr_q;
+  appr_cfg.function = speed_mpn_sbpi1_divappr_q;
+  appr_cfg.name = "DC_DIVAPPR_Q_THRESHOLD";
+  one (&dc_divappr_q_threshold, &appr_cfg);
+}
+
+/* Speed function which dispatches on operand size: sizes below
+   DC_DIV_QR_THRESHOLD are timed with speed_mpn_sbpi1_div_qr, all others
+   with speed_mpn_dcpi1_div_qr.  Returns whatever the selected function
+   returns.  */
+static double
+speed_mpn_sbordcpi1_div_qr (struct speed_params *s)
+{
+  return (s->size >= DC_DIV_QR_THRESHOLD
+          ? speed_mpn_dcpi1_div_qr (s)
+          : speed_mpn_sbpi1_div_qr (s));
+}
+
+/* Tune MU_DIV_QR_THRESHOLD, MU_DIVAPPR_Q_THRESHOLD and
+   MUPI_DIV_QR_THRESHOLD.  The first two compare the dcpi1 speed functions
+   against the mu ones; the third compares speed_mpn_sbordcpi1_div_qr
+   against speed_mpn_mupi_div_qr.  Each search uses its own
+   zero-initialized static parameter block.  */
+void
+tune_mu_div (void)
+{
+  static struct param_t  qr_cfg;
+  static struct param_t  appr_cfg;
+  static struct param_t  pi_cfg;
+
+  s.r = 0;             /* clear to make speed function do 2n/n */
+
+  qr_cfg.step_factor = 0.02;
+  qr_cfg.max_size = 5000;
+  qr_cfg.min_size = 6;
+  qr_cfg.function2 = speed_mpn_mu_div_qr;
+  qr_cfg.function = speed_mpn_dcpi1_div_qr;
+  qr_cfg.name = "MU_DIV_QR_THRESHOLD";
+  one (&mu_div_qr_threshold, &qr_cfg);
+
+  appr_cfg.step_factor = 0.02;
+  appr_cfg.max_size = 5000;
+  appr_cfg.min_size = 6;
+  appr_cfg.function2 = speed_mpn_mu_divappr_q;
+  appr_cfg.function = speed_mpn_dcpi1_divappr_q;
+  appr_cfg.name = "MU_DIVAPPR_Q_THRESHOLD";
+  one (&mu_divappr_q_threshold, &appr_cfg);
+
+  pi_cfg.step_factor = 0.02;
+  pi_cfg.max_size = 1000;
+  pi_cfg.min_is_always = 1;
+  pi_cfg.min_size = 6;
+  pi_cfg.function2 = speed_mpn_mupi_div_qr;
+  pi_cfg.function = speed_mpn_sbordcpi1_div_qr;
+  pi_cfg.name = "MUPI_DIV_QR_THRESHOLD";
+  one (&mupi_div_qr_threshold, &pi_cfg);
+}
+
+/* Tune DC_BDIV_QR_THRESHOLD and DC_BDIV_Q_THRESHOLD, in each case by
+   comparing the sbpi1 bdiv speed function against the dcpi1 one.  Each
+   search uses its own zero-initialized static parameter block.  */
+void
+tune_dc_bdiv (void)
+{
+  static struct param_t  qr_cfg;
+  static struct param_t  q_cfg;
+
+  s.r = 0;             /* clear to make speed function do 2n/n */
+
+  qr_cfg.min_size = 4;
+  qr_cfg.function2 = speed_mpn_dcpi1_bdiv_qr;
+  qr_cfg.function = speed_mpn_sbpi1_bdiv_qr;
+  qr_cfg.name = "DC_BDIV_QR_THRESHOLD";
+  one (&dc_bdiv_qr_threshold, &qr_cfg);
+
+  q_cfg.min_size = 4;
+  q_cfg.function2 = speed_mpn_dcpi1_bdiv_q;
+  q_cfg.function = speed_mpn_sbpi1_bdiv_q;
+  q_cfg.name = "DC_BDIV_Q_THRESHOLD";
+  one (&dc_bdiv_q_threshold, &q_cfg);
+}
+
+/* Tune MU_BDIV_QR_THRESHOLD and MU_BDIV_Q_THRESHOLD, in each case by
+   comparing the dcpi1 bdiv speed function against the mu one.  Each search
+   uses its own zero-initialized static parameter block.  */
+void
+tune_mu_bdiv (void)
+{
+  static struct param_t  qr_cfg;
+  static struct param_t  q_cfg;
+
+  s.r = 0;             /* clear to make speed function do 2n/n */
+
+  qr_cfg.step_factor = 0.02;
+  qr_cfg.max_size = 5000;
+  qr_cfg.min_size = 4;
+  qr_cfg.function2 = speed_mpn_mu_bdiv_qr;
+  qr_cfg.function = speed_mpn_dcpi1_bdiv_qr;
+  qr_cfg.name = "MU_BDIV_QR_THRESHOLD";
+  one (&mu_bdiv_qr_threshold, &qr_cfg);
+
+  q_cfg.step_factor = 0.02;
+  q_cfg.max_size = 5000;
+  q_cfg.min_size = 4;
+  q_cfg.function2 = speed_mpn_mu_bdiv_q;
+  q_cfg.function = speed_mpn_dcpi1_bdiv_q;
+  q_cfg.name = "MU_BDIV_Q_THRESHOLD";
+  one (&mu_bdiv_q_threshold, &q_cfg);
+}
+
+/* Tune INV_MULMOD_BNM1_THRESHOLD (via speed_mpn_ni_invertappr) and then
+   INV_NEWTON_THRESHOLD (via speed_mpn_invertappr).  Both searches share one
+   static parameter block.  */
+void
+tune_invertappr (void)
+{
+  static struct param_t  cfg;
+
+  cfg.min_size = 4;
+  cfg.name = "INV_MULMOD_BNM1_THRESHOLD";
+  cfg.function = speed_mpn_ni_invertappr;
+  one (&inv_mulmod_bnm1_threshold, &cfg);
+
+  cfg.min_size = 3;
+  cfg.name = "INV_NEWTON_THRESHOLD";
+  cfg.function = speed_mpn_invertappr;
+  one (&inv_newton_threshold, &cfg);
+}
+
+/* Tune INV_APPR_THRESHOLD using speed_mpn_invert, with min_size 3.  The
+   result goes to inv_appr_threshold.  */
+void
+tune_invert (void)
+{
+  static struct param_t  cfg;
+
+  cfg.min_size = 3;
+  cfg.name = "INV_APPR_THRESHOLD";
+  cfg.function = speed_mpn_invert;
+  one (&inv_appr_threshold, &cfg);
+}
+
+/* Tune BINV_NEWTON_THRESHOLD using speed_mpn_binvert.  The result goes to
+   binv_newton_threshold.  */
+void
+tune_binvert (void)
+{
+  static struct param_t  cfg;
+
+  cfg.min_size = 8;            /* pointless with smaller operands */
+  cfg.name = "BINV_NEWTON_THRESHOLD";
+  cfg.function = speed_mpn_binvert;
+  one (&binv_newton_threshold, &cfg);
+}
+
+/* Tune the REDC thresholds.
+
+   When a native mpn_addmul_2 or mpn_redc_2 is available (WANT_REDC_2),
+   REDC_1_TO_REDC_2_THRESHOLD and REDC_2_TO_REDC_N_THRESHOLD are measured
+   with noprint set and printed afterwards.  Otherwise a single
+   REDC_1_TO_REDC_N_THRESHOLD is measured.  */
+void
+tune_redc (void)
+{
+  /* These are preprocessor definitions, so despite their position inside
+     the function they stay defined for the rest of the file.  */
+#define TUNE_REDC_2_MAX 100
+#if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
+#define WANT_REDC_2 1
+#endif
+
+  /* If WANT_REDC_2 was not defined above, it evaluates as 0 here.  */
+#if WANT_REDC_2
+  {
+    static struct param_t  param;
+    param.name = "REDC_1_TO_REDC_2_THRESHOLD";
+    param.function = speed_mpn_redc_1;
+    param.function2 = speed_mpn_redc_2;
+    param.min_size = 1;
+    param.min_is_always = 1;
+    param.max_size = TUNE_REDC_2_MAX;
+    param.noprint = 1;
+    one (&redc_1_to_redc_2_threshold, &param);
+  }
+  {
+    static struct param_t  param;
+    param.name = "REDC_2_TO_REDC_N_THRESHOLD";
+    param.function = speed_mpn_redc_2;
+    param.function2 = speed_mpn_redc_n;
+    param.min_size = 16;
+    param.noprint = 1;
+    one (&redc_2_to_redc_n_threshold, &param);
+  }
+  /* A first threshold at (or one below) the max_size given to the search
+     is treated as "redc_2 never won".  */
+  if (redc_1_to_redc_2_threshold >= TUNE_REDC_2_MAX - 1)
+    {
+      /* Disable REDC_2.  This is not supposed to happen.  */
+      /* The redc_2 -> redc_n value is printed under the redc_1 -> redc_2
+         name, and 0 is printed for the second threshold.  */
+      print_define ("REDC_1_TO_REDC_2_THRESHOLD", REDC_2_TO_REDC_N_THRESHOLD);
+      print_define_remark ("REDC_2_TO_REDC_N_THRESHOLD", 0, "anomaly: never REDC_2");
+    }
+  else
+    {
+      print_define ("REDC_1_TO_REDC_2_THRESHOLD", REDC_1_TO_REDC_2_THRESHOLD);
+      print_define ("REDC_2_TO_REDC_N_THRESHOLD", REDC_2_TO_REDC_N_THRESHOLD);
+    }
+#else
+  {
+    /* No redc_2: compare redc_1 directly against redc_n.  noprint is not
+       set here, unlike in the searches above.  */
+    static struct param_t  param;
+    param.name = "REDC_1_TO_REDC_N_THRESHOLD";
+    param.function = speed_mpn_redc_1;
+    param.function2 = speed_mpn_redc_n;
+    param.min_size = 16;
+    one (&redc_1_to_redc_n_threshold, &param);
+  }
+#endif
+}
+
+/* Tune MATRIX22_STRASSEN_THRESHOLD using speed_mpn_matrix22_mul, with
+   min_size 2.  The result goes to matrix22_strassen_threshold.  */
+void
+tune_matrix22_mul (void)
+{
+  static struct param_t  cfg;
+
+  cfg.min_size = 2;
+  cfg.function = speed_mpn_matrix22_mul;
+  cfg.name = "MATRIX22_STRASSEN_THRESHOLD";
+  one (&matrix22_strassen_threshold, &cfg);
+}
+
+/* Tune HGCD_THRESHOLD using speed_mpn_hgcd.  The result goes to
+   hgcd_threshold.  */
+void
+tune_hgcd (void)
+{
+  static struct param_t  cfg;
+
+  /* We seem to get strange results for small sizes */
+  cfg.min_size = 30;
+  cfg.function = speed_mpn_hgcd;
+  cfg.name = "HGCD_THRESHOLD";
+  one (&hgcd_threshold, &cfg);
+}
+
+/* Tune GCD_DC_THRESHOLD using speed_mpn_gcd.  min_size is taken from
+   hgcd_threshold, so that threshold's current value is what is used.  */
+void
+tune_gcd_dc (void)
+{
+  static struct param_t  cfg;
+
+  cfg.step_factor = 0.02;
+  cfg.max_size = 3000;
+  cfg.min_size = hgcd_threshold;
+  cfg.function = speed_mpn_gcd;
+  cfg.name = "GCD_DC_THRESHOLD";
+  one (&gcd_dc_threshold, &cfg);
+}
+
+/* Tune GCDEXT_DC_THRESHOLD using speed_mpn_gcdext.  min_size is taken from
+   hgcd_threshold, so that threshold's current value is what is used.  */
+void
+tune_gcdext_dc (void)
+{
+  static struct param_t  cfg;
+
+  cfg.step_factor = 0.02;
+  cfg.max_size = 3000;
+  cfg.min_size = hgcd_threshold;
+  cfg.function = speed_mpn_gcdext;
+  cfg.name = "GCDEXT_DC_THRESHOLD";
+  one (&gcdext_dc_threshold, &cfg);
+}
+
+
+/* size_extra==1 reflects the fact that with high<divisor one division is
+   always skipped.  Forcing high<divisor while testing ensures consistency
+   while stepping through sizes, ie. that size-1 divides will be done each
+   time.
+
+   min_size==2 and min_is_always are used so that if plain division is only
+   better at size==1 then don't bother including that code just for that
+   case, instead go with preinv always and get a size saving.
+
+   The macro assigns to fields of a variable called "param", so it can only
+   be used where a struct param_t of exactly that name is in scope.  It
+   already ends in a semicolon, so "DIV_1_PARAMS;" leaves a harmless empty
+   statement behind.  */
+
+#define DIV_1_PARAMS                    \
+  param.check_size = 256;               \
+  param.min_size = 2;                   \
+  param.min_is_always = 1;              \
+  param.data_high = DATA_HIGH_LT_R;     \
+  param.size_extra = 1;                 \
+  param.stop_factor = 2.0;
+
+
+/* Speed function for mpn_divrem_1 selected by tune_divrem_1, and used by
+   tune_divexact_1 as the function to compare against.  */
+double (*tuned_speed_mpn_divrem_1) __GMP_PROTO ((struct speed_params *));
+
+/* Tune DIVREM_1_NORM_THRESHOLD and DIVREM_1_UNNORM_THRESHOLD, and select
+   the speed function stored in tuned_speed_mpn_divrem_1.
+
+   Nothing is measured when mpn_divrem_1 is native, when nails are in use,
+   or when UDIV_PREINV_ALWAYS is set; in those cases
+   tuned_speed_mpn_divrem_1 is left as plain speed_mpn_divrem_1.  */
+void
+tune_divrem_1 (void)
+{
+  /* plain version by default */
+  tuned_speed_mpn_divrem_1 = speed_mpn_divrem_1;
+
+  /* No support for tuning native assembler code, do that by hand and put
+     the results in the .asm file, there's no need for such thresholds to
+     appear in gmp-mparam.h.  */
+  if (HAVE_NATIVE_mpn_divrem_1)
+    return;
+
+  if (GMP_NAIL_BITS != 0)
+    {
+      print_define_remark ("DIVREM_1_NORM_THRESHOLD", MP_SIZE_T_MAX,
+                           "no preinv with nails");
+      print_define_remark ("DIVREM_1_UNNORM_THRESHOLD", MP_SIZE_T_MAX,
+                           "no preinv with nails");
+      return;
+    }
+
+  if (UDIV_PREINV_ALWAYS)
+    {
+      print_define_remark ("DIVREM_1_NORM_THRESHOLD", 0L, "preinv always");
+      print_define ("DIVREM_1_UNNORM_THRESHOLD", 0L);
+      return;
+    }
+
+  /* From here on the thresholds really are measured, so switch to the
+     tune variant of the speed function.  */
+  tuned_speed_mpn_divrem_1 = speed_mpn_divrem_1_tune;
+
+  /* Tune for the integer part of mpn_divrem_1.  This will very possibly be
+     a bit out for the fractional part, but that's too bad, the integer part
+     is more important. */
+  {
+    static struct param_t  param;
+    param.name = "DIVREM_1_NORM_THRESHOLD";
+    DIV_1_PARAMS;
+    s.r = randlimb_norm ();    /* divisor from randlimb_norm */
+    param.function = speed_mpn_divrem_1_tune;
+    one (&divrem_1_norm_threshold, &param);
+  }
+  {
+    static struct param_t  param;
+    param.name = "DIVREM_1_UNNORM_THRESHOLD";
+    DIV_1_PARAMS;
+    s.r = randlimb_half ();    /* divisor from randlimb_half */
+    param.function = speed_mpn_divrem_1_tune;
+    one (&divrem_1_unnorm_threshold, &param);
+  }
+}
+
+
+/* Tune the mpn_mod_1 family of thresholds:
+
+     MOD_1_NORM_THRESHOLD, MOD_1_UNNORM_THRESHOLD
+     MOD_1N_TO_MOD_1_1_THRESHOLD
+     MOD_1U_TO_MOD_1_1_THRESHOLD
+     MOD_1_1_TO_MOD_1_2_THRESHOLD, MOD_1_2_TO_MOD_1_4_THRESHOLD
+     PREINV_MOD_1_TO_MOD_1_THRESHOLD
+
+   Nothing at all is done when mpn_mod_1 is native.  With nails only the
+   NORM/UNNORM defines are printed.  With UDIV_PREINV_ALWAYS the
+   NORM/UNNORM defines are printed as 0 without measuring, but the remaining
+   thresholds are still tuned.  */
+void
+tune_mod_1 (void)
+{
+  /* No support for tuning native assembler code, do that by hand and put
+     the results in the .asm file, there's no need for such thresholds to
+     appear in gmp-mparam.h.  */
+  if (HAVE_NATIVE_mpn_mod_1)
+    return;
+
+  if (GMP_NAIL_BITS != 0)
+    {
+      print_define_remark ("MOD_1_NORM_THRESHOLD", MP_SIZE_T_MAX,
+                           "no preinv with nails");
+      print_define_remark ("MOD_1_UNNORM_THRESHOLD", MP_SIZE_T_MAX,
+                           "no preinv with nails");
+      return;
+    }
+
+  if (UDIV_PREINV_ALWAYS)
+    {
+      /* Unlike tune_divrem_1, there is no return here, execution carries
+         on to the searches below.  */
+      print_define ("MOD_1_NORM_THRESHOLD", 0L);
+      print_define ("MOD_1_UNNORM_THRESHOLD", 0L);
+    }
+  else
+    {
+      {
+       static struct param_t  param;
+       param.name = "MOD_1_NORM_THRESHOLD";
+       DIV_1_PARAMS;
+       s.r = randlimb_norm ();
+       param.function = speed_mpn_mod_1_tune;
+       one (&mod_1_norm_threshold, &param);
+      }
+      {
+       static struct param_t  param;
+       param.name = "MOD_1_UNNORM_THRESHOLD";
+       DIV_1_PARAMS;
+       s.r = randlimb_half ();
+       param.function = speed_mpn_mod_1_tune;
+       one (&mod_1_unnorm_threshold, &param);
+      }
+    }
+  {
+    /* MOD_1N_TO_MOD_1_1_THRESHOLD, divisor from randlimb_norm.  */
+    static struct param_t  param;
+
+    param.check_size = 256;
+
+    s.r = randlimb_norm ();
+    param.function = speed_mpn_mod_1_tune;
+
+    param.name = "MOD_1N_TO_MOD_1_1_THRESHOLD";
+    param.min_size = 2;
+    one (&mod_1n_to_mod_1_1_threshold, &param);
+  }
+
+  {
+    /* The three thresholds in this block are measured with noprint set
+       and printed together at the end, since they may be adjusted in the
+       light of each other first.  */
+    static struct param_t  param;
+
+    param.check_size = 256;
+    s.r = randlimb_norm () / 5;
+    param.noprint = 1;
+
+    param.function = speed_mpn_mod_1_1;
+    param.function2 = speed_mpn_mod_1_2;
+    param.min_is_always = 1;
+    param.name = "MOD_1_1_TO_MOD_1_2_THRESHOLD";
+    param.min_size = 2;
+    one (&mod_1_1_to_mod_1_2_threshold, &param);
+
+    param.function = speed_mpn_mod_1_2;
+    param.function2 = speed_mpn_mod_1_4;
+    param.min_is_always = 1;
+    param.name = "MOD_1_2_TO_MOD_1_4_THRESHOLD";
+    param.min_size = 1;
+    one (&mod_1_2_to_mod_1_4_threshold, &param);
+
+    if (mod_1_1_to_mod_1_2_threshold >= mod_1_2_to_mod_1_4_threshold)
+      {
+       /* Never use mod_1_2, measure mod_1_1 -> mod_1_4 */
+       mod_1_2_to_mod_1_4_threshold = 0;
+
+       /* The mod_1_1 -> mod_1_4 crossover is stored in the
+          mod_1_1_to_mod_1_2_threshold variable; the "fake" name is only
+          what is passed to one() as the name.  */
+       param.function = speed_mpn_mod_1_1;
+       param.function2 = speed_mpn_mod_1_4;
+       param.min_is_always = 1;
+       param.name = "MOD_1_1_TO_MOD_1_4_THRESHOLD fake";
+       param.min_size = 2;
+       one (&mod_1_1_to_mod_1_2_threshold, &param);
+      }
+
+    /* This search reuses the same param block (check_size and noprint
+       carry over), with function2 cleared and min_is_always off.  */
+    param.function = speed_mpn_mod_1_tune;
+    param.function2 = NULL;
+    param.name = "MOD_1U_TO_MOD_1_1_THRESHOLD";
+    param.min_size = 2;
+    param.min_is_always = 0;
+    one (&mod_1u_to_mod_1_1_threshold, &param);
+
+    /* A later threshold which is not above MOD_1U_TO_MOD_1_1 is set
+       to 0.  */
+    if (mod_1u_to_mod_1_1_threshold >= mod_1_1_to_mod_1_2_threshold)
+      mod_1_1_to_mod_1_2_threshold = 0;
+    if (mod_1u_to_mod_1_1_threshold >= mod_1_2_to_mod_1_4_threshold)
+      mod_1_2_to_mod_1_4_threshold = 0;
+
+    print_define_remark ("MOD_1U_TO_MOD_1_1_THRESHOLD", mod_1u_to_mod_1_1_threshold, NULL);
+    print_define_remark ("MOD_1_1_TO_MOD_1_2_THRESHOLD", mod_1_1_to_mod_1_2_threshold,
+                        mod_1_1_to_mod_1_2_threshold == 0 ? "never mpn_mod_1_1p" : NULL);
+    print_define_remark ("MOD_1_2_TO_MOD_1_4_THRESHOLD", mod_1_2_to_mod_1_4_threshold,
+                        mod_1_2_to_mod_1_4_threshold == 0 ? "never mpn_mod_1s_2p" : NULL);
+  }
+
+  {
+    /* PREINV_MOD_1_TO_MOD_1_THRESHOLD compares speed_mpn_preinv_mod_1
+       against speed_mpn_mod_1_tune, divisor from randlimb_norm.  */
+    static struct param_t  param;
+
+    param.check_size = 256;
+
+    param.name = "PREINV_MOD_1_TO_MOD_1_THRESHOLD";
+    s.r = randlimb_norm ();
+    param.function = speed_mpn_preinv_mod_1;
+    param.function2 = speed_mpn_mod_1_tune;
+    param.min_size = 1;
+    one (&preinv_mod_1_to_mod_1_threshold, &param);
+  }
+}
+
+
+/* Decide USE_PREINV_DIVREM_1 by timing speed_mpn_preinv_divrem_1 against a
+   plain divrem_1 speed function at one fixed operand size.
+
+   A non-zero DIVREM_1_UNNORM_THRESHOLD (or DIVREM_1_NORM_THRESHOLD) would
+   imply that udiv_qrnnd_preinv is worth using, but it seems most
+   straightforward to compare mpn_preinv_divrem_1 and mpn_divrem_1_div
+   directly.  */
+
+void
+tune_preinv_divrem_1 (void)
+{
+  static struct param_t  cfg;
+  speed_function_t  plain_fn;
+  const char        *plain_name;
+  double            t_preinv, t_plain;
+
+  /* No preinv at all with nails.  */
+  if (GMP_NAIL_BITS != 0)
+    {
+      print_define_remark ("USE_PREINV_DIVREM_1", 0, "no preinv with nails");
+      return;
+    }
+
+  /* Any native version of mpn_preinv_divrem_1 is assumed to exist because
+     it's faster than mpn_divrem_1.  */
+  if (HAVE_NATIVE_mpn_preinv_divrem_1)
+    {
+      print_define_remark ("USE_PREINV_DIVREM_1", 1, "native");
+      return;
+    }
+
+  /* If udiv_qrnnd_preinv is the only division method then of course
+     mpn_preinv_divrem_1 should be used.  */
+  if (UDIV_PREINV_ALWAYS)
+    {
+      print_define_remark ("USE_PREINV_DIVREM_1", 1, "preinv always");
+      return;
+    }
+
+  /* If we've got an assembler version of mpn_divrem_1, then compare against
+     that, not the mpn_divrem_1_div generic C.  */
+  plain_fn = (HAVE_NATIVE_mpn_divrem_1
+              ? speed_mpn_divrem_1 : speed_mpn_divrem_1_div);
+  plain_name = (HAVE_NATIVE_mpn_divrem_1
+                ? "mpn_divrem_1" : "mpn_divrem_1_div");
+
+  cfg.data_high = DATA_HIGH_LT_R;   /* allow skip one division */
+  s.size = 200;                     /* generous but not too big */
+
+  /* Divisor, nonzero.  Unnormalized so as to exercise the shift!=0 case,
+     since in general that's probably most common, though in fact for a
+     64-bit limb mp_bases[10].big_base is normalized.  */
+  s.r = urandom() & (GMP_NUMB_MASK >> 4);
+  if (s.r == 0)
+    s.r = 123;
+
+  t_preinv = tuneup_measure (speed_mpn_preinv_divrem_1, &cfg, &s);
+  t_plain = tuneup_measure (plain_fn, &cfg, &s);
+  if (t_preinv == -1.0 || t_plain == -1.0)
+    {
+      printf ("Oops, can't measure mpn_preinv_divrem_1 and %s at %ld\n",
+              plain_name, (long) s.size);
+      abort ();
+    }
+  if (option_trace >= 1)
+    printf ("size=%ld, mpn_preinv_divrem_1 %.9f, %s %.9f\n",
+            (long) s.size, t_preinv, plain_name, t_plain);
+
+  /* The define is 1 exactly when preinv measured strictly faster.  */
+  print_define_remark ("USE_PREINV_DIVREM_1", (mp_size_t) (t_preinv < t_plain), NULL);
+}
+
+
+
+/* Tune DIVREM_2_THRESHOLD using speed_mpn_divrem_2.  Nothing is measured
+   when mpn_divrem_2 is native, when nails are in use, or when
+   UDIV_PREINV_ALWAYS is set.  */
+void
+tune_divrem_2 (void)
+{
+  static struct param_t  cfg;
+
+  /* No support for tuning native assembler code, do that by hand and put
+     the results in the .asm file, and there's no need for such thresholds
+     to appear in gmp-mparam.h.  */
+  if (HAVE_NATIVE_mpn_divrem_2)
+    return;
+
+  if (GMP_NAIL_BITS != 0)
+    {
+      print_define_remark ("DIVREM_2_THRESHOLD", MP_SIZE_T_MAX,
+                           "no preinv with nails");
+      return;
+    }
+
+  if (UDIV_PREINV_ALWAYS)
+    {
+      print_define_remark ("DIVREM_2_THRESHOLD", 0L, "preinv always");
+      return;
+    }
+
+  /* Tune for the integer part of mpn_divrem_2.  This will very possibly be
+     a bit out for the fractional part, but that's too bad, the integer part
+     is more important.
+
+     min_size must be >=2 since nsize>=2 is required, but is set to 4 to save
+     code space if plain division is better only at size==2 or size==3. */
+  cfg.stop_factor = 2.0;
+  cfg.size_extra = 2;        /* does qsize==nsize-2 divisions */
+  cfg.min_is_always = 1;
+  cfg.min_size = 4;
+  cfg.check_size = 256;
+  cfg.name = "DIVREM_2_THRESHOLD";
+  cfg.function = speed_mpn_divrem_2;
+
+  s.r = randlimb_norm ();
+  one (&divrem_2_threshold, &cfg);
+}
+
+
+/* mpn_divexact_1 is vaguely expected to be used on smallish divisors, so
+   tune for that.  Its speed can differ on odd or even divisor, so take an
+   average threshold for the two.
+
+   mpn_divrem_1 can vary with high<divisor or not, whereas mpn_divexact_1
+   might not vary that way, but don't test this since high<divisor isn't
+   expected to occur often with small divisors.
+
+   tune_divrem_1 must have run first, since the function it stored in
+   tuned_speed_mpn_divrem_1 is what divexact_1 is compared against.  */
+
+void
+tune_divexact_1 (void)
+{
+  static struct param_t  param;
+  mp_size_t  thresh[2], average;
+  int        low, i;
+
+  /* Any native mpn_divexact_1 is assumed to incorporate all the speed of a
+     full mpn_divrem_1.  */
+  if (HAVE_NATIVE_mpn_divexact_1)
+    {
+      print_define_remark ("DIVEXACT_1_THRESHOLD", 0, "always (native)");
+      return;
+    }
+
+  ASSERT_ALWAYS (tuned_speed_mpn_divrem_1 != NULL);
+
+  param.name = "DIVEXACT_1_THRESHOLD";
+  param.data_high = DATA_HIGH_GE_R;
+  param.check_size = 256;
+  param.min_size = 2;
+  param.stop_factor = 1.5;
+  param.function  = tuned_speed_mpn_divrem_1;
+  param.function2 = speed_mpn_divexact_1;
+  param.noprint = 1;
+
+  /* The define is printed in two parts, print_define_start here and
+     print_define_end once the average is known.  */
+  print_define_start (param.name);
+
+  /* low==0 measures with an odd divisor (low bit forced on), low==1 with
+     a divisor whose low three bits are cleared.  */
+  for (low = 0; low <= 1; low++)
+    {
+      s.r = randlimb_half();
+      if (low == 0)
+        s.r |= 1;
+      else
+        s.r &= ~CNST_LIMB(7);
+
+      one (&thresh[low], &param);
+      if (option_trace)
+        printf ("low=%d thresh %ld\n", low, (long) thresh[low]);
+
+      /* If either run reports MP_SIZE_T_MAX, that is the result and no
+         average is taken.  */
+      if (thresh[low] == MP_SIZE_T_MAX)
+        {
+          average = MP_SIZE_T_MAX;
+          goto divexact_1_done;
+        }
+    }
+
+  if (option_trace)
+    {
+      printf ("average of:");
+      for (i = 0; i < numberof(thresh); i++)
+        printf (" %ld", (long) thresh[i]);
+      printf ("\n");
+    }
+
+  /* Integer mean of the two thresholds, rounded down.  */
+  average = 0;
+  for (i = 0; i < numberof(thresh); i++)
+    average += thresh[i];
+  average /= numberof(thresh);
+
+  /* If divexact turns out to be better as early as 3 limbs, then use it
+     always, so as to reduce code size and conditional jumps.  */
+  if (average <= 3)
+    average = 0;
+
+ divexact_1_done:
+  print_define_end (param.name, average);
+}
+
+
+/* The generic mpn_modexact_1_odd skips a divide step if high<divisor, the
+   same as mpn_mod_1, but this might not be true of an assembler
+   implementation.  The threshold used is an average based on data where a
+   divide can be skipped and where it can't.
+
+   If modexact turns out to be better as early as 3 limbs, then use it
+   always, so as to reduce code size and conditional jumps.
+
+   NOTE(review): the define printed is BMOD_1_TO_MOD_1_THRESHOLD, with
+   modexact as "function" and mod_1 as "function2".  The paragraph above
+   reads as if written for a threshold of the opposite sense, so what the
+   value 0 means for the callers should be confirmed.  */
+
+void
+tune_modexact_1_odd (void)
+{
+  static struct param_t  param;
+  mp_size_t  thresh_lt, thresh_ge, average;
+
+#if 0
+  /* Any native mpn_modexact_1_odd is assumed to incorporate all the speed
+     of a full mpn_mod_1.  */
+  if (HAVE_NATIVE_mpn_modexact_1_odd)
+    {
+      print_define_remark ("BMOD_1_TO_MOD_1_THRESHOLD", MP_SIZE_T_MAX, "always bmod_1");
+      return;
+    }
+#endif
+
+  param.name = "BMOD_1_TO_MOD_1_THRESHOLD";
+  param.check_size = 256;
+  param.min_size = 2;
+  param.stop_factor = 1.5;
+  param.function  = speed_mpn_modexact_1c_odd;
+  param.function2 = speed_mpn_mod_1_tune;
+  param.noprint = 1;
+  s.r = randlimb_half () | 1;  /* low bit forced on, so the divisor is odd */
+
+  /* The define is printed in two parts, print_define_start here and
+     print_define_end once the final value is known.  */
+  print_define_start (param.name);
+
+  /* First run, data_high set to DATA_HIGH_LT_R.  */
+  param.data_high = DATA_HIGH_LT_R;
+  one (&thresh_lt, &param);
+  if (option_trace)
+    printf ("lt thresh %ld\n", (long) thresh_lt);
+
+  /* The result defaults to thresh_lt, and stays that way if either run
+     reports MP_SIZE_T_MAX.  */
+  average = thresh_lt;
+  if (thresh_lt != MP_SIZE_T_MAX)
+    {
+      /* Second run, data_high set to DATA_HIGH_GE_R.  */
+      param.data_high = DATA_HIGH_GE_R;
+      one (&thresh_ge, &param);
+      if (option_trace)
+        printf ("ge thresh %ld\n", (long) thresh_ge);
+
+      if (thresh_ge != MP_SIZE_T_MAX)
+        {
+          average = (thresh_ge + thresh_lt) / 2;
+          /* NOTE(review): this tests thresh_ge, whereas tune_divexact_1
+             applies the corresponding "<= 3" test to the average -
+             confirm which is intended.  */
+          if (thresh_ge <= 3)
+            average = 0;
+        }
+    }
+
+  print_define_end (param.name, average);
+}
+
+
+/* Choose JACOBI_BASE_METHOD by timing the three mpn_jacobi_base speed
+   functions with s.size set to GMP_LIMB_BITS * 3 / 4.  Method 1 is chosen
+   only if strictly fastest; otherwise method 2 if it is strictly faster
+   than method 3; otherwise method 3.  Aborts if any measurement fails.  */
+void
+tune_jacobi_base (void)
+{
+  static struct param_t  cfg;
+  double   time1, time2, time3;
+  int      method;
+
+  s.size = GMP_LIMB_BITS * 3 / 4;
+
+  time1 = tuneup_measure (speed_mpn_jacobi_base_1, &cfg, &s);
+  if (option_trace >= 1)
+    printf ("size=%ld, mpn_jacobi_base_1 %.9f\n", (long) s.size, time1);
+
+  time2 = tuneup_measure (speed_mpn_jacobi_base_2, &cfg, &s);
+  if (option_trace >= 1)
+    printf ("size=%ld, mpn_jacobi_base_2 %.9f\n", (long) s.size, time2);
+
+  time3 = tuneup_measure (speed_mpn_jacobi_base_3, &cfg, &s);
+  if (option_trace >= 1)
+    printf ("size=%ld, mpn_jacobi_base_3 %.9f\n", (long) s.size, time3);
+
+  /* -1.0 from tuneup_measure means the measurement could not be made.  */
+  if (time1 == -1.0 || time2 == -1.0 || time3 == -1.0)
+    {
+      printf ("Oops, can't measure all mpn_jacobi_base methods at %ld\n",
+              (long) s.size);
+      abort ();
+    }
+
+  /* Method 3 unless one of the others is strictly faster.  */
+  method = 3;
+  if (time1 < time2 && time1 < time3)
+    method = 1;
+  else if (time2 < time3)
+    method = 2;
+
+  print_define ("JACOBI_BASE_METHOD", method);
+}
+
+
+/* Tune GET_STR_DC_THRESHOLD and then GET_STR_PRECOMPUTE_THRESHOLD, both
+   through speed_mpn_get_str.  GET_STR_PRECOMPUTE_THRESHOLD is set to 0
+   while the first is measured, and the second search takes the freshly
+   tuned GET_STR_DC_THRESHOLD as its min_size.  Each search uses its own
+   zero-initialized static parameter block.  */
+void
+tune_get_str (void)
+{
+  static struct param_t  dc_cfg;
+  static struct param_t  pre_cfg;
+
+  /* Tune for decimal, it being most common.  Some rough testing suggests
+     other bases are different, but not by very much.  */
+  s.r = 10;
+
+  GET_STR_PRECOMPUTE_THRESHOLD = 0;
+  dc_cfg.max_size = GET_STR_THRESHOLD_LIMIT;
+  dc_cfg.min_size = 4;
+  dc_cfg.function = speed_mpn_get_str;
+  dc_cfg.name = "GET_STR_DC_THRESHOLD";
+  one (&get_str_dc_threshold, &dc_cfg);
+
+  pre_cfg.max_size = GET_STR_THRESHOLD_LIMIT;
+  pre_cfg.min_size = GET_STR_DC_THRESHOLD;
+  pre_cfg.function = speed_mpn_get_str;
+  pre_cfg.name = "GET_STR_PRECOMPUTE_THRESHOLD";
+  one (&get_str_precompute_threshold, &pre_cfg);
+}
+
+
+/* Speed function timing mpn_pre_set_str on s->size digits in base s->r
+   (base 10 when s->r is 0).  The power table and scratch space are set up
+   before the timer is started, so only the mpn_pre_set_str calls are timed.
+
+   Returns the value of speed_endtime() for s->reps calls.  The
+   SPEED_RESTRICT_COND checks reject s->size < 1 and bases outside
+   2..256.  */
+double
+speed_mpn_pre_set_str (struct speed_params *s)
+{
+  unsigned char *str;
+  mp_ptr     wp;
+  mp_size_t  wn;
+  unsigned   i;
+  int        base;
+  double     t;
+  mp_ptr powtab_mem, tp;
+  powers_t powtab[GMP_LIMB_BITS];
+  mp_size_t un;
+  int chars_per_limb;
+  TMP_DECL;
+
+  SPEED_RESTRICT_COND (s->size >= 1);
+
+  base = s->r == 0 ? 10 : s->r;
+  SPEED_RESTRICT_COND (base >= 2 && base <= 256);
+
+  TMP_MARK;
+
+  /* Build the input digit string from the low part of each source limb,
+     reduced to a valid digit for the base.  */
+  str = TMP_ALLOC (s->size);
+  for (i = 0; i < s->size; i++)
+    str[i] = s->xp[i] % base;
+
+  /* Limbs allocated for the result.  */
+  wn = ((mp_size_t) (s->size / mp_bases[base].chars_per_bit_exactly))
+    / GMP_LIMB_BITS + 2;
+  SPEED_TMP_ALLOC_LIMBS (wp, wn, s->align_wp);
+
+  /* use this during development to check wn is big enough */
+  /*
+  ASSERT_ALWAYS (mpn_set_str (wp, str, s->size, base) <= wn);
+  */
+
+  speed_operand_src (s, (mp_ptr) str, s->size/BYTES_PER_MP_LIMB);
+  speed_operand_dst (s, wp, wn);
+  speed_cache_fill (s);
+
+  /* Precompute the power table and allocate the scratch space, outside
+     the timed region.  */
+  chars_per_limb = mp_bases[base].chars_per_limb;
+  un = s->size / chars_per_limb + 1;
+  powtab_mem = TMP_BALLOC_LIMBS (mpn_dc_set_str_powtab_alloc (un));
+  mpn_set_str_compute_powtab (powtab, powtab_mem, un, base);
+  tp = TMP_BALLOC_LIMBS (mpn_dc_set_str_itch (un));
+
+  /* Timed region: s->reps calls.  The do/while runs its body before
+     testing, so s->reps is expected to be at least 1 (presumably ensured
+     by the caller - TODO confirm).  */
+  speed_starttime ();
+  i = s->reps;
+  do
+    {
+      mpn_pre_set_str (wp, str, s->size, powtab, tp);
+    }
+  while (--i != 0);
+  t = speed_endtime ();
+
+  TMP_FREE;
+  return t;
+}
+
+/* Tune the two mpn_set_str thresholds, one after the other, using the
+   shared speed parameters `s' and the generic one() search.  */
+void
+tune_set_str (void)
+{
+  static struct param_t  dc_param;
+  static struct param_t  precompute_param;
+
+  s.r = 10;  /* measure in decimal */
+
+  /* Stage 1: SET_STR_DC_THRESHOLD, timed through speed_mpn_pre_set_str
+     with the precompute threshold forced to zero.  */
+  SET_STR_PRECOMPUTE_THRESHOLD = 0;
+  dc_param.name = "SET_STR_DC_THRESHOLD";
+  dc_param.function = speed_mpn_pre_set_str;
+  dc_param.step_factor = 0.01;
+  dc_param.min_size = 100;
+  dc_param.max_size = 50000;
+  one (&set_str_dc_threshold, &dc_param);
+
+  /* Stage 2: SET_STR_PRECOMPUTE_THRESHOLD, timed through the full
+     speed_mpn_set_str.  The search starts at SET_STR_DC_THRESHOLD, read
+     only after stage 1 has run (presumably the value stage 1 just stored
+     -- confirm against one()).  */
+  precompute_param.name = "SET_STR_PRECOMPUTE_THRESHOLD";
+  precompute_param.function = speed_mpn_set_str;
+  precompute_param.step_factor = 0.02;
+  precompute_param.min_size = SET_STR_DC_THRESHOLD;
+  precompute_param.max_size = 100000;
+  one (&set_str_precompute_threshold, &precompute_param);
+}
+
+
+/* Fill in an fft_param_t describing FFT multiplication and pass it to
+   fft().  Nothing is done when option_fft_max_size is zero.  */
+void
+tune_fft_mul (void)
+{
+  static struct fft_param_t  fft_args;
+
+  if (option_fft_max_size != 0)
+    {
+      /* Names of the table and of the two thresholds.  */
+      fft_args.table_name          = "MUL_FFT_TABLE3";
+      fft_args.threshold_name      = "MUL_FFT_THRESHOLD";
+      fft_args.modf_threshold_name = "MUL_FFT_MODF_THRESHOLD";
+
+      /* Variables associated with those two thresholds.  */
+      fft_args.p_threshold         = &mul_fft_threshold;
+      fft_args.p_modf_threshold    = &mul_fft_modf_threshold;
+
+      /* Size range: from half the Toom-3 multiply threshold up to the
+         configured FFT maximum.  */
+      fft_args.first_size          = MUL_TOOM33_THRESHOLD / 2;
+      fft_args.max_size            = option_fft_max_size;
+
+      /* Measurement routines; sqr = 0 selects the multiply variant.  */
+      fft_args.function            = speed_mpn_fft_mul;
+      fft_args.mul_modf_function   = speed_mpn_mul_fft;
+      fft_args.mul_function        = speed_mpn_mul_n;
+      fft_args.sqr = 0;
+
+      fft (&fft_args);
+    }
+}
+
+
+/* Fill in an fft_param_t describing FFT squaring and pass it to fft().
+   Nothing is done when option_fft_max_size is zero.  */
+void
+tune_fft_sqr (void)
+{
+  static struct fft_param_t  fft_args;
+
+  if (option_fft_max_size != 0)
+    {
+      /* Names of the table and of the two thresholds.  */
+      fft_args.table_name          = "SQR_FFT_TABLE3";
+      fft_args.threshold_name      = "SQR_FFT_THRESHOLD";
+      fft_args.modf_threshold_name = "SQR_FFT_MODF_THRESHOLD";
+
+      /* Variables associated with those two thresholds.  */
+      fft_args.p_threshold         = &sqr_fft_threshold;
+      fft_args.p_modf_threshold    = &sqr_fft_modf_threshold;
+
+      /* Size range: from half the Toom-3 squaring threshold up to the
+         configured FFT maximum.  */
+      fft_args.first_size          = SQR_TOOM3_THRESHOLD / 2;
+      fft_args.max_size            = option_fft_max_size;
+
+      /* Measurement routines; sqr = 1 selects the squaring variant.  */
+      fft_args.function            = speed_mpn_fft_sqr;
+      fft_args.mul_modf_function   = speed_mpn_mul_fft_sqr;
+      fft_args.mul_function        = speed_mpn_sqr;
+      fft_args.sqr = 1;
+
+      fft (&fft_args);
+    }
+}
+
+/* Run the complete tuning sequence.
+
+   Allocates the two shared operand blocks in the global `s' and fills them
+   with random limbs, reports the timing setup on stderr, prints a
+   "Generated by tuneup.c" banner on stdout, calls every tune_* routine in a
+   fixed order, and finally prints the elapsed wall-clock time.  */
+void
+all (void)
+{
+  time_t  start_time, end_time;
+  TMP_DECL;
+
+  TMP_MARK;
+  /* Operand blocks shared by all measurements, released by TMP_FREE at the
+     end of this function.  */
+  SPEED_TMP_ALLOC_LIMBS (s.xp_block, SPEED_BLOCK_SIZE, 0);
+  SPEED_TMP_ALLOC_LIMBS (s.yp_block, SPEED_BLOCK_SIZE, 0);
+
+  mpn_random (s.xp_block, SPEED_BLOCK_SIZE);
+  mpn_random (s.yp_block, SPEED_BLOCK_SIZE);
+
+  /* Diagnostics go to stderr so that stdout carries only the generated
+     parameter text.  */
+  fprintf (stderr, "Parameters for %s\n", GMP_MPARAM_H_SUGGEST);
+
+  speed_time_init ();
+  fprintf (stderr, "Using: %s\n", speed_time_string);
+
+  fprintf (stderr, "speed_precision %d", speed_precision);
+  if (speed_unittime == 1.0)
+    fprintf (stderr, ", speed_unittime 1 cycle");
+  else
+    fprintf (stderr, ", speed_unittime %.2e secs", speed_unittime);
+  /* A cycle time of exactly 1.0 or 0.0 is reported as an unknown CPU
+     frequency.  */
+  if (speed_cycletime == 1.0 || speed_cycletime == 0.0)
+    fprintf (stderr, ", CPU freq unknown\n");
+  else
+    fprintf (stderr, ", CPU freq %.2f MHz\n", 1e-6/speed_cycletime);
+
+  fprintf (stderr, "DEFAULT_MAX_SIZE %d, fft_max_size %ld\n",
+           DEFAULT_MAX_SIZE, (long) option_fft_max_size);
+  fprintf (stderr, "\n");
+
+  time (&start_time);
+  {
+    struct tm  *tp;
+    /* NOTE(review): the localtime result is used without a NULL check.  */
+    tp = localtime (&start_time);
+    /* Opens a C comment on stdout; each compiler branch below prints the
+       text that closes it.  */
+    printf ("/* Generated by tuneup.c, %d-%02d-%02d, ",
+            tp->tm_year+1900, tp->tm_mon+1, tp->tm_mday);
+
+    /* PRINTED_COMPILER records that some branch has identified the
+       compiler, so the generic fallback at the end is compiled out.  */
+#ifdef __GNUC__
+    /* gcc sub-minor version doesn't seem to come through as a define */
+    printf ("gcc %d.%d */\n", __GNUC__, __GNUC_MINOR__);
+#define PRINTED_COMPILER
+#endif
+#if defined (__SUNPRO_C)
+    printf ("Sun C %d.%d */\n", __SUNPRO_C / 0x100, __SUNPRO_C % 0x100);
+#define PRINTED_COMPILER
+#endif
+#if ! defined (__GNUC__) && defined (__sgi) && defined (_COMPILER_VERSION)
+    /* gcc defines __sgi and _COMPILER_VERSION on irix 6, avoid that */
+    printf ("MIPSpro C %d.%d.%d */\n",
+           _COMPILER_VERSION / 100,
+           _COMPILER_VERSION / 10 % 10,
+           _COMPILER_VERSION % 10);
+#define PRINTED_COMPILER
+#endif
+#if defined (__DECC) && defined (__DECC_VER)
+    printf ("DEC C %d */\n", __DECC_VER);
+#define PRINTED_COMPILER
+#endif
+#if ! defined (PRINTED_COMPILER)
+    printf ("system compiler */\n");
+#endif
+  }
+  printf ("\n");
+
+  /* The tuning passes.  Some routines read thresholds as inputs (for
+     example tune_fft_mul uses MUL_TOOM33_THRESHOLD and tune_get_str uses
+     GET_STR_DC_THRESHOLD), so the order is presumably significant -- do not
+     reorder without checking each routine.  The blank lines printed in
+     between separate the groups in the generated output.  */
+  tune_divrem_1 ();
+  tune_mod_1 ();
+  tune_preinv_divrem_1 ();
+  tune_divrem_2 ();
+  tune_divexact_1 ();
+  tune_modexact_1_odd ();
+  printf("\n");
+
+  tune_mul_n ();
+  printf("\n");
+
+  tune_mul ();
+  printf("\n");
+
+  tune_sqr ();
+  printf("\n");
+
+  tune_mulmod_bnm1 ();
+  tune_sqrmod_bnm1 ();
+  printf("\n");
+
+  tune_fft_mul ();
+  printf("\n");
+
+  tune_fft_sqr ();
+  printf ("\n");
+
+  tune_mullo ();
+  printf("\n");
+
+  tune_dc_div ();
+  tune_dc_bdiv ();
+
+  printf("\n");
+  tune_invertappr ();
+  tune_invert ();
+  printf("\n");
+
+  tune_binvert ();
+  tune_redc ();
+  printf("\n");
+
+  tune_mu_div ();
+  tune_mu_bdiv ();
+  printf("\n");
+
+  tune_matrix22_mul ();
+  tune_hgcd ();
+  tune_gcd_dc ();
+  tune_gcdext_dc ();
+  tune_jacobi_base ();
+  printf("\n");
+
+  tune_get_str ();
+  tune_set_str ();
+  printf("\n");
+
+  /* Wall-clock duration of the whole run, in whole seconds.  */
+  time (&end_time);
+  printf ("/* Tuneup completed successfully, took %ld seconds */\n",
+          (long) (end_time - start_time));
+
+  TMP_FREE;
+}
+
+
+/* Program entry point: parse the command line, then run all().
+
+   Options:
+     -f t     set option_fft_trace to 2 (argument starting with 't')
+     -f N     set option_fft_max_size to atol(N)
+     -o OPT   pass OPT to speed_option_set
+     -p N     set speed_precision to atoi(N)
+     -t       increase option_trace by one; may be repeated
+
+   Exits with status 1 when getopt reports an unrecognised option or a
+   missing argument, otherwise with status 0 once all() has returned.  */
+int
+main (int argc, char *argv[])
+{
+  int  opt;
+
+  /* Unbuffered so if output is redirected to a file it isn't lost if the
+     program is killed part way through.  */
+  setbuf (stdout, NULL);
+  setbuf (stderr, NULL);
+
+  /* getopt is specified to return -1 once the options are exhausted.  EOF
+     is only guaranteed to be some negative value, so compare against -1
+     rather than EOF.  */
+  while ((opt = getopt(argc, argv, "f:o:p:t")) != -1)
+    {
+      switch (opt) {
+      case 'f':
+        if (optarg[0] == 't')
+          option_fft_trace = 2;
+        else
+          option_fft_max_size = atol (optarg);
+        break;
+      case 'o':
+        speed_option_set (optarg);
+        break;
+      case 'p':
+        speed_precision = atoi (optarg);
+        break;
+      case 't':
+        option_trace++;
+        break;
+      case '?':
+        /* getopt has already printed its own diagnostic.  */
+        exit(1);
+      }
+    }
+
+  all ();
+  exit (0);
+}
diff --git a/tune/x86_64.asm b/tune/x86_64.asm

new file mode 100644 (file)

index 0000000..5099090
--- /dev/null
+++ b/tune/x86_64.asm
@@ -0,0 +1,44 @@
+dnl  x86-64 time stamp counter (rdtsc) access routine.
+
+dnl  Copyright 1999, 2000, 2003, 2004, 2005 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+
+C void speed_cyclecounter (unsigned p[2]);
+C
+C Get the pentium rdtsc cycle counter, storing the least significant word in
+C p[0] and the most significant in p[1].
+C
+C cpuid is used to serialize execution.  On big measurements this won't be
+C significant but it may help make small single measurements more accurate.
+
+PROLOGUE(speed_cyclecounter)
+
+       C rdi   p
+
+       movq    %rbx, %r10              C cpuid overwrites ebx, so park rbx in r10
+       xorl    %eax, %eax              C eax = 0 selects cpuid leaf 0
+       cpuid                           C result unused, run only to serialize
+       rdtsc                           C edx:eax = time stamp counter
+       movl    %eax, (%rdi)            C p[0] = low 32 bits
+       movl    %edx, 4(%rdi)           C p[1] = high 32 bits
+       movq    %r10, %rbx              C restore the saved rbx
+       ret
+EPILOGUE()
diff --git a/version.c b/version.c

new file mode 100644 (file)

index 0000000..da16b77
--- /dev/null
+++ b/version.c
@@ -0,0 +1,23 @@
+/* gmp_version -- version number compiled into the library.
+
+Copyright 1996, 1999, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Version string exported by the library.  VERSION is not defined in this
+   file; presumably it is supplied by the build configuration through the
+   headers included above -- confirm.  */
+const char * const gmp_version = VERSION;
diff --git a/ylwrap b/ylwrap

new file mode 100755 (executable)

index 0000000..84d5634
--- /dev/null
+++ b/ylwrap
@@ -0,0 +1,222 @@
+#! /bin/sh
+# ylwrap - wrapper for lex/yacc invocations.
+
+scriptversion=2009-04-28.21; # UTC
+
+# Copyright (C) 1996, 1997, 1998, 1999, 2001, 2002, 2003, 2004, 2005,
+# 2007, 2009 Free Software Foundation, Inc.
+#
+# Written by Tom Tromey <tromey@cygnus.com>.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
+# This file is maintained in Automake, please report
+# bugs to <bug-automake@gnu.org> or send patches to
+# <automake-patches@gnu.org>.
+
+# Examine the first argument only: complain when it is missing, or handle
+# one leading option.  Anything else falls through untouched and is
+# treated as the INPUT file below.
+case "$1" in
+  '')
+    echo "$0: No files given.  Try \`$0 --help' for more information." 1>&2
+    exit 1
+    ;;
+  --basedir)
+    # NOTE(review): basedir is stored here but is not referenced anywhere
+    # else in this script.
+    basedir=$2
+    shift 2
+    ;;
+  -h|--h*)
+    cat <<\EOF
+Usage: ylwrap [--help|--version] INPUT [OUTPUT DESIRED]... -- PROGRAM [ARGS]...
+
+Wrapper for lex/yacc invocations, renaming files as desired.
+
+  INPUT is the input file
+  OUTPUT is one file PROG generates
+  DESIRED is the file we actually want instead of OUTPUT
+  PROGRAM is program to run
+  ARGS are passed to PROG
+
+Any number of OUTPUT,DESIRED pairs may be used.
+
+Report bugs to <bug-automake@gnu.org>.
+EOF
+    # Exit with the status of the cat above.
+    exit $?
+    ;;
+  -v|--v*)
+    echo "ylwrap $scriptversion"
+    # Exit with the status of the echo above.
+    exit $?
+    ;;
+esac
+
+
+# The input.
+input="$1"
+shift
+# The program is later run from inside a scratch subdirectory, so a
+# relative input path has to be made absolute first.
+case "$input" in
+  [\\/]* | ?:[\\/]*)
+    # Absolute path; do nothing.
+    ;;
+  *)
+    # Relative path.  Make it absolute.
+    input="`pwd`/$input"
+    ;;
+esac
+
+# Collect the OUTPUT DESIRED arguments up to the `--' separator.  They are
+# joined with single spaces and split again later with `set X $pairlist',
+# so names containing whitespace are not preserved.
+pairlist=
+while test "$#" -ne 0; do
+  if test "$1" = "--"; then
+    shift
+    break
+  fi
+  pairlist="$pairlist $1"
+  shift
+done
+
+# The program to run.
+prog="$1"
+shift
+# Make any relative path in $prog absolute.
+# A name without any directory separator matches neither pattern and is
+# left unchanged.
+case "$prog" in
+  [\\/]* | ?:[\\/]*) ;;
+  *[\\/]*) prog="`pwd`/$prog" ;;
+esac
+
+# FIXME: add hostname here for parallel makes that run commands on
+# other machines.  But that might take us over the 14-char limit.
+dirname=ylwrap$$
+trap "cd '`pwd`'; rm -rf $dirname > /dev/null 2>&1" 1 2 3 15
+mkdir $dirname || exit 1
+
+cd $dirname
+
+case $# in
+  0) "$prog" "$input" ;;
+  *) "$prog" "$@" "$input" ;;
+esac
+ret=$?
+
+if test $ret -eq 0; then
+  set X $pairlist
+  shift
+  first=yes
+  # Since DOS filename conventions don't allow two dots,
+  # the DOS version of Bison writes out y_tab.c instead of y.tab.c
+  # and y_tab.h instead of y.tab.h. Test to see if this is the case.
+  y_tab_nodot="no"
+  if test -f y_tab.c || test -f y_tab.h; then
+    y_tab_nodot="yes"
+  fi
+
+  # The directory holding the input.
+  input_dir=`echo "$input" | sed -e 's,\([\\/]\)[^\\/]*$,\1,'`
+  # Quote $INPUT_DIR so we can use it in a regexp.
+  # FIXME: really we should care about more than `.' and `\'.
+  input_rx=`echo "$input_dir" | sed 's,\\\\,\\\\\\\\,g;s,\\.,\\\\.,g'`
+
+  while test "$#" -ne 0; do
+    from="$1"
+    # Handle y_tab.c and y_tab.h output by DOS
+    if test $y_tab_nodot = "yes"; then
+      if test $from = "y.tab.c"; then
+       from="y_tab.c"
+      else
+       if test $from = "y.tab.h"; then
+         from="y_tab.h"
+       fi
+      fi
+    fi
+    if test -f "$from"; then
+      # If $2 is an absolute path name, then just use that,
+      # otherwise prepend `../'.
+      case "$2" in
+       [\\/]* | ?:[\\/]*) target="$2";;
+       *) target="../$2";;
+      esac
+
+      # We do not want to overwrite a header file if it hasn't
+      # changed.  This avoid useless recompilations.  However the
+      # parser itself (the first file) should always be updated,
+      # because it is the destination of the .y.c rule in the
+      # Makefile.  Divert the output of all other files to a temporary
+      # file so we can compare them to existing versions.
+      if test $first = no; then
+       realtarget="$target"
+       target="tmp-`echo $target | sed s/.*[\\/]//g`"
+      fi
+      # Edit out `#line' or `#' directives.
+      #
+      # We don't want the resulting debug information to point at
+      # an absolute srcdir; it is better for it to just mention the
+      # .y file with no path.
+      #
+      # We want to use the real output file name, not yy.lex.c for
+      # instance.
+      #
+      # We want the include guards to be adjusted too.
+      FROM=`echo "$from" | sed \
+            -e 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/'\
+            -e 's/[^ABCDEFGHIJKLMNOPQRSTUVWXYZ]/_/g'`
+      TARGET=`echo "$2" | sed \
+            -e 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/'\
+            -e 's/[^ABCDEFGHIJKLMNOPQRSTUVWXYZ]/_/g'`
+
+      sed -e "/^#/!b" -e "s,$input_rx,," -e "s,$from,$2," \
+          -e "s,$FROM,$TARGET," "$from" >"$target" || ret=$?
+
+      # Check whether header files must be updated.
+      if test $first = no; then
+       if test -f "$realtarget" && cmp -s "$realtarget" "$target"; then
+         echo "$2" is unchanged
+         rm -f "$target"
+       else
+          echo updating "$2"
+          mv -f "$target" "$realtarget"
+        fi
+      fi
+    else
+      # A missing file is only an error for the first file.  This
+      # is a blatant hack to let us support using "yacc -d".  If -d
+      # is not specified, we don't want an error when the header
+      # file is "missing".
+      if test $first = yes; then
+        ret=1
+      fi
+    fi
+    shift
+    shift
+    first=no
+  done
+else
+  ret=$?
+fi
+
+# Remove the directory.
+cd ..
+rm -rf $dirname
+
+exit $ret
+
+# Local Variables:
+# mode: shell-script
+# sh-indentation: 2
+# eval: (add-hook 'write-file-hooks 'time-stamp)
+# time-stamp-start: "scriptversion="
+# time-stamp-format: "%:y-%02m-%02d.%02H"
+# time-stamp-time-zone: "UTC"
+# time-stamp-end: "; # UTC"
+# End:
author	Anas Nashif <anas.nashif@intel.com>
	Mon, 5 Nov 2012 15:23:53 +0000 (07:23 -0800)
committer	Anas Nashif <anas.nashif@intel.com>
	Mon, 5 Nov 2012 15:23:53 +0000 (07:23 -0800)